c.lm1.a.knit

Linear Regression - Advanced

Example from class: David Sullivan’s heme data

h2o2 <- rep(c(0,10,25,50),each=3)
pf3d7 <- c(0.3399,0.3563,0.3538,
           0.3168,0.3054,0.3174,
           0.2460,0.2618,0.2848,
           0.1535,0.1613,0.1525)

The regression of optical density on hydrogen peroxide concentration, from scratch

n <- length(h2o2)
n

## [1] 12

yb <- mean(pf3d7)
yb

## [1] 0.2707917

xb <- mean(h2o2)
xb

## [1] 21.25

sxy <- sum((h2o2-xb)*(pf3d7-yb))
sxy

## [1] -16.47588

sxx <- sum((h2o2-xb)^2)
sxx

## [1] 4256.25

b1hat <- sxy/sxx
b1hat

## [1] -0.003870984

b0hat <- yb-b1hat*xb
b0hat

## [1] 0.3530501

yhat <- b0hat + b1hat*h2o2
yhat

##  [1] 0.3530501 0.3530501 0.3530501 0.3143402 0.3143402 0.3143402 0.2562755 0.2562755 0.2562755 0.1595009
## [11] 0.1595009 0.1595009

rss <- sum((pf3d7-yhat)^2)
rss

## [1] 0.001317403

sigmahat <- sqrt(rss/(n-2))
sigmahat

## [1] 0.01147782

Testing parameters

Testing whether the intercept is equal to zero.

se.b0hat <- sigmahat*sqrt(1/n+xb^2/sxx)
se.b0hat

## [1] 0.004995519

t.stat <- b0hat/se.b0hat
t.stat

## [1] 70.67335

p <- 2*pt(-abs(t.stat),n-2)
p

## [1] 7.844454e-15

Testing whether the slope is equal to zero.

se.b1hat <- sigmahat/sqrt(sxx)
se.b1hat

## [1] 0.0001759324

t.stat <- b1hat/se.b1hat
t.stat

## [1] -22.00268

p <- 2*pt(-abs(t.stat),n-2)
p

## [1] 8.426407e-10

Compare to the results from the built-in ‘lm’ function.

lm.out <- lm(pf3d7 ~ h2o2)
lm.sum <- summary(lm.out)
lm.sum$coef

##                 Estimate   Std. Error   t value     Pr(>|t|)
## (Intercept)  0.353050073 0.0049955194  70.67335 7.844454e-15
## h2o2        -0.003870984 0.0001759324 -22.00268 8.426407e-10

lm.sum$coef[,1]

##  (Intercept)         h2o2 
##  0.353050073 -0.003870984

lm.sum$coef[,2]

##  (Intercept)         h2o2 
## 0.0049955194 0.0001759324

lm.sum$coef[,3]

## (Intercept)        h2o2 
##    70.67335   -22.00268

lm.sum$coef[,4]

##  (Intercept)         h2o2 
## 7.844454e-15 8.426407e-10

The variance-covariance matrix

The parameter estimate variance-covariance matrix (not including sigma).

lm.sum$cov.unscaled

##              (Intercept)          h2o2
## (Intercept)  0.189427313 -0.0049926579
## h2o2        -0.004992658  0.0002349486

The square root of the diagonal elements.

diag(lm.sum$cov)

##  (Intercept)         h2o2 
## 0.1894273128 0.0002349486

sqrt(diag(lm.sum$cov))

## (Intercept)        h2o2 
##  0.43523248  0.01532803

The estimated parameter standard errors.

lm.sum$sigma * sqrt(diag(lm.sum$cov))

##  (Intercept)         h2o2 
## 0.0049955194 0.0001759324

Building confidence intervals for the regression parameters

b0hat + c(-1,1)*qt(0.975,n-2)*se.b0hat

## [1] 0.3419194 0.3641808

b1hat + c(-1,1)*qt(0.975,n-2)*se.b1hat

## [1] -0.004262986 -0.003478982

Compare to the output from the ‘confint’ function.

confint(lm.out)

##                    2.5 %       97.5 %
## (Intercept)  0.341919363  0.364180784
## h2o2        -0.004262986 -0.003478982

Checking model assumptions

lm.out$fitted

##         1         2         3         4         5         6         7         8         9        10        11 
## 0.3530501 0.3530501 0.3530501 0.3143402 0.3143402 0.3143402 0.2562755 0.2562755 0.2562755 0.1595009 0.1595009 
##        12 
## 0.1595009

lm.out$residuals

##             1             2             3             4             5             6             7 
## -0.0131500734  0.0032499266  0.0007499266  0.0024597651 -0.0089402349  0.0030597651 -0.0102754772 
##             8             9            10            11            12 
##  0.0055245228  0.0285245228 -0.0060008811  0.0017991189 -0.0070008811

Alternatively, there are also generic functions.

fitted(lm.out)

##         1         2         3         4         5         6         7         8         9        10        11 
## 0.3530501 0.3530501 0.3530501 0.3143402 0.3143402 0.3143402 0.2562755 0.2562755 0.2562755 0.1595009 0.1595009 
##        12 
## 0.1595009

residuals(lm.out)

##             1             2             3             4             5             6             7 
## -0.0131500734  0.0032499266  0.0007499266  0.0024597651 -0.0089402349  0.0030597651 -0.0102754772 
##             8             9            10            11            12 
##  0.0055245228  0.0285245228 -0.0060008811  0.0017991189 -0.0070008811

The residual qq plot.

qqnorm(lm.out$residuals, main="")
qqline(lm.out$residuals, col="blue", lty=2)

Fitted values versus residuals.

plot(lm.out$fitted, lm.out$residuals, pch=1, xlab="fitted values", ylab="residuals")
abline(h=0, col="blue", lty=2)