c.lm3.a.knit

Prediction and Calibration

Example: David Sullivan’s heme data

h2o2 <- rep(c(0,10,25,50), each=3)
pf3d7 <- c(0.3399,0.3563,0.3538,
           0.3168,0.3054,0.3174,
           0.2460,0.2618,0.2848,
           0.1535,0.1613,0.1525)
dat <- data.frame(conc=h2o2, od=pf3d7)

The regression of optical density on hydrogen peroxide concentration.

lm.fit <- lm(od ~ conc, data=dat)
lm.out <- summary(lm.fit)
lm.out$coef

##                 Estimate   Std. Error   t value     Pr(>|t|)
## (Intercept)  0.353050073 0.0049955194  70.67335 7.844454e-15
## conc        -0.003870984 0.0001759324 -22.00268 8.426407e-10

Predict the optical density at H202 concentration 30.

b0.hat <- lm.out$coef[1,1]
b0.hat

## [1] 0.3530501

b1.hat <- lm.out$coef[2,1]
b1.hat

## [1] -0.003870984

y.hat.30 <- b0.hat+b1.hat*30
y.hat.30

## [1] 0.2369206

Get the 95% confidence interval for the mean optical density at H202 concentration 30.

n <- length(dat$conc)
n

## [1] 12

m <- mean(dat$conc)
m

## [1] 21.25

SXX <- sum((dat$conc-m)^2)
SXX

## [1] 4256.25

s.hat <- lm.out$sigma
s.hat

## [1] 0.01147782

y.hat.30 + c(-1,1)*qt(0.975,n-2)*s.hat*sqrt(1/n+(30-m)^2/SXX)

## [1] 0.2287800 0.2450611

Easier:

predict(lm.fit, data.frame(conc=30), interval="confidence")

##         fit     lwr       upr
## 1 0.2369206 0.22878 0.2450611

Get the 95% prediction interval for a new observation at H202 concentration 30.

y.hat.30 + c(-1,1)*qt(0.975,n-2)*s.hat*sqrt(1+1/n+(30-m)^2/SXX)

## [1] 0.2100820 0.2637591

Easier:

predict(lm.fit, data.frame(conc=30), interval="prediction")

##         fit      lwr       upr
## 1 0.2369206 0.210082 0.2637591

Confidence and prediction intervals: a simple example

Generate some random data, 20 design points equally spaced on the x-axis, and assume E[y|x] = 10 + 5x. Assume Gaussian noise with mean zero and standard deviation 10.

set.seed(1)
x <- 1:20
x

##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20

eps <- rnorm(20, sd=10)
eps

##  [1]  -6.2645381   1.8364332  -8.3562861  15.9528080   3.2950777  -8.2046838   4.8742905   7.3832471
##  [9]   5.7578135  -3.0538839  15.1178117   3.8984324  -6.2124058 -22.1469989  11.2493092  -0.4493361
## [17]  -0.1619026   9.4383621   8.2122120   5.9390132

y <- 10 + 5*x + eps
y

##  [1]   8.735462  21.836433  16.643714  45.952808  38.295078  31.795316  49.874291  57.383247  60.757814
## [10]  56.946116  80.117812  73.898432  68.787594  57.853001  96.249309  89.550664  94.838097 109.438362
## [19] 113.212212 115.939013

par(las=1)
plot(x,y)

Fit a linear regression model.

xydat <- data.frame(x,y)
head(xydat)

##   x         y
## 1 1  8.735462
## 2 2 21.836433
## 3 3 16.643714
## 4 4 45.952808
## 5 5 38.295078
## 6 6 31.795316

lm.fit <- lm(y ~ x, data=xydat)
summary(lm.fit)

## 
## Call:
## lm(formula = y ~ x, data = xydat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.808  -5.168   1.875   4.833  15.450 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   9.6391     4.3158   2.233   0.0385 *  
## x             5.2158     0.3603  14.477 2.33e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.291 on 18 degrees of freedom
## Multiple R-squared:  0.9209, Adjusted R-squared:  0.9165 
## F-statistic: 209.6 on 1 and 18 DF,  p-value: 2.33e-11

confint(lm.fit)

##                2.5 %    97.5 %
## (Intercept) 0.571931 18.706212
## x           4.458915  5.972736

The fitted values.

lm.fit$fitted

##         1         2         3         4         5         6         7         8         9        10        11 
##  14.85490  20.07072  25.28655  30.50237  35.71820  40.93402  46.14985  51.36568  56.58150  61.79733  67.01315 
##        12        13        14        15        16        17        18        19        20 
##  72.22898  77.44480  82.66063  87.87645  93.09228  98.30810 103.52393 108.73976 113.95558

fitted(lm.fit)

##         1         2         3         4         5         6         7         8         9        10        11 
##  14.85490  20.07072  25.28655  30.50237  35.71820  40.93402  46.14985  51.36568  56.58150  61.79733  67.01315 
##        12        13        14        15        16        17        18        19        20 
##  72.22898  77.44480  82.66063  87.87645  93.09228  98.30810 103.52393 108.73976 113.95558

predict(lm.fit)

##         1         2         3         4         5         6         7         8         9        10        11 
##  14.85490  20.07072  25.28655  30.50237  35.71820  40.93402  46.14985  51.36568  56.58150  61.79733  67.01315 
##        12        13        14        15        16        17        18        19        20 
##  72.22898  77.44480  82.66063  87.87645  93.09228  98.30810 103.52393 108.73976 113.95558

Make a picture with the regression line and the fitted values.

par(las=1)
plot(xydat$x, xydat$y, xlab="x", ylab="y")
abline(lsfit(xydat$x, xydat$y))
points(xydat$x, predict(lm.fit), pch=21, bg="grey")

You can use the ‘predict’ function to get predicted values and confidence limits for the mean response for any value of x.

predict(lm.fit, data.frame(x=c(5,10,20)), interval="confidence")

##         fit       lwr       upr
## 1  35.71820  29.68662  41.74978
## 2  61.79733  57.41639  66.17826
## 3 113.95558 105.54399 122.36717

predict(lm.fit, data.frame(x=c(5,10,20)), interval="confidence", level=0.99)

##         fit       lwr       upr
## 1  35.71820  27.45442  43.98198
## 2  61.79733  55.79508  67.79958
## 3 113.95558 102.43100 125.48016

Plot the confidence band for the mean response.

xx <- seq(1, 20, by=0.1)
predict.mean <- predict(lm.fit, data.frame(x=xx), interval="confidence")
par(las=1)
plot(xydat$x, xydat$y, xlab="x", ylab="y", ylim=range(predict.mean), col="lightgrey")
lines(xx, predict.mean[,1], lwd=2)
lines(xx, predict.mean[,2], lty=2)
lines(xx, predict.mean[,3], lty=2)

You can also use the ‘predict’ function to get prediction intervals for new observations.

predict(lm.fit, data.frame(x=c(5,10,20)), interval="prediction")

##         fit      lwr       upr
## 1  35.71820 15.28863  56.14777
## 2  61.79733 41.79283  81.80182
## 3 113.95558 92.70136 135.20980

Plot the confidence band for the mean response, and the prediction interval for new observations.

predict.new <- predict(lm.fit, data.frame(x=xx), interval="prediction")
par(las=1)
plot(xydat$x, xydat$y, xlab="x", ylab="y", ylim=range(predict.new), col="lightgrey")
lines(xx, predict.mean[,1], lwd=2)
lines(xx, predict.mean[,2], lty=2)
lines(xx, predict.mean[,3], lty=2)
lines(xx, predict.new[,2], lty=2, col="blue")
lines(xx, predict.new[,3], lty=2, col="blue")

Statistics for Laboratory Scientists ( 140.615 )

Prediction and Calibration

Example: David Sullivan’s heme data

Confidence and prediction intervals: a simple example