h2o2 <- rep(c(0,10,25,50), each=3)
pf3d7 <- c(0.3399,0.3563,0.3538,
0.3168,0.3054,0.3174,
0.2460,0.2618,0.2848,
0.1535,0.1613,0.1525)
dat <- data.frame(conc=h2o2, od=pf3d7)
The regression of optical density on hydrogen peroxide concentration.
lm.fit <- lm(od ~ conc, data=dat)
lm.out <- summary(lm.fit)
lm.out$coef
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.353050073 0.0049955194 70.67335 7.844454e-15
## conc -0.003870984 0.0001759324 -22.00268 8.426407e-10
Predict the optical density at H202 concentration 30.
b0.hat <- lm.out$coef[1,1]
b0.hat
## [1] 0.3530501
b1.hat <- lm.out$coef[2,1]
b1.hat
## [1] -0.003870984
y.hat.30 <- b0.hat+b1.hat*30
y.hat.30
## [1] 0.2369206
Get the 95% confidence interval for the mean optical density at H202 concentration 30.
n <- length(dat$conc)
n
## [1] 12
m <- mean(dat$conc)
m
## [1] 21.25
SXX <- sum((dat$conc-m)^2)
SXX
## [1] 4256.25
s.hat <- lm.out$sigma
s.hat
## [1] 0.01147782
y.hat.30 + c(-1,1)*qt(0.975,n-2)*s.hat*sqrt(1/n+(30-m)^2/SXX)
## [1] 0.2287800 0.2450611
Easier:
predict(lm.fit, data.frame(conc=30), interval="confidence")
## fit lwr upr
## 1 0.2369206 0.22878 0.2450611
Get the 95% prediction interval for a new observation at H202 concentration 30.
y.hat.30 + c(-1,1)*qt(0.975,n-2)*s.hat*sqrt(1+1/n+(30-m)^2/SXX)
## [1] 0.2100820 0.2637591
Easier:
predict(lm.fit, data.frame(conc=30), interval="prediction")
## fit lwr upr
## 1 0.2369206 0.210082 0.2637591
Generate some random data, 20 design points equally spaced on the x-axis, and assume E[y|x] = 10 + 5x. Assume Gaussian noise with mean zero and standard deviation 10.
set.seed(1)
x <- 1:20
x
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
eps <- rnorm(20, sd=10)
eps
## [1] -6.2645381 1.8364332 -8.3562861 15.9528080 3.2950777 -8.2046838 4.8742905 7.3832471
## [9] 5.7578135 -3.0538839 15.1178117 3.8984324 -6.2124058 -22.1469989 11.2493092 -0.4493361
## [17] -0.1619026 9.4383621 8.2122120 5.9390132
y <- 10 + 5*x + eps
y
## [1] 8.735462 21.836433 16.643714 45.952808 38.295078 31.795316 49.874291 57.383247 60.757814
## [10] 56.946116 80.117812 73.898432 68.787594 57.853001 96.249309 89.550664 94.838097 109.438362
## [19] 113.212212 115.939013
par(las=1)
plot(x,y)
Fit a linear regression model.
xydat <- data.frame(x,y)
head(xydat)
## x y
## 1 1 8.735462
## 2 2 21.836433
## 3 3 16.643714
## 4 4 45.952808
## 5 5 38.295078
## 6 6 31.795316
lm.fit <- lm(y ~ x, data=xydat)
summary(lm.fit)
##
## Call:
## lm(formula = y ~ x, data = xydat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.808 -5.168 1.875 4.833 15.450
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.6391 4.3158 2.233 0.0385 *
## x 5.2158 0.3603 14.477 2.33e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.291 on 18 degrees of freedom
## Multiple R-squared: 0.9209, Adjusted R-squared: 0.9165
## F-statistic: 209.6 on 1 and 18 DF, p-value: 2.33e-11
confint(lm.fit)
## 2.5 % 97.5 %
## (Intercept) 0.571931 18.706212
## x 4.458915 5.972736
The fitted values.
lm.fit$fitted
## 1 2 3 4 5 6 7 8 9 10 11
## 14.85490 20.07072 25.28655 30.50237 35.71820 40.93402 46.14985 51.36568 56.58150 61.79733 67.01315
## 12 13 14 15 16 17 18 19 20
## 72.22898 77.44480 82.66063 87.87645 93.09228 98.30810 103.52393 108.73976 113.95558
fitted(lm.fit)
## 1 2 3 4 5 6 7 8 9 10 11
## 14.85490 20.07072 25.28655 30.50237 35.71820 40.93402 46.14985 51.36568 56.58150 61.79733 67.01315
## 12 13 14 15 16 17 18 19 20
## 72.22898 77.44480 82.66063 87.87645 93.09228 98.30810 103.52393 108.73976 113.95558
predict(lm.fit)
## 1 2 3 4 5 6 7 8 9 10 11
## 14.85490 20.07072 25.28655 30.50237 35.71820 40.93402 46.14985 51.36568 56.58150 61.79733 67.01315
## 12 13 14 15 16 17 18 19 20
## 72.22898 77.44480 82.66063 87.87645 93.09228 98.30810 103.52393 108.73976 113.95558
Make a picture with the regression line and the fitted values.
par(las=1)
plot(xydat$x, xydat$y, xlab="x", ylab="y")
abline(lsfit(xydat$x, xydat$y))
points(xydat$x, predict(lm.fit), pch=21, bg="grey")
You can use the ‘predict’ function to get predicted values and confidence limits for the mean response for any value of x.
predict(lm.fit, data.frame(x=c(5,10,20)), interval="confidence")
## fit lwr upr
## 1 35.71820 29.68662 41.74978
## 2 61.79733 57.41639 66.17826
## 3 113.95558 105.54399 122.36717
predict(lm.fit, data.frame(x=c(5,10,20)), interval="confidence", level=0.99)
## fit lwr upr
## 1 35.71820 27.45442 43.98198
## 2 61.79733 55.79508 67.79958
## 3 113.95558 102.43100 125.48016
Plot the confidence band for the mean response.
xx <- seq(1, 20, by=0.1)
predict.mean <- predict(lm.fit, data.frame(x=xx), interval="confidence")
par(las=1)
plot(xydat$x, xydat$y, xlab="x", ylab="y", ylim=range(predict.mean), col="lightgrey")
lines(xx, predict.mean[,1], lwd=2)
lines(xx, predict.mean[,2], lty=2)
lines(xx, predict.mean[,3], lty=2)
You can also use the ‘predict’ function to get prediction intervals for new observations.
predict(lm.fit, data.frame(x=c(5,10,20)), interval="prediction")
## fit lwr upr
## 1 35.71820 15.28863 56.14777
## 2 61.79733 41.79283 81.80182
## 3 113.95558 92.70136 135.20980
Plot the confidence band for the mean response, and the prediction interval for new observations.
predict.new <- predict(lm.fit, data.frame(x=xx), interval="prediction")
par(las=1)
plot(xydat$x, xydat$y, xlab="x", ylab="y", ylim=range(predict.new), col="lightgrey")
lines(xx, predict.mean[,1], lwd=2)
lines(xx, predict.mean[,2], lty=2)
lines(xx, predict.mean[,3], lty=2)
lines(xx, predict.new[,2], lty=2, col="blue")
lines(xx, predict.new[,3], lty=2, col="blue")