Statistics for Laboratory Scientists ( 140.615 )

Analysis of Variance - Model Assumptions and Diagnostics

Example from class: IL10 cytokines

Read the data, and include log10 of IL10 as a column.

library(SPH.140.615)
head(il10)
##   Strain     IL10
## 1      7  985.425
## 2      7 1057.254
## 3      7  989.480
## 4      7  816.093
## 5      7 1410.767
## 6     25 1159.141
il10 <- cbind(il10, logIL10=log10(il10$IL10))
head(il10)
##   Strain     IL10  logIL10
## 1      7  985.425 2.993624
## 2      7 1057.254 3.024179
## 3      7  989.480 2.995407
## 4      7  816.093 2.911740
## 5      7 1410.767 3.149455
## 6     25 1159.141 3.064136

Plot the data.

par(las=1, mfrow=c(1,2))
stripchart(il10$IL10 ~ il10$Strain, pch=1, ylab="Strain", xlab="IL10 response")
abline(h=1:21, lty=2, col="gray")
stripchart(il10$logIL10 ~ il10$Strain, pch=1, ylab="Strain", xlab="log10 IL10 response")
abline(h=1:21, lty=2, col="gray")

The analysis of variance tables.

aov.il10 <- aov(IL10 ~ Strain, data=il10)
anova(aov.il10)
## Analysis of Variance Table
## 
## Response: IL10
##            Df    Sum Sq Mean Sq F value  Pr(>F)  
## Strain     20  33757424 1687871  1.7001 0.04154 *
## Residuals 125 124099169  992793                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
aov.logil10 <- aov(logIL10 ~ Strain, data=il10)
anova(aov.logil10)
## Analysis of Variance Table
## 
## Response: logIL10
##            Df Sum Sq  Mean Sq F value   Pr(>F)   
## Strain     20 3.3476 0.167380  2.2528 0.003575 **
## Residuals 125 9.2871 0.074297                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Plot the residuals. Note that you can get the residuals and fitted values from the output of the ‘aov’ function.

attributes(aov.il10)
## $names
##  [1] "coefficients"  "residuals"     "effects"       "rank"          "fitted.values" "assign"       
##  [7] "qr"            "df.residual"   "contrasts"     "xlevels"       "call"          "terms"        
## [13] "model"        
## 
## $class
## [1] "aov" "lm"
par(las=1, mfrow=c(1,2)) 
stripchart(aov.il10$residuals ~ il10$Strain, pch=1, ylab="Strain", xlab="residuals (IL10)")
abline(h=1:21, lty=2, col="gray")
abline(v=0, lty=2, col="red")
stripchart(aov.logil10$residuals ~ il10$Strain, pch=1, ylab="Strain", xlab="log10 residuals (IL10)")
abline(h=1:21, lty=2, col="gray")
abline(v=0, lty=2, col="red")

The qq-plots of all residuals, with histograms Note: ‘mfcol’ versus ‘mfrow’ leads you to fill up the different plots working down columns rather than across rows, and ‘qqline’ adds a line to the ‘qqnorm’ plot.

par(las=1, mfcol=c(2,2))  
hist(aov.il10$residuals, breaks=30, yaxt="n", ylab="", xlab="Residuals", main="IL10")
qqnorm(aov.il10$residuals, main="IL10")
qqline(aov.il10$residuals, col="blue", lty=2, lwd=1)
hist(aov.logil10$residuals, breaks=30, yaxt="n", ylab="", xlab="Residuals", main="log10 IL10")
qqnorm(aov.logil10$residuals, main="log10 IL10")
qqline(aov.logil10$residuals, col="blue", lty=2, lwd=1)

Plot the residuals versus the fitted values.

par(las=1, mfrow=c(1,2))
plot(aov.il10$fitted, aov.il10$residuals, pch=1, xlab="fitted values",ylab="residuals", main="IL10")
abline(h=0, lty=2, col="red")
plot(aov.logil10$fitted, aov.logil10$residuals, pch=1, xlab="fitted values",ylab="residuals", main="log10 IL10")
abline(h=0, lty=2, col="red")

Plot the standard deviations versus the means.

m <- tapply(il10$IL10, il10$Strain, mean)
s <- tapply(il10$IL10, il10$Strain, sd)  
logm <- tapply(il10$logIL10, il10$Strain, mean)
logs <- tapply(il10$logIL10, il10$Strain, sd)
par(las=1, mfrow=c(1,2))
plot(m, s, pch=1, xlab="Mean", ylab="SD", main="IL10")
plot(logm, logs, pch=1, xlab="Mean", ylab="SD", main="log10 IL10)")

Plot is a generic function, and you can “plot” the fitted ANOVA models!

When you pass an object of class ‘aov’ to the plot function, it creates a series of diagnostic plots.

The first plot (which=1) are the fitted values versus the residuals.

par(las=1,mfrow=c(1,2))
plot(aov.il10,which=1)
plot(aov.logil10,which=1)

The second plot (which=2) is the qq-plot of the residuals.

par(las=1,mfrow=c(1,2))
plot(aov.il10,which=2)
plot(aov.logil10,which=2)

Bartlett’s test for equality of variances

The simple way, using the built-in function.

bartlett.test(il10$IL10 ~ il10$Strain) 
## 
##  Bartlett test of homogeneity of variances
## 
## data:  il10$IL10 by il10$Strain
## Bartlett's K-squared = 74.842, df = 20, p-value = 2.895e-08
bartlett.test(il10$logIL10 ~ il10$Strain) 
## 
##  Bartlett test of homogeneity of variances
## 
## data:  il10$logIL10 by il10$Strain
## Bartlett's K-squared = 31.842, df = 20, p-value = 0.04501