Statistics for Laboratory Scientists ( 140.615 )

Analysis of Variance - Two-Way Models

Example 1 from class: rats and lard

The data.

lard.dat <- data.frame(rsp = c(709,592,679, 538,699,476, 657,508,594, 505,677,539),
                       sex = factor(rep(c("M","F"), each=6)),
                       lard = factor(rep(c("fresh","rancid"), 6)))
lard.dat
##    rsp sex   lard
## 1  709   M  fresh
## 2  592   M rancid
## 3  679   M  fresh
## 4  538   M rancid
## 5  699   M  fresh
## 6  476   M rancid
## 7  657   F  fresh
## 8  508   F rancid
## 9  594   F  fresh
## 10 505   F rancid
## 11 677   F  fresh
## 12 539   F rancid
str(lard.dat)
## 'data.frame':    12 obs. of  3 variables:
##  $ rsp : num  709 592 679 538 699 476 657 508 594 505 ...
##  $ sex : Factor w/ 2 levels "F","M": 2 2 2 2 2 2 1 1 1 1 ...
##  $ lard: Factor w/ 2 levels "fresh","rancid": 1 2 1 2 1 2 1 2 1 2 ...
summary(lard.dat)
##       rsp        sex       lard  
##  Min.   :476.0   F:6   fresh :6  
##  1st Qu.:530.5   M:6   rancid:6  
##  Median :593.0                   
##  Mean   :597.8                   
##  3rd Qu.:677.5                   
##  Max.   :709.0

Plot the data.

par(las=1)
stripchart(split(jitter(lard.dat$rsp,factor=2), list(lard.dat$sex, lard.dat$lard)),
           ylab="response", pch=1, vertical=TRUE, xlim=c(0.5,4.5))

stripchart(split(jitter(lard.dat$rsp,factor=2), list(lard.dat$lard, lard.dat$sex)),
           ylab="response", pch=1, vertical=TRUE, xlim=c(0.5,4.5))

The one-way ANOVA. The “:” symbol pastes the two factors together.

summary(aov(rsp ~ sex:lard, data=lard.dat))
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## sex:lard     3  65904   21968   15.06 0.00118 **
## Residuals    8  11667    1458                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Which groups are different?

TukeyHSD(aov(rsp ~ sex:lard, data=lard.dat))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = rsp ~ sex:lard, data = lard.dat)
## 
## $`sex:lard`
##                        diff        lwr        upr     p adj
## M:fresh-F:fresh     53.0000  -46.85087 152.850867 0.3831394
## F:rancid-F:fresh  -125.3333 -225.18420 -25.482466 0.0162217
## M:rancid-F:fresh  -107.3333 -207.18420  -7.482466 0.0357276
## F:rancid-M:fresh  -178.3333 -278.18420 -78.482466 0.0019873
## M:rancid-M:fresh  -160.3333 -260.18420 -60.482466 0.0038939
## M:rancid-F:rancid   18.0000  -81.85087 117.850867 0.9361534

Means by sex.

sex.means <- tapply(lard.dat$rsp, lard.dat$sex, mean)
sex.means
##     F     M 
## 580.0 615.5

Means by treatment.

lard.means <- tapply(lard.dat$rsp, lard.dat$lard, mean)
lard.means
##    fresh   rancid 
## 669.1667 526.3333

Within-group means.

means <- tapply(lard.dat$rsp, list(lard.dat$sex,lard.dat$lard), mean)
means
##      fresh   rancid
## F 642.6667 517.3333
## M 695.6667 535.3333

An interaction plot.

par(las=1)
interaction.plot(lard.dat$sex, lard.dat$lard, lard.dat$rsp, lty=1, col=c("blue","red"), lwd=2)

The same plot, but reversing the role of “sex” and “lard”.

par(las=1)
interaction.plot(lard.dat$lard, lard.dat$sex, lard.dat$rsp, lty=1, col=c("blue","red"), lwd=2)

The two-way ANOVA. The “*” symbol includes the interaction.

summary(aov(rsp ~ sex * lard, data=lard.dat))
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## sex          1   3781    3781   2.593 0.146036    
## lard         1  61204   61204  41.969 0.000192 ***
## sex:lard     1    919     919   0.630 0.450255    
## Residuals    8  11667    1458                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The model with interaction can also be written this way.

summary(aov(rsp ~ sex + lard + sex:lard, data=lard.dat))
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## sex          1   3781    3781   2.593 0.146036    
## lard         1  61204   61204  41.969 0.000192 ***
## sex:lard     1    919     919   0.630 0.450255    
## Residuals    8  11667    1458                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

A two-way ANOVA with an additive model. The interaction sum of squares and degrees of freedom are then included in the “Residual” SS and DFs.

summary(aov(rsp ~ sex + lard, data=lard.dat))
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## sex          1   3781    3781   2.704    0.135    
## lard         1  61204   61204  43.768 9.75e-05 ***
## Residuals    9  12585    1398                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Diagnostic plots.

plot(aov(rsp ~ sex + lard, data=lard.dat), which=1)

plot(aov(rsp ~ sex + lard, data=lard.dat), which=2)

Example 2 from class: mouse, food and temperature

The data.

library(SPH.140.615)
summary(mouse)
##       rsp                   food       temp   
##  Min.   : 46.22   restricted  :12   18deg:16  
##  1st Qu.: 62.49   unrestricted:12   8deg : 8  
##  Median : 71.78                               
##  Mean   : 79.84                               
##  3rd Qu.: 87.22                               
##  Max.   :144.30

Plot the data. A logarithmic transformation is needed.

par(las=1)
stripchart(split(mouse$rsp,list(mouse$food, mouse$temp)), 
           ylab="", pch=1, vertical=TRUE, xlim=c(0.5,4.5))

stripchart(split(mouse$rsp,list(mouse$food, mouse$temp)), 
           ylab="", pch=1, vertical=TRUE, xlim=c(0.5,4.5), log="y")

stripchart(split(log(mouse$rsp),list(mouse$food, mouse$temp)), 
           ylab="", pch=1, vertical=TRUE, xlim=c(0.5,4.5))

stripchart(split(log(mouse$rsp),list(mouse$temp, mouse$food)), 
           ylab="", pch=1, vertical=TRUE, xlim=c(0.5,4.5))

Means by food.

tapply(log(mouse$rsp), mouse$food, mean)
##   restricted unrestricted 
##     4.120406     4.543194

Means by temperature.

tapply(log(mouse$rsp), mouse$temp, mean)
##    18deg     8deg 
## 4.302683 4.390035

Within-group means.

tapply(log(mouse$rsp), list(mouse$food, mouse$temp), mean)
##                 18deg     8deg
## restricted   4.123502 4.114214
## unrestricted 4.481863 4.665856

Numbers of individuals per condition,

tapply(mouse$rsp, list(mouse$food, mouse$temp), length)
##              18deg 8deg
## restricted       8    4
## unrestricted     8    4

The two-way ANOVA.

aov.out <- aov(log(rsp) ~ food * temp, data=mouse)
summary(aov.out)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## food         1 1.0725  1.0725  21.421 0.000162 ***
## temp         1 0.0407  0.0407   0.813 0.378025    
## food:temp    1 0.0498  0.0498   0.995 0.330479    
## Residuals   20 1.0014  0.0501                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Two-way ANOVA without interaction.

aov.out <- aov(log(rsp) ~ food + temp, data=mouse)
summary(aov.out)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## food         1 1.0725  1.0725  21.426 0.000145 ***
## temp         1 0.0407  0.0407   0.813 0.377463    
## Residuals   21 1.0512  0.0501                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Diagnostic plots.

plot(aov.out,which=1)

plot(aov.out,which=2)

Example 3 from class: flies, density, strain, no replicates

The data.

flies <- data.frame(rsp = c( 9.6, 9.3, 9.3,
                            10.6, 9.1, 9.2,
                             9.8, 9.3, 9.5,
                            10.7, 9.1,10.0,
                            11.1,11.1,10.4,
                            10.9,11.8,10.8,
                            12.8,10.6,10.7),
                   strain = factor(rep(c("OL","BELL","bwb"),7)),
                   density = factor(rep(c(60,80,160,320,640,1280,2560),rep(3,7))))
summary(flies)
##       rsp         strain  density 
##  Min.   : 9.10   BELL:7   60  :3  
##  1st Qu.: 9.30   bwb :7   80  :3  
##  Median :10.40   OL  :7   160 :3  
##  Mean   :10.27            320 :3  
##  3rd Qu.:10.80            640 :3  
##  Max.   :12.80            1280:3  
##                           2560:3

Plot the data.

par(las=1)
interaction.plot(flies$density, flies$strain, flies$rsp, 
                 lwd=2, col=c("blue","red","green3"), lty=1, type="b", 
                 xlab="density", ylab="response")

Note: we can’t fit an interaction. If we try, we have no ability to get an error SS, so we don’t get p-values either.

flies.aovA <- aov(rsp ~ strain * density, data=flies)
summary(flies.aovA) 
##                Df Sum Sq Mean Sq
## strain          2  2.789  1.3943
## density         6 12.543  2.0905
## strain:density 12  4.111  0.3426

Assume an additive model.

flies.aovB = aov(rsp ~ strain + density, data=flies)
summary(flies.aovB) 
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## strain       2  2.789  1.3943   4.069 0.04476 * 
## density      6 12.543  2.0905   6.101 0.00395 **
## Residuals   12  4.111  0.3426                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Example 4

Do four different treatments (A, B, C, D) and three different buffer formulations (X, Y, Z) affect the activity of an enzyme assay (measured as activity units)? For each combination of the factor levels we obtain 5 measurements.

activity <- c(15.6,15.3,14.9,16.0,15.9,
              16.4,16.6,16.3,15.3,15.8,
              15.6,15.0,15.4,15.2,14.9,
              14.8,14.9,15.2,14.0,14.6,
              14.6,14.0,15.1,15.1,14.3,
              14.7,14.7,14.8,13.8,13.8,
              15.0,15.5,15.5,14.9,15.7,
              14.7,15.9,15.9,15.2,16.2,
              14.7,15.0,15.4,14.9,14.9,
              15.2,15.4,14.0,14.7,14.4,
              15.2,14.4,14.7,14.2,15.3,
              13.7,15.2,14.0,14.9,13.2)
treat <- factor(rep(LETTERS[1:4],each=15))
buffer <- factor(rep(rep(LETTERS[24:26],each=5),4))
dat <- data.frame(activity,treat,buffer)
summary(dat)
##     activity     treat  buffer
##  Min.   :13.20   A:15   X:20  
##  1st Qu.:14.70   B:15   Y:20  
##  Median :15.00   C:15   Z:20  
##  Mean   :15.01   D:15         
##  3rd Qu.:15.40                
##  Max.   :16.60

Plot the data.

par(las=1)
stripchart(split(dat$activity, list(dat$treat, dat$buffer)), pch=1, vertical=TRUE)
abline(v=c(4.5,8.5), lty="dotted")

stripchart(split(dat$activity, list(dat$buffer, dat$treat)), pch=1,vertical=TRUE)
abline(v=c(3.5,6.5,9.5), lty="dotted")

Interaction plots.

par(las=1)
interaction.plot(dat$buffer, dat$treat, dat$activity)

interaction.plot(dat$treat, dat$buffer, dat$activity)

A two-way fixed-effects ANOVA with an interaction.

aov.fit <- aov(activity~treat*buffer,data=dat)
summary(aov.fit)
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## treat         3 12.650   4.217  16.407 1.79e-07 ***
## buffer        2  3.382   1.691   6.580  0.00298 ** 
## treat:buffer  6  0.738   0.123   0.478  0.82107    
## Residuals    48 12.336   0.257                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

A two-way fixed-effects ANOVA without an interaction.

aov.fit <- aov(activity~treat+buffer,data=dat)
summary(aov.fit)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## treat        3 12.650   4.217  17.416 4.92e-08 ***
## buffer       2  3.382   1.691   6.985    0.002 ** 
## Residuals   54 13.074   0.242                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Both factors have a significant effect on enzyme activity. Which treatments differ? Which buffers differ?

TukeyHSD(aov.fit,"treat")
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = activity ~ treat + buffer, data = dat)
## 
## $treat
##             diff        lwr        upr     p adj
## B-A -1.053333333 -1.5296113 -0.5770553 0.0000017
## C-A -0.320000000 -0.7962780  0.1562780 0.2935029
## D-A -1.046666667 -1.5229447 -0.5703887 0.0000019
## C-B  0.733333333  0.2570553  1.2096113 0.0008349
## D-B  0.006666667 -0.4696113  0.4829447 0.9999814
## D-C -0.726666667 -1.2029447 -0.2503887 0.0009409
TukeyHSD(aov.fit,"buffer")
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = activity ~ treat + buffer, data = dat)
## 
## $buffer
##       diff        lwr         upr     p adj
## Y-X  0.185 -0.1899868  0.55998679 0.4648195
## Z-X -0.385 -0.7599868 -0.01001321 0.0429661
## Z-Y -0.570 -0.9449868 -0.19501321 0.0016229

Example 1 from class: rats and lard ( revisited )

A two-way ANOVA with an additive model.

summary(aov(rsp ~ sex + lard, data=lard.dat))
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## sex          1   3781    3781   2.704    0.135    
## lard         1  61204   61204  43.768 9.75e-05 ***
## Residuals    9  12585    1398                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

A one-way ANOVA using only the factor lard.

summary(aov(rsp ~ lard, data=lard.dat))
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## lard         1  61204   61204    37.4 0.000113 ***
## Residuals   10  16366    1637                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The same inference using a 2-sample t-test.

subset(lard.dat, lard=="fresh")
##    rsp sex  lard
## 1  709   M fresh
## 3  679   M fresh
## 5  699   M fresh
## 7  657   F fresh
## 9  594   F fresh
## 11 677   F fresh
subset(lard.dat, lard=="rancid")
##    rsp sex   lard
## 2  592   M rancid
## 4  538   M rancid
## 6  476   M rancid
## 8  508   F rancid
## 10 505   F rancid
## 12 539   F rancid
x <- subset(lard.dat, lard=="fresh")$rsp
x
## [1] 709 679 699 657 594 677
y <- subset(lard.dat, lard=="rancid")$rsp
y
## [1] 592 538 476 508 505 539
t.test(x, y, var.equal=TRUE)
## 
##  Two Sample t-test
## 
## data:  x and y
## t = 6.1153, df = 10, p-value = 0.0001134
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   90.7912 194.8755
## sample estimates:
## mean of x mean of y 
##  669.1667  526.3333
t.test(x, y, var.equal=TRUE)$p.val
## [1] 0.0001133845
t.test(x, y, var.equal=TRUE)$stat
##        t 
## 6.115285
t.test(x, y, var.equal=TRUE)$stat^2
##        t 
## 37.39671

These are the same statistics as in the ANOVA using only the factor lard.

summary(aov(rsp ~ lard, data=lard.dat))
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## lard         1  61204   61204    37.4 0.000113 ***
## Residuals   10  16366    1637                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1