For the example, we take 100 random draws from a standard Normal distribution.
set.seed(1)
n <- 100
y <- rnorm(n)
plot(sort(y))
The expected normal quantiles. Note that other possibilities have been proposed as well, for example qnorm((1:n)/(n+1)).
p <- ((1:n)-0.5)/n
p
## [1] 0.005 0.015 0.025 0.035 0.045 0.055 0.065 0.075 0.085 0.095 0.105 0.115 0.125 0.135 0.145 0.155 0.165
## [18] 0.175 0.185 0.195 0.205 0.215 0.225 0.235 0.245 0.255 0.265 0.275 0.285 0.295 0.305 0.315 0.325 0.335
## [35] 0.345 0.355 0.365 0.375 0.385 0.395 0.405 0.415 0.425 0.435 0.445 0.455 0.465 0.475 0.485 0.495 0.505
## [52] 0.515 0.525 0.535 0.545 0.555 0.565 0.575 0.585 0.595 0.605 0.615 0.625 0.635 0.645 0.655 0.665 0.675
## [69] 0.685 0.695 0.705 0.715 0.725 0.735 0.745 0.755 0.765 0.775 0.785 0.795 0.805 0.815 0.825 0.835 0.845
## [86] 0.855 0.865 0.875 0.885 0.895 0.905 0.915 0.925 0.935 0.945 0.955 0.965 0.975 0.985 0.995
x <- qnorm(p)
x
## [1] -2.57582930 -2.17009038 -1.95996398 -1.81191067 -1.69539771 -1.59819314 -1.51410189 -1.43953147
## [9] -1.37220381 -1.31057911 -1.25356544 -1.20035886 -1.15034938 -1.10306256 -1.05812162 -1.01522203
## [17] -0.97411388 -0.93458929 -0.89647336 -0.85961736 -0.82389363 -0.78919165 -0.75541503 -0.72247905
## [25] -0.69030882 -0.65883769 -0.62800601 -0.59776013 -0.56805150 -0.53883603 -0.51007346 -0.48172685
## [33] -0.45376219 -0.42614801 -0.39885507 -0.37185609 -0.34512553 -0.31863936 -0.29237490 -0.26631061
## [41] -0.24042603 -0.21470157 -0.18911843 -0.16365849 -0.13830421 -0.11303854 -0.08784484 -0.06270678
## [49] -0.03760829 -0.01253347 0.01253347 0.03760829 0.06270678 0.08784484 0.11303854 0.13830421
## [57] 0.16365849 0.18911843 0.21470157 0.24042603 0.26631061 0.29237490 0.31863936 0.34512553
## [65] 0.37185609 0.39885507 0.42614801 0.45376219 0.48172685 0.51007346 0.53883603 0.56805150
## [73] 0.59776013 0.62800601 0.65883769 0.69030882 0.72247905 0.75541503 0.78919165 0.82389363
## [81] 0.85961736 0.89647336 0.93458929 0.97411388 1.01522203 1.05812162 1.10306256 1.15034938
## [89] 1.20035886 1.25356544 1.31057911 1.37220381 1.43953147 1.51410189 1.59819314 1.69539771
## [97] 1.81191067 1.95996398 2.17009038 2.57582930
plot(x, p)
abline(h=c(0,1))
axis(4, p, rep("",n))
The qq-plot ‘from scratch’, and the built-in function.
par(mfrow=c(1,2))
plot(x, sort(y), main="from scratch")
qqnorm(y)
Fancier plot. Re-order the factor levels (strain) for plotting.
library(SPH.140.615)
il10 <- cbind(il10, logIL10=log10(il10$IL10))
il10$Strain <- factor(il10$Strain, levels=rev(c("A","B6",as.character(c(1,2,4:8,10:15,17:19,24:26)))))
Calculate the strain means.
m <- tapply(il10$IL10, il10$Strain, mean)
logm <- tapply(il10$logIL10, il10$Strain, mean)
Plot the data. Note: ‘segments’ adds line segments at the strain means, ‘abline’ adds horizontal lines to indicate the groups, and ‘expression’ allows you to put fancy things in the axis labels.
par(las=1, mfrow=c(1,2))
stripchart(il10$IL10~il10$Strain,
method="jitter", pch=1, ylab="Strain", xlab="IL10 response", jitter=0.2)
segments(m, (1:21)-0.3, m, (1:21)+0.3, lwd=2, col="blue")
abline(h=1:21, lty=2, col="gray")
stripchart(il10$logIL10~il10$Strain,
method="jitter", pch=1, ylab="Strain", xlab=expression(paste(log[10]," IL10 response")), jitter=0.2)
segments(logm, (1:21)-0.3, logm, (1:21)+0.3, lwd=2, col="blue")
abline(h=1:21, lty=2, col="gray")
You can generate a bunch of diagnostic plots using the ‘plot’ function.
aov.il10 <- aov(IL10 ~ Strain, data=il10)
aov.logil10 <- aov(logIL10 ~ Strain, data=il10)
par(mfrow=c(2,2))
plot(aov.il10)
plot(aov.logil10)
The brute-force method for IL10.
s <- tapply(il10$IL10/1000, il10$Strain, sd)
s
## 26 25 24 19 18 17 15 14 13 12 11
## 0.1734159 0.3655768 0.8545685 1.2901688 0.6531433 0.5424332 1.0700615 0.1712304 0.4702388 1.1606007 0.5594971
## 10 8 7 6 5 4 2 1 B6 A
## 0.6338465 0.8701644 0.2195076 1.2141755 1.6383134 1.6405483 0.4498755 1.9180254 0.7141536 0.5934426
n <- table(il10$Strain)
n
##
## 26 25 24 19 18 17 15 14 13 12 11 10 8 7 6 5 4 2 1 B6 A
## 5 5 10 5 5 5 5 5 10 10 5 5 10 5 5 5 10 10 9 9 8
spsq <- sum((n-1)*s^2)/sum(n-1)
spsq
## [1] 0.9927934
xsq <- (sum(n)-length(n)) * log(spsq) - sum((n-1)*log(s^2))
xsq
## [1] 79.8956
K <- 1 + (sum(1/(n-1)) - 1/sum(n-1))/(3*(length(n)-1))
K
## [1] 1.067525
stat <- xsq/K
stat
## [1] 74.84187
pchisq(stat, length(n)-1, lower.tail=FALSE)
## [1] 2.894759e-08
Using the built-in function.
bartlett.test(il10$IL10 ~ il10$Strain)
##
## Bartlett test of homogeneity of variances
##
## data: il10$IL10 by il10$Strain
## Bartlett's K-squared = 74.842, df = 20, p-value = 2.895e-08
The brute-force method for log10(IL10).
logs <- tapply(il10$logIL10, il10$Strain, sd)
logs
## 26 25 24 19 18 17 15 14 13 12
## 0.11822716 0.29026204 0.27849516 0.25272549 0.16997770 0.19775197 0.35512698 0.05671307 0.20586828 0.34798329
## 11 10 8 7 6 5 4 2 1 B6
## 0.27747688 0.41181170 0.24434276 0.08611903 0.50270177 0.31693434 0.28082376 0.18508639 0.34149839 0.22880190
## A
## 0.22178466
logspsq <- sum((n-1)*logs^2)/sum(n-1)
logspsq
## [1] 0.07429708
logxsq <- (sum(n)-length(n)) * log(logspsq) - sum((n-1)*log(logs^2))
logxsq
## [1] 33.99216
logstat <- logxsq/K
logstat
## [1] 31.84202
pchisq(logstat, length(n)-1, lower.tail=FALSE)
## [1] 0.04501105
Using the built-in function.
bartlett.test(il10$logIL10 ~ il10$Strain)
##
## Bartlett test of homogeneity of variances
##
## data: il10$logIL10 by il10$Strain
## Bartlett's K-squared = 31.842, df = 20, p-value = 0.04501