Methods in Biostatistics 2 (140.652)

Non-parametric test

xA <- c(27.0,54.6,33.5,27.6,46.0,22.2,44.2,17.3,15.9,32.8)
xB <- c(17.4,20.5,13.9,14.8,27.9,10.6,33.7,15.4,25.0,24.1)

stripchart(list(xA,xB),vertical=T,pch=21,xlim=c(0.5,2.5))
segments(0.9,mean(xA),1.1,mean(xA),lwd=2,col="red")
segments(1.9,mean(xB),2.1,mean(xB),lwd=2,col="blue")
abline(h=mean(c(xA,xB)),lty=2)

t.test(xA,xB)
## 
##  Welch Two Sample t-test
## 
## data:  xA and xB
## t = 2.5336, df = 14.224, p-value = 0.02364
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   1.822368 21.737632
## sample estimates:
## mean of x mean of y 
##     32.11     20.33
wilcox.test(xA,xB)
## 
##  Wilcoxon rank sum test
## 
## data:  xA and xB
## W = 78, p-value = 0.03546
## alternative hypothesis: true location shift is not equal to 0

Let’s see how one outlier influences the test statistics!

sort(xA)
##  [1] 15.9 17.3 22.2 27.0 27.6 32.8 33.5 44.2 46.0 54.6
sort(xA,decreasing=T)
##  [1] 54.6 46.0 44.2 33.5 32.8 27.6 27.0 22.2 17.3 15.9
order(xA)
##  [1]  9  8  6  1  4 10  3  7  5  2
order(xA,decreasing=T)
##  [1]  2  5  7  3 10  4  1  6  8  9
xAnew <- xA
xAnew[2] <- 99.9
xA
##  [1] 27.0 54.6 33.5 27.6 46.0 22.2 44.2 17.3 15.9 32.8
xAnew
##  [1] 27.0 99.9 33.5 27.6 46.0 22.2 44.2 17.3 15.9 32.8
stripchart(list(xAnew,xB),vertical=T,pch=21,xlim=c(0.5,2.5))
segments(0.9,mean(xAnew),1.1,mean(xAnew),lwd=2,col="red")
segments(1.9,mean(xB),2.1,mean(xB),lwd=2,col="blue")
abline(h=mean(c(xAnew,xB)),lty=2)

t.test(xA,xB)$p.value
## [1] 0.02364417
t.test(xAnew,xB)$p.value # why is the p-value now > 0.05?
## [1] 0.06869718
wilcox.test(xA,xB)$p.value
## [1] 0.03546299
wilcox.test(xAnew,xB)$p.value # why are the p-values the same?
## [1] 0.03546299

Permutation test

Let’s write a function first, to make things easier. Note that the input are two vectors, generically called xa and xb.

my.perm <- function(xa,xb){
  # combine the two vectots
  x <- c(xa,xb)
  # calculate the total length of observations
  n <- length(x)
  # calculate the length of the first vector
  na <- length(xa)
  # shuffle the order of the observations across grups
  z <- x[sample(1:n)]
  # return the t test statistic from the shuffled data
  return(t.test(z[1:na],z[-(1:na)],var.equal=T)$stat)
}

Example:

# Let's try one with our xA and xB
my.perm(xA,xB)
##           t 
## -0.06278934
# And onther one
my.perm(xA,xB)
##         t 
## 0.6157433
# And onther one
my.perm(xA,xB)
##           t 
## -0.06278934
# Let's do 1000 shuffles
nit <- 1000
# Create a vector of length nit (1000)
myres <- rep(NA,nit)
# Fill in the slots of that vector with the permutation test t statistics
for(j in 1:nit) myres[j] <- my.perm(xA,xB)

# Plot the data
hist(myres,breaks=21,col="lightgrey")
# Add the observed test statistic to the plot
tst <- t.test(xA,xB,var.equal=T)$stat 
abline(v=tst,col="red")

# Calculate the permutation p-value
mean(abs(myres)>abs(tst))
## [1] 0.029

End of code