Methods in Biostatistics 2 (140.652)

Sample size and power calculations

Power in case pop’n SDs are known

# population SDs
sigA <- 2
sigB <- 4

# sample sizes
n <- 5
m <- 15

# standard deviation of Xbar - Ybar
se <- sqrt(sigA^2/n + sigB^2/m)

# difference between population means
delta <- 3

# critical value for alpha = 0.05
C <- qnorm(0.975)

# the power 
1 - pnorm(C - delta/se) + pnorm(-C - delta/se)
## [1] 0.5932266

Power in case pop’n SDs are not known but are equal

# same parameters as above, but we don't know sigA and sigB

# critical value for alpha = 0.05
C <- qt(0.975, n+m-2)

# the power 
1 - pt(C, n+m-2, delta/se) + pt(-C, n+m-2, delta/se)
## [1] 0.5469743

Examples using power.t.test

# A
power.t.test(n=10, delta=5, sd=10)
## 
##      Two-sample t test power calculation 
## 
##               n = 10
##           delta = 5
##              sd = 10
##       sig.level = 0.05
##           power = 0.1838375
##     alternative = two.sided
## 
## NOTE: n is number in *each* group
# B
power.t.test(delta=5, sd=10, power=0.8)
## 
##      Two-sample t test power calculation 
## 
##               n = 63.76576
##           delta = 5
##              sd = 10
##       sig.level = 0.05
##           power = 0.8
##     alternative = two.sided
## 
## NOTE: n is number in *each* group
# C
power.t.test(delta=5, sd=10, power=0.8, alternative="one.sided")
## 
##      Two-sample t test power calculation 
## 
##               n = 50.1508
##           delta = 5
##              sd = 10
##       sig.level = 0.05
##           power = 0.8
##     alternative = one.sided
## 
## NOTE: n is number in *each* group

Computer simulation to estimate power in the case that pop’n SDs are neither known nor equal

# A function to simulate normal data and get the p-value from the t-test.

# n,m = sample sizes
# sigA,sigB = population SDs
# delta = difference between population means

sim <- function(n, m, sigA, sigB, delta){
  x <- rnorm(n, delta, sigA)
  y <- rnorm(m, 0, sigB)
  t.test(x,y)$p.value
}

# For n=5, m=10, sigA = 1, sigB = 2, and delta = 0, 0.5, 1.0, ..., 2.5, run 1000 simulations and get the p-value:

delta <- c(0.0, 0.5, 1.0, 1.5, 2.0, 2.5)
p <- matrix(nrow=1000,ncol=length(delta))
for(i in 1:length(delta)){
  for(j in 1:nrow(p)){
    p[j,i] <- sim(5, 10, 1, 2, delta[i])
  }
}

# Estimate the power
pow <- apply(p, 2, function(a) mean(a<0.05))
print(pow)
## [1] 0.047 0.084 0.215 0.417 0.666 0.840
plot(delta,100*pow,xlab=expression(Delta),ylab="power [ % ]",ylim=c(0,100),pch=20,col="blue",cex=1.5)
abline(h=seq(0,100,10),lty="dotted",col="lightgrey")

End of code