######################################### attach("/home2/biostats/fdominic/teaching/LDA/datasets") # within the attach command, you need to specify the locations # of the data sets ######################################################### # EXPLORATORY DATA ANALYSIS # SPLUS FUNCTIONS WRITTEN BY RAFAEL IRIZARRY ######################################################### ## FIGURE 1.3 SPRUCE TREES TREES <- matrix(scan("/home2/biostats/fdominic/teaching/LDA/datasets/trees.data"),byrow=T,ncol=13) TIMES <- TREES[1,] TREES <- TREES[-1,] dimnames(TREES)[[2]] <- list(as.character(TIMES)) ###27,26,12,13 b <- c(1,28,55,67) e <- c(27,54,66,79) titles <- c("Ozone 1","Ozone 2","Control 1","Control 2") postscript("/home2/biostats/fdominic/teaching/LDA/fig/fig1.3.ps") par(mfrow=c(2,2),oma=c(0,0,2,0)) for (i in 1:4){ plot(TIMES,TREES[1,],ylim=range(TREES),ylab="Log-Size",main=titles[i],type="n") apply(TREES[b[i]:e[i],],1,function(y) { lines(TIMES[1:5],y[1:5]) lines(TIMES[6:13],y[6:13]) }) } mtext(side=3,cex=2,text="Figure 1.3: Sitka Spruce Tree Growth",outer=T) dev.off() ####CD4 data CD4.data <- read.table("/home2/biostats/fdominic/teaching/LDA/datasets/cd4.data",col.names=c("Time","CD4","Age","Packs","Drugs","Sex","Cesd","ID")) attach(CD4.data) postscript("/home2/biostats/fdominic/teaching/LDA/fig/fig3.4.ps") plot(Time,CD4,type="n",main="All connected") sapply(ID,function(i){ lines(Time[ID==i],CD4[ID==i]);NULL}) dev.off() postscript("/home2/biostats/fdominic/teaching/LDA/fig/fig3.5A.ps") plot(Time,CD4,pch=".",main="5 random individuals") sapply(sample(ID,5),function(i){ lines(Time[ID==i],CD4[ID==i]);NULL}) dev.off() ###RESIDUALS FOR MOVING AVERAGE (CONTINUES FIGURE 13) ##Actually I cheat and use loess postscript("/home2/biostats/fdominic/teaching/LDA/fig/fig3.5B.ps") Residuals <- loess(CD4~Time,span=0.25,degree=1)$res plot(Time,Residuals,pch=".",main="Figure 3.5: Running mean residuals") sapply(sample(ID,5),function(i){ lines(Time[ID==i],Residuals[ID==i]);NULL}) dev.off() ##########ZAP PLOT ############################## postscript("/home2/biostats/fdominic/teaching/LDA/fig/fig3.6.ps") aux <- tapply(CD4,ID,function(x) x[1]) aux <- names(aux)[order(aux)] aux <- as.numeric(aux[c(1,round(c(.25,.5,.75,1)*length(aux)))]) plot(Time,CD4,pch=".",main="``Quantiles''") sapply(1:5,function(i){ lines(Time[ID==aux[i]],CD4[ID==aux[i]]) text((Time[ID==aux[i]])[1],(CD4[ID==aux[i]])[1],c("0%","25%","50%","75%","100%")[i]) }) dev.off() ### MOVING AVERAGE EXPLANTION PLOT postscript("/home2/biostats/fdominic/teaching/LDA/fig/fig3.10.ps") par(mfrow=c(1,1)) o <- sample(1:length(Time),400) plot(Time[o],CD4[o],pch=".",main="Figure 3.10: Running Mean") b <- c(-2,1,3) - 0.5 e <- b + 1 for(i in 1:length(b)){ abline(v=c(b[i],e[i]),pch=2) oo <- o[Time[o]>b[i] & Time[o]