###################################################################### # Computer notes Biostatistics 140.752 # Homework 5 Johns Hopkins University ###################################################################### # Lines beginning with the symbol '#' are comments in R. All other # lines contain code. # # In R for Windows, you may wish to open this file from the menu bar # (File:Display file); you can then easily copy commands into the # command window. (Use the mouse to highlight one or more lines; then # right-click and select "Paste to console".) ###################################################################### ############################################### # 1 ############################################### Y=c(75,67,70,75,65,71,67,67,76,68, 57,58,60,59,62,60,60,57,59,61, 58,61,56,58,57,56,61,60,57,58, 58,59,58,61,57,56,58,57,57,59, 62,66,65,63,64,62,65,65,62,67) X=rep(c("C","G","F","G+F","S"),rep(10,5)) fc=unique(X);k=length(fc);my.fit=rep(0,k);n=rep(0,k) for(j in 1:k){ my.fit[j]=mean(subset(Y,X==fc[j])) n[j]=length(subset(Y,X==fc[j]))} mf=rep(my.fit,n);s2=sum((Y-mf)^2)/(sum(n)-k) cv=rep(0,4);level=0.95 cv[1]=qt(1-(1-level)/2,sum(n)-k) cv[2]=qt(1-(1-level)/(2*k),sum(n)-k) cv[3]=qt(0.5*(1+level^(1/k)),sum(n)-k) cv[4]=sqrt(k*qf(level,k,sum(n)-k)) z=as.list(1:k);names(z)=unique(X) for (j in 1:k){ z2=matrix(0,ncol=3,nrow=4) for (i in 1:4){ z2[i,]=my.fit[j]+c(-1,0,1)*cv[i]*sqrt(s2/n[j])} z2=as.data.frame(z2) names(z2)=c("lower","fitted","upper") row.names(z2)=c("unadjusted","bonferroni","max.modulus","scheffe") z[[j]]=round(z2,rnd)} print(z) ############################################### # 2a ############################################### Y=c( 541,524,561,414,410,457,344,467,464,498,580,471,525,508,566,635,603,714,865, 640,649,540,464,547,460,566,577,631,574,534,571,554,577,628,487,644,640,704, 648,968,587,699,632,591,782,510,610,524) X=c( 52.5,57.2,58.0,52.9,54.4,57.1,45.1,55.3,52.9,55.2,53.0,52.5,57.4,54.5,60.8, 58.6,57.2,54.0,72.4,67.7,66.3,60.2,51.1,51.7,55.1,54.4,54.8,57.9,56.3,49.3, 51.8,51.3,57.8,54.7,48.7,62.9,56.6,58.6,66.3,67.2,62.6,56.3,60.3,50.8,67.2, 57.1,62.3,59.3) # the mylm() function from HW#2 mylm=function(x,y,hatmat=F,addint=F){ if(addint) x=cbind(1,x) if(det(t(x)%*%x)==0) stop("The design matrix is rank deficient!") beta=as.vector(solve(t(x)%*%x)%*%t(x)%*%y) rss=as.vector((t((y-x%*%beta))%*%(y-x%*%beta))) sigma=sqrt(rss/(dim(x)[1]-dim(x)[2])) varbeta=sigma^2*solve(t(x)%*%x) hat=x%*%solve(t(x)%*%x)%*%t(x) fitted=as.vector(x%*%beta) residuals=as.vector(y-x%*%beta) if(hatmat){ z=list(beta=beta,sigma=sigma,varbeta=varbeta,fitted=fitted, residuals=residuals,hat=hat)} else{ z=list(beta=beta,sigma=sigma,varbeta=varbeta,fitted=fitted, residuals=residuals)} return(z)} # this function calculates the F statistics for various # b0/b1 combinations my.image=function(b0,b1,X,Y,A,ai=T){ n0=length(b0);n1=length(b1);n=length(Y);d=dim(A)[1] if(ai) X=cbind(rep(1,n),X) mlm=mylm(X,Y) bh=mlm$beta;s2=(mlm$sigma)^2 m2=solve(A%*%solve(t(X)%*%X)%*%t(A)) z=matrix(ncol=n0,nrow=n1) for (j in 1:n0){ for (k in 1:n1){ b=c(b0[j],b1[k]) m1=A%*%(bh-b) z[j,k]=drop(t(m1)%*%m2%*%m1/(d*s2))}} return(z)} # this function calculates unadjusted CI ci.lm=function(X,Y,level=0.95,ai=T){ n=length(Y);lv=1-(1-level)/2 if(ai) X=cbind(rep(1,n),X) if(is.vector(X)) k=1 if(is.matrix(X)) k=dim(X)[2] mlm=mylm(X,Y) z=matrix(nrow=k,ncol=3) bt=qt(lv,n-k);be=sqrt(diag(mlm$varbeta)) z[,2]=mlm$beta z[,1]=mlm$beta-bt*be z[,3]=mlm$beta+bt*be z=as.data.frame(z) names(z)=c("lower","fitted","upper") row.names(z)=0:(k-1) return(z)} # this function calculates the Scheffe CI ci.sch=function(X,Y,h,level=0.95,ai=T){ n=length(Y) if(ai) X=cbind(rep(1,n),X) if(is.vector(X)) k=1 if(is.matrix(X)) k=dim(X)[2] mlm=mylm(X,Y) bh=mlm$beta;s2=(mlm$sigma)^2 mp=drop(h%*%bh);bm=solve(t(X)%*%X) cl=sqrt(2*qf(level,2,n-2)*s2*drop(h%*%bm%*%h)) z=rep(0,3);z[1]=mp-cl;z[2]=mp;z[3]=mp+cl return(z)} # the actual F statistics for the joint confidence region in the example A=diag(2);b0=seq(-600,100,length=151);b1=seq(8,20,length=151) z=my.image(b0,b1,X,Y,A);z=log(z);z[z<0]=0 # calculate the unadjusted and simultaneous scheffe confidence intervals ci1=ci.lm(X,Y);h=c(0,1);ci2a=ci.sch(X,Y,h);h=c(1,0);ci2b=ci.sch(X,Y,h) # the figure with unadjusted CI par(las=1,pty="s") image(b0,b1,z,col=grey(seq(0.7,0.9,0.01)), xlab=expression(beta[0]),ylab=expression(beta[1])) contour(b0,b1,z,drawlabels=F,add=T,levels=c(log(qf(0.95,2,46)))) points(ci1[1,2],ci1[2,2],cex=1,pch=15) text(-170,15,expression(paste("( ",hat(beta)[0]," , ",hat(beta)[1]," )"))) abline(v=ci1[1,c(1,3)],lty=2);abline(h=ci1[2,c(1,3)],lty=2) # the figure with Scheffe CI par(las=1,pty="s") image(b0,b1,z,col=grey(seq(0.7,0.9,0.01)), xlab=expression(beta[0]),ylab=expression(beta[1])) contour(b0,b1,z,drawlabels=F,add=T,levels=c(log(qf(0.95,2,46)))) points(ci1[1,2],ci1[2,2],cex=1,pch=15) text(-170,15,expression(paste("( ",hat(beta)[0]," , ",hat(beta)[1]," )"))) abline(h=ci2a[c(1,3)],lty=2);abline(v=ci2b[c(1,3)],lty=2) ############################################### # 2b ############################################### # this function calculates the values for the confidence band my.band=function(x,X,Y,level=0.95){ n=length(Y);k=length(x) x=cbind(rep(1,k),x) X=cbind(rep(1,n),X) mlm=mylm(X,Y) bh=mlm$beta;s2=(mlm$sigma)^2 bm=solve(t(X)%*%X) fc=2*qf(level,2,n-2)*s2 z=matrix(ncol=3,nrow=k) for (j in 1:k){ xc=x[j,] cl=sqrt(fc*drop(xc%*%bm%*%xc)) z[j,2]=drop(xc%*%bh) z[j,1]=drop(xc%*%bh)-cl z[j,3]=drop(xc%*%bh)+cl} return(z)} # calculate the values for the auto data x=seq(40,75,1);rgs=my.band(x,X,Y) # generate the plot plot(X,Y,type="n") polygon(c(x,rev(x)),c(rgs[,1],rev(rgs[,3])),col=grey(0.9)) points(X,Y,pch=15) abline(lsfit(X,Y),lty=2) ############################################### # 3 ############################################### n2=20;s2sq=1;n1=n2*2^(-1:3);s1sq=s2sq*2^(-3:3);hm=10000 myt=matrix(0,length(n1),length(s1sq)) for (i in 1:length(n1)){ print(i) for (j in 1:length(s1sq)){ x1=matrix(sqrt(s1sq[j])*rnorm(hm*n1[i]),nrow=hm) x2=matrix(rnorm(n2*hm),nrow=hm) m1=apply(x1,1,mean) m2=apply(x2,1,mean) var1=apply(x1,1,var) var2=apply(x2,1,var) mys=((n1[i]-1)*var1+(n2-1)*var2)/(n1[i]+n2-2) z=abs((m1-m2)/sqrt(mys*(1/n1[i]+1/n2))) myt[i,j]=length(subset(z,z>qt(0.975,n1[i]+n2-2)))}} print(round(myt/hm,2)) ################## # End of hw5.R ##################