##needs to run where NSB experiment data is library(affy) library(geneplotter) pd <- read.phenoData("pdata.txt",sep="\t",as.is=TRUE) pd@varLabels <- list(label="0 means no label, 1 means label",sample_type="type of nucleic acid hybridized",filename="Original filenames") Data <- ReadAffy(filenames=pd$filename,phenoData=pd,description="miame.txt") bitmap("figure-01.png",res=300,width=9,pointsize=20) mypar(1,1) boxplot(Data,col=1:6) dev.off() bitmap("figure-02.png",res=300,pointsize=20) mypar(1,1) hist(Data[,c(2,4)],col=1:6,lwd=2) dev.off() ###SpikeIn library(SpikeIn) data(SpikeIn95) Index <- which(probeNames(SpikeIn95)%in%colnames(pData(SpikeIn95))) pms <- pm(SpikeIn95)[Index,] pns <- probeNames(SpikeIn95)[Index] nominal <- sapply(1:ncol(pms),function(i) unlist(pData(SpikeIn95)[i,pns])) avg=2^tapply(log2(as.vector(pms)),as.vector(nominal),mean) bitmap("figure-03.png",res=300,pointsize=20) mypar(1,1) plot(as.numeric(names(avg)),avg,xlab="Nominal Concentration",ylab="Observered Average Intensity",type="b",xlim=c(0,8),ylim=c(0,410)) dev.off() bitmap("figure-04.png",res=300,pointsize=20) mypar(1,1) plot(log2(as.numeric(names(avg))),log2(avg),xlab="Nominal Log Concentration",ylab="Observered Average Log Intensity",type="b",xlim=log2(c(0.25,8)),ylim=log2(c(180,410))) dev.off() Index <- which(pns==colnames(pData(SpikeIn95))[1]) colIndex <- c(1:12,13,17) ##so that each conc just once x=nominal[Index[1],colIndex] x[1] <- 0.125 ##so that it shows up in log scale y=t(pms[Index,colIndex]) bitmap("figure-05.png",res=300,pointsize=20) mypar(1,1) matplot(log2(x),log2(y),xlab="Nominal Log Concentration",ylab="Observed log concentration",type="l",lwd=2,xaxt="n") axis(1,at=c(-3,seq(-2,10,2)),label=c("log(0)",as.character(seq(-2,10,2)))) dev.off() data(SpikeIn133) PMs=log2(pm(SpikeIn133[,1])) MMs=log2(mm(SpikeIn133[,1])) bitmap("figure-06.png",res=300,pointsize=20) mypar(1,1) smoothScatter(MMs,PMs) abline(0,1) dev.off() MM.Larger.Than.9 <- which(MMs>9) bitmap("figure-07.png",res=300,pointsize=20) mypar(1,1) smoothScatter(MMs[MM.Larger.Than.9],PMs[MM.Larger.Than.9]) abline(0,1) dev.off() ## PMs=log2(pm(Data[,4])) MMs=log2(mm(Data[,4])) ##the following 2 take a long time bitmap("figure-08.png",res=300,pointsize=20) mypar(1,1) plot(MMs,PMs,pch=".") abline(0,1) dev.off() bitmap("figure-09.png",res=300,pointsize=20) mypar(1,1) smoothScatter(MMs,PMs) abline(0,1) dev.off() ###Simultion library(MASS) mus <- 2^c(runif(10000,0,10),runif(1000,10,12),runif(800,12,14),runif(400,14,16)) N <- length(mus) bg <- 2^mvrnorm(N,c(5,5),matrix(2*c(1,.6,.6,1),2,2)) eps <- 2^mvrnorm(N,rep(0,4),diag(4)*.05) delt <- 2^mvrnorm(N,rep(0,2),diag(2)*.03) opt <- 32 PM1 <- opt+mus*delt[,1]+bg[,1]*eps[,1] MM1 <- opt+bg[,2]*eps[,2] PM2 <- opt+mus*delt[,2]+bg[,1]*eps[,3] MM2 <- opt+bg[,2]*eps[,4] for(i in 1:5){ k=seq(0,1,len=5)[i] bitmap(paste("figure-10-",i,".png",sep=""),width=12,res=300,pointsize=20) mypar(1,2) Index = which(PM1-k*MM1 > 0 & PM2-k*MM2 > 0) e1=log2(PM1[Index]-k*MM1[Index]) e2=log2(PM2[Index]-k*MM2[Index]) A <- (e1+e2)/2 M <- e2-e1 plot(A,M,ylim=c(-4,4),xlim=c(0,16),cex=.25,pch=16,main="Precision") fc2Index <- which(abs(M)>1) points(A[fc2Index],M[fc2Index],cex=.25,pch=16,col="red") nominal=mus[Index] y=e1[order(nominal)[seq(1,length(nominal),len=500)]] x=log2(nominal)[order(nominal)[seq(1,length(nominal),len=500)]] fit1 <- loess(y~x) plot(log2(nominal),e1,xlim=log2(range(nominal)),ylim=log2(range(nominal)),cex=.25,pch=16,main="Accuracy") lines(x,fit1$fitted,col=1,lwd=2) abline(0,1,col=2,lwd=2,lty=2) dev.off() } if(!file.exists("esets.rda")){ ##this takes a long time rma95 <- rma(SpikeIn95) mas95 <- mas5(SpikeIn95) save(rma95,mas95,file="esets.rda") } else{load("esets.rda")} exprs(mas95)<-log2(exprs(mas95)) bitmap("figure-11.png",res=300,pointsize=20,width=12) mypar(1,2) whats <- c("mas95","rma95") mains <- c("Similar to k=1","Similar to k=0") for(i in 1:2){ x=get(whats[i]) A<- (exprs(x)[,13]+exprs(x)[,17])/2 M<- exprs(x)[,17]-exprs(x)[,13] plot(A,M,ylim=c(-4,4),xlim=c(2,16),cex=.25,pch=16,main=mains[i]) Index <- which(abs(M)>1) points(A[Index],M[Index],cex=.25,pch=16,col="red") } dev.off() Index <- match(colnames(pData(mas95)),geneNames(mas95)) nominal <- unlist(log2(pData(mas95)[17,]/pData(mas95)[13,])) whats <- c("mas95","rma95") for(i in 1:2){ x=get(whats[i]) A<- (exprs(x)[,13]+exprs(x)[,17])/2 M<- exprs(x)[,17]-exprs(x)[,13] bitmap(paste("figure-11-",i,".png",sep=""),res=300,pointsize=20) mypar(1,1) smoothScatter(A,M,ylim=c(-11,5),xlim=c(2,16),cex=.25,pch=16) text(A[Index],M[Index],seq(along=Index),col=c(2,4,6)[as.numeric(as.factor(nominal))]) dev.off() } ##this will be much much easier with oligo package library(gcrma) cdfname <- cdfName(Data) cleancdf <- cleancdfname(cdfname, addcdf = FALSE) probepackagename <- paste(cleancdf, "probe", sep = "") library(probepackagename,character.only=TRUE) p <- get(probepackagename) cleancdf <- cleancdfname(cdfname, addcdf = FALSE) cdfpackagename <- paste(cleancdf, "cdf", sep = "") library(cdfpackagename,character.only=TRUE) tmp <- get("xy2i", paste("package:", cdfpackagename, sep = "")) pmIndex <- unlist(pmindex(Data)) subIndex <- match(tmp(p$x, p$y), pmIndex) pmSequence <- vector("character",length(pmIndex)) pmSequence[subIndex] <- p$sequence Index <-sample(length(pmIndex),25000) Index <- setdiff(Index,which(pmSequence=="")) seqs <- pmSequence[Index] mat <- .Call("gcrma_getSeq2",paste(seqs,collapse=""),length(seqs),PACKAGE="gcrma") colnames(mat) <- paste(rep(c("A","C","G"),rep(25,3))) PMs <- log2(pm(Data)[Index,4]) fit1 <- lm(PMs~mat) seqeffect = matrix(0,25,4) seqeffect[,1] <- fit1$coef[2:26]; seqeffect[,2] <- fit1$coef[27:51]; seqeffect[,3] <- fit1$coef[52:76]; seqeffect[,4] <- rep(0,25) seqeffect<- sweep(seqeffect,1,rowMeans(seqeffect)) bitmap("figure-12.png",res=300,pointsize=20,width=8) mypar(1,1) matplot(1:25,seqeffect,pch=c("A","C","G","T"),col=brewer.pal(8,"Dark2"),type="b",ylim=max(abs(seqeffect))*c(-1,1),xlab="Position",ylab="Effect") dev.off() cdfname <- cdfName(SpikeIn133) cleancdf <- cleancdfname(cdfname, addcdf = FALSE) probepackagename <- paste(cleancdf, "probe", sep = "") library(probepackagename,character.only=TRUE) p <- get(probepackagename) cleancdf <- cleancdfname(cdfname, addcdf = FALSE) cdfpackagename <- paste(cleancdf, "cdf", sep = "") library(cdfpackagename,character.only=TRUE) tmp <- get("xy2i", paste("package:", cdfpackagename, sep = "")) pmIndex <- unlist(pmindex(SpikeIn133)) subIndex <- match(tmp(p$x, p$y), pmIndex) pmSequence <- vector("character",length(pmIndex)) pmSequence[subIndex] <- p$sequence PMs=log2(pm(SpikeIn133[,1])) MMs=log2(mm(SpikeIn133[,1])) MM.Larger.Than.9 <- which(MMs>9) pmSequence <- pmSequence[MM.Larger.Than.9] PMs <- PMs[MM.Larger.Than.9] MMs <- MMs[MM.Larger.Than.9] tmp <- substr(pmSequence,13,13) CTIndex <- which(tmp%in%c("C","T")) AGIndex <- which(tmp%in%c("A","G")) bitmap("figure-13.png",res=300,pointsize=20) mypar(1,1) smoothScatter(MMs[CTIndex],PMs[CTIndex]) abline(0,1) dev.off() bitmap("figure-14.png",res=300,pointsize=20) mypar(1,1) smoothScatter(MMs[AGIndex],PMs[AGIndex]) abline(0,1) dev.off()