################################################################################ ## Copyright (C) 2012 Johns Hopkins University ################################################################################ remove.events <- function(tab, gap.symptom, gap.rf, cause, outcome) { #All rows except those for reflux and 'OA' events must already have been removed (in simp). stopifnot(all(tab$event%in%c(cause,outcome))) tab = tab[order(tab$seconds),] ##Remove all OA events that happen within 'gap' seconds after the previous OA event with nothing else happening in between: ap.inds = sort(which(tab$event==outcome)) iba = diff(tab$seconds[ap.inds]) ibi = diff(ap.inds) mer = which(iba<=gap.symptom) mer2 = which(ibi==1) mer3 = intersect(mer,mer2) if(length(mer3)>0) tab[ap.inds[mer3+1],] = NA tab = tab[!is.na(tab[,1]),] ##Remove all reflux events that happen within 'gap' seconds before the next reflux event with nothing else happening in between: rf.inds = sort(which(tab$event!=outcome)) iba = diff(tab$seconds[rf.inds]) ibi = diff(rf.inds) mer = which(iba<=gap.rf) mer2 = which(ibi==1) mer3 = intersect(mer,mer2) if(length(mer3)>0) tab[rf.inds[mer3],] = NA tab = tab[!is.na(tab[,1]),] tab } count_events <- function(tab, interval, stat, ns=TRUE, cause, outcome) { #stat="sensitivity" or "ppv" #All rows except those for reflux and symptom events must already have been removed (in simp). stopifnot(all(tab$event%in%c(cause,outcome))) tab = tab[order(tab$seconds),] #tab = remove.events(tab=tab,gap.symptom=gap.symptom,gap.rf=gap.rf,cause=cause,outcome=outcome) if(stat=="sensitivity") { #what percent of the apneas were within 'interval' seconds after a reflux (i.e. what percent of apneas were correctly predicted by reflux) ap.inds = which(tab$event==outcome) prec = rep(NA,length(ap.inds)) inds = 1:nrow(tab) for(i in 1:length(ap.inds)) { th = inds < ap.inds[i] & !inds%in%ap.inds if(any(th)) prec[i] = max(which(th)) } subend = tab$seconds[prec] #if *all* prec NA, subend will have length=length(tab$seconds) if(all(is.na(subend))) subend = subend[1:length(ap.inds)] dif = tab$seconds[ap.inds]-subend dif[is.na(dif)] = Inf stopifnot(all(dif>=0)) if(ns) return(c(mean(dif<=interval),nrow(tab)-length(dif),length(dif))) if(!ns) return(mean(dif<=interval)) } else if(stat=="ppv") { #what percent of the refluxes were 'interval' seconds before an apnea (i.e., what percent of refluxes correctly predicted apnea) rf.inds = which(tab$event!=outcome) succ = rep(NA,length(rf.inds)) inds = 1:nrow(tab) for(i in 1:length(rf.inds)) { th = inds > rf.inds[i] & !inds%in%rf.inds if(any(th)) succ[i] = min(which(th)) } subor = tab$seconds[succ] #if *all* succ NA, subor will have length=length(tab$seconds) if(all(is.na(subor))) subor = subor[1:length(rf.inds)] dif = subor-tab$seconds[rf.inds] dif[is.na(dif)] = Inf stopifnot(all(dif>=0)) if(ns) return(c(mean(dif<=interval),length(dif),nrow(tab)-length(dif))) if(!ns) return(mean(dif<=interval)) } } ##since the number of refluxes that were within 'interval' seconds before an apnea = number of apneas that were within 'interval' seconds after an apnea (doing the preprocessing above in which no reflux or apnea is double-counted), returning sum(dif<=interval) instead of the ratio would result in the p-value for sensitivity and specificity being identical. Also, it would underestimate the significance since in the random iterations fewer will be removed in the preprocessing and so you'll be looking at counts out of a larger number of possible counts. ##Could speed up process by allowing stat to be a vector. I started doing this in thefunctions_beta.R but it would take more time and isn't worth it for now. simp <- function(tab, numiter, interval, cause, outcome, stat, method, gap.symptom, gap.rf, seed=4243859, verbose=TRUE) { #cause="pH","MII", or "either" #method=1 for permuting times between events, and method=2 for just simulating event times from a uniform distribution. stopifnot(length(method)==1) if(method%in%c(1,3) & missing(gap.symptom)) gap.symptom=0 if(method%in%c(1,3) & missing(gap.rf)) gap.rf=0 if(method==2 & missing(gap.symptom)) gap.symptom=interval if(method==2 & missing(gap.rf)) gap.rf=interval if(!method%in%c(1,2,3)) stop("The choices for the method argument are 1, 2, or 3.") if(sum(tab$event=="study end")!=1) stop("One row of tab must have event=study end") end.time = tab$seconds[tab$event=="study end"] tab = subset(tab,event%in%c(cause,outcome)) tab = remove.events(tab=tab,gap.symptom=gap.symptom,gap.rf=gap.rf,cause=cause,outcome=outcome) orig = matrix(NA,nrow=length(interval),ncol=3) colnames(orig) = c("st","n.rf","n.symptom") rownames(orig) = interval for(k in 1:length(interval)) { orig[k,] = count_events(tab=tab, interval=interval[k], stat=stat, cause=cause, outcome=outcome) } stopifnot(length(unique(orig[,"n.rf"]))==1) stopifnot(length(unique(orig[,"n.symptom"]))==1) retn = c(unique(orig[,"n.rf"]), unique(orig[,"n.symptom"])) names(retn) = c("n.rf","n.symptom") orig = orig[,"st"] difsoa = diff(c(0,tab$seconds[tab$event==outcome],end.time)) difsrf = diff(c(0,tab$seconds[tab$event!=outcome],end.time)) numoa = sum(tab$event==outcome) numrf = sum(tab$event!=outcome) nulld = matrix(NA,nrow=numiter,ncol=length(interval)) if(verbose) { pb = txtProgressBar(min=1,max=numiter,initial=0,style=1) } set.seed(seed) for(k in 1:numiter) { if(method==1) { ran.oa.times = cumsum(sample(difsoa))[-(numoa+1)] ran.rf.times = cumsum(sample(difsrf))[-(numrf+1)] } else if(method==2) { ## This takes too long: #continue=TRUE #while(continue) { # ran.oa.times = round(runif(numoa, -0.5, end.time+0.5)) # if(!any(diff(ran.oa.times)end.time] = tmpoa[tmpoa>end.time]-end.time ran.oa.times = sort(tmpoa) tmprf = tab$seconds[tab$event!=outcome]+round(runif(1, -0.5, end.time+0.5)) tmprf[tmprf>end.time] = tmprf[tmprf>end.time]-end.time ran.rf.times = sort(tmprf) } all.times = c(ran.oa.times,ran.rf.times) lbls = c(rep(outcome,numoa),rep(cause,numrf)) ord = order(all.times) newtab = data.frame(event=lbls[ord], seconds=all.times[ord], stringsAsFactors=FALSE) for(h in 1:length(interval)) { nulld[k,h] <- count_events(tab=newtab, interval=interval[h], stat=stat, ns=FALSE, cause=cause, outcome=outcome) } if(verbose) { setTxtProgressBar(pb,k) } } if(verbose) { close(pb) } ps = vector() for(k in 1:length(interval)) { ps[k] = mean(nulld[,k]>=orig[k]) } ret = cbind(round(orig,4) ,round(ps,4)) colnames(ret) = c(stat,"p") list(resx = ret, resn = retn) } runif_gap <- function(n, min=0, max=1, gap=0, quiet=FALSE) { stopifnot(n>=1) if(max<=min) stop("max must be greater than min") if(gap>(max-min)) stop("gap larger than the interval itself") stopifnot(gap>=0) if(gap>0) if(n>floor((max-min)/gap +1)) stop("It is impossible to sample ",n," points from ",min," to ",max," with gap ",gap) out = sort(runif(n,min,max)) toss = c(diff(out)