################################################################################
## Copyright (C) 2012 Johns Hopkins University <pmurakam@jhsph.edu>
################################################################################

remove.events <- function(tab, gap.symptom, gap.rf, cause, outcome) {
    #All rows except those for reflux and 'OA' events must already have been removed (in simp).
    stopifnot(all(tab$event%in%c(cause,outcome)))
    tab = tab[order(tab$seconds),]
    
    ##Remove all OA events that happen within 'gap' seconds after the previous OA event with nothing else happening in between:
    ap.inds = sort(which(tab$event==outcome))
    iba = diff(tab$seconds[ap.inds])
    ibi = diff(ap.inds)
    mer = which(iba<=gap.symptom)
    mer2 = which(ibi==1)
    mer3 = intersect(mer,mer2)
    if(length(mer3)>0) tab[ap.inds[mer3+1],] = NA
    tab = tab[!is.na(tab[,1]),]
    ##Remove all reflux events that happen within 'gap' seconds before the next reflux event with nothing else happening in between:
    rf.inds = sort(which(tab$event!=outcome))
    iba = diff(tab$seconds[rf.inds])
    ibi = diff(rf.inds)
    mer = which(iba<=gap.rf)
    mer2 = which(ibi==1)
    mer3 = intersect(mer,mer2)
    if(length(mer3)>0) tab[rf.inds[mer3],] = NA
    tab = tab[!is.na(tab[,1]),]
    tab
}

count_events <- function(tab, interval, stat, ns=TRUE, cause, outcome) { #stat="sensitivity" or "ppv"
    #All rows except those for reflux and symptom events must already have been removed (in simp).
    stopifnot(all(tab$event%in%c(cause,outcome)))
    tab = tab[order(tab$seconds),]
    
    #tab = remove.events(tab=tab,gap.symptom=gap.symptom,gap.rf=gap.rf,cause=cause,outcome=outcome)
    if(stat=="sensitivity") { #what percent of the apneas were within 'interval' seconds after a reflux (i.e. what percent of apneas were correctly predicted by reflux)
        ap.inds = which(tab$event==outcome)
        prec = rep(NA,length(ap.inds))
        inds = 1:nrow(tab)
        for(i in 1:length(ap.inds)) {
            th = inds < ap.inds[i] & !inds%in%ap.inds
            if(any(th)) prec[i] = max(which(th))
        }
        subend = tab$seconds[prec] #if *all* prec NA, subend will have length=length(tab$seconds)
        if(all(is.na(subend))) subend = subend[1:length(ap.inds)]
        dif = tab$seconds[ap.inds]-subend
        dif[is.na(dif)] = Inf
        stopifnot(all(dif>=0))        
        if(ns)  return(c(mean(dif<=interval),nrow(tab)-length(dif),length(dif)))
        if(!ns) return(mean(dif<=interval))
    } else if(stat=="ppv") { #what percent of the refluxes were 'interval' seconds before an apnea (i.e., what percent of refluxes correctly predicted apnea)
        rf.inds = which(tab$event!=outcome)
        succ = rep(NA,length(rf.inds))
        inds = 1:nrow(tab)
        for(i in 1:length(rf.inds)) {
            th = inds > rf.inds[i] & !inds%in%rf.inds
            if(any(th)) succ[i] = min(which(th))
        }
        subor = tab$seconds[succ] #if *all* succ NA, subor will have length=length(tab$seconds)
        if(all(is.na(subor))) subor = subor[1:length(rf.inds)]
        dif = subor-tab$seconds[rf.inds]        
        dif[is.na(dif)] = Inf
        stopifnot(all(dif>=0))
        if(ns)  return(c(mean(dif<=interval),length(dif),nrow(tab)-length(dif)))
        if(!ns) return(mean(dif<=interval))
    }
} ##since the number of refluxes that were within 'interval' seconds before an apnea = number of apneas that were within 'interval' seconds after an apnea (doing the preprocessing above in which no reflux or apnea is double-counted), returning sum(dif<=interval) instead of the ratio would result in the p-value for sensitivity and specificity being identical.  Also, it would underestimate the significance since in the random iterations fewer will be removed in the preprocessing and so you'll be looking at counts out of a larger number of possible counts.

##Could speed up process by allowing stat to be a vector.  I started doing this in thefunctions_beta.R but it would take more time and isn't worth it for now.
simp <- function(tab, numiter, interval, cause, outcome, stat, method, gap.symptom, gap.rf, seed=4243859, verbose=TRUE) {
#cause="pH","MII", or "either"
#method=1 for permuting times between events, and method=2 for just simulating event times from a uniform distribution.
    stopifnot(length(method)==1)
    if(method%in%c(1,3) & missing(gap.symptom)) gap.symptom=0
    if(method%in%c(1,3) & missing(gap.rf)) gap.rf=0
    if(method==2 & missing(gap.symptom)) gap.symptom=interval
    if(method==2 & missing(gap.rf)) gap.rf=interval
    if(!method%in%c(1,2,3)) stop("The choices for the method argument are 1, 2, or 3.")
    if(sum(tab$event=="study end")!=1) stop("One row of tab must have event=study end")

    end.time = tab$seconds[tab$event=="study end"]
    tab = subset(tab,event%in%c(cause,outcome))
    tab = remove.events(tab=tab,gap.symptom=gap.symptom,gap.rf=gap.rf,cause=cause,outcome=outcome)    

    orig = matrix(NA,nrow=length(interval),ncol=3)
    colnames(orig) = c("st","n.rf","n.symptom")
    rownames(orig) = interval
    for(k in 1:length(interval)) {
        orig[k,] = count_events(tab=tab, interval=interval[k], stat=stat,
                                cause=cause, outcome=outcome)
    }
    stopifnot(length(unique(orig[,"n.rf"]))==1)
    stopifnot(length(unique(orig[,"n.symptom"]))==1)
    retn = c(unique(orig[,"n.rf"]), unique(orig[,"n.symptom"]))
    names(retn) = c("n.rf","n.symptom")
    orig = orig[,"st"]

    difsoa = diff(c(0,tab$seconds[tab$event==outcome],end.time))
    difsrf = diff(c(0,tab$seconds[tab$event!=outcome],end.time))
    numoa = sum(tab$event==outcome)
    numrf = sum(tab$event!=outcome)
    nulld = matrix(NA,nrow=numiter,ncol=length(interval))
    if(verbose) { pb = txtProgressBar(min=1,max=numiter,initial=0,style=1) }
    set.seed(seed)
    for(k in 1:numiter) {
        if(method==1) {
            ran.oa.times = cumsum(sample(difsoa))[-(numoa+1)]
            ran.rf.times = cumsum(sample(difsrf))[-(numrf+1)]  
        } else if(method==2) {
            ## This takes too long:
            #continue=TRUE
            #while(continue) {
            #    ran.oa.times = round(runif(numoa, -0.5, end.time+0.5))
            #    if(!any(diff(ran.oa.times)<gap.symptom)) continue=FALSE else print("Trying symptom again")
            #}
            #continue=TRUE
            #while(continue) {            
            #    ran.rf.times = round(runif(numrf, -0.5, end.time+0.5))
            #    if(!any(diff(ran.rf.times)<gap.rf)) continue=FALSE else print("Trying rf again")
            #}
            ## so just do this instead:          
            ran.oa.times = round(runif_gap(numoa, -0.5, end.time+0.5, gap=gap.symptom, quiet=TRUE))
            ran.rf.times = round(runif_gap(numrf, -0.5, end.time+0.5, gap=gap.rf, quiet=TRUE))
        } else if(method==3) { #wrap-around permutation testing method
            tmpoa = tab$seconds[tab$event==outcome]+round(runif(1, -0.5, end.time+0.5))
            tmpoa[tmpoa>end.time] = tmpoa[tmpoa>end.time]-end.time
            ran.oa.times = sort(tmpoa)
            tmprf = tab$seconds[tab$event!=outcome]+round(runif(1, -0.5, end.time+0.5))
            tmprf[tmprf>end.time] = tmprf[tmprf>end.time]-end.time
            ran.rf.times = sort(tmprf)
        }
        all.times = c(ran.oa.times,ran.rf.times)
        lbls = c(rep(outcome,numoa),rep(cause,numrf))
        ord = order(all.times)
        newtab = data.frame(event=lbls[ord], seconds=all.times[ord], stringsAsFactors=FALSE)
        
        for(h in 1:length(interval)) {
            nulld[k,h] <- count_events(tab=newtab, interval=interval[h], stat=stat, ns=FALSE,
                                       cause=cause, outcome=outcome)
        }
        if(verbose) { setTxtProgressBar(pb,k) }
    }
    if(verbose) { close(pb) }
    ps = vector()
    for(k in 1:length(interval)) { ps[k] = mean(nulld[,k]>=orig[k]) }
    ret = cbind(round(orig,4) ,round(ps,4))
    colnames(ret) = c(stat,"p")
    list(resx = ret, resn = retn)
}

runif_gap <- function(n, min=0, max=1, gap=0, quiet=FALSE) {
    stopifnot(n>=1)
    if(max<=min) stop("max must be greater than min")
    if(gap>(max-min)) stop("gap larger than the interval itself")
    stopifnot(gap>=0)    
    if(gap>0) if(n>floor((max-min)/gap +1)) stop("It is impossible to sample ",n," points from ",min," to ",max," with gap ",gap)
    out = sort(runif(n,min,max))
    toss = c(diff(out)<gap,FALSE)
    out = out[!toss] #gets rid of preceding one
    while(length(out)!=n) {
        if(!quiet) message("sampling again")      
        out = sort(c(out, runif(sum(toss),min,max)))
        toss = c(diff(out)<gap,FALSE)
        out = out[!toss] #gets rid of preceding one
    }
    if(any(diff(out)<gap)) stop("Report a bug in this function.")
    out
}