######## THIS IS THE SIMULATION PRESENTED IN SECTION 5.3 ####### IT is the LOGISTIC REGRESSION SIMULATION options(object.size=10^100) ##FUNCTIONS: det returns the determinant of matrix, geninv is generlized ##and Window defines the window function... we use gaussian here for ease ##of computation det <- function(M) prod(svd(M, nu = 0, nv = 0)$d) geninv <- function(M){ aux <- svd(M) auxd <- aux$d auxd[auxd!=0] <- 1/auxd aux$v%*%diag(auxd,nrow=length(auxd))%*%t(aux$u) } Window <- function(o,middle=pi/2,sigma=5){ dnorm((o-middle)/max(abs(o-middle))*sigma) } cat(rep("#",20),"\n","SIM 3\n",rep("#",20),"\n") Ns <- c(100,200,500)+1 #DONT CONSIDER 50:causes sparsness,no convergence of glm for(i in 1:(length(Ns))){ print(i) N <- Ns[i] HALF <- (N-1)/2; HALF <- (N-1)/2; HOLE <- round(.06*HALF) ###Leave a hole to avoid line=constant o1 <- c(runif(HALF-HOLE,0,pi/2-.2),runif(HOLE,pi/2-.1,pi/2)) o <- sort(c(o1,pi/2,runif(HALF,pi/2,3/2*pi))) w <- Window(o) w <- w/sum(w)*N truemean <- 2*cos(o/10) o <- o*10 #make o bigger to avoid problem with geninv truemean <- exp(truemean)/(1+exp(truemean)) ##inverse logit of s(x) K <- 4 X <- apply(as.array(0:(K-1)),1,function(k) (o^k)) AICp <- c(1:K) res <- array(0,dim=c(NN,8,K)) j <- 1 while(j <= NN){ if(round((j-1)/NN*100)!=round(j/NN*100)) cat(round(j/NN*100)) y <- rbinom(N,1,truemean) FLAG <- F for(k in 1:K){ XX<-as.array(X[,1:k]) auxfit <- glm(y~XX-1,family="binomial",weight=w) ##glm may not converge.... if(any(is.na(auxfit$fitted))){ FLAG <- T} else{ if(any(auxfit$fitted==Inf) || any(auxfit$fitted==-Inf)){ FLAG <-T} else{ p <- auxfit$fitted loglik <- -sum(y*log(p) + (1-y)*log(1-p)) wloglik <- -sum(w*(y*log(p) + (1-y)*log(1-p))) p1 <- p[(N+1)/2]; p0 <- truemean[(N+1)/2] mse <- p0*log(p0/p1) + (1-p0)*log((1-p0)/(1-p1)) AUX <- sweep(XX,1,w,FUN="*") AUX <- sweep(AUX,1,p*(1-p),FUN="*") AUX1 <- t(AUX)%*%XX AUX0 <- t(XX)%*%XX AUX <- sweep(AUX,1,w,FUN="*") AUX2 <- t(AUX)%*%XX NP <- sum(diag(geninv(AUX1) %*% AUX2)) AIC <- loglik + AICp[k] WAIC <- wloglik + NP BIC <- loglik + .5*AICp[k]*log(N) SICf <- loglik + .5*log(det(AUX0)) WBIC <- wloglik + .5*log(det(AUX1)) CAICF <- loglik + AICp[k]+.5*log(det(AUX0)) WCAICF <- wloglik + NP + .5*log(det(AUX1)) res[j,,k] <- c(mse,AIC,WAIC,BIC,SICf,WBIC,CAICF,WCAICF) }}} j <- j +1 if(FLAG){cat("*"); j <- j - 1} ##IF GLM didnt converged etc.. } xx <- apply(res,c(1,2),function(x) order(x)[1]) xxx <- apply(xx,2,function(x) table(factor(x,levels=c(1:K)))/NN*100) xxx[,1] <- signif(apply(res[,1,],2,mean),8) xxx <- t(xxx) dimnames(xxx) <- list(c("&MSE","&AIC","&WAIC","&BIC","&SICf","&WBIC","&CAICF","&WCAICF"),as.character(c(1:K))) cat("\n") for(h in 1:dim(xxx)[1]){ ##HERE WE WRITE Out the ready for latex tables cat(dimnames(xxx)[[1]][h]) for(g in 1:dim(xxx)[2]) cat("&",xxx[h,g]) cat("&\n")} } sink()