propscore.profile=function(dat,myregressors="x1+x2",nameoutcome="y",test=T,compare=T,group.ref=1,nsim=500){ # input : dat, myregressors, nameoutcome # implicit: name of column for group must be "group"; # objective: estimate probabilities of y=1 in each group after standardizing the covariate distribution to that of all units, and after using shrinkage; the use should check externally for the balance of the covariates provided by the prop. scores (labelled 'probs' in the output) as estimated with the input regressors. # output : probs=matrix of propensity scores for each person (row) to belong in each group (column); prob.of.y1=estimates and cis of the standardized probabilities of y=1 for each group; tau2obs=estimate of the variance of the logit(standardized probabilities of y=1); setau2obs=se of tau2obs; pvalue.all.p.equal=pvalue of testing equality of all standardized probabilities; oddsratio.to.ref= estimates of standardized odds ratios of having y=1 for each group compared to "group.ref" of input; lphat=logit(estimate of probabilities of y=1, after using prop. score but before shrinkage), vlphat=variance estimates of lphat. # routine starts # step 1. # Estimates probs=prop score matrix [i,j]=Pr(subject i is in group # vecgroup[j] | Xi) print("computes propensity scores,",quote=F) cat("\n\n") datemp=dat vecgroup=sort(unique(datemp$group)) ngroup=length(vecgroup) myformula=paste("isgroupi~",myregressors) probs=NULL for(igroup in 1:ngroup){ whichgroup=vecgroup[igroup] datemp$isgroupi=1*(datemp$group==whichgroup) model=glm(as.formula(myformula),family=binomial(link=logit),data=datemp) probs=cbind(probs,model$fitted)} sumprobs=apply(probs,1,sum) probs=probs*(1/sumprobs) # step 2. # Estimate quantities for each group, class using prop scores # Focusing on group g # winclass[c,g]=%number of people in class "c of pscore g" # ninclassgroup[c,g]=number of people in group g and class c of pscore g # aveclassgroup[c,g]=average y of people in ninclassgroup[c,g] # vaveclassgroup[c,g]=se^2(aveclassgroup[c,g]) aveclassgroup=matrix(NA,nrow=5,ncol=ngroup); vaveclassgroup=aveclassgroup; ninclassgroup=aveclassgroup;winclass=aveclassgroup expit=function(x){exp(x)/(1+exp(x))};logit=function(p){log(p/(1-p))} contrmat=rbind(c(1,0,0,0,0),c(1,1,0,0,0),c(1,0,1,0,0),c(1,0,0,1,0),c(1,0,0,0,1)) for(igroup in 1:ngroup){ # assign classes to each subject based on pscore for g datemp$class=as.numeric(cut(probs[,igroup],c(0,quantile(probs[,igroup],c(.2,.4,.6,.8)),1))) # get winclass[,igroup] tempvec=as.numeric(table(datemp$class)) winclass[,igroup]=tempvec/sum(tempvec) # select subset who *are* in group igroup whichgroup=vecgroup[igroup] datai=datemp[datemp$group==whichgroup,] # find probabilities of Y=1 at each class model=glm(as.formula(paste(nameoutcome,"~as.factor(class)")),family=binomial(link=logit),data=datai) ## have checked that the model command produced coefs in order of class 1-5, ## not in order of class apearance in data. aveclassgroup[,igroup]=expit(contrmat%*%model$coef) # find ninclassgroup[,igroup],vaveclassgroup[,igroup] ninclassgroup[,igroup]=as.numeric(table(datai$class)) vaveclassgroup[,igroup]=aveclassgroup[,igroup]*(1-aveclassgroup[,igroup]) vaveclassgroup[,igroup]=vaveclassgroup[,igroup]/ninclassgroup[,igroup]} # step 3. # get phat= the pscore estimate of Y=1 for each group, # lphat=logit(phat), and vphat=se^2(phat), and vlphat=se^2(lphat) phat=apply(aveclassgroup*winclass,2,sum) vphat=apply((vaveclassgroup*winclass^2),2,sum) lphat=logit(phat) vlphat=vphat/(phat*(1-phat))^2 # step 4. print("computes shrinkage probability estimates,",quote=F) cat("\n\n") # get shrikage estimates lps using everson and morris; # transform to get prob estimates as ps, and, lower, upper cis model=norm.hm(lphat,vlphat) lps=model$output[,6] selps=model$output[,7] ps=expit(lps) lower=expit(lps-1.96*selps) upper=expit(lps+1.96*selps) prob.of.y1=data.frame(round(cbind(vecgroup,ps,lower,upper),3)) names(prob.of.y1)=c("group", "estimate","95%CI: lower limit","upper limit") # step 5. Test for equality of outcome across groups, # adjusting for covariates ptau=NULL tau2obs=model$Tau^2 setau2obs=sqrt(model$Tau2[2]) if(test==T){ print("simulates to test for equality of probabilities,",quote=F) cat("\n\n") model=test.hm(lphat,vlphat,nsim) ptau=mean(1*(tau2obs <= model$tau2sim)) } # step 6. Get covariance of lps, and compare odds # to reference group oddsratio.to.ref=NULL if(compare==T){ print("simulates to compare to ref group",quote=F) cat("\n\n") model=sim.hm(lphat,vlphat,nsim) covlps=var(model$qsim) igroup.ref=c(1:ngroup)[vecgroup==group.ref] contrmat=diag(rep(1,ngroup)) contrmat[,igroup.ref]=rep(-1,ngroup) contrmat[igroup.ref,igroup.ref]=0 or=exp(contrmat%*%lps) se.lor=contrmat%*%covlps%*%t(contrmat) se.lor=diag(se.lor)^.5 lower=exp(log(or)-1.96*se.lor) upper=exp(log(or)+1.96*se.lor) oddsratio.to.ref=data.frame(round(cbind(vecgroup,or,lower,upper),3)) names(oddsratio.to.ref)=c("group", "estimate","95%CI: lower limit","upper limit") } list(probs=probs,prob.of.y1=prob.of.y1,tau2obs=tau2obs,setau2obs=setau2obs,pvalue.all.p.equal=ptau,oddsratio.to.ref=oddsratio.to.ref,lphat=lphat,vlphat=vlphat)} norm.hm=function(Y, V, X = NA, intercept = T, title = "", labelx = NULL, labelobs = NULL, srt = F, tol = 1e-05, dig = 3){ # routine by Everson and Morris for normal hierarchical model k <- length(Y) rtV <- sqrt(V) if(missing(X)) { X <- matrix(rep(1, k), ncol = 1) r <- 1 } else { X <- as.matrix(X) if(intercept) { if(max(abs(X[, 1] - rep(1, k))) != 0) X <- cbind(rep(1, k), X) } r <- dim(as.matrix(X))[[2]] } if(missing(labelx)) { if(intercept) labelx <- c(0:(r - 1)) else labelx <- c(1:r) } if(missing(labelobs)) labelobs <- 1:k dimnames(X) <- list(labelobs, labelx) dat <- cbind(Y, rtV, X) summary.mat <- apply(dat, 2, summary) summary.mat <- apply(summary.mat * 10^dig, 2, round)/10^dig tau2 <- mean(V) #initial guess at tau2 Tol <- 1 #print(paste("**********", title, "**********"), quote = F) #print(" Hierarchical Normal Regression Model", quote = F) #print(" copyright Sept. 1993 P. J. Everson & C. N. Morris", quote = F ) # cat("\n\n") # cat("**********CONVERGENCE ITERATIONS FOR TAU^2**********", "\n\n") while(Tol > tol) { #iterate until within tolorance W <- 1/(V + tau2) D <- diag(W) var.beta <- solve(t(X) %*% D %*% X) beta <- var.beta %*% t(X) %*% D %*% Y mu <- X %*% beta tau22 <- max((sum(W * ((k/(k - r)) * (Y - mu)^2 - V))/sum( W)), 0) Vtau22 <- 2/((k - r) * mean(W)^2) Tol <- abs(tau2 - tau22)/sqrt(Vtau22) tau2 <- tau22 # cat(signif(tau2, 5), ",") } W <- 1/(V + tau2) D <- diag(W) var.beta <- solve(t(X) %*% D %*% X) beta <- var.beta %*% t(X) %*% D %*% Y mu <- X %*% beta Vtau2 <- 2/((k - r) * mean(W)^2) #cat("\n\n") sd.beta <- sqrt(diag(var.beta)) ratio <- round(beta/sd.beta, dig) sd.beta <- round(sd.beta, dig) beta <- round(beta, dig) beta.mat <- cbind(beta, sd.beta, ratio) tau <- round(sqrt(tau2), dig) B <- (((k - r - 2)/(k - r)) * V)/(V + tau2) rr <- k * W * diag(X %*% solve(t(X) %*% D %*% X) %*% t(X)) Vbar <- sum(W * V)/sum(W) v <- ((2/(k - r - 2)) * B^2 * (Vbar + tau2))/(V + tau2) rtv <- sqrt(v) s2 <- V * (1 - ((k - rr)/k) * B) + v * (Y - mu)^2 theta <- (1 - B) * Y + B * mu s <- sqrt(s2) aveB <- mean(B) dimnames(beta.mat) <- list(paste("b^", labelx), c("beta^", "se", "beta^/se")) tau2.vec <- c(tau2, signif(sqrt(Vtau2), dig - 1)) names(tau2.vec) <- c("Tau^2", "SE(Tau^2)") out <- cbind(Y, sqrt(V), mu, B, rtv, theta, s) out <- apply((10^dig) * out, 2, round)/10^dig dimnames(out) <- list(labelobs, c("Y", "sqrtV", "mu", "B", "se(B)", "theta^", "se(thet^)")) if(srt) out <- out[order(V), ] list(summary = summary.mat, hyperparameter = beta.mat, Tau2 = tau2.vec, Tau = tau, aveB = round(aveB, dig), output = out) } sim.hm=function(q,v,iter){ # routine for estimating covariance matrix of # the point estimators of Everson and Morris (E+M) when applied to # q,v. This is based on simulating normals from the E+M estimates # and finding the variance of the E+M estimators that use those # normals nx=length(q) qsim=matrix(NA,nrow=iter,ncol=nx) tau2sim=rep(NA,iter) model0=norm.hm(q,v,intercept=F) q0=model0$output[,6] # q0 is treated now as truth for(i in 1:iter){ qtemp=rnorm(nx,mean=q0,sd=v^.5) modeltemp=norm.hm(qtemp,v,intercept=F) qsim[i,]=modeltemp$output[,6] tau2sim[i]=modeltemp$Tau^2 cat("iter","",i,"")} list(qhat=q0,qsim=qsim,tau2sim=tau2sim)} test.hm=function(q,v,iter){ # routine for testing if the means underlying q,v # are equal. This is based by calculating a common q, called q0, # and then simulating normals from q0 with variances v, # and finding the distribution of the estimator of tau2 # This distribution will then be compared to the estimate # of tau2 when applying E+M on (q,v). nx=length(q) qcommon=sum(q/v)/sum(1/v) qcommon=rep(qcommon,nx) tau2sim=rep(NA,iter) # q0 is treated now as truth for(i in 1:iter){ qtemp=rnorm(nx,mean=qcommon,sd=v^.5) modeltemp=norm.hm(qtemp,v,intercept=F) tau2sim[i]=modeltemp$Tau^2 cat("iter","",i,"")} list(tau2sim=tau2sim)}