propscore.profile=function(dat,myregressors="x1+x2",nameoutcome="y",test=T,compare=T,group.ref=1,nsim=500){
# input   : dat, myregressors, nameoutcome
# implicit: name of column for group must be "group";

# objective: estimate probabilities of y=1 in each group after standardizing the covariate distribution to that of all units, and after using shrinkage; the use should check externally for the balance of the covariates provided by the prop. scores (labelled 'probs' in the output) as estimated with the input regressors. 
# output  :   probs=matrix of propensity scores for each person (row) to belong in each group (column); prob.of.y1=estimates and cis of the standardized probabilities of y=1 for each group; tau2obs=estimate of the variance of the logit(standardized probabilities of y=1); setau2obs=se of tau2obs; pvalue.all.p.equal=pvalue of testing equality of all standardized probabilities; oddsratio.to.ref= estimates of standardized odds ratios of having y=1 for each group compared to "group.ref" of input; lphat=logit(estimate of probabilities of y=1, after using prop. score but before shrinkage), vlphat=variance estimates of lphat.

# routine starts
  
# step 1.
# Estimates probs=prop score matrix [i,j]=Pr(subject i is in group 
#  vecgroup[j] | Xi)
print("computes propensity scores,",quote=F)
cat("\n\n")
datemp=dat
vecgroup=sort(unique(datemp$group))
ngroup=length(vecgroup)
myformula=paste("isgroupi~",myregressors)
probs=NULL

for(igroup in 1:ngroup){
  whichgroup=vecgroup[igroup]
  datemp$isgroupi=1*(datemp$group==whichgroup)
  
model=glm(as.formula(myformula),family=binomial(link=logit),data=datemp)
probs=cbind(probs,model$fitted)}

sumprobs=apply(probs,1,sum)
probs=probs*(1/sumprobs)

# step 2.
# Estimate quantities for each group, class using prop scores
# Focusing on group g
# winclass[c,g]=%number of people in class "c of pscore g"
# ninclassgroup[c,g]=number of people in group g and class c of pscore g
# aveclassgroup[c,g]=average y of people in ninclassgroup[c,g]
# vaveclassgroup[c,g]=se^2(aveclassgroup[c,g])
aveclassgroup=matrix(NA,nrow=5,ncol=ngroup); vaveclassgroup=aveclassgroup; ninclassgroup=aveclassgroup;winclass=aveclassgroup

expit=function(x){exp(x)/(1+exp(x))};logit=function(p){log(p/(1-p))}
contrmat=rbind(c(1,0,0,0,0),c(1,1,0,0,0),c(1,0,1,0,0),c(1,0,0,1,0),c(1,0,0,0,1))
for(igroup in 1:ngroup){
  # assign classes to each subject based on pscore for g
  datemp$class=as.numeric(cut(probs[,igroup],c(0,quantile(probs[,igroup],c(.2,.4,.6,.8)),1)))
  # get winclass[,igroup]
  tempvec=as.numeric(table(datemp$class))
  winclass[,igroup]=tempvec/sum(tempvec)
  # select subset who *are* in group igroup
  whichgroup=vecgroup[igroup]
  datai=datemp[datemp$group==whichgroup,]
  # find probabilities of Y=1 at each class
  model=glm(as.formula(paste(nameoutcome,"~as.factor(class)")),family=binomial(link=logit),data=datai)
## have checked that the model command produced coefs in order of class 1-5,
## not in order of class apearance in data.
aveclassgroup[,igroup]=expit(contrmat%*%model$coef)
  # find ninclassgroup[,igroup],vaveclassgroup[,igroup]
  ninclassgroup[,igroup]=as.numeric(table(datai$class))
  vaveclassgroup[,igroup]=aveclassgroup[,igroup]*(1-aveclassgroup[,igroup])
  vaveclassgroup[,igroup]=vaveclassgroup[,igroup]/ninclassgroup[,igroup]}

# step 3.
# get phat= the pscore estimate of Y=1 for each group,
# lphat=logit(phat), and vphat=se^2(phat), and vlphat=se^2(lphat)
phat=apply(aveclassgroup*winclass,2,sum)
vphat=apply((vaveclassgroup*winclass^2),2,sum)
lphat=logit(phat)
vlphat=vphat/(phat*(1-phat))^2

# step 4.
print("computes shrinkage probability estimates,",quote=F)
cat("\n\n")
# get shrikage estimates lps using everson and morris;
# transform to get prob estimates as ps, and, lower, upper cis 
model=norm.hm(lphat,vlphat)
lps=model$output[,6]
selps=model$output[,7]
ps=expit(lps)
lower=expit(lps-1.96*selps)
upper=expit(lps+1.96*selps)
prob.of.y1=data.frame(round(cbind(vecgroup,ps,lower,upper),3))
names(prob.of.y1)=c("group", "estimate","95%CI: lower limit","upper limit")

# step 5. Test for equality of outcome across groups,
#         adjusting for covariates
ptau=NULL
tau2obs=model$Tau^2
setau2obs=sqrt(model$Tau2[2])
if(test==T){
 print("simulates to test for equality of probabilities,",quote=F)
 
 cat("\n\n")
model=test.hm(lphat,vlphat,nsim)
ptau=mean(1*(tau2obs <= model$tau2sim))
}

# step 6. Get covariance of lps, and compare odds
#         to reference group
oddsratio.to.ref=NULL
if(compare==T){
print("simulates to compare to ref group",quote=F)

cat("\n\n")
model=sim.hm(lphat,vlphat,nsim)
covlps=var(model$qsim)
igroup.ref=c(1:ngroup)[vecgroup==group.ref]
contrmat=diag(rep(1,ngroup))
contrmat[,igroup.ref]=rep(-1,ngroup)
contrmat[igroup.ref,igroup.ref]=0
or=exp(contrmat%*%lps)
se.lor=contrmat%*%covlps%*%t(contrmat)
se.lor=diag(se.lor)^.5
lower=exp(log(or)-1.96*se.lor)
upper=exp(log(or)+1.96*se.lor)
oddsratio.to.ref=data.frame(round(cbind(vecgroup,or,lower,upper),3))
names(oddsratio.to.ref)=c("group", "estimate","95%CI: lower limit","upper limit")
}

list(probs=probs,prob.of.y1=prob.of.y1,tau2obs=tau2obs,setau2obs=setau2obs,pvalue.all.p.equal=ptau,oddsratio.to.ref=oddsratio.to.ref,lphat=lphat,vlphat=vlphat)}




norm.hm=function(Y, V, X = NA, intercept = T, title = "", labelx = NULL, labelobs = NULL, srt = F, tol = 1e-05, dig = 3){
  # routine by Everson and Morris for normal hierarchical model
	k <- length(Y)
	rtV <- sqrt(V)
	if(missing(X)) {
		X <- matrix(rep(1, k), ncol = 1)
		r <- 1
	}
	else {
		X <- as.matrix(X)
		if(intercept) {
			if(max(abs(X[, 1] - rep(1, k))) != 0)
				X <- cbind(rep(1, k), X)
		}
		r <- dim(as.matrix(X))[[2]]
	}
	if(missing(labelx)) {
		if(intercept)
			labelx <- c(0:(r - 1))
		else labelx <- c(1:r)
	}
	if(missing(labelobs))
		labelobs <- 1:k
	dimnames(X) <- list(labelobs, labelx)
	dat <- cbind(Y, rtV, X)
	summary.mat <- apply(dat, 2, summary)
	summary.mat <- apply(summary.mat * 10^dig, 2, round)/10^dig
	tau2 <- mean(V)
	#initial guess at tau2
	Tol <- 1
	#print(paste("**********", title, "**********"), quote = F)
	#print("                Hierarchical Normal Regression Model", quote = F)
	#print("   copyright Sept. 1993 P. J. Everson & C. N. Morris", quote = F	)
#	cat("\n\n")
#	cat("**********CONVERGENCE ITERATIONS FOR TAU^2**********", "\n\n")
	while(Tol > tol) {
		#iterate until within tolorance
		W <- 1/(V + tau2)
		D <- diag(W)
		var.beta <- solve(t(X) %*% D %*% X)
		beta <- var.beta %*% t(X) %*% D %*% Y
		mu <- X %*% beta
		tau22 <- max((sum(W * ((k/(k - r)) * (Y - mu)^2 - V))/sum(
			W)), 0)
		Vtau22 <- 2/((k - r) * mean(W)^2)
		Tol <- abs(tau2 - tau22)/sqrt(Vtau22)
		tau2 <- tau22
	#	cat(signif(tau2, 5), ",")
	}
	W <- 1/(V + tau2)
	D <- diag(W)
	var.beta <- solve(t(X) %*% D %*% X)
	beta <- var.beta %*% t(X) %*% D %*% Y
	mu <- X %*% beta
	Vtau2 <- 2/((k - r) * mean(W)^2)
	#cat("\n\n")
	sd.beta <- sqrt(diag(var.beta))
	ratio <- round(beta/sd.beta, dig)
	sd.beta <- round(sd.beta, dig)
	beta <- round(beta, dig)
	beta.mat <- cbind(beta, sd.beta, ratio)
	tau <- round(sqrt(tau2), dig)
	B <- (((k - r - 2)/(k - r)) * V)/(V + tau2)
	rr <- k * W * diag(X %*% solve(t(X) %*% D %*% X) %*% t(X))
	Vbar <- sum(W * V)/sum(W)
	v <- ((2/(k - r - 2)) * B^2 * (Vbar + tau2))/(V + tau2)
	rtv <- sqrt(v)
	s2 <- V * (1 - ((k - rr)/k) * B) + v * (Y - mu)^2
	theta <- (1 - B) * Y + B * mu
	s <- sqrt(s2)
	aveB <- mean(B)
	dimnames(beta.mat) <- list(paste("b^", labelx), c("beta^", "se", 
		"beta^/se"))
	tau2.vec <- c(tau2, signif(sqrt(Vtau2), dig - 1))
	names(tau2.vec) <- c("Tau^2", "SE(Tau^2)")
	out <- cbind(Y, sqrt(V), mu, B, rtv, theta, s)
	out <- apply((10^dig) * out, 2, round)/10^dig
	dimnames(out) <- list(labelobs, c("Y", "sqrtV", "mu", "B", "se(B)",
		"theta^", "se(thet^)"))
	if(srt)
		out <- out[order(V),  ]
	list(summary = summary.mat, hyperparameter = beta.mat, Tau2 = tau2.vec,
		Tau = tau, aveB = round(aveB, dig), output = out)
}

sim.hm=function(q,v,iter){
  # routine for estimating covariance matrix of
  # the point estimators of Everson and Morris (E+M) when applied to
  # q,v. This is based on simulating normals from the E+M estimates
  # and finding the variance of the E+M estimators that use those
  # normals
  nx=length(q)
  qsim=matrix(NA,nrow=iter,ncol=nx)
  tau2sim=rep(NA,iter)
  model0=norm.hm(q,v,intercept=F)
  q0=model0$output[,6]
  # q0 is treated now as truth
  for(i in 1:iter){
    qtemp=rnorm(nx,mean=q0,sd=v^.5)
    modeltemp=norm.hm(qtemp,v,intercept=F)
    qsim[i,]=modeltemp$output[,6]
    tau2sim[i]=modeltemp$Tau^2
    cat("iter","",i,"")}
  list(qhat=q0,qsim=qsim,tau2sim=tau2sim)}

test.hm=function(q,v,iter){
  # routine for testing if the means underlying q,v
  # are equal. This is based by calculating a common q, called q0,
  # and then simulating normals from q0 with variances v,
  # and finding the distribution of the estimator of tau2
  # This distribution will then be compared to the estimate
  # of tau2 when applying E+M on (q,v).
  nx=length(q)
  qcommon=sum(q/v)/sum(1/v)
  qcommon=rep(qcommon,nx)
  tau2sim=rep(NA,iter)
  # q0 is treated now as truth
for(i in 1:iter){
  qtemp=rnorm(nx,mean=qcommon,sd=v^.5)
  modeltemp=norm.hm(qtemp,v,intercept=F)
  tau2sim[i]=modeltemp$Tau^2
cat("iter","",i,"")}
  list(tau2sim=tau2sim)}