########################################################################################
# Analysis of data from the Peer Review Game
# Date: 6-8-11
# Copyright (C) 2011 Jeffrey T. Leek (http://www.biostat.jhsph.edu/~jleek/contact.html)
#                    Margaret A. Taub (http://www.biostat.jhsph.edu/people/postdocs/taub.shtml)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details, see <http://www.gnu.org/licenses/>.
#
# The data is stored in a matrix "dat" calculated from the processed data
# using the function peer-review-preprocess.R. Each row corresponds to one solution
# the columns are:
#
# solver          - The ID of the solver (unique within each Study ID)
# solve_start     - The Julian time when the solver started solving the problem (seconds)
# solve_stop      - The Julian time when the solver finished solving the problem (seconds)
# solve_complete  - The experimental time the solver finished solving the problem
#                   (0 <= solve_complete <= 2400, seconds)
# answer          - The answer the solver gave to the problem
# reviewer        - The ID of the reviewer (unique within Study ID, within Study
#                   solver/reviewer IDs are the same)
# review_start    - The Julian time when the reviewer started reviewing the problem (seconds)
# review_stop     - The Julian time when the reviewer finished reviewing the problem (seconds)
# review_complete - The experimental time the reviewer finished reviewing the problem
#                   (0 <= review_complete <= 2400, seconds)
# accept          - An indicator of whether the solution was accepted
#                   (1 = accept, 0 = reject, NA = review not complete)
# correct_answer  - The correct answer to the GRE problem
# problem_type    - The type of problem a = analogy, alg = algebra, alg-comp = algebra comparison
#                   ant = antonyms, app = math application, app-comp= math application comparison
#                   comp = comparison, geom = geometry, geom-comp = geometry comparison
#                   graph = graphic problem, num = numeric problem, num-com = numeric comparison
#                   quant = quantitative analysis, sc = sentence completion, wp = word problem,
#                   wp = word problem math, wp-app = word problem application,
#                   wp-comp = word problem comparison
# correct         - An indicator of whether the submitted solution is correct
#                   (1 = correct, 0 = not correct)
# study_type      - "anon" = closed peer review, "non-anon" = open peer review
# study_id        - A unique ID for each of the 6 peer review experiments.
#
# Depends: RColorBrewer, lme4, pixmap
#
# NOTE(review): this script never attaches lme4 or pixmap itself although it calls
# glmer() and addlogo(); presumably peer-review-help.R loads them - confirm.
########################################################################################

############################################
############################################
### Load Source Functions
############################################
############################################

source("peer-review-help.R")

############################################
############################################
### Load the data - "dat" is the name of the data matrix
############################################
############################################

load("peer-review-data.rda")

# Only solutions whose review was completed are used in most analyses below.
reviewed <- dat[!is.na(dat$review_complete), ]

# Labels of the six experiments as they appear in study_id.
experiments <- paste0("exp", 1:6)

############################################
############################################
### Calculate sample sizes
############################################
############################################

# Number of participants per experiment (a solver counts once per experiment)
nParticipants <- colSums(table(dat$solver, dat$study_id) > 0)
nParticipants

# Total number of solutions
nrow(dat)

# Total number of reviews
nRev <- sum(!is.na(dat$review_complete))

# Total number of solutions (a single count, not the full dim() vector)
nSol <- nrow(dat)

############################################
############################################
### Calculate the % correct and the % correct among those
### accepted.
############################################
############################################

mean(reviewed$correct)
mean(reviewed$correct[reviewed$accept == 0])
mean(reviewed$correct[reviewed$accept == 1])

############################################
############################################
### Make a plot of the ratio of solving to
### reviewing time
############################################
############################################

png(filename = "percent-solving.png", height = 560 * 2, width = 520 * 3)

### Plot solved problems
par(mfcol = c(2, 3), mar = rep(2.2, 4))
cols <- c(6, 2, 6, 2, 6, 2)

# Time grid (seconds) on which the cumulative times are evaluated.
cuts <- seq(0, 2400, length.out = 100)

for (k in seq_along(experiments)) {
  ee <- experiments[k]
  tmpdat <- reviewed[reviewed$study_id == ee, ]
  subj <- unique(tmpdat$solver)

  # Empty canvas; one curve per participant is added below.
  plot(1:2400, seq(0, 1, length.out = 2400), type = "n", xaxt = "n",
       xlab = "Time (min)", ylab = "% of Time Solving",
       cex.axis = 1.5, cex.lab = 1.5, main = ee)
  axis(1, at = 600 * 0:4, labels = c(0, 10, 20, 30, 40), cex = 1.5, cex.lab = 1.5)

  for (i in seq_along(subj)) {
    asSolver <- tmpdat$solver == subj[i]
    asReviewer <- tmpdat$reviewer == subj[i]

    # Time stamps used to place each solution / review on the time grid.
    tmpSolveTime <- 2400 - tmpdat$solve_complete[asSolver]
    tmpReviewTime <- 2400 - tmpdat$review_complete[asReviewer]

    # Durations of each solution and each review done by this participant.
    # Review durations are selected by reviewer so they line up element by
    # element with tmpReviewTime (selecting by solver paired them with the
    # wrong rows).
    tmp <- tmpdat$solve_stop[asSolver] - tmpdat$solve_start[asSolver]
    tmp2 <- tmpdat$review_stop[asReviewer] - tmpdat$review_start[asReviewer]

    # Total time spent solving / reviewing up to each grid point.
    cumSolveTime <- vapply(
      cuts,
      function(ct) sum(tmp[tmpSolveTime <= ct], na.rm = TRUE),
      numeric(1)
    )
    cumReviewTime <- vapply(
      cuts,
      function(ct) sum(tmp2[tmpReviewTime <= ct], na.rm = TRUE),
      numeric(1)
    )

    lines(cuts, cumSolveTime / (cumSolveTime + cumReviewTime), col = cols[k], lwd = 2)
  }
}

dev.off()

###################################################
###################################################
### Calculate the improvement in acceptance probability
### for each additional review
###################################################
###################################################

# For the solver of each reviewed solution, count the reviews that solver
# performed (and how many of them were acceptances) in the same experiment with
# review_complete >= that of the current review.
# NOTE(review): elsewhere this script uses 2400 - review_complete as a time, so
# ">=" presumably selects reviews finished no later than review i - confirm.
numberReviewed <- numberAccepted <- rep(NA, nRev)
for (i in seq_len(nRev)) {
  priorReviews <- reviewed$reviewer == reviewed$solver[i] &
    reviewed$study_id == reviewed$study_id[i] &
    reviewed$review_complete >= reviewed$review_complete[i]
  numberReviewed[i] <- sum(priorReviews)
  numberAccepted[i] <- sum(priorReviews & reviewed$accept == 1)
}

# Participant IDs are only unique within an experiment, so combine with study_id.
uniqueReviewers <- paste(reviewed$reviewer, reviewed$study_id)
uniqueSolvers <- paste(reviewed$solver, reviewed$study_id)
acceptedIndicator <- reviewed$accept
studyType <- as.character(reviewed$study_type)
studyType <- (studyType == "non-anon") * 1   # 1 = open review, 0 = closed review
studyID <- reviewed$study_id

# NOTE(review): recent lme4 releases warn that glmer() with a gaussian family is a
# deprecated shortcut for lmer(); consider calling lmer() directly once it is
# confirmed that p.values.lmer() accepts the result. Applies to all models below.
glmm1 <- glmer(acceptedIndicator ~ numberAccepted * studyType +
                 (1 | as.factor(uniqueSolvers)) +
                 (1 | as.factor(uniqueReviewers)),
               family = "gaussian")
p.values.lmer(glmm1)

###################################################
###################################################
### Calculate whether top reviewers won the game
###################################################
###################################################

for (ee in experiments) {
  print(ee)
  cat("\n")

  inExp <- reviewed$study_id == ee

  # Get the number of solutions accepted by each individual (as reviewer)
  tmpAccepted <- vapply(
    split(reviewed$accept[inExp], reviewed$reviewer[inExp]),
    sum,
    numeric(1)
  )

  # Reviewers whose acceptance count has rank <= 2 (rank() averages ties).
  # NOTE(review): the original comment said "top 3" but the cutoff is 2 - confirm.
  tmpBestReviewers <- names(tmpAccepted)[rank(-tmpAccepted) <= 2]

  # Get the number of points (accepted solutions) for each individual (as solver)
  tmpPoints <- vapply(
    split(reviewed$accept[inExp], reviewed$solver[inExp]),
    sum,
    numeric(1)
  )

  # Scorers with rank below 2: the sole leader, or two players tied for first.
  # NOTE(review): the original comment said "top 2" but "< 2" is stricter - confirm.
  tmpHighestScorers <- names(tmpPoints)[rank(-tmpPoints) < 2]

  # Calculate how many top reviewers are top scorers
  print(sum(tmpBestReviewers %in% tmpHighestScorers))
}

############################################
############################################
### Helpers shared by the cooperation analyses
############################################
############################################

# Recode the reviewer and solver IDs of one experiment as integers via
# make.consecutive.int() (from peer-review-help.R), shifted up by one.
# NOTE(review): the +1 suggests make.consecutive.int() is 0-based - confirm.
# Returns a list with elements `reviewer` and `solver`, one entry per row of expDat.
recodePlayers <- function(expDat) {
  nReviews <- nrow(expDat)
  ids <- make.consecutive.int(c(expDat$reviewer, expDat$solver))
  list(
    reviewer = ids[seq_len(nReviews)] + 1,
    solver = ids[-seq_len(nReviews)] + 1
  )
}

# Build the nPlayers x nPlayers cooperation matrix for one experiment.
# Entry [i, j] is reviewer i's acceptance rate for solver j minus reviewer i's
# overall acceptance rate, multiplied by `scale`; it stays 0 when reviewer i
# never accepted a solution from solver j.
cooperationMatrix <- function(reviewer, solver, accept, nPlayers, scale = 15) {
  mat <- matrix(0, nPlayers, nPlayers)
  for (i in seq_len(nPlayers)) {
    byReviewer <- reviewer == i
    for (j in seq_len(nPlayers)) {
      pairReviews <- byReviewer & solver == j
      if (sum(pairReviews & accept == 1) > 0) {
        mat[i, j] <- mean(accept[pairReviews]) - mean(accept[byReviewer])
      }
    }
  }
  mat[is.nan(mat)] <- 0
  mat * scale
}

############################################
############################################
### Make the cooperation plots
############################################
############################################

nCooperate <- nObstruct <- rep(NA, 6)
sz <- 0.6

png(filename = "cooperation.png", height = 560 * 2, width = 520 * 3)
par(mfcol = c(2, 3), mar = rep(5, 4))

for (k in 1:6) {
  # Subset to a single experiment
  tmp1 <- reviewed[reviewed$study_id == paste0("exp", k), ]

  # Make reviewer and solver ids integers from 1:nPlayers
  players <- recodePlayers(tmp1)
  reviewer <- players$reviewer
  solver <- players$solver
  accept <- tmp1$accept
  nPlayers <- max(c(solver, reviewer))

  mat <- cooperationMatrix(reviewer, solver, accept, nPlayers)

  # Edge width combines the strength of the interaction in both directions.
  wt <- abs(mat) + t(abs(mat))

  out <- cplotPicture(0, 0, 3, nPlayers, col = "black", sz = sz,
                      pch = as.character(k), cex = 2, xaxt = "n", yaxt = "n",
                      xlab = "", ylab = "", main = k)

  for (i in seq_len(nPlayers)) {
    for (j in seq_len(i)) {
      # Colour 2 when both directions are positive (mutual cooperation),
      # colour 6 when both are negative (mutual obstruction), grey otherwise.
      ci <- "darkgrey"
      if (mat[i, j] > 0 && mat[j, i] > 0) {
        ci <- 2
      }
      if (mat[i, j] < 0 && mat[j, i] < 0) {
        ci <- 6
      }
      segments(out$x[i], out$y[i], out$x[j], out$y[j], lwd = wt[i, j], col = ci)
    }
  }

  # Draw a player icon on top of every node.
  for (i in seq_along(out$x)) {
    addlogo(manbw, c(out$x[i] - sz, out$x[i] + sz), c(out$y[i] - sz, out$y[i] + sz))
  }

  # Each mutual pair appears twice in the symmetric comparison, hence the / 2.
  nCooperate[k] <- sum(mat > 0 & t(mat) > 0) / 2
  nObstruct[k] <- sum(mat < 0 & t(mat) < 0) / 2
}

dev.off()

############################################
############################################
### Get statistical significance of cooperation
############################################
############################################

# Experiments 2, 4, 6 are treated as open and 1, 3, 5 as closed below.
openExps <- c(2, 4, 6)
closedExps <- c(1, 3, 5)

# Calculate the number of cooperative interactions under the closed and open systems
coopInteractOpen <- sum(nCooperate[openExps])
coopInteractClosed <- sum(nCooperate[closedExps])

# Calculate the number of possible interactions (player pairs) under each system.
# Participant counts are looked up by experiment name so the result does not
# depend on the column order of the study_id table.
nClosedInteractions <- sum(choose(nParticipants[paste0("exp", closedExps)], 2))
nOpenInteractions <- sum(choose(nParticipants[paste0("exp", openExps)], 2))

# Do a test that the proportions are the same
prop.test(c(coopInteractOpen, coopInteractClosed),
          c(nOpenInteractions, nClosedInteractions))

############################################
############################################
### Perform analysis of cooperative pairs/solvers
############################################
############################################

correctReview <- (reviewed$correct == reviewed$accept)

# start by making 0-1 vectors indicating if each reviewed solution involved
# a cooperative solver-reviewer pair, or a solver involved in a cooperative pair
coopPair <- rep(0, nrow(reviewed))
coopSolver <- rep(0, nrow(reviewed))

for (k in 1:6) {
  # Subset to a single experiment, remembering the rows' positions in `reviewed`
  tmpIdx <- which(reviewed$study_id == paste0("exp", k))
  tmp1 <- reviewed[tmpIdx, ]

  # Make reviewer and solver ids integers from 1:nPlayers
  players <- recodePlayers(tmp1)
  reviewer <- players$reviewer
  solver <- players$solver
  accept <- tmp1$accept
  nPlayers <- max(c(solver, reviewer))

  mat <- cooperationMatrix(reviewer, solver, accept, nPlayers)

  coopPairReview <- rep(0, length(accept))
  coopSolverReview <- rep(0, length(accept))
  for (i in seq_len(nPlayers)) {
    for (j in seq_len(i)) {
      if (mat[i, j] > 0 && mat[j, i] > 0) {
        # Reviews exchanged inside the cooperative pair, in either direction
        coopPairReview[reviewer == i & solver == j] <- 1
        coopPairReview[reviewer == j & solver == i] <- 1
        # Any solution submitted by a member of a cooperative pair
        coopSolverReview[solver %in% c(i, j)] <- 1
      }
    }
  }

  coopPair[tmpIdx] <- coopPairReview
  coopSolver[tmpIdx] <- coopSolverReview
}

### Focus on cooperative pair -- seems to have an effect on accuracy
glmmCoop <- glmer(correctReview ~ coopPair + (1 | uniqueReviewers) + (1 | uniqueSolvers),
                  family = "gaussian")
p.values.lmer(glmmCoop)

### Focus on solver from cooperative pair -- does not seem to have an effect on accuracy
glmmCoopS <- glmer(correctReview ~ coopSolver + (1 | uniqueReviewers) + (1 | uniqueSolvers),
                   family = "gaussian")
p.values.lmer(glmmCoopS)

### The pair seems to have an effect above that of the solver alone
coopComp <- factor(coopSolver + coopPair)
glmmCoopInt <- glmer(correctReview ~ coopComp + (1 | uniqueReviewers) + (1 | uniqueSolvers),
                     family = "gaussian")
p.values.lmer(glmmCoopInt)

###################################################
###################################################
### Calculate the estimated accuracy differences
### between the two groups
###################################################
###################################################

glmmAcc <- glmer(correctReview ~ studyType + (1 | uniqueReviewers) + (1 | uniqueSolvers),
                 family = "gaussian")
p.values.lmer(glmmAcc)