#--------- Supplementary R code for the paper "Longitudinal Functional Principal Component Analysis ------------------# ########################################################################################## ### R function LFPCA implementing LFPCA for model (2) ### ########################################################################################## LFPCA <- function(Y, # an n x D matrix with rows=subject-visits and columns=locations (along curve) subject, # a vector of length n containing subject identifiers for rows of Y T, # a vector of length n containing the covariate for the random slope L = 0.90, # the pre-specified level of variance explained, determines number of components N.X = NA, # the number of components to keep for X; N.X and N.U override L if not NA N.U = NA, # the number of components to keep for U; N.X and N.U override L if not NA smooth = FALSE, # smooth=TRUE adds smoothing of the covariance matrices not done for smooth=FALSE bf = 10 # the number of basis functions used per dimension for all smooths ){ require(mgcv) require(Matrix) ### checks for consistency of input ### if (nrow(Y) != length(subject)){ stop("The number of rows in Y needs to agree with the length of the subject vector") } if (nrow(Y) != length(time)){ stop("The number of rows in Y needs to agree with the length of the time vector") } if (!is.na(L)){ if (L > 1 | L < 0){ stop("The level of explained variance needs to be between 0 and 1") } } if (is.na(L) & (is.na(N.X) | is.na(N.U) | N.X < 0 | N.U < 0)){ stop("If L is NA, both N.X and N.U need to be integers") } if (is.na(N.X) & !is.na(N.U)){ warning("As N.X is missing, N.U will not be used. Default to variance explained.") } if (is.na(N.U) & !is.na(N.X)){ warning("As N.U is missing, N.X will not be used. Default to variance explained.") } if (!is.na(N.X)){ if (N.X > floor(N.X)){ warning("N.X has to be an integer. Will use rounded value.") N.X <- round(N.X) } } if (!is.na(N.U)){ if (N.U > floor(N.U)){ warning("N.U has to be an integer. Will use rounded value.") N.U <- round(N.U) } } if (!is.na(L) & (!is.na(N.X) | !is.na(N.U))){ warning("N.X and N.U will override choice of variance explained L") } ### set up ### D <- ncol(Y) # number of points per curve n <- nrow(Y) # overall number of visits time <- (T-mean(T)) / sqrt(var(T)) # standardize the time variable d.vec <- rep(1:D, each = n) time.vec <- rep(time, D) J.vec <- sapply(unique(subject[order(subject)]), function(subj){sum(subject == subj)}) # number of visits for each subject I <- length(J.vec) # number of subjects m <- sum(J.vec^2) # overall number of visit pairs within subjects ### estimate mean function eta(d, T_ij) and overall mean eta0(d) ### gam1 <- gamm(as.vector(Y) ~ te(d.vec, time.vec, k = bf)) # overall fixed effects surface eta(d, T) gam0 <- gamm(as.vector(Y) ~ s(d.vec, k = bf)) # time invariant mean function eta(d) for plotting eta.matrix <- matrix(predict(gam1\$gam), n, D) # mean function eta(d, T) at grid points for plotting Y.tilde <- Y - eta.matrix # centered Y (residuals) ### estimate covariance functions using least squares ### G.0 <- G.01 <- H.01 <- G.1 <- G.U <- matrix(0, D, D) # set up empty covariance matrices diago <- rep(NA, D) # set up empty diagonal for K.U i1 <- function(i){ # find all j for indicator vectors for all i-(j,k) pairs before <- sum(J.vec[1:(i-1)]) * (i > 1) rep(before + (1:J.vec[i]), each = J.vec[i]) } i2 <- function(i){ # find all k for indicator vectors for all i-(j,k) pairs before <- sum(J.vec[1:(i-1)]) * (i > 1) rep(before + (1:J.vec[i]), J.vec[i]) } ind1 <- as.vector(unlist(sapply(1:I, i1))) # indicator vectors for j in all i-(j,k) pairs ind2 <- as.vector(unlist(sapply(1:I, i2))) # indicator vectors for k in all i-(j,k) pairs X <- cbind(rep(1, m), time[ind2], time[ind1], time[ind1] * time[ind2], (ind1 == ind2) * 1) # set up design matrix for linear regression according to (5) c <- matrix(unlist(lapply(1:(D-1), FUN = function(s){Y.tilde[ind1, s] * Y.tilde[ind2, (s+1):D]})), length(ind1), D * (D-1) / 2) # set up outcome vector (empirical covariances) for linear regression beta <- solve(crossprod(X), t(X) %*% c) # estimate covariances K.U(s,t) and K.X(s,t) (containing K.0(s,t), K.1(s,t), K.01(s,t) and K.01(t,s)) for all s != t using linear regression based on (5) Xss <- cbind(X[, 1], X[, 2] + X[, 3], X[, 4], X[, 5])[(ind1 >= ind2), ] # set up design matrix according to (5) css <- sapply(1:D, FUN = function(s){Y.tilde[ind1[(ind1 >= ind2)], s] * Y.tilde[ind2[(ind1 >= ind2)], s]}) # set up outcome vector betass <- solve(crossprod(Xss), t(Xss) %*% css) # estimate covariances K.U(s,s) and K.X(s,s) (containing K.0(s,s), K.1(s,s) and K.01(s,s)) for all s using linear regression based on (5) G.0[outer(1:D, 1:D, FUN = function(s, t){(s > t)})] <- beta[1, ] # estimates for K.0(s,t) are in the 1st column G.0 <- G.0 + t(G.0) # symmetry constraints yield K.0(s,t) = K.0(t,s) G.1[outer(1:D, 1:D, FUN = function(s, t){(s > t)})] <- beta[4, ] # estimates for K.1(s,t) are in the 4th column G.1 <- G.1 + t(G.1) # symmetry constraints yield K.1(s,t) = K.1(t,s) G.U[outer(1:D, 1:D, FUN = function(s, t){(s > t)})] <- beta[5, ] # estimates for K.U(s,t) are in the 5th column G.U <- G.U + t(G.U) # symmetry constraints yield K.U(s,t) = K.U(t,s) H.01[outer(1:D, 1:D, FUN = function(s, t){(s > t)})] <- beta[2, ] # estimates for K.10(s,t) are in the 2nd column G.01[outer(1:D, 1:D, FUN = function(s, t){(s > t)})] <- beta[3, ] # estimates for K.01(s,t) are in the 3rd column G.01 <- G.01 + t(H.01) # symmetry constraints yield K.01(s,t) = K.10(t,s) diag(G.0) <- betass[1, ] # estimates for K.0(s,s) are in the 1st column diag(G.1) <- betass[3, ] # estimates for K.1(s,s) are in the 3rd column diag(G.01) <- betass[2, ] # estimates for K.01(s,s) are in the 2nd column diago <- betass[4, ] # estimates for K.U(s,s)+sigma^2 are in the 4th column diag(G.U) <- rep(NA, D) # do not use diagonal K.U(s,s)+sigma^2 in smoothing K.U ### smoothing of covariance functions, estimation of sigma^2 ### row.vec <- rep(1:D, each = D) # set up row variable for bivariate smoothing col.vec <- rep(1:D, D) # set up column variable for bivariate smoothing if (smooth == TRUE){ # if smoothing is selected (corresponds to method described in the paper): K.0 <- matrix(predict(gamm(as.vector(G.0) ~ te(row.vec, col.vec, k = bf))\$gam), D, D) # smooth K.0 K.0 <- (K.0 + t(K.0)) / 2 # after smoothing, symmetrize K.0 K.1 <- matrix(predict(gamm(as.vector(G.1) ~ te(row.vec, col.vec, k = bf))\$gam), D, D) # smooth K.1 K.1 <- (K.1 + t(K.1)) / 2 # after smoothing, symmetrize K.1 K.01 <- matrix(predict(gamm(as.vector(G.01) ~ te(row.vec, col.vec, k = bf))\$gam), D, D) # smooth K.01 K.U <- matrix(predict(gamm(as.vector(G.U) ~ te(row.vec, col.vec, k = bf))\$gam, newdata = data.frame(row.vec = row.vec, col.vec = col.vec)), D, D) # smooth K.U K.U <- (K.U + t(K.U)) / 2 # after smoothing, symmetrize K.U } else { # if no smoothing is selected (faster): K.U <- G.U # do not smooth K.U (off-diagonal) K.0 <- G.0 # do not smooth K.0 K.01 <- G.01 # do not smooth K.01 K.1 <- G.1 # do not smooth K.1 diag(K.U) <- predict(gamm(as.vector(K.U) ~ te(row.vec, col.vec, k = bf))\$gam, newdata = data.frame(row.vec = 1:D, col.vec = 1:D)) # only separate diagonal K.U(s,s) + sigma^2 into K.U(s,s) and sigma^2 using bivariate smoothing } K.X <- rbind(cbind(K.0, K.01), cbind(t(K.01), K.1)) # put together K.X, containing K.0, K.01, K.10 and K.1 sigma2.hat <- max(mean(diago - diag(K.U)), 0) # estimate sigma^2 as mean difference between diagonal # K.U(s,s) + sigma^2 and smoothed K.U(s,s) (0, if smaller 0 otherwise) ### estimate eigenfunctions, compute variance explained, N.X and N.U ### lambda.hat <- eigen(K.X, symmetric = TRUE, only.values = TRUE)\$values # eigenvalues of K.X yield estimates for the lambda_k nu.hat <- eigen(K.U, symmetric = TRUE, only.values = TRUE)\$values # eigenvalues of K.U yield estimates for the nu_k total.variance <- sum(lambda.hat * (lambda.hat > 0)) + sum(nu.hat * (nu.hat > 0)) + sigma2.hat # the total average variance is the sum of all variance terms lambda_k, nu_k and sigma^2 according to Lemma 1 if (is.na(N.X) | is.na(N.U)){ # if no values for the numbers of principal components N.X and N.U are specified: prop <- N.X <- N.U <- 0 while(prop < L){ # add components for X or U with decreasing variance, # until level L of explained average variance is reached if (lambda.hat[N.X + 1] >= nu.hat[N.U + 1]){ N.X <- N.X + 1 } else { N.U <- N.U + 1 } prop <- (sum(lambda.hat[1:N.X]) + sum(nu.hat[1:N.U])) / total.variance # update explained average variance } } explained.variance <- (sum(lambda.hat[1:N.X]) + sum(nu.hat[1:N.U])) / total.variance # explained average variance lambda.hat <- lambda.hat[1:N.X] # keep first N.X values in lambda.hat nu.hat <- nu.hat[1:N.U] # keep first N.U values in nu.hat phi.X <- eigen(K.X, symmetric = TRUE)\$vectors[, 1:N.X] # eigenvectors of K.X yield estimated for eigenfunctions phi.X_k phi.U <- eigen(K.U, symmetric = TRUE)\$vectors[, 1:N.U] # eigenvectors of K.U yield estimated for eigenfunctions phi.U_k phi.0 <- phi.X[1:D, ] # phi.X_k = (phi.0_k, phi.1_k) phi.1 <- phi.X[(D+1):(2*D), ] ### estimate scores ### subject.ind <- unlist(sapply(subject, function(su){which(unique(subject[order(subject)]) == su)})) Z.X <- kronecker(Diagonal(I)[subject.ind, ] , phi.0) + kronecker(Diagonal(n, time) %*% Diagonal(I)[subject.ind, ], phi.1) # set up the matrices for BLUP Z.U <- kronecker(Diagonal(n) , phi.U) # estimation using the Woodbury Z <- cBind(Z.X, Z.U) # formula D.inv <- Diagonal(N.X*I + N.U*n, c(rep(1 / lambda.hat, I), rep(1 / nu.hat, n))) b.hat <- solve(crossprod(Z) + sigma2.hat * D.inv, t(Z) %*% as.vector(t(Y.tilde))) # estimate scores according to Thm. 2 xi.hat <- t(matrix(b.hat[1:(N.X*I)], N.X, I)) # b.hat contains first xi.hat, zeta.hat <- t(matrix(b.hat[(N.X*I) + (1:(N.U*n))], N.U, n)) # then zeta.hat ### return results ### results <- list(Y = Y, subject = subject, time = time, eta = gam1\$gam, eta0 = gam0\$gam, eta.matrix = eta.matrix, phi.0 = phi.0, phi.1 = phi.1, phi.U = phi.U, K.X = K.X, K.U = K.U, sigma2 = sigma2.hat, lambda = lambda.hat, nu = nu.hat, xi = xi.hat, zeta = zeta.hat, N.X = N.X, N.U = N.U, L = L, totvar = total.variance, exvar = explained.variance ) return(results) } ########################################################################################## ### R function plot.LFPCA plotting results from fitting model (2) using function LFPCA ### ########################################################################################## plot.LFPCA <- function(res, # a results file from LFPCA outpdf, # a pdf file to which to plot the results group = rep(1, nrow(res\$Y)) # a grouping variable for the boxplots of the eigenscores (optional) ){ D <- ncol(res\$Y) # the number of points per curve group2 <- unique(cbind(res\$subject, group))[, 2] # the grouping variable per subject (not visit) eta <- predict(res\$eta0, newdata = data.frame(d.vec = 1:D)) # the time invariant mean function for plotting lu <- range(cbind(matrix(eta, D, res\$N.X) - 2*res\$phi.0 %*% diag(sqrt(res\$lambda)), # limits for plotting matrix(eta, D, res\$N.X) + 2*res\$phi.0 %*% diag(sqrt(res\$lambda)), matrix(eta, D, res\$N.X) - 2*res\$phi.1 %*% diag(sqrt(res\$lambda)), matrix(eta, D, res\$N.X) + 2*res\$phi.1 %*% diag(sqrt(res\$lambda)), matrix(eta, D, res\$N.U) - 2*res\$phi.U %*% diag(sqrt(res\$nu )), matrix(eta, D, res\$N.U) + 2*res\$phi.U %*% diag(sqrt(res\$nu )))) plot.ef <- function(ev, ef, yl, var){ # for given eigenfunctions and corresponding eigenvalues: plot(1:D, eta, type = "l", xlab = "d", ylab = yl, ylim = lu, main = paste(round(var), "% variance", sep = "")) # plot mean function points(eta + 2*sqrt(ev)*ef, pch = "+") # plus / minus 2 times the standard deviation times the points(eta - 2*sqrt(ev)*ef, pch = "-") # corresponding eigenfunction } pdf(file = outpdf) plot(res\$eta, main = "Estimated mean function", xlab = "d", ylab = "T") # plot estimated mean profile eta(d,T) plot(res\$eta0, main = "Estimated time-constant mean function", xlab = "d", ylab = expression(eta[0](d))) # plot estimated time-constant mean function eta(d) par(mfrow = c(ceiling(sqrt(res\$N.X)), ceiling(res\$N.X / ceiling(sqrt(res\$N.X)))), mar = c(5.1, 5.1, 1.1, 1.1)) for (k in 1:res\$N.X){ # for each eigenfunction, plot boxplots of subject-specific scores by the grouping variable boxplot(res\$xi[, k] ~ group2, ylab = eval(substitute(expression(hat(xi)[i][j]), list(j = k)))) abline(h = 0, col = 8) } par(mfrow = c(ceiling(sqrt(res\$N.X)), ceiling(res\$N.X / ceiling(sqrt(res\$N.X)))), mar = c(5.1, 5.1, 1.1, 1.1)) for (k in 1:res\$N.X){ # for each phi.0_k, plot mean function +/- 2 the standard deviation sqrt(lambda_k) times phi.0_k plot.ef(ev = res\$lambda[k], ef = res\$phi.0[, k], yl = substitute(hat(phi)[k]^0, list(k = k)), res\$phi.0[, k] %*% res\$phi.0[, k] * res\$lambda[k] / res\$totvar * 100) } par(mfrow = c(ceiling(sqrt(res\$N.X)), ceiling(res\$N.X / ceiling(sqrt(res\$N.X)))), mar = c(5.1, 5.1, 1.1, 1.1)) for (k in 1:res\$N.X){ # for each phi.1_k, plot mean function +/- 2 the standard deviation sqrt(lambda_k) times phi.1_k plot.ef(ev = res\$lambda[k], ef = res\$phi.1[, k], yl = substitute(hat(phi)[k]^1, list(k = k)), res\$phi.1[, k] %*% res\$phi.1[, k] * res\$lambda[k] / res\$totvar * 100) } par(mfrow = c(ceiling(sqrt(res\$N.X)), ceiling(res\$N.X / ceiling(sqrt(res\$N.X)))), mar = c(5.1, 5.1, 1.1, 1.1)) for (k in 1:res\$N.X){ # plot each phi.0_k plot(res\$phi.0[, k], type = "l", xlab = "d", ylab = substitute(hat(phi)[k]^0, list(k = k)), main = paste(round(res\$phi.0[, k] %*% res\$phi.0[, k] * res\$lambda[k] / res\$totvar * 100), "% variance", sep = "")) } par(mfrow = c(ceiling(sqrt(res\$N.X)), ceiling(res\$N.X / ceiling(sqrt(res\$N.X)))), mar = c(5.1, 5.1, 1.1, 1.1)) for (k in 1:res\$N.X){ # plot each phi.1_k plot(res\$phi.1[, k], type = "l", xlab = "d", ylab = substitute(hat(phi)[k]^1, list(k = k)), main = paste(round(res\$phi.1[, k] %*% res\$phi.1[, k] * res\$lambda[k] / res\$totvar * 100), "% variance", sep = "")) } par(mfrow = c(ceiling(sqrt(res\$N.U)), ceiling(res\$N.U / ceiling(sqrt(res\$N.U)))), mar = c(5.1, 5.1, 1.1, 1.1)) for (k in 1:res\$N.U){ # for each eigenfunction, plot boxplots of subject-visit-specific scores by the grouping variable boxplot(res\$zeta[, k] ~ group, ylab = eval(substitute(expression(hat(zeta)[ij][l]), list(l = k)))) abline(h = 0, col = 8) } par(mfrow = c(ceiling(sqrt(res\$N.U)), ceiling(res\$N.U / ceiling(sqrt(res\$N.U)))), mar = c(5.1, 5.1, 1.1, 1.1)) for (k in 1:res\$N.U){ # for each phi.U_k, plot mean function +/- 2 the standard deviation sqrt(nu_k) times phi.U_k plot.ef(ev = res\$nu[k], ef = res\$phi.U[, k], yl = substitute(hat(phi)[k]^U, list(k = k)), res\$nu[k] / res\$totvar * 100) } par(mfrow = c(ceiling(sqrt(res\$N.U)), ceiling(res\$N.U / ceiling(sqrt(res\$N.U)))), mar = c(5.1, 5.1, 1.1, 1.1)) for (k in 1:res\$N.U){ # plot each phi.U_k plot(res\$phi.U[, k], type = "l", xlab = "d", ylab = substitute(hat(phi)[k]^U, list(k = k)), main = paste(round(res\$nu[k] / res\$totvar * 100), "% variance", sep = "")) } par(mfrow = c(1, 1)) # plot all variances in one plot plot(res\$lambda, ylim = c(0, max(res\$lambda)), ylab = "Estimated variance components", xaxt = "n", xlab = "") points(res\$nu, pch = "+") points(res\$sigma2, pch = 15) legend("topright", pch = c(1, 3, 15), legend = expression(hat(lambda), hat(nu), hat(sigma)^2)) dev.off() } ########################################################################################## ### example code calling function LFPCA used for the simulations detailed in Section 4 ### ########################################################################################## library(orthopolynom) ### set parameters for simulation ### rep <- 2 #1000 # number of replications in the simulation parms <- 1 #determines one out of 64 settings A <- matrix(NA, 5, 64) # set up a matrix A that contains all parameter combinations, then select corresponding column count <- 1 for (smoothing in c("ns", "sm")){ for (I in c(50, 100, 200, 500)){ for (balanced in c("ba", "ub")){ for (orthogonal in c("or", "no")){ for (normal in c("nor", "mix")){ A[, count] <- c(I, balanced, orthogonal, smoothing, normal) count <- count + 1 } } } } } I <- as.numeric(A[1, parms]) # I - number of subjects (50, 100, 200 or 500) balanced <- A[2, parms] # balanced ("ba") or unbalanced ("ub") design orthogonal <- A[3, parms] # orthogonal ("no") / nonorthogonal ("or") bases for X_0 and X_1 smoothing <- A[4, parms] # smoothing ("sm") or no smoothing ("ns") of covariance matrices normal <- A[5, parms] # normal ("nor") or mixture ("mix") distribution for scores name <- paste("2_", I, balanced, "_", orthogonal, smoothing, normal, sep = "") # indicates parameters in output files J.vec <- rep(4, I) # if balanced design, all subjects have 4 visits if (balanced == "ub"){ # if unbalanced design, 1 to 9 visits ncl <- (I / 50) * c(2, 3, 4, 5, 5, 6, 9, 8, 8) J.vec <- rep(9:1, ncl) } D <- 120 # number of points per curve n <- sum(J.vec) # overall number of visits sigma <- 0.05 # sigma^2 is the variance of the errors eps_ij lambda <- nu <- 0.5^((1:4) - 1) # variances of the scores N.X <- length(lambda) # number of non-zero variances for X N.U <- length(nu) # number of non-zero variances for U subject <- rep(1:I, J.vec) # vector with subject-IDs myfun <- function(j){ t <- 0 if (j > 1){ for (j in 2:j){ t <- cbind(t, sum(t) + runif(1)) } } return(t - mean(t)) } time <- unlist(lapply(J.vec, FUN = myfun)) # generate visit times with uniform increments time <- time / sqrt(var(time)) # standardize time variable etaf <- function(t, d){ # define mean function eta(d,T) (t/4 - d/D)^2 / 2 } eta <- outer(time, 1:D, etaf) # compute values eta(d,T_ij) if (orthogonal == "or"){ # define functions phi.0_k, phi.1_k, phi.U_k phi0 <- function(d, k){ 1/sqrt(D) * sqrt(3/2) * ((k%%2) * sin((k + k%%2) * pi * (d - 0.5)/D) + (1 - k%%2) * cos((k + k%%2) * pi * (d - 0.5)/D)) } phi1 <- function(d, k){ as.function(legendre.polynomials(k, normalized = TRUE)[[k]])((2*(d - 0.5)/D - 1))/sqrt(D) * (sqrt(1/2)) } phiU <- function(d, k){ if (k == 1){ phi1(d, k) * sqrt(4) } else { phi0(d, k-1) * sqrt(4/3) } } } else { phi0 <- function(d, k){ 1/sqrt(D) * ((k%%2) * sin((k + k%%2) * pi * (d - 0.5)/D) + (1 - k%%2) * cos((k + k%%2) * pi * (d - 0.5)/D)) } phi1 <- function(d, k){ ((k == 1)/sqrt(2 * D) + (k == 2) * sin(6 * pi * (d - 0.5)/D)/sqrt(D) + (k == 3) * cos(6 * pi * (d - 0.5)/D)/sqrt(D) + (k == 4) * sin(8 * pi * (d - 0.5)/D)/sqrt(D)) } phiU <- function(d, k){ as.function(legendre.polynomials(k, normalized = TRUE)[[k]])((2 * (d - 0.5)/D - 1))/sqrt(D/2) } } phiU.matrix <- matrix(NA, N.U, D) phi1.matrix <- phi0.matrix <- matrix(NA, N.X, D) for (k in 1:N.U){ # compute phi.0_k(d), phi.1_k(d), phi.U_k(d) phiU.matrix[k, ] <- phiU(1:D, k) # for all k and d phi0.matrix[k, ] <- phi0(1:D, k) phi1.matrix[k, ] <- phi1(1:D, k) } phiX.matrix <- cbind(phi0.matrix, phi1.matrix) xi <- xi.hat <- array(NA, dim = c(I, N.X, rep)) # set up empty vectors and matrices for all zeta <- zeta.hat <- array(NA, dim = c(n, N.U, rep)) # for all quantities to be estimated in phiU.hat <- array(NA, dim = c(D, N.U, rep)) # simulations phi0.hat <- phi1.hat <- array(NA, dim = c(D, N.X, rep)) eta.hat <- Y.matrix <- array(NA, dim = c(n, D, rep)) sigma2.hat <- rep(NA, rep) lambda.hat <- matrix(NA, N.X, rep) nu.hat <- matrix(NA, N.U, rep) r <- 1 errors <- 0 # count any errors that might occur while (r <= rep){ # do rep simulation repetitions without errors if (r%%(rep / 10) == 0){ print(paste("Simulation", r * 100 / rep, "% done")) # give out progress report } ### generate one simulated set of Ys ### if (normal == "nor"){ # simulate scores accoring to normal zeta[, , r] <- matrix(rnorm(N.U * n), n, N.U) %*% diag(sqrt(nu)) xi[, , r] <- matrix(rnorm(N.X * I), I, N.X) %*% diag(sqrt(lambda)) } else { # or mixture distribution, as selected zeta[, , r] <- matrix(rnorm(N.U * n), n, N.U) %*% diag(sqrt(nu / 2)) + matrix(2 * rbinom(n * N.U, 1, 0.5) - 1, n, N.U) %*% diag(sqrt(nu / 2)) xi[, , r] <- matrix(rnorm(N.X * I), I, N.X) %*% diag(sqrt(lambda / 2)) + matrix(2 * rbinom(I * N.X, 1, 0.5) - 1, I, N.X) %*% diag(sqrt(lambda / 2)) } U <- zeta[, , r] %*% phiU.matrix # generate visit-specific deviations X.0 <- xi[rep(1:I, J.vec), , r] %*% phi0.matrix # generate random functional intercept X.1 <- xi[rep(1:I, J.vec), , r] %*% phi1.matrix # generate random functional slope eps <- matrix(rnorm(D * n, 0, sigma), n, D) # generate measurement error Y <- eta + X.0 + X.1 * matrix(rep(time, D), n, D) + U + eps # compute observations according to model (2) ### plot example Ys ### if (r == 1){ # plot some example Y's in the first iteration pdf(file = paste("../results/LFPCA_", name, "_subjects.pdf", sep = "")) for (i in 1:6){ Y.plot <- Y[(subject == i), ] plot(c(1, 120), c(min(Y.plot), max(Y.plot)), col = 0, xlab = "", ylab = "", main = paste("Subject ", i, sep = "")) for (j in 1:J.vec[i]){ lines(Y.plot[j, ], type = "l", col = j) } legend(x = "bottomleft", col = c(1:J.vec[i]), lty = 1, legend = 1:J.vec[i], title = "Visit")} dev.off() } ### LPFCA ### results <- try(LFPCA(Y = Y, subject = subject, T = time, N.X = N.X, N.U = N.U, L = NA, smooth = (smoothing == "sm"))) # use LFPCA to estimate all quantities if(class(results) == "try-error" ){ # if an error occured, count error, save relevant data, and repeat iteration save(Y, time, subject, file = paste("LFPCA_error_", name, "_", r, ".Rdata", sep = "")) errors <- errors + 1 next } ### as eigenfunctions are only unique up to the sign, flip eigenfunctions to minimize ### ### distance to true function, if necessary ### soq <- function(x, y){ # sums of squares to compare distances t(x - y) %*% (x - y) } for (k in 1:N.U){ flip <- (soq(phiU.matrix[k, 1:D], results\$phi.U[, k]) > soq(phiU.matrix[k, 1:D], - results\$phi.U[, k])) if (flip){ # if distance to true function would be smaller when eigenfunction were flipped, flip it (and the scores) results\$phi.U[, k] <- - results\$phi.U[, k] results\$zeta[, k] <- - results\$zeta[, k] } } phiX.hat <- rbind(results\$phi.0, results\$phi.1) for (k in 1:N.X){ #analogous for the eigenfunctions for X (flip phi.0_k and phi.1_k together) flip <- (soq(phiX.matrix[k, ], phiX.hat[, k]) > soq(phiX.matrix[k, ], - phiX.hat[, k])) if (flip){ results\$phi.0[, k] <- - results\$phi.0[, k] results\$phi.1[, k] <- - results\$phi.1[, k] results\$xi[, k] <- - results\$xi[, k] } } ### store results ### phi0.hat[, , r] <- results\$phi.0 phi1.hat[, , r] <- results\$phi.1 phiU.hat[, , r] <- results\$phi.U xi.hat[, , r] <- results\$xi zeta.hat[, , r] <- results\$zeta eta.hat[, , r] <- results\$eta.matrix Y.matrix[, , r] <- Y lambda.hat[, r] <- results\$lambda nu.hat[, r] <- results\$nu sigma2.hat[r] <- results\$sigma2 r <- r + 1 # go to next iteration } print(paste(errors, "errors out of", rep, "replications")) # print out the number of errors for future reference ### save results ### save(Y.matrix, time, subject, phi0.matrix, phi1.matrix, phiU.matrix, phi0.hat, phi1.hat, phiU.hat, eta, eta.hat, xi, xi.hat, zeta, zeta.hat, lambda, nu, lambda.hat, nu.hat, sigma, sigma2.hat, file = paste("LFPCA_", name, ".Rdata", sep = "")) #load(file = paste("LFPCA_", name, ".Rdata", sep = "")); rep <- dim(phi0.hat)[3] ### plot simulation results ### plotresults <- function(f, # the true function g, # an array containing the function estimates ind # the index - U for phi.U, 0 for phi.0, 1 for phi.1 ){ for (k in 1:4){ lims <- 0.18 + 0.07 * (ind == "U") plot(eval(f(1:D, k)), type = "l", ylab = substitute(phi[k]^ind, list(k = k, ind = ind)), xlab = "", ylim = c(-lims, lims)) # plot true fctn for (r in 1:min(rep, 100)){ lines(eval(g(1:D, k, r)), col = 8) # plot first 100 estimates } lines(eval(f(1:D, k)), lwd = 2) means <- sapply(1:D, function(d){mean(g(d, k, 1:rep))}) # plot mean of all estimated functions lines(means, col = 2) abline(h = 0, col = 3) quants <- sapply(1:D, function(d){quantile(g(d, k, 1:rep), probs = c(0.05, 0.95), type = 8)}) for (i in 1:2){ lines((quants[i, ]), col = 4) # plot 5th and 95th percentile of estimated functions } } } time1 <- unique(time[order(time)]) find.t.ind <- function(row){ which(time == time1[row])[1] } t.ind <- sapply(1:length(time1), find.t.ind) eta.mean <- apply(eta.hat[t.ind, , ], c(1, 2), FUN = "mean") # compute mean of estimated mean functions eta, and eta.q5 <- apply(eta.hat[t.ind, , ], c(1, 2), FUN = function(x){quantile(x, probs = c(0.05), type = 8)}) # 5th and eta.q95 <- apply(eta.hat[t.ind, , ], c(1, 2), FUN = function(x){quantile(x, probs = c(0.95), type = 8)}) # 95th quantile pdf(file = paste("../results/LFPCA_", name, ".pdf", sep = "")) ### plot eigenfunctions ### par(mfrow = c(2, 2), mar = c(5.1, 5.1, 1.1, 1.1)) plotresults(phi0, function(d, k, r){phi0.hat[d, k, r]}, "0") # plots of estimated and true functions for phi.0, plotresults(phi1, function(d, k, r){phi1.hat[d, k, r]}, "1") # phi.1, plotresults(phiU, function(d, k, r){phiU.hat[d, k, r]}, "U") # and phi.U ### plot mean function eta ### par(mfrow = c(1, 1)) contour(1:D, time1, t(outer(time1, 1:D, etaf)), levels = seq(0, 1, 0.1), xlab = "d", ylab = "time") # contour plot of the true eta, contour(1:D, time1, t(eta.mean), col = 2, add = TRUE, levels = seq(0, 1, 0.1)) # mean of estimated functions contour(1:D, time1, t(eta.q5), col = 3, add = TRUE, levels = seq(0, 1, 0.1)) # and of 5th and contour(1:D, time1, t(eta.q95), col = 4, add = TRUE, levels = seq(0, 1, 0.1)) # 95th pointwise quantiles ### plot scores ### par(mfrow = c(2, 2), mar = c(5.1, 5.1, 1.1, 1.1)) for (k in 1:4){ boxplot((xi.hat - xi)[, k, ] / sqrt(lambda[k]), (zeta.hat - zeta)[, k, ] / sqrt(nu[k]), ylim = c(-4 + 1.7 * (I > 50), 4 - 1.7 * (I > 50)), names = eval(substitute(expression((hat(xi)[i][l] - xi[i][l]) / sqrt(lambda[l]), (hat(zeta)[ij][l] - zeta[ij][l]) / sqrt(nu[l])), list(l = k)))) # boxplots of standardized score estimates points(1, mean((xi.hat - xi)[, k, ] / sqrt(lambda[k])), pch = "x", col = 2) # add corresponding means points(2, mean( (zeta.hat - zeta)[, k, ] / sqrt(nu[k])), pch = "x", col = 2) abline(h = 0, col = 3) } for (k in 1:4){ # scatter plot of estimated versus true scores xi plot(as.vector(xi[1:3, k, ]), as.vector(xi.hat[1:3, k, ]), xlab = eval(substitute(expression(xi[i][l], list(l = k)))), ylab = eval(substitute(expression(hat(xi)[i][l], list(l = k))))) abline(coef = 0:1, col = 2) } for (k in 1:4){ # scatter plot of estimated versus true scores zeta plot(zeta[1:3, k, ], zeta.hat[1:3, k, ], xlab = substitute(zeta[i][l], list(l = k)), ylab = substitute(hat(zeta)[i][l], list(l = k))) abline(coef = 0:1, col = 2) } ### plot variances ### for (k in 1:4){ # boxplots of estimated variances lambda_k and nu_k boxplot(lambda.hat[k, ], nu.hat[k, ], ylim = c(min(c(lambda.hat[k, ], lambda[k], nu.hat[k, ], nu[k])), max(c(lambda.hat[k, ], lambda[k], nu.hat[k, ], nu[k]))), col = 8, main = "", ylab = "Estimated variance", names = eval(substitute(expression(lambda[i], nu[i]), list(i = k)))) points(1, mean(lambda.hat[k, ]), pch = "x", col = 2) # add means of estimates points(2, mean(nu.hat[k, ]), pch = "x", col = 2) abline(h = lambda[k], col = 1, lwd = 2) # indicate true value lambda_k = nu_k by a horizontal line abline(h = 0, col = 3) } par(mfrow = c(1, 1)) # boxplots of estimated variance sigma^2 boxplot(sigma2.hat, ylim = c(min(c(sigma2.hat, sigma^2)), max(c(sigma2.hat, sigma^2))), col = 8, main = "", ylab = expression(hat(sigma)^2)) points(1, mean(sigma2.hat), pch = "x", col = 2) # add mean of estimates abline(h = sigma^2, col = 1, lwd = 2) # indicate true value sigma^2 by a horizontal line abline(h = 0, col = 3) dev.off()