#--------- Supplementary R code for the paper "Longitudinal Functional Principal Component Analysis ------------------#

##########################################################################################
### R function LFPCA implementing LFPCA for model (2)                                  ###
##########################################################################################

LFPCA <- function(Y,               # an n x D matrix with rows=subject-visits and columns=locations (along curve)
                  subject,         # a vector of length n containing subject identifiers for rows of Y
                  T,               # a vector of length n containing the covariate for the random slope
                  L = 0.90,        # the pre-specified level of variance explained, determines number of components   
                  N.X = NA,        # the number of components to keep for X; N.X and N.U override L if not NA
                  N.U = NA,        # the number of components to keep for U; N.X and N.U override L if not NA
                  smooth = FALSE,  # smooth=TRUE adds smoothing of the covariance matrices not done for smooth=FALSE    
                  bf = 10          # the number of basis functions used per dimension for all smooths
                  ){
require(mgcv)
require(Matrix)

### checks for consistency of input ###
if (nrow(Y) != length(subject)){
   stop("The number of rows in Y needs to agree with the length of the subject vector")
}
if (nrow(Y) != length(time)){
   stop("The number of rows in Y needs to agree with the length of the time vector")
}
if (!is.na(L)){
   if (L > 1 | L < 0){
      stop("The level of explained variance needs to be between 0 and 1")
   }
}
if (is.na(L) & (is.na(N.X) | is.na(N.U) | N.X < 0 | N.U < 0)){
   stop("If L is NA, both N.X and N.U need to be integers")
}
if (is.na(N.X) & !is.na(N.U)){
   warning("As N.X is missing, N.U will not be used. Default to variance explained.")
}
if (is.na(N.U) & !is.na(N.X)){
   warning("As N.U is missing, N.X will not be used. Default to variance explained.")
}
if (!is.na(N.X)){
   if (N.X > floor(N.X)){
      warning("N.X has to be an integer. Will use rounded value.")
      N.X <- round(N.X)
   }
}
if (!is.na(N.U)){
   if (N.U > floor(N.U)){
      warning("N.U has to be an integer. Will use rounded value.")
      N.U <- round(N.U)
   }
}
if (!is.na(L) & (!is.na(N.X) | !is.na(N.U))){
   warning("N.X and N.U will override choice of variance explained L")
}
   
### set up ###
D        <- ncol(Y)                        # number of points per curve
n        <- nrow(Y)                        # overall number of visits
time     <- (T-mean(T)) / sqrt(var(T))     # standardize the time variable
d.vec    <- rep(1:D, each = n)                
time.vec <- rep(time, D)
J.vec    <- sapply(unique(subject[order(subject)]), function(subj){sum(subject == subj)})
                                           # number of visits for each subject
I        <- length(J.vec)                  # number of subjects
m        <- sum(J.vec^2)                   # overall number of visit pairs within subjects
                                                         
### estimate mean function eta(d, T_ij) and overall mean eta0(d) ###
gam1       <- gamm(as.vector(Y) ~ te(d.vec, time.vec, k = bf))  # overall fixed effects surface eta(d, T)
gam0       <- gamm(as.vector(Y) ~ s(d.vec, k = bf))             # time invariant mean function eta(d) for plotting
eta.matrix <- matrix(predict(gam1$gam), n, D)                 # mean function eta(d, T) at grid points for plotting
Y.tilde    <- Y - eta.matrix                                  # centered Y (residuals)
                                                         
### estimate covariance functions using least squares ###
G.0 <- G.01 <- H.01 <- G.1 <- G.U <- matrix(0, D, D)          # set up empty covariance matrices
diago <- rep(NA, D)                                           # set up empty diagonal for K.U
i1 <- function(i){                                            # find all j for indicator vectors for all i-(j,k) pairs
   before <- sum(J.vec[1:(i-1)]) * (i > 1)
   rep(before + (1:J.vec[i]), each = J.vec[i])
} 
i2 <- function(i){                                            # find all k for indicator vectors for all i-(j,k) pairs
   before <- sum(J.vec[1:(i-1)]) * (i > 1)
   rep(before + (1:J.vec[i]), J.vec[i])
}
ind1 <- as.vector(unlist(sapply(1:I, i1)))                    # indicator vectors for j in all i-(j,k) pairs
ind2 <- as.vector(unlist(sapply(1:I, i2)))                    # indicator vectors for k in all i-(j,k) pairs

X <- cbind(rep(1, m), time[ind2], time[ind1], time[ind1] * time[ind2], (ind1 == ind2) * 1)
                                        # set up design matrix for linear regression according to (5)
c <- matrix(unlist(lapply(1:(D-1), FUN = function(s){Y.tilde[ind1, s] * Y.tilde[ind2, (s+1):D]})), length(ind1), D * (D-1) / 2)
                                        # set up outcome vector (empirical covariances) for linear regression      
beta <- solve(crossprod(X), t(X) %*% c)   # estimate covariances K.U(s,t) and K.X(s,t) (containing K.0(s,t), K.1(s,t), K.01(s,t) and K.01(t,s)) for all s != t using linear regression based on (5)

Xss <- cbind(X[, 1], X[, 2] + X[, 3], X[, 4], X[, 5])[(ind1 >= ind2), ] # set up design matrix according to (5)
css <- sapply(1:D, FUN = function(s){Y.tilde[ind1[(ind1 >= ind2)], s] * Y.tilde[ind2[(ind1 >= ind2)], s]}) # set up outcome vector
betass <- solve(crossprod(Xss), t(Xss) %*% css)   # estimate covariances K.U(s,s) and K.X(s,s) (containing K.0(s,s), K.1(s,s) and K.01(s,s)) for all s using linear regression based on (5)

G.0[outer(1:D, 1:D, FUN = function(s, t){(s > t)})]  <- beta[1, ]   # estimates for K.0(s,t) are in the 1st column
    G.0 <- G.0 + t(G.0)                                         # symmetry constraints yield K.0(s,t) = K.0(t,s)
G.1[outer(1:D, 1:D, FUN = function(s, t){(s > t)})]  <- beta[4, ]   # estimates for K.1(s,t) are in the 4th column
    G.1 <- G.1 + t(G.1)                                         # symmetry constraints yield K.1(s,t) = K.1(t,s)
G.U[outer(1:D, 1:D, FUN = function(s, t){(s > t)})]  <- beta[5, ]   # estimates for K.U(s,t) are in the 5th column
    G.U <- G.U + t(G.U)                                         # symmetry constraints yield K.U(s,t) = K.U(t,s)
H.01[outer(1:D, 1:D, FUN = function(s, t){(s > t)})] <- beta[2, ]   # estimates for K.10(s,t) are in the 2nd column
G.01[outer(1:D, 1:D, FUN = function(s, t){(s > t)})] <- beta[3, ]   # estimates for K.01(s,t) are in the 3rd column
    G.01 <- G.01 + t(H.01)                                      # symmetry constraints yield K.01(s,t) = K.10(t,s)
diag(G.0)  <- betass[1, ]                                       # estimates for K.0(s,s) are in the 1st column
diag(G.1)  <- betass[3, ]                                       # estimates for K.1(s,s) are in the 3rd column
diag(G.01) <- betass[2, ]                                       # estimates for K.01(s,s) are in the 2nd column
diago      <- betass[4, ]                                       # estimates for K.U(s,s)+sigma^2 are in the 4th column
diag(G.U)  <- rep(NA, D)                                        # do not use diagonal K.U(s,s)+sigma^2 in smoothing K.U

### smoothing of covariance functions, estimation of sigma^2 ###                                        
row.vec <- rep(1:D, each = D)     # set up row variable for bivariate smoothing                             
col.vec <- rep(1:D, D)          # set up column variable for bivariate smoothing
if (smooth == TRUE){      # if smoothing is selected (corresponds to method described in the paper):                       
   K.0  <- matrix(predict(gamm(as.vector(G.0)  ~ te(row.vec, col.vec, k = bf))$gam), D, D) # smooth K.0
      K.0 <- (K.0 + t(K.0)) / 2                                                            # after smoothing, symmetrize K.0 
   K.1  <- matrix(predict(gamm(as.vector(G.1)  ~ te(row.vec, col.vec, k = bf))$gam), D, D) # smooth K.1
      K.1 <- (K.1 + t(K.1)) / 2                                                            # after smoothing, symmetrize K.1
   K.01 <- matrix(predict(gamm(as.vector(G.01) ~ te(row.vec, col.vec, k = bf))$gam), D, D) # smooth K.01
   K.U  <- matrix(predict(gamm(as.vector(G.U)  ~ te(row.vec, col.vec, k = bf))$gam,
                       newdata = data.frame(row.vec = row.vec, col.vec = col.vec)), D, D)  # smooth K.U
      K.U <- (K.U + t(K.U)) / 2                                                            # after smoothing, symmetrize K.U
}
else {            # if no smoothing is selected (faster):
      K.U <- G.U    # do not smooth K.U (off-diagonal) 
      K.0 <- G.0    # do not smooth K.0
      K.01 <- G.01  # do not smooth K.01
      K.1 <- G.1    # do not smooth K.1
      diag(K.U) <- predict(gamm(as.vector(K.U) ~ te(row.vec, col.vec, k = bf))$gam, newdata = data.frame(row.vec = 1:D, col.vec = 1:D))                   # only separate diagonal K.U(s,s) + sigma^2 into K.U(s,s) and sigma^2 using bivariate smoothing 
}
K.X <- rbind(cbind(K.0, K.01), cbind(t(K.01), K.1))   # put together K.X, containing K.0, K.01, K.10 and K.1          
sigma2.hat <- max(mean(diago - diag(K.U)), 0)         # estimate sigma^2 as mean difference between diagonal
                                                   # K.U(s,s) + sigma^2 and smoothed K.U(s,s) (0, if smaller 0 otherwise)

### estimate eigenfunctions, compute variance explained, N.X and N.U ###
lambda.hat <- eigen(K.X, symmetric = TRUE, only.values = TRUE)$values # eigenvalues of K.X yield estimates for the lambda_k
nu.hat     <- eigen(K.U, symmetric = TRUE, only.values = TRUE)$values # eigenvalues of K.U yield estimates for the nu_k
total.variance <- sum(lambda.hat * (lambda.hat > 0)) + sum(nu.hat * (nu.hat > 0)) + sigma2.hat
           # the total average variance is the sum of all variance terms lambda_k, nu_k and sigma^2 according to Lemma 1 
if (is.na(N.X) | is.na(N.U)){  # if no values for the numbers of principal components N.X and N.U are specified:
   prop <- N.X <- N.U <- 0
   while(prop < L){              # add components for X or U with decreasing variance,
                               # until level L of explained average variance is reached
      if (lambda.hat[N.X + 1] >= nu.hat[N.U + 1]){
         N.X <- N.X + 1
      }
      else {
            N.U <- N.U + 1
      }
      prop <- (sum(lambda.hat[1:N.X]) + sum(nu.hat[1:N.U])) / total.variance  # update explained average variance
   }
}
explained.variance <- (sum(lambda.hat[1:N.X]) + sum(nu.hat[1:N.U])) / total.variance  # explained average variance 
                                                                
lambda.hat <- lambda.hat[1:N.X]                          # keep first N.X values in lambda.hat
nu.hat     <- nu.hat[1:N.U]                              # keep first N.U values in nu.hat
phi.X      <- eigen(K.X, symmetric = TRUE)$vectors[, 1:N.X]  # eigenvectors of K.X yield estimated for eigenfunctions phi.X_k
phi.U      <- eigen(K.U, symmetric = TRUE)$vectors[, 1:N.U]  # eigenvectors of K.U yield estimated for eigenfunctions phi.U_k
phi.0      <- phi.X[1:D, ]                                # phi.X_k = (phi.0_k, phi.1_k)
phi.1      <- phi.X[(D+1):(2*D), ]
                                                                              

### estimate scores ###
subject.ind <- unlist(sapply(subject, function(su){which(unique(subject[order(subject)]) == su)}))
Z.X         <- kronecker(Diagonal(I)[subject.ind, ]                   , phi.0) +
               kronecker(Diagonal(n, time) %*% Diagonal(I)[subject.ind, ], phi.1)        # set up the matrices for BLUP   
Z.U         <- kronecker(Diagonal(n)                                 , phi.U)        #   estimation using the Woodbury 
Z           <- cBind(Z.X, Z.U)                                                       #   formula
D.inv       <- Diagonal(N.X*I + N.U*n, c(rep(1 / lambda.hat, I), rep(1 / nu.hat, n)))
b.hat       <- solve(crossprod(Z) + sigma2.hat * D.inv, t(Z) %*% as.vector(t(Y.tilde))) # estimate scores according to Thm. 2
xi.hat      <- t(matrix(b.hat[1:(N.X*I)], N.X, I))                                    #   b.hat contains first xi.hat, 
zeta.hat    <- t(matrix(b.hat[(N.X*I) + (1:(N.U*n))], N.U, n))                          #   then zeta.hat

### return results ###
results <- list(Y = Y, subject = subject, time = time, eta = gam1$gam, eta0 = gam0$gam, eta.matrix = eta.matrix,
                phi.0 = phi.0, phi.1 = phi.1, phi.U = phi.U, K.X = K.X, K.U = K.U,
                sigma2 = sigma2.hat, lambda = lambda.hat, nu = nu.hat,
                xi = xi.hat, zeta = zeta.hat, N.X = N.X, N.U = N.U,
                L = L, totvar = total.variance, exvar = explained.variance
               )
return(results)
}




##########################################################################################
### R function plot.LFPCA plotting results from fitting model (2) using function LFPCA ###
##########################################################################################

plot.LFPCA <- function(res,                      # a results file from LFPCA
                       outpdf,                   # a pdf file to which to plot the results
                       group = rep(1, nrow(res$Y)) # a grouping variable for the boxplots of the eigenscores (optional)
                       ){                                        
                                        
D      <- ncol(res$Y)                                      # the number of points per curve
group2 <- unique(cbind(res$subject, group))[, 2]            # the grouping variable per subject (not visit)
eta    <- predict(res$eta0, newdata = data.frame(d.vec = 1:D))  # the time invariant mean function for plotting
lu     <- range(cbind(matrix(eta, D, res$N.X) - 2*res$phi.0 %*% diag(sqrt(res$lambda)),  # limits for plotting
                      matrix(eta, D, res$N.X) + 2*res$phi.0 %*% diag(sqrt(res$lambda)),
                      matrix(eta, D, res$N.X) - 2*res$phi.1 %*% diag(sqrt(res$lambda)),
                      matrix(eta, D, res$N.X) + 2*res$phi.1 %*% diag(sqrt(res$lambda)),
                      matrix(eta, D, res$N.U) - 2*res$phi.U %*% diag(sqrt(res$nu    )),
                      matrix(eta, D, res$N.U) + 2*res$phi.U %*% diag(sqrt(res$nu    ))))

plot.ef <- function(ev, ef, yl, var){  # for given eigenfunctions and corresponding eigenvalues:
              plot(1:D, eta, type = "l", xlab = "d", ylab = yl, ylim = lu,
                   main = paste(round(var), "% variance", sep = ""))
                                        # plot mean function
              points(eta + 2*sqrt(ev)*ef, pch = "+")   # plus / minus 2 times the standard deviation times the 
              points(eta - 2*sqrt(ev)*ef, pch = "-")   #    corresponding eigenfunction
                                 }
pdf(file = outpdf)
  plot(res$eta, main = "Estimated mean function", xlab = "d", ylab = "T")   # plot estimated mean profile eta(d,T)
  plot(res$eta0, main = "Estimated time-constant mean function", xlab = "d", ylab = expression(eta[0](d)))
                                                                   # plot estimated time-constant mean function eta(d)
  par(mfrow = c(ceiling(sqrt(res$N.X)), ceiling(res$N.X / ceiling(sqrt(res$N.X)))), mar = c(5.1, 5.1, 1.1, 1.1))
  for (k in 1:res$N.X){  # for each eigenfunction, plot boxplots of subject-specific scores by the grouping variable
    boxplot(res$xi[, k] ~ group2, ylab = eval(substitute(expression(hat(xi)[i][j]), list(j = k))))
    abline(h = 0, col = 8)
  }
  par(mfrow = c(ceiling(sqrt(res$N.X)), ceiling(res$N.X / ceiling(sqrt(res$N.X)))), mar = c(5.1, 5.1, 1.1, 1.1))
  for (k in 1:res$N.X){ # for each phi.0_k, plot mean function +/- 2 the standard deviation sqrt(lambda_k) times phi.0_k
    plot.ef(ev = res$lambda[k], ef = res$phi.0[, k], yl = substitute(hat(phi)[k]^0, list(k = k)),
            res$phi.0[, k] %*% res$phi.0[, k] * res$lambda[k] / res$totvar * 100)
  }
  par(mfrow = c(ceiling(sqrt(res$N.X)), ceiling(res$N.X / ceiling(sqrt(res$N.X)))), mar = c(5.1, 5.1, 1.1, 1.1))
  for (k in 1:res$N.X){ # for each phi.1_k, plot mean function +/- 2 the standard deviation sqrt(lambda_k) times phi.1_k
    plot.ef(ev = res$lambda[k], ef = res$phi.1[, k], yl = substitute(hat(phi)[k]^1, list(k = k)),
            res$phi.1[, k] %*% res$phi.1[, k] * res$lambda[k] / res$totvar * 100)
  }
  par(mfrow = c(ceiling(sqrt(res$N.X)), ceiling(res$N.X / ceiling(sqrt(res$N.X)))), mar = c(5.1, 5.1, 1.1, 1.1))
  for (k in 1:res$N.X){ # plot each phi.0_k
    plot(res$phi.0[, k], type = "l", xlab = "d", ylab = substitute(hat(phi)[k]^0, list(k = k)),
         main = paste(round(res$phi.0[, k] %*% res$phi.0[, k] * res$lambda[k] / res$totvar * 100), "% variance", sep = ""))
  }
  par(mfrow = c(ceiling(sqrt(res$N.X)), ceiling(res$N.X / ceiling(sqrt(res$N.X)))), mar = c(5.1, 5.1, 1.1, 1.1))
  for (k in 1:res$N.X){ # plot each phi.1_k
    plot(res$phi.1[, k], type = "l", xlab = "d", ylab = substitute(hat(phi)[k]^1, list(k = k)),
         main = paste(round(res$phi.1[, k] %*% res$phi.1[, k] * res$lambda[k] / res$totvar * 100), "% variance", sep = ""))
  }
  par(mfrow = c(ceiling(sqrt(res$N.U)), ceiling(res$N.U / ceiling(sqrt(res$N.U)))), mar = c(5.1, 5.1, 1.1, 1.1))
  for (k in 1:res$N.U){ # for each eigenfunction, plot boxplots of subject-visit-specific scores by the grouping variable
    boxplot(res$zeta[, k] ~ group, ylab = eval(substitute(expression(hat(zeta)[ij][l]), list(l = k))))
    abline(h = 0, col = 8)
  }
  par(mfrow = c(ceiling(sqrt(res$N.U)), ceiling(res$N.U / ceiling(sqrt(res$N.U)))), mar = c(5.1, 5.1, 1.1, 1.1))
  for (k in 1:res$N.U){ # for each phi.U_k, plot mean function +/- 2 the standard deviation sqrt(nu_k) times phi.U_k
    plot.ef(ev = res$nu[k], ef = res$phi.U[, k], yl = substitute(hat(phi)[k]^U, list(k = k)), res$nu[k] / res$totvar * 100)
  }
  par(mfrow = c(ceiling(sqrt(res$N.U)), ceiling(res$N.U / ceiling(sqrt(res$N.U)))), mar = c(5.1, 5.1, 1.1, 1.1))
  for (k in 1:res$N.U){ # plot each phi.U_k
    plot(res$phi.U[, k], type = "l", xlab = "d", ylab = substitute(hat(phi)[k]^U, list(k = k)),
         main = paste(round(res$nu[k] / res$totvar * 100), "% variance", sep = ""))
  }
  par(mfrow = c(1, 1))
  # plot all variances in one plot
  plot(res$lambda, ylim = c(0, max(res$lambda)), ylab = "Estimated variance components", xaxt = "n", xlab = "") 
  points(res$nu, pch = "+")
  points(res$sigma2, pch = 15)
  legend("topright", pch = c(1, 3, 15), legend = expression(hat(lambda), hat(nu), hat(sigma)^2))
dev.off()
}


##########################################################################################
### example code calling function LFPCA used for the simulations detailed in Section 4 ###
##########################################################################################

library(orthopolynom)

### set parameters for simulation ###
rep <- 2 #1000    # number of replications in the simulation
parms <- 1     #determines one out of 64 settings

A <- matrix(NA, 5, 64) # set up a matrix A that contains all parameter combinations, then select corresponding column 
count <- 1
for (smoothing in c("ns", "sm")){
  for (I in c(50, 100, 200, 500)){
    for (balanced in c("ba", "ub")){
      for (orthogonal in c("or", "no")){
        for (normal in c("nor", "mix")){
          A[, count] <- c(I, balanced, orthogonal, smoothing, normal)
          count <- count + 1
        }
      }
    }
  }
}

I           <- as.numeric(A[1, parms]) # I - number of subjects (50, 100, 200 or 500)
balanced    <- A[2, parms]             # balanced ("ba") or unbalanced ("ub") design
orthogonal  <- A[3, parms]             # orthogonal ("no") / nonorthogonal ("or") bases for X_0 and X_1
smoothing   <- A[4, parms]             # smoothing ("sm") or no smoothing ("ns") of covariance matrices
normal      <- A[5, parms]             # normal ("nor") or mixture ("mix") distribution for scores

name <- paste("2_", I, balanced, "_", orthogonal, smoothing, normal, sep = "") # indicates parameters in output files
J.vec <- rep(4, I)                                                           # if balanced design, all subjects have 4 visits
if (balanced == "ub"){                                                         # if unbalanced design, 1 to 9 visits
  ncl <- (I / 50) * c(2, 3, 4, 5, 5, 6, 9, 8, 8)
  J.vec <- rep(9:1, ncl)
}
D <- 120                                                              # number of points per curve
n <- sum(J.vec)                                                       # overall number of visits
sigma <- 0.05                                                         # sigma^2 is the variance of the errors eps_ij
lambda <- nu <- 0.5^((1:4) - 1)                                       # variances of the scores
N.X <- length(lambda)                                                 # number of non-zero variances for X
N.U <- length(nu)                                                     # number of non-zero variances for U
subject <- rep(1:I, J.vec)                                            # vector with subject-IDs
myfun <- function(j){
  t <- 0
  if (j > 1){
    for (j in 2:j){
      t <- cbind(t, sum(t) + runif(1))
    }
  }
  return(t - mean(t))
}
time <- unlist(lapply(J.vec, FUN = myfun))                              # generate visit times with uniform increments 
time <- time / sqrt(var(time))                                          # standardize time variable
              
etaf <- function(t, d){                                                 # define mean function eta(d,T)
  (t/4 - d/D)^2 / 2
}                                   
eta <- outer(time, 1:D, etaf)                                           # compute values eta(d,T_ij)

if (orthogonal == "or"){                                                # define functions phi.0_k, phi.1_k, phi.U_k 
  phi0 <- function(d, k){
    1/sqrt(D) * sqrt(3/2) * ((k%%2) * sin((k + k%%2) * pi * (d - 0.5)/D) + (1 - k%%2) * cos((k + k%%2) * pi * (d - 0.5)/D))
  }
  phi1 <- function(d, k){
    as.function(legendre.polynomials(k, normalized = TRUE)[[k]])((2*(d - 0.5)/D - 1))/sqrt(D) * (sqrt(1/2))
  }
  phiU <- function(d, k){
    if (k == 1){
      phi1(d, k) * sqrt(4)
    }
    else {
      phi0(d, k-1) * sqrt(4/3)
    }
  }
} else {
  phi0 <- function(d, k){
    1/sqrt(D) * ((k%%2) * sin((k + k%%2) * pi * (d - 0.5)/D) + (1 - k%%2) * cos((k + k%%2) * pi * (d - 0.5)/D))
  }
  phi1 <- function(d, k){
    ((k == 1)/sqrt(2 * D) + (k == 2) * sin(6 * pi * (d - 0.5)/D)/sqrt(D) + (k == 3) * cos(6 * pi * (d - 0.5)/D)/sqrt(D) +
     (k == 4) * sin(8 * pi * (d - 0.5)/D)/sqrt(D))
  }
  phiU <- function(d, k){
    as.function(legendre.polynomials(k, normalized = TRUE)[[k]])((2 * (d - 0.5)/D - 1))/sqrt(D/2)
  }
}  
phiU.matrix <-                matrix(NA, N.U, D)
phi1.matrix <- phi0.matrix <- matrix(NA, N.X, D)
for (k in 1:N.U){                                                       # compute phi.0_k(d), phi.1_k(d), phi.U_k(d)
  phiU.matrix[k, ] <- phiU(1:D, k)                                      #    for all k and d
  phi0.matrix[k, ] <- phi0(1:D, k)
  phi1.matrix[k, ] <- phi1(1:D, k)
}
phiX.matrix <- cbind(phi0.matrix, phi1.matrix)

xi         <- xi.hat   <- array(NA, dim = c(I, N.X, rep))                  # set up empty vectors and matrices for all
zeta       <- zeta.hat <- array(NA, dim = c(n, N.U, rep))                  #    for all quantities to be estimated in 
phiU.hat   <-             array(NA, dim = c(D, N.U, rep))                  #    simulations
phi0.hat   <- phi1.hat <- array(NA, dim = c(D, N.X, rep))
eta.hat    <- Y.matrix <- array(NA, dim = c(n, D, rep))
sigma2.hat <- rep(NA, rep)
lambda.hat <- matrix(NA, N.X, rep)
nu.hat     <- matrix(NA, N.U, rep)

r <- 1
errors <- 0                                                           # count any errors that might occur
while (r <= rep){                                                       # do rep simulation repetitions without errors
  if (r%%(rep / 10) == 0){
    print(paste("Simulation", r * 100 / rep, "% done"))                   # give out progress report
  }

### generate one simulated set of Ys ###
if (normal == "nor"){                                                   # simulate scores accoring to normal 
  zeta[, , r] <- matrix(rnorm(N.U * n), n, N.U) %*% diag(sqrt(nu))
  xi[, , r]   <- matrix(rnorm(N.X * I), I, N.X) %*% diag(sqrt(lambda))
} else {                                                              #    or mixture distribution, as selected
  zeta[, , r] <- matrix(rnorm(N.U * n), n, N.U) %*% diag(sqrt(nu / 2)) + matrix(2 * rbinom(n * N.U, 1, 0.5) - 1, n, N.U) %*% diag(sqrt(nu / 2))
  xi[, , r] <- matrix(rnorm(N.X * I), I, N.X) %*% diag(sqrt(lambda / 2)) + matrix(2 * rbinom(I * N.X, 1, 0.5) - 1, I, N.X) %*% diag(sqrt(lambda / 2))
}
U   <- zeta[, , r]             %*% phiU.matrix                 # generate visit-specific deviations
X.0 <- xi[rep(1:I, J.vec), , r] %*% phi0.matrix                 # generate random functional intercept
X.1 <- xi[rep(1:I, J.vec), , r] %*% phi1.matrix                 # generate random functional slope
eps <- matrix(rnorm(D * n, 0, sigma), n, D)                        # generate measurement error
Y   <- eta + X.0 + X.1 * matrix(rep(time, D), n, D) + U + eps     # compute observations according to model (2)

### plot example Ys ###
if (r == 1){      # plot some example Y's in the first iteration        
  pdf(file = paste("../results/LFPCA_", name, "_subjects.pdf", sep = ""))
  for (i in 1:6){
    Y.plot <- Y[(subject == i), ]
    plot(c(1, 120), c(min(Y.plot), max(Y.plot)), col = 0, xlab = "", ylab = "", main = paste("Subject ", i, sep = ""))
    for (j in 1:J.vec[i]){
      lines(Y.plot[j, ], type = "l", col = j)
    }
    legend(x = "bottomleft", col = c(1:J.vec[i]), lty = 1, legend = 1:J.vec[i], title = "Visit")}
  dev.off()
}

### LPFCA ###
results <- try(LFPCA(Y = Y, subject = subject, T = time, N.X = N.X, N.U = N.U, L = NA, smooth = (smoothing == "sm")))
         # use LFPCA to estimate all quantities
if(class(results) == "try-error" ){  # if an error occured, count error, save relevant data, and repeat iteration
  save(Y, time, subject, file = paste("LFPCA_error_", name, "_", r, ".Rdata", sep = ""))
  errors <- errors + 1
  next
}
                        
### as eigenfunctions are only unique up to the sign, flip eigenfunctions to minimize ###
### distance to true function, if necessary                                           ###
soq <- function(x, y){  # sums of squares to compare distances
  t(x - y) %*% (x - y)
}
for (k in 1:N.U){
  flip <- (soq(phiU.matrix[k, 1:D], results$phi.U[, k]) > soq(phiU.matrix[k, 1:D], - results$phi.U[, k]))
  if (flip){  # if distance to true function would be smaller when eigenfunction were flipped, flip it (and the scores)
    results$phi.U[, k] <- - results$phi.U[, k]
    results$zeta[, k]  <- - results$zeta[, k]
  }
}
phiX.hat <- rbind(results$phi.0, results$phi.1)
for (k in 1:N.X){ #analogous for the eigenfunctions for X (flip phi.0_k and phi.1_k together)
  flip <- (soq(phiX.matrix[k, ], phiX.hat[, k]) > soq(phiX.matrix[k, ], - phiX.hat[, k]))
  if (flip){
    results$phi.0[, k] <- - results$phi.0[, k]
    results$phi.1[, k] <- - results$phi.1[, k]
    results$xi[, k]    <- - results$xi[, k]
  }
}

### store results ###
phi0.hat[, , r]  <- results$phi.0
phi1.hat[, , r]  <- results$phi.1
phiU.hat[, , r]  <- results$phi.U
xi.hat[, , r]    <- results$xi
zeta.hat[, , r]  <- results$zeta 
eta.hat[, , r]   <- results$eta.matrix
Y.matrix[, , r]  <- Y
lambda.hat[, r]  <- results$lambda
nu.hat[, r]      <- results$nu
sigma2.hat[r]    <- results$sigma2

r <- r + 1  # go to next iteration
}
print(paste(errors, "errors out of", rep, "replications"))  # print out the number of errors for future reference


### save results ###
save(Y.matrix, time, subject, phi0.matrix, phi1.matrix, phiU.matrix, phi0.hat, phi1.hat, phiU.hat, 
     eta, eta.hat, xi, xi.hat, zeta, zeta.hat, lambda, nu, lambda.hat, nu.hat, sigma, sigma2.hat,
                                                           file = paste("LFPCA_", name, ".Rdata", sep = ""))

#load(file = paste("LFPCA_", name, ".Rdata", sep = "")); rep <- dim(phi0.hat)[3]



### plot simulation results ###
plotresults <- function(f,  # the true function
                        g,  # an array containing the function estimates
                        ind # the index - U for phi.U, 0 for phi.0, 1 for phi.1
                        ){
  for (k in 1:4){
    lims <- 0.18 + 0.07 * (ind == "U")
    plot(eval(f(1:D, k)), type = "l", ylab = substitute(phi[k]^ind, list(k = k, ind = ind)), xlab = "", ylim = c(-lims, lims)) # plot true fctn
    for (r in 1:min(rep, 100)){
      lines(eval(g(1:D, k, r)), col = 8)  # plot first 100 estimates
    }
    lines(eval(f(1:D, k)), lwd = 2)
    means <- sapply(1:D, function(d){mean(g(d, k, 1:rep))})   # plot mean of all estimated functions
    lines(means, col = 2)
    abline(h = 0, col = 3)
    quants <- sapply(1:D, function(d){quantile(g(d, k, 1:rep), probs = c(0.05, 0.95), type = 8)})
    for (i in 1:2){
      lines((quants[i, ]), col = 4)  # plot 5th and 95th percentile of estimated functions
    }
  }
}
time1      <- unique(time[order(time)])
find.t.ind <- function(row){
  which(time == time1[row])[1]
}
t.ind      <- sapply(1:length(time1), find.t.ind)
eta.mean   <- apply(eta.hat[t.ind, , ], c(1, 2), FUN = "mean")        # compute mean of estimated mean functions eta, and
eta.q5     <- apply(eta.hat[t.ind, , ], c(1, 2), FUN = function(x){quantile(x, probs = c(0.05), type = 8)})  # 5th and
eta.q95    <- apply(eta.hat[t.ind, , ], c(1, 2), FUN = function(x){quantile(x, probs = c(0.95), type = 8)})  # 95th quantile

pdf(file = paste("../results/LFPCA_", name, ".pdf", sep = ""))
   ### plot eigenfunctions ###
par(mfrow = c(2, 2), mar = c(5.1, 5.1, 1.1, 1.1))
plotresults(phi0, function(d, k, r){phi0.hat[d, k, r]}, "0")  # plots of estimated and true functions for phi.0,
plotresults(phi1, function(d, k, r){phi1.hat[d, k, r]}, "1")  #    phi.1,
plotresults(phiU, function(d, k, r){phiU.hat[d, k, r]}, "U")  #    and phi.U
   ### plot mean function eta ###
par(mfrow = c(1, 1))
contour(1:D, time1, t(outer(time1, 1:D, etaf)), levels = seq(0, 1, 0.1), xlab = "d", ylab = "time") # contour plot of the true eta,
contour(1:D, time1, t(eta.mean), col = 2, add = TRUE, levels = seq(0, 1, 0.1))                      #   mean of estimated functions
contour(1:D, time1, t(eta.q5),   col = 3, add = TRUE, levels = seq(0, 1, 0.1))                      #   and of 5th and 
contour(1:D, time1, t(eta.q95),  col = 4, add = TRUE, levels = seq(0, 1, 0.1))                      #   95th pointwise quantiles
   ### plot scores ###
par(mfrow = c(2, 2), mar = c(5.1, 5.1, 1.1, 1.1))
for (k in 1:4){
boxplot((xi.hat - xi)[, k, ] / sqrt(lambda[k]), (zeta.hat - zeta)[, k, ] / sqrt(nu[k]), ylim = c(-4 + 1.7 * (I > 50), 4 - 1.7 * (I > 50)),
        names = eval(substitute(expression((hat(xi)[i][l] - xi[i][l]) / sqrt(lambda[l]),
            (hat(zeta)[ij][l] - zeta[ij][l]) / sqrt(nu[l])), list(l = k))))   # boxplots of standardized score estimates
points(1, mean((xi.hat - xi)[, k, ] / sqrt(lambda[k])), pch = "x", col = 2)         # add corresponding means
points(2, mean( (zeta.hat - zeta)[, k, ] / sqrt(nu[k])), pch = "x", col = 2)
abline(h = 0, col = 3)
}
for (k in 1:4){  # scatter plot of estimated versus true scores xi
  plot(as.vector(xi[1:3, k, ]), as.vector(xi.hat[1:3, k, ]), xlab = eval(substitute(expression(xi[i][l], list(l = k)))),
       ylab = eval(substitute(expression(hat(xi)[i][l], list(l = k)))))
  abline(coef = 0:1, col = 2)
}
for (k in 1:4){  # scatter plot of estimated versus true scores zeta
  plot(zeta[1:3, k, ], zeta.hat[1:3, k, ], xlab = substitute(zeta[i][l], list(l = k)),
       ylab = substitute(hat(zeta)[i][l], list(l = k)))
  abline(coef = 0:1, col = 2)
}
   ### plot variances ###
for (k in 1:4){  # boxplots of estimated variances lambda_k and nu_k
  boxplot(lambda.hat[k, ], nu.hat[k, ], ylim = c(min(c(lambda.hat[k, ], lambda[k], nu.hat[k, ], nu[k])),
                                      max(c(lambda.hat[k, ], lambda[k], nu.hat[k, ], nu[k]))), col = 8, main = "",
          ylab = "Estimated variance", names = eval(substitute(expression(lambda[i], nu[i]), list(i = k))))
  points(1, mean(lambda.hat[k, ]), pch = "x", col = 2) # add means of estimates
  points(2, mean(nu.hat[k, ]), pch = "x", col = 2)
  abline(h = lambda[k], col = 1, lwd = 2)              # indicate true value lambda_k = nu_k by a horizontal line
  abline(h = 0, col = 3)
}
par(mfrow = c(1, 1)) # boxplots of estimated variance sigma^2
boxplot(sigma2.hat, ylim = c(min(c(sigma2.hat, sigma^2)), max(c(sigma2.hat, sigma^2))), col = 8, main = "",
        ylab = expression(hat(sigma)^2))
points(1, mean(sigma2.hat), pch = "x", col = 2)  # add mean of estimates
abline(h = sigma^2, col = 1, lwd = 2)             # indicate true value sigma^2 by a horizontal line
abline(h = 0, col = 3)
dev.off()