#setup
options(width=60)
olocale=Sys.setlocale(locale="C")

library(Matrix)

# sufficient statistics
# y = Xb + e

data(KNex)
attach(KNex)
ls(2)

mmm <- as.matrix(mm)
object.size(mmm)/1024

dim(mmm)
length(y)

# B(v) = (t(X) %*% X)^-1 t(X) %*% y(v)
#
# suff. statistics
xx <- t(mmm) %*% mmm
xy <- t(mmm) %*% y
dim(xx)
length(xy)

b <- lm.fit(mmm,y)$coef
b.inv <- solve(t(mmm) %*% mmm) %*% t(mmm) %*% y
all.equal(b,b.inv[,1],check.attr=FALSE,check.names=FALSE)

# 30 permutations of y
np <- 30
y.perm <- replicate(np,sample(y))

# use lm.fit
system.time(for (i in 1:np) lm.fit(mmm,y.perm[,i])$coef)

# straight way 
system.time(for (i in 1:np) solve(t(mmm) %*% mmm) %*% t(mmm)%*%y.perm[,i])

# use solve, not inversion
# solution above using inverse is one way of solving normal equations for b
# (t(X) %*% X) %*% b = t(X) %*% y
# there are other more efficient and stable ways, so let R solve it
system.time(for (i in 1:np) solve(t(mmm) %*% mmm,t(mmm)%*%y.perm[,i]))

# recycle suff. stat t(X) %*% X
system.time({xx <- t(mmm) %*% mmm; for (i in 1:np) solve(xx,t(mmm) %*% y.perm[,i])})

# use crossprod functions instead
system.time({xx <- crossprod(mmm); for (i in 1:np) solve(xx,crossprod(mmm,y.perm[,i]))})

# taking advantage of structure
# xx = t(X) %*% X is positive definite (sometimes not, but leave that for another trickathon)
# positive definite matrices have Cholesky factorizations

# cholesky factor r satisfies
# t(r) %*% r = t(mmm) %*% mmm
#
# but r is upper triangular
# makes solving linear system much easier
# to solve (t(r) %*% r) %*% b = z
# solve t(r) %*% u = z (forward substitution)
# solve r %*% b = u (backward substitution)

# to solve using cholesky factorization
r <- chol(crossprod(mmm))
u <- forwardsolve(r,crossprod(mmm,y),upper=TRUE,transpose=TRUE)
b.chol <- backsolve(r,u)
all.equal(b,b.chol[,1],check.attr=FALSE,check.names=FALSE)

# what should really be recycled is the cholesky factor of sufficient statistic
system.time({r <- chol(crossprod(mmm));
             for (i in 1:np) backsolve(r,
                                       forwardsolve(r,
                                                    crossprod(mmm,y.perm[,i]),
                                                    upper=TRUE,trans=TRUE))})

# now using Matrix
mm2 <- as(mm,"dgeMatrix")
mm2[1:5,1:5]

xx <- crossprod(mm2)
class(xx)
help("dpoMatrix-class")
getClassDef("dpoMatrix")

xx[1:10,1:10]

mm[1:10,1:10]
object.size(mm2)/1024
object.size(mm)/1024

b.Mat <- solve(crossprod(mm),crossprod(mm,y))
all.equal(b,b.Mat[,1],check.attr=FALSE,check.names=FALSE)

# using solve
system.time(for (i in 1:np) solve(crossprod(mm),crossprod(mm,y.perm[,i])))

# reusing suff. stat (reuses cholesky factor in the background)
system.time({xx <- crossprod(mm); for (i in 1:np) solve(xx,crossprod(mm,y.perm[,i]))})


# the High Performance and Parallel Computing cran task view is here:
# http://cran.r-project.org/web/views/HighPerformanceComputing.html