## @knitr loadData, comment="", prompt=TRUE, echo=FALSE, cache=TRUE
# Restore the saved course objects (presumably including `fileList`, which the
# next chunk reads -- confirm against the .rda contents).
load("~/Dropbox/WinterRClass/Datasets/saved_datasets_list.rda")


## @knitr assignData, comment="", prompt=TRUE, echo=FALSE, cache=TRUE
# Pull the individual datasets out of `fileList` into short working names.
Sal <- fileList[["Salaries2011"]]
mon <- fileList$Monuments
circ <- fileList$CirculatorRidership
bike <- fileList$BikeLanes
rest <- fileList$Restaurants
# NOTE(review): `xx` is not referenced anywhere else in the visible script;
# this draws one uniform random number (advancing the RNG state) -- confirm
# whether it is still needed.
xx=runif(1)


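# Disabled: writes one CSV of salary rows per agency.
# NOTE(review): before re-enabling, iterate over unique(Sal$Agency) rather than
# every row's Agency, and filter with Sal$Agency == x (Sal == x compares every
# column of the data frame to x).
# sapply(Sal$Agency, function(x) {
#   dat <- Sal[Sal == x, ]
#   write.csv(x=dat, file=paste("~/Dropbox/WinterRClass/Datasets/Salary_", x, ".csv", sep=""))
#   })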


## @knitr makeList, comment="", prompt=TRUE
# A list can mix types: two named elements (a character vector and an integer
# vector) plus an unnamed 5x5 matrix as the third element.
mylist <- list(letters=c("A", "b", "c"), numbers=1:3, matrix(1:25, ncol=5))


## @knitr Lists, comment="", prompt=TRUE
head(mylist)


## @knitr Listsref1, comment="", prompt=TRUE
# Single brackets keep the container: the result is still a list.
mylist[1] # returns a one-element list (by position)
mylist["letters"] # returns a one-element list (by name)


## @knitr Listsrefvec, comment="", prompt=TRUE
# Double brackets and $ extract the element itself.
mylist[[1]] # returns the vector 'letters'
mylist$letters # returns the vector 'letters'
mylist[["letters"]] # returns the vector 'letters'


## @knitr Listsref2, comment="", prompt=TRUE
mylist[1:2] # returns a list with the first two elements


## @knitr Listsref3, comment="", prompt=TRUE
# Extract an element, then index into it.
mylist$letters[1] # first entry of 'letters'
mylist[[2]][1] # first entry of 'numbers'
mylist[[3]][1:2,1:2] # top-left 2x2 corner of the matrix


## @knitr table, comment="", prompt=TRUE
table(c(0, 1, 2, 3, NA, 3, 3, 2,2, 3), useNA="ifany") # NA count shown only when NAs are present
table(c(0, 1, 2, 3, 2, 3, 3, 2,2, 3), useNA="always") # NA count shown even when it is zero
# Two-way table: the first vector gives the rows, the second the columns.
tab <- table(c(0, 1, 2, 3, 2, 3, 3, 2,2, 3), c(0, 1, 2, 3, 2, 3, 3, 4, 4, 3), useNA="always")
margin.table(tab, 2) # column totals
prop.table(tab, 2) # column proportions, like Stata's `tab x y, col`; use margin 1 for row proportions, omit the margin for cell proportions


## @knitr isna, comment="", prompt=TRUE
# Check whether any employee name is missing.
any(is.na(Sal$Name))
# Remove the "$" from the salary strings and convert them to numbers.
# fixed = TRUE makes "$" a literal character instead of the regex
# end-of-string anchor.
sals <- as.numeric(gsub(pattern = "$", replacement = "",
                        x = Sal$AnnualSalary, fixed = TRUE))
quantile(sals)


## @knitr xtab, comment="", prompt=TRUE
# Add a replicate index (1-9, recycled down the rows) so the cross-tabulation
# has a third dimension.
warpbreaks$replicate <- rep(1:9, length.out = nrow(warpbreaks))
# Sum `breaks` within each wool x tension x replicate cell; print() displays
# the table while the assignment keeps it for the next chunk.
print(xt <- xtabs(breaks ~ wool + tension + replicate, data = warpbreaks))


## @knitr ftab, comment="", prompt=TRUE
# Flatten the 3-way table into a single 2-D display.
ftable(xt)


## @knitr gender, comment="", prompt=TRUE, echo=FALSE
# Simulate 1000 inconsistently coded gender entries; the seed makes the draw
# reproducible.
set.seed(4)
gender <- sample(
  x = c("Male", "mAle", "MaLe", "M", "MALE", "Ma",
        "FeMAle", "F", "Woman", "Man", "Fm", "FEMALE"),
  size = 1000,
  replace = TRUE
)


## @knitr gentab, comment="", prompt=TRUE
# Frequency of each distinct spelling.
table(gender)


## @knitr RawlMatch, comment="", prompt=TRUE
grep("Rawlings",Sal$Name) # indices of the elements where the pattern matches


## @knitr grepl, comment="", prompt=TRUE
head(grep("Rawlings",Sal$Name)) # grep: positions of the matches
head(grepl("Rawlings",Sal$Name)) # grepl: TRUE/FALSE for every element
# The logical vector from grepl can subset rows directly; the result is also
# saved as `Rawlings`.
head(Rawlings <- Sal[grepl("Rawlings",Sal$Name), c("Name", "JobTitle")], 2)


## @knitr greppers, comment="", prompt=TRUE
head(grep("Tajhgh",Sal$Name, value=TRUE)) # value=TRUE returns the matching strings instead of indices
grep("Jaffe",Sal$Name)
length(grep("Jaffe",Sal$Name)) # number of matches


## @knitr grepstar, comment="", prompt=TRUE
# Regular expressions: "." is any character, "*" means zero or more of the
# preceding item, "?" means zero or one.
grep("Payne.*", x=Sal$Name, value=TRUE)
grep("Leonard.?S", x=Sal$Name, value=TRUE)[1:5] # first five matches (NA-padded if there are fewer)
grep("Spence.*C.*", x=Sal$Name, value=TRUE)


## @knitr classSal, comment="", prompt=TRUE
class(Sal$AnnualSalary)


## @knitr orderstring, comment="", prompt=TRUE
sort(c("1", "2", "10")) # strings sort alphabetically, not numerically, so "10" lands before "2"
order(c("1", "2", "10")) # order() gives the index positions that would sort the vector


## @knitr destringSal, comment="", prompt=TRUE
# NOTE(review): the salaries still carry their "$" here, so this presumably
# yields NAs with a warning (or level codes if the column is a factor) --
# confirm against the data.
head(as.numeric(Sal$AnnualSalary), 4)


## @knitr orderSal, comment="", prompt=TRUE
# Strip the literal "$" (fixed=TRUE disables regex) and store the salary as numeric.
Sal$AnnualSalary <- as.numeric(gsub(pattern="$", replacement="", Sal$AnnualSalary, fixed=TRUE))
Sal <- Sal[order(-Sal$AnnualSalary), ] # use negative to sort descending
head(Sal[, c("Name", "AnnualSalary", "JobTitle")])


## @knitr Paste, comment="", prompt=TRUE
paste("Visit", 1:5, sep="_") # vectorised: one string per element of 1:5
paste("Visit", 1:5, sep="_", collapse=" ") # collapse joins the results into a single string
paste("To", "is going be the ", "we go to the store!", sep="day ") # sep can be any string, inserted between each pair of arguments


## @knitr return2, comment="",prompt=TRUE
# Second element of `x`, handed back with an explicit return().
return2 <- function(x) {
  second <- x[2]
  return(second)
}
return2(c(1, 4, 5, 76))


## @knitr return2a, comment="",prompt=TRUE
# Same result without return(): a function's value is its last expression.
return2a <- function(x) {
  x[2]
}
return2a(c(1, 4, 5, 76))


## @knitr return2b, comment="",prompt=TRUE
# One-line form: braces are optional when the body is a single expression.
return2b <- function(x) x[2]
return2b(c(1, 4, 5, 76))


## @knitr strsplit, comment="", prompt=TRUE
x <- c("I really", "like writing", "R code")
ss <- strsplit(x, split=" ") # list with one character vector of words per input string
ss[[2]]
sapply(ss, return2b) # use your own function
sapply(ss, function(x) x[2]) # on the fly


## @knitr sapply2, comment="",prompt=TRUE
# The same thing done by hand for the first two elements: take one list
# element at a time and pick its second word.
x = ss[[1]]
x[2]
x = ss[[2]]
x[2]


## @knitr merging, comment="", prompt=TRUE
# `base`: one row per subject (ids 1-10) with a simulated age.
base <- data.frame(id = 1:10, Age = rnorm(10, mean = 65, sd = 5))
# `visits`: three visits for each of ids 1-8. rep(1:8, 3) and rep(1:3, 8) are
# both length 24, and because 8 and 3 share no common factor every id is
# paired with each visit number exactly once. One Outcome is drawn per row so
# no simulated value is recycled across rows.
visits <- data.frame(id = rep(1:8, 3), visit = rep(1:3, 8),
                     Outcome = rnorm(8 * 3, mean = 4, sd = 2))
# The default merge keeps only ids present in both data frames (1-8).
merged.data <- merge(base, visits, by = "id")
table(merged.data$id)


## @knitr mergeall, comment="", prompt=TRUE
# all=TRUE keeps rows from both data frames even when an id has no match.
all.data <- merge(base, visits, by="id", all=TRUE)
table(all.data$id)


## @knitr NAmerge, comment="", prompt=TRUE
# ids 9 and 10 exist only in `base`, so their visit columns are NA.
all.data[all.data$id %in% c(9, 10),]


## @knitr mergevar, comment="", prompt=TRUE
# Flag each source with an indicator column before merging; an NA indicator
# afterwards marks rows that were absent from that source.
base$base <- 1
visits$visits <- 1
all.data <- merge(base, visits, by="id", all=TRUE)
all.data[is.na(all.data$visits), ]


## @knitr dftab, comment="", prompt=TRUE
# Count employees per agency, turn the counts into a data frame, and merge the
# count back onto every salary row.
tab <- as.data.frame(
  table(Agency = Sal$Agency, useNA = "ifany"),
  responseName = "N_Employees",
  stringsAsFactors = FALSE
)
head(tab, 2)
Sal <- merge(x = Sal, y = tab, by = "Agency")
head(Sal[, c("Name", "Agency", "N_Employees")], 2)


## @knitr binding, comment="", prompt=TRUE
head(all.data, 2)
head(t(all.data)[, 1:2]) # data is transposed: variables become rows
head(cbind(all.data, c("hey", "ho"))) # the shorter vector is repeated to fill the new column
# NOTE(review): the appended row has 4 values; confirm that matches the number
# of columns in all.data at this point (the mergevar chunk adds the `base` and
# `visits` indicator columns).
tail(rbind(all.data, c(11, 59.34232, 1, 4.2223))) # adding a row


## @knitr badbind, comment="", prompt=TRUE
# Lengths 3 and 2: 2 does not divide 3, so recycling triggers a warning.
cbind(c(0, 1, 2), c(3, 4))


## @knitr bind, comment="", prompt=TRUE
cbind(c(0, 1, 2), c(3, 4, 5)) # equal lengths: a plain 3x2 matrix
cbind(c(1:10), c(1:5))[3:7, ] # 5 divides 10, so 1:5 is recycled silently; rows 3-7 shown


## @knitr longwide, comment="", prompt=TRUE, echo=FALSE
# The same three visits for subject 1 stored two ways: one row with a column
# per visit (wide) versus one row per visit (long).
wide <- data.frame(id = 1, visit1 = "Good", visit2 = "Good", visit3 = "Bad")
long <- data.frame(
  id = c(1, 1, 1),
  visit = c(1, 2, 3),
  Outcome = c("Good", "Good", "Bad")
)


## @knitr showlong, comment="", prompt=TRUE, echo=TRUE
head(wide)
head(long)


## @knitr reshape, comment="", prompt=TRUE
# Build the wide column names "<line><measure>", grouped by line so that each
# line's three measures sit next to each other.
times <- c("purple", "green", "orange", "banner")
v.names <- c("Boardings", "Alightings", "Average")
varying <- paste0(rep(times, each = length(v.names)), v.names)
print(varying)


## @knitr reshape2, comment="", prompt=TRUE
circ$date <- as.Date(circ$date, "%m/%d/%Y") # creating a date for sorting
## important - varying, times, and v.names need to be in a correct order:
## here `varying` is grouped by line (in the order of `times`), with the
## measures in `v.names` order inside each group
long <- reshape(data=circ, direction="long", varying=varying, times=times, v.names=v.names, timevar="line", idvar=c("date"))
rownames(long) <- NULL # taking out row names
long <- long[order(long$date), ] # sort chronologically
head(long)


## @knitr dropNAlong, comment="", prompt=TRUE
# Keep only the rows where all three measures are present; dim() before and
# after shows how many rows were dropped.
dim(long)
long <- long[complete.cases(long[, c("Boardings", "Alightings", "Average")]), ]
dim(long)


## @knitr newlong, comment="", prompt=TRUE
head(long)


## @knitr rewide, comment="", prompt=TRUE
# With no further arguments, reshape() converts back to wide using the
# reshaping information stored on `long` by the earlier long conversion.
head(reshape(long, direction="wide"), 2)


## @knitr TB, comment="", prompt=TRUE, cache=TRUE
library(xlsx,verbose=FALSE)
# Read the "Data" sheet of the TB incidence workbook.
TB <- read.xlsx(file="~/Dropbox/WinterRClass/Datasets/indicator_estimatedincidencealltbper100000.xlsx", sheetName="Data")
head(TB, 1)
TB$NA. <- NULL # drop the column named "NA." (presumably an empty column in the sheet -- confirm)
head(TB, 1)


## @knitr TB.hd, comment="", prompt=TRUE, cache=FALSE
# Rename to Country, Year.1990, ..., Year.2007 (19 names; assumes TB has
# exactly 19 columns at this point -- confirm).
colnames(TB) <- c("Country", paste("Year", 1990:2007, sep="."))
head(TB,1)


## @knitr TB.long, comment="", prompt=TRUE
# Stack the 18 Year.* columns into one `Cases` column, with `Year` recording
# which year each value came from.
TB.long <- reshape(TB, idvar="Country", v.names="Cases", times=1990:2007, direction="long", timevar="Year", varying = paste("Year", 1990:2007, sep="."))

head(TB.long, 4)
rownames(TB.long) <- NULL # drop the generated row names
head(TB.long, 4)


## @knitr TB.long2, comment="", prompt=TRUE
# Without v.names/times, reshape() derives them from the column names
# ("Year.1990" -> variable "Year", time 1990), so the value column is itself
# called "Year".
TB.long2 <- reshape(TB, idvar="Country", direction="long", timevar="Year", varying = paste("Year", 1990:2007, sep="."))
head(TB.long2, 3) ### what happened?
TB.long2 <- reshape(TB, idvar="Country", direction="long", timevar="Blah", varying = paste("Year", 1990:2007, sep="."))
head(TB.long2, 3) ## timevar can't be the stub of the original variable names


## @knitr spag, comment="", prompt=TRUE, cache=TRUE
library(lattice)
# Spaghetti plot: one line per country.
xyplot(Cases ~ Year, groups= Country, data=TB.long, type="l")


## @knitr spag_short, comment="", prompt=TRUE, cache=TRUE
## Only keep a few countries
xyplot(Cases ~ Year, groups= Country, data=TB.long, subset=Country %in% c("United States of America", "United Kingdom", "Zimbabwe"), type="l")


## @knitr spag_short2, comment="", prompt=TRUE, cache=TRUE,fig.width=4,fig.height=4
## plot things "by" Country
# Alternative (disabled): one panel per country using the `| Country` formula.
# xyplot(Cases ~ Year | Country, data=TB.long, subset=Country %in% c("United States of America", "United Kingdom", "Zimbabwe"), type="l")
TBC <- TB.long[TB.long$Country %in% c("United States of America", "United Kingdom", "Zimbabwe"),]
# Re-create the factor so only the three kept countries remain as levels for the legend.
TBC$Country <- factor(TBC$Country)
xyplot(Cases ~ Year, groups= Country, data=TBC, type="l", key = simpleKey(levels(TBC$Country), lines=TRUE, points=FALSE))


## @knitr rewide2, comment="", prompt=TRUE
# Long format: one row per Subject x time measurement.
head(Indometh, 2)
# Spread `conc` into one column per time point, leaving one row per Subject.
wide <- reshape(Indometh, v.names = "conc", idvar = "Subject",
                timevar = "time", direction = "wide")
# Show the reshaped result.
head(wide, 2)