## ----writecsv, comment="",prompt=TRUE------------------------------------ dat = read.csv("data/Charm_City_Circulator_Ridership.csv", header=TRUE,as.is=TRUE) dat2 = dat[,c("day","date", "orangeAverage","purpleAverage","greenAverage", "bannerAverage","daily")] write.csv(dat2, file="data/charmcitycirc_reduced.csv", row.names=FALSE) ## ----xlsx1, comment="",prompt=TRUE--------------------------------------- ## install.packages("xlsx",repos="http://cran.us.r-project.org") library(xlsx) # or require(xlsx) ## ----save1, comment="",prompt=TRUE,eval=FALSE---------------------------- save(dat,dat2,file="data/charmcirc.rda") ## ----ls, comment="",prompt=TRUE------------------------------------------ ls() ## ----loadData, comment="",prompt=TRUE------------------------------------ tmp=load("data/charmcirc.rda") tmp ls() ## ----negativeIndex, comment="",prompt=TRUE------------------------------- x = c(1,3,77,54,23,7,76,5) x[1:3] # first 3 x[-2] # all but the second ## ----negativeIndex2, comment="",prompt=TRUE------------------------------ x[-c(1,2,3)] # drop first 3 x[-1:3] # shorthand. R sees as -1 to 3 x[-(1:3)] # needs parentheses ## ----andEx, comment="",prompt=TRUE--------------------------------------- # which Mondays had more than 3000 average riders? which(dat$day =="Monday" & dat$daily > 3000)[1:20] ## ----andEx2, comment="",prompt=TRUE-------------------------------------- Index=which(dat$daily > 10000 & dat$purpleAverage > 3000) length(Index) # the number of days head(dat[Index,],2) # first 2 rows ## ----orEx1, comment="",prompt=TRUE--------------------------------------- Index=which(dat$daily > 10000 | dat$purpleAverage > 3000) length(Index) # the number of days head(dat[Index,],2) # first 2 rows ## ----naEval, comment="",prompt=TRUE-------------------------------------- dat$purpleAverage[1:10] > 0 which(dat$purpleAverage > 0)[1:10] ## ----inEx, comment="",prompt=TRUE---------------------------------------- (dat$day %in% c("Monday","Tuesday"))[1:20] # select entries that are monday or tuesday which(dat$day %in% c("Monday","Tuesday"))[1:20] # which indices are true? ## ----colSelect, comment="",prompt=TRUE----------------------------------- dat[1:3, c("purpleAverage","orangeAverage")] dat[1:3, c(7,5)] ## ----colRemove, comment="",prompt=TRUE----------------------------------- tmp = dat2 tmp$daily=NULL tmp[1:3,]