## Lecture/lab script (knitr chunk markers retained): reads the Monuments
## CSV, inspects it, and practices logical indexing, `which()`, `unique()`
## and `table()` on its columns.
##
## Each statement sits on its own line: anything following `##` on the same
## physical line is a comment and is never evaluated.

## ----readCSV-------------------------------------------------------------
# Evaluating a function name without parentheses prints its definition.
read.csv

## ----readCSV2, comment="",results='markup'-------------------------------
# The path is relative to the current working directory.
# as.is = TRUE keeps character columns as character instead of factors.
mon <- read.csv("data/Monuments.csv", header = TRUE, as.is = TRUE)
head(mon)

## ----subset5, comment="", prompt=TRUE------------------------------------
colnames(mon)
head(mon$zipCode)
head(mon$neighborhood)

## ----workingDirectory, comment="", prompt=TRUE---------------------------
## get the working directory
getwd()
# The lecture directory is a machine-specific path; only switch to it when it
# actually exists so the script does not abort on other machines.
lecture_dir <- "~/Dropbox/winterR_2015/Lectures"
if (dir.exists(lecture_dir)) {
  setwd(lecture_dir)
} else {
  message("Skipping setwd(): directory not found: ", lecture_dir)
}

## ----directoryNav, comment="", prompt=TRUE-------------------------------
dir("./") # shows directory contents
dir("..") # contents of the parent directory

## ----readCSV3, comment="", prompt=TRUE-----------------------------------
class(mon)
str(mon)

## ----names1, comment="", prompt=TRUE-------------------------------------
# Rename the first column, then restore the lower-case name.
names(mon)[1] <- "Name"
names(mon)
names(mon)[1] <- "name"
names(mon)

## ----logical1, comment="", prompt=TRUE-----------------------------------
z <- c(TRUE, FALSE, TRUE, FALSE)
class(z)
sum(z) # number of TRUEs

## ----logical2, comment="", prompt=TRUE-----------------------------------
# Quoted "TRUE"/"FALSE" are character strings, not logicals.
z2 <- c("TRUE", "FALSE", "TRUE", "FALSE")
class(z2)
# sum() on a character vector is an error; try() reports it without
# stopping the rest of the script.
try(sum(z2))
identical(z, z2)

## ----logical3, comment="", prompt=TRUE-----------------------------------
x <- 1:6
x > 4
x == 3

## ----logical4, comment="", prompt=TRUE-----------------------------------
# Logical vector marking the rows whose zip code is 21202.
Index <- (mon$zipCode == 21202)
sum(Index)
table(Index)
mon2 <- mon[Index, ]

## ----logical5, comment="", prompt=TRUE-----------------------------------
dim(mon2)
head(mon2)

## ----which, comment="", prompt=TRUE--------------------------------------
mon$Location.1 != ""
which(mon$Location.1 != "") # positions of the non-empty entries

## ----q1, comment="", prompt=TRUE-----------------------------------------
names(mon)
# Rename by matching the current name rather than a hard-coded position, so
# the rename still hits the right column if the column order differs.
names(mon)[names(mon) == "Location.1"] <- "location"
names(mon)

## ----q2, comment="", prompt=TRUE-----------------------------------------
nrow(mon)
dim(mon)
length(mon$name)

## ----q3a, comment="", prompt=TRUE----------------------------------------
unique(mon$zipCode)
unique(mon$policeDistrict)
unique(mon$councilDistrict)

## ----q3b, comment="", prompt=TRUE----------------------------------------
unique(mon$neighborhood)

## ----q3c, comment="", prompt=TRUE----------------------------------------
length(unique(mon$zipCode))
length(unique(mon$policeDistrict))
length(unique(mon$councilDistrict))
length(unique(mon$neighborhood))

## ----q3d, comment="", prompt=TRUE----------------------------------------
table(mon$zipCode)
length(table(mon$zipCode))

## ----q4a, comment="", prompt=TRUE----------------------------------------
# Cross-tabulation: rows are zip codes, columns are neighborhoods.
tab <- table(mon$zipCode, mon$neighborhood)
# tab
tab[, "Downtown"]
# Note: this counts the distinct *count values* in the Downtown column.
length(unique(tab[, "Downtown"]))

## ----q4b, comment="", prompt=TRUE----------------------------------------
tt <- tab[, "Downtown"]
tt
tt == 0 # which entries are equal to 0

## ----q4c, comment="", prompt=TRUE----------------------------------------
tab[, "Downtown"] != 0
sum(tab[, "Downtown"] != 0) # number of zip codes with a non-zero count
sum(tab[, "Johns Hopkins Homewood"] != 0)

## ----q4d, comment="", prompt=TRUE----------------------------------------
# Same question answered by subsetting the rows instead of using the table.
dt <- mon[mon$neighborhood == "Downtown", ]
head(mon$neighborhood == "Downtown", 10)
dim(dt)
length(unique(dt$zipCode))

## ----q5, comment="", prompt=TRUE-----------------------------------------
head(mon$location)
table(mon$location != "") # FALSE=DO NOT and TRUE=DO

## ----q6a, comment="", prompt=TRUE----------------------------------------
tabZ <- table(mon$zipCode)
head(tabZ)
max(tabZ)
tabZ[tabZ == max(tabZ)] # every entry tied for the maximum

## ----q6b, comment="", prompt=TRUE----------------------------------------
which.max(tabZ) # this is the element number
tabZ[which.max(tabZ)] # this is the actual maximum

## ----q6c, comment="", prompt=TRUE----------------------------------------
tabN <- table(mon$neighborhood)
tabN[which.max(tabN)]
tabC <- table(mon$councilDistrict)
tabC[which.max(tabC)]
tabP <- table(mon$policeDistrict)
tabP[which.max(tabP)]

## ----q7, comment="", prompt=TRUE-----------------------------------------
# Read the tab-delimited copy of the data from the course website (requires
# network access) and compare its `name` column with the CSV version.
monTab <- read.delim(
  "http://biostat.jhsph.edu/~ajaffe/winterR_2015/data/Monuments-tab.txt",
  header = TRUE, as.is = TRUE
)
identical(mon$name, monTab$name)