##Sal Data from https://data.baltimorecity.gov/Financial/Baltimore-City-Employee-Salaries-2011/ijfz-2v3c ## Salary Data ## rest <- restaurant data from Sal <- read.csv(file="Baltimore_City_Employee_Salaries_2011.csv", header=TRUE, as.is=TRUE) rest <- read.csv(file="Restaurants.csv", header=TRUE, as.is=TRUE) mon <- read.csv(file="Monuments.csv", header=TRUE, as.is=TRUE) ## Question 1 - Finding strings # Make an object called health.sal using the salaries data set, with only agencies of those with "fire" (or any forms), if any, in the name: unique(Sal\$Agency[grep(x=toupper(Sal\$Agency), pattern="FIRE")]) ## Question 2 - Finding strings # Make a data set called trans which contains only agencies that contain "TRANS". trans <- Sal[ grepl( x=Sal\$Agency, pattern="TRANS"), ] table(trans\$Agency) ## What TRANS Agency has the most employees? ## Take out the \$ - somewhat like destring, ignore("\$") trans\$AnnualSalary <- gsub(pattern="\$", replacement="", x=trans\$AnnualSalary, fixed=TRUE) ## make it a numeric variable trans\$AnnualSalary <- as.numeric(trans\$AnnualSalary) ## take means tapply(trans\$AnnualSalary, trans\$Agency, mean) ## What TRANS Agency has the highest paid employees (Annual Salary) on average? # 3 What is/are the profession(s) of people who have "abra" in their # name for Baltimore's Salaries? abras <- grep(pattern="abra", x=Sal\$Name) Sal\$JobTitle[abras] ## Question 4 # Reshape the restaurants data set to wide, on council district. # You may need to create an id variable by the code: # `rest\$id <- 1:nrow(rest)` rest\$id <- 1:nrow(rest) wide.rest <- reshape(rest, idvar="id", timevar="councilDistrict", direction="wide") dim(rest) dim(wide.rest) ## Using the wide data set, find the maximum number of restaurants by sort(unique(rest\$councilDistrict)) name.cols <- paste("name", sort(unique(rest\$councilDistrict)), sep=".") colSums(!is.na(wide.rest[, name.cols])) ## and confirm with table(rest\$councilDistrict) # Q5 length(grep("Monument",mon\$name)) sum(grepl("Monument",mon\$name)) ## For the most common memorial name, what police districts are they in? tab <- table(mon\$name) which.max(tab) max.mon <- names(which.max(tab)) table(mon\$policeDistrict[mon\$name %in% max.mon])