####################
# Module 7 - Lab
# 1/8/2014
####################

# NOTE(review): `bike` and `death` are not created anywhere in this script.
# They must already exist in the workspace, presumably loaded with read.csv()
# from the CSV files named in Part A and Part B below -- confirm before running.

## Part A ----
# Bike Lanes Dataset: BikeBaltimore is the Department of Transportation's
# bike program.
# https://data.baltimorecity.gov/Transportation/Bike-Lanes/xzfj-gyms
# Download as a CSV (like the Monuments dataset) into your current working
# directory.

# 1. Using tapply():
# (a) Which project category has the longest average bike lane?
tab <- tapply(bike$length, bike$project, mean, na.rm = TRUE)
tab[which.max(tab)]

# (b) What was the average bike lane length per year that they were installed?
tapply(bike$length, bike$dateInstalled, mean, na.rm = TRUE)

# 2. (a) Numerically [hint: `quantile()`] and
#    (b) graphically [hint: `hist()` or `plot(density())`]
#    describe the distribution of bike "lane" lengths.
hist(bike$length)
hist(bike$length, breaks = 100)
# na.rm = TRUE matches the other summaries of this column; without it
# quantile() stops with an error as soon as a length is missing.
quantile(bike$length, na.rm = TRUE)

# 3. Then describe as above, after stratifying by i) type then ii) number
#    of lanes.
boxplot(bike$length ~ bike$type)
levels(factor(bike$type)) # this is the order of boxes
boxplot(bike$length ~ bike$numLanes)

tapply(bike$length, bike$type, quantile, na.rm = TRUE)
tapply(bike$length, bike$numLanes, quantile, na.rm = TRUE)

## Part B ----
# Download the CSV:
# http://biostat.jhsph.edu/~ajaffe/files/indicatordeadkids35.csv
# Via: http://www.gapminder.org/data/
# Definition of indicator: how many children the average couple had that
# die before age 35.

# 4. Plot the distribution of each country's average count across all years.
rowMeans(death, na.rm = TRUE)
hist(rowMeans(death, na.rm = TRUE))

# 5. (a) How many entries are less than 1?
death < 1                      # logical matrix, one flag per entry
sum(death < 1, na.rm = TRUE)   # count of entries below 1
mean(death < 1, na.rm = TRUE)  # proportion of non-missing entries below 1

# (b) Which array indices do they correspond to?
#     [hint: `arr.ind` argument in `which()`]
head(which(death < 1, arr.ind = FALSE)) # positions as a single linear index
head(which(death < 1, arr.ind = TRUE))  # positions as (row, col) pairs
ind <- which(death < 1, arr.ind = TRUE)

# 6. Plot the count for each country across years in a line plot
#    [hint: `matplot()`]
# matplot() draws one line per *column* of its input.
matplot(death, type = "l")
# Step 4 treats rows as countries (rowMeans), so transposing gives one line
# per country with the years along the x-axis.
matplot(t(death), type = "l")