####################
# Module 7 - Lab
# 1/8/2014
####################
## Part A
# Bike Lanes Dataset: BikeBaltimore is the Department of Transportation's bike program.
# https://data.baltimorecity.gov/Transportation/Bike-Lanes/xzfj-gyms
# Download as a CSV (like the Monuments dataset) in your current working directory
# 1. Using tapply():
# (a) Which project category has the longest average bike lane?
# NOTE(review): `bike` is not created in the visible part of this script; it is
# assumed to already hold the downloaded Bike Lanes CSV -- confirm how it is loaded.
# Mean lane length within each project category (missing lengths dropped).
tab <- tapply(X = bike$length, INDEX = bike$project, FUN = mean, na.rm = TRUE)
# Subset at the position of the maximum so the category name is kept with it.
tab[which.max(tab)]
# (b) What was the average bike lane length per year that they were installed?
# Same summary, grouped by the dateInstalled column instead.
tapply(X = bike$length, INDEX = bike$dateInstalled, FUN = mean, na.rm = TRUE)
# 2. (a) Numerically [hint: `quantile()`] and (b) graphically [hint: `hist()` or `plot(density())`]
# describe the distribution of bike "lane" lengths.
# Graphical summary: default histogram first, then a finer one with many more
# bins (a numeric `breaks` is only a suggestion; hist() picks "pretty" cut points).
hist(bike$length)
hist(bike$length, breaks = 100)
# Numerical summary: with default probs, quantile() returns min, quartiles, max.
# na.rm = TRUE matches the other summaries in this lab; without it quantile()
# stops with an error as soon as one length is missing.
quantile(bike$length, na.rm = TRUE)
# 3. Then describe as above, after stratifying by i) type then ii) number of lanes
# Graphical view: one box of lane lengths per lane type.
boxplot(bike$length ~ bike$type)
levels(factor(bike$type)) # this is the order of boxes
# Same plot, stratified by number of lanes.
boxplot(bike$length ~ bike$numLanes)
# Numerical view: quantiles of length within each type, then each lane count.
tapply(X = bike$length, INDEX = bike$type, FUN = quantile, na.rm = TRUE)
tapply(X = bike$length, INDEX = bike$numLanes, FUN = quantile, na.rm = TRUE)
## Part B
# Download the CSV: http://biostat.jhsph.edu/~ajaffe/files/indicatordeadkids35.csv
# Via: http://www.gapminder.org/data/
# Definition of indicator: How many children the average couple had who died before the age of 35.
# 4. Plot the distribution of each country's average count across all years.
# NOTE(review): assumes `death` is already loaded and holds only numeric columns,
# one row per country -- confirm how the CSV is read.
# Per-row mean across all columns, ignoring missing cells; printed for inspection.
rowMeans(death, na.rm = TRUE)
# Histogram of those per-row means.
hist(rowMeans(death, na.rm = TRUE))
# 5.(a) How many entries are less than 1?
# Logical matrix flagging each cell below 1 (prints the whole matrix).
death < 1
# TRUE counts as 1, so the sum is the number of such entries ...
sum(death < 1, na.rm = TRUE)
# ... and the mean is their share among the non-missing entries.
mean(death < 1, na.rm = TRUE)
# (b) Which array indices do they correspond to? [hint: `arr.ind` argument in `which()`]
# arr.ind = FALSE gives single linear positions; arr.ind = TRUE gives row/col pairs.
head(which(death < 1, arr.ind = FALSE))
head(which(death < 1, arr.ind = TRUE))
# Store the full row/column index matrix.
ind <- which(death < 1, arr.ind = TRUE)
# 6. Plot the count for each country across years in a line plot [hint: `matplot()`]
# matplot() draws one line per COLUMN of its input. Rows of `death` are
# countries (question 4 averages over rows to get per-country means), so the
# data is transposed to get one line per country, with the original columns
# (presumably the years) running along the x-axis.
matplot(t(death), type = "l")