########### # Lecture 6 - script # load in data for this lecture load("for_lec6.rda") ######################## ## A very simple plot ## ######################## x <- c(2,4,6,8,10) y <- c(1.5,3,7,8,15) plot(y ~ x) ############## ## XY Plots ## ############## # Let's use some real data head(people) # A simple XY plot plot(people$weight ~ people$height) # Some plot formatting options: plot(people$weight ~ people$height, main="Weight by Height", ylab="Weight (kg)", xlab="Height (cm)") plot(people$weight ~ people$height, pch=19) plot(people$weight ~ people$height, col="red") plot(people$weight ~ people$height, col=people$gender) # The final XY plot plot(people$weight ~ people$height, pch=19, col=people$gender, main="Weight by Height", ylab="Weight (kg)", xlab="Height (cm)") legend("bottomright", c("Male", "Female"), text.col=c(1,2)) # Look at some of the other plotting point styles: example(points) ################ ## A bar plot ## ################ # How many men/women do we have in the dataset? g <- table(people$gender) g barplot(g) barplot(g, names.arg=c("Men", "Women"), ylab="Number", main="Number of men and women", ) # Make the bars horizontal instead of vertical barplot(g, names.arg=c("Men", "Women"), main="Number of men and women", horiz = TRUE, las=1) ################# ## A histogram ## ################# # First calculate bmi bmi = people$weight / (people$height/100)^2 # Plot the histogram hist(bmi) # Add two vertical lines lines(x=c(18.5, 18.5), y = c(0, 140), col="red") lines(x=c(25, 25), y = c(0, 140), col="red") # An alternative, simpler way of plotting horizontal or vertical lines abline(v=18.5, col="red") # Add some text to the plot text(22, 135, "Ideal BMI") ############### ## A boxplot ## ############### boxplot(bmi ~ people$gender, names=c("Male", "Female")) ###################### ## Multipanel plots ## ###################### # Subset the dataset to create separate male/female datasets m = subset(people, gender==1) # Note the double equals sign f = subset(people, gender==2) nrow(people) nrow(m) nrow(f) # Set up a 2-row, 1-column plotting window par(mfrow=c(2,1)) # Make the male/female plots hist(m$height, main="Male") hist(f$height, main="Female") # Set the scales explicitly hist(m$height, main="Male", xlim=c(140,200)) hist(f$height, main="Female", xlim=c(140,200)) ################################# ## Save the plot to a PDF file ## ################################# pdf(file="~/Desktop/lec6/bmi.pdf") par(mfrow=c(2,1)) hist(m$height, main="Male", xlim=c(140,200)) hist(f$height, main="Female", xlim=c(140,200)) dev.off() # Reset the plotting window to a single plot par(mfrow=c(1,1)) ############################### ## Some other types of plots ## ############################### example(pie) example(persp) #################################################### ## 'Manually' constructing a plot of the dog data ## #################################################### # now, we will make a matrix with time of each exposure ('yes' or 'no') # instead of just time at risk # we still need to take out missing time though # one more column, which will be exposure out2 = data.frame(matrix(nrow = 0, ncol = 4)) names(out2) = c("id","exp","start","end") # note the names # columns we care about cols = grep("dog", names(dat)) for(i in 1:nrow(dat)) { hold = as.numeric(dat[i,cols]) # now we want to preserve the 'exposure' categories x = rle(hold) x = data.frame(cbind(x$values, x$length)) names(x) <- c("exp", "length") x$end <- cumsum(x$length) - 1 x$start <- x$end - x$length + 1 # keep exposure, start and end columns tmp = x[!is.na(x$exp),c(1,4,3)] id = dat[i,1] tmp = cbind(id, tmp) out2 = rbind(out2,tmp) } rownames(out2) = 1:nrow(out2) # still need to account for the one month lag # as it's dog borrowing in the PREVIOUS month Index = which(out2$start > 0) out2$start[Index] <- out2$start[Index] - 1 # note we have more entries dim(out2) head(out2) # Use a list to group together rows belonging to the same id Indexes = split(seq(along = out2$id), out2$id) ############# # Plotting the data # we need to initiate a blank plot, and then # fill it in with two 'for' loops # plot the first 20 people plot(0 ~ 0, type = "n", ylim = c(1,20), # first 20 people xlim = c(0,12), # 12 months xlab = "Monthly Visit", ylab = "Person") # note that the IDs are not sequential, so instead of plotting # ID on the y-axis, we just plotted 'person' for(i in 1:20) { # first 20 Index = Indexes[[i]] tmp = out2[Index,] for(j in 1:nrow(tmp)) { lines(x = as.numeric(tmp[j,c("start", "end")]), y = c(i,i), col = tmp$exp[j], lwd = 4) } } # plot everyone plot(0 ~ 0, type = "n", ylim = c(1,length(Indexes)), xlim = c(0,12), # 12 months xlab = "Monthly Visit", ylab = "Person") for(i in seq(along = Indexes)) { Index = Indexes[[i]] tmp = out2[Index,] for(j in 1:nrow(tmp)) { lines(x = as.numeric(tmp[j,3:4]), y = c(i,i), col = tmp$exp[j], lwd = 1) } }