# R cheat sheet: short, independent snippets for importing, reshaping,
# testing and modelling data. Each section stands alone and refers to data
# sets / variables (hw3, lab4, lab7, mydata, model1, ...) that must already
# exist in your session; nothing here is meant to be run top to bottom.

# Importing Data ----
hw3 <- read.table("lungc.txt", header = TRUE)
# attach() puts the columns on the search path; later snippets rely on this
# by using bare column names. In new code prefer `data =` or with().
attach(hw3)

# Or for csv
group <- read.csv("Group_Project.csv", header = TRUE)

# Or for STATA files, load "foreign", which loads a package that is
# already installed on your computer. You may have to download packages
# from CRAN if they're not already installed.
library(foreign)
lab4 <- read.dta("lab4_p2.dta")
attach(lab4)

# Viewing data ----
lab4[1:15, ]
# Templates: replace the <placeholders> with your own names.
# summary(<data set name>)
# xtabs(<variable> ~ <variable>)   # makes a basic cross tabulation
# ftable(<variable>, <variable>)   # shows frequency tables
# table(<variable>, <variable>)

# Merging data ----
merged <- merge(geo.new, alive, all = TRUE)

# Get data from STATA to SAS ----
# Read the .dta file, inspect it, then write a plain-text table.
alive <- read.dta("final1.dta")
alive[1:5, ]
summary(alive)
write.table(alive, file = "alive.txt")

# Sorting Data (this is easier in other software packages) ----
x <- mydata$COD
y <- mydata$LOGO2UP
m <- mydata
order(y)                  # prints the permutation that sorts y
sortdat <- m[order(x), ]  # rows of m reordered by x

# Making new variables ----
# Start from all-NA so values other than "yes"/"no" stay missing.
emp <- rep(NA, length(employed))
emp[employed == "yes"] <- 1
emp[employed == "no"] <- 0

# Make a data frame ----
data.frame(data)

# After making new variable must bind to data set ----
lab7.new <- cbind(lab7, ln.py, year0to10)

# Basic univariate tests ----
x <- matrix(c(2, 4, 13, 15), 2)
chisq.test(x, correct = FALSE)

# Odds ratio for each row of z; z, n1 and n2 must be defined beforehand.
OR <- apply(z, 1, function(x) x[1] * (n2 - x[2]) / (x[2] * (n1 - x[1])))
lnOR <- log(OR)

dat <- read.csv("task1.csv", header = FALSE)
dat2 <- dat[, 1:10]
dat2 <- dat2[complete.cases(dat2), ]  # keep only rows with no missing values
vec1 <- as.vector(unlist(dat2))
chisq.test(table(vec1), correct = FALSE)

fisher.test(dat, alternative = "less")
wilcox.test(x, y, alternative = "two.sided")

# Create spline term ----
# Linear spline with a knot at 5000: zero up to the knot, COD - 5000 above it.
# pmax() keeps the term at zero for values between 4999 and 5000 as well.
CODspl <- pmax(COD - 5000, 0)

# Plotting lines ----
# Use the full component name; `$fit` only worked through partial matching.
fitcii <- lm(formula = LOGO2 ~ CODc + CODc2)$fitted.values
plot(LOGO2 ~ COD, cex = 0.6, main = "1cii: Quadratic regression")
lines(fitcii ~ COD, col = "blue")
abline(h = 0)

# Making Dummy Variables ----
c.mos <- factor(tot_mos)
c.mos
# `- 1` drops the intercept so every level gets its own indicator column.
cbind(tot_mos, model.matrix(~ c.mos - 1))
# Or use factor(tot_mos) in model

# Subsetting Data ----
men <- subset(lab7, ma0fe1 == 0)
men
women <- subset(lab7, ma0fe1 == 1)
women

# Changing reference level with categorical data ----
new <- relevel(factor(tot_mos), ref = "12")

# Lowess Smoother ----
# install sma (plot.smooth.line is not part of base R)
plot(jitter(nMedExp, 0.1), jitter(case, 0.1))
plot.smooth.line(jitter(nMedExp, 0.1), jitter(case, 0.1))

# ANOVA Table ----
fit.small <- lm(disease. ~ crowding. + airqual.)
fit.big <- lm(disease. ~ crowding. + airqual. + education.)
anova(fit.small)
anova(fit.big)
anova(fit.small, fit.big, test = "F")  # F test comparing the nested models

# Linear models ----
# `other + variables` is a placeholder for the remaining covariates.
summary(lm(cd4 ~ bl.age + other + variables))

# Cox ----
library(survival)
model <- coxph(Surv(fu.time, sc) ~ ma0fe1 + sexyear)

# Logistic Regression ----
# NOTE(review): Design appears to have been superseded by the rms package;
# confirm which one is available on your R installation.
library(Design)
# Usage template for lrm() (reference only, not runnable as written):
# lrm(formula, data, subset, na.action = na.delete, method = "lrm.fit",
#     model = FALSE, x = FALSE, y = FALSE, linear.predictors = TRUE,
#     se.fit = FALSE, penalty = 0, penalty.matrix, tol = 1e-7,
#     strata.penalty = 0, var.penalty = c('simple', 'sandwich'),
#     weights, normwt, ...)
# or
# The response is a proportion, so glm() needs the number of trials as prior
# weights (assumes n.mice is the number of mice per row; confirm).
mod <- glm(I(n.sym / n.mice) ~ dose, family = binomial(link = "logit"),
           weights = n.mice)

# Poisson ----
pfit <- glm(cases ~ factor(cals) + factor(train) + factor(bank),
            family = poisson(link = log), data = p1, offset = log(mm))

# Mixed Models ----
# NOTE(review): the original notes said to load "lme" or "nlme" (the latter
# for non linear mixed models). Only nlme is loaded here; confirm which
# package was meant by "lme".
library(nlme)

# Big thing of graphs ----
pairs(~ disease. + crowding. + airqual. + education. + nutrition. + smoking)

# Residuals ----
# Use the full component name; `$resid` only worked through partial matching.
resid <- model1$residuals
plot(salary ~ service, ylab = "Salary", xlab = "Service")
# `fit` must already hold the fitted values of model1.
plot(resid ~ fit, xlab = "Fitted Values", ylab = "Residuals")
abline(h = 0)
plot(resid ~ service, ylab = "Residuals", xlab = "Service")
abline(h = 0)

# Influence ----
hii <- lm.influence(model1)$hat         # hat values
stud <- 1 - hii                         # ASCII minus; an en dash will not parse
cook <- cooks.distance(model1)
stem(hii, 2.5)
sresid <- ls.diag(model1)$stud.res
plot(model1, which = 5)  # makes funky graph
round(cbind(hw4, hii, stud, cook, sresid), 3)

# VIF ----
library(Design)
vif(big)