Last active
December 11, 2015 23:29
-
-
Save sjackman/4676931 to your computer and use it in GitHub Desktop.
STAT 540 Seminar 5
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# STAT 540 Seminar 5 | |
# Shaun Jackman | |
# 2013-01-30 | |
# Brief pause to load the photoRec data and the lattice package | |
library(lattice) # if you don't already have this loaded ... | |
prDat <- read.table("../data/photoRec/GSE4051_data.txt") # the whole enchilada | |
str(prDat, max.level = 0) | |
load("../data/photoRec/GSE4051_design.robj") # load exp design as 'prDes' | |
str(prDes) | |
# Write a function to prepare a mini-dataset for a small number of genes | |
luckyGenes <- c("1419655_at", "1438815_at") | |
library(reshape2) | |
prepareData <- function(genes) | |
melt(cbind(prDes, t(prDat[genes,])), | |
id.vars=colnames(prDes), | |
variable.name='gene', value.name='gExp') | |
jDat <- prepareData(luckyGenes) | |
str(jDat) | |
head(jDat) | |
tail(jDat) | |
stripplot(gExp ~ devStage | gene, jDat, group = gType, | |
jitter.data = TRUE, auto.key = TRUE, | |
type = c("p", "a"), grid = TRUE) | |
# Write a function to stripplot a mini-dataset | |
makeStripplot <- function(x, ...) | |
stripplot(gExp ~ devStage | gene, x, group = gType, | |
jitter.data = TRUE, auto.key = TRUE, | |
type = c("p", "a"), grid = TRUE, | |
...) | |
makeStripplot(jDat) | |
makeStripplot(jDat, pch = 17, cex = 3) | |
makeStripplot(newDat <- prepareData('1456341_a_at')) | |
str(newDat) | |
head(newDat) | |
# Do a two-sample t-test | |
t.test(gExp ~ devStage, | |
subset(newDat, devStage %in% c('P2', '4_weeks'))) | |
# Fit a linear model with a categorical covariate | |
makeStripplot(mDat <- prepareData('1438786_a_at')) | |
summary(mFit <- lm(gExp ~ devStage, mDat, gType == 'wt')) | |
# Perform inference for a contrast | |
coef(mFit) | |
contMat <- rbind(c(0, -1, 0, 1, 0)) | |
obsDiff <- contMat %*% coef(mFit) | |
sampMeans <- with(subset(mDat, gType == 'wt'), | |
tapply(gExp, devStage, mean)) | |
sampMeans["P2"] - sampMeans["P10"] | |
vcov(mFit) | |
summary(mFit)$coefficients[, "Std. Error"] | |
sqrt(diag(vcov(mFit))) | |
estSe <- contMat %*% vcov(mFit) %*% t(contMat) | |
testStat <- obsDiff/estSe | |
2 * pt(abs(testStat), df = df.residual(mFit), lower.tail = FALSE) | |
# Fit a linear model with two categorical covariates | |
makeStripplot(oDat <- prepareData("1448690_at")) | |
str(oDat) | |
summary(oFitBig <- lm(gExp ~ gType * devStage, oDat)) | |
summary(oFitSmall <- lm(gExp ~ gType + devStage, oDat)) | |
anova(oFitSmall, oFitBig) | |
makeStripplot(oDat <- prepareData('1429225_at')) | |
summary(oFitBig <- lm(gExp ~ gType * devStage, oDat)) | |
summary(oFitSmall <- lm(gExp ~ gType + devStage, oDat)) | |
anova(oFitSmall, oFitBig) | |
# Ideas for further work | |
# We wrote functions to prepare and plot data for more than 1 gene. But when we started fitting models and condusting tests, we only worked with 1 gene at a time. Can you use data aggregation strategies from last week to do some of the same work for small sets of genes? | |
makeStripplot(oDat <- prepareData(c('1448690_at', '1429225_at'))) | |
by(oDat, oDat$gene, function(x) { | |
print(summary(oFitBig <- lm(gExp ~ gType * devStage, x))) | |
print(summary(oFitSmall <- lm(gExp ~ gType + devStage, x))) | |
anova(oFitSmall, oFitBig)}) | |
# In lecture we also experimented with a quantitative version of devStage, which we called age. This opens the door to modelling with a quantitative covariate. Can you fit linear and quadratic models to the expression data for one or several genes? | |
makeStripplot(oDat <- prepareData(c('1448690_at', '1429225_at'))) | |
devStageToDays <- c('E2'=-2, 'P2'=2, 'P6'=6, 'P10'=10, '4_weeks'=28) | |
oDat <- prepareData(c('1448690_at', '1429225_at')) | |
oDat$age <- devStageToDays[oDat$devStage] | |
str(oDat) | |
makeXYplot <- function(x, ...) | |
xyplot(gExp ~ age | gene, x, group = gType, | |
jitter.data = TRUE, auto.key = TRUE, | |
type = c("p", "a"), grid = TRUE, | |
...) | |
makeXYplot(oDat) | |
by(oDat, oDat$gene, function(x) { | |
print(summary(oFitBig <- lm(gExp ~ gType + age, x))) | |
print(summary(oFitSmall <- lm(gExp ~ gType + age + I(age^2), x))) | |
anova(oFitSmall, oFitBig)}) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment