Skip to content

Instantly share code, notes, and snippets.

@sjackman
Last active December 12, 2015 05:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sjackman/4725134 to your computer and use it in GitHub Desktop.
Save sjackman/4725134 to your computer and use it in GitHub Desktop.
STAT 540 Seminar 6
# STAT 540 Seminar 6
# Shaun Jackman
# Preliminaries
library(limma)
library(lattice)
prDat <- read.table("../data/photoRec/GSE4051_data.txt") # the whole enchilada
str(prDat, max.level = 0)
load("../data/photoRec/GSE4051_design.robj") # load exp design as 'prDes'
str(prDes)
# The difficulty in estimating gene-wise variance
m <- 1000
n <- 3
x <- matrix(rnorm(m * n), nrow = m)
obsVars <- apply(x, 1, var)
summary(obsVars)
mean(obsVars < 1/3)
densityplot(~obsVars, n = 200)
# Fit a linear model: explain gene expression in the wild type mice as a function of developmental stage (one-way ANOVA)
wtDes <- subset(prDes, gType == "wt")
str(wtDes)
wtDat <- subset(prDat, select = prDes$gType == "wt")
str(wtDat, max.level = 0)
wtDesMat <- model.matrix(~devStage, wtDes)
str(wtDesMat)
wtFit <- lmFit(wtDat, wtDesMat)
wtEbFit <- eBayes(wtFit)
topTable(wtEbFit)
topTable(wtEbFit, coef = c("devStageP2", "devStageP6", "devStageP10", "devStage4_weeks"))
colnames(coef(wtEbFit))
dsHits <- topTable(wtEbFit, coef = grep("devStage", colnames(coef(wtEbFit))))
# Use the hit list you stored above and your functions for extracting and plotting data to produce this plot for hits 3, 6, and 9 on the list.
library(reshape2)
prepareData <- function(genes)
melt(cbind(prDes, t(prDat[genes,])),
id.vars=colnames(prDes),
variable.name='gene', value.name='gExp')
makeStripplot <- function(x, ...)
stripplot(gExp ~ devStage | gene, x, subset = gType == 'wt',
jitter.data = TRUE, auto.key = TRUE,
type = c("p", "a"), grid = TRUE,
...)
makeStripplot(prepareData(dsHits$ID[c(3,6,9)]))
# Be the boss of topTable()
# How many probes have Benjamini-Hochberg ("BH") adjusted p-values for the F test conducted above that are less than 1e-05?
hits = topTable(wtEbFit, coef = c("devStageP2", "devStageP6", "devStageP10", "devStage4_weeks"),
p.value=1e-5, number=9999)
nrow(hits)
# What is the 63rd hit on this list? Provide it's Affy ID, F statistic, BH adjusted p-value, and the estimated effect for developmental stage "P6" in that order.
hits[63,c('F', 'adj.P.Val', 'devStageP6')]
# Scatterplot the t statistics for the test that the P2 effect is zero against that for P10.
hits <- topTable(wtEbFit, coef = c('devStageP2', 'devStageP10'), number=99999)
smoothScatter(hits$devStageP2, hits$devStageP10, asp=1)
# Create a densityplot of the associated p-values, so you can get a sense of which developmental stage, P2 or P10, is more clearly distinguished from baseline E16.
P2Hits <- topTable(wtEbFit, coef = 'devStageP2', number=99999)
P10Hits <- topTable(wtEbFit, coef = 'devStageP10', number=99999)
densityplot(~P10Hits$adj.P.Val + P2Hits$adj.P.Val)
# Is this what you'd expect?
# Yes, it makes sense that P10 is more distinguished than P2 from the baseline E16.
# If you require a BH adjusted p-value less than 1e-03, how many hits do you get for P2? How many for P10? How much overlap is there?
P2Hits <- topTable(wtEbFit, coef = 'devStageP2', number=Inf, sort='none')
P10Hits <- topTable(wtEbFit, coef = 'devStageP10', number=Inf, sort='none')
threshold <- 1e-3
addmargins(with(data.frame(
P2 = P2Hits$adj.P.Val < threshold,
P10 = P10Hits$adj.P.Val < threshold),
table(P2, P10)))
# Now just focus on the P10 effect. Create a scatterplot matrix of raw p-values, BH adjusted p-values, and BY p-values.
xyplot(adj.P.Val ~ P.Value, P10Hits)
P10Hits <- topTable(wtEbFit, coef = 'devStageP10', adjust.method='BY', p.value=1e-3, number=99999)
xyplot(adj.P.Val ~ P.Value, P10Hits)
# Perform inference for some contrasts
colnames(wtDesMat)
cont.matrix <- makeContrasts(P10VsP6 = devStageP10 - devStageP6,
fourweeksVsP10 = devStage4_weeks - devStageP10, levels = wtDesMat)
wtFitCont <- contrasts.fit(wtFit, cont.matrix)
wtEbFitCont <- eBayes(wtFitCont)
topTable(wtEbFitCont)
# The top hits are probes where there is big change from P6 to P10, from P10 to 4_weeks, or both. Let's check that by plotting the data from the top 4 hits.
makeStripplot(prepareData(rownames(topTable(wtEbFitCont))[1:4]))
cutoff <- 1e-04
wtResCont <- decideTests(wtEbFitCont, p.value = cutoff, method = "global")
summary(wtResCont)
hits1 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] < 0)]
makeStripplot(prepareData(hits1))
hits2 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] < 0)]
makeStripplot(prepareData(hits2[1:4]))
intersect(hits1, hits2)
hits3 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] > 0)]
makeStripplot(prepareData(hits3[1:4]))
intersect(hits1, hits3)
intersect(hits2, hits3)
cutoff <- 0.01
nHits <- 8
wtResCont <- decideTests(wtEbFitCont, p.value = cutoff, method = "global")
summary(wtResCont)
hits1 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] < 0)]
makeStripplot(prepareData(hits1[1:nHits]))
hits2 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] < 0)]
makeStripplot(prepareData(hits2[1:nHits]))
hits3 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] > 0)]
makeStripplot(prepareData(hits3[1:nHits]))
hits4 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] > 0)]
makeStripplot(prepareData(hits4[1:nHits]))
vennDiagram(wtResCont)
hits5 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] != 0 & wtResCont[, "fourweeksVsP10"] != 0)]
makeStripplot(prepareData(hits5))
hits6 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] > 0 & wtResCont[, "fourweeksVsP10"] < 0)]
makeStripplot(prepareData(hits6))
# Take-home exercise: See if you can find one or more probes that have some expression changes up to P6 and then hold steady all the way to 4_weeks. Here's some I found.
cont.matrix <- makeContrasts(
P2VsE16 = devStageP2,
P6VsP2 = devStageP6 - devStageP2,
P10VsP6 = devStageP10 - devStageP6,
fourweeksVsP10 = devStage4_weeks - devStageP10,
levels = wtDesMat)
wtFitCont <- contrasts.fit(wtFit, cont.matrix)
wtEbFitCont <- eBayes(wtFitCont)
topTable(wtEbFitCont)
wtResCont <- decideTests(wtEbFitCont, p.value = 0.05, method = "global")
summary(wtResCont)
hits <- rownames(prDat)[which(
wtResCont[, "P2VsE16"] != 0
& wtResCont[, "P6VsP2"] != 0
& wtResCont[, "P10VsP6"] == 0
& wtResCont[, "fourweeksVsP10"] == 0)]
length(hits)
wtResCont[hits,]
makeStripplot(prepareData(hits))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment