Last active
December 12, 2015 05:58
-
-
Save sjackman/4725134 to your computer and use it in GitHub Desktop.
STAT 540 Seminar 6
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# STAT 540 Seminar 6 | |
# Shaun Jackman | |
# Preliminaries | |
library(limma) | |
library(lattice) | |
prDat <- read.table("../data/photoRec/GSE4051_data.txt") # the whole enchilada | |
str(prDat, max.level = 0) | |
load("../data/photoRec/GSE4051_design.robj") # load exp design as 'prDes' | |
str(prDes) | |
# The difficulty in estimating gene-wise variance | |
m <- 1000 | |
n <- 3 | |
x <- matrix(rnorm(m * n), nrow = m) | |
obsVars <- apply(x, 1, var) | |
summary(obsVars) | |
mean(obsVars < 1/3) | |
densityplot(~obsVars, n = 200) | |
# Fit a linear model: explain gene expression in the wild type mice as a function of developmental stage (one-way ANOVA) | |
wtDes <- subset(prDes, gType == "wt") | |
str(wtDes) | |
wtDat <- subset(prDat, select = prDes$gType == "wt") | |
str(wtDat, max.level = 0) | |
wtDesMat <- model.matrix(~devStage, wtDes) | |
str(wtDesMat) | |
wtFit <- lmFit(wtDat, wtDesMat) | |
wtEbFit <- eBayes(wtFit) | |
topTable(wtEbFit) | |
topTable(wtEbFit, coef = c("devStageP2", "devStageP6", "devStageP10", "devStage4_weeks")) | |
colnames(coef(wtEbFit)) | |
dsHits <- topTable(wtEbFit, coef = grep("devStage", colnames(coef(wtEbFit)))) | |
# Use the hit list you stored above and your functions for extracting and plotting data to produce this plot for hits 3, 6, and 9 on the list. | |
library(reshape2) | |
prepareData <- function(genes) | |
melt(cbind(prDes, t(prDat[genes,])), | |
id.vars=colnames(prDes), | |
variable.name='gene', value.name='gExp') | |
makeStripplot <- function(x, ...) | |
stripplot(gExp ~ devStage | gene, x, subset = gType == 'wt', | |
jitter.data = TRUE, auto.key = TRUE, | |
type = c("p", "a"), grid = TRUE, | |
...) | |
makeStripplot(prepareData(dsHits$ID[c(3,6,9)])) | |
# Be the boss of topTable() | |
# How many probes have Benjamini-Hochberg ("BH") adjusted p-values for the F test conducted above that are less than 1e-05? | |
hits = topTable(wtEbFit, coef = c("devStageP2", "devStageP6", "devStageP10", "devStage4_weeks"), | |
p.value=1e-5, number=9999) | |
nrow(hits) | |
# What is the 63rd hit on this list? Provide it's Affy ID, F statistic, BH adjusted p-value, and the estimated effect for developmental stage "P6" in that order. | |
hits[63,c('F', 'adj.P.Val', 'devStageP6')] | |
# Scatterplot the t statistics for the test that the P2 effect is zero against that for P10. | |
hits <- topTable(wtEbFit, coef = c('devStageP2', 'devStageP10'), number=99999) | |
smoothScatter(hits$devStageP2, hits$devStageP10, asp=1) | |
# Create a densityplot of the associated p-values, so you can get a sense of which developmental stage, P2 or P10, is more clearly distinguished from baseline E16. | |
P2Hits <- topTable(wtEbFit, coef = 'devStageP2', number=99999) | |
P10Hits <- topTable(wtEbFit, coef = 'devStageP10', number=99999) | |
densityplot(~P10Hits$adj.P.Val + P2Hits$adj.P.Val) | |
# Is this what you'd expect? | |
# Yes, it makes sense that P10 is more distinguished than P2 from the baseline E16. | |
# If you require a BH adjusted p-value less than 1e-03, how many hits do you get for P2? How many for P10? How much overlap is there? | |
P2Hits <- topTable(wtEbFit, coef = 'devStageP2', number=Inf, sort='none') | |
P10Hits <- topTable(wtEbFit, coef = 'devStageP10', number=Inf, sort='none') | |
threshold <- 1e-3 | |
addmargins(with(data.frame( | |
P2 = P2Hits$adj.P.Val < threshold, | |
P10 = P10Hits$adj.P.Val < threshold), | |
table(P2, P10))) | |
# Now just focus on the P10 effect. Create a scatterplot matrix of raw p-values, BH adjusted p-values, and BY p-values. | |
xyplot(adj.P.Val ~ P.Value, P10Hits) | |
P10Hits <- topTable(wtEbFit, coef = 'devStageP10', adjust.method='BY', p.value=1e-3, number=99999) | |
xyplot(adj.P.Val ~ P.Value, P10Hits) | |
# Perform inference for some contrasts | |
colnames(wtDesMat) | |
cont.matrix <- makeContrasts(P10VsP6 = devStageP10 - devStageP6, | |
fourweeksVsP10 = devStage4_weeks - devStageP10, levels = wtDesMat) | |
wtFitCont <- contrasts.fit(wtFit, cont.matrix) | |
wtEbFitCont <- eBayes(wtFitCont) | |
topTable(wtEbFitCont) | |
# The top hits are probes where there is big change from P6 to P10, from P10 to 4_weeks, or both. Let's check that by plotting the data from the top 4 hits. | |
makeStripplot(prepareData(rownames(topTable(wtEbFitCont))[1:4])) | |
cutoff <- 1e-04 | |
wtResCont <- decideTests(wtEbFitCont, p.value = cutoff, method = "global") | |
summary(wtResCont) | |
hits1 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] < 0)] | |
makeStripplot(prepareData(hits1)) | |
hits2 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] < 0)] | |
makeStripplot(prepareData(hits2[1:4])) | |
intersect(hits1, hits2) | |
hits3 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] > 0)] | |
makeStripplot(prepareData(hits3[1:4])) | |
intersect(hits1, hits3) | |
intersect(hits2, hits3) | |
cutoff <- 0.01 | |
nHits <- 8 | |
wtResCont <- decideTests(wtEbFitCont, p.value = cutoff, method = "global") | |
summary(wtResCont) | |
hits1 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] < 0)] | |
makeStripplot(prepareData(hits1[1:nHits])) | |
hits2 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] < 0)] | |
makeStripplot(prepareData(hits2[1:nHits])) | |
hits3 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] > 0)] | |
makeStripplot(prepareData(hits3[1:nHits])) | |
hits4 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] > 0)] | |
makeStripplot(prepareData(hits4[1:nHits])) | |
vennDiagram(wtResCont) | |
hits5 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] != 0 & wtResCont[, "fourweeksVsP10"] != 0)] | |
makeStripplot(prepareData(hits5)) | |
hits6 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] > 0 & wtResCont[, "fourweeksVsP10"] < 0)] | |
makeStripplot(prepareData(hits6)) | |
# Take-home exercise: See if you can find one or more probes that have some expression changes up to P6 and then hold steady all the way to 4_weeks. Here's some I found. | |
cont.matrix <- makeContrasts( | |
P2VsE16 = devStageP2, | |
P6VsP2 = devStageP6 - devStageP2, | |
P10VsP6 = devStageP10 - devStageP6, | |
fourweeksVsP10 = devStage4_weeks - devStageP10, | |
levels = wtDesMat) | |
wtFitCont <- contrasts.fit(wtFit, cont.matrix) | |
wtEbFitCont <- eBayes(wtFitCont) | |
topTable(wtEbFitCont) | |
wtResCont <- decideTests(wtEbFitCont, p.value = 0.05, method = "global") | |
summary(wtResCont) | |
hits <- rownames(prDat)[which( | |
wtResCont[, "P2VsE16"] != 0 | |
& wtResCont[, "P6VsP2"] != 0 | |
& wtResCont[, "P10VsP6"] == 0 | |
& wtResCont[, "fourweeksVsP10"] == 0)] | |
length(hits) | |
wtResCont[hits,] | |
makeStripplot(prepareData(hits)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment