sjackman/stat540-seminar6.R

## stat540-seminar6.R
# STAT 540 Seminar 6
# Shaun Jackman

# Preliminaries
library(limma)
library(lattice)
prDat <- read.table("../data/photoRec/GSE4051_data.txt")  # the whole enchilada
str(prDat, max.level = 0)
load("../data/photoRec/GSE4051_design.robj")  # load exp design as 'prDes'
str(prDes)

# The difficulty in estimating gene-wise variance
m <- 1000
n <- 3
x <- matrix(rnorm(m * n), nrow = m)
obsVars <- apply(x, 1, var)
summary(obsVars)
mean(obsVars < 1/3)
densityplot(~obsVars, n = 200)

# Fit a linear model: explain gene expression in the wild type mice as a function of developmental stage (one-way ANOVA)
wtDes <- subset(prDes, gType == "wt")
str(wtDes)
wtDat <- subset(prDat, select = prDes$gType == "wt")
str(wtDat, max.level = 0)
wtDesMat <- model.matrix(~devStage, wtDes)
str(wtDesMat)
wtFit <- lmFit(wtDat, wtDesMat)
wtEbFit <- eBayes(wtFit)
topTable(wtEbFit)
topTable(wtEbFit, coef = c("devStageP2", "devStageP6", "devStageP10", "devStage4_weeks"))
colnames(coef(wtEbFit))
dsHits <- topTable(wtEbFit, coef = grep("devStage", colnames(coef(wtEbFit))))
# Use the hit list you stored above and your functions for extracting and plotting data to produce this plot for hits 3, 6, and 9 on the list.
library(reshape2)
prepareData <- function(genes)
  melt(cbind(prDes, t(prDat[genes,])),
       id.vars=colnames(prDes),
       variable.name='gene', value.name='gExp')
makeStripplot <- function(x, ...)
  stripplot(gExp ~ devStage | gene, x, subset = gType == 'wt',
            jitter.data = TRUE, auto.key = TRUE,
            type = c("p", "a"), grid = TRUE,
            ...)
makeStripplot(prepareData(dsHits$ID[c(3,6,9)]))

# Be the boss of topTable()
# How many probes have Benjamini-Hochberg ("BH") adjusted p-values for the F test conducted above that are less than 1e-05?
hits = topTable(wtEbFit, coef = c("devStageP2", "devStageP6", "devStageP10", "devStage4_weeks"),
             p.value=1e-5, number=9999)
nrow(hits)
# What is the 63rd hit on this list? Provide it's Affy ID, F statistic, BH adjusted p-value, and the estimated effect for developmental stage "P6" in that order.
hits[63,c('F', 'adj.P.Val', 'devStageP6')]
# Scatterplot the t statistics for the test that the P2 effect is zero against that for P10.
hits <- topTable(wtEbFit, coef = c('devStageP2', 'devStageP10'), number=99999)
smoothScatter(hits$devStageP2, hits$devStageP10, asp=1)
# Create a densityplot of the associated p-values, so you can get a sense of which developmental stage, P2 or P10, is more clearly distinguished from baseline E16.
P2Hits <- topTable(wtEbFit, coef = 'devStageP2', number=99999)
P10Hits <- topTable(wtEbFit, coef = 'devStageP10', number=99999)
densityplot(~P10Hits$adj.P.Val + P2Hits$adj.P.Val)
# Is this what you'd expect?
# Yes, it makes sense that P10 is more distinguished than P2 from the baseline E16.
# If you require a BH adjusted p-value less than 1e-03, how many hits do you get for P2? How many for P10? How much overlap is there?
P2Hits <- topTable(wtEbFit, coef = 'devStageP2', number=Inf, sort='none')
P10Hits <- topTable(wtEbFit, coef = 'devStageP10', number=Inf, sort='none')
threshold <- 1e-3
addmargins(with(data.frame(
		P2 = P2Hits$adj.P.Val < threshold,
		P10 = P10Hits$adj.P.Val < threshold),
	table(P2, P10)))
# Now just focus on the P10 effect. Create a scatterplot matrix of raw p-values, BH adjusted p-values, and BY p-values.
xyplot(adj.P.Val ~ P.Value, P10Hits)
P10Hits <- topTable(wtEbFit, coef = 'devStageP10', adjust.method='BY', p.value=1e-3, number=99999)
xyplot(adj.P.Val ~ P.Value, P10Hits)

# Perform inference for some contrasts
colnames(wtDesMat)
cont.matrix <- makeContrasts(P10VsP6 = devStageP10 - devStageP6,
                             fourweeksVsP10 = devStage4_weeks - devStageP10, levels = wtDesMat)
wtFitCont <- contrasts.fit(wtFit, cont.matrix)
wtEbFitCont <- eBayes(wtFitCont)
topTable(wtEbFitCont)
# The top hits are probes where there is big change from P6 to P10, from P10 to 4_weeks, or both. Let's check that by plotting the data from the top 4 hits.
makeStripplot(prepareData(rownames(topTable(wtEbFitCont))[1:4]))
cutoff <- 1e-04
wtResCont <- decideTests(wtEbFitCont, p.value = cutoff, method = "global")
summary(wtResCont)
hits1 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] < 0)]
makeStripplot(prepareData(hits1))
hits2 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] < 0)]
makeStripplot(prepareData(hits2[1:4]))
intersect(hits1, hits2)
hits3 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] > 0)]
makeStripplot(prepareData(hits3[1:4]))
intersect(hits1, hits3)
intersect(hits2, hits3)
cutoff <- 0.01
nHits <- 8
wtResCont <- decideTests(wtEbFitCont, p.value = cutoff, method = "global")
summary(wtResCont)
hits1 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] < 0)]
makeStripplot(prepareData(hits1[1:nHits]))
hits2 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] < 0)]
makeStripplot(prepareData(hits2[1:nHits]))
hits3 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] > 0)]
makeStripplot(prepareData(hits3[1:nHits]))
hits4 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] > 0)]
makeStripplot(prepareData(hits4[1:nHits]))
vennDiagram(wtResCont)
hits5 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] != 0 & wtResCont[, "fourweeksVsP10"] != 0)]
makeStripplot(prepareData(hits5))
hits6 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] > 0 & wtResCont[, "fourweeksVsP10"] < 0)]
makeStripplot(prepareData(hits6))

# Take-home exercise: See if you can find one or more probes that have some expression changes up to P6 and then hold steady all the way to 4_weeks. Here's some I found.
cont.matrix <- makeContrasts(
  P2VsE16 = devStageP2,
  P6VsP2 = devStageP6 - devStageP2,
  P10VsP6 = devStageP10 - devStageP6,
  fourweeksVsP10 = devStage4_weeks - devStageP10,
  levels = wtDesMat)
wtFitCont <- contrasts.fit(wtFit, cont.matrix)
wtEbFitCont <- eBayes(wtFitCont)
topTable(wtEbFitCont)

wtResCont <- decideTests(wtEbFitCont, p.value = 0.05, method = "global")
summary(wtResCont)
hits <- rownames(prDat)[which(
  wtResCont[, "P2VsE16"] != 0
  & wtResCont[, "P6VsP2"] != 0
  & wtResCont[, "P10VsP6"] == 0
  & wtResCont[, "fourweeksVsP10"] == 0)]
length(hits)
wtResCont[hits,]
makeStripplot(prepareData(hits))
	# STAT 540 Seminar 6
	# Shaun Jackman

	# Preliminaries
	library(limma)
	library(lattice)
	prDat <- read.table("../data/photoRec/GSE4051_data.txt") # the whole enchilada
	str(prDat, max.level = 0)
	load("../data/photoRec/GSE4051_design.robj") # load exp design as 'prDes'
	str(prDes)

	# The difficulty in estimating gene-wise variance
	m <- 1000
	n <- 3
	x <- matrix(rnorm(m * n), nrow = m)
	obsVars <- apply(x, 1, var)
	summary(obsVars)
	mean(obsVars < 1/3)
	densityplot(~obsVars, n = 200)

	# Fit a linear model: explain gene expression in the wild type mice as a function of developmental stage (one-way ANOVA)
	wtDes <- subset(prDes, gType == "wt")
	str(wtDes)
	wtDat <- subset(prDat, select = prDes$gType == "wt")
	str(wtDat, max.level = 0)
	wtDesMat <- model.matrix(~devStage, wtDes)
	str(wtDesMat)
	wtFit <- lmFit(wtDat, wtDesMat)
	wtEbFit <- eBayes(wtFit)
	topTable(wtEbFit)
	topTable(wtEbFit, coef = c("devStageP2", "devStageP6", "devStageP10", "devStage4_weeks"))
	colnames(coef(wtEbFit))
	dsHits <- topTable(wtEbFit, coef = grep("devStage", colnames(coef(wtEbFit))))
	# Use the hit list you stored above and your functions for extracting and plotting data to produce this plot for hits 3, 6, and 9 on the list.
	library(reshape2)
	prepareData <- function(genes)
	melt(cbind(prDes, t(prDat[genes,])),
	id.vars=colnames(prDes),
	variable.name='gene', value.name='gExp')
	makeStripplot <- function(x, ...)
	stripplot(gExp ~ devStage \| gene, x, subset = gType == 'wt',
	jitter.data = TRUE, auto.key = TRUE,
	type = c("p", "a"), grid = TRUE,
	...)
	makeStripplot(prepareData(dsHits$ID[c(3,6,9)]))

	# Be the boss of topTable()
	# How many probes have Benjamini-Hochberg ("BH") adjusted p-values for the F test conducted above that are less than 1e-05?
	hits = topTable(wtEbFit, coef = c("devStageP2", "devStageP6", "devStageP10", "devStage4_weeks"),
	p.value=1e-5, number=9999)
	nrow(hits)
	# What is the 63rd hit on this list? Provide it's Affy ID, F statistic, BH adjusted p-value, and the estimated effect for developmental stage "P6" in that order.
	hits[63,c('F', 'adj.P.Val', 'devStageP6')]
	# Scatterplot the t statistics for the test that the P2 effect is zero against that for P10.
	hits <- topTable(wtEbFit, coef = c('devStageP2', 'devStageP10'), number=99999)
	smoothScatter(hits$devStageP2, hits$devStageP10, asp=1)
	# Create a densityplot of the associated p-values, so you can get a sense of which developmental stage, P2 or P10, is more clearly distinguished from baseline E16.
	P2Hits <- topTable(wtEbFit, coef = 'devStageP2', number=99999)
	P10Hits <- topTable(wtEbFit, coef = 'devStageP10', number=99999)
	densityplot(~P10Hits$adj.P.Val + P2Hits$adj.P.Val)
	# Is this what you'd expect?
	# Yes, it makes sense that P10 is more distinguished than P2 from the baseline E16.
	# If you require a BH adjusted p-value less than 1e-03, how many hits do you get for P2? How many for P10? How much overlap is there?
	P2Hits <- topTable(wtEbFit, coef = 'devStageP2', number=Inf, sort='none')
	P10Hits <- topTable(wtEbFit, coef = 'devStageP10', number=Inf, sort='none')
	threshold <- 1e-3
	addmargins(with(data.frame(
	P2 = P2Hits$adj.P.Val < threshold,
	P10 = P10Hits$adj.P.Val < threshold),
	table(P2, P10)))
	# Now just focus on the P10 effect. Create a scatterplot matrix of raw p-values, BH adjusted p-values, and BY p-values.
	xyplot(adj.P.Val ~ P.Value, P10Hits)
	P10Hits <- topTable(wtEbFit, coef = 'devStageP10', adjust.method='BY', p.value=1e-3, number=99999)
	xyplot(adj.P.Val ~ P.Value, P10Hits)

	# Perform inference for some contrasts
	colnames(wtDesMat)
	cont.matrix <- makeContrasts(P10VsP6 = devStageP10 - devStageP6,
	fourweeksVsP10 = devStage4_weeks - devStageP10, levels = wtDesMat)
	wtFitCont <- contrasts.fit(wtFit, cont.matrix)
	wtEbFitCont <- eBayes(wtFitCont)
	topTable(wtEbFitCont)
	# The top hits are probes where there is big change from P6 to P10, from P10 to 4_weeks, or both. Let's check that by plotting the data from the top 4 hits.
	makeStripplot(prepareData(rownames(topTable(wtEbFitCont))[1:4]))
	cutoff <- 1e-04
	wtResCont <- decideTests(wtEbFitCont, p.value = cutoff, method = "global")
	summary(wtResCont)
	hits1 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] < 0)]
	makeStripplot(prepareData(hits1))
	hits2 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] < 0)]
	makeStripplot(prepareData(hits2[1:4]))
	intersect(hits1, hits2)
	hits3 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] > 0)]
	makeStripplot(prepareData(hits3[1:4]))
	intersect(hits1, hits3)
	intersect(hits2, hits3)
	cutoff <- 0.01
	nHits <- 8
	wtResCont <- decideTests(wtEbFitCont, p.value = cutoff, method = "global")
	summary(wtResCont)
	hits1 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] < 0)]
	makeStripplot(prepareData(hits1[1:nHits]))
	hits2 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] < 0)]
	makeStripplot(prepareData(hits2[1:nHits]))
	hits3 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] > 0)]
	makeStripplot(prepareData(hits3[1:nHits]))
	hits4 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] > 0)]
	makeStripplot(prepareData(hits4[1:nHits]))
	vennDiagram(wtResCont)
	hits5 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] != 0 & wtResCont[, "fourweeksVsP10"] != 0)]
	makeStripplot(prepareData(hits5))
	hits6 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] > 0 & wtResCont[, "fourweeksVsP10"] < 0)]
	makeStripplot(prepareData(hits6))

	# Take-home exercise: See if you can find one or more probes that have some expression changes up to P6 and then hold steady all the way to 4_weeks. Here's some I found.
	cont.matrix <- makeContrasts(
	P2VsE16 = devStageP2,
	P6VsP2 = devStageP6 - devStageP2,
	P10VsP6 = devStageP10 - devStageP6,
	fourweeksVsP10 = devStage4_weeks - devStageP10,
	levels = wtDesMat)
	wtFitCont <- contrasts.fit(wtFit, cont.matrix)
	wtEbFitCont <- eBayes(wtFitCont)
	topTable(wtEbFitCont)

	wtResCont <- decideTests(wtEbFitCont, p.value = 0.05, method = "global")
	summary(wtResCont)
	hits <- rownames(prDat)[which(
	wtResCont[, "P2VsE16"] != 0
	& wtResCont[, "P6VsP2"] != 0
	& wtResCont[, "P10VsP6"] == 0
	& wtResCont[, "fourweeksVsP10"] == 0)]
	length(hits)
	wtResCont[hits,]
	makeStripplot(prepareData(hits))