Created
March 1, 2013 20:15
-
-
Save sjackman/5067425 to your computer and use it in GitHub Desktop.
STAT 540 Seminar 8
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# STAT 540 Seminar 8 | |
# Shaun Jackman | |
# edgeR | |
library(edgeR) | |
dat <- read.table("../data/bottomly/bottomly_count_table.txt", | |
header = TRUE, row.names = 1) | |
des <- read.table("../data/bottomly/bottomly_phenodata.txt", | |
header = TRUE, row.names = 1) | |
all(rownames(des) == colnames(dat)) | |
# GLM edgeR | |
table(des$strain) | |
group <- factor(c(rep("1", 10), rep(2, 11))) | |
dge.glm <- DGEList(counts = dat, group = group) | |
design <- model.matrix(~group) | |
dge.glm.com.disp <- estimateGLMCommonDisp(dge.glm, design, verbose = TRUE) | |
dge.glm.trend.disp <- estimateGLMTrendedDisp(dge.glm.com.disp) | |
dge.glm.tag.disp <- estimateGLMTagwiseDisp(dge.glm.trend.disp, design) | |
plotBCV(dge.glm.tag.disp) | |
fit <- glmFit(dge.glm.tag.disp, design) | |
lrt <- glmLRT(fit, coef = 2) | |
topTags(lrt) | |
tt.glm <- topTags(lrt, n = Inf) | |
nrow(tt.glm$table[tt.glm$table$FDR < 0.01, ]) | |
interestingSamples <- rownames(tt.glm$table[tt.glm$table$FDR < 1e-50, ]) | |
cpm(dge.glm.tag.disp)[interestingSamples, ] | |
summary(de.glm <- decideTestsDGE(lrt, p = 0.05, adjust = "BH")) | |
tags.glm <- rownames(dge.glm.tag.disp)[as.logical(de.glm)] | |
plotSmear(lrt, de.tags = tags.glm) | |
abline(h = c(-2, 2), col = "blue") | |
# Mini exercise: | |
# Redo the above analysis but first filter the data and remove any gene that | |
# has: 1. count equal tot zero across all samples 2. count equal to zero in at | |
# least one sample in each genotype group | |
do.edgeR <- function(dat) { | |
dge.glm <- DGEList(counts = dat, group = group) | |
design <- model.matrix(~group) | |
dge.glm.com.disp <- estimateGLMCommonDisp(dge.glm, design, verbose = TRUE) | |
dge.glm.trend.disp <- estimateGLMTrendedDisp(dge.glm.com.disp) | |
dge.glm.tag.disp <- estimateGLMTagwiseDisp(dge.glm.trend.disp, design) | |
plotBCV(dge.glm.tag.disp) | |
fit <- glmFit(dge.glm.tag.disp, design) | |
lrt <- glmLRT(fit, coef = 2) | |
cat('Top genes:') | |
print(topTags(lrt)) | |
tt.glm <- topTags(lrt, n = Inf) | |
cat('Number of genes with FDR < 0.01:') | |
print(sum(tt.glm$table$FDR < 0.01)) | |
interestingSamples <- rownames(tt.glm$table[tt.glm$table$FDR < 1e-50, ]) | |
cat('Interesting genes:') | |
print(cpm(dge.glm.tag.disp)[interestingSamples, ]) | |
cat('Interesting genes with BH FDR < 0.05:') | |
print(table(de.glm <- decideTestsDGE(lrt, p = 0.05, adjust = "BH"))) | |
tags.glm <- rownames(dge.glm.tag.disp)[as.logical(de.glm)] | |
plotSmear(lrt, de.tags = tags.glm) | |
abline(h = c(-2, 2), col = "blue") | |
de.glm | |
} | |
edgeR.hits <- do.edgeR(subset(dat, apply(dat, 1, function(x) !all(x == 0)))) | |
edgeR.hits <- do.edgeR(subset(dat, apply(dat, 1, function(x) !any(x == 0)))) | |
# DESeq | |
library(DESeq) | |
deSeqDat <- newCountDataSet(dat, group) | |
deSeqDat <- estimateSizeFactors(deSeqDat) | |
deSeqDat <- estimateDispersions(deSeqDat) | |
plotDispEsts(deSeqDat) | |
deseq.results <- nbinomTest(deSeqDat, levels(group)[1], levels(group)[2]) | |
plotMA(deseq.results) | |
# Voom & limma | |
library(limma) | |
dat.voomed <- voom(dat, design) | |
fit <- eBayes(lmFit(dat.voomed, design)) | |
# Take Home Problem | |
# Choose a specific threshold for the adjusted p value, find the genes | |
# identified as differentially expressed using each of edgeR, DESeq and | |
# voom+limma. Compare the number of genes in these 3 lists, and draw a venn | |
# digram demonstrating the overlap (if any!). | |
edgeR.hits <- do.edgeR(dat) | |
table(deseq.results$padj < 0.05) | |
voom.hits <- topTable(fit, coef='group2', num=Inf, sort.by='none') | |
table(voom.hits$adj.P.Val < 0.05) | |
results <- data.frame( | |
edgeR = edgeR.hits != 0, | |
DESeq = deseq.results$padj < 0.05, | |
voom.limma = voom.hits$adj.P.Val < 0.05) | |
table(results) | |
vennDiagram(results) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment