Skip to content

Instantly share code, notes, and snippets.

@sjackman
Created March 1, 2013 20:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save sjackman/5067425 to your computer and use it in GitHub Desktop.
Save sjackman/5067425 to your computer and use it in GitHub Desktop.
STAT 540 Seminar 8
# STAT 540 Seminar 8
# Shaun Jackman
# edgeR
library(edgeR)
dat <- read.table("../data/bottomly/bottomly_count_table.txt",
header = TRUE, row.names = 1)
des <- read.table("../data/bottomly/bottomly_phenodata.txt",
header = TRUE, row.names = 1)
all(rownames(des) == colnames(dat))
# GLM edgeR
table(des$strain)
group <- factor(c(rep("1", 10), rep(2, 11)))
dge.glm <- DGEList(counts = dat, group = group)
design <- model.matrix(~group)
dge.glm.com.disp <- estimateGLMCommonDisp(dge.glm, design, verbose = TRUE)
dge.glm.trend.disp <- estimateGLMTrendedDisp(dge.glm.com.disp)
dge.glm.tag.disp <- estimateGLMTagwiseDisp(dge.glm.trend.disp, design)
plotBCV(dge.glm.tag.disp)
fit <- glmFit(dge.glm.tag.disp, design)
lrt <- glmLRT(fit, coef = 2)
topTags(lrt)
tt.glm <- topTags(lrt, n = Inf)
nrow(tt.glm$table[tt.glm$table$FDR < 0.01, ])
interestingSamples <- rownames(tt.glm$table[tt.glm$table$FDR < 1e-50, ])
cpm(dge.glm.tag.disp)[interestingSamples, ]
summary(de.glm <- decideTestsDGE(lrt, p = 0.05, adjust = "BH"))
tags.glm <- rownames(dge.glm.tag.disp)[as.logical(de.glm)]
plotSmear(lrt, de.tags = tags.glm)
abline(h = c(-2, 2), col = "blue")
# Mini exercise:
# Redo the above analysis but first filter the data and remove any gene that
# has: 1. count equal tot zero across all samples 2. count equal to zero in at
# least one sample in each genotype group
do.edgeR <- function(dat) {
dge.glm <- DGEList(counts = dat, group = group)
design <- model.matrix(~group)
dge.glm.com.disp <- estimateGLMCommonDisp(dge.glm, design, verbose = TRUE)
dge.glm.trend.disp <- estimateGLMTrendedDisp(dge.glm.com.disp)
dge.glm.tag.disp <- estimateGLMTagwiseDisp(dge.glm.trend.disp, design)
plotBCV(dge.glm.tag.disp)
fit <- glmFit(dge.glm.tag.disp, design)
lrt <- glmLRT(fit, coef = 2)
cat('Top genes:')
print(topTags(lrt))
tt.glm <- topTags(lrt, n = Inf)
cat('Number of genes with FDR < 0.01:')
print(sum(tt.glm$table$FDR < 0.01))
interestingSamples <- rownames(tt.glm$table[tt.glm$table$FDR < 1e-50, ])
cat('Interesting genes:')
print(cpm(dge.glm.tag.disp)[interestingSamples, ])
cat('Interesting genes with BH FDR < 0.05:')
print(table(de.glm <- decideTestsDGE(lrt, p = 0.05, adjust = "BH")))
tags.glm <- rownames(dge.glm.tag.disp)[as.logical(de.glm)]
plotSmear(lrt, de.tags = tags.glm)
abline(h = c(-2, 2), col = "blue")
de.glm
}
edgeR.hits <- do.edgeR(subset(dat, apply(dat, 1, function(x) !all(x == 0))))
edgeR.hits <- do.edgeR(subset(dat, apply(dat, 1, function(x) !any(x == 0))))
# DESeq
library(DESeq)
deSeqDat <- newCountDataSet(dat, group)
deSeqDat <- estimateSizeFactors(deSeqDat)
deSeqDat <- estimateDispersions(deSeqDat)
plotDispEsts(deSeqDat)
deseq.results <- nbinomTest(deSeqDat, levels(group)[1], levels(group)[2])
plotMA(deseq.results)
# Voom & limma
library(limma)
dat.voomed <- voom(dat, design)
fit <- eBayes(lmFit(dat.voomed, design))
# Take Home Problem
# Choose a specific threshold for the adjusted p value, find the genes
# identified as differentially expressed using each of edgeR, DESeq and
# voom+limma. Compare the number of genes in these 3 lists, and draw a venn
# digram demonstrating the overlap (if any!).
edgeR.hits <- do.edgeR(dat)
table(deseq.results$padj < 0.05)
voom.hits <- topTable(fit, coef='group2', num=Inf, sort.by='none')
table(voom.hits$adj.P.Val < 0.05)
results <- data.frame(
edgeR = edgeR.hits != 0,
DESeq = deseq.results$padj < 0.05,
voom.limma = voom.hits$adj.P.Val < 0.05)
table(results)
vennDiagram(results)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment