Skip to content

Instantly share code, notes, and snippets.

@mschubert
Last active August 29, 2015 14:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mschubert/ae2a385aeddd0e89d759 to your computer and use it in GitHub Desktop.
Save mschubert/ae2a385aeddd0e89d759 to your computer and use it in GitHub Desktop.
#
# loading the table, converting it to an expression matrix
#
df = read.table(
"GBM.rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.data.txt",
header=TRUE, sep="\t", stringsAsFactors=FALSE)
expr = data.matrix(df[2:nrow(df), df[1,]=="raw_count"])
rownames(expr) = df[2:nrow(df), 1]
#
# filtering the row- and column names to what we want
#
colnames(expr) = substr(colnames(expr), 15, 15)
rownames(expr) = sub("\\|.*$", "", rownames(expr))
expr = expr[rownames(expr) != "?",]
#
# transform the raw read counts using voom
# -- with this, we can use standard linear modeling tools
#
# if you start with a .bam file instead of the TCGA text file,
# there are more specific tools to get differential expression
# -- see DESeq2, edgeR packages
#
library(limma)
vexpr = voom(expr)$E
axpr = avereps(vexpr) # average if multiple measurements on same gene
#
# use lmFit to compute differential expression
#
design = model.matrix(~ 0 + as.factor(colnames(axpr)))
colnames(design) = c("primary", "recurrent")
fit.1 = lmFit(axpr, design)
contrast = makeContrasts("primary-recurrent", levels=design)
fit.2 = contrasts.fit(fit.1, contrast)
fit.3 = eBayes(fit.2)
#
# make a summary table ordered by p-value
#
result = data.frame(
gene = rownames(fit.3),
p.value = fit.3$p.value[,1],
fold_change = fit.3$coefficients[,1],
stringsAsFactors = FALSE
)
result$adj.p = p.adjust(result$p.value, method="fdr")
result = result[order(result$p.value),]
cat(result$gene[result$adj.p < 0.01], sep="\n") # print all genes w/ FDR<1%
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment