Created
August 24, 2018 16:37
-
-
Save j-andrews7/f387e83e6e2f84bbdb71589f95521607 to your computer and use it in GitHub Desktop.
Making heatmaps
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
make.heatmaps <- function(dds, res, rld, level, g1, g2, plot_annos=NULL) { | |
# plot_annos should be a vector of at least two columns from the sample sheet to use for labeling plots. | |
# g1 and g2 should be strings for the groups to be compared for the level (eg. "Progression", "Response") | |
# dds must be a DESeqExperiment object. | |
# rld is the log-transformed dds object. | |
## Should really make another generic function for the actual heatmap plots and feed it in the pval/lfc values. | |
# Set color breaks and palatte. | |
breaks <- c(seq(-3,-1.251,length=250),seq(-1.25,-0.1001,length=250),seq(-0.1,0.1,length=1),seq(0.1001,1.25,length=250),seq(1.251,3,length=250)) | |
colors <- colorRampPalette(c("#67001f","#b2182b", "#f5f5f5", "#2166ac","#053061"))(n = 1000) | |
# Get the DEGs. | |
resSig <- subset(res, padj <= 0.1) | |
resSig <- resSig[order(resSig$padj),] | |
res100 <- row.names(head(resSig, 100)) | |
x <- assay(rld) | |
matrix <- x[res100,] | |
# Set which columns we want to use for annotating samples. | |
annotation_data <- as.data.frame(colData(rld)[plot_annos]) | |
# Plot the top 100. | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main="Top 100 DE Genes - padj <= 0.1 - RLD", fontsize_row=4, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main="Top 100 DE Genes - padj <= 0.1 - RLD", cluster_cols=F, fontsize_row=4, fontsize_col=5) | |
# Now just get the normalized counts and change the color breaks. | |
x <- assay(dds) | |
matrix <- x[res100,] | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main="Top 100 DE Genes - padj <= 0.1", fontsize_row=4, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main="Top 100 DE Genes - padj <= 0.1", cluster_cols=F, fontsize_row=4, fontsize_col=5) | |
# Now heatmaps for all DEGs with the variance-stabilized counts. | |
resSig <- subset(res, padj < 0.1) | |
resDEG <- row.names(resSig) | |
x <- assay(rld) | |
matrix <- x[resDEG,] | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.1 - RLD", sep=''), fontsize_row=3, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.1 - RLD", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5) | |
# Now the normalized counts for all DEGs. | |
x <- assay(dds) | |
matrix <- x[resDEG,] | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.1", sep=''), fontsize_row=3, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.1", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5) | |
# Now the same thing but only with the wanted samples. | |
x <- assay(rld) | |
x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)] | |
if (!(ncol(x) == ncol(x.sub))) { | |
matrix <- x.sub[res100,] | |
# Set which columns we want to use for annotating samples. | |
annotation_data <- as.data.frame(colData(rld)[plot_annos]) | |
# Plot the top 100. | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main=paste("Top 100 DE Genes - padj <= 0.1 - RLD", sep=''), fontsize_row=4, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main=paste("Top 100 DE Genes - padj <= 0.1 - RLD", sep=''), cluster_cols=F, fontsize_row=4, fontsize_col=5) | |
# Now just get the normalized counts and change the color breaks. | |
x <- assay(dds) | |
x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)] | |
matrix <- x.sub[res100,] | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main=paste("Top 100 DE Genes - padj <= 0.1", sep=''), fontsize_row=4, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main=paste("Top 100 DE Genes - padj <= 0.1", sep=''), cluster_cols=F, fontsize_row=4, fontsize_col=5) | |
# Now heatmaps for all DEGs with the variance-stabilized counts. | |
resSig <- subset(res, padj < 0.1) | |
resDEG <- row.names(resSig) | |
x <- assay(rld) | |
x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)] | |
matrix <- x.sub[resDEG,] | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.1 - RLD", sep=''), fontsize_row=3, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.1 - RLD", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5) | |
# Now the normalized counts for all DEGs. | |
x <- assay(dds) | |
x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)] | |
matrix <- x.sub[resDEG,] | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.1", sep=''), fontsize_row=3, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.1", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5) | |
} | |
### Now let's throw in a LFC magnitude threshold. | |
resSig <- subset(res, padj <= 0.1) | |
resSig <- subset(resSig, log2FoldChange >= 2 | log2FoldChange <= -2) | |
resSig <- resSig[order(resSig$padj),] | |
res100 <- row.names(head(resSig, 100)) | |
x <- assay(rld) | |
matrix <- x[res100,] | |
# Set which columns we want to use for annotating samples. | |
annotation_data <- as.data.frame(colData(rld)[plot_annos]) | |
# Plot the top 100. | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main="Top 100 DE Genes - padj <= 0.1 & LFC >|< 2 - RLD", fontsize_row=4, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main="Top 100 DE Genes - padj <= 0.1 & LFC >|< 2 - RLD", cluster_cols=F, fontsize_row=4, fontsize_col=5) | |
# Now just get the normalized counts and change the color breaks. | |
x <- assay(dds) | |
matrix <- x[res100,] | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main="Top 100 DE Genes - padj <= 0.1 & LFC >|< 2", fontsize_row=4, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main="Top 100 DE Genes - padj <= 0.1 & LFC >|< 2", cluster_cols=F, fontsize_row=4, fontsize_col=5) | |
# Now heatmaps for all DEGs with the variance-stabilized counts. | |
resDEG <- row.names(resSig) | |
x <- assay(rld) | |
matrix <- x[resDEG,] | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.1 & LFC >|< 2 - RLD", sep=''), fontsize_row=3, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.1 & LFC >|< 2 - RLD", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5) | |
# Now the normalized counts for all DEGs. | |
x <- assay(dds) | |
matrix <- x[resDEG,] | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.1 & LFC >|< 2", sep=''), fontsize_row=3, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.1 & LFC >|< 2", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5) | |
# Now the same thing but only with the wanted samples. | |
x <- assay(rld) | |
x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)] | |
if (!(ncol(x) == ncol(x.sub))) { | |
matrix <- x.sub[res100,] | |
# Set which columns we want to use for annotating samples. | |
annotation_data <- as.data.frame(colData(rld)[plot_annos]) | |
# Plot the top 100. | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main=paste("Top 100 DE Genes - padj <= 0.1 & LFC >|< 2 - RLD", sep=''), fontsize_row=4, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main=paste("Top 100 DE Genes - padj <= 0.1 & LFC >|< 2 - RLD", sep=''), cluster_cols=F, fontsize_row=4, fontsize_col=5) | |
# Now just get the normalized counts and change the color breaks. | |
x <- assay(dds) | |
x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)] | |
matrix <- x.sub[res100,] | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main=paste("Top 100 DE Genes - padj <= 0.1 & LFC >|< 2", sep=''), fontsize_row=4, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main=paste("Top 100 DE Genes - padj <= 0.1 & LFC >|< 2", sep=''), cluster_cols=F, fontsize_row=4, fontsize_col=5) | |
# Now heatmaps for all DEGs with the variance-stabilized counts. | |
resSig <- subset(res, padj <= 0.1) | |
resSig <- subset(resSig, log2FoldChange >= 2 | log2FoldChange <= -2) | |
resDEG <- row.names(resSig) | |
x <- assay(rld) | |
x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)] | |
matrix <- x.sub[resDEG,] | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.1 & LFC >|< 2 - RLD", sep=''), fontsize_row=3, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.1 & LFC >|< 2 - RLD", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5) | |
# Now the normalized counts for all DEGs. | |
x <- assay(dds) | |
x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)] | |
matrix <- x.sub[resDEG,] | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.1 & LFC >|< 2", sep=''), fontsize_row=3, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.1 & LFC >|< 2", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5) | |
} | |
### PADJ <= 0.05 ### | |
# Get the variance-stabilized counts. | |
resSig <- subset(res, padj <= 0.05) | |
resSig <- resSig[order(resSig$padj),] | |
res100 <- row.names(head(resSig, 100)) | |
x <- assay(rld) | |
matrix <- x[res100,] | |
# Set which columns we want to use for annotating samples. | |
annotation_data <- as.data.frame(colData(rld)[plot_annos]) | |
# Plot the top 100. | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main="Top 100 DE Genes - padj <= 0.05 - RLD", fontsize_row=4, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main="Top 100 DE Genes - padj <= 0.05 - RLD", cluster_cols=F, fontsize_row=4, fontsize_col=5) | |
# Now just get the normalized counts and change the color breaks. | |
x <- assay(dds) | |
matrix <- x[res100,] | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main="Top 100 DE Genes - padj <= 0.05", fontsize_row=4, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main="Top 100 DE Genes - padj <= 0.05", cluster_cols=F, fontsize_row=4, fontsize_col=5) | |
# Now heatmaps for all DEGs with the variance-stabilized counts. | |
resSig <- subset(res, padj < 0.05) | |
resDEG <- row.names(resSig) | |
x <- assay(rld) | |
matrix <- x[resDEG,] | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.05 - RLD", sep=''), fontsize_row=3, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.05 - RLD", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5) | |
# Now the normalized counts for all DEGs. | |
x <- assay(dds) | |
matrix <- x[resDEG,] | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.05", sep=''), fontsize_row=3, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.05", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5) | |
# Now the same thing but only with the wanted samples. | |
x <- assay(rld) | |
x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)] | |
if (!(ncol(x) == ncol(x.sub))) { | |
matrix <- x.sub[res100,] | |
# Set which columns we want to use for annotating samples. | |
annotation_data <- as.data.frame(colData(rld)[plot_annos]) | |
# Plot the top 100. | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main=paste("Top 100 DE Genes - padj <= 0.05 - RLD", sep=''), fontsize_row=4, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main=paste("Top 100 DE Genes - padj <= 0.05 - RLD", sep=''), cluster_cols=F, fontsize_row=4, fontsize_col=5) | |
# Now just get the normalized counts and change the color breaks. | |
x <- assay(dds) | |
x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)] | |
matrix <- x.sub[res100,] | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main=paste("Top 100 DE Genes - padj <= 0.05", sep=''), fontsize_row=4, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main=paste("Top 100 DE Genes - padj <= 0.05", sep=''), cluster_cols=F, fontsize_row=4, fontsize_col=5) | |
# Now heatmaps for all DEGs with the variance-stabilized counts. | |
resSig <- subset(res, padj < 0.05) | |
resDEG <- row.names(resSig) | |
x <- assay(rld) | |
x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)] | |
matrix <- x.sub[resDEG,] | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.05 - RLD", sep=''), fontsize_row=3, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.05 - RLD", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5) | |
# Now the normalized counts for all DEGs. | |
x <- assay(dds) | |
x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)] | |
matrix <- x.sub[resDEG,] | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.05", sep=''), fontsize_row=3, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.05", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5) | |
} | |
### Now let's throw in a LFC magnitude threshold. | |
resSig <- subset(res, padj <= 0.05) | |
resSig <- subset(resSig, log2FoldChange >= 2 | log2FoldChange <= -2) | |
resSig <- resSig[order(resSig$padj),] | |
res100 <- row.names(head(resSig, 100)) | |
x <- assay(rld) | |
matrix <- x[res100,] | |
# Set which columns we want to use for annotating samples. | |
annotation_data <- as.data.frame(colData(rld)[plot_annos]) | |
# Plot the top 100. | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main="Top 100 DE Genes - padj <= 0.05 & LFC >|< 2 - RLD", fontsize_row=4, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main="Top 100 DE Genes - padj <= 0.05 & LFC >|< 2 - RLD", cluster_cols=F, fontsize_row=4, fontsize_col=5) | |
# Now just get the normalized counts and change the color breaks. | |
x <- assay(dds) | |
matrix <- x[res100,] | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main="Top 100 DE Genes - padj <= 0.05 & LFC >|< 2", fontsize_row=4, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main="Top 100 DE Genes - padj <= 0.05 & LFC >|< 2", cluster_cols=F, fontsize_row=4, fontsize_col=5) | |
# Now heatmaps for all DEGs with the variance-stabilized counts. | |
resDEG <- row.names(resSig) | |
x <- assay(rld) | |
matrix <- x[resDEG,] | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.05 & LFC >|< 2 - RLD", sep=''), fontsize_row=3, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.05 & LFC >|< 2 - RLD", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5) | |
# Now the normalized counts for all DEGs. | |
x <- assay(dds) | |
matrix <- x[resDEG,] | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.05 & LFC >|< 2", sep=''), fontsize_row=3, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.05 & LFC >|< 2", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5) | |
# Now the same thing but only with the wanted samples. | |
x <- assay(rld) | |
x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)] | |
if (!(ncol(x) == ncol(x.sub))) { | |
matrix <- x.sub[res100,] | |
# Set which columns we want to use for annotating samples. | |
annotation_data <- as.data.frame(colData(rld)[plot_annos]) | |
# Plot the top 100. | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main=paste("Top 100 DE Genes - padj <= 0.05 & LFC >|< 2 - RLD", sep=''), fontsize_row=4, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main=paste("Top 100 DE Genes - padj <= 0.05 & LFC >|< 2 - RLD", sep=''), cluster_cols=F, fontsize_row=4, fontsize_col=5) | |
# Now just get the normalized counts and change the color breaks. | |
x <- assay(dds) | |
x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)] | |
matrix <- x.sub[res100,] | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main=paste("Top 100 DE Genes - padj <= 0.05 & LFC >|< 2", sep=''), fontsize_row=4, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T, | |
main=paste("Top 100 DE Genes - padj <= 0.05 & LFC >|< 2", sep=''), cluster_cols=F, fontsize_row=4, fontsize_col=5) | |
# Now heatmaps for all DEGs with the variance-stabilized counts. | |
resSig <- subset(res, padj <= 0.05) | |
resSig <- subset(resSig, log2FoldChange >= 2 | log2FoldChange <= -2) | |
resDEG <- row.names(resSig) | |
x <- assay(rld) | |
x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)] | |
matrix <- x.sub[resDEG,] | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.05 & LFC >|< 2 - RLD", sep=''), fontsize_row=3, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.05 & LFC >|< 2 - RLD", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5) | |
# Now the normalized counts for all DEGs. | |
x <- assay(dds) | |
x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)] | |
matrix <- x.sub[resDEG,] | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.05 & LFC >|< 2", sep=''), fontsize_row=3, fontsize_col=5) | |
pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F, | |
main=paste("ALL DE Genes - padj <= 0.05 & LFC >|< 2", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5) | |
} | |
} | |
run.DESeq2 <- function(outpath, quantspath, samplesheet, tx2gene, level, g1, g2, plot_annos=NULL, block=NULL) { | |
# plot_annos should be a vector of at least two columns from the sample sheet to use for labeling plots. | |
# g1 and g2 should be strings for the groups to be compared for the level (eg. "Progression", "Response") | |
# block can either be a single string ("cells") or a vector of multiple (c("cells", "disease")) | |
message("### EXPLORATORY DATA ANALYSIS ###\n") | |
message("# SET DIRECTORY STRUCTURE AND MODEL DESIGN #\n") | |
### SET DIRECTORY STRUCTURE AND MODEL DESIGN ### | |
# Base folder to create output folders in. Create output folders. | |
base <- outpath | |
# Determine if there is a blocking factor or not and set design. | |
if (!is.null(block)) { | |
design <- formula(paste("~", paste(c(block, level), sep='', collapse=" + "))) | |
if (!dir.exists(file.path(base,paste(c(block,"Block"), sep='', collapse='')))) { | |
dir.create(file.path(base,paste(c(block,"Block"), sep='', collapse=''))) | |
} | |
if (!dir.exists(file.path(base,paste(c(block,"Block/GenericFigures"), sep='', collapse='')))) { | |
dir.create(file.path(base,paste(c(block,"Block/GenericFigures"), sep='', collapse=''))) | |
} | |
if (!dir.exists(file.path(base,paste(c(block,"Block/GeneBoxPlots"), sep='', collapse='')))) { | |
dir.create(file.path(base,paste(c(block,"Block/GeneBoxPlots"), sep='', collapse=''))) | |
} | |
base = file.path(base, paste(c(block,"Block"),sep='', collapse='')) | |
} else { | |
design <- formula(paste("~", level)) | |
if (!dir.exists(file.path(base,"NoBlock"))) { | |
dir.create(file.path(base,"NoBlock")) | |
} | |
if (!dir.exists(file.path(base,"NoBlock/GenericFigures"))) { | |
dir.create(file.path(base,"NoBlock/GenericFigures")) | |
} | |
if (!dir.exists(file.path(base,"NoBlock/GeneBoxPlots"))) { | |
dir.create(file.path(base,"NoBlock/GeneBoxPlots")) | |
} | |
base = file.path(base, "NoBlock") | |
} | |
if (is.null(plot_annos)) { | |
plot_annos <- level | |
} | |
message("# FILE LOADING & PRE-FILTERING #\n") | |
### FILE LOADING & PRE-FILTERING ### | |
tx2gene = read.table(tx2gene, sep="\t") | |
samples <- read.table(samplesheet, header=TRUE) | |
rownames(samples) <- samples$name | |
files <- file.path(quantspath, samples$name, "quant.sf") | |
names(files) <- samples$name | |
# Read in our actual count files now. | |
txi <- tximport(files, type="salmon", tx2gene=tx2gene) | |
message(paste("\n\nDesign is:",design,".\n\n")) | |
# Create the DESeqDataSet object. | |
dds <- DESeqDataSetFromTximport(txi, | |
colData = samples, | |
design = design) | |
# Pre-filter transcripts with really low read counts (<100 across all samples) | |
message(paste("\ndds starting with",nrow(dds),"genes.")) | |
dds <- dds[rowSums(counts(dds)) >= 100,] | |
message(paste("\ndds has",nrow(dds),"genes after filtering rows with under 100 reads total.\n")) | |
message("\n# VARIANCE STABILIZATION COMPARISONS #\n") | |
message(paste(base,"/GenericFigures/transformation.pdf\n", sep="")) | |
### VARIANCE STABILIZATION COMPARISONS ### | |
rld <- rlog(dds, blind = FALSE) | |
vsd <- vst(dds, blind = FALSE) | |
pdf(paste(base,"/GenericFigures/transformation.pdf", sep="")) | |
dds <- DESeq2::estimateSizeFactors(dds) | |
df <- bind_rows( | |
as_data_frame(log2(counts(dds, normalized=TRUE)[, 1:2]+1)) %>% | |
mutate(transformation = "log2(x + 1)"), | |
as_data_frame(assay(rld)[, 1:2]) %>% mutate(transformation = "rlog"), | |
as_data_frame(assay(vsd)[, 1:2]) %>% mutate(transformation = "vst")) | |
df | |
colnames(df)[1:2] <- c("x", "y") | |
p <- ggplot(df, aes(x = x, y = y)) + geom_hex(bins = 80) + | |
coord_fixed() + facet_grid( . ~ transformation) | |
print(p) | |
dev.off() | |
### SAMPLE DISTANCES ### | |
message(paste("\n# PLOTTING SAMPLE DISTANCES #\n", paste(base,"/GenericFigures/samp_dist.pdf\n", sep=""))) | |
sampleDists <- dist(t(assay(rld))) | |
pdf(paste(base,"/GenericFigures/samp_dist.pdf", sep="")) | |
## ----distheatmap, fig.width = 6.1, fig.height = 4.5---------------------- | |
sampleDistMatrix <- as.matrix( sampleDists ) | |
rownames(sampleDistMatrix) <- paste( colData(rld)[,level], rld$name, sep = " - " ) | |
colnames(sampleDistMatrix) <- NULL | |
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) | |
pheatmap(sampleDistMatrix, | |
clustering_distance_rows = sampleDists, | |
clustering_distance_cols = sampleDists, | |
col = colors) | |
# Now with only the groups we want to compare. | |
rld.sub = rld[,colData(rld)[,level] %in% c(g1, g2)] | |
sampleDists <- dist(t(assay(rld.sub))) | |
sampleDistMatrix <- as.matrix( sampleDists ) | |
rownames(sampleDistMatrix) <- paste( colData(rld.sub)[,level], rld.sub$name, sep = " - " ) | |
pheatmap(sampleDistMatrix, | |
clustering_distance_rows = sampleDists, | |
clustering_distance_cols = sampleDists, | |
col = colors) | |
dev.off() | |
### PCA PLOTS ### | |
message(paste("# PCA PLOTS #\n",paste(base, "/GenericFigures/pca.pdf\n", sep=''))) | |
pcaData <- DESeq2::plotPCA(rld, intgroup = plot_annos, returnData = TRUE) | |
pcaData | |
percentVar <- round(100 * attr(pcaData, "percentVar")) | |
pdf(paste(base, "/GenericFigures/pca.pdf", sep='')) | |
p <- DESeq2::plotPCA(rld, intgroup = plot_annos) | |
print(p) | |
## ----ggplotpca, fig.width=6, fig.height=4.5------------------------------ | |
p <- ggplot(pcaData, aes(x = PC1, y = PC2, color = colData(rld)[,plot_annos[1]], shape = colData(rld)[,plot_annos[2]])) + | |
geom_point(size =3) + | |
xlab(paste0("PC1: ", percentVar[1], "% variance")) + | |
ylab(paste0("PC2: ", percentVar[2], "% variance")) + | |
coord_fixed() | |
print(p) | |
# Now with only the groups we want to compare. | |
pcaData <- DESeq2::plotPCA(rld.sub, intgroup = plot_annos, returnData = TRUE) | |
percentVar <- round(100 * attr(pcaData, "percentVar")) | |
p <- DESeq2::plotPCA(rld.sub, intgroup = plot_annos) | |
print(p) | |
## ----ggplotpca, fig.width=6, fig.height=4.5------------------------------ | |
p <- ggplot(pcaData, aes(x = PC1, y = PC2, color = colData(rld.sub)[,plot_annos[1]], shape = colData(rld.sub)[,plot_annos[2]])) + | |
geom_point(size =3) + | |
xlab(paste0("PC1: ", percentVar[1], "% variance")) + | |
ylab(paste0("PC2: ", percentVar[2], "% variance")) + | |
coord_fixed() | |
print(p) | |
dev.off() | |
#======================================# | |
### DIFFERENTIAL EXPRESSION ANALYSIS ### | |
message("\n### DIFFERENTIAL EXPRESSION ANALYSIS ###\n") | |
dds <- DESeq(dds) | |
res <- lfcShrink(dds, contrast=c(level, g1, g2), type='ashr') | |
### PLOTTING THE RESULTS ### | |
# This gets all the normalized counts for all genes/samples. | |
resdata <- merge(as.data.frame(res), as.data.frame(counts(dds, normalized=TRUE)), by="row.names", sort=FALSE) | |
names(resdata)[1] <- "Gene" | |
# Make table for just DEGs. | |
resSig <- subset(resdata, padj <= 0.1) | |
### DEG BOXPLOTS ### | |
message("\n# DEG BOXPLOTS #\n") | |
message("Creating boxplots for all genes with padj <= 0.1") | |
# If you have more than 7 levels for your contrast, you may need to add colors here. | |
fill = c("#A6CEE3","#B2DF8A","#FDBF6F","#CAB2D6","#f4cae4","#f1e2cc","#b3e2cd") | |
line = c("#1F78B4","#33A02C","#FF7F00","#6A3D9A","#e7298a","#a6761d","#1b9e77") | |
for (i in 1:nrow(resSig)){ | |
if (!file.exists(paste(base,"/GeneBoxPlots/",gsub('/','-',resSig$Gene[i]),".BoxPlot.pdf",sep=""))){ | |
pdf(paste(base,"/GeneBoxPlots/",gsub('/','-',resSig$Gene[i]),".BoxPlot.pdf",sep="")) | |
d = plotCounts(dds, gene=resSig$Gene[i], intgroup=level) | |
d = plotCounts(dds, gene=resSig$Gene[i], intgroup=level, returnData = T) | |
p = ggplot(d, aes(x=d[,level], y=count)) + geom_boxplot(fill=fill[1:length(levels(colData(rld)[,level]))],colour=line[1:length(levels(colData(rld)[,level]))]) + ggtitle(resSig$Gene[i]) + coord_trans(y="log10") | |
print(p) | |
d = plotCounts(dds, gene=resSig$Gene[i], intgroup=level, returnData = T) | |
e = subset(d, (get(level) == g1 | get(level) == g2)) | |
p = ggplot(e, aes(x=e[,level], y=count)) + geom_boxplot(fill=fill[1:2],colour=line[1:2]) + ggtitle(resSig$Gene[i]) + coord_trans(y="log10") | |
print(p) | |
dev.off() | |
} | |
} | |
### DEG PCA PLOTS ### | |
# TODO - Make this a generic function. | |
message(paste("\n# DEG PCA PLOTS #\n", base,"/DEG_pca.pdf\n",sep='')) | |
pdf(paste(base,"/DEG_pca.pdf", sep='')) | |
resdata <- merge(as.data.frame(res), as.data.frame(assay(rld)), by="row.names", sort=FALSE) | |
names(resdata)[1] <- "Gene" | |
resSig <- subset(resdata, padj <= 0.1) | |
# Show only DEG genes and only the wanted groups. | |
rld.sub = rld[resSig$Gene, colData(rld)[,level] %in% c(g1, g2)] | |
p <- DESeq2::plotPCA(rld.sub, intgroup = plot_annos) + ggtitle("DEGs - padj <= 0.1") | |
print(p) | |
p <- DESeq2::plotPCA(rld.sub, intgroup = level) + ggtitle("DEGs - padj <= 0.1") | |
print(p) | |
# Now with lfc threshold on genes as well. | |
resSig <- subset(resSig, log2FoldChange >= 2 | log2FoldChange <= -2) | |
# Show only DEG genes and only the wanted groups. | |
rld.sub = rld[resSig$Gene, colData(rld)[,level] %in% c(g1, g2)] | |
p <- DESeq2::plotPCA(rld.sub, intgroup = plot_annos) + ggtitle("DEGs - padj <= 0.1 & LFC >|< 2") | |
print(p) | |
p <- DESeq2::plotPCA(rld.sub, intgroup = level) + ggtitle("DEGs - padj <= 0.1 & LFC >|< 2") | |
print(p) | |
resSig <- subset(resdata, padj <= 0.05) | |
# Show only DEG genes and only the wanted groups. | |
rld.sub = rld[resSig$Gene, colData(rld)[,level] %in% c(g1, g2)] | |
p <- DESeq2::plotPCA(rld.sub, intgroup = plot_annos) + ggtitle("DEGs - padj <= 0.05") | |
print(p) | |
p <- DESeq2::plotPCA(rld.sub, intgroup = level) + ggtitle("DEGs - padj <= 0.05") | |
print(p) | |
# Now with lfc threshold on genes as well. | |
resSig <- subset(resdata, padj <= 0.05) | |
resSig <- subset(resSig, log2FoldChange >= 2 | log2FoldChange <= -2) | |
# Show only DEG genes and only the wanted groups. | |
rld.sub = rld[resSig$Gene, colData(rld)[,level] %in% c(g1, g2)] | |
p <- DESeq2::plotPCA(rld.sub, intgroup = plot_annos) + ggtitle("DEGs - padj <= 0.05 & LFC >|< 2") | |
print(p) | |
p <- DESeq2::plotPCA(rld.sub, intgroup = level) + ggtitle("DEGs - padj <= 0.05 & LFC >|< 2") | |
print(p) | |
dev.off() | |
### MA PLOT ### | |
message(paste("\n# MA PLOT #\n", base, "/MA_plot.pdf\n", sep='')) | |
pdf(paste(base,"/MA_plot.pdf", sep='')) | |
plotMA(res, ylim = c(-5, 5)) | |
dev.off() | |
### GENE CLUSTERING - VARIABLE ### | |
# First with all samples included. | |
message(paste("\n# VARIABLE GENE CLUSTERING #\n", base, "/Top20_VariableGenes_HeatmapDistances.pdf\n", sep='')) | |
topVarGenes <- head(order(rowVars(assay(rld)), decreasing = TRUE), 20) | |
mat <- assay(rld)[ topVarGenes, ] | |
mat <- mat - rowMeans(mat) | |
anno <- as.data.frame(colData(rld)[, plot_annos]) | |
pdf(paste(base,"/Top20_VariableGenes_HeatmapDistances.pdf", sep='')) | |
pheatmap(mat, annotation_col = anno, main="Top 20 Variable Genes") | |
#Then with only the wanted groups. | |
rld.sub = rld[, colData(rld)[,level] %in% c(g1, g2)] | |
topVarGenes <- head(order(rowVars(assay(rld.sub)), decreasing = TRUE), 20) | |
mat <- assay(rld.sub)[ topVarGenes, ] | |
mat <- mat - rowMeans(mat) | |
anno <- as.data.frame(colData(rld.sub)[, plot_annos]) | |
pheatmap(mat, annotation_col = anno, main="Top 20 Variable Genes") | |
dev.off() | |
### VOLCANO PLOT ### | |
message(paste("# VOLCANO PLOT #\n", getwd(), "/", substr(base, 2, 500), "/",g1,"-v-",g2,".DEGs.Volcano.html\n", sep='')) | |
# Set up results table. | |
res <- res[order(res$padj),] | |
resdata <- merge(as.data.frame(res), as.data.frame(counts(dds, normalized=TRUE)), by="row.names", sort=FALSE) | |
names(resdata)[1] <- "Gene" | |
diff_df = resdata[c("Gene","log2FoldChange","padj")] | |
# add a grouping column; default value is "not significant" | |
diff_df["group"] <- "NotSignificant" | |
# for our plot, we want to highlight | |
# padj < 0.1 (significance level) | |
# log2FoldChange > 2 | |
# change the grouping for the entries with significance but not a large enough Fold change | |
diff_df[which(diff_df['padj'] <= 0.1 & abs(diff_df['log2FoldChange']) <= 2 ),"group"] <- "Significant" | |
# change the grouping for the entries with a large enough Fold change but not a low enough p value | |
diff_df[which(diff_df['padj'] >= 0.1 & abs(diff_df['log2FoldChange']) >= 2 ),"group"] <- "FoldChange" | |
# change the grouping for the entries with both significance and large enough fold change | |
diff_df[which(diff_df['padj'] <= 0.1 & abs(diff_df['log2FoldChange']) >= 2 ),"group"] <- "Significant&FoldChange" | |
# Find and label the top peaks. | |
top_peaks <- diff_df[with(diff_df, order(log2FoldChange, padj)),][1:5,] | |
top_peaks <- rbind(top_peaks, diff_df[with(diff_df, order(-log2FoldChange, padj)),][1:5,]) | |
# Add gene labels for all of the top genes we found | |
a <- list( | |
x = top_peaks$log2FoldChange, | |
y = -log10(top_peaks$padj), | |
text = top_peaks$Gene, | |
xref = "x", | |
yref = "y", | |
showarrow = TRUE, | |
arrowhead = 0.5, | |
ax = 20, | |
ay = -40 | |
) | |
p <- plot_ly(diff_df, x = ~log2FoldChange, y = -log10(diff_df$padj), text = ~Gene, color = ~group,mode = "markers", type="scatter") %>% | |
layout(title = paste(g1,"v",g2, sep='-'), xaxis=list(title="log2 Fold Change"), yaxis=list(title="-log10(adjusted p-value)")) | |
# Save plot - has to use absolute path because the htmltools package is apparently hot garbage. | |
htmlwidgets::saveWidget(as_widget(p), paste(getwd(), "/", substr(base, 2, 500), "/",g1,"-v-",g2,".DEGs.Volcano.html", sep='')) | |
### HEATMAPS ### | |
message(paste("\n# DEG HEATMAPS #\n", sep='')) | |
pdf(paste(base, "/DEG_Heatmaps.pdf", sep='')) | |
make.heatmaps(dds, res, rld, level, g1, g2, plot_annos) | |
dev.off() | |
### SAVING TABLES ### | |
message("\n# SAVING RESULTS TABLES #\n") | |
write.csv(resdata, file=paste(base, "/",g1,"-v-",g2,".ALL.csv",sep=''), row.names=F) | |
# Make table for just DEGs too. | |
resSig <- subset(resdata, padj <= 0.1) | |
write.csv(resSig, file=paste(base, "/",g1,"-v-",g2,".DEGs.padj0.1.csv", sep=''), row.names=F) | |
resSig <- subset(resdata, padj <= 0.05) | |
write.csv(resSig, file=paste(base, "/",g1,"-v-",g2,".DEGs.padj0.05.csv", sep=''), row.names=F) | |
return(list(res=res, dds=dds, rld=rld)) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment