j-andrews7/For_Nina_Heatmaps.R

## For_Nina_Heatmaps.R
make.heatmaps <- function(dds, res, rld, level, g1, g2, plot_annos=NULL) {
    # plot_annos should be a vector of at least two columns from the sample sheet to use for labeling plots.
    # g1 and g2 should be strings for the groups to be compared for the level (eg. "Progression", "Response")
    # dds must be a DESeqExperiment object.
    # rld is the log-transformed dds object.

    ## Should really make another generic function for the actual heatmap plots and feed it in the pval/lfc values.

    # Set color breaks and palatte.
    breaks <- c(seq(-3,-1.251,length=250),seq(-1.25,-0.1001,length=250),seq(-0.1,0.1,length=1),seq(0.1001,1.25,length=250),seq(1.251,3,length=250))
    colors <- colorRampPalette(c("#67001f","#b2182b", "#f5f5f5", "#2166ac","#053061"))(n = 1000)

    # Get the DEGs.
    resSig <- subset(res, padj <= 0.1)
    resSig <- resSig[order(resSig$padj),]
    res100 <- row.names(head(resSig, 100))
    x <- assay(rld)
    matrix <- x[res100,]

    # Set which columns we want to use for annotating samples.
    annotation_data <- as.data.frame(colData(rld)[plot_annos])

    # Plot the top 100.
    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
             main="Top 100 DE Genes - padj <= 0.1 - RLD", fontsize_row=4, fontsize_col=5)
    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
             main="Top 100 DE Genes - padj <= 0.1 - RLD", cluster_cols=F, fontsize_row=4, fontsize_col=5)

    # Now just get the normalized counts and change the color breaks.
    x <- assay(dds)
    matrix <- x[res100,]

    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
             main="Top 100 DE Genes - padj <= 0.1", fontsize_row=4, fontsize_col=5)
    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
             main="Top 100 DE Genes - padj <= 0.1", cluster_cols=F, fontsize_row=4, fontsize_col=5)

    # Now heatmaps for all DEGs with the variance-stabilized counts.
    resSig <- subset(res, padj < 0.1)
    resDEG <- row.names(resSig)
    x <- assay(rld)
    matrix <- x[resDEG,]

    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
             main=paste("ALL DE Genes - padj <= 0.1 - RLD", sep=''), fontsize_row=3, fontsize_col=5)
    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
             main=paste("ALL DE Genes - padj <= 0.1 - RLD", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5)

    # Now the normalized counts for all DEGs.
    x <- assay(dds)
    matrix <- x[resDEG,]

    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
             main=paste("ALL DE Genes - padj <= 0.1", sep=''), fontsize_row=3, fontsize_col=5)
    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
             main=paste("ALL DE Genes - padj <= 0.1", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5)

    # Now the same thing but only with the wanted samples.
    x <- assay(rld)
    x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)]
    if (!(ncol(x) == ncol(x.sub))) {
        matrix <- x.sub[res100,]

        # Set which columns we want to use for annotating samples.
        annotation_data <- as.data.frame(colData(rld)[plot_annos])

        # Plot the top 100.
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
                 main=paste("Top 100 DE Genes - padj <= 0.1 - RLD", sep=''), fontsize_row=4, fontsize_col=5)
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
                 main=paste("Top 100 DE Genes - padj <= 0.1 - RLD", sep=''), cluster_cols=F, fontsize_row=4, fontsize_col=5)

        # Now just get the normalized counts and change the color breaks.
        x <- assay(dds)
        x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)]
        matrix <- x.sub[res100,]
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
                 main=paste("Top 100 DE Genes - padj <= 0.1", sep=''), fontsize_row=4, fontsize_col=5)
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
                 main=paste("Top 100 DE Genes - padj <= 0.1", sep=''), cluster_cols=F, fontsize_row=4, fontsize_col=5)

        # Now heatmaps for all DEGs with the variance-stabilized counts.
        resSig <- subset(res, padj < 0.1)
        resDEG <- row.names(resSig)
        x <- assay(rld)
        x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)]
        matrix <- x.sub[resDEG,]
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
                 main=paste("ALL DE Genes - padj <= 0.1 - RLD", sep=''), fontsize_row=3, fontsize_col=5)
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
                 main=paste("ALL DE Genes - padj <= 0.1 - RLD", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5)

        # Now the normalized counts for all DEGs.
        x <- assay(dds)
        x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)]
        matrix <- x.sub[resDEG,]
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
                 main=paste("ALL DE Genes - padj <= 0.1", sep=''), fontsize_row=3, fontsize_col=5)
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
                 main=paste("ALL DE Genes - padj <= 0.1", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5)
    }

    ### Now let's throw in a LFC magnitude threshold.
    resSig <- subset(res, padj <= 0.1)
    resSig <- subset(resSig, log2FoldChange >= 2 | log2FoldChange <= -2)
    resSig <- resSig[order(resSig$padj),]
    res100 <- row.names(head(resSig, 100))
    x <- assay(rld)
    matrix <- x[res100,]

    # Set which columns we want to use for annotating samples.
    annotation_data <- as.data.frame(colData(rld)[plot_annos])

    # Plot the top 100.
    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
             main="Top 100 DE Genes - padj <= 0.1 & LFC >|< 2 - RLD", fontsize_row=4, fontsize_col=5)
    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
             main="Top 100 DE Genes - padj <= 0.1 & LFC >|< 2 - RLD", cluster_cols=F, fontsize_row=4, fontsize_col=5)

    # Now just get the normalized counts and change the color breaks.
    x <- assay(dds)
    matrix <- x[res100,]

    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
             main="Top 100 DE Genes - padj <= 0.1 & LFC >|< 2", fontsize_row=4, fontsize_col=5)
    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
             main="Top 100 DE Genes - padj <= 0.1 & LFC >|< 2", cluster_cols=F, fontsize_row=4, fontsize_col=5)

    # Now heatmaps for all DEGs with the variance-stabilized counts.
    resDEG <- row.names(resSig)
    x <- assay(rld)
    matrix <- x[resDEG,]

    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
             main=paste("ALL DE Genes - padj <= 0.1 & LFC >|< 2 - RLD", sep=''), fontsize_row=3, fontsize_col=5)
    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
             main=paste("ALL DE Genes - padj <= 0.1 & LFC >|< 2 - RLD", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5)

    # Now the normalized counts for all DEGs.
    x <- assay(dds)
    matrix <- x[resDEG,]

    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
             main=paste("ALL DE Genes - padj <= 0.1 & LFC >|< 2", sep=''), fontsize_row=3, fontsize_col=5)
    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
             main=paste("ALL DE Genes - padj <= 0.1 & LFC >|< 2", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5)

    # Now the same thing but only with the wanted samples.
    x <- assay(rld)
    x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)]
    if (!(ncol(x) == ncol(x.sub))) {
        matrix <- x.sub[res100,]

        # Set which columns we want to use for annotating samples.
        annotation_data <- as.data.frame(colData(rld)[plot_annos])

        # Plot the top 100.
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
                 main=paste("Top 100 DE Genes - padj <= 0.1 & LFC >|< 2 - RLD", sep=''), fontsize_row=4, fontsize_col=5)
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
                 main=paste("Top 100 DE Genes - padj <= 0.1 & LFC >|< 2 - RLD", sep=''), cluster_cols=F, fontsize_row=4, fontsize_col=5)

        # Now just get the normalized counts and change the color breaks.
        x <- assay(dds)
        x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)]
        matrix <- x.sub[res100,]
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
                 main=paste("Top 100 DE Genes - padj <= 0.1 & LFC >|< 2", sep=''), fontsize_row=4, fontsize_col=5)
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
                 main=paste("Top 100 DE Genes - padj <= 0.1 & LFC >|< 2", sep=''), cluster_cols=F, fontsize_row=4, fontsize_col=5)

        # Now heatmaps for all DEGs with the variance-stabilized counts.
        resSig <- subset(res, padj <= 0.1)
        resSig <- subset(resSig, log2FoldChange >= 2 | log2FoldChange <= -2)
        resDEG <- row.names(resSig)
        x <- assay(rld)
        x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)]
        matrix <- x.sub[resDEG,]
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
                 main=paste("ALL DE Genes - padj <= 0.1 & LFC >|< 2 - RLD", sep=''), fontsize_row=3, fontsize_col=5)
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
                 main=paste("ALL DE Genes - padj <= 0.1 & LFC >|< 2 - RLD", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5)

        # Now the normalized counts for all DEGs.
        x <- assay(dds)
        x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)]
        matrix <- x.sub[resDEG,]
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
                 main=paste("ALL DE Genes - padj <= 0.1 & LFC >|< 2", sep=''), fontsize_row=3, fontsize_col=5)
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
                 main=paste("ALL DE Genes - padj <= 0.1 & LFC >|< 2", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5)
    }

    ### PADJ <= 0.05 ###
    # Get the variance-stabilized counts.
    resSig <- subset(res, padj <= 0.05)
    resSig <- resSig[order(resSig$padj),]
    res100 <- row.names(head(resSig, 100))
    x <- assay(rld)
    matrix <- x[res100,]

    # Set which columns we want to use for annotating samples.
    annotation_data <- as.data.frame(colData(rld)[plot_annos])

    # Plot the top 100.
    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
             main="Top 100 DE Genes - padj <= 0.05 - RLD", fontsize_row=4, fontsize_col=5)
    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
             main="Top 100 DE Genes - padj <= 0.05 - RLD", cluster_cols=F, fontsize_row=4, fontsize_col=5)

    # Now just get the normalized counts and change the color breaks.
    x <- assay(dds)
    matrix <- x[res100,]

    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
             main="Top 100 DE Genes - padj <= 0.05", fontsize_row=4, fontsize_col=5)
    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
             main="Top 100 DE Genes - padj <= 0.05", cluster_cols=F, fontsize_row=4, fontsize_col=5)

    # Now heatmaps for all DEGs with the variance-stabilized counts.
    resSig <- subset(res, padj < 0.05)
    resDEG <- row.names(resSig)
    x <- assay(rld)
    matrix <- x[resDEG,]

    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
             main=paste("ALL DE Genes - padj <= 0.05 - RLD", sep=''), fontsize_row=3, fontsize_col=5)
    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
             main=paste("ALL DE Genes - padj <= 0.05 - RLD", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5)

    # Now the normalized counts for all DEGs.
    x <- assay(dds)
    matrix <- x[resDEG,]

    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
             main=paste("ALL DE Genes - padj <= 0.05", sep=''), fontsize_row=3, fontsize_col=5)
    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
             main=paste("ALL DE Genes - padj <= 0.05", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5)

    # Now the same thing but only with the wanted samples.
    x <- assay(rld)
    x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)]
    if (!(ncol(x) == ncol(x.sub))) {
        matrix <- x.sub[res100,]

        # Set which columns we want to use for annotating samples.
        annotation_data <- as.data.frame(colData(rld)[plot_annos])

        # Plot the top 100.
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
                 main=paste("Top 100 DE Genes - padj <= 0.05 - RLD", sep=''), fontsize_row=4, fontsize_col=5)
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
                 main=paste("Top 100 DE Genes - padj <= 0.05 - RLD", sep=''), cluster_cols=F, fontsize_row=4, fontsize_col=5)

        # Now just get the normalized counts and change the color breaks.
        x <- assay(dds)
        x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)]
        matrix <- x.sub[res100,]
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
                 main=paste("Top 100 DE Genes - padj <= 0.05", sep=''), fontsize_row=4, fontsize_col=5)
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
                 main=paste("Top 100 DE Genes - padj <= 0.05", sep=''), cluster_cols=F, fontsize_row=4, fontsize_col=5)

        # Now heatmaps for all DEGs with the variance-stabilized counts.
        resSig <- subset(res, padj < 0.05)
        resDEG <- row.names(resSig)
        x <- assay(rld)
        x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)]
        matrix <- x.sub[resDEG,]
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
                 main=paste("ALL DE Genes - padj <= 0.05 - RLD", sep=''), fontsize_row=3, fontsize_col=5)
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
                 main=paste("ALL DE Genes - padj <= 0.05 - RLD", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5)

        # Now the normalized counts for all DEGs.
        x <- assay(dds)
        x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)]
        matrix <- x.sub[resDEG,]
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
                 main=paste("ALL DE Genes - padj <= 0.05", sep=''), fontsize_row=3, fontsize_col=5)
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
                 main=paste("ALL DE Genes - padj <= 0.05", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5)
    }

    ### Now let's throw in a LFC magnitude threshold.
    resSig <- subset(res, padj <= 0.05)
    resSig <- subset(resSig, log2FoldChange >= 2 | log2FoldChange <= -2)
    resSig <- resSig[order(resSig$padj),]
    res100 <- row.names(head(resSig, 100))
    x <- assay(rld)
    matrix <- x[res100,]

    # Set which columns we want to use for annotating samples.
    annotation_data <- as.data.frame(colData(rld)[plot_annos])

    # Plot the top 100.
    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
             main="Top 100 DE Genes - padj <= 0.05 & LFC >|< 2 - RLD", fontsize_row=4, fontsize_col=5)
    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
             main="Top 100 DE Genes - padj <= 0.05 & LFC >|< 2 - RLD", cluster_cols=F, fontsize_row=4, fontsize_col=5)

    # Now just get the normalized counts and change the color breaks.
    x <- assay(dds)
    matrix <- x[res100,]

    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
             main="Top 100 DE Genes - padj <= 0.05 & LFC >|< 2", fontsize_row=4, fontsize_col=5)
    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
             main="Top 100 DE Genes - padj <= 0.05 & LFC >|< 2", cluster_cols=F, fontsize_row=4, fontsize_col=5)

    # Now heatmaps for all DEGs with the variance-stabilized counts.
    resDEG <- row.names(resSig)
    x <- assay(rld)
    matrix <- x[resDEG,]

    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
             main=paste("ALL DE Genes - padj <= 0.05 & LFC >|< 2 - RLD", sep=''), fontsize_row=3, fontsize_col=5)
    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
             main=paste("ALL DE Genes - padj <= 0.05 & LFC >|< 2 - RLD", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5)

    # Now the normalized counts for all DEGs.
    x <- assay(dds)
    matrix <- x[resDEG,]

    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
             main=paste("ALL DE Genes - padj <= 0.05 & LFC >|< 2", sep=''), fontsize_row=3, fontsize_col=5)
    pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
             main=paste("ALL DE Genes - padj <= 0.05 & LFC >|< 2", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5)

    # Now the same thing but only with the wanted samples.
    x <- assay(rld)
    x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)]
    if (!(ncol(x) == ncol(x.sub))) {
        matrix <- x.sub[res100,]

        # Set which columns we want to use for annotating samples.
        annotation_data <- as.data.frame(colData(rld)[plot_annos])

        # Plot the top 100.
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
                 main=paste("Top 100 DE Genes - padj <= 0.05 & LFC >|< 2 - RLD", sep=''), fontsize_row=4, fontsize_col=5)
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
                 main=paste("Top 100 DE Genes - padj <= 0.05 & LFC >|< 2 - RLD", sep=''), cluster_cols=F, fontsize_row=4, fontsize_col=5)

        # Now just get the normalized counts and change the color breaks.
        x <- assay(dds)
        x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)]
        matrix <- x.sub[res100,]
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
                 main=paste("Top 100 DE Genes - padj <= 0.05 & LFC >|< 2", sep=''), fontsize_row=4, fontsize_col=5)
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = T,
                 main=paste("Top 100 DE Genes - padj <= 0.05 & LFC >|< 2", sep=''), cluster_cols=F, fontsize_row=4, fontsize_col=5)

        # Now heatmaps for all DEGs with the variance-stabilized counts.
        resSig <- subset(res, padj <= 0.05)
        resSig <- subset(resSig, log2FoldChange >= 2 | log2FoldChange <= -2)
        resDEG <- row.names(resSig)
        x <- assay(rld)
        x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)]
        matrix <- x.sub[resDEG,]
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
                 main=paste("ALL DE Genes - padj <= 0.05 & LFC >|< 2 - RLD", sep=''), fontsize_row=3, fontsize_col=5)
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
                 main=paste("ALL DE Genes - padj <= 0.05 & LFC >|< 2 - RLD", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5)

        # Now the normalized counts for all DEGs.
        x <- assay(dds)
        x.sub <- x[, colData(rld)[,level] %in% c(g1, g2)]
        matrix <- x.sub[resDEG,]
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
                 main=paste("ALL DE Genes - padj <= 0.05 & LFC >|< 2", sep=''), fontsize_row=3, fontsize_col=5)
        pheatmap(matrix, annotation_col=annotation_data, col=colors, scale="row", breaks=breaks, show_rownames = F,
                 main=paste("ALL DE Genes - padj <= 0.05 & LFC >|< 2", sep=''), cluster_cols=F, fontsize_row=3, fontsize_col=5)
    }
}

run.DESeq2 <- function(outpath, quantspath, samplesheet, tx2gene, level, g1, g2, plot_annos=NULL, block=NULL) {
    # plot_annos should be a vector of at least two columns from the sample sheet to use for labeling plots.
    # g1 and g2 should be strings for the groups to be compared for the level (eg. "Progression", "Response")
    # block can either be a single string ("cells") or a vector of multiple (c("cells", "disease"))

    message("### EXPLORATORY DATA ANALYSIS ###\n")
    message("# SET DIRECTORY STRUCTURE AND MODEL DESIGN #\n")
    ### SET DIRECTORY STRUCTURE AND MODEL DESIGN ###
    # Base folder to create output folders in. Create output folders.
    base <- outpath

    # Determine if there is a blocking factor or not and set design.
    if (!is.null(block)) {
        design <- formula(paste("~", paste(c(block, level), sep='', collapse=" + ")))
        if (!dir.exists(file.path(base,paste(c(block,"Block"), sep='', collapse='')))) {
            dir.create(file.path(base,paste(c(block,"Block"), sep='', collapse='')))
        }
        if (!dir.exists(file.path(base,paste(c(block,"Block/GenericFigures"), sep='', collapse='')))) {
            dir.create(file.path(base,paste(c(block,"Block/GenericFigures"), sep='', collapse='')))
        }
        if (!dir.exists(file.path(base,paste(c(block,"Block/GeneBoxPlots"), sep='', collapse='')))) {
            dir.create(file.path(base,paste(c(block,"Block/GeneBoxPlots"), sep='', collapse='')))
        }
        base = file.path(base, paste(c(block,"Block"),sep='', collapse=''))
    } else {
        design <- formula(paste("~", level))
        if (!dir.exists(file.path(base,"NoBlock"))) {
            dir.create(file.path(base,"NoBlock"))
        }
        if (!dir.exists(file.path(base,"NoBlock/GenericFigures"))) {
            dir.create(file.path(base,"NoBlock/GenericFigures"))
        }
        if (!dir.exists(file.path(base,"NoBlock/GeneBoxPlots"))) {
            dir.create(file.path(base,"NoBlock/GeneBoxPlots"))
        }
        base = file.path(base, "NoBlock")
    }

    if (is.null(plot_annos)) {
        plot_annos <- level
    }

    message("# FILE LOADING & PRE-FILTERING #\n")
    ### FILE LOADING & PRE-FILTERING ###
    tx2gene = read.table(tx2gene, sep="\t")
    samples <- read.table(samplesheet, header=TRUE)
    rownames(samples) <- samples$name
    files <- file.path(quantspath, samples$name, "quant.sf")
    names(files) <- samples$name

    # Read in our actual count files now.
    txi <- tximport(files, type="salmon", tx2gene=tx2gene)

    message(paste("\n\nDesign is:",design,".\n\n"))
    # Create the DESeqDataSet object.
    dds <- DESeqDataSetFromTximport(txi,
                                       colData = samples,
                                       design = design)

    # Pre-filter transcripts with really low read counts (<100 across all samples)
    message(paste("\ndds starting with",nrow(dds),"genes."))
    dds <- dds[rowSums(counts(dds)) >= 100,]
    message(paste("\ndds has",nrow(dds),"genes after filtering rows with under 100 reads total.\n"))

    message("\n# VARIANCE STABILIZATION COMPARISONS #\n")
    message(paste(base,"/GenericFigures/transformation.pdf\n", sep=""))
    ### VARIANCE STABILIZATION COMPARISONS ###
    rld <- rlog(dds, blind = FALSE)
    vsd <- vst(dds, blind = FALSE)

    pdf(paste(base,"/GenericFigures/transformation.pdf", sep=""))

    dds <- DESeq2::estimateSizeFactors(dds)

    df <- bind_rows(
      as_data_frame(log2(counts(dds, normalized=TRUE)[, 1:2]+1)) %>%
             mutate(transformation = "log2(x + 1)"),
      as_data_frame(assay(rld)[, 1:2]) %>% mutate(transformation = "rlog"),
      as_data_frame(assay(vsd)[, 1:2]) %>% mutate(transformation = "vst"))

    df
    colnames(df)[1:2] <- c("x", "y")

    p <- ggplot(df, aes(x = x, y = y)) + geom_hex(bins = 80) +
      coord_fixed() + facet_grid( . ~ transformation)
    print(p)
    dev.off()

    ### SAMPLE DISTANCES ###
    message(paste("\n# PLOTTING SAMPLE DISTANCES #\n", paste(base,"/GenericFigures/samp_dist.pdf\n", sep="")))
    sampleDists <- dist(t(assay(rld)))

    pdf(paste(base,"/GenericFigures/samp_dist.pdf", sep=""))
    ## ----distheatmap, fig.width = 6.1, fig.height = 4.5----------------------
    sampleDistMatrix <- as.matrix( sampleDists )
    rownames(sampleDistMatrix) <- paste( colData(rld)[,level], rld$name, sep = " - " )
    colnames(sampleDistMatrix) <- NULL
    colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
    pheatmap(sampleDistMatrix,
             clustering_distance_rows = sampleDists,
             clustering_distance_cols = sampleDists,
             col = colors)

    # Now with only the groups we want to compare.
    rld.sub = rld[,colData(rld)[,level] %in% c(g1, g2)]
    sampleDists <- dist(t(assay(rld.sub)))
    sampleDistMatrix <- as.matrix( sampleDists )
    rownames(sampleDistMatrix) <- paste( colData(rld.sub)[,level], rld.sub$name, sep = " - " )
    pheatmap(sampleDistMatrix,
             clustering_distance_rows = sampleDists,
             clustering_distance_cols = sampleDists,
             col = colors)
    dev.off()

    ### PCA PLOTS ###
    message(paste("# PCA PLOTS #\n",paste(base, "/GenericFigures/pca.pdf\n", sep='')))
    pcaData <- DESeq2::plotPCA(rld, intgroup = plot_annos, returnData = TRUE)
    pcaData
    percentVar <- round(100 * attr(pcaData, "percentVar"))
    pdf(paste(base, "/GenericFigures/pca.pdf", sep=''))
    p <- DESeq2::plotPCA(rld, intgroup = plot_annos)
    print(p)
    ## ----ggplotpca, fig.width=6, fig.height=4.5------------------------------
    p <- ggplot(pcaData, aes(x = PC1, y = PC2, color = colData(rld)[,plot_annos[1]], shape = colData(rld)[,plot_annos[2]])) +
      geom_point(size =3) +
      xlab(paste0("PC1: ", percentVar[1], "% variance")) +
      ylab(paste0("PC2: ", percentVar[2], "% variance")) +
      coord_fixed()
    print(p)
    # Now with only the groups we want to compare.
    pcaData <- DESeq2::plotPCA(rld.sub, intgroup = plot_annos, returnData = TRUE)
    percentVar <- round(100 * attr(pcaData, "percentVar"))
    p <- DESeq2::plotPCA(rld.sub, intgroup = plot_annos)
    print(p)
    ## ----ggplotpca, fig.width=6, fig.height=4.5------------------------------
    p <- ggplot(pcaData, aes(x = PC1, y = PC2, color = colData(rld.sub)[,plot_annos[1]], shape = colData(rld.sub)[,plot_annos[2]])) +
      geom_point(size =3) +
      xlab(paste0("PC1: ", percentVar[1], "% variance")) +
      ylab(paste0("PC2: ", percentVar[2], "% variance")) +
      coord_fixed()
    print(p)
    dev.off()

    #======================================#
    ### DIFFERENTIAL EXPRESSION ANALYSIS ###
    message("\n### DIFFERENTIAL EXPRESSION ANALYSIS ###\n")
    dds <- DESeq(dds)
    res <- lfcShrink(dds, contrast=c(level, g1, g2), type='ashr')

    ### PLOTTING THE RESULTS ###
    # This gets all the normalized counts for all genes/samples.
    resdata <- merge(as.data.frame(res), as.data.frame(counts(dds, normalized=TRUE)), by="row.names", sort=FALSE)
    names(resdata)[1] <- "Gene"

    # Make table for just DEGs.
    resSig <- subset(resdata, padj <= 0.1)

    ### DEG BOXPLOTS ###
    message("\n# DEG BOXPLOTS #\n")
    message("Creating boxplots for all genes with padj <= 0.1")
    # If you have more than 7 levels for your contrast, you may need to add colors here.
    fill = c("#A6CEE3","#B2DF8A","#FDBF6F","#CAB2D6","#f4cae4","#f1e2cc","#b3e2cd")
    line = c("#1F78B4","#33A02C","#FF7F00","#6A3D9A","#e7298a","#a6761d","#1b9e77")
    for (i in 1:nrow(resSig)){
        if (!file.exists(paste(base,"/GeneBoxPlots/",gsub('/','-',resSig$Gene[i]),".BoxPlot.pdf",sep=""))){
            pdf(paste(base,"/GeneBoxPlots/",gsub('/','-',resSig$Gene[i]),".BoxPlot.pdf",sep=""))
            d = plotCounts(dds, gene=resSig$Gene[i], intgroup=level)
            d = plotCounts(dds, gene=resSig$Gene[i], intgroup=level, returnData = T)
            p = ggplot(d, aes(x=d[,level], y=count)) + geom_boxplot(fill=fill[1:length(levels(colData(rld)[,level]))],colour=line[1:length(levels(colData(rld)[,level]))]) + ggtitle(resSig$Gene[i]) + coord_trans(y="log10")
            print(p)

            d = plotCounts(dds, gene=resSig$Gene[i], intgroup=level, returnData = T)
            e = subset(d, (get(level) == g1 | get(level) == g2))
            p = ggplot(e, aes(x=e[,level], y=count)) + geom_boxplot(fill=fill[1:2],colour=line[1:2]) + ggtitle(resSig$Gene[i]) + coord_trans(y="log10")
            print(p)
            dev.off()
        }
    }

    ### DEG PCA PLOTS ###
    # TODO - Make this a generic function.
    message(paste("\n# DEG PCA PLOTS #\n", base,"/DEG_pca.pdf\n",sep=''))
    pdf(paste(base,"/DEG_pca.pdf", sep=''))
    resdata <- merge(as.data.frame(res), as.data.frame(assay(rld)), by="row.names", sort=FALSE)
    names(resdata)[1] <- "Gene"
    resSig <- subset(resdata, padj <= 0.1)
    # Show only DEG genes and only the wanted groups.
    rld.sub = rld[resSig$Gene, colData(rld)[,level] %in% c(g1, g2)]
    p <- DESeq2::plotPCA(rld.sub, intgroup = plot_annos) + ggtitle("DEGs - padj <= 0.1")
    print(p)
    p <- DESeq2::plotPCA(rld.sub, intgroup = level) + ggtitle("DEGs - padj <= 0.1")
    print(p)

    # Now with lfc threshold on genes as well.
    resSig <- subset(resSig, log2FoldChange >= 2 | log2FoldChange <= -2)
    # Show only DEG genes and only the wanted groups.
    rld.sub = rld[resSig$Gene, colData(rld)[,level] %in% c(g1, g2)]
    p <- DESeq2::plotPCA(rld.sub, intgroup = plot_annos) + ggtitle("DEGs - padj <= 0.1 & LFC >|< 2")
    print(p)
    p <- DESeq2::plotPCA(rld.sub, intgroup = level) + ggtitle("DEGs - padj <= 0.1 & LFC >|< 2")
    print(p)

    resSig <- subset(resdata, padj <= 0.05)
    # Show only DEG genes and only the wanted groups.
    rld.sub = rld[resSig$Gene, colData(rld)[,level] %in% c(g1, g2)]
    p <- DESeq2::plotPCA(rld.sub, intgroup = plot_annos) + ggtitle("DEGs - padj <= 0.05")
    print(p)
    p <- DESeq2::plotPCA(rld.sub, intgroup = level) + ggtitle("DEGs - padj <= 0.05")
    print(p)

    # Now with lfc threshold on genes as well.
    resSig <- subset(resdata, padj <= 0.05)
    resSig <- subset(resSig, log2FoldChange >= 2 | log2FoldChange <= -2)
    # Show only DEG genes and only the wanted groups.
    rld.sub = rld[resSig$Gene, colData(rld)[,level] %in% c(g1, g2)]
    p <- DESeq2::plotPCA(rld.sub, intgroup = plot_annos) + ggtitle("DEGs - padj <= 0.05 & LFC >|< 2")
    print(p)
    p <- DESeq2::plotPCA(rld.sub, intgroup = level) + ggtitle("DEGs - padj <= 0.05 & LFC >|< 2")
    print(p)

    dev.off()

    ### MA PLOT ###
    message(paste("\n# MA PLOT #\n", base, "/MA_plot.pdf\n", sep=''))
    pdf(paste(base,"/MA_plot.pdf", sep=''))
    plotMA(res, ylim = c(-5, 5))
    dev.off()

    ### GENE CLUSTERING - VARIABLE ###
    # First with all samples included.
    message(paste("\n# VARIABLE GENE CLUSTERING #\n", base, "/Top20_VariableGenes_HeatmapDistances.pdf\n", sep=''))
    topVarGenes <- head(order(rowVars(assay(rld)), decreasing = TRUE), 20)
    mat  <- assay(rld)[ topVarGenes, ]
    mat  <- mat - rowMeans(mat)
    anno <- as.data.frame(colData(rld)[, plot_annos])
    pdf(paste(base,"/Top20_VariableGenes_HeatmapDistances.pdf", sep=''))
    pheatmap(mat, annotation_col = anno, main="Top 20 Variable Genes")

    #Then with only the wanted groups.
    rld.sub = rld[, colData(rld)[,level] %in% c(g1, g2)]
    topVarGenes <- head(order(rowVars(assay(rld.sub)), decreasing = TRUE), 20)
    mat  <- assay(rld.sub)[ topVarGenes, ]
    mat  <- mat - rowMeans(mat)
    anno <- as.data.frame(colData(rld.sub)[, plot_annos])
    pheatmap(mat, annotation_col = anno, main="Top 20 Variable Genes")
    dev.off()

    ### VOLCANO PLOT ###
    message(paste("# VOLCANO PLOT #\n", getwd(), "/", substr(base, 2, 500), "/",g1,"-v-",g2,".DEGs.Volcano.html\n", sep=''))

    # Set up results table.
    res <- res[order(res$padj),]
    resdata <- merge(as.data.frame(res), as.data.frame(counts(dds, normalized=TRUE)), by="row.names", sort=FALSE)
    names(resdata)[1] <- "Gene"

    diff_df = resdata[c("Gene","log2FoldChange","padj")]

    # add a grouping column; default value is "not significant"
    diff_df["group"] <- "NotSignificant"

    # for our plot, we want to highlight
    # padj < 0.1 (significance level)
    # log2FoldChange > 2

    # change the grouping for the entries with significance but not a large enough Fold change
    diff_df[which(diff_df['padj'] <= 0.1 & abs(diff_df['log2FoldChange']) <= 2 ),"group"] <- "Significant"
    # change the grouping for the entries with a large enough Fold change but not a low enough p value
    diff_df[which(diff_df['padj'] >= 0.1 & abs(diff_df['log2FoldChange']) >= 2 ),"group"] <- "FoldChange"
    # change the grouping for the entries with both significance and large enough fold change
    diff_df[which(diff_df['padj'] <= 0.1 & abs(diff_df['log2FoldChange']) >= 2 ),"group"] <- "Significant&FoldChange"

    # Find and label the top peaks.
    top_peaks <- diff_df[with(diff_df, order(log2FoldChange, padj)),][1:5,]
    top_peaks <- rbind(top_peaks, diff_df[with(diff_df, order(-log2FoldChange, padj)),][1:5,])

    # Add gene labels for all of the top genes we found
    a <- list(
        x = top_peaks$log2FoldChange,
        y = -log10(top_peaks$padj),
        text = top_peaks$Gene,
        xref = "x",
        yref = "y",
        showarrow = TRUE,
        arrowhead = 0.5,
        ax = 20,
        ay = -40
    )

    p <- plot_ly(diff_df, x = ~log2FoldChange, y = -log10(diff_df$padj), text = ~Gene, color = ~group,mode = "markers", type="scatter") %>%
        layout(title = paste(g1,"v",g2, sep='-'), xaxis=list(title="log2 Fold Change"), yaxis=list(title="-log10(adjusted p-value)"))

    # Save plot - has to use absolute path because the htmltools package is apparently hot garbage.
    htmlwidgets::saveWidget(as_widget(p), paste(getwd(), "/", substr(base, 2, 500), "/",g1,"-v-",g2,".DEGs.Volcano.html", sep=''))

    ### HEATMAPS ###
    message(paste("\n# DEG HEATMAPS #\n", sep=''))

    pdf(paste(base, "/DEG_Heatmaps.pdf", sep=''))
    make.heatmaps(dds, res, rld, level, g1, g2, plot_annos)
    dev.off()

    ### SAVING TABLES ###
    message("\n# SAVING RESULTS TABLES #\n")

    write.csv(resdata, file=paste(base, "/",g1,"-v-",g2,".ALL.csv",sep=''), row.names=F)

    # Make table for just DEGs too.
    resSig <- subset(resdata, padj <= 0.1)
    write.csv(resSig, file=paste(base, "/",g1,"-v-",g2,".DEGs.padj0.1.csv", sep=''), row.names=F)

    resSig <- subset(resdata, padj <= 0.05)
    write.csv(resSig, file=paste(base, "/",g1,"-v-",g2,".DEGs.padj0.05.csv", sep=''), row.names=F)
    return(list(res=res, dds=dds, rld=rld))
}