DarioS/rnaPlot.R

## rnaPlot.R
.plotValues <- function(data, plot.columns, y.range, y.lab, plot.type, cols, ord, main,
                        g.symbol, first.plot)
{
    if(plot.type == "line")
    {
        matplot(data[, plot.columns, drop = FALSE], ylim = y.range, type = 'l', lty = 1, lwd = 2,
                ylab = y.lab, col = cols, main = main, xaxt = 'n')
        axis(1, 1:nrow(data), ord)
    } else {
        plot.values <- t(data[, plot.columns, drop = FALSE])
        if(ncol(plot.values) < 5)
            bar.x.lim <- c(1, 15)
        else
            bar.x.lim <- c(1, ncol(plot.values) * (nrow(plot.values) + 1))
        barplot(plot.values, xlim = bar.x.lim, ylim = y.range, names.arg = ord,
                ylab = y.lab, col = cols, beside = TRUE, main = main)
    }
    if(first.plot)
        title(g.symbol, outer = TRUE)
}

.shiftLabels <- function(labels.x, labels.y, x.min, x.max, n.models)
{
    lab.space <- 0.01 * (x.max - x.min)
    labels.x <- labels.x[, order(labels.x[2, , drop = FALSE]), drop = FALSE]

    max.consec <- ifelse(n.models <= 4, 6, 3)
    consec = 1
    for(index in 2:length(labels.y))
    {
        if(abs(labels.x[1, index] - labels.x[1, index - 1]) < lab.space && consec < max.consec)
        {
            labels.y[index] <- labels.y[index - 1] - 0.03
            consec = consec + 1
        } else {
            consec = 1
        }
    }

    list(labels.x, labels.y)
}

.findLimits <- function(scores, diffs)
{
    if(any(is.finite(scores)))
    {
        if(is.null(diffs)) y.min <- 0 else y.min <- min(scores[is.finite(scores)])
        y.max <- max(scores[is.finite(scores)])
        if(y.min > 0) y.min <- 0
        if(y.max < 0) y.max <- 0
    } else {
        y.min <- -50
        y.max <- 50
    }
    c(y.min, y.max)
}

setGeneric("rnaPlot", function(counts, ...)
           {standardGeneric("rnaPlot")})

setMethod("rnaPlot", c("GRanges"),
    function(counts, diffs = NULL, scale = c("none", "log", "sqrt"), genes = NULL, tx.db, cols = NULL,
             plot.columns = NULL, plot.type = c("line", "bar"), y.lab = '',
             plot.exons = TRUE, plot.intronic = TRUE, plot.junctions = TRUE,
             verbose = TRUE)
{
    if(is.null(cols))
        stop("'cols' not given.")
    if(is.null(plot.columns) && is.null(diffs))
        stop("Columns to use in plotting not given. Can only be NULL if 'diffs' is given.")
    plot.type <- match.arg(plot.type)
    scale <- match.arg(scale)
    counts.df <- as.data.frame(counts)

    if(!is.null(diffs))
    {
        diffs.scores <- lapply(diffs, function(diff)
                              {
                                  diff <- strsplit(diff, " {0, }- {0, }")[[1]]
                                  use.counts <- counts.df[, diff]
                                  if(scale == "log") use.counts <- log2(use.counts) else use.counts <- sqrt(use.counts)
                                  use.counts[, 2] - use.counts[, 1]
                              })
        names(diffs.scores) <- diffs
        counts.df <- cbind(counts.df, diffs.scores)
        plot.columns <- match(diffs, colnames(counts.df))
    } else {
        plot.columns <- plot.columns + 5 # Offset for coordinates columns.
    }

    if(!is.null(genes))
        counts.df <- counts.df[counts.df[, "gene"] %in% genes, ]
    counts.df$pos <- (counts.df$start + counts.df$end) / 2
    counts.df <- counts.df[order(counts.df[, "pos"]), ]

    genes.scores <- split(as.data.frame(counts.df), factor(counts.df$gene, levels = genes))

    if("gene.symbol" %in% colnames(counts.df))
        symbols <- counts.df[match(names(genes.scores), counts.df$gene), "gene.symbol"]
    else
        symbols <- names(genes.scores)
    # Get transcripts of each gene.
    gene.txs <- transcriptsBy(tx.db, "gene")
    gene.txs <- gene.txs[names(genes.scores)]
    # Get exons of each transcript.
    tx.exons <- exonsBy(tx.db, "tx")

    n.graphs <- sum(plot.exons || plot.junctions, plot.intronic)
    g.row <- match(names(genes.scores), counts.df[, "gene"])
    strands <- counts.df[g.row, "strand"]
    chrs <- counts.df[g.row, "seqnames"]
    par(oma = c(3, 1, 2, 1))

    feat.used <- character()
    if(plot.exons) feat.used <- c(feat.used, "exon")
    if(plot.junctions) feat.used <- c(feat.used, "junction")
    if(plot.intronic) feat.used <- c(feat.used, "intronic")

    if(verbose) message("Plotting genes.")
    for(g.index in 1:length(symbols))
    {
        curr.scores <- genes.scores[[g.index]]
        curr.txs <- gene.txs[[g.index]]
        models.frac <- 1/2 + min(length(curr.txs), 5) / 10

        par(mar = c(1, 4, 3, 1))
        layout(matrix(1:(2*(n.graphs + 1)), ncol = 2, byrow = TRUE),
               heights = c(rep(1/n.graphs, n.graphs), models.frac), widths = c(7, 1))

        g.strand <- strands[g.index]

        y.range <- .findLimits(as.matrix(curr.scores[curr.scores[, "type"] %in% feat.used, plot.columns]), diffs)

        if(plot.exons || plot.junctions)
        {
            if(plot.exons)
                keep <- "exon"
            if(plot.junctions)
                keep <- c(keep, "junction")

            tx.scores <- curr.scores[curr.scores[, "type"] %in% keep, ]

            if(g.strand == '+')
                ex.jct.ord <- 1:nrow(tx.scores)
            else
                ex.jct.ord <- nrow(tx.scores):1

            tx.IR <- IRanges(tx.scores[, "start"], tx.scores[, "end"])
            .plotValues(tx.scores, plot.columns, y.range, y.lab, plot.type, cols, ex.jct.ord,
                        "Exons and Junctions", symbols[g.index], TRUE)

            # Put legend on the side
            par(mar = c(1, 0, 3, 0))
            plot.new()
            legend("top", colnames(counts.df[, plot.columns, drop = FALSE]), fill = cols)
        }

        if(plot.intronic)
        {
            par(mar = c(1, 4, 3, 1))
            is.first <- ifelse(plot.exons || plot.junctions, FALSE, TRUE)
            in.scores <- curr.scores[curr.scores[, "type"] %in% "intronic", ]

            if(nrow(in.scores) > 0)
            {
                if(g.strand == '+') in.ord <- 1:nrow(in.scores) else in.ord <- nrow(in.scores):1
                .plotValues(in.scores, plot.columns, y.range, y.lab, plot.type, cols, in.ord,
                            "Intronic Gaps", symbols[g.index], is.first)
            } else {
                plot.new()
                plot.window(c(0, 1), c(0, 1))
                text(0.5, 0.5, "No intronic regions\nfor this gene.")
            }
            par(mar = c(1, 0, 3, 0))
            plot.new()
            if(is.first)
                legend("top", colnames(counts.df[, plot.columns, drop = FALSE]), fill = cols)
        }

        x.min <- min(start(curr.txs))
        x.max <- max(end(curr.txs))
        x.space <- 0.01 * (x.max - x.min)
        x.min <- x.min -  x.space
        x.max <- x.max + x.space
        x.ticks <- seq(x.min, x.max, floor((x.max - x.min) / 10))

        par(mar = c(1, 2, 0, 1))
        plot.new()
        plot.window(xlim = c(x.min, x.max), ylim = c(0, models.frac))
        axis(1, at = x.ticks)

        n.models <- length(curr.txs) * (plot.exons || plot.junctions) + plot.intronic
        model.ys <- models.frac - 0.05 - (models.frac - 0.05) / n.models * 0:(n.models - 1)
        tx.ys <- if(plot.intronic) model.ys[-length(model.ys)] else model.ys

        curr.txs.df <- as.data.frame(curr.txs)
        gene.disjoint.exons <- tx.scores[tx.scores[, "type"] == "exon", c("start", "end")]
        disjoint.exons.IR <- IRanges(gene.disjoint.exons[, 1], gene.disjoint.exons[, 2])
        mapply(function(x, y)
        {
            labels.x <- NULL
            if(plot.exons)
            {
                curr.exons <- ranges(tx.exons[[y]])
                curr.exons <- disjoint.exons.IR[queryHits(findOverlaps(disjoint.exons.IR, curr.exons, type = "within"))]
                if(g.strand == '-') curr.exons <- rev(curr.exons)
                ex.ord.id <- ex.jct.ord[intersect(subjectHits(findOverlaps(curr.exons, tx.IR, type = "equal")), which(tx.scores[, "type"] == "exon"))]

                # Draw exon rectangles.
                mapply(function(st, end)
                {
                    rect(st, x - 0.02, end, x + 0.02, col = "darkblue", lty = 0)
                }, start(curr.exons), end(curr.exons))

                labels.x <- rbind((start(curr.exons) + end(curr.exons)) / 2, ex.ord.id)
            }

            # Draw junction lines and arrowheads.
            if(plot.junctions)
            {
                curr.juncts <- gaps(curr.exons)
                if(length(curr.juncts) > 0)
                {
                    junct.ord.id <- ex.jct.ord[subjectHits(findOverlaps(curr.juncts, tx.IR, type = "equal"))]
                    mapply(function(st, end)
                    {
                        middle <- (st + end) / 2
                        lines(c(st, middle, middle, end), x + c(0.02, 0.04, 0.04, 0.02), col = "darkblue")

                    }, start(curr.juncts), end(curr.juncts))
                    labels.x <- cbind(labels.x, rbind((start(curr.juncts) + end(curr.juncts)) / 2, junct.ord.id))
                }
            }

            labels.y <- rep(x - 0.04, ncol(labels.x))
            if(ncol(labels.x) > 1)
            {
                xy.list <- .shiftLabels(labels.x, labels.y, x.min, x.max, n.models)
                labels.x <- xy.list[[1]]
                labels.y <- xy.list[[2]]
            }
            text(labels.x[1, ], labels.y, labels.x[2, ])
        }, tx.ys, curr.txs.df[, "tx_id"])

        if(plot.intronic)
        {
            y.in <- model.ys[length(model.ys)]
            if(nrow(in.scores) > 0)
            {
                mapply(function(st, end, col)
                {
                    lines(c(st, end), rep(y.in, 2), col = col)
                }, in.scores[, "start"], in.scores[, "end"], "darkblue")

                labels.x <- rbind(in.scores[, "pos"], in.ord)
                labels.y <- rep(y.in - 0.03, nrow(in.scores))
                if(ncol(labels.x) > 1)
                {
                    xy.list <- .shiftLabels(labels.x, labels.y, x.min, x.max, n.models)
                    labels.x <- xy.list[[1]]
                    labels.y <- xy.list[[2]]
                }
                text(labels.x[1, ], labels.y, labels.x[2, ])
            } else {
                text(mean(c(x.min, x.max)), y.in, "No intronic regions\nfor this gene.")
            }
        }

        mtext(chrs[g.index], 1, 3)
        par(mar = c(1, 1, 0, 0))
        plot.new()
        plot.window(xlim = c(0, 1), ylim = c(0, models.frac))
        text(0.5, model.ys, c(as.character(curr.txs.df[, "tx_name"]), if(plot.intronic) "Intronic Gaps"))
    }
})
	.plotValues <- function(data, plot.columns, y.range, y.lab, plot.type, cols, ord, main,
	g.symbol, first.plot)
	{
	if(plot.type == "line")
	{
	matplot(data[, plot.columns, drop = FALSE], ylim = y.range, type = 'l', lty = 1, lwd = 2,
	ylab = y.lab, col = cols, main = main, xaxt = 'n')
	axis(1, 1:nrow(data), ord)
	} else {
	plot.values <- t(data[, plot.columns, drop = FALSE])
	if(ncol(plot.values) < 5)
	bar.x.lim <- c(1, 15)
	else
	bar.x.lim <- c(1, ncol(plot.values) * (nrow(plot.values) + 1))
	barplot(plot.values, xlim = bar.x.lim, ylim = y.range, names.arg = ord,
	ylab = y.lab, col = cols, beside = TRUE, main = main)
	}
	if(first.plot)
	title(g.symbol, outer = TRUE)
	}

	.shiftLabels <- function(labels.x, labels.y, x.min, x.max, n.models)
	{
	lab.space <- 0.01 * (x.max - x.min)
	labels.x <- labels.x[, order(labels.x[2, , drop = FALSE]), drop = FALSE]

	max.consec <- ifelse(n.models <= 4, 6, 3)
	consec = 1
	for(index in 2:length(labels.y))
	{
	if(abs(labels.x[1, index] - labels.x[1, index - 1]) < lab.space && consec < max.consec)
	{
	labels.y[index] <- labels.y[index - 1] - 0.03
	consec = consec + 1
	} else {
	consec = 1
	}
	}

	list(labels.x, labels.y)
	}

	.findLimits <- function(scores, diffs)
	{
	if(any(is.finite(scores)))
	{
	if(is.null(diffs)) y.min <- 0 else y.min <- min(scores[is.finite(scores)])
	y.max <- max(scores[is.finite(scores)])
	if(y.min > 0) y.min <- 0
	if(y.max < 0) y.max <- 0
	} else {
	y.min <- -50
	y.max <- 50
	}
	c(y.min, y.max)
	}

	setGeneric("rnaPlot", function(counts, ...)
	{standardGeneric("rnaPlot")})

	setMethod("rnaPlot", c("GRanges"),
	function(counts, diffs = NULL, scale = c("none", "log", "sqrt"), genes = NULL, tx.db, cols = NULL,
	plot.columns = NULL, plot.type = c("line", "bar"), y.lab = '',
	plot.exons = TRUE, plot.intronic = TRUE, plot.junctions = TRUE,
	verbose = TRUE)
	{
	if(is.null(cols))
	stop("'cols' not given.")
	if(is.null(plot.columns) && is.null(diffs))
	stop("Columns to use in plotting not given. Can only be NULL if 'diffs' is given.")
	plot.type <- match.arg(plot.type)
	scale <- match.arg(scale)
	counts.df <- as.data.frame(counts)

	if(!is.null(diffs))
	{
	diffs.scores <- lapply(diffs, function(diff)
	{
	diff <- strsplit(diff, " {0, }- {0, }")[[1]]
	use.counts <- counts.df[, diff]
	if(scale == "log") use.counts <- log2(use.counts) else use.counts <- sqrt(use.counts)
	use.counts[, 2] - use.counts[, 1]
	})
	names(diffs.scores) <- diffs
	counts.df <- cbind(counts.df, diffs.scores)
	plot.columns <- match(diffs, colnames(counts.df))
	} else {
	plot.columns <- plot.columns + 5 # Offset for coordinates columns.
	}

	if(!is.null(genes))
	counts.df <- counts.df[counts.df[, "gene"] %in% genes, ]
	counts.df$pos <- (counts.df$start + counts.df$end) / 2
	counts.df <- counts.df[order(counts.df[, "pos"]), ]

	genes.scores <- split(as.data.frame(counts.df), factor(counts.df$gene, levels = genes))

	if("gene.symbol" %in% colnames(counts.df))
	symbols <- counts.df[match(names(genes.scores), counts.df$gene), "gene.symbol"]
	else
	symbols <- names(genes.scores)
	# Get transcripts of each gene.
	gene.txs <- transcriptsBy(tx.db, "gene")
	gene.txs <- gene.txs[names(genes.scores)]
	# Get exons of each transcript.
	tx.exons <- exonsBy(tx.db, "tx")

	n.graphs <- sum(plot.exons \|\| plot.junctions, plot.intronic)
	g.row <- match(names(genes.scores), counts.df[, "gene"])
	strands <- counts.df[g.row, "strand"]
	chrs <- counts.df[g.row, "seqnames"]
	par(oma = c(3, 1, 2, 1))

	feat.used <- character()
	if(plot.exons) feat.used <- c(feat.used, "exon")
	if(plot.junctions) feat.used <- c(feat.used, "junction")
	if(plot.intronic) feat.used <- c(feat.used, "intronic")

	if(verbose) message("Plotting genes.")
	for(g.index in 1:length(symbols))
	{
	curr.scores <- genes.scores[[g.index]]
	curr.txs <- gene.txs[[g.index]]
	models.frac <- 1/2 + min(length(curr.txs), 5) / 10

	par(mar = c(1, 4, 3, 1))
	layout(matrix(1:(2*(n.graphs + 1)), ncol = 2, byrow = TRUE),
	heights = c(rep(1/n.graphs, n.graphs), models.frac), widths = c(7, 1))

	g.strand <- strands[g.index]

	y.range <- .findLimits(as.matrix(curr.scores[curr.scores[, "type"] %in% feat.used, plot.columns]), diffs)

	if(plot.exons \|\| plot.junctions)
	{
	if(plot.exons)
	keep <- "exon"
	if(plot.junctions)
	keep <- c(keep, "junction")

	tx.scores <- curr.scores[curr.scores[, "type"] %in% keep, ]

	if(g.strand == '+')
	ex.jct.ord <- 1:nrow(tx.scores)
	else
	ex.jct.ord <- nrow(tx.scores):1

	tx.IR <- IRanges(tx.scores[, "start"], tx.scores[, "end"])
	.plotValues(tx.scores, plot.columns, y.range, y.lab, plot.type, cols, ex.jct.ord,
	"Exons and Junctions", symbols[g.index], TRUE)

	# Put legend on the side
	par(mar = c(1, 0, 3, 0))
	plot.new()
	legend("top", colnames(counts.df[, plot.columns, drop = FALSE]), fill = cols)
	}

	if(plot.intronic)
	{
	par(mar = c(1, 4, 3, 1))
	is.first <- ifelse(plot.exons \|\| plot.junctions, FALSE, TRUE)
	in.scores <- curr.scores[curr.scores[, "type"] %in% "intronic", ]

	if(nrow(in.scores) > 0)
	{
	if(g.strand == '+') in.ord <- 1:nrow(in.scores) else in.ord <- nrow(in.scores):1
	.plotValues(in.scores, plot.columns, y.range, y.lab, plot.type, cols, in.ord,
	"Intronic Gaps", symbols[g.index], is.first)
	} else {
	plot.new()
	plot.window(c(0, 1), c(0, 1))
	text(0.5, 0.5, "No intronic regions\nfor this gene.")
	}
	par(mar = c(1, 0, 3, 0))
	plot.new()
	if(is.first)
	legend("top", colnames(counts.df[, plot.columns, drop = FALSE]), fill = cols)
	}

	x.min <- min(start(curr.txs))
	x.max <- max(end(curr.txs))
	x.space <- 0.01 * (x.max - x.min)
	x.min <- x.min - x.space
	x.max <- x.max + x.space
	x.ticks <- seq(x.min, x.max, floor((x.max - x.min) / 10))

	par(mar = c(1, 2, 0, 1))
	plot.new()
	plot.window(xlim = c(x.min, x.max), ylim = c(0, models.frac))
	axis(1, at = x.ticks)

	n.models <- length(curr.txs) * (plot.exons \|\| plot.junctions) + plot.intronic
	model.ys <- models.frac - 0.05 - (models.frac - 0.05) / n.models * 0:(n.models - 1)
	tx.ys <- if(plot.intronic) model.ys[-length(model.ys)] else model.ys

	curr.txs.df <- as.data.frame(curr.txs)
	gene.disjoint.exons <- tx.scores[tx.scores[, "type"] == "exon", c("start", "end")]
	disjoint.exons.IR <- IRanges(gene.disjoint.exons[, 1], gene.disjoint.exons[, 2])
	mapply(function(x, y)
	{
	labels.x <- NULL
	if(plot.exons)
	{
	curr.exons <- ranges(tx.exons[[y]])
	curr.exons <- disjoint.exons.IR[queryHits(findOverlaps(disjoint.exons.IR, curr.exons, type = "within"))]
	if(g.strand == '-') curr.exons <- rev(curr.exons)
	ex.ord.id <- ex.jct.ord[intersect(subjectHits(findOverlaps(curr.exons, tx.IR, type = "equal")), which(tx.scores[, "type"] == "exon"))]

	# Draw exon rectangles.
	mapply(function(st, end)
	{
	rect(st, x - 0.02, end, x + 0.02, col = "darkblue", lty = 0)
	}, start(curr.exons), end(curr.exons))

	labels.x <- rbind((start(curr.exons) + end(curr.exons)) / 2, ex.ord.id)
	}

	# Draw junction lines and arrowheads.
	if(plot.junctions)
	{
	curr.juncts <- gaps(curr.exons)
	if(length(curr.juncts) > 0)
	{
	junct.ord.id <- ex.jct.ord[subjectHits(findOverlaps(curr.juncts, tx.IR, type = "equal"))]
	mapply(function(st, end)
	{
	middle <- (st + end) / 2
	lines(c(st, middle, middle, end), x + c(0.02, 0.04, 0.04, 0.02), col = "darkblue")

	}, start(curr.juncts), end(curr.juncts))
	labels.x <- cbind(labels.x, rbind((start(curr.juncts) + end(curr.juncts)) / 2, junct.ord.id))
	}
	}

	labels.y <- rep(x - 0.04, ncol(labels.x))
	if(ncol(labels.x) > 1)
	{
	xy.list <- .shiftLabels(labels.x, labels.y, x.min, x.max, n.models)
	labels.x <- xy.list[[1]]
	labels.y <- xy.list[[2]]
	}
	text(labels.x[1, ], labels.y, labels.x[2, ])
	}, tx.ys, curr.txs.df[, "tx_id"])

	if(plot.intronic)
	{
	y.in <- model.ys[length(model.ys)]
	if(nrow(in.scores) > 0)
	{
	mapply(function(st, end, col)
	{
	lines(c(st, end), rep(y.in, 2), col = col)
	}, in.scores[, "start"], in.scores[, "end"], "darkblue")

	labels.x <- rbind(in.scores[, "pos"], in.ord)
	labels.y <- rep(y.in - 0.03, nrow(in.scores))
	if(ncol(labels.x) > 1)
	{
	xy.list <- .shiftLabels(labels.x, labels.y, x.min, x.max, n.models)
	labels.x <- xy.list[[1]]
	labels.y <- xy.list[[2]]
	}
	text(labels.x[1, ], labels.y, labels.x[2, ])
	} else {
	text(mean(c(x.min, x.max)), y.in, "No intronic regions\nfor this gene.")
	}
	}

	mtext(chrs[g.index], 1, 3)
	par(mar = c(1, 1, 0, 0))
	plot.new()
	plot.window(xlim = c(0, 1), ylim = c(0, models.frac))
	text(0.5, model.ys, c(as.character(curr.txs.df[, "tx_name"]), if(plot.intronic) "Intronic Gaps"))
	}
	})