A simple function to create plots of multiple protein isoforms in R.
Last active
December 15, 2015 09:39
-
-
Save aaronwolen/5240514 to your computer and use it in GitHub Desktop.
Plot multiple protein isoforms in R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plot multiple protein isoforms | |
# Variables | |
############ | |
# proteinData: data.frame containing three columns: label, start & stop. | |
# The first containing unique protein identifiers. | |
# The remaining columns containing protein coordinates. | |
# If labels contain square brackets, they will be parsed as expressions | |
# so anything within the square brackets will be written as a subscript | |
# with a Delta prefix | |
# colorScaleColumn: optional character string, identify a column name in proteinData that contains quantitative data and the protein colors will be mapped to this data | |
# space: numeric value between 0 and 1 that indicates how much space to insert | |
# between protein rectangles. 0 would mean no space between rectangles. | |
# guideLines: logical, if TRUE, dashed lines will be plotted under each protein. | |
# repeatStart: numeric vector indicating start position of repeats | |
# repeatLengths: numeric vector specifiying each repeats length | |
# repeatColor: color of repeat triangles | |
# topPlotSize: numeric value between 0 and 1 indicating what percentage of | |
# figure the top plot should occupy | |
# sizeAxisLabel: numberic value indicating font expansion factor | |
# ggplotBackground: logical, should a ggplot2-like background be added to the bottom plot | |
# xAxisTop: logical, do you want an xaxis on the top plot? | |
# xAxisBottom: logical, do you want an xaxis on the bottom plot? | |
# markerBand: numeric vector (e.g. c(10,16)) used to highlight a specificp protein region | |
protein_plot <- function(proteinData, | |
colorScaleColumn, colorScaleLow="#3B4FB8", colorScaleHigh="#B71B1A", | |
topProteinColor = "grey90", | |
markerBand, markerBandColor="yellow", | |
repeatStarts, repeatLengths, repeatColor = "grey50", | |
space, bottomProteinColor = "grey90", guideLines=T, | |
xAxisTop = T, xAxisBottom = F, | |
sizeAxisLabel = 1, topPlotSize = .15, ggplotBackground = F) { | |
# Function to draw xaxis | |
add_xaxis <- function(at, labels, cex){ | |
axis(1, at=at, labels=F, lwd=0, lwd.tick=cex * 1.1, | |
col.ticks="grey50") | |
mtext(labels, cex = cex, | |
at=at, side=1, line=.7, padj=0, adj=.5) | |
} | |
# Tick length | |
tck <- -.15 | |
# Use a shorter variable name | |
p <- proteinData | |
# Add y coordinates for plotting | |
p$ybottom <- rev(0:(nrow(p)-1)) | |
p$ytop <- rev(1:nrow(p)) | |
p$ymid <- apply(p[,c("ytop", "ybottom")], M=1, F=mean) | |
# Adjust y coordinates if space was provided | |
if(!missing(space)){ | |
p$ybottom <- p$ybottom + (space/2) | |
p$ytop <- p$ytop - (space/2) | |
} | |
# Protein labels | |
labels <- as.character(p[,1]) | |
# If labels contain square brackets parse them as expressions | |
if(length(grep("\\[", labels))>0){ | |
parse.labels <- TRUE | |
for(i in 1:length(labels)){ | |
if(i==1){orig.labels <- labels} | |
# Move on if current label doesn't have a square bracket | |
if(length(grep("\\[", orig.labels[i]))>0){ | |
# Split label based on first square bracket and remove second bracket | |
split.label <- gsub("\\]", "", unlist(strsplit(orig.labels[i], split="\\["))) | |
# Use substitute to insert a Delta and brackets | |
# Then wrap as expression so they can still be saved as a char. vector | |
labels[i] <- as.expression( | |
substitute(l1[Delta*l2], list(l1=split.label[1], l2=split.label[2]))) | |
} else { | |
labels[i] <- orig.labels[i] | |
} | |
} | |
} else { | |
parse.labels <- FALSE | |
} | |
# Setup plot layout | |
lmat <- matrix(1:2, ncol=1) | |
lhei <- c(topPlotSize, 1-topPlotSize) | |
lwid <- 1 | |
# Accommodate a legend if colorScaleColumn is provided | |
if(!missing(colorScaleColumn)){ | |
lmat <- cbind(lmat, c(0,3)) | |
lwid <- c(.85, .15) | |
# Color palette | |
colorPal <- colorRampPalette(c(colorScaleLow, colorScaleHigh)) | |
# Color legend breaks | |
pretty_breaks <- pretty(p[, colorScaleColumn]) | |
breaks <- pretty(seq(min(pretty_breaks), max(pretty_breaks), | |
diff(pretty_breaks[1:2])), n=nrow(p)*10) | |
nColors <- length(breaks)-1 | |
color.key <- data.frame(col = colorPal(length(breaks)), row.names = breaks) | |
bottomProteinColor <- as.character( | |
color.key[cut(p[,colorScaleColumn], breaks, labels=F), "col"]) | |
} | |
layout(lmat, heights=lhei, widths=lwid) | |
# Determine length & height of longest label | |
labelWidth <- strwidth(labels[which.max(nchar(labels))], | |
units="inches") * sizeAxisLabel | |
# Determine plot and tick dimensions | |
labelHeight <- strheight(labels[which.max(nchar(labels))], | |
units="inches") * sizeAxisLabel | |
plotWidth <- par("pin")[1] | |
plotHeight <- par("pin")[2] | |
tickLength <- plotHeight * 0.01 | |
# Axis coordinates | |
xaxis <- pretty(c(p$start, p$stop)) | |
xint <- diff(head(xaxis,2))/2 | |
xminor <- seq(min(xaxis)+xint, max(xaxis)-xint, xint) | |
# Bernice only wants the x-axis to extend as far as the longest protein | |
xaxis[length(xaxis)] <- max(p$stop) | |
if(max(xminor)>max(xaxis)){ | |
xminor <- head(xminor, -1) | |
truncate.labels <- T | |
} else { | |
truncate.labels <- F | |
} | |
yaxis <- p$ymid | |
yminor <- 0:nrow(p) | |
# Top plot of full protein | |
############################################################################## | |
# mai gives margin size in inches | |
mai.bot <- ifelse(xAxisTop, labelHeight*2, 0) | |
mai.left <- labelWidth | |
mai.right <- .35 | |
par(mai = c(mai.bot, mai.left, 0, mai.right), xaxs="i") | |
plot(NA, axes=F, xlab="", ylab="", | |
xlim=c(0, max(p$stop)), ylim=c(0,1), type="n") | |
rect(min(p$start), 0, max(p$stop), 1, col=topProteinColor, border=NA) | |
if(xAxisTop){ | |
if(truncate.labels){ | |
add_xaxis(at=head(xaxis, -1), labels=head(xaxis, -1), cex=sizeAxisLabel) | |
} else { | |
add_xaxis(at=xaxis, labels=xaxis, cex=sizeAxisLabel) | |
} | |
} | |
# Identify N and C terminals | |
axis(2, at=.5, labels=F, pos=0, lwd=0, lwd.tick=1) | |
axis(4, at=.5, labels=F, pos=max(xaxis), lwd=0, lwd.tick=1) | |
mtext("N", side=2, cex = sizeAxisLabel, line=.55, las=2) | |
mtext("C", side=4, cex = sizeAxisLabel, line=.55, las=2) | |
# Add repeat arrows | |
if(!missing(repeatStarts)){ | |
if(!missing(repeatLengths)){ | |
# If only one repeatStart is provided create a contiguous train | |
# of repeats that start at that site | |
if(length(repeatStarts)==1){ | |
repeats <- data.frame( | |
start = c(repeatStarts[1], head(repeatStarts[1]+cumsum(repeatLengths),-1)), | |
stop = repeatStarts[1]+cumsum(repeatLengths)) | |
} else { | |
repeats <- data.frame( | |
start = repeatStarts, | |
stop = repeatStarts + repeatLengths) | |
} | |
for(r in 1:nrow(repeats)){ | |
with(repeats, | |
polygon(x=c(start[r], start[r], stop[r]), | |
y=c(0, 1, .5), col=repeatColor, border=NA)) | |
} | |
} | |
} | |
# Add markerBand to top protein | |
if(!missing(markerBand)){ | |
rect(xleft=markerBand[1], ybottom=0, xright=markerBand[2], | |
ytop=1, col=markerBandColor, border=NA) | |
} | |
# Bottom plot of individual isoforms | |
############################################################################## | |
# mai gives margin size in inches | |
mai.bot <- ifelse(xAxisBottom, labelHeight*2.5, 0) | |
par(mai = c(mai.bot, mai.left, labelHeight, mai.right), xaxs="i") | |
plot(NA, axes=F, xlab="", ylab="", | |
xlim=c(0, max(xaxis)), ylim=c(0,nrow(p)), type="n") | |
# ggplot2 like background | |
if(ggplotBackground){ | |
usr <- par("usr") | |
rect(usr[1], usr[3], usr[2], usr[4], col="grey90", border=NA) | |
abline(h=yaxis, v=xaxis, col="white", lwd=sizeAxisLabel * 1.1) | |
abline(h=yminor, v=xminor, col="grey95", lwd=sizeAxisLabel * .98) | |
} | |
# Plot each protein rectangle | |
for(i in 1:nrow(p)){ | |
if(guideLines){ | |
abline(h=p$ymid[i], lty=2) | |
} | |
rect(p$start[i], p$ybottom[i], p$stop[i], p$ytop[i], | |
col=bottomProteinColor, border=NA) | |
# Add markerBand to bottom proteins if the coordinates overlap | |
if(!missing(markerBand)){ | |
if(markerBand[1] >= p$start[i] & markerBand[2] <= p$stop[i]){ | |
rect(xleft=markerBand[1], ybottom=p$ybottom[i], xright=markerBand[2], | |
ytop=p$ytop[i], col=markerBandColor, border=NA) | |
} | |
} | |
} | |
# Add protein labels | |
mtext(labels, side=2, line=.5, at=p$ymid, las=2) | |
# axis(2, labels=labels, at=yaxis, lwd=0, | |
# lwd.tick=ifelse(ggplotBackground, sizeAxisLabel * 1.1, 0), | |
# col.ticks="grey50", las=2, cex.axis = sizeAxisLabel) | |
if(xAxisBottom){ | |
if(truncate.labels){ | |
add_xaxis(at=head(xaxis, -1), labels=head(xaxis, -1), cex=sizeAxisLabel) | |
} else { | |
add_xaxis(at=xaxis, labels=xaxis, cex=sizeAxisLabel) | |
} | |
} | |
# Color scale legend | |
############################################################################## | |
if(!missing(colorScaleColumn)){ | |
par(mar=c(3, 0, 3, 3.5)) | |
pretty_breaks_mids <- sapply(2:length(pretty_breaks), F=function(x) | |
mean(pretty_breaks[c(x,x-1)])) | |
image(y=breaks, z=t(rev(breaks)), breaks=breaks, col=colorPal(nColors), | |
axes=F, ylim=range(pretty_breaks), ylab="") | |
abline(h=pretty_breaks, col="white", lwd=1.5) | |
box(col="white", lwd=1.5) | |
mtext(side=4, pretty_breaks_mids, at=pretty_breaks_mids, | |
las=2, line=.25, cex=sizeAxisLabel * .8) | |
mtext(side=2, colorScaleColumn, cex=sizeAxisLabel*.8, font=2) | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment