Skip to content

Instantly share code, notes, and snippets.

@aaronwolen
Last active December 15, 2015 09:39
Show Gist options
  • Save aaronwolen/5240514 to your computer and use it in GitHub Desktop.
Save aaronwolen/5240514 to your computer and use it in GitHub Desktop.
Plot multiple protein isoforms in R

A simple function to create plots of multiple protein isoforms in R.

# Plot multiple protein isoforms
# Variables
############
# proteinData: data.frame containing three columns: label, start & stop.
# The first containing unique protein identifiers.
# The remaining columns containing protein coordinates.
# If labels contain square brackets, they will be parsed as expressions
# so anything within the square brackets will be written as a subscript
# with a Delta prefix
# colorScaleColumn: optional character string, identify a column name in proteinData that contains quantitative data and the protein colors will be mapped to this data
# space: numeric value between 0 and 1 that indicates how much space to insert
# between protein rectangles. 0 would mean no space between rectangles.
# guideLines: logical, if TRUE, dashed lines will be plotted under each protein.
# repeatStart: numeric vector indicating start position of repeats
# repeatLengths: numeric vector specifiying each repeats length
# repeatColor: color of repeat triangles
# topPlotSize: numeric value between 0 and 1 indicating what percentage of
# figure the top plot should occupy
# sizeAxisLabel: numberic value indicating font expansion factor
# ggplotBackground: logical, should a ggplot2-like background be added to the bottom plot
# xAxisTop: logical, do you want an xaxis on the top plot?
# xAxisBottom: logical, do you want an xaxis on the bottom plot?
# markerBand: numeric vector (e.g. c(10,16)) used to highlight a specificp protein region
protein_plot <- function(proteinData,
colorScaleColumn, colorScaleLow="#3B4FB8", colorScaleHigh="#B71B1A",
topProteinColor = "grey90",
markerBand, markerBandColor="yellow",
repeatStarts, repeatLengths, repeatColor = "grey50",
space, bottomProteinColor = "grey90", guideLines=T,
xAxisTop = T, xAxisBottom = F,
sizeAxisLabel = 1, topPlotSize = .15, ggplotBackground = F) {
# Function to draw xaxis
add_xaxis <- function(at, labels, cex){
axis(1, at=at, labels=F, lwd=0, lwd.tick=cex * 1.1,
col.ticks="grey50")
mtext(labels, cex = cex,
at=at, side=1, line=.7, padj=0, adj=.5)
}
# Tick length
tck <- -.15
# Use a shorter variable name
p <- proteinData
# Add y coordinates for plotting
p$ybottom <- rev(0:(nrow(p)-1))
p$ytop <- rev(1:nrow(p))
p$ymid <- apply(p[,c("ytop", "ybottom")], M=1, F=mean)
# Adjust y coordinates if space was provided
if(!missing(space)){
p$ybottom <- p$ybottom + (space/2)
p$ytop <- p$ytop - (space/2)
}
# Protein labels
labels <- as.character(p[,1])
# If labels contain square brackets parse them as expressions
if(length(grep("\\[", labels))>0){
parse.labels <- TRUE
for(i in 1:length(labels)){
if(i==1){orig.labels <- labels}
# Move on if current label doesn't have a square bracket
if(length(grep("\\[", orig.labels[i]))>0){
# Split label based on first square bracket and remove second bracket
split.label <- gsub("\\]", "", unlist(strsplit(orig.labels[i], split="\\[")))
# Use substitute to insert a Delta and brackets
# Then wrap as expression so they can still be saved as a char. vector
labels[i] <- as.expression(
substitute(l1[Delta*l2], list(l1=split.label[1], l2=split.label[2])))
} else {
labels[i] <- orig.labels[i]
}
}
} else {
parse.labels <- FALSE
}
# Setup plot layout
lmat <- matrix(1:2, ncol=1)
lhei <- c(topPlotSize, 1-topPlotSize)
lwid <- 1
# Accommodate a legend if colorScaleColumn is provided
if(!missing(colorScaleColumn)){
lmat <- cbind(lmat, c(0,3))
lwid <- c(.85, .15)
# Color palette
colorPal <- colorRampPalette(c(colorScaleLow, colorScaleHigh))
# Color legend breaks
pretty_breaks <- pretty(p[, colorScaleColumn])
breaks <- pretty(seq(min(pretty_breaks), max(pretty_breaks),
diff(pretty_breaks[1:2])), n=nrow(p)*10)
nColors <- length(breaks)-1
color.key <- data.frame(col = colorPal(length(breaks)), row.names = breaks)
bottomProteinColor <- as.character(
color.key[cut(p[,colorScaleColumn], breaks, labels=F), "col"])
}
layout(lmat, heights=lhei, widths=lwid)
# Determine length & height of longest label
labelWidth <- strwidth(labels[which.max(nchar(labels))],
units="inches") * sizeAxisLabel
# Determine plot and tick dimensions
labelHeight <- strheight(labels[which.max(nchar(labels))],
units="inches") * sizeAxisLabel
plotWidth <- par("pin")[1]
plotHeight <- par("pin")[2]
tickLength <- plotHeight * 0.01
# Axis coordinates
xaxis <- pretty(c(p$start, p$stop))
xint <- diff(head(xaxis,2))/2
xminor <- seq(min(xaxis)+xint, max(xaxis)-xint, xint)
# Bernice only wants the x-axis to extend as far as the longest protein
xaxis[length(xaxis)] <- max(p$stop)
if(max(xminor)>max(xaxis)){
xminor <- head(xminor, -1)
truncate.labels <- T
} else {
truncate.labels <- F
}
yaxis <- p$ymid
yminor <- 0:nrow(p)
# Top plot of full protein
##############################################################################
# mai gives margin size in inches
mai.bot <- ifelse(xAxisTop, labelHeight*2, 0)
mai.left <- labelWidth
mai.right <- .35
par(mai = c(mai.bot, mai.left, 0, mai.right), xaxs="i")
plot(NA, axes=F, xlab="", ylab="",
xlim=c(0, max(p$stop)), ylim=c(0,1), type="n")
rect(min(p$start), 0, max(p$stop), 1, col=topProteinColor, border=NA)
if(xAxisTop){
if(truncate.labels){
add_xaxis(at=head(xaxis, -1), labels=head(xaxis, -1), cex=sizeAxisLabel)
} else {
add_xaxis(at=xaxis, labels=xaxis, cex=sizeAxisLabel)
}
}
# Identify N and C terminals
axis(2, at=.5, labels=F, pos=0, lwd=0, lwd.tick=1)
axis(4, at=.5, labels=F, pos=max(xaxis), lwd=0, lwd.tick=1)
mtext("N", side=2, cex = sizeAxisLabel, line=.55, las=2)
mtext("C", side=4, cex = sizeAxisLabel, line=.55, las=2)
# Add repeat arrows
if(!missing(repeatStarts)){
if(!missing(repeatLengths)){
# If only one repeatStart is provided create a contiguous train
# of repeats that start at that site
if(length(repeatStarts)==1){
repeats <- data.frame(
start = c(repeatStarts[1], head(repeatStarts[1]+cumsum(repeatLengths),-1)),
stop = repeatStarts[1]+cumsum(repeatLengths))
} else {
repeats <- data.frame(
start = repeatStarts,
stop = repeatStarts + repeatLengths)
}
for(r in 1:nrow(repeats)){
with(repeats,
polygon(x=c(start[r], start[r], stop[r]),
y=c(0, 1, .5), col=repeatColor, border=NA))
}
}
}
# Add markerBand to top protein
if(!missing(markerBand)){
rect(xleft=markerBand[1], ybottom=0, xright=markerBand[2],
ytop=1, col=markerBandColor, border=NA)
}
# Bottom plot of individual isoforms
##############################################################################
# mai gives margin size in inches
mai.bot <- ifelse(xAxisBottom, labelHeight*2.5, 0)
par(mai = c(mai.bot, mai.left, labelHeight, mai.right), xaxs="i")
plot(NA, axes=F, xlab="", ylab="",
xlim=c(0, max(xaxis)), ylim=c(0,nrow(p)), type="n")
# ggplot2 like background
if(ggplotBackground){
usr <- par("usr")
rect(usr[1], usr[3], usr[2], usr[4], col="grey90", border=NA)
abline(h=yaxis, v=xaxis, col="white", lwd=sizeAxisLabel * 1.1)
abline(h=yminor, v=xminor, col="grey95", lwd=sizeAxisLabel * .98)
}
# Plot each protein rectangle
for(i in 1:nrow(p)){
if(guideLines){
abline(h=p$ymid[i], lty=2)
}
rect(p$start[i], p$ybottom[i], p$stop[i], p$ytop[i],
col=bottomProteinColor, border=NA)
# Add markerBand to bottom proteins if the coordinates overlap
if(!missing(markerBand)){
if(markerBand[1] >= p$start[i] & markerBand[2] <= p$stop[i]){
rect(xleft=markerBand[1], ybottom=p$ybottom[i], xright=markerBand[2],
ytop=p$ytop[i], col=markerBandColor, border=NA)
}
}
}
# Add protein labels
mtext(labels, side=2, line=.5, at=p$ymid, las=2)
# axis(2, labels=labels, at=yaxis, lwd=0,
# lwd.tick=ifelse(ggplotBackground, sizeAxisLabel * 1.1, 0),
# col.ticks="grey50", las=2, cex.axis = sizeAxisLabel)
if(xAxisBottom){
if(truncate.labels){
add_xaxis(at=head(xaxis, -1), labels=head(xaxis, -1), cex=sizeAxisLabel)
} else {
add_xaxis(at=xaxis, labels=xaxis, cex=sizeAxisLabel)
}
}
# Color scale legend
##############################################################################
if(!missing(colorScaleColumn)){
par(mar=c(3, 0, 3, 3.5))
pretty_breaks_mids <- sapply(2:length(pretty_breaks), F=function(x)
mean(pretty_breaks[c(x,x-1)]))
image(y=breaks, z=t(rev(breaks)), breaks=breaks, col=colorPal(nColors),
axes=F, ylim=range(pretty_breaks), ylab="")
abline(h=pretty_breaks, col="white", lwd=1.5)
box(col="white", lwd=1.5)
mtext(side=4, pretty_breaks_mids, at=pretty_breaks_mids,
las=2, line=.25, cex=sizeAxisLabel * .8)
mtext(side=2, colorScaleColumn, cex=sizeAxisLabel*.8, font=2)
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment