Skip to content

Instantly share code, notes, and snippets.

@tleja
Last active April 23, 2023 10:03
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save tleja/7783150 to your computer and use it in GitHub Desktop.
Save tleja/7783150 to your computer and use it in GitHub Desktop.
R: Enhanced Heat Map v.3
## original R code $Id: heatmap.2.R 1724 2013-10-11 20:19:22Z warnes $
## updated by Thomas W. Leja: heatmap.3 2013-12-04
## Updates include:
## 1. z-score data transformation prior to the clustering: scale=c("none","row","column")
## 2. option to reassign the extremes within the scaled data: zlim=c(-3,3)
## 3. option to switch off dendrogram reordering: reorder=FALSE
heatmap.3 <- function (x,
## dendrogram control
Rowv = TRUE,
Colv=if(symm)"Rowv" else TRUE,
distfun = dist,
hclustfun = hclust,
dendrogram = c("both","row","column","none"),
reorder = TRUE,
symm = FALSE,
## data scaling
scale = c("none","row", "column"),
zlim = c(-3,3),
na.rm=TRUE,
## image plot
revC = identical(Colv, "Rowv"),
add.expr,
## mapping data to colors
breaks,
symbreaks=min(x < 0, na.rm=TRUE) || scale!="none",
## colors
col="heat.colors",
## block sepration
colsep,
rowsep,
sepcolor="white",
sepwidth=c(0.05,0.05),
## cell labeling
cellnote,
notecex=1.0,
notecol="cyan",
na.color=par("bg"),
## level trace
trace=c("column","row","both","none"),
tracecol="cyan",
hline=median(breaks),
vline=median(breaks),
linecol=tracecol,
## Row/Column Labeling
margins = c(5, 5),
ColSideColors,
RowSideColors,
cexRow = 0.2 + 1/log10(nr),
cexCol = 0.2 + 1/log10(nc),
labRow = NULL,
labCol = NULL,
srtRow = NULL,
srtCol = NULL,
adjRow = c(0,NA),
adjCol = c(NA,0),
offsetRow = 0.5,
offsetCol = 0.5,
## color key + density info
key = TRUE,
keysize = 1.5,
density.info=c("histogram","density","none"),
denscol=tracecol,
symkey = min(x < 0, na.rm=TRUE) || symbreaks,
densadj = 0.25,
## plot labels
main = NULL,
xlab = NULL,
ylab = NULL,
## plot layout
lmat = NULL,
lhei = NULL,
lwid = NULL,
## extras
...
)
{
scale01 <- function(x, low=min(x), high=max(x) )
{
x <- (x-low)/(high - low)
x
}
retval <- list()
scale <- if(symm && missing(scale)) "none" else match.arg(scale)
dendrogram <- match.arg(dendrogram)
trace <- match.arg(trace)
density.info <- match.arg(density.info)
if(length(col)==1 && is.character(col) )
col <- get(col, mode="function")
if(!missing(breaks) && (scale!="none"))
warning("Using scale=\"row\" or scale=\"column\" when breaks are",
"specified can produce unpredictable results.",
"Please consider using only one or the other.")
## key & density don't make sense when data is not all on the same scale
## if(scale!="none" && key==TRUE)
## {
## warning("Key cannot be plotted when scale!=\"none\".")
## key=FALSE
## }
if ( is.null(Rowv) || is.na(Rowv) )
Rowv <- FALSE
if ( is.null(Colv) || is.na(Colv) )
Colv <- FALSE
else if( Colv=="Rowv" && !isTRUE(Rowv) )
Colv <- FALSE
if(length(di <- dim(x)) != 2 || !is.numeric(x))
stop("`x' must be a numeric matrix")
nr <- di[1]
nc <- di[2]
if(nr <= 1 || nc <= 1)
stop("`x' must have at least 2 rows and 2 columns")
if(!is.numeric(margins) || length(margins) != 2)
stop("`margins' must be a numeric vector of length 2")
if(missing(cellnote))
cellnote <- matrix("", ncol=ncol(x), nrow=nrow(x))
if(!inherits(Rowv, "dendrogram")) {
## Check if Rowv and dendrogram arguments are consistent
if ( ( (!isTRUE(Rowv)) || (is.null(Rowv))) &&
(dendrogram %in% c("both","row") ) )
{
if (is.logical(Colv) && (Colv))
dendrogram <- "column"
else
dedrogram <- "none"
warning("Discrepancy: Rowv is FALSE, while dendrogram is `",
dendrogram, "'. Omitting row dendogram.")
}
}
if(!inherits(Colv, "dendrogram")) {
## Check if Colv and dendrogram arguments are consistent
if ( ( (!isTRUE(Colv)) || (is.null(Colv)))
&& (dendrogram %in% c("both","column")) )
{
if (is.logical(Rowv) && (Rowv))
dendrogram <- "row"
else
dendrogram <- "none"
warning("Discrepancy: Colv is FALSE, while dendrogram is `",
dendrogram, "'. Omitting column dendogram.")
}
}
## scaling
if(scale == "row") {
retval$rowMeans <- rm <- rowMeans(x, na.rm = na.rm)
x <- sweep(x, 1, rm)
retval$rowSDs <- sx <- apply(x, 1, sd, na.rm = na.rm)
x <- sweep(x, 1, sx, "/")
if (!missing(zlim)) x <- pmin(pmax(x, zlim[1]), zlim[2])
}
else if(scale == "column") {
retval$colMeans <- rm <- colMeans(x, na.rm = na.rm)
x <- sweep(x, 2, rm)
retval$colSDs <- sx <- apply(x, 2, sd, na.rm = na.rm)
x <- sweep(x, 2, sx, "/")
if (!missing(zlim)) x <- pmin(pmax(x, zlim[1]), zlim[2])
}
## by default order by row/col mean
## if(is.null(Rowv)) Rowv <- rowMeans(x, na.rm = na.rm)
## if(is.null(Colv)) Colv <- colMeans(x, na.rm = na.rm)
## get the dendrograms and reordering indices
## if( dendrogram %in% c("both","row") )
## { ## dendrogram option is used *only* for display purposes
if(inherits(Rowv, "dendrogram"))
{
ddr <- Rowv ## use Rowv 'as-is', when it is dendrogram
rowInd <- order.dendrogram(ddr)
}
else if (is.integer(Rowv))
{ ## Compute dendrogram and do reordering based on given vector
hcr <- hclustfun(distfun(x))
ddr <- as.dendrogram(hcr)
if (isTRUE(reorder)) ddr <- reorder(ddr, Rowv)
rowInd <- order.dendrogram(ddr)
if(nr != length(rowInd))
stop("row dendrogram ordering gave index of wrong length")
}
else if (isTRUE(Rowv))
{ ## If TRUE, compute dendrogram and do reordering based on rowMeans
Rowv <- rowMeans(x, na.rm = na.rm)
hcr <- hclustfun(distfun(x))
ddr <- as.dendrogram(hcr)
if (isTRUE(reorder)) ddr <- reorder(ddr, Rowv)
rowInd <- order.dendrogram(ddr)
if(nr != length(rowInd))
stop("row dendrogram ordering gave index of wrong length")
} else {
rowInd <- nr:1
}
## if( dendrogram %in% c("both","column") )
## {
if(inherits(Colv, "dendrogram"))
{
ddc <- Colv ## use Colv 'as-is', when it is dendrogram
colInd <- order.dendrogram(ddc)
}
else if(identical(Colv, "Rowv")) {
if(nr != nc)
stop('Colv = "Rowv" but nrow(x) != ncol(x)')
if(exists("ddr"))
{
ddc <- ddr
colInd <- order.dendrogram(ddc)
} else
colInd <- rowInd
} else if(is.integer(Colv))
{## Compute dendrogram and do reordering based on given vector
hcc <- hclustfun(distfun(if(symm)x else t(x)))
ddc <- as.dendrogram(hcc)
if (isTRUE(reorder)) ddc <- reorder(ddc, Colv)
colInd <- order.dendrogram(ddc)
if(nc != length(colInd))
stop("column dendrogram ordering gave index of wrong length")
}
else if (isTRUE(Colv))
{## If TRUE, compute dendrogram and do reordering based on rowMeans
Colv <- colMeans(x, na.rm = na.rm)
hcc <- hclustfun(distfun(if(symm)x else t(x)))
ddc <- as.dendrogram(hcc)
if (isTRUE(reorder)) ddc <- reorder(ddc, Colv)
colInd <- order.dendrogram(ddc)
if(nc != length(colInd))
stop("column dendrogram ordering gave index of wrong length")
}
else
{
colInd <- 1:nc
}
retval$rowInd <- rowInd
retval$colInd <- colInd
retval$call <- match.call()
## reorder x & cellnote
x <- x[rowInd, colInd]
x.unscaled <- x
cellnote <- cellnote[rowInd, colInd]
if(is.null(labRow))
labRow <- if(is.null(rownames(x))) (1:nr)[rowInd] else rownames(x)
else
labRow <- labRow[rowInd]
if(is.null(labCol))
labCol <- if(is.null(colnames(x))) (1:nc)[colInd] else colnames(x)
else
labCol <- labCol[colInd]
## Set up breaks and force values outside the range into the endmost bins
if(missing(breaks) || is.null(breaks) || length(breaks)<1 )
{
if( missing(col) || is.function(col) )
breaks <- 16
else
breaks <- length(col)+1
}
if(length(breaks)==1)
{
if(!symbreaks)
breaks <- seq( min(x, na.rm=na.rm), max(x,na.rm=na.rm), length=breaks)
else
{
extreme <- max(abs(x), na.rm=TRUE)
breaks <- seq( -extreme, extreme, length=breaks )
}
}
nbr <- length(breaks)
ncol <- length(breaks)-1
if(class(col)=="function")
col <- col(ncol)
min.breaks <- min(breaks)
max.breaks <- max(breaks)
x[x<min.breaks] <- min.breaks
x[x>max.breaks] <- max.breaks
## Calculate the plot layout
if( missing(lhei) || is.null(lhei) )
lhei <- c(keysize, 4)
if( missing(lwid) || is.null(lwid) )
lwid <- c(keysize, 4)
if( missing(lmat) || is.null(lmat) )
{
lmat <- rbind(4:3, 2:1)
if(!missing(ColSideColors)) { ## add middle row to layout
if(!is.character(ColSideColors) || length(ColSideColors) != nc)
stop("'ColSideColors' must be a character vector of length ncol(x)")
lmat <- rbind(lmat[1,]+1, c(NA,1), lmat[2,]+1)
lhei <- c(lhei[1], 0.2, lhei[2])
}
if(!missing(RowSideColors)) { ## add middle column to layout
if(!is.character(RowSideColors) || length(RowSideColors) != nr)
stop("'RowSideColors' must be a character vector of length nrow(x)")
lmat <- cbind(lmat[,1]+1, c(rep(NA, nrow(lmat)-1), 1), lmat[,2]+1)
lwid <- c(lwid[1], 0.2, lwid[2])
}
lmat[is.na(lmat)] <- 0
}
if(length(lhei) != nrow(lmat))
stop("lhei must have length = nrow(lmat) = ", nrow(lmat))
if(length(lwid) != ncol(lmat))
stop("lwid must have length = ncol(lmat) =", ncol(lmat))
## Graphics `output' -----------------------
op <- par(no.readonly = TRUE)
on.exit(par(op))
layout(lmat, widths = lwid, heights = lhei, respect = FALSE)
## draw the side bars
if(!missing(RowSideColors)) {
par(mar = c(margins[1],0, 0,0.5))
image(rbind(1:nr), col = RowSideColors[rowInd], axes = FALSE)
}
if(!missing(ColSideColors)) {
par(mar = c(0.5,0, 0,margins[2]))
image(cbind(1:nc), col = ColSideColors[colInd], axes = FALSE)
}
## draw the main carpet
par(mar = c(margins[1], 0, 0, margins[2]))
#if(scale != "none" || !symm)
# {
x <- t(x)
cellnote <- t(cellnote)
# }
if(revC)
{ ## x columns reversed
iy <- nr:1
if(exists("ddr"))
ddr <- rev(ddr)
x <- x[,iy]
cellnote <- cellnote[,iy]
}
else iy <- 1:nr
## display the main carpet
image(1:nc, 1:nr, x, xlim = 0.5+ c(0, nc), ylim = 0.5+ c(0, nr),
axes = FALSE, xlab = "", ylab = "", col=col, breaks=breaks,
...)
retval$carpet <- x
if(exists("ddr"))
retval$rowDendrogram <- ddr
if(exists("ddc"))
retval$colDendrogram <- ddc
retval$breaks <- breaks
retval$col <- col
## fill 'na' positions with na.color
if(!invalid(na.color) & any(is.na(x)))
{
mmat <- ifelse(is.na(x), 1, NA)
image(1:nc, 1:nr, mmat, axes = FALSE, xlab = "", ylab = "",
col=na.color, add=TRUE)
}
## add column labels
if(is.null(srtCol))
axis(1,
1:nc,
labels= labCol,
las= 2,
line= -0.5 + offsetCol,
tick= 0,
cex.axis= cexCol,
hadj=adjCol[1],
padj=adjCol[2]
)
else
{
if(is.numeric(srtCol))
{
if(missing(adjCol) || is.null(adjCol))
adjCol=c(1,NA)
xpd.orig <- par("xpd")
par(xpd=NA)
xpos <- axis(1, 1:nc, labels=rep("", nc), las=2, tick=0)
text(x=xpos,
y=par("usr")[3] - (1.0 + offsetCol) * strheight("M"),
labels=labCol,
##pos=1,
adj=adjCol,
cex=cexCol,
srt=srtCol)
par(xpd=xpd.orig)
}
else
warning("Invalid value for srtCol ignored.")
}
## add row labels
if(is.null(srtRow))
{
axis(4,
iy,
labels=labRow,
las=2,
line=-0.5+offsetRow,
tick=0,
cex.axis=cexRow,
hadj=adjRow[1],
padj=adjRow[2]
)
}
else
{
if(is.numeric(srtRow))
{
xpd.orig <- par("xpd")
par(xpd=NA)
ypos <- axis(4, iy, labels=rep("", nr), las=2, line= -0.5, tick=0)
text(x=par("usr")[2] + (1.0 + offsetRow) * strwidth("M"),
y=ypos,
labels=labRow,
adj=adjRow,
cex=cexRow,
srt=srtRow
)
par(xpd=xpd.orig)
}
else
warning("Invalid value for srtRow ignored.")
}
## add row and column headings (xlab, ylab)
if(!is.null(xlab)) mtext(xlab, side = 1, line = margins[1] - 1.25)
if(!is.null(ylab)) mtext(ylab, side = 4, line = margins[2] - 1.25)
## perform user-specified function
if (!missing(add.expr))
eval(substitute(add.expr))
## add 'background' colored spaces to visually separate sections
if(!missing(colsep))
for(csep in colsep)
rect(xleft =csep+0.5, ybottom=0,
xright=csep+0.5+sepwidth[1], ytop=ncol(x)+1,
lty=1, lwd=1, col=sepcolor, border=sepcolor)
if(!missing(rowsep))
for(rsep in rowsep)
rect(xleft =0, ybottom= (ncol(x)+1-rsep)-0.5,
xright=nrow(x)+1, ytop = (ncol(x)+1-rsep)-0.5 - sepwidth[2],
lty=1, lwd=1, col=sepcolor, border=sepcolor)
## show traces
min.scale <- min(breaks)
max.scale <- max(breaks)
x.scaled <- scale01(t(x), min.scale, max.scale)
if(trace %in% c("both","column") )
{
retval$vline <- vline
vline.vals <- scale01(vline, min.scale, max.scale)
for( i in colInd )
{
if(!is.null(vline))
{
abline(v=i-0.5 + vline.vals, col=linecol, lty=2)
}
xv <- rep(i, nrow(x.scaled)) + x.scaled[,i] - 0.5
xv <- c(xv[1], xv)
yv <- 1:length(xv)-0.5
lines(x=xv, y=yv, lwd=1, col=tracecol, type="s")
}
}
if(trace %in% c("both","row") )
{
retval$hline <- hline
hline.vals <- scale01(hline, min.scale, max.scale)
for( i in rowInd )
{
if(!is.null(hline))
{
abline(h=i + hline, col=linecol, lty=2)
}
yv <- rep(i, ncol(x.scaled)) + x.scaled[i,] - 0.5
yv <- rev(c(yv[1], yv))
xv <- length(yv):1-0.5
lines(x=xv, y=yv, lwd=1, col=tracecol, type="s")
}
}
if(!missing(cellnote))
text(x=c(row(cellnote)),
y=c(col(cellnote)),
labels=c(cellnote),
col=notecol,
cex=notecex)
## the two dendrograms :
par(mar = c(margins[1], 0, 0, 0))
if( dendrogram %in% c("both","row") )
{
plot(ddr, horiz = TRUE, axes = FALSE, yaxs = "i", leaflab = "none")
}
else
plot.new()
par(mar = c(0, 0, if(!is.null(main)) 5 else 0, margins[2]))
if( dendrogram %in% c("both","column") )
{
plot(ddc, axes = FALSE, xaxs = "i", leaflab = "none")
}
else
plot.new()
## title
if(!is.null(main)) title(main, cex.main = 1.5*op[["cex.main"]])
## Add the color-key
if(key)
{
par(mar = c(5, 4, 2, 1), cex=0.75)
tmpbreaks <- breaks
if(symkey)
{
max.raw <- max(abs(c(x,breaks)),na.rm=TRUE)
min.raw <- -max.raw
tmpbreaks[1] <- -max(abs(x), na.rm=TRUE)
tmpbreaks[length(tmpbreaks)] <- max(abs(x), na.rm=TRUE)
}
else
{
min.raw <- min(x, na.rm=TRUE) ## Again, modified to use scaled
max.raw <- max(x, na.rm=TRUE) ## or unscaled (SD 12/2/03)
}
z <- seq(min.raw, max.raw, length=length(col))
image(z=matrix(z, ncol=1),
col=col, breaks=tmpbreaks,
xaxt="n", yaxt="n")
par(usr=c(0,1,0,1))
lv <- pretty(breaks)
xv <- scale01(as.numeric(lv), min.raw, max.raw)
axis(1, at=xv, labels=lv)
if(scale=="row")
mtext(side=1,"Row Z-Score", line=2)
else if(scale=="column")
mtext(side=1,"Column Z-Score", line=2)
else
mtext(side=1,"Value", line=2)
if(density.info=="density")
{
## Experimental : also plot density of data
dens <- density(x, adjust=densadj, na.rm=TRUE)
omit <- dens$x < min(breaks) | dens$x > max(breaks)
dens$x <- dens$x[-omit]
dens$y <- dens$y[-omit]
dens$x <- scale01(dens$x,min.raw,max.raw)
lines(dens$x, dens$y / max(dens$y) * 0.95, col=denscol, lwd=1)
axis(2, at=pretty(dens$y)/max(dens$y) * 0.95, pretty(dens$y) )
title("Color Key\nand Density Plot")
par(cex=0.5)
mtext(side=2,"Density", line=2)
}
else if(density.info=="histogram")
{
h <- hist(x, plot=FALSE, breaks=breaks)
hx <- scale01(breaks,min.raw,max.raw)
hy <- c(h$counts, h$counts[length(h$counts)])
lines(hx, hy/max(hy)*0.95, lwd=1, type="s", col=denscol)
axis(2, at=pretty(hy)/max(hy) * 0.95, pretty(hy) )
title("Color Key\nand Histogram")
par(cex=0.5)
mtext(side=2,"Count", line=2)
}
else
title("Color Key")
}
else
plot.new()
## Create a table showing how colors match to (transformed) data ranges
retval$colorTable <- data.frame(
low=retval$breaks[-length(retval$breaks)],
high=retval$breaks[-1],
color=retval$col
)
invisible( retval )
}
@stanstrup
Copy link

I hope you don't mind I turned this into a package: https://github.com/stanstrup/heatmap3
It was very useful to me and it was easier to load it that way.
I noticed that heatmap.2 was updated quite a lot since you made these modifications. But it quite difficult now to port your changes since I cannot see exactly what was changed. Any interest in porting the changes to newest heatmap.2?

If you don't feel like porting your changes do you know which version was the basis for your changes? That was it might be easier to port the changes or at least copy the man page and update with your changes.

@stanstrup
Copy link

I figured it was better to be able to use heatmap.2 if possible so I can get possible updates to the function. So I took your zClust function posted here: http://stackoverflow.com/questions/17924828/differences-in-heatmap-clustering-defaults-in-r-heatplot-versus-heatmap-2 and extended it to have the features of your heatmap.3. You can find the code here for the moment: https://github.com/stanstrup/chemhelper/blob/master/R/calculations.R#L154
I hope you don't mind.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment