sgibb/plotProteome.R

## plotProteome.R
library("synapter")
library("MSnbase")
load_all()

combMSnSet <- readRDS("refCombMSNSet.RDS")

plotProteomes <- function(x,
                          xlim=log2(range(exprs(x), na.rm=TRUE)),
                          ylim=c(-4, 4),
                          xlab=expression(log[2](B)),
                          ylab=expression(log[2](A:B)),
                          mapping=factor(rep(1:2, each=5), labels=c("A", "B")),
                          ratios=c("Homo sapiens"=1,
                                   "Saccharomyces cerevisiae"=2,
                                   "Escherichia coli"=1/4,
                                   "misc"=NA),
                          col=c("Homo sapiens"="#1b9e77",
                                "Saccharomyces cerevisiae"="#d95f02",
                                "Escherichia coli"="#7570b3",
                                "misc"="#e6ab02"),
                          pch=c("Homo sapiens"=20,
                                "Saccharomyces cerevisiae"=20,
                                "Escherichia coli"=20,
                                "misc"=18),
                          pcex=0.75,
                          alpha=255/2,
                          f=2/3, ## lowess span
                          st=Inf, ## saturation threshold
                          ...){

  ## fetch protein description
  pd <- as.matrix(fData(x)[, grep("protein\\.Description", fvarLabels(x))])

  ## fetch organism name
  sp <- gsub(".*OS=([A-z]+ [A-z]+).*", "\\1", pd)

  ## a few proteins are not identified (and contain NA)
  ## or are identified as different species
  ## we choose the "mean" species
  species <- sp[,1]
  ## misidentified?
  mi <- rowSums(sp[,1] != sp)
  mi <- is.na(mi) | mi > 0
  species[mi] <- apply(sp[mi,, drop=FALSE], 1, function(xx)names(which.max(table(xx)))[1])

  .rowMeansGroups <- function(x, groups) {
    na <- !is.na(x)
    mode(na) <- "numeric"
    na <- t(rowsum(t(na), groups, na.rm=TRUE))
    rs <- t(rowsum(t(x), groups, na.rm=TRUE))
    rs/na
  }

  m <- .rowMeansGroups(exprs(x), mapping)
  m[, 1] <- m[,1]/m[,2]
  colnames(m)[1] <- "A/B"
  m <- log2(m)

  ## misc colour for species different from Humans, Yeast or Ecoli
  scol <- col[species]
  scol[!species %in% names(col)] <- col["misc"]
  scol <- paste0(scol, as.hexmode(round(alpha)))

  ## misc pch for species different from Humans, Yeast or Ecoli
  spch <- pch[species]
  spch[!species %in% names(pch)] <- pch["misc"]

  plot(NA, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab, ...)
  abline(h=log2(ratios), col=col[names(ratios)], lty=2)
  points(m[,2], m[,1], col=scol, pch=spch, cex=pcex)
  legend("topright", legend=names(col), col=col, pch=pch, cex=pcex, bty="n")

  abline(v=log2(st), lty=2, col="#808080")
  text(x=log2(st), y=-4,
       labels=paste0("saturation threshold = ", prettyNum(st)),
       col="#808080", cex=pcex, pos=ifelse(mean(xlim) < log2(st), 2, 4))

  if (!is.na(f) && !is.null(f)) {
    for (nm in names(col)[names(col) != "misc"]) {
      s <- m[species == nm, 2:1]
      s <- s[rowSums(is.na(s) | is.infinite(s)) == 0L,]
      lines(lowess(s, f=f), col=col[nm], lty=6, lwd=2)
    }
  }
}

msnSetSum <- requantify(combMSnSet, method="sum", saturationThreshold=Inf, onlyCommonIsotopes=FALSE)
msnSetRef <- requantify(combMSnSet, method="reference", saturationThreshold=Inf)
msnSetTh <- requantify(combMSnSet, method="th.mean", saturationThreshold=Inf, requantifyAll=FALSE)

xl <- c(10, 22)
pcex <- 0.5

pdf("compare.pdf", width=12, height=7, paper="a4r")
par(mfrow=c(2, 2))
plotProteomes(combMSnSet, main="none", xlim=xl, pcex=pcex)
plotProteomes(msnSetSum, main="sum", xlim=xl, pcex=pcex)
plotProteomes(msnSetRef, main="reference", xlim=xl, pcex=pcex)
plotProteomes(msnSetTh, main="th.mean", xlim=xl, pcex=pcex)
par(mfrow=c(1, 1))
dev.off()
	library("synapter")
	library("MSnbase")
	load_all()

	combMSnSet <- readRDS("refCombMSNSet.RDS")

	plotProteomes <- function(x,
	xlim=log2(range(exprs(x), na.rm=TRUE)),
	ylim=c(-4, 4),
	xlab=expression(log[2](B)),
	ylab=expression(log[2](A:B)),
	mapping=factor(rep(1:2, each=5), labels=c("A", "B")),
	ratios=c("Homo sapiens"=1,
	"Saccharomyces cerevisiae"=2,
	"Escherichia coli"=1/4,
	"misc"=NA),
	col=c("Homo sapiens"="#1b9e77",
	"Saccharomyces cerevisiae"="#d95f02",
	"Escherichia coli"="#7570b3",
	"misc"="#e6ab02"),
	pch=c("Homo sapiens"=20,
	"Saccharomyces cerevisiae"=20,
	"Escherichia coli"=20,
	"misc"=18),
	pcex=0.75,
	alpha=255/2,
	f=2/3, ## lowess span
	st=Inf, ## saturation threshold
	...){

	## fetch protein description
	pd <- as.matrix(fData(x)[, grep("protein\\.Description", fvarLabels(x))])

	## fetch organism name
	sp <- gsub(".OS=([A-z]+ [A-z]+).", "\\1", pd)

	## a few proteins are not identified (and contain NA)
	## or are identified as different species
	## we choose the "mean" species
	species <- sp[,1]
	## misidentified?
	mi <- rowSums(sp[,1] != sp)
	mi <- is.na(mi) \| mi > 0
	species[mi] <- apply(sp[mi,, drop=FALSE], 1, function(xx)names(which.max(table(xx)))[1])

	.rowMeansGroups <- function(x, groups) {
	na <- !is.na(x)
	mode(na) <- "numeric"
	na <- t(rowsum(t(na), groups, na.rm=TRUE))
	rs <- t(rowsum(t(x), groups, na.rm=TRUE))
	rs/na
	}

	m <- .rowMeansGroups(exprs(x), mapping)
	m[, 1] <- m[,1]/m[,2]
	colnames(m)[1] <- "A/B"
	m <- log2(m)

	## misc colour for species different from Humans, Yeast or Ecoli
	scol <- col[species]
	scol[!species %in% names(col)] <- col["misc"]
	scol <- paste0(scol, as.hexmode(round(alpha)))

	## misc pch for species different from Humans, Yeast or Ecoli
	spch <- pch[species]
	spch[!species %in% names(pch)] <- pch["misc"]

	plot(NA, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab, ...)
	abline(h=log2(ratios), col=col[names(ratios)], lty=2)
	points(m[,2], m[,1], col=scol, pch=spch, cex=pcex)
	legend("topright", legend=names(col), col=col, pch=pch, cex=pcex, bty="n")

	abline(v=log2(st), lty=2, col="#808080")
	text(x=log2(st), y=-4,
	labels=paste0("saturation threshold = ", prettyNum(st)),
	col="#808080", cex=pcex, pos=ifelse(mean(xlim) < log2(st), 2, 4))

	if (!is.na(f) && !is.null(f)) {
	for (nm in names(col)[names(col) != "misc"]) {
	s <- m[species == nm, 2:1]
	s <- s[rowSums(is.na(s) \| is.infinite(s)) == 0L,]
	lines(lowess(s, f=f), col=col[nm], lty=6, lwd=2)
	}
	}
	}

	msnSetSum <- requantify(combMSnSet, method="sum", saturationThreshold=Inf, onlyCommonIsotopes=FALSE)
	msnSetRef <- requantify(combMSnSet, method="reference", saturationThreshold=Inf)
	msnSetTh <- requantify(combMSnSet, method="th.mean", saturationThreshold=Inf, requantifyAll=FALSE)

	xl <- c(10, 22)
	pcex <- 0.5

	pdf("compare.pdf", width=12, height=7, paper="a4r")
	par(mfrow=c(2, 2))
	plotProteomes(combMSnSet, main="none", xlim=xl, pcex=pcex)
	plotProteomes(msnSetSum, main="sum", xlim=xl, pcex=pcex)
	plotProteomes(msnSetRef, main="reference", xlim=xl, pcex=pcex)
	plotProteomes(msnSetTh, main="th.mean", xlim=xl, pcex=pcex)
	par(mfrow=c(1, 1))
	dev.off()