Skip to content

Instantly share code, notes, and snippets.

@sgibb
Created February 28, 2014 23:00
Show Gist options
  • Save sgibb/9281811 to your computer and use it in GitHub Desktop.
Save sgibb/9281811 to your computer and use it in GitHub Desktop.
MWE to demonstrate the speed improvement of findMSeEMRTs2
library("synapter")
library("synapterdata")
## map some private functions to global NAMESPACE
doHDMSePredictions <- synapter:::doHDMSePredictions
error.ppm <- synapter:::error.ppm
findMSeEMRTs <- synapter:::findMSeEMRTs
###########################################################################
## some boring stuff to prepare the Synapter object
###########################################################################
hdmsefile <- getHDMSeFinalPeptide()[2]
msefile <- getMSeFinalPeptide()[2]
msepep3dfile <- getMSePep3D()[2]
fas <- getFasta()
input <- list(identpeptide = hdmsefile,
quantpeptide = msefile,
quantpep3d = msepep3dfile,
fasta = fas)
s <- Synapter(input)
## apply default filtering
filterUniqueDbPeptides(s, verbose=TRUE)
fdr <- fpr <- 0.05
ppm <- 20
filterIdentPepScore(s, method="BH", fdr=fdr)
filterQuantPepScore(s, method="BH", fdr=fdr)
filterIdentProtFpr(s, fpr=fpr)
filterQuantProtFpr(s, fpr=fpr)
filterIdentPpmError(s, ppm=ppm)
filterQuantPpmError(s, ppm=ppm)
mergePeptides(s)
modelRt(s, span=0.05)
## prepare some values
grid.ppm.from <- 5
grid.ppm.to <- 20
grid.ppm.by <- 2
grid.nsd.from <- 0.5
grid.nsd.to <- 5.0
grid.nsd.by <- 0.5
grid.subset <- 1
ppms <- seq(grid.ppm.from, grid.ppm.to, by=grid.ppm.by)
nsds <- seq(grid.nsd.from, grid.nsd.to, by=grid.nsd.by)
###########################################################################
## END OF preprocessing
###########################################################################
findMSeEMRTs2 <- function(identpep, pep3d, mergedpep, nsd,
ppmthreshold,
model,
mergedEMRTs) {
hdmseData <- doHDMSePredictions(identpep, model, nsd)
## sanity checking - v 0.4.6
stopifnot(all(hdmseData$lower <= hdmseData$upper))
stopifnot(length(hdmseData$lower) == length(hdmseData$upper))
n <- length(hdmseData$lower)
###########################################################################
## new part
###########################################################################
sortedPep3d <- pep3d
## add additional index row to avoid ugly calculation for subindices in the
## apply call
sortedPep3d$idx <- 1:nrow(pep3d)
sortedPep3d <- sortedPep3d[order(pep3d$rt_min),]
lowerIdx <- findInterval(hdmseData$lower, sortedPep3d$rt_min)+1
upperIdx <- findInterval(hdmseData$upper, sortedPep3d$rt_min)
## just to be sure
lowerIdx <- pmin(lowerIdx, upperIdx)
## create matrix with boundaries (col 1 and 2) and the rownumbers
idxs <- cbind(lower=lowerIdx, upper=upperIdx, r=1:n)
res <- apply(idxs, 1, function(i) {
selPpm <- which((abs(error.ppm(obs = sortedPep3d$mwHPlus[i[1]:i[2]] ,
theo = hdmseData$mass[i[3]])) < ppmthreshold))
sortedPep3d$idx[i[1]:i[2]][selPpm]
})
k <- sapply(res, length)
## Those that match *1* spectumIDs will be transferred
## BUT there is no guarantee that with *1* unique match,
## we get the correct one, even for those that were
## part of the matched high confidence ident+quant
## identified subset!
## #############################################################
## n <- length(k) ## already have n
m <- ncol(pep3d)
## to initialise the new pep3d2 with with n rows
## and same nb of columns than pep3d
pep3d2 <- matrix(NA, nrow=n, ncol=m, dimnames=list(c(), colnames(pep3d)))
pep3d2[, 1] <- k
k1 <- which(k == 1)
pep3d2[k1, ] <- unlist(pep3d[unlist(res[k1]), ])
###########################################################################
## END OF new part
###########################################################################
## #############################################################
ans <- cbind(identpep, pep3d2)
ans$matched.quant.spectrumIDs <- sapply(res, paste0, collapse = ",")
ans$precursor.leID.quant <- NA
idx <- match(mergedpep$precursor.leID.ident, ans$precursor.leID)
ans$precursor.leID.quant[idx] <- mergedpep$precursor.leID.quant
i <- grep("precursor.leID$", names(ans))
names(ans)[i] <- "precursor.leID.ident" ## to avoid any confusion
ans$idSource <- "transfer"
if (mergedEMRTs == "rescue") {
## these are those that were in the merged data set but that
## did NOT get transferred because they did NOT uniquely matched
## a pep3D EMRT
lost <- ans$Function != 1 & ans$precursor.leID.ident %in% mergedpep$precursor.leID.ident
## rescue these by adding their quant straight from QuantPeptideData
lostids <- ans$precursor.leID.ident[lost]
ans[lost, "Counts"] <-
mergedpep[match(lostids, mergedpep$precursor.leID.ident), "precursor.inten.quant"]
ans[lost, "idSource"] <- "rescue"
} else if (mergedEMRTs == "copy") {
allmerged <- ans$precursor.leID.ident %in% mergedpep$precursor.leID.ident
allmergedids <- ans$precursor.leID.ident[allmerged]
ans[allmerged, "Counts"] <-
mergedpep[match(allmergedids, mergedpep$precursor.leID.ident), "precursor.inten.quant"]
ans[allmerged, "idSource"] <- "copy"
} ## else if (fromQuant == "transfer") keep as is
## since v 0.5.0 - removing multiply matched EMRTs
## dupIDs <- ans$spectrumID[ans$Function == 1 & duplicated(ans$spectrumID)]
## dupRows <- ans$spectrumID %in% dupIDs
## ans[dupRows, (ncol(identpep)+1) : ncol(ans)] <- -1
return(ans)
}
system.time(
fm1 <- findMSeEMRTs(s$IdentPeptideData, s$QuantPep3DData,
s$MergedFeatures[TRUE, ], ppms[1], nsds[1],
s$RtModel, mergedEMRTs="transfer"))
# user system elapsed
# 31.018 0.464 31.876
system.time(
fm2 <- findMSeEMRTs2(s$IdentPeptideData, s$QuantPep3DData,
s$MergedFeatures[TRUE, ], ppms[1], nsds[1],
s$RtModel, mergedEMRTs="transfer"))
# user system elapsed
# 0.440 0.008 0.456
all.equal(fm1, fm2)
# TRUE
system.time(
fm3 <- findMSeEMRTs(s$IdentPeptideData, s$QuantPep3DData,
s$MergedFeatures[TRUE, ], ppms[8], nsds[10],
s$RtModel, mergedEMRTs="transfer"))
# user system elapsed
# 38.674 0.124 38.914
system.time(
fm4 <- findMSeEMRTs2(s$IdentPeptideData, s$QuantPep3DData,
s$MergedFeatures[TRUE, ], ppms[8], nsds[10],
s$RtModel, mergedEMRTs="transfer"))
# user system elapsed
# 0.900 0.012 0.925
all.equal(fm3, fm4)
# TRUE
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment