Skip to content

Instantly share code, notes, and snippets.

@mwalzer
Last active March 13, 2017 20:52
Show Gist options
  • Save mwalzer/427e14807ef96c9d5bbedaef63dea1ab to your computer and use it in GitHub Desktop.
Save mwalzer/427e14807ef96c9d5bbedaef63dea1ab to your computer and use it in GitHub Desktop.
OpenMS_TextExporter2MSstat
library(ggplot2)
library(dplyr)
library(tidyr)
# Build the working table from `peptides.lfq.tutorial`, which must already
# exist in the session (presumably the imported TextExporter consensus
# output -- it is not created in this script).
msstats <- peptides.lfq.tutorial %>%
  # The consensus-feature charge (charge_cf) is assumed to be the precursor
  # charge of every feature listed in the same TextExporter row. It has to be
  # renamed before the "_cf" columns are dropped below.
  rename(PrecursorCharge = charge_cf) %>%
  # Drop every column whose name contains one of these fragments. matches()
  # is case-insensitive and treats the argument as a regular expression, so
  # one alternation removes the same columns as one select() per fragment.
  select(-matches(paste(
    c(
      "_cf", "width", "mz", "rt", "rank", "score",
      "peptide_charge", "score_type", "search_identifier"
    ),
    collapse = "|"
  )))
# Reshape the wide table (one row per consensus feature, one
# `intensity_<run>` / `charge_<run>` column per run) into long form with one
# row per feature and run.

# Join flanking residues and sequence as "<aa_before>.<sequence>.<aa_after>"
# and give the accession column its MSstats name.
msstats <- msstats %>%
  unite(PeptideSequence, aa_before, sequence, aa_after, sep = ".") %>%
  rename(ProteinName = accessions)

# Row id so that duplicate peptide/protein/charge rows stay distinct while
# spreading. seq_len() is safe for an empty table, unlike 1:nrow().
msstats$obs <- seq_len(nrow(msstats))

msstats <- msstats %>%
  # Stack every remaining per-run column into key/value pairs.
  gather(
    key, value,
    -PeptideSequence, -ProteinName, -obs, -PrecursorCharge
  ) %>%
  # Split "<name>_<run>" into variable = "<name>_" and Condition = "<run>".
  # A regex is used instead of separate(key, ..., -2): the meaning of a
  # negative position changed in tidyr 0.8.0, and a fixed position also
  # breaks once the run index has more than one digit.
  tidyr::extract(
    key, c("variable", "Condition"),
    regex = "^(.*_)([^_]+)$"
  ) %>%
  # One column per variable again ("intensity_", "charge_", ...).
  spread(variable, value, convert = TRUE) %>%
  # The per-run charge is not used; PrecursorCharge is kept instead.
  select(-matches("charge_")) %>%
  rename(Intensity = intensity_) %>%
  # The row id was only needed to keep duplicates apart during spread().
  select(-obs) %>%
  # Blank the intensity wherever the precursor charge is not positive.
  # NOTE(review): this tests the consensus charge, which is the same for all
  # runs of a feature, so an unobserved run inside an observed feature is
  # not set to NA here -- confirm this is the intended behaviour.
  mutate(Intensity = ifelse(PrecursorCharge > 0, Intensity, NA))
# Add the remaining columns as constants; the column order below is the
# order in which they appear in the written file.
msstats$FragmentIon <- NA
msstats$ProductCharge <- NA
msstats$IsotopeLabelType <- "L" # "L" in case of no labeling
msstats$BioReplicate <- 1

# Without technical replicates, 'Run' is Condition and BioReplicate joined
# by "_". With n > 0 technical replicates, 'Run' would combine Condition,
# BioReplicate and the technical replicate; later this information must come
# from elsewhere (mzTab?). This version covers the simple LFQ case only.
msstats$Run <- paste0(msstats$Condition, "_", msstats$BioReplicate)

# row.names = FALSE keeps header and data rows at the same number of fields.
# With the default (TRUE) write.table() adds an unnamed row-number column to
# every data row but not to the header.
write.table(
  msstats,
  file = "msstats_from_lfq_tut.tsv",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment