Last active
March 13, 2017 20:52
-
-
Save mwalzer/427e14807ef96c9d5bbedaef63dea1ab to your computer and use it in GitHub Desktop.
OpenMS_TextExporter2MSstat
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reshape an OpenMS TextExporter consensus table (simple label-free
# quantification) into a long table with MSstats-style column names and
# write it out as a TSV file.
#
# Input : `peptides.lfq.tutorial` -- must already exist in the session; it is
#         not created by this script. NOTE(review): presumably the
#         TextExporter output read into a data frame -- confirm with the caller.
# Output: 'msstats_from_lfq_tut.tsv' in the current working directory.
library(ggplot2)
library(dplyr)
library(tidyr)

msstats <- peptides.lfq.tutorial

# Assume charge_cf as PrecursorCharge for all features in one row of
# TextExporter output.
msstats <- msstats %>%
  rename(PrecursorCharge = charge_cf)

# Drop columns that are not needed downstream. `matches()` is a
# case-insensitive regex match on the column NAME, so each call removes every
# column whose name contains the given fragment.
msstats <- msstats %>%
  select(-matches("_cf")) %>%
  select(-matches("width")) %>%
  select(-matches("mz")) %>%
  select(-matches("rt")) %>%
  select(-matches("rank")) %>%
  select(-matches("score")) %>%
  select(-matches("peptide_charge")) %>%
  select(-matches("score_type")) %>%
  select(-matches("search_identifier"))

# Build the peptide identifier as '<aa_before>.<sequence>.<aa_after>' and
# rename the accession column.
msstats <- msstats %>%
  unite(PeptideSequence, aa_before, sequence, aa_after, sep = ".") %>%
  rename(ProteinName = accessions)

# Row id that keeps otherwise identical rows apart, so spread() below does not
# fail on duplicate identifiers. seq_len() is safe for a zero-row table.
msstats$obs <- seq_len(nrow(msstats))

# Wide -> long -> wide: every remaining '<variable>_<suffix>' column is
# gathered, its name is split at the LAST underscore into the variable name
# and the suffix (stored as Condition), and the variables are spread back
# into one column each.
# The split uses a regex rather than a character position: the meaning of a
# negative numeric `sep` in separate() differs between tidyr releases, and a
# fixed position breaks as soon as the suffix has more than one character
# (e.g. '_10').
msstats <- msstats %>%
  gather(key, value, -PeptideSequence, -ProteinName, -obs, -PrecursorCharge) %>%
  tidyr::extract(key, c("variable", "Condition"), "^(.+)_([^_]+)$") %>%
  spread(variable, value, convert = TRUE) %>%
  # Anchored so that the case-insensitive match cannot hit 'PrecursorCharge'.
  select(-matches("^charge$"))

msstats <- msstats %>%
  rename(Intensity = intensity)

# Discard the differentiation between duplicate observations.
msstats <- msstats %>%
  select(-obs)

# Convert 0-observation runs to NA: keep the intensity only where the
# (consensus) precursor charge is positive.
msstats <- msstats %>%
  mutate(Intensity = ifelse(PrecursorCharge > 0, Intensity, NA))

# Constant columns for an unlabelled, fragment-free LFQ table.
msstats$FragmentIon <- NA
msstats$ProductCharge <- NA
msstats$IsotopeLabelType <- "L" # L in case of no labeling
msstats$BioReplicate <- 1

# No technical replicates: 'Run' is the combination of Condition and
# BioReplicate, concatenated by '_'.
# With n > 0 technical replicates, 'Run' would be the combination of Condition,
# BioReplicate and technical replicate; later this must come from mzTab?!
# This one is for a simple LFQ approach.
msstats$Run <- paste0(msstats$Condition, "_", msstats$BioReplicate)

# row.names = FALSE: with the default, write.table() emits an unnamed
# row-name column, which leaves the header one field shorter than the data
# rows.
write.table(
  msstats,
  file = "msstats_from_lfq_tut.tsv",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment