Created
November 24, 2021 17:46
-
-
Save meowcat/42e4660e28e024d05ad5e90e4da5aab2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
#' Add arbitrary data to record from an infolist column
#'
#' Normal behaviour of RMassBank is that only specific columns from infolist.csv
#' are added to records. Any extra data is discarded.
#'
#' This function provides a possibility to add or overwrite data in a record
#' in a per-compound manner (as opposed to a global value specified in the
#' RMassBank settings). For example, the user may overwrite the retention time
#' or add a CCS value.
#'
#' Note that tags are not reordered and new tags are added after all existing
#' tags. Therefore, only use this for pre-existing tags or optional tags (that
#' don't have a prespecified order).
#'
#' Compounds whose id has no row in the infolist are left unchanged. If several
#' rows share the same id, the first one is used and a warning is raised.
#'
#' @param mb `mbWorkspace` after step 4 of mbWorkflow (i.e. after record
#'   compilation).
#' @param infolist A CSV infolist with extra columns; it must contain an `id`
#'   column. It is read with `read.csv()`, which sanitises header names with
#'   `make.names()` (for example, `$` becomes `.`).
#' @param column A single string naming the column to be inserted, written as
#'   `TAG` or `TAG.SUBTAG`. For example, for the tag
#'   `AC$CHROMATOGRAPHY: RETENTION_TIME` use `"AC$CHROMATOGRAPHY.RETENTION_TIME"`.
#'   The value is looked up in the infolist column called `make.names(column)`.
#'   Note that an infolist may have multiple extra columns. Each one needs to
#'   be added separately.
#'
#' @return
#' The modified `mbWorkspace`
#' @export
#'
#' @examples
#' \dontrun{
#'
#' mb <- newMbWorkspace(w)
#' mb <- resetInfolists(mb)
#' mb <- loadInfolists(mb, "../infolists") #FOLDER
#' mb <- mbWorkflow(mb, steps = c(1:4))
#' # Currently only inserting from single infolists is supported (not from an
#' # entire folder of infolists)
#' mb <- insertExtraData(mb, "Infolist_PNEG_20eV_modified.csv", "AC$CHROMATOGRAPHY.RETENTION_TIME")
#' mb <- insertExtraData(mb, "Infolist_PNEG_20eV_modified.csv", "AC$CHROMATOGRAPHY.CCS")
#' mb <- mbWorkflow(mb, steps = c(5:8))
#' }
#'
insertExtraData <- function(mb, infolist, column) {
  stopifnot(is.character(column), length(column) == 1L, !is.na(column))

  # str_split() returns a list with one character vector per input string, so
  # the tag path is the first (and only) element of that list.
  target <- str_split(column, fixed("."))[[1]]
  if (!length(target) %in% c(1L, 2L)) {
    stop(
      "`column` must have the form \"TAG\" or \"TAG.SUBTAG\", got: ", column,
      call. = FALSE
    )
  }

  infolist_ <- read.csv(infolist)
  # read.csv() passes header names through make.names(), so look the column up
  # under its sanitised name.
  source_col <- make.names(column)
  if (!source_col %in% names(infolist_)) {
    # Without this check the lookup yields NULL, and assigning NULL would
    # silently delete the tag from every record.
    stop(
      "Column `", source_col, "` not found in the infolist",
      call. = FALSE
    )
  }

  mb@compiled_ok <- mb@compiled_ok %>%
    map(function(cpd) {
      infolist_row <- infolist_ %>%
        filter(id == as.numeric(cpd@id))
      if (nrow(infolist_row) == 0L) {
        # No entry for this compound: leave its records untouched
        return(cpd)
      }
      if (nrow(infolist_row) > 1L) {
        warning(
          "Multiple infolist rows for id ", cpd@id, "; using the first one",
          call. = FALSE
        )
      }
      value <- infolist_row[[source_col]][[1]]

      cpd@children <- cpd@children %>%
        as.list() %>%
        map(function(sp) {
          # Note: We don't take care of reordering here,
          # so use at your own peril
          if (length(target) == 2L) {
            if (is.null(sp@info[[target[[1]]]])) {
              # Create the parent tag explicitly so the subtag can be added
              sp@info[[target[[1]]]] <- list()
            }
            sp@info[[target[[1]]]][[target[[2]]]] <- value
          } else {
            sp@info[[target[[1]]]] <- value
          }
          sp
        }) %>%
        as("SimpleList")
      cpd
    })
  mb
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment