# meowcat/insertExtraData.R

## insertExtraData.R
library(tidyverse)

#' Add arbitrary data to record from an infolist column
#'
#' Normal behaviour of RMassBank is that only specific columns from infolist.csv
#' are added to records. Any extra data is discarded.
#'
#' This function provides a possibility to add or overwrite data in a record
#' in a per-compound manner (as opposed to a global value specified in the
#' RMassBank settings). For example, the user may overwrite the retention time
#' or add a CCS value.
#'
#' Each compound in `mb@compiled_ok` is matched to the infolist rows whose `id`
#' equals the numeric compound ID, and the value found there is written into
#' the `info` list of every child spectrum of that compound. Compounds without
#' a matching row are returned untouched.
#'
#' Note that tags are not reordered and new tags are added after all existing tags.
#' Therefore, only use this for pre-existing tags or optional tags (that don't have
#' a prespecified order).
#'
#' The function stops with an error if the infolist has no `id` column, if the
#' requested column is not present in the infolist, or if `column` splits into
#' more than two `.`-separated parts.
#'
#' @param mb `mbWorkspace` after step 4 of mbWorkflow (i.e. after record compilation)
#' @param infolist Path to a CSV infolist with extra columns. These need to be named
#'  according to the target tag. I.e. for a tag `AC$CHROMATOGRAPHY: RETENTION_TIME`,
#'  the column header must be `AC$CHROMATOGRAPHY.RETENTION_TIME` (the `": "`
#'  separator replaced by a single `.`). `read.csv()` turns the `$` of the header
#'  into `.` on import; this is accounted for internally.
#' @param column The column to be inserted, spelled as in the CSV header (i.e.
#'  including the `$`). A name without `.` addresses a top-level tag, a name of
#'  the form `TAG.SUBTAG` addresses a subtag. Note that an infolist may have
#'  multiple extra columns. Each one needs to be added separately.
#'
#' @return
#' The modified `mbWorkspace`
#' @export
#'
#' @examples
#' \dontrun{
#'
#' mb <- newMbWorkspace(w)
#' mb <- resetInfolists(mb)
#' mb <- loadInfolists(mb, "../infolists") #FOLDER
#' mb <- mbWorkflow(mb, steps = c(1:4))
#' # Currently only inserting from single infolists is supported (not from an entire
#' # folder of infolists)
#' mb <- insertExtraData(mb, "Infolist_PNEG_20eV_modified.csv", "AC$CHROMATOGRAPHY.RETENTION_TIME")
#' mb <- insertExtraData(mb, "Infolist_PNEG_20eV_modified.csv", "AC$CHROMATOGRAPHY.CCS")
#' mb <- mbWorkflow(mb, steps = c(5:8))
#' }
#'
insertExtraData <- function(mb, infolist, column) {
  stopifnot(is.character(column), length(column) == 1L)
  infolist_ <- read.csv(infolist)

  # strsplit() returns a list with one entry per input string; take the first
  # entry so that `target` is the character vector of tag components.
  target <- strsplit(column, ".", fixed = TRUE)[[1]]
  # read.csv() sanitises header names, which turns `$` into `.`.
  source <- gsub("$", ".", column, fixed = TRUE)

  # Validate once, up front, instead of failing (or silently corrupting
  # records) somewhere inside the nested iteration.
  if (!("id" %in% names(infolist_))) {
    stop("Infolist has no 'id' column.", call. = FALSE)
  }
  if (!(source %in% names(infolist_))) {
    # Without this check the lookup below would yield NULL, and assigning NULL
    # with `[[<-` would delete the existing tag from every record.
    stop("Column '", column, "' not found in infolist.", call. = FALSE)
  }
  if (!(length(target) %in% c(1L, 2L))) {
    stop(
      "Column '", column, "' must have the form 'TAG' or 'TAG.SUBTAG'.",
      call. = FALSE
    )
  }

  mb@compiled_ok <- lapply(mb@compiled_ok, function(cpd) {
    # which() drops NA comparisons, e.g. from missing or non-numeric IDs.
    hits <- which(infolist_[["id"]] == as.numeric(cpd@id))
    if (length(hits) == 0L) {
      # No infolist entry for this compound: leave its records as they are.
      return(cpd)
    }
    value <- infolist_[[source]][hits]

    # Note: We don't take care of reordering here,
    # so use at your own peril
    updated <- lapply(as.list(cpd@children), function(sp) {
      if (length(target) == 2L) {
        # Create the parent tag explicitly if it does not exist yet, so the
        # subtag always ends up inside a list.
        subtags <- sp@info[[target[[1]]]]
        if (is.null(subtags)) {
          subtags <- list()
        }
        subtags[[target[[2]]]] <- value
        sp@info[[target[[1]]]] <- subtags
      } else {
        sp@info[[target[[1]]]] <- value
      }
      sp
    })
    cpd@children <- as(updated, "SimpleList")
    cpd
  })
  mb
}
	library(tidyverse)

	#' Add arbitrary data to record from an infolist column
	#'
	#' Normal behaviour of RMassBank is that only specific columns from infolist.csv
	#' are added to records. Any extra data is discarded.
	#'
	#' This function provides a possibility to add or overwrite data in a record
	#' in a per-compound manner (as opposed to a global value specified in the
	#' RMassBank settings). For example, the user may overwrite the retention time
	#' or add a CCS value.
	#'
	#' Each compound in `mb@compiled_ok` is matched to the infolist rows whose `id`
	#' equals the numeric compound ID, and the value found there is written into
	#' the `info` list of every child spectrum of that compound. Compounds without
	#' a matching row are returned untouched.
	#'
	#' Note that tags are not reordered and new tags are added after all existing tags.
	#' Therefore, only use this for pre-existing tags or optional tags (that don't have
	#' a prespecified order).
	#'
	#' The function stops with an error if the infolist has no `id` column, if the
	#' requested column is not present in the infolist, or if `column` splits into
	#' more than two `.`-separated parts.
	#'
	#' @param mb `mbWorkspace` after step 4 of mbWorkflow (i.e. after record compilation)
	#' @param infolist Path to a CSV infolist with extra columns. These need to be named
	#' according to the target tag. I.e. for a tag `AC$CHROMATOGRAPHY: RETENTION_TIME`,
	#' the column header must be `AC$CHROMATOGRAPHY.RETENTION_TIME` (the `": "`
	#' separator replaced by a single `.`). `read.csv()` turns the `$` of the header
	#' into `.` on import; this is accounted for internally.
	#' @param column The column to be inserted, spelled as in the CSV header (i.e.
	#' including the `$`). A name without `.` addresses a top-level tag, a name of
	#' the form `TAG.SUBTAG` addresses a subtag. Note that an infolist may have
	#' multiple extra columns. Each one needs to be added separately.
	#'
	#' @return
	#' The modified `mbWorkspace`
	#' @export
	#'
	#' @examples
	#' \dontrun{
	#'
	#' mb <- newMbWorkspace(w)
	#' mb <- resetInfolists(mb)
	#' mb <- loadInfolists(mb, "../infolists") #FOLDER
	#' mb <- mbWorkflow(mb, steps = c(1:4))
	#' # Currently only inserting from single infolists is supported (not from an entire
	#' # folder of infolists)
	#' mb <- insertExtraData(mb, "Infolist_PNEG_20eV_modified.csv", "AC$CHROMATOGRAPHY.RETENTION_TIME")
	#' mb <- insertExtraData(mb, "Infolist_PNEG_20eV_modified.csv", "AC$CHROMATOGRAPHY.CCS")
	#' mb <- mbWorkflow(mb, steps = c(5:8))
	#' }
	#'
	insertExtraData <- function(mb, infolist, column) {
	  stopifnot(is.character(column), length(column) == 1L)
	  infolist_ <- read.csv(infolist)

	  # strsplit() returns a list with one entry per input string; take the first
	  # entry so that `target` is the character vector of tag components.
	  target <- strsplit(column, ".", fixed = TRUE)[[1]]
	  # read.csv() sanitises header names, which turns `$` into `.`.
	  source <- gsub("$", ".", column, fixed = TRUE)

	  # Validate once, up front, instead of failing (or silently corrupting
	  # records) somewhere inside the nested iteration.
	  if (!("id" %in% names(infolist_))) {
	    stop("Infolist has no 'id' column.", call. = FALSE)
	  }
	  if (!(source %in% names(infolist_))) {
	    # Without this check the lookup below would yield NULL, and assigning NULL
	    # with `[[<-` would delete the existing tag from every record.
	    stop("Column '", column, "' not found in infolist.", call. = FALSE)
	  }
	  if (!(length(target) %in% c(1L, 2L))) {
	    stop(
	      "Column '", column, "' must have the form 'TAG' or 'TAG.SUBTAG'.",
	      call. = FALSE
	    )
	  }

	  mb@compiled_ok <- lapply(mb@compiled_ok, function(cpd) {
	    # which() drops NA comparisons, e.g. from missing or non-numeric IDs.
	    hits <- which(infolist_[["id"]] == as.numeric(cpd@id))
	    if (length(hits) == 0L) {
	      # No infolist entry for this compound: leave its records as they are.
	      return(cpd)
	    }
	    value <- infolist_[[source]][hits]

	    # Note: We don't take care of reordering here,
	    # so use at your own peril
	    updated <- lapply(as.list(cpd@children), function(sp) {
	      if (length(target) == 2L) {
	        # Create the parent tag explicitly if it does not exist yet, so the
	        # subtag always ends up inside a list.
	        subtags <- sp@info[[target[[1]]]]
	        if (is.null(subtags)) {
	          subtags <- list()
	        }
	        subtags[[target[[2]]]] <- value
	        sp@info[[target[[1]]]] <- subtags
	      } else {
	        sp@info[[target[[1]]]] <- value
	      }
	      sp
	    })
	    cpd@children <- as(updated, "SimpleList")
	    cpd
	  })
	  mb
	}