Skip to content

Instantly share code, notes, and snippets.

@btupper
Created February 13, 2020 00:49
Show Gist options
  • Save btupper/98119fddcb2ed298bf3638673c303984 to your computer and use it in GitHub Desktop.
Save btupper/98119fddcb2ed298bf3638673c303984 to your computer and use it in GitHub Desktop.
Read a NanoSIMS file (possibly corrupted header line)
# Read a NanoSIMS file (possibly corrupted header line)
# into a data frame (tibble). Optionally overwrite the original file with the
# corrected header line.
#
# Requires [readr](https://CRAN.R-project.org/package=readr) package
#
# Usage within R session:
# > source("/path/to/nanosims.R")
# # make resave FALSE to preserve the original file
# > x <- read_nanosims("/path/to/nanosims/file.csv", resave = TRUE)
# # process a directory filled with NanoSIMS .csv files
# > x <- read_nanosims("/path/to/nanosims/directory", resave = TRUE)
#
# Usage from shell prompt
# $ Rscript /path/to/nanosims.R /path/to/nanosims/file.csv --resave TRUE
# or
# $ Rscript /path/to/nanosims.R /path/to/nanosims/directory --resave TRUE
library(readr)
#' Apply a sequence of find/replace patterns to a character vector
#'
#' Each pattern pair is applied with \code{gsub} in the order given, so a
#' later pair sees the output of the earlier ones.
#'
#' @param x character, a vector of strings possibly containing the patterns
#'   to replace
#' @param pat list of one or more 2 element character vectors, each of the
#'   form c(find_pattern, replace_pattern)
#' @param ... further arguments for gsub
#' @return character vector of the same length as the input with each
#'   pattern replaced as specified.
replace_text <- function(x,
                         pat = list(c("\nmean", "mean"),
                                    c("\nstddev", "stddev")),
                         ...){
  for (pair in pat){
    find <- pair[1]
    replacement <- pair[2]
    x <- gsub(find, replacement, x, ...)
  }
  x
}
#' Read a NanoSIMS file as a text stream with replacement of specified patterns.
#'
#' If \code{x} is a directory, every file in it whose name matches
#' \code{*.csv} is read in turn with the same arguments.
#'
#' @param x character, the name (with path) of the file or a directory of
#'   NanoSIMS CSV files. Only the first element is used.
#' @param ... arguments for \code{replace_text}
#' @param resave logical, if TRUE resave the file (overwrites original)
#' @return the result of \code{readr::read_csv} for a single file, or a list
#'   with one such element per file found if x is a directory
read_nanosims <- function(x, ..., resave = FALSE){
  path <- x[1]
  # file.info() reports isdir = NA for a path that does not exist, which
  # would make the `if` below fail with an obscure "missing value" error,
  # so verify existence up front and fail with a clear message.
  if (!file.exists(path)){
    stop("file or directory does not exist: ", path, call. = FALSE)
  }
  fi <- file.info(path)
  if (isTRUE(fi$isdir[1])){
    ff <- list.files(path, pattern = glob2rx("*.csv"), full.names = TRUE)
    r <- lapply(ff, read_nanosims, ..., resave = resave)
  } else {
    # Read the whole file into one string so that patterns spanning a line
    # break (such as "\nmean") can be matched by replace_text.
    s <- readChar(path, fi$size[1])
    s <- replace_text(s, ...)
    # sep = "" because s already carries its own line endings
    if (resave) writeLines(s, con = path, sep = "")
    r <- readr::read_csv(s)
  }
  r
}
# This only runs if called as a script from the command line.
# The sys.nframe() test keeps the block from running (and quitting the
# session) when this file is source()'d from another non-interactive script.
if (!interactive() && sys.nframe() == 0L){
  usage <- "Usage: Rscript /path/to/nanosims.R /path/to/file/or/directory [--resave TRUE|FALSE]\n"
  arg <- commandArgs(trailingOnly = TRUE)
  # caller might have provided no arguments - print help
  #   Rscript nanosims.R
  if (length(arg) == 0){
    cat(usage)
    quit(save = "no", status = 0)
  }
  # caller might have provided one argument ala
  #   Rscript nanosims.R filename
  # or the path plus the flag and its value, in either order
  #   Rscript nanosims.R filename --resave TRUE
  #   Rscript nanosims.R --resave TRUE filename
  resave <- FALSE
  # match the flag exactly so a path that merely contains "-resave" is not
  # mistaken for it
  ix <- which(arg %in% c("--resave", "-resave"))
  if (length(ix) >= 1){
    # the flag always consumes the argument that follows it; indexing past
    # the end yields NA, which is treated as FALSE
    resave <- tolower(arg[ix[1] + 1]) %in% c("true", "yes", "1")
    # drop every flag/value pair so that only the path remains
    arg <- arg[-c(ix, ix + 1)]
  }
  if (length(arg) == 0 || is.na(arg[1])){
    # a flag was given but no file or directory
    cat(usage)
    quit(save = "no", status = 1)
  }
  x <- try(read_nanosims(arg[1], resave = resave))
  # exit status is 1 when reading failed, 0 otherwise
  failed <- inherits(x, "try-error")
  quit(save = "no", status = as.numeric(failed))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment