Created
February 13, 2020 00:49
-
-
Save btupper/98119fddcb2ed298bf3638673c303984 to your computer and use it in GitHub Desktop.
Read a NanoSIMS file (possibly corrupted header line)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read a NanoSIMS file (possibly with a corrupted header line)
# into a data frame (tibble). Optionally overwrite the original file with
# the corrected header line.
# | |
# Requires [readr](https://CRAN.R-project.org/package=readr) package | |
# | |
# Usage within R session: | |
# > source("/path/to/nanosims.R") | |
# # make resave FALSE to preserve the original file
# > x <- read_nanosims("/path/to/nanosims/file.csv", resave = TRUE) | |
# # process a directory filled with NanoSIMS .csv files | |
# > x <- read_nanosims("/path/to/nanosims/directory", resave = TRUE) | |
# | |
# Usage from shell prompt | |
# $ Rscript /path/to/nanosims.R /path/to/nanosims/file.csv --resave TRUE | |
# or | |
# $ Rscript /path/to/nanosims.R /path/to/nanosims/directory --resave TRUE | |
library(readr) | |
#' Replace text in a character vector | |
#' | |
#' @param x character, a vector of strings possibly witht he pattern to replace | |
#' @param pat list of one or more 2 element character patterns stored in a | |
#' character vector ala c(find_pattern, replace_pattern) | |
#' @param ... further arguments for gsub | |
#' @return character vector of same length as input with pattern replaced as specified. | |
replace_text <- function(x, | |
pat = list(c("\nmean", "mean"), | |
c("\nstddev", "stddev") ), | |
...){ | |
for (i in seq_along(pat)){ | |
x <- gsub(pat[[i]][1], pat[[i]][2], x, ...) | |
} | |
return(x) | |
} | |
#' Read a NanoSIMS file as a text stream with replacement of specified patterns. | |
#' | |
#' @param x character, the name (with path) of the file or a directory of | |
#' NanoSIMS CSV files | |
#' @param ... arguments for \code{replace_text} | |
#' @param resave logical, if TRUE resave the file (overwrites original) | |
#' @param data.frame or a list of data frames if x is a directory with one | |
#' element (a data frame) per file found | |
read_nanosims <- function(x, ..., resave = FALSE){ | |
fi <- file.info(x[1]) | |
if (fi$isdir[1]){ | |
ff <- list.files(x[1], pattern = glob2rx("*.csv"), full.names = TRUE) | |
r <- lapply(ff, read_nanosims, ..., resave = resave) | |
} else { | |
stopifnot(file.exists(x[1])) | |
s <- readChar(x[1], file.info(x[1])$size) | |
s <- replace_text(s, ...) | |
if (resave) writeLines(s, con = x[1], sep = "") | |
r <- readr::read_csv(s) | |
} | |
r | |
} | |
# This only runs if called as a script from the command line | |
if (!interactive()){ | |
arg <- commandArgs(trailingOnly = TRUE) | |
# caller might have provded no arguments - print help | |
# Rscript nanosims.R | |
# | |
# caller might have provided one argument ala | |
# Rscript nanosims.R filename | |
# or two | |
# Rscript nanosims.R filename --resave TRUE | |
# or | |
# Rscript nanosims.R --resave TRUE filename | |
resave <- FALSE | |
i <- 1 | |
if (length(arg) == 0){ | |
cat("Usage: Rscript /path/to/nanosims.R /path/to/file/or/directory [--resave TRUE|FALSE]\n") | |
quit(save = "no", status = 0) | |
} | |
if (length(arg) > 1){ | |
ix <- grep("-resave", arg, fixed = TRUE) | |
if (length(ix) >= 1){ | |
resave <- tolower(arg[ix+1]) %in% c("true", "yes", "1") | |
if (ix[1] == 1){ | |
i <- 3 | |
} | |
} | |
} | |
x <- try(read_nanosims(arg[i], resave = resave)) | |
ok <- inherits(x, "try-error") | |
quit(save = "no", status = as.numeric(ok)) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment