Skip to content

Instantly share code, notes, and snippets.

@tonyelhabr
Last active October 10, 2019 12:49
Show Gist options
  • Save tonyelhabr/3c374264886efd27c0dd0aa769e46553 to your computer and use it in GitHub Desktop.
Save tonyelhabr/3c374264886efd27c0dd0aa769e46553 to your computer and use it in GitHub Desktop.
R functions for files.
# NOTE: Most of these are from my {vp2} project, which builds upon other past work.
file_path <- function(...) {
file.path(..., fsep = '/')
}
# Reference: https://github.com/tonyelhabr/nba-rapm/blob/master/R/files.R
.create_dir_ifnecessary <-
function(dir, ...) {
if(!dir.exists(dir)) {
invisible(dir.create(dir, recursive = TRUE))
.display_info(
glue::glue('Created {crayon::yellow(dir)} at {Sys.time()}.'),
...
)
}
invisible(dir)
}
.get_choose_basename_safely <- function(basename = NULL, f = NULL, ...) {
if(!is.null(basename)) {
return(basename)
}
f(...)
}
.get_basename_safely <- function(file = NULL, ext = NULL, basename = NULL) {
if(is.null(basename)) {
stopifnot(!is.null(file))
stopifnot(!is.null(ext))
basename <- paste0(file, '.', ext)
}
basename
}
# Reference: https://stackoverflow.com/questions/20038732/how-to-check-a-file-is-opened-or-closed-in-r
is_file_open <- function(path) {
suppressWarnings(
'try-error' %in% class(
try(file(path, open = 'w'),
silent = TRUE
)
)
)
}
# Reference: {teproj} package.
.get_path_safely <-
function(dir = NULL,
file = NULL,
ext = NULL,
basename = NULL,
path = NULL) {
if (is.null(path)) {
.get_basename_safely(file = file, ext = ext, basename = basename)
if(is.null(dir)) {
dir <- getwd()
}
path <- file.path(dir, basename)
}
# path <- normalize_path(path)
path
}
# Add `verbose`, etc. (i.e. `backup`) to this(?).
.import_data <-
function(..., path) {
ext <- tools::file_ext(path)
if(ext == 'csv') {
# Set `verbose = FALSE` always to suppress verBose `data.table::fread()` messages.
# Also, note that both `data.table::fread()` and `rio::import()` set `call. = FALSE` in `stop()`
# if the path does not exist.
# res <- data.table::fread(file = path, sep = ',', verbose = FALSE)
# readr::read_csv(file = path, ...)
# res <- rio::import(..., verbose = FALSE)
res <- rio::import(file = path, verbose = FALSE)
# } else if(str_detect(ext, '^[R|r]') {
} else if(ext == 'Rda') {
load(path)
return(invisible(NULL))
} else {
# Is this necessary ? Can't it be captured by a regular expression for file
# extensions beginning with [R|r]?
res <- rio::import(file = path)
}
# This is an early-exit for model-like data.
if(!any('data.frame' == class(res))) {
return(res)
}
res %>%
tibble::as_tibble() %>%
janitor::clean_names()
}
.import_data_from_path <-
function(...,
path,
# `import` is only included here in order to be analogous with `export`
# for `.export_*()`. In reality, `skip` is used before this function
# is ever called, so `import` is irrelevant.
import = TRUE,
# This `.return_type` argument was created spcifically for the `.try_import*nbastatr()`
# family of functions, which depends on NOT throwing an error if the file does not exist.
.return_type = c('error', 'warning')) {
if(!import) {
return(invisible(NULL))
}
# dots <- list(...)
# browser()
if(!file.exists(path)) {
.return_type <- match.arg(.return_type)
f_display <-
switch(.return_type, error = .display_error, warning = .display_warning)
f_display(
glue::glue('No file at {crayon::yellow(path)} exists.'),
...
)
return(invisible(NULL))
}
data <- .import_data(..., path = path)
# TODO: Maybe limit this message to only times less than 1 hour/day?
# path_info <- fs::file_info(path)
# # diff_time0 <- lubridate::as.difftime(Sys.time() - path_info$modification_time)
# diff_time <-
# sprintf(
# '%.1f',
# (lubridate::interval(path_info$modification_time, Sys.time()) / lubridate::minutes(1))
# )
.display_info(
glue::glue(
'{crayon::green("Imported")} data from {crayon::yellow(path)}.'
# 'Imported data from {crayon::yellow(path)}.'# ,
# ' (Last modification at {path_info$modification_time}).'
# '{diff_time0}.'
# ' (Last modification: {diff_time} min. ago).'
),
...
)
invisible(data)
}
# NOTE: Changed these `.export` functions a bit from the originals.
.units_viz <- 'in'
.height_viz <- 5
.width_viz <- 7
.export_data <-
function(...,
data,
path,
units = .units_viz,
height = .height_viz,
width = .width_viz) {
# path_export <- rio::export(data, path, ...)
.class <- class(data)
if(any(tools::file_ext(path) %in% c('RData', 'Rda', 'rda'))) {
data_export <- rio::export(file = path, x = data, ...)
} else if(any('gg' == .class)) {
# See `.import_data()` for the reasoning for setting `verbose = FALSE` here.
data_export <-
ggplot2::ggsave(
# Passing dots will cause an error because `grDevices::png()` does not accept unused arguments.
# ...,
plot = data,
filename = path,
units = units,
height = height,
width = width
)
} else if(any('character' == .class)) {
data_export <- write_lines(data, path = path, ...)
} else if(any(.class %in% c('matrix', 'data.frame', 'tibble'))) {
data_export <- rio::export(file = path, x = data, ...)
} else {
msg <- glue::glue('Don\'t know how to export data of type `{.class}`.')
message(msg)
return(NULL)
}
invisible(data_export)
}
.backup <- TRUE
.export <- TRUE
.export_data_to_path <-
function(data,
path,
...,
backup = .backup,
export = .export) {
if(!export) {
return(invisible(FALSE))
}
# path <- .get_path_from(..., path = path)
# browser()
if (backup) {
path_backup <- .create_backup(..., path = path)
}
.create_dir_ifnecessary(..., dir = dirname(path))
data_export <-
.export_data(
...,
data = data,
path = path
)
.display_info(
# glue::glue('Exported data to {crayon::yellow(path)}.'),
glue::glue(
'{crayon::red("Exported")} data to {crayon::yellow(path)}.'
),
...
)
invisible(data_export)
}
export_data_to_path <- .export_data_to_path
.clean <- TRUE
.n_backup <- 1
.create_backup <-
function(path,
...,
file = tools::file_path_sans_ext(path),
ext = tools::file_ext(path),
suffix_backup = format(Sys.time(), '%Y%m%d%H%M%S'),
path_backup = sprintf('%s-%s.%s', file, suffix_backup, ext),
clean = .clean,
n_backup = .n_backup) {
if (!file.exists(path)) {
.display_info(
glue::glue(
'Backup file at {crayon::yellow(path_backup)} cannot be created because file to copy at {crayon::yellow(path)} cannot be found.'
),
...
)
return(invisible(path_backup))
}
if (file.exists(path_backup)) {
.display_error(
glue::glue(
'Backup file at {path_backup} already exists. Are you sure you want to overwrite it?'
),
...
)
return(invisible(NULL))
}
invisible(file.copy(from = path, to = path_backup))
.display_info(
glue::glue(
'Backed up file at {crayon::yellow(path_backup)} before exporting data to {crayon::yellow(path)}.'
),
...
)
if(clean) {
.clean_backup(..., path = path, n_backup = n_backup)
}
invisible(path_backup)
}
.clean_backup <-
function(path,
...,
n_backup = .n_backup,
dir = dirname(path),
rgx = paste0(tools::file_path_sans_ext(basename(path)),'-.*', tools::file_ext(path))) {
paths_like_backup <-
list.files(
path = dir,
pattern = rgx,
recursive = FALSE,
full.names = TRUE
)
n <- length(paths_like_backup)
if (n < n_backup) {
if (n == 0L) {
.display_info(
glue::glue('No backup files to delete.'),
...
)
return(path)
}
.display_info(
glue::glue(
'Number of backup files ({sprintf("%.0f", n)}) is less than `n_backup` ({sprintf("%.0f", n_backup)}),so not deleting any backup files.'
),
...
)
return(path)
}
paths_to_keep <-
sort(paths_like_backup, decreasing = TRUE)[1L:n_backup]
paths_to_delete <- setdiff(paths_like_backup, paths_to_keep)
invisible(sapply(
paths_to_delete,
unlink,
recursive = TRUE,
force = TRUE
))
.display_info(
glue::glue('Deleted {sprintf("%.0f", length(paths_to_delete))} backup files at {Sys.time()}.'),
...
)
invisible(path)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment