Skip to content

Instantly share code, notes, and snippets.

@jmclawson
Last active January 27, 2023 21:57
Show Gist options
  • Save jmclawson/52252349dd100e426c2267b5de48aade to your computer and use it in GitHub Desktop.
Save jmclawson/52252349dd100e426c2267b5de48aade to your computer and use it in GitHub Desktop.
Log details for replicable analyses using stylo, then re-run prior analyses.
##### stylo_log #####
# Appends a record of a stylo() run to a dated log file and returns the
# stylo object unchanged, so it can sit at the end of a pipeline.
# Each record holds a timestamp, the deparsed call stored in the object,
# and the contents of stylo_config.txt from the working directory.
# Files modified at about the same time as stylo_config.txt are copied
# into the log directory by log_files().
# Pipe from stylo() directly into stylo_log()
# or wrap stylo() in stylo_log()
# Arguments:
#   stylo_object  result of stylo(); its $call element is logged
#   log_label     base name of the log file/directory; NULL means
#                 "stylo_log"
#   add_dir_date  if TRUE, the directory name also carries the date
#   log_date      date used in the log file name (defaults to today)
# Returns: the evaluated stylo object.
# Examples:
# stylo() |> stylo_log()
# stylo_log(stylo())
stylo_log <- function(
    stylo_object,
    log_label = NULL,
    add_dir_date = FALSE,
    log_date = Sys.Date()) {
  this_object <- eval(stylo_object)
  # manage_stylo_log_meta() takes (log_label, log_date, add_dir_date);
  # all three must be supplied, in that order.
  slog <- manage_stylo_log_meta(log_label, log_date, add_dir_date)
  if (!dir.exists(slog$dir)) {
    dir.create(slog$dir, recursive = TRUE)
  }
  has_config <- file.exists("stylo_config.txt")
  if (has_config) {
    stylo_config <- paste(readLines("stylo_config.txt"), collapse = "\n")
  } else {
    stylo_config <- "(Error: stylo_config.txt file not found)"
  }
  the_call <- paste0(deparse(this_object$call), collapse = "\n")
  # Format the timestamp explicitly: implicit coercion of Sys.time() can
  # include fractional seconds, which would no longer match a date_time
  # such as "2023-01-27 13:46:26" when the run is looked up later.
  processed_at <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
  stylo_object_log <- paste0(
    "Processed:\n",
    processed_at,
    "\n\n",
    "Call:\n",
    the_call,
    "\n\n",
    "Config:\n",
    stylo_config,
    "\n\n=====",
    "\n\n"
  )
  log_path <- paste0(file.path(slog$dir, slog$label), ".txt")
  cat(stylo_object_log, file = log_path, append = TRUE)
  # log_files() keys everything off stylo_config.txt, so there is
  # nothing to copy when that file is absent.
  if (has_config) {
    log_files(slog$dir)
  }
  this_object
}
##### stylo_replicate #####
# Use stylo_replicate() in place of stylo() to replicate
# a specific analysis from before. It must be run from
# the same working directory as the previous call to
# stylo(). If it is given an argument for date_time,
# it re-runs the logged analysis with that timestamp. If none
# is given, it runs stylo(...) and pipes the result into
# stylo_log(), so that Stylo runs and its settings are logged.
# Arguments:
#   date_time     timestamp of the logged run to replicate, exactly as it
#                 appears under "Processed:" in the log; NULL runs a
#                 fresh analysis
#   log_label     base name of the log file/directory (see stylo_log())
#   add_dir_date  whether the log directory name carries the date
#   log_date      date of the log file to read (defaults to today)
#   ...           passed to stylo() when date_time is NULL
# Returns: whatever stylo_log() returns for the executed analysis.
# Example:
# stylo_replicate()
# stylo_replicate("2023-01-27 13:46:26")
stylo_replicate <- function(
    date_time = NULL,
    log_label = NULL,
    add_dir_date = FALSE,
    log_date = Sys.Date(),
    ...) {
  if (is.null(date_time)) {
    return(
      stylo(...) |>
        stylo_log(log_label, add_dir_date)
    )
  }

  slog <- manage_stylo_log_meta(log_label, log_date, add_dir_date)
  log_path <- paste0(file.path(slog$dir, slog$label), ".txt")
  log_table <- process_log(log_path)
  if (is.null(log_table)) {
    # process_log() only emits a message when the file is missing.
    stop("Cannot replicate: no log file at '", log_path, "'.",
         call. = FALSE)
  }

  # limit to the entry for the chosen date
  hits <- which(log_table$date == date_time)
  if (length(hits) == 0) {
    stop("No logged analysis with timestamp '", date_time, "' in '",
         log_path, "'.", call. = FALSE)
  }
  if (length(hits) > 1) {
    warning("Several logged analyses share timestamp '", date_time,
            "'; replicating the first.", call. = FALSE)
  }
  entry <- log_table[hits[1], , drop = FALSE]

  if (grepl("replicated", entry$call, fixed = TRUE)) {
    # The logged call is already a full, self-contained stylo() call
    # produced by an earlier replication: reuse it after tidying spaces.
    stylo_call <- gsub("\n", "", entry$call, fixed = TRUE)
    stylo_call <- gsub("[ ]{1,}", " ", stylo_call)
  } else {
    if (startsWith(entry$config, "(Error:")) {
      stop("The logged run has no stylo_config.txt contents to replay.",
           call. = FALSE)
    }
    # add any corpus.dir from call to config; the value is matched up to
    # its own closing quote so later quoted arguments are not swallowed
    dir_match <- regmatches(
      entry$call,
      regexpr('corpus\\.dir[ ]?=[ ]?"[^"]*"', entry$call)
    )
    dir_arg <- if (length(dir_match) == 0) "" else paste0(dir_match, ", ")
    stylo_call <- paste0(
      "stylo(gui = FALSE,",
      dir_arg,
      entry$config,
      ", replicated = \"",
      date_time,
      "\")"
    )
  }

  # NOTE(review): this evaluates text read from the log file as R code.
  # Only replicate from log files you trust.
  eval(parse(text = stylo_call)) |>
    stylo_log(log_label, add_dir_date)
}
##### manage_stylo_log_meta() #####
# Shared naming helper for stylo_log() and stylo_replicate().
# Builds the log file label ("<label> <date>") and the log directory
# name ("<label>", or "<label> <date>" when add_dir_date is TRUE).
# A NULL log_label falls back to "stylo_log".
# Returns a list with elements `label` and `dir`.
# For internal use only.
manage_stylo_log_meta <- function(
    log_label,
    log_date,
    add_dir_date) {
  base_label <- if (is.null(log_label)) "stylo_log" else log_label
  dir_name <- if (add_dir_date) {
    paste(base_label, log_date)
  } else {
    base_label
  }
  list(
    label = paste(base_label, log_date),
    dir = dir_name
  )
}
##### process_log #####
# Parses a log file written by stylo_log() into a data frame with one
# row per logged run and three character columns:
#   date    the line following "Processed:"
#   call    the lines of the logged call, joined with single spaces
#   config  the logged config lines, joined with ", "
# Every entry, including the first, is parsed the same way.
# If the file does not exist, a message is shown and NULL is returned
# invisibly. A log without any complete entry gives a zero-row data frame.
# Mostly for internal use, but it can also be called directly.
# Example:
# process_log("stylo_log/stylo_log 2023-01-27.txt")
process_log <- function(log_path) {
  if (!file.exists(log_path)) {
    message("There is no log file found. Please be sure to set appropriate values for the log_label and add_dir_date arguments.")
    return(invisible(NULL))
  }
  log_whole <- readLines(log_path)

  # Section markers sit alone on their line; anchoring keeps call or
  # config lines that merely contain these words from being mistaken
  # for markers.
  find_marker <- function(marker) {
    grep(paste0("^", marker, "[[:space:]]*$"), log_whole,
         ignore.case = TRUE)
  }
  all_processed <- find_marker("Processed:")
  all_call <- find_marker("Call:")
  all_config <- find_marker("Config:")
  all_end <- find_marker("=====")

  # Only entries that have all four markers can be parsed.
  n_entries <- min(
    length(all_processed), length(all_call),
    length(all_config), length(all_end)
  )
  if (n_entries == 0) {
    return(data.frame(
      date = character(0),
      call = character(0),
      config = character(0)
    ))
  }

  join_lines <- function(first, last, sep) {
    if (first > last) {
      return("")
    }
    paste0(log_whole[first:last], collapse = sep)
  }

  # Each section is followed by one blank line before the next marker,
  # hence the "- 2" on the closing index of both call and config.
  rows <- lapply(seq_len(n_entries), function(i) {
    data.frame(
      date = log_whole[all_processed[i] + 1],
      call = join_lines(all_call[i] + 1, all_config[i] - 2, " "),
      config = join_lines(all_config[i] + 1, all_end[i] - 2, ", ")
    )
  })
  do.call(rbind, rows)
}
##### log_files #####
# Copies into log_dir every non-directory file in the working directory
# whose modification time lies within window_secs seconds of that of
# stylo_config.txt. Each copy is named
# "<config mtime, colons replaced by dashes> - <original file name>".
# Arguments:
#   log_dir      existing directory that receives the copies
#   window_secs  maximum distance, in seconds, from the modification
#                time of stylo_config.txt (default 5)
# Returns (invisibly) the logical vector from file.copy(); an empty
# logical vector when stylo_config.txt is not in the working directory.
log_files <- function(log_dir, window_secs = 5) {
  file_data <- file.info(list.files())
  file_names <- rownames(file_data)

  # find the modification time of stylo_config.txt
  is_config <- file_names == "stylo_config.txt"
  if (!any(is_config)) {
    # without the reference file there is no time to compare against
    return(invisible(logical(0)))
  }
  target_time <- file_data$mtime[is_config][1]

  # Ask for seconds explicitly: difftime() otherwise chooses its own
  # units, and the window below is expressed in seconds.
  seconds_apart <- abs(as.numeric(
    difftime(file_data$mtime, target_time, units = "secs")
  ))
  keep <- !is.na(seconds_apart) &
    seconds_apart < window_secs &
    file_data$isdir %in% FALSE
  recent_files <- file_names[keep]

  # colons are not valid in file names on every platform
  stamp <- format(target_time, "%Y-%m-%d %H-%M-%S")
  new_filenames <- file.path(log_dir, paste0(stamp, " - ", recent_files))
  invisible(file.copy(from = recent_files, to = new_filenames))
}
@jmclawson
Copy link
Author

More details available here:
computationalstylistics/stylo#53

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment