Last active
August 14, 2019 16:41
-
-
Save gadenbuie/284671997992aefe295bed34bb53fde6 to your computer and use it in GitHub Desktop.
Convert Rmd document (knitr::knit) to an R script (knitr::spin) #RMarkdown #knitr #knit #spin
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
warning("You probably don't want to use this `backstitch()` function.", | |
"\n It's hacky and there's a much better option in knitr called `purl()`.", | |
"\n More info at: `?knitr::purl`") | |
#' Backstitch an Rmd file to an R script | |
#' | |
#' Takes an Rmd file -- that would be converted with knitr::knit() -- and | |
#' "backstitches" it into an R script suitable for knitr::purl(). The output | |
#' file is the just the backstitched R script when `output_type = 'script'`, or | |
#' just the code chunks when `output_type = 'code'` (note that all inline code | |
#' is dropped in this case). Or finally, output both with | |
#' `output_type = 'both'`. | |
#' | |
#' You can load this function by calling: | |
#' devtools::source_gist('284671997992aefe295bed34bb53fde6', filename = 'backstitch.R') | |
#' | |
#' @param infile Input file name | |
#' @param outfile Output file name (`.R` extension added if not included) | |
#' @param ouput_type One of `script`, `code` or `both`. If `both`, two files are | |
#' created, with `_code` appended to the file name of the code chunks. | |
#' @param chunk_header Chunk header style, valid options are `"#-"`, `"#+"`, and `"# ----"` | |
backstitch <- function( | |
infile, | |
outfile = NULL, | |
output_type = c('both'), | |
chunk_header = "# ----" | |
) { | |
requireNamespace('knitr', quietly = TRUE) | |
requireNamespace('stringr', quietly = TRUE) | |
stopifnot(output_type %in% c('script', 'code', 'both')) | |
if (is.null(outfile) && output_type == 'both') | |
stop("Please choose output_type of 'script' or 'code' when not outputting to a file.") | |
knitr::knit_patterns$set(knitr::all_patterns[['md']]) | |
x <- readLines(infile) | |
if (inherits(infile, 'connection')) close(infile) | |
empty_lines <- which(stringr::str_detect(x, "^\\s?+$")) | |
last_non_empty_line <- max(setdiff(seq_along(x), empty_lines)) | |
x <- x[1:last_non_empty_line] | |
x_type <- rep('text', length(x)) | |
# Find YAML section | |
yaml_markers <- which(stringr::str_detect(x, "^[-.]{3}\\s*$")) | |
if (length(yaml_markers) > 2) { | |
message("Input file may have multiple YAML chunks, only considering lines", | |
paste(yaml_markers[1:2], collapse='-'), 'as YAML header.') | |
} | |
if (length(yaml_markers) > 0) { | |
i.yaml <- yaml_markers[1]:yaml_markers[2] | |
x_type[i.yaml] <- 'yaml' | |
} | |
# Mark code chunk.begin, chunk.end and regular chunk codelines | |
i.chunk.begin <- which(stringr::str_detect(x, knitr::knit_patterns$get('chunk.begin'))) | |
i.chunk.end <- which(stringr::str_detect(x, knitr::knit_patterns$get('chunk.end'))) | |
x_type[i.chunk.end] <- 'chunk.end' | |
for (j in i.chunk.begin) { | |
j.chunk.end <- min(i.chunk.end[i.chunk.end > j])-1 | |
x_type[j:j.chunk.end] <- 'chunk' | |
} | |
x_type[i.chunk.begin] <- 'chunk.begin' | |
# Check for inline code | |
i.inline <- which(stringr::str_detect(x, knitr::knit_patterns$get('inline.code'))) | |
i.inline <- intersect(i.inline, which(x_type == 'text')) | |
x_type[i.inline] <- 'inline' | |
# Check empty lines | |
i.empty <- which(stringr::str_detect(x, "^\\s*$")) | |
i.empty <- intersect(i.empty, which(x_type == 'text')) | |
x_type[i.empty] <- 'empty' | |
really_empty <- function(x_type, j, n = -1) { | |
if (grepl('(chunk|yaml)', x_type[j + n])) { | |
return('empty') | |
} else if (n < 0) { | |
return(really_empty(x_type, j, 1)) | |
} else if (x_type[j + n] %in% c('text', 'inline')) { | |
return('text') | |
} else { | |
return(really_empty(x_type, j, n+1)) | |
} | |
} | |
for (j in i.empty) { | |
x_type[j] <- really_empty(x_type, j) | |
} | |
# Rewrite lines helper functions | |
comment <- function(x) paste("#'", x) | |
make_chunk_header <- function(x, chunk_header) { | |
stringr::str_replace(stringr::str_replace(x, knitr::knit_patterns$get('chunk.begin'), "\\1"), | |
"^r[, ]?", paste(chunk_header, "")) | |
} | |
# Rewrite lines | |
y <- x | |
regex_inline_grouped <- "`r[ ]?#?(([^`]+)\\s*)`" | |
i.empty <- which(x_type == 'empty') | |
i.text <- which(x_type == 'text') | |
y[i.chunk.begin] <- make_chunk_header(x[i.chunk.begin], chunk_header) | |
y[i.inline] <- comment(stringr::str_replace_all(x[i.inline], regex_inline_grouped, "{{\\1}}")) | |
y[i.text] <- comment(x[i.text]) | |
if (length(yaml_markers) > 0) y[i.yaml] <- comment(x[i.yaml]) | |
y[i.empty] <- "" | |
y[i.chunk.end] <- "" | |
y_code <- y[which(stringr::str_detect(x_type, 'chunk'))] | |
if (!is.null(outfile)){ | |
outfile_name <- stringr::str_replace(outfile, "(.+)\\.R$", "\\1") | |
if (output_type == "script") { | |
cat(c(y, ""), file = paste0(outfile_name, ".R"), sep = '\n') | |
} else if (output_type == "code") { | |
cat(c(y_code, ""), file = paste0(outfile_name, ".R"), sep = '\n') | |
} else { | |
cat(c(y, ""), file = paste0(outfile_name, ".R"), sep = '\n') | |
cat(c(y_code, ""), file = paste0(outfile_name, "_code.R"), sep = '\n') | |
} | |
} else { | |
switch( | |
output_type, | |
'script' = unname(y), | |
'code' = unname(y_code) | |
) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@isteves - thanks! I apparently have a habit of unwittingly re-writing parts of knitr, and this was one of those times.