Skip to content

Instantly share code, notes, and snippets.

@nanxstats
Last active August 20, 2022 04:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nanxstats/22e8f0e4c68ed7d4a060b17f3dd6d58b to your computer and use it in GitHub Desktop.
Save nanxstats/22e8f0e4c68ed7d4a060b17f3dd6d58b to your computer and use it in GitHub Desktop.
Print HTML to PDF using chromote
library("promises")
library("chromote")
#' Print HTML to PDF using chromote
#'
#' Opens a new `ChromoteSession`, navigates to `url`, waits for the page's
#' load event, prints the page with `Page.printToPDF`, and writes the decoded
#' PDF bytes to `filename`. The session is closed once the promise chain
#' settles, whether it succeeded or failed.
#'
#' @param url Input URL
#' @param filename Output file name. If `NULL`, it is derived from `url` by
#' dropping the scheme and a trailing slash, sanitizing the remainder with
#' `fs::path_sanitize()`, and appending `.pdf`.
#' @param wait_ If TRUE, run in synchronous mode (block until the promise
#' chain has settled), otherwise, run in asynchronous mode.
#' @param ... Additional parameters for Page.printToPDF, see
#' <https://chromedevtools.github.io/devtools-protocol/tot/Page/#method-printToPDF>
#' for possible options.
#' @return `filename`, invisibly. In asynchronous mode the function returns
#' before the chain has run, so the file may not exist yet.
print_to_pdf <- function(url, filename = NULL, wait_ = FALSE, ...) {
  if (is.null(filename)) {
    filename <- url |>
      gsub("^.*://", "", x = _) |>
      gsub("/$", "", x = _) |>
      fs::path_sanitize(replacement = "_") |>
      paste0(".pdf")
  }

  b <- ChromoteSession$new()

  # Subscribe to the load event *before* issuing the navigation. If the
  # subscription is only made after navigate() resolves, a page that loads
  # quickly (e.g. a local file:// URL) can fire the event first and the chain
  # would then wait for an event that has already happened.
  loaded <- b$Page$loadEventFired(wait_ = FALSE)
  navigated <- b$Page$navigate(url, wait_ = FALSE)

  p <-
    # Continue once both promises are fulfilled; a failure of either one
    # rejects the chain.
    promise_all(navigated, loaded) %...>%
    {
      b$Page$printToPDF(..., wait_ = FALSE)
    } %...>%
    {
      .$data
    } %...>%
    {
      # Decode to a raw vector and let writeBin() open and close the file
      # itself, so no connection is left open if decoding fails.
      writeBin(base64enc::base64decode(.), filename)
    } %...>%
    {
      message(filename)
    } %>%
    finally(~ b$close())

  if (wait_) {
    b$wait_for(p)
  }

  invisible(filename)
}
# ioslides presentation and pagedown book
urls <- c(
  "https://nanx.me/talks/reimagine-rpkgs/",
  "https://pagedown.rbind.io/"
)

# Print synchronously: print_to_pdf() returns the output file name right away,
# so in asynchronous mode the PDFs may not have been written yet when
# pdf_info() tries to read them below.
fn <- lapply(urls, print_to_pdf, wait_ = TRUE, printBackground = TRUE)

# Inspect the metadata of each generated PDF
fn[[1]] |>
  pdftools::pdf_info() |>
  str()

fn[[2]] |>
  pdftools::pdf_info() |>
  str()
# Also supports file:// URLs
# (but only after running remote URLs like above AND using `wait_ = TRUE`)

# Fetch a sample HTML page into a temporary local file
f <- curl::curl_download(
  "https://nanx.me/blog/post/r-readability-parser/example.html",
  tempfile(fileext = ".html")
)

# Print the local copy through a file:// URL in synchronous mode, then show
# the metadata of the resulting PDF
str(
  pdftools::pdf_info(
    print_to_pdf(
      paste0("file://", normalizePath(f, winslash = "/")),
      filename = "example.pdf",
      wait_ = TRUE
    )
  )
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment