Skip to content

Instantly share code, notes, and snippets.

@RLesur
Last active November 14, 2018 10:22
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save RLesur/522e0e7e7b75826e2c9d35bdfcaa1a89 to your computer and use it in GitHub Desktop.
Save RLesur/522e0e7e7b75826e2c9d35bdfcaa1a89 to your computer and use it in GitHub Desktop.
PDF generation from R using the Chrome DevTools Protocol
# Packages ---------------------------------------------------------------
remotes::install_github('milesmcbain/chradle')
remotes::install_github('rlesur/pagedown@automation')
library(chradle)
library(pagedown)
library(httpuv)
# Chrome headless helpers -------------------------------------------------
# Thanks to Miles McBain
# Start a headless Chrome process with remote debugging switched on.
#   debug_port: port handed to Chrome's --remote-debugging-port flag.
#   bin:        name or path of the Chrome executable to launch.
# Returns a list holding the processx process object (`debug_process`)
# and the port that was used (`debug_port`).
browser_init <- function(debug_port = 9222, bin = "google-chrome"){
  port_flag <- glue::glue("--remote-debugging-port={debug_port}")
  chrome_args <- c(
    "--use-test-config",
    "--headless",
    "--no-first-run",
    "--new-window",
    "--user-data-dir=remote-profile",
    port_flag
  )
  chrome_process <- processx::process$new(bin, chrome_args)
  # Internal chradle helper, called with the port before returning.
  # NOTE(review): presumably it waits until the debugging endpoint
  # answers with HTTP 200 -- confirm against chradle.
  chradle:::debugger_200_ok(debug_port)
  list(debug_process = chrome_process, debug_port = debug_port)
}
# Retrieve the websocket debugger URL of the open "page" target(s).
#   debug_port: port of the DevTools HTTP endpoint on localhost.
# Returns the `webSocketDebuggerUrl` value of every listed target whose
# `type` is "page".
ws_addr <- function(debug_port = 9222){
  targets_url <- glue::glue("http://localhost:{debug_port}/json")
  targets <- jsonlite::read_json(targets_url, simplifyVector = TRUE)
  is_page <- targets$type == "page"
  targets$webSocketDebuggerUrl[is_page]
}
# Generate and serve a paged document -------------------------------------
# Fetch the pagedown example document into the session's temporary directory
rmd_file <- file.path(tempdir(), "index.Rmd")
download.file(
  url = "https://raw.githubusercontent.com/rstudio/pagedown/master/inst/examples/index.Rmd",
  destfile = rmd_file
)
# Render it, then serve the temporary directory over HTTP on port 4321
paged_document <- rmarkdown::render(input = rmd_file)
servr::httd(dir = tempdir(), port = 4321)
# Open a headless Chrome: it will render the paged document later ---------
instance <- browser_init()
# The page target may take a moment to show up, so you may need to wait a
# little before asking for its websocket address
headless_address <- ws_addr(debug_port = instance$debug_port)
# Build a websocket server ------------------------------------------------
# Heavily inspired by https://stackoverflow.com/a/50985728/6500804
# JS helper that appends one <li> holding the message to the log list
write_log <- c(
  "function writeMessage(msg) {",
  " var log = document.getElementById('log');",
  " var li = document.createElement('li');",
  " li.innerText = msg;",
  " log.appendChild(li);",
  "}"
)
# Empty HTML skeleton of the log; the JS helper above fills the <ol>
html_body <- c('<h2>DevTools log</h2>', '<ol id="log">', '</ol>')
# Placeholder for the PDF payload; it stays NULL until something is received
pdf <- NULL
# Configure the httpuv server
# `app` is the application definition handed to httpuv:
#   - `call` answers every HTTP request with a small HTML page whose script
#     opens two websockets from the visiting browser: one back to this
#     httpuv server and one to headless Chrome (address taken from the
#     global `headless_address`). The page drives Chrome through the
#     DevTools protocol and forwards the printed PDF to R.
#   - `onWSOpen` stores the first websocket message it receives in the
#     global `pdf` and closes the connection.
# The handler also reads the globals `html_body` and `write_log`.
app <- list(
  call = function(req) {
    # Host the page should connect back to: prefer the Host header and fall
    # back to the server name when the header is absent
    host <- if (is.null(req$HTTP_HOST)) req$SERVER_NAME else req$HTTP_HOST
    # Already wrapped in double quotes so it can be pasted in as a JS literal
    ws_url <- paste0('"', "ws://", host, '"')
    page_lines <- c(
      "<!DOCTYPE html>",
      "<html>",
      "<head>",
      "</head>",
      "<body>",
      html_body,
      "</body>",
      '<script type="text/javascript">',
      write_log,
      sprintf('var ws_adr = "%s";', headless_address),
      # Create the connection to the httpuv server:
      sprintf("var httpuv = new WebSocket(%s);", ws_url),
      # Create the connection to headless Chrome:
      "var chromeConnection = new WebSocket(ws_adr);",
      # Configure the connection with headless Chrome:
      "chromeConnection.onmessage = function(event) {",
      " var data = JSON.parse(event.data);",
      # write the incoming messages from headless Chrome to the log
      " writeMessage(event.data);",
      # when the pagedownListener binding is called
      ' if (data.method === "Runtime.bindingCalled") {',
      # ask Chrome to produce the pdf (request id 99)
      ' chromeConnection.send(\'{"id":99,"method":"Page.printToPDF"}\');',
      " }",
      # when the answer to request 99 (the pdf) is received
      " if (data.id === 99) {",
      ' writeMessage("pdf received. Sending to R...");',
      # send it to R using the httpuv connection
      " httpuv.send(data.result.data);",
      ' writeMessage("closing connection to chrome headless...");',
      " chromeConnection.close();",
      " }",
      "};",
      # Synchronization with headless Chrome
      'chromeConnection.onopen = function (event) {',
      # We need the Runtime and Page domains
      ' chromeConnection.send(\'{"id":1,"method":"Runtime.enable"}\');',
      ' chromeConnection.send(\'{"id":2,"method":"Page.enable"}\');',
      # Register the pagedownListener
      ' chromeConnection.send(\'{"id":3,"method":"Runtime.addBinding","params":{"name":"pagedownListener"}}\');',
      # Open the html_paged document
      ' chromeConnection.send(\'{"id":4,"method":"Page.navigate","params":{"url":"http://127.0.0.1:4321"}}\');',
      '};',
      "</script>",
      "</html>"
    )
    list(
      status = 200L,
      headers = list(
        'Content-Type' = 'text/html'
      ),
      body = paste(page_lines, collapse = "\r\n")
    )
  },
  # Configure the server-side websocket connection
  onWSOpen = function(ws) {
    # In this POC, the only message received by the httpuv server is the pdf
    ws$onMessage(function(binary, message) {
      # `<<-` is deliberate: the callback has to hand the payload over to
      # the `pdf` variable defined outside of it
      pdf <<- message
      ws$close()
    })
  }
)
# Start the websocket server:
# NOTE(review): "0.0.0.0" listens on every network interface; consider
# "127.0.0.1" if the page is only ever opened from this machine.
server <- startDaemonizedServer("0.0.0.0", 9454, app)
# and now, this is SHOWTIME!!!
# pick your favorite browser, and open http://localhost:9454
# obviously, it can be automated
# For instance, the RStudio viewer:
rstudioapi::viewer("http://localhost:9454")
# Wait to see the result...
# If you want to play, you can also have a look at the running headless browser at http://localhost:9222 (use Chrome!)
# The pdf is delivered asynchronously through the websocket callback, so
# `pdf` is still NULL right after the page has been opened. Keep servicing
# httpuv requests until the payload has arrived, for at most 60 seconds.
pdf_deadline <- Sys.time() + 60
while (is.null(pdf) && Sys.time() < pdf_deadline) {
  httpuv::service(100)
}
# Write the pdf file to disk:
if (is.null(pdf)) {
  # Warn instead of failing so that the clean-up steps below still run
  warning("No PDF was received from the browser; 'book.pdf' was not written.",
          call. = FALSE)
} else {
  # The payload is base64 text; decode it to raw bytes before writing
  writeBin(RCurl::base64Decode(pdf, "raw"),
           "book.pdf"
  )
}
# et voilà
# Stop the websocket server
stopDaemonizedServer(server)
# Close and clean the headless browser -----------------------------------------
chr_kill(instance)
chr_clean()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment