Last active
November 14, 2018 10:22
-
-
Save RLesur/522e0e7e7b75826e2c9d35bdfcaa1a89 to your computer and use it in GitHub Desktop.
PDF generation from R using the Chrome DevTools Protocol
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages --------------------------------------------------------------- | |
remotes::install_github('milesmcbain/chradle') | |
remotes::install_github('rlesur/pagedown@automation') | |
library(chradle) | |
library(pagedown) | |
library(httpuv) | |
# Chrome headless helpers ------------------------------------------------- | |
# Thanks to Miles McBain | |
# Start a headless Chrome with its remote-debugging endpoint enabled.
#
# debug_port: port passed to Chrome through --remote-debugging-port.
# bin:        name or path of the Chrome executable to launch.
#
# Returns a list holding the processx process object (`debug_process`) and the
# port that was requested (`debug_port`).
browser_init <- function(debug_port = 9222, bin = "google-chrome") {
  chrome_args <- c(
    "--use-test-config",
    "--headless",
    "--no-first-run",
    "--new-window",
    # profile directory is relative to the current working directory
    "--user-data-dir=remote-profile",
    glue::glue("--remote-debugging-port={debug_port}")
  )
  chrome <- processx::process$new(bin, chrome_args)

  # Internal chradle helper, called with the debugging port before returning.
  # NOTE(review): presumably waits until the debugger answers HTTP 200 - confirm.
  chradle:::debugger_200_ok(debug_port)

  list(debug_process = chrome, debug_port = debug_port)
}
# Look up the websocket debugger URL of the headless Chrome page target.
#
# Reads the JSON target list served by Chrome at
# http://localhost:<debug_port>/json and keeps the entries of type "page".
#
# debug_port: remote-debugging port of the running Chrome.
#
# Returns a character vector with the `webSocketDebuggerUrl` of every "page"
# target. Raises an error when no such target is listed, instead of returning
# an empty result that would silently produce a page without any websocket
# address further down the script.
ws_addr <- function(debug_port = 9222) {
  targets <- jsonlite::read_json(
    glue::glue("http://localhost:{debug_port}/json"),
    simplifyVector = TRUE
  )
  page_urls <- targets$webSocketDebuggerUrl[targets$type == "page"]

  # Chrome may not have registered its page target yet right after start-up
  if (length(page_urls) == 0L) {
    stop(
      "No debuggable page found on port ", debug_port,
      "; wait a little and call ws_addr() again.",
      call. = FALSE
    )
  }
  page_urls
}
# Generate and serve a paged document -------------------------------------
# Fetch the pagedown example into the session temp directory and render it
rmd_file <- file.path(tempdir(), "index.Rmd")
download.file(
  url = "https://raw.githubusercontent.com/rstudio/pagedown/master/inst/examples/index.Rmd",
  destfile = rmd_file
)
paged_document <- rmarkdown::render(input = rmd_file)
# Serve the temp directory on port 4321: this is the URL opened by Chrome later
servr::httd(dir = tempdir(), port = 4321)

# Open a headless Chrome: it will render the paged document later ---------
instance <- browser_init()
# you may wait a little bit before retrieving the ws address
headless_address <- ws_addr()
# Build a websocket server ------------------------------------------------
# Heavily inspired by https://stackoverflow.com/a/50985728/6500804

# Placeholder for the pdf payload; it stays NULL until a message is received
pdf <- NULL

# Empty DevTools log: a title followed by an ordered list without any item
html_body <- c('<h2>DevTools log</h2>', '<ol id="log">', '</ol>')

# JS helper that appends one <li> holding `msg` to the log list above
write_log <- c(
  "function writeMessage(msg) {",
  " var log = document.getElementById('log');",
  " var li = document.createElement('li');",
  " li.innerText = msg;",
  " log.appendChild(li);",
  "}"
)
# Configure the httpuv server
# `app` bundles the two handlers given to httpuv:
#   - call:     answers every HTTP request with one HTML page whose script
#               connects to this server and to headless Chrome
#   - onWSOpen: receives the pdf sent back by that page over a websocket
# The page is built from `html_body`, `write_log` and `headless_address`, which
# are read from the enclosing environment each time a request is served.
app <- list(
  call = function(req) {
    # Address the page uses to reach this server again through a websocket:
    # the Host header when present, the server name otherwise
    host <- if (is.null(req$HTTP_HOST)) req$SERVER_NAME else req$HTTP_HOST

    page <- c(
      "<!DOCTYPE html>",
      "<html>",
      "<head>",
      "</head>",
      "<body>",
      html_body,
      "</body>",
      '<script type="text/javascript">',
      write_log,
      sprintf('var ws_adr = "%s";', headless_address),
      # Create the connection to the httpuv server:
      paste0('var httpuv = new WebSocket("ws://', host, '");'),
      # Create the connection to headless Chrome:
      "var chromeConnection = new WebSocket(ws_adr);",
      # Configure the connection with headless Chrome:
      "chromeConnection.onmessage = function(event) {",
      " var data = JSON.parse(event.data);",
      # write the incoming messages from headless Chrome to the log
      " writeMessage(event.data);",
      # when the pagedownListener is called
      ' if (data.method === "Runtime.bindingCalled") {',
      # ask to produce the pdf
      ' chromeConnection.send(\'{"id":99,"method":"Page.printToPDF"}\');',
      " }",
      # when the answer to the pdf request (id 99) is received
      " if (data.id === 99) {",
      # a failed request carries no `result`: report it instead of throwing
      "  if (!data.result) {",
      '   writeMessage("Page.printToPDF failed: no pdf sent to R");',
      "   return;",
      "  }",
      ' writeMessage("pdf received. Sending to R...");',
      # send it to R using the httpuv connection
      " httpuv.send(data.result.data);",
      ' writeMessage("closing connection to chrome headless...");',
      " chromeConnection.close();",
      " }",
      "};",
      # Synchronization with headless Chrome
      'chromeConnection.onopen = function (event) {',
      # We need the Runtime and Page domains
      ' chromeConnection.send(\'{"id":1,"method":"Runtime.enable"}\');',
      ' chromeConnection.send(\'{"id":2,"method":"Page.enable"}\');',
      # Register the pagedownListener
      ' chromeConnection.send(\'{"id":3,"method":"Runtime.addBinding","params":{"name":"pagedownListener"}}\');',
      # Open the html_paged document
      ' chromeConnection.send(\'{"id":4,"method":"Page.navigate","params":{"url":"http://127.0.0.1:4321"}}\');',
      '};',
      "</script>",
      "</html>"
    )

    list(
      status = 200L,
      headers = list("Content-Type" = "text/html"),
      body = paste(page, collapse = "\r\n")
    )
  },
  # Configure the server-side websocket connection
  onWSOpen = function(ws) {
    # In this POC, the only message received by the httpuv server is the pdf
    ws$onMessage(function(binary, message) {
      # `<<-` on purpose: the payload is stored in the script-level `pdf`
      pdf <<- message
      ws$close()
    })
  }
)
# Start the websocket server:
server <- startDaemonizedServer("0.0.0.0", 9454, app)
# and now, this is SHOWTIME!!!
# pick your favorite browser, and open http://localhost:9454
# obviously, it can be automated
# For instance, the RStudio viewer:
rstudioapi::viewer("http://localhost:9454")
# If you want to play, you can also have a look at the running headless browser
# at http://localhost:9222 (use Chrome!)

# Wait for the result: the pdf arrives asynchronously, so keep servicing the
# pending httpuv requests until the websocket handler has filled `pdf`, or give
# up after two minutes. Without this wait, running the script in one go reaches
# writeBin() while `pdf` is still NULL.
local({
  give_up_at <- Sys.time() + 120
  while (is.null(pdf) && Sys.time() < give_up_at) {
    httpuv::service(100)
  }
})

if (is.null(pdf)) {
  # Do not stop here, so that the server and the browser still get cleaned up
  warning("No pdf received from the browser: book.pdf was not written.",
          call. = FALSE)
} else {
  # Write the pdf file to disk:
  writeBin(RCurl::base64Decode(pdf, "raw"), "book.pdf")
  # et voilà
}

# Stop the websocket server
stopDaemonizedServer(server)
# Close and clean the headless browser -----------------------------------------
chr_kill(instance)
chr_clean()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment