Skip to content

Instantly share code, notes, and snippets.

@phi-gamma
Created September 6, 2013 18:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save phi-gamma/6467801 to your computer and use it in GitHub Desktop.
Save phi-gamma/6467801 to your computer and use it in GitHub Desktop.
Typeset Google spreadsheets directly in LuaTeX
%% http://tex.stackexchange.com/q/131500
\documentclass {scrartcl}
\usepackage {luatexbase}
\RequireLuaModule {lualibs}
\usepackage {luaotfload} %% recommended, in that order!
\RequireLuaModule {spreadsheet}
\makeatletter
%% \googlespreadsheet [<options>] {<uri>}
%% User-level macro. If no optional argument follows, an empty option list
%% is supplied before handing over to \googlespreadsheetopt.
\protected \def \googlespreadsheet {%
\@ifnextchar[\googlespreadsheetopt
{\googlespreadsheetopt[]}%
}
%% Worker macro: #1 is the comma separated option list, #2 the URI.
%% The options are fully expanded by \edef, the URI is passed through
%% \detokenize; both reach the Lua function
%% packagedata.spreadsheet.urihandler as long-bracket strings.
\def \googlespreadsheetopt [#1]#2{%
\edef \currentspreadsheetoptions {#1}%
\directlua {
packagedata.spreadsheet.urihandler ([[\currentspreadsheetoptions]],
[[\detokenize {#2}]])
}%
}
\makeatother
\begin {document}
\input knuth
%% Usage example: "center" selects centered columns; "dump" additionally
%% writes the generated tabular code via texio.write_nl.
\begin {table}
\googlespreadsheet [center,dump]
{https://docs.google.com/spreadsheet/ccc?key=0Amykmqr4Of-MdEVIUUcyYld3WTJhZnJHRkgwSF9CaUE&usp=sharing}
\end {table}
\input knuth
\end {document}
#!/usr/bin/env texlua
-----------------------------------------------------------------------
-- FILE: spreadsheet.lua
-- DESCRIPTION: retrieve and typeset spreadsheets from google docs
-- REQUIREMENTS: Luatex, Luasec
-- AUTHOR: Philipp Gesang (Phg), <phg42.2a@gmail.com>
-- VERSION: 1.0
-- CREATED: 2013-09-06 17:26:50+0200
-----------------------------------------------------------------------
--
--- Create (or reuse) the global ``packagedata.spreadsheet`` table. The TeX
--- macros call the handler through this table, so it has to be global.
packagedata = packagedata or { }
packagedata.spreadsheet = packagedata.spreadsheet or { }
local spreadsheet = packagedata.spreadsheet
--- Module description; it is passed to luatexbase.provides_module below
--- when luatexbase is available.
spreadsheet.module_info = {
name = "spreadsheet",
version = 0.0,
date = "2013/09/06",
description = "Typeset spreadsheets from Google Docs",
author = "Philipp Gesang",
copyright = "Philipp Gesang",
license = "BSD 2 clause",
}
--- Load the extended Lualibs unless a ``config.lualibs`` table already
--- exists. The global ``config`` itself may be missing (e.g. when the file
--- is run as a plain texlua script), so it must be tested before it is
--- indexed; otherwise the check itself raises "attempt to index a nil
--- value".
if not (config and config.lualibs) then
    config = config or { }
    config.lualibs = config.lualibs or { }
    config.lualibs.load_extended = true
    require "lualibs"
end
--- If no global ``caches`` table exists yet, load ``luaotfload-basics-gen``.
--- The ``utilities`` table that existed before the require is saved, receives
--- the ``storage`` field of whatever ``utilities`` refers to afterwards, and
--- is then reinstated as the global.
--- NOTE(review): presumably the required file replaces the global
--- ``utilities`` and defines ``caches`` (used for getwritablepath below) --
--- confirm against the luaotfload sources.
if not caches then
local u = utilities
require "luaotfload-basics-gen"
u.storage = utilities.storage
utilities = u
end
local lpeg = require "lpeg"
local md5 = require "md5"
local socket = require "socket"
local ssl = require "ssl"
local http = require "socket.http"
local https = require "ssl.https"
local stringformat = string.format
local stringrep = string.rep
local tableconcat = table.concat
local tabletohash = table.tohash
local texiowrite_nl = texio.write_nl
local getwritablepath = caches.getwritablepath
local md5sumhexa = md5.sumhexa
local lfsisfile = lfs.isfile
local ioloaddata = io.loaddata
local iosavedata = io.savedata
local texsprint = tex.sprint
local unpack = unpack or table.unpack
local tonumber = tonumber
local sockettcp = socket.tcp
local sslwrap = ssl.wrap
local C, Cs, P, R, S = lpeg.C, lpeg.Cs, lpeg.P, lpeg.R, lpeg.S
local lpegmatch = lpeg.match
local urlescaper = lpeg.patterns.urlescaper
local urlsplitter = lpeg.patterns.urlsplitter
local rfc4180splitter = utilities.parsers.rfc4180splitter ()
--- Logging functions used throughout the file.
--- NOTE: the local ``error`` shadows Lua's builtin ``error`` from here on.
local error, warn, info
if luatexbase and luatexbase.provides_module then
--- luatexbase present: register the module and use the loggers it returns
error, warn, info = luatexbase.provides_module(spreadsheet.module_info)
else
--- no luatexbase: set the kpse program name and send all three log levels
--- through texio.write_nl
kpse.set_program_name "luatex"
error, warn, info = texiowrite_nl, texiowrite_nl, texiowrite_nl
end
-----------------------------------------------------------------------
--- connection handling
-----------------------------------------------------------------------
--- Parameters handed to ssl.wrap when the TCP socket is wrapped (see
--- get_ssl_connection).
--- NOTE(review): verify = "none" presumably disables certificate
--- verification in LuaSec -- confirm, and consider "peer" with a CA bundle.
local ssl_parameters = {
mode = "client",
protocol = "tlsv1_2",
--protocol = "sslv3",
verify = "none",
options = "all",
}
--- Status lines; http_200 is compared verbatim against the first line of a
--- response. http_302 is not referenced by the code visible in this file.
local http_302 = [[HTTP/1.1 302 Moved Temporarily]]
local http_200 = [[HTTP/1.1 200 OK]]
--- one or more characters up to the end of the subject string
local p_restof = (1 - P(-1))^1 * P(-1)
--- capture the value part of a ``Location`` / ``Set-Cookie`` header line
local p_location = P"Location: " * C(p_restof)
local p_setcookie = P"Set-Cookie: " * C(p_restof)
--- capture a complete ``Name: value`` line whose name consists of letters
--- and hyphens
local p_http_header = C((R("az", "AZ") + P"-")^1 * P": " * p_restof)
local semicolon = P";"
--- capture everything before the first semicolon of a cookie string
local p_cookie = C((1 - semicolon)^1) --- ignore attributes
--- request line templates (``%s`` receives the resource) and header lines
local tpl_GET = "GET %s HTTP/1.1"
local tpl_HEAD = "HEAD %s HTTP/1.1"
local header_Host = "Host: %s"
local header_User_Agent = "User-Agent: hackish CSV retrieval for LuaTeX"
local header_Accept = "Accept: */*"
local header_Connection = "Connection: Keep-Alive"
local header_Cookie = "Cookie: %s"
----- header_Referer = "Referer: %s"
--- Open a TCP connection to port 443 of ``hostname``, wrap it with the
--- module's ``ssl_parameters`` and run the handshake.
--- Results of the individual steps are not inspected here.
--- @param hostname string
--- @return the wrapped connection object
local get_ssl_connection = function (hostname)
    local plain = sockettcp ()
    plain:connect (hostname, 443)
    --- hand the raw socket over to the ssl layer
    local secured = sslwrap (plain, ssl_parameters)
    secured:dohandshake ()
    return secured
end
--- Assemble the text of an HTTP/1.1 GET request.
--- @param resource string     path plus query string to request
--- @param hostname string     value for the ``Host`` header
--- @param from     string|nil referring URI; accepted but currently unused
---                            (the Referer header is disabled)
--- @param cookies  table|nil  array of ``name=value`` strings; sent as one
---                            ``Cookie`` header when non-empty
--- @return string the complete request including the terminating empty line
local make_request = function (resource, hostname, from, cookies)
    local req = {
        stringformat (tpl_GET, resource),
        header_User_Agent,
        stringformat (header_Host, hostname),
        header_Accept,
        header_Connection,
    }
    if cookies and next (cookies) then
        req[#req + 1] = stringformat (header_Cookie,
                                      tableconcat (cookies, "; "))
    end
    --- HTTP/1.1 mandates CRLF as the line terminator of the request line and
    --- of every header; a bare LF is only tolerated by lenient servers.
    return tableconcat (req, "\r\n") .. "\r\n\r\n"
end
local max_redirects = 10

--- Read a body in chunked transfer coding from ``conn``.
--- Every chunk is announced by a line holding its size in hexadecimal and
--- is followed by a line break; a chunk of size zero ends the body.
--- @return string|nil the concatenated chunks; nil if the first line is not
---         a chunk size or if a chunk could not be received completely
local read_chunked_body = function (conn)
    local chunks
    while true do
        local line = conn:receive ()
        local bytes = line and tonumber (line, 16)
        if not bytes then --- not (or no longer) a chunk header
            break
        end
        chunks = chunks or { }
        if bytes == 0 then --- terminating chunk
            break
        end
        local chunk = conn:receive (bytes)
        if not chunk then
            return nil
        end
        chunks [#chunks + 1] = chunk
        conn:receive () --- skip the line break that closes the chunk
    end
    return chunks and tableconcat (chunks)
end

--- Request ``uri`` via HTTPS and follow ``Location`` redirects (at most
--- ``max_redirects`` of them) until a response with status line
--- ``HTTP/1.1 200 OK`` arrives. Cookies set along the way are sent back
--- with subsequent requests.
--- NOTE(review): when a connection is reused after a redirect, the body of
--- the redirect response is not consumed first -- confirm that this cannot
--- confuse the parsing of the next response.
--- @param uri string
--- @return string|nil the URI that finally answered with status 200
--- @return string|nil the response body; nil if it could not be read
--- Returns nothing if sending or receiving fails, if a response is neither
--- a success nor a redirect, or if the redirect limit is exceeded.
local resolve_indirection = function (uri)
    local from, conn, hostname
    local cookies = { }
    local cnt = 0 -- count redirects
    local components = lpegmatch (urlsplitter, uri)
    while uri and components and cnt <= max_redirects do
        if not conn then
            hostname = components [2]
            conn = get_ssl_connection (hostname)
        end
        local resource = stringformat ([[/%s?%s]],
                                       components [3],
                                       components [4])
        local request = make_request (resource, hostname, from, cookies)
        local sent, err, last = conn:send (request)
        if not sent then
            warn ("> error; bytes sent:", last)
            warn "> retrying ... "
            --- ``last`` is the index of the last byte that went out, so
            --- the retry has to resume with the byte after it
            sent, err, last = conn:send (request, (last or 0) + 1)
            if not sent then
                warn ("> failed, aborting")
                conn:close ()
                return
            end
        end
        local line, rerr = conn:receive ()
        --info (stringformat (">>> response=%q error=%q", line, rerr or "<none>"))
        if rerr then
            conn:close ()
            return
        end
        local found = line == http_200
        local keepalive = false
        local redirect
        while line do --- status line and headers
            redirect = lpegmatch (p_location, line) or redirect
            local cookie = lpegmatch (p_setcookie, line)
            if cookie then
                cookies [#cookies + 1] = lpegmatch (p_cookie, cookie)
            end
            if line == header_Connection then
                keepalive = true
            end
            line = conn:receive ()
            if not line or string.is_empty (line) then --- end of header
                line = nil
            end
        end --- while line
        if found then
            local data = read_chunked_body (conn)
            conn:close ()
            return uri, data
        end
        if not redirect then
            --- neither success nor a redirect target: nothing to follow
            conn:close ()
            return
        end
        from = uri
        uri = redirect
        cnt = cnt + 1
        components = lpegmatch (urlsplitter, uri)
        if keepalive == false
            or not components
            or hostname ~= components [2]
        then
            conn:close ()
            conn = nil
            collectgarbage "collect"
        end
    end -- while uri
    if conn then --- gave up: do not leave the socket open
        conn:close ()
    end
end
-----------------------------------------------------------------------
--- url handling
-----------------------------------------------------------------------
local ampersand = P"&"
--- ``output=<value>`` with the (non-empty) value replaced by ``csv``
local output = P"output=" * ((1 - ampersand)^1 / "csv")
--- succeeds if the subject contains an ``output=<value>`` parameter
local p_output = (1 - output)^0 * output
--- substitutes the value of every ``output`` parameter in the subject
local s_output = Cs((output + 1)^0)

--- Turn the address of a spreadsheet into one that requests CSV output.
--- An existing ``output`` parameter gets its value replaced by ``csv``;
--- otherwise ``output=csv`` is appended, using ``?`` as the separator when
--- the URI has no query string yet and ``&`` when it has one.
--- @param uri string|nil
--- @return string|false the rewritten URI; false if no URI was given
local rewrite_spreadsheet_uri = function (uri)
    info "Generating URL to CSV data."
    if not uri then
        return false
    end
    --- check if the uri already contains an output parameter
    if lpegmatch (p_output, uri) then
        --- if so, replace all occurrences with "csv" as argument
        return lpegmatch (s_output, uri)
    end
    --- else, append the desired parameter; a plain (non-pattern) search
    --- tells whether a query string has been started already
    local separator = string.find (uri, "?", 1, true) and "&" or "?"
    return uri .. separator .. "output=csv"
end
--- Fetch the raw CSV text for ``curi``, consulting the cache first.
--- The cache file lives in the writable "spreadsheet" cache path and is
--- named after the MD5 sum of the URI.
--- @param curi  string  URI of the CSV export
--- @param force any     if truthy, skip the cache and download again
--- @return string|nil "<cache>" on a cache hit, else the URI the document
---         was finally retrieved from; nil on failure
--- @return string|nil the document text; nil on failure
local get_raw_doc = function (curi, force)
    info (stringformat ("Retrieving URI %q.", curi))
    local hash = md5sumhexa (curi)
    local writable = getwritablepath ("spreadsheet", "")
    local cachefile = writable .. "/" .. hash
    if not force and lfsisfile (cachefile) then
        local data = ioloaddata (cachefile)
        --- an unreadable cache file falls through to a fresh download
        if data then
            info (stringformat ("Spreadsheet (%d bytes) found in cache.",
                                #data))
            return "<cache>", data
        end
    end
    local resolved, data = resolve_indirection (curi)
    --- the resolver may report a final URI without having been able to
    --- read the body; treat that as a failure too instead of measuring
    --- and caching a nil value
    if not resolved or not data then
        info "Failed to resolve URI, sorry!"
        return nil, nil
    end
    info (stringformat ("Found at %q (%d bytes).",
                        resolved, #data))
    iosavedata (cachefile, data)
    info (stringformat ("Spreadsheet stored in cache at %q.",
                        cachefile))
    return resolved, data
end
-----------------------------------------------------------------------
--- csv extraction
-----------------------------------------------------------------------
--- Split raw CSV text into rows with the RFC 4180 splitter.
--- A message is logged when the splitter yields nothing.
--- @param raw string
--- @return whatever the splitter returned (a false value on failure)
local extract_data = function (raw)
    local rows = rfc4180splitter (raw)
    if rows then
        return rows
    end
    info "Cannot parse CSV file."
    return rows
end
-----------------------------------------------------------------------
--- latex writer
-----------------------------------------------------------------------
--- column specifier per alignment option
local colspec = {
    center  = "c",
    left    = "l",
    right   = "r",
    default = "l",
}

--- Derive the tabular preamble from the data.
--- Only the longest row determines the number of columns. The alignment is
--- taken from the first option set among center, left and right.
--- @param rows    table array of rows (arrays of cells)
--- @param options table option hash
--- @return number column count
--- @return string column specification, one letter per column
local make_tablespec = function (rows, options)
    local align = "default"
    if options.center then
        align = "center"
    elseif options.left then
        align = "left"
    elseif options.right then
        align = "right"
    end
    local widest = 0
    for idx = 1, #rows do
        widest = math.max (widest, #rows [idx])
    end
    return widest, stringrep (colspec [align], widest)
end
--- Render one table row: exactly ``n`` cells joined by `` & `` and closed
--- with a row terminator. Missing cells are emitted as empty strings.
--- @param row table array of cell contents
--- @param n   number number of cells to emit
--- @return string
local make_row = function (row, n)
    local cells = { }
    for col = 1, n do
        cells [col] = row [col] or "" -- fill up empty cells
    end
    return tableconcat (cells, " & ") .. [[ \\]]
end
local starttable = [[\begin {tabular} {%s}]]
local stoptable = [[\end {tabular}]]

--- Convert parsed rows into a ``tabular`` environment and pass it to TeX.
--- When ``tex.sprint`` is not available (script mode) the code is written
--- with texio.write_nl instead; with the ``dump`` option it is written
--- there in addition to being typeset.
--- @param data    table array of rows (arrays of cells)
--- @param options table option hash
--- @return boolean always true
local tabular_of = function (data, options)
    local acc = { }
    local nrows = #data
    local ncols, tspec = make_tablespec (data, options)
    acc [#acc + 1] = stringformat (starttable, tspec)
    for i = 1, nrows do
        --- every row must be padded to the number of *columns* announced
        --- in the preamble (not to the number of rows)
        acc [#acc + 1] = make_row (data [i], ncols)
    end
    acc [#acc + 1] = stoptable
    if texsprint then
        if options.dump then
            texiowrite_nl (tableconcat (acc, "\n"))
        end
        texsprint (unpack (acc))
    else
        texiowrite_nl (tableconcat (acc, "\n"))
    end
    return true
end
-----------------------------------------------------------------------
--- option handler
-----------------------------------------------------------------------
local commasplitter = lpeg.splitat ","

--- Convert the raw option string into a hash table.
--- The string is split at commas and the pieces are passed to
--- table.tohash; an empty string gives an empty table.
--- @param raw string
--- @return table
local handle_options = function (raw)
    if raw ~= "" then
        return tabletohash ({ lpegmatch (commasplitter, raw) })
    end
    return { }
end
-----------------------------------------------------------------------
--- main handler
-----------------------------------------------------------------------
--- Entry point: parse the options, rewrite the URI for CSV output, fetch
--- the document, parse it and typeset the table.
--- @param rawoptions string comma separated option list
--- @param uri        string address of the spreadsheet
--- @return boolean false as soon as one stage fails, otherwise the result
---         of tabular_of
local urihandler = function (rawoptions, uri)
    local options = handle_options (rawoptions)
    local curi = rewrite_spreadsheet_uri (uri)
    if curi then
        local resolved, raw = get_raw_doc (curi, options.force)
        if resolved then
            local data = extract_data (raw)
            if data then
                return tabular_of (data, options)
            end
        end
    end
    return false
end

--- export for the TeX side
packagedata.spreadsheet.urihandler = urihandler
--- test:
--- urihandler [[https://docs.google.com/spreadsheet/ccc?key=0Amykmqr4Of-MdEVIUUcyYld3WTJhZnJHRkgwSF9CaUE&usp=sharing]]
--- running as script (no tex.sprint): treat the first command line
--- argument as the URI and process it without options
if not texsprint and arg [1] then
    return urihandler ("", arg [1])
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment