Skip to content

Instantly share code, notes, and snippets.

@rohieb
Created July 4, 2020 22:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rohieb/10f58325f5fb6bc02ffa2e7ed6260274 to your computer and use it in GitHub Desktop.
Save rohieb/10f58325f5fb6bc02ffa2e7ed6260274 to your computer and use it in GitHub Desktop.
-- SPDX-License-Identifier: 0BSD
-- Copyright (C) 2020 Roland Hieber
-- Convert headers of level >= 3 to LaTeX \paragraph{}s (usable with koma-script classes)
--
-- The filter emits raw TeX inlines, so abort early unless FORMAT (set by
-- pandoc) is one of the formats accepted below.
local supported_formats = {
  latex = true,
  beamer = true,
  json = true,
  native = true,
}
if not supported_formats[FORMAT] then
  error("unsupported format: " .. FORMAT)
end
local List = require 'pandoc.List'
-- The identifier in the native representation can still contain Unicode, and
-- pandoc's LaTeX writer takes additional care to escape Unicode characters
-- when creating the label in the header and when referencing it in links (see
-- the function `toLabel` in pandoc's Text/Pandoc/Writers/LaTeX.hs).
-- Sadly this function is not exposed to Lua, so we have to make our own
-- compatible version.
--
-- ASCII letters, ASCII digits and the characters "_-+=:;." are kept as they
-- are; every other code point is replaced by "ux" followed by its lowercase
-- hexadecimal value.
--
-- Input: (string) identifier from native AST (`attr.identifier`), UTF-8
-- Output: (string) label, compatible with the LaTeX writer
-- Raises: an error (string) if the input is not well-formed UTF-8: a stray
--         continuation byte, an invalid leader byte (0xF8..0xFF), a missing
--         continuation byte, or a sequence that is cut off at the end.
function to_label(str)
  -- `n` is the zero-based offset of the offending byte, `c` its value.
  local function do_error(msg, n, c)
    error(string.format("%s at input byte %d (0x%x)", msg, n, c))
  end

  -- Map one decoded code point to its representation inside the label.
  local function uc_escape(val)
    if (val >= 0x30 and val <= 0x39) or -- isDigit x && isAscii x
       (val >= 0x41 and val <= 0x5A) or
       (val >= 0x61 and val <= 0x7A) or -- isLetter x && isAscii x
       val == 0x5F or val == 0x2D or
       val == 0x2B or val == 0x3D or
       val == 0x3A or val == 0x3B or
       val == 0x2E                      -- x `elemText` "_-+=:;."
    then
      return string.char(val)
    end
    return string.format("ux%x", val)
  end

  -- Decode the code points from UTF-8 manually.
  local parts = {} -- escaped pieces, joined once at the end
  local seq = 0    -- bytes still expected for the current code point
  local val = 0    -- code point accumulated so far
  local c = nil    -- byte currently being looked at
  for n = 1, #str do
    c = string.byte(str, n)
    if seq == 0 then
      -- leader byte: 0b0xxx xxxx, 0b110x xxxx, 0b1110 xxxx, 0b1111 0xxx
      if c < 0x80 then
        seq, val = 1, c
      elseif c < 0xC0 or c >= 0xF8 then
        -- 0x80..0xBF is a continuation byte without a leader; 0xF8..0xFF
        -- can never start a UTF-8 sequence.
        do_error("invalid UTF-8 sequence: expecting leader", n - 1, c)
      elseif c < 0xE0 then
        seq, val = 2, c & 0x1F
      elseif c < 0xF0 then
        seq, val = 3, c & 0x0F
      else
        seq, val = 4, c & 0x07
      end
    elseif c >= 0x80 and c < 0xC0 then
      -- continuation byte: 0b10xx xxxx
      val = (val << 6) | (c & 0x3F)
    else
      do_error("invalid UTF-8 continuation byte", n - 1, c)
    end
    seq = seq - 1
    if seq == 0 then
      parts[#parts + 1] = uc_escape(val)
    end
  end
  if seq > 0 then
    do_error("invalid UTF-8 sequence: leader without continuation bytes",
             #str, c)
  end
  return table.concat(parts)
end
-- Temporary pandoc.List of the elements making up the previous header.
-- nil means that no header is pending. Header() stores the inlines of a
-- converted header here; Para() and Block() consume the list and set it back
-- to nil; Pandoc() appends it to the document if it is still set when that
-- function runs.
saved_header = nil
--
-- Handle Headers
--
-- Headers of level 1 and 2 are passed through. Headers of level >= 3 are
-- removed from the block stream and remembered in `saved_header` as a list of
-- inlines wrapping the title in \paragraph{}, so that a later block handler
-- can emit them.
--
-- Input: (Header) the header element
-- Output: the element itself (level < 3 and nothing pending), a list
--         { Para, elem } (level < 3 with a pending header), a Para holding
--         the previously pending header, or an empty table (element removed)
function Header(elem)
  if elem.level < 3 then
    -- A pending paragraph header must not travel past a section heading,
    -- otherwise it would end up in front of a paragraph of the next section.
    -- Emit it as its own block right before the heading.
    if saved_header then
      local pending = pandoc.Para(saved_header)
      saved_header = nil
      return { pending, elem }
    end
    return elem
  end

  -- Fallback: if more than one Header occurs directly after another, put the
  -- elements of the previous one in an extra Para
  local result = {}
  if saved_header then
    result = pandoc.Para(saved_header)
  end

  -- maybe we need to merge Header with the next Para, save it
  local label = to_label(elem.identifier)
  if label == "" then
    -- No identifier: emit neither \hypertarget{} nor \label{} with an empty
    -- name.
    saved_header = List:new {
      pandoc.RawInline("tex", "\\paragraph{"),
      pandoc.Span(elem.content),
      pandoc.RawInline("tex", "}\n"),
    }
  else
    saved_header = List:new {
      pandoc.RawInline("tex", "\\hypertarget{" .. label .. "}{%\n"),
      pandoc.RawInline("tex", "\\paragraph{"),
      pandoc.Span(elem.content),
      pandoc.RawInline("tex", "}\\label{" .. label .. "}}\n"),
    }
  end
  return result
end
-- When a header is pending in `saved_header`, glue this paragraph's inlines
-- onto it so that LaTeX renders an in-line paragraph header, i.e.:
--
-- \paragraph{Title} Lorem ipsum dolor sit amet...
--
-- rather than a header that sits in a block of its own:
--
-- \paragraph{Title}
--
-- Lorem ipsum dolor sit amet...
--
-- Input: (Para) the paragraph element
-- Output: a new Para made of the pending header followed by the paragraph's
--         inlines, or the unchanged element when no header is pending
function Para(elem)
  if not saved_header then
    return elem
  end
  local merged = saved_header
  merged:extend(elem.content)
  local para = pandoc.Para(merged)
  saved_header = nil
  return para
end
-- Fallback for other blocks: in-line LaTeX paragraph headers look bad when
-- followed by e.g. a list, so a pending header is emitted as a Para of its
-- own, followed by the given block.
--
-- Input: (Block) any block element
-- Output: the list { Para, elem } when a header is pending, otherwise the
--         unchanged element
function Block(elem)
  if not saved_header then
    return elem
  end
  local header_para = pandoc.Para(saved_header)
  saved_header = nil
  return { header_para, elem }
end
-- Fallback for one lonely header at the end with nothing after it: append the
-- pending header to the document as a Para of its own.
--
-- Input: (Pandoc) the document
-- Output: the same document, with the pending header appended to `doc.blocks`
--         if there was one
function Pandoc(doc)
  if saved_header then
    table.insert(doc.blocks, pandoc.Para(saved_header))
    -- Reset the state like the other handlers do, so the header cannot be
    -- emitted a second time if the filter functions run again.
    saved_header = nil
  end
  return doc
end
-- All handlers live in ONE filter table. pandoc runs the filters of the
-- returned list one after another, each as a complete pass over the document;
-- with one handler per filter, every Header would be processed before any
-- Para is looked at, and the hand-over through `saved_header` between a
-- header and the block following it could not work.
return {
  {
    Header = Header,
    Para = Para,
    Block = Block,
    Pandoc = Pandoc,
  },
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment