Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
do
do
  local _ENV = _ENV
  package.preload["locator"] = function(...)
    local arg = _G.arg;

    -- Splits a suffix string into a locator label, the locator itself and
    -- whatever text remains after the locator.
    local module = {}

    -- Every accepted spelling (full word, singular and plural abbreviation)
    -- mapped to the label it stands for.
    local labels = {
      book = 'book',
      ['bk.'] = 'book',
      ['bks.'] = 'book',
      chapter = 'chapter',
      ['chap.'] = 'chapter',
      ['chaps.'] = 'chapter',
      column = 'column',
      ['col.'] = 'column',
      ['cols.'] = 'column',
      figure = 'figure',
      ['fig.'] = 'figure',
      ['figs.'] = 'figure',
      folio = 'folio',
      ['fol.'] = 'folio',
      ['fols.'] = 'folio',
      number = 'number',
      ['no.'] = 'number',
      ['nos.'] = 'number',
      line = 'line',
      ['l.'] = 'line',
      ['ll.'] = 'line',
      note = 'note',
      ['n.'] = 'note',
      ['nn.'] = 'note',
      opus = 'opus',
      ['op.'] = 'opus',
      ['opp.'] = 'opus',
      page = 'page',
      ['p.'] = 'page',
      ['pp.'] = 'page',
      paragraph = 'paragraph',
      ['para.'] = 'paragraph',
      ['paras.'] = 'paragraph',
      part = 'part',
      ['pt.'] = 'part',
      ['pts.'] = 'part',
      section = 'section',
      ['sec.'] = 'section',
      ['secs.'] = 'section',
      ['sub verbo'] = 'sub verbo',
      ['s.v.'] = 'sub verbo',
      ['s.vv.'] = 'sub verbo',
      verse = 'verse',
      ['v.'] = 'verse',
      ['vv.'] = 'verse',
      volume = 'volume',
      ['vol.'] = 'volume',
      ['vols.'] = 'volume'
    }

    -- Rewrites `labels` in place so that every spelling maps to the shortest
    -- spelling of its label (e.g. all "page" spellings map to 'p.').
    -- Only existing keys are reassigned, which is allowed during `pairs`.
    function module.short_labels()
      local sl = {}
      for k, v in pairs(labels) do
        if not sl[v] or string.len(k) < string.len(sl[v]) then
          sl[v] = k
        end
      end
      for k, v in pairs(labels) do
        labels[k] = sl[v]
      end
    end

    -- Splits a leading label off `locator`.
    -- Returns the label plus the rest of the locator when the leading word is
    -- a known spelling; otherwise returns the page label and the locator
    -- unchanged.
    local function get_label(locator)
      -- `%.?` is an optional *literal* period. An unescaped `.` would swallow
      -- the blank after a spelled-out label ("chapter 3" -> "chapter "), and
      -- the lookup below would then never succeed.
      local label, remaining = string.match(locator, '^(%l+%.?) *(.*)')
      if label and labels[label:lower()] then
        return labels[label:lower()], remaining
      end
      return labels['page'], locator
    end

    -- Core parser. Returns `label, locator, remaining`; when no locator is
    -- recognised it returns `nil, nil, suffix` with the input untouched.
    local function parse(suffix)
      if not suffix then
        return nil, nil, suffix
      end

      local label, locator, remaining
      local _suffix = suffix

      -- Explicitly delimited locators: "{...}" optionally preceded by a comma
      -- and optionally followed by more text.
      locator = string.match(_suffix, '^{([^{}]+)}$')
      if locator then
        label, locator = get_label(locator)
        return label, locator, nil
      end

      locator, remaining = string.match(_suffix, '^{([^{}]+)}, *(.*)')
      if locator then
        label, locator = get_label(locator)
        return label, locator, remaining
      end

      locator = string.match(_suffix, '^, *{([^{}]+)}$')
      if locator then
        label, locator = get_label(locator)
        return label, locator, nil
      end

      locator, remaining = string.match(_suffix, '^, *{([^{}]+)} *(.*)')
      if locator then
        label, locator = get_label(locator)
        return label, locator, remaining
      end

      -- Implicit locators must start with a comma, a blank and something else.
      if not string.find(_suffix, '^, .') then
        return nil, nil, suffix
      end

      label, remaining = string.match(_suffix, '^, *(%l+%.?) *(.*)')
      if label and labels[label:lower()] then
        label = labels[label:lower()]
        -- re-prefix so the token loop below can treat all tokens alike
        _suffix = ', ' .. remaining
      else
        label = labels['page']
      end

      -- Collect the run of comma-separated tokens that contain neither a
      -- comma nor a blank; the first thing that does not fit ends the locator.
      local _locator = ''
      local loc
      while true do
        loc, remaining = string.match(_suffix, '^(, *[^, ]+)(.*)')
        if loc then
          _locator = _locator .. loc
          _suffix = remaining
        else
          break
        end
      end

      if _locator ~= '' then
        if _suffix == '' then
          _suffix = nil
        end
        _locator = _locator:gsub('^, *', '')
        return label, _locator, _suffix
      end

      return nil, nil, suffix
    end

    -- Public entry point: same results as the local `parse`, except that the
    -- page label is reported as nil.
    function module.parse(suffix)
      -- `local` is required here: without it `label` and `locator` are
      -- written to the global environment on every call.
      local label, locator
      label, locator, suffix = parse(suffix)
      if label == labels['page'] then
        label = nil
      end
      return label, locator, suffix
    end

    return module
  end
end
do
  local _ENV = _ENV
  package.preload["lunajson"] = function(...)
    local arg = _G.arg;

    -- Facade module: one shared decoder, one shared encoder and the SAX
    -- parser constructors.
    -- If you need multiple contexts of decoder and/or encoder,
    -- you can require lunajson.decoder and/or lunajson.encoder directly.
    local make_decoder = require 'lunajson.decoder'
    local make_encoder = require 'lunajson.encoder'
    local sax_api = require 'lunajson.sax'

    local exports = {}
    exports.decode = make_decoder()
    exports.encode = make_encoder()
    exports.newparser = sax_api.newparser
    exports.newfileparser = sax_api.newfileparser
    return exports
  end
end
do
  local _ENV = _ENV
  package.preload["lunajson.decoder"] = function(...)
    local arg = _G.arg;

    -- Everything the decoder needs is cached in locals here, because global
    -- access is disabled further down by `local _ENV = nil`.
    local setmetatable, tonumber, tostring =
      setmetatable, tonumber, tostring
    local floor, inf =
      math.floor, math.huge
    local mininteger, tointeger =
      math.mininteger or nil, math.tointeger or nil
    local byte, char, find, gsub, match, sub =
      string.byte, string.char, string.find, string.gsub, string.match, string.sub

    -- Raises "parse error at <pos>: <errmsg>", blamed on the caller (level 2).
    local function _decode_error(pos, errmsg)
      error("parse error at " .. pos .. ": " .. errmsg, 2)
    end

    local f_str_ctrl_pat
    if _VERSION == "Lua 5.1" then
      -- use the cluttered pattern because lua 5.1 does not handle \0 in a pattern correctly
      f_str_ctrl_pat = '[^\32-\255]'
    else
      f_str_ctrl_pat = '[\0-\31]'
    end

    -- From here on any accidental global access fails (Lua 5.2 and later).
    local _ENV = nil

    -- Creates an independent decoder.
    -- Returns `decode(json, pos, nullv, arraylen)`:
    --   json     the text to decode
    --   pos      optional start position; when given, decode returns
    --            `value, next_pos` and does not require the input to end
    --   nullv    value substituted for JSON null
    --   arraylen when truthy, every array stores its length at index 0
    local function newdecoder()
      local json, pos, nullv, arraylen, rec_depth

      -- `f` is the temporary for dispatcher[c] and
      -- the dummy for the first return value of `find`
      local dispatcher, f

      --[[
        Helper
      --]]
      -- tail call on purpose: keeps the error level of _decode_error pointing
      -- at the function that reported the problem
      local function decode_error(errmsg)
        return _decode_error(pos, errmsg)
      end

      --[[
        Invalid
      --]]
      local function f_err()
        decode_error('invalid value')
      end

      --[[
        Constants
        Each parser is entered with `pos` just past the first character.
      --]]
      -- null
      local function f_nul()
        if sub(json, pos, pos+2) == 'ull' then
          pos = pos+3
          return nullv
        end
        decode_error('invalid value')
      end

      -- false
      local function f_fls()
        if sub(json, pos, pos+3) == 'alse' then
          pos = pos+4
          return false
        end
        decode_error('invalid value')
      end

      -- true
      local function f_tru()
        if sub(json, pos, pos+2) == 'rue' then
          pos = pos+3
          return true
        end
        decode_error('invalid value')
      end

      --[[
        Numbers
        Conceptually, the longest prefix that matches to `[-+.0-9A-Za-z]+` (in regexp)
        is captured as a number and its conformance to the JSON spec is checked.
      --]]
      -- deal with non-standard locales
      local radixmark = match(tostring(0.5), '[^0-9]')
      local fixedtonumber = tonumber
      if radixmark ~= '.' then
        -- A lookup table is used as the replacement so the radix mark needs
        -- no escaping (a '%'-escaped replacement string is rejected by
        -- Lua 5.2+ for non-digit characters).
        local radixtbl = { ['.'] = radixmark }
        fixedtonumber = function(s)
          -- '%.' matches only the literal period; the parentheses drop
          -- gsub's substitution count so it is not taken as a base.
          return tonumber((gsub(s, '%.', radixtbl)))
        end
      end

      local function number_error()
        return decode_error('invalid number')
      end

      -- `0(\.[0-9]*)?([eE][+-]?[0-9]*)?`
      -- `mns` is true when a minus sign preceded the zero.
      local function f_zro(mns)
        local num, c = match(json, '^(%.?[0-9]*)([-+.A-Za-z]?)', pos) -- skipping 0

        if num == '' then
          if c == '' then
            if mns then
              return -0.0
            end
            return 0
          end

          if c == 'e' or c == 'E' then
            num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos)
            if c == '' then
              pos = pos + #num
              if mns then
                return -0.0
              end
              return 0.0
            end
          end
          number_error()
        end

        -- the fraction must start with, and must not end with, a period
        if byte(num) ~= 0x2E or byte(num, -1) == 0x2E then
          number_error()
        end

        if c ~= '' then
          if c == 'e' or c == 'E' then
            num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos)
          end
          if c ~= '' then
            number_error()
          end
        end

        pos = pos + #num
        c = fixedtonumber(num)

        if mns then
          c = -c
        end
        return c
      end

      -- `[1-9][0-9]*(\.[0-9]*)?([eE][+-]?[0-9]*)?`
      local function f_num(mns)
        pos = pos-1
        local num, c = match(json, '^([0-9]+%.?[0-9]*)([-+.A-Za-z]?)', pos)
        if byte(num, -1) == 0x2E then -- error if ended with period
          number_error()
        end

        if c ~= '' then
          if c ~= 'e' and c ~= 'E' then
            number_error()
          end
          num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos)
          if not num or c ~= '' then
            number_error()
          end
        end

        pos = pos + #num
        c = fixedtonumber(num)

        if mns then
          c = -c
          -- a digits-only literal equal to math.mininteger is returned as
          -- that integer value
          if c == mininteger and not find(num, '[^0-9]') then
            c = mininteger
          end
        end
        return c
      end

      -- skip minus sign
      local function f_mns()
        local c = byte(json, pos)
        if c then
          pos = pos+1
          if c > 0x30 then
            if c < 0x3A then
              return f_num(true)
            end
          else
            if c > 0x2F then
              return f_zro(true)
            end
          end
        end
        decode_error('invalid number')
      end

      --[[
        Strings
      --]]
      -- hex digit values, indexed by (byte - 47); anything else yields inf
      local f_str_hextbl = {
        0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
        0x8, 0x9, inf, inf, inf, inf, inf, inf,
        inf, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, inf,
        inf, inf, inf, inf, inf, inf, inf, inf,
        inf, inf, inf, inf, inf, inf, inf, inf,
        inf, inf, inf, inf, inf, inf, inf, inf,
        inf, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF,
        __index = function()
          return inf
        end
      }
      setmetatable(f_str_hextbl, f_str_hextbl)

      local f_str_escapetbl = {
        ['"']  = '"',
        ['\\'] = '\\',
        ['/']  = '/',
        ['b']  = '\b',
        ['f']  = '\f',
        ['n']  = '\n',
        ['r']  = '\r',
        ['t']  = '\t',
        __index = function()
          decode_error("invalid escape sequence")
        end
      }
      setmetatable(f_str_escapetbl, f_str_escapetbl)

      local function surrogate_first_error()
        return decode_error("1st surrogate pair byte not continued by 2nd")
      end

      -- Pending first surrogate (0 = none). It is always cleared before an
      -- error is raised, so a failed decode cannot poison the next one.
      local f_str_surrogate_prev = 0

      -- gsub callback: `ch` is the character after the backslash, `ucode` the
      -- (up to five) non-backslash characters following it.
      local function f_str_subst(ch, ucode)
        if ch == 'u' then
          local c1, c2, c3, c4, rest = byte(ucode, 1, 5)
          if not c4 then
            -- fewer than four characters follow `\u`
            f_str_surrogate_prev = 0
            decode_error("invalid unicode codepoint literal")
          end
          ucode = f_str_hextbl[c1-47] * 0x1000 +
                  f_str_hextbl[c2-47] * 0x100 +
                  f_str_hextbl[c3-47] * 0x10 +
                  f_str_hextbl[c4-47]
          if ucode ~= inf then
            if ucode < 0x80 then -- 1byte
              if rest then
                return char(ucode, rest)
              end
              return char(ucode)
            elseif ucode < 0x800 then -- 2bytes
              c1 = floor(ucode / 0x40)
              c2 = ucode - c1 * 0x40
              c1 = c1 + 0xC0
              c2 = c2 + 0x80
              if rest then
                return char(c1, c2, rest)
              end
              return char(c1, c2)
            elseif ucode < 0xD800 or 0xE000 <= ucode then -- 3bytes
              c1 = floor(ucode / 0x1000)
              ucode = ucode - c1 * 0x1000
              c2 = floor(ucode / 0x40)
              c3 = ucode - c2 * 0x40
              c1 = c1 + 0xE0
              c2 = c2 + 0x80
              c3 = c3 + 0x80
              if rest then
                return char(c1, c2, c3, rest)
              end
              return char(c1, c2, c3)
            elseif 0xD800 <= ucode and ucode < 0xDC00 then -- surrogate pair 1st
              -- valid only when nothing is pending and the next escape
              -- follows immediately (no `rest`)
              if f_str_surrogate_prev == 0 and not rest then
                f_str_surrogate_prev = ucode
                return ''
              end
              f_str_surrogate_prev = 0
              surrogate_first_error()
            else -- surrogate pair 2nd
              if f_str_surrogate_prev ~= 0 then
                ucode = 0x10000 +
                        (f_str_surrogate_prev - 0xD800) * 0x400 +
                        (ucode - 0xDC00)
                f_str_surrogate_prev = 0
                c1 = floor(ucode / 0x40000)
                ucode = ucode - c1 * 0x40000
                c2 = floor(ucode / 0x1000)
                ucode = ucode - c2 * 0x1000
                c3 = floor(ucode / 0x40)
                c4 = ucode - c3 * 0x40
                c1 = c1 + 0xF0
                c2 = c2 + 0x80
                c3 = c3 + 0x80
                c4 = c4 + 0x80
                if rest then
                  return char(c1, c2, c3, c4, rest)
                end
                return char(c1, c2, c3, c4)
              end
              decode_error("2nd surrogate pair byte appeared without 1st")
            end
          end
          f_str_surrogate_prev = 0
          decode_error("invalid unicode codepoint literal")
        end
        if f_str_surrogate_prev ~= 0 then
          f_str_surrogate_prev = 0
          surrogate_first_error()
        end
        return f_str_escapetbl[ch] .. ucode
      end

      -- caching interpreted keys for speed
      local f_str_keycache = setmetatable({}, {__mode="v"})

      -- Parses a string; entered with `pos` just past the opening quote.
      -- `iskey` enables the key cache.
      local function f_str(iskey)
        local newpos = pos
        local tmppos, c1, c2
        repeat
          newpos = find(json, '"', newpos, true) -- search '"'
          if not newpos then
            decode_error("unterminated string")
          end
          tmppos = newpos-1
          newpos = newpos+1
          c1, c2 = byte(json, tmppos-1, tmppos)
          if c2 == 0x5C and c1 == 0x5C then -- skip preceding '\\'s
            repeat
              tmppos = tmppos-2
              c1, c2 = byte(json, tmppos-1, tmppos)
            until c2 ~= 0x5C or c1 ~= 0x5C
            tmppos = newpos-2
          end
        until c2 ~= 0x5C -- leave if '"' is not preceded by '\'

        local str = sub(json, pos, tmppos)
        pos = newpos

        if iskey then -- check key cache
          tmppos = f_str_keycache[str] -- reuse tmppos for cache key/val
          if tmppos then
            return tmppos
          end
          tmppos = str
        end

        if find(str, f_str_ctrl_pat) then
          decode_error("unescaped control string")
        end
        if find(str, '\\', 1, true) then -- check whether a backslash exists
          -- We need to grab 4 characters after the escape char,
          -- for encoding unicode codepoint to UTF-8.
          -- As we need to ensure that every first surrogate pair byte is
          -- immediately followed by second one, we grab upto 5 characters and
          -- check the last for this purpose.
          str = gsub(str, '\\(.)([^\\]?[^\\]?[^\\]?[^\\]?[^\\]?)', f_str_subst)
          if f_str_surrogate_prev ~= 0 then
            f_str_surrogate_prev = 0
            decode_error("1st surrogate pair byte not continued by 2nd")
          end
        end
        if iskey then -- commit key cache
          f_str_keycache[tmppos] = str
        end
        return str
      end

      --[[
        Arrays, Objects
      --]]
      -- array
      local function f_ary()
        rec_depth = rec_depth + 1
        if rec_depth > 1000 then
          decode_error('too deeply nested json (> 1000)')
        end
        local ary = {}

        pos = match(json, '^[ \n\r\t]*()', pos)

        local i = 0
        if byte(json, pos) == 0x5D then -- check closing bracket ']' which means the array empty
          pos = pos+1
        else
          local newpos = pos
          repeat
            i = i+1
            f = dispatcher[byte(json,newpos)] -- parse value
            pos = newpos+1
            ary[i] = f()
            newpos = match(json, '^[ \n\r\t]*,[ \n\r\t]*()', pos) -- check comma
          until not newpos

          newpos = match(json, '^[ \n\r\t]*%]()', pos) -- check closing bracket
          if not newpos then
            decode_error("no closing bracket of an array")
          end
          pos = newpos
        end

        if arraylen then -- commit the length of the array if `arraylen` is set
          ary[0] = i
        end
        rec_depth = rec_depth - 1
        return ary
      end

      -- objects
      local function f_obj()
        rec_depth = rec_depth + 1
        if rec_depth > 1000 then
          decode_error('too deeply nested json (> 1000)')
        end
        local obj = {}

        pos = match(json, '^[ \n\r\t]*()', pos)
        if byte(json, pos) == 0x7D then -- check closing bracket '}' which means the object empty
          pos = pos+1
        else
          local newpos = pos

          repeat
            if byte(json, newpos) ~= 0x22 then -- check '"'
              decode_error("not key")
            end
            pos = newpos+1
            local key = f_str(true) -- parse key

            -- optimized for compact json
            -- c1, c2 == ':', <the first char of the value> or
            -- c1, c2, c3 == ':', ' ', <the first char of the value>
            f = f_err
            local c1, c2, c3 = byte(json, pos, pos+3)
            if c1 == 0x3A then
              if c2 ~= 0x20 then
                f = dispatcher[c2]
                newpos = pos+2
              else
                f = dispatcher[c3]
                newpos = pos+3
              end
            end
            if f == f_err then -- read a colon and arbitrary number of spaces
              newpos = match(json, '^[ \n\r\t]*:[ \n\r\t]*()', pos)
              if not newpos then
                decode_error("no colon after a key")
              end
              f = dispatcher[byte(json, newpos)]
              newpos = newpos+1
            end
            pos = newpos
            obj[key] = f() -- parse value
            newpos = match(json, '^[ \n\r\t]*,[ \n\r\t]*()', pos)
          until not newpos

          newpos = match(json, '^[ \n\r\t]*}()', pos)
          if not newpos then
            decode_error("no closing bracket of an object")
          end
          pos = newpos
        end

        rec_depth = rec_depth - 1
        return obj
      end

      --[[
        The jump table to dispatch a parser for a value,
        indexed by the code of the value's first char.
        Nil key means the end of json.
      --]]
      dispatcher = { [0] =
        f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err,
        f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err,
        f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err,
        f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err,
        f_err, f_err, f_str, f_err, f_err, f_err, f_err, f_err,
        f_err, f_err, f_err, f_err, f_err, f_mns, f_err, f_err,
        f_zro, f_num, f_num, f_num, f_num, f_num, f_num, f_num,
        f_num, f_num, f_err, f_err, f_err, f_err, f_err, f_err,
        f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err,
        f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err,
        f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err,
        f_err, f_err, f_err, f_ary, f_err, f_err, f_err, f_err,
        f_err, f_err, f_err, f_err, f_err, f_err, f_fls, f_err,
        f_err, f_err, f_err, f_err, f_err, f_err, f_nul, f_err,
        f_err, f_err, f_err, f_err, f_tru, f_err, f_err, f_err,
        f_err, f_err, f_err, f_obj, f_err, f_err, f_err, f_err,
        __index = function()
          decode_error("unexpected termination")
        end
      }
      setmetatable(dispatcher, dispatcher)

      --[[
        run decoder
      --]]
      local function decode(json_, pos_, nullv_, arraylen_)
        json, pos, nullv, arraylen = json_, pos_, nullv_, arraylen_
        rec_depth = 0

        pos = match(json, '^[ \n\r\t]*()', pos)

        f = dispatcher[byte(json, pos)]
        pos = pos+1
        local v = f()

        if pos_ then
          return v, pos
        else
          -- without a start position the whole input must be consumed
          f, pos = find(json, '^[ \n\r\t]*', pos)
          if pos ~= #json then
            decode_error('json ended')
          end
          return v
        end
      end

      return decode
    end

    return newdecoder
  end
end
do
  local _ENV = _ENV
  package.preload["lunajson.encoder"] = function(...)
    local arg = _G.arg;

    -- Everything the encoder needs is cached in locals here, because global
    -- access is disabled further down by `local _ENV = nil`.
    local error = error
    local byte, find, format, gsub, match = string.byte, string.find, string.format, string.gsub, string.match
    local concat = table.concat
    local tostring = tostring
    local pairs, type = pairs, type
    local setmetatable = setmetatable
    local huge, tiny = 1/0, -1/0
    -- nil before Lua 5.3, where numbers have no integer subtype
    local mathtype = math.type

    local f_string_esc_pat
    if _VERSION == "Lua 5.1" then
      -- use the cluttered pattern because lua 5.1 does not handle \0 in a pattern correctly
      f_string_esc_pat = '[^ -!#-[%]^-\255]'
    else
      f_string_esc_pat = '[\0-\31"\\]'
    end

    -- From here on any accidental global access fails (Lua 5.2 and later).
    local _ENV = nil

    -- Creates an independent encoder.
    -- Returns `encode(value, nullv)`, which serialises `value` to a JSON
    -- string; any value equal to `nullv` is written as `null`.
    local function newencoder()
      local v, nullv
      -- `builder` collects output fragments, `i` is the next free slot and
      -- `visited` marks the tables currently being encoded (cycle detection).
      local i, builder, visited

      local function f_tostring(v)
        builder[i] = tostring(v)
        i = i+1
      end

      -- Detect locale-specific radix and digit-grouping characters so they
      -- can be normalised in formatted floats.
      local radixmark = match(tostring(0.5), '[^0-9]')
      local delimmark = match(tostring(12345.12345), '[^0-9' .. radixmark .. ']')
      if radixmark == '.' then
        radixmark = nil
      end

      local radixordelim
      if radixmark or delimmark then
        radixordelim = true
        -- both marks are used as gsub *patterns* below, hence the escaping
        if radixmark and find(radixmark, '%W') then
          radixmark = '%' .. radixmark
        end
        if delimmark and find(delimmark, '%W') then
          delimmark = '%' .. delimmark
        end
      end

      -- Appends a finite number; raises 'invalid number' for NaN and
      -- infinities.
      local f_number = function(n)
        if tiny < n and n < huge then
          local s
          if mathtype and mathtype(n) == 'integer' then
            -- "%.17g" converts to a double first, which silently alters
            -- integers beyond 2^53; "%d" keeps every digit.
            s = format("%d", n)
          else
            s = format("%.17g", n)
            if radixordelim then
              if delimmark then
                s = gsub(s, delimmark, '')
              end
              if radixmark then
                s = gsub(s, radixmark, '.')
              end
            end
          end
          builder[i] = s
          i = i+1
          return
        end
        error('invalid number')
      end

      local doencode

      -- Escapes for characters matched by f_string_esc_pat; characters
      -- without a short form fall back to \u00XX.
      local f_string_subst = {
        ['"'] = '\\"',
        ['\\'] = '\\\\',
        ['\b'] = '\\b',
        ['\f'] = '\\f',
        ['\n'] = '\\n',
        ['\r'] = '\\r',
        ['\t'] = '\\t',
        __index = function(_, c)
          return format('\\u00%02X', byte(c))
        end
      }
      setmetatable(f_string_subst, f_string_subst)

      local function f_string(s)
        builder[i] = '"'
        if find(s, f_string_esc_pat) then
          s = gsub(s, f_string_esc_pat, f_string_subst)
        end
        builder[i+1] = s
        builder[i+2] = '"'
        i = i+3
      end

      -- Encodes a table as an array when o[0] is a number (explicit length)
      -- or o[1] is non-nil, otherwise as an object with string keys only.
      local function f_table(o)
        if visited[o] then
          error("loop detected")
        end
        visited[o] = true

        local tmp = o[0]
        if type(tmp) == 'number' then -- arraylen available
          builder[i] = '['
          i = i+1
          for j = 1, tmp do
            doencode(o[j])
            builder[i] = ','
            i = i+1
          end
          if tmp > 0 then
            i = i-1 -- the closing bracket overwrites the trailing comma
          end
          builder[i] = ']'

        else
          tmp = o[1]
          if tmp ~= nil then -- detected as array
            builder[i] = '['
            i = i+1
            local j = 2
            repeat
              doencode(tmp)
              tmp = o[j]
              if tmp == nil then
                break
              end
              j = j+1
              builder[i] = ','
              i = i+1
            until false
            builder[i] = ']'

          else -- detected as object
            builder[i] = '{'
            i = i+1
            local tmp = i
            for k, v in pairs(o) do
              if type(k) ~= 'string' then
                error("non-string key")
              end
              f_string(k)
              builder[i] = ':'
              i = i+1
              doencode(v)
              builder[i] = ','
              i = i+1
            end
            if i > tmp then
              i = i-1 -- the closing brace overwrites the trailing comma
            end
            builder[i] = '}'
          end
        end

        i = i+1
        visited[o] = nil
      end

      -- type name -> emitter; unsupported types raise "invalid type value"
      local dispatcher = {
        boolean = f_tostring,
        number = f_number,
        string = f_string,
        table = f_table,
        __index = function()
          error("invalid type value")
        end
      }
      setmetatable(dispatcher, dispatcher)

      function doencode(v)
        if v == nullv then
          builder[i] = 'null'
          i = i+1
          return
        end
        return dispatcher[type(v)](v)
      end

      local function encode(v_, nullv_)
        v, nullv = v_, nullv_
        i, builder, visited = 1, {}, {}

        doencode(v)
        return concat(builder)
      end

      return encode
    end

    return newencoder
  end
end
do
local _ENV = _ENV
package.preload[ "lunajson.sax" ] = function( ... ) local arg = _G.arg;
local setmetatable, tonumber, tostring =
setmetatable, tonumber, tostring
local floor, inf =
math.floor, math.huge
local mininteger, tointeger =
math.mininteger or nil, math.tointeger or nil
local byte, char, find, gsub, match, sub =
string.byte, string.char, string.find, string.gsub, string.match, string.sub
local function _parse_error(pos, errmsg)
error("parse error at " .. pos .. ": " .. errmsg, 2)
end
local f_str_ctrl_pat
if _VERSION == "Lua 5.1" then
-- use the cluttered pattern because lua 5.1 does not handle \0 in a pattern correctly
f_str_ctrl_pat = '[^\32-\255]'
else
f_str_ctrl_pat = '[\0-\31]'
end
local type, unpack = type, table.unpack or unpack
local open = io.open
local _ENV = nil
local function nop() end
local function newparser(src, saxtbl)
local json, jsonnxt, rec_depth
local jsonlen, pos, acc = 0, 1, 0
-- `f` is the temporary for dispatcher[c] and
-- the dummy for the first return value of `find`
local dispatcher, f
-- initialize
if type(src) == 'string' then
json = src
jsonlen = #json
jsonnxt = function()
json = ''
jsonlen = 0
jsonnxt = nop
end
else
jsonnxt = function()
acc = acc + jsonlen
pos = 1
repeat
json = src()
if not json then
json = ''
jsonlen = 0
jsonnxt = nop
return
end
jsonlen = #json
until jsonlen > 0
end
jsonnxt()
end
local sax_startobject = saxtbl.startobject or nop
local sax_key = saxtbl.key or nop
local sax_endobject = saxtbl.endobject or nop
local sax_startarray = saxtbl.startarray or nop
local sax_endarray = saxtbl.endarray or nop
local sax_string = saxtbl.string or nop
local sax_number = saxtbl.number or nop
local sax_boolean = saxtbl.boolean or nop
local sax_null = saxtbl.null or nop
--[[
Helper
--]]
local function tryc()
local c = byte(json, pos)
if not c then
jsonnxt()
c = byte(json, pos)
end
return c
end
local function parse_error(errmsg)
return _parse_error(acc + pos, errmsg)
end
local function tellc()
return tryc() or parse_error("unexpected termination")
end
local function spaces() -- skip spaces and prepare the next char
while true do
pos = match(json, '^[ \n\r\t]*()', pos)
if pos <= jsonlen then
return
end
if jsonlen == 0 then
parse_error("unexpected termination")
end
jsonnxt()
end
end
--[[
Invalid
--]]
local function f_err()
parse_error('invalid value')
end
--[[
Constants
--]]
-- fallback slow constants parser
local function generic_constant(target, targetlen, ret, sax_f)
for i = 1, targetlen do
local c = tellc()
if byte(target, i) ~= c then
parse_error("invalid char")
end
pos = pos+1
end
return sax_f(ret)
end
-- null
local function f_nul()
if sub(json, pos, pos+2) == 'ull' then
pos = pos+3
return sax_null(nil)
end
return generic_constant('ull', 3, nil, sax_null)
end
-- false
local function f_fls()
if sub(json, pos, pos+3) == 'alse' then
pos = pos+4
return sax_boolean(false)
end
return generic_constant('alse', 4, false, sax_boolean)
end
-- true
local function f_tru()
if sub(json, pos, pos+2) == 'rue' then
pos = pos+3
return sax_boolean(true)
end
return generic_constant('rue', 3, true, sax_boolean)
end
--[[
Numbers
Conceptually, the longest prefix that matches to `[-+.0-9A-Za-z]+` (in regexp)
is captured as a number and its conformance to the JSON spec is checked.
--]]
-- deal with non-standard locales
local radixmark = match(tostring(0.5), '[^0-9]')
local fixedtonumber = tonumber
if radixmark ~= '.' then
if find(radixmark, '%W') then
radixmark = '%' .. radixmark
end
fixedtonumber = function(s)
return tonumber(gsub(s, '.', radixmark))
end
end
local function number_error()
return parse_error('invalid number')
end
-- fallback slow parser
local function generic_number(mns)
local buf = {}
local i = 1
local is_int = true
local c = byte(json, pos)
pos = pos+1
local function nxt()
buf[i] = c
i = i+1
c = tryc()
pos = pos+1
end
if c == 0x30 then
nxt()
if c and 0x30 <= c and c < 0x3A then
number_error()
end
else
repeat nxt() until not (c and 0x30 <= c and c < 0x3A)
end
if c == 0x2E then
is_int = false
nxt()
if not (c and 0x30 <= c and c < 0x3A) then
number_error()
end
repeat nxt() until not (c and 0x30 <= c and c < 0x3A)
end
if c == 0x45 or c == 0x65 then
is_int = false
nxt()
if c == 0x2B or c == 0x2D then
nxt()
end
if not (c and 0x30 <= c and c < 0x3A) then
number_error()
end
repeat nxt() until not (c and 0x30 <= c and c < 0x3A)
end
if c and (0x41 <= c and c <= 0x5B or
0x61 <= c and c <= 0x7B or
c == 0x2B or c == 0x2D or c == 0x2E) then
number_error()
end
pos = pos-1
local num = char(unpack(buf))
num = fixedtonumber(num)
if mns then
num = -num
if num == mininteger and is_int then
num = mininteger
end
end
return sax_number(num)
end
-- `0(\.[0-9]*)?([eE][+-]?[0-9]*)?`
local function f_zro(mns)
local num, c = match(json, '^(%.?[0-9]*)([-+.A-Za-z]?)', pos) -- skipping 0
if num == '' then
if pos > jsonlen then
pos = pos - 1
return generic_number(mns)
end
if c == '' then
if mns then
return sax_number(-0.0)
end
return sax_number(0)
end
if c == 'e' or c == 'E' then
num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos)
if c == '' then
pos = pos + #num
if pos > jsonlen then
pos = pos - #num - 1
return generic_number(mns)
end
if mns then
return sax_number(-0.0)
end
return sax_number(0.0)
end
end
pos = pos-1
return generic_number(mns)
end
if byte(num) ~= 0x2E or byte(num, -1) == 0x2E then
pos = pos-1
return generic_number(mns)
end
if c ~= '' then
if c == 'e' or c == 'E' then
num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos)
end
if c ~= '' then
pos = pos-1
return generic_number(mns)
end
end
pos = pos + #num
if pos > jsonlen then
pos = pos - #num - 1
return generic_number(mns)
end
c = fixedtonumber(num)
if mns then
c = -c
end
return sax_number(c)
end
-- `[1-9][0-9]*(\.[0-9]*)?([eE][+-]?[0-9]*)?`
local function f_num(mns)
pos = pos-1
local num, c = match(json, '^([0-9]+%.?[0-9]*)([-+.A-Za-z]?)', pos)
if byte(num, -1) == 0x2E then -- error if ended with period
return generic_number(mns)
end
if c ~= '' then
if c ~= 'e' and c ~= 'E' then
return generic_number(mns)
end
num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos)
if not num or c ~= '' then
return generic_number(mns)
end
end
pos = pos + #num
if pos > jsonlen then
pos = pos - #num
return generic_number(mns)
end
c = fixedtonumber(num)
if mns then
c = -c
if c == mininteger and not find(num, '[^0-9]') then
c = mininteger
end
end
return sax_number(c)
end
-- skip minus sign
local function f_mns()
local c = byte(json, pos) or tellc()
if c then
pos = pos+1
if c > 0x30 then
if c < 0x3A then
return f_num(true)
end
else
if c > 0x2F then
return f_zro(true)
end
end
end
parse_error("invalid number")
end
--[[
Strings
--]]
local f_str_hextbl = {
0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
0x8, 0x9, inf, inf, inf, inf, inf, inf,
inf, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, inf,
inf, inf, inf, inf, inf, inf, inf, inf,
inf, inf, inf, inf, inf, inf, inf, inf,
inf, inf, inf, inf, inf, inf, inf, inf,
inf, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF,
__index = function()
return inf
end
}
setmetatable(f_str_hextbl, f_str_hextbl)
local f_str_escapetbl = {
['"'] = '"',
['\\'] = '\\',
['/'] = '/',
['b'] = '\b',
['f'] = '\f',
['n'] = '\n',
['r'] = '\r',
['t'] = '\t',
__index = function()
parse_error("invalid escape sequence")
end
}
setmetatable(f_str_escapetbl, f_str_escapetbl)
local function surrogate_first_error()
return parse_error("1st surrogate pair byte not continued by 2nd")
end
local f_str_surrogate_prev = 0
local function f_str_subst(ch, ucode)
if ch == 'u' then
local c1, c2, c3, c4, rest = byte(ucode, 1, 5)
ucode = f_str_hextbl[c1-47] * 0x1000 +
f_str_hextbl[c2-47] * 0x100 +
f_str_hextbl[c3-47] * 0x10 +
f_str_hextbl[c4-47]
if ucode ~= inf then
if ucode < 0x80 then -- 1byte
if rest then
return char(ucode, rest)
end
return char(ucode)
elseif ucode < 0x800 then -- 2bytes
c1 = floor(ucode / 0x40)
c2 = ucode - c1 * 0x40
c1 = c1 + 0xC0
c2 = c2 + 0x80
if rest then
return char(c1, c2, rest)
end
return char(c1, c2)
elseif ucode < 0xD800 or 0xE000 <= ucode then -- 3bytes
c1 = floor(ucode / 0x1000)
ucode = ucode - c1 * 0x1000
c2 = floor(ucode / 0x40)
c3 = ucode - c2 * 0x40
c1 = c1 + 0xE0
c2 = c2 + 0x80
c3 = c3 + 0x80
if rest then
return char(c1, c2, c3, rest)
end
return char(c1, c2, c3)
elseif 0xD800 <= ucode and ucode < 0xDC00 then -- surrogate pair 1st
if f_str_surrogate_prev == 0 then
f_str_surrogate_prev = ucode
if not rest then
return ''
end
surrogate_first_error()
end
f_str_surrogate_prev = 0
surrogate_first_error()
else -- surrogate pair 2nd
if f_str_surrogate_prev ~= 0 then
ucode = 0x10000 +
(f_str_surrogate_prev - 0xD800) * 0x400 +
(ucode - 0xDC00)
f_str_surrogate_prev = 0
c1 = floor(ucode / 0x40000)
ucode = ucode - c1 * 0x40000
c2 = floor(ucode / 0x1000)
ucode = ucode - c2 * 0x1000
c3 = floor(ucode / 0x40)
c4 = ucode - c3 * 0x40
c1 = c1 + 0xF0
c2 = c2 + 0x80
c3 = c3 + 0x80
c4 = c4 + 0x80
if rest then
return char(c1, c2, c3, c4, rest)
end
return char(c1, c2, c3, c4)
end
parse_error("2nd surrogate pair byte appeared without 1st")
end
end
parse_error("invalid unicode codepoint literal")
end
if f_str_surrogate_prev ~= 0 then
f_str_surrogate_prev = 0
surrogate_first_error()
end
return f_str_escapetbl[ch] .. ucode
end
local function f_str(iskey)
local pos2 = pos
local newpos
local str = ''
local bs
while true do
while true do -- search '\' or '"'
newpos = find(json, '[\\"]', pos2)
if newpos then
break
end
str = str .. sub(json, pos, jsonlen)
if pos2 == jsonlen+2 then
pos2 = 2
else
pos2 = 1
end
jsonnxt()
if jsonlen == 0 then
parse_error("unterminated string")
end
end
if byte(json, newpos) == 0x22 then -- break if '"'
break
end
pos2 = newpos+2 -- skip '\<char>'
bs = true -- mark the existence of a backslash
end
str = str .. sub(json, pos, newpos-1)
pos = newpos+1
if find(str, f_str_ctrl_pat) then
parse_error("unescaped control string")
end
if bs then -- a backslash exists
-- We need to grab 4 characters after the escape char,
-- for encoding unicode codepoint to UTF-8.
-- As we need to ensure that every first surrogate pair byte is
-- immediately followed by second one, we grab upto 5 characters and
-- check the last for this purpose.
str = gsub(str, '\\(.)([^\\]?[^\\]?[^\\]?[^\\]?[^\\]?)', f_str_subst)
if f_str_surrogate_prev ~= 0 then
f_str_surrogate_prev = 0
parse_error("1st surrogate pair byte not continued by 2nd")
end
end
if iskey then
return sax_key(str)
end
return sax_string(str)
end
--[[
Arrays, Objects
--]]
-- arrays
local function f_ary()
rec_depth = rec_depth + 1
if rec_depth > 1000 then
parse_error('too deeply nested json (> 1000)')
end
sax_startarray()
spaces()
if byte(json, pos) ~= 0x5D then -- check closing bracket ']' which means the array empty
local newpos
while true do
f = dispatcher[byte(json, pos)] -- parse value
pos = pos+1
f()
newpos = match(json, '^[ \n\r\t]*,[ \n\r\t]*()', pos) -- check comma
if newpos then
pos = newpos
else
newpos = match(json, '^[ \n\r\t]*%]()', pos) -- check closing bracket
if newpos then
pos = newpos
break
end
spaces() -- since the current chunk can be ended, skip spaces toward following chunks
local c = byte(json, pos)
pos = pos+1
if c == 0x2C then -- check comma again
spaces()
elseif c == 0x5D then -- check closing bracket again
break
else
parse_error("no closing bracket of an array")
end
end
if pos > jsonlen then
spaces()
end
end
end
rec_depth = rec_depth - 1
return sax_endarray()
end
-- objects
local function f_obj()
rec_depth = rec_depth + 1
if rec_depth > 1000 then
parse_error('too deeply nested json (> 1000)')
end
sax_startobject()
spaces()
if byte(json, pos) ~= 0x7D then -- check closing bracket '}' which means the object empty
local newpos
while true do
if byte(json, pos) ~= 0x22 then
parse_error("not key")
end
pos = pos+1
f_str(true) -- parse key
newpos = match(json, '^[ \n\r\t]*:[ \n\r\t]*()', pos) -- check colon
if newpos then
pos = newpos
else
spaces() -- read spaces through chunks
if byte(json, pos) ~= 0x3A then -- check colon again
parse_error("no colon after a key")
end
pos = pos+1
spaces()
end
if pos > jsonlen then
spaces()
end
f = dispatcher[byte(json, pos)]
pos = pos+1
f() -- parse value
newpos = match(json, '^[ \n\r\t]*,[ \n\r\t]*()', pos) -- check comma
if newpos then
pos = newpos
else
newpos = match(json, '^[ \n\r\t]*}()', pos) -- check closing bracket
if newpos then
pos = newpos
break
end
spaces() -- read spaces through chunks
local c = byte(json, pos)
pos = pos+1
if c == 0x2C then -- check comma again
spaces()
elseif c == 0x7D then -- check closing bracket again
break
else
parse_error("no closing bracket of an object")
end
end
if pos > jsonlen then
spaces()
end
end
end
rec_depth = rec_depth - 1
return sax_endobject()
end
--[[
The jump table to dispatch a parser for a value,
indexed by the code of the value's first char.
Key should be non-nil.
--]]
dispatcher = { [0] =
f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err,
f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err,
f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err,
f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err,
f_err, f_err, f_str, f_err, f_err, f_err, f_err, f_err,
f_err, f_err, f_err, f_err, f_err, f_mns, f_err, f_err,
f_zro, f_num, f_num, f_num, f_num, f_num, f_num, f_num,
f_num, f_num, f_err, f_err, f_err, f_err, f_err, f_err,
f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err,
f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err,
f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err,
f_err, f_err, f_err, f_ary, f_err, f_err, f_err, f_err,
f_err, f_err, f_err, f_err, f_err, f_err, f_fls, f_err,
f_err, f_err, f_err, f_err, f_err, f_err, f_nul, f_err,
f_err, f_err, f_err, f_err, f_tru, f_err, f_err, f_err,
f_err, f_err, f_err, f_obj, f_err, f_err, f_err, f_err,
}
--[[
public functions
--]]
-- Parse one value from the current position: reset the recursion depth,
-- then dispatch on the byte found at `pos`.
-- Exposed to callers as `run` in the table returned by the enclosing function.
local function run()
rec_depth = 0
spaces() -- NOTE(review): presumably skips leading whitespace, loading further chunks as needed -- confirm
f = dispatcher[byte(json, pos)] -- handler chosen by the byte at the current position
pos = pos+1 -- step past the dispatched byte before the handler runs
f()
end
-- Return the next `n` bytes of input starting at the current position and
-- move the position past them.
-- Raises an error when `n` is negative.
-- The result can be shorter than `n`: the refill loop stops as soon as
-- `jsonlen` is 0 (NOTE(review): presumably this marks exhausted input -- confirm).
local function read(n)
if n < 0 then
error("the argument must be non-negative")
end
local pos2 = (pos-1) + n -- index of the last wanted byte within the current chunk
local str = sub(json, pos, pos2)
-- The request runs past the end of the current chunk: keep appending from
-- the following chunks.
while pos2 > jsonlen and jsonlen ~= 0 do
jsonnxt() -- NOTE(review): presumably loads the next chunk and updates json/jsonlen/pos -- confirm
pos2 = pos2 - (jsonlen - (pos-1))
str = str .. sub(json, pos, pos2)
end
if jsonlen ~= 0 then
pos = pos2+1 -- continue right after the bytes just returned
end
return str
end
-- Report the current position as `acc + pos`.
-- NOTE(review): presumably `acc` is the offset contributed by earlier chunks,
-- making this a position within the whole input -- confirm.
local function tellpos()
return acc + pos
end
-- The parser object handed back to the caller: the three functions defined
-- just above plus `tryc`, which comes from the enclosing scope.
return {
run = run,
tryc = tryc,
read = read,
tellpos = tellpos,
}
end
-- Create a parser whose input is pulled from the file `fn` in chunks of
-- 8192 bytes; `saxtbl` is passed through to newparser unchanged.
local function newfileparser(fn, saxtbl)
  local handle = open(fn)

  -- Chunk generator given to newparser: returns the next chunk, or nil when
  -- there is no open handle. The handle is closed and dropped on the first
  -- read that yields nothing.
  local function next_chunk()
    if not handle then
      return nil
    end
    local chunk = handle:read(8192)
    if not chunk then
      handle:close()
      handle = nil
    end
    return chunk
  end

  return newparser(next_chunk, saxtbl)
end
-- Values exported by this chunk: the two parser constructors.
return {
newparser = newparser,
newfileparser = newfileparser
}
end
end
end
--
-- bbt-to-live-doc
--
-- Copyright (c) 2020 Emiliano Heyns
--
-- Permission is hereby granted, free of charge, to any person obtaining a copy of
-- this software and associated documentation files (the "Software"), to deal in
-- the Software without restriction, including without limitation the rights to
-- use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-- of the Software, and to permit persons to whom the Software is furnished to do
-- so, subject to the following conditions:
--
-- The above copyright notice and this permission notice shall be included in all
-- copies or substantial portions of the Software.
--
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-- SOFTWARE.
--
-- local pl = require('pl.pretty') -- for pl.pretty.dump
local json = require('lunajson')
local csl_locator = require('locator')
--- Flatten a list of inline elements into plain text.
-- Only `Str` elements (contributing their `text`) and `Space` elements
-- (contributing a single blank) are supported; anything else raises an error.
-- A plain string is accepted as-is, so metadata values that arrive as
-- strings can be handled by the same call.
-- @param tbl list of inline elements, a string, or nil
-- @return the collected text, or nil when `tbl` is nil or the text is empty
local function collect(tbl)
  if not tbl then
    return nil
  end
  if type(tbl) == 'string' then
    if tbl == '' then
      return nil
    end
    return tbl
  end

  local parts = {}
  -- ipairs, not pairs: the elements are an ordered sequence and the text
  -- must come out in document order.
  for _, inline in ipairs(tbl) do
    if inline.t == 'Str' then
      parts[#parts + 1] = inline.text
    elseif inline.t == 'Space' then
      parts[#parts + 1] = ' '
    else
      -- tostring() keeps the message usable when the element has no tag
      error('cannot collect ' .. tostring(inline.t), 1)
    end
  end

  local text = table.concat(parts)
  if text == '' then
    return nil
  end
  return text
end
--- Recursively copy a value.
-- Tables are copied key by key (keys, values and the metatable are copied
-- recursively); every other type is returned unchanged.
-- Tables that were already copied during this call are reused, so cyclic
-- structures terminate and shared subtables stay shared in the copy.
-- @param orig value to copy
-- @param seen (optional, internal) map from original tables to their copies
-- @return the copy
function deepcopy(orig, seen)
  if type(orig) ~= 'table' then
    -- number, string, boolean, etc
    return orig
  end

  seen = seen or {}
  local known = seen[orig]
  if known then
    return known
  end

  local copy = {}
  -- register before descending so that back-references resolve to `copy`
  seen[orig] = copy
  for orig_key, orig_value in next, orig, nil do
    copy[deepcopy(orig_key, seen)] = deepcopy(orig_value, seen)
  end
  setmetatable(copy, deepcopy(getmetatable(orig), seen))
  return copy
end
-- Filter settings. The values below are the defaults; Meta() overrides them
-- from the document metadata and also adds a `format` field.
local zotero = {
-- Better BibTeX library URL that the item data is fetched from
bibliography = 'http://127.0.0.1:23119/better-bibtex/library?/1/library',
-- when true and the output format is odt, Meta() selects the 'scannable-cite' format
scannable_cite = false
}
--- Read the filter settings from the document metadata.
-- Recognised settings:
--   * `zotero_bibliography` (top level), or `zotero.bibliography`: the
--     Better BibTeX library URL; the top-level key wins.
--   * `zotero_scannable_cite` (top level; boolean or the strings 'true' /
--     'false'), or `zotero['scannable-cite']` (boolean).
-- Also derives `zotero.format` from the output FORMAT: 'docx',
-- 'scannable-cite' (odt with scannable cites enabled) or 'odt'. For any other
-- output format it stays unset.
-- Raises an error for a URL that does not look like a local Better BibTeX
-- endpoint and for malformed scannable-cite values.
-- @param meta the document metadata
function Meta(meta)
  if meta.zotero_bibliography then
    -- The override must end up in the filter settings, not in the document
    -- metadata. It is a plain string in some cases and a list of inlines in
    -- others, so both are handled.
    local override = meta.zotero_bibliography
    if type(override) ~= 'string' then
      override = collect(override)
    end
    zotero.bibliography = override or zotero.bibliography
  elseif meta.zotero and meta.zotero.bibliography then
    zotero.bibliography = collect(meta.zotero.bibliography)
  end

  if not string.match(zotero.bibliography, '^http://127%.0%.0%.1:2[34]119/better%-bibtex/') and not string.match(zotero.bibliography, '^http://localhost:2[34]119/better%-bibtex/') then
    error(zotero.bibliography .. ' does not look like a Zotero bibliography url', 1)
  end

  -- drop a trailing 5-character '.json' / '.jzon' extension; the fetch calls
  -- append the extension they need
  if zotero.bibliography:find('%.j.on$') then
    zotero.bibliography = zotero.bibliography:sub(1, -6)
  end

  if type(meta.zotero_scannable_cite) == 'string' then
    if meta.zotero_scannable_cite == 'true' then
      zotero.scannable_cite = true
    elseif meta.zotero_scannable_cite == 'false' then
      zotero.scannable_cite = false
    else
      error('scannable-cite expects true or false, got ' .. meta.zotero_scannable_cite, 1)
    end
  elseif type(meta.zotero_scannable_cite) == 'boolean' then
    zotero.scannable_cite = meta.zotero_scannable_cite
  elseif meta.zotero and type(meta.zotero['scannable-cite']) ~= 'nil' then
    if type(meta.zotero['scannable-cite']) ~= 'boolean' then
      error('scannable-cite expects a boolean', 1)
    end
    zotero.scannable_cite = meta.zotero['scannable-cite']
  end

  if string.match(FORMAT, 'docx') then
    zotero.format = 'docx'
  elseif string.match(FORMAT, 'odt') and zotero.scannable_cite then
    zotero.format = 'scannable-cite'
    -- scannable cites use the short form of the locator labels
    csl_locator.short_labels()
  elseif string.match(FORMAT, 'odt') then
    zotero.format = 'odt'
  end
end
--- Replace every `Cite` element in a list of inlines by its Zotero form.
-- Nothing is changed when no output format was selected by Meta(). With the
-- 'scannable-cite' format a cite is converted by scannable_cite(), otherwise
-- by zotero_ref().
-- @param inlines list of inline elements (modified in place)
-- @return the same list
function Inlines(inlines)
  if not zotero.format then
    return inlines
  end

  local scannable = (zotero.format == 'scannable-cite')
  for index, inline in pairs(inlines) do
    if inline.t == 'Cite' then
      local replacement
      if scannable then
        replacement = scannable_cite(inline)
      else
        replacement = zotero_ref(inline)
      end
      inlines[index] = replacement
    end
  end

  return inlines
end
-- Seed the generator behind cite_id() from the wall clock plus the CPU time
-- in microseconds. os.clock() is CPU time and close to zero at start-up, so
-- its fifth power was a tiny fraction: effectively the same seed on every
-- run, and not an integer. The sum below is integral and differs between runs.
math.randomseed(os.time() + math.floor(os.clock() * 1000000))
--- Generate a random identifier made of lowercase ASCII letters.
-- @param length number of letters to produce
-- @return string of `length` characters in the range a-z
function cite_id(length)
  local letters = {}
  for i = 1, length do
    -- 97..122 are the byte values of 'a'..'z'
    letters[i] = string.char(math.random(97, 122))
  end
  return table.concat(letters)
end
--- Escape the characters `"`, `<`, `>` and `&` for use in XML text and
-- double-quoted attribute values.
-- @param str text to escape
-- @return the escaped text (exactly one value)
function xmlescape(str)
  -- string.gsub also returns the number of replacements; keep only the text
  -- so the count cannot leak into a caller's argument list or table.
  local escaped = string.gsub(str, '["<>&]', { ['&'] = '&amp;', ['<'] = '&lt;', ['>'] = '&gt;', ['"'] = '&quot;' })
  return escaped
end
--- Strip leading and trailing whitespace.
-- @param s string to trim
-- @return `s` without surrounding whitespace
function trim(s)
  -- the lazy capture takes the shortest middle part that still lets the
  -- trailing whitespace run reach the end of the string
  local core = s:match("^%s*(.-)%s*$")
  return core
end
-- Mutable state shared between calls. zotero_ref() and scannable_cite()
-- additionally cache the data they fetch here (fields `bib` and `uris`).
local state = {
-- citation keys for which a message was already printed, so each is reported once
reported = {}
}
--- Turn a citation into a live Zotero field for docx or odt output.
-- The library data is fetched once and cached in `state.bib`. The cite is
-- returned unchanged when it contains an author-in-text citation (not
-- supported in Zotero) or a key that is not in the library; a missing key is
-- reported once.
-- @param cite the Cite element
-- @return a raw inline holding the field, or `cite` itself when it is left alone
function zotero_ref(cite)
  if not state.bib then
    local _, contents = pandoc.mediabag.fetch(zotero.bibliography .. '.json&pandocFilterData=true', '.')
    state.bib = json.decode(contents)
  end

  -- plain text of the citation as written; nil when there is none
  local cite_text = collect(cite.content)

  local csl = {
    citationID = cite_id(8),
    properties = {
      formattedCitation = cite_text,
      plainCitation = cite_text,
      noteIndex = 0
    },
    citationItems = {},
    schema = "https://github.com/citation-style-language/schema/raw/master/csl-citation.json"
  }

  -- ipairs: the citation items must keep the order they have in the document
  for _, item in ipairs(cite.citations) do
    if item.mode == 'AuthorInText' then -- not supported in Zotero
      return cite
    end

    local entry = state.bib[item.id]
    if not entry then
      if not state.reported[item.id] then print('@' .. item.id .. ' not found in Zotero') end
      state.reported[item.id] = true
      return cite
    end

    -- work on a copy: the cached library entry is reused by later citations
    local itemData = deepcopy(entry.item)
    if item.mode == 'SuppressAuthor' then
      itemData['suppress-author'] = true
    end
    itemData.prefix = collect(item.prefix)
    local label, locator, suffix = csl_locator.parse(collect(item.suffix))
    itemData.suffix = suffix
    itemData.label = label
    itemData.locator = locator

    if entry.duplicate then
      if not state.reported[item.id] then print(item.id .. ' appears more than once in the library') end
      state.reported[item.id] = true
    end

    table.insert(csl.citationItems, {
      id = entry.zotero.itemID,
      uris = { entry.zotero.uri },
      uri = { entry.zotero.uri },
      itemData = itemData
    })
  end

  -- Text shown until Zotero refreshes the document. An empty citation text
  -- must not break the concatenation.
  local placeholder = xmlescape('<open Zotero document preferences: ' .. (cite_text or '') .. '>')

  if zotero.format == 'docx' then
    local field = '<w:r><w:fldChar w:fldCharType="begin"/></w:r><w:r><w:instrText xml:space="preserve">'
    field = field .. ' ADDIN ZOTERO_ITEM CSL_CITATION ' .. xmlescape(json.encode(csl)) .. ' '
    field = field .. '</w:instrText></w:r><w:r><w:fldChar w:fldCharType="separate"/></w:r><w:r><w:rPr><w:noProof/></w:rPr><w:t>'
    field = field .. placeholder
    field = field .. '</w:t></w:r><w:r><w:fldChar w:fldCharType="end"/></w:r>'
    return pandoc.RawInline('openxml', field)
  else
    -- the same name is used for the start and the end of the reference mark
    local mark_name = 'ZOTERO_ITEM CSL_CITATION ' .. xmlescape(json.encode(csl)) .. ' RND' .. cite_id(10)
    local field = '<text:reference-mark-start text:name="' .. mark_name .. '"/>'
    field = field .. placeholder
    field = field .. '<text:reference-mark-end text:name="' .. mark_name .. '"/>'
    return pandoc.RawInline('opendocument', field)
  end
end
--- Turn a citation into scannable-cite markers.
-- The map from citation key to item URI is fetched once and cached in
-- `state.uris`. The cite is returned unchanged when one of its keys is not in
-- the library; a missing key is reported once.
-- Raises an error when an item URI has neither of the two expected shapes.
-- @param cite the Cite element
-- @return a Str with one `{ prefix | text | locator | suffix | id }` marker
--         per citation item, or `cite` itself when it is left alone
function scannable_cite(cite)
  if not state.uris then
    -- build the map first so that a failed fetch does not leave an empty cache
    local uris = {}
    local _, contents = pandoc.mediabag.fetch(zotero.bibliography .. '.jzon', '.')
    for _, item in pairs(json.decode(contents).items) do
      uris[item.citationKey] = item.uri
    end
    state.uris = uris
  end

  local markers = {}
  local cite_text -- computed on first use, identical for every item

  -- ipairs: the markers must keep the order of the citation items
  for _, item in ipairs(cite.citations) do
    local uri = state.uris[item.id]
    if not uri then
      if not state.reported[item.id] then print('@' .. item.id .. ' not found in Zotero') end
      state.reported[item.id] = true
      return cite
    end

    local suppress = (item.mode == 'SuppressAuthor' and '-' or '')

    -- Items of the local user library get the fixed id '0'; otherwise the
    -- kind (users/groups) and the id are taken from the URI.
    local _, ug, id, key
    _, _, key = string.find(uri, 'http://zotero.org/users/local/%w+/items/(%w+)')
    if key then
      ug = 'users'
      id = '0'
    else
      _, _, ug, id, key = string.find(uri, 'http://zotero.org/(%w+)/(%w+)/items/(%w+)')
    end
    if not key then
      error('cannot parse Zotero URI ' .. tostring(uri) .. ' for @' .. tostring(item.id), 1)
    end

    local label, locator, suffix = csl_locator.parse(collect(item.suffix))
    if locator then
      locator = (label or 'p.') .. ' ' .. locator
    else
      locator = ''
    end

    if not cite_text then
      -- '|', '{' and '}' delimit the marker, so they are removed from the text
      cite_text = trim((string.gsub(collect(cite.content) or '', '[|{}]', '')))
    end

    markers[#markers + 1] =
      '{ ' .. (collect(item.prefix) or '') ..
      ' | ' .. suppress .. cite_text ..
      ' | ' .. locator ..
      ' | ' .. (suffix or '') ..
      ' | ' .. (ug == 'groups' and 'zg:' or 'zu:') .. id .. ':' .. key .. ' }'
  end

  return pandoc.Str(table.concat(markers))
end
-- The filter is returned as two separate passes, Meta first.
-- NOTE(review): presumably so that the settings read by Meta() are in place
-- before Inlines() rewrites the citations -- confirm against the pandoc
-- Lua filter documentation.
return {
{ Meta = Meta },
{ Inlines = Inlines }
}
@alexflaris
Copy link

alexflaris commented Aug 5, 2021

Hi there,

I am trying to use this filter (zotero.lua) for a paper that I am writing in R Markdown and knitting it for docx. I found this page (https://retorque.re/zotero-better-bibtex/exporting/pandoc/), but unfortunately I cannot get live citations (i.e. citation numbers that I can edit with Zotero inside my docx) when I convert my Rmd file to docx.

My YAML header looks like this:

---
title: "How to get live citations in papers"
output:
  bookdown::word_document2:
    reference_docx: ./my_reference_style.docx
    pandoc_args: ["--lua-filter=./zotero.lua"]
zotero:
  client: zotero
  csl-style: jama
bibliography: ./my_bibliography.bib
---

The docx has citations that are not per the csl-style I chose (so not "jama") and they are not editable with Zotero. After I open the Document Preferences and click ok, then I click on Refresh and I get an error from Zotero: "You must insert a citation before performing this operation".

Any ideas on how to solve this problem? I have RStudio 1.3.1073 and Rv4.0.5.

The output I get from the R markdown console is:

C:/Program Files/RStudio/bin/pandoc/pandoc" +RTS -K512m -RTS paper.knit.md --to docx --from markdown+autolink_bare_uris+tex_math_single_backslash --output paper.docx --lua-filter "C:\Users\XRHSTO\DOCUME\R\WIN-LI\4.0\bookdown\RMARKD\lua\CUSTOM.LUA" --lua-filter "C:\Users\XRHSTO\DOCUME\R\WIN-LI\4.0\RMARKD\RMARKD\lua\PAGEBR.LUA" --lua-filter "C:\Users\XRHSTO\DOCUME\R\WIN-LI\4.0\RMARKD\RMARKD\lua\NUMBER.LUA" --highlight-style tango --reference-doc my_reference_style.docx --lua-filter=./zotero.lua --filter "C:/Program Files/RStudio/bin/pandoc/pandoc-citeproc.exe" 

Thank you,
Alex

@retorquere
Copy link
Author

retorquere commented Aug 7, 2021

The docx has citations that are not per the csl-style I chose (so not "jama")

The csl-style property only does something for ODT files, but it isn't a problem that it's set here.

and they are not editable with Zotero. After I open the Document Preferences and click ok, then I click on Refresh and I get an error from Zotero: "You must insert a citation before performing this operation".

I'd have to have a copy of (part of) the document to say. Since the command runs without error there's nothing for me to go on; from here it looks like you did everything right, but if the lua filter can't find the keys, it will just silently skip over them.

@retorquere
Copy link
Author

retorquere commented Aug 7, 2021

No wait, that isn't right, missing keys would be reported. I'd really need to see that document.

@alexflaris
Copy link

alexflaris commented Aug 8, 2021

Thank you for your prompt replies.

I experimented around and I believe the following is happening.

If my YAML header is as above, i.e.:

---
title: "How to get live citations in papers"
output:
  bookdown::word_document2:
    reference_docx: ./my_reference_style.docx
    pandoc_args: ["--lua-filter=./zotero.lua"]
zotero:
  client: zotero
  csl-style: jama
bibliography: ./my_bibliography.bib
---

The reason why there are no missing keys is that I have included the line bibliography: ./my_bibliography.bib at the end. If I remove that line, as I did in post #1882, and insert the following YAML header:

---
title: "How to create live citations from Rmarkdown"
output:
  bookdown::word_document2:
    reference_docx: ./my_document_style.docx
    pandoc_args: ["--lua-filter=./zotero.lua"]
zotero:
  library: "My Library" #my references are inside a subcollection inside My Library
  scannable-cite: false # only relevant when you're compiling to scannable-cite .odt
  client: zotero
  author-in-text: false # when true, enabled fake author-name-only cites by replacing it with the text of the last names of the authors
  csl-style: jama
---

without the bibliography: statement, then I do get missing keys for all of the citations from pandoc though, not the zotero.lua filter (I saw that there is a string that is supposed to be printed: print('@' .. item.id .. ' not found in Zotero')) .

I also tried another test. I checked your code and - if I understood correctly - if the scannable-cite field is not true/false, then an error message should be displayed.

I set scannable-cite to "whatever" and no error appeared.

So I do not know if the problem is in the way Rstudio loads the lua filters. I know it detects zotero.lua because it shows up on the Rmarkdown console as being part of the pandoc command options:

"C:/Program Files/RStudio/bin/pandoc/pandoc" +RTS -K512m -RTS Experiment_with_references.knit.md --to docx --from markdown+autolink_bare_uris+tex_math_single_backslash --output Experiment_with_references.docx --lua-filter "C:\Users\XRHSTO~1\DOCUME~1\R\WIN-LI~1\4.0\bookdown\RMARKD~1\lua\CUSTOM~1.LUA" --lua-filter "C:\Users\XRHSTO~1\DOCUME~1\R\WIN-LI~1\4.0\RMARKD~1\RMARKD~1\lua\PAGEBR~1.LUA" --lua-filter "C:\Users\XRHSTO~1\DOCUME~1\R\WIN-LI~1\4.0\RMARKD~1\RMARKD~1\lua\NUMBER~1.LUA" --highlight-style tango --reference-doc RmarkdownDOCX_style_ANF_changed.docx --lua-filter=./zotero.lua

More importantly, I have searched quite a bit and I have not found a single example .Rmd using zotero.lua for .docx (i.e. I have not found a working YAML header implemented in an .Rmd that is posted). Are there any examples and information on its setup that you know of?

Either way, I am sending you my .Rmd and the .docx that goes with it, so you can take a look.

There are two experiments: ExampleRmdWithoutBibliography, which does not include the bibliography: line in the YAML header and ExampleRmdWithBibliography which does include bibliography:.

You can find all the files at my repository:

https://github.com/alexflaris/files_for_zotero_lua_example.git

Thanks again,
Alex

@retorquere
Copy link
Author

retorquere commented Aug 8, 2021

That would indicate that the filter is not being run at all. What happens if you set the full path to the zotero.lua filter instead of ./zotero.lua? I don't know what the current directory is during that run.

That repo is missing a few files from the pandoc call, but if I strip those out, I do get the prints that keys are missing.

@alexflaris
Copy link

alexflaris commented Aug 10, 2021

I tried to specify the full path and it still doesn't work. I know RStudio locates it, because when I put a false path on purpose, I get the following error:

openBinaryFile: does not exist (No such file or directory)
Error: pandoc document conversion failed with error 83

So Rstudio finds the filter but does not apply it.

I downloaded pandoc separately and ran it from Windows cmd as such:

pandoc -o out.docx ExampleRmdWithoutBibliography.Rmd --lua-filter=./zotero.lua

At that point, I did get two errors from the filter:

@arezzoRoleStentsManagement2017 not found in Zotero
@calcagnoGlmultiPackageEasy2010 not found in Zotero

It seems that when I cite more than one article (e.g. : [@arezzoRoleStentsManagement2017; @bagnallSystematicReviewEnhanced2014; @baucomAssociationPerioperativeHypothermia2015; @bretagnolRectalCancerSurgery2010]), it just picks up the first reference.

So I believe there might be a few problems:

  1. an error with how Rstudio interacts with builtin pandoc
  2. zotero.lua just picking up the first of a list of references and not the remaining ones.
  3. zotero.lua not communicating with my Zotero library despite it being open and running

When you ran the file, did it show that all keys were missing? Or just those two?

@retorquere
Copy link
Author

retorquere commented Aug 10, 2021

It just reports the first missing key per group, correct; when it finds one that doesn't exist, it leaves the entire citation to pandoc. I will change that, but that isn't the problem you are experiencing. There are two citation blocks, you currently get one message for each. So if you see "not found in Zotero", that means the filter is running, and deciding that those keys are not in your library.

If there was an error communicating with Zotero, you would instead see

Error running filter ./zotero.lua:
PandocHttpError "http://127.0.0.1:23119/better-bibtex/library?/1/library.json&pandocFilterData=true" (HttpExceptionRequest Request {
  host                 = "127.0.0.1"
  port                 = 23119
  secure               = False
  requestHeaders       = []
  path                 = "/better-bibtex/library"
  queryString          = "?/1/library.json&pandocFilterData=true"
  method               = "GET"
  proxy                = Nothing
  rawBody              = False
  redirectCount        = 10
  responseTimeout      = ResponseTimeoutDefault
  requestVersion       = HTTP/1.1
  proxySecureMode      = ProxySecureWithConnect
}
 (ConnectionFailure Network.Socket.connect: <socket: 13>: does not exist (Connection refused)))

but that's easy enough to verify by running the script with Zotero not running.

Anyhow, while 1. is possible, 2. and 3. are not the problem.

You're on an old version of zotero.lua BTW. The current version is a266bf0e6 (see the first line of the script)

edit: wait -- the communication between the filter and Zotero changed since the filter version you have. Upgrading will likely just fix it.

@alexflaris
Copy link

alexflaris commented Aug 10, 2021

You are right; I closed zotero and the error message you showed in the previous post, popped-up. However, I still can't find the keys, despite the citations being in my Zotero library - I guess it's the old filter problem.

Also by updating my RStudio to the latest version, I was able to have zotero.lua loaded. Still can't find the citekeys so I need to get the latest version of the filter.

How do I get the latest version? And where do you find the a266bf0e6 stamp?

@retorquere
Copy link
Author

retorquere commented Aug 10, 2021

Just download again from https://retorque.re/zotero-better-bibtex/exporting/pandoc/. The stamp is at the top of the file.

@alexflaris
Copy link

alexflaris commented Aug 10, 2021

It worked! Thank you again a lot.

Last thing: is there a way to automatically add the bibliography at the end (by, for example, passing the line <div id="refs"></div> in Rmarkdown), or do I need to add bibliography manually i.e. by doing it from inside the .docx file?

@retorquere
Copy link
Author

retorquere commented Aug 10, 2021

Technically possible to add it automatically, yes, but pandoc doesn't have a well-defined marker for it. You could make a separate filter for it.

@alexflaris
Copy link

alexflaris commented Aug 10, 2021

Maybe in the future - but at this point my technical skills are not at that level.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment