Skip to content

Instantly share code, notes, and snippets.

@poplarShift
Created November 4, 2023 11:55
Show Gist options
  • Save poplarShift/3629078ba614f664e524ef8ee4907d18 to your computer and use it in GitHub Desktop.
Save poplarShift/3629078ba614f664e524ef8ee4907d18 to your computer and use it in GitHub Desktop.
Parse markdown-style citations in .docx using pandocfilters
#!/usr/bin/env python
"""
Pandoc filter to convert text of form [@foo,@bar] to citations.
Handy for e.g. Word (.docx) documents, where citations are typed as plain text.
"""
import re
from pandocfilters import toJSONFilter, Str
# Pulls the individual citation keys (runs of word characters) out of a
# matched "[@foo,@bar]" token. Written as a raw string because "\w" in a
# plain literal is an invalid escape sequence that newer Python versions
# warn about.
reg = re.compile(r"\w+")
def Cite(
    citationIds,
    citationPrefix=None,
    citationSuffix=None,
    citationMode="NormalCitation",
    citationNoteNum=1,
    citationHash=0,
):
    """Build the JSON form of a pandoc ``Cite`` inline element.

    Args:
        citationIds: Iterable of citation keys (without the leading ``@``).
            One citation entry is produced per key.
        citationPrefix: List of inline elements placed in every citation's
            ``citationPrefix``. ``None`` (the default) means an empty list.
        citationSuffix: List of inline elements placed in every citation's
            ``citationSuffix``. ``None`` (the default) means an empty list.
        citationMode: Tag stored as ``{"t": citationMode}`` in each citation.
        citationNoteNum: Value stored as ``citationNoteNum`` in each citation.
        citationHash: Value stored as ``citationHash`` in each citation.

    Returns:
        A dict ``{"t": "Cite", "c": [citations, inlines]}`` where
        ``citations`` holds one dict per key and ``inlines`` holds one
        ``Str`` element per key containing the bare key text.
    """
    # Materialise once: the keys are walked twice below, which would leave
    # the second list empty for a generator or other one-shot iterable.
    citationIds = list(citationIds)
    prefix = [] if citationPrefix is None else citationPrefix
    suffix = [] if citationSuffix is None else citationSuffix

    citations = [
        {
            "citationId": citationId,
            # Copy per citation so no two entries (or two calls) share one
            # list object; mutating one result must not affect another.
            "citationPrefix": list(prefix),
            "citationSuffix": list(suffix),
            "citationMode": {
                "t": citationMode,
            },
            "citationNoteNum": citationNoteNum,
            "citationHash": citationHash,
        }
        for citationId in citationIds
    ]
    inlines = [{"t": "Str", "c": citationId} for citationId in citationIds]
    return {
        "t": "Cite",
        "c": [citations, inlines],
    }
def mkcite(key, value, format, meta):
    """Turn a ``Str`` token such as ``[@foo,@bar]`` into a citation.

    Written as the action function handed to ``toJSONFilter``.

    Args:
        key: Type tag of the element being visited; only ``"Str"`` is handled.
        value: Payload of the element; for ``Str`` this is the token text.
        format: Unused here; part of the filter action signature.
        meta: Unused here; part of the filter action signature.

    Returns:
        ``[cite, Str(rest)]`` when ``value`` starts with a bracketed list of
        ``@key`` entries, where ``rest`` is whatever follows the closing
        bracket. Returns ``None`` otherwise, which pandocfilters treats as
        "leave the element unchanged".
    """
    if key != "Str":
        return None

    # Match a leading token of the form [@foo,@bar].
    match = re.match(r"(\[@[@\w+,]+\])", value)
    if match is None:
        return None

    citekeys = reg.findall(match.group(1))
    if not citekeys:
        # e.g. "[@,]": bracket syntax but no usable key; do not emit an
        # empty citation, leave the text as it is.
        return None

    # Keep everything after the closing bracket, not just dots and word
    # characters, so trailing punctuation such as "," or ")" is not lost.
    remainder = value[match.end():]
    return [Cite(citekeys), Str(remainder)]
if __name__ == "__main__":
    # Script entry point: hand mkcite to pandocfilters' toJSONFilter, which
    # drives it as the filter action.
    toJSONFilter(mkcite)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment