Skip to content

Instantly share code, notes, and snippets.

@zengxs
Created May 25, 2020 12:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zengxs/172f202adb532c6aadfb4bba6079bd73 to your computer and use it in GitHub Desktop.
Save zengxs/172f202adb532c6aadfb4bba6079bd73 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
###
# START pandocfilters.py
###
# Author: John MacFarlane <jgm@berkeley.edu>
# Copyright: (C) 2013 John MacFarlane
# License: BSD3
"""
Functions to aid writing python scripts that process the pandoc
AST serialized as JSON.
"""
import codecs
import hashlib
import io
import json
import os
import sys
# some utility-functions: make it easier to create your own filters
def get_filename4code(module, content, ext=None):
"""Generate filename based on content
The function ensures that the (temporary) directory exists, so that the
file can be written.
Example:
filename = get_filename4code("myfilter", code)
"""
imagedir = module + "-images"
fn = hashlib.sha1(content.encode(sys.getfilesystemencoding())).hexdigest()
try:
os.mkdir(imagedir)
sys.stderr.write('Created directory ' + imagedir + '\n')
except OSError:
pass
if ext:
fn += "." + ext
return os.path.join(imagedir, fn)
def get_value(kv, key, value = None):
"""get value from the keyvalues (options)"""
res = []
for k, v in kv:
if k == key:
value = v
else:
res.append([k, v])
return value, res
def get_caption(kv):
"""get caption from the keyvalues (options)
Example:
if key == 'CodeBlock':
[[ident, classes, keyvals], code] = value
caption, typef, keyvals = get_caption(keyvals)
...
return Para([Image([ident, [], keyvals], caption, [filename, typef])])
"""
caption = []
typef = ""
value, res = get_value(kv, u"caption")
if value is not None:
caption = [Str(value)]
typef = "fig:"
return caption, typef, res
def get_extension(format, default, **alternates):
"""get the extension for the result, needs a default and some specialisations
Example:
filetype = get_extension(format, "png", html="svg", latex="eps")
"""
try:
return alternates[format]
except KeyError:
return default
# end of utilities
def walk(x, action, format, meta):
"""Walk a tree, applying an action to every object.
Returns a modified tree. An action is a function of the form
`action(key, value, format, meta)`, where:
* `key` is the type of the pandoc object (e.g. 'Str', 'Para') `value` is
* the contents of the object (e.g. a string for 'Str', a list of
inline elements for 'Para')
* `format` is the target output format (as supplied by the
`format` argument of `walk`)
* `meta` is the document's metadata
The return of an action is either:
* `None`: this means that the object should remain unchanged
* a pandoc object: this will replace the original object
* a list of pandoc objects: these will replace the original object; the
list is merged with the neighbors of the orignal objects (spliced into
the list the original object belongs to); returning an empty list deletes
the object
"""
if isinstance(x, list):
array = []
for item in x:
if isinstance(item, dict) and 't' in item:
res = action(item['t'],
item['c'] if 'c' in item else None, format, meta)
if res is None:
array.append(walk(item, action, format, meta))
elif isinstance(res, list):
for z in res:
array.append(walk(z, action, format, meta))
else:
array.append(walk(res, action, format, meta))
else:
array.append(walk(item, action, format, meta))
return array
elif isinstance(x, dict):
for k in x:
x[k] = walk(x[k], action, format, meta)
return x
else:
return x
def toJSONFilter(action):
"""Like `toJSONFilters`, but takes a single action as argument.
"""
toJSONFilters([action])
def toJSONFilters(actions):
"""Generate a JSON-to-JSON filter from stdin to stdout
The filter:
* reads a JSON-formatted pandoc document from stdin
* transforms it by walking the tree and performing the actions
* returns a new JSON-formatted pandoc document to stdout
The argument `actions` is a list of functions of the form
`action(key, value, format, meta)`, as described in more
detail under `walk`.
This function calls `applyJSONFilters`, with the `format`
argument provided by the first command-line argument,
if present. (Pandoc sets this by default when calling
filters.)
"""
try:
input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
except AttributeError:
# Python 2 does not have sys.stdin.buffer.
# REF: https://stackoverflow.com/questions/2467928/python-unicodeencode
input_stream = codecs.getreader("utf-8")(sys.stdin)
source = input_stream.read()
if len(sys.argv) > 1:
format = sys.argv[1]
else:
format = ""
sys.stdout.write(applyJSONFilters(actions, source, format))
def applyJSONFilters(actions, source, format=""):
"""Walk through JSON structure and apply filters
This:
* reads a JSON-formatted pandoc document from a source string
* transforms it by walking the tree and performing the actions
* returns a new JSON-formatted pandoc document as a string
The `actions` argument is a list of functions (see `walk`
for a full description).
The argument `source` is a string encoded JSON object.
The argument `format` is a string describing the output format.
Returns a the new JSON-formatted pandoc document.
"""
doc = json.loads(source)
if 'meta' in doc:
meta = doc['meta']
elif doc[0]: # old API
meta = doc[0]['unMeta']
else:
meta = {}
altered = doc
for action in actions:
altered = walk(altered, action, format, meta)
return json.dumps(altered)
def stringify(x):
"""Walks the tree x and returns concatenated string content,
leaving out all formatting.
"""
result = []
def go(key, val, format, meta):
if key in ['Str', 'MetaString']:
result.append(val)
elif key == 'Code':
result.append(val[1])
elif key == 'Math':
result.append(val[1])
elif key == 'LineBreak':
result.append(" ")
elif key == 'SoftBreak':
result.append(" ")
elif key == 'Space':
result.append(" ")
walk(x, go, "", {})
return ''.join(result)
def attributes(attrs):
"""Returns an attribute list, constructed from the
dictionary attrs.
"""
attrs = attrs or {}
ident = attrs.get("id", "")
classes = attrs.get("classes", [])
keyvals = [[x, attrs[x]] for x in attrs if (x != "classes" and x != "id")]
return [ident, classes, keyvals]
def elt(eltType, numargs):
def fun(*args):
lenargs = len(args)
if lenargs != numargs:
raise ValueError(eltType + ' expects ' + str(numargs) +
' arguments, but given ' + str(lenargs))
if numargs == 0:
xs = []
elif len(args) == 1:
xs = args[0]
else:
xs = list(args)
return {'t': eltType, 'c': xs}
return fun
# Constructors for block elements
Plain = elt('Plain', 1)
Para = elt('Para', 1)
CodeBlock = elt('CodeBlock', 2)
RawBlock = elt('RawBlock', 2)
BlockQuote = elt('BlockQuote', 1)
OrderedList = elt('OrderedList', 2)
BulletList = elt('BulletList', 1)
DefinitionList = elt('DefinitionList', 1)
Header = elt('Header', 3)
HorizontalRule = elt('HorizontalRule', 0)
Table = elt('Table', 5)
Div = elt('Div', 2)
Null = elt('Null', 0)
# Constructors for inline elements
Str = elt('Str', 1)
Emph = elt('Emph', 1)
Strong = elt('Strong', 1)
Strikeout = elt('Strikeout', 1)
Superscript = elt('Superscript', 1)
Subscript = elt('Subscript', 1)
SmallCaps = elt('SmallCaps', 1)
Quoted = elt('Quoted', 2)
Cite = elt('Cite', 2)
Code = elt('Code', 2)
Space = elt('Space', 0)
LineBreak = elt('LineBreak', 0)
Math = elt('Math', 2)
RawInline = elt('RawInline', 2)
Link = elt('Link', 3)
Image = elt('Image', 3)
Note = elt('Note', 1)
SoftBreak = elt('SoftBreak', 0)
Span = elt('Span', 2)
###
# END pandocfilters.py
###
from functools import wraps
def for_html(func):
@wraps(func)
def wrapper(*args):
to_fmt = args[2]
if to_fmt in ['html', 'html4', 'html5']:
return func(*args)
return wrapper
def for_key(key):
def decorator(func):
@wraps(func)
def wrapper(*args):
input_key = args[0]
if input_key == key:
return func(*args)
return wrapper
return decorator
@for_html
@for_key('Header')
def header_filter(key, value, to_fmt, meta):
[level, [ident, klass, kvs], contents] = value
klass.append('remarkup-header')
contents.insert(0, RawInline('html', f'<a name="{ident}"></a>'))
return Header(level, [ident, klass, kvs], contents)
if __name__ == "__main__":
toJSONFilters([
header_filter,
])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment