zengxs/marku

## marku
#!/usr/bin/env python3

###
# START pandocfilters.py
###

# Author: John MacFarlane <jgm@berkeley.edu>
# Copyright: (C) 2013 John MacFarlane
# License: BSD3

"""
Functions to aid writing python scripts that process the pandoc
AST serialized as JSON.
"""

import codecs
import hashlib
import io
import json
import os
import sys


# some utility-functions: make it easier to create your own filters


def get_filename4code(module, content, ext=None):
    """Build a content-addressed file path inside ``<module>-images``.

    The base name is the SHA-1 hex digest of ``content`` (encoded with the
    filesystem encoding), optionally followed by ``.<ext>`` when ``ext`` is
    truthy.  The directory is created on demand so that the caller can
    write the file; a note goes to stderr when it is newly created.

    Example:
        filename = get_filename4code("myfilter", code)
    """
    target_dir = module + "-images"
    digest = hashlib.sha1(content.encode(sys.getfilesystemencoding()))
    try:
        os.mkdir(target_dir)
        sys.stderr.write('Created directory ' + target_dir + '\n')
    except OSError:
        # Best effort: any OSError (e.g. the directory already exists)
        # is ignored on purpose.
        pass
    basename = digest.hexdigest()
    if ext:
        basename = basename + "." + ext
    return os.path.join(target_dir, basename)

def get_value(kv, key, value = None):
    """Split the option pairs ``kv`` on ``key``.

    Returns a ``(value, rest)`` tuple: ``value`` is the value of the last
    pair whose key equals ``key`` (or the ``value`` argument when there is
    none), and ``rest`` is a list of ``[k, v]`` pairs for all other keys,
    in their original order.
    """
    found = value
    leftovers = []
    for name, val in kv:
        if name == key:
            found = val
            continue
        leftovers.append([name, val])
    return found, leftovers

def get_caption(kv):
    """Extract the ``caption`` option from the key/value pairs ``kv``.

    Returns ``(caption, typef, rest)``.  When a caption is present,
    ``caption`` is ``[Str(value)]`` and ``typef`` is ``"fig:"``; otherwise
    they are ``[]`` and ``""``.  ``rest`` holds the remaining pairs.

    Example:
      if key == 'CodeBlock':
        [[ident, classes, keyvals], code] = value
        caption, typef, keyvals = get_caption(keyvals)
        ...
        return Para([Image([ident, [], keyvals], caption, [filename, typef])])
    """
    value, res = get_value(kv, u"caption")
    if value is None:
        return [], "", res
    return [Str(value)], "fig:", res


def get_extension(format, default, **alternates):
    """Pick the file extension for an output format.

    Returns the keyword argument named after ``format`` when one was
    supplied, and ``default`` otherwise.

    Example:
      filetype = get_extension(format, "png", html="svg", latex="eps")
    """
    return alternates.get(format, default)

# end of utilities


def walk(x, action, format, meta):
    """Walk a tree, applying an action to every object.
    Returns a modified tree.  An action is a function of the form
    `action(key, value, format, meta)`, where:

    * `key` is the type of the pandoc object (e.g. 'Str', 'Para')
    * `value` is the contents of the object (e.g. a string for 'Str', a
      list of inline elements for 'Para'), or `None` when the object has
      no 'c' entry
    * `format` is the target output format (as supplied by the
      `format` argument of `walk`)
    * `meta` is the document's metadata

    The return of an action is either:

    * `None`: this means that the object should remain unchanged
    * a pandoc object: this will replace the original object
    * a list of pandoc objects: these will replace the original object; the
      list is merged with the neighbors of the original object (spliced into
      the list the original object belongs to); returning an empty list
      deletes the object

    Lists are rebuilt; dictionaries are updated in place and returned.
    """
    if isinstance(x, dict):
        # Dict values are rewritten in place; the key set never changes.
        for k in x:
            x[k] = walk(x[k], action, format, meta)
        return x
    if not isinstance(x, list):
        return x

    array = []
    for item in x:
        if not (isinstance(item, dict) and 't' in item):
            # Not a tagged pandoc object: just recurse into it.
            array.append(walk(item, action, format, meta))
            continue
        res = action(item['t'], item.get('c'), format, meta)
        if res is None:
            replacements = [item]
        elif isinstance(res, list):
            replacements = res
        else:
            replacements = [res]
        for z in replacements:
            array.append(walk(z, action, format, meta))
    return array

def toJSONFilter(action):
    """Run a stdin-to-stdout JSON filter made of one action.

    Shorthand for calling `toJSONFilters` with a one-element list.
    """
    actions = [action]
    toJSONFilters(actions)


def toJSONFilters(actions):
    """Generate a JSON-to-JSON filter from stdin to stdout

    The filter:

    * reads a JSON-formatted pandoc document from stdin (decoded as UTF-8)
    * transforms it by walking the tree and performing the actions
    * writes the new JSON-formatted pandoc document to stdout

    The argument `actions` is a list of functions of the form
    `action(key, value, format, meta)`, as described in more
    detail under `walk`.

    The work is delegated to `applyJSONFilters`; its `format` argument is
    the first command-line argument when there is one, else the empty
    string.  (Pandoc sets this by default when calling filters.)
    """
    try:
        reader = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
    except AttributeError:
        # Python 2 does not have sys.stdin.buffer.
        # REF: https://stackoverflow.com/questions/2467928/python-unicodeencode
        reader = codecs.getreader("utf-8")(sys.stdin)

    document = reader.read()
    target_format = sys.argv[1] if len(sys.argv) > 1 else ""
    sys.stdout.write(applyJSONFilters(actions, document, target_format))

def applyJSONFilters(actions, source, format=""):
    """Walk through JSON structure and apply filters

    This:

    * reads a JSON-formatted pandoc document from a source string
    * transforms it by walking the tree and performing the actions
    * returns a new JSON-formatted pandoc document as a string

    The `actions` argument is a list of functions (see `walk`
    for a full description); they are applied in order, each one to the
    result of the previous one.

    The argument `source` is a string encoded JSON object.

    The argument `format` is a string describing the output format.

    Returns the new JSON-formatted pandoc document.  Documents without
    metadata (a dict lacking 'meta', or an empty list) are processed with
    an empty metadata dict instead of raising KeyError/IndexError.
    """

    doc = json.loads(source)

    if isinstance(doc, dict):
        # Dict-shaped document: metadata is optional.
        meta = doc.get('meta', {})
    elif doc and doc[0]:  # old API: metadata lives under doc[0]['unMeta']
        meta = doc[0]['unMeta']
    else:
        meta = {}

    altered = doc
    for action in actions:
        altered = walk(altered, action, format, meta)

    return json.dumps(altered)


def stringify(x):
    """Walks the tree x and returns concatenated string content,
    leaving out all formatting.

    'Str' and 'MetaString' contribute their value, 'Code' and 'Math'
    contribute the second element of their value, and 'LineBreak',
    'SoftBreak' and 'Space' each contribute a single space.
    """
    pieces = []

    def collect(key, val, format, meta):
        # Always returns None, so the walked tree keeps its elements.
        if key in ('Str', 'MetaString'):
            pieces.append(val)
        elif key in ('Code', 'Math'):
            pieces.append(val[1])
        elif key in ('LineBreak', 'SoftBreak', 'Space'):
            pieces.append(" ")

    walk(x, collect, "", {})
    return ''.join(pieces)


def attributes(attrs):
    """Build a pandoc attribute list ``[ident, classes, keyvals]``.

    ``attrs`` is a dictionary (or a falsy value, treated as empty).  Its
    "id" entry becomes ``ident`` (default ""), its "classes" entry becomes
    ``classes`` (default []), and every other entry becomes a
    ``[key, value]`` pair in ``keyvals``.
    """
    source = attrs or {}
    reserved = ("classes", "id")
    ident = source.get("id", "")
    classes = source.get("classes", [])
    keyvals = [[name, source[name]] for name in source if name not in reserved]
    return [ident, classes, keyvals]


def elt(eltType, numargs):
    """Return a constructor for pandoc elements of type ``eltType``.

    The returned function accepts exactly ``numargs`` positional arguments
    (raising ValueError otherwise) and returns ``{'t': eltType, 'c': xs}``
    where ``xs`` is ``[]`` for zero arguments, the argument itself for a
    single argument, and the list of arguments otherwise.
    """
    def fun(*args):
        lenargs = len(args)
        if lenargs != numargs:
            message = (eltType + ' expects ' + str(numargs) +
                       ' arguments, but given ' + str(lenargs))
            raise ValueError(message)
        if numargs == 0:
            contents = []
        elif lenargs == 1:
            # A single argument is stored unwrapped.
            contents = args[0]
        else:
            contents = list(args)
        return {'t': eltType, 'c': contents}
    return fun

# Constructors for block elements.
# Each name is bound to a function produced by elt(); the second argument
# is the exact number of positional arguments that constructor accepts.

Plain = elt('Plain', 1)
Para = elt('Para', 1)
CodeBlock = elt('CodeBlock', 2)
RawBlock = elt('RawBlock', 2)
BlockQuote = elt('BlockQuote', 1)
OrderedList = elt('OrderedList', 2)
BulletList = elt('BulletList', 1)
DefinitionList = elt('DefinitionList', 1)
Header = elt('Header', 3)
HorizontalRule = elt('HorizontalRule', 0)
Table = elt('Table', 5)
Div = elt('Div', 2)
Null = elt('Null', 0)

# Constructors for inline elements (same convention as above).

Str = elt('Str', 1)
Emph = elt('Emph', 1)
Strong = elt('Strong', 1)
Strikeout = elt('Strikeout', 1)
Superscript = elt('Superscript', 1)
Subscript = elt('Subscript', 1)
SmallCaps = elt('SmallCaps', 1)
Quoted = elt('Quoted', 2)
Cite = elt('Cite', 2)
Code = elt('Code', 2)
Space = elt('Space', 0)
LineBreak = elt('LineBreak', 0)
Math = elt('Math', 2)
RawInline = elt('RawInline', 2)
Link = elt('Link', 3)
Image = elt('Image', 3)
Note = elt('Note', 1)
SoftBreak = elt('SoftBreak', 0)
Span = elt('Span', 2)

###
# END  pandocfilters.py
###

from functools import wraps


def for_html(func):
    """Decorator: run a filter action only for HTML output formats.

    The wrapped action is called when its third positional argument (the
    target format) is 'html', 'html4' or 'html5'; for any other format the
    wrapper returns None without calling it.
    """
    html_formats = ('html', 'html4', 'html5')

    @wraps(func)
    def wrapper(*args):
        if args[2] not in html_formats:
            return None
        return func(*args)
    return wrapper


def for_key(key):
    """Decorator factory: run a filter action only for one element type.

    The wrapped action is called when its first positional argument (the
    element key) equals ``key``; otherwise the wrapper returns None
    without calling it.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args):
            matches = args[0] == key
            return func(*args) if matches else None
        return wrapper
    return decorator

@for_html
@for_key('Header')
def header_filter(key, value, to_fmt, meta):
    """Decorate a Header for HTML output.

    Adds the 'remarkup-header' class and prepends a raw HTML
    ``<a name="...">`` anchor carrying the header's identifier.  Through
    the decorators, this only runs for 'Header' elements and HTML formats.

    The identifier is HTML-escaped before it is placed in the raw anchor,
    so characters such as '"', '<' or '&' cannot break the markup.  The
    incoming class and content lists are copied rather than mutated.

    Returns the replacement Header element.
    """
    # Function-scope import so the file's top-level imports need no change.
    from html import escape

    [level, [ident, klass, kvs], contents] = value
    anchor = RawInline('html', f'<a name="{escape(ident, quote=True)}"></a>')

    return Header(level,
                  [ident, klass + ['remarkup-header'], kvs],
                  [anchor] + contents)

if __name__ == "__main__":
    # Script entry point: filter stdin to stdout with header_filter only.
    toJSONFilter(header_filter)
	#!/usr/bin/env python3

	###
	# START pandocfilters.py
	###

	# Author: John MacFarlane <jgm@berkeley.edu>
	# Copyright: (C) 2013 John MacFarlane
	# License: BSD3

	"""
	Functions to aid writing python scripts that process the pandoc
	AST serialized as JSON.
	"""

	import codecs
	import hashlib
	import io
	import json
	import os
	import sys


	# some utility-functions: make it easier to create your own filters


	def get_filename4code(module, content, ext=None):
	    """Generate filename based on content

	    The base name is the SHA-1 hex digest of ``content`` (encoded with
	    the filesystem encoding), optionally followed by ``.<ext>``.  The
	    function ensures that the (temporary) directory ``<module>-images``
	    exists, so that the file can be written.

	    Example:
	        filename = get_filename4code("myfilter", code)
	    """
	    imagedir = module + "-images"
	    fn = hashlib.sha1(content.encode(sys.getfilesystemencoding())).hexdigest()
	    try:
	        os.mkdir(imagedir)
	        sys.stderr.write('Created directory ' + imagedir + '\n')
	    except OSError:
	        # Best effort: any OSError (e.g. the directory already exists)
	        # is ignored on purpose.
	        pass
	    if ext:
	        fn += "." + ext
	    return os.path.join(imagedir, fn)

	def get_value(kv, key, value = None):
	    """get value from the keyvalues (options)

	    Returns ``(value, rest)``: the value of the last pair whose key
	    equals ``key`` (or the ``value`` argument when there is none), and
	    the remaining pairs as ``[k, v]`` lists.
	    """
	    res = []
	    for k, v in kv:
	        if k == key:
	            value = v
	        else:
	            res.append([k, v])
	    return value, res

	def get_caption(kv):
	    """get caption from the keyvalues (options)

	    Returns ``(caption, typef, rest)``.  When a "caption" option is
	    present, ``caption`` is ``[Str(value)]`` and ``typef`` is "fig:";
	    otherwise they are ``[]`` and "".  ``rest`` holds the other pairs.

	    Example:
	      if key == 'CodeBlock':
	        [[ident, classes, keyvals], code] = value
	        caption, typef, keyvals = get_caption(keyvals)
	        ...
	        return Para([Image([ident, [], keyvals], caption, [filename, typef])])
	    """
	    caption = []
	    typef = ""
	    value, res = get_value(kv, u"caption")
	    if value is not None:
	        caption = [Str(value)]
	        typef = "fig:"

	    return caption, typef, res


	def get_extension(format, default, **alternates):
	    """get the extension for the result, needs a default and some specialisations

	    Returns the keyword argument named after ``format`` when one was
	    given, and ``default`` otherwise.

	    Example:
	      filetype = get_extension(format, "png", html="svg", latex="eps")
	    """
	    try:
	        return alternates[format]
	    except KeyError:
	        return default

	# end of utilities


	def walk(x, action, format, meta):
	    """Walk a tree, applying an action to every object.
	    Returns a modified tree. An action is a function of the form
	    `action(key, value, format, meta)`, where:

	    * `key` is the type of the pandoc object (e.g. 'Str', 'Para')
	    * `value` is the contents of the object (e.g. a string for 'Str', a
	      list of inline elements for 'Para'), or `None` when the object
	      has no 'c' entry
	    * `format` is the target output format (as supplied by the
	      `format` argument of `walk`)
	    * `meta` is the document's metadata

	    The return of an action is either:

	    * `None`: this means that the object should remain unchanged
	    * a pandoc object: this will replace the original object
	    * a list of pandoc objects: these will replace the original object;
	      the list is merged with the neighbors of the original object
	      (spliced into the list the original object belongs to); returning
	      an empty list deletes the object

	    Lists are rebuilt; dictionaries are updated in place and returned.
	    """
	    if isinstance(x, list):
	        array = []
	        for item in x:
	            if isinstance(item, dict) and 't' in item:
	                res = action(item['t'],
	                             item['c'] if 'c' in item else None, format, meta)
	                if res is None:
	                    array.append(walk(item, action, format, meta))
	                elif isinstance(res, list):
	                    for z in res:
	                        array.append(walk(z, action, format, meta))
	                else:
	                    array.append(walk(res, action, format, meta))
	            else:
	                array.append(walk(item, action, format, meta))
	        return array
	    elif isinstance(x, dict):
	        # Dict values are rewritten in place; the key set never changes.
	        for k in x:
	            x[k] = walk(x[k], action, format, meta)
	        return x
	    else:
	        return x

	def toJSONFilter(action):
	    """Like `toJSONFilters`, but takes a single action as argument.
	    """
	    toJSONFilters([action])


	def toJSONFilters(actions):
	    """Generate a JSON-to-JSON filter from stdin to stdout

	    The filter:

	    * reads a JSON-formatted pandoc document from stdin
	    * transforms it by walking the tree and performing the actions
	    * returns a new JSON-formatted pandoc document to stdout

	    The argument `actions` is a list of functions of the form
	    `action(key, value, format, meta)`, as described in more
	    detail under `walk`.

	    This function calls `applyJSONFilters`, with the `format`
	    argument provided by the first command-line argument,
	    if present. (Pandoc sets this by default when calling
	    filters.)
	    """
	    try:
	        input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
	    except AttributeError:
	        # Python 2 does not have sys.stdin.buffer.
	        # REF: https://stackoverflow.com/questions/2467928/python-unicodeencode
	        input_stream = codecs.getreader("utf-8")(sys.stdin)

	    source = input_stream.read()
	    if len(sys.argv) > 1:
	        format = sys.argv[1]
	    else:
	        format = ""

	    sys.stdout.write(applyJSONFilters(actions, source, format))

	def applyJSONFilters(actions, source, format=""):
	    """Walk through JSON structure and apply filters

	    This:

	    * reads a JSON-formatted pandoc document from a source string
	    * transforms it by walking the tree and performing the actions
	    * returns a new JSON-formatted pandoc document as a string

	    The `actions` argument is a list of functions (see `walk`
	    for a full description).

	    The argument `source` is a string encoded JSON object.

	    The argument `format` is a string describing the output format.

	    Returns the new JSON-formatted pandoc document.  Documents without
	    metadata (a dict lacking 'meta', or an empty list) are processed
	    with an empty metadata dict instead of raising KeyError/IndexError.
	    """

	    doc = json.loads(source)

	    if isinstance(doc, dict):
	        # Dict-shaped document: metadata is optional.
	        meta = doc.get('meta', {})
	    elif doc and doc[0]:  # old API: metadata lives under doc[0]['unMeta']
	        meta = doc[0]['unMeta']
	    else:
	        meta = {}
	    altered = doc
	    for action in actions:
	        altered = walk(altered, action, format, meta)

	    return json.dumps(altered)


	def stringify(x):
	    """Walks the tree x and returns concatenated string content,
	    leaving out all formatting.

	    'Str' and 'MetaString' contribute their value, 'Code' and 'Math'
	    the second element of their value, and 'LineBreak', 'SoftBreak'
	    and 'Space' a single space each.
	    """
	    result = []

	    def go(key, val, format, meta):
	        # Always returns None, so the walked tree keeps its elements.
	        if key in ['Str', 'MetaString']:
	            result.append(val)
	        elif key == 'Code':
	            result.append(val[1])
	        elif key == 'Math':
	            result.append(val[1])
	        elif key == 'LineBreak':
	            result.append(" ")
	        elif key == 'SoftBreak':
	            result.append(" ")
	        elif key == 'Space':
	            result.append(" ")

	    walk(x, go, "", {})
	    return ''.join(result)


	def attributes(attrs):
	    """Returns an attribute list, constructed from the
	    dictionary attrs.

	    The result is ``[ident, classes, keyvals]``: "id" (default "") and
	    "classes" (default []) are taken from ``attrs``, and every other
	    entry becomes a ``[key, value]`` pair.  A falsy ``attrs`` is
	    treated as an empty dictionary.
	    """
	    attrs = attrs or {}
	    ident = attrs.get("id", "")
	    classes = attrs.get("classes", [])
	    keyvals = [[x, attrs[x]] for x in attrs if (x != "classes" and x != "id")]
	    return [ident, classes, keyvals]


	def elt(eltType, numargs):
	    """Return a constructor for pandoc elements of type ``eltType``.

	    The returned function accepts exactly ``numargs`` positional
	    arguments (raising ValueError otherwise) and returns
	    ``{'t': eltType, 'c': xs}`` where ``xs`` is ``[]`` for zero
	    arguments, the argument itself for one, and the list of arguments
	    otherwise.
	    """
	    def fun(*args):
	        lenargs = len(args)
	        if lenargs != numargs:
	            raise ValueError(eltType + ' expects ' + str(numargs) +
	                             ' arguments, but given ' + str(lenargs))
	        if numargs == 0:
	            xs = []
	        elif len(args) == 1:
	            # A single argument is stored unwrapped.
	            xs = args[0]
	        else:
	            xs = list(args)
	        return {'t': eltType, 'c': xs}
	    return fun

	# Constructors for block elements.
	# Each name is bound to a function produced by elt(); the second
	# argument is the exact number of positional arguments it accepts.

	Plain = elt('Plain', 1)
	Para = elt('Para', 1)
	CodeBlock = elt('CodeBlock', 2)
	RawBlock = elt('RawBlock', 2)
	BlockQuote = elt('BlockQuote', 1)
	OrderedList = elt('OrderedList', 2)
	BulletList = elt('BulletList', 1)
	DefinitionList = elt('DefinitionList', 1)
	Header = elt('Header', 3)
	HorizontalRule = elt('HorizontalRule', 0)
	Table = elt('Table', 5)
	Div = elt('Div', 2)
	Null = elt('Null', 0)

	# Constructors for inline elements (same convention as above).

	Str = elt('Str', 1)
	Emph = elt('Emph', 1)
	Strong = elt('Strong', 1)
	Strikeout = elt('Strikeout', 1)
	Superscript = elt('Superscript', 1)
	Subscript = elt('Subscript', 1)
	SmallCaps = elt('SmallCaps', 1)
	Quoted = elt('Quoted', 2)
	Cite = elt('Cite', 2)
	Code = elt('Code', 2)
	Space = elt('Space', 0)
	LineBreak = elt('LineBreak', 0)
	Math = elt('Math', 2)
	RawInline = elt('RawInline', 2)
	Link = elt('Link', 3)
	Image = elt('Image', 3)
	Note = elt('Note', 1)
	SoftBreak = elt('SoftBreak', 0)
	Span = elt('Span', 2)

	###
	# END pandocfilters.py
	###

	from functools import wraps


	def for_html(func):
	    """Decorator: run a filter action only for HTML output formats.

	    The wrapped action is called when its third positional argument
	    (the target format) is 'html', 'html4' or 'html5'; otherwise the
	    wrapper returns None without calling it.
	    """
	    @wraps(func)
	    def wrapper(*args):
	        to_fmt = args[2]
	        if to_fmt in ['html', 'html4', 'html5']:
	            return func(*args)
	    return wrapper


	def for_key(key):
	    """Decorator factory: run a filter action only for one element type.

	    The wrapped action is called when its first positional argument
	    (the element key) equals ``key``; otherwise the wrapper returns
	    None without calling it.
	    """
	    def decorator(func):
	        @wraps(func)
	        def wrapper(*args):
	            input_key = args[0]
	            if input_key == key:
	                return func(*args)
	        return wrapper
	    return decorator

	@for_html
	@for_key('Header')
	def header_filter(key, value, to_fmt, meta):
	    """Decorate a Header for HTML output.

	    Adds the 'remarkup-header' class and prepends a raw HTML
	    ``<a name="...">`` anchor carrying the header's identifier.
	    Through the decorators, this only runs for 'Header' elements and
	    HTML formats.

	    The identifier is HTML-escaped before it is placed in the raw
	    anchor, so characters such as '"', '<' or '&' cannot break the
	    markup.  The incoming class and content lists are copied rather
	    than mutated.

	    Returns the replacement Header element.
	    """
	    # Function-scope import so the top-level imports need no change.
	    from html import escape

	    [level, [ident, klass, kvs], contents] = value
	    anchor = RawInline('html', f'<a name="{escape(ident, quote=True)}"></a>')

	    return Header(level,
	                  [ident, klass + ['remarkup-header'], kvs],
	                  [anchor] + contents)

	if __name__ == "__main__":
	    # Script entry point: filter stdin to stdout with header_filter only.
	    toJSONFilters([
	        header_filter,
	    ])