# Pandoc filter Ruby API -- see http://about.ac/2015/06/docx-hack-guide.html
require 'json'
class String
  # Converts a snake_case string into CamelCase, e.g. "code_block" becomes
  # "CodeBlock".
  #
  # Each underscore-separated segment is passed through String#capitalize,
  # so characters after the first of every segment are lower-cased
  # ("raw_HTML" becomes "RawHtml") and empty segments vanish.
  #
  # The method is only defined when String does not already respond to
  # camelize, so an implementation supplied by another library that was
  # loaded earlier is left in place instead of being overwritten.
  #
  # Returns a new String; the receiver is not modified.
  unless method_defined?(:camelize)
    def camelize
      split('_').map(&:capitalize).join('')
    end
  end
end
# Helpers for writing pandoc JSON filters in Ruby: node constructors
# (Node), a tree walker (walk), a text extractor (stringify), an attribute
# builder (attributes) and a stdin/stdout driver (process).
module PandocFilter
  # Builds pandoc AST nodes as plain hashes of the form
  # {'t' => type, 'c' => contents}.
  #
  # Constructors are resolved dynamically: Node.code_block(attr, text)
  # looks up :code_block in NODES, checks the argument count and returns
  # the hash for a 'CodeBlock' node.
  class Node
    # key: node type (snake_case constructor name)
    # value: expected argument number
    NODES = {
      # block elements
      plain: 1,
      para: 1,
      code_block: 2,
      raw_block: 2,
      block_quote: 1,
      ordered_list: 2,
      bullet_list: 1,
      definition_list: 1,
      header: 3,
      horizontal_rule: 0,
      table: 5,
      div: 2,
      null: 0,
      # inline elements
      str: 1,
      emph: 1,
      strong: 1,
      strikeout: 1,
      superscript: 1,
      subscript: 1,
      small_caps: 1,
      quoted: 2,
      cite: 2,
      code: 2,
      space: 0,
      line_break: 0,
      math: 2,
      raw_inline: 2,
      link: 2,
      image: 2,
      note: 1,
      span: 2
    }

    class << self
      # Dispatches Node.<node_type>(*args) to a node hash.
      #
      # name - Symbol constructor name; must be a key of NODES.
      # args - node contents; their count must equal NODES[name].
      #
      # Returns the Hash produced by Node#to_hash.
      # Raises RuntimeError when the node type is unknown or the number of
      # arguments does not match.
      def method_missing(name, *args)
        raise "undefined #{name} node type" unless NODES.key?(name)

        expected = NODES[name]
        unless args.size == expected
          raise "#{name} expects #{expected} arguments, but given #{args.size}"
        end

        new(constructor_name(name), *args).to_hash
      end

      # Keeps respond_to? truthful for the constructors served by
      # method_missing.
      def respond_to_missing?(name, include_private = false)
        NODES.key?(name) || super
      end

      private

      # Turns a snake_case constructor name into the pandoc type name,
      # e.g. :code_block -> "CodeBlock". Done locally so the class does not
      # depend on a String#camelize monkey-patch.
      def constructor_name(name)
        name.to_s.split('_').map(&:capitalize).join('')
      end
    end

    attr_reader :type
    attr_reader :args
    attr_reader :numargs

    # type - String pandoc type name (e.g. 'Str').
    # args - the node's contents.
    def initialize(type, *args)
      @type = type
      @args = args
      @numargs = args.size
    end

    # Returns the node as {'t' => type, 'c' => contents}, where contents is
    # [] for no arguments, the bare argument for exactly one, and the
    # argument list otherwise.
    #
    # String keys are used on purpose: they match the keys of a parsed JSON
    # document, which is what PandocFilter.walk looks for ('t'), so nodes
    # built here are traversed like nodes read from pandoc.
    def to_hash
      contents =
        case numargs
        when 0 then []
        when 1 then args[0]
        else args
        end
      { 't' => type, 'c' => contents }
    end
  end

  # Runs a block as a pandoc filter: reads a JSON-formatted pandoc document
  # from stdin, transforms it by walking the tree with the block, and
  # writes the resulting JSON document to stdout.
  #
  # The block is called as action(key, value, format, meta), where key is
  # the type of the pandoc object (e.g. 'Str', 'Para'), value is the
  # contents of the object (e.g. a string for 'Str', a list of inline
  # elements for 'Para'), format is the target output format (taken from
  # the first command line argument if present, otherwise ""), and meta is
  # the document's metadata.
  #
  # If the block returns nil, the object to which it applies remains
  # unchanged. If it returns an object, the object is replaced. If it
  # returns a list, the list is spliced into the list to which the target
  # object belongs. (So, returning an empty list deletes the object.)
  #
  # Both document layouts are accepted: the older two-element array
  # [{'unMeta' => meta}, blocks] and the newer hash with 'meta' and
  # 'blocks' keys.
  #
  # action - block applied to every node.
  #
  # Returns whatever JSON.dump returns after writing to $stdout.
  def self.process(&action)
    # JSON.parse never instantiates arbitrary classes from the input;
    # max_nesting is disabled so deeply nested documents are not rejected.
    doc = JSON.parse($stdin.read, max_nesting: false)
    # ARGV does not contain the script name, so the format that pandoc
    # passes to the filter is the first element.
    format = ARGV.first || ''
    altered = walk(doc, format, document_meta(doc), &action)
    JSON.dump(altered, $stdout)
  end

  # Extracts the metadata from either supported document layout; returns
  # an empty hash when the document has neither shape.
  def self.document_meta(doc)
    if doc.is_a?(Hash)
      doc['meta']
    elsif doc.is_a?(Array) && doc[0].is_a?(Hash)
      doc[0]['unMeta']
    else
      {}
    end
  end
  private_class_method :document_meta

  # Walks the tree x and returns concatenated string content,
  # leaving out all formatting.
  #
  # 'Str' and 'MetaString' contribute their text, 'Code' and 'Math' their
  # second content element, and 'LineBreak', 'SoftBreak' and 'Space' a
  # single blank.
  def self.stringify(x)
    result = []
    collect = lambda do |key, val, _format, _meta|
      case key
      when 'Str', 'MetaString' then result.push(val)
      when 'Code', 'Math' then result.push(val[1])
      when 'LineBreak', 'SoftBreak', 'Space' then result.push(' ')
      end
      # Always answer nil ("leave the node unchanged"). Array#push returns
      # the accumulator, which walk would otherwise treat as a replacement
      # list and re-iterate on every matching node.
      nil
    end
    walk(x, '', {}, &collect)
    result.join('')
  end

  # Returns an attribute list [identifier, classes, key-value pairs],
  # constructed from the dictionary attrs.
  #
  # attrs - Hash with optional 'id' and 'classes' entries; every other
  #         entry becomes a [key, value] pair. nil is treated as {}.
  #         The hash is not modified.
  def self.attributes(attrs)
    attrs ||= {}
    ident = attrs.fetch('id', '')
    classes = attrs.fetch('classes', [])
    keyvals = attrs.reject { |key, _| key == 'id' || key == 'classes' }.to_a
    [ident, classes, keyvals]
  end

  # Instance-level form of PandocFilter.attributes, available to objects
  # that include or extend this module.
  def attributes(attrs)
    PandocFilter.attributes(attrs)
  end

  # Walk a tree, applying an action to every object.
  # Returns a modified tree; the input is not modified.
  #
  # The action is invoked for each Hash carrying a 't' key that is an
  # element of an Array, as action(item['t'], item['c'], format, meta):
  #   nil    -> the element is kept and its children are walked
  #   Array  -> each member is walked and spliced in place of the element
  #   other  -> the value is walked and replaces the element
  # Hashes are rebuilt with every value walked; anything else is returned
  # as is.
  def self.walk(x, format, meta, &action)
    case x
    when Array
      x.each_with_object([]) do |item, out|
        is_node = item.is_a?(Hash) && item.key?('t')
        res = is_node ? action.call(item['t'], item['c'], format, meta) : nil

        if res.nil?
          out << walk(item, format, meta, &action)
        elsif res.is_a?(Array)
          res.each { |member| out << walk(member, format, meta, &action) }
        else
          out << walk(res, format, meta, &action)
        end
      end
    when Hash
      x.each_with_object({}) do |(key, value), out|
        out[key] = walk(value, format, meta, &action)
      end
    else
      x
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment