# dmfs/preprocess_wiki.py

## preprocess_wiki.py
from collections import defaultdict
from jinja2 import Environment, FileSystemLoader, Template

import json
import os
import re

# Matches simple tags of the form
#   <!-- --- tagname: tagvalue -->
# group(1) is the tag name, group(2) the tag value with surrounding
# whitespace trimmed.
# Raw string literals are used so that regex escapes such as \s are not
# treated as (invalid) Python string escapes.
simple_tag_pattern = re.compile(r'<!--\s*---\s*([^:\s]+):\s*(.*?)\s*-->', re.DOTALL)

# Matches range tags of the form
#   <!-- --- tagname {-->tagvalue<!--} --- -->
# Unlike a simple tag, the tag value sits between the two HTML comments and
# therefore actually appears in the document.
range_tag_pattern = re.compile(r'<!--\s*---\s*([^{\s]+)\s*{\s*-->(.*?)<!--\s*}\s*---\s*-->', re.DOTALL)

# Matches include statements of the form
#   <!-- >>> template/name -->...<!-- <<< -->
# group(1) is the template name, group(2) the text between the two markers.
include_pattern = re.compile(r'<!--\s*>>>\s*(.*?)\s*-->(.*?)<!--\s*<<<\s*-->', re.DOTALL)

def parse_page(page_data):
    """Parse all tags of the given page data string into a dict.

    The result is a ``defaultdict(list)`` that maps each tag name to the
    list of its comma-separated, whitespace-stripped values. Values of
    simple tags are collected before values of range tags. The extra key
    ``'__has_includes'`` holds a bool that is True when the page contains at
    least one include statement.
    """
    tags = defaultdict(list)
    tags['__has_includes'] = False

    # Simple tags first, then range tags; both kinds contribute their
    # comma-separated values under the tag name.
    for pattern in (simple_tag_pattern, range_tag_pattern):
        for found in pattern.finditer(page_data):
            name, raw_value = found.group(1), found.group(2)
            tags[name].extend([piece.strip() for piece in raw_value.split(',')])

    # Remember whether this page needs include processing.
    if include_pattern.search(page_data):
        tags['__has_includes'] = True

    return tags


def build_index(path):
    """Index every wiki page (``*.md`` file) found below ``path``.

    Returns a dict with three entries:

    * ``'pages'`` maps a page id to the page's tag dict as produced by
      ``parse_page``. Each tag dict additionally gets ``'__file'`` (the
      normalized path of the file) and, if the page declares no ``id`` tag,
      ``'id'`` (the file path without its ``.md`` suffix).
    * ``'tags'``: ``tags[name]['_values'][value]`` is the list of pages
      that carry ``value`` for the tag ``name``.
    * ``'includes'`` lists the pages that contain include statements.
    """
    result = {'includes': [], 'pages': {}, 'tags': defaultdict(lambda: defaultdict(lambda: defaultdict(list)))}

    # os.walk already descends into every sub-directory; recursing here as
    # well would re-read each file only to throw the result away.
    for root, _dirs, files in os.walk(path):
        for filename in files:
            if os.path.splitext(filename)[1] != '.md':
                # not a wiki page -> ignore
                continue

            p = os.path.normpath(os.path.join(root, filename))

            with open(p) as f:
                vals = parse_page(f.read())
            vals['__file'] = p

            if 'id' not in vals:
                vals['id'] = p[0:-3]

            # An id declared through a tag is stored as a list of values,
            # which is unhashable; register the page under each declared id.
            page_ids = vals['id'] if isinstance(vals['id'], list) else [vals['id']]
            for page_id in page_ids:
                result['pages'][page_id] = vals

            # Index the page under every value of every entry; scalar
            # entries ('__file', '__has_includes', a defaulted 'id') are
            # treated as a single value.
            for k, v in vals.items():
                values = v if isinstance(v, list) else [v]
                for tag in values:
                    result['tags'][k]['_values'][tag].append(vals)

            if vals['__has_includes']:
                result['includes'].append(vals)

    return result


#
def render_template(env, template_file, context):
    """Render the template ``template_file`` obtained from ``env``.

    The entries of ``context`` are handed to the template as keyword
    arguments; the rendered result is returned.
    """
    return env.get_template(template_file).render(**context)


def process_template(match, env, context):
    """Build the replacement text for one matched include statement.

    The first group of ``match`` names the template. The template is
    rendered with ``context`` and the result is wrapped in include markers
    carrying the same template name.
    """
    template_name = match.group(1)
    return '<!-- >>> {0} -->{1}<!-- <<< -->'.format(
        template_name,
        render_template(env, template_name, context),
    )


def process_includes(env, page, context):
    """Re-render all include statements of ``page`` in place.

    The file named by ``page['__file']`` is read, every include statement
    in it is replaced by its freshly rendered template, and the file is
    rewritten only if its content actually changed.

    Templates receive every entry of ``context`` plus ``this``, which is
    bound to ``page``. ``context`` itself is left unmodified.
    """
    # Work on a shallow copy so the caller's dict is not polluted with the
    # page-specific 'this' entry.
    page_context = dict(context)
    page_context['this'] = page

    with open(page['__file'], "r+") as f:
        original = f.read()
        updated = include_pattern.sub(
            lambda match: process_template(match, env, page_context), original)

        # Skip the write when rendering produced identical text.
        if updated != original:
            f.seek(0)
            f.write(updated)
            # The new text may be shorter than the old one.
            f.truncate()

# Set up the template environment, index all wiki pages below the current
# directory, then re-render the includes of every page that has any.
env = Environment(loader=FileSystemLoader('jinja2-templates'))
index = build_index('.')

# debugging aid:
#print(json.dumps(index))

for page in index['includes']:
    process_includes(env, page, index)
	from collections import defaultdict
	from jinja2 import Environment, FileSystemLoader, Template

	import json
	import os
	import re

	# Matches simple tags of the form
	#   <!-- --- tagname: tagvalue -->
	# group(1) is the tag name, group(2) the tag value with surrounding
	# whitespace trimmed.
	# Raw string literals are used so that regex escapes such as \s are not
	# treated as (invalid) Python string escapes.
	simple_tag_pattern = re.compile(r'<!--\s*---\s*([^:\s]+):\s*(.*?)\s*-->', re.DOTALL)

	# Matches range tags of the form
	#   <!-- --- tagname {-->tagvalue<!--} --- -->
	# Unlike a simple tag, the tag value sits between the two HTML comments and
	# therefore actually appears in the document.
	range_tag_pattern = re.compile(r'<!--\s*---\s*([^{\s]+)\s*{\s*-->(.*?)<!--\s*}\s*---\s*-->', re.DOTALL)

	# Matches include statements of the form
	#   <!-- >>> template/name -->...<!-- <<< -->
	# group(1) is the template name, group(2) the text between the two markers.
	include_pattern = re.compile(r'<!--\s*>>>\s*(.*?)\s*-->(.*?)<!--\s*<<<\s*-->', re.DOTALL)

	def parse_page(page_data):
	    """Parse all tags of the given page data string into a dict.

	    The result is a ``defaultdict(list)`` that maps each tag name to the
	    list of its comma-separated, whitespace-stripped values. Values of
	    simple tags are collected before values of range tags. The extra key
	    ``'__has_includes'`` holds a bool that is True when the page contains
	    at least one include statement.
	    """
	    tags = defaultdict(list)
	    tags['__has_includes'] = False

	    # Simple tags first, then range tags; both kinds contribute their
	    # comma-separated values under the tag name.
	    for pattern in (simple_tag_pattern, range_tag_pattern):
	        for found in pattern.finditer(page_data):
	            name, raw_value = found.group(1), found.group(2)
	            tags[name].extend([piece.strip() for piece in raw_value.split(',')])

	    # Remember whether this page needs include processing.
	    if include_pattern.search(page_data):
	        tags['__has_includes'] = True

	    return tags


	def build_index(path):
	    """Index every wiki page (``*.md`` file) found below ``path``.

	    Returns a dict with three entries:

	    * ``'pages'`` maps a page id to the page's tag dict as produced by
	      ``parse_page``. Each tag dict additionally gets ``'__file'`` (the
	      normalized path of the file) and, if the page declares no ``id``
	      tag, ``'id'`` (the file path without its ``.md`` suffix).
	    * ``'tags'``: ``tags[name]['_values'][value]`` is the list of pages
	      that carry ``value`` for the tag ``name``.
	    * ``'includes'`` lists the pages that contain include statements.
	    """
	    result = {'includes': [], 'pages': {}, 'tags': defaultdict(lambda: defaultdict(lambda: defaultdict(list)))}

	    # os.walk already descends into every sub-directory; recursing here
	    # as well would re-read each file only to throw the result away.
	    for root, _dirs, files in os.walk(path):
	        for filename in files:
	            if os.path.splitext(filename)[1] != '.md':
	                # not a wiki page -> ignore
	                continue

	            p = os.path.normpath(os.path.join(root, filename))

	            with open(p) as f:
	                vals = parse_page(f.read())
	            vals['__file'] = p

	            if 'id' not in vals:
	                vals['id'] = p[0:-3]

	            # An id declared through a tag is stored as a list of values,
	            # which is unhashable; register the page under each declared id.
	            page_ids = vals['id'] if isinstance(vals['id'], list) else [vals['id']]
	            for page_id in page_ids:
	                result['pages'][page_id] = vals

	            # Index the page under every value of every entry; scalar
	            # entries are treated as a single value.
	            for k, v in vals.items():
	                values = v if isinstance(v, list) else [v]
	                for tag in values:
	                    result['tags'][k]['_values'][tag].append(vals)

	            if vals['__has_includes']:
	                result['includes'].append(vals)

	    return result


	#
	def render_template(env, template_file, context):
	    """Render the template ``template_file`` obtained from ``env``.

	    The entries of ``context`` are handed to the template as keyword
	    arguments; the rendered result is returned.
	    """
	    t = env.get_template(template_file)
	    return t.render(**context)


	def process_template(match, env, context):
	    """Build the replacement text for one matched include statement.

	    The first group of ``match`` names the template. The template is
	    rendered with ``context`` and the result is wrapped in include
	    markers carrying the same template name.
	    """
	    filename = match.group(1)
	    value = render_template(env, filename, context)
	    return '<!-- >>> {0} -->{1}<!-- <<< -->'.format(filename, value)


	def process_includes(env, page, context):
	    """Re-render all include statements of ``page`` in place.

	    The file named by ``page['__file']`` is read, every include statement
	    in it is replaced by its freshly rendered template, and the file is
	    rewritten only if its content actually changed.

	    Templates receive every entry of ``context`` plus ``this``, which is
	    bound to ``page``. ``context`` itself is left unmodified.
	    """
	    # Work on a shallow copy so the caller's dict is not polluted with
	    # the page-specific 'this' entry.
	    page_context = dict(context)
	    page_context['this'] = page

	    with open(page['__file'], "r+") as f:
	        original = f.read()
	        updated = include_pattern.sub(
	            lambda match: process_template(match, env, page_context), original)

	        # Skip the write when rendering produced identical text.
	        if updated != original:
	            f.seek(0)
	            f.write(updated)
	            # The new text may be shorter than the old one.
	            f.truncate()

	# Index all wiki pages below the current directory, set up the template
	# environment, then re-render the includes of every page that has any.
	index = build_index('.')
	env = Environment(loader=FileSystemLoader('jinja2-templates'))

	# debugging aid:
	#print(json.dumps(index))

	for page in index['includes']:
	    process_includes(env, page, index)