Skip to content

Instantly share code, notes, and snippets.

@dmfs
Last active August 29, 2015 14:27
Show Gist options
  • Save dmfs/0f0e5bfd6955f0eb3b2e to your computer and use it in GitHub Desktop.
Save dmfs/0f0e5bfd6955f0eb3b2e to your computer and use it in GitHub Desktop.
A quick and dirty proof of concept to pre-process tags & templates in davwiki
from collections import defaultdict
from jinja2 import Environment, FileSystemLoader, Template
import json
import os
import re
# All patterns are written as raw strings so that regex escapes such as ``\s``
# reach the regex engine untouched instead of being parsed (and warned about)
# as string escape sequences. re.DOTALL lets ``.`` span line breaks, so tag
# values and include bodies may cover several lines.

# A pattern that matches tags in the form of
#   <!-- --- tagname: tagvalue -->
# group(1) is the tag name, group(2) the tag value.
simple_tag_pattern = re.compile(
    r'<!--\s*---\s*([^:\s]+):\s*(.*?)\s*-->', re.DOTALL)

# A pattern that matches tags in the form of
#   <!-- --- tagname {-->tagvalue<!--} --- -->
# The difference to simple_tag_pattern is that the tag value actually appears
# in the document. group(1) is the tag name, group(2) the tag value.
range_tag_pattern = re.compile(
    r'<!--\s*---\s*([^{\s]+)\s*\{\s*-->(.*?)<!--\s*\}\s*---\s*-->', re.DOTALL)

# A pattern that matches include statements in the form of
#   <!-- >>> template/name -->...<!-- <<< -->
# group(1) is the template name, group(2) the currently included content.
include_pattern = re.compile(
    r'<!--\s*>>>\s*(.*?)\s*-->(.*?)<!--\s*<<<\s*-->', re.DOTALL)
def parse_page(page_data):
    """Parse all tags of the given page data string into a dict.

    Returns a ``defaultdict(list)`` that maps each tag name to the list of
    its comma-separated, whitespace-stripped values. Values of simple tags
    come before values of range tags. The extra key ``'__has_includes'``
    holds a bool telling whether the page contains an include statement.
    """
    tags = defaultdict(list)
    # Stored before any tag is added so the flag stays the first key.
    tags['__has_includes'] = include_pattern.search(page_data) is not None

    # Simple tags are collected first, range tags second; both kinds
    # contribute their values in exactly the same way.
    for pattern in (simple_tag_pattern, range_tag_pattern):
        for found in pattern.finditer(page_data):
            name, raw_value = found.group(1), found.group(2)
            pieces = raw_value.split(',')
            tags[name].extend([piece.strip() for piece in pieces])

    return tags
def build_index(path):
    """Index every ``.md`` wiki page found below ``path``.

    Each page is parsed with ``parse_page``; the resulting dict additionally
    gets the keys ``'__file'`` (normalized path of the page) and ``'id'``
    (a string: the first value of an explicit ``id`` tag if present and
    non-empty, otherwise the file path without its ``.md`` suffix).

    Returns a dict with three entries:

    * ``'pages'``    -- page id -> page dict
    * ``'tags'``     -- key -> ``'_values'`` -> value -> list of page dicts
      having that value; every key of the page dict is indexed, list values
      element by element
    * ``'includes'`` -- list of page dicts whose ``'__has_includes'`` is true
    """
    result = {
        'includes': [],
        'pages': {},
        'tags': defaultdict(lambda: defaultdict(lambda: defaultdict(list))),
    }
    # os.walk already descends into every sub-directory, so there is no need
    # to recurse manually (doing so only re-read files and discarded the
    # result).
    for root, _dirs, files in os.walk(path):
        for filename in files:
            _name, ext = os.path.splitext(filename)
            if ext != '.md':
                # not a wiki page -> ignore
                continue
            p = os.path.normpath(os.path.join(root, filename))
            with open(p) as f:
                vals = parse_page(f.read())
            vals['__file'] = p

            # parse_page stores tag values as lists, which cannot be used as
            # a dict key; reduce an explicit id to its first value and fall
            # back to the path without the '.md' suffix.
            explicit_id = vals.get('id')
            if isinstance(explicit_id, list):
                explicit_id = explicit_id[0] if explicit_id else ''
            vals['id'] = explicit_id or p[0:-3]
            result['pages'][vals['id']] = vals

            for k, v in vals.items():
                if not isinstance(v, list):
                    result['tags'][k]['_values'][v].append(vals)
                else:
                    for tag in v:
                        result['tags'][k]['_values'][tag].append(vals)
            if vals['__has_includes']:
                result['includes'].append(vals)
    return result
#
def render_template(env, template_file, context):
    """Render ``template_file`` and return the result.

    The template is looked up through ``env.get_template`` and the entries
    of ``context`` are handed to its ``render`` method as keyword arguments.
    """
    return env.get_template(template_file).render(**context)
def process_template(match, env, context):
    """Build the replacement text for one include statement match.

    ``match.group(1)`` is taken as the template name. The template is
    rendered with ``context`` and the output is wrapped in the include
    markers again, so the statement is still present in the result.
    """
    template_name = match.group(1)
    return '<!-- >>> {0} -->{1}<!-- <<< -->'.format(
        template_name,
        render_template(env, template_name, context),
    )
def process_includes(env, page, context):
    """Re-render all include statements of ``page`` in place.

    The file named by ``page['__file']`` is read, every include statement
    is replaced with the freshly rendered template, and the result is
    written back to the same file. ``context['this']`` is set to ``page``
    before rendering.
    """
    def expand(match):
        # substitution callback for a single include statement
        return process_template(match, env, context)

    with open(page['__file'], "r+") as handle:
        original_text = handle.read()
        context['this'] = page
        updated_text = include_pattern.sub(expand, original_text)
        # rewind and overwrite; truncate drops leftovers of a longer old file
        handle.seek(0)
        handle.write(updated_text)
        handle.truncate()
# Script body: index all wiki pages below the current directory, then
# re-render the include statements of every page that contains any.
# Templates are loaded from the 'jinja2-templates' directory and receive
# the whole index as their context.
index = build_index('.')
env = Environment(loader=FileSystemLoader('jinja2-templates'))
# debugging aid: dump the index as JSON
#print(json.dumps(index))
for page in index['includes']:
    process_includes(env, page, index)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment