Created
January 5, 2015 17:00
-
-
Save adamnew123456/1d14699b865bb8b35556 to your computer and use it in GitHub Desktop.
WikiWord - A Static Wiki-Like Site Generator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Converts a tree of wiki pages into a tree of HTML pages. | |
""" | |
from collections import defaultdict | |
import configparser | |
import glob | |
import itertools | |
import os, os.path | |
import re | |
import shutil | |
import subprocess | |
import sys | |
# For example, SomeNamespace:OtherPage
NAMESPACE_CHAR = ':' | |
# The underscore is included to avoid atrocities like CeeLanguage, replacing | |
# it with C_Language | |
WIKIWORD = re.compile(r'(\W|^)([A-Z][a-z0-9:_]+){2,}(\W|$)') | |
# The file extension used for wiki pages | |
WIKI_EXT = '.md' | |
# The main page to use, by copying it into index.html - None means that a
# 'master file' will be produced instead, which lists all other pages
MAIN_PAGE = None | |
# The command line to use when converting wiki files into their HTML output. | |
# Note that no input file is given, because stdin is used; similarly, no output | |
# file is given, as stdout is used to collect output | |
CONVERT_CMDLINE = 'hsmarkdown' | |
# The link format to use when converting WikiWords to links. | |
LINK_FORMAT = '<span class="wikiword"> [{title}]({link}) </span>' | |
# The path of the CSS style to use | |
CSS_FILE = '/style.css' | |
# The HTML output to use as a template | |
OUTPUT_HTML = ''' | |
<html> | |
<head> | |
<title> {title} </title> | |
<link rel="stylesheet" type="text/css" href="{css}" /> | |
</head> | |
<body> | |
<h1 class="title"> {title} </h1> | |
{body} | |
</body> | |
</html> | |
''' | |
def get_path_elems(path):
    """
    Breaks a path into the list of its components, outermost first.

    The path is normalized first, so redundant separators and '..' segments
    are resolved before splitting. A leading root separator is not included
    in the result.

    >>> get_path_elems('/foo/bar/baz')
    ['foo', 'bar', 'baz']
    """
    reversed_parts = []
    head, tail = os.path.split(os.path.normpath(path))

    # Peel components off the right-hand side until nothing is left to peel
    while tail:
        reversed_parts.append(tail)
        head, tail = os.path.split(head)

    return reversed_parts[::-1]
def create_directory_tree(path):
    """
    Creates the directory named by the given path, along with any missing
    parent directories. Directories which already exist are left untouched.

    Both absolute and relative paths are accepted; relative paths are
    resolved against the current working directory.

    Raises FileExistsError if the path (or one of its parents) exists but is
    not a directory.
    """
    # os.makedirs handles every element of the path (including the first
    # one), works for relative paths and does not race between checking for
    # a directory and creating it.
    os.makedirs(os.path.normpath(path), exist_ok=True)
def wikiword_to_path(wikiword, suffix='.html', leading_slash=True):
    """
    Builds the path that a (possibly namespaced) WikiWord lives at. Every
    namespace becomes a directory, and the suffix is appended to the final
    element.

    >>> wikiword_to_path('WikiWord')
    '/WikiWord.html'
    >>> wikiword_to_path('NameSpace:WikiWord')
    '/NameSpace/WikiWord.html'
    >>> wikiword_to_path('WikiWord', suffix='.foo')
    '/WikiWord.foo'
    >>> wikiword_to_path('WikiWord', leading_slash=False)
    'WikiWord.html'
    """
    if leading_slash:
        prefix = '/'
    else:
        prefix = ''

    segments = wikiword.split(NAMESPACE_CHAR)
    return prefix + os.sep.join(segments) + suffix
def path_to_wikiword(path):
    """
    Converts a path (relative to either the input directory or the output
    directory) to a namespaced WikiWord, ignoring any suffixes.

    Directories become namespaces, joined with NAMESPACE_CHAR so that this
    function stays the inverse of wikiword_to_path.

    >>> path_to_wikiword('/WikiWord.html')
    'WikiWord'
    >>> path_to_wikiword('/NameSpace/WikiWord.html')
    'NameSpace:WikiWord'
    >>> path_to_wikiword('/WikiWord.foo')
    'WikiWord'
    >>> path_to_wikiword('WikiWord.html')
    'WikiWord'
    """
    elements = os.path.normpath(path).split(os.sep)

    # The path may start with a separator, which leaves an empty first
    # element behind - drop it, since it is not a namespace
    if not elements[0]:
        elements.pop(0)

    # Purge any extension, since it isn't part of a valid WikiWord
    elements[-1] = os.path.splitext(elements[-1])[0]

    return NAMESPACE_CHAR.join(elements)
def count_backslashes_at_end(text):
    """
    Returns how many consecutive backslashes the given string ends with.

    The count decides whether a WikiWord following the text is escaped, and
    how many literal backslashes have to be kept in front of it.
    """
    count = 0
    for char in reversed(text):
        if char != '\\':
            break
        count += 1
    return count
def find_wikiwords(in_stream):
    """
    Splits the contents of a stream into WikiWords and plain text.

    This is a generator. The whole stream is read when the first element is
    requested, and the following kinds of tuples are then yielded in the
    order in which the text appears:

    - (True, 'WikiWord') indicates text that is a WikiWord
    - (False, '...') indicates text that is not a WikiWord

    A WikiWord preceded by an odd number of backslashes is escaped, and is
    reported as plain text.
    """
    text = in_stream.read()
    while text:
        match = WIKIWORD.search(text)
        if not match:
            yield (False, text)
            break

        # The regex also matches one delimiter on each side of the WikiWord
        # (groups 1 and 3, which are empty at the very start or end of the
        # text). Those delimiters are not a part of the WikiWord, so cut the
        # text at the group boundaries. Slicing is used rather than
        # str.lstrip/str.rstrip, because those strip *every* occurrence of
        # the delimiter character - a WikiWord ending in ':' which is
        # followed by another ':' would lose characters.
        word_begin = match.end(1)
        word_end = match.start(3)

        wikiword = text[word_begin:word_end]
        pre_wikiword_text = text[:word_begin]
        post_wikiword_text = text[word_end:]

        # If there isn't any preceding text, then we don't care about escape
        # processing, since there cannot be any to process
        if not pre_wikiword_text:
            yield (True, wikiword)
            text = post_wikiword_text
            continue

        # Escapes stack up before WikiWords. For example:
        #
        # WikiWord --> `WikiWord`
        # \WikiWord --> WikiWord
        # \\WikiWord --> \`WikiWord`
        # ...
        bs_count = count_backslashes_at_end(pre_wikiword_text)

        # Each pair of backslashes collapses into one literal backslash; an
        # odd backslash left over escapes the WikiWord itself
        is_escaped = bs_count % 2 == 1
        extra_bs = '\\' * (bs_count // 2)
        pre_wikiword_text = pre_wikiword_text.rstrip('\\') + extra_bs

        yield (False, pre_wikiword_text)
        yield (not is_escaped, wikiword)

        text = post_wikiword_text
def wikiword_to_link(stream_tuple, fmt):
    """
    Turns a WikiWord element into a link, rendered via the given format and
    marked as plain text. Elements which are not WikiWords are handed back
    unchanged.

    >>> wikiword_to_link((True, 'WikiWord'), '[{title}]({link})')
    (False, '[WikiWord](/WikiWord.html)')
    >>> wikiword_to_link((True, 'NameSpace:WikiWord'), '[{title}]({link})')
    (False, '[NameSpace:WikiWord](/NameSpace/WikiWord.html)')
    >>> wikiword_to_link((False, 'foo'), '[{title}]({link})')
    (False, 'foo')
    """
    needs_link, content = stream_tuple
    if needs_link:
        target = wikiword_to_path(content)
        rendered = fmt.format(title=content, link=target)
        return (False, rendered)

    return stream_tuple
def category_to_html(categories, category):
    """
    Converts a category to HTML which links all the elements of that category.

    `categories` maps each category name onto the collection of page titles
    belonging to it; `category` is the name of the category to render. The
    complete HTML document is returned as a string.

    The pages are listed in sorted order, so that the generated page does not
    change between runs when the page titles are stored in a set.
    """
    header = '''
<html>
<head>
<title> Pages In {title} </title>
<link rel="stylesheet" type="text/css" href="{css}" />
</head>
<body>
<h1 class="title"> Pages In {title} </h1>
<ul>
'''.format(title=category, css=CSS_FILE)

    items = [
        '<li> <a href="{path}"> {page} </a> </li>'.format(
            path=wikiword_to_path(page),
            page=page)
        for page in sorted(categories[category])
    ]

    footer = '''
</ul>
</body>
</html>
'''
    return header + ''.join(items) + footer
def convert_file(input_dir, output_dir, path, category_store, all_pages):
    """
    Converts a file residing in the input directory, to an HTML file in the
    output directory.

    - `path` is the path of the wiki file, relative to `input_dir`.
    - `category_store` maps category WikiWords onto sets of page titles; the
      title of this page is added to every category that the page mentions.
    - `all_pages` is a set of page titles, which gains the title of this page.

    Wiki files are read as UTF-8 and HTML files are written as UTF-8, which
    matches the encoding used to talk to the converter process.
    """
    title = path_to_wikiword(path)
    out_path = wikiword_to_path(title, leading_slash=False)
    all_pages.add(title)

    # find_wikiwords is a lazy generator, so collect its output while the
    # file is still open. The list is needed twice: once to build the text
    # for the converter, and once to find the categories.
    with open(os.path.join(input_dir, path), encoding='utf-8') as in_file:
        elements = list(find_wikiwords(in_file))

    # First, run the page through the converter, and get the output
    with_links_converted = (wikiword_to_link(elem, LINK_FORMAT)
                            for elem in elements)
    preformatted_text = ''.join(text for (_, text) in with_links_converted)

    # NOTE: the command line comes from the wiki's configuration and is run
    # through the shell; the {output} value is substituted without quoting.
    subproc = subprocess.Popen(
        CONVERT_CMDLINE.format(output=os.path.join(output_dir, path)),
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        shell=True)

    formatted_text, _ = subproc.communicate(preformatted_text.encode('utf-8'))
    if subproc.returncode != 0:
        # Keep going, but don't let a broken converter pass unnoticed
        print('Warning: Converter exited with status', subproc.returncode,
              'while converting', path,
              file=sys.stderr)

    with open(os.path.join(output_dir, out_path), 'w',
              encoding='utf-8') as out_file:
        out_file.write(OUTPUT_HTML.format(
            title=title,
            css=CSS_FILE,
            body=str(formatted_text, 'utf-8')))

    # Add the page to all the categories it links to
    for is_wikiword, wikiword in elements:
        # Links to Category:FooPages are not categories, they are lists of
        # links belonging to Category:Foo - thus, we ignore them
        if (is_wikiword and wikiword.startswith('Category:') and
                not wikiword.endswith('Pages')):
            category_store[wikiword].add(title)
def write_categories(output_path, categories):
    """
    Writes out HTML files for all the categories.

    `output_path` is the root of the output tree, and `categories` maps each
    category WikiWord onto the collection of page titles belonging to it.
    """
    # Work on a snapshot of the keys, so that the mapping is never iterated
    # directly while category_to_html is indexing it
    for category in list(categories):
        # You can have descriptions of a category at Category:Foo, and then
        # a list of pages at Category:FooPages
        proper_category = category + 'Pages'
        html = category_to_html(categories, category)

        target = os.path.join(
            output_path,
            wikiword_to_path(proper_category, leading_slash=False))

        # The category's namespace directory only exists already when some
        # wiki page lives in that namespace, so make sure it is there
        target_dir = os.path.dirname(target)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        with open(target, 'w') as cat_out:
            cat_out.write(html)
def write_master_file(output_path, all_pages):
    """
    Produces index.html in the output directory, containing a sorted list of
    links to every page in the wiki.
    """
    header = '''
<html>
<head>
<title> Master Index </title>
<link rel="stylesheet" type="text/css" href="{css}" />
</head>
<body>
<h1 class="title"> Wiki Master Index </h1>
<ul>
'''.format(css=CSS_FILE)

    entry_format = '<li> <a href="{path}"> {page} </a> </li>'
    entries = ''.join(
        entry_format.format(path=wikiword_to_path(title), page=title)
        for title in sorted(all_pages))

    footer = '''
</ul>
</body>
</html>
'''

    index_path = os.path.join(output_path, 'index.html')
    with open(index_path, 'w') as master_file:
        master_file.write(header + entries + footer)
def convert_tree(input_dir, output_dir):
    """
    Converts all the wiki files from the input directory to the output directory.

    Every file with the WIKI_EXT extension which is not inside of a hidden
    directory is converted. Afterwards the category pages are written, and
    index.html is produced - either the master index (when MAIN_PAGE is None)
    or a copy of the main page.

    Note that this changes the working directory of the process to the input
    directory.
    """
    # The working directory changes below, so relative paths have to be
    # pinned down first or they would point somewhere else afterwards
    input_dir = os.path.abspath(input_dir)
    output_dir = os.path.abspath(output_dir)

    all_pages = set()
    categories = defaultdict(set)

    os.chdir(input_dir)
    for path, _, files in os.walk('.'):
        path = os.path.normpath(path)

        # Skip anything inside of hidden directories, since they shouldn't
        # be included ('.' itself is the input directory, and isn't hidden)
        is_hidden = any(part.startswith('.') and part != '.'
                        for part in get_path_elems(path))
        if is_hidden:
            continue

        wiki_files = [filename for filename in files
                      if os.path.splitext(filename)[1] == WIKI_EXT]
        if not wiki_files:
            continue

        # Ensure that the converter routine has a place to actually put the
        # files it generates
        create_directory_tree(os.path.join(output_dir, path))

        for filename in wiki_files:
            convert_file(input_dir, output_dir, os.path.join(path, filename),
                         categories, all_pages)

    write_categories(output_dir, categories)

    if MAIN_PAGE is None:
        write_master_file(output_dir, all_pages)
    else:
        index_page = os.path.join(output_dir, 'index.html')
        source_page = os.path.join(
            output_dir,
            wikiword_to_path(MAIN_PAGE, leading_slash=False))
        shutil.copy(source_page, index_page)
if __name__ == '__main__':
    # Script entry point: reads the input and output directories from the
    # command line, applies any overrides from <INPUT-DIR>/wiki.conf and then
    # converts the whole tree.
    USAGE = sys.argv[0] + ' <INPUT-DIR> <OUTPUT-DIR>'

    try:
        INPUT_DIR = os.path.abspath(sys.argv[1])
        OUTPUT_DIR = os.path.abspath(sys.argv[2])
    except IndexError:
        print(USAGE, file=sys.stderr)
        sys.exit(1)

    config_file_path = os.path.join(INPUT_DIR, 'wiki.conf')
    if not os.path.exists(config_file_path):
        print('Warning: No configuration file at', config_file_path, '- using defaults',
              file=sys.stderr)

    # ConfigParser.read silently skips files which are missing, so a wiki
    # without a configuration file simply keeps all of the defaults
    config = configparser.ConfigParser()
    config.read(config_file_path)

    if 'wikiword' in config:
        opts = config['wikiword']

        # Options which are absent fall back to the module-level defaults
        WIKI_EXT = opts.get('extension', fallback=WIKI_EXT)
        CONVERT_CMDLINE = opts.get('converter', fallback=CONVERT_CMDLINE)
        LINK_FORMAT = opts.get('link_format', fallback=LINK_FORMAT)
        CSS_FILE = opts.get('stylesheet', fallback=CSS_FILE)
        MAIN_PAGE = opts.get('mainpage', fallback=MAIN_PAGE)

    if not os.path.exists(OUTPUT_DIR):
        print('Warning: Creating output directory - move your stylesheet, etc. there',
              file=sys.stderr)
        # makedirs also creates any missing parents of the output directory
        os.makedirs(OUTPUT_DIR)

    convert_tree(INPUT_DIR, OUTPUT_DIR)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment