gazpachoking/trac2down.py

## trac2down.py
"""Usage: trac2down <trac wiki dump path>"""
from __future__ import unicode_literals
import codecs
import os
import re
import sys


def convert_wiki_link(link):
    """Normalize a Trac wiki link target.

    A leading ``wiki:`` prefix is dropped, single quotes are trimmed from
    both ends, and every ``/`` is replaced with ``-``.
    """
    target = link[5:] if link.startswith('wiki:') else link
    unquoted = target.strip("'")
    return unquoted.replace('/', '-')


def sub_full_wiki_link(m):
    """Regex callback: build a Markdown ``[label](target)`` link.

    Group 2 of *m* supplies the label; group 1 supplies the target, which is
    passed through ``convert_wiki_link`` first.
    """
    label = m.group(2)
    target = convert_wiki_link(m.group(1))
    return '[%s](%s)' % (label, target)


def sub_simple_wiki_link(m):
    """Regex callback: wrap the converted group 1 of *m* in ``[[...]]``."""
    page = convert_wiki_link(m.group(1))
    return '[[%s]]' % page


def sub_fenced_block(m):
    """Regex callback: render a Trac ``{{{ }}}`` block as Markdown.

    Group 1 of *m* is the optional processor name and group 2 the block
    content.  ``html`` content is emitted bare between newlines, a named
    processor becomes the language tag of a fenced block, and a missing
    processor yields an untagged fenced block.
    """
    language = m.group(1)
    if not language:
        return '```\n%s\n```' % m.group(2)
    if language == 'html':
        return '\n%s\n' % m.group(2)
    return '```%s\n%s\n```' % (language, m.group(2))


def sub_table(m):
    """Regex callback: convert a run of Trac ``||a||b||`` rows to a Markdown table.

    Every row of the whole match has its ``||`` separators replaced with
    `` | `` and a ``| --- | ... |`` delimiter row is inserted after the first
    row, which thereby becomes the table header.  The result is wrapped in
    newlines.

    The delimiter row is sized from the first (header) row.  ``m.group(1)``
    of a repeated group holds only the LAST row, so using it would produce a
    delimiter of the wrong width whenever the last row has a different number
    of cells than the header.
    """
    rows = m.group(0).strip().split('\n')
    lines = [' | '.join(row.strip().split('||')).strip() for row in rows]
    # A row '||a||b||' splits into ['', 'a', 'b', '']: two empty edge pieces.
    width = len(rows[0].strip().split('||')) - 2
    lines.insert(1, '| %s |' % ' | '.join(['---'] * width))
    return '\n%s\n' % '\n'.join(lines)


def trac2down(text):
    """Convert Trac wiki markup in *text* to Markdown and return the result.

    Whole-text passes handle line endings, inline ``{{{code}}}``, multi-line
    ``{{{ }}}`` blocks, ``||`` tables, ``=`` headings (levels 1-4), list
    markers and trailing ``[[BR]]`` line breaks.  Links, ``!``-escaped
    CamelCase words and bold/italic quotes are then rewritten line by line,
    skipping lines that start with four spaces.

    NOTE: none of these passes tracks fenced code blocks, so markup-like text
    inside code is rewritten as well.
    """
    text = re.sub('\r\n', '\n', text)
    # Inline code: '.' does not cross newlines, so only single-line spans match.
    text = re.sub(r'{{{(.*?)}}}', r'`\1`', text)
    # Multi-line blocks, optionally tagged with a '#!processor' first line.
    text = re.sub(r'(?sm){{{\n(?:#!([a-z]+)\n)?(.*?)\n}}}', sub_fenced_block, text)
    text = re.sub(r'(?m)^(\|\|[^\n]+\|\|\n)+$', sub_table, text)

    # '==== Title ====' ... '= Title =' become '#### Title' ... '# Title'.
    for level in (4, 3, 2, 1):
        marker = '=' * level
        text = re.sub(r'(?m)^%s\s+(.*?)\s+%s\s?$' % (marker, marker),
                      r'%s \1' % ('#' * level), text)

    # List items.  Trac conventionally writes ' * item' / ' 1. item' with a
    # one-space base indent.  Drop that single space on every line so that
    # top-level items start at column 0 and nested items keep their relative
    # indentation.  '*' and '.' are escaped and (?m) is set so the rules match
    # actual list markers on any line, not just spaces at the start of text.
    text = re.sub(r'(?m)^ ( *)\* ', r'\1* ', text)
    text = re.sub(r'(?m)^ ( *\d+\. )', r'\1', text)

    # A trailing [[BR]] becomes Markdown's two-space hard line break.
    text = re.sub(r'(?m)\[\[BR\]\]$', '  ', text)

    converted = []
    for line in text.split('\n'):
        # Lines indented by four spaces are passed through unchanged
        # (presumably because Markdown renders them as code).
        if not line.startswith('    '):
            # [Page] -> [[Page]]
            line = re.sub(r'(?<!\[)\[([^\s\[\]]+?)\]', sub_simple_wiki_link, line)
            # [scheme://url label] -> [label](scheme://url)
            line = re.sub(r'\[([a-z]+?://[^\s\[\]]+)\s([^\[\]]+)\]', r'[\2](\1)', line)
            # [wiki:Page label] -> [label](Page)
            line = re.sub(r'\[(wiki:?[^\s\[\]]+)\s([^\[\]]+)\]', sub_full_wiki_link, line)
            # !CamelCase -> CamelCase (drop Trac's link-suppressing '!').
            line = re.sub(r'\!(([A-Z][a-z0-9]+){2,})', r'\1', line)
            # Trac '''bold''' -> Markdown **bold**; must run before the
            # two-quote italic rule so the triple quotes are consumed first.
            line = re.sub(r'\'\'\'(.*?)\'\'\'', r'**\1**', line)
            # Trac ''italic'' -> Markdown _italic_.
            line = re.sub(r'\'\'(.*?)\'\'', r'_\1_', line)
        converted.append(line)
    return '\n'.join(converted)


if __name__ == '__main__':
    # Script entry point: convert every file in the given Trac wiki dump
    # directory and write '<name>.md' into the current working directory.
    if len(sys.argv) < 2:
        # Exit with the usage line instead of an IndexError traceback.
        sys.exit('Usage: trac2down <trac wiki dump path>')
    wiki_dump = sys.argv[1]
    for oldfile in os.listdir(wiki_dump):
        source_path = os.path.join(wiki_dump, oldfile)
        if not os.path.isfile(source_path):
            # Sub-directories cannot be opened as pages; skip them.
            continue
        # '%2F' in the dumped file name becomes '-' in the output name.
        name = oldfile.replace('%2F', '-')
        with codecs.open(source_path, 'r', 'utf-8') as f:
            text = f.read()
        # Ditch title if it's the same as page name
        if text.startswith('= %s =' % name):
            text = re.sub('^.*?\n+', '', text)
        text = trac2down(text)

        # Let codecs do the encoding: writing pre-encoded bytes to a
        # text-mode file raises TypeError on Python 3.
        with codecs.open('%s.md' % name, 'w', 'utf-8') as fp:
            fp.write(text)
	"""Usage: trac2down <trac wiki dump path>"""
	from __future__ import unicode_literals
	import codecs
	import os
	import re
	import sys


	def convert_wiki_link(link):
	    """Normalize a Trac wiki link target.

	    A leading ``wiki:`` prefix is dropped, single quotes are trimmed from
	    both ends, and every ``/`` is replaced with ``-``.
	    """
	    if link.startswith('wiki:'):
	        link = link[5:]
	    return link.strip("'").replace('/', '-')


	def sub_full_wiki_link(m):
	    """Regex callback: build a Markdown ``[label](target)`` link.

	    Group 2 of *m* is the label; group 1 is the target, which is passed
	    through ``convert_wiki_link``.
	    """
	    return '[%s](%s)' % (m.group(2), convert_wiki_link(m.group(1)))


	def sub_simple_wiki_link(m):
	    """Regex callback: wrap the converted group 1 of *m* in ``[[...]]``."""
	    return '[[%s]]' % convert_wiki_link(m.group(1))


	def sub_fenced_block(m):
	    """Regex callback: render a Trac ``{{{ }}}`` block as Markdown.

	    Group 1 of *m* is the optional processor name and group 2 the content.
	    ``html`` content is emitted bare between newlines; any other processor
	    name becomes the language tag of a fenced block; no processor yields an
	    untagged fenced block.
	    """
	    if m.group(1) == 'html':
	        return '\n%s\n' % m.group(2)
	    elif m.group(1):
	        return '```%s\n%s\n```' % (m.group(1), m.group(2))
	    return '```\n%s\n```' % m.group(2)


	def sub_table(m):
	    """Regex callback: convert a run of Trac ``||a||b||`` rows to a Markdown table.

	    Each row of the whole match has its ``||`` separators replaced with
	    `` | `` and a ``| --- | ... |`` delimiter row is inserted after the first
	    row.  The delimiter is sized from that first (header) row; group 1 of a
	    repeated group would only hold the last row.  The result is wrapped in
	    newlines.
	    """
	    rows = m.group(0).strip().split('\n')
	    lines = [' | '.join(row.strip().split('||')).strip() for row in rows]
	    # A row '||a||b||' splits into ['', 'a', 'b', '']: two empty edge pieces.
	    width = len(rows[0].strip().split('||')) - 2
	    lines.insert(1, '| %s |' % ' | '.join(['---'] * width))
	    return '\n%s\n' % '\n'.join(lines)


	def trac2down(text):
	    """Convert Trac wiki markup in *text* to Markdown and return the result.

	    Whole-text passes handle line endings, inline ``{{{code}}}``, multi-line
	    ``{{{ }}}`` blocks, ``||`` tables, ``=`` headings (levels 1-4), list
	    markers and trailing ``[[BR]]`` line breaks.  Links, ``!``-escaped
	    CamelCase words and bold/italic quotes are then rewritten line by line,
	    skipping lines that start with four spaces.

	    NOTE: none of these passes tracks fenced code blocks, so markup-like
	    text inside code is rewritten as well.
	    """
	    text = re.sub('\r\n', '\n', text)
	    # Inline code: '.' does not cross newlines, so only single-line spans match.
	    text = re.sub(r'{{{(.*?)}}}', r'`\1`', text)
	    # Multi-line blocks, optionally tagged with a '#!processor' first line.
	    text = re.sub(r'(?sm){{{\n(?:#!([a-z]+)\n)?(.*?)\n}}}', sub_fenced_block, text)
	    text = re.sub(r'(?m)^(\|\|[^\n]+\|\|\n)+$', sub_table, text)
	    text = re.sub(r'(?m)^====\s+(.*?)\s+====\s?$', r'#### \1', text)
	    text = re.sub(r'(?m)^===\s+(.*?)\s+===\s?$', r'### \1', text)
	    text = re.sub(r'(?m)^==\s+(.*?)\s+==\s?$', r'## \1', text)
	    text = re.sub(r'(?m)^=\s+(.*?)\s+=\s?$', r'# \1', text)
	    # List items.  Trac conventionally writes ' * item' / ' 1. item' with a
	    # one-space base indent.  Drop that single space on every line so that
	    # top-level items start at column 0 and nested items keep their
	    # relative indentation.
	    text = re.sub(r'(?m)^ ( *)\* ', r'\1* ', text)
	    text = re.sub(r'(?m)^ ( *\d+\. )', r'\1', text)
	    # A trailing [[BR]] becomes Markdown's two-space hard line break.
	    text = re.sub(r'(?m)\[\[BR\]\]$', '  ', text)

	    a = []
	    for line in text.split('\n'):
	        # Lines indented by four spaces are passed through unchanged
	        # (presumably because Markdown renders them as code).
	        if not line.startswith('    '):
	            line = re.sub(r'(?<!\[)\[([^\s\[\]]+?)\]', sub_simple_wiki_link, line)
	            line = re.sub(r'\[([a-z]+?://[^\s\[\]]+)\s([^\[\]]+)\]', r'[\2](\1)', line)
	            line = re.sub(r'\[(wiki:?[^\s\[\]]+)\s([^\[\]]+)\]', sub_full_wiki_link, line)
	            line = re.sub(r'\!(([A-Z][a-z0-9]+){2,})', r'\1', line)
	            # Trac '''bold''' -> Markdown **bold**; runs before the
	            # two-quote italic rule so the triple quotes are consumed first.
	            line = re.sub(r'\'\'\'(.*?)\'\'\'', r'**\1**', line)
	            line = re.sub(r'\'\'(.*?)\'\'', r'_\1_', line)
	        a.append(line)
	    return '\n'.join(a)


	if __name__ == '__main__':
	    # Script entry point: convert every file in the given Trac wiki dump
	    # directory and write '<name>.md' into the current working directory.
	    if len(sys.argv) < 2:
	        # Exit with the usage line instead of an IndexError traceback.
	        sys.exit('Usage: trac2down <trac wiki dump path>')
	    wiki_dump = sys.argv[1]
	    for oldfile in os.listdir(wiki_dump):
	        source_path = os.path.join(wiki_dump, oldfile)
	        if not os.path.isfile(source_path):
	            # Sub-directories cannot be opened as pages; skip them.
	            continue
	        # '%2F' in the dumped file name becomes '-' in the output name.
	        name = oldfile.replace('%2F', '-')
	        with codecs.open(source_path, 'r', 'utf-8') as f:
	            text = f.read()
	        # Ditch title if it's the same as page name
	        if text.startswith('= %s =' % name):
	            text = re.sub('^.*?\n+', '', text)
	        text = trac2down(text)

	        # Let codecs do the encoding: writing pre-encoded bytes to a
	        # text-mode file raises TypeError on Python 3.
	        with codecs.open('%s.md' % name, 'w', 'utf-8') as fp:
	            fp.write(text)