justinian/moinimport.py

## moinimport.py
#!/usr/bin/env python

import os
import os.path
import re
import string
import time

# Directory whose sub-directories are the wiki pages to import; it is
# scanned by the directory loop at the bottom of this script.
PAGES = "import/data/pages"

char_re = re.compile(r"\([0-9a-f]+\)")
def char_re_replace(match):
	"""Decode one parenthesised run of hex digits matched by ``char_re``.

	The digits between the parentheses are consumed two at a time and
	every pair is converted with ``chr`` to the character it encodes;
	the decoded characters are returned joined into one string.
	"""
	hex_digits = match.group(0)[1:-1]
	codes = [int(hex_digits[pos:pos + 2], 16) for pos in range(0, len(hex_digits), 2)]
	return "".join(chr(code) for code in codes)


# ``string.maketrans`` only exists on Python 2; Python 3 provides the same
# helper as ``str.maketrans``.  Use whichever one this interpreter has.
_maketrans = getattr(string, "maketrans", None) or str.maketrans
_dash_chars = string.punctuation + string.whitespace

# Translation table turning every punctuation/whitespace character into "-".
dash_sub = _maketrans(_dash_chars, "-" * len(_dash_chars))
dash_trim = re.compile("-$|^-")  # one leading or one trailing dash
dash_collapse = re.compile(r"-+")  # any run of dashes
title_split = re.compile(r"(?<=[a-z])(?=[A-Z])")  # lower-to-upper boundary

def fix_name(s):
	"""Insert a space at every lowercase-to-uppercase boundary in *s*."""
	return title_split.sub(r" ", s)

def fix_filepart(s):
	"""Turn one path component into a lowercase, dash-separated slug.

	CamelCase words are split, the text is lower-cased, punctuation and
	whitespace become dashes, runs of dashes are collapsed and a leading
	or trailing dash is removed.
	"""
	# The CamelCase split has to run *before* lower-casing: once the text is
	# lower-cased there is no uppercase letter left for title_split to find,
	# and "FrontPage" would slug to "frontpage" while the already-split
	# "Front Page" slugs to "front-page".
	s = fix_name(s).lower().translate(dash_sub)
	s = dash_collapse.sub("-", s)
	s = dash_trim.sub("", s)
	return s

def fix_filename(s):
	"""Slugify a file name, keeping its extension as a separate part.

	The name is split with ``os.path.splitext``; both halves go through
	``fix_filepart`` and are re-joined with a single ".".  A name without
	an extension is returned without a trailing ".".
	"""
	stem, ext = os.path.splitext(s)
	stem = fix_filepart(stem)
	ext = fix_filepart(ext)
	if not ext:
		return stem
	return stem + "." + ext


# Matches an "attachment:<file>" reference; group 1 is the file name, which
# may only consist of ASCII letters, digits, "." and "_".
attachment_re = re.compile(r"attachment:([A-Za-z._0-9]+)")
# Matches every whole line that begins with "#" (multi-line mode).
comment_re = re.compile(r"(?m)^#.*$")

header_re = re.compile(r"(?m)^\s*(\=+)([^=]+?)\1\s*$")
def replace_header(match):
	"""Convert a heading matched by ``header_re`` into a Markdown heading.

	Group 1 is the run of "=" characters (its length becomes the number
	of "#" characters) and group 2 is the heading text.  The result is
	prefixed with a newline so the heading starts on its own line.
	"""
	level = len(match.group(1))
	# Headings are written "= Title =", so the captured text carries the
	# padding spaces; strip them to avoid emitting "#  Title ".
	return "\n" + "#" * level + " " + match.group(2).strip()

bold_re = re.compile(r"\'\'\'(.*?)\'\'\'")
def replace_bold(match):
	"""Wrap the text captured by ``bold_re`` in Markdown ``**`` markers."""
	inner = match.group(1)
	return "**" + inner + "**"

image_re = re.compile(r"\{\{([^\|]+)(\|[^|]*)?(\|[^|]*)?\}\}")
def replace_image(match):
	"""Render an ``image_re`` match as a ``wrapimage`` shortcode.

	Group 1 is the image source.  Groups 2 and 3 are optional and still
	carry their leading "|": group 2 becomes an ``alt="..."`` attribute,
	group 3 is emitted verbatim with commas replaced by spaces.
	"""
	src, raw_alt, raw_style = match.group(1, 2, 3)

	# A missing group or a bare "|" contributes nothing to the output.
	has_alt = bool(raw_alt) and len(raw_alt) > 1
	has_style = bool(raw_style) and len(raw_style) > 1

	alt = 'alt="%s"' % (raw_alt[1:],) if has_alt else ""
	style = raw_style[1:].replace(',', ' ') if has_style else ""

	return '{{< wrapimage src="%s" %s %s >}}' % (src, alt, style)

# Raw string: "\w" in a normal string literal is an invalid escape sequence.
category_re = re.compile(r"(?<!\w)Category(\w+)")
def find_categories(data):
	"""Return the category names referenced in *data*.

	A reference is the word "Category" immediately followed by word
	characters and not preceded by a word character; only the part after
	"Category" is returned, in order of appearance.
	"""
	return category_re.findall(data)

def replace_image_link(match):
	"""Render an image match as a plain HTML ``<img>`` tag.

	Group 1 is the image source.  Groups 2 and 3 are optional and still
	carry their leading "|": group 2 becomes an ``alt="..."`` attribute,
	group 3 is emitted verbatim with commas replaced by spaces.
	"""
	src, raw_alt, raw_style = match.group(1, 2, 3)

	# A missing group or a bare "|" contributes nothing to the output.
	has_alt = bool(raw_alt) and len(raw_alt) > 1
	has_style = bool(raw_style) and len(raw_style) > 1

	alt = 'alt="%s"' % (raw_alt[1:],) if has_alt else ""
	style = raw_style[1:].replace(',', ' ') if has_style else ""

	return '<img src="%s" %s %s>' % (src, alt, style)


link_re = re.compile(r"\[\[([^|\]]+)(\|[^\]]+)?\]\]")
# Link targets that are absolute URLs ("scheme://...") or mailto: addresses.
_external_link_re = re.compile(r"(?:[A-Za-z][A-Za-z0-9+.\-]*://|mailto:)")
def replace_link(match):
	"""Convert a ``[[target|label]]`` match into a Markdown link.

	The label defaults to the target; a label that starts with "{{" is run
	through ``image_re`` so an embedded image becomes an ``<img>`` tag.
	Targets under "/files" and external URLs are used as they are; any
	other target is treated as a page path, slugified component by
	component and wrapped in a ``relref`` shortcode.
	"""
	target = match.group(1)
	title = target
	label = match.group(2)
	if label and len(label) > 1:
		title = label[1:]
		if title.startswith("{{"):
			title = image_re.sub(replace_image_link, title)

	if target.startswith("/files") or _external_link_re.match(target):
		# Slugifying an absolute URL would destroy it
		# ("http://a.b" -> "http//a-b"), so pass it through untouched.
		link = target
	else:
		link = "/".join(map(fix_filepart, target.split('/')))
		link = '{{< relref "%s.md" >}}' % (link,)

	return '[%s](%s)' % (title, link)

def write_file(name, date, oldpath, newpath, attachpath):
	"""Convert the wiki source at *oldpath* and write it to *newpath*.

	name       -- page title written into the front matter
	date       -- time tuple written into the front matter (via asctime)
	oldpath    -- wiki revision file to read
	newpath    -- Markdown file to create (overwritten if it exists)
	attachpath -- path prefix used when rewriting "attachment:" references

	The output starts with a "+++" delimited front-matter block holding
	title, categories and date, followed by the converted page text.
	"""
	def fix_attach(match):
		# Point the reference at the slugified attachment file name.
		return "/" + os.path.join(attachpath, fix_filename(match.group(1)))

	# Context managers make sure both files are closed, even on error.
	with open(oldpath) as src:
		data = src.read()
	# Categories must be collected before category_re strips them below.
	categories = find_categories(data)

	data = comment_re.sub("", data)
	data = category_re.sub("", data)
	data = bold_re.sub(replace_bold, data)
	# Attachments are rewritten first so that replace_link sees "/files..."
	# targets and leaves them alone.
	data = attachment_re.sub(fix_attach, data)
	data = header_re.sub(replace_header, data)
	#data = image_re.sub(replace_image, data)
	data = link_re.sub(replace_link, data)

	data = data.replace(" ~-IA-~", "{{< ia >}}")
	data = data.replace("~-IA-~", "{{< ia >}}")

	front_matter = [
		"+++",
		"title = \"%s\"" % (name,),
		"categories = [%s]" % (", ".join(['"%s"' % (c,) for c in categories]),),
		"date = \"%s\"" % (time.asctime(date).strip(),),
		"+++\n",
	]
	with open(newpath, "w") as out:
		out.write("\n".join(front_matter) + "\n")
		out.write(data)

def write_page(name, path, revision):
	"""Export one revision of a page; return True if it was written.

	name     -- page name, with "/" separating parent pages from the page
	path     -- directory of the page inside the wiki data
	revision -- revision number to export

	Returns False without touching anything when the revision file does
	not exist.  The page date is taken from the "edit-log" line whose
	second field equals *revision*; if there is none, the current time is
	used and a notice is printed.
	"""
	filepath = os.path.join(path, "revisions", "%08d" % revision)
	if not os.path.isfile(filepath):
		return False

	date = time.localtime()
	with open(os.path.join(path, "edit-log")) as log:
		for line in log:
			parts = line.split()
			if len(parts) < 2:
				# Blank or truncated line: nothing to compare against.
				continue
			if int(parts[1]) == revision:
				# The first field is divided by 1,000,000 to get seconds
				# (presumably a microsecond timestamp); floor division
				# keeps it an integer on every Python version.
				date = time.localtime(int(parts[0]) // 1000000)
				break
		else:
			print("no date on %s" % (name,))

	parts = name.split("/")
	name = fix_name(parts[-1])

	newpath = os.path.join("content", "page", *map(fix_filepart, parts[:-1]))
	newfile = fix_filepart(name) + ".md"

	if not os.path.isdir(newpath):
		os.makedirs(newpath)

	# NOTE(review): `name` is already reduced to the last path component
	# here, so write_attachments never sees the parent pages -- confirm
	# that this is intended.
	attachpath = write_attachments(name, path)
	write_file(name, date, filepath, os.path.join(newpath, newfile), attachpath)
	return True


def write_attachments(name, path):
	"""Copy a page's attachments below "static/" and return their web path.

	name -- page name; each "/"-separated component is slugified
	path -- directory of the page inside the wiki data

	The returned path ("files/<slug>/...") is always the web path, whether
	or not the page has attachments, so callers can build links from it.
	"""
	attachments = os.path.join(path, "attachments")
	webpath = os.path.join("files", *map(fix_filepart, name.split("/")))
	if not os.path.isdir(attachments):
		return webpath

	filenames = os.listdir(attachments)
	if not filenames:
		return webpath

	newpath = os.path.join("static", webpath)
	if not os.path.isdir(newpath):
		os.makedirs(newpath)

	for filename in filenames:
		oldfile = os.path.join(attachments, filename)
		newfile = os.path.join(newpath, fix_filename(filename))
		# Attachments may be images or other binary data: copy in binary
		# mode and close both handles deterministically.
		with open(oldfile, "rb") as src:
			with open(newfile, "wb") as dst:
				dst.write(src.read())

	return webpath


def write_revisions(name, path, current):
	"""Export the newest revision of a page that ``write_page`` accepts.

	Revisions are tried from *current* down to 1; the search stops at the
	first one for which ``write_page`` returns a true value.
	"""
	revision = current
	while revision > 0:
		if write_page(name, path, revision):
			break
		revision -= 1


# Export every page directory under PAGES that has a "revisions" directory.
for d in os.listdir(PAGES):
	path = os.path.join(PAGES, d)
	if not os.path.isdir(path): continue
	revs = os.path.join(path, "revisions")
	if not os.path.isdir(revs): continue

	# "current" holds the number of the page's latest revision; read it
	# through a context manager so the handle is closed right away.
	with open(os.path.join(path, "current")) as current_file:
		current = int(current_file.read().strip())
	# Directory names encode special characters as "(hex)" and spaces as "_".
	name = char_re.sub(char_re_replace, d).replace("_", " ")
	write_revisions(name, path, current)
	#!/usr/bin/env python

	import os
	import os.path
	import re
	import string
	import time

	# Directory whose sub-directories are the wiki pages to import.
	PAGES = "import/data/pages"

	char_re = re.compile(r"\([0-9a-f]+\)")
	def char_re_replace(match):
		"""Decode one parenthesised run of hex digits matched by ``char_re``.

		The digits between the parentheses are consumed two at a time and
		every pair is converted with ``chr`` to the character it encodes;
		the decoded characters are returned joined into one string.
		"""
		# The function body had lost its indentation; it is restored here.
		hex_digits = match.group(0)[1:-1]
		codes = [int(hex_digits[pos:pos + 2], 16) for pos in range(0, len(hex_digits), 2)]
		return "".join(chr(code) for code in codes)


	# ``string.maketrans`` only exists on Python 2; Python 3 provides the same
	# helper as ``str.maketrans``.  Use whichever one this interpreter has.
	_maketrans = getattr(string, "maketrans", None) or str.maketrans
	_dash_chars = string.punctuation + string.whitespace

	# Translation table turning every punctuation/whitespace character into "-".
	dash_sub = _maketrans(_dash_chars, "-" * len(_dash_chars))
	# The alternation must be a bare "|"; an escaped "\|" would only match a
	# literal pipe character and never trim anything.
	dash_trim = re.compile("-$|^-")  # one leading or one trailing dash
	dash_collapse = re.compile(r"-+")  # any run of dashes
	title_split = re.compile(r"(?<=[a-z])(?=[A-Z])")  # lower-to-upper boundary

	def fix_name(s):
		"""Insert a space at every lowercase-to-uppercase boundary in *s*."""
		return title_split.sub(r" ", s)

	def fix_filepart(s):
		"""Turn one path component into a lowercase, dash-separated slug.

		CamelCase words are split, the text is lower-cased, punctuation and
		whitespace become dashes, runs of dashes are collapsed and a leading
		or trailing dash is removed.
		"""
		# The CamelCase split has to run *before* lower-casing: once the text
		# is lower-cased there is no uppercase letter left for title_split.
		s = fix_name(s).lower().translate(dash_sub)
		s = dash_collapse.sub("-", s)
		s = dash_trim.sub("", s)
		return s

	def fix_filename(s):
		"""Slugify a file name, keeping its extension as a separate part.

		The name is split with ``os.path.splitext``; both halves go through
		``fix_filepart`` and are re-joined with a single ".".  A name without
		an extension is returned without a trailing ".".
		"""
		stem, ext = os.path.splitext(s)
		stem = fix_filepart(stem)
		ext = fix_filepart(ext)
		if not ext:
			return stem
		return stem + "." + ext


	# Matches an "attachment:<file>" reference; group 1 is the file name, which
	# may only consist of ASCII letters, digits, "." and "_".
	attachment_re = re.compile(r"attachment:([A-Za-z._0-9]+)")
	# Matches every whole line that begins with "#" (multi-line mode).
	comment_re = re.compile(r"(?m)^#.*$")

	# "\s*" (zero or more) on both sides: with a bare "\s" a heading would only
	# match when surrounded by exactly one whitespace character.
	header_re = re.compile(r"(?m)^\s*(\=+)([^=]+?)\1\s*$")
	def replace_header(match):
		"""Convert a heading matched by ``header_re`` into a Markdown heading.

		Group 1 is the run of "=" characters (its length becomes the number
		of "#" characters) and group 2 is the heading text.  The result is
		prefixed with a newline so the heading starts on its own line.
		"""
		level = len(match.group(1))
		# Headings are written "= Title =", so the captured text carries the
		# padding spaces; strip them to avoid emitting "#  Title ".
		return "\n" + "#" * level + " " + match.group(2).strip()

	bold_re = re.compile(r"\'\'\'(.*?)\'\'\'")
	def replace_bold(match):
		"""Wrap the text captured by ``bold_re`` in Markdown ``**`` markers."""
		# Returning the bare text would silently drop the emphasis.
		return "**%s**" % match.group(1)

	# "{{src|alt|style}}": the separators are plain "|" and the optional parts
	# may be any length, hence "\|[^|]*" rather than a single character.
	image_re = re.compile(r"\{\{([^\|]+)(\|[^|]*)?(\|[^|]*)?\}\}")
	def replace_image(match):
		"""Render an ``image_re`` match as a ``wrapimage`` shortcode.

		Group 1 is the image source.  Groups 2 and 3 are optional and still
		carry their leading "|": group 2 becomes an ``alt="..."`` attribute,
		group 3 is emitted verbatim with commas replaced by spaces.
		"""
		alt = ""
		if match.group(2) and len(match.group(2)) > 1:
			alt = 'alt="%s"' % (match.group(2)[1:],)

		style = ""
		if match.group(3) and len(match.group(3)) > 1:
			style = match.group(3)[1:].replace(',', ' ')

		return '{{< wrapimage src="%s" %s %s >}}' % (
				match.group(1), alt, style)

	# Raw string: "\w" in a normal string literal is an invalid escape sequence.
	category_re = re.compile(r"(?<!\w)Category(\w+)")
	def find_categories(data):
		"""Return the category names referenced in *data*.

		A reference is the word "Category" immediately followed by word
		characters and not preceded by a word character; only the part after
		"Category" is returned, in order of appearance.
		"""
		return category_re.findall(data)

	def replace_image_link(match):
		"""Render an image match as a plain HTML ``<img>`` tag.

		Group 1 is the image source.  Groups 2 and 3 are optional and still
		carry their leading "|": group 2 becomes an ``alt="..."`` attribute,
		group 3 is emitted verbatim with commas replaced by spaces.
		"""
		# The body had lost its nesting; the if-blocks are restored here.
		alt = ""
		if match.group(2) and len(match.group(2)) > 1:
			alt = 'alt="%s"' % (match.group(2)[1:],)

		style = ""
		if match.group(3) and len(match.group(3)) > 1:
			style = match.group(3)[1:].replace(',', ' ')

		return '<img src="%s" %s %s>' % (
				match.group(1), alt, style)


	# "[[target|label]]": the label group starts with a plain "|".
	link_re = re.compile(r"\[\[([^|\]]+)(\|[^\]]+)?\]\]")
	# Link targets that are absolute URLs ("scheme://...") or mailto: addresses.
	_external_link_re = re.compile(r"(?:[A-Za-z][A-Za-z0-9+.\-]*://|mailto:)")
	def replace_link(match):
		"""Convert a ``[[target|label]]`` match into a Markdown link.

		The label defaults to the target; a label that starts with "{{" is
		run through ``image_re`` so an embedded image becomes an ``<img>``
		tag.  Targets under "/files" and external URLs are used as they are;
		any other target is treated as a page path, slugified component by
		component and wrapped in a ``relref`` shortcode.
		"""
		target = match.group(1)
		title = target
		label = match.group(2)
		if label and len(label) > 1:
			title = label[1:]
			if title.startswith("{{"):
				title = image_re.sub(replace_image_link, title)

		if target.startswith("/files") or _external_link_re.match(target):
			# Slugifying an absolute URL would destroy it, so pass it through.
			link = target
		else:
			link = "/".join(map(fix_filepart, target.split('/')))
			link = '{{< relref "%s.md" >}}' % (link,)

		return '[%s](%s)' % (title, link)

	def write_file(name, date, oldpath, newpath, attachpath):
		"""Convert the wiki source at *oldpath* and write it to *newpath*.

		name       -- page title written into the front matter
		date       -- time tuple written into the front matter (via asctime)
		oldpath    -- wiki revision file to read
		newpath    -- Markdown file to create (overwritten if it exists)
		attachpath -- path prefix used when rewriting "attachment:" references

		The output starts with a "+++" delimited front-matter block holding
		title, categories and date, followed by the converted page text.
		"""
		def fix_attach(match):
			# Point the reference at the slugified attachment file name.
			return "/" + os.path.join(attachpath, fix_filename(match.group(1)))

		# Context managers make sure both files are closed, even on error.
		with open(oldpath) as src:
			data = src.read()
		# Categories must be collected before category_re strips them below.
		categories = find_categories(data)

		data = comment_re.sub("", data)
		data = category_re.sub("", data)
		data = bold_re.sub(replace_bold, data)
		# Attachments are rewritten first so that replace_link sees
		# "/files..." targets and leaves them alone.
		data = attachment_re.sub(fix_attach, data)
		data = header_re.sub(replace_header, data)
		#data = image_re.sub(replace_image, data)
		data = link_re.sub(replace_link, data)

		data = data.replace(" ~-IA-~", "{{< ia >}}")
		data = data.replace("~-IA-~", "{{< ia >}}")

		front_matter = [
			"+++",
			"title = \"%s\"" % (name,),
			"categories = [%s]" % (", ".join(['"%s"' % (c,) for c in categories]),),
			"date = \"%s\"" % (time.asctime(date).strip(),),
			"+++\n",
		]
		with open(newpath, "w") as out:
			out.write("\n".join(front_matter) + "\n")
			out.write(data)

	def write_page(name, path, revision):
		"""Export one revision of a page; return True if it was written.

		name     -- page name, with "/" separating parent pages from the page
		path     -- directory of the page inside the wiki data
		revision -- revision number to export

		Returns False without touching anything when the revision file does
		not exist.  The page date is taken from the "edit-log" line whose
		second field equals *revision*; if there is none, the current time
		is used and a notice is printed.
		"""
		filepath = os.path.join(path, "revisions", "%08d" % revision)
		if not os.path.isfile(filepath):
			return False

		date = time.localtime()
		with open(os.path.join(path, "edit-log")) as log:
			for line in log:
				parts = line.split()
				if len(parts) < 2:
					# Blank or truncated line: nothing to compare against.
					continue
				if int(parts[1]) == revision:
					# The first field is divided by 1,000,000 to get seconds
					# (presumably a microsecond timestamp); floor division
					# keeps it an integer on every Python version.
					date = time.localtime(int(parts[0]) // 1000000)
					break
			else:
				# for/else: reached only when no log line matched.
				print("no date on %s" % (name,))

		parts = name.split("/")
		name = fix_name(parts[-1])

		newpath = os.path.join("content", "page", *map(fix_filepart, parts[:-1]))
		newfile = fix_filepart(name) + ".md"

		if not os.path.isdir(newpath):
			os.makedirs(newpath)

		attachpath = write_attachments(name, path)
		write_file(name, date, filepath, os.path.join(newpath, newfile), attachpath)
		return True


	def write_attachments(name, path):
		"""Copy a page's attachments below "static/" and return their web path.

		name -- page name; each "/"-separated component is slugified
		path -- directory of the page inside the wiki data

		The returned path ("files/<slug>/...") is always the web path,
		whether or not the page has attachments.
		"""
		attachments = os.path.join(path, "attachments")
		webpath = os.path.join("files", *map(fix_filepart, name.split("/")))
		if not os.path.isdir(attachments):
			return webpath

		filenames = os.listdir(attachments)
		if not filenames:
			return webpath

		newpath = os.path.join("static", webpath)
		if not os.path.isdir(newpath):
			os.makedirs(newpath)

		for filename in filenames:
			oldfile = os.path.join(attachments, filename)
			newfile = os.path.join(newpath, fix_filename(filename))
			# Attachments may be binary: copy in binary mode and close both
			# handles deterministically.
			with open(oldfile, "rb") as src:
				with open(newfile, "wb") as dst:
					dst.write(src.read())

		return webpath


	def write_revisions(name, path, current):
		"""Export the newest revision of a page that ``write_page`` accepts.

		Revisions are tried from *current* down to 1; the search stops at
		the first one for which ``write_page`` returns a true value.
		"""
		# The loop body had lost its nesting; it is restored here.
		for i in range(current, 0, -1):
			if write_page(name, path, i):
				return


	# Export every page directory under PAGES that has a "revisions" directory.
	for d in os.listdir(PAGES):
		path = os.path.join(PAGES, d)
		if not os.path.isdir(path): continue
		revs = os.path.join(path, "revisions")
		if not os.path.isdir(revs): continue

		# "current" holds the number of the page's latest revision; read it
		# through a context manager so the handle is closed right away.
		with open(os.path.join(path, "current")) as current_file:
			current = int(current_file.read().strip())
		# Directory names encode special characters as "(hex)" and spaces as "_".
		name = char_re.sub(char_re_replace, d).replace("_", " ")
		write_revisions(name, path, current)