Skip to content

Instantly share code, notes, and snippets.

@justinian
Created March 9, 2015 02:09
Show Gist options
  • Save justinian/e0059c55f595c22d4dce to your computer and use it in GitHub Desktop.
Save justinian/e0059c55f595c22d4dce to your computer and use it in GitHub Desktop.
Import MoinMoin data to a Hugo site
#!/usr/bin/env python
import os
import os.path
import re
import string
import time
# Directory scanned for pages to import: the loop at the bottom of the script
# treats every sub-directory that has a "revisions" folder as one page.
# The path is relative, so it is resolved against the working directory.
PAGES = "import/data/pages"
# A parenthesised run of lowercase hex digits, e.g. "(2f)" or "(2d2d)".
char_re = re.compile(r"\([0-9a-f]+\)")


def char_re_replace(match):
    """Decode one ``char_re`` match into the characters its hex digits spell.

    The surrounding parentheses are dropped and the digits are consumed two
    at a time; every pair becomes the character with that ordinal, so
    "(2f)" turns into "/". A trailing single digit is decoded on its own.
    """
    hex_digits = match.group(0)[1:-1]
    decoded = []
    for offset in range(0, len(hex_digits), 2):
        decoded.append(chr(int(hex_digits[offset:offset + 2], 16)))
    return "".join(decoded)
# ``string.maketrans`` only exists on Python 2; Python 3 provides the same
# helper as ``str.maketrans``.  Use whichever is available so the table below
# can be built on either interpreter.
try:
    _maketrans = string.maketrans
except AttributeError:
    _maketrans = str.maketrans

# Translation table mapping every punctuation or whitespace character to "-".
dash_sub = _maketrans(
    string.punctuation + string.whitespace,
    "-" * len(string.punctuation + string.whitespace)
)
# A single dash at the very start or the very end of a string.
dash_trim = re.compile("-$|^-")
# A run of one or more dashes.
dash_collapse = re.compile(r"-+")
# Zero-width position between a lowercase and an uppercase ASCII letter.
title_split = re.compile(r"(?<=[a-z])(?=[A-Z])")


def fix_name(s):
    """Insert a space at every lower->upper case boundary.

    "FooBar" becomes "Foo Bar"; strings without such a boundary are
    returned unchanged.
    """
    return title_split.sub(r" ", s)


def fix_filepart(s):
    """Turn one path component into a lowercase, dash-separated slug.

    Steps: split CamelCase words with ``fix_name``, lowercase, replace all
    punctuation/whitespace with "-", collapse dash runs, and trim a leading
    or trailing dash.

    ``fix_name`` has to run *before* lowercasing: it looks for an uppercase
    letter, so applying it to already-lowercased text (as the previous
    version did) never split anything and "FooBar" produced "foobar" while
    the page file written for the same page was named "foo-bar".
    """
    s = fix_name(s).lower().translate(dash_sub)
    s = dash_collapse.sub("-", s)
    s = dash_trim.sub("", s)
    return s
def fix_filename(s):
    """Slugify a file name while keeping its extension as a separate part.

    The name is split with ``os.path.splitext`` and both halves are passed
    through ``fix_filepart``; they are re-joined with a single dot.

    When the extension half is empty the stem is returned on its own, so a
    name without an extension no longer comes back with a dangling dot
    ("README" used to yield "readme.").
    """
    stem, ext = os.path.splitext(s)
    stem = fix_filepart(stem)
    ext = fix_filepart(ext)
    if not ext:
        return stem
    return stem + "." + ext
# "attachment:<name>" references.  The captured name may only contain ASCII
# letters, digits, "." and "_", so the match stops at the first other
# character.
attachment_re = re.compile(r"attachment:([A-Za-z._0-9]+)")
# Any whole line that begins with "#" (multi-line mode).
comment_re = re.compile(r"(?m)^#.*$")
# A line of the form "== Title ==": the same run of "=" on both sides of the
# text, optionally surrounded by whitespace (multi-line mode).
header_re = re.compile(r"(?m)^\s*(\=+)([^=]+?)\1\s*$")


def replace_header(match):
    """Rewrite one ``header_re`` match as a Markdown ATX heading.

    The heading level equals the number of "=" characters captured in
    group 1.  The title text (group 2) is stripped: the wiki form
    "= Title =" pads it with blanks, which previously leaked into the output
    as "#  Title " with a doubled and a trailing space.

    The result starts with a newline so the heading is separated from the
    preceding text.
    """
    level = len(match.group(1))
    return "\n" + "#" * level + " " + match.group(2).strip()
# Text wrapped in triple single quotes; the inner text is captured lazily so
# two bold spans on one line are matched separately.
bold_re = re.compile(r"\'\'\'(.*?)\'\'\'")


def replace_bold(match):
    """Render the text captured by ``bold_re`` as Markdown bold (``**text**``)."""
    inner = match.group(1)
    return "**%s**" % (inner,)
# "{{target}}", "{{target|alt}}" or "{{target|alt|style}}" embeds.  The two
# optional groups keep their leading "|".
image_re = re.compile(r"\{\{([^\|]+)(\|[^|]*)?(\|[^|]*)?\}\}")


def replace_image(match):
    """Render an ``image_re`` match as a ``wrapimage`` shortcode call.

    Group 1 is used as ``src``.  Group 2, when it holds more than the bare
    "|", becomes an ``alt="..."`` attribute.  Group 3, likewise, is appended
    with its commas turned into spaces.  Missing parts are emitted as empty
    strings, so the separating blanks of the template remain in the output.
    """
    target, alt_part, style_part = match.group(1, 2, 3)

    if alt_part and len(alt_part) > 1:
        alt = 'alt="%s"' % (alt_part[1:],)
    else:
        alt = ""

    if style_part and len(style_part) > 1:
        style = style_part[1:].replace(',', ' ')
    else:
        style = ""

    return '{{< wrapimage src="%s" %s %s >}}' % (target, alt, style)
# A word starting with "Category" that is not preceded by another word
# character; the remainder of the word is captured.  Written as a raw string
# so that "\w" reaches the regex engine instead of being treated as an
# (invalid) string escape, which newer Python versions warn about.
category_re = re.compile(r"(?<!\w)Category(\w+)")


def find_categories(data):
    """Return the suffix of every ``Category<Name>`` word found in *data*.

    "CategoryFoo" contributes "Foo".  Matches are returned in order of
    appearance, duplicates included; an empty list means none were found.
    """
    return category_re.findall(data)
def replace_image_link(match):
    """Render an image embed match as a plain HTML ``<img>`` tag.

    Expects a match with three groups: the source, an optional "|alt" part
    and an optional "|style" part (each optional part still carrying its
    leading "|").  A non-empty alt becomes ``alt="..."``; a non-empty style
    is appended with commas replaced by spaces.  Absent parts are rendered
    as empty strings.
    """
    src = match.group(1)
    # Drop the leading "|" of each optional field; a missing group or a lone
    # "|" both leave an empty string.
    alt_text = (match.group(2) or "")[1:]
    style_text = (match.group(3) or "")[1:]

    alt = 'alt="%s"' % (alt_text,) if alt_text else ""
    style = style_text.replace(',', ' ') if style_text else ""
    return '<img src="%s" %s %s>' % (src, alt, style)
# "[[target]]" or "[[target|label]]" links; the optional label group keeps
# its leading "|".
link_re = re.compile(r"\[\[([^|\]]+)(\|[^\]]+)?\]\]")
# Targets that are already absolute: "scheme://..." URLs or "mailto:" links.
_external_link_re = re.compile(r"(?:[A-Za-z][A-Za-z0-9+.\-]*://|mailto:)")


def replace_link(match):
    """Render a ``link_re`` match as a Markdown link.

    The label is the text after "|" when present, otherwise the target
    itself.  A label that starts with "{{" is run through ``image_re`` so an
    embedded image becomes an ``<img>`` tag.

    The target is handled in one of two ways:

    * Targets beginning with "/files" (presumably attachment paths that were
      already rewritten - confirm against ``write_file``) and absolute URLs
      or ``mailto:`` addresses are emitted verbatim.  Previously external
      URLs were slugified and wrapped in ``relref``, which destroyed them.
    * Everything else is treated as a page name: each "/"-separated part is
      slugified with ``fix_filepart`` and the result is wrapped in a Hugo
      ``relref`` shortcode pointing at the ".md" file.
    """
    target = match.group(1)

    label = target
    if match.group(2) and len(match.group(2)) > 1:
        label = match.group(2)[1:]
    if label.startswith("{{"):
        label = image_re.sub(replace_image_link, label)

    if target.startswith("/files") or _external_link_re.match(target):
        link = target
    else:
        link = "/".join(map(fix_filepart, target.split('/')))
        link = '{{< relref "%s.md" >}}' % (link,)
    return '[%s](%s)' % (label, link)
def write_file(name, date, oldpath, newpath, attachpath):
    """Convert one wiki source file into a Markdown file with front matter.

    name: page title written to the ``title`` front-matter field.
    date: time tuple, rendered with ``time.asctime`` into the ``date`` field.
    oldpath: path of the wiki text to read.
    newpath: path of the Markdown file to create (overwritten if present).
    attachpath: directory that rewritten ``attachment:`` references are
        placed under; a leading "/" is added to the resulting path.

    Both files are opened with ``with`` so they are closed (and the output
    flushed) even if a conversion step raises.
    """
    def fix_attach(match):
        # "attachment:Name.ext" -> "/<attachpath>/<slugified name>"
        return "/" + os.path.join(attachpath, fix_filename(match.group(1)))

    with open(oldpath) as src:
        data = src.read()

    # Collect the categories first: category_re removes them from the text.
    categories = find_categories(data)
    # Lines starting with "#" are dropped before headings are converted,
    # because converted headings themselves start with "#".
    data = comment_re.sub("", data)
    data = category_re.sub("", data)
    data = bold_re.sub(replace_bold, data)
    data = attachment_re.sub(fix_attach, data)
    data = header_re.sub(replace_header, data)
    # Stand-alone image embeds are currently not converted:
    #data = image_re.sub(replace_image, data)
    data = link_re.sub(replace_link, data)
    # Replace the marker together with a preceding blank first, then any
    # remaining occurrences without one.
    data = data.replace(" ~-IA-~", "{{< ia >}}")
    data = data.replace("~-IA-~", "{{< ia >}}")

    # Escape backslashes and double quotes so the title stays a valid TOML
    # basic string.
    title = name.replace("\\", "\\\\").replace('"', '\\"')
    quoted_categories = ", ".join(['"%s"' % (c,) for c in categories])
    front_matter = [
        "+++",
        "title = \"%s\"" % (title,),
        "categories = [%s]" % (quoted_categories,),
        "date = \"%s\"" % (time.asctime(date).strip(),),
        "+++\n",
    ]

    with open(newpath, "w") as out:
        out.write("\n".join(front_matter) + "\n")
        out.write(data)
def write_page(name, path, revision):
    """Export one stored revision of a page, if that revision exists.

    name: page name; "/" separates parent pages from the page itself.
    path: directory of the page inside the wiki data store.
    revision: revision number; its file is "revisions/<8-digit number>".

    Returns False (doing nothing) when the revision file is missing and
    True after the page and its attachments have been written.

    The page date is taken from the "edit-log" entry whose second field
    equals *revision*; its first field is divided by 1,000,000 to obtain
    seconds.  If there is no usable entry - including a missing log or
    malformed lines, which used to abort the import - the current time is
    used and a notice is printed.
    """
    filepath = os.path.join(path, "revisions", "%08d" % revision)
    if not os.path.isfile(filepath):
        return False

    date = time.localtime()
    dated = False
    logpath = os.path.join(path, "edit-log")
    if os.path.isfile(logpath):
        with open(logpath) as log:
            for line in log:
                fields = line.split()
                try:
                    if int(fields[1]) != revision:
                        continue
                    stamp = int(fields[0])
                except (IndexError, ValueError):
                    # Blank or malformed line: ignore it and keep looking.
                    continue
                # Floor division keeps the value an integer on Python 3 too.
                date = time.localtime(stamp // 1000000)
                dated = True
                break
    if not dated:
        print("no date on %s" % (name,))

    parts = name.split("/")
    # From here on "name" is only the last component, with CamelCase split.
    name = fix_name(parts[-1])
    newpath = os.path.join("content", "page", *map(fix_filepart, parts[:-1]))
    newfile = fix_filepart(name) + ".md"
    if not os.path.isdir(newpath):
        os.makedirs(newpath)
    attachpath = write_attachments(name, path)
    write_file(name, date, filepath, os.path.join(newpath, newfile), attachpath)
    return True
def write_attachments(name, path):
    """Copy a page's attachments into the static tree and return their web path.

    name: page name; each "/"-separated part is slugified with
        ``fix_filepart`` to build the target directory.
    path: directory of the page; attachments are read from its
        "attachments" sub-directory.

    Files are copied to "static/files/<slug>/..." with slugified names.
    The return value is always the site-relative directory
    "files/<slug>/...".  The early return used to hand back the filesystem
    path (with the "static" prefix) instead, so pages without attachments
    got a different kind of path than pages with them.

    Copies are byte-for-byte (``shutil.copyfile``); the previous text-mode
    read/write could corrupt binary attachments and never closed its files.
    Entries that are not regular files are skipped.
    """
    # Imported here so the block does not depend on a new top-level import.
    import shutil

    attachments = os.path.join(path, "attachments")
    webpath = os.path.join("files", *map(fix_filepart, name.split("/")))
    if not os.path.isdir(attachments):
        return webpath
    filenames = os.listdir(attachments)
    if not filenames:
        return webpath

    newpath = os.path.join("static", webpath)
    if not os.path.isdir(newpath):
        os.makedirs(newpath)
    for filename in filenames:
        oldfile = os.path.join(attachments, filename)
        if not os.path.isfile(oldfile):
            continue
        newfile = os.path.join(newpath, fix_filename(filename))
        shutil.copyfile(oldfile, newfile)
    return webpath
def write_revisions(name, path, current):
    """Export the newest revision of a page that can actually be written.

    Revisions are tried from *current* down to 1; the walk stops at the
    first one for which ``write_page`` returns a true value.  Nothing is
    returned.
    """
    revision = current
    while revision > 0:
        if write_page(name, path, revision):
            break
        revision -= 1
# Driver: export every page directory found under PAGES.
for d in os.listdir(PAGES):
    path = os.path.join(PAGES, d)
    if not os.path.isdir(path):
        continue
    # Only directories that contain a "revisions" folder are pages.
    revs = os.path.join(path, "revisions")
    if not os.path.isdir(revs):
        continue
    # The "current" file holds the number of the latest revision; read it
    # through a context manager so the handle is closed right away.
    with open(os.path.join(path, "current")) as marker:
        current = int(marker.read().strip())
    # Directory names carry special characters as parenthesised hex and
    # use "_" where the page name has a space.
    name = char_re.sub(char_re_replace, d).replace("_", " ")
    write_revisions(name, path, current)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment