Skip to content

Instantly share code, notes, and snippets.

@shogo82148
Created May 8, 2012 06:32
Show Gist options
  • Save shogo82148/2633034 to your computer and use it in GitHub Desktop.
Save shogo82148/2633034 to your computer and use it in GitHub Desktop.
pukiwiki2mediawiki converter
#!/usr/bin/python
# -*- coding: utf-8 -*-
import re
from struct import *
import glob
import os
import datetime
import locale
import shutil
# Directory whose entries are scanned for attachment files
# (looks like a placeholder path -- set it before running).
SRC = r"path/to/pukiwiki/attach"
# Directory the renamed copies are written into; this script does not create it.
DST = r"attach"
# Codec used to turn the hex-decoded page and attachment names into text.
code = "euc-jp"
#cf. http://ameblo.jp/mar-ameblo/entry-10020372509.html
def bytelist(s):
    """Return the byte values spelled out by the hex string *s*.

    Consecutive character pairs are read as two-digit hexadecimal numbers,
    so ``"4142"`` yields ``[65, 66]``.  The result is always a real list;
    the previous ``map`` call returned a lazy iterator on Python 3.

    Raises:
        ValueError: if *s* has an odd number of characters or a pair is
            not valid hexadecimal.
    """
    if len(s) % 2:
        # A dangling half-byte used to fail only by accident (None padding
        # on Python 2) or be dropped silently (Python 3); reject it openly.
        raise ValueError("hex string has odd length: %r" % (s,))
    # The explicit "0x" prefix keeps int() from accepting signs or padding.
    return [int("0x" + hi + lo, 16) for hi, lo in zip(s[0::2], s[1::2])]
def decode(s):
    """Return the raw byte string encoded by the hex string *s*.

    ``"4142"`` becomes ``b"AB"`` (a plain ``str`` on Python 2).  The result
    is still undecoded bytes; the caller applies the character encoding.

    Raises:
        TypeError (Python 2) or binascii.Error (Python 3): if *s* has odd
            length or contains a non-hexadecimal character.
    """
    # Imported locally so the helper is self-contained.
    import binascii

    # The standard library does the pair-by-pair conversion that was
    # previously hand-rolled with struct.pack and "".join (which cannot
    # join bytes on Python 3).
    return binascii.unhexlify(s)
# Copy every attachment found in SRC into DST under a readable name.
#
# Only files named "<hex>_<hex>" are processed: the first hex run is decoded
# to the page name and the second to the attachment's file name, and the copy
# is stored as "<page>_<filename>".
import sys

TargetFile = re.compile("^([0-9a-fA-F][0-9a-fA-F])+_([0-9a-fA-F][0-9a-fA-F])+$")
for path in glob.glob(SRC + "/*"):
    title = os.path.basename(path)
    if not title or not TargetFile.match(title):
        continue
    # The pattern allows exactly one underscore, so this always yields a pair.
    (page, filename) = title.split("_")
    try:
        page = decode(page).decode(code)
        filename = decode(filename).decode(code)
    except (ValueError, TypeError, LookupError) as err:
        # Still best-effort (the file is skipped), but the failure is now
        # reported instead of being swallowed by a bare ``except``.
        # UnicodeDecodeError is a ValueError; LookupError covers a bad codec.
        sys.stderr.write("%s: cannot decode name (%r)\n" % (title, err))
        continue
    if not filename:
        continue
    print(title + ":" + page + "_" + filename)
    try:
        shutil.copyfile(path, os.path.join(DST, page + "_" + filename))
    except (EnvironmentError, UnicodeError) as err:
        # For example DST is missing, or the decoded name contains a path
        # separator and therefore points into a directory that does not exist.
        sys.stderr.write("%s: copy failed (%r)\n" % (title, err))
#!/usr/bin/python
# -*- coding: utf-8 -*-
import re
from struct import *
import glob
import os
import datetime
import locale
# Directory containing the PukiWiki page files to convert
# (looks like a placeholder path -- set it before running).
SRC = r"path/to/pukiwiki/wiki"
# Compiled (pattern, replacement) pairs; filled in by InitPuki2Media().
Puki2MediaRules = []
# Matches "&ref(...);" and captures the argument list.
RefRE1 = re.compile(r"&ref\((.*?)\);")
# Matches "#ref(...)" and captures the argument list.
RefRE2 = re.compile(r"#ref\((.*?)\)")


def InitPuki2Media():
    """Compile the markup rewrite rules and append them to Puki2MediaRules.

    Every pattern is compiled with re.MULTILINE so that ``^`` and ``$``
    anchor at each line of a page.  The rules are stored in the order listed
    below.  Calling this function again appends a second copy of every rule.
    """
    def to_xml_comment(match):
        # Hyphens are removed from the text placed between "<!--" and "-->".
        return "<!--" + match.group(1).replace("-", "") + "-->"

    table = [
        # Trailing "~" becomes a line break.
        (r"~$", r"<br>"),
        # Headings "*", "**", "***"; a trailing "[#anchor]" is dropped.
        (r"^\*\*\*(.*?)(\[#\w+\])?$", r"====\1===="),
        (r"^\*\*(.*?)(\[#\w+\])?$", r"===\1==="),
        (r"^\*(.*?)(\[#\w+\])?$", r"==\1=="),
        # "-" lists become "*" lists (listed after the heading rules).
        (r"^---([^-].*)$", r"***\1"),
        (r"^--([^-].*)$", r"**\1"),
        (r"^-([^-].*)$", r"*\1"),
        # ":term|description" becomes ";term:description".
        (r"^:([^|]*)\|(.*)$", r";\1:\2"),
        # "%%text%%" becomes <del>text</del>.
        (r"%%(.*?)%%", r"<del>\1</del>"),
        # Any remaining line that starts with "#" is blanked.
        (r"^#.*$", ""),
        # "+" lists become "#" lists (listed after the "#" line removal).
        (r"^\+\+\+(.*)$", r"###\1"),
        (r"^\+\+(.*)$", r"##\1"),
        (r"^\+(.*)$", r"#\1"),
        # "[[label>http://...]]" or "[[label:http://...]]" -> "[url label]".
        (r"\[\[([^\[\]>]*)[>:](https?://[^\[\]>]*)\]\]", r"[\2 \1]"),
        # "[[label>target]]" -> "[[target|label]]".
        (r"\[\[([^\[\]>]*)>([^\[\]>]*)\]\]", r"[[\2|\1]]"),
        # "&color(c){text};" -> coloured <span>.
        (r"&color\(([^),]*)\)\{(.*?)\};", "<span style=\"color:\\1\">\\2</span>"),
        # Escape "&", "<" and ">" as entities, including the markup
        # produced by the rules above.
        (r"&", r"&amp;"),
        (r"<", r"&lt;"),
        (r">", r"&gt;"),
        # "//" lines become "<!-- -->" comments; listed after the escaping.
        (r"^//(.*)$", to_xml_comment),
    ]
    Puki2MediaRules.extend(
        (re.compile(pattern, re.MULTILINE), replacement)
        for pattern, replacement in table
    )


InitPuki2Media()
def Puki2Media(title, wiki):
    """Convert the PukiWiki source *wiki* of page *title* to MediaWiki markup.

    Three passes are made:

    1. ``&ref(name,...);`` and ``#ref(name,...)`` become
       ``[[file:<title>_<name>]]``; only the text before the first comma of
       the argument list is kept.
    2. Every (pattern, replacement) pair in Puki2MediaRules is applied in
       list order.
    3. Rows starting with "," and rows of the form "|...|" are turned into a
       MediaWiki ``{| ... |}`` table.

    Returns the converted text.  Each input line is emitted followed by
    "\\n", so the result always ends with a newline.
    """
    # Attachment conversion.
    def attachment_link(match):
        return "[[file:" + title + "_" + match.group(1).split(",")[0] + "]]"

    wiki = RefRE1.sub(attachment_link, wiki)
    wiki = RefRE2.sub(attachment_link, wiki)

    # Formatting conversion.
    for pattern, replacement in Puki2MediaRules:
        wiki = pattern.sub(replacement, wiki)

    # Table conversion.  Pieces are collected in a list and joined once
    # rather than growing a string with "+=".
    table_open = "{| cellspacing=\"0\" border=\"1\"\n"
    blanks = (" ", "\t")
    out = []
    in_table = False
    for line in wiki.split("\n"):
        is_comma_row = line.startswith(",")
        is_pipe_row = len(line) >= 2 and line[0] == "|" and line[-1] == "|"

        if not (is_comma_row or is_pipe_row):
            # An ordinary line ends any table in progress.
            if in_table:
                out.append("|}\n")
                in_table = False
            out.append(line + "\n")
            continue

        # Start a new table, or a new row within the current one.
        out.append("|-\n" if in_table else table_open)
        in_table = True

        if is_comma_row:
            # Comma-separated table: padding inside a cell selects the
            # alignment (leading and trailing blank -> centre, leading
            # blank only -> right, otherwise no attribute).
            for cell in line.split(",")[1:]:
                if cell != "" and cell[0] in blanks:
                    if cell[-1] in blanks:
                        out.append("|align=\"center\"|")
                    else:
                        out.append("|align=\"right\"|")
                else:
                    out.append("|")
                out.append(cell.strip() + "\n")
        else:
            # Pipe-delimited table: drop the empty pieces outside the
            # outermost bars.
            for cell in line.split("|")[1:-1]:
                out.append("|" + cell.strip() + "\n")

    # Fix: a table that runs to the very last line (input without a trailing
    # newline) was previously left without its closing "|}".
    if in_table:
        out.append("|}\n")
    return "".join(out)
#http://ameblo.jp/mar-ameblo/entry-10020372509.html
def bytelist(s):
    """Return the byte values spelled out by the hex string *s*.

    Consecutive character pairs are read as two-digit hexadecimal numbers,
    so ``"4142"`` yields ``[65, 66]``.  The result is always a real list;
    the previous ``map`` call returned a lazy iterator on Python 3.

    Raises:
        ValueError: if *s* has an odd number of characters or a pair is
            not valid hexadecimal.
    """
    if len(s) % 2:
        # A dangling half-byte used to fail only by accident (None padding
        # on Python 2) or be dropped silently (Python 3); reject it openly.
        raise ValueError("hex string has odd length: %r" % (s,))
    # The explicit "0x" prefix keeps int() from accepting signs or padding.
    return [int("0x" + hi + lo, 16) for hi, lo in zip(s[0::2], s[1::2])]
def decode(s):
    """Return the raw byte string encoded by the hex string *s*, minus NULs.

    ``"4142"`` becomes ``b"AB"`` (a plain ``str`` on Python 2).  Bytes whose
    value is zero are dropped, exactly as the earlier ``filter`` step did.
    No character decoding is applied.

    Raises:
        TypeError (Python 2) or binascii.Error (Python 3): if *s* has odd
            length or contains a non-hexadecimal character.
    """
    # Imported locally so the helper is self-contained.
    import binascii

    # The standard library does the pair-by-pair conversion that was
    # previously hand-rolled with struct.pack and "".join (which cannot
    # join bytes on Python 3).
    return binascii.unhexlify(s).replace(b"\x00", b"")
# Write a MediaWiki XML export of every page file in SRC to standard output.
#
# A page file is one whose name (up to the first ".") consists solely of hex
# digit pairs; that name is hex-decoded to obtain the page title.
#
# NOTE(review): title and body are emitted in whatever byte encoding the
# PukiWiki files use; confirm that it is the encoding the importer expects.
from xml.sax.saxutils import escape as _xml_escape

print('<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.4/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.4/ http://www.mediawiki.org/xml/export-0.4.xsd" version="0.4" xml:lang="ja">\n')
TargetFile = re.compile("^([0-9a-fA-F][0-9a-fA-F])+$")
# Fix: the loop used the undefined name ``src`` (NameError); the constant
# defined at the top of the script is ``SRC``.
for path in glob.glob(SRC + "/*"):
    title = os.path.basename(path).split(".")[0]
    if not title or not TargetFile.match(title):
        continue
    title = decode(title)
    # ``with`` closes the file even if the conversion below raises.
    with open(path, "r") as f:
        source = f.read()
    print("<page>")
    # The title is character data in the XML, so "&", "<" and ">" must be
    # escaped.  The page text is escaped by the rules Puki2Media applies, so
    # the unescaped title is what gets passed to it.
    print("<title>" + _xml_escape(title) + "</title>")
    print("<revision>")
    print("<timestamp>" + datetime.datetime.today().isoformat() + "</timestamp>")
    print('<text xml:space="preserve">' +
          Puki2Media(title, source) +
          "</text>\n</revision>\n</page>")
print("</mediawiki>")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment