Created
May 8, 2012 06:32
-
-
Save shogo82148/2633034 to your computer and use it in GitHub Desktop.
pukiwiki2mediawiki converter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
import re | |
from struct import * | |
import glob | |
import os | |
import datetime | |
import locale | |
import shutil | |
# Directory whose entries are scanned for attachment files named
# "<hex page name>_<hex file name>".
SRC = r"path/to/pukiwiki/attach"
# Directory that receives the copies, renamed to "<page>_<filename>".
DST = r"attach"
# Character encoding used to turn the decoded name bytes into text.
code = "euc-jp"
#cf. http://ameblo.jp/mar-ameblo/entry-10020372509.html | |
def bytelist(s):
    """Convert a string of hexadecimal digit pairs into a list of byte values.

    Args:
        s: Text made of two-digit hexadecimal numbers, e.g. ``"4a6F"``.

    Returns:
        A list with one int per digit pair, in input order.

    Raises:
        ValueError: If ``s`` has an odd number of characters or a pair is
            not valid hexadecimal.
    """
    if len(s) % 2:
        # A dangling digit cannot form a byte; fail loudly instead of
        # depending on how map() pads or truncates uneven sequences.
        raise ValueError("hex string has an odd number of digits: %r" % (s,))
    # The comprehension yields a real list on both Python 2 and Python 3.
    return [int("0x" + s[i:i + 2], 16) for i in range(0, len(s), 2)]
def decode(s):
    """Turn a string of hexadecimal digit pairs into the bytes it spells.

    Args:
        s: Text made of two-digit hexadecimal numbers, e.g. ``"4142"``.

    Returns:
        The byte string (``str`` on Python 2, ``bytes`` on Python 3).
        Converting it to text with a character encoding is left to the
        caller.

    Raises:
        TypeError: On Python 2, for odd-length or non-hexadecimal input.
        ValueError: On Python 3 (``binascii.Error``), for the same inputs.
    """
    # Imported here so the function does not depend on the file's import
    # section.
    import binascii

    return binascii.unhexlify(s)
import sys  # used only by this script section, for diagnostics on stderr

# Attachment files are named "<hex page name>_<hex file name>".
TargetFile = re.compile("^([0-9a-fA-F][0-9a-fA-F])+_([0-9a-fA-F][0-9a-fA-F])+$")

# Copy every attachment found in SRC to DST under its decoded, readable name.
for src_path in glob.glob(SRC + "/*"):
    title = os.path.basename(src_path)
    if not TargetFile.match(title):
        continue
    # The pattern guarantees exactly one "_" (hex digits contain none).
    hex_page, hex_filename = title.split("_")
    try:
        page = decode(hex_page).decode(code)
        filename = decode(hex_filename).decode(code)
    except (TypeError, ValueError) as err:
        # UnicodeDecodeError is a ValueError, so names that are not valid
        # text in `code` land here. Skip the file, but say so.
        sys.stderr.write("skipped %s: cannot decode name (%r)\n" % (title, err))
        continue
    if not filename:
        continue
    new_name = page + "_" + filename
    print(title + ":" + new_name)
    try:
        shutil.copyfile(src_path, DST + "/" + new_name)
    except EnvironmentError as err:
        # Best effort: one failed copy must not stop the remaining files.
        # NOTE(review): a page name containing "/" points into a
        # sub-directory of DST that this script never creates - confirm
        # whether such attachments need handling.
        sys.stderr.write("could not copy %s: %r\n" % (title, err))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
import re | |
from struct import * | |
import glob | |
import os | |
import datetime | |
import locale | |
# Directory whose entries are scanned for page files named by the
# hex-encoded page title.
SRC = r"path/to/pukiwiki/wiki"

# Compiled (pattern, replacement) pairs. Filled by InitPuki2Media() and
# applied in list order by Puki2Media().
Puki2MediaRules = []

# Inline attachment reference: &ref(name,options...);
RefRE1 = re.compile(r"&ref\((.*?)\);")
# Block attachment reference: #ref(name,options...)
RefRE2 = re.compile(r"#ref\((.*?)\)")


def InitPuki2Media():
    """Compile the PukiWiki -> MediaWiki rewrite rules into Puki2MediaRules.

    Every pattern is compiled with ``re.MULTILINE`` so ``^`` and ``$`` match
    at each line of a page. The list is rebuilt in place, so calling this
    function again does not duplicate rules.

    Rule order matters: "&" is escaped before "<" and ">" so the entities
    produced for the latter are not escaped a second time, and the comment
    rule runs last so the markers it emits stay unescaped.
    """
    def to_comment(match):
        # "-" is stripped from the text so it cannot contain "--".
        return "<!--" + match.group(1).replace("-", "") + "-->"

    rules = (
        # Trailing "~" is a forced line break.
        (r"~$", r"<br>"),
        # Headings; an optional trailing "[#anchor]" is dropped.
        (r"^\*\*\*(.*?)(\[#\w+\])?$", r"====\1===="),
        (r"^\*\*(.*?)(\[#\w+\])?$", r"===\1==="),
        (r"^\*(.*?)(\[#\w+\])?$", r"==\1=="),
        # Bulleted lists, deepest level first.
        (r"^---([^-].*)$", r"***\1"),
        (r"^--([^-].*)$", r"**\1"),
        (r"^-([^-].*)$", r"*\1"),
        # Definition list ":term|description".
        (r"^:([^|]*)\|(.*)$", r";\1:\2"),
        # Strike-through.
        (r"%%(.*?)%%", r"<del>\1</del>"),
        # Any line still starting with "#" is emptied.
        (r"^#.*$", ""),
        # Numbered lists, deepest level first.
        (r"^\+\+\+(.*)$", r"###\1"),
        (r"^\+\+(.*)$", r"##\1"),
        (r"^\+(.*)$", r"#\1"),
        # External link [[text>url]] or [[text:url]].
        (r"\[\[([^\[\]>]*)[>:](https?://[^\[\]>]*)\]\]", r"[\2 \1]"),
        # Internal link [[text>page]].
        (r"\[\[([^\[\]>]*)>([^\[\]>]*)\]\]", r"[[\2|\1]]"),
        # Coloured text &color(c){text};
        (r"&color\(([^),]*)\)\{(.*?)\};", r'<span style="color:\1">\2</span>'),
        # Escape XML special characters for the export file. As shown, these
        # three rules replaced each character with itself, which has no
        # effect.
        (r"&", r"&amp;"),
        (r"<", r"&lt;"),
        (r">", r"&gt;"),
        # "//" comment lines.
        (r"^//(.*)$", to_comment),
    )
    # Slice assignment keeps the same list object that other code refers to.
    Puki2MediaRules[:] = [
        (re.compile(pattern, re.MULTILINE), replacement)
        for pattern, replacement in rules
    ]


InitPuki2Media()
def Puki2Media(title, wiki):
    """Convert the PukiWiki markup of one page into MediaWiki markup.

    Three passes run in order: attachment references become
    ``[[file:<title>_<name>]]`` links, the rules in ``Puki2MediaRules`` are
    applied, and table rows are rewritten as MediaWiki tables.

    Args:
        title: Page title; used as the prefix of attachment file names.
        wiki: PukiWiki source text of the page.

    Returns:
        The converted text. Every line produced by splitting the input on
        "\\n" is emitted followed by "\\n".
    """
    def file_link(match):
        # Only the first comma-separated argument (the file name) is kept.
        return "[[file:" + title + "_" + match.group(1).split(",")[0] + "]]"

    # Attachment references.
    wiki = RefRE1.sub(file_link, wiki)
    wiki = RefRE2.sub(file_link, wiki)

    # Formatting rules.
    for pattern, replacement in Puki2MediaRules:
        wiki = pattern.sub(replacement, wiki)

    # Tables.
    table_open = "{| cellspacing=\"0\" border=\"1\"\n"
    blank = (" ", "\t")
    out = []
    in_table = False
    for line in wiki.split("\n"):
        if line.startswith(","):
            # Comma-separated table row.
            out.append("|-\n" if in_table else table_open)
            in_table = True
            for cell in line.split(",")[1:]:
                # Leading blank -> right aligned; blanks on both sides ->
                # centred; otherwise the default alignment.
                if cell[:1] in blank:
                    if cell[-1:] in blank:
                        out.append("|align=\"center\"|")
                    else:
                        out.append("|align=\"right\"|")
                else:
                    out.append("|")
                out.append(cell.strip() + "\n")
        elif len(line) >= 2 and line[0] == "|" and line[-1] == "|":
            # Pipe-delimited table row.
            out.append("|-\n" if in_table else table_open)
            in_table = True
            for cell in line.split("|")[1:-1]:
                out.append("|" + cell.strip() + "\n")
        else:
            if in_table:
                out.append("|}\n")
                in_table = False
            out.append(line + "\n")
    if in_table:
        # The text ended on a table row (no trailing newline), so no
        # ordinary line followed to close the table.
        out.append("|}\n")
    return "".join(out)
#http://ameblo.jp/mar-ameblo/entry-10020372509.html | |
def bytelist(s):
    """Convert a string of hexadecimal digit pairs into a list of byte values.

    Args:
        s: Text made of two-digit hexadecimal numbers, e.g. ``"4a6F"``.

    Returns:
        A list with one int per digit pair, in input order.

    Raises:
        ValueError: If ``s`` has an odd number of characters or a pair is
            not valid hexadecimal.
    """
    if len(s) % 2:
        # A dangling digit cannot form a byte; fail loudly instead of
        # depending on how map() pads or truncates uneven sequences.
        raise ValueError("hex string has an odd number of digits: %r" % (s,))
    # The comprehension yields a real list on both Python 2 and Python 3.
    return [int("0x" + s[i:i + 2], 16) for i in range(0, len(s), 2)]
def decode(s):
    """Turn a string of hexadecimal digit pairs into bytes, dropping zeros.

    Args:
        s: Text made of two-digit hexadecimal numbers, e.g. ``"4142"``.

    Returns:
        The byte string (``str`` on Python 2, ``bytes`` on Python 3) with
        every zero byte removed.

    Raises:
        TypeError: On Python 2, for odd-length or non-hexadecimal input.
        ValueError: On Python 3 (``binascii.Error``), for the same inputs.
    """
    # Imported here so the function does not depend on the file's import
    # section.
    import binascii

    return binascii.unhexlify(s).replace(b"\x00", b"")
# Write a MediaWiki XML export of every page file in SRC to standard output.
# NOTE(review): the title returned by decode() is concatenated with plain
# string literals and page text is passed through as read, i.e. the
# Python 2 byte-string model; under Python 3 the title would have to be
# decoded to text first - confirm before porting.
print('<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.4/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.4/ http://www.mediawiki.org/xml/export-0.4.xsd" version="0.4" xml:lang="ja">\n')

# Page files are named by the hex-encoded title (anything after the first
# "." in the file name is ignored).
TargetFile = re.compile("^([0-9a-fA-F][0-9a-fA-F])+$")

# The directory constant is SRC; the lower-case `src` used before was
# undefined and raised NameError.
for page_path in glob.glob(SRC + "/*"):
    title = os.path.basename(page_path).split(".")[0]
    if not TargetFile.match(title):
        continue
    title = decode(title)
    # Convert before printing anything so a failure cannot leave a
    # half-written <page> element; `with` closes the file even on error.
    with open(page_path, "r") as page_file:
        body = Puki2Media(title, page_file.read())
    print("<page>")
    print("<title>" + title + "</title>")
    print("<revision>")
    print("<timestamp>" + datetime.datetime.today().isoformat() + "</timestamp>")
    print('<text xml:space="preserve">' + body + "</text>\n</revision>\n</page>")

print("</mediawiki>")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment