shogo82148/extactAttach.py

## extactAttach.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
import re
from struct import *
import glob
import os
import datetime
import locale
import shutil

# Directory that is scanned for hex-named attachment files (placeholder path).
SRC = r"path/to/pukiwiki/attach"
# Directory the copies with decoded names are written into.
DST = r"attach"
# Character encoding used to decode the page and file names.
code = "euc-jp"

#cf. http://ameblo.jp/mar-ameblo/entry-10020372509.html
def bytelist(s):
    """Convert a string of hex digit pairs into their integer byte values.

    Characters at even and odd positions of ``s`` are paired up and each
    pair is parsed as one base-16 number, e.g. ``"4142"`` gives 65 and 66.
    """
    def pair_value(high, low):
        # Parse the two digits as a single "0x"-prefixed base-16 number.
        return int("0x" + high + low, 16)

    high_digits = s[0::2]
    low_digits = s[1::2]
    return map(pair_value, high_digits, low_digits)
def decode(s):
    """Turn a hex-encoded name into the raw byte string it represents.

    Every byte value produced by ``bytelist(s)`` is packed into a one-byte
    string and the pieces are concatenated in order.
    """
    packed = [pack("B", value) for value in bytelist(s)]
    return "".join(packed)

TargetFile = re.compile("^([0-9a-fA-F][0-9a-fA-F])+_([0-9a-fA-F][0-9a-fA-F])+$");
for file in glob.glob(SRC + "/*"):
    title = os.path.basename(file);
    if not title or not TargetFile.match(title):
        continue
    (page, filename) = title.split("_")
    try:
        page = decode(page).decode(code)
        filename = decode(filename).decode(code);
    except:
        page = ""
        filename = ""
    if filename:
        print title + ":" + page + "_" + filename
        try:
            shutil.copyfile(file, DST + "/" + page + "_" + filename)
        except:
            pass

## puki2media.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
import re
from struct import *
import glob
import os
import datetime
import locale

# Directory that is scanned for hex-named page files (placeholder path).
SRC = r"path/to/pukiwiki/wiki"

# Ordered (compiled pattern, replacement) pairs; filled in by InitPuki2Media().
Puki2MediaRules = []
# Matches "&ref(...);" and captures the argument list.
RefRE1 = re.compile(r"&ref\((.*?)\);")
# Matches "#ref(...)" and captures the argument list.
RefRE2 = re.compile(r"#ref\((.*?)\)")
def InitPuki2Media():
    """(Re)build ``Puki2MediaRules``, the ordered markup rewrite table.

    Each entry is a ``(compiled pattern, replacement)`` pair.  Patterns are
    compiled with ``re.MULTILINE`` so ``^`` and ``$`` match at every line.
    The order is significant because the rules are applied one after
    another to the same text; for example the list rules emit leading
    ``*`` and ``#`` characters and therefore come after the heading rules
    and the rule that deletes ``#`` lines.

    The table is replaced in place, so calling this function more than once
    does not duplicate rules (duplicates would escape ``&`` twice).
    """
    rules = (
        # Trailing "~" becomes a line break tag.
        (r"~$", r"<br>"),
        # Headings: leading "***", "**", "*" (longest first); an optional
        # trailing "[#anchor]" is dropped.
        (r"^\*\*\*(.*?)(\[#\w+\])?$", r"====\1===="),
        (r"^\*\*(.*?)(\[#\w+\])?$", r"===\1==="),
        (r"^\*(.*?)(\[#\w+\])?$", r"==\1=="),
        # Bullet lists: leading "-" markers become "*" markers.
        (r"^---([^-].*)$", r"***\1"),
        (r"^--([^-].*)$", r"**\1"),
        (r"^-([^-].*)$", r"*\1"),
        # Definition list ":term|description" becomes ";term:description".
        (r"^:([^|]*)\|(.*)$", r";\1:\2"),
        # "%%text%%" becomes deleted text.
        (r"%%(.*?)%%", r"<del>\1</del>"),
        # Any remaining line starting with "#" is emptied.
        (r"^#.*$", ""),
        # Numbered lists: leading "+" markers become "#" markers.
        (r"^\+\+\+(.*)$", r"###\1"),
        (r"^\+\+(.*)$", r"##\1"),
        (r"^\+(.*)$", r"#\1"),
        # "[[label>url]]" or "[[label:url]]" becomes "[url label]".
        (r"\[\[([^\[\]>]*)[>:](https?://[^\[\]>]*)\]\]", r"[\2 \1]"),
        # "[[label>page]]" becomes "[[page|label]]".
        (r"\[\[([^\[\]>]*)>([^\[\]>]*)\]\]", r"[[\2|\1]]"),
        # "&color(c){text};" becomes a coloured span.
        (r"&color\(([^),]*)\)\{(.*?)\};", "<span style=\"color:\\1\">\\2</span>"),
        # Entity-escape "&", "<" and ">" ("&" first so the entities written
        # by the next two rules are not escaped again).  This also escapes
        # the tags produced above -- presumably so the text can be embedded
        # in an XML document; confirm against the caller.
        (r"&", r"&amp;"),
        (r"<", r"&lt;"),
        (r">", r"&gt;"),
        # "//" comment lines become "<!--...-->" with every "-" removed
        # from the comment text.
        (r"^//(.*)$", lambda x:"<!--"+x.group(1).replace("-", "")+"-->"),
        )
    # Slice assignment keeps the same list object that other code refers to.
    Puki2MediaRules[:] = [
        (re.compile(pattern, re.MULTILINE), replacement)
        for (pattern, replacement) in rules
    ]

# Fill Puki2MediaRules at import time, before Puki2Media() is first called.
InitPuki2Media()
def Puki2Media(title, wiki):
    """Convert the text of one page using the rewrite rules and table logic.

    Args:
        title: Page title; it prefixes attachment names, which are written
            as ``[[file:<title>_<name>]]``.
        wiki: Source text of the page.

    Returns:
        The converted text.  Each non-table line is followed by a newline.
        Consecutive lines that start with ``,`` or that start and end with
        ``|`` are emitted as one ``{| ... |}`` table, which is closed even
        when the table is the last thing in the input.
    """
    # Attachment conversion: only the first comma-separated argument of
    # "&ref(...);" / "#ref(...)" (the file name) is kept.
    def file_link(match):
        return "[[file:" + title + "_" + match.group(1).split(",")[0] + "]]"

    wiki = RefRE1.sub(file_link, wiki)
    wiki = RefRE2.sub(file_link, wiki)

    # Markup conversion: apply every rule in table order.
    for (pattern, replacement) in Puki2MediaRules:
        wiki = pattern.sub(replacement, wiki)

    # Table conversion.
    table_open = '{| cellspacing="0" border="1"\n'
    blanks = (" ", "\t")
    out = []
    in_table = False
    for line in wiki.split("\n"):
        if line.startswith(","):
            # Comma-separated table row.
            out.append("|-\n" if in_table else table_open)
            in_table = True
            for cell in line.split(",")[1:]:
                if cell[:1] in blanks:
                    # Leading blank: centred if there is also a trailing
                    # blank, right-aligned otherwise.
                    if cell[-1] in blanks:
                        out.append('|align="center"|')
                    else:
                        out.append('|align="right"|')
                else:
                    out.append("|")
                out.append(cell.strip() + "\n")
        elif len(line) >= 2 and line[0] == "|" and line[-1] == "|":
            # Pipe-delimited table row.
            out.append("|-\n" if in_table else table_open)
            in_table = True
            for cell in line.split("|")[1:-1]:
                out.append("|" + cell.strip() + "\n")
        else:
            if in_table:
                out.append("|}\n")
                in_table = False
            out.append(line + "\n")
    if in_table:
        # The input ended on a table row (no trailing newline): close it.
        out.append("|}\n")
    return "".join(out)

#http://ameblo.jp/mar-ameblo/entry-10020372509.html
def bytelist(s):
    """Convert a string of hex digit pairs into their integer byte values.

    Characters at even and odd positions of ``s`` are paired up and each
    pair is parsed as one base-16 number, e.g. ``"4142"`` gives 65 and 66.
    """
    def pair_value(high, low):
        # Parse the two digits as a single "0x"-prefixed base-16 number.
        return int("0x" + high + low, 16)

    high_digits = s[0::2]
    low_digits = s[1::2]
    return map(pair_value, high_digits, low_digits)
def decode(s):
    """Turn a hex-encoded name into the raw byte string it represents.

    Byte values equal to zero are dropped; every remaining value from
    ``bytelist(s)`` is packed into a one-byte string and the pieces are
    concatenated in order.
    """
    nonzero_values = [value for value in bytelist(s) if value]
    return "".join(pack("B", value) for value in nonzero_values)


print '<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.4/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.4/ http://www.mediawiki.org/xml/export-0.4.xsd" version="0.4" xml:lang="ja">\n'

TargetFile = re.compile("^([0-9a-fA-F][0-9a-fA-F])+$");
for file in glob.glob(src + "/*"):
    title = os.path.basename(file).split(".")[0];
    if not title or not TargetFile.match(title):
        continue
    f = open(file, "r")
    title = decode(title)
    print "<page>"
    print "<title>"+title+"</title>"
    print "<revision>"
    print "<timestamp>" + datetime.datetime.today().isoformat() + "</timestamp>"
    print '<text xml:space="preserve">' + \
        Puki2Media(title, "".join(f.readlines()))+ \
        "</text>\n</revision>\n</page>"
    f.close()
print "</mediawiki>"
	#!/usr/bin/python
	# -*- coding: utf-8 -*-
	import re
	from struct import *
	import glob
	import os
	import datetime
	import locale
	import shutil

	# Directory that is scanned for hex-named attachment files (placeholder path).
	SRC = r"path/to/pukiwiki/attach"
	# Directory the copies with decoded names are written into.
	DST = r"attach"
	# Character encoding used to decode the page and file names.
	code = "euc-jp"

	#cf. http://ameblo.jp/mar-ameblo/entry-10020372509.html
	def bytelist(s):
	    """Convert a string of hex digit pairs into their integer byte values.

	    Characters at even and odd positions of ``s`` are paired up and each
	    pair is parsed as one base-16 number, e.g. ``"4142"`` gives 65 and 66.
	    """
	    # The function body must be indented below the "def" line to parse.
	    return map((lambda a, b: int("0x" + a + b, 16)), s[0::2], s[1::2])
	def decode(s):
	    """Turn a hex-encoded name into the raw byte string it represents.

	    Every byte value produced by ``bytelist(s)`` is packed into a one-byte
	    string and the pieces are concatenated in order.
	    """
	    # The function body must be indented below the "def" line to parse.
	    return "".join(map((lambda x: pack("B", x)), bytelist(s)))

	TargetFile = re.compile("^([0-9a-fA-F][0-9a-fA-F])+_([0-9a-fA-F][0-9a-fA-F])+$");
	for file in glob.glob(SRC + "/*"):
	title = os.path.basename(file);
	if not title or not TargetFile.match(title):
	continue
	(page, filename) = title.split("_")
	try:
	page = decode(page).decode(code)
	filename = decode(filename).decode(code);
	except:
	page = ""
	filename = ""
	if filename:
	print title + ":" + page + "_" + filename
	try:
	shutil.copyfile(file, DST + "/" + page + "_" + filename)
	except:
	pass