- mktsv.py → 生ログを一つのTSVに変換、統合
- mktree.py → TSVファイルからツリー構造を構築、HTMLで吐き出す
- macro.html → マクロ掲示板を変換したサンプル
どちらも素朴なテスト実装なので、速度やメモリ効率は気にしない。
200308といったログファイルが入っているディレクトリで順番に実行
$ python3 mktsv.py
$ python3 mktree.py > output.html
import csv | |
def writeLi(x, ind):
    """Print one message and, recursively, its replies as a nested HTML <li>.

    x   -- mapping with the keys "id", "title", "name", "time" and "body";
           an optional "children" key holds the list of reply mappings.
    ind -- nesting depth; the header line is prefixed with that many copies
           of the indent string.

    All values are written to stdout verbatim (no HTML escaping is applied).
    """
    print("<li>")
    # NOTE(review): a plain space collapses in rendered HTML; the indent
    # string may originally have been an entity such as a non-breaking
    # space -- confirm against the original script.
    prefix = " " * ind
    header_fields = (prefix, x["id"], x["title"], x["name"], x["time"])
    print("<div>%s [%s] %s %s %s</div>" % header_fields)
    print(x["body"])
    # The <ul> wrapper is emitted even when there are no replies.
    print("<ul>")
    for reply in x.get("children", ()):
        writeLi(reply, ind + 1)
    print("</ul></li>")
def read_rows(lines):
    """Parse the TSV produced by mktsv.py into a list of row dicts.

    lines -- an open text file (or any iterable of lines) whose first line
             is the tab-separated header.

    Quote processing is disabled: mktsv.py writes every field verbatim and
    restores literal '"' characters in the text, so with the csv module's
    default quoting a field that starts with '"' would be read as a quoted
    field and could swallow the following tabs and line breaks.
    """
    return list(csv.DictReader(lines, delimiter="\t", quoting=csv.QUOTE_NONE))


def link_children(rows):
    """Attach each row to the row(s) whose "id" equals its "parent".

    A row that has replies gets a "children" list (in file order); every
    row that was attached to some parent is flagged with "noroot" = True.
    The rows are modified in place and the same list is returned.

    This is a deliberately naive O(n^2) scan, kept as in the original.
    """
    for node in rows:
        replies = [row for row in rows if row["parent"] == node["id"]]
        if replies:
            node["children"] = replies
            for reply in replies:
                reply["noroot"] = True
    return rows


def main():
    """Read data.tsv from the current directory and print the thread tree as HTML."""
    with open("data.tsv", newline="") as f:
        rows = read_rows(f)
    link_children(rows)

    print('<style>div{background:#DDF}</style><meta charset="utf-8"><ul>')
    for row in rows:
        if "noroot" not in row:  # only thread roots start a top-level item
            writeLi(row, 0)
    print('</ul>')


if __name__ == "__main__":
    main()
import glob,os,re | |
# Header row of the generated TSV; the first column is the message id taken
# from the log file name, the rest are the fields found inside each log.
TSV_HEADER = "id\ttime\tunique\tname\ttitle\tbody\temail\turl\thost\tagent\tunknown1\tunknown2\tunknown3\tparent\n"

# Escapes that are turned back into literal characters, applied in this order.
# NOTE(review): the left-hand spellings are reconstructed. In the copy of the
# script this was restored from, the entities had already been decoded (the
# replacements read ";" -> ";", "_" -> "_" and an unterminated quote literal),
# so the exact escape forms used by the logs must be confirmed against a
# real log file.
ENTITY_REPLACEMENTS = (
    (b"&#59;", b";"),
    (b"&#95;", b"_"),
    (b"&quot;", b"\""),
)

# Matches a "[n]" row-number prefix in front of a numeric field.
ROW_NUMBER_RE = re.compile(r'\t\[\d*\](\d*)\t')


def message_id(path):
    """Return the file name of *path* without directory and extension."""
    return os.path.splitext(os.path.basename(path))[0]


def convert_log(raw):
    """Convert the raw bytes of one log file into a tab-separated text record.

    Steps, in order: every '"' becomes a tab, the escapes listed in
    ENTITY_REPLACEMENTS are turned back into literal characters, NUL bytes
    are dropped, the bytes are decoded as cp932 (undecodable bytes are
    replaced), and a "[n]" row-number prefix is stripped from numeric fields.
    """
    # '"' -> tab must happen before unescaping, otherwise restored literal
    # quotes would be turned into tabs as well.
    data = raw.replace(b"\"", b"\t")
    for escaped, literal in ENTITY_REPLACEMENTS:
        data = data.replace(escaped, literal)
    data = data.replace(b"\0", b"")
    # cp932, not Python's stricter "shift_jis" codec.
    text = data.decode("cp932", "replace")
    return ROW_NUMBER_RE.sub("\t\\1\t", text)


def main():
    """Merge every *.log file below the current directory into data.tsv."""
    # The output is opened first (as before), so data.tsv is created even
    # when no log file is found; "with" closes it on errors too.
    with open("data.tsv", "w") as out:
        log_paths = glob.glob('**/*.log', recursive=True)
        # sort key == filename == message id
        log_paths.sort(key=lambda p: int(message_id(p)))
        out.write(TSV_HEADER)
        for path in log_paths:
            print(path)
            with open(path, "rb") as log:
                record = convert_log(log.read())
            out.write(message_id(path) + "\t" + record + "\n")


if __name__ == "__main__":
    main()