このスクリプトはshuyo/concatAozora.pyのforkです。
AozoraEpub3を使うと良い。 こんな感じに設定しておくと、目次も良い感じに生成される(細かいところは好みで)。
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Concatenate Aozora-style text files into a single ``output.txt``.

How to use:
    ./concatAozora.py -t title a.zip b.zip ... z.zip

Each input is either a plain text file or a ``.zip`` archive whose first
``.txt`` member is used.  Inputs are decoded, and the output is encoded, with
the codec named by ``CODE``.  Every input is read as three header lines
(title, chapter name, author) followed by the text; each input becomes one
chapter of the output, introduced by a page break and a heading built from
its chapter name.
"""
import codecs
import contextlib
import os
import sys
import zipfile

CODE = "shift_jis"

# A line starting with this run of dashes discards everything collected so far.
_SEPARATOR = "-------------------------------------------------------"

# A line starting with this marker ends the text that is copied to the output.
_COLOPHON = "底本:"

# Applied in this order on purpose: each heading level is renamed to the next
# lower one, so the lowest level has to be rewritten before the level above
# it is renamed onto it.
_HEADING_REWRITES = (
    ("[#小見出し]", "[#ここから太字]"),
    ("[#小見出し終わり]", "[#ここで太字終わり]"),
    ("小見出し", "太字"),
    ("中見出し", "小見出し"),
    ("大見出し", "中見出し"),
)


def _demote_headings(line):
    """Return *line* with every heading annotation moved one level down."""
    for old, new in _HEADING_REWRITES:
        line = line.replace(old, new)
    return line


def _read_body(lines):
    """Collect the text of one book from the iterator *lines*.

    Everything gathered before a separator line is dropped, reading stops at
    the first line that starts with the colophon marker, and all other lines
    are kept with their heading annotations demoted.
    """
    parts = []
    for line in lines:
        if line.startswith(_SEPARATOR):
            parts = []
        elif line.startswith(_COLOPHON):
            break
        else:
            parts.append(_demote_headings(line))
    return "".join(parts)


def _read_book(filename):
    """Read one input file.

    Returns a ``(first_line, chapter, author, body)`` tuple, or ``None`` (after
    writing a message to stderr) when the archive holds no ``.txt`` member or
    the file has fewer than three lines.  ``first_line`` and ``author`` keep
    their line endings; ``chapter`` has its trailing CR/LF removed.
    """
    # ExitStack closes the archive, the archive member and the text reader on
    # every exit path, including the early returns below.
    with contextlib.ExitStack() as stack:
        if filename.lower().endswith(".zip"):
            archive = stack.enter_context(zipfile.ZipFile(filename))
            targets = [name for name in archive.namelist() if name.endswith(".txt")]
            if not targets:
                sys.stderr.write("No .txt file in %s\n" % filename)
                return None
            raw = stack.enter_context(archive.open(targets[0]))
            lines = codecs.iterdecode(raw, CODE)
        else:
            lines = stack.enter_context(codecs.open(filename, "rb", CODE))

        try:
            first_line = next(lines)
            chapter = next(lines).rstrip("\r\n")
            author = next(lines)
        except StopIteration:
            sys.stderr.write("Incomplete header in %s\n" % filename)
            return None
        return first_line, chapter, author, _read_body(lines)


def main(argv=None):
    """Build ``output.txt`` in the current directory from the given inputs.

    argv: command-line arguments without the program name; defaults to
        ``sys.argv[1:]``.  An optional leading ``-t TITLE`` sets the title
        written at the top of the output; without it the first line of the
        input files is used.

    Returns the process exit status: 0 on success, 2 on a usage error.
    Arguments that are not existing files are skipped silently.
    """
    files = list(sys.argv[1:] if argv is None else argv)

    title = None
    use_file_title = True
    if files[:1] == ["-t"]:
        if len(files) < 2:
            sys.stderr.write("usage: concatAozora.py [-t title] file ...\n")
            return 2
        title = files[1]
        files = files[2:]
        use_file_title = False

    with codecs.open("output.txt", "wb", CODE) as out:
        first_page = True
        for filename in files:
            if not os.path.isfile(filename):
                continue
            book = _read_book(filename)
            if book is None:
                continue
            first_line, chapter, author, body = book
            if use_file_title:
                title = first_line

            if first_page:
                # Title and author go out once, ahead of the first chapter.
                # Values read from a file still carry their own line ending.
                out.write(title)
                out.write("\n")
                out.write(author)
                out.write("\n")
                first_page = False
            out.write("\n[#改ページ]\n")
            out.write(chapter + "[#「" + chapter + "」は大見出し]")
            out.write("\n")
            out.write(body)
    return 0


if __name__ == "__main__":
    sys.exit(main())