Skip to content

Instantly share code, notes, and snippets.

@shimizukawa
Last active April 6, 2022 00:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save shimizukawa/22b2505f3df04ce95dd07a4f38ee02df to your computer and use it in GitHub Desktop.
Save shimizukawa/22b2505f3df04ce95dd07a4f38ee02df to your computer and use it in GitHub Desktop.
from pathlib import Path
import dataclasses
import typing
import re
import ebooklib
from ebooklib import epub
import html2text
import m2r
# Base directory for generated files; main() creates one sub-directory per
# EPUB beneath it, named after the EPUB file's stem.
OUT_DIR = Path('./output')
# Template for the generated Sphinx conf.py. The single '{}' placeholder is
# filled with the book title via str.format() in main(). master_doc points at
# the 'sphinx.rst' index file that main() writes next to conf.py.
SPHINX_CONFIG = """
project = '{}'
copyright = '<YOUR COPYRIGHT>'
author = '<AUTHOR>'
release = version = '<VERSION>'
master_doc = 'sphinx'
extensions = [
]
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', '.venv']
html_theme = 'bizstyle'
"""
@dataclasses.dataclass
class Heading:
    """A table-of-contents node paired with its position in the TOC tree."""

    # Section path: one 1-based index per list nesting level, e.g. (3, 2) for
    # the second entry inside the third top-level entry. It is variable-length
    # (and may be empty), hence ``Tuple[int, ...]`` rather than ``Tuple[int]``.
    sec: typing.Tuple[int, ...]
    # The TOC node itself, as found in the book's ``toc`` structure.
    node: typing.Union[epub.Link, epub.Section]
def process_toc(toc, sec=()) -> typing.List[Heading]:
    """Flatten a nested table of contents into a list of ``Heading`` objects.

    Args:
        toc: A TOC fragment. A ``list`` is treated as a sequence of sibling
            entries, a ``tuple`` as a pair whose first two elements are each
            processed with the current section path, and anything else as a
            leaf node.
        sec: Section path accumulated so far (tuple of 1-based indices).

    Returns:
        The headings in document order, each carrying its section path.
    """
    if isinstance(toc, tuple):
        # Both halves of the pair keep the current path; numbering only
        # advances when a list of siblings is entered.
        return process_toc(toc[0], sec) + process_toc(toc[1], sec)

    if isinstance(toc, list):
        headings = []
        for number, entry in enumerate(toc, start=1):
            headings.extend(process_toc(entry, sec=sec + (number,)))
        return headings

    return [Heading(sec, toc)]
# Characters that must not end up in a generated file name: any whitespace,
# path separators ('/' and '\'), and the characters Windows reserves
# (? : * < > | ").
NORMALIZER = re.compile(r'[\s?:*/\\<>|"]')


def normalize(name):
    """Turn a heading title into a string that is safe to use as a file name.

    Every unsafe character (see ``NORMALIZER``) becomes ``-``; runs of hyphens
    and en dashes collapse into a single ``-``; and a ``.-`` sequence (as left
    behind by "1. Title") is shortened to ``.``.

    Args:
        name: The raw title text.

    Returns:
        The sanitized name. An empty input yields an empty string.
    """
    normalized = NORMALIZER.sub('-', name)
    # \u2013 is the en dash; spelled as an escape because it is easy to
    # confuse with the plain hyphen next to it.
    normalized = re.sub(r'[\u2013-]+', '-', normalized)
    normalized = re.sub(r'\.-', '.', normalized)
    return normalized
def main(epub_filename):
    """Convert an EPUB file into Markdown/reST chapters plus a Sphinx project.

    Output goes to ``OUT_DIR/<epub file stem>/``:

    * one ``.md`` and one ``.rst`` file per top-level TOC entry (deeper
      entries are appended to the file of the enclosing top-level entry),
    * the book's image and cover items,
    * ``sphinx.rst`` (index with a toctree) and ``conf.py``.

    Args:
        epub_filename: Path of the EPUB file to convert.

    Raises:
        RuntimeError: A TOC entry references a document that is not in the
            book and whose base file has not been converted already.
        NotImplementedError: The book contains an item type that is neither
            skipped nor written by this function.
    """
    out_dir = Path(OUT_DIR / Path(epub_filename).stem)
    out_dir.mkdir(exist_ok=True, parents=True)
    book = epub.read_epub(epub_filename)
    heads = process_toc(book.toc)
    # If everything hangs under a single top-level section, descend one level
    # so that section's children become the top-level chapters. The ``heads``
    # check keeps an empty TOC from raising IndexError.
    if heads and heads[-1].sec[0] == 1:
        heads = process_toc(book.toc[0][1])
    cover = ''
    names = []
    refs = set()

    # writing chapters
    for h in heads:
        href = h.node.href
        item = book.get_item_with_href(href)
        title = h.node.title
        if item is None:
            if h.node.href.split('#', 1)[0] in refs:
                # The fragment points into a document that was already
                # converted, so there is nothing more to write.
                continue
            raise RuntimeError("Can't get item for:", href, title)
        refs.add(href)
        basename = normalize(title)
        depth = len(h.sec)
        md = html2text.html2text(item.get_content().decode(), bodywidth=0)

        # heading level
        # partition() never raises, unlike split('\n', 1) on one-line content.
        first_line, _, rest_lines = md.partition('\n')
        if depth == 1 and first_line.startswith('# '):
            # Rebuild the chapter title with an overline as well as the
            # underline that m2r produced.
            rst_first_line = m2r.convert(first_line)
            _t, _hr = rst_first_line.strip().split('\n')
            rst_first_line = f'{_hr}\n{_t}\n{_hr}\n'
        else:
            first_line = '#' * (depth-2) + first_line
            rst_first_line = m2r.convert(first_line)
        rst = rst_first_line + m2r.convert(rest_lines)

        # writing MD
        if depth == 1:
            file_md = out_dir / f'{basename}.md'
            # Clear any old file; write_text closes the handle, whereas a
            # bare open('w') would leak it.
            file_md.write_text('', encoding='utf-8')
        with file_md.open('a', encoding='utf-8') as f:
            f.write(md)

        # writing reST
        if depth == 1:
            file = out_dir / f'{basename}.rst'
            file.write_text('', encoding='utf-8')  # clear old file
            names.append(file.relative_to('.').stem)
            print('Writing ...', h.sec[0], href, file.relative_to('.'))
        with file.open('a', encoding='utf-8') as f:
            f.write(rst)

    # writing rest of items
    for item in book.get_items():
        file = (out_dir / item.get_name())
        if item.file_name in refs:
            continue
        elif item.get_type() == ebooklib.ITEM_DOCUMENT:
            print('skip unprocessed document', item)
            continue
        elif item.get_type() == ebooklib.ITEM_NAVIGATION:
            print('skip navigation file')
            continue
        elif item.get_type() == ebooklib.ITEM_STYLE:
            print('skip style file')
            continue
        elif item.get_type() == ebooklib.ITEM_FONT:
            print('skip font file')
            continue
        file.parent.mkdir(exist_ok=True, parents=True)
        if item.get_type() == ebooklib.ITEM_IMAGE:
            print('Writing ...', file.relative_to('.'))
            file.write_bytes(item.get_content())
        elif item.get_type() == ebooklib.ITEM_COVER:
            file = file.with_name('cover.png')
            print('Writing ...', file.relative_to('.'))
            file.write_bytes(item.get_content())
            # Forward slashes keep the path valid inside conf.py on any OS.
            cover = file.relative_to(out_dir).as_posix()
        else:
            raise NotImplementedError('Unknown Type: %s' % item)

    # writing sphinx index
    # UTF-8 is explicit so non-ASCII titles do not depend on the locale.
    with (out_dir / 'sphinx.rst').open('w', encoding='utf-8') as f:
        f.write('='*len(book.title) + '\n')
        f.write(f'{book.title}\n')
        f.write('='*len(book.title) + '\n')
        f.write('\n')
        f.write('.. toctree::\n')
        f.write('\n')
        f.write(''.join([f' {name}\n' for name in names]))

    # writing sphinx config
    with (out_dir / 'conf.py').open('w', encoding='utf-8') as f:
        # The title lands inside a single-quoted Python literal, so
        # backslashes and single quotes must be escaped to keep conf.py valid.
        project = book.title.replace('\\', '\\\\').replace("'", "\\'")
        f.write(SPHINX_CONFIG.format(project))
        if cover:
            f.write(f"html_logo = '{cover}'\n")
if __name__ == '__main__':
    import sys

    # Exit with a usage hint rather than an IndexError traceback when the
    # EPUB path is missing. Extra arguments are still ignored, as before.
    if len(sys.argv) < 2:
        sys.exit(f'usage: python {sys.argv[0]} BOOK.epub')
    main(sys.argv[1])
(.venv) > python converter.py EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896.epub
Writing ... 1 c45552a6-d4a0-43a1-a46d-8b077d65114a.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Title-Page.rst
Writing ... 2 8f76a324-86ff-47be-b6f2-c97a1653c271.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Copyright-and-Credits.rst
Writing ... 3 e93c552e-76d3-46f5-a489-debcabd838ff.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Dedication.rst
Writing ... 4 90e66f3a-32c8-40c7-9511-e94b315162cb.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/About-Packt.rst
Writing ... 5 add088be-8cdc-473d-b804-5900cba446b1.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Contributors.rst
Writing ... 6 a5965442-971b-4aeb-855a-9b3d0efa38b9.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Preface.rst
Writing ... 7 b8f395bd-1bf2-4bac-9072-6c170cd1bf2d.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Section-1-Before-You-Start.rst
Writing ... 8 fc34a3fb-f668-423e-9251-92f3f46f06d1.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Current-Status-of-Python.rst
Writing ... 9 c3f8e8d6-6134-4264-b384-fc50a02a5a0a.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Modern-Python-Development-Environments.rst
Writing ... 10 847e77a3-6560-41eb-9cdf-51b13511727a.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Section-2-Python-Craftsmanship.rst
Writing ... 11 f0d4715b-ef6e-4d14-b8a9-9c71913a38d0.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Modern-Syntax-Elements-Below-the-Class-Level.rst
Writing ... 12 261a3e3d-f924-40d5-95ef-bcfb39ba751b.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Modern-Syntax-Elements-Above-the-Class-Level.rst
Writing ... 13 7e761866-2637-4e14-8fa6-93bbda555a91.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Elements-of-Metaprogramming.rst
Writing ... 14 3863a217-5970-4ea5-b1a6-78509517a1b6.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Choosing-Good-Names.rst
Writing ... 15 65a3a432-ff05-4b32-9f03-9a4ab9a2bf1e.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Writing-a-Package.rst
Writing ... 16 6b9a94bc-5f99-49b4-a1fc-000f0a941eae.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Deploying-the-Code.rst
Writing ... 17 8286b1c7-b669-4b55-9441-1316e6820925.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Python-Extensions-in-Other-Languages.rst
Writing ... 18 fcabbbc9-0960-494e-92b0-c9146832e04c.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Section-3-Quality-over-Quantity.rst
Writing ... 19 8f31752f-8519-435c-b980-3598302185d2.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Managing-Code.rst
Writing ... 20 17633700-147b-4cd2-847e-2c0c0041e6fa.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Documenting-Your-Project.rst
Writing ... 21 de2d943f-73d3-4c50-8164-d3e1902f8f4a.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Test-Driven-Development.rst
Writing ... 22 febc1b7b-38eb-4130-937c-f5dba7265359.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Section-4-Need-for-Speed.rst
Writing ... 23 f88ad4e5-495e-45b2-a0e0-723d649026b6.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Optimization-Principles-and-Profiling-Techniques.rst
Writing ... 24 6f642a0b-3ed1-42d4-8bf9-b24d5674cd63.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Optimization-Some-Powerful-Techniques.rst
Writing ... 25 76ceb1e8-c58f-4d30-b778-b18ea34f7dd6.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Concurrency.rst
Writing ... 26 e6519caf-824e-422f-b92a-d001a8fe6c23.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Section-5-Technical-Architecture.rst
Writing ... 27 03cb5aa7-33fd-48cd-b735-c5de6e50b7ad.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Event-Driven-and-Signal-Programming.rst
Writing ... 28 77e7750c-61c8-404f-ae6a-e0b4c615cac5.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Useful-Design-Patterns.rst
Writing ... 29 1ef29cdc-f110-4dd3-992f-81fbbd066314.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/reStructuredText-Primer.rst
Writing ... 30 b41673c4-025b-4ab6-b3d0-e75dc827c03b.xhtml output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/Other-Books-You-May-Enjoy.rst
skip unprocessed document <EpubNav:nav:toc.xhtml>
skip navigation file
skip style file
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/cover.png
skip unprocessed document <EpubHtml:cover:cover.xhtml>
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/543c151d-e8a7-4179-bd10-6505abe50fee.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/1e2e68c9-e2ca-453e-a961-ee25050ddba5.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/bd2c0520-12be-4f25-ba11-ad040526de22.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/b4cdabc5-d7e4-4d31-a4db-ba7f967da5a5.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/be2295e4-dfdb-4002-a182-a4ed1a458140.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/6d749875-b47a-4d69-9393-21bf70dd6df2.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/e265aa5b-8a02-47da-906b-3f196372220f.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/78e369c4-f3ed-4d80-829a-099a4e60cab3.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/781952c3-4355-4995-ae2a-00df052830fd.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/e8606fc8-7898-423b-a167-d2484879c89b.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/315bffc1-0003-4d35-8701-fd8334315c4b.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/2cb385f6-554a-4bd9-b4c9-e5f74fdba446.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/d25f4da6-ec9b-47dd-b42a-015d45065e66.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/465a2a53-dcff-416e-a794-05c0d11eb4f3.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/422d1e61-b70a-4cb8-9603-e7d63debd50f.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/8d1614d9-68b1-4a73-a66a-fcb5e24e025f.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/9bb3759a-c1dd-4dd5-a295-607f982ab575.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/84cc8f8c-a654-4ded-9c37-e68300f4b266.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/69dbbdf1-6b10-4661-8f7c-ff529f30ee41.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/8312890a-eda7-4f78-92df-082187e0c96e.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/d28842d5-0249-4b7a-8bd0-fba3aa13e9ea.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/4e8905eb-9467-42b4-97f1-fcf592d85d9a.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/48fbcdab-523b-4103-a439-8ba3f4722119.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/2f9f2e8a-5181-4aaf-bf04-c0643627fe24.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/804d12e0-5d34-4335-ab50-aa332cdd00d9.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/d6dd77f5-df0b-4e2f-bb1c-37e85b042eda.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/4dd1064a-b7e7-41b2-af66-77ad3656ebb1.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/10264766-b20a-4572-86aa-ce247a85a099.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/85a843aa-300f-42ff-9d89-e282b453910d.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/5f392a78-61b5-4910-bc9f-9bf825b18860.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/a4ccc16f-0a01-45ba-b83b-2a83ca174f7f.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/2fa84c7b-242d-46a8-b5fe-aced5f0d45b5.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/d95377ce-f0be-4703-a93a-72a6bbca669e.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/e75ad983-416d-4c7a-b4e8-43d5d0c01d36.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/c8d76b24-9d53-406b-9d76-1d21614a5fe7.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/a1c04688-0018-4163-8a0d-4c7e24e1378c.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/85e249c3-2dfd-4772-bc5b-49fbdb2b714d.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/40d83a1e-ddca-4879-a6c5-1b52c7cce55a.png
Writing ... output/EXPERT_PYTHON_PROGRAMMING_THIRD_EDITION-9781789808896/assets/78f4ee59-bcca-4061-bece-284f4b23d03d.png
docutils==0.16
EbookLib==0.17.1
html2text==2020.1.16
lxml==4.5.0
m2r==0.2.1
mistune==0.8.4
six==1.14.0
EbookLib
html2text
m2r
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment