Skip to content

Instantly share code, notes, and snippets.

@mylamour
Last active Aug 19, 2021
Embed
What would you like to do?
Easily extract all images from an EPUB file, and extract highlighted passages as notes.
import ebooklib
from ebooklib import epub
# Dump every image embedded in the EPUB into ./test/, one file per image.
import os

book = epub.read_epub('./Enterprise Security Architecture.epub')

# open(..., 'wb') does not create missing parent directories, so make sure
# the output folder exists before writing the first image.
os.makedirs('./test', exist_ok=True)

for image in book.get_items_of_type(ebooklib.ITEM_IMAGE):
    # Only the last path component of the archive name is kept, so all
    # images land flat in ./test/ regardless of their folder in the EPUB.
    with open('./test/{}'.format(image.file_name.split('/')[-1]), 'wb') as im:
        im.write(image.content)
import ebooklib
from ebooklib import epub
from bs4 import BeautifulSoup
book = epub.read_epub('./Enterprise Security Architecture.epub')

if __name__ == '__main__':
    # Print a Markdown outline of the book to stdout:
    #   '# N. title'      for every <h1> (chapter counter runs across documents)
    #   '## N.M. title'   for every <h2> (section counter restarts per document)
    #   '* text'          for the first <span class="cItalic"> in each <p>
    ch_No = 0
    for doc in book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
        # Only documents whose name carries this prefix are processed.
        if not doc.get_name().startswith('K16265_C0'):
            continue

        sub_No = 0
        # Name the parser explicitly: without it BeautifulSoup picks whichever
        # parser happens to be installed and emits GuessedAtParserWarning.
        soup = BeautifulSoup(doc.get_content(), 'html.parser')

        # find_all yields matches in document order, so headings and
        # highlighted paragraphs are printed in reading order.
        for tag in soup.find_all(['h1', 'h2', 'p']):
            if tag.name == 'h1':
                ch_No += 1
                print('# {}. {}'.format(ch_No, tag.text))
            elif tag.name == 'h2':
                # Headings starting with 'To Summarise' are neither numbered
                # nor printed.
                if not tag.text.startswith('To Summarise'):
                    sub_No += 1
                    print('## {}.{}. {}'.format(ch_No, sub_No, tag.text))
            else:
                # Paragraph: emit the text of its first italic span, if any.
                italic = tag.find('span', 'cItalic')
                if italic:
                    print('*', italic.get_text(strip=True))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment