Last active August 19, 2021 06:05
Easily extract all images from an EPUB file, and extract highlighted passages as notes.
import ebooklib
from ebooklib import epub
# Dump every image embedded in the EPUB into ./test/, one file per image.
book = epub.read_epub('./Enterprise Security Architecture.epub')
for image in book.get_items_of_type(ebooklib.ITEM_IMAGE):
    # Only the last path component of the item's archive name is kept, so
    # images from different folders inside the EPUB all land directly in
    # ./test/ (items sharing a base name overwrite each other).
    # NOTE: ./test/ must already exist; open() does not create directories.
    with open('./test/{}'.format(image.file_name.split('/')[-1]), 'wb') as im:
        # The `with` statement previously had no body (a syntax error, and
        # nothing was ever written). Write the item's raw bytes to the file.
        im.write(image.get_content())
import ebooklib
from ebooklib import epub
from bs4 import BeautifulSoup
# Print a Markdown-style outline (numbered h1/h2 headings) of selected
# documents in the EPUB.
# NOTE(review): indentation has been lost in this copy of the script; the
# nesting described in the comments below is inferred from statement order --
# confirm against the original.
# The book is loaded at module level, before the __main__ guard.
book = epub.read_epub('./Enterprise Security Architecture.epub')
if __name__ == '__main__':
# Running chapter number; incremented in the 'h1' branch below.
ch_No = 0
for doc in book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
# Only documents whose name starts with this prefix are processed.
if doc.get_name().startswith('K16265_C0'):
# Sub-section number; reset to 0 for each matching document.
sub_No = 0
# No parser argument is passed to BeautifulSoup here.
soup = BeautifulSoup(doc.get_content())
# `p` is not used anywhere in the visible lines.
p = soup.findAll('body')
# find_all() with no arguments: iterate over the tags of the parsed document.
for tag in soup.find_all():
# NOTE(review): the left-hand operand of `==` is missing (syntax error as
# written); presumably `tag.name` -- confirm. Same on the 'h2' and 'p' tests.
if == 'h1':
ch_No = ch_No + 1
# Heading printed as "# <chapter>. <text>".
print('# {}. {}'.format(ch_No, tag.text))
if == 'h2':
# Headings whose text starts with 'To Summarise' are neither numbered nor printed.
if not tag.text.startswith('To Summarise'):
sub_No = sub_No + 1
# Heading printed as "## <chapter>.<sub>. <text>".
print('## {}.{}. {}'.format(ch_No, sub_No, tag.text))
if == 'p':
# Looks for a <span> matching 'cItalic' (presumably its CSS class) inside the tag.
italic = tag.find('span', 'cItalic')
# NOTE(review): the body of this `if` is not present in this view.
if italic:
