Skip to content

Instantly share code, notes, and snippets.

@mylamour
Last active August 19, 2021 06:05
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save mylamour/985c5c11ba70dd2c4d8fccbe60f2679e to your computer and use it in GitHub Desktop.
Easily extract all images from an EPUB file, and extract the highlighted passages as notes.
import os

import ebooklib
from ebooklib import epub

# Script 1: dump every image embedded in the EPUB into ./test.
book = epub.read_epub('./Enterprise Security Architecture.epub')

# open() does not create missing directories, so make sure the output
# directory exists before the first write.
os.makedirs('./test', exist_ok=True)

for image in book.get_items_of_type(ebooklib.ITEM_IMAGE):
    # Only the last path component of the archive-internal name is kept, so
    # all images land directly in ./test. Images that share a base name in
    # different archive folders will overwrite each other.
    with open('./test/{}'.format(image.file_name.split('/')[-1]), 'wb') as im:
        im.write(image.content)
import ebooklib
from ebooklib import epub
from bs4 import BeautifulSoup

# Script 2: print the book's headings and italic passages as Markdown notes.
book = epub.read_epub('./Enterprise Security Architecture.epub')

if __name__ == '__main__':
    # Chapter counter runs across all documents; it is bumped on every <h1>.
    ch_No = 0
    for doc in book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
        # Only documents whose name starts with this prefix are processed.
        if not doc.get_name().startswith('K16265_C0'):
            continue

        # Section counter restarts for each processed document.
        sub_No = 0
        # Name the parser explicitly: without it BeautifulSoup picks whichever
        # parser happens to be installed and emits a warning, so results can
        # differ between machines.
        soup = BeautifulSoup(doc.get_content(), 'html.parser')

        # Walk every tag in document order so headings and notes are printed
        # in the same sequence as they appear in the book.
        for tag in soup.find_all():
            if tag.name == 'h1':
                ch_No += 1
                print('# {}. {}'.format(ch_No, tag.text))
            elif tag.name == 'h2':
                # "To Summarise" headings are skipped and not numbered.
                if not tag.text.startswith('To Summarise'):
                    sub_No += 1
                    print('## {}.{}. {}'.format(ch_No, sub_No, tag.text))
            elif tag.name == 'p':
                # Only the first span with class "cItalic" in each paragraph
                # is emitted, as a bullet item.
                italic = tag.find('span', 'cItalic')
                if italic:
                    print('*', italic.get_text(strip=True))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment