-
-
Save alexshevchuk/0e113f909f1032944bdf1b5da634101e to your computer and use it in GitHub Desktop.
Builds epub book out of Paul Graham's essays.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
Builds an epub book out of Paul Graham's essays: http://paulgraham.com/articles.html
Author: Ola Sitarska <ola@sitarska.com>
Copyright: Licensed under the GPL-3 (http://www.gnu.org/licenses/gpl-3.0.html)
This script requires python-epub-library: http://code.google.com/p/python-epub-builder/
"""
import re, ez_epub, genshi | |
import urllib3 | |
from bs4 import BeautifulSoup | |
def addSection(link, title):
    """Fetch one essay and wrap its text in an ``ez_epub.Section``.

    Links without 'http' in them are treated as relative to
    http://www.paulgraham.com/ and parsed as HTML; the essay text is taken
    from the first ``<table width="435">`` on the page. Any other link is
    fetched as-is and its body is handled as plain text.

    Args:
        link: The ``href`` of the essay, relative or absolute.
        title: The title to give the section.

    Returns:
        The section. Extraction is best effort: if it fails part-way, a
        warning is printed and the section is returned with whatever text
        had been collected up to that point (possibly none).
    """
    is_local = 'http' not in link
    if is_local:
        page = http.request('GET', 'http://www.paulgraham.com/' + link).data
        soup = BeautifulSoup(page, 'lxml')
    else:
        page = http.request('GET', link).data

    section = ez_epub.Section()
    try:
        section.title = title
        print(section.title)

        if is_local:
            essay_table = soup.find_all('table', {'width': '435'})[0]
            fonts = essay_table.find_all('font')
            # No <font> at all is handled like a too-short one: fall back to
            # the paragraphs instead of failing on an empty list.
            font = str(fonts[0]) if fonts else ''

            # The first <font> is only used when it is long enough to be an
            # essay and does not contain one of these banner phrases.
            banner_phrases = (
                'Get funded by',
                'Watch how this essay was',
                'Like to build things?',
            )
            font_is_essay = len(font) >= 100 and not any(
                phrase in font for phrase in banner_phrases
            )
            if font_is_essay:
                content = font
            else:
                content = ''.join(str(par) for par in essay_table.find_all('p'))

            # A double line break separates paragraphs. Match every spelling
            # of the tag (<br>, <br/>, <br />), since the serialised form
            # depends on the parser library and is not always "<br />".
            for chunk in re.split(r'<br\s*/?>\s*<br\s*/?>', content):
                section.text.append(genshi.core.Markup(chunk))

            # exception for Subject: Airbnb
            for pre in soup.find_all('pre'):
                section.text.append(genshi.core.Markup(str(pre)))
        else:
            # The response body is bytes; str() on it would produce the
            # "b'...'" repr with escaped newlines rather than the text.
            # NOTE(review): UTF-8 is assumed for external pages - confirm.
            text = page.decode('utf-8', errors='replace')
            for chunk in text.replace("\n", "<br />").split("<br /><br />"):
                section.text.append(genshi.core.Markup(chunk))
    except Exception as exc:
        # Keep the run going for the remaining essays, but report the failure
        # instead of silently producing an empty chapter.
        print('warning: could not fully extract %r: %r' % (link, exc))

    return section
# Connection pool shared by the whole script; addSection() reads this
# module-level name, so it has to exist before the first call.
http = urllib3.PoolManager()

book = ez_epub.Book()
book.title = "Paul Graham's Essays"
book.authors = ['Paul Graham']

# Download and parse the essay index.
page = http.request('GET', 'http://www.paulgraham.com/articles.html').data
soup = BeautifulSoup(page, 'lxml')

# The links are read from the second width="435" table on the index page.
# href=True skips anchors without a target, which would otherwise raise
# KeyError on link['href'] below.
links = soup.find_all('table', {'width': '435'})[1].find_all('a', href=True)

# One section per link, in page order.
sections = [addSection(link['href'], link.text) for link in links]

book.sections = sections
book.make(book.title)
philippludwig
commented
Feb 4, 2021
Traceback (most recent call last):
  File "pgessays.py", line 53, in <module>
    book = ez_epub.Book()
  File "/home/philipp/downloads/ez_epub.py", line 19, in __init__
    self.impl = epub.EpubBook()
AttributeError: module 'epub' has no attribute 'EpubBook'
Make sure you have installed http://code.google.com/p/python-epub-builder/ (as the comments suggest) and all of the project's dependencies.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment