Skip to content

Instantly share code, notes, and snippets.

@alexshevchuk
Forked from olasitarska/pgessays.py
Created January 6, 2021 20:13
Show Gist options
  • Save alexshevchuk/0e113f909f1032944bdf1b5da634101e to your computer and use it in GitHub Desktop.
Save alexshevchuk/0e113f909f1032944bdf1b5da634101e to your computer and use it in GitHub Desktop.
Builds epub book out of Paul Graham's essays.
# -*- coding: utf-8 -*-
"""
Builds epub book out of Paul Graham's essays: http://paulgraham.com/articles.html
Author: Ola Sitarska <ola@sitarska.com>
Copyright: Licensed under the GPL-3 (http://www.gnu.org/licenses/gpl-3.0.html)
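This script requires python-epub-builder (which provides ez_epub): http://code.google.com/p/python-epub-builder/
It also imports urllib3, BeautifulSoup 4 (bs4, used with the lxml parser) and genshi.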
"""
import re, ez_epub, genshi
import urllib3
from bs4 import BeautifulSoup
def addSection(link, title):
    """Download one essay and wrap its text in an ``ez_epub.Section``.

    Args:
        link: The ``href`` of the essay. A value that does not contain
            ``'http'`` is fetched relative to http://www.paulgraham.com/ and
            parsed as HTML; any other value is fetched as-is and treated as
            plain text.
        title: Text stored as the section title (also printed as progress).

    Returns:
        The ``ez_epub.Section``. Extraction is best-effort: if it fails, a
        warning is printed and the section is returned with whatever text had
        been appended up to that point.
    """
    is_local = 'http' not in link
    url = 'http://www.paulgraham.com/' + link if is_local else link
    page = http.request('GET', url).data
    if is_local:
        soup = BeautifulSoup(page, 'lxml')

    section = ez_epub.Section()
    try:
        section.title = title
        print(section.title)
        if is_local:
            essay_table = soup.findAll('table', {'width': '435'})[0]
            font = str(essay_table.findAll('font')[0])
            # A short <font> block, or one carrying a promo banner, is not the
            # essay body; fall back to concatenating the table's <p> tags.
            banners = (
                'Get funded by',
                'Watch how this essay was',
                'Like to build things?',
            )
            if len(font) >= 100 and not any(b in font for b in banners):
                content = font
            else:
                content = ''.join(str(par) for par in essay_table.findAll('p'))
            # Paragraphs are separated by a double line break. bs4 serialises
            # the tag as "<br/>" (BeautifulSoup 3 wrote "<br />"), so accept
            # either spelling instead of one literal string.
            for p in re.split(r'<br\s*/?>\s*<br\s*/?>', content):
                section.text.append(genshi.core.Markup(p))
            # exception for Subject: Airbnb
            for pre in soup.findAll('pre'):
                section.text.append(genshi.core.Markup(str(pre)))
        else:
            # The response body is bytes; str(bytes) would yield the
            # "b'...'" repr with escaped newlines, so decode it instead.
            text = page.decode('utf-8', errors='replace')
            for p in text.replace("\n", "<br />").split("<br /><br />"):
                section.text.append(genshi.core.Markup(p))
    except Exception as exc:
        # Keep building the book, but report the failure instead of hiding it.
        print('Warning: could not extract "%s" (%s): %r' % (title, link, exc))
    return section
# Shared connection pool; addSection() reads this module-level name.
http = urllib3.PoolManager()

book = ez_epub.Book()
book.title = "Paul Graham's Essays"
book.authors = ['Paul Graham']

# Fetch the essay index and collect every anchor from the second
# 435px-wide table on the page.
page = http.request('GET', 'http://www.paulgraham.com/articles.html').data
soup = BeautifulSoup(page, 'lxml')
links = soup.find_all('table', {'width': '435'})[1].find_all('a')

# One section per linked essay, in index order.
sections = [addSection(link['href'], link.text) for link in links]
book.sections = sections
book.make(book.title)
@philippludwig
Copy link

Traceback (most recent call last):
  File "pgessays.py", line 53, in <module>
    book = ez_epub.Book()
  File "/home/philipp/downloads/ez_epub.py", line 19, in __init__
    self.impl = epub.EpubBook()
AttributeError: module 'epub' has no attribute 'EpubBook'

@alexshevchuk
Copy link
Author

Traceback (most recent call last):
  File "pgessays.py", line 53, in <module>
    book = ez_epub.Book()
  File "/home/philipp/downloads/ez_epub.py", line 19, in __init__
    self.impl = epub.EpubBook()
AttributeError: module 'epub' has no attribute 'EpubBook'

Make sure you have installed python-epub-builder (http://code.google.com/p/python-epub-builder/), as the comments in the script suggest, along with all of the project's dependencies.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment