Skip to content

Instantly share code, notes, and snippets.

@aacook
Forked from olasitarska/pgessays.py
Last active March 18, 2016 01:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aacook/06803e592e8fa6114e74 to your computer and use it in GitHub Desktop.
Save aacook/06803e592e8fa6114e74 to your computer and use it in GitHub Desktop.
Builds an EPUB book out of Paul Graham's essays.
# -*- coding: utf-8 -*-
"""
Builds epub book out of Paul Graham's essays: http://paulgraham.com/articles.html
Author: Ola Sitarska <ola@sitarska.com>
Copyright: Licensed under the GPL-3 (http://www.gnu.org/licenses/gpl-3.0.html)
1. This script requires python-epub-library: http://code.google.com/p/python-epub-builder/
2. Install python packages with "pip install thing"
3. The easiest way to transfer is with USB. Plug in the Kindle, then take the "Paul Graham's Essays" folder and drag it into the documents folder on Kindle.
"""
import re, ez_epub, urllib2, genshi
from BeautifulSoup import BeautifulSoup
def addSection(link, title):
if not 'http' in link:
page = urllib2.urlopen('http://www.paulgraham.com/'+link).read()
soup = BeautifulSoup(page)
soup.prettify()
else:
page = urllib2.urlopen(link).read()
section = ez_epub.Section()
try:
section.title = title
print section.title
if not 'http' in link:
font = str(soup.findAll('table', {'width':'435'})[0].findAll('font')[0])
if not 'Get funded by' in font and not 'Watch how this essay was' in font and not 'Like to build things?' in font and not len(font)<100:
content = font
else:
content = ''
for par in soup.findAll('table', {'width':'435'})[0].findAll('p'):
content += str(par)
for p in content.split("<br /><br />"):
section.text.append(genshi.core.Markup(p))
#exception for Subject: Airbnb
for pre in soup.findAll('pre'):
section.text.append(genshi.core.Markup(pre))
else:
for p in str(page).replace("\n","<br />").split("<br /><br />"):
section.text.append(genshi.core.Markup(p))
except:
pass
return section
# Assemble the EPUB: scrape the essay index, build one section per link.
book = ez_epub.Book()
book.title = "Paul Graham's Essays"
book.authors = ['Paul Graham']

index_html = urllib2.urlopen('http://www.paulgraham.com/articles.html').read()
index_soup = BeautifulSoup(index_html)
index_soup.prettify()

# The second 435px-wide table on the index page holds the essay links.
essay_links = index_soup.findAll('table', {'width': '435'})[1].findAll('a')
book.sections = [addSection(anchor['href'], anchor.text) for anchor in essay_links]
book.make(book.title)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment