Skip to content

Instantly share code, notes, and snippets.

@james-ingold
Created October 12, 2016 17:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save james-ingold/653175d5fb07bf5ff07d47f235e2bdc4 to your computer and use it in GitHub Desktop.
Save james-ingold/653175d5fb07bf5ff07d47f235e2bdc4 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
"""
Builds epub book out of James Clear's essays: http://jamesclear.com/articles
Copyright: Licensed under the GPL-3 (http://www.gnu.org/licenses/gpl-3.0.html)
Requires Python 2, python-epub-builder (provides ez_epub; http://code.google.com/p/python-epub-builder/),
BeautifulSoup 3 and Genshi.
"""
import re
import sys
import urllib2

import ez_epub
import genshi
from BeautifulSoup import BeautifulSoup
def addSection(link, title):
    """Download one article and wrap its body in an ``ez_epub.Section``.

    The page at *link* is fetched and parsed, the first
    ``<div class="entry-content">`` is located, and each of its direct child
    tags is appended to the section text as ``genshi`` markup.  Only direct
    children are taken: each child is serialised together with its
    descendants, so walking the whole subtree would emit nested elements
    more than once.

    Extraction is best-effort.  If the content div is missing or a child
    cannot be converted, a warning is written to stderr and the section is
    returned with whatever text had been collected up to that point.

    Args:
        link: URL of the article page.
        title: Title assigned to the section and echoed to stdout.

    Returns:
        The ``ez_epub.Section`` built for the article.

    Raises:
        Errors raised by ``urllib2.urlopen`` / ``read`` while downloading the
        page are not caught here and propagate to the caller.
    """
    # urllib2 responses are not context managers on Python 2, so close
    # the connection explicitly once the body has been read.
    response = urllib2.urlopen(link)
    try:
        page = response.read()
    finally:
        response.close()

    soup = BeautifulSoup(page)
    section = ez_epub.Section()
    section.title = title
    try:
        print(section.title)  # progress output, one line per article
        entryContent = soup.findAll('div', {'class': 'entry-content'})[0]
        # findAll(True) matches every tag; recursive=False limits the
        # search to direct children so nested markup is not duplicated.
        for child in entryContent.findAll(True, recursive=False):
            section.text.append(genshi.core.Markup(child))
    except Exception as exc:
        # Keep going with the remaining articles, but say what went wrong.
        # %r keeps the message ASCII-safe whatever the URL/error contains.
        sys.stderr.write(
            "warning: could not extract content from %r: %r\n" % (link, exc)
        )
    return section
# Build the book: one section per article linked from the archive page.
book = ez_epub.Book()
book.title = "James Clear Essays"
book.authors = ['James Clear']

# Fetch the archive index and close the HTTP response once it has been read.
response = urllib2.urlopen('http://jamesclear.com/articles')
try:
    page = response.read()
finally:
    response.close()
soup = BeautifulSoup(page)

# Each archive row is expected to hold a link to one article.
articles = soup.findAll('div', {'class': "archive-day-row"})
sections = []
for article in articles:
    link = article.find('a')
    # Skip rows without a usable link instead of aborting the whole build.
    if link is None or not link.get('href'):
        continue
    sections.append(addSection(link['href'], link.text))

book.sections = sections
# The book title is passed to make() as its output name argument.
book.make(book.title)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment