Skip to content

Instantly share code, notes, and snippets.

@brandonwu
Created March 20, 2017 00:59
Show Gist options
  • Save brandonwu/311583c595517f6afe3020d80b787c7d to your computer and use it in GitHub Desktop.
Save brandonwu/311583c595517f6afe3020d80b787c7d to your computer and use it in GitHub Desktop.
Calibre recipe to download Unsong, by Scott Alexander
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe
import re
# Conversion settings for the generated book.
# NOTE(review): nothing visible in this file reads this module-level name;
# confirm calibre actually picks it up from here.
conversion_options = {
    'title': 'Unsong',
}
class AdvancedUserRecipe1488762317(BasicNewsRecipe):
    """Calibre recipe that downloads *Unsong* from unsongbook.com.

    The chapter list is scraped from the site's front page by
    ``parse_index``; the class attributes below are ``BasicNewsRecipe``
    settings applied to every downloaded chapter page.
    """

    title = 'Unsong'
    # NOTE(review): these two limits are presumably only consulted for
    # feed-based recipes; kept unchanged from the original.
    oldest_article = 7
    max_articles_per_feed = 300
    handle_gzip = True
    no_stylesheets = True
    remove_javascript = True
    # Hide elements carrying the ``calibre_navbar`` class in the output.
    extra_css = '.calibre_navbar {display: none;}'
    # Keep only <h1> elements and the ``pjgm-postcontent`` div.
    keep_only_tags = [
        dict(name='h1'),
        dict(name='div', attrs={'class': 'pjgm-postcontent'}),
    ]
    remove_attributes = ['size']
    # Drop <link>/<meta> tags plus the post-meta and sharing divs.
    remove_tags = [
        dict(name='link'),
        dict(name='meta'),
        dict(name='div', attrs={'class': 'pjgm-postmeta'}),
        dict(name='div', attrs={'class': 'sharedaddy sd-sharing-enabled'}),
    ]
    # NOTE(review): an empty format presumably suppresses the date calibre
    # would otherwise show -- confirm against the calibre docs.
    timefmt = ''

    def parse_index(self):
        """Fetch the front page and return its table of contents.

        Returns:
            A list of ``(section_title, articles)`` tuples.  ``articles``
            is a list of dicts with the keys ``title``, ``url``,
            ``description`` and ``date`` (the last two always empty).
        """
        raw = self.browser.open('https://unsongbook.com/').read()
        # The response body is bytes; decode it once so the text
        # operations below work on both Python 2 and Python 3.
        # Assumes the page is served as UTF-8 -- TODO confirm.
        if isinstance(raw, bytes):
            raw = raw.decode('utf-8', 'replace')
        return self._unsong_sections(raw)

    @staticmethod
    def _unsong_sections(html):
        """Turn the front-page HTML text into ``parse_index`` sections.

        Scanning starts at the first line mentioning ``Prologue`` and stops
        at the first line that has no double-quoted string, after a line
        mentioning ``New chapters``, or at the end of the page.  The
        prologue becomes its own section; each line mentioning ``BOOK``
        opens a new section and every other line is a chapter of the
        section currently open.
        """
        quoted_re = re.compile(r'"(.*?)"')
        tag_re = re.compile(r'<.*?>')

        sections = []
        book = None
        chapters = []
        started = False

        for line in html.replace('<br />', '\n').split('\n'):
            if not line:
                continue
            if not started:
                if 'Prologue' not in line:
                    continue
                started = True

            match = quoted_re.search(line)
            if match is None:
                # A line without any quoted string marks the end of the
                # listing.
                break
            # The first double-quoted string on the line is taken as the
            # link target.
            url = match.group(1)
            title = tag_re.sub('', line).strip()
            article = {
                'title': title,
                'url': url,
                'description': '',
                'date': '',
            }

            if 'Prologue' in line:
                sections.append(('Prologue', [article]))
            elif 'BOOK' in line:
                # A new book heading closes the previous book, if any.
                # The heading's own link is not added as an article.
                if book is not None:
                    sections.append((book, chapters))
                    chapters = []
                book = title
            else:
                chapters.append(article)

            # The line is handled as above before the listing is ended.
            if 'New chapters' in line:
                break

        # Flush the section still open, whether the loop stopped early or
        # ran to the end of the page.  Nothing is emitted when there is
        # neither a book heading nor any chapter.
        # NOTE(review): chapters seen before any book heading are emitted
        # with a ``None`` title, as in the original.
        if book is not None or chapters:
            sections.append((book, chapters))
        return sections
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment