Created
March 20, 2017 00:59
-
-
Save brandonwu/311583c595517f6afe3020d80b787c7d to your computer and use it in GitHub Desktop.
Calibre recipe to download Unsong, by Scott Alexander
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2 | |
# vim:fileencoding=utf-8 | |
from __future__ import unicode_literals, division, absolute_import, print_function | |
from calibre.web.feeds.news import BasicNewsRecipe | |
import re | |
# Conversion settings for the generated e-book.
# NOTE(review): calibre documents `conversion_options` as a recipe *class*
# attribute; at module level it is presumably never read -- confirm before
# relying on it (the recipe class already sets the same title).
conversion_options = {'title': 'Unsong'}
class AdvancedUserRecipe1488762317(BasicNewsRecipe):
    """Calibre recipe that downloads *Unsong* from unsongbook.com.

    The table of contents on the site's front page is scraped by
    ``parse_index`` and turned into one section for the prologue followed
    by one section per book heading.
    """

    # Recipe settings read by BasicNewsRecipe (see the calibre recipe docs).
    title = 'Unsong'
    oldest_article = 7
    max_articles_per_feed = 300
    handle_gzip = True
    no_stylesheets = True
    remove_javascript = True
    extra_css = '.calibre_navbar {display: none;}'
    keep_only_tags = [
        dict(name='h1'),
        dict(name='div', attrs={'class': 'pjgm-postcontent'}),
    ]
    remove_attributes = ['size']
    remove_tags = [
        dict(name='link'),
        dict(name='meta'),
        dict(name='div', attrs={'class': 'pjgm-postmeta'}),
        dict(name='div', attrs={'class': 'sharedaddy sd-sharing-enabled'}),
    ]
    timefmt = ''

    def parse_index(self):
        """Fetch the front page and return the book's sections.

        Returns:
            A list of ``(section_title, articles)`` tuples, where each
            article is a dict with the keys ``title``, ``url``,
            ``description`` and ``date``.
        """
        raw = self.browser.open('https://unsongbook.com/').read()
        # read() yields bytes; decode once at the I/O boundary so the text
        # handling below never mixes bytes with unicode literals.
        if isinstance(raw, bytes):
            raw = raw.decode('utf-8', 'replace')
        return self._unsong_sections(raw)

    @classmethod
    def _unsong_sections(cls, raw):
        """Turn the front page's HTML text into ``parse_index`` sections.

        Scanning starts at the first line containing ``Prologue`` and ends
        at the first later line that has no double-quoted string (taken to
        be the link URL), at a line containing ``New chapters``, or at the
        end of the input.

        Args:
            raw: The page source as text.

        Returns:
            A list of ``(section_title, articles)`` tuples.
        """
        url_re = re.compile('"(.*?)"')
        tag_re = re.compile('<.*?>')

        sections = []
        cur_book = None
        chapters = []
        started = False

        # The table of contents is laid out with <br /> separators, so
        # treat those as line breaks too.
        for line in raw.replace('<br />', '\n').split('\n'):
            if not line:
                continue
            if not started:
                if 'Prologue' not in line:
                    continue
                started = True

            match = url_re.search(line)
            if match is None:
                # First line without a quoted URL: the listing is over.
                break

            entry = {
                'title': tag_re.sub('', line).strip(),
                'url': match.group(1),
                'description': '',
                'date': '',
            }

            if 'Prologue' in line:
                sections.append(('Prologue', [entry]))
            elif 'BOOK' in line:
                # A new book heading closes the previous book, if any.
                if cur_book is not None:
                    sections.append((cur_book, chapters))
                    chapters = []
                cur_book = entry['title']
            else:
                chapters.append(entry)

            # NOTE(review): a 'New chapters' line that carries a link has
            # already been recorded above, as in the original code --
            # confirm that this is intended.
            if 'New chapters' in line:
                break

        # Flush the book still being collected, whichever way the loop
        # ended, but never emit an untitled section without chapters.
        if cur_book is not None or chapters:
            last_title = cur_book if cur_book is not None else cls.title
            sections.append((last_title, chapters))
        return sections
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment