Last active
August 9, 2019 16:50
-
-
Save zellux/5844688 to your computer and use it in GitHub Desktop.
知乎日报 Kindle 版生成工具,可用于 Calibre
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
__copyright__ = 'Yuanxuan Wang <zellux at gmail dot com>' | |
from calibre.web.feeds.news import BasicNewsRecipe | |
from calibre.ebooks.BeautifulSoup import Tag, NavigableString | |
from collections import OrderedDict | |
from contextlib import nested, closing | |
import json | |
import re | |
class Zhihu(BasicNewsRecipe):
    """Calibre news recipe that builds an e-book from the Zhihu Daily feed.

    The index is fetched as JSON from ``INDEX``; every entry under the
    ``news`` and ``top_stories`` keys becomes one article in the
    corresponding section of the generated book.
    """

    # JSON endpoint listing the latest stories.
    INDEX = 'http://news.at.zhihu.com/api/1.1/news/latest'

    title = u'知乎日报'
    language = 'zh'
    __author__ = "Yuanxuan Wang"
    description = (u'知乎日报')
    no_stylesheets = True
    needs_subscription = False
    extra_css = '''
    .headline { font-size: x-large; }
    .content { word-wrap: break-word; line-height: 1.6em; }
    '''

    def parse_index(self):
        """Download the JSON index and return the list of feed sections.

        Returns:
            A list of ``(section_title, articles)`` tuples, where each
            article is a dict as produced by :meth:`parse_article`.

        Raises:
            RuntimeError: if the index request returns an empty body.
        """
        # Prefer open_novisit when the browser offers it; otherwise fall
        # back to the regular open().
        opener = getattr(self.browser, 'open_novisit', self.browser.open)
        with closing(opener(self.INDEX)) as f:
            results = f.read()
        if not results:
            raise RuntimeError('Could not fetch index!')
        self.log(results)

        json_obj = json.loads(results)
        # A missing section yields an empty list instead of a KeyError so
        # that one absent key does not discard the other section.
        news = [self.parse_article(item)
                for item in json_obj.get('news', [])]
        top_stories = [self.parse_article(item)
                       for item in json_obj.get('top_stories', [])]
        return [(u'News', news), (u'Top Stories', top_stories)]

    def parse_article(self, item):
        """Convert one JSON story entry into an article dict.

        Args:
            item: a decoded JSON object describing a single story. It must
                contain ``title`` and either ``share_url`` or the older
                nested ``items[0].url`` field.

        Returns:
            A dict with the keys ``title``, ``date``, ``description`` and
            ``url``.

        Raises:
            KeyError, IndexError: if neither URL field is present.
        """
        # Use the top-level 'share_url' when present; keep the original
        # nested lookup as a fallback for payloads that still use it.
        url = item.get('share_url')
        if not url:
            url = item['items'][0]['url']
        return {
            'title': item['title'],
            'date': '',
            'description': '',
            'url': url,
        }
@hcoona The code didn't work at first, and it started working after I followed your comment to make some changes to the original code. Thanks!
现在好像不能用了,不能抓取了。
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Line 50 should be changed to use
item['share_url']