Skip to content

Instantly share code, notes, and snippets.

@zellux
Last active August 9, 2019 16:50
Show Gist options
  • Star 18 You must be signed in to star a gist
  • Fork 9 You must be signed in to fork a gist
  • Save zellux/5844688 to your computer and use it in GitHub Desktop.
Save zellux/5844688 to your computer and use it in GitHub Desktop.
知乎日报 Kindle 版生成工具,可用于 Calibre
#!/usr/bin/env python
__copyright__ = 'Yuanxuan Wang <zellux at gmail dot com>'
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
from collections import OrderedDict
from contextlib import nested, closing
import json
import re
class Zhihu(BasicNewsRecipe):
    """Calibre news recipe that builds an e-book from the Zhihu Daily index.

    The article list is downloaded as JSON from ``INDEX`` and split into two
    sections, ``News`` and ``Top Stories``.
    """

    # JSON endpoint that lists the latest stories.
    INDEX = 'http://news.at.zhihu.com/api/1.1/news/latest'

    # Recipe metadata and options (see the BasicNewsRecipe documentation
    # for the meaning of each attribute).
    title = u'知乎日报'
    language = 'zh'
    __author__ = "Yuanxuan Wang"
    description = (u'知乎日报')
    no_stylesheets = True
    needs_subscription = False
    extra_css = '''
.headline { font-size: x-large; }
.content { word-wrap: break-word; line-height: 1.6em; }
'''

    def parse_index(self):
        """Fetch the JSON index and return the feed sections.

        Returns:
            A list of ``(section title, articles)`` tuples, always
            ``u'News'`` followed by ``u'Top Stories'``. Each article is a
            dict produced by ``parse_article``. A section that is missing
            from the response is returned with an empty article list.

        Raises:
            RuntimeError: If the index request returns an empty body.
        """
        # Use the browser's open_novisit when it has one, else plain open.
        opener = getattr(self.browser, 'open_novisit', self.browser.open)
        with closing(opener(self.INDEX)) as f:
            results = f.read()
        if not results:
            raise RuntimeError('Could not fetch index!')
        self.log(results)
        json_obj = json.loads(results)
        # Build the sections in the same order as before: news first.
        news = self._collect_articles(json_obj, 'news')
        top_stories = self._collect_articles(json_obj, 'top_stories')
        return [(u'News', news), (u'Top Stories', top_stories)]

    def _collect_articles(self, payload, key):
        """Convert the entries stored under ``key`` into article dicts.

        A missing or non-list section, and any entry ``parse_article``
        cannot handle, is logged and skipped so that one bad record does
        not abort the whole download.
        """
        entries = payload.get(key) if isinstance(payload, dict) else None
        if not isinstance(entries, list):
            self.log.warn('Index has no usable %r section' % (key,))
            return []
        articles = []
        for entry in entries:
            try:
                articles.append(self.parse_article(entry))
            except (KeyError, IndexError, TypeError):
                self.log.warn(
                    'Skipping malformed %r entry: %r' % (key, entry))
        return articles

    def parse_article(self, item):
        """Map one index entry to an article dict.

        Args:
            item: A decoded JSON object with a ``title`` and a non-empty
                ``items`` list whose first element carries the ``url``.

        Returns:
            A dict with ``title``, ``date``, ``description`` and ``url``
            keys; ``date`` and ``description`` are always empty strings.

        Raises:
            KeyError, IndexError, TypeError: If ``item`` lacks that shape.
        """
        return {
            'title': item['title'],
            'date': '',
            'description': '',
            'url': item['items'][0]['url']
        }
@xiadaomike
Copy link

@hcoona The code didn't work at first, and it started working after I followed your comment to make some changes to the original code. Thanks!

@nonozone
Copy link

现在好像不能用了,不能抓取了。

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment