Instantly share code, notes, and snippets.

Embed
What would you like to do?
知乎日报 Kindle 版生成工具,可用于 Calibre
#!/usr/bin/env python
__copyright__ = 'Yuanxuan Wang <zellux at gmail dot com>'
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
from collections import OrderedDict
from contextlib import nested, closing
import json
import re
class Zhihu(BasicNewsRecipe):
    """Calibre news recipe that builds an e-book from the Zhihu Daily feed.

    The index is downloaded from ``INDEX`` as JSON. Every entry of its
    ``news`` and ``top_stories`` arrays is turned into one article
    description by :meth:`parse_article`.
    """

    # JSON endpoint that lists the latest stories.
    INDEX = 'http://news.at.zhihu.com/api/1.1/news/latest'

    title = u'知乎日报'
    language = 'zh'
    __author__ = "Yuanxuan Wang"
    description = (u'知乎日报')
    no_stylesheets = True
    needs_subscription = False
    # The CSS lines are deliberately left unindented so the string content
    # stays exactly as it was.
    extra_css = '''
.headline { font-size: x-large; }
.content { word-wrap: break-word; line-height: 1.6em; }
'''

    def parse_index(self):
        """Fetch the JSON index and return the article list per section.

        Returns:
            A list of ``(section_title, articles)`` tuples, "News" first and
            "Top Stories" second, where ``articles`` is a list of dicts as
            produced by :meth:`parse_article`.

        Raises:
            RuntimeError: If the index request returns an empty body.
        """
        # Use ``open_novisit`` when the browser object offers it, otherwise
        # fall back to the plain ``open``.
        opener = getattr(self.browser, 'open_novisit', self.browser.open)
        with closing(opener(self.INDEX)) as f:
            results = f.read()
        if not results:
            raise RuntimeError('Could not fetch index!')
        self.log(results)

        json_obj = json.loads(results)
        news = self._parse_section(json_obj, 'news')
        top_stories = self._parse_section(json_obj, 'top_stories')
        return [(u'News', news), (u'Top Stories', top_stories)]

    def _parse_section(self, json_obj, key):
        """Return the parsed articles stored under ``key`` in the index."""
        # A missing or null section is treated as empty so that one absent
        # section does not abort the whole download with a KeyError.
        return [self.parse_article(item) for item in json_obj.get(key) or []]

    def parse_article(self, item):
        """Convert one story entry of the index into an article dict.

        Args:
            item: A decoded JSON object describing a single story.

        Returns:
            A dict with the keys ``title``, ``date``, ``description`` and
            ``url``; ``date`` and ``description`` are always empty strings.

        Raises:
            KeyError: If ``title`` is missing, or if the entry has neither a
                ``share_url`` nor the legacy ``items`` list.
        """
        # Users of the recipe reported that the link has to be read from
        # ``share_url``; the nested ``items[0]['url']`` layout is kept as a
        # fallback for payloads that still use it.
        url = item.get('share_url')
        if not url:
            url = item['items'][0]['url']
        return {
            'title': item['title'],
            'date': '',
            'description': '',
            'url': url,
        }
@hcoona

This comment has been minimized.

Copy link

hcoona commented Mar 28, 2014

Line 50 should be changed to use item['share_url'].

@xiadaomike

This comment has been minimized.

Copy link

xiadaomike commented Jun 1, 2014

@hcoona The code didn't work at first, and it started working after I followed your comment to make some changes to the original code. Thanks!

@nonozone

This comment has been minimized.

Copy link

nonozone commented Nov 27, 2015

现在好像不能用了,不能抓取了。

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment