Skip to content

Instantly share code, notes, and snippets.

@cloverstd
Last active October 3, 2015 15:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cloverstd/67e5b4897f11694ff3c6 to your computer and use it in GitHub Desktop.
Save cloverstd/67e5b4897f11694ff3c6 to your computer and use it in GitHub Desktop.
扇贝打卡 RSS 源
#!/usr/bin/env python
# encoding: utf-8
from __future__ import absolute_import, unicode_literals
import tornado.ioloop
import tornado.httpserver
from tornado.options import parse_command_line, define, options
import tornado.httpclient
import tornado.web
import tornado.gen
import logging
from bs4 import BeautifulSoup
import PyRSS2Gen
import datetime
import locale
# Command-line options registered with tornado.options; the parsed values are
# read back through the ``options`` object further down in this file.
# ``debug`` is handed to the Tornado application as its "debug" setting.
define('debug', type=bool, default=True)
# ``port`` and ``host`` are the address the HTTP server listens on.
define('port', type=int, default=8088)
define('host', type=str, default="127.0.0.1")
class ShanbayRss(object):
    """Build an RSS 2.0 feed from a Shanbay check-in record page.

    The page at ``URL`` (formatted with a record id) is downloaded, every
    element with class ``checkin`` inside the ``checkins`` container is
    turned into one RSS item, and the page ``<title>`` becomes the feed
    title.
    """

    URL = "http://shanbay.com/checkin/record/{record_id}/"
    DEFAULT_HEADERS = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.99 Safari/537.36",
    }

    def __init__(self, record_id):
        """Store the record id and create the asynchronous HTTP client.

        :param record_id: value substituted for ``{record_id}`` in ``URL``.
        """
        self.record_id = record_id
        self.http_client = tornado.httpclient.AsyncHTTPClient()

    @tornado.gen.coroutine
    def get_latest_chekcins(self):
        """Fetch the record page and return its parsed check-ins.

        The misspelled method name is kept on purpose so existing callers
        keep working.

        :returns: (through ``tornado.gen.Return``) the dict produced by
            :meth:`parse_checkins`.
        """
        request = tornado.httpclient.HTTPRequest(
            url=self.URL.format(record_id=self.record_id),
            headers=self.DEFAULT_HEADERS,
        )
        response = yield self.http_client.fetch(request)
        raise tornado.gen.Return(self.parse_checkins(response.body))

    @tornado.gen.coroutine
    def to_xml(self):
        """Render the latest check-ins as an RSS 2.0 document.

        :returns: (through ``tornado.gen.Return``) the result of
            ``PyRSS2Gen.RSS2.to_xml(encoding="utf-8")``.
        """
        checkins = yield self.get_latest_chekcins()
        items = [
            PyRSS2Gen.RSSItem(
                title="{date} 第{number}天打卡".format(
                    date=item["date"],
                    number=item["day"],
                ),
                link=item["link"],
                description=item["note"],
                guid=PyRSS2Gen.Guid(item["link"]),
            )
            for item in checkins["checkins"]
        ]
        rss = PyRSS2Gen.RSS2(
            title=checkins["title"],
            link="http://hui.lu",
            description="",
            # PyRSS2Gen writes datetimes with a "GMT" suffix, so the value
            # handed over must be UTC rather than local time.
            lastBuildDate=datetime.datetime.utcnow(),
            items=items,
        )
        raise tornado.gen.Return(rss.to_xml(encoding="utf-8"))

    def parse_checkins(self, content):
        """Extract the page title and the check-in entries from page HTML.

        While the dates are parsed the process locale is switched to
        ``zh_CN.UTF-8`` (``%B`` month names are locale dependent); the
        previous locale is restored afterwards, even on error.

        :param content: HTML of a check-in record page.
        :returns: ``{"title": ..., "checkins": [...]}`` where every check-in
            is a dict with the keys ``day``, ``note``, ``date`` and ``link``.
        :raises AttributeError: if an expected element is missing from the
            page (a ``find`` call returned ``None``).
        :raises locale.Error: if the ``zh_CN.UTF-8`` locale is unavailable.
        :raises ValueError: if a date does not match ``%B %d, %Y``.
        """
        soup = BeautifulSoup(content, 'html.parser')
        entries = soup.find(attrs={"class": "checkins"}).find_all(
            attrs={"class": "checkin"})

        checkins = []
        if entries:
            # Set the locale once for the whole batch instead of once per
            # entry, and put the previous one back so that the rest of the
            # process is not left running under a different locale.
            previous_locale = locale.setlocale(locale.LC_ALL)
            locale.setlocale(locale.LC_ALL, str('zh_CN.UTF-8'))
            try:
                checkins = [self._parse_entry(entry) for entry in entries]
            finally:
                locale.setlocale(locale.LC_ALL, previous_locale)

        return {
            "title": soup.title.text.strip(),
            "checkins": checkins,
        }

    def _parse_entry(self, entry):
        """Convert one ``checkin`` element into the dict used for an RSS item.

        Expects the locale needed by ``%B`` to be active already.
        """
        note = entry.find(attrs={"class": "note"}).text.strip()
        target = entry.find(attrs={"class": "target"}).text.strip()
        # strptime wants the interpreter's native ``str``: on Python 2 that
        # is a byte string, so the text has to be encoded; on Python 3 the
        # text is already a ``str`` and is passed through unchanged.
        if not isinstance(target, str):
            target = target.encode('utf-8')
        date = datetime.datetime.strptime(target, '%B %d, %Y')
        return {
            "day": entry.find(attrs={"class": "number"}).text.strip(),
            # Drop every whitespace character from the note.
            "note": "".join(note.split()),
            # Formatting the numeric fields directly avoids the byte/text
            # round-trip that strftime needs for a non-ASCII format string.
            "date": "{0:04d}年{1:02d}月{2:02d}天".format(
                date.year, date.month, date.day),
            "link": "http://shanbay.com" + entry.find(
                "a", attrs={"class": "target"})["href"],
        }
class MainHandler(tornado.web.RequestHandler):
    """Handler that answers GET requests with a fixed greeting."""

    @tornado.gen.coroutine
    def get(self):
        """Write the literal body ``hi`` to the response."""
        greeting = "hi"
        self.write(greeting)
class ShanbayRssHandler(tornado.web.RequestHandler):
    """Handler that serves the RSS feed built by ``ShanbayRss``."""

    @tornado.gen.coroutine
    def get(self, record_id):
        """Respond with the RSS XML for *record_id*, or ``404`` on failure.

        Because the personal check-in page requires login, a detour is
        taken: the latest check-ins are read from the page of a single
        check-in record instead.

        :param record_id: id of a check-in record, captured from the URL.
        """
        try:
            feed = ShanbayRss(record_id)
            xml = yield feed.to_xml()
            self.set_header("Content-Type", "text/xml;charset=UTF-8")
            self.write(xml)
        except Exception:
            # Request boundary: any failure (fetch, parse, render) is turned
            # into a 404 for the client. Log with the traceback and the
            # record id so the real cause can still be diagnosed.
            self.application.logger.exception(
                "Failed to build RSS feed for record %s", record_id)
            self.set_status(404)
            self.write('404')
class Application(tornado.web.Application):
    """Tornado application that maps the URL routes to their handlers."""

    def __init__(self):
        """Register the routes, attach a logger and apply the debug option."""
        routes = [
            (r'/shanbay2rss/(\d+)', ShanbayRssHandler),
            (r'/', MainHandler),
        ]
        # Handlers reach this logger through ``self.application.logger``.
        self.logger = logging.getLogger('application')
        super(Application, self).__init__(routes, debug=options.debug)
def main():
    """Parse the command line, start the HTTP server and run the IOLoop."""
    # The command line has to be parsed before Application() is created:
    # its constructor reads ``options.debug``, which would otherwise still
    # hold the default and silently ignore ``--debug`` on the command line.
    parse_command_line()
    http_server = tornado.httpserver.HTTPServer(Application())
    http_server.listen(options.port, address=options.host)
    logger = logging.getLogger('application')
    logger.info("Server listen on http://{host}:{port}".format(
        host=options.host, port=options.port))
    # Blocks until the IOLoop is stopped.
    tornado.ioloop.IOLoop.current().start()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment