Skip to content

Instantly share code, notes, and snippets.

@fanzeyi
Created April 5, 2012 18:10
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save fanzeyi/2312928 to your computer and use it in GitHub Desktop.
Save fanzeyi/2312928 to your computer and use it in GitHub Desktop.
备份饭否消息。 非API
# -*- coding: utf-8 -*-
# AUTHOR: Zeray Rice <fanzeyi1994@gmail.com>
# FILE: main.py
# CREATED: 01:15:37 06/04/2012
# MODIFIED: 01:57:15 06/04/2012
import requests
import datetime
from jinja2 import Environment, FileSystemLoader
from BeautifulSoup import BeautifulSoup
# Fill in the cookie values here; this dict is sent with every page request.
cookies = {'ai': '', 'u': '', 'SID': '', 'uuid': ''}

# The template is looked up in the current working directory and loaded once,
# at import time.
jinja = Environment(loader=FileSystemLoader('.'))
tpl = jinja.get_template("message.xml")
class Status(object):
    """One message extracted from a single timeline entry.

    Attributes set by the constructor:
        text: the ``.text`` of the entry's first ``<span class="content">``.
        time: a naive ``datetime.datetime`` parsed from the ``stime``
            attribute of the entry's first ``<a class="time">``.
    """

    def __init__(self, status):
        """Parse the status HTML.

        ``status`` must provide ``findAll(name, attrs)``.  An ``IndexError``
        is raised when either expected element is missing, and a
        ``ValueError`` when ``stime`` does not match the expected layout.
        """
        # The "+0000" offset is matched as literal text, not parsed.
        stime_format = "%a %b %d %H:%M:%S +0000 %Y"

        content_span = status.findAll("span", {'class': "content"})[0]
        time_link = status.findAll("a", {'class': "time"})[0]

        self.text = content_span.text
        self.time = datetime.datetime.strptime(time_link.attrMap['stime'],
                                               stime_format)
def parseHTML(html, result):
    """Append a ``Status`` to ``result`` for every ``<li>`` in the stream.

    ``html`` is the markup of one timeline page.  The entries are the
    ``<li>`` elements under the first ``<div id="stream">``; an
    ``IndexError`` is raised when the page has no such div.  ``result`` is
    extended in place and nothing is returned.
    """
    soup = BeautifulSoup(html)
    stream_div = soup.findAll("div", id="stream")[0]
    # A generator keeps the one-at-a-time append behaviour of a plain loop.
    result.extend(Status(entry) for entry in stream_div.findAll("li"))
def renderMSG(result, filenameCount):
    """Render one batch of statuses to ``treeholes/<filenameCount>.xml``.

    Args:
        result: iterable of status objects, passed to the template as
            ``status``.
        filenameCount: integer used as the output file's base name.

    The ``treeholes`` directory is created if it does not exist yet.  The
    rendered document is encoded as UTF-8 and written in binary mode, so the
    bytes on disk are exactly the encoded output.
    """
    import os  # local import: the block needs nothing else from the module

    out_dir = "treeholes"
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print("Saving to %d.xml" % filenameCount)

    try:
        os.makedirs(out_dir)
    except OSError:
        # An already existing directory is fine; anything else is a real error.
        if not os.path.isdir(out_dir):
            raise

    # "wb": the payload is already encoded bytes.  Text mode would apply
    # newline translation on Windows and reject bytes on Python 3.
    with open("%s/%d.xml" % (out_dir, filenameCount), "wb") as fp:
        fp.write(tpl.render(status=result).encode("utf-8"))
def getStatus(first_page=1, last_page=3511, batch_size=100):
    """Download timeline pages and save the statuses in batched XML files.

    Args:
        first_page: number of the first page to fetch (inclusive).
        last_page: number of the last page to fetch (inclusive).
        batch_size: a file is written as soon as at least this many statuses
            have been collected.

    The defaults reproduce the original hard-coded crawl (pages 1 to 3511,
    flushed every 100 statuses).  Output files are numbered from 1.

    Raises:
        requests.HTTPError: if a page request returns an error status.
    """
    filenameCount = 1
    result = []
    for p in range(first_page, last_page + 1):
        # Announce the page before working on it, so a failure can be traced
        # to the page that caused it.
        print("Parsing Page %d.." % p)
        r = requests.get("http://fanfou.com/treeholes/p.%d" % p, cookies=cookies)
        # Stop with a clear HTTP error instead of trying to parse an error page.
        r.raise_for_status()
        parseHTML(r.text, result)
        if len(result) >= batch_size:
            renderMSG(result, filenameCount)
            filenameCount += 1
            result = []
    # Write the leftover statuses only if there are any; otherwise an empty
    # XML file would be produced when the last page exactly filled a batch.
    if result:
        renderMSG(result, filenameCount)
# Start the crawl only when executed as a script, so importing this module
# does not trigger thousands of HTTP requests.
if __name__ == "__main__":
    getStatus()
<?xml version="1.0" encoding="UTF-8"?>
{#- Rendered with `status`: an iterable of objects exposing `text` and `time`.
    A literal "]]>" inside the text would end the CDATA section early, so it is
    split across two adjacent CDATA sections. #}
<timelime id="treeholes">{% for st in status %}
<status>
<text><![CDATA[{{ st.text | replace("]]>", "]]]]><![CDATA[>") }}]]></text>
<time><![CDATA[{{ st.time }}]]></time>
</status>
{% endfor %}</timelime>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment