Skip to content

Instantly share code, notes, and snippets.

@favormm
Created November 21, 2013 04:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save favormm/7576132 to your computer and use it in GitHub Desktop.
Save favormm/7576132 to your computer and use it in GitHub Desktop.
#encoding: utf-8
import requests
from pyquery import PyQuery as pq
# Browser-like User-Agent string sent with every request of the scraper.
agent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:22.0) Gecko/20100101 Firefox/22.0"
# Value for the Content-Type request header: the standard form-encoding
# media type with an explicit UTF-8 charset.
content_type = "application/x-www-form-urlencoded; charset=UTF-8"
class Sjtu:
    """Fetch a board listing page and print the posts found in it.

    Constructing an instance performs the HTTP request right away and prints
    one ``post: <title> and <link>`` line per matching table row.
    """

    def __init__(self, url, timeout=30):
        """Download *url* and print the posts it lists.

        Args:
            url: Address of the listing page to fetch; it is also sent as
                the ``Referer`` header.
            timeout: Seconds to wait for the server before the request is
                aborted, so a stalled connection cannot hang the script.
        """
        self.host = url
        # Prefix prepended to each post's href when building its link.
        self.main_host = "http://bbs.sjtu.edu.cn/"
        self.s = requests.Session()
        self.s.headers.update({
            'User-Agent': agent,
            'Referer': self.host,
            'Content-Type': content_type,
        })
        # Session-level headers are sent automatically with every request,
        # so they do not need to be passed to get() a second time.
        r = self.s.get(self.host, timeout=timeout)
        # The body is decoded as GBK; undecodable bytes are dropped.
        self.page(r.content.decode('GBK', 'ignore'))

    def page(self, content):
        """Parse *content* as HTML and process every ``table tr`` row.

        Args:
            content: Decoded HTML text of the listing page.
        """
        rows = pq(content)('table tr')
        for index, node in enumerate(rows):
            self.getContent(index, node)

    def getContent(self, index, node):
        """Print the title and link of the post held in one table row.

        The post is taken from the second ``td a`` anchor of the row. Rows
        with fewer than two such anchors, or whose second anchor carries no
        ``href`` attribute, are skipped.

        Args:
            index: Position of the row within the selection (unused).
            node: The row element to inspect.
        """
        # Query once and reuse the result instead of re-running the selector.
        anchor = pq(node)("td a").eq(1)
        if not anchor:
            return
        href = anchor.attr("href")
        if href is None:
            # Concatenating None onto main_host would raise TypeError.
            return
        title = anchor.text()
        link = self.main_host + href
        print("post: %s and %s" % (title, link))
if __name__ == "__main__":
    # Run as a script: fetch the "Secondhand" board listing and print its
    # posts (all work happens inside the Sjtu constructor).
    board_url = "http://bbs.sjtu.edu.cn/bbstdoc,board,Secondhand.html"
    Sjtu(board_url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment