Skip to content

Instantly share code, notes, and snippets.

@vikingmute
Created December 27, 2013 09:27
Show Gist options
  • Save vikingmute/8144570 to your computer and use it in GitHub Desktop.
Save vikingmute/8144570 to your computer and use it in GitHub Desktop.
从500上面抓取五天数据 并且分每天存成txt文档 然后用simpleHttpServer输出 使用工具BeautifulSoup
import os
import urllib2
from datetime import date, timedelta

from bs4 import BeautifulSoup
def getstr(tag):
    """Return the text of *tag* encoded as UTF-8, or ``'0'`` if it has none.

    tag: an object exposing a ``string`` attribute (the script passes the
        result of ``tr.find(...)``), or ``None`` when the lookup found
        nothing.

    Returns the UTF-8 encoding of ``tag.string`` when that value is truthy;
    otherwise the placeholder ``'0'``. A missing tag, a ``None`` string and
    an empty string all yield the placeholder.
    """
    # A failed lookup hands us None instead of a tag; treat it the same as a
    # tag without text rather than raising AttributeError.
    text = tag.string if tag is not None else None
    if text:
        return text.encode('utf8')
    return '0'
# Fetch the live.500.com page for today and each of the four previous days,
# pull team names and scores out of the element with id "table_match", and
# write one HTML <ul> snippet per day to ./data/data<date>.txt.
data_dir = "./data"
# open() does not create missing directories, so make sure the target exists
# before the first write.
if not os.path.isdir(data_dir):
    os.makedirs(data_dir)

for gap in range(0, 5):
    ddate = date.today() - timedelta(gap)
    url = "http://live.500.com/?e=" + str(ddate)
    print("staring fetch data from 500 at " + str(ddate) + ": \n")

    # Release the HTTP response even if read() fails part-way through.
    response = urllib2.urlopen(url)
    try:
        page = response.read()
    finally:
        response.close()

    soup = BeautifulSoup(page)
    table = soup.find(id="table_match")
    if table is None:
        # No match table on this day's page, so there is nothing to write.
        continue

    # Drop the first row (presumably the table header -- confirm against
    # the live page).
    trs = table.find_all('tr')[1:]

    lis = ['<ul>\n']
    for tr in trs:
        names = tr.find_all(class_='p_lr01')
        if len(names) < 2:
            # A row without both team cells cannot be a match row; skip it
            # instead of failing on names[1].
            continue
        lname = names[0].find('a').string.encode('utf8')
        rname = names[1].find('a').string.encode('utf8')
        lscore = getstr(tr.find(class_='clt1'))
        rscore = getstr(tr.find(class_='clt3'))
        fstr = '<li>' + lname + ':' + lscore + '---VS---' + rscore + ':' + rname + '</li>\n'
        lis.append(fstr)
    lis.append('</ul>')

    print("data has generated, starting wrting file")
    filename = "data" + str(ddate) + ".txt"
    # The with-block closes the file even if write() raises; plain "w" is
    # enough because the file is never read back.
    with open(os.path.join(data_dir, filename), "w") as my_file:
        my_file.write(" ".join(lis))
    print("file has been created")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment