Created
June 27, 2012 02:59
-
-
Save satomacoto/3001040 to your computer and use it in GitHub Desktop.
競馬 - Yahoo!スポーツ
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import sys | |
import os | |
from subprocess import * | |
import urllib2,re,math | |
from BeautifulSoup import BeautifulSoup | |
def get_races(url, r=10):
    """Fetch a horse information page and return its table rows as text.

    Args:
        url: URL of the horse information page.
        r: Number of leading rows of the table to examine. Rows with two
            or fewer ``<td>`` cells are skipped, so fewer than ``r``
            entries may be returned.

    Returns:
        A list with one entry per kept row; each entry is a list holding
        the row's cell contents with markup removed and surrounding
        whitespace stripped.

    Raises:
        IndexError: If the page contains fewer than seven tables.
    """
    opener = urllib2.build_opener()
    # Fetch the HTML; make sure the response is closed even if read() fails.
    response = opener.open(url)
    try:
        html = response.read()
    finally:
        response.close()
    # Rows are read from the seventh <table> on the page (layout-dependent).
    soup = BeautifulSoup(html)
    rows = soup.findAll('table')[6].findAll('tr')
    # Matches a single tag, tolerating '>' inside quoted attribute values.
    tag = re.compile('''<("[^"]*"|'[^']*'|[^'">])*>''')
    uma = []
    for row in rows[:r]:
        cols = row.findAll('td')
        if len(cols) > 2:
            uma.append([tag.sub('', str(col)).strip() for col in cols])
    return uma
def get_uma_links(denma_id):
    """Return (url, name) pairs for the horse links on a race entry page.

    Args:
        denma_id: Identifier inserted into the ``/race/denma/<id>/`` URL.
            Per the original author's note, an id such as
            ``09 08 05 04 11`` reads as year, racecourse, meeting number,
            day, race.

    Returns:
        A list of ``(absolute_url, name)`` tuples, where ``name`` is the
        ``.string`` of the ``<strong>`` element inside each link.

    Raises:
        IndexError: If the page contains fewer than three tables.
        AttributeError: If a matching link has no ``<strong>`` child.
    """
    opener = urllib2.build_opener()
    # Site top page; hrefs found below are appended to it.
    top = "http://keiba.yahoo.co.jp"
    # Entry table (list of runners) for the requested race.
    url = top + '/race/denma/%s/' % denma_id
    # Single-argument print() emits the same text on Python 2 and 3.
    print(url)
    # Fetch the entry table page; always release the connection.
    response = opener.open(url)
    try:
        html = response.read()
    finally:
        response.close()
    # Parse the HTML.
    soup = BeautifulSoup(html)
    tables = soup.findAll('table')
    # Collect the links to each horse from the third table on the page.
    denma = tables[2]
    anchors = denma.findAll(
        'a', href=lambda value: value and 'horse' in value)
    return [(top + a['href'], a.strong.string) for a in anchors]
def main(denma_id='1006040411'):
    """Print every field of the listed races for each horse in one race.

    For each (link, name) pair returned by ``get_uma_links`` the name and
    link are printed, followed by one ``header:value`` line per column of
    each row returned by ``get_races``.

    Args:
        denma_id: Race entry identifier passed to ``get_uma_links``.
            Defaults to the id that was previously hard-coded.
    """
    # Column headers, in table order. Roughly: date, race, racecourse,
    # course, track condition, field size, bracket no., horse no.,
    # popularity, odds, finishing position, jockey, carried weight,
    # horse weight, time, last 3F, passing order, time margin.
    heads = '年月日,レース,競馬場,コース,馬場,頭数,枠番,馬番,人気,オッズ,着順,騎手名,斤量,馬体重,タイム,上3F,通過順位,タイム差'.split(',')
    for link, uma in get_uma_links(denma_id):
        print("%s %s" % (uma, link))
        for race in get_races(link):
            # Pair headers with cells directly so fields print in column
            # order; going through a dict made the order arbitrary.
            for k, v in zip(heads, race):
                print("%s:%s" % (k, v))
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment