Skip to content

Instantly share code, notes, and snippets.

@satomacoto
Created June 27, 2012 02:59
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save satomacoto/3001040 to your computer and use it in GitHub Desktop.
Save satomacoto/3001040 to your computer and use it in GitHub Desktop.
競馬 - Yahoo!スポーツ
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import os
from subprocess import *
import urllib2,re,math
from BeautifulSoup import BeautifulSoup
def get_races(url, r=10):
    """Fetch a horse information page and return its race-history rows.

    url: URL of the horse information page.
    r: number of leading rows of the results table to examine. Rows are
       filtered after this cut, so fewer than r records may be returned.

    Returns a list with one entry per kept row; each entry is a list of
    the row's cell contents as strings with markup removed and
    surrounding whitespace stripped.

    Raises IndexError if the page holds fewer than seven tables. Errors
    raised while opening or reading the URL propagate to the caller.
    """
    opener = urllib2.build_opener()
    # Fetch the HTML; close the response explicitly instead of relying on
    # garbage collection to release the connection.
    response = opener.open(url)
    try:
        html = response.read()
    finally:
        response.close()
    # The race history is taken from the seventh table on the page.
    soup = BeautifulSoup(html)
    tables = soup.findAll('table')
    rows = tables[6].findAll('tr')
    # Matches one HTML tag, allowing quoted attribute values inside it.
    p = re.compile('''<("[^"]*"|'[^']*'|[^'">])*>''')
    uma = []
    for row in rows[:r]:
        cols = row.findAll('td')
        # Keep only rows with more than two data cells.
        if len(cols) > 2:
            uma.append([p.sub('', str(col)).strip() for col in cols])
    return uma
def get_uma_links(denma_id):
    """Fetch a race card and return links to each listed horse's page.

    denma_id: race-card identifier inserted into the race-card URL.
        Layout, e.g. 09 08 05 04 11:
        year, racecourse, meeting number, day, race.

    Returns a list of (absolute_url, name) tuples, where name is the
    string of the strong element inside each matching anchor.

    The race-card URL is printed to standard output before it is fetched.
    Raises IndexError if the page holds fewer than three tables. Errors
    raised while opening or reading the URL propagate to the caller.
    """
    opener = urllib2.build_opener()
    # Site root, also used to make the horse links absolute.
    top = "http://keiba.yahoo.co.jp"
    # Race-card URL
    url = top + '/race/denma/%s/' % denma_id
    # Single-argument parenthesized form: prints the same under Python 2.
    print(url)
    # Fetch the race card; close the response explicitly instead of
    # relying on garbage collection to release the connection.
    response = opener.open(url)
    try:
        html = response.read()
    finally:
        response.close()
    # Parse the HTML
    soup = BeautifulSoup(html)
    tables = soup.findAll('table')
    # The race card is taken from the third table on the page.
    denma = tables[2]
    # Collect anchors whose href contains 'horse'.
    links = denma.findAll('a', href=lambda value: value and 'horse' in value)
    return [(top + link['href'], link.strong.string) for link in links]
def main(denma_id='1006040411'):
    """Print the race history of every horse entered in a race card.

    denma_id: race-card identifier passed to get_uma_links; defaults to
        the id the script was originally hard-coded to.

    For each horse, prints its name and page URL, a blank line, and then
    each race record as "header:value" lines followed by a blank line.
    Fields are printed in the order of the header list.
    """
    # Column headers, paired positionally with the cells of each row.
    heads = '年月日,レース,競馬場,コース,馬場,頭数,枠番,馬番,人気,オッズ,着順,騎手名,斤量,馬体重,タイム,上3F,通過順位,タイム差'.split(',')
    for link, uma in get_uma_links(denma_id):
        print("%s %s" % (uma, link))
        print("")
        for race in get_races(link):
            # Iterate the pairs directly rather than through a dict so
            # the fields keep the header order; zip stops at the shorter
            # of the two sequences.
            for k, v in zip(heads, race):
                print("%s:%s" % (k, v))
            print("")
# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment