Skip to content

Instantly share code, notes, and snippets.

@sykloid
Created March 1, 2009 15:17
Show Gist options
  • Save sykloid/72351 to your computer and use it in GitHub Desktop.
Save sykloid/72351 to your computer and use it in GitHub Desktop.
A screen scraper for the Australian Open scores page.
#! /usr/bin/env python
# A Screenscraper for the Australian Open scores page.
# P.C. Shyamshankar
# January 2009
#
# Can handle ongoing as well as completed matches, but you'll have to change the
# urls yourself.
#
# Note: match numbers do not stay fixed! When a match finishes and is pulled
# out of the list, its own number goes away and every match below it moves up
# by one.
#
# Important matches are _usually_ at the top, and stay there for some time, so
# they should work out in most cases.
from BeautifulSoup import BeautifulSoup
from urllib import urlopen
from sys import argv
# Optional first command-line argument: the index of the single match to show.
# A missing argument (IndexError) or a non-numeric one (ValueError) falls back
# to -1. Only those two errors are caught, so Ctrl-C and SystemExit still
# propagate.
try:
    match_num = int(argv[1])
except (IndexError, ValueError):
    match_num = -1
# Matches in progress
url = urlopen('http://www.australianopen.com/en_AU/scores/index2.html')

# Completed matches
# url = urlopen('http://www.australianopen.com/en_AU/scores/cmatch/12ms.html')

# The parser is handed the open response object; close it once the tree has
# been built so the connection is not left dangling for the rest of the run.
# NOTE(review): relies on BeautifulSoup consuming the whole response inside
# its constructor -- confirm against the installed BeautifulSoup version.
try:
    soup = BeautifulSoup(url)
finally:
    url.close()

# Collect every table that carries a "summary" attribute.
tables = soup.findAll('table', attrs={'summary': True})
# Both spellings of a non-breaking space that can appear in the scraped text:
# the raw HTML entity and the decoded character. They are written with escapes
# so this file stays pure ASCII (Python 2 rejects non-ASCII source bytes when
# no coding declaration is present).
_NBSP_FORMS = (u'&nbsp;', u'\xa0')

# Row layouts: player name, one-character marker column, then score columns
# (five for a completed match, six for a match in progress).
_COMPLETED_TEMPLATE = "%40s %1s %4s %4s %4s %4s %4s"
_PROGRESS_TEMPLATE = "%40s %1s %4s %4s %4s %4s %4s %4s"

# The second player's cells sit this many entries after the first player's
# (cells[1] vs cells[10], cells[3] vs cells[12], ...).
_PLAYER_STRIDE = 9


def _replace_nbsp(text, replacement):
    """Return *text* with every non-breaking space swapped for *replacement*."""
    for form in _NBSP_FORMS:
        text = text.replace(form, replacement)
    return text


def _set_score(cell):
    """Return the text for one set-score cell.

    *cell* is the contents list of a score cell. Its first item is the score
    text; its second item is a tag whose own contents, when non-empty, are
    appended in parentheses (presumably the tie-break points).
    """
    score = cell[0]
    extra = cell[1].contents
    return score + ('(' + extra[0] + ')' if extra else '')


def _format_row(template, name, marked, scores, columns):
    """Format one player's line of the score table.

    *marked* puts a "*" in the marker column. *scores* is padded with "-" up
    to *columns* entries, and a score that is just a non-breaking space (an
    empty cell on the page) is shown as "-" as well.
    """
    shown = [_replace_nbsp(u'%s' % score, u'-') for score in scores]
    shown += ["-"] * (columns - len(shown))
    return template % ((name, "*" if marked else " ") + tuple(shown))


# NOTE: print is used in its single-argument parenthesised form throughout;
# under Python 2 that is the ordinary print statement applied to one
# parenthesised expression, so the output is unchanged.
for index, table in enumerate(tables):
    # match_num == -1 means "show every match"; otherwise only the table at
    # the requested position is reported.
    if match_num not in (-1, index):
        continue

    cells = [td.contents
             for td in table.findAll('td', attrs={'valign': True})]
    players = [link.contents[0] for link in table.findAll('a')]

    title = cells[0][0].find('td', attrs={'class': 'medBold'}).contents[0]
    title = _replace_nbsp(title, u' ')

    # The alt text of the table's first image tells finished matches apart
    # from ongoing ones.
    completed = table.findAll('img')[0].get('alt') == u'Winner'
    if completed:
        template, columns = _COMPLETED_TEMPLATE, 5
        print("Match (%d) completed : %s" % (index, title))
        print(template % ("Player", "W", "S1", "S2", "S3", "S4", "S5"))
    else:
        template, columns = _PROGRESS_TEMPLATE, 6
        print("Match (%d) in progress : %s" % (index, title))
        print(template % ("Player", "S", "G", "S1", "S2", "S3", "S4", "S5"))

    # A non-empty third cell selects the first player for the marker column
    # ("W" for completed matches, "S" for matches in progress); otherwise the
    # second player gets it.
    serving = 0 if cells[2] else 1

    if u'Doubles' in title:
        if completed:
            # The marked pair's link contents are wrapped in one more tag on
            # completed matches, so unwrap them to get at the text.
            for slot in (serving * 2, serving * 2 + 1):
                players[slot] = players[slot].contents[0]
        names = [
            players[0] + '/' + players[1],
            players[2] + '/' + players[3],
        ]
    else:
        names = []
        for slot, row in enumerate((1, 1 + _PLAYER_STRIDE)):
            linked = cells[row][1].contents[0]
            if completed and slot == serving:
                # Same extra wrapper as in the doubles case.
                linked = linked.contents[0]
            trailing = _replace_nbsp(cells[row][2], u' ')
            names.append(trailing + ' ' + linked)

    scores = ([], [])

    if not completed:
        # Leading score column ("G") of an ongoing match. The marked player's
        # cell keeps its text inside a tag at position 1; the other player's
        # cell has it directly at position 0.
        for slot in (0, 1):
            cell = cells[3 + slot * _PLAYER_STRIDE]
            if slot == serving:
                scores[slot].append(cell[1].contents[0])
            else:
                scores[slot].append(cell[0])

    for i in range(4, 9):
        try:
            scores[0].append(_set_score(cells[i]))
            scores[1].append(_set_score(cells[_PLAYER_STRIDE + i]))
        except Exception:
            # Best effort: the first cell that does not have the expected
            # shape ends the list of sets; remaining columns are padded
            # with "-". Unlike a bare except, Ctrl-C still gets through.
            break

    # Build both rows before printing either, so a formatting failure never
    # leaves half a match on screen.
    rows = [
        _format_row(template, names[slot], slot == serving,
                    scores[slot], columns)
        for slot in (0, 1)
    ]
    for row in rows:
        print(row)
    print("")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment