Skip to content

Instantly share code, notes, and snippets.

@zdepablo
Last active August 29, 2015 14:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zdepablo/e4d9d412990ac145f2a2 to your computer and use it in GitHub Desktop.
Save zdepablo/e4d9d412990ac145f2a2 to your computer and use it in GitHub Desktop.
Extract UEFA football team and country rankings from an HTML table
#!/usr/bin/python
# -*- coding: utf-8 -*-
from lxml import html,etree
import requests
import unicodecsv
def group(iterator, count):
    """Yield consecutive lists of ``count`` items taken from ``iterator``.

    Only complete groups are produced: if the input runs out part-way
    through a group, the leftover items are discarded and the generator
    stops.

    :param iterator: any iterable to be split into groups.
    :param count: number of items per group; must be at least 1.
    :raises ValueError: if ``count`` is smaller than 1 (raised when the
        generator is first advanced).
    """
    from itertools import islice

    # A non-positive count would otherwise yield empty lists forever.
    if count < 1:
        raise ValueError('count must be a positive integer')

    itr = iter(iterator)
    while True:
        chunk = list(islice(itr, count))
        # Stop explicitly instead of letting StopIteration leak out of the
        # generator body, which is a RuntimeError on Python 3.7+ (PEP 479).
        if len(chunk) < count:
            return
        yield chunk
def _cell_markup(tree, tag):
    """Return the cleaned serialized markup of each ``tag`` cell in table "t1".

    Every matching cell is serialized with ``etree.tostring`` and the plain
    opening tag, the left-aligned opening tag, the closing tag and the
    self-closing empty form are stripped from the result.
    """
    wrappers = (
        '<%s>' % tag,
        '<%s align="left">' % tag,
        '</%s>\n' % tag,
        '<%s/>\n' % tag,
    )
    cleaned = []
    for cell in tree.xpath('//table[@class="t1"]//tr/' + tag):
        markup = etree.tostring(cell)
        # tostring() serializes to ASCII by default and returns bytes on
        # Python 3; decode so the str replacements below work on both 2 and 3.
        if not isinstance(markup, str):
            markup = markup.decode('ascii')
        for wrapper in wrappers:
            markup = markup.replace(wrapper, '')
        cleaned.append(markup)
    return cleaned


def _write_rows(filename, rows):
    """Write ``rows`` to ``filename`` as UTF-8, comma-separated CSV."""
    import os

    # Create the output directory on demand so a fresh checkout does not
    # fail only after the page has already been downloaded.
    directory = os.path.dirname(filename)
    if directory and not os.path.isdir(directory):
        os.makedirs(directory)

    with open(filename, 'wb') as handle:
        writer = unicodecsv.writer(handle, delimiter=',', encoding='utf-8')
        writer.writerows(rows)


def extractYear(method, year):
    """Download one ranking page and save its table cells as two CSV files.

    The page URL is built from ``method`` and ``year``. Cells of the table
    with class "t1" are read in document order and split into rows of nine
    values (assumes every table row has nine cells -- TODO confirm against
    the page layout):

    * ``<td>`` cells (team ranks) go to ``data/team-rank-<year>.csv``
    * ``<th>`` cells (country ranks) go to ``data/country-rank-<year>.csv``

    :param method: number of the ranking method, used in the URL path.
    :param year: ranking year, used in the URL and in the output file names.
    :raises requests.exceptions.HTTPError: if the server answers with an
        error status.
    :raises requests.exceptions.Timeout: if the server does not respond
        within the timeout.
    """
    url = 'http://kassiesa.home.xs4all.nl/bert/uefa/data/method' + str(method) + '/trank' + str(year) + '.html'
    page = requests.get(url, timeout=30)
    # Fail loudly rather than parsing an error page into CSV output.
    page.raise_for_status()
    tree = html.fromstring(page.text)

    # Team ranks appear in the ordinary data cells (td).
    team_rows = list(group(_cell_markup(tree, 'td'), 9))
    _write_rows('data/team-rank-' + str(year) + '.csv', team_rows)

    # Country ranks appear in the header cells (th).
    country_rows = list(group(_cell_markup(tree, 'th'), 9))
    _write_rows('data/country-rank-' + str(year) + '.csv', country_rows)
if __name__ == "__main__":
    # (method, year) pairs to download, processed in this order; the method
    # number selects the URL path segment used for that year's page.
    snapshots = (
        (1, 1960),
        (1, 1965),
        (1, 1970),
        (1, 1975),
        (1, 1980),
        (1, 1985),
        (1, 1990),
        (1, 1995),
        (1, 1998),
        (2, 2000),
        (2, 2003),
        (3, 2005),
        (3, 2008),
        (4, 2010),
        (4, 2014),
    )
    for ranking_method, ranking_year in snapshots:
        extractYear(ranking_method, ranking_year)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment