Skip to content

Instantly share code, notes, and snippets.

@dfeng
Created September 7, 2012 04:18
Show Gist options
  • Save dfeng/3663091 to your computer and use it in GitHub Desktop.
Save dfeng/3663091 to your computer and use it in GitHub Desktop.
Scraper for Fantasy Football mock drafts
import lxml.html
import requests
import csv
import os
# Root URL of the scraped site; listing and draft paths are appended to it.
base_url = 'http://fantasyfootballcalculator.com/'
# Directory that receives one CSV file per mock draft (created by main()).
folder = "./Drafts"
# Number of listing pages that get_mock_drafts() requests.
num = 1
# When true, get_mock_drafts() prints progress messages.
verbose = True
def get_mock_drafts():
    """Scrape the listing of completed mock drafts.

    Requests ``num`` listing pages below ``base_url`` (the ``list`` query
    parameter advances in steps of 25, presumably the page size) and
    collects one entry per row of the ``#completed-drafts`` table.

    A page whose HTTP status is not OK is skipped instead of being parsed,
    mirroring the status check done in get_picks().

    Returns:
        A list of dicts, one per table row, with the keys
        ``'url'`` (href of the first child of the row's ninth cell) and
        ``'totalteams'`` (text of the row's fifth cell).
    """
    if verbose:
        print("Scraping the list of mock drafts")

    drafts = []
    for page in range(num):
        url = base_url + "completed_drafts.php?format=standard&teams=all&list=%d" % (page * 25)
        response = requests.get(url)
        if response.status_code != requests.codes.ok:
            # An error page has no draft table; do not try to parse it.
            if verbose:
                print("Skipping %s (HTTP status %d)" % (url, response.status_code))
            continue

        dom = lxml.html.fromstring(response.text)
        for tr in dom.cssselect("#completed-drafts tbody tr"):
            draft = dict()
            draft['url'] = list(tr[8])[0].get('href')
            draft['totalteams'] = tr[4].text
            drafts.append(draft)

    if verbose:
        print("Scraped %d mock draft urls" % len(drafts))
    return drafts
def _locate_pick(index, totalteams, totalrows, gap, ugly):
    """Map a cell index (document order) to (team column, draft position).

    Even rows (0-based) run left to right, so the cell index is already
    the 0-based draft position. Odd rows run right to left, so the
    position is mirrored within the row.
    """
    column = index % totalteams
    row = index // totalteams
    if row % 2 == 0:
        return column, index

    position = (row + 1) * totalteams - column - 1
    # Edge case: the board did not fill up and its last, partial row is a
    # reversed one. Its cells are then shifted by the number of missing
    # cells, which would otherwise ruin the alignment.
    if ugly and row == totalrows:
        column = column + gap
        position = position - gap
    return column, position


def get_picks(draft):
    """Download one mock draft board and return its picks.

    draft: dict with the draft's 'url' (appended to base_url) and its
           'totalteams' (converted with int()).

    Returns a list with one dict per board cell, holding the keys 'name',
    'position', 'team', 'number', 'teamname', 'teamtype' and
    'draftposition' (1-based), or False when the HTTP status is not OK.
    """
    board_url = base_url + draft['url']
    totalteams = int(draft['totalteams'])

    response = requests.get(board_url)
    if response.status_code != requests.codes.ok:
        return False
    dom = lxml.html.fromstring(response.text)

    # Header row: one entry per team column. A header cell without a class
    # attribute is recorded as a 'human' team, otherwise the class is kept.
    teams = []
    for th in dom.cssselect("#headRow th[class!='roundCol']"):
        team = dict()
        team['name'] = th.text
        team['type'] = th.get('class') or 'human'
        teams.append(team)

    cells = dom.cssselect("#draftboardBody td[class!='rowLabel']")
    draftno = len(cells)
    totalrows = draftno // totalteams          # number of complete rows
    gap = (totalteams - draftno) % totalteams  # cells missing in the last row
    ugly = totalrows % 2 == 1 and gap != 0

    picks = []
    for index, td in enumerate(cells):
        children = list(td)
        # [4:] drops the first four characters of each serialized child,
        # presumably a leading "<br>" tag (not verifiable from this file).
        tail = lxml.html.tostring(children[0])[4:]
        details = lxml.html.tostring(children[1])[4:]
        details = details.replace("(", "").replace(")", "")

        # Keys are inserted in a fixed order: dic_to_csv() takes its column
        # order from the first pick.
        pick = dict()
        pick['name'] = " ".join([td.text, tail])
        pick['position'], pick['team'], pick['number'] = details.split(" ")

        column, position = _locate_pick(index, totalteams, totalrows, gap, ugly)
        owner = teams[column]
        pick['teamname'] = owner['name']
        pick['teamtype'] = owner['type']
        pick['draftposition'] = position + 1
        picks.append(pick)
    return picks
def dic_to_csv(picks, path):
    """Write the picks of one mock draft to a CSV file.

    Args:
        picks: list of dicts, one per pick. The keys of the first dict
            become the header row and fix the column order.
        path: file path of the CSV file to create (overwritten if it
            already exists).

    When ``picks`` is empty there is no header to derive, so nothing is
    written and no file is created.
    """
    import sys  # local import: only needed to pick the right open() mode

    if not picks:
        return

    keys = list(picks[0])
    # The csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        f = open(path, 'wb')
    else:
        f = open(path, 'w', newline='')
    with f:
        dt = csv.DictWriter(f, keys)
        dt.writeheader()
        dt.writerows(picks)
def main():
    """Scrape every listed mock draft into its own CSV file.

    Makes sure the output directory ``folder`` exists, then walks the
    drafts returned by get_mock_drafts(). A draft whose CSV file already
    exists is not fetched again; a draft for which get_picks() returns a
    falsy result is not written.
    """
    # create folder for csv
    if not os.path.exists(folder):
        os.makedirs(folder)

    for draft in get_mock_drafts():
        # The file name is the draft url minus its first six characters
        # (presumably a "draft/" style prefix; not verifiable from here).
        mockid = draft['url'][6:]
        filepath = folder + "/" + mockid + '.csv'
        if os.path.isfile(filepath):
            continue
        picks = get_picks(draft)
        if picks:
            dic_to_csv(picks, filepath)
# Run the scraper. There is no __main__ guard, so this also runs on import.
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment