Created
September 7, 2012 04:18
-
-
Save dfeng/3663091 to your computer and use it in GitHub Desktop.
Scraper for Fantasy Football mock drafts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import lxml.html | |
import requests | |
import csv | |
import os | |
# Root of the site being scraped; page paths and draft links are appended to it.
base_url = 'http://fantasyfootballcalculator.com/'

# Directory that receives one CSV file per mock draft.
folder = "./Drafts"

# Number of listing pages to walk; the list offset advances by 25 per page.
num = 1

# When true, progress messages are printed while the draft list is scraped.
verbose = True
def get_mock_drafts():
    """Collect the link and team count of every listed mock draft.

    Walks ``num`` pages of the completed-drafts listing (the ``list`` query
    parameter advances by 25 per page) and reads one record per table row.
    Pages that do not come back with an OK status are skipped.

    Returns:
        A list of dicts, each with ``'url'`` (the ``href`` of the first
        element inside the row's ninth cell) and ``'totalteams'`` (the text
        of the row's fifth cell, left as a string).
    """
    if verbose:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Scraping the list of mock drafts")
    drafts = []
    for page in range(num):
        url = base_url + "completed_drafts.php?format=standard&teams=all&list=%d" % (page * 25)
        response = requests.get(url)
        # Same check get_picks() performs: never parse an error page.
        if response.status_code != requests.codes.ok:
            continue
        dom = lxml.html.fromstring(response.text)
        for tr in dom.cssselect("#completed-drafts tbody tr"):
            drafts.append({
                'url': tr[8][0].get('href'),
                'totalteams': tr[4].text,
            })
    if verbose:
        print("Scraped %d mock draft urls" % len(drafts))
    return drafts
def _serialized_tail(element):
    """Return *element* serialized as HTML text, minus its first four characters."""
    raw = lxml.html.tostring(element)
    # tostring() returns bytes on Python 3; convert to text so the result can
    # be joined and split together with ordinary strings. Its default output
    # encoding is ASCII, so decoding as ASCII is lossless.
    if not isinstance(raw, str):
        raw = raw.decode('ascii')
    # NOTE(review): the 4-character cut presumably drops a leading "<br>" so
    # that only the text following the tag remains - confirm against the page.
    return raw[4:]


def get_picks(draft):
    """Scrape the draft board of a single mock draft.

    Args:
        draft: dict with ``'url'`` (path of the draft page, appended to
            ``base_url``) and ``'totalteams'`` (team count, convertible with
            ``int``).

    Returns:
        A list with one dict per board cell, in page order, holding the keys
        ``name``, ``position``, ``team``, ``number``, ``teamname``,
        ``teamtype`` and ``draftposition`` (1-based).  ``False`` is returned
        when the page does not come back with an OK status or the team count
        is not positive.
    """
    url = base_url + draft['url']
    totalteams = int(draft['totalteams'])
    if totalteams <= 0:
        # Every index computation below divides by the team count.
        return False
    r = requests.get(url)
    if r.status_code != requests.codes.ok:
        return False
    dom = lxml.html.fromstring(r.text)

    teams = []
    for th in dom.cssselect("#headRow th[class!='roundCol']"):
        # A header cell without a class counts as a human team; otherwise the
        # class value is recorded as the team type.
        teams.append({'name': th.text, 'type': th.get('class') or 'human'})

    cells = dom.cssselect("#draftboardBody td[class!='rowLabel']")
    draftno = len(cells)
    totalrows = draftno // totalteams              # number of completely filled rows
    gap = (totalteams - draftno) % totalteams      # cells missing from the last row
    # The board is "ugly" when a partially filled last row has an odd index.
    ugly = totalrows % 2 == 1 and gap != 0

    picks = []
    for i, td in enumerate(cells):
        pick = dict()
        pick['name'] = " ".join([td.text, _serialized_tail(td[0])])
        pick['position'], pick['team'], pick['number'] = (
            _serialized_tail(td[1]).replace("(", "").replace(")", "").split(" "))
        row, mod = divmod(i, totalteams)
        if row % 2 == 0:
            pos = i
        else:
            # Odd rows run in the opposite column order.
            pos = (row + 1) * totalteams - mod - 1
        # edge case where the table didn't fill up, and ends on an even row - ruins all the alignment
        if row == totalrows and ugly:
            mod = mod + gap
            pos = pos - gap
        pick['teamname'] = teams[mod]['name']
        pick['teamtype'] = teams[mod]['type']
        pick['draftposition'] = pos + 1
        picks.append(pick)
    return picks
def dic_to_csv(picks, path):
    """Write mock draft picks to a CSV file with a header row.

    Args:
        picks: list of dicts, one per pick; the keys of the first dict
            become the column names.
        path: path of the CSV file to create (overwritten if it exists).

    Nothing is written, and no file is created, when ``picks`` is empty.
    """
    import sys

    if not picks:
        return
    keys = list(picks[0].keys())
    # The csv module expects a binary file on Python 2 and a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        f = open(path, 'wb')
    else:
        f = open(path, 'w', newline='')
    with f:
        dt = csv.DictWriter(f, keys)
        dt.writeheader()
        dt.writerows(picks)
def main():
    """Save every listed mock draft that has no CSV file in ``folder`` yet."""
    # make sure the output directory is there before anything is written
    if not os.path.exists(folder):
        os.makedirs(folder)
    for draft in get_mock_drafts():
        # the file name is whatever follows the first six characters of the link
        mockid = draft['url'][6:]
        filepath = folder + "/" + mockid + '.csv'
        if os.path.isfile(filepath):
            # a file for this draft already exists
            continue
        picks = get_picks(draft)
        if not picks:
            # the fetch failed or no picks were found
            continue
        dic_to_csv(picks, filepath)
if __name__ == "__main__":
    # Scrape only when executed as a script, not when the module is imported.
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment