Created
June 29, 2009 15:23
-
-
Save drewconway/137649 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib2 | |
def get_player_profiles(player_list):
    """Look up the NFL.com profile page URL for every player name given.

    Each name is submitted to the NFL.com player search (current players,
    team id 3410) and the returned HTML is scanned line by line for an
    anchor whose ``href`` contains ``profile?id=``.

    Args:
        player_list: Iterable of player-name strings such as "Tom Brady".
            As in the original implementation, only the first two
            whitespace-separated tokens of a name are sent to the search.

    Returns:
        A dict keyed by player name. The value is the absolute profile URL
        ("http://www.nfl.com" + the link's href), or None when no profile
        link was found. When several lines match, the last one wins.

    Raises:
        Any error raised by ``urlopen`` for a failed request propagates to
        the caller; the response is closed first if it was opened.
    """
    import re
    try:  # Python 2 module layout, which this file was written against.
        from urllib import quote_plus
        from urllib2 import urlopen
    except ImportError:  # Python 3 moved both into the urllib package.
        from urllib.parse import quote_plus
        from urllib.request import urlopen

    # Capture the href of a profile link directly instead of splitting the
    # whole line on '=', which breaks as soon as any other '=' precedes it.
    profile_link = re.compile(r'<a href="([^"]*profile\?id=[^"]*)"')

    # Materialise once: the names are needed both as dict keys and for the
    # lookup loop, and a one-shot iterator would be exhausted by the first.
    names = list(player_list)
    player_profile_urls = dict.fromkeys(names)

    for name in names:
        # Escape each token so characters such as "'" or "&" cannot corrupt
        # the query string; slicing also tolerates single-word names.
        query = "+".join(quote_plus(token) for token in name.split()[:2])
        search_url = ("http://www.nfl.com/players/search?category=name&filter="
                      + query + "&playerType=current&team=3410")
        results = urlopen(search_url)
        try:
            for raw_line in results.readlines():
                try:
                    # Python 3 responses yield bytes; Python 2 lines are
                    # already str and are used unchanged.
                    if isinstance(raw_line, str):
                        line = raw_line
                    else:
                        line = raw_line.decode("utf-8")
                except UnicodeDecodeError:
                    # Best effort: skip lines that cannot be decoded.
                    print("Ignoring UnicodeDecodeError")
                    continue
                match = profile_link.search(line)
                if match:
                    player_profile_urls[name] = "http://www.nfl.com" + match.group(1)
        finally:
            # Release the connection even if scanning the page fails.
            results.close()
    return player_profile_urls
# Resolve the profile URL for every player up front.
# NOTE(review): `players` is not defined in the visible part of this snippet;
# presumably a list of player-name strings built in an earlier step - confirm.
player_urls = get_player_profiles(players)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment