Skip to content

Instantly share code, notes, and snippets.

@drewconway
Created June 29, 2009 15:23
Show Gist options
  • Save drewconway/137649 to your computer and use it in GitHub Desktop.
Save drewconway/137649 to your computer and use it in GitHub Desktop.
import urllib2
def _find_profile_href(line):
    """Return the first ``href="..."`` value in *line* containing ``profile?id=``.

    Returns None when the line carries no such attribute value.
    """
    marker = 'href="'
    start = line.find(marker)
    while start != -1:
        start += len(marker)
        end = line.find('"', start)
        if end == -1:
            # Unterminated attribute value: nothing usable on this line.
            return None
        href = line[start:end]
        if 'profile?id=' in href:
            return href
        start = line.find(marker, end)
    return None


def get_player_profiles(player_list):
    """Look up the NFL.com profile URL for each player name.

    Each name is submitted to the NFL.com player search and the returned
    HTML is scanned line by line for an anchor whose target contains
    ``profile?id``; that target is what the next processing step needs.

    Args:
        player_list: Iterable of player-name strings (e.g. "First Last").

    Returns:
        A dict keyed by every name in ``player_list``. Each value is the
        absolute profile URL, or None when no profile link was found (or
        the name was blank). If several lines of a result page carry a
        profile link, the last one wins.

    Raises:
        Errors raised by the URL opener (network/HTTP failures) are not
        caught here and propagate to the caller.
    """
    # Imported locally so the function runs on both Python 2 and Python 3
    # without depending on the module-level import block.
    try:
        from urllib import quote_plus
        from urllib2 import urlopen
    except ImportError:
        from urllib.parse import quote_plus
        from urllib.request import urlopen

    site = "http://www.nfl.com"
    # Materialise once: the input is walked twice (dict keys, then the loop),
    # which would silently skip all lookups for a one-shot iterator.
    names = list(player_list)
    player_profile_urls = dict.fromkeys(names)
    for name in names:
        parts = name.split()
        if not parts:
            # Blank name: nothing to search for, leave the entry as None.
            continue
        # quote_plus escapes unsafe characters and encodes spaces as '+',
        # so "First Last" becomes "First+Last"; names with one word or with
        # more than two words are handled as well.
        query = quote_plus(" ".join(parts))
        search_url = (site + "/players/search?category=name&filter=" + query
                      + "&playerType=current&team=3410")
        results = urlopen(search_url)
        try:
            for line in results.readlines():
                try:
                    if not isinstance(line, str):
                        # Python 3 yields bytes; the string tests need text.
                        line = line.decode("utf-8")
                    if "<a href" not in line or "profile?id" not in line:
                        continue
                    href = _find_profile_href(line)
                except UnicodeDecodeError:
                    # Best effort: skip an undecodable line rather than
                    # abort the whole scrape.
                    print("Ignoring UnicodeDecodeError")
                    continue
                if href is not None:
                    player_profile_urls[name] = site + href
        finally:
            # Always release the connection, even if reading fails.
            results.close()
    return player_profile_urls
# Build the mapping of player name -> profile URL for the names in `players`.
# NOTE(review): `players` is not defined in the visible code; it must be bound
# before this line runs -- presumably a list of "First Last" strings; confirm.
player_urls=get_player_profiles(players)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment