Skip to content

Instantly share code, notes, and snippets.

@gabecano4308
Last active December 31, 2020 19:09
Show Gist options
  • Save gabecano4308/f185299222553a734a8ce01324ad3aa4 to your computer and use it in GitHub Desktop.
Save gabecano4308/f185299222553a734a8ce01324ad3aa4 to your computer and use it in GitHub Desktop.
for part 1
# Scrape the Washington Wizards per-game table from Basketball Reference, then
# visit each player's own page for Twitter handle, height, weight and position.
# The result is a list of dicts (``wiz_info``) rendered as a pandas DataFrame.
#
# Relies on ``requests``, ``BeautifulSoup``, ``re`` and ``pd`` already being
# imported by the surrounding script/notebook.
from urllib.parse import urljoin

# Site root; the relative player links found in the table are joined onto it.
_BASE_URL = 'https://www.basketball-reference.com/'

# Seconds to wait for a response before giving up instead of hanging forever.
_REQUEST_TIMEOUT = 10

# (DataFrame column label, value of the cell's ``data-stat`` attribute),
# in the order the columns should appear.
_STAT_COLUMNS = (
    ('Age', 'age'),
    ('Min PG', 'mp_per_g'),
    ('Field Goal %', 'fg_pct'),
    ('Rebounds PG', 'trb_per_g'),
    ('Assists PG', 'ast_per_g'),
    ('Steals PG', 'stl_per_g'),
    ('Blocks PG', 'blk_per_g'),
    ('Turnovers PG', 'tov_per_g'),
    ('Points PG', 'pts_per_g'),
)

# Patterns applied to the serialized <p> tags of a player's bio block.
# Compiled once because they are reused for every player. Non-greedy groups
# and ``\s*`` keep them from depending on the page's exact whitespace.
_HEIGHT_RE = re.compile(r'itemprop="height">\s*(.*?)\s*</span>')
_WEIGHT_RE = re.compile(r'itemprop="weight">\s*(.*?)\s*lb\s*</span>')
_POSITION_RE = re.compile(r'Position:\s*</strong>\s*([^\n<]+)')


def _cell_text(row, stat):
    """Return the text of the row's ``td`` whose data-stat is *stat*, or None if absent."""
    cell = row.find('td', {'data-stat': stat})
    return cell.text if cell is not None else None


def _first_group(pattern, text):
    """Return the stripped first capture group of *pattern* in *text*, or None on no match."""
    match = pattern.search(text)
    return match.group(1).strip() if match else None


# URL for the Washington Wizards Basketball Reference page
wiz_url = 'https://www.basketball-reference.com/teams/WAS/2021.html'

# Send a GET request to wiz_url; fail loudly on an HTTP error status rather
# than trying to parse an error page as if it were the roster.
wiz_res = requests.get(wiz_url, timeout=_REQUEST_TIMEOUT)
wiz_res.raise_for_status()

# BeautifulSoup parses the content of the HTML document returned above
wiz_soup = BeautifulSoup(wiz_res.content, 'lxml')

# .find() returns the first tag matching the name/attributes, or None
wiz_per_game = wiz_soup.find(name='table', attrs={'id': 'per_game'})
if wiz_per_game is None:
    raise ValueError(f"No table with id 'per_game' found at {wiz_url}")

# Build a list of dictionaries to then convert into a pd.DataFrame
wiz_info = []

# NOTE(review): one extra request is made per player; the site may throttle
# rapid scraping -- consider pausing between requests if responses start failing.
for row in wiz_per_game.find_all('tr')[1:]:  # first 'tr' is the table's header row
    name_link = row.find('a')
    if name_link is None or not name_link.get('href'):
        # Not a player row (no link to a player page) -- nothing to scrape.
        continue

    player = {'Name': name_link.text.strip()}
    for label, stat in _STAT_COLUMNS:
        player[label] = _cell_text(row, stat)

    # urljoin avoids the doubled slash produced by concatenating a base that
    # ends in '/' with an href that starts with '/'.
    player_url = urljoin(_BASE_URL, name_link['href'])
    player_rest = requests.get(player_url, timeout=_REQUEST_TIMEOUT)
    player_rest.raise_for_status()
    player_soup = BeautifulSoup(player_rest.content, 'lxml')
    player_info = player_soup.find(
        name='div', attrs={'itemtype': 'https://schema.org/Person'}
    )

    if player_info is not None:
        player_links = [link.get('href') for link in player_info.find_all('a')]
        bio_html = str(player_info.find_all('p'))
    else:
        # Bio block missing: keep the row, leave the bio fields empty.
        player_links = []
        bio_html = ''

    # Look through every link instead of assuming the Twitter one is second.
    twitter_url = next(
        (href for href in player_links if href and 'twitter.com/' in href),
        None,
    )
    if twitter_url:
        # The handle is the last path segment of the profile URL.
        player['Twitter Handle'] = twitter_url.rstrip('/').rsplit('/', 1)[-1]
    else:
        player['Twitter Handle'] = 'Not Listed'

    # Each value is None when the corresponding markup is not present.
    player['Height'] = _first_group(_HEIGHT_RE, bio_html)
    player['Weight (Lbs)'] = _first_group(_WEIGHT_RE, bio_html)
    player['Position'] = _first_group(_POSITION_RE, bio_html)

    wiz_info.append(player)

pd.DataFrame(wiz_info)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment