Skip to content

Instantly share code, notes, and snippets.

View gabecano4308's full-sized avatar

gabecano4308

  • Deloitte
  • Philadelphia
View GitHub Profile
# Scatter plot of the 'Min PG' column against the 'Turnovers PG' column of all_teams_2021.
# x and y are passed by keyword: seaborn 0.12+ accepts only `data` positionally,
# so handing it two positional Series raises a TypeError on current releases.
sns.scatterplot(x=all_teams_2021['Min PG'], y=all_teams_2021['Turnovers PG'])
# The trailing semicolon is kept as written (presumably to stop a notebook from
# echoing the object returned by plt.title).
plt.title('Relationship Between Average Minutes Played and Turnovers Per Game');
# Histogram of the 'Min PG' column of all_teams_2021, split into 20 bins
sns.histplot(
    data=all_teams_2021['Min PG'],
    bins=20,
)
# Title the current axes; the trailing semicolon is preserved from the original
plt.title('Frequency of Minutes Played Per Game in 2021 Across the NBA');
# Sum 'Weight (Lbs)' per 'Team', order the totals from smallest to largest,
# and keep the first ten teams
(
    all_teams_2021
    .groupby('Team')[['Weight (Lbs)']]
    .sum()
    .sort_values('Weight (Lbs)')
    .head(10)
)
# Rows whose 'Team' is 'DET' and whose 'Field Goal %' is at most 0.40
all_teams_2021.loc[
    all_teams_2021['Team'].eq('DET')
    & all_teams_2021['Field Goal %'].le(0.40)
]
# Function changes all object columns that should be numeric to an int or float
def obj_to_num(nba_df):
    """Convert every non-text column of ``nba_df`` to a numeric dtype, in place.

    Columns whose label is one of the text columns listed in ``str_cols`` are
    left untouched. Every other column is passed through ``pd.to_numeric`` and
    written back to the same DataFrame. The resulting dtypes are printed once
    all columns have been processed.

    Args:
        nba_df: DataFrame to modify in place.

    Returns:
        None. The DataFrame passed in is mutated.

    Raises:
        Whatever ``pd.to_numeric`` raises for a value it cannot parse.
    """
    # Labels of the columns that hold text and must not be coerced.
    str_cols = {'Name', 'Team', 'Twitter Handle', 'Position', 'Height'}
    for col in nba_df.columns:
        if col not in str_cols:
            nba_df[col] = pd.to_numeric(nba_df[col])
    # Report the dtypes after conversion so the result can be checked by eye.
    print(nba_df.dtypes)
# NOTE(review): the indentation of this function appears to have been lost, and the
# body stops mid-loop in the lines shown (nothing is added to nba_info and nothing is
# returned) — confirm against the full source before relying on it.
def get_stats(year):
# Creating a list of dictionaries to then convert into a Pandas Dataframe
nba_info = []
# Iteratively finding the URL page for each NBA team according to the 'year' parameter and instantiating
# a BeautifulSoup object
# team_list is defined outside this function; each entry is placed directly after the
# domain, so it is presumably a path starting with '/' — verify against its definition.
for i in team_list:
team_url = (f'https://www.basketball-reference.com{i}/{str(year)}.html')
# Send a GET request for the team page, then parse the response body with the lxml parser
team_res = requests.get(team_url)
team_soup = BeautifulSoup(team_res.content, 'lxml')
# Build a BeautifulSoup tree from the body of wiz_res using the lxml parser
wiz_soup = BeautifulSoup(wiz_res.content, 'lxml')
# .find() returns only the first match: here, the first <table> element
# whose id attribute is 'per_game'
wiz_per_game = wiz_soup.find('table', id='per_game')
# Address of the Washington Wizards 2021 page on Basketball Reference
wiz_url = 'https://www.basketball-reference.com/teams/WAS/2021.html'
# Fetch that page with a GET request via the requests library
wiz_res = requests.get(wiz_url)
# Parse the body of the response into a BeautifulSoup tree using the lxml parser
wiz_soup = BeautifulSoup(wiz_res.content, 'lxml')
# BeautifulSoup's .find() method searches for a tag and specified attributes, returning the first match
# NOTE(review): the indentation of this loop appears to have been lost, and the body
# stops after the player page is parsed (nothing is appended to the list in the lines
# shown) — confirm against the full source.
# List created empty before the loop; the lines shown never add to it
height_weight_position = []
# Iterate over every <tr> of wiz_per_game except the first (presumably the header row — confirm)
for row in wiz_per_game.find_all('tr')[1:]:
player = {}
# Parsing html data from each player's specific web page
# The href of the row's first <a> tag is appended to the site root to form the URL
player_url = ('https://www.basketball-reference.com/' + row.find('a').attrs['href'])
# Download the player page and parse the response body with the lxml parser
player_rest = requests.get(player_url)
player_soup = BeautifulSoup(player_rest.content, 'lxml')
# NOTE(review): the indentation of this loop appears to have been lost, and the body
# stops before the player page is requested or parsed — confirm against the full source.
# List created empty before the loop; the lines shown never add to it
twitter_handle = []
# Iterate over every <tr> of wiz_per_game except the first (presumably the header row — confirm)
for row in wiz_per_game.find_all('tr')[1:]:
player = {}
# Taking the row's first hyperlink (player's url ending) and appending it to the base url
# to get the player's personal webpage url.
player_url = ('https://www.basketball-reference.com/' + row.find('a').attrs['href'])
# Making a new BeautifulSoup instance of the player's webpage and narrowing it to the top section