Skip to content

Instantly share code, notes, and snippets.

@codecademydev
Created October 13, 2021 09:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save codecademydev/f80d3ef73044ff0a547a3e4b8a717b07 to your computer and use it in GitHub Desktop.
Save codecademydev/f80d3ef73044ff0a547a3e4b8a717b07 to your computer and use it in GitHub Desktop.
Codecademy export
import codecademylib3_seaborn
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
# Load the tennis statistics table and print a quick overview of it.
data = pd.read_csv('tennis_stats.csv')
# In order: first rows, per-column dtypes, (rows, columns) shape, column labels.
for overview in (data.head(), data.dtypes, data.shape, data.columns):
    print(overview)
# perform exploratory analysis here:
# Draw one scatter panel per column after the first two, with Winnings on the
# x-axis and that column on the y-axis.
GRID_COLS = 4
feature_columns = data.columns[2:]
# Keep the original 6-row layout, but grow the grid (ceiling division) if the
# table ever has more columns than 6 x 4 panels can hold.
grid_rows = max(6, -(-len(feature_columns) // GRID_COLS))

plt.figure(figsize=[15, 15])
for offset, name in enumerate(feature_columns):
    plt.subplot(grid_rows, GRID_COLS, offset + 1)
    plt.title(f'Winnings vs {name}')
    # Select by position (offset + 2 skips the first two columns) so the y
    # data is a 1-D column. `norm` is deliberately not passed: it only applies
    # to colour-mapped `c` data, and `True` is not a valid normalizer.
    plt.scatter(data.Winnings, data.iloc[:, offset + 2], alpha=0.5)
# Spacing is a figure-level setting, so apply it once after all panels exist.
plt.subplots_adjust(wspace=1.3, hspace=1.3)
plt.show()
## perform single feature linear regressions here:
# Model Winnings from ServiceGamesPlayed alone, holding out 20% of the rows
# (fixed seed) to score the fit.
x_train, x_test, y_train, y_test = train_test_split(
    data['ServiceGamesPlayed'], data['Winnings'], test_size=0.2, random_state=12)

# scikit-learn estimators expect a 2-D (n_samples, n_features) input, so turn
# each 1-D split into a single-column array once and reuse it below.
x_train_2d = np.array(x_train).reshape(-1, 1)
x_test_2d = np.array(x_test).reshape(-1, 1)

model = LinearRegression()
model.fit(x_train_2d, y_train)
# score() reports R^2 on the held-out rows.
print(model.score(x_test_2d, y_test))

# Raw relationship between the feature and the target over the full data set.
# `norm` is not passed: it only applies to colour-mapped `c` data, and `True`
# is not a valid normalizer.
plt.clf()
plt.scatter(data.ServiceGamesPlayed, data.Winnings, alpha=0.5)
plt.show()
print(x_test.shape)
print(y_test.shape)

# Predicted winnings for the held-out rows, plotted against the feature.
prediction = model.predict(x_test_2d)
plt.scatter(x_test_2d, prediction, alpha=0.4)
plt.show()
## perform two feature linear regressions here:
# NOTE(review): despite the section title, each model below is fit on ONE
# feature at a time (two separate single-feature regressions) - confirm
# whether a combined two-column model was intended.
# The shared `model` is refit for each feature in turn and its score on the
# held-out 20% (same fixed seed for both splits) is printed.
for feature in ('BreakPointsOpportunities', 'BreakPointsFaced'):
    feat_train, feat_test, win_train, win_test = train_test_split(
        data[feature], data['Winnings'], test_size=0.2, random_state=12)
    # Reshape the 1-D splits into single-column 2-D arrays for the estimator.
    train_matrix = np.array(feat_train).reshape(-1, 1)
    test_matrix = np.array(feat_test).reshape(-1, 1)
    model.fit(train_matrix, win_train)
    print(model.score(test_matrix, win_test))
## perform multiple feature linear regressions here:
# Use every column except Player, Year and the Winnings target as predictors.
features = data.drop(['Player', 'Year', 'Winnings'], axis=1)
# Seed the split like every other split in this script, so the printed score
# is reproducible from run to run instead of changing with each shuffle.
f_train, f_test, t_train, t_test = train_test_split(
    features, data['Winnings'], test_size=0.2, random_state=12)
model.fit(f_train, t_train)
# score() reports R^2 on the held-out rows.
print(model.score(f_test, t_test))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment