Skip to content

Instantly share code, notes, and snippets.

@codecademydev
Created June 4, 2020 09:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save codecademydev/2b4bc6b23dc7c381166eb9f939af8779 to your computer and use it in GitHub Desktop.
Save codecademydev/2b4bc6b23dc7c381166eb9f939af8779 to your computer and use it in GitHub Desktop.
Codecademy export
import codecademylib3_seaborn
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
# Fit a linear regression that predicts a player's 'Winnings' from several
# match statistics, plot predicted vs. actual values on the held-out rows,
# and print the train/test scores plus the fitted coefficients.

# load and investigate the data here:
df = pd.read_csv('tennis_stats.csv')
# print(df.corr())

# perform exploratory analysis here:
# (left disabled; uncomment one scatter together with plt.show() to inspect it)
# plt.scatter(df['ReturnGamesPlayed'], df['Winnings'])
# plt.scatter(df['BreakPointsFaced'], df['Wins'])
# plt.scatter(df['BreakPointsOpportunities'], df['Winnings'])
# plt.show()

## perform single feature linear regressions here:

## perform two feature linear regressions here:

## perform multiple feature linear regressions here:

# Columns used as predictors; the target column is 'Winnings'.
feature_columns = [
    'BreakPointsOpportunities',
    'ReturnGamesPlayed',
    'Aces',
    'BreakPointsFaced',
    'DoubleFaults',
    'ServiceGamesPlayed',
    'Wins',
    'Losses',
]
x = df[feature_columns]
y = df[['Winnings']]  # list selector keeps y as a one-column DataFrame

# 80/20 train/test split; random_state is pinned so reruns give the same split.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=3,
)

model = LinearRegression()
model.fit(x_train, y_train)

# Predicted vs. actual winnings for the held-out rows.
y_predict = model.predict(x_test)
plt.scatter(y_test, y_predict, alpha=0.4)
plt.xlabel("Actual Value")
plt.ylabel("Predicted Value")
# plt.plot(range(20000), range(20000))
plt.show()

# Model score on the training rows, then on the test rows, then the
# fitted coefficient for each feature column.
print(model.score(x_train, y_train))
print(model.score(x_test, y_test))
print(model.coef_)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment