Skip to content

Instantly share code, notes, and snippets.

@codecademydev
Created September 21, 2020 02:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save codecademydev/bba0e36aa25d5e339fa4129589c391cf to your computer and use it in GitHub Desktop.
Save codecademydev/bba0e36aa25d5e339fa4129589c391cf to your computer and use it in GitHub Desktop.
Codecademy export
import codecademylib3_seaborn
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
# Load the tennis statistics and preview the first rows.
# The CSV path is relative to the current working directory.
tennis = pd.read_csv('tennis_stats.csv')
preview_rows = tennis.head()
print(preview_rows)
# Exploratory analysis: winnings against first-serve return points won.
# Scatter the raw observations, then overlay the line of a linear
# regression fitted on the full data set.
return_points = tennis[['FirstServeReturnPointsWon']]
winnings = tennis[['Winnings']]
plt.scatter(tennis['FirstServeReturnPointsWon'], tennis['Winnings'])

lr = LinearRegression()
lr.fit(return_points, winnings)
wins_predict = lr.predict(return_points)

# Draw the fitted line over the scatter, then label and render the figure.
plt.plot(return_points, wins_predict, linewidth=5, color='green')
plt.title('First Serve Return Points Vs Winnings')
plt.xlabel('First Serve Return Points Won')
plt.ylabel('Winnings')
plt.show()
# Clear the figure so the next plot starts from an empty canvas.
plt.clf()
# Exploratory analysis: winnings against break point opportunities.
plt.scatter(tennis['BreakPointsOpportunities'], tennis['Winnings'])

# Fit a one-variable linear regression on the full data set so its line can
# be drawn over the scatter.
lr2 = LinearRegression()
lr2.fit(tennis[['BreakPointsOpportunities']], tennis[['Winnings']])
wins_predict2 = lr2.predict(tennis[['BreakPointsOpportunities']])
plt.plot(tennis[['BreakPointsOpportunities']], wins_predict2, color='green', linewidth=5)

# Axis labels and title (spelling corrected: "Opportunities", "Winnings").
plt.xlabel('Break Points Opportunities')
plt.ylabel('Winnings')
plt.title('Break Points Opportunities vs Winnings')
plt.show()
# Clear the figure so the next plot starts from an empty canvas.
plt.clf()
# Single-feature regression: predict Winnings from FirstServeReturnPointsWon.
features = tennis[['FirstServeReturnPointsWon']]
outcome = tennis[['Winnings']]

# 80/20 train/test split; the fixed random_state keeps the split (and the
# reported score) the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    features, outcome, train_size=0.8, test_size=0.2, random_state=124)

model = LinearRegression()
model.fit(x_train, y_train)

# R^2 of the model on the held-out test rows.
Rsquare = model.score(x_test, y_test)
print(Rsquare)

# Actual vs. predicted winnings for the test rows.
prediction = model.predict(x_test)
plt.scatter(y_test, prediction, alpha=0.1)
plt.xlabel('Test Values')
plt.ylabel('Predictions')
# Title spelling corrected ("Winnings").
plt.title('Prediction of Winnings for First Serve Return Points Won')
plt.show()
# Clear the figure so the next plot starts from an empty canvas.
plt.clf()
## perform single feature linear regressions here:
# Predict Winnings from BreakPointsOpportunities alone.
features2 = tennis[['BreakPointsOpportunities']]
outcome2 = tennis[['Winnings']]

# 80/20 train/test split with a fixed seed for a repeatable result.
x_train2, x_test2, y_train2, y_test2 = train_test_split(
    features2, outcome2, train_size=0.8, test_size=0.2, random_state=124)

model2 = LinearRegression()
model2.fit(x_train2, y_train2)

# R^2 of the model on the held-out test rows.
Rsquare2 = model2.score(x_test2, y_test2)
print(Rsquare2)

# Actual vs. predicted winnings for the test rows.
prediction2 = model2.predict(x_test2)
plt.scatter(y_test2, prediction2, alpha=0.1)
# Label text corrected so every label names the outcome as "Winnings".
plt.xlabel('Test value of Winnings')
plt.ylabel('Prediction Value of Winnings')
plt.title('Winnings for every BreakPoint')
plt.show()
# Clear the figure so the next plot starts from an empty canvas.
plt.clf()
# perform two feature linear regressions here:
# Predict Winnings from BreakPointsOpportunities and
# FirstServeReturnPointsWon together.
features3 = tennis[['BreakPointsOpportunities',
                    'FirstServeReturnPointsWon']]
outcome3 = tennis[['Winnings']]

# 80/20 train/test split with a fixed seed for a repeatable result.
x_train3, x_test3, y_train3, y_test3 = train_test_split(
    features3, outcome3, train_size=0.8, test_size=0.2, random_state=124)

model3 = LinearRegression()
model3.fit(x_train3, y_train3)

# R^2 on the held-out test rows. The score was previously computed but
# never reported, so it is printed here.
R_square3 = model3.score(x_test3, y_test3)
print(R_square3)

# Actual vs. predicted winnings for the test rows.
predict3 = model3.predict(x_test3)
plt.scatter(y_test3, predict3, alpha=0.3)
# Label/title spelling corrected ("Winnings", "Opportunities", "Points").
plt.xlabel('Test Value of Winnings')
plt.ylabel('Prediction Value of Winnings')
plt.title('Prediction of winnings for Break Points Opportunities and First Serve Return Points Won')
plt.show()
# Clear the figure so the next plot starts from an empty canvas.
plt.clf()
## perform multiple feature linear regressions here:
# Predict Winnings from the full set of service/return statistics below.
features5 = tennis[['FirstServe', 'FirstServePointsWon', 'FirstServeReturnPointsWon',
                    'SecondServePointsWon', 'SecondServeReturnPointsWon', 'Aces',
                    'BreakPointsConverted', 'BreakPointsFaced', 'BreakPointsOpportunities',
                    'BreakPointsSaved', 'DoubleFaults', 'ReturnGamesPlayed', 'ReturnGamesWon',
                    'ReturnPointsWon', 'ServiceGamesPlayed', 'ServiceGamesWon', 'TotalPointsWon',
                    'TotalServicePointsWon']]
outcome5 = tennis[['Winnings']]

# 80/20 train/test split. random_state is pinned (124, the seed the other
# splits in this script use) so the split and the printed R^2 are the same
# on every run; without it each run shuffles the rows differently.
x_train5, x_test5, y_train5, y_test5 = train_test_split(
    features5, outcome5, train_size=0.8, test_size=0.2, random_state=124)

model5 = LinearRegression()
model5.fit(x_train5, y_train5)

# R^2 of the model on the held-out test rows.
Rsquare5 = model5.score(x_test5, y_test5)
print(Rsquare5)

# Actual vs. predicted winnings for the test rows.
prediction5 = model5.predict(x_test5)
plt.scatter(y_test5, prediction5, alpha=0.1)
# Label/title spelling corrected ("Winnings").
plt.xlabel('Test Value of Winnings')
plt.ylabel('Prediction Value of Winnings')
plt.title('Prediction of winnings for all X values')
plt.show()
# Clear the figure so the next plot starts from an empty canvas.
plt.clf()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment