Skip to content

Instantly share code, notes, and snippets.

@codecademydev
Created March 31, 2020 13:38
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save codecademydev/0142b808f2f39d2426fa05527ef8b712 to your computer and use it in GitHub Desktop.
Save codecademydev/0142b808f2f39d2426fa05527ef8b712 to your computer and use it in GitHub Desktop.
Codecademy export
import codecademylib3_seaborn
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
# load and investigate the data
# Read the tennis statistics CSV into a DataFrame and list its column names.
players = pd.read_csv('tennis_stats.csv')
print(players.columns)

# exploratory analysis
# One scatter plot per (x column, y column) pair, drawn in the order listed.
# The title and both axis labels are derived from the column names, and the
# figure is cleared after being shown so the next plot starts empty.
_scatter_pairs = [
    ('FirstServeReturnPointsWon', 'Winnings'),
    ('BreakPointsOpportunities', 'Winnings'),
    ('BreakPointsSaved', 'Winnings'),
    ('TotalPointsWon', 'Ranking'),
    ('TotalServicePointsWon', 'Wins'),
]
for _x_col, _y_col in _scatter_pairs:
    plt.scatter(players[_x_col], players[_y_col])
    plt.title(f'{_x_col} vs {_y_col}')
    plt.xlabel(_x_col)
    plt.ylabel(_y_col)
    plt.show()
    plt.clf()
## single feature linear regression (Aces)
# Fit a linear regression that predicts 'Winnings' from the 'Aces' column
# alone, print its score on held-out data and plot predicted vs. actual values.
# select features and value to predict
# (double brackets select a one-column DataFrame rather than a Series)
features = players[['Aces']]
winnings = players[['Winnings']]
# train, test, split the data: 80% of the rows go to training, the rest to test.
# random_state pins the shuffle so the printed score and the plot below are
# reproducible instead of changing on every run.
features_train, features_test, winnings_train, winnings_test = train_test_split(
    features, winnings, train_size=0.8, random_state=42)
# create and train model on training data
model = LinearRegression()
model.fit(features_train, winnings_train)
# score model on test data (LinearRegression.score reports R^2)
print('Predicting Winnings with Aces Test Score:', model.score(features_test, winnings_test))
# make predictions with model
winnings_prediction = model.predict(features_test)
# plot predictions against actual winnings; alpha keeps overlapping points visible
plt.scatter(winnings_test, winnings_prediction, alpha=0.4)
plt.title('Predicted Winnings vs. Actual Winnings - 1 Feature')
plt.xlabel('Actual Winnings')
plt.ylabel('Predicted Winnings')
plt.show()
# clear the figure so later plots do not draw on top of this one
plt.clf()
## single feature linear regression (BreakPointsOpportunities)
# Fit a linear regression that predicts 'Winnings' from the
# 'BreakPointsOpportunities' column alone, print its score on held-out data
# and plot predicted vs. actual values.
# select features and value to predict
# (double brackets select a one-column DataFrame rather than a Series)
features = players[['BreakPointsOpportunities']]
winnings = players[['Winnings']]
# train, test, split the data: 80% of the rows go to training, the rest to test.
# random_state pins the shuffle so the printed score and the plot below are
# reproducible instead of changing on every run.
features_train, features_test, winnings_train, winnings_test = train_test_split(
    features, winnings, train_size=0.8, random_state=42)
# create and train model on training data
model = LinearRegression()
model.fit(features_train, winnings_train)
# score model on test data (LinearRegression.score reports R^2)
print('Predicting Winnings with BreakPointsOpportunities Test Score:', model.score(features_test, winnings_test))
# make predictions with model
winnings_prediction = model.predict(features_test)
# plot predictions against actual winnings; alpha keeps overlapping points visible
plt.scatter(winnings_test, winnings_prediction, alpha=0.4)
plt.title('Predicted Winnings vs. Actual Winnings - 1 Feature')
plt.xlabel('Actual Winnings')
plt.ylabel('Predicted Winnings')
plt.show()
# clear the figure so later plots do not draw on top of this one
plt.clf()
## two feature linear regression
# Fit a linear regression that predicts 'Winnings' from two columns together
# ('BreakPointsOpportunities' and 'FirstServeReturnPointsWon'), print its score
# on held-out data and plot predicted vs. actual values.
# select features and value to predict
features = players[['BreakPointsOpportunities', 'FirstServeReturnPointsWon']]
winnings = players[['Winnings']]
# train, test, split the data: 80% of the rows go to training, the rest to test.
# random_state pins the shuffle so the printed score and the plot below are
# reproducible instead of changing on every run.
features_train, features_test, winnings_train, winnings_test = train_test_split(
    features, winnings, train_size=0.8, random_state=42)
# create and train model on training data
model = LinearRegression()
model.fit(features_train, winnings_train)
# score model on test data (LinearRegression.score reports R^2)
print('Predicting Winnings with 2 Features Test Score:', model.score(features_test, winnings_test))
# make predictions with model
winnings_prediction = model.predict(features_test)
# plot predictions against actual winnings; alpha keeps overlapping points visible
plt.scatter(winnings_test, winnings_prediction, alpha=0.4)
plt.title('Predicted Winnings vs. Actual Winnings - 2 Features')
plt.xlabel('Actual Winnings')
plt.ylabel('Predicted Winnings')
plt.show()
# clear the figure so later plots do not draw on top of this one
plt.clf()
## multiple features linear regression
# Fit a linear regression that predicts 'Winnings' from all of the columns
# listed below at once, print its score on held-out data and plot predicted
# vs. actual values.
# select features and value to predict
features = players[[
    'FirstServe',
    'FirstServePointsWon',
    'FirstServeReturnPointsWon',
    'SecondServePointsWon',
    'SecondServeReturnPointsWon',
    'Aces',
    'BreakPointsConverted',
    'BreakPointsFaced',
    'BreakPointsOpportunities',
    'BreakPointsSaved',
    'DoubleFaults',
    'ReturnGamesPlayed',
    'ReturnGamesWon',
    'ReturnPointsWon',
    'ServiceGamesPlayed',
    'ServiceGamesWon',
    'TotalPointsWon',
    'TotalServicePointsWon',
]]
winnings = players[['Winnings']]
# train, test, split the data: 80% of the rows go to training, the rest to test.
# random_state pins the shuffle so the printed score and the plot below are
# reproducible instead of changing on every run.
features_train, features_test, winnings_train, winnings_test = train_test_split(
    features, winnings, train_size=0.8, random_state=42)
# create and train model on training data
model = LinearRegression()
model.fit(features_train, winnings_train)
# score model on test data (LinearRegression.score reports R^2)
print('Predicting Winnings with Multiple Features Test Score:', model.score(features_test, winnings_test))
# make predictions with model
winnings_prediction = model.predict(features_test)
# plot predictions against actual winnings; alpha keeps overlapping points visible
plt.scatter(winnings_test, winnings_prediction, alpha=0.4)
plt.title('Predicted Winnings vs. Actual Winnings - Multiple Features')
plt.xlabel('Actual Winnings')
plt.ylabel('Predicted Winnings')
plt.show()
# clear the figure so later plots do not draw on top of this one
plt.clf()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment