-
-
Save codecademydev/0142b808f2f39d2426fa05527ef8b712 to your computer and use it in GitHub Desktop.
Codecademy export
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# codecademylib3_seaborn is kept first, as in the original ordering.
import codecademylib3_seaborn
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
# load and investigate the data
# Read the statistics table from 'tennis_stats.csv' (resolved relative to the
# current working directory) into a DataFrame, then print its column names so
# the available features can be inspected before plotting or modelling.
players = pd.read_csv('tennis_stats.csv')
print(players.columns)
# exploratory analysis
# Each pair is (x-axis column, y-axis column). One scatter plot is shown per
# pair, in this order, titled '<x> vs <y>' with matching axis labels.
exploratory_pairs = [
    ('FirstServeReturnPointsWon', 'Winnings'),
    ('BreakPointsOpportunities', 'Winnings'),
    ('BreakPointsSaved', 'Winnings'),
    ('TotalPointsWon', 'Ranking'),
    ('TotalServicePointsWon', 'Wins'),
]
for x_column, y_column in exploratory_pairs:
    plt.scatter(players[x_column], players[y_column])
    plt.title(f'{x_column} vs {y_column}')
    plt.xlabel(x_column)
    plt.ylabel(y_column)
    plt.show()
    # clear the current figure so the next plot starts from a blank canvas
    plt.clf()
## linear regression experiments predicting Winnings
# Each run is (feature columns, label used in the printed score line, suffix
# used in the plot title). Runs execute in this order: Aces alone,
# BreakPointsOpportunities alone, two features, then multiple features.
regression_runs = [
    ## single feature linear regression (Aces)
    (['Aces'], 'Aces', '1 Feature'),
    ## single feature linear regression (BreakPointsOpportunities)
    (['BreakPointsOpportunities'], 'BreakPointsOpportunities', '1 Feature'),
    ## two feature linear regression
    (
        ['BreakPointsOpportunities', 'FirstServeReturnPointsWon'],
        '2 Features',
        '2 Features',
    ),
    ## multiple features linear regression
    (
        [
            'FirstServe',
            'FirstServePointsWon',
            'FirstServeReturnPointsWon',
            'SecondServePointsWon',
            'SecondServeReturnPointsWon',
            'Aces',
            'BreakPointsConverted',
            'BreakPointsFaced',
            'BreakPointsOpportunities',
            'BreakPointsSaved',
            'DoubleFaults',
            'ReturnGamesPlayed',
            'ReturnGamesWon',
            'ReturnPointsWon',
            'ServiceGamesPlayed',
            'ServiceGamesWon',
            'TotalPointsWon',
            'TotalServicePointsWon',
        ],
        'Multiple Features',
        'Multiple Features',
    ),
]

for feature_columns, score_label, title_suffix in regression_runs:
    # select features and value to predict
    # (double brackets keep the target as a one-column DataFrame)
    features = players[feature_columns]
    winnings = players[['Winnings']]

    # train, test, split the data: 80% of the rows go to training.
    # No random_state is passed, so the split (and therefore the score and
    # the plot) can differ from one execution to the next.
    features_train, features_test, winnings_train, winnings_test = train_test_split(
        features, winnings, train_size=0.8
    )

    # create and train model on training data
    model = LinearRegression()
    model.fit(features_train, winnings_train)

    # score model on test data (LinearRegression.score reports R^2)
    print(
        f'Predicting Winnings with {score_label} Test Score:',
        model.score(features_test, winnings_test),
    )

    # make predictions with model
    winnings_prediction = model.predict(features_test)

    # plot predictions against actual winnings
    plt.scatter(winnings_test, winnings_prediction, alpha=0.4)
    plt.title(f'Predicted Winnings vs. Actual Winnings - {title_suffix}')
    plt.xlabel('Actual Winnings')
    plt.ylabel('Predicted Winnings')
    plt.show()
    # clear the current figure before the next run draws its plot
    plt.clf()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment