Skip to content

Instantly share code, notes, and snippets.

@codecademydev
Created April 5, 2020 20:38
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save codecademydev/142cde35f1cd7e73c9445c5237466eb5 to your computer and use it in GitHub Desktop.
Save codecademydev/142cde35f1cd7e73c9445c5237466eb5 to your computer and use it in GitHub Desktop.
Codecademy export
import codecademylib3_seaborn
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
# load and investigate the data here:
# pd.read_csv already returns a DataFrame, so it is used directly instead of
# being wrapped in a second pd.DataFrame(...) call. Both names are kept so
# that any code referring to either `data` or `df` keeps working.
data = pd.read_csv('tennis_stats.csv')
df = data
# perform exploratory analysis here:
def scr(x_values, y_values):
    """Fit a linear regression and return its R^2 score on held-out data.

    The inputs are split 80/20 into training and test portions, a
    ``LinearRegression`` model is fitted on the training portion, and the
    score is computed on the test portion.

    Args:
        x_values: Feature data; callers in this file pass a DataFrame
            selection with one or more columns.
        y_values: Target values aligned row-for-row with ``x_values``.

    Returns:
        The value of ``LinearRegression.score`` (the coefficient of
        determination, R^2) on the test split. No random seed is fixed for
        the split, so repeated calls may return slightly different values.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        x_values, y_values, train_size=0.80, test_size=0.20
    )
    model = LinearRegression()
    model.fit(x_train, y_train)
    # Score on the rows the model has not seen: scoring the training rows
    # would make the split pointless and overstate the quality of the fit.
    return model.score(x_test, y_test)
## perform single feature linear regressions here:
# For each outcome, score every candidate feature on its own and report only
# the features whose score is above 0.8 (feature name, then its score).
# NOTE(review): both prints are assumed to belong inside the threshold check,
# as the description above states - confirm against the intended output.
# The first header starts with one newline, the later ones with two.
outcome_headers = (
    ('Wins', '\nOutcome: Wins Models'),
    ('Losses', '\n\nOutcome: Losses Models'),
    ('Winnings', '\n\nOutcome: Winnings Models'),
    ('Ranking', '\n\nOutcome: Ranking Models'),
)
for outcome, header in outcome_headers:
    print(header)
    for feature in df.columns[2:20]:
        score = scr(df[[feature]], df[[outcome]])
        if score > 0.8:
            print(feature)
            print(score)
## perform two feature linear regressions here:
# The feature combinations below were picked from the single feature results;
# every model predicts Winnings. Each entry prints a 'Model: ...' label
# listing its features, followed by the score returned by scr().
feature_sets = (
    ['BreakPointsOpportunities', 'ReturnGamesPlayed'],
    ['BreakPointsOpportunities', 'ServiceGamesPlayed'],
    ['ReturnGamesPlayed', 'ServiceGamesPlayed'],
    ## perform multiple feature linear regressions here:
    # all three features combined
    ['BreakPointsOpportunities', 'ReturnGamesPlayed', 'ServiceGamesPlayed'],
)
for features in feature_sets:
    print('Model: ' + ', '.join(features))
    print(scr(df[features], df['Winnings']))
print('finish')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment