Skip to content

Instantly share code, notes, and snippets.

@codecademydev
Created December 19, 2020 05:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save codecademydev/e90e2459ac0580351d32c912a7d2192c to your computer and use it in GitHub Desktop.
Save codecademydev/e90e2459ac0580351d32c912a7d2192c to your computer and use it in GitHub Desktop.
Codecademy export
import codecademylib3_seaborn
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
# load and investigate the data here:
# Read the statistics table from the CSV file in the working directory.
df = pd.read_csv('tennis_stats.csv')
print(df.head())
print(df.shape)
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() would append a stray "None" line to the output.
df.info()
print(df.describe())
# perform exploratory analysis here:
# Restrict the correlation matrix to numeric/boolean columns: pandas >= 2.0
# raises on columns that cannot be converted to float instead of silently
# dropping them, so calling corr() on the whole frame is fragile.
print(df.select_dtypes(include=["number", "bool"]).corr())

# One scatter plot per entry:
# (x column, y column, title, x-axis label, y-axis label).
scatter_specs = [
    ('BreakPointsOpportunities', 'Wins',
     'BreakPointsOpportunities vs wins', "BreakPointsOpportunities", "Wins"),
    ('FirstServePointsWon', 'Wins',
     'First Serve Points Won vs Wins', "First Serve Points Won", "Wins"),
    ('FirstServeReturnPointsWon', 'Wins',
     "First Serve Return Won vs Wins", "First Serve Return Won", "Wins"),
    ('SecondServePointsWon', 'Winnings',
     "Second serve points won", "Second Serve Points Won", "Winnings"),
    ('ReturnGamesPlayed', 'Losses',
     "Return Games Played vs Losses", "Return Games Played", "Losses"),
]
for x_col, y_col, title, x_label, y_label in scatter_specs:
    plt.scatter(df[x_col], df[y_col])
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.show()
    # Clear the figure so the next plot starts from an empty canvas.
    plt.clf()
## perform single feature linear regressions here:
# Model Winnings from BreakPointsOpportunities alone.
X = df[['BreakPointsOpportunities']]
y = df[['Winnings']]
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
print(X_train.shape)
print(y_test.shape)
simpl_regre = LinearRegression()
simpl_regre.fit(X_train, y_train)
y_pred = simpl_regre.predict(X_test)
print(y_pred)
# score() reports the coefficient of determination (R^2) on the held-out rows.
print(simpl_regre.score(X_test, y_test))
# Actual values go on the x-axis and model output on the y-axis so that the
# axis labels describe the data that is really plotted there.
plt.scatter(y_test, y_pred, alpha=0.3)
plt.title("Actual vs predicted winnings")
plt.xlabel("Actual winnings")
plt.ylabel("Predicted winnings")
plt.show()
plt.clf()
## perform two feature linear regressions here:
# Model Wins from two features: BreakPointsOpportunities and ServiceGamesPlayed.
X = df[['BreakPointsOpportunities', 'ServiceGamesPlayed']]
y = df[['Wins']]
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
print(X_train.shape)
print(y_test.shape)
simpl_regre = LinearRegression()
simpl_regre.fit(X_train, y_train)
y_pred = simpl_regre.predict(X_test)
print(y_pred)
# score() reports the coefficient of determination (R^2) on the held-out rows.
print(simpl_regre.score(X_test, y_test))
# Actual values go on the x-axis and model output on the y-axis so that the
# axis labels describe the data that is really plotted there.
plt.scatter(y_test, y_pred, alpha=0.4)
plt.xlabel("Actual wins")
plt.ylabel("Predicted wins")
plt.show()
plt.clf()
## perform multiple feature linear regressions here:
# Model Losses from six features at once.
X = df[[
    'BreakPointsOpportunities',
    'ServiceGamesPlayed',
    'DoubleFaults',
    'BreakPointsFaced',
    'Aces',
    'ReturnGamesPlayed',
]]
y = df[['Losses']]
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
print(X_train.shape)
print(y_test.shape)
simpl_regre = LinearRegression()
simpl_regre.fit(X_train, y_train)
y_pred = simpl_regre.predict(X_test)
print(y_pred)
# score() reports the coefficient of determination (R^2) on the held-out rows.
print(simpl_regre.score(X_test, y_test))
# The labels below put "actual" on x and "machine predicted" on y, so the
# held-out targets must be passed first and the predictions second.
plt.scatter(y_test, y_pred, alpha=0.3)
plt.title("actual vs machine predicted")
plt.xlabel("actual")
plt.ylabel("machine predicted")
plt.show()
plt.clf()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment