Last active
June 18, 2020 13:58
-
-
Save joshstrupp/600fe05c504256cb042b to your computer and use it in GitHub Desktop.
NFL-win-loss-predictor-based-on-game-stats
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from math import exp

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

try:
    # scikit-learn >= 0.18; the cross_validation module was removed in 0.20.
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
# Load one CSV of game statistics per season (2000-2013) and stack them into
# a single frame.
# NOTE(review): the original annotated each season's file with a win-loss
# record -- 2000: 13-3, 2001: 7-9, 2002: 11-5, 2003: 12-4, 2004: 5-11,
# 2005: 4-12, 2006: 8-8, 2007: 10-6, 2008: 13-3, 2009: 8-8, 2010: 6-10,
# 2011: 9-7, 2012: 6-10, 2013: 7-9 -- presumably one team's season records;
# confirm which team the files describe.
seasons = range(2000, 2014)
season_frames = [
    pd.read_csv('nfl{}stats.csv'.format(year)) for year in seasons
]
nfl = pd.concat(season_frames, axis=0)

# Target: 1 when ScoreOff is strictly greater than ScoreDef, otherwise 0
# (so a tie is labelled 0).
nfl['WinLoss'] = np.where(nfl.ScoreOff > nfl.ScoreDef, 1, 0)

# The total-line column appears under three spellings ('TotalLine',
# 'Totalline', 'Totalline '), apparently because the headers differ between
# season files. After concat each spelling is NaN for rows that came from a
# file using another one, so merge them into a single 'TotalLine' column by
# taking the first non-null value per row.
total_line_variants = [
    name for name in ('TotalLine', 'Totalline', 'Totalline ')
    if name in nfl.columns
]
if total_line_variants:
    nfl['TotalLine'] = nfl[total_line_variants].bfill(axis=1).iloc[:, 0]

# Predictors for the win/loss model. Deliberately excluded:
#   * 'WinLoss', 'ScoreOff', 'ScoreDef' -- the target itself and the two
#     columns it is computed from; keeping them leaks the answer.
#   * 'Date', 'Opponent', 'Site', 'TeamName' -- presumably non-numeric
#     identifiers that LogisticRegression cannot consume (TODO confirm;
#     encode them explicitly if they should contribute).
#   * 'Totalline', 'Totalline ' -- merged into 'TotalLine' above.
feature_cols = [
    'FirstDownDef', 'FirstDownOff', 'FumblesDef', 'FumblesOff', 'Line',
    'PassAttDef', 'PassAttOff', 'PassCompDef', 'PassCompOff',
    'PassIntDef', 'PassIntOff', 'PassYdsDef', 'PassYdsOff',
    'PenYdsDef', 'PenYdsOff', 'PuntAvgOff',
    'RushAttDef', 'RushAttOff', 'RushYdsDef', 'RushYdsOff',
    'SackNumDef', 'SackNumOff', 'SackYdsDef', 'SackYdsOff',
    'ThirdDownPctDef', 'ThirdDownPctOff', 'TimePossDef', 'TimePossOff',
    'TotalLine',
]
X = nfl[feature_cols]
y = nfl.WinLoss

# LogisticRegression raises on NaN, so keep only rows where every feature is
# present. A positional boolean mask is used (rather than label-based
# selection) because concat left the per-file row labels in place, so labels
# repeat across seasons.
complete_rows = X.notnull().all(axis=1).values
X = X[complete_rows]
y = y[complete_rows]

# Train test split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=2)

# Fit model. Linear regression treats the dependent variable y as continuous;
# logistic regression treats it as categorical, which fits the binary
# win/loss target used here.
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment