Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save joshstrupp/600fe05c504256cb042b to your computer and use it in GitHub Desktop.
Save joshstrupp/600fe05c504256cb042b to your computer and use it in GitHub Desktop.
NFL-win-loss-predictor-based-on-game-stats
from math import exp

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

try:
    # Current home of train_test_split (scikit-learn >= 0.18).
    from sklearn.model_selection import train_test_split
except ImportError:
    # Legacy module path; removed in scikit-learn 0.20.
    from sklearn.cross_validation import train_test_split
# Read the per-season stats files (2000 through 2013), one CSV per year.
# Notes carried over from the original per-file comments (they look like a
# win-loss record for each season -- which team is not stated; TODO confirm):
#   2000: 13-3   2001: 7-9    2002: 11-5   2003: 12-4   2004: 5-11
#   2005: 4-12   2006: 8-8    2007: 10-6   2008: 13-3   2009: 8-8
#   2010: 6-10   2011: 9-7    2012: 6-10   2013: 7-9
season_frames = [
    pd.read_csv('nfl%dstats.csv' % season, sep=',')
    for season in range(2000, 2014)
]

# Keep the individual per-season names bound, exactly as before.
(nfl2000, nfl2001, nfl2002, nfl2003, nfl2004, nfl2005, nfl2006,
 nfl2007, nfl2008, nfl2009, nfl2010, nfl2011, nfl2012, nfl2013) = season_frames

# Stack every season row-wise into one frame. ignore_index is not set, so each
# file's own row labels are kept and index values repeat across seasons.
nfl = pd.concat(season_frames, axis=0)
# Binary target: 1 when the team outscored its opponent, otherwise 0.
# Equal scores (ties) therefore fall into the 0 class.
nfl['WinLoss'] = np.where(nfl.ScoreOff > nfl.ScoreDef, 1, 0)

# The over/under column appears under three spellings ('TotalLine',
# 'Totalline', 'Totalline '). Presumably the header differs between season
# files, which would leave each spelling NaN for the other seasons after the
# concat -- TODO confirm against the CSV headers. Fold them into a single
# 'TotalLine' column, taking the first non-null value per row. Raw arrays are
# used because the concatenated frame has repeated index labels.
_total_line_names = [
    name for name in ('TotalLine', 'Totalline', 'Totalline ')
    if name in nfl.columns
]
if _total_line_names:
    _total_line = nfl[_total_line_names[0]].values
    for _name in _total_line_names[1:]:
        _total_line = np.where(
            pd.isnull(_total_line), nfl[_name].values, _total_line
        )
    nfl['TotalLine'] = _total_line

# Predictor columns. Deliberately left out:
#   * 'WinLoss'              -- it is the target itself;
#   * 'ScoreOff', 'ScoreDef' -- the target is computed directly from them, so
#                               keeping them leaks the answer into the model;
#   * 'Date', 'Opponent', 'Site', 'TeamName' -- identifier columns
#     (presumably text-valued; verify) that LogisticRegression cannot fit
#     without an explicit encoding step;
#   * 'Totalline', 'Totalline ' -- merged into 'TotalLine' above.
# NOTE(review): the remaining columns are assumed to be numeric -- check
# nfl[feature_cols].dtypes before fitting.
feature_cols = [
    'FirstDownDef', 'FirstDownOff',
    'FumblesDef', 'FumblesOff',
    'Line',
    'PassAttDef', 'PassAttOff',
    'PassCompDef', 'PassCompOff',
    'PassIntDef', 'PassIntOff',
    'PassYdsDef', 'PassYdsOff',
    'PenYdsDef', 'PenYdsOff',
    'PuntAvgOff',
    'RushAttDef', 'RushAttOff',
    'RushYdsDef', 'RushYdsOff',
    'SackNumDef', 'SackNumOff',
    'SackYdsDef', 'SackYdsOff',
    'ThirdDownPctDef', 'ThirdDownPctOff',
    'TimePossDef', 'TimePossOff',
    'TotalLine',
]

# Design matrix and target vector for the classifier.
X = nfl[feature_cols]
y = nfl['WinLoss']
# Hold out part of the rows for later evaluation. random_state pins the
# shuffle so every run produces the same partition; the split proportion is
# left at train_test_split's default.
_partitions = train_test_split(X, y, random_state=2)
X_train, X_test, y_train, y_test = _partitions

# Logistic rather than linear regression: linear regression treats the
# dependent variable as continuous, whereas logistic regression treats it as
# categorical (discrete). The former is used in regression settings, the
# latter for binary classification -- or multi-class classification, where it
# is called multinomial logistic regression. The WinLoss target is a 0/1 label.
logreg = LogisticRegression().fit(X_train, y_train)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment