#!/usr/bin/env python
# coding: utf-8
# module imports
from patsy import dmatrices
import pandas as pd
from sklearn.linear_model import LogisticRegression
import statsmodels.discrete.discrete_model as sm
# Load the CSV, keep only the rows whose `year` column equals 2019, then let
# patsy build the response (y) and design (X) matrices as pandas DataFrames.
df = pd.read_csv("./epa_base_data.csv")
df = df.loc[df["year"] == 2019]
y, X = dmatrices(
    'drive_point ~ C(down) + distance + adjusted_yardline'
    ' + C(down):distance + C(down):adjusted_yardline'
    ' + period + margin',
    df,
    return_type='dataframe',
)
# sklearn: multinomial logistic regression fitted on the patsy matrices.
# C is the inverse regularization strength, so a very large value makes the
# penalty negligible (presumably to approximate the unpenalized fit).
# fit_intercept=False: the formula does not drop the intercept, so the patsy
# design matrix is expected to already contain an Intercept column.
# NOTE(review): the original fit line was garbled (`mdl =, y)`); it is
# reconstructed here as `model.fit(X, y)` -- confirm against the original.
# NOTE(review): y is a DataFrame here; sklearn expects a 1-d target and may
# warn about a column vector -- verify the shape of y.
model = LogisticRegression(
    multi_class='multinomial',
    solver='newton-cg',
    C=1000000,
    fit_intercept=False,
)
mdl = model.fit(X, y)
# statsmodels: fit a multinomial logit on the same y / X matrices using the
# "ncg" (Newton conjugate-gradient) optimizer and keep only the estimated
# coefficient table (`.params`).
# NOTE(review): the original line was garbled (`sm.MNLogit(y, X)"ncg").params`);
# the `.fit(method=...)` call is reconstructed -- confirm against the original.
logit = sm.MNLogit(y, X).fit(method="ncg").params
