Skip to content

Instantly share code, notes, and snippets.

@nigelhooke
Last active August 1, 2017 02:19
Show Gist options
  • Save nigelhooke/50cd290f634d7eefee8ed26b9e2c6186 to your computer and use it in GitHub Desktop.
Save nigelhooke/50cd290f634d7eefee8ed26b9e2c6186 to your computer and use it in GitHub Desktop.
Statsmodels GLM regression

Scripts for DiUS blog Using Statsmodels GLMs to model beverage consumption, published August 2017.

f = """bev_quant ~ precip + relhum + temp + bpm + n_events + C(is_pubhol)
+ C(is_pubhol_eve) + C(is_happy_hr) + C(is_mon_pubhol) + C(is_pre_xmas)"""
response, predictors = dmatrices(f, data, return_type='dataframe')
nb_results = sm.GLM(response, predictors,
family=sm.families.NegativeBinomial(alpha=0.15)).fit()
print(nb_results.summary())
import statsmodels.formula.api as smf
def ct_response(row):
"Calculate response observation for Cameron-Trivedi dispersion test"
y = row['bev_quant']
m = row['bev_mu']
return ((y - m)**2 - y) / m
ct_data = data.copy()
ct_data['bev_mu'] = po_results.mu
ct_data['ct_resp'] = ct_data.apply(ct_response, axis=1)
# Linear regression of auxiliary formula
ct_results = smf.ols('ct_resp ~ bev_mu - 1', ct_data).fit()
# Construct confidence interval for alpha, the coefficient of bev_mu
alpha_ci95 = ct_results.conf_int(0.05).loc['bev_mu']
print('\nC-T dispersion test: alpha = {:5.3f}, 95% CI = ({:5.3f}, {:5.3f})'
.format(ct_results.params[0], alpha_ci95.loc[0], alpha_ci95.loc[1]))
from patsy import dmatrices
import statsmodels.api as sm
formula = """bev_quant ~ bpm + n_events + precip + relhum + temp + C(is_happy_hr)
+ C(is_mon_pubhol) + C(is_pre_xmas) + C(is_pubhol) + C(is_pubhol_eve)"""
response, predictors = dmatrices(formula, data, return_type='dataframe')
po_results = sm.GLM(response, predictors, family=sm.families.Poisson()).fit()
print(po_results.summary())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment