Scripts for DiUS blog Using Statsmodels GLMs to model beverage consumption, published August 2017.
Last active
August 1, 2017 02:19
-
-
Save nigelhooke/50cd290f634d7eefee8ed26b9e2c6186 to your computer and use it in GitHub Desktop.
Statsmodels GLM regression
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
f = """bev_quant ~ precip + relhum + temp + bpm + n_events + C(is_pubhol) | |
+ C(is_pubhol_eve) + C(is_happy_hr) + C(is_mon_pubhol) + C(is_pre_xmas)""" | |
response, predictors = dmatrices(f, data, return_type='dataframe') | |
nb_results = sm.GLM(response, predictors, | |
family=sm.families.NegativeBinomial(alpha=0.15)).fit() | |
print(nb_results.summary()) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import statsmodels.formula.api as smf | |
def ct_response(row): | |
"Calculate response observation for Cameron-Trivedi dispersion test" | |
y = row['bev_quant'] | |
m = row['bev_mu'] | |
return ((y - m)**2 - y) / m | |
ct_data = data.copy() | |
ct_data['bev_mu'] = po_results.mu | |
ct_data['ct_resp'] = ct_data.apply(ct_response, axis=1) | |
# Linear regression of auxiliary formula | |
ct_results = smf.ols('ct_resp ~ bev_mu - 1', ct_data).fit() | |
# Construct confidence interval for alpha, the coefficient of bev_mu | |
alpha_ci95 = ct_results.conf_int(0.05).loc['bev_mu'] | |
print('\nC-T dispersion test: alpha = {:5.3f}, 95% CI = ({:5.3f}, {:5.3f})' | |
.format(ct_results.params[0], alpha_ci95.loc[0], alpha_ci95.loc[1])) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from patsy import dmatrices | |
import statsmodels.api as sm | |
formula = """bev_quant ~ bpm + n_events + precip + relhum + temp + C(is_happy_hr) | |
+ C(is_mon_pubhol) + C(is_pre_xmas) + C(is_pubhol) + C(is_pubhol_eve)""" | |
response, predictors = dmatrices(formula, data, return_type='dataframe') | |
po_results = sm.GLM(response, predictors, family=sm.families.Poisson()).fit() | |
print(po_results.summary()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment