Skip to content

Instantly share code, notes, and snippets.

@josef-pkt
Created December 9, 2013 02:44
Show Gist options
  • Save josef-pkt/7866696 to your computer and use it in GitHub Desktop.
cluster robust standard errors for Negative Binomial
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 08 19:14:58 2013
Author: Josef Perktold, based on example by John Beieler
Not verified!
"""
import numpy as np
import pandas as pd
import statsmodels.api as sm
from patsy import dmatrices
from statsmodels.stats.sandwich_covariance import cov_cluster
import statsmodels.stats.sandwich_covariance as sc
# Load the dyad-level terrorism data and drop rows with missing values.
dat_raw = pd.read_csv('negbin_full_data.csv')
dat = dat_raw.dropna()
#dat = dat_raw
# Build response vector y and design matrix X (with intercept) from the
# patsy formula; returned as DataFrames so column labels survive.
y, X = dmatrices('terrorCounts~rivalry+jointDem1+logcapratio+contiguity',
data=dat, return_type='dataframe')
# Fit a plain Poisson model first as a baseline / sanity check.
res_poi = sm.Poisson(y, X, missing='drop').fit()

# Use previously estimated params as start values to run faster during
# experimentation; the last entry is the NB dispersion parameter (alpha).
start_params = np.array([-5.35096895, 2.34653792, 1.32354659,
                         -0.15224597, 1.01714021, 64.78548562])

mod = sm.NegativeBinomial(y, X, missing='drop')
res0 = mod.fit(start_params=start_params, maxiter=60, method='bfgs')
#start_params = np.array(res0.params)

# see if newton works
res = mod.fit(start_params=start_params, maxiter=60, method='newton')

# Check eigenvalues of cov_params: huge / negative eigenvalues indicate a
# singular or indefinite Hessian (the 'bfgs' fit ends up singular here,
# see the saved results at the bottom of the file).
# NOTE: prints parenthesized so the script is valid Python 2 and 3
# (all prints take a single argument, so behavior is unchanged).
print(np.linalg.eigvals(res0.cov_params()))
print(np.linalg.eigvals(res.cov_params()))

print("\nbse non-robust 'newton'")
print(np.asarray(res.bse))
bses = pd.DataFrame(res.bse, columns=['non-robust'])

print("\nbse cluster robust 'newton'")
# this is not working yet
#clustered = cov_cluster(res, dat['dyadid'])

# NegativeBinomial has jac as scoreobs based on approx_fprime_cs
jac = res.model.scoreobs(np.array(res.params))
# fake a results instance that has the required attributes
class Holder(object):
    """Minimal attribute bag: every keyword argument given to the
    constructor becomes an instance attribute of the same name."""
    def __init__(self, **kwds):
        for attr_name, attr_value in kwds.items():
            setattr(self, attr_name, attr_value)
# Fake a results instance carrying the attributes cov_cluster reads:
# a model whose `exog` is the per-observation score, unit residuals
# (so score * resid == score), and the normalized parameter covariance.
model_fake = Holder(exog=jac)
res_fake = Holder(model=model_fake, resid=np.ones(jac.shape[0]),
                  normalized_cov_params=res.normalized_cov_params)

# BUGFIX: use np.array (forced copy) instead of np.asarray — when the
# pandas column is already int dtype, asarray can return a view and the
# in-place shift below would silently mutate `dat`. int needed for
# np.bincount inside the sandwich code.
gr = np.array(dat['dyadid'], dtype=int)
gr -= gr.min()  # relabel groups to start at 0

clustered = cov_cluster(res_fake, gr)
bse_clu = np.sqrt(np.diag(clustered))
print(bse_clu)

# Calculate the sandwich directly: H = xxi * S * xxi', with S the
# cluster cross-section of the scores.
S = sc.S_crosssection(jac, gr)
# The next part is _HCCM, which shouldn't require results
xxi = res.normalized_cov_params
H = np.dot(np.dot(xxi, S), xxi.T)
bse_clu2 = np.sqrt(np.diag(H))
print(bse_clu2)

# Add the small-sample correction G/(G-1) * (N-1)/(N-k).
nobs, k_vars = X.shape
n_groups = len(np.unique(gr))
corr_fact = n_groups / (n_groups - 1.) * ((nobs - 1.) / float(nobs - k_vars))
bse_clu3 = np.sqrt(np.diag(H * corr_fact))
print(bse_clu3)
# bse_clu (via cov_cluster on the fake results) should match the
# corrected direct computation bse_clu3
bses['robust_corr'] = bse_clu
bses['robust_no_corr'] = bse_clu2
bses['robust_corr_'] = bse_clu3
print('\n')
print(bses)
# Saved console output from a previous run, kept as a module-level
# string so the reference numbers travel with the script.
''' Results
Note: "bfgs" ends up with singular Hessian, "newton" doesn't
[ 4.84419405e+23 -2.99759693e+53 -6.64613998e+35 -1.12701301e+36
9.00058591e+34 -1.86552394e+34]
[ 1.91432527e+01 2.34646505e-02 1.52971738e-02 1.87594821e-03
1.12834253e-03 9.22005732e-03]
bse non-robust 'newton'
[ 0.08170064 0.13790239 0.11111216 0.03587395 0.10845392 4.37528876]
bse cluster robust 'newton'
[ 0.18103571 0.46702637 0.27775385 0.06913378 0.25524573 9.47252558]
[ 0.18099112 0.46691133 0.27768544 0.06911675 0.25518286 9.4701923 ]
[ 0.18103433 0.4670228 0.27775173 0.06913326 0.25524379 9.4724533 ]
non-robust robust_corr robust_no_corr robust_corr_
Intercept 0.081701 0.181036 0.180991 0.181034
rivalry 0.137902 0.467026 0.466911 0.467023
jointDem1 0.111112 0.277754 0.277685 0.277752
logcapratio 0.035874 0.069134 0.069117 0.069133
contiguity 0.108454 0.255246 0.255183 0.255244
alpha 4.375289 9.472526 9.470192 9.472453
'''
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment