Skip to content

Instantly share code, notes, and snippets.

@darribas
Last active January 10, 2021 18:41
Show Gist options
  • Save darribas/7805381 to your computer and use it in GitHub Desktop.
Save darribas/7805381 to your computer and use it in GitHub Desktop.
Fixed-Effects panel OLS
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
'''
Fixed-effects OLS Panel models
Author: Dani Arribas-Bel <@darribas>
Copyright (c) 2013, Dani Arribas-Bel
All rights reserved.
LICENSE
-------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* The name of the author may not be used to endorse or
promote products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
'''
import pandas as pd
import pysal as ps
import numpy as np
from pysal.spreg.diagnostics import condition_index as ci
def one_way_fe(y, x, fe1, robust=None, name_y=None, name_x=None, model_name=None):
'''
One way Fixed-Effect panel
...
Arguments
---------
y : ndarray
Flat array or list with dependent variable
x : ndarray
Array with exogenous variables
fe1 : ndarray/list
Sequence with memberships to the Fixed effect (e.g.
individiual or neighborhood)
robust : str
Argument for PySAL OLS. If None (default), standard OLS
inference is performed. If 'white', white
heteroskedasticity is applied.
name_y : str
[Optional] Name of dependent variable
name_x : list
[Optional] Name of exogenous variables
model_name : str
[Optional] Name of model
Returns
-------
m : ps.spreg.BaseOLS
Barebones PySAL OLS model object
'''
yx = pd.DataFrame(x)
yx['y'] = y
yx['fe1'] = fe1
demeaner1 = yx.groupby('fe1').mean()
demeaner1 = yx[['fe1']].join(demeaner1, on='fe1')\
.drop(['fe1'], axis=1)
yx = yx.drop(['fe1'], axis=1)
yxd = yx - demeaner1
m = ps.spreg.BaseOLS(yxd[['y']].values, yxd.drop('y', axis=1).values, \
robust=robust)
if name_y:
m.name_y = name_y
else:
m.name_y = 'y'
if name_x:
m.name_x = name_x
else:
m.name_x = ['X'+str(i) for i in range(x.shape[1])]
from pysal.spreg.diagnostics import t_stat, r2
m.t_stat = t_stat(m)
m.r2 = r2(m)
m.model_name = m.name_y
m.fe = 'FE-1'
m.fe2 = None
return m
def two_way_fe(y, x, fe1, fe2, robust=None, name_y=None, name_x=None, model_name=None):
'''
Two ways Fixed-Effect panel
...
Arguments
---------
y : ndarray
Flat array or list with dependent variable
x : ndarray
Array with exogenous variables
fe1 : ndarray/list
Sequence with memberships to the first Fixed effect (e.g.
individiual or neighborhood)
fe2 : ndarray/list
Sequence with memberships to the second Fixed effect (e.g.
time)
robust : str
Argument for PySAL OLS. If None (default), standard OLS
inference is performed. If 'white', white
heteroskedasticity is applied.
name_y : str
[Optional] Name of dependent variable
name_x : list
[Optional] Name of exogenous variables
model_name : str
[Optional] Name of model
Returns
-------
m : ps.spreg.BaseOLS
Barebones PySAL OLS model object
'''
yx = pd.DataFrame(x)
yx['y'] = y
yx['fe1'] = fe1
yx['fe2'] = fe2
demeaner1 = yx.drop('fe2', axis=1).groupby('fe1').mean()
demeaner1 = yx[['fe1']].join(demeaner1, on='fe1')\
.drop(['fe1'], axis=1)
demeaner2 = yx.drop('fe1', axis=1).groupby('fe2').mean()
demeaner2 = yx[['fe2']].join(demeaner2, on='fe2')\
.drop(['fe2'], axis=1)
yx = yx.drop(['fe1', 'fe2'], axis=1)
yxd = yx - demeaner1 - demeaner2 + yx.mean()
m = ps.spreg.BaseOLS(yxd[['y']].values, yxd.drop('y', axis=1).values, \
robust=robust)
if name_y:
m.name_y = name_y
else:
m.name_y = 'y'
if name_x:
m.name_x = name_x
else:
m.name_x = ['X'+str(i) for i in range(x.shape[1])]
from pysal.spreg.diagnostics import t_stat, r2
m.t_stat = t_stat(m)
m.r2 = r2(m)
m.model_name = m.name_y
m.fe = 'FE-1'
m.fe2 = 'FE-2'
return m
if __name__ == '__main__':
# DGP
n = 1000
d1 = 10
d2 = 100
seed = np.random.seed(1234)
x = np.random.random((n, 3))
# Unbalanced panel
fe1 = np.random.random_integers(0, d1, n).astype(str)
fe2 = np.random.random_integers(0, d2, n).astype(str)
# Balanced panel
d2 = int(n * 1. / d1)
fe1 = np.arange(d1)
fe2 = np.arange(d2)
fe = np.array([(i, j) for i in fe1 for j in fe2])
fe1 = fe[:, 0]
fe2 = fe[:, 1]
#
fe1_d = pd.get_dummies(fe1, 'fe1')
fe2_d = pd.get_dummies(fe2, 'fe2')
e = np.random.normal(0, 1, (n, 1))
y = np.dot(x, np.array([[1., 1., 1.]]).T) \
+ fe1_d.ix[:, :-1].values.sum(axis=1)[:, None] \
+ e
#+ fe2_d.values.sum(axis=1)[:, None] \
# One-way
m = one_way_fe(y, x, fe1)
xone = np.hstack((x, fe1_d.values))
md = ps.spreg.BaseOLS(y, xone)
print "De-meaned | Dummy"
for i in range(3):
print np.round(m.betas[i][0], 9), " | ", \
np.round(md.betas[i][0], 9)
print 'Condition index'
print ci(m), " | ", ci(md)
# Two-way
m = two_way_fe(y, x, fe1, fe2)
malt = one_way_fe(y, xone[:, :-1], fe2)
xtwo = np.hstack((xone, fe2_d.values[:, 1:]))
md = ps.spreg.BaseOLS(y, xtwo)
print "\nDe-meaned | 1 demeaned-1 dummy | Dummy"
for i in range(3):
print np.round(m.betas[i][0], 9), " | ", \
np.round(malt.betas[i][0], 9), " | ", \
np.round(md.betas[i][0], 9)
print 'Condition index'
print ci(m), " | ",ci(malt), " | ", ci(md)
@jpiabrantes
Copy link

Does this code compute the standard errors of the coefficients used in the regression?

@MarlaWillemse
Copy link

At what point does the following warning arise, and how can it be prevented?

ComplexWarning: Casting complex values to real discards the imaginary part ci_result = sqrt(max_eigval / min_eigval)

@darribas
Copy link
Author

Hello @MarlaWillemse, I'm not sure that sounds more like a numpy/scipy message.

More generally, this code is a bit old, for user access to panel econometrics, I'd suggest statsmodels, which I believe now support these methods:

https://www.statsmodels.org/stable/index.html

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment