Skip to content

Instantly share code, notes, and snippets.

@jseabold
Created March 30, 2012 02:14
Show Gist options
  • Save jseabold/2245820 to your computer and use it in GitHub Desktop.
Save jseabold/2245820 to your computer and use it in GitHub Desktop.
ANOVA fitted linear model comparison for statsmodels
from pandas import DataFrame
import numpy as np
from scipy import stats
def anova_lm(*args, **kwargs):
"""
ANOVA table for one or more fitted linear models.
Parmeters
---------
args : fitted linear model results instance
One or more fitted linear models
**kwargs**
scale : float
Estimate of variance, If None, will be estimated from the largest
model. Default is None.
test : str {"F", "Chisq", "Cp"} or None
Test statistics to provide (Why not just give all). Default is "F".
Returns
-------
anova : DataFrame
A DataFrame containing.
Notes
-----
Model statistics are given in the order of args. Models must have
a formula_str attribute.
See Also
--------
model_results.compare_f_test, model_results.compare_lm_test
"""
test = kwargs.get("test", "F")
scale = kwargs.get("scale", None)
n_models = len(args)
model_formula = []
pr_test = "PR(>%s)" % test
names = ['df_resid', 'ssr', 'df_diff', 'ss_diff', test, pr_test]
table = DataFrame(np.empty((n_models, 6)), columns = names)
if not scale: # assume biggest model is last
scale = args[-1].scale
table["ssr"] = map(getattr, args, ["ssr"]*n_models)
table["df_resid"] = map(getattr, args, ["df_resid"]*n_models)
table.ix[1:]["df_diff"] = np.diff(map(getattr, args, ["df_model"]*n_models))
table["ss_diff"] = -table["ssr"].diff()
if test == "F":
table["F"] = table["ss_diff"] / table["df_diff"] / scale
table[pr_test] = stats.f.sf(table["F"], table["df_diff"],
table["df_resid"])
return table
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment