# jseabold/anova_lm.py

## anova_lm.py
from pandas import DataFrame
import numpy as np
from scipy import stats

def anova_lm(*args, **kwargs):
    """
    ANOVA table for one or more fitted linear models.

    Parmeters
    ---------
    args : fitted linear model results instance
        One or more fitted linear models

    **kwargs**

    scale : float
        Estimate of variance, If None, will be estimated from the largest
        model. Default is None.
    test : str {"F", "Chisq", "Cp"} or None
        Test statistics to provide (Why not just give all). Default is "F".

    Returns
    -------
    anova : DataFrame
        A DataFrame containing.

    Notes
    -----
    Model statistics are given in the order of args. Models must have
    a formula_str attribute.

    See Also
    --------
    model_results.compare_f_test, model_results.compare_lm_test
    """
    test = kwargs.get("test", "F")
    scale = kwargs.get("scale", None)
    n_models = len(args)

    model_formula = []
    pr_test = "PR(>%s)" % test
    names = ['df_resid', 'ssr', 'df_diff', 'ss_diff', test, pr_test]
    table = DataFrame(np.empty((n_models, 6)), columns = names)

    if not scale: # assume biggest model is last
        scale = args[-1].scale

    table["ssr"] = map(getattr, args, ["ssr"]*n_models)
    table["df_resid"] = map(getattr, args, ["df_resid"]*n_models)
    table.ix[1:]["df_diff"] = np.diff(map(getattr, args, ["df_model"]*n_models))
    table["ss_diff"] = -table["ssr"].diff()
    if test == "F":
        table["F"] = table["ss_diff"] / table["df_diff"] / scale
        table[pr_test] = stats.f.sf(table["F"], table["df_diff"],
                             table["df_resid"])

    return table
	from pandas import DataFrame
	import numpy as np
	from scipy import stats

	def anova_lm(args, *kwargs):
	"""
	ANOVA table for one or more fitted linear models.

	Parmeters
	---------
	args : fitted linear model results instance
	One or more fitted linear models

	kwargs

	scale : float
	Estimate of variance, If None, will be estimated from the largest
	model. Default is None.
	test : str {"F", "Chisq", "Cp"} or None
	Test statistics to provide (Why not just give all). Default is "F".

	Returns
	-------
	anova : DataFrame
	A DataFrame containing.

	Notes
	-----
	Model statistics are given in the order of args. Models must have
	a formula_str attribute.

	See Also
	--------
	model_results.compare_f_test, model_results.compare_lm_test
	"""
	test = kwargs.get("test", "F")
	scale = kwargs.get("scale", None)
	n_models = len(args)

	model_formula = []
	pr_test = "PR(>%s)" % test
	names = ['df_resid', 'ssr', 'df_diff', 'ss_diff', test, pr_test]
	table = DataFrame(np.empty((n_models, 6)), columns = names)

	if not scale: # assume biggest model is last
	scale = args[-1].scale

	table["ssr"] = map(getattr, args, ["ssr"]*n_models)
	table["df_resid"] = map(getattr, args, ["df_resid"]*n_models)
	table.ix[1:]["df_diff"] = np.diff(map(getattr, args, ["df_model"]*n_models))
	table["ss_diff"] = -table["ssr"].diff()
	if test == "F":
	table["F"] = table["ss_diff"] / table["df_diff"] / scale
	table[pr_test] = stats.f.sf(table["F"], table["df_diff"],
	table["df_resid"])

	return table