Skip to content

Instantly share code, notes, and snippets.

@jvrsgsty
Created February 28, 2014 01:05
Show Gist options
  • Save jvrsgsty/9263129 to your computer and use it in GitHub Desktop.
Save jvrsgsty/9263129 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
# Javier Sagastuy
# Last revision: Feb 21, 2014
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from statsmodels.stats.anova import anova_lm
import statsmodels.formula.api as sm
def influenceM(fitted):
'''
Prints a table with a collection of influence measures for the fitted model
given
IN
fitted: a statsmodels.regression.linear_model.RegressionResultsWrapper
object
'''
influence = fitted.get_influence()
# includes dffits, Hat diag, Cooks and studentized residuals
# (internally and externally)
print influence.summary_table()
print "dfbetas 1 dfbetas x"
print influence.dfbetas
print influence.cov_ratio
def influenceG(fitted):
'''
Plots four residual analysis graph for the fitted model given:
Residuals vs. fitted
qqplot
Scale-location
Residuals vs. Leverage
IN
fitted: a statsmodels.regression.linear_model.RegressionResultsWrapper
object
'''
resid = fitted.resid
respuesta = fitted.fittedvalues
std_resid = fitted.norm_resid()
influence = fitted.get_influence()
# Residuals vs. fitted
plt.plot(respuesta, resid, 'bo')
plt.xlabel('Respuesta ajustada')
plt.ylabel('Residuales')
plt.title("Analisis de residuales")
plt.show()
# qqplot
stats.probplot(resid,dist = 'norm', plot=plt)
plt.show()
# Scale-location
plt.plot(respuesta, abs(std_resid)**0.5, 'bo')
plt.xlabel('Respuesta ajustada')
plt.ylabel('Residuales Estandarizados')
plt.title("Analisis de residuales")
plt.show()
# Residuals vs. leverage
plt.plot(influence.hat_matrix_diag, std_resid, 'bo')
plt.xlabel('Respuesta ajustada')
plt.ylabel('Residuales Estandarizados')
plt.title("Analisis de residuales")
plt.show()
def anova_JSB(fitted):
'''
Prints the anova_lm table for the fitted model given and writes a title
IN
fitted: a statsmodels.regression.linear_model.RegressionResultsWrapper
object
'''
print "\n ANOVA"
print "=============================================================================="
print anova_lm(fitted)
def aicSteps(model, dat, k):
'''
Function that calculates k aicSteps to construct the model which contains
the k regressors whose addition into the model minimizes the AIC
IN
model: a statsmodels.regression.linear_model.OLS object
dat: the original data frame that was given to the model
k: an integer specifying the number of aicSteps to take
'''
names = model.data.xnames
names.remove('Intercept')
inModel = []
formula = 'y ~ 1'
print 'Start: ' + formula
for i in range(k):
(formula, inModel, names) = aicStepInt(formula, inModel, names, dat)
print 'Step '+ str(i+1) + ': ' + formula
def aicStepInt(formula, inModel, names, dat):
'''
Function used internally to calculate the AIC step. Must be called initially
with the parameters it is called in aicSteps
IN
formula: a string with the formula of the adjustted model
inModel: a list containing the regressors already in the model
names: a list containing the names which are not yet in the model
dat: the original data frame that was given to the model
'''
AIC = []
for regressor in names:
model2 = sm.ols(formula=formula + ' + ' + regressor, data = dat)
fitted2 = model2.fit()
AIC.append(fitted2.aic)
AIC = np.array(AIC)
k = AIC.argmin()
formula += ' + ' + names[k]
inModel.append(names[k])
names.remove(names[k])
return (formula, inModel, names)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment