Created
February 28, 2014 01:05
-
-
Save jvrsgsty/9263129 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# Javier Sagastuy | |
# Last revision: Feb 21, 2014 | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import scipy.stats as stats | |
from statsmodels.stats.anova import anova_lm | |
import statsmodels.formula.api as sm | |
def influenceM(fitted): | |
''' | |
Prints a table with a collection of influence measures for the fitted model | |
given | |
IN | |
fitted: a statsmodels.regression.linear_model.RegressionResultsWrapper | |
object | |
''' | |
influence = fitted.get_influence() | |
# includes dffits, Hat diag, Cooks and studentized residuals | |
# (internally and externally) | |
print influence.summary_table() | |
print "dfbetas 1 dfbetas x" | |
print influence.dfbetas | |
print influence.cov_ratio | |
def influenceG(fitted): | |
''' | |
Plots four residual analysis graph for the fitted model given: | |
Residuals vs. fitted | |
qqplot | |
Scale-location | |
Residuals vs. Leverage | |
IN | |
fitted: a statsmodels.regression.linear_model.RegressionResultsWrapper | |
object | |
''' | |
resid = fitted.resid | |
respuesta = fitted.fittedvalues | |
std_resid = fitted.norm_resid() | |
influence = fitted.get_influence() | |
# Residuals vs. fitted | |
plt.plot(respuesta, resid, 'bo') | |
plt.xlabel('Respuesta ajustada') | |
plt.ylabel('Residuales') | |
plt.title("Analisis de residuales") | |
plt.show() | |
# qqplot | |
stats.probplot(resid,dist = 'norm', plot=plt) | |
plt.show() | |
# Scale-location | |
plt.plot(respuesta, abs(std_resid)**0.5, 'bo') | |
plt.xlabel('Respuesta ajustada') | |
plt.ylabel('Residuales Estandarizados') | |
plt.title("Analisis de residuales") | |
plt.show() | |
# Residuals vs. leverage | |
plt.plot(influence.hat_matrix_diag, std_resid, 'bo') | |
plt.xlabel('Respuesta ajustada') | |
plt.ylabel('Residuales Estandarizados') | |
plt.title("Analisis de residuales") | |
plt.show() | |
def anova_JSB(fitted): | |
''' | |
Prints the anova_lm table for the fitted model given and writes a title | |
IN | |
fitted: a statsmodels.regression.linear_model.RegressionResultsWrapper | |
object | |
''' | |
print "\n ANOVA" | |
print "==============================================================================" | |
print anova_lm(fitted) | |
def aicSteps(model, dat, k): | |
''' | |
Function that calculates k aicSteps to construct the model which contains | |
the k regressors whose addition into the model minimizes the AIC | |
IN | |
model: a statsmodels.regression.linear_model.OLS object | |
dat: the original data frame that was given to the model | |
k: an integer specifying the number of aicSteps to take | |
''' | |
names = model.data.xnames | |
names.remove('Intercept') | |
inModel = [] | |
formula = 'y ~ 1' | |
print 'Start: ' + formula | |
for i in range(k): | |
(formula, inModel, names) = aicStepInt(formula, inModel, names, dat) | |
print 'Step '+ str(i+1) + ': ' + formula | |
def aicStepInt(formula, inModel, names, dat): | |
''' | |
Function used internally to calculate the AIC step. Must be called initially | |
with the parameters it is called in aicSteps | |
IN | |
formula: a string with the formula of the adjustted model | |
inModel: a list containing the regressors already in the model | |
names: a list containing the names which are not yet in the model | |
dat: the original data frame that was given to the model | |
''' | |
AIC = [] | |
for regressor in names: | |
model2 = sm.ols(formula=formula + ' + ' + regressor, data = dat) | |
fitted2 = model2.fit() | |
AIC.append(fitted2.aic) | |
AIC = np.array(AIC) | |
k = AIC.argmin() | |
formula += ' + ' + names[k] | |
inModel.append(names[k]) | |
names.remove(names[k]) | |
return (formula, inModel, names) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment