@joaopcnogueira
Last active December 18, 2023 14:58
Feature selection by Backward Elimination using p-value
import numpy as np
import statsmodels.api as sm


def backward_elimination(X, y, sl):
    """
    X:  data matrix of independent variables (predictors)
    y:  vector of the dependent variable (target)
    sl: significance level; a common choice is 0.05 (5%)
    """
    # Prepend a column of ones so that OLS fits an intercept
    X = np.append(arr=np.ones((len(X), 1)).astype(int), values=X, axis=1)
    while True:
        regressor_OLS = sm.OLS(y, X).fit()
        # Find the least significant predictor, skipping the intercept
        # (column 0) so it cannot be eliminated by mistake
        ind = np.argmax(regressor_OLS.pvalues[1:]) + 1
        max_pvalue = regressor_OLS.pvalues[ind]
        if max_pvalue > sl:
            # Drop the predictor with the highest p-value and refit
            X = np.delete(X, ind, axis=1)
        else:
            print(regressor_OLS.summary())
            # Drop the intercept column before returning the selected features
            X = np.delete(X, 0, axis=1)
            return X
# USAGE
# Suppose one has a feature matrix X and wants to use multiple linear
# regression to predict a target vector y. To select the best features,
# one can apply backward elimination and save the selected features in
# a new matrix called X_opt:
#
# sl = 0.05
# X_opt = backward_elimination(X, y, sl)