Created
March 26, 2019 02:37
-
-
Save sgodfrey66/2b44955c6b18a2ccf03655f51f64b8b0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Imports | |
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
%matplotlib inline | |
from sklearn.datasets import load_boston | |
from sklearn.linear_model import LinearRegression | |
from sklearn.model_selection import cross_val_score | |
from itertools import combinations | |
import time | |
# Load the Boston housing price data.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn release to run.
boston = load_boston()

# Column labels, kept as a plain list so they can be passed around and combined.
features = list(boston.feature_names)

# Target vector, and the predictors as a DataFrame labelled by feature name.
y = boston['target']
X = pd.DataFrame(boston.data, columns=features)
# Function to generate and evaluate linear regression models from a set of features | |
def model_evaluator(X, y, features, min_n, max_n):
    """Fit and score a linear regression on every feature subset in a size range.

    For each subset size from ``min_n`` through ``max_n`` (inclusive), every
    combination of ``features`` of that size is selected from ``X``, a
    ``LinearRegression`` is fitted on it, and both the in-sample R^2 and the
    mean 5-fold cross-validated score are recorded.

    Note that the number of models grows combinatorially with the number of
    features, so wide size ranges can take a long time.

    Args:
        X: Table of predictors that supports selection by a list of column
            names (``X[[...]]``), e.g. a pandas DataFrame.
        y: Target values aligned with the rows of ``X``.
        features: Column names of ``X`` to draw subsets from.
        min_n: Smallest subset size to evaluate. Values below 1 are treated
            as 1, because a regression cannot be fitted on zero columns.
        max_n: Largest subset size to evaluate (inclusive). Sizes larger than
            the number of features simply produce no combinations.

    Returns:
        A pandas DataFrame with one row per evaluated subset and the columns
        ``model``, ``features``, ``CV_R2``, ``R2``, ``intercept``, ``coeff``
        and ``no_features``. The frame is empty when no subset was evaluated.
    """
    # One estimator is reused for every subset; each fit() overwrites the last.
    lm = LinearRegression()

    # One dictionary per fitted model, turned into a DataFrame at the end.
    model_library = []

    # Skip the degenerate zero-feature (or negative) sizes instead of failing.
    smallest = max(min_n, 1)

    # Loop through all subset sizes from the smallest up to max_n features.
    for n_feats in range(smallest, max_n + 1):
        # combinations() is lazy, so iterate it directly rather than building
        # a throwaway list of every combination first.
        for combo in combinations(features, n_feats):
            # Column names for this subset, and the matching slice of X.
            feat_sub = list(combo)
            X_sub = X[feat_sub]

            # Fit on the full data and take the in-sample R^2.
            model = lm.fit(X_sub, y)
            score_r2 = model.score(X_sub, y)

            # Mean score over 5 cross-validation folds. The coefficients read
            # below are expected to still come from the full-data fit above
            # (cross_val_score is documented to work on clones of the
            # estimator -- confirm against the installed scikit-learn).
            score_cv = cross_val_score(lm, X_sub, y, cv=5).mean()

            # Save this model's summary in the list of dictionaries.
            model_library.append({
                'model': 'linear_regr',
                'features': feat_sub,
                'CV_R2': np.round(score_cv, 3),
                'R2': np.round(score_r2, 3),
                'intercept': np.round(model.intercept_, 2),
                'coeff': np.round(model.coef_, 2),
                'no_features': len(combo),
            })

    # Return the collected results as a data frame.
    return pd.DataFrame(model_library)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment