sgodfrey66/model_evaluator.py

## model_evaluator.py
# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score

from itertools import combinations
import time

# Fetch the Boston housing data set and unpack it into a feature table (X),
# a target vector (y) and the list of feature names.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell only runs against older scikit-learn releases.
boston = load_boston()
features = list(boston['feature_names'])
X = pd.DataFrame(data=boston['data'], columns=boston['feature_names'])
y = boston.target

# Function to generate and evaluate linear regression models from a set of features
def model_evaluator(X, y, features, min_n, max_n):
    """Fit and score a linear regression for every feature subset in a size range.

    For each subset size from ``min_n`` through ``max_n`` (both inclusive),
    every combination of that many labels drawn from ``features`` is used to
    select columns of ``X``. A ``LinearRegression`` is fitted on the selected
    columns and scored twice: on the data it was fitted on (``score``) and by
    the mean of ``cross_val_score`` with ``cv=5``.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table; it is indexed with a list of labels, so it must hold a
        column for every entry of ``features``.
    y : array-like
        Target values passed to ``fit``, ``score`` and ``cross_val_score``
        alongside the selected columns of ``X``.
    features : sequence
        Candidate column labels to combine.
    min_n : int
        Smallest number of features per model.
    max_n : int
        Largest number of features per model.

    Returns
    -------
    pandas.DataFrame
        One row per evaluated combination with the columns ``model``,
        ``features``, ``CV_R2``, ``R2``, ``intercept``, ``coeff`` and
        ``no_features``. Scores are rounded to 3 decimals, the intercept and
        coefficients to 2. Empty when ``min_n > max_n``.
    """
    # One estimator is reused for every subset; each fit() replaces the
    # parameters learned for the previous subset.
    lm = LinearRegression()

    model_library = []

    for n_features in range(min_n, max_n + 1):
        # combinations() is consumed lazily, so the full set of subsets for
        # this size is never held in memory at once.
        for combo in combinations(features, n_features):
            feat_sub = list(combo)
            X_sub = X[feat_sub]

            # Fit on all rows and take the score on that same data.
            lm.fit(X_sub, y)
            score_r2 = lm.score(X_sub, y)

            # Mean score over the 5 cross-validation folds.
            score_cv = cross_val_score(lm, X_sub, y, cv=5).mean()

            # intercept_/coef_ are read after cross-validation, exactly as
            # before, so they are the values from the fit on all rows above.
            model_library.append({
                'model': 'linear_regr',
                'features': feat_sub,
                'CV_R2': np.round(score_cv, 3),
                'R2': np.round(score_r2, 3),
                'intercept': np.round(lm.intercept_, 2),
                'coeff': np.round(lm.coef_, 2),
                'no_features': len(combo),
            })

    return pd.DataFrame(model_library)
	# Imports
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	import seaborn as sns
	%matplotlib inline

	from sklearn.datasets import load_boston
	from sklearn.linear_model import LinearRegression
	from sklearn.model_selection import cross_val_score

	from itertools import combinations
	import time

	# Fetch the Boston housing data set and unpack it into a feature table (X),
	# a target vector (y) and the list of feature names.
	# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
	# 1.2, so this cell only runs against older scikit-learn releases.
	boston = load_boston()
	features = list(boston['feature_names'])
	X = pd.DataFrame(data=boston['data'], columns=boston['feature_names'])
	y = boston.target

	# Function to generate and evaluate linear regression models from a set of features
	def model_evaluator(X, y, features, min_n, max_n):
		"""Fit and score a linear regression for every feature subset in a size range.

		For each subset size from ``min_n`` through ``max_n`` (both inclusive),
		every combination of that many labels drawn from ``features`` is used to
		select columns of ``X``. A ``LinearRegression`` is fitted on the selected
		columns and scored twice: on the data it was fitted on (``score``) and by
		the mean of ``cross_val_score`` with ``cv=5``.

		Parameters
		----------
		X : pandas.DataFrame
			Feature table; it is indexed with a list of labels, so it must hold
			a column for every entry of ``features``.
		y : array-like
			Target values passed to ``fit``, ``score`` and ``cross_val_score``
			alongside the selected columns of ``X``.
		features : sequence
			Candidate column labels to combine.
		min_n : int
			Smallest number of features per model.
		max_n : int
			Largest number of features per model.

		Returns
		-------
		pandas.DataFrame
			One row per evaluated combination with the columns ``model``,
			``features``, ``CV_R2``, ``R2``, ``intercept``, ``coeff`` and
			``no_features``. Scores are rounded to 3 decimals, the intercept
			and coefficients to 2. Empty when ``min_n > max_n``.
		"""
		# One estimator is reused for every subset; each fit() replaces the
		# parameters learned for the previous subset.
		lm = LinearRegression()

		model_library = []

		for n_features in range(min_n, max_n + 1):
			# combinations() is consumed lazily, so the full set of subsets
			# for this size is never held in memory at once.
			for combo in combinations(features, n_features):
				feat_sub = list(combo)
				X_sub = X[feat_sub]

				# Fit on all rows and take the score on that same data.
				lm.fit(X_sub, y)
				score_r2 = lm.score(X_sub, y)

				# Mean score over the 5 cross-validation folds.
				score_cv = cross_val_score(lm, X_sub, y, cv=5).mean()

				# intercept_/coef_ are read after cross-validation, exactly as
				# before, so they are the values from the fit on all rows above.
				model_library.append({
					'model': 'linear_regr',
					'features': feat_sub,
					'CV_R2': np.round(score_cv, 3),
					'R2': np.round(score_r2, 3),
					'intercept': np.round(lm.intercept_, 2),
					'coeff': np.round(lm.coef_, 2),
					'no_features': len(combo),
				})

		return pd.DataFrame(model_library)