@DimaK415 · Created November 3, 2017
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV, cross_val_score


class EstimatorSelectionHelper:
    def __init__(self, models, params, cv):
        # `models` and `params` are dicts keyed by estimator name;
        # every model must have a matching parameter grid.
        if not set(models.keys()).issubset(set(params.keys())):
            missing_params = list(set(models.keys()) - set(params.keys()))
            raise ValueError("Some estimators are missing parameters: %s" % missing_params)
        self.models = models
        self.params = params
        self.keys = models.keys()
        self.grid_searches = {}
        self.cv = cv
    def fit_models_cv(self, X, y, cv=5, n_jobs=-1, verbose=0, scoring=None, refit=False):
        # Run a grid search for every registered estimator and keep the
        # fitted GridSearchCV objects for later summarizing.
        for key in self.keys:
            print("Running GridSearchCV for %s." % key)
            model = self.models[key]
            params = self.params[key]
            gs = GridSearchCV(model, params, cv=cv, n_jobs=n_jobs,
                              verbose=verbose, scoring=scoring, refit=refit)
            gs.fit(X, y)
            self.grid_searches[key] = gs
        print("All done!")
    def cv_score_summary(self, sort_by='mean_score'):
        # Summarize per-candidate CV scores from each grid search. Built on
        # `cv_results_`; the old `grid_scores_` attribute has been removed
        # from scikit-learn.
        def row(key, scores, params):
            d = {
                'estimator': key,
                'min_score': min(scores),
                'max_score': max(scores),
                'mean_score': np.mean(scores),
                'std_score': np.std(scores),
            }
            return pd.Series({**params, **d})

        rows = []
        for key in self.keys:
            gs = self.grid_searches[key]
            for i, candidate_params in enumerate(gs.cv_results_['params']):
                scores = [gs.cv_results_['split%d_test_score' % s][i]
                          for s in range(gs.n_splits_)]
                rows.append(row(key, scores, candidate_params))
        df = pd.concat(rows, axis=1).T.sort_values(sort_by, ascending=False)
        columns = ['estimator', 'min_score', 'max_score', 'std_score', 'mean_score']
        columns = columns + [c for c in df.columns if c not in columns]
        return df[columns]
    def fit_test_models(self, X_train, y_train, X_test, y_test, alphas=(), n_jobs=-1, verbose=0):
        # Quick train/test sweep over `alpha` values; only meaningful for
        # estimators that expose an `alpha` hyperparameter (e.g. Ridge, Lasso).
        d = {
            'estimator': [],
            'alpha': [],
            'score': [],
        }
        counter = 0
        for key in self.keys:
            for alpha in alphas:
                print(f"Fitting and testing {key} with alpha: {alpha}")
                tester = self.models[key]
                tester.set_params(alpha=alpha)
                tester.fit(X_train, y_train)
                model_test_score = tester.score(X_test, y_test)
                d['estimator'].append(key)
                d['alpha'].append(alpha)
                d['score'].append(model_test_score)
                counter += 1
        print(f"Fit and tested {counter} models. Explore your data!")
        df = pd.DataFrame(d).sort_values('score', ascending=False)
        return df
    def show_me_the_scores(self, X_train, y_train, X_test, y_test, cv=5,
                           n_jobs=-1, verbose=0, sort_by='test_score', note=''):
        # Fit, cross-validate, and test each estimator once per hyperparameter
        # value in its grid, collecting one result row per fitted model.
        counter = 0
        rows = []
        for key in self.keys:
            for param in self.params[key]:
                for val in self.params[key][param]:
                    print(f"Fitting, cross validating ({cv} kfolds) and testing {key} with {param} = {val}")
                    tester = self.models[key]
                    tester.set_params(**{param: val})
                    tester.fit(X_train, y_train)
                    model_test_score = tester.score(X_test, y_test)
                    cv_scores = cross_val_score(tester, X_train, y_train,
                                                cv=cv, n_jobs=n_jobs, verbose=verbose)
                    rows.append({
                        'estimator': key,
                        'CVs': cv,
                        'min_cv_score': min(cv_scores),
                        'max_cv_score': max(cv_scores),
                        'std_cv_score': np.std(cv_scores),
                        'mean_cv_score': np.mean(cv_scores),
                        'test_score': model_test_score,
                        'note': note,
                        # Hyperparameter columns not varied in this row are
                        # left for pandas to fill with NaN.
                        param: val,
                    })
                    counter += 1
        print(f"Fit and tested {counter} models. Explore your data!")
        df = pd.DataFrame(rows).sort_values(sort_by, ascending=False)
        # Fixed columns first, then whatever hyperparameter columns appeared.
        columns = ['estimator', 'CVs', 'min_cv_score', 'max_cv_score',
                   'std_cv_score', 'mean_cv_score', 'test_score', 'note']
        return df[columns + [c for c in df.columns if c not in columns]]
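
A minimal usage sketch, assuming scikit-learn's Ridge and Lasso regressors (both expose the `alpha` hyperparameter the sweep methods expect) and a synthetic dataset:

if __name__ == '__main__':
    from sklearn.datasets import make_regression
    from sklearn.linear_model import Lasso, Ridge
    from sklearn.model_selection import train_test_split

    # Synthetic regression data, split for the train/test methods.
    X, y = make_regression(n_samples=200, n_features=10, noise=0.5, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    models = {'Ridge': Ridge(), 'Lasso': Lasso()}
    params = {'Ridge': {'alpha': [0.1, 1.0, 10.0]},
              'Lasso': {'alpha': [0.1, 1.0, 10.0]}}

    helper = EstimatorSelectionHelper(models, params, cv=5)
    helper.fit_models_cv(X_train, y_train, cv=5)
    print(helper.cv_score_summary())
    print(helper.show_me_the_scores(X_train, y_train, X_test, y_test, cv=5))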