Created
November 3, 2017 05:26
-
-
Save DimaK415/428bbeb0e79551f780bb990e7c26f813 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class EstimatorSelectionHelper:
    """Fit and compare several scikit-learn estimators over parameter grids.

    models : dict mapping name -> unfitted estimator instance.
    params : dict mapping the same names -> parameter grid (dict of
             param-name -> list of candidate values), as GridSearchCV expects.
    cv     : default number of cross-validation folds.

    Raises ValueError at construction if any model lacks a parameter grid.
    """

    def __init__(self, models, params, cv):
        # Every model must have a matching entry in params, otherwise the
        # fit methods below would KeyError mid-run; fail fast instead.
        if not set(models.keys()).issubset(set(params.keys())):
            missing_params = list(set(models.keys()) - set(params.keys()))
            raise ValueError("Some estimators are missing parameters: %s" % missing_params)
        self.models = models
        self.params = params
        self.keys = models.keys()
        self.grid_searches = {}  # name -> fitted GridSearchCV, filled by fit_models_cv
        self.cv = cv

    def fit_models_cv(self, X, y, cv=5, n_jobs=-1, verbose=0, scoring=None, refit=False):
        """Run a GridSearchCV for every model and store the fitted searches.

        Results land in self.grid_searches keyed by model name; summarize
        them afterwards with cv_score_summary().
        """
        for key in self.keys:
            print("Running GridSearchCV for %s." % key)
            gs = GridSearchCV(self.models[key], self.params[key], cv=cv,
                              n_jobs=n_jobs, verbose=verbose,
                              scoring=scoring, refit=refit)
            gs.fit(X, y)
            self.grid_searches[key] = gs
        print("All done!")

    def cv_score_summary(self, sort_by='mean_score'):
        """Return a DataFrame summarizing every grid-search candidate.

        One row per (estimator, parameter setting) with min/max/mean/std of
        the per-fold test scores, sorted descending by `sort_by`.

        Uses GridSearchCV.cv_results_ — the `grid_scores_` attribute this
        code originally read was removed in scikit-learn 0.20.
        """
        def row(key, scores, params):
            d = {
                'estimator': key,
                'min_score': min(scores),
                'max_score': max(scores),
                'mean_score': np.mean(scores),
                'std_score': np.std(scores),
            }
            # {**params, **d}: plain dict merge. The original
            # dict(params.items() | d.items()) built a *set* of item tuples,
            # which crashes on unhashable param values (e.g. lists).
            return pd.Series({**params, **d})

        rows = []
        for key in self.keys:
            gs = self.grid_searches[key]
            results = gs.cv_results_
            n_splits = gs.n_splits_
            for i, candidate_params in enumerate(results['params']):
                # Collect the per-fold test scores for candidate i.
                scores = [results['split%d_test_score' % s][i]
                          for s in range(n_splits)]
                rows.append(row(key, scores, candidate_params))

        # DataFrame.sort() was removed from pandas; sort_values is the API.
        df = pd.concat(rows, axis=1).T.sort_values([sort_by], ascending=False)
        # Put the summary columns first, then any parameter columns.
        columns = ['estimator', 'min_score', 'max_score', 'std_score', 'mean_score']
        columns = columns + [c for c in df.columns if c not in columns]
        return df[columns]

    def fit_test_models(self, X_train, y_train, X_test, y_test, alphas=None, n_jobs=-1, verbose=0):
        """Fit each model once per alpha and score it on the hold-out set.

        Returns a DataFrame (estimator, alpha, score) sorted by score
        descending. Assumes every model accepts an `alpha` parameter
        (i.e. regularized linear models) — TODO confirm against callers.
        """
        # Avoid the mutable-default-argument trap; None means "no alphas".
        if alphas is None:
            alphas = []
        d = {
            'estimator': [],
            'alpha': [],
            'score': [],
        }
        counter = 0
        for key in self.keys:
            for alpha in alphas:
                print(f"Fitting and testing {key} with alpha: {alpha}")
                # NOTE(review): this mutates the shared model instance in
                # self.models rather than fitting a clone — confirm intended.
                tester = self.models[key]
                tester.set_params(alpha=alpha)
                tester.fit(X_train, y_train)
                model_test_score = tester.score(X_test, y_test)
                d['estimator'].append(key)
                d['alpha'].append(alpha)
                d['score'].append(model_test_score)
                counter += 1
        print(f"Fit and tested {counter} models. Explore your data!")
        df = pd.DataFrame(d).sort_values('score', ascending=False)
        return df

    def show_me_the_scores(self, X_train, y_train, X_test, y_test, cv=5,
                           n_jobs=-1, verbose=0, sort_by='test_score', note=''):
        """Sweep each model's parameter grid one parameter value at a time,
        cross-validate on the training set and score on the hold-out set.

        Returns a DataFrame with CV summary stats, the hold-out test score,
        and one column per hyperparameter name (NaN where a parameter does
        not apply to that estimator), sorted by `sort_by` descending.

        Fixes over the original: uses self.params (the original referenced
        an undefined global `params_trees`), sets the parameter actually
        being swept (the original hard-coded an undefined `alpha`), removes
        a duplicated inner loop, and pads per-parameter columns so the
        DataFrame constructor gets equal-length lists.
        """
        counter = 0
        d = {
            'estimator': [],
            'CVs': [],
            'min_cv_score': [],
            'max_cv_score': [],
            'std_cv_score': [],
            'mean_cv_score': [],
            'test_score': [],
            'note': [],
        }
        # One column per hyperparameter name across all models.
        for key in self.keys:
            for param in self.params[key]:
                d.setdefault(param, [])
        for key in self.keys:
            for param in self.params[key]:
                for val in self.params[key][param]:
                    print(f"Fitting, cross validating ({cv} kfolds) and testing {key} with {param} = {val}")
                    # NOTE(review): mutates the shared model in self.models
                    # rather than a clone — confirm intended.
                    tester = self.models[key]
                    tester.set_params(**{param: val})
                    tester.fit(X_train, y_train)
                    model_test_score = tester.score(X_test, y_test)
                    cv_scores = cross_val_score(tester, X_train, y_train,
                                                cv=cv, verbose=verbose)
                    d['estimator'].append(key)
                    d['CVs'].append(cv)
                    d['min_cv_score'].append(min(cv_scores))
                    d['mean_cv_score'].append(np.mean(cv_scores))
                    d['max_cv_score'].append(max(cv_scores))
                    d['std_cv_score'].append(np.std(cv_scores))
                    d['test_score'].append(model_test_score)
                    d['note'].append(note)
                    d[param].append(val)
                    counter += 1
                    # Pad every column this iteration did not touch (other
                    # models' parameter columns) so all lists stay the same
                    # length for pd.DataFrame below.
                    n_rows = len(d['estimator'])
                    for col in d:
                        if len(d[col]) < n_rows:
                            d[col].append(np.nan)
        print(f"Fit and tested {counter} models. Explore your data!")
        df = pd.DataFrame(d).sort_values(sort_by, ascending=False)
        return df
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment