Skip to content

Instantly share code, notes, and snippets.

@gwerbin
Last active August 29, 2015 14:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gwerbin/9ca34c1ddf16988fbe2a to your computer and use it in GitHub Desktop.
Save gwerbin/9ca34c1ddf16988fbe2a to your computer and use it in GitHub Desktop.
mini-API for comparing models
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.pipeline import Pipeline
# Module-level seed and a NumPy random generator built from it.
seed = 43770
rng = np.random.RandomState(seed=seed)
class Selector(BaseEstimator, TransformerMixin):
    """Pipeline step that keeps only a chosen set of columns.

    Parameters
    ----------
    variables
        Key (or list of keys) used to index the incoming data in
        ``transform``.
    """

    def __init__(self, variables):
        self.variables = variables

    def fit(self, x, y=None):
        """Learn nothing; return ``self`` so the step can sit in a pipeline."""
        return self

    def transform(self, data):
        """Return ``data`` indexed by the configured ``variables``."""
        wanted = self.variables
        return data[wanted]
class Classifier():
    """Build and fit a column-selection + estimator pipeline.

    Each call to ``make_pipe`` (and therefore ``fit``) gets its own,
    newly constructed estimator. Previously a single estimator instance
    was created in ``__init__`` and placed into every pipeline, so fitting
    a second feature set refit the very same object and every previously
    returned pipeline then reported the results of the latest fit.

    Parameters
    ----------
    classifier
        Callable (typically an estimator class) that returns a new
        estimator when called with ``**kwargs``.
    **kwargs
        Keyword arguments forwarded to ``classifier`` on every construction.

    Attributes
    ----------
    classifier
        An instance built once in ``__init__`` and kept for backward
        compatibility. It is a template only: it is never placed in a
        pipeline and is therefore never fitted by this class.
    """

    def __init__(self, classifier, **kwargs):
        # Remember *how* to build the estimator so that each pipeline can
        # receive an independent copy.
        self._factory = classifier
        self._kwargs = dict(kwargs)
        self.classifier = classifier(**kwargs)

    def _new_estimator(self):
        """Return a brand-new estimator built from the stored configuration."""
        return self._factory(**self._kwargs)

    def make_pipe(self, x_vars):
        """Return an unfitted pipeline: select ``x_vars``, then classify.

        The ``'cf'`` step is a fresh estimator, so pipelines returned by
        separate calls never share fitted state.
        """
        return Pipeline([
            ('data', Selector(x_vars)),
            ('cf', self._new_estimator()),
        ])

    def fit(self, x_vars, y_var, data):
        """Fit a new pipeline on ``data`` and return the result of ``Pipeline.fit``.

        Parameters
        ----------
        x_vars
            Column keys used as predictors.
        y_var
            Column key of the target.
        data
            Object indexable by ``x_vars`` and ``y_var`` (the calling
            script passes a pandas DataFrame).
        """
        pipe = self.make_pipe(x_vars)
        # The predictors are pre-selected here and selected again by the
        # 'data' step; the second selection is a no-op kept from the
        # original behaviour.
        return pipe.fit(data[x_vars], data[y_var])
## Create the dictionary of classifiers
# Each estimator receives the module-level seed so repeated runs are reproducible.
classifiers = {
    'rf': Classifier(
        RandomForestClassifier,
        n_estimators=100,
        oob_score=True,
        bootstrap=True,
        random_state=seed,
    ),
    'ab': Classifier(AdaBoostClassifier, n_estimators=50, random_state=seed),
}

## Load the data
# Load the dataset once and reuse it for the values, column names and target.
iris_bunch = load_iris()
iris = pd.DataFrame(
    iris_bunch.data,
    # Concrete list of names with the " (cm)" unit suffix removed
    # (a lazy map object is not a reliable value for `columns`).
    columns=[name.replace(" (cm)", "") for name in iris_bunch.feature_names],
)
iris['species'] = iris_bunch.target

## Choose the features to use
features = {
    '0': ['sepal width', 'sepal length'],
    '1': ['sepal width', 'sepal length', 'petal width', 'petal length'],
}

## Loop and fit
# results[feature_set_key][classifier_key] -> fitted pipeline
results = {}
for k, x_vars in features.items():
    results[k] = {}
    for m, model in classifiers.items():
        print(len(x_vars))
        results[k][m] = model.fit(x_vars, 'species', iris)

## Sanity check: each fitted forest should report one importance per selected
## feature. If both lines print the same number, the two pipelines are sharing
## a single estimator object.
print(
    'Number of features in X0: %i (should be 2)' % len(results['0']['rf'].steps[1][1].feature_importances_),
    'Number of features in X1: %i (should be 4)' % len(results['1']['rf'].steps[1][1].feature_importances_),
    sep='\n'
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment