Skip to content

Instantly share code, notes, and snippets.

@satomacoto
Created April 1, 2016 09:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save satomacoto/b3e58038eb1c2a8ec31cecffdca1c3c4 to your computer and use it in GitHub Desktop.
Save satomacoto/b3e58038eb1c2a8ec31cecffdca1c3c4 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
from sklearn.base import BaseEstimator, RegressorMixin, TransformerMixin
try:
    # Current home of cross_val_predict (scikit-learn >= 0.18).
    from sklearn.model_selection import cross_val_predict
except ImportError:
    # Legacy module, removed in scikit-learn 0.20.
    from sklearn.cross_validation import cross_val_predict
class ThroughTransformer(BaseEstimator, TransformerMixin):
    """Identity transformer: hands its input back without modification.

    It learns nothing during ``fit`` and ``transform`` returns exactly the
    object it was given, which makes it usable as a "keep the raw features"
    branch next to other transformers.
    """

    def fit(self, X, y=None, **fit_param):
        """Do nothing and return ``self``; all arguments are ignored."""
        return self

    def transform(self, X):
        """Return ``X`` itself (the same object, not a copy)."""
        return X
class CrossValRegressor(BaseEstimator, RegressorMixin):
    """Wrap a regressor so its predictions can be used as a feature column.

    The wrapper exposes both the regressor interface (``fit`` / ``predict``)
    and a transformer-like interface (``transform`` / ``fit_transform``)
    whose output is a single column of predictions.

    Parameters
    ----------
    estimator : object
        The wrapped regressor. It must provide ``fit`` and ``predict``.
    """

    def __init__(self, estimator):
        self.estimator = estimator

    def fit(self, X, y=None, **fit_param):
        """Fit the wrapped estimator on ``(X, y)`` and return ``self``.

        Extra keyword arguments are passed straight to the wrapped
        estimator's ``fit``.
        """
        self.estimator.fit(X, y, **fit_param)
        return self

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        return self.estimator.predict(X)

    def transform(self, X):
        """Return the predictions for ``X`` reshaped into one column."""
        predictions = self.predict(X)
        return predictions.reshape(-1, 1)

    def fit_transform(self, X, y, **fit_param):
        """Fit on ``(X, y)`` and return cross-validated predictions.

        The wrapped estimator is fitted on the full data so that later
        ``transform`` / ``predict`` calls work, but the returned column
        comes from ``cross_val_predict`` rather than from that fitted
        model. Note that ``fit_param`` is only used for the full fit; it
        is not forwarded to ``cross_val_predict``.
        """
        self.estimator.fit(X, y, **fit_param)
        cv_predictions = cross_val_predict(self.estimator, X, y)
        return cv_predictions.reshape(-1, 1)
if __name__ == '__main__':
    # Demo: build a two-level stacked model on the diabetes dataset and
    # compare its test score with two plain baselines.
    from sklearn.datasets import load_diabetes
    try:
        # Current home of train_test_split (scikit-learn >= 0.18).
        from sklearn.model_selection import train_test_split
    except ImportError:
        # Legacy module, removed in scikit-learn 0.20.
        from sklearn.cross_validation import train_test_split
    from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
    from sklearn.neighbors import KNeighborsRegressor
    from sklearn.linear_model import LinearRegression, ElasticNet, Lasso
    from sklearn.svm import SVR
    from sklearn.pipeline import Pipeline, FeatureUnion

    diabetes = load_diabetes()
    # Half of the samples are held out for scoring; the seed is fixed so the
    # split is reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        diabetes.data, diabetes.target, test_size=0.5, random_state=0)

    # Level 1: the raw features plus one prediction column per base regressor.
    features1 = FeatureUnion([
        ('th', ThroughTransformer()),
        ('rf', CrossValRegressor(RandomForestRegressor(random_state=2016))),
        ('et', CrossValRegressor(ExtraTreesRegressor(random_state=2016))),
        ('kn', CrossValRegressor(KNeighborsRegressor())),
        ('svr', CrossValRegressor(SVR())),
    ])

    # Level 2: two random forests trained on the level-1 output, each
    # contributing one prediction column.
    features2 = FeatureUnion([
        ('rf_10', CrossValRegressor(RandomForestRegressor(n_estimators=10, random_state=0))),
        ('rf_20', CrossValRegressor(RandomForestRegressor(n_estimators=20, random_state=0))),
    ])

    # Final estimator: a random forest on the two level-2 columns.
    pipe = Pipeline([
        ('f1', features1),
        ('f2', features2),
        ('rf', RandomForestRegressor(random_state=0)),
    ])
    pipe.fit(X_train, y_train)
    print(pipe.score(X_test, y_test))

    # Baseline 1: ordinary least squares on the raw features.
    model = LinearRegression()
    model.fit(X_train, y_train)
    print(model.score(X_test, y_test))

    # Baseline 2: a single random forest on the raw features.
    model = RandomForestRegressor(random_state=2016)
    model.fit(X_train, y_train)
    print(model.score(X_test, y_test))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment