Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# -*- coding: utf-8 -*-
from sklearn.base import BaseEstimator, RegressorMixin, TransformerMixin
try:
    from sklearn.model_selection import cross_val_predict
except ImportError:  # scikit-learn < 0.18 has no model_selection module
    from sklearn.cross_validation import cross_val_predict
class ThroughTransformer(BaseEstimator, TransformerMixin):
    """Identity transformer: hands its input back untouched.

    Handy as a ``FeatureUnion`` member when the raw feature columns should
    be kept next to the columns produced by the other members.
    """

    def fit(self, X, y=None, **fit_param):
        """Learn nothing and return ``self``.

        ``X``, ``y`` and any extra ``fit_param`` keywords are accepted only
        for API compatibility and are ignored.
        """
        return self

    def transform(self, X):
        """Return ``X`` itself (the same object, no copy is made)."""
        return X
class CrossValRegressor(BaseEstimator, RegressorMixin):
    """Expose a regressor's predictions as feature columns (stacking helper).

    ``fit_transform`` returns out-of-fold predictions obtained with
    ``cross_val_predict``, whereas ``transform`` returns the plain
    predictions of the wrapped estimator after it has been fitted.

    Parameters
    ----------
    estimator : object
        Regressor providing ``fit`` and ``predict``. It is stored as
        ``self.estimator`` and fitted in place (it is not cloned here).
    """

    def __init__(self, estimator):
        self.estimator = estimator

    @staticmethod
    def _as_columns(pred):
        """Return ``pred`` as a 2-D column block.

        Predictions that are already 2-D (one column per target) are
        returned unchanged: flattening them with ``reshape(-1, 1)`` would
        multiply the number of rows by the number of targets. Anything
        else is reshaped into a single column, as before.
        """
        if pred.ndim == 2:
            return pred
        return pred.reshape(-1, 1)

    def fit(self, X, y=None, **fit_param):
        """Fit the wrapped estimator on ``(X, y)`` and return ``self``.

        Extra keyword arguments are forwarded to ``estimator.fit``.
        """
        self.estimator.fit(X, y, **fit_param)
        return self

    def transform(self, X):
        """Return ``predict(X)`` as a 2-D array with one row per sample."""
        return self._as_columns(self.predict(X))

    def fit_transform(self, X, y, **fit_param):
        """Fit on ``(X, y)`` and return out-of-fold predictions for ``X``.

        The wrapped estimator is fitted on all of ``X`` so that later
        ``transform`` / ``predict`` calls work. The returned values come
        from ``cross_val_predict`` with its default settings.

        NOTE: ``fit_param`` is applied to the full fit only; it is not
        forwarded to ``cross_val_predict``.
        """
        self.estimator.fit(X, y, **fit_param)
        out_of_fold = cross_val_predict(self.estimator, X, y)
        return self._as_columns(out_of_fold)

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        return self.estimator.predict(X)
if __name__ == '__main__':
    # Demo: a two-level stacked pipeline on the diabetes dataset, compared
    # against a plain linear regression and a plain random forest. Each
    # print shows the value returned by ``score`` on the held-out half.
    from sklearn.datasets import load_diabetes
    from sklearn.ensemble import ExtraTreesRegressor, RandomForestRegressor
    from sklearn.linear_model import LinearRegression
    from sklearn.neighbors import KNeighborsRegressor
    from sklearn.pipeline import FeatureUnion, Pipeline
    from sklearn.svm import SVR

    # ``sklearn.cross_validation`` was removed in scikit-learn 0.20; prefer
    # its replacement and fall back only on very old installations.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:  # scikit-learn < 0.18
        from sklearn.cross_validation import train_test_split

    diabetes = load_diabetes()
    X_train, X_test, y_train, y_test = train_test_split(
        diabetes.data, diabetes.target, test_size=0.5, random_state=0)

    # Level 1: the raw features plus one prediction column per base model.
    features1 = FeatureUnion([
        ('th', ThroughTransformer()),
        ('rf', CrossValRegressor(RandomForestRegressor(random_state=2016))),
        ('et', CrossValRegressor(ExtraTreesRegressor(random_state=2016))),
        ('kn', CrossValRegressor(KNeighborsRegressor())),
        ('svr', CrossValRegressor(SVR())),
    ])

    # Level 2: two random forests trained on the level-1 output.
    features2 = FeatureUnion([
        ('rf_10', CrossValRegressor(RandomForestRegressor(n_estimators=10, random_state=0))),
        ('rf_20', CrossValRegressor(RandomForestRegressor(n_estimators=20, random_state=0))),
    ])

    # Final estimator on top of the level-2 prediction columns.
    pipe = Pipeline([
        ('f1', features1),
        ('f2', features2),
        ('rf', RandomForestRegressor(random_state=0)),
    ])
    pipe.fit(X_train, y_train)
    print(pipe.score(X_test, y_test))

    # Baseline 1: ordinary least squares on the raw features.
    model = LinearRegression()
    model.fit(X_train, y_train)
    print(model.score(X_test, y_test))

    # Baseline 2: a single random forest on the raw features.
    model = RandomForestRegressor(random_state=2016)
    model.fit(X_train, y_train)
    print(model.score(X_test, y_test))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.