Created
August 15, 2016 03:52
-
-
Save shiodat/57d74827dbffd726a814cc280196c5be to your computer and use it in GitHub Desktop.
Port conflicts in GridSearchCV using Jubakit
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding:utf-8 -*- | |
import numpy as np | |
from scipy.sparse import issparse | |
from jubakit.classifier import Classifier, Config, Dataset, Schema | |
from sklearn.base import BaseEstimator, ClassifierMixin | |
from sklearn.utils import check_random_state | |
class LinearClassifier(BaseEstimator, ClassifierMixin):
    """scikit-learn style estimator backed by a jubakit ``Classifier`` service.

    The constructor only stores hyper-parameters. The backing service is
    started by ``fit`` (or lazily by ``partial_fit``) through
    ``Classifier.run`` and is kept in ``clf_`` until ``stop`` is called.

    Parameters
    ----------
    method : str
        Algorithm name passed to ``Config(method=...)``.
    regularization_weight : float
        Value stored under ``'regularization_weight'`` in the config
        ``parameter`` dict.
    n_iter : int
        Number of passes over the training data per ``partial_fit`` call.
    shuffle : bool
        Whether to shuffle the dataset before each pass.
    seed : None, int or RandomState
        Validated with ``check_random_state`` and passed to
        ``Dataset.shuffle``.
    port : int or None
        Forwarded unchanged to ``Classifier.run``.
    """

    def __init__(self, method='AROW', regularization_weight=1.0, n_iter=5,
                 shuffle=True, seed=None, port=None):
        self.method = method
        self.regularization_weight = regularization_weight
        self.n_iter = n_iter
        self.shuffle = shuffle
        self.seed = seed
        self.port = port

    def partial_fit(self, X, y):
        """Train the running service on ``(X, y)`` for ``n_iter`` passes.

        Starts a service first when none is running, so this method can be
        called without a preceding ``fit``. Labels seen here are merged into
        ``classes_``. Returns ``self``.
        """
        # Called only for its validation side effect: raises on a bad seed.
        check_random_state(self.seed)
        # ``clf_`` does not exist before the first fit, hence getattr.
        if getattr(self, 'clf_', None) is None:
            self._launch_classifier()

        # Track labels from training data only; prediction must not touch
        # ``classes_`` (it uses all-zero dummy labels).
        seen = np.unique(y)
        known = getattr(self, 'classes_', None)
        self.classes_ = seen if known is None else np.union1d(known, seen)

        dataset = self._load_dataset(X, y)
        for _ in range(self.n_iter):
            if self.shuffle:
                dataset = dataset.shuffle(self.seed)
            # ``train`` is consumed as an iterator; exhaust it so every
            # record is actually processed.
            for _ in self.clf_.train(dataset):
                pass
        return self

    def fit(self, X, y):
        """Start a fresh service and train it on ``(X, y)``. Returns ``self``."""
        # Stop a service left over from an earlier fit so it does not keep
        # running (and holding its port) after being replaced.
        if getattr(self, 'clf_', None) is not None:
            self.stop()
        self._launch_classifier()
        # A new model starts without any known labels.
        self.classes_ = None
        return self.partial_fit(X, y)

    def predict(self, X):
        """Return, for each row of ``X``, the first label of its result list."""
        n_samples = X.shape[0]
        y_pred = np.zeros(n_samples)
        # The dataset loader requires labels; zeros are placeholders.
        dataset = self._load_dataset(X, np.zeros(n_samples))
        for index, _, result in self.clf_.classify(dataset):
            # NOTE(review): assumes ``result`` is ordered best-first - confirm
            # against the jubakit ``classify`` documentation.
            y_pred[index] = result[0][0]
        return y_pred

    def decision_function(self, X):
        """Return an ``(n_samples, n_classes)`` array of per-label scores.

        Columns follow the order of ``classes_``; labels missing from a
        result keep a score of 0.
        """
        n_samples = X.shape[0]
        decisions = np.zeros((n_samples, self.classes_.shape[0]))
        dataset = self._load_dataset(X, np.zeros(n_samples))
        for index, _, result in self.clf_.classify(dataset):
            for label, score in result:
                # Returned labels are converted with int() before matching,
                # so this only works for integer class labels.
                column = np.where(self.classes_ == int(label))
                decisions[index][column] = score
        return decisions

    def _launch_classifier(self):
        """Build the config from the hyper-parameters and start the service."""
        self.cfg_ = Config(
            method=self.method,
            parameter={'regularization_weight': self.regularization_weight})
        self.clf_ = Classifier.run(config=self.cfg_, port=self.port)

    def _load_dataset(self, X, y):
        """Wrap ``(X, y)`` in a jubakit ``Dataset`` (sparse or dense input)."""
        if issparse(X):
            return Dataset.from_matrix(X, y)
        return Dataset.from_array(X, y)

    def stop(self):
        """Stop the backing service, if any, and forget it."""
        if getattr(self, 'clf_', None) is not None:
            self.clf_.stop()
        self.clf_ = None

    def clear(self):
        """Call ``clear()`` on the running service; no-op when none is running."""
        if getattr(self, 'clf_', None) is not None:
            self.clf_.clear()
if __name__ == '__main__':
    # Demo: grid-search LinearClassifier on the iris data, first serially and
    # then with n_jobs=-1.
    from sklearn.datasets import load_iris
    try:
        # GridSearchCV lives in model_selection since scikit-learn 0.18.
        from sklearn.model_selection import GridSearchCV
    except ImportError:
        # Older scikit-learn releases only provide the grid_search module.
        from sklearn.grid_search import GridSearchCV

    iris = load_iris()
    X = iris.data
    y = iris.target

    clf = LinearClassifier()
    parameters = {
        'method': ['AROW', 'CW'],
        'regularization_weight': [0.1, 1, 10],
    }

    print('single thread mode (n_jobs=1)')
    gs = GridSearchCV(clf, parameters)
    gs.fit(X, y)
    print('best_score', gs.best_score_)
    print('best_params', gs.best_params_)

    # NOTE(review): presumably this parallel run is where the port conflict
    # this snippet was written to demonstrate shows up - confirm.
    print('multi thread mode (n_jobs=-1)')
    gs = GridSearchCV(clf, parameters, n_jobs=-1)
    gs.fit(X, y)
    print('best_score', gs.best_score_)
    print('best_params', gs.best_params_)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Jubakit cannot launch a service on ports that have already been obtained by scikit-learn's grid_search.GridSearchCV.