Skip to content

Instantly share code, notes, and snippets.

@shiodat
Created August 15, 2016 03:52
Show Gist options
  • Save shiodat/57d74827dbffd726a814cc280196c5be to your computer and use it in GitHub Desktop.
Save shiodat/57d74827dbffd726a814cc280196c5be to your computer and use it in GitHub Desktop.
Port conflicts in GridSearchCV using Jubakit
# -*- coding:utf-8 -*-
import numpy as np
from scipy.sparse import issparse
from jubakit.classifier import Classifier, Config, Dataset, Schema
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils import check_random_state
class LinearClassifier(BaseEstimator, ClassifierMixin):
    """scikit-learn style estimator backed by a standalone Jubatus classifier.

    Training and prediction are delegated to a server process started through
    ``jubakit.classifier.Classifier.run``.  The server handle is kept in
    ``clf_`` and the labels seen during training in ``classes_``.

    Parameters
    ----------
    method : str
        Jubatus classifier algorithm name, passed to ``Config``.
    regularization_weight : float
        Value of the ``regularization_weight`` entry of the Jubatus config.
    n_iter : int
        Number of passes over the data made by each (partial) fit.
    shuffle : bool
        Whether the dataset is shuffled before every pass.
    seed : int or None
        Seed handed to ``Dataset.shuffle``.
    port : int or None
        Port handed to ``Classifier.run``.  With ``None`` jubakit picks the
        port itself.
        NOTE(review): the report accompanying this script shows port 10000
        being reported as conflicting under parallel grid search.
    """

    def __init__(self, method='AROW', regularization_weight=1.0, n_iter=5,
                 shuffle=True, seed=None, port=None):
        # Only store the parameters: BaseEstimator.get_params()/clone() rely
        # on __init__ doing nothing else.
        self.method = method
        self.regularization_weight = regularization_weight
        self.n_iter = n_iter
        self.shuffle = shuffle
        self.seed = seed
        self.port = port

    def partial_fit(self, X, y):
        """Train on ``(X, y)`` without discarding what was learned before.

        A server is launched on demand when none is running (first call, or
        after ``stop()``).  Returns ``self``.
        """
        # Validate the seed before any server is started; the returned
        # RandomState is intentionally unused.
        check_random_state(self.seed)
        batch_classes = np.unique(y)
        if getattr(self, 'clf_', None) is None:
            # No live server: start one and begin a fresh label set.
            self._launch_classifier()
            self.classes_ = batch_classes
        else:
            known = getattr(self, 'classes_', None)
            if known is None:
                self.classes_ = batch_classes
            else:
                # Accumulate labels across incremental batches.
                self.classes_ = np.union1d(known, batch_classes)
        return self._train(X, y)

    def fit(self, X, y):
        """Train a fresh model on ``(X, y)`` and return ``self``.

        Any server left over from an earlier fit is stopped first so that a
        refit does not leave an extra server process (and its port) behind.
        """
        check_random_state(self.seed)
        self.stop()
        self._launch_classifier()
        self.classes_ = np.unique(y)
        return self._train(X, y)

    def predict(self, X):
        """Return one predicted label per row of ``X`` as a float array."""
        n_samples = X.shape[0]
        # The dataset loader requires labels; they are placeholders here.
        dataset = self._load_dataset(X, np.zeros(n_samples))
        y_pred = np.zeros(n_samples)
        for index, _, result in self.clf_.classify(dataset):
            # The label of the first result entry is taken as the prediction
            # (presumably the best-scored one -- confirm against jubakit).
            y_pred[index] = result[0][0]
        return y_pred

    def decision_function(self, X):
        """Return per-class scores, shape ``(n_samples, n_classes)``.

        Columns follow the order of ``classes_``.  Labels reported by the
        server that are not in ``classes_`` are ignored.
        """
        n_samples = X.shape[0]
        dataset = self._load_dataset(X, np.zeros(n_samples))
        decisions = np.zeros((n_samples, self.classes_.shape[0]))
        for index, _, result in self.clf_.classify(dataset):
            for label, score in result:
                # Labels are compared as integers against classes_.
                decisions[index, self.classes_ == int(label)] = score
        return decisions

    def _train(self, X, y):
        """Run ``n_iter`` training passes over ``(X, y)`` on the server."""
        dataset = self._load_dataset(X, y)
        for _epoch in range(self.n_iter):
            if self.shuffle:
                dataset = dataset.shuffle(self.seed)
            # train() is iterated to completion; its items are not needed.
            for _ in self.clf_.train(dataset):
                pass
        return self

    def _launch_classifier(self):
        """Build the Jubatus config and start a server, storing both."""
        self.cfg_ = Config(
            method=self.method,
            parameter={'regularization_weight': self.regularization_weight})
        self.clf_ = Classifier.run(config=self.cfg_, port=self.port)

    def _load_dataset(self, X, y):
        """Wrap ``X``/``y`` in a jubakit ``Dataset`` (sparse or dense).

        ``classes_`` is deliberately not touched here: this helper is also
        called with placeholder labels at prediction time, and recording
        those would overwrite the labels learned during training.
        """
        if issparse(X):
            return Dataset.from_matrix(X, y)
        return Dataset.from_array(X, y)

    def stop(self):
        """Stop the server if one is running; otherwise do nothing."""
        server = getattr(self, 'clf_', None)
        if server is not None:
            server.stop()
        self.clf_ = None

    def clear(self):
        """Call ``clear()`` on the running server; no-op without a server."""
        server = getattr(self, 'clf_', None)
        if server is not None:
            server.clear()
if __name__ == '__main__':
    # Demo: grid search over the Jubatus-backed estimator on the iris data,
    # first sequentially and then with one worker per CPU (n_jobs=-1).
    from sklearn.datasets import load_iris
    try:
        # Current location of GridSearchCV.
        from sklearn.model_selection import GridSearchCV
    except ImportError:
        # Older scikit-learn releases only ship the legacy module.
        from sklearn.grid_search import GridSearchCV

    iris = load_iris()
    X = iris.data
    y = iris.target

    clf = LinearClassifier()
    parameters = {
        'method': ['AROW', 'CW'],
        'regularization_weight': [0.1, 1, 10],
    }

    print('single thread mode (n_jobs=1)')
    gs = GridSearchCV(clf, parameters)
    gs.fit(X, y)
    print('best_score', gs.best_score_)
    print('best_params', gs.best_params_)

    print('multi thread mode (n_jobs=-1)')
    gs = GridSearchCV(clf, parameters, n_jobs=-1)
    gs.fit(X, y)
    print('best_score', gs.best_score_)
    print('best_params', gs.best_params_)
@shiodat
Copy link
Author

shiodat commented Aug 15, 2016

Jubakit cannot launch a service on a port that has already been obtained when run under scikit-learn's `grid_search.GridSearchCV`.

...........................................................................
/home/daats/.pyenv/versions/3.5.0/lib/python3.5/site-packages/jubakit/base.py in run(cls=<class 'jubakit.classifier.Classifier'>, config={'parameter': {'regularization_weight': 0.1}, 'm...'unigram', 'global_weight': 'idf', 'key': '*'}]}}, port=None)
    441   def run(cls, config, port=None):
    442     """
    443     Runs a new standalone server and returns the serivce instance to access
    444     the server.
    445     """
--> 446     backend = _ServiceBackend(cls.name(), config, port)
        backend = undefined
        cls.name = <bound method Classifier.name of <class 'jubakit.classifier.Classifier'>>
        config = {'parameter': {'regularization_weight': 0.1}, 'm...'unigram', 'global_weight': 'idf', 'key': '*'}]}}
        port = None
    447     _logger.info('service %s started on port %d', cls.name(), backend.port)
    448 
    449     # Returns the Service instance.
    450     service = cls('127.0.0.1', backend.port)

...........................................................................
/home/daats/.pyenv/versions/3.5.0/lib/python3.5/site-packages/jubakit/base.py in __init__(self=<jubakit.base._ServiceBackend object>, name='classifier', config={'parameter': {'regularization_weight': 0.1}, 'm...'unigram', 'global_weight': 'idf', 'key': '*'}]}}, port=None)
    543       if started:
    544         status = self.get_status()
    545         pid = int(status['pid'])
    546         if pid != self._proc.pid:
    547           self._proc.kill()
--> 548           raise RuntimeError('server cannot be started as port {0} conflicts with external Jubatus process (PID: {1})'.format(self.port, pid))
        self.port = 10000
        pid = 12125
    549 
    550     if not started:
    551       _logger.error('failed to start service')
    552       log = self.stop()

RuntimeError: server cannot be started as port 10000 conflicts with external Jubatus process (PID: 12125)
___________________________________________________________________________

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment