shiodat/wrapper.py

## wrapper.py
# -*- coding:utf-8 -*-
import numpy as np
from scipy.sparse import issparse
from jubakit.classifier import Classifier, Config, Dataset, Schema
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils import check_random_state

class LinearClassifier(BaseEstimator, ClassifierMixin):
    """scikit-learn style estimator backed by a jubakit classifier service.

    ``fit`` starts a fresh service through ``Classifier.run`` and trains it;
    ``partial_fit`` keeps training the service that is already running
    (starting one first when there is none).

    Parameters
    ----------
    method : str
        Algorithm name handed to the jubakit ``Config``.
    regularization_weight : float
        Stored under the ``'regularization_weight'`` key of the config
        parameter.
    n_iter : int
        Number of passes over the data made by every ``partial_fit`` call.
    shuffle : bool
        Whether the dataset is reshuffled before each pass.
    seed : int or None
        Validated with ``check_random_state`` and passed to
        ``Dataset.shuffle``.
    port : int or None
        Passed to ``Classifier.run``.

    Attributes
    ----------
    cfg_ : Config
        Configuration of the most recently started service.
    clf_ : service handle or None
        Running service; ``None`` after ``stop()``.
    classes_ : ndarray or None
        Sorted labels seen while training the current service; ``None``
        right after a service is started and before any training.
    """

    def __init__(self, method='AROW', regularization_weight=1.0, n_iter=5,
                 shuffle=True, seed=None, port=None):
        self.method = method
        self.regularization_weight = regularization_weight
        self.n_iter = n_iter
        self.shuffle = shuffle
        self.seed = seed
        self.port = port

    def partial_fit(self, X, y):
        """Train the running service on ``(X, y)`` and return ``self``.

        A service is started first when none is running, so this method can
        be called on a freshly constructed estimator.
        """
        # Only validates the seed; the raw value is what Dataset.shuffle
        # receives below.
        check_random_state(self.seed)

        # A new estimator has no ``clf_`` attribute at all, so a plain
        # attribute read would raise AttributeError here.
        if getattr(self, 'clf_', None) is None:
            self._launch_classifier()

        self._remember_classes(y)
        dataset = self._load_dataset(X, y)

        for _epoch in range(self.n_iter):
            if self.shuffle:
                dataset = dataset.shuffle(self.seed)
            # ``train`` is consumed as an iterator; draining it is what
            # drives the training, the yielded items are not needed.
            for _result in self.clf_.train(dataset):
                pass

        return self

    def fit(self, X, y):
        """Train a brand new service on ``(X, y)`` and return ``self``."""
        # Stop the service of an earlier fit instead of orphaning it (it
        # would also keep a fixed ``port`` occupied).
        self.stop()
        self._launch_classifier()
        return self.partial_fit(X, y)

    def predict(self, X):
        """Return one predicted label per row of ``X`` as a float array."""
        n_samples = X.shape[0]
        # The dataset loaders are called with labels, so placeholders are
        # supplied; they are not used for the prediction itself.
        y_dummy = np.zeros(n_samples)
        y_pred = np.zeros(n_samples)

        dataset = self._load_dataset(X, y_dummy)

        for index, _, result in self.clf_.classify(dataset):
            # First entry of the result list is taken as the prediction
            # (presumably the best-scoring label; confirm against jubakit).
            y_pred[index] = result[0][0]

        return y_pred

    def decision_function(self, X):
        """Return the per-class scores for every row of ``X``.

        The result has shape ``(n_samples, n_classes)`` with columns ordered
        like ``classes_``; classes missing from a result keep a score of 0.
        Labels reported by the service are converted with ``int`` before
        being matched against ``classes_``.
        """
        n_samples = X.shape[0]
        y_dummy = np.zeros(n_samples)
        decisions = np.zeros((n_samples, self.classes_.shape[0]))

        dataset = self._load_dataset(X, y_dummy)

        for index, _, result in self.clf_.classify(dataset):
            for ret in result:
                column = self.classes_ == int(ret[0])
                decisions[index, column] = ret[1]

        return decisions

    def _launch_classifier(self):
        """Start a new classifier service configured from the parameters."""
        self.cfg_ = Config(
            method=self.method,
            parameter={'regularization_weight': self.regularization_weight})
        self.clf_ = Classifier.run(config=self.cfg_, port=self.port)
        # Labels recorded for a previous service do not describe this one.
        self.classes_ = None

    def _remember_classes(self, y):
        """Merge the labels in ``y`` into ``classes_``."""
        seen = np.unique(y)
        known = getattr(self, 'classes_', None)
        self.classes_ = seen if known is None else np.union1d(known, seen)

    def _load_dataset(self, X, y):
        """Wrap ``(X, y)`` in a jubakit ``Dataset``.

        Sparse matrices go through ``Dataset.from_matrix``, everything else
        through ``Dataset.from_array``. ``classes_`` is deliberately left
        alone: this helper is also called with placeholder labels at
        prediction time.
        """
        if issparse(X):
            return Dataset.from_matrix(X, y)
        return Dataset.from_array(X, y)

    def stop(self):
        """Stop the running service, if any, and forget it."""
        service = getattr(self, 'clf_', None)
        if service is not None:
            service.stop()
        self.clf_ = None

    def clear(self):
        """Forward to the ``clear`` method of the running service."""
        self.clf_.clear()

if __name__ == '__main__':
    # Demo: grid-search the wrapper on the iris data, first with the default
    # number of jobs and then with n_jobs=-1.
    from sklearn.datasets import load_iris
    try:
        from sklearn.model_selection import GridSearchCV
    except ImportError:
        # Old scikit-learn releases only provide the since-removed
        # ``sklearn.grid_search`` module.
        from sklearn.grid_search import GridSearchCV

    iris = load_iris()
    X = iris.data
    y = iris.target

    clf = LinearClassifier()
    parameters = {'method': ['AROW', 'CW'],
                  'regularization_weight': [0.1, 1, 10]}

    runs = (('single thread mode (n_jobs=1)', {}),
            ('multi thread mode (n_jobs=-1)', {'n_jobs': -1}))
    for banner, search_options in runs:
        print(banner)
        gs = GridSearchCV(clf, parameters, **search_options)
        gs.fit(X, y)
        print('best_score', gs.best_score_)
        print('best_params', gs.best_params_)
	# -*- coding:utf-8 -*-
	import numpy as np
	from scipy.sparse import issparse
	from jubakit.classifier import Classifier, Config, Dataset, Schema
	from sklearn.base import BaseEstimator, ClassifierMixin
	from sklearn.utils import check_random_state

	class LinearClassifier(BaseEstimator, ClassifierMixin):
	    """scikit-learn style estimator backed by a jubakit classifier service.

	    ``fit`` starts a fresh service through ``Classifier.run`` and trains it;
	    ``partial_fit`` keeps training the service that is already running
	    (starting one first when there is none).

	    Parameters
	    ----------
	    method : str
	        Algorithm name handed to the jubakit ``Config``.
	    regularization_weight : float
	        Stored under the ``'regularization_weight'`` key of the config
	        parameter.
	    n_iter : int
	        Number of passes over the data made by every ``partial_fit`` call.
	    shuffle : bool
	        Whether the dataset is reshuffled before each pass.
	    seed : int or None
	        Validated with ``check_random_state`` and passed to
	        ``Dataset.shuffle``.
	    port : int or None
	        Passed to ``Classifier.run``.

	    Attributes
	    ----------
	    cfg_ : Config
	        Configuration of the most recently started service.
	    clf_ : service handle or None
	        Running service; ``None`` after ``stop()``.
	    classes_ : ndarray or None
	        Sorted labels seen while training the current service; ``None``
	        right after a service is started and before any training.
	    """

	    def __init__(self, method='AROW', regularization_weight=1.0, n_iter=5,
	                 shuffle=True, seed=None, port=None):
	        self.method = method
	        self.regularization_weight = regularization_weight
	        self.n_iter = n_iter
	        self.shuffle = shuffle
	        self.seed = seed
	        self.port = port

	    def partial_fit(self, X, y):
	        """Train the running service on ``(X, y)`` and return ``self``.

	        A service is started first when none is running, so this method can
	        be called on a freshly constructed estimator.
	        """
	        # Only validates the seed; the raw value is what Dataset.shuffle
	        # receives below.
	        check_random_state(self.seed)

	        # A new estimator has no ``clf_`` attribute at all, so a plain
	        # attribute read would raise AttributeError here.
	        if getattr(self, 'clf_', None) is None:
	            self._launch_classifier()

	        self._remember_classes(y)
	        dataset = self._load_dataset(X, y)

	        for _epoch in range(self.n_iter):
	            if self.shuffle:
	                dataset = dataset.shuffle(self.seed)
	            # ``train`` is consumed as an iterator; draining it is what
	            # drives the training, the yielded items are not needed.
	            for _result in self.clf_.train(dataset):
	                pass

	        return self

	    def fit(self, X, y):
	        """Train a brand new service on ``(X, y)`` and return ``self``."""
	        # Stop the service of an earlier fit instead of orphaning it (it
	        # would also keep a fixed ``port`` occupied).
	        self.stop()
	        self._launch_classifier()
	        return self.partial_fit(X, y)

	    def predict(self, X):
	        """Return one predicted label per row of ``X`` as a float array."""
	        n_samples = X.shape[0]
	        # The dataset loaders are called with labels, so placeholders are
	        # supplied; they are not used for the prediction itself.
	        y_dummy = np.zeros(n_samples)
	        y_pred = np.zeros(n_samples)

	        dataset = self._load_dataset(X, y_dummy)

	        for index, _, result in self.clf_.classify(dataset):
	            # First entry of the result list is taken as the prediction
	            # (presumably the best-scoring label; confirm against jubakit).
	            y_pred[index] = result[0][0]

	        return y_pred

	    def decision_function(self, X):
	        """Return the per-class scores for every row of ``X``.

	        The result has shape ``(n_samples, n_classes)`` with columns ordered
	        like ``classes_``; classes missing from a result keep a score of 0.
	        Labels reported by the service are converted with ``int`` before
	        being matched against ``classes_``.
	        """
	        n_samples = X.shape[0]
	        y_dummy = np.zeros(n_samples)
	        decisions = np.zeros((n_samples, self.classes_.shape[0]))

	        dataset = self._load_dataset(X, y_dummy)

	        for index, _, result in self.clf_.classify(dataset):
	            for ret in result:
	                column = self.classes_ == int(ret[0])
	                decisions[index, column] = ret[1]

	        return decisions

	    def _launch_classifier(self):
	        """Start a new classifier service configured from the parameters."""
	        self.cfg_ = Config(
	            method=self.method,
	            parameter={'regularization_weight': self.regularization_weight})
	        self.clf_ = Classifier.run(config=self.cfg_, port=self.port)
	        # Labels recorded for a previous service do not describe this one.
	        self.classes_ = None

	    def _remember_classes(self, y):
	        """Merge the labels in ``y`` into ``classes_``."""
	        seen = np.unique(y)
	        known = getattr(self, 'classes_', None)
	        self.classes_ = seen if known is None else np.union1d(known, seen)

	    def _load_dataset(self, X, y):
	        """Wrap ``(X, y)`` in a jubakit ``Dataset``.

	        Sparse matrices go through ``Dataset.from_matrix``, everything else
	        through ``Dataset.from_array``. ``classes_`` is deliberately left
	        alone: this helper is also called with placeholder labels at
	        prediction time.
	        """
	        if issparse(X):
	            return Dataset.from_matrix(X, y)
	        return Dataset.from_array(X, y)

	    def stop(self):
	        """Stop the running service, if any, and forget it."""
	        service = getattr(self, 'clf_', None)
	        if service is not None:
	            service.stop()
	        self.clf_ = None

	    def clear(self):
	        """Forward to the ``clear`` method of the running service."""
	        self.clf_.clear()

	if __name__ == '__main__':
	    # Demo: grid-search the wrapper on the iris data, first with the default
	    # number of jobs and then with n_jobs=-1.
	    from sklearn.datasets import load_iris
	    try:
	        from sklearn.model_selection import GridSearchCV
	    except ImportError:
	        # Old scikit-learn releases only provide the since-removed
	        # ``sklearn.grid_search`` module.
	        from sklearn.grid_search import GridSearchCV

	    iris = load_iris()
	    X = iris.data
	    y = iris.target

	    clf = LinearClassifier()
	    parameters = {'method': ['AROW', 'CW'],
	                  'regularization_weight': [0.1, 1, 10]}

	    runs = (('single thread mode (n_jobs=1)', {}),
	            ('multi thread mode (n_jobs=-1)', {'n_jobs': -1}))
	    for banner, search_options in runs:
	        print(banner)
	        gs = GridSearchCV(clf, parameters, **search_options)
	        gs.fit(X, y)
	        print('best_score', gs.best_score_)
	        print('best_params', gs.best_params_)