Created
July 21, 2016 02:03
-
-
Save shiodat/2f6b85a0c2d4cfefa93d549dd2edc8a3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
"""
Finding Best Hyper Parameter for Classifier
=======================================================
In this example, we try to find the best hyper parameter of Classifier
(`method` and `regularization_weight`) by calculating accuracy for
possible hyper parameter values.
The dataset is the handwritten digits dataset bundled with scikit-learn.
"""
import sklearn.datasets
import sklearn.metrics
from jubakit.classifier import Classifier, Dataset, Config
# Load scikit-learn's bundled handwritten digits dataset.
from sklearn.datasets import load_digits
digits = load_digits()
X, y = digits.data, digits.target
# Wrap the raw feature/label arrays in a jubakit Dataset so the
# Classifier service can consume them.
dataset = Dataset.from_array(X, y)
# Sweep over classifier algorithms and regularization weights, recording
# the accuracy each configuration achieves on the training data.
param2metrics = {}
for algo in ['AROW', 'NHERD', 'CW', 'PA']:
    for reg_weight in [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0]:
        print('Running ({0} / regularization_weight = {1})...'.format(algo, reg_weight))
        # Build the Jubatus configuration for this (algorithm, weight) pair.
        # Equivalent to taking Config.default() and overriding 'method' and
        # 'parameter.regularization_weight'.
        cfg = Config(method=algo, parameter={'regularization_weight': reg_weight})
        # Launch a Jubatus server process with that configuration.
        classifier = Classifier.run(cfg)
        print('\tport:', classifier._port)  # NOTE(review): _port is a private attribute
        # Feed the whole dataset to the server for training; the iterator
        # must be consumed for training to actually happen.
        for _ in classifier.train(dataset):
            pass
        # Re-classify the same dataset, collecting true and predicted labels.
        y_true = []
        y_pred = []
        print('\tstart classification')
        for (_idx, true_label, result) in classifier.classify(dataset):
            y_true.append(true_label)
            # result is sorted candidates; result[0][0] is the top label.
            y_pred.append(result[0][0])
        print('\tfinish classification')
        classifier.stop()
        # Record accuracy keyed by a human-readable configuration name.
        param2metrics['{0} ({1})'.format(algo, reg_weight)] = sklearn.metrics.accuracy_score(y_true, y_pred)
# Print an accuracy table per configuration, marking the best one with '*'.
# max() returns the first-encountered key with the highest accuracy, which
# matches the original stable descending sort's first element.
best_C = max(param2metrics, key=param2metrics.get)
print('--------------------')
print('Configuration\tAccuracy')
for C in sorted(param2metrics):
    marker = '*' if C == best_C else ''
    print('{0}\t{1}\t{2}'.format(C, param2metrics[C], marker))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I tried to run
classifier_parameter.py
example using a digits dataset. This example stopped while classifying the dataset.
Any ideas?