Skip to content

Instantly share code, notes, and snippets.

@arjoly
arjoly / lazy.py
Last active August 29, 2015 14:26
Lazy fitting estimator
# Author: Arnaud Joly
# Licence: BSD3
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin
from sklearn.base import RegressorMixin
from sklearn.base import clone
from sklearn.externals.joblib import Memory
import numpy as np
from time import time
from sklearn.datasets import make_multilabel_classification
X, y = make_multilabel_classification(n_samples=10000, sparse=True,
random_state=0, return_indicator=True)
from sklearn.ensemble import RandomForestClassifier
@arjoly
arjoly / bench_elm.py
Created August 11, 2014 08:29
Benchmark for the extreme learning machine classifier
import gc
import argparse
import sys
import time
import warnings
from sklearn.neural_network import ELMClassifier
from sklearn.datasets import make_classification
ELMClassifier._fit = profile(ELMClassifier._fit)
@arjoly
arjoly / bench_ovr.py
Last active August 29, 2015 14:03
Benchmark for sparse multilabel one-vs-rest (OvR) classification
import gc
import argparse
import sys
import numpy as np
import joblib
import time
import scipy.sparse as sp
import warnings
from sklearn.multiclass import OneVsRestClassifier
from sklearn.multiclass import fit_ovr
@arjoly
arjoly / bench_ranking_metrics.py
Created April 24, 2014 15:43
Benchmark for ranking metrics
from __future__ import division
from __future__ import print_function
from timeit import timeit
from functools import partial
# from sklearn.metrics import roc_auc_score
# from sklearn.metrics import average_precision_score
from sklearn.metrics import label_ranking_average_precision_score
""" Some results on the 20 news dataset

Classifier       train-time    test-time   error-rate
-----------------------------------------------------
5-nn                0.0047s     13.6651s       0.5916
random forest     263.3146s      3.9985s       0.2459
sgd                 0.2265s      0.0657s       0.2604
"""
@arjoly
arjoly / example_lps.py
Last active December 30, 2015 21:29
Comparison of different strategies for multilabel classification: one-versus-rest and label power set
from __future__ import print_function
from __future__ import division
from collections import defaultdict
from functools import partial
from pprint import pprint
import numpy as np
from sklearn.datasets import fetch_mldata
def _check_clf_targets(y_true, y_pred):
"""Check that y_true and y_pred belong to the same classification task
This converts multiclass or binary types to a common shape, and raises a
ValueError for a mix of multilabel and multiclass targets, a mix of
multilabel formats, for the presence of continuous-valued or multioutput
targets, or for targets of different lengths.
Column vectors are squeezed to 1d.
def test_unique_labels():
# Empty iterable
assert_raises(ValueError, unique_labels)
# Multiclass problem
assert_array_equal(unique_labels(xrange(10)), np.arange(10))
assert_array_equal(unique_labels(np.arange(10)), np.arange(10))
assert_array_equal(unique_labels([4, 0, 2]), np.array([0, 2, 4]))
# Multilabels
def test_unique_labels():
# Empty iterable
assert_raises(ValueError, unique_labels)
# Multiclass problem
assert_array_equal(unique_labels(xrange(10)), np.arange(10))
assert_array_equal(unique_labels(np.arange(10)), np.arange(10))
assert_array_equal(unique_labels([4, 0, 2]), np.array([0, 2, 4]))
# Multilabels