Skip to content

Instantly share code, notes, and snippets.

@kingjr
Last active August 29, 2015 14:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kingjr/cdc853e09aae9dde537c to your computer and use it in GitHub Desktop.
Save kingjr/cdc853e09aae9dde537c to your computer and use it in GitHub Desktop.
This gist aims to reproduce the behaviour of sklearn.svm.SVC (linear kernel only) without having to store 'support_vectors_' and '_dual_coef_'.
# Author: Jean-Remi King <jeanremi.king@gmail.com>
#
# License: BSD (3-clause)
import warnings
import numpy as np
import scipy.sparse as sp
from sklearn.svm import SVC, LinearSVC
from sklearn.datasets import make_classification
from sklearn.calibration import CalibratedClassifierCV
# CLASSIFIERS
def SVC_Light(probability=False, method='sigmoid', cv=5, **kwargs):
    """Build a light linear SVC, optionally wrapped for probability output.

    Similar to SVC(kernel='linear') without having to store
    'support_vectors_' and '_dual_coef_'.
    Uses CalibratedClassifierCV (through _SVC_Light_Proba) if ``probability``
    is truthy.

    Parameters
    ----------
    probability : bool
        When truthy, return a ``_SVC_Light_Proba`` wrapping a ``_SVC_Light``
        built with ``probability=True``; otherwise return a bare
        ``_SVC_Light``.
    method : str
        Forwarded to ``_SVC_Light_Proba`` (only used when ``probability`` is
        truthy).
    cv : int
        Forwarded to ``_SVC_Light_Proba`` (only used when ``probability`` is
        truthy).
    **kwargs
        Forwarded to ``_SVC_Light``.

    Returns
    -------
    _SVC_Light or _SVC_Light_Proba
        An unfitted estimator.
    """
    # Truthiness rather than ``is True`` so that values such as numpy
    # booleans or 1 are not silently treated as "no probability".
    if probability:
        base_estimator = _SVC_Light(probability=True, **kwargs)
        return _SVC_Light_Proba(base_estimator=base_estimator, method=method,
                                cv=cv)
    return _SVC_Light(**kwargs)
class _SVC_Light_Proba(CalibratedClassifierCV):
    """Calibrated classifier returned by SVC_Light when probability=True.

    Only binary problems are supported: ``fit`` raises ``ValueError`` when
    ``y`` holds more than two distinct values.
    """

    def decision_function(self, X):
        """Return ``self.predict_proba(X)`` after emitting a warning.

        The warning tells the caller that, for this class, the decision
        function and the predicted probabilities are the same thing.
        """
        warnings.warn(
            "With 'probability=True' decision_function=predict_proba")
        return self.predict_proba(X)

    def fit(self, X, y):
        """Fit the calibrated classifier on a binary problem.

        Parameters
        ----------
        X, y
            Training data and targets, passed as is to the parent ``fit``.

        Returns
        -------
        The value returned by the parent ``fit``, so that calls can be
        chained (``est.fit(X, y).predict(X)``).

        Raises
        ------
        ValueError
            If ``y`` contains more than two distinct values.
        """
        if len(np.unique(y)) > 2:
            # XXX
            raise ValueError('_SVC_Light currently does not support '
                             'probability=True for more than 2 classes.')
        return super(_SVC_Light_Proba, self).fit(X, y)
class _SVC_Light(SVC):
    """Linear-kernel SVC that keeps only its weight matrix after fitting.

    Similar to SVC(kernel='linear') without having to store
    'support_vectors_' and '_dual_coef_': ``fit`` computes the weights once,
    caches them in ``_coef_`` and deletes both attributes from the instance.

    Parameters
    ----------
    kernel : str
        Must be 'linear'; any other value raises ``ValueError``.
    probability : bool
        Forwarded unchanged to ``SVC``.
    **kwargs
        Remaining keyword arguments, forwarded to ``SVC.__init__``.
    """

    def __init__(self, kernel='linear', probability=False, **kwargs):
        # 'kernel' is a named parameter and therefore can never end up in
        # ``kwargs``; the value itself has to be inspected.
        if kernel != 'linear':
            raise ValueError('SVC_Light is only available when using a '
                             'linear kernel.')
        # No guard on ``probability``: SVC_Light builds this class with
        # probability=True for its calibrated variant.
        super(_SVC_Light, self).__init__(kernel=kernel,
                                         probability=probability, **kwargs)

    def fit(self, X, y, scaling=None):
        """Fit the SVC, cache its weights and drop the support vectors.

        Parameters
        ----------
        X, y
            Training data and targets, passed as is to ``SVC.fit``.
        scaling : None
            Unused; kept so existing callers keep working.

        Returns
        -------
        self
        """
        super(_SVC_Light, self).fit(X, y)
        # compute coef from support vectors once only
        self._coef_ = self._compute_coef_()
        del self.support_vectors_
        del self._dual_coef_
        # NOTE(review): methods inherited from SVC that still read the
        # deleted attributes presumably stop working after this point.
        return self

    def _compute_coef_(self):
        """Return the weights from ``_get_coef()`` flagged as read-only."""
        # Originally coef_(self) from SVC
        coef = self._get_coef()
        if sp.issparse(coef):
            coef.data.flags.writeable = False
        else:
            coef.flags.writeable = False
        return coef

    def predict(self, X):
        """Predict labels by one-vs-one voting on ``decision_function(X)``."""
        distances = self.decision_function(X)
        return predict_OneVsOne(distances, self.classes_)

    def decision_function(self, X):
        """Compute ``coef_ . X.T`` plus the intercept for every sample.

        With exactly two classes the result is negated before being
        returned; ``predict`` relies on that sign when it hands the values
        to ``predict_OneVsOne``.
        NOTE(review): the negation presumably compensates for the sign
        convention of the underlying SVC -- confirm against the installed
        scikit-learn version.
        """
        X = self._validate_for_predict(X)
        # intercept_ is broadcast over the sample axis.
        distances = np.dot(self.coef_, X.T).T + self.intercept_
        if len(self.classes_) == 2:
            distances *= -1
        return distances

    @property
    def coef_(self):
        """Weights cached by ``fit``."""
        return self._coef_
# PREDICTORS
def predict_OneVsOne(confidence, classes):
    """Turn pairwise (one-vs-one) decision values into class predictions.

    For SVC, NuSVC.

    Column ``k`` of ``confidence`` is the decision value of the k-th pair
    ``(classes[i], classes[j])`` with ``i < j``, pairs being enumerated as
    (0, 1), (0, 2), ..., (1, 2), ...  A strictly negative value is a vote
    for ``classes[j]``, anything else a vote for ``classes[i]``.  The class
    collecting the most votes wins; ties go to the class that comes first in
    ``classes``.

    Votes are tallied per class index rather than per class label, so labels
    of any dtype (e.g. strings) are supported.

    Parameters
    ----------
    confidence : array, 2-D
        One row per sample, one column per pair of classes.
    classes : array-like
        Class labels, in the order used to enumerate the pairs.

    Returns
    -------
    numpy.ndarray
        Predicted label for each row of ``confidence``.
    """
    classes = np.asarray(classes)
    confidence = np.asarray(confidence)
    n_samples = confidence.shape[0]
    n_classes = len(classes)
    tally = np.zeros((n_samples, n_classes), dtype=int)
    rows = np.arange(n_samples)
    # triu_indices walks the strict upper triangle row by row, i.e. the
    # pairs (i, j), i < j, in the order described above.
    first, second = np.triu_indices(n_classes, k=1)
    for k, (i, j) in enumerate(zip(first, second)):
        winner = np.where(confidence[:, k] < 0, j, i)
        tally[rows, winner] += 1
    return classes[tally.argmax(axis=1)]
def predict_OneVsRest(confidence, classes):
    """Select, for each row of ``confidence``, the best-scoring class.

    For LinearSVC.

    ``classes`` is indexed with the per-row argmax of ``confidence`` and the
    selected labels are returned as a new array.
    """
    best_column = confidence.argmax(axis=1)
    winners = classes[best_column]
    return np.array(winners)
# Demo: fit four classifiers on the same data and print their scores.
# setup dataset --------------------------------------------------------------
# Two-class dataset shared by every section below; no random_state is passed.
X, y = make_classification(n_informative=10, n_classes=2)
# 1. Classic pipeline --------------------------------------------------------
# Reference model: regular SVC with a linear kernel.
svc = SVC(kernel='linear')
svc.fit(X, y)
# Despite its name, y_pred holds decision_function output, not labels.
y_pred = svc.decision_function(X)
# Scored on the very data used for fitting.
score = svc.score(X, y)
# 2. Linear SVC ---------------------------------------------------------------
linearsvc = LinearSVC()
linearsvc.fit(X, y)
# NOTE: overwrites the y_pred computed in section 1.
y_pred = linearsvc.decision_function(X)
score_linear = linearsvc.score(X, y)
# 3. Light SVC ----------------------------------------------------------------
# probability is left at its default, so SVC_Light returns a bare _SVC_Light.
svc_light = SVC_Light(kernel='linear')
svc_light.fit(X, y)
y_pred_light = svc_light.predict(X)
score_light = svc_light.score(X, y)
# 4. Light SVC Proba-----------------------------------------------------------
# probability=True makes SVC_Light return the calibrated _SVC_Light_Proba.
svc_light_proba = SVC_Light(kernel='linear', probability=True)
svc_light_proba.fit(X, y)
# NOTE: the predict_proba result is overwritten by predict() on the next line.
y_pred_light_proba = svc_light_proba.predict_proba(X)
y_pred_light_proba = svc_light_proba.predict(X)
score_light_proba = svc_light_proba.score(X, y)
# Scores in section order: SVC, LinearSVC, light SVC, light SVC with proba.
print([score, score_linear, score_light, score_light_proba])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment