This is probably the simplest classifier.
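Reading the code: fit standardizes X, maps the labels to \pm 1, and sets the weight vector to the signed sum of the training points; for balanced classes this is proportional to the difference of the two class means, i.e. roughly a nearest-centroid rule:

\theta = \tilde{X}^\top (2y - 1) = \sum_{i:\, y_i = 1} \tilde{x}_i - \sum_{i:\, y_i = 0} \tilde{x}_i, \qquad f(x) = \tilde{x}^\top \theta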
import numpy as np
from sklearn import preprocessing, base


class SimpleBinaryClassifier(base.BaseEstimator):
    def fit(self, X, y):
        """
        Requirement: y \in \{0, 1\}
        """
        # Standardize the features, map labels to {-1, +1}, and take the
        # signed sum of the training points as the weight vector.
        self.scaler = preprocessing.StandardScaler()
        X = self.scaler.fit_transform(X)
        y = y * 2 - 1
        self.theta = X.T.dot(y)
        return self

    def decision_function(self, X):
        X = self.scaler.transform(X)
        return X.dot(self.theta)

    def predict(self, X):
        Z = self.decision_function(X)
        # Map back to the {0, 1} labels used in fit.
        return (Z >= 0).astype(int)
if __name__ == '__main__':
    from sklearn import multiclass, metrics, model_selection

    r = np.random.RandomState(1)
    n = 250
    mu = 2
    # Four Gaussian blobs, one per class.
    X = np.r_[
        r.multivariate_normal([-mu, mu], [[0.1, 0], [0, 0.1]], size=n),
        r.multivariate_normal([mu, mu], [[0.1, 0], [0, 0.1]], size=n),
        r.multivariate_normal([mu, -mu], [[0.1, 0], [0, 0.1]], size=n),
        r.multivariate_normal([-mu, -mu], [[0.1, 0], [0, 0.1]], size=n)
    ]
    y = np.concatenate([
        np.repeat(0, n),
        np.repeat(1, n),
        np.repeat(2, n),
        np.repeat(3, n)
    ])

    # The binary classifier handles multi-class problems via the One-vs-Rest strategy.
    sc = multiclass.OneVsRestClassifier(SimpleBinaryClassifier())
    Xtr, Xte, ytr, yte = model_selection.train_test_split(X, y, random_state=1)
    sc.fit(Xtr, ytr)
    ypred = sc.predict(Xte)
    print("Accuracy:", metrics.accuracy_score(yte, ypred))
@nkt1546789 (Author) commented:

Using GaussianTransformer (https://gist.github.com/nkt1546789/733e376a5c63b52b183cc548d3124bd3),
we can handle data that is not linearly separable, like this:

import numpy as np
from sklearn import metrics, model_selection, multiclass
from gaussian_transformer import GaussianTransformer
from simple_classifier import SimpleBinaryClassifier

r = np.random.RandomState(1)
n = 250
mu = 2
X = np.r_[
    r.multivariate_normal([-mu, mu], [[0.1, 0], [0, 0.1]], size=n),
    r.multivariate_normal([mu, mu], [[0.1, 0], [0, 0.1]], size=n),
    r.multivariate_normal([mu, -mu], [[0.1, 0], [0, 0.1]], size=n),
    r.multivariate_normal([-mu, -mu], [[0.1, 0], [0, 0.1]], size=n)
]

y = np.concatenate([
    np.repeat(0, n),
    np.repeat(1, n),
    np.repeat(0, n),
    np.repeat(1, n)
])

sc = multiclass.OneVsRestClassifier(SimpleBinaryClassifier())
Xtr, Xte, ytr, yte = model_selection.train_test_split(X, y, random_state=1)

tr = GaussianTransformer(sigma=1.0, b=100, random_state=1)
Phitr = tr.fit_transform(Xtr)
Phite = tr.transform(Xte)
sc.fit(Phitr, ytr)
ypred = sc.predict(Phite)

print("Accuracy:", metrics.accuracy_score(yte, ypred))
