Skip to content

Instantly share code, notes, and snippets.

@dmoisset
Forked from alep/lr.py
Created July 26, 2016 14:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dmoisset/ad6cfdf4edee498f63f99bbc8bdcf906 to your computer and use it in GitHub Desktop.
Save dmoisset/ad6cfdf4edee498f63f99bbc8bdcf906 to your computer and use it in GitHub Desktop.
Logistic Regression (with type annotations).
# An example implementation of Logistic Regression
# Originally by Alejandro Peralta
# Type annotation by Daniel Moisset
from typing import Tuple

import numpy as np
import scipy as sp
from scipy.optimize import fmin_l_bfgs_b
from scipy.special import expit
from sklearn import cross_validation
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.utils.extmath import log_logistic
from sklearn.utils.fixes import expit as logistic_sigmoid
class LR(object):
    """Binary logistic regression with an L2 penalty, fitted by L-BFGS-B.

    The model keeps one coefficient per feature plus an intercept, all in a
    single 1-D ``weights`` vector whose last entry is the intercept.

    Args:
        C: Multiplier of the ``0.5 * ||w||^2`` penalty passed to
            ``likelihood`` during ``fit`` (the intercept is not penalized).
        pgtol: Projected-gradient tolerance handed to ``fmin_l_bfgs_b``.
        maxiter: Iteration cap handed to ``fmin_l_bfgs_b``.

    Attributes set by ``fit``:
        classes_: Sorted unique labels found in the training targets.
        weights: Array of ``n_features + 1`` fitted parameters.
    """

    def __init__(self, C: float = 1.0, pgtol: float = 0.00001,
                 maxiter: int = 100) -> None:
        self.C = C
        self.pgtol = pgtol
        self.maxiter = maxiter

    def fit(self, X: "np.ndarray[float]", y: "np.ndarray[int]") -> 'LR':
        """Estimate ``weights`` from training data and return ``self``.

        Args:
            X: Training samples, shape ``(n_samples, n_features)``.
            y: One label per row of ``X``. ``classes_[1]`` is the positive
                class; every other label is treated as negative.

        Returns:
            This estimator, so calls can be chained.

        Raises:
            ValueError: If ``y`` holds fewer than two distinct labels.
        """
        self.classes_ = np.unique(y)
        if self.classes_.shape[0] < 2:
            raise ValueError(
                "LR needs at least two distinct labels in y, got %d"
                % self.classes_.shape[0])

        # ``likelihood`` expects targets in {-1, +1}. Build a fresh array so
        # that labels other than 0/1 are encoded correctly and the caller's
        # ``y`` is never modified.
        signed_y = np.where(np.asarray(y) == self.classes_[1], 1, -1)

        initial = np.zeros(X.shape[1] + 1)
        # fprime=None: ``likelihood`` returns (value, gradient) together.
        self.weights, _, info = fmin_l_bfgs_b(
            func=self.likelihood,
            x0=initial,
            fprime=None,
            args=(X, signed_y, self.C),
            pgtol=self.pgtol,
            maxiter=self.maxiter)
        print ("Minimization information: %s" % info)
        return self

    def decision_function(self, X_test: "np.ndarray[float]") -> "np.ndarray[float]":
        """Return the linear score ``X_test . coef + intercept`` per sample.

        A positive score favours ``classes_[1]``.
        """
        scores = np.dot(X_test, self.weights[:-1]) + self.weights[-1]
        # Flatten a single-column 2-D result to 1-D.
        if scores.ndim > 1 and scores.shape[1] == 1:
            return scores.ravel()
        return scores

    def predict(self, X_test: "np.ndarray[float]") -> "np.ndarray[int]":
        """Return the predicted label (taken from ``classes_``) per sample."""
        scores = self.decision_function(X_test)
        if scores.ndim == 1:
            # Builtin ``int``: the ``np.int`` alias no longer exists in
            # current NumPy releases.
            indices = (scores > 0).astype(int)
        else:
            indices = scores.argmax(axis=1)
        return self.classes_[indices]

    def predict_proba(self, X_test: "np.ndarray[float]") -> "np.ndarray[float]":
        """Return the sigmoid of the linear score for each sample.

        This is the modelled probability of ``classes_[1]`` only (one value
        per sample, not one column per class).
        """
        return expit(np.dot(X_test, self.weights[:-1]) + self.weights[-1])

    def likelihood(self, weights: "np.ndarray[float]",
                   X: "np.ndarray[float]", y: "np.ndarray[int]",
                   C: float) -> Tuple[float, "np.ndarray[float]"]:
        """Return the penalized negative log-likelihood and its gradient.

        With ``w = weights[:-1]``, ``c = weights[-1]``,
        ``z_i = X[i] . w + c`` and the sigmoid ``g``, the value is::

            -sum_i log g(y_i * z_i) + C * 0.5 * (w . w)

        Args:
            weights: Coefficients followed by the intercept.
            X: Samples, shape ``(n_samples, n_features)``.
            y: Targets encoded as -1 / +1, one per sample.
            C: Strength of the L2 penalty on the coefficients.

        Returns:
            ``(loss, grad)`` where ``grad`` has the same shape as
            ``weights`` and its last entry is the intercept derivative.
        """
        coef, intercept = weights[:-1], weights[-1]
        margins = y * (np.dot(X, coef) + intercept)

        # log g(t) == -log(1 + exp(-t)) == -logaddexp(0, -t); logaddexp
        # evaluates this without overflowing for large |t|.
        loss = np.sum(np.logaddexp(0, -margins)) + C * 0.5 * np.dot(coef, coef)

        # d/dz_i of -log g(y_i * z_i) is (g(y_i * z_i) - 1) * y_i.
        residual = (expit(margins) - 1) * y
        grad = np.empty_like(weights)
        grad[:-1] = np.dot(X.T, residual) + C * coef
        grad[-1] = residual.sum()  # intercept acts as a constant-1 feature
        return loss, grad
if __name__ == '__main__':
    # Demo: fit the hand-written LR and scikit-learn's LogisticRegression on
    # the same random one-feature, two-class problem and print both results
    # so they can be compared by eye.
    samples, labels = make_classification(
        n_features=1,
        n_informative=1,
        n_redundant=0,
        n_classes=2,
        n_clusters_per_class=1,
    )
    # NOTE(review): `sklearn.cross_validation` only exists in older
    # scikit-learn releases; newer ones provide train_test_split in
    # `sklearn.model_selection` - confirm against the installed version.
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(samples, labels)  # type: np.ndarray[float], np.ndarray[float], np.ndarray[int], np.ndarray[int]

    # --- hand-written implementation ---
    ours = LR()
    ours.fit(X_train, y_train)
    n_coef = X_train.shape[1]
    print("Coef:", ours.weights[:n_coef])
    print("Intercept:", ours.weights[-1])
    print("Score:", ours.decision_function(X_test))
    print("Prediction:", ours.predict(X_test))
    print("Prediction prob:", ours.predict_proba(X_test))

    print("--------------")

    # --- scikit-learn reference ---
    reference = LogisticRegression()
    reference.fit(X_train, y_train)
    print("Coef:", reference.coef_)
    print("Intercept:", reference.intercept_)
    print("Score:", reference.decision_function(X_test))
    print("Prediction:", reference.predict(X_test))
    print("Prediction prob:", reference.predict_proba(X_test))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment