"""
Multinomial Logistic Regression (MLR)
=====================================
Multiclass-classification with the MLR classifier
Authors: Adrien Gaidon & Jakob Verbeek
"""
import numpy as np
from scipy.optimize import fmin_bfgs
def mlr_nll_and_gradient(X, Y, W, sigma2=None, weighted=False):
    """ Compute the MLR negative log-likelihood and its gradient

    Parameters
    ----------
    X : array-like, shape [n_samples, n_features],
        Training data

    Y : numpy array, shape [n_samples, n_classes],
        Target values

    W : numpy array, shape [n_features, n_classes],
        MLR coefficient estimates

    sigma2 : float, optional, default: None,
        Bandwidth of the Gaussian prior for MAP estimation
        (penalization parameter)

    weighted : boolean, optional, default: False,
        if False, then assumes Y contains binary labels, and the
        log-likelihood is sum_n log sum_c Y[n, c] p(c|data_n);
        if True, then assumes Y contains weights, and the
        log-likelihood is sum_n sum_c Y[n, c] log p(c|data_n)

    Returns
    -------
    nll : float,
        the per-sample negative log-likelihood of the MLR objective
        (including the prior term if sigma2 is given)

    grad : numpy array, shape [n_features, n_classes],
        the gradient of the negative log-likelihood of the MLR objective
    """
    n_samples, n_features = X.shape
    _n, n_classes = Y.shape
    _d, _c = W.shape
    # check dimensions
    assert n_samples == _n, "Shape mismatch between X and Y"
    assert n_features == _d, "Shape mismatch between X and W"
    assert n_classes == _c, "Shape mismatch between Y and W"
    # class posteriors p(c|data_n) = exp(-s[n, c]) / sum_k exp(-s[n, k]),
    # with scores s = X.W; subtracting the row-wise min keeps all the
    # exponents non-positive, which avoids overflow (the sign convention
    # is absorbed into the learned W)
    Yhat = np.dot(X, W)
    Yhat -= Yhat.min(axis=1)[:, np.newaxis]
    Yhat = np.exp(-Yhat)
    # l1-normalize
    Yhat /= Yhat.sum(axis=1)[:, np.newaxis]
    if weighted:
        # log-likelihood: sum_n sum_c Y[n, c] log p(c|data_n)
        # (additive epsilon inside the log to guard against log(0))
        nll = np.sum(np.log(Yhat + 1e-15) * Y)
        # gradient w.r.t. the scores: p[n, c] * sum_k Y[n, k] - Y[n, c]
        Yhat *= Y.sum(axis=1)[:, np.newaxis]
        Yhat -= Y
    else:
        # log-likelihood: sum_n log sum_c Y[n, c] p(c|data_n)
        _Yhat = Yhat * Y
        nll = np.sum(np.log(_Yhat.sum(axis=1)))
        _Yhat /= _Yhat.sum(axis=1)[:, np.newaxis]
        Yhat -= _Yhat
        del _Yhat
    grad = np.dot(X.T, Yhat)
    if sigma2 is not None:
        # add the log of the isotropic Gaussian prior on W
        nll -= np.sum(W * W) / (2. * sigma2)
        nll -= n_features * n_classes * np.log(sigma2) / 2.
        grad -= W / float(sigma2)
    # negate and average to obtain the per-sample negative log-likelihood
    nll /= -float(n_samples)
    grad /= -float(n_samples)
    return nll, grad
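

# A quick way to validate the analytic gradient above is a finite-difference
# check. The sketch below is illustrative and not part of the original gist:
# it compares mlr_nll_and_gradient against scipy's check_grad on small random
# inputs; the returned error should be small (around 1e-6).
def _check_mlr_gradient(sigma2=1.0, weighted=False):
    from scipy.optimize import check_grad
    rng = np.random.RandomState(0)
    n, d, c = 20, 5, 3
    X = rng.randn(n, d)
    # random one-hot targets (also valid as weights for the weighted case)
    Y = np.zeros((n, c))
    Y[np.arange(n), rng.randint(c, size=n)] = 1.
    fun = lambda w: mlr_nll_and_gradient(
        X, Y, w.reshape((d, c)), sigma2, weighted)[0]
    grad = lambda w: mlr_nll_and_gradient(
        X, Y, w.reshape((d, c)), sigma2, weighted)[1].ravel()
    return check_grad(fun, grad, rng.randn(d * c))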
class FuncGradComputer(object):
    """ Convenience class to pass func and grad separately to the optimizer

    The nll and its gradient are computed simultaneously, so each joint
    evaluation is cached together with the point it was computed at, and
    reused when the other quantity is requested at the same point.
    """

    def __init__(self, X, Y, ss, weighted):
        self.X = X
        self.Y = Y
        self.ss = ss
        self.weighted = weighted
        # cache of the last evaluation point and its (nll, grad)
        self.w_ = None
        self.nll_ = None
        self.grad_ = None

    def _compute_func_grad(self, w):
        """ Simultaneously compute objective function and gradient at w
        """
        # reshape the input flattened by scipy
        W = w.reshape((self.X.shape[1], self.Y.shape[1]))
        self.nll_, self.grad_ = mlr_nll_and_gradient(
            self.X, self.Y, W, self.ss, self.weighted)
        self.w_ = w.copy()

    def compute_fun(self, w):
        if self.w_ is None or not np.array_equal(w, self.w_):
            self._compute_func_grad(w)
        return self.nll_

    def compute_grad(self, w):
        if self.w_ is None or not np.array_equal(w, self.w_):
            self._compute_func_grad(w)
        return self.grad_.ravel()  # scipy expects a flattened gradient
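

# Illustrative usage (not part of the original gist): fmin_bfgs expects the
# objective and its gradient as two separate callables, which this wrapper
# provides while still computing both quantities in a single pass, e.g.:
#
#   fgc = FuncGradComputer(X, Y, ss=1.0, weighted=False)
#   w0 = np.zeros(X.shape[1] * Y.shape[1])
#   w_opt = fmin_bfgs(fgc.compute_fun, w0, fprime=fgc.compute_grad)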
class MLR(object):
    """ Multinomial Logistic Regression classifier

    Parameters
    ----------
    ss : float, optional, default: None,
        Bandwidth (sigma^2) of the Gaussian prior for MAP estimation
        (penalization parameter)

    weighted : boolean, optional, default: False,
        if False, then maximize sum_n log sum_c labels(n, c) p(c|data_n)
        if True, then maximize sum_n sum_c labels(n, c) log p(c|data_n)

    seed : int, optional, default: None,
        Seed of the random initialization of the coefficients

    Attributes
    ----------
    W_ : numpy array, shape [n_features, n_classes],
        the coefficient estimates

    infos_ : dict,
        various output infos about the optimization
    """

    def __init__(self, ss=None, weighted=False, seed=None):
        assert ss is None or ss > 0, "ss must be None or > 0"
        self.ss = ss
        self.weighted = weighted
        self.seed = seed
        self.W_ = None
    def fit(self, X, y):
        """ Fit the model

        Parameters
        ----------
        X : array-like, shape [n_samples, n_features],
            Training data

        y : array-like, shape [n_samples] or [n_samples, n_classes],
            Target values

        Returns
        -------
        self : returns an instance of self.
        """
        n_samples, n_features = X.shape
        # get the target values
        if y.ndim == 1:
            # convert to 1-of-k coding (one-hot)
            assert len(y) == n_samples, "Invalid number of labels"
            self.classes = np.unique(y)
            n_classes = len(self.classes)
            Y = np.zeros((n_samples, n_classes), dtype=np.float64)
            for i, cls in enumerate(self.classes):
                Y[y == cls, i] = 1
        else:
            _n, n_classes = y.shape
            assert _n == n_samples, "Invalid number of rows in Y"
            self.classes = np.arange(n_classes)
            Y = y
        # initialize the weight matrix
        np.random.seed(self.seed)
        w0 = np.random.random((n_features * n_classes, ))
        # initialize the functions to compute the cost function and gradient
        fgcomp = FuncGradComputer(X, Y, self.ss, self.weighted)
        fun = fgcomp.compute_fun
        grad = fgcomp.compute_grad
        # minimize with BFGS
        results = fmin_bfgs(fun, w0, fprime=grad, full_output=True)
        self.W_ = results[0].reshape((n_features, n_classes))
        self.infos_ = dict(zip(
            "fopt gopt Bopt func_calls grad_calls warnflag".split(),
            results[1:]))
        return self
    def predict_proba(self, X):
        """ Probability estimates for all classes, ordered by label

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features],
            Vectors to predict

        Returns
        -------
        Yhat : array-like, shape = [n_samples, n_classes],
            Probability of the sample for each class in the model,
            where classes are ordered by arithmetical order.
        """
        # same posterior model as in training: p(c|x) = exp(-x.W[:, c]) / Z(x)
        Yhat = np.dot(X, self.W_)
        Yhat -= Yhat.min(axis=1)[:, np.newaxis]
        Yhat = np.exp(-Yhat)
        # l1-normalize
        Yhat /= Yhat.sum(axis=1)[:, np.newaxis]
        return Yhat
    def predict(self, X):
        """ Predict the most likely label for each sample

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features],
            Vectors to predict

        Returns
        -------
        yhat : array-like, shape = [n_samples],
            The most likely label for each sample.
        """
        Yhat = self.predict_proba(X)
        # argmax over the class posteriors, mapped back to the class labels
        yhat = self.classes[np.argmax(Yhat, axis=1)]
        return yhat
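

# End-to-end sketch on synthetic data (illustrative, not part of the original
# gist): three Gaussian blobs, one per class; training accuracy should be
# close to 1 since the blobs are well separated.
if __name__ == "__main__":
    rng = np.random.RandomState(0)
    means = np.array([[0., 0.], [4., 0.], [0., 4.]])
    X = np.vstack([rng.randn(30, 2) + m for m in means])
    y = np.repeat(np.arange(3), 30)
    clf = MLR(ss=1.0, seed=0).fit(X, y)
    print("gradient check error: %g" % _check_mlr_gradient())
    print("training accuracy: %.2f" % np.mean(clf.predict(X) == y))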