Created
January 16, 2012 23:39
-
-
Save danhammer/1623646 to your computer and use it in GitHub Desktop.
Logistic regression fit by a ridge-stabilized Newton-Raphson solver (logistic / ridge classifier)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def logistic_regression(x,
                        y,
                        rdg_cons=1.e-6,
                        verbose=False,
                        conv_thresh=1.e-8,
                        maxit=500):
    """Calculate the maximum likelihood estimates of a logistic regression.

    Uses the Newton-Raphson algorithm
    (sites.stat.psu.edu/~jiali/course/stat597e/notes2/logit.pdf), with a
    small ridge constant added to the information matrix so it is always
    invertible, plus a first-order correction for the bias that constant
    introduces.

    Args:
        x : numpy array of characteristics. SHAPE: k-by-N, where k
            is the number of attributes and N is the number of total
            observations.
        y : numpy array of binary outcome labels, length N.
        rdg_cons : ridge constant (close to zero)
        verbose : if True, print per-iteration diagnostics
        conv_thresh : convergence threshold on the L1 change in beta
        maxit : max number of iterations

    [Note that the x and y arrays must be aligned correctly, i.e., the
    attributes at index i in the x vector must correspond to the label
    at index i in the y vector.]

    Returns:
        beta : logistic regression coefficients, with intercept
        A : information matrix
        log_like : log-likelihood, usable for a chi-squared sig. test

    Useful auxiliary transforms:
        [covariance matrix]        covmat = inverse(A)
        [standard errors for beta] stderr = sqrt(diag(covmat))
        [scaled deviance]          deviance = -2 * log_like
    """
    if x.shape[-1] != len(y):
        # Python 3 raise syntax (original used the removed py2 form).
        raise ValueError("x and y should be of the same length.")
    N, npreds = x.shape[1], x.shape[0]
    # starting beta vector : no info => all zeros
    beta = np.zeros(npreds + 1, x.dtype)
    # add an intercept row of ones above the characteristic matrix
    X = np.ones((npreds + 1, N), x.dtype)
    X[1:, :] = x
    Xt = np.transpose(X)
    # initial values
    i = 0
    diff = 1.
    while i < maxit:
        beta_old = beta
        exp_bx = np.exp(np.dot(beta, X))
        p = exp_bx / (1. + exp_bx)
        # log-likelihood
        log_like = np.sum(y * np.log(p) + (1. - y) * np.log(1. - p))
        # scoring function (gradient of the log-likelihood in beta)
        score = np.dot(X, y - p)
        # Update the coefficient vector, based on the first- and
        # second-order approximations of the likelihood function, as
        # well as a correction factor that estimates the first-order
        # approximation of the error induced by the ridge constant
        # originally added to ensure that the second-order
        # approximation matrix could be inverted.
        beta_diff, A = increment_beta(X, Xt, p, score, npreds, rdg_cons)
        diff_correction = beta_diff_correction(X, Xt, p, score, npreds,
                                               rdg_cons)
        beta = beta_old + beta_diff + diff_correction
        # NOTE(review): dropped the stray debug `print beta` here.
        # sum of absolute differences
        diff = np.sum(np.fabs(beta - beta_old))
        if verbose:
            print('Iteration: %s: %s, %s' % (i + 1, log_like, diff))
        # process completed if within the convergence threshold
        if diff <= conv_thresh:
            break
        i += 1
    # BUG FIX: the original tested `iter == maxit`, comparing the
    # *builtin* `iter` to an int — never true, so the warning could
    # never fire.  Compare the loop counter instead.
    if i == maxit and diff > conv_thresh:
        print('\nwarning: convergence not achieved')
    return beta, A, log_like
def increment_beta(X, Xt, p, score, npreds, rdg_cons):
    """Return the ridge-stabilized Newton step for the coefficients.

    Builds the information matrix A = X * diag(p*(1-p)) * Xt, adds
    rdg_cons along the diagonal so the matrix is invertible, and applies
    its inverse to the score vector.

    Returns:
    -----------
    inv(A) . score : beta adjustment
    A : information matrix
    """
    # per-observation Bernoulli variance, the Newton-Raphson weights
    bernoulli_var = np.multiply(p, 1. - p)
    ridge = np.identity(npreds + 1) * rdg_cons
    A = np.dot(X * bernoulli_var, Xt) + ridge
    adjustment = np.dot(np.linalg.inv(A), score)
    return adjustment, A
def beta_diff_correction(X, Xt, p, score, npreds, rdg_cons):
    """Estimate the first-order (linear) effect of adding the ridge
    constant.

    Treat the Newton step as a function f of the ridge constant v and
    fit the linear model f(v) = a*v + b with a central difference over
    [v - v/10, v + v/10].  The uncorrected update implicitly assumes
    f(0) = f(v) [no added constant along the diagonal], which may not
    be correct; a better approximation of f(0) is the intercept
    b = f(v) - a*v, so the correction is f(v) - b = a*v.
    """
    # gamma is effectively dx in the Taylor approximation
    gamma = rdg_cons / 10
    # TODO (dan): refactor functions to calculate the two endpoint
    # steps, noting that this is also used in the body of the logistic
    # equation
    step_hi, _ = increment_beta(X, Xt, p, score, npreds, rdg_cons + gamma)
    step_lo, _ = increment_beta(X, Xt, p, score, npreds, rdg_cons - gamma)
    # slope a of the fitted line, via the central difference
    slope = (step_hi - step_lo) / (2 * gamma)
    return slope * rdg_cons
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment