Created
January 16, 2012 23:39
-
-
Save danhammer/1623646 to your computer and use it in GitHub Desktop.
Logistic regression fit by a ridge-stabilized Newton-Raphson solver (logistic / ridge classifier)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def logistic_regression(x,
                        y,
                        rdg_cons=1.e-6,
                        verbose=False,
                        conv_thresh=1.e-8,
                        maxit=500):
    """Calculate the maximum likelihood estimates of a logistic regression.

    Uses the Newton-Raphson algorithm
    (sites.stat.psu.edu/~jiali/course/stat597e/notes2/logit.pdf), with a
    small ridge constant added to the information matrix so it is always
    invertible, plus a first-order correction for the bias that constant
    introduces.

    Args:
        x : numpy array of characteristics. SHAPE: k-by-N, where k
            is the number of attributes and N is the number of total
            observations.
        y : numpy array of binary outcome labels, length N.
        rdg_cons : ridge constant (close to zero)
        verbose : if True, print per-iteration diagnostics
        conv_thresh : convergence threshold on the L1 change in beta
        maxit : max number of iterations

    [Note that the x and y arrays must be aligned correctly, i.e., the
    attributes at index i in the x vector must correspond to the label
    at index i in the y vector.]

    Returns:
        beta : logistic regression coefficients, with intercept
        A : information matrix
        log_like : log-likelihood, usable for a chi-squared sig. test

    Useful auxiliary transforms:
        [covariance matrix]        covmat = inverse(A)
        [standard errors for beta] stderr = sqrt(diag(covmat))
        [scaled deviance]          deviance = -2 * log_like
    """
    if x.shape[-1] != len(y):
        # Python 3 raise syntax (original used the removed py2 form).
        raise ValueError("x and y should be of the same length.")
    N, npreds = x.shape[1], x.shape[0]
    # starting beta vector : no info => all zeros
    beta = np.zeros(npreds + 1, x.dtype)
    # add an intercept row of ones above the characteristic matrix
    X = np.ones((npreds + 1, N), x.dtype)
    X[1:, :] = x
    Xt = np.transpose(X)
    # initial values
    i = 0
    diff = 1.
    while i < maxit:
        beta_old = beta
        exp_bx = np.exp(np.dot(beta, X))
        p = exp_bx / (1. + exp_bx)
        # log-likelihood
        log_like = np.sum(y * np.log(p) + (1. - y) * np.log(1. - p))
        # scoring function (gradient of the log-likelihood in beta)
        score = np.dot(X, y - p)
        # Update the coefficient vector, based on the first- and
        # second-order approximations of the likelihood function, as
        # well as a correction factor that estimates the first-order
        # approximation of the error induced by the ridge constant
        # originally added to ensure that the second-order
        # approximation matrix could be inverted.
        beta_diff, A = increment_beta(X, Xt, p, score, npreds, rdg_cons)
        diff_correction = beta_diff_correction(X, Xt, p, score, npreds,
                                               rdg_cons)
        beta = beta_old + beta_diff + diff_correction
        # NOTE(review): dropped the stray debug `print beta` here.
        # sum of absolute differences
        diff = np.sum(np.fabs(beta - beta_old))
        if verbose:
            print('Iteration: %s: %s, %s' % (i + 1, log_like, diff))
        # process completed if within the convergence threshold
        if diff <= conv_thresh:
            break
        i += 1
    # BUG FIX: the original tested `iter == maxit`, comparing the
    # *builtin* `iter` to an int — never true, so the warning could
    # never fire.  Compare the loop counter instead.
    if i == maxit and diff > conv_thresh:
        print('\nwarning: convergence not achieved')
    return beta, A, log_like
def increment_beta(X, Xt, p, score, npreds, rdg_cons):
    """Return the ridge-stabilized Newton step for the coefficients.

    Builds the information matrix A = X * diag(p*(1-p)) * Xt, adds
    rdg_cons along the diagonal so the matrix is invertible, and applies
    its inverse to the score vector.

    Returns:
    -----------
    inv(A) . score : beta adjustment
    A : information matrix
    """
    # per-observation Bernoulli variance, the Newton-Raphson weights
    bernoulli_var = np.multiply(p, 1. - p)
    ridge = np.identity(npreds + 1) * rdg_cons
    A = np.dot(X * bernoulli_var, Xt) + ridge
    adjustment = np.dot(np.linalg.inv(A), score)
    return adjustment, A
def beta_diff_correction(X, Xt, p, score, npreds, rdg_cons):
    """Estimate the first-order (linear) effect of adding the ridge
    constant.

    Treat the Newton step as a function f of the ridge constant v and
    fit the linear model f(v) = a*v + b with a central difference over
    [v - v/10, v + v/10].  The uncorrected update implicitly assumes
    f(0) = f(v) [no added constant along the diagonal], which may not
    be correct; a better approximation of f(0) is the intercept
    b = f(v) - a*v, so the correction is f(v) - b = a*v.
    """
    # gamma is effectively dx in the Taylor approximation
    gamma = rdg_cons / 10
    # TODO (dan): refactor functions to calculate the two endpoint
    # steps, noting that this is also used in the body of the logistic
    # equation
    step_hi, _ = increment_beta(X, Xt, p, score, npreds, rdg_cons + gamma)
    step_lo, _ = increment_beta(X, Xt, p, score, npreds, rdg_cons - gamma)
    # slope a of the fitted line, via the central difference
    slope = (step_hi - step_lo) / (2 * gamma)
    return slope * rdg_cons
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment