@fabianp · Created August 21, 2017 22:02
Sparse Proximal SAGA
import numpy as np
from scipy import sparse
from datetime import datetime
from numba import njit


@njit
def deriv_logistic(p, b):
    """Derivative of the logistic loss at margin p, with label b in {-1, 1}."""
    # .. numerically stable evaluation of the sigmoid ..
    p *= b
    if p > 0:
        phi = 1. / (1 + np.exp(-p))
    else:
        exp_t = np.exp(p)
        phi = exp_t / (1. + exp_t)
    return (phi - 1) * b


@njit
def prox_L1(x, gamma):
    """Proximal operator for the L1 norm (soft-thresholding):
    sign(x) * max(|x| - gamma, 0)."""
    return np.sign(x) * max(abs(x) - gamma, 0.)
@njit
def _SPSAGA(x, memory_gradient, gradient_average, A_data, A_indices, A_indptr,
            b, alpha, beta, d, max_iter, n_samples, step_size, trace_x):
    sample_indices = np.arange(n_samples)
    # .. main iteration ..
    for it in range(max_iter):
        # .. keep track of coefficients for later plotting ..
        trace_x[it] = x
        np.random.shuffle(sample_indices)
        for i in sample_indices:
            p = 0.
            # .. sparse dot product a_i^T x ..
            for j in range(A_indptr[i], A_indptr[i+1]):
                j_idx = A_indices[j]
                p += x[j_idx] * A_data[j]
            grad_i = deriv_logistic(p, b[i])
            old_grad = memory_gradient[i]
            memory_gradient[i] = grad_i
            # .. update coefficients: only the entries present in sample i ..
            # .. are touched; d rescales the gradient average and the L1 ..
            # .. penalty so the sparse update stays unbiased ..
            for j in range(A_indptr[i], A_indptr[i+1]):
                j_idx = A_indices[j]
                delta = (grad_i - old_grad) * A_data[j]
                incr = delta + d[j_idx] * (
                    gradient_average[j_idx] + alpha * x[j_idx])
                x[j_idx] = prox_L1(x[j_idx] - step_size * incr,
                                   beta * step_size * d[j_idx])
                # .. delta already carries the A_data[j] factor ..
                gradient_average[j_idx] += delta / n_samples
    return x
def SPSAGA(A, b, alpha, beta, step_size, max_iter=100):
    """
    Solves an L1-regularized logistic regression problem of the form

        argmin_x (1/n) sum_i logistic_loss(a_i^T x, b_i)
                 + (alpha/2) * ||x||_2^2 + beta * ||x||_1
    """
    # .. make sure the input has the correct type and ..
    # .. create temporary variables ..
    A = sparse.csr_matrix(A, dtype=np.float64)
    n_samples, n_features = A.shape
    x = np.zeros(n_features)
    memory_gradient = np.zeros(n_samples)
    gradient_average = np.zeros(n_features)
    # .. to track execution time later ..
    start = datetime.now()
    trace_x = np.zeros((max_iter, n_features))
    # .. construct the diagonal D matrix: d[j] is the inverse of the ..
    # .. fraction of samples whose row has a nonzero in column j ..
    B = A.copy()
    B.data[:] = 1
    d = np.array(B.mean(0)).ravel()
    idx = (d != 0)
    d[idx] = 1 / d[idx]
    # .. run the SPSAGA iteration ..
    _SPSAGA(
        x, memory_gradient, gradient_average, A.data, A.indices, A.indptr,
        b, alpha, beta, d, max_iter, n_samples, step_size, trace_x)
    # .. execution time cannot be measured from inside Numba-compiled code, ..
    # .. but every epoch performs the same amount of work, so we assume ..
    # .. each one took the same share of the total elapsed time ..
    delta = (datetime.now() - start).total_seconds()
    trace_time = np.linspace(0, delta, max_iter)
    return x, trace_x, trace_time
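

# ----------------------------------------------------------------------
# Minimal usage sketch (not part of the original gist): builds a small
# synthetic sparse problem and runs the solver. The step size 1 / (3 * L),
# with L the largest per-sample Lipschitz constant of the logistic loss,
# follows common SAGA practice and is an assumption, not a value
# prescribed above.
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    n_samples, n_features = 100, 50

    # .. random sparse design matrix and labels in {-1, 1} ..
    A = sparse.random(n_samples, n_features, density=0.1,
                      format='csr', random_state=rng)
    b = rng.choice([-1., 1.], size=n_samples)

    # .. the logistic loss is 1/4-smooth, so L_i = ||a_i||^2 / 4 ..
    L = 0.25 * np.max(np.asarray(A.multiply(A).sum(axis=1)).ravel())
    alpha = beta = 1. / n_samples
    step_size = 1. / (3 * L)

    x, trace_x, trace_time = SPSAGA(A, b, alpha, beta, step_size,
                                    max_iter=50)
    print('fraction of nonzero coefficients:', np.mean(x != 0))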