@fabianp · Created August 21, 2017 22:02
Sparse Proximal SAGA
import numpy as np
from scipy import sparse
from datetime import datetime
from numba import njit


@njit
def deriv_logistic(p, b):
    """Derivative of the logistic loss at margin p, with label b in {-1, 1}."""
    # .. numerically stable evaluation of the sigmoid ..
    p *= b
    if p > 0:
        phi = 1. / (1 + np.exp(-p))
    else:
        exp_t = np.exp(p)
        phi = exp_t / (1. + exp_t)
    return (phi - 1) * b


@njit
def prox_L1(x, gamma):
    """Proximal operator for the L1 norm (soft-thresholding):
    sign(x) * max(|x| - gamma, 0)."""
    return np.sign(x) * max(abs(x) - gamma, 0.)
@njit
def _SPSAGA(x, memory_gradient, gradient_average, A_data, A_indices, A_indptr,
            b, alpha, beta, d, max_iter, n_samples, step_size, trace_x):
    sample_indices = np.arange(n_samples)
    # .. main iteration ..
    for it in range(max_iter):
        # .. keep track of coefficients for later plotting ..
        trace_x[it] = x
        np.random.shuffle(sample_indices)
        for i in sample_indices:
            p = 0.
            # .. sparse dot product a_i^T x ..
            for j in range(A_indptr[i], A_indptr[i+1]):
                j_idx = A_indices[j]
                p += x[j_idx] * A_data[j]
            grad_i = deriv_logistic(p, b[i])
            old_grad = memory_gradient[i]
            memory_gradient[i] = grad_i
            # .. update coefficients: only the entries present in sample i ..
            # .. are touched; d rescales the gradient average and the L1 ..
            # .. penalty so the sparse update stays unbiased ..
            for j in range(A_indptr[i], A_indptr[i+1]):
                j_idx = A_indices[j]
                delta = (grad_i - old_grad) * A_data[j]
                incr = delta + d[j_idx] * (
                    gradient_average[j_idx] + alpha * x[j_idx])
                x[j_idx] = prox_L1(x[j_idx] - step_size * incr,
                                   beta * step_size * d[j_idx])
                # .. delta already carries the A_data[j] factor ..
                gradient_average[j_idx] += delta / n_samples
    return x
def SPSAGA(A, b, alpha, beta, step_size, max_iter=100):
    """
    Solves an L1-regularized logistic regression problem of the form

        argmin_x (1/n) sum_i logistic_loss(a_i^T x, b_i)
                 + (alpha/2) * ||x||_2^2 + beta * ||x||_1
    """
    # .. make sure the input has the correct type and ..
    # .. create temporary variables ..
    A = sparse.csr_matrix(A, dtype=np.float64)
    n_samples, n_features = A.shape
    x = np.zeros(n_features)
    memory_gradient = np.zeros(n_samples)
    gradient_average = np.zeros(n_features)
    # .. to track execution time later ..
    start = datetime.now()
    trace_x = np.zeros((max_iter, n_features))
    # .. construct the diagonal D matrix: d[j] is the inverse of the ..
    # .. fraction of samples whose row has a nonzero in column j ..
    B = A.copy()
    B.data[:] = 1
    d = np.array(B.mean(0)).ravel()
    idx = (d != 0)
    d[idx] = 1 / d[idx]
    # .. run the SPSAGA iteration ..
    _SPSAGA(
        x, memory_gradient, gradient_average, A.data, A.indices, A.indptr,
        b, alpha, beta, d, max_iter, n_samples, step_size, trace_x)
    # .. execution time cannot be measured from inside Numba-compiled code, ..
    # .. but every epoch performs the same amount of work, so we assume ..
    # .. each one took the same share of the total elapsed time ..
    delta = (datetime.now() - start).total_seconds()
    trace_time = np.linspace(0, delta, max_iter)
    return x, trace_x, trace_time
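

# ----------------------------------------------------------------------
# Minimal usage sketch (not part of the original gist): builds a small
# synthetic sparse problem and runs the solver. The step size 1 / (3 * L),
# with L the largest per-sample Lipschitz constant of the logistic loss,
# follows common SAGA practice and is an assumption, not a value
# prescribed above.
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    n_samples, n_features = 100, 50

    # .. random sparse design matrix and labels in {-1, 1} ..
    A = sparse.random(n_samples, n_features, density=0.1,
                      format='csr', random_state=rng)
    b = rng.choice([-1., 1.], size=n_samples)

    # .. the logistic loss is 1/4-smooth, so L_i = ||a_i||^2 / 4 ..
    L = 0.25 * np.max(np.asarray(A.multiply(A).sum(axis=1)).ravel())
    alpha = beta = 1. / n_samples
    step_size = 1. / (3 * L)

    x, trace_x, trace_time = SPSAGA(A, b, alpha, beta, step_size,
                                    max_iter=50)
    print('fraction of nonzero coefficients:', np.mean(x != 0))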