@dwf
Created November 2, 2011 23:10
My implementation of feature sign search for L1 minimization.
"""
L1-penalized minimization using the feature sign search algorithm.
"""
import logging

import numpy as np

log = logging.getLogger("feature_sign")
log.setLevel(logging.INFO)


def feature_sign_search(dictionary, signal, sparsity, solution=None):
"""
Solve an L1-penalized minimization problem with the feature
sign search algorithm of Lee et al (2006).
Parameters
----------
dictionary : array_like, 2-dimensional
The dictionary of basis functions from which to form the
sparse linear combination.
signal : array_like, 1-dimensional
The signal being decomposed as a sparse linear combination
of the columns of the dictionary.
sparsity : float
The coefficient on the L1 penalty term of the cost function.
solution : ndarray, 1-dimensional, optional
Pre-allocated vector to use to store the solution.
Returns
-------
solution : ndarray, 1-dimensional
Vector containing the solution. If an array was passed in
as the argument `solution`, it will be updated in place
and the same object will be returned.
References
----------
.. [1] H. Lee, A. Battle, R. Raina, and A. Y. Ng. "Efficient
sparse coding algorithms". Advances in Neural Information
Processing Systems 19, 2007.
"""
    effective_zero = 1e-18
    # Precompute matrices for speed.
    gram_matrix = np.dot(dictionary.T, dictionary)
    target_correlation = np.dot(dictionary.T, signal)
    # Initialization.
    if solution is None:
        solution = np.zeros(gram_matrix.shape[0])
    else:
        assert solution.ndim == 1, "solution must be 1-dimensional"
        assert solution.shape[0] == dictionary.shape[1], (
            "solution.shape[0] does not match dictionary.shape[1]"
        )
        # Initialize all elements to zero.
        solution[...] = 0.
    signs = np.zeros(gram_matrix.shape[0], dtype=np.int8)
    active_set = set()
    z_opt = np.inf
    # Used to store max(abs(grad[nzidx] + sparsity * signs[nzidx])).
    # Set to 0 here to trigger a new feature activation on the first
    # iteration.
    nz_opt = 0
    # The second term of the gradient is zero at initialization, since
    # the solution vector starts out all zeros.
    grad = -2 * target_correlation  # + 2 * np.dot(gram_matrix, solution)
    # Used only to compute the exact value of the cost function.
    sds = np.dot(signal.T, signal)
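    # Optimality conditions from Lee et al.: at a solution, every zero
    # coefficient must satisfy |grad_i| <= sparsity (the worst violation
    # is tracked by z_opt), and every non-zero coefficient must satisfy
    # grad_i + sparsity * sign(x_i) == 0 (tracked by nz_opt).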
    while z_opt > sparsity or not np.allclose(nz_opt, 0):
        if np.allclose(nz_opt, 0):
            # Activate the zero coefficient whose gradient magnitude is
            # largest, provided it violates the optimality condition.
            candidate = np.argmax(np.abs(grad) * (signs == 0))
            log.debug("candidate feature: %d" % candidate)
            if grad[candidate] > sparsity:
                signs[candidate] = -1
                solution[candidate] = 0.
                log.debug("added feature %d with negative sign" %
                          candidate)
                active_set.add(candidate)
            elif grad[candidate] < -sparsity:
                signs[candidate] = 1
                solution[candidate] = 0.
                log.debug("added feature %d with positive sign" %
                          candidate)
                active_set.add(candidate)
            if len(active_set) == 0:
                break
        else:
            log.debug("Non-zero coefficient optimality not satisfied, "
                      "skipping new feature activation")
        indices = np.array(sorted(active_set))
        restr_gram = gram_matrix[np.ix_(indices, indices)]
        restr_corr = target_correlation[indices]
        restr_sign = signs[indices]
        rhs = restr_corr - sparsity * restr_sign / 2
        # TODO: implement footnote 3.
        #
        # If restr_gram becomes singular, check whether rhs is in the
        # column space of restr_gram.
        #
        # If so, use the pseudoinverse instead; if not, update to the
        # first zero-crossing along any direction in the null space of
        # restr_gram that has a non-zero dot product with rhs (how to
        # choose this direction?).
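        # With the signs of the active coefficients held fixed, the
        # objective restricted to the active set is a smooth quadratic,
        # and setting its gradient to zero gives the closed-form
        # minimizer computed below:
        #
        #     new_solution = inv(restr_gram) * (restr_corr - sparsity * restr_sign / 2)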
        new_solution = np.linalg.solve(np.atleast_2d(restr_gram), rhs)
        new_signs = np.sign(new_solution)
        restr_oldsol = solution[indices]
        sign_flips = np.where(abs(new_signs - restr_sign) > 1)[0]
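        # A difference of 2 in the sign vectors marks a coefficient
        # whose sign flipped between the old and new solutions. The
        # objective is only piecewise quadratic, so a discrete line
        # search over the zero-crossings between the two points is
        # needed to find the true minimum along the segment.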
        if len(sign_flips) > 0:
            # The new solution itself is one endpoint of the line
            # search; start from it.
            best_curr = new_solution
            best_obj = (sds + (np.dot(new_solution,
                                      np.dot(restr_gram, new_solution))
                        - 2 * np.dot(new_solution, restr_corr))
                        + sparsity * abs(new_solution).sum())
            if log.isEnabledFor(logging.DEBUG):
                # Extra computation only done if the log level is
                # sufficient.
                ocost = (sds + (np.dot(restr_oldsol,
                                       np.dot(restr_gram, restr_oldsol))
                         - 2 * np.dot(restr_oldsol, restr_corr))
                         + sparsity * abs(restr_oldsol).sum())
                cost = (sds + np.dot(new_solution,
                                     np.dot(restr_gram, new_solution))
                        - 2 * np.dot(new_solution, restr_corr)
                        + sparsity * abs(new_solution).sum())
                log.debug("Cost before linesearch (old)\t: %e" % ocost)
                log.debug("Cost before linesearch (new)\t: %e" % cost)
            else:
                ocost = None
            for idx in sign_flips:
                a = new_solution[idx]
                b = restr_oldsol[idx]
                prop = b / (b - a)
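                # prop is the fraction of the way from the old solution
                # toward the new one at which coefficient idx crosses
                # zero: b - prop * (b - a) == 0.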
                curr = restr_oldsol - prop * (restr_oldsol - new_solution)
                cost = sds + (np.dot(curr, np.dot(restr_gram, curr))
                              - 2 * np.dot(curr, restr_corr)
                              + sparsity * abs(curr).sum())
                log.debug("Line search coefficient: %.5f cost = %e "
                          "zero-crossing coefficient's value = %e" %
                          (prop, cost, curr[idx]))
                if cost < best_obj:
                    best_obj = cost
                    best_curr = curr
            log.debug("Lowest cost after linesearch\t: %e" % best_obj)
            if ocost is not None:
                if ocost < best_obj and not np.allclose(ocost, best_obj):
                    log.debug("Warning: objective increased from %e to %e" %
                              (ocost, best_obj))
        else:
            log.debug("No sign flips, not doing line search")
            best_curr = new_solution
        solution[indices] = best_curr
        # Prune active coefficients that have been driven (effectively)
        # to zero, and update the sign vector.
        zeros = indices[np.abs(solution[indices]) < effective_zero]
        solution[zeros] = 0.
        signs[indices] = np.int8(np.sign(solution[indices]))
        active_set.difference_update(zeros)
        # Recompute the gradient and the two optimality statistics.
        grad = -2 * target_correlation + 2 * np.dot(gram_matrix, solution)
        z_opt = np.max(abs(grad[signs == 0]))
        nz_opt = np.max(abs(grad[signs != 0] + sparsity * signs[signs != 0]))
    return solution
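
A minimal usage sketch (not part of the original gist; the sizes, seed, and sparsity value below are arbitrary, chosen only for illustration):

if __name__ == "__main__":
    rng = np.random.RandomState(0)
    # A random dictionary of 20 basis functions in 10 dimensions.
    dictionary = rng.randn(10, 20)
    # A signal built from two dictionary columns plus a little noise.
    signal = 3. * dictionary[:, 3] - 2. * dictionary[:, 11]
    signal += 0.01 * rng.randn(10)
    coefficients = feature_sign_search(dictionary, signal, sparsity=0.1)
    print("non-zero coefficients:", np.nonzero(coefficients)[0])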
@hungmat commented Feb 23, 2018

The code works very well, thank you so much
