Created
November 2, 2011 23:10
-
-
Save dwf/1335246 to your computer and use it in GitHub Desktop.
My implementation of feature sign search for L1 minimization.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
L1-penalized minimization using the feature sign search algorithm. | |
""" | |
import logging | |
import numpy as np | |
# Module-level logger; level defaults to INFO so the verbose per-iteration
# DEBUG diagnostics emitted by feature_sign_search stay quiet unless the
# caller explicitly lowers the level.
log = logging.getLogger("feature_sign")
log.setLevel(logging.INFO)
def feature_sign_search(dictionary, signal, sparsity, solution=None):
    """
    Solve an L1-penalized minimization problem with the feature
    sign search algorithm of Lee et al (2006).

    Minimizes ``||signal - dictionary.dot(x)||^2 + sparsity * ||x||_1``
    over the coefficient vector ``x``.

    Parameters
    ----------
    dictionary : array_like, 2-dimensional
        The dictionary of basis functions from which to form the
        sparse linear combination.
    signal : array_like, 1-dimensional
        The signal being decomposed as a sparse linear combination
        of the columns of the dictionary.
    sparsity : float
        The coefficient on the L1 penalty term of the cost function.
    solution : ndarray, 1-dimensional, optional
        Pre-allocated vector to use to store the solution.

    Returns
    -------
    solution : ndarray, 1-dimensional
        Vector containing the solution. If an array was passed in
        as the argument `solution`, it will be updated in place
        and the same object will be returned.

    References
    ----------
    .. [1] H. Lee, A. Battle, R. Raina, and A. Y. Ng. "Efficient
       sparse coding algorithms". Advances in Neural Information
       Processing Systems 19, 2007.
    """
    # Bind the module's singleton logger locally so the function does not
    # depend on module-level state (getLogger returns the same object).
    log = logging.getLogger("feature_sign")
    # Coefficients with magnitude below this are treated as exactly zero
    # and deactivated.
    effective_zero = 1e-18
    # Precompute matrices for speed: the Gram matrix of the dictionary
    # and the correlation of its columns with the signal.
    gram_matrix = np.dot(dictionary.T, dictionary)
    target_correlation = np.dot(dictionary.T, signal)
    if solution is None:
        solution = np.zeros(gram_matrix.shape[0])
    else:
        assert solution.ndim == 1, "solution must be 1-dimensional"
        assert solution.shape[0] == dictionary.shape[1], (
            "solution.shape[0] does not match dictionary.shape[1]"
        )
        # Initialize all elements to be zero.
        solution[...] = 0.
    # signs[i] in {-1, 0, +1}: the active-set sign of coefficient i.
    signs = np.zeros(gram_matrix.shape[0], dtype=np.int8)
    active_set = set()
    # Optimality measure for the *zero* coefficients:
    # max(abs(grad[signs == 0])); convergence requires it <= sparsity.
    z_opt = np.inf
    # Optimality measure for the *nonzero* coefficients:
    # max(abs(grad[nz] + sparsity * signs[nz])). Set to 0 here to
    # trigger a new feature activation on the first iteration.
    nz_opt = 0
    # Gradient of the quadratic term; the gram_matrix contribution
    # vanishes at the all-zero initialization.
    grad = -2 * target_correlation  # + 2 * np.dot(gram_matrix, solution)
    # ||signal||^2; only used to report the exact cost in debug logs.
    sds = np.dot(signal.T, signal)
    while z_opt > sparsity or not np.allclose(nz_opt, 0):
        if np.allclose(nz_opt, 0):
            # Active coefficients are optimal for the current signs:
            # activate the zero coefficient with the largest gradient
            # magnitude, provided it violates the optimality condition.
            candidate = np.argmax(np.abs(grad) * (signs == 0))
            log.debug("candidate feature: %d" % candidate)
            if grad[candidate] > sparsity:
                # A positive gradient means the coefficient wants to
                # move negative (and vice versa below).
                signs[candidate] = -1
                solution[candidate] = 0.
                log.debug("added feature %d with negative sign" %
                          candidate)
                active_set.add(candidate)
            elif grad[candidate] < -sparsity:
                signs[candidate] = 1
                solution[candidate] = 0.
                log.debug("added feature %d with positive sign" %
                          candidate)
                active_set.add(candidate)
            if len(active_set) == 0:
                # No coefficient violates optimality: the current
                # (all-zero) solution is already optimal.
                break
        else:
            log.debug("Non-zero coefficient optimality not satisfied, "
                      "skipping new feature activation")
        indices = np.array(sorted(active_set))
        restr_gram = gram_matrix[np.ix_(indices, indices)]
        restr_corr = target_correlation[indices]
        restr_sign = signs[indices]
        # Minimizer of the quadratic-plus-linear-sign-term subproblem
        # restricted to the active set, via the normal equations.
        rhs = restr_corr - sparsity * restr_sign / 2
        # TODO: implement footnote 3.
        #
        # If restr_gram becomes singular, check if rhs is in the column
        # space of restr_gram.
        #
        # If so, use the pseudoinverse instead; if not, update to first
        # zero-crossing along any direction in the null space of restr_gram
        # such that it has non-zero dot product with rhs (how to choose
        # this direction?).
        new_solution = np.linalg.solve(np.atleast_2d(restr_gram), rhs)
        new_signs = np.sign(new_solution)
        restr_oldsol = solution[indices]
        # Coefficients whose sign reversed relative to the active-set
        # sign: a true +1 <-> -1 flip gives |difference| == 2; a flip
        # *to* exactly zero gives 1 and needs no line search.
        sign_flips = np.where(abs(new_signs - restr_sign) > 1)[0]
        if len(sign_flips) > 0:
            # Discrete line search over the zero crossings between the
            # old and new restricted solutions; the analytic minimizer
            # itself is also a candidate.
            best_curr = new_solution
            best_obj = (sds + (np.dot(new_solution,
                                      np.dot(restr_gram, new_solution))
                        - 2 * np.dot(new_solution, restr_corr))
                        + sparsity * abs(new_solution).sum())
            if log.isEnabledFor(logging.DEBUG):
                # Extra computation only done if the log-level is
                # sufficient; the "new" cost equals best_obj, so it is
                # not recomputed.
                ocost = (sds + (np.dot(restr_oldsol,
                                       np.dot(restr_gram, restr_oldsol))
                         - 2 * np.dot(restr_oldsol, restr_corr))
                         + sparsity * abs(restr_oldsol).sum())
                log.debug("Cost before linesearch (old)\t: %e" % ocost)
                log.debug("Cost before linesearch (new)\t: %e" % best_obj)
            else:
                ocost = None
            for idx in sign_flips:
                a = new_solution[idx]
                b = restr_oldsol[idx]
                # Fraction of the step from old to new solution at which
                # coefficient idx crosses zero: b + prop * (a - b) == 0.
                prop = b / (b - a)
                curr = restr_oldsol - prop * (restr_oldsol - new_solution)
                cost = sds + (np.dot(curr, np.dot(restr_gram, curr))
                              - 2 * np.dot(curr, restr_corr)
                              + sparsity * abs(curr).sum())
                log.debug("Line search coefficient: %.5f cost = %e "
                          "zero-crossing coefficient's value = %e" %
                          (prop, cost, curr[idx]))
                if cost < best_obj:
                    best_obj = cost
                    best_curr = curr
            log.debug("Lowest cost after linesearch\t: %e" % best_obj)
            if ocost is not None:
                # The step should never make the objective worse; warn
                # when it demonstrably did (original message said
                # "decreased", which was backwards for this condition).
                if ocost < best_obj and not np.allclose(ocost, best_obj):
                    log.debug("Warning: objective increased from %e to %e" %
                              (ocost, best_obj))
        else:
            log.debug("No sign flips, not doing line search")
            best_curr = new_solution
        solution[indices] = best_curr
        # Zero out and deactivate coefficients driven (numerically) to 0.
        zeros = indices[np.abs(solution[indices]) < effective_zero]
        solution[zeros] = 0.
        signs[indices] = np.int8(np.sign(solution[indices]))
        active_set.difference_update(zeros)
        grad = -2 * target_correlation + 2 * np.dot(gram_matrix, solution)
        # Recompute both optimality measures, guarding against empty
        # selections: when every coefficient is active (or none are),
        # the corresponding condition holds vacuously. The unguarded
        # np.max over an empty slice raised ValueError.
        inactive = signs == 0
        z_opt = np.max(np.abs(grad[inactive])) if inactive.any() else 0.
        nonzero = ~inactive
        nz_opt = (np.max(np.abs(grad[nonzero] + sparsity * signs[nonzero]))
                  if nonzero.any() else 0.)
    return solution
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The code works very well, thank you so much