@timvieira · Created February 18, 2017
Cartoon version of Jiawei's optimization problem.
"""
Cartoon version of Jiawei's optimization problem.
Created [2017-02-17 Fri]
"""
import autograd
from autograd import numpy as np   # autograd's numpy wrapper
from scipy.optimize import fmin_bfgs
def norm(w):
    return np.sqrt(np.sum(w*w))


def group_lasso(G, w):
    return np.sum([norm(w[g]) for g in G])
def prox(G, w, threshold):
    "In-place prox of `threshold * group_lasso(G, .)`: blockwise soft-thresholding."
    for g in G:
        z = norm(w[g])
        if z <= threshold:
            w[g] = 0
        else:
            w[g] *= (z - threshold) / z
def l1_prox(w, threshold):
    "In-place prox of `threshold * ||.||_1`: elementwise soft-thresholding."
    z = np.abs(w)
    y = z <= threshold
    w[y] = 0
    w[~y] *= (z[~y] - threshold) / z[~y]
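
# NOTE (added): with singleton groups, as used in main() below
# (G = [[i] for i in range(D)]), `prox(G, w, threshold)` computes the same map
# as `l1_prox(w, threshold)`, since the 2-norm of a single coordinate is its
# absolute value.
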
def L(loss, G, beta, delta, c):
    "Full (regularized) objective."
    gamma = np.log(1+np.exp(beta))   # softplus reparameterization keeps gamma >= 0
    return (loss(gamma * delta)
            + c[0] * gamma.sum()
            + c[1] * np.abs(beta).sum()
            + c[2] * group_lasso(G, delta)
            )


def L_no_regularizer(loss, beta, delta, c):
    "Smooth part of the objective (drops the nonsmooth l1 and group-lasso terms)."
    gamma = np.log(1+np.exp(beta))
    return (loss(gamma * delta)
            + c[0] * gamma.sum()
            )
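
# NOTE (added): L_no_regularizer is the smooth piece f(w) used by proximal
# gradient descent; the dropped terms c[1]*||beta||_1 and c[2]*group_lasso(delta)
# are handled by their prox operators.  One proximal step is
#
#     w <- prox_{a*h}( w - a * grad f(w) )
#
# where a is the step size and h the nonsmooth regularizer; applying the prox
# only every `prox_freq` steps scales the threshold by prox_freq accordingly,
# which is what the loop in main() does.
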
def main():
    np.random.seed(1234)

    D = 10
    c = np.random.uniform(0, 5, size=3)
    G = [[i] for i in range(D)]

    # Some arbitrary convex loss... here we pick a random quadratic
    magic = np.ones(D)     # unregularized optimum: gamma * delta = magic.
    A = np.random.uniform(-1, 1, size=(D, D))
    A = A.dot(A.T)         # random PSD matrix

    def loss(Delta):
        diff = (Delta - magic)
        return np.dot(np.dot(diff.T, A), diff)

    def obj_reg(w):
        return L(loss, G, w[:D], w[D:], c)

    def obj_smooth(w):
        return L_no_regularizer(loss, w[:D], w[D:], c)

    grad_reg = autograd.grad(obj_reg)
    grad_smooth = autograd.grad(obj_smooth)

    # Initialization
    w0 = np.random.uniform(-1, 1, size=2*D)
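    # NOTE (added): w packs both parameter blocks into one flat vector,
    # w[:D] = beta and w[D:] = delta; the same w0 seeds every method below.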
    if 1:
        print('#__________________________')
        print('# L-BFGS')
        opt = fmin_bfgs(obj_reg, w0, fprime=grad_reg, disp=-1)
        #print(opt)

    if 1:
        print('#__________________________')
        print('# Proximal gradient descent')
        w = w0*1                 # work on a copy of the initialization
        a = 0.001                # learning rate
        prox_freq = 1            # prox update frequency
        print_freq = 250
        iterations = 5000
        for t in range(1, iterations):
            g = grad_smooth(w)   # gradient of the smooth part only
            w -= a * g
            if t % prox_freq == 0:
                l1_prox(w[:D], prox_freq*a*c[1])    # l1 on beta
                prox(G, w[D:], prox_freq*a*c[2])    # group lasso on delta
            if t % print_freq == 0:
                print('%4d %.2f' % (t, obj_reg(w)))
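
    # NOTE (added): the block below runs plain (sub)gradient descent on the
    # full nonsmooth objective for comparison.  autograd returns a valid
    # subgradient at the kinks of |.| and ||.||_2; unlike the proximal updates
    # above, this never sets coordinates exactly to zero and subgradient
    # methods are generally slower to converge.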
    if 1:
        print('#__________________________')
        print('# Subgradient descent')
        w = w0*1                 # work on a copy of the initialization
        a = 0.001                # learning rate
        print_freq = 250
        iterations = 5000
        for t in range(1, iterations):
            g = grad_reg(w)      # (sub)gradient of the full objective
            w -= a * g
            if t % print_freq == 0:
                print('%4d %.2f' % (t, obj_reg(w)))
if __name__ == '__main__':
    main()