Skip to content

Instantly share code, notes, and snippets.

@yiboyang
Created June 26, 2019 20:56
Show Gist options
  • Save yiboyang/9f4bb5075449146c831b2e5c574fc187 to your computer and use it in GitHub Desktop.
Save yiboyang/9f4bb5075449146c831b2e5c574fc187 to your computer and use it in GitHub Desktop.
Algorithm demos for Numerical Optimization by Nocedal and Wright.
# active_set_for_convex_qp.py
# Yibo Yang
# 04/16/2019
# Algorithm 16.3 (active-set method for convex QP) in Numerical Optimization
import numpy as np
from numpy.linalg import solve, lstsq
printing_count_from_1 = 1 # set to 1 to match textbook numbering; 0 to use python numbering
def equality_constrained_qp(G, c, A, b):
"""
Solve min_x q(x) = 1/2 x^T G x + x^T c, subject to Ax = b
Assuming Z^T G Z is positive definite.
Used for solving (16.39) subproblem.
:param G: nxn
:param c:
:param A: m x n, should have full row rank
:param b: m vec
:return:
"""
if len(A) > 0:
[m, n] = A.shape
K = np.zeros([m + n, m + n]) # KKT matrix
K[:n, :n] = G
K[n:, :n] = A
K[:n, n:] = -A.T
rhs = np.hstack([-c, b])
sol = solve(K, rhs)
x = sol[:n]
lam = sol[n:]
else: # unconstrained; simply set gradient to zero
x, _, _, _ = lstsq(G, -c, rcond=None) # use lstsq instead of solve in case G is only positive semidefinite
return x
def active_set_cvx_qp(G, c, A1, b1, A2, b2, x0, W0=None, max_its=100):
"""
Solve min_x q(x) = 1/2 x^T G x + x^T c, subject to A1 x = b1, A2 x >= b2
:param G:
:param c:
:param A1: m1 x n
:param b1:
:param A2: m2 x n
:param b2:
:param x0: initial primal var (feasible)
:param W0: initial working set (by default empty)
:return:
"""
m1 = len(A1)
m2 = len(A2)
if W0 is None:
# active_ineq_cons_idx = list(np.where(A1 @ x0 == b1)[0]) # subset of 0, 1, ..., m1-1
# eq_cons_idx = list(range(m1, m1 + m2)) # eq cons always active; m1, m1+1, ..., m1+m2-1
# W0 = active_ineq_cons_idx + eq_cons_idx # by default initialize working set to all active constraints at x0
W0 = []
W = W0
A12 = np.vstack([A1, A2]) # block matrix of all constraints Jacobian, [A1; A2]
b12 = np.hstack([b1, b2])
x = x0
for k in range(max_its):
print(f'iteration {k}')
print(f'x_k={x}')
print(f'W_k={[i+printing_count_from_1 for i in W]}')
# solve (16.39) to find p_k
g = G @ x + c
A_ = np.array([A12[i] for i in W])
# A_ = np.matrix([A12[i] for i in W])
b_ = np.zeros(len(A_))
p = equality_constrained_qp(G, g, A_, b_)
print(f'p_k={p}')
map_idx_in_W_to_cons_id = {idx: i for (idx, i) in enumerate(W)}
if np.all(np.isclose(p, 0)):
# compute Larange multipliers \hat \lambda_i that satisfy (16.42)
try:
lam_hat = solve(A_.T, g) # (16.42)
except:
# if overdetermined (e.g., lone Lagrange multiplier), np.linalg.solve will throw
# "numpy.linalg.linalg.LinAlgError: Last 2 dimensions of the array must be square"
lam_hat, _, _, _ = lstsq(A_.T, g, rcond=None)
W_intersect_I = [i for i in W if i < m1] # cons ids in W that correspond to ineq cons
# lam_hat_ineq_cons = [(i, lam) in enumerate(lam_hat) if map_idx_in_W_to_cons_id[i] < m1]
lam_hat_I = [lam for (j, lam) in enumerate(lam_hat) if map_idx_in_W_to_cons_id[j] in W_intersect_I]
print(f'\hat lambda corresponding to inequality constraints={lam_hat_I}')
lam_hat_I = np.array(lam_hat_I)
if np.all(lam_hat_I >= 0):
print(f'\hat lambda corresponding to inequality constraints all positive; returning')
print(f'optimal x={x}')
return x
else:
j = W_intersect_I[np.argmin(lam_hat_I)]
W.remove(j)
print(f'removed constraint {j+printing_count_from_1} from working set')
else: # p_k != 0
x_plus_p = x + p
feasible = True
for j in range(m1):
if j not in W and np.dot(A1[j], x_plus_p) < b1[j]: # only need to check ineq cons that're not in W
feasible = False
break
if feasible: # no blocking cons; alpha = 1
print('no blocking constraint')
x = x_plus_p
else:
blocking_cons = None
alpha = np.inf
for i in range(m1 + m2):
ai_dot_p = np.dot(A12[i], p) # ai^T p
if i not in W and ai_dot_p < 0:
alpha_prev = alpha
alpha = min(alpha, (b12[i] - np.dot(A12[i], x)) / ai_dot_p)
if alpha < alpha_prev:
blocking_cons = i
x = x + alpha * p
W += [blocking_cons]
print(f'added blocking constraint {blocking_cons+printing_count_from_1} to working set')
print()
return x
dtype = 'float'
n = 2 # number of variables
# textbook example 16.4
# G = np.array([[2, 0], [0, 2]], dtype=dtype)
# c = np.array([-2, -5], dtype=dtype)
# A1 = np.array([[1, -2],
# [-1, -2],
# [-1, 2],
# [1, 0],
# [0, 1]], dtype=dtype)
# b1 = np.array([-2, -6, -2, 0, 0], dtype=dtype)
# A2 = np.zeros([0, n], dtype=dtype)
# b2 = np.array([], dtype=dtype)
#
# x0 = np.array([2, 0], dtype=dtype)
# # res = active_set_cvx_qp(G, c, A1, b1, A2, b2, x0)
#
# # let initial working set be {3} in textbook (note Python counts from 0)
# W0 = [2]
# print(W0)
# res = active_set_cvx_qp(G, c, A1, b1, A2, b2, x0, W0)
#
# # let initial working set be {5} in textbook (note Python counts from 0)
# W0 = [4]
# print(W0)
# res = active_set_cvx_qp(G, c, A1, b1, A2, b2, x0, W0)
#
# # let initial working set be empty
# W0 = []
# print(W0)
# res = active_set_cvx_qp(G, c, A1, b1, A2, b2, x0, W0)
# problem 16.11
G = np.array([[1, -1], [-1, 2]], dtype=dtype)
c = np.array([-2, -6], dtype=dtype)
A1 = np.array([[-1 / 2, -1 / 2],
[1, -2],
[1, 0],
[0, 1]], dtype=dtype)
b1 = np.array([-1, -2, 0, 0], dtype=dtype)
A2 = np.zeros([0, n], dtype=dtype)
b2 = np.array([], dtype=dtype)
x0 = np.array([1 / 2, 1 / 2], dtype=dtype)
print(f'starting at interior point x0={x0}')
res = active_set_cvx_qp(G, c, A1, b1, A2, b2, x0)
print()
x0 = np.array([2, 0], dtype=dtype)
print(f'starting at a vertex x0={x0}')
res = active_set_cvx_qp(G, c, A1, b1, A2, b2, x0)
print()
x0 = np.array([1, 0], dtype=dtype)
print(f'starting at a non-vertex boundary point of feasible set x0={x0}')
res = active_set_cvx_qp(G, c, A1, b1, A2, b2, x0)
print()
# basic_interior_point.py
# Yibo Yang
# 05/01/2019
# Solving problem 18.69, p. 562 of Numerical Optimization. Since this problem has no inequality constraints, there's
# no non-negativitiy condition to enforce in algorithm 19.1, and algorithm 19.1 (which directly solves the Newton-KKT
# system) reduces to algorithm 18.1, p. 532.
import numpy as np
from numpy.linalg import solve, lstsq, norm
def numerical_gradient(f, x, eps=1e-5):
# compute numerical gradient using centered finite-difference
x = np.asarray(x, dtype=float)
n = len(x)
grad = np.empty(n)
for i in range(n):
x[i] += eps
f1 = f(x)
x[i] -= 2 * eps
f2 = f(x)
grad[i] = (f1 - f2) / (2 * eps)
x[i] += eps # restore
return grad
# f = lambda x: np.sum(x ** 2)
# x = np.array([0., 1., 2.])
# print(numerical_gradient(f, x))
def solve_newton(L_hess, grad_f, A, c, x, lam):
# solve the Newton-KKT system
n = len(x)
m = len(lam)
mat = np.zeros([m + n, m + n])
mat[:n, :n] = L_hess
mat[:n, -m:] = -A.T
mat[-m:, :n] = A
b = np.zeros([m + n])
b[:n] = - grad_f + A.T @ lam
b[-m:] = -c
p = solve(mat, b)
p_x = p[:n]
p_lam = p[-m:]
return p_x, p_lam
# problem 18.69, p. 562
x0 = np.array([-1.71, 1.59, 1.82, -0.763, -0.763])
# x_opt = np.array([-1.8, 1.7, 1.9, -.8, -.8]) # not actual optimal; constraints not satisfied!
tol = 1e-10
max_its = 50
x = x0
n = len(x)
m = 3
lam = np.zeros(m)
def f(x):
return np.exp(np.prod(x)) - 0.5 * (x[0] ** 3 + x[1] ** 3 + 1) ** 2
grad_f_symbolic = [None] * n
grad_f_symbolic[0] = lambda x: np.exp(np.prod(x)) * np.prod(x) / x[0] - 3 * x[0] ** 2 * (x[0] ** 3 + x[1] ** 3 + 1)
grad_f_symbolic[1] = lambda x: np.exp(np.prod(x)) * np.prod(x) / x[1] - 3 * x[1] ** 2 * (x[0] ** 3 + x[1] ** 3 + 1)
grad_f_symbolic[2] = lambda x: np.exp(np.prod(x)) * np.prod(x) / x[2]
grad_f_symbolic[3] = lambda x: np.exp(np.prod(x)) * np.prod(x) / x[3]
grad_f_symbolic[4] = lambda x: np.exp(np.prod(x)) * np.prod(x) / x[4]
grad_c_symbolic = [None] * m
grad_c_symbolic[0] = lambda x: 2 * x
grad_c_symbolic[1] = lambda x: np.array([0., x[2], x[1], -5 * x[4], -5 * x[3]])
grad_c_symbolic[2] = lambda x: np.array([3 * x[0] ** 2, 3 * x[1] ** 2, 0, 0, 0.])
for it in range(max_its):
print('iteration k =', it + 1)
print('x_k =', x)
print('f_k =', f(x))
prod = np.prod(x)
grad_f = np.exp(prod) * prod / x
grad_f[0] -= 3 * x[0] ** 2 * (x[0] ** 3 + x[1] ** 3 + 1)
grad_f[1] -= 3 * x[1] ** 2 * (x[0] ** 3 + x[1] ** 3 + 1)
A = np.zeros([m, n])
# A[0] = 2 * x
# A[1] = np.array([0, x[2], x[1], -5 * x[4], -5 * x[3]])
# A[2] = np.array([3 * x[0] * 2, 3 * x[1] * 2, 0, 0, 0])
for j in range(m):
A[j] = grad_c_symbolic[j](x)
c = np.zeros([m])
c[0] = np.sum(x ** 2) - 10
c[1] = x[1] * x[2] - 5 * x[3] * x[4]
c[2] = x[0] ** 3 + x[1] ** 3 + 1
def dL_dxi(x, i):
res = 0
for j in range(m):
res -= lam[j] * grad_c_symbolic[j](x)[i]
res += grad_f_symbolic[i](x)
return res
L_hess = np.empty([n, n])
for i in range(n):
L_hess[i] = numerical_gradient(lambda x: dL_dxi(x, i), x)
p_x, p_lam = solve_newton(L_hess, grad_f, A, c, x, lam)
x_prev = x.copy()
x += p_x
lam += p_lam
if norm(x - x_prev) < tol:
print('||x_{k+1} - x_k|| < %g, terminating' % tol)
break
print()
print('final solution x=', x)
c = np.zeros([m])
c[0] = np.sum(x ** 2) - 10
c[1] = x[1] * x[2] - 5 * x[3] * x[4]
c[2] = x[0] ** 3 + x[1] ** 3 + 1
for i in range(m):
print('c_%d(x) =' % (i + 1), c[i])
#############################################################
# Execution Printout:
# iteration k = 1
# x_k = [-1.71 1.59 1.82 -0.763 -0.763]
# f_k = 0.0559001518641
# iteration k = 2
# x_k = [-1.71644697 1.59488994 1.82858782 -0.76372206 -0.76372206]
# f_k = 0.0539464669897
# iteration k = 3
# x_k = [-1.71714261 1.59570867 1.82724803 -0.76364346 -0.76364346]
# f_k = 0.0539496822767
# iteration k = 4
# x_k = [-1.71714357 1.59570969 1.82724575 -0.76364308 -0.76364308]
# f_k = 0.0539498477699
# ||x_{k+1} - x_k|| < 1e-10, terminating
#
# final solution x= [-1.71714357 1.59570969 1.82724575 -0.76364308 -0.76364308]
# c_1(x) = 3.5527136788e-15
# c_2(x) = -8.881784197e-16
# c_3(x) = 1.7763568394e-14
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment