Last active
April 18, 2022 23:44
-
-
Save youheiakimoto/15212dbf46dc546af20af38b0b48ff17 to your computer and use it in GitHub Desktop.
Saddle Point Optimization with Approximate Minimization Oracle
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from scipy import stats | |
from scipy.optimize import minimize | |
"""Saddle Point Optimization with Approximate Minimization Oracle | |
Adversarial Evolution Strategy : Zero-order saddle point optimizer | |
Adversarial SLSQP : First-order saddle point optimizer | |
Its extended version is available at (https://gist.github.com/youheiakimoto/ab51e88c73baf68effd95b750100aad0). | |
Reference | |
--------- | |
Youhei Akimoto. 2021. | |
Saddle point optimization with approximate minimization oracle. | |
In Proceedings of the Genetic and Evolutionary Computation Conference (GECCO '21). | |
Association for Computing Machinery, New York, NY, USA, 493–501. | |
DOI:https://doi.org/10.1145/3449639.3459266 | |
""" | |
def aslsqp_fixed(f, df, x0, y0, eta, tau, nstep, maxeval, minitr, tolgap=0): | |
"""Adversarial SLSQP with Fixed Learning Rate | |
Parameters | |
---------- | |
f : callable | |
objective | |
df : callable or None | |
derivative of objective | |
numerical approximation is used if None | |
x0, y0 : ndarray (1d) | |
initial x and y | |
eta : float | |
learning rate | |
tau : int | |
coefficient determining the number of successful steps | |
nstep : int | |
number of steps | |
maxeval : int | |
number of f-calls | |
minitr : int | |
minimum iterations | |
tolgap : float, optional | |
tolerance for approximate suboptimality error | |
Returns | |
------- | |
x, y (ndarray, 1D) : solutions | |
res (ndarray, 2D) : log | |
res[t] = neval, f(x(t), y(t)), f(x', yt), f(xt, y'), ndfx, ndfy, x(t+1), y(t+1), eta(t+1) | |
""" | |
res = np.zeros((nstep, 7+len(x0)+len(y0))) | |
neval = 0 | |
ndfx = ndfy = 0 | |
x = np.copy(x0) | |
y = np.copy(y0) | |
for i in range(nstep): | |
fxy = f(x, y) | |
# Oracle Calls | |
fx = lambda z: f(z, y) | |
fy = lambda z: -f(x, z) | |
if df is None: | |
xres = minimize(fx, x, method="SLSQP", options={'maxiter':tau}) | |
yres = minimize(fy, y, method="SLSQP", options={'maxiter':tau}) | |
else: | |
gx = lambda z: df(z, y)[:len(x)] | |
gy = lambda z: -df(x, z)[len(x):] | |
xres = minimize(fx, x, method="SLSQP", jac=gx, options={'maxiter':tau}) | |
yres = minimize(fy, y, method="SLSQP", jac=gy, options={'maxiter':tau}) | |
xp = xres.x | |
fx = xres.fun | |
nx = xres.nfev | |
ndx = xres.njev | |
yp = yres.x | |
fy = - yres.fun | |
ny = yres.nfev | |
ndy = yres.njev | |
# Update | |
x += eta * (xp - x) | |
y += eta * (yp - y) | |
neval += nx + ny | |
ndfx += ndx | |
ndfy += ndy | |
res[i, 0] = neval | |
res[i, 1] = fxy | |
res[i, 2] = fx | |
res[i, 3] = fy | |
res[i, 4] = ndfx | |
res[i, 5] = ndfy | |
res[i, 6:6+len(x)] = x | |
res[i, 6+len(x):6+len(x)+len(y)] = y | |
res[i, -1] = eta | |
if neval >= maxeval: | |
break | |
if res[i, 3] - res[i, 2] < tolgap: | |
break | |
if i >= minitr - 1: | |
gap = res[i+1-minitr:i+1, 3] - res[i+1-minitr:i+1:, 2] | |
if minitr > 1 and np.all(gap[1:minitr] - gap[0:minitr-1] > 0): | |
break | |
return x, y, res[:i+1] | |
def aslsqp(f, df, x0, y0, etamin, maxeval, tolgap=1e-6, tau=5, aeta=1, beta=5, ceta=1.1): | |
"""Adversarial Evolution Strategy with Learning Rate Adaptation | |
Parameters | |
---------- | |
f : callable | |
objective | |
df : callable or None | |
derivative of objective | |
numerical approximation is used if None | |
x0, y0 : ndarray (1d) | |
initial x and y | |
etamax : float | |
maximal learning rate | |
maxeval : int | |
number of f-calls | |
tolgap : float, optional | |
tolerance for approximate suboptimality error | |
tau : int, optional | |
coefficient determining the number of successful steps | |
aeta, beta : int, optional | |
number of iterations to estimate the slope is aeta / eta + beta | |
ceta : float, > 1, optional | |
factor to increase and decrease the learning rate | |
Returns | |
------- | |
x, y (ndarray, 1D) : solutions | |
res (ndarray, 2D) : log | |
res[t] = neaval, f(x(t), y(t)), f(x', yt), f(xt, y'), sx(t+1), sy(1+1), x(t+1), y(t+1), eta(t+1) | |
""" | |
res = np.zeros((0, 7+len(x0)+len(y0))) | |
neval = 0 | |
ndfx = ndfy = 0 | |
x = np.copy(x0) | |
y = np.copy(y0) | |
eta = 1.0 | |
slp = 0.0 | |
# Main Loop | |
while neval < maxeval: | |
u = np.random.rand() | |
if u < 1/3: | |
eta1 = eta * ceta | |
elif 1/3 <= u < 2/3: | |
eta1 = eta | |
else: | |
eta1 = eta / ceta | |
eta1 = np.clip(eta1, etamin, 1) | |
nstep = beta + int(aeta/eta1) | |
x1, y1, res1 = aslsqp_fixed(f, df, x, y, eta, tau, nstep, maxeval, beta, tolgap) | |
res1[:, 0] += neval | |
res1[:, 4] += ndfx | |
res1[:, 5] += ndfy | |
res = np.vstack((res, res1)) | |
neval = res1[-1, 0] | |
ndfx = res1[-1, 4] | |
ndfy = res1[-1, 5] | |
xarr = np.arange(res1.shape[0]) | |
slp1, _, _, _, std1 = stats.linregress(xarr, np.log(res1[:, 3] - res1[:, 2])) | |
if slp1 - 2*std1 < 0: | |
x = x1 | |
y = y1 | |
if slp >= 0 and slp1 >= 0: | |
eta = np.clip(eta / ceta**3, etamin, 1) | |
slp = 0.0 | |
elif slp1 <= slp: | |
eta = eta1 | |
slp = slp1 | |
elif eta == eta1: | |
slp = slp1 | |
if res[-1, 3] - res[-1, 2] < tolgap: | |
break | |
return x, y, res | |
def esstep(h, z, sz, hz): | |
"""Evolution Strategy (Randomized Hill-Climbing) | |
Parameters | |
---------- | |
h : callable | |
function to be minimized | |
z : ndarray (1d) | |
initial solution | |
sz : float, > 0 | |
step-size | |
hz : float | |
h(z) | |
Returns | |
------- | |
z (ndarray, 1d) : next solution | |
sz (float) : next step-size | |
hz (float) : h(z) | |
success (bool) : whether the step is successful | |
""" | |
Nz = len(z) | |
aup = np.exp(1.0/np.sqrt(2*Nz)) | |
adown = aup**(-0.25) | |
ztt = z + sz * np.random.randn(Nz) | |
hztt = h(ztt) | |
if hztt > hz: | |
return z, sz * adown, hz, False | |
else: | |
return ztt, sz * aup, hztt, True | |
def es_minimize(f, x, y, sx, sy, fxy, tau): | |
"""Locate approximate solutions to min_{xt} f(xt, y) and max_{yt} f(x, yt) | |
Parameters | |
---------- | |
f : callable | |
objective | |
x, y : ndarray (1d) | |
x and y | |
sx, sy : float, > 0 | |
initial step-size for x and y updates | |
fxy : float | |
f(x, y) value | |
tau : int | |
coefficient determining the number of successful steps | |
Returns | |
------- | |
xt, yt (ndarray, 1d) : approximate solutions | |
fx, fy (float) : f(xt, y) and f(x, yt) | |
sx, sy (float) : step-size for x and y updates | |
neval_x, neval_y (int) : number of f-calls for x and y updates | |
""" | |
Nx = len(x) | |
nsucc_x = 0 | |
neval_x = 0 | |
xt = np.copy(x) | |
fx = fxy | |
while nsucc_x < Nx * tau: | |
xt, sx, fx, succ = esstep(lambda z: f(z, y), xt, sx, fx) | |
nsucc_x += int(succ) | |
neval_x += 1 | |
Ny = len(y) | |
nsucc_y = 0 | |
neval_y = 0 | |
yt = np.copy(y) | |
fy = -fxy | |
while nsucc_y < Ny * tau: | |
yt, sy, fy, succ = esstep(lambda z: -f(x, z), yt, sy, fy) | |
nsucc_y += int(succ) | |
neval_y += 1 | |
return xt, yt, fx, -fy, sx, sy, neval_x, neval_y | |
def aes_fixed(f, x0, y0, eta, sx0, sy0, sxmax, symax, tau, nstep, maxeval, minitr, tolgap=0): | |
"""Adversarial ES with Fixed Learning Rate | |
Parameters | |
---------- | |
f : callable | |
objective | |
x0, y0 : ndarray (1d) | |
initial x and x | |
eta : float | |
learning rate | |
sx0, sy0 : float, > 0 | |
initial step-size | |
sxmax, symax : float, > 0 | |
maximal step-size | |
tau : int | |
coefficient determining the number of successful steps | |
nstep : int | |
number of steps | |
maxeval : int | |
number of f-calls | |
minitr : int | |
minimum iterations | |
tolgap : float, optional | |
tolerance for approximate suboptimality error | |
Returns | |
------- | |
x, y (ndarray, 1D) : solutions | |
sx, sy (float) : step-size | |
res (ndarray, 2D) : log | |
res[t] = neval, f(x(t), y(t)), f(x', yt), f(xt, y'), sx(t+1), sy(1+1), x(t+1), y(t+1), eta(t+1) | |
""" | |
res = np.zeros((nstep, 7+len(x0)+len(y0))) | |
neval = 0 | |
sx = sx0 | |
sy = sy0 | |
x = np.copy(x0) | |
y = np.copy(y0) | |
for i in range(nstep): | |
fxy = f(x, y) | |
xp, yp, fx, fy, sx, sy, nx, ny = es_minimize(f, x, y, sx, sy, fxy, tau) | |
x += eta * (xp - x) | |
y += eta * (yp - y) | |
sx = min(sx, sxmax) | |
sy = min(sy, symax) | |
neval += nx + ny | |
res[i, 0] = neval | |
res[i, 1] = fxy | |
res[i, 2] = fx | |
res[i, 3] = fy | |
res[i, 4] = sx | |
res[i, 5] = sy | |
res[i, 6:6+len(x)] = x | |
res[i, 6+len(x):6+len(x)+len(y)] = y | |
res[i, -1] = eta | |
if neval >= maxeval: | |
break | |
if res[i, 3] - res[i, 2] < tolgap: | |
break | |
if i >= minitr - 1: | |
gap = res[i+1-minitr:i+1, 3] - res[i+1-minitr:i+1:, 2] | |
if minitr > 1 and np.all(gap[1:minitr] - gap[0:minitr-1] > 0): | |
break | |
return x, y, sx, sy, res[:i+1] | |
def aes(f, x0, y0, etamin, sxmax, symax, maxeval, tolgap=0, tau=5, aeta=1, beta=5, ceta=1.1): | |
"""Adversarial Evolution Strategy with Learning Rate Adaptation | |
Parameters | |
---------- | |
f : callable | |
objective | |
x0, y0 : ndarray (1d) | |
initial x and y | |
etamax : float | |
maximal learning rate | |
sxmax, symax : float, > 0 | |
maximal step-size | |
maxeval : int | |
number of f-calls | |
tolgap : float, optional | |
tolerance for approximate suboptimality error | |
tau : int | |
coefficient determining the number of successful steps | |
aeta, beta : int, optional | |
number of iterations to estimate the slope is aeta / eta + beta | |
ceta : float, > 1, optional | |
factor to increase and decrease the learning rate | |
Returns | |
------- | |
x, y (ndarray, 1D) : solutions | |
res (ndarray, 2D) : log | |
res[t] = neval, f(x(t), y(t)), f(x', yt), f(xt, y'), sx(t+1), sy(1+1), x(t+1), y(t+1), eta(t+1) | |
""" | |
res = np.zeros((0, 7+len(x0)+len(y0))) | |
neval = 0 | |
sx = sxmax | |
sy = symax | |
x = np.copy(x0) | |
y = np.copy(y0) | |
eta = 1.0 | |
slp = 0.0 | |
while neval < maxeval: | |
u = np.random.rand() | |
if u < 1/3: | |
eta1 = eta * ceta | |
elif 1/3 <= u < 2/3: | |
eta1 = eta | |
else: | |
eta1 = eta / ceta | |
eta1 = np.clip(eta1, etamin, 1) | |
nstep = beta + int(aeta/eta1) | |
x1, y1, sx1, sy1, res1 = aes_fixed(f, x, y, eta1, sx, sy, sxmax, symax, tau, nstep, maxeval, beta, tolgap) | |
res1[:, 0] += neval | |
res = np.vstack((res, res1)) | |
neval = res1[-1, 0] | |
xarr = np.arange(res1.shape[0]) | |
slp1, _, _, _, std1 = stats.linregress(xarr, np.log(res1[:, 3] - res1[:, 2])) | |
if slp1 - 2*std1 < 0: | |
x = x1 | |
y = y1 | |
sx = sx1 | |
sy = sy1 | |
if slp >= 0 and slp1 >= 0: | |
eta = np.clip(eta / ceta**3, etamin, 1) | |
slp = 0.0 | |
elif slp1 <= slp: | |
eta = eta1 | |
slp = slp1 | |
elif eta == eta1: | |
slp = slp1 | |
if res[-1, 3] - res[-1, 2] < tolgap: | |
break | |
return x, y, res | |
def f1(x, y, a=1.0, b=10.0, c=1.0): | |
fx = (a/2) * np.dot(x, x) | |
fxy = b * np.dot(x, y) | |
fy = (c/2) * np.dot(y, y) | |
return fx + fxy - fy | |
def df1(x, y, a=1.0, b=10.0, c=1.0): | |
return np.concatenate((a * x + b * y, b * x - c * y)) | |
def g1(x, y, a=1.0, b=10.0, c=1.0): | |
gxx = (a + b**2/c) | |
gyy = (c + b**2/a) | |
return (gxx/2) * np.dot(x, x) + (gyy/2) * np.dot(y, y) | |
if __name__ == "__main__": | |
import matplotlib.pyplot as plt | |
import matplotlib as mpl | |
# Problem Setting | |
m = n = 5 | |
a = 1 | |
b = 4 | |
c = 1 | |
f = lambda x, y: f1(x, y, a, b, c) | |
df = lambda x, y: df1(x, y, a, b, c) | |
g = lambda x, y: g1(x, y, a, b, c) | |
bareta = 4 / (4 + b**2) | |
# AES without Learning Rate Adaptation | |
# Parameters | |
eta = bareta * 10**(-0.5) | |
sx0 = sxmax = 2.0 | |
sy0 = symax = 2.0 | |
nstep = maxeval = minitr = int(2e7) | |
tolgap = 1e-7 | |
tau = 5 | |
# Experiment | |
np.random.seed(100) | |
x0 = np.random.randn(m) | |
y0 = np.random.randn(n) | |
x, y, _, _, res = aes_fixed(f, x0, y0, eta, sx0, sy0, sxmax, symax, tau, nstep, maxeval, minitr, tolgap=tolgap) | |
# Plot | |
gap = np.zeros(res.shape[0]) | |
for t in range(len(res)): | |
xt = res[t, 6:6+m] | |
yt = res[t, 6+m:6+m+n] | |
gap[t] = g(xt, yt) | |
fig = plt.figure() | |
plt.semilogy(res[:, 0], gap, label="G(x, y)") | |
plt.semilogy(res[:, 0], res[:, 3] - res[:, 2], label="F(x, y)") | |
plt.semilogy(res[:, 0], res[:, -1], label="eta") | |
plt.grid() | |
plt.ylim(1e-6, 1e6) | |
plt.xlabel("#f-calls") | |
plt.legend() | |
plt.tight_layout() | |
plt.savefig("aes_fixed.pdf") | |
# AES withLearning Rate Adaptation | |
# Parameters | |
etamin = 1e-6 | |
sx0 = sxmax = 2.0 | |
sy0 = symax = 2.0 | |
nstep = maxeval = int(2e7) | |
tolgap = 1e-7 | |
tau = 5 | |
# Experiment | |
np.random.seed(100) | |
x0 = np.random.randn(m) | |
y0 = np.random.randn(n) | |
x, y, res = aes(f, x0, y0, etamin, sxmax, symax, maxeval, tolgap=tolgap, tau=tau) | |
# Plot | |
gap = np.zeros(res.shape[0]) | |
for t in range(len(res)): | |
xt = res[t, 6:6+m] | |
yt = res[t, 6+m:6+m+n] | |
gap[t] = g(xt, yt) | |
fig = plt.figure() | |
plt.semilogy(res[:, 0], gap, label="G(x, y)") | |
plt.semilogy(res[:, 0], res[:, 3] - res[:, 2], label="F(x, y)") | |
plt.semilogy(res[:, 0], res[:, -1], label="eta") | |
plt.grid() | |
plt.ylim(1e-6, 1e6) | |
plt.xlabel("#f-calls") | |
plt.legend() | |
plt.tight_layout() | |
plt.savefig("aes.pdf") | |
# ASLSQP without Learning Rate Adaptation | |
# Parameters | |
eta = bareta * 10**(-0.5) | |
sx0 = sxmax = 2.0 | |
sy0 = symax = 2.0 | |
nstep = maxeval = minitr = int(2e5) | |
tolgap = 1e-7 | |
tau = 5 | |
# Experiment | |
np.random.seed(100) | |
x0 = np.random.randn(m) | |
y0 = np.random.randn(n) | |
x, y, res = aslsqp_fixed(f, df, x0, y0, eta, tau, nstep, maxeval, minitr, tolgap) | |
# Plot | |
gap = np.zeros(res.shape[0]) | |
for t in range(len(res)): | |
xt = res[t, 6:6+m] | |
yt = res[t, 6+m:6+m+n] | |
gap[t] = g(xt, yt) | |
fig = plt.figure() | |
plt.semilogy(res[:, 0], gap, label="G(x, y)") | |
plt.semilogy(res[:, 0], res[:, 3] - res[:, 2], label="F(x, y)") | |
plt.semilogy(res[:, 0], res[:, -1], label="eta") | |
plt.grid() | |
plt.ylim(1e-6, 1e6) | |
plt.xlabel("#f-calls") | |
plt.legend() | |
plt.tight_layout() | |
plt.savefig("aslsqp_fixed.pdf") | |
# ASLSQP with Learning Rate Adaptation | |
# Parameters | |
etamin = 1e-6 | |
tau = 5 | |
nstep = maxeval = int(2e5) | |
tolgap = 1e-7 | |
# Experiment | |
np.random.seed(100) | |
x0 = np.random.randn(m) | |
y0 = np.random.randn(n) | |
x, y, res = aslsqp(f, df, x0, y0, etamin, maxeval, tolgap=tolgap, tau=tau) | |
# Plot | |
gap = np.zeros(res.shape[0]) | |
for t in range(len(res)): | |
xt = res[t, 6:6+m] | |
yt = res[t, 6+m:6+m+n] | |
gap[t] = g(xt, yt) | |
fig = plt.figure() | |
plt.semilogy(res[:, 0], gap, label="G(x, y)") | |
plt.semilogy(res[:, 0], res[:, 3] - res[:, 2], label="F(x, y)") | |
plt.semilogy(res[:, 0], res[:, -1], label="eta") | |
plt.grid() | |
plt.ylim(1e-6, 1e6) | |
plt.xlabel("#f-calls") | |
plt.legend() | |
plt.tight_layout() | |
plt.savefig("aslsqp.pdf") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment