Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
BrendelBethge attacks
import logging
from foolbox.utils import onehot_like
from foolbox.attacks.base import Attack
from foolbox.attacks.base import generator_call_decorator
import numpy as np
from numba import jitclass
import line_profiler
import atexit
import os
import functools
import time
# Module-level logger; the logger itself accepts INFO and above.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# file handler for log files
# The log file name carries a random integer suffix, so every import of this
# module opens a new "<module path>_<random int>.log" file.
filename = np.random.randint(10000000)
path = os.path.realpath(__file__)
path = path.split(".py")[0]
file_handler = logging.FileHandler(f"{path}_{filename}.log")
formatter = logging.Formatter("%(asctime)s : %(levelname)s : %(message)s")
file_handler.setFormatter(formatter)
# add file handler to logger
logger.addHandler(file_handler)
# output to stdout
# create console handler and set level to debug
# NOTE(review): the handler level is actually WARNING, and `ch` is never
# attached to `logger` in the visible part of this file -- confirm whether
# `logger.addHandler(ch)` was intended.
ch = logging.StreamHandler()
ch.setLevel(logging.WARNING)
# Square root of the float machine epsilon (not referenced in the visible code).
_epsilon = np.sqrt(np.finfo(float).eps)
# A line profiler is created at import time and its statistics are printed
# when the interpreter exits.
profile = line_profiler.LineProfiler()
atexit.register(profile.print_stats)
# Small constant used below to keep inputs strictly inside the bounds and to
# guard divisions.
EPS = 1e-10
# Short alias for the vector/matrix norm.
norm = np.linalg.norm
class BrendelBethgeAttack(Attack):
""" Base class for the Brendel & Bethge adversarial attack described in [1]_.

This base class contains the metric-independent attack loop. The methods in
this class call ``self.instantiate_optimizer()`` (which raises
``NotImplementedError`` here) and ``self.mid_point(...)``; the metric-specific
subclasses provide both.

References
----------
.. [1] Wieland Brendel, Jonas Rauber, Matthias Kümmerer, Ivan Ustyuzhaninov, Matthias Bethge,
"Accurate, reliable and fast robustness evaluation",
https://arxiv.org/abs/1907.01003
"""
@generator_call_decorator
def __call__(
self,
input_or_adv,
label=None,
unpack=True,
epsilon=None,
overshoot=1.1,
max_iterations=100,
lr=1,
momentum=0.8,
starting_point=None,
):
"""The Brendel & Bethge adversarial attack.

This is a generator: model queries are issued with ``yield from`` on
``a.predictions`` / ``a.backward``. The method has no explicit return value
(it only uses bare ``return`` / ``return None``); presumably the best
adversarial is tracked by the ``Adversarial`` object -- confirm against the
decorator.

Parameters
----------
input_or_adv : `numpy.ndarray` or :class:`Adversarial`
The original, unperturbed input as a `numpy.ndarray` or
an :class:`Adversarial` instance.
label : int
The reference label of the original input. Must be passed
if `a` is a `numpy.ndarray`, must not be passed if `a` is
an :class:`Adversarial` instance.
unpack : bool
If true, returns the adversarial input, otherwise returns
the Adversarial object.
max_iterations : int
Number of steps for the optimization.
epsilon : float or None, optional
If not None the optimisation stops once it reaches a distance
epsilon between original and adversarial image.
lr : float
Scaling factor for the trust region radius. Smaller values lead
to smaller steps in each iteration which allows the attack to stay
in closer vicinity to the decision boundary.
overshoot: float, optional
A small overshoot biases the attack to stay in the adversarial region
(instead of sticking exactly to the decision boundary). Value should be
slightly above 1.
momentum: float, optional
Momentum term that smoothes the normal vector of the decision boundary
over several steps.
starting_point : optional
Forwarded to ``_get_starting_point``: ``None`` or the string ``"random"``
triggers a random search, a callable is called with the Adversarial
object, and any other value is used directly as the starting input.
"""
logger.info('Starting attack')
a = input_or_adv
# label and unpack are not used in the body below; they are deleted right away.
del input_or_adv
del label
del unpack
if not a.has_gradient():
logger.fatal(
"Applied gradient-based attack to model that "
"does not provide gradients."
)
return
min_, max_ = a.bounds()
original_image = a.original_image
original_label = a.original_class
bounds = a.bounds()
x0 = original_image.flatten()
# make abortion condition available everywhere
self.epsilon = epsilon
# instantiate optimizer
_optimizer = self.instantiate_optimizer()
# test initial point
# NOTE(review): starting_point is passed to the model as-is here, even when
# it is None, a string or a callable -- confirm this is intended.
logits, is_adv = yield from a.predictions(starting_point)
# get suitable starting point
x = yield from self._get_starting_point(
a, bounds, original_image, starting_point=starting_point
)
if self.epsilon is not None:
if a.distance.value <= epsilon:
return None
# The trust-region radius is lr times (number of elements * value range).
rate_normalization = np.prod(x.shape) * (max_ - min_)
# stop if last three updates of adv_distance is not decreasing
adv_distance_history = []
# counts how often the solver step could not be added to x (TypeError below)
num_of_nones = 0
logger.info(f'Batch size is {x.shape[0]}')
for i in range(max_iterations):
logger.info(f"Starting iteration number {i}")
# get logits and local boundary geometry
try:
# keep x strictly inside the bounds before querying the model
x = np.clip(x, min_ + EPS, max_ - EPS)
logits, is_adv = yield from a.predictions(x)
except AssertionError:
# NOTE(review): the extra positional arguments have no matching %-placeholders
# in the message, so the logging module will report a formatting error.
logger.exception("Assertion Error: ", x.min(), x.max(), min_, max_)
logits_diff, _boundary = yield from self.normal_vector(a, x, logits)
# stop when the gradient (boundary normal) has vanished
if _boundary.flatten().dot(_boundary.flatten()) < 1e-13:
break
# denoise estimate of boundary using a short history of the boundary
if i == 0:
boundary = _boundary
else:
boundary = (1 - momentum) * _boundary + momentum * boundary
if boundary.flatten().dot(boundary.flatten()) < 1e-13:
break
logger.debug(
f"Iteration {i}: logit = {logits[a.original_class]:.2}, logit-diff = {logits_diff:.2}, "
f"current dist. = {float(a.normalized_distance(x).value):.4}, best distance = {float(a.distance.value):.4}"
)
# compute optimal step within trust region depending on metric
original_shape = x.shape
x = x.flatten()
region = lr * rate_normalization
# we aim to slight overshoot over the boundary to stay within the adversarial region
corr_logits_diff = (
overshoot * logits_diff
if logits_diff < 0
else (2 - overshoot) * logits_diff
)
# employ solver to find optimal step within trust region
delta = _optimizer.solve(
x0,
x,
boundary.flatten(),
bounds[0],
bounds[1],
corr_logits_diff,
region,
)
# A TypeError here (presumably delta is None -- TODO confirm) is tolerated
# up to five times before the attack gives up.
try:
x += delta
except TypeError:
if num_of_nones < 5:
num_of_nones += 1
else:
break
# add step to current perturbation
x = x.reshape(original_shape)
x = np.clip(x, min_, max_)
# early stopping if progress stalls
# NOTE(review): the original nesting of the following section is not
# recoverable from this de-indented text; `past_distance` is only assigned
# once more than four distances were recorded, so the later use may hit an
# unbound name depending on the intended nesting -- verify.
if logits_diff > 0:
adv_distance_history.append(a.distance.value)
if len(adv_distance_history) > 4:
past_distance = adv_distance_history[-4]
if np.abs(a.distance.value - past_distance) / past_distance < 0.001:
logger.debug(
f"Progress is too slow. Finishing optimization after {i} steps."
)
# the step size is halved only with probability 1/10
if np.random.randint(10) == 5:
lr /= 2
# NOTE(review): `lr` is passed as a logging argument without a placeholder.
logger.debug("new lr = ", lr)
# sometimes distance_value is not really updated any more because the logit-diff is < 0
if logits_diff < 0:
if (
np.abs(a.normalized_distance(x).value - past_distance)
/ past_distance
< 0.001
):
logger.debug(
f"Progress is too slow. Finishing optimization after {i} steps."
)
if np.random.randint(10) == 5:
lr /= 2
# NOTE(review): same missing placeholder as above.
logger.debug("new lr = ", lr)
# stop as soon as the requested distance has been reached
if epsilon is not None:
if a.distance.value <= epsilon:
break
# stop once the step size scaling has become negligible
if lr < 1e-6:
break
def _initial_binary_search(self, x0, x1, a, max_iter=10, corridor=0.1):
    """Bisect between ``x0`` and ``x1`` to find a point near the boundary.

    The interpolation weight starts at 0.5 and is halved towards the side
    on which the prediction flips. The search ends after ``max_iter``
    steps, once the relative logit gap between the original class and the
    strongest other class is below ``corridor``, or once the best distance
    recorded on ``a`` is at most ``self.epsilon`` (if set).

    This is a generator; model queries go through ``a.predictions``.
    Returns the last interpolated point that was evaluated.
    """
    lower, upper = 0, 1
    for _ in range(max_iter):
        half = (lower + upper) / 2
        mid_point = self.mid_point(x0, x1, half, a.bounds())
        logits, adv = yield from a.predictions(mid_point)

        # strongest class that is not the original class
        ranked = np.argsort(logits)[::-1]
        l1, l2 = ranked[:2]
        if l1 != a.original_class:
            sec_label = l1
        else:
            sec_label = l2
        log1 = logits[sec_label]
        log2 = logits[a.original_class]

        # close enough to the decision boundary
        if abs(log1 - log2) / abs(log2) < corridor:
            break
        # requested distance already reached
        if self.epsilon is not None and a.distance.value <= self.epsilon:
            break

        if adv:
            upper = half
        else:
            lower = half
    return mid_point
def _get_starting_point(self, a, bounds, x0, corridor=0.1, starting_point=None):
    """Produce an adversarial starting point close to the decision boundary.

    Parameters
    ----------
    a : Adversarial
        Object used to query the model (``a.predictions``).
    bounds : tuple
        ``(min, max)`` of the allowed input values.
    x0 : numpy.ndarray
        The original input.
    corridor : float
        Forwarded to ``_initial_binary_search``.
    starting_point : None, str, callable or numpy.ndarray
        ``None`` or ``"random"``: draw uniform noise until the model
        classifies it as adversarial. A callable is invoked as
        ``starting_point(a)``. Anything else is used as the starting
        input directly.

    Returns
    -------
    The point found by the binary search between ``x0`` and the starting
    input.

    Raises
    ------
    ValueError
        If ``starting_point`` is a string other than ``"random"``.
    """
    if starting_point is None:
        logger.debug(
            "No starting point is given. Falling back to random initialization, but other starting points "
            "like FGSM or DeepFool might be more query efficient and better."
        )
        starting_point = "random"

    if isinstance(starting_point, str):
        if starting_point != "random":
            # Previously this fell through and failed later with an unbound `x`.
            raise ValueError(
                f"Unknown starting point strategy: {starting_point!r}"
            )
        logger.debug("Starting random search")
        # this could potentially be done better for different metrics
        while True:  # draw noise until image is adversarial
            x = np.random.uniform(bounds[0], bounds[1], size=x0.shape).astype(
                np.float32
            )
            # Every new draw must be evaluated; without this query the loop
            # condition could never change and the search would never end.
            logits, adv = yield from a.predictions(x)
            if adv:
                break
            logger.debug(
                "Random draw is not adversarial (original class %s, predicted class %s)",
                a.original_class,
                logits.argmax(),
            )
        logger.debug("Perform binary search")
    elif callable(starting_point):
        x = starting_point(a)
    else:
        x = starting_point

    x = yield from self._initial_binary_search(
        x0, x, a, max_iter=10, corridor=corridor
    )
    return x
@classmethod
def normal_vector(cls, a, x, logits):
    """Return the negated logit difference and its gradient w.r.t. ``x``.

    ``logits`` is assumed to be the model output for ``x``. For a targeted
    attack the difference is (best non-target logit - target logit),
    otherwise (original-class logit - best other logit). The gradient of
    that difference is obtained through ``a.backward`` (generator call).
    """
    if a.target_class() is not None:
        # targeted: push the target class above every other class
        c_minimize = cls.best_other_class(logits, a.target_class())
        c_maximize = a.target_class()
    else:
        # untargeted: push any other class above the original class
        c_minimize = a.original_class
        c_maximize = cls.best_other_class(logits, a.original_class)

    logits_diff = logits[c_minimize] - logits[c_maximize]

    # d(logits_diff)/d(logits): +1 for the minimized, -1 for the maximized class
    upstream = np.zeros_like(logits)
    upstream[c_minimize] = 1
    upstream[c_maximize] = -1

    gradient = yield from a.backward(upstream, x)
    return -logits_diff, gradient
@staticmethod
def best_other_class(logits, exclude):
    """Return the index of the largest logit other than ``exclude``.

    The excluded entry is pushed to minus infinity by subtracting a
    one-hot vector whose single non-zero value is ``inf``.
    """
    penalty = onehot_like(logits, exclude, value=np.inf)
    remaining = logits - penalty
    return np.argmax(remaining)
def instantiate_optimizer(self):
    """Return the metric-specific trust-region optimizer.

    The base class has no optimizer; subclasses override this method.
    """
    raise NotImplementedError()
class BrendelBethgeL2Attack(BrendelBethgeAttack):
""" Brendel & Bethge adversarial attack described in [1]_ that minimizes the L2 distance.

References
----------
.. [1] Wieland Brendel, Jonas Rauber, Matthias Kümmerer, Ivan Ustyuzhaninov, Matthias Bethge,
"Accurate, reliable and fast robustness evaluation",
https://arxiv.org/abs/1907.01003
"""
# Factory hook called by the base class attack loop: the L2 variant uses the
# L2-specific optimizer.
def instantiate_optimizer(self):
return L2Optimizer()
def mid_point(self, x0, x1, epsilon, bounds):
    """Linearly interpolate between ``x0`` and ``x1``.

    ``epsilon = 0`` yields ``x0`` and ``epsilon = 1`` yields ``x1``.
    ``bounds`` is accepted for interface parity and not used here.
    """
    weight_x1 = epsilon
    weight_x0 = 1 - epsilon
    return weight_x1 * x1 + weight_x0 * x0
class BrendelBethgeLinfAttack(BrendelBethgeAttack):
""" Brendel & Bethge adversarial attack described in [1]_ that minimizes the L-infinity distance.

References
----------
.. [1] Wieland Brendel, Jonas Rauber, Matthias Kümmerer, Ivan Ustyuzhaninov, Matthias Bethge,
"Accurate, reliable and fast robustness evaluation",
https://arxiv.org/abs/1907.01003
"""
# Factory hook called by the base class attack loop: the L-infinity variant
# uses the Linf-specific optimizer.
def instantiate_optimizer(self):
return LinfOptimizer()
def mid_point(self, x0, x1, epsilon, bounds):
    """Move from ``x0`` towards ``x1`` with a per-element step limit.

    Each coordinate of ``x1 - x0`` is clipped to at most ``epsilon`` times
    the value range in absolute value, so ``epsilon = 0`` returns ``x0``.
    """
    lower, upper = bounds
    max_step = epsilon * (upper - lower)
    step = np.clip(x1 - x0, -max_step, max_step)
    return x0 + step
class BrendelBethgeL1Attack(BrendelBethgeAttack):
""" Brendel & Bethge adversarial attack described in [1]_ that minimizes the L1 distance.

References
----------
.. [1] Wieland Brendel, Jonas Rauber, Matthias Kümmerer, Ivan Ustyuzhaninov, Matthias Bethge,
"Accurate, reliable and fast robustness evaluation",
https://arxiv.org/abs/1907.01003
"""
# Factory hook called by the base class attack loop: the L1 variant uses the
# L1-specific optimizer.
def instantiate_optimizer(self):
return L1Optimizer()
def mid_point(self, x0, x1, epsilon, bounds):
    """Move from ``x0`` towards ``x1`` by shrinking large differences.

    With ``threshold = range * (1 - epsilon)``, only coordinates whose
    absolute difference exceeds the threshold are changed, and they move
    by the excess over the threshold. ``epsilon = 0`` returns ``x0`` and
    ``epsilon = 1`` returns ``x1``.
    """
    diff = x1 - x0
    magnitude = np.abs(diff)
    threshold = (bounds[1] - bounds[0]) * (1 - epsilon)
    exceeds = magnitude > threshold

    result = x0.copy()
    result[exceeds] += (np.sign(diff) * (magnitude - threshold))[exceeds]
    return result
class BrendelBethgeL0Attack(BrendelBethgeAttack):
""" Brendel & Bethge adversarial attack described in [1]_ that minimizes the L0 distance.

References
----------
.. [1] Wieland Brendel, Jonas Rauber, Matthias Kümmerer, Ivan Ustyuzhaninov, Matthias Bethge,
"Accurate, reliable and fast robustness evaluation",
https://arxiv.org/abs/1907.01003
"""
# Factory hook called by the base class attack loop: the L0 variant uses the
# L0-specific optimizer.
def instantiate_optimizer(self):
return L0Optimizer()
def mid_point(self, x0, x1, epsilon, bounds):
    """Move from ``x0`` towards ``x1`` by copying whole coordinates.

    Coordinates whose absolute difference is at most ``epsilon`` times the
    value range are taken from ``x1``; all others keep the value of ``x0``.
    ``epsilon = 0`` returns ``x0`` and ``epsilon = 1`` returns ``x1``.

    The comparison is inclusive so that the ``epsilon = 1`` end point also
    holds for coordinates that differ by the full value range (a strict
    comparison would leave those at ``x0``). At ``epsilon = 0`` the
    inclusive test only selects coordinates where both inputs already
    agree, so that end point is unaffected.

    Parameters
    ----------
    x0, x1 : numpy.ndarray
        Start and end point (same shape). Neither is modified.
    epsilon : float
        Interpolation parameter in [0, 1].
    bounds : tuple
        ``(min, max)`` of the allowed input values.
    """
    threshold = (bounds[1] - bounds[0]) * epsilon
    mask = np.abs(x1 - x0) <= threshold
    new_x = x0.copy()
    new_x[mask] = x1[mask]
    return new_x
# Box-constrained BFGS solver. The class is compiled with numba's jitclass
# and declares no typed attributes (empty spec).
@jitclass(spec=[])
class BFGSB(object):
# The solver keeps no state between calls.
def __init__(self):
pass
def solve(
    self, fun_and_jac, q0, bounds, args, ftol=1e-10, pgtol=-1e-5, maxiter=None
):
    """Minimize a function under box constraints with a BFGS-type iteration.

    Parameters
    ----------
    fun_and_jac : callable
        Called as ``fun_and_jac(q, *args)``; must return the function
        value and its gradient at ``q``.
    q0 : numpy.ndarray
        Starting point (1-D).
    bounds : numpy.ndarray
        Array of shape (N, 2) holding the lower and upper bound of every
        coordinate.
    args : tuple
        Extra arguments forwarded to ``fun_and_jac``.
    ftol : float
        The iteration stops when the change of the function value between
        two steps is at most ``ftol * (1 + |f|)``.
    pgtol : float
        Threshold on the largest projected-gradient component. The
        component magnitudes are non-negative, so the negative default
        never triggers this criterion.
    maxiter : int or None
        Maximum number of iterations; ``None`` means ``200 * N``.

    Returns
    -------
    numpy.ndarray
        The last iterate.
    """
    N = q0.shape[0]
    if maxiter is None:
        maxiter = N * 200

    l = bounds[:, 0]  # np.array([b[0] for b in bounds])
    u = bounds[:, 1]  # np.array([b[1] for b in bounds])

    old_fval, gfk = fun_and_jac(q0, *args)

    k = 0
    Hk = np.eye(N)

    # Sets the initial step guess to dx ~ 1
    qk = q0
    old_old_fval = old_fval + np.linalg.norm(gfk) / 2
    _gfk = gfk

    # Compare with implementation BFGS-B implementation
    # in https://github.com/andrewhooker/PopED/blob/master/R/bfgsb_min.R
    while k < maxiter:
        # check if projected gradient is still large enough
        pg_norm = 0
        for v in range(N):
            if _gfk[v] < 0:
                gv = max(qk[v] - u[v], _gfk[v])
            else:
                gv = min(qk[v] - l[v], _gfk[v])
            if pg_norm < np.abs(gv):
                pg_norm = np.abs(gv)
        if pg_norm < pgtol:
            break

        # get cauchy point, then refine it on the free variables
        x_cp = self._cauchy_point(qk, l, u, _gfk.copy(), Hk)
        qk1 = self._subspace_min(qk, l, u, x_cp, _gfk.copy(), Hk)
        pk = qk1 - qk

        alpha_k, fc, gc, old_fval, old_old_fval, gfkp1, fnev = self._line_search_wolfe(
            fun_and_jac, qk, pk, _gfk, old_fval, old_old_fval, l, u, args
        )

        if alpha_k is None:
            # the line search could not produce a step
            break
        if np.abs(old_fval - old_old_fval) <= (ftol + ftol * np.abs(old_fval)):
            # the function value no longer changes
            break

        qkp1 = self._project(qk + alpha_k * pk, l, u)
        if gfkp1 is None:
            # the line search did not return a gradient for the new point
            _, gfkp1 = fun_and_jac(qkp1, *args)

        sk = qkp1 - qk
        qk = qkp1

        # gradient change; tiny components are replaced by -1e-4
        yk = np.zeros_like(qk)
        for k3 in range(N):
            yk[k3] = gfkp1[k3] - _gfk[k3]
            if np.abs(yk[k3]) < 1e-4:
                yk[k3] = -1e-4

        _gfk = gfkp1
        k += 1

        # update the matrix Hk with the guarded BFGS formula
        Hk_sk = Hk.dot(sk)
        sk_yk = 0
        sk_Hk_sk = 0
        for v in range(N):
            sk_yk += sk[v] * yk[v]
            sk_Hk_sk += sk[v] * Hk_sk[v]

        # guard both reciprocals against near-zero denominators
        if np.abs(sk_yk) >= 1e-8:
            rhok = 1.0 / sk_yk
        else:
            rhok = 100000.0
        if np.abs(sk_Hk_sk) >= 1e-8:
            rsk_Hk_sk = 1.0 / sk_Hk_sk
        else:
            rsk_Hk_sk = 100000.0

        for v in range(N):
            for w in range(N):
                # change this to the update formula of the Hessian (not inverse Hessian)
                # (uses rsk_Hk_sk; the previous spelling was an undefined name)
                Hk[v, w] += yk[v] * yk[w] * rhok - Hk_sk[v] * Hk_sk[w] * rsk_Hk_sk

    return qk
# Computes a Cauchy point for the box-constrained quadratic model around x.
# Arguments as used below: x current point, l/u lower/upper bounds,
# g gradient at x, B the matrix used for the quadratic term.
# Returns x unchanged when the curvature along the initial direction is
# (almost) zero, otherwise the computed Cauchy point.
def _cauchy_point(self, x, l, u, g, B):
# finds the cauchy point for q(x)=x'Gx+x'd s$t. l<=x<=u
# g=G*x+d #gradient of q(x)
# converted from r-code: https://github.com/andrewhooker/PopED/blob/master/R/cauchy_point.R
n = x.shape[0]
# t[i]: step length along -g at which coordinate i hits its bound
# d: search direction (negative gradient, zero for coordinates already at a bound)
t = np.zeros_like(x)
d = np.zeros_like(x)
for i in range(n):
if g[i] < 0:
t[i] = (x[i] - u[i]) / g[i]
elif g[i] > 0:
t[i] = (x[i] - l[i]) / g[i]
elif g[i] == 0:
t[i] = np.inf
if t[i] == 0:
d[i] = 0
else:
d[i] = -g[i]
# sorted non-zero break points
ts = t.copy()
ts = ts[ts != 0]
ts = np.sort(ts)
# first and second directional derivative of the model along d
df = g.dot(d)
d2f = d.dot(B.dot(d))
if d2f < 1e-10:
return x
dt_min = -df / d2f
t_old = 0
i = 0
z = np.zeros_like(x)
# walk from break point to break point while the minimizer of the current
# segment lies beyond the next break point
while i < ts.shape[0] and dt_min >= (ts[i] - t_old):
ind = ts[i] < t
d[~ind] = 0
z = z + (ts[i] - t_old) * d
df = g.dot(d) + d.dot(B.dot(z))
d2f = d.dot(B.dot(d))
# NOTE(review): unlike the initial value above, this update has no minus
# sign -- confirm against the referenced R code.
dt_min = df / (d2f + 1e-8)
t_old = ts[i]
i += 1
dt_min = max(dt_min, 0)
t_old = t_old + dt_min
x_cp = x - t_old * g
# coordinates whose break point was passed are placed at x - t * g
temp = x - t * g
x_cp[t_old > t] = temp[t_old > t]
return x_cp
# Refines the Cauchy point x_cp by solving the quadratic model on the
# coordinates that are not marked as fixed, followed by a step-length
# reduction so that the bounds l/u are respected.
# Arguments as used below: x current point, l/u bounds, x_cp Cauchy point,
# d gradient at x (overwritten in place), G the matrix of the quadratic model.
def _subspace_min(self, x, l, u, x_cp, d, G):
# converted from r-code: https://github.com/andrewhooker/PopED/blob/master/R/subspace_min.R
n = x.shape[0]
Z = np.eye(n)
# NOTE(review): the lower test uses a tolerance of 1e-8 but the upper test
# subtracts 1e8 -- this looks like a typo for 1e-8. With an infinite upper
# bound the test is never true, with a finite one it is true for almost any
# value; confirm before changing.
fixed = (x_cp <= l + 1e-8) + (x_cp >= u - 1e8)
if np.all(fixed):
x = x_cp
return x
# columns of the identity that belong to the free coordinates
Z = Z[:, ~fixed]
# reduced gradient and reduced matrix (with a small diagonal regularizer)
rgc = Z.T.dot(d + G.dot(x_cp - x))
rB = Z.T.dot(G.dot(Z)) + 1e-10 * np.eye(Z.shape[1])
d[~fixed] = np.linalg.solve(rB, rgc)
d[~fixed] = -d[~fixed]
# shrink the step length alpha so that no free coordinate leaves the box
alpha = 1
temp1 = alpha
for i in np.arange(n)[~fixed]:
dk = d[i]
if dk < 0:
temp2 = l[i] - x_cp[i]
if temp2 >= 0:
temp1 = 0
else:
if dk * alpha < temp2:
temp1 = temp2 / dk
else:
temp2 = u[i] - x_cp[i]
else:
temp2 = u[i] - x_cp[i]
if temp1 <= 0:
temp1 = 0
else:
if dk * alpha > temp2:
temp1 = temp2 / dk
alpha = min(temp1, alpha)
return x_cp + alpha * Z.dot(d[~fixed])
def _project(self, q, l, u):
N = q.shape[0]
for k in range(N):
if q[k] < l[k]:
q[k] = l[k]
elif q[k] > u[k]:
q[k] = u[k]
return q
def _line_search_armijo(
self, fun_and_jac, pt, dpt, func_calls, m, gk, l, u, x0, x, b, min_, max_, c, r
):
ls_rho = 0.6
ls_c = 1e-4
ls_alpha = 1
t = m * ls_c
for k2 in range(100):
ls_pt = self._project(pt + ls_alpha * dpt, l, u)
gkp1, dgkp1 = fun_and_jac(ls_pt, x0, x, b, min_, max_, c, r)
func_calls += 1
if gk - gkp1 >= ls_alpha * t:
break
else:
ls_alpha *= ls_rho
return ls_alpha, ls_pt, gkp1, dgkp1, func_calls
def _line_search_wolfe(
self, fun_and_jac, xk, pk, gfk, old_fval, old_old_fval, l, u, args
):
"""Find alpha that satisfies strong Wolfe conditions.

Uses the line search algorithm to enforce strong Wolfe conditions
Wright and Nocedal, 'Numerical Optimization', 1999, pg. 59-60

The search has a bracketing phase (step length doubled each round) and a
zoom phase that is inlined twice below. The zoom phase interpolates with
``_cubicmin`` / ``_quadmin`` and falls back to bisection. Every trial
point is projected onto the box ``[l, u]`` before it is evaluated.

Parameters: ``fun_and_jac`` is called as ``fun_and_jac(x, *args)`` and
returns value and gradient; ``xk`` is the current point, ``pk`` the search
direction, ``gfk`` the gradient at ``xk``; ``old_fval`` / ``old_old_fval``
are the function values at the current and the previous iterate.

Returns the tuple
``(alpha_star, _ls_fc, _ls_fc, fval_star, old_fval, fprime_star, _ls_fc)``
where ``_ls_fc`` is the evaluation counter kept below, ``fval_star`` the
value at the accepted step and ``fprime_star`` the gradient there, or None
when the iteration limit was hit.
"""
# constants of the sufficient-decrease (c1) and curvature (c2) conditions
c1 = 1e-4
c2 = 0.9
N = xk.shape[0]
_ls_fc = 0
_ls_ingfk = None
alpha0 = 0
phi0 = old_fval
# directional derivative at alpha = 0, kept away from exactly zero
derphi0 = 0
for v in range(N):
derphi0 += gfk[v] * pk[v]
if derphi0 == 0:
derphi0 = 1e-8
elif np.abs(derphi0) < 1e-8:
derphi0 = np.sign(derphi0) * 1e-8
# initial trial step, capped at 1
alpha1 = min(1.0, 1.01 * 2 * (phi0 - old_old_fval) / derphi0)
# print(
# " (ls) initial alpha1: ",
# alpha1,
# phi0 - old_old_fval,
# phi0,
# old_old_fval,
# derphi0,
# )
if alpha1 == 0:
# This shouldn't happen. Perhaps the increment has slipped below
# machine precision? For now, set the return variables skip the
# useless while loop, and raise warnflag=2 due to possible imprecision.
# print("Slipped below machine precision.")
alpha_star = None
fval_star = old_fval
old_fval = old_old_fval
fprime_star = None
_xkp1 = self._project(xk + alpha1 * pk, l, u)
phi_a1, _ls_ingfk = fun_and_jac(_xkp1, *args)
_ls_fc += 1
# derphi_a1 = phiprime(alpha1) evaluated below
phi_a0 = phi0
derphi_a0 = derphi0
i = 1
maxiter = 10
while 1: # bracketing phase
# print(" (ls) in while loop: ", alpha1, alpha0)
if alpha1 == 0:
break
# sufficient decrease violated (or value not improving): zoom between
# alpha0 and alpha1
if (phi_a1 > phi0 + c1 * alpha1 * derphi0) or (
(phi_a1 >= phi_a0) and (i > 1)
):
# inlining zoom for performance reasons
# alpha0, alpha1, phi_a0, phi_a1, derphi_a0, phi0, derphi0, pk, xk
# zoom signature: (a_lo, a_hi, phi_lo, phi_hi, derphi_lo, phi0, derphi0, pk, xk)
# INLINE START
k = 0
delta1 = 0.2 # cubic interpolant check
delta2 = 0.1 # quadratic interpolant check
phi_rec = phi0
a_rec = 0
a_hi = alpha1
a_lo = alpha0
phi_lo = phi_a0
phi_hi = phi_a1
derphi_lo = derphi_a0
while 1:
# interpolate to find a trial step length between a_lo and a_hi
# Need to choose interpolation here. Use cubic interpolation and then if the
# result is within delta * dalpha or outside of the interval bounded by a_lo or a_hi
# then use quadratic interpolation, if the result is still too close, then use bisection
dalpha = a_hi - a_lo
if dalpha < 0:
a, b = a_hi, a_lo
else:
a, b = a_lo, a_hi
# minimizer of cubic interpolant
# (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi)
# if the result is too close to the end points (or out of the interval)
# then use quadratic interpolation with phi_lo, derphi_lo and phi_hi
# if the result is still too close to the end points (or out of the interval)
# then use bisection
if k > 0:
cchk = delta1 * dalpha
a_j = self._cubicmin(
a_lo, phi_lo, derphi_lo, a_hi, phi_hi, a_rec, phi_rec
)
if (
(k == 0)
or (a_j is None)
or (a_j > b - cchk)
or (a_j < a + cchk)
):
qchk = delta2 * dalpha
a_j = self._quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
if (a_j is None) or (a_j > b - qchk) or (a_j < a + qchk):
a_j = a_lo + 0.5 * dalpha
# print "Using bisection."
# else: print "Using quadratic."
# else: print "Using cubic."
# Check new value of a_j
_xkp1 = self._project(xk + a_j * pk, l, u)
# if _xkp1[1] < 0:
# _xkp1[1] = 0
phi_aj, _ls_ingfk = fun_and_jac(_xkp1, *args)
derphi_aj = 0
for v in range(N):
derphi_aj += _ls_ingfk[v] * pk[v]
if (phi_aj > phi0 + c1 * a_j * derphi0) or (phi_aj >= phi_lo):
phi_rec = phi_hi
a_rec = a_hi
a_hi = a_j
phi_hi = phi_aj
else:
# curvature condition satisfied: accept a_j
if abs(derphi_aj) <= -c2 * derphi0:
a_star = a_j
val_star = phi_aj
valprime_star = _ls_ingfk
break
if derphi_aj * (a_hi - a_lo) >= 0:
phi_rec = phi_hi
a_rec = a_hi
a_hi = a_lo
phi_hi = phi_lo
else:
phi_rec = phi_lo
a_rec = a_lo
a_lo = a_j
phi_lo = phi_aj
derphi_lo = derphi_aj
k += 1
# give up after maxiter zoom steps; no gradient is returned in that case
if k > maxiter:
a_star = a_j
val_star = phi_aj
valprime_star = None
break
alpha_star = a_star
fval_star = val_star
fprime_star = valprime_star
fnev = k
## INLINE END
_ls_fc += fnev
break
i += 1
# NOTE(review): if this limit is reached before any branch assigned
# alpha_star / fval_star / fprime_star, the return statement below would
# reference unassigned names -- verify.
if i > maxiter:
break
_xkp1 = self._project(xk + alpha1 * pk, l, u)
_, _ls_ingfk = fun_and_jac(_xkp1, *args)
derphi_a1 = 0
for v in range(N):
derphi_a1 += _ls_ingfk[v] * pk[v]
_ls_fc += 1
# both Wolfe conditions hold at alpha1: accept it
if abs(derphi_a1) <= -c2 * derphi0:
alpha_star = alpha1
fval_star = phi_a1
fprime_star = _ls_ingfk
break
# slope has turned non-negative: zoom between alpha1 and alpha0
if derphi_a1 >= 0:
# alpha_star, fval_star, fprime_star, fnev, _ls_ingfk = _zoom(
# alpha1, alpha0, phi_a1, phi_a0, derphi_a1, phi0, derphi0, pk, xk
# )
#
# INLINE START
maxiter = 10
k = 0
delta1 = 0.2 # cubic interpolant check
delta2 = 0.1 # quadratic interpolant check
phi_rec = phi0
a_rec = 0
a_hi = alpha0
a_lo = alpha1
phi_lo = phi_a1
phi_hi = phi_a0
derphi_lo = derphi_a1
while 1:
# interpolate to find a trial step length between a_lo and a_hi
# Need to choose interpolation here. Use cubic interpolation and then if the
# result is within delta * dalpha or outside of the interval bounded by a_lo or a_hi
# then use quadratic interpolation, if the result is still too close, then use bisection
dalpha = a_hi - a_lo
if dalpha < 0:
a, b = a_hi, a_lo
else:
a, b = a_lo, a_hi
# minimizer of cubic interpolant
# (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi)
# if the result is too close to the end points (or out of the interval)
# then use quadratic interpolation with phi_lo, derphi_lo and phi_hi
# if the result is still too close to the end points (or out of the interval)
# then use bisection
if k > 0:
cchk = delta1 * dalpha
a_j = self._cubicmin(
a_lo, phi_lo, derphi_lo, a_hi, phi_hi, a_rec, phi_rec
)
if (
(k == 0)
or (a_j is None)
or (a_j > b - cchk)
or (a_j < a + cchk)
):
qchk = delta2 * dalpha
a_j = self._quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
if (a_j is None) or (a_j > b - qchk) or (a_j < a + qchk):
a_j = a_lo + 0.5 * dalpha
# print "Using bisection."
# else: print "Using quadratic."
# else: print "Using cubic."
# Check new value of a_j
_xkp1 = self._project(xk + a_j * pk, l, u)
phi_aj, _ls_ingfk = fun_and_jac(_xkp1, *args)
# print("call #3: ", phi_aj, _xkp1)
derphi_aj = 0
for v in range(N):
derphi_aj += _ls_ingfk[v] * pk[v]
if (phi_aj > phi0 + c1 * a_j * derphi0) or (phi_aj >= phi_lo):
phi_rec = phi_hi
a_rec = a_hi
a_hi = a_j
phi_hi = phi_aj
else:
if abs(derphi_aj) <= -c2 * derphi0:
a_star = a_j
val_star = phi_aj
valprime_star = _ls_ingfk
break
if derphi_aj * (a_hi - a_lo) >= 0:
phi_rec = phi_hi
a_rec = a_hi
a_hi = a_lo
phi_hi = phi_lo
else:
phi_rec = phi_lo
a_rec = a_lo
a_lo = a_j
phi_lo = phi_aj
derphi_lo = derphi_aj
k += 1
if k > maxiter:
a_star = a_j
val_star = phi_aj
valprime_star = None
break
alpha_star = a_star
fval_star = val_star
fprime_star = valprime_star
fnev = k
## INLINE END
_ls_fc += fnev
break
alpha2 = 2 * alpha1 # increase by factor of two on each iteration
i = i + 1
alpha0 = alpha1
alpha1 = alpha2
phi_a0 = phi_a1
_xkp1 = self._project(xk + alpha1 * pk, l, u)
phi_a1, _ls_ingfk = fun_and_jac(_xkp1, *args)
# print("call #4: ", phi_a1, _xkp1)
_ls_fc += 1
derphi_a0 = derphi_a1
# stopping test if lower function not found
if i > maxiter:
alpha_star = alpha1
fval_star = phi_a1
fprime_star = None
break
return alpha_star, _ls_fc, _ls_fc, fval_star, old_fval, fprime_star, _ls_fc
def _cubicmin(self, a, fa, fpa, b, fb, c, fc):
# finds the minimizer for a cubic polynomial that goes through the
# points (a,fa), (b,fb), and (c,fc) with derivative at a of fpa.
#
# if no minimizer can be found return None
#
# f(x) = A *(x-a)^3 + B*(x-a)^2 + C*(x-a) + D
C = fpa
D = fa
db = b - a
dc = c - a
if (db == 0) or (dc == 0) or (b == c):
return None
denom = (db * dc) ** 2 * (db - dc)
A = dc ** 2 * (fb - fa - C * db) - db ** 2 * (fc - fa - C * dc)
B = -dc ** 3 * (fb - fa - C * db) + db ** 3 * (fc - fa - C * dc)
A /= denom
B /= denom
radical = B * B - 3 * A * C
if radical < 0:
return None
if A == 0:
return None
xmin = a + (-B + np.sqrt(radical)) / (3 * A)
return xmin
def _quadmin(self, a, fa, fpa, b, fb):
# finds the minimizer for a quadratic polynomial that goes through
# the points (a,fa), (b,fb) with derivative at a of fpa
# f(x) = B*(x-a)^2 + C*(x-a) + D
D = fa
C = fpa
db = b - a * 1.0
if db == 0:
return None
B = (fb - D - C * db) / (db * db)
if B <= 0:
return None
xmin = a - C / (2.0 * B)
return xmin
# jitclass attribute specification for the optimizer classes decorated with
# @jitclass(spec=spec): a single attribute "bfgsb" holding a BFGSB instance.
spec = [("bfgsb", BFGSB.class_type.instance_type)]
class Optimizer(object):
""" Base class for the trust-region optimization. If feasible, this optimizer solves the problem

min_delta distance(x0, x + delta) s.t. ||delta||_2 <= r AND delta^T b = c AND min_ <= x + delta <= max_

where x0 is the original sample, x is the current optimisation state, r is the trust-region radius,
b is the current estimate of the normal vector of the decision boundary, c is the estimated distance of x
to the trust region and [min_, max_] are the value constraints of the input. The function distance(.,.)
is the distance measure to be optimised (e.g. L2, L1, L0).

The methods of this class reference ``self.fun_and_jac`` and ``self._get_final_delta``, which are not
defined in this base class; presumably the metric-specific subclasses provide them.
"""
# Every optimizer owns one box-constrained solver instance.
def __init__(self):
self.bfgsb = BFGSB() # a box-constrained BFGS solver
def solve(self, x0, x, b, min_, max_, c, r):
    """Compute the next step ``delta`` for the current attack state.

    Depending on whether the estimated boundary ``b^T delta = c`` can be
    reached inside the box ``[min_, max_]`` and the trust region of radius
    ``r``, either the distance to ``x0`` is minimized on the boundary, or
    the largest possible step towards the boundary is taken.

    Parameters
    ----------
    x0, x, b : numpy.ndarray
        Original sample, current point and boundary normal (flattened);
        all three are converted to float64.
    min_, max_ : float
        Value bounds of the input.
    c : float
        Target value of ``b^T delta``.
    r : float
        Trust-region radius.
    """
    x0 = x0.astype(np.float64)
    x = x.astype(np.float64)
    b = b.astype(np.float64)

    cmax, cmaxnorm = self._max_logit_diff(x, b, min_, max_, c)

    if np.abs(cmax) < np.abs(c):
        # problem not solvable (boundary cannot be reached); whether the
        # bounds or the trust region are the limiting factor, the same
        # routine makes the largest possible step towards the boundary
        return self.optimize_boundary_s_t_trustregion(x0, x, b, min_, max_, c, r)

    if cmaxnorm < r:
        # problem is solvable, proceed with standard optimization
        return self.optimize_distance_s_t_boundary_and_trustregion(
            x0, x, b, min_, max_, c, r
        )

    # problem might not be solvable: compare the smallest step that reaches
    # the boundary with the trust-region radius
    bnorm = np.linalg.norm(b)
    minnorm = self._minimum_norm_to_boundary(x, b, min_, max_, c, bnorm)
    if minnorm <= r:
        return self.optimize_distance_s_t_boundary_and_trustregion(
            x0, x, b, min_, max_, c, r
        )
    # boundary cannot be reached: largest step towards it within trust region
    return self.optimize_boundary_s_t_trustregion(x0, x, b, min_, max_, c, r)
def _max_logit_diff(self, x, b, _ell, _u, c):
""" Tests whether the (estimated) boundary can be reached within trust region. """
N = x.shape[0]
cmax = 0.0
norm = 0.0
if c > 0:
for n in range(N):
if b[n] > 0:
cmax += b[n] * (_u - x[n])
norm += (_u - x[n]) ** 2
else:
cmax += b[n] * (_ell - x[n])
norm += (x[n] - _ell) ** 2
else:
for n in range(N):
if b[n] > 0:
cmax += b[n] * (_ell - x[n])
norm += (x[n] - _ell) ** 2
else:
cmax += b[n] * (_u - x[n])
norm += (_u - x[n]) ** 2
return cmax, np.sqrt(norm)
def _minimum_norm_to_boundary(self, x, b, _ell, _u, c, bnorm):
""" Computes the minimum norm necessary to reach the boundary. More precisely, we aim to solve the
following optimization problem

min ||delta||_2^2 s.t. lower <= x + delta <= upper AND b.dot(delta) = c

Lets forget about the box constraints for a second, i.e.

min ||delta||_2^2 s.t. b.dot(delta) = c

The dual of this problem is quite straight-forward to solve,

g(lambda, delta) = ||delta||_2^2 + lambda * (c - b.dot(delta))

The minimum of this Lagrangian is delta^* = lambda * b / 2, and so

inf_delta g(lambda, delta) = lambda^2 / 4 ||b||_2^2 + lambda * c

and so the optimal lambda, which maximizes inf_delta g(lambda, delta), is given by

lambda^* = 2c / ||b||_2^2

which in turn yields the optimal delta:

delta^* = c * b / ||b||_2^2

To take into account the box-constraints we perform a binary search over lambda and apply the box
constraint in each step.

Parameters as used below: ``x`` current point, ``b`` boundary normal, ``_ell`` / ``_u`` lower and
upper value bound, ``c`` target value of ``b.dot(delta)``, ``bnorm`` the L2 norm of ``b``.
Returns the L2 norm of the clipped step of the final search iteration.
"""
N = x.shape[0]
# start at the unconstrained optimum; the upper end of the bracket is
# unknown at first (infinite, with the sign of c)
lambda_lower = 2 * c / bnorm ** 2
lambda_upper = (
np.sign(c) * np.inf
) # optimal initial point (if box-constraints are neglected)
_lambda = lambda_lower
k = 0
# perform a binary search over lambda
# NOTE(review): the loop has no iteration limit; it only ends through the
# tolerance test at the bottom.
while True:
# compute _c = b.dot([- _lambda * b / 2]_clip)
k += 1
_c = 0
norm = 0
if c > 0:
for n in range(N):
lam_step = _lambda * b[n] / 2
if b[n] > 0:
max_step = _u - x[n]
delta_step = min(max_step, lam_step)
_c += b[n] * delta_step
norm += delta_step ** 2
else:
max_step = _ell - x[n]
delta_step = max(max_step, lam_step)
_c += b[n] * delta_step
norm += delta_step ** 2
else:
for n in range(N):
lam_step = _lambda * b[n] / 2
if b[n] > 0:
max_step = _ell - x[n]
delta_step = max(max_step, lam_step)
_c += b[n] * delta_step
norm += delta_step ** 2
else:
max_step = _u - x[n]
delta_step = min(max_step, lam_step)
_c += b[n] * delta_step
norm += delta_step ** 2
# adjust lambda
if np.abs(_c) < np.abs(c):
# increase absolute value of lambda
if np.isinf(lambda_upper):
# no upper end found yet: keep doubling
_lambda *= 2
else:
lambda_lower = _lambda
_lambda = (lambda_upper - lambda_lower) / 2 + lambda_lower
else:
# decrease lambda
lambda_upper = _lambda
_lambda = (lambda_upper - lambda_lower) / 2 + lambda_lower
# stopping condition
# (|_c| within 0.1 % of |c|, with a small absolute slack of EPS)
if 0.999 * np.abs(c) - EPS < np.abs(_c) < 1.001 * np.abs(c) + EPS:
break
return np.sqrt(norm)
def optimize_distance_s_t_boundary_and_trustregion(
    self, x0, x, b, min_, max_, c, r
):
    """ Find the solution to the optimization problem

    min_delta ||dx - delta||_p^p s.t. ||delta||_2^2 <= r^2 AND b^T delta = c AND min_ <= x + delta <= max_

    Two parameters are optimized with the box-constrained solver on
    ``self.fun_and_jac``: the first is unbounded, the second is restricted
    to non-negative values. The result is converted into a step by
    ``self._get_final_delta`` (with ``touchup=True``).
    """
    start = np.array([0.0, 0.0])
    box = np.array([(-np.inf, np.inf), (0, np.inf)])
    problem = (x0, x, b, min_, max_, c, r)
    solution = self.bfgsb.solve(self.fun_and_jac, start, box, problem)
    first = solution[0]
    second = solution[1]
    return self._get_final_delta(
        first, second, x0, x, b, min_, max_, c, r, touchup=True
    )
# Objective and gradient handed to the box-constrained solver by
# optimize_boundary_s_t_trustregion. params holds a single value (used as
# the multiplier _mu below); the remaining arguments are the problem data.
# Returns the negated value g and the negated derivative w.r.t. _mu as a
# one-element array.
def optimize_boundary_s_t_trustregion_fun_and_jac(
self, params, x0, x, b, min_, max_, c, r
):
N = x0.shape[0]
s = -np.sign(c)
_mu = params[0]
# 1 / (2 * mu), guarded against division by zero
t = 1 / (2 * _mu + EPS)
g = -_mu * r ** 2
grad_mu = -r ** 2
for n in range(N):
# unconstrained per-coordinate step, then clipped into the box
d = -s * b[n] * t
if d < min_ - x[n]:
d = min_ - x[n]
elif d > max_ - x[n]:
d = max_ - x[n]
else:
grad_mu += (b[n] + 2 * _mu * d) * (b[n] / (2 * _mu ** 2 + EPS))
# NOTE(review): whether the next statement belongs to the else branch or to
# the loop body cannot be told from this de-indented text -- verify.
grad_mu += d ** 2
g += (b[n] + _mu * d) * d
return -g, -np.array([grad_mu])
def safe_div(self, nominator, denominator):
    """Divide while keeping the denominator at least ``EPS`` in magnitude.

    A denominator with magnitude above ``EPS`` is used as is. Otherwise it
    is replaced by ``EPS`` (when non-negative) or by ``-EPS``.
    """
    magnitude_ok = np.abs(denominator) > EPS
    if magnitude_ok:
        return nominator / denominator
    if denominator >= 0:
        return nominator / EPS
    return -nominator / EPS
def optimize_boundary_s_t_trustregion(self, x0, x, b, min_, max_, c, r):
    """ Find the solution to the optimization problem

    min_delta sign(c) b^T delta s.t. ||delta||_2^2 <= r^2 AND min_ <= x + delta <= max_

    Note: this optimization problem is independent of the Lp norm being optimized.

    Lagrangian: g(delta) = sign(c) b^T delta + mu * (||delta||_2^2 - r^2)
    Optimal delta: delta = - sign(c) * b / (2 * mu)

    The multiplier mu (restricted to non-negative values) is found with the
    box-constrained solver; the resulting step is clipped so that
    ``x + delta`` stays inside ``[min_, max_]``.

    NOTE(review): the step computed below is ``-b / (2 * mu)`` without the
    ``sign(c)`` factor of the formula above -- confirm which is intended.
    """
    initial = np.array([1.0])
    problem = (x0, x, b, min_, max_, c, r)
    box = np.array([(0, np.inf)])
    qk = self.bfgsb.solve(
        self.optimize_boundary_s_t_trustregion_fun_and_jac, initial, box, problem
    )

    _delta = self.safe_div(-b, 2 * qk[0])

    # keep x + delta inside the value bounds
    for n in range(x0.shape[0]):
        lowest = min_ - x[n]
        highest = max_ - x[n]
        if _delta[n] < lowest:
            _delta[n] = lowest
        elif _delta[n] > highest:
            _delta[n] = highest
    return _delta
@jitclass(spec=spec)
class L2Optimizer(Optimizer):
def optimize_distance_s_t_boundary_and_trustregion(
    self, x0, x, b, min_, max_, c, r
):
    """Active-set solver for the L2 trust-region sub-problem.

    Solves::

        min ||x0 - x - delta||_2  s.t.  b^T delta = c
                                        & min_ <= x + delta <= max_
                                        & ||delta||_2 <= r

    No generic BFGS-B solver is involved. In each of at most 20 rounds
    the step is computed over the coordinates that are still free,
    coordinates that would leave the box are pinned to the violated
    bound, and the remaining budget for ``c`` and ``r`` is reduced by
    what the pinned coordinates already contribute.

    Parameters
    ----------
    x0 : `numpy.ndarray`
        The original image against which the perturbation is minimised
        (flattened).
    x : `numpy.ndarray`
        The current point (flattened); the returned step is relative to it.
    b : `numpy.ndarray`
        Normal vector of the local decision boundary (flattened).
    min_ : float
        Lower bound on the pixel values.
    max_ : float
        Upper bound on the pixel values.
    c : float
        Logit difference between the ground truth class of x0 and the
        leading class different from the ground truth.
    r : float
        Size of the trust region.

    Returns
    -------
    `numpy.ndarray`
        The step ``delta`` (same shape as ``x0``).
    """
    size = x0.shape[0]

    pinned = np.zeros(size, dtype=np.uint8)  # 1 = coordinate sits on a bound
    delta = np.empty_like(x0)
    dx = x0 - x

    clamp_norm = 0  # squared norm used up by pinned coordinates
    clamp_c = 0  # b^T delta contributed by pinned coordinates
    n_pinned = 0
    ck = c
    rk = r

    for _ in range(20):
        # projections restricted to the free coordinates
        bnorm = 1e-8
        b_dot_dx = 0
        for i in range(size):
            if pinned[i] == 0:
                bnorm += b[i] * b[i]
                b_dot_dx += b[i] * dx[i]

        b_dot_dx = b_dot_dx / bnorm
        ck_bnorm = ck / bnorm
        b_scale = -b_dot_dx + ck / bnorm

        delta_norm = 0
        descent_norm = 0
        boundary_step_norm = 0

        # unconstrained optimum: move towards x0 and onto the boundary
        for i in range(size):
            if pinned[i] != 0:
                continue
            delta[i] = dx[i] + b[i] * b_scale
            boundary_step_norm += b[i] * ck_bnorm * b[i] * ck_bnorm
            delta_norm += delta[i] * delta[i]
            tangential = dx[i] - b[i] * b_dot_dx
            descent_norm += tangential * tangential

        rk_sq = rk * rk
        if boundary_step_norm > rk_sq:
            # the boundary step alone exceeds the trust region: take only it
            for i in range(size):
                if pinned[i] == 0:
                    delta[i] = b[i] * ck_bnorm
        elif delta_norm > rk_sq:
            # combined step too long: shrink the part that moves towards x0
            region_correct = np.sqrt(rk_sq - boundary_step_norm)
            region_correct = region_correct / (np.sqrt(descent_norm) + 1e-8)
            b_scale = -region_correct * b_dot_dx + ck / bnorm
            for i in range(size):
                if pinned[i] == 0:
                    delta[i] = region_correct * dx[i] + b[i] * b_scale

        # pin every free coordinate that reached or crossed a bound
        newly_pinned = 0
        for i in range(size):
            if pinned[i] != 0:
                continue
            if x[i] + delta[i] <= min_:
                pinned[i] = 1
                delta[i] = min_ - x[i]
                newly_pinned += 1
                clamp_norm += delta[i] * delta[i]
                clamp_c += b[i] * delta[i]
            if x[i] + delta[i] >= max_:
                pinned[i] = 1
                delta[i] = max_ - x[i]
                newly_pinned += 1
                clamp_norm += delta[i] * delta[i]
                clamp_c += b[i] * delta[i]

        # nothing new left the box: the current step is final
        if newly_pinned == 0:
            break

        n_pinned += newly_pinned

        # remaining trust-region radius and boundary offset for free coordinates
        if clamp_norm < r * r:
            rk = np.sqrt(r * r - clamp_norm)
        else:
            rk = 0
        ck = c - clamp_c

        if n_pinned == size:
            break

    return delta
def fun_and_jac(self, params, x0, x, b, min_, max_, c, r):
    """Negated L2 dual objective and its gradient w.r.t. ``(lam, mu)``.

    The dual function is::

        g = ||dx - d||_2^2 + mu * (||d||_2^2 - r^2) + lam * (b^T d - c)

    with ``dx = x0 - x`` and ``d`` the per-coordinate minimiser
    ``(2 * dx - lam * b) / (2 * mu + 2)`` clipped to
    ``[min_ - x, max_ - x]``.

    Returns
    -------
    tuple
        ``(-g, -array([dg/dlam, dg/dmu]))``.
    """
    lam, mu = params
    scale = 1 / (2 * mu + 2)

    value = 0
    grad_lam = 0
    grad_mu = 0

    for i in range(x0.shape[0]):
        xi = x[i]
        bi = b[i]
        dx = x0[i] - xi

        d = (2 * dx - lam * bi) * scale
        if d + xi > max_:
            d = max_ - xi
        elif d + xi < min_:
            d = min_ - xi
        else:
            # unclipped coordinate: include the dependence of d on (lam, mu)
            prefac = 2 * (d - dx) + 2 * mu * d + lam * bi
            grad_lam -= prefac * bi * scale
            grad_mu -= prefac * 2 * d * scale

        value += (dx - d) ** 2 + mu * d ** 2 + lam * bi * d
        grad_lam += bi * d
        grad_mu += d ** 2

    # constant terms of the Lagrangian
    value += -mu * r ** 2 - lam * c
    grad_lam -= c
    grad_mu -= r ** 2

    return -value, -np.array([grad_lam, grad_mu])
def _get_final_delta(self, lam, mu, x0, x, b, min_, max_, c, r, touchup=True):
    """Rebuild the L2 step from the dual variables ``lam`` and ``mu``.

    Each coordinate is ``(2 * (x0 - x) - lam * b) / (2 * mu + 2)``,
    clipped so that ``x + delta`` stays in ``[min_, max_]``.
    ``c``, ``r`` and ``touchup`` are accepted but not used here.
    """
    scale = 1 / (2 * mu + 2)
    delta = np.empty_like(x0)

    for i in range(x0.shape[0]):
        step = (2 * (x0[i] - x[i]) - lam * b[i]) * scale
        if step + x[i] > max_:
            step = max_ - x[i]
        elif step + x[i] < min_:
            step = min_ - x[i]
        delta[i] = step

    return delta
def _distance(self, x0, x):
    """Return the squared Euclidean distance between ``x0`` and ``x``."""
    difference = x0 - x
    return np.linalg.norm(difference) ** 2
@jitclass(spec=spec)
class L1Optimizer(Optimizer):
def fun_and_jac(self, params, x0, x, b, min_, max_, c, r):
    """Negated L1 dual objective and its gradient w.r.t. ``(lam, mu)``.

    Inner problem per coordinate::

        arg min_delta |delta - dx| + lam * b * delta + mu * delta^2
        s.t. min_ <= delta + x <= max_

    For ``mu > 0`` the minimiser is obtained by soft-thresholding and
    then clipped to the box; otherwise each coordinate is either ``dx``
    or one of the box corners, depending on ``lam * b``.

    Returns
    -------
    tuple
        ``(-g, -array([dg/dlam, dg/dmu]))``.
    """
    lam, mu = params
    size = x0.shape[0]

    value = 0
    grad_lam = 0
    grad_mu = 0

    if mu > 0:
        t = 1 / (2 * mu)
        for i in range(size):
            bi = b[i]
            dx = x0[i] - x[i]
            u = -lam * bi * t - dx

            if np.abs(u) - t < 0:
                # inside the threshold: stay at dx, no gradient correction
                d = dx
            else:
                d = np.sign(u) * (np.abs(u) - t) + dx
                if d + x[i] < min_:
                    d = min_ - x[i]
                elif d + x[i] > max_:
                    d = max_ - x[i]
                else:
                    prefac = np.sign(d - dx) + 2 * mu * d + lam * bi
                    grad_lam -= prefac * bi * t
                    grad_mu -= prefac * 2 * d * t

            value += np.abs(dx - d) + mu * d ** 2 + lam * bi * d
            grad_lam += bi * d
            grad_mu += d ** 2
    else:
        # no quadratic term
        for i in range(size):
            bi = b[i]
            dx = x0[i] - x[i]
            slope = lam * bi

            if np.abs(slope) < 1:
                d = dx
            elif np.sign(slope) < 0:
                d = max_ - x[i]
            else:
                d = min_ - x[i]

            value += np.abs(dx - d) + mu * d ** 2 + lam * bi * d
            grad_lam += bi * d
            grad_mu += d ** 2

    # constant terms of the Lagrangian
    value += -mu * r ** 2 - lam * c
    grad_lam -= c
    grad_mu -= r ** 2

    return -value, -np.array([grad_lam, grad_mu])
def _get_final_delta(self, lam, mu, x0, x, b, min_, max_, c, r, touchup=True):
    """Rebuild the L1 step from the dual variables ``lam`` and ``mu``.

    The per-coordinate minimiser is the same one used in ``fun_and_jac``
    (soft-thresholding plus box clipping for ``mu > 0``, corner/``dx``
    selection otherwise).

    With ``touchup`` enabled, a single coordinate is afterwards shifted so
    that ``b^T delta == c`` holds exactly. Only coordinates whose shifted
    value keeps ``x + delta`` in ``[min_, max_]`` and the squared norm
    within ``r^2`` qualify; among those the one giving the smallest L1
    distance to ``x0 - x`` is chosen. If none qualifies, the step is
    returned unchanged.
    """
    size = x0.shape[0]
    delta = np.empty_like(x0)

    b_dot_d = 0
    norm_d = 0
    distance = 0

    if mu > 0:
        t = 1 / (2 * mu)
        for i in range(size):
            dx = x0[i] - x[i]
            u = -lam * b[i] * t - dx

            if np.abs(u) - t < 0:
                d = dx
            else:
                d = np.sign(u) * (np.abs(u) - t) + dx
                if d + x[i] < min_:
                    d = min_ - x[i]
                elif d + x[i] > max_:
                    d = max_ - x[i]

            delta[i] = d
            b_dot_d += b[i] * d
            norm_d += d ** 2
            distance += np.abs(d - dx)
    else:
        # no quadratic term
        for i in range(size):
            dx = x0[i] - x[i]
            slope = lam * b[i]

            if np.abs(slope) < 1:
                d = dx
            elif np.sign(slope) < 0:
                d = max_ - x[i]
            else:
                d = min_ - x[i]

            delta[i] = d
            b_dot_d += b[i] * d
            norm_d += d ** 2
            distance += np.abs(d - dx)

    if touchup:
        # amount of b^T delta still missing to hit the boundary exactly
        dc = c - b_dot_d

        found = False
        best_distance = np.inf
        best_idx = 0

        for i in range(size):
            if np.abs(b[i]) > 0:
                dx = x0[i] - x[i]
                old_d = delta[i]
                new_d = old_d + dc / b[i]

                if (
                    x[i] + new_d <= max_
                    and x[i] + new_d >= min_
                    and norm_d - old_d ** 2 + new_d ** 2 <= r ** 2
                ):
                    # feasible candidate: compare resulting L1 distances
                    candidate = distance - np.abs(old_d - dx) + np.abs(new_d - dx)
                    if (not found) or best_distance > candidate:
                        best_distance = candidate
                        best_idx = i
                        found = True

        if found:
            delta[best_idx] = delta[best_idx] + dc / b[best_idx]

    return delta
def _distance(self, x0, x):
    """Return the L1 distance (sum of absolute differences) between the inputs."""
    absolute_difference = np.abs(x0 - x)
    return absolute_difference.sum()
@jitclass(spec=spec)
class LinfOptimizer(Optimizer):
def optimize_distance_s_t_boundary_and_trustregion(
    self, x0, x, b, min_, max_, c, r
):
    """Solve the L-inf distance-minimisation step.

    Target problem::

        min_delta ||dx - delta||_p^p
        s.t. ||delta||_2^2 <= r^2, b^T delta = c, min_ <= x + delta <= max_

    The work is delegated to ``self.binary_search``; the start vector and
    the bounds array are forwarded as its first two arguments.
    """
    start = np.array([0.0, 0.0])
    limits = np.array([(-np.inf, np.inf), (0, np.inf)])
    delta = self.binary_search(start, limits, x0, x, b, min_, max_, c, r)
    return delta
def binary_search(
    self, q0, bounds, x0, x, b, min_, max_, c, r, etol=1e-6, maxiter=1000
):
    """Bisect on the L-inf radius ``epsilon`` around ``x0``.

    For each candidate radius ``self.fun`` reports whether a step exists
    that reaches the boundary (``b^T delta = c``) inside the trust region
    while staying within ``epsilon`` of ``x0``; a return value of
    ``-inf`` means "not feasible". The smallest feasible radius found is
    used to build the final step via ``self._get_final_delta``.

    Parameters
    ----------
    q0, bounds, maxiter
        Accepted for signature compatibility; not used by the search.
    x0, x, b, min_, max_, c, r
        Forwarded unchanged to ``self.fun`` and ``self._get_final_delta``.
    etol : float
        The search stops once the bracket is narrower than this.

    Notes
    -----
    The bracket is ``[0, max_ - min_]``: a radius is a non-negative
    distance and can never need to exceed the width of the pixel range.
    (Using ``[min_, max_]`` is only equivalent when ``min_ == 0``; for
    other pixel ranges it searches the wrong interval.) The number of
    feasibility checks is capped at 21.
    """
    eps_low = 0.0
    eps_high = max_ - min_
    epsilon = eps_high / 2.0

    # closed-form multiplier of the problem without box constraints
    bnorm = np.linalg.norm(b)
    lambda0 = 2 * c / bnorm ** 2

    k = 0
    while eps_high - eps_low > etol:
        fun, _nfev, _lambda0 = self.fun(
            epsilon, x0, x, b, min_, max_, c, r, lambda0=lambda0
        )

        if fun > -np.inf:
            # feasible: try a smaller radius, warm-start from this multiplier
            eps_high = epsilon
            lambda0 = _lambda0
        else:
            # infeasible: the radius has to grow
            eps_low = epsilon

        k += 1
        epsilon = (eps_high - eps_low) / 2.0 + eps_low

        if k > 20:
            break

    return self._get_final_delta(
        lambda0, eps_high, x0, x, b, min_, max_, c, r, touchup=True
    )
def _Linf_bounds(self, x0, epsilon, ell, u):
    """Intersect the ``epsilon``-box around ``x0`` with ``[ell, u]``.

    Returns
    -------
    tuple of `numpy.ndarray`
        ``(lower, upper)`` with ``lower[i] = max(x0[i] - epsilon, ell)``
        and ``upper[i] = min(x0[i] + epsilon, u)``.
    """
    lower = np.empty_like(x0)
    upper = np.empty_like(x0)

    for i in range(x0.shape[0]):
        below = x0[i] - epsilon
        above = x0[i] + epsilon
        lower[i] = below if below > ell else ell
        upper[i] = above if above < u else u

    return lower, upper
def fun(self, epsilon, x0, x, b, ell, u, c, r, lambda0=None):
    """Check whether the boundary is reachable within L-inf radius ``epsilon``.

    The underlying problem is::

        min ||delta||_2^2  s.t.  lower <= x + delta <= upper
                                 AND b.dot(delta) = c

    where ``[lower, upper]`` is the ``epsilon``-box around ``x0``
    intersected with ``[ell, u]``.

    Without the box constraints the Lagrangian
    ``g(lambda, delta) = ||delta||_2^2 + lambda * (c - b.dot(delta))``
    is minimised by ``delta^* = lambda * b / 2``, which gives
    ``inf_delta g = -lambda^2 / 4 * ||b||_2^2 + lambda * c``. This is
    maximised by ``lambda^* = 2 c / ||b||_2^2`` and hence
    ``delta^* = c * b / ||b||_2^2``.

    With the box constraints, ``lambda`` is searched iteratively: the
    step ``lambda * b / 2`` is clipped to the box, ``lambda`` is updated
    from the unclipped coordinates, and bisection between the bracketing
    values is used whenever that update is unavailable or lands in the
    outer 10% of the bracket.

    Returns
    -------
    tuple
        ``(value, iterations, lambda)``. ``value`` is ``-epsilon`` if a
        step hitting the boundary within the trust region ``r`` was
        found and ``-inf`` otherwise.
    """
    size = x.shape[0]

    lower, upper = self._Linf_bounds(x0, epsilon, ell, u)

    lam = lambda0
    n_iter = 0
    lam_high, lam_low = 1e10, -1e10

    # range of b^T delta attainable inside the box
    reach_max = 0
    reach_min = 0
    for i in range(size):
        if b[i] > 0:
            reach_max += b[i] * (upper[i] - x[i])
            reach_min += b[i] * (lower[i] - x[i])
        else:
            reach_max += b[i] * (lower[i] - x[i])
            reach_min += b[i] * (upper[i] - x[i])

    if c > reach_max or c < reach_min:
        # boundary cannot be reached with this radius
        return -np.inf, n_iter, lam

    while True:
        n_iter += 1

        reached = 0
        sq_norm = 0
        free_bnorm = 0

        for i in range(size):
            raw_step = lam * b[i] / 2
            if raw_step + x[i] < lower[i]:
                step = lower[i] - x[i]
            elif raw_step + x[i] > upper[i]:
                step = upper[i] - x[i]
            else:
                step = raw_step
                free_bnorm += b[i] ** 2

            reached += b[i] * step
            sq_norm += step ** 2

        abs_reached = np.abs(reached)
        if (
            0.9999 * np.abs(c) - EPS < abs_reached
            and abs_reached < 1.0001 * np.abs(c) + EPS
        ):
            if sq_norm > r ** 2:
                # boundary reached, but outside of the trust region
                return -np.inf, n_iter, lam
            return -epsilon, n_iter, lam

        # tighten the bracket around the sought multiplier
        if reached > c:
            lam_high = lam
        else:
            lam_low = lam

        midpoint = (lam_high - lam_low) / 2 + lam_low

        if free_bnorm == 0:
            # no unclipped coordinate to update from: bisect
            lam = midpoint
        else:
            lam += 2 * (c - reached) / free_bnorm

        width = lam_high - lam_low
        if lam > lam_high - 0.1 * width or lam < lam_low + 0.1 * width:
            # update is leaving the bracket: bisect instead
            lam = midpoint
def _get_final_delta(self, lam, eps, x0, x, b, min_, max_, c, r, touchup=True):
    """Rebuild the L-inf step for multiplier ``lam`` and radius ``eps``.

    Each coordinate is ``lam * b / 2``, clipped so that ``x + delta``
    stays inside the box returned by ``self._Linf_bounds(x0, eps, min_,
    max_)``. ``c``, ``r`` and ``touchup`` are accepted but not used here.
    """
    lower, upper = self._Linf_bounds(x0, eps, min_, max_)
    delta = np.empty_like(x0)

    for i in range(x.shape[0]):
        raw_step = lam * b[i] / 2
        target = raw_step + x[i]
        if target < lower[i]:
            delta[i] = lower[i] - x[i]
        elif target > upper[i]:
            delta[i] = upper[i] - x[i]
        else:
            delta[i] = raw_step

    return delta
def _distance(self, x0, x):
    """Return the L-inf distance (largest absolute difference) between the inputs."""
    absolute_difference = np.abs(x0 - x)
    return absolute_difference.max()
@jitclass(spec=spec)
class L0Optimizer(Optimizer):
def optimize_distance_s_t_boundary_and_trustregion(
    self, x0, x, b, min_, max_, c, r
):
    """Solve the L0 distance-minimisation step.

    Target problem::

        min_delta ||dx - delta||_p^p
        s.t. ||delta||_2^2 <= r^2, b^T delta = c, min_ <= x + delta <= max_

    The work is delegated to ``self.minimize``, started from zero dual
    variables with the first one unbounded and the second one >= 0.
    """
    start = np.array([0.0, 0.0])
    limits = np.array([(-np.inf, np.inf), (0, np.inf)])
    delta = self.minimize(start, limits, x0, x, b, min_, max_, c, r)
    return delta
def minimize(
    self,
    q0,
    bounds,
    x0,
    x,
    b,
    min_,
    max_,
    c,
    r,
    ftol=1e-9,
    xtol=-1e-5,
    maxiter=1000,
):
    """Compute the L0 step, using the dual problem only when necessary.

    First the solution that ignores the trust region is computed with
    ``self.minimize_without_trustregion``. If its L2 norm is at most
    ``r`` it is returned directly. Otherwise the dual variables are
    optimised with ``self._nelder_mead_algorithm`` (started from ``q0``,
    restricted to ``bounds``) and the step is rebuilt with
    ``self._get_final_delta``.

    ``ftol``, ``xtol`` and ``maxiter`` are forwarded as ``tol_f``,
    ``tol_x`` and ``max_iter``. NOTE(review): the default ``xtol`` is
    negative, which appears to switch off the simplex-size stopping test
    of the Nelder-Mead routine -- confirm this is intended.
    """
    delta, delta_norm = self.minimize_without_trustregion(
        x0, x, b, c, r, min_, max_
    )
    if delta_norm <= r:
        # the unconstrained solution already respects the trust region
        return delta

    extra = (x0, x, b, min_, max_, c, r)
    duals = self._nelder_mead_algorithm(
        q0, bounds, args=extra, tol_f=ftol, tol_x=xtol, max_iter=maxiter
    )
    return self._get_final_delta(
        duals[0], duals[1], x0, x, b, min_, max_, c, r, touchup=True
    )
def minimize_without_trustregion(self, x0, x, b, c, r, ell, u):
    """Greedy L0 step that ignores the trust region.

    Starting from ``delta = x0 - x`` (i.e. at the original input), the
    coordinates are visited in order of decreasing possible contribution
    to ``b^T delta``. Each visited coordinate is moved all the way to the
    box bound that pushes ``b^T delta`` towards ``c``, until one
    coordinate can close the remaining gap on its own; that coordinate is
    moved just far enough and the loop stops.

    ``r`` is accepted but not used.

    Returns
    -------
    tuple
        ``(delta, ||delta||_2)``.
    """
    size = x0.shape[0]
    delta = x0 - x
    total = np.empty_like(x0)  # full move of each coordinate to its bound
    total_b = np.empty_like(x0)  # resulting change of b^T delta

    bdotdelta = b.dot(delta)
    gap = c - bdotdelta

    for i in range(size):
        bi = b[i]
        # the lower bound helps when the gap and b[i] have opposite signs
        toward_lower = (bi > 0 and gap < 0) or (bi < 0 and gap > 0)
        if toward_lower:
            span = ell - x0[i]
        else:
            span = u - x0[i]
        total[i] = span
        total_b[i] = span * bi

    # largest possible contribution first
    order = np.argsort(np.abs(total_b))[::-1]

    for idx in order:
        remaining = c - bdotdelta
        if np.abs(remaining) > np.abs(total_b[idx]):
            delta[idx] += total[idx]
            bdotdelta += total_b[idx]
        else:
            # this coordinate alone closes the gap (1e-20 avoids 0-division)
            delta[idx] += remaining / (b[idx] + 1e-20)
            break

    return delta, np.linalg.norm(delta)
def _nelder_mead_algorithm(
    self,
    q0,
    bounds,
    args=(),
    ρ=1.0,
    χ=2.0,
    γ=0.5,
    σ=0.5,
    tol_f=1e-8,
    tol_x=1e-8,
    max_iter=1000,
):
    """
    Implements the Nelder-Mead algorithm described in Lagarias et al. (1998)
    modified to maximize instead of minimizing.

    The quantity that is minimised internally is ``self._neg_bounded_fun``,
    evaluated at the simplex vertices.

    Parameters
    ----------
    q0 : ndarray(float, ndim=1)
        Initial guess; the starting simplex is built from it with
        ``self._initialize_simplex``.
    bounds : ndarray(float, ndim=2)
        Sequence of (min, max) pairs, passed on to ``self._check_params``
        and ``self._neg_bounded_fun``.
    args : tuple, optional
        Extra arguments passed to the objective function.
    ρ : scalar(float), optional(default=1.)
        Reflection parameter. Must be strictly greater than 0.
    χ : scalar(float), optional(default=2.)
        Expansion parameter. Must be strictly greater than max(1, ρ).
    γ : scalar(float), optional(default=0.5)
        Contraction parameter. Must be stricly between 0 and 1.
    σ : scalar(float), optional(default=0.5)
        Shrinkage parameter. Must be strictly between 0 and 1.
    tol_f : scalar(float), optional(default=1e-8)
        Tolerance to be used for the function value convergence test.
    tol_x : scalar(float), optional(default=1e-8)
        Tolerance to be used for the function domain convergence test.
    max_iter : scalar(float), optional(default=1000)
        The maximum number of allowed iterations.

    Returns
    ----------
    x : ndarray(float, ndim=1)
        Approximate solution: the vertex ranked first in the internal
        ordering when the loop stops.
    """
    vertices = self._initialize_simplex(q0)
    n = vertices.shape[1]
    self._check_params(ρ, χ, γ, σ, bounds, n)
    nit = 0
    # factors by which each move type rescales the (linearized) simplex volume
    ργ = ρ * γ
    ρχ = ρ * χ
    σ_n = σ ** n
    f_val = np.empty(n + 1, dtype=np.float64)
    for i in range(n + 1):
        f_val[i] = self._neg_bounded_fun(bounds, vertices[i], args=args)
    # Step 1: Sort
    sort_ind = f_val.argsort()
    LV_ratio = 1
    # Compute centroid of all vertices except the worst one
    x_bar = vertices[sort_ind[:n]].sum(axis=0) / n
    while True:
        shrink = False
        # Check termination
        fail = nit >= max_iter
        best_val_idx = sort_ind[0]
        worst_val_idx = sort_ind[n]
        term_f = f_val[worst_val_idx] - f_val[best_val_idx] < tol_f
        # Linearized volume ratio test
        term_x = LV_ratio < tol_x
        if term_x or term_f or fail:
            break
        # Step 2: Reflection
        x_r = x_bar + ρ * (x_bar - vertices[worst_val_idx])
        f_r = self._neg_bounded_fun(bounds, x_r, args=args)
        if f_r >= f_val[best_val_idx] and f_r < f_val[sort_ind[n - 1]]:
            # Accept reflection
            vertices[worst_val_idx] = x_r
            LV_ratio *= ρ
        # Step 3: Expansion
        elif f_r < f_val[best_val_idx]:
            x_e = x_bar + χ * (x_r - x_bar)
            f_e = self._neg_bounded_fun(bounds, x_e, args=args)
            if f_e < f_r:  # Greedy minimization
                vertices[worst_val_idx] = x_e
                LV_ratio *= ρχ
            else:
                vertices[worst_val_idx] = x_r
                LV_ratio *= ρ
        # Step 4 & 5: Contraction and Shrink
        else:
            # Step 4: Contraction
            if f_r < f_val[worst_val_idx]:  # Step 4.a: Outside Contraction
                x_c = x_bar + γ * (x_r - x_bar)
                LV_ratio_update = ργ
            else:  # Step 4.b: Inside Contraction
                x_c = x_bar - γ * (x_r - x_bar)
                LV_ratio_update = γ
            f_c = self._neg_bounded_fun(bounds, x_c, args=args)
            if f_c < min(f_r, f_val[worst_val_idx]):  # Accept contraction
                vertices[worst_val_idx] = x_c
                LV_ratio *= LV_ratio_update
            # Step 5: Shrink
            else:
                shrink = True
                # pull every vertex except the best one towards the best one
                for i in sort_ind[1:]:
                    vertices[i] = vertices[best_val_idx] + σ * (
                        vertices[i] - vertices[best_val_idx]
                    )
                    f_val[i] = self._neg_bounded_fun(bounds, vertices[i], args=args)
                # NOTE(review): this stores argsort positions (+1) rather than
                # the vertex indices themselves -- confirm this is intended.
                sort_ind[1:] = f_val[sort_ind[1:]].argsort() + 1
                x_bar = (
                    vertices[best_val_idx]
                    + σ * (x_bar - vertices[best_val_idx])
                    + (vertices[worst_val_idx] - vertices[sort_ind[n]]) / n
                )
                LV_ratio *= σ_n
        if not shrink:  # Nonshrink ordering rule
            f_val[worst_val_idx] = self._neg_bounded_fun(
                bounds, vertices[worst_val_idx], args=args
            )
            # insert the replaced vertex at its rank, shifting the rest down
            for i, j in enumerate(sort_ind):
                if f_val[worst_val_idx] < f_val[j]:
                    sort_ind[i + 1 :] = sort_ind[i:-1]
                    sort_ind[i] = worst_val_idx
                    break
            # update the centroid incrementally for the new worst vertex
            x_bar += (vertices[worst_val_idx] - vertices[sort_ind[n]]) / n
        nit += 1
    return vertices[sort_ind[0]]
def _initialize_simplex(self, x0):
    """
    Generates an initial simplex for the Nelder-Mead method.

    Every vertex starts as a copy of ``x0``; vertex ``i + 1`` is then
    moved along coordinate ``i``: scaled by 1.05 if that coordinate is
    non-zero, set to 0.00025 otherwise.

    Parameters
    ----------
    x0 : ndarray(float, ndim=1)
        Initial guess. Array of real elements of size (n,), where 'n' is the
        number of independent variables.

    Returns
    ----------
    vertices : ndarray(float, ndim=2)
        Initial simplex with shape (n+1, n).
    """
    relative_step = 0.05
    absolute_step = 0.00025

    dim = x0.size
    vertices = np.empty((dim + 1, dim), dtype=np.float64)
    vertices[:] = x0  # broadcast x0 to every row

    for axis in range(dim):
        row = axis + 1
        if vertices[row, axis] != 0.0:
            vertices[row, axis] = vertices[row, axis] * (1 + relative_step)
        else:
            vertices[row, axis] = absolute_step

    return vertices
def _check_params(self, ρ, χ, γ, σ, bounds, n):
    """
    Checks whether the parameters for the Nelder-Mead algorithm are valid.

    The comparisons are strict, matching the documented requirements and
    the error messages: boundary values such as ``ρ == 0``, ``χ == 1``,
    ``χ == ρ``, ``γ`` or ``σ`` equal to 0 or 1 are rejected.

    Parameters
    ----------
    ρ : scalar(float)
        Reflection parameter. Must be strictly greater than 0.
    χ : scalar(float)
        Expansion parameter. Must be strictly greater than max(1, ρ).
    γ : scalar(float)
        Contraction parameter. Must be stricly between 0 and 1.
    σ : scalar(float)
        Shrinkage parameter. Must be strictly between 0 and 1.
    bounds: ndarray(float, ndim=2)
        Sequence of (min, max) pairs for each element in x.
    n : scalar(int)
        Number of independent variables.

    Raises
    ------
    ValueError
        If a coefficient violates its range, if ``bounds`` has a shape
        other than ``(0, 2)`` or ``(n, 2)``, or if any lower bound exceeds
        its upper bound.
    """
    if ρ <= 0:
        raise ValueError("ρ must be strictly greater than 0.")
    if χ <= 1:
        raise ValueError("χ must be strictly greater than 1.")
    if χ <= ρ:
        raise ValueError("χ must be strictly greater than ρ.")
    if γ <= 0 or γ >= 1:
        raise ValueError("γ must be strictly between 0 and 1.")
    if σ <= 0 or σ >= 1:
        raise ValueError("σ must be strictly between 0 and 1.")

    if not (bounds.shape == (0, 2) or bounds.shape == (n, 2)):
        raise ValueError("The shape of `bounds` is not valid.")
    if (np.atleast_2d(bounds)[:, 0] > np.atleast_2d(bounds)[:, 1]).any():
        # the previous message stated the opposite of the actual requirement
        raise ValueError("Lower bounds must not be greater than upper bounds.")
def _check_bounds(self, x, bounds):
    """
    Checks whether `x` is within `bounds` (both ends inclusive).

    Parameters
    ----------
    x : ndarray(float, ndim=1)
        1-D array with shape (n,) of independent variables.
    bounds: ndarray(float, ndim=2)
        Sequence of (min, max) pairs for each element in x. An array of
        shape (0, 2) means "no bounds".

    Returns
    ----------
    bool
        `True` if `x` is within `bounds`, `False` otherwise.
    """
    if bounds.shape == (0, 2):
        return True

    limits = np.atleast_2d(bounds)
    above_lower = (limits[:, 0] <= x).all()
    below_upper = (x <= limits[:, 1]).all()
    return above_lower and below_upper
def _neg_bounded_fun(self, bounds, x, args=()):
    """
    Wrapper for bounding and taking the negative of `self.fun` for the
    Nelder-Mead algorithm.

    Parameters
    ----------
    bounds: ndarray(float, ndim=2)
        Sequence of (min, max) pairs for each element in x.
    x : ndarray(float, ndim=1)
        1-D array with shape (n,) of independent variables at which
        `self.fun` is to be evaluated.
    args : tuple, optional
        Extra arguments passed to the objective function.

    Returns
    ----------
    scalar
        `-self.fun(x, *args)` if x is within `bounds`, `np.inf` otherwise.
    """
    if not self._check_bounds(x, bounds):
        # out-of-bounds points are made maximally unattractive
        return np.inf
    return -self.fun(x, *args)
def fun(self, params, x0, x, b, min_, max_, c, r):
    """Value of the L0 dual function at ``params = (lam, mu)``.

    Inner problem per coordinate::

        arg min_delta ||delta - dx||_0 + lam * b * delta + mu * delta^2
        s.t. min_ <= delta + x <= max_

    Each coordinate either keeps ``delta = dx`` (no L0 cost) or pays a
    cost of 1 and moves to the best other position: the clipped
    minimiser of the smooth part for ``mu > 0``, or one of the two box
    corners otherwise. The cheaper option is added to the total, together
    with the constant terms ``-mu * r^2 - lam * c``.
    """
    lam, mu = params
    size = x0.shape[0]

    total = -mu * r ** 2 - lam * c

    if mu > 0:
        t = 1 / (2 * mu)
        for i in range(size):
            bi = b[i]
            dx = x0[i] - x[i]
            lowest = min_ - x[i]
            highest = max_ - x[i]

            # option 1: stay at dx
            keep_cost = lam * bi * dx + mu * dx ** 2

            # option 2: pay 1 and move to the clipped smooth minimiser
            optd = -lam * bi * t
            if optd < lowest:
                optd = lowest
            elif optd > highest:
                optd = highest
            move_cost = 1 + lam * bi * optd + mu * optd ** 2

            if keep_cost <= move_cost:
                total += mu * dx ** 2 + lam * bi * dx
            else:
                total += 1 + mu * optd ** 2 + lam * bi * optd
    else:
        # linear objective: only dx or one of the box corners can be optimal
        for i in range(size):
            bi = b[i]
            dx = x0[i] - x[i]
            lowest = min_ - x[i]
            highest = max_ - x[i]

            keep_cost = lam * bi * dx
            low_cost = 1 + lam * bi * lowest
            high_cost = 1 + lam * bi * highest

            if keep_cost <= low_cost and keep_cost <= high_cost:
                total += mu * dx ** 2 + lam * bi * dx
            elif low_cost < high_cost:
                total += 1 + mu * lowest ** 2 + lam * bi * lowest
            else:
                total += 1 + mu * highest ** 2 + lam * bi * highest

    return total
def _get_final_delta(self, lam, mu, x0, x, b, min_, max_, c, r, touchup=True):
    """Rebuild the L0 step from the dual variables, with fallbacks.

    Without ``touchup`` the raw step for ``(lam, mu)`` is returned.

    With ``touchup`` the touched-up step for ``(lam, mu)`` is tried
    first. If the touch-up fails (the helper returns ``None``), eight
    neighbouring dual points, offset by ``1e-5`` in one or both
    variables, are tried in a fixed order. If all of them fail too, the
    raw step for ``(lam, mu)`` is returned.
    """
    if not touchup:
        return self.__get_final_delta(lam, mu, x0, x, b, min_, max_, c, r, False)

    delta = self.__get_final_delta(lam, mu, x0, x, b, min_, max_, c, r)
    if delta is not None:
        return delta

    # first fallback: retry at slightly perturbed dual variables
    h = 1e-5
    neighbours = [
        (lam + h, mu),
        (lam, mu + h),
        (lam - h, mu),
        (lam, mu - h),
        (lam + h, mu + h),
        (lam - h, mu - h),
        (lam + h, mu - h),
        (lam - h, mu + h),
    ]
    for neighbour in neighbours:
        delta = self.__get_final_delta(
            neighbour[0], neighbour[1], x0, x, b, min_, max_, c, r
        )
        if delta is not None:
            return delta

    # second fallback: give up on the touch-up
    return self.__get_final_delta(lam, mu, x0, x, b, min_, max_, c, r, False)
def __get_final_delta(self, lam, mu, x0, x, b, min_, max_, c, r, touchup=True):
    """Rebuild the L0 step for ``(lam, mu)``, optionally with touch-up.

    Each coordinate either stays at ``dx = x0 - x`` or moves to the
    alternative position selected exactly as in ``fun``; moved
    coordinates are counted as the L0 distance.

    With ``touchup`` a single coordinate is afterwards shifted so that
    ``b^T delta == c``. A coordinate qualifies if the shift keeps
    ``x + delta`` in ``[min_, max_]`` and the squared norm within
    ``r^2``. Among qualifying coordinates the one giving the smallest L0
    distance wins, ties being broken by the smaller squared norm.

    Returns
    -------
    `numpy.ndarray` or None
        The step; ``None`` if ``touchup`` was requested but no coordinate
        qualified.
    """
    size = x0.shape[0]
    delta = np.empty_like(x0)

    b_dot_d = 0
    norm_d = 0
    distance = 0

    if mu > 0:
        t = 1 / (2 * mu)
        for i in range(size):
            bi = b[i]
            dx = x0[i] - x[i]

            keep_cost = lam * bi * dx + mu * dx ** 2

            optd = -lam * bi * t
            if optd < min_ - x[i]:
                optd = min_ - x[i]
            elif optd > max_ - x[i]:
                optd = max_ - x[i]
            move_cost = 1 + lam * bi * optd + mu * optd ** 2

            if keep_cost <= move_cost:
                d = dx
            else:
                d = optd
                distance += 1

            delta[i] = d
            b_dot_d += bi * d
            norm_d += d ** 2
    else:
        # no quadratic term: dx or one of the box corners
        for i in range(size):
            bi = b[i]
            dx = x0[i] - x[i]

            keep_cost = lam * bi * dx
            low_cost = 1 + lam * bi * (min_ - x[i])
            high_cost = 1 + lam * bi * (max_ - x[i])

            if keep_cost <= low_cost and keep_cost <= high_cost:
                d = dx
            elif low_cost < high_cost:
                d = min_ - x[i]
                distance += 1
            else:
                d = max_ - x[i]
                distance += 1

            delta[i] = d
            norm_d += d ** 2
            b_dot_d += bi * d

    if touchup:
        # amount of b^T delta still missing to hit the boundary exactly
        dc = c - b_dot_d

        found = False
        best_distance = np.inf
        best_norm = np.inf
        best_idx = 0

        for i in range(size):
            if np.abs(b[i]) > 0:
                dx = x0[i] - x[i]
                old_d = delta[i]
                new_d = old_d + dc / b[i]

                if (
                    x[i] + new_d <= max_
                    and x[i] + new_d >= min_
                    and norm_d - old_d ** 2 + new_d ** 2 <= r ** 2
                ):
                    # feasible candidate: L0 distance and norm after the shift
                    candidate = (
                        distance
                        - (np.abs(old_d - dx) > 1e-10)
                        + (np.abs(new_d - dx) > 1e-10)
                    )
                    candidate_norm = norm_d - old_d ** 2 + new_d ** 2

                    if (
                        (not found)
                        or best_distance > candidate
                        or (best_distance == candidate and best_norm > candidate_norm)
                    ):
                        best_distance = candidate
                        best_norm = candidate_norm
                        best_idx = i
                        found = True

        if not found:
            return None

        delta[best_idx] = delta[best_idx] + dc / b[best_idx]
        return delta

    return delta
def _distance(self, x0, x):
    """Return the L0 distance: the number of entries differing by more than EPS."""
    changed = np.abs(x - x0) > EPS
    return np.sum(changed)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.