Created
March 23, 2018 21:53
-
-
Save mikofski/5f705134ea320c8d6376abafd57498d8 to your computer and use it in GitHub Desktop.
profiling script of scipy PR8357 to vectorize newton using a mask for active array items
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division, print_function, absolute_import | |
import warnings | |
import numpy as np | |
# Newton-Raphson method | |
def newton(func, x0, fprime=None, args=(), tol=1.48e-8, maxiter=50, | |
fprime2=None, **kwargs): | |
if tol <= 0: | |
raise ValueError("tol too small (%g <= 0)" % tol) | |
if maxiter < 1: | |
raise ValueError("maxiter must be greater than 0") | |
if not np.isscalar(x0): | |
return _array_newton(func, x0, fprime, args, tol, maxiter, fprime2, | |
**kwargs) | |
if fprime is not None: | |
# Newton-Raphson method | |
# Multiply by 1.0 to convert to floating point. We don't use float(x0) | |
# so it still works if x0 is complex. | |
p0 = 1.0 * x0 | |
for iteration in range(maxiter): | |
myargs = (p0,) + args | |
fder = fprime(*myargs) | |
if fder == 0: | |
msg = "derivative was zero." | |
warnings.warn(msg, RuntimeWarning) | |
return p0 | |
fval = func(*myargs) | |
newton_step = fval / fder | |
if fprime2 is None: | |
# Newton step | |
p = p0 - newton_step | |
else: | |
fder2 = fprime2(*myargs) | |
# Halley's method | |
p = p0 - newton_step / (1.0 - 0.5 * newton_step * fder2 / fder) | |
if abs(p - p0) < tol: | |
return p | |
p0 = p | |
else: | |
# Secant method | |
p0 = x0 | |
if x0 >= 0: | |
p1 = x0*(1 + 1e-4) + 1e-4 | |
else: | |
p1 = x0*(1 + 1e-4) - 1e-4 | |
q0 = func(*((p0,) + args)) | |
q1 = func(*((p1,) + args)) | |
for iteration in range(maxiter): | |
if q1 == q0: | |
if p1 != p0: | |
msg = "Tolerance of %s reached" % (p1 - p0) | |
warnings.warn(msg, RuntimeWarning) | |
return (p1 + p0)/2.0 | |
else: | |
p = p1 - q1*(p1 - p0)/(q1 - q0) | |
if abs(p - p1) < tol: | |
return p | |
p0 = p1 | |
q0 = q1 | |
p1 = p | |
q1 = func(*((p1,) + args)) | |
msg = "Failed to converge after %d iterations, value is %s" % (maxiter, p) | |
raise RuntimeError(msg) | |
def _array_newton(func, x0, fprime, args, tol, maxiter, fprime2, | |
failure_idx_flag=False): | |
""" | |
A vectorized version of Newton, Halley, and secant methods for arrays. Do | |
not use this method directly. This method is called from :func:`newton` | |
when ``np.isscalar(x0)`` is true. For docstring, see :func:`newton`. | |
""" | |
x = np.asarray(x0) | |
active = np.ones_like(x, dtype=bool) # only calculate active items | |
zero_der = np.zeros_like(x, dtype=bool) | |
if fprime is not None: | |
# Newton-Raphson method | |
for iteration in range(maxiter): | |
fder = np.asarray(fprime(x[active], args[0][active], args[1][active], *args[2:])) | |
# Remove zero derivative entries from active set. | |
nz_der = (fder != 0) # indices of non-zero derivatives | |
zero_der[active] |= ~nz_der # update zero derivatives | |
active[active] &= nz_der # remove zero-derivatives from active | |
fder = fder[nz_der] # update fder | |
if not active.any(): | |
break | |
fval = np.asarray(func(x[active], args[0][active], args[1][active], *args[2:])) | |
newton_step = fval / fder | |
if fprime2 is None: | |
deltax = newton_step | |
else: | |
fder2 = np.asarray(fprime2(x[active], args[0][active], args[1][active], *args[2:])) | |
deltax = newton_step / (1.0 - 0.5 * newton_step * fder2 / fder) | |
x[active] -= deltax | |
# Remove converged entries from active set. | |
active[active] &= np.abs(deltax) >= tol | |
if not active.any(): | |
break | |
if zero_der.any(): | |
all_or_some = 'all' if zero_der.all() else 'some' | |
msg = "%s derivatives were zero" % all_or_some | |
warnings.warn(msg, RuntimeWarning) | |
if failure_idx_flag: | |
x = x, ~active, zero_der | |
return x | |
else: | |
# Secant method | |
dx = np.finfo(float).eps**0.33 | |
dp = np.where(x >= 0, dx, -dx) | |
p1 = x * (1 + dx) + dp | |
q0 = np.asarray(func(*((x,) + args))) | |
q1 = np.asarray(func(*((p1,) + args))) | |
for iteration in range(maxiter): | |
zero_der = (q1 == q0) | |
nonzero_dp = (p1 != x) | |
if zero_der.all(): | |
if nonzero_dp.any(): | |
rms = np.sqrt( | |
sum((p1[nonzero_dp] - x[nonzero_dp])**2) | |
) | |
msg = "RMS of %s where callback isn't zero" % rms | |
warnings.warn(msg, RuntimeWarning) | |
p_avg = (p1 + x) / 2.0 | |
if failure_idx_flag: | |
p_avg = (p_avg, active, zero_der) | |
return p_avg | |
p = p1 - q1*(p1 - x)/(q1 - q0) | |
p = np.where(zero_der, (p1 + x)/2.0, p) | |
active = np.abs(p - p1) >= tol # not yet converged | |
# stop iterating if there aren't any failures, not incl zero der | |
if not active[~zero_der].any(): | |
# non-zero dp, but infinite newton step | |
zero_der_nz_dp = (zero_der & nonzero_dp) | |
if zero_der_nz_dp.any(): | |
rms = np.sqrt( | |
sum((p1[zero_der_nz_dp] - x[zero_der_nz_dp])**2) | |
) | |
msg = "RMS of %s where callback isn't zero" % rms | |
warnings.warn(msg, RuntimeWarning) | |
if failure_idx_flag: | |
p = (p, active, zero_der) | |
return p | |
x = p1 | |
q0 = q1 | |
p1 = p | |
q1 = np.asarray(func(*((p1,) + args))) | |
msg = "Failed to converge after %d iterations, value is %s" % (maxiter, p) | |
if active.all(): | |
raise RuntimeError(msg) | |
warnings.warn(msg, RuntimeWarning) | |
if failure_idx_flag: | |
p = (p, active, zero_der) | |
return p | |
def time_array_newton(): | |
def f(x, *a): | |
b = a[0] + x * a[3] | |
return a[1] - a[2] * (np.exp(b / a[5]) - 1.0) - b / a[4] - x | |
def f_1(x, *a): | |
b = a[3] / a[5] | |
return -a[2] * np.exp(a[0] / a[5] + x * b) * b - a[3] / a[4] - 1 | |
def f_2(x, *a): | |
b = a[3] / a[5] | |
return -a[2] * np.exp(a[0] / a[5] + x * b) * b ** 2 | |
a0 = np.array([ | |
5.32725221, 5.48673747, 5.49539973, | |
5.36387202, 4.80237316, 1.43764452, | |
5.23063958, 5.46094772, 5.50512718, | |
5.42046290 | |
]) | |
a1 = (np.sin(range(10)) + 1.0) * 7.0 | |
args = (a0, a1, 1e-09, 0.004, 10, 0.27456) | |
x0 = [7.0] * 10 | |
newton(f, x0, args=args, fprime=f_1, fprime2=f_2) | |
if __name__ == '__main__': | |
for _ in range(10000): time_array_newton() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment