
Gradient descent with 2-point line fit where gradients cross 0

The gradient descent (GD) method iterates

xnew = xold - rate(t) * grad(xold)

GD is a workhorse in machine learning because it's so simple, uses only gradients (not function values), and scales to very large x.
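A minimal sketch of this plain iteration (not the full gd_cross0.py below; gd_plain, gradfunc and ratefunc here are placeholders for any gradient and step-size functions):

import numpy as np

def gd_plain( gradfunc, x0, ratefunc, maxiter=100 ):
    # plain gradient descent: x <- x - rate(t) * grad(x)
    x = np.copy( x0 )
    for t in range( maxiter ):
        x -= ratefunc( t ) * gradfunc( x )
    return x

# e.g. minimize |x|^2 / 2, whose gradient is x, with constant rate 0.5
print( gd_plain( gradfunc=lambda x: x, x0=np.ones( 3 ), ratefunc=lambda t: 0.5 ))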

rate(t) is a step size or "learning rate" (aka η, Greek eta). It can be constant, or decreasing, taking smaller steps as you go. One can sometimes tune a rate function to work, but everybody knows:

  • rate too small: many tiny steps, long run times; try doubling it
  • rate too big: x oscillates; try halving it.
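ratefunc.py below implements the simple family rate0 / t^rate_power, constant when the power is 0; a compressed sketch of the same idea:

def ratefunc( t, rate0=0.1, power=0 ):
    # power == 0: constant rate0;  power > 0: rate0 / t^power, decreasing
    return rate0 / max( t, 1 ) ** float( power )

print( ratefunc( 100 ))              # constant: 0.1
print( ratefunc( 100, power=0.5 ))   # decreasing: 0.1 / 10 = 0.01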

gd_cross0.py below improves on standard GD by:

  1. see where grad components cross 0: g < 0 < gprev or g > 0 > gprev
  2. draw a line through the 2 points (x, g) -- (xprev, gprev)
  3. set xnew = where that line crosses 0, and re-evaluate gradfunc( xnew ) -- see the sketch below.
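Step 3 is the classic secant update, applied per component where the sign changed. A standalone sketch using the same names as gd_cross0.py below (y_, grad_ are the previous point and gradient):

import numpy as np

def cross0_step( y, y_, grad, grad_ ):
    # where grad components changed sign, move y to the zero
    # of the line through (y_, grad_) -- (y, grad)
    j = (grad * grad_ < 0)  # sign change: g < 0 < gprev or g > 0 > gprev
    ynew = y.copy()
    ynew[j] -= grad[j] * (y[j] - y_[j]) / (grad[j] - grad_[j])
    return ynew

# first component crosses 0 between the two points, second does not:
y_, y = np.zeros( 2 ), np.ones( 2 )
g_, g = np.array([ -2., 1. ]), np.array([ 2., .5 ])
print( cross0_step( y, y_, g, g_ ))  # [ 0.5  1. ]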

A plot:

[figure: 2d-lin-gdcross0]

Fine print: false position vs. secant method

The method of False position is very similar to the above line-through-2-points aka Secant method, but theoretically better: once a zero is bracketed in an interval a < x < b, it stays there. Does it matter in practice, with noisy gradients? Todo: compare the two; test cases welcome.
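For comparison, scalar sketches of the two updates (illustrative only; the false-position step assumes a bracket with fa * fb < 0 and a callable f):

from __future__ import division

def secant_step( x0, f0, x1, f1 ):
    # zero of the line through (x0, f0) -- (x1, f1); may leave [x0, x1]
    return x1 - f1 * (x1 - x0) / (f1 - f0)

def false_position_step( a, fa, b, fb, f ):
    # same interpolated point, but keep the zero bracketed:
    # drop the endpoint whose f has the same sign as f(c)
    c = b - fb * (b - a) / (fb - fa)
    fc = f( c )
    if fa * fc < 0:  # zero in [a, c]
        return a, fa, c, fc
    else:            # zero in [c, b]
        return c, fc, b, fb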

The files below:

gd_cross0.py
    a bare-bones implementation of GD with 0-crossing and Nesterov momentum
    def gd_cross0( gradfunc, x0, params, ratefunc, callback=None, verbose=1 )
lingrad.py
    linear gradient Ax - b, A = diag( 1/cond .. 1 ) -- lots of theory
    *= (1 + ripple * sin( pi x ))
params.py
    class Params
ratefunc.py
    rate0 / t^rate_power
run-gd
    shell script: for dim rate ... run test-gd.py
test-gd.py
    run this rate0= momentum= ... in ipython
zutil.py
    small utilities: Bag aka Dotdict, numstr, pvec, vmap ...

(If you download these files, mv 1-gd_cross0.py gd_cross0.py; gist.github shows files in alphabetical order, so 0-* 1-* come first.)

See also

Numerical Recipes p. 449 ff.
lightning AdaGrad -- fast cython SGD (not plain GD) for big, sparse classification
Open loop

Comments are welcome, test cases most welcome.

cheers
-- denis

Last change: 20 June 2017

#!/usr/bin/env python2
""" gradient descent with Nesterov momentum
    and https://en.wikipedia.org/wiki/Secant_method:
    when grad components cross 0, g < 0 < gprev or g > 0 > gprev,
    ynew = where the line through (y, grad) -- (yprev, gradprev) crosses 0
    and re-evaluate gradfunc( ynew ).
"""

from __future__ import division
import numpy as np
import etc.zutil as nu

__version__ = "2017-06-20 june denis-bz-py t-online de"


#...............................................................................
def gd_cross0( gradfunc, x0, params, ratefunc, callback=None, verbose=1 ):
    """ gd with Nesterov momentum and cross0 """
    x = np.copy( x0 )
    p = params  # defaults: params.py
    step = np.zeros_like( x0 )
    y = np.zeros_like( x0 )
    grad = np.zeros_like( x0 )
    print "\n{ gd: |x| |s| |m| |g|_max step = momstep - rate * grad min max x ncross0"

    for t in xrange(p.maxiter):
        kmom = t / (t + 3) * p.momentum  # 0 1/4 2/5 ...
        momstep = kmom * step  # e.g. 0.9 * prev step
        y_ = y.copy()  # copy just in case
        grad_ = grad.copy()
        y = x + momstep
        grad = gradfunc( y )

        j = (grad * grad_ < 0)  # components g < 0 < gprev or g > 0 > gprev
            # drop j where grad grad_ are both small, 2-point line wobbly --
        np.logical_and( j, np.fabs( grad - grad_ ) > p.gtol, out=j )  # gtol ?
        ncross0 = j.sum()  # plot
        if p.cross0 and (ncross0 > 0):
                # y0 = where the line (y, grad) -- (yprev, gradprev) crosses 0
                # False_position ? see .md
                # use all 3 values, y_ y y0 g_ g g0 ? see exp-cross0.note
                # per-comp rates *= 1.1 where same sign, rprop ?
            y[j] -= grad[j] * (y[j] - y_[j]) \
                    / (grad[j] - grad_[j])
            grad = gradfunc( y )  # again
            momstep = y - x

        #.......................................................................
        rate = ratefunc( t )  # e.g. rate0 / t^rate_power
        gradstep = - rate * grad
        step = momstep + gradstep
        x += step

        gradstepmax = _max( gradstep )
        stepmax = _max( step )
        xmax = _max( x )
        stop = (t == p.maxiter - 1) or (gradstepmax <= p.gtol) \
                or (stepmax <= p.xtol) or (xmax >= p.xlim)
        if verbose or stop:
            norms = nu.vmap( _max, [x, step, momstep, gradstep] )
            print ("%2d |x s m g| %-20s s %-16s\t m %-16s\t g %-16s\t x %s\t %.0d" % (
                    t, norms,
                    nu.pvec(step), nu.pvec(momstep), nu.pvec(gradstep), x, ncross0
                    )).expandtabs(4)  # vecs with caller's np.set_printoptions
        if callable(callback):
            if callback( locals() ):
                break

        # print the reason for stopping (class Stopiter) --
        if t < p.miniter:
            continue
        if t == p.maxiter - 1:
            print "} gd: stopping after %d iterations" % p.maxiter
            break
        if gradstepmax <= p.gtol:
            print "} gd: gradient step is small enough, rate %.3g * |g|_max %.3g <= gtol %.3g" % (
                    rate, gradstepmax / rate, p.gtol )
            break
        if stepmax <= p.xtol:
            print "} gd: small step, |x - xprev|_max %.3g <= xtol %.3g" % (
                    stepmax, p.xtol )
            break
        if xmax >= p.xlim:
            print "} gd: x is too big, |x|_max %.3g > xlim %.3g" % (xmax, p.xlim)
            break

    return nu.Bag( locals() )  # the lot, Bag aka dotdict

def _max( x ):
    return np.linalg.norm( x, ord=np.inf )
# from: python test-gd.py tag="2d-lin/2d-lin-rate1.5-mom.9-ripple1-gtol.1-cross0" dim=2 gradfunc="lin" cross0=1 gtol=.1 xtol=0 momentum=.9 rate0=1.5 rate_power=0 ripple=1 save=1
# run: 20 Jun 2017 14:08 in ~bz/py/grad/cross0 denis-imac 10.8.3
--------------------------------------------------------------------------------
test-gd.py tag="2d-lin/2d-lin-rate1.5-mom.9-ripple1-gtol.1-cross0" dim=2 gradfunc="lin" cross0=1 gtol=.1 xtol=0 momentum=.9 rate0=1.5 rate_power=0 ripple=1 save=1
Params --
# lingrad.py --
dim 2
cond 10
b 100
ripple 1
# gd.py --
momentum 0.9
cross0 1
gtol 0.1
maxiter 50
xtol 0
# ratefunc.py: rate0 / t^p
rate0 1.5
rate_power 0
seed 1
Lingrad: [0.1 1] * x - 100 dim 2 ripple 1 ripple_w 1
Ratefunc: constant 1.5
{ gd: |x| |s| |m| |g|_max step = momstep - rate * grad min max x ncross0
0 |x s m g| [150 150 0 150] s [150 150] m [0 0] g [150 150] x [150 150]
1 |x s m g| [220 70 34 36] s [70 -3.5] m [34 -2.4] g [36 -1.1] x [220 146] 1
2 |x s m g| [455 236 25 211] s [236 -24] m [25 -1.3] g [211 -23] x [455 122]
3 |x s m g| [569 114 106 17] s [114 -28] m [106 -11] g [7.7 -17] x [569 94]
4 |x s m g| [628 59 58 4.3] s [59 9.2] m [58 4.9] g [0.053 4.3] x [628 103] 1
5 |x s m g| [762 135 33 102] s [135 -2.8] m [33 -1.7] g [102 -1.1] x [762 100] 1
6 |x s m g| [852 90 81 8.9] s [90 -1.2] m [81 0.57] g [8.9 -1.7] x [852 99] 1
7 |x s m g| [935 83 57 27] s [83 1.1] m [57 0.96] g [27 0.12] x [935 1e+02] 1
8 |x s m g| [990 54 54 0.12] s [54 0.049] m [54 -0.071] g [0.05 0.12] x [990 1e+02] 1
9 |x s m g| [991 0.9 0.19 0.71] s [0.9 0.012] m [0.19 0.033] g [0.71 -0.021] x [991 1e+02] 1
10 |x s m g| [992 1.4 0.62 0.74] s [1.4 0.0076] m [0.62 0.0081] g [0.74 -0.00058] x [992 1e+02]
11 |x s m g| [994 2.5 0.96 1.6] s [2.5 -0.0025] m [0.96 0.0053] g [1.6 -0.0078] x [994 1e+02]
12 |x s m g| [997 2.8 1.8 0.94] s [2.8 0.0049] m [1.8 -0.0018] g [0.94 0.0067] x [997 100]
13 |x s m g| [999 2.1 2 0.053] s [2.1 -0.0052] m [2 0.0036] g [0.053 -0.0087] x [999 1e+02]
} gd: gradient step is small enough, rate 1.5 * |g|_max 0.0351 <= gtol 0.1
Datalogger: saving to 2d-lin/2d-lin-rate1.5-mom.9-ripple1-gtol.1-cross0.npz gradstep momstep ncross0 step x
gradstep: 14 [150 150] .. [0.053 -0.0087]
momstep : 14 [0 0] .. [2 0.0036]
ncross0 : 14 0 .. 0
step : 14 [150 150] .. [2.1 -0.0052]
x : 14 [150 150] .. [999 1e+02]
Params : \n # lingrad.py -- \n dim 2 \n cond 10 \n b 100 \n ripple 1 \n \n # gd.py -- \n momentum 0.9 \n cross0 1 \n gtol 0.1 \n maxiter 50 \n xtol 0 \n \n # ratefunc.py: rate0 / t^p \n rate0 1.5 \n rate_power 0 \n \n seed 1 \n
# from: python test-gd.py tag="2d-lin/2d-lin-rate1.5-mom.9-ripple1-gtol.1" dim=2 gradfunc="lin" cross0=0 gtol=.1 xtol=0 momentum=.9 rate0=1.5 rate_power=0 ripple=1 save=1
# run: 20 Jun 2017 14:08 in ~bz/py/grad/cross0 denis-imac 10.8.3
--------------------------------------------------------------------------------
test-gd.py tag="2d-lin/2d-lin-rate1.5-mom.9-ripple1-gtol.1" dim=2 gradfunc="lin" cross0=0 gtol=.1 xtol=0 momentum=.9 rate0=1.5 rate_power=0 ripple=1 save=1
Params --
# lingrad.py --
dim 2
cond 10
b 100
ripple 1
# gd.py --
momentum 0.9
cross0 0
gtol 0.1
maxiter 50
xtol 0
# ratefunc.py: rate0 / t^p
rate0 1.5
rate_power 0
seed 1
Lingrad: [0.1 1] * x - 100 dim 2 ripple 1 ripple_w 1
Ratefunc: constant 1.5
{ gd: |x| |s| |m| |g|_max step = momstep - rate * grad min max x ncross0
0 |x s m g| [150 150 0 150] s [150 150] m [0 0] g [150 150] x [150 150]
1 |x s m g| [220 70 34 37] s [70 -3] m [34 34] g [36 -37] x [220 147] 1
2 |x s m g| [455 236 25 211] s [236 -40] m [25 -1.1] g [211 -39] x [455 107]
3 |x s m g| [569 114 106 32] s [114 14] m [106 -18] g [7.7 32] x [569 121] 1
4 |x s m g| [628 59 58 26] s [59 -18] m [58 7.3] g [0.053 -26] x [628 102] 1
5 |x s m g| [762 135 33 102] s [135 3.3] m [33 -10] g [102 14] x [762 106] 1
6 |x s m g| [852 90 81 8.9] s [90 1.1] m [81 2] g [8.9 -0.93] x [852 107] 1
7 |x s m g| [935 83 57 27] s [83 -0.29] m [57 0.67] g [27 -0.95] x [935 106]
8 |x s m g| [990 54 54 15] s [54 -16] m [54 -0.19] g [0.05 -15] x [990 91]
9 |x s m g| [1019 35 37 45] s [29 35] m [37 -11] g [-7.6 45] x [1019 126] 2
10 |x s m g| [1031 12 24 17] s [12 7.2] m [20 24] g [-7.8 -17] x [1031 133] 1
11 |x s m g| [1037 24 8.7 29] s [6.3 -24] m [8.7 5.1] g [-2.4 -29] x [1037 109]
12 |x s m g| [1036 17 17 5.8] s [-1.2 -17] m [4.6 -17] g [-5.8 0.068] x [1036 92] 1
13 |x s m g| [1034 25 13 37] s [-2.1 25] m [-0.9 -13] g [-1.2 37] x [1034 116]
14 |x s m g| [1023 76 18 95] s [-11 -76] m [-1.6 18] g [-9.7 -95] x [1023 40] 1
15 |x s m g| [1011 273 57 331] s [-12 273] m [-8.5 -57] g [-3.8 331] x [1011 313] 1
16 |x s m g| [1001 835 207 1042] s [-9.3 -835] m [-9.3 207] g [-0.054 -1042] x [1001 -522] 1
17 |x s m g| [995 114 639 752] s [-6 114] m [-7.1 -639] g [1.1 752] x [995 -408] 2
18 |x s m g| [993 122 88 34] s [-1.8 122] m [-4.7 88] g [2.9 34] x [993 -286]
19 |x s m g| [993 745 95 650] s [-0.46 745] m [-1.4 95] g [0.95 650] x [993 458]
20 |x s m g| [995 59 583 524] s [1.9 59] m [-0.36 583] g [2.2 -524] x [995 518] 1
21 |x s m g| [997 1262 47 1308] s [2.4 -1262] m [1.5 47] g [0.95 -1308] x [997 -744]
22 |x s m g| [2708 3452 999 4451] s [2 3452] m [1.9 -999] g [0.08 4451] x [999 2708] 1
23 |x s m g| [1792 916 2748 3664] s [1.4 -916] m [1.6 2748] g [-0.19 -3664] x [1001 1792] 2
24 |x s m g| [1019 774 733 41] s [1.1 -774] m [1.1 -733] g [-0.074 -41] x [1002 1019]
25 |x s m g| [1002 1185 622 563] s [0.092 -1185] m [0.85 -622] g [-0.75 -563] x [1002 -167]
26 |x s m g| [1002 393 956 563] s [-0.067 -393] m [0.074 -956] g [-0.14 563] x [1002 -559] 1
27 |x s m g| [2053 2612 318 2930] s [-0.078 2612] m [-0.054 -318] g [-0.024 2930] x [1002 2053]
28 |x s m g| [1002 2138 2123 4262] s [-0.067 -2138] m [-0.064 2123] g [-0.0034 -4262] x [1002 -86] 1
29 |x s m g| [2227 2312 1744 4057] s [-0.055 2312] m [-0.055 -1744] g [-1.4e-05 4057] x [1001 2227] 1
30 |x s m g| [6244 8470 1892 10362] s [-0.047 -8470] m [-0.045 1892] g [-0.0025 -10362] x [1001 -6244] 1
31 |x s m g| [3788 2456 6951 9406] s [-0.047 2456] m [-0.039 -6951] g [-0.0083 9406] x [1001 -3788] 1
32 |x s m g| [1001 4684 2021 2663] s [-0.057 4684] m [-0.039 2021] g [-0.019 2663] x [1001 896]
33 |x s m g| [3076 3972 3864 7837] s [-0.086 -3972] m [-0.047 3864] g [-0.038 -7837] x [1001 -3076] 1
34 |x s m g| [1388 4464 3285 7749] s [-0.15 4464] m [-0.071 -3285] g [-0.079 7749] x [1001 1388] 1
35 |x s m g| [9838 11226 3700 14926] s [-0.28 -11226] m [-0.12 3700] g [-0.15 -14926] x [1001 -9838] 1
36 |x s m g| [12587 2749 9326 6578] s [-0.41 -2749] m [-0.23 -9326] g [-0.18 6578] x [1000 -12587] 1
} gd: x is too big, |x|_max 1.26e+04 > xlim 1e+04
Datalogger: saving to 2d-lin/2d-lin-rate1.5-mom.9-ripple1-gtol.1.npz gradstep momstep ncross0 step x
gradstep: 37 [150 150] .. [-0.18 6578]
momstep : 37 [0 0] .. [-0.23 -9326]
ncross0 : 37 0 .. 1
step : 37 [150 150] .. [-0.41 -2749]
x : 37 [150 150] .. [1000 -12587]
Params : \n # lingrad.py -- \n dim 2 \n cond 10 \n b 100 \n ripple 1 \n \n # gd.py -- \n momentum 0.9 \n cross0 0 \n gtol 0.1 \n maxiter 50 \n xtol 0 \n \n # ratefunc.py: rate0 / t^p \n rate0 1.5 \n rate_power 0 \n \n seed 1 \n
#!/usr/bin/env python2
""" linear gradient: Ax - b, A = diag( 1/cond .. 1 )
    b 100, cond 10: xmin = b / A = [1000 .. 100]
"""

from __future__ import division
import numpy as np

class Lingrad(object):

    def __init__( s, dim=2, b=100, cond=10, ripple=0, ripple_w=1, verbose=1 ):
        s.A = np.linspace( 1, 1./cond, dim )[::-1]  # dim 1: 1
        s.b = b
        s.xmin = b / s.A
        s.ripple = ripple
        s.ripple_w = ripple_w
        s.__name__ = "Lingrad: %s * x - %s dim %d ripple %.3g ripple_w %.3g " % (
                s.A, b, dim, ripple, ripple_w )
        if verbose:
            print "\n", s.__name__

    def __call__( s, x ):
        x = np.asarray_chkfinite( x )
        g = s.A * x - s.b
        if s.ripple > 0:
            g *= (1 + s.ripple * np.sin( np.pi * x / s.ripple_w ))  # *= not +=
        # if noise > 0:
        #     g *= (1 + np.random.laplace( scale=noise, size=np.size(x) ))
        return g

if __name__ == "__main__":
    import sys
    plot = 0
    ripple = 1
        # to change these params, run this.py a=1 b=None 'c = ...' in sh or ipython
    for arg in sys.argv[1:]:
        exec( arg )

    g = Lingrad( dim=1, ripple=ripple )
    print "g( [0, 0] );", g( [0, 0] )
    gmin = g( g.xmin )
    print "g( g.xmin ):", gmin
    assert np.allclose( gmin, 0 )

    if plot:
        # todo: 3d
        from matplotlib import pyplot as pl
        import seaborn as sns
        fig = pl.figure( figsize=[12, 6] )
        fig.suptitle( "lingrad ripple %g" % ripple )
        ax = fig.gca()
        x = np.linspace( 0, 100, 301 )
        ax.plot( x, - g(x) )
        pl.draw()
#!/usr/bin/env python2
""" params = Params()
    # to change the defaults below, run test.py dim=3 ... in sh or ipython:
    for arg in sys.argv[1:]:
        exec( arg, params.__dict__ )
    use params.dim ...
"""

import numpy as np

class Params(object):
        # lingrad.py: Ax - b, A = diag( 1/cond .. 1 )
    dim = 2
    cond = 10  # cond 10, b 100: xmin b/A = [1000 .. 100]
    b = 100
    ripple = 0  # grad *= (1 + ripple * sin( pi * x ))

        # gd.py --
    momentum = 0.9
    cross0 = True
    gtol = .1  # |gradstep|_max = rate * |grad|_max < gtol: quit
    maxiter = 50
    miniter = 10
    xlim = 1e4  # |x|_max > xlim: quit
    xtol = 0  # |x - xprev|_max < xtol: quit ?

        # ratefunc.py: rate0 / t^p
    rate0 = 1
    rate_power = 0

    seed = 1

    def __str__(s):
        return """
# lingrad.py --
dim %d
cond %.3g
b %.3g
ripple %.3g
# gd.py --
momentum %.3g
cross0 %s
gtol %.3g
maxiter %d
xtol %.3g
# ratefunc.py: rate0 / t^p
rate0 %.3g
rate_power %.3g
seed %d
""" % (
        s.dim, s.cond, s.b, s.ripple, s.momentum, s.cross0, s.gtol, s.maxiter, s.xtol,
        s.rate0, s.rate_power, s.seed )

#...............................................................................
if __name__ == "__main__":
    p = Params()
    print "Params -- \n", p
    exec( "rate0=3", p.__dict__ )
    # exec( arg in sys.argv, p.__dict__ )
    print "p.rate0:", p.rate0
#!/usr/bin/env python2

from __future__ import division
import numpy as np

#...............................................................................
class Ratefunc( object ):
    """ ratefunc = Ratefunc( rate0=0.1, power=0 )
        rate = ratefunc( t ) aka learning_rate
        p == 0: constant rate0 -- default
        p != 0: rate0 / t ^ p
        NB: t may be step (sample X_i) or minibatch or epoch
    """
        # $sklearn/linear_model/stochastic_gradient.py
        # optimal: eta = 1.0 / (alpha * (t + t0)) [default]
        #     where t0 is chosen by a heuristic proposed by Leon Bottou ?
        # SGDRegressor power_t=0.25
        # SGDClassifier power_t=0.5

    #...........................................................................
    def __init__( s, rate0=0.1, power=0, verbose=1 ):
        s.rate0 = rate0
        s.power = power
        s.__name__ = s.__str__()
        if verbose:
            print "\nRatefunc: %s" % s.__name__

    def __call__( s, t, *unused ):
        if s.power == 0:
            return s.rate0
        else:
            return s.rate0 / max( t, 1 ) ** s.power

    def __str__( s ):
        if s.power == 0:
            return "constant %.3g" % s.rate0
        else:
            return "%.3g / t^%.3g" % (s.rate0, s.power)

#...............................................................................
if __name__ == "__main__":
    np.set_printoptions( threshold=20, edgeitems=20, linewidth=140,
        formatter = dict( float = lambda x: "%.2g" % x ))  # float arrays %.2g

    epo = 50000 // 128  # ~ 400 minibatches per epoch
    T = np.arange( epo * 40 )

    def ft( f, T=T ):
        fepo = np.array([ f(t) for t in T ]) [::epo]
        print (fepo * 100) .round().astype(int)
        return fepo

    for p in [.5, .9]:
        fepo = ft( Ratefunc( rate0=1, power=p ))  # was p=p, a typo: __init__ takes power=
#!/bin/bash
# run-gd: for ... python test-gd.py > longname.log
# (could of course do this in python -- de gustibus)

trap exit INT QUIT TERM

dims=2  # lin: 3 4 similar
gradfunc=lin
rate_power=0  # rate0 / t^p
ripple=1  # lingrad: (Ax - b) * (1 + ripple * sin( pi x ))
gtol=.1
xtol=0  # small momstep + gradstep far from min ?
save=1
# gradfunc=rosen  # no cross0
# rate=.003

[[ $1 ]] &&
    export "$@"  # args a=1 b=None ... -> py
set -u  # no undef vars

#...............................................................................
for dim in $dims
do
    dir=${dim}d-$gradfunc
    mkdir $dir 2> /dev/null
    for rate in 1.5
    do
    for mom in .9
    do
    for cross0 in 0 1
    do
        # good long name for .log .npz --
        tag=$dir/$dir-rate$rate-mom$mom-ripple$ripple-gtol$gtol
        [[ $xtol != 0 ]] && tag=$tag-xtol$xtol
        [[ $cross0 != 0 ]] && tag=$tag-cross0
        log=$tag.log
        mv $log ~/tmp 2> /dev/null

        #...........................................................................
        py -from test-gd.py tag=\"$tag\" \
            dim=$dim gradfunc=\"$gradfunc\" \
            cross0=$cross0 \
            gtol=$gtol xtol=$xtol \
            momentum=$mom \
            rate0=$rate rate_power=$rate_power \
            ripple=$ripple \
            save=$save \
            "$@" \
            > $log 2>&1
        log-sum $log
    done
    done
    done
done
#!/usr/bin/env python2
""" test-gd.py: gd( gradfunc, x0, params, ratefunc )
"""

from __future__ import division
import sys
import numpy as np
from scipy.optimize import rosen, rosen_der
from etc import zutil as nu
from opt.util import datalogger
from gd_cross0 import gd_cross0 as gd
from lingrad import Lingrad
from params import Params
from ratefunc import Ratefunc

np.set_printoptions( threshold=20, edgeitems=20, linewidth=140,
    formatter = dict( float=nu.numstr ))
    # formatter = dict( float = lambda x: "%.3g" % x ))  # float arrays %.3g

print "\n", 80 * "-"
print " ".join(sys.argv)

#...............................................................................
gradfunc = "lin"
save = 0
tag = "tmp"  # > tag.npz

    # to change params, run this.py a=1 b=None 'c = ...' in sh or ipython
p = params = Params()
for arg in sys.argv[1:]:
    exec( arg )
    exec( arg, params.__dict__ )  # both

np.random.seed( params.seed )
print "Params --", params

#...............................................................................
if gradfunc.startswith( "lin" ):  # Ax - b, A = diag( 1/cond .. 1 )
    gradfunc = Lingrad( dim=p.dim, b=p.b, cond=p.cond, ripple=p.ripple )
elif gradfunc.startswith( "rosen" ):
    gradfunc = lambda x: 100 * np.asarray_chkfinite( rosen_der( x / 100 ))
        # rate=.01: grads osc
else:
    assert 0, gradfunc

# rate0 / t^p
ratefunc = Ratefunc( rate0=p.rate0, power=p.rate_power )

# log these vars inside gd, callback=logger --
logger = datalogger.Datalogger( "x step gradstep momstep ncross0 " )

x0 = np.zeros( p.dim )

#...............................................................................
bag = gd( gradfunc, x0=x0, params=params, ratefunc=ratefunc, callback=logger )

if save:
    npz = tag + ".npz"
    logger.savez( npz, Params=str(params), verbose=1 )
#!/usr/bin/env python2
""" zutil.py: Bag asum findfile ints numstr vmap """

from __future__ import division
import numpy as np

class Bag( dict ):
    """ a dict with d.key short for d["key"], aka Dotdict
        d = Bag( k=v ... / **dict / dict.items() / [(k,v) ...] ) just like dict
    """
        # np.savez( **bag )
        # bag = Bag( np.load( .npz ).items() )

    def __init__(self, *args, **kwargs):
        dict.__init__( self, *args, **kwargs )
        self.__dict__ = self

    def __getnewargs__(self):  # for cPickle.dump( d, file, protocol=-1)
        return tuple(self)

Dotdict = Bag

#...............................................................................
def asum( X ):
    """ array summary: "shape dtype min av max [density]" """
    if not hasattr( X, "dtype" ) or X.dtype.kind not in "fiu":  # float int uint
        return str(X)
    if hasattr( X, "todense" ):  # issparse
        sparsetype = type(X).__name__ + " "  # csr_matrix etc.
        density = " of the %.3g %% non-0" % (
            100. * X.nnz / np.prod( X.shape ))
    else:
        sparsetype = density = ""
    dtype = str(X.dtype) .replace( "float64", "" )
    return "%s %s%s min av max %.3g %.3g %.3g %s" % (
        X.shape, sparsetype, dtype, X.min(), X.mean(), X.max(), density )

def bincount( y ):
    return np.bincount( (y - y.min()) .astype(int) )

def copy( x ):
    return x if x is None or np.isscalar(x) \
        else np.copy( x )

def findfile( filename, dirs="data ../data $SCIKIT_LEARN_DATA $webdata " ):
    """ -> first dir/file found in a list of dirs
        IOError if none
    """
    from os.path import expanduser, expandvars, isfile, join
    if isinstance( dirs, basestring ):
        dirs = dirs.split()
    for dir in [""] + dirs:  # "": /abspath ~/name name
        dirfile = expanduser( expandvars( join( dir, filename )))
        if isfile( dirfile ):
            return dirfile
    raise IOError( "file \"%s\" not found in folders %s" % (filename, dirs) )

def ints( X ):
    return np.round(X).astype(int)  # NaN Inf -> - maxint

def minmax( x ):
    if np.isscalar(x) or x.ndim == 0:
        return "%.3g" % x
    return "%-6s %-6s" % (numstr( x.min() ), numstr( x.max() ))

def mkdirpart( filename ):
    """ tmp/file: mkdir tmp """
    import os
    dir = os.path.dirname( filename )
    if dir and not os.path.isdir( dir ):
        os.makedirs( dir )

def numstr( x, fmt="%.2g" ):
    """ %.2g but 123 not 1.2e+02 """
        # np.set_printoptions( ... formatter = dict( float=ut.numstr ))
    if isinstance( x, basestring ) or x is None:
        return x
    if np.isscalar(x):
        if abs(x) < 1e-10:  return "0"
        if abs(x) < 1e-6:  return "%.1g" % x
        if 100 <= abs(x) < 1e5:  return "%.0f" % x
        return fmt % x
    x = np.asanyarray(x)
    assert x.ndim == 1, x.ndim  # NotImplemented
    return " ".join([ numstr( _, fmt ) for _ in x ])

def numstr3( x ):
    return numstr( x, fmt="%.3g" )

_pvec = 3

def pvec( x ):
    """ -> str(x) if short / min max if long """
    if np.isscalar(x) or x.ndim == 0:
        return "%.3g" % x
    return str(x) if len(x) <= _pvec \
        else minmax( x )

def vmap( f, X ):
    return np.vstack( map( f, X )) .squeeze()

def From( pyfile ):
    pyfile = pyfile.split("/")[-1]
    return "from %s run in %s %s" % (pyfile, pwd(), isoday())

def isoday():
    import time
    return time.strftime( "%Y-%m-%d %h %H:%M" )  # 2011-11-15 Nov 12:06

def pwd():
    import os
    return os.getenv("PWD") .replace( "/Users/", "~", 1 )  # mac -L not -P