Skip to content

Instantly share code, notes, and snippets.

@dannyko
Last active August 29, 2015 14:03
Show Gist options
  • Save dannyko/ffe9653768cb80dfc0da to your computer and use it in GitHub Desktop.
Save dannyko/ffe9653768cb80dfc0da to your computer and use it in GitHub Desktop.
Newton-Raphson Visualization (1D)

Newton's method solves quadratic optimization problems in one step because the derivative of a parabola is a straight line, so every linear fit provides an exact approximation. To make things a little more interesting, this visualization combines a trigonometric cosine function in such a way that the global minimizer occurs at x = 0 regardless of the weight of the cosine term. The extra cosine term provides a convenient expression that creates nonlinear structure in the derivative, to better reveal some of the interesting properties of the algorithm.

The upper plot shows the objective function as a blue line with arbitrary units on the y-axis. Background colors indicate the percentage of failures to converge within 10 steps in the neighborhood of a given initial position. Green indicates low failure percentage, while dark red indicates high failure percentage. The lower plot shows the objective function's derivative, a straight line combined with a trigonometric sine function.

Clicking either plot selects the position of the initial position and starts the algorithm running, providing a step-by-step visualization of each iteration. The local quadratic approximation at each position momentarily appear along with the minimum of the local quadratic approximation, before the animation moving the point kicks in.

The gray lines show where the "undamped" algorithm would move, and the red lines show where the "damped" algorithm moves. When damping = 1 (default), gray and red lines coincide; otherwise, the red line's position will be proportionally scaled to reduce the standard step size by some fractional amount. Adjusting the top slider on the right affects the convergence parameters for the algorithm. The top-most slider sets the zero-tolerance for the magnitude of the derivative (i.e. norm of the gradient). If the gradient norm goes below this tolerance of zero, the algorithm converges.

The middle slider sets the damping coefficient for the step length. The damped Newton's method is perhaps the simplest possible modification to the original method that can increase its radius of convergence for objective functions having nonlinear derivatives.

Moving the bottom slider changes the weight of the cosine term. Notice that with the slider all the way left, the cosine term disappears, and the algorithm never fails to converge (all green), as expected for purely quadratic objectives.

If you want to read more, here's the full post on LinkedIn.

<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<style>
body {
font: 13pt courier;
font-weight: bold;
}
.axis path,
.axis line {
fill: none;
stroke: #000;
shape-rendering: crispEdges;
}
.blue_line {
fill: none;
stroke: steelblue;
stroke-width: 3px;
}
.green_line {
fill: none;
stroke: #090;
stroke-width: 3px;
}
.red_line {
fill: none;
stroke: #900;
stroke-width: 3px;
}
.gray_line {
fill: none;
stroke: #999;
stroke-width: 3px;
}
.black_line {
fill: none;
stroke: #000;
stroke-width: 3px;
}
div {
position: relative;
}
input {
position: absolute;
left: 960px;
}
</style>
</head>
<body>
<div>
<input min="1e-4" max="1" step="1e-4" type="range" id="toleranceSlider" value=".01" title="zero tolerance" autocomplete="off">
<br/>
<input min="0.1" max="1" step="0.01" type="range" id="dampingSlider" value="1" title="damping coefficient" autocomplete="off">
<br/>
<input min="0" max="3" step="0.01" type="range" id="cosineSlider" value="1" title="cosine-term coefficient" autocomplete="off">
</div>
<script src="http://d3js.org/d3.v3.js"></script>
<script>
var margin = {top: 20, right: 20, bottom: 60, left: 100},
width = 900 - margin.left - margin.right,
height = 360 - margin.top - margin.bottom;
var xMin = -6 ;
var dx = 0.1 ;
var xMax = -xMin + 1.01 * dx ;
var N = Math.floor((xMax - xMin) / dx) ;
var xi = 0 ;
var dur = 667 ;
var running = false ; // initialize
var x = d3.scale.linear()
.range([0, width]);
var y = d3.scale.linear()
.range([height, 0]) ;
var xAxis = d3.svg.axis()
.scale(x)
.orient("bottom") ;
var yAxis = d3.svg.axis()
.scale(y)
.orient("left") ;
var line = d3.svg.line()
.x(function(d) { return x(d.x); })
.y(function(d) { return y(d.y) + 0.5 ; }) ;
var objective_function = function(x0) {
return 0.5 * x0 * x0 - cosineCoefficient * Math.cos(x0) ;
} ;
var gradient_function = function(x0) {
return x0 + cosineCoefficient * Math.sin(x0) ;
}
var hessian_function = function(x0) {
return 1 + cosineCoefficient * Math.cos(x0) ;
}
var set_data = function(d, f0) {
for(var i = 0 ; i < N ; i++) {
xi = xMin + i * dx ;
d[i] = {x: xi, y: f0(xi)} ;
}
} ;
var tolSlider = d3.select("#toleranceSlider")
.on("input", function() {
tol = +this.value;
tolText.text('zero tolerance: ' + tol) ;
colorize() ;
});
var dampingSlider = d3.select("#dampingSlider")
.on("input", function() {
damping = +this.value;
dampingText.text('damping: ' + damping) ;
colorize() ;
});
var cosineSlider = d3.select("#cosineSlider")
.on("input", function() {
cosineCoefficient = +this.value;
cosineText.text('a: ' + cosineCoefficient) ;
init_data() ;
path
.datum(data)
.attr("d", line) ;
path2
.datum(ddata)
.attr("d", line2) ;
var circ = d3.select('circle') ;
if( !circ.empty() ) {
circ.attr("cy", y(objective_function(x.invert(circ.attr("cx"))))) ;
}
colorize() ;
});
var tol = +tolSlider.attr('value') ; // initialize
var damping = +dampingSlider.attr('value') ; // initialize
var cosineCoefficient = +cosineSlider.attr('value') ; // initialize
var data = [] ; // initialize data
var ddata = [] ; // initialize derivative data
var init_data = function() {
set_data(data, objective_function) ;
set_data(ddata, gradient_function) ;
var pad = 1 ;
x.domain(d3.extent(data, function(d) { return d.x; }));
var yDom = d3.extent(data, function(d) { return d.y === 0 ? -pad : d.y / Math.abs(d.y) * pad + d.y; }) ;
y.domain(yDom);
}
init_data() ;
// optimization on objective function:
var svg = d3.select("body").append("svg")
.attr("width", width + margin.left + margin.right)
.attr("height", height + margin.top + margin.bottom) ;
d3.select(document.body).append("br")
var g = svg
.append("g")
.attr("transform", "translate(" + margin.left + "," + margin.top + ")");
var bgLine = [] ;
var bgOpacity = 1/3 ;
var bgGroup = g.insert("g") ;
for(var k = 0 ; k < width ; k++) {
bgLine[k] = bgGroup
.append("path")
.style("opacity", bgOpacity)
.attr("class", "bgLine")
}
var tolText = g
.append("text")
.text("zero tolerance: " + tol)
.attr("stroke", "none")
.attr("fill", "black")
.attr("font-size", "12pt")
.attr("x", "340")
.attr("y", "-8")
.attr('font-family', 'courier')
var dampingText = g
.append("text")
.text("damping: " + damping)
.attr("stroke", "none")
.attr("fill", "black")
.attr("font-size", "12pt")
.attr("x", "580")
.attr("y", "-8")
.attr('font-family', 'courier')
var cosineText = g
.append("text")
.text("a: " + cosineCoefficient)
.attr("stroke", "none")
.attr("fill", "black")
.attr("font-size", "12pt")
.attr("x", "730")
.attr("y", "-8")
.attr('font-family', 'courier')
g.append("g")
.attr("class", "x axis")
.attr("transform", "translate(0," + height + ")")
// .call(xAxis)
// .append("text")
// .attr("y", 6)
//.attr("dy", 50)
// .attr("dx", width / 2)
// .text("x");
g.append("g")
.attr("class", "y axis")
//.call(yAxis)
.append("text")
.attr("transform", "rotate(-90)")
.attr("y", 6)
.attr("dy", -40)
.attr("dx", -height / 2)
.text("f");
var path = g.append("path")
.datum(data)
.attr("class", "blue_line")
.attr("d", line);
var r = 5 ;
var title = g
.append("text")
.text("f (x) = x * x / 2 - a * cos(x)")
.attr("stroke", "none")
.attr("fill", "black")
.attr("font-size", "13pt")
.attr("x", 20)
.attr("y", -8)
.attr('font-family', 'courier') ;
var info = g
.append("text")
.text("")
.attr("stroke", "none")
.attr("fill", "black")
.attr("font-size", "12pt")
.attr("x", width * 0.1)
.attr("y", "20")
.attr('font-family', 'courier') ;
var hoverText = g
.append("text")
.text("")
.attr("stroke", "none")
.attr("fill", "black")
.attr("font-size", "12pt")
.attr("y", "20")
.attr('font-family', 'courier') ;
var hoverText2 = g
.append("text")
.text("")
.attr("stroke", "none")
.attr("fill", "black")
.attr("font-size", "12pt")
.attr("y", "46")
.attr('font-family', 'courier') ;
var update_text = function(txt, callback) {
info
.transition()
.duration(dur / 2)
.ease('linear')
.style('opacity', 0)
.transition()
.duration(dur)
.ease('linear')
.style('opacity', 1)
.text(txt)
.each('end', callback)
} ;
var draw_circle = function(xy) {
var circ = g.append("circle")
.attr("cx", xy[0])
.attr("cy", xy[1])
.attr("r", r) ;
return circ ;
}
var draw_line = function(element, d) {
element
.datum(d)
.attr("d", line)
.style('opacity', 0)
.transition()
.duration(dur)
.ease('linear')
.style('opacity', 1)
.transition()
.duration(dur)
.ease('linear')
.delay(dur)
.style('opacity', 0) ;
} ;
var draw_vline = function(element, x0) {
d = [{x: x0, y: y.domain()[0]}, {x: x0, y: y.domain()[1]}] ;
draw_line(element, d) ;
} ;
var parabola = g.append("path").attr("class", "green_line").style("stroke-dasharray", ("20, 10")) ;
var redLine = g.append("path").attr("class", "red_line").style("stroke-dasharray", ("2, 2")) ;
var grayLine = g.append("path").attr("class", "gray_line").style("stroke-dasharray", ("10, 10")) ;
var hoverLine = g.append("path").attr("class", "black_line").style("stroke-dasharray", ("5, 5")) ;
var quadratic_approx = function(x0, xj, h) {
var data0 = [] ; // initialize
var f0 = function(x1) { // quadratic approximation function
var dx = x1 - x0 ;
return objective_function(x0) + gradient_function(x0) * dx + 0.5 * hessian_function(x0) * dx * dx ;
}
set_data(data0, f0) ; // compute discrete sample of quadratic approximant
draw_line(parabola, data0) ; // draw the quadratic approximant
draw_vline(redLine, xj) ; // draw the location of the damped Newton-Raphson step
draw_vline(grayLine, x0 + h) ; // draw the location of the undamped Newton-Raphson step (the minimizer of the quadratic approximant)
}
// root finding on gradient function:
var height2 = height * 0.75 ;
var y2 = d3.scale.linear()
.range([height2, 0]) ;
var line2 = d3.svg.line()
.x(function(d) { return x(d.x); })
.y(function(d) { return y2(d.y) + 0.5; }) ;
var svg2 = d3.select("body").append("svg")
.attr("width", width + margin.left + margin.right)
.attr("height", height2 + margin.top + margin.bottom) ;
var g2 = svg2
.append("g")
.attr("transform", "translate(" + margin.left + "," + margin.top + ")") ;
y2.domain(d3.extent(ddata, function(d) { return d.y; })) ;
g2.append("g")
.attr("class", "x axis")
.attr("transform", "translate(0," + height2 + ")")
.call(xAxis)
.append("text")
.attr("y", 6)
.attr("dy", 50)
.attr("dx", width / 2)
.text("x");
var yAxis2 = d3.svg.axis()
.scale(y2)
.orient("left") ;
g2.append("g")
.attr("class", "y axis")
//.call(yAxis2)
.append("text")
.attr("transform", "rotate(-90)")
.attr("y", 6)
.attr("dy", -40)
.attr("dx", -height2 / 2 - 6)
.text("0");
var title2 = g2
.append("text")
.text("f'(x) = x + a * sin(x)")
.attr("stroke", "none")
.attr("fill", "black")
.attr("font-size", "13pt")
.attr("x", 20)
.attr("y", -8)
.attr('font-family', 'courier')
var draw_circle2 = function(xy) {
var circ = g2.append("circle")
.attr("cx", xy[0])
.attr("cy", xy[1])
.attr("r", r) ;
return circ ;
}
var draw_line2 = function(element, d) {
element
.datum(d)
.attr("d", line2)
.style('opacity', 0)
.transition()
.duration(dur)
.ease('linear')
.style('opacity', 1)
.transition()
.duration(dur)
.ease('linear')
.delay(dur)
.style('opacity', 0) ;
} ;
var draw_vline2 = function(element, x0) {
d = [{x: x0, y: y2.domain()[0]}, {x: x0, y: y2.domain()[1]}] ;
draw_line2(element, d) ;
} ;
var path2 = g2.append("path")
.datum(ddata)
.attr("class", "black_line")
.attr("d", line2);
var linearization = g2.append("path").attr("class", "green_line").style("stroke-dasharray", ("20, 10")) ;
var redLine2 = g2.append("path").attr("class", "red_line").style("stroke-dasharray", ("2, 2")) ;
var grayLine2 = g2.append("path").attr("class", "gray_line").style("stroke-dasharray", ("10, 10")) ;
var hoverLine2 = g2.append("path").attr("class", "black_line").style("stroke-dasharray", ("5, 5")) ;
var zpath = g2 // zero line
.append("path")
.attr("class", "gray_line")
.style("stroke-dasharray", ("2, 2"))
.datum([{x: x.domain()[0], y: 0}, {x: x.domain()[1], y: 0}])
.attr("d", line2) ;
var linear_approx = function(x0, xj, h) {
var data0 = [] ; // initialize
var f0 = function(x1) { // linear approximation function (linearization)
var dx = x1 - x0 ;
return gradient_function(x0) + hessian_function(x0) * dx ;
}
set_data(data0, f0) ; // compute discrete sample of quadratic approximant
draw_line2(linearization, data0) ; // draw the quadratic approximant
draw_vline2(redLine2, xj) ; // draw the location of the damped Newton-Raphson step
draw_vline2(grayLine2, x0 + h) ; // draw the location of the undamped Newton-Raphson step (the minimizer of the quadratic approximant)
}
// nuts and bolts:
var newton_iteration = function(xi, damping0) {
if (damping === undefined) damping0 = 1 ; // default is no damping
var h = -gradient_function(xi) / hessian_function(xi) ;
var xj = xi + damping0 * h ;
return xj ;
}
var gradient_norm = function(xi) {
return Math.abs(gradient_function(xi)) ;
}
var newton_step = function(circle, tol, maxIter, count) {
var xi = x.invert(circle.attr("cx")) ;
var xj = newton_iteration(xi, damping) ;
var h = (xj - xi) / damping ; // somewhat wasteful to recompute h
var gradNorm = gradient_norm(xj) ; // norm (i.e. length or magnitude) of the gradient function (derivative)
update_text('x_' + (count + 1) + ' = ' + xj + ', gradient norm: ' + gradNorm)
// draw the quadratic Taylor approximation of the objective function centered at xi:
linear_approx(xi, xj, h) ;
quadratic_approx(xi, xj, h) ;
// animate the step:
circle
.transition()
.duration(dur)
.ease('linear')
.attr('fill', '#060')
.transition()
.duration(dur)
.ease('linear')
.attr("cx", x(xj))
.attrTween("cy", function() {
return function(t) {
var dx = xj - xi ;
var x0 = xi + t * dx ;
return y(objective_function(x0)) ;
} ;
})
.transition()
.duration(dur)
.ease('linear')
.attr('fill', '#000')
.each('end', function() {
var infoFade = function() {
info
.transition()
.duration(dur)
.delay(dur)
.ease('linear')
.style('opacity', 0)
} ;
if(gradNorm <= tol) {
update_text('converged in ' + (count + 1) + ' steps, final position: ' + xj, infoFade) ;
bgGroup
.transition()
.delay(4 * dur)
.duration(dur)
.ease('linear')
.style('opacity', 1) ;
running = false ;
count = -1 ;
circle
.transition()
.duration(dur)
.ease('linear')
.style('fill', 'steelblue')
} else {
count++ ;
}
if(count < maxIter && count > 0) {
newton_step(circle, tol, maxIter, count)
} else if(count == maxIter) {
update_text('failed to converge in ' + count + ' steps, final position: ' + xj, infoFade) ;
bgGroup
.transition()
.delay(4 * dur)
.duration(dur)
.ease('linear')
.style('opacity', 1) ;
running = false ;
circle
.transition()
.duration(dur)
.ease('linear')
.style('fill', 'steelblue')
}
}) ;
}
var run_newton = function(xy) {
if(running) {
return false ; // do nothing if already running
}
running = true ; // to prevent multiple visualizations from activating simultaneously
bgGroup
.transition()
.duration(dur)
.ease('linear')
.style('opacity', 1/5)
var maxIter = 10 ;
var count = 0 ;
update_text('x_0 = ' + x.invert(xy[0])) ;
var circ = d3.select('circle') ;
var circle = draw_circle(xy) ;
if(circ.empty()) {
setTimeout(function() { newton_step(circle, tol, maxIter, count) }, 2 * dur) ;
} else { // remove existing circle before drawing new circle
circ.transition()
.duration(dur)
.delay(dur)
.ease('linear')
.style('opacity', 0)
.each('end', function() {
newton_step(circle, tol, maxIter, count) ;
})
.remove()
}
} ;
var click_function = function() {
var xy = d3.mouse(g.node()) ;
xy[1] = y(objective_function(x.invert(xy[0]))) ;
run_newton(xy) ;
} ;
svg.on('click', click_function) ;
svg2.on('click', click_function) ;
var mouse_function = function() {
if(bgGroup.style('opacity') < 1) return ; // do nothing if animation is running
var xy = d3.mouse(g.node()) ;
prec = 5 ;
if(xy[0] < 0) xy[0] = 0 ;
if(xy[0] > width) xy[0] = width ;
var xi = x.invert(xy[0]).toPrecision(prec) ;
draw_vline(hoverLine, xi) ;
draw_vline2(hoverLine2, xi) ; // draw the location of the damped Newton-Raphson step
var dx = jump / Navg ; // grid spacing
var Niter = Math.round(grid_average(xy[0], Navg, dx))
var xpos = xy[0] + 5 ;
if(xi > 0) xpos -= 140 ;
hoverText
.attr("x", xpos)
.text("x_0 = " + xi)
.transition()
.duration(dur)
.ease('linear')
.style('opacity', 1)
.transition()
.duration(dur)
.ease('linear')
.style('opacity', 0) ;
hoverText2
.attr("x", xpos)
.text(Niter + '% failure')
.transition()
.duration(dur)
.ease('linear')
.style('opacity', 1)
.transition()
.duration(dur)
.ease('linear')
.style('opacity', 0) ;
}
svg.on('mouseover', mouse_function) ;
//svg2.on('mouseover', mouse_function) ;
var maxIterTest = 10 ;
var grid_average = function(k, Ngrid, dx) {
var Nk = 0 ; // initialize
for(var kg = 0 ; kg < Ngrid ; kg++) {
var xi = x.invert(k - Ngrid * dx * 0.5 + kg * dx) ;
var j = 0 ;
while( j++ < maxIterTest && gradient_norm(xi) >= tol) {
xi = newton_iteration(xi, damping) ;
}
if(j > maxIterTest) {
Nk++ ;
}
}
Nk /= Ngrid ;
Nk *= 100 ;
return Nk ;
}
var bgColor ;
var jump = 20 ;
var Navg = 300 ;
var convergence_test = function() {
var Niter = [] ; // initialize
var grid = [] ; // initialize
var dx = jump / Navg ; // grid spacing
var counter = 0 ;
var maxN = 0 ;
for(var k = 0.5 * jump ; k < width ; k += jump) {
Niter[counter] = grid_average(k, Navg, dx) ; // average number of iterations over a sub-integer grid
if(Niter[counter] > maxN) maxN = Niter[counter] ;
grid[counter] = k ;
counter++ ;
}
var iteration_count = d3
.scale
.linear()
.domain(grid)
.range(Niter) ;
bgColor = d3.scale.quantize()
.domain([0, 20, 40, 60, 100])
.range(["rgb(26, 152, 80)", "rgb(166, 217, 106)", "rgb(253, 174, 97)", "rgb(215, 48, 39)", "rgb(70, 13, 0)"]);
return iteration_count ;
} ;
var colorize = function() {
var iteration_count = convergence_test() ;
for(var k = 0 ; k < width ; k++) {
d = [{x: x.invert(k), y: y.domain()[0]}, {x: x.invert(k), y: y.domain()[1]}] ;
var clr = bgColor(iteration_count(k)) ;
bgLine[k]
.datum(d)
.attr("d", line)
.style("stroke-width", 3)
.style("stroke", clr) ;
}
}
colorize() ;
</script>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment