Created
May 30, 2019 21:33
-
-
Save viswanathgs/a4a4cde2bc958678d664c96b6a154f75 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from mpl_toolkits.mplot3d import Axes3D | |
# Axes3D import has side effects, it enables using projection='3d' in add_subplot | |
import matplotlib.pyplot as plt | |
import random | |
import copy | |
# Worked example from "An Introduction to the Conjugate Gradient Method
# Without the Agonizing Pain" (Shewchuk, CMU):
# https://www.cs.cmu.edu/~quake-papers/painless-conjugate-gradient.pdf
#
# Linear system:
#   3x + 2y =  2
#   2x + 6y = -8
# written as Ax = b with A symmetric positive-definite.
A = np.array([[3.0, 2.0],
              [2.0, 6.0]], dtype=np.float32)
b = np.array([[2.0],
              [-8.0]], dtype=np.float32)
# Quadratic form associated with Ax = b:
#   f(x) = 1/2 * x' A x - b' x + c,   so   f'(x) = A x - b
# Expanded for this particular A and b (c = 0):
#   f(x, y) = 1.5x^2 + 2xy + 3y^2 - 2x + 8y
def f(x, y):
    """Evaluate the quadratic form at (x, y); minimized where Ax = b."""
    quadratic = 1.5 * x * x + 2.0 * x * y + 3.0 * y * y
    linear = -2.0 * x + 8.0 * y
    return quadratic + linear
# Method of steepest descent:
#   r{i}     = b - Ax{i}                   (residual = direction of steepest descent)
#   alpha{i} = r'{i}.r{i} / r'{i}.A.r{i}   (exact line search along r)
#   x{i+1}   = x{i} + alpha{i}.r{i}
def steepest_descent(f, A, b, eps=1e-3, max_iters=10000):
    """Minimize the quadratic form of (A, b) via steepest descent.

    Args:
        f: unused; kept so the signature matches the other solvers/plot driver.
        A: (n, n) symmetric positive-definite matrix.
        b: (n, 1) right-hand side.
        eps: stop once ||b - Ax|| drops below this.
        max_iters: iteration cap.

    Returns:
        List of (x[0], x[1]) iterates (first two coordinates of each step).
    """
    # Start at (-2, ..., -2); sized from b so any n-dim system works
    # (previously hard-coded to a 2-element vector).
    x = np.full_like(b, -2.0)
    trace = []
    for i in range(max_iters):
        trace += [(x[0, 0], x[1, 0])]
        r = b - np.dot(A, x)
        residual = np.linalg.norm(r)
        print("Iter {}, x=({:.2f}, {:.2f}), residual={}".format(i, x[0, 0], x[1, 0], residual))
        if residual < eps:
            print("Converged after {} iters".format(i))
            return trace
        rt = np.transpose(r)
        # Optimal step length along the residual direction.
        alpha = np.dot(rt, r) / np.dot(rt, np.dot(A, r))
        x += alpha * r
    return trace
# Method 2 (one instead of two matrix-vector products per iteration):
# Instead of recomputing r{i} = b - Ax{i} each step, update the residual
# recursively:
#   r{i+1} = r{i} - alpha{i}.A.r{i}
# This reuses the A.r{i} product already needed for alpha.
# NOTE: float error can slowly accumulate in the recursive r; acceptable here
# since we only run to a loose 1e-3 tolerance.
def steepest_descent_fast(f, A, b, eps=1e-3, max_iters=10000):
    """Steepest descent with a recursively-updated residual.

    Args:
        f: unused; kept so the signature matches the other solvers/plot driver.
        A: (n, n) symmetric positive-definite matrix.
        b: (n, 1) right-hand side.
        eps: stop once ||r|| drops below this.
        max_iters: iteration cap.

    Returns:
        List of (x[0], x[1]) iterates (first two coordinates of each step).
    """
    # Start at (-2, ..., -2); sized from b so any n-dim system works
    # (previously hard-coded to a 2-element vector).
    x = np.full_like(b, -2.0)
    r = b - np.dot(A, x)
    trace = []
    for i in range(max_iters):
        trace += [(x[0, 0], x[1, 0])]
        residual = np.linalg.norm(r)
        print("Iter {}, x=({:.2f}, {:.2f}), residual={}".format(i, x[0, 0], x[1, 0], residual))
        if residual < eps:
            print("Converged after {} iters".format(i))
            return trace
        # Single matrix-vector product, shared by the step length and the
        # residual update.
        Ar = np.dot(A, r)
        rt = np.transpose(r)
        alpha = np.dot(rt, r) / np.dot(rt, Ar)
        x += alpha * r
        r -= alpha * Ar
    return trace
def conjugate_gradient(f, A, b):
    """Solve Ax = b by the conjugate gradient method.

    CG terminates (up to floating-point error) in at most n steps for an
    n-dimensional SPD system, so we run exactly n iterations.

    Args:
        f: unused; kept so the signature matches the other solvers/plot driver.
        A: (n, n) symmetric positive-definite matrix.
        b: (n, 1) right-hand side.

    Returns:
        List of (x[0], x[1]) iterates (first two coordinates of each step).
    """
    # Start at (-2, ..., -2); sized from b so any n-dim system works
    # (previously hard-coded to a 2-element vector).
    x = np.full_like(b, -2.0)
    r = b - np.dot(A, x)
    d = copy.deepcopy(r)  # first search direction is the initial residual
    # r'r is reused for alpha and beta; compute it once per iteration
    # (previously recomputed as prev_rdot).
    rdot = np.dot(np.transpose(r), r)
    trace = [(x[0, 0], x[1, 0])]
    print("Iter 0, x=({:.2f}, {:.2f}), residual={}".format(x[0, 0], x[1, 0], np.linalg.norm(r)))
    n = b.shape[0]  # run n steps instead of the previously hard-coded 2
    for i in range(1, n + 1):
        Ad = np.dot(A, d)
        alpha = rdot / np.dot(np.transpose(d), Ad)
        x += alpha * d
        r -= alpha * Ad
        prev_rdot = rdot
        rdot = np.dot(np.transpose(r), r)
        residual = np.linalg.norm(r)
        print("Iter {}, x=({:.2f}, {:.2f}), residual={}".format(i, x[0, 0], x[1, 0], residual))
        # Fletcher-Reeves update keeps d A-conjugate to previous directions.
        beta = rdot / prev_rdot
        d = r + beta * d
        trace += [(x[0, 0], x[1, 0])]
    return trace
def plot(f, sd_trace, cg_trace):
    """Draw the quadratic surface plus both optimizer traces in 3D.

    Red = steepest descent path, blue = conjugate gradient path.
    Blocks until the matplotlib window is closed.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    # Evaluate f on a regular grid over [-4, 4) x [-4, 4).
    grid = np.arange(-4.0, 4.0, 0.05)
    X, Y = np.meshgrid(grid, grid)
    Z = np.array(f(np.ravel(X), np.ravel(Y))).reshape(X.shape)
    # Translucent surface so the traces stay visible.
    ax.plot_surface(X, Y, Z, alpha=0.3)
    # Overlay each convergence trace, segment by segment.
    for trace, style in ((sd_trace, 'ro-'), (cg_trace, 'bo-')):
        xs = [p[0] for p in trace]
        ys = [p[1] for p in trace]
        zs = [f(p[0], p[1]) for p in trace]
        for j in range(len(trace) - 1):
            ax.plot(xs[j:j + 2], ys[j:j + 2], zs[j:j + 2], style, alpha=0.5)
    ax.set_xlabel('X Label')
    ax.set_ylabel('Y Label')
    ax.set_zlabel('Z Label')
    plt.show()
def _main():
    # Run both solvers on the same system, then visualize their paths.
    print('Steepest Descent:')
    sd_trace = steepest_descent_fast(f, A, b)
    print('Conjugate Gradients:')
    cg_trace = conjugate_gradient(f, A, b)
    plot(f, sd_trace, cg_trace)


if __name__ == '__main__':
    _main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment