Created
May 30, 2019 21:33
-
-
Save viswanathgs/a4a4cde2bc958678d664c96b6a154f75 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from mpl_toolkits.mplot3d import Axes3D | |
# Axes3D import has side effects, it enables using projection='3d' in add_subplot | |
import matplotlib.pyplot as plt | |
import random | |
import copy | |
# Worked example from "An Introduction to the Conjugate Gradient Method
# Without the Agonizing Pain" (Shewchuk, CMU):
# https://www.cs.cmu.edu/~quake-papers/painless-conjugate-gradient.pdf
#
# Linear system:
#   3x + 2y =  2
#   2x + 6y = -8
# written as Ax = b with A symmetric positive-definite.
A = np.array([[3.0, 2.0],
              [2.0, 6.0]], dtype=np.float32)
b = np.array([[2.0],
              [-8.0]], dtype=np.float32)
# Quadratic form associated with Ax = b:
#   f(x) = 1/2 * x' A x - b' x + c,   so   f'(x) = A x - b
# Expanded for this particular A and b (c = 0):
#   f(x, y) = 1.5x^2 + 2xy + 3y^2 - 2x + 8y
def f(x, y):
    """Evaluate the quadratic form at (x, y); minimized where Ax = b."""
    quadratic = 1.5 * x * x + 2.0 * x * y + 3.0 * y * y
    linear = -2.0 * x + 8.0 * y
    return quadratic + linear
# Method of steepest descent:
#   r{i}     = b - Ax{i}                   (residual = direction of steepest descent)
#   alpha{i} = r'{i}.r{i} / r'{i}.A.r{i}   (exact line search along r)
#   x{i+1}   = x{i} + alpha{i}.r{i}
def steepest_descent(f, A, b, eps=1e-3, max_iters=10000):
    """Minimize the quadratic form of (A, b) via steepest descent.

    Args:
        f: unused; kept so the signature matches the other solvers/plot driver.
        A: (n, n) symmetric positive-definite matrix.
        b: (n, 1) right-hand side.
        eps: stop once ||b - Ax|| drops below this.
        max_iters: iteration cap.

    Returns:
        List of (x[0], x[1]) iterates (first two coordinates of each step).
    """
    # Start at (-2, ..., -2); sized from b so any n-dim system works
    # (previously hard-coded to a 2-element vector).
    x = np.full_like(b, -2.0)
    trace = []
    for i in range(max_iters):
        trace += [(x[0, 0], x[1, 0])]
        r = b - np.dot(A, x)
        residual = np.linalg.norm(r)
        print("Iter {}, x=({:.2f}, {:.2f}), residual={}".format(i, x[0, 0], x[1, 0], residual))
        if residual < eps:
            print("Converged after {} iters".format(i))
            return trace
        rt = np.transpose(r)
        # Optimal step length along the residual direction.
        alpha = np.dot(rt, r) / np.dot(rt, np.dot(A, r))
        x += alpha * r
    return trace
# Method 2 (one instead of two matrix-vector products per iteration):
# Instead of recomputing r{i} = b - Ax{i} each step, update the residual
# recursively:
#   r{i+1} = r{i} - alpha{i}.A.r{i}
# This reuses the A.r{i} product already needed for alpha.
# NOTE: float error can slowly accumulate in the recursive r; acceptable here
# since we only run to a loose 1e-3 tolerance.
def steepest_descent_fast(f, A, b, eps=1e-3, max_iters=10000):
    """Steepest descent with a recursively-updated residual.

    Args:
        f: unused; kept so the signature matches the other solvers/plot driver.
        A: (n, n) symmetric positive-definite matrix.
        b: (n, 1) right-hand side.
        eps: stop once ||r|| drops below this.
        max_iters: iteration cap.

    Returns:
        List of (x[0], x[1]) iterates (first two coordinates of each step).
    """
    # Start at (-2, ..., -2); sized from b so any n-dim system works
    # (previously hard-coded to a 2-element vector).
    x = np.full_like(b, -2.0)
    r = b - np.dot(A, x)
    trace = []
    for i in range(max_iters):
        trace += [(x[0, 0], x[1, 0])]
        residual = np.linalg.norm(r)
        print("Iter {}, x=({:.2f}, {:.2f}), residual={}".format(i, x[0, 0], x[1, 0], residual))
        if residual < eps:
            print("Converged after {} iters".format(i))
            return trace
        # Single matrix-vector product, shared by the step length and the
        # residual update.
        Ar = np.dot(A, r)
        rt = np.transpose(r)
        alpha = np.dot(rt, r) / np.dot(rt, Ar)
        x += alpha * r
        r -= alpha * Ar
    return trace
def conjugate_gradient(f, A, b):
    """Solve Ax = b by the conjugate gradient method.

    CG terminates (up to floating-point error) in at most n steps for an
    n-dimensional SPD system, so we run exactly n iterations.

    Args:
        f: unused; kept so the signature matches the other solvers/plot driver.
        A: (n, n) symmetric positive-definite matrix.
        b: (n, 1) right-hand side.

    Returns:
        List of (x[0], x[1]) iterates (first two coordinates of each step).
    """
    # Start at (-2, ..., -2); sized from b so any n-dim system works
    # (previously hard-coded to a 2-element vector).
    x = np.full_like(b, -2.0)
    r = b - np.dot(A, x)
    d = copy.deepcopy(r)  # first search direction is the initial residual
    # r'r is reused for alpha and beta; compute it once per iteration
    # (previously recomputed as prev_rdot).
    rdot = np.dot(np.transpose(r), r)
    trace = [(x[0, 0], x[1, 0])]
    print("Iter 0, x=({:.2f}, {:.2f}), residual={}".format(x[0, 0], x[1, 0], np.linalg.norm(r)))
    n = b.shape[0]  # run n steps instead of the previously hard-coded 2
    for i in range(1, n + 1):
        Ad = np.dot(A, d)
        alpha = rdot / np.dot(np.transpose(d), Ad)
        x += alpha * d
        r -= alpha * Ad
        prev_rdot = rdot
        rdot = np.dot(np.transpose(r), r)
        residual = np.linalg.norm(r)
        print("Iter {}, x=({:.2f}, {:.2f}), residual={}".format(i, x[0, 0], x[1, 0], residual))
        # Fletcher-Reeves update keeps d A-conjugate to previous directions.
        beta = rdot / prev_rdot
        d = r + beta * d
        trace += [(x[0, 0], x[1, 0])]
    return trace
def plot(f, sd_trace, cg_trace):
    """Draw the quadratic surface plus both optimizer traces in 3D.

    Red = steepest descent path, blue = conjugate gradient path.
    Blocks until the matplotlib window is closed.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    # Evaluate f on a regular grid over [-4, 4) x [-4, 4).
    grid = np.arange(-4.0, 4.0, 0.05)
    X, Y = np.meshgrid(grid, grid)
    Z = np.array(f(np.ravel(X), np.ravel(Y))).reshape(X.shape)
    # Translucent surface so the traces stay visible.
    ax.plot_surface(X, Y, Z, alpha=0.3)
    # Overlay each convergence trace, segment by segment.
    for trace, style in ((sd_trace, 'ro-'), (cg_trace, 'bo-')):
        xs = [p[0] for p in trace]
        ys = [p[1] for p in trace]
        zs = [f(p[0], p[1]) for p in trace]
        for j in range(len(trace) - 1):
            ax.plot(xs[j:j + 2], ys[j:j + 2], zs[j:j + 2], style, alpha=0.5)
    ax.set_xlabel('X Label')
    ax.set_ylabel('Y Label')
    ax.set_zlabel('Z Label')
    plt.show()
def _main():
    # Run both solvers on the same system, then visualize their paths.
    print('Steepest Descent:')
    sd_trace = steepest_descent_fast(f, A, b)
    print('Conjugate Gradients:')
    cg_trace = conjugate_gradient(f, A, b)
    plot(f, sd_trace, cg_trace)


if __name__ == '__main__':
    _main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment