Plotting a 3D image of gradient descent in Python
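The code below generates noisy 1-D regression data, fits it with batch gradient descent, and draws the descent path on the cost surface. With X the design matrix (a column of ones followed by x) and the step argument playing the role of the learning rate $\alpha$, the loop minimizes the usual least-squares cost

    J(\theta) = \frac{1}{2m} \lVert X\theta - y \rVert^2

by repeating the update

    \theta \leftarrow \theta - \frac{\alpha}{m} X^\top (X\theta - y)

until the cost changes by less than the precision argument between iterations.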
# code adapted from http://tillbergmann.com/blog/python-gradient-descent.html
%matplotlib inline
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from scipy import stats
# sklearn.datasets.samples_generator was removed in scikit-learn 0.24;
# make_regression now lives directly in sklearn.datasets
from sklearn.datasets import make_regression
x, y = make_regression(n_samples=100,
                       n_features=1,
                       n_informative=1,
                       noise=20,
                       random_state=2017)
x = x.flatten()

slope, intercept, _, _, _ = stats.linregress(x, y)
best_fit = np.vectorize(lambda x: x * slope + intercept)

plt.plot(x, y, 'o', alpha=0.5)
grid = np.arange(-3, 3, 0.1)
plt.plot(grid, best_fit(grid), '.')
def gradient_descent(x, y, theta_init, step=0.001, maxsteps=0, precision=0.001):
    costs = []
    m = y.size  # number of data points
    theta = theta_init
    history = []  # to store all thetas
    preds = []
    counter = 0
    oldcost = 0
    pred = np.dot(x, theta)
    error = pred - y
    currentcost = np.sum(error ** 2) / (2 * m)
    preds.append(pred)
    costs.append(currentcost)
    history.append(theta)
    counter += 1
    # iterate until the cost improves by less than `precision`
    while abs(currentcost - oldcost) > precision:
        oldcost = currentcost
        gradient = x.T.dot(error) / m
        theta = theta - step * gradient  # update
        history.append(theta)
        pred = np.dot(x, theta)
        error = pred - y
        currentcost = np.sum(error ** 2) / (2 * m)
        costs.append(currentcost)
        if counter % 25 == 0:
            preds.append(pred)
        counter += 1
        if maxsteps and counter == maxsteps:
            break
    return history, costs, preds, counter
xaug = np.c_[np.ones(x.shape[0]), x]  # prepend a column of ones so theta[0] is the intercept
theta_i = [-15, 40] + np.random.rand(2)
history, cost, preds, iters = gradient_descent(xaug, y, theta_i, step=0.1)
theta = history[-1]

print("Gradient Descent: {:.2f}, {:.2f} {:d}".format(theta[0], theta[1], iters))
print("Least Squares: {:.2f}, {:.2f}".format(intercept, slope))
from mpl_toolkits.mplot3d import Axes3D

def error(X, Y, THETA):
    return np.sum((X.dot(THETA) - Y) ** 2) / (2 * Y.size)

ms = np.linspace(theta[0] - 20, theta[0] + 20, 20)
bs = np.linspace(theta[1] - 40, theta[1] + 40, 40)
M, B = np.meshgrid(ms, bs)
zs = np.array([error(xaug, y, theta)
               for theta in zip(np.ravel(M), np.ravel(B))])
Z = zs.reshape(M.shape)
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(M, B, Z, rstride=1, cstride=1, color='b', alpha=0.2)
#ax.contour(M, B, Z, 20, color='b', alpha=0.5, offset=0, stride=30)
ax.set_xlabel('x1', labelpad=30, fontsize=24, fontweight='bold')
ax.set_ylabel('x2', labelpad=30, fontsize=24, fontweight='bold')
ax.set_zlabel('f(x1,x2)', labelpad=30, fontsize=24, fontweight='bold')
ax.view_init(elev=20., azim=30)
# final theta, starting theta, and the full descent path (on the surface and projected onto z=0)
ax.plot([theta[0]], [theta[1]], [cost[-1]], markerfacecolor='r', markeredgecolor='r', marker='o', markersize=7)
ax.plot([history[0][0]], [history[0][1]], [cost[0]], markerfacecolor='r', markeredgecolor='r', marker='o', markersize=7)
ax.plot([t[0] for t in history], [t[1] for t in history], cost, markerfacecolor='r', markeredgecolor='r', marker='.', markersize=2)
ax.plot([t[0] for t in history], [t[1] for t in history], 0, markerfacecolor='r', markeredgecolor='r', marker='.', markersize=2)
fig.suptitle("Minimizing f(x1,x2)", fontsize=24, fontweight='bold')
plt.savefig("Minimization_image.png")
I'm not sure I even understood the code, but it is marvellous. It shows how little I know about gradient descent.