Skip to content

Instantly share code, notes, and snippets.

@alessiot
Last active February 23, 2022 20:07
Show Gist options
  • Save alessiot/aca064b64ff416f75a4d30e08b405c37 to your computer and use it in GitHub Desktop.
Save alessiot/aca064b64ff416f75a4d30e08b405c37 to your computer and use it in GitHub Desktop.
Visualizing Gradient Descent
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Adapted from [here](https://github.com/SkalskiP/ILearnDeepLearning.py/blob/master/01_mysteries_of_neural_networks/04_optimizers/Comparison%20of%20optimizers.ipynb)\n",
"\n",
"The functions used are the [six-fump camel function](http://www.sfu.ca/~ssurjano/camel6.html)\n",
"\n",
"\\begin{equation}\n",
"z(x,y) = \\left(4 - 2.1 x^2 \\frac{x^4}{3}\\right)x^2 + xy + \\left(-4+4y^2\\right)y^2\n",
"\\end{equation}\n",
"\n",
"and [Goldstein-Price function](http://www.sfu.ca/~ssurjano/goldpr.html)\n",
"\\begin{equation}\n",
"z(x,y) = \\left[1 + (x + y + 1)^2\\cdot(19 - 14x + 3x^2 - 14y + 6xy + 3y^2)\\right]\\cdot\\left[30+(2x - 3y)^2\\cdot(18-32x+12x^2+48y-36xy+27y^2)\\right]\n",
"\\end{equation}"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from mpl_toolkits.mplot3d import Axes3D\n",
"from math import sqrt, cos\n",
"from scipy.integrate import ode"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"# starting point for gradient descent\n",
"INIT_PARAMS = [-1, -1]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"#Goldstein-Price function\n",
"INIT_PARAMS = [-1.5, 1.9]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# output directory (the folder must be created on the drive)\n",
"OUTPUT_DIR = \"optimizers_comparison\""
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"# SIX-HUMP CAMEL FUNCTION\n",
"def dzdt(t, z): \n",
" x = z[0]\n",
" y = z[1]\n",
" dzdx = 8*x - 8.1*x**3 + 2*x**5 + y\n",
" dzdy = x - 8*y + 16*y**3\n",
" \n",
" return [-dzdx,-dzdy]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"#Goldstein-Price function\n",
"def dzdt(t, z): \n",
" x = z[0]\n",
" y = z[1]\n",
" df1dx = 2*(x+y+1)\n",
" df1dy = 2*(x+y+1)\n",
" df2dx = -14+6*x+6*y\n",
" df2dy = -14+6*x+6*y\n",
" dg1dx = 2*(2*x-3*y)*2\n",
" dg1dy = 2*(2*x-3*y)*(-3)\n",
" dg2dx = -32+24*x-36*y\n",
" dg2dy = 48-36*x+54*y\n",
" f1 = (x+y+1)**2\n",
" f2 = 19-14*x+3*x**2-14*y+6*x*y+3*y**2\n",
" g1 = (2*x-3*y)**2\n",
" g2 = 18-32*x+12*x**2+48*y-36*x*y+27*y**2\n",
" f = f1*f2 + 1\n",
" g = g1*g2 + 30\n",
" dzdx = (df1dx*f2 + df2dx*f1)*g + f*(dg1dx*g2+dg2dx*g1)\n",
" dzdy = (df1dy*f2 + df2dy*f1)*g + f*(dg1dy*g2+dg2dy*g1)\n",
" \n",
" return [-dzdx,-dzdy]"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"# SIX-HUMP CAMEL FUNCTION\n",
"tf_fun = lambda x, y: (4-2.1*x**2+(x**4)/3) * x**2 + x*y + (-4+4*y**2) * y**2\n",
"np_fun = lambda x, y: (4-2.1*x**2+(x**4)/3) * x**2 + x*y + (-4+4*y**2) * y**2"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"#Goldstein-Price function\n",
"tf_fun = lambda x, y: (1+((x+y+1)**2)*(19-14*x+3*x**2-14*y+6*x*y+3*y**2))*(30+((2*x-3*y)**2)*(18-32*x+12*x**2+48*y-36*x*y+27*y**2))\n",
"np_fun = lambda x, y: (1+((x+y+1)**2)*(19-14*x+3*x**2-14*y+6*x*y+3*y**2))*(30+((2*x-3*y)**2)*(18-32*x+12*x**2+48*y-36*x*y+27*y**2))"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"def ode_optim(z_init):\n",
" t0 = 0.0\n",
" tfin = 0.125\n",
" solver = ode(dzdt)\n",
" solver.set_integrator(\"vode\", atol=1e-8, rtol=1e-6, method=\"bdf\", order=23) \n",
" solver.set_initial_value(z_init, t0) \n",
" j = 0\n",
" while solver.successful() and solver.t < tfin:\n",
" solver.integrate(tfin, step=False)\n",
" j += 1 \n",
"\n",
" return solver.y\n",
"\n",
"\n",
"def optimize(tf_function, init_point, iterations, optimizer):\n",
" \n",
" x_list, y_list, cost_list = [], [], []\n",
" \n",
" if optimizer == \"ode\":\n",
" z_init = INIT_PARAMS\n",
" x_list.append(z_init[0]); y_list.append(z_init[1]); cost_list.append(np_fun(z_init[0],z_init[1]))\n",
" for t in range(iterations):\n",
" z_sol = ode_optim(z_init)\n",
" x = z_sol[0]\n",
" y = z_sol[1]\n",
" z_init = z_sol\n",
" x_list.append(x); y_list.append(y); cost_list.append(np_fun(x,y))\n",
" else: \n",
" x, y = [tf.Variable(initial_value=p, dtype=tf.float32) for p in init_point]\n",
" function = tf_function(x, y)\n",
" train_op = optimizer.minimize(function)\n",
"\n",
" with tf.Session() as sess:\n",
" sess.run(tf.global_variables_initializer())\n",
" for t in range(iterations):\n",
" x_, y_, function_ = sess.run([x, y, function])\n",
" x_list.append(x_); y_list.append(y_); cost_list.append(function_)\n",
" result, _ = sess.run([function, train_op])\n",
" \n",
" return x_list, y_list, cost_list\n",
"\n",
"def create_blank_chart_with_styling(plot_size):\n",
" # my favorite styling kit\n",
" plt.style.use('dark_background')\n",
" # determining the size of the graph\n",
" fig = plt.figure(figsize=plot_size) \n",
" # 3D mode\n",
" ax = Axes3D(fig)\n",
" # transparent axis pane background \n",
" ax.xaxis.pane.fill = False\n",
" ax.yaxis.pane.fill = False\n",
" ax.zaxis.pane.fill = False\n",
" # setting chart axis names\n",
" ax.set(xlabel=\"$x$\", ylabel=\"$y$\")\n",
" return (fig, ax)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"# SIX-HUMP CAMEL FUNCTION\n",
"ITERATIONS = 180\n",
"GRID_X_MIN = -2\n",
"GRID_X_MAX = 2\n",
"GRID_Y_MIN = -1\n",
"GRID_Y_MAX = 1\n",
"LR = 0.02"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"#Goldstein-Price function\n",
"ITERATIONS = 300\n",
"GRID_X_MIN = -2\n",
"GRID_X_MAX = 2\n",
"GRID_Y_MIN = -2\n",
"GRID_Y_MAX = 2\n",
"LR = 0.02"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"# Definition of optimisers\n",
"optimizers = [\n",
" (\"ode\",\"ODE SOlver\"),\n",
" (tf.train.GradientDescentOptimizer(learning_rate=LR), \"Gradient Descent\"),\n",
" (tf.train.MomentumOptimizer(learning_rate=LR, momentum=0.95, use_nesterov=False), \"Momentum\"),\n",
" (tf.train.MomentumOptimizer(learning_rate=LR, momentum=0.95, use_nesterov=True), \"Nasterov\"),\n",
" (tf.train.RMSPropOptimizer(learning_rate=LR), \"RMSProp\"),\n",
" (tf.train.AdamOptimizer(learning_rate=LR), \"Adam\"),\n",
"]\n",
"\n",
"# Definition of colours for subsequent trajectories\n",
"paths_colors = [\n",
" \"#FFFFFF\",\n",
" \"#F2112D\",\n",
" \"#F06E1E\",\n",
" \"#EED82A\",\n",
" \"#A5EC37\",\n",
" \"#54EA43\"\n",
"]\n",
"\n",
"# Trajectories covered by optimizers\n",
"optimization_paths = [optimize(tf_fun, INIT_PARAMS, ITERATIONS, optimizer[0]) for optimizer in optimizers]\n",
"labels = [item[1] for item in optimizers]\n",
"\n",
"def create_animation(np_function, iterations, paths, colors, plot_name, file_name, dir_name):\n",
" for angle in range(iterations):\n",
" fix, ax = create_blank_chart_with_styling((10, 10))\n",
" \n",
" a3D, b3D = np.meshgrid(np.linspace(GRID_X_MIN, GRID_X_MAX, 50), np.linspace(GRID_Y_MIN, GRID_Y_MAX, 50))\n",
" cost3D = np.array([np_function(x_, y_) for x_, y_ in zip(a3D.flatten(), b3D.flatten())]).reshape(a3D.shape)\n",
" ax.plot_wireframe(a3D, b3D, cost3D, cmap=plt.get_cmap('rainbow'), alpha=0.2, zorder=-10)\n",
" \n",
" for path, color in zip(paths, colors):\n",
" ax.plot(path[0][:angle], path[1][:angle], zs=path[2][:angle], zdir='z', c=color, lw=3, zorder=1, alpha=1.0)\n",
" \n",
" if angle == 0:\n",
" ax.scatter(path[0][0], path[1][0], zs=path[2][0], s=100, c=color, zorder=10, edgecolors=\"k\")\n",
" else:\n",
" ax.scatter(path[0][angle-1], path[1][angle-1], zs=path[2][angle-1], s=100, c=color, zorder=10, edgecolors=\"k\")\n",
" \n",
" ax.legend(labels, loc='lower right', prop={'size': 10}, framealpha=0.0)\n",
" \n",
" ax.set_xlim(GRID_X_MIN, GRID_X_MAX)\n",
" ax.set_ylim(GRID_Y_MIN, GRID_Y_MAX)\n",
" ax.set_zlim(cost3D.min(), cost3D.max())\n",
" \n",
" # graph rotation\n",
" #ax.view_init(45, 180 + angle*2)\n",
" ax.view_init(25, 180 + angle*2)\n",
" # addition of a title\n",
" ax.set_title(plot_name, fontsize=20)\n",
" # saving a file\n",
" plt.savefig(\"./{}/{}_{:05}.png\".format(dir_name, file_name, angle))\n",
" plt.close()"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"create_animation(np_fun, ITERATIONS, optimization_paths, paths_colors, \"\", \"test\", OUTPUT_DIR)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#convert -delay 10 -loop 0 *.png animation.gif"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment