{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## AdamOptimizer\n",
"- In this toy project, we implement AdamOptimizer step by step"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"import numpy as np\n",
"tf.set_random_seed(42)\n",
"np.random.seed(42)\n",
"\n",
"# for auto-reloading extenrnal modules\n",
"# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython\n",
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1. random data for testing"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"x = np.random.randn(20)\n",
"y = 2 * x + 3"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2. tf.train.AdamOptimizer"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<tf.Variable 'w:0' shape=() dtype=float32_ref>\n",
"<tf.Variable 'b:0' shape=() dtype=float32_ref>\n",
"\n",
"compute_gradients:\n",
"<tf.Variable 'w:0' shape=() dtype=float32_ref>\n",
"<tf.Variable 'b:0' shape=() dtype=float32_ref>\n",
"\n",
"apply_gradients:\n",
"<tf.Variable 'w:0' shape=() dtype=float32_ref>\n",
"<tf.Variable 'b:0' shape=() dtype=float32_ref>\n",
"<tf.Variable 'beta1_power:0' shape=() dtype=float32_ref>\n",
"<tf.Variable 'beta2_power:0' shape=() dtype=float32_ref>\n",
"<tf.Variable 'w/Adam:0' shape=() dtype=float32_ref>\n",
"<tf.Variable 'w/Adam_1:0' shape=() dtype=float32_ref>\n",
"<tf.Variable 'b/Adam:0' shape=() dtype=float32_ref>\n",
"<tf.Variable 'b/Adam_1:0' shape=() dtype=float32_ref>\n"
]
}
],
"source": [
"tf.reset_default_graph()\n",
"LR = 1\n",
"BETA1=0.5\n",
"BETA2=0.5 \n",
"w = tf.get_variable(name='w', initializer=0.1)\n",
"b = tf.get_variable(name='b', initializer=0.0)\n",
"y_pred = x * w + b\n",
"loss = tf.reduce_sum(tf.pow(y_pred - y, 2))\n",
"opt = tf.train.AdamOptimizer(learning_rate=LR, beta1=BETA1, beta2=BETA2)\n",
"for var in tf.global_variables():\n",
" print(var)\n",
"\n",
"print('\\ncompute_gradients:')\n",
"grads_and_vars = opt.compute_gradients(loss, var_list=[w, b])\n",
"for var in tf.global_variables():\n",
" print(var)\n",
"\n",
"print('\\napply_gradients:')\n",
"opt_op = opt.apply_gradients(grads_and_vars)\n",
"\n",
"for var in tf.global_variables():\n",
" print(var)\n",
" \n",
"# same with \n",
"# wm = [var for var in tf.global_variables() if var.op.name == 'w/Adam']\n",
"wm = [var for var in tf.global_variables() if var.name == 'w/Adam:0'][0]\n",
"wv = [var for var in tf.global_variables() if var.name == 'w/Adam_1:0'][0]\n",
"bm = [var for var in tf.global_variables() if var.name == 'b/Adam:0'][0]\n",
"bv = [var for var in tf.global_variables() if var.name == 'b/Adam_1:0'][0]\n",
"\n",
"beta1 = [var for var in tf.global_variables() if var.name == 'beta1_power:0'][0]\n",
"beta2 = [var for var in tf.global_variables() if var.name == 'beta2_power:0'][0]"
]
},
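{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a side note (added here as a sketch, not part of the original run): the lookups above go by variable name, but the same moment accumulators can also be fetched through the optimizer's slot API (`get_slot_names` / `get_slot`); for `AdamOptimizer` the slot names are `'m'` and `'v'`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sketch: fetch the moment accumulators via the Optimizer slot API\n",
"print(opt.get_slot_names())    # expected: ['m', 'v']\n",
"print(opt.get_slot(w, 'm'))    # first-moment slot, i.e. w/Adam:0\n",
"print(opt.get_slot(w, 'v'))    # second-moment slot, i.e. w/Adam_1:0\n",
"print(opt.get_slot(b, 'm'))\n",
"print(opt.get_slot(b, 'v'))"
]
},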
{
"cell_type": "markdown",
"metadata": {},
"source": [
"```python\n",
"with tf.Session() as sess:\n",
" sess.run(tf.global_variables_initializer())\n",
" for i in range(1000):\n",
" loss_, _ = sess.run([loss, opt_op])\n",
" if i % 100 == 0:\n",
" print('batch i')\n",
" print('loss {}'.format(loss_))\n",
" print(w.eval(), b.eval())\n",
"``` "
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"batch 0\n",
"loss 206.27879333496094\n",
"0.25 0.25\n",
"batch 1\n",
"loss 82.32612609863281\n",
"0.125 0.125\n",
"batch 2\n",
"loss 20.670351028442383\n",
"0.0625 0.0625\n",
"batch 3\n",
"loss 3.098949909210205\n",
"0.03125 0.03125\n",
"batch 4\n",
"loss 7.379493713378906\n",
"0.015625 0.015625\n"
]
}
],
"source": [
"with tf.Session() as sess:\n",
" sess.run(tf.global_variables_initializer())\n",
" for i in range(5):\n",
" loss_, _ = sess.run([loss, opt_op])\n",
" \n",
" print('batch {}'.format(i))\n",
" print('loss {}'.format(loss_))\n",
" print(beta1.eval(), beta2.eval())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3. our naive AdamOptimizer"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's implement `AdamOptimizer` follow TensorFlow API\n",
"<br>\n",
"- Initialization:\n",
"$$\n",
"m_0 = 0\n",
"$$\n",
"$$\n",
"v_0 = 0\n",
"$$\n",
"$$\n",
"t = 0\n",
"$$\n",
"- Update rule:\n",
"$$\n",
"lr_t = lr * \\sqrt{1 - \\beta_2^t} / (1 - \\beta_1^t)\n",
"$$\n",
"$$\n",
"m_t = \\beta_1 * m_{t-1} + (1 - \\beta_1) * g\n",
"$$\n",
"$$\n",
"v_t = \\beta_2 * v_{t-1} + (1 - \\beta_2) * g * g\n",
"$$\n",
"$$\n",
"variable = variable - lr_t * m_t / (\\sqrt{v_t} + \\epsilon)\n",
"$$"
]
},
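{
"cell_type": "markdown",
"metadata": {},
"source": [
"Before wiring this into a TensorFlow graph, here is a minimal NumPy sketch of a single Adam step for one scalar parameter, following the update rule above literally (the parameter value, the gradient, and the local names `b1`/`b2` are made up for illustration):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"# one Adam step in plain NumPy, mirroring the update rule above\n",
"lr, b1, b2, eps = 1.0, 0.5, 0.5, 1e-8   # same hyper-parameters as section 2\n",
"m, v, t = 0.0, 0.0, 0                   # initialization\n",
"theta, g = 0.1, 4.0                     # made-up parameter value and its gradient\n",
"\n",
"t += 1\n",
"lr_t = lr * np.sqrt(1 - b2**t) / (1 - b1**t)\n",
"m = b1 * m + (1 - b1) * g\n",
"v = b2 * v + (1 - b2) * g * g\n",
"theta = theta - lr_t * m / (np.sqrt(v) + eps)\n",
"print(lr_t, m, v, theta)"
]
},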
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"class naive_adam:\n",
" def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-08, name='Adam'):\n",
" self.lr = tf.convert_to_tensor(learning_rate, dtype=tf.float32)\n",
" self.beta1 = tf.convert_to_tensor(beta1, dtype=tf.float32)\n",
" self.beta2 = tf.convert_to_tensor(beta2, dtype=tf.float32)\n",
" self.epsilon = tf.convert_to_tensor(epsilon, dtype=tf.float32)\n",
" self.name = name\n",
"\n",
" def compute_gradients(self, loss, var_list=None):\n",
" return list(zip(tf.gradients(loss, var_list), var_list))\n",
"\n",
" def apply_gradients(self, grads_and_vars, global_step=None):\n",
" beta1 = tf.get_variable('beta1_power', initializer=1.0, trainable=False)\n",
" beta2 = tf.get_variable('beta2_power', initializer=1.0, trainable=False)\n",
" varm = []\n",
" varv = []\n",
" for _, var in grads_and_vars:\n",
" varm.append(tf.get_variable(name=var.op.name +'/' + self.name, \n",
" initializer=0.0, trainable=False))\n",
" varv.append(tf.get_variable(name=var.op.name +'/' + self.name + '_1', \n",
" initializer=0.0, trainable=False))\n",
" update_beta1 = tf.assign(beta1, beta1 * self.beta1)\n",
" update_beta2 = tf.assign(beta2, beta2 * self.beta2)\n",
"\n",
" lr = self.lr * tf.sqrt((tf.constant(1.0) - update_beta2)) / (tf.constant(1.0) - update_beta1)\n",
" var_step = []\n",
" for mt, vt, (grad, var) in zip(varm, varv, grads_and_vars):\n",
" update_mt = mt.assign(self.beta1 * mt + (tf.constant(1.0) - self.beta1) * grad)\n",
" update_vt = vt.assign(self.beta2 * vt + (tf.constant(1.0) - self.beta2) * grad * grad)\n",
" var_step.append(var.assign_sub(lr * update_mt / (tf.sqrt(update_vt) + self.epsilon)))\n",
" update_vars = tf.group(*var_step)\n",
"\n",
" if global_step is not None:\n",
" update_globalstep = tf.assign_add(global_step, 1)\n",
" with tf.control_dependencies([update_vars, update_globalstep]):\n",
" apply_op = tf.no_op()\n",
" else:\n",
" with tf.control_dependencies([update_vars]):\n",
" apply_op = tf.no_op()\n",
" return apply_op"
]
},
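{
"cell_type": "markdown",
"metadata": {},
"source": [
"The optional `global_step` argument of `apply_gradients` is not exercised in the runs below; a usage sketch could look like the following (on a fresh graph to avoid variable-name clashes; `w_`, `b_`, `loss_gs`, `opt_gs` and `global_step_var` are names introduced only for this example):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# usage sketch for the optional global_step argument (not part of the original run)\n",
"tf.reset_default_graph()\n",
"w_ = tf.get_variable(name='w', initializer=0.1)\n",
"b_ = tf.get_variable(name='b', initializer=0.0)\n",
"loss_gs = tf.reduce_sum(tf.pow(x * w_ + b_ - y, 2))\n",
"global_step_var = tf.get_variable('global_step', initializer=0, trainable=False)\n",
"opt_gs = naive_adam(learning_rate=LR, beta1=BETA1, beta2=BETA2)\n",
"train_op = opt_gs.apply_gradients(opt_gs.compute_gradients(loss_gs, var_list=[w_, b_]),\n",
"                                  global_step=global_step_var)\n",
"with tf.Session() as sess:\n",
"    sess.run(tf.global_variables_initializer())\n",
"    for _ in range(3):\n",
"        sess.run(train_op)\n",
"    print(global_step_var.eval())   # the step counter should read 3"
]
},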
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<tf.Variable 'w:0' shape=() dtype=float32_ref>\n",
"<tf.Variable 'b:0' shape=() dtype=float32_ref>\n",
"\n",
"compute_gradients:\n",
"<tf.Variable 'w:0' shape=() dtype=float32_ref>\n",
"<tf.Variable 'b:0' shape=() dtype=float32_ref>\n",
"\n",
"apply_gradients:\n",
"<tf.Variable 'w:0' shape=() dtype=float32_ref>\n",
"<tf.Variable 'b:0' shape=() dtype=float32_ref>\n",
"<tf.Variable 'beta1_power:0' shape=() dtype=float32_ref>\n",
"<tf.Variable 'beta2_power:0' shape=() dtype=float32_ref>\n",
"<tf.Variable 'w/Adam:0' shape=() dtype=float32_ref>\n",
"<tf.Variable 'w/Adam_1:0' shape=() dtype=float32_ref>\n",
"<tf.Variable 'b/Adam:0' shape=() dtype=float32_ref>\n",
"<tf.Variable 'b/Adam_1:0' shape=() dtype=float32_ref>\n"
]
}
],
"source": [
"tf.reset_default_graph()\n",
"w = tf.get_variable(name='w', initializer=0.1)\n",
"b = tf.get_variable(name='b', initializer=0.0)\n",
"y_pred = x * w + b\n",
"loss = tf.reduce_sum(tf.pow(y_pred - y, 2))\n",
"opt = naive_adam(learning_rate=LR, beta1=BETA1, beta2=BETA2)\n",
"for var in tf.global_variables():\n",
" print(var)\n",
"\n",
"print('\\ncompute_gradients:')\n",
"grads_and_vars = opt.compute_gradients(loss, var_list=[w, b])\n",
"for var in tf.global_variables():\n",
" print(var)\n",
"\n",
"print('\\napply_gradients:')\n",
"opt_op = opt.apply_gradients(grads_and_vars)\n",
"\n",
"for var in tf.global_variables():\n",
" print(var)\n",
" \n",
"# same with \n",
"# wm = [var for var in tf.global_variables() if var.op.name == 'w/Adam']\n",
"wm = [var for var in tf.global_variables() if var.name == 'w/Adam:0'][0]\n",
"wv = [var for var in tf.global_variables() if var.name == 'w/Adam_1:0'][0]\n",
"bm = [var for var in tf.global_variables() if var.name == 'b/Adam:0'][0]\n",
"bv = [var for var in tf.global_variables() if var.name == 'b/Adam_1:0'][0]\n",
"\n",
"beta1 = [var for var in tf.global_variables() if var.name == 'beta1_power:0'][0]\n",
"beta2 = [var for var in tf.global_variables() if var.name == 'beta2_power:0'][0]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"batch 0\n",
"loss 206.27879333496094\n",
"0.5 0.5\n",
"batch 1\n",
"loss 82.32612609863281\n",
"0.25 0.25\n",
"batch 2\n",
"loss 20.67035675048828\n",
"0.125 0.125\n",
"batch 3\n",
"loss 3.098945379257202\n",
"0.0625 0.0625\n",
"batch 4\n",
"loss 7.379493713378906\n",
"0.03125 0.03125\n"
]
}
],
"source": [
"with tf.Session() as sess:\n",
" sess.run(tf.global_variables_initializer())\n",
" for i in range(5):\n",
" loss_, _ = sess.run([loss, opt_op])\n",
" \n",
" print('batch {}'.format(i))\n",
" print('loss {}'.format(loss_))\n",
" print(beta1.eval(), beta2.eval())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}