Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ikanher/c349b802d904ffccb863d750eec8eea9 to your computer and use it in GitHub Desktop.
Save ikanher/c349b802d904ffccb863d750eec8eea9 to your computer and use it in GitHub Desktop.
Gradient descent bias convergence problem (convert celcius to f)
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"from matplotlib import pyplot as plt\n",
"import numpy as np\n",
"import torch\n",
"from torch import nn, optim\n",
"dtype = torch.float\n",
"device = torch.device(\"cpu\")\n",
"#device = torch.device(\"cuda:0\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.4.0\n"
]
}
],
"source": [
"print(torch.__version__)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# convert from celcius to fahrenheit: T(°F) = T(°C) × 9/5 + 32\n",
"def c_to_f(celcius):\n",
" return celcius * 9/5 + 32"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"#x = np.array([[-15], [-11], [-10], [-7], [-2], [0], [3], [17], [15], [25], [60], [84], [120]], dtype=np.float32)\n",
"x = np.array([ [c] for c in np.random.randint(-500, high=500, size=2000)], dtype=np.float32)\n",
"\n",
"# multiplying by some random * 50 just to add some noise\n",
"y = np.array([ c_to_f(c) + np.random.randn() * 50 for c in x], dtype=np.float32)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0,0.5,'y')"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.scatter(x, y, s=0.1)\n",
"plt.xlabel(\"x\"); plt.ylabel(\"y\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"x_train = torch.from_numpy(x)\n",
"y_train = torch.from_numpy(y)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"lr = 1e-6"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def lin(a, b, x):\n",
" return a * x + b"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def mse_loss(y_pred, y):\n",
" return (y_pred - y).pow(2).mean()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"a = torch.randn(1, device=device, dtype=dtype, requires_grad=True)\n",
"b = torch.randn(1, device=device, dtype=dtype, requires_grad=True)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 loss: tensor(1.00000e+05 *\n",
" 6.6192) a: tensor([-1.0054]) b: tensor([-0.5949])\n",
"100 loss: tensor(3574.2202) a: tensor([ 1.8045]) b: tensor([-0.5883])\n",
"200 loss: tensor(3573.7935) a: tensor([ 1.8045]) b: tensor([-0.5818])\n",
"300 loss: tensor(3573.3669) a: tensor([ 1.8045]) b: tensor([-0.5752])\n",
"400 loss: tensor(3572.9404) a: tensor([ 1.8045]) b: tensor([-0.5687])\n",
"500 loss: tensor(3572.5142) a: tensor([ 1.8045]) b: tensor([-0.5622])\n",
"600 loss: tensor(3572.0896) a: tensor([ 1.8045]) b: tensor([-0.5556])\n",
"700 loss: tensor(3571.6636) a: tensor([ 1.8045]) b: tensor([-0.5491])\n",
"800 loss: tensor(3571.2375) a: tensor([ 1.8045]) b: tensor([-0.5426])\n",
"900 loss: tensor(3570.8120) a: tensor([ 1.8045]) b: tensor([-0.5361])\n",
"1000 loss: tensor(3570.3865) a: tensor([ 1.8045]) b: tensor([-0.5295])\n",
"1100 loss: tensor(3569.9607) a: tensor([ 1.8045]) b: tensor([-0.5230])\n",
"1200 loss: tensor(3569.5356) a: tensor([ 1.8045]) b: tensor([-0.5165])\n",
"1300 loss: tensor(3569.1104) a: tensor([ 1.8045]) b: tensor([-0.5100])\n",
"1400 loss: tensor(3568.6865) a: tensor([ 1.8045]) b: tensor([-0.5035])\n",
"1500 loss: tensor(3568.2620) a: tensor([ 1.8045]) b: tensor([-0.4969])\n",
"1600 loss: tensor(3567.8374) a: tensor([ 1.8045]) b: tensor([-0.4904])\n",
"1700 loss: tensor(3567.4141) a: tensor([ 1.8045]) b: tensor([-0.4839])\n",
"1800 loss: tensor(3566.9895) a: tensor([ 1.8045]) b: tensor([-0.4774])\n",
"1900 loss: tensor(3566.5649) a: tensor([ 1.8045]) b: tensor([-0.4709])\n",
"2000 loss: tensor(3566.1423) a: tensor([ 1.8045]) b: tensor([-0.4644])\n",
"2100 loss: tensor(3565.7185) a: tensor([ 1.8045]) b: tensor([-0.4579])\n",
"2200 loss: tensor(3565.2944) a: tensor([ 1.8045]) b: tensor([-0.4514])\n",
"2300 loss: tensor(3564.8718) a: tensor([ 1.8045]) b: tensor([-0.4449])\n",
"2400 loss: tensor(3564.4482) a: tensor([ 1.8045]) b: tensor([-0.4384])\n",
"2500 loss: tensor(3564.0264) a: tensor([ 1.8045]) b: tensor([-0.4319])\n",
"2600 loss: tensor(3563.6028) a: tensor([ 1.8045]) b: tensor([-0.4254])\n",
"2700 loss: tensor(3563.1812) a: tensor([ 1.8045]) b: tensor([-0.4189])\n",
"2800 loss: tensor(3562.7581) a: tensor([ 1.8045]) b: tensor([-0.4124])\n",
"2900 loss: tensor(3562.3364) a: tensor([ 1.8045]) b: tensor([-0.4059])\n",
"3000 loss: tensor(3561.9133) a: tensor([ 1.8045]) b: tensor([-0.3994])\n",
"3100 loss: tensor(3561.4927) a: tensor([ 1.8045]) b: tensor([-0.3929])\n",
"3200 loss: tensor(3561.0698) a: tensor([ 1.8045]) b: tensor([-0.3864])\n",
"3300 loss: tensor(3560.6489) a: tensor([ 1.8045]) b: tensor([-0.3799])\n",
"3400 loss: tensor(3560.2280) a: tensor([ 1.8045]) b: tensor([-0.3734])\n",
"3500 loss: tensor(3559.8062) a: tensor([ 1.8045]) b: tensor([-0.3669])\n",
"3600 loss: tensor(3559.3853) a: tensor([ 1.8045]) b: tensor([-0.3604])\n",
"3700 loss: tensor(3558.9653) a: tensor([ 1.8045]) b: tensor([-0.3539])\n",
"3800 loss: tensor(3558.5447) a: tensor([ 1.8045]) b: tensor([-0.3474])\n",
"3900 loss: tensor(3558.1233) a: tensor([ 1.8045]) b: tensor([-0.3410])\n",
"4000 loss: tensor(3557.7031) a: tensor([ 1.8045]) b: tensor([-0.3345])\n",
"4100 loss: tensor(3557.2834) a: tensor([ 1.8045]) b: tensor([-0.3280])\n",
"4200 loss: tensor(3556.8635) a: tensor([ 1.8045]) b: tensor([-0.3215])\n",
"4300 loss: tensor(3556.4441) a: tensor([ 1.8045]) b: tensor([-0.3150])\n",
"4400 loss: tensor(3556.0232) a: tensor([ 1.8044]) b: tensor([-0.3086])\n",
"4500 loss: tensor(3555.6040) a: tensor([ 1.8044]) b: tensor([-0.3021])\n",
"4600 loss: tensor(3555.1843) a: tensor([ 1.8044]) b: tensor([-0.2956])\n",
"4700 loss: tensor(3554.7654) a: tensor([ 1.8044]) b: tensor([-0.2891])\n",
"4800 loss: tensor(3554.3467) a: tensor([ 1.8044]) b: tensor([-0.2827])\n",
"4900 loss: tensor(3553.9277) a: tensor([ 1.8044]) b: tensor([-0.2762])\n"
]
}
],
"source": [
"n_epochs = 5000\n",
"for epoch in range(n_epochs):\n",
" \n",
" # compute loss\n",
" loss = mse_loss(lin(a, b, x_train), y_train)\n",
"\n",
" # backpropagate a and b\n",
" loss.backward()\n",
"\n",
" if epoch % 100 == 0:\n",
" print(epoch, \"loss: \", loss, \"a: \" , a, \"b: \", b)\n",
" \n",
" with torch.no_grad():\n",
" a -= lr * a.grad\n",
" b -= lr * b.grad\n",
" #print(\"Reducing bias with respect to bias gradient: \", b.grad, \" new bias: \", b)\n",
" \n",
" # zero gradient\n",
" a.grad.zero_()\n",
" b.grad.zero_()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"from matplotlib import pyplot as plt\n",
"import numpy as np\n",
"import torch\n",
"from torch import nn, optim\n",
"dtype = torch.float\n",
"device = torch.device(\"cpu\")\n",
"#device = torch.device(\"cuda:0\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.4.0\n"
]
}
],
"source": [
"print(torch.__version__)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# convert from celcius to fahrenheit: T(°F) = T(°C) × 9/5 + 32\n",
"def c_to_f(celcius):\n",
" return celcius * 9/5 + 32"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"#x = np.array([[-15], [-11], [-10], [-7], [-2], [0], [3], [17], [15], [25], [60], [84], [120]], dtype=np.float32)\n",
"x = np.array([ [c] for c in np.random.randint(-500, high=500, size=2000)], dtype=np.float32)\n",
"\n",
"# multiplying by some random * 50 just to add some noise\n",
"y = np.array([ c_to_f(c) + np.random.randn() * 50 for c in x], dtype=np.float32)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0,0.5,'y')"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.scatter(x, y, s=0.1)\n",
"plt.xlabel(\"x\"); plt.ylabel(\"y\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"x_train = torch.from_numpy(x)\n",
"y_train = torch.from_numpy(y)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"lr = 1e-6"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def lin(a, b, x):\n",
" return a * x + b"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def mse_loss(y_pred, y):\n",
" return (y_pred - y).pow(2).mean()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"a = torch.randn(1, device=device, dtype=dtype, requires_grad=True)\n",
"b = torch.randn(1, device=device, dtype=dtype, requires_grad=True)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 loss: tensor(1.00000e+05 *\n",
" 6.6192) a: tensor([-1.0054]) b: tensor([-0.5949])\n",
"100 loss: tensor(3574.2202) a: tensor([ 1.8045]) b: tensor([-0.5883])\n",
"200 loss: tensor(3573.7935) a: tensor([ 1.8045]) b: tensor([-0.5818])\n",
"300 loss: tensor(3573.3669) a: tensor([ 1.8045]) b: tensor([-0.5752])\n",
"400 loss: tensor(3572.9404) a: tensor([ 1.8045]) b: tensor([-0.5687])\n",
"500 loss: tensor(3572.5142) a: tensor([ 1.8045]) b: tensor([-0.5622])\n",
"600 loss: tensor(3572.0896) a: tensor([ 1.8045]) b: tensor([-0.5556])\n",
"700 loss: tensor(3571.6636) a: tensor([ 1.8045]) b: tensor([-0.5491])\n",
"800 loss: tensor(3571.2375) a: tensor([ 1.8045]) b: tensor([-0.5426])\n",
"900 loss: tensor(3570.8120) a: tensor([ 1.8045]) b: tensor([-0.5361])\n",
"1000 loss: tensor(3570.3865) a: tensor([ 1.8045]) b: tensor([-0.5295])\n",
"1100 loss: tensor(3569.9607) a: tensor([ 1.8045]) b: tensor([-0.5230])\n",
"1200 loss: tensor(3569.5356) a: tensor([ 1.8045]) b: tensor([-0.5165])\n",
"1300 loss: tensor(3569.1104) a: tensor([ 1.8045]) b: tensor([-0.5100])\n",
"1400 loss: tensor(3568.6865) a: tensor([ 1.8045]) b: tensor([-0.5035])\n",
"1500 loss: tensor(3568.2620) a: tensor([ 1.8045]) b: tensor([-0.4969])\n",
"1600 loss: tensor(3567.8374) a: tensor([ 1.8045]) b: tensor([-0.4904])\n",
"1700 loss: tensor(3567.4141) a: tensor([ 1.8045]) b: tensor([-0.4839])\n",
"1800 loss: tensor(3566.9895) a: tensor([ 1.8045]) b: tensor([-0.4774])\n",
"1900 loss: tensor(3566.5649) a: tensor([ 1.8045]) b: tensor([-0.4709])\n",
"2000 loss: tensor(3566.1423) a: tensor([ 1.8045]) b: tensor([-0.4644])\n",
"2100 loss: tensor(3565.7185) a: tensor([ 1.8045]) b: tensor([-0.4579])\n",
"2200 loss: tensor(3565.2944) a: tensor([ 1.8045]) b: tensor([-0.4514])\n",
"2300 loss: tensor(3564.8718) a: tensor([ 1.8045]) b: tensor([-0.4449])\n",
"2400 loss: tensor(3564.4482) a: tensor([ 1.8045]) b: tensor([-0.4384])\n",
"2500 loss: tensor(3564.0264) a: tensor([ 1.8045]) b: tensor([-0.4319])\n",
"2600 loss: tensor(3563.6028) a: tensor([ 1.8045]) b: tensor([-0.4254])\n",
"2700 loss: tensor(3563.1812) a: tensor([ 1.8045]) b: tensor([-0.4189])\n",
"2800 loss: tensor(3562.7581) a: tensor([ 1.8045]) b: tensor([-0.4124])\n",
"2900 loss: tensor(3562.3364) a: tensor([ 1.8045]) b: tensor([-0.4059])\n",
"3000 loss: tensor(3561.9133) a: tensor([ 1.8045]) b: tensor([-0.3994])\n",
"3100 loss: tensor(3561.4927) a: tensor([ 1.8045]) b: tensor([-0.3929])\n",
"3200 loss: tensor(3561.0698) a: tensor([ 1.8045]) b: tensor([-0.3864])\n",
"3300 loss: tensor(3560.6489) a: tensor([ 1.8045]) b: tensor([-0.3799])\n",
"3400 loss: tensor(3560.2280) a: tensor([ 1.8045]) b: tensor([-0.3734])\n",
"3500 loss: tensor(3559.8062) a: tensor([ 1.8045]) b: tensor([-0.3669])\n",
"3600 loss: tensor(3559.3853) a: tensor([ 1.8045]) b: tensor([-0.3604])\n",
"3700 loss: tensor(3558.9653) a: tensor([ 1.8045]) b: tensor([-0.3539])\n",
"3800 loss: tensor(3558.5447) a: tensor([ 1.8045]) b: tensor([-0.3474])\n",
"3900 loss: tensor(3558.1233) a: tensor([ 1.8045]) b: tensor([-0.3410])\n",
"4000 loss: tensor(3557.7031) a: tensor([ 1.8045]) b: tensor([-0.3345])\n",
"4100 loss: tensor(3557.2834) a: tensor([ 1.8045]) b: tensor([-0.3280])\n",
"4200 loss: tensor(3556.8635) a: tensor([ 1.8045]) b: tensor([-0.3215])\n",
"4300 loss: tensor(3556.4441) a: tensor([ 1.8045]) b: tensor([-0.3150])\n",
"4400 loss: tensor(3556.0232) a: tensor([ 1.8044]) b: tensor([-0.3086])\n",
"4500 loss: tensor(3555.6040) a: tensor([ 1.8044]) b: tensor([-0.3021])\n",
"4600 loss: tensor(3555.1843) a: tensor([ 1.8044]) b: tensor([-0.2956])\n",
"4700 loss: tensor(3554.7654) a: tensor([ 1.8044]) b: tensor([-0.2891])\n",
"4800 loss: tensor(3554.3467) a: tensor([ 1.8044]) b: tensor([-0.2827])\n",
"4900 loss: tensor(3553.9277) a: tensor([ 1.8044]) b: tensor([-0.2762])\n"
]
}
],
"source": [
"n_epochs = 5000\n",
"for epoch in range(n_epochs):\n",
" \n",
" # compute loss\n",
" loss = mse_loss(lin(a, b, x_train), y_train)\n",
"\n",
" # backpropagate a and b\n",
" loss.backward()\n",
"\n",
" if epoch % 100 == 0:\n",
" print(epoch, \"loss: \", loss, \"a: \" , a, \"b: \", b)\n",
" \n",
" with torch.no_grad():\n",
" a -= lr * a.grad\n",
" b -= lr * b.grad\n",
" #print(\"Reducing bias with respect to bias gradient: \", b.grad, \" new bias: \", b)\n",
" \n",
" # zero gradient\n",
" a.grad.zero_()\n",
" b.grad.zero_()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment