Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Gradient descent bias convergence problem (convert celcius to f)
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"from matplotlib import pyplot as plt\n",
"import numpy as np\n",
"import torch\n",
"from torch import nn, optim\n",
"dtype = torch.float\n",
"device = torch.device(\"cpu\")\n",
"#device = torch.device(\"cuda:0\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.4.0\n"
]
}
],
"source": [
"print(torch.__version__)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# convert from celcius to fahrenheit: T(°F) = T(°C) × 9/5 + 32\n",
"def c_to_f(celcius):\n",
" return celcius * 9/5 + 32"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"#x = np.array([[-15], [-11], [-10], [-7], [-2], [0], [3], [17], [15], [25], [60], [84], [120]], dtype=np.float32)\n",
"x = np.array([ [c] for c in np.random.randint(-500, high=500, size=2000)], dtype=np.float32)\n",
"\n",
"# multiplying by some random * 50 just to add some noise\n",
"y = np.array([ c_to_f(c) + np.random.randn() * 50 for c in x], dtype=np.float32)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0,0.5,'y')"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.scatter(x, y, s=0.1)\n",
"plt.xlabel(\"x\"); plt.ylabel(\"y\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"x_train = torch.from_numpy(x)\n",
"y_train = torch.from_numpy(y)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"lr = 1e-6"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def lin(a, b, x):\n",
" return a * x + b"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def mse_loss(y_pred, y):\n",
" return (y_pred - y).pow(2).mean()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"a = torch.randn(1, device=device, dtype=dtype, requires_grad=True)\n",
"b = torch.randn(1, device=device, dtype=dtype, requires_grad=True)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 loss: tensor(1.00000e+05 *\n",
" 6.6192) a: tensor([-1.0054]) b: tensor([-0.5949])\n",
"100 loss: tensor(3574.2202) a: tensor([ 1.8045]) b: tensor([-0.5883])\n",
"200 loss: tensor(3573.7935) a: tensor([ 1.8045]) b: tensor([-0.5818])\n",
"300 loss: tensor(3573.3669) a: tensor([ 1.8045]) b: tensor([-0.5752])\n",
"400 loss: tensor(3572.9404) a: tensor([ 1.8045]) b: tensor([-0.5687])\n",
"500 loss: tensor(3572.5142) a: tensor([ 1.8045]) b: tensor([-0.5622])\n",
"600 loss: tensor(3572.0896) a: tensor([ 1.8045]) b: tensor([-0.5556])\n",
"700 loss: tensor(3571.6636) a: tensor([ 1.8045]) b: tensor([-0.5491])\n",
"800 loss: tensor(3571.2375) a: tensor([ 1.8045]) b: tensor([-0.5426])\n",
"900 loss: tensor(3570.8120) a: tensor([ 1.8045]) b: tensor([-0.5361])\n",
"1000 loss: tensor(3570.3865) a: tensor([ 1.8045]) b: tensor([-0.5295])\n",
"1100 loss: tensor(3569.9607) a: tensor([ 1.8045]) b: tensor([-0.5230])\n",
"1200 loss: tensor(3569.5356) a: tensor([ 1.8045]) b: tensor([-0.5165])\n",
"1300 loss: tensor(3569.1104) a: tensor([ 1.8045]) b: tensor([-0.5100])\n",
"1400 loss: tensor(3568.6865) a: tensor([ 1.8045]) b: tensor([-0.5035])\n",
"1500 loss: tensor(3568.2620) a: tensor([ 1.8045]) b: tensor([-0.4969])\n",
"1600 loss: tensor(3567.8374) a: tensor([ 1.8045]) b: tensor([-0.4904])\n",
"1700 loss: tensor(3567.4141) a: tensor([ 1.8045]) b: tensor([-0.4839])\n",
"1800 loss: tensor(3566.9895) a: tensor([ 1.8045]) b: tensor([-0.4774])\n",
"1900 loss: tensor(3566.5649) a: tensor([ 1.8045]) b: tensor([-0.4709])\n",
"2000 loss: tensor(3566.1423) a: tensor([ 1.8045]) b: tensor([-0.4644])\n",
"2100 loss: tensor(3565.7185) a: tensor([ 1.8045]) b: tensor([-0.4579])\n",
"2200 loss: tensor(3565.2944) a: tensor([ 1.8045]) b: tensor([-0.4514])\n",
"2300 loss: tensor(3564.8718) a: tensor([ 1.8045]) b: tensor([-0.4449])\n",
"2400 loss: tensor(3564.4482) a: tensor([ 1.8045]) b: tensor([-0.4384])\n",
"2500 loss: tensor(3564.0264) a: tensor([ 1.8045]) b: tensor([-0.4319])\n",
"2600 loss: tensor(3563.6028) a: tensor([ 1.8045]) b: tensor([-0.4254])\n",
"2700 loss: tensor(3563.1812) a: tensor([ 1.8045]) b: tensor([-0.4189])\n",
"2800 loss: tensor(3562.7581) a: tensor([ 1.8045]) b: tensor([-0.4124])\n",
"2900 loss: tensor(3562.3364) a: tensor([ 1.8045]) b: tensor([-0.4059])\n",
"3000 loss: tensor(3561.9133) a: tensor([ 1.8045]) b: tensor([-0.3994])\n",
"3100 loss: tensor(3561.4927) a: tensor([ 1.8045]) b: tensor([-0.3929])\n",
"3200 loss: tensor(3561.0698) a: tensor([ 1.8045]) b: tensor([-0.3864])\n",
"3300 loss: tensor(3560.6489) a: tensor([ 1.8045]) b: tensor([-0.3799])\n",
"3400 loss: tensor(3560.2280) a: tensor([ 1.8045]) b: tensor([-0.3734])\n",
"3500 loss: tensor(3559.8062) a: tensor([ 1.8045]) b: tensor([-0.3669])\n",
"3600 loss: tensor(3559.3853) a: tensor([ 1.8045]) b: tensor([-0.3604])\n",
"3700 loss: tensor(3558.9653) a: tensor([ 1.8045]) b: tensor([-0.3539])\n",
"3800 loss: tensor(3558.5447) a: tensor([ 1.8045]) b: tensor([-0.3474])\n",
"3900 loss: tensor(3558.1233) a: tensor([ 1.8045]) b: tensor([-0.3410])\n",
"4000 loss: tensor(3557.7031) a: tensor([ 1.8045]) b: tensor([-0.3345])\n",
"4100 loss: tensor(3557.2834) a: tensor([ 1.8045]) b: tensor([-0.3280])\n",
"4200 loss: tensor(3556.8635) a: tensor([ 1.8045]) b: tensor([-0.3215])\n",
"4300 loss: tensor(3556.4441) a: tensor([ 1.8045]) b: tensor([-0.3150])\n",
"4400 loss: tensor(3556.0232) a: tensor([ 1.8044]) b: tensor([-0.3086])\n",
"4500 loss: tensor(3555.6040) a: tensor([ 1.8044]) b: tensor([-0.3021])\n",
"4600 loss: tensor(3555.1843) a: tensor([ 1.8044]) b: tensor([-0.2956])\n",
"4700 loss: tensor(3554.7654) a: tensor([ 1.8044]) b: tensor([-0.2891])\n",
"4800 loss: tensor(3554.3467) a: tensor([ 1.8044]) b: tensor([-0.2827])\n",
"4900 loss: tensor(3553.9277) a: tensor([ 1.8044]) b: tensor([-0.2762])\n"
]
}
],
"source": [
"n_epochs = 5000\n",
"for epoch in range(n_epochs):\n",
" \n",
" # compute loss\n",
" loss = mse_loss(lin(a, b, x_train), y_train)\n",
"\n",
" # backpropagate a and b\n",
" loss.backward()\n",
"\n",
" if epoch % 100 == 0:\n",
" print(epoch, \"loss: \", loss, \"a: \" , a, \"b: \", b)\n",
" \n",
" with torch.no_grad():\n",
" a -= lr * a.grad\n",
" b -= lr * b.grad\n",
" #print(\"Reducing bias with respect to bias gradient: \", b.grad, \" new bias: \", b)\n",
" \n",
" # zero gradient\n",
" a.grad.zero_()\n",
" b.grad.zero_()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment