{
"cells": [
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "%matplotlib inline\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torch.utils.data import DataLoader, Dataset, TensorDataset\nfrom torch.optim import *\nimport torchvision\n\n",
"execution_count": 1,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Detecting the device"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# detecting the device\ndev = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")\nprint(dev)",
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"text": "cuda\n",
"name": "stdout"
}
]
},
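{
"metadata": {},
"cell_type": "markdown",
"source": "A minimal sketch (added here, not part of the original run) of how the detected device is used: the model and every batch have to be moved to the same device with `.to(dev)` before a forward pass, which is exactly what `train_epoch` below does."
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# added sketch: tensors and modules must share a device before a forward pass\nt_demo = torch.randn(2, 3).to(dev)     # move a tensor to the detected device\nlin_demo = nn.Linear(3, 4).to(dev)     # move a module (i.e. its parameters) as well\nprint(t_demo.device, lin_demo.weight.device, lin_demo(t_demo).shape)",
"execution_count": null,
"outputs": []
},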
{
"metadata": {},
"cell_type": "markdown",
"source": "Loading and normalizing the MNIST dataset"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# load MNIST once and slice it into train/validation tensors\ndl = DataLoader(torchvision.datasets.MNIST('/data/mnist', download=True))\n\ntensor = dl.dataset.data\ntensor = tensor.to(dtype=torch.float32)\n\ntr = tensor.reshape(tensor.size(0), -1)   # flatten 28x28 images into 784-vectors\ntargets = dl.dataset.targets\ntargets = targets.to(dtype=torch.long)\n\nx_train = tr[0:50000]\ny_train = targets[0:50000]\nx_valid = tr[50000:60000]\ny_valid = targets[50000:60000]\n\n\ndef normalize(x, m, s): return (x - m) / (s + 1e-5)\n\ndef normalize_to(train, valid):\n    # normalize both splits with the *training* mean and std\n    m, s = train.mean(), train.std()\n    return normalize(train, m, s), normalize(valid, m, s)\n\nx_train, x_valid = normalize_to(x_train, x_valid)\nprint(x_train.std(), x_train.mean())\n",
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"text": "tensor(1.0000) tensor(-7.7135e-06)\n",
"name": "stdout"
}
]
},
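{
"metadata": {},
"cell_type": "markdown",
"source": "A quick sanity check (added sketch, not executed in the original gist): the splits should be 50000/10000 rows of 784 features each, and because the validation set is normalized with the *training* statistics its mean and std are only approximately 0 and 1."
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# added sketch: verify the shapes and normalization of the splits built above\nprint(x_train.shape, y_train.shape)    # expected: torch.Size([50000, 784]) torch.Size([50000])\nprint(x_valid.shape, y_valid.shape)    # expected: torch.Size([10000, 784]) torch.Size([10000])\nprint(x_valid.mean(), x_valid.std())   # roughly 0 and 1, computed with train statistics",
"execution_count": null,
"outputs": []
},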
{
"metadata": {
"code_folding": [
0,
20,
26
],
"trusted": true
},
"cell_type": "code",
"source": "def train_epoch(m, epochs=1, lr=0.4, bs=128, droplast=True, dev=\"cuda\"):\n '''model, num of epo, lr, bs, droplast'''\n \n bs=bs\n\n train_ds = TensorDataset(x_train, y_train)\n train_dl = DataLoader(train_ds, batch_size=bs, drop_last=droplast, shuffle=True)\n\n valid_ds = TensorDataset(x_valid, y_valid)\n valid_dl = DataLoader(valid_ds, batch_size=bs, drop_last=droplast, shuffle=True)\n\n loaders={}\n loaders['train'] = train_dl\n loaders['valid'] = valid_dl\n \n model = m \n model.to(dev)\n \n criterion = nn.CrossEntropyLoss()\n epochs = epochs \n optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)\n\n for epoch in range(0,epochs): \n\n # TRAINING PHASE\n model.train() \n for i, (data,target) in enumerate(loaders['train']): \n\n optimizer.zero_grad()\n\n # we need to have the dev\n data = data.to(dev) \n target = target.to(dev)\n\n output = model(data)\n loss = criterion(output, target) # one batch loss \n loss.backward()\n \n if(model.l1.passedbatches) <= 3: \n print(\"batch: \", model.l1.passedbatches)\n print(\"WEIGHT:Train:Loop:\",model.l1.weight)\n print(\"GRAD:Train:Loop:\",model.l1.weight.grad) \n \n optimizer.step() \n ",
"execution_count": 4,
"outputs": []
},
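{
"metadata": {},
"cell_type": "markdown",
"source": "The prints inside the loop rely on `loss.backward()` having populated `model.l1.weight.grad` for the current batch, which `optimizer.step()` then consumes. A standalone toy sketch of that sequence (added illustration, independent of the MNIST model):"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# added sketch: .grad is None until backward(), then step() uses it\nlin_toy = nn.Linear(3, 1)\nopt_toy = torch.optim.SGD(lin_toy.parameters(), lr=0.1)\n\nout_toy = lin_toy(torch.randn(4, 3)).sum()\nprint(lin_toy.weight.grad)   # None - no backward pass has run yet\n\nout_toy.backward()\nprint(lin_toy.weight.grad)   # populated by backward()\n\nopt_toy.step()               # SGD update uses the stored .grad\nopt_toy.zero_grad()          # clears .grad (zeros here, None with newer PyTorch defaults)\nprint(lin_toy.weight.grad)",
"execution_count": null,
"outputs": []
},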
{
"metadata": {
"code_folding": [],
"trusted": true
},
"cell_type": "code",
"source": "class M2(nn.Module):\n    '''Single linear layer: 784 pixel inputs -> 10 class logits.'''\n\n    def __init__(self):\n        super().__init__()\n        self.l1 = nn.Linear(28*28, 10)\n\n    def forward(self, x):\n        x = x.reshape(-1, 28*28)\n        x = F.relu(self.l1(x))\n        return x\n",
"execution_count": 5,
"outputs": []
},
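{
"metadata": {},
"cell_type": "markdown",
"source": "A quick shape check for the model (added sketch): a batch of flattened images should come out as one logit per class."
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# added sketch: M2 maps a batch of 784-dim rows to 10 logits per row\nm_check = M2()\nprint(m_check(x_train[:4]).shape)   # expected: torch.Size([4, 10])",
"execution_count": null,
"outputs": []
},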
{
"metadata": {
"code_folding": [],
"scrolled": false,
"trusted": true
},
"cell_type": "code",
"source": "torch.cuda.empty_cache()\nseed_value = 0\ntorch.manual_seed(seed_value)\n\nmodel = M2()\nhooks = {}\n\n# forward hook: runs after every forward pass through the module\ndef forward_pass(module, inp, outp):\n\n    module.passedbatches += 1\n\n    if module.passedbatches <= 3:\n        print(\"batch: \", module.passedbatches)\n        print(\"WEIGHT:Forward:\", module.weight)\n        print(\"GRAD:Forward:\", module.weight.grad)\n\n\n# backward hook: runs while gradients flow back through the module\ndef backward_pass(module, ginp, goutp):\n\n    if module.passedbatches <= 3:\n        print(\"batch: \", module.passedbatches)\n        print(\"WEIGHT:Backward:\", module.weight)\n        print(\"GRAD:Backward:\", module.weight.grad)\n\n\nfor name, o in model.named_children():\n    o.passedbatches = 0\n    o.name = name\n    # keep separate handles so both hooks can be removed afterwards\n    hooks[name + '_fwd'] = o.register_forward_hook(forward_pass)\n    hooks[name + '_bwd'] = o.register_backward_hook(backward_pass)\n\ntrain_epoch(model, epochs=1, lr=0.05, bs=256, dev=\"cpu\")\n\nfor k, v in hooks.items():\n    v.remove()\n",
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"text": "batch: 1\nWEIGHT:Forward: Parameter containing:\ntensor([[-0.0003, 0.0192, -0.0294, ..., 0.0219, 0.0037, 0.0021],\n [-0.0198, -0.0150, -0.0104, ..., -0.0203, -0.0060, -0.0299],\n [-0.0201, 0.0149, -0.0333, ..., -0.0203, 0.0012, 0.0080],\n ...,\n [ 0.0237, 0.0103, -0.0219, ..., 0.0088, -0.0009, 0.0009],\n [ 0.0144, -0.0336, -0.0346, ..., -0.0222, -0.0025, -0.0138],\n [-0.0196, -0.0118, 0.0230, ..., -0.0202, 0.0172, 0.0355]],\n requires_grad=True)\nGRAD:Forward: None\nbatch: 1\nWEIGHT:Backward: Parameter containing:\ntensor([[-0.0003, 0.0192, -0.0294, ..., 0.0219, 0.0037, 0.0021],\n [-0.0198, -0.0150, -0.0104, ..., -0.0203, -0.0060, -0.0299],\n [-0.0201, 0.0149, -0.0333, ..., -0.0203, 0.0012, 0.0080],\n ...,\n [ 0.0237, 0.0103, -0.0219, ..., 0.0088, -0.0009, 0.0009],\n [ 0.0144, -0.0336, -0.0346, ..., -0.0222, -0.0025, -0.0138],\n [-0.0196, -0.0118, 0.0230, ..., -0.0202, 0.0172, 0.0355]],\n requires_grad=True)\nGRAD:Backward: None\nbatch: 1\nWEIGHT:Train:Loop: Parameter containing:\ntensor([[-0.0003, 0.0192, -0.0294, ..., 0.0219, 0.0037, 0.0021],\n [-0.0198, -0.0150, -0.0104, ..., -0.0203, -0.0060, -0.0299],\n [-0.0201, 0.0149, -0.0333, ..., -0.0203, 0.0012, 0.0080],\n ...,\n [ 0.0237, 0.0103, -0.0219, ..., 0.0088, -0.0009, 0.0009],\n [ 0.0144, -0.0336, -0.0346, ..., -0.0222, -0.0025, -0.0138],\n [-0.0196, -0.0118, 0.0230, ..., -0.0202, 0.0172, 0.0355]],\n requires_grad=True)\nGRAD:Train:Loop: tensor([[-0.0198, -0.0198, -0.0198, ..., -0.0198, -0.0198, -0.0198],\n [-0.0235, -0.0235, -0.0235, ..., -0.0235, -0.0235, -0.0235],\n [ 0.0018, 0.0018, 0.0018, ..., 0.0018, 0.0018, 0.0018],\n ...,\n [-0.0161, -0.0161, -0.0161, ..., -0.0161, -0.0161, -0.0161],\n [-0.0005, -0.0005, -0.0005, ..., -0.0005, -0.0005, -0.0005],\n [ 0.0135, 0.0135, 0.0135, ..., 0.0135, 0.0135, 0.0135]])\nbatch: 2\nWEIGHT:Forward: Parameter containing:\ntensor([[ 7.2432e-04, 2.0150e-02, -2.8403e-02, ..., 2.2927e-02,\n 4.7243e-03, 3.0520e-03],\n [-1.8603e-02, -1.3807e-02, -9.2679e-03, ..., -1.9090e-02,\n -4.7880e-03, -2.8771e-02],\n [-2.0237e-02, 1.4810e-02, -3.3364e-02, ..., -2.0432e-02,\n 1.0932e-03, 7.9391e-03],\n ...,\n [ 2.4482e-02, 1.1099e-02, -2.1131e-02, ..., 9.6508e-03,\n -5.6901e-05, 1.7043e-03],\n [ 1.4433e-02, -3.3573e-02, -3.4551e-02, ..., -2.2222e-02,\n -2.4386e-03, -1.3786e-02],\n [-2.0232e-02, -1.2456e-02, 2.2341e-02, ..., -2.0917e-02,\n 1.6546e-02, 3.4834e-02]], requires_grad=True)\nGRAD:Forward: tensor([[0., 0., 0., ..., 0., 0., 0.],\n [0., 0., 0., ..., 0., 0., 0.],\n [0., 0., 0., ..., 0., 0., 0.],\n ...,\n [0., 0., 0., ..., 0., 0., 0.],\n [0., 0., 0., ..., 0., 0., 0.],\n [0., 0., 0., ..., 0., 0., 0.]])\nbatch: 2\nWEIGHT:Backward: Parameter containing:\ntensor([[ 7.2432e-04, 2.0150e-02, -2.8403e-02, ..., 2.2927e-02,\n 4.7243e-03, 3.0520e-03],\n [-1.8603e-02, -1.3807e-02, -9.2679e-03, ..., -1.9090e-02,\n -4.7880e-03, -2.8771e-02],\n [-2.0237e-02, 1.4810e-02, -3.3364e-02, ..., -2.0432e-02,\n 1.0932e-03, 7.9391e-03],\n ...,\n [ 2.4482e-02, 1.1099e-02, -2.1131e-02, ..., 9.6508e-03,\n -5.6901e-05, 1.7043e-03],\n [ 1.4433e-02, -3.3573e-02, -3.4551e-02, ..., -2.2222e-02,\n -2.4386e-03, -1.3786e-02],\n [-2.0232e-02, -1.2456e-02, 2.2341e-02, ..., -2.0917e-02,\n 1.6546e-02, 3.4834e-02]], requires_grad=True)\nGRAD:Backward: tensor([[0., 0., 0., ..., 0., 0., 0.],\n [0., 0., 0., ..., 0., 0., 0.],\n [0., 0., 0., ..., 0., 0., 0.],\n ...,\n [0., 0., 0., ..., 0., 0., 0.],\n [0., 0., 0., ..., 0., 0., 0.],\n [0., 0., 0., ..., 0., 0., 0.]])\nbatch: 2\nWEIGHT:Train:Loop: Parameter containing:\ntensor([[ 7.2432e-04, 
2.0150e-02, -2.8403e-02, ..., 2.2927e-02,\n 4.7243e-03, 3.0520e-03],\n [-1.8603e-02, -1.3807e-02, -9.2679e-03, ..., -1.9090e-02,\n -4.7880e-03, -2.8771e-02],\n [-2.0237e-02, 1.4810e-02, -3.3364e-02, ..., -2.0432e-02,\n 1.0932e-03, 7.9391e-03],\n ...,\n [ 2.4482e-02, 1.1099e-02, -2.1131e-02, ..., 9.6508e-03,\n -5.6901e-05, 1.7043e-03],\n [ 1.4433e-02, -3.3573e-02, -3.4551e-02, ..., -2.2222e-02,\n -2.4386e-03, -1.3786e-02],\n [-2.0232e-02, -1.2456e-02, 2.2341e-02, ..., -2.0917e-02,\n 1.6546e-02, 3.4834e-02]], requires_grad=True)\nGRAD:Train:Loop: tensor([[-0.0003, -0.0003, -0.0003, ..., -0.0003, -0.0003, -0.0003],\n [ 0.0020, 0.0020, 0.0020, ..., 0.0020, 0.0020, 0.0020],\n [-0.0023, -0.0023, -0.0023, ..., -0.0023, -0.0023, -0.0023],\n ...,\n [-0.0078, -0.0078, -0.0078, ..., -0.0078, -0.0078, -0.0078],\n [-0.0034, -0.0034, -0.0034, ..., -0.0034, -0.0034, -0.0034],\n [-0.0006, -0.0006, -0.0006, ..., -0.0006, -0.0006, -0.0006]])\nbatch: 3\nWEIGHT:Forward: Parameter containing:\ntensor([[ 0.0016, 0.0211, -0.0275, ..., 0.0238, 0.0056, 0.0040],\n [-0.0176, -0.0129, -0.0083, ..., -0.0181, -0.0038, -0.0278],\n [-0.0202, 0.0148, -0.0333, ..., -0.0204, 0.0011, 0.0080],\n ...,\n [ 0.0256, 0.0122, -0.0200, ..., 0.0108, 0.0011, 0.0028],\n [ 0.0146, -0.0334, -0.0344, ..., -0.0220, -0.0022, -0.0136],\n [-0.0208, -0.0130, 0.0218, ..., -0.0215, 0.0160, 0.0343]],\n requires_grad=True)\nGRAD:Forward: tensor([[0., 0., 0., ..., 0., 0., 0.],\n [0., 0., 0., ..., 0., 0., 0.],\n [0., 0., 0., ..., 0., 0., 0.],\n ...,\n [0., 0., 0., ..., 0., 0., 0.],\n [0., 0., 0., ..., 0., 0., 0.],\n [0., 0., 0., ..., 0., 0., 0.]])\nbatch: 3\nWEIGHT:Backward: Parameter containing:\ntensor([[ 0.0016, 0.0211, -0.0275, ..., 0.0238, 0.0056, 0.0040],\n [-0.0176, -0.0129, -0.0083, ..., -0.0181, -0.0038, -0.0278],\n [-0.0202, 0.0148, -0.0333, ..., -0.0204, 0.0011, 0.0080],\n ...,\n [ 0.0256, 0.0122, -0.0200, ..., 0.0108, 0.0011, 0.0028],\n [ 0.0146, -0.0334, -0.0344, ..., -0.0220, -0.0022, -0.0136],\n [-0.0208, -0.0130, 0.0218, ..., -0.0215, 0.0160, 0.0343]],\n requires_grad=True)\nGRAD:Backward: tensor([[0., 0., 0., ..., 0., 0., 0.],\n [0., 0., 0., ..., 0., 0., 0.],\n [0., 0., 0., ..., 0., 0., 0.],\n ...,\n [0., 0., 0., ..., 0., 0., 0.],\n [0., 0., 0., ..., 0., 0., 0.],\n [0., 0., 0., ..., 0., 0., 0.]])\nbatch: 3\nWEIGHT:Train:Loop: Parameter containing:\ntensor([[ 0.0016, 0.0211, -0.0275, ..., 0.0238, 0.0056, 0.0040],\n [-0.0176, -0.0129, -0.0083, ..., -0.0181, -0.0038, -0.0278],\n [-0.0202, 0.0148, -0.0333, ..., -0.0204, 0.0011, 0.0080],\n ...,\n [ 0.0256, 0.0122, -0.0200, ..., 0.0108, 0.0011, 0.0028],\n [ 0.0146, -0.0334, -0.0344, ..., -0.0220, -0.0022, -0.0136],\n [-0.0208, -0.0130, 0.0218, ..., -0.0215, 0.0160, 0.0343]],\n requires_grad=True)\nGRAD:Train:Loop: tensor([[ 0.0122, 0.0122, 0.0122, ..., 0.0122, 0.0122, 0.0122],\n [ 0.0152, 0.0152, 0.0152, ..., 0.0152, 0.0152, 0.0152],\n [-0.0008, -0.0008, -0.0008, ..., -0.0008, -0.0008, -0.0008],\n ...,\n [-0.0005, -0.0005, -0.0005, ..., -0.0005, -0.0005, -0.0005],\n [-0.0159, -0.0159, -0.0159, ..., -0.0159, -0.0159, -0.0159],\n [-0.0420, -0.0420, -0.0420, ..., -0.0420, -0.0420, -0.0420]])\n",
"name": "stdout"
}
]
},
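{
"metadata": {},
"cell_type": "markdown",
"source": "Reading the output above: `GRAD:Forward` and `GRAD:Backward` print `None` on the first batch and all zeros afterwards, while `GRAD:Train:Loop` prints real gradients. The forward hook fires before any backward pass for that batch, and the module backward hook fires while the gradient is still flowing through the graph, before it has been accumulated into `weight.grad`, so both only see whatever `optimizer.zero_grad()` left behind. The gradient is only available in `weight.grad` once `loss.backward()` has returned, which is where the training loop prints it. To catch the gradient at the moment it is computed, a hook can be registered on the parameter tensor itself; a minimal sketch (added here, not part of the original run):"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# added sketch: a hook on the parameter tensor receives the freshly computed gradient\nmodel2 = M2()\n\ndef grad_hook(grad):\n    # called during backward with the gradient w.r.t. l1.weight for this batch\n    print(\"GRAD:TensorHook:\", grad.shape, grad.abs().mean())\n\nh = model2.l1.weight.register_hook(grad_hook)\n\nout = model2(x_train[:32])\nloss = F.cross_entropy(out, y_train[:32])\nloss.backward()   # triggers grad_hook with the gradient for this batch\n\nh.remove()",
"execution_count": null,
"outputs": []
},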
{
"metadata": {
"trusted": true,
"scrolled": false
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"gist": {
"id": "",
"data": {
"description": "Grad catch.ipynb",
"public": false
}
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.7.2",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
}
},
"nbformat": 4,
"nbformat_minor": 2
}