Created
November 18, 2019 21:06
-
-
Save dejanbatanjac/b5ed26a925c75514b8ab6d4e6a328e67 to your computer and use it in GitHub Desktop.
Freezing layers (parameters) — some ideas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "## Working with the learning rate" | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"code_folding": [] | |
}, | |
"cell_type": "code", | |
"source": "import torch\nfrom torchvision.models import resnet18\nmodel = resnet18(pretrained=False)\noptimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)\nfor param_group in optimizer.param_groups:\n print(param_group['lr'])\n \n# we will have just a single param group \nprint(optimizer.param_groups[0].keys())", | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "0.1\ndict_keys(['params', 'lr', 'momentum', 'dampening', 'weight_decay', 'nesterov'])\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "model = resnet18(pretrained=False)\noptimizer = torch.optim.SGD(model.parameters(), lr=0.1)\noptimizer.param_groups.clear()\noptimizer.param_groups.append({'params' : model.conv1.parameters(), 'lr' : 0.3, 'name': 'model.conv1' })\noptimizer.add_param_group({'params' : model.fc.parameters(), 'lr' : 0.4, 'name': 'model.fc' })\n\n# now we have two groups\nfor param_group in optimizer.param_groups: \n print(param_group['name'], param_group['lr']) ", | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "model.conv1 0.3\nmodel.fc 0.4\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "optimizer.param_groups.clear() \nfor name, param in model.named_parameters():\n optimizer.add_param_group({'params' : param, 'lr' : 0.1, 'name':name})\n \n#now every parameter is a param group \nfor p in optimizer.param_groups:\n print( p['name'], p['lr'])\n\n# for p in optimizer.state_dict()[\"param_groups\"]:\n# print( p['name'], p['lr'])", | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "conv1.weight 0.1\nbn1.weight 0.1\nbn1.bias 0.1\nlayer1.0.conv1.weight 0.1\nlayer1.0.bn1.weight 0.1\nlayer1.0.bn1.bias 0.1\nlayer1.0.conv2.weight 0.1\nlayer1.0.bn2.weight 0.1\nlayer1.0.bn2.bias 0.1\nlayer1.1.conv1.weight 0.1\nlayer1.1.bn1.weight 0.1\nlayer1.1.bn1.bias 0.1\nlayer1.1.conv2.weight 0.1\nlayer1.1.bn2.weight 0.1\nlayer1.1.bn2.bias 0.1\nlayer2.0.conv1.weight 0.1\nlayer2.0.bn1.weight 0.1\nlayer2.0.bn1.bias 0.1\nlayer2.0.conv2.weight 0.1\nlayer2.0.bn2.weight 0.1\nlayer2.0.bn2.bias 0.1\nlayer2.0.downsample.0.weight 0.1\nlayer2.0.downsample.1.weight 0.1\nlayer2.0.downsample.1.bias 0.1\nlayer2.1.conv1.weight 0.1\nlayer2.1.bn1.weight 0.1\nlayer2.1.bn1.bias 0.1\nlayer2.1.conv2.weight 0.1\nlayer2.1.bn2.weight 0.1\nlayer2.1.bn2.bias 0.1\nlayer3.0.conv1.weight 0.1\nlayer3.0.bn1.weight 0.1\nlayer3.0.bn1.bias 0.1\nlayer3.0.conv2.weight 0.1\nlayer3.0.bn2.weight 0.1\nlayer3.0.bn2.bias 0.1\nlayer3.0.downsample.0.weight 0.1\nlayer3.0.downsample.1.weight 0.1\nlayer3.0.downsample.1.bias 0.1\nlayer3.1.conv1.weight 0.1\nlayer3.1.bn1.weight 0.1\nlayer3.1.bn1.bias 0.1\nlayer3.1.conv2.weight 0.1\nlayer3.1.bn2.weight 0.1\nlayer3.1.bn2.bias 0.1\nlayer4.0.conv1.weight 0.1\nlayer4.0.bn1.weight 0.1\nlayer4.0.bn1.bias 0.1\nlayer4.0.conv2.weight 0.1\nlayer4.0.bn2.weight 0.1\nlayer4.0.bn2.bias 0.1\nlayer4.0.downsample.0.weight 0.1\nlayer4.0.downsample.1.weight 0.1\nlayer4.0.downsample.1.bias 0.1\nlayer4.1.conv1.weight 0.1\nlayer4.1.bn1.weight 0.1\nlayer4.1.bn1.bias 0.1\nlayer4.1.conv2.weight 0.1\nlayer4.1.bn2.weight 0.1\nlayer4.1.bn2.bias 0.1\nfc.weight 0.1\nfc.bias 0.1\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# checking what we learn fc.weight\nbs=1\ninput = torch.rand(bs,3, 256,256)\ntarget = torch.randint (1000, (bs,))\n\nmodel.train()\np1 = model.fc.weight.clone()\n\noutput = model(input)\nloss_fn = torch.nn.CrossEntropyLoss()\nloss = loss_fn(output, target)\nprint(loss)\noptimizer.zero_grad()\nloss.backward()\noptimizer.step()\np2 = model.fc.weight\n\nprint(torch.equal(p1,p2))\nprint(p1, p2)", | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "tensor(7.2231, grad_fn=<NllLossBackward>)\nFalse\ntensor([[-0.0090, 0.0003, -0.0309, ..., 0.0237, -0.0336, 0.0138],\n [ 0.0361, -0.0332, -0.0357, ..., 0.0187, -0.0219, -0.0133],\n [-0.0047, 0.0039, 0.0167, ..., -0.0410, -0.0391, -0.0096],\n ...,\n [-0.0203, 0.0192, -0.0088, ..., 0.0162, 0.0181, 0.0167],\n [ 0.0235, -0.0051, 0.0012, ..., -0.0016, -0.0316, -0.0415],\n [-0.0416, 0.0338, -0.0343, ..., 0.0116, -0.0197, 0.0015]],\n grad_fn=<CloneBackward>) Parameter containing:\ntensor([[-0.0091, 0.0003, -0.0310, ..., 0.0236, -0.0337, 0.0137],\n [ 0.0360, -0.0333, -0.0358, ..., 0.0186, -0.0220, -0.0134],\n [-0.0048, 0.0038, 0.0166, ..., -0.0411, -0.0392, -0.0097],\n ...,\n [-0.0204, 0.0192, -0.0088, ..., 0.0161, 0.0180, 0.0166],\n [ 0.0234, -0.0052, 0.0011, ..., -0.0016, -0.0316, -0.0415],\n [-0.0418, 0.0337, -0.0345, ..., 0.0115, -0.0198, 0.0014]],\n requires_grad=True)\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# checking that we don't learn fc.weight\n\nfor p in optimizer.param_groups:\n if p['name']=='fc.weight':\n p['lr']=0\n\n# for p in optimizer.param_groups:\n# print( p['name'], p['lr'])\n\ninput = torch.rand(bs,3, 256,256)\ntarget = torch.randint (1000, (bs,))\n\nmodel.train()\np1 = model.fc.weight.clone()\n\noutput = model(input)\nloss_fn = torch.nn.CrossEntropyLoss()\nloss = loss_fn(output, target)\nprint(loss)\noptimizer.zero_grad()\nloss.backward()\noptimizer.step()\np2 = model.fc.weight\n\nprint(torch.equal(p1,p2))\nprint(p1, p2)", | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "tensor(35.6802, grad_fn=<NllLossBackward>)\nTrue\ntensor([[-0.0091, 0.0003, -0.0310, ..., 0.0236, -0.0337, 0.0137],\n [ 0.0360, -0.0333, -0.0358, ..., 0.0186, -0.0220, -0.0134],\n [-0.0048, 0.0038, 0.0166, ..., -0.0411, -0.0392, -0.0097],\n ...,\n [-0.0204, 0.0192, -0.0088, ..., 0.0161, 0.0180, 0.0166],\n [ 0.0234, -0.0052, 0.0011, ..., -0.0016, -0.0316, -0.0415],\n [-0.0418, 0.0337, -0.0345, ..., 0.0115, -0.0198, 0.0014]],\n grad_fn=<CloneBackward>) Parameter containing:\ntensor([[-0.0091, 0.0003, -0.0310, ..., 0.0236, -0.0337, 0.0137],\n [ 0.0360, -0.0333, -0.0358, ..., 0.0186, -0.0220, -0.0134],\n [-0.0048, 0.0038, 0.0166, ..., -0.0411, -0.0392, -0.0097],\n ...,\n [-0.0204, 0.0192, -0.0088, ..., 0.0161, 0.0180, 0.0166],\n [ 0.0234, -0.0052, 0.0011, ..., -0.0016, -0.0316, -0.0415],\n [-0.0418, 0.0337, -0.0345, ..., 0.0115, -0.0198, 0.0014]],\n requires_grad=True)\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# even multiple parameters can be inside a single param group\n\nfrom torchvision.models import resnet18\nmodel = resnet18(pretrained=False)\noptimizer = torch.optim.SGD(list(model.conv1.parameters())+list(model.fc.parameters()), lr=0.1, momentum=0.9)\nfor param_group in optimizer.param_groups:\n print(param_group['lr']) ", | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "0.1\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
    "source": "# once more what is inside the optimizer state dict: state and param_groups\nprint(optimizer.state_dict())", | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "{'state': {}, 'param_groups': [{'lr': 0.1, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [2188731422616, 2188730930088, 2188730930664]}]}\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "## Working with `requires_grad`" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "import torch\nfrom torchvision.models import resnet18\nmodel = resnet18(pretrained=False)\noptimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)\n\nfor name, p in model.named_parameters():\n p.requires_grad=False\n\nmodel.conv1.weight.requires_grad_(True)\nmodel.fc.weight.requires_grad = True\n\nbs=1\ninput = torch.rand(bs,3, 128,128)\ntarget = torch.randint (1000, (1,))\nmodel.train()\n\np1 = model.fc.weight.clone()\noutput = model(input)\n\nloss_fn = torch.nn.CrossEntropyLoss()\nloss = loss_fn(output, target)\n\noptimizer.zero_grad()\nloss.backward()\noptimizer.step()\n\np2 = model.fc.weight\n\n\nprint(torch.equal(p1,p2))\nprint(p1,p2)", | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "False\ntensor([[ 0.0007, -0.0217, 0.0195, ..., -0.0349, 0.0243, 0.0407],\n [ 0.0289, -0.0284, -0.0226, ..., -0.0101, 0.0058, -0.0020],\n [-0.0037, -0.0206, 0.0072, ..., -0.0413, 0.0119, 0.0141],\n ...,\n [-0.0020, -0.0051, 0.0062, ..., 0.0061, -0.0358, 0.0293],\n [ 0.0165, -0.0386, -0.0076, ..., -0.0365, -0.0242, 0.0380],\n [ 0.0167, -0.0085, 0.0145, ..., -0.0114, -0.0181, -0.0384]],\n grad_fn=<CloneBackward>) Parameter containing:\ntensor([[ 0.0007, -0.0217, 0.0195, ..., -0.0349, 0.0242, 0.0407],\n [ 0.0288, -0.0284, -0.0227, ..., -0.0102, 0.0058, -0.0021],\n [-0.0038, -0.0206, 0.0071, ..., -0.0413, 0.0118, 0.0141],\n ...,\n [-0.0021, -0.0052, 0.0061, ..., 0.0060, -0.0358, 0.0293],\n [ 0.0163, -0.0387, -0.0078, ..., -0.0366, -0.0243, 0.0379],\n [ 0.0166, -0.0086, 0.0144, ..., -0.0114, -0.0182, -0.0384]],\n requires_grad=True)\n", | |
"name": "stdout" | |
} | |
] | |
} | |
], | |
"metadata": { | |
"_draft": { | |
"nbviewer_url": "https://gist.github.com/81c60e579849c07b8c9e93cf6a9797b5" | |
}, | |
"gist": { | |
"id": "81c60e579849c07b8c9e93cf6a9797b5", | |
"data": { | |
      "description": "Freezing layers (parameters) some ideas", | |
"public": true | |
} | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.7.3", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment