Freezing layers (parameters): some ideas
{
"cells": [
{
"metadata": {},
"cell_type": "markdown",
"source": "## Working with the learning rate"
},
{
"metadata": {
"trusted": true,
"code_folding": []
},
"cell_type": "code",
"source": "import torch\nfrom torchvision.models import resnet18\nmodel = resnet18(pretrained=False)\noptimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)\nfor param_group in optimizer.param_groups:\n print(param_group['lr'])\n \n# we will have just a single param group \nprint(optimizer.param_groups[0].keys())",
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": "0.1\ndict_keys(['params', 'lr', 'momentum', 'dampening', 'weight_decay', 'nesterov'])\n",
"name": "stdout"
}
]
},
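{
"metadata": {},
"cell_type": "markdown",
"source": "Since `param_group['lr']` is just a dict entry, we can also write to it. A minimal sketch (the helper name is ours, not a PyTorch API) that rescales every group's lr in place, which is essentially what the `torch.optim.lr_scheduler` classes do:"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# hypothetical helper: scale the lr of every param group in place\ndef scale_lr(optimizer, factor):\n    for param_group in optimizer.param_groups:\n        param_group['lr'] *= factor\n\nscale_lr(optimizer, 0.5)\nfor param_group in optimizer.param_groups:\n    print(param_group['lr'])  # 0.1 * 0.5 -> 0.05",
"execution_count": null,
"outputs": []
},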
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "model = resnet18(pretrained=False)\noptimizer = torch.optim.SGD(model.parameters(), lr=0.1)\noptimizer.param_groups.clear()\noptimizer.param_groups.append({'params' : model.conv1.parameters(), 'lr' : 0.3, 'name': 'model.conv1' })\noptimizer.add_param_group({'params' : model.fc.parameters(), 'lr' : 0.4, 'name': 'model.fc' })\n\n# now we have two groups\nfor param_group in optimizer.param_groups: \n print(param_group['name'], param_group['lr']) ",
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"text": "model.conv1 0.3\nmodel.fc 0.4\n",
"name": "stdout"
}
]
},
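{
"metadata": {},
"cell_type": "markdown",
"source": "The same two-group setup can be built at construction time by passing a list of dicts, which is the documented API and avoids mutating `param_groups` by hand; a per-group `'lr'` overrides the default passed to the constructor:"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# documented usage: a list of group dicts; per-group lr overrides the default lr=0.1\noptimizer = torch.optim.SGD([\n    {'params': model.conv1.parameters(), 'lr': 0.3, 'name': 'model.conv1'},\n    {'params': model.fc.parameters(), 'lr': 0.4, 'name': 'model.fc'},\n], lr=0.1)\n\nfor param_group in optimizer.param_groups:\n    print(param_group['name'], param_group['lr'])",
"execution_count": null,
"outputs": []
},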
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "optimizer.param_groups.clear() \nfor name, param in model.named_parameters():\n optimizer.add_param_group({'params' : param, 'lr' : 0.1, 'name':name})\n \n#now every parameter is a param group \nfor p in optimizer.param_groups:\n print( p['name'], p['lr'])\n\n# for p in optimizer.state_dict()[\"param_groups\"]:\n# print( p['name'], p['lr'])",
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"text": "conv1.weight 0.1\nbn1.weight 0.1\nbn1.bias 0.1\nlayer1.0.conv1.weight 0.1\nlayer1.0.bn1.weight 0.1\nlayer1.0.bn1.bias 0.1\nlayer1.0.conv2.weight 0.1\nlayer1.0.bn2.weight 0.1\nlayer1.0.bn2.bias 0.1\nlayer1.1.conv1.weight 0.1\nlayer1.1.bn1.weight 0.1\nlayer1.1.bn1.bias 0.1\nlayer1.1.conv2.weight 0.1\nlayer1.1.bn2.weight 0.1\nlayer1.1.bn2.bias 0.1\nlayer2.0.conv1.weight 0.1\nlayer2.0.bn1.weight 0.1\nlayer2.0.bn1.bias 0.1\nlayer2.0.conv2.weight 0.1\nlayer2.0.bn2.weight 0.1\nlayer2.0.bn2.bias 0.1\nlayer2.0.downsample.0.weight 0.1\nlayer2.0.downsample.1.weight 0.1\nlayer2.0.downsample.1.bias 0.1\nlayer2.1.conv1.weight 0.1\nlayer2.1.bn1.weight 0.1\nlayer2.1.bn1.bias 0.1\nlayer2.1.conv2.weight 0.1\nlayer2.1.bn2.weight 0.1\nlayer2.1.bn2.bias 0.1\nlayer3.0.conv1.weight 0.1\nlayer3.0.bn1.weight 0.1\nlayer3.0.bn1.bias 0.1\nlayer3.0.conv2.weight 0.1\nlayer3.0.bn2.weight 0.1\nlayer3.0.bn2.bias 0.1\nlayer3.0.downsample.0.weight 0.1\nlayer3.0.downsample.1.weight 0.1\nlayer3.0.downsample.1.bias 0.1\nlayer3.1.conv1.weight 0.1\nlayer3.1.bn1.weight 0.1\nlayer3.1.bn1.bias 0.1\nlayer3.1.conv2.weight 0.1\nlayer3.1.bn2.weight 0.1\nlayer3.1.bn2.bias 0.1\nlayer4.0.conv1.weight 0.1\nlayer4.0.bn1.weight 0.1\nlayer4.0.bn1.bias 0.1\nlayer4.0.conv2.weight 0.1\nlayer4.0.bn2.weight 0.1\nlayer4.0.bn2.bias 0.1\nlayer4.0.downsample.0.weight 0.1\nlayer4.0.downsample.1.weight 0.1\nlayer4.0.downsample.1.bias 0.1\nlayer4.1.conv1.weight 0.1\nlayer4.1.bn1.weight 0.1\nlayer4.1.bn1.bias 0.1\nlayer4.1.conv2.weight 0.1\nlayer4.1.bn2.weight 0.1\nlayer4.1.bn2.bias 0.1\nfc.weight 0.1\nfc.bias 0.1\n",
"name": "stdout"
}
]
},
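{
"metadata": {},
"cell_type": "markdown",
"source": "One group per parameter makes discriminative learning rates easy, e.g. a smaller lr for the backbone than for the head. A sketch with arbitrary illustrative values (0.01 vs 0.1), built as a separate optimizer so the one above stays untouched:"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# sketch: smaller lr for the backbone, larger lr for the classifier head\nbackbone = [p for n, p in model.named_parameters() if not n.startswith('fc')]\nhead = [p for n, p in model.named_parameters() if n.startswith('fc')]\ndisc_optimizer = torch.optim.SGD([\n    {'params': backbone, 'lr': 0.01},  # illustrative values\n    {'params': head, 'lr': 0.1},\n], lr=0.01)\n\nfor g in disc_optimizer.param_groups:\n    print(len(g['params']), g['lr'])",
"execution_count": null,
"outputs": []
},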
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# checking what we learn fc.weight\nbs=1\ninput = torch.rand(bs,3, 256,256)\ntarget = torch.randint (1000, (bs,))\n\nmodel.train()\np1 = model.fc.weight.clone()\n\noutput = model(input)\nloss_fn = torch.nn.CrossEntropyLoss()\nloss = loss_fn(output, target)\nprint(loss)\noptimizer.zero_grad()\nloss.backward()\noptimizer.step()\np2 = model.fc.weight\n\nprint(torch.equal(p1,p2))\nprint(p1, p2)",
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"text": "tensor(7.2231, grad_fn=<NllLossBackward>)\nFalse\ntensor([[-0.0090, 0.0003, -0.0309, ..., 0.0237, -0.0336, 0.0138],\n [ 0.0361, -0.0332, -0.0357, ..., 0.0187, -0.0219, -0.0133],\n [-0.0047, 0.0039, 0.0167, ..., -0.0410, -0.0391, -0.0096],\n ...,\n [-0.0203, 0.0192, -0.0088, ..., 0.0162, 0.0181, 0.0167],\n [ 0.0235, -0.0051, 0.0012, ..., -0.0016, -0.0316, -0.0415],\n [-0.0416, 0.0338, -0.0343, ..., 0.0116, -0.0197, 0.0015]],\n grad_fn=<CloneBackward>) Parameter containing:\ntensor([[-0.0091, 0.0003, -0.0310, ..., 0.0236, -0.0337, 0.0137],\n [ 0.0360, -0.0333, -0.0358, ..., 0.0186, -0.0220, -0.0134],\n [-0.0048, 0.0038, 0.0166, ..., -0.0411, -0.0392, -0.0097],\n ...,\n [-0.0204, 0.0192, -0.0088, ..., 0.0161, 0.0180, 0.0166],\n [ 0.0234, -0.0052, 0.0011, ..., -0.0016, -0.0316, -0.0415],\n [-0.0418, 0.0337, -0.0345, ..., 0.0115, -0.0198, 0.0014]],\n requires_grad=True)\n",
"name": "stdout"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# checking that we don't learn fc.weight\n\nfor p in optimizer.param_groups:\n if p['name']=='fc.weight':\n p['lr']=0\n\n# for p in optimizer.param_groups:\n# print( p['name'], p['lr'])\n\ninput = torch.rand(bs,3, 256,256)\ntarget = torch.randint (1000, (bs,))\n\nmodel.train()\np1 = model.fc.weight.clone()\n\noutput = model(input)\nloss_fn = torch.nn.CrossEntropyLoss()\nloss = loss_fn(output, target)\nprint(loss)\noptimizer.zero_grad()\nloss.backward()\noptimizer.step()\np2 = model.fc.weight\n\nprint(torch.equal(p1,p2))\nprint(p1, p2)",
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"text": "tensor(35.6802, grad_fn=<NllLossBackward>)\nTrue\ntensor([[-0.0091, 0.0003, -0.0310, ..., 0.0236, -0.0337, 0.0137],\n [ 0.0360, -0.0333, -0.0358, ..., 0.0186, -0.0220, -0.0134],\n [-0.0048, 0.0038, 0.0166, ..., -0.0411, -0.0392, -0.0097],\n ...,\n [-0.0204, 0.0192, -0.0088, ..., 0.0161, 0.0180, 0.0166],\n [ 0.0234, -0.0052, 0.0011, ..., -0.0016, -0.0316, -0.0415],\n [-0.0418, 0.0337, -0.0345, ..., 0.0115, -0.0198, 0.0014]],\n grad_fn=<CloneBackward>) Parameter containing:\ntensor([[-0.0091, 0.0003, -0.0310, ..., 0.0236, -0.0337, 0.0137],\n [ 0.0360, -0.0333, -0.0358, ..., 0.0186, -0.0220, -0.0134],\n [-0.0048, 0.0038, 0.0166, ..., -0.0411, -0.0392, -0.0097],\n ...,\n [-0.0204, 0.0192, -0.0088, ..., 0.0161, 0.0180, 0.0166],\n [ 0.0234, -0.0052, 0.0011, ..., -0.0016, -0.0316, -0.0415],\n [-0.0418, 0.0337, -0.0345, ..., 0.0115, -0.0198, 0.0014]],\n requires_grad=True)\n",
"name": "stdout"
}
]
},
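{
"metadata": {},
"cell_type": "markdown",
"source": "Note that `lr = 0` only zeroes the SGD update: the backward pass still computes and stores a gradient for `fc.weight`, so this way of freezing saves no compute. We can confirm the gradient is there even though the weight did not move:"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# with lr=0 the update is zero, but the gradient is still computed\nprint(model.fc.weight.grad is not None)      # True: backward filled it in\nprint(model.fc.weight.grad.abs().sum() > 0)  # typically non-zero",
"execution_count": null,
"outputs": []
},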
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# even multiple parameters can be inside a single param group\n\nfrom torchvision.models import resnet18\nmodel = resnet18(pretrained=False)\noptimizer = torch.optim.SGD(list(model.conv1.parameters())+list(model.fc.parameters()), lr=0.1, momentum=0.9)\nfor param_group in optimizer.param_groups:\n print(param_group['lr']) ",
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"text": "0.1\n",
"name": "stdout"
}
]
},
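{
"metadata": {},
"cell_type": "markdown",
"source": "`itertools.chain` expresses the same concatenation lazily, without building intermediate lists (kept in a separate variable so the optimizer above is left as is):"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "from itertools import chain\n\n# same single-group setup, with a lazy iterator instead of list + list\nchained = torch.optim.SGD(chain(model.conv1.parameters(), model.fc.parameters()),\n                          lr=0.1, momentum=0.9)\n# 1 group, 3 tensors: conv1.weight, fc.weight, fc.bias\nprint(len(chained.param_groups), len(chained.param_groups[0]['params']))",
"execution_count": null,
"outputs": []
},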
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# once more what is insde the optimizer state dict: state and param_groups\nprint(optimizer.state_dict())",
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"text": "{'state': {}, 'param_groups': [{'lr': 0.1, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [2188731422616, 2188730930088, 2188730930664]}]}\n",
"name": "stdout"
}
]
},
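{
"metadata": {},
"cell_type": "markdown",
"source": "The `'params'` entries above are parameter ids, not names, so the state dict is only meaningful together with the model it came from. The usual pattern (the path is a placeholder) is to checkpoint it alongside the model's own state dict:"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# round-trip the optimizer state together with the model weights\ncheckpoint = {'model': model.state_dict(), 'optimizer': optimizer.state_dict()}\ntorch.save(checkpoint, 'checkpoint.pth')  # placeholder path\n\nrestored = torch.load('checkpoint.pth')\nmodel.load_state_dict(restored['model'])\noptimizer.load_state_dict(restored['optimizer'])",
"execution_count": null,
"outputs": []
},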
{
"metadata": {},
"cell_type": "markdown",
"source": "## Working with `requires_grad`"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "import torch\nfrom torchvision.models import resnet18\nmodel = resnet18(pretrained=False)\noptimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)\n\nfor name, p in model.named_parameters():\n p.requires_grad=False\n\nmodel.conv1.weight.requires_grad_(True)\nmodel.fc.weight.requires_grad = True\n\nbs=1\ninput = torch.rand(bs,3, 128,128)\ntarget = torch.randint (1000, (1,))\nmodel.train()\n\np1 = model.fc.weight.clone()\noutput = model(input)\n\nloss_fn = torch.nn.CrossEntropyLoss()\nloss = loss_fn(output, target)\n\noptimizer.zero_grad()\nloss.backward()\noptimizer.step()\n\np2 = model.fc.weight\n\n\nprint(torch.equal(p1,p2))\nprint(p1,p2)",
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"text": "False\ntensor([[ 0.0007, -0.0217, 0.0195, ..., -0.0349, 0.0243, 0.0407],\n [ 0.0289, -0.0284, -0.0226, ..., -0.0101, 0.0058, -0.0020],\n [-0.0037, -0.0206, 0.0072, ..., -0.0413, 0.0119, 0.0141],\n ...,\n [-0.0020, -0.0051, 0.0062, ..., 0.0061, -0.0358, 0.0293],\n [ 0.0165, -0.0386, -0.0076, ..., -0.0365, -0.0242, 0.0380],\n [ 0.0167, -0.0085, 0.0145, ..., -0.0114, -0.0181, -0.0384]],\n grad_fn=<CloneBackward>) Parameter containing:\ntensor([[ 0.0007, -0.0217, 0.0195, ..., -0.0349, 0.0242, 0.0407],\n [ 0.0288, -0.0284, -0.0227, ..., -0.0102, 0.0058, -0.0021],\n [-0.0038, -0.0206, 0.0071, ..., -0.0413, 0.0118, 0.0141],\n ...,\n [-0.0021, -0.0052, 0.0061, ..., 0.0060, -0.0358, 0.0293],\n [ 0.0163, -0.0387, -0.0078, ..., -0.0366, -0.0243, 0.0379],\n [ 0.0166, -0.0086, 0.0144, ..., -0.0114, -0.0182, -0.0384]],\n requires_grad=True)\n",
"name": "stdout"
}
]
}
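,
{
"metadata": {},
"cell_type": "markdown",
"source": "The per-parameter loop generalizes to whole submodules. A common sketch: freeze everything, unfreeze just the head, and build the optimizer over the trainable parameters only, so frozen ones never enter a param group at all:"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "model = resnet18(pretrained=False)\n\n# freeze every parameter, then unfreeze just the head\nfor p in model.parameters():\n    p.requires_grad_(False)\nfor p in model.fc.parameters():\n    p.requires_grad_(True)\n\n# optimizer over trainable parameters only\noptimizer = torch.optim.SGD(\n    (p for p in model.parameters() if p.requires_grad), lr=0.1, momentum=0.9)\nprint(len(optimizer.param_groups[0]['params']))  # 2: fc.weight and fc.bias",
"execution_count": null,
"outputs": []
}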
],
"metadata": {
"_draft": {
"nbviewer_url": "https://gist.github.com/81c60e579849c07b8c9e93cf6a9797b5"
},
"gist": {
"id": "81c60e579849c07b8c9e93cf6a9797b5",
"data": {
"description": "Freezingn layers (parameters) some ideas",
"public": true
}
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.7.3",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
}
},
"nbformat": 4,
"nbformat_minor": 2
}