Skip to content

Instantly share code, notes, and snippets.

@MasanoriYamada
Last active May 6, 2023 16:50
Show Gist options
  • Save MasanoriYamada/9c12a3854836b7cedc970ab7350af219 to your computer and use it in GitHub Desktop.
Save MasanoriYamada/9c12a3854836b7cedc970ab7350af219 to your computer and use it in GitHub Desktop.
torchopt lr scheduler
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "07c385dd",
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import torch.optim as optim\n",
"import torch.optim.lr_scheduler\n",
"import torchvision\n",
"import torchvision.transforms as transforms\n",
"#from torch.nn.utils._stateless import functional_call\n",
"from torch.func import functional_call\n",
"from collections import OrderedDict\n",
"\n",
"import torchopt\n",
"import functorch\n",
"from copy import deepcopy\n",
"\n",
"# Define a simple CNN model\n",
"class SimpleCNN(nn.Module):\n",
" def __init__(self):\n",
" super(SimpleCNN, self).__init__()\n",
" self.conv1 = nn.Conv2d(1, 6, 5)\n",
" self.pool = nn.MaxPool2d(2, 2)\n",
" self.conv2 = nn.Conv2d(6, 16, 5)\n",
" self.fc1 = nn.Linear(16 * 4 * 4, 120)\n",
" self.fc2 = nn.Linear(120, 84)\n",
" self.fc3 = nn.Linear(84, 10)\n",
"\n",
" def forward(self, x):\n",
" x = self.pool(F.relu(self.conv1(x)))\n",
" x = self.pool(F.relu(self.conv2(x)))\n",
" x = x.view(-1, 16 * 4 * 4)\n",
" x = F.relu(self.fc1(x))\n",
" x = F.relu(self.fc2(x))\n",
" x = self.fc3(x)\n",
" return x\n",
"\n",
"# Load the MNIST dataset (train=False, i.e. the test split, despite the 'trainset' name)\n",
"transform = transforms.Compose(\n",
" [transforms.ToTensor(),\n",
" transforms.Normalize((0.5,), (0.5,))])\n",
"\n",
"trainset = torchvision.datasets.MNIST(root='./data', train=False,\n",
" download=True, transform=transform)\n",
"trainloader = torch.utils.data.DataLoader(trainset, batch_size=100,\n",
" shuffle=False, num_workers=0)\n",
"trainloader2 = torch.utils.data.DataLoader(trainset, batch_size=100,\n",
" shuffle=False, num_workers=0)\n",
"\n",
"\n",
"# Initialize the models\n",
"model = SimpleCNN()\n",
"model_ref = deepcopy(model)\n",
"model_refref = deepcopy(model)\n",
"\n",
"# Set device and dtype\n",
"device = 'cpu'\n",
"dtype = torch.float32\n",
"\n",
"# Set parameters for the test\n",
"lr = 1\n",
"total_iters = 10\n",
"optimizers = (torchopt.sgd, torch.optim.SGD, {})\n",
"inplace = False\n",
"weight_decay = 0.9\n",
"use_chain_flat = True"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bbb332db",
"metadata": {},
"outputs": [],
"source": [
"\n",
"model.to(device)\n",
"model_ref.to(device)\n",
"\n",
"\n",
"torchopt_optimizer, torch_optimizer, optimizer_kwargs = optimizers\n",
"\n",
"fmodel, params, buffers = functorch.make_functional_with_buffers(model)\n",
"lr_schedule = torchopt.schedule.linear_schedule(\n",
" init_value=lr,\n",
" end_value=0.1 * lr,\n",
" transition_steps=total_iters,\n",
" transition_begin=0,\n",
" )\n",
"optim = torchopt_optimizer(\n",
" lr_schedule,\n",
" weight_decay=weight_decay,\n",
" **optimizer_kwargs,\n",
")\n",
"optim_state = optim.init(params)\n",
"optim_ref = torch_optimizer(\n",
" model_ref.parameters(),\n",
" lr,\n",
" weight_decay=weight_decay,\n",
" **optimizer_kwargs,\n",
")\n",
"torch_scheduler = torch.optim.lr_scheduler.LinearLR(\n",
" optim_ref,\n",
" start_factor=1.0,\n",
" end_factor=0.1,\n",
" total_iters=total_iters,\n",
")\n",
"\n",
"for i, (xs, ys) in enumerate(trainloader):\n",
" if i >= total_iters:\n",
" break\n",
" xs = xs.to(device=device, dtype=dtype)\n",
" ys = ys.to(device=device)\n",
"\n",
" pred = fmodel(params, buffers, xs)\n",
" pred_ref = model_ref(xs)\n",
" loss = F.cross_entropy(pred, ys)\n",
" loss_ref = F.cross_entropy(pred_ref, ys)\n",
" print(loss, loss == loss_ref)\n",
" grads = torch.autograd.grad(loss, params, allow_unused=True)\n",
" #print(grads[0][0]) \n",
" updates, optim_state = optim.update(grads, optim_state, params=params, inplace=inplace)\n",
" params = torchopt.apply_updates(params, updates, inplace=inplace)\n",
"\n",
" optim_ref.zero_grad()\n",
" loss_ref.backward()\n",
" optim_ref.step()\n",
" print('lr', optim_ref.param_groups[0]['lr'])\n",
" print('lr', lr_schedule(i)) \n",
" torch_scheduler.step()"
]
},
{
"cell_type": "markdown",
"id": "2e689e2c",
"metadata": {},
"source": [
"# functional_callで書き直す"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eb4417d0",
"metadata": {},
"outputs": [],
"source": [
"lr_schedule = torchopt.schedule.linear_schedule(\n",
" init_value=lr,\n",
" end_value=0.1 * lr,\n",
" transition_steps=total_iters,\n",
" transition_begin=0,\n",
" )\n",
"\n",
"optim = torchopt_optimizer(\n",
" lr_schedule,\n",
" weight_decay=weight_decay,\n",
" **optimizer_kwargs,\n",
")\n",
"params = model_refref.state_dict()\n",
"optim_state = optim.init(params)\n",
"\n",
"for i, (xs, ys) in enumerate(trainloader2):\n",
" if i >= total_iters:\n",
" break\n",
" xs = xs.to(device=device, dtype=dtype)\n",
" ys = ys.to(device=device)\n",
" for key in params:\n",
" params[key] = params[key].detach() # leaf\n",
" params[key].requires_grad = True\n",
" params[key].grad = None # optimizer.zero_grad()\n",
" pred = functional_call(model_refref, params, xs)\n",
" loss = F.cross_entropy(pred, ys)\n",
" print(loss)\n",
" loss.backward()\n",
" grads = OrderedDict()\n",
" for key in params:\n",
" if params[key].grad is None:\n",
" grads[key] = torch.zeros_like(params[key])\n",
" else:\n",
" grads[key] = params[key].grad\n",
" #print(grads['conv1.weight'][0])\n",
" updates, optim_state = optim.update(grads, optim_state, params=params, inplace=False)\n",
" params = torchopt.apply_updates(params, updates, inplace=False)\n",
" print('lr', lr_schedule(i))"
]
},
{
"cell_type": "markdown",
"id": "17345c29",
"metadata": {},
"source": [
"# custom functionで実現\n",
"\n",
"- CosineLRScheduler"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "1686eba2",
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import torch.optim as optim\n",
"import torch.optim.lr_scheduler\n",
"import torchvision\n",
"import torchvision.transforms as transforms\n",
"#from torch.nn.utils._stateless import functional_call\n",
"from torch.func import functional_call\n",
"from collections import OrderedDict\n",
"\n",
"import torchopt\n",
"import functorch\n",
"from copy import deepcopy\n",
"\n",
"# Define a simple CNN model\n",
"class SimpleCNN(nn.Module):\n",
" def __init__(self):\n",
" super(SimpleCNN, self).__init__()\n",
" self.conv1 = nn.Conv2d(1, 6, 5)\n",
" self.pool = nn.MaxPool2d(2, 2)\n",
" self.conv2 = nn.Conv2d(6, 16, 5)\n",
" self.fc1 = nn.Linear(16 * 4 * 4, 120)\n",
" self.fc2 = nn.Linear(120, 84)\n",
" self.fc3 = nn.Linear(84, 10)\n",
"\n",
" def forward(self, x):\n",
" x = self.pool(F.relu(self.conv1(x)))\n",
" x = self.pool(F.relu(self.conv2(x)))\n",
" x = x.view(-1, 16 * 4 * 4)\n",
" x = F.relu(self.fc1(x))\n",
" x = F.relu(self.fc2(x))\n",
" x = self.fc3(x)\n",
" return x\n",
"\n",
"# Load the MNIST dataset (train=False, i.e. the test split, despite the 'trainset' name)\n",
"transform = transforms.Compose(\n",
" [transforms.ToTensor(),\n",
" transforms.Normalize((0.5,), (0.5,))])\n",
"\n",
"trainset = torchvision.datasets.MNIST(root='./data', train=False,\n",
" download=True, transform=transform)\n",
"trainloader = torch.utils.data.DataLoader(trainset, batch_size=100,\n",
" shuffle=False, num_workers=0)\n",
"trainloader2 = torch.utils.data.DataLoader(trainset, batch_size=100,\n",
" shuffle=False, num_workers=0)\n",
"\n",
"\n",
"# Initialize the models\n",
"model = SimpleCNN()\n",
"model_ref = deepcopy(model)\n",
"\n",
"# Set device and dtype\n",
"device = 'cpu'\n",
"dtype = torch.float32\n",
"\n",
"# Set parameters for the test\n",
"lr = 1\n",
"total_epoch = 10\n",
"optimizers = (torchopt.sgd, torch.optim.SGD, {})\n",
"inplace = False\n",
"weight_decay = 0.9\n",
"use_chain_flat = True"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "d9248512",
"metadata": {},
"outputs": [],
"source": [
"from timm.scheduler import CosineLRScheduler\n",
"\n",
"optim_ref = torch.optim.SGD(\n",
" model_ref.parameters(),\n",
" lr,\n",
" weight_decay=weight_decay,\n",
")\n",
"\n",
"warmup_epoch = 1\n",
"scheduler = CosineLRScheduler(optim_ref, t_initial=total_epoch, lr_min=1e-6, warmup_t=warmup_epoch,\n",
" warmup_lr_init=1e-6, warmup_prefix=True)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "81cf504e",
"metadata": {},
"outputs": [],
"source": [
"torchopt_optimizer = torchopt.sgd\n",
"steps_per_epoch = len(trainloader)\n",
"\n",
"def get_custom_schedule(scheduler, steps_per_epoch):\n",
" def custom_schedule(step: int) -> float:\n",
" epoch = step // steps_per_epoch\n",
" return scheduler.get_epoch_values(epoch)[0]\n",
" return custom_schedule\n",
"\n",
"custom_schedule = get_custom_schedule(scheduler, steps_per_epoch)\n",
"\n",
"optim = torchopt_optimizer(\n",
" custom_schedule,\n",
" weight_decay=weight_decay,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "36f4fb78",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2977, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2990, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3096, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3005, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3050, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3079, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2998, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3103, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3096, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3013, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3070, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3059, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2983, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3067, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2942, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3105, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3069, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3125, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2931, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2991, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3091, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3071, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3094, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2977, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2997, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3062, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3129, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3052, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3111, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2973, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3053, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3085, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2998, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2933, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3083, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2919, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2975, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2985, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2996, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3061, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3054, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2999, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2996, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3056, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2943, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3009, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3038, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2992, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3060, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3048, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3046, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3114, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3042, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3099, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3003, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3073, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2959, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3002, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3096, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3003, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3037, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3065, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3036, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3037, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3012, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3074, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2899, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2993, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3093, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2961, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3043, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3051, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3000, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3060, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2981, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3052, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3091, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2959, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3078, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2992, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3067, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3036, grad_fn=<NllLossBackward0>) tensor(True)\n",
"lr1: 1e-06\n",
"lr2: 1e-06\n",
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3000, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3047, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3012, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3057, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3055, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2979, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2984, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3053, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3037, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3047, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3089, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3078, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3061, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3003, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3114, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2999, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3085, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3060, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3057, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3046, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3008, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3067, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3071, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3051, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3048, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3038, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3004, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3004, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2974, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2984, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3079, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2977, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2994, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3036, grad_fn=<NllLossBackward0>) tensor(True)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor(2.2994, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3048, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3071, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3051, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3061, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3051, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3043, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3037, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3052, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3008, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3047, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n",
"lr1: 1.0\n",
"lr2: 1.0\n",
"tensor(2.2998, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2998, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3046, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3013, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3057, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3052, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2979, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2984, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3052, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3036, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3046, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3087, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3075, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3059, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3048, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3004, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3113, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3000, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3083, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3058, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3056, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3008, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3066, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3070, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3050, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3047, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3005, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3004, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2976, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2984, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3077, grad_fn=<NllLossBackward0>) tensor(False)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2978, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2995, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2995, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3048, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3070, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3051, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3060, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3050, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3043, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3036, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3051, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3043, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3048, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3008, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3046, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"lr1: 0.9755282826193187\n",
"lr2: 0.9755282826193187\n",
"tensor(2.2998, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3000, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3013, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3043, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3056, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3047, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2979, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2987, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3080, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3068, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3055, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3007, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3110, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3001, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3079, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3054, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3052, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3007, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3062, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3067, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3047, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3007, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3046, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3004, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2979, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2986, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3072, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2980, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2997, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2997, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3047, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3047, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3043, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3066, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3050, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3055, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3048, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3048, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3046, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3010, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3037, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3043, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n",
"lr1: 0.9045085926789765\n",
"lr2: 0.9045085926789765\n",
"tensor(2.2999, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3002, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3011, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3038, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3054, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2979, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2990, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3070, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3058, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3050, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3011, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3104, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3004, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3073, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3047, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3047, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3037, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3006, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3056, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3062, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3042, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3042, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3011, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3043, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3002, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2984, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2990, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3066, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2985, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3001, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor(2.3000, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3037, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3061, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3012, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3043, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3011, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3038, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n",
"lr1: 0.7938928322536104\n",
"lr2: 0.7938928322536104\n",
"tensor(2.3000, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3006, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3009, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3051, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2981, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2994, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3037, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3059, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3050, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3036, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3094, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3008, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3066, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3038, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3042, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3008, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3050, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3056, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3038, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3047, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3038, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3000, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2988, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2994, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3058, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2991, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3006, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3004, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3042, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3038, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3054, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3011, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3046, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3042, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3036, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n",
"lr1: 0.6545088426789766\n",
"lr2: 0.6545088426789766\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor(2.3001, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3011, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3008, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2984, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2998, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3042, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3036, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3048, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3037, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3082, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3048, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3056, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3037, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3012, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3050, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2999, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2993, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2998, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3052, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2998, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3012, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3007, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3036, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3046, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3011, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3043, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3037, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n",
"lr1: 0.5000005\n",
"lr2: 0.5000005\n",
"tensor(2.3002, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3009, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2988, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3001, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3011, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3038, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3036, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3037, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3067, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3047, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3046, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3052, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3000, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2999, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3003, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3046, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3002, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor(2.3009, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3036, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3013, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3038, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n",
"lr1: 0.3454921573210235\n",
"lr2: 0.3454921573210235\n",
"tensor(2.3004, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3012, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2993, grad_fn=<NllLossBackward0>) tensor(False)\n",
"tensor(2.3005, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3006, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3052, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3052, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3005, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3008, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3011, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3011, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3004, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3009, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3010, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(False)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n",
"lr1: 0.20610816774638963\n",
"lr2: 0.20610816774638963\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor(2.3006, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3036, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.2998, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3011, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3007, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3048, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3012, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3012, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3009, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3009, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3012, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n",
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n",
"lr1: 0.09549240732102351\n",
"lr2: 0.09549240732102351\n"
]
}
],
"source": [
"model.to(device)\n",
"model_ref.to(device)\n",
"\n",
"\n",
"fmodel, params, buffers = functorch.make_functional_with_buffers(model)\n",
"optim_state = optim.init(params)\n",
"\n",
"\n",
"for epoch in range(total_epoch):\n",
" scheduler.step(epoch)\n",
" for i, (xs, ys) in enumerate(trainloader):\n",
" step = len(trainloader) * epoch + i\n",
" xs = xs.to(device=device, dtype=dtype)\n",
" ys = ys.to(device=device)\n",
"\n",
" pred = fmodel(params, buffers, xs)\n",
" pred_ref = model_ref(xs)\n",
" loss = F.cross_entropy(pred, ys)\n",
" loss_ref = F.cross_entropy(pred_ref, ys)\n",
" print(loss, loss == loss_ref)\n",
" grads = torch.autograd.grad(loss, params, allow_unused=True)\n",
" #print(grads[0][0]) \n",
" updates, optim_state = optim.update(grads, optim_state, params=params, inplace=inplace)\n",
" params = torchopt.apply_updates(params, updates, inplace=inplace)\n",
"\n",
" optim_ref.zero_grad()\n",
" loss_ref.backward()\n",
" optim_ref.step()\n",
" print('lr1:', optim_ref.param_groups[0]['lr'])\n",
" print('lr2:', custom_schedule(step))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3c22cbad",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@MasanoriYamada
Copy link
Author

MasanoriYamada commented May 6, 2023

torchoptのlrスケジューリングの正しさのチェック
https://github.com/metaopt/torchopt/blob/4701569cf12cf4be3787371390bd66c647594cdb/tests/test_schedule.py#L117

custom functionでちゃんと動くようになった

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment