@InnovArul
Created February 16, 2019 08:14
pytorch forum (Model parameters are not being updated?)

A small MNIST ConvNet whose conv kernel is built with torch.einsum from a fixed filter buffer and a learnable weight; after every optimizer step it prints whether the first parameter tensor is still equal to its pre-step copy.
from __future__ import print_function

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        # fixed bank of two 3x3 filters; registered as a buffer so it is
        # saved with the state_dict but never updated by the optimizer
        filter = torch.Tensor(
            [[[0.06, 0, 0],
              [0.1, 0, 0.2],
              [0.06, 0.1, 0]],
             [[0.1, 0, 0],
              [0.2, 0, 0],
              [0.1, 0, 0]]])
        self.register_buffer("filter", filter)
        # learnable mixing weights and bias for the derived conv kernel
        self.weight = nn.Parameter(torch.Tensor(1, 1, 2))
        self.bias = nn.Parameter(torch.Tensor(1))
        # init params
        nn.init.xavier_uniform_(self.weight)
        self.bias.data.uniform_(-1, 1)
        self.bn1 = nn.BatchNorm2d(1)
        self.fc = nn.Linear(1 * 13 * 13, 10)
    def forward(self, x):
        # build the effective (1, 1, 3, 3) conv kernel as a linear
        # combination of the fixed filters, mixed by the learnable weight
        self.kernel = torch.einsum("ijk, klm -> ijlm", self.weight, self.filter)
        out = F.conv2d(input=x, weight=self.kernel, bias=self.bias)
        out = self.bn1(out)
        out = F.relu(out)
        out = F.max_pool2d(out, kernel_size=2, stride=2)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        return out
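

# A minimal sketch (not part of the original gist) of the shape arithmetic
# behind the einsum in forward(): weight (1, 1, 2) contracts with
# filter (2, 3, 3) over the shared index k, yielding a standard conv kernel
# of shape (out_channels=1, in_channels=1, kH=3, kW=3). Gradients flow
# through the contraction back to weight, so weight stays trainable.
def _einsum_shape_check():
    w = torch.randn(1, 1, 2, requires_grad=True)
    f = torch.randn(2, 3, 3)
    k = torch.einsum("ijk, klm -> ijlm", w, f)
    assert k.shape == (1, 1, 3, 3)
    k.sum().backward()
    assert w.grad is not None  # the learnable weight receives gradients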


def main(argv=None):
    # Device configuration
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # Hyperparameters
    num_epochs = 100
    batch_size = 1024
    learning_rate = 0.001
    log_interval = 10

    # MNIST dataset
    train_dataset = datasets.MNIST(
        root='mnist_data/',
        train=True,
        transform=transforms.ToTensor(),
        download=True)
    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=batch_size,
        shuffle=True)

    model = ConvNet()
    model.to(device)
    for name, param in model.named_parameters():
        print(name, '\t\t', param.shape)

    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    loss_fn = nn.CrossEntropyLoss()
    for epoch in range(1, num_epochs + 1):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = loss_fn(output, target)
            # debug prints left over from the forum thread:
            # print(model.weight)
            # print(model.kernel)
            # print(model.weight.grad)
            # snapshot the first parameter, step, then compare: printing
            # "False" means the optimizer did update it
            a = list(model.parameters())[0].clone()
            loss.backward()
            optimizer.step()
            b = list(model.parameters())[0].clone()
            print(torch.equal(a.data, b.data))
            if batch_idx % log_interval == 0:
                _, predicted = torch.max(output.data, 1)
                total = target.size(0)
                correct = (predicted == target).sum().item()
                print('batch accuracy: {} %'.format(100 * correct / total))
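

# A hedged sketch (hypothetical helper, not in the original gist) that
# generalizes the single-parameter check above: snapshot every named
# parameter before the update and report which ones actually changed.
def report_updated_params(model, optimizer, loss):
    before = {name: p.detach().clone() for name, p in model.named_parameters()}
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return {name: not torch.equal(before[name], p.detach())
            for name, p in model.named_parameters()}
# example usage inside the training loop, replacing the a/b check and the
# backward/step lines:
#     changed = report_updated_params(model, optimizer, loss)
#     print(changed)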


if __name__ == '__main__':
    main()