@vfdev-5
Last active March 6, 2019 18:43
Reproduction script for "TypeError: _queue_reduction(): incompatible function arguments. The following argument types are supported:" raised inside torch.nn.parallel.DistributedDataParallel, apparently triggered by a registered submodule that is never used in forward().
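The script below is a minimal MNIST example (ResNet-18 backbone) that reproduces the error. Assuming it is saved as mnist_ddp.py (a filename chosen here for illustration), it would typically be launched with one process per GPU via the torch.distributed.launch helper that shipped with PyTorch at the time:

python -m torch.distributed.launch --nproc_per_node=2 mnist_ddp.py

The launcher sets the environment variables that init_process_group(backend='nccl') reads and passes --local_rank to each process, which the argument parser below picks up.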
from __future__ import print_function
import argparse

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.nn.parallel as parallel
import torch.distributed as dist
import torch.utils.data.distributed as data_dist
from torch.utils.data import DataLoader
from torchvision.models.resnet import resnet18
from torchvision import datasets, transforms

USE_DISTRIBUTED = True

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.backbone = resnet18(pretrained=False, num_classes=10)
        # adapt the ResNet-18 stem to single-channel 28x28 MNIST images
        self.backbone.conv1 = nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1, bias=False)
        # self.backbone.maxpool = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False)
        # The following module is registered but never used in forward(),
        # so its parameters never receive gradients; this appears to be
        # what trips DistributedDataParallel's gradient reduction
        self.maxpool = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False)

    def forward(self, x):
        x = self.backbone(x)
        return F.log_softmax(x, dim=-1)

def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device, non_blocking=True), target.to(device, non_blocking=True)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='for saving the current model')
    # torch.distributed.launch passes --local_rank to each spawned process
    parser.add_argument("--local_rank", type=int, default=0)
    args = parser.parse_args()
    local_rank = args.local_rank

    if USE_DISTRIBUTED:
        dist.init_process_group(backend='nccl')
        # torch.cuda.device(local_rank) only constructs an (unentered)
        # context manager; set_device is the call that actually binds this
        # process to its GPU
        torch.cuda.set_device(local_rank)

    device = torch.device('cuda:{}'.format(local_rank))
    torch.backends.cudnn.benchmark = True
    torch.manual_seed(args.seed)
    train_sampler = None
    train_dataset = datasets.MNIST('.', train=True, download=True,
                                   transform=transforms.Compose([
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.1307,), (0.3081,))
                                   ]))
    if USE_DISTRIBUTED:
        # each process draws a disjoint shard of the dataset
        train_sampler = data_dist.DistributedSampler(train_dataset)

    kwargs = {'num_workers': 1, 'pin_memory': True}
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              sampler=train_sampler, **kwargs)
    model = Net().to(device)
    if USE_DISTRIBUTED:
        # the failure surfaces later, during backward(), when the reducer
        # hits the unused module's missing gradients
        model = parallel.DistributedDataParallel(model,
                                                 device_ids=[local_rank],
                                                 output_device=local_rank)

    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
    for epoch in range(1, args.epochs + 1):
        if train_sampler is not None:
            # without set_epoch the DistributedSampler reuses the same
            # shuffling order every epoch
            train_sampler.set_epoch(epoch)
        train(args, model, device, train_loader, optimizer, epoch)

    if args.save_model:
        # under DistributedDataParallel the state_dict keys carry a
        # "module." prefix, and every rank writes the same file here
        torch.save(model.state_dict(), "mnist_cnn.pt")

if __name__ == '__main__':
    main()
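A note on the traceback, as a sketch of the likely cause rather than a confirmed diagnosis: self.maxpool is registered on the module but never used in forward(), so after backward() its parameters have no gradients. The PyTorch 1.0 DistributedDataParallel reducer forwards every parameter's gradient to the pybind11-bound _queue_reduction(), and handing it None instead of tensors produces the "incompatible function arguments" TypeError above. Two possible workarounds, assuming the unused module is indeed the trigger: drop the module, or, on PyTorch releases new enough to expose the flag (1.1 and later), ask DDP to detect and skip parameters that never appear in the autograd graph:

model = parallel.DistributedDataParallel(model,
                                         device_ids=[local_rank],
                                         output_device=local_rank,
                                         find_unused_parameters=True)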