Created
January 26, 2017 15:53
-
-
Save culurciello/47acb81fde1d19082f2a59c73c3c2ce0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
elab@gpu5 ~/pytorch-examples/imagenet [master*]$ python3 main.py -a alexnet /media/SuperSSD/test-dataset-train-val/
=> creating model 'alexnet'
Traceback (most recent call last):
  File "main.py", line 286, in <module>
    main()
  File "main.py", line 129, in main
    train(train_loader, model, criterion, optimizer, epoch)
  File "main.py", line 165, in train
    output = model(input_var)
  File "/usr/local/lib/python3.4/dist-packages/torch/nn/modules/module.py", line 210, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.4/dist-packages/torchvision/models/alexnet.py", line 42, in forward
    x = self.features(x)
  File "/usr/local/lib/python3.4/dist-packages/torch/nn/modules/module.py", line 210, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.4/dist-packages/torch/nn/parallel/data_parallel.py", line 41, in forward
    replicas = self.replicate(self.module, self.device_ids)
  File "/usr/local/lib/python3.4/dist-packages/torch/nn/parallel/data_parallel.py", line 48, in replicate
    return replicate(module, device_ids)
  File "/usr/local/lib/python3.4/dist-packages/torch/nn/parallel/replicate.py", line 33, in replicate
    param_copies = Broadcast(device_ids)(param)
  File "/usr/local/lib/python3.4/dist-packages/torch/nn/parallel/_functions.py", line 15, in forward
    return comm.broadcast(input, self.target_gpus)
  File "/usr/local/lib/python3.4/dist-packages/torch/cuda/comm.py", line 23, in broadcast
    nccl.broadcast(tensors)
  File "/usr/local/lib/python3.4/dist-packages/torch/cuda/nccl.py", line 182, in broadcast
    comm = communicator(inputs)
  File "/usr/local/lib/python3.4/dist-packages/torch/cuda/nccl.py", line 137, in communicator
    _communicators[key] = NcclCommList(devices)
  File "/usr/local/lib/python3.4/dist-packages/torch/cuda/nccl.py", line 110, in __init__
    check_error(lib.ncclCommInitAll(self, len(devices), int_array(devices)))
AttributeError: 'NoneType' object has no attribute 'ncclCommInitAll'
Exception ignored in: <bound method NcclCommList.__del__ of <torch.cuda.nccl.NcclCommList object at 0x7eff37a4f2e8>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.4/dist-packages/torch/cuda/nccl.py", line 117, in __del__
    lib.ncclCommDestroy(self[i])
AttributeError: 'NoneType' object has no attribute 'ncclCommDestroy'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment