Skip to content

Instantly share code, notes, and snippets.

@zhreshold
Last active December 14, 2017 00:10
Show Gist options
  • Save zhreshold/c927b30a712709cfa6f9911bf90930bb to your computer and use it in GitHub Desktop.
Save zhreshold/c927b30a712709cfa6f9911bf90930bb to your computer and use it in GitHub Desktop.
Deadlock log
import mxnet as mx
from mxnet import gluon
# Stress loop over a multi-worker Gluon DataLoader: every batch is sharded
# across two GPUs and the engine is drained before progress is printed.
# NOTE(review): presumably the minimal reproduction for the worker hang
# described by this gist -- confirm against the original report.
dataset = gluon.data.vision.MNIST()
loader = gluon.data.DataLoader(
    dataset,
    34,
    last_batch='rollover',
    num_workers=8,
)
ctx = [mx.gpu(0), mx.gpu(1)]

for epoch in range(10):
    for step, batch in enumerate(loader):
        # batch[0] / batch[1] are split separately, one shard per device in
        # ctx (presumably images and labels, going by the names below).
        data, label = (
            gluon.utils.split_and_load(part, ctx_list=ctx)
            for part in (batch[0], batch[1])
        )
        # Wait for all outstanding asynchronous work before reporting, so the
        # printed line means the batch has actually landed on the devices.
        mx.nd.waitall()
        print('epoch %d, batch %d' % (epoch, step))
INFO:root:Epoch[0] Batch[4999] Speed: 1315.471563 samples/sec accuracy=0.110930,top_k_accuracy_5=0.260795
^CTraceback (most recent call last):
Process Process-7:
File "train_imagenet.py", line 192, in <module>
Process Process-8:
Traceback (most recent call last):
Process Process-1:
Traceback (most recent call last):
Traceback (most recent call last):
File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
Process Process-5:
Process Process-2:
File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
Process Process-6:
Traceback (most recent call last):
Traceback (most recent call last):
Process Process-3:
Process Process-4:
Traceback (most recent call last):
Traceback (most recent call last):
File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
Traceback (most recent call last):
File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
self.run()
self.run()
self.run()
File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
self._target(*self._args, **self._kwargs)
File "/home/ubuntu/model_zoo/mxnet/python/mxnet/gluon/data/dataloader.py", line 117, in worker_loop
self._target(*self._args, **self._kwargs)
File "/home/ubuntu/model_zoo/mxnet/python/mxnet/gluon/data/dataloader.py", line 117, in worker_loop
File "/home/ubuntu/model_zoo/mxnet/python/mxnet/gluon/data/dataloader.py", line 117, in worker_loop
self.run()
File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
self.run()
self.run()
self.run()
self._target(*self._args, **self._kwargs)
idx, samples = key_queue.get()
idx, samples = key_queue.get()
File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
File "/usr/lib/python2.7/multiprocessing/queues.py", line 117, in get
idx, samples = key_queue.get()
File "/home/ubuntu/model_zoo/mxnet/python/mxnet/gluon/data/dataloader.py", line 117, in worker_loop
File "/usr/lib/python2.7/multiprocessing/queues.py", line 115, in get
File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
self._target(*self._args, **self._kwargs)
File "/usr/lib/python2.7/multiprocessing/queues.py", line 115, in get
File "/home/ubuntu/model_zoo/mxnet/python/mxnet/gluon/data/dataloader.py", line 117, in worker_loop
self.run()
File "/home/ubuntu/model_zoo/mxnet/python/mxnet/gluon/data/dataloader.py", line 117, in worker_loop
self._target(*self._args, **self._kwargs)
File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
File "/home/ubuntu/model_zoo/mxnet/python/mxnet/gluon/data/dataloader.py", line 117, in worker_loop
idx, samples = key_queue.get()
res = self._recv()
File "/usr/lib/python2.7/multiprocessing/queues.py", line 115, in get
self._target(*self._args, **self._kwargs)
File "/home/ubuntu/model_zoo/mxnet/python/mxnet/gluon/data/dataloader.py", line 65, in recv
self._rlock.acquire()
self._rlock.acquire()
File "/home/ubuntu/model_zoo/mxnet/python/mxnet/gluon/data/dataloader.py", line 117, in worker_loop
buf = self.recv_bytes()
KeyboardInterrupt
KeyboardInterrupt
idx, samples = key_queue.get()
idx, samples = key_queue.get()
KeyboardInterrupt
File "/usr/lib/python2.7/multiprocessing/queues.py", line 115, in get
File "/usr/lib/python2.7/multiprocessing/queues.py", line 115, in get
self._rlock.acquire()
idx, samples = key_queue.get()
File "/usr/lib/python2.7/multiprocessing/queues.py", line 115, in get
KeyboardInterrupt
idx, samples = key_queue.get()
File "/usr/lib/python2.7/multiprocessing/queues.py", line 115, in get
train(net, train_data, val_data, ctx, args)
File "train_imagenet.py", line 144, in train
self._rlock.acquire()
self._rlock.acquire()
for i, batch in enumerate(train_data):
File "/home/ubuntu/model_zoo/mxnet/python/mxnet/gluon/data/dataloader.py", line 227, in __iter__
KeyboardInterrupt
KeyboardInterrupt
self._rlock.acquire()
KeyboardInterrupt
idx, batch = data_queue.get()
File "/usr/lib/python2.7/multiprocessing/queues.py", line 117, in get
self._rlock.acquire()
KeyboardInterrupt
res = self._recv()
File "/home/ubuntu/model_zoo/mxnet/python/mxnet/gluon/data/dataloader.py", line 65, in recv
buf = self.recv_bytes()
KeyboardInterrupt
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment