Created
May 5, 2021 15:56
-
-
Save morganmcg1/54378ed49976d3fe42a303f1c92b59a2 to your computer and use it in GitHub Desktop.
cluster_dataloader_error
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-17-d566959a97b0> in <module>
1 # If one or more worker jobs errors, this will describe the issue
----> 2 futures[0].result()
/srv/conda/envs/saturn/lib/python3.7/site-packages/distributed/client.py in result(self, timeout)
223 if self.status == "error":
224 typ, exc, tb = result
--> 225 raise exc.with_traceback(tb)
226 elif self.status == "cancelled":
227 raise result
/srv/conda/envs/saturn/lib/python3.7/site-packages/dask_pytorch_ddp/dispatch.py in dispatch_with_ddp()
117 try:
118 dist.init_process_group(backend=backend)
--> 119 val = pytorch_function(*args, **kwargs)
120 finally:
121 dist.destroy_process_group()
<ipython-input-14-bcb0682c2503> in simple_train_cluster()
66 model.train()
67
---> 68 for inputs, labels in train_loader:
69 # zero the parameter gradients
70 optimizer.zero_grad()
/srv/conda/envs/saturn/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __iter__()
350 return self._iterator
351 else:
--> 352 return self._get_iterator()
353
354 @property
/srv/conda/envs/saturn/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _get_iterator()
292 return _SingleProcessDataLoaderIter(self)
293 else:
--> 294 return _MultiProcessingDataLoaderIter(self)
295
296 @property
/srv/conda/envs/saturn/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __init__()
799 # before it starts, and __del__ tries to join but will get:
800 # AssertionError: can only join a started process.
--> 801 w.start()
802 self._index_queues.append(index_queue)
803 self._workers.append(w)
/srv/conda/envs/saturn/lib/python3.7/multiprocessing/process.py in start()
108 'can only start a process object created by current process'
109 assert not _current_process._config.get('daemon'), \
--> 110 'daemonic processes are not allowed to have children'
111 _cleanup()
112 self._popen = self._Popen(self)
AssertionError: daemonic processes are not allowed to have children
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment