Last active
October 11, 2017 18:34
-
-
Save sam186/75e500452cbf9b863576e2d47c7733e7 to your computer and use it in GitHub Desktop.
pytorch dataparallel hang
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#0 0x00007ffff76c1827 in futex_abstimed_wait_cancelable (private=0, abstime=0x0, expected=0, | |
futex_word=0x7fff04000c10) at ../sysdeps/unix/sysv/linux/futex-internal.h:205 | |
#1 do_futex_wait (sem=sem@entry=0x7fff04000c10, abstime=0x0) at sem_waitcommon.c:111 | |
#2 0x00007ffff76c18d4 in __new_sem_wait_slow (sem=0x7fff04000c10, abstime=0x0) at sem_waitcommon.c:181 | |
#3 0x00007ffff76c197a in __new_sem_wait (sem=<optimized out>) at sem_wait.c:29 | |
#4 0x00007ffff7a61b33 in PyThread_acquire_lock_timed (lock=0x7fff04000c10, microseconds=-1000000, intr_flag=1) | |
at Python/thread_pthread.h:354 | |
#5 0x00007ffff7a68804 in acquire_timed (lock=0x7fff04000c10, timeout=-1000000000) at ./Modules/_threadmodule.c:68 | |
#6 0x00007ffff7a68946 in lock_PyThread_acquire_lock (self=0x7ffff6456418, args=<optimized out>, | |
kwds=<optimized out>) at ./Modules/_threadmodule.c:151 | |
#7 0x00007ffff7992302 in _PyCFunction_FastCallDict (func_obj=0x7fffc6377cf0, args=0x7fffbc07baf8, | |
nargs=<optimized out>, kwargs=0x0) at Objects/methodobject.c:231 | |
#8 0x00007ffff7a17b8c in call_function (pp_stack=0x7fffffffcb98, oparg=<optimized out>, kwnames=0x0) | |
at Python/ceval.c:4809 | |
#9 0x00007ffff7a1ad40 in _PyEval_EvalFrameDefault (f=<optimized out>, throwflag=<optimized out>) | |
at Python/ceval.c:3295 | |
#10 0x00007ffff7a16100 in _PyEval_EvalCodeWithName (_co=0x7ffff0b311e0, globals=<optimized out>, | |
locals=<optimized out>, args=<optimized out>, argcount=1, kwnames=0x0, kwargs=0x7fffbc084730, kwcount=0, | |
kwstep=1, defs=0x7ffff0b26360, defcount=2, kwdefs=0x0, closure=0x0, name=0x7ffff0b27930, | |
qualname=0x7ffff0b30440) at Python/ceval.c:4139 | |
#11 0x00007ffff7a17b2a in fast_function (kwnames=<optimized out>, nargs=1, stack=<optimized out>, | |
func=0x7ffff0b389d8) at Python/ceval.c:4950 | |
#12 call_function (pp_stack=0x7fffffffce38, oparg=<optimized out>, kwnames=<optimized out>) at Python/ceval.c:4830 | |
#13 0x00007ffff7a1ad40 in _PyEval_EvalFrameDefault (f=<optimized out>, throwflag=<optimized out>) | |
at Python/ceval.c:3295 | |
#14 0x00007ffff7a16100 in _PyEval_EvalCodeWithName (_co=0x7ffff0b31150, globals=<optimized out>, | |
locals=<optimized out>, args=<optimized out>, argcount=1, kwnames=0x0, kwargs=0x7fff0927a1c0, kwcount=0, | |
kwstep=1, defs=0x7ffff0b322c8, defcount=1, kwdefs=0x0, closure=0x0, name=0x7ffff7f96d18, | |
qualname=0x7ffff0b28c70) at Python/ceval.c:4139 | |
#15 0x00007ffff7a17b2a in fast_function (kwnames=<optimized out>, nargs=1, stack=<optimized out>, | |
func=0x7ffff0b38950) at Python/ceval.c:4950 | |
#16 call_function (pp_stack=0x7fffffffd0d8, oparg=<optimized out>, kwnames=<optimized out>) at Python/ceval.c:4830 | |
#17 0x00007ffff7a1ad40 in _PyEval_EvalFrameDefault (f=<optimized out>, throwflag=<optimized out>) | |
at Python/ceval.c:3295 | |
#18 0x00007ffff7a16100 in _PyEval_EvalCodeWithName (_co=0x7fffc6383810, globals=<optimized out>, | |
locals=<optimized out>, args=<optimized out>, argcount=4, kwnames=0x0, kwargs=0x7fffbc07c5f8, kwcount=0, | |
kwstep=1, defs=0x7fffc66917e0, defcount=2, kwdefs=0x0, closure=0x0, name=0x7fffc6385a30, | |
qualname=0x7fffc6385a30) at Python/ceval.c:4139 | |
#19 0x00007ffff7a17b2a in fast_function (kwnames=<optimized out>, nargs=4, stack=<optimized out>, | |
func=0x7fffc63888c8) at Python/ceval.c:4950 | |
#20 call_function (pp_stack=0x7fffffffd378, oparg=<optimized out>, kwnames=<optimized out>) at Python/ceval.c:4830 | |
#21 0x00007ffff7a1ad40 in _PyEval_EvalFrameDefault (f=<optimized out>, throwflag=<optimized out>) | |
at Python/ceval.c:3295 | |
#22 0x00007ffff7a15514 in _PyFunction_FastCall (co=<optimized out>, args=<optimized out>, nargs=4, | |
globals=<optimized out>) at Python/ceval.c:4891 | |
#23 0x00007ffff7a17c88 in fast_function (kwnames=0x0, nargs=4, stack=<optimized out>, func=0x7fffc63a1620) | |
at Python/ceval.c:4926 | |
#24 call_function (pp_stack=0x7fffffffd5a8, oparg=<optimized out>, kwnames=0x0) at Python/ceval.c:4830 | |
#25 0x00007ffff7a1ad40 in _PyEval_EvalFrameDefault (f=<optimized out>, throwflag=<optimized out>) | |
at Python/ceval.c:3295 | |
#26 0x00007ffff7a16100 in _PyEval_EvalCodeWithName (_co=0x7fffc638da50, globals=<optimized out>, | |
locals=<optimized out>, args=<optimized out>, argcount=2, kwnames=0x7ffff7f91060, kwargs=0x7ffff7f91068, | |
kwcount=0, kwstep=2, defs=0x0, defcount=0, kwdefs=0x0, closure=0x0, name=0x7fffc67057a0, | |
qualname=0x7fffc6391df8) at Python/ceval.c:4139 | |
#27 0x00007ffff7a1639c in _PyFunction_FastCallDict (func=0x7fffc63a1488, args=0x7fffffffd7e0, nargs=2, | |
kwargs=0x7ffff64433a8) at Python/ceval.c:5042 | |
#28 0x00007ffff793ace6 in _PyObject_FastCallDict (func=0x7fffc63a1488, args=0x7fffffffd7e0, nargs=<optimized out>, | |
kwargs=0x7ffff64433a8) at Objects/abstract.c:2295 | |
#29 0x00007ffff793af3c in _PyObject_Call_Prepend (func=0x7fffc63a1488, obj=0x7ffff6441f98, args=0x7ffff7e74fd0, | |
kwargs=0x7ffff64433a8) at Objects/abstract.c:2358 | |
#30 0x00007ffff793afd6 in PyObject_Call (func=0x7ffff7f878c8, args=<optimized out>, kwargs=<optimized out>) | |
at Objects/abstract.c:2246 | |
#31 0x00007ffff7a1bfc9 in do_call_core (kwdict=0x7ffff64433a8, callargs=<optimized out>, func=0x7ffff7f878c8) | |
at Python/ceval.c:5078 | |
#32 _PyEval_EvalFrameDefault (f=<optimized out>, throwflag=<optimized out>) at Python/ceval.c:3377 | |
#33 0x00007ffff7a16100 in _PyEval_EvalCodeWithName (_co=0x7fffc654a660, globals=<optimized out>, | |
locals=<optimized out>, args=<optimized out>, argcount=2, kwnames=0x0, kwargs=0x8, kwcount=0, kwstep=2, | |
defs=0x0, defcount=0, kwdefs=0x0, closure=0x0, name=0x7ffff7f94170, qualname=0x7fffc66a1230) | |
at Python/ceval.c:4139 | |
#34 0x00007ffff7a1639c in _PyFunction_FastCallDict (func=0x7fffc6430488, args=0x7fffffffdbd0, nargs=2, kwargs=0x0) | |
at Python/ceval.c:5042 | |
#35 0x00007ffff793ace6 in _PyObject_FastCallDict (func=0x7fffc6430488, args=0x7fffffffdbd0, nargs=<optimized out>, | |
kwargs=0x0) at Objects/abstract.c:2295 | |
#36 0x00007ffff793af3c in _PyObject_Call_Prepend (func=0x7fffc6430488, obj=0x7ffff6441f98, args=0x7ffff7e74ef0, | |
kwargs=0x0) at Objects/abstract.c:2358 | |
#37 0x00007ffff793afd6 in PyObject_Call (func=0x7ffff7f87908, args=<optimized out>, kwargs=<optimized out>) | |
at Objects/abstract.c:2246 | |
#38 0x00007ffff79b344f in slot_tp_call (self=0x7ffff6441f98, args=0x7ffff7e74ef0, kwds=0x0) | |
at Objects/typeobject.c:6194 | |
#39 0x00007ffff793ac1e in _PyObject_FastCallDict (func=0x7ffff6441f98, args=<optimized out>, nargs=<optimized out>, | |
kwargs=0x0) at Objects/abstract.c:2316 | |
#40 0x00007ffff7a1795b in call_function (pp_stack=0x7fffffffdec8, oparg=<optimized out>, kwnames=0x0) | |
at Python/ceval.c:4833 | |
#41 0x00007ffff7a1ad40 in _PyEval_EvalFrameDefault (f=<optimized out>, throwflag=<optimized out>) | |
at Python/ceval.c:3295 | |
#42 0x00007ffff7a16100 in _PyEval_EvalCodeWithName (_co=0x7ffff7f028a0, globals=<optimized out>, | |
locals=<optimized out>, args=<optimized out>, argcount=0, kwnames=0x0, kwargs=0x8, kwcount=0, kwstep=2, | |
defs=0x0, defcount=0, kwdefs=0x0, closure=0x0, name=0x0, qualname=0x0) at Python/ceval.c:4139 | |
#43 0x00007ffff7a16583 in PyEval_EvalCodeEx (_co=<optimized out>, globals=<optimized out>, locals=<optimized out>, | |
args=<optimized out>, argcount=<optimized out>, kws=<optimized out>, kwcount=0, defs=0x0, defcount=0, | |
kwdefs=0x0, closure=0x0) at Python/ceval.c:4160 | |
#44 0x00007ffff7a165cb in PyEval_EvalCode (co=<optimized out>, globals=<optimized out>, locals=<optimized out>) | |
at Python/ceval.c:695 | |
#45 0x00007ffff7a48ee0 in run_mod (arena=0x7ffff7f63270, flags=0x7fffffffe220, locals=0x7ffff7f45f78, | |
globals=0x7ffff7f45f78, filename=0x7ffff7e870a0, mod=0x6b1970) at Python/pythonrun.c:980 | |
#46 PyRun_FileExFlags (fp=0x68c070, filename_str=<optimized out>, start=<optimized out>, globals=0x7ffff7f45f78, | |
locals=0x7ffff7f45f78, closeit=<optimized out>, flags=0x7fffffffe220) at Python/pythonrun.c:933 | |
#47 0x00007ffff7a4a4a3 in PyRun_SimpleFileExFlags (fp=0x68c070, filename=<optimized out>, closeit=1, | |
flags=0x7fffffffe220) at Python/pythonrun.c:396 | |
#48 0x00007ffff7a658d5 in run_file (p_cf=0x7fffffffe220, filename=0x6032c0 L"test.py", fp=0x68c070) | |
at Modules/main.c:338 | |
#49 Py_Main (argc=<optimized out>, argv=<optimized out>) at Modules/main.c:810 | |
#50 0x0000000000400c1d in main (argc=2, argv=<optimized out>) at ./Programs/python.c:69 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Minimal reproduction of a hang in the forward pass of ``nn.DataParallel``.

Builds a small ``nn.Linear`` layer on CUDA, wraps it in ``nn.DataParallel``
and runs one forward pass. On the affected machine the forward call never
returns, so "Checkpoint 3" is never printed.

Requires a CUDA-capable PyTorch install.
"""
import torch
from torch import nn
from torch.autograd import Variable

# Every checkpoint is printed with flush=True so that the last message before
# the hang is visible even when stdout is block-buffered (piped to a file or
# another process).
linear = nn.Linear(5, 5).cuda()
parallel = nn.DataParallel(linear)
print("Checkpoint 1", flush=True)

inputs = Variable(torch.rand(5, 5).cuda(), requires_grad=True)
print("Checkpoint 2", flush=True)

# This is the call that gets stuck; it is intentionally left as-is because
# reproducing the hang is the purpose of the script.
print(parallel(inputs), flush=True)
print("Checkpoint 3", flush=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment