Skip to content

Instantly share code, notes, and snippets.

@0xee
Last active November 12, 2019 10:37
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save 0xee/bf6b3d9ded7ebd574dad to your computer and use it in GitHub Desktop.
Save 0xee/bf6b3d9ded7ebd574dad to your computer and use it in GitHub Desktop.
CUDA deadlock-reproducing test case
/**
 * No-op CUDA kernel: the body is empty and the single int argument is
 * unnamed and never read.
 *
 * extern "C" gives the symbol C linkage, so its name is not C++-mangled.
 * NOTE(review): presumably this lets host code look the kernel up by the
 * plain name "empty" -- confirm against the host side, which is not shown here.
 */
extern "C" __global__
void empty(int /* unused */)
{
}
(gdb) info threads
Id Target Id Frame
66 Thread 0x7f6e24ca3700 (LWP 27539) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
65 Thread 0x7f6e244a2700 (LWP 27540) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
64 Thread 0x7f6e23ca1700 (LWP 27541) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
63 Thread 0x7f6e234a0700 (LWP 27542) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
62 Thread 0x7f6e22c9f700 (LWP 27543) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
61 Thread 0x7f6e2249e700 (LWP 27544) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
60 Thread 0x7f6e21c9d700 (LWP 27545) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
59 Thread 0x7f6e2149c700 (LWP 27546) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
58 Thread 0x7f6e20c9b700 (LWP 27547) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
57 Thread 0x7f6dfffff700 (LWP 27548) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
56 Thread 0x7f6df77fe700 (LWP 27549) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
55 Thread 0x7f6dff7fe700 (LWP 27550) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
54 Thread 0x7f6dfeffd700 (LWP 27551) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
53 Thread 0x7f6dfe7fc700 (LWP 27552) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
52 Thread 0x7f6dfdffb700 (LWP 27553) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
51 Thread 0x7f6dfd7fa700 (LWP 27554) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
50 Thread 0x7f6dfcff9700 (LWP 27555) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
49 Thread 0x7f6df7fff700 (LWP 27556) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
48 Thread 0x7f6df6ffd700 (LWP 27557) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
47 Thread 0x7f6df67fc700 (LWP 27558) "repro" 0x00007fffe52eb660 in clock_gettime ()
46 Thread 0x7f6df5ffb700 (LWP 27559) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
45 Thread 0x7f6df57fa700 (LWP 27560) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
44 Thread 0x7f6df4ff9700 (LWP 27561) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
43 Thread 0x7f6dc3fff700 (LWP 27562) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
42 Thread 0x7f6dc37fe700 (LWP 27563) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
41 Thread 0x7f6dc2ffd700 (LWP 27564) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
40 Thread 0x7f6dc27fc700 (LWP 27565) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
39 Thread 0x7f6dc1ffb700 (LWP 27566) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
38 Thread 0x7f6dc17fa700 (LWP 27567) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
37 Thread 0x7f6dc0ff9700 (LWP 27568) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
36 Thread 0x7f6d9ffff700 (LWP 27569) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
35 Thread 0x7f6d9f7fe700 (LWP 27570) "repro" 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
34 Thread 0x7f6d9effd700 (LWP 27571) "repro" 0x00007f6e25385e5d in nanosleep () from /lib64/libc.so.6
33 Thread 0x7f6d9e7fc700 (LWP 27572) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
32 Thread 0x7f6d9dffb700 (LWP 27573) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
31 Thread 0x7f6d9d7fa700 (LWP 27574) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
30 Thread 0x7f6d9cff9700 (LWP 27575) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
29 Thread 0x7f6d7ffff700 (LWP 27576) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
28 Thread 0x7f6d7f7fe700 (LWP 27577) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
27 Thread 0x7f6d7effd700 (LWP 27578) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
26 Thread 0x7f6d7e7fc700 (LWP 27579) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
25 Thread 0x7f6d7dffb700 (LWP 27580) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
24 Thread 0x7f6d7d7fa700 (LWP 27581) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
23 Thread 0x7f6d7cff9700 (LWP 27582) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
22 Thread 0x7f6d5ffff700 (LWP 27583) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
21 Thread 0x7f6d5f7fe700 (LWP 27584) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
20 Thread 0x7f6d5effd700 (LWP 27585) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
19 Thread 0x7f6d5e7fc700 (LWP 27586) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
18 Thread 0x7f6d5dffb700 (LWP 27587) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
17 Thread 0x7f6d5d3fa700 (LWP 27588) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
16 Thread 0x7f6d5cbf9700 (LWP 27589) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
15 Thread 0x7f6d3ffff700 (LWP 27590) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
14 Thread 0x7f6d3f3fe700 (LWP 27591) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
13 Thread 0x7f6d3ebfd700 (LWP 27592) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
12 Thread 0x7f6d3dbfc700 (LWP 27593) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
11 Thread 0x7f6d3d3fb700 (LWP 27594) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
10 Thread 0x7f6d3cbfa700 (LWP 27595) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
9 Thread 0x7f6d27fff700 (LWP 27596) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
8 Thread 0x7f6d26ffe700 (LWP 27597) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
7 Thread 0x7f6d25bfd700 (LWP 27598) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
6 Thread 0x7f6d253fc700 (LWP 27599) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
5 Thread 0x7f6d13fff700 (LWP 27600) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
4 Thread 0x7f6d12ffe700 (LWP 27601) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
3 Thread 0x7f6d07bff700 (LWP 27602) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
2 Thread 0x7f6d063fe700 (LWP 27603) "repro" 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
* 1 Thread 0x7f6e26f2c740 (LWP 27537) "repro" 0x00007f6e250b8fef in pthread_join () from /lib64/libpthread.so.0
(gdb) thread apply all bt
Thread 66 (Thread 0x7f6e24ca3700 (LWP 27539)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e45000) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e45000) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e44fe8) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 65 (Thread 0x7f6e244a2700 (LWP 27540)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e451a0) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e451a0) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e45188) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 64 (Thread 0x7f6e23ca1700 (LWP 27541)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e45350) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e45350) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e45338) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 63 (Thread 0x7f6e234a0700 (LWP 27542)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e454d0) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e454d0) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e454b8) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 62 (Thread 0x7f6e22c9f700 (LWP 27543)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e456a0) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e456a0) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e45688) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 61 (Thread 0x7f6e2249e700 (LWP 27544)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e45820) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e45820) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e45808) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 60 (Thread 0x7f6e21c9d700 (LWP 27545)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e459a0) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e459a0) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e45988) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 59 (Thread 0x7f6e2149c700 (LWP 27546)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e45b20) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e45b20) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e45b08) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 58 (Thread 0x7f6e20c9b700 (LWP 27547)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e45d30) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e45d30) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e45d18) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 57 (Thread 0x7f6dfffff700 (LWP 27548)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e45650) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e45650) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e45638) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 56 (Thread 0x7f6df77fe700 (LWP 27549)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e45fe0) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e45fe0) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e45fc8) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 55 (Thread 0x7f6dff7fe700 (LWP 27550)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e46160) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e46160) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e46148) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 54 (Thread 0x7f6dfeffd700 (LWP 27551)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e462e0) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e462e0) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e462c8) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 53 (Thread 0x7f6dfe7fc700 (LWP 27552)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e46460) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e46460) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e46448) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 52 (Thread 0x7f6dfdffb700 (LWP 27553)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e465e0) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e465e0) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e465c8) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 51 (Thread 0x7f6dfd7fa700 (LWP 27554)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e46760) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e46760) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e46748) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 50 (Thread 0x7f6dfcff9700 (LWP 27555)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e469f0) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e469f0) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e469d8) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 49 (Thread 0x7f6df7fff700 (LWP 27556)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e45ca0) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e45ca0) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e45c88) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 48 (Thread 0x7f6df6ffd700 (LWP 27557)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e46ca0) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e46ca0) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e46c88) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 47 (Thread 0x7f6df67fc700 (LWP 27558)):
#0 0x00007fffe52eb660 in clock_gettime ()
#1 0x00007f6e253c6e3d in clock_gettime () from /lib64/libc.so.6
#2 0x00007f6e266c169e in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e260b7bbb in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e260128bd in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e26094226 in ?? () from /usr/lib64/libcuda.so.1
#6 0x00007f6e260948b8 in ?? () from /usr/lib64/libcuda.so.1
#7 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#8 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#9 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#10 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e46e20) at /usr/include/c++/4.7/functional:1598
#11 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e46e20) at /usr/include/c++/4.7/functional:1586
#12 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e46e08) at /usr/include/c++/4.7/thread:115
#13 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#14 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#15 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 46 (Thread 0x7f6df5ffb700 (LWP 27559)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e46fa0) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e46fa0) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e46f88) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 45 (Thread 0x7f6df57fa700 (LWP 27560)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e47120) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e47120) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e47108) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 44 (Thread 0x7f6df4ff9700 (LWP 27561)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e472a0) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e472a0) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e47288) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 43 (Thread 0x7f6dc3fff700 (LWP 27562)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e47420) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e47420) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e47408) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 42 (Thread 0x7f6dc37fe700 (LWP 27563)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e475a0) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e475a0) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e47588) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 41 (Thread 0x7f6dc2ffd700 (LWP 27564)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e47720) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e47720) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e47708) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 40 (Thread 0x7f6dc27fc700 (LWP 27565)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e478a0) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e478a0) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e47888) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 39 (Thread 0x7f6dc1ffb700 (LWP 27566)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e47a20) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e47a20) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e47a08) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 38 (Thread 0x7f6dc17fa700 (LWP 27567)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26095369 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffb6ac in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb123 in cuMemHostRegister_v2 () from /usr/lib64/libcuda.so.1
#6 0x0000000000401c66 in threadFun (itCount=..., device=0) at repro.cpp:62
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e47ba0) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e47ba0) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e47b88) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 37 (Thread 0x7f6dc0ff9700 (LWP 27568)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e47d20) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e47d20) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e47d08) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 36 (Thread 0x7f6d9ffff700 (LWP 27569)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e47ea0) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e47ea0) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e47e88) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 35 (Thread 0x7f6d9f7fe700 (LWP 27570)):
#0 0x00007f6e250be11c in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007f6e250b9f47 in _L_lock_1027 () from /lib64/libpthread.so.0
#2 0x00007f6e250b9dd9 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007f6e26094899 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#6 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#7 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e48020) at /usr/include/c++/4.7/functional:1598
#8 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e48020) at /usr/include/c++/4.7/functional:1586
#9 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e48008) at /usr/include/c++/4.7/thread:115
#10 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#11 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#12 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 34 (Thread 0x7f6d9effd700 (LWP 27571)):
#0 0x00007f6e25385e5d in nanosleep () from /lib64/libc.so.6
#1 0x0000000000402dad in std::this_thread::sleep_for<long, std::ratio<1l, 1l> > (__rtime=...) at /usr/include/c++/4.7/thread:277
#2 0x0000000000402233 in monitorFun (counters=..., terminate=...) at repro.cpp:94
#3 0x0000000000407027 in std::_Bind_simple<void (*(std::reference_wrapper<std::array<std::__atomic_base<unsigned long>, 32ul> >, std::reference_wrapper<std::atomic_bool>))(std::array<std::__atomic_base<unsigned long>, 32ul>&, std::atomic_bool&)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e481a0) at /usr/include/c++/4.7/functional:1598
#4 0x0000000000406d7b in std::_Bind_simple<void (*(std::reference_wrapper<std::array<std::__atomic_base<unsigned long>, 32ul> >, std::reference_wrapper<std::atomic_bool>))(std::array<std::__atomic_base<unsigned long>, 32ul>&, std::atomic_bool&)>::operator()() (
this=0x1e481a0) at /usr/include/c++/4.7/functional:1586
#5 0x0000000000406ca2 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::array<std::__atomic_base<unsigned long>, 32ul> >, std::reference_wrapper<std::atomic_bool>))(std::array<std::__atomic_base<unsigned long>, 32ul>&, std::atomic_bool&)> >::_M_run() (this=0x1e48188) at /usr/include/c++/4.7/thread:115
#6 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#7 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#8 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 33 (Thread 0x7f6d9e7fc700 (LWP 27572)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 32 (Thread 0x7f6d9dffb700 (LWP 27573)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 31 (Thread 0x7f6d9d7fa700 (LWP 27574)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 30 (Thread 0x7f6d9cff9700 (LWP 27575)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 29 (Thread 0x7f6d7ffff700 (LWP 27576)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 28 (Thread 0x7f6d7f7fe700 (LWP 27577)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 27 (Thread 0x7f6d7effd700 (LWP 27578)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 26 (Thread 0x7f6d7e7fc700 (LWP 27579)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 25 (Thread 0x7f6d7dffb700 (LWP 27580)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 24 (Thread 0x7f6d7d7fa700 (LWP 27581)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 23 (Thread 0x7f6d7cff9700 (LWP 27582)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 22 (Thread 0x7f6d5ffff700 (LWP 27583)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 21 (Thread 0x7f6d5f7fe700 (LWP 27584)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 20 (Thread 0x7f6d5effd700 (LWP 27585)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 19 (Thread 0x7f6d5e7fc700 (LWP 27586)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 18 (Thread 0x7f6d5dffb700 (LWP 27587)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 17 (Thread 0x7f6d5d3fa700 (LWP 27588)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 16 (Thread 0x7f6d5cbf9700 (LWP 27589)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 15 (Thread 0x7f6d3ffff700 (LWP 27590)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 14 (Thread 0x7f6d3f3fe700 (LWP 27591)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 13 (Thread 0x7f6d3ebfd700 (LWP 27592)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 12 (Thread 0x7f6d3dbfc700 (LWP 27593)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 11 (Thread 0x7f6d3d3fb700 (LWP 27594)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 10 (Thread 0x7f6d3cbfa700 (LWP 27595)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 9 (Thread 0x7f6d27fff700 (LWP 27596)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 8 (Thread 0x7f6d26ffe700 (LWP 27597)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 7 (Thread 0x7f6d25bfd700 (LWP 27598)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 6 (Thread 0x7f6d253fc700 (LWP 27599)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 5 (Thread 0x7f6d13fff700 (LWP 27600)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 4 (Thread 0x7f6d12ffe700 (LWP 27601)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 3 (Thread 0x7f6d07bff700 (LWP 27602)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 2 (Thread 0x7f6d063fe700 (LWP 27603)):
#0 0x00007f6e253ac17d in poll () from /lib64/libc.so.6
#1 0x00007f6e266c1c99 in ?? () from /usr/lib64/libcuda.so.1
#2 0x00007f6e260c96b2 in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e266c2328 in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#5 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
Thread 1 (Thread 0x7f6e26f2c740 (LWP 27537)):
#0 0x00007f6e250b8fef in pthread_join () from /lib64/libpthread.so.0
#1 0x00007f6e25c42ff7 in std::thread::join() () from /usr/lib64/libstdc++.so.6
#2 0x0000000000402460 in main () at repro.cpp:118
(gdb) thread 47
[Switching to thread 47 (Thread 0x7f6df67fc700 (LWP 27558))]
#0 0x00007fffe52eb660 in clock_gettime ()
(gdb) bt
#0 0x00007fffe52eb660 in clock_gettime ()
#1 0x00007f6e253c6e3d in clock_gettime () from /lib64/libc.so.6
#2 0x00007f6e266c169e in ?? () from /usr/lib64/libcuda.so.1
#3 0x00007f6e260b7bbb in ?? () from /usr/lib64/libcuda.so.1
#4 0x00007f6e260128bd in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f6e26094226 in ?? () from /usr/lib64/libcuda.so.1
#6 0x00007f6e260948b8 in ?? () from /usr/lib64/libcuda.so.1
#7 0x00007f6e25ffd306 in ?? () from /usr/lib64/libcuda.so.1
#8 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
#9 0x0000000000401f1e in threadFun (itCount=..., device=0) at repro.cpp:76
#10 0x0000000000406f19 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (this=0x1e46e20) at /usr/include/c++/4.7/functional:1598
#11 0x0000000000406d53 in std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)>::operator()() (this=0x1e46e20) at /usr/include/c++/4.7/functional:1586
#12 0x0000000000406c84 in std::thread::_Impl<std::_Bind_simple<void (*(std::reference_wrapper<std::__atomic_base<unsigned long> >, int))(std::__atomic_base<unsigned long>&, int)> >::_M_run() (this=0x1e46e08) at /usr/include/c++/4.7/thread:115
#13 0x00007f6e25c43340 in ?? () from /usr/lib64/libstdc++.so.6
#14 0x00007f6e250b7e0f in start_thread () from /lib64/libpthread.so.0
#15 0x00007f6e253b50dd in clone () from /lib64/libc.so.6
(gdb) frame 8
#8 0x00007f6e25fcb2d2 in cuMemHostUnregister () from /usr/lib64/libcuda.so.1
(gdb) info frame
Stack level 8, frame at 0x7f6df67fbc50:
rip = 0x7f6e25fcb2d2 in cuMemHostUnregister; saved rip 0x401f1e
called by frame at 0x7f6df67fbe00, caller of frame at 0x7f6df67fbbb0
Arglist at 0x7f6df67fbba8, args:
Locals at 0x7f6df67fbba8, Previous frame's sp is 0x7f6df67fbc50
Saved registers:
rbx at 0x7f6df67fbc40, rip at 0x7f6df67fbc48
# build config (every variable below can be overridden from the environment
# or the make command line)
CUDA_PATH ?= /opt/cuda
CXX ?= g++
BUILDDIR ?= build
CXXFLAGS += -std=c++11 -g
# configs which produced the issue so far:
# 32 threads, 10M, w/ kernel
# test case config (passed to the C++ source as -D defines, see DEFINES)
GPU ?=0
ALLOC_SIZE ?=10
THREADS ?=32
REPETITIONS ?=100000
RUN_KERNEL ?=1
INC+=-I$(CUDA_PATH)/include
# Libraries are kept out of LDFLAGS on purpose: they have to appear AFTER the
# source/object files on the link line, otherwise linkers running with
# --as-needed (or static linking) drop them and the CUDA symbols are undefined.
LIBS+=-lcuda
DEFINES=-DGPU=$(GPU) -DALLOC_SIZE=$(ALLOC_SIZE) \
-DTHREADS=$(THREADS) \
-DREPETITIONS=$(REPETITIONS) \
-DRUN_KERNEL=$(RUN_KERNEL)
CXXFLAGS+=$(INC) $(DEFINES)
CUBINS=$(BUILDDIR)/EmptyKernel.cubin
EXE=$(BUILDDIR)/repro
SRC=*.cpp
# Stamp file whose name encodes the test case config; changing any setting
# yields a new stamp name and therefore forces a rebuild of all targets.
CFG:=$(BUILDDIR)/cfg_$(GPU)_$(THREADS)_$(ALLOC_SIZE)_$(REPETITIONS)_$(RUN_KERNEL)
all: $(CUBINS) $(EXE)
$(BUILDDIR):
	mkdir -p $(BUILDDIR)
# The build directory is an order-only prerequisite (after the '|'): it must
# exist, but its timestamp (which changes whenever a file is created inside
# it) must not trigger rebuilds.
$(EXE): $(SRC) $(CFG) | $(BUILDDIR)
	$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $< $(LIBS)
$(BUILDDIR)/%.cubin: %.cu $(CFG) | $(BUILDDIR)
	nvcc -cubin -arch=sm_35 $< -o $@
# Remove stamps of previous configurations, then create the current one. The
# directory has to exist before the stamp can be touched.
$(CFG): | $(BUILDDIR)
	@rm -f $(BUILDDIR)/cfg_*
	@touch $@
clean:
	rm -rf $(BUILDDIR)
.PHONY: all clean
// workaround for gcc 4.7 to use std::this_thread::sleep_for
#define _GLIBCXX_USE_NANOSLEEP
#include <cuda.h>
#include <array>
#include <atomic>
#include <chrono>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <thread>
#include <vector>
// Evaluates the CUDA driver API call CALL exactly once. If the result is not
// CUDA_SUCCESS, prints the stringified call expression together with the
// numeric result code to stderr and terminates the process with exit status 1.
// The do { ... } while (false) wrapper makes the macro usable as a single
// statement (e.g. as the body of an unbraced if/else or for loop).
// The local is deliberately NOT named with a leading double underscore:
// such identifiers are reserved for the implementation.
#define CHECK(CALL)                                                          \
    do {                                                                     \
        const CUresult checkResult_ = (CALL);                                \
        if (checkResult_ != CUDA_SUCCESS) {                                  \
            std::cerr << #CALL << " returned " << checkResult_ << std::endl; \
            std::exit(1);                                                    \
        }                                                                    \
    } while (false)
// Test case parameters. GPU, ALLOC_SIZE, THREADS and REPETITIONS are macros
// that must be supplied on the compiler command line (-D...).
// Namespace-scope constexpr variables already have internal linkage.
constexpr size_t gpuId       = GPU;                       // device ordinal handed to cuDeviceGet
constexpr size_t allocSize   = ALLOC_SIZE * (1ul << 20);  // bytes per buffer (ALLOC_SIZE MiB)
constexpr size_t nThreads    = THREADS;                   // number of worker threads
constexpr size_t repetitions = REPETITIONS;               // loop iterations per worker
constexpr size_t nBuffers    = 4;                         // host buffers per worker

// Compiled kernel module loaded by the workers and the kernel looked up in it.
constexpr const char * moduleFile = "EmptyKernel.cubin";
constexpr const char * kernelName = "empty";
// Worker thread body: inside a CUDA context created by this thread, repeats
// the sequence  allocate -> register host buffers -> async copy
// (-> optional kernel launch) -> synchronize -> free -> unregister.
//   itCount - progress counter, incremented once after every repetition
//   device  - device handle on which this thread creates its context
// Every driver call goes through CHECK, so any failure terminates the process.
void threadFun(std::atomic_size_t & itCount,
CUdevice device) {
// Context and stream used by all driver calls below; both are destroyed at
// the end of the function.
CUcontext ctx = NULL;
CHECK(cuCtxCreate(&ctx, 0, device));
CUstream stream = NULL;
CHECK(cuStreamCreate(&stream, 0));
#if RUN_KERNEL
// Load the module file and look up the kernel by name (moduleFile / kernelName).
CUmodule module = NULL;
CHECK(cuModuleLoad(&module, moduleFile));
CUfunction kernel = NULL;
CHECK(cuModuleGetFunction(&kernel, module, kernelName));
#endif
// Host-side source buffers: nBuffers vectors of allocSize bytes each. They are
// allocated once and re-registered in every repetition.
std::array<std::vector<char>, nBuffers> buffers;
for(auto & buf : buffers)
buf.resize(allocSize);
for (size_t i = 0; i < repetitions; ++i, itCount++) {
// One device buffer per repetition; all host buffers are copied into it.
CUdeviceptr gpuBuf = 0;
CHECK(cuMemAlloc(&gpuBuf, allocSize));
for (auto & buf : buffers) {
// Register the vector's storage with the driver, then enqueue an asynchronous
// host-to-device copy on the stream.
CHECK(cuMemHostRegister(buf.data(), buf.size(), 0));
CHECK(cuMemcpyHtoDAsync(gpuBuf, buf.data(), allocSize, stream));
#if RUN_KERNEL
// Launch the kernel with a 1x1x1 grid of 1x1x1 blocks and no shared memory on
// the same stream. &p is passed as the kernel-parameter array: a single entry
// that points at arg.
int arg = 0;
auto p = &arg;
CHECK(cuLaunchKernel(kernel, 1, 1, 1, 1, 1, 1, 0, stream, (void**)&p, 0));
#endif
}
// Wait for everything enqueued on the stream before releasing the device
// buffer and unregistering the host buffers.
CHECK(cuStreamSynchronize(stream));
CHECK(cuMemFree(gpuBuf));
for (auto & buf : buffers)
CHECK(cuMemHostUnregister(buf.data()));
}
// Tear down in reverse order of creation.
CHECK(cuStreamSynchronize(stream));
CHECK(cuStreamDestroy(stream));
#if RUN_KERNEL
CHECK(cuModuleUnload(module));
#endif
CHECK(cuCtxDestroy(ctx));
}
void monitorFun(std::array<std::atomic_size_t, nThreads> & counters, std::atomic_bool & terminate) {
while (!terminate) {
for(size_t i = 0; i < nThreads; ++i)
std::cerr << std::setw(5) << counters[i].load() << " ";
std::cerr << std::endl;
std::this_thread::sleep_for(std::chrono::seconds(1));
}
}
// Entry point of the reproducer: initialises the CUDA driver API, starts
// nThreads worker threads running threadFun on the selected device plus one
// monitor thread that prints their progress counters, waits for all workers
// to finish and then shuts the monitor down.
// Returns 0 on completion; any failing driver call exits the process via CHECK.
int main() {
    CHECK(cuInit(0));
    CUdevice device = 0;
    CHECK(cuDeviceGet(&device, gpuId));

    std::vector<std::thread> threads;
    threads.reserve(nThreads);
    std::array<std::atomic_size_t, nThreads> counters;
    for (size_t i = 0; i < nThreads; ++i) {
        // Zero the counter before the worker that increments it is started.
        counters[i].store(0);
        threads.emplace_back(threadFun, std::ref(counters[i]), device);
    }

    // The flag must be explicitly initialised: a default-constructed
    // std::atomic_bool holds an indeterminate value (before C++20), and the
    // monitor thread reads it before main ever writes to it.
    std::atomic_bool terminate(false);
    std::thread monitor(monitorFun, std::ref(counters), std::ref(terminate));

    for (auto & t : threads)
        t.join();

    // All workers are done; tell the monitor to leave its loop and wait for it.
    terminate = true;
    monitor.join();
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment