Skip to content

Instantly share code, notes, and snippets.

Created February 12, 2016 07:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/c8c65043af505c6813b4 to your computer and use it in GitHub Desktop.
Save anonymous/c8c65043af505c6813b4 to your computer and use it in GitHub Desktop.
lxcfs / lxcfs-freeze hang
#0 __lll_lock_wait_private () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:95
#1 0x00007f3637b176db in _L_lock_11305 () from /lib/x86_64-linux-gnu/libc.so.6
#2 0x00007f3637b15838 in __GI___libc_realloc (oldmem=0x7f3637e3e620 <main_arena>, bytes=bytes@entry=567) at malloc.c:3025
#3 0x00007f3637b0b24b in _IO_vasprintf (result_ptr=0x7f3614ff8748, format=<optimized out>, args=args@entry=0x7f3614ff8628) at vasprintf.c:84
#4 0x00007f3637aea027 in ___asprintf (string_ptr=string_ptr@entry=0x7f3614ff8748, format=format@entry=0x7f3637c00688 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n") at asprintf.c:35
#5 0x00007f3637ac71c2 in __assert_fail_base (fmt=0x7f3637c00688 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=assertion@entry=0x7f3637c039a0 "({ __typeof (self->tid) __value; if (sizeof (__value) == 1) asm volatile (\"movb %%fs:%P2,%b0\" : \"=q\" (__value) : \"0\" (0), \"i\" (__builtin_offsetof (struct pthread, tid))); else if (sizeof (__value) == "..., file=file@entry=0x7f3637c03968 "../nptl/sysdeps/unix/sysv/linux/x86_64/../fork.c", line=line@entry=141, function=function@entry=0x7f3637bfe1b2 <__PRETTY_FUNCTION__.11206> "__libc_fork") at assert.c:57
#6 0x00007f3637ac7312 in __GI___assert_fail (assertion=0x7f3637c039a0 "({ __typeof (self->tid) __value; if (sizeof (__value) == 1) asm volatile (\"movb %%fs:%P2,%b0\" : \"=q\" (__value) : \"0\" (0), \"i\" (__builtin_offsetof (struct pthread, tid))); else if (sizeof (__value) == "..., file=0x7f3637c03968 "../nptl/sysdeps/unix/sysv/linux/x86_64/../fork.c", line=141, function=0x7f3637bfe1b2 <__PRETTY_FUNCTION__.11206> "__libc_fork") at assert.c:101
#7 0x00007f3637b53385 in __libc_fork () at ../nptl/sysdeps/unix/sysv/linux/x86_64/../fork.c:141
#8 0x00007f3637e55425 in __fork () at ../nptl/sysdeps/unix/sysv/linux/pt-fork.c:25
#9 0x0000000000404dd8 in write_task_init_pid_exit (sock=5, target=<optimized out>) at lxcfs.c:2530
#10 0x0000000000406fac in get_task_reaper_pid (task=<optimized out>) at lxcfs.c:2565
#11 get_reaper_busy (task=<optimized out>) at lxcfs.c:2581
#12 proc_uptime_read (fi=<optimized out>, offset=<optimized out>, size=<optimized out>, buf=<optimized out>) at lxcfs.c:2613
#13 proc_read (path=<optimized out>, fi=<optimized out>, offset=<optimized out>, size=<optimized out>, buf=<optimized out>) at lxcfs.c:2891
#14 lxcfs_read (path=0xfffffffffffffe00 <error: Cannot access memory at address 0xfffffffffffffe00>, buf=0x80 <error: Cannot access memory at address 0x80>, size=2047, offset=0, fi=0x7f3604001600) at lxcfs.c:2980
#15 0x00007f363806e574 in fuse_fs_read_buf () from /lib/x86_64-linux-gnu/libfuse.so.2
#16 0x00007f363806e732 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
#17 0x00007f363807708e in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
#18 0x00007f3638077895 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
#19 0x00007f3638074394 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
#20 0x00007f3637e4c0a4 in start_thread (arg=0x7f3614ff9700) at pthread_create.c:309
#21 0x00007f3637b819cd in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
#0 __lll_lock_wait_private () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:95
#1 0x00007f3637b176db in _L_lock_11305 () from /lib/x86_64-linux-gnu/libc.so.6
#2 0x00007f3637b15838 in __GI___libc_realloc (oldmem=0x7f3637e3e620 <main_arena>, bytes=bytes@entry=567) at malloc.c:3025
#3 0x00007f3637b0b24b in _IO_vasprintf (result_ptr=0x7f3636891748, format=<optimized out>, args=args@entry=0x7f3636891628) at vasprintf.c:84
#4 0x00007f3637aea027 in ___asprintf (string_ptr=string_ptr@entry=0x7f3636891748, format=format@entry=0x7f3637c00688 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n") at asprintf.c:35
#5 0x00007f3637ac71c2 in __assert_fail_base (fmt=0x7f3637c00688 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=assertion@entry=0x7f3637c039a0 "({ __typeof (self->tid) __value; if (sizeof (__value) == 1) asm volatile (\"movb %%fs:%P2,%b0\" : \"=q\" (__value) : \"0\" (0), \"i\" (__builtin_offsetof (struct pthread, tid))); else if (sizeof (__value) == "..., file=file@entry=0x7f3637c03968 "../nptl/sysdeps/unix/sysv/linux/x86_64/../fork.c", line=line@entry=141, function=function@entry=0x7f3637bfe1b2 <__PRETTY_FUNCTION__.11206> "__libc_fork") at assert.c:57
#6 0x00007f3637ac7312 in __GI___assert_fail (assertion=0x7f3637c039a0 "({ __typeof (self->tid) __value; if (sizeof (__value) == 1) asm volatile (\"movb %%fs:%P2,%b0\" : \"=q\" (__value) : \"0\" (0), \"i\" (__builtin_offsetof (struct pthread, tid))); else if (sizeof (__value) == "..., file=0x7f3637c03968 "../nptl/sysdeps/unix/sysv/linux/x86_64/../fork.c", line=141, function=0x7f3637bfe1b2 <__PRETTY_FUNCTION__.11206> "__libc_fork") at assert.c:101
#7 0x00007f3637b53385 in __libc_fork () at ../nptl/sysdeps/unix/sysv/linux/x86_64/../fork.c:141
#8 0x00007f3637e55425 in __fork () at ../nptl/sysdeps/unix/sysv/linux/pt-fork.c:25
#9 0x0000000000404dd8 in write_task_init_pid_exit (sock=7, target=<optimized out>) at lxcfs.c:2530
#10 0x0000000000406fac in get_task_reaper_pid (task=<optimized out>) at lxcfs.c:2565
#11 get_reaper_busy (task=<optimized out>) at lxcfs.c:2581
#12 proc_uptime_read (fi=<optimized out>, offset=<optimized out>, size=<optimized out>, buf=<optimized out>) at lxcfs.c:2613
#13 proc_read (path=<optimized out>, fi=<optimized out>, offset=<optimized out>, size=<optimized out>, buf=<optimized out>) at lxcfs.c:2891
#14 lxcfs_read (path=0xfffffffffffffe00 <error: Cannot access memory at address 0xfffffffffffffe00>, buf=0x80 <error: Cannot access memory at address 0x80>, size=2047, offset=0, fi=0x7f3624000ab0) at lxcfs.c:2980
#15 0x00007f363806e574 in fuse_fs_read_buf () from /lib/x86_64-linux-gnu/libfuse.so.2
#16 0x00007f363806e732 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
#17 0x00007f363807708e in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
#18 0x00007f3638077895 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
#19 0x00007f3638074394 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
#20 0x00007f3637e4c0a4 in start_thread (arg=0x7f3636892700) at pthread_create.c:309
#21 0x00007f3637b819cd in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
patched lxc-freeze to usleep for 0.1 seconds instead of sleeping for 1 second while waiting for freezer.state changes, hopefully speeding up the triggering of this bug.
commands:
in the container, the following while loop ran in two different login shells (via ssh)
# while : ; do uptime & uptime & uptime; done &> /dev/null
on the host, the following while loop ran
# i=0; while : ; do let i++; echo `date` freeze $i && lxc-freeze -n 108; echo unfreeze && lxc-unfreeze -n 108; done
until it started to hang on attempt 22155:
---------------
Thu Feb 11 15:41:25 CET 2016 freeze 22155
^C
---------------
ps faxo -> lxcfs:
4 0 2262 1 19 0 1108596 2752 futex_wait_queue_me Ssl ? 00:04:02 lxcfs
5 0 18258 2262 19 0 1108572 2328 wait S ? 00:00:00 \_ lxcfs
1 0 18260 18258 19 0 1108704 1392 futex_wait_queue_me S ? 00:00:00 | \_ lxcfs
5 0 18259 2262 19 0 1108572 2328 wait S ? 00:00:00 \_ lxcfs
1 0 18262 18259 19 0 1108704 1392 futex_wait_queue_me S ? 00:00:00 \_ lxcfs
ps faxo -> lxc:
5 0 25750 1 19 0 38664 3280 ep_poll Ss ? 00:01:35 lxc-start
4 0 25785 25750 19 0 10660 88 refrigerator Ds ? 00:00:01 \_ init
5 0 27625 25785 19 0 18984 92 refrigerator Ds ? 00:00:01 \_ rpcbind
5 0 27795 25785 19 0 52676 448 refrigerator Dl ? 00:00:03 \_ rsyslogd
1 1 27856 25785 19 0 16684 0 refrigerator Ds ? 00:00:00 \_ atd
1 0 27925 25785 19 0 18892 252 refrigerator Ds ? 00:00:00 \_ cron
5 0 27987 25785 19 0 49948 680 refrigerator Ds ? 00:00:01 \_ sshd
4 0 16490 27987 19 0 71276 380 refrigerator Ds ? 00:00:04 | \_ sshd
4 0 16497 16490 19 0 22336 5372 refrigerator Ds ? 00:00:17 | | \_ bash
0 0 18230 16497 19 0 6292 952 request_wait_answer D ? 00:00:00 | | \_ uptime
0 0 18234 16497 19 0 6292 1064 refrigerator D ? 00:00:00 | | \_ uptime
0 0 18235 16497 19 0 6292 972 request_wait_answer D+ ? 00:00:00 | | \_ uptime
4 0 15299 27987 19 0 71276 3880 refrigerator Ds ? 00:00:00 | \_ sshd
4 0 15521 15299 19 0 21228 5768 refrigerator Ds pts/3 00:00:08 | \_ bash
0 0 18229 15521 19 0 6292 1120 refrigerator D pts/3 00:00:00 | \_ uptime
0 0 18232 15521 19 0 6292 956 refrigerator D pts/3 00:00:00 | \_ uptime
0 0 18233 15521 19 0 6292 1068 refrigerator D+ pts/3 00:00:00 | \_ uptime
4 0 28005 25785 19 0 37892 364 refrigerator Ds ? 00:00:01 \_ master
4 101 28027 28005 19 0 40012 408 refrigerator D ? 00:00:01 | \_ qmgr
4 101 11527 28005 19 0 39960 20 refrigerator D ? 00:00:01 | \_ pickup
4 0 28031 25785 19 0 14588 4 refrigerator Ds+ pts/0 00:00:01 \_ getty
4 0 28032 25785 19 0 14588 4 refrigerator Ds+ pts/1 00:00:01 \_ getty
Unfreezing the container works:
# lxc-unfreeze -n 108
but:
# ps faxo f,uid,pid,ppid,pri,ni,vsz,rss,wchan:20,stat,tty,time,comm:30,cgroup:150 | grep lxc
4 0 2262 1 19 0 1108596 2812 futex_wait_queue_me Ssl ? 00:04:03 lxcfs 6:devices:/system.slice/lxcfs.service,1:name=systemd:/system.slice/lxcfs.service
5 0 18258 2262 19 0 1108572 2328 wait S ? 00:00:00 \_ lxcfs 6:devices:/system.slice/lxcfs.service,1:name=systemd:/system.slice/lxcfs.service
1 0 18260 18258 19 0 1108704 1480 futex_wait_queue_me S ? 00:00:00 | \_ lxcfs 6:devices:/system.slice/lxcfs.service,1:name=systemd:/system.slice/lxcfs.service
5 0 18259 2262 19 0 1108572 2328 wait S ? 00:00:00 \_ lxcfs 6:devices:/system.slice/lxcfs.service,1:name=systemd:/system.slice/lxcfs.service
1 0 18262 18259 19 0 1108704 1480 futex_wait_queue_me S ? 00:00:00 \_ lxcfs 6:devices:/system.slice/lxcfs.service,1:name=systemd:/system.slice/lxcfs.service
5 0 25750 1 19 0 38664 3280 ep_poll Ss ? 00:01:35 lxc-start -
4 0 25785 25750 19 0 10660 88 poll_schedule_timeou Ss ? 00:00:01 \_ init 10:hugetlb:/lxc/108,9:perf_event:/lxc/108,8:net_cls,net_prio:/lxc/108,7:freezer:/lxc/108,6:devices:/lxc/108,5:memory:/lxc/108,4:blkio:/lxc/108,3:cpu,cpuacct:/lxc/108,2:cpuset:/lxc/108,1:name=systemd:/lxc/108
5 0 27625 25785 19 0 18984 92 poll_schedule_timeou Ss ? 00:00:01 \_ rpcbind 10:hugetlb:/lxc/108,9:perf_event:/lxc/108,8:net_cls,net_prio:/lxc/108,7:freezer:/lxc/108,6:devices:/lxc/108,5:memory:/lxc/108,4:blkio:/lxc/108,3:cpu,cpuacct:/lxc/108,2:cpuset:/lxc/108,1:name=systemd:/lxc/108
5 0 27795 25785 19 0 52676 448 poll_schedule_timeou Sl ? 00:00:03 \_ rsyslogd 10:hugetlb:/lxc/108,9:perf_event:/lxc/108,8:net_cls,net_prio:/lxc/108,7:freezer:/lxc/108,6:devices:/lxc/108,5:memory:/lxc/108,4:blkio:/lxc/108,3:cpu,cpuacct:/lxc/108,2:cpuset:/lxc/108,1:name=systemd:/lxc/108
1 1 27856 25785 19 0 16684 0 hrtimer_nanosleep Ss ? 00:00:00 \_ atd 10:hugetlb:/lxc/108,9:perf_event:/lxc/108,8:net_cls,net_prio:/lxc/108,7:freezer:/lxc/108,6:devices:/lxc/108,5:memory:/lxc/108,4:blkio:/lxc/108,3:cpu,cpuacct:/lxc/108,2:cpuset:/lxc/108,1:name=systemd:/lxc/108
1 0 27925 25785 19 0 18892 252 hrtimer_nanosleep Ss ? 00:00:00 \_ cron 10:hugetlb:/lxc/108,9:perf_event:/lxc/108,8:net_cls,net_prio:/lxc/108,7:freezer:/lxc/108,6:devices:/lxc/108,5:memory:/lxc/108,4:blkio:/lxc/108,3:cpu,cpuacct:/lxc/108,2:cpuset:/lxc/108,1:name=systemd:/lxc/108
5 0 27987 25785 19 0 49948 680 poll_schedule_timeou Ss ? 00:00:01 \_ sshd 10:hugetlb:/lxc/108,9:perf_event:/lxc/108,8:net_cls,net_prio:/lxc/108,7:freezer:/lxc/108,6:devices:/lxc/108,5:memory:/lxc/108,4:blkio:/lxc/108,3:cpu,cpuacct:/lxc/108,2:cpuset:/lxc/108,1:name=systemd:/lxc/108
4 0 16490 27987 19 0 71276 380 poll_schedule_timeou Ss ? 00:00:04 | \_ sshd 10:hugetlb:/lxc/108,9:perf_event:/lxc/108,8:net_cls,net_prio:/lxc/108,7:freezer:/lxc/108,6:devices:/lxc/108,5:memory:/lxc/108,4:blkio:/lxc/108,3:cpu,cpuacct:/lxc/108,2:cpuset:/lxc/108,1:name=systemd:/lxc/108
4 0 16497 16490 19 0 22336 5372 wait Ss ? 00:00:17 | | \_ bash 10:hugetlb:/lxc/108,9:perf_event:/lxc/108,8:net_cls,net_prio:/lxc/108,7:freezer:/lxc/108,6:devices:/lxc/108,5:memory:/lxc/108,4:blkio:/lxc/108,3:cpu,cpuacct:/lxc/108,2:cpuset:/lxc/108,1:name=systemd:/lxc/108
0 0 18230 16497 19 0 6292 952 request_wait_answer D ? 00:00:00 | | \_ uptime 10:hugetlb:/lxc/108,9:perf_event:/lxc/108,8:net_cls,net_prio:/lxc/108,7:freezer:/lxc/108,6:devices:/lxc/108,5:memory:/lxc/108,4:blkio:/lxc/108,3:cpu,cpuacct:/lxc/108,2:cpuset:/lxc/108,1:name=systemd:/lxc/108
0 0 18235 16497 19 0 6292 972 request_wait_answer D+ ? 00:00:00 | | \_ uptime 10:hugetlb:/lxc/108,9:perf_event:/lxc/108,8:net_cls,net_prio:/lxc/108,7:freezer:/lxc/108,6:devices:/lxc/108,5:memory:/lxc/108,4:blkio:/lxc/108,3:cpu,cpuacct:/lxc/108,2:cpuset:/lxc/108,1:name=systemd:/lxc/108
4 0 15299 27987 19 0 71276 3880 poll_schedule_timeou Ss ? 00:00:00 | \_ sshd 10:hugetlb:/lxc/108,9:perf_event:/lxc/108,8:net_cls,net_prio:/lxc/108,7:freezer:/lxc/108,6:devices:/lxc/108,5:memory:/lxc/108,4:blkio:/lxc/108,3:cpu,cpuacct:/lxc/108,2:cpuset:/lxc/108,1:name=systemd:/lxc/108
4 0 15521 15299 19 0 21244 5784 wait_woken Ss+ pts/3 00:00:09 | \_ bash 10:hugetlb:/lxc/108,9:perf_event:/lxc/108,8:net_cls,net_prio:/lxc/108,7:freezer:/lxc/108,6:devices:/lxc/108,5:memory:/lxc/108,4:blkio:/lxc/108,3:cpu,cpuacct:/lxc/108,2:cpuset:/lxc/108,1:name=systemd:/lxc/108
4 0 28005 25785 19 0 37892 364 ep_poll Ss ? 00:00:01 \_ master 10:hugetlb:/lxc/108,9:perf_event:/lxc/108,8:net_cls,net_prio:/lxc/108,7:freezer:/lxc/108,6:devices:/lxc/108,5:memory:/lxc/108,4:blkio:/lxc/108,3:cpu,cpuacct:/lxc/108,2:cpuset:/lxc/108,1:name=systemd:/lxc/108
4 101 28027 28005 19 0 40012 408 ep_poll S ? 00:00:01 | \_ qmgr 10:hugetlb:/lxc/108,9:perf_event:/lxc/108,8:net_cls,net_prio:/lxc/108,7:freezer:/lxc/108,6:devices:/lxc/108,5:memory:/lxc/108,4:blkio:/lxc/108,3:cpu,cpuacct:/lxc/108,2:cpuset:/lxc/108,1:name=systemd:/lxc/108
4 101 14205 28005 19 0 39960 2872 ep_poll S ? 00:00:00 | \_ pickup 10:hugetlb:/lxc/108,9:perf_event:/lxc/108,8:net_cls,net_prio:/lxc/108,7:freezer:/lxc/108,6:devices:/lxc/108,5:memory:/lxc/108,4:blkio:/lxc/108,3:cpu,cpuacct:/lxc/108,2:cpuset:/lxc/108,1:name=systemd:/lxc/108
4 0 28031 25785 19 0 14588 4 wait_woken Ss+ pts/0 00:00:01 \_ getty 10:hugetlb:/lxc/108,9:perf_event:/lxc/108,8:net_cls,net_prio:/lxc/108,7:freezer:/lxc/108,6:devices:/lxc/108,5:memory:/lxc/108,4:blkio:/lxc/108,3:cpu,cpuacct:/lxc/108,2:cpuset:/lxc/108,1:name=systemd:/lxc/108
4 0 28032 25785 19 0 14588 4 wait_woken Ss+ pts/1 00:00:01 \_ getty 10:hugetlb:/lxc/108,9:perf_event:/lxc/108,8:net_cls,net_prio:/lxc/108,7:freezer:/lxc/108,6:devices:/lxc/108,5:memory:/lxc/108,4:blkio:/lxc/108,3:cpu,cpuacct:/lxc/108,2:cpuset:/lxc/108,1:name=systemd:/lxc/108
lxcfs still hangs in the lock, uptime still hangs in reqeust_wait_answer!
the backtraces for PID 18260 and 18262 are unchanged, trying to continue both processes with gdb does not change anything.
killing the uptime processes is not possible, rebooting the container kills all the hung processes (and everything else in the container)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment