Skip to content

Instantly share code, notes, and snippets.

@amurzeau
Created March 31, 2019 20:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amurzeau/26f045bdfea407528dd7de3102fb4be7 to your computer and use it in GitHub Desktop.
Save amurzeau/26f045bdfea407528dd7de3102fb4be7 to your computer and use it in GitHub Desktop.
TLS deadlock between dl_close and __tls_get_addr
(gdb) thr a a bt
Thread 3 (Thread 0x7ff4067a4700 (LWP 1140)):
#0 __lll_lock_wait () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:103
#1 0x00007ff4071747c1 in __GI___pthread_mutex_lock (mutex=0x7ff4071ce968 <_rtld_global+2312>) at ../nptl/pthread_mutex_lock.c:115
#2 0x00007ff4071b79bf in tls_get_addr_tail (dtv=0x7ff4000015e0, the_map=0x7ff400000f90, ti=<optimized out>, ti=<optimized out>) at ../elf/dl-tls.c:761
#3 0x00007ff4071bd278 in __tls_get_addr () at ../sysdeps/x86_64/tls_get_addr.S:55
#4 0x00007ff4071a216b in thread_that_use_tls_after_sleep (arg=0x0) at test_compiler_tls_lib.c:13
#5 0x00007ff407171fa3 in start_thread (arg=<optimized out>) at pthread_create.c:486
#6 0x00007ff4070a27ef in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 2 (Thread 0x7ff406fa5700 (LWP 1139)):
#0 0x00007ff407173485 in __GI___pthread_timedjoin_ex (threadid=140686057424640, thread_return=0x0, abstime=0x0, block=<optimized out>) at pthread_join_common.c:89
#1 0x00007ff4071a21bf in lib_shutdown () at test_compiler_tls_lib.c:21
#2 0x00007ff4071b9f45 in _dl_close_worker (force=<optimized out>, map=<optimized out>) at dl-close.c:288
#3 _dl_close_worker (map=<optimized out>, force=<optimized out>) at dl-close.c:111
#4 0x00007ff4071ba87e in _dl_close (_map=0x7ff400000f90) at dl-close.c:842
#5 0x00007ff4070ddf1f in __GI__dl_catch_exception (exception=exception@entry=0x7ff406fa4e50, operate=operate@entry=0x7ff40718c330 <dlclose_doit>,
args=args@entry=0x7ff400000f90) at dl-error-skeleton.c:196
#6 0x00007ff4070ddfaf in __GI__dl_catch_error (objname=objname@entry=0x7ff400000f40, errstring=errstring@entry=0x7ff400000f48,
mallocedp=mallocedp@entry=0x7ff400000f38, operate=operate@entry=0x7ff40718c330 <dlclose_doit>, args=args@entry=0x7ff400000f90) at dl-error-skeleton.c:215
#7 0x00007ff40718c975 in _dlerror_run (operate=operate@entry=0x7ff40718c330 <dlclose_doit>, args=0x7ff400000f90) at dlerror.c:163
#8 0x00007ff40718c364 in __dlclose (handle=<optimized out>) at dlclose.c:46
#9 0x000055d2ef12427b in test_thread (arg=0x7ffcde59c7c1) at test_compiler_tls.c:27
#10 0x00007ff407171fa3 in start_thread (arg=<optimized out>) at pthread_create.c:486
#11 0x00007ff4070a27ef in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 1 (Thread 0x7ff406fa6740 (LWP 1138)):
#0 futex_abstimed_wait_cancelable (private=0, abstime=0x7ffcde59a6c0, expected=0, futex_word=0x55d2ef1270e8 <cond+40>)
at ../sysdeps/unix/sysv/linux/futex-internal.h:205
#1 __pthread_cond_wait_common (abstime=0x7ffcde59a6c0, mutex=0x7ffcde59a6d0, cond=0x55d2ef1270c0 <cond>) at pthread_cond_wait.c:539
#2 __pthread_cond_timedwait (cond=0x55d2ef1270c0 <cond>, mutex=0x7ffcde59a6d0, abstime=0x7ffcde59a6c0) at pthread_cond_wait.c:667
#3 0x000055d2ef124368 in main (argc=2, argv=0x7ffcde59a7f8) at test_compiler_tls.c:58
/* Build with gcc test_compiler_tls.c -o test_compiler_tls -ldl -g -pthread */
/* Also compile test_compiler_tls_lib.c */
/* Then run it as: ./test_compiler_tls ./test_compiler_tls_lib.so */
#include <dlfcn.h>
#include <stdio.h>
#include <pthread.h>
#include <time.h>
#include <sys/time.h>
#include <stdint.h>
/* Signalled by test_thread once its dlopen/dlclose loop is over. */
pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
/* Protects test_done and is the mutex paired with cond. */
static pthread_mutex_t done_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Predicate for cond: non-zero once test_thread has finished its loop. */
static int test_done = 0;

/*
 * Worker thread: loads and unloads the shared library named by arg
 * (a NUL-terminated path) ten times, then reports completion to main.
 *
 * Returns (void*) 0 on success, (void*) 1 if a dlopen or dlclose call
 * reported an error.
 */
void* test_thread(void* arg) {
    const char* filename = (const char*) arg;
    intptr_t ret = 0;
    int i;

    for(i = 0; i < 10; i++) {
        printf("Try %d\n", i);
        void* handle = dlopen(filename, RTLD_NOW);
        if(handle == NULL) {
            printf("Can't open library: %s\n", dlerror());
            ret = 1;
            break;
        }
        /* dlclose runs the library's destructors; this is the call that
         * never returns when the deadlock is hit. */
        if(dlclose(handle) != 0) {
            printf("Can't close library: %s\n", dlerror());
            ret = 1;
            break;
        }
    }

    /* Publish completion under the mutex so that main cannot miss the
     * wakeup, even if this thread finishes before main starts waiting. */
    pthread_mutex_lock(&done_mutex);
    test_done = 1;
    pthread_cond_signal(&cond);
    pthread_mutex_unlock(&done_mutex);

    return (void*) ret;
}

/*
 * Starts test_thread on the library given as argv[1] and waits at most
 * 10 seconds for it to finish.
 *
 * Exit status:
 *   0  the thread finished and every dlopen/dlclose succeeded
 *   1  missing command-line argument
 *   2  the thread did not finish within the timeout (suspected deadlock)
 *   3  the thread finished but reported a dlopen/dlclose failure
 *   4  the test thread could not be created
 */
int main(int argc, char* argv[]) {
    pthread_t thread;
    struct timespec timeToWait;
    struct timeval now;
    void* threadRet = NULL;
    int finished;
    int ret;

    if(argc < 2) {
        printf("Usage: %s libopenblas.so\n", argv[0]);
        return 1;
    }

    ret = pthread_create(&thread, NULL, &test_thread, argv[1]);
    if(ret != 0) {
        printf("Failed to create test thread: %d\n", ret);
        return 4;
    }

    gettimeofday(&now, NULL);
    /* 10s timeout */
    timeToWait.tv_sec = now.tv_sec + 10;
    timeToWait.tv_nsec = now.tv_usec * 1000L;

    /* Wait on the predicate, not on the bare condition variable: this
     * tolerates spurious wakeups and a signal sent before we got here. */
    pthread_mutex_lock(&done_mutex);
    ret = 0;
    while(!test_done && ret == 0) {
        ret = pthread_cond_timedwait(&cond, &done_mutex, &timeToWait);
    }
    finished = test_done;
    pthread_mutex_unlock(&done_mutex);

    if(!finished) {
        printf("Failed to wait end of test thread, deadlocked ?: %d\n", ret);
        return 2;
    }

    /* The thread has left its loop, so this join cannot block for long. */
    pthread_join(thread, &threadRet);
    if((intptr_t) threadRet != 0) {
        printf("test_thread failed\n");
        return 3;
    }
    return 0;
}
/* Build with gcc test_compiler_tls_lib.c -shared -o test_compiler_tls_lib.so -g -pthread -fPIC */
#include <stdio.h>
#include <pthread.h>
#include <unistd.h>
static __thread int tls_variable = 0;
static pthread_t thread_handle;
/*
 * Start routine of the helper thread created by lib_init.
 *
 * Sleeps, then reads the thread-local tls_variable and prints it.
 * arg is unused. Always returns NULL.
 */
static void* thread_that_use_tls_after_sleep(void* arg) {
    (void) arg;

    /* Wait 100ms so the host program has time to call dlclose() before
     * tls_variable is accessed; in that ordering the access below hangs. */
    usleep(100000);
    printf("TLS variable has value: %d\n", tls_variable);

    /* A pthread start routine must return a value: it becomes the
     * thread's exit status. */
    return NULL;
}
/* Non-zero while thread_handle refers to a thread that still has to be
 * joined; keeps lib_shutdown from joining a handle that was never set. */
static int thread_started = 0;

/*
 * Library constructor: starts the helper thread that will touch the
 * thread-local variable after a short delay.
 */
void __attribute__((constructor)) lib_init(void) {
    int err = pthread_create(&thread_handle, NULL, &thread_that_use_tls_after_sleep, NULL);
    if(err != 0) {
        /* A constructor cannot report failure to its caller; log it and
         * leave thread_started clear so that lib_shutdown skips the join. */
        fprintf(stderr, "lib_init: pthread_create failed: %d\n", err);
        return;
    }
    thread_started = 1;
}

/*
 * Library destructor: waits for the helper thread to terminate.
 * If the helper thread is itself blocked, this join does not return.
 */
void __attribute__((destructor)) lib_shutdown(void) {
    if(!thread_started) {
        return;
    }
    pthread_join(thread_handle, NULL);
    thread_started = 0;
}
-- System Information:
Debian Release: buster/sid
APT prefers unstable-debug
APT policy: (500, 'unstable-debug'), (500, 'unstable'), (500, 'stable')
Architecture: amd64 (x86_64)
Kernel: Linux 4.19.0-2-amd64 (SMP w/2 CPU cores)
Kernel taint flags: TAINT_OOT_MODULE, TAINT_UNSIGNED_MODULE
Locale: LANG=fr_FR.UTF-8, LC_CTYPE=fr_FR.UTF-8 (charmap=UTF-8), LANGUAGE=fr_FR.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash
Init: systemd (via /run/systemd/system)
Versions of packages gcc-8 depends on:
ii binutils 2.31.1-11
ii cpp-8 8.2.0-16
ii gcc-8-base 8.2.0-16
ii libc6 2.28-5
ii libcc1-0 8.2.0-16
ii libgcc-8-dev 8.2.0-16
ii libgcc1 1:8.2.0-16
ii libgmp10 2:6.1.2+dfsg-4
ii libisl19 0.20-2
ii libmpc3 1.1.0-1
ii libmpfr6 4.0.2-1
ii libstdc++6 8.2.0-16
ii zlib1g 1:1.2.11.dfsg-1
Versions of packages gcc-8 recommends:
ii libc6-dev 2.28-5
Versions of packages gcc-8 suggests:
pn gcc-8-doc <none>
pn gcc-8-locales <none>
pn gcc-8-multilib <none>
pn libasan5-dbg <none>
pn libatomic1-dbg <none>
pn libgcc1-dbg <none>
pn libgomp1-dbg <none>
pn libitm1-dbg <none>
pn liblsan0-dbg <none>
pn libmpx2-dbg <none>
pn libquadmath0-dbg <none>
pn libtsan0-dbg <none>
pn libubsan1-dbg <none>
-- no debconf information
@qAao
Copy link

qAao commented Aug 16, 2022

Hello, have you solved this problem?
What is the reason for this?
We had a similar problem, which was confusing.

@amurzeau
Copy link
Author

This is a test case for the original issue here: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=903514
The issue was worked around by not using compiler based TLS, so avoiding __thread.

What you need to do is avoid accessing compiler-based TLS variables while the destructor (declared with __attribute__((destructor))) is running: until the destructor returns, TLS variables cannot be accessed from another thread.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment