[lttng-dev] some troubles having lttgng-ust actually log something on a shared library

Mathieu Desnoyers compudj at krystal.dyndns.org
Thu Dec 22 11:58:01 EST 2011


Some more info about the libuuid issue:

Here is my stack when the program hangs. It turns out that we trigger a
deadlock between a mutex in ld within the guts of libc and the mutex I
use to protect operations touching UST data structures:

thread 1:
- _dl_open first takes the
    __rtld_lock_lock_recursive (GL(dl_load_lock))
- then, within tracepoint_register_lib, we take the UST lock.

#0  __lll_lock_wait ()
    at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
No locals.
#1  0x00007f0fac16c0e9 in _L_lock_953 () from /lib/libpthread.so.0
No symbol table info available.
#2  0x00007f0fac16bf0b in __pthread_mutex_lock (mutex=0x7f0facbbf900)
    at pthread_mutex_lock.c:61
        ignore1 = <value optimized out>
        ignore2 = -1396967168
        ignore3 = -512
        __PRETTY_FUNCTION__ = "__pthread_mutex_lock"
        type = <value optimized out>
#3  0x00007f0fac9a0aa2 in tracepoint_register_lib (
    tracepoints_start=0x7f0facdc1618, tracepoints_count=2) at tracepoint.c:555
        iter = <value optimized out>
        __func__ = "tracepoint_register_lib"
#4  0x00007f0facbc0d57 in ?? ()
No symbol table info available.
#5  0x00007fffb9c4e080 in ?? ()
No symbol table info available.
#6  0x00007f0facdc1320 in ?? ()
No symbol table info available.
#7  0x00007fffb9c4e0a0 in ?? ()
No symbol table info available.
#8  0x00007f0facbc1096 in ?? ()
No symbol table info available.
#9  0x00007fffb9c4e108 in ?? ()
No symbol table info available.
#10 0x0000000000000001 in ?? ()
No symbol table info available.
#11 0x00007fffb9c4e688 in ?? ()
No symbol table info available.
#12 0x00007f0facbc0acb in ?? ()
No symbol table info available.
#13 0x00007f0facbc0000 in ?? ()
No symbol table info available.
#14 0x00007f0fadae0949 in call_init (l=0x1b60030, argc=16, 
    argv=0x7fffb9c4e080, env=0x7fffb9c4e698) at dl-init.c:70
        init_array = <value optimized out>
#15 0x00007f0fadae0a87 in _dl_init (main_map=0x1b60030, argc=1, 
    argv=0x7fffb9c4e688, env=0x7fffb9c4e698) at dl-init.c:134
        preinit_array = <value optimized out>
        preinit_array_size = 0x0
        i = 4294967295
#16 0x00007f0fadae4e21 in dl_open_worker (a=<value optimized out>)
    at dl-open.c:463
        args = 0x7fffb9c4e310
        file = <value optimized out>
        mode = -2147483647
        call_map = <value optimized out>
        __PRETTY_FUNCTION__ = "dl_open_worker"
        new = 0x1b60030

        r = <value optimized out>
        reloc_mode = 28705512
        l = 0x7
        any_tls = <value optimized out>
#17 0x00007f0fadae05e6 in _dl_catch_error (objname=<value optimized out>, 
    errstring=<value optimized out>, mallocedp=<value optimized out>, 
    operate=<value optimized out>, args=<value optimized out>)
    at dl-error.c:178
        errcode = <value optimized out>
        old = 0x7fffb9c4e410
        c = {objname = 0x7fffb9c4e390 "", errstring = 0x0, malloced = 72, 
          env = {{__jmpbuf = {2147483649, -6018440278980550012, -2, 4196808, 
                1, 4197324, -6018440279045561724, -5937401677670767996}, 
              __mask_was_saved = -1381120784, __saved_mask = {__val = {0, 
                  139705315081674, 11341735, 140736310076160, 140736310076432, 
                  140736310076160, 139705317093248, 7, 4177789447, 
                  139705315083504, 0, 7, 65277960, 139705309705000, 
                  140736310076512, 139705309705032}}}}}
#18 0x00007f0fadae460a in _dl_open (file=0x400bcc "./libmotion.so", 
    mode=-2147483647, caller_dlopen=0x4009c8, nsid=-2, argc=1, argv=0x293b, 
    env=0x7fffb9c4e698) at dl-open.c:554
        args = {file = 0x400bcc "./libmotion.so", mode = -2147483647, 
          caller_dlopen = 0x4009c8, caller_dl_open = 0x7f0fad5bbf66, 
          map = 0x1b60030, nsid = 0, argc = 1, argv = 0x7fffb9c4e688, 
          env = 0x7fffb9c4e698}
        objname = <value optimized out>
        errstring = <value optimized out>
        malloced = <value optimized out>
        errcode = <value optimized out>
        __PRETTY_FUNCTION__ = "_dl_open"
#19 0x00007f0fad5bbf66 in dlopen_doit (a=<value optimized out>) at dlopen.c:67
        args = 0x7fffb9c4e530
#20 0x00007f0fadae05e6 in _dl_catch_error (objname=<value optimized out>, 
    errstring=<value optimized out>, mallocedp=<value optimized out>, 
    operate=<value optimized out>, args=<value optimized out>)
    at dl-error.c:178
        errcode = <value optimized out>
        old = 0x0
        c = {objname = 0x7f0fad2625a8 "S\032", errstring = 0x0, 
          malloced = 208, env = {{__jmpbuf = {139705311813824, 
                -5937400952046345596, 140736310076720, 140736310077056, 0, 0, 
                -6018440278972161404, -5937401677670767996}, 
              __mask_was_saved = -1378944648, __saved_mask = {__val = {
                  139702401236993, 4294967295, 139705314959880, 1, 6296144, 0, 
                  0, 0, 0, 139705315099634, 1, 0, 6296184, 139705309705800, 
                  4196392, 0}}}}}
#21 0x00007f0fad5bc2ec in _dlerror_run (operate=0x7f0fad5bbf00 <dlopen_doit>, 
    args=0x7fffb9c4e530) at dlerror.c:164
        result = 0x7f0fad7be0c0
#22 0x00007f0fad5bbee1 in __dlopen (file=<value optimized out>, 
    mode=<value optimized out>) at dlopen.c:88
        args = {file = 0x400bcc "./libmotion.so", mode = 1, 
#23 0x00000000004009c8 in main (argc=1, argv=0x7fffb9c4e688)
    at run_motion.cpp:9
        motion_library = 0x400af0

(thread 2 omitted, because it does not hold any lock)

thread 3: first takes the UST lock in handle_message, then
tls_get_addr_tail() takes __rtld_lock_lock_recursive
(GL(dl_load_lock));. This only occurs when libuuid is loaded from a
dlopen() dependency, as the case where it is linked with the application
(-luuid). From
http://gcc.gnu.org/onlinedocs/gcc-3.3.1/gcc/Thread-Local.html and
http://people.redhat.com/drepper/tls.pdf , it appears that in the
dlopen() case, the TLS are fixed lazily when used. If a lock is taken in
the fixup, this can lead to interesting deadlocks.

In addition, even though it is for Solaris, the following page seems to
document well the difference between TLS allocation at program startup
and at dynamic linking (dlopen):
http://docs.oracle.com/cd/E23824_01/html/819-0690/gentextid-22601.html#scrolltoc

#0  __lll_lock_wait ()
    at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
No locals.
#1  0x00007f0fac16c104 in _L_lock_1024 () from /lib/libpthread.so.0
No symbol table info available.
#2  0x00007f0fac16bf67 in __pthread_mutex_lock (mutex=0x7f0fadcf1948)
    at pthread_mutex_lock.c:82
        ignore1 = <value optimized out>
        ignore2 = <value optimized out>
        ignore3 = <value optimized out>
        __PRETTY_FUNCTION__ = "__pthread_mutex_lock"
        type = 2916030792
#3  0x00007f0fadae3767 in tls_get_addr_tail (dtv=<value optimized out>, 
    the_map=0x1b615a0, module=5) at dl-tls.c:722
No locals.
#4  0x00007f0fadae3bd0 in __tls_get_addr (ti=0x7f0fac582330) at dl-tls.c:767
        dtv = 0x7f0fadcf1948
        the_map = 0x80
        p = 0xfffffffffffffe00
#5  0x00007f0fac38087a in ?? () from /lib/libuuid.so.1
No symbol table info available.
#6  0x00007f0fac381729 in uuid_generate () from /lib/libuuid.so.1
No symbol table info available.
#7  0x00007f0fac99e491 in ltt_session_create () at ltt-events.c:267
No locals.
#8  0x00007f0fac99a6d5 in lttng_abi_create_session (objd=-1378936504, cmd=128, 
    arg=5) at lttng-ust-abi.c:230
        session = <value optimized out>
        session_objd = <value optimized out>
#9  lttng_cmd (objd=-1378936504, cmd=128, arg=5) at lttng-ust-abi.c:302
No locals.
#10 0x00007f0fac9990b7 in handle_message (arg=<value optimized out>)
    at lttng-ust-comm.c:276
        ret = -1378936504
        lur = {handle = 0, cmd = 0, ret_code = 0, ret_val = 0, u = {channel = {
              memory_map_size = 0}, stream = {memory_map_size = 0}, version = {
              major = 0, minor = 0, patchlevel = 0}, tracepoint = {
              name = '\000' <repeats 255 times>, 
              loglevel = '\000' <repeats 255 times>, loglevel_value = 0}}}
        ops = <value optimized out>
        shm_fd = 0
        wait_fd = 0
#11 ust_listener_thread (arg=<value optimized out>) at lttng-ust-comm.c:710
        len = <value optimized out>
        lum = {handle = 0, cmd = 64, u = {channel = {overwrite = 0, 
              subbuf_size = 0, num_subbuf = 0, switch_timer_interval = 0, 
              read_timer_interval = 0, output = LTTNG_UST_MMAP, shm_fd = 0, 
              wait_fd = 0, memory_map_size = 0}, stream = {shm_fd = 0, 
              wait_fd = 0, memory_map_size = 0}, event = {
              name = '\000' <repeats 255 times>, 
              instrumentation = LTTNG_UST_TRACEPOINT, u = {<No data fields>}}, 
            context = {ctx = LTTNG_UST_CONTEXT_VTID, u = {<No data fields>}}, 

More to come...

Thanks,

Mathieu

-- 
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com



More information about the lttng-dev mailing list