[lttng-dev] epoll_wait reboot kernel?

Mathieu Desnoyers mathieu.desnoyers at efficios.com
Mon Mar 10 15:38:44 EDT 2014


----- Original Message -----
> From: "zhenyu.ren" <zhenyu.ren at aliyun.com>
> To: "Mathieu Desnoyers" <mathieu.desnoyers at efficios.com>
> Cc: "lttng-dev" <lttng-dev at lists.lttng.org>
> Sent: Tuesday, February 25, 2014 1:01:30 AM
> Subject: Re: [lttng-dev] epoll_wait reboot kernel?
> 
> >Which gcc version do you use ?
> $gcc --version
> gcc (GCC) 4.1.2 20080704 (Red Hat 4.1.2-46)
> 
> I disassembed lib_ring_buffer_write() and focused on how it calles
> lib_ring_buffer_do_copy(indeed inline_memcpy)
> 
> void lib_ring_buffer_write(const struct lib_ring_buffer_config *config,
>                            struct lib_ring_buffer_ctx *ctx,
>                            const void *src, size_t len)
> {
>         struct lib_ring_buffer_backend *bufb = &ctx->buf->backend;
>         struct channel_backend *chanb = &ctx->chan->backend;
>         size_t sbidx, index;
>         size_t offset = ctx->buf_offset;
>         ssize_t pagecpy;
>         struct lib_ring_buffer_backend_pages *rpages;
>         unsigned long sb_bindex, id;
>         if (unlikely(!len))
>                 return;
>         offset &= chanb->buf_size - 1;
>         sbidx = offset >> chanb->subbuf_size_order;
>         index = (offset & (chanb->subbuf_size - 1)) >> PAGE_SHIFT;
>         pagecpy = min_t(size_t, len, (-offset) & ~PAGE_MASK);
>         id = bufb->buf_wsb[sbidx].id;
>         sb_bindex = subbuffer_id_get_index(config, id);
>         rpages = bufb->array[sb_bindex];
>         CHAN_WARN_ON(ctx->chan,
>                      config->mode == RING_BUFFER_OVERWRITE
>                      && subbuffer_id_is_noref(config, id));
>         if (likely(pagecpy == len))
>                 lib_ring_buffer_do_copy(config,
>                                         rpages->p[index].virt
>                                             + (offset & ~PAGE_MASK),
>                                         src, len);
>         else
>                 _lib_ring_buffer_write(bufb, offset, src, len, 0);
>         ctx->buf_offset += len;
> }
> 00000000000003a0 <lib_ring_buffer_write>:
>      3a0:       55                      push   %rbp
>      3a1:       48 89 e5                mov    %rsp,%rbp
>      3a4:       48 83 ec 40             sub    $0x40,%rsp
>      3a8:       48 89 5d d8             mov    %rbx,0xffffffffffffffd8(%rbp)
>      3ac:       4c 89 65 e0             mov    %r12,0xffffffffffffffe0(%rbp)
>      3b0:       4c 89 6d e8             mov    %r13,0xffffffffffffffe8(%rbp)
>      3b4:       4c 89 75 f0             mov    %r14,0xfffffffffffffff0(%rbp)
>      3b8:       4c 89 7d f8             mov    %r15,0xfffffffffffffff8(%rbp)
>      3bc:       e8 00 00 00 00          callq  3c1
>      <lib_ring_buffer_write+0x21>
>      3c1:       49 89 f5                mov    %rsi,%r13
>      rsi/r13->               ctx
>      3c4:       48 85 c9                test   %rcx,%rcx
>      rcx->                   len
>      3c7:       48 89 55 c8             mov    %rdx,0xffffffffffffffc8(%rbp)
>      rdx/c8->                src
>      3cb:       49 89 cc                mov    %rcx,%r12
>      rcx/r12->               len
>      3ce:       48 8b 76 20             mov    0x20(%rsi),%rsi
>      rsi->                   ctx->buf
>      3d2:       4d 8b 45 00             mov    0x0(%r13),%r8
>      r8->                    ctx->chan
>      3d6:       49 8b 55 30             mov    0x30(%r13),%rdx
>      rdx->                   offset =
>      ctx->buf_offset
>      3da:       0f 84 a5 00 00 00       je     485
>      <lib_ring_buffer_write+0xe5>         if(!len)  return
>      3e0:       49 8b 58 10             mov    0x10(%r8),%rbx
>      rbx->                   ctx->chan->backend
>      3e4:       49 8d 40 10             lea    0x10(%r8),%rax
>      rax->                   *chanb =
>      &ctx->chan->backend
>      3e8:       4c 8d 76 28             lea    0x28(%rsi),%r14
>      r14->                   *bufb =
>      &ctx->buf->backend
>      3ec:       48 83 eb 01             sub    $0x1,%rbx
>      rbx->                   chanb->buf_size
>      - 1
>      3f0:       48 21 d3                and    %rdx,%rbx
>      rbx->                   offset &=
>      chanb->buf_size -1
>      3f3:       48 8b 50 08             mov    0x8(%rax),%rdx
>      rdx->                   chanb->subbuf_size
>      3f7:       48 89 55 d0             mov    %rdx,0xffffffffffffffd0(%rbp)
>      d0->                    chanb->subbuf_size
>      3fb:       8b 48 10                mov    0x10(%rax),%ecx
>      ecx->
>      chanb->subbuf_size_order
>      3fe:       48 89 da                mov    %rbx,%rdx
>      rdx->                   offset
>      401:       48 8b 46 28             mov    0x28(%rsi),%rax
>      rax->                   bufb =
>      &ctx->buf->backen
>      405:       48 d3 ea                shr    %cl,%rdx
>      rdx->                   sbidx = offset
>      >> chanb->subbuf_size_orde
>      408:       48 8b 0c d0             mov    (%rax,%rdx,8),%rcx
>      rcx->                   id =
>      bufb->buf_wsb[sbidx].id
>      40c:       8b 47 08                mov    0x8(%rdi),%eax
>      eax->                   config->mode
>      40f:       85 c0                   test   %eax,%eax
>      CHAN_WARN_ON(... (config->mode ==
>      RING_BUFFER_OVERWRITE)...)
>      411:       0f 85 89 00 00 00       jne    4a0
>      <lib_ring_buffer_write+0x100>
>      417:       49 8b 46 10             mov    0x10(%r14),%rax
>      rax->                   bufb->array
>      41b:       48 89 ca                mov    %rcx,%rdx
>      rdx->                   id
>      41e:       83 e2 ff                and    $0xffffffffffffffff,%edx
>      421:       4c 8b 3c d0             mov    (%rax,%rdx,8),%r15
>      r15->                   rpages =
>      bufb->array[sb_binde
>      425:       48 b8 00 00 00 00 01    mov    $0x100000000,%rax
>      rax->                   SB_ID_NOREF_MASK
>      42c:       00 00 00
>      42f:       48 85 c8                test   %rcx,%rax
>      id & SB_ID_NOREF_MASK
>      432:       0f 95 c0                setne  %al
>      435:       0f b6 c0                movzbl %al,%eax
>      438:       48 85 c0                test   %rax,%rax
>      if(config->mode==RING_BUFFER_OVERWRITE
>      && subbuffer_id_is_noref(config,id))
>      43b:       75 6f                   jne    4ac
>      <lib_ring_buffer_write+0x10c>
>      43d:       48 89 d8                mov    %rbx,%rax
>      rax->                   offset
>      440:       48 f7 d8                neg    %rax
>      rax->                   -offset
>      443:       25 ff 0f 00 00          and    $0xfff,%eax
>      eax->                   (-offset) &
>      ~PAGE_MASK
>      448:       49 39 c4                cmp    %rax,%r12
>      cmp(len,(-offset) & ~PAGE_MASK)
>      44b:       49 0f 46 c4             cmovbe %r12,%rax
>      rax->                   pagecpy =
>      min_t(size_t, len, (-offset) & ~PAGE_MASK)
>      44f:       4c 39 e0                cmp    %r12,%rax
>      if(pagecpy == len)
>      452:       75 72                   jne    4c6
>      <lib_ring_buffer_write+0x126>        jmp to call
>      _lib_ring_buffer_write()
>      454:       48 8b 45 d0             mov    0xffffffffffffffd0(%rbp),%rax
>      rax->                   chanb->subbuf_size
>      458:       48 8b 75 c8             mov    0xffffffffffffffc8(%rbp),%rsi
>      rsi->                   src
>      45c:       48 83 e8 01             sub    $0x1,%rax
>      rax->
>      chanb->subbuf_size - 1
>      460:       48 21 d8                and    %rbx,%rax
>      rax->                   offset &
>      (chanb->subbuf_size - 1)
>      463:       81 e3 ff 0f 00 00       and    $0xfff,%ebx
>      ebx->                   offset &
>      ~PAGE_MASK
>      469:       48 c1 e8 0c             shr    $0xc,%rax
>      rax->
>      index=(offset&(chanb->subbuf_size-1))>>PAGE_SHIFT
>      46d:       48 c1 e0 04             shl    $0x4,%rax
>      rax->                   index
>      471:       4a 03 5c 38 20          add    0x20(%rax,%r15,1),%rbx
>      rbx->                   rpages->p[index].virt+
>      (offset & ~PAGE_MASK)
>      476:       48 89 da                mov    %rbx,%rdx
>      rdx->                   SHOULD BE
>      inline_memcpy() the 3rd parameter:len
>      479:       48 89 df                mov    %rbx,%rdi
>      rdi->                   1st parameter
>      47c:       e8 00 00 00 00          callq  481
>      <lib_ring_buffer_write+0xe1>         call inline_memcpy(dst,src,len)
>      481:       4d 01 65 30             add    %r12,0x30(%r13)
>      ctx->buf_offset += len(r12)
>      485:       48 8b 5d d8             mov    0xffffffffffffffd8(%rbp),%rbx
>      489:       4c 8b 65 e0             mov    0xffffffffffffffe0(%rbp),%r12
>      48d:       4c 8b 6d e8             mov    0xffffffffffffffe8(%rbp),%r13
>      491:       4c 8b 75 f0             mov    0xfffffffffffffff0(%rbp),%r14
>      495:       4c 8b 7d f8             mov    0xfffffffffffffff8(%rbp),%r15
>      499:       c9                      leaveq
>      49a:       c3                      retq
>      49b:       0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)
>      4a0:       49 8b 46 10             mov    0x10(%r14),%rax
>      rax->                   bufb->array
>      4a4:       4c 8b 3c c8             mov    (%rax,%rcx,8),%r15
>      r15->                   rpages =
>      bufb->array[sb_bindex]
>      4a8:       31 c0                   xor    %eax,%eax
>      eax->                   0
>      4aa:       eb 8c                   jmp    438
>      <lib_ring_buffer_write+0x98>         .......
>      4ac:       f0 41 ff 00             lock incl (%r8)
>      atomic_inc(&__chan->record_disabled
>      4b0:       be 6e 00 00 00          mov    $0x6e,%esi
>      4b5:       48 c7 c7 00 00 00 00    mov    $0x0,%rdi
>      4bc:       e8 00 00 00 00          callq  4c1
>      <lib_ring_buffer_write+0x121>        WARN_ON(1);
>      4c1:       e9 77 ff ff ff          jmpq   43d
>      <lib_ring_buffer_write+0x9d>         ....... jmp to if (pagecpy == len)
>      4c6:       48 8b 55 c8             mov    0xffffffffffffffc8(%rbp),%rdx
>      rdx->                   src
>      4ca:       45 31 c0                xor    %r8d,%r8d
>      r8d->                   0
>      4cd:       4c 89 e1                mov    %r12,%rcx
>      r12/rcx->               len
>      4d0:       48 89 de                mov    %rbx,%rsi
>      rbx/rsi->               offset
>      4d3:       4c 89 f7                mov    %r14,%rdi
>      r14/rdi->               bufb
>      4d6:       e8 00 00 00 00          callq  4db
>      <lib_ring_buffer_write+0x13b>        _lib_ring_buffer_write(bufb,
>      offset, src, len, 0);
>      4db:       eb a4                   jmp    481
>      <lib_ring_buffer_write+0xe1>         ...... jmp to ctx->buf_offset +=
>      len and return
> 
> 
>      All difference with good case is also where I am insterested in:
> 
> bad case:
>      45c:       48 83 e8 01             sub    $0x1,%rax
>      rax->
>      chanb->subbuf_size - 1
>      460:       48 21 d8                and    %rbx,%rax
>      rax->                   offset &
>      (chanb->subbuf_size - 1)
>      463:       81 e3 ff 0f 00 00       and    $0xfff,%ebx
>      ebx->                   offset &
>      ~PAGE_MASK
>      469:       48 c1 e8 0c             shr    $0xc,%rax
>      rax->
>      index=(offset&(chanb->subbuf_size-1))>>PAGE_SHIFT
>      46d:       48 c1 e0 04             shl    $0x4,%rax
>      rax->                   index
>      471:       4a 03 5c 38 20          add    0x20(%rax,%r15,1),%rbx
>      rbx->                   rpages->p[index].virt+
>      (offset & ~PAGE_MASK)
>      476:       48 89 da                mov    %rbx,%rdx
>      rdx->                   SHOULD BE
>      inline_memcpy() the 3rd parameter:len
>      479:       48 89 df                mov    %rbx,%rdi
>      rdi->                   1st parameter
>      47c:       e8 00 00 00 00          callq  481
>      <lib_ring_buffer_write+0xe1>         call inline_memcpy(dst,src,len)
> 
> 
> good case:
>      45c:       4c 89 e2                mov    %r12,%rdx
>      r12/rdx->       len
>      45f:       48 83 e8 01             sub    $0x1,%rax
>      rax->           chanb->subbuf_size - 1
>      463:       48 21 d8                and    %rbx,%rax
>      rax->           offset &
>      (chanb->subbuf_size - 1)
>      466:       81 e3 ff 0f 00 00       and    $0xfff,%ebx
>      ebx->           offset & ~PAGE_MASK
>      46c:       48 c1 e8 0c             shr    $0xc,%rax
>      rax->
>      index=(offset&(chanb->subbuf_size-1))>>PAGE_SHIFT
>      470:       48 c1 e0 04             shl    $0x4,%rax
>      rax->           index
>      474:       4a 03 5c 38 20          add    0x20(%rax,%r15,1),%rbx
>      rbx->           rpages->p[index].virt+ (offset &
>      ~PAGE_MASK)
>      479:       48 89 df                mov    %rbx,%rdi
>      rdi->           dst , the 1st parameter
>      of inline_memcpy(dst,src,len)
>      47c:       e8 00 00 00 00          callq  481
>      <lib_ring_buffer_write+0xe1>         call inline_memcpy(dst,src,len)
> 
> In good case, at line 45c, rdx ,which is the 3rd parameter of
> inline_memcpy(dst,src,len), comes from  %r12 that is len.However,in bad
> case,at line 476, rdx comes from rbx that is the 1st parameter of
> inline_memcpy(dst,src,len). So you can image what will happen if len is too
> large.
> 
> I don't know why GCC produce the wrong code.Does anyone has any clue?

Hrm, this looks like a compiler bug to me. First check if your distribution
vendor has an updated compiler. Perhaps try compiling with slightly different
compiler versions to see if you can reproduce the issue, try compiling with "-O0"
or "-O1" instead of "-O2" to see if the behavior changes. You might also want to
look for similar issues on the gcc bug tracker for your compiler version.

Thanks,

Mathieu

> 
> Thanks
> zhenyu.ren
> 
> 
> ----- Original Message -----
> > From: "zhenyu.ren" <zhenyu.ren at aliyun.com>
> > To: "lttng-dev" <lttng-dev at lists.lttng.org>, "zhenyu.ren"
> > <zhenyu.ren at aliyun.com>
> > Sent: Monday, February 24, 2014 4:36:36 AM
> > Subject: Re: [lttng-dev] epoll_wait reboot kernel?
> > 
> > Oh,It seems not epoll related . I am using DBG/ERR() as debug way ,but this
> > print-style method doesn't work as expected,so I mistake for epoll_wait.
> > It seems gcc problem because if  I changed lib_ring_buffer_do_copy as the
> > following,all  work well
> > 
> > #define lib_ring_buffer_do_copy(config, dest, src, len)         \
> > do {                                                            \
> >         size_t __len1 = (len);                                  \
> >         /*here , not __len=(len)*/
> > mdelay(0x135);\
> >         if (__builtin_constant_p(len))                          \
> >                 memcpy(dest, src, __len1);                      \
> >         else                                                    \
> >                 inline_memcpy(dest, src, __len1);               \
> > } while (0)
> > 
> > I disasembled lib_ring_buffer_write() and "vimdiff bad good" shows :
> > 49c:       48 89 da                mov    %rbx,%rdx
> > |       49c:
> > 4c 89 f2                mov    %r14,%rdx
> > 
> > here rdx is the 3rd parameter to inline_memcpy().
> > 
> > Thanks
> > zhenyu.ren
> > 
> > 
> > >Hi,lttng-dev
> > >
> >  >  I am working on rhel6(2.6.32.220) and ust is working very well.But when
> >  >  I
> >  >  use lttng-modules (after "lttng start"),the kernel reboots immediately
> >  >  >without any message output to console.
> > >
> > >  It took me some days finding out what results in reboot. I shortcut
> > >  __event_probe__##_name() before _tstruct but kernel still reboot(So what
> > >  left is  >metadata_printf).If I comment out "mask |= POLLIN" in
> > >  lttng_metadata_ring_buffer_poll() ,kernel do not reboot. What I find out
> > >  is before reboot the >callchain of  lttng-consumerd thread is
> > >  consumer_thread_metadata_poll()->lttng_poll_wait(wait for metadata
> > >  stream)->compat_epoll_wait().
> > >
> > > I want to use poll instead as epoll in lttng-tools ,I changed lttng-tools
> > > configure file to not use epoll(undef HAVE_EPOLL),but "lttng enable-event
> > > -a -k >"failed :(Do I miss anything?)
> > >#lttng enable-event -a -k
> > >DEBUG1 [17837/18080]: Wait for client response (in thread_manage_clients()
> > >at main.c:3709)
> > >DEBUG1 [17837/18080]: Receiving data from client ... (in
> > >thread_manage_clients() at main.c:3754)
> > >DEBUG1 [17837/18080]: Nothing recv() from client... continuing (in
> > >thread_manage_clients() at main.c:3758)
> > >DEBUG1 [17837/18080]: Clean command context structure (in
> > >clean_command_ctx() at main.c:535)
> > >DEBUG1 [17837/18080]: Accepting client command ... (in
> > >thread_manage_clients() at main.c:3667)
> > >DEBUG1 [17837/18080]: Wait for client response (in thread_manage_clients()
> > >at main.c:3709)
> > >DEBUG1 [17837/18080]: Receiving data from client ... (in
> > >thread_manage_clients() at main.c:3754)
> > >DEBUG1 [17837/18080]: Processing client command 7 (in process_client_msg()
> > >at main.c:2579)
> > >DEBUG1 [17837/18080]: Getting session k-test by name (in
> > >process_client_msg() at main.c:2656)
> > >DEBUG1 [17837/18080]: Creating kernel session (in create_kernel_session()
> > >at
> > >main.c:2495)
> > >DEBUG3 [17837/18080]: Created hashtable size 4 at 0x669410 of type 1 (in
> > >lttng_ht_new() at hashtable.c:112)
> > >DEBUG1 [17837/18080]: Kernel session created (fd: 21) (in
> > >kernel_create_session() at kernel.c:109)
> > >DEBUG3 [17837/18080]: Copying tracing session consumer output in kernel
> > >session (in copy_session_consumer() at main.c:2393)
> > >DEBUG3 [17837/18080]: Created hashtable size 4 at 0x66a750 of type 1 (in
> > >lttng_ht_new() at hashtable.c:112)
> > >DEBUG3 [17837/18080]: Copy session consumer subdir /kernel (in
> > >copy_session_consumer() at main.c:2428)
> > >DEBUG3 [17837/18080]: mkdir() recursive
> > >/root/lttng-traces/k-test-20140222-131335 with mode 504 for uid 0 and gid
> > >0
> > >(in run_as_mkdir_recursive() at >runas.c:310)
> > >DEBUG1 [17837/18080]: Using run_as_clone (in run_as() at runas.c:293)
> > >lttng-sessiond: ht-cleanup.c:92: thread_ht_cleanup: Assertion `pollfd ==
> > >ht_cleanup_pipe[0]' failed.
> > >
> > >  It seems lttng-tools  in ubuntu 12.04 is using poll,isnt it?
> > >
> >  > Does anyone have any suggestions to me ?
> > 
> > >PS:.lttng(ust/module/tool) version is 2.3.0 and urcu version is 0.8
> > 
> > 
> > _______________________________________________
> > lttng-dev mailing list
> > lttng-dev at lists.lttng.org
> > http://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev
> > 
> 
> --
> Mathieu Desnoyers
> EfficiOS Inc.
> http://www.efficios.com
> 

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com



More information about the lttng-dev mailing list