[lttng-dev] [URCU PATCH 3/3] call_rcu: use wfcqueue, eliminate false-sharing

Paul E. McKenney paulmck at linux.vnet.ibm.com
Mon Oct 8 11:10:19 EDT 2012


On Mon, Oct 08, 2012 at 10:49:16AM -0400, Mathieu Desnoyers wrote:
> * Lai Jiangshan (laijs at cn.fujitsu.com) wrote:
> > On 10/02/2012 10:16 PM, Mathieu Desnoyers wrote:
> > > Eliminate false-sharing between call_rcu (enqueuer) and worker threads
> > > on the queue head and tail.
> > > 
> > > Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers at efficios.com>
> > > ---
> > > diff --git a/tests/Makefile.am b/tests/Makefile.am
> > > index 81718bb..c92bbe6 100644
> > > --- a/tests/Makefile.am
> > > +++ b/tests/Makefile.am
> > > @@ -30,14 +30,14 @@ if COMPAT_FUTEX
> > >  COMPAT+=$(top_srcdir)/compat_futex.c
> > >  endif
> > >  
> > > -URCU=$(top_srcdir)/urcu.c $(top_srcdir)/urcu-pointer.c $(top_srcdir)/wfqueue.c $(COMPAT)
> > > -URCU_QSBR=$(top_srcdir)/urcu-qsbr.c $(top_srcdir)/urcu-pointer.c $(top_srcdir)/wfqueue.c $(COMPAT)
> > > +URCU=$(top_srcdir)/urcu.c $(top_srcdir)/urcu-pointer.c $(top_srcdir)/wfcqueue.c $(COMPAT)
> > > +URCU_QSBR=$(top_srcdir)/urcu-qsbr.c $(top_srcdir)/urcu-pointer.c $(top_srcdir)/wfcqueue.c $(COMPAT)
> > >  # URCU_MB uses urcu.c but -DRCU_MB must be defined
> > > -URCU_MB=$(top_srcdir)/urcu.c $(top_srcdir)/urcu-pointer.c $(top_srcdir)/wfqueue.c $(COMPAT)
> > > +URCU_MB=$(top_srcdir)/urcu.c $(top_srcdir)/urcu-pointer.c $(top_srcdir)/wfcqueue.c $(COMPAT)
> > >  # URCU_SIGNAL uses urcu.c but -DRCU_SIGNAL must be defined
> > > -URCU_SIGNAL=$(top_srcdir)/urcu.c $(top_srcdir)/urcu-pointer.c $(top_srcdir)/wfqueue.c $(COMPAT)
> > > -URCU_BP=$(top_srcdir)/urcu-bp.c $(top_srcdir)/urcu-pointer.c $(top_srcdir)/wfqueue.c $(COMPAT)
> > > -URCU_DEFER=$(top_srcdir)/urcu.c $(top_srcdir)/urcu-pointer.c $(top_srcdir)/wfqueue.c $(COMPAT)
> > > +URCU_SIGNAL=$(top_srcdir)/urcu.c $(top_srcdir)/urcu-pointer.c $(top_srcdir)/wfcqueue.c $(COMPAT)
> > > +URCU_BP=$(top_srcdir)/urcu-bp.c $(top_srcdir)/urcu-pointer.c $(top_srcdir)/wfcqueue.c $(COMPAT)
> > > +URCU_DEFER=$(top_srcdir)/urcu.c $(top_srcdir)/urcu-pointer.c $(top_srcdir)/wfcqueue.c $(COMPAT)
> > >  
> > >  URCU_COMMON_LIB=$(top_builddir)/liburcu-common.la
> > >  URCU_LIB=$(top_builddir)/liburcu.la
> > > diff --git a/urcu-call-rcu-impl.h b/urcu-call-rcu-impl.h
> > > index 13b24ff..cf65992 100644
> > > --- a/urcu-call-rcu-impl.h
> > > +++ b/urcu-call-rcu-impl.h
> > > @@ -21,6 +21,7 @@
> > >   */
> > >  
> > >  #define _GNU_SOURCE
> > > +#define _LGPL_SOURCE
> > >  #include <stdio.h>
> > >  #include <pthread.h>
> > >  #include <signal.h>
> > > @@ -35,7 +36,7 @@
> > >  #include <sched.h>
> > >  
> > >  #include "config.h"
> > > -#include "urcu/wfqueue.h"
> > > +#include "urcu/wfcqueue.h"
> > >  #include "urcu-call-rcu.h"
> > >  #include "urcu-pointer.h"
> > >  #include "urcu/list.h"
> > > @@ -46,7 +47,14 @@
> > >  /* Data structure that identifies a call_rcu thread. */
> > >  
> > >  struct call_rcu_data {
> > > -	struct cds_wfq_queue cbs;
> > > +	/*
> > > +	 * Align the tail on cache line size to eliminate false-sharing
> > > +	 * with head.
> > > +	 */
> > > +	struct cds_wfcq_tail __attribute__((aligned(CAA_CACHE_LINE_SIZE))) cbs_tail;
> > > +	/* Alignment on cache line size will add padding here */
> > > +
> > > +	struct cds_wfcq_head cbs_head;
> > 
> > 
> > wrong here. In this code, cbs_tail and cbs_head are in the same cache line.
> > 
> > ---
> > 
> > struct call_rcu_data {
> > 	struct cds_wfcq_tail cbs_tail;
> > 	struct cds_wfcq_head __attribute__((aligned(CAA_CACHE_LINE_SIZE))) cbs_head;
> > 	/* other fields, can move some fields up to use the room between tail and head */
> > };
> > 
> > # cat test.c
> > 
> > struct a {
> > 	int __attribute__((aligned(64))) i;
> > 	int j;
> > };
> > struct b {
> > 	int i;
> > 	int __attribute__((aligned(64))) j;
> > };
> > 
> > void main(void)
> > {
> > 	printf("%d,%d\n", sizeof(struct a), sizeof(struct b));
> > }
> > 
> > # ./a.out
> > 64,128
> > 
> 
> Good point! While we are there, I notice that the "qlen" count, kept for
> debugging, is causing false-sharing too. I wonder if we should split
> this counter in two counters: nr_enqueue and nr_dequeue, which would sit
> in two different cache lines ? It's mainly Paul who cares about this
> counter. Thoughts ?

Works for me, as long as nr_enqueue and nr_dequeue are both unsigned long
to avoid issues with overflow.

							Thanx, Paul

> Here is the fix to the problem you noticed above:
> 
> commit b9f893b69fbc31baea418794938f4eb74cc4923a
> Author: Mathieu Desnoyers <mathieu.desnoyers at efficios.com>
> Date:   Mon Oct 8 10:44:38 2012 -0400
> 
>     Fix urcu-call-rcu-impl.h: false-sharing
>     
>     > >  struct call_rcu_data {
>     > > -   struct cds_wfq_queue cbs;
>     > > +   /*
>     > > +    * Align the tail on cache line size to eliminate false-sharing
>     > > +    * with head.
>     > > +    */
>     > > +   struct cds_wfcq_tail __attribute__((aligned(CAA_CACHE_LINE_SIZE))) cbs_tail;
>     > > +   /* Alignment on cache line size will add padding here */
>     > > +
>     > > +   struct cds_wfcq_head cbs_head;
>     >
>     >
>     > wrong here. In this code, cbs_tail and cbs_head are in the same cache line.
>     
>     Reported-by: Lai Jiangshan <laijs at cn.fujitsu.com>
>     Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers at efficios.com>
> 
> Thanks!
> 
> Mathieu
> 
> -- 
> Mathieu Desnoyers
> Operating System Efficiency R&D Consultant
> EfficiOS Inc.
> http://www.efficios.com
> 




More information about the lttng-dev mailing list