[ltt-dev] [URCU RFC patch 1/3] call_rcu: use cpu affinity for per-cpu call_rcu threads

Paul E. McKenney paulmck at linux.vnet.ibm.com
Mon Jun 6 15:44:14 EDT 2011


On Mon, Jun 06, 2011 at 01:45:59PM -0400, Mathieu Desnoyers wrote:
> I played a bit with the call_rcu() implementation alongside my
> rbtree tests, and noticed the following:
> 
> If I use per-cpu call_rcu threads with URCU_CALL_RCU_RT flag, with one
> updater thread only for my rbtree (no reader), I get 38365 updates/s.
> If I add cpu affinity to these per-cpu call_rcu threads (I have prepared
> a patch that does this), it jumps to 54219 updates/s.  So it looks like
> keeping per-cpu affinity for the call_rcu thread is a good thing.

Makes sense to me!

Main comment is that API.h should also be updated.

							Thanx, Paul

> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers at efficios.com>
> ---
>  API.txt              |    7 +++++--
>  tests/rcutorture.h   |    4 ++--
>  urcu-call-rcu-impl.h |   51 ++++++++++++++++++++++++++++++++++++++++++++-------
>  urcu-call-rcu.h      |    3 ++-
>  urcu-qsbr.c          |    1 +
>  urcu.c               |    1 +
>  6 files changed, 55 insertions(+), 12 deletions(-)
> 
> Index: userspace-rcu/API.txt
> ===================================================================
> --- userspace-rcu.orig/API.txt
> +++ userspace-rcu/API.txt
> @@ -59,12 +59,15 @@ void call_rcu(struct rcu_head *head,
> 
>  		call_rcu(&p->rcu, func);
> 
> -struct call_rcu_data *create_call_rcu_data(unsigned long flags);
> +struct call_rcu_data *create_call_rcu_data(unsigned long flags,
> +					   int cpu_affinity);
> 
>  	Returns a handle that can be passed to the following
>  	primitives.  The "flags" argument can be zero, or can be
>  	URCU_CALL_RCU_RT if the worker threads associated with the
> -	new helper thread are to get real-time response.
> +	new helper thread are to get real-time response. The argument
> +	"cpu_affinity" specifies the cpu to which the call_rcu thread should
> +	be affined. It is ignored if negative.
> 
>  struct call_rcu_data *get_default_call_rcu_data(void);
> 
> Index: userspace-rcu/tests/rcutorture.h
> ===================================================================
> --- userspace-rcu.orig/tests/rcutorture.h
> +++ userspace-rcu/tests/rcutorture.h
> @@ -156,7 +156,7 @@ void *rcu_update_perf_test(void *arg)
>  	if ((random() & 0xf00) == 0) {
>  		struct call_rcu_data *crdp;
> 
> -		crdp = create_call_rcu_data(0);
> +		crdp = create_call_rcu_data(0, -1);
>  		if (crdp != NULL) {
>  			fprintf(stderr,
>  				"Using per-thread call_rcu() worker.\n");
> @@ -385,7 +385,7 @@ void *rcu_fake_update_stress_test(void *
>  	if ((random() & 0xf00) == 0) {
>  		struct call_rcu_data *crdp;
> 
> -		crdp = create_call_rcu_data(0);
> +		crdp = create_call_rcu_data(0, -1);
>  		if (crdp != NULL) {
>  			fprintf(stderr,
>  				"Using per-thread call_rcu() worker.\n");
> Index: userspace-rcu/urcu-call-rcu-impl.h
> ===================================================================
> --- userspace-rcu.orig/urcu-call-rcu-impl.h
> +++ userspace-rcu/urcu-call-rcu-impl.h
> @@ -20,6 +20,7 @@
>   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>   */
> 
> +#define _GNU_SOURCE
>  #include <stdio.h>
>  #include <pthread.h>
>  #include <signal.h>
> @@ -31,6 +32,7 @@
>  #include <sys/time.h>
>  #include <syscall.h>
>  #include <unistd.h>
> +#include <sched.h>
> 
>  #include "config.h"
>  #include "urcu/wfqueue.h"
> @@ -47,6 +49,7 @@ struct call_rcu_data {
>  	pthread_cond_t cond;
>  	unsigned long qlen;
>  	pthread_t tid;
> +	int cpu_affinity;
>  	struct cds_list_head list;
>  } __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
> 
> @@ -146,6 +149,31 @@ static void call_rcu_unlock(pthread_mute
>  	}
>  }
> 
> +#if HAVE_SCHED_SETAFFINITY
> +static
> +int set_thread_cpu_affinity(struct call_rcu_data *crdp)
> +{
> +	cpu_set_t mask;
> +
> +	if (crdp->cpu_affinity < 0)
> +		return 0;
> +
> +	CPU_ZERO(&mask);
> +	CPU_SET(crdp->cpu_affinity, &mask);
> +#if SCHED_SETAFFINITY_ARGS == 2
> +	return sched_setaffinity(0, &mask);
> +#else
> +	return sched_setaffinity(0, sizeof(mask), &mask);
> +#endif
> +}
> +#else
> +static
> +int set_thread_cpu_affinity(struct call_rcu_data *crdp)
> +{
> +	return 0;
> +}
> +#endif
> +
>  /* This is the code run by each call_rcu thread. */
> 
>  static void *call_rcu_thread(void *arg)
> @@ -156,6 +184,11 @@ static void *call_rcu_thread(void *arg)
>  	struct call_rcu_data *crdp = (struct call_rcu_data *)arg;
>  	struct rcu_head *rhp;
> 
> +	if (set_thread_cpu_affinity(crdp) != 0) {
> +		perror("pthread_setaffinity_np");
> +		exit(-1);
> +	}
> +
>  	thread_call_rcu_data = crdp;
>  	for (;;) {
>  		if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) {
> @@ -214,7 +247,8 @@ static void *call_rcu_thread(void *arg)
>   */
> 
>  static void call_rcu_data_init(struct call_rcu_data **crdpp,
> -			       unsigned long flags)
> +			       unsigned long flags,
> +			       int cpu_affinity)
>  {
>  	struct call_rcu_data *crdp;
> 
> @@ -236,6 +270,7 @@ static void call_rcu_data_init(struct ca
>  	}
>  	crdp->flags = flags | URCU_CALL_RCU_RUNNING;
>  	cds_list_add(&crdp->list, &call_rcu_data_list);
> +	crdp->cpu_affinity = cpu_affinity;
>  	cmm_smp_mb();  /* Structure initialized before pointer is planted. */
>  	*crdpp = crdp;
>  	if (pthread_create(&crdp->tid, NULL, call_rcu_thread, crdp) != 0) {
> @@ -280,20 +315,22 @@ pthread_t get_call_rcu_thread(struct cal
>   * Create a call_rcu_data structure (with thread) and return a pointer.
>   */
> 
> -static struct call_rcu_data *__create_call_rcu_data(unsigned long flags)
> +static struct call_rcu_data *__create_call_rcu_data(unsigned long flags,
> +						    int cpu_affinity)
>  {
>  	struct call_rcu_data *crdp;
> 
> -	call_rcu_data_init(&crdp, flags);
> +	call_rcu_data_init(&crdp, flags, cpu_affinity);
>  	return crdp;
>  }
> 
> -struct call_rcu_data *create_call_rcu_data(unsigned long flags)
> +struct call_rcu_data *create_call_rcu_data(unsigned long flags,
> +					   int cpu_affinity)
>  {
>  	struct call_rcu_data *crdp;
> 
>  	call_rcu_lock(&call_rcu_mutex);
> -	crdp = __create_call_rcu_data(flags);
> +	crdp = __create_call_rcu_data(flags, cpu_affinity);
>  	call_rcu_unlock(&call_rcu_mutex);
>  	return crdp;
>  }
> @@ -346,7 +383,7 @@ struct call_rcu_data *get_default_call_r
>  		call_rcu_unlock(&call_rcu_mutex);
>  		return default_call_rcu_data;
>  	}
> -	call_rcu_data_init(&default_call_rcu_data, 0);
> +	call_rcu_data_init(&default_call_rcu_data, 0, -1);
>  	call_rcu_unlock(&call_rcu_mutex);
>  	return default_call_rcu_data;
>  }
> @@ -434,7 +471,7 @@ int create_all_cpu_call_rcu_data(unsigne
>  			call_rcu_unlock(&call_rcu_mutex);
>  			continue;
>  		}
> -		crdp = __create_call_rcu_data(flags);
> +		crdp = __create_call_rcu_data(flags, i);
>  		if (crdp == NULL) {
>  			call_rcu_unlock(&call_rcu_mutex);
>  			errno = ENOMEM;
> Index: userspace-rcu/urcu-call-rcu.h
> ===================================================================
> --- userspace-rcu.orig/urcu-call-rcu.h
> +++ userspace-rcu/urcu-call-rcu.h
> @@ -64,7 +64,8 @@ struct rcu_head {
>   */
>  struct call_rcu_data *get_cpu_call_rcu_data(int cpu);
>  pthread_t get_call_rcu_thread(struct call_rcu_data *crdp);
> -struct call_rcu_data *create_call_rcu_data(unsigned long flags);
> +struct call_rcu_data *create_call_rcu_data(unsigned long flags,
> +					   int cpu_affinity);
>  int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp);
>  struct call_rcu_data *get_default_call_rcu_data(void);
>  struct call_rcu_data *get_call_rcu_data(void);
> Index: userspace-rcu/urcu-qsbr.c
> ===================================================================
> --- userspace-rcu.orig/urcu-qsbr.c
> +++ userspace-rcu/urcu-qsbr.c
> @@ -23,6 +23,7 @@
>   * IBM's contributions to this file may be relicensed under LGPLv2 or later.
>   */
> 
> +#define _GNU_SOURCE
>  #include <stdio.h>
>  #include <pthread.h>
>  #include <signal.h>
> Index: userspace-rcu/urcu.c
> ===================================================================
> --- userspace-rcu.orig/urcu.c
> +++ userspace-rcu/urcu.c
> @@ -24,6 +24,7 @@
>   */
> 
>  #define _BSD_SOURCE
> +#define _GNU_SOURCE
>  #include <stdio.h>
>  #include <pthread.h>
>  #include <signal.h>
> 




More information about the lttng-dev mailing list