[ltt-dev] [PATCH] urcu-qsbr: avoid useless futex wakeups and burning CPU for long grace periods

Mathieu Desnoyers compudj at krystal.dyndns.org
Tue Aug 16 08:29:20 EDT 2011


* Paolo Bonzini (pbonzini at redhat.com) wrote:
>>>> @@ -136,7 +137,11 @@ extern int32_t gp_futex;
>>>> */
>>>> static inline void wake_up_gp(void)
>>>> {
>>>> - if (unlikely(uatomic_read(&gp_futex) == -1)) {
>>>> + if (unlikely(_CMM_LOAD_SHARED(rcu_reader.waiting))) {
>>>> + _CMM_STORE_SHARED(rcu_reader.waiting, 0);
>>>
>>> Commenting this memory barrier would be helpful too.
>>
>> Ok.
>
> I tried updating the formal model, but I had to basically rewrite it to  
> model the futex primitives and the list move algorithm used by urcu.  I  
> basically model a parity-flipping RCU and run the waker and waiter  
> infinitely, asserting that progress states (parity flips) are reached  
> infinite times.

Sounds good,

>
> My ProMeLa-fu is just a few hours old, so a careful review is  
> appreciated.  However, the model seems to be okay: it passes, and more  
> importantly it fails if I modify it to trigger known problems (missing  
> write barriers especially).

Yep. I found that failure injection was one of the only way I could get
myself to actually trust my models. ;-)

>
> The attached patches do the following:
>
> 1) rewrite the futex.spin model to match your proposed, simpler change;
>
> 2) add the optimization to tell waker threads whether they are still  
> being waited on;
>
> 3) change the futex_progress_late_dec model to trigger a different  
> failure, namely the missing memory barrier in the first version I sent  
> of the patch.
>
> Please let me know what you think about them.

Comments below,

>
> Thanks!
>
> Paolo

> From 19bbae3ec31b5b2a4ff80e3b867975660be0cb07 Mon Sep 17 00:00:00 2001
> From: Paolo Bonzini <pbonzini at redhat.com>
> Date: Sun, 14 Aug 2011 16:06:27 -0700
> Subject: [PATCH 1/3] new futex model
> 
> Rewrite the model to include the futex primitives and the list
> move algorithm used by urcu.  The model implements a full-blown
> parity-flipping RCU where grace period waits are driven exclusively
> by futexes.  It runs the waker and waiter infinitely, asserting
> that progress states (parity flips) are reached infinite times.
> ---
>  futex-wakeup/DEFINES    |    2 +-
>  futex-wakeup/futex.ltl  |    2 +-
>  futex-wakeup/futex.spin |  131 ++++++++++++++++++++++++----------------------
>  3 files changed, 70 insertions(+), 65 deletions(-)
> 
> diff --git a/futex-wakeup/DEFINES b/futex-wakeup/DEFINES
> index e328b55..79a9626 100644
> --- a/futex-wakeup/DEFINES
> +++ b/futex-wakeup/DEFINES
> @@ -1 +1 @@
> -#define queue_has_entry	(queue[0] == 1 || queue[1] == 1)
> +/* no common #defines used yet */
> diff --git a/futex-wakeup/futex.ltl b/futex-wakeup/futex.ltl
> index 3d6842e..8718641 100644
> --- a/futex-wakeup/futex.ltl
> +++ b/futex-wakeup/futex.ltl
> @@ -1 +1 @@
> -([] <> ((!np_) || (!queue_has_entry)))
> +([] <> !np_)
> diff --git a/futex-wakeup/futex.spin b/futex-wakeup/futex.spin
> index 5459a53..c29ade9 100644
> --- a/futex-wakeup/futex.spin
> +++ b/futex-wakeup/futex.spin
> @@ -4,30 +4,34 @@
>   * Algorithm verified :
>   *
>   * queue = 0;
> - * fut = 0;
> - * runvar = 0;
> + * gp_futex = 0;

Can you comment the initial value of "gp" ?

>   *
> - *                            Waker
> - *                            queue = 1;
> - *                            if (fut == -1) {
> - *                              fut = 0;
> + *                          Waker
> + *                          while (1) {
> + *                            queue = gp;

you might want to specify that queue is per-process. Maybe with a:

this.queue   in the comments ?

> + *                            if (gp_futex == -1) {
> + *                              gp_futex = 0;
> + *                              futex_wake = 1;
>   *                            }
> + *                          }
>   *
>   * Waiter
> + * in_registry = 1;
>   * while (1) {
> - *   progress:

Where is your progress noted ?

> - *   fut = fut - 1;
> - *   if (queue == 1) {
> - *     fut = 0;
> + *   gp_futex = -1;
> + *   in_registry &= (queue != gp);
> + *   if (all in_registry == 0) {
> + *     gp_futex = 0;
> + *     gp = !gp;
> + *     restart;
>   *   } else {
> - *     if (fut == -1) {
> - *        while (fut == -1) { }
> - *     }
> + *     futex_wake = (gp_futex == -1 ? 0 : 1);
> + *     while (futex_wake == 0) { }
>   *   }
>   *   queue = 0;
>   * }
>   *
> - * if queue = 1, then !_np
> + * if queue = gp, then !_np

It seems to be just !_np

>   *
>   * This program is free software; you can redistribute it and/or modify
>   * it under the terms of the GNU General Public License as published by
> @@ -49,76 +53,77 @@
>  #define get_pid()       (_pid)
>  
>  int queue[2] = 0;
> -int fut = 0;
> +int futex_wake = 0;
> +int gp_futex = 0;
> +int gp = 1;
> +int in_registry[2] = 0;
>  
>  active [2] proctype waker()
>  {
>  	assert(get_pid() < 2);
>  
> -	queue[get_pid()] = 1;
> -
> -	if
> -	:: (fut == -1) ->
> -		fut = 0;
> -	:: else ->
> -		skip;
> -	fi;
> -
> -	queue[get_pid()] = 1;
> -
> -	if
> -	:: (fut == -1) ->
> -		fut = 0;
> -	:: else ->
> -		skip;
> -	fi;
> -
> -#ifdef INJ_QUEUE_NO_WAKE
> -	queue[get_pid()] = 1;
> +	do
> +	:: 1 ->
> +		queue[get_pid()] = gp;
> +	
> +		if
> +		:: (gp_futex == -1) ->
> +			gp_futex = 0;
> +#ifndef INJ_QUEUE_NO_WAKE
> +			futex_wake = 1;
>  #endif
> +		:: else ->
> +			skip;
> +		fi;
> +	od;
>  }
>  
>  
>  active proctype waiter()
>  {
> +restart:
> +	in_registry[0] = 1;
> +	in_registry[1] = 1;
>  	do
>  	:: 1 ->
>  #ifndef INJ_LATE_DEC
> -		fut = fut - 1;
> +		gp_futex = -1;
>  #endif
> +
>  		if
> -		:: (queue[0] == 1 || queue[1] == 1) ->
> -#ifndef INJ_LATE_DEC
> -			fut = 0;
> -#endif
> -			skip;
> +		:: (in_registry[0] && queue[0] == gp) ->
> +			in_registry[0] = 0;
>  		:: else ->
> -#ifdef INJ_LATE_DEC
> -			fut = fut - 1;
> -#endif
> -			if
> -			:: (fut == -1) ->
> -				do
> -				:: 1 ->
> -					if
> -					:: (fut == -1) ->
> -						skip;
> -					:: else ->
> -						break;
> -					fi;
> -				od;
> -			:: else ->
> -				skip;
> -			fi;
> +			skip;
>  		fi;
> -progress:
>  		if
> -		:: queue[0] == 1 ->
> -			queue[0] = 0;
> +		:: (in_registry[1] && queue[1] == gp) ->
> +			in_registry[1] = 0;
> +		:: else ->
> +			skip;
>  		fi;
>  		if
> -		:: queue[1] == 1 ->
> -			queue[1] = 0;
> +		:: (in_registry[0] == 0 && in_registry[1] == 0) ->
> +progress:
> +#ifndef INJ_LATE_DEC
> +			gp_futex = 0;
> +#endif
> +			gp = !gp;

Shouldn't you flip the gp for both steps of synchronize_rcu ? Here, it
looks like you only flip gp after the reader passes through both of the
checks, which defeats the purpose the 2-phase grace period.

> +			goto restart;
> +		:: else ->
> +#ifdef INJ_LATE_DEC
> +			gp_futex = -1;
> +#endif
> +			futex_wake = gp_futex + 1;
> +			do
> +			:: 1 ->
> +				if
> +				:: (futex_wake == 0) ->
> +					skip;
> +				:: else ->
> +					break;
> +				fi;
> +			od;
>  		fi;
>  	od;
>  }
> -- 
> 1.7.6
> 
> 
> From 6187a36ce9fd21f8aa1d9f9942bd890e9f8c504a Mon Sep 17 00:00:00 2001
> From: Paolo Bonzini <pbonzini at redhat.com>
> Date: Sun, 14 Aug 2011 18:07:38 -0700
> Subject: [PATCH 2/3] model optimization of the waker (selective wake)
> 
> This patch adds to the model an optimization, whereby threads are
> told whether they are still being waited on, and skip the futex wakeup
> if not.
> ---
>  futex-wakeup/futex.spin |   43 ++++++++++++++++++++++++++++++++-----------
>  1 files changed, 32 insertions(+), 11 deletions(-)
> 
> diff --git a/futex-wakeup/futex.spin b/futex-wakeup/futex.spin
> index c29ade9..99e0322 100644
> --- a/futex-wakeup/futex.spin
> +++ b/futex-wakeup/futex.spin
> @@ -9,9 +9,11 @@
>   *                          Waker
>   *                          while (1) {
>   *                            queue = gp;
> - *                            if (gp_futex == -1) {
> - *                              gp_futex = 0;
> - *                              futex_wake = 1;
> + *                            if (waiting == 1) {

Here your pseudo-code does not match your promela model. You should add:

                                   this.waiting = 0;

> + *                              if (gp_futex == -1) {
> + *                                gp_futex = 0;
> + *                                futex_wake = 1;
> + *                              }
>   *                            }
>   *                          }
>   *
> @@ -19,6 +21,7 @@
>   * in_registry = 1;
>   * while (1) {
>   *   gp_futex = -1;
> + *   waiting |= (queue != gp);
>   *   in_registry &= (queue != gp);
>   *   if (all in_registry == 0) {
>   *     gp_futex = 0;
> @@ -53,6 +56,7 @@
>  #define get_pid()       (_pid)
>  
>  int queue[2] = 0;
> +int waiting[2] = 0;
>  int futex_wake = 0;
>  int gp_futex = 0;
>  int gp = 1;
> @@ -66,14 +70,18 @@ active [2] proctype waker()
>  	:: 1 ->
>  		queue[get_pid()] = gp;
>  	
> -		if
> -		:: (gp_futex == -1) ->
> -			gp_futex = 0;
> +                if
> +        	:: (waiting[get_pid()] == 1) ->
> +        		waiting[get_pid()] = 0;
> +			if
> +			:: (gp_futex == -1) ->
> +				gp_futex = 0;
>  #ifndef INJ_QUEUE_NO_WAKE
> -			futex_wake = 1;
> +				futex_wake = 1;
>  #endif
> -		:: else ->
> -			skip;
> +			:: else ->
> +				skip;
> +			fi;
>  		fi;
>  	od;
>  }
> @@ -89,19 +97,32 @@ restart:
>  #ifndef INJ_LATE_DEC
>  		gp_futex = -1;
>  #endif
> +		if
> +		:: (in_registry[0] == 1 && queue[0] != gp) ->
> +			waiting[0] = 1;
> +                :: else ->
> +		        skip;
> +		fi;
> +		if
> +		:: (in_registry[1] == 1 && queue[1] != gp) ->
> +			waiting[1] = 1;
> +                :: else ->
> +		        skip;
> +		fi;
>  
>  		if
> -		:: (in_registry[0] && queue[0] == gp) ->
> +		:: (in_registry[0] == 1 && queue[0] == gp) ->
>  			in_registry[0] = 0;
>  		:: else ->
>  			skip;
>  		fi;
>  		if
> -		:: (in_registry[1] && queue[1] == gp) ->
> +		:: (in_registry[1] == 1 && queue[1] == gp) ->


for consistency, you seem to use "in_registry[1]" and "in_registry[1] ==
1" for pretty much the same thing. Is there a reason why they differ ?

Thanks,

Mathieu

>  			in_registry[1] = 0;
>  		:: else ->
>  			skip;
>  		fi;
> +
>  		if
>  		:: (in_registry[0] == 0 && in_registry[1] == 0) ->
>  progress:
> -- 
> 1.7.6
> 
> 
> From a154ac6990211210a048f7b7588b582cb7a9f528 Mon Sep 17 00:00:00 2001
> From: Paolo Bonzini <pbonzini at redhat.com>
> Date: Sun, 14 Aug 2011 23:57:59 -0700
> Subject: [PATCH 3/3] more interesting late_dec variant
> 
> This patch changes the futex_progress_late_dec model to trigger a different
> failure, a reordering of the waiting[] assignments vs. the gp_futex
> assignment.
> ---
>  futex-wakeup/futex.spin |    8 +++-----
>  1 files changed, 3 insertions(+), 5 deletions(-)
> 
> diff --git a/futex-wakeup/futex.spin b/futex-wakeup/futex.spin
> index 99e0322..5029e33 100644
> --- a/futex-wakeup/futex.spin
> +++ b/futex-wakeup/futex.spin
> @@ -109,6 +109,9 @@ restart:
>                  :: else ->
>  		        skip;
>  		fi;
> +#ifdef INJ_LATE_DEC
> +		gp_futex = -1;
> +#endif
>  
>  		if
>  		:: (in_registry[0] == 1 && queue[0] == gp) ->
> @@ -126,15 +129,10 @@ restart:
>  		if
>  		:: (in_registry[0] == 0 && in_registry[1] == 0) ->
>  progress:
> -#ifndef INJ_LATE_DEC
>  			gp_futex = 0;
> -#endif
>  			gp = !gp;
>  			goto restart;
>  		:: else ->
> -#ifdef INJ_LATE_DEC
> -			gp_futex = -1;
> -#endif
>  			futex_wake = gp_futex + 1;
>  			do
>  			:: 1 ->
> -- 
> 1.7.6
> 


-- 
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com




More information about the lttng-dev mailing list