[lttng-dev] [PATCH v2 07/12] tests: Use uatomic for accessing global states
Paul E. McKenney
paulmck at kernel.org
Wed Jun 21 19:37:17 EDT 2023
On Wed, Jun 07, 2023 at 02:53:54PM -0400, Olivier Dion wrote:
> Global states accesses were protected via memory barriers. Use the
> uatomic API with the CMM memory model so that TSAN does not warns about
"does not warn", for whatever that is worth.
> none atomic concurrent accesses.
>
> Also, the thread id map mutex must be unlocked after setting the new
> created thread id in the map. Otherwise, the new thread could observe an
> unset id.
>
> Change-Id: I1ecdc387b3f510621cbc116ad3b95c676f5d659a
> Co-authored-by: Mathieu Desnoyers <mathieu.desnoyers at efficios.com>
> Signed-off-by: Olivier Dion <odion at efficios.com>
> ---
> tests/common/api.h | 12 ++--
> tests/regression/rcutorture.h | 106 +++++++++++++++++++++++-----------
> 2 files changed, 80 insertions(+), 38 deletions(-)
>
> diff --git a/tests/common/api.h b/tests/common/api.h
> index a260463..9d22b0f 100644
> --- a/tests/common/api.h
> +++ b/tests/common/api.h
> @@ -26,6 +26,7 @@
>
> #include <urcu/compiler.h>
> #include <urcu/arch.h>
> +#include <urcu/uatomic.h>
>
> /*
> * Machine parameters.
> @@ -135,7 +136,7 @@ static int __smp_thread_id(void)
> thread_id_t tid = pthread_self();
>
> for (i = 0; i < NR_THREADS; i++) {
> - if (__thread_id_map[i] == tid) {
> + if (uatomic_read(&__thread_id_map[i]) == tid) {
> long v = i + 1; /* must be non-NULL. */
>
> if (pthread_setspecific(thread_id_key, (void *)v) != 0) {
> @@ -184,12 +185,13 @@ static thread_id_t create_thread(void *(*func)(void *), void *arg)
> exit(-1);
> }
> __thread_id_map[i] = __THREAD_ID_MAP_WAITING;
> - spin_unlock(&__thread_id_map_mutex);
> +
> if (pthread_create(&tid, NULL, func, arg) != 0) {
> perror("create_thread:pthread_create");
> exit(-1);
> }
> - __thread_id_map[i] = tid;
> + uatomic_set(&__thread_id_map[i], tid);
> + spin_unlock(&__thread_id_map_mutex);
> return tid;
> }
>
> @@ -199,7 +201,7 @@ static void *wait_thread(thread_id_t tid)
> void *vp;
>
> for (i = 0; i < NR_THREADS; i++) {
> - if (__thread_id_map[i] == tid)
> + if (uatomic_read(&__thread_id_map[i]) == tid)
> break;
> }
> if (i >= NR_THREADS){
> @@ -211,7 +213,7 @@ static void *wait_thread(thread_id_t tid)
> perror("wait_thread:pthread_join");
> exit(-1);
> }
> - __thread_id_map[i] = __THREAD_ID_MAP_EMPTY;
> + uatomic_set(&__thread_id_map[i], __THREAD_ID_MAP_EMPTY);
> return vp;
> }
>
> diff --git a/tests/regression/rcutorture.h b/tests/regression/rcutorture.h
> index bc394f9..5835b8f 100644
> --- a/tests/regression/rcutorture.h
> +++ b/tests/regression/rcutorture.h
> @@ -44,6 +44,14 @@
> * data. A correct RCU implementation will have all but the first two
> * numbers non-zero.
> *
> + * rcu_stress_count: Histogram of "ages" of structures seen by readers. If any
> + * entries past the first two are non-zero, RCU is broken. The age of a newly
> + * allocated structure is zero, it becomes one when removed from reader
> + * visibility, and is incremented once per grace period subsequently -- and is
> + * freed after passing through (RCU_STRESS_PIPE_LEN-2) grace periods. Since
> + * this tests only has one true writer (there are fake writers), only buckets at
> + * indexes 0 and 1 should be none-zero.
> + *
> * This program is free software; you can redistribute it and/or modify
> * it under the terms of the GNU General Public License as published by
> * the Free Software Foundation; either version 2 of the License, or
> @@ -68,6 +76,8 @@
> #include <stdlib.h>
> #include "tap.h"
>
> +#include <urcu/uatomic.h>
> +
> #define NR_TESTS 1
>
> DEFINE_PER_THREAD(long long, n_reads_pt);
> @@ -145,10 +155,10 @@ void *rcu_read_perf_test(void *arg)
> run_on(me);
> uatomic_inc(&nthreadsrunning);
> put_thread_offline();
> - while (goflag == GOFLAG_INIT)
> + while (uatomic_read(&goflag) == GOFLAG_INIT)
> (void) poll(NULL, 0, 1);
> put_thread_online();
> - while (goflag == GOFLAG_RUN) {
> + while (uatomic_read(&goflag) == GOFLAG_RUN) {
> for (i = 0; i < RCU_READ_RUN; i++) {
> rcu_read_lock();
> /* rcu_read_lock_nest(); */
> @@ -180,9 +190,9 @@ void *rcu_update_perf_test(void *arg __attribute__((unused)))
> }
> }
> uatomic_inc(&nthreadsrunning);
> - while (goflag == GOFLAG_INIT)
> + while (uatomic_read(&goflag) == GOFLAG_INIT)
> (void) poll(NULL, 0, 1);
> - while (goflag == GOFLAG_RUN) {
> + while (uatomic_read(&goflag) == GOFLAG_RUN) {
> synchronize_rcu();
> n_updates_local++;
> }
> @@ -211,15 +221,11 @@ int perftestrun(int nthreads, int nreaders, int nupdaters)
> int t;
> int duration = 1;
>
> - cmm_smp_mb();
> while (uatomic_read(&nthreadsrunning) < nthreads)
> (void) poll(NULL, 0, 1);
> - goflag = GOFLAG_RUN;
> - cmm_smp_mb();
> + uatomic_set(&goflag, GOFLAG_RUN);
> sleep(duration);
The theory here being that the context switches implied by the sleep()
make the memory barrier unnecesary? Not unreasonable, I guess. ;-)
> - cmm_smp_mb();
> - goflag = GOFLAG_STOP;
> - cmm_smp_mb();
> + uatomic_set(&goflag, GOFLAG_STOP);
> wait_all_threads();
> for_each_thread(t) {
> n_reads += per_thread(n_reads_pt, t);
> @@ -300,6 +306,13 @@ struct rcu_stress rcu_stress_array[RCU_STRESS_PIPE_LEN] = { { 0, 0 } };
> struct rcu_stress *rcu_stress_current;
> int rcu_stress_idx = 0;
>
> +/*
> + * How many time a reader has seen something that should not be visible. It is
> + * an error if this value is different than zero at the end of the stress test.
> + *
> + * Here, the something that should not be visibile is an old pipe that has been
> + * freed (mbtest = 0).
> + */
> int n_mberror = 0;
> DEFINE_PER_THREAD(long long [RCU_STRESS_PIPE_LEN + 1], rcu_stress_count);
>
> @@ -315,19 +328,25 @@ void *rcu_read_stress_test(void *arg __attribute__((unused)))
>
> rcu_register_thread();
> put_thread_offline();
> - while (goflag == GOFLAG_INIT)
> + while (uatomic_read(&goflag) == GOFLAG_INIT)
> (void) poll(NULL, 0, 1);
> put_thread_online();
> - while (goflag == GOFLAG_RUN) {
> + while (uatomic_read(&goflag) == GOFLAG_RUN) {
> rcu_read_lock();
> p = rcu_dereference(rcu_stress_current);
> if (p->mbtest == 0)
> - n_mberror++;
> + uatomic_inc_mo(&n_mberror, CMM_RELAXED);
> rcu_read_lock_nest();
> + /*
> + * The value of garbage is nothing important. This is
> + * essentially a busy loop. The atomic operation -- while not
> + * important here -- helps tools such as TSAN to not flag this
> + * as a race condition.
> + */
> for (i = 0; i < 100; i++)
> - garbage++;
> + uatomic_inc(&garbage);
> rcu_read_unlock_nest();
> - pc = p->pipe_count;
> + pc = uatomic_read(&p->pipe_count);
> rcu_read_unlock();
> if ((pc > RCU_STRESS_PIPE_LEN) || (pc < 0))
> pc = RCU_STRESS_PIPE_LEN;
> @@ -397,26 +416,47 @@ static
> void *rcu_update_stress_test(void *arg __attribute__((unused)))
> {
> int i;
> - struct rcu_stress *p;
> + struct rcu_stress *p, *old_p;
> struct rcu_head rh;
> enum writer_state writer_state = WRITER_STATE_SYNC_RCU;
>
> - while (goflag == GOFLAG_INIT)
> + rcu_register_thread();
> +
> + put_thread_offline();
> + while (uatomic_read(&goflag) == GOFLAG_INIT)
> (void) poll(NULL, 0, 1);
> - while (goflag == GOFLAG_RUN) {
> +
> + put_thread_online();
> + while (uatomic_read(&goflag) == GOFLAG_RUN) {
> i = rcu_stress_idx + 1;
> if (i >= RCU_STRESS_PIPE_LEN)
> i = 0;
> + /*
> + * Get old pipe that we free after a synchronize_rcu().
> + */
> + rcu_read_lock();
> + old_p = rcu_dereference(rcu_stress_current);
> + rcu_read_unlock();
> +
> + /*
> + * Allocate a new pipe.
> + */
> p = &rcu_stress_array[i];
> - p->mbtest = 0;
> - cmm_smp_mb();
> p->pipe_count = 0;
> p->mbtest = 1;
> +
> rcu_assign_pointer(rcu_stress_current, p);
> rcu_stress_idx = i;
> +
> + /*
> + * Increment every pipe except the freshly allocated one. A
> + * reader should only see either the old pipe or the new
> + * pipe. This is reflected in the rcu_stress_count histogram.
> + */
> for (i = 0; i < RCU_STRESS_PIPE_LEN; i++)
> if (i != rcu_stress_idx)
> - rcu_stress_array[i].pipe_count++;
> + uatomic_inc(&rcu_stress_array[i].pipe_count);
> +
> switch (writer_state) {
> case WRITER_STATE_SYNC_RCU:
> synchronize_rcu();
> @@ -432,9 +472,7 @@ void *rcu_update_stress_test(void *arg __attribute__((unused)))
> strerror(errno));
> abort();
> }
> - rcu_register_thread();
> call_rcu(&rh, rcu_update_stress_test_rcu);
> - rcu_unregister_thread();
> /*
> * Our MacOS X test machine with the following
> * config:
> @@ -470,18 +508,24 @@ void *rcu_update_stress_test(void *arg __attribute__((unused)))
> {
> struct urcu_gp_poll_state poll_state;
>
> - rcu_register_thread();
> poll_state = start_poll_synchronize_rcu();
> - rcu_unregister_thread();
> while (!poll_state_synchronize_rcu(poll_state))
> (void) poll(NULL, 0, 1); /* Wait for 1ms */
> break;
> }
> }
> + /*
> + * No readers should see that old pipe now. Setting mbtest to 0
> + * to mark it as "freed".
> + */
> + old_p->mbtest = 0;
> n_updates++;
> advance_writer_state(&writer_state);
> }
>
> + put_thread_offline();
> + rcu_unregister_thread();
> +
> return NULL;
> }
>
> @@ -497,9 +541,9 @@ void *rcu_fake_update_stress_test(void *arg __attribute__((unused)))
> set_thread_call_rcu_data(crdp);
> }
> }
> - while (goflag == GOFLAG_INIT)
> + while (uatomic_read(&goflag) == GOFLAG_INIT)
> (void) poll(NULL, 0, 1);
> - while (goflag == GOFLAG_RUN) {
> + while (uatomic_read(&goflag) == GOFLAG_RUN) {
> synchronize_rcu();
> (void) poll(NULL, 0, 1);
> }
> @@ -535,13 +579,9 @@ int stresstest(int nreaders)
> create_thread(rcu_update_stress_test, NULL);
> for (i = 0; i < 5; i++)
> create_thread(rcu_fake_update_stress_test, NULL);
> - cmm_smp_mb();
> - goflag = GOFLAG_RUN;
> - cmm_smp_mb();
> + uatomic_set(&goflag, GOFLAG_RUN);
> sleep(10);
> - cmm_smp_mb();
> - goflag = GOFLAG_STOP;
> - cmm_smp_mb();
> + uatomic_set(&goflag, GOFLAG_STOP);
> wait_all_threads();
> for_each_thread(t)
> n_reads += per_thread(n_reads_pt, t);
Looks plausible!
Thanx, Paul
More information about the lttng-dev
mailing list