[lttng-dev] [PATCH 4/7] Replace the internal pointer manipulation with __atomic builtins

Ondřej Surý ondrej at sury.org
Fri Mar 17 17:37:52 EDT 2023


Instead of custom code, use the __atomic builtins to implement
rcu_dereference(), rcu_cmpxchg_pointer(), rcu_xchg_pointer(), and
rcu_assign_pointer().
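
For illustration only (not part of this change), a minimal publish/read
pair built on these primitives; "struct config" and the function names
below are made up for the example:

	/* Assumes #include <urcu.h> and a struct config definition. */
	static struct config *cfg;	/* RCU-protected pointer */

	/* Publisher: _rcu_set_pointer() is now a __ATOMIC_RELEASE store,
	 * so initialization of *newp is ordered before the pointer
	 * becomes visible to readers. */
	static void publish_config(struct config *newp)
	{
		rcu_assign_pointer(cfg, newp);
	}

	/* Reader, called between rcu_read_lock() and rcu_read_unlock():
	 * _rcu_dereference() is now a __ATOMIC_CONSUME load, which keeps
	 * the address dependency into the subsequent dereference. */
	static struct config *get_config(void)
	{
		return rcu_dereference(cfg);
	}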

Signed-off-by: Ondřej Surý <ondrej at sury.org>
---
 include/urcu/arch.h           | 20 +++++++++
 include/urcu/arch/alpha.h     |  6 +++
 include/urcu/arch/arm.h       | 12 ++++++
 include/urcu/arch/mips.h      |  2 +
 include/urcu/arch/nios2.h     |  2 +
 include/urcu/arch/ppc.h       |  6 +++
 include/urcu/arch/s390.h      |  2 +
 include/urcu/arch/sparc64.h   |  6 +++
 include/urcu/arch/x86.h       | 20 +++++++++
 include/urcu/static/pointer.h | 77 +++++++----------------------------
 10 files changed, 90 insertions(+), 63 deletions(-)

diff --git a/include/urcu/arch.h b/include/urcu/arch.h
index d3914da..aec6fa1 100644
--- a/include/urcu/arch.h
+++ b/include/urcu/arch.h
@@ -21,6 +21,26 @@
 #ifndef _URCU_ARCH_H
 #define _URCU_ARCH_H
 
+#if !defined(__has_feature)
+#define __has_feature(x) 0
+#endif /* if !defined(__has_feature) */
+
+/* GCC defines __SANITIZE_ADDRESS__, so reuse the macro for clang */
+#if __has_feature(address_sanitizer)
+#define __SANITIZE_ADDRESS__ 1
+#endif /* if __has_feature(address_sanitizer) */
+
+#ifdef __SANITIZE_THREAD__
+/* FIXME: Somebody who understands the barriers should look into this */
+#define cmm_mb() __atomic_thread_fence(__ATOMIC_SEQ_CST)
+#define cmm_rmb() __atomic_thread_fence(__ATOMIC_ACQUIRE)
+#define cmm_wmb() __atomic_thread_fence(__ATOMIC_RELEASE)
+#define cmm_smp_mb() __atomic_thread_fence(__ATOMIC_SEQ_CST)
+#define cmm_smp_rmb() __atomic_thread_fence(__ATOMIC_ACQUIRE)
+#define cmm_smp_wmb() __atomic_thread_fence(__ATOMIC_RELEASE)
+#define cmm_read_barrier_depends() __atomic_thread_fence(__ATOMIC_ACQ_REL)
+#endif
+
 /*
  * Architecture detection using compiler defines.
  *
diff --git a/include/urcu/arch/alpha.h b/include/urcu/arch/alpha.h
index dc33e28..84526ef 100644
--- a/include/urcu/arch/alpha.h
+++ b/include/urcu/arch/alpha.h
@@ -29,9 +29,15 @@
 extern "C" {
 #endif
 
+#ifndef cmm_mb
 #define cmm_mb()			__asm__ __volatile__ ("mb":::"memory")
+#endif
+#ifndef cmm_wmb
 #define cmm_wmb()			__asm__ __volatile__ ("wmb":::"memory")
+#endif
+#ifndef cmm_read_barrier_depends
 #define cmm_read_barrier_depends()	__asm__ __volatile__ ("mb":::"memory")
+#endif
 
 /*
  * On Linux, define the membarrier system call number if not yet available in
diff --git a/include/urcu/arch/arm.h b/include/urcu/arch/arm.h
index 54ca4fa..4950e13 100644
--- a/include/urcu/arch/arm.h
+++ b/include/urcu/arch/arm.h
@@ -42,16 +42,28 @@ extern "C" {
 /*
  * Issues full system DMB operation.
  */
+#ifndef cmm_mb
 #define cmm_mb()	__asm__ __volatile__ ("dmb sy":::"memory")
+#endif
+#ifndef cmm_rmb
 #define cmm_rmb()	__asm__ __volatile__ ("dmb sy":::"memory")
+#endif
+#ifndef cmm_wmb
 #define cmm_wmb()	__asm__ __volatile__ ("dmb sy":::"memory")
+#endif
 
 /*
  * Issues DMB operation only to the inner shareable domain.
  */
+#ifndef cmm_smp_mb
 #define cmm_smp_mb()	__asm__ __volatile__ ("dmb ish":::"memory")
+#endif
+#ifndef cmm_smp_rmb
 #define cmm_smp_rmb()	__asm__ __volatile__ ("dmb ish":::"memory")
+#endif
+#ifndef cmm_smp_wmb
 #define cmm_smp_wmb()	__asm__ __volatile__ ("dmb ish":::"memory")
+#endif
 
 #endif /* URCU_ARCH_ARMV7 */
 
diff --git a/include/urcu/arch/mips.h b/include/urcu/arch/mips.h
index ea5b7e9..b9ee021 100644
--- a/include/urcu/arch/mips.h
+++ b/include/urcu/arch/mips.h
@@ -30,11 +30,13 @@
 extern "C" {
 #endif
 
+#ifndef cmm_mb
 #define cmm_mb()			__asm__ __volatile__ (		    \
 					"	.set	mips2		\n" \
 					"	sync			\n" \
 					"	.set	mips0		\n" \
 					:::"memory")
+#endif
 
 #ifdef __cplusplus
 }
diff --git a/include/urcu/arch/nios2.h b/include/urcu/arch/nios2.h
index b4f3e50..5def45c 100644
--- a/include/urcu/arch/nios2.h
+++ b/include/urcu/arch/nios2.h
@@ -29,7 +29,9 @@
 extern "C" {
 #endif
 
+#ifndef cmm_mb
 #define cmm_mb()	cmm_barrier()
+#endif
 
 #ifdef __cplusplus
 }
diff --git a/include/urcu/arch/ppc.h b/include/urcu/arch/ppc.h
index 791529e..b8ec40d 100644
--- a/include/urcu/arch/ppc.h
+++ b/include/urcu/arch/ppc.h
@@ -48,7 +48,9 @@ extern "C" {
  * order cacheable and non-cacheable memory operations separately---i.e.
  * not the latter against the former.
  */
+#ifndef cmm_mb
 #define cmm_mb()         __asm__ __volatile__ ("sync":::"memory")
+#endif
 
 /*
  * lwsync orders loads in cacheable memory with respect to other loads,
@@ -56,8 +58,12 @@ extern "C" {
  * Therefore, use it for barriers ordering accesses to cacheable memory
  * only.
  */
+#ifndef cmm_smp_rmb
 #define cmm_smp_rmb()    __asm__ __volatile__ (LWSYNC_OPCODE:::"memory")
+#endif
+#ifndef cmm_smp_wmb
 #define cmm_smp_wmb()    __asm__ __volatile__ (LWSYNC_OPCODE:::"memory")
+#endif
 
 #define mftbl()						\
 	__extension__					\
diff --git a/include/urcu/arch/s390.h b/include/urcu/arch/s390.h
index 67461b4..2733873 100644
--- a/include/urcu/arch/s390.h
+++ b/include/urcu/arch/s390.h
@@ -39,7 +39,9 @@ extern "C" {
 
 #define CAA_CACHE_LINE_SIZE	128
 
+#ifndef cmm_mb
 #define cmm_mb()    __asm__ __volatile__("bcr 15,0" : : : "memory")
+#endif
 
 #define HAS_CAA_GET_CYCLES
 
diff --git a/include/urcu/arch/sparc64.h b/include/urcu/arch/sparc64.h
index 1ff40f5..32a6b0e 100644
--- a/include/urcu/arch/sparc64.h
+++ b/include/urcu/arch/sparc64.h
@@ -49,9 +49,15 @@ __asm__ __volatile__("ba,pt %%xcc, 1f\n\t"	\
 		     "1:\n"			\
 		     : : : "memory")
 
+#ifndef cmm_mb
 #define cmm_mb()	membar_safe("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad")
+#endif
+#ifndef cmm_rmb
 #define cmm_rmb()	membar_safe("#LoadLoad")
+#endif
+#ifndef cmm_wmb
 #define cmm_wmb()	membar_safe("#StoreStore")
+#endif
 
 #ifdef __cplusplus
 }
diff --git a/include/urcu/arch/x86.h b/include/urcu/arch/x86.h
index 744f9f9..6be9d38 100644
--- a/include/urcu/arch/x86.h
+++ b/include/urcu/arch/x86.h
@@ -46,15 +46,23 @@ extern "C" {
 /* For backwards compat */
 #define CONFIG_RCU_HAVE_FENCE 1
 
+#ifndef cmm_mb
 #define cmm_mb()    __asm__ __volatile__ ("mfence":::"memory")
+#endif
 
 /*
  * Define cmm_rmb/cmm_wmb to "strict" barriers that may be needed when
  * using SSE or working with I/O areas.  cmm_smp_rmb/cmm_smp_wmb are
  * only compiler barriers, which is enough for general use.
  */
+#ifndef cmm_rmb
 #define cmm_rmb()     __asm__ __volatile__ ("lfence":::"memory")
+#endif
+
+#ifndef cmm_wmb
 #define cmm_wmb()     __asm__ __volatile__ ("sfence"::: "memory")
+#endif
+
 #define cmm_smp_rmb() cmm_barrier()
 #define cmm_smp_wmb() cmm_barrier()
 
@@ -72,15 +80,27 @@ extern "C" {
  * under our feet; cmm_smp_wmb() ceases to be a nop for these processors.
  */
 #if (CAA_BITS_PER_LONG == 32)
+#ifndef cmm_mb
 #define cmm_mb()    __asm__ __volatile__ ("lock; addl $0,0(%%esp)":::"memory")
+#endif
+#ifndef cmm_rmb
 #define cmm_rmb()    __asm__ __volatile__ ("lock; addl $0,0(%%esp)":::"memory")
+#endif
+#ifndef cmm_wmb
 #define cmm_wmb()    __asm__ __volatile__ ("lock; addl $0,0(%%esp)":::"memory")
+#endif
 #else
+#ifndef cmm_mb
 #define cmm_mb()    __asm__ __volatile__ ("lock; addl $0,0(%%rsp)":::"memory")
+#endif
+#ifndef cmm_rmb
 #define cmm_rmb()    __asm__ __volatile__ ("lock; addl $0,0(%%rsp)":::"memory")
+#endif
+#ifndef cmm_wmb
 #define cmm_wmb()    __asm__ __volatile__ ("lock; addl $0,0(%%rsp)":::"memory")
 #endif
 #endif
+#endif
 
 #define caa_cpu_relax()	__asm__ __volatile__ ("rep; nop" : : : "memory")
 
diff --git a/include/urcu/static/pointer.h b/include/urcu/static/pointer.h
index 9e46a57..3f116f3 100644
--- a/include/urcu/static/pointer.h
+++ b/include/urcu/static/pointer.h
@@ -38,6 +38,8 @@
 extern "C" {
 #endif
 
+#define _rcu_get_pointer(addr) __atomic_load_n(addr, __ATOMIC_CONSUME)
+
 /**
  * _rcu_dereference - reads (copy) a RCU-protected pointer to a local variable
  * into a RCU read-side critical section. The pointer can later be safely
@@ -49,14 +51,6 @@ extern "C" {
  * Inserts memory barriers on architectures that require them (currently only
  * Alpha) and documents which pointers are protected by RCU.
  *
- * With C standards prior to C11/C++11, the compiler memory barrier in
- * CMM_LOAD_SHARED() ensures that value-speculative optimizations (e.g.
- * VSS: Value Speculation Scheduling) does not perform the data read
- * before the pointer read by speculating the value of the pointer.
- * Correct ordering is ensured because the pointer is read as a volatile
- * access. This acts as a global side-effect operation, which forbids
- * reordering of dependent memory operations.
- *
  * With C standards C11/C++11, concerns about dependency-breaking
  * optimizations are taken care of by the "memory_order_consume" atomic
  * load.
@@ -65,10 +59,6 @@ extern "C" {
  * explicit because the pointer used as input argument is a pointer,
  * not an _Atomic type as required by C11/C++11.
  *
- * By defining URCU_DEREFERENCE_USE_VOLATILE, the user requires use of
- * volatile access to implement rcu_dereference rather than
- * memory_order_consume load from the C11/C++11 standards.
- *
  * This may improve performance on weakly-ordered architectures where
  * the compiler implements memory_order_consume as a
  * memory_order_acquire, which is stricter than required by the
@@ -83,35 +73,7 @@ extern "C" {
  * meets the 10-line criterion in LGPL, allowing this function to be
  * expanded directly in non-LGPL code.
  */
-
-#if !defined (URCU_DEREFERENCE_USE_VOLATILE) &&		\
-	((defined (__cplusplus) && __cplusplus >= 201103L) ||	\
-	(defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L))
-# define __URCU_DEREFERENCE_USE_ATOMIC_CONSUME
-#endif
-
-/*
- * If p is const (the pointer itself, not what it points to), using
- * __typeof__(p) would declare a const variable, leading to
- * -Wincompatible-pointer-types errors.  Using the statement expression
- * makes it an rvalue and gets rid of the const-ness.
- */
-#ifdef __URCU_DEREFERENCE_USE_ATOMIC_CONSUME
-# define _rcu_dereference(p) __extension__ ({						\
-				__typeof__(__extension__ ({				\
-					__typeof__(p) __attribute__((unused)) _________p0 = { 0 }; \
-					_________p0;					\
-				})) _________p1;					\
-				__atomic_load(&(p), &_________p1, __ATOMIC_CONSUME);	\
-				(_________p1);						\
-			})
-#else
-# define _rcu_dereference(p) __extension__ ({						\
-				__typeof__(p) _________p1 = CMM_LOAD_SHARED(p);		\
-				cmm_smp_read_barrier_depends();				\
-				(_________p1);						\
-			})
-#endif
+#define _rcu_dereference(p) _rcu_get_pointer(&(p))
 
 /**
  * _rcu_cmpxchg_pointer - same as rcu_assign_pointer, but tests if the pointer
@@ -126,12 +88,12 @@ extern "C" {
  * meets the 10-line criterion in LGPL, allowing this function to be
  * expanded directly in non-LGPL code.
  */
-#define _rcu_cmpxchg_pointer(p, old, _new)				\
-	__extension__							\
-	({								\
-		__typeof__(*p) _________pold = (old);			\
-		__typeof__(*p) _________pnew = (_new);			\
-		uatomic_cmpxchg(p, _________pold, _________pnew);	\
+#define _rcu_cmpxchg_pointer(p, old, _new)						\
+	({										\
+		__typeof__(*(p)) __old = (old);						\
+		__atomic_compare_exchange_n(p, &__old, (_new), 0,			\
+					    __ATOMIC_ACQ_REL, __ATOMIC_CONSUME);	\
+		__old;									\
 	})
 
 /**
@@ -145,22 +107,11 @@ extern "C" {
  * meets the 10-line criterion in LGPL, allowing this function to be
  * expanded directly in non-LGPL code.
  */
-#define _rcu_xchg_pointer(p, v)				\
-	__extension__					\
-	({						\
-		__typeof__(*p) _________pv = (v);	\
-		uatomic_xchg(p, _________pv);		\
-	})
-
+#define _rcu_xchg_pointer(p, v) \
+	__atomic_exchange_n(p, v, __ATOMIC_ACQ_REL)
 
-#define _rcu_set_pointer(p, v)				\
-	do {						\
-		__typeof__(*p) _________pv = (v);	\
-		if (!__builtin_constant_p(v) || 	\
-		    ((v) != NULL))			\
-			cmm_wmb();				\
-		uatomic_set(p, _________pv);		\
-	} while (0)
+#define _rcu_set_pointer(p, v) \
+	__atomic_store_n(p, v, __ATOMIC_RELEASE)
 
 /**
  * _rcu_assign_pointer - assign (publicize) a pointer to a new data structure
@@ -178,7 +129,7 @@ extern "C" {
  * meets the 10-line criterion in LGPL, allowing this function to be
  * expanded directly in non-LGPL code.
  */
-#define _rcu_assign_pointer(p, v)	_rcu_set_pointer(&(p), v)
+#define _rcu_assign_pointer(p, v)	_rcu_set_pointer(&(p), v)
 
 #ifdef __cplusplus
 }
-- 
2.39.2


