[lttng-dev] [PATCH 5/7] Replace the arch-specific memory barriers with __atomic builtins
Ondřej Surý
ondrej at sury.org
Tue Mar 21 09:31:00 EDT 2023
Instead of custom code, use the __atomic_thread_fence() builtin to
implement the cmm_mb(), cmm_rmb(), cmm_wmb(), cmm_smp_mb(),
cmm_smp_rmb(), and cmm_smp_wmb() family of functions on all
architectures, and cmm_read_barrier_depends() on alpha (otherwise it
remains a no-op).
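For illustration only, here is a minimal standalone sketch (not part of
the patch) of the mapping now used by the generic fallbacks in
urcu/arch/generic.h; the data/ready variables and the single-threaded
main() are made up for the example:

	/*
	 * Sketch: the arch-specific inline assembly is replaced with
	 * compiler-provided fences via __atomic_thread_fence().
	 */
	#include <stdio.h>

	#define cmm_mb()  __atomic_thread_fence(__ATOMIC_SEQ_CST) /* full barrier  */
	#define cmm_rmb() __atomic_thread_fence(__ATOMIC_ACQUIRE) /* read barrier  */
	#define cmm_wmb() __atomic_thread_fence(__ATOMIC_RELEASE) /* write barrier */

	static int data;
	static int ready;

	int main(void)
	{
		/* Publisher side: order the data store before the flag store. */
		data = 42;
		cmm_wmb();
		ready = 1;

		/* Consumer side (single-threaded here, for illustration):
		 * order reads after the flag has been observed. */
		if (ready) {
			cmm_rmb();
			printf("data = %d\n", data);
		}

		cmm_mb(); /* full fence, the equivalent of the old arch "mb" */
		return 0;
	}
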
Signed-off-by: Ondřej Surý <ondrej at sury.org>
---
include/urcu/arch/alpha.h | 6 +++---
include/urcu/arch/arm.h | 14 -------------
include/urcu/arch/generic.h | 6 +++---
include/urcu/arch/mips.h | 6 ------
include/urcu/arch/nios2.h | 2 --
include/urcu/arch/ppc.h | 25 ----------------------
include/urcu/arch/s390.h | 2 --
include/urcu/arch/sparc64.h | 13 ------------
include/urcu/arch/x86.h | 42 +++----------------------------------
9 files changed, 9 insertions(+), 107 deletions(-)
diff --git a/include/urcu/arch/alpha.h b/include/urcu/arch/alpha.h
index dc33e28..61687c7 100644
--- a/include/urcu/arch/alpha.h
+++ b/include/urcu/arch/alpha.h
@@ -29,9 +29,9 @@
extern "C" {
#endif
-#define cmm_mb() __asm__ __volatile__ ("mb":::"memory")
-#define cmm_wmb() __asm__ __volatile__ ("wmb":::"memory")
-#define cmm_read_barrier_depends() __asm__ __volatile__ ("mb":::"memory")
+#ifndef cmm_read_barrier_depends
+#define cmm_read_barrier_depends() __atomic_thread_fence(__ATOMIC_CONSUME)
+#endif
/*
* On Linux, define the membarrier system call number if not yet available in
diff --git a/include/urcu/arch/arm.h b/include/urcu/arch/arm.h
index 54ca4fa..b3671dc 100644
--- a/include/urcu/arch/arm.h
+++ b/include/urcu/arch/arm.h
@@ -39,20 +39,6 @@ extern "C" {
/* For backwards compat. */
#define CONFIG_RCU_ARM_HAVE_DMB 1
-/*
- * Issues full system DMB operation.
- */
-#define cmm_mb() __asm__ __volatile__ ("dmb sy":::"memory")
-#define cmm_rmb() __asm__ __volatile__ ("dmb sy":::"memory")
-#define cmm_wmb() __asm__ __volatile__ ("dmb sy":::"memory")
-
-/*
- * Issues DMB operation only to the inner shareable domain.
- */
-#define cmm_smp_mb() __asm__ __volatile__ ("dmb ish":::"memory")
-#define cmm_smp_rmb() __asm__ __volatile__ ("dmb ish":::"memory")
-#define cmm_smp_wmb() __asm__ __volatile__ ("dmb ish":::"memory")
-
#endif /* URCU_ARCH_ARMV7 */
#include <stdlib.h>
diff --git a/include/urcu/arch/generic.h b/include/urcu/arch/generic.h
index be6e41e..2715162 100644
--- a/include/urcu/arch/generic.h
+++ b/include/urcu/arch/generic.h
@@ -44,15 +44,15 @@ extern "C" {
*/
#ifndef cmm_mb
-#define cmm_mb() __sync_synchronize()
+#define cmm_mb() __atomic_thread_fence(__ATOMIC_SEQ_CST)
#endif
#ifndef cmm_rmb
-#define cmm_rmb() cmm_mb()
+#define cmm_rmb() __atomic_thread_fence(__ATOMIC_ACQUIRE)
#endif
#ifndef cmm_wmb
-#define cmm_wmb() cmm_mb()
+#define cmm_wmb() __atomic_thread_fence(__ATOMIC_RELEASE)
#endif
#define cmm_mc() cmm_barrier()
diff --git a/include/urcu/arch/mips.h b/include/urcu/arch/mips.h
index ea5b7e9..ffe65c0 100644
--- a/include/urcu/arch/mips.h
+++ b/include/urcu/arch/mips.h
@@ -30,12 +30,6 @@
extern "C" {
#endif
-#define cmm_mb() __asm__ __volatile__ ( \
- " .set mips2 \n" \
- " sync \n" \
- " .set mips0 \n" \
- :::"memory")
-
#ifdef __cplusplus
}
#endif
diff --git a/include/urcu/arch/nios2.h b/include/urcu/arch/nios2.h
index b4f3e50..cd6bdb8 100644
--- a/include/urcu/arch/nios2.h
+++ b/include/urcu/arch/nios2.h
@@ -29,8 +29,6 @@
extern "C" {
#endif
-#define cmm_mb() cmm_barrier()
-
#ifdef __cplusplus
}
#endif
diff --git a/include/urcu/arch/ppc.h b/include/urcu/arch/ppc.h
index 791529e..618f79c 100644
--- a/include/urcu/arch/ppc.h
+++ b/include/urcu/arch/ppc.h
@@ -34,31 +34,6 @@ extern "C" {
/* Include size of POWER5+ L3 cache lines: 256 bytes */
#define CAA_CACHE_LINE_SIZE 256
-#ifdef __NO_LWSYNC__
-#define LWSYNC_OPCODE "sync\n"
-#else
-#define LWSYNC_OPCODE "lwsync\n"
-#endif
-
-/*
- * Use sync for all cmm_mb/rmb/wmb barriers because lwsync does not
- * preserve ordering of cacheable vs. non-cacheable accesses, so it
- * should not be used to order with respect to MMIO operations. An
- * eieio+lwsync pair is also not enough for cmm_rmb, because it will
- * order cacheable and non-cacheable memory operations separately---i.e.
- * not the latter against the former.
- */
-#define cmm_mb() __asm__ __volatile__ ("sync":::"memory")
-
-/*
- * lwsync orders loads in cacheable memory with respect to other loads,
- * and stores in cacheable memory with respect to other stores.
- * Therefore, use it for barriers ordering accesses to cacheable memory
- * only.
- */
-#define cmm_smp_rmb() __asm__ __volatile__ (LWSYNC_OPCODE:::"memory")
-#define cmm_smp_wmb() __asm__ __volatile__ (LWSYNC_OPCODE:::"memory")
-
#define mftbl() \
__extension__ \
({ \
diff --git a/include/urcu/arch/s390.h b/include/urcu/arch/s390.h
index 67461b4..c22fdf9 100644
--- a/include/urcu/arch/s390.h
+++ b/include/urcu/arch/s390.h
@@ -39,8 +39,6 @@ extern "C" {
#define CAA_CACHE_LINE_SIZE 128
-#define cmm_mb() __asm__ __volatile__("bcr 15,0" : : : "memory")
-
#define HAS_CAA_GET_CYCLES
typedef uint64_t caa_cycles_t;
diff --git a/include/urcu/arch/sparc64.h b/include/urcu/arch/sparc64.h
index 1ff40f5..b4e25ca 100644
--- a/include/urcu/arch/sparc64.h
+++ b/include/urcu/arch/sparc64.h
@@ -40,19 +40,6 @@ extern "C" {
#define CAA_CACHE_LINE_SIZE 256
-/*
- * Inspired from the Linux kernel. Workaround Spitfire bug #51.
- */
-#define membar_safe(type) \
-__asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
- "membar " type "\n" \
- "1:\n" \
- : : : "memory")
-
-#define cmm_mb() membar_safe("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad")
-#define cmm_rmb() membar_safe("#LoadLoad")
-#define cmm_wmb() membar_safe("#StoreStore")
-
#ifdef __cplusplus
}
#endif
diff --git a/include/urcu/arch/x86.h b/include/urcu/arch/x86.h
index 744f9f9..af4487d 100644
--- a/include/urcu/arch/x86.h
+++ b/include/urcu/arch/x86.h
@@ -46,44 +46,8 @@ extern "C" {
/* For backwards compat */
#define CONFIG_RCU_HAVE_FENCE 1
-#define cmm_mb() __asm__ __volatile__ ("mfence":::"memory")
-
-/*
- * Define cmm_rmb/cmm_wmb to "strict" barriers that may be needed when
- * using SSE or working with I/O areas. cmm_smp_rmb/cmm_smp_wmb are
- * only compiler barriers, which is enough for general use.
- */
-#define cmm_rmb() __asm__ __volatile__ ("lfence":::"memory")
-#define cmm_wmb() __asm__ __volatile__ ("sfence"::: "memory")
-#define cmm_smp_rmb() cmm_barrier()
-#define cmm_smp_wmb() cmm_barrier()
-
-#else
-
-/*
- * We leave smp_rmb/smp_wmb as full barriers for processors that do not have
- * fence instructions.
- *
- * An empty cmm_smp_rmb() may not be enough on old PentiumPro multiprocessor
- * systems, due to an erratum. The Linux kernel says that "Even distro
- * kernels should think twice before enabling this", but for now let's
- * be conservative and leave the full barrier on 32-bit processors. Also,
- * IDT WinChip supports weak store ordering, and the kernel may enable it
- * under our feet; cmm_smp_wmb() ceases to be a nop for these processors.
- */
-#if (CAA_BITS_PER_LONG == 32)
-#define cmm_mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)":::"memory")
-#define cmm_rmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)":::"memory")
-#define cmm_wmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)":::"memory")
-#else
-#define cmm_mb() __asm__ __volatile__ ("lock; addl $0,0(%%rsp)":::"memory")
-#define cmm_rmb() __asm__ __volatile__ ("lock; addl $0,0(%%rsp)":::"memory")
-#define cmm_wmb() __asm__ __volatile__ ("lock; addl $0,0(%%rsp)":::"memory")
-#endif
#endif
-#define caa_cpu_relax() __asm__ __volatile__ ("rep; nop" : : : "memory")
-
#define HAS_CAA_GET_CYCLES
#define rdtscll(val) \
@@ -98,10 +62,10 @@ typedef uint64_t caa_cycles_t;
static inline caa_cycles_t caa_get_cycles(void)
{
- caa_cycles_t ret = 0;
+ caa_cycles_t ret = 0;
- rdtscll(ret);
- return ret;
+ rdtscll(ret);
+ return ret;
}
/*
--
2.39.2