[ltt-dev] [URCU PATCH 2/3] Rename all arch primitives with prefix caa_

David Goulet david.goulet at polymtl.ca
Thu Nov 18 15:31:35 EST 2010


This is the second patch in the series reducing namespace pollution.
The caa_ prefix stands for Concurrent Architecture Abstraction.
As with the previous patch, the prefix was suggested by Mathieu
Desnoyers and Paul E. McKenney.

Every define, macro, and function specific to the architecture
abstraction of liburcu is renamed with that prefix.
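
For illustration, a minimal before/after sketch of caller code under
this rename (the struct and variable names below are hypothetical, not
taken from the tree):

	struct foo *f;
	unsigned long v;

	/* before this patch */
	f = container_of(p, struct foo, node);
	v = LOAD_SHARED(shared_ctr);
	cpu_relax();

	/* after this patch */
	f = caa_container_of(p, struct foo, node);
	v = CAA_LOAD_SHARED(shared_ctr);
	caa_cpu_relax();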

Signed-off-by: David Goulet <david.goulet at polymtl.ca>
---
 tests/api_gcc.h                   |   16 ++++++++--------
 tests/api_ppc.h                   |   16 ++++++++--------
 tests/api_x86.h                   |   16 ++++++++--------
 tests/rcutorture.h                |    2 +-
 tests/test_cycles_per_loop.c      |    6 +++---
 tests/test_looplen.c              |    6 +++---
 tests/test_mutex.c                |    6 +++---
 tests/test_perthreadlock.c        |    8 ++++----
 tests/test_perthreadlock_timing.c |   14 +++++++-------
 tests/test_qsbr.c                 |    2 +-
 tests/test_qsbr_gc.c              |    8 ++++----
 tests/test_qsbr_timing.c          |   12 ++++++------
 tests/test_rwlock.c               |    2 +-
 tests/test_rwlock_timing.c        |   12 ++++++------
 tests/test_urcu.c                 |    2 +-
 tests/test_urcu_assign.c          |    2 +-
 tests/test_urcu_bp.c              |    2 +-
 tests/test_urcu_defer.c           |    4 ++--
 tests/test_urcu_gc.c              |    8 ++++----
 tests/test_urcu_lfq.c             |    6 +++---
 tests/test_urcu_lfs.c             |    2 +-
 tests/test_urcu_timing.c          |   12 ++++++------
 tests/test_urcu_wfq.c             |    2 +-
 tests/test_urcu_wfs.c             |    2 +-
 urcu-bp-static.h                  |   10 +++++-----
 urcu-bp.c                         |    6 +++---
 urcu-defer.c                      |   32 ++++++++++++++++----------------
 urcu-pointer-static.h             |    4 ++--
 urcu-qsbr-static.h                |   10 +++++-----
 urcu-qsbr.c                       |   14 +++++++-------
 urcu-static.h                     |   12 ++++++------
 urcu.c                            |   16 ++++++++--------
 urcu/arch_generic.h               |    8 ++++----
 urcu/arch_ppc.h                   |    2 +-
 urcu/arch_s390.h                  |    2 +-
 urcu/arch_sparc64.h               |    2 +-
 urcu/arch_x86.h                   |    6 +++---
 urcu/compiler.h                   |   14 +++++++-------
 urcu/system.h                     |   12 ++++++------
 urcu/uatomic_arch_x86.h           |    2 +-
 urcu/uatomic_generic.h            |    4 ++--
 urcu/wfqueue-static.h             |    8 ++++----
 urcu/wfstack-static.h             |    8 ++++----
 43 files changed, 170 insertions(+), 170 deletions(-)
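
Reviewer note: the renamed shared-access macros compose as shown in the
urcu/system.h hunk below. A minimal usage sketch, assuming an
illustrative shared flag (not from the tree):

	static int ready;

	/* CAA_STORE_SHARED() is a _CAA_STORE_SHARED() volatile store
	 * (via CAA_ACCESS_ONCE()) followed by cmm_smp_wmc(). */
	CAA_STORE_SHARED(ready, 1);

	/* CAA_LOAD_SHARED() is cmm_smp_rmc() followed by a
	 * _CAA_LOAD_SHARED() volatile load (via CAA_ACCESS_ONCE()). */
	while (!CAA_LOAD_SHARED(ready))
		caa_cpu_relax();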

diff --git a/tests/api_gcc.h b/tests/api_gcc.h
index b23110d..632bdd5 100644
--- a/tests/api_gcc.h
+++ b/tests/api_gcc.h
@@ -73,7 +73,7 @@
  * Machine parameters.
  */
 
-/* #define CACHE_LINE_SIZE 64 */
+/* #define CAA_CACHE_LINE_SIZE 64 */
 #define ____cacheline_internodealigned_in_smp \
 	__attribute__((__aligned__(1 << 6)))
 
@@ -295,9 +295,9 @@ cmpxchg(volatile long *ptr, long oldval, long newval)
  * Default machine parameters.
  */
 
-#ifndef CACHE_LINE_SIZE
-/* #define CACHE_LINE_SIZE 128 */
-#endif /* #ifndef CACHE_LINE_SIZE */
+#ifndef CAA_CACHE_LINE_SIZE
+/* #define CAA_CACHE_LINE_SIZE 128 */
+#endif /* #ifndef CAA_CACHE_LINE_SIZE */
 
 /*
  * Exclusive locking primitives.
@@ -497,7 +497,7 @@ long long get_microseconds(void)
 #define DEFINE_PER_THREAD(type, name) \
 	struct { \
 		__typeof__(type) v \
-			__attribute__((__aligned__(CACHE_LINE_SIZE))); \
+			__attribute__((__aligned__(CAA_CACHE_LINE_SIZE))); \
 	} __per_thread_##name[NR_THREADS];
 #define DECLARE_PER_THREAD(type, name) extern DEFINE_PER_THREAD(type, name)
 
@@ -531,7 +531,7 @@ long long get_microseconds(void)
 #define DEFINE_PER_CPU(type, name) \
 	struct { \
 		__typeof__(type) v \
-			__attribute__((__aligned__(CACHE_LINE_SIZE))); \
+			__attribute__((__aligned__(CAA_CACHE_LINE_SIZE))); \
 	} __per_cpu_##name[NR_CPUS]
 #define DECLARE_PER_CPU(type, name) extern DEFINE_PER_CPU(type, name)
 
@@ -972,7 +972,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  * @member:	the name of the list_struct within the struct.
  */
 #define list_entry(ptr, type, member) \
-	container_of(ptr, type, member)
+	caa_container_of(ptr, type, member)
 
 /**
  * list_first_entry - get the first element from a list
@@ -1274,7 +1274,7 @@ static inline void hlist_move_list(struct hlist_head *old,
 	old->first = NULL;
 }
 
-#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
+#define hlist_entry(ptr, type, member) caa_container_of(ptr,type,member)
 
 #define hlist_for_each(pos, head) \
 	for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \
diff --git a/tests/api_ppc.h b/tests/api_ppc.h
index 9773500..5c5f6e9 100644
--- a/tests/api_ppc.h
+++ b/tests/api_ppc.h
@@ -76,7 +76,7 @@
 
 #define CONFIG_PPC64
 
-/*#define CACHE_LINE_SIZE 128 */
+/*#define CAA_CACHE_LINE_SIZE 128 */
 #define ____cacheline_internodealigned_in_smp \
 	__attribute__((__aligned__(1 << 7)))
 
@@ -660,9 +660,9 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
  * Default machine parameters.
  */
 
-#ifndef CACHE_LINE_SIZE
-/* #define CACHE_LINE_SIZE 128 */
-#endif /* #ifndef CACHE_LINE_SIZE */
+#ifndef CAA_CACHE_LINE_SIZE
+/* #define CAA_CACHE_LINE_SIZE 128 */
+#endif /* #ifndef CAA_CACHE_LINE_SIZE */
 
 /*
  * Exclusive locking primitives.
@@ -855,7 +855,7 @@ long long get_microseconds(void)
 #define DEFINE_PER_THREAD(type, name) \
 	struct { \
 		__typeof__(type) v \
-			__attribute__((__aligned__(CACHE_LINE_SIZE))); \
+			__attribute__((__aligned__(CAA_CACHE_LINE_SIZE))); \
 	} __per_thread_##name[NR_THREADS];
 #define DECLARE_PER_THREAD(type, name) extern DEFINE_PER_THREAD(type, name)
 
@@ -889,7 +889,7 @@ long long get_microseconds(void)
 #define DEFINE_PER_CPU(type, name) \
 	struct { \
 		__typeof__(type) v \
-			__attribute__((__aligned__(CACHE_LINE_SIZE))); \
+			__attribute__((__aligned__(CAA_CACHE_LINE_SIZE))); \
 	} __per_cpu_##name[NR_CPUS]
 #define DECLARE_PER_CPU(type, name) extern DEFINE_PER_CPU(type, name)
 
@@ -1331,7 +1331,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  * @member:	the name of the list_struct within the struct.
  */
 #define list_entry(ptr, type, member) \
-	container_of(ptr, type, member)
+	caa_container_of(ptr, type, member)
 
 /**
  * list_first_entry - get the first element from a list
@@ -1633,7 +1633,7 @@ static inline void hlist_move_list(struct hlist_head *old,
 	old->first = NULL;
 }
 
-#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
+#define hlist_entry(ptr, type, member) caa_container_of(ptr,type,member)
 
 #define hlist_for_each(pos, head) \
 	for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \
diff --git a/tests/api_x86.h b/tests/api_x86.h
index fe00a35..0365432 100644
--- a/tests/api_x86.h
+++ b/tests/api_x86.h
@@ -76,7 +76,7 @@
  * Machine parameters.
  */
 
-/* #define CACHE_LINE_SIZE 64 */
+/* #define CAA_CACHE_LINE_SIZE 64 */
 #define ____cacheline_internodealigned_in_smp \
 	__attribute__((__aligned__(1 << 6)))
 
@@ -356,9 +356,9 @@ __asm__ __volatile__(LOCK_PREFIX "orl %0,%1" \
  * Default machine parameters.
  */
 
-#ifndef CACHE_LINE_SIZE
-/* #define CACHE_LINE_SIZE 128 */
-#endif /* #ifndef CACHE_LINE_SIZE */
+#ifndef CAA_CACHE_LINE_SIZE
+/* #define CAA_CACHE_LINE_SIZE 128 */
+#endif /* #ifndef CAA_CACHE_LINE_SIZE */
 
 /*
  * Exclusive locking primitives.
@@ -558,7 +558,7 @@ long long get_microseconds(void)
 #define DEFINE_PER_THREAD(type, name) \
 	struct { \
 		__typeof__(type) v \
-			__attribute__((__aligned__(CACHE_LINE_SIZE))); \
+			__attribute__((__aligned__(CAA_CACHE_LINE_SIZE))); \
 	} __per_thread_##name[NR_THREADS];
 #define DECLARE_PER_THREAD(type, name) extern DEFINE_PER_THREAD(type, name)
 
@@ -592,7 +592,7 @@ long long get_microseconds(void)
 #define DEFINE_PER_CPU(type, name) \
 	struct { \
 		__typeof__(type) v \
-			__attribute__((__aligned__(CACHE_LINE_SIZE))); \
+			__attribute__((__aligned__(CAA_CACHE_LINE_SIZE))); \
 	} __per_cpu_##name[NR_CPUS]
 #define DECLARE_PER_CPU(type, name) extern DEFINE_PER_CPU(type, name)
 
@@ -1034,7 +1034,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  * @member:	the name of the list_struct within the struct.
  */
 #define list_entry(ptr, type, member) \
-	container_of(ptr, type, member)
+	caa_container_of(ptr, type, member)
 
 /**
  * list_first_entry - get the first element from a list
@@ -1336,7 +1336,7 @@ static inline void hlist_move_list(struct hlist_head *old,
 	old->first = NULL;
 }
 
-#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
+#define hlist_entry(ptr, type, member) caa_container_of(ptr,type,member)
 
 #define hlist_for_each(pos, head) \
 	for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \
diff --git a/tests/rcutorture.h b/tests/rcutorture.h
index 4192bd0..4dac2f2 100644
--- a/tests/rcutorture.h
+++ b/tests/rcutorture.h
@@ -77,7 +77,7 @@ char argsbuf[64];
 #define GOFLAG_RUN  1
 #define GOFLAG_STOP 2
 
-int goflag __attribute__((__aligned__(CACHE_LINE_SIZE))) = GOFLAG_INIT;
+int goflag __attribute__((__aligned__(CAA_CACHE_LINE_SIZE))) = GOFLAG_INIT;
 
 #define RCU_READ_RUN 1000
 
diff --git a/tests/test_cycles_per_loop.c b/tests/test_cycles_per_loop.c
index 64b160b..05a6ea7 100644
--- a/tests/test_cycles_per_loop.c
+++ b/tests/test_cycles_per_loop.c
@@ -6,16 +6,16 @@
 static inline void loop_sleep(unsigned long l)
 {
 	while(l-- != 0)
-		cpu_relax();
+		caa_cpu_relax();
 }
 
 int main()
 {
 	cycles_t time1, time2;
 
-	time1 = get_cycles();
+	time1 = caa_get_cycles();
 	loop_sleep(NR_LOOPS);
-	time2 = get_cycles();
+	time2 = caa_get_cycles();
 	printf("CPU clock cycles per loop: %g\n", (time2 - time1) /
 						  (double)NR_LOOPS);
 }
diff --git a/tests/test_looplen.c b/tests/test_looplen.c
index 53d39cd..01394e0 100644
--- a/tests/test_looplen.c
+++ b/tests/test_looplen.c
@@ -61,7 +61,7 @@ static inline pid_t gettid(void)
 static inline void loop_sleep(unsigned long l)
 {
 	while(l-- != 0)
-		cpu_relax();
+		caa_cpu_relax();
 }
 
 #define LOOPS 1048576
@@ -75,9 +75,9 @@ int main(int argc, char **argv)
 	double cpl;
 
 	for (i = 0; i < TESTS; i++) {
-		time1 = get_cycles();
+		time1 = caa_get_cycles();
 		loop_sleep(LOOPS);
-		time2 = get_cycles();
+		time2 = caa_get_cycles();
 		time_tot += time2 - time1;
 	}
 	cpl = ((double)time_tot) / (double)TESTS / (double)LOOPS;
diff --git a/tests/test_mutex.c b/tests/test_mutex.c
index 7dd1e31..747bcc4 100644
--- a/tests/test_mutex.c
+++ b/tests/test_mutex.c
@@ -85,7 +85,7 @@ static unsigned long wduration;
 static inline void loop_sleep(unsigned long l)
 {
 	while(l-- != 0)
-		cpu_relax();
+		caa_cpu_relax();
 }
 
 static int verbose_mode;
@@ -156,9 +156,9 @@ static unsigned long long __thread nr_writes;
 static unsigned long long __thread nr_reads;
 
 static
-unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_writes;
+unsigned long long __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *tot_nr_writes;
 static
-unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_reads;
+unsigned long long __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *tot_nr_reads;
 
 static unsigned int nr_readers;
 static unsigned int nr_writers;
diff --git a/tests/test_perthreadlock.c b/tests/test_perthreadlock.c
index 9de8ced..c233724 100644
--- a/tests/test_perthreadlock.c
+++ b/tests/test_perthreadlock.c
@@ -68,7 +68,7 @@ struct test_array {
 
 struct per_thread_lock {
 	pthread_mutex_t lock;
-} __attribute__((aligned(CACHE_LINE_SIZE)));	/* cache-line aligned */
+} __attribute__((aligned(CAA_CACHE_LINE_SIZE)));	/* cache-line aligned */
 
 static struct per_thread_lock *per_thread_lock;
 
@@ -89,7 +89,7 @@ static unsigned long wduration;
 static inline void loop_sleep(unsigned long l)
 {
 	while(l-- != 0)
-		cpu_relax();
+		caa_cpu_relax();
 }
 
 static int verbose_mode;
@@ -160,9 +160,9 @@ static unsigned long long __thread nr_writes;
 static unsigned long long __thread nr_reads;
 
 static
-unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_writes;
+unsigned long long __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *tot_nr_writes;
 static
-unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_reads;
+unsigned long long __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *tot_nr_reads;
 
 static unsigned int nr_readers;
 static unsigned int nr_writers;
diff --git a/tests/test_perthreadlock_timing.c b/tests/test_perthreadlock_timing.c
index 9eb1c13..26b642a 100644
--- a/tests/test_perthreadlock_timing.c
+++ b/tests/test_perthreadlock_timing.c
@@ -60,7 +60,7 @@ static struct test_array test_array = { 8 };
 
 struct per_thread_lock {
 	pthread_mutex_t lock;
-} __attribute__((aligned(CACHE_LINE_SIZE)));	/* cache-line aligned */
+} __attribute__((aligned(CAA_CACHE_LINE_SIZE)));	/* cache-line aligned */
 
 static struct per_thread_lock *per_thread_lock;
 
@@ -78,8 +78,8 @@ static int num_write;
 #define NR_READ num_read
 #define NR_WRITE num_write
 
-static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *reader_time;
-static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *writer_time;
+static cycles_t __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *reader_time;
+static cycles_t __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *writer_time;
 
 void *thr_reader(void *arg)
 {
@@ -91,7 +91,7 @@ void *thr_reader(void *arg)
 			"reader", pthread_self(), (unsigned long)gettid());
 	sleep(2);
 
-	time1 = get_cycles();
+	time1 = caa_get_cycles();
 	for (i = 0; i < OUTER_READ_LOOP; i++) {
 		for (j = 0; j < INNER_READ_LOOP; j++) {
 			pthread_mutex_lock(&per_thread_lock[tidx].lock);
@@ -99,7 +99,7 @@ void *thr_reader(void *arg)
 			pthread_mutex_unlock(&per_thread_lock[tidx].lock);
 		}
 	}
-	time2 = get_cycles();
+	time2 = caa_get_cycles();
 
 	reader_time[tidx] = time2 - time1;
 
@@ -122,7 +122,7 @@ void *thr_writer(void *arg)
 
 	for (i = 0; i < OUTER_WRITE_LOOP; i++) {
 		for (j = 0; j < INNER_WRITE_LOOP; j++) {
-			time1 = get_cycles();
+			time1 = caa_get_cycles();
 			for (tidx = 0; tidx < NR_READ; tidx++) {
 				pthread_mutex_lock(&per_thread_lock[tidx].lock);
 			}
@@ -130,7 +130,7 @@ void *thr_writer(void *arg)
 			for (tidx = NR_READ - 1; tidx >= 0; tidx--) {
 				pthread_mutex_unlock(&per_thread_lock[tidx].lock);
 			}
-			time2 = get_cycles();
+			time2 = caa_get_cycles();
 			writer_time[(unsigned long)arg] += time2 - time1;
 			usleep(1);
 		}
diff --git a/tests/test_qsbr.c b/tests/test_qsbr.c
index 4f19c72..1ef8c26 100644
--- a/tests/test_qsbr.c
+++ b/tests/test_qsbr.c
@@ -83,7 +83,7 @@ static unsigned long wduration;
 static inline void loop_sleep(unsigned long l)
 {
 	while(l-- != 0)
-		cpu_relax();
+		caa_cpu_relax();
 }
 
 static int verbose_mode;
diff --git a/tests/test_qsbr_gc.c b/tests/test_qsbr_gc.c
index 875fd36..c9b3f4a 100644
--- a/tests/test_qsbr_gc.c
+++ b/tests/test_qsbr_gc.c
@@ -88,7 +88,7 @@ static unsigned long wduration;
 static inline void loop_sleep(unsigned long l)
 {
 	while(l-- != 0)
-		cpu_relax();
+		caa_cpu_relax();
 }
 
 static int verbose_mode;
@@ -164,7 +164,7 @@ static unsigned int nr_writers;
 
 pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER;
 static
-unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_writes;
+unsigned long long __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *tot_nr_writes;
 
 
 void rcu_copy_mutex_lock(void)
@@ -419,9 +419,9 @@ int main(int argc, char **argv)
 	tot_nr_writes = malloc(sizeof(*tot_nr_writes) * nr_writers);
 	pending_reclaims = malloc(sizeof(*pending_reclaims) * nr_writers);
 	if (reclaim_batch * sizeof(*pending_reclaims[i].queue)
-			< CACHE_LINE_SIZE)
+			< CAA_CACHE_LINE_SIZE)
 		for (i = 0; i < nr_writers; i++)
-			pending_reclaims[i].queue = calloc(1, CACHE_LINE_SIZE);
+			pending_reclaims[i].queue = calloc(1, CAA_CACHE_LINE_SIZE);
 	else
 		for (i = 0; i < nr_writers; i++)
 			pending_reclaims[i].queue = calloc(reclaim_batch,
diff --git a/tests/test_qsbr_timing.c b/tests/test_qsbr_timing.c
index 69193bf..f9f4504 100644
--- a/tests/test_qsbr_timing.c
+++ b/tests/test_qsbr_timing.c
@@ -95,8 +95,8 @@ static int num_write;
 #define NR_READ num_read
 #define NR_WRITE num_write
 
-static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *reader_time;
-static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *writer_time;
+static cycles_t __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *reader_time;
+static cycles_t __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *writer_time;
 
 void *thr_reader(void *arg)
 {
@@ -110,7 +110,7 @@ void *thr_reader(void *arg)
 
 	rcu_register_thread();
 
-	time1 = get_cycles();
+	time1 = caa_get_cycles();
 	for (i = 0; i < OUTER_READ_LOOP; i++) {
 		for (j = 0; j < INNER_READ_LOOP; j++) {
 			_rcu_read_lock();
@@ -122,7 +122,7 @@ void *thr_reader(void *arg)
 		}
 		_rcu_quiescent_state();
 	}
-	time2 = get_cycles();
+	time2 = caa_get_cycles();
 
 	rcu_unregister_thread();
 
@@ -147,7 +147,7 @@ void *thr_writer(void *arg)
 
 	for (i = 0; i < OUTER_WRITE_LOOP; i++) {
 		for (j = 0; j < INNER_WRITE_LOOP; j++) {
-			time1 = get_cycles();
+			time1 = caa_get_cycles();
 			new = malloc(sizeof(struct test_array));
 			rcu_copy_mutex_lock();
 			old = test_rcu_pointer;
@@ -163,7 +163,7 @@ void *thr_writer(void *arg)
 				old->a = 0;
 			}
 			free(old);
-			time2 = get_cycles();
+			time2 = caa_get_cycles();
 			writer_time[(unsigned long)arg] += time2 - time1;
 			usleep(1);
 		}
diff --git a/tests/test_rwlock.c b/tests/test_rwlock.c
index 445ce95..deca53b 100644
--- a/tests/test_rwlock.c
+++ b/tests/test_rwlock.c
@@ -85,7 +85,7 @@ static unsigned long wduration;
 static inline void loop_sleep(unsigned long l)
 {
 	while(l-- != 0)
-		cpu_relax();
+		caa_cpu_relax();
 }
 
 static int verbose_mode;
diff --git a/tests/test_rwlock_timing.c b/tests/test_rwlock_timing.c
index 7d16806..96269c0 100644
--- a/tests/test_rwlock_timing.c
+++ b/tests/test_rwlock_timing.c
@@ -75,8 +75,8 @@ static int num_write;
 #define NR_READ num_read
 #define NR_WRITE num_write
 
-static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *reader_time;
-static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *writer_time;
+static cycles_t __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *reader_time;
+static cycles_t __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *writer_time;
 
 void *thr_reader(void *arg)
 {
@@ -87,7 +87,7 @@ void *thr_reader(void *arg)
 			"reader", pthread_self(), (unsigned long)gettid());
 	sleep(2);
 
-	time1 = get_cycles();
+	time1 = caa_get_cycles();
 	for (i = 0; i < OUTER_READ_LOOP; i++) {
 		for (j = 0; j < INNER_READ_LOOP; j++) {
 			pthread_rwlock_rdlock(&lock);
@@ -95,7 +95,7 @@ void *thr_reader(void *arg)
 			pthread_rwlock_unlock(&lock);
 		}
 	}
-	time2 = get_cycles();
+	time2 = caa_get_cycles();
 
 	reader_time[(unsigned long)arg] = time2 - time1;
 
@@ -117,11 +117,11 @@ void *thr_writer(void *arg)
 
 	for (i = 0; i < OUTER_WRITE_LOOP; i++) {
 		for (j = 0; j < INNER_WRITE_LOOP; j++) {
-			time1 = get_cycles();
+			time1 = caa_get_cycles();
 			pthread_rwlock_wrlock(&lock);
 			test_array.a = 8;
 			pthread_rwlock_unlock(&lock);
-			time2 = get_cycles();
+			time2 = caa_get_cycles();
 			writer_time[(unsigned long)arg] += time2 - time1;
 			usleep(1);
 		}
diff --git a/tests/test_urcu.c b/tests/test_urcu.c
index eeea7f5..e6a6489 100644
--- a/tests/test_urcu.c
+++ b/tests/test_urcu.c
@@ -83,7 +83,7 @@ static unsigned long wduration;
 static inline void loop_sleep(unsigned long l)
 {
 	while(l-- != 0)
-		cpu_relax();
+		caa_cpu_relax();
 }
 
 static int verbose_mode;
diff --git a/tests/test_urcu_assign.c b/tests/test_urcu_assign.c
index 481cdd6..24a704b 100644
--- a/tests/test_urcu_assign.c
+++ b/tests/test_urcu_assign.c
@@ -83,7 +83,7 @@ static unsigned long wduration;
 static inline void loop_sleep(unsigned long l)
 {
 	while(l-- != 0)
-		cpu_relax();
+		caa_cpu_relax();
 }
 
 static int verbose_mode;
diff --git a/tests/test_urcu_bp.c b/tests/test_urcu_bp.c
index e831824..7f20a6a 100644
--- a/tests/test_urcu_bp.c
+++ b/tests/test_urcu_bp.c
@@ -83,7 +83,7 @@ static unsigned long wduration;
 static inline void loop_sleep(unsigned long l)
 {
 	while(l-- != 0)
-		cpu_relax();
+		caa_cpu_relax();
 }
 
 static int verbose_mode;
diff --git a/tests/test_urcu_defer.c b/tests/test_urcu_defer.c
index 2cbb041..c333964 100644
--- a/tests/test_urcu_defer.c
+++ b/tests/test_urcu_defer.c
@@ -84,7 +84,7 @@ static unsigned long wduration;
 static inline void loop_sleep(unsigned long l)
 {
 	while(l-- != 0)
-		cpu_relax();
+		caa_cpu_relax();
 }
 
 static int verbose_mode;
@@ -156,7 +156,7 @@ static unsigned long long __thread nr_writes;
 static unsigned long long __thread nr_reads;
 
 static
-unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_writes;
+unsigned long long __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *tot_nr_writes;
 
 static unsigned int nr_readers;
 static unsigned int nr_writers;
diff --git a/tests/test_urcu_gc.c b/tests/test_urcu_gc.c
index ddafb87..d0f7e6e 100644
--- a/tests/test_urcu_gc.c
+++ b/tests/test_urcu_gc.c
@@ -92,7 +92,7 @@ static unsigned long wduration;
 static inline void loop_sleep(unsigned long l)
 {
 	while(l-- != 0)
-		cpu_relax();
+		caa_cpu_relax();
 }
 
 static int verbose_mode;
@@ -164,7 +164,7 @@ static unsigned long long __thread nr_writes;
 static unsigned long long __thread nr_reads;
 
 static
-unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_writes;
+unsigned long long __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *tot_nr_writes;
 
 static unsigned int nr_readers;
 static unsigned int nr_writers;
@@ -419,9 +419,9 @@ int main(int argc, char **argv)
 	tot_nr_writes = malloc(sizeof(*tot_nr_writes) * nr_writers);
 	pending_reclaims = malloc(sizeof(*pending_reclaims) * nr_writers);
 	if (reclaim_batch * sizeof(*pending_reclaims[i].queue)
-			< CACHE_LINE_SIZE)
+			< CAA_CACHE_LINE_SIZE)
 		for (i = 0; i < nr_writers; i++)
-			pending_reclaims[i].queue = calloc(1, CACHE_LINE_SIZE);
+			pending_reclaims[i].queue = calloc(1, CAA_CACHE_LINE_SIZE);
 	else
 		for (i = 0; i < nr_writers; i++)
 			pending_reclaims[i].queue = calloc(reclaim_batch,
diff --git a/tests/test_urcu_lfq.c b/tests/test_urcu_lfq.c
index 901bcae..aed9be0 100644
--- a/tests/test_urcu_lfq.c
+++ b/tests/test_urcu_lfq.c
@@ -77,7 +77,7 @@ static unsigned long wdelay;
 static inline void loop_sleep(unsigned long l)
 {
 	while(l-- != 0)
-		cpu_relax();
+		caa_cpu_relax();
 }
 
 static int verbose_mode;
@@ -202,7 +202,7 @@ fail:
 
 static void rcu_release_node(struct urcu_ref *ref)
 {
-	struct rcu_lfq_node *node = container_of(ref, struct rcu_lfq_node, ref);
+	struct rcu_lfq_node *node = caa_container_of(ref, struct rcu_lfq_node, ref);
 	defer_rcu(free, node);
 	//synchronize_rcu();
 	//free(node);
@@ -255,7 +255,7 @@ void *thr_dequeuer(void *_count)
 
 static void release_node(struct urcu_ref *ref)
 {
-	struct rcu_lfq_node *node = container_of(ref, struct rcu_lfq_node, ref);
+	struct rcu_lfq_node *node = caa_container_of(ref, struct rcu_lfq_node, ref);
 	free(node);
 }
 
diff --git a/tests/test_urcu_lfs.c b/tests/test_urcu_lfs.c
index 8249eba..02be2d4 100644
--- a/tests/test_urcu_lfs.c
+++ b/tests/test_urcu_lfs.c
@@ -77,7 +77,7 @@ static unsigned long wdelay;
 static inline void loop_sleep(unsigned long l)
 {
 	while(l-- != 0)
-		cpu_relax();
+		caa_cpu_relax();
 }
 
 static int verbose_mode;
diff --git a/tests/test_urcu_timing.c b/tests/test_urcu_timing.c
index 94ac0ae..8e1a810 100644
--- a/tests/test_urcu_timing.c
+++ b/tests/test_urcu_timing.c
@@ -94,8 +94,8 @@ static int num_write;
 #define NR_READ num_read
 #define NR_WRITE num_write
 
-static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *reader_time;
-static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *writer_time;
+static cycles_t __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *reader_time;
+static cycles_t __attribute__((aligned(CAA_CACHE_LINE_SIZE))) *writer_time;
 
 void *thr_reader(void *arg)
 {
@@ -109,7 +109,7 @@ void *thr_reader(void *arg)
 
 	rcu_register_thread();
 
-	time1 = get_cycles();
+	time1 = caa_get_cycles();
 	for (i = 0; i < OUTER_READ_LOOP; i++) {
 		for (j = 0; j < INNER_READ_LOOP; j++) {
 			rcu_read_lock();
@@ -120,7 +120,7 @@ void *thr_reader(void *arg)
 			rcu_read_unlock();
 		}
 	}
-	time2 = get_cycles();
+	time2 = caa_get_cycles();
 
 	rcu_unregister_thread();
 
@@ -145,7 +145,7 @@ void *thr_writer(void *arg)
 
 	for (i = 0; i < OUTER_WRITE_LOOP; i++) {
 		for (j = 0; j < INNER_WRITE_LOOP; j++) {
-			time1 = get_cycles();
+			time1 = caa_get_cycles();
 			new = malloc(sizeof(struct test_array));
 			rcu_copy_mutex_lock();
 			old = test_rcu_pointer;
@@ -161,7 +161,7 @@ void *thr_writer(void *arg)
 				old->a = 0;
 			}
 			free(old);
-			time2 = get_cycles();
+			time2 = caa_get_cycles();
 			writer_time[(unsigned long)arg] += time2 - time1;
 			usleep(1);
 		}
diff --git a/tests/test_urcu_wfq.c b/tests/test_urcu_wfq.c
index d446e47..cb49454 100644
--- a/tests/test_urcu_wfq.c
+++ b/tests/test_urcu_wfq.c
@@ -76,7 +76,7 @@ static unsigned long wdelay;
 static inline void loop_sleep(unsigned long l)
 {
 	while(l-- != 0)
-		cpu_relax();
+		caa_cpu_relax();
 }
 
 static int verbose_mode;
diff --git a/tests/test_urcu_wfs.c b/tests/test_urcu_wfs.c
index 294e955..49f9218 100644
--- a/tests/test_urcu_wfs.c
+++ b/tests/test_urcu_wfs.c
@@ -76,7 +76,7 @@ static unsigned long wdelay;
 static inline void loop_sleep(unsigned long l)
 {
 	while(l-- != 0)
-		cpu_relax();
+		caa_cpu_relax();
 }
 
 static int verbose_mode;
diff --git a/urcu-bp-static.h b/urcu-bp-static.h
index 394476b..ea2c376 100644
--- a/urcu-bp-static.h
+++ b/urcu-bp-static.h
@@ -140,7 +140,7 @@ struct rcu_reader {
 	/* Data used by both reader and synchronize_rcu() */
 	long ctr;
 	/* Data used for registry */
-	struct list_head node __attribute__((aligned(CACHE_LINE_SIZE)));
+	struct list_head node __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
 	pthread_t tid;
 	int alloc;	/* registry entry allocated */
 };
@@ -162,7 +162,7 @@ static inline int rcu_old_gp_ongoing(long *value)
 	 * Make sure both tests below are done on the same version of *value
 	 * to insure consistency.
 	 */
-	v = LOAD_SHARED(*value);
+	v = CAA_LOAD_SHARED(*value);
 	return (v & RCU_GP_CTR_NEST_MASK) &&
 		 ((v ^ rcu_gp_ctr) & RCU_GP_CTR_PHASE);
 }
@@ -182,14 +182,14 @@ static inline void _rcu_read_lock(void)
 	 *   RCU_GP_COUNT | (~RCU_GP_CTR_PHASE or RCU_GP_CTR_PHASE)
 	 */
 	if (likely(!(tmp & RCU_GP_CTR_NEST_MASK))) {
-		_STORE_SHARED(rcu_reader->ctr, _LOAD_SHARED(rcu_gp_ctr));
+		_CAA_STORE_SHARED(rcu_reader->ctr, _CAA_LOAD_SHARED(rcu_gp_ctr));
 		/*
 		 * Set active readers count for outermost nesting level before
 		 * accessing the pointer.
 		 */
 		cmm_smp_mb();
 	} else {
-		_STORE_SHARED(rcu_reader->ctr, tmp + RCU_GP_COUNT);
+		_CAA_STORE_SHARED(rcu_reader->ctr, tmp + RCU_GP_COUNT);
 	}
 }
 
@@ -199,7 +199,7 @@ static inline void _rcu_read_unlock(void)
 	 * Finish using rcu before decrementing the pointer.
 	 */
 	cmm_smp_mb();
-	_STORE_SHARED(rcu_reader->ctr, rcu_reader->ctr - RCU_GP_COUNT);
+	_CAA_STORE_SHARED(rcu_reader->ctr, rcu_reader->ctr - RCU_GP_COUNT);
 	cmm_barrier();	/* Ensure the compiler does not reorder us with mutex */
 }
 
diff --git a/urcu-bp.c b/urcu-bp.c
index 33352c2..b457d2b 100644
--- a/urcu-bp.c
+++ b/urcu-bp.c
@@ -123,13 +123,13 @@ void update_counter_and_wait(void)
 	struct rcu_reader *index, *tmp;
 
 	/* Switch parity: 0 -> 1, 1 -> 0 */
-	STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR_PHASE);
+	CAA_STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR_PHASE);
 
 	/*
 	 * Must commit qparity update to memory before waiting for other parity
 	 * quiescent state. Failure to do so could result in the writer waiting
 	 * forever while new readers are always accessing data (no progress).
-	 * Ensured by STORE_SHARED and LOAD_SHARED.
+	 * Ensured by CAA_STORE_SHARED and CAA_LOAD_SHARED.
 	 */
 
 	/*
@@ -155,7 +155,7 @@ void update_counter_and_wait(void)
 			if (wait_loops == RCU_QS_ACTIVE_ATTEMPTS)
 				usleep(RCU_SLEEP_DELAY);
 			else
-				cpu_relax();
+				caa_cpu_relax();
 		}
 	}
 	/* put back the reader list in the registry */
diff --git a/urcu-defer.c b/urcu-defer.c
index 6dc08a3..c28e848 100644
--- a/urcu-defer.c
+++ b/urcu-defer.c
@@ -110,7 +110,7 @@ static unsigned long rcu_defer_num_callbacks(void)
 
 	mutex_lock(&rcu_defer_mutex);
 	list_for_each_entry(index, &registry, list) {
-		head = LOAD_SHARED(index->head);
+		head = CAA_LOAD_SHARED(index->head);
 		num_items += head - index->tail;
 	}
 	mutex_unlock(&rcu_defer_mutex);
@@ -153,21 +153,21 @@ static void rcu_defer_barrier_queue(struct defer_queue *queue,
 
 	for (i = queue->tail; i != head;) {
 		cmm_smp_rmb();       /* read head before q[]. */
-		p = LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
+		p = CAA_LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
 		if (unlikely(DQ_IS_FCT_BIT(p))) {
 			DQ_CLEAR_FCT_BIT(p);
 			queue->last_fct_out = p;
-			p = LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
+			p = CAA_LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
 		} else if (unlikely(p == DQ_FCT_MARK)) {
-			p = LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
+			p = CAA_LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
 			queue->last_fct_out = p;
-			p = LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
+			p = CAA_LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
 		}
 		fct = queue->last_fct_out;
 		fct(p);
 	}
 	cmm_smp_mb();	/* push tail after having used q[] */
-	STORE_SHARED(queue->tail, i);
+	CAA_STORE_SHARED(queue->tail, i);
 }
 
 static void _rcu_defer_barrier_thread(void)
@@ -212,7 +212,7 @@ void rcu_defer_barrier(void)
 
 	mutex_lock(&rcu_defer_mutex);
 	list_for_each_entry(index, &registry, list) {
-		index->last_head = LOAD_SHARED(index->head);
+		index->last_head = CAA_LOAD_SHARED(index->head);
 		num_items += index->last_head - index->tail;
 	}
 	if (likely(!num_items)) {
@@ -241,7 +241,7 @@ void _defer_rcu(void (*fct)(void *p), void *p)
 	 * thread.
 	 */
 	head = defer_queue.head;
-	tail = LOAD_SHARED(defer_queue.tail);
+	tail = CAA_LOAD_SHARED(defer_queue.tail);
 
 	/*
 	 * If queue is full, or reached threshold. Empty queue ourself.
@@ -250,7 +250,7 @@ void _defer_rcu(void (*fct)(void *p), void *p)
 	if (unlikely(head - tail >= DEFER_QUEUE_SIZE - 2)) {
 		assert(head - tail <= DEFER_QUEUE_SIZE);
 		rcu_defer_barrier_thread();
-		assert(head - LOAD_SHARED(defer_queue.tail) == 0);
+		assert(head - CAA_LOAD_SHARED(defer_queue.tail) == 0);
 	}
 
 	if (unlikely(defer_queue.last_fct_in != fct)) {
@@ -261,13 +261,13 @@ void _defer_rcu(void (*fct)(void *p), void *p)
 			 * marker, write DQ_FCT_MARK followed by the function
 			 * pointer.
 			 */
-			_STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+			_CAA_STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
 				      DQ_FCT_MARK);
-			_STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+			_CAA_STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
 				      fct);
 		} else {
 			DQ_SET_FCT_BIT(fct);
-			_STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+			_CAA_STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
 				      fct);
 		}
 	} else {
@@ -276,16 +276,16 @@ void _defer_rcu(void (*fct)(void *p), void *p)
 			 * If the data to encode is not aligned or the marker,
 			 * write DQ_FCT_MARK followed by the function pointer.
 			 */
-			_STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+			_CAA_STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
 				      DQ_FCT_MARK);
-			_STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+			_CAA_STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
 				      fct);
 		}
 	}
-	_STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK], p);
+	_CAA_STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK], p);
 	cmm_smp_wmb();	/* Publish new pointer before head */
 			/* Write q[] before head. */
-	STORE_SHARED(defer_queue.head, head);
+	CAA_STORE_SHARED(defer_queue.head, head);
 	cmm_smp_mb();	/* Write queue head before read futex */
 	/*
 	 * Wake-up any waiting defer thread.
diff --git a/urcu-pointer-static.h b/urcu-pointer-static.h
index c8ac7f0..5a1e0e4 100644
--- a/urcu-pointer-static.h
+++ b/urcu-pointer-static.h
@@ -49,7 +49,7 @@ extern "C" {
  * Inserts memory barriers on architectures that require them (currently only
  * Alpha) and documents which pointers are protected by RCU.
  *
- * The compiler memory barrier in LOAD_SHARED() ensures that value-speculative
+ * The compiler memory barrier in CAA_LOAD_SHARED() ensures that value-speculative
  * optimizations (e.g. VSS: Value Speculation Scheduling) does not perform the
  * data read before the pointer read by speculating the value of the pointer.
  * Correct ordering is ensured because the pointer is read as a volatile access.
@@ -62,7 +62,7 @@ extern "C" {
  */
 
 #define _rcu_dereference(p)     ({					\
-				typeof(p) _________p1 = LOAD_SHARED(p); \
+				typeof(p) _________p1 = CAA_LOAD_SHARED(p); \
 				cmm_smp_read_barrier_depends();		\
 				(_________p1);				\
 				})
diff --git a/urcu-qsbr-static.h b/urcu-qsbr-static.h
index 108ef6e..ab4dcb5 100644
--- a/urcu-qsbr-static.h
+++ b/urcu-qsbr-static.h
@@ -135,7 +135,7 @@ struct rcu_reader {
 	/* Data used by both reader and synchronize_rcu() */
 	unsigned long ctr;
 	/* Data used for registry */
-	struct list_head node __attribute__((aligned(CACHE_LINE_SIZE)));
+	struct list_head node __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
 	pthread_t tid;
 };
 
@@ -159,7 +159,7 @@ static inline int rcu_gp_ongoing(unsigned long *ctr)
 {
 	unsigned long v;
 
-	v = LOAD_SHARED(*ctr);
+	v = CAA_LOAD_SHARED(*ctr);
 	return v && (v != rcu_gp_ctr);
 }
 
@@ -175,7 +175,7 @@ static inline void _rcu_read_unlock(void)
 static inline void _rcu_quiescent_state(void)
 {
 	cmm_smp_mb();
-	_STORE_SHARED(rcu_reader.ctr, _LOAD_SHARED(rcu_gp_ctr));
+	_CAA_STORE_SHARED(rcu_reader.ctr, _CAA_LOAD_SHARED(rcu_gp_ctr));
 	cmm_smp_mb();	/* write rcu_reader.ctr before read futex */
 	wake_up_gp();
 	cmm_smp_mb();
@@ -184,7 +184,7 @@ static inline void _rcu_quiescent_state(void)
 static inline void _rcu_thread_offline(void)
 {
 	cmm_smp_mb();
-	STORE_SHARED(rcu_reader.ctr, 0);
+	CAA_STORE_SHARED(rcu_reader.ctr, 0);
 	cmm_smp_mb();	/* write rcu_reader.ctr before read futex */
 	wake_up_gp();
 	cmm_barrier();	/* Ensure the compiler does not reorder us with mutex */
@@ -193,7 +193,7 @@ static inline void _rcu_thread_offline(void)
 static inline void _rcu_thread_online(void)
 {
 	cmm_barrier();	/* Ensure the compiler does not reorder us with mutex */
-	_STORE_SHARED(rcu_reader.ctr, LOAD_SHARED(rcu_gp_ctr));
+	_CAA_STORE_SHARED(rcu_reader.ctr, CAA_LOAD_SHARED(rcu_gp_ctr));
 	cmm_smp_mb();
 }
 
diff --git a/urcu-qsbr.c b/urcu-qsbr.c
index 2cf73d5..51d34c7 100644
--- a/urcu-qsbr.c
+++ b/urcu-qsbr.c
@@ -114,10 +114,10 @@ static void update_counter_and_wait(void)
 
 #if (BITS_PER_LONG < 64)
 	/* Switch parity: 0 -> 1, 1 -> 0 */
-	STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
+	CAA_STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
 #else	/* !(BITS_PER_LONG < 64) */
 	/* Increment current G.P. */
-	STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
+	CAA_STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
 #endif	/* !(BITS_PER_LONG < 64) */
 
 	/*
@@ -163,7 +163,7 @@ static void update_counter_and_wait(void)
 				wait_gp();
 			} else {
 #ifndef HAS_INCOHERENT_CACHES
-				cpu_relax();
+				caa_cpu_relax();
 #else /* #ifndef HAS_INCOHERENT_CACHES */
 				cmm_smp_mb();
 #endif /* #else #ifndef HAS_INCOHERENT_CACHES */
@@ -198,7 +198,7 @@ void synchronize_rcu(void)
 	 * threads registered as readers.
 	 */
 	if (was_online)
-		STORE_SHARED(rcu_reader.ctr, 0);
+		CAA_STORE_SHARED(rcu_reader.ctr, 0);
 
 	mutex_lock(&rcu_gp_lock);
 
@@ -238,7 +238,7 @@ out:
 	 * freed.
 	 */
 	if (was_online)
-		_STORE_SHARED(rcu_reader.ctr, LOAD_SHARED(rcu_gp_ctr));
+		_CAA_STORE_SHARED(rcu_reader.ctr, CAA_LOAD_SHARED(rcu_gp_ctr));
 	cmm_smp_mb();
 }
 #else /* !(BITS_PER_LONG < 64) */
@@ -255,7 +255,7 @@ void synchronize_rcu(void)
 	 */
 	cmm_smp_mb();
 	if (was_online)
-		STORE_SHARED(rcu_reader.ctr, 0);
+		CAA_STORE_SHARED(rcu_reader.ctr, 0);
 
 	mutex_lock(&rcu_gp_lock);
 	if (list_empty(&registry))
@@ -265,7 +265,7 @@ out:
 	mutex_unlock(&rcu_gp_lock);
 
 	if (was_online)
-		_STORE_SHARED(rcu_reader.ctr, LOAD_SHARED(rcu_gp_ctr));
+		_CAA_STORE_SHARED(rcu_reader.ctr, CAA_LOAD_SHARED(rcu_gp_ctr));
 	cmm_smp_mb();
 }
 #endif  /* !(BITS_PER_LONG < 64) */
diff --git a/urcu-static.h b/urcu-static.h
index ad415ac..46fe64a 100644
--- a/urcu-static.h
+++ b/urcu-static.h
@@ -222,7 +222,7 @@ struct rcu_reader {
 	unsigned long ctr;
 	char need_mb;
 	/* Data used for registry */
-	struct list_head node __attribute__((aligned(CACHE_LINE_SIZE)));
+	struct list_head node __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
 	pthread_t tid;
 };
 
@@ -250,7 +250,7 @@ static inline int rcu_gp_ongoing(unsigned long *ctr)
 	 * Make sure both tests below are done on the same version of *value
 	 * to insure consistency.
 	 */
-	v = LOAD_SHARED(*ctr);
+	v = CAA_LOAD_SHARED(*ctr);
 	return (v & RCU_GP_CTR_NEST_MASK) &&
 		 ((v ^ rcu_gp_ctr) & RCU_GP_CTR_PHASE);
 }
@@ -266,14 +266,14 @@ static inline void _rcu_read_lock(void)
 	 *   RCU_GP_COUNT | (~RCU_GP_CTR_PHASE or RCU_GP_CTR_PHASE)
 	 */
 	if (likely(!(tmp & RCU_GP_CTR_NEST_MASK))) {
-		_STORE_SHARED(rcu_reader.ctr, _LOAD_SHARED(rcu_gp_ctr));
+		_CAA_STORE_SHARED(rcu_reader.ctr, _CAA_LOAD_SHARED(rcu_gp_ctr));
 		/*
 		 * Set active readers count for outermost nesting level before
 		 * accessing the pointer. See smp_mb_master().
 		 */
 		smp_mb_slave(RCU_MB_GROUP);
 	} else {
-		_STORE_SHARED(rcu_reader.ctr, tmp + RCU_GP_COUNT);
+		_CAA_STORE_SHARED(rcu_reader.ctr, tmp + RCU_GP_COUNT);
 	}
 }
 
@@ -288,12 +288,12 @@ static inline void _rcu_read_unlock(void)
 	 */
 	if (likely((tmp & RCU_GP_CTR_NEST_MASK) == RCU_GP_COUNT)) {
 		smp_mb_slave(RCU_MB_GROUP);
-		_STORE_SHARED(rcu_reader.ctr, rcu_reader.ctr - RCU_GP_COUNT);
+		_CAA_STORE_SHARED(rcu_reader.ctr, rcu_reader.ctr - RCU_GP_COUNT);
 		/* write rcu_reader.ctr before read futex */
 		smp_mb_slave(RCU_MB_GROUP);
 		wake_up_gp();
 	} else {
-		_STORE_SHARED(rcu_reader.ctr, rcu_reader.ctr - RCU_GP_COUNT);
+		_CAA_STORE_SHARED(rcu_reader.ctr, rcu_reader.ctr - RCU_GP_COUNT);
 	}
 	cmm_barrier();	/* Ensure the compiler does not reorder us with mutex */
 }
diff --git a/urcu.c b/urcu.c
index 9c556aa..b4a5a7d 100644
--- a/urcu.c
+++ b/urcu.c
@@ -99,9 +99,9 @@ static void mutex_lock(pthread_mutex_t *mutex)
 			perror("Error in pthread mutex lock");
 			exit(-1);
 		}
-		if (LOAD_SHARED(rcu_reader.need_mb)) {
+		if (CAA_LOAD_SHARED(rcu_reader.need_mb)) {
 			cmm_smp_mb();
-			_STORE_SHARED(rcu_reader.need_mb, 0);
+			_CAA_STORE_SHARED(rcu_reader.need_mb, 0);
 			cmm_smp_mb();
 		}
 		poll(NULL,0,10);
@@ -155,7 +155,7 @@ static void force_mb_all_readers(void)
 	 * cache flush is enforced.
 	 */
 	list_for_each_entry(index, &registry, node) {
-		STORE_SHARED(index->need_mb, 1);
+		CAA_STORE_SHARED(index->need_mb, 1);
 		pthread_kill(index->tid, SIGRCU);
 	}
 	/*
@@ -172,7 +172,7 @@ static void force_mb_all_readers(void)
 	 * the Linux Test Project (LTP).
 	 */
 	list_for_each_entry(index, &registry, node) {
-		while (LOAD_SHARED(index->need_mb)) {
+		while (CAA_LOAD_SHARED(index->need_mb)) {
 			pthread_kill(index->tid, SIGRCU);
 			poll(NULL, 0, 1);
 		}
@@ -205,7 +205,7 @@ void update_counter_and_wait(void)
 	struct rcu_reader *index, *tmp;
 
 	/* Switch parity: 0 -> 1, 1 -> 0 */
-	STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR_PHASE);
+	CAA_STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR_PHASE);
 
 	/*
 	 * Must commit rcu_gp_ctr update to memory before waiting for quiescent
@@ -251,7 +251,7 @@ void update_counter_and_wait(void)
 			if (wait_loops == RCU_QS_ACTIVE_ATTEMPTS)
 				wait_gp();
 			else
-				cpu_relax();
+				caa_cpu_relax();
 		}
 #else /* #ifndef HAS_INCOHERENT_CACHES */
 		/*
@@ -275,7 +275,7 @@ void update_counter_and_wait(void)
 				wait_loops = 0;
 				break; /* only escape switch */
 			default:
-				cpu_relax();
+				caa_cpu_relax();
 			}
 		}
 #endif /* #else #ifndef HAS_INCOHERENT_CACHES */
@@ -384,7 +384,7 @@ static void sigrcu_handler(int signo, siginfo_t *siginfo, void *context)
 	 * executed on.
 	 */
 	cmm_smp_mb();
-	_STORE_SHARED(rcu_reader.need_mb, 0);
+	_CAA_STORE_SHARED(rcu_reader.need_mb, 0);
 	cmm_smp_mb();
 }
 
diff --git a/urcu/arch_generic.h b/urcu/arch_generic.h
index 25a9f7a..100d3c6 100644
--- a/urcu/arch_generic.h
+++ b/urcu/arch_generic.h
@@ -28,8 +28,8 @@
 extern "C" {
 #endif
 
-#ifndef CACHE_LINE_SIZE
-#define CACHE_LINE_SIZE	64
+#ifndef CAA_CACHE_LINE_SIZE
+#define CAA_CACHE_LINE_SIZE	64
 #endif
 
 #if !defined(cmm_mc) && !defined(cmm_rmc) && !defined(cmm_wmc)
@@ -117,8 +117,8 @@ extern "C" {
 #define cmm_smp_read_barrier_depends()
 #endif
 
-#ifndef cpu_relax
-#define cpu_relax()		cmm_barrier()
+#ifndef caa_cpu_relax
+#define caa_cpu_relax()		cmm_barrier()
 #endif
 
 #ifdef __cplusplus
diff --git a/urcu/arch_ppc.h b/urcu/arch_ppc.h
index 93aed2a..e1a8270 100644
--- a/urcu/arch_ppc.h
+++ b/urcu/arch_ppc.h
@@ -30,7 +30,7 @@ extern "C" {
 #endif 
 
 /* Include size of POWER5+ L3 cache lines: 256 bytes */
-#define CACHE_LINE_SIZE	256
+#define CAA_CACHE_LINE_SIZE	256
 
 #define cmm_mb()    asm volatile("sync":::"memory")
 
diff --git a/urcu/arch_s390.h b/urcu/arch_s390.h
index 8a33e20..8d1483a 100644
--- a/urcu/arch_s390.h
+++ b/urcu/arch_s390.h
@@ -35,7 +35,7 @@
 extern "C" {
 #endif 
 
-#define CACHE_LINE_SIZE	128
+#define CAA_CACHE_LINE_SIZE	128
 
 #define cmm_mb()    __asm__ __volatile__("bcr 15,0" : : : "memory")
 
diff --git a/urcu/arch_sparc64.h b/urcu/arch_sparc64.h
index 39f27c7..0752c4d 100644
--- a/urcu/arch_sparc64.h
+++ b/urcu/arch_sparc64.h
@@ -29,7 +29,7 @@
 extern "C" {
 #endif 
 
-#define CACHE_LINE_SIZE	256
+#define CAA_CACHE_LINE_SIZE	256
 
 /*
  * Inspired from the Linux kernel. Workaround Spitfire bug #51.
diff --git a/urcu/arch_x86.h b/urcu/arch_x86.h
index d0a58e8..20db5cf 100644
--- a/urcu/arch_x86.h
+++ b/urcu/arch_x86.h
@@ -29,7 +29,7 @@
 extern "C" {
 #endif 
 
-#define CACHE_LINE_SIZE	128
+#define CAA_CACHE_LINE_SIZE	128
 
 #ifdef CONFIG_RCU_HAVE_FENCE
 #define cmm_mb()    asm volatile("mfence":::"memory")
@@ -45,7 +45,7 @@ extern "C" {
 #define cmm_wmb()   asm volatile("lock; addl $0,0(%%esp)"::: "memory")
 #endif
 
-#define cpu_relax()	asm volatile("rep; nop" : : : "memory");
+#define caa_cpu_relax()	asm volatile("rep; nop" : : : "memory");
 
 #define rdtscll(val)							  \
 	do {						  		  \
@@ -57,7 +57,7 @@ extern "C" {
 
 typedef unsigned long long cycles_t;
 
-static inline cycles_t get_cycles(void)
+static inline cycles_t caa_get_cycles(void)
 {
         cycles_t ret = 0;
 
diff --git a/urcu/compiler.h b/urcu/compiler.h
index d67e6c5..ca32deb 100644
--- a/urcu/compiler.h
+++ b/urcu/compiler.h
@@ -28,16 +28,16 @@
 /*
  * Instruct the compiler to perform only a single access to a variable
  * (prohibits merging and refetching). The compiler is also forbidden to reorder
- * successive instances of ACCESS_ONCE(), but only when the compiler is aware of
+ * successive instances of CAA_ACCESS_ONCE(), but only when the compiler is aware of
  * particular ordering. Compiler ordering can be ensured, for example, by
- * putting two ACCESS_ONCE() in separate C statements.
+ * putting two CAA_ACCESS_ONCE() in separate C statements.
  *
  * This macro does absolutely -nothing- to prevent the CPU from reordering,
  * merging, or refetching absolutely anything at any time.  Its main intended
  * use is to mediate communication between process-level code and irq/NMI
  * handlers, all running on the same CPU.
  */
-#define ACCESS_ONCE(x)	(*(volatile typeof(x) *)&(x))
+#define CAA_ACCESS_ONCE(x)	(*(volatile typeof(x) *)&(x))
 
 #ifndef max
 #define max(a,b) ((a)>(b)?(a):(b))
@@ -48,14 +48,14 @@
 #endif
 
 #if defined(__SIZEOF_LONG__)
-#define BITS_PER_LONG	(__SIZEOF_LONG__ * 8)
+#define CAA_BITS_PER_LONG	(__SIZEOF_LONG__ * 8)
 #elif defined(_LP64)
-#define BITS_PER_LONG	64
+#define CAA_BITS_PER_LONG	64
 #else
-#define BITS_PER_LONG	32
+#define CAA_BITS_PER_LONG	32
 #endif
 
-#define container_of(ptr, type, member)					\
+#define caa_container_of(ptr, type, member)					\
 	({								\
 		const typeof(((type *)NULL)->member) * __ptr = (ptr);	\
 		(type *)((char *)__ptr - offsetof(type, member));	\
diff --git a/urcu/system.h b/urcu/system.h
index 11a499e..e018606 100644
--- a/urcu/system.h
+++ b/urcu/system.h
@@ -24,29 +24,29 @@
 /*
  * Identify a shared load. A cmm_smp_rmc() or cmm_smp_mc() should come before the load.
  */
-#define _LOAD_SHARED(p)	       ACCESS_ONCE(p)
+#define _CAA_LOAD_SHARED(p)	       CAA_ACCESS_ONCE(p)
 
 /*
  * Load a data from shared memory, doing a cache flush if required.
  */
-#define LOAD_SHARED(p)			\
+#define CAA_LOAD_SHARED(p)			\
 	({				\
 		cmm_smp_rmc();		\
-		_LOAD_SHARED(p);	\
+		_CAA_LOAD_SHARED(p);	\
 	})
 
 /*
  * Identify a shared store. A cmm_smp_wmc() or cmm_smp_mc() should follow the store.
  */
-#define _STORE_SHARED(x, v)	({ ACCESS_ONCE(x) = (v); })
+#define _CAA_STORE_SHARED(x, v)	({ CAA_ACCESS_ONCE(x) = (v); })
 
 /*
  * Store v into x, where x is located in shared memory. Performs the required
  * cache flush after writing. Returns v.
  */
-#define STORE_SHARED(x, v)		\
+#define CAA_STORE_SHARED(x, v)		\
 	({				\
-		typeof(x) _v = _STORE_SHARED(x, v);	\
+		typeof(x) _v = _CAA_STORE_SHARED(x, v);	\
 		cmm_smp_wmc();		\
 		_v;			\
 	})
diff --git a/urcu/uatomic_arch_x86.h b/urcu/uatomic_arch_x86.h
index 043e616..4e09afd 100644
--- a/urcu/uatomic_arch_x86.h
+++ b/urcu/uatomic_arch_x86.h
@@ -39,7 +39,7 @@ struct __uatomic_dummy {
 };
 #define __hp(x)	((struct __uatomic_dummy *)(x))
 
-#define _uatomic_set(addr, v)	STORE_SHARED(*(addr), (v))
+#define _uatomic_set(addr, v)	CAA_STORE_SHARED(*(addr), (v))
 
 /* cmpxchg */
 
diff --git a/urcu/uatomic_generic.h b/urcu/uatomic_generic.h
index f65b398..6b4ef9e 100644
--- a/urcu/uatomic_generic.h
+++ b/urcu/uatomic_generic.h
@@ -29,11 +29,11 @@ extern "C" {
 #endif
 
 #ifndef uatomic_set
-#define uatomic_set(addr, v)	STORE_SHARED(*(addr), (v))
+#define uatomic_set(addr, v)	CAA_STORE_SHARED(*(addr), (v))
 #endif
 
 #ifndef uatomic_read
-#define uatomic_read(addr)	LOAD_SHARED(*(addr))
+#define uatomic_read(addr)	CAA_LOAD_SHARED(*(addr))
 #endif
 
 #if !defined __OPTIMIZE__  || defined UATOMIC_NO_LINK_ERROR
diff --git a/urcu/wfqueue-static.h b/urcu/wfqueue-static.h
index 0f7e68f..d0db3fc 100644
--- a/urcu/wfqueue-static.h
+++ b/urcu/wfqueue-static.h
@@ -79,7 +79,7 @@ void _wfq_enqueue(struct wfq_queue *q, struct wfq_node *node)
 	 * that the queue is being appended to. The following store will append
 	 * "node" to the queue from a dequeuer perspective.
 	 */
-	STORE_SHARED(*old_tail, node);
+	CAA_STORE_SHARED(*old_tail, node);
 }
 
 /*
@@ -99,19 +99,19 @@ ___wfq_dequeue_blocking(struct wfq_queue *q)
 	/*
 	 * Queue is empty if it only contains the dummy node.
 	 */
-	if (q->head == &q->dummy && LOAD_SHARED(q->tail) == &q->dummy.next)
+	if (q->head == &q->dummy && CAA_LOAD_SHARED(q->tail) == &q->dummy.next)
 		return NULL;
 	node = q->head;
 
 	/*
 	 * Adaptative busy-looping waiting for enqueuer to complete enqueue.
 	 */
-	while ((next = LOAD_SHARED(node->next)) == NULL) {
+	while ((next = CAA_LOAD_SHARED(node->next)) == NULL) {
 		if (++attempt >= WFQ_ADAPT_ATTEMPTS) {
 			poll(NULL, 0, WFQ_WAIT);	/* Wait for 10ms */
 			attempt = 0;
 		} else
-			cpu_relax();
+			caa_cpu_relax();
 	}
 	/*
 	 * Move queue head forward.
diff --git a/urcu/wfstack-static.h b/urcu/wfstack-static.h
index 3f44743..0acb4f0 100644
--- a/urcu/wfstack-static.h
+++ b/urcu/wfstack-static.h
@@ -67,7 +67,7 @@ void _wfs_push(struct wfs_stack *s, struct wfs_node *node)
 	 * At this point, dequeuers see a NULL node->next, they should busy-wait
 	 * until node->next is set to old_head.
 	 */
-	STORE_SHARED(node->next, old_head);
+	CAA_STORE_SHARED(node->next, old_head);
 }
 
 /*
@@ -80,18 +80,18 @@ ___wfs_pop_blocking(struct wfs_stack *s)
 	int attempt = 0;
 
 retry:
-	head = LOAD_SHARED(s->head);
+	head = CAA_LOAD_SHARED(s->head);
 	if (head == WF_STACK_END)
 		return NULL;
 	/*
 	 * Adaptative busy-looping waiting for push to complete.
 	 */
-	while ((next = LOAD_SHARED(head->next)) == NULL) {
+	while ((next = CAA_LOAD_SHARED(head->next)) == NULL) {
 		if (++attempt >= WFS_ADAPT_ATTEMPTS) {
 			poll(NULL, 0, WFS_WAIT);	/* Wait for 10ms */
 			attempt = 0;
 		} else
-			cpu_relax();
+			caa_cpu_relax();
 	}
 	if (uatomic_cmpxchg(&s->head, head, next) == head)
 		return head;
-- 
1.7.3.2




