[lttng-dev] urcu workqueue thread uses 99% of cpu while workqueue is empty

Minlan Wang wangminlan at szsandstone.com
Mon Jun 20 23:52:06 EDT 2022


Hi, Mathieu,
On Fri, Jun 17, 2022 at 09:37:23AM -0400, Mathieu Desnoyers wrote:
> Can you try the following patch on top of v0.12.1 + your debug patch ?
> 
> diff --git a/src/workqueue.c b/src/workqueue.c
> index eed1e21e..19fc89c8 100644
> --- a/src/workqueue.c
> +++ b/src/workqueue.c
> @@ -142,6 +142,7 @@ static void futex_wait(int32_t *futex)
>  
>  		return;
>  	}
> +wait:
>  	while ((ret = futex_async(futex, FUTEX_WAIT, -1, NULL, NULL, 0))) {
>  		err = errno;
>  		fprintf(stderr, "%lu: wq %p: %s failed, errno %d\n",
> @@ -161,7 +162,16 @@ static void futex_wait(int32_t *futex)
>  
>  	fprintf(stderr, "%lu: wq %p: %s wait return %d\n",
>  		pthread_self(), caa_container_of(futex, struct urcu_workqueue, futex), __func__, ret);
> -
> +	/*
> +	 * Prior queued wakeups can cause futex wait to return even
> +	 * though the futex value is still -1. If this happens, wait
> +	 * again.
> +	 */
> +	if (uatomic_read(futex) == -1) {
> +		fprintf(stderr, "%lu: wq %p: %s futex == -1, wait again\n",
> +			pthread_self(), caa_container_of(futex, struct urcu_workqueue, futex), __func__);
> +		goto wait;
> +	}
>  }
>  
>  static void futex_wake_up(int32_t *futex)
> 
I tried this patch on top of my debug patch, the issue disappeared.
The log shows that "futex_wait futex == -1, wait again" did happen several times.
I put the complete log in the attachment.

> 
> We should not rely too much on the ordering of fprintf stderr output between threads,
> because those can be interleaved in any order (each fprintf to stderr is unbuffered).
> If we want more precision in the order of events I would recommend using LTTng-UST for
> fine-grained user-space tracing. This might explain why the futex has an unexpected value
> at line 2099.
I tried this patch on top of previous 2 patches to use lttng_ust_tracepoint in
workqueue.c, but always failed in compile example code with this error:
---
make[5]: Entering directory `/home/mlwang/debug/userspace-rcu/doc/examples/rculfqueue'
  CC       cds_lfq_enqueue.o
  CCLD     cds_lfq_enqueue
../../../src/.libs//liburcu-cds.so: undefined reference to `lttng_ust_tracepoint_provider_workqueue'
---
the patch is as following:

>From 2fa8082914da55b6e3815dffd2f749798bf23735 Mon Sep 17 00:00:00 2001
From: root <root at localhost.localdomain>
Date: Tue, 21 Jun 2022 11:42:56 +0800
Subject: [PATCH] add lttng_ust_tracepoint in workqueue.c

---
 src/Makefile.am       |  4 +--
 src/liburcu-cds.pc.in |  2 +-
 src/workqueue.c       |  5 ++++
 src/workqueue.h       |  1 +
 src/workqueue_tp.c    |  4 +++
 src/workqueue_tp.h    | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 95 insertions(+), 3 deletions(-)
 create mode 100644 src/workqueue_tp.c
 create mode 100644 src/workqueue_tp.h

diff --git a/src/Makefile.am b/src/Makefile.am
index 88ccc1f..5a18e31 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -2,7 +2,7 @@ AM_CPPFLAGS += -I$(top_srcdir)/include -I$(top_builddir)/include -I$(top_srcdir)
 
 #Add the -version-info directly here since we are only building
 # library that use the version-info
-AM_LDFLAGS=-version-info $(URCU_LIBRARY_VERSION)
+AM_LDFLAGS=-version-info $(URCU_LIBRARY_VERSION) -llttng-ust -ldl
 if USE_CYGWIN
 AM_LDFLAGS+=-no-undefined
 endif
@@ -56,7 +56,7 @@ liburcu_bp_la_SOURCES = urcu-bp.c urcu-pointer.c $(COMPAT)
 liburcu_bp_la_LIBADD = liburcu-common.la
 
 liburcu_cds_la_SOURCES = rculfqueue.c rculfstack.c lfstack.c \
-	workqueue.c workqueue.h $(RCULFHASH) $(COMPAT)
+	workqueue.c workqueue.h workqueue_tp.c workqueue_tp.h $(RCULFHASH) $(COMPAT)
 liburcu_cds_la_LIBADD = liburcu-common.la
 
 pkgconfigdir = $(libdir)/pkgconfig
diff --git a/src/liburcu-cds.pc.in b/src/liburcu-cds.pc.in
index e3d13af..f4e5e3e 100644
--- a/src/liburcu-cds.pc.in
+++ b/src/liburcu-cds.pc.in
@@ -7,5 +7,5 @@ Name: Userspace RCU Concurrent Data Structures
 Description: Data structures leveraging RCU and atomic operations to provide efficient concurrency-aware storage
 Version: @PACKAGE_VERSION@
 Requires:
-Libs: -L${libdir} -lurcu-cds
+Libs: -L${libdir} -lurcu-cds -llttng-ust -ldl
 Cflags: -I${includedir} 
diff --git a/src/workqueue.c b/src/workqueue.c
index 19fc89c..c29c070 100644
--- a/src/workqueue.c
+++ b/src/workqueue.c
@@ -145,6 +145,7 @@ static void futex_wait(int32_t *futex)
 wait:
 	while ((ret = futex_async(futex, FUTEX_WAIT, -1, NULL, NULL, 0))) {
 		err = errno;
+		lttng_ust_tracepoint(workqueue, futex_wait_err, (long)futex, *futex, err);
 		fprintf(stderr, "%lu: wq %p: %s failed, errno %d\n",
 			pthread_self(), caa_container_of(futex, struct urcu_workqueue, futex), __func__, err);
 		switch (err) {
@@ -160,6 +161,7 @@ wait:
 		}
 	}
 
+	lttng_ust_tracepoint(workqueue, futex_wait_return, (long)futex, *futex, ret);
 	fprintf(stderr, "%lu: wq %p: %s wait return %d\n",
 		pthread_self(), caa_container_of(futex, struct urcu_workqueue, futex), __func__, ret);
 	/*
@@ -168,6 +170,7 @@ wait:
 	 * again.
 	 */
 	if (uatomic_read(futex) == -1) {
+		lttng_ust_tracepoint(workqueue, futex_wait_again, (long)futex, *futex);
 		fprintf(stderr, "%lu: wq %p: %s futex == -1, wait again\n",
 			pthread_self(), caa_container_of(futex, struct urcu_workqueue, futex), __func__);
 		goto wait;
@@ -184,11 +187,13 @@ static void futex_wake_up(int32_t *futex)
 	if (caa_unlikely(old == -1)) {
 		old = uatomic_xchg(futex, 0);
 		if (old == -1) {
+			lttng_ust_tracepoint(workqueue, futex_wake_up_success, (long)futex, old);
 			fprintf(stderr, "%lu: wq %p, wakeup succeed: old %d\n",
 				pthread_self(),
 				caa_container_of(futex, struct urcu_workqueue, futex),
 				old);
 		} else {
+			lttng_ust_tracepoint(workqueue, futex_wake_up_fail, (long)futex, old);
 			fprintf(stderr, "%lu: wq %p, wakeup failed: old %d\n",
 				pthread_self(),
 				caa_container_of(futex, struct urcu_workqueue, futex),
diff --git a/src/workqueue.h b/src/workqueue.h
index 52b6973..63931a4 100644
--- a/src/workqueue.h
+++ b/src/workqueue.h
@@ -28,6 +28,7 @@
 #include <pthread.h>
 
 #include <urcu/wfcqueue.h>
+#include "workqueue_tp.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/src/workqueue_tp.c b/src/workqueue_tp.c
new file mode 100644
index 0000000..a5f9586
--- /dev/null
+++ b/src/workqueue_tp.c
@@ -0,0 +1,4 @@
+#define LTTNG_UST_TRACEPOINT_CREATE_PROBES
+#define LTTNG_UST_TRACEPOINT_DEFINE
+
+#include "workqueue_tp.h"
diff --git a/src/workqueue_tp.h b/src/workqueue_tp.h
new file mode 100644
index 0000000..e2bce31
--- /dev/null
+++ b/src/workqueue_tp.h
@@ -0,0 +1,82 @@
+#undef	LTTNG_UST_TRACEPOINT_PROVIDER
+#define	LTTNG_UST_TRACEPOINT_PROVIVER workqueue
+
+#undef	LTTNG_UST_TRACEPOINT_INCLUDE
+#define	LTTNG_UST_TRACEPOINT_INCLUDE "./workqueue_tp.h"
+
+#if !defined(_WORKQUEUE_TP_H) || defined(LTTNG_USG_TRACEPOINT_HEADER_MULTI_READ)
+#define	_WORKQUEUE_TP_H
+
+#include <lttng/tracepoint.h>
+
+LTTNG_UST_TRACEPOINT_EVENT(
+	workqueue,
+	futex_wait_err,
+	LTTNG_UST_TP_ARGS( 
+		long, futex_p_arg,
+		int, futex_arg,
+		int, err_arg
+	),
+	LTTNG_UST_TP_FIELDS(
+		lttng_ust_field_integer_hex(long, futex_p_field, futex_p_arg)
+		lttng_ust_field_integer(int, futex_field, futex_arg)
+		lttng_ust_field_integer(int, err_field, err_arg)
+	)
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+	workqueue,
+	futex_wait_return,
+	LTTNG_UST_TP_ARGS( 
+		long, futex_p_arg,
+		int, futex_arg,
+		int, ret_arg
+	),
+	LTTNG_UST_TP_FIELDS(
+		lttng_ust_field_integer_hex(long, futex_p_field, futex_p_arg)
+		lttng_ust_field_integer(int, futex_field, futex_arg)
+		lttng_ust_field_integer(int, ret_field, ret_arg)
+	)
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+	workqueue,
+	futex_wait_again,
+	LTTNG_UST_TP_ARGS( 
+		long, futex_p_arg,
+		int, futex_arg
+	),
+	LTTNG_UST_TP_FIELDS(
+		lttng_ust_field_integer_hex(long, futex_p_field, futex_p_arg)
+		lttng_ust_field_integer(int, futex_field, futex_arg)
+	)
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+	workqueue,
+	futex_wake_up_success,
+	LTTNG_UST_TP_ARGS( 
+		long, futex_p_arg,
+		int, futex_arg
+	),
+	LTTNG_UST_TP_FIELDS(
+		lttng_ust_field_integer_hex(long, futex_p_field, futex_p_arg)
+		lttng_ust_field_integer(int, futex_field, futex_arg)
+	)
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+	workqueue,
+	futex_wake_up_fail,
+	LTTNG_UST_TP_ARGS( 
+		long, futex_p_arg,
+		int, futex_arg
+	),
+	LTTNG_UST_TP_FIELDS(
+		lttng_ust_field_integer_hex(long, futex_p_field, futex_p_arg)
+		lttng_ust_field_integer(int, futex_field, futex_arg)
+	)
+)
+
+#endif /* _WORKQUEUE_TP_H */
+#include <lttng/tracepoint-event.h>
-- 
1.8.3.1

Is "perf probe" accurate in tracing the futex event here?  Maybe i can try that.

B.R
Minlan
-------------- next part --------------
A non-text attachment was scrubbed...
Name: log.tar.bz2
Type: application/x-bzip2
Size: 153355 bytes
Desc: not available
URL: <https://lists.lttng.org/pipermail/lttng-dev/attachments/20220620/7a71fd6b/attachment-0001.bin>


More information about the lttng-dev mailing list