[lttng-dev] urcu workqueue thread uses 99% of cpu while workqueue is empty
Minlan Wang
wangminlan at szsandstone.com
Mon Jun 20 23:52:06 EDT 2022
Hi, Mathieu,
On Fri, Jun 17, 2022 at 09:37:23AM -0400, Mathieu Desnoyers wrote:
> Can you try the following patch on top of v0.12.1 + your debug patch ?
>
> diff --git a/src/workqueue.c b/src/workqueue.c
> index eed1e21e..19fc89c8 100644
> --- a/src/workqueue.c
> +++ b/src/workqueue.c
> @@ -142,6 +142,7 @@ static void futex_wait(int32_t *futex)
>
> return;
> }
> +wait:
> while ((ret = futex_async(futex, FUTEX_WAIT, -1, NULL, NULL, 0))) {
> err = errno;
> fprintf(stderr, "%lu: wq %p: %s failed, errno %d\n",
> @@ -161,7 +162,16 @@ static void futex_wait(int32_t *futex)
>
> fprintf(stderr, "%lu: wq %p: %s wait return %d\n",
> pthread_self(), caa_container_of(futex, struct urcu_workqueue, futex), __func__, ret);
> -
> + /*
> + * Prior queued wakeups can cause futex wait to return even
> + * though the futex value is still -1. If this happens, wait
> + * again.
> + */
> + if (uatomic_read(futex) == -1) {
> + fprintf(stderr, "%lu: wq %p: %s futex == -1, wait again\n",
> + pthread_self(), caa_container_of(futex, struct urcu_workqueue, futex), __func__);
> + goto wait;
> + }
> }
>
> static void futex_wake_up(int32_t *futex)
>
I tried this patch on top of my debug patch, the issue disappeared.
The log shows that "futex_wait futex == -1, wait again" did happen several times.
I put the complete log in the attachment.
>
> We should not rely too much on the ordering of fprintf stderr output between threads,
> because those can be interleaved in any order (each fprintf to stderr is unbuffered).
> If we want more precision in the order of events I would recommend using LTTng-UST for
> fine-grained user-space tracing. This might explain why the futex has an unexpected value
> at line 2099.
I tried this patch on top of previous 2 patches to use lttng_ust_tracepoint in
workqueue.c, but always failed in compile example code with this error:
---
make[5]: Entering directory `/home/mlwang/debug/userspace-rcu/doc/examples/rculfqueue'
CC cds_lfq_enqueue.o
CCLD cds_lfq_enqueue
../../../src/.libs//liburcu-cds.so: undefined reference to `lttng_ust_tracepoint_provider_workqueue'
---
the patch is as following:
>From 2fa8082914da55b6e3815dffd2f749798bf23735 Mon Sep 17 00:00:00 2001
From: root <root at localhost.localdomain>
Date: Tue, 21 Jun 2022 11:42:56 +0800
Subject: [PATCH] add lttng_ust_tracepoint in workqueue.c
---
src/Makefile.am | 4 +--
src/liburcu-cds.pc.in | 2 +-
src/workqueue.c | 5 ++++
src/workqueue.h | 1 +
src/workqueue_tp.c | 4 +++
src/workqueue_tp.h | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 95 insertions(+), 3 deletions(-)
create mode 100644 src/workqueue_tp.c
create mode 100644 src/workqueue_tp.h
diff --git a/src/Makefile.am b/src/Makefile.am
index 88ccc1f..5a18e31 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -2,7 +2,7 @@ AM_CPPFLAGS += -I$(top_srcdir)/include -I$(top_builddir)/include -I$(top_srcdir)
#Add the -version-info directly here since we are only building
# library that use the version-info
-AM_LDFLAGS=-version-info $(URCU_LIBRARY_VERSION)
+AM_LDFLAGS=-version-info $(URCU_LIBRARY_VERSION) -llttng-ust -ldl
if USE_CYGWIN
AM_LDFLAGS+=-no-undefined
endif
@@ -56,7 +56,7 @@ liburcu_bp_la_SOURCES = urcu-bp.c urcu-pointer.c $(COMPAT)
liburcu_bp_la_LIBADD = liburcu-common.la
liburcu_cds_la_SOURCES = rculfqueue.c rculfstack.c lfstack.c \
- workqueue.c workqueue.h $(RCULFHASH) $(COMPAT)
+ workqueue.c workqueue.h workqueue_tp.c workqueue_tp.h $(RCULFHASH) $(COMPAT)
liburcu_cds_la_LIBADD = liburcu-common.la
pkgconfigdir = $(libdir)/pkgconfig
diff --git a/src/liburcu-cds.pc.in b/src/liburcu-cds.pc.in
index e3d13af..f4e5e3e 100644
--- a/src/liburcu-cds.pc.in
+++ b/src/liburcu-cds.pc.in
@@ -7,5 +7,5 @@ Name: Userspace RCU Concurrent Data Structures
Description: Data structures leveraging RCU and atomic operations to provide efficient concurrency-aware storage
Version: @PACKAGE_VERSION@
Requires:
-Libs: -L${libdir} -lurcu-cds
+Libs: -L${libdir} -lurcu-cds -llttng-ust -ldl
Cflags: -I${includedir}
diff --git a/src/workqueue.c b/src/workqueue.c
index 19fc89c..c29c070 100644
--- a/src/workqueue.c
+++ b/src/workqueue.c
@@ -145,6 +145,7 @@ static void futex_wait(int32_t *futex)
wait:
while ((ret = futex_async(futex, FUTEX_WAIT, -1, NULL, NULL, 0))) {
err = errno;
+ lttng_ust_tracepoint(workqueue, futex_wait_err, (long)futex, *futex, err);
fprintf(stderr, "%lu: wq %p: %s failed, errno %d\n",
pthread_self(), caa_container_of(futex, struct urcu_workqueue, futex), __func__, err);
switch (err) {
@@ -160,6 +161,7 @@ wait:
}
}
+ lttng_ust_tracepoint(workqueue, futex_wait_return, (long)futex, *futex, ret);
fprintf(stderr, "%lu: wq %p: %s wait return %d\n",
pthread_self(), caa_container_of(futex, struct urcu_workqueue, futex), __func__, ret);
/*
@@ -168,6 +170,7 @@ wait:
* again.
*/
if (uatomic_read(futex) == -1) {
+ lttng_ust_tracepoint(workqueue, futex_wait_again, (long)futex, *futex);
fprintf(stderr, "%lu: wq %p: %s futex == -1, wait again\n",
pthread_self(), caa_container_of(futex, struct urcu_workqueue, futex), __func__);
goto wait;
@@ -184,11 +187,13 @@ static void futex_wake_up(int32_t *futex)
if (caa_unlikely(old == -1)) {
old = uatomic_xchg(futex, 0);
if (old == -1) {
+ lttng_ust_tracepoint(workqueue, futex_wake_up_success, (long)futex, old);
fprintf(stderr, "%lu: wq %p, wakeup succeed: old %d\n",
pthread_self(),
caa_container_of(futex, struct urcu_workqueue, futex),
old);
} else {
+ lttng_ust_tracepoint(workqueue, futex_wake_up_fail, (long)futex, old);
fprintf(stderr, "%lu: wq %p, wakeup failed: old %d\n",
pthread_self(),
caa_container_of(futex, struct urcu_workqueue, futex),
diff --git a/src/workqueue.h b/src/workqueue.h
index 52b6973..63931a4 100644
--- a/src/workqueue.h
+++ b/src/workqueue.h
@@ -28,6 +28,7 @@
#include <pthread.h>
#include <urcu/wfcqueue.h>
+#include "workqueue_tp.h"
#ifdef __cplusplus
extern "C" {
diff --git a/src/workqueue_tp.c b/src/workqueue_tp.c
new file mode 100644
index 0000000..a5f9586
--- /dev/null
+++ b/src/workqueue_tp.c
@@ -0,0 +1,4 @@
+#define LTTNG_UST_TRACEPOINT_CREATE_PROBES
+#define LTTNG_UST_TRACEPOINT_DEFINE
+
+#include "workqueue_tp.h"
diff --git a/src/workqueue_tp.h b/src/workqueue_tp.h
new file mode 100644
index 0000000..e2bce31
--- /dev/null
+++ b/src/workqueue_tp.h
@@ -0,0 +1,82 @@
+#undef LTTNG_UST_TRACEPOINT_PROVIDER
+#define LTTNG_UST_TRACEPOINT_PROVIVER workqueue
+
+#undef LTTNG_UST_TRACEPOINT_INCLUDE
+#define LTTNG_UST_TRACEPOINT_INCLUDE "./workqueue_tp.h"
+
+#if !defined(_WORKQUEUE_TP_H) || defined(LTTNG_USG_TRACEPOINT_HEADER_MULTI_READ)
+#define _WORKQUEUE_TP_H
+
+#include <lttng/tracepoint.h>
+
+LTTNG_UST_TRACEPOINT_EVENT(
+ workqueue,
+ futex_wait_err,
+ LTTNG_UST_TP_ARGS(
+ long, futex_p_arg,
+ int, futex_arg,
+ int, err_arg
+ ),
+ LTTNG_UST_TP_FIELDS(
+ lttng_ust_field_integer_hex(long, futex_p_field, futex_p_arg)
+ lttng_ust_field_integer(int, futex_field, futex_arg)
+ lttng_ust_field_integer(int, err_field, err_arg)
+ )
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+ workqueue,
+ futex_wait_return,
+ LTTNG_UST_TP_ARGS(
+ long, futex_p_arg,
+ int, futex_arg,
+ int, ret_arg
+ ),
+ LTTNG_UST_TP_FIELDS(
+ lttng_ust_field_integer_hex(long, futex_p_field, futex_p_arg)
+ lttng_ust_field_integer(int, futex_field, futex_arg)
+ lttng_ust_field_integer(int, ret_field, ret_arg)
+ )
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+ workqueue,
+ futex_wait_again,
+ LTTNG_UST_TP_ARGS(
+ long, futex_p_arg,
+ int, futex_arg
+ ),
+ LTTNG_UST_TP_FIELDS(
+ lttng_ust_field_integer_hex(long, futex_p_field, futex_p_arg)
+ lttng_ust_field_integer(int, futex_field, futex_arg)
+ )
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+ workqueue,
+ futex_wake_up_success,
+ LTTNG_UST_TP_ARGS(
+ long, futex_p_arg,
+ int, futex_arg
+ ),
+ LTTNG_UST_TP_FIELDS(
+ lttng_ust_field_integer_hex(long, futex_p_field, futex_p_arg)
+ lttng_ust_field_integer(int, futex_field, futex_arg)
+ )
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+ workqueue,
+ futex_wake_up_fail,
+ LTTNG_UST_TP_ARGS(
+ long, futex_p_arg,
+ int, futex_arg
+ ),
+ LTTNG_UST_TP_FIELDS(
+ lttng_ust_field_integer_hex(long, futex_p_field, futex_p_arg)
+ lttng_ust_field_integer(int, futex_field, futex_arg)
+ )
+)
+
+#endif /* _WORKQUEUE_TP_H */
+#include <lttng/tracepoint-event.h>
--
1.8.3.1
Is "perf probe" accurate in tracing the futex event here? Maybe i can try that.
B.R
Minlan
-------------- next part --------------
A non-text attachment was scrubbed...
Name: log.tar.bz2
Type: application/x-bzip2
Size: 153355 bytes
Desc: not available
URL: <https://lists.lttng.org/pipermail/lttng-dev/attachments/20220620/7a71fd6b/attachment-0001.bin>
More information about the lttng-dev
mailing list