[lttng-dev] [PATCH lttng-tools 2/2] Fix: join consumer timer thread

Mathieu Desnoyers mathieu.desnoyers at efficios.com
Fri Jun 16 21:23:13 UTC 2017


Detaching the timer thread has the unfortunate side-effect of letting
the health management data structures be freed by main() while the timer
thread may still be using them (if, e.g., main() exits quickly).

Overcome this situation by tearing down and joining the timer thread.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers at efficios.com>
---
 src/bin/lttng-consumerd/lttng-consumerd.c | 68 +++++++++++++++++++------------
 src/common/consumer/consumer-timer.c      | 13 ++++--
 src/common/consumer/consumer-timer.h      |  1 +
 3 files changed, 52 insertions(+), 30 deletions(-)

diff --git a/src/bin/lttng-consumerd/lttng-consumerd.c b/src/bin/lttng-consumerd/lttng-consumerd.c
index 9fb4747..2a8b092 100644
--- a/src/bin/lttng-consumerd/lttng-consumerd.c
+++ b/src/bin/lttng-consumerd/lttng-consumerd.c
@@ -56,6 +56,7 @@
 
 static pthread_t channel_thread, data_thread, metadata_thread,
 		sessiond_thread, metadata_timer_thread, health_thread;
+static bool metadata_timer_thread_online;
 
 /* to count the number of times the user pressed ctrl+c */
 static int sigintcount = 0;
@@ -506,6 +507,20 @@ int main(int argc, char **argv)
 	}
 	cmm_smp_mb();	/* Read ready before following operations */
 
+	/*
+	 * Create the thread to manage the UST metadata periodic timer and
+	 * live timer.
+	 */
+	ret = pthread_create(&metadata_timer_thread, default_pthread_attr(),
+			consumer_timer_thread, (void *) ctx);
+	if (ret) {
+		errno = ret;
+		PERROR("pthread_create");
+		retval = -1;
+		goto exit_metadata_timer_thread;
+	}
+	metadata_timer_thread_online = true;
+
 	/* Create thread to manage channels */
 	ret = pthread_create(&channel_thread, default_pthread_attr(),
 			consumer_thread_channel_poll,
@@ -549,34 +564,12 @@ int main(int argc, char **argv)
 		goto exit_sessiond_thread;
 	}
 
-	/*
-	 * Create the thread to manage the UST metadata periodic timer and
-	 * live timer.
-	 */
-	ret = pthread_create(&metadata_timer_thread, default_pthread_attr(),
-			consumer_timer_thread, (void *) ctx);
-	if (ret) {
-		errno = ret;
-		PERROR("pthread_create");
-		retval = -1;
-		goto exit_metadata_timer_thread;
-	}
-
-	ret = pthread_detach(metadata_timer_thread);
-	if (ret) {
-		errno = ret;
-		PERROR("pthread_detach");
-		retval = -1;
-		goto exit_metadata_timer_detach;
-	}
 
 	/*
 	 * This is where we start awaiting program completion (e.g. through
 	 * signal that asks threads to teardown.
 	 */
 
-exit_metadata_timer_detach:
-exit_metadata_timer_thread:
 	ret = pthread_join(sessiond_thread, &status);
 	if (ret) {
 		errno = ret;
@@ -617,6 +610,8 @@ exit_metadata_thread:
 	}
 exit_channel_thread:
 
+exit_metadata_timer_thread:
+
 	ret = pthread_join(health_thread, &status);
 	if (ret) {
 		errno = ret;
@@ -629,17 +624,38 @@ exit_health_thread:
 exit_health_pipe:
 
 exit_init_data:
-	tmp_ctx = ctx;
-	ctx = NULL;
-	cmm_barrier();	/* Clear ctx for signal handler. */
 	/*
 	 * Wait for all pending call_rcu work to complete before tearing
 	 * down data structures. call_rcu worker may be trying to
 	 * perform lookups in those structures.
 	 */
 	rcu_barrier();
-	lttng_consumer_destroy(tmp_ctx);
 	lttng_consumer_cleanup();
+	/*
+	 * Tearing down the metadata timer thread in a
+	 * non-fully-symmetric fashion compared to its creation in case
+	 * lttng_consumer_cleanup() ends up tearing down timers (which
+	 * requires the timer thread to be alive).
+	 */
+	if (metadata_timer_thread_online) {
+		/*
+		 * Ensure the metadata timer thread exits only after all other
+		 * threads are gone, because it is required to perform timer
+		 * teardown synchronization.
+		 */
+		kill(getpid(), LTTNG_CONSUMER_SIG_EXIT);
+		ret = pthread_join(metadata_timer_thread, &status);
+		if (ret) {
+			errno = ret;
+			PERROR("pthread_join metadata_timer_thread");
+			retval = -1;
+		}
+		metadata_timer_thread_online = false;
+	}
+	tmp_ctx = ctx;
+	ctx = NULL;
+	cmm_barrier();	/* Clear ctx for signal handler. */
+	lttng_consumer_destroy(tmp_ctx);
 
 	if (health_consumerd) {
 		health_app_destroy(health_consumerd);
diff --git a/src/common/consumer/consumer-timer.c b/src/common/consumer/consumer-timer.c
index 0adc572..e42940e 100644
--- a/src/common/consumer/consumer-timer.c
+++ b/src/common/consumer/consumer-timer.c
@@ -72,6 +72,10 @@ static void setmask(sigset_t *mask)
 	if (ret) {
 		PERROR("sigaddset monitor");
 	}
+	ret = sigaddset(mask, LTTNG_CONSUMER_SIG_EXIT);
+	if (ret) {
+		PERROR("sigaddset exit");
+	}
 }
 
 static int channel_monitor_pipe = -1;
@@ -782,7 +786,7 @@ end:
 /*
  * This thread is the sighandler for signals LTTNG_CONSUMER_SIG_SWITCH,
  * LTTNG_CONSUMER_SIG_TEARDOWN, LTTNG_CONSUMER_SIG_LIVE, and
- * LTTNG_CONSUMER_SIG_MONITOR.
+ * LTTNG_CONSUMER_SIG_MONITOR, LTTNG_CONSUMER_SIG_EXIT.
  */
 void *consumer_timer_thread(void *data)
 {
@@ -836,6 +840,9 @@ void *consumer_timer_thread(void *data)
 
 			channel = info.si_value.sival_ptr;
 			monitor_timer(ctx, channel);
+		} else if (signr == LTTNG_CONSUMER_SIG_EXIT) {
+			assert(CMM_LOAD_SHARED(consumer_quit));
+			goto end;
 		} else {
 			ERR("Unexpected signal %d\n", info.si_signo);
 		}
@@ -844,10 +851,8 @@ void *consumer_timer_thread(void *data)
 error_testpoint:
 	/* Only reached in testpoint error */
 	health_error();
+end:
 	health_unregister(health_consumerd);
-
 	rcu_unregister_thread();
-
-	/* Never return */
 	return NULL;
 }
diff --git a/src/common/consumer/consumer-timer.h b/src/common/consumer/consumer-timer.h
index 851a172..1b5dd41 100644
--- a/src/common/consumer/consumer-timer.h
+++ b/src/common/consumer/consumer-timer.h
@@ -28,6 +28,7 @@
 #define LTTNG_CONSUMER_SIG_TEARDOWN	SIGRTMIN + 11
 #define LTTNG_CONSUMER_SIG_LIVE		SIGRTMIN + 12
 #define LTTNG_CONSUMER_SIG_MONITOR	SIGRTMIN + 13
+#define LTTNG_CONSUMER_SIG_EXIT		SIGRTMIN + 14
 
 #define CLOCKID CLOCK_MONOTONIC
 
-- 
2.1.4



More information about the lttng-dev mailing list