[lttng-dev] [PATCH lttng-tools v2] Fix: race between kconsumerd and sessiond on tear down

Jonathan Rajotte jonathan.rajotte-julien at efficios.com
Thu Sep 3 17:48:36 EDT 2015


v2: minimize indentation by using return on condition.

Kconsumerd and sessiond both have reference on lttng-module. This can lead to a race
on modprobe_remove_lttng_all which might fail to unload modules due to
certain modules not having a ref count equal to zero at the time.

waitpid is used to force a synchronization on the child (kconsumer) termination.

This also have been applied to ust consumers for the sake of consistency.

Fixes: #878

Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien at efficios.com>
---
 src/bin/lttng-sessiond/main.c | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c
index d284deb..66a6149 100644
--- a/src/bin/lttng-sessiond/main.c
+++ b/src/bin/lttng-sessiond/main.c
@@ -587,6 +587,34 @@ static int generate_lock_file_path(char *path, size_t len)
 }
 
 /*
+ * Wait on consumer process termination.
+ *
+ * Need to be called with the consumer data lock held or from a context
+ * ensuring no concurrent access to data (e.g: cleanup).
+ */
+static void wait_consumer(struct consumer_data *consumer_data)
+{
+	pid_t ret;
+	int status;
+
+	if (consumer_data->pid <= 0) {
+		return;
+	}
+
+	DBG("Waiting for complete teardown of consumerd (PID: %d)",
+			consumer_data->pid);
+	ret = waitpid(consumer_data->pid, &status, 0);
+	if (ret == -1) {
+		PERROR("consumerd waitpid pid: %d", consumer_data->pid)
+	}
+	if (!WIFEXITED(status)) {
+		ERR("consumerd termination with error: %d",
+				WEXITSTATUS(ret));
+	}
+	consumer_data->pid = 0;
+}
+
+/*
  * Cleanup the session daemon's data structures.
  */
 static void sessiond_cleanup(void)
@@ -680,6 +708,11 @@ static void sessiond_cleanup(void)
 		}
 	}
 
+	wait_consumer(&kconsumer_data);
+	wait_consumer(&ustconsumer64_data);
+	wait_consumer(&ustconsumer32_data);
+
+
 	DBG("Cleaning up all agent apps");
 	agent_app_ht_clean();
 
@@ -1497,7 +1530,6 @@ error:
 
 	unlink(consumer_data->err_unix_sock_path);
 	unlink(consumer_data->cmd_unix_sock_path);
-	consumer_data->pid = 0;
 	pthread_mutex_unlock(&consumer_data->lock);
 
 	/* Cleanup metadata socket mutex. */
-- 
2.1.4




More information about the lttng-dev mailing list