[lttng-dev] [PATCH lttng-tools] Fix: notification thread: RCU-safe reclaim of hash table nodes

Mathieu Desnoyers mathieu.desnoyers at efficios.com
Wed Dec 12 12:16:44 EST 2018


Nodes that are put in a rculfhash hash table created with the
"auto resize" flag need to beware that a worker thread can access the
hash table nodes as a RCU reader concurrently, and that this worker
thread can modify the hash table content, effectively adding and
removing "bucket" nodes, and changing the size of the hash table
index.

Therefore, even though only a single thread reads and updates the hash
table, a grace period is needed before reclaiming the memory holding
the rculfhash nodes.

Moreover, handle_notification_thread_command_add_channel() misses a
RCU read-side lock around iteration on the triggers hash table. Failure
to hold this read-side lock could cause segmentation faults when
accessing hash table objects if a hash table resize is done by the
worker thread in parallel with iteration over the hash table.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers at efficios.com>
---
 .../lttng-sessiond/notification-thread-events.c    | 73 +++++++++++++++++++---
 .../lttng-sessiond/notification-thread-internal.h  |  4 ++
 2 files changed, 70 insertions(+), 7 deletions(-)

diff --git a/src/bin/lttng-sessiond/notification-thread-events.c b/src/bin/lttng-sessiond/notification-thread-events.c
index 1233bf30..ba4e5a05 100644
--- a/src/bin/lttng-sessiond/notification-thread-events.c
+++ b/src/bin/lttng-sessiond/notification-thread-events.c
@@ -70,6 +70,8 @@ struct lttng_channel_trigger_list {
 	struct cds_list_head list;
 	/* Node in the channel_triggers_ht */
 	struct cds_lfht_node channel_triggers_ht_node;
+	/* call_rcu delayed reclaim. */
+	struct rcu_head rcu_node;
 };
 
 /*
@@ -116,6 +118,8 @@ struct lttng_session_trigger_list {
 struct lttng_trigger_ht_element {
 	struct lttng_trigger *trigger;
 	struct cds_lfht_node node;
+	/* call_rcu delayed reclaim. */
+	struct rcu_head rcu_node;
 };
 
 struct lttng_condition_list_element {
@@ -132,6 +136,8 @@ struct notification_client_list {
 	const struct lttng_trigger *trigger;
 	struct cds_list_head list;
 	struct cds_lfht_node notification_trigger_ht_node;
+	/* call_rcu delayed reclaim. */
+	struct rcu_head rcu_node;
 };
 
 struct notification_client {
@@ -198,6 +204,8 @@ struct notification_client {
 			struct lttng_dynamic_buffer buffer;
 		} outbound;
 	} communication;
+	/* call_rcu delayed reclaim. */
+	struct rcu_head rcu_node;
 };
 
 struct channel_state_sample {
@@ -206,6 +214,8 @@ struct channel_state_sample {
 	uint64_t highest_usage;
 	uint64_t lowest_usage;
 	uint64_t channel_total_consumed;
+	/* call_rcu delayed reclaim. */
+	struct rcu_head rcu_node;
 };
 
 static unsigned long hash_channel_key(struct channel_key *key);
@@ -501,6 +511,12 @@ enum lttng_object_type get_condition_binding_object(
 }
 
 static
+void free_channel_info_rcu(struct rcu_head *node)
+{
+	free(caa_container_of(node, struct channel_info, rcu_node));
+}
+
+static
 void channel_info_destroy(struct channel_info *channel_info)
 {
 	if (!channel_info) {
@@ -515,7 +531,13 @@ void channel_info_destroy(struct channel_info *channel_info)
 	if (channel_info->name) {
 		free(channel_info->name);
 	}
-	free(channel_info);
+	call_rcu(&channel_info->rcu_node, free_channel_info_rcu);
+}
+
+static
+void free_session_info_rcu(struct rcu_head *node)
+{
+	free(caa_container_of(node, struct session_info, rcu_node));
 }
 
 /* Don't call directly, use the ref-counting mechanism. */
@@ -539,7 +561,7 @@ void session_info_destroy(void *_data)
 			&session_info->sessions_ht_node);
 	rcu_read_unlock();
 	free(session_info->name);
-	free(session_info);
+	call_rcu(&session_info->rcu_node, free_session_info_rcu);
 }
 
 static
@@ -1099,6 +1121,12 @@ end:
 }
 
 static
+void free_notification_client_rcu(struct rcu_head *node)
+{
+	free(caa_container_of(node, struct notification_client, rcu_node));
+}
+
+static
 void notification_client_destroy(struct notification_client *client,
 		struct notification_thread_state *state)
 {
@@ -1120,7 +1148,7 @@ void notification_client_destroy(struct notification_client *client,
 	}
 	lttng_dynamic_buffer_reset(&client->communication.inbound.buffer);
 	lttng_dynamic_buffer_reset(&client->communication.outbound.buffer);
-	free(client);
+	call_rcu(&client->rcu_node, free_notification_client_rcu);
 }
 
 /*
@@ -1544,6 +1572,7 @@ int handle_notification_thread_command_add_channel(
 		goto error;
 	}
 
+	rcu_read_lock();
 	/* Build a list of all triggers applying to the new channel. */
 	cds_lfht_for_each_entry(state->triggers_ht, &iter, trigger_ht_element,
 			node) {
@@ -1556,6 +1585,7 @@ int handle_notification_thread_command_add_channel(
 
 		new_element = zmalloc(sizeof(*new_element));
 		if (!new_element) {
+			rcu_read_unlock();
 			goto error;
 		}
 		CDS_INIT_LIST_HEAD(&new_element->node);
@@ -1563,6 +1593,7 @@ int handle_notification_thread_command_add_channel(
 		cds_list_add(&new_element->node, &trigger_list);
 		trigger_count++;
 	}
+	rcu_read_unlock();
 
 	DBG("[notification-thread] Found %i triggers that apply to newly added channel",
 			trigger_count);
@@ -1598,6 +1629,20 @@ error:
 }
 
 static
+void free_channel_trigger_list_rcu(struct rcu_head *node)
+{
+	free(caa_container_of(node, struct lttng_channel_trigger_list,
+			rcu_node));
+}
+
+static
+void free_channel_state_sample_rcu(struct rcu_head *node)
+{
+	free(caa_container_of(node, struct channel_state_sample,
+			rcu_node));
+}
+
+static
 int handle_notification_thread_command_remove_channel(
 	struct notification_thread_state *state,
 	uint64_t channel_key, enum lttng_domain_type domain,
@@ -1639,7 +1684,7 @@ int handle_notification_thread_command_remove_channel(
 		free(trigger_list_element);
 	}
 	cds_lfht_del(state->channel_triggers_ht, node);
-	free(trigger_list);
+	call_rcu(&trigger_list->rcu_node, free_channel_trigger_list_rcu);
 
 	/* Free sampled channel state. */
 	cds_lfht_lookup(state->channel_state_ht,
@@ -1658,7 +1703,7 @@ int handle_notification_thread_command_remove_channel(
 				channel_state_ht_node);
 
 		cds_lfht_del(state->channel_state_ht, node);
-		free(sample);
+		call_rcu(&sample->rcu_node, free_channel_state_sample_rcu);
 	}
 
 	/* Remove the channel from the channels_ht and free it. */
@@ -2118,6 +2163,20 @@ error:
 }
 
 static
+void free_notification_client_list_rcu(struct rcu_head *node)
+{
+	free(caa_container_of(node, struct notification_client_list,
+			rcu_node));
+}
+
+static
+void free_lttng_trigger_ht_element_rcu(struct rcu_head *node)
+{
+	free(caa_container_of(node, struct lttng_trigger_ht_element,
+			rcu_node));
+}
+
+static
 int handle_notification_thread_command_unregister_trigger(
 		struct notification_thread_state *state,
 		struct lttng_trigger *trigger,
@@ -2186,7 +2245,7 @@ int handle_notification_thread_command_unregister_trigger(
 	}
 	cds_lfht_del(state->notification_trigger_clients_ht,
 			&client_list->notification_trigger_ht_node);
-	free(client_list);
+	call_rcu(&client_list->rcu_node, free_notification_client_list_rcu);
 
 	/* Remove trigger from triggers_ht. */
 	trigger_ht_element = caa_container_of(triggers_ht_node,
@@ -2198,7 +2257,7 @@ int handle_notification_thread_command_unregister_trigger(
 	action = lttng_trigger_get_action(trigger_ht_element->trigger);
 	lttng_action_destroy(action);
 	lttng_trigger_destroy(trigger_ht_element->trigger);
-	free(trigger_ht_element);
+	call_rcu(&trigger_ht_element->rcu_node, free_lttng_trigger_ht_element_rcu);
 end:
 	rcu_read_unlock();
 	if (_cmd_reply) {
diff --git a/src/bin/lttng-sessiond/notification-thread-internal.h b/src/bin/lttng-sessiond/notification-thread-internal.h
index 397159da..e217995f 100644
--- a/src/bin/lttng-sessiond/notification-thread-internal.h
+++ b/src/bin/lttng-sessiond/notification-thread-internal.h
@@ -53,6 +53,8 @@ struct session_info {
 		/* Identifier of the currently ongoing rotation. */
 		uint64_t id;
 	} rotation;
+	/* call_rcu delayed reclaim. */
+	struct rcu_head rcu_node;
 };
 
 struct channel_info {
@@ -68,6 +70,8 @@ struct channel_info {
 	struct cds_lfht_node channels_ht_node;
 	/* Node in the session_info's channels_ht. */
 	struct cds_lfht_node session_info_channels_ht_node;
+	/* call_rcu delayed reclaim. */
+	struct rcu_head rcu_node;
 };
 
 #endif /* NOTIFICATION_THREAD_INTERNAL_H */
-- 
2.11.0



More information about the lttng-dev mailing list