[lttng-dev] [PATCH lttng-tools] Fix: notification thread: RCU-safe reclaim of hash table nodes
Jérémie Galarneau
jeremie.galarneau at efficios.com
Tue Jan 22 15:35:53 EST 2019
Merged in master and stable-2.11.
Thanks,
Jérémie
On Wed, 12 Dec 2018 at 12:16, Mathieu Desnoyers
<mathieu.desnoyers at efficios.com> wrote:
>
> Nodes that are put in a rculfhash hash table created with the
> "auto resize" flag need to beware that a worker thread can access the
> hash table nodes as a RCU reader concurrently, and that this worker
> thread can modify the hash table content, effectively adding and
> removing "bucket" nodes, and changing the size of the hash table
> index.
>
> Therefore, even though only a single thread reads and updates the hash
> table, a grace period is needed before reclaiming the memory holding
> the rculfhash nodes.
>
> Moreover, handle_notification_thread_command_add_channel() misses a
> RCU read-side lock around iteration on the triggers hash table. Failure
> to hold this read-side lock could cause segmentation faults when
> accessing hash table objects if a hash table resize is done by the
> worker thread in parallel with iteration over the hash table.
>
> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers at efficios.com>
> ---
> .../lttng-sessiond/notification-thread-events.c | 73 +++++++++++++++++++---
> .../lttng-sessiond/notification-thread-internal.h | 4 ++
> 2 files changed, 70 insertions(+), 7 deletions(-)
>
> diff --git a/src/bin/lttng-sessiond/notification-thread-events.c b/src/bin/lttng-sessiond/notification-thread-events.c
> index 1233bf30..ba4e5a05 100644
> --- a/src/bin/lttng-sessiond/notification-thread-events.c
> +++ b/src/bin/lttng-sessiond/notification-thread-events.c
> @@ -70,6 +70,8 @@ struct lttng_channel_trigger_list {
> struct cds_list_head list;
> /* Node in the channel_triggers_ht */
> struct cds_lfht_node channel_triggers_ht_node;
> + /* call_rcu delayed reclaim. */
> + struct rcu_head rcu_node;
> };
>
> /*
> @@ -116,6 +118,8 @@ struct lttng_session_trigger_list {
> struct lttng_trigger_ht_element {
> struct lttng_trigger *trigger;
> struct cds_lfht_node node;
> + /* call_rcu delayed reclaim. */
> + struct rcu_head rcu_node;
> };
>
> struct lttng_condition_list_element {
> @@ -132,6 +136,8 @@ struct notification_client_list {
> const struct lttng_trigger *trigger;
> struct cds_list_head list;
> struct cds_lfht_node notification_trigger_ht_node;
> + /* call_rcu delayed reclaim. */
> + struct rcu_head rcu_node;
> };
>
> struct notification_client {
> @@ -198,6 +204,8 @@ struct notification_client {
> struct lttng_dynamic_buffer buffer;
> } outbound;
> } communication;
> + /* call_rcu delayed reclaim. */
> + struct rcu_head rcu_node;
> };
>
> struct channel_state_sample {
> @@ -206,6 +214,8 @@ struct channel_state_sample {
> uint64_t highest_usage;
> uint64_t lowest_usage;
> uint64_t channel_total_consumed;
> + /* call_rcu delayed reclaim. */
> + struct rcu_head rcu_node;
> };
>
> static unsigned long hash_channel_key(struct channel_key *key);
> @@ -501,6 +511,12 @@ enum lttng_object_type get_condition_binding_object(
> }
>
> static
> +void free_channel_info_rcu(struct rcu_head *node)
> +{
> + free(caa_container_of(node, struct channel_info, rcu_node));
> +}
> +
> +static
> void channel_info_destroy(struct channel_info *channel_info)
> {
> if (!channel_info) {
> @@ -515,7 +531,13 @@ void channel_info_destroy(struct channel_info *channel_info)
> if (channel_info->name) {
> free(channel_info->name);
> }
> - free(channel_info);
> + call_rcu(&channel_info->rcu_node, free_channel_info_rcu);
> +}
> +
> +static
> +void free_session_info_rcu(struct rcu_head *node)
> +{
> + free(caa_container_of(node, struct session_info, rcu_node));
> }
>
> /* Don't call directly, use the ref-counting mechanism. */
> @@ -539,7 +561,7 @@ void session_info_destroy(void *_data)
> &session_info->sessions_ht_node);
> rcu_read_unlock();
> free(session_info->name);
> - free(session_info);
> + call_rcu(&session_info->rcu_node, free_session_info_rcu);
> }
>
> static
> @@ -1099,6 +1121,12 @@ end:
> }
>
> static
> +void free_notification_client_rcu(struct rcu_head *node)
> +{
> + free(caa_container_of(node, struct notification_client, rcu_node));
> +}
> +
> +static
> void notification_client_destroy(struct notification_client *client,
> struct notification_thread_state *state)
> {
> @@ -1120,7 +1148,7 @@ void notification_client_destroy(struct notification_client *client,
> }
> lttng_dynamic_buffer_reset(&client->communication.inbound.buffer);
> lttng_dynamic_buffer_reset(&client->communication.outbound.buffer);
> - free(client);
> + call_rcu(&client->rcu_node, free_notification_client_rcu);
> }
>
> /*
> @@ -1544,6 +1572,7 @@ int handle_notification_thread_command_add_channel(
> goto error;
> }
>
> + rcu_read_lock();
> /* Build a list of all triggers applying to the new channel. */
> cds_lfht_for_each_entry(state->triggers_ht, &iter, trigger_ht_element,
> node) {
> @@ -1556,6 +1585,7 @@ int handle_notification_thread_command_add_channel(
>
> new_element = zmalloc(sizeof(*new_element));
> if (!new_element) {
> + rcu_read_unlock();
> goto error;
> }
> CDS_INIT_LIST_HEAD(&new_element->node);
> @@ -1563,6 +1593,7 @@ int handle_notification_thread_command_add_channel(
> cds_list_add(&new_element->node, &trigger_list);
> trigger_count++;
> }
> + rcu_read_unlock();
>
> DBG("[notification-thread] Found %i triggers that apply to newly added channel",
> trigger_count);
> @@ -1598,6 +1629,20 @@ error:
> }
>
> static
> +void free_channel_trigger_list_rcu(struct rcu_head *node)
> +{
> + free(caa_container_of(node, struct lttng_channel_trigger_list,
> + rcu_node));
> +}
> +
> +static
> +void free_channel_state_sample_rcu(struct rcu_head *node)
> +{
> + free(caa_container_of(node, struct channel_state_sample,
> + rcu_node));
> +}
> +
> +static
> int handle_notification_thread_command_remove_channel(
> struct notification_thread_state *state,
> uint64_t channel_key, enum lttng_domain_type domain,
> @@ -1639,7 +1684,7 @@ int handle_notification_thread_command_remove_channel(
> free(trigger_list_element);
> }
> cds_lfht_del(state->channel_triggers_ht, node);
> - free(trigger_list);
> + call_rcu(&trigger_list->rcu_node, free_channel_trigger_list_rcu);
>
> /* Free sampled channel state. */
> cds_lfht_lookup(state->channel_state_ht,
> @@ -1658,7 +1703,7 @@ int handle_notification_thread_command_remove_channel(
> channel_state_ht_node);
>
> cds_lfht_del(state->channel_state_ht, node);
> - free(sample);
> + call_rcu(&sample->rcu_node, free_channel_state_sample_rcu);
> }
>
> /* Remove the channel from the channels_ht and free it. */
> @@ -2118,6 +2163,20 @@ error:
> }
>
> static
> +void free_notification_client_list_rcu(struct rcu_head *node)
> +{
> + free(caa_container_of(node, struct notification_client_list,
> + rcu_node));
> +}
> +
> +static
> +void free_lttng_trigger_ht_element_rcu(struct rcu_head *node)
> +{
> + free(caa_container_of(node, struct lttng_trigger_ht_element,
> + rcu_node));
> +}
> +
> +static
> int handle_notification_thread_command_unregister_trigger(
> struct notification_thread_state *state,
> struct lttng_trigger *trigger,
> @@ -2186,7 +2245,7 @@ int handle_notification_thread_command_unregister_trigger(
> }
> cds_lfht_del(state->notification_trigger_clients_ht,
> &client_list->notification_trigger_ht_node);
> - free(client_list);
> + call_rcu(&client_list->rcu_node, free_notification_client_list_rcu);
>
> /* Remove trigger from triggers_ht. */
> trigger_ht_element = caa_container_of(triggers_ht_node,
> @@ -2198,7 +2257,7 @@ int handle_notification_thread_command_unregister_trigger(
> action = lttng_trigger_get_action(trigger_ht_element->trigger);
> lttng_action_destroy(action);
> lttng_trigger_destroy(trigger_ht_element->trigger);
> - free(trigger_ht_element);
> + call_rcu(&trigger_ht_element->rcu_node, free_lttng_trigger_ht_element_rcu);
> end:
> rcu_read_unlock();
> if (_cmd_reply) {
> diff --git a/src/bin/lttng-sessiond/notification-thread-internal.h b/src/bin/lttng-sessiond/notification-thread-internal.h
> index 397159da..e217995f 100644
> --- a/src/bin/lttng-sessiond/notification-thread-internal.h
> +++ b/src/bin/lttng-sessiond/notification-thread-internal.h
> @@ -53,6 +53,8 @@ struct session_info {
> /* Identifier of the currently ongoing rotation. */
> uint64_t id;
> } rotation;
> + /* call_rcu delayed reclaim. */
> + struct rcu_head rcu_node;
> };
>
> struct channel_info {
> @@ -68,6 +70,8 @@ struct channel_info {
> struct cds_lfht_node channels_ht_node;
> /* Node in the session_info's channels_ht. */
> struct cds_lfht_node session_info_channels_ht_node;
> + /* call_rcu delayed reclaim. */
> + struct rcu_head rcu_node;
> };
>
> #endif /* NOTIFICATION_THREAD_INTERNAL_H */
> --
> 2.11.0
>
--
Jérémie Galarneau
EfficiOS Inc.
http://www.efficios.com
More information about the lttng-dev
mailing list