[ltt-dev] [PATCH 06/10 round10] add LFHT_MEMORY_CHUNK low level memory management
Mathieu Desnoyers
mathieu.desnoyers at efficios.com
Tue Nov 22 05:20:37 EST 2011
* Lai Jiangshan (laijs at cn.fujitsu.com) wrote:
> The old memory management is named LFHT_MEMORY_ORDER.
>
> Signed-off-by: Lai Jiangshan <laijs at cn.fujitsu.com>
> ---
> rculfhash.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
> 1 files changed, 111 insertions(+), 5 deletions(-)
>
> diff --git a/rculfhash.c b/rculfhash.c
> index b7be893..ae1177f 100644
> --- a/rculfhash.c
> +++ b/rculfhash.c
> @@ -172,6 +172,10 @@
> #define dbg_printf(fmt, args...)
> #endif
>
> +#if !defined(LFHT_MEMORY_ORDER) && !defined(LFHT_MEMORY_CHUNK)
> +#define LFHT_MEMORY_ORDER
> +#endif
> +
> /*
> * Split-counters lazily update the global counter each 1024
> * addition/removal. It automatically keeps track of resize required.
> @@ -195,6 +199,8 @@
> #define MAX_TABLE_ORDER 64
> #endif
>
> +#define MAX_CHUNK_TABLE (1UL << 10)
> +
> /*
> * Minimum number of bucket nodes to touch per thread to parallelize grow/shrink.
> */
> @@ -247,6 +253,7 @@ struct rcu_table {
> unsigned long resize_target;
> int resize_initiated;
>
> +#ifdef LFHT_MEMORY_ORDER
> /*
> * Contains the per order-index-level bucket node table. The size
> * of each bucket node table is half the number of hashes contained
> @@ -255,6 +262,7 @@ struct rcu_table {
> * levels to improve cache locality for small index orders.
> */
> struct cds_lfht_node *tbl[MAX_TABLE_ORDER];
> +#endif
> };
>
> /*
> @@ -289,6 +297,16 @@ struct cds_lfht {
> pthread_attr_t *resize_attr; /* Resize threads attributes */
> long count; /* global approximate item count */
> struct ht_items_count *split_count; /* split item count */
> +
> +#ifdef LFHT_MEMORY_CHUNK
> + /*
> + * Contains the bucket node chunks. The size of each bucket node
> + * chunk is ->min_alloc_size(avoid to alloc chunks with different
> + * size). chunks improve cache locality for small index orders
> + * and improve lookup_bucket() for the archs without hardware fls().
> + */
> + struct cds_lfht_node *tbl[0];
> +#endif
> };
>
> /*
> @@ -753,6 +771,93 @@ unsigned long _uatomic_xchg_monotonic_increase(unsigned long *ptr,
> return old2;
> }
>
> +#ifdef LFHT_MEMORY_CHUNK
> +static
> +struct cds_lfht *_cds_lfht_alloc(unsigned long min_alloc_size,
> + unsigned long max_size)
> +{
> + struct cds_lfht *ht;
> +
> + min_alloc_size = max(min_alloc_size, max_size / MAX_CHUNK_TABLE);
> + ht = calloc(1, sizeof(struct cds_lfht) +
> + sizeof(ht->tbl[0]) * (max_size / min_alloc_size));
Hrm, this first level of indirection quickly becomes much larger than
the order-based table. Have you measured gains with this approach ?
Can you detail the use-case for this approach ?
Have you measured the overhead of software fls() compared to the
overhead of the increased cache footprint of the larger table for the
chunk-based approach ?
Thanks,
Mathieu
> + assert(ht);
> +
> + ht->min_alloc_size = min_alloc_size;
> + ht->min_alloc_order = get_count_order_ulong(min_alloc_size);
> + ht->max_size = max_size;
> +
> + return ht;
> +}
> +
> +static
> +void cds_lfht_alloc_bucket_table(struct cds_lfht *ht, unsigned long order)
> +{
> + if (order == 0) {
> + ht->tbl[0] = calloc(ht->min_alloc_size,
> + sizeof(struct cds_lfht_node));
> + assert(ht->tbl[0]);
> + } else if (order > ht->min_alloc_order) {
> + unsigned long i, len = 1UL << (order - 1 - ht->min_alloc_order);
> +
> + for (i = len; i < 2 * len; i++) {
> + ht->tbl[i] = calloc(ht->min_alloc_size,
> + sizeof(struct cds_lfht_node));
> + assert(ht->tbl[i]);
> + }
> + }
> + /* Nothing to do for 0 < order && order <= ht->min_alloc_order */
> +}
> +
> +/*
> + * cds_lfht_free_bucket_table() should be called with decreasing order.
> + * When cds_lfht_free_bucket_table(0) is called, it means the whole
> + * lfht is destroyed.
> + */
> +static
> +void cds_lfht_free_bucket_table(struct cds_lfht *ht, unsigned long order)
> +{
> + if (order == 0)
> + poison_free(ht->tbl[0]);
> + else if (order > ht->min_alloc_order) {
> + unsigned long i, len = 1UL << (order - 1 - ht->min_alloc_order);
> +
> + for (i = len; i < 2 * len; i++)
> + poison_free(ht->tbl[i]);
> + }
> + /* Nothing to do for 0 < order && order <= ht->min_alloc_order */
> +}
> +
> +static inline
> +struct cds_lfht_node *bucket_at(struct cds_lfht *ht, unsigned long index)
> +{
> + unsigned long chunk, offset;
> +
> + if (__builtin_constant_p(index) && index == 0)
> + return &ht->tbl[0][0];
> +
> + chunk = index >> ht->min_alloc_order;
> + offset = index & (ht->min_alloc_size - 1);
> + return &ht->tbl[chunk][offset];
> +}
> +#else
> +/* LFHT_MEMORY_ORDER */
> +static
> +struct cds_lfht *_cds_lfht_alloc(unsigned long min_alloc_size,
> + unsigned long max_size)
> +{
> + struct cds_lfht *ht;
> +
> + ht = calloc(1, sizeof(struct cds_lfht));
> + assert(ht);
> +
> + ht->min_alloc_size = min_alloc_size;
> + ht->min_alloc_order = get_count_order_ulong(min_alloc_size);
> + ht->max_size = max_size;
> +
> + return ht;
> +}
> +
> static
> void cds_lfht_alloc_bucket_table(struct cds_lfht *ht, unsigned long order)
> {
> @@ -803,6 +908,7 @@ struct cds_lfht_node *bucket_at(struct cds_lfht *ht, unsigned long index)
> index, order, index & ((1UL << (order - 1)) - 1));
> return &ht->t.tbl[order][index & ((1UL << (order - 1)) - 1)];
> }
> +#endif
>
> static inline
> struct cds_lfht_node *lookup_bucket(struct cds_lfht *ht, unsigned long size,
> @@ -1376,8 +1482,11 @@ struct cds_lfht *_cds_lfht_new(unsigned long init_size,
> if (!init_size || (init_size & (init_size - 1)))
> return NULL;
>
> +#ifdef LFHT_MEMORY_ORDER
> + /* max_size == 0 for LFHT_MEMORY_ORDER means infinite max_size. */
> if (!max_size)
> max_size = 1UL << (MAX_TABLE_ORDER - 1);
> +#endif
>
> /* max_size must be power of two */
> if (!max_size || (max_size & (max_size - 1)))
> @@ -1387,8 +1496,8 @@ struct cds_lfht *_cds_lfht_new(unsigned long init_size,
> init_size = max(init_size, MIN_TABLE_SIZE);
> max_size = max(max_size, min_alloc_size);
> init_size = min(init_size, max_size);
> - ht = calloc(1, sizeof(struct cds_lfht));
> - assert(ht);
> +
> + ht = _cds_lfht_alloc(min_alloc_size, max_size);
> ht->flags = flags;
> ht->cds_lfht_call_rcu = cds_lfht_call_rcu;
> ht->cds_lfht_synchronize_rcu = cds_lfht_synchronize_rcu;
> @@ -1404,9 +1513,6 @@ struct cds_lfht *_cds_lfht_new(unsigned long init_size,
> pthread_mutex_init(&ht->resize_mutex, NULL);
> order = get_count_order_ulong(init_size);
> ht->t.resize_target = 1UL << order;
> - ht->min_alloc_size = min_alloc_size;
> - ht->min_alloc_order = get_count_order_ulong(min_alloc_size);
> - ht->max_size = max_size;
> cds_lfht_create_bucket(ht, 1UL << order);
> ht->t.size = 1UL << order;
> return ht;
> --
> 1.7.4.4
>
--
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com
More information about the lttng-dev
mailing list