[lttng-dev] [RFC v2] Add kmalloc failover to vmalloc
Mathieu Desnoyers
mathieu.desnoyers at efficios.com
Thu Sep 21 18:39:37 UTC 2017
----- On Sep 21, 2017, at 2:29 PM, Michael Jeanson mjeanson at efficios.com wrote:
> This patch is based on the kvmalloc helpers introduced in kernel 4.12.
>
> It will gracefully failover memory allocations of more than one page to
> vmalloc for systems under high memory pressure or fragmentation.
>
> I only used it in lttng-events.c as a POC, most allocations fit into a
> single page so I'm not sure how useful this actually is.
>
> Thoughts?
Just update the changelog to specify why we need this for struct lttng_session
(~32kB), and you can submit it for good with a "Fix: " tag.
Thanks,
Mathieu
>
> V2:
> - Add vmalloc_sync_all
> - Use only for lttng_session
>
> Signed-off-by: Michael Jeanson <mjeanson at efficios.com>
>
> See upstream commit:
> commit a7c3e901a46ff54c016d040847eda598a9e3e653
> Author: Michal Hocko <mhocko at suse.com>
> Date: Mon May 8 15:57:09 2017 -0700
>
> mm: introduce kv[mz]alloc helpers
>
> Patch series "kvmalloc", v5.
>
> There are many open coded kmalloc with vmalloc fallback instances in the
> tree. Most of them are not careful enough or simply do not care about
> the underlying semantic of the kmalloc/page allocator which means that
> a) some vmalloc fallbacks are basically unreachable because the kmalloc
> part will keep retrying until it succeeds b) the page allocator can
> invoke a really disruptive steps like the OOM killer to move forward
> which doesn't sound appropriate when we consider that the vmalloc
> fallback is available.
>
> As it can be seen implementing kvmalloc requires quite an intimate
> knowledge of the page allocator and the memory reclaim internals which
> strongly suggests that a helper should be implemented in the memory
> subsystem proper.
>
> Most callers, I could find, have been converted to use the helper
> instead. This is patch 6. There are some more relying on __GFP_REPEAT
> in the networking stack which I have converted as well and Eric Dumazet
> was not opposed [2] to convert them as well.
>
> [1] http://lkml.kernel.org/r/20170130094940.13546-1-mhocko@kernel.org
> [2]
> http://lkml.kernel.org/r/1485273626.16328.301.camel@edumazet-glaptop3.roam.corp.google.com
>
> This patch (of 9):
>
> Using kmalloc with the vmalloc fallback for larger allocations is a
> common pattern in the kernel code. Yet we do not have any common helper
> for that and so users have invented their own helpers. Some of them are
> really creative when doing so. Let's just add kv[mz]alloc and make sure
> it is implemented properly. This implementation makes sure to not make
> a large memory pressure for > PAGE_SIZE requests (__GFP_NORETRY) and also
> to not warn about allocation failures. This also rules out the OOM
> killer as the vmalloc is a more appropriate fallback than a disruptive
> user visible action.
>
> Signed-off-by: Michael Jeanson <mjeanson at efficios.com>
> ---
> lttng-events.c | 6 +--
> wrapper/vmalloc.h | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
> 2 files changed, 112 insertions(+), 5 deletions(-)
>
> diff --git a/lttng-events.c b/lttng-events.c
> index 6aa994c..21c4113 100644
> --- a/lttng-events.c
> +++ b/lttng-events.c
> @@ -132,7 +132,7 @@ struct lttng_session *lttng_session_create(void)
> int i;
>
> mutex_lock(&sessions_mutex);
> - session = kzalloc(sizeof(struct lttng_session), GFP_KERNEL);
> + session = lttng_kvzalloc(sizeof(struct lttng_session), GFP_KERNEL);
> if (!session)
> goto err;
> INIT_LIST_HEAD(&session->chan);
> @@ -163,7 +163,7 @@ struct lttng_session *lttng_session_create(void)
> err_free_cache:
> kfree(metadata_cache);
> err_free_session:
> - kfree(session);
> + lttng_kvfree(session);
> err:
> mutex_unlock(&sessions_mutex);
> return NULL;
> @@ -212,7 +212,7 @@ void lttng_session_destroy(struct lttng_session *session)
> kref_put(&session->metadata_cache->refcount, metadata_cache_destroy);
> list_del(&session->list);
> mutex_unlock(&sessions_mutex);
> - kfree(session);
> + lttng_kvfree(session);
> }
>
> int lttng_session_statedump(struct lttng_session *session)
> diff --git a/wrapper/vmalloc.h b/wrapper/vmalloc.h
> index 2332439..861f442 100644
> --- a/wrapper/vmalloc.h
> +++ b/wrapper/vmalloc.h
> @@ -25,6 +25,9 @@
> * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> */
>
> +#include <linux/version.h>
> +#include <linux/vmalloc.h>
> +
> #ifdef CONFIG_KALLSYMS
>
> #include <linux/kallsyms.h>
> @@ -51,8 +54,6 @@ void wrapper_vmalloc_sync_all(void)
> }
> #else
>
> -#include <linux/vmalloc.h>
> -
> static inline
> void wrapper_vmalloc_sync_all(void)
> {
> @@ -60,4 +61,110 @@ void wrapper_vmalloc_sync_all(void)
> }
> #endif
>
> +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0))
> +static inline
> +void *lttng_kvmalloc(unsigned long size, int flags)
> +{
> + void *ret;
> +
> + ret = kvmalloc(size, flags);
> + if (is_vmalloc_addr(ret)) {
> + /*
> + * Make sure we don't trigger recursive page faults in the
> + * tracing fast path.
> + */
> + wrapper_vmalloc_sync_all();
> + }
> + return ret;
> +}
> +
> +static inline
> +void *lttng_kvzalloc(unsigned long size, int flags)
> +{
> + void *ret;
> +
> + ret = kvzalloc(size, flags);
> + if (is_vmalloc_addr(ret)) {
> + /*
> + * Make sure we don't trigger recursive page faults in the
> + * tracing fast path.
> + */
> + wrapper_vmalloc_sync_all();
> + }
> + return ret;
> +}
> +
> +static inline
> +void lttng_kvfree(const void *addr)
> +{
> + kvfree(addr);
> +}
> +
> +#else
> +
> +#include <linux/slab.h>
> +#include <linux/mm.h>
> +
> +/**
> + * lttng_kvmalloc_node - attempt to allocate physically contiguous memory, but
> upon
> + * failure, fall back to non-contiguous (vmalloc) allocation.
> + * @size: size of the request.
> + * @flags: gfp mask for the allocation - must be compatible with GFP_KERNEL.
> + *
> + * Uses kmalloc to get the memory but if the allocation fails then falls back
> + * to the vmalloc allocator. Use lttng_kvfree to free the memory.
> + *
> + * Reclaim modifiers - __GFP_NORETRY, __GFP_REPEAT and __GFP_NOFAIL are not
> supported
> + */
> +static inline
> +void *lttng_kvmalloc(unsigned long size, int flags)
> +{
> + void *ret;
> +
> + /*
> + * vmalloc uses GFP_KERNEL for some internal allocations (e.g page tables)
> + * so the given set of flags has to be compatible.
> + */
> + WARN_ON_ONCE((flags & GFP_KERNEL) != GFP_KERNEL);
> +
> + /*
> + * If the allocation fits in a single page, do not fallback.
> + */
> + if (size <= PAGE_SIZE) {
> + return kmalloc(size, flags);
> + }
> +
> + /*
> + * Make sure that larger requests are not too disruptive - no OOM
> + * killer and no allocation failure warnings as we have a fallback
> + */
> + ret = kmalloc(size, flags | __GFP_NOWARN | __GFP_NORETRY);
> + if (!ret) {
> + ret = __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL);
> + /*
> + * Make sure we don't trigger recursive page faults in the
> + * tracing fast path.
> + */
> + wrapper_vmalloc_sync_all();
> + }
> + return ret;
> +}
> +
> +static inline
> +void *lttng_kvzalloc(unsigned long size, int flags)
> +{
> + return lttng_kvmalloc(size, flags | __GFP_ZERO);
> +}
> +
> +static inline
> +void lttng_kvfree(const void *addr)
> +{
> + if (is_vmalloc_addr(addr)) {
> + vfree(addr);
> + } else {
> + kfree(addr);
> + }
> +}
> +#endif
> +
> #endif /* _LTTNG_WRAPPER_VMALLOC_H */
> --
> 2.7.4
--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com
More information about the lttng-dev
mailing list