[lttng-dev] [PATCH lttng-modules] RFC: Add kmalloc failover to vmalloc

Thu Sep 21 14:55:31 UTC 2017

This patch is based on the kvmalloc helpers introduced in kernel 4.12.

It makes memory allocations of more than one page gracefully fall back to
vmalloc on systems under high memory pressure or fragmentation.

I only used it in lttng-events.c as a proof of concept. Most allocations fit
into a single page, so I'm not sure how useful this actually is.

Thoughts?

Signed-off-by: Michael Jeanson <mjeanson at efficios.com>

See upstream commit:
  commit a7c3e901a46ff54c016d040847eda598a9e3e653
  Author: Michal Hocko <mhocko at suse.com>
  Date:   Mon May 8 15:57:09 2017 -0700

    mm: introduce kv[mz]alloc helpers

    Patch series "kvmalloc", v5.

    There are many open coded kmalloc with vmalloc fallback instances in the
    tree.  Most of them are not careful enough or simply do not care about
    the underlying semantic of the kmalloc/page allocator which means that
    a) some vmalloc fallbacks are basically unreachable because the kmalloc
    part will keep retrying until it succeeds, and b) the page allocator can
    invoke really disruptive steps like the OOM killer to move forward
    which doesn't sound appropriate when we consider that the vmalloc
    fallback is available.

    As can be seen, implementing kvmalloc requires quite an intimate
    knowledge of the page allocator and the memory reclaim internals, which
    strongly suggests that a helper should be implemented in the memory
    subsystem proper.

    Most callers, I could find, have been converted to use the helper
    instead.  This is patch 6.  There are some more relying on __GFP_REPEAT
    in the networking stack which I have converted as well and Eric Dumazet
    was not opposed [2] to convert them as well.

    [1] http://lkml.kernel.org/r/20170130094940.13546-1-mhocko@kernel.org
    [2] http://lkml.kernel.org/r/1485273626.16328.301.camel@edumazet-glaptop3.roam.corp.google.com

    This patch (of 9):

    Using kmalloc with the vmalloc fallback for larger allocations is a
    common pattern in the kernel code.  Yet we do not have any common helper
    for that and so users have invented their own helpers.  Some of them are
    really creative when doing so.  Let's just add kv[mz]alloc and make sure
    it is implemented properly.  This implementation makes sure to not make
    a large memory pressure for > PAGE_SIZE requests (__GFP_NORETRY) and also
    to not warn about allocation failures.  This also rules out the OOM
    killer as the vmalloc is a more appropriate fallback than a disruptive
    user visible action.

Signed-off-by: Michael Jeanson <mjeanson at efficios.com>
---
 lttng-events.c    | 30 ++++++++++-----------
 wrapper/vmalloc.h | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 92 insertions(+), 17 deletions(-)

diff --git a/lttng-events.c b/lttng-events.c
index 6aa994c..1e4cc10 100644
--- a/lttng-events.c
+++ b/lttng-events.c
@@ -132,14 +132,14 @@ struct lttng_session *lttng_session_create(void)
 	int i;
 
 	mutex_lock(&sessions_mutex);
-	session = kzalloc(sizeof(struct lttng_session), GFP_KERNEL);
+	session = lttng_kvzalloc(sizeof(struct lttng_session), GFP_KERNEL);
 	if (!session)
 		goto err;
 	INIT_LIST_HEAD(&session->chan);
 	INIT_LIST_HEAD(&session->events);
 	uuid_le_gen(&session->uuid);
 
-	metadata_cache = kzalloc(sizeof(struct lttng_metadata_cache),
+	metadata_cache = lttng_kvzalloc(sizeof(struct lttng_metadata_cache),
 			GFP_KERNEL);
 	if (!metadata_cache)
 		goto err_free_session;
@@ -161,9 +161,9 @@ struct lttng_session *lttng_session_create(void)
 	return session;
 
 err_free_cache:
-	kfree(metadata_cache);
+	lttng_kvfree(metadata_cache);
 err_free_session:
-	kfree(session);
+	lttng_kvfree(session);
 err:
 	mutex_unlock(&sessions_mutex);
 	return NULL;
@@ -174,7 +174,7 @@ void metadata_cache_destroy(struct kref *kref)
 	struct lttng_metadata_cache *cache =
 		container_of(kref, struct lttng_metadata_cache, refcount);
 	vfree(cache->data);
-	kfree(cache);
+	lttng_kvfree(cache);
 }
 
 void lttng_session_destroy(struct lttng_session *session)
@@ -212,7 +212,7 @@ void lttng_session_destroy(struct lttng_session *session)
 	kref_put(&session->metadata_cache->refcount, metadata_cache_destroy);
 	list_del(&session->list);
 	mutex_unlock(&sessions_mutex);
-	kfree(session);
+	lttng_kvfree(session);
 }
 
 int lttng_session_statedump(struct lttng_session *session)
@@ -495,7 +495,7 @@ struct lttng_channel *lttng_channel_create(struct lttng_session *session,
 		printk(KERN_WARNING "LTT : Can't lock transport module.\n");
 		goto notransport;
 	}
-	chan = kzalloc(sizeof(struct lttng_channel), GFP_KERNEL);
+	chan = lttng_kvzalloc(sizeof(struct lttng_channel), GFP_KERNEL);
 	if (!chan)
 		goto nomem;
 	chan->session = session;
@@ -520,7 +520,7 @@ struct lttng_channel *lttng_channel_create(struct lttng_session *session,
 	return chan;
 
 create_error:
-	kfree(chan);
+	lttng_kvfree(chan);
 nomem:
 	if (transport)
 		module_put(transport->owner);
@@ -542,7 +542,7 @@ void _lttng_channel_destroy(struct lttng_channel *chan)
 	module_put(chan->transport->owner);
 	list_del(&chan->list);
 	lttng_destroy_context(chan->ctx);
-	kfree(chan);
+	lttng_kvfree(chan);
 }
 
 void lttng_metadata_channel_destroy(struct lttng_channel *chan)
@@ -1343,7 +1343,7 @@ int lttng_enabler_ref_events(struct lttng_enabler *enabler)
 			 * If no backward ref, create it.
 			 * Add backward ref from event to enabler.
 			 */
-			enabler_ref = kzalloc(sizeof(*enabler_ref), GFP_KERNEL);
+			enabler_ref = lttng_kvzalloc(sizeof(*enabler_ref), GFP_KERNEL);
 			if (!enabler_ref)
 				return -ENOMEM;
 			enabler_ref->ref = enabler;
@@ -1381,7 +1381,7 @@ struct lttng_enabler *lttng_enabler_create(enum lttng_enabler_type type,
 {
 	struct lttng_enabler *enabler;
 
-	enabler = kzalloc(sizeof(*enabler), GFP_KERNEL);
+	enabler = lttng_kvzalloc(sizeof(*enabler), GFP_KERNEL);
 	if (!enabler)
 		return NULL;
 	enabler->type = type;
@@ -1427,7 +1427,7 @@ int lttng_enabler_attach_bytecode(struct lttng_enabler *enabler,
 	ret = get_user(bytecode_len, &bytecode->len);
 	if (ret)
 		return ret;
-	bytecode_node = kzalloc(sizeof(*bytecode_node) + bytecode_len,
+	bytecode_node = lttng_kvzalloc(sizeof(*bytecode_node) + bytecode_len,
 			GFP_KERNEL);
 	if (!bytecode_node)
 		return -ENOMEM;
@@ -1443,7 +1443,7 @@ int lttng_enabler_attach_bytecode(struct lttng_enabler *enabler,
 	return 0;
 
 error_free:
-	kfree(bytecode_node);
+	lttng_kvfree(bytecode_node);
 	return ret;
 }
 
@@ -1461,14 +1461,14 @@ void lttng_enabler_destroy(struct lttng_enabler *enabler)
 	/* Destroy filter bytecode */
 	list_for_each_entry_safe(filter_node, tmp_filter_node,
 			&enabler->filter_bytecode_head, node) {
-		kfree(filter_node);
+		lttng_kvfree(filter_node);
 	}
 
 	/* Destroy contexts */
 	lttng_destroy_context(enabler->ctx);
 
 	list_del(&enabler->node);
-	kfree(enabler);
+	lttng_kvfree(enabler);
 }
 
 /*
diff --git a/wrapper/vmalloc.h b/wrapper/vmalloc.h
index 2332439..ad2d6cf 100644
--- a/wrapper/vmalloc.h
+++ b/wrapper/vmalloc.h
@@ -25,6 +25,9 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include <linux/version.h>
+#include <linux/vmalloc.h>
+
 #ifdef CONFIG_KALLSYMS
 
 #include <linux/kallsyms.h>
@@ -51,8 +54,6 @@ void wrapper_vmalloc_sync_all(void)
 }
 #else
 
-#include <linux/vmalloc.h>
-
 static inline
 void wrapper_vmalloc_sync_all(void)
 {
@@ -60,4 +61,78 @@ void wrapper_vmalloc_sync_all(void)
 }
 #endif
 
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0))
+/* Kernels >= 4.12 provide kvmalloc()/kvzalloc(); forward to them directly. */
+static inline
+void *lttng_kvmalloc(unsigned long size, gfp_t flags)
+{
+	return kvmalloc(size, flags);
+}
+
+static inline
+void *lttng_kvzalloc(unsigned long size, gfp_t flags)
+{
+	return kvzalloc(size, flags);
+}
+#else
+
+/**
+ * lttng_kvmalloc - attempt to allocate physically contiguous memory, but upon
+ * failure, fall back to non-contiguous (vmalloc) allocation.
+ * @size: size of the request.
+ * @flags: gfp mask for the allocation - must be compatible with GFP_KERNEL.
+ *
+ * Uses kmalloc to get the memory but if the allocation fails then falls back
+ * to the vmalloc allocator. Use lttng_kvfree to free the memory.
+ *
+ * Reclaim modifiers - __GFP_NORETRY, __GFP_REPEAT and __GFP_NOFAIL are not supported
+ */
+static inline
+void *lttng_kvmalloc(unsigned long size, gfp_t flags)
+{
+	void *ret;
+
+	/*
+	 * vmalloc uses GFP_KERNEL for some internal allocations (e.g page tables)
+	 * so the given set of flags has to be compatible.
+	 */
+	WARN_ON_ONCE((flags & GFP_KERNEL) != GFP_KERNEL);
+
+	/* If the allocation fits in a single page, do not fallback. */
+	if (size <= PAGE_SIZE) {
+		return kmalloc(size, flags);
+	}
+
+	/*
+	 * Make sure that larger requests are not too disruptive - no OOM
+	 * killer and no allocation failure warnings as we have a fallback
+	 */
+	ret = kmalloc(size, flags | __GFP_NOWARN | __GFP_NORETRY);
+	if (!ret) {
+		ret = __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL);
+	}
+	return ret;
+}
+
+static inline
+void *lttng_kvzalloc(unsigned long size, gfp_t flags)
+{
+	return lttng_kvmalloc(size, flags | __GFP_ZERO);
+}
+#endif
+
+/* Free memory obtained from lttng_kvmalloc/lttng_kvzalloc on any kernel version. */
+static inline
+void lttng_kvfree(const void *addr)
+{
+	if (is_vmalloc_addr(addr)) {
+		vfree(addr);
+	} else {
+		kfree(addr);
+	}
+}
+
 #endif /* _LTTNG_WRAPPER_VMALLOC_H */
-- 
2.7.4