[lttng-dev] [PATCH lttng-modules] Fix: allow racy tracepoint string input from kernel and userspace

Mathieu Desnoyers mathieu.desnoyers at efficios.com
Mon Apr 14 15:25:17 EDT 2014


Fixes #781

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers at efficios.com>
---
 lib/ringbuffer/backend.h             |  215 ++++++++++++++++++++++++++++++++++
 lib/ringbuffer/backend_internal.h    |    6 +
 lib/ringbuffer/ring_buffer_backend.c |  166 ++++++++++++++++++++++++++
 lttng-events.h                       |    4 +
 lttng-ring-buffer-client.h           |   17 +++
 lttng-ring-buffer-metadata-client.h  |    8 ++
 probes/lttng-events.h                |   28 ++---
 7 files changed, 429 insertions(+), 15 deletions(-)

diff --git a/lib/ringbuffer/backend.h b/lib/ringbuffer/backend.h
index bbbc80d..7246ec4 100644
--- a/lib/ringbuffer/backend.h
+++ b/lib/ringbuffer/backend.h
@@ -165,6 +165,128 @@ void lib_ring_buffer_memset(const struct lib_ring_buffer_config *config,
 	ctx->buf_offset += len;
 }
 
+/*
+ * Copy up to @len string bytes from @src to @dest. Stop whenever a NULL
+ * terminating character is found in @src. Returns the number of bytes
+ * copied. Does *not* terminate @dest with NULL terminating character.
+ */
+static inline
+size_t lib_ring_buffer_do_strcpy(const struct lib_ring_buffer_config *config,
+		char *dest, const char *src, size_t len)
+{
+	size_t count;
+
+	for (count = 0; count < len; count++) {
+		char c;
+
+		/*
+		 * Only read source character once, in case it is
+		 * modified concurrently.
+		 */
+		c = ACCESS_ONCE(src[count]);
+		if (!c)
+			break;
+		lib_ring_buffer_do_copy(config, &dest[count], &c, 1);
+	}
+	return count;
+}
+
+/*
+ * Copy up to @len string bytes from @src to @dest. Stop whenever a NULL
+ * terminating character is found in @src, or when a fault occurs.
+ * Returns the number of bytes copied. Does *not* terminate @dest with
+ * NULL terminating character.
+ *
+ * This function deals with userspace pointers, it should never be called
+ * directly without having the src pointer checked with access_ok()
+ * previously.
+ */
+static inline
+size_t lib_ring_buffer_do_strcpy_from_user_inatomic(const struct lib_ring_buffer_config *config,
+		char *dest, const char __user *src, size_t len)
+{
+	size_t count;
+
+	for (count = 0; count < len; count++) {
+		int ret;
+		char c;
+
+		ret = __get_user(c, &src[count]);
+		if (ret || !c)
+			break;
+		lib_ring_buffer_do_copy(config, &dest[count], &c, 1);
+	}
+	return count;
+}
+
+/**
+ * lib_ring_buffer_strcpy - write string data to a buffer backend
+ * @config : ring buffer instance configuration
+ * @ctx: ring buffer context. (input arguments only)
+ * @src : source pointer to copy from
+ * @len : length of data to copy
+ * @pad : character to use for padding
+ *
+ * This function copies @len - 1 bytes of string data from a source
+ * pointer to a buffer backend, followed by a terminating '\0'
+ * character, at the current context offset. This is more or less a
+ * buffer backend-specific strncpy() operation. If a terminating '\0'
+ * character is found in @src before @len - 1 characters are copied, pad
+ * the buffer with @pad characters (e.g. '#'). Calls the slow path
+ * (_ring_buffer_strcpy) if copy is crossing a page boundary.
+ */
+static inline
+void lib_ring_buffer_strcpy(const struct lib_ring_buffer_config *config,
+			   struct lib_ring_buffer_ctx *ctx,
+			   const char *src, size_t len, int pad)
+{
+	struct lib_ring_buffer_backend *bufb = &ctx->buf->backend;
+	struct channel_backend *chanb = &ctx->chan->backend;
+	size_t sbidx, index;
+	size_t offset = ctx->buf_offset;
+	ssize_t pagecpy;
+	struct lib_ring_buffer_backend_pages *rpages;
+	unsigned long sb_bindex, id;
+
+	if (unlikely(!len))
+		return;
+	offset &= chanb->buf_size - 1;
+	sbidx = offset >> chanb->subbuf_size_order;
+	index = (offset & (chanb->subbuf_size - 1)) >> PAGE_SHIFT;
+	pagecpy = min_t(size_t, len, (-offset) & ~PAGE_MASK);
+	id = bufb->buf_wsb[sbidx].id;
+	sb_bindex = subbuffer_id_get_index(config, id);
+	rpages = bufb->array[sb_bindex];
+	CHAN_WARN_ON(ctx->chan,
+		     config->mode == RING_BUFFER_OVERWRITE
+		     && subbuffer_id_is_noref(config, id));
+	if (likely(pagecpy == len)) {
+		size_t count;
+
+		count = lib_ring_buffer_do_strcpy(config,
+					rpages->p[index].virt
+					    + (offset & ~PAGE_MASK),
+					src, len - 1);
+		offset += count;
+		/* Padding */
+		if (unlikely(count < len - 1)) {
+			size_t pad_len = len - 1 - count;
+
+			lib_ring_buffer_do_memset(rpages->p[index].virt
+						+ (offset & ~PAGE_MASK),
+					pad, pad_len);
+			offset += pad_len;
+		}
+		/* Ending '\0' */
+		lib_ring_buffer_do_memset(rpages->p[index].virt
+					+ (offset & ~PAGE_MASK),
+				'\0', 1);
+	} else {
+		_lib_ring_buffer_strcpy(bufb, offset, src, len, 0, pad);
+	}
+	ctx->buf_offset += len;
+}
+
 /**
  * lib_ring_buffer_copy_from_user_inatomic - write userspace data to a buffer backend
  * @config : ring buffer instance configuration
@@ -239,6 +361,99 @@ fill_buffer:
 	_lib_ring_buffer_memset(bufb, offset, 0, len, 0);
 }
 
+/**
+ * lib_ring_buffer_strcpy_from_user_inatomic - write userspace string data to a buffer backend
+ * @config : ring buffer instance configuration
+ * @ctx: ring buffer context (input arguments only)
+ * @src : userspace source pointer to copy from
+ * @len : length of data to copy
+ * @pad : character to use for padding
+ *
+ * This function copies @len - 1 bytes of string data from a userspace
+ * source pointer to a buffer backend, followed by a terminating '\0'
+ * character, at the current context offset. This is more or less a
+ * buffer backend-specific strncpy() operation. If a terminating '\0'
+ * character is found in @src before @len - 1 characters are copied, pad
+ * the buffer with @pad characters (e.g. '#'). Calls the slow path
+ * (_ring_buffer_strcpy_from_user_inatomic) if copy is crossing a page
+ * boundary. Disable the page fault handler to ensure we never try to
+ * take the mmap_sem.
+ */
+static inline
+void lib_ring_buffer_strcpy_from_user_inatomic(const struct lib_ring_buffer_config *config,
+		struct lib_ring_buffer_ctx *ctx,
+		const void __user *src, size_t len, int pad)
+{
+	struct lib_ring_buffer_backend *bufb = &ctx->buf->backend;
+	struct channel_backend *chanb = &ctx->chan->backend;
+	size_t sbidx, index;
+	size_t offset = ctx->buf_offset;
+	ssize_t pagecpy;
+	struct lib_ring_buffer_backend_pages *rpages;
+	unsigned long sb_bindex, id;
+	mm_segment_t old_fs = get_fs();
+
+	if (unlikely(!len))
+		return;
+	offset &= chanb->buf_size - 1;
+	sbidx = offset >> chanb->subbuf_size_order;
+	index = (offset & (chanb->subbuf_size - 1)) >> PAGE_SHIFT;
+	pagecpy = min_t(size_t, len, (-offset) & ~PAGE_MASK);
+	id = bufb->buf_wsb[sbidx].id;
+	sb_bindex = subbuffer_id_get_index(config, id);
+	rpages = bufb->array[sb_bindex];
+	CHAN_WARN_ON(ctx->chan,
+		     config->mode == RING_BUFFER_OVERWRITE
+		     && subbuffer_id_is_noref(config, id));
+
+	set_fs(KERNEL_DS);
+	pagefault_disable();
+	if (unlikely(!access_ok(VERIFY_READ, src, len)))
+		goto fill_buffer;
+
+	if (likely(pagecpy == len)) {
+		size_t count;
+
+		count = lib_ring_buffer_do_strcpy_from_user_inatomic(config,
+					rpages->p[index].virt
+					    + (offset & ~PAGE_MASK),
+					src, len - 1);
+		offset += count;
+		/* Padding */
+		if (unlikely(count < len - 1)) {
+			size_t pad_len = len - 1 - count;
+
+			lib_ring_buffer_do_memset(rpages->p[index].virt
+						+ (offset & ~PAGE_MASK),
+					pad, pad_len);
+			offset += pad_len;
+		}
+		/* Ending '\0' */
+		lib_ring_buffer_do_memset(rpages->p[index].virt
+					+ (offset & ~PAGE_MASK),
+				'\0', 1);
+	} else {
+		_lib_ring_buffer_strcpy_from_user_inatomic(bufb, offset, src,
+					len, 0, pad);
+	}
+	pagefault_enable();
+	set_fs(old_fs);
+	ctx->buf_offset += len;
+
+	return;
+
+fill_buffer:
+	pagefault_enable();
+	set_fs(old_fs);
+	/*
+	 * In the error path we call the slow path version to avoid
+	 * the pollution of static inline code.
+	 */
+	_lib_ring_buffer_memset(bufb, offset, pad, len - 1, 0);
+	offset += len - 1;
+	_lib_ring_buffer_memset(bufb, offset, '\0', 1, 0);
+}
+
 /*
  * This accessor counts the number of unread records in a buffer.
  * It only provides a consistent value if no reads not writes are performed
diff --git a/lib/ringbuffer/backend_internal.h b/lib/ringbuffer/backend_internal.h
index 9682c5b..e4df3b9 100644
--- a/lib/ringbuffer/backend_internal.h
+++ b/lib/ringbuffer/backend_internal.h
@@ -56,9 +56,15 @@ extern void _lib_ring_buffer_write(struct lib_ring_buffer_backend *bufb,
 extern void _lib_ring_buffer_memset(struct lib_ring_buffer_backend *bufb,
 				    size_t offset, int c, size_t len,
 				    ssize_t pagecpy);
+extern void _lib_ring_buffer_strcpy(struct lib_ring_buffer_backend *bufb,
+				   size_t offset, const char *src, size_t len,
+				   ssize_t pagecpy, int pad);
 extern void _lib_ring_buffer_copy_from_user_inatomic(struct lib_ring_buffer_backend *bufb,
 					    size_t offset, const void *src,
 					    size_t len, ssize_t pagecpy);
+extern void _lib_ring_buffer_strcpy_from_user_inatomic(struct lib_ring_buffer_backend *bufb,
+		size_t offset, const char __user *src, size_t len,
+		ssize_t pagecpy, int pad);
 
 /*
  * Subbuffer ID bits for overwrite mode. Need to fit within a single word to be
diff --git a/lib/ringbuffer/ring_buffer_backend.c b/lib/ringbuffer/ring_buffer_backend.c
index 32accf3..550fad3 100644
--- a/lib/ringbuffer/ring_buffer_backend.c
+++ b/lib/ringbuffer/ring_buffer_backend.c
@@ -557,6 +557,87 @@ void _lib_ring_buffer_memset(struct lib_ring_buffer_backend *bufb,
 }
 EXPORT_SYMBOL_GPL(_lib_ring_buffer_memset);
 
+/**
+ * lib_ring_buffer_strcpy - write string data to a ring_buffer buffer.
+ * @bufb : buffer backend
+ * @offset : offset within the buffer
+ * @src : source address
+ * @len : length to write
+ * @pagecpy : page size copied so far
+ * @pad : character to use for padding
+ */
+void _lib_ring_buffer_strcpy(struct lib_ring_buffer_backend *bufb,
+			size_t offset, const char *src, size_t len,
+			ssize_t pagecpy, int pad)
+{
+	struct channel_backend *chanb = &bufb->chan->backend;
+	const struct lib_ring_buffer_config *config = &chanb->config;
+	size_t sbidx, index;
+	struct lib_ring_buffer_backend_pages *rpages;
+	unsigned long sb_bindex, id;
+	int src_terminated = 0;
+
+	CHAN_WARN_ON(chanb, !len);
+	offset += pagecpy;
+	do {
+		len -= pagecpy;
+		if (!src_terminated)
+			src += pagecpy;
+		sbidx = offset >> chanb->subbuf_size_order;
+		index = (offset & (chanb->subbuf_size - 1)) >> PAGE_SHIFT;
+
+		/*
+		 * Underlying layer should never ask for writes across
+		 * subbuffers.
+		 */
+		CHAN_WARN_ON(chanb, offset >= chanb->buf_size);
+
+		pagecpy = min_t(size_t, len, PAGE_SIZE - (offset & ~PAGE_MASK));
+		id = bufb->buf_wsb[sbidx].id;
+		sb_bindex = subbuffer_id_get_index(config, id);
+		rpages = bufb->array[sb_bindex];
+		CHAN_WARN_ON(chanb, config->mode == RING_BUFFER_OVERWRITE
+			     && subbuffer_id_is_noref(config, id));
+
+		if (likely(!src_terminated)) {
+			size_t count, to_copy;
+
+			to_copy = pagecpy;
+			if (pagecpy == len)
+				to_copy--;	/* Final '\0' */
+			count = lib_ring_buffer_do_strcpy(config,
+					rpages->p[index].virt
+						+ (offset & ~PAGE_MASK),
+					src, to_copy);
+			offset += count;
+			/* Padding */
+			if (unlikely(count < to_copy)) {
+				size_t pad_len = to_copy - count;
+
+				/* Next pages will have padding */
+				src_terminated = 1;
+				lib_ring_buffer_do_memset(rpages->p[index].virt
+						+ (offset & ~PAGE_MASK),
+					pad, pad_len);
+				offset += pad_len;
+			}
+		} else {
+			size_t pad_len;
+
+			pad_len = pagecpy;
+			if (pagecpy == len)
+				pad_len--;	/* Final '\0' */
+			lib_ring_buffer_do_memset(rpages->p[index].virt
+					+ (offset & ~PAGE_MASK),
+				pad, pad_len);
+			offset += pad_len;
+		}
+	} while (unlikely(len != pagecpy));
+	/* Ending '\0' */
+	lib_ring_buffer_do_memset(rpages->p[index].virt + (offset & ~PAGE_MASK),
+			'\0', 1);
+}
+EXPORT_SYMBOL_GPL(_lib_ring_buffer_strcpy);
 
 /**
  * lib_ring_buffer_copy_from_user_inatomic - write user data to a ring_buffer buffer.
@@ -615,6 +696,91 @@ void _lib_ring_buffer_copy_from_user_inatomic(struct lib_ring_buffer_backend *bu
 EXPORT_SYMBOL_GPL(_lib_ring_buffer_copy_from_user_inatomic);
 
 /**
+ * lib_ring_buffer_strcpy_from_user_inatomic - write userspace string data to a ring_buffer buffer.
+ * @bufb : buffer backend
+ * @offset : offset within the buffer
+ * @src : source address
+ * @len : length to write
+ * @pagecpy : page size copied so far
+ * @pad : character to use for padding
+ *
+ * This function deals with userspace pointers, it should never be called
+ * directly without having the src pointer checked with access_ok()
+ * previously.
+ */
+void _lib_ring_buffer_strcpy_from_user_inatomic(struct lib_ring_buffer_backend *bufb,
+		size_t offset, const char __user *src, size_t len,
+		ssize_t pagecpy, int pad)
+{
+	struct channel_backend *chanb = &bufb->chan->backend;
+	const struct lib_ring_buffer_config *config = &chanb->config;
+	size_t sbidx, index;
+	struct lib_ring_buffer_backend_pages *rpages;
+	unsigned long sb_bindex, id;
+	int src_terminated = 0;
+
+	offset += pagecpy;
+	do {
+		len -= pagecpy;
+		if (!src_terminated)
+			src += pagecpy;
+		sbidx = offset >> chanb->subbuf_size_order;
+		index = (offset & (chanb->subbuf_size - 1)) >> PAGE_SHIFT;
+
+		/*
+		 * Underlying layer should never ask for writes across
+		 * subbuffers.
+		 */
+		CHAN_WARN_ON(chanb, offset >= chanb->buf_size);
+
+		pagecpy = min_t(size_t, len, PAGE_SIZE - (offset & ~PAGE_MASK));
+		id = bufb->buf_wsb[sbidx].id;
+		sb_bindex = subbuffer_id_get_index(config, id);
+		rpages = bufb->array[sb_bindex];
+		CHAN_WARN_ON(chanb, config->mode == RING_BUFFER_OVERWRITE
+				&& subbuffer_id_is_noref(config, id));
+
+		if (likely(!src_terminated)) {
+			size_t count, to_copy;
+
+			to_copy = pagecpy;
+			if (pagecpy == len)
+				to_copy--;	/* Final '\0' */
+			count = lib_ring_buffer_do_strcpy_from_user_inatomic(config,
+					rpages->p[index].virt
+						+ (offset & ~PAGE_MASK),
+					src, to_copy);
+			offset += count;
+			/* Padding */
+			if (unlikely(count < to_copy)) {
+				size_t pad_len = to_copy - count;
+
+				/* Next pages will have padding */
+				src_terminated = 1;
+				lib_ring_buffer_do_memset(rpages->p[index].virt
+						+ (offset & ~PAGE_MASK),
+					pad, pad_len);
+				offset += pad_len;
+			}
+		} else {
+			size_t pad_len;
+
+			pad_len = pagecpy;
+			if (pagecpy == len)
+				pad_len--;	/* Final '\0' */
+			lib_ring_buffer_do_memset(rpages->p[index].virt
+					+ (offset & ~PAGE_MASK),
+				pad, pad_len);
+			offset += pad_len;
+		}
+	} while (unlikely(len != pagecpy));
+	/* Ending '\0' */
+	lib_ring_buffer_do_memset(rpages->p[index].virt + (offset & ~PAGE_MASK),
+			'\0', 1);
+}
+EXPORT_SYMBOL_GPL(_lib_ring_buffer_strcpy_from_user_inatomic);
+
+/**
  * lib_ring_buffer_read - read data from ring_buffer_buffer.
  * @bufb : buffer backend
  * @offset : offset within the buffer
diff --git a/lttng-events.h b/lttng-events.h
index aeb2a6b..e1de1af 100644
--- a/lttng-events.h
+++ b/lttng-events.h
@@ -236,6 +236,10 @@ struct lttng_channel_ops {
 				      const void *src, size_t len);
 	void (*event_memset)(struct lib_ring_buffer_ctx *ctx,
 			     int c, size_t len);
+	void (*event_strcpy)(struct lib_ring_buffer_ctx *ctx, const char *src,
+			     size_t len);
+	void (*event_strcpy_from_user)(struct lib_ring_buffer_ctx *ctx,
+				       const char __user *src, size_t len);
 	/*
 	 * packet_avail_size returns the available size in the current
 	 * packet. Note that the size returned is only a hint, since it
diff --git a/lttng-ring-buffer-client.h b/lttng-ring-buffer-client.h
index 288cc32..9872ea4 100644
--- a/lttng-ring-buffer-client.h
+++ b/lttng-ring-buffer-client.h
@@ -629,6 +629,21 @@ void lttng_event_memset(struct lib_ring_buffer_ctx *ctx,
 }
 
 static
+void lttng_event_strcpy(struct lib_ring_buffer_ctx *ctx, const char *src,
+		size_t len)
+{
+	lib_ring_buffer_strcpy(&client_config, ctx, src, len, '#');
+}
+
+static
+void lttng_event_strcpy_from_user(struct lib_ring_buffer_ctx *ctx,
+		const char __user *src, size_t len)
+{
+	lib_ring_buffer_strcpy_from_user_inatomic(&client_config, ctx, src,
+			len, '#');
+}
+
+static
 wait_queue_head_t *lttng_get_writer_buf_wait_queue(struct channel *chan, int cpu)
 {
 	struct lib_ring_buffer *buf = channel_get_ring_buffer(&client_config,
@@ -669,6 +684,8 @@ static struct lttng_transport lttng_relay_transport = {
 		.event_write = lttng_event_write,
 		.event_write_from_user = lttng_event_write_from_user,
 		.event_memset = lttng_event_memset,
+		.event_strcpy = lttng_event_strcpy,
+		.event_strcpy_from_user = lttng_event_strcpy_from_user,
 		.packet_avail_size = NULL,	/* Would be racy anyway */
 		.get_writer_buf_wait_queue = lttng_get_writer_buf_wait_queue,
 		.get_hp_wait_queue = lttng_get_hp_wait_queue,
diff --git a/lttng-ring-buffer-metadata-client.h b/lttng-ring-buffer-metadata-client.h
index 5c8a1df..f077f4f 100644
--- a/lttng-ring-buffer-metadata-client.h
+++ b/lttng-ring-buffer-metadata-client.h
@@ -326,6 +326,13 @@ void lttng_event_memset(struct lib_ring_buffer_ctx *ctx,
 }
 
 static
+void lttng_event_strcpy(struct lib_ring_buffer_ctx *ctx, const char *src,
+		size_t len)
+{
+	lib_ring_buffer_strcpy(&client_config, ctx, src, len, '#');
+}
+
+static
 size_t lttng_packet_avail_size(struct channel *chan)
 			     
 {
@@ -383,6 +390,7 @@ static struct lttng_transport lttng_relay_transport = {
 		.event_write_from_user = lttng_event_write_from_user,
 		.event_memset = lttng_event_memset,
 		.event_write = lttng_event_write,
+		.event_strcpy = lttng_event_strcpy,
 		.packet_avail_size = lttng_packet_avail_size,
 		.get_writer_buf_wait_queue = lttng_get_writer_buf_wait_queue,
 		.get_hp_wait_queue = lttng_get_hp_wait_queue,
diff --git a/probes/lttng-events.h b/probes/lttng-events.h
index 680f466..ab6f342 100644
--- a/probes/lttng-events.h
+++ b/probes/lttng-events.h
@@ -691,24 +691,22 @@ __assign_##dest##_3:							\
  */
 #undef tp_copy_string_from_user
 #define tp_copy_string_from_user(dest, src)				\
-	__assign_##dest:						\
-	{								\
-		size_t __ustrlen;					\
-									\
-		if (0)							\
-			(void) __typemap.dest;				\
-		lib_ring_buffer_align_ctx(&__ctx, lttng_alignof(__typemap.dest));\
-		__ustrlen = __get_dynamic_array_len(dest);		\
-		if (likely(__ustrlen > 1)) {				\
-			__chan->ops->event_write_from_user(&__ctx, src,	\
-				__ustrlen - 1);				\
-		}							\
-		__chan->ops->event_memset(&__ctx, 0, 1);		\
-	}								\
+__assign_##dest:							\
+	if (0)								\
+		(void) __typemap.dest;					\
+	lib_ring_buffer_align_ctx(&__ctx, lttng_alignof(__typemap.dest)); \
+	__chan->ops->event_strcpy_from_user(&__ctx, src,		\
+		__get_dynamic_array_len(dest));				\
 	goto __end_field_##dest;
+
 #undef tp_strcpy
 #define tp_strcpy(dest, src)						\
-	tp_memcpy(dest, src, __get_dynamic_array_len(dest))
+__assign_##dest:							\
+	if (0)								\
+		(void) __typemap.dest;					\
+	lib_ring_buffer_align_ctx(&__ctx, lttng_alignof(__typemap.dest)); \
+	__chan->ops->event_strcpy(&__ctx, src, __get_dynamic_array_len(dest)); \
+	goto __end_field_##dest;
 
 /* Named field types must be defined in lttng-types.h */
 
-- 
1.7.10.4




More information about the lttng-dev mailing list