[lttng-dev] [PATCH lttng-modules] Fix: allow racy tracepoint string input from kernel and userspace
Mathieu Desnoyers
mathieu.desnoyers at efficios.com
Mon Apr 14 15:25:17 EDT 2014
Fixes #781
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers at efficios.com>
---
lib/ringbuffer/backend.h | 215 ++++++++++++++++++++++++++++++++++
lib/ringbuffer/backend_internal.h | 6 +
lib/ringbuffer/ring_buffer_backend.c | 166 ++++++++++++++++++++++++++
lttng-events.h | 4 +
lttng-ring-buffer-client.h | 17 +++
lttng-ring-buffer-metadata-client.h | 8 ++
probes/lttng-events.h | 28 ++---
7 files changed, 429 insertions(+), 15 deletions(-)
diff --git a/lib/ringbuffer/backend.h b/lib/ringbuffer/backend.h
index bbbc80d..7246ec4 100644
--- a/lib/ringbuffer/backend.h
+++ b/lib/ringbuffer/backend.h
@@ -165,6 +165,128 @@ void lib_ring_buffer_memset(const struct lib_ring_buffer_config *config,
ctx->buf_offset += len;
}
+/*
+ * Copy up to @len string bytes from @src to @dest. Stop whenever a NULL
+ * terminating character is found in @src. Returns the number of bytes
+ * copied. Does *not* terminate @dest with NULL terminating character.
+ */
+static inline
+size_t lib_ring_buffer_do_strcpy(const struct lib_ring_buffer_config *config,
+ char *dest, const char *src, size_t len)
+{
+ size_t count;
+
+ for (count = 0; count < len; count++) {
+ char c;
+
+ /*
+ * Only read source character once, in case it is
+ * modified concurrently.
+ */
+ c = ACCESS_ONCE(src[count]);
+ if (!c)
+ break;
+ lib_ring_buffer_do_copy(config, &dest[count], &c, 1);
+ }
+ return count;
+}
+
+/*
+ * Copy up to @len string bytes from @src to @dest. Stop whenever a NULL
+ * terminating character is found in @src, or when a fault occurs.
+ * Returns the number of bytes copied. Does *not* terminate @dest with
+ * NULL terminating character.
+ *
+ * This function deals with userspace pointers, it should never be called
+ * directly without having the src pointer checked with access_ok()
+ * previously.
+ */
+static inline
+size_t lib_ring_buffer_do_strcpy_from_user_inatomic(const struct lib_ring_buffer_config *config,
+ char *dest, const char __user *src, size_t len)
+{
+ size_t count;
+
+ for (count = 0; count < len; count++) {
+ int ret;
+ char c;
+
+ ret = __get_user(c, &src[count]);
+ if (ret || !c)
+ break;
+ lib_ring_buffer_do_copy(config, &dest[count], &c, 1);
+ }
+ return count;
+}
+
+/**
+ * lib_ring_buffer_strcpy - write string data to a buffer backend
+ * @config : ring buffer instance configuration
+ * @ctx: ring buffer context. (input arguments only)
+ * @src : source pointer to copy from
+ * @len : length of data to copy
+ * @pad : character to use for padding
+ *
+ * This function copies @len - 1 bytes of string data from a source
+ * pointer to a buffer backend, followed by a terminating '\0'
+ * character, at the current context offset. This is more or less a
+ * buffer backend-specific strncpy() operation. If a terminating '\0'
+ * character is found in @src before @len - 1 characters are copied, pad
+ * the buffer with @pad characters (e.g. '#'). Calls the slow path
+ * (_ring_buffer_strcpy) if copy is crossing a page boundary.
+ */
+static inline
+void lib_ring_buffer_strcpy(const struct lib_ring_buffer_config *config,
+ struct lib_ring_buffer_ctx *ctx,
+ const char *src, size_t len, int pad)
+{
+ struct lib_ring_buffer_backend *bufb = &ctx->buf->backend;
+ struct channel_backend *chanb = &ctx->chan->backend;
+ size_t sbidx, index;
+ size_t offset = ctx->buf_offset;
+ ssize_t pagecpy;
+ struct lib_ring_buffer_backend_pages *rpages;
+ unsigned long sb_bindex, id;
+
+ if (unlikely(!len))
+ return;
+ offset &= chanb->buf_size - 1;
+ sbidx = offset >> chanb->subbuf_size_order;
+ index = (offset & (chanb->subbuf_size - 1)) >> PAGE_SHIFT;
+ pagecpy = min_t(size_t, len, (-offset) & ~PAGE_MASK);
+ id = bufb->buf_wsb[sbidx].id;
+ sb_bindex = subbuffer_id_get_index(config, id);
+ rpages = bufb->array[sb_bindex];
+ CHAN_WARN_ON(ctx->chan,
+ config->mode == RING_BUFFER_OVERWRITE
+ && subbuffer_id_is_noref(config, id));
+ if (likely(pagecpy == len)) {
+ size_t count;
+
+ count = lib_ring_buffer_do_strcpy(config,
+ rpages->p[index].virt
+ + (offset & ~PAGE_MASK),
+ src, len - 1);
+ offset += count;
+ /* Padding */
+ if (unlikely(count < len - 1)) {
+ size_t pad_len = len - 1 - count;
+
+ lib_ring_buffer_do_memset(rpages->p[index].virt
+ + (offset & ~PAGE_MASK),
+ pad, pad_len);
+ offset += pad_len;
+ }
+ /* Ending '\0' */
+ lib_ring_buffer_do_memset(rpages->p[index].virt
+ + (offset & ~PAGE_MASK),
+ '\0', 1);
+ } else {
+ _lib_ring_buffer_strcpy(bufb, offset, src, len, 0, pad);
+ }
+ ctx->buf_offset += len;
+}
+
/**
* lib_ring_buffer_copy_from_user_inatomic - write userspace data to a buffer backend
* @config : ring buffer instance configuration
@@ -239,6 +361,99 @@ fill_buffer:
_lib_ring_buffer_memset(bufb, offset, 0, len, 0);
}
+/**
+ * lib_ring_buffer_strcpy_from_user_inatomic - write userspace string data to a buffer backend
+ * @config : ring buffer instance configuration
+ * @ctx: ring buffer context (input arguments only)
+ * @src : userspace source pointer to copy from
+ * @len : length of data to copy
+ * @pad : character to use for padding
+ *
+ * This function copies @len - 1 bytes of string data from a userspace
+ * source pointer to a buffer backend, followed by a terminating '\0'
+ * character, at the current context offset. This is more or less a
+ * buffer backend-specific strncpy() operation. If a terminating '\0'
+ * character is found in @src before @len - 1 characters are copied, pad
+ * the buffer with @pad characters (e.g. '#'). Calls the slow path
+ * (_ring_buffer_strcpy_from_user_inatomic) if copy is crossing a page
+ * boundary. Disable the page fault handler to ensure we never try to
+ * take the mmap_sem.
+ */
+static inline
+void lib_ring_buffer_strcpy_from_user_inatomic(const struct lib_ring_buffer_config *config,
+ struct lib_ring_buffer_ctx *ctx,
+ const void __user *src, size_t len, int pad)
+{
+ struct lib_ring_buffer_backend *bufb = &ctx->buf->backend;
+ struct channel_backend *chanb = &ctx->chan->backend;
+ size_t sbidx, index;
+ size_t offset = ctx->buf_offset;
+ ssize_t pagecpy;
+ struct lib_ring_buffer_backend_pages *rpages;
+ unsigned long sb_bindex, id;
+ mm_segment_t old_fs = get_fs();
+
+ if (unlikely(!len))
+ return;
+ offset &= chanb->buf_size - 1;
+ sbidx = offset >> chanb->subbuf_size_order;
+ index = (offset & (chanb->subbuf_size - 1)) >> PAGE_SHIFT;
+ pagecpy = min_t(size_t, len, (-offset) & ~PAGE_MASK);
+ id = bufb->buf_wsb[sbidx].id;
+ sb_bindex = subbuffer_id_get_index(config, id);
+ rpages = bufb->array[sb_bindex];
+ CHAN_WARN_ON(ctx->chan,
+ config->mode == RING_BUFFER_OVERWRITE
+ && subbuffer_id_is_noref(config, id));
+
+ set_fs(KERNEL_DS);
+ pagefault_disable();
+ if (unlikely(!access_ok(VERIFY_READ, src, len)))
+ goto fill_buffer;
+
+ if (likely(pagecpy == len)) {
+ size_t count;
+
+ count = lib_ring_buffer_do_strcpy_from_user_inatomic(config,
+ rpages->p[index].virt
+ + (offset & ~PAGE_MASK),
+ src, len - 1);
+ offset += count;
+ /* Padding */
+ if (unlikely(count < len - 1)) {
+ size_t pad_len = len - 1 - count;
+
+ lib_ring_buffer_do_memset(rpages->p[index].virt
+ + (offset & ~PAGE_MASK),
+ pad, pad_len);
+ offset += pad_len;
+ }
+ /* Ending '\0' */
+ lib_ring_buffer_do_memset(rpages->p[index].virt
+ + (offset & ~PAGE_MASK),
+ '\0', 1);
+ } else {
+ _lib_ring_buffer_strcpy_from_user_inatomic(bufb, offset, src,
+ len, 0, pad);
+ }
+ pagefault_enable();
+ set_fs(old_fs);
+ ctx->buf_offset += len;
+
+ return;
+
+fill_buffer:
+ pagefault_enable();
+ set_fs(old_fs);
+ /*
+ * In the error path we call the slow path version to avoid
+ * the pollution of static inline code.
+ */
+ _lib_ring_buffer_memset(bufb, offset, pad, len - 1, 0);
+ offset += len - 1;
+ _lib_ring_buffer_memset(bufb, offset, '\0', 1, 0);
+}
+
/*
* This accessor counts the number of unread records in a buffer.
* It only provides a consistent value if no reads not writes are performed
diff --git a/lib/ringbuffer/backend_internal.h b/lib/ringbuffer/backend_internal.h
index 9682c5b..e4df3b9 100644
--- a/lib/ringbuffer/backend_internal.h
+++ b/lib/ringbuffer/backend_internal.h
@@ -56,9 +56,15 @@ extern void _lib_ring_buffer_write(struct lib_ring_buffer_backend *bufb,
extern void _lib_ring_buffer_memset(struct lib_ring_buffer_backend *bufb,
size_t offset, int c, size_t len,
ssize_t pagecpy);
+extern void _lib_ring_buffer_strcpy(struct lib_ring_buffer_backend *bufb,
+ size_t offset, const char *src, size_t len,
+ ssize_t pagecpy, int pad);
extern void _lib_ring_buffer_copy_from_user_inatomic(struct lib_ring_buffer_backend *bufb,
size_t offset, const void *src,
size_t len, ssize_t pagecpy);
+extern void _lib_ring_buffer_strcpy_from_user_inatomic(struct lib_ring_buffer_backend *bufb,
+ size_t offset, const char __user *src, size_t len,
+ ssize_t pagecpy, int pad);
/*
* Subbuffer ID bits for overwrite mode. Need to fit within a single word to be
diff --git a/lib/ringbuffer/ring_buffer_backend.c b/lib/ringbuffer/ring_buffer_backend.c
index 32accf3..550fad3 100644
--- a/lib/ringbuffer/ring_buffer_backend.c
+++ b/lib/ringbuffer/ring_buffer_backend.c
@@ -557,6 +557,87 @@ void _lib_ring_buffer_memset(struct lib_ring_buffer_backend *bufb,
}
EXPORT_SYMBOL_GPL(_lib_ring_buffer_memset);
+/**
+ * lib_ring_buffer_strcpy - write string data to a ring_buffer buffer.
+ * @bufb : buffer backend
+ * @offset : offset within the buffer
+ * @src : source address
+ * @len : length to write
+ * @pagecpy : page size copied so far
+ * @pad : character to use for padding
+ */
+void _lib_ring_buffer_strcpy(struct lib_ring_buffer_backend *bufb,
+ size_t offset, const char *src, size_t len,
+ ssize_t pagecpy, int pad)
+{
+ struct channel_backend *chanb = &bufb->chan->backend;
+ const struct lib_ring_buffer_config *config = &chanb->config;
+ size_t sbidx, index;
+ struct lib_ring_buffer_backend_pages *rpages;
+ unsigned long sb_bindex, id;
+ int src_terminated = 0;
+
+ CHAN_WARN_ON(chanb, !len);
+ offset += pagecpy;
+ do {
+ len -= pagecpy;
+ if (!src_terminated)
+ src += pagecpy;
+ sbidx = offset >> chanb->subbuf_size_order;
+ index = (offset & (chanb->subbuf_size - 1)) >> PAGE_SHIFT;
+
+ /*
+ * Underlying layer should never ask for writes across
+ * subbuffers.
+ */
+ CHAN_WARN_ON(chanb, offset >= chanb->buf_size);
+
+ pagecpy = min_t(size_t, len, PAGE_SIZE - (offset & ~PAGE_MASK));
+ id = bufb->buf_wsb[sbidx].id;
+ sb_bindex = subbuffer_id_get_index(config, id);
+ rpages = bufb->array[sb_bindex];
+ CHAN_WARN_ON(chanb, config->mode == RING_BUFFER_OVERWRITE
+ && subbuffer_id_is_noref(config, id));
+
+ if (likely(!src_terminated)) {
+ size_t count, to_copy;
+
+ to_copy = pagecpy;
+ if (pagecpy == len)
+ to_copy--; /* Final '\0' */
+ count = lib_ring_buffer_do_strcpy(config,
+ rpages->p[index].virt
+ + (offset & ~PAGE_MASK),
+ src, to_copy);
+ offset += count;
+ /* Padding */
+ if (unlikely(count < to_copy)) {
+ size_t pad_len = to_copy - count;
+
+ /* Next pages will have padding */
+ src_terminated = 1;
+ lib_ring_buffer_do_memset(rpages->p[index].virt
+ + (offset & ~PAGE_MASK),
+ pad, pad_len);
+ offset += pad_len;
+ }
+ } else {
+ size_t pad_len;
+
+ pad_len = pagecpy;
+ if (pagecpy == len)
+ pad_len--; /* Final '\0' */
+ lib_ring_buffer_do_memset(rpages->p[index].virt
+ + (offset & ~PAGE_MASK),
+ pad, pad_len);
+ offset += pad_len;
+ }
+ } while (unlikely(len != pagecpy));
+ /* Ending '\0' */
+ lib_ring_buffer_do_memset(rpages->p[index].virt + (offset & ~PAGE_MASK),
+ '\0', 1);
+}
+EXPORT_SYMBOL_GPL(_lib_ring_buffer_strcpy);
/**
* lib_ring_buffer_copy_from_user_inatomic - write user data to a ring_buffer buffer.
@@ -615,6 +696,91 @@ void _lib_ring_buffer_copy_from_user_inatomic(struct lib_ring_buffer_backend *bu
EXPORT_SYMBOL_GPL(_lib_ring_buffer_copy_from_user_inatomic);
/**
+ * lib_ring_buffer_strcpy_from_user_inatomic - write userspace string data to a ring_buffer buffer.
+ * @bufb : buffer backend
+ * @offset : offset within the buffer
+ * @src : source address
+ * @len : length to write
+ * @pagecpy : page size copied so far
+ * @pad : character to use for padding
+ *
+ * This function deals with userspace pointers, it should never be called
+ * directly without having the src pointer checked with access_ok()
+ * previously.
+ */
+void _lib_ring_buffer_strcpy_from_user_inatomic(struct lib_ring_buffer_backend *bufb,
+ size_t offset, const char __user *src, size_t len,
+ ssize_t pagecpy, int pad)
+{
+ struct channel_backend *chanb = &bufb->chan->backend;
+ const struct lib_ring_buffer_config *config = &chanb->config;
+ size_t sbidx, index;
+ struct lib_ring_buffer_backend_pages *rpages;
+ unsigned long sb_bindex, id;
+ int src_terminated = 0;
+
+ offset += pagecpy;
+ do {
+ len -= pagecpy;
+ if (!src_terminated)
+ src += pagecpy;
+ sbidx = offset >> chanb->subbuf_size_order;
+ index = (offset & (chanb->subbuf_size - 1)) >> PAGE_SHIFT;
+
+ /*
+ * Underlying layer should never ask for writes across
+ * subbuffers.
+ */
+ CHAN_WARN_ON(chanb, offset >= chanb->buf_size);
+
+ pagecpy = min_t(size_t, len, PAGE_SIZE - (offset & ~PAGE_MASK));
+ id = bufb->buf_wsb[sbidx].id;
+ sb_bindex = subbuffer_id_get_index(config, id);
+ rpages = bufb->array[sb_bindex];
+ CHAN_WARN_ON(chanb, config->mode == RING_BUFFER_OVERWRITE
+ && subbuffer_id_is_noref(config, id));
+
+ if (likely(!src_terminated)) {
+ size_t count, to_copy;
+
+ to_copy = pagecpy;
+ if (pagecpy == len)
+ to_copy--; /* Final '\0' */
+ count = lib_ring_buffer_do_strcpy_from_user_inatomic(config,
+ rpages->p[index].virt
+ + (offset & ~PAGE_MASK),
+ src, to_copy);
+ offset += count;
+ /* Padding */
+ if (unlikely(count < to_copy)) {
+ size_t pad_len = to_copy - count;
+
+ /* Next pages will have padding */
+ src_terminated = 1;
+ lib_ring_buffer_do_memset(rpages->p[index].virt
+ + (offset & ~PAGE_MASK),
+ pad, pad_len);
+ offset += pad_len;
+ }
+ } else {
+ size_t pad_len;
+
+ pad_len = pagecpy;
+ if (pagecpy == len)
+ pad_len--; /* Final '\0' */
+ lib_ring_buffer_do_memset(rpages->p[index].virt
+ + (offset & ~PAGE_MASK),
+ pad, pad_len);
+ offset += pad_len;
+ }
+ } while (unlikely(len != pagecpy));
+ /* Ending '\0' */
+ lib_ring_buffer_do_memset(rpages->p[index].virt + (offset & ~PAGE_MASK),
+ '\0', 1);
+}
+EXPORT_SYMBOL_GPL(_lib_ring_buffer_strcpy_from_user_inatomic);
+
+/**
* lib_ring_buffer_read - read data from ring_buffer_buffer.
* @bufb : buffer backend
* @offset : offset within the buffer
diff --git a/lttng-events.h b/lttng-events.h
index aeb2a6b..e1de1af 100644
--- a/lttng-events.h
+++ b/lttng-events.h
@@ -236,6 +236,10 @@ struct lttng_channel_ops {
const void *src, size_t len);
void (*event_memset)(struct lib_ring_buffer_ctx *ctx,
int c, size_t len);
+ void (*event_strcpy)(struct lib_ring_buffer_ctx *ctx, const char *src,
+ size_t len);
+ void (*event_strcpy_from_user)(struct lib_ring_buffer_ctx *ctx,
+ const char __user *src, size_t len);
/*
* packet_avail_size returns the available size in the current
* packet. Note that the size returned is only a hint, since it
diff --git a/lttng-ring-buffer-client.h b/lttng-ring-buffer-client.h
index 288cc32..9872ea4 100644
--- a/lttng-ring-buffer-client.h
+++ b/lttng-ring-buffer-client.h
@@ -629,6 +629,21 @@ void lttng_event_memset(struct lib_ring_buffer_ctx *ctx,
}
static
+void lttng_event_strcpy(struct lib_ring_buffer_ctx *ctx, const char *src,
+ size_t len)
+{
+ lib_ring_buffer_strcpy(&client_config, ctx, src, len, '#');
+}
+
+static
+void lttng_event_strcpy_from_user(struct lib_ring_buffer_ctx *ctx,
+ const char __user *src, size_t len)
+{
+ lib_ring_buffer_strcpy_from_user_inatomic(&client_config, ctx, src,
+ len, '#');
+}
+
+static
wait_queue_head_t *lttng_get_writer_buf_wait_queue(struct channel *chan, int cpu)
{
struct lib_ring_buffer *buf = channel_get_ring_buffer(&client_config,
@@ -669,6 +684,8 @@ static struct lttng_transport lttng_relay_transport = {
.event_write = lttng_event_write,
.event_write_from_user = lttng_event_write_from_user,
.event_memset = lttng_event_memset,
+ .event_strcpy = lttng_event_strcpy,
+ .event_strcpy_from_user = lttng_event_strcpy_from_user,
.packet_avail_size = NULL, /* Would be racy anyway */
.get_writer_buf_wait_queue = lttng_get_writer_buf_wait_queue,
.get_hp_wait_queue = lttng_get_hp_wait_queue,
diff --git a/lttng-ring-buffer-metadata-client.h b/lttng-ring-buffer-metadata-client.h
index 5c8a1df..f077f4f 100644
--- a/lttng-ring-buffer-metadata-client.h
+++ b/lttng-ring-buffer-metadata-client.h
@@ -326,6 +326,13 @@ void lttng_event_memset(struct lib_ring_buffer_ctx *ctx,
}
static
+void lttng_event_strcpy(struct lib_ring_buffer_ctx *ctx, const char *src,
+ size_t len)
+{
+ lib_ring_buffer_strcpy(&client_config, ctx, src, len, '#');
+}
+
+static
size_t lttng_packet_avail_size(struct channel *chan)
{
@@ -383,6 +390,7 @@ static struct lttng_transport lttng_relay_transport = {
.event_write_from_user = lttng_event_write_from_user,
.event_memset = lttng_event_memset,
.event_write = lttng_event_write,
+ .event_strcpy = lttng_event_strcpy,
.packet_avail_size = lttng_packet_avail_size,
.get_writer_buf_wait_queue = lttng_get_writer_buf_wait_queue,
.get_hp_wait_queue = lttng_get_hp_wait_queue,
diff --git a/probes/lttng-events.h b/probes/lttng-events.h
index 680f466..ab6f342 100644
--- a/probes/lttng-events.h
+++ b/probes/lttng-events.h
@@ -691,24 +691,22 @@ __assign_##dest##_3: \
*/
#undef tp_copy_string_from_user
#define tp_copy_string_from_user(dest, src) \
- __assign_##dest: \
- { \
- size_t __ustrlen; \
- \
- if (0) \
- (void) __typemap.dest; \
- lib_ring_buffer_align_ctx(&__ctx, lttng_alignof(__typemap.dest));\
- __ustrlen = __get_dynamic_array_len(dest); \
- if (likely(__ustrlen > 1)) { \
- __chan->ops->event_write_from_user(&__ctx, src, \
- __ustrlen - 1); \
- } \
- __chan->ops->event_memset(&__ctx, 0, 1); \
- } \
+__assign_##dest: \
+ if (0) \
+ (void) __typemap.dest; \
+ lib_ring_buffer_align_ctx(&__ctx, lttng_alignof(__typemap.dest)); \
+ __chan->ops->event_strcpy_from_user(&__ctx, src, \
+ __get_dynamic_array_len(dest)); \
goto __end_field_##dest;
+
#undef tp_strcpy
#define tp_strcpy(dest, src) \
- tp_memcpy(dest, src, __get_dynamic_array_len(dest))
+__assign_##dest: \
+ if (0) \
+ (void) __typemap.dest; \
+ lib_ring_buffer_align_ctx(&__ctx, lttng_alignof(__typemap.dest)); \
+ __chan->ops->event_strcpy(&__ctx, src, __get_dynamic_array_len(dest)); \
+ goto __end_field_##dest;
/* Named field types must be defined in lttng-types.h */
--
1.7.10.4
More information about the lttng-dev
mailing list