[ltt-dev] [PATCH] LTTng optimize write to page function

Lai Jiangshan laijs at cn.fujitsu.com
Tue Feb 3 20:12:05 EST 2009


Mathieu Desnoyers wrote:
> The functions in ltt-relay-alloc.c take care of writing the data into
> the buffer pages. Those pages are allocated from the page allocator and
> no virtual mapping is done so we can save precious TLB entries.
> ltt-relay-alloc.c is the abstraction layer which makes the buffers
> "look" like a contiguous memory area, although they are made from
> physically discontiguous pages linked with a linked list. A caching
> mechanism makes sure we never walk over more than 1-2 entries of the
> list. We use a linked list rather than a table to make sure we don't
> depend on vmalloc to allocate large pointer arrays.
> 
> I did a bit of profiling with oprofile on LTTng and found out that write
> functions in ltt-relay-alloc.c were taking a lot of CPU time. I thought it
> would be good to improve them a bit.
> 
> Running a 2.6.29-rc3 kernel
> 
> Compiling a 2.6.25 kernel using make -j10 on a 8-cores x86_64 with a vanilla
> 2.6.29-rc3 kernel (all tests are cache-hot) :
> real 1m22.103s
> 
> With dormant instrumentation
> real 1m24.667s
> (note : this 2s regression should be identified eventually by doing a bisection
> of the LTTng tree.)
> 
> ltt-armall
> 
> Without modification, with flight recorder tracing active :
> real 1m31.135s
> 
> Replacing the memcpy call with a specialized call for 1, 2, 4 and 8 bytes :
> real 1m30.440s
> 
> Inlining the fast path of the write function :
> real 1m29.614s
> 
> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers at polymtl.ca>
> CC: Martin Bligh <mbligh at google.com>
> CC: Zhaolei <zhaolei at cn.fujitsu.com>
> ---
>  include/linux/ltt-relay.h |   88 ++++++++++++++++++++++++++++++++++++++++++++--
>  ltt/ltt-relay-alloc.c     |   66 ++++++----------------------------
>  2 files changed, 98 insertions(+), 56 deletions(-)
> 
> Index: linux-2.6-lttng/ltt/ltt-relay-alloc.c
> ===================================================================
> --- linux-2.6-lttng.orig/ltt/ltt-relay-alloc.c	2009-02-03 10:37:05.000000000 -0500
> +++ linux-2.6-lttng/ltt/ltt-relay-alloc.c	2009-02-03 10:37:13.000000000 -0500
> @@ -424,7 +424,7 @@ EXPORT_SYMBOL_GPL(ltt_relay_close);
>  /*
>   * Start iteration at the previous element. Skip the real list head.
>   */
> -static struct buf_page *ltt_relay_find_prev_page(struct rchan_buf *buf,
> +struct buf_page *ltt_relay_find_prev_page(struct rchan_buf *buf,
>  	struct buf_page *page, size_t offset, ssize_t diff_offset)
>  {
>  	struct buf_page *iter;
> @@ -456,13 +456,15 @@ static struct buf_page *ltt_relay_find_p
>  			return iter;
>  		}
>  	}
> +	WARN_ON(1);
>  	return NULL;
>  }
> +EXPORT_SYMBOL_GPL(ltt_relay_find_prev_page);
>  
>  /*
>   * Start iteration at the next element. Skip the real list head.
>   */
> -static struct buf_page *ltt_relay_find_next_page(struct rchan_buf *buf,
> +struct buf_page *ltt_relay_find_next_page(struct rchan_buf *buf,
>  	struct buf_page *page, size_t offset, ssize_t diff_offset)
>  {
>  	struct buf_page *iter;
> @@ -494,48 +496,10 @@ static struct buf_page *ltt_relay_find_n
>  			return iter;
>  		}
>  	}
> +	WARN_ON(1);
>  	return NULL;
>  }
> -
> -/*
> - * Find the page containing "offset". Cache it if it is after the currently
> - * cached page.
> - */
> -static struct buf_page *ltt_relay_cache_page(struct rchan_buf *buf,
> -		struct buf_page **page_cache,
> -		struct buf_page *page, size_t offset)
> -{
> -	ssize_t diff_offset;
> -	ssize_t half_buf_size = buf->chan->alloc_size >> 1;
> -
> -	/*
> -	 * Make sure this is the page we want to write into. The current
> -	 * page is changed concurrently by other writers. [wrh]page are
> -	 * used as a cache remembering the last page written
> -	 * to/read/looked up for header address. No synchronization;
> -	 * could have to find the previous page is a nested write
> -	 * occured. Finding the right page is done by comparing the
> -	 * dest_offset with the buf_page offsets.
> -	 * When at the exact opposite of the buffer, bias towards forward search
> -	 * because it will be cached.
> -	 */
> -
> -	diff_offset = (ssize_t)offset - (ssize_t)page->offset;
> -	if (diff_offset <= -(ssize_t)half_buf_size)
> -		diff_offset += buf->chan->alloc_size;
> -	else if (diff_offset > half_buf_size)
> -		diff_offset -= buf->chan->alloc_size;
> -
> -	if (unlikely(diff_offset >= (ssize_t)PAGE_SIZE)) {
> -		page = ltt_relay_find_next_page(buf, page, offset, diff_offset);
> -		WARN_ON(!page);
> -		*page_cache = page;
> -	} else if (unlikely(diff_offset < 0)) {
> -		page = ltt_relay_find_prev_page(buf, page, offset, diff_offset);
> -		WARN_ON(!page);
> -	}
> -	return page;
> -}
> +EXPORT_SYMBOL_GPL(ltt_relay_find_next_page);
>  
>  /**
>   * ltt_relay_write - write data to a ltt_relay buffer.
> @@ -543,22 +507,17 @@ static struct buf_page *ltt_relay_cache_
>   * @offset : offset within the buffer
>   * @src : source address
>   * @len : length to write
> + * @page : cached buffer page
>   */
> -int ltt_relay_write(struct rchan_buf *buf, size_t offset,
> -	const void *src, size_t len)
> +void _ltt_relay_write(struct rchan_buf *buf, size_t offset,
> +	const void *src, size_t len, struct buf_page *page)
>  {
> -	struct buf_page *page;
> -	ssize_t pagecpy, orig_len;
> +	ssize_t pagecpy;
>  
> -	orig_len = len;
> -	offset &= buf->chan->alloc_size - 1;
> -	page = buf->wpage;
> -	if (unlikely(!len))
> -		return 0;
>  	for (;;) {
>  		page = ltt_relay_cache_page(buf, &buf->wpage, page, offset);
>  		pagecpy = min_t(size_t, len, PAGE_SIZE - (offset & ~PAGE_MASK));
> -		memcpy(page_address(page->page)
> +		ltt_relay_do_copy(page_address(page->page)
>  			+ (offset & ~PAGE_MASK), src, pagecpy);

I think memcpy() is better than ltt_relay_do_copy() here: the copy length
(pagecpy) is unlikely to be exactly 1, 2, 4, or 8 in this slow path, so the
specialized switch just falls through to memcpy() anyway.
>  		len -= pagecpy;
>  		if (likely(!len))
> @@ -571,9 +530,8 @@ int ltt_relay_write(struct rchan_buf *bu
>  		 */
>  		WARN_ON(offset >= buf->chan->alloc_size);
>  	}
> -	return orig_len;
>  }
> -EXPORT_SYMBOL_GPL(ltt_relay_write);
> +EXPORT_SYMBOL_GPL(_ltt_relay_write);
>  
>  /**
>   * ltt_relay_read - read data from ltt_relay_buffer.
> Index: linux-2.6-lttng/include/linux/ltt-relay.h
> ===================================================================
> --- linux-2.6-lttng.orig/include/linux/ltt-relay.h	2009-02-03 10:37:06.000000000 -0500
> +++ linux-2.6-lttng/include/linux/ltt-relay.h	2009-02-03 10:37:13.000000000 -0500
> @@ -140,8 +140,14 @@ struct rchan_callbacks {
>  	int (*remove_buf_file)(struct dentry *dentry);
>  };
>  
> -extern int ltt_relay_write(struct rchan_buf *buf, size_t offset,
> -	const void *src, size_t len);
> +extern struct buf_page *ltt_relay_find_prev_page(struct rchan_buf *buf,
> +	struct buf_page *page, size_t offset, ssize_t diff_offset);
> +
> +extern struct buf_page *ltt_relay_find_next_page(struct rchan_buf *buf,
> +	struct buf_page *page, size_t offset, ssize_t diff_offset);
> +
> +extern void _ltt_relay_write(struct rchan_buf *buf, size_t offset,
> +	const void *src, size_t len, struct buf_page *page);
>  
>  extern int ltt_relay_read(struct rchan_buf *buf, size_t offset,
>  	void *dest, size_t len);
> @@ -159,6 +165,84 @@ extern void *ltt_relay_offset_address(st
>  	size_t offset);
>  
>  /*
> + * Find the page containing "offset". Cache it if it is after the currently
> + * cached page.
> + */
> +static inline struct buf_page *ltt_relay_cache_page(struct rchan_buf *buf,
> +		struct buf_page **page_cache,
> +		struct buf_page *page, size_t offset)
> +{
> +	ssize_t diff_offset;
> +	ssize_t half_buf_size = buf->chan->alloc_size >> 1;
> +
> +	/*
> +	 * Make sure this is the page we want to write into. The current
> +	 * page is changed concurrently by other writers. [wrh]page are
> +	 * used as a cache remembering the last page written
> +	 * to/read/looked up for header address. No synchronization;
> +	 * could have to find the previous page is a nested write
> +	 * occured. Finding the right page is done by comparing the
> +	 * dest_offset with the buf_page offsets.
> +	 * When at the exact opposite of the buffer, bias towards forward search
> +	 * because it will be cached.
> +	 */
> +
> +	diff_offset = (ssize_t)offset - (ssize_t)page->offset;
> +	if (diff_offset <= -(ssize_t)half_buf_size)
> +		diff_offset += buf->chan->alloc_size;
> +	else if (diff_offset > half_buf_size)
> +		diff_offset -= buf->chan->alloc_size;
> +
> +	if (unlikely(diff_offset >= (ssize_t)PAGE_SIZE)) {
> +		page = ltt_relay_find_next_page(buf, page, offset, diff_offset);
> +		*page_cache = page;
> +	} else if (unlikely(diff_offset < 0)) {
> +		page = ltt_relay_find_prev_page(buf, page, offset, diff_offset);
> +	}
> +	return page;
> +}
> +
> +static inline void ltt_relay_do_copy(void *dest, const void *src, size_t len)
> +{
> +	switch (len) {
> +	case 1:	*(u8 *)dest = *(const u8 *)src;
> +		break;
> +	case 2:	*(u16 *)dest = *(const u16 *)src;
> +		break;
> +	case 4:	*(u32 *)dest = *(const u32 *)src;
> +		break;
> +#if (BITS_PER_LONG == 64)
> +	case 8:	*(u64 *)dest = *(const u64 *)src;
> +		break;
> +#endif
> +	default:
> +		memcpy(dest, src, len);
> +	}
> +}

I think this function is not correct when @src is not suitably aligned for
the 2-, 4-, or 8-byte accesses: the direct u16/u32/u64 loads may fault or be
slow on architectures without unaligned-access support.

> +
> +static inline int ltt_relay_write(struct rchan_buf *buf, size_t offset,
> +	const void *src, size_t len)
> +{
> +	struct buf_page *page;
> +	ssize_t pagecpy, orig_len;
> +
> +	if (unlikely(!len))
> +		return 0;
> +	orig_len = len;
> +	offset &= buf->chan->alloc_size - 1;
> +	page = buf->wpage;
> +
> +	page = ltt_relay_cache_page(buf, &buf->wpage, page, offset);
> +	pagecpy = min_t(size_t, len, PAGE_SIZE - (offset & ~PAGE_MASK));
> +	ltt_relay_do_copy(page_address(page->page)
> +		+ (offset & ~PAGE_MASK), src, pagecpy);
> +	len -= pagecpy;
> +	if (unlikely(len))
> +		_ltt_relay_write(buf, offset, src, len, page);
> +	return orig_len;
> +}
> +
> +/*
>   * CONFIG_LTT_RELAY kernel API, ltt/ltt-relay-alloc.c
>   */
>  
> 






More information about the lttng-dev mailing list