[ltt-dev] [PATCH] LTTng optimize write to page function

Mathieu Desnoyers compudj at krystal.dyndns.org
Tue Feb 3 21:35:18 EST 2009


* Lai Jiangshan (laijs at cn.fujitsu.com) wrote:
> Mathieu Desnoyers wrote:
> > The functions in ltt-relay-alloc.c take care of writing the data into
> > the buffer pages. Those pages are allocated from the page allocator and
> > no virtual mapping is done so we can save precious TLB entries.
> > ltt-relay-alloc.c is the abstraction layer which makes the buffers
> > "look" like a contiguous memory area, although they are made from
> > physically discontiguous pages linked with a linked list. A caching
> > mechanism makes sure we never walk over more than 1-2 entries of the
> > list. We use a linked list rather than a table to make sure we don't
> > depend on vmalloc to allocate large pointer arrays.
> > 
> > I did a bit of profiling with oprofile on LTTng and found out that write
> > functions in ltt-relay-alloc.c were taking a lot of CPU time. I thought it would
> > be good to improve them a bit.
> > 
> > Running a 2.6.29-rc3 kernel
> > 
> > Compiling a 2.6.25 kernel using make -j10 on a 8-cores x86_64 with a vanilla
> > 2.6.29-rc3 kernel (all tests are cache-hot) :
> > real 1m22.103s
> > 
> > With dormant instrumentation
> > real 1m24.667s
> > (note : this 2s regression should be identified eventually by doing a bisection
> > of the LTTng tree.)
> > 
> > ltt-armall
> > 
> > Without modification, with flight recorder tracing active :
> > real 1m31.135s
> > 
> > Replacing the memcpy call with a specialized call for 1, 2, 4 and 8 bytes :
> > real 1m30.440s
> > 
> > Inlining the fast path of the write function :
> > real 1m29.614s
> > 
> > Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers at polymtl.ca>
> > CC: Martin Bligh <mbligh at google.com>
> > CC: Zhaolei <zhaolei at cn.fujitsu.com>
> > ---
> >  include/linux/ltt-relay.h |   88 ++++++++++++++++++++++++++++++++++++++++++++--
> >  ltt/ltt-relay-alloc.c     |   66 ++++++----------------------------
> >  2 files changed, 98 insertions(+), 56 deletions(-)
> > 
> > Index: linux-2.6-lttng/ltt/ltt-relay-alloc.c
> > ===================================================================
> > --- linux-2.6-lttng.orig/ltt/ltt-relay-alloc.c	2009-02-03 10:37:05.000000000 -0500
> > +++ linux-2.6-lttng/ltt/ltt-relay-alloc.c	2009-02-03 10:37:13.000000000 -0500
> > @@ -424,7 +424,7 @@ EXPORT_SYMBOL_GPL(ltt_relay_close);
> >  /*
> >   * Start iteration at the previous element. Skip the real list head.
> >   */
> > -static struct buf_page *ltt_relay_find_prev_page(struct rchan_buf *buf,
> > +struct buf_page *ltt_relay_find_prev_page(struct rchan_buf *buf,
> >  	struct buf_page *page, size_t offset, ssize_t diff_offset)
> >  {
> >  	struct buf_page *iter;
> > @@ -456,13 +456,15 @@ static struct buf_page *ltt_relay_find_p
> >  			return iter;
> >  		}
> >  	}
> > +	WARN_ON(1);
> >  	return NULL;
> >  }
> > +EXPORT_SYMBOL_GPL(ltt_relay_find_prev_page);
> >  
> >  /*
> >   * Start iteration at the next element. Skip the real list head.
> >   */
> > -static struct buf_page *ltt_relay_find_next_page(struct rchan_buf *buf,
> > +struct buf_page *ltt_relay_find_next_page(struct rchan_buf *buf,
> >  	struct buf_page *page, size_t offset, ssize_t diff_offset)
> >  {
> >  	struct buf_page *iter;
> > @@ -494,48 +496,10 @@ static struct buf_page *ltt_relay_find_n
> >  			return iter;
> >  		}
> >  	}
> > +	WARN_ON(1);
> >  	return NULL;
> >  }
> > -
> > -/*
> > - * Find the page containing "offset". Cache it if it is after the currently
> > - * cached page.
> > - */
> > -static struct buf_page *ltt_relay_cache_page(struct rchan_buf *buf,
> > -		struct buf_page **page_cache,
> > -		struct buf_page *page, size_t offset)
> > -{
> > -	ssize_t diff_offset;
> > -	ssize_t half_buf_size = buf->chan->alloc_size >> 1;
> > -
> > -	/*
> > -	 * Make sure this is the page we want to write into. The current
> > -	 * page is changed concurrently by other writers. [wrh]page are
> > -	 * used as a cache remembering the last page written
> > -	 * to/read/looked up for header address. No synchronization;
> > -	 * could have to find the previous page is a nested write
> > -	 * occured. Finding the right page is done by comparing the
> > -	 * dest_offset with the buf_page offsets.
> > -	 * When at the exact opposite of the buffer, bias towards forward search
> > -	 * because it will be cached.
> > -	 */
> > -
> > -	diff_offset = (ssize_t)offset - (ssize_t)page->offset;
> > -	if (diff_offset <= -(ssize_t)half_buf_size)
> > -		diff_offset += buf->chan->alloc_size;
> > -	else if (diff_offset > half_buf_size)
> > -		diff_offset -= buf->chan->alloc_size;
> > -
> > -	if (unlikely(diff_offset >= (ssize_t)PAGE_SIZE)) {
> > -		page = ltt_relay_find_next_page(buf, page, offset, diff_offset);
> > -		WARN_ON(!page);
> > -		*page_cache = page;
> > -	} else if (unlikely(diff_offset < 0)) {
> > -		page = ltt_relay_find_prev_page(buf, page, offset, diff_offset);
> > -		WARN_ON(!page);
> > -	}
> > -	return page;
> > -}
> > +EXPORT_SYMBOL_GPL(ltt_relay_find_next_page);
> >  
> >  /**
> >   * ltt_relay_write - write data to a ltt_relay buffer.
> > @@ -543,22 +507,17 @@ static struct buf_page *ltt_relay_cache_
> >   * @offset : offset within the buffer
> >   * @src : source address
> >   * @len : length to write
> > + * @page : cached buffer page
> >   */
> > -int ltt_relay_write(struct rchan_buf *buf, size_t offset,
> > -	const void *src, size_t len)
> > +void _ltt_relay_write(struct rchan_buf *buf, size_t offset,
> > +	const void *src, size_t len, struct buf_page *page)
> >  {
> > -	struct buf_page *page;
> > -	ssize_t pagecpy, orig_len;
> > +	ssize_t pagecpy;
> >  
> > -	orig_len = len;
> > -	offset &= buf->chan->alloc_size - 1;
> > -	page = buf->wpage;
> > -	if (unlikely(!len))
> > -		return 0;
> >  	for (;;) {
> >  		page = ltt_relay_cache_page(buf, &buf->wpage, page, offset);
> >  		pagecpy = min_t(size_t, len, PAGE_SIZE - (offset & ~PAGE_MASK));
> > -		memcpy(page_address(page->page)
> > +		ltt_relay_do_copy(page_address(page->page)
> >  			+ (offset & ~PAGE_MASK), src, pagecpy);
> 

Hi Lai,

Happy new chinese year :)

Please see the corrected v2 of this patch. This v1 is broken.

> I think memcpy() is better than ltt_relay_do_copy() here.
> (offset & ~PAGE_MASK) is unlikely 1,2,4,8,16 here.

ltt_relay_do_copy has the switch for 1, 2, 4, 8 for the "len" parameter,
which corresponds to "pagecpy", not (offset & ~PAGE_MASK).

> >  		len -= pagecpy;
> >  		if (likely(!len))
> > @@ -571,9 +530,8 @@ int ltt_relay_write(struct rchan_buf *bu
> >  		 */
> >  		WARN_ON(offset >= buf->chan->alloc_size);
> >  	}
> > -	return orig_len;
> >  }
> > -EXPORT_SYMBOL_GPL(ltt_relay_write);
> > +EXPORT_SYMBOL_GPL(_ltt_relay_write);
> >  
> >  /**
> >   * ltt_relay_read - read data from ltt_relay_buffer.
> > Index: linux-2.6-lttng/include/linux/ltt-relay.h
> > ===================================================================
> > --- linux-2.6-lttng.orig/include/linux/ltt-relay.h	2009-02-03 10:37:06.000000000 -0500
> > +++ linux-2.6-lttng/include/linux/ltt-relay.h	2009-02-03 10:37:13.000000000 -0500
> > @@ -140,8 +140,14 @@ struct rchan_callbacks {
> >  	int (*remove_buf_file)(struct dentry *dentry);
> >  };
> >  
> > -extern int ltt_relay_write(struct rchan_buf *buf, size_t offset,
> > -	const void *src, size_t len);
> > +extern struct buf_page *ltt_relay_find_prev_page(struct rchan_buf *buf,
> > +	struct buf_page *page, size_t offset, ssize_t diff_offset);
> > +
> > +extern struct buf_page *ltt_relay_find_next_page(struct rchan_buf *buf,
> > +	struct buf_page *page, size_t offset, ssize_t diff_offset);
> > +
> > +extern void _ltt_relay_write(struct rchan_buf *buf, size_t offset,
> > +	const void *src, size_t len, struct buf_page *page);
> >  
> >  extern int ltt_relay_read(struct rchan_buf *buf, size_t offset,
> >  	void *dest, size_t len);
> > @@ -159,6 +165,84 @@ extern void *ltt_relay_offset_address(st
> >  	size_t offset);
> >  
> >  /*
> > + * Find the page containing "offset". Cache it if it is after the currently
> > + * cached page.
> > + */
> > +static inline struct buf_page *ltt_relay_cache_page(struct rchan_buf *buf,
> > +		struct buf_page **page_cache,
> > +		struct buf_page *page, size_t offset)
> > +{
> > +	ssize_t diff_offset;
> > +	ssize_t half_buf_size = buf->chan->alloc_size >> 1;
> > +
> > +	/*
> > +	 * Make sure this is the page we want to write into. The current
> > +	 * page is changed concurrently by other writers. [wrh]page are
> > +	 * used as a cache remembering the last page written
> > +	 * to/read/looked up for header address. No synchronization;
> > +	 * could have to find the previous page is a nested write
> > +	 * occured. Finding the right page is done by comparing the
> > +	 * dest_offset with the buf_page offsets.
> > +	 * When at the exact opposite of the buffer, bias towards forward search
> > +	 * because it will be cached.
> > +	 */
> > +
> > +	diff_offset = (ssize_t)offset - (ssize_t)page->offset;
> > +	if (diff_offset <= -(ssize_t)half_buf_size)
> > +		diff_offset += buf->chan->alloc_size;
> > +	else if (diff_offset > half_buf_size)
> > +		diff_offset -= buf->chan->alloc_size;
> > +
> > +	if (unlikely(diff_offset >= (ssize_t)PAGE_SIZE)) {
> > +		page = ltt_relay_find_next_page(buf, page, offset, diff_offset);
> > +		*page_cache = page;
> > +	} else if (unlikely(diff_offset < 0)) {
> > +		page = ltt_relay_find_prev_page(buf, page, offset, diff_offset);
> > +	}
> > +	return page;
> > +}
> > +
> > +static inline void ltt_relay_do_copy(void *dest, const void *src, size_t len)
> > +{
> > +	switch (len) {
> > +	case 1:	*(u8 *)dest = *(const u8 *)src;
> > +		break;
> > +	case 2:	*(u16 *)dest = *(const u16 *)src;
> > +		break;
> > +	case 4:	*(u32 *)dest = *(const u32 *)src;
> > +		break;
> > +#if (BITS_PER_LONG == 64)
> > +	case 8:	*(u64 *)dest = *(const u64 *)src;
> > +		break;
> > +#endif
> > +	default:
> > +		memcpy(dest, src, len);
> > +	}
> > +}
> 
> I think this function is not correct when @src is not aligned for
> 2, 4, 8, or 16.
> 

Hrm, interesting. So if we need to copy 4 chars, e.g.

char data[4]

Then there are no requirements for alignment within the data.

This is normally not a problem for architectures with
CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, but could be a problem for
architectures with slow unaligned accesses.

What do you think of this proposal for ltt_relay_do_copy ?

#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
/*
 * Copy len bytes from src to dest. Fast path for the small power-of-two
 * sizes typically produced by tracepoint serialization; falls back to
 * memcpy for everything else. Unaligned accesses are cheap on these
 * architectures, so no alignment check is needed.
 */
static inline void ltt_relay_do_copy(void *dest, const void *src, size_t len)
{
	switch (len) {
	case 0:	break;
	case 1:	*(u8 *)dest = *(const u8 *)src;
		break;
	case 2:	*(u16 *)dest = *(const u16 *)src;
		break;
	case 4:	*(u32 *)dest = *(const u32 *)src;
		break;
#if (BITS_PER_LONG == 64)
	case 8:	*(u64 *)dest = *(const u64 *)src;
		break;
#endif
	default:
		memcpy(dest, src, len);
	}
}
#else
/*
 * Returns nonzero iff both dest and src are aligned on len.
 * Call this with statically known len for efficiency.
 * NOTE(review): assumes ltt_align(addr, len) returns the number of padding
 * bytes needed to align addr on len (0 when already aligned) — confirm
 * against its definition.
 */
static inline int addr_aligned(const void *dest, const void *src, size_t len)
{
	if (ltt_align((size_t)dest, len))
		return 0;
	if (ltt_align((size_t)src, len))
		return 0;
	return 1;
}

/*
 * Copy len bytes from src to dest. Fast path for small power-of-two
 * sizes, but only when both addresses are suitably aligned: this
 * architecture has slow (or faulting) unaligned accesses, so misaligned
 * copies take the memcpy fallback instead.
 */
static inline void ltt_relay_do_copy(void *dest, const void *src, size_t len)
{
	switch (len) {
	case 0:	break;
	case 1:	*(u8 *)dest = *(const u8 *)src;
		break;
	case 2:	if (unlikely(!addr_aligned(dest, src, 2)))
			goto memcpy_fallback;
		*(u16 *)dest = *(const u16 *)src;
		break;
	case 4:	if (unlikely(!addr_aligned(dest, src, 4)))
			goto memcpy_fallback;
		*(u32 *)dest = *(const u32 *)src;
		break;
#if (BITS_PER_LONG == 64)
	case 8:	if (unlikely(!addr_aligned(dest, src, 8)))
			goto memcpy_fallback;
		*(u64 *)dest = *(const u64 *)src;
		break;
#endif
	default:
		goto memcpy_fallback;
	}
	return;
memcpy_fallback:
	memcpy(dest, src, len);
}
#endif


Mathieu

> > +
> > +static inline int ltt_relay_write(struct rchan_buf *buf, size_t offset,
> > +	const void *src, size_t len)
> > +{
> > +	struct buf_page *page;
> > +	ssize_t pagecpy, orig_len;
> > +
> > +	if (unlikely(!len))
> > +		return 0;
> > +	orig_len = len;
> > +	offset &= buf->chan->alloc_size - 1;
> > +	page = buf->wpage;
> > +
> > +	page = ltt_relay_cache_page(buf, &buf->wpage, page, offset);
> > +	pagecpy = min_t(size_t, len, PAGE_SIZE - (offset & ~PAGE_MASK));
> > +	ltt_relay_do_copy(page_address(page->page)
> > +		+ (offset & ~PAGE_MASK), src, pagecpy);
> > +	len -= pagecpy;
> > +	if (unlikely(len))
> > +		_ltt_relay_write(buf, offset, src, len, page);
> > +	return orig_len;
> > +}
> > +
> > +/*
> >   * CONFIG_LTT_RELAY kernel API, ltt/ltt-relay-alloc.c
> >   */
> >  
> > 
> 
> 
> 
> _______________________________________________
> ltt-dev mailing list
> ltt-dev at lists.casi.polymtl.ca
> http://lists.casi.polymtl.ca/cgi-bin/mailman/listinfo/ltt-dev
> 

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68




More information about the lttng-dev mailing list