[ltt-dev] [PATCH] LTTng optimize write to page function
Mathieu Desnoyers
compudj at krystal.dyndns.org
Tue Feb 3 21:35:18 EST 2009
* Lai Jiangshan (laijs at cn.fujitsu.com) wrote:
> Mathieu Desnoyers wrote:
> > The functions in ltt-relay-alloc.c take care of writing the data into
> > the buffer pages. Those pages are allocated from the page allocator and
> > no virtual mapping is done so we can save precious TLB entries.
> > ltt-relay-alloc.c is the abstraction layer which makes the buffers
> > "look" like a contiguous memory area, although they are made from
> > physically discontiguous pages linked with a linked list. A caching
> > mechanism makes sure we never walk over more than 1-2 entries of the
> > list. We use a linked list rather than a table to make sure we don't
> > depend on vmalloc to allocate large pointer arrays.
> >
> > I did a bit of profiling with oprofile on LTTng and found out that write
> > functions in ltt-relay-alloc.c were taking a lot of CPU time. I thought it would
> > be good to improve them a bit.
> >
> > Running a 2.6.29-rc3 kernel
> >
> > Compiling a 2.6.25 kernel using make -j10 on a 8-cores x86_64 with a vanilla
> > 2.6.29-rc3 kernel (all tests are cache-hot) :
> > real 1m22.103s
> >
> > With dormant instrumentation
> > real 1m24.667s
> > (note : this 2s regression should be identified eventually by doing a bisection
> > of the LTTng tree.)
> >
> > ltt-armall
> >
> > Without modification, with flight recorder tracing active :
> > real 1m31.135s
> >
> > Replacing the memcpy call with a specialized call for 1, 2, 4 and 8 bytes :
> > real 1m30.440s
> >
> > Inlining the fast path of the write function :
> > real 1m29.614s
> >
> > Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers at polymtl.ca>
> > CC: Martin Bligh <mbligh at google.com>
> > CC: Zhaolei <zhaolei at cn.fujitsu.com>
> > ---
> > include/linux/ltt-relay.h | 88 ++++++++++++++++++++++++++++++++++++++++++++--
> > ltt/ltt-relay-alloc.c | 66 ++++++----------------------------
> > 2 files changed, 98 insertions(+), 56 deletions(-)
> >
> > Index: linux-2.6-lttng/ltt/ltt-relay-alloc.c
> > ===================================================================
> > --- linux-2.6-lttng.orig/ltt/ltt-relay-alloc.c 2009-02-03 10:37:05.000000000 -0500
> > +++ linux-2.6-lttng/ltt/ltt-relay-alloc.c 2009-02-03 10:37:13.000000000 -0500
> > @@ -424,7 +424,7 @@ EXPORT_SYMBOL_GPL(ltt_relay_close);
> > /*
> > * Start iteration at the previous element. Skip the real list head.
> > */
> > -static struct buf_page *ltt_relay_find_prev_page(struct rchan_buf *buf,
> > +struct buf_page *ltt_relay_find_prev_page(struct rchan_buf *buf,
> > struct buf_page *page, size_t offset, ssize_t diff_offset)
> > {
> > struct buf_page *iter;
> > @@ -456,13 +456,15 @@ static struct buf_page *ltt_relay_find_p
> > return iter;
> > }
> > }
> > + WARN_ON(1);
> > return NULL;
> > }
> > +EXPORT_SYMBOL_GPL(ltt_relay_find_prev_page);
> >
> > /*
> > * Start iteration at the next element. Skip the real list head.
> > */
> > -static struct buf_page *ltt_relay_find_next_page(struct rchan_buf *buf,
> > +struct buf_page *ltt_relay_find_next_page(struct rchan_buf *buf,
> > struct buf_page *page, size_t offset, ssize_t diff_offset)
> > {
> > struct buf_page *iter;
> > @@ -494,48 +496,10 @@ static struct buf_page *ltt_relay_find_n
> > return iter;
> > }
> > }
> > + WARN_ON(1);
> > return NULL;
> > }
> > -
> > -/*
> > - * Find the page containing "offset". Cache it if it is after the currently
> > - * cached page.
> > - */
> > -static struct buf_page *ltt_relay_cache_page(struct rchan_buf *buf,
> > - struct buf_page **page_cache,
> > - struct buf_page *page, size_t offset)
> > -{
> > - ssize_t diff_offset;
> > - ssize_t half_buf_size = buf->chan->alloc_size >> 1;
> > -
> > - /*
> > - * Make sure this is the page we want to write into. The current
> > - * page is changed concurrently by other writers. [wrh]page are
> > - * used as a cache remembering the last page written
> > - * to/read/looked up for header address. No synchronization;
> > - * could have to find the previous page is a nested write
> > - * occured. Finding the right page is done by comparing the
> > - * dest_offset with the buf_page offsets.
> > - * When at the exact opposite of the buffer, bias towards forward search
> > - * because it will be cached.
> > - */
> > -
> > - diff_offset = (ssize_t)offset - (ssize_t)page->offset;
> > - if (diff_offset <= -(ssize_t)half_buf_size)
> > - diff_offset += buf->chan->alloc_size;
> > - else if (diff_offset > half_buf_size)
> > - diff_offset -= buf->chan->alloc_size;
> > -
> > - if (unlikely(diff_offset >= (ssize_t)PAGE_SIZE)) {
> > - page = ltt_relay_find_next_page(buf, page, offset, diff_offset);
> > - WARN_ON(!page);
> > - *page_cache = page;
> > - } else if (unlikely(diff_offset < 0)) {
> > - page = ltt_relay_find_prev_page(buf, page, offset, diff_offset);
> > - WARN_ON(!page);
> > - }
> > - return page;
> > -}
> > +EXPORT_SYMBOL_GPL(ltt_relay_find_next_page);
> >
> > /**
> > * ltt_relay_write - write data to a ltt_relay buffer.
> > @@ -543,22 +507,17 @@ static struct buf_page *ltt_relay_cache_
> > * @offset : offset within the buffer
> > * @src : source address
> > * @len : length to write
> > + * @page : cached buffer page
> > */
> > -int ltt_relay_write(struct rchan_buf *buf, size_t offset,
> > - const void *src, size_t len)
> > +void _ltt_relay_write(struct rchan_buf *buf, size_t offset,
> > + const void *src, size_t len, struct buf_page *page)
> > {
> > - struct buf_page *page;
> > - ssize_t pagecpy, orig_len;
> > + ssize_t pagecpy;
> >
> > - orig_len = len;
> > - offset &= buf->chan->alloc_size - 1;
> > - page = buf->wpage;
> > - if (unlikely(!len))
> > - return 0;
> > for (;;) {
> > page = ltt_relay_cache_page(buf, &buf->wpage, page, offset);
> > pagecpy = min_t(size_t, len, PAGE_SIZE - (offset & ~PAGE_MASK));
> > - memcpy(page_address(page->page)
> > + ltt_relay_do_copy(page_address(page->page)
> > + (offset & ~PAGE_MASK), src, pagecpy);
>
Hi Lai,
Happy new chinese year :)
Please see the corrected v2 of this patch. This v1 is broken.
> I think memcpy() is better than ltt_relay_do_copy() here.
> (offset & ~PAGE_MASK) is unlikely 1,2,4,8,16 here.
ltt_relay_do_copy has the switch for 1, 2, 4, 8 for the "len" parameter,
which corresponds to "pagecpy", not (offset & ~PAGE_MASK).
> > len -= pagecpy;
> > if (likely(!len))
> > @@ -571,9 +530,8 @@ int ltt_relay_write(struct rchan_buf *bu
> > */
> > WARN_ON(offset >= buf->chan->alloc_size);
> > }
> > - return orig_len;
> > }
> > -EXPORT_SYMBOL_GPL(ltt_relay_write);
> > +EXPORT_SYMBOL_GPL(_ltt_relay_write);
> >
> > /**
> > * ltt_relay_read - read data from ltt_relay_buffer.
> > Index: linux-2.6-lttng/include/linux/ltt-relay.h
> > ===================================================================
> > --- linux-2.6-lttng.orig/include/linux/ltt-relay.h 2009-02-03 10:37:06.000000000 -0500
> > +++ linux-2.6-lttng/include/linux/ltt-relay.h 2009-02-03 10:37:13.000000000 -0500
> > @@ -140,8 +140,14 @@ struct rchan_callbacks {
> > int (*remove_buf_file)(struct dentry *dentry);
> > };
> >
> > -extern int ltt_relay_write(struct rchan_buf *buf, size_t offset,
> > - const void *src, size_t len);
> > +extern struct buf_page *ltt_relay_find_prev_page(struct rchan_buf *buf,
> > + struct buf_page *page, size_t offset, ssize_t diff_offset);
> > +
> > +extern struct buf_page *ltt_relay_find_next_page(struct rchan_buf *buf,
> > + struct buf_page *page, size_t offset, ssize_t diff_offset);
> > +
> > +extern void _ltt_relay_write(struct rchan_buf *buf, size_t offset,
> > + const void *src, size_t len, struct buf_page *page);
> >
> > extern int ltt_relay_read(struct rchan_buf *buf, size_t offset,
> > void *dest, size_t len);
> > @@ -159,6 +165,84 @@ extern void *ltt_relay_offset_address(st
> > size_t offset);
> >
> > /*
> > + * Find the page containing "offset". Cache it if it is after the currently
> > + * cached page.
> > + */
> > +static inline struct buf_page *ltt_relay_cache_page(struct rchan_buf *buf,
> > + struct buf_page **page_cache,
> > + struct buf_page *page, size_t offset)
> > +{
> > + ssize_t diff_offset;
> > + ssize_t half_buf_size = buf->chan->alloc_size >> 1;
> > +
> > + /*
> > + * Make sure this is the page we want to write into. The current
> > + * page is changed concurrently by other writers. [wrh]page are
> > + * used as a cache remembering the last page written
> > + * to/read/looked up for header address. No synchronization;
> > + * could have to find the previous page is a nested write
> > + * occured. Finding the right page is done by comparing the
> > + * dest_offset with the buf_page offsets.
> > + * When at the exact opposite of the buffer, bias towards forward search
> > + * because it will be cached.
> > + */
> > +
> > + diff_offset = (ssize_t)offset - (ssize_t)page->offset;
> > + if (diff_offset <= -(ssize_t)half_buf_size)
> > + diff_offset += buf->chan->alloc_size;
> > + else if (diff_offset > half_buf_size)
> > + diff_offset -= buf->chan->alloc_size;
> > +
> > + if (unlikely(diff_offset >= (ssize_t)PAGE_SIZE)) {
> > + page = ltt_relay_find_next_page(buf, page, offset, diff_offset);
> > + *page_cache = page;
> > + } else if (unlikely(diff_offset < 0)) {
> > + page = ltt_relay_find_prev_page(buf, page, offset, diff_offset);
> > + }
> > + return page;
> > +}
> > +
> > +static inline void ltt_relay_do_copy(void *dest, const void *src, size_t len)
> > +{
> > + switch (len) {
> > + case 1: *(u8 *)dest = *(const u8 *)src;
> > + break;
> > + case 2: *(u16 *)dest = *(const u16 *)src;
> > + break;
> > + case 4: *(u32 *)dest = *(const u32 *)src;
> > + break;
> > +#if (BITS_PER_LONG == 64)
> > + case 8: *(u64 *)dest = *(const u64 *)src;
> > + break;
> > +#endif
> > + default:
> > + memcpy(dest, src, len);
> > + }
> > +}
>
> I think this function is not correct when @src is not aligned for
> 2, 4, 8, or 16.
>
Hrm, interesting. So if we need to copy 4 chars, e.g.
char data[4]
Then there are no requirements for alignment within the data.
This is normally not a problem for architectures with
CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, but could be a problem for
architectures with slow unaligned accesses.
What do you think of this proposal for ltt_relay_do_copy ?
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
/*
 * Fast specialized copy for the statically-known small sizes produced by
 * the tracer (1, 2, 4, 8 bytes); falls back to memcpy otherwise. On
 * architectures with efficient unaligned access no alignment check is
 * needed before the direct word stores.
 */
static inline void ltt_relay_do_copy(void *dest, const void *src, size_t len)
{
	switch (len) {
	case 0:
		break;
	case 1:
		*(u8 *)dest = *(const u8 *)src;
		break;
	case 2:
		*(u16 *)dest = *(const u16 *)src;
		break;
	case 4:
		*(u32 *)dest = *(const u32 *)src;
		break;
#if (BITS_PER_LONG == 64)
	case 8:
		*(u64 *)dest = *(const u64 *)src;
		break;
#endif
	default:
		memcpy(dest, src, len);
	}
}
#else
/*
 * Returns whether the dest and src addresses are both aligned on "len".
 * Call this with statically known len for efficiency.
 */
static inline int addr_aligned(const void *dest, const void *src, size_t len)
{
	if (ltt_align((size_t)dest, len))
		return 0;
	if (ltt_align((size_t)src, len))
		return 0;
	return 1;
}

/*
 * Same specialization as above, but architectures with slow (or trapping)
 * unaligned accesses must verify alignment before using direct word
 * stores; misaligned inputs fall back to memcpy.
 */
static inline void ltt_relay_do_copy(void *dest, const void *src, size_t len)
{
	switch (len) {
	case 0:
		break;
	case 1:
		*(u8 *)dest = *(const u8 *)src;
		break;
	case 2:
		if (unlikely(!addr_aligned(dest, src, 2)))
			goto memcpy_fallback;
		*(u16 *)dest = *(const u16 *)src;
		break;
	case 4:
		if (unlikely(!addr_aligned(dest, src, 4)))
			goto memcpy_fallback;
		*(u32 *)dest = *(const u32 *)src;
		break;
#if (BITS_PER_LONG == 64)
	case 8:
		if (unlikely(!addr_aligned(dest, src, 8)))
			goto memcpy_fallback;
		*(u64 *)dest = *(const u64 *)src;
		break;
#endif
	default:
		goto memcpy_fallback;
	}
	return;
memcpy_fallback:
	memcpy(dest, src, len);
}
#endif
Mathieu
> > +
> > +static inline int ltt_relay_write(struct rchan_buf *buf, size_t offset,
> > + const void *src, size_t len)
> > +{
> > + struct buf_page *page;
> > + ssize_t pagecpy, orig_len;
> > +
> > + if (unlikely(!len))
> > + return 0;
> > + orig_len = len;
> > + offset &= buf->chan->alloc_size - 1;
> > + page = buf->wpage;
> > +
> > + page = ltt_relay_cache_page(buf, &buf->wpage, page, offset);
> > + pagecpy = min_t(size_t, len, PAGE_SIZE - (offset & ~PAGE_MASK));
> > + ltt_relay_do_copy(page_address(page->page)
> > + + (offset & ~PAGE_MASK), src, pagecpy);
> > + len -= pagecpy;
> > + if (unlikely(len))
> > + _ltt_relay_write(buf, offset, src, len, page);
> > + return orig_len;
> > +}
> > +
> > +/*
> > * CONFIG_LTT_RELAY kernel API, ltt/ltt-relay-alloc.c
> > */
> >
> >
>
>
>
> _______________________________________________
> ltt-dev mailing list
> ltt-dev at lists.casi.polymtl.ca
> http://lists.casi.polymtl.ca/cgi-bin/mailman/listinfo/ltt-dev
>
--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
More information about the lttng-dev
mailing list