[ltt-dev] [PATCH] LTTng optimize write to page function
Mathieu Desnoyers
compudj at krystal.dyndns.org
Tue Feb 3 22:53:14 EST 2009
* Mathieu Desnoyers (compudj at krystal.dyndns.org) wrote:
> > > +
> > > +static inline void ltt_relay_do_copy(void *dest, const void *src, size_t len)
> > > +{
> > > +        switch (len) {
> > > +        case 1: *(u8 *)dest = *(const u8 *)src;
> > > +                break;
> > > +        case 2: *(u16 *)dest = *(const u16 *)src;
> > > +                break;
> > > +        case 4: *(u32 *)dest = *(const u32 *)src;
> > > +                break;
> > > +#if (BITS_PER_LONG == 64)
> > > +        case 8: *(u64 *)dest = *(const u64 *)src;
> > > +                break;
> > > +#endif
> > > +        default:
> > > +                memcpy(dest, src, len);
> > > +        }
> > > +}
> >
> > I think this function is not correct when @src is not aligned on
> > 2, 4, 8, or 16 bytes.
> >
>
> Hrm, interesting. So if we need to copy 4 chars, e.g.
>
> char data[4]
>
> then there is no alignment requirement on the data.
>
> This is normally not a problem for architectures with
> CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, but could be a problem for
> architectures with slow unaligned accesses.
>
> What do you think of this proposal for ltt_relay_do_copy?
>
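For context, the issue is that a 2-, 4- or 8-byte field in a packed event
record can start at any offset, so direct pointer-cast loads and stores are
only safe where unaligned access is efficient. A stand-alone sketch of the
difference (illustration only, not kernel code):

#include <stdint.h>
#include <string.h>

static uint32_t load_u32_direct(const void *p)
{
        /* May trap, or be emulated very slowly, if p is not
         * 4-byte aligned on architectures without efficient
         * unaligned access. */
        return *(const uint32_t *)p;
}

static uint32_t load_u32_safe(const void *p)
{
        uint32_t v;

        /* A constant-size memcpy is always safe: the compiler
         * emits the best instruction sequence the architecture
         * allows. */
        memcpy(&v, p, sizeof(v));
        return v;
}

int main(void)
{
        char buf[8] __attribute__((aligned(8))) = { 1, 2, 3, 4, 5, 6, 7, 8 };

        (void)load_u32_direct(buf);     /* aligned: always fine */
        (void)load_u32_safe(buf + 1);   /* misaligned: still safe */
        return 0;
}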
Here is the cleaned-up patch, which applies on top of "LTTng optimize
write to page function (v2)".

LTTng optimize write to page function: deal with unaligned access

Make sure we don't end up doing unaligned accesses on architectures which lack
support for efficient unaligned access.
Standard configurations are either:

- the architecture defines CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
  -> !CONFIG_LTT_ALIGNMENT (to save space; sketched below)
- the architecture does not define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
  -> CONFIG_LTT_ALIGNMENT (to speed up tracing)
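As a rough sketch of the space side of that trade-off, using a hypothetical
event layout (not the actual LTTng record format):

#include <stdio.h>
#include <stdint.h>

/* !CONFIG_LTT_ALIGNMENT case: LTT_ALIGN expands to __attribute__((packed)). */
struct event_packed {
        uint8_t  id;
        uint32_t payload;
} __attribute__((packed));

/* CONFIG_LTT_ALIGNMENT case: LTT_ALIGN expands to nothing, so the
 * struct gets natural alignment and padding. */
struct event_aligned {
        uint8_t  id;
        uint32_t payload;
};

int main(void)
{
        /* Typically prints "packed=5 aligned=8": packing saves 3 bytes
         * per event, at the cost of unaligned payload accesses. */
        printf("packed=%zu aligned=%zu\n",
               sizeof(struct event_packed), sizeof(struct event_aligned));
        return 0;
}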
Compiling a kernel with tracing active; tests were done only on x86_64
(which has efficient unaligned access):
CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
!CONFIG_LTT_ALIGNMENT
real 1m29.349s
CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
CONFIG_LTT_ALIGNMENT
real 1m29.309s
!CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS (forced by modifying arch/x86/Kconfig)
CONFIG_LTT_ALIGNMENT
real 1m29.162s
So even with this additional alignment test, the fast path stays fast.
Testing the variations on an architecture without efficient unaligned
access would be welcome.
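Until someone can test on such an architecture, here is a rough stand-alone
sketch of the new slow path for user-space experimentation. Names and layout
are illustrative only: stdint types replace the kernel's u8/u16/u32/u64, and
addr_aligned is written with the usual mask check rather than ltt_align.

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>
#include <string.h>

/* Equivalent of the patch's addr_aligned(); len must be a power of two. */
static inline int addr_aligned(const void *dest, const void *src, size_t len)
{
        uintptr_t mask = len - 1;

        return !(((uintptr_t)dest | (uintptr_t)src) & mask);
}

/* Mirrors the !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS slow path. Like the
 * kernel code, this type-puns through pointer casts; build with
 * -fno-strict-aliasing, as the kernel does. */
static void do_copy(void *dest, const void *src, size_t len)
{
        switch (len) {
        case 0: break;
        case 1: *(uint8_t *)dest = *(const uint8_t *)src;
                break;
        case 2: if (!addr_aligned(dest, src, 2))
                        goto memcpy_fallback;
                *(uint16_t *)dest = *(const uint16_t *)src;
                break;
        case 4: if (!addr_aligned(dest, src, 4))
                        goto memcpy_fallback;
                *(uint32_t *)dest = *(const uint32_t *)src;
                break;
        case 8: if (!addr_aligned(dest, src, 8))
                        goto memcpy_fallback;
                *(uint64_t *)dest = *(const uint64_t *)src;
                break;
        default:
                goto memcpy_fallback;
        }
        return;
memcpy_fallback:
        memcpy(dest, src, len);
}

int main(void)
{
        char buf[16] __attribute__((aligned(8))) = { 0 };
        uint32_t v = 0xdeadbeef, out;

        do_copy(buf + 4, &v, 4);        /* both aligned: direct store */
        do_copy(buf + 5, &v, 4);        /* dest misaligned: memcpy fallback */
        memcpy(&out, buf + 5, 4);
        printf("readback = %08" PRIx32 "\n", out);      /* deadbeef */
        return 0;
}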
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers at polymtl.ca>
CC: Lai Jiangshan <laijs at cn.fujitsu.com>
CC: Martin Bligh <mbligh at google.com>
---
include/linux/ltt-core.h | 35 +++++++++++++++++++++++++++++++++++
include/linux/ltt-relay.h | 44 ++++++++++++++++++++++++++++++++++++++++++++
include/linux/ltt-tracer.h | 35 -----------------------------------
3 files changed, 79 insertions(+), 35 deletions(-)
Index: linux-2.6-lttng/include/linux/ltt-relay.h
===================================================================
--- linux-2.6-lttng.orig/include/linux/ltt-relay.h 2009-02-03 21:00:40.000000000 -0500
+++ linux-2.6-lttng/include/linux/ltt-relay.h 2009-02-03 21:26:17.000000000 -0500
@@ -20,6 +20,7 @@
#include <linux/poll.h>
#include <linux/kref.h>
#include <linux/mm.h>
+#include <linux/ltt-core.h>
/* Needs a _much_ better name... */
#define FIX_SIZE(x) ((((x) - 1) & PAGE_MASK) + PAGE_SIZE)
@@ -202,6 +203,7 @@ static inline struct buf_page *ltt_relay
        return page;
}
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
static inline void ltt_relay_do_copy(void *dest, const void *src, size_t len)
{
        switch (len) {
@@ -220,6 +222,48 @@ static inline void ltt_relay_do_copy(voi
                memcpy(dest, src, len);
        }
}
+#else
+/*
+ * Returns whether the dest and src addresses are aligned on
+ * min(sizeof(void *), len). Call this with statically known len for efficiency.
+ */
+static inline int addr_aligned(const void *dest, const void *src, size_t len)
+{
+        if (ltt_align((size_t)dest, len))
+                return 0;
+        if (ltt_align((size_t)src, len))
+                return 0;
+        return 1;
+}
+
+static inline void ltt_relay_do_copy(void *dest, const void *src, size_t len)
+{
+        switch (len) {
+        case 0: break;
+        case 1: *(u8 *)dest = *(const u8 *)src;
+                break;
+        case 2: if (unlikely(!addr_aligned(dest, src, 2)))
+                        goto memcpy_fallback;
+                *(u16 *)dest = *(const u16 *)src;
+                break;
+        case 4: if (unlikely(!addr_aligned(dest, src, 4)))
+                        goto memcpy_fallback;
+                *(u32 *)dest = *(const u32 *)src;
+                break;
+#if (BITS_PER_LONG == 64)
+        case 8: if (unlikely(!addr_aligned(dest, src, 8)))
+                        goto memcpy_fallback;
+                *(u64 *)dest = *(const u64 *)src;
+                break;
+#endif
+        default:
+                goto memcpy_fallback;
+        }
+        return;
+memcpy_fallback:
+        memcpy(dest, src, len);
+}
+#endif
static inline int ltt_relay_write(struct rchan_buf *buf, size_t offset,
        const void *src, size_t len)
Index: linux-2.6-lttng/include/linux/ltt-core.h
===================================================================
--- linux-2.6-lttng.orig/include/linux/ltt-core.h 2009-02-03 21:24:29.000000000 -0500
+++ linux-2.6-lttng/include/linux/ltt-core.h 2009-02-03 21:25:18.000000000 -0500
@@ -42,4 +42,39 @@ extern ltt_run_filter_functor ltt_run_fi
extern void ltt_filter_register(ltt_run_filter_functor func);
extern void ltt_filter_unregister(void);
+#if defined(CONFIG_LTT) && defined(CONFIG_LTT_ALIGNMENT)
+
+/*
+ * Calculate the offset needed to align the type.
+ * size_of_type must be non-zero.
+ */
+static inline unsigned int ltt_align(size_t align_drift, size_t size_of_type)
+{
+        size_t alignment = min(sizeof(void *), size_of_type);
+        return (alignment - align_drift) & (alignment - 1);
+}
+/* Default arch alignment */
+#define LTT_ALIGN
+
+static inline int ltt_get_alignment(void)
+{
+        return sizeof(void *);
+}
+
+#else
+
+static inline unsigned int ltt_align(size_t align_drift,
+                size_t size_of_type)
+{
+        return 0;
+}
+
+#define LTT_ALIGN __attribute__((packed))
+
+static inline int ltt_get_alignment(void)
+{
+        return 0;
+}
+#endif /* defined(CONFIG_LTT) && defined(CONFIG_LTT_ALIGNMENT) */
+
#endif /* LTT_CORE_H */
Index: linux-2.6-lttng/include/linux/ltt-tracer.h
===================================================================
--- linux-2.6-lttng.orig/include/linux/ltt-tracer.h 2009-02-03 21:24:25.000000000 -0500
+++ linux-2.6-lttng/include/linux/ltt-tracer.h 2009-02-03 21:24:49.000000000 -0500
@@ -138,41 +138,6 @@ static inline enum marker_id marker_id_t
        return MARKER_ID_DYNAMIC;
}
-#if defined(CONFIG_LTT) && defined(CONFIG_LTT_ALIGNMENT)
-
-/*
- * Calculate the offset needed to align the type.
- * size_of_type must be non-zero.
- */
-static inline unsigned int ltt_align(size_t align_drift, size_t size_of_type)
-{
-        size_t alignment = min(sizeof(void *), size_of_type);
-        return (alignment - align_drift) & (alignment - 1);
-}
-/* Default arch alignment */
-#define LTT_ALIGN
-
-static inline int ltt_get_alignment(void)
-{
-        return sizeof(void *);
-}
-
-#else
-
-static inline unsigned int ltt_align(size_t align_drift,
-                size_t size_of_type)
-{
-        return 0;
-}
-
-#define LTT_ALIGN __attribute__((packed))
-
-static inline int ltt_get_alignment(void)
-{
-        return 0;
-}
-#endif /* CONFIG_LTT_ALIGNMENT */
-
#ifdef CONFIG_LTT
struct user_dbg_data {
--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68