[lttng-dev] [PATCH lttng-modules v4 5/5] Extract the payload of epoll_wait/epoll_pwait

Julien Desfossez jdesfossez at efficios.com
Fri Apr 15 21:53:35 UTC 2016


On 15-Apr-2016 09:41:04 PM, Mathieu Desnoyers wrote:
> ----- On Apr 15, 2016, at 4:37 PM, Julien Desfossez jdesfossez at efficios.com wrote:
> 
> > When epoll_wait returns, extract the content of the "events" field
> > (events set and data payload).
> > 
> > Here is an example output:
> > syscall_entry_epoll_wait: { epfd = 3, maxevents = 32, timeout = 100 }
> > syscall_exit_epoll_wait: { ret = 1, fds_length = 1,
> >  fds = [ [0] = { raw_events = 0x1,
> >    events = { EPOLLIN = 1, EPOLLPRI = 0, EPOLLOUT = 0, EPOLLERR = 0,
> >      padding = 0 },
> >    data_union = { u64 = 0x100000005, fd = 5 } } ]
> > }
> > 
> > Signed-off-by: Julien Desfossez <jdesfossez at efficios.com>
> > ---
> > .../syscalls/headers/syscalls_pointers_override.h  | 190 +++++++++++++++++++++
> > 1 file changed, 190 insertions(+)
> > 
> > diff --git a/instrumentation/syscalls/headers/syscalls_pointers_override.h
> > b/instrumentation/syscalls/headers/syscalls_pointers_override.h
> > index 9bb6a01..301b338 100644
> > --- a/instrumentation/syscalls/headers/syscalls_pointers_override.h
> > +++ b/instrumentation/syscalls/headers/syscalls_pointers_override.h
> > @@ -799,4 +799,194 @@ SC_LTTNG_TRACEPOINT_EVENT_CODE(epoll_ctl,
> > )
> > #endif /* defined(CONFIG_X86_32) || defined(CONFIG_X86_64) ||
> > defined(CONFIG_ARM64) || defined(CONFIG_ARM) */
> > 
> > +#ifndef ONCE_LTTNG_TRACE_EPOLL_H
> > +#define ONCE_LTTNG_TRACE_EPOLL_H
> > +
> > +static struct lttng_event_field lttng_epoll_wait_fields[] = {
> > +	[0] = {
> > +		.name = "data_union",
> > +		.type = {
> > +			.atype = atype_struct,
> > +			.u._struct.nr_fields = ARRAY_SIZE(lttng_epoll_data_fields),
> > +			.u._struct.fields = lttng_epoll_data_fields,
> > +		}
> > +	},
> > +	[1] = {
> > +		.name = "raw_events",
> > +		.type = __type_integer(uint32_t, 0, 0, 0, __BYTE_ORDER, 16, none),
> > +	},
> > +	[2] = {
> > +		.name = "events",
> > +		.type = {
> > +			.atype = atype_struct,
> > +			.u._struct.nr_fields = ARRAY_SIZE(lttng_epoll_ctl_events_fields),
> > +			.u._struct.fields = lttng_epoll_ctl_events_fields,
> > +		}
> > +	},
> > +};
> > +
> > +static struct lttng_type lttng_epoll_wait_elem = {
> > +	.atype = atype_struct,
> > +	.u._struct.nr_fields = ARRAY_SIZE(lttng_epoll_wait_fields),
> > +	.u._struct.fields = lttng_epoll_wait_fields,
> > +};
> > +
> > +#endif /* ONCE_LTTNG_TRACE_EPOLL_H */
> > +
> > +#if defined(CONFIG_X86_32) || defined(CONFIG_X86_64)
> > +#define OVERRIDE_32_epoll_wait
> > +#define OVERRIDE_64_epoll_wait
> > +SC_LTTNG_TRACEPOINT_EVENT_CODE(epoll_wait,
> > +	TP_PROTO(sc_exit(long ret,) int epfd, struct epoll_event __user * uevents,
> > +		int maxevents, int timeout),
> > +	TP_ARGS(sc_exit(ret,) epfd, uevents, maxevents, timeout),
> > +	TP_locvar(
> > +		sc_out(
> > +			int fds_length;
> 
> Why is it signed ?
> 
> > +			struct epoll_event *events;
> > +		)
> > +	),
> > +	TP_code_pre(
> > +		BUILD_BUG_ON(((ARRAY_SIZE(lttng_epoll_ctl_events_fields) - 1) +
> > +				EPOLL_FLAGS_PADDING_SIZE) !=
> > +					sizeof(uint8_t) * BITS_PER_BYTE);
> > +		sc_out({
> > +			int err;
> > +
> > +			tp_locvar->fds_length = 0;
> > +			tp_locvar->events = NULL;
> > +
> > +			if (maxevents <= 0)
> > +				goto skip_code;
> > +
> > +			tp_locvar->events = kmalloc(
> 
> How should we handle negative or very large maxevents values ?

A negative value is caught by the "if" just before.
For very large values, I thought we could rely on kmalloc to fail and
handle the failure; is that OK?
If the resulting event is too large for the subbuffer, it will just be
discarded, right?

Thanks,

Julien

> 
> > +				maxevents * sizeof(struct epoll_event),
> > +				GFP_KERNEL);
> > +			if (!tp_locvar->events)
> > +				goto skip_code;
> > +
> > +			if (ret > 0) {
> > +				tp_locvar->fds_length = ret;
> > +				err = copy_from_user(tp_locvar->events,
> > +					uevents, maxevents * sizeof(struct epoll_event));
> > +				if (err) {
> > +					tp_locvar->fds_length = 0;
> > +				}
> > +			}
> > +		}
> > +		skip_code:
> > +		)
> > +	),
> > +	TP_FIELDS(
> > +		sc_exit(ctf_integer(long, ret, ret))
> > +		sc_in(ctf_integer(int, epfd, epfd))
> > +		sc_in(ctf_integer(int, maxevents, maxevents))
> > +		sc_in(ctf_integer(int, timeout, timeout))
> > +		sc_out(ctf_integer(unsigned int, fds_length, tp_locvar->fds_length))
> > +		sc_out(
> > +			ctf_custom_field(
> > +				ctf_custom_type(
> > +					.atype = atype_sequence_compound,
> > +					.u.sequence_compound.length_name =
> > +						"fds_length",
> > +					.u.sequence_compound.elem_type =
> > +						&lttng_epoll_wait_elem,
> > +				),
> > +				fds,
> > +				ctf_custom_code(
> > +					uint32_t i;
> > +
> > +					ctf_align(uint64_t)
> > +					for (i = 0; i < tp_locvar->fds_length; i++) {
> 
> Comparison of unsigned (i) with signed (tp_locvar->fds_length).
> 
> > +						ctf_integer_bitfield_type(uint32_t,
> > +							cpu_to_le32(tp_locvar->events[i].events))
> > +						ctf_integer_bitfield_type(uint8_t,
> > +							(uint8_t) cpu_to_le32(tp_locvar->events[i].events))
> > +						ctf_integer_type(uint64_t, tp_locvar->events[i].data)
> > +						ctf_integer_type(int, tp_locvar->events[i].data)
> 
> ^ fix order of fields (as you pointed out).
> 
> Thanks,
> 
> Mathieu
> 
> > +					}
> > +				)
> > +			)
> > +		)
> > +	),
> > +	TP_code_post(
> > +		sc_out(
> > +			kfree(tp_locvar->events);
> > +		)
> > +	)
> > +)
> > +#endif /* defined(CONFIG_X86_32) || defined(CONFIG_X86_64) */
> > +
> > +#if defined(CONFIG_X86_32) || defined(CONFIG_X86_64) || defined(CONFIG_ARM64)
> > || defined(CONFIG_ARM)
> > +#define OVERRIDE_32_epoll_pwait
> > +#define OVERRIDE_64_epoll_pwait
> > +SC_LTTNG_TRACEPOINT_EVENT_CODE(epoll_pwait,
> > +	TP_PROTO(sc_exit(long ret,) int epfd, struct epoll_event __user * uevents,
> > +		int maxevents, int timeout, const sigset_t * sigmask, size_t sigsetsize),
> > +	TP_ARGS(sc_exit(ret,) epfd, uevents, maxevents, timeout, sigmask, sigsetsize),
> > +	TP_locvar(
> > +		sc_out(
> > +			int fds_length;
> > +			struct epoll_event events[maxevents];
> > +		)
> > +	),
> > +	TP_code_pre(
> > +		BUILD_BUG_ON(((ARRAY_SIZE(lttng_epoll_ctl_events_fields) - 1) +
> > +				EPOLL_FLAGS_PADDING_SIZE) !=
> > +					sizeof(uint8_t) * BITS_PER_BYTE);
> > +		sc_out({
> > +			int err;
> > +
> > +			if (ret > 0) {
> > +				tp_locvar->fds_length = ret;
> > +				err = copy_from_user(tp_locvar->events,
> > +					uevents, maxevents * sizeof(struct epoll_event));
> > +				if (err) {
> > +					tp_locvar->fds_length = 0;
> > +				}
> > +			} else {
> > +				tp_locvar->fds_length = 0;
> > +			}
> > +
> > +		})
> > +	),
> > +	TP_FIELDS(
> > +		sc_exit(ctf_integer(long, ret, ret))
> > +		sc_in(ctf_integer(int, epfd, epfd))
> > +		sc_in(ctf_integer(int, maxevents, maxevents))
> > +		sc_in(ctf_integer(int, timeout, timeout))
> > +		sc_in(ctf_integer(const sigset_t *, sigmask, sigmask))
> > +		sc_in(ctf_integer(size_t, sigsetsize, sigsetsize))
> > +
> > +		sc_out(ctf_integer(unsigned int, fds_length, tp_locvar->fds_length))
> > +		sc_out(
> > +			ctf_custom_field(
> > +				ctf_custom_type(
> > +					.atype = atype_sequence_compound,
> > +					.u.sequence_compound.length_name =
> > +						"fds_length",
> > +					.u.sequence_compound.elem_type =
> > +						&lttng_epoll_wait_elem,
> > +				),
> > +				fds,
> > +				ctf_custom_code(
> > +					uint32_t i;
> > +
> > +					ctf_align(uint64_t)
> > +					for (i = 0; i < tp_locvar->fds_length; i++) {
> > +						ctf_integer_type(uint64_t, tp_locvar->events[i].data)
> > +						ctf_integer_type(int, tp_locvar->events[i].data)
> > +						ctf_integer_bitfield_type(uint32_t,
> > +							cpu_to_le32(tp_locvar->events[i].events))
> > +						ctf_integer_bitfield_type(uint8_t,
> > +							cpu_to_le32(tp_locvar->events[i].events))
> > +					}
> > +				)
> > +			)
> > +		)
> > +	),
> > +	TP_code_post()
> > +)
> > +#endif /* defined(CONFIG_X86_32) || defined(CONFIG_X86_64) ||
> > defined(CONFIG_ARM64) || defined(CONFIG_ARM) */
> > +
> > #endif /* CREATE_SYSCALL_TABLE */
> > --
> > 1.9.1
> 
> -- 
> Mathieu Desnoyers
> EfficiOS Inc.
> http://www.efficios.com


More information about the lttng-dev mailing list