[lttng-dev] [PATCH lttng-modules v4 3/5] Extract the FDs and flags from poll and ppoll

Mathieu Desnoyers mathieu.desnoyers at efficios.com
Fri Apr 15 21:37:21 UTC 2016


----- On Apr 15, 2016, at 4:37 PM, Julien Desfossez jdesfossez at efficios.com wrote:

> Instead of printing the pointer address of the poll set, extract all the
> FDs and flags from the poll set. For now, we only output the
> standardized set of events to limit the verbosity of the output; we also
> extract the raw value. When we switch to CTF2, we will be able to hide
> unset fields, and then we will extract all the fields.
> 
> Here is an example of output with one FD:
> syscall_entry_poll: {
>  timeout_msecs = -1, nfds = 1, fds_length = 1,
>  fds = [
>    [0] = { fd = 4, raw_events = 0x5, events = { POLLIN = 1, POLLPRI = 0,
>            POLLOUT = 1, POLLERR = 0, POLLHUP = 0, padding = 0 } } ]
> }
> 
> syscall_exit_poll: {
>  ret = 1, nfds = 1, fds_length = 1,
>  fds = [ [0] = { fd = 4, raw_events = 0x4, events = { POLLIN = 0,
>  POLLPRI = 0, POLLOUT = 1, POLLERR = 0, POLLHUP = 0, padding = 0 } } ] }
> 
> Signed-off-by: Julien Desfossez <jdesfossez at efficios.com>
> ---
> .../syscalls/headers/syscalls_pointers_override.h  | 320 +++++++++++++++++++++
> 1 file changed, 320 insertions(+)
> 
> diff --git a/instrumentation/syscalls/headers/syscalls_pointers_override.h
> b/instrumentation/syscalls/headers/syscalls_pointers_override.h
> index debd4b2..a312036 100644
> --- a/instrumentation/syscalls/headers/syscalls_pointers_override.h
> +++ b/instrumentation/syscalls/headers/syscalls_pointers_override.h
> @@ -340,4 +340,324 @@ SC_LTTNG_TRACEPOINT_EVENT_CODE(pselect6,
> )
> #endif /* defined(CONFIG_X86_32) || defined(CONFIG_X86_64) ||
> defined(CONFIG_ARM64) || defined(CONFIG_ARM) */
> 
> +#ifndef ONCE_LTTNG_TRACE_POLL_H
> +#define ONCE_LTTNG_TRACE_POLL_H
> +
> +/*
> + * LTTNG_POLL_NRFLAGS is one past the POLLNVAL flag value; ilog2() of it
> + * selects the slot of the "padding" entry in the flag field table.
> + */
> +#define LTTNG_POLL_NRFLAGS (POLLNVAL + 1)
> +/*
> + * Width, in bits, of the padding that completes the flag bitfield to a
> + * full uint8_t. The expansion is fully parenthesized so the macro can be
> + * used safely inside larger expressions.
> + */
> +#define POLL_FLAGS_PADDING_SIZE ((sizeof(uint8_t) * BITS_PER_BYTE) - \
> +	ilog2(LTTNG_POLL_NRFLAGS - 1))
> +
> +/*
> + * Description of the per-FD flag bitfield: one 1-bit field per extracted
> + * poll flag, placed at the flag's bit position, followed by a "padding"
> + * field that covers the remaining bits of the byte.
> + *
> + * Only a subset of the flags specified by iBCS2 is extracted for now.
> + * POLLNVAL has no entry of its own: for a single-bit POLLNVAL above bit 0
> + * (presumably 0x20, as in asm-generic poll.h), ilog2(POLLNVAL) equals
> + * ilog2(LTTNG_POLL_NRFLAGS), so a POLLNVAL initializer would designate the
> + * same slot as "padding" and be silently overridden by it.
> + */
> +static struct lttng_event_field lttng_pollfd_flag_fields[] = {
> +	[ilog2(POLLIN)] = {
> +		.name = "POLLIN",
> +		.type = __type_integer(int, 1, 1, 0, __LITTLE_ENDIAN, 10, none),
> +	},
> +	[ilog2(POLLPRI)] = {
> +		.name = "POLLPRI",
> +		.type = __type_integer(int, 1, 1, 0, __LITTLE_ENDIAN, 10, none),
> +	},
> +	[ilog2(POLLOUT)] = {
> +		.name = "POLLOUT",
> +		.type = __type_integer(int, 1, 1, 0, __LITTLE_ENDIAN, 10, none),
> +	},
> +	[ilog2(POLLERR)] = {
> +		.name = "POLLERR",
> +		.type = __type_integer(int, 1, 1, 0, __LITTLE_ENDIAN, 10, none),
> +	},
> +	[ilog2(POLLHUP)] = {
> +		.name = "POLLHUP",
> +		.type = __type_integer(int, 1, 1, 0, __LITTLE_ENDIAN, 10, none),
> +	},
> +	/* Remaining bits of the byte, POLL_FLAGS_PADDING_SIZE wide. */
> +	[ilog2(LTTNG_POLL_NRFLAGS)] = {
> +		.name = "padding",
> +		.type = __type_integer(int, POLL_FLAGS_PADDING_SIZE, 1, 0,
> +				__LITTLE_ENDIAN, 10, none),
> +	},
> +};
> +
> +/*
> + * Fields emitted for each traced pollfd entry, in order: the file
> + * descriptor, the raw events value, and the same value decoded through
> + * the lttng_pollfd_flag_fields structure.
> + */
> +static struct lttng_event_field lttng_pollfd_fields[] = {
> +	{
> +		.name = "fd",
> +		.type = __type_integer(int, 0, 0, 0, __BYTE_ORDER, 10, none),
> +	},
> +	{
> +		.name = "raw_events",
> +		.type = __type_integer(short, 0, 0, 0, __BYTE_ORDER, 16, none),
> +	},
> +	{
> +		.name = "events",
> +		.type = {
> +			.atype = atype_struct,
> +			.u = {
> +				._struct = {
> +					.nr_fields = ARRAY_SIZE(lttng_pollfd_flag_fields),
> +					.fields = lttng_pollfd_flag_fields,
> +				},
> +			},
> +		},
> +	},
> +};
> +
> +/*
> + * Element type of the "fds" sequence: a structure made of the
> + * lttng_pollfd_fields members.
> + */
> +static struct lttng_type lttng_pollfd_elem = {
> +	.atype = atype_struct,
> +	.u = {
> +		._struct = {
> +			.fields = lttng_pollfd_fields,
> +			.nr_fields = ARRAY_SIZE(lttng_pollfd_fields),
> +		},
> +	},
> +};
> +#endif /* ONCE_LTTNG_TRACE_POLL_H */
> +
> +#if defined(CONFIG_X86_32) || defined(CONFIG_X86_64)
> +#define OVERRIDE_32_poll
> +#define OVERRIDE_64_poll
> +/*
> + * poll: trace the content of the pollfd array rather than its address.
> + *
> + * On entry, every one of the nfds entries is recorded (fd, raw events,
> + * decoded events). On exit, only entries with a non-zero revents are
> + * recorded, up to "ret" of them; missing entries are filled with zeros so
> + * the sequence always holds exactly fds_length elements.
> + *
> + * The user array is copied into a kmalloc'd buffer in TP_code_pre and
> + * released in TP_code_post. Any failure (size overflow, allocation, copy)
> + * results in an empty sequence.
> + */
> +SC_LTTNG_TRACEPOINT_EVENT_CODE(poll,
> +	TP_PROTO(sc_exit(long ret,) struct pollfd __user * ufds,
> +		unsigned int nfds, int timeout_msecs),
> +	TP_ARGS(sc_exit(ret,) ufds, nfds, timeout_msecs),
> +	TP_locvar(
> +		unsigned int fds_length, fds_max_len;
> +		struct pollfd *fds;
> +	),
> +	TP_code_pre(
> +		BUILD_BUG_ON(((ARRAY_SIZE(lttng_pollfd_flag_fields) - 1) +
> +				POLL_FLAGS_PADDING_SIZE) !=
> +					sizeof(uint8_t) * BITS_PER_BYTE);
> +		sc_in(
> +			tp_locvar->fds_length = nfds;
> +		)
> +		/*
> +		 * On exit, the number of active FDs is determined by ret,
> +		 * nfds stays the same as the entry, but we only want to
> +		 * output the FDs that are relevant.
> +		 */
> +		sc_out(
> +			if (ret > 0) {
> +				tp_locvar->fds_length = ret;
> +				tp_locvar->fds_max_len = nfds;
> +			} else {
> +				tp_locvar->fds_length = 0;
> +				tp_locvar->fds_max_len = 0;
> +			}
> +		)
> +		sc_inout({
> +			size_t fds_bytes = (size_t) nfds * sizeof(struct pollfd);
> +			int err;
> +
> +			/*
> +			 * Only allocate when the byte count did not wrap around
> +			 * size_t; a wrapped size would yield a buffer shorter
> +			 * than the nfds entries the field code iterates over.
> +			 */
> +			tp_locvar->fds = NULL;
> +			if (fds_bytes / sizeof(struct pollfd) == nfds)
> +				tp_locvar->fds = kmalloc(fds_bytes, GFP_KERNEL);

How should we handle very large nfds values ?

Thanks,

Mathieu

> +			if (!tp_locvar->fds) {
> +				tp_locvar->fds_length = 0;
> +				tp_locvar->fds_max_len = 0;
> +				goto skip_code;
> +			}
> +			err = copy_from_user(tp_locvar->fds, ufds, fds_bytes);
> +			if (err) {
> +				tp_locvar->fds_length = 0;
> +				tp_locvar->fds_max_len = 0;
> +			}
> +		}
> +		skip_code:
> +			; /* A label must be followed by a statement. */
> +		)
> +	),
> +	TP_FIELDS(
> +		sc_exit(ctf_integer(long, ret, ret))
> +		sc_in(ctf_integer(int, timeout_msecs, timeout_msecs))
> +		sc_inout(ctf_integer(unsigned int, nfds, nfds))
> +		sc_inout(ctf_integer(unsigned int, fds_length, tp_locvar->fds_length))
> +		sc_in(
> +			ctf_custom_field(
> +				ctf_custom_type(
> +					.atype = atype_sequence_compound,
> +					.u.sequence_compound.length_name = "fds_length",
> +					.u.sequence_compound.elem_type = &lttng_pollfd_elem,
> +				),
> +				fds,
> +				ctf_custom_code(
> +					uint32_t i;
> +
> +					ctf_align(int) /* Align on largest field in struct. */
> +					for (i = 0; i < tp_locvar->fds_length; i++) {
> +						ctf_integer_type(int, tp_locvar->fds[i].fd)
> +						ctf_integer_type(short, tp_locvar->fds[i].events)
> +						ctf_integer_bitfield_type(uint8_t,
> +							(uint8_t) cpu_to_le16(tp_locvar->fds[i].events))
> +					}
> +				)
> +			)
> +		)
> +		sc_out(
> +			ctf_custom_field(
> +				ctf_custom_type(
> +					.atype = atype_sequence_compound,
> +					.u.sequence_compound.length_name = "fds_length",
> +					.u.sequence_compound.elem_type = &lttng_pollfd_elem,
> +				),
> +				fds,
> +				ctf_custom_code(
> +					uint32_t i;
> +					unsigned int nr = 0;
> +
> +					ctf_align(int) /* Align on largest field in struct. */
> +					/*
> +					 * Iterate over the complete array, but only output
> +					 * "ret" active FDs. nr never exceeds fds_length.
> +					 */
> +					for (i = 0; i < tp_locvar->fds_max_len; i++) {
> +						if (!tp_locvar->fds[i].revents)
> +							continue;
> +						if (nr >= tp_locvar->fds_length)
> +							break;
> +						nr++;
> +						ctf_integer_type(int, tp_locvar->fds[i].fd)
> +						ctf_integer_type(short, tp_locvar->fds[i].revents)
> +						ctf_integer_bitfield_type(uint8_t,
> +							(uint8_t) cpu_to_le16(tp_locvar->fds[i].revents))
> +					}
> +					/*
> +					 * If there is a discrepancy between ret and the
> +					 * content of revents (e.g. caused by userspace corrupting
> +					 * the array from a concurrent thread), we have to output
> +					 * zeros to keep the trace readable: emit the
> +					 * fds_length - nr entries that are still missing.
> +					 */
> +					for (i = nr; i < tp_locvar->fds_length; i++) {
> +						ctf_integer_type(int, 0)
> +						ctf_integer_type(short, 0)
> +						ctf_integer_bitfield_type(uint8_t, 0)
> +					}
> +				)
> +			)
> +		)
> +	),
> +	TP_code_post(
> +		kfree(tp_locvar->fds);
> +	)
> +)
> +#endif /* defined(CONFIG_X86_32) || defined(CONFIG_X86_64) */
> +
> +#if defined(CONFIG_X86_32) || defined(CONFIG_X86_64) || defined(CONFIG_ARM64) || defined(CONFIG_ARM)
> +#define OVERRIDE_32_ppoll
> +#define OVERRIDE_64_ppoll
> +/*
> + * ppoll: trace the content of the pollfd array rather than its address.
> + *
> + * On entry, every one of the nfds entries is recorded (fd, raw events,
> + * decoded events). On exit, only entries with a non-zero revents are
> + * recorded, up to "ret" of them; missing entries are filled with zeros so
> + * the sequence always holds exactly fds_length elements.
> + *
> + * The user array is copied into a kmalloc'd buffer in TP_code_pre and
> + * released in TP_code_post. Any failure (size overflow, allocation, copy)
> + * results in an empty sequence.
> + */
> +SC_LTTNG_TRACEPOINT_EVENT_CODE(ppoll,
> +	TP_PROTO(sc_exit(long ret,) struct pollfd __user * ufds,
> +		unsigned int nfds, struct timespec * tsp,
> +		const sigset_t * sigmask, size_t sigsetsize),
> +	TP_ARGS(sc_exit(ret,) ufds, nfds, tsp, sigmask, sigsetsize),
> +	TP_locvar(
> +		unsigned int fds_length, fds_max_len;
> +		struct pollfd *fds;
> +	),
> +	TP_code_pre(
> +		BUILD_BUG_ON(((ARRAY_SIZE(lttng_pollfd_flag_fields) - 1) +
> +				POLL_FLAGS_PADDING_SIZE) !=
> +					sizeof(uint8_t) * BITS_PER_BYTE);
> +		sc_in(
> +			tp_locvar->fds_length = nfds;
> +		)
> +		/*
> +		 * On exit, the number of active FDs is determined by ret,
> +		 * nfds stays the same as the entry, but we only want to
> +		 * output the FDs that are relevant.
> +		 */
> +		sc_out(
> +			if (ret > 0) {
> +				tp_locvar->fds_length = ret;
> +				tp_locvar->fds_max_len = nfds;
> +			} else {
> +				tp_locvar->fds_length = 0;
> +				tp_locvar->fds_max_len = 0;
> +			}
> +		)
> +		sc_inout({
> +			size_t fds_bytes = (size_t) nfds * sizeof(struct pollfd);
> +			int err;
> +
> +			/*
> +			 * Only allocate when the byte count did not wrap around
> +			 * size_t; a wrapped size would yield a buffer shorter
> +			 * than the nfds entries the field code iterates over.
> +			 */
> +			tp_locvar->fds = NULL;
> +			if (fds_bytes / sizeof(struct pollfd) == nfds)
> +				tp_locvar->fds = kmalloc(fds_bytes, GFP_KERNEL);
> +			if (!tp_locvar->fds) {
> +				tp_locvar->fds_length = 0;
> +				tp_locvar->fds_max_len = 0;
> +				goto skip_code;
> +			}
> +			err = copy_from_user(tp_locvar->fds, ufds, fds_bytes);
> +			if (err) {
> +				tp_locvar->fds_length = 0;
> +				tp_locvar->fds_max_len = 0;
> +			}
> +		}
> +		skip_code:
> +			; /* A label must be followed by a statement. */
> +		)
> +	),
> +	TP_FIELDS(
> +		sc_exit(ctf_integer(long, ret, ret))
> +		sc_in(ctf_integer(struct timespec *, tsp, tsp))
> +		sc_in(ctf_integer(const sigset_t *, sigmask, sigmask))
> +		sc_in(ctf_integer(size_t, sigsetsize, sigsetsize))
> +		sc_inout(ctf_integer(unsigned int, nfds, nfds))
> +		sc_inout(ctf_integer(unsigned int, fds_length, tp_locvar->fds_length))
> +		sc_in(
> +			ctf_custom_field(
> +				ctf_custom_type(
> +					.atype = atype_sequence_compound,
> +					.u.sequence_compound.length_name = "fds_length",
> +					.u.sequence_compound.elem_type = &lttng_pollfd_elem,
> +				),
> +				fds,
> +				ctf_custom_code(
> +					uint32_t i;
> +
> +					ctf_align(int) /* Align on largest field in struct. */
> +					for (i = 0; i < tp_locvar->fds_length; i++) {
> +						ctf_integer_type(int, tp_locvar->fds[i].fd)
> +						ctf_integer_type(short, tp_locvar->fds[i].events)
> +						ctf_integer_bitfield_type(uint8_t,
> +							(uint8_t) cpu_to_le16(tp_locvar->fds[i].events))
> +					}
> +				)
> +			)
> +		)
> +		sc_out(
> +			ctf_custom_field(
> +				ctf_custom_type(
> +					.atype = atype_sequence_compound,
> +					.u.sequence_compound.length_name = "fds_length",
> +					.u.sequence_compound.elem_type = &lttng_pollfd_elem,
> +				),
> +				fds,
> +				ctf_custom_code(
> +					uint32_t i;
> +					unsigned int nr = 0;
> +
> +					ctf_align(int) /* Align on largest field in struct. */
> +					/*
> +					 * Iterate over the complete array, but only output
> +					 * "ret" active FDs. nr never exceeds fds_length.
> +					 */
> +					for (i = 0; i < tp_locvar->fds_max_len; i++) {
> +						if (!tp_locvar->fds[i].revents)
> +							continue;
> +						if (nr >= tp_locvar->fds_length)
> +							break;
> +						nr++;
> +						ctf_integer_type(int, tp_locvar->fds[i].fd)
> +						ctf_integer_type(short, tp_locvar->fds[i].revents)
> +						ctf_integer_bitfield_type(uint8_t,
> +							(uint8_t) cpu_to_le16(tp_locvar->fds[i].revents))
> +					}
> +					/*
> +					 * If there is a discrepancy between ret and the
> +					 * content of revents (e.g. caused by userspace corrupting
> +					 * the array from a concurrent thread), we have to output
> +					 * zeros to keep the trace readable: emit the
> +					 * fds_length - nr entries that are still missing.
> +					 */
> +					for (i = nr; i < tp_locvar->fds_length; i++) {
> +						ctf_integer_type(int, 0)
> +						ctf_integer_type(short, 0)
> +						ctf_integer_bitfield_type(uint8_t, 0)
> +					}
> +				)
> +			)
> +		)
> +	),
> +	TP_code_post(
> +		kfree(tp_locvar->fds);
> +	)
> +)
> +#endif /* defined(CONFIG_X86_32) || defined(CONFIG_X86_64) || defined(CONFIG_ARM64) || defined(CONFIG_ARM) */
> +
> #endif /* CREATE_SYSCALL_TABLE */
> --
> 1.9.1

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com


More information about the lttng-dev mailing list