[lttng-dev] [PATCH lttng-modules v5 3/5] Extract the FDs and flags from poll and ppoll

Julien Desfossez jdesfossez at efficios.com
Sat Apr 23 00:16:50 UTC 2016


Instead of printing the pointer address of the poll set, extract all the
FDs and flags from the poll set. For now, we only decode the
standardized set of events to limit the verbosity of the output; we also
extract the raw value. When we switch to CTF2, we will be able to hide
unset fields, and then we will extract all the fields.

Here is an example of output with one FD:
syscall_entry_poll: {
  timeout_msecs = -1, nfds = 1, fds_length = 1,
  fds = [
    [0] = { fd = 4, raw_events = 0x5, events = { POLLIN = 1, POLLPRI = 0,
            POLLOUT = 1, POLLERR = 0, POLLHUP = 0, padding = 0 } } ]
}

syscall_exit_poll: {
  ret = 1, nfds = 1, fds_length = 1,
  fds = [ [0] = { fd = 4, raw_events = 0x4, events = { POLLIN = 0,
  POLLPRI = 0, POLLOUT = 1, POLLERR = 0, POLLHUP = 0, padding = 0 } } ] }

Signed-off-by: Julien Desfossez <jdesfossez at efficios.com>
---
 .../syscalls/headers/syscalls_pointers_override.h  | 360 +++++++++++++++++++++
 1 file changed, 360 insertions(+)

diff --git a/instrumentation/syscalls/headers/syscalls_pointers_override.h b/instrumentation/syscalls/headers/syscalls_pointers_override.h
index ef4dc1c..6ade85c 100644
--- a/instrumentation/syscalls/headers/syscalls_pointers_override.h
+++ b/instrumentation/syscalls/headers/syscalls_pointers_override.h
@@ -563,4 +563,364 @@ SC_LTTNG_TRACEPOINT_EVENT_CODE(pselect6,
 )
 #endif /* defined(CONFIG_X86_32) || defined(CONFIG_X86_64) || defined(CONFIG_ARM64) || defined(CONFIG_ARM) */
 
+#ifndef ONCE_LTTNG_TRACE_POLL_H
+#define ONCE_LTTNG_TRACE_POLL_H
+
+#define LTTNG_POLL_NRFLAGS (POLLNVAL + 1)
+/* Bit width of the "padding" flag field; fully parenthesized so it is safe in any expression. */
+#define POLL_FLAGS_PADDING_SIZE ((sizeof(uint8_t) * BITS_PER_BYTE) - ilog2(LTTNG_POLL_NRFLAGS - 1))
+
+/*
+ * Only extract the values specified by iBCS2 for now. Each flag is a 1-bit
+ * field placed at its bit position; "padding" covers the rest of the byte.
+ */
+static struct lttng_event_field lttng_pollfd_flag_fields[] = {
+	[ilog2(POLLIN)] = {
+		.name = "POLLIN",
+		.type = __type_integer(int, 1, 1, 0, __LITTLE_ENDIAN, 10, none),
+	},
+	[ilog2(POLLPRI)] = {
+		.name = "POLLPRI",
+		.type = __type_integer(int, 1, 1, 0, __LITTLE_ENDIAN, 10, none),
+	},
+	[ilog2(POLLOUT)] = {
+		.name = "POLLOUT",
+		.type = __type_integer(int, 1, 1, 0, __LITTLE_ENDIAN, 10, none),
+	},
+	[ilog2(POLLERR)] = {
+		.name = "POLLERR",
+		.type = __type_integer(int, 1, 1, 0, __LITTLE_ENDIAN, 10, none),
+	},
+	[ilog2(POLLHUP)] = {
+		.name = "POLLHUP",
+		.type = __type_integer(int, 1, 1, 0, __LITTLE_ENDIAN, 10, none),
+	},
+	/*
+	 * No POLLNVAL entry: it would share the "padding" index and be overridden.
+	 */
+	[ilog2(LTTNG_POLL_NRFLAGS)] = {
+		.name = "padding",
+		.type = __type_integer(int, POLL_FLAGS_PADDING_SIZE, 1, 0,
+				__LITTLE_ENDIAN, 10, none),
+	},
+};
+
+/*
+ * One element of the traced "fds" sequence: the fd, the event mask as a raw
+ * base-16 integer, and its low byte decoded via lttng_pollfd_flag_fields.
+ */
+static struct lttng_event_field lttng_pollfd_fields[] = {
+	{ .name = "fd", .type = __type_integer(int, 0, 0, 0, __BYTE_ORDER, 10, none) },
+	{ .name = "raw_events", .type = __type_integer(short, 0, 0, 0, __BYTE_ORDER, 16, none) },
+	{
+		.name = "events",
+		.type = {
+			.atype = atype_struct,
+			.u._struct = {
+				.nr_fields = ARRAY_SIZE(lttng_pollfd_flag_fields),
+				.fields = lttng_pollfd_flag_fields,
+			},
+		},
+	},
+};
+
+/* Element type of the "fds" sequence: a struct described by lttng_pollfd_fields. */
+static struct lttng_type lttng_pollfd_elem = {
+	.atype = atype_struct,
+	.u._struct = { .nr_fields = ARRAY_SIZE(lttng_pollfd_fields), .fields = lttng_pollfd_fields },
+};
+#endif /* ONCE_LTTNG_TRACE_POLL_H */
+
+#if defined(CONFIG_X86_32) || defined(CONFIG_X86_64)
+#define OVERRIDE_32_poll
+#define OVERRIDE_64_poll
+/*
+ * poll: on entry, trace the pollfd set passed by userspace (fd and requested
+ * events); on exit, trace only the entries whose revents is non-zero. At most
+ * PAGE_SIZE bytes of the set are copied; "overflow" is set when it is capped.
+ */
+SC_LTTNG_TRACEPOINT_EVENT_CODE(poll,
+	TP_PROTO(sc_exit(long ret,) struct pollfd __user * ufds,
+		unsigned int nfds, int timeout_msecs),
+	TP_ARGS(sc_exit(ret,) ufds, nfds, timeout_msecs),
+	TP_locvar(
+		unsigned int fds_length, fds_max_len;	/* elements emitted, entries copied */
+		struct pollfd *fds;			/* kernel copy of ufds, freed in TP_code_post */
+		unsigned int alloc_fds;			/* size of fds, in bytes */
+		unsigned int overflow;			/* 1 when the set was capped to PAGE_SIZE */
+	),
+	TP_code_pre(
+		BUILD_BUG_ON(((ARRAY_SIZE(lttng_pollfd_flag_fields) - 1) +
+				POLL_FLAGS_PADDING_SIZE) != sizeof(uint8_t) * BITS_PER_BYTE);
+		tp_locvar->fds = NULL;
+		tp_locvar->overflow = 0;
+
+		sc_in(
+			if (nfds > PAGE_SIZE / sizeof(struct pollfd)) {	/* division cannot wrap */
+				tp_locvar->fds_length = PAGE_SIZE / sizeof(struct pollfd);
+				tp_locvar->fds_max_len = PAGE_SIZE / sizeof(struct pollfd);
+				tp_locvar->overflow = 1;
+			} else {
+				tp_locvar->fds_length = nfds;
+				tp_locvar->fds_max_len = nfds;
+			}
+			tp_locvar->alloc_fds = tp_locvar->fds_length * sizeof(struct pollfd);
+		)
+		/*
+		 * On exit, ret is the number of active FDs while nfds is unchanged
+		 * from entry: scan all copied entries but output only the active ones.
+		 */
+		sc_out(
+			if (ret <= 0 || ret > nfds)
+				goto error;
+
+			if (nfds > PAGE_SIZE / sizeof(struct pollfd)) {	/* division cannot wrap */
+				tp_locvar->fds_length = PAGE_SIZE / sizeof(struct pollfd);
+				tp_locvar->fds_max_len = PAGE_SIZE / sizeof(struct pollfd);
+				tp_locvar->overflow = 1;
+			} else {
+				tp_locvar->fds_length = ret;
+				tp_locvar->fds_max_len = nfds;
+			}
+			tp_locvar->alloc_fds = tp_locvar->fds_max_len * sizeof(struct pollfd);
+		)
+		{
+			int err;
+
+			tp_locvar->fds = kmalloc(tp_locvar->alloc_fds, GFP_ATOMIC);
+			if (!tp_locvar->fds)
+				goto error;
+			/* Copy exactly what was allocated, never the uncapped nfds. */
+			err = lib_ring_buffer_copy_from_user_check_nofault(
+				tp_locvar->fds, ufds, tp_locvar->alloc_fds);
+			if (err != 0)
+				goto error;
+		}
+		goto end;
+
+	error:
+		tp_locvar->fds_length = 0;
+		tp_locvar->fds_max_len = 0;
+	end:
+		;
+	),
+	TP_FIELDS(
+		sc_exit(ctf_integer(long, ret, ret))
+		sc_in(ctf_integer(int, timeout_msecs, timeout_msecs))
+		sc_inout(ctf_integer(unsigned int, nfds, nfds))
+		sc_inout(ctf_integer(unsigned int, fds_length, tp_locvar->fds_length))
+		sc_in(ctf_integer(int, overflow, tp_locvar->overflow))
+		sc_in(
+			ctf_custom_field(
+				ctf_custom_type(
+					.atype = atype_sequence_compound,
+					.u.sequence_compound.length_name = "fds_length",
+					.u.sequence_compound.elem_type = &lttng_pollfd_elem,
+				),
+				fds,
+				ctf_custom_code(
+					uint32_t i;
+
+					ctf_align(int) /* Align on largest field in struct. */
+					for (i = 0; i < tp_locvar->fds_length; i++) {
+						ctf_integer_type(int, tp_locvar->fds[i].fd)
+						ctf_integer_type(short, tp_locvar->fds[i].events)
+						ctf_integer_bitfield_type(uint8_t, (uint8_t) tp_locvar->fds[i].events)
+					}
+				)
+			)
+		)
+		sc_out(
+			ctf_custom_field(
+				ctf_custom_type(
+					.atype = atype_sequence_compound,
+					.u.sequence_compound.length_name = "fds_length",
+					.u.sequence_compound.elem_type = &lttng_pollfd_elem,
+				),
+				fds,
+				ctf_custom_code(
+					uint32_t i;
+					unsigned int nr = 0;
+
+					ctf_align(int) /* Align on largest field in struct. */
+					/*
+					 * Iterate over the copied array, but output at most
+					 * fds_length active FDs (nr never exceeds fds_length).
+					 */
+					for (i = 0; i < tp_locvar->fds_max_len &&
+							nr < tp_locvar->fds_length; i++) {
+						if (!tp_locvar->fds[i].revents)
+							continue;
+						nr++;
+						ctf_integer_type(int, tp_locvar->fds[i].fd)
+						ctf_integer_type(short, tp_locvar->fds[i].revents)
+						ctf_integer_bitfield_type(uint8_t, (uint8_t) tp_locvar->fds[i].revents)
+					}
+					/*
+					 * If fewer than fds_length entries were active (e.g.
+					 * userspace rewrote the array from a concurrent thread),
+					 * pad with zeros so exactly fds_length elements are written.
+					 */
+					for (i = nr; i < tp_locvar->fds_length; i++) {
+						ctf_integer_type(int, 0)
+						ctf_integer_type(short, 0)
+						ctf_integer_bitfield_type(uint8_t, 0)
+					}
+				)
+			)
+		)
+	),
+	TP_code_post(
+		kfree(tp_locvar->fds);
+	)
+)
+#endif /* defined(CONFIG_X86_32) || defined(CONFIG_X86_64) */
+
+#if defined(CONFIG_X86_32) || defined(CONFIG_X86_64) || defined(CONFIG_ARM64) || defined(CONFIG_ARM)
+#define OVERRIDE_32_ppoll
+#define OVERRIDE_64_ppoll
+/*
+ * ppoll: on entry, trace the pollfd set passed by userspace (fd and requested
+ * events); on exit, trace only the entries whose revents is non-zero. At most
+ * PAGE_SIZE bytes of the set are copied; "overflow" is set when it is capped.
+ */
+SC_LTTNG_TRACEPOINT_EVENT_CODE(ppoll,
+	TP_PROTO(sc_exit(long ret,) struct pollfd __user * ufds,
+		unsigned int nfds, struct timespec * tsp, const sigset_t * sigmask, size_t sigsetsize),
+	TP_ARGS(sc_exit(ret,) ufds, nfds, tsp, sigmask, sigsetsize),
+	TP_locvar(
+		unsigned int fds_length, fds_max_len;	/* elements emitted, entries copied */
+		struct pollfd *fds;			/* kernel copy of ufds, freed in TP_code_post */
+		unsigned int alloc_fds;			/* size of fds, in bytes */
+		unsigned int overflow;			/* 1 when the set was capped to PAGE_SIZE */
+	),
+	TP_code_pre(
+		BUILD_BUG_ON(((ARRAY_SIZE(lttng_pollfd_flag_fields) - 1) +
+				POLL_FLAGS_PADDING_SIZE) != sizeof(uint8_t) * BITS_PER_BYTE);
+		tp_locvar->fds = NULL;
+		tp_locvar->overflow = 0;
+
+		sc_in(
+			if (nfds > PAGE_SIZE / sizeof(struct pollfd)) {	/* division cannot wrap */
+				tp_locvar->fds_length = PAGE_SIZE / sizeof(struct pollfd);
+				tp_locvar->fds_max_len = PAGE_SIZE / sizeof(struct pollfd);
+				tp_locvar->overflow = 1;
+			} else {
+				tp_locvar->fds_length = nfds;
+				tp_locvar->fds_max_len = nfds;
+			}
+			tp_locvar->alloc_fds = tp_locvar->fds_length * sizeof(struct pollfd);
+		)
+		/*
+		 * On exit, ret is the number of active FDs while nfds is unchanged
+		 * from entry: scan all copied entries but output only the active ones.
+		 */
+		sc_out(
+			if (ret <= 0 || ret > nfds)
+				goto error;
+
+			if (nfds > PAGE_SIZE / sizeof(struct pollfd)) {	/* division cannot wrap */
+				tp_locvar->fds_length = PAGE_SIZE / sizeof(struct pollfd);
+				tp_locvar->fds_max_len = PAGE_SIZE / sizeof(struct pollfd);
+				tp_locvar->overflow = 1;
+			} else {
+				tp_locvar->fds_length = ret;
+				tp_locvar->fds_max_len = nfds;
+			}
+			tp_locvar->alloc_fds = tp_locvar->fds_max_len * sizeof(struct pollfd);
+		)
+		{
+			int err;
+
+			tp_locvar->fds = kmalloc(tp_locvar->alloc_fds, GFP_ATOMIC);
+			if (!tp_locvar->fds)
+				goto error;
+			/* Copy exactly what was allocated, never the uncapped nfds. */
+			err = lib_ring_buffer_copy_from_user_check_nofault(
+				tp_locvar->fds, ufds, tp_locvar->alloc_fds);
+			if (err != 0)
+				goto error;
+		}
+		goto end;
+
+	error:
+		tp_locvar->fds_length = 0;
+		tp_locvar->fds_max_len = 0;
+	end:
+		;
+	),
+	TP_FIELDS(
+		sc_exit(ctf_integer(long, ret, ret))
+		sc_in(ctf_integer(struct timespec *, tsp, tsp))
+		sc_in(ctf_integer(const sigset_t *, sigmask, sigmask))
+		sc_in(ctf_integer(size_t, sigsetsize, sigsetsize))
+		sc_inout(ctf_integer(unsigned int, nfds, nfds))
+		sc_inout(ctf_integer(unsigned int, fds_length, tp_locvar->fds_length))
+		sc_inout(ctf_integer(int, overflow, tp_locvar->overflow))
+		sc_in(
+			ctf_custom_field(
+				ctf_custom_type(
+					.atype = atype_sequence_compound,
+					.u.sequence_compound.length_name = "fds_length",
+					.u.sequence_compound.elem_type = &lttng_pollfd_elem,
+				),
+				fds,
+				ctf_custom_code(
+					uint32_t i;
+
+					ctf_align(int) /* Align on largest field in struct. */
+					for (i = 0; i < tp_locvar->fds_length; i++) {
+						ctf_integer_type(int, tp_locvar->fds[i].fd)
+						ctf_integer_type(short, tp_locvar->fds[i].events)
+						ctf_integer_bitfield_type(uint8_t, (uint8_t) tp_locvar->fds[i].events)
+					}
+				)
+			)
+		)
+		sc_out(
+			ctf_custom_field(
+				ctf_custom_type(
+					.atype = atype_sequence_compound,
+					.u.sequence_compound.length_name = "fds_length",
+					.u.sequence_compound.elem_type = &lttng_pollfd_elem,
+				),
+				fds,
+				ctf_custom_code(
+					uint32_t i;
+					unsigned int nr = 0;
+
+					ctf_align(int) /* Align on largest field in struct. */
+					/*
+					 * Iterate over the copied array, but output at most
+					 * fds_length active FDs (nr never exceeds fds_length).
+					 */
+					for (i = 0; i < tp_locvar->fds_max_len &&
+							nr < tp_locvar->fds_length; i++) {
+						if (!tp_locvar->fds[i].revents)
+							continue;
+						nr++;
+						ctf_integer_type(int, tp_locvar->fds[i].fd)
+						ctf_integer_type(short, tp_locvar->fds[i].revents)
+						ctf_integer_bitfield_type(uint8_t, (uint8_t) tp_locvar->fds[i].revents)
+					}
+					/*
+					 * If fewer than fds_length entries were active (e.g.
+					 * userspace rewrote the array from a concurrent thread),
+					 * pad with zeros so exactly fds_length elements are written.
+					 */
+					for (i = nr; i < tp_locvar->fds_length; i++) {
+						ctf_integer_type(int, 0)
+						ctf_integer_type(short, 0)
+						ctf_integer_bitfield_type(uint8_t, 0)
+					}
+				)
+			)
+		)
+	),
+	TP_code_post(
+		kfree(tp_locvar->fds);
+	)
+)
+#endif /* defined(CONFIG_X86_32) || defined(CONFIG_X86_64) || defined(CONFIG_ARM64) || defined(CONFIG_ARM) */
+
 #endif /* CREATE_SYSCALL_TABLE */
-- 
1.9.1



More information about the lttng-dev mailing list