[lttng-dev] [PATCH lttng-modules v4 3/5] Extract the FDs and flags from poll and ppoll

Julien Desfossez jdesfossez at efficios.com
Fri Apr 15 20:37:19 UTC 2016


Instead of printing the pointer address of the poll set, extract all the
FDs and flags from the poll set. For now, we only decode the standardized
set of events to limit the verbosity of the output; the raw value is
also extracted. When we switch to CTF2, we will be able to hide unset
fields, and then we will extract all the fields.

Here is an example of output with one FD:
syscall_entry_poll: {
  timeout_msecs = -1, nfds = 1, fds_length = 1,
  fds = [
    [0] = { fd = 4, raw_events = 0x5, events = { POLLIN = 1, POLLPRI = 0,
            POLLOUT = 1, POLLERR = 0, POLLHUP = 0, padding = 0 } } ]
}

syscall_exit_poll: {
  ret = 1, nfds = 1, fds_length = 1,
  fds = [ [0] = { fd = 4, raw_events = 0x4, events = { POLLIN = 0,
  POLLPRI = 0, POLLOUT = 1, POLLERR = 0, POLLHUP = 0, padding = 0 } } ] }

Signed-off-by: Julien Desfossez <jdesfossez at efficios.com>
---
 .../syscalls/headers/syscalls_pointers_override.h  | 320 +++++++++++++++++++++
 1 file changed, 320 insertions(+)

diff --git a/instrumentation/syscalls/headers/syscalls_pointers_override.h b/instrumentation/syscalls/headers/syscalls_pointers_override.h
index debd4b2..a312036 100644
--- a/instrumentation/syscalls/headers/syscalls_pointers_override.h
+++ b/instrumentation/syscalls/headers/syscalls_pointers_override.h
@@ -340,4 +340,324 @@ SC_LTTNG_TRACEPOINT_EVENT_CODE(pselect6,
 )
 #endif /* defined(CONFIG_X86_32) || defined(CONFIG_X86_64) || defined(CONFIG_ARM64) || defined(CONFIG_ARM) */
 
+#ifndef ONCE_LTTNG_TRACE_POLL_H
+#define ONCE_LTTNG_TRACE_POLL_H
+
+/*
+ * LTTNG_POLL_NRFLAGS: one past the POLLNVAL bit value. It is used through
+ * ilog2() to pick the array slot of the "padding" field.
+ *
+ * POLL_FLAGS_PADDING_SIZE: number of bits left in a uint8_t after the bits
+ * below POLLNVAL. The whole expansion is parenthesized so the macro can be
+ * embedded in a larger expression without precedence surprises.
+ */
+#define LTTNG_POLL_NRFLAGS (POLLNVAL + 1)
+#define POLL_FLAGS_PADDING_SIZE \
+	((sizeof(uint8_t) * BITS_PER_BYTE) - ilog2(LTTNG_POLL_NRFLAGS - 1))
+
+/*
+ * Decoded view of the low byte of a pollfd event mask.
+ *
+ * Only POLLIN, POLLPRI, POLLOUT, POLLERR and POLLHUP are extracted for now.
+ * Each flag is stored at the array index matching its bit position
+ * (ilog2 of the flag value); the last slot is a padding field of
+ * POLL_FLAGS_PADDING_SIZE bits that completes the byte.
+ *
+ * POLLNVAL is intentionally not listed as its own field: for a
+ * power-of-two POLLNVAL, ilog2(POLLNVAL) and ilog2(LTTNG_POLL_NRFLAGS)
+ * designate the same slot, so a POLLNVAL entry would be silently
+ * overridden by "padding". The POLLNVAL bit is therefore reported as part
+ * of the padding bits.
+ */
+static struct lttng_event_field lttng_pollfd_flag_fields[] = {
+	[ilog2(POLLIN)] = {
+		.name = "POLLIN",
+		.type = __type_integer(int, 1, 1, 0, __LITTLE_ENDIAN, 10, none),
+	},
+	[ilog2(POLLPRI)] = {
+		.name = "POLLPRI",
+		.type = __type_integer(int, 1, 1, 0, __LITTLE_ENDIAN, 10, none),
+	},
+	[ilog2(POLLOUT)] = {
+		.name = "POLLOUT",
+		.type = __type_integer(int, 1, 1, 0, __LITTLE_ENDIAN, 10, none),
+	},
+	[ilog2(POLLERR)] = {
+		.name = "POLLERR",
+		.type = __type_integer(int, 1, 1, 0, __LITTLE_ENDIAN, 10, none),
+	},
+	[ilog2(POLLHUP)] = {
+		.name = "POLLHUP",
+		.type = __type_integer(int, 1, 1, 0, __LITTLE_ENDIAN, 10, none),
+	},
+	/* Remaining bits of the byte, POLLNVAL included. */
+	[ilog2(LTTNG_POLL_NRFLAGS)] = {
+		.name = "padding",
+		.type = __type_integer(int, POLL_FLAGS_PADDING_SIZE, 1, 0,
+				__LITTLE_ENDIAN, 10, none),
+	},
+};
+
+/*
+ * Fields of one pollfd element as written to the trace, in order: the
+ * file descriptor, the raw event mask, and a structure made of the
+ * single-bit fields described by lttng_pollfd_flag_fields.
+ */
+static struct lttng_event_field lttng_pollfd_fields[] = {
+	{
+		.name = "fd",
+		.type = __type_integer(int, 0, 0, 0, __BYTE_ORDER, 10, none),
+	},
+	{
+		.name = "raw_events",
+		.type = __type_integer(short, 0, 0, 0, __BYTE_ORDER, 16, none),
+	},
+	{
+		.name = "events",
+		.type = {
+			.atype = atype_struct,
+			.u = {
+				._struct = {
+					.nr_fields = ARRAY_SIZE(lttng_pollfd_flag_fields),
+					.fields = lttng_pollfd_flag_fields,
+				},
+			},
+		},
+	},
+};
+
+/*
+ * Compound type of one sequence element: a structure holding the fields
+ * listed in lttng_pollfd_fields. Referenced as elem_type by the "fds"
+ * sequences of the poll and ppoll events.
+ */
+static struct lttng_type lttng_pollfd_elem = {
+	.atype = atype_struct,
+	.u = {
+		._struct = {
+			.nr_fields = ARRAY_SIZE(lttng_pollfd_fields),
+			.fields = lttng_pollfd_fields,
+		},
+	},
+};
+#endif /* ONCE_LTTNG_TRACE_POLL_H */
+
+#if defined(CONFIG_X86_32) || defined(CONFIG_X86_64)
+#define OVERRIDE_32_poll
+#define OVERRIDE_64_poll
+/*
+ * poll: record the content of the pollfd array instead of its address.
+ *
+ * Entry event: timeout_msecs, nfds, fds_length (set to nfds) and one
+ * element per entry of the user array (fd, raw events, decoded events).
+ * Exit event: ret, nfds, fds_length (set to ret when ret > 0, 0 otherwise)
+ * and one element per entry whose revents is non-zero, limited to
+ * fds_length elements.
+ *
+ * TP_code_pre copies the user array into a kernel buffer; TP_code_post
+ * frees it. On any failure both lengths are forced to 0 so that an empty
+ * sequence is recorded.
+ */
+SC_LTTNG_TRACEPOINT_EVENT_CODE(poll,
+	TP_PROTO(sc_exit(long ret,) struct pollfd __user * ufds,
+		unsigned int nfds, int timeout_msecs),
+	TP_ARGS(sc_exit(ret,) ufds, nfds, timeout_msecs),
+	TP_locvar(
+		unsigned int fds_length, fds_max_len;
+		struct pollfd *fds;
+	),
+	TP_code_pre(
+		/* The decoded flags plus the padding must fill exactly one byte. */
+		BUILD_BUG_ON(((ARRAY_SIZE(lttng_pollfd_flag_fields) - 1) +
+				POLL_FLAGS_PADDING_SIZE) !=
+					sizeof(uint8_t) * BITS_PER_BYTE);
+		sc_in(
+			tp_locvar->fds_length = nfds;
+		)
+		/*
+		 * On exit, the number of active FDs is determined by ret,
+		 * nfds stays the same as the entry, but we only want to
+		 * output the FDs that are relevant.
+		 */
+		sc_out(
+			if (ret > 0) {
+				tp_locvar->fds_length = ret;
+				tp_locvar->fds_max_len = nfds;
+			} else {
+				tp_locvar->fds_length = 0;
+				tp_locvar->fds_max_len = 0;
+			}
+		)
+		sc_inout({
+			int err;
+
+			/*
+			 * nfds is controlled by userspace. Reject counts whose
+			 * byte size would wrap size_t: the buffer would be
+			 * smaller than the number of entries walked in
+			 * TP_FIELDS.
+			 */
+			if (nfds > (~(size_t) 0) / sizeof(struct pollfd)) {
+				tp_locvar->fds = NULL;
+				tp_locvar->fds_length = 0;
+				tp_locvar->fds_max_len = 0;
+				goto skip_code;
+			}
+			/*
+			 * __GFP_NOWARN: an oversized request coming from
+			 * userspace must not produce an allocation warning.
+			 * NOTE(review): GFP_KERNEL and copy_from_user() may
+			 * sleep; confirm this code runs in a context where
+			 * sleeping is allowed.
+			 */
+			tp_locvar->fds = kmalloc(nfds * sizeof(struct pollfd),
+				GFP_KERNEL | __GFP_NOWARN);
+			if (!tp_locvar->fds) {
+				tp_locvar->fds_length = 0;
+				tp_locvar->fds_max_len = 0;
+				goto skip_code;
+			}
+			err = copy_from_user(tp_locvar->fds, ufds,
+				nfds * sizeof(struct pollfd));
+			if (err) {
+				/* Partial copy: record an empty sequence. */
+				tp_locvar->fds_length = 0;
+				tp_locvar->fds_max_len = 0;
+			}
+		}
+		skip_code:
+		)
+	),
+	TP_FIELDS(
+		sc_exit(ctf_integer(long, ret, ret))
+		sc_in(ctf_integer(int, timeout_msecs, timeout_msecs))
+		sc_inout(ctf_integer(unsigned int, nfds, nfds))
+		sc_inout(ctf_integer(unsigned int, fds_length, tp_locvar->fds_length))
+		sc_in(
+			ctf_custom_field(
+				ctf_custom_type(
+					.atype = atype_sequence_compound,
+					.u.sequence_compound.length_name = "fds_length",
+					.u.sequence_compound.elem_type = &lttng_pollfd_elem,
+				),
+				fds,
+				ctf_custom_code(
+					uint32_t i;
+
+					ctf_align(int) /* Align on largest field in struct. */
+					for (i = 0; i < tp_locvar->fds_length; i++) {
+						ctf_integer_type(int, tp_locvar->fds[i].fd)
+						ctf_integer_type(short, tp_locvar->fds[i].events)
+						/*
+						 * The decoded flags are the low 8 bits of
+						 * the value; a truncating cast selects them
+						 * whatever the host byte order is.
+						 */
+						ctf_integer_bitfield_type(uint8_t,
+							(uint8_t) tp_locvar->fds[i].events)
+					}
+				)
+			)
+		)
+		sc_out(
+			ctf_custom_field(
+				ctf_custom_type(
+					.atype = atype_sequence_compound,
+					.u.sequence_compound.length_name = "fds_length",
+					.u.sequence_compound.elem_type = &lttng_pollfd_elem,
+				),
+				fds,
+				ctf_custom_code(
+					uint32_t i;
+					uint32_t nr = 0;
+
+					ctf_align(int) /* Align on largest field in struct. */
+					/*
+					 * Iterate over the complete array, but only output
+					 * "ret" active FDs.
+					 */
+					for (i = 0; i < tp_locvar->fds_max_len; i++) {
+						if (!tp_locvar->fds[i].revents)
+							continue;
+						/*
+						 * Stop before exceeding the announced
+						 * length, so nr never goes past fds_length.
+						 */
+						if (nr >= tp_locvar->fds_length)
+							break;
+						nr++;
+						ctf_integer_type(int, tp_locvar->fds[i].fd)
+						ctf_integer_type(short, tp_locvar->fds[i].revents)
+						/* Low 8 bits of the value, see entry event. */
+						ctf_integer_bitfield_type(uint8_t,
+							(uint8_t) tp_locvar->fds[i].revents)
+					}
+					/*
+					 * If there is a discrepancy between ret and the
+					 * content of revents (e.g. caused by userspace corrupting
+					 * the array from a concurrent thread), we have to output
+					 * zeros to keep the trace readable: emit the
+					 * (fds_length - nr) missing elements.
+					 */
+					for (i = nr; i < tp_locvar->fds_length; i++) {
+						ctf_integer_type(int, 0)
+						ctf_integer_type(short, 0)
+						ctf_integer_bitfield_type(uint8_t, 0)
+					}
+				)
+			)
+		)
+	),
+	TP_code_post(
+		/* fds is NULL when the snapshot was not allocated. */
+		kfree(tp_locvar->fds);
+	)
+)
+#endif /* defined(CONFIG_X86_32) || defined(CONFIG_X86_64) */
+
+#if defined(CONFIG_X86_32) || defined(CONFIG_X86_64) || defined(CONFIG_ARM64) || defined(CONFIG_ARM)
+#define OVERRIDE_32_ppoll
+#define OVERRIDE_64_ppoll
+/*
+ * ppoll: record the content of the pollfd array instead of its address.
+ *
+ * Entry event: tsp, sigmask and sigsetsize as integers, nfds, fds_length
+ * (set to nfds) and one element per entry of the user array (fd, raw
+ * events, decoded events).
+ * Exit event: ret, nfds, fds_length (set to ret when ret > 0, 0 otherwise)
+ * and one element per entry whose revents is non-zero, limited to
+ * fds_length elements.
+ *
+ * TP_code_pre copies the user array into a kernel buffer; TP_code_post
+ * frees it. On any failure both lengths are forced to 0 so that an empty
+ * sequence is recorded.
+ */
+SC_LTTNG_TRACEPOINT_EVENT_CODE(ppoll,
+	TP_PROTO(sc_exit(long ret,) struct pollfd __user * ufds,
+		unsigned int nfds, struct timespec * tsp, const sigset_t * sigmask, size_t sigsetsize),
+	TP_ARGS(sc_exit(ret,) ufds, nfds, tsp, sigmask, sigsetsize),
+	TP_locvar(
+		unsigned int fds_length, fds_max_len;
+		struct pollfd *fds;
+	),
+	TP_code_pre(
+		/* The decoded flags plus the padding must fill exactly one byte. */
+		BUILD_BUG_ON(((ARRAY_SIZE(lttng_pollfd_flag_fields) - 1) +
+				POLL_FLAGS_PADDING_SIZE) !=
+					sizeof(uint8_t) * BITS_PER_BYTE);
+		sc_in(
+			tp_locvar->fds_length = nfds;
+		)
+		/*
+		 * On exit, the number of active FDs is determined by ret,
+		 * nfds stays the same as the entry, but we only want to
+		 * output the FDs that are relevant.
+		 */
+		sc_out(
+			if (ret > 0) {
+				tp_locvar->fds_length = ret;
+				tp_locvar->fds_max_len = nfds;
+			} else {
+				tp_locvar->fds_length = 0;
+				tp_locvar->fds_max_len = 0;
+			}
+		)
+		sc_inout({
+			int err;
+
+			/*
+			 * nfds is controlled by userspace. Reject counts whose
+			 * byte size would wrap size_t: the buffer would be
+			 * smaller than the number of entries walked in
+			 * TP_FIELDS.
+			 */
+			if (nfds > (~(size_t) 0) / sizeof(struct pollfd)) {
+				tp_locvar->fds = NULL;
+				tp_locvar->fds_length = 0;
+				tp_locvar->fds_max_len = 0;
+				goto skip_code;
+			}
+			/*
+			 * __GFP_NOWARN: an oversized request coming from
+			 * userspace must not produce an allocation warning.
+			 * NOTE(review): GFP_KERNEL and copy_from_user() may
+			 * sleep; confirm this code runs in a context where
+			 * sleeping is allowed.
+			 */
+			tp_locvar->fds = kmalloc(nfds * sizeof(struct pollfd),
+				GFP_KERNEL | __GFP_NOWARN);
+			if (!tp_locvar->fds) {
+				tp_locvar->fds_length = 0;
+				tp_locvar->fds_max_len = 0;
+				goto skip_code;
+			}
+			err = copy_from_user(tp_locvar->fds, ufds,
+				nfds * sizeof(struct pollfd));
+			if (err) {
+				/* Partial copy: record an empty sequence. */
+				tp_locvar->fds_length = 0;
+				tp_locvar->fds_max_len = 0;
+			}
+		}
+		skip_code:
+		)
+	),
+	TP_FIELDS(
+		sc_exit(ctf_integer(long, ret, ret))
+		sc_in(ctf_integer(struct timespec *, tsp, tsp))
+		sc_in(ctf_integer(const sigset_t *, sigmask, sigmask))
+		sc_in(ctf_integer(size_t, sigsetsize, sigsetsize))
+		sc_inout(ctf_integer(unsigned int, nfds, nfds))
+		sc_inout(ctf_integer(unsigned int, fds_length, tp_locvar->fds_length))
+		sc_in(
+			ctf_custom_field(
+				ctf_custom_type(
+					.atype = atype_sequence_compound,
+					.u.sequence_compound.length_name = "fds_length",
+					.u.sequence_compound.elem_type = &lttng_pollfd_elem,
+				),
+				fds,
+				ctf_custom_code(
+					uint32_t i;
+
+					ctf_align(int) /* Align on largest field in struct. */
+					for (i = 0; i < tp_locvar->fds_length; i++) {
+						ctf_integer_type(int, tp_locvar->fds[i].fd)
+						ctf_integer_type(short, tp_locvar->fds[i].events)
+						/*
+						 * The decoded flags are the low 8 bits of
+						 * the value; a truncating cast selects them
+						 * whatever the host byte order is.
+						 */
+						ctf_integer_bitfield_type(uint8_t,
+							(uint8_t) tp_locvar->fds[i].events)
+					}
+				)
+			)
+		)
+		sc_out(
+			ctf_custom_field(
+				ctf_custom_type(
+					.atype = atype_sequence_compound,
+					.u.sequence_compound.length_name = "fds_length",
+					.u.sequence_compound.elem_type = &lttng_pollfd_elem,
+				),
+				fds,
+				ctf_custom_code(
+					uint32_t i;
+					uint32_t nr = 0;
+
+					ctf_align(int) /* Align on largest field in struct. */
+					/*
+					 * Iterate over the complete array, but only output
+					 * "ret" active FDs.
+					 */
+					for (i = 0; i < tp_locvar->fds_max_len; i++) {
+						if (!tp_locvar->fds[i].revents)
+							continue;
+						/*
+						 * Stop before exceeding the announced
+						 * length, so nr never goes past fds_length.
+						 */
+						if (nr >= tp_locvar->fds_length)
+							break;
+						nr++;
+						ctf_integer_type(int, tp_locvar->fds[i].fd)
+						ctf_integer_type(short, tp_locvar->fds[i].revents)
+						/* Low 8 bits of the value, see entry event. */
+						ctf_integer_bitfield_type(uint8_t,
+							(uint8_t) tp_locvar->fds[i].revents)
+					}
+					/*
+					 * If there is a discrepancy between ret and the
+					 * content of revents (e.g. caused by userspace corrupting
+					 * the array from a concurrent thread), we have to output
+					 * zeros to keep the trace readable: emit the
+					 * (fds_length - nr) missing elements.
+					 */
+					for (i = nr; i < tp_locvar->fds_length; i++) {
+						ctf_integer_type(int, 0)
+						ctf_integer_type(short, 0)
+						ctf_integer_bitfield_type(uint8_t, 0)
+					}
+				)
+			)
+		)
+	),
+	TP_code_post(
+		/* fds is NULL when the snapshot was not allocated. */
+		kfree(tp_locvar->fds);
+	)
+)
+#endif /* defined(CONFIG_X86_32) || defined(CONFIG_X86_64) || defined(CONFIG_ARM64) || defined(CONFIG_ARM) */
+
 #endif /* CREATE_SYSCALL_TABLE */
-- 
1.9.1



More information about the lttng-dev mailing list