[lttng-dev] [PATCH lttng-modules v7 2/5] Extract the FD sets in select and pselect6

Julien Desfossez jdesfossez at efficios.com
Sat Apr 30 15:09:27 UTC 2016


Instead of extracting the user-space pointers of the three fd_sets, we now
extract the bitmask of the FDs in each set (in, out, ex) in the form of
an array of uint8_t (the kernel limits a set to 1024 FDs).

In this example, we select on input FDs 4 to 19 (0xFFFF0); the call returns
that one FD is ready: FD 12 (0x1000).

syscall_entry_select: {
  n = 20,
  _fdset_in_length = 3, fdset_in = [ [0] = 0xF0, [1] = 0xFF, [2] = 0xF ],
  _fdset_out_length = 0, fdset_out = [ ],
  _fdset_ex_length = 0, fdset_ex = [ ],
  tvp = 0
 }

syscall_exit_select: {
  ret = 1,
  _fdset_in_length = 3, fdset_in = [ [0] = 0x0, [1] = 0x10, [2] = 0x0 ],
  _fdset_out_length = 0, fdset_out = [ ],
  _fdset_ex_length = 0, fdset_ex = [ ],
  tvp = 0
}

Signed-off-by: Julien Desfossez <jdesfossez at efficios.com>
---
 .../syscalls/headers/syscalls_pointers_override.h  | 248 +++++++++++++++++++++
 1 file changed, 248 insertions(+)

diff --git a/instrumentation/syscalls/headers/syscalls_pointers_override.h b/instrumentation/syscalls/headers/syscalls_pointers_override.h
index bf5c632..b9dd54a 100644
--- a/instrumentation/syscalls/headers/syscalls_pointers_override.h
+++ b/instrumentation/syscalls/headers/syscalls_pointers_override.h
@@ -53,4 +53,252 @@ SC_LTTNG_TRACEPOINT_EVENT(pipe2,
 	)
 )
 
+#define LTTNG_SYSCALL_SELECT_locvar	/* tp_locvar state shared by select and pselect6 */	\
+	unsigned long *fds_in, *fds_out, *fds_ex;	/* kmalloc'ed copies of the user sets, or NULL */	\
+	unsigned long nr_bytes, nr_ulong; 		/* set size in bytes and in unsigned longs */	\
+	uint8_t overflow;	/* 1 when the set size exceeded PAGE_SIZE */
+
+#define LTTNG_SYSCALL_SELECT_code_pre	/* TP_code_pre body: validate n, size and copy the fd sets */	\
+	sc_inout(									\
+	{										\
+		int err;								\
+		unsigned int n_in_bytes;						\
+		/* No buffer yet: lets code_post kfree() all three pointers. */		\
+		tp_locvar->fds_in = NULL;						\
+		tp_locvar->fds_out = NULL;						\
+		tp_locvar->fds_ex = NULL;						\
+		tp_locvar->overflow = 0;						\
+		/* sc_out() part (presumably exit event only): needs ret > 0. */	\
+		sc_out(									\
+			if (ret <= 0)							\
+				goto error;						\
+		)									\
+		/* A non-positive fd count leaves nothing to extract. */		\
+		if (n <= 0)								\
+			goto error;							\
+											\
+		/* On error or bogus input, don't copy anything. */			\
+		if (n >__FD_SETSIZE)							\
+			goto error;							\
+		/* Bytes needed to hold n bits, rounded up. */				\
+		n_in_bytes = DIV_ROUND_UP((unsigned int) n, BITS_PER_BYTE);		\
+											\
+		/*									\
+		 * Limit atomic memory allocation to one page, since n			\
+		 * is limited to 1024 and the smallest page size on Linux		\
+		 * is 4k, this should not happen, don't try to make it work.		\
+		 */									\
+		if (n_in_bytes > PAGE_SIZE) {						\
+			WARN_ON_ONCE(1);						\
+			/* Inform the user that we did not output everything. */	\
+			tp_locvar->overflow = 1;					\
+			goto error;							\
+		} else {								\
+			tp_locvar->nr_bytes = n_in_bytes;				\
+			tp_locvar->nr_ulong = DIV_ROUND_UP(n_in_bytes,			\
+					sizeof(unsigned long));				\
+		}									\
+		/* Copy each non-NULL user set into its own kernel buffer. */		\
+		if (inp) {								\
+			tp_locvar->fds_in = kmalloc(					\
+					tp_locvar->nr_ulong * sizeof(unsigned long),	\
+					GFP_ATOMIC | GFP_NOWAIT);			\
+			if (!tp_locvar->fds_in)						\
+				goto error;						\
+			/* NOTE(review): fds_field_* read via the user pointer, not this copy - confirm. */	\
+			err = lib_ring_buffer_copy_from_user_check_nofault(		\
+					tp_locvar->fds_in, inp,				\
+					tp_locvar->nr_ulong * sizeof(unsigned long));	\
+			if (err != 0)							\
+				goto error;						\
+		}									\
+		if (outp) {								\
+			tp_locvar->fds_out = kmalloc(					\
+					tp_locvar->nr_ulong * sizeof(unsigned long),	\
+					GFP_ATOMIC | GFP_NOWAIT);			\
+			if (!tp_locvar->fds_out)					\
+				goto error;						\
+											\
+			err = lib_ring_buffer_copy_from_user_check_nofault(		\
+					tp_locvar->fds_out, outp,			\
+					tp_locvar->nr_ulong * sizeof(unsigned long));	\
+			if (err != 0)							\
+				goto error;						\
+		}									\
+		if (exp) {								\
+			tp_locvar->fds_ex = kmalloc(					\
+					tp_locvar->nr_ulong * sizeof(unsigned long),	\
+					GFP_ATOMIC | GFP_NOWAIT);			\
+			if (!tp_locvar->fds_ex)						\
+				goto error;						\
+											\
+			err = lib_ring_buffer_copy_from_user_check_nofault(		\
+					tp_locvar->fds_ex, exp,				\
+					tp_locvar->nr_ulong * sizeof(unsigned long));	\
+			if (err != 0)							\
+				goto error;						\
+		}									\
+		goto end;								\
+		/* Failure: zero the sizes so that no set bytes are emitted. */		\
+error:											\
+	tp_locvar->nr_bytes = 0;							\
+	tp_locvar->nr_ulong = 0;							\
+end:	; /* Label at end of compound statement. */					\
+	}										\
+	)
+
+#define LTTNG_SYSCALL_SELECT_fds_field_LE(name, input)	/* fd set field, little-endian hosts */		\
+	ctf_custom_field(										\
+		ctf_custom_type(									\
+			.atype = atype_sequence,							\
+			.u.sequence.length_type = __type_integer(					\
+				uint8_t, 0, 0, 0, __BYTE_ORDER, 10, none),				\
+			.u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,			\
+				__BYTE_ORDER, 16, none),						\
+		),											\
+		name,											\
+		ctf_custom_code(									\
+			unsigned int src;								\
+			unsigned int nr_bytes_out = 0;							\
+			/* Sequence length: nr_bytes for a non-NULL set, else 0. */			\
+			if (input) {									\
+				ctf_integer_type(uint8_t, tp_locvar->nr_bytes)				\
+				ctf_align(uint8_t)							\
+			} else {									\
+				ctf_integer_type(uint8_t, 0)						\
+				ctf_align(uint8_t)							\
+				goto skip_##name;							\
+			}										\
+			/* Emit each word's bytes in address order; stop after nr_bytes. */		\
+			for (src = 0; src < tp_locvar->nr_ulong; src++) {				\
+				int dst;								\
+				for (dst = 0; dst < sizeof(long); dst++) {				\
+					if (nr_bytes_out++ >= tp_locvar->nr_bytes) {			\
+						goto skip_##name;					\
+					}								\
+					ctf_user_integer_type(uint8_t,					\
+						((uint8_t __user *) (input->fds_bits + src))[dst]);	\
+				}									\
+			}										\
+		skip_##name:   ;									\
+		)											\
+	)
+
+#define LTTNG_SYSCALL_SELECT_fds_field_BE(name, input)	/* fd set field, big-endian hosts */		\
+	ctf_custom_field(										\
+		ctf_custom_type(									\
+			.atype = atype_sequence,							\
+			.u.sequence.length_type = __type_integer(					\
+				uint8_t, 0, 0, 0, __BYTE_ORDER, 10, none),				\
+			.u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,			\
+				__BYTE_ORDER, 16, none),						\
+		),											\
+		name,											\
+		ctf_custom_code(									\
+			unsigned int src;								\
+			unsigned int nr_bytes_out = 0;							\
+			/* Sequence length: nr_bytes for a non-NULL set, else 0. */			\
+			if (input) {									\
+				ctf_integer_type(uint8_t, tp_locvar->nr_bytes)				\
+				ctf_align(uint8_t)							\
+			} else {									\
+				ctf_integer_type(uint8_t, 0)						\
+				ctf_align(uint8_t)							\
+				goto skip_##name;							\
+			}										\
+			/* Walk each word from its last byte (lowest bits on BE) to its first. */	\
+			for (src = 0; src < tp_locvar->nr_ulong; src++) {				\
+				int dst;								\
+				for (dst = sizeof(long) - 1; dst >= 0; dst--) {	/* last valid index */	\
+					if (nr_bytes_out++ >= tp_locvar->nr_bytes) {			\
+						goto skip_##name;					\
+					}								\
+					ctf_user_integer_type(uint8_t,					\
+						((uint8_t __user *) (input->fds_bits + src))[dst]);	\
+				}									\
+			}										\
+		skip_##name:   ;									\
+		)											\
+	)
+
+#define LTTNG_SYSCALL_SELECT_code_post	/* TP_code_post body: release the code_pre buffers */	\
+	kfree(tp_locvar->fds_in);	/* each pointer is NULL when nothing was allocated */	\
+	kfree(tp_locvar->fds_out);	\
+	kfree(tp_locvar->fds_ex);
+
+#if defined(CONFIG_X86_32) || defined(CONFIG_X86_64)
+#define OVERRIDE_32_select	/* presumably marks select as overridden in the 32-bit table - confirm */
+#define OVERRIDE_64_select	/* same, 64-bit table */
+SC_LTTNG_TRACEPOINT_EVENT_CODE(select,	/* select event recording the fd set contents */
+	TP_PROTO(sc_exit(long ret,) int n, fd_set __user *inp, fd_set __user *outp,
+		fd_set __user *exp, struct timeval __user *tvp),
+	TP_ARGS(sc_exit(ret,) n, inp, outp, exp, tvp),
+	TP_locvar(
+		LTTNG_SYSCALL_SELECT_locvar
+	),
+	TP_code_pre(
+		LTTNG_SYSCALL_SELECT_code_pre
+	),
+	TP_FIELDS(
+		sc_exit(ctf_integer(long, ret, ret))
+		sc_in(ctf_integer(int, n, n))
+		sc_inout(ctf_integer(uint8_t, overflow, tp_locvar->overflow))
+		sc_inout(ctf_integer(struct timeval *, tvp, tvp))
+		/* One byte sequence per fd set; the macro variant follows host byte order. */
+		sc_inout(
+#if (__BYTE_ORDER == __LITTLE_ENDIAN)
+			LTTNG_SYSCALL_SELECT_fds_field_LE(readfds, inp)
+			LTTNG_SYSCALL_SELECT_fds_field_LE(writefds, outp)
+			LTTNG_SYSCALL_SELECT_fds_field_LE(exceptfds, exp)
+#else
+			LTTNG_SYSCALL_SELECT_fds_field_BE(readfds, inp)
+			LTTNG_SYSCALL_SELECT_fds_field_BE(writefds, outp)
+			LTTNG_SYSCALL_SELECT_fds_field_BE(exceptfds, exp)
+#endif
+		)
+	),
+	TP_code_post(
+		LTTNG_SYSCALL_SELECT_code_post
+	)
+)
+#endif /* defined(CONFIG_X86_32) || defined(CONFIG_X86_64) */
+
+#if defined(CONFIG_X86_32) || defined(CONFIG_X86_64) || defined(CONFIG_ARM64) || defined(CONFIG_ARM)
+#define OVERRIDE_32_pselect6	/* presumably marks pselect6 as overridden in the 32-bit table - confirm */
+#define OVERRIDE_64_pselect6	/* same, 64-bit table */
+SC_LTTNG_TRACEPOINT_EVENT_CODE(pselect6,	/* pselect6 event recording the fd set contents */
+	TP_PROTO(sc_exit(long ret,) int n, fd_set __user * inp, fd_set __user * outp,
+		fd_set __user * exp, struct timeval __user * tvp, void __user * sig),
+	TP_ARGS(sc_exit(ret,) n, inp, outp, exp, tvp, sig),
+	TP_locvar(
+		LTTNG_SYSCALL_SELECT_locvar
+	),
+	TP_code_pre(
+		LTTNG_SYSCALL_SELECT_code_pre
+	),
+	TP_FIELDS(
+		sc_exit(ctf_integer(long, ret, ret))
+		sc_in(ctf_integer(int, n, n))
+		sc_inout(ctf_integer(uint8_t, overflow, tp_locvar->overflow))
+		sc_inout(ctf_integer(struct timeval *, tvp, tvp))
+		sc_in(ctf_integer_hex(void *, sig, sig))
+		/* NOTE(review): pselect6(2) takes a struct timespec timeout; confirm timeval is intended. */
+		sc_inout(	/* one byte sequence per fd set, variant picked by host byte order */
+#if (__BYTE_ORDER == __LITTLE_ENDIAN)
+			LTTNG_SYSCALL_SELECT_fds_field_LE(readfds, inp)
+			LTTNG_SYSCALL_SELECT_fds_field_LE(writefds, outp)
+			LTTNG_SYSCALL_SELECT_fds_field_LE(exceptfds, exp)
+#else
+			LTTNG_SYSCALL_SELECT_fds_field_BE(readfds, inp)
+			LTTNG_SYSCALL_SELECT_fds_field_BE(writefds, outp)
+			LTTNG_SYSCALL_SELECT_fds_field_BE(exceptfds, exp)
+#endif
+		)
+	),
+	TP_code_post(
+		LTTNG_SYSCALL_SELECT_code_post
+	)
+)
+#endif /* defined(CONFIG_X86_32) || defined(CONFIG_X86_64) || defined(CONFIG_ARM64) || defined(CONFIG_ARM) */
+
 #endif /* CREATE_SYSCALL_TABLE */
-- 
1.9.1



More information about the lttng-dev mailing list