[lttng-dev] [PATCH lttng-modules v5 2/5] Extract the FD sets in select and pselect6

Julien Desfossez jdesfossez at efficios.com
Sat Apr 23 00:16:49 UTC 2016


Instead of extracting only the user-space pointers of the three fd_set
arguments, we now extract the bitmask of the FDs in each set (in, out,
ex) as an array of uint8_t (the kernel limits fd_set to 1024 FDs).
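
As a minimal sketch of the length arithmetic (not part of the patch; the
helper name is made up for illustration), the number of bytes emitted per
set is simply n rounded up to whole bytes:

/* Illustrative only: bytes emitted per fd_set for n monitored FDs.
 * With n = 20 as in the example below, this yields (20 + 7) / 8 == 3,
 * which is the _fdset_in_length value shown there. */
static unsigned int fdset_payload_bytes(unsigned int n)
{
	return (n + 7) / 8;	/* same as DIV_ROUND_UP(n, BITS_PER_BYTE) */
}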

In this example, we select for input on FDs 4 to 19 (mask 0xFFFF0); the
syscall returns with one FD ready: FD 12 (mask 0x1000).

syscall_entry_select: {
  n = 20,
  _fdset_in_length = 3, fdset_in = [ [0] = 0xF0, [1] = 0xFF, [2] = 0xF ],
  _fdset_out_length = 0, fdset_out = [ ],
  _fdset_ex_length = 0, fdset_ex = [ ],
  tvp = 0
 }

syscall_exit_select: {
  ret = 1,
  _fdset_in_length = 3, fdset_in = [ [0] = 0x0, [1] = 0x10, [2] = 0x0 ],
  _fdset_out_length = 0, fdset_out = [ ],
  _fdset_ex_length = 0, fdset_ex = [ ],
  tvp = 0
}
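
As a rough sketch (not part of the patch; the function and parameter names
are hypothetical), a trace post-processing tool could decode the recorded
byte array back into FD numbers like this:

#include <stdint.h>
#include <stdio.h>

/* Sketch only: walk one recorded fdset sequence and print the set FDs.
 * "fdset" and "length" stand for a recorded set and its length field above. */
static void print_ready_fds(const uint8_t *fdset, uint16_t length)
{
	unsigned int byte, bit;

	for (byte = 0; byte < length; byte++)
		for (bit = 0; bit < 8; bit++)
			if (fdset[byte] & (1U << bit))
				printf("FD %u\n", byte * 8 + bit);
}

/* Decoding the exit event above, { 0x0, 0x10, 0x0 }, prints "FD 12". */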

Signed-off-by: Julien Desfossez <jdesfossez at efficios.com>
---
 .../syscalls/headers/syscalls_pointers_override.h  | 510 +++++++++++++++++++++
 1 file changed, 510 insertions(+)

diff --git a/instrumentation/syscalls/headers/syscalls_pointers_override.h b/instrumentation/syscalls/headers/syscalls_pointers_override.h
index bf5c632..ef4dc1c 100644
--- a/instrumentation/syscalls/headers/syscalls_pointers_override.h
+++ b/instrumentation/syscalls/headers/syscalls_pointers_override.h
@@ -53,4 +53,514 @@ SC_LTTNG_TRACEPOINT_EVENT(pipe2,
 	)
 )
 
+#if defined(CONFIG_X86_32) || defined(CONFIG_X86_64)
+#define OVERRIDE_32_select
+#define OVERRIDE_64_select
+SC_LTTNG_TRACEPOINT_EVENT_CODE(select,
+	TP_PROTO(sc_exit(long ret,) int n, fd_set __user *inp, fd_set __user *outp,
+		fd_set __user *exp, struct timeval *tvp),
+	TP_ARGS(sc_exit(ret,) n, inp, outp, exp, tvp),
+	TP_locvar(
+		unsigned long *fds_in, *fds_out, *fds_ex;
+		unsigned long nr_bytes, nr_ulong;
+		unsigned int overflow;
+	),
+	TP_code_pre(
+		sc_inout(
+		{
+			int err;
+
+			tp_locvar->fds_in = NULL;
+			tp_locvar->fds_out = NULL;
+			tp_locvar->fds_ex = NULL;
+
+			tp_locvar->overflow = 0;
+
+			sc_out(
+				if (ret <= 0)
+					goto error;
+			)
+
+			if (n <= 0)
+				goto error;
+
+			/* Limit atomic memory allocation to one page */
+			if (DIV_ROUND_UP((unsigned int) n, BITS_PER_BYTE) > PAGE_SIZE) {
+				tp_locvar->nr_bytes = PAGE_SIZE;
+				tp_locvar->nr_ulong = PAGE_SIZE / sizeof(unsigned long);
+				/* Inform the user that we did not output everything. */
+				tp_locvar->overflow = 1;
+			} else {
+				tp_locvar->nr_bytes = DIV_ROUND_UP((unsigned int) n, BITS_PER_BYTE);
+				tp_locvar->nr_ulong = DIV_ROUND_UP((unsigned int) n,
+					BITS_PER_BYTE * sizeof(unsigned long));
+			}
+			/* On error or bogus input, don't copy anything. */
+			if (tp_locvar->nr_bytes > __FD_SETSIZE / BITS_PER_BYTE) {
+				goto error;
+			}
+
+			if (inp) {
+				tp_locvar->fds_in = kmalloc(
+						tp_locvar->nr_ulong * sizeof(unsigned long),
+						GFP_ATOMIC);
+				if (!tp_locvar->fds_in)
+					goto error;
+
+				err = lib_ring_buffer_copy_from_user_check_nofault(
+						tp_locvar->fds_in, inp,
+						tp_locvar->nr_ulong * sizeof(unsigned long));
+				if (err != 0)
+					goto error;
+			}
+			if (outp) {
+				tp_locvar->fds_out = kmalloc(
+						tp_locvar->nr_ulong * sizeof(unsigned long),
+						GFP_ATOMIC);
+				if (!tp_locvar->fds_out)
+					goto error;
+
+				err = lib_ring_buffer_copy_from_user_check_nofault(
+						tp_locvar->fds_out, outp,
+						tp_locvar->nr_ulong * sizeof(unsigned long));
+				if (err != 0)
+					goto error;
+			}
+			if (exp) {
+				tp_locvar->fds_ex = kmalloc(
+						tp_locvar->nr_ulong * sizeof(unsigned long),
+						GFP_ATOMIC);
+				if (!tp_locvar->fds_ex)
+					goto error;
+
+				err = lib_ring_buffer_copy_from_user_check_nofault(
+						tp_locvar->fds_ex, exp,
+						tp_locvar->nr_ulong * sizeof(unsigned long));
+				if (err != 0)
+					goto error;
+			}
+			goto end;
+
+	error:
+		tp_locvar->nr_bytes = 0;
+		tp_locvar->nr_ulong = 0;
+	end:
+		/* Empty statement to avoid the "label at end of compound statement" error. */
+		;
+		}
+		)
+	),
+	TP_FIELDS(
+		sc_exit(ctf_integer(long, ret, ret))
+		sc_in(ctf_integer(int, n, n))
+		sc_inout(ctf_integer(int, overflow, tp_locvar->overflow))
+		sc_inout(ctf_integer(struct timeval *, tvp, tvp))
+
+		sc_inout(
+			/* inp */
+			ctf_custom_field(
+				ctf_custom_type(
+					.atype = atype_sequence,
+					.u.sequence.length_type = __type_integer(
+						uint16_t, 0, 0, 0, __BYTE_ORDER, 10, none),
+					.u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,
+						__BYTE_ORDER, 16, none),
+				),
+				readfds,
+				ctf_custom_code(
+					unsigned int src;
+					unsigned int nr_bytes_out = 0;
+
+					if (inp) {
+						ctf_integer_type(uint16_t, tp_locvar->nr_bytes)
+						ctf_align(uint8_t)
+					} else {
+						ctf_integer_type(uint16_t, 0)
+						ctf_align(uint8_t)
+						goto skip_inp;
+					}
+
+					for (src = 0; src < tp_locvar->nr_ulong; src++) {
+						int dst;
+#if (__BYTE_ORDER == __LITTLE_ENDIAN)
+						for (dst = 0; dst < sizeof(long); dst++) {
+							ctf_user_integer_type(uint8_t,
+								((uint8_t __user *) (inp->fds_bits + src))[dst]);
+							if (++nr_bytes_out >= tp_locvar->nr_bytes) {
+								goto skip_inp;
+							}
+						}
+#else
+						for (dst = sizeof(long) - 1; dst >= 0; dst--) {
+							ctf_user_integer_type(uint8_t,
+								((uint8_t __user *) (inp->fds_bits + src))[dst]);
+							if (++nr_bytes_out >= tp_locvar->nr_bytes) {
+								goto skip_inp;
+							}
+						}
+#endif
+					}
+				skip_inp:
+					;
+				)
+			)
+			/* outp */
+			ctf_custom_field(
+				ctf_custom_type(
+					.atype = atype_sequence,
+					.u.sequence.length_type = __type_integer(
+						uint16_t, 0, 0, 0, __BYTE_ORDER, 10, none),
+					.u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,
+						__BYTE_ORDER, 16, none),
+				),
+				writefds,
+				ctf_custom_code(
+					unsigned int src;
+					unsigned int nr_bytes_out = 0;
+
+					if (outp) {
+						ctf_integer_type(uint16_t, tp_locvar->nr_bytes)
+						ctf_align(uint8_t)
+					} else {
+						ctf_integer_type(uint16_t, 0)
+						ctf_align(uint8_t)
+						goto skip_outp;
+					}
+
+					for (src = 0; src < tp_locvar->nr_ulong; src++) {
+						int dst;
+#if (__BYTE_ORDER == __LITTLE_ENDIAN)
+						for (dst = 0; dst < sizeof(long); dst++) {
+							ctf_user_integer_type(uint8_t,
+								((uint8_t __user *) (outp->fds_bits + src))[dst]);
+							if (++nr_bytes_out >= tp_locvar->nr_bytes) {
+								goto skip_outp;
+							}
+						}
+#else
+						for (dst = sizeof(long) - 1; dst >= 0; dst--) {
+							ctf_user_integer_type(uint8_t,
+								((uint8_t __user *) (outp->fds_bits + src))[dst]);
+							if (++nr_bytes_out >= tp_locvar->nr_bytes) {
+								goto skip_outp;
+							}
+						}
+#endif
+					}
+				skip_outp:
+					;
+				)
+			)
+			/* exp */
+			ctf_custom_field(
+				ctf_custom_type(
+					.atype = atype_sequence,
+					.u.sequence.length_type = __type_integer(
+						uint16_t, 0, 0, 0, __BYTE_ORDER, 10, none),
+					.u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,
+						__BYTE_ORDER, 16, none),
+				),
+				exceptfds,
+				ctf_custom_code(
+					unsigned int src;
+					unsigned int nr_bytes_out = 0;
+
+					if (exp) {
+						ctf_integer_type(uint16_t, tp_locvar->nr_bytes)
+						ctf_align(uint8_t)
+					} else {
+						ctf_integer_type(uint16_t, 0)
+						ctf_align(uint8_t)
+						goto skip_exp;
+					}
+
+					for (src = 0; src < tp_locvar->nr_ulong; src++) {
+						int dst;
+#if (__BYTE_ORDER == __LITTLE_ENDIAN)
+						for (dst = 0; dst < sizeof(long); dst++) {
+							ctf_user_integer_type(uint8_t,
+								((uint8_t __user *) (exp->fds_bits + src))[dst]);
+							if (++nr_bytes_out >= tp_locvar->nr_bytes) {
+								goto skip_exp;
+							}
+						}
+#else
+						for (dst = sizeof(long) - 1; dst >= 0; dst--) {
+							ctf_user_integer_type(uint8_t,
+								((uint8_t __user *) (exp->fds_bits + src))[dst]);
+							if (++nr_bytes_out >= tp_locvar->nr_bytes) {
+								goto skip_exp;
+							}
+						}
+#endif
+					}
+				skip_exp:
+					;
+				)
+			)
+		)
+	),
+	TP_code_post(
+		kfree(tp_locvar->fds_in);
+		kfree(tp_locvar->fds_out);
+		kfree(tp_locvar->fds_ex);
+	)
+)
+#endif /* defined(CONFIG_X86_32) || defined(CONFIG_X86_64) */
+
+#if defined(CONFIG_X86_32) || defined(CONFIG_X86_64) || defined(CONFIG_ARM64) || defined(CONFIG_ARM)
+#define OVERRIDE_32_pselect6
+#define OVERRIDE_64_pselect6
+SC_LTTNG_TRACEPOINT_EVENT_CODE(pselect6,
+	TP_PROTO(sc_exit(long ret,) int n, fd_set __user *inp, fd_set __user *outp,
+		fd_set __user *exp, struct timeval *tvp, void *sig),
+	TP_ARGS(sc_exit(ret,) n, inp, outp, exp, tvp, sig),
+	TP_locvar(
+		unsigned long *fds_in, *fds_out, *fds_ex;
+		unsigned long nr_bytes, nr_ulong;
+		unsigned int overflow;
+	),
+	TP_code_pre(
+		sc_inout(
+		{
+			int err;
+
+			tp_locvar->fds_in = NULL;
+			tp_locvar->fds_out = NULL;
+			tp_locvar->fds_ex = NULL;
+
+			tp_locvar->overflow = 0;
+
+			sc_out(
+				if (ret <= 0)
+					goto error;
+			)
+
+			if (n <= 0)
+				goto error;
+
+			/* Limit atomic memory allocation to one page */
+			if (DIV_ROUND_UP((unsigned int) n, BITS_PER_BYTE) > PAGE_SIZE) {
+				tp_locvar->nr_bytes = PAGE_SIZE;
+				tp_locvar->nr_ulong = PAGE_SIZE / sizeof(unsigned long);
+				/* Inform the user that we did not output everything. */
+				tp_locvar->overflow = 1;
+			} else {
+				tp_locvar->nr_bytes = DIV_ROUND_UP((unsigned int) n, BITS_PER_BYTE);
+				tp_locvar->nr_ulong = DIV_ROUND_UP((unsigned int) n,
+					BITS_PER_BYTE * sizeof(unsigned long));
+			}
+			/* On error or bogus input, don't copy anything. */
+			if (tp_locvar->nr_bytes > __FD_SETSIZE / BITS_PER_BYTE) {
+				goto error;
+			}
+
+			if (inp) {
+				tp_locvar->fds_in = kmalloc(
+						tp_locvar->nr_ulong * sizeof(unsigned long),
+						GFP_ATOMIC);
+				if (!tp_locvar->fds_in)
+					goto error;
+
+				err = lib_ring_buffer_copy_from_user_check_nofault(
+						tp_locvar->fds_in, inp,
+						tp_locvar->nr_ulong * sizeof(unsigned long));
+				if (err != 0)
+					goto error;
+			}
+			if (outp) {
+				tp_locvar->fds_out = kmalloc(
+						tp_locvar->nr_ulong * sizeof(unsigned long),
+						GFP_ATOMIC);
+				if (!tp_locvar->fds_out)
+					goto error;
+
+				err = lib_ring_buffer_copy_from_user_check_nofault(
+						tp_locvar->fds_out, outp,
+						tp_locvar->nr_ulong * sizeof(unsigned long));
+				if (err != 0)
+					goto error;
+			}
+			if (exp) {
+				tp_locvar->fds_ex = kmalloc(
+						tp_locvar->nr_ulong * sizeof(unsigned long),
+						GFP_ATOMIC);
+				if (!tp_locvar->fds_ex)
+					goto error;
+
+				err = lib_ring_buffer_copy_from_user_check_nofault(
+						tp_locvar->fds_ex, exp,
+						tp_locvar->nr_ulong * sizeof(unsigned long));
+				if (err != 0)
+					goto error;
+			}
+			goto end;
+
+	error:
+		tp_locvar->nr_bytes = 0;
+		tp_locvar->nr_ulong = 0;
+	end:
+		/* Empty statement to avoid the "label at end of compound statement" error. */
+		;
+		}
+		)
+	),
+	TP_FIELDS(
+		sc_exit(ctf_integer(long, ret, ret))
+		sc_in(ctf_integer(int, n, n))
+		sc_inout(ctf_integer(int, overflow, tp_locvar->overflow))
+		sc_inout(ctf_integer(struct timeval *, tvp, tvp))
+
+		sc_inout(
+			/* inp */
+			ctf_custom_field(
+				ctf_custom_type(
+					.atype = atype_sequence,
+					.u.sequence.length_type = __type_integer(
+						uint16_t, 0, 0, 0, __BYTE_ORDER, 10, none),
+					.u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,
+						__BYTE_ORDER, 16, none),
+				),
+				readfds,
+				ctf_custom_code(
+					unsigned int src;
+					unsigned int nr_bytes_out = 0;
+
+					if (inp) {
+						ctf_integer_type(uint16_t, tp_locvar->nr_bytes)
+						ctf_align(uint8_t)
+					} else {
+						ctf_integer_type(uint16_t, 0)
+						ctf_align(uint8_t)
+						goto skip_inp;
+					}
+
+					for (src = 0; src < tp_locvar->nr_ulong; src++) {
+						int dst;
+#if (__BYTE_ORDER == __LITTLE_ENDIAN)
+						for (dst = 0; dst < sizeof(long); dst++) {
+							ctf_user_integer_type(uint8_t,
+								((uint8_t __user *) (inp->fds_bits + src))[dst]);
+							if (++nr_bytes_out >= tp_locvar->nr_bytes) {
+								goto skip_inp;
+							}
+						}
+#else
+						for (dst = sizeof(long) - 1; dst >= 0; dst--) {
+							ctf_user_integer_type(uint8_t,
+								((uint8_t __user *) (inp->fds_bits + src))[dst]);
+							if (++nr_bytes_out >= tp_locvar->nr_bytes) {
+								goto skip_inp;
+							}
+						}
+#endif
+					}
+				skip_inp:
+					;
+				)
+			)
+			/* outp */
+			ctf_custom_field(
+				ctf_custom_type(
+					.atype = atype_sequence,
+					.u.sequence.length_type = __type_integer(
+						uint16_t, 0, 0, 0, __BYTE_ORDER, 10, none),
+					.u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,
+						__BYTE_ORDER, 16, none),
+				),
+				writefds,
+				ctf_custom_code(
+					unsigned int src;
+					unsigned int nr_bytes_out = 0;
+
+					if (outp) {
+						ctf_integer_type(uint16_t, tp_locvar->nr_bytes)
+						ctf_align(uint8_t)
+					} else {
+						ctf_integer_type(uint16_t, 0)
+						ctf_align(uint8_t)
+						goto skip_outp;
+					}
+
+					for (src = 0; src < tp_locvar->nr_ulong; src++) {
+						int dst;
+#if (__BYTE_ORDER == __LITTLE_ENDIAN)
+						for (dst = 0; dst < sizeof(long); dst++) {
+							ctf_user_integer_type(uint8_t,
+								((uint8_t __user *) (outp->fds_bits + src))[dst]);
+							if (++nr_bytes_out >= tp_locvar->nr_bytes) {
+								goto skip_outp;
+							}
+						}
+#else
+						for (dst = sizeof(long) - 1; dst >= 0; dst--) {
+							ctf_user_integer_type(uint8_t,
+								((uint8_t __user *) (outp->fds_bits + src))[dst]);
+							if (++nr_bytes_out >= tp_locvar->nr_bytes) {
+								goto skip_outp;
+							}
+						}
+#endif
+					}
+				skip_outp:
+					;
+				)
+			)
+			/* exp */
+			ctf_custom_field(
+				ctf_custom_type(
+					.atype = atype_sequence,
+					.u.sequence.length_type = __type_integer(
+						uint16_t, 0, 0, 0, __BYTE_ORDER, 10, none),
+					.u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,
+						__BYTE_ORDER, 16, none),
+				),
+				exceptfds,
+				ctf_custom_code(
+					unsigned int src;
+					unsigned int nr_bytes_out = 0;
+
+					if (exp) {
+						ctf_integer_type(uint16_t, tp_locvar->nr_bytes)
+						ctf_align(uint8_t)
+					} else {
+						ctf_integer_type(uint16_t, 0)
+						ctf_align(uint8_t)
+						goto skip_exp;
+					}
+
+					for (src = 0; src < tp_locvar->nr_ulong; src++) {
+						int dst;
+#if (__BYTE_ORDER == __LITTLE_ENDIAN)
+						for (dst = 0; dst < sizeof(long); dst++) {
+							ctf_user_integer_type(uint8_t,
+								((uint8_t __user *) (exp->fds_bits + src))[dst]);
+							if (++nr_bytes_out >= tp_locvar->nr_bytes) {
+								goto skip_exp;
+							}
+						}
+#else
+						for (dst = sizeof(long) - 1; dst >= 0; dst--) {
+							ctf_user_integer_type(uint8_t,
+								((uint8_t __user *) (exp->fds_bits + src))[dst]);
+							if (++nr_bytes_out >= tp_locvar->nr_bytes) {
+								goto skip_exp;
+							}
+						}
+#endif
+					}
+				skip_exp:
+					;
+				)
+			)
+		)
+	),
+	TP_code_post(
+		kfree(tp_locvar->fds_in);
+		kfree(tp_locvar->fds_out);
+		kfree(tp_locvar->fds_ex);
+	)
+)
+#endif /* defined(CONFIG_X86_32) || defined(CONFIG_X86_64) || defined(CONFIG_ARM64) || defined(CONFIG_ARM) */
+
 #endif /* CREATE_SYSCALL_TABLE */
-- 
1.9.1


