[lttng-dev] [PATCH lttng-modules v5 2/5] Extract the FD sets in select and pselect6

Mathieu Desnoyers mathieu.desnoyers at efficios.com
Fri Apr 29 18:13:23 UTC 2016


----- On Apr 22, 2016, at 8:16 PM, Julien Desfossez jdesfossez at efficios.com wrote:

> Instead of extracting the user-space pointers of the 3 fd_set, we now
> extract the bitmask of the FDs in the sets (in, out, ex) in the form of
> an array of uint8_t (1024 FDs is the limit in the kernel).
> 
> In this example, we select in input FDs 5 to 19 (0xFFFF0), it returns
> that one FD is ready: FD 12 (0x1000).
> 
> syscall_entry_select: {
>  n = 20,
>  _fdset_in_length = 3, fdset_in = [ [0] = 0xF0, [1] = 0xFF, [2] = 0xF ],
>  _fdset_out_length = 0, fdset_out = [ ],
>  _fdset_ex_length = 0, fdset_ex = [ ],
>  tvp = 0
> }
> 
> syscall_exit_select: {
>  ret = 1,
>  _fdset_in_length = 3, fdset_in = [ [0] = 0x0, [1] = 0x10, [2] = 0x0 ],
>  _fdset_out_length = 0, fdset_out = [ ],
>  _fdset_ex_length = 0, fdset_ex = [ ],
>  tvp = 0
> }
> 
> Signed-off-by: Julien Desfossez <jdesfossez at efficios.com>
> ---
> .../syscalls/headers/syscalls_pointers_override.h  | 510 +++++++++++++++++++++
> 1 file changed, 510 insertions(+)
> 
> diff --git a/instrumentation/syscalls/headers/syscalls_pointers_override.h
> b/instrumentation/syscalls/headers/syscalls_pointers_override.h
> index bf5c632..ef4dc1c 100644
> --- a/instrumentation/syscalls/headers/syscalls_pointers_override.h
> +++ b/instrumentation/syscalls/headers/syscalls_pointers_override.h
> @@ -53,4 +53,514 @@ SC_LTTNG_TRACEPOINT_EVENT(pipe2,
> 	)
> )
> 
> +#if defined(CONFIG_X86_32) || defined(CONFIG_X86_64)
> +#define OVERRIDE_32_select
> +#define OVERRIDE_64_select
> +SC_LTTNG_TRACEPOINT_EVENT_CODE(select,
> +	TP_PROTO(sc_exit(long ret,) int n, fd_set __user *inp, fd_set __user *outp,
> +		fd_set __user *exp, struct timeval *tvp),
> +	TP_ARGS(sc_exit(ret,) n, inp, outp, exp, tvp),
> +	TP_locvar(
> +		unsigned long *fds_in, *fds_out, *fds_ex;
> +		unsigned long nr_bytes, nr_ulong;
> +		unsigned int overflow;

Change the type of overflow to uint8_t.

> +	),
> +	TP_code_pre(
> +		sc_inout(
> +		{
> +			int err;
> +
> +			tp_locvar->fds_in = NULL;
> +			tp_locvar->fds_out = NULL;
> +			tp_locvar->fds_ex = NULL;
> +

remove newline.

> +			tp_locvar->overflow = 0;
> +
> +			sc_out(
> +				if (ret <= 0)
> +					goto error;
> +			)
> +
> +			if (n <= 0)
> +				goto error;
> +
> +			/* Limit atomic memory allocation to one page */
> +			if (DIV_ROUND_UP((unsigned int) n, BITS_PER_BYTE) > PAGE_SIZE) {

Put DIV_ROUND_UP((unsigned int) n, BITS_PER_BYTE) into local variable.

> +				tp_locvar->nr_bytes = PAGE_SIZE;
> +				tp_locvar->nr_ulong = PAGE_SIZE / sizeof(unsigned long);
> +				/* Inform the user that we did not output everything. */
> +				tp_locvar->overflow = 1;
> +			} else {
> +				tp_locvar->nr_bytes = DIV_ROUND_UP((unsigned int) n, BITS_PER_BYTE);
> +				tp_locvar->nr_ulong = DIV_ROUND_UP((unsigned int) n,
> +					BITS_PER_BYTE * sizeof(unsigned long));
> +			}
> +			/* On error or bogus input, don't copy anything. */
> +			if (tp_locvar->nr_bytes > (__FD_SETSIZE / (8 * sizeof(uint8_t)))) {

Replace 8 by BITS_PER_BYTE

Move this check before the comparison with PAGE_SIZE.
An "n" larger than 1023 is an error.
Test on "n" rather than nr_bytes.

nr_bytes will therefore never be over one page on Linux.
We could put a WARN_ON_ONCE() and goto error for it instead of the ceiling.


> +				goto error;
> +			}
> +
> +			if (inp) {
> +				tp_locvar->fds_in = kmalloc(
> +						tp_locvar->nr_ulong * sizeof(unsigned long),
> +						GFP_ATOMIC);

| GFP_NOWAIT as discussed.

> +				if (!tp_locvar->fds_in)
> +					goto error;
> +
> +				err = lib_ring_buffer_copy_from_user_check_nofault(
> +						tp_locvar->fds_in, inp,
> +						tp_locvar->nr_ulong * sizeof(unsigned long));
> +				if (err != 0)
> +					goto error;
> +			}
> +			if (outp) {
> +				tp_locvar->fds_out = kmalloc(
> +						tp_locvar->nr_ulong * sizeof(unsigned long),
> +						GFP_ATOMIC);

.....

> +				if (!tp_locvar->fds_out)
> +					goto error;
> +
> +				err = lib_ring_buffer_copy_from_user_check_nofault(
> +						tp_locvar->fds_out, outp,
> +						tp_locvar->nr_ulong * sizeof(unsigned long));
> +				if (err != 0)
> +					goto error;
> +			}
> +			if (exp) {
> +				tp_locvar->fds_ex = kmalloc(
> +						tp_locvar->nr_ulong * sizeof(unsigned long),
> +						GFP_ATOMIC);

.........

> +				if (!tp_locvar->fds_ex)
> +					goto error;
> +
> +				err = lib_ring_buffer_copy_from_user_check_nofault(
> +						tp_locvar->fds_ex, exp,
> +						tp_locvar->nr_ulong * sizeof(unsigned long));
> +				if (err != 0)
> +					goto error;
> +			}
> +			goto end;
> +
> +	error:
> +		tp_locvar->nr_bytes = 0;
> +		tp_locvar->nr_ulong = 0;
> +	end:
> +		/* bypass error: label at end of compound statement */

Reword to /* Label at end of compound statement. */

> +		;

Mod line to:

    end:    ;    /* Label at end of compound statement. */

> +		}
> +		)
> +	),
> +	TP_FIELDS(
> +		sc_exit(ctf_integer(long, ret, ret))
> +		sc_in(ctf_integer(int, n, n))
> +		sc_inout(ctf_integer(int, overflow, tp_locvar->overflow))

uint8_t

> +		sc_inout(ctf_integer(struct timeval *, tvp, tvp))
> +
> +		sc_inout(
> +			/* inp */
> +			ctf_custom_field(
> +				ctf_custom_type(
> +					.atype = atype_sequence,
> +					.u.sequence.length_type = __type_integer(
> +						uint16_t, 0, 0, 0, __BYTE_ORDER, 10, none),

uint8_t is enough. (1024/8=128)

> +					.u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,
> +						__BYTE_ORDER, 16, none),
> +				),
> +				readfds,
> +				ctf_custom_code(
> +					unsigned int src;
> +					unsigned int nr_bytes_out = 0;
> +
> +					if (inp) {
> +						ctf_integer_type(uint16_t, tp_locvar->nr_bytes)

uint8_t

> +						ctf_align(uint8_t)
> +					} else {
> +						ctf_integer_type(uint16_t, 0)

uint8_t

> +						ctf_align(uint8_t)
> +						goto skip_inp;
> +					}
> +
> +					for (src = 0; src < tp_locvar->nr_ulong; src++) {
> +						int dst;
> +#if (__BYTE_ORDER == __LITTLE_ENDIAN)
> +						for (dst = 0; dst < sizeof(long); dst++) {
> +							ctf_user_integer_type(uint8_t,
> +								((uint8_t __user *) (inp->fds_bits + src))[dst]);
> +							if (++nr_bytes_out >= tp_locvar->nr_bytes) {
> +								goto skip_inp;
> +							}
> +						}
> +#else
> +						for (dst = sizeof(long); dst >= 0; dst--) {
> +							ctf_user_integer_type(uint8_t,
> +								((uint8_t __user *) (inp->fds_bits + src))[dst]);
> +							if (++nr_bytes_out >= tp_locvar->nr_bytes) {
> +								goto skip_inp;
> +							}
> +						}
> +#endif
> +					}
> +				skip_inp:
> +					;
> +				)

Use 3 (nested) macros rather than cut-and-paste.

> +			)
> +			/* outp */
> +			ctf_custom_field(
> +				ctf_custom_type(
> +					.atype = atype_sequence,
> +					.u.sequence.length_type = __type_integer(
> +						uint16_t, 0, 0, 0, __BYTE_ORDER, 10, none),
> +					.u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,
> +						__BYTE_ORDER, 16, none),
> +				),
> +				writefds,
> +				ctf_custom_code(
> +					unsigned int src;
> +					unsigned int nr_bytes_out = 0;
> +
> +					if (outp) {
> +						ctf_integer_type(uint16_t, tp_locvar->nr_bytes)
> +						ctf_align(uint8_t)
> +					} else {
> +						ctf_integer_type(uint16_t, 0)
> +						ctf_align(uint8_t)
> +						goto skip_outp;
> +					}
> +
> +					for (src = 0; src < tp_locvar->nr_ulong; src++) {
> +						int dst;
> +#if (__BYTE_ORDER == __LITTLE_ENDIAN)
> +						for (dst = 0; dst < sizeof(long); dst++) {
> +							ctf_user_integer_type(uint8_t,
> +								((uint8_t __user *) (outp->fds_bits + src))[dst]);
> +							if (++nr_bytes_out >= tp_locvar->nr_bytes) {
> +								goto skip_outp;
> +							}
> +						}
> +#else
> +						for (dst = sizeof(long); dst >= 0; dst--) {
> +							ctf_user_integer_type(uint8_t,
> +								((uint8_t __user *) (outp->fds_bits + src))[dst]);
> +							if (++nr_bytes_out >= tp_locvar->nr_bytes) {
> +								goto skip_outp;
> +							}
> +						}
> +#endif
> +					}
> +				skip_outp:
> +					;
> +				)
> +			)
> +			/* exp */
> +			ctf_custom_field(
> +				ctf_custom_type(
> +					.atype = atype_sequence,
> +					.u.sequence.length_type = __type_integer(
> +						uint16_t, 0, 0, 0, __BYTE_ORDER, 10, none),
> +					.u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,
> +						__BYTE_ORDER, 16, none),
> +				),
> +				exceptfds,
> +				ctf_custom_code(
> +					unsigned int src;
> +					unsigned int nr_bytes_out = 0;
> +
> +					if (exp) {
> +						ctf_integer_type(uint16_t, tp_locvar->nr_bytes)
> +						ctf_align(uint8_t)
> +					} else {
> +						ctf_integer_type(uint16_t, 0)
> +						ctf_align(uint8_t)
> +						goto skip_exp;
> +					}
> +
> +					for (src = 0; src < tp_locvar->nr_ulong; src++) {
> +						int dst;
> +#if (__BYTE_ORDER == __LITTLE_ENDIAN)
> +						for (dst = 0; dst < sizeof(long); dst++) {
> +							ctf_user_integer_type(uint8_t,
> +								((uint8_t __user *) (exp->fds_bits + src))[dst]);
> +							if (++nr_bytes_out >= tp_locvar->nr_bytes) {
> +								goto skip_exp;
> +							}
> +						}
> +#else
> +						for (dst = sizeof(long); dst >= 0; dst--) {
> +							ctf_user_integer_type(uint8_t,
> +								((uint8_t __user *) (exp->fds_bits + src))[dst]);
> +							if (++nr_bytes_out >= tp_locvar->nr_bytes) {
> +								goto skip_exp;
> +							}
> +						}
> +#endif
> +					}
> +				skip_exp:
> +					;
> +				)
> +			)
> +		)
> +	),
> +	TP_code_post(
> +		kfree(tp_locvar->fds_in);
> +		kfree(tp_locvar->fds_out);
> +		kfree(tp_locvar->fds_ex);
> +	)
> +)
> +#endif /* defined(CONFIG_X86_32) || defined(CONFIG_X86_64) */
> +
> +#if defined(CONFIG_X86_32) || defined(CONFIG_X86_64) || defined(CONFIG_ARM64)
> || defined(CONFIG_ARM)
> +#define OVERRIDE_32_pselect6
> +#define OVERRIDE_64_pselect6
> +SC_LTTNG_TRACEPOINT_EVENT_CODE(pselect6,
> +	TP_PROTO(sc_exit(long ret,) int n, fd_set __user * inp, fd_set __user * outp,
> +		fd_set __user * exp, struct timeval * tvp, void * sig),
> +	TP_ARGS(sc_exit(ret,) n, inp, outp, exp, tvp, sig),
> +	TP_locvar(

Please combine into a macro with select.

> +		unsigned long *fds_in, *fds_out, *fds_ex;
> +		unsigned long nr_bytes, nr_ulong;
> +		unsigned int overflow;
> +	),
> +	TP_code_pre(
> +		sc_inout(
> +		{
> +			int err;
> +
> +			tp_locvar->fds_in = NULL;
> +			tp_locvar->fds_out = NULL;
> +			tp_locvar->fds_ex = NULL;
> +
> +			tp_locvar->overflow = 0;
> +
> +			sc_out(
> +				if (ret <= 0)
> +					goto error;
> +			)
> +
> +			if (n <= 0)
> +				goto error;
> +
> +			/* Limit atomic memory allocation to one page */
> +			if (DIV_ROUND_UP((unsigned int) n, BITS_PER_BYTE) > PAGE_SIZE) {
> +				tp_locvar->nr_bytes = PAGE_SIZE;
> +				tp_locvar->nr_ulong = PAGE_SIZE / sizeof(unsigned long);
> +				/* Inform the user that we did not output everything. */
> +				tp_locvar->overflow = 1;
> +			} else {
> +				tp_locvar->nr_bytes = DIV_ROUND_UP((unsigned int) n, BITS_PER_BYTE);
> +				tp_locvar->nr_ulong = DIV_ROUND_UP((unsigned int) n,
> +					BITS_PER_BYTE * sizeof(unsigned long));
> +			}
> +			/* On error or bogus input, don't copy anything. */
> +			if (tp_locvar->nr_bytes > (__FD_SETSIZE / (8 * sizeof(uint8_t)))) {
> +				goto error;
> +			}
> +
> +			if (inp) {
> +				tp_locvar->fds_in = kmalloc(
> +						tp_locvar->nr_ulong * sizeof(unsigned long),
> +						GFP_ATOMIC);
> +				if (!tp_locvar->fds_in)
> +					goto error;
> +
> +				err = lib_ring_buffer_copy_from_user_check_nofault(
> +						tp_locvar->fds_in, inp,
> +						tp_locvar->nr_ulong * sizeof(unsigned long));
> +				if (err != 0)
> +					goto error;
> +			}
> +			if (outp) {
> +				tp_locvar->fds_out = kmalloc(
> +						tp_locvar->nr_ulong * sizeof(unsigned long),
> +						GFP_ATOMIC);
> +				if (!tp_locvar->fds_out)
> +					goto error;
> +
> +				err = lib_ring_buffer_copy_from_user_check_nofault(
> +						tp_locvar->fds_out, outp,
> +						tp_locvar->nr_ulong * sizeof(unsigned long));
> +				if (err != 0)
> +					goto error;
> +			}
> +			if (exp) {
> +				tp_locvar->fds_ex = kmalloc(
> +						tp_locvar->nr_ulong * sizeof(unsigned long),
> +						GFP_ATOMIC);
> +				if (!tp_locvar->fds_ex)
> +					goto error;
> +
> +				err = lib_ring_buffer_copy_from_user_check_nofault(
> +						tp_locvar->fds_ex, exp,
> +						tp_locvar->nr_ulong * sizeof(unsigned long));
> +				if (err != 0)
> +					goto error;
> +			}
> +			goto end;
> +
> +	error:
> +		tp_locvar->nr_bytes = 0;
> +		tp_locvar->nr_ulong = 0;
> +	end:
> +		/* bypass error: label at end of compound statement */
> +		;
> +		}
> +		)
> +	),
> +	TP_FIELDS(
> +		sc_exit(ctf_integer(long, ret, ret))
> +		sc_in(ctf_integer(int, n, n))
> +		sc_in(ctf_integer(int, overflow, tp_locvar->overflow))
> +		sc_inout(ctf_integer(struct timeval *, tvp, tvp))
> +
> +		sc_inout(
> +			/* inp */
> +			ctf_custom_field(
> +				ctf_custom_type(
> +					.atype = atype_sequence,
> +					.u.sequence.length_type = __type_integer(
> +						uint16_t, 0, 0, 0, __BYTE_ORDER, 10, none),
> +					.u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,
> +						__BYTE_ORDER, 16, none),
> +				),
> +				readfds,
> +				ctf_custom_code(
> +					unsigned int src;
> +					unsigned int nr_bytes_out = 0;
> +
> +					if (inp) {
> +						ctf_integer_type(uint16_t, tp_locvar->nr_bytes)
> +						ctf_align(uint8_t)
> +					} else {
> +						ctf_integer_type(uint16_t, 0)
> +						ctf_align(uint8_t)
> +						goto skip_inp;
> +					}
> +
> +					for (src = 0; src < tp_locvar->nr_ulong; src++) {
> +						int dst;
> +#if (__BYTE_ORDER == __LITTLE_ENDIAN)
> +						for (dst = 0; dst < sizeof(long); dst++) {
> +							ctf_user_integer_type(uint8_t,
> +								((uint8_t __user *) (inp->fds_bits + src))[dst]);
> +							if (++nr_bytes_out >= tp_locvar->nr_bytes) {
> +								goto skip_inp;
> +							}
> +						}
> +#else
> +						for (dst = sizeof(long); dst >= 0; dst--) {
> +							ctf_user_integer_type(uint8_t,
> +								((uint8_t __user *) (inp->fds_bits + src))[dst]);
> +							if (++nr_bytes_out >= tp_locvar->nr_bytes) {
> +								goto skip_inp;
> +							}
> +						}
> +#endif
> +					}
> +				skip_inp:
> +					;
> +				)
> +			)
> +			/* outp */
> +			ctf_custom_field(
> +				ctf_custom_type(
> +					.atype = atype_sequence,
> +					.u.sequence.length_type = __type_integer(
> +						uint16_t, 0, 0, 0, __BYTE_ORDER, 10, none),
> +					.u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,
> +						__BYTE_ORDER, 16, none),
> +				),
> +				writefds,
> +				ctf_custom_code(
> +					unsigned int src;
> +					unsigned int nr_bytes_out = 0;
> +
> +					if (outp) {
> +						ctf_integer_type(uint16_t, tp_locvar->nr_bytes)
> +						ctf_align(uint8_t)
> +					} else {
> +						ctf_integer_type(uint16_t, 0)
> +						ctf_align(uint8_t)
> +						goto skip_outp;
> +					}
> +
> +					for (src = 0; src < tp_locvar->nr_ulong; src++) {
> +						int dst;
> +#if (__BYTE_ORDER == __LITTLE_ENDIAN)
> +						for (dst = 0; dst < sizeof(long); dst++) {
> +							ctf_user_integer_type(uint8_t,
> +								((uint8_t __user *) (outp->fds_bits + src))[dst]);
> +							if (++nr_bytes_out >= tp_locvar->nr_bytes) {
> +								goto skip_outp;
> +							}
> +						}
> +#else
> +						for (dst = sizeof(long); dst >= 0; dst--) {
> +							ctf_user_integer_type(uint8_t,
> +								((uint8_t __user *) (outp->fds_bits + src))[dst]);
> +							if (++nr_bytes_out >= tp_locvar->nr_bytes) {
> +								goto skip_outp;
> +							}
> +						}
> +#endif
> +					}
> +				skip_outp:
> +					;
> +				)
> +			)
> +			/* exp */
> +			ctf_custom_field(
> +				ctf_custom_type(
> +					.atype = atype_sequence,
> +					.u.sequence.length_type = __type_integer(
> +						uint16_t, 0, 0, 0, __BYTE_ORDER, 10, none),
> +					.u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,
> +						__BYTE_ORDER, 16, none),
> +				),
> +				exceptfds,
> +				ctf_custom_code(
> +					unsigned int src;
> +					unsigned int nr_bytes_out = 0;
> +
> +					if (exp) {
> +						ctf_integer_type(uint16_t, tp_locvar->nr_bytes)
> +						ctf_align(uint8_t)
> +					} else {
> +						ctf_integer_type(uint16_t, 0)
> +						ctf_align(uint8_t)
> +						goto skip_exp;
> +					}
> +
> +					for (src = 0; src < tp_locvar->nr_ulong; src++) {
> +						int dst;
> +#if (__BYTE_ORDER == __LITTLE_ENDIAN)
> +						for (dst = 0; dst < sizeof(long); dst++) {
> +							ctf_user_integer_type(uint8_t,
> +								((uint8_t __user *) (exp->fds_bits + src))[dst]);
> +							if (++nr_bytes_out >= tp_locvar->nr_bytes) {
> +								goto skip_exp;
> +							}
> +						}
> +#else
> +						for (dst = sizeof(long); dst >= 0; dst--) {
> +							ctf_user_integer_type(uint8_t,
> +								((uint8_t __user *) (exp->fds_bits + src))[dst]);
> +							if (++nr_bytes_out >= tp_locvar->nr_bytes) {
> +								goto skip_exp;
> +							}
> +						}
> +#endif
> +					}
> +				skip_exp:
> +					;
> +				)
> +			)
> +		)
> +	),
> +	TP_code_post(
> +		kfree(tp_locvar->fds_in);
> +		kfree(tp_locvar->fds_out);
> +		kfree(tp_locvar->fds_ex);

Thanks,

Mathieu

> +	)
> +)
> +#endif /* defined(CONFIG_X86_32) || defined(CONFIG_X86_64) ||
> defined(CONFIG_ARM64) || defined(CONFIG_ARM) */
> +
> #endif /* CREATE_SYSCALL_TABLE */
> --
> 1.9.1

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com


More information about the lttng-dev mailing list