[lttng-dev] [PATCH lttng-modules v5 2/5] Extract the FD sets in select and pselect6
Mathieu Desnoyers
mathieu.desnoyers at efficios.com
Fri Apr 29 18:13:23 UTC 2016
----- On Apr 22, 2016, at 8:16 PM, Julien Desfossez jdesfossez at efficios.com wrote:
> Instead of extracting the user-space pointers of the 3 fd_set, we now
> extract the bitmask of the FDs in the sets (in, out, ex) in the form of
> an array of uint8_t (1024 FDs is the limit in the kernel).
>
> In this example, we select in input FDs 5 to 19 (0xFFFF0), it returns
> that one FD is ready: FD 12 (0x1000).
>
> syscall_entry_select: {
> n = 20,
> _fdset_in_length = 3, fdset_in = [ [0] = 0xF0, [1] = 0xFF, [2] = 0xF ],
> _fdset_out_length = 0, fdset_out = [ ],
> _fdset_ex_length = 0, fdset_ex = [ ],
> tvp = 0
> }
>
> syscall_exit_select: {
> ret = 1,
> _fdset_in_length = 3, fdset_in = [ [0] = 0x0, [1] = 0x10, [2] = 0x0 ],
> _fdset_out_length = 0, fdset_out = [ ],
> _fdset_ex_length = 0, fdset_ex = [ ],
> tvp = 0
> }
>
> Signed-off-by: Julien Desfossez <jdesfossez at efficios.com>
> ---
> .../syscalls/headers/syscalls_pointers_override.h | 510 +++++++++++++++++++++
> 1 file changed, 510 insertions(+)
>
> diff --git a/instrumentation/syscalls/headers/syscalls_pointers_override.h
> b/instrumentation/syscalls/headers/syscalls_pointers_override.h
> index bf5c632..ef4dc1c 100644
> --- a/instrumentation/syscalls/headers/syscalls_pointers_override.h
> +++ b/instrumentation/syscalls/headers/syscalls_pointers_override.h
> @@ -53,4 +53,514 @@ SC_LTTNG_TRACEPOINT_EVENT(pipe2,
> )
> )
>
> +#if defined(CONFIG_X86_32) || defined(CONFIG_X86_64)
> +#define OVERRIDE_32_select
> +#define OVERRIDE_64_select
> +SC_LTTNG_TRACEPOINT_EVENT_CODE(select,
> + TP_PROTO(sc_exit(long ret,) int n, fd_set __user *inp, fd_set __user *outp,
> + fd_set __user *exp, struct timeval *tvp),
> + TP_ARGS(sc_exit(ret,) n, inp, outp, exp, tvp),
> + TP_locvar(
> + unsigned long *fds_in, *fds_out, *fds_ex;
> + unsigned long nr_bytes, nr_ulong;
> + unsigned int overflow;
Change the type of overflow to uint8_t.
> + ),
> + TP_code_pre(
> + sc_inout(
> + {
> + int err;
> +
> + tp_locvar->fds_in = NULL;
> + tp_locvar->fds_out = NULL;
> + tp_locvar->fds_ex = NULL;
> +
Remove this blank line.
> + tp_locvar->overflow = 0;
> +
> + sc_out(
> + if (ret <= 0)
> + goto error;
> + )
> +
> + if (n <= 0)
> + goto error;
> +
> + /* Limit atomic memory allocation to one page */
> + if (DIV_ROUND_UP((unsigned int) n, BITS_PER_BYTE) > PAGE_SIZE) {
Put DIV_ROUND_UP((unsigned int) n, BITS_PER_BYTE) into a local variable.
> + tp_locvar->nr_bytes = PAGE_SIZE;
> + tp_locvar->nr_ulong = PAGE_SIZE / sizeof(unsigned long);
> + /* Inform the user that we did not output everything. */
> + tp_locvar->overflow = 1;
> + } else {
> + tp_locvar->nr_bytes = DIV_ROUND_UP((unsigned int) n, BITS_PER_BYTE);
> + tp_locvar->nr_ulong = DIV_ROUND_UP((unsigned int) n,
> + BITS_PER_BYTE * sizeof(unsigned long));
> + }
> + /* On error or bogus input, don't copy anything. */
> + if (tp_locvar->nr_bytes > (__FD_SETSIZE / (8 * sizeof(uint8_t)))) {
Replace 8 with BITS_PER_BYTE.
Move this check before the comparison with PAGE_SIZE.
An "n" larger than 1023 is an error...
Test on "n" rather than nr_bytes.
nr_bytes will therefore never be over one page on Linux.
We could put a WARN_ON_ONCE() and goto error for it instead of the ceiling.
> + goto error;
> + }
> +
> + if (inp) {
> + tp_locvar->fds_in = kmalloc(
> + tp_locvar->nr_ulong * sizeof(unsigned long),
> + GFP_ATOMIC);
| GFP_NOWAIT as discussed.
> + if (!tp_locvar->fds_in)
> + goto error;
> +
> + err = lib_ring_buffer_copy_from_user_check_nofault(
> + tp_locvar->fds_in, inp,
> + tp_locvar->nr_ulong * sizeof(unsigned long));
> + if (err != 0)
> + goto error;
> + }
> + if (outp) {
> + tp_locvar->fds_out = kmalloc(
> + tp_locvar->nr_ulong * sizeof(unsigned long),
> + GFP_ATOMIC);
.....
> + if (!tp_locvar->fds_out)
> + goto error;
> +
> + err = lib_ring_buffer_copy_from_user_check_nofault(
> + tp_locvar->fds_out, outp,
> + tp_locvar->nr_ulong * sizeof(unsigned long));
> + if (err != 0)
> + goto error;
> + }
> + if (exp) {
> + tp_locvar->fds_ex = kmalloc(
> + tp_locvar->nr_ulong * sizeof(unsigned long),
> + GFP_ATOMIC);
.........
> + if (!tp_locvar->fds_ex)
> + goto error;
> +
> + err = lib_ring_buffer_copy_from_user_check_nofault(
> + tp_locvar->fds_ex, exp,
> + tp_locvar->nr_ulong * sizeof(unsigned long));
> + if (err != 0)
> + goto error;
> + }
> + goto end;
> +
> + error:
> + tp_locvar->nr_bytes = 0;
> + tp_locvar->nr_ulong = 0;
> + end:
> + /* bypass error: label at end of compound statement */
Reword to /* Label at end of compound statement. */
> + ;
Modify this line to:
end: ; /* Label at end of compound statement. */
> + }
> + )
> + ),
> + TP_FIELDS(
> + sc_exit(ctf_integer(long, ret, ret))
> + sc_in(ctf_integer(int, n, n))
> + sc_inout(ctf_integer(int, overflow, tp_locvar->overflow))
uint8_t
> + sc_inout(ctf_integer(struct timeval *, tvp, tvp))
> +
> + sc_inout(
> + /* inp */
> + ctf_custom_field(
> + ctf_custom_type(
> + .atype = atype_sequence,
> + .u.sequence.length_type = __type_integer(
> + uint16_t, 0, 0, 0, __BYTE_ORDER, 10, none),
uint8_t is enough. (1024/8=128)
> + .u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,
> + __BYTE_ORDER, 16, none),
> + ),
> + readfds,
> + ctf_custom_code(
> + unsigned int src;
> + unsigned int nr_bytes_out = 0;
> +
> + if (inp) {
> + ctf_integer_type(uint16_t, tp_locvar->nr_bytes)
uint8_t
> + ctf_align(uint8_t)
> + } else {
> + ctf_integer_type(uint16_t, 0)
uint8_t
> + ctf_align(uint8_t)
> + goto skip_inp;
> + }
> +
> + for (src = 0; src < tp_locvar->nr_ulong; src++) {
> + int dst;
> +#if (__BYTE_ORDER == __LITTLE_ENDIAN)
> + for (dst = 0; dst < sizeof(long); dst++) {
> + ctf_user_integer_type(uint8_t,
> + ((uint8_t __user *) (inp->fds_bits + src))[dst]);
> + if (++nr_bytes_out >= tp_locvar->nr_bytes) {
> + goto skip_inp;
> + }
> + }
> +#else
> + for (dst = sizeof(long); dst >= 0; dst--) {
> + ctf_user_integer_type(uint8_t,
> + ((uint8_t __user *) (inp->fds_bits + src))[dst]);
> + if (++nr_bytes_out >= tp_locvar->nr_bytes) {
> + goto skip_inp;
> + }
> + }
> +#endif
> + }
> + skip_inp:
> + ;
> + )
Use 3 (nested) macros rather than copy-and-paste.
> + )
> + /* outp */
> + ctf_custom_field(
> + ctf_custom_type(
> + .atype = atype_sequence,
> + .u.sequence.length_type = __type_integer(
> + uint16_t, 0, 0, 0, __BYTE_ORDER, 10, none),
> + .u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,
> + __BYTE_ORDER, 16, none),
> + ),
> + writefds,
> + ctf_custom_code(
> + unsigned int src;
> + unsigned int nr_bytes_out = 0;
> +
> + if (outp) {
> + ctf_integer_type(uint16_t, tp_locvar->nr_bytes)
> + ctf_align(uint8_t)
> + } else {
> + ctf_integer_type(uint16_t, 0)
> + ctf_align(uint8_t)
> + goto skip_outp;
> + }
> +
> + for (src = 0; src < tp_locvar->nr_ulong; src++) {
> + int dst;
> +#if (__BYTE_ORDER == __LITTLE_ENDIAN)
> + for (dst = 0; dst < sizeof(long); dst++) {
> + ctf_user_integer_type(uint8_t,
> + ((uint8_t __user *) (outp->fds_bits + src))[dst]);
> + if (++nr_bytes_out >= tp_locvar->nr_bytes) {
> + goto skip_outp;
> + }
> + }
> +#else
> + for (dst = sizeof(long); dst >= 0; dst--) {
> + ctf_user_integer_type(uint8_t,
> + ((uint8_t __user *) (outp->fds_bits + src))[dst]);
> + if (++nr_bytes_out >= tp_locvar->nr_bytes) {
> + goto skip_outp;
> + }
> + }
> +#endif
> + }
> + skip_outp:
> + ;
> + )
> + )
> + /* exp */
> + ctf_custom_field(
> + ctf_custom_type(
> + .atype = atype_sequence,
> + .u.sequence.length_type = __type_integer(
> + uint16_t, 0, 0, 0, __BYTE_ORDER, 10, none),
> + .u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,
> + __BYTE_ORDER, 16, none),
> + ),
> + exceptfds,
> + ctf_custom_code(
> + unsigned int src;
> + unsigned int nr_bytes_out = 0;
> +
> + if (exp) {
> + ctf_integer_type(uint16_t, tp_locvar->nr_bytes)
> + ctf_align(uint8_t)
> + } else {
> + ctf_integer_type(uint16_t, 0)
> + ctf_align(uint8_t)
> + goto skip_exp;
> + }
> +
> + for (src = 0; src < tp_locvar->nr_ulong; src++) {
> + int dst;
> +#if (__BYTE_ORDER == __LITTLE_ENDIAN)
> + for (dst = 0; dst < sizeof(long); dst++) {
> + ctf_user_integer_type(uint8_t,
> + ((uint8_t __user *) (exp->fds_bits + src))[dst]);
> + if (++nr_bytes_out >= tp_locvar->nr_bytes) {
> + goto skip_exp;
> + }
> + }
> +#else
> + for (dst = sizeof(long); dst >= 0; dst--) {
> + ctf_user_integer_type(uint8_t,
> + ((uint8_t __user *) (exp->fds_bits + src))[dst]);
> + if (++nr_bytes_out >= tp_locvar->nr_bytes) {
> + goto skip_exp;
> + }
> + }
> +#endif
> + }
> + skip_exp:
> + ;
> + )
> + )
> + )
> + ),
> + TP_code_post(
> + kfree(tp_locvar->fds_in);
> + kfree(tp_locvar->fds_out);
> + kfree(tp_locvar->fds_ex);
> + )
> +)
> +#endif /* defined(CONFIG_X86_32) || defined(CONFIG_X86_64) */
> +
> +#if defined(CONFIG_X86_32) || defined(CONFIG_X86_64) || defined(CONFIG_ARM64)
> || defined(CONFIG_ARM)
> +#define OVERRIDE_32_pselect6
> +#define OVERRIDE_64_pselect6
> +SC_LTTNG_TRACEPOINT_EVENT_CODE(pselect6,
> + TP_PROTO(sc_exit(long ret,) int n, fd_set __user * inp, fd_set __user * outp,
> + fd_set __user * exp, struct timeval * tvp, void * sig),
> + TP_ARGS(sc_exit(ret,) n, inp, outp, exp, tvp, sig),
> + TP_locvar(
Please combine into a macro with select.
> + unsigned long *fds_in, *fds_out, *fds_ex;
> + unsigned long nr_bytes, nr_ulong;
> + unsigned int overflow;
> + ),
> + TP_code_pre(
> + sc_inout(
> + {
> + int err;
> +
> + tp_locvar->fds_in = NULL;
> + tp_locvar->fds_out = NULL;
> + tp_locvar->fds_ex = NULL;
> +
> + tp_locvar->overflow = 0;
> +
> + sc_out(
> + if (ret <= 0)
> + goto error;
> + )
> +
> + if (n <= 0)
> + goto error;
> +
> + /* Limit atomic memory allocation to one page */
> + if (DIV_ROUND_UP((unsigned int) n, BITS_PER_BYTE) > PAGE_SIZE) {
> + tp_locvar->nr_bytes = PAGE_SIZE;
> + tp_locvar->nr_ulong = PAGE_SIZE / sizeof(unsigned long);
> + /* Inform the user that we did not output everything. */
> + tp_locvar->overflow = 1;
> + } else {
> + tp_locvar->nr_bytes = DIV_ROUND_UP((unsigned int) n, BITS_PER_BYTE);
> + tp_locvar->nr_ulong = DIV_ROUND_UP((unsigned int) n,
> + BITS_PER_BYTE * sizeof(unsigned long));
> + }
> + /* On error or bogus input, don't copy anything. */
> + if (tp_locvar->nr_bytes > (__FD_SETSIZE / (8 * sizeof(uint8_t)))) {
> + goto error;
> + }
> +
> + if (inp) {
> + tp_locvar->fds_in = kmalloc(
> + tp_locvar->nr_ulong * sizeof(unsigned long),
> + GFP_ATOMIC);
> + if (!tp_locvar->fds_in)
> + goto error;
> +
> + err = lib_ring_buffer_copy_from_user_check_nofault(
> + tp_locvar->fds_in, inp,
> + tp_locvar->nr_ulong * sizeof(unsigned long));
> + if (err != 0)
> + goto error;
> + }
> + if (outp) {
> + tp_locvar->fds_out = kmalloc(
> + tp_locvar->nr_ulong * sizeof(unsigned long),
> + GFP_ATOMIC);
> + if (!tp_locvar->fds_out)
> + goto error;
> +
> + err = lib_ring_buffer_copy_from_user_check_nofault(
> + tp_locvar->fds_out, outp,
> + tp_locvar->nr_ulong * sizeof(unsigned long));
> + if (err != 0)
> + goto error;
> + }
> + if (exp) {
> + tp_locvar->fds_ex = kmalloc(
> + tp_locvar->nr_ulong * sizeof(unsigned long),
> + GFP_ATOMIC);
> + if (!tp_locvar->fds_ex)
> + goto error;
> +
> + err = lib_ring_buffer_copy_from_user_check_nofault(
> + tp_locvar->fds_ex, exp,
> + tp_locvar->nr_ulong * sizeof(unsigned long));
> + if (err != 0)
> + goto error;
> + }
> + goto end;
> +
> + error:
> + tp_locvar->nr_bytes = 0;
> + tp_locvar->nr_ulong = 0;
> + end:
> + /* bypass error: label at end of compound statement */
> + ;
> + }
> + )
> + ),
> + TP_FIELDS(
> + sc_exit(ctf_integer(long, ret, ret))
> + sc_in(ctf_integer(int, n, n))
> + sc_in(ctf_integer(int, overflow, tp_locvar->overflow))
> + sc_inout(ctf_integer(struct timeval *, tvp, tvp))
> +
> + sc_inout(
> + /* inp */
> + ctf_custom_field(
> + ctf_custom_type(
> + .atype = atype_sequence,
> + .u.sequence.length_type = __type_integer(
> + uint16_t, 0, 0, 0, __BYTE_ORDER, 10, none),
> + .u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,
> + __BYTE_ORDER, 16, none),
> + ),
> + readfds,
> + ctf_custom_code(
> + unsigned int src;
> + unsigned int nr_bytes_out = 0;
> +
> + if (inp) {
> + ctf_integer_type(uint16_t, tp_locvar->nr_bytes)
> + ctf_align(uint8_t)
> + } else {
> + ctf_integer_type(uint16_t, 0)
> + ctf_align(uint8_t)
> + goto skip_inp;
> + }
> +
> + for (src = 0; src < tp_locvar->nr_ulong; src++) {
> + int dst;
> +#if (__BYTE_ORDER == __LITTLE_ENDIAN)
> + for (dst = 0; dst < sizeof(long); dst++) {
> + ctf_user_integer_type(uint8_t,
> + ((uint8_t __user *) (inp->fds_bits + src))[dst]);
> + if (++nr_bytes_out >= tp_locvar->nr_bytes) {
> + goto skip_inp;
> + }
> + }
> +#else
> + for (dst = sizeof(long); dst >= 0; dst--) {
> + ctf_user_integer_type(uint8_t,
> + ((uint8_t __user *) (inp->fds_bits + src))[dst]);
> + if (++nr_bytes_out >= tp_locvar->nr_bytes) {
> + goto skip_inp;
> + }
> + }
> +#endif
> + }
> + skip_inp:
> + ;
> + )
> + )
> + /* outp */
> + ctf_custom_field(
> + ctf_custom_type(
> + .atype = atype_sequence,
> + .u.sequence.length_type = __type_integer(
> + uint16_t, 0, 0, 0, __BYTE_ORDER, 10, none),
> + .u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,
> + __BYTE_ORDER, 16, none),
> + ),
> + writefds,
> + ctf_custom_code(
> + unsigned int src;
> + unsigned int nr_bytes_out = 0;
> +
> + if (outp) {
> + ctf_integer_type(uint16_t, tp_locvar->nr_bytes)
> + ctf_align(uint8_t)
> + } else {
> + ctf_integer_type(uint16_t, 0)
> + ctf_align(uint8_t)
> + goto skip_outp;
> + }
> +
> + for (src = 0; src < tp_locvar->nr_ulong; src++) {
> + int dst;
> +#if (__BYTE_ORDER == __LITTLE_ENDIAN)
> + for (dst = 0; dst < sizeof(long); dst++) {
> + ctf_user_integer_type(uint8_t,
> + ((uint8_t __user *) (outp->fds_bits + src))[dst]);
> + if (++nr_bytes_out >= tp_locvar->nr_bytes) {
> + goto skip_outp;
> + }
> + }
> +#else
> + for (dst = sizeof(long); dst >= 0; dst--) {
> + ctf_user_integer_type(uint8_t,
> + ((uint8_t __user *) (outp->fds_bits + src))[dst]);
> + if (++nr_bytes_out >= tp_locvar->nr_bytes) {
> + goto skip_outp;
> + }
> + }
> +#endif
> + }
> + skip_outp:
> + ;
> + )
> + )
> + /* exp */
> + ctf_custom_field(
> + ctf_custom_type(
> + .atype = atype_sequence,
> + .u.sequence.length_type = __type_integer(
> + uint16_t, 0, 0, 0, __BYTE_ORDER, 10, none),
> + .u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,
> + __BYTE_ORDER, 16, none),
> + ),
> + exceptfds,
> + ctf_custom_code(
> + unsigned int src;
> + unsigned int nr_bytes_out = 0;
> +
> + if (exp) {
> + ctf_integer_type(uint16_t, tp_locvar->nr_bytes)
> + ctf_align(uint8_t)
> + } else {
> + ctf_integer_type(uint16_t, 0)
> + ctf_align(uint8_t)
> + goto skip_exp;
> + }
> +
> + for (src = 0; src < tp_locvar->nr_ulong; src++) {
> + int dst;
> +#if (__BYTE_ORDER == __LITTLE_ENDIAN)
> + for (dst = 0; dst < sizeof(long); dst++) {
> + ctf_user_integer_type(uint8_t,
> + ((uint8_t __user *) (exp->fds_bits + src))[dst]);
> + if (++nr_bytes_out >= tp_locvar->nr_bytes) {
> + goto skip_exp;
> + }
> + }
> +#else
> + for (dst = sizeof(long); dst >= 0; dst--) {
> + ctf_user_integer_type(uint8_t,
> + ((uint8_t __user *) (exp->fds_bits + src))[dst]);
> + if (++nr_bytes_out >= tp_locvar->nr_bytes) {
> + goto skip_exp;
> + }
> + }
> +#endif
> + }
> + skip_exp:
> + ;
> + )
> + )
> + )
> + ),
> + TP_code_post(
> + kfree(tp_locvar->fds_in);
> + kfree(tp_locvar->fds_out);
> + kfree(tp_locvar->fds_ex);
Thanks,
Mathieu
> + )
> +)
> +#endif /* defined(CONFIG_X86_32) || defined(CONFIG_X86_64) ||
> defined(CONFIG_ARM64) || defined(CONFIG_ARM) */
> +
> #endif /* CREATE_SYSCALL_TABLE */
> --
> 1.9.1
--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com
More information about the lttng-dev
mailing list