[lttng-dev] [PATCH lttng-modules v5 2/5] Extract the FD sets in select and pselect6
Mathieu Desnoyers
mathieu.desnoyers at efficios.com
Fri Apr 29 18:25:57 UTC 2016
----- On Apr 29, 2016, at 2:13 PM, Mathieu Desnoyers mathieu.desnoyers at efficios.com wrote:
> ----- On Apr 22, 2016, at 8:16 PM, Julien Desfossez jdesfossez at efficios.com
> wrote:
>
>> Instead of extracting the user-space pointers of the 3 fd_set, we now
>> extract the bitmask of the FDs in the sets (in, out, ex) in the form of
>> an array of uint8_t (1024 FDs is the limit in the kernel).
>>
>> In this example, we select in input FDs 5 to 19 (0xFFFF0), it returns
>> that one FD is ready: FD 12 (0x1000).
>>
>> syscall_entry_select: {
>> n = 20,
>> _fdset_in_length = 3, fdset_in = [ [0] = 0xF0, [1] = 0xFF, [2] = 0xF ],
>> _fdset_out_length = 0, fdset_out = [ ],
>> _fdset_ex_length = 0, fdset_ex = [ ],
>> tvp = 0
>> }
>>
>> syscall_exit_select: {
>> ret = 1,
>> _fdset_in_length = 3, fdset_in = [ [0] = 0x0, [1] = 0x10, [2] = 0x0 ],
>> _fdset_out_length = 0, fdset_out = [ ],
>> _fdset_ex_length = 0, fdset_ex = [ ],
>> tvp = 0
>> }
>>
>> Signed-off-by: Julien Desfossez <jdesfossez at efficios.com>
>> ---
>> .../syscalls/headers/syscalls_pointers_override.h | 510 +++++++++++++++++++++
>> 1 file changed, 510 insertions(+)
>>
>> diff --git a/instrumentation/syscalls/headers/syscalls_pointers_override.h
>> b/instrumentation/syscalls/headers/syscalls_pointers_override.h
>> index bf5c632..ef4dc1c 100644
>> --- a/instrumentation/syscalls/headers/syscalls_pointers_override.h
>> +++ b/instrumentation/syscalls/headers/syscalls_pointers_override.h
>> @@ -53,4 +53,514 @@ SC_LTTNG_TRACEPOINT_EVENT(pipe2,
>> )
>> )
>>
>> +#if defined(CONFIG_X86_32) || defined(CONFIG_X86_64)
>> +#define OVERRIDE_32_select
>> +#define OVERRIDE_64_select
>> +SC_LTTNG_TRACEPOINT_EVENT_CODE(select,
>> + TP_PROTO(sc_exit(long ret,) int n, fd_set __user *inp, fd_set __user *outp,
>> + fd_set __user *exp, struct timeval *tvp),
>> + TP_ARGS(sc_exit(ret,) n, inp, outp, exp, tvp),
>> + TP_locvar(
>> + unsigned long *fds_in, *fds_out, *fds_ex;
>> + unsigned long nr_bytes, nr_ulong;
>> + unsigned int overflow;
>
> Change to uint8_t for overflow.
>
>> + ),
>> + TP_code_pre(
>> + sc_inout(
>> + {
>> + int err;
>> +
>> + tp_locvar->fds_in = NULL;
>> + tp_locvar->fds_out = NULL;
>> + tp_locvar->fds_ex = NULL;
>> +
>
> remove newline.
>
>> + tp_locvar->overflow = 0;
>> +
>> + sc_out(
>> + if (ret <= 0)
>> + goto error;
>> + )
>> +
>> + if (n <= 0)
>> + goto error;
>> +
>> + /* Limit atomic memory allocation to one page */
>> + if (DIV_ROUND_UP((unsigned int) n, BITS_PER_BYTE) > PAGE_SIZE) {
>
> Put DIV_ROUND_UP((unsigned int) n, BITS_PER_BYTE) into a local variable.
>
>> + tp_locvar->nr_bytes = PAGE_SIZE;
>> + tp_locvar->nr_ulong = PAGE_SIZE / sizeof(unsigned long);
>> + /* Inform the user that we did not output everything. */
>> + tp_locvar->overflow = 1;
>> + } else {
>> + tp_locvar->nr_bytes = DIV_ROUND_UP((unsigned int) n, BITS_PER_BYTE);
>> + tp_locvar->nr_ulong = DIV_ROUND_UP((unsigned int) n,
>> + BITS_PER_BYTE * sizeof(unsigned long));
>> + }
>> + /* On error or bogus input, don't copy anything. */
>> + if (tp_locvar->nr_bytes > (__FD_SETSIZE / (8 * sizeof(uint8_t)))) {
>
> Replace 8 by BITS_PER_BYTE
>
> Move this check before the comparison with PAGE_SIZE.
> A "n" larger than 1023 is an error....
> Test on "n" rather than nr_bytes.
>
> nr_bytes will therefore never be over one page on Linux.
> We could put a WARN_ON_ONCE() and goto error for it instead of the ceiling.
>
>
>> + goto error;
>> + }
>> +
>> + if (inp) {
>> + tp_locvar->fds_in = kmalloc(
>> + tp_locvar->nr_ulong * sizeof(unsigned long),
>> + GFP_ATOMIC);
>
> GFP_NOWAIT as discussed.
>
>> + if (!tp_locvar->fds_in)
>> + goto error;
>> +
>> + err = lib_ring_buffer_copy_from_user_check_nofault(
>> + tp_locvar->fds_in, inp,
>> + tp_locvar->nr_ulong * sizeof(unsigned long));
>> + if (err != 0)
>> + goto error;
>> + }
>> + if (outp) {
>> + tp_locvar->fds_out = kmalloc(
>> + tp_locvar->nr_ulong * sizeof(unsigned long),
>> + GFP_ATOMIC);
>
> .....
>
>> + if (!tp_locvar->fds_out)
>> + goto error;
>> +
>> + err = lib_ring_buffer_copy_from_user_check_nofault(
>> + tp_locvar->fds_out, outp,
>> + tp_locvar->nr_ulong * sizeof(unsigned long));
>> + if (err != 0)
>> + goto error;
>> + }
>> + if (exp) {
>> + tp_locvar->fds_ex = kmalloc(
>> + tp_locvar->nr_ulong * sizeof(unsigned long),
>> + GFP_ATOMIC);
>
> .........
>
>> + if (!tp_locvar->fds_ex)
>> + goto error;
>> +
>> + err = lib_ring_buffer_copy_from_user_check_nofault(
>> + tp_locvar->fds_ex, exp,
>> + tp_locvar->nr_ulong * sizeof(unsigned long));
>> + if (err != 0)
>> + goto error;
>> + }
>> + goto end;
>> +
>> + error:
>> + tp_locvar->nr_bytes = 0;
>> + tp_locvar->nr_ulong = 0;
>> + end:
>> + /* bypass error: label at end of compound statement */
>
> Reword to /* Label at end of compound statement. */
>
>> + ;
>
> Modify the line to:
>
> end: ; /* Label at end of compound statement. */
>
>> + }
>> + )
>> + ),
>> + TP_FIELDS(
>> + sc_exit(ctf_integer(long, ret, ret))
>> + sc_in(ctf_integer(int, n, n))
>> + sc_inout(ctf_integer(int, overflow, tp_locvar->overflow))
>
> uint8_t
>
>> + sc_inout(ctf_integer(struct timeval *, tvp, tvp))
>> +
>> + sc_inout(
>> + /* inp */
>> + ctf_custom_field(
>> + ctf_custom_type(
>> + .atype = atype_sequence,
>> + .u.sequence.length_type = __type_integer(
>> + uint16_t, 0, 0, 0, __BYTE_ORDER, 10, none),
>
> uint8_t is enough. (1024/8=128)
>
>> + .u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,
>> + __BYTE_ORDER, 16, none),
>> + ),
>> + readfds,
>> + ctf_custom_code(
>> + unsigned int src;
>> + unsigned int nr_bytes_out = 0;
>> +
>> + if (inp) {
>> + ctf_integer_type(uint16_t, tp_locvar->nr_bytes)
>
> uint8_t
>
>> + ctf_align(uint8_t)
>> + } else {
>> + ctf_integer_type(uint16_t, 0)
>
> uint8_t
>
>> + ctf_align(uint8_t)
>> + goto skip_inp;
>> + }
>> +
>> + for (src = 0; src < tp_locvar->nr_ulong; src++) {
>> + int dst;
>> +#if (__BYTE_ORDER == __LITTLE_ENDIAN)
>> + for (dst = 0; dst < sizeof(long); dst++) {
>> + ctf_user_integer_type(uint8_t,
>> + ((uint8_t __user *) (inp->fds_bits + src))[dst]);
>> + if (++nr_bytes_out >= tp_locvar->nr_bytes) {
>> + goto skip_inp;
>> + }
While technically correct, please do the check before
"writing" the field.
Move the check just before the ctf_user_integer_type(),
and post-increment nr_bytes_out (in both the little- and big-endian loops).
Mathieu
>> + }
>> +#else
>> + for (dst = sizeof(long); dst >= 0; dst--) {
>> + ctf_user_integer_type(uint8_t,
>> + ((uint8_t __user *) (inp->fds_bits + src))[dst]);
>> + if (++nr_bytes_out >= tp_locvar->nr_bytes) {
>> + goto skip_inp;
>> + }
>> + }
>> +#endif
>> + }
>> + skip_inp:
>> + ;
>> + )
>
> Use 3 (nested) macros rather than cut-and-paste.
>
>> + )
>> + /* outp */
>> + ctf_custom_field(
>> + ctf_custom_type(
>> + .atype = atype_sequence,
>> + .u.sequence.length_type = __type_integer(
>> + uint16_t, 0, 0, 0, __BYTE_ORDER, 10, none),
>> + .u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,
>> + __BYTE_ORDER, 16, none),
>> + ),
>> + writefds,
>> + ctf_custom_code(
>> + unsigned int src;
>> + unsigned int nr_bytes_out = 0;
>> +
>> + if (outp) {
>> + ctf_integer_type(uint16_t, tp_locvar->nr_bytes)
>> + ctf_align(uint8_t)
>> + } else {
>> + ctf_integer_type(uint16_t, 0)
>> + ctf_align(uint8_t)
>> + goto skip_outp;
>> + }
>> +
>> + for (src = 0; src < tp_locvar->nr_ulong; src++) {
>> + int dst;
>> +#if (__BYTE_ORDER == __LITTLE_ENDIAN)
>> + for (dst = 0; dst < sizeof(long); dst++) {
>> + ctf_user_integer_type(uint8_t,
>> + ((uint8_t __user *) (outp->fds_bits + src))[dst]);
>> + if (++nr_bytes_out >= tp_locvar->nr_bytes) {
>> + goto skip_outp;
>> + }
>> + }
>> +#else
>> + for (dst = sizeof(long); dst >= 0; dst--) {
>> + ctf_user_integer_type(uint8_t,
>> + ((uint8_t __user *) (outp->fds_bits + src))[dst]);
>> + if (++nr_bytes_out >= tp_locvar->nr_bytes) {
>> + goto skip_outp;
>> + }
>> + }
>> +#endif
>> + }
>> + skip_outp:
>> + ;
>> + )
>> + )
>> + /* exp */
>> + ctf_custom_field(
>> + ctf_custom_type(
>> + .atype = atype_sequence,
>> + .u.sequence.length_type = __type_integer(
>> + uint16_t, 0, 0, 0, __BYTE_ORDER, 10, none),
>> + .u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,
>> + __BYTE_ORDER, 16, none),
>> + ),
>> + exceptfds,
>> + ctf_custom_code(
>> + unsigned int src;
>> + unsigned int nr_bytes_out = 0;
>> +
>> + if (exp) {
>> + ctf_integer_type(uint16_t, tp_locvar->nr_bytes)
>> + ctf_align(uint8_t)
>> + } else {
>> + ctf_integer_type(uint16_t, 0)
>> + ctf_align(uint8_t)
>> + goto skip_exp;
>> + }
>> +
>> + for (src = 0; src < tp_locvar->nr_ulong; src++) {
>> + int dst;
>> +#if (__BYTE_ORDER == __LITTLE_ENDIAN)
>> + for (dst = 0; dst < sizeof(long); dst++) {
>> + ctf_user_integer_type(uint8_t,
>> + ((uint8_t __user *) (exp->fds_bits + src))[dst]);
>> + if (++nr_bytes_out >= tp_locvar->nr_bytes) {
>> + goto skip_exp;
>> + }
>> + }
>> +#else
>> + for (dst = sizeof(long); dst >= 0; dst--) {
>> + ctf_user_integer_type(uint8_t,
>> + ((uint8_t __user *) (exp->fds_bits + src))[dst]);
>> + if (++nr_bytes_out >= tp_locvar->nr_bytes) {
>> + goto skip_exp;
>> + }
>> + }
>> +#endif
>> + }
>> + skip_exp:
>> + ;
>> + )
>> + )
>> + )
>> + ),
>> + TP_code_post(
>> + kfree(tp_locvar->fds_in);
>> + kfree(tp_locvar->fds_out);
>> + kfree(tp_locvar->fds_ex);
>> + )
>> +)
>> +#endif /* defined(CONFIG_X86_32) || defined(CONFIG_X86_64) */
>> +
>> +#if defined(CONFIG_X86_32) || defined(CONFIG_X86_64) || defined(CONFIG_ARM64)
>> || defined(CONFIG_ARM)
>> +#define OVERRIDE_32_pselect6
>> +#define OVERRIDE_64_pselect6
>> +SC_LTTNG_TRACEPOINT_EVENT_CODE(pselect6,
>> + TP_PROTO(sc_exit(long ret,) int n, fd_set __user * inp, fd_set __user * outp,
>> + fd_set __user * exp, struct timeval * tvp, void * sig),
>> + TP_ARGS(sc_exit(ret,) n, inp, outp, exp, tvp, sig),
>> + TP_locvar(
>
> Please combine into a macro with select.
>
>> + unsigned long *fds_in, *fds_out, *fds_ex;
>> + unsigned long nr_bytes, nr_ulong;
>> + unsigned int overflow;
>> + ),
>> + TP_code_pre(
>> + sc_inout(
>> + {
>> + int err;
>> +
>> + tp_locvar->fds_in = NULL;
>> + tp_locvar->fds_out = NULL;
>> + tp_locvar->fds_ex = NULL;
>> +
>> + tp_locvar->overflow = 0;
>> +
>> + sc_out(
>> + if (ret <= 0)
>> + goto error;
>> + )
>> +
>> + if (n <= 0)
>> + goto error;
>> +
>> + /* Limit atomic memory allocation to one page */
>> + if (DIV_ROUND_UP((unsigned int) n, BITS_PER_BYTE) > PAGE_SIZE) {
>> + tp_locvar->nr_bytes = PAGE_SIZE;
>> + tp_locvar->nr_ulong = PAGE_SIZE / sizeof(unsigned long);
>> + /* Inform the user that we did not output everything. */
>> + tp_locvar->overflow = 1;
>> + } else {
>> + tp_locvar->nr_bytes = DIV_ROUND_UP((unsigned int) n, BITS_PER_BYTE);
>> + tp_locvar->nr_ulong = DIV_ROUND_UP((unsigned int) n,
>> + BITS_PER_BYTE * sizeof(unsigned long));
>> + }
>> + /* On error or bogus input, don't copy anything. */
>> + if (tp_locvar->nr_bytes > (__FD_SETSIZE / (8 * sizeof(uint8_t)))) {
>> + goto error;
>> + }
>> +
>> + if (inp) {
>> + tp_locvar->fds_in = kmalloc(
>> + tp_locvar->nr_ulong * sizeof(unsigned long),
>> + GFP_ATOMIC);
>> + if (!tp_locvar->fds_in)
>> + goto error;
>> +
>> + err = lib_ring_buffer_copy_from_user_check_nofault(
>> + tp_locvar->fds_in, inp,
>> + tp_locvar->nr_ulong * sizeof(unsigned long));
>> + if (err != 0)
>> + goto error;
>> + }
>> + if (outp) {
>> + tp_locvar->fds_out = kmalloc(
>> + tp_locvar->nr_ulong * sizeof(unsigned long),
>> + GFP_ATOMIC);
>> + if (!tp_locvar->fds_out)
>> + goto error;
>> +
>> + err = lib_ring_buffer_copy_from_user_check_nofault(
>> + tp_locvar->fds_out, outp,
>> + tp_locvar->nr_ulong * sizeof(unsigned long));
>> + if (err != 0)
>> + goto error;
>> + }
>> + if (exp) {
>> + tp_locvar->fds_ex = kmalloc(
>> + tp_locvar->nr_ulong * sizeof(unsigned long),
>> + GFP_ATOMIC);
>> + if (!tp_locvar->fds_ex)
>> + goto error;
>> +
>> + err = lib_ring_buffer_copy_from_user_check_nofault(
>> + tp_locvar->fds_ex, exp,
>> + tp_locvar->nr_ulong * sizeof(unsigned long));
>> + if (err != 0)
>> + goto error;
>> + }
>> + goto end;
>> +
>> + error:
>> + tp_locvar->nr_bytes = 0;
>> + tp_locvar->nr_ulong = 0;
>> + end:
>> + /* bypass error: label at end of compound statement */
>> + ;
>> + }
>> + )
>> + ),
>> + TP_FIELDS(
>> + sc_exit(ctf_integer(long, ret, ret))
>> + sc_in(ctf_integer(int, n, n))
>> + sc_in(ctf_integer(int, overflow, tp_locvar->overflow))
>> + sc_inout(ctf_integer(struct timeval *, tvp, tvp))
>> +
>> + sc_inout(
>> + /* inp */
>> + ctf_custom_field(
>> + ctf_custom_type(
>> + .atype = atype_sequence,
>> + .u.sequence.length_type = __type_integer(
>> + uint16_t, 0, 0, 0, __BYTE_ORDER, 10, none),
>> + .u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,
>> + __BYTE_ORDER, 16, none),
>> + ),
>> + readfds,
>> + ctf_custom_code(
>> + unsigned int src;
>> + unsigned int nr_bytes_out = 0;
>> +
>> + if (inp) {
>> + ctf_integer_type(uint16_t, tp_locvar->nr_bytes)
>> + ctf_align(uint8_t)
>> + } else {
>> + ctf_integer_type(uint16_t, 0)
>> + ctf_align(uint8_t)
>> + goto skip_inp;
>> + }
>> +
>> + for (src = 0; src < tp_locvar->nr_ulong; src++) {
>> + int dst;
>> +#if (__BYTE_ORDER == __LITTLE_ENDIAN)
>> + for (dst = 0; dst < sizeof(long); dst++) {
>> + ctf_user_integer_type(uint8_t,
>> + ((uint8_t __user *) (inp->fds_bits + src))[dst]);
>> + if (++nr_bytes_out >= tp_locvar->nr_bytes) {
>> + goto skip_inp;
>> + }
>> + }
>> +#else
>> + for (dst = sizeof(long); dst >= 0; dst--) {
>> + ctf_user_integer_type(uint8_t,
>> + ((uint8_t __user *) (inp->fds_bits + src))[dst]);
>> + if (++nr_bytes_out >= tp_locvar->nr_bytes) {
>> + goto skip_inp;
>> + }
>> + }
>> +#endif
>> + }
>> + skip_inp:
>> + ;
>> + )
>> + )
>> + /* outp */
>> + ctf_custom_field(
>> + ctf_custom_type(
>> + .atype = atype_sequence,
>> + .u.sequence.length_type = __type_integer(
>> + uint16_t, 0, 0, 0, __BYTE_ORDER, 10, none),
>> + .u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,
>> + __BYTE_ORDER, 16, none),
>> + ),
>> + writefds,
>> + ctf_custom_code(
>> + unsigned int src;
>> + unsigned int nr_bytes_out = 0;
>> +
>> + if (outp) {
>> + ctf_integer_type(uint16_t, tp_locvar->nr_bytes)
>> + ctf_align(uint8_t)
>> + } else {
>> + ctf_integer_type(uint16_t, 0)
>> + ctf_align(uint8_t)
>> + goto skip_outp;
>> + }
>> +
>> + for (src = 0; src < tp_locvar->nr_ulong; src++) {
>> + int dst;
>> +#if (__BYTE_ORDER == __LITTLE_ENDIAN)
>> + for (dst = 0; dst < sizeof(long); dst++) {
>> + ctf_user_integer_type(uint8_t,
>> + ((uint8_t __user *) (outp->fds_bits + src))[dst]);
>> + if (++nr_bytes_out >= tp_locvar->nr_bytes) {
>> + goto skip_outp;
>> + }
>> + }
>> +#else
>> + for (dst = sizeof(long); dst >= 0; dst--) {
>> + ctf_user_integer_type(uint8_t,
>> + ((uint8_t __user *) (outp->fds_bits + src))[dst]);
>> + if (++nr_bytes_out >= tp_locvar->nr_bytes) {
>> + goto skip_outp;
>> + }
>> + }
>> +#endif
>> + }
>> + skip_outp:
>> + ;
>> + )
>> + )
>> + /* exp */
>> + ctf_custom_field(
>> + ctf_custom_type(
>> + .atype = atype_sequence,
>> + .u.sequence.length_type = __type_integer(
>> + uint16_t, 0, 0, 0, __BYTE_ORDER, 10, none),
>> + .u.sequence.elem_type = __type_integer(uint8_t, 0, 0, 0,
>> + __BYTE_ORDER, 16, none),
>> + ),
>> + exceptfds,
>> + ctf_custom_code(
>> + unsigned int src;
>> + unsigned int nr_bytes_out = 0;
>> +
>> + if (exp) {
>> + ctf_integer_type(uint16_t, tp_locvar->nr_bytes)
>> + ctf_align(uint8_t)
>> + } else {
>> + ctf_integer_type(uint16_t, 0)
>> + ctf_align(uint8_t)
>> + goto skip_exp;
>> + }
>> +
>> + for (src = 0; src < tp_locvar->nr_ulong; src++) {
>> + int dst;
>> +#if (__BYTE_ORDER == __LITTLE_ENDIAN)
>> + for (dst = 0; dst < sizeof(long); dst++) {
>> + ctf_user_integer_type(uint8_t,
>> + ((uint8_t __user *) (exp->fds_bits + src))[dst]);
>> + if (++nr_bytes_out >= tp_locvar->nr_bytes) {
>> + goto skip_exp;
>> + }
>> + }
>> +#else
>> + for (dst = sizeof(long); dst >= 0; dst--) {
>> + ctf_user_integer_type(uint8_t,
>> + ((uint8_t __user *) (exp->fds_bits + src))[dst]);
>> + if (++nr_bytes_out >= tp_locvar->nr_bytes) {
>> + goto skip_exp;
>> + }
>> + }
>> +#endif
>> + }
>> + skip_exp:
>> + ;
>> + )
>> + )
>> + )
>> + ),
>> + TP_code_post(
>> + kfree(tp_locvar->fds_in);
>> + kfree(tp_locvar->fds_out);
>> + kfree(tp_locvar->fds_ex);
>
> Thanks,
>
> Mathieu
>
>> + )
>> +)
>> +#endif /* defined(CONFIG_X86_32) || defined(CONFIG_X86_64) ||
>> defined(CONFIG_ARM64) || defined(CONFIG_ARM) */
>> +
>> #endif /* CREATE_SYSCALL_TABLE */
>> --
>> 1.9.1
>
> --
> Mathieu Desnoyers
> EfficiOS Inc.
> http://www.efficios.com
--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com
More information about the lttng-dev
mailing list