[lttng-dev] [RFC PATCH lttng-tools] Fix mmap for large subbuffers and handle EINTR

David Goulet dgoulet at efficios.com
Thu Jul 5 10:57:37 EDT 2012


-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Comments below.

On 04/07/12 12:10 PM, Mathieu Desnoyers wrote:
> With large subbuffer (packet) size, if write() returns before copying the
> entire packet for mmap buffers, the consumerd restarts the write 
> infinitely, which is not good at all.
> 
> This affects both lttng-ust (in default mmap mode) and lttng-kernel (but 
> only for mmap buffers, which is not the default).
> 
> This issue would show up with large subbuffer size.
> 
> We need to handle this case, as well as EINTR errors (which need to restart
> write).
> 
> Also fixing the return value of mmap read functions, which were returning
> the amount of data written by the last invocation of write() rather than
> the total number of bytes written. splice use had the same issue.
> 
> Also now consider a write() that returns more bytes than requested as an 
> error.
> 
> Moreover, assigning error = ret after failed splice and write was a 
> mistake: error is holding the actual error value. ret just holds -1.
> 
> This needs testing. Feedback is welcome.
> 
> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers at efficios.com> --- diff
> --git a/src/common/kernel-consumer/kernel-consumer.c
> b/src/common/kernel-consumer/kernel-consumer.c index bbc31f8..eca710d
> 100644 --- a/src/common/kernel-consumer/kernel-consumer.c +++
> b/src/common/kernel-consumer/kernel-consumer.c @@ -49,7 +49,7 @@ ssize_t
> lttng_kconsumer_on_read_subbuffer_mmap( struct lttng_consumer_stream
> *stream, unsigned long len) { unsigned long mmap_offset; -	ssize_t ret =
> 0; +	ssize_t ret = 0, written = 0; off_t orig_offset =
> stream->out_fd_offset; int fd = stream->wait_fd; int outfd =
> stream->out_fd; @@ -64,25 +64,34 @@ ssize_t
> lttng_kconsumer_on_read_subbuffer_mmap(

Just before going into the while() loop, there is a call to
kernctl_get_mmap_read_offset() and, on failure, it goes to the "end:" label
without setting "written" to a negative value.

> 
> while (len > 0) { ret = write(outfd, stream->mmap_base + mmap_offset,
> len); -		if (ret >= len) { -			len = 0; -		} else if (ret < 0) { -			errno
> = -ret; +		if (ret < 0) { +			if (errno == EINTR) { +				/* restart the
> interrupted system call */ +				continue; +			} else { +				perror("Error
> in file write"); +				if (written == 0) { +					written = ret; +				} +
> goto end; +			} +		} else if (ret > len) { perror("Error in file write"); +
> written += ret; goto end;

I'm not sure this case is very useful... write() can not possibly return more
than "len" (the size_t count argument). If it does, I'm pretty sure the whole
Linux system will not go very well... hehe. The same applies for splice(),
checking is ret > len is basically safe guarding the syscall which I don't
think is necessary.

However, according to the man page write(2), a return value of 0 coupled with
a regular file can return an error status:

       If  count  is  zero  and  fd refers to a regular file, then write() may
       return a failure status if one of the errors below is detected.  If  no
       errors  are  detected,  0  will  be  returned without causing any other
       effect.  If count is zero and fd refers to a file other than a  regular
       file, the results are not specified.

We might want to add a zero/errno check instead.

For testing, I can tell you that this patch fixes mmap network streaming (soon
to be merged upstream) where the old code did not worked.

Cheers!
David

> +		} else { +			len -= ret; +			mmap_offset += ret; } /* This won't block,
> but will start writeout asynchronously */ lttng_sync_file_range(outfd,
> stream->out_fd_offset, ret, SYNC_FILE_RANGE_WRITE); stream->out_fd_offset
> += ret; +		written += ret; } - lttng_consumer_sync_trace_file(stream,
> orig_offset); - -	goto end; - end: -	return ret; +	return written; }
> 
> /* @@ -94,7 +103,7 @@ ssize_t lttng_kconsumer_on_read_subbuffer_splice( 
> struct lttng_consumer_local_data *ctx, struct lttng_consumer_stream
> *stream, unsigned long len) { -	ssize_t ret = 0; +	ssize_t ret = 0, written
> = 0; loff_t offset = 0; off_t orig_offset = stream->out_fd_offset; int fd =
> stream->wait_fd; @@ -107,8 +116,11 @@ ssize_t
> lttng_kconsumer_on_read_subbuffer_splice( SPLICE_F_MOVE | SPLICE_F_MORE); 
> DBG("splice chan to pipe ret %zd", ret); if (ret < 0) { -			errno = -ret; 
> perror("Error in relay splice"); +			if (written == 0) { +				written =
> ret; +			} +			ret = errno; goto splice_error; }
> 
> @@ -116,8 +128,18 @@ ssize_t lttng_kconsumer_on_read_subbuffer_splice( 
> SPLICE_F_MOVE | SPLICE_F_MORE); DBG("splice pipe to file %zd", ret); if
> (ret < 0) { -			errno = -ret; perror("Error in file splice"); +			if
> (written == 0) { +				written = ret; +			} +			ret = errno; +			goto
> splice_error; +		} +		if (ret > len) { +			errno = EINVAL; +
> perror("Wrote more data than requested"); +			written += ret; +			ret =
> errno; goto splice_error; } len -= ret; @@ -125,6 +147,7 @@ ssize_t
> lttng_kconsumer_on_read_subbuffer_splice( lttng_sync_file_range(outfd,
> stream->out_fd_offset, ret, SYNC_FILE_RANGE_WRITE); stream->out_fd_offset
> += ret; +		written += ret; } lttng_consumer_sync_trace_file(stream,
> orig_offset);
> 
> @@ -132,7 +155,7 @@ ssize_t lttng_kconsumer_on_read_subbuffer_splice(
> 
> splice_error: /* send the appropriate error description to sessiond */ -
> switch(ret) { +	switch (ret) { case EBADF: lttng_consumer_send_error(ctx,
> CONSUMERD_SPLICE_EBADF); break; @@ -148,7 +171,7 @@ splice_error: }
> 
> end: -	return ret; +	return written; }
> 
> /* @@ -351,13 +374,14 @@ ssize_t lttng_kconsumer_read_subbuffer(struct
> lttng_consumer_stream *stream,
> 
> /* splice the subbuffer to the tracefile */ ret =
> lttng_consumer_on_read_subbuffer_splice(ctx, stream, len); -			if (ret < 0)
> { +			if (ret != len) { /* * display the error but continue processing to
> try * to release the subbuffer */ ERR("Error splicing to tracefile"); } + 
> break; case LTTNG_EVENT_MMAP: /* read the used subbuffer size */ @@ -369,7
> +393,7 @@ ssize_t lttng_kconsumer_read_subbuffer(struct
> lttng_consumer_stream *stream, } /* write the subbuffer to the tracefile
> */ ret = lttng_consumer_on_read_subbuffer_mmap(ctx, stream, len); -			if
> (ret < 0) { +			if (ret != len) { /* * display the error but continue
> processing to try * to release the subbuffer diff --git
> a/src/common/ust-consumer/ust-consumer.c
> b/src/common/ust-consumer/ust-consumer.c index 2b55fd4..4db39d8 100644 ---
> a/src/common/ust-consumer/ust-consumer.c +++
> b/src/common/ust-consumer/ust-consumer.c @@ -49,7 +49,7 @@ ssize_t
> lttng_ustconsumer_on_read_subbuffer_mmap( struct lttng_consumer_stream
> *stream, unsigned long len) { unsigned long mmap_offset; -	long ret = 0; +
> long ret = 0, written = 0; off_t orig_offset = stream->out_fd_offset; int
> outfd = stream->out_fd;
> 
> @@ -63,25 +63,34 @@ ssize_t lttng_ustconsumer_on_read_subbuffer_mmap( } 
> while (len > 0) { ret = write(outfd, stream->mmap_base + mmap_offset,
> len); -		if (ret >= len) { -			len = 0; -		} else if (ret < 0) { -			errno
> = -ret; -			PERROR("Error in file write"); +		if (ret < 0) { +			if (errno
> == EINTR) { +				/* restart the interrupted system call */ +				continue; +
> } else { +				perror("Error in file write"); +				if (written == 0) { +
> written = ret; +				} +				goto end; +			} +		} else if (ret > len) { +
> perror("Error in file write"); +			written += ret; goto end; +		} else { +
> len -= ret; +			mmap_offset += ret; } /* This won't block, but will start
> writeout asynchronously */ lttng_sync_file_range(outfd,
> stream->out_fd_offset, ret, SYNC_FILE_RANGE_WRITE); stream->out_fd_offset
> += ret; +		written += ret; } - lttng_consumer_sync_trace_file(stream,
> orig_offset); - -	goto end; - end: -	return ret; +	return written; }
> 
> /* @@ -384,7 +393,7 @@ int lttng_ustconsumer_read_subbuffer(struct
> lttng_consumer_stream *stream, assert(err == 0); /* write the subbuffer to
> the tracefile */ ret = lttng_consumer_on_read_subbuffer_mmap(ctx, stream,
> len); -	if (ret < 0) { +	if (ret != len) { /* * display the error but
> continue processing to try * to release the subbuffer
> 
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.12 (GNU/Linux)

iQEcBAEBAgAGBQJP9atdAAoJEELoaioR9I02l5wIAJe5VXr47vnXZoaxJWLHFeYs
4DT4HGLrZmPEUbyhrywQvtn86qNwiAfwFMHGEQ9P7xAxriLsSiFA0doQ6KzifXGn
gfGGvwQJvEFxvCFh0EPjBVoneJ0hzMz0rLw97VoF0J03RKeLoDWHa36kai9fWOoO
Ku42fxBPFTgqBZsDgqRUJpdudBkvqvCMpOQI27MEJ4E2vCg/G24Q83OhHfc9NUuO
UtVsiAzPiXp3wKL882ly+r8lQpDKXkAdOfZszKROTNtMCf/Yo2QGKpKB4RMAGdQG
uVSDgba7Ih/WmUbz5WUJ5FLmqC9UrGyKtKHgG6oJ+ebNdEbOafBydLNQ/00a/OY=
=cS6M
-----END PGP SIGNATURE-----



More information about the lttng-dev mailing list