[ltt-dev] [UST PATCH 1/3] ust-consumerd: fix exit race crashes

Nils Carlson nils.carlson at ericsson.com
Thu Apr 28 09:20:18 EDT 2011


merged.

On Wed, 27 Apr 2011, Jason Wessel wrote:

> The ust-consumerd gets shut down by the SIGTERM signal, and a number of
> places in the ust-consumerd did not properly deal with the case where
> a system call fails with errno set to EINTR as a result of a signal to
> the process.  The failure to handle EINTR properly led to some
> data corruption in the buffer code and caused some random "victim"
> crashes in lowlevel.c.
>
> The way all the offending functions were tracked down was to
> temporarily add an abort() in the SIGTERM signal handler.  Then it was
> a matter of looking at what threads were blocked on system calls at
> the time outside of the thread that received the signal.
>
> Signed-off-by: Jason Wessel <jason.wessel at windriver.com>
> ---
> libustconsumer/libustconsumer.c |   25 +++++++++++++++++++------
> ust-consumerd/ust-consumerd.c   |   11 ++++++++++-
> 2 files changed, 29 insertions(+), 7 deletions(-)
>
> diff --git a/libustconsumer/libustconsumer.c b/libustconsumer/libustconsumer.c
> index c5acffa..6f6d4bb 100644
> --- a/libustconsumer/libustconsumer.c
> +++ b/libustconsumer/libustconsumer.c
> @@ -477,6 +477,8 @@ int consumer_loop(struct ustconsumer_instance *instance, struct buffer_info *buf
> 			DBG("App died while being traced");
> 			finish_consuming_dead_subbuffer(instance->callbacks, buf);
> 			break;
> +		} else if (read_result == -1 && errno == EINTR) {
> +			continue;
> 		}
>
> 		if(instance->callbacks->on_read_subbuffer)
> @@ -783,8 +785,11 @@ int ustconsumer_stop_instance(struct ustconsumer_instance *instance, int send_ms
>
> 	struct sockaddr_un addr;
>
> +socket_again:
> 	result = fd = socket(PF_UNIX, SOCK_STREAM, 0);
> 	if(result == -1) {
> +		if (errno == EINTR)
> +			goto socket_again;
> 		PERROR("socket");
> 		return 1;
> 	}
> @@ -794,13 +799,21 @@ int ustconsumer_stop_instance(struct ustconsumer_instance *instance, int send_ms
> 	strncpy(addr.sun_path, instance->sock_path, UNIX_PATH_MAX);
> 	addr.sun_path[UNIX_PATH_MAX-1] = '\0';
>
> -	result = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
> -	if(result == -1) {
> -		PERROR("connect");
> -	}
> +connect_again:
> +		result = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
> +		if(result == -1) {
> +			if (errno == EINTR)
> +				goto connect_again;
> +			PERROR("connect");
> +		}
>
> -	while(bytes != sizeof(msg))
> -		bytes += send(fd, msg, sizeof(msg), 0);
> +	while(bytes != sizeof(msg)) {
> +		int inc = send(fd, msg, sizeof(msg), 0);
> +		if (inc < 0 && errno != EINTR)
> +			break;
> +		else
> +			bytes += inc;
> +	}
>
> 	close(fd);
>
> diff --git a/ust-consumerd/ust-consumerd.c b/ust-consumerd/ust-consumerd.c
> index ce2ee40..c961394 100644
> --- a/ust-consumerd/ust-consumerd.c
> +++ b/ust-consumerd/ust-consumerd.c
> @@ -210,7 +210,11 @@ int on_open_buffer(struct ustconsumer_callbacks *data, struct buffer_info *buf)
> 		    trace_path, buf->pid, buf->pidunique, buf->name);
> 		return 1;
> 	}
> +again:
> 	result = fd = open(tmp, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 00600);
> +	if (result == -1 && errno == EINTR)
> +		goto again;
> +
> 	if(result == -1) {
> 		PERROR("open");
> 		ERR("failed opening trace file %s", tmp);
> @@ -225,7 +229,12 @@ int on_open_buffer(struct ustconsumer_callbacks *data, struct buffer_info *buf)
> int on_close_buffer(struct ustconsumer_callbacks *data, struct buffer_info *buf)
> {
> 	struct buffer_info_local *buf_local = buf->user_data;
> -	int result = close(buf_local->file_fd);
> +	int result;
> +
> +again:
> +	result = close(buf_local->file_fd);
> +	if (result == -1 && errno == EINTR)
> +		goto again;
> 	free(buf_local);
> 	if(result == -1) {
> 		PERROR("close");
> -- 
> 1.7.1
>
>
> _______________________________________________
> ltt-dev mailing list
> ltt-dev at lists.casi.polymtl.ca
> http://lists.casi.polymtl.ca/cgi-bin/mailman/listinfo/ltt-dev
>




More information about the lttng-dev mailing list