[ltt-dev] [UST PATCH 1/3] ust-consumerd: fix exit race crashes
Nils Carlson
nils.carlson at ericsson.com
Thu Apr 28 09:20:18 EDT 2011
merged.
On Wed, 27 Apr 2011, Jason Wessel wrote:
> The ust-consumerd is shut down by the SIGTERM signal, and a number of
> places in the ust-consumerd did not properly deal with the case where
> a system call fails with errno set to EINTR as a result of a signal
> delivered to the process. The failure to handle EINTR properly was
> leading to some data corruption in the buffer code and causing some
> random "victim" crashes in lowlevel.c.
>
> The offending functions were all tracked down by temporarily adding
> an abort() to the SIGTERM signal handler. Then it was a matter of
> looking at which threads, other than the one that received the signal,
> were blocked on system calls at that time.
>
> Signed-off-by: Jason Wessel <jason.wessel at windriver.com>
> ---
> libustconsumer/libustconsumer.c | 25 +++++++++++++++++++------
> ust-consumerd/ust-consumerd.c | 11 ++++++++++-
> 2 files changed, 29 insertions(+), 7 deletions(-)
>
> diff --git a/libustconsumer/libustconsumer.c b/libustconsumer/libustconsumer.c
> index c5acffa..6f6d4bb 100644
> --- a/libustconsumer/libustconsumer.c
> +++ b/libustconsumer/libustconsumer.c
> @@ -477,6 +477,8 @@ int consumer_loop(struct ustconsumer_instance *instance, struct buffer_info *buf
> DBG("App died while being traced");
> finish_consuming_dead_subbuffer(instance->callbacks, buf);
> break;
> + } else if (read_result == -1 && errno == EINTR) {
> + continue;
> }
>
> if(instance->callbacks->on_read_subbuffer)
> @@ -783,8 +785,11 @@ int ustconsumer_stop_instance(struct ustconsumer_instance *instance, int send_ms
>
> struct sockaddr_un addr;
>
> +socket_again:
> result = fd = socket(PF_UNIX, SOCK_STREAM, 0);
> if(result == -1) {
> + if (errno == EINTR)
> + goto socket_again;
> PERROR("socket");
> return 1;
> }
> @@ -794,13 +799,21 @@ int ustconsumer_stop_instance(struct ustconsumer_instance *instance, int send_ms
> strncpy(addr.sun_path, instance->sock_path, UNIX_PATH_MAX);
> addr.sun_path[UNIX_PATH_MAX-1] = '\0';
>
> - result = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
> - if(result == -1) {
> - PERROR("connect");
> - }
> +connect_again:
> + result = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
> + if(result == -1) {
> + if (errno == EINTR)
> + goto connect_again;
> + PERROR("connect");
> + }
>
> - while(bytes != sizeof(msg))
> - bytes += send(fd, msg, sizeof(msg), 0);
> + while(bytes != sizeof(msg)) {
> + int inc = send(fd, msg, sizeof(msg), 0);
> + if (inc < 0 && errno != EINTR)
> + break;
> + else
> + bytes += inc;
> + }
>
> close(fd);
>
> diff --git a/ust-consumerd/ust-consumerd.c b/ust-consumerd/ust-consumerd.c
> index ce2ee40..c961394 100644
> --- a/ust-consumerd/ust-consumerd.c
> +++ b/ust-consumerd/ust-consumerd.c
> @@ -210,7 +210,11 @@ int on_open_buffer(struct ustconsumer_callbacks *data, struct buffer_info *buf)
> trace_path, buf->pid, buf->pidunique, buf->name);
> return 1;
> }
> +again:
> result = fd = open(tmp, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 00600);
> + if (result == -1 && errno == EINTR)
> + goto again;
> +
> if(result == -1) {
> PERROR("open");
> ERR("failed opening trace file %s", tmp);
> @@ -225,7 +229,12 @@ int on_open_buffer(struct ustconsumer_callbacks *data, struct buffer_info *buf)
> int on_close_buffer(struct ustconsumer_callbacks *data, struct buffer_info *buf)
> {
> struct buffer_info_local *buf_local = buf->user_data;
> - int result = close(buf_local->file_fd);
> + int result;
> +
> +again:
> + result = close(buf_local->file_fd);
> + if (result == -1 && errno == EINTR)
> + goto again;
> free(buf_local);
> if(result == -1) {
> PERROR("close");
> --
> 1.7.1
>
>
> _______________________________________________
> ltt-dev mailing list
> ltt-dev at lists.casi.polymtl.ca
> http://lists.casi.polymtl.ca/cgi-bin/mailman/listinfo/ltt-dev
>
More information about the lttng-dev
mailing list