[ltt-dev] [UST PATCH 1/3] ust-consumerd: fix exit race crashes

Jason Wessel jason.wessel at windriver.com
Wed Apr 27 16:22:14 EDT 2011


The ust-consumerd is shut down by the SIGTERM signal, and a number of
places in the ust-consumerd did not properly deal with the case where
a system call fails with errno set to EINTR as a result of a signal
sent to the process.  The failure to handle EINTR properly was leading
to some data corruption in the buffer code and causing some random
"victim" crashes in lowlevel.c.

All of the offending functions were tracked down by temporarily
adding an abort() to the SIGTERM signal handler.  It was then a matter
of looking at which threads, other than the thread that received the
signal, were blocked in system calls at that time.

Signed-off-by: Jason Wessel <jason.wessel at windriver.com>
---
 libustconsumer/libustconsumer.c |   25 +++++++++++++++++++------
 ust-consumerd/ust-consumerd.c   |   11 ++++++++++-
 2 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/libustconsumer/libustconsumer.c b/libustconsumer/libustconsumer.c
index c5acffa..6f6d4bb 100644
--- a/libustconsumer/libustconsumer.c
+++ b/libustconsumer/libustconsumer.c
@@ -477,6 +477,8 @@ int consumer_loop(struct ustconsumer_instance *instance, struct buffer_info *buf
 			DBG("App died while being traced");
 			finish_consuming_dead_subbuffer(instance->callbacks, buf);
 			break;
+		} else if (read_result == -1 && errno == EINTR) {
+			continue;
 		}
 
 		if(instance->callbacks->on_read_subbuffer)
@@ -783,8 +785,11 @@ int ustconsumer_stop_instance(struct ustconsumer_instance *instance, int send_ms
 
 	struct sockaddr_un addr;
 
+socket_again:
 	result = fd = socket(PF_UNIX, SOCK_STREAM, 0);
 	if(result == -1) {
+		if (errno == EINTR)
+			goto socket_again;
 		PERROR("socket");
 		return 1;
 	}
@@ -794,13 +799,21 @@ int ustconsumer_stop_instance(struct ustconsumer_instance *instance, int send_ms
 	strncpy(addr.sun_path, instance->sock_path, UNIX_PATH_MAX);
 	addr.sun_path[UNIX_PATH_MAX-1] = '\0';
 
-	result = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
-	if(result == -1) {
-		PERROR("connect");
-	}
+connect_again:
+		result = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
+		if(result == -1) {
+			if (errno == EINTR)
+				goto connect_again;
+			PERROR("connect");
+		}
 
-	while(bytes != sizeof(msg))
-		bytes += send(fd, msg, sizeof(msg), 0);
+	while(bytes != sizeof(msg)) {
+		int inc = send(fd, msg, sizeof(msg), 0);
+		if (inc < 0 && errno != EINTR)
+			break;
+		else
+			bytes += inc;
+	}
 
 	close(fd);
 
diff --git a/ust-consumerd/ust-consumerd.c b/ust-consumerd/ust-consumerd.c
index ce2ee40..c961394 100644
--- a/ust-consumerd/ust-consumerd.c
+++ b/ust-consumerd/ust-consumerd.c
@@ -210,7 +210,11 @@ int on_open_buffer(struct ustconsumer_callbacks *data, struct buffer_info *buf)
 		    trace_path, buf->pid, buf->pidunique, buf->name);
 		return 1;
 	}
+again:
 	result = fd = open(tmp, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 00600);
+	if (result == -1 && errno == EINTR)
+		goto again;
+
 	if(result == -1) {
 		PERROR("open");
 		ERR("failed opening trace file %s", tmp);
@@ -225,7 +229,12 @@ int on_open_buffer(struct ustconsumer_callbacks *data, struct buffer_info *buf)
 int on_close_buffer(struct ustconsumer_callbacks *data, struct buffer_info *buf)
 {
 	struct buffer_info_local *buf_local = buf->user_data;
-	int result = close(buf_local->file_fd);
+	int result;
+
+again:
+	result = close(buf_local->file_fd);
+	if (result == -1 && errno == EINTR)
+		goto again;
 	free(buf_local);
 	if(result == -1) {
 		PERROR("close");
-- 
1.7.1





More information about the lttng-dev mailing list