[ltt-dev] [UST PATCH] Make libustctl list only online pids

Mathieu Desnoyers compudj at krystal.dyndns.org
Mon May 16 10:23:43 EDT 2011


* Nils Carlson (nils.carlson at ericsson.com) wrote:
> Previously libustctl would list all pids. This patch changes this
> so only online pids are listed. This is done by appending on each
> socket name the mtime from the proc/<pid> directory. This way a
> socket can be checked to see if the appended mtime matches the
> mtime of the proc dir for the current pid, thus allowing us to
> distinguish between old and new socket files.

Hi Nils,

I'm OK with the approach. I have a few cosmetic comments below,

> 
> Signed-off-by: Nils Carlson <nils.carlson at ericsson.com>
> ---
>  libust/tracectl.c     |    9 +++-
>  libustcomm/ustcomm.c  |  150 ++++++++++++++++++++++++++++++++++++++++++++-----
>  libustcomm/ustcomm.h  |    7 ++
>  libustctl/libustctl.c |   18 +-----
>  4 files changed, 153 insertions(+), 31 deletions(-)
> 
> diff --git a/libust/tracectl.c b/libust/tracectl.c
> index bc0a07c..fb9130c 100644
> --- a/libust/tracectl.c
> +++ b/libust/tracectl.c
> @@ -1228,12 +1228,19 @@ static struct ustcomm_sock * init_app_socket(int epoll_fd)
>  	char *dir_name, *sock_name;
>  	int result;
>  	struct ustcomm_sock *sock = NULL;
> +	time_t mtime;
>  
>  	dir_name = ustcomm_user_sock_dir();
>  	if (!dir_name)
>  		return NULL;
>  
> -	result = asprintf(&sock_name, "%s/%d", dir_name, (int)getpid());
> +	mtime = ustcomm_pid_st_mtime(getpid());
> +	if (!mtime) {
> +		goto free_dir_name;
> +	}
> +
> +	result = asprintf(&sock_name, "%s/%d.%ld", dir_name,
> +			  (int)getpid(), (long)mtime);
>  	if (result < 0) {
>  		ERR("string overflow allocating socket name, "
>  		    "UST thread bailing");
> diff --git a/libustcomm/ustcomm.c b/libustcomm/ustcomm.c
> index ed6d8f1..17bd9d9 100644
> --- a/libustcomm/ustcomm.c
> +++ b/libustcomm/ustcomm.c
> @@ -22,6 +22,7 @@
>  #include <sys/types.h>
>  #include <signal.h>
>  #include <errno.h>
> +#include <limits.h>
>  #include <sys/socket.h>
>  #include <sys/un.h>
>  #include <unistd.h>
> @@ -550,6 +551,125 @@ char *ustcomm_user_sock_dir(void)
>  	return sock_dir;
>  }
>  
> +static int time_and_pid_from_socket_name(char *sock_name, unsigned long *time,
> +					 pid_t *pid)
> +{
> +	char *saveptr, *pid_m_time_str;
> +	char *sock_basename = strdup(basename(sock_name));
> +
> +	if (!sock_basename) {
> +		return -1;
> +	}
> +
> +	/* This is the pid */
> +	pid_m_time_str = strtok_r(sock_basename, ".", &saveptr);
> +	if (!pid_m_time_str) {
> +		goto out_err;
> +	}
> +
> +	errno = 0;
> +	*pid = (pid_t)strtoul(pid_m_time_str, NULL, 10);
> +	if (errno) {

Errno is a TLS variable, while the value returned by strtoul is directly
accessible from registers. So I would tend to think that using errno all
the time will bloat our code, causing TLS accesses when not necessary.
And I fear that on systems lacking TLS support (such as Android), they
might simply use a shared variable modified concurrently across threads,
which could be racy. So I would prefer to use the return value of the
functions to trigger the error, and use the errno value only to print
the error message in more details. Otherwise, I'm worried that we might
end up either missing errors or triggering on a false-positive from the
wrong thread.

> +		goto out_err;
> +	}
> +
> +	/* This should be the time-stamp */
> +	pid_m_time_str = strtok_r(NULL, ".", &saveptr);
> +	if (!pid_m_time_str) {
> +		goto out_err;
> +	}
> +
> +	errno = 0;
> +	*time = strtoul(pid_m_time_str, NULL, 10);
> +	if (errno) {
> +		goto out_err;
> +	}
> +
> +	return 0;
> +
> +out_err:
> +	free(sock_basename);
> +	return -1;
> +}
> +
> +time_t ustcomm_pid_st_mtime(pid_t pid)
> +{
> +	struct stat proc_stat;
> +	char proc_name[PATH_MAX + 1];
> +
> +	if (snprintf(proc_name, PATH_MAX, "/proc/%ld", (long)pid) < 0) {
> +		return 0;
> +	}
> +
> +	if (stat(proc_name, &proc_stat)) {
> +		return 0;
> +	}
> +
> +	return proc_stat.st_mtime;
> +}
> +
> +int ustcomm_is_socket_live(char *sock_name, pid_t *read_pid)
> +{
> +	time_t time_from_pid;
> +	unsigned long time_from_sock;
> +	pid_t pid;
> +
> +	if (time_and_pid_from_socket_name(sock_name, &time_from_sock, &pid)) {
> +		return 0;
> +	}
> +
> +	if (read_pid) {
> +		*read_pid = pid;
> +	}
> +
> +	time_from_pid = ustcomm_pid_st_mtime(pid);
> +	if (!time_from_pid) {
> +		return 0;
> +	}
> +
> +	if (((unsigned long)time_from_pid) == time_from_sock) {

Please use: (unsigned long) time_from_pid  (with the space) (typical in
kernel coding style, which we try to follow).

> +		return 1;
> +	}
> +
> +	return 0;
> +}
> +
> +static int ustcomm_get_sock_name(char *dir_name, pid_t pid, char *sock_name)
> +{
> +	struct dirent *dirent;
> +	char sock_path_base[101];
> +	int len;
> +	DIR *dir = opendir(dir_name);
> +
> +	snprintf(sock_path_base, 100, "%ld.", (long)pid);
> +	len = strlen(sock_path_base);
> +
> +	while ((dirent = readdir(dir))) {
> +		if (!strcmp(dirent->d_name, ".") ||
> +		    !strcmp(dirent->d_name, "..") ||
> +		    !strcmp(dirent->d_name, "ust-consumer") ||
> +		    dirent->d_type == DT_DIR ||
> +		    strncmp(dirent->d_name, sock_path_base, len)) {
> +			continue;
> +		}
> +
> +		if (ustcomm_is_socket_live(dirent->d_name, NULL)) {
> +			printf("found %s\n", dirent->d_name);

Should this message be only printed in verbose mode ?

> +			if (snprintf(sock_name, PATH_MAX, "%s/%s",
> +				     dir_name, dirent->d_name) < 0) {
> +				PERROR("path longer than PATH_MAX?");
> +				goto out_err;
> +			}
> +			closedir(dir);
> +			return 0;
> +		}
> +	}
> +
> +out_err:
> +	closedir(dir);
> +	return -1;
> +}
> +
>  /* Open a connection to a traceable app.
>   *
>   * Return value:
> @@ -561,16 +681,16 @@ static int connect_app_non_root(pid_t pid, int *app_fd)
>  {
>  	int result;
>  	int retval = 0;
> -	char *dir_name, *sock_name;
> +	char *dir_name;
> +	char sock_name[PATH_MAX + 1];
> +

Extra whiteline.

>  
>  	dir_name = ustcomm_user_sock_dir();
>  	if (!dir_name)
>  		return -ENOMEM;
>  
> -	result = asprintf(&sock_name, "%s/%d", dir_name, pid);
> -	if (result < 0) {
> -		ERR("failed to allocate socket name");
> -		retval = -1;
> +	if (ustcomm_get_sock_name(dir_name, pid, sock_name)) {
> +		retval = -ENOENT;
>  		goto free_dir_name;
>  	}
>  
> @@ -578,11 +698,9 @@ static int connect_app_non_root(pid_t pid, int *app_fd)
>  	if (result < 0) {
>  		ERR("failed to connect to app");
>  		retval = -1;
> -		goto free_sock_name;
> +		goto free_dir_name;
>  	}
>  
> -free_sock_name:
> -	free(sock_name);
>  free_dir_name:
>  	free(dir_name);
>  
> @@ -595,8 +713,8 @@ static int connect_app_root(pid_t pid, int *app_fd)
>  {
>  	DIR *tmp_dir;
>  	struct dirent *dirent;
> -	char *sock_name;
> -	int result;
> +	char dir_name[PATH_MAX + 1], sock_name[PATH_MAX + 1];

If you look at unistd.h:

/* Put the absolute pathname of the current working directory in BUF.
   If successful, return BUF.  If not, put an error message in
   BUF and return NULL.  BUF should be at least PATH_MAX bytes long.  */
extern char *getwd (char *__buf)
     __THROW __nonnull ((1)) __attribute_deprecated__ __wur;

So the usual approach is that the buffer is expected to be PATH_MAX
wide, and this is the operations using it (e.g. snprintf, strncpy) that
should copy at most PATH_MAX - 1 elements to keep room for the \0. It
would be good to standardize all our code on this, because it will make
passing path information around between various functions safer.

> +	int result = -1;
>  
>  	tmp_dir = opendir(USER_TMP_DIR);
>  	if (!tmp_dir) {
> @@ -607,14 +725,16 @@ static int connect_app_root(pid_t pid, int *app_fd)
>  		if (!strncmp(dirent->d_name, USER_SOCK_DIR_BASE,
>  			     strlen(USER_SOCK_DIR_BASE))) {
>  
> -			if (asprintf(&sock_name, USER_TMP_DIR "/%s/%u",
> -				     dirent->d_name, pid) < 0) {
> -				goto close_tmp_dir;
> +			if (snprintf(dir_name, PATH_MAX, "%s/%s", USER_TMP_DIR,
> +				     dirent->d_name) < 0) {
> +				continue;
>  			}
>  
> -			result = ustcomm_connect_path(sock_name, app_fd);
> +			if (ustcomm_get_sock_name(dir_name, pid, sock_name)) {
> +				continue;
> +			}
>  
> -			free(sock_name);
> +			result = ustcomm_connect_path(sock_name, app_fd);
>  
>  			if (result == 0) {
>  				goto close_tmp_dir;
> diff --git a/libustcomm/ustcomm.h b/libustcomm/ustcomm.h
> index a91c111..4706b72 100644
> --- a/libustcomm/ustcomm.h
> +++ b/libustcomm/ustcomm.h
> @@ -162,6 +162,13 @@ extern int ustcomm_request_consumer(pid_t pid, const char *channel);
>  
>  /* Returns the current users socket directory, must be freed */
>  extern char *ustcomm_user_sock_dir(void);
> +
> +/* Get the st_m_time from proc*/
> +extern time_t ustcomm_pid_st_mtime(pid_t pid);
> +
> +/* Check that a socket is live */
> +extern int ustcomm_is_socket_live(char *sock_name, pid_t *read_pid);
> +
>  extern int ustcomm_connect_app(pid_t pid, int *app_fd);
>  extern int ustcomm_connect_path(const char *path, int *connection_fd);
>  
> diff --git a/libustctl/libustctl.c b/libustctl/libustctl.c
> index 8b0dfc1..691f88f 100644
> --- a/libustctl/libustctl.c
> +++ b/libustctl/libustctl.c
> @@ -110,7 +110,7 @@ static int get_pids_in_dir(DIR *dir, pid_t **pid_list,
>  			    unsigned int *pid_list_size)
>  {
>  	struct dirent *dirent;
> -	long read_pid;
> +	pid_t read_pid;
>  
>  	while ((dirent = readdir(dir))) {
>  		if (!strcmp(dirent->d_name, ".") ||
> @@ -121,21 +121,9 @@ static int get_pids_in_dir(DIR *dir, pid_t **pid_list,
>  			continue;
>  		}
>  
> -		errno = 0;
> -		read_pid = strtol(dirent->d_name, NULL, 10);
> -		if (errno) {
> -			continue;
> -		}
> -
> -		/*
> -		 * FIXME: Here we previously called pid_is_online, which
> -		 * always returned 1, now I replaced it with just 1.
> -		 * We need to figure out an intelligent way of solving
> -		 * this, maybe connect-disconnect.
> -		 */
> -		if (1) {
> +		if (ustcomm_is_socket_live(dirent->d_name, &read_pid)) {
>  
> -			(*pid_list)[(*pid_list_index)++] = read_pid;
> +			(*pid_list)[(*pid_list_index)++] = (long)read_pid;

Same comment as above about adding a space after the cast. Actually,
this examplify why the space is welcome: it helps us distinguish between
the (*pid_list)[] (where the parenthesis are added to enforce priority
of operators) from the (long) read_pid cast.

Thanks!

Mathieu

>  
>  			if (*pid_list_index == *pid_list_size) {
>  				if (realloc_pid_list(pid_list, pid_list_size)) {
> -- 
> 1.7.2.5
> 
> 
> _______________________________________________
> ltt-dev mailing list
> ltt-dev at lists.casi.polymtl.ca
> http://lists.casi.polymtl.ca/cgi-bin/mailman/listinfo/ltt-dev
> 

-- 
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com




More information about the lttng-dev mailing list