[ltt-dev] Babeltrace PATCH

Mathieu Desnoyers compudj at krystal.dyndns.org
Tue Jul 26 08:41:47 EDT 2011


* Amer Alhalabi (amer.alhalabi at ericsson.com) wrote:
> Babeltrace PATCH: Multiple Directory Conversion
> 
> Code added/modified to merge and convert many CTF binary files located
> in different directories into one ASCII file

It's getting in good shape. A few comments below.

>  
> Signed-off-by: Amer Alhalabi <amer.alhalabi at ericsson.com>
> 
> 
> 
> Index: babeltrace/converter/babeltrace-lib.c
> ===================================================================
> --- babeltrace.orig/converter/babeltrace-lib.c	2011-07-25 10:42:26.000000000 -0400
> +++ babeltrace/converter/babeltrace-lib.c	2011-07-25 12:17:21.202761000 -0400
> @@ -56,38 +56,56 @@
>  		return 0;
>  }
>  
> -int convert_trace(struct trace_descriptor *td_write,
> -		  struct trace_descriptor *td_read)
> +int convert_trace(struct trace_descriptor *td_write)
>  {
> -	struct ctf_trace *tin = container_of(td_read, struct ctf_trace, parent);
> -	struct ctf_text_stream_pos *sout =
> -		container_of(td_write, struct ctf_text_stream_pos, trace_descriptor);
> -	int stream_id;
> +	struct ptr_heap *stream_heap;
> +	struct ctf_text_stream_pos *sout;
> +	int i, stream_id;
>  	int ret = 0;
>  
> -	tin->stream_heap = g_new(struct ptr_heap, 1);
> -	heap_init(tin->stream_heap, 0, stream_compare);
> +	stream_heap = g_new(struct ptr_heap, 1);
> +	heap_init(stream_heap, 0, stream_compare);
> +	sout = container_of(td_write, struct ctf_text_stream_pos,
> +						trace_descriptor);
> +
> +	for (i = 0; i < td_read_arr_count; i++) {
> +		struct ctf_trace *tin;
> +		struct trace_descriptor *td_read;
>  
> -	/* Populate heap with each stream */
> -	for (stream_id = 0; stream_id < tin->streams->len; stream_id++) {
> -		struct ctf_stream_class *stream = g_ptr_array_index(tin->streams, stream_id);
> -		int filenr;
> -
> -		if (!stream)
> +		td_read = td_read_arr[i];
> +		if (!td_read)
>  			continue;

I see that you use realloc to double the size of the array. Note that
this memory is uninitialized (not zeroed). Therefore, this check is
bogus. What I usually do to deal with arrays like this is to keep two
values: the array "size" and the array "allocated size". I use the
"size" to know if I am within the bounds of the array, and the allocated
size to grow the array when the size reaches the allocated size.

> -		for (filenr = 0; filenr < stream->streams->len; filenr++) {
> -			struct ctf_file_stream *file_stream = g_ptr_array_index(stream->streams, filenr);
> -			ret = read_event(file_stream);
> -			if (ret == EOF) {
> -				ret = 0;
> +		tin = container_of(td_read, struct ctf_trace, parent);
> +
> +		/* Populate heap with each stream */
> +		for (stream_id = 0; stream_id < tin->streams->len;
> +				stream_id++) {
> +			struct ctf_stream_class *stream;
> +			int filenr;
> +
> +			stream = g_ptr_array_index(tin->streams, stream_id);
> +			if (!stream)
>  				continue;
> -			} else if (ret)
> -				goto end;
> -			/* Add to heap */
> -			ret = heap_insert(tin->stream_heap, file_stream);
> -			if (ret) {
> -				fprintf(stdout, "[error] Out of memory.\n");
> -				goto end;
> +			for (filenr = 0; filenr < stream->streams->len;
> +					filenr++) {
> +				struct ctf_file_stream *file_stream;
> +
> +				file_stream = g_ptr_array_index(stream->streams,
> +								filenr);
> +
> +				ret = read_event(file_stream);
> +				if (ret == EOF) {
> +					ret = 0;
> +					continue;
> +				} else if (ret)
> +					goto end;
> +				/* Add to heap */
> +				ret = heap_insert(stream_heap, file_stream);
> +				if (ret) {
> +					fprintf(stdout,
> +						"[error] Out of memory.\n");
> +					goto end;
> +				}
>  			}
>  		}
>  	}
> @@ -96,7 +114,7 @@
>  	for (;;) {
>  		struct ctf_file_stream *file_stream, *removed;
>  
> -		file_stream = heap_maximum(tin->stream_heap);
> +		file_stream = heap_maximum(stream_heap);
>  		if (!file_stream) {
>  			/* end of file for all streams */
>  			ret = 0;
> @@ -109,19 +127,19 @@
>  		}
>  		ret = read_event(file_stream);
>  		if (ret == EOF) {
> -			removed = heap_remove(tin->stream_heap);
> +			removed = heap_remove(stream_heap);
>  			assert(removed == file_stream);
>  			ret = 0;
>  			continue;
>  		} else if (ret)
>  			goto end;
>  		/* Reinsert the file stream into the heap, and rebalance. */
> -		removed = heap_replace_max(tin->stream_heap, file_stream);
> +		removed = heap_replace_max(stream_heap, file_stream);
>  		assert(removed == file_stream);
>  	}
>  
>  end:
> -	heap_free(tin->stream_heap);
> -	g_free(tin->stream_heap);
> +	heap_free(stream_heap);
> +	g_free(stream_heap);
>  	return ret;
>  }
> Index: babeltrace/converter/babeltrace.c ===================================================================
> --- babeltrace.orig/converter/babeltrace.c	2011-07-25 10:42:26.000000000 -0400
> +++ babeltrace/converter/babeltrace.c	2011-07-25 14:15:58.806354000 -0400
> @@ -27,6 +27,8 @@
>  #include <sys/stat.h>
>  #include <sys/types.h>
>  #include <fcntl.h>
> +#include <ftw.h>
> +#include <dirent.h>
>  
>  static char *opt_input_format;
>  static char *opt_output_format;
> @@ -37,6 +39,13 @@
>  int babeltrace_verbose, babeltrace_debug;  int opt_field_names;
>  
> +/* max number of items in td_arr_read[] */ static int td_read_arr_size;
> +/* the count of trace_descriptors in td_read_arr[] */ int 
> +td_read_arr_count; struct trace_descriptor **td_read_arr; static struct 
> +format *fmt_read;
> +

for some reason the comments and code got mixed up here.

>  void strlower(char *str)
>  {
>  	while (*str) {
> @@ -149,11 +158,95 @@
>  	return ret;
>  }
>  
> +/*
> + * this function increases (doubles) the size of td_read_arr
> + * it returns -1 in case of memory (re)allocation error
> + * it return 0 for success
> + */
> +static int increase_size()

I prefer explicit (void) in function prototypes.

In any case, since we use the glib, you should use "GArray" instead, it
does exactly what you are trying to achieve. Otherwise, if you really
want to reimplement an array, then we should extract it into a separate
"library" file.

> +{
> +	struct trace_descriptor **tmp;
> +
> +	tmp = realloc(td_read_arr, 2 * td_read_arr_size *
> +			sizeof(struct trace_descriptor *));
> +	if (tmp == NULL)
> +		return -1;
> +	td_read_arr_size = td_read_arr_size * 2;
> +	td_read_arr = tmp;
> +	return 0;
> +}
> +
> +/*
> + * cleanup() closes the opened traces for read
> + * and free the memory allocated for td_read_arr  */ static void 
> +cleanup() {

(void) and comment vs code got mixed up.
Also, always start function definitions "{" on a new line.

You could name this "destroy_array" instead of "cleanup".

> +	int i;
> +	for (i = 0; i < td_read_arr_count; i++) {
> +		if (td_read_arr[i])
> +			fmt_read->close_trace(td_read_arr[i]);
> +	}
> +	if (td_read_arr)
> +		free(td_read_arr);
> +}
> +
> +/*
> + * traverse_dir() is the callback functiion for File Tree Walk (nftw).
> + * it receives the path of the current entry (file, dir, link..etc) 
> +with
> + * a flag to indicate the type of the entry.
> + * if the entry being visited is a directory and contains a metadata 
> +file,
> + * then open it for reading and save a trace_descriptor to that 
> +directory
> + * in td_read_arr[]

Problem with comment indentation here.

> + */
> +static int traverse_dir(const char *fpath, const struct stat *sb,
> +						int tflag, struct FTW *ftwbuf)
> +{
> +	if (tflag == FTW_D) {

Instead of

 if (tflag == FTW_D) {
    ... all the code ...
 }

You could save an indentation level with:

 if (tflag != FTW_D)
   return 0;

 ... rest of code ...

> +		int dirfd;
> +		int fd;
> +		struct trace_descriptor *td_read;
> +		int ret = 0;
> +
> +		dirfd = open(fpath, 0);
> +		if (dirfd < 0) {
> +			fprintf(stdout, "[error] unable to open trace "
> +				"directory file descriptor.\n");
> +			return -1;
> +		}
> +		fd = openat(dirfd, "metadata", O_RDONLY);
> +		if (fd < 0) {
> +			close(dirfd);
> +		} else {
> +			close(fd);
> +			close(dirfd);
> +			td_read = fmt_read->open_trace(fpath, O_RDONLY);
> +			if (!td_read) {
> +				fprintf(stdout, "Error opening trace \"%s\" "
> +						"for reading.\n\n", fpath);
> +				return -1;	/* error */
> +			}
> +			if (td_read_arr_count >= td_read_arr_size) {
> +				ret = increase_size();
> +				if (ret != 0)
> +					return -1;	/* error */
> +			}
> +			td_read_arr[td_read_arr_count++] = td_read;
> +		}
> +	}
> +	return 0;	/* success */
> +}
> +
>  int main(int argc, char **argv)
>  {
>  	int ret;
> -	struct format *fmt_read, *fmt_write;
> -	struct trace_descriptor *td_read, *td_write;
> +	struct format *fmt_write;
> +	struct trace_descriptor *td_write;
> +
> +	/* do some initialization */
> +	td_read_arr_size = 1024;

I would be tempted to start with low values (e.g. 1) to test your
reallocation code. The slight performance loss in the common case is not
as important as making sure the code does not fail on large number of
files.

Also, please add a

#define DEFAULT_FILE_ARRAY_SIZE	1

at the beginning of the c file.

> +	td_read_arr_count = 0;
> +	td_read_arr = NULL;
>  

Those 3 values (and also an array allocated size) should be within a
single structure, as they logically belong to the same "object".

>  	ret = parse_options(argc, argv);
>  	if (ret < 0) {
> @@ -196,12 +289,30 @@
>  		exit(EXIT_FAILURE);
>  	}
>  
> -	td_read = fmt_read->open_trace(opt_input_path, O_RDONLY);
> -	if (!td_read) {
> +	/*
> +	 * first allocate memomry for td_read_arr .
> +	 * pass the input path to nftw() .
> +	 * specify traverse_dir() as the callback function.
> +	 * depth = 10 which is the max number of file descriptors
> +	 * that nftw() can open at a given time.
> +	 * flags = 0  check nftw documentation for more info .
> +	 */
> +	ret = increase_size();
> +	if (ret != 0) {
> +		fprintf(stdout, "[error] out of memory.\n");
> +		goto error_td_read;
> +	}
> +	ret = nftw(opt_input_path, traverse_dir, 10, 0);
> +	if (ret != 0) {
>  		fprintf(stdout, "[error] opening trace \"%s\" for reading.\n\n",
>  			opt_input_path);
>  		goto error_td_read;
>  	}
> +	if (td_read_arr_count == 0) {
> +		fprintf(stdout, "[warning] no metadata file was found."
> +						" no output was generated\n");
> +		return 0;
> +	}
>  
>  	td_write = fmt_write->open_trace(opt_output_path, O_RDWR);
>  	if (!td_write) {
> @@ -210,21 +321,24 @@
>  		goto error_td_write;
>  	}
>  
> -	ret = convert_trace(td_write, td_read);
> +	ret = convert_trace(td_write);
>  	if (ret) {
>  		fprintf(stdout, "Error printing trace.\n\n");
>  		goto error_copy_trace;
>  	}
>  
>  	fmt_write->close_trace(td_write);
> -	fmt_read->close_trace(td_read);
> +	cleanup();
> +	fprintf(stdout, "finished converting. output written to:\n%s\n",
> +			opt_output_path);
>  	exit(EXIT_SUCCESS);
>  
>  	/* Error handling */
>  error_copy_trace:
>  	fmt_write->close_trace(td_write);
>  error_td_write:
> -	fmt_read->close_trace(td_read);
> +	cleanup();
>  error_td_read:
> +	cleanup();

We have two cleanups called here. Looks like a bug.

Thanks,

Mathieu

>  	exit(EXIT_FAILURE);
>  }
> Index: babeltrace/include/babeltrace/babeltrace.h
> ===================================================================
> --- babeltrace.orig/include/babeltrace/babeltrace.h	2011-07-25 10:46:11.000000000 -0400
> +++ babeltrace/include/babeltrace/babeltrace.h	2011-07-25 10:48:37.752245000 -0400
> @@ -7,6 +7,8 @@
>  #define BABELTRACE_VERSION_MINOR	1
>  
>  extern int babeltrace_verbose, babeltrace_debug;
> +extern int td_read_arr_count;
> +extern struct trace_descriptor **td_read_arr;
>  
>  #define printf_verbose(fmt, args...)				\
>  	do {							\
> @@ -22,8 +24,7 @@
>  
>  struct trace_descriptor;
>  
> -int convert_trace(struct trace_descriptor *td_write,
> -		  struct trace_descriptor *td_read);
> +int convert_trace(struct trace_descriptor *td_write);
>  
>  extern int opt_field_names;
>  


> _______________________________________________
> ltt-dev mailing list
> ltt-dev at lists.casi.polymtl.ca
> http://lists.casi.polymtl.ca/cgi-bin/mailman/listinfo/ltt-dev


-- 
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com




More information about the lttng-dev mailing list