[ltt-dev] Babeltrace Update - Convert and Merge The Content of Multiple Subdirectories

Tue Jul 19 11:43:34 EDT 2011

* Amer Alhalabi (amer.alhalabi at ericsson.com) wrote:
> 
> 
> 
> Hello Guys,
> 
> I have updated babeltrace so that it can convert multiple
> subdirectories (given the path of the parent directory) at the same
> time and merge the events into one ASCII file.
> 
> Small changes have been made to babeltrace.c and babeltrace-lib.c
> --see attachement
> 
> Please have a look at them and lemme know what you think.  (Later on I
> might do some other scalability imporvements)

Hi Amer,

Thanks for submitting this. A few points for next time: please generate
a diff of the changes, and submit it directly in the email body (without
formatting). This helps accelerate the review process. Meanwhile, I
generated the diff from your code for review below,

> diff --git a/converter/babeltrace-lib.c b/converter/babeltrace-lib.c
> index 6cc2b7b..10f7544 100644
> --- a/converter/babeltrace-lib.c
> +++ b/converter/babeltrace-lib.c
> @@ -56,49 +56,58 @@ int stream_compare(void *a, void *b)
>  		return 0;
>  }
>  
> -int convert_trace(struct trace_descriptor *td_write,
> -		  struct trace_descriptor *td_read)
> +int convert_trace(struct trace_descriptor *td_write, struct trace_descriptor **td_read_arr, int td_read_arr_index)
>  {
> -	struct ctf_trace *tin = container_of(td_read, struct ctf_trace, parent);
> -	struct ctf_text_stream_pos *sout =
> -		container_of(td_write, struct ctf_text_stream_pos, trace_descriptor);
> -	int stream_id;
> -	int ret = 0;
> +	// create a heap to the hold the streams from the td_read trace_descriptors saved in td_read_arr

Please avoid comments with "//". Always use /*  */ for single-line
commands, and

/*
 * blah blah
 * blah
 */

for multi-line comments. I follow quite stricly the Linux kernel coding
style (Documentation/CodingStyle of the Linux kernel source tree). You
can verify that your patch follows the coding style with the program
scripts/checkpatch.pl in the Linux kernel sources.

> +	struct ptr_head *global_heap;

Please add whiteline between declarations and code.

> +	global_heap = g_new(struct ptr_heap, 1);

Don't call a local variable "global", this is confusing.

> +	heap_init(global_heap, 0, stream_compare);
> -	tin->stream_heap = g_new(struct ptr_heap, 1);
> -	heap_init(tin->stream_heap, 0, stream_compare);
> +	struct ctf_text_stream_pos *sout = container_of(td_write, struct ctf_text_stream_pos, trace_descriptor);

Never mix declarations and code, except at the beginning of a new block.

> +	int ret = 0;
>  
> -	/* Populate heap with each stream */
> -	for (stream_id = 0; stream_id < tin->streams->len; stream_id++) {
> -		struct ctf_stream_class *stream = g_ptr_array_index(tin->streams, stream_id);
> -		int filenr;
> +	int i_for;

just "i" would do.

> +	for(i_for=0;i_for<td_read_arr_index;i_for++)

coding style:

for (i = 0; i < td_read_arr_index; i++) {

> +	{
> +		struct trace_descriptor *td_read = td_read_arr[i_for];

-		if (!stream)
+		if(!td_read)

use a space after if, for, switch and while.

 			continue;
> -		for (filenr = 0; filenr < stream->streams->len; filenr++) {
> -			struct ctf_file_stream *file_stream = g_ptr_array_index(stream->streams, filenr);
> -			ret = read_event(file_stream);
> -			if (ret == EOF) {
> -				ret = 0;
> +
> +		struct ctf_trace *tin = container_of(td_read, struct ctf_trace, parent);

No declaration and code mix.

> +		int stream_id;
> +
> +		// Populate heap with each stream
> +		for (stream_id = 0; stream_id < tin->streams->len; stream_id++) {
> +			struct ctf_stream_class *stream = g_ptr_array_index(tin->streams, stream_id);
> +			int filenr;
> +
> +			if (!stream)
>  				continue;
> -			} else if (ret)
> -				goto end;
> -			/* Add to heap */
> -			ret = heap_insert(tin->stream_heap, file_stream);
> -			if (ret) {
> -				fprintf(stdout, "[error] Out of memory.\n");
> -				goto end;
> +			for (filenr = 0; filenr < stream->streams->len; filenr++) {
> +				struct ctf_file_stream *file_stream = g_ptr_array_index(stream->streams, filenr);
> +				ret = read_event(file_stream);
> +				if (ret == EOF) {
> +					ret = 0;
> +					continue;
> +				} else if (ret)
> +					goto end;
> +				// Add to heap
> +				ret = heap_insert(global_heap, file_stream);
> +				if (ret) {
> +					fprintf(stdout, "[error] Out of memory.\n");
> +					goto end;
> +				}
>  			}
>  		}
>  	}
> -
> -	/* Replace heap entries until EOF for each stream (heap empty) */
> +	// Replace heap entries until EOF for each stream (heap empty)
>  	for (;;) {
> -		struct ctf_file_stream *file_stream, *removed;
>  
> -		file_stream = heap_maximum(tin->stream_heap);
> +		struct ctf_file_stream *file_stream, *removed;
> +		file_stream = heap_maximum(global_heap);
>  		if (!file_stream) {
> -			/* end of file for all streams */
> +			// end of file for all streams
>  			ret = 0;
>  			break;
>  		}
> @@ -109,19 +118,22 @@ int convert_trace(struct trace_descriptor *td_write,
>  		}
>  		ret = read_event(file_stream);
>  		if (ret == EOF) {
> -			removed = heap_remove(tin->stream_heap);
> +			//removed = heap_remove(tin->stream_heap);
> +			removed = heap_remove(global_heap);
>  			assert(removed == file_stream);
>  			ret = 0;
>  			continue;
>  		} else if (ret)
>  			goto end;
> -		/* Reinsert the file stream into the heap, and rebalance. */
> -		removed = heap_replace_max(tin->stream_heap, file_stream);
> +		// Reinsert the file stream into the heap, and rebalance.
> +		//removed = heap_replace_max(tin->stream_heap, file_stream);
> +		removed = heap_replace_max(global_heap, file_stream);
>  		assert(removed == file_stream);
>  	}
> -
>  end:
> -	heap_free(tin->stream_heap);
> -	g_free(tin->stream_heap);
> +	//heap_free(tin->stream_heap);
> +	//g_free(tin->stream_heap);
> +	heap_free(global_heap);
> +	g_free(global_heap);
>  	return ret;
>  }
> diff --git a/converter/babeltrace.c b/converter/babeltrace.c
> index dd335ed..96b99bb 100644
> --- a/converter/babeltrace.c
> +++ b/converter/babeltrace.c
> @@ -27,6 +27,10 @@
>  #include <sys/stat.h>
>  #include <sys/types.h>
>  #include <fcntl.h>
> +#include <dirent.h>
> +
> +#define _XOPEN_SOURCE 500
> +#include <ftw.h>
>  
>  static char *opt_input_format;
>  static char *opt_output_format;
> @@ -37,6 +41,12 @@ static const char *opt_output_path;
>  int babeltrace_verbose, babeltrace_debug;
>  int opt_field_names;
>  

variables and functions only referenced within a single C file should be
declared "static", so no symbols pollute outside of the file. This also
lets the compiler do more optimisations, because it is allowed to assume
it has a complete view of interactions for these variables.

> +int td_read_arr_index;  // the index of the last element in the array + 1  ;i.e. position
> +#define td_read_arr_size 1024   // max number of td_read (trace_descriptors) that can be handled ;i.e. Max. num. of subdirectories

Please dynamically grow the array size as needed. I try to avoid these
hardcoded limits as much as possible.

> +struct trace_descriptor *td_read_arr[td_read_arr_size]; // an array of td_read trace_descriptors
> +struct format *fmt_read;

> +
> +
>  void strlower(char *str)
>  {
>  	while (*str) {
> @@ -149,11 +159,68 @@ end:
>  	return ret;
>  }
>  
> +/*
> + * this is the callback function for File Tree Walk (nftw)

Please use at most 80-columns text, and ideally slightly less (e.g. 72
or 76 columns), so when we comment code in email threads 80-columns are
sufficient to view the code.

> + * it receives the path of the current file/dir being visited with a flag to indicate the type of the file/dir
> + * if the file being visited is a directory, then check if it contains a metadata file.
> + * if it does, then open that directory for reading and save a trace_descriptor pointer to that dir in the rd_read_arr
> + */
> +static int traverse_dir(const char *fpath, const struct stat *sb, int tflag, struct FTW *ftwbuf)
> +{
> +	if( tflag == FTW_D )

if () {

> +	{
> +		DIR *dir;
> +		int dirfd;
> +		int fd;
> +
> +		dir = opendir(fpath);
> +		if(!dir){

if () {

> +			fprintf(stdout,"[error] invalid path:%s .\n\n", fpath);
> +			exit(EXIT_FAILURE);

return error code rather than exit ?

> +		}
> +		dirfd = open(fpath, 0);
> +		if (dirfd < 0) {
> +			fprintf(stdout, "[error] unable to open trace directory file descriptor.\n");
> +			exit(EXIT_FAILURE);

return error code rather than exit ?

> +		}
> +
> +		fd = openat(dirfd, "metadata", O_RDONLY);
> +		if( fd < 0){

if () {

> +			close(dirfd);
> +			closedir(dir);
> +		}

} else {

> +		else
> +		{
> +			close(fd);
> +			close(dirfd);
> +			closedir(dir);
> +			struct trace_descriptor *td_read;

mixed declaration and code.

> +			td_read = fmt_read->open_trace(fpath, O_RDONLY);
> +			if (!td_read) {
> +				fprintf(stdout, "[error] opening trace \"%s\" for reading.\n\n", fpath);
> +				exit(EXIT_FAILURE);

return error code rather than exit ?

> +			}
> +			if( td_read_arr_index < td_read_arr_size){

if (td_read_arr_index < td_read_arr_size) {

> +				td_read_arr[td_read_arr_index++] = td_read;
> +			}
> +			else

} else {

> +			{
> +				fprintf(stdout, "[error] out of range\n\n\n");

\n\n\n -> just \n

> +				exit(EXIT_FAILURE);

just return an error code here rather than exit ?

> +			}
> +		}
> +	}
> +    return 0;           /* To tell nftw() to continue */
> +}
> +
> +
>  int main(int argc, char **argv)
>  {
>  	int ret;
> -	struct format *fmt_read, *fmt_write;
> -	struct trace_descriptor *td_read, *td_write;
> +	//struct format *fmt_read, *fmt_write;
> +	struct format *fmt_write;
> +	//struct trace_descriptor *td_read, *td_write;
> +	struct trace_descriptor *td_write;
>  
>  	ret = parse_options(argc, argv);
>  	if (ret < 0) {
> @@ -196,35 +263,67 @@ int main(int argc, char **argv)
>  		exit(EXIT_FAILURE);
>  	}
>  
> +	struct stat st;
> +	if(stat(opt_input_path,&st) != 0)

if (stat(opt_input_path,&st))

> +		perror(opt_input_path);
> +
> +	/* pass the input path (provided by the user from the command line options to nftw function

/*
 * ....

+80 cols.

> +	 * specify traverse_dir() as the callback function
> +	 *  depth=10 which is the number of directories that can be accessed at the same time

not really. This is more precisely the max number of file descriptors nftw will open at
a given time.

> +	 *  flags=0 ; check the documentation of nftw for more info about these flags
> +	 *
> +	 *  after the traversal, if no trace_descriptor is saved in the td_read_Arr , it means that no metadata file was found
> +	 */
> +	td_read_arr_index = 0;
> +	nftw(opt_input_path,traverse_dir, 10, 0);

error code handling ?

> +	if(td_read_arr_index == 0){

if () {

> +		fprintf(stdout, "[warning] no metadata file was found. no output was generated\n");
> +		return 0;
> +	}
> +	/*
>  	td_read = fmt_read->open_trace(opt_input_path, O_RDONLY);
>  	if (!td_read) {
>  		fprintf(stdout, "[error] opening trace \"%s\" for reading.\n\n",
>  			opt_input_path);
>  		goto error_td_read;
>  	}
> +	*/
> +
>  
>  	td_write = fmt_write->open_trace(opt_output_path, O_RDWR);
>  	if (!td_write) {
> -		fprintf(stdout, "Error opening trace \"%s\" for writing.\n\n",
> +		fprintf(stdout, "[error] failed to open trace \"%s\" for writing.\n\n",
>  			opt_output_path ? : "<none>");
>  		goto error_td_write;
>  	}
>  
> -	ret = convert_trace(td_write, td_read);
> -	if (ret) {
> -		fprintf(stdout, "Error printing trace.\n\n");
> +	//ret = convert_trace(td_write, td_read);
> +	ret = convert_trace(td_write, td_read_arr, td_read_arr_index);
> +	if (ret)
>  		goto error_copy_trace;
> -	}
> +
>  
>  	fmt_write->close_trace(td_write);
> -	fmt_read->close_trace(td_read);
> +
> +	int i_for;

mixed declaration and code.

> +	for(i_for=0;i_for<td_read_arr_index;i_for++)

use "i" instead.

for () {

> +	{
> +		//fmt_read->close_trace(td_read);
> +		fmt_read->close_trace(td_read_arr[i_for]);
> +	}
> +
> +	printf("finished converting. output written to:\n%s\n", opt_output_path);
>  	exit(EXIT_SUCCESS);
>  
>  	/* Error handling */
>  error_copy_trace:
>  	fmt_write->close_trace(td_write);
>  error_td_write:
> -	fmt_read->close_trace(td_read);
> +	//fmt_read->close_trace(td_read);
> +	for(i_for=0;i_for<td_read_arr_size;i_for++)

same here.

> +	{
> +		fmt_read->close_trace(td_read_arr[i_for]);
> +	}
>  error_td_read:
>  	exit(EXIT_FAILURE);
>  }

Thanks!

Mathieu

-- 
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com