[lttng-dev] [PATCH lttng-tools] Fix: Synchronization issue for data available command

Mathieu Desnoyers mathieu.desnoyers at efficios.com
Thu Oct 25 18:05:32 EDT 2012


* David Goulet (dgoulet at efficios.com) wrote:
> 
> 
> Mathieu Desnoyers:
> > * David Goulet (dgoulet at efficios.com) wrote:
> >> Signed-off-by: David Goulet <dgoulet at efficios.com>
> >> ---
> >>  src/common/consumer.c                        |   66 +++++++++++++++++++++++---
> >>  src/common/kernel-consumer/kernel-consumer.c |   19 ++------
> >>  src/common/ust-consumer/ust-consumer.c       |   19 ++------
> >>  3 files changed, 66 insertions(+), 38 deletions(-)
> >>
> >> diff --git a/src/common/consumer.c b/src/common/consumer.c
> >> index e61a227..cf0e715 100644
> >> --- a/src/common/consumer.c
> >> +++ b/src/common/consumer.c
> >> @@ -2657,6 +2657,33 @@ error:
> >>  }
> >>  
> >>  /*
> >> + * Try to lock the stream mutex.
> >> + *
> >> + * On success, 1 is returned else 0 indicating that the mutex is NOT lock.
> >> + */
> >> +static int stream_try_lock(struct lttng_consumer_stream *stream)
> >> +{
> >> +	int ret;
> >> +
> >> +	assert(stream);
> >> +
> >> +	/*
> >> +	 * Try to lock the stream mutex. On failure, we know that the stream is
> >> +	 * being used else where hence there is data still being extracted.
> >> +	 */
> >> +	ret = pthread_mutex_trylock(&stream->lock);
> >> +	if (ret == EBUSY) {
> >> +		ret = 0;
> >> +		goto end;
> >> +	}
> > 
> > what are we doing with the other errors ?
> > 
> 
> The only other error is EINVAL and is basically impossible to get at
> this stage since the stream is certain to be valid.

The whole point of testing error values is to test all of them, not to
"assume that at this point this error value cannot happen". Please fix.

Thanks,

Mathieu


> 
> >> +
> >> +	ret = 1;
> >> +
> >> +end:
> >> +	return ret;
> >> +}
> >> +
> >> +/*
> >>   * Check if for a given session id there is still data needed to be extract
> >>   * from the buffers.
> >>   *
> >> @@ -2696,17 +2723,43 @@ int consumer_data_available(uint64_t id)
> >>  			ht->hash_fct((void *)((unsigned long) id), 0x42UL),
> >>  			ht->match_fct, (void *)((unsigned long) id),
> >>  			&iter.iter, stream, node_session_id.node) {
> >> -		/* Check the stream for data. */
> >> -		ret = data_available(stream);
> >> -		if (ret == 0) {
> >> +		/* If this call fails, the stream is being used hence data pending. */
> >> +		ret = stream_try_lock(stream);
> >> +		if (!ret) {
> >>  			goto data_not_available;
> >>  		}
> >>  
> >> +		/*
> >> +		 * A removed node from the hash table indicates that the stream has
> >> +		 * been deleted thus having a guarantee that the buffers are closed
> >> +		 * on the consumer side. However, data can still be transmitted
> >> +		 * over the network so don't skip the relayd check.
> >> +		 */
> >> +		ret = cds_lfht_is_node_deleted(&stream->node.node);
> >> +		if (!ret) {
> >> +			/* Check the stream if there is data in the buffers. */
> >> +			ret = data_available(stream);
> >> +			if (ret == 0) {
> >> +				pthread_mutex_unlock(&stream->lock);
> >> +				goto data_not_available;
> >> +			}
> >> +		}
> >> +
> >> +		/* Relayd check */
> >>  		if (stream->net_seq_idx != -1) {
> >>  			relayd = consumer_find_relayd(stream->net_seq_idx);
> >> -			assert(relayd);
> >> +			if (!relayd) {
> >> +				/*
> >> +				 * At this point, if the relayd object is not available for the
> >> +				 * given stream, it is because the relayd is being cleanup so
> > 
> > cleanup -> cleaned up
> > 
> >> +				 * every stream associated with it (for a session id value) are
> >> +				 * or wil be marked for deletion hence not having data pending
> > 
> > hence not having -> hence do not have
> > 
> > 
> > wil -> will
> > 
> >> +				 * anymore.
> >> +				 */
> >> +				pthread_mutex_unlock(&stream->lock);
> >> +				goto data_not_available;
> >> +			}
> >>  
> >> -			pthread_mutex_lock(&stream->lock);
> >>  			pthread_mutex_lock(&relayd->ctrl_sock_mutex);
> >>  			if (stream->metadata_flag) {
> >>  				ret = relayd_quiescent_control(&relayd->control_sock);
> >> @@ -2715,11 +2768,12 @@ int consumer_data_available(uint64_t id)
> >>  						stream->relayd_stream_id, stream->next_net_seq_num);
> >>  			}
> >>  			pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
> >> -			pthread_mutex_unlock(&stream->lock);
> >>  			if (ret == 0) {
> >> +				pthread_mutex_unlock(&stream->lock);
> > 
> > dependency (lock nesting order) between 
> > 
> >                       pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
> >                       pthread_mutex_unlock(&stream->lock);
> > 
> > should be documented.
> 
> I'll add another patch on top of this one, like we discussed, that
> documents the whole locking scheme of this file.
> 
> Thanks!
> David
> 
> > 
> > Thanks,
> > 
> > Mathieu
> > 
> > 
> >>  				goto data_not_available;
> >>  			}
> >>  		}
> >> +		pthread_mutex_unlock(&stream->lock);
> >>  	}
> >>  
> >>  	/*
> >> diff --git a/src/common/kernel-consumer/kernel-consumer.c b/src/common/kernel-consumer/kernel-consumer.c
> >> index 249df8a..196deee 100644
> >> --- a/src/common/kernel-consumer/kernel-consumer.c
> >> +++ b/src/common/kernel-consumer/kernel-consumer.c
> >> @@ -485,7 +485,8 @@ error:
> >>  
> >>  /*
> >>   * Check if data is still being extracted from the buffers for a specific
> >> - * stream. Consumer data lock MUST be acquired before calling this function.
> >> + * stream. Consumer data lock MUST be acquired before calling this function
> >> + * and the stream lock.
> >>   *
> >>   * Return 0 if the traced data are still getting read else 1 meaning that the
> >>   * data is available for trace viewer reading.
> >> @@ -496,31 +497,17 @@ int lttng_kconsumer_data_available(struct lttng_consumer_stream *stream)
> >>  
> >>  	assert(stream);
> >>  
> >> -	/*
> >> -	 * Try to lock the stream mutex. On failure, we know that the stream is
> >> -	 * being used else where hence there is data still being extracted.
> >> -	 */
> >> -	ret = pthread_mutex_trylock(&stream->lock);
> >> -	if (ret == EBUSY) {
> >> -		/* Data not available */
> >> -		ret = 0;
> >> -		goto end;
> >> -	}
> >> -	/* The stream is now locked so we can do our ustctl calls */
> >> -
> >>  	ret = kernctl_get_next_subbuf(stream->wait_fd);
> >>  	if (ret == 0) {
> >>  		/* There is still data so let's put back this subbuffer. */
> >>  		ret = kernctl_put_subbuf(stream->wait_fd);
> >>  		assert(ret == 0);
> >> -		goto end_unlock;
> >> +		goto end;
> >>  	}
> >>  
> >>  	/* Data is available to be read for this stream. */
> >>  	ret = 1;
> >>  
> >> -end_unlock:
> >> -	pthread_mutex_unlock(&stream->lock);
> >>  end:
> >>  	return ret;
> >>  }
> >> diff --git a/src/common/ust-consumer/ust-consumer.c b/src/common/ust-consumer/ust-consumer.c
> >> index e8e3f93..4d3671a 100644
> >> --- a/src/common/ust-consumer/ust-consumer.c
> >> +++ b/src/common/ust-consumer/ust-consumer.c
> >> @@ -526,7 +526,8 @@ error:
> >>  
> >>  /*
> >>   * Check if data is still being extracted from the buffers for a specific
> >> - * stream. Consumer data lock MUST be acquired before calling this function.
> >> + * stream. Consumer data lock MUST be acquired before calling this function
> >> + * and the stream lock.
> >>   *
> >>   * Return 0 if the traced data are still getting read else 1 meaning that the
> >>   * data is available for trace viewer reading.
> >> @@ -539,31 +540,17 @@ int lttng_ustconsumer_data_available(struct lttng_consumer_stream *stream)
> >>  
> >>  	DBG("UST consumer checking data availability");
> >>  
> >> -	/*
> >> -	 * Try to lock the stream mutex. On failure, we know that the stream is
> >> -	 * being used else where hence there is data still being extracted.
> >> -	 */
> >> -	ret = pthread_mutex_trylock(&stream->lock);
> >> -	if (ret == EBUSY) {
> >> -		/* Data not available */
> >> -		ret = 0;
> >> -		goto end;
> >> -	}
> >> -	/* The stream is now locked so we can do our ustctl calls */
> >> -
> >>  	ret = ustctl_get_next_subbuf(stream->chan->handle, stream->buf);
> >>  	if (ret == 0) {
> >>  		/* There is still data so let's put back this subbuffer. */
> >>  		ret = ustctl_put_subbuf(stream->chan->handle, stream->buf);
> >>  		assert(ret == 0);
> >> -		goto end_unlock;
> >> +		goto end;
> >>  	}
> >>  
> >>  	/* Data is available to be read for this stream. */
> >>  	ret = 1;
> >>  
> >> -end_unlock:
> >> -	pthread_mutex_unlock(&stream->lock);
> >>  end:
> >>  	return ret;
> >>  }
> >> -- 
> >> 1.7.10.4
> >>
> >>
> >> _______________________________________________
> >> lttng-dev mailing list
> >> lttng-dev at lists.lttng.org
> >> http://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev
> > 

-- 
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com



More information about the lttng-dev mailing list