[ltt-dev] [PATCH] Fixing bugs in cluster synchronisation for accuracy algorithm
Mathieu Desnoyers
compudj at krystal.dyndns.org
Mon Sep 13 17:37:08 EDT 2010
* Masoume Jabbarifar (masoume.jabbarifar at polymtl.ca) wrote:
> Quoting Mathieu Desnoyers <compudj at krystal.dyndns.org>:
>
> > * masoume.jabbarifar at polymtl.ca (masoume.jabbarifar at polymtl.ca) wrote:
> > > From: Masoume Jabbarifar <masoume.jabbarifar at polymtl.ca>
> > >
> >
> > Can you document the change in the patch header ?
>
> Once we had indirect path to reference node (more than 2 hops to root node in
> the tree) the recursive function did not work so this patch fixed it. Also a
> field has been added to save each accuracy for each node based on reference
> node.
OK, so please turn this into two separate patches:
one for the bugfix, with the proper comment, and one for the new
accuracy feature, also with appropriate patch comments.
Thanks,
Mathieu
>
> >
> > Also, please CC Benjamin on these changes. I'd like him to have a look
> > at them if he has time.
> >
> > > ---
> > > lttv/lttv/sync/data_structures.h | 2 +-
> > > lttv/lttv/sync/factor_reduction_accuracy.c | 15 ++++++++++++---
> > > lttv/lttv/sync/sync_chain_lttv.c | 18 ++++++++++++------
> > > lttv/lttv/sync/sync_chain_unittest.c | 9 ++++++---
> > > 4 files changed, 31 insertions(+), 13 deletions(-)
> > >
> > > diff --git a/lttv/lttv/sync/data_structures.h
> > b/lttv/lttv/sync/data_structures.h
> > > index 627286c..2206455 100644
> > > --- a/lttv/lttv/sync/data_structures.h
> > > +++ b/lttv/lttv/sync/data_structures.h
> > > @@ -131,7 +131,7 @@ typedef struct
> > > * synchronization */
> > > typedef struct
> > > {
> > > - double drift, offset;
> > > + double drift, offset, accuracyToRefNode;
> >
> > Hrm, I see that the sync code use a different coding style than the rest
> > of lttv. *sight* OK then.
> >
> > For your information, the rest of lttv uses lower cases for variable
> > identifiers, and UpperCase for type identifiers. It also uses
> > lower_case_with_underscore() for function names.
> >
> > > } Factors;
> > >
> > > // Correction factors between trace pairs, to be used for reduction
> > > diff --git a/lttv/lttv/sync/factor_reduction_accuracy.c
> > b/lttv/lttv/sync/factor_reduction_accuracy.c
> > > index a63dae7..bf0546b 100644
> > > --- a/lttv/lttv/sync/factor_reduction_accuracy.c
> > > +++ b/lttv/lttv/sync/factor_reduction_accuracy.c
> > > @@ -415,12 +415,19 @@ static void getFactors(AllFactors* const allFactors,
> > unsigned int** const
> > > unsigned int reference;
> > > PairFactors** const pairFactors= allFactors->pairFactors;
> > >
> > > - reference= references[traceNum];
> > > -
> > > + if (traceNum == references[traceNum])
> > > + {
> > > + reference=traceNum;
> > > + }
> > > + else
> > > + {
> > > + reference = predecessors[references[traceNum]][traceNum];
> > > + }
> > > if (reference == traceNum)
> > > {
> > > factors->offset= 0.;
> > > factors->drift= 1.;
> > > + factors->accuracyToRefNode= 0.;
> > > }
> > > else
> > > {
> > > @@ -428,6 +435,8 @@ static void getFactors(AllFactors* const allFactors,
> > unsigned int** const
> > >
> > > getFactors(allFactors, predecessors, references,
> > > predecessors[reference][traceNum], &previousVertexFactors);
> > > + factors->accuracyToRefNode =
> > > + pairFactors[reference][traceNum].accuracy+
> > pairFactors[traceNum][reference].accuracy +
> > previousVertexFactors.accuracyToRefNode;
> > >
> > > /* Convert the time from traceNum to reference;
> > > * pairFactors[row][col] converts the time from col to row, invert the
> > > @@ -452,7 +461,7 @@ static void getFactors(AllFactors* const allFactors,
> > unsigned int** const
> > > }
> > > else
> > > {
> > > - g_assert_not_reached();
> > > + //g_assert_not_reached();
> >
> > Why commenting this ? This assertion was probably there for a reason.
> > Shutting up assertions without proper comments is usually a bad
> > practice, as assertions are our safety nets.
>
> Ops, you are right, it should not be commented.
> >
> > > }
> > > }
> > > }
> > > diff --git a/lttv/lttv/sync/sync_chain_lttv.c
> > b/lttv/lttv/sync/sync_chain_lttv.c
> > > index 95bef44..20ed111 100644
> > > --- a/lttv/lttv/sync/sync_chain_lttv.c
> > > +++ b/lttv/lttv/sync/sync_chain_lttv.c
> > > @@ -218,6 +218,7 @@ bool syncTraceset(LttvTracesetContext* const
> > traceSetContext)
> > > double minOffset, minDrift;
> > > unsigned int refFreqTrace;
> > > int retval;
> > > + double sumAccuracy;
> > >
> > > if (!optionSync.present)
> > > {
> > > @@ -370,8 +371,6 @@ bool syncTraceset(LttvTracesetContext* const
> > traceSetContext)
> > > t->drift * t->start_tsc + t->offset));
> > > }
> > >
> > > - g_array_free(factors, TRUE);
> > > -
> > > lttv_traceset_context_compute_time_span(traceSetContext,
> > > &traceSetContext->time_span);
> > >
> > > @@ -395,21 +394,28 @@ bool syncTraceset(LttvTracesetContext* const
> > traceSetContext)
> > > if (!optionSyncNull.present && optionSyncStats.present)
> > > {
> > > printStats(syncState);
> > > -
> > > + sumAccuracy = 0;
> >
> > = 0.;
> >
> > > printf("Resulting synchronization factors:\n");
> > > for (i= 0; i < syncState->traceNb; i++)
> > > {
> > > LttTrace* t;
> > > + Factors* traceFactors;
> > >
> > > t= traceSetContext->traces[i]->t;
> > > -
> > > - printf("\ttrace %u drift= %g offset= %g (%f) start time= %ld.%09ld\n",
> > > + traceFactors= &g_array_index(factors, Factors, i);
> > > +
> > > + printf("\ttrace %u drift= %g offset= %g (%f) start time= %ld.%09ld
> > accuracyToRefNode= %g\n",
> > > i, t->drift, t->offset, (double) tsc_to_uint64(t->freq_scale,
> > > t->start_freq, t->offset) / NANOSECONDS_PER_SECOND,
> > > t->start_time_from_tsc.tv_sec,
> > > - t->start_time_from_tsc.tv_nsec);
> > > + t->start_time_from_tsc.tv_nsec,traceFactors->accuracyToRefNode);
> > > + sumAccuracy += traceFactors->accuracyToRefNode;
> > > +
> > > }
> > > }
> > > + printf("Total accuracy (0.0 is the best): %g\n",sumAccuracy);
> > > +
> > > + g_array_free(factors, TRUE);
> > >
> > > syncState->processingModule->destroyProcessing(syncState);
> > > if (syncState->matchingModule != NULL)
> > > diff --git a/lttv/lttv/sync/sync_chain_unittest.c
> > b/lttv/lttv/sync/sync_chain_unittest.c
> > > index 40302a0..d59eb09 100644
> > > --- a/lttv/lttv/sync/sync_chain_unittest.c
> > > +++ b/lttv/lttv/sync/sync_chain_unittest.c
> > > @@ -114,6 +114,7 @@ int main(const int argc, char* const argv[])
> > > GString* analysisModulesNames;
> > > GString* reductionModulesNames;
> > > unsigned int id;
> > > + double sumAccuracy;
> > > AllFactors* allFactors;
> > >
> > > /*
> > > @@ -266,15 +267,17 @@ int main(const int argc, char* const argv[])
> > > unsigned int i;
> > >
> > > printStats(syncState);
> > > -
> > > + sumAccuracy = 0;
> >
> > = 0.;
> >
> > Thanks,
> >
> > Mathieu
> >
> > > printf("Resulting synchronization factors:\n");
> > > for (i= 0; i < factors->len; i++)
> > > {
> > > Factors* traceFactors= &g_array_index(factors, Factors, i);
> > > - printf("\ttrace %u drift= %g offset= %g\n", i,
> > > - traceFactors->drift, traceFactors->offset);
> > > + printf("\ttrace %u drift= %g offset= %g accuracyToRefNode= %g\n", i,
> > > + traceFactors->drift, traceFactors->offset,
> > traceFactors->accuracyToRefNode);
> > > + sumAccuracy += traceFactors->accuracyToRefNode;
> > > }
> > > }
> > > + printf("Total accuracy (0.0 is the best): %g\n",sumAccuracy);
> > >
> > > // Destroy modules and clean up
> > > syncState->processingModule->destroyProcessing(syncState);
> > > --
> > > 1.6.0.4
> > >
> > >
> > > _______________________________________________
> > > ltt-dev mailing list
> > > ltt-dev at lists.casi.polymtl.ca
> > > http://lists.casi.polymtl.ca/cgi-bin/mailman/listinfo/ltt-dev
> > >
> >
> > --
> > Mathieu Desnoyers
> > Operating System Efficiency R&D Consultant
> > EfficiOS Inc.
> > http://www.efficios.com
> >
>
>
>
--
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com
More information about the lttng-dev
mailing list