[ltt-dev] Trace "destroy" hangs, lttd keeps running

Mathieu Desnoyers mathieu.desnoyers at polymtl.ca
Fri Nov 20 09:38:42 EST 2009


* Ashwin Tanugula (ashwin.tanugula at broadcom.com) wrote:
>  
> [...]
> 
> > > I continued my testing using "return 1;" to reproduce the errors with 
> > > the viewer, so that I can send you my trace data.
> > >
> 
> > OK. Then let's revert to the previous lttng assumption: synchronized TSCs across MIPS cores. Does that work for you ? (I see that you configured your kernel with support for 2 CPUs. Do you have an SMP board ?)
> 
> > MIPS assume synchronized TSC
> 
> > Code for non-synchronized TSC support was broken (at least for MIPS32). I suspect the spinlock did not do well with NMIs or something like this. Revert to assuming synchronized TSCs across cores until someone send me a MIPS board to play with.
> 
> 
> Mathieu,
> 
> I applied this patch on top of patch-2.6.31.6-lttng-0.177.
> 
> I was able to generate the trace data.
> 

Great ! :)

> However, I was only able to view two out of 10 traces generated.
> 

These errors seem to be caused by the fact that you run a SMP mips
system with frequency scaling (either dynamic freq. scaling., or halting
the clock in idle).

You might wait to try to disable the freq. scaling and see if it works
better.

Thanks,

Mathieu

> 8 of them gave similar error :
> 
> 
> 1) ** ERROR **: Process 399 has been created at [0.000000000] and inserted at [24.928527633] before 
> fork on cpu 1[38.437246443].
> Probably an unsynchronized TSC problem on the traced machine.
> aborting...
> /usr/local/bin/lttv-gui: line 10:  4346 Aborted                 (core dumped) $LTTV_CMD.real -m lttvwindow -m guievents -m guifilter -m guicontrolflow -m resourceview -m guistatistics -m guitracecontrol $*
> 
> 2) ** ERROR **: Process 570 has been created at [0.000000000] and inserted at [415.432990305] before 
> fork on cpu 0[436.945971082].
> Probably an unsynchronized TSC problem on the traced machine.
> aborting...
> /usr/local/bin/lttv-gui: line 10:  4353 Aborted                 (core dumped) $LTTV_CMD.real -m lttvwindow -m guievents -m guifilter -m guicontrolflow -m resourceview -m guistatistics -m guitracecontrol $*
> 
> 
> I ran lttv in debug mode. Please look at the attached log 
> 
> I have uploaded my trace data so that you can download and view it.
> 
> Here are the bad ones.
> 
> 
> http://dl.dropbox.com/u/986090/trace1.tar.bz2
> 
> http://dl.dropbox.com/u/986090/trace3.tar.bz2
> 
> http://dl.dropbox.com/u/986090/trace4.tar.bz2
> 
> http://dl.dropbox.com/u/986090/trace5.tar.bz2
> 
> 
> Here are the good ones if you want to view them.
> 
> http://dl.dropbox.com/u/986090/trace2.tar.bz2
> 
> http://dl.dropbox.com/u/986090/trace6.tar.bz2
> 
> 
> 
> Thanks,
> Ashwin
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers at polymtl.ca>
> ---
>  arch/mips/Kconfig                   |    1 
>  arch/mips/include/asm/trace-clock.h |   29 ++----
>  arch/mips/kernel/Makefile           |    2 
>  arch/mips/kernel/trace-clock.c      |  171 ------------------------------------
>  4 files changed, 12 insertions(+), 191 deletions(-)
> 
> Index: linux-2.6-lttng/arch/mips/Kconfig ===================================================================
> --- linux-2.6-lttng.orig/arch/mips/Kconfig	2009-11-19 11:36:16.000000000 -0500
> +++ linux-2.6-lttng/arch/mips/Kconfig	2009-11-19 11:36:23.000000000 -0500
> @@ -1739,7 +1739,6 @@ config HAVE_GET_CYCLES_32
>  	depends on !CPU_R4400_WORKAROUNDS
>  	select HAVE_TRACE_CLOCK
>  	select HAVE_TRACE_CLOCK_32_TO_64
> -	select HAVE_UNSYNCHRONIZED_TSC
>  
>  #
>  # Use the generic interrupt handling code in kernel/irq/:
> Index: linux-2.6-lttng/arch/mips/include/asm/trace-clock.h
> ===================================================================
> --- linux-2.6-lttng.orig/arch/mips/include/asm/trace-clock.h	2009-11-19 11:36:40.000000000 -0500
> +++ linux-2.6-lttng/arch/mips/include/asm/trace-clock.h	2009-11-19 11:44:59.000000000 -0500
> @@ -32,28 +32,15 @@ extern u64 trace_clock_read_synthetic_ts
>   * tracing needs to detect delays longer than 8 seconds, we need a full 64-bits
>   * TSC, whic is provided by trace-clock-32-to-64.
>  */
> -extern u64 trace_clock_async_tsc_read(void);
>  
>  static inline u32 trace_clock_read32(void)  {
> -	u32 cycles;
> -
> -	if (likely(tsc_is_sync()))
> -		cycles = (u32)get_cycles(); /* only need the 32 LSB */
> -	else
> -		cycles = (u32)trace_clock_async_tsc_read();
> -	return cycles;
> +	return (u32)get_cycles(); /* only need the 32 LSB */
>  }
>  
>  static inline u64 trace_clock_read64(void)  {
> -	u64 cycles;
> -
> -	if (likely(tsc_is_sync()))
> -		cycles = trace_clock_read_synthetic_tsc();
> -	else
> -		cycles = trace_clock_async_tsc_read();
> -	return cycles;
> +	return trace_clock_read_synthetic_tsc();
>  }
>  
>  static inline u64 trace_clock_frequency(void) @@ -66,11 +53,19 @@ static inline u32 trace_clock_freq_scale
>  	return 1;
>  }
>  
> -extern void get_trace_clock(void);
> -extern void put_trace_clock(void);
>  extern void get_synthetic_tsc(void);
>  extern void put_synthetic_tsc(void);
>  
> +static inline void get_trace_clock(void) {
> +	get_synthetic_tsc();
> +}
> +
> +static inline void put_trace_clock(void) {
> +	put_synthetic_tsc();
> +}
> +
>  static inline void set_trace_clock_is_sync(int state)  {  }
> Index: linux-2.6-lttng/arch/mips/kernel/trace-clock.c
> ===================================================================
> --- linux-2.6-lttng.orig/arch/mips/kernel/trace-clock.c	2009-11-19 11:41:23.000000000 -0500
> +++ /dev/null	1970-01-01 00:00:00.000000000 +0000
> @@ -1,171 +0,0 @@
> -/*
> - * arch/mips/kernel/trace-clock.c
> - *
> - * Trace clock for mips.
> - *
> - * Mathieu Desnoyers <mathieu.desnoyers at polymtl.ca>, October 2008
> - */
> -
> -#include <linux/module.h>
> -#include <linux/trace-clock.h>
> -#include <linux/jiffies.h>
> -#include <linux/timer.h>
> -#include <linux/spinlock.h>
> -
> -static u64 trace_clock_last_tsc;
> -static DEFINE_PER_CPU(struct timer_list, update_timer); -static DEFINE_SPINLOCK(async_tsc_lock);
> -static int async_tsc_refcount;	/* Number of readers */
> -static int async_tsc_enabled;	/* Async TSC enabled on all online CPUs */
> -
> -#if (BITS_PER_LONG == 64)
> -static inline u64 trace_clock_cmpxchg64(u64 *ptr, u64 old, u64 new) -{
> -	return cmpxchg64(ptr, old, new);
> -}
> -#else
> -/*
> - * Emulate an atomic 64-bits update with a spinlock.
> - * Note : preempt_disable or irq save must be explicit with raw_spinlock_t.
> - * Given we use a spinlock for this time base, we should never be called from
> - * NMI context.
> - */
> -static raw_spinlock_t trace_clock_lock =
> -	(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
> -
> -/*
> - * Must be called under irqoff+spinlock on MIPS32.
> - */
> -static inline u64 trace_clock_cmpxchg64(u64 *ptr, u64 old, u64 new) -{
> -	u64 val;
> -
> -	val = *ptr;
> -	if (likely(val == old))
> -		*ptr = new;
> -	return val;
> -}
> -#endif
> -
> -/*
> - * Must be called under irqoff+spinlock on MIPS32.
> - */
> -static cycles_t read_last_tsc(void)
> -{
> -	return trace_clock_last_tsc;
> -}
> -
> -/*
> - * Support for architectures with non-sync TSCs.
> - * When the local TSC is discovered to lag behind the highest TSC counter, we
> - * increment the TSC count of an amount that should be, ideally, lower than the
> - * execution time of this routine, in cycles : this is the granularity we look
> - * for : we must be able to order the events.
> - *
> - * MIPS32 does not have atomic 64-bit updates. Emulate it with irqoff+spinlock.
> - */
> -
> -notrace u64 trace_clock_async_tsc_read(void) -{
> -	u64 new_tsc, last_tsc;
> -#if (BITS_PER_LONG == 32)
> -	unsigned long flags;
> -
> -	local_irq_save(flags);
> -	__raw_spin_lock(&trace_clock_lock);
> -#endif
> -
> -	WARN_ON(!async_tsc_refcount || !async_tsc_enabled);
> -	new_tsc = trace_clock_read_synthetic_tsc();
> -	barrier();
> -	last_tsc = read_last_tsc();
> -	do {
> -		if (new_tsc < last_tsc)
> -			new_tsc = last_tsc + TRACE_CLOCK_MIN_PROBE_DURATION;
> -		/*
> -		 * If cmpxchg fails with a value higher than the new_tsc, don't
> -		 * retry : the value has been incremented and the events
> -		 * happened almost at the same time.
> -		 * We must retry if cmpxchg fails with a lower value :
> -		 * it means that we are the CPU with highest frequency and
> -		 * therefore MUST update the value.
> -		 */
> -		last_tsc = trace_clock_cmpxchg64(&trace_clock_last_tsc,
> -						 last_tsc, new_tsc);
> -	} while (unlikely(last_tsc < new_tsc));
> -#if (BITS_PER_LONG == 32)
> -	__raw_spin_unlock(&trace_clock_lock);
> -	local_irq_restore(flags);
> -#endif
> -	return new_tsc;
> -}
> -EXPORT_SYMBOL_GPL(trace_clock_async_tsc_read);
> -
> -static void update_timer_ipi(void *info) -{
> -	(void)trace_clock_async_tsc_read();
> -}
> -
> -/*
> - * update_timer_fct : - Timer function to resync the clocks
> - * @data: unused
> - *
> - * Fires every jiffy.
> - */
> -static void update_timer_fct(unsigned long data) -{
> -	(void)trace_clock_async_tsc_read();
> -
> -	per_cpu(update_timer, smp_processor_id()).expires = jiffies + 1;
> -	add_timer_on(&per_cpu(update_timer, smp_processor_id()),
> -		     smp_processor_id());
> -}
> -
> -static void enable_trace_clock(int cpu) -{
> -	init_timer(&per_cpu(update_timer, cpu));
> -	per_cpu(update_timer, cpu).function = update_timer_fct;
> -	per_cpu(update_timer, cpu).expires = jiffies + 1;
> -	smp_call_function_single(cpu, update_timer_ipi, NULL, 1);
> -	add_timer_on(&per_cpu(update_timer, cpu), cpu);
> -}
> -
> -static void disable_trace_clock(int cpu) -{
> -	del_timer_sync(&per_cpu(update_timer, cpu));
> -}
> -
> -void get_trace_clock(void)
> -{
> -	int cpu;
> -
> -	spin_lock(&async_tsc_lock);
> -	if (async_tsc_refcount++ || tsc_is_sync()) {
> -		get_synthetic_tsc();
> -	} else {
> -		async_tsc_enabled = 1;
> -		get_synthetic_tsc();
> -		for_each_online_cpu(cpu)
> -			enable_trace_clock(cpu);
> -	}
> -	spin_unlock(&async_tsc_lock);
> -}
> -EXPORT_SYMBOL_GPL(get_trace_clock);
> -
> -void put_trace_clock(void)
> -{
> -	int cpu;
> -
> -	spin_lock(&async_tsc_lock);
> -	WARN_ON(async_tsc_refcount <= 0);
> -	if (async_tsc_refcount != 1 || !async_tsc_enabled) {
> -		put_synthetic_tsc();
> -	} else {
> -		for_each_online_cpu(cpu)
> -			disable_trace_clock(cpu);
> -		async_tsc_enabled = 0;
> -	}
> -	async_tsc_refcount--;
> -	spin_unlock(&async_tsc_lock);
> -}
> -EXPORT_SYMBOL_GPL(put_trace_clock);
> Index: linux-2.6-lttng/arch/mips/kernel/Makefile
> ===================================================================
> --- linux-2.6-lttng.orig/arch/mips/kernel/Makefile	2009-11-19 11:47:20.000000000 -0500
> +++ linux-2.6-lttng/arch/mips/kernel/Makefile	2009-11-19 11:47:27.000000000 -0500
> @@ -88,8 +88,6 @@ obj-$(CONFIG_GPIO_TXX9)		+= gpio_txx9.o
>  obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
>  obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
>  
> -obj-$(CONFIG_HAVE_TRACE_CLOCK)	+= trace-clock.o
> -
>  CFLAGS_cpu-bugs64.o	= $(shell if $(CC) $(KBUILD_CFLAGS) -Wa,-mdaddi -c -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-DHAVE_AS_SET_DADDI"; fi)
>  
>  obj-$(CONFIG_HAVE_STD_PC_SERIAL_PORT)	+= 8250-platform.o
> 
> > 
> > Thanks,
> > Ashwin
> > 
> > 
> > 
> > 
> > 

Content-Description: lttv_gdb_log.txt
> (gdb) set args -v -d -m textDump -t /tmp/trace
> (gdb) run
> 
> [...]
> 
> 
> kernel.syscall_exit: 38.437020829 (/home/atanugul/new_traces/trace/trace1/kernel_1), 392, 0, , , 0, 0x0, MODE_UNKNOWN { ret = 0 }
> kernel.irq_entry: 38.437059405 (/home/atanugul/new_traces/trace/trace1/kernel_0), 0, 0, swapper, , 0, 0x0, IRQ { ip = 2147489408, handler = 0x8001a398, irq_id = 104, kernel_mode = 1 }
> kernel.sched_migrate_task: 38.437060072 (/home/atanugul/new_traces/trace/trace1/kernel_1), 392, 0, , , 0, 0x0, MODE_UNKNOWN { pid = 392, state = 0, dest_cpu = 0 }
> kernel.softirq_raise: 38.437066823 (/home/atanugul/new_traces/trace/trace1/kernel_0), 0, 0, swapper, , 0, 0x0, IRQ { softirq_id = 1 [run_timer_softirq+0x0/0x2a4] }
> kernel.irq_exit: 38.437075484 (/home/atanugul/new_traces/trace/trace1/kernel_0), 0, 0, swapper, , 0, 0x0, SYSCALL { handled = 1 }
> kernel.softirq_entry: 38.437078870 (/home/atanugul/new_traces/trace/trace1/kernel_0), 0, 0, swapper, , 0, 0x0, SOFTIRQ { softirq_id = 1 [run_timer_softirq+0x0/0x2a4] }
> kernel.timer_set: 38.437096690 (/home/atanugul/new_traces/trace/trace1/kernel_0), 0, 0, swapper, , 0, 0x0, SOFTIRQ { expires = 4294796539, function = 0x80230950, data = 0 }
> kernel.softirq_exit: 38.437099758 (/home/atanugul/new_traces/trace/trace1/kernel_0), 0, 0, swapper, , 0, 0x0, SYSCALL { softirq_id = 1 [run_timer_softirq+0x0/0x2a4] }
> 
> ** ERROR **: Process 399 has been created at [0.000000000] and inserted at [24.928527633] before 
> fork on cpu 1[38.437246443].
> Probably an unsynchronized TSC problem on the traced machine.
> aborting...
> 
> Program received signal SIGABRT, Aborted.
> 0x00508422 in __kernel_vsyscall ()
> 
> 
> 
> 
> (gdb) bt full
> 
> #0  0x00508422 in __kernel_vsyscall ()
> No symbol table info available.
> #1  0x010284d1 in raise () from /lib/tls/i686/cmov/libc.so.6
> No symbol table info available.
> #2  0x0102b932 in abort () from /lib/tls/i686/cmov/libc.so.6
> No symbol table info available.
> #3  0x00678036 in g_logv () from /lib/libglib-2.0.so.0
> No symbol table info available.
> #4  0x00678066 in g_log () from /lib/libglib-2.0.so.0
> No symbol table info available.
> #5  0x0805f732 in process_fork (hook_data=0x80cce58, call_data=0x80bf200)
>     at state.c:3092
>         s = 0x80bf200
>         e = 0x8098598
>         th = 0x80cce58
>         parent_pid = 392
>         child_pid = 399
>         child_tgid = 399
>         cpu = 1
>         ts = 0x80be410
>         process = 0x80e9198
>         child_process = 0x80b93b0
>         f = 0x80bc6a8
>         __PRETTY_FUNCTION__ = "process_fork"
> #6  0x08055bf7 in lttv_hooks_call_merge (h1=0x80c2890, call_data1=0x80bf200, 
>     h2=0x80cd418, call_data2=0x80bf200) at hook.c:320
>         ret = 135000512
>         sum_ret = 0
>         c1 = 0x80cc248
>         c2 = 0x80bba20
>         i = 0
>         j = 0
> #7  0x080661db in lttv_process_traceset_middle (self=0x80bdc18, end=..., 
>     nb_events=4294967295, end_position=0x0) at tracecontext.c:772
>         pqueue = 0x80c6800
>         tfc = 0x80bf200
>         e = 0x8098598
>         count = 187099
>         read_ret = 0
>         last_ret = 0
>         __PRETTY_FUNCTION__ = "lttv_process_traceset_middle"
> #8  0x0030b372 in process_traceset (hook_data=0x0, call_data=0x0) at batchAnalysis.c:133
>         value_expression = {v_int = 0x80cd958, v_uint = 0x80cd958, v_long = 0x80cd958, 
>           v_ulong = 0x80cd958, v_float = 0x80cd958, v_double = 0x80cd958, 
>           v_time = 0x80cd958, v_pointer = 0x80cd958, v_string = 0x80cd958, 
>           v_gobject = 0x80cd958}
>         value_filter = {v_int = 0x80cd9f8, v_uint = 0x80cd9f8, v_long = 0x80cd9f8, 
>           v_ulong = 0x80cd9f8, v_float = 0x80cd9f8, v_double = 0x80cd9f8, 
>           v_time = 0x80cd9f8, v_pointer = 0x80cd9f8, v_string = 0x80cd9f8, 
>      v_gobject = 0x80cd9f8}
>         attributes = 0x8076f00
>         tscs = 0xbffff3fc
>         tss = 0x80bdc18
>         tc = 0x80bdc18
>         start = {tv_sec = 0, tv_nsec = 0}
>         end = {tv_sec = 4294967295, tv_nsec = 4294967295}
>         retval = 1
>         __PRETTY_FUNCTION__ = "process_traceset"
> #9  0x08055ab8 in lttv_hooks_call (h=0x8076f60, call_data=0x0) at hook.c:272
>         ret = 0
>         sum_ret = 0
>         c = 0x807b370
>         i = 0
> #10 0x080537d2 in main (argc=7, argv=0xbffff524) at main.c:224
>         i = 4
>         profile_memory_short_option = 0x806ca28 "-M"
>         profile_memory_long_option = 0x806ca2b "--memory"
>         profile_memory = 0
>         retval = 1
>         value = {v_int = 0x8077644, v_uint = 0x8077644, v_long = 0x8077644, 
>         v_ulong = 0x8077644, v_float = 0x8077644, v_double = 0x8077644, 
>         v_time = 0x8077644, v_pointer = 0x8077644, v_string = 0x8077644, 
>         v_gobject = 0x8077644}
>         __PRETTY_FUNCTION__ = "main"
>         error = 0x0
>         module_module = 0x80770c8
>         module_option = 0x8075338


-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68




More information about the lttng-dev mailing list