Patch from Dave Hansen <haveblue@us.ibm.com>

The new lost-tick detection code can be relatively uninformative when
something using the common_interrupt->do_IRQ path keeps interrupts
disabled for a long time. This patch will record the last interrupt on
the current CPU, and spit it out along with the stack trace.  I ignore
the timer interrupt, because it is always running when this detection
occurs, and would overwrite the previous interrupt that took a long time.

You can now turn this on with a boot-time option with the same name as
x86-64: "report_lost_ticks".  I don't like the default of 5, so I let it
take a plain report_lost_ticks, or report_lost_ticks=100, if you want.

Warning! Detected 4094446 micro-second gap between interrupts.
  Compensating for 4093 lost ticks.
Call Trace:
 [<c010ab60>] handle_IRQ_event+0x28/0x50
 [<c010ad44>] do_IRQ+0xa0/0x10c
 [<c01097b3>] common_interrupt+0x43/0x58
  Last run common irq: 24: eth2




 i386/kernel/irq.c  |    9 +++++++++
 i386/kernel/time.c |   48 +++++++++++++++++++++++++++++++++++++-----------
 asm-i386/irq.h     |    3 +++
 3 files changed, 49 insertions(+), 11 deletions(-)

diff -puN arch/i386/kernel/irq.c~report-lost-ticks arch/i386/kernel/irq.c
--- 25-power4/arch/i386/kernel/irq.c~report-lost-ticks	2003-02-18 21:46:30.000000000 -0800
+++ 25-power4-akpm/arch/i386/kernel/irq.c	2003-02-18 21:46:30.000000000 -0800
@@ -32,6 +32,7 @@
 #include <linux/irq.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/percpu.h>
 
 #include <asm/atomic.h>
 #include <asm/io.h>
@@ -68,6 +69,11 @@
 irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned =
 	{ [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}};
 
+/*
+ * Used to determine the culprits who disabled interrupts for a long time
+ */
+DEFINE_PER_CPU(int, last_do_IRQ_interrupt) = { -1 };
+
 static void register_irq_proc (unsigned int irq);
 
 /*
@@ -329,6 +335,9 @@ asmlinkage unsigned int do_IRQ(struct pt
 	struct irqaction * action;
 	unsigned int status;
 
+	if (irq)	/* don't count the timer */
+		__get_cpu_var(last_do_IRQ_interrupt) = irq;
+
 	irq_enter();
 
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
diff -puN arch/i386/kernel/time.c~report-lost-ticks arch/i386/kernel/time.c
--- 25-power4/arch/i386/kernel/time.c~report-lost-ticks	2003-02-18 21:46:30.000000000 -0800
+++ 25-power4-akpm/arch/i386/kernel/time.c	2003-02-18 21:46:30.000000000 -0800
@@ -267,6 +267,24 @@ static inline void do_timer_interrupt(in
 #endif
 }
 
+static int report_lost_ticks;
+int __init report_lost_ticks_setup(char *str)
+{
+	char* numstr;
+	
+	if (str[0] == '=') {
+		numstr = &str[1];
+		return get_option(&numstr,&report_lost_ticks) ? 1 : 0;
+	} else if (str[0] == '\0') {
+		report_lost_ticks = 5;
+		return 1;
+	}
+
+	return 0;
+}
+
+__setup("report_lost_ticks", report_lost_ticks_setup);
+
 /*
  * Lost tick detection and compensation
  */
@@ -274,18 +292,15 @@ static inline void detect_lost_tick(void
 {
 	/* read time since last interrupt */
 	unsigned long delta = timer->get_offset();
-	static unsigned long dbg_print;
 	
 	/* check if delta is greater then two ticks */
-	if(delta >= 2*(1000000/HZ)){
+	if (unlikely((delta >= 2*(1000000/HZ)))) {
+
+		if (report_lost_ticks > 0) {
+			struct irqaction *action;
+			int last_irq;
 
-		/*
-		 * only print debug info first 5 times
-		 */
-		/*
-		 * AKPM: disable this for now; it's nice, but irritating.
-		 */
-		if (0 && dbg_print < 5) {
+			last_irq = __get_cpu_var(last_do_IRQ_interrupt);
 			printk(KERN_WARNING "\nWarning! Detected %lu "
 				"micro-second gap between interrupts.\n",
 				delta);
@@ -293,13 +308,24 @@ static inline void detect_lost_tick(void
 				"ticks.\n",
 				delta/(1000000/HZ)-1);
 			dump_stack();
-			dbg_print++;
+
+			if (last_irq >= 0 &&
+					(action = irq_desc[last_irq].action)) {
+				printk(KERN_WARNING "  Last run common "
+							"irq: %d: %s", 
+					last_irq, action->name);
+				for (action = action->next; action;
+						action = action->next)
+					printk(", %s", action->name);
+				printk("\n");	
+			}
+
+			report_lost_ticks--;
 		}
 		/* calculate number of missed ticks */
 		delta = delta/(1000000/HZ)-1;
 		jiffies += delta;
 	}
-		
 }
 
 /*
diff -puN include/asm-i386/irq.h~report-lost-ticks include/asm-i386/irq.h
--- 25-power4/include/asm-i386/irq.h~report-lost-ticks	2003-02-18 21:46:30.000000000 -0800
+++ 25-power4-akpm/include/asm-i386/irq.h	2003-02-18 21:46:30.000000000 -0800
@@ -12,6 +12,8 @@
 
 #include <linux/config.h>
 #include <linux/sched.h>
+#include <linux/percpu.h>
+
 /* include comes from machine specific directory */
 #include "irq_vectors.h"
 
@@ -24,6 +26,7 @@ extern void disable_irq(unsigned int);
 extern void disable_irq_nosync(unsigned int);
 extern void enable_irq(unsigned int);
 extern void release_x86_irqs(struct task_struct *);
+DECLARE_PER_CPU(int, last_do_IRQ_interrupt);
 
 #ifdef CONFIG_X86_LOCAL_APIC
 #define ARCH_HAS_NMI_WATCHDOG		/* See include/linux/nmi.h */

_