From: john stultz <johnstul@us.ibm.com>

This patch fixes a race in the timer_interrupt() code caused by
detect_lost_tick().  Because lost-tick compensation is done outside
timer->mark_offset(), time can pass between the two time-source reads,
which can cause gettimeofday() inconsistencies.

Additionally, detect_lost_tick() was broken for the PIT case: the whole
point of detect_lost_tick() is to interpolate between two time sources to
find inconsistencies, and the PIT case has only one.  This could also cause
xtime_lock seqlock reader starvation, which has been causing machine hangs
on SMP boxes that use the PIT as a time source.

This patch fixes the described race by removing detect_lost_tick() and
instead implementing the lost tick detection code inside mark_offset().

Some of the divs and mods being added here might concern folks, but by no
longer calling timer->get_offset() from detect_lost_tick() we eliminate much
of the same math.  I did some simple cycle counting, and on average the new
code comes out equivalent or faster.



 arch/i386/kernel/time.c                 |   36 --------------------------------
 arch/i386/kernel/timers/timer_cyclone.c |   18 ++++++++++++++++
 arch/i386/kernel/timers/timer_tsc.c     |   26 +++++++++++++++++++++++
 3 files changed, 44 insertions(+), 36 deletions(-)

diff -puN arch/i386/kernel/time.c~lost-tick-fix arch/i386/kernel/time.c
--- 25/arch/i386/kernel/time.c~lost-tick-fix	2003-04-17 19:33:23.000000000 -0700
+++ 25-akpm/arch/i386/kernel/time.c	2003-04-17 19:33:23.000000000 -0700
@@ -241,41 +241,6 @@ static inline void do_timer_interrupt(in
 }
 
 /*
- * Lost tick detection and compensation
- */
-static inline void detect_lost_tick(void)
-{
-	/* read time since last interrupt */
-	unsigned long delta = timer->get_offset();
-	static unsigned long dbg_print;
-	
-	/* check if delta is greater then two ticks */
-	if(delta >= 2*(1000000/HZ)){
-
-		/*
-		 * only print debug info first 5 times
-		 */
-		/*
-		 * AKPM: disable this for now; it's nice, but irritating.
-		 */
-		if (0 && dbg_print < 5) {
-			printk(KERN_WARNING "\nWarning! Detected %lu "
-				"micro-second gap between interrupts.\n",
-				delta);
-			printk(KERN_WARNING "  Compensating for %lu lost "
-				"ticks.\n",
-				delta/(1000000/HZ)-1);
-			dump_stack();
-			dbg_print++;
-		}
-		/* calculate number of missed ticks */
-		delta = delta/(1000000/HZ)-1;
-		jiffies += delta;
-	}
-		
-}
-
-/*
  * This is the same as the above, except we _also_ save the current
  * Time Stamp Counter value at the time of the timer interrupt, so that
  * we later on can estimate the time of day more exactly.
@@ -291,7 +256,6 @@ void timer_interrupt(int irq, void *dev_
 	 */
 	write_seqlock(&xtime_lock);
 
-	detect_lost_tick();
 	timer->mark_offset();
  
 	do_timer_interrupt(irq, NULL, regs);
diff -puN arch/i386/kernel/timers/timer_cyclone.c~lost-tick-fix arch/i386/kernel/timers/timer_cyclone.c
--- 25/arch/i386/kernel/timers/timer_cyclone.c~lost-tick-fix	2003-04-17 19:33:23.000000000 -0700
+++ 25-akpm/arch/i386/kernel/timers/timer_cyclone.c	2003-04-17 19:33:23.000000000 -0700
@@ -18,6 +18,7 @@
 #include <asm/fixmap.h>
 
 extern spinlock_t i8253_lock;
+extern unsigned long jiffies;
 extern unsigned long calibrate_tsc(void);
 
 /* Number of usecs that the last interrupt was delayed */
@@ -46,6 +47,8 @@ static rwlock_t monotonic_lock = RW_LOCK
 
 static void mark_offset_cyclone(void)
 {
+	unsigned long lost, delay;
+	unsigned long delta = last_cyclone_low;
 	int count;
 	unsigned long long this_offset, last_offset;
 
@@ -62,6 +65,15 @@ static void mark_offset_cyclone(void)
 	count |= inb(0x40) << 8;
 	spin_unlock(&i8253_lock);
 
+	/* lost tick compensation */
+	delta = last_cyclone_low - delta;	
+	delta /=(CYCLONE_TIMER_FREQ/1000000);
+	delta += delay_at_last_interrupt;
+	lost = delta/(1000000/HZ);
+	delay = delta%(1000000/HZ);
+	if(lost >= 2)
+		jiffies += lost-1;
+	
 	/* update the monotonic base value */
 	this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
 	monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK;
@@ -70,6 +82,12 @@ static void mark_offset_cyclone(void)
 	/* calculate delay_at_last_interrupt */
 	count = ((LATCH-1) - count) * TICK_SIZE;
 	delay_at_last_interrupt = (count + LATCH/2) / LATCH;
+
+	/* catch corner case where tick rollover 
+	 * occurred between cyclone and pit reads
+	 */
+	if(abs(delay - delay_at_last_interrupt) > 900)
+		jiffies++;
 }
 
 static unsigned long get_offset_cyclone(void)
diff -puN arch/i386/kernel/timers/timer_tsc.c~lost-tick-fix arch/i386/kernel/timers/timer_tsc.c
--- 25/arch/i386/kernel/timers/timer_tsc.c~lost-tick-fix	2003-04-17 19:33:23.000000000 -0700
+++ 25-akpm/arch/i386/kernel/timers/timer_tsc.c	2003-04-17 19:33:23.000000000 -0700
@@ -21,6 +21,7 @@
 int tsc_disable __initdata = 0;
 
 extern spinlock_t i8253_lock;
+extern unsigned long jiffies;
 
 static int use_tsc;
 /* Number of usecs that the last interrupt was delayed */
@@ -117,6 +118,8 @@ static unsigned long long monotonic_cloc
 
 static void mark_offset_tsc(void)
 {
+	unsigned long lost,delay;
+	unsigned long delta = last_tsc_low;
 	int count;
 	int countmp;
 	static int count1 = 0;
@@ -161,6 +164,23 @@ static void mark_offset_tsc(void)
 		}
 	}
 
+	/* lost tick compensation */
+	delta = last_tsc_low - delta;
+	{
+		register unsigned long eax, edx;
+		eax = delta;
+		__asm__("mull %2"
+		:"=a" (eax), "=d" (edx)
+		:"rm" (fast_gettimeoffset_quotient),
+		 "0" (eax));
+		delta = edx;
+	}
+	delta += delay_at_last_interrupt;
+	lost = delta/(1000000/HZ);
+	delay = delta%(1000000/HZ);
+	if(lost >= 2)
+		jiffies += lost-1;
+
 	/* update the monotonic base value */
 	this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
 	monotonic_base += cycles_2_ns(this_offset - last_offset);
@@ -169,6 +189,12 @@ static void mark_offset_tsc(void)
 	/* calculate delay_at_last_interrupt */
 	count = ((LATCH-1) - count) * TICK_SIZE;
 	delay_at_last_interrupt = (count + LATCH/2) / LATCH;
+
+	/* catch corner case where tick rollover 
+	 * occurred between tsc and pit reads
+	 */
+	if(abs(delay - delay_at_last_interrupt) > 900)
+		jiffies++;
 }
 
 static void delay_tsc(unsigned long loops)

_