From: Keith Owens <kaos@sgi.com>

(akpm: it adds touch_hangcheck_timer() but doesn't call it from anywhere???)


There are at least two cases where the existing hangcheck
code falls down:

(1) Some kernel events such as dumping or debugging will legitimately
    disable interrupts for long periods.  Restarting after these events
    must ignore the hangcheck.

(2) During hotplug cpu changes, the hangcheck timer can move from one
    cpu to another, see migrate_timers().  There is no guarantee that
    the clock on the new cpu is in sync with the old clock so hangcheck
    may trip with a spurious error.

Like the NMI watchdog, hangcheck-timer needs a facility to ignore the
timeout for special cases.  Add touch_hangcheck_timer().

Signed-off-by: Keith Owens <kaos@sgi.com>
Cc: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 drivers/char/hangcheck-timer.c |   13 +++++++++++++
 include/linux/hangcheck.h      |   17 +++++++++++++++++
 2 files changed, 30 insertions(+)

diff -puN drivers/char/hangcheck-timer.c~add-skip_hangcheck_timer drivers/char/hangcheck-timer.c
--- 25/drivers/char/hangcheck-timer.c~add-skip_hangcheck_timer	Mon May 23 15:57:01 2005
+++ 25-akpm/drivers/char/hangcheck-timer.c	Mon May 23 15:57:01 2005
@@ -143,6 +143,14 @@ static inline unsigned long long monoton
 }
 #endif  /* HAVE_MONOTONIC */
 
+/* One-shot flag: when set, the next check in hangcheck_fire() is skipped */
+static int skip_hangcheck_timer;
+
+void touch_hangcheck_timer(void)
+{
+	skip_hangcheck_timer = 1;	/* cleared again by hangcheck_fire() */
+}
+EXPORT_SYMBOL(touch_hangcheck_timer);
 
 /* Last time scheduled */
 static unsigned long long hangcheck_tsc, hangcheck_tsc_margin;
@@ -164,6 +172,11 @@ static void hangcheck_fire(unsigned long
 	else
 		tsc_diff = (cur_tsc + (~0ULL - hangcheck_tsc)); /* or something */
 
+	if (skip_hangcheck_timer) {
+		skip_hangcheck_timer = 0;
+		tsc_diff = 0;
+	}
+
 	if (tsc_diff > hangcheck_tsc_margin) {
 		if (hangcheck_dump_tasks) {
 			printk(KERN_CRIT "Hangcheck: Task state:\n");
diff -puN /dev/null include/linux/hangcheck.h
--- /dev/null	Thu Apr 11 07:25:15 2002
+++ 25-akpm/include/linux/hangcheck.h	Mon May 23 15:57:01 2005
@@ -0,0 +1,17 @@
+#ifndef LINUX_HANGCHECK_H
+#define LINUX_HANGCHECK_H
+
+/**
+ * touch_hangcheck_timer - make the next HANGCHECK timer check be ignored.
+ *
+ * If the kernel supports the HANGCHECK code, touch_hangcheck_timer() may be
+ * used by code which intentionally disables interrupts for a long time. It
+ * sets a single-shot flag; without HANGCHECK it expands to an empty statement.
+ */
+#ifdef CONFIG_HANGCHECK_TIMER
+extern void touch_hangcheck_timer(void);
+#else
+#define touch_hangcheck_timer() do { } while(0)
+#endif /* CONFIG_HANGCHECK_TIMER */
+
+#endif /* LINUX_HANGCHECK_H */
_