patch-2.1.80 linux/arch/i386/kernel/irq.c

diff -u --recursive --new-file v2.1.79/linux/arch/i386/kernel/irq.c linux/arch/i386/kernel/irq.c
@@ -27,6 +27,7 @@
 #include <linux/malloc.h>
 #include <linux/random.h>
 #include <linux/smp.h>
+#include <linux/tasks.h>
 #include <linux/smp_lock.h>
 #include <linux/init.h>
 
@@ -36,11 +37,35 @@
 #include <asm/bitops.h>
 #include <asm/smp.h>
 #include <asm/pgtable.h>
+#include <asm/delay.h>
 
 #include "irq.h"
 
-#ifdef __SMP_PROF__
-extern volatile unsigned long smp_local_timer_ticks[1+NR_CPUS];
+/*
+ * I had a lockup scenario where a tight loop doing
+ * spin_unlock()/spin_lock() on CPU#1 was racing with
+ * spin_lock() on CPU#0. CPU#0 should have noticed spin_unlock(), but
+ * apparently the spin_unlock() information did not make it
+ * through to CPU#0 ... nasty. Is this by design? Do we have to limit
+ * 'memory update oscillation frequency' artificially, as done here?
+ *
+ * Such 'high frequency update' races can be avoided by careful design, but
+ * some of our major constructs, like spinlocks, use similar techniques;
+ * it would be nice to clarify this issue. Set this define to 0 if you
+ * want to check whether your system freezes. I suspect the delay done
+ * by SYNC_OTHER_CORES() correlates with 'snooping latency', but
+ * I thought that such things were guaranteed by design, since we use
+ * the 'LOCK' prefix.
+ */
+#define SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND 1
+
+#if SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND
+# define SYNC_OTHER_CORES(x) udelay(x+1)
+#else
+/*
+ * We have to allow irqs to arrive between __sti and __cli
+ */
+# define SYNC_OTHER_CORES(x) __asm__ __volatile__ ("nop")
 #endif
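
[ For illustration, the suspected lockup scenario boils down to something
  like the sketch below; hypothetical standalone code, not part of the
  patch. SYNC_OTHER_CORES() simply widens the unlocked window with a
  udelay(), which is why it makes the lockup go away:

	spinlock_t lock = SPIN_LOCK_UNLOCKED;

	void tight_loop(void)		/* CPU#1 */
	{
		for (;;) {
			spin_unlock(&lock);
			/* unlocked window is only a couple of cycles ... */
			spin_lock(&lock);
		}
	}

	void victim(void)		/* CPU#0 */
	{
		spin_lock(&lock);	/* may never observe the unlocked state */
	}
]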
 
 unsigned int local_irq_count[NR_CPUS];
@@ -50,52 +75,115 @@
 int __intel_bh_counter;
 #endif
 
-#ifdef __SMP_PROF__
-static unsigned int int_count[NR_CPUS][NR_IRQS] = {{0},};
-#endif
-
 atomic_t nmi_counter;
 
 /*
- * This contains the irq mask for both irq controllers
+ * The IO-APIC architecture is 'merged' into our current irq
+ * architecture, seamlessly (I hope). It is only
+ * visible through 8 more hardware interrupt lines, but otherwise
+ * drivers are unaffected. The main code is believed to be
+ * NR_IRQS-safe (nothing assumes we have only 16
+ * irq lines anymore), but there might be some places left ...
+ */
+
+/*
+ * This contains the irq mask for both 8259A irq controllers,
+ * and on SMP the extended IO-APIC IRQs 16-23. The IO-APIC
+ * uses this mask too, in probe_irq*().
+ *
+ * (0x0000ffff for NR_IRQS==16, 0x00ffffff for NR_IRQS==24)
  */
-static unsigned int cached_irq_mask = 0xffff;
+static unsigned int cached_irq_mask = (1<<NR_IRQS)-1;
 
-#define cached_21	(((char *)(&cached_irq_mask))[0])
-#define cached_A1	(((char *)(&cached_irq_mask))[1])
+#define cached_21	((cached_irq_mask | io_apic_irqs) & 0xff)
+#define cached_A1	(((cached_irq_mask | io_apic_irqs) >> 8) & 0xff)
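
[ Worked example with hypothetical state: take cached_irq_mask == 0x4
  (only the cascade, IRQ2, masked) and the SMP io_apic_irqs value defined
  below, ~((1<<0)|(1<<2)|(1<<13)) == 0xffffdffa:

	cached_21 == ((0x4 | 0xffffdffa) & 0xff)        == 0xfe
	cached_A1 == (((0x4 | 0xffffdffa) >> 8) & 0xff) == 0xdf

  i.e. the 8259A keeps everything masked except IRQ0 (bit 0 of 0xfe is
  clear) and IRQ13 (bit 5 of 0xdf is clear), exactly the two lines that
  are not routed through the IO-APIC. ]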
 
 spinlock_t irq_controller_lock;
 
+static int irq_events [NR_IRQS] = { -1, };
+static int disabled_irq [NR_IRQS] = { 0, };
+#ifdef __SMP__
+static int irq_owner [NR_IRQS] = { NO_PROC_ID, };
+#endif
+
 /*
- * This is always called from an interrupt context
- * with local interrupts disabled. Don't worry about
- * irq-safe locks.
- *
- * Note that we always ack the primary irq controller,
- * even if the interrupt came from the secondary, as
- * the primary will still have routed it. Oh, the joys
- * of PC hardware.
+ * Not all IRQs can be routed through the IO-APIC, e.g. on certain (older)
+ * boards the timer interrupt and sometimes the keyboard interrupt are
+ * not connected to any IO-APIC pin; they are fed to the CPU ExtInt IRQ line
+ * directly.
+ *
+ * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
+ * This 'mixed mode' IRQ handling costs us one more branch in do_IRQ,
+ * but we have _much_ higher compatibility and robustness this way.
  */
-static inline void mask_and_ack_irq(int irq_nr)
+
+#ifndef __SMP__
+  static const unsigned int io_apic_irqs = 0;
+#else
+	/*
+	 * The timer interrupt is not connected to the IO-APIC on all boards
+	 * (mine is one such board ;), and since it is not performance critical
+	 * anyway, we route it through the INTA pin and win lots of design
+	 * simplicity. Ditto the obsolete EISA DMA chaining irq. All other
+	 * interrupts are routed through the IO-APIC and distributed amongst
+	 * all CPUs, depending on irq traffic and CPU load.
+	 */
+  const unsigned int io_apic_irqs = ~((1<<0)|(1<<2)|(1<<13));
+#endif
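
[ The IO_APIC_IRQ() predicate used throughout this file lives in irq.h;
  a plausible expansion consistent with the mask above (an assumption,
  check irq.h for the real definition):

	#define IO_APIC_IRQ(x)	(io_apic_irqs & (1 << (x)))
]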
+
+static inline int ack_irq(int irq)
 {
+	/*
+	 * The IO-APIC part will be moved to assembly, nested
+	 * interrupts will be ~5 instructions from entry to iret ...
+	 */
+	int should_handle_irq = 0;
+	int cpu = smp_processor_id();
+
+	/*
+	 * We always call this with local irqs disabled
+	 */
 	spin_lock(&irq_controller_lock);
-	cached_irq_mask |= 1 << irq_nr;
-	if (irq_nr & 8) {
-		inb(0xA1);	/* DUMMY */
-		outb(cached_A1,0xA1);
-		outb(0x62,0x20);	/* Specific EOI to cascade */
-		outb(0x20,0xA0);
-	} else {
-		inb(0x21);	/* DUMMY */
-		outb(cached_21,0x21);
-		outb(0x20,0x20);
+
+	if (!irq_events[irq]++ && !disabled_irq[irq]) {
+		should_handle_irq = 1;
+#ifdef __SMP__
+		irq_owner[irq] = cpu;
+#endif
+		hardirq_enter(cpu);
+	}
+
+	if (IO_APIC_IRQ(irq))
+		ack_APIC_irq ();
+	else {
+	/*
+	 * 8259-triggered INTA-cycle interrupt
+	 */
+		if (should_handle_irq)
+			mask_irq(irq);
+			
+		if (irq & 8) {
+			inb(0xA1);	/* DUMMY */
+			outb(0x62,0x20);	/* Specific EOI to cascade */
+			outb(0x20,0xA0);
+		} else {
+			inb(0x21);	/* DUMMY */
+			outb(0x20,0x20);
+		}
 	}
+
 	spin_unlock(&irq_controller_lock);
+
+	return (should_handle_irq);
 }
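
[ An illustrative trace of the soft-masking counters for one irq line:

	irq arrives:	irq_events[irq] 0->1, ack_irq() returns 1,
			hardirq_enter(), the handler runs unmasked
	irq re-arrives:	irq_events[irq] 1->2, ack_irq() returns 0,
			that do_IRQ() invocation returns immediately
	handler done:	--irq_events[irq] == 1, handle_IRQ_event()
			loops via 'goto again' and runs the handler again
	handler done:	--irq_events[irq] == 0, hardirq_exit(), line idle
]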
 
-static inline void set_irq_mask(int irq_nr)
+void set_8259A_irq_mask(int irq)
 {
-	if (irq_nr & 8) {
+	if (irq >= 16) {
+		printk ("HUH #3 (%d)?\n", irq);
+		return;
+	}
+	if (irq & 8) {
 		outb(cached_A1,0xA1);
 	} else {
 		outb(cached_21,0x21);
@@ -106,80 +194,105 @@
  * These have to be protected by the spinlock
  * before being called.
  */
-static inline void mask_irq(unsigned int irq_nr)
+void mask_irq(unsigned int irq)
 {
-	cached_irq_mask |= 1 << irq_nr;
-	set_irq_mask(irq_nr);
+	if (IO_APIC_IRQ(irq))
+		disable_IO_APIC_irq(irq);
+	else {
+		cached_irq_mask |= 1 << irq;
+		set_8259A_irq_mask(irq);
+	}
 }
 
-static inline void unmask_irq(unsigned int irq_nr)
+void unmask_irq(unsigned int irq)
 {
-	cached_irq_mask &= ~(1 << irq_nr);
-	set_irq_mask(irq_nr);
+	if (IO_APIC_IRQ(irq))
+		enable_IO_APIC_irq(irq);
+	else {
+		cached_irq_mask &= ~(1 << irq);
+		set_8259A_irq_mask(irq);
+	}
 }
 
-void disable_irq(unsigned int irq_nr)
-{
-	unsigned long flags;
+/*
+ * This builds up the IRQ handler stubs using some ugly macros in irq.h
+ *
+ * These macros create the low-level assembly IRQ routines that save
+ * register context and call do_IRQ(). do_IRQ() then does all the
+ * operations that are needed to keep the AT (or SMP IOAPIC)
+ * interrupt-controller happy.
+ */
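
[ The macros themselves are in irq.h; roughly, each BUILD_IRQ(n) emits a
  stub that pushes an irq identifier and jumps to the common handler
  built by BUILD_COMMON_IRQ(). A simplified sketch of the idea, not the
  literal macro; the pushed nr-256 is what do_IRQ() later recovers via
  regs.orig_eax & 0xff:

	#define BUILD_IRQ(nr)					\
	__asm__ (						\
		"\nIRQ" #nr "_interrupt:\n\t"			\
		"pushl $" #nr "-256\n\t"			\
		"jmp common_interrupt\n");
]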
 
-	spin_lock_irqsave(&irq_controller_lock, flags);
-	mask_irq(irq_nr);
-	spin_unlock_irqrestore(&irq_controller_lock, flags);
-	synchronize_irq();
-}
 
-void enable_irq(unsigned int irq_nr)
-{
-	unsigned long flags;
+BUILD_COMMON_IRQ()
+/*
+ * ISA PIC or IO-APIC triggered (INTA-cycle or APIC) interrupts:
+ */
+BUILD_IRQ(0) BUILD_IRQ(1) BUILD_IRQ(2) BUILD_IRQ(3)
+BUILD_IRQ(4) BUILD_IRQ(5) BUILD_IRQ(6) BUILD_IRQ(7)
+BUILD_IRQ(8) BUILD_IRQ(9) BUILD_IRQ(10) BUILD_IRQ(11)
+BUILD_IRQ(12) BUILD_IRQ(13) BUILD_IRQ(14) BUILD_IRQ(15)
 
-	spin_lock_irqsave(&irq_controller_lock, flags);
-	unmask_irq(irq_nr);
-	spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
+#ifdef __SMP__
 
 /*
- * This builds up the IRQ handler stubs using some ugly macros in irq.h
+ * The IO-APIC (present only in SMP boards) has 8 more hardware
+ * interrupt pins, for all of them we define an IRQ vector:
  *
- * These macros create the low-level assembly IRQ routines that do all
- * the operations that are needed to keep the AT interrupt-controller
- * happy. They are also written to be fast - and to disable interrupts
- * as little as humanly possible.
+ * raw PCI interrupts 0-3, basically these are the ones used
+ * heavily:
  */
+BUILD_IRQ(16) BUILD_IRQ(17) BUILD_IRQ(18) BUILD_IRQ(19)
 
-#if NR_IRQS != 16
-#error make irq stub building NR_IRQS dependent and remove me.
-#endif
+/*
+ * [FIXME: anyone with 2 separate PCI buses and 2 IO-APICs,
+ *         please speak up and request experimental patches.
+ *         --mingo ]
+ */
 
-BUILD_COMMON_IRQ()
-BUILD_IRQ(FIRST,0,0x01)
-BUILD_IRQ(FIRST,1,0x02)
-BUILD_IRQ(FIRST,2,0x04)
-BUILD_IRQ(FIRST,3,0x08)
-BUILD_IRQ(FIRST,4,0x10)
-BUILD_IRQ(FIRST,5,0x20)
-BUILD_IRQ(FIRST,6,0x40)
-BUILD_IRQ(FIRST,7,0x80)
-BUILD_IRQ(SECOND,8,0x01)
-BUILD_IRQ(SECOND,9,0x02)
-BUILD_IRQ(SECOND,10,0x04)
-BUILD_IRQ(SECOND,11,0x08)
-BUILD_IRQ(SECOND,12,0x10)
-BUILD_IRQ(SECOND,13,0x20)
-BUILD_IRQ(SECOND,14,0x40)
-BUILD_IRQ(SECOND,15,0x80)
+/*
+ * MIRQ (motherboard IRQ) interrupts 0-1:
+ */
+BUILD_IRQ(20) BUILD_IRQ(21)
 
-#ifdef __SMP__
+/*
+ * 'undefined general purpose interrupt'.
+ */
+BUILD_IRQ(22)
+/*
+ * optionally rerouted SMI interrupt:
+ */
+BUILD_IRQ(23)
+
+/*
+ * The following vectors are part of the Linux architecture; there
+ * is no hardware IRQ pin equivalent for them. They are triggered
+ * through the ICC by us (IPIs), via smp_message_pass():
+ */
 BUILD_SMP_INTERRUPT(reschedule_interrupt)
 BUILD_SMP_INTERRUPT(invalidate_interrupt)
 BUILD_SMP_INTERRUPT(stop_cpu_interrupt)
+
+/*
+ * Every Pentium local APIC has two 'local interrupts', with a
+ * soft-definable vector attached to each of them: one
+ * is a timer interrupt, the other one is error counter
+ * overflow. Linux uses the local APIC timer interrupt to get
+ * a much simpler SMP time architecture:
+ */
 BUILD_SMP_TIMER_INTERRUPT(apic_timer_interrupt)
+
 #endif
 
-static void (*interrupt[17])(void) = {
+static void (*interrupt[NR_IRQS])(void) = {
 	IRQ0_interrupt, IRQ1_interrupt, IRQ2_interrupt, IRQ3_interrupt,
 	IRQ4_interrupt, IRQ5_interrupt, IRQ6_interrupt, IRQ7_interrupt,
 	IRQ8_interrupt, IRQ9_interrupt, IRQ10_interrupt, IRQ11_interrupt,
-	IRQ12_interrupt, IRQ13_interrupt, IRQ14_interrupt, IRQ15_interrupt	
+	IRQ12_interrupt, IRQ13_interrupt, IRQ14_interrupt, IRQ15_interrupt
+#ifdef __SMP__
+	,IRQ16_interrupt, IRQ17_interrupt, IRQ18_interrupt, IRQ19_interrupt,
+	IRQ20_interrupt, IRQ21_interrupt, IRQ22_interrupt, IRQ23_interrupt
+#endif
 };
 
 /*
@@ -215,135 +328,58 @@
  */
 static struct irqaction irq2  = { no_action, 0, 0, "cascade", NULL, NULL};
 
-static struct irqaction *irq_action[16] = {
+static struct irqaction *irq_action[NR_IRQS] = {
 	NULL, NULL, NULL, NULL,
 	NULL, NULL, NULL, NULL,
 	NULL, NULL, NULL, NULL,
 	NULL, NULL, NULL, NULL
+#ifdef __SMP__
+	,NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL
+#endif
 };
 
 int get_irq_list(char *buf)
 {
-	int i;
+	int i, j;
 	struct irqaction * action;
 	char *p = buf;
 
+	p += sprintf(p, "           ");
+	for (j=0; j<smp_num_cpus; j++)
+		p += sprintf(p, "CPU%d       ",j);
+	*p++ = '\n';
+
 	for (i = 0 ; i < NR_IRQS ; i++) {
 		action = irq_action[i];
 		if (!action) 
 			continue;
-		p += sprintf(p, "%3d: %10u   %s",
-			i, kstat.interrupts[i], action->name);
+		p += sprintf(p, "%3d: ",i);
+#ifndef __SMP__
+		p += sprintf(p, "%10u ", kstat.interrupts[0][i]);
+#else
+		for (j=0; j<smp_num_cpus; j++)
+			p += sprintf(p, "%10u ",
+				kstat.interrupts[cpu_logical_map[j]][i]);
+#endif
+		if (IO_APIC_IRQ(i))
+			p += sprintf(p, " IO-APIC ");
+		else
+			p += sprintf(p, "  XT PIC ");
+		p += sprintf(p, "  %s", action->name);
+
 		for (action=action->next; action; action = action->next) {
 			p += sprintf(p, ", %s", action->name);
 		}
 		*p++ = '\n';
 	}
 	p += sprintf(p, "NMI: %10u\n", atomic_read(&nmi_counter));
-#ifdef __SMP_PROF__
+#ifdef __SMP__
 	p += sprintf(p, "IPI: %10lu\n", ipi_count);
 #endif		
 	return p - buf;
 }
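
[ On a two-CPU box the resulting /proc/interrupts now looks roughly like
  this (counts invented for illustration, layout derived from the
  sprintf() calls above):

	           CPU0       CPU1       
	  0:     155616          0    XT PIC   timer
	  1:       1704       1823   IO-APIC   keyboard
	 13:          1          0    XT PIC   fpu
	 14:      12467      11201   IO-APIC   ide0
	NMI:          0
	IPI:       4593
]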
 
-#ifdef __SMP_PROF__
-
-extern unsigned int prof_multiplier[NR_CPUS];
-extern unsigned int prof_counter[NR_CPUS];
-
-int get_smp_prof_list(char *buf) {
-	int i,j, len = 0;
-	struct irqaction * action;
-	unsigned long sum_spins = 0;
-	unsigned long sum_spins_syscall = 0;
-	unsigned long sum_spins_sys_idle = 0;
-	unsigned long sum_smp_idle_count = 0;
-	unsigned long sum_local_timer_ticks = 0;
-
-	for (i=0;i<smp_num_cpus;i++) {
-		int cpunum = cpu_logical_map[i];
-		sum_spins+=smp_spins[cpunum];
-		sum_spins_syscall+=smp_spins_syscall[cpunum];
-		sum_spins_sys_idle+=smp_spins_sys_idle[cpunum];
-		sum_smp_idle_count+=smp_idle_count[cpunum];
-		sum_local_timer_ticks+=smp_local_timer_ticks[cpunum];
-	}
-
-	len += sprintf(buf+len,"CPUS: %10i \n", smp_num_cpus);
-	len += sprintf(buf+len,"            SUM ");
-	for (i=0;i<smp_num_cpus;i++)
-		len += sprintf(buf+len,"        P%1d ",cpu_logical_map[i]);
-	len += sprintf(buf+len,"\n");
-	for (i = 0 ; i < NR_IRQS ; i++) {
-		action = *(i + irq_action);
-		if (!action || !action->handler)
-			continue;
-		len += sprintf(buf+len, "%3d: %10d ",
-			i, kstat.interrupts[i]);
-		for (j=0;j<smp_num_cpus;j++)
-			len+=sprintf(buf+len, "%10d ",
-				int_count[cpu_logical_map[j]][i]);
-		len += sprintf(buf+len, "  %s", action->name);
-		for (action=action->next; action; action = action->next) {
-			len += sprintf(buf+len, ", %s", action->name);
-		}
-		len += sprintf(buf+len, "\n");
-	}
-	len+=sprintf(buf+len, "LCK: %10lu",
-		sum_spins);
-
-	for (i=0;i<smp_num_cpus;i++)
-		len+=sprintf(buf+len," %10lu",smp_spins[cpu_logical_map[i]]);
-
-	len +=sprintf(buf+len,"   spins from int\n");
-
-	len+=sprintf(buf+len, "LCK: %10lu",
-		sum_spins_syscall);
-
-	for (i=0;i<smp_num_cpus;i++)
-		len+=sprintf(buf+len," %10lu",smp_spins_syscall[cpu_logical_map[i]]);
-
-	len +=sprintf(buf+len,"   spins from syscall\n");
-
-	len+=sprintf(buf+len, "LCK: %10lu",
-		sum_spins_sys_idle);
-
-	for (i=0;i<smp_num_cpus;i++)
-		len+=sprintf(buf+len," %10lu",smp_spins_sys_idle[cpu_logical_map[i]]);
-
-	len +=sprintf(buf+len,"   spins from sysidle\n");
-	len+=sprintf(buf+len,"IDLE %10lu",sum_smp_idle_count);
-
-	for (i=0;i<smp_num_cpus;i++)
-		len+=sprintf(buf+len," %10lu",smp_idle_count[cpu_logical_map[i]]);
-
-	len +=sprintf(buf+len,"   idle ticks\n");
-
-	len+=sprintf(buf+len,"TICK %10lu",sum_local_timer_ticks);
-	for (i=0;i<smp_num_cpus;i++)
-		len+=sprintf(buf+len," %10lu",smp_local_timer_ticks[cpu_logical_map[i]]);
-
-	len +=sprintf(buf+len,"   local APIC timer ticks\n");
-
-	len+=sprintf(buf+len,"MULT:          ");
-	for (i=0;i<smp_num_cpus;i++)
-		len+=sprintf(buf+len," %10u",prof_multiplier[cpu_logical_map[i]]);
-	len +=sprintf(buf+len,"   profiling multiplier\n");
-
-	len+=sprintf(buf+len,"COUNT:         ");
-	for (i=0;i<smp_num_cpus;i++)
-		len+=sprintf(buf+len," %10u",prof_counter[cpu_logical_map[i]]);
-
-	len +=sprintf(buf+len,"   profiling counter\n");
-
-	len+=sprintf(buf+len, "IPI: %10lu   received\n",
-		ipi_count);
-
-	return len;
-}
-#endif 
-
-
 /*
  * Global interrupt locks for SMP. Allow interrupts to come in on any
  * CPU, yet make cli/sti act globally to protect critical regions..
@@ -387,6 +423,7 @@
 		 * and other fun things.
 		 */
 		atomic_sub(local_count, &global_irq_count);
+		global_irq_holder = NO_PROC_ID;
 		global_irq_lock = 0;
 
 		/*
@@ -394,6 +431,12 @@
 		 * their things before trying to get the lock again.
 		 */
 		for (;;) {
+			atomic_add(local_count, &global_irq_count);
+			__sti();
+			SYNC_OTHER_CORES(cpu);
+			__cli();
+			atomic_sub(local_count, &global_irq_count);
+			SYNC_OTHER_CORES(cpu);
 			check_smp_invalidate(cpu);
 			if (atomic_read(&global_irq_count))
 				continue;
@@ -403,6 +446,7 @@
 				break;
 		}
 		atomic_add(local_count, &global_irq_count);
+		global_irq_holder = cpu;
 	}
 }
 
@@ -413,16 +457,30 @@
  * are no interrupts that are executing on another
  * CPU we need to call this function.
  *
+ * We have to give pending interrupts a chance to
+ * arrive (i.e. let them get as far as hardirq_enter()),
+ * even if they are arriving at another CPU.
+ *
  * On UP this is a no-op.
+ *
+ * UPDATE: this method is not quite safe, as it won't
+ * catch irq handlers polling for the irq lock bit
+ * in __global_cli():get_interrupt_lock():wait_on_irq().
+ * Drivers should rather use disable_irq()/enable_irq()
+ * and/or synchronize_one_irq().
  */
 void synchronize_irq(void)
 {
-	int cpu = smp_processor_id();
-	int local_count = local_irq_count[cpu];
+	int local_count = local_irq_count[smp_processor_id()];
 
-	/* Do we need to wait? */
 	if (local_count != atomic_read(&global_irq_count)) {
-		/* The stupid way to do this */
+		int i;
+
+		/* The very stupid way to do this */
+		for (i=0; i<NR_IRQS; i++) {
+			disable_irq(i);
+			enable_irq(i);
+		}
 		cli();
 		sti();
 	}
@@ -493,53 +551,225 @@
 	}
 }
 
+void synchronize_one_irq(unsigned int irq)
+{
+	int cpu = smp_processor_id(), owner;
+	int local_count = local_irq_count[cpu];
+	unsigned long flags;
+
+	__save_flags(flags);
+	__cli();
+	release_irqlock(cpu);
+	atomic_sub(local_count, &global_irq_count);
+
+repeat:	
+	spin_lock(&irq_controller_lock);
+	owner = irq_owner[irq];
+	spin_unlock(&irq_controller_lock);
+
+	if ((owner != NO_PROC_ID) && (owner != cpu)) {
+		atomic_add(local_count, &global_irq_count);
+		__sti();
+		SYNC_OTHER_CORES(cpu);
+		__cli();
+		atomic_sub(local_count, &global_irq_count);
+		SYNC_OTHER_CORES(cpu);
+		goto repeat;
+	}
+
+	if (!disabled_irq[irq])
+		printk("\n...WHAT??.#1...\n");
+
+	atomic_add(local_count, &global_irq_count);
+	__restore_flags(flags);
+}
+
 #endif
 
-/*
- * do_IRQ handles all normal device IRQ's (the special
- * SMP cross-CPU interrupts have their own specific
- * handlers).
- */
-asmlinkage void do_IRQ(struct pt_regs regs)
+static void handle_IRQ_event(int irq, struct pt_regs * regs)
 {
-	int irq = regs.orig_eax & 0xff;
 	struct irqaction * action;
-	int status, cpu;
+	int status, cpu = smp_processor_id();
 
-	/* 
-	 * mask and ack quickly, we don't want the irq controller
-	 * thinking we're snobs just because some other CPU has
-	 * disabled global interrupts (we have already done the
-	 * INT_ACK cycles, it's too late to try to pretend to the
-	 * controller that we aren't taking the interrupt).
-	 */
-	mask_and_ack_irq(irq);
-
-	cpu = smp_processor_id();
-	irq_enter(cpu, irq);
-	kstat.interrupts[irq]++;
+again:
+#ifdef __SMP__
+	while (test_bit(0,&global_irq_lock)) mb();
+#endif
 
-	/* Return with this interrupt masked if no action */
+	kstat.interrupts[cpu][irq]++;
 	status = 0;
 	action = *(irq + irq_action);
+
 	if (action) {
+#if 0
 		if (!(action->flags & SA_INTERRUPT))
 			__sti();
+#endif
 
 		do {
 			status |= action->flags;
-			action->handler(irq, action->dev_id, &regs);
+			action->handler(irq, action->dev_id, regs);
 			action = action->next;
 		} while (action);
 		if (status & SA_SAMPLE_RANDOM)
 			add_interrupt_randomness(irq);
 		__cli();
-		spin_lock(&irq_controller_lock);
-		unmask_irq(irq);
+	}
+
+	spin_lock(&irq_controller_lock);
+
+#ifdef __SMP__
+	release_irqlock(cpu);
+#endif
+
+	if ((--irq_events[irq]) && (!disabled_irq[irq])) {
+		spin_unlock(&irq_controller_lock);
+		goto again;
+	}
+#ifdef __SMP__
+	/* FIXME: move this into hardirq.h */
+	irq_owner[irq] = NO_PROC_ID;
+#endif
+	hardirq_exit(cpu);
+
+	spin_unlock(&irq_controller_lock);
+}
+
+
+/*
+ * disable_irq()/enable_irq() wait for all irq contexts to finish
+ * executing. They are also recursive.
+ */
+void disable_irq(unsigned int irq)
+{
+#ifdef __SMP__
+	int cpu = smp_processor_id();
+#endif
+	unsigned long f, flags;
+
+	save_flags(flags);
+	__save_flags(f);
+	__cli();
+	spin_lock(&irq_controller_lock);
+
+	disabled_irq[irq]++;
+
+#ifdef __SMP__
+	/*
+	 * We have to wait for all irq handlers belonging to this IRQ
+	 * vector to finish executing.
+	 */
+	if ((irq_owner[irq] == NO_PROC_ID) || (irq_owner[irq] == cpu) ||
+		(disabled_irq[irq] > 1)) {
+
 		spin_unlock(&irq_controller_lock);
+		__restore_flags(f);
+		restore_flags(flags);
+		if (disabled_irq[irq] > 100)
+			printk("disable_irq(%d), infinite recursion!\n",irq);
+		return;
+	}
+#endif
+
+	spin_unlock(&irq_controller_lock);
+
+#ifdef __SMP__
+	synchronize_one_irq(irq);
+#endif
+
+	__restore_flags(f);
+	restore_flags(flags);
+}
+
+void enable_irq(unsigned int irq)
+{
+	unsigned long flags;
+	int cpu = smp_processor_id();
+
+	spin_lock_irqsave(&irq_controller_lock,flags);
+
+	if (!disabled_irq[irq]) {
+		spin_unlock_irqrestore(&irq_controller_lock,flags);
+		printk("more enable_irq(%d)'s than disable_irq(%d)'s!!",irq,irq);
+		return;
+	}
+
+	disabled_irq[irq]--;
+
+#ifndef __SMP__
+	if (disabled_irq[irq]) {
+		spin_unlock_irqrestore(&irq_controller_lock,flags);
+		return;
+	}
+#else
+	if (disabled_irq[irq] || (irq_owner[irq] != NO_PROC_ID)) {
+		spin_unlock_irqrestore(&irq_controller_lock,flags);
+		return;
+	}
+#endif
+
+	/*
+	 * Nobody is executing this irq handler currently, let's check
+	 * whether we have outstanding events to be handled.
+	 */
+
+	if (irq_events[irq]) {
+		struct pt_regs regs;
+
+#ifdef __SMP__
+		irq_owner[irq] = cpu;
+#endif
+		hardirq_enter(cpu);
+#ifdef __SMP__
+		release_irqlock(cpu);
+#endif
+		spin_unlock(&irq_controller_lock);
+
+		handle_IRQ_event(irq,&regs);
+		__restore_flags(flags);
+		return;
 	}
+	spin_unlock_irqrestore(&irq_controller_lock,flags);
+}
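
[ A usage sketch of the new nesting semantics; MYDEV_IRQ and the driver
  function are hypothetical:

	void mydev_reset_card(void)
	{
		disable_irq(MYDEV_IRQ);	/* also waits for running handlers */
		disable_irq(MYDEV_IRQ);	/* nested call just bumps the count */

		/* ... poke the hardware with the line quiesced ... */

		enable_irq(MYDEV_IRQ);
		enable_irq(MYDEV_IRQ);	/* last one replays pending events */
	}
]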
+
+/*
+ * do_IRQ handles all normal device IRQ's (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ *
+ * The biggest change on SMP is the fact that we no longer mask
+ * interrupts in hardware; please believe me, this is unavoidable,
+ * the hardware is largely message-oriented. I tried to force our
+ * state-driven irq handling scheme onto the IO-APIC, but to no avail.
+ *
+ * So we soft-disable interrupts via 'event counters'; the first 'incl'
+ * will do the IRQ handling. This also has the nice side effect of increased
+ * overlapping ... I have seen no driver problems so far.
+ */
+asmlinkage void do_IRQ(struct pt_regs regs)
+{
+	/* 
+	 * We ack quickly, we don't want the irq controller
+	 * thinking we're snobs just because some other CPU has
+	 * disabled global interrupts (we have already done the
+	 * INT_ACK cycles, it's too late to try to pretend to the
+	 * controller that we aren't taking the interrupt).
+	 *
+	 * 0 return value means that this irq is already being
+	 * handled by some other CPU. (or is disabled)
+	 */
+	int irq = regs.orig_eax & 0xff;
+
+/*
+	printk("<%d>",irq);
+ */
+	if (!ack_irq(irq))
+		return;
+
+	handle_IRQ_event(irq,&regs);
+
+	unmask_irq(irq);
 
-	irq_exit(cpu, irq);
 	/*
 	 * This should be conditional: we should really get
 	 * a return code from the irq handler to tell us
@@ -581,6 +811,15 @@
 
 	if (!shared) {
 		spin_lock(&irq_controller_lock);
+		if (IO_APIC_IRQ(irq)) {
+			/*
+			 * First disable it in the 8259A:
+			 */
+			cached_irq_mask |= 1 << irq;
+			if (irq < 16)
+				set_8259A_irq_mask(irq);
+			setup_IO_APIC_irq(irq);
+		}
 		unmask_irq(irq);
 		spin_unlock(&irq_controller_lock);
 	}
@@ -597,12 +836,13 @@
 	int retval;
 	struct irqaction * action;
 
-	if (irq > 15)
+	if (irq >= NR_IRQS)
 		return -EINVAL;
 	if (!handler)
 		return -EINVAL;
 
-	action = (struct irqaction *)kmalloc(sizeof(struct irqaction), GFP_KERNEL);
+	action = (struct irqaction *)
+			kmalloc(sizeof(struct irqaction), GFP_KERNEL);
 	if (!action)
 		return -ENOMEM;
 
@@ -625,7 +865,7 @@
 	struct irqaction * action, **p;
 	unsigned long flags;
 
-	if (irq > 15) {
+	if (irq >= NR_IRQS) {
 		printk("Trying to free IRQ%d\n",irq);
 		return;
 	}
@@ -644,41 +884,100 @@
 	printk("Trying to free free IRQ%d\n",irq);
 }
 
+/*
+ * probing is always single-threaded [FIXME: is this true?]
+ */
+static unsigned int probe_irqs[NR_CPUS][NR_IRQS];
+
 unsigned long probe_irq_on (void)
 {
-	unsigned int i, irqs = 0;
+	unsigned int i, j, irqs = 0;
 	unsigned long delay;
 
-	/* first, enable any unassigned irqs */
-	for (i = 15; i > 0; i--) {
+	/*
+	 * save current irq counts
+	 */
+	memcpy(probe_irqs,kstat.interrupts,NR_CPUS*NR_IRQS*sizeof(int));
+
+	/*
+	 * first, enable any unassigned irqs
+	 */
+	for (i = NR_IRQS-1; i > 0; i--) {
 		if (!irq_action[i]) {
-			enable_irq(i);
+			spin_lock(&irq_controller_lock);
+			unmask_irq(i);
 			irqs |= (1 << i);
+			spin_unlock(&irq_controller_lock);
 		}
 	}
 
-	/* wait for spurious interrupts to mask themselves out again */
+	/*
+	 * wait for spurious interrupts to increase counters
+	 */
 	for (delay = jiffies + HZ/10; delay > jiffies; )
-		/* about 100ms delay */;
+		/* about 100ms delay */ synchronize_irq();
+
+	/*
+	 * now filter out any obviously spurious interrupts
+	 */
+	for (i=0; i<NR_IRQS; i++)
+		for (j=0; j<NR_CPUS; j++)
+			if (kstat.interrupts[j][i] != probe_irqs[j][i])
+				irqs &= ~(1<<i);
 
-	/* now filter out any obviously spurious interrupts */
-	return irqs & ~cached_irq_mask;
+	return irqs;
 }
 
 int probe_irq_off (unsigned long irqs)
 {
-	unsigned int i;
+	int i,j, irq_found = -1;
 
-#ifdef DEBUG
-	printk("probe_irq_off: irqs=0x%04lx irqmask=0x%04x\n", irqs, cached_irq_mask);
-#endif
-	irqs &= cached_irq_mask;
-	if (!irqs)
-		return 0;
-	i = ffz(~irqs);
-	if (irqs != (irqs & (1 << i)))
-		i = -i;
-	return i;
+	for (i=0; i<NR_IRQS; i++) {
+		int sum = 0;
+		for (j=0; j<NR_CPUS; j++) {
+			sum += kstat.interrupts[j][i];
+			sum -= probe_irqs[j][i];
+		}
+		if (sum && (irqs & (1<<i))) {
+			if (irq_found != -1) {
+				irq_found = -irq_found;
+				goto out;
+			} else
+				irq_found = i;
+		}
+	}
+	if (irq_found == -1)
+		irq_found = 0;
+out:
+	return irq_found;
+}
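
[ A sketch of the (unchanged) driver-side probing protocol;
  mydev_trigger_interrupt() is a hypothetical helper:

	int mydev_find_irq(void)
	{
		unsigned long mask;
		int irq;

		mask = probe_irq_on();		/* snapshot counters, unmask free lines */
		mydev_trigger_interrupt();	/* make the card raise its irq */
		udelay(100);
		irq = probe_irq_off(mask);	/* >0: found, 0: none, <0: ambiguous */
		return irq;
	}
]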
+
+void init_IO_APIC_traps(void)
+{
+	int i;
+	/*
+	 * NOTE! The local APIC isn't very good at handling
+	 * multiple interrupts at the same interrupt level.
+	 * As the interrupt level is determined by taking the
+	 * vector number and shifting that right by 4, we
+	 * want to spread these out a bit so that they don't
+	 * all fall in the same interrupt level.
+	 *
+	 * Also, we've got to be careful not to trash gate
+	 * 0x80, because int 0x80 is, hm, kind of importantish ;)
+	 */
+	for (i = 0; i < NR_IRQS ; i++)
+		if (IO_APIC_GATE_OFFSET+(i<<3) <= 0xfe)  /* HACK */ {
+			if (IO_APIC_IRQ(i)) {
+				/*
+				 * First disable it in the 8259A:
+				 */
+				cached_irq_mask |= 1 << i;
+				if (i < 16)
+					set_8259A_irq_mask(i);
+				setup_IO_APIC_irq(i);
+			}
+		}
 }
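
[ Worked example of the vector spreading, assuming a hypothetical
  IO_APIC_GATE_OFFSET of 0x51: IRQ i gets vector 0x51 + 8*i, so the
  local APIC priority level (vector >> 4) advances every second IRQ,
  and the odd base keeps every vector clear of gate 0x80:

	IRQ 0  -> vector 0x51, level 5
	IRQ 1  -> vector 0x59, level 5
	IRQ 2  -> vector 0x61, level 6
	...
	IRQ 21 -> vector 0xf9, level 15
	IRQ 22/23 -> would exceed 0xfe, hence the '<= 0xfe' hack above.
]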
 
 __initfunc(void init_IRQ(void))
@@ -690,18 +989,25 @@
 	outb_p(LATCH & 0xff , 0x40);	/* LSB */
 	outb(LATCH >> 8 , 0x40);	/* MSB */
 
-	for (i = 0; i < NR_IRQS ; i++)
+	printk("INIT IRQ\n");
+	for (i=0; i<NR_IRQS; i++) {
+		irq_events[i] = 0;
+#ifdef __SMP__
+		irq_owner[i] = NO_PROC_ID;
+#endif
+		disabled_irq[i] = 0;
+	}
+	/*
+	 * 16 old-style INTA-cycle interrupt gates:
+	 */
+	for (i = 0; i < 16; i++)
 		set_intr_gate(0x20+i,interrupt[i]);
 
 #ifdef __SMP__	
-	/*
-	 * NOTE! The local APIC isn't very good at handling
-	 * multiple interrupts at the same interrupt level.
-	 * As the interrupt level is determined by taking the
-	 * vector number and shifting that right by 4, we
-	 * want to spread these out a bit so that they don't
-	 * all fall in the same interrupt level
-	 */
+
+	for (i = 0; i < NR_IRQS ; i++)
+		if (IO_APIC_GATE_OFFSET+(i<<3) <= 0xfe)  /* hack -- mingo */
+			set_intr_gate(IO_APIC_GATE_OFFSET+(i<<3),interrupt[i]);
 
 	/*
 	 * The reschedule interrupt slowly changes its functionality,
@@ -712,21 +1018,23 @@
 	 * [ It has to be here .. it doesn't work if you put
 	 *   it down the bottom - assembler explodes 8) ]
 	 */
-	/* IRQ '16' (trap 0x30) - IPI for rescheduling */
-	set_intr_gate(0x20+i, reschedule_interrupt);
 
+	/* IPI for rescheduling */
+	set_intr_gate(0x30, reschedule_interrupt);
 
-	/* IRQ '17' (trap 0x31) - IPI for invalidation */
-	set_intr_gate(0x21+i, invalidate_interrupt);
+	/* IPI for invalidation */
+	set_intr_gate(0x31, invalidate_interrupt);
 
-	/* IRQ '18' (trap 0x40) - IPI for CPU halt */
-	set_intr_gate(0x30+i, stop_cpu_interrupt);
+	/* IPI for CPU halt */
+	set_intr_gate(0x40, stop_cpu_interrupt);
+
+	/* self generated IPI for local APIC timer */
+	set_intr_gate(0x41, apic_timer_interrupt);
 
-	/* IRQ '19' (trap 0x41) - self generated IPI for local APIC timer */
-	set_intr_gate(0x31+i, apic_timer_interrupt);
 #endif	
 	request_region(0x20,0x20,"pic1");
 	request_region(0xa0,0x20,"pic2");
 	setup_x86_irq(2, &irq2);
 	setup_x86_irq(13, &irq13);
 } 
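
[ For reference, the 8254 programming at the top of init_IRQ(): the PIT
  input clock is 1193180 Hz, so LATCH is roughly 1193180/HZ, i.e. about
  11932 (0x2e9c) for HZ==100, written LSB first and then MSB to port
  0x40 to produce the 100 Hz tick. ]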
+
