From: Christoph Lameter <clameter@sgi.com>

Changelog
	* Make cmpxchg and cmpxchg8b generally available on the i386
	  platform.
	* Provide emulation of cmpxchg suitable for uniprocessor systems
	  if built and run on a 386.
	* Provide emulation of cmpxchg8b suitable for uniprocessor systems
	  if built and run on a 386 or 486.
	* Provide an inline function to atomically get a 64-bit value via
	  cmpxchg8b on an SMP system (courtesy of Nick Piggin).
	  (This is important for i386 PAE mode and other places where
	  atomic 64-bit operations are useful; see the usage sketch below.)
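
A rough usage sketch (illustration only, not part of the patch): the
event_count counter and event_count_add() helper below are hypothetical
names, but they show one way get_64bit() and cmpxchg8b() can be combined
into a lock-free 64-bit update on i386.

	#include <asm/system.h>

	/* hypothetical 64-bit event counter shared between CPUs */
	static unsigned long long event_count;

	static void event_count_add(unsigned long long delta)
	{
		unsigned long long old, new;

		do {
			/* atomic 64-bit read, even on 32-bit i386/PAE */
			old = get_64bit(&event_count);
			new = old + delta;
			/* retry if another CPU changed the counter meanwhile */
		} while (cmpxchg8b(&event_count, old, new) != old);
	}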

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/arch/i386/Kconfig            |    5 +
 25-akpm/arch/i386/kernel/cpu/intel.c |   99 +++++++++++++++++++++++++++++++++++
 25-akpm/include/asm-i386/system.h    |   89 +++++++++++++++++++++++++++++--
 3 files changed, 187 insertions(+), 6 deletions(-)

diff -puN arch/i386/Kconfig~page-fault-scalability-patch-v11-universal-cmpxchg-for-i386 arch/i386/Kconfig
--- 25/arch/i386/Kconfig~page-fault-scalability-patch-v11-universal-cmpxchg-for-i386	Thu Dec  2 12:39:35 2004
+++ 25-akpm/arch/i386/Kconfig	Thu Dec  2 12:39:35 2004
@@ -351,6 +351,11 @@ config X86_CMPXCHG
 	depends on !M386
 	default y
 
+config X86_CMPXCHG8B
+	bool
+	depends on !M386 && !M486
+	default y
+
 config X86_XADD
 	bool
 	depends on !M386
diff -puN arch/i386/kernel/cpu/intel.c~page-fault-scalability-patch-v11-universal-cmpxchg-for-i386 arch/i386/kernel/cpu/intel.c
--- 25/arch/i386/kernel/cpu/intel.c~page-fault-scalability-patch-v11-universal-cmpxchg-for-i386	Thu Dec  2 12:39:35 2004
+++ 25-akpm/arch/i386/kernel/cpu/intel.c	Thu Dec  2 12:39:35 2004
@@ -6,6 +6,7 @@
 #include <linux/bitops.h>
 #include <linux/smp.h>
 #include <linux/thread_info.h>
+#include <linux/module.h>
 
 #include <asm/processor.h>
 #include <asm/msr.h>
@@ -287,5 +288,103 @@ __init int intel_cpu_init(void)
 	return 0;
 }
 
+#ifndef CONFIG_X86_CMPXCHG
+unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new)
+{
+	u8 prev;
+	unsigned long flags;
+	/*
+	 * Check if the kernel was compiled for an old cpu but the
+	 * currently running cpu can do cmpxchg after all.
+	 * All CPUs except the 386 support CMPXCHG.
+	 */
+	if (cpu_data->x86 > 3)
+		return __cmpxchg(ptr, old, new, sizeof(u8));
+
+	/* Poor man's cmpxchg for 386. Unsuitable for SMP */
+	local_irq_save(flags);
+	prev = *(u8 *)ptr;
+	if (prev == old)
+		*(u8 *)ptr = new;
+	local_irq_restore(flags);
+	return prev;
+}
+
+EXPORT_SYMBOL(cmpxchg_386_u8);
+
+unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new)
+{
+	u16 prev;
+	unsigned long flags;
+	/*
+	 * Check if the kernel was compiled for an old cpu but the
+	 * currently running cpu can do cmpxchg after all.
+	 * All CPUs except the 386 support CMPXCHG.
+	 */
+	if (cpu_data->x86 > 3)
+		return __cmpxchg(ptr, old, new, sizeof(u16));
+
+	/* Poor man's cmpxchg for 386. Unsuitable for SMP */
+	local_irq_save(flags);
+	prev = *(u16 *)ptr;
+	if (prev == old)
+		*(u16 *)ptr = new;
+	local_irq_restore(flags);
+	return prev;
+}
+
+EXPORT_SYMBOL(cmpxchg_386_u16);
+
+unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new)
+{
+	u32 prev;
+	unsigned long flags;
+	/*
+	 * Check if the kernel was compiled for an old cpu but the
+	 * currently running cpu can do cmpxchg after all.
+	 * All CPUs except the 386 support CMPXCHG.
+	 */
+	if (cpu_data->x86 > 3)
+		return __cmpxchg(ptr, old, new, sizeof(u32));
+
+	/* Poor man's cmpxchg for 386. Unsuitable for SMP */
+	local_irq_save(flags);
+	prev = *(u32 *)ptr;
+	if (prev == old)
+		*(u32 *)ptr = new;
+	local_irq_restore(flags);
+	return prev;
+}
+
+EXPORT_SYMBOL(cmpxchg_386_u32);
+#endif
+
+#ifndef CONFIG_X86_CMPXCHG8B
+unsigned long long cmpxchg8b_486(volatile unsigned long long *ptr,
+	       unsigned long long old, unsigned long long newv)
+{
+	unsigned long long prev;
+	unsigned long flags;
+
+	/*
+	 * Check if the kernel was compiled for an old cpu but
+	 * we are really running on a cpu capable of cmpxchg8b
+	 */
+
+	if (cpu_has(cpu_data, X86_FEATURE_CX8))
+		return __cmpxchg8b(ptr, old, newv);
+
+	/* Poor man's cmpxchg8b for 386 and 486. Not suitable for SMP */
+	local_irq_save(flags);
+	prev = *ptr;
+	if (prev == old)
+		*ptr = newv;
+	local_irq_restore(flags);
+	return prev;
+}
+
+EXPORT_SYMBOL(cmpxchg8b_486);
+#endif
+
 // arch_initcall(intel_cpu_init);
 
diff -puN include/asm-i386/system.h~page-fault-scalability-patch-v11-universal-cmpxchg-for-i386 include/asm-i386/system.h
--- 25/include/asm-i386/system.h~page-fault-scalability-patch-v11-universal-cmpxchg-for-i386	Thu Dec  2 12:39:35 2004
+++ 25-akpm/include/asm-i386/system.h	Thu Dec  2 12:39:35 2004
@@ -149,6 +149,9 @@ struct __xchg_dummy { unsigned long a[10
 #define __xg(x) ((struct __xchg_dummy *)(x))
 
 
+#define ll_low(x)	*(((unsigned int*)&(x))+0)
+#define ll_high(x)	*(((unsigned int*)&(x))+1)
+
 /*
  * The semantics of XCHGCMP8B are a bit strange, this is why
  * there is a loop and the loading of %%eax and %%edx has to
@@ -184,8 +187,6 @@ static inline void __set_64bit_constant 
 {
 	__set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL));
 }
-#define ll_low(x)	*(((unsigned int*)&(x))+0)
-#define ll_high(x)	*(((unsigned int*)&(x))+1)
 
 static inline void __set_64bit_var (unsigned long long *ptr,
 			 unsigned long long value)
@@ -203,6 +204,26 @@ static inline void __set_64bit_var (unsi
  __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \
  __set_64bit(ptr, ll_low(value), ll_high(value)) )
 
+static inline unsigned long long __get_64bit(unsigned long long * ptr)
+{
+	unsigned long long ret;
+	__asm__ __volatile__ (
+		"\n1:\t"
+		"movl (%1), %%eax\n\t"
+		"movl 4(%1), %%edx\n\t"
+		"movl %%eax, %%ebx\n\t"
+		"movl %%edx, %%ecx\n\t"
+		LOCK_PREFIX "cmpxchg8b (%1)\n\t"
+		"jnz 1b"
+		:	"=A"(ret)
+		:	"D"(ptr)
+		:	"ebx", "ecx", "memory");
+	return ret;
+}
+
+#define get_64bit(ptr) __get_64bit(ptr)
+
+
 /*
  * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
  * Note 2: xchg has side effect, so that attribute volatile is necessary,
@@ -240,7 +261,41 @@ static inline unsigned long __xchg(unsig
  */
 
 #ifdef CONFIG_X86_CMPXCHG
+
 #define __HAVE_ARCH_CMPXCHG 1
+#define cmpxchg(ptr,o,n)\
+	((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \
+					(unsigned long)(n), sizeof(*(ptr))))
+
+#else
+
+/*
+ * Building a kernel capable of running on an 80386. It may be
+ * necessary to simulate cmpxchg on the 80386 CPU. For that purpose
+ * we define a function for each of the sizes we support.
+ */
+
+extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8);
+extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16);
+extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32);
+
+static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
+				      unsigned long new, int size)
+{
+	switch (size) {
+	case 1:
+		return cmpxchg_386_u8(ptr, old, new);
+	case 2:
+		return cmpxchg_386_u16(ptr, old, new);
+	case 4:
+		return cmpxchg_386_u32(ptr, old, new);
+	}
+	return old;
+}
+
+#define cmpxchg(ptr,o,n)\
+	((__typeof__(*(ptr)))cmpxchg_386((ptr), (unsigned long)(o), \
+					(unsigned long)(n), sizeof(*(ptr))))
 #endif
 
 static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
@@ -270,10 +325,32 @@ static inline unsigned long __cmpxchg(vo
 	return old;
 }
 
-#define cmpxchg(ptr,o,n)\
-	((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
-					(unsigned long)(n),sizeof(*(ptr))))
-    
+static inline unsigned long long __cmpxchg8b(volatile unsigned long long *ptr,
+		unsigned long long old, unsigned long long newv)
+{
+	unsigned long long prev;
+	__asm__ __volatile__(
+	LOCK_PREFIX "cmpxchg8b (%4)"
+		: "=A" (prev)
+		: "0" (old), "c" ((unsigned long)(newv >> 32)),
+		  "b" ((unsigned long)(newv & 0xffffffffULL)), "D" (ptr)
+		: "memory");
+	return prev;
+}
+
+#ifdef CONFIG_X86_CMPXCHG8B
+#define cmpxchg8b __cmpxchg8b
+#else
+/*
+ * Building a kernel capable of running on the 80486 and 80386.
+ * Neither supports cmpxchg8b. Call a function that emulates the
+ * instruction if necessary.
+ */
+extern unsigned long long cmpxchg8b_486(volatile unsigned long long *,
+		unsigned long long, unsigned long long);
+#define cmpxchg8b cmpxchg8b_486
+#endif
+
 #ifdef __KERNEL__
 struct alt_instr { 
 	__u8 *instr; 		/* original instruction */
_