From: Linus Torvalds <torvalds@osdl.org>

So here's v3 (which also removes the now stale __put_user_check() macro).

Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/arch/i386/kernel/i386_ksyms.c |    5 +
 25-akpm/arch/i386/lib/Makefile        |    2 
 25-akpm/arch/i386/lib/putuser.S       |   87 ++++++++++++++++++++++++++++++++++
 25-akpm/include/asm-i386/uaccess.h    |   40 ++++++++++-----
 4 files changed, 120 insertions(+), 14 deletions(-)

diff -puN arch/i386/kernel/i386_ksyms.c~out-of-line-x86-put_user-implementation arch/i386/kernel/i386_ksyms.c
--- 25/arch/i386/kernel/i386_ksyms.c~out-of-line-x86-put_user-implementation	2005-02-08 18:28:44.000000000 -0800
+++ 25-akpm/arch/i386/kernel/i386_ksyms.c	2005-02-08 18:28:44.000000000 -0800
@@ -97,6 +97,11 @@ EXPORT_SYMBOL(__get_user_1);
 EXPORT_SYMBOL(__get_user_2);
 EXPORT_SYMBOL(__get_user_4);
 
+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+
 EXPORT_SYMBOL(strpbrk);
 EXPORT_SYMBOL(strstr);
 
diff -puN arch/i386/lib/Makefile~out-of-line-x86-put_user-implementation arch/i386/lib/Makefile
--- 25/arch/i386/lib/Makefile~out-of-line-x86-put_user-implementation	2005-02-08 18:28:44.000000000 -0800
+++ 25-akpm/arch/i386/lib/Makefile	2005-02-08 18:28:44.000000000 -0800
@@ -3,7 +3,7 @@
 #
 
 
-lib-y = checksum.o delay.o usercopy.o getuser.o memcpy.o strstr.o \
+lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
 	bitops.o
 
 lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
diff -puN /dev/null arch/i386/lib/putuser.S
--- /dev/null	2003-09-15 06:40:47.000000000 -0700
+++ 25-akpm/arch/i386/lib/putuser.S	2005-02-08 18:28:44.000000000 -0800
@@ -0,0 +1,87 @@
+/*
+ * __put_user functions.
+ *
+ * (C) Copyright 2005 Linus Torvalds
+ *
+ * These functions have a non-standard call interface
+ * to make them more efficient, especially as they
+ * return an error value in addition to the "real"
+ * return value.
+ */
+#include <asm/thread_info.h>
+
+
+/*
+ * __put_user_X
+ *
+ * Inputs:	%eax[:%edx] contains the data
+ *		%ecx contains the address
+ *
+ * Outputs:	%eax is error code (0 or -EFAULT)
+ *
+ * These functions should not modify any other registers,
+ * as they get called from within inline assembly.
+ */
+
+#define ENTER	pushl %ebx ; GET_THREAD_INFO(%ebx)
+#define EXIT	popl %ebx ; ret
+
+.text
+.align 4
+.globl __put_user_1
+__put_user_1:
+	ENTER
+	cmpl TI_addr_limit(%ebx),%ecx
+	jae bad_put_user
+1:	movb %al,(%ecx)
+	xorl %eax,%eax
+	EXIT
+
+.align 4
+.globl __put_user_2
+__put_user_2:
+	ENTER
+	movl TI_addr_limit(%ebx),%ebx
+	subl $1,%ebx
+	cmpl %ebx,%ecx
+	jae bad_put_user
+2:	movw %ax,(%ecx)
+	xorl %eax,%eax
+	EXIT
+
+.align 4
+.globl __put_user_4
+__put_user_4:
+	ENTER
+	movl TI_addr_limit(%ebx),%ebx
+	subl $3,%ebx
+	cmpl %ebx,%ecx
+	jae bad_put_user
+3:	movl %eax,(%ecx)
+	xorl %eax,%eax
+	EXIT
+
+.align 4
+.globl __put_user_8
+__put_user_8:
+	ENTER
+	movl TI_addr_limit(%ebx),%ebx
+	subl $7,%ebx
+	cmpl %ebx,%ecx
+	jae bad_put_user
+4:	movl %eax,(%ecx)
+5:	movl %edx,4(%ecx)
+	xorl %eax,%eax
+	EXIT
+
+bad_put_user:
+	movl $-14,%eax
+	EXIT
+
+.section __ex_table,"a"
+	.long 1b,bad_put_user
+	.long 2b,bad_put_user
+	.long 3b,bad_put_user
+	.long 4b,bad_put_user
+	.long 5b,bad_put_user
+.previous
diff -puN include/asm-i386/uaccess.h~out-of-line-x86-put_user-implementation include/asm-i386/uaccess.h
--- 25/include/asm-i386/uaccess.h~out-of-line-x86-put_user-implementation	2005-02-08 18:28:44.000000000 -0800
+++ 25-akpm/include/asm-i386/uaccess.h	2005-02-08 18:28:44.000000000 -0800
@@ -185,6 +185,21 @@ extern void __get_user_4(void);
 
 extern void __put_user_bad(void);
 
+/*
+ * Strange magic calling convention: pointer in %ecx,
+ * value in %eax(:%edx), return value in %eax, no clobbers.
+ */
+extern void __put_user_1(void);
+extern void __put_user_2(void);
+extern void __put_user_4(void);
+extern void __put_user_8(void);
+
+#define __put_user_1(x, ptr) __asm__ __volatile__("call __put_user_1":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_2(x, ptr) __asm__ __volatile__("call __put_user_2":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_4(x, ptr) __asm__ __volatile__("call __put_user_4":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=a" (__ret_pu):"A" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_X(x, ptr) __asm__ __volatile__("call __put_user_X":"=a" (__ret_pu):"c" (ptr))
+
 /**
  * put_user: - Write a simple value into user space.
  * @x:   Value to copy to user space.
@@ -201,9 +216,18 @@ extern void __put_user_bad(void);
  *
  * Returns zero on success, or -EFAULT on error.
  */
-#define put_user(x,ptr)							\
-  __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-
+#define put_user(x,ptr)						\
+({	int __ret_pu;						\
+	__chk_user_ptr(ptr);					\
+	switch(sizeof(*(ptr))) {				\
+	case 1: __put_user_1(x, ptr); break;			\
+	case 2: __put_user_2(x, ptr); break;			\
+	case 4: __put_user_4(x, ptr); break;			\
+	case 8: __put_user_8(x, ptr); break;			\
+	default:__put_user_X(x, ptr); break;			\
+	}							\
+	__ret_pu;						\
+})
 
 /**
  * __get_user: - Get a simple variable from user space, with less checking.
@@ -259,16 +283,6 @@ extern void __put_user_bad(void);
 })
 
 
-#define __put_user_check(x,ptr,size)					\
-({									\
-	long __pu_err = -EFAULT;					\
-	__typeof__(*(ptr)) __user *__pu_addr = (ptr);			\
-	might_sleep();						\
-	if (access_ok(VERIFY_WRITE,__pu_addr,size))			\
-		__put_user_size((x),__pu_addr,(size),__pu_err,-EFAULT);	\
-	__pu_err;							\
-})							
-
 #define __put_user_u64(x, addr, err)				\
 	__asm__ __volatile__(					\
 		"1:	movl %%eax,0(%2)\n"			\
_