From: Denis Vlasenko <vda@port.imtp.ilyichevsk.odessa.ua>

This patch is needed for next sha512 optimization patch.

It adds these to linux/bitops.h:

static inline u32 rol32(u32 x, int num)
static inline u32 ror32(u32 x, int num)
static inline u64 rol64(u64 x, int num)
static inline u64 ror64(u64 x, int num)

A generic C version is provided.  Architectures may override it with optimized ones.
A 64-bit i386 asm version is provided.

Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/include/asm-i386/bitops.h |  125 +++++++++++++++++++++++++++++++++++++-
 25-akpm/include/linux/bitops.h    |   32 +++++++++
 2 files changed, 155 insertions(+), 2 deletions(-)

diff -puN include/asm-i386/bitops.h~add-rotate-left-right-ops-to-bitopsh include/asm-i386/bitops.h
--- 25/include/asm-i386/bitops.h~add-rotate-left-right-ops-to-bitopsh	2004-10-03 16:33:24.617802944 -0700
+++ 25-akpm/include/asm-i386/bitops.h	2004-10-03 16:33:24.624801880 -0700
@@ -431,9 +431,130 @@ static inline int ffs(int x)
 #define hweight16(x) generic_hweight16(x)
 #define hweight8(x) generic_hweight8(x)
 
-#endif /* __KERNEL__ */
+/*
+ * 64bit rotations
+ * (gcc3 seems to be clever enough to do 32bit ones just fine)
+ *
+ * Why don't the "i" and "I" constraints work? gcc says:
+ * "warning: asm operand 2 probably doesn't match constraints"
+ * "error: impossible constraint in 'asm'"
+ * We will use "Ic" for now. If gcc fails to do const propagation
+ * and tries to stuff the constant into ecx, shld %3,... will expand
+ * to shld %ecx,... and the assembler will complain.
+ * Do not 'fix' this by changing it to shld %b3,...
+ *
+ * Have to stick to edx,eax pair only because
+ * gcc has limited support for 64bit asm parameters
+ */
+#define constant_rol64(v,c) \
+	({						\
+	u64 vv = (v);					\
+	if(!(c&63)) {					\
+	} else if((c&63)==1) {				\
+		asm (					\
+		"	shldl	$1,%%edx,%%eax	\n"	\
+		"	rcll	$1,%%edx	\n"	\
+		: "=&A" (vv)				\
+		: "0" (vv)				\
+		);					\
+	} else if((c&63)==63) {				\
+		asm (					\
+		"	shrdl	$1,%%edx,%%eax	\n"	\
+		"	rcrl	$1,%%edx	\n"	\
+		: "=&A" (vv)				\
+		: "0" (vv)				\
+		);					\
+	} else if((c&63)<32) {				\
+		asm (					\
+		"	shldl	%3,%%edx,%%eax	\n"	\
+		"	shldl	%3,%2,%%edx	\n"	\
+		: "=&A" (vv)				\
+		: "0" (vv),				\
+		  "r" (vv),				\
+		  "Ic" (c&63)				\
+		);					\
+	} else if((c&63)>32) {				\
+		asm (					\
+		"	shrdl	%3,%%edx,%%eax	\n"	\
+		"	shrdl	%3,%2,%%edx	\n"	\
+		: "=&A" (vv)				\
+		: "0" (vv),				\
+		  "r" (vv),				\
+		  "Ic" (64-(c&63))			\
+		);					\
+	} else /* (c&63)==32 */ {			\
+		asm (					\
+		"	xchgl	%%edx,%%eax	\n"	\
+		: "=&A" (vv)				\
+		: "0" (vv)				\
+		);					\
+	}						\
+	vv;						\
+	})
+#define constant_ror64(v,c) \
+	({						\
+	u64 vv = (v);					\
+	if(!(c&63)) {					\
+	} else if((c&63)==1) {				\
+		asm (					\
+		"	shrdl	$1,%%edx,%%eax	\n"	\
+		"	rcrl	$1,%%edx	\n"	\
+		: "=&A" (vv)				\
+		: "0" (vv)				\
+		);					\
+	} else if((c&63)==63) {				\
+		asm (					\
+		"	shldl	$1,%%edx,%%eax	\n"	\
+		"	rcll	$1,%%edx	\n"	\
+		: "=&A" (vv)				\
+		: "0" (vv)				\
+		);					\
+	} else if((c&63)<32) {				\
+		asm (					\
+		"	shrdl	%3,%%edx,%%eax	\n"	\
+		"	shrdl	%3,%2,%%edx	\n"	\
+		: "=&A" (vv)				\
+		: "0" (vv),				\
+		  "r" (vv),				\
+		  "Ic" (c&63)				\
+		);					\
+	} else if((c&63)>32) {				\
+		asm (					\
+		"	shldl	%3,%%edx,%%eax	\n"	\
+		"	shldl	%3,%2,%%edx	\n"	\
+		: "=&A" (vv)				\
+		: "0" (vv),				\
+		  "r" (vv),				\
+		  "Ic" (64-(c&63))			\
+		);					\
+	} else /* (c&63)==32 */ {			\
+		asm (					\
+		"	xchgl	%%edx,%%eax	\n"	\
+		: "=&A" (vv)				\
+		: "0" (vv)				\
+		);					\
+	}						\
+	vv;						\
+	})
+/*
+ * Unfortunately, 64-bit rotations with a non-constant count
+ * have issues when cnt>=32. Using C code instead.
+ */
+static inline u64 rol64(u64 x,int num) {
+	if(__builtin_constant_p(num))
+		return constant_rol64(x,num);
+	/* Hmmm... shall we do cnt&=63 here? */
+	return ((x<<num) | (x>>(64-num)));
+}
+static inline u64 ror64(u64 x,int num) {
+	if(__builtin_constant_p(num))
+		return constant_ror64(x,num);
+	return ((x>>num) | (x<<(64-num)));
+}
+
+#define ARCH_HAS_ROL64
+#define ARCH_HAS_ROR64
 
-#ifdef __KERNEL__
 
 #define ext2_set_bit(nr,addr) \
 	__test_and_set_bit((nr),(unsigned long*)addr)
diff -puN include/linux/bitops.h~add-rotate-left-right-ops-to-bitopsh include/linux/bitops.h
--- 25/include/linux/bitops.h~add-rotate-left-right-ops-to-bitopsh	2004-10-03 16:33:24.619802640 -0700
+++ 25-akpm/include/linux/bitops.h	2004-10-03 16:33:24.624801880 -0700
@@ -4,6 +4,38 @@
 #include <asm/bitops.h>
 
 /*
+ * bit rotations
+ */
+
+#ifndef ARCH_HAS_ROL32
+static inline u32 rol32(u32 x, int num)
+{
+	return (x << num) | (x >> (32 - num));
+}
+#endif
+
+#ifndef ARCH_HAS_ROR32
+static inline u32 ror32(u32 x, int num)
+{
+	return (x >> num) | (x << (32 - num));
+}
+#endif
+
+#ifndef ARCH_HAS_ROL64
+static inline u64 rol64(u64 x, int num)
+{
+	return (x << num) | (x >> (64 - num));
+}
+#endif
+
+#ifndef ARCH_HAS_ROR64
+static inline u64 ror64(u64 x, int num)
+{
+	return (x >> num) | (x << (64 - num));
+}
+#endif
+
+/*
  * ffs: find first bit set. This is defined the same way as
  * the libc and compiler builtin ffs routines, therefore
  * differs in spirit from the above ffz (man ffs).
_