patch-2.1.9 linux/include/asm-sparc/checksum.h


diff -u --recursive --new-file v2.1.8/linux/include/asm-sparc/checksum.h linux/include/asm-sparc/checksum.h
@@ -1,4 +1,4 @@
-/* $Id: checksum.h,v 1.13 1996/04/18 03:30:19 davem Exp $ */
+/* $Id: checksum.h,v 1.22 1996/11/10 21:28:25 davem Exp $ */
 #ifndef __SPARC_CHECKSUM_H
 #define __SPARC_CHECKSUM_H
 
@@ -7,6 +7,7 @@
  *  Copyright(C) 1995 Linus Torvalds
  *  Copyright(C) 1995 Miguel de Icaza
  *  Copyright(C) 1996 David S. Miller
+ *  Copyright(C) 1996 Eddie C. Dost
  *
  * derived from:
  *	Alpha checksum c-code
@@ -14,8 +15,7 @@
  *      RFC1071 Computing the Internet Checksum
  */
 
-/*
- * computes the checksum of a memory block at buff, length len,
+/* computes the checksum of a memory block at buff, length len,
  * and adds in "sum" (32-bit)
  *
  * returns a 32-bit number suitable for feeding into itself
@@ -28,8 +28,7 @@
  */
 extern unsigned int csum_partial(unsigned char * buff, int len, unsigned int sum);
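As the comment above says, the 32-bit result can be fed straight back
in as the "sum" argument of a later call.  For reference, a portable
RFC 1071 style sketch of the computation (csum_partial_ref is a
made-up name; the kernel's real csum_partial is hand-written SPARC
assembly elsewhere):

	/* Sketch: accumulate a 32-bit one's-complement sum over len
	 * bytes starting from "sum".  Assumes buff is at least 16-bit
	 * aligned, as the comments in this header advise.
	 */
	static unsigned int csum_partial_ref(unsigned char *buff, int len,
					     unsigned int sum)
	{
		unsigned long long acc = sum;

		while (len > 1) {
			acc += *(unsigned short *) buff;
			buff += 2;
			len -= 2;
		}
		if (len > 0)		/* trailing odd byte; byte-order
					 * subtleties ignored in this sketch */
			acc += *buff;
		while (acc >> 32)	/* end-around carry back into 32 bits */
			acc = (acc & 0xffffffffULL) + (acc >> 32);
		return (unsigned int) acc;
	}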
 
-/*
- * the same as csum_partial, but copies from fs:src while it
+/* the same as csum_partial, but copies from fs:src while it
  * checksums
  *
  * here even more important to align src and dst on a 32-bit (or even
@@ -40,85 +39,81 @@
 #define csum_partial_copy_fromuser(s, d, l, w)  \
                        csum_partial_copy((char *) (s), (d), (l), (w))
 
-/* ihl is always 5 or greater, almost always is 5, iph is always word
- * aligned but can fail to be dword aligned very often.
+/* ihl is always 5 or greater, almost always is 5, and iph is word aligned
+ * the majority of the time.
  */
-extern inline unsigned short ip_fast_csum(const unsigned char *iph, unsigned int ihl)
+extern __inline__ unsigned short ip_fast_csum(__const__ unsigned char *iph,
+					      unsigned int ihl)
 {
-	unsigned long tmp1, tmp2;
 	unsigned short sum;
 
-	__asm__ __volatile__("
-		ld	[%1 + 0x00], %0
-		ld	[%1 + 0x04], %3
-		sub	%2, 4, %2
-		addcc	%3, %0, %0
-		ld	[%1 + 0x08], %4
-		addxcc	%4, %0, %0
-		ld	[%1 + 0x0c], %3
-		addxcc	%3, %0, %0
-		ld	[%1 + 0x10], %4
-		addx	%0, %%g0, %0
-	1:
-		addcc	%4, %0, %0
-		add	%1, 4, %1
-		addxcc	%0, %%g0, %0
-		subcc	%2, 1, %2
-		be,a	2f
-		 sll	%0, 16, %3
-
-		b	1b
-		 ld	[%1 + 0x10], %4
-	2:
-		addcc	%0, %3, %3
-		srl	%3, 16, %0
-		addx	%0, %%g0, %0
-		xnor	%%g0, %0, %0
-	" : "=r" (sum), "=&r" (iph), "=&r" (ihl), "=r" (tmp1), "=r" (tmp2)
-	  : "1" (iph), "2" (ihl));
-
+	/* Note: We must read %2 before we touch %0 for the first time,
+	 *       because GCC can legitimately use the same register for
+	 *       both operands.
+	 */
+	__asm__ __volatile__("sub\t%2, 4, %%g4\n\t"
+			     "ld\t[%1 + 0x00], %0\n\t"
+			     "ld\t[%1 + 0x04], %%g2\n\t"
+			     "ld\t[%1 + 0x08], %%g3\n\t"
+			     "addcc\t%%g2, %0, %0\n\t"
+			     "addxcc\t%%g3, %0, %0\n\t"
+			     "ld\t[%1 + 0x0c], %%g2\n\t"
+			     "ld\t[%1 + 0x10], %%g3\n\t"
+			     "addxcc\t%%g2, %0, %0\n\t"
+			     "addx\t%0, %%g0, %0\n"
+			     "1:\taddcc\t%%g3, %0, %0\n\t"
+			     "add\t%1, 4, %1\n\t"
+			     "addxcc\t%0, %%g0, %0\n\t"
+			     "subcc\t%%g4, 1, %%g4\n\t"
+			     "be,a\t2f\n\t"
+			     "sll\t%0, 16, %%g2\n\t"
+			     "b\t1b\n\t"
+			     "ld\t[%1 + 0x10], %%g3\n"
+			     "2:\taddcc\t%0, %%g2, %%g2\n\t"
+			     "srl\t%%g2, 16, %0\n\t"
+			     "addx\t%0, %%g0, %0\n\t"
+			     "xnor\t%%g0, %0, %0"
+			     : "=r" (sum), "=&r" (iph)
+			     : "r" (ihl), "1" (iph)
+			     : "g2", "g3", "g4");
 	return sum;
 }
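In plain terms, the assembly above computes the standard 16-bit
one's-complement checksum over the ihl 32-bit words of the IP header,
carrying out of the top bit back into the sum, then folds and
complements the result.  A rough portable C equivalent (a sketch only;
ip_fast_csum_ref is a made-up name):

	/* Sketch: 16-bit one's-complement checksum of an IP header
	 * that is ihl 32-bit words long (ihl >= 5, i.e. 20 bytes).
	 */
	static unsigned short ip_fast_csum_ref(const unsigned char *iph,
					       unsigned int ihl)
	{
		const unsigned short *p = (const unsigned short *) iph;
		unsigned long acc = 0;
		unsigned int i;

		for (i = 0; i < ihl * 2; i++)	/* ihl counts 32-bit words */
			acc += p[i];
		while (acc >> 16)		/* end-around carry */
			acc = (acc & 0xffff) + (acc >> 16);
		return (unsigned short) ~acc;	/* complement, like the
						 * xnor in the asm */
	}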
 
-/*
- * computes the checksum of the TCP/UDP pseudo-header
+/* computes the checksum of the TCP/UDP pseudo-header
  * returns a 16-bit checksum, already complemented
  */
-extern inline unsigned short csum_tcpudp_magic(unsigned long saddr, unsigned long daddr,
-					       unsigned int len, unsigned short proto,
-					       unsigned int sum)
+extern __inline__ unsigned short csum_tcpudp_magic(unsigned long saddr,
+						   unsigned long daddr,
+						   unsigned int len,
+						   unsigned short proto,
+						   unsigned int sum)
 {
-	__asm__ __volatile__("
-		addcc	%1, %0, %0
-		addxcc	%2, %0, %0
-		addxcc	%3, %0, %0
-		addx	%0, %%g0, %0
-		sll	%0, 16, %1
-		addcc	%1, %0, %0
-		srl	%0, 16, %0
-		addx	%0, %%g0, %0
-		xnor	%%g0, %0, %0
-	" : "=r" (sum), "=r" (saddr)
-	  : "r" (daddr), "r" ((proto<<16)+len), "0" (sum), "1" (saddr));
-
+	__asm__ __volatile__("addcc\t%1, %0, %0\n\t"
+			     "addxcc\t%2, %0, %0\n\t"
+			     "addxcc\t%3, %0, %0\n\t"
+			     "addx\t%0, %%g0, %0\n\t"
+			     "sll\t%0, 16, %1\n\t"
+			     "addcc\t%1, %0, %0\n\t"
+			     "srl\t%0, 16, %0\n\t"
+			     "addx\t%0, %%g0, %0\n\t"
+			     "xnor\t%%g0, %0, %0"
+			     : "=r" (sum), "=r" (saddr)
+			     : "r" (daddr), "r" ((proto<<16)+len), "0" (sum),
+			     "1" (saddr));
 	return sum;
 }
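The instruction sequence adds saddr, daddr, and the combined
(proto << 16) + len word into the running sum with end-around carry,
then folds to 16 bits and complements: the TCP/UDP pseudo-header
contribution from RFC 793/768.  A portable sketch (csum_tcpudp_ref is
a made-up name):

	/* Sketch: add the pseudo-header words into "sum" with
	 * end-around carry, fold to 16 bits, and complement.
	 */
	static unsigned short csum_tcpudp_ref(unsigned long saddr,
					      unsigned long daddr,
					      unsigned int len,
					      unsigned short proto,
					      unsigned int sum)
	{
		unsigned long long acc = sum;

		acc += saddr;
		acc += daddr;
		acc += ((unsigned int) proto << 16) + len;
		while (acc >> 16)	/* fold everything down to 16 bits */
			acc = (acc & 0xffff) + (acc >> 16);
		return (unsigned short) ~acc;
	}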
 
-/*
- *	Fold a partial checksum without adding pseudo headers
- */
-extern inline unsigned int csum_fold(unsigned int sum)
+/* Fold a partial checksum without adding pseudo headers. */
+extern __inline__ unsigned int csum_fold(unsigned int sum)
 {
 	unsigned int tmp;
 
-	__asm__ __volatile__("
-		addcc	%0, %1, %1
-		srl	%1, 16, %1
-		addx	%1, %%g0, %1
-		xnor	%%g0, %1, %0
-	" : "=&r" (sum), "=r" (tmp)
-	  : "0" (sum), "1" (sum<<16));
-
+	__asm__ __volatile__("addcc\t%0, %1, %1\n\t"
+			     "srl\t%1, 16, %1\n\t"
+			     "addx\t%1, %%g0, %1\n\t"
+			     "xnor\t%%g0, %1, %0"
+			     : "=&r" (sum), "=r" (tmp)
+			     : "0" (sum), "1" (sum<<16));
 	return sum;
 }
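The trick here: adding sum << 16 to sum places the sum of the two
halfwords in the upper 16 bits, so a shift right by 16 plus the carry
bit is exactly the folded value.  In portable C the same fold is,
modulo the upper 16 bits that callers truncate away (a sketch;
csum_fold_ref is a made-up name):

	/* Sketch: fold a 32-bit partial sum to 16 bits with
	 * end-around carry, then complement.
	 */
	static unsigned int csum_fold_ref(unsigned int sum)
	{
		sum = (sum & 0xffff) + (sum >> 16);	/* fold high half in */
		sum = (sum & 0xffff) + (sum >> 16);	/* absorb the carry */
		return ~sum & 0xffff;
	}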
 
@@ -159,11 +154,8 @@
 	return csum_fold(sum);
 }
 
-/*
- * this routine is used for miscellaneous IP-like checksums, mainly
- * in icmp.c
- */
-extern inline unsigned short ip_compute_csum(unsigned char * buff, int len)
+/* this routine is used for miscellaneous IP-like checksums, mainly in icmp.c */
+extern __inline__ unsigned short ip_compute_csum(unsigned char * buff, int len)
 {
 	return csum_fold(csum_partial(buff, len, 0));
 }
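Putting the pieces together, a typical caller zeroes the checksum
field and sums the whole message in one call.  A hypothetical usage
sketch (the struct layout is illustrative, not a kernel definition):

	/* Hypothetical usage: fill in the checksum of an ICMP-style
	 * message of "len" bytes starting at msg.  The checksum field
	 * must be zero while the sum is computed.
	 */
	struct icmp_msg_ex {			/* illustrative layout */
		unsigned char	type;
		unsigned char	code;
		unsigned short	checksum;
	};

	static void fill_csum_ex(struct icmp_msg_ex *msg, int len)
	{
		msg->checksum = 0;
		msg->checksum = ip_compute_csum((unsigned char *) msg, len);
	}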
