patch-2.4.21 linux-2.4.21/arch/x86_64/lib/csum-partial.c
Next file: linux-2.4.21/arch/x86_64/lib/csum-wrappers.c
Previous file: linux-2.4.21/arch/x86_64/lib/csum-copy.S
Back to the patch index
Back to the overall index
- Lines: 149
- Date:
2003-06-13 07:51:32.000000000 -0700
- Orig file:
linux-2.4.20/arch/x86_64/lib/csum-partial.c
- Orig date:
2002-11-28 15:53:12.000000000 -0800
diff -urN linux-2.4.20/arch/x86_64/lib/csum-partial.c linux-2.4.21/arch/x86_64/lib/csum-partial.c
@@ -7,35 +7,39 @@
#include <linux/compiler.h>
#include <linux/module.h>
+#include <asm/checksum.h>
-/* Better way for this sought */
-static inline unsigned short from64to16(unsigned long x)
+#define __force_inline inline __attribute__((always_inline))
+
+static inline unsigned short from32to16(unsigned a)
{
- /* add up 32-bit words for 33 bits */
- x = (x & 0xffffffff) + (x >> 32);
- /* add up 16-bit and 17-bit words for 17+c bits */
- x = (x & 0xffff) + (x >> 16);
- /* add up 16-bit and 2-bit for 16+c bit */
- x = (x & 0xffff) + (x >> 16);
- /* add up carry.. */
- x = (x & 0xffff) + (x >> 16);
- return x;
+ unsigned short b = a >> 16;
+ asm("addw %w2,%w0\n\t"
+ "adcw $0,%w0\n"
+ : "=r" (b)
+ : "0" (b), "r" (a));
+ return b;
}
/*
* Do a 64-bit checksum on an arbitrary memory area.
* Returns a 32bit checksum.
*
- * This isn't a great routine, but it's not _horrible_ either.
- * We rely on the compiler to unroll.
+ * This isn't as time critical as it used to be because many NICs
+ * do hardware checksumming these days.
+ *
+ * Things tried and found to not make it faster:
+ * Manual Prefetching
+ * Unrolling to a 128-byte inner loop.
+ * Using interleaving with more registers to break the carry chains.
*/
-static inline unsigned do_csum(const unsigned char * buff, int len)
+static __force_inline unsigned do_csum(const unsigned char *buff, unsigned len)
{
- int odd, count;
+ unsigned odd, count;
unsigned long result = 0;
- if (len <= 0)
- goto out;
+ if (unlikely(len == 0))
+ return result;
odd = 1 & (unsigned long) buff;
if (unlikely(odd)) {
result = *buff << 8;
@@ -45,7 +49,7 @@
count = len >> 1; /* nr of 16-bit words.. */
if (count) {
if (2 & (unsigned long) buff) {
- result += *(unsigned short *) buff;
+ result += *(unsigned short *)buff;
count--;
len -= 2;
buff += 2;
@@ -59,18 +63,41 @@
buff += 4;
}
count >>= 1; /* nr of 64-bit words.. */
- if (count) {
+
+ /* main loop using 64-byte blocks */
unsigned long zero = 0;
- do {
- asm(" addq %1,%0\n"
- " adcq %2,%0\n"
+ unsigned count64 = count >> 3;
+ while (count64) {
+ asm("addq 0*8(%[src]),%[res]\n\t"
+ "adcq 1*8(%[src]),%[res]\n\t"
+ "adcq 2*8(%[src]),%[res]\n\t"
+ "adcq 3*8(%[src]),%[res]\n\t"
+ "adcq 4*8(%[src]),%[res]\n\t"
+ "adcq 5*8(%[src]),%[res]\n\t"
+ "adcq 6*8(%[src]),%[res]\n\t"
+ "adcq 7*8(%[src]),%[res]\n\t"
+ "adcq %[zero],%[res]"
+ : [res] "=r" (result)
+ : [src] "r" (buff), [zero] "r" (zero),
+ "[res]" (result));
+ buff += 64;
+ count64--;
+ }
+
+ /* last up to 7 8-byte blocks */
+ count %= 8;
+ while (count) {
+ asm("addq %1,%0\n\t"
+ "adcq %2,%0\n"
: "=r" (result)
- : "m" (*buff), "r" (zero), "0" (result));
- count--;
+ : "m" (*(unsigned long *)buff),
+ "r" (zero), "0" (result));
+ --count;
buff += 8;
- } while (count);
- result = (result & 0xffffffff) + (result >> 32);
}
+ result = add32_with_carry(result>>32,
+ result&0xffffffff);
+
if (len & 4) {
result += *(unsigned int *) buff;
buff += 4;
@@ -83,10 +110,11 @@
}
if (len & 1)
result += *buff;
- result = from64to16(result);
- if (unlikely(odd))
- return ((result >> 8) & 0xff) | ((result & 0xff) << 8);
-out:
+ result = add32_with_carry(result>>32, result & 0xffffffff);
+ if (unlikely(odd)) {
+ result = from32to16(result);
+ result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
+ }
return result;
}
@@ -102,14 +130,9 @@
*
* it's best to have buff aligned on a 64-bit boundary
*/
-unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
+unsigned csum_partial(const unsigned char *buff, unsigned len, unsigned sum)
{
- unsigned result = do_csum(buff, len);
-
- /* add in old sum, and carry.. */
- asm("addl %1,%0\n\t"
- "adcl $0,%0" : "=r" (result) : "r" (sum), "0" (result));
- return result;
+ return add32_with_carry(do_csum(buff, len), sum);
}
/*
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)