patch-2.4.22 linux-2.4.22/arch/sh64/lib/checksum.S
Next file: linux-2.4.22/arch/sh64/lib/copy_user_memcpy.S
Previous file: linux-2.4.22/arch/sh64/lib/c-checksum.c
Back to the patch index
Back to the overall index
- Lines: 657
- Date:
2003-08-25 04:44:40.000000000 -0700
- Orig file:
linux-2.4.21/arch/sh64/lib/checksum.S
- Orig date:
1969-12-31 16:00:00.000000000 -0800
diff -urN linux-2.4.21/arch/sh64/lib/checksum.S linux-2.4.22/arch/sh64/lib/checksum.S
@@ -0,0 +1,656 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh5/lib/checksum.S
+ *
+ * Copyright (C) 2000, 2001 Paolo Alberelli, Stefano D'Andrea
+ *
+ */
+
+/*
+ *
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * IP/TCP/UDP checksumming routines
+ *
+ * Authors: Jorge Cwik, <jorge@laser.satlink.net>
+ * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ * Tom May, <ftom@netcom.com>
+ * Pentium Pro/II routines:
+ * Alexander Kjeldaas <astor@guardian.no>
+ * Finn Arne Gangstad <finnag@guardian.no>
+ * Lots of code moved from tcp.c and ip.c; see those files
+ * for more names.
+ *
+ * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception
+ * handling.
+ * Andi Kleen, add zeroing on error
+ * converted to pure assembler
+ *
+ * SuperH version: Copyright (C) 1999 Niibe Yutaka
+ *
+ * SH-5 version: Copyright (C) 2000 Paolo Alberelli, Stefano D'Andrea
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/errno.h>
+#include <asm/registers.h>
+
+ .section .text64, "ax"
+/*
+ * unsigned int csum_partial(const unsigned char *buf,
+ * int len,
+ * unsigned int sum);
+ *
+ *
+ * computes a partial checksum, e.g. for TCP/UDP fragments
+ */
+
+/*
+ * SH-5 ABI convention:
+ * Input parameters:
+ * r2 = *buf
+ * r3 = len
+ * r4 = sum so far computed checksum (may be zero)
+ * return value must be in:
+ * r2 returned checksum
+ */
+
+ /*
+ * Experiments with Ethernet and SLIP connections show that buff
+ * is aligned on either a 2-byte or 4-byte boundary. We get at
+ * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
+ * Fortunately, it is easy to convert 2-byte alignment to 4-byte
+ * alignment for the unrolled loop.
+ */
+
+#ifndef NOTDEF
+ /* This version of _csum_partial is a simple but inefficient
+ * implementation.
+ * It is maintained only for historical reasons.
+ */
+ .global csum_partial
+csum_partial:
+ /*
+ * Straightforward variant: add the buffer to the running sum one
+ * 16-bit word at a time, then add a possible trailing byte.
+ *
+ * In: r2 = buf, r3 = len, r4 = sum so far
+ * Out: r2 = r4 + all words + trailing byte (if any)
+ * Uses: r1 (constant 2), r7 (data), t0-t3 (branch targets)
+ *
+ * NOTE(review): the sum is accumulated and returned as a full
+ * 64-bit value; nothing here folds bits above bit 31 back in.
+ * Confirm callers cope with that.
+ */
+ _ptar .cp_word_loop, t2 /* t2 = top of the word loop */
+ _ptar .cp_tail, t1 /* t1 = trailing byte handling */
+ _ptar .cp_done, t3 /* t3 = final add and return */
+
+ movi 2, r1 /* r1 = bytes consumed per pass */
+
+ /* buf is assumed to be 2-byte aligned */
+.cp_word_loop:
+ bgt r1, r3, t1 /* 2 > len: at most one byte left */
+
+ ld.uw r2, 0, r7 /* r7 = next unsigned 16-bit word */
+ addi r3, -2, r3 /* len -= 2 */
+ addi r2, 2, r2 /* buf += 2 */
+ add r4, r7, r4 /* sum += word */
+ blink t2, ZERO /* back to .cp_word_loop */
+
+.cp_tail:
+ movi 0, r7 /* contribution is 0 if no byte left */
+ beqi r3, 0, t3 /* len == 0: nothing more to add */
+ ld.ub r2, 0, r7 /* r7 = last byte */
+#ifndef __LITTLE_ENDIAN__
+ shlli r7, 8, r7 /* big-endian: byte is the upper half */
+#endif
+
+.cp_done:
+ ptabs r18, t0 /* t0 = return address held in r18 */
+ add r4, r7, r2 /* r2 = sum + trailing contribution */
+ blink t0, ZERO /* return */
+
+#else /* NOTDEF ---------------------------------------------------- */
+
+ /*
+ * Unrolled variant of csum_partial (built only when NOTDEF is
+ * defined).
+ *
+ * In: r2 = buf, r3 = len, r4 = sum so far
+ * Out: r2 = updated sum
+ * Uses: r5 = read cursor, r6 = bytes remaining, r7 = data/scratch,
+ * r8 = size of the current step (32, 4 or 2),
+ * t0-t4 = branch targets
+ *
+ * After every addition the sequence
+ * or ZERO, ZERO, r7 / mshflo.l r4, r7, r7 / shlri r4, 32, r4 /
+ * add r4, r7, r4
+ * adds the bits above bit 31 of r4 back onto its low 32 bits.
+ *
+ * NOTE(review): ld.l is a signed 32-bit load; confirm the fold
+ * gives the intended result for data words with bit 31 set.
+ */
+ .global csum_partial
+csum_partial:
+ movi 32, r8 /* r8 = bytes per unrolled pass (8 longs) */
+ _ptar .byte_footer, t1 /* t1 = .byte_footer */
+ _ptar .word_footer, t2 /* t2 = .word_footer */
+ _ptar .long_footer, t3 /* t3 = .long_footer */
+ _ptar .exit, t4 /* t4 = exit point */
+ _ptar .long_aligned, t0 /* t0 = .long_aligned */
+ or r2, ZERO, r5 /* r5 = buffer pointer */
+ or r3, ZERO, r6 /* r6 = original length */
+ andi r2, 2, r7 /* r7 = bit 1 of the buffer address */
+ beq r7, ZERO, t0 /* bit 1 clear: treat buf as long aligned */
+
+ /* buf is assumed to be 2-byte aligned from here on */
+
+ beqi r3, 1, t1 /* Size = 1: only the byte footer applies */
+
+ /*
+ * Consume one leading 16-bit word so r5 becomes 4-byte aligned.
+ * NOTE(review): this path is also taken when len == 0; r6 then
+ * becomes -2, the "done" test below does not fire and the
+ * footers go on to read a byte. Confirm callers never pass
+ * len == 0 with such a buffer.
+ */
+ ld.uw r2, 0, r7 /* r7 = word to be checksummed */
+ add r4, r7, r4
+
+ or ZERO, ZERO, r7
+
+ mshflo.l r4, r7, r7 /* r7 = low 32 bits of r4 */
+ shlri r4, 32, r4 /* r4 = bits above bit 31 */
+ add r4, r7, r4 /* Add eventual "carry" */
+ addi r6, -2, r6
+ addi r5, 2, r5 /* r5 is now long aligned */
+ beq r6, ZERO, t4 /* Exit if done */
+
+ or ZERO, ZERO, r7 /* Clean up r7 */
+
+.long_aligned:
+ bgt r8, r6, t3 /* fewer than 32 bytes left: go to footers */
+
+ /* 8 longs to be checksummed, each followed by a fold */
+ ld.l r5, 0, r7 /* r7 = data to be checksummed */
+ add r4, r7, r4
+ or ZERO, ZERO, r7
+ mshflo.l r4, r7, r7
+ shlri r4, 32, r4
+ add r4, r7, r4 /* Add eventual "carry" */
+ ld.l r5, 4, r7 /* r7 = data to be checksummed */
+ add r4, r7, r4
+ or ZERO, ZERO, r7
+
+
+ mshflo.l r4, r7, r7
+ shlri r4, 32, r4
+ add r4, r7, r4 /* Add eventual "carry" */
+ ld.l r5, 8, r7 /* r7 = data to be checksummed */
+ add r4, r7, r4
+ or ZERO, ZERO, r7
+ mshflo.l r4, r7, r7
+ shlri r4, 32, r4
+ add r4, r7, r4 /* Add eventual "carry" */
+
+ ld.l r5, 12, r7 /* r7 = data to be checksummed */
+ add r4, r7, r4
+ or ZERO, ZERO, r7
+ mshflo.l r4, r7, r7
+ shlri r4, 32, r4
+ add r4, r7, r4 /* Add eventual "carry" */
+
+
+ ld.l r5, 16, r7 /* r7 = data to be checksummed */
+ add r4, r7, r4
+ or ZERO, ZERO, r7
+ mshflo.l r4, r7, r7
+ shlri r4, 32, r4
+ add r4, r7, r4 /* Add eventual "carry" */
+
+ ld.l r5, 20, r7 /* r7 = data to be checksummed */
+ add r4, r7, r4
+ or ZERO, ZERO, r7
+ mshflo.l r4, r7, r7
+ shlri r4, 32, r4
+ add r4, r7, r4 /* Add eventual "carry" */
+
+
+ ld.l r5, 24, r7 /* r7 = data to be checksummed */
+ add r4, r7, r4
+ or ZERO, ZERO, r7
+ mshflo.l r4, r7, r7
+ shlri r4, 32, r4
+ add r4, r7, r4 /* Add eventual "carry" */
+
+
+ ld.l r5, 28, r7 /* r7 = data to be checksummed */
+ add r4, r7, r4
+ or ZERO, ZERO, r7
+ mshflo.l r4, r7, r7
+ shlri r4, 32, r4
+ add r4, r7, r4 /* Add eventual "carry" */
+
+
+ sub r6, r8, r6 /* remaining -= 32 */
+ add r5, r8, r5 /* cursor += 32 */
+ blink t0, ZERO /* back to .long_aligned */
+
+.long_footer:
+ movi 4, r8 /* step size is now one long */
+ bgt r8, r6, t2 /* fewer than 4 bytes left: word footer */
+
+ /* Long to be checksummed */
+ ld.l r5, 0, r7 /* r7 = data to be checksummed */
+ add r4, r7, r4
+ or ZERO, ZERO, r7
+ mshflo.l r4, r7, r7
+ shlri r4, 32, r4
+ add r4, r7, r4 /* Add eventual "carry" */
+
+
+ sub r6, r8, r6 /* remaining -= 4 */
+ add r5, r8, r5 /* cursor += 4 */
+ blink t3, ZERO /* back to .long_footer */
+
+.word_footer:
+ movi 2, r8 /* step size is now one short */
+ bgt r8, r6, t1 /* fewer than 2 bytes left: byte footer */
+
+ /* Short to be checksummed */
+ ld.uw r5, 0, r7 /* r7 = data to be checksummed */
+ add r4, r7, r4
+ or ZERO, ZERO, r7
+ mshflo.l r4, r7, r7
+ shlri r4, 32, r4
+ add r4, r7, r4 /* Add eventual "carry" */
+
+
+ sub r6, r8, r6 /* remaining -= 2 */
+ add r5, r8, r5 /* cursor += 2 */
+
+.byte_footer:
+ beqi r6, 0, t4 /* nothing left: exit */
+ /* One trailing byte to be checksummed */
+ ld.ub r5, 0, r7 /* r7 = data to be checksummed */
+
+#ifndef __LITTLE_ENDIAN__
+ shlli r7, 8, r7 /* big-endian: byte is the upper half */
+#endif
+
+ add r4, r7, r4
+ or ZERO, ZERO, r7
+ mshflo.l r4, r7, r7
+ shlri r4, 32, r4
+ add r4, r7, r4 /* Add eventual "carry" */
+
+
+.exit:
+ or r4, ZERO, r2 /* r2 = resulting checksum */
+
+ ptabs r18, t0 /* t0 (loop target until now) = return address */
+ blink t0, ZERO /* return */
+
+#endif /*NOTDEF*/
+
+/*
+ * unsigned int csum_partial_copy_generic (const char *src,
+ * char *dst,
+ * int len,
+ * int sum,
+ * int *src_err_ptr,
+ * int *dst_err_ptr)
+ *
+ *
+ *
+ * Copy from src to dst while checksumming, otherwise like csum_partial
+ *
+ * The macros SRC and DST specify the type of access for the instruction.
+ * thus we can call a custom exception handler for all access types.
+ *
+ * FIXME: could someone double-check whether I haven't mixed up some SRC and
+ * DST definitions? It's damn hard to trigger all cases. I hope I got
+ * them all but there's no guarantee.
+ */
+
+/*
+ * SH-5 ABI convention:
+ * Input parameters:
+ * r2 = const char *src
+ * r3 = char *dst
+ * r4 = int len
+ * r5 = int sum so far computed checksum (may be zero)
+ * r6 = int *src_err_ptr
+ * r7 = int *dst_err_ptr
+ * return value must be in:
+ * r2 returned checksum
+ */
+
+#ifndef NOTDEF
+ /*
+ ** This version of _csum_partial_copy_generic is a simple but
+ ** inefficient implementation.
+ ** It is maintained only for historical reasons.
+ */
+ .global csum_partial_copy_generic
+csum_partial_copy_generic:
+ /*
+ * Straightforward variant: copy src to dst one 16-bit word at a
+ * time while adding each word to the running sum, then handle a
+ * possible trailing byte.
+ *
+ * In: r2 = src, r3 = dst, r4 = len, r5 = sum so far,
+ * r6 = src_err_ptr, r7 = dst_err_ptr
+ * Out: r2 = r5 + all words + trailing byte (if any)
+ * Uses: r1 (constant 2), r8 (data), r20 (src cursor),
+ * r21 (dst cursor), t0-t3 (branch targets)
+ *
+ * r3, r6 and r7 are never written here; the fixup handlers
+ * further down in this file read them after a fault.
+ *
+ * Each .src_err_N label marks a load that must be IMMEDIATELY
+ * followed by its matching store: __ex_table lists .src_err_N for
+ * the load and .src_err_N+4 for the store.
+ */
+ movi 2, r1 /* r1 = bytes consumed per pass */
+ or r3, ZERO, r21 /* r21 = destination cursor */
+ or r2, ZERO, r20 /* r20 = source cursor */
+
+ _ptar .gc_word_loop, t2 /* t2 = top of the word loop */
+ _ptar .gc_tail, t1 /* t1 = trailing byte handling */
+ _ptar .gc_done, t3 /* t3 = final add and return */
+
+ /* both buffers are assumed to be 2-byte aligned */
+.gc_word_loop:
+
+ bgt r1, r4, t1 /* 2 > len: at most one byte left */
+
+.src_err_1:
+ ld.uw r20, 0, r8 /* r8 = next unsigned 16-bit word */
+ st.w r21, 0, r8 /* copy it to the destination */
+
+ addi r4, -2, r4 /* len -= 2 */
+ addi r21, 2, r21 /* dst cursor += 2 */
+ addi r20, 2, r20 /* src cursor += 2 */
+ add r5, r8, r5 /* sum += word */
+ blink t2, ZERO /* back to .gc_word_loop */
+
+.gc_tail:
+ movi 0, r8 /* contribution is 0 if no byte left */
+ beqi r4, 0, t3 /* len == 0: nothing more to copy */
+
+.src_err_2:
+ ld.ub r20, 0, r8 /* r8 = last byte */
+ st.b r21, 0, r8 /* copy it to the destination */
+#ifndef __LITTLE_ENDIAN__
+ shlli r8, 8, r8 /* big-endian: byte is the upper half */
+#endif
+
+.gc_done:
+ ptabs r18, t0 /* t0 = return address held in r18 */
+ add r5, r8, r2 /* r2 = sum + trailing contribution */
+ blink t0, ZERO /* return */
+
+ .section .fixup, "ax"
+
+/*
+ * Fixup for a fault on one of the stores in
+ * csum_partial_copy_generic (the .src_err_N+4 entries of
+ * __ex_table point here).
+ *
+ * In: r5 = sum accumulated so far, r7 = dst_err_ptr,
+ * r18 = return address
+ * Out: *dst_err_ptr = -EFAULT, r2 = r5
+ */
+_csum_partial_copy_generic_dst_err:
+ ptabs r18, t0 /* t0 = return address held in r18 */
+ or r5, ZERO, r2 /* return the sum reached so far */
+
+ movi -(EFAULT), r8 /* r8 = error code to report */
+ st.l r7, 0, r8 /* *dst_err_ptr = -EFAULT */
+
+ blink t0, ZERO /* quiet exit */
+
+/*
+ * Fixup for a fault on one of the loads in
+ * csum_partial_copy_generic (the .src_err_N entries of __ex_table
+ * point here).
+ *
+ * In: r3 = first DST byte (untouched by the copy routine),
+ * r21 = DST cursor, i.e. one past the last byte written,
+ * r5 = sum accumulated so far, r6 = src_err_ptr,
+ * r18 = return address
+ * Out: *src_err_ptr = -EFAULT, DST bytes [r3, r21) zeroed, r2 = r5
+ *
+ * Fixes over the previous revision:
+ * - the error code is stored with st.l (it was ld.l, which only
+ * overwrote r8 and never reported the error);
+ * - the clearing loop stores with st.b (it was ld.b, which left
+ * the destination untouched);
+ * - the loop walks r21, the destination cursor of this variant
+ * (it walked r20, which holds the SOURCE cursor here).
+ */
+_csum_partial_copy_generic_src_err:
+ movi -(EFAULT), r8 /* r8 = error code to report */
+ st.l r6, 0, r8 /* *src_err_ptr = -EFAULT */
+
+ /*
+ * Now reset the DST buffer.
+ * r21 points to the next DST byte.
+ * r3 points to the first DST byte.
+ */
+ _ptar .quiet_exit, t0
+ _ptar .src_err_loop, t1
+ beq r21, r3, t0 /* nothing was written yet: skip loop */
+
+.src_err_loop:
+ addi r21, -1, r21 /* step back one byte */
+ st.b r21, 0, ZERO /* clear it */
+ bne r21, r3, t1 /* repeat until the first DST byte */
+
+ /* Quiet exit */
+.quiet_exit:
+ or r5, ZERO, r2 /* return the sum reached so far */
+
+ ptabs r18, t0 /* t0 = return address held in r18 */
+ blink t0, ZERO
+
+ .section __ex_table, "a"
+
+ /*
+ * Exception table for csum_partial_copy_generic. Each entry is a
+ * pair of .long values: the address of an instruction, then the
+ * address of the fixup handler listed for it.
+ *
+ * .src_err_N labels a load from the source buffer; .src_err_N+4
+ * is the instruction 4 bytes further on, which in this file is
+ * the store that directly follows that load.
+ *
+ * NOTE(review): the entries are not in ascending address order
+ * (.src_err_2 precedes .src_err_1+4); confirm the lookup code
+ * that uses asm_checksum_start/asm_checksum_end does not need a
+ * sorted table.
+ */
+ .global asm_checksum_start /* Marks the first entry */
+asm_checksum_start:
+ .long .src_err_1, _csum_partial_copy_generic_src_err
+ .long .src_err_2, _csum_partial_copy_generic_src_err
+ .long .src_err_1+4, _csum_partial_copy_generic_dst_err
+ .long .src_err_2+4, _csum_partial_copy_generic_dst_err
+ .global asm_checksum_end /* Marks the end of the entries */
+asm_checksum_end:
+
+#else /* NOTDEF -------------------------------------------------------- */
+
+ /*
+ * Unrolled variant of csum_partial_copy_generic (built only when
+ * NOTDEF is defined).
+ *
+ * In: r2 = src, r3 = dst, r4 = len, r5 = sum so far,
+ * r6 = src_err_ptr, r7 = dst_err_ptr
+ * Out: r2 = updated sum
+ * Uses: r24 = src cursor, r20 = dst cursor, r25 = bytes remaining,
+ * r26 = data/scratch, r27 = size of the current step
+ * (32, 4 or 2), t0-t4 = branch targets
+ *
+ * After every addition the sequence
+ * or ZERO, ZERO, r26 / mshflo.l r5, r26, r26 / shlri r5, 32, r5 /
+ * add r5, r26, r5
+ * adds the bits above bit 31 of r5 back onto its low 32 bits.
+ *
+ * Each .src_err_N label marks a load that must be IMMEDIATELY
+ * followed by its matching store: __ex_table lists .src_err_N for
+ * the load and .src_err_N+4 for the store. No access to src or dst
+ * may sit outside such a pair.
+ *
+ * Fixes over the previous revision:
+ * - the leading 16-bit word is loaded with ld.uw like every other
+ * 16-bit load in this file (it was the signed ld.w, which
+ * sign-extends the data before it is added to the sum);
+ * - a duplicate ld.ub in front of .src_err_12, which read the
+ * source without an __ex_table entry, has been removed.
+ *
+ * NOTE(review): ld.l is a signed 32-bit load; confirm the fold
+ * gives the intended result for data words with bit 31 set.
+ * NOTE(review): with bit 1 of src set and len == 0 the alignment
+ * step below still copies a word and r25 goes negative; confirm
+ * callers never do that.
+ */
+ .global csum_partial_copy_generic
+csum_partial_copy_generic:
+
+ movi 32, r27 /* r27 = bytes per unrolled pass (8 longs) */
+ _ptar .byte_footer_gc, t1 /* t1 = .byte_footer_gc */
+ _ptar .word_footer_gc, t2 /* t2 = .word_footer_gc */
+ _ptar .long_footer_gc, t3 /* t3 = .long_footer_gc */
+ _ptar .exit_gc, t4 /* t4 = exit point */
+
+ or r2, ZERO, r24 /* r24 = SRC cursor */
+ or r3, ZERO, r20 /* r20 = DST cursor */
+ or r4, ZERO, r25 /* r25 = bytes remaining */
+ _ptar .long_aligned_gc, t0 /* t0 = .long_aligned_gc */
+ andi r2, 2, r26 /* r26 = bit 1 of the source address */
+ beq r26, ZERO, t0 /* bit 1 clear: treat src as long aligned */
+
+ /* src is assumed to be 2-byte aligned from here on */
+
+ beqi r4, 1, t1 /* Size = 1: only the byte footer applies */
+
+ /* Copy one leading 16-bit word so r24 becomes 4-byte aligned */
+.src_err_1:
+ ld.uw r2, 0, r26 /* r26 = unsigned word to be checksummed */
+ st.w r3, 0, r26 /* fill data into DST */
+ add r5, r26, r5
+ or ZERO, ZERO, r26
+ mshflo.l r5, r26, r26 /* r26 = low 32 bits of r5 */
+ shlri r5, 32, r5 /* r5 = bits above bit 31 */
+ add r5, r26, r5 /* Add eventual "carry" */
+
+ addi r25, -2, r25
+ addi r24, 2, r24 /* r24 is now long aligned */
+ addi r20, 2, r20 /* DST cursor follows the SRC cursor */
+ beq r25, ZERO, t4 /* Exit if done */
+
+ or ZERO, ZERO, r26 /* Clean up r26 */
+
+.long_aligned_gc:
+ bgt r27, r25, t3 /* fewer than 32 bytes left: go to footers */
+
+ /* 8 longs to be copied and checksummed, each followed by a fold */
+.src_err_2:
+ ld.l r24, 0, r26 /* r26 = data to be checksummed */
+ st.l r20, 0, r26 /* fill data into DST */
+ add r5, r26, r5
+ or ZERO, ZERO, r26
+ mshflo.l r5, r26, r26
+ shlri r5, 32, r5
+ add r5, r26, r5 /* Add eventual "carry" */
+
+.src_err_3:
+ ld.l r24, 4, r26 /* r26 = data to be checksummed */
+ st.l r20, 4, r26 /* fill data into DST */
+ add r5, r26, r5
+ or ZERO, ZERO, r26
+ mshflo.l r5, r26, r26
+ shlri r5, 32, r5
+ add r5, r26, r5 /* Add eventual "carry" */
+
+.src_err_4:
+ ld.l r24, 8, r26 /* r26 = data to be checksummed */
+ st.l r20, 8, r26 /* fill data into DST */
+ add r5, r26, r5
+ or ZERO, ZERO, r26
+ mshflo.l r5, r26, r26
+ shlri r5, 32, r5
+ add r5, r26, r5 /* Add eventual "carry" */
+
+.src_err_5:
+ ld.l r24, 12, r26 /* r26 = data to be checksummed */
+ st.l r20, 12, r26 /* fill data into DST */
+ add r5, r26, r5
+ or ZERO, ZERO, r26
+ mshflo.l r5, r26, r26
+ shlri r5, 32, r5
+ add r5, r26, r5 /* Add eventual "carry" */
+
+.src_err_6:
+ ld.l r24, 16, r26 /* r26 = data to be checksummed */
+ st.l r20, 16, r26 /* fill data into DST */
+ add r5, r26, r5
+ or ZERO, ZERO, r26
+ mshflo.l r5, r26, r26
+ shlri r5, 32, r5
+ add r5, r26, r5 /* Add eventual "carry" */
+
+.src_err_7:
+ ld.l r24, 20, r26 /* r26 = data to be checksummed */
+ st.l r20, 20, r26 /* fill data into DST */
+ add r5, r26, r5
+ or ZERO, ZERO, r26
+ mshflo.l r5, r26, r26
+ shlri r5, 32, r5
+ add r5, r26, r5 /* Add eventual "carry" */
+
+.src_err_8:
+ ld.l r24, 24, r26 /* r26 = data to be checksummed */
+ st.l r20, 24, r26 /* fill data into DST */
+ add r5, r26, r5
+ or ZERO, ZERO, r26
+ mshflo.l r5, r26, r26
+ shlri r5, 32, r5
+ add r5, r26, r5 /* Add eventual "carry" */
+
+.src_err_9:
+ ld.l r24, 28, r26 /* r26 = data to be checksummed */
+ st.l r20, 28, r26 /* fill data into DST */
+ add r5, r26, r5
+ or ZERO, ZERO, r26
+ mshflo.l r5, r26, r26
+ shlri r5, 32, r5
+ add r5, r26, r5 /* Add eventual "carry" */
+
+ sub r25, r27, r25 /* remaining -= 32 */
+ add r24, r27, r24 /* SRC cursor += 32 */
+ add r20, r27, r20 /* DST cursor += 32 */
+ blink t0, ZERO /* back to .long_aligned_gc */
+
+.long_footer_gc:
+ movi 4, r27 /* step size is now one long */
+ bgt r27, r25, t2 /* fewer than 4 bytes left: word footer */
+
+ /* Long to be copied and checksummed */
+.src_err_10:
+ ld.l r24, 0, r26 /* r26 = data to be checksummed */
+ st.l r20, 0, r26 /* fill data into DST */
+ add r5, r26, r5
+ or ZERO, ZERO, r26
+ mshflo.l r5, r26, r26
+ shlri r5, 32, r5
+ add r5, r26, r5 /* Add eventual "carry" */
+
+ sub r25, r27, r25 /* remaining -= 4 */
+ add r24, r27, r24 /* SRC cursor += 4 */
+ add r20, r27, r20 /* DST cursor += 4 */
+ blink t3, ZERO /* back to .long_footer_gc */
+
+.word_footer_gc:
+ movi 2, r27 /* step size is now one short */
+ bgt r27, r25, t1 /* fewer than 2 bytes left: byte footer */
+
+ /* Short to be copied and checksummed */
+.src_err_11:
+ ld.uw r24, 0, r26 /* r26 = data to be checksummed */
+ st.w r20, 0, r26 /* fill data into DST */
+ add r5, r26, r5
+ or ZERO, ZERO, r26
+ mshflo.l r5, r26, r26
+ shlri r5, 32, r5
+ add r5, r26, r5 /* Add eventual "carry" */
+
+ sub r25, r27, r25 /* remaining -= 2 */
+ add r24, r27, r24 /* SRC cursor += 2 */
+ add r20, r27, r20 /* DST cursor += 2 */
+
+.byte_footer_gc:
+ beqi r25, 0, t4 /* nothing left: exit */
+ /* One trailing byte to be copied and checksummed */
+.src_err_12:
+ ld.ub r24, 0, r26 /* r26 = data to be checksummed */
+ st.b r20, 0, r26 /* fill data into DST */
+
+#ifndef __LITTLE_ENDIAN__
+ shlli r26, 8, r26 /* big-endian: byte is the upper half */
+#endif
+
+ add r5, r26, r5
+ or ZERO, ZERO, r26
+ mshflo.l r5, r26, r26
+ shlri r5, 32, r5
+ add r5, r26, r5 /* Add eventual "carry" */
+
+.exit_gc:
+ or r5, ZERO, r2 /* r2 = resulting checksum */
+
+ ptabs r18, t0 /* t0 (loop target until now) = return address */
+ blink t0, ZERO /* return */
+
+ .section .fixup, "ax"
+
+/*
+ * Fixup for a fault on one of the stores in
+ * csum_partial_copy_generic (the .src_err_N+4 entries of
+ * __ex_table point here).
+ *
+ * In: r5 = sum accumulated so far, r7 = dst_err_ptr,
+ * r18 = return address
+ * Out: *dst_err_ptr = -EFAULT, r2 = r5
+ *
+ * Fix over the previous revision: the error code is written with
+ * st.l. It used ld.l, which loaded from dst_err_ptr into r21 and
+ * never reported the error.
+ */
+_csum_partial_copy_generic_dst_err:
+ movi -(EFAULT), r21 /* r21 = error code to report */
+ st.l r7, 0, r21 /* *dst_err_ptr = -EFAULT */
+
+ /* Quiet exit */
+ or r5, ZERO, r2 /* return the sum reached so far */
+
+ ptabs r18, t0 /* t0 = return address held in r18 */
+ blink t0, ZERO
+
+/*
+ * Fixup for a fault on one of the loads in
+ * csum_partial_copy_generic (the .src_err_N entries of __ex_table
+ * point here).
+ *
+ * In: r3 = first DST byte, r20 = DST cursor of this variant,
+ * r5 = sum accumulated so far, r6 = src_err_ptr,
+ * r18 = return address
+ * Out: *src_err_ptr = -EFAULT, DST bytes [r3, r20) zeroed, r2 = r5
+ *
+ * Fix over the previous revision: the error code is written with
+ * st.l. It used ld.l, which loaded from src_err_ptr into r21 and
+ * never reported the error.
+ *
+ * NOTE(review): the copy routine advances r20 only after a whole
+ * step, so bytes stored earlier in the step that faulted lie above
+ * r20 and are not cleared here; confirm that is acceptable.
+ */
+_csum_partial_copy_generic_src_err:
+ movi -(EFAULT), r21 /* r21 = error code to report */
+ st.l r6, 0, r21 /* *src_err_ptr = -EFAULT */
+
+ /*
+ * Now reset the DST buffer.
+ * r20 points to the next DST byte.
+ * r3 points to the first DST byte.
+ */
+ _ptar .quiet_exit, t0
+ _ptar .src_err_loop, t1
+ beq r20, r3, t0 /* nothing was written yet: skip loop */
+
+.src_err_loop:
+ addi r20, -1, r20 /* step back one byte */
+ st.b r20, 0, ZERO /* clear it */
+ bne r20, r3, t1 /* repeat until the first DST byte */
+
+ /* Quiet exit */
+.quiet_exit:
+ or r5, ZERO, r2 /* return the sum reached so far */
+
+ ptabs r18, t0 /* t0 = return address held in r18 */
+ blink t0, ZERO
+
+ .section __ex_table, "a"
+
+ /*
+ * Exception table for the unrolled csum_partial_copy_generic.
+ * Each entry is a pair of .long values: the address of an
+ * instruction, then the address of the fixup handler listed for
+ * it.
+ *
+ * .src_err_N labels a load from the source buffer and maps to the
+ * source-error handler. .src_err_N+4 is the instruction 4 bytes
+ * further on, which in this file is the store that directly
+ * follows that load; it maps to the destination-error handler.
+ *
+ * NOTE(review): the load entries are all listed before the store
+ * entries, so the table is not in ascending address order; confirm
+ * the lookup code that uses asm_checksum_start/asm_checksum_end
+ * does not need a sorted table.
+ */
+ .global asm_checksum_start /* Marks the first entry */
+asm_checksum_start:
+ /* Loads from the source buffer */
+ .long .src_err_1, _csum_partial_copy_generic_src_err
+ .long .src_err_2, _csum_partial_copy_generic_src_err
+ .long .src_err_3, _csum_partial_copy_generic_src_err
+ .long .src_err_4, _csum_partial_copy_generic_src_err
+ .long .src_err_5, _csum_partial_copy_generic_src_err
+ .long .src_err_6, _csum_partial_copy_generic_src_err
+ .long .src_err_7, _csum_partial_copy_generic_src_err
+ .long .src_err_8, _csum_partial_copy_generic_src_err
+ .long .src_err_9, _csum_partial_copy_generic_src_err
+ .long .src_err_10, _csum_partial_copy_generic_src_err
+ .long .src_err_11, _csum_partial_copy_generic_src_err
+ .long .src_err_12, _csum_partial_copy_generic_src_err
+ /* Stores to the destination buffer (instruction after each load) */
+ .long .src_err_1+4, _csum_partial_copy_generic_dst_err
+ .long .src_err_2+4, _csum_partial_copy_generic_dst_err
+ .long .src_err_3+4, _csum_partial_copy_generic_dst_err
+ .long .src_err_4+4, _csum_partial_copy_generic_dst_err
+ .long .src_err_5+4, _csum_partial_copy_generic_dst_err
+ .long .src_err_6+4, _csum_partial_copy_generic_dst_err
+ .long .src_err_7+4, _csum_partial_copy_generic_dst_err
+ .long .src_err_8+4, _csum_partial_copy_generic_dst_err
+ .long .src_err_9+4, _csum_partial_copy_generic_dst_err
+ .long .src_err_10+4, _csum_partial_copy_generic_dst_err
+ .long .src_err_11+4, _csum_partial_copy_generic_dst_err
+ .long .src_err_12+4, _csum_partial_copy_generic_dst_err
+
+ .global asm_checksum_end /* Marks the end of the entries */
+asm_checksum_end:
+#endif /* NOTDEF */
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)