patch-2.4.5 linux/arch/arm/lib/csumpartialcopygeneric.S
- Lines: 328
- Date: Wed May 16 15:25:16 2001
- Orig file: v2.4.4/linux/arch/arm/lib/csumpartialcopygeneric.S
- Orig date: Wed Dec 31 16:00:00 1969
diff -u --recursive --new-file v2.4.4/linux/arch/arm/lib/csumpartialcopygeneric.S linux/arch/arm/lib/csumpartialcopygeneric.S
@@ -0,0 +1,327 @@
+/*
+ * linux/arch/arm/lib/csumpartialcopygeneric.S
+ *
+ * Copyright (C) 1995-2001 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * unsigned int
+ * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum)
+ * r0 = src, r1 = dst, r2 = len, r3 = sum
+ * Returns : r0 = checksum
+ *
+ * Note that 'tst' and 'teq' preserve the carry flag: the sum is
+ * accumulated with adcs, so C must survive the intervening tests.
+ */
+
+src .req r0
+dst .req r1
+len .req r2
+sum .req r3
+
+.zero: mov r0, sum
+ load_regs ea
+
+ /*
+ * Align an unaligned destination pointer. We know that
+ * we have >= 8 bytes here, so we don't need to check
+ * the length. Note that the source pointer hasn't been
+ * aligned yet.
+ */
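+ /*
+ * A byte stored at an odd destination address belongs in
+ * the high byte of its little-endian 16-bit word, hence
+ * the 'lsl #8' on the first adcs below; .done rotates the
+ * final sum by 8 bits to compensate (the ones-complement
+ * sum is invariant under byte rotation, RFC 1071).
+ */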
+.dst_unaligned: tst dst, #1
+ beq .dst_16bit
+
+ load1b ip
+ sub len, len, #1
+ adcs sum, sum, ip, lsl #8 @ update checksum
+ strb ip, [dst], #1
+ tst dst, #2
+ moveq pc, lr @ dst is now 32bit aligned
+
+.dst_16bit: load2b r8, ip
+ sub len, len, #2
+ adcs sum, sum, r8
+ strb r8, [dst], #1
+ adcs sum, sum, ip, lsl #8
+ strb ip, [dst], #1
+ mov pc, lr @ dst is now 32bit aligned
+
+ /*
+ * Handle 0 to 7 bytes, with any alignment of source and
+ * destination pointers. Note that when we get here, C = 0
+ */
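+ /*
+ * Bail out on zero length, align dst to 16 bits if
+ * necessary, copy halfword pairs, then a trailing byte.
+ */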
+.less8: teq len, #0 @ check for zero count
+ beq .zero
+
+ /* we must have at least one byte. */
+ tst dst, #1 @ dst 16-bit aligned
+ beq .less8_aligned
+
+ /* Align dst */
+ load1b ip
+ sub len, len, #1
+ adcs sum, sum, ip, lsl #8 @ update checksum
+ strb ip, [dst], #1
+ tst len, #6
+ beq .less8_byteonly
+
+1: load2b r8, ip
+ sub len, len, #2
+ adcs sum, sum, r8
+ strb r8, [dst], #1
+ adcs sum, sum, ip, lsl #8
+ strb ip, [dst], #1
+.less8_aligned: tst len, #6
+ bne 1b
+.less8_byteonly:
+ tst len, #1
+ beq .done
+ load1b r8
+ adcs sum, sum, r8 @ update checksum
+ strb r8, [dst], #1
+ b .done
+
+FN_ENTRY
+ mov ip, sp
+ save_regs
+ sub fp, ip, #4
+
+ cmp len, #8 @ Ensure that we have at least
+ blo .less8 @ 8 bytes to copy.
+
+ adds sum, sum, #0 @ C = 0
+ tst dst, #3 @ Test destination alignment
+ blne .dst_unaligned @ align destination, return here
+
+ /*
+ * Ok, the dst pointer is now 32bit aligned, and we know
+ * that we must have more than 4 bytes to copy. Note
+ * that C contains the carry from the dst alignment above.
+ */
+
+ tst src, #3 @ Test source alignment
+ bne .src_not_aligned
+
+ /* Routine for src & dst aligned */
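+ @ 16 bytes per loop, then 8- and 4-byte blocks, then the 0-3 byte tail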
+
+ bics ip, len, #15
+ beq 2f
+
+1: load4l r4, r5, r6, r7
+ stmia dst!, {r4, r5, r6, r7}
+ adcs sum, sum, r4
+ adcs sum, sum, r5
+ adcs sum, sum, r6
+ adcs sum, sum, r7
+ sub ip, ip, #16
+ teq ip, #0
+ bne 1b
+
+2: ands ip, len, #12
+ beq 4f
+ tst ip, #8
+ beq 3f
+ load2l r4, r5
+ stmia dst!, {r4, r5}
+ adcs sum, sum, r4
+ adcs sum, sum, r5
+ tst ip, #4
+ beq 4f
+
+3: load1l r4
+ str r4, [dst], #4
+ adcs sum, sum, r4
+
+4: ands len, len, #3
+ beq .done
+ load1l r4
+ tst len, #2
+ beq .exit
+ adcs sum, sum, r4, lsl #16
+ strb r4, [dst], #1
+ mov r4, r4, lsr #8
+ strb r4, [dst], #1
+ mov r4, r4, lsr #8
+.exit: tst len, #1
+ strneb r4, [dst], #1
+ andne r4, r4, #255
+ adcnes sum, sum, r4
+
+ /*
+ * If the dst pointer was not 16-bit aligned, we
+ * need to rotate the checksum here to get around
+ * the inefficient byte manipulations in the
+ * architecture independent code.
+ */
+.done: adc r0, sum, #0
+ ldr sum, [sp, #0] @ dst
+ tst sum, #1
+ movne sum, r0, lsl #8
+ orrne r0, sum, r0, lsr #24
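+ @ i.e. r0 = (r0 << 8) | (r0 >> 24), a rotate left by 8 bits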
+ load_regs ea
+
+.src_not_aligned:
+ adc sum, sum, #0 @ include C from dst alignment
+ and ip, src, #3
+ bic src, src, #3
+ load1l r4
+ cmp ip, #2
+ beq .src2_aligned
+ bhi .src3_aligned
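+ /*
+ * src offset 1: keep the three valid bytes of the current
+ * word (lsr #8) and merge in the low byte of the next
+ * aligned word (lsl #24).
+ */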
+ mov r4, r4, lsr #8 @ C = 0
+ bics ip, len, #15
+ beq 2f
+1: load4l r5, r6, r7, r8
+ orr r4, r4, r5, lsl #24
+ mov r5, r5, lsr #8
+ orr r5, r5, r6, lsl #24
+ mov r6, r6, lsr #8
+ orr r6, r6, r7, lsl #24
+ mov r7, r7, lsr #8
+ orr r7, r7, r8, lsl #24
+ stmia dst!, {r4, r5, r6, r7}
+ adcs sum, sum, r4
+ adcs sum, sum, r5
+ adcs sum, sum, r6
+ adcs sum, sum, r7
+ mov r4, r8, lsr #8
+ sub ip, ip, #16
+ teq ip, #0
+ bne 1b
+2: ands ip, len, #12
+ beq 4f
+ tst ip, #8
+ beq 3f
+ load2l r5, r6
+ orr r4, r4, r5, lsl #24
+ mov r5, r5, lsr #8
+ orr r5, r5, r6, lsl #24
+ stmia dst!, {r4, r5}
+ adcs sum, sum, r4
+ adcs sum, sum, r5
+ mov r4, r6, lsr #8
+ tst ip, #4
+ beq 4f
+3: load1l r5
+ orr r4, r4, r5, lsl #24
+ str r4, [dst], #4
+ adcs sum, sum, r4
+ mov r4, r5, lsr #8
+4: ands len, len, #3
+ beq .done
+ tst len, #2
+ beq .exit
+ adcs sum, sum, r4, lsl #16
+ strb r4, [dst], #1
+ mov r4, r4, lsr #8
+ strb r4, [dst], #1
+ mov r4, r4, lsr #8
+ b .exit
+
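+ @ src offset 2: two bytes carried between words, shifts are 16/16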
+.src2_aligned: mov r4, r4, lsr #16
+ adds sum, sum, #0
+ bics ip, len, #15
+ beq 2f
+1: load4l r5, r6, r7, r8
+ orr r4, r4, r5, lsl #16
+ mov r5, r5, lsr #16
+ orr r5, r5, r6, lsl #16
+ mov r6, r6, lsr #16
+ orr r6, r6, r7, lsl #16
+ mov r7, r7, lsr #16
+ orr r7, r7, r8, lsl #16
+ stmia dst!, {r4, r5, r6, r7}
+ adcs sum, sum, r4
+ adcs sum, sum, r5
+ adcs sum, sum, r6
+ adcs sum, sum, r7
+ mov r4, r8, lsr #16
+ sub ip, ip, #16
+ teq ip, #0
+ bne 1b
+2: ands ip, len, #12
+ beq 4f
+ tst ip, #8
+ beq 3f
+ load2l r5, r6
+ orr r4, r4, r5, lsl #16
+ mov r5, r5, lsr #16
+ orr r5, r5, r6, lsl #16
+ stmia dst!, {r4, r5}
+ adcs sum, sum, r4
+ adcs sum, sum, r5
+ mov r4, r6, lsr #16
+ tst ip, #4
+ beq 4f
+3: load1l r5
+ orr r4, r4, r5, lsl #16
+ str r4, [dst], #4
+ adcs sum, sum, r4
+ mov r4, r5, lsr #16
+4: ands len, len, #3
+ beq .done
+ tst len, #2
+ beq .exit
+ adcs sum, sum, r4, lsl #16
+ strb r4, [dst], #1
+ mov r4, r4, lsr #8
+ strb r4, [dst], #1
+ tst len, #1
+ beq .done
+ load1b r4
+ b .exit
+
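+ @ src offset 3: one byte carried between words, shifts are 24/8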
+.src3_aligned: mov r4, r4, lsr #24
+ adds sum, sum, #0
+ bics ip, len, #15
+ beq 2f
+1: load4l r5, r6, r7, r8
+ orr r4, r4, r5, lsl #8
+ mov r5, r5, lsr #24
+ orr r5, r5, r6, lsl #8
+ mov r6, r6, lsr #24
+ orr r6, r6, r7, lsl #8
+ mov r7, r7, lsr #24
+ orr r7, r7, r8, lsl #8
+ stmia dst!, {r4, r5, r6, r7}
+ adcs sum, sum, r4
+ adcs sum, sum, r5
+ adcs sum, sum, r6
+ adcs sum, sum, r7
+ mov r4, r8, lsr #24
+ sub ip, ip, #16
+ teq ip, #0
+ bne 1b
+2: ands ip, len, #12
+ beq 4f
+ tst ip, #8
+ beq 3f
+ load2l r5, r6
+ orr r4, r4, r5, lsl #8
+ mov r5, r5, lsr #24
+ orr r5, r5, r6, lsl #8
+ stmia dst!, {r4, r5}
+ adcs sum, sum, r4
+ adcs sum, sum, r5
+ mov r4, r6, lsr #24
+ tst ip, #4
+ beq 4f
+3: load1l r5
+ orr r4, r4, r5, lsl #8
+ str r4, [dst], #4
+ adcs sum, sum, r4
+ mov r4, r5, lsr #24
+4: ands len, len, #3
+ beq .done
+ tst len, #2
+ beq .exit
+ adcs sum, sum, r4, lsl #16
+ strb r4, [dst], #1
+ load1l r4
+ strb r4, [dst], #1
+ adcs sum, sum, r4, lsl #24
+ mov r4, r4, lsr #8
+ b .exit
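
For reference, the routine above is equivalent to the following C model of a
ones-complement checksum-and-copy (an illustrative sketch, not kernel code;
the function name is made up here, and the alignment fast paths of the
hand-optimised assembly are deliberately left out):

/*
 * Copy len bytes from src to dst while accumulating a 32-bit
 * ones-complement partial sum over little-endian 16-bit words,
 * seeded with sum.  The caller folds the result to 16 bits.
 */
static unsigned int csum_copy_model(const unsigned char *src,
                                    unsigned char *dst,
                                    int len, unsigned int sum)
{
        while (len > 1) {
                unsigned int w = src[0] | (src[1] << 8);
                sum += w;
                sum += (sum < w);       /* end-around carry, like adcs */
                dst[0] = src[0];
                dst[1] = src[1];
                src += 2; dst += 2; len -= 2;
        }
        if (len) {                      /* trailing odd byte */
                unsigned int b = *src;
                sum += b;
                sum += (sum < b);
                *dst = *src;
        }
        return sum;
}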