patch-2.1.79 linux/arch/sparc64/lib/checksum.S

diff -u --recursive --new-file v2.1.78/linux/arch/sparc64/lib/checksum.S linux/arch/sparc64/lib/checksum.S
@@ -35,204 +35,6 @@
 	/* I think I have an erection...  Once _AGAIN_ the SunSoft
 	 * engineers are caught asleep at the keyboard, tsk tsk...
 	 */
-#define CSUMCOPY_ECACHE_LOAD(off, t0, t1, t2, t3, t4, t5, t6, t7)			\
-	ldxa		[%src + off + 0x00] %asi, t0;					\
-	ldxa		[%src + off + 0x08] %asi, t1;					\
-	ldxa		[%src + off + 0x10] %asi, t2;					\
-	ldxa		[%src + off + 0x18] %asi, t3;					\
-	ldxa		[%src + off + 0x20] %asi, t4;					\
-	ldxa		[%src + off + 0x28] %asi, t5;					\
-	ldxa		[%src + off + 0x30] %asi, t6;					\
-	ldxa		[%src + off + 0x38] %asi, t7;					\
-	nop; nop; /* DO NOT TOUCH THIS!!!!! */
-
-#define CSUMCOPY_EC_STALIGNED_LDNXT(off, t0, t1, t2, t3, t4, t5, t6, t7)		\
-	stx		t0, [%dst + off - 0x40];					\
-	addcc		%sum, t0, %sum;							\
-	bcc,pt		%xcc, 11f;							\
-	 ldxa		[%src + off + 0x00] %asi, t0;					\
-	add		%sum, 1, %sum;							\
-11:	stx		t1, [%dst + off - 0x38];					\
-	addcc		%sum, t1, %sum;							\
-	bcc,pt		%xcc, 12f;							\
-	 ldxa		[%src + off + 0x08] %asi, t1;					\
-	add		%sum, 1, %sum;							\
-12:	stx		t2, [%dst + off - 0x30];					\
-	addcc		%sum, t2, %sum;							\
-	bcc,pt		%xcc, 13f;							\
-	 ldxa		[%src + off + 0x10] %asi, t2;					\
-	add		%sum, 1, %sum;							\
-13:	stx		t3, [%dst + off - 0x28];					\
-	addcc		%sum, t3, %sum;							\
-	bcc,pt		%xcc, 14f;							\
-	 ldxa		[%src + off + 0x18] %asi, t3;					\
-	add		%sum, 1, %sum;							\
-14:	stx		t4, [%dst + off - 0x20];					\
-	addcc		%sum, t4, %sum;							\
-	bcc,pt		%xcc, 15f;							\
-	 ldxa		[%src + off + 0x20] %asi, t4;					\
-	add		%sum, 1, %sum;							\
-15:	stx		t5, [%dst + off - 0x18];					\
-	addcc		%sum, t5, %sum;							\
-	bcc,pt		%xcc, 16f;							\
-	 ldxa		[%src + off + 0x28] %asi, t5;					\
-	add		%sum, 1, %sum;							\
-16:	stx		t6, [%dst + off - 0x10];					\
-	addcc		%sum, t6, %sum;							\
-	bcc,pt		%xcc, 17f;							\
-	 ldxa		[%src + off + 0x30] %asi, t6;					\
-	add		%sum, 1, %sum;							\
-17:	stx		t7, [%dst + off - 0x08];					\
-	addcc		%sum, t7, %sum;							\
-	bcc,pt		%xcc, 18f;							\
-	 ldxa		[%src + off + 0x38] %asi, t7;					\
-	add		%sum, 1, %sum;							\
-18:
-
-#define CSUMCOPY_EC_STUNALIGN_LDNXT(off, t0, t1, t2, t3, t4, t5, t6, t7)		\
-	stw		t0, [%dst + off - 0x3c];					\
-	addcc		%sum, t0, %sum;							\
-	srlx		t0, 32, t0;							\
-	stw		t0, [%dst + off - 0x40];					\
-	bcc,pt		%xcc, 21f;							\
-	 ldxa		[%src + off + 0x00] %asi, t0;					\
-	add		%sum, 1, %sum;							\
-21:	stw		t1, [%dst + off - 0x34];					\
-	addcc		%sum, t1, %sum;							\
-	srlx		t1, 32, t1;							\
-	stw		t1, [%dst + off - 0x38];					\
-	bcc,pt		%xcc, 22f;							\
-	 ldxa		[%src + off + 0x08] %asi, t1;					\
-	add		%sum, 1, %sum;							\
-22:	stw		t2, [%dst + off - 0x2c];					\
-	addcc		%sum, t2, %sum;							\
-	srlx		t2, 32, t2;							\
-	stw		t2, [%dst + off - 0x30];					\
-	bcc,pt		%xcc, 23f;							\
-	 ldxa		[%src + off + 0x10] %asi, t2;					\
-	add		%sum, 1, %sum;							\
-23:	stw		t3, [%dst + off - 0x24];					\
-	addcc		%sum, t3, %sum;							\
-	srlx		t3, 32, t3;							\
-	stw		t3, [%dst + off - 0x28];					\
-	bcc,pt		%xcc, 24f;							\
-	 ldxa		[%src + off + 0x18] %asi, t3;					\
-	add		%sum, 1, %sum;							\
-24:	stw		t4, [%dst + off - 0x1c];					\
-	addcc		%sum, t4, %sum;							\
-	srlx		t4, 32, t4;							\
-	stw		t4, [%dst + off - 0x20];					\
-	bcc,pt		%xcc, 25f;							\
-	 ldxa		[%src + off + 0x20] %asi, t4;					\
-	add		%sum, 1, %sum;							\
-25:	stw		t5, [%dst + off - 0x14];					\
-	addcc		%sum, t5, %sum;							\
-	srlx		t5, 32, t5;							\
-	stw		t5, [%dst + off - 0x18];					\
-	bcc,pt		%xcc, 26f;							\
-	 ldxa		[%src + off + 0x28] %asi, t5;					\
-	add		%sum, 1, %sum;							\
-26:	stw		t6, [%dst + off - 0x0c];					\
-	addcc		%sum, t6, %sum;							\
-	srlx		t6, 32, t6;							\
-	stw		t6, [%dst + off - 0x10];					\
-	bcc,pt		%xcc, 27f;							\
-	 ldxa		[%src + off + 0x30] %asi, t6;					\
-	add		%sum, 1, %sum;							\
-27:	stw		t7, [%dst + off - 0x04];					\
-	addcc		%sum, t7, %sum;							\
-	srlx		t7, 32, t7;							\
-	stw		t7, [%dst + off - 0x08];					\
-	bcc,pt		%xcc, 28f;							\
-	 ldxa		[%src + off + 0x38] %asi, t7;					\
-	add		%sum, 1, %sum;							\
-28:
-
-#define CSUMCOPY_EC_STALIGNED(off, t0, t1, t2, t3, t4, t5, t6, t7)			\
-	addcc		%sum, t0, %sum;							\
-	bcc,pt		%xcc, 31f;							\
-	 stx		t0, [%dst + off + 0x00];					\
-	add		%sum, 1, %sum;							\
-31:	addcc		%sum, t1, %sum;							\
-	bcc,pt		%xcc, 32f;							\
-	 stx		t1, [%dst + off + 0x08];					\
-	add		%sum, 1, %sum;							\
-32:	addcc		%sum, t2, %sum;							\
-	bcc,pt		%xcc, 33f;							\
-	 stx		t2, [%dst + off + 0x10];					\
-	add		%sum, 1, %sum;							\
-33:	addcc		%sum, t3, %sum;							\
-	bcc,pt		%xcc, 34f;							\
-	 stx		t3, [%dst + off + 0x18];					\
-	add		%sum, 1, %sum;							\
-34:	addcc		%sum, t4, %sum;							\
-	bcc,pt		%xcc, 35f;							\
-	 stx		t4, [%dst + off + 0x20];					\
-	add		%sum, 1, %sum;							\
-35:	addcc		%sum, t5, %sum;							\
-	bcc,pt		%xcc, 36f;							\
-	 stx		t5, [%dst + off + 0x28];					\
-	add		%sum, 1, %sum;							\
-36:	addcc		%sum, t6, %sum;							\
-	bcc,pt		%xcc, 37f;							\
-	 stx		t6, [%dst + off + 0x30];					\
-	add		%sum, 1, %sum;							\
-37:	addcc		%sum, t7, %sum;							\
-	bcc,pt		%xcc, 38f;							\
-	 stx		t7, [%dst + off + 0x38];					\
-	add		%sum, 1, %sum;							\
-38:
-
-#define CSUMCOPY_EC_STUNALIGN(off, t0, t1, t2, t3, t4, t5, t6, t7)			\
-	stw		t0, [%dst + off + 0x04];					\
-	addcc		%sum, t0, %sum;							\
-	srlx		t0, 32, t0;							\
-	bcc,pt		%xcc, 41f;							\
-	 stw		t0, [%dst + off + 0x00];					\
-	add		%sum, 1, %sum;							\
-41:	stw		t1, [%dst + off + 0x0c];					\
-	addcc		%sum, t1, %sum;							\
-	srlx		t1, 32, t1;							\
-	bcc,pt		%xcc, 42f;							\
-	 stw		t1, [%dst + off + 0x08];					\
-	add		%sum, 1, %sum;							\
-42:	stw		t2, [%dst + off + 0x14];					\
-	addcc		%sum, t2, %sum;							\
-	srlx		t2, 32, t2;							\
-	bcc,pt		%xcc, 43f;							\
-	 stw		t2, [%dst + off + 0x10];					\
-	add		%sum, 1, %sum;							\
-43:	stw		t3, [%dst + off + 0x1c];					\
-	addcc		%sum, t3, %sum;							\
-	srlx		t3, 32, t3;							\
-	bcc,pt		%xcc, 44f;							\
-	 stw		t3, [%dst + off + 0x18];					\
-	add		%sum, 1, %sum;							\
-44:	stw		t4, [%dst + off + 0x24];					\
-	addcc		%sum, t4, %sum;							\
-	srlx		t4, 32, t4;							\
-	bcc,pt		%xcc, 45f;							\
-	 stw		t4, [%dst + off + 0x20];					\
-	add		%sum, 1, %sum;							\
-45:	stw		t5, [%dst + off + 0x2c];					\
-	addcc		%sum, t5, %sum;							\
-	srlx		t5, 32, t5;							\
-	bcc,pt		%xcc, 46f;							\
-	 stw		t5, [%dst + off + 0x28];					\
-	add		%sum, 1, %sum;							\
-46:	stw		t6, [%dst + off + 0x34];					\
-	addcc		%sum, t6, %sum;							\
-	srlx		t6, 32, t6;							\
-	bcc,pt		%xcc, 47f;							\
-	 stw		t6, [%dst + off + 0x30];					\
-	add		%sum, 1, %sum;							\
-47:	stw		t7, [%dst + off + 0x3c];					\
-	addcc		%sum, t7, %sum;							\
-	srlx		t7, 32, t7;							\
-	bcc,pt		%xcc, 48f;							\
-	 stw		t7, [%dst + off + 0x38];					\
-	add		%sum, 1, %sum;							\
-48:
 
 #define CSUMCOPY_LASTCHUNK(off, t0, t1)							\
 	ldxa		[%src - off - 0x08] %asi, t0;					\
@@ -296,6 +98,7 @@
 	 add		%sum, 1, %sum		! IEU1
 
 cc_fixit:
+	cmp		%len, 6			! IEU1	Group
 	bl,a,pn		%icc, ccte		! CTI
 	 andcc		%len, 0xf, %g7		! IEU1	Group
 	andcc		%src, 2, %g0		! IEU1	Group
@@ -316,17 +119,17 @@
 	sll		%g3, 16, %g3		! IEU0	Group
 	srl		%sum, 16, %sum		! IEU0	Group
 	or		%g3, %sum, %sum		! IEU0	Group (regdep)
-1:	be,pt		%icc, cc_dword_aligned	! CTI
-	 andn		%len, 0xff, %g2		! IEU1
+1:	be,pt		%icc, ccmerge		! CTI
+	 andcc		%len, 0xf0, %g1		! IEU1
 	lduwa		[%src + 0x00] %asi, %g4	! Load	Group
 	sub		%len, 4, %len		! IEU0
 	add		%src, 4, %src		! IEU1
 	add		%dst, 4, %dst		! IEU0	Group
 	addcc		%g4, %sum, %sum		! IEU1	Group + 1 bubble
 	stw		%g4, [%dst - 0x4]	! Store
-	bcc,pt		%xcc, cc_dword_aligned	! CTI
-	 andn		%len, 0xff, %g2		! IEU0	Group
-	b,pt		%xcc, cc_dword_aligned	! CTI	4 clocks (mispredict)
+	bcc,pt		%xcc, ccmerge		! CTI
+	 andcc		%len, 0xf0, %g1		! IEU1	Group
+	b,pt		%xcc, ccmerge		! CTI	4 clocks (mispredict)
 	 add		%sum, 1, %sum		! IEU0
 
 	.align		32
@@ -342,26 +145,8 @@
 	 cmp		%len, 256		! IEU1	Group
 	bgeu,pt		%icc, csum_partial_copy_vis ! CTI
 	 andcc		%src, 7, %g0		! IEU1	Group
-	be,pt		%icc, cc_dword_aligned	! CTI
-	 andn		%len, 0xff, %g2		! IEU0
-	b,pt		%xcc, cc_fixit		! CTI	Group
-	 cmp		%len, 6			! IEU1
-cc_dword_aligned:
-	brz,pn		%g2, 3f			! CTI	Group
-	 andcc		%dst, 4, %g0		! IEU1	Group (brz uses IEU1)
-	be,pn		%icc, ccdbl + 4		! CTI
-5:	CSUMCOPY_ECACHE_LOAD(       0x00,%o4,%o5,%g2,%g3,%g4,%g5,%g1,%g7)
-	CSUMCOPY_EC_STUNALIGN_LDNXT(0x40,%o4,%o5,%g2,%g3,%g4,%g5,%g1,%g7)
-	CSUMCOPY_EC_STUNALIGN_LDNXT(0x80,%o4,%o5,%g2,%g3,%g4,%g5,%g1,%g7)
-	CSUMCOPY_EC_STUNALIGN_LDNXT(0xc0,%o4,%o5,%g2,%g3,%g4,%g5,%g1,%g7)
-	CSUMCOPY_EC_STUNALIGN(      0xc0,%o4,%o5,%g2,%g3,%g4,%g5,%g1,%g7)
-10:
-	sub		%len, 256, %len		! IEU0	Group
-	add		%src, 256, %src		! IEU1
-	andncc		%len, 0xff, %g0		! IEU1	Group
-	bne,pt		%icc, 5b		! CTI
-	 add		%dst, 256, %dst		! IEU0
-3:	andcc		%len, 0xf0, %g1		! IEU1	Group
+	bne,pn		%icc, cc_fixit		! CTI
+	 andcc		%len, 0xf0, %g1		! IEU1	Group
 ccmerge:be,pn		%icc, ccte		! CTI
 	 andcc		%len, 0xf, %g7		! IEU1	Group
 	sll		%g1, 2, %o4		! IEU0
@@ -396,19 +181,6 @@
 	 add		%o0, 1, %o0		! IEU1	4 clocks (mispredict)
 1:	retl					! CTI	Group brk forced
 	 sllx		%g4, 32,%g4		! IEU0	Group
-ccdbl:	CSUMCOPY_ECACHE_LOAD(       0x00,%o4,%o5,%g2,%g3,%g4,%g5,%g1,%g7)
-	CSUMCOPY_EC_STALIGNED_LDNXT(0x40,%o4,%o5,%g2,%g3,%g4,%g5,%g1,%g7)
-	CSUMCOPY_EC_STALIGNED_LDNXT(0x80,%o4,%o5,%g2,%g3,%g4,%g5,%g1,%g7)
-	CSUMCOPY_EC_STALIGNED_LDNXT(0xc0,%o4,%o5,%g2,%g3,%g4,%g5,%g1,%g7)
-	CSUMCOPY_EC_STALIGNED(      0xc0,%o4,%o5,%g2,%g3,%g4,%g5,%g1,%g7)
-11:
-	sub		%len, 256, %len		! IEU0	Group
-	add		%src, 256, %src		! IEU1
-	andncc		%len, 0xff, %g0		! IEU1	Group	
-	bne,pt		%icc, ccdbl		! CTI
-	 add		%dst, 256, %dst		! IEU0
-	b,pt		%xcc, ccmerge		! CTI	Group
-	 andcc		%len, 0xf0, %g1		! IEU1
 
 ccslow:	mov	0, %g5
 	brlez,pn %len, 4f
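
[Note] The macros deleted above all revolve around one idiom: ones'-complement
accumulation with end-around carry. Each addcc/bcc/add %sum, 1, %sum triple
adds a 64-bit word into the running sum and, when the add carries out of bit
63, folds that carry back into bit 0, while the paired stx/stw performs the
copy half of copy-and-checksum. Below is a minimal C sketch of that
accumulation, not the kernel's actual interface; csum_add64 and csum_copy64
are hypothetical names, and the real routine also handles alignment and the
sub-8-byte tail (the CSUMCOPY_LASTCHUNK path kept by this patch).

	#include <stdint.h>
	#include <stddef.h>

	/*
	 * Hypothetical sketch of the end-around-carry step done by the
	 * deleted CSUMCOPY_* macros: a carry out of bit 63 wraps around
	 * and is re-added as +1 (the "bcc ... add %sum, 1, %sum" pairs).
	 */
	static uint64_t csum_add64(uint64_t sum, uint64_t word)
	{
		uint64_t res = sum + word;

		/* Unsigned wraparound means the add carried out of bit 63. */
		if (res < sum)
			res += 1;
		return res;
	}

	/*
	 * Copy and accumulate len bytes; assumes len is a multiple of 8
	 * and both pointers are 8-byte aligned, which the assembly's
	 * alignment fixup paths (cc_fixit above) establish beforehand.
	 */
	static uint64_t csum_copy64(uint64_t *dst, const uint64_t *src,
				    size_t len, uint64_t sum)
	{
		size_t i;

		for (i = 0; i < len / 8; i++) {
			uint64_t w = src[i];

			dst[i] = w;	/* the copy half of copy-and-checksum */
			sum = csum_add64(sum, w);
		}
		return sum;
	}

Because end-around-carry addition is associative and commutative, the 64-bit
partial sum can later be folded to the 16-bit Internet checksum in any order,
which is why the unrolled loops were free to interleave loads, stores, and
adds for pipeline scheduling.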
