patch-2.1.34 linux/arch/sparc64/lib/blockops.S

Next file: linux/arch/sparc64/lib/checksum.S
Previous file: linux/arch/sparc64/lib/Makefile
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.1.33/linux/arch/sparc64/lib/blockops.S linux/arch/sparc64/lib/blockops.S
@@ -1,33 +1,32 @@
-/* $Id: blockops.S,v 1.3 1997/02/25 20:00:10 jj Exp $
+/* $Id: blockops.S,v 1.5 1997/03/26 18:34:28 jj Exp $
  * arch/sparc64/lib/blockops.S: UltraSparc block zero optimized routines.
  *
  * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  */
 
 #include <asm/asi.h>
 
-/* FIXME: Write this. */
-#define BZERO_TEST
-
 	/* Zero out 256 bytes of memory at (buf + offset). */
 #define BLAST_BLOCK(buf, offset)				\
 	stda		%f48, [buf + offset + 0x00] %asi;	\
 	stda		%f48, [buf + offset + 0x40] %asi;	\
 	stda		%f48, [buf + offset + 0x80] %asi;	\
-	stda		%f48, [buf + offset + 0xc0] %asi;	\
+	stda		%f48, [buf + offset + 0xc0] %asi;
 
-	/* Copy 32 bytes of memory at (src + offset) to
+	/* Copy 256 bytes of memory at (src + offset) to
 	 * (dst + offset).
 	 */
-#define MIRROR_BLOCK(dst, src, offset, t0, t1, t2, t3)		\
-	ldx		[src + offset + 0x18], t0;		\
-	ldx		[src + offset + 0x10], t1;		\
-	ldx		[src + offset + 0x08], t2;		\
-	ldx		[src + offset + 0x00], t3;		\
-	stx		t0, [dst + offset + 0x18];		\
-	stx		t1, [dst + offset + 0x10];		\
-	stx		t2, [dst + offset + 0x08];		\
-	stx		t3, [dst + offset + 0x00];
+#define MIRROR_BLOCK(dst, src, offset, sync)			\
+	ldda		[src + offset + 0x000] %asi, %f0;	\
+	ldda		[src + offset + 0x040] %asi, %f16;	\
+	ldda		[src + offset + 0x080] %asi, %f32;	\
+	ldda		[src + offset + 0x0c0] %asi, %f48;	\
+	membar		sync;					\
+	stda		%f0, [dst + offset + 0x000] %asi;	\
+	stda		%f16, [dst + offset + 0x040] %asi;	\
+	stda		%f32, [dst + offset + 0x080] %asi;	\
+	stda		%f48, [dst + offset + 0x0c0] %asi;
 
 	.text
 	.align	4
@@ -38,7 +37,17 @@
 	mov		%o0, %o1
 	wr		%g0, ASI_BLK_P, %asi
 	mov		0x10, %g2
-	BZERO_TEST
+
+	membar		#Sync|#StoreLoad
+
+	fzero		%f48
+	fzero		%f50
+	fzero		%f52
+	fzero		%f54
+	fzero		%f56
+	fzero		%f58
+	fzero		%f60
+	fzero		%f62
 1:
 	BLAST_BLOCK(%o0, 0x000)
 	BLAST_BLOCK(%o0, 0x100)
@@ -48,6 +57,8 @@
 	bne,pt		%icc, 1b
 	 add		%o0, 0x400, %o0
 
+	membar		#Sync|#LoadStore|#StoreStore
+
 	retl
 	 mov		%o1, %o0
 
@@ -56,7 +67,15 @@
 	mov		%o0, %o1
 	wr		%g0, ASI_BLK_P, %asi
 	mov		0x08, %g2
-	BZERO_TEST
+	membar		#Sync|#StoreLoad
+	fzero		%f48
+	fzero		%f50
+	fzero		%f52
+	fzero		%f54
+	fzero		%f56
+	fzero		%f58
+	fzero		%f60
+	fzero		%f62
 1:
 	BLAST_BLOCK(%o0, 0x000)
 	BLAST_BLOCK(%o0, 0x100)
@@ -66,27 +85,54 @@
 	bne,pt		%icc, 1b
 	 add		%o0, 0x400, %o0
 
+	membar		#Sync|#LoadStore|#StoreStore
+
 	retl
 	 mov		%o1, %o0
 
+	.globl		__bfill64
+__bfill64:
+	/* %o0 = buf, %o1 = 64-bit value to fill with; %o0 is left advanced past the filled area */
+	stx		%o1, [%sp + 0x7ff + 128]	/* spill the fill value so it can be reloaded into an FP register */
+	wr		%g0, ASI_BLK_P, %asi		/* the stda stores in BLAST_BLOCK go through %asi */
+	mov		0x08, %g2			/* loop count: 8 passes of 0x400 bytes = 0x2000 bytes */
+	ldd		[%sp + 0x7ff + 128], %f48	/* %f48 = fill value, read back from the slot written above */
+	membar		#Sync|#StoreLoad
+	fmovd		%f48, %f50			/* replicate the value into %f50..%f62 */
+	fmovd		%f48, %f52
+	fmovd		%f48, %f54
+	fmovd		%f48, %f56
+	fmovd		%f48, %f58
+	fmovd		%f48, %f60
+	fmovd		%f48, %f62			/* NOTE(review): %f48..%f62 are overwritten and not restored here; confirm callers do not rely on them */
+1:
+	BLAST_BLOCK(%o0, 0x000)
+	BLAST_BLOCK(%o0, 0x100)
+	BLAST_BLOCK(%o0, 0x200)
+	BLAST_BLOCK(%o0, 0x300)
+	subcc		%g2, 1, %g2			/* one pass done; loop until the count reaches zero */
+	bne,pt		%icc, 1b
+	 add		%o0, 0x400, %o0			/* delay slot: advance buf by the 0x400 bytes just written */
+
+	retl
+	 membar		#Sync|#LoadStore|#StoreStore	/* delay slot: barrier issued as part of the return */
+
 	.globl		__copy_1page
 __copy_1page:
 	/* %o0 = dst, %o1 = src */
-	or		%g0, 0x10, %g1
+	or		%g0, 0x08, %g1			/* loop count: 8 passes of 0x400 bytes = 0x2000 bytes */
+	wr		%g0, ASI_BLK_P, %asi		/* the ldda/stda accesses in MIRROR_BLOCK go through %asi */
+	membar		#Sync|#StoreLoad
 1:
-	MIRROR_BLOCK(%o0, %o1, 0x00, %o2, %o3, %o4, %o5)
-	MIRROR_BLOCK(%o0, %o1, 0x20, %o2, %o3, %o4, %o5)
-	MIRROR_BLOCK(%o0, %o1, 0x40, %o2, %o3, %o4, %o5)
-	MIRROR_BLOCK(%o0, %o1, 0x60, %o2, %o3, %o4, %o5)
-	MIRROR_BLOCK(%o0, %o1, 0x80, %o2, %o3, %o4, %o5)
-	MIRROR_BLOCK(%o0, %o1, 0xa0, %o2, %o3, %o4, %o5)
-	MIRROR_BLOCK(%o0, %o1, 0xc0, %o2, %o3, %o4, %o5)
-	MIRROR_BLOCK(%o0, %o1, 0xe0, %o2, %o3, %o4, %o5)
+	MIRROR_BLOCK(%o0, %o1, 0x000, #Sync)	/* each invocation copies 256 bytes, staged in FP registers starting at %f0, %f16, %f32, %f48 */
+	MIRROR_BLOCK(%o0, %o1, 0x100, #Sync)
+	MIRROR_BLOCK(%o0, %o1, 0x200, #Sync)
+	MIRROR_BLOCK(%o0, %o1, 0x300, #Sync)	/* NOTE(review): FP registers are overwritten and not restored here; confirm callers do not rely on them */
 	subcc		%g1, 1, %g1
-	add		%o0, 0x100, %o0
+	add		%o0, 0x400, %o0			/* advance dst by the 0x400 bytes just copied */
 	bne,pt		%icc, 1b
-	 add		%o1, 0x100, %o1
+	 add		%o1, 0x400, %o1			/* delay slot: advance src by the same amount */
 
 	retl
-	 nop
+	 membar		#Sync|#LoadStore|#StoreStore	/* delay slot: barrier issued as part of the return */
 

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov