patch-1.3.71 linux/arch/sparc/lib/memcpy.S

Next file: linux/arch/sparc/mm/Makefile
Previous file: linux/arch/sparc/lib/Makefile
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v1.3.70/linux/arch/sparc/lib/memcpy.S linux/arch/sparc/lib/memcpy.S
@@ -0,0 +1,520 @@
+! Fast memmove/memcpy/bcopy
+! Copyright Australian National University, 1995
+! This file may be used under the terms of the GNU Public License
+! Author: Paul Mackerras, September 95
+! Minor beautifications David S. Miller
+
+#include <asm/cprefix.h>
+
+	.globl	C_LABEL(bcopy)
+C_LABEL(bcopy):
+	mov	%o0,%o3		! swap the first two arguments, %o3 is scratch
+	mov	%o1,%o0
+	mov	%o3,%o1		! then fall through into memmove/memcpy
+
+	.globl	C_LABEL(amemmove)
+C_LABEL(amemmove):
+	.globl	C_LABEL(memmove)
+	.globl	C_LABEL(memcpy)
+C_LABEL(memmove):
+C_LABEL(memcpy):
+	save	%sp,-96,%sp	! new window: %i0 = dest, %i1 = src, %i2 = count
+	mov	%i0,%l7		! keep original dest, handed back in %o0 at Lout
+
+	cmp	%i0,%i1		! check for dest within source area
+	bleu,a	1f		! dest <= src (unsigned): copy forwards
+	andcc	%i0,3,%l1	! (delay slot, only if taken) %l1 = dest & 3, cc for 1:
+	add	%i1,%i2,%l0	! %l0 = src + count
+	cmp	%i0,%l0
+	blu,a	Lback		! src < dest < src + count: copy backwards
+	mov	%l0,%i1		! (delay slot, only if taken) %i1 = src + count
+
+	! copying forwards
+	! first get dest to be word-aligned
+	andcc	%i0,3,%l1	! %l1 = dest & 3
+1:
+	be,a	Lwalign		! if dest already word-aligned
+	cmp	%i2,4		! (delay slot, only if taken) cc for the test at Lwalign
+	mov	4,%l2
+	sub	%l2,%l1,%l2	! #bytes until word-aligned
+	subcc	%i2,%l2,%i2	! count -= those bytes
+	ble,a	Lend		! not copying enough to get past word bdry
+	addcc	%i2,%l2,%i2	! (delay slot, only if taken) restore count, cc for Lend
+
+1:
+	ldub	[%i1],%o0	! copy single bytes until word-aligned
+	add	%i1,1,%i1
+	subcc	%l2,1,%l2	! cc tested by bgt below
+	stb	%o0,[%i0]
+	bgt	1b
+	add	%i0,1,%i0	! (delay slot) advance dest
+	cmp	%i2,4		! cc for the test at Lwalign
+
+Lwalign:			! dest now word aligned
+	blt,a	Lend		! fewer than 4 bytes left: finish bytewise
+	orcc	%i2,%g0,%g0	! (delay slot, only if taken) cc from count for Lend
+
+	andcc	%i1,3,%l0	! %l0 = src & 3
+	be,a	Ldoword		! if dest word aligned wrt src
+	andcc	%i0,4,%g0	! (delay slot, only if taken) cc = dest & 4 for Ldoword
+
+	! yucky cases where we have to shift
+
+	mov	4,%l2
+	sub	%l2,%l0,%l2	! address adjustment, used at Lendn
+	sll	%l0,3,%l0	! bit offset = shift left count
+	sll	%l2,3,%l1	! shift right count
+	add	%i1,%l2,%i1	! round up to next word
+	ld	[%i1-4],%o0	! get first word
+
+	andcc	%i0,4,%g0	! get destination double-word aligned
+	be,a	1f
+	andcc	%i1,4,%g0	! (delay slot, only if taken) cc = src & 4 for 1:
+	ld	[%i1],%o1	! by constructing and storing one word
+	add	%i0,4,%i0
+	add	%i1,4,%i1
+	sub	%i2,4,%i2
+	sll	%o0,%l0,%o0	! previous word shifted left ...
+	srl	%o1,%l1,%l6	! ... merged with next word shifted right
+	or	%o0,%l6,%o0
+	st	%o0,[%i0-4]
+	mov	%o1,%o0		! newest source word becomes the carried word
+
+	andcc	%i1,4,%g0	! now construct & store pairs of double-words
+1:
+	bne,a	3f		! if source now not double-word aligned
+	subcc	%i2,4,%i2	! (delay slot, only if taken) count -= 4, cc for 3:
+	subcc	%i2,16,%i2	! count -= 16, negative if under 16 bytes left
+	blt	2f
+	mov	%o0,%o1		! (delay slot, always) carried word moves to %o1
+4:				! 16 bytes per pass, carried word in %o1
+	ldd	[%i1],%o2	! %o2,%o3 = source words 0,1
+	sll	%o1,%l0,%o4
+	ldd	[%i1+8],%o0	! %o0,%o1 = source words 2,3
+	add	%i0,16,%i0
+	add	%i1,16,%i1
+	subcc	%i2,16,%i2	! cc tested by bge at the loop bottom
+	srl	%o2,%l1,%l6
+	or	%l6,%o4,%o4	! %o4 = carried word merged with word 0
+	sll	%o2,%l0,%o5
+	srl	%o3,%l1,%l6
+	or	%l6,%o5,%o5	! %o5 = word 0 merged with word 1
+	std	%o4,[%i0-16]
+	sll	%o3,%l0,%o4
+	srl	%o0,%l1,%l6
+	or	%l6,%o4,%o4	! %o4 = word 1 merged with word 2
+	sll	%o0,%l0,%o5
+	srl	%o1,%l1,%l6
+	or	%l6,%o5,%o5	! %o5 = word 2 merged with word 3
+	bge	4b
+	std	%o4,[%i0-8]	! (delay slot) word 3 stays in %o1 as the carry
+2:
+	addcc	%i2,12,%i2	! net effect count - 4, negative if under 4 left
+	blt,a	Lendn
+	addcc	%i2,4,%i2	! (delay slot, only if taken) true count, cc for Lend
+5:				! one word per pass, carried word in %o1
+	ld	[%i1],%o2
+	add	%i0,4,%i0
+	add	%i1,4,%i1
+	subcc	%i2,4,%i2	! cc tested by bge below
+	sll	%o1,%l0,%o0
+	srl	%o2,%l1,%o1
+	or	%o1,%o0,%o0
+	st	%o0,[%i0-4]
+	bge	5b
+	mov	%o2,%o1		! (delay slot) newest word becomes the carry
+	ba	Lendn
+	addcc	%i2,4,%i2	! (delay slot) true count, cc for Lend
+
+3:				! source not double-word aligned, count already less 4
+	blt,a	Lendn		! fewer than 4 bytes left
+	addcc	%i2,4,%i2	! (delay slot, only if taken) true count, cc for Lend
+	ld	[%i1],%o1	! read one word ahead: carried pair is %o0,%o1
+	add	%i1,4,%i1
+	subcc	%i2,16,%i2
+	blt,a	8f		! under 16 more bytes: go word at a time
+	addcc	%i2,16,%i2	! (delay slot, only if taken) undo the subtract
+7:				! 16 bytes per pass, carried pair in %o0,%o1
+	ldd	[%i1],%o2	! %o2,%o3 = next two source words
+	sll	%o0,%l0,%o4
+	srl	%o1,%l1,%l6
+	or	%l6,%o4,%o4	! %o4 = %o0 merged with %o1
+	sll	%o1,%l0,%o5
+	ldd	[%i1+8],%o0	! %o0,%o1 = the two words after those
+	add	%i0,16,%i0
+	add	%i1,16,%i1
+	subcc	%i2,16,%i2	! cc tested by bge at the loop bottom
+	srl	%o2,%l1,%l6
+	or	%l6,%o5,%o5
+	std	%o4,[%i0-16]
+	sll	%o2,%l0,%o4
+	srl	%o3,%l1,%l6
+	or	%l6,%o4,%o4
+	sll	%o3,%l0,%o5
+	srl	%o0,%l1,%l6
+	or	%l6,%o5,%o5
+	bge	7b
+	std	%o4,[%i0-8]	! (delay slot) new carried pair is %o0,%o1
+	addcc	%i2,16,%i2	! undo the last subtract
+8:				! store the word built from the carried pair
+	sll	%o0,%l0,%o4
+	srl	%o1,%l1,%l6
+	or	%l6,%o4,%o4
+	st	%o4,[%i0]
+	add	%i0,4,%i0
+	subcc	%i2,4,%i2
+	blt,a	Lendn		! fewer than 4 bytes left after this word
+	addcc	%i2,4,%i2	! (delay slot, only if taken) true count, cc for Lend
+	mov	%o1,%o0		! slide the pair down and fetch the next word
+	ld	[%i1],%o1
+	ba	8b
+	add	%i1,4,%i1	! (delay slot)
+
+
+Ldoword:
+	! here both dest and src are word-aligned
+	! make dest double-word aligned
+	be,a	1f		! cc = dest & 4, set in the delay slot of the branch here
+	andcc	%i1,4,%g0	! (delay slot, only if taken) cc = src & 4 for 1:
+	ld	[%i1],%o0	! copy one word so dest becomes double-word aligned
+	add	%i0,4,%i0
+	add	%i1,4,%i1
+	sub	%i2,4,%i2
+	st	%o0,[%i0-4]
+	cmp	%i2,4
+	blt,a	Lend		! fewer than 4 bytes left: finish bytewise
+	orcc	%i2,%g0,%g0	! (delay slot, only if taken) cc from count for Lend
+	andcc	%i1,4,%g0	! cc = src & 4
+
+1:
+	be,a	Ldodble		! if source double-word aligned now
+	subcc	%i2,32,%i2	! (delay slot, only if taken) count -= 32, cc for Ldodble
+	ld	[%i1],%o5	! read one word ahead, carried word lives in %o5
+	add	%i1,4,%i1
+	subcc	%i2,36,%i2	! 4 for the carried word plus 32 for one loop pass
+	blt,a	3f
+	add	%i2,32,%i2	! (delay slot, only if taken) leave count reduced by 4
+2:				! 32 bytes per pass, carried word in %o5
+	ldd	[%i1],%o2
+	add	%i1,32,%i1
+	subcc	%i2,32,%i2	! cc tested by bge at the loop bottom
+	mov	%o5,%o0		! pair the carried word with the first new word
+	ldd	[%i1-24],%o4
+	mov	%o2,%o1
+	std	%o0,[%i0]
+	mov	%o3,%o2		! regroup loaded words into even/odd pairs for std
+	ldd	[%i1-16],%o0
+	mov	%o4,%o3
+	std	%o2,[%i0+8]
+	mov	%o5,%o2
+	ldd	[%i1-8],%o4	! second word loaded here (%o5) is the next carry
+	mov	%o0,%o3
+	std	%o2,[%i0+16]
+	mov	%o1,%o0
+	mov	%o4,%o1
+	std	%o0,[%i0+24]
+	bge	2b
+	add	%i0,32,%i0	! (delay slot)
+	add	%i2,32,%i2	! undo the last subtract
+3:				! store the carried word, then go a word at a time
+	st	%o5,[%i0]
+	add	%i0,4,%i0
+	subcc	%i2,4,%i2
+	blt,a	Lend		! fewer than 4 bytes left after this word
+	addcc	%i2,4,%i2	! (delay slot, only if taken) true count, cc for Lend
+	ld	[%i1],%o5
+	ba	3b
+	add	%i1,4,%i1	! (delay slot)
+
+Ldodble:
+	! dest and source are both double-word aligned
+	blt,a	2f		! cc from count - 32: under 32 bytes left
+	addcc	%i2,28,%i2	! (delay slot, only if taken) count - 4, cc for 2:
+1:
+	ldd	[%i1],%o0	! copy sets of 4 double-words
+	subcc	%i2,32,%i2	! cc tested by bge at the loop bottom
+	ldd	[%i1+8],%o2
+	add	%i1,32,%i1
+	ldd	[%i1-16],%o4
+	add	%i0,32,%i0
+	std	%o0,[%i0-32]
+	ldd	[%i1-8],%o0	! %o0,%o1 reused once the first pair is stored
+	std	%o2,[%i0-24]
+	std	%o4,[%i0-16]
+	bge	1b
+	std	%o0,[%i0-8]	! (delay slot)
+	addcc	%i2,28,%i2	! count - 4, cc for 2:
+2:
+	blt,a	Lend		! fewer than 4 bytes left
+	addcc	%i2,4,%i2	! (delay slot, only if taken) true count, cc for Lend
+3:
+	ld	[%i1],%o0	! copy words
+	add	%i1,4,%i1
+	add	%i0,4,%i0
+	subcc	%i2,4,%i2	! cc tested by bge below
+	bge	3b
+	st	%o0,[%i0-4]	! (delay slot)
+	ba	Lend
+	addcc	%i2,4,%i2	! (delay slot) true count, cc for Lend
+
+Lendn:
+	sub	%i1,%l2,%i1	! undo the round-up of src done in the shift case
+Lend:				! on arrival cc reflect the remaining count in %i2
+	ble	Lout		! nothing left to copy
+	nop
+1:				! copy the remaining bytes one at a time
+	ldub	[%i1],%o0
+	add	%i1,1,%i1
+	subcc	%i2,1,%i2	! cc tested by bgt below
+	stb	%o0,[%i0]
+	bgt	1b
+	add	%i0,1,%i0	! (delay slot)
+
+	ba	Lout
+	nop
+
+Lback:	! Here we have to copy backwards
+	add	%i0,%i2,%i0	! %i0 = dest + count; %i1 is already src + count
+	! first get dest to be word-aligned
+	andcc	%i0,3,%l2	! #bytes until word-aligned
+	be,a	Lbwal		! if dest already word-aligned
+	cmp	%i2,4		! (delay slot, only if taken) cc for the test at Lbwal
+	subcc	%i2,%l2,%i2	! count -= those bytes
+	ble,a	Lbend		! not copying enough to get past word bdry
+	addcc	%i2,%l2,%i2	! (delay slot, only if taken) restore count, cc for Lbend
+
+1:
+	ldub	[%i1-1],%o0	! copy single bytes until word-aligned
+	sub	%i1,1,%i1
+	subcc	%l2,1,%l2	! cc tested by bgt below
+	stb	%o0,[%i0-1]
+	bgt	1b
+	sub	%i0,1,%i0	! (delay slot) step dest down
+	cmp	%i2,4		! cc for the test at Lbwal
+
+Lbwal:				! dest now word aligned
+	blt,a	Lbend		! fewer than 4 bytes left: finish bytewise
+	orcc	%i2,%g0,%g0	! (delay slot, only if taken) cc from count for Lbend
+
+	andcc	%i1,3,%l2	! %l2 = src pointer & 3
+	be,a	Lbword		! if dest word aligned wrt src
+	andcc	%i0,4,%g0	! (delay slot, only if taken) cc = dest & 4 for Lbword
+
+	! yucky cases where we have to shift
+	! note %l2 used below at Lbendn
+
+	mov	4,%l0
+	sub	%l0,%l2,%l0	! # bytes to right of src in word
+	sll	%l0,3,%l0	! bit offset = shift right count
+	sll	%l2,3,%l1	! shift left count
+	sub	%i1,%l2,%i1	! round down to word boundary
+	ld	[%i1],%o1	! get first word
+
+	andcc	%i0,4,%g0	! get destination double-word aligned
+	be,a	1f
+	andcc	%i1,4,%g0	! (delay slot, only if taken) cc = src & 4 for 1:
+	ld	[%i1-4],%o0	! by constructing and storing one word
+	sub	%i0,4,%i0
+	sub	%i1,4,%i1
+	sub	%i2,4,%i2
+	srl	%o1,%l0,%o1	! upper word shifted right ...
+	sll	%o0,%l1,%l6	! ... merged with lower word shifted left
+	or	%o1,%l6,%o1
+	st	%o1,[%i0]
+	mov	%o0,%o1		! lowest word read so far becomes the carried word
+
+	andcc	%i1,4,%g0	! now construct & store pairs of double-words
+1:
+	bne,a	3f		! if source now not double-word aligned
+	subcc	%i2,4,%i2	! (delay slot, only if taken) count -= 4, cc for 3:
+	subcc	%i2,16,%i2	! count -= 16, negative if under 16 bytes left
+	blt	2f
+	mov	%o1,%o0		! (delay slot, always) carried word moves to %o0
+4:				! 16 bytes per pass going down, carried word in %o0
+	ldd	[%i1-8],%o2	! %o2,%o3 = the two words just below %i1
+	srl	%o0,%l0,%o5
+	ldd	[%i1-16],%o0	! %o0,%o1 = the two words below those
+	sub	%i0,16,%i0
+	sub	%i1,16,%i1
+	subcc	%i2,16,%i2	! cc tested by bge at the loop bottom
+	sll	%o3,%l1,%l6
+	or	%l6,%o5,%o5	! %o5 = carried word merged with %o3
+	srl	%o3,%l0,%o4
+	sll	%o2,%l1,%l6
+	or	%l6,%o4,%o4	! %o4 = %o3 merged with %o2
+	std	%o4,[%i0+8]
+	srl	%o2,%l0,%o5
+	sll	%o1,%l1,%l6
+	or	%l6,%o5,%o5	! %o5 = %o2 merged with %o1
+	srl	%o1,%l0,%o4
+	sll	%o0,%l1,%l6
+	or	%l6,%o4,%o4	! %o4 = %o1 merged with %o0
+	bge	4b
+	std	%o4,[%i0]	! (delay slot) %o0 stays as the carried word
+2:
+	addcc	%i2,12,%i2	! net effect count - 4, negative if under 4 left
+	blt,a	Lbendn
+	addcc	%i2,4,%i2	! (delay slot, only if taken) true count, cc for Lbend
+5:				! one word per pass going down, carried word in %o0
+	ld	[%i1-4],%o2
+	sub	%i0,4,%i0
+	sub	%i1,4,%i1
+	subcc	%i2,4,%i2	! cc tested by bge below
+	srl	%o0,%l0,%o0
+	sll	%o2,%l1,%o1
+	or	%o1,%o0,%o0
+	st	%o0,[%i0]
+	bge	5b
+	mov	%o2,%o0		! (delay slot) newest word becomes the carry
+	ba	Lbendn
+	addcc	%i2,4,%i2	! (delay slot) true count, cc for Lbend
+
+3:				! source not double-word aligned, count already less 4
+	blt,a	Lbendn		! fewer than 4 bytes left
+	addcc	%i2,4,%i2	! (delay slot, only if taken) true count, cc for Lbend
+	ld	[%i1-4],%o0	! read one word ahead: carried pair is %o0,%o1
+	sub	%i1,4,%i1
+	subcc	%i2,16,%i2
+	blt,a	8f		! under 16 more bytes: go word at a time
+	addcc	%i2,16,%i2	! (delay slot, only if taken) undo the subtract
+7:				! 16 bytes per pass going down, carried pair in %o0,%o1
+	ldd	[%i1-8],%o2	! %o2,%o3 = the two words just below %i1
+	srl	%o1,%l0,%o5
+	sll	%o0,%l1,%l6
+	or	%l6,%o5,%o5	! %o5 = %o1 merged with %o0
+	srl	%o0,%l0,%o4
+	ldd	[%i1-16],%o0	! %o0,%o1 = the two words below those
+	sub	%i0,16,%i0
+	sub	%i1,16,%i1
+	subcc	%i2,16,%i2	! cc tested by bge at the loop bottom
+	sll	%o3,%l1,%l6
+	or	%l6,%o4,%o4
+	std	%o4,[%i0+8]
+	srl	%o3,%l0,%o5
+	sll	%o2,%l1,%l6
+	or	%l6,%o5,%o5
+	srl	%o2,%l0,%o4
+	sll	%o1,%l1,%l6
+	or	%l6,%o4,%o4
+	bge	7b
+	std	%o4,[%i0]	! (delay slot) new carried pair is %o0,%o1
+	addcc	%i2,16,%i2	! undo the last subtract
+8:				! store the word built from the carried pair
+	srl	%o1,%l0,%o5
+	sll	%o0,%l1,%l6
+	or	%l6,%o5,%o5
+	st	%o5,[%i0-4]
+	sub	%i0,4,%i0
+	subcc	%i2,4,%i2
+	blt,a	Lbendn		! fewer than 4 bytes left after this word
+	addcc	%i2,4,%i2	! (delay slot, only if taken) true count, cc for Lbend
+	mov	%o0,%o1		! slide the pair up and fetch the next lower word
+	ld	[%i1-4],%o0
+	ba	8b
+	sub	%i1,4,%i1	! (delay slot)
+
+
+Lbword:
+	! here both dest and src are word-aligned
+	! make dest double-word aligned
+	be,a	1f		! cc = dest & 4, set in the delay slot of the branch here
+	andcc	%i1,4,%g0	! (delay slot, only if taken) cc = src & 4 for 1:
+	ld	[%i1-4],%o0	! copy one word so dest becomes double-word aligned
+	sub	%i0,4,%i0
+	sub	%i1,4,%i1
+	sub	%i2,4,%i2
+	st	%o0,[%i0]
+	cmp	%i2,4
+	blt,a	Lbend		! fewer than 4 bytes left: finish bytewise
+	orcc	%i2,%g0,%g0	! (delay slot, only if taken) cc from count for Lbend
+	andcc	%i1,4,%g0	! cc = src & 4
+
+1:
+	be,a	Lbdble		! if source double-word aligned now
+	subcc	%i2,32,%i2	! (delay slot, only if taken) count -= 32, cc for Lbdble
+	ld	[%i1-4],%o4	! read one word ahead, carried word lives in %o4
+	sub	%i1,4,%i1
+	subcc	%i2,36,%i2	! 4 for the carried word plus 32 for one loop pass
+	blt,a	3f
+	add	%i2,32,%i2	! (delay slot, only if taken) leave count reduced by 4
+2:				! 32 bytes per pass going down, carried word in %o4
+	ldd	[%i1-8],%o2
+	sub	%i1,32,%i1
+	subcc	%i2,32,%i2	! cc tested by bge at the loop bottom
+	mov	%o4,%o1		! pair the carried word with the first new word
+	ldd	[%i1+16],%o4
+	mov	%o3,%o0
+	std	%o0,[%i0-8]
+	mov	%o2,%o3		! regroup loaded words into even/odd pairs for std
+	ldd	[%i1+8],%o0
+	mov	%o5,%o2
+	std	%o2,[%i0-16]
+	mov	%o4,%o3
+	ldd	[%i1],%o4	! first word loaded here (%o4) is the next carry
+	mov	%o1,%o2
+	std	%o2,[%i0-24]
+	mov	%o0,%o1
+	mov	%o5,%o0
+	std	%o0,[%i0-32]
+	bge	2b
+	sub	%i0,32,%i0	! (delay slot)
+	add	%i2,32,%i2	! undo the last subtract
+3:				! store the carried word, then go a word at a time
+	st	%o4,[%i0-4]
+	sub	%i0,4,%i0
+	subcc	%i2,4,%i2
+	blt,a	Lbend		! fewer than 4 bytes left after this word
+	addcc	%i2,4,%i2	! (delay slot, only if taken) true count, cc for Lbend
+	ld	[%i1-4],%o4
+	ba	3b
+	sub	%i1,4,%i1	! (delay slot)
+
+Lbdble:
+	! dest and source are both double-word aligned
+	blt,a	2f		! cc from count - 32: under 32 bytes left
+	addcc	%i2,28,%i2	! (delay slot, only if taken) count - 4, cc for 2:
+1:
+	ldd	[%i1-8],%o0	! copy sets of 4 double-words
+	subcc	%i2,32,%i2	! cc tested by bge at the loop bottom
+	ldd	[%i1-16],%o2
+	sub	%i1,32,%i1
+	ldd	[%i1+8],%o4
+	sub	%i0,32,%i0
+	std	%o0,[%i0+24]
+	ldd	[%i1],%o0	! %o0,%o1 reused once the first pair is stored
+	std	%o2,[%i0+16]
+	std	%o4,[%i0+8]
+	bge	1b
+	std	%o0,[%i0]	! (delay slot)
+	addcc	%i2,28,%i2	! count - 4, cc for 2:
+2:
+	blt,a	Lbend		! fewer than 4 bytes left
+	addcc	%i2,4,%i2	! (delay slot, only if taken) true count, cc for Lbend
+3:
+	ld	[%i1-4],%o0	! copy words
+	sub	%i1,4,%i1
+	sub	%i0,4,%i0
+	subcc	%i2,4,%i2	! cc tested by bge below
+	bge	3b
+	st	%o0,[%i0]	! (delay slot)
+	ba	Lbend
+	addcc	%i2,4,%i2	! (delay slot) true count, cc for Lbend
+
+Lbendn:
+	add	%i1,%l2,%i1	! undo the round-down of src done in the shift case
+Lbend:				! on arrival cc reflect the remaining count in %i2
+	ble	Lout		! nothing left to copy
+	nop
+1:				! copy the remaining bytes one at a time, going down
+	ldub	[%i1-1],%o0
+	sub	%i1,1,%i1
+	subcc	%i2,1,%i2	! cc tested by bgt below
+	stb	%o0,[%i0-1]
+	bgt	1b
+	sub	%i0,1,%i0	! (delay slot), then fall through to Lout
+
+Lout:				! common exit for the forward and backward paths
+	ret
+	restore	%l7,0,%o0	! (delay slot) pop window, return the original dest
+
+

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov with Sam's (original) version
of this