patch-2.1.4 linux/arch/alpha/lib/divide.S

Next file: linux/arch/alpha/lib/get_user.S
Previous file: linux/arch/alpha/lib/copy_user.S
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.1.3/linux/arch/alpha/lib/divide.S linux/arch/alpha/lib/divide.S
@@ -53,17 +53,25 @@
 #define mask	$0
 #define divisor	$1
 #define compare $28
+#define tmp1	$3	/* scratch register; saved and restored by the routines that use it */
+#define tmp2	$4	/* scratch register; in this patch only referenced inside DIV_ONLY() */
 
 #ifdef DIV
+#define DIV_ONLY(x,y...) x,##y	/* division build: emit the wrapped instruction as written */
+#define MOD_ONLY(x,y...)	/* division build: expands to nothing */
 #define func(x) __div##x
 #define modulus $2
 #define quotient $27
 #define GETSIGN(x) xor $24,$25,x
+#define STACK 48	/* frame bytes; ufunction stores five quadwords at offsets 0-32 */
 #else
+#define DIV_ONLY(x,y...)	/* remainder build: expands to nothing */
+#define MOD_ONLY(x,y...) x,##y	/* remainder build: emit the wrapped instruction as written */
 #define func(x) __rem##x
 #define modulus $27
 #define quotient $2
 #define GETSIGN(x) bis $24,$24,x
+#define STACK 32	/* frame bytes; four quadwords are stored at offsets 0-24 */
 #endif
 
 /*
@@ -82,44 +90,65 @@
 #endif
 
 .set noat
+.align	3
 .globl	ufunction
 .ent	ufunction
 ufunction:
-	subq	$30,32,$30
-	stq	$0, 0($30)
-	stq	$1, 8($30)
-	stq	$2,16($30)
+	subq	$30,STACK,$30	/* reserve a STACK-byte save area below $30 */
+	.frame	$30,STACK,$23	/* the routine returns through $23 (see the ret below) */
+	.prologue 0
 
+7:	stq	$1, 0($30)	/* 7: also entered from sfunction (bge $28,7b) after it has done its own subq */
 	bis	$25,$25,divisor
+	stq	$2, 8($30)	/* register saves are interleaved with the operand setup */
 	bis	$24,$24,modulus
+	stq	$0,16($30)
 	bis	$31,$31,quotient
 	LONGIFY(divisor)
+	stq	tmp1,24($30)
 	LONGIFY(modulus)
-	beq	divisor, 9f			/* div by zero */
 	bis	$31,1,mask
+	DIV_ONLY(stq tmp2,32($30))	/* tmp2 is only used, hence only saved, in the division build */
+	beq	divisor, 9f			/* div by zero */
 
-	/* shift divisor left */
+#ifdef INTSIZE
+	/*
+	 * shift divisor left, using 3-bit shifts for
+	 * 32-bit divides as we can't overflow. Three-bit
+	 * shifts will result in looping three times less
+	 * here, but can result in two loops more later.
+	 * Thus using a large shift isn't worth it (and
+	 * s8add pairs better than a sll..)
+	 */
+1:	cmpult	divisor,modulus,compare	/* compare = (divisor < modulus), taken before the shift */
+	s8addq	divisor,$31,divisor	/* divisor = divisor*8 + 0 */
+	s8addq	mask,$31,mask	/* mask = mask*8, kept in step with divisor */
+	bne	compare,1b	/* repeat while the pre-shift divisor was below modulus */
+#else
 1:	cmpult	divisor,modulus,compare
-	blt	divisor, 3f
+	blt     divisor, 2f	/* divisor is negative as a signed value (top bit set): stop shifting */
 	addq	divisor,divisor,divisor
 	addq	mask,mask,mask
 	bne	compare,1b
+	unop	/* no-op; presumably padding for alignment or issue pairing */
+#endif
 
 	/* ok, start to go right again.. */
-2:	srl	divisor,1,divisor
-	beq	mask,9f
+2:	DIV_ONLY(addq quotient,mask,tmp2)	/* tmp2 = quotient + mask, computed before mask is shifted */
 	srl	mask,1,mask
-3:	cmpule	divisor,modulus,compare
-	beq	compare,2b
-	addq	quotient,mask,quotient
-	beq	mask,9f
-	subq	modulus,divisor,modulus
-	br	2b
-
-9:	ldq	$0, 0($30)
-	ldq	$1, 8($30)
-	ldq	$2, 16($30)
-	addq	$30,32,$30
+	cmpule	divisor,modulus,compare	/* compare = (divisor <= modulus) */
+	subq	modulus,divisor,tmp1	/* tmp1 = modulus - divisor, computed unconditionally */
+	DIV_ONLY(cmovne compare,tmp2,quotient)	/* if divisor fits, commit quotient + mask */
+	srl	divisor,1,divisor	/* move divisor one bit to the right for the next pass */
+	cmovne	compare,tmp1,modulus	/* if divisor fits, commit the subtraction */
+	bne	mask,2b	/* loop until mask has been shifted down to zero */
+
+9:	ldq	$1, 0($30)	/* 9: common exit, restore what was saved at label 7 */
+	ldq	$2, 8($30)
+	ldq	$0,16($30)
+	ldq	tmp1,24($30)
+	DIV_ONLY(ldq tmp2,32($30))
+	addq	$30,STACK,$30	/* release the save area */
 	ret	$31,($23),1
 	.end	ufunction
 
@@ -133,28 +162,34 @@
  * which is probably not the best solution, but at least should
  * have the property that (x/y)*y + (x%y) = x.
  */
+.align 3
 .globl	sfunction
 .ent	sfunction
 sfunction:
+	subq	$30,STACK,$30	/* reserve the frame first so the fast path can join ufunction at label 7 */
+	.frame	$30,STACK,$23	/* the routine returns through $23 (see the ret below) */
+	.prologue 0
 	bis	$24,$25,$28
 	SLONGIFY($28)
-	bge	$28,ufunction
-	subq	$30,32,$30
-	stq	$23,0($30)
-	stq	$24,8($30)
-	stq	$25,16($30)
+	bge	$28,7b	/* ($24 | $25) not negative: continue in ufunction past its own subq */
+	stq	$24,0($30)	/* save the original operands; they are replaced by absolute values below */
 	subq	$31,$24,$28
+	stq	$25,8($30)
 	cmovlt	$24,$28,$24	/* abs($24) */
+	stq	$23,16($30)	/* save the return address: bsr below overwrites $23 */
 	subq	$31,$25,$28
+	stq	tmp1,24($30)	/* tmp1 is used as scratch after the call */
 	cmovlt	$25,$28,$25	/* abs($25) */
+	unop	/* no-op; presumably padding for alignment or issue pairing */
 	bsr	$23,ufunction
-	ldq	$23,0($30)
-	ldq	$24,8($30)
-	ldq	$25,16($30)
-	addq	$30,32,$30
+	ldq	$24,0($30)	/* restore the original (signed) operands */
+	ldq	$25,8($30)
 	GETSIGN($28)
+	subq	$31,$27,tmp1	/* tmp1 = -$27, the negated result of the unsigned routine */
 	SLONGIFY($28)
-	bge	$28,1f
-	subq	$31,$27,$27
-1:	ret	$31,($23),1
+	ldq	$23,16($30)	/* restore the caller's return address */
+	cmovlt	$28,tmp1,$27	/* if the sign value in $28 is negative, return the negated result */
+	ldq	tmp1,24($30)
+	addq	$30,STACK,$30	/* release the frame */
+	ret	$31,($23),1
 	.end	sfunction

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov