patch-2.4.4 linux/arch/ia64/lib/strlen_user.S
Next file: linux/arch/ia64/lib/strncpy_from_user.S
Previous file: linux/arch/ia64/lib/strlen.S
Back to the patch index
Back to the overall index
- Lines: 135
- Date: Thu Apr 5 12:51:47 2001
- Orig file: v2.4.3/linux/arch/ia64/lib/strlen_user.S
- Orig date: Tue Mar 6 19:44:35 2001
diff -u --recursive --new-file v2.4.3/linux/arch/ia64/lib/strlen_user.S linux/arch/ia64/lib/strlen_user.S
@@ -6,9 +6,9 @@
*
* Outputs:
* ret0 0 in case of fault, strlen(buffer)+1 otherwise
- *
- * Copyright (C) 1998, 1999 Hewlett-Packard Co
- * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999, 2001 David Mosberger-Tang <davidm@hpl.hp.com>
* Copyright (C) 1998, 1999 Stephane Eranian <eranian@hpl.hp.com>
*
* 01/19/99 S.Eranian heavily enhanced version (see details below)
@@ -24,8 +24,8 @@
// - length of string + 1
// - 0 in case an exception is raised
//
-// This is an enhanced version of the basic strlen_user. it includes a
-// combination of compute zero index (czx), parallel comparisons, speculative
+// This is an enhanced version of the basic strlen_user. it includes a
+// combination of compute zero index (czx), parallel comparisons, speculative
// loads and loop unroll using rotating registers.
//
// General Ideas about the algorithm:
@@ -34,7 +34,7 @@
// string may not be 8-byte aligned. In this case we load the 8byte
// quantity which includes the start of the string and mask the unused
// bytes with 0xff to avoid confusing czx.
-// We use speculative loads and software pipelining to hide memory
+// We use speculative loads and software pipelining to hide memory
// latency and do read ahead safely. This way we defer any exception.
//
// Because we don't want the kernel to be relying on particular
@@ -45,7 +45,7 @@
// The fact that speculation may fail can be caused, for instance, by
// the DCR.dm bit being set. In this case TLB misses are deferred, i.e.,
// a NaT bit will be set if the translation is not present. The normal
-// load, on the other hand, will cause the translation to be inserted
+// load, on the other hand, will cause the translation to be inserted
// if the mapping exists.
//
// It should be noted that we execute recovery code only when we need
@@ -53,30 +53,21 @@
// recovery code on pure read ahead data.
//
// Remarks:
-// - the cmp r0,r0 is used as a fast way to initialize a predicate
+// - the cmp r0,r0 is used as a fast way to initialize a predicate
// register to 1. This is required to make sure that we get the parallel
// compare correct.
//
// - we don't use the epilogue counter to exit the loop but we need to set
// it to zero beforehand.
//
-// - after the loop we must test for Nat values because neither the
+// - after the loop we must test for Nat values because neither the
// czx nor cmp instruction raise a NaT consumption fault. We must be
-// careful not to look too far for a Nat for which we don't care.
+// careful not to look too far for a Nat for which we don't care.
// For instance we don't need to look at a NaT in val2 if the zero byte
// was in val1.
//
// - Clearly performance tuning is required.
//
-//
-//
-
-#define EX(y,x...) \
- .section __ex_table,"a"; \
- data4 @gprel(99f); \
- data4 y-99f; \
- .previous; \
-99: x
#define saved_pfs r11
#define tmp r10
@@ -89,15 +80,9 @@
#define val1 r22
#define val2 r23
-
- .text
- .psr abi64
- .psr lsb
- .lsb
-
GLOBAL_ENTRY(__strlen_user)
- UNW(.prologue)
- UNW(.save ar.pfs, saved_pfs)
+ .prologue
+ .save ar.pfs, saved_pfs
alloc saved_pfs=ar.pfs,11,0,0,8
.rotr v[2], w[2] // declares our 4 aliases
@@ -105,7 +90,7 @@
extr.u tmp=in0,0,3 // tmp=least significant 3 bits
mov orig=in0 // keep track of initial byte address
dep src=0,in0,0,3 // src=8byte-aligned in0 address
- UNW(.save pr, saved_pr)
+ .save pr, saved_pr
mov saved_pr=pr // preserve predicates (rotation)
;;
@@ -127,8 +112,8 @@
or v[1]=v[1],mask // now we have a safe initial byte pattern
;;
1:
- ld8.s v[0]=[src],8 // speculatively load next
- czx1.r val1=v[1] // search 0 byte from right
+ ld8.s v[0]=[src],8 // speculatively load next
+ czx1.r val1=v[1] // search 0 byte from right
czx1.r val2=w[1] // search 0 byte from right following 8bytes
;;
ld8.s w[0]=[src],8 // speculatively load next to next
@@ -144,11 +129,7 @@
// - there must be a better way of doing the test
//
cmp.eq p8,p9=8,val1 // p8 = no zero byte in val1, p9 = val1 had zero (disambiguate)
-#ifdef notyet
tnat.nz p6,p7=val1 // test NaT on val1
-#else
- tnat.z p7,p6=val1 // test NaT on val1
-#endif
(p6) br.cond.spnt.few recover// jump to recovery if val1 is NaT
;;
//
@@ -193,7 +174,7 @@
2:
EX(.Lexit1, (p6) ld8 val=[base],8)
;;
- czx1.r val1=val // search 0 byte from right
+ czx1.r val1=val // search 0 byte from right
;;
cmp.eq p6,p0=8,val1 // val1==8 ?
(p6) br.wtop.dptk.few 2b // loop until p6 == 0
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)