patch-2.4.20 linux-2.4.20/arch/x86_64/lib/memset.S
- Lines: 85
- Date: Thu Nov 28 15:53:12 2002
- Orig file: linux-2.4.19/arch/x86_64/lib/memset.S
- Orig date: Wed Dec 31 16:00:00 1969
diff -urN linux-2.4.19/arch/x86_64/lib/memset.S linux-2.4.20/arch/x86_64/lib/memset.S
@@ -0,0 +1,84 @@
+/* Copyright 2002 Andi Kleen, SuSE Labs */
+
+/*
+ * ISO C memset - set a memory block to a byte value.
+ *
+ * rdi destination
+ * rsi value (char)
+ * rdx count (bytes)
+ *
+ * rax original destination
+ */
+	.globl __memset
+	.globl memset
+	.p2align
+memset:
+__memset:
+	movq %rdi,%r10
+	movq %rdx,%r11
+
+	/* expand byte value */
+	movzbl %sil,%ecx
+	movabs $0x0101010101010101,%rax
+	mul %rcx		/* with rax, clobbers rdx */
+
+	/* align dst */
+	movl %edi,%r9d
+	andl $7,%r9d
+	jnz bad_alignment
+after_bad_alignment:
+
+	movq %r11,%rcx
+	movl $64,%r8d
+	shrq $6,%rcx
+	jz handle_tail
+
+loop_64:
+	movq %rax,(%rdi)
+	movq %rax,8(%rdi)
+	movq %rax,16(%rdi)
+	movq %rax,24(%rdi)
+	movq %rax,32(%rdi)
+	movq %rax,40(%rdi)
+	movq %rax,48(%rdi)
+	movq %rax,56(%rdi)
+	addq %r8,%rdi
+	decl %ecx
+	jnz loop_64
+
+	/* Handle tail in loops. The loops should be faster than hard
+	   to predict jump tables. */
+handle_tail:
+	movl %r11d,%ecx
+	andl $63&(~7),%ecx
+	jz handle_7
+	shrl $3,%ecx
+loop_8:
+	movq %rax,(%rdi)
+	addq $8,%rdi
+	decl %ecx
+	jnz loop_8
+
+handle_7:
+	movl %r11d,%ecx
+	andl $7,%ecx
+	jz ende
+loop_1:
+	movb %al,(%rdi)
+	addq $1,%rdi
+	decl %ecx
+	jnz loop_1
+
+ende:
+	movq %r10,%rax
+	ret
+
+bad_alignment:
+	cmpq $7,%r11
+	jbe handle_7
+	movq %rax,(%rdi)	/* unaligned store */
+	movq $8,%r8
+	subq %r9,%r8
+	addq %r8,%rdi
+	subq %r8,%r11
+	jmp after_bad_alignment
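For readers following the assembly, here is a minimal C model of the same algorithm: expand the fill byte into a 64-bit pattern by multiplying with 0x0101010101010101 (the mul %rcx above), cover the misaligned head with one unaligned 8-byte store, run the unrolled 64-byte main loop, then finish with 8-byte and 1-byte tail loops. This is an illustrative sketch, not part of the patch: the names memset64_model and the self-test in main are invented here, and memcpy stands in for the unaligned movq stores.

#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include <assert.h>
#include <stdio.h>

/* Hypothetical C model of the assembly memset above. */
static void *memset64_model(void *dst, int c, size_t n)
{
	unsigned char *p = (unsigned char *)dst;
	size_t remaining = n;

	/* Expand the byte into all eight lanes of a 64-bit word:
	   0x0101010101010101 * 0xAB == 0xABABABABABABABAB. */
	uint64_t pattern = 0x0101010101010101ULL * (unsigned char)c;

	/* Align dst: one unaligned 8-byte store fills the misaligned
	   head, then advance to the next 8-byte boundary. As in the
	   asm (cmpq $7 / jbe handle_7), counts <= 7 skip straight to
	   the byte loop. */
	size_t mis = (uintptr_t)p & 7;
	if (mis != 0 && remaining > 7) {
		memcpy(p, &pattern, 8);	/* unaligned store */
		size_t step = 8 - mis;
		p += step;
		remaining -= step;
	}

	/* Main loop: eight 8-byte stores per iteration (64 bytes),
	   mirroring loop_64. */
	size_t blocks = remaining >> 6;
	while (blocks--) {
		for (int i = 0; i < 8; i++)
			memcpy(p + 8 * i, &pattern, 8);
		p += 64;
	}

	/* Tail: whole 8-byte words (loop_8), then bytes (loop_1). */
	size_t words = (remaining & 63) >> 3;
	while (words--) {
		memcpy(p, &pattern, 8);
		p += 8;
	}
	size_t bytes = remaining & 7;
	while (bytes--)
		*p++ = (unsigned char)c;

	return dst;	/* original destination, as the asm returns in rax */
}

int main(void)
{
	unsigned char buf[200];

	/* Exercise every misalignment and tail shape up to two full
	   64-byte blocks; check fill and both boundaries. */
	for (size_t off = 0; off < 8; off++)
		for (size_t n = 0; n < 130; n++) {
			memset(buf, 0x5A, sizeof(buf));
			memset64_model(buf + off, 0xAB, n);
			for (size_t i = 0; i < n; i++)
				assert(buf[off + i] == 0xAB);
			if (off > 0)
				assert(buf[off - 1] == 0x5A);
			assert(buf[off + n] == 0x5A);
		}
	puts("model matches memset semantics");
	return 0;
}

Note that when the destination is misaligned and the count exceeds 7, the head store and the aligned loops overlap by up to 7 bytes; that redundancy is harmless and cheaper than a byte-by-byte head loop, which is the same trade the assembly makes in bad_alignment.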