patch-2.4.25 linux-2.4.25/arch/mips64/mm/pg-r4k.c
Next file: linux-2.4.25/arch/mips64/mm/pg-sb1.c
Previous file: linux-2.4.25/arch/mips64/mm/loadmmu.c
Back to the patch index
Back to the overall index
- Lines: 1138
- Date:
2004-02-18 05:36:30.000000000 -0800
- Orig file:
linux-2.4.24/arch/mips64/mm/pg-r4k.c
- Orig date:
2003-08-25 04:44:40.000000000 -0700
diff -urN linux-2.4.24/arch/mips64/mm/pg-r4k.c linux-2.4.25/arch/mips64/mm/pg-r4k.c
@@ -3,706 +3,469 @@
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
- * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
- * Copyright (C) 1997, 98, 99, 2000, 01, 02, 03 Ralf Baechle (ralf@gnu.org)
- * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
- * Copyright (C) 2000 Kanoj Sarcar (kanoj@sgi.com)
+ * Copyright (C) 2003 Ralf Baechle (ralf@linux-mips.org)
*/
+#include <linux/config.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
+#include <linux/module.h>
#include <asm/cacheops.h>
+#include <asm/inst.h>
#include <asm/io.h>
#include <asm/page.h>
#include <asm/pgtable.h>
+#include <asm/prefetch.h>
#include <asm/system.h>
#include <asm/bootinfo.h>
+#include <asm/mipsregs.h>
#include <asm/mmu_context.h>
#include <asm/cpu.h>
+#include <asm/war.h>
/*
- * Zero an entire page. Basically a simple unrolled loop should do the
- * job but we want more performance by saving memory bus bandwidth. We
- * have five flavours of the routine available for:
+ * Maximum sizes:
*
- * - 16byte cachelines and no second level cache
- * - 32byte cachelines second level cache
- * - a version which handles the buggy R4600 v1.x
- * - a version which handles the buggy R4600 v2.0
- * - Finally a last version without fancy cache games for the SC and MC
- * versions of R4000 and R4400.
+ * R4000 16 bytes D-cache, 128 bytes S-cache: 0x78 bytes
+ * R4600 v1.7: 0x5c bytes
+ * R4600 v2.0: 0x60 bytes
+ * With prefetching, 16 byte strides 0xa0 bytes
*/
-void r4k_clear_page_d16(void * page)
-{
- __asm__ __volatile__(
- ".set\tnoreorder\n\t"
- ".set\tnoat\n\t"
- "daddiu\t$1,%0,%2\n"
- "1:\tcache\t%3,(%0)\n\t"
- "sd\t$0,(%0)\n\t"
- "sd\t$0,8(%0)\n\t"
- "cache\t%3,16(%0)\n\t"
- "sd\t$0,16(%0)\n\t"
- "sd\t$0,24(%0)\n\t"
- "daddiu\t%0,64\n\t"
- "cache\t%3,-32(%0)\n\t"
- "sd\t$0,-32(%0)\n\t"
- "sd\t$0,-24(%0)\n\t"
- "cache\t%3,-16(%0)\n\t"
- "sd\t$0,-16(%0)\n\t"
- "bne\t$1,%0,1b\n\t"
- "sd\t$0,-8(%0)\n\t"
- ".set\tat\n\t"
- ".set\treorder"
- : "=r" (page)
- : "0" (page), "I" (PAGE_SIZE), "i" (Create_Dirty_Excl_D)
- : "memory");
-}
-
-void r4k_clear_page_d32(void * page)
-{
- __asm__ __volatile__(
- ".set\tnoreorder\n\t"
- ".set\tnoat\n\t"
- "daddiu\t$1,%0,%2\n"
- "1:\tcache\t%3,(%0)\n\t"
- "sd\t$0,(%0)\n\t"
- "sd\t$0,8(%0)\n\t"
- "sd\t$0,16(%0)\n\t"
- "sd\t$0,24(%0)\n\t"
- "daddiu\t%0,64\n\t"
- "cache\t%3,-32(%0)\n\t"
- "sd\t$0,-32(%0)\n\t"
- "sd\t$0,-24(%0)\n\t"
- "sd\t$0,-16(%0)\n\t"
- "bne\t$1,%0,1b\n\t"
- "sd\t$0,-8(%0)\n\t"
- ".set\tat\n\t"
- ".set\treorder"
- : "=r" (page)
- : "0" (page), "I" (PAGE_SIZE), "i" (Create_Dirty_Excl_D)
- : "memory");
-}
+static unsigned int clear_page_array[0xa0 / 4];
+void clear_page(void * page) __attribute__((alias("clear_page_array")));
/*
- * This flavour of r4k_clear_page is for the R4600 V1.x. Cite from the
- * IDT R4600 V1.7 errata:
- *
- * 18. The CACHE instructions Hit_Writeback_Invalidate_D, Hit_Writeback_D,
- * Hit_Invalidate_D and Create_Dirty_Excl_D should only be
- * executed if there is no other dcache activity. If the dcache is
- * accessed for another instruction immeidately preceding when these
- * cache instructions are executing, it is possible that the dcache
- * tag match outputs used by these cache instructions will be
- * incorrect. These cache instructions should be preceded by at least
- * four instructions that are not any kind of load or store
- * instruction.
+ * Maximum sizes:
*
- * This is not allowed: lw
- * nop
- * nop
- * nop
- * cache Hit_Writeback_Invalidate_D
- *
- * This is allowed: lw
- * nop
- * nop
- * nop
- * nop
- * cache Hit_Writeback_Invalidate_D
- */
-void r4k_clear_page_r4600_v1(void * page)
-{
- __asm__ __volatile__(
- ".set\tnoreorder\n\t"
- ".set\tnoat\n\t"
- "daddiu\t$1,%0,%2\n"
- "1:\tnop\n\t"
- "nop\n\t"
- "nop\n\t"
- "nop\n\t"
- "cache\t%3,(%0)\n\t"
- "sd\t$0,(%0)\n\t"
- "sd\t$0,8(%0)\n\t"
- "sd\t$0,16(%0)\n\t"
- "sd\t$0,24(%0)\n\t"
- "daddiu\t%0,64\n\t"
- "nop\n\t"
- "nop\n\t"
- "nop\n\t"
- "cache\t%3,-32(%0)\n\t"
- "sd\t$0,-32(%0)\n\t"
- "sd\t$0,-24(%0)\n\t"
- "sd\t$0,-16(%0)\n\t"
- "bne\t$1,%0,1b\n\t"
- "sd\t$0,-8(%0)\n\t"
- ".set\tat\n\t"
- ".set\treorder"
- : "=r" (page)
- : "0" (page), "I" (PAGE_SIZE), "i" (Create_Dirty_Excl_D)
- : "memory");
-}
-
-/*
- * And this one is for the R4600 V2.0
- */
-void r4k_clear_page_r4600_v2(void * page)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- *(volatile unsigned int *)KSEG1;
- __asm__ __volatile__(
- ".set\tnoreorder\n\t"
- ".set\tnoat\n\t"
- "daddiu\t$1,%0,%2\n"
- "1:\tcache\t%3,(%0)\n\t"
- "sd\t$0,(%0)\n\t"
- "sd\t$0,8(%0)\n\t"
- "sd\t$0,16(%0)\n\t"
- "sd\t$0,24(%0)\n\t"
- "daddiu\t%0,64\n\t"
- "cache\t%3,-32(%0)\n\t"
- "sd\t$0,-32(%0)\n\t"
- "sd\t$0,-24(%0)\n\t"
- "sd\t$0,-16(%0)\n\t"
- "bne\t$1,%0,1b\n\t"
- "sd\t$0,-8(%0)\n\t"
- ".set\tat\n\t"
- ".set\treorder"
- : "=r" (page)
- : "0" (page), "I" (PAGE_SIZE), "i" (Create_Dirty_Excl_D)
- : "memory");
- local_irq_restore(flags);
-}
-
-/*
- * The next 4 versions are optimized for all possible scache configurations
- * of the SC / MC versions of R4000 and R4400 ...
- *
- * Todo: For even better performance we should have a routine optimized for
- * every legal combination of dcache / scache linesize. When I (Ralf) tried
- * this the kernel crashed shortly after mounting the root filesystem. CPU
- * bug? Weirdo cache instruction semantics?
- */
-void r4k_clear_page_s16(void * page)
-{
- __asm__ __volatile__(
- ".set\tnoreorder\n\t"
- ".set\tnoat\n\t"
- "daddiu\t$1,%0,%2\n"
- "1:\tcache\t%3,(%0)\n\t"
- "sd\t$0,(%0)\n\t"
- "sd\t$0,8(%0)\n\t"
- "cache\t%3,16(%0)\n\t"
- "sd\t$0,16(%0)\n\t"
- "sd\t$0,24(%0)\n\t"
- "daddiu\t%0,64\n\t"
- "cache\t%3,-32(%0)\n\t"
- "sd\t$0,-32(%0)\n\t"
- "sd\t$0,-24(%0)\n\t"
- "cache\t%3,-16(%0)\n\t"
- "sd\t$0,-16(%0)\n\t"
- "bne\t$1,%0,1b\n\t"
- "sd\t$0,-8(%0)\n\t"
- ".set\tat\n\t"
- ".set\treorder"
- : "=r" (page)
- : "0" (page), "I" (PAGE_SIZE), "i" (Create_Dirty_Excl_SD)
- : "memory");
-}
-
-void r4k_clear_page_s32(void * page)
-{
- __asm__ __volatile__(
- ".set\tnoreorder\n\t"
- ".set\tnoat\n\t"
- "daddiu\t$1,%0,%2\n"
- "1:\tcache\t%3,(%0)\n\t"
- "sd\t$0,(%0)\n\t"
- "sd\t$0,8(%0)\n\t"
- "sd\t$0,16(%0)\n\t"
- "sd\t$0,24(%0)\n\t"
- "daddiu\t%0,64\n\t"
- "cache\t%3,-32(%0)\n\t"
- "sd\t$0,-32(%0)\n\t"
- "sd\t$0,-24(%0)\n\t"
- "sd\t$0,-16(%0)\n\t"
- "bne\t$1,%0,1b\n\t"
- "sd\t$0,-8(%0)\n\t"
- ".set\tat\n\t"
- ".set\treorder"
- : "=r" (page)
- : "0" (page), "I" (PAGE_SIZE), "i" (Create_Dirty_Excl_SD)
- : "memory");
-}
-
-void r4k_clear_page_s64(void * page)
-{
- __asm__ __volatile__(
- ".set\tnoreorder\n\t"
- ".set\tnoat\n\t"
- "daddiu\t$1,%0,%2\n"
- "1:\tcache\t%3,(%0)\n\t"
- "sd\t$0,(%0)\n\t"
- "sd\t$0,8(%0)\n\t"
- "sd\t$0,16(%0)\n\t"
- "sd\t$0,24(%0)\n\t"
- "daddiu\t%0,64\n\t"
- "sd\t$0,-32(%0)\n\t"
- "sd\t$0,-24(%0)\n\t"
- "sd\t$0,-16(%0)\n\t"
- "bne\t$1,%0,1b\n\t"
- "sd\t$0,-8(%0)\n\t"
- ".set\tat\n\t"
- ".set\treorder"
- : "=r" (page)
- : "0" (page), "I" (PAGE_SIZE), "i" (Create_Dirty_Excl_SD)
- : "memory");
-}
-
-void r4k_clear_page_s128(void * page)
-{
- __asm__ __volatile__(
- ".set\tnoreorder\n\t"
- ".set\tnoat\n\t"
- "daddiu\t$1,%0,%2\n"
- "1:\tcache\t%3,(%0)\n\t"
- "sd\t$0,(%0)\n\t"
- "sd\t$0,8(%0)\n\t"
- "sd\t$0,16(%0)\n\t"
- "sd\t$0,24(%0)\n\t"
- "sd\t$0,32(%0)\n\t"
- "sd\t$0,40(%0)\n\t"
- "sd\t$0,48(%0)\n\t"
- "sd\t$0,56(%0)\n\t"
- "daddiu\t%0,128\n\t"
- "sd\t$0,-64(%0)\n\t"
- "sd\t$0,-56(%0)\n\t"
- "sd\t$0,-48(%0)\n\t"
- "sd\t$0,-40(%0)\n\t"
- "sd\t$0,-32(%0)\n\t"
- "sd\t$0,-24(%0)\n\t"
- "sd\t$0,-16(%0)\n\t"
- "bne\t$1,%0,1b\n\t"
- "sd\t$0,-8(%0)\n\t"
- ".set\tat\n\t"
- ".set\treorder"
- : "=r" (page)
- : "0" (page), "I" (PAGE_SIZE), "i" (Create_Dirty_Excl_SD)
- : "memory");
-}
-
-/*
- * This version has been tuned on an Origin. For other machines the arguments
- * of the pref instructin may have to be tuned differently.
+ * R4000 16 bytes D-cache, 128 bytes S-cache: 0xbc bytes
+ * R4600 v1.7: 0x80 bytes
+ * R4600 v2.0: 0x84 bytes
+ * With prefetching, 16 byte strides 0xb8 bytes
*/
-void andes_clear_page(void * page)
-{
- __asm__ __volatile__(
- ".set\tpush\n\t"
- ".set\tmips4\n\t"
- ".set\tnoreorder\n\t"
- ".set\tnoat\n\t"
- "daddiu\t$1,%0,%2\n"
- "1:\tpref 7,512(%0)\n\t"
- "sd\t$0,(%0)\n\t"
- "sd\t$0,8(%0)\n\t"
- "sd\t$0,16(%0)\n\t"
- "sd\t$0,24(%0)\n\t"
- "daddiu\t%0,64\n\t"
- "sd\t$0,-32(%0)\n\t"
- "sd\t$0,-24(%0)\n\t"
- "sd\t$0,-16(%0)\n\t"
- "bne\t$1,%0,1b\n\t"
- "sd\t$0,-8(%0)\n\t"
- ".set\tpop"
- : "=r" (page)
- : "0" (page), "I" (PAGE_SIZE)
- : "memory");
-}
+static unsigned int copy_page_array[0xb8 / 4];
+void copy_page(void *to, void *from) __attribute__((alias("copy_page_array")));
/*
- * This is still inefficient. We only can do better if we know the
- * virtual address where the copy will be accessed.
+ * An address fits into a single register so it's safe to use 64-bit registers
+ * if we have 64-bit addresses.
*/
-
-void r4k_copy_page_d16(void * to, void * from)
-{
- unsigned long dummy1, dummy2, reg1, reg2;
-
- __asm__ __volatile__(
- ".set\tnoreorder\n\t"
- ".set\tnoat\n\t"
- "daddiu\t$1,%0,%6\n"
- "1:\tcache\t%7,(%0)\n\t"
- "ld\t%2,(%1)\n\t"
- "ld\t%3,8(%1)\n\t"
- "sd\t%2,(%0)\n\t"
- "sd\t%3,8(%0)\n\t"
- "cache\t%7,16(%0)\n\t"
- "ld\t%2,16(%1)\n\t"
- "ld\t%3,24(%1)\n\t"
- "sd\t%2,16(%0)\n\t"
- "sd\t%3,24(%0)\n\t"
- "cache\t%7,32(%0)\n\t"
- "daddiu\t%0,64\n\t"
- "daddiu\t%1,64\n\t"
- "ld\t%2,-32(%1)\n\t"
- "ld\t%3,-24(%1)\n\t"
- "sd\t%2,-32(%0)\n\t"
- "sd\t%3,-24(%0)\n\t"
- "cache\t%7,-16(%0)\n\t"
- "ld\t%2,-16(%1)\n\t"
- "ld\t%3,-8(%1)\n\t"
- "sd\t%2,-16(%0)\n\t"
- "bne\t$1,%0,1b\n\t"
- " sd\t%3,-8(%0)\n\t"
- ".set\tat\n\t"
- ".set\treorder"
- :"=r" (dummy1), "=r" (dummy2), "=&r" (reg1), "=&r" (reg2)
- :"0" (to), "1" (from), "I" (PAGE_SIZE),
- "i" (Create_Dirty_Excl_D));
-}
-
-void r4k_copy_page_d32(void * to, void * from)
-{
- unsigned long dummy1, dummy2, reg1, reg2;
-
- __asm__ __volatile__(
- ".set\tnoreorder\n\t"
- ".set\tnoat\n\t"
- "daddiu\t$1,%0,%6\n"
- "1:\tcache\t%7,(%0)\n\t"
- "ld\t%2,(%1)\n\t"
- "ld\t%3,8(%1)\n\t"
- "sd\t%2,(%0)\n\t"
- "sd\t%3,8(%0)\n\t"
- "ld\t%2,16(%1)\n\t"
- "ld\t%3,24(%1)\n\t"
- "sd\t%2,16(%0)\n\t"
- "sd\t%3,24(%0)\n\t"
- "cache\t%7,32(%0)\n\t"
- "daddiu\t%0,64\n\t"
- "daddiu\t%1,64\n\t"
- "ld\t%2,-32(%1)\n\t"
- "ld\t%3,-24(%1)\n\t"
- "sd\t%2,-32(%0)\n\t"
- "sd\t%3,-24(%0)\n\t"
- "ld\t%2,-16(%1)\n\t"
- "ld\t%3,-8(%1)\n\t"
- "sd\t%2,-16(%0)\n\t"
- "bne\t$1,%0,1b\n\t"
- " sd\t%3,-8(%0)\n\t"
- ".set\tat\n\t"
- ".set\treorder"
- :"=r" (dummy1), "=r" (dummy2), "=&r" (reg1), "=&r" (reg2)
- :"0" (to), "1" (from), "I" (PAGE_SIZE),
- "i" (Create_Dirty_Excl_D));
-}
+#define cpu_has_64bit_registers cpu_has_64bit_addresses
/*
- * Again a special version for the R4600 V1.x
+ * This is suboptimal for 32-bit kernels; we assume that R10000 is only used
+ * with 64-bit kernels. The prefetch offsets have been experimentally tuned
+ * on an Origin 200.
*/
-void r4k_copy_page_r4600_v1(void * to, void * from)
-{
- unsigned long dummy1, dummy2, reg1, reg2;
+static int pref_offset_clear __initdata = 512;
+static int pref_offset_copy __initdata = 256;
- __asm__ __volatile__(
- ".set\tnoreorder\n\t"
- ".set\tnoat\n\t"
- "daddiu\t$1,%0,%6\n"
- "1:\tnop\n\t"
- "nop\n\t"
- "nop\n\t"
- "nop\n\t"
- "\tcache\t%7,(%0)\n\t"
- "ld\t%2,(%1)\n\t"
- "ld\t%3,8(%1)\n\t"
- "sd\t%2,(%0)\n\t"
- "sd\t%3,8(%0)\n\t"
- "ld\t%2,16(%1)\n\t"
- "ld\t%3,24(%1)\n\t"
- "sd\t%2,16(%0)\n\t"
- "sd\t%3,24(%0)\n\t"
- "nop\n\t"
- "nop\n\t"
- "nop\n\t"
- "nop\n\t"
- "cache\t%7,32(%0)\n\t"
- "daddiu\t%0,64\n\t"
- "daddiu\t%1,64\n\t"
- "ld\t%2,-32(%1)\n\t"
- "ld\t%3,-24(%1)\n\t"
- "sd\t%2,-32(%0)\n\t"
- "sd\t%3,-24(%0)\n\t"
- "ld\t%2,-16(%1)\n\t"
- "ld\t%3,-8(%1)\n\t"
- "sd\t%2,-16(%0)\n\t"
- "bne\t$1,%0,1b\n\t"
- " sd\t%3,-8(%0)\n\t"
- ".set\tat\n\t"
- ".set\treorder"
- :"=r" (dummy1), "=r" (dummy2), "=&r" (reg1), "=&r" (reg2)
- :"0" (to), "1" (from), "I" (PAGE_SIZE),
- "i" (Create_Dirty_Excl_D));
-}
-
-void r4k_copy_page_r4600_v2(void * to, void * from)
-{
- unsigned long dummy1, dummy2, reg1, reg2;
- unsigned long flags;
-
- local_irq_save(flags);
- __asm__ __volatile__(
- ".set\tnoreorder\n\t"
- ".set\tnoat\n\t"
- "daddiu\t$1,%0,%6\n"
- "1:\tnop\n\t"
- "nop\n\t"
- "nop\n\t"
- "nop\n\t"
- "\tcache\t%7,(%0)\n\t"
- "ld\t%2,(%1)\n\t"
- "ld\t%3,8(%1)\n\t"
- "sd\t%2,(%0)\n\t"
- "sd\t%3,8(%0)\n\t"
- "ld\t%2,16(%1)\n\t"
- "ld\t%3,24(%1)\n\t"
- "sd\t%2,16(%0)\n\t"
- "sd\t%3,24(%0)\n\t"
- "nop\n\t"
- "nop\n\t"
- "nop\n\t"
- "nop\n\t"
- "cache\t%7,32(%0)\n\t"
- "daddiu\t%0,64\n\t"
- "daddiu\t%1,64\n\t"
- "ld\t%2,-32(%1)\n\t"
- "ld\t%3,-24(%1)\n\t"
- "sd\t%2,-32(%0)\n\t"
- "sd\t%3,-24(%0)\n\t"
- "ld\t%2,-16(%1)\n\t"
- "ld\t%3,-8(%1)\n\t"
- "sd\t%2,-16(%0)\n\t"
- "bne\t$1,%0,1b\n\t"
- " sd\t%3,-8(%0)\n\t"
- ".set\tat\n\t"
- ".set\treorder"
- :"=r" (dummy1), "=r" (dummy2), "=&r" (reg1), "=&r" (reg2)
- :"0" (to), "1" (from), "I" (PAGE_SIZE),
- "i" (Create_Dirty_Excl_D));
- local_irq_restore(flags);
-}
+static unsigned int pref_src_mode __initdata;
+static unsigned int pref_dst_mode __initdata;
-/*
- * These are for R4000SC / R4400MC
- */
-void r4k_copy_page_s16(void * to, void * from)
-{
- unsigned long dummy1, dummy2, reg1, reg2;
-
- __asm__ __volatile__(
- ".set\tnoreorder\n\t"
- ".set\tnoat\n\t"
- "daddiu\t$1,%0,%6\n"
- "1:\tcache\t%7,(%0)\n\t"
- "ld\t%2,(%1)\n\t"
- "ld\t%3,8(%1)\n\t"
- "sd\t%2,(%0)\n\t"
- "sd\t%3,8(%0)\n\t"
- "cache\t%7,16(%0)\n\t"
- "ld\t%2,16(%1)\n\t"
- "ld\t%3,24(%1)\n\t"
- "sd\t%2,16(%0)\n\t"
- "sd\t%3,24(%0)\n\t"
- "cache\t%7,32(%0)\n\t"
- "daddiu\t%0,64\n\t"
- "daddiu\t%1,64\n\t"
- "ld\t%2,-32(%1)\n\t"
- "ld\t%3,-24(%1)\n\t"
- "sd\t%2,-32(%0)\n\t"
- "sd\t%3,-24(%0)\n\t"
- "cache\t%7,-16(%0)\n\t"
- "ld\t%2,-16(%1)\n\t"
- "ld\t%3,-8(%1)\n\t"
- "sd\t%2,-16(%0)\n\t"
- "bne\t$1,%0,1b\n\t"
- " sd\t%3,-8(%0)\n\t"
- ".set\tat\n\t"
- ".set\treorder"
- :"=r" (dummy1), "=r" (dummy2), "=&r" (reg1), "=&r" (reg2)
- :"0" (to), "1" (from), "I" (PAGE_SIZE),
- "i" (Create_Dirty_Excl_SD));
-}
-
-void r4k_copy_page_s32(void * to, void * from)
-{
- unsigned long dummy1, dummy2, reg1, reg2;
-
- __asm__ __volatile__(
- ".set\tnoreorder\n\t"
- ".set\tnoat\n\t"
- "daddiu\t$1,%0,%6\n"
- "1:\tcache\t%7,(%0)\n\t"
- "ld\t%2,(%1)\n\t"
- "ld\t%3,8(%1)\n\t"
- "sd\t%2,(%0)\n\t"
- "sd\t%3,8(%0)\n\t"
- "ld\t%2,16(%1)\n\t"
- "ld\t%3,24(%1)\n\t"
- "sd\t%2,16(%0)\n\t"
- "sd\t%3,24(%0)\n\t"
- "cache\t%7,32(%0)\n\t"
- "daddiu\t%0,64\n\t"
- "daddiu\t%1,64\n\t"
- "ld\t%2,-32(%1)\n\t"
- "ld\t%3,-24(%1)\n\t"
- "sd\t%2,-32(%0)\n\t"
- "sd\t%3,-24(%0)\n\t"
- "ld\t%2,-16(%1)\n\t"
- "ld\t%3,-8(%1)\n\t"
- "sd\t%2,-16(%0)\n\t"
- "bne\t$1,%0,1b\n\t"
- " sd\t%3,-8(%0)\n\t"
- ".set\tat\n\t"
- ".set\treorder"
- :"=r" (dummy1), "=r" (dummy2), "=&r" (reg1), "=&r" (reg2)
- :"0" (to), "1" (from), "I" (PAGE_SIZE),
- "i" (Create_Dirty_Excl_SD));
-}
-
-void r4k_copy_page_s64(void * to, void * from)
-{
- unsigned long dummy1, dummy2, reg1, reg2;
-
- __asm__ __volatile__(
- ".set\tnoreorder\n\t"
- ".set\tnoat\n\t"
- "daddiu\t$1,%0,%6\n"
- "1:\tcache\t%7,(%0)\n\t"
- "ld\t%2,(%1)\n\t"
- "ld\t%3,8(%1)\n\t"
- "sd\t%2,(%0)\n\t"
- "sd\t%3,8(%0)\n\t"
- "ld\t%2,16(%1)\n\t"
- "ld\t%3,24(%1)\n\t"
- "sd\t%2,16(%0)\n\t"
- "sd\t%3,24(%0)\n\t"
- "daddiu\t%0,64\n\t"
- "daddiu\t%1,64\n\t"
- "ld\t%2,-32(%1)\n\t"
- "ld\t%3,-24(%1)\n\t"
- "sd\t%2,-32(%0)\n\t"
- "sd\t%3,-24(%0)\n\t"
- "ld\t%2,-16(%1)\n\t"
- "ld\t%3,-8(%1)\n\t"
- "sd\t%2,-16(%0)\n\t"
- "bne\t$1,%0,1b\n\t"
- " sd\t%3,-8(%0)\n\t"
- ".set\tat\n\t"
- ".set\treorder"
- :"=r" (dummy1), "=r" (dummy2), "=&r" (reg1), "=&r" (reg2)
- :"0" (to), "1" (from), "I" (PAGE_SIZE),
- "i" (Create_Dirty_Excl_SD));
-}
-
-void r4k_copy_page_s128(void * to, void * from)
-{
- unsigned long dummy1, dummy2;
- unsigned long reg1, reg2, reg3, reg4;
-
- __asm__ __volatile__(
- ".set\tnoreorder\n\t"
- ".set\tnoat\n\t"
- "daddiu\t$1,%0,%8\n"
- "1:\tcache\t%9,(%0)\n\t"
- "ld\t%2,(%1)\n\t"
- "ld\t%3,8(%1)\n\t"
- "ld\t%4,16(%1)\n\t"
- "ld\t%5,24(%1)\n\t"
- "sd\t%2,(%0)\n\t"
- "sd\t%3,8(%0)\n\t"
- "sd\t%4,16(%0)\n\t"
- "sd\t%5,24(%0)\n\t"
- "ld\t%2,32(%1)\n\t"
- "ld\t%3,40(%1)\n\t"
- "ld\t%4,48(%1)\n\t"
- "ld\t%5,56(%1)\n\t"
- "sd\t%2,32(%0)\n\t"
- "sd\t%3,40(%0)\n\t"
- "sd\t%4,48(%0)\n\t"
- "sd\t%5,56(%0)\n\t"
- "daddiu\t%0,128\n\t"
- "daddiu\t%1,128\n\t"
- "ld\t%2,-64(%1)\n\t"
- "ld\t%3,-56(%1)\n\t"
- "ld\t%4,-48(%1)\n\t"
- "ld\t%5,-40(%1)\n\t"
- "sd\t%2,-64(%0)\n\t"
- "sd\t%3,-56(%0)\n\t"
- "sd\t%4,-48(%0)\n\t"
- "sd\t%5,-40(%0)\n\t"
- "ld\t%2,-32(%1)\n\t"
- "ld\t%3,-24(%1)\n\t"
- "ld\t%4,-16(%1)\n\t"
- "ld\t%5,-8(%1)\n\t"
- "sd\t%2,-32(%0)\n\t"
- "sd\t%3,-24(%0)\n\t"
- "sd\t%4,-16(%0)\n\t"
- "bne\t$1,%0,1b\n\t"
- " sd\t%5,-8(%0)\n\t"
- ".set\tat\n\t"
- ".set\treorder"
- :"=r" (dummy1), "=r" (dummy2),
- "=&r" (reg1), "=&r" (reg2), "=&r" (reg3), "=&r" (reg4)
- :"0" (to), "1" (from),
- "I" (PAGE_SIZE),
- "i" (Create_Dirty_Excl_SD));
-}
-
-/*
- * This version has been tuned on an Origin. For other machines the arguments
- * of the pref instructin may have to be tuned differently.
- */
-void andes_copy_page(void * to, void * from)
-{
- unsigned long dummy1, dummy2, reg1, reg2, reg3, reg4;
+static int has_scache __initdata = 0;
+static int load_offset __initdata = 0;
+static int store_offset __initdata = 0;
+
+static unsigned int __initdata *dest, *epc;
+
+static inline void build_src_pref(int advance)
+{
+ if (!(load_offset & (cpu_dcache_line_size() - 1))) {
+ union mips_instruction mi;
+
+ mi.i_format.opcode = pref_op;
+ mi.i_format.rs = 5; /* $a1 */
+ mi.i_format.rt = pref_src_mode;
+ mi.i_format.simmediate = load_offset + advance;
+
+ *epc++ = mi.word;
+ }
+}
+
+static inline void __build_load_reg(int reg)
+{
+ union mips_instruction mi;
+
+ if (cpu_has_64bit_registers)
+ mi.i_format.opcode = ld_op;
+ else
+ mi.i_format.opcode = lw_op;
+ mi.i_format.rs = 5; /* $a1 */
+ mi.i_format.rt = reg; /* load destination register */
+ mi.i_format.simmediate = load_offset;
+
+ load_offset += (cpu_has_64bit_registers ? 8 : 4);
+
+ *epc++ = mi.word;
+}
+
+static inline void build_load_reg(int reg)
+{
+ if (cpu_has_prefetch)
+ build_src_pref(pref_offset_copy);
+
+ __build_load_reg(reg);
+}
+
+static inline void build_dst_pref(int advance)
+{
+ if (!(store_offset & (cpu_dcache_line_size() - 1))) {
+ union mips_instruction mi;
+
+ mi.i_format.opcode = pref_op;
+ mi.i_format.rs = 4; /* $a0 */
+ mi.i_format.rt = pref_dst_mode;
+ mi.i_format.simmediate = store_offset + advance;
+
+ *epc++ = mi.word;
+ }
+}
+
+static inline void build_cdex(void)
+{
+ union mips_instruction mi;
+
+ if (cpu_has_cache_cdex_s &&
+ !(store_offset & (cpu_scache_line_size() - 1))) {
+
+ mi.c_format.opcode = cache_op;
+ mi.c_format.rs = 4; /* $a0 */
+ mi.c_format.c_op = 3; /* Create Dirty Exclusive */
+ mi.c_format.cache = 3; /* Secondary Data Cache */
+ mi.c_format.simmediate = store_offset;
+
+ *epc++ = mi.word;
+ }
+
+ if (store_offset & (cpu_dcache_line_size() - 1))
+ return;
+
+ if (R4600_V1_HIT_CACHEOP_WAR && ((read_c0_prid() & 0xfff0) == 0x2010)) {
+ *epc++ = 0; /* nop */
+ *epc++ = 0; /* nop */
+ *epc++ = 0; /* nop */
+ *epc++ = 0; /* nop */
+ }
+
+ mi.c_format.opcode = cache_op;
+ mi.c_format.rs = 4; /* $a0 */
+ mi.c_format.c_op = 3; /* Create Dirty Exclusive */
+ mi.c_format.cache = 1; /* Data Cache */
+ mi.c_format.simmediate = store_offset;
+
+ *epc++ = mi.word;
+}
+
+static inline void __build_store_zero_reg(void)
+{
+ union mips_instruction mi;
+
+ if (cpu_has_64bits)
+ mi.i_format.opcode = sd_op;
+ else
+ mi.i_format.opcode = sw_op;
+ mi.i_format.rs = 4; /* $a0 */
+ mi.i_format.rt = 0; /* $zero */
+ mi.i_format.simmediate = store_offset;
+
+ store_offset += (cpu_has_64bits ? 8 : 4);
+
+ *epc++ = mi.word;
+}
+
+static inline void __build_store_reg(int reg)
+{
+ union mips_instruction mi;
+ int reg_size;
+
+#ifdef CONFIG_MIPS32
+ if (cpu_has_64bit_registers && reg == 0) {
+ mi.i_format.opcode = sd_op;
+ reg_size = 8;
+ } else {
+ mi.i_format.opcode = sw_op;
+ reg_size = 4;
+ }
+#endif
+#ifdef CONFIG_MIPS64
+ mi.i_format.opcode = sd_op;
+ reg_size = 8;
+#endif
+ mi.i_format.rs = 4; /* $a0 */
+ mi.i_format.rt = reg; /* register whose value is stored */
+ mi.i_format.simmediate = store_offset;
+
+ store_offset += reg_size;
+
+ *epc++ = mi.word;
+}
+
+static inline void build_store_reg(int reg)
+{
+ if (cpu_has_prefetch)
+ if (reg)
+ build_dst_pref(pref_offset_copy);
+ else
+ build_dst_pref(pref_offset_clear);
+ else if (cpu_has_cache_cdex_p)
+ build_cdex();
+
+ __build_store_reg(reg);
+}
+
+static inline void build_addiu_at_a0(unsigned long offset)
+{
+ union mips_instruction mi;
+
+ BUG_ON(offset > 0x7fff);
+
+ mi.i_format.opcode = cpu_has_64bit_addresses ? daddiu_op : addiu_op;
+ mi.i_format.rs = 4; /* $a0 */
+ mi.i_format.rt = 1; /* $at */
+ mi.i_format.simmediate = offset;
+
+ *epc++ = mi.word;
+}
+
+static inline void build_addiu_a1(unsigned long offset)
+{
+ union mips_instruction mi;
+
+ BUG_ON(offset > 0x7fff);
+
+ mi.i_format.opcode = cpu_has_64bit_addresses ? daddiu_op : addiu_op;
+ mi.i_format.rs = 5; /* $a1 */
+ mi.i_format.rt = 5; /* $a1 */
+ mi.i_format.simmediate = offset;
+
+ load_offset -= offset;
+
+ *epc++ = mi.word;
+}
+
+static inline void build_addiu_a0(unsigned long offset)
+{
+ union mips_instruction mi;
+
+ BUG_ON(offset > 0x7fff);
+
+ mi.i_format.opcode = cpu_has_64bit_addresses ? daddiu_op : addiu_op;
+ mi.i_format.rs = 4; /* $a0 */
+ mi.i_format.rt = 4; /* $a0 */
+ mi.i_format.simmediate = offset;
+
+ store_offset -= offset;
+
+ *epc++ = mi.word;
+}
+
+static inline void build_bne(unsigned int *dest)
+{
+ union mips_instruction mi;
+
+ mi.i_format.opcode = bne_op;
+ mi.i_format.rs = 1; /* $at */
+ mi.i_format.rt = 4; /* $a0 */
+ mi.i_format.simmediate = dest - epc - 1;
+
+ *epc++ = mi.word;
+}
+
+static inline void build_nop(void)
+{
+ *epc++ = 0;
+}
+
+static inline void build_jr_ra(void)
+{
+ union mips_instruction mi;
+
+ mi.r_format.opcode = spec_op;
+ mi.r_format.rs = 31;
+ mi.r_format.rt = 0;
+ mi.r_format.rd = 0;
+ mi.r_format.re = 0;
+ mi.r_format.func = jr_op;
+
+ *epc++ = mi.word;
+}
+
+void __init build_clear_page(void)
+{
+ epc = (unsigned int *) &clear_page_array;
+
+ if (cpu_has_prefetch) {
+ switch (current_cpu_data.cputype) {
+ case CPU_R10000:
+ case CPU_R12000:
+ pref_src_mode = Pref_LoadStreamed;
+ pref_dst_mode = Pref_StoreRetained;
+ break;
+ default:
+ pref_src_mode = Pref_LoadStreamed;
+ pref_dst_mode = Pref_PrepareForStore;
+ break;
+ }
+ }
+
+ build_addiu_at_a0(PAGE_SIZE - (cpu_has_prefetch ? pref_offset_clear : 0));
+
+ if (R4600_V2_HIT_CACHEOP_WAR && ((read_c0_prid() & 0xfff0) == 0x2020)) {
+ *epc++ = 0x40026000; /* mfc0 $v0, $12 */
+ *epc++ = 0x34410001; /* ori $at, v0, 0x1 */
+ *epc++ = 0x38210001; /* xori $at, at, 0x1 */
+ *epc++ = 0x40816000; /* mtc0 $at, $12 */
+ *epc++ = 0x00000000; /* nop */
+ *epc++ = 0x00000000; /* nop */
+ *epc++ = 0x00000000; /* nop */
+ *epc++ = 0x3c01a000; /* lui $at, 0xa000 */
+ *epc++ = 0x8c200000; /* lw $zero, ($at) */
+ }
+
+dest = epc;
+ build_store_reg(0);
+ build_store_reg(0);
+ build_store_reg(0);
+ build_store_reg(0);
+ if (has_scache && cpu_scache_line_size() == 128) {
+ build_store_reg(0);
+ build_store_reg(0);
+ build_store_reg(0);
+ build_store_reg(0);
+ }
+ build_addiu_a0(2 * store_offset);
+ build_store_reg(0);
+ build_store_reg(0);
+ if (has_scache && cpu_scache_line_size() == 128) {
+ build_store_reg(0);
+ build_store_reg(0);
+ build_store_reg(0);
+ build_store_reg(0);
+ }
+ build_store_reg(0);
+ build_bne(dest);
+ build_store_reg(0);
+
+ if (cpu_has_prefetch && pref_offset_clear) {
+ build_addiu_at_a0(pref_offset_clear);
+ dest = epc;
+ __build_store_reg(0);
+ __build_store_reg(0);
+ __build_store_reg(0);
+ __build_store_reg(0);
+ build_addiu_a0(2 * store_offset);
+ __build_store_reg(0);
+ __build_store_reg(0);
+ __build_store_reg(0);
+ build_bne(dest);
+ __build_store_reg(0);
+ }
+
+ build_jr_ra();
+ if (R4600_V2_HIT_CACHEOP_WAR && ((read_c0_prid() & 0xfff0) == 0x2020))
+ *epc++ = 0x40826000; /* mtc0 $v0, $12 */
+ else
+ build_nop();
+
+ flush_icache_range((unsigned long)&clear_page_array,
+ (unsigned long) epc);
+
+ BUG_ON(epc > clear_page_array + ARRAY_SIZE(clear_page_array));
+}
+
+void __init build_copy_page(void)
+{
+ epc = (unsigned int *) &copy_page_array;
+
+ build_addiu_at_a0(PAGE_SIZE - (cpu_has_prefetch ? pref_offset_copy : 0));
+
+ if (R4600_V2_HIT_CACHEOP_WAR && ((read_c0_prid() & 0xfff0) == 0x2020)) {
+ *epc++ = 0x40026000; /* mfc0 $v0, $12 */
+ *epc++ = 0x34410001; /* ori $at, v0, 0x1 */
+ *epc++ = 0x38210001; /* xori $at, at, 0x1 */
+ *epc++ = 0x40816000; /* mtc0 $at, $12 */
+ *epc++ = 0x00000000; /* nop */
+ *epc++ = 0x00000000; /* nop */
+ *epc++ = 0x00000000; /* nop */
+ *epc++ = 0x3c01a000; /* lui $at, 0xa000 */
+ *epc++ = 0x8c200000; /* lw $zero, ($at) */
+ }
+
+dest = epc;
+ build_load_reg( 8);
+ build_load_reg( 9);
+ build_load_reg(10);
+ build_load_reg(11);
+ build_store_reg( 8);
+ build_store_reg( 9);
+ build_store_reg(10);
+ build_store_reg(11);
+ if (has_scache && cpu_scache_line_size() == 128) {
+ build_load_reg( 8);
+ build_load_reg( 9);
+ build_load_reg(10);
+ build_load_reg(11);
+ build_store_reg( 8);
+ build_store_reg( 9);
+ build_store_reg(10);
+ build_store_reg(11);
+ }
+ build_addiu_a0(2 * store_offset);
+ build_addiu_a1(2 * load_offset);
+ build_load_reg( 8);
+ build_load_reg( 9);
+ build_load_reg(10);
+ build_load_reg(11);
+ build_store_reg( 8);
+ build_store_reg( 9);
+ build_store_reg(10);
+ if (has_scache && cpu_scache_line_size() == 128) {
+ build_store_reg(11);
+ build_load_reg( 8);
+ build_load_reg( 9);
+ build_load_reg(10);
+ build_load_reg(11);
+ build_store_reg( 8);
+ build_store_reg( 9);
+ build_store_reg(10);
+ }
+ build_bne(dest);
+ build_store_reg(11);
+
+ if (cpu_has_prefetch && pref_offset_copy) {
+ build_addiu_at_a0(pref_offset_copy);
+ dest = epc;
+ __build_load_reg( 8);
+ __build_load_reg( 9);
+ __build_load_reg(10);
+ __build_load_reg(11);
+ __build_store_reg( 8);
+ __build_store_reg( 9);
+ __build_store_reg(10);
+ __build_store_reg(11);
+ build_addiu_a0(2 * store_offset);
+ build_addiu_a1(2 * load_offset);
+ __build_load_reg( 8);
+ __build_load_reg( 9);
+ __build_load_reg(10);
+ __build_load_reg(11);
+ __build_store_reg( 8);
+ __build_store_reg( 9);
+ __build_store_reg(10);
+ build_bne(dest);
+ __build_store_reg(11);
+ }
+
+ build_jr_ra();
+ if (R4600_V2_HIT_CACHEOP_WAR && ((read_c0_prid() & 0xfff0) == 0x2020))
+ *epc++ = 0x40826000; /* mtc0 $v0, $12 */
+ else
+ build_nop();
- __asm__ __volatile__(
- ".set\tpush\n\t"
- ".set\tmips4\n\t"
- ".set\tnoreorder\n\t"
- ".set\tnoat\n\t"
- "daddiu\t$1,%0,%8\n"
- "1:\tpref\t0,2*128(%1)\n\t"
- "pref\t1,2*128(%0)\n\t"
- "ld\t%2,(%1)\n\t"
- "ld\t%3,8(%1)\n\t"
- "ld\t%4,16(%1)\n\t"
- "ld\t%5,24(%1)\n\t"
- "sd\t%2,(%0)\n\t"
- "sd\t%3,8(%0)\n\t"
- "sd\t%4,16(%0)\n\t"
- "sd\t%5,24(%0)\n\t"
- "daddiu\t%0,64\n\t"
- "daddiu\t%1,64\n\t"
- "ld\t%2,-32(%1)\n\t"
- "ld\t%3,-24(%1)\n\t"
- "ld\t%4,-16(%1)\n\t"
- "ld\t%5,-8(%1)\n\t"
- "sd\t%2,-32(%0)\n\t"
- "sd\t%3,-24(%0)\n\t"
- "sd\t%4,-16(%0)\n\t"
- "bne\t$1,%0,1b\n\t"
- " sd\t%5,-8(%0)\n\t"
- ".set\tpop\n\t"
- :"=r" (dummy1), "=r" (dummy2), "=&r" (reg1), "=&r" (reg2),
- "=&r" (reg3), "=&r" (reg4)
- :"0" (to), "1" (from), "I" (PAGE_SIZE));
+ BUG_ON(epc > copy_page_array + ARRAY_SIZE(copy_page_array));
}
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)