patch-2.4.19 linux-2.4.19/arch/mips/mm/pg-r4k.S
Next file: linux-2.4.19/arch/mips/mm/pg-r5432.c
Previous file: linux-2.4.19/arch/mips/mm/pg-r3k.c
Back to the patch index
Back to the overall index
- Lines: 691
- Date:
Fri Aug 2 17:39:43 2002
- Orig file:
linux-2.4.18/arch/mips/mm/pg-r4k.S
- Orig date:
Wed Dec 31 16:00:00 1969
diff -urN linux-2.4.18/arch/mips/mm/pg-r4k.S linux-2.4.19/arch/mips/mm/pg-r4k.S
@@ -0,0 +1,690 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * r4xx0.c: R4000 processor variant specific MMU/Cache routines.
+ *
+ * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
+ * Copyright (C) 1997, 1998, 1999, 2000 Ralf Baechle ralf@gnu.org
+ */
+#include <linux/config.h>
+#include <asm/addrspace.h>
+#include <asm/asm.h>
+#include <asm/regdef.h>
+#include <asm/cacheops.h>
+#include <asm/mipsregs.h>
+#include <asm/offset.h>
+
+#ifdef CONFIG_64BIT_PHYS_ADDR /* PGD_SIZE is only used by pgd_init in this file */
+#define PGD_SIZE 0x2000 /* 8 KiB when CONFIG_64BIT_PHYS_ADDR is set */
+#else
+#define PGD_SIZE 0x1000 /* 4 KiB otherwise */
+#endif
+
+ .text
+ .set mips3 /* routines below use sd and cache; presumably these need the mips3 ISA level - confirm */
+ .set noat /* the routines below use AT explicitly as their loop end pointer / scratch register */
+
+/*
+ * Zero an entire page. Basically a simple unrolled loop should do the
+ * job but we want more performance by saving memory bus bandwidth. We
+ * have five flavours of the routine available for:
+ *
+ * - 16byte cachelines and no second level cache
+ * - 32byte cachelines and no second level cache
+ * - a version which handles the buggy R4600 v1.x
+ * - a version which handles the buggy R4600 v2.0
+ * - Finally a last version without fancy cache games for the SC and MC
+ * versions of R4000 and R4400.
+ */
+
+/*
+ * r4k_clear_page_d16 - zero one page, one cache op per 16 bytes.
+ *
+ * In:       a0 = start address of the page
+ * Clobbers: AT (end pointer, a0 + _PAGE_SIZE), a0 (equals AT on return)
+ *
+ * Each pass handles 64 bytes as four 16 byte chunks.  Every chunk gets
+ * a Create_Dirty_Excl_D cache op first and is then filled by two
+ * doubleword stores of zero.  The loop terminates only on a0 == AT, so
+ * _PAGE_SIZE has to be a multiple of 64.
+ */
+LEAF(r4k_clear_page_d16)
+	addiu	AT, a0, _PAGE_SIZE
+1:
+	cache	Create_Dirty_Excl_D, 0(a0)	/* chunk 0 */
+	sd	zero, 0(a0)
+	sd	zero, 8(a0)
+
+	cache	Create_Dirty_Excl_D, 16(a0)	/* chunk 1 */
+	sd	zero, 16(a0)
+	sd	zero, 24(a0)
+
+	addiu	a0, a0, 64			/* step now; remaining offsets are negative */
+
+	cache	Create_Dirty_Excl_D, -32(a0)	/* chunk 2 */
+	sd	zero, -32(a0)
+	sd	zero, -24(a0)
+
+	cache	Create_Dirty_Excl_D, -16(a0)	/* chunk 3 */
+	sd	zero, -16(a0)
+	sd	zero, -8(a0)
+
+	bne	a0, AT, 1b			/* loop until the end pointer is reached */
+	jr	ra
+	END(r4k_clear_page_d16)
+
+/*
+ * r4k_clear_page_d32 - zero one page, one cache op per 32 bytes.
+ *
+ * In:       a0 = start address of the page
+ * Clobbers: AT (end pointer, a0 + _PAGE_SIZE), a0 (equals AT on return)
+ *
+ * Each pass handles 64 bytes as two 32 byte chunks.  Every chunk gets a
+ * Create_Dirty_Excl_D cache op first and is then filled by four
+ * doubleword stores of zero.  The loop terminates only on a0 == AT, so
+ * _PAGE_SIZE has to be a multiple of 64.
+ */
+LEAF(r4k_clear_page_d32)
+	addiu	AT, a0, _PAGE_SIZE
+1:
+	cache	Create_Dirty_Excl_D, 0(a0)	/* first 32 bytes */
+	sd	zero, 0(a0)
+	sd	zero, 8(a0)
+	sd	zero, 16(a0)
+	sd	zero, 24(a0)
+
+	addiu	a0, a0, 64			/* step now; remaining offsets are negative */
+
+	cache	Create_Dirty_Excl_D, -32(a0)	/* second 32 bytes */
+	sd	zero, -32(a0)
+	sd	zero, -24(a0)
+	sd	zero, -16(a0)
+	sd	zero, -8(a0)
+
+	bne	a0, AT, 1b			/* loop until the end pointer is reached */
+	jr	ra
+	END(r4k_clear_page_d32)
+
+/*
+ * This flavour of r4k_clear_page is for the R4600 V1.x. Cite from the
+ * IDT R4600 V1.7 errata:
+ *
+ * 18. The CACHE instructions Hit_Writeback_Invalidate_D, Hit_Writeback_D,
+ * Hit_Invalidate_D and Create_Dirty_Excl_D should only be
+ * executed if there is no other dcache activity. If the dcache is
+ * accessed for another instruction immediately preceding when these
+ * cache instructions are executing, it is possible that the dcache
+ * tag match outputs used by these cache instructions will be
+ * incorrect. These cache instructions should be preceded by at least
+ * four instructions that are not any kind of load or store
+ * instruction.
+ *
+ * This is not allowed: lw
+ * nop
+ * nop
+ * nop
+ * cache Hit_Writeback_Invalidate_D
+ *
+ * This is allowed: lw
+ * nop
+ * nop
+ * nop
+ * nop
+ * cache Hit_Writeback_Invalidate_D
+ */
+
+LEAF(r4k_clear_page_r4600_v1) /* zero the page at a0; variant with nop padding ahead of every cache op */
+ addiu AT, a0, _PAGE_SIZE /* AT = end pointer; the loop ends when a0 == AT */
+1: nop /* four nops: at least four non-load/store instructions */
+ nop /* sit directly ahead of the cache op below, which is what */
+ nop /* the erratum quoted before this routine asks for */
+ nop
+ cache Create_Dirty_Excl_D, (a0) /* one cache op per 32 bytes stored */
+ sd zero, (a0)
+ sd zero, 8(a0)
+ sd zero, 16(a0)
+ sd zero, 24(a0)
+ addiu a0, 64 /* advance a0; together with the three nops this gives four non-load/store instructions */
+ nop
+ nop
+ nop
+ cache Create_Dirty_Excl_D, -32(a0) /* second 32 bytes, addressed back from the advanced a0 */
+ sd zero, -32(a0)
+ sd zero, -24(a0)
+ sd zero, -16(a0)
+ sd zero, -8(a0)
+ bne AT, a0, 1b /* 64 bytes per pass; exits only on equality, so _PAGE_SIZE must be a multiple of 64 */
+ jr ra
+ END(r4k_clear_page_r4600_v1)
+
+LEAF(r4k_clear_page_r4600_v2) /* zero the page at a0 with bit 0 of the CP0 status register cleared for the duration */
+ mfc0 a1, CP0_STATUS /* a1 = status register on entry, kept until the restore below */
+ ori AT, a1, 1 /* set bit 0 ... */
+ xori AT, 1 /* ... then toggle it: AT = a1 with bit 0 cleared (presumably the interrupt enable bit - confirm) */
+ mtc0 AT, CP0_STATUS
+ nop /* three nops after the status write (presumably CP0 hazard padding - confirm) */
+ nop
+ nop
+
+ .set volatile
+ la AT, KSEG1 /* AT = KSEG1 */
+ lw zero, (AT) /* load from that address, result discarded; presumably part of the R4600 V2.0 workaround - confirm */
+ .set novolatile
+
+ addiu AT, a0, _PAGE_SIZE /* AT = end pointer; the loop ends when a0 == AT */
+1: cache Create_Dirty_Excl_D, (a0) /* one cache op per 32 bytes stored */
+ sd zero, (a0)
+ sd zero, 8(a0)
+ sd zero, 16(a0)
+ sd zero, 24(a0)
+ addiu a0, 64 /* advance; the second half is addressed with negative offsets */
+ cache Create_Dirty_Excl_D, -32(a0)
+ sd zero, -32(a0)
+ sd zero, -24(a0)
+ sd zero, -16(a0)
+ sd zero, -8(a0)
+ bne AT, a0, 1b /* 64 bytes per pass */
+
+ mfc0 AT, CP0_STATUS # __restore_flags
+ andi a1, 1 /* keep only bit 0 of the status value saved on entry */
+ ori AT, 1 /* clear bit 0 of the current status value ... */
+ xori AT, 1
+ or a1, AT /* ... and merge the saved bit 0 back in */
+ mtc0 a1, CP0_STATUS
+ nop /* same three-nop padding as after the first status write */
+ nop
+ nop
+
+ jr ra
+ END(r4k_clear_page_r4600_v2)
+
+/*
+ * The next 4 versions are optimized for all possible scache configurations
+ * of the SC / MC versions of R4000 and R4400 ...
+ *
+ * Todo: For even better performance we should have a routine optimized for
+ * every legal combination of dcache / scache linesize. When I (Ralf) tried
+ * this the kernel crashed shortly after mounting the root filesystem. CPU
+ * bug? Weirdo cache instruction semantics?
+ */
+
+/*
+ * r4k_clear_page_s16 - zero one page, one Create_Dirty_Excl_SD cache
+ * op per 16 bytes.
+ *
+ * In:       a0 = start address of the page
+ * Clobbers: AT (end pointer, a0 + _PAGE_SIZE), a0 (equals AT on return)
+ *
+ * Each pass handles 64 bytes as four 16 byte chunks; every chunk gets
+ * its cache op first and is then filled by two doubleword stores of
+ * zero.  _PAGE_SIZE has to be a multiple of 64 for the loop to end.
+ */
+LEAF(r4k_clear_page_s16)
+	addiu	AT, a0, _PAGE_SIZE
+1:
+	cache	Create_Dirty_Excl_SD, 0(a0)	/* chunk 0 */
+	sd	zero, 0(a0)
+	sd	zero, 8(a0)
+
+	cache	Create_Dirty_Excl_SD, 16(a0)	/* chunk 1 */
+	sd	zero, 16(a0)
+	sd	zero, 24(a0)
+
+	addiu	a0, a0, 64			/* step now; remaining offsets are negative */
+
+	cache	Create_Dirty_Excl_SD, -32(a0)	/* chunk 2 */
+	sd	zero, -32(a0)
+	sd	zero, -24(a0)
+
+	cache	Create_Dirty_Excl_SD, -16(a0)	/* chunk 3 */
+	sd	zero, -16(a0)
+	sd	zero, -8(a0)
+
+	bne	a0, AT, 1b			/* loop until the end pointer is reached */
+	jr	ra
+	END(r4k_clear_page_s16)
+
+/*
+ * r4k_clear_page_s32 - zero one page, one Create_Dirty_Excl_SD cache
+ * op per 32 bytes.
+ *
+ * In:       a0 = start address of the page
+ * Clobbers: AT (end pointer, a0 + _PAGE_SIZE), a0 (equals AT on return)
+ *
+ * Each pass handles 64 bytes as two 32 byte chunks; every chunk gets
+ * its cache op first and is then filled by four doubleword stores of
+ * zero.  _PAGE_SIZE has to be a multiple of 64 for the loop to end.
+ */
+LEAF(r4k_clear_page_s32)
+	addiu	AT, a0, _PAGE_SIZE
+1:
+	cache	Create_Dirty_Excl_SD, 0(a0)	/* first 32 bytes */
+	sd	zero, 0(a0)
+	sd	zero, 8(a0)
+	sd	zero, 16(a0)
+	sd	zero, 24(a0)
+
+	addiu	a0, a0, 64			/* step now; remaining offsets are negative */
+
+	cache	Create_Dirty_Excl_SD, -32(a0)	/* second 32 bytes */
+	sd	zero, -32(a0)
+	sd	zero, -24(a0)
+	sd	zero, -16(a0)
+	sd	zero, -8(a0)
+
+	bne	a0, AT, 1b			/* loop until the end pointer is reached */
+	jr	ra
+	END(r4k_clear_page_s32)
+
+/*
+ * r4k_clear_page_s64 - zero one page, one Create_Dirty_Excl_SD cache
+ * op per 64 bytes.
+ *
+ * In:       a0 = start address of the page
+ * Clobbers: AT (end pointer, a0 + _PAGE_SIZE), a0 (equals AT on return)
+ *
+ * Each pass issues a single cache op at the start of a 64 byte chunk
+ * and fills the chunk with eight doubleword stores of zero.
+ * _PAGE_SIZE has to be a multiple of 64 for the loop to end.
+ */
+LEAF(r4k_clear_page_s64)
+	addiu	AT, a0, _PAGE_SIZE
+1:
+	cache	Create_Dirty_Excl_SD, 0(a0)	/* the only cache op of this pass */
+
+	sd	zero, 0(a0)			/* lower 32 bytes */
+	sd	zero, 8(a0)
+	sd	zero, 16(a0)
+	sd	zero, 24(a0)
+
+	addiu	a0, a0, 64			/* step now; remaining offsets are negative */
+
+	sd	zero, -32(a0)			/* upper 32 bytes */
+	sd	zero, -24(a0)
+	sd	zero, -16(a0)
+	sd	zero, -8(a0)
+
+	bne	a0, AT, 1b			/* loop until the end pointer is reached */
+	jr	ra
+	END(r4k_clear_page_s64)
+
+/*
+ * r4k_clear_page_s128 - zero one page, one Create_Dirty_Excl_SD cache
+ * op per 128 bytes.
+ *
+ * In:       a0 = start address of the page
+ * Clobbers: AT (end pointer, a0 + _PAGE_SIZE), a0 (equals AT on return)
+ *
+ * Each pass issues a single cache op at the start of a 128 byte chunk
+ * and fills the chunk with sixteen doubleword stores of zero.
+ * _PAGE_SIZE has to be a multiple of 128 for the loop to end.
+ */
+LEAF(r4k_clear_page_s128)
+	addiu	AT, a0, _PAGE_SIZE
+1:
+	cache	Create_Dirty_Excl_SD, 0(a0)	/* the only cache op of this pass */
+
+	sd	zero, 0(a0)			/* lower 64 bytes */
+	sd	zero, 8(a0)
+	sd	zero, 16(a0)
+	sd	zero, 24(a0)
+	sd	zero, 32(a0)
+	sd	zero, 40(a0)
+	sd	zero, 48(a0)
+	sd	zero, 56(a0)
+
+	addiu	a0, a0, 128			/* step now; remaining offsets are negative */
+
+	sd	zero, -64(a0)			/* upper 64 bytes */
+	sd	zero, -56(a0)
+	sd	zero, -48(a0)
+	sd	zero, -40(a0)
+	sd	zero, -32(a0)
+	sd	zero, -24(a0)
+	sd	zero, -16(a0)
+	sd	zero, -8(a0)
+
+	bne	a0, AT, 1b			/* loop until the end pointer is reached */
+	jr	ra
+	END(r4k_clear_page_s128)
+
+/*
+ * This is still inefficient. We only can do better if we know the
+ * virtual address where the copy will be accessed.
+ */
+
+/*
+ * r4k_copy_page_d16 - copy one page, one Create_Dirty_Excl_D cache op
+ * per 16 destination bytes.
+ *
+ * In:       a0 = destination page, a1 = source page
+ * Clobbers: AT (end pointer, a0 + _PAGE_SIZE), a0, a1 (both advanced by
+ *           _PAGE_SIZE), a2, a3, v0, v1 (data being moved)
+ *
+ * Each pass moves 64 bytes as four 16 byte chunks.  For every chunk the
+ * cache op on the destination comes first, then four word loads from
+ * the source, then four word stores to the destination.  _PAGE_SIZE
+ * has to be a multiple of 64 for the loop to end.
+ */
+LEAF(r4k_copy_page_d16)
+	addiu	AT, a0, _PAGE_SIZE
+1:
+	cache	Create_Dirty_Excl_D, 0(a0)	/* chunk 0 */
+	lw	a3, 0(a1)
+	lw	a2, 4(a1)
+	lw	v1, 8(a1)
+	lw	v0, 12(a1)
+	sw	a3, 0(a0)
+	sw	a2, 4(a0)
+	sw	v1, 8(a0)
+	sw	v0, 12(a0)
+
+	cache	Create_Dirty_Excl_D, 16(a0)	/* chunk 1 */
+	lw	a3, 16(a1)
+	lw	a2, 20(a1)
+	lw	v1, 24(a1)
+	lw	v0, 28(a1)
+	sw	a3, 16(a0)
+	sw	a2, 20(a0)
+	sw	v1, 24(a0)
+	sw	v0, 28(a0)
+
+	cache	Create_Dirty_Excl_D, 32(a0)	/* chunk 2, issued before the pointers move */
+	addiu	a0, a0, 64
+	addiu	a1, a1, 64
+	lw	a3, -32(a1)
+	lw	a2, -28(a1)
+	lw	v1, -24(a1)
+	lw	v0, -20(a1)
+	sw	a3, -32(a0)
+	sw	a2, -28(a0)
+	sw	v1, -24(a0)
+	sw	v0, -20(a0)
+
+	cache	Create_Dirty_Excl_D, -16(a0)	/* chunk 3 */
+	lw	a3, -16(a1)
+	lw	a2, -12(a1)
+	lw	v1, -8(a1)
+	lw	v0, -4(a1)
+	sw	a3, -16(a0)
+	sw	a2, -12(a0)
+	sw	v1, -8(a0)
+	sw	v0, -4(a0)
+
+	bne	a0, AT, 1b			/* loop until the end pointer is reached */
+	jr	ra
+	END(r4k_copy_page_d16)
+
+/*
+ * r4k_copy_page_d32 - copy one page, one Create_Dirty_Excl_D cache op
+ * per 32 destination bytes.
+ *
+ * In:       a0 = destination page, a1 = source page
+ * Clobbers: AT (end pointer, a0 + _PAGE_SIZE), a0, a1 (both advanced by
+ *           _PAGE_SIZE), a2, a3, v0, v1 (data being moved)
+ *
+ * Each pass moves 64 bytes as two 32 byte halves.  Each half starts
+ * with the cache op on the destination and is then moved in two
+ * groups of four word loads followed by four word stores.  _PAGE_SIZE
+ * has to be a multiple of 64 for the loop to end.
+ */
+LEAF(r4k_copy_page_d32)
+	addiu	AT, a0, _PAGE_SIZE
+1:
+	cache	Create_Dirty_Excl_D, 0(a0)	/* first 32 bytes */
+	lw	a3, 0(a1)
+	lw	a2, 4(a1)
+	lw	v1, 8(a1)
+	lw	v0, 12(a1)
+	sw	a3, 0(a0)
+	sw	a2, 4(a0)
+	sw	v1, 8(a0)
+	sw	v0, 12(a0)
+
+	lw	a3, 16(a1)
+	lw	a2, 20(a1)
+	lw	v1, 24(a1)
+	lw	v0, 28(a1)
+	sw	a3, 16(a0)
+	sw	a2, 20(a0)
+	sw	v1, 24(a0)
+	sw	v0, 28(a0)
+
+	cache	Create_Dirty_Excl_D, 32(a0)	/* second 32 bytes, issued before the pointers move */
+	addiu	a0, a0, 64
+	addiu	a1, a1, 64
+	lw	a3, -32(a1)
+	lw	a2, -28(a1)
+	lw	v1, -24(a1)
+	lw	v0, -20(a1)
+	sw	a3, -32(a0)
+	sw	a2, -28(a0)
+	sw	v1, -24(a0)
+	sw	v0, -20(a0)
+
+	lw	a3, -16(a1)
+	lw	a2, -12(a1)
+	lw	v1, -8(a1)
+	lw	v0, -4(a1)
+	sw	a3, -16(a0)
+	sw	a2, -12(a0)
+	sw	v1, -8(a0)
+	sw	v0, -4(a0)
+
+	bne	a0, AT, 1b			/* loop until the end pointer is reached */
+	jr	ra
+	END(r4k_copy_page_d32)
+
+/*
+ * Again a special version for the R4600 V1.x
+ */
+
+LEAF(r4k_copy_page_r4600_v1) /* copy the page at a1 to the page at a0; four nops ahead of every cache op */
+ addiu AT, a0, _PAGE_SIZE /* AT = end pointer for a0; the loop ends when a0 == AT */
+1: nop /* four non-load/store instructions directly ahead of */
+ nop /* the cache op below */
+ nop
+ nop
+ cache Create_Dirty_Excl_D, (a0) /* one cache op per 32 destination bytes */
+ lw a3, (a1) /* words move in groups of four: loads from a1, then stores to a0 */
+ lw a2, 4(a1)
+ lw v1, 8(a1)
+ lw v0, 12(a1)
+ sw a3, (a0)
+ sw a2, 4(a0)
+ sw v1, 8(a0)
+ sw v0, 12(a0)
+ lw a3, 16(a1)
+ lw a2, 20(a1)
+ lw v1, 24(a1)
+ lw v0, 28(a1)
+ sw a3, 16(a0)
+ sw a2, 20(a0)
+ sw v1, 24(a0)
+ sw v0, 28(a0)
+ nop /* again four nops between the last store and the next cache op */
+ nop
+ nop
+ nop
+ cache Create_Dirty_Excl_D, 32(a0) /* second 32 bytes; issued before the pointers are advanced */
+ addiu a0, 64 /* advance both pointers; the rest of the pass uses negative offsets */
+ addiu a1, 64
+ lw a3, -32(a1)
+ lw a2, -28(a1)
+ lw v1, -24(a1)
+ lw v0, -20(a1)
+ sw a3, -32(a0)
+ sw a2, -28(a0)
+ sw v1, -24(a0)
+ sw v0, -20(a0)
+ lw a3, -16(a1)
+ lw a2, -12(a1)
+ lw v1, -8(a1)
+ lw v0, -4(a1)
+ sw a3, -16(a0)
+ sw a2, -12(a0)
+ sw v1, -8(a0)
+ sw v0, -4(a0)
+ bne AT, a0, 1b /* 64 bytes per pass; exits only on equality, so _PAGE_SIZE must be a multiple of 64 */
+ jr ra
+ END(r4k_copy_page_r4600_v1)
+
+LEAF(r4k_copy_page_r4600_v2) /* copy the page at a1 to the page at a0 with bit 0 of the CP0 status register cleared for the duration */
+ mfc0 v1, CP0_STATUS /* v1 = status register on entry, kept until the restore below */
+ ori AT, v1, 1 /* set bit 0 ... */
+ xori AT, 1 /* ... then toggle it: AT = v1 with bit 0 cleared (presumably the interrupt enable bit - confirm) */
+
+ mtc0 AT, CP0_STATUS
+ nop /* three nops after the status write (presumably CP0 hazard padding - confirm) */
+ nop
+ nop
+
+ addiu AT, a0, _PAGE_SIZE /* AT = end pointer for a0; NOTE(review): no KSEG1 load here, unlike r4k_clear_page_r4600_v2 - confirm intended */
+1: nop /* four non-load/store instructions directly ahead of the cache op below */
+ nop
+ nop
+ nop
+ cache Create_Dirty_Excl_D, (a0) /* one cache op per 32 destination bytes */
+ lw t1, (a1) /* data moves through t1, t0, a3, a2 because v1 holds the saved status */
+ lw t0, 4(a1)
+ lw a3, 8(a1)
+ lw a2, 12(a1)
+ sw t1, (a0)
+ sw t0, 4(a0)
+ sw a3, 8(a0)
+ sw a2, 12(a0)
+ lw t1, 16(a1)
+ lw t0, 20(a1)
+ lw a3, 24(a1)
+ lw a2, 28(a1)
+ sw t1, 16(a0)
+ sw t0, 20(a0)
+ sw a3, 24(a0)
+ sw a2, 28(a0)
+ nop /* again four nops between the last store and the next cache op */
+ nop
+ nop
+ nop
+ cache Create_Dirty_Excl_D, 32(a0) /* second 32 bytes; issued before the pointers are advanced */
+ addiu a0, 64 /* advance both pointers; the rest of the pass uses negative offsets */
+ addiu a1, 64
+ lw t1, -32(a1)
+ lw t0, -28(a1)
+ lw a3, -24(a1)
+ lw a2, -20(a1)
+ sw t1, -32(a0)
+ sw t0, -28(a0)
+ sw a3, -24(a0)
+ sw a2, -20(a0)
+ lw t1, -16(a1)
+ lw t0, -12(a1)
+ lw a3, -8(a1)
+ lw a2, -4(a1)
+ sw t1, -16(a0)
+ sw t0, -12(a0)
+ sw a3, -8(a0)
+ sw a2, -4(a0)
+ bne AT, a0, 1b /* 64 bytes per pass */
+
+ mfc0 AT, CP0_STATUS # __restore_flags
+ andi v1, 1 /* keep only bit 0 of the status value saved on entry */
+ ori AT, 1 /* clear bit 0 of the current status value ... */
+ xori AT, 1
+ or v1, AT /* ... and merge the saved bit 0 back in */
+ mtc0 v1, CP0_STATUS
+ nop /* same three-nop padding as after the first status write */
+ nop
+ nop
+ jr ra
+ END(r4k_copy_page_r4600_v2)
+
+/*
+ * These are for R4000SC / R4400MC
+ */
+
+/*
+ * r4k_copy_page_s16 - copy one page, one Create_Dirty_Excl_SD cache op
+ * per 16 destination bytes.
+ *
+ * In:       a0 = destination page, a1 = source page
+ * Clobbers: AT (end pointer, a0 + _PAGE_SIZE), a0, a1 (both advanced by
+ *           _PAGE_SIZE), a2, a3, v0, v1 (data being moved)
+ *
+ * Each pass moves 64 bytes as four 16 byte chunks.  For every chunk the
+ * cache op on the destination comes first, then four word loads from
+ * the source, then four word stores to the destination.  _PAGE_SIZE
+ * has to be a multiple of 64 for the loop to end.
+ */
+LEAF(r4k_copy_page_s16)
+	addiu	AT, a0, _PAGE_SIZE
+1:
+	cache	Create_Dirty_Excl_SD, 0(a0)	/* chunk 0 */
+	lw	a3, 0(a1)
+	lw	a2, 4(a1)
+	lw	v1, 8(a1)
+	lw	v0, 12(a1)
+	sw	a3, 0(a0)
+	sw	a2, 4(a0)
+	sw	v1, 8(a0)
+	sw	v0, 12(a0)
+
+	cache	Create_Dirty_Excl_SD, 16(a0)	/* chunk 1 */
+	lw	a3, 16(a1)
+	lw	a2, 20(a1)
+	lw	v1, 24(a1)
+	lw	v0, 28(a1)
+	sw	a3, 16(a0)
+	sw	a2, 20(a0)
+	sw	v1, 24(a0)
+	sw	v0, 28(a0)
+
+	cache	Create_Dirty_Excl_SD, 32(a0)	/* chunk 2, issued before the pointers move */
+	addiu	a0, a0, 64
+	addiu	a1, a1, 64
+	lw	a3, -32(a1)
+	lw	a2, -28(a1)
+	lw	v1, -24(a1)
+	lw	v0, -20(a1)
+	sw	a3, -32(a0)
+	sw	a2, -28(a0)
+	sw	v1, -24(a0)
+	sw	v0, -20(a0)
+
+	cache	Create_Dirty_Excl_SD, -16(a0)	/* chunk 3 */
+	lw	a3, -16(a1)
+	lw	a2, -12(a1)
+	lw	v1, -8(a1)
+	lw	v0, -4(a1)
+	sw	a3, -16(a0)
+	sw	a2, -12(a0)
+	sw	v1, -8(a0)
+	sw	v0, -4(a0)
+
+	bne	a0, AT, 1b			/* loop until the end pointer is reached */
+	jr	ra
+	END(r4k_copy_page_s16)
+
+/*
+ * r4k_copy_page_s32 - copy one page, one Create_Dirty_Excl_SD cache op
+ * per 32 destination bytes.
+ *
+ * In:       a0 = destination page, a1 = source page
+ * Clobbers: AT (end pointer, a0 + _PAGE_SIZE), a0, a1 (both advanced by
+ *           _PAGE_SIZE), a2, a3, v0, v1 (data being moved)
+ *
+ * Each pass moves 64 bytes as two 32 byte halves.  Each half starts
+ * with the cache op on the destination and is then moved in two
+ * groups of four word loads followed by four word stores.  _PAGE_SIZE
+ * has to be a multiple of 64 for the loop to end.
+ */
+LEAF(r4k_copy_page_s32)
+	addiu	AT, a0, _PAGE_SIZE
+1:
+	cache	Create_Dirty_Excl_SD, 0(a0)	/* first 32 bytes */
+	lw	a3, 0(a1)
+	lw	a2, 4(a1)
+	lw	v1, 8(a1)
+	lw	v0, 12(a1)
+	sw	a3, 0(a0)
+	sw	a2, 4(a0)
+	sw	v1, 8(a0)
+	sw	v0, 12(a0)
+
+	lw	a3, 16(a1)
+	lw	a2, 20(a1)
+	lw	v1, 24(a1)
+	lw	v0, 28(a1)
+	sw	a3, 16(a0)
+	sw	a2, 20(a0)
+	sw	v1, 24(a0)
+	sw	v0, 28(a0)
+
+	cache	Create_Dirty_Excl_SD, 32(a0)	/* second 32 bytes, issued before the pointers move */
+	addiu	a0, a0, 64
+	addiu	a1, a1, 64
+	lw	a3, -32(a1)
+	lw	a2, -28(a1)
+	lw	v1, -24(a1)
+	lw	v0, -20(a1)
+	sw	a3, -32(a0)
+	sw	a2, -28(a0)
+	sw	v1, -24(a0)
+	sw	v0, -20(a0)
+
+	lw	a3, -16(a1)
+	lw	a2, -12(a1)
+	lw	v1, -8(a1)
+	lw	v0, -4(a1)
+	sw	a3, -16(a0)
+	sw	a2, -12(a0)
+	sw	v1, -8(a0)
+	sw	v0, -4(a0)
+
+	bne	a0, AT, 1b			/* loop until the end pointer is reached */
+	jr	ra
+	END(r4k_copy_page_s32)
+
+/*
+ * r4k_copy_page_s64 - copy one page, one Create_Dirty_Excl_SD cache op
+ * per 64 destination bytes.
+ *
+ * In:       a0 = destination page, a1 = source page
+ * Clobbers: AT (end pointer, a0 + _PAGE_SIZE), a0, a1 (both advanced by
+ *           _PAGE_SIZE), a2, a3, v0, v1 (data being moved)
+ *
+ * Each pass issues a single cache op at the start of a 64 byte
+ * destination chunk and moves the chunk in four groups of four word
+ * loads followed by four word stores.  _PAGE_SIZE has to be a multiple
+ * of 64 for the loop to end.
+ */
+LEAF(r4k_copy_page_s64)
+	addiu	AT, a0, _PAGE_SIZE
+1:
+	cache	Create_Dirty_Excl_SD, 0(a0)	/* the only cache op of this pass */
+
+	lw	a3, 0(a1)
+	lw	a2, 4(a1)
+	lw	v1, 8(a1)
+	lw	v0, 12(a1)
+	sw	a3, 0(a0)
+	sw	a2, 4(a0)
+	sw	v1, 8(a0)
+	sw	v0, 12(a0)
+
+	lw	a3, 16(a1)
+	lw	a2, 20(a1)
+	lw	v1, 24(a1)
+	lw	v0, 28(a1)
+	sw	a3, 16(a0)
+	sw	a2, 20(a0)
+	sw	v1, 24(a0)
+	sw	v0, 28(a0)
+
+	addiu	a0, a0, 64			/* step now; remaining offsets are negative */
+	addiu	a1, a1, 64
+
+	lw	a3, -32(a1)
+	lw	a2, -28(a1)
+	lw	v1, -24(a1)
+	lw	v0, -20(a1)
+	sw	a3, -32(a0)
+	sw	a2, -28(a0)
+	sw	v1, -24(a0)
+	sw	v0, -20(a0)
+
+	lw	a3, -16(a1)
+	lw	a2, -12(a1)
+	lw	v1, -8(a1)
+	lw	v0, -4(a1)
+	sw	a3, -16(a0)
+	sw	a2, -12(a0)
+	sw	v1, -8(a0)
+	sw	v0, -4(a0)
+
+	bne	a0, AT, 1b			/* loop until the end pointer is reached */
+	jr	ra
+	END(r4k_copy_page_s64)
+
+/*
+ * r4k_copy_page_s128 - copy one page, one Create_Dirty_Excl_SD cache
+ * op per 128 destination bytes.
+ *
+ * In:       a0 = destination page, a1 = source page
+ * Clobbers: AT (end pointer, a0 + _PAGE_SIZE), a0, a1 (both advanced by
+ *           _PAGE_SIZE), a2, a3, v0, v1 (data being moved)
+ *
+ * Each pass issues a single cache op at the start of a 128 byte
+ * destination chunk and moves the chunk in eight groups of four word
+ * loads followed by four word stores.  _PAGE_SIZE has to be a multiple
+ * of 128 for the loop to end.
+ */
+LEAF(r4k_copy_page_s128)
+	addiu	AT, a0, _PAGE_SIZE
+1:
+	cache	Create_Dirty_Excl_SD, 0(a0)	/* the only cache op of this pass */
+
+	lw	a3, 0(a1)
+	lw	a2, 4(a1)
+	lw	v1, 8(a1)
+	lw	v0, 12(a1)
+	sw	a3, 0(a0)
+	sw	a2, 4(a0)
+	sw	v1, 8(a0)
+	sw	v0, 12(a0)
+
+	lw	a3, 16(a1)
+	lw	a2, 20(a1)
+	lw	v1, 24(a1)
+	lw	v0, 28(a1)
+	sw	a3, 16(a0)
+	sw	a2, 20(a0)
+	sw	v1, 24(a0)
+	sw	v0, 28(a0)
+
+	lw	a3, 32(a1)
+	lw	a2, 36(a1)
+	lw	v1, 40(a1)
+	lw	v0, 44(a1)
+	sw	a3, 32(a0)
+	sw	a2, 36(a0)
+	sw	v1, 40(a0)
+	sw	v0, 44(a0)
+
+	lw	a3, 48(a1)
+	lw	a2, 52(a1)
+	lw	v1, 56(a1)
+	lw	v0, 60(a1)
+	sw	a3, 48(a0)
+	sw	a2, 52(a0)
+	sw	v1, 56(a0)
+	sw	v0, 60(a0)
+
+	addiu	a0, a0, 128			/* step now; remaining offsets are negative */
+	addiu	a1, a1, 128
+
+	lw	a3, -64(a1)
+	lw	a2, -60(a1)
+	lw	v1, -56(a1)
+	lw	v0, -52(a1)
+	sw	a3, -64(a0)
+	sw	a2, -60(a0)
+	sw	v1, -56(a0)
+	sw	v0, -52(a0)
+
+	lw	a3, -48(a1)
+	lw	a2, -44(a1)
+	lw	v1, -40(a1)
+	lw	v0, -36(a1)
+	sw	a3, -48(a0)
+	sw	a2, -44(a0)
+	sw	v1, -40(a0)
+	sw	v0, -36(a0)
+
+	lw	a3, -32(a1)
+	lw	a2, -28(a1)
+	lw	v1, -24(a1)
+	lw	v0, -20(a1)
+	sw	a3, -32(a0)
+	sw	a2, -28(a0)
+	sw	v1, -24(a0)
+	sw	v0, -20(a0)
+
+	lw	a3, -16(a1)
+	lw	a2, -12(a1)
+	lw	v1, -8(a1)
+	lw	v0, -4(a1)
+	sw	a3, -16(a0)
+	sw	a2, -12(a0)
+	sw	v1, -8(a0)
+	sw	v0, -4(a0)
+
+	bne	a0, AT, 1b			/* loop until the end pointer is reached */
+	jr	ra
+	END(r4k_copy_page_s128)
+
+/* This one still needs to receive cache optimizations */
+/*
+ * pgd_init - fill the first PGD_SIZE / 2 bytes at a0 with the address
+ * of invalid_pte_table, one 32-bit word at a time.
+ *
+ * In:       a0 = start address of the table to initialise
+ * Clobbers: AT (end pointer, a0 + PGD_SIZE / 2), a0 (equals AT on
+ *           return), v0 (address of invalid_pte_table)
+ *
+ * Eight words (32 bytes) are written per pass.  The loop terminates
+ * only on a0 == AT, so PGD_SIZE / 2 has to be a multiple of 32.  Only
+ * half of PGD_SIZE is touched; presumably that is the user part of the
+ * table - confirm against the callers.
+ */
+LEAF(pgd_init)
+	addiu	AT, a0, PGD_SIZE / 2
+	la	v0, invalid_pte_table
+1:
+	sw	v0, 0(a0)			/* words 0..3 */
+	sw	v0, 4(a0)
+	sw	v0, 8(a0)
+	sw	v0, 12(a0)
+
+	addiu	a0, a0, 32			/* step now; remaining offsets are negative */
+
+	sw	v0, -16(a0)			/* words 4..7 */
+	sw	v0, -12(a0)
+	sw	v0, -8(a0)
+	sw	v0, -4(a0)
+
+	bne	a0, AT, 1b			/* loop until the end pointer is reached */
+	jr	ra
+	END(pgd_init)
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)