patch-2.1.34 linux/arch/sparc64/kernel/dtlb_miss.S

diff -u --recursive --new-file v2.1.33/linux/arch/sparc64/kernel/dtlb_miss.S linux/arch/sparc64/kernel/dtlb_miss.S
@@ -1,93 +1,80 @@
-/* $Id: dtlb_miss.S,v 1.5 1997/02/25 20:00:02 jj Exp $
+/* $Id: dtlb_miss.S,v 1.11 1997/04/10 01:59:35 davem Exp $
  * dtlb_miss.S:	Data TLB miss code; this is included directly
  *              into the trap table.
  *
- * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1996,1997 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  */
 
-	/* We are in the MMU globals, %g7 contains the physical
-	 * address of current->mm->pgd at all times.
-	 *
-	 * Many subtle things are done here.  The high bits of
-	 * the virtual address missed are most easily obtained
-	 * from the tag target (it is at address zero in ASI_DMMU
-	 * so no address formation is necessary to get at this).
-	 * This is used to compute the pgd and pmd table offsets.
-	 *
-	 * Even more clever is that physical page zero is always
-	 * a page full of zeroes.  This means we can just follow
-	 * through with all the page table traversals even if nothing
-	 * is mapped because we'll just do loads from page zero
-	 * and get yet another zero.  We only need to do the check
-	 * for the valid bit being set in the final pte we obtain.
-	 *
-	 * Furthermore, we set the TSB base register to address
-	 * zero, and we use the 8KB tsb ptr to calculate the pte
-	 * offset.  Again it is at address zero in ASI_DMMU_TSB_8KB_PTR,
-	 * so no address formation is necessary, which saves instructions.
-	 *
-	 * We use physical address accesses to get at the page
-	 * tables, for two reasons.  First, it makes it impossible
-	 * to take a fault while we are servicing the miss.  Second,
-	 * these physical bypass accesses allocate only in the
-	 * E-cache, so we prevent D-cache pollution from the miss
-	 * handlers probing the page tables.
-	 *
-	 * It looks very hairy and slow.  But I take only one more
-	 * load from ram than the Solaris version, and my version
-	 * is one instruction quicker for a true TLB miss.  More
-	 * importantly, all true TLB misses under Linux will be
-	 * serviced in _constant_ time.  When the TSB is used as it
-	 * was intended to be used (as Solaris does), the overhead
-	 * for a TLB miss is _indeterminate_, especially during
-	 * process startup when the TSB is cold.
-	 *
-	 * XXX I think I can knock off two more instructions here...
-	 */
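
The "physical page zero" trick described above deserves a concrete
illustration, since the replacement code below still relies on it
(only the final pte is ever tested for validity).  A minimal
user-space C sketch, not kernel code: pmem, phys_load() and the
*_off() helpers are hypothetical stand-ins for real physical memory,
the ldxa ASI_PHYS_USE_EC accesses, and the mask/shift arithmetic;
the 10-bit-per-level, 8KB-page layout is assumed from the shifts
used elsewhere in this file.

#include <stdio.h>
#include <string.h>

typedef unsigned long long u64;

#define PAGE_SIZE	8192ULL
#define _PAGE_VALID	(1ULL << 63)

static unsigned char pmem[4 * 8192];	/* fake physical RAM; page 0 stays zeroed */

static u64 phys_load(u64 paddr)		/* stands in for ldxa ASI_PHYS_USE_EC */
{
	u64 v;
	memcpy(&v, pmem + paddr, sizeof(v));
	return v;
}

/* Assumed layout: 10-bit index per level, 8-byte entries, 8KB pages. */
static u64 pgd_off(u64 va) { return ((va >> 33) & 0x3ff) << 3; }
static u64 pmd_off(u64 va) { return ((va >> 23) & 0x3ff) << 3; }
static u64 pte_off(u64 va) { return ((va >> 13) & 0x3ff) << 3; }

static u64 dtlb_walk(u64 pgd_phys, u64 va)
{
	u64 pgd = phys_load(pgd_phys + pgd_off(va));	/* 0 if unmapped */
	u64 pmd = phys_load(pgd + pmd_off(va));		/* page 0 reads as 0 */
	u64 pte = phys_load(pmd + pte_off(va));		/* ditto */

	return (pte & _PAGE_VALID) ? pte : 0;		/* one check, at the end */
}

int main(void)
{
	/* Page 1 is an all-zero pgd: the walk sails through page 0
	 * twice and fails only the final valid-bit test. */
	printf("pte = %llx\n", dtlb_walk(PAGE_SIZE, 0x7123456000ULL));
	return 0;
}
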
-
-	/* I-cache line 0 */
-	ldxa		[%g0] ASI_DMMU, %g1		! grab Tag Target either way
-	brlz,pn		%g1, 3f				! special kernel processing
-	 srlx		%g1, 8, %g3			! put high vaddr bits in place
-
-1:
-	and		%g3, %g2, %g3			! get offset
-	ldxa		[%g7 + %g3] ASI_PHYS_USE_EC, %g5! load pgd
-	sllx		%g1, 2, %g4			! begin pmd_offset formation
-	and		%g4, %g2, %g3			! and now mask it
-	ldxa		[%g5 + %g3] ASI_PHYS_USE_EC, %g4! load pmd
-	/* I-cache line 1 */
-	ldxa		[%g0] ASI_DMMU_TSB_8KB_PTR, %g1	! get 8KB pointer bits
-	srlx		%g1, 1, %g1			! shift right to get pte_offset
-	ldxa		[%g4 + %g1] ASI_PHYS_USE_EC, %g3! load pte
-	brlz,a,pt	%g3, 2f				! valid bit set?
-	 stxa		%g3, [%g0] ASI_DTLB_DATA_IN	! yes, load TTE into DTLB
-
-	ba,a,pt		%xcc, sparc64_dtlb_refbit_catch	! longer processing needed
-2:
-	retry						! return from trap
-
-#define KTTE_HIGH_BITS	_PAGE_VALID | _PAGE_SZ4MB
-#define KTTE_LOW_BITS	_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W | _PAGE_G
-
-	nop						! align next insn on cache line
-3:
-	/* I-cache line 2 */
-	srax		%g1, 19, %g5			! mask down high bits
-	cmp		%g5, -1				! if -1 this is VMALLOC area
-	be,pn		%xcc, 1b			! yep
-	 sethi		%uhi(KTTE_HIGH_BITS), %g4	! begin pte formation
-
-	sllx		%g1, 23, %g1			! begin masking for physpage
-	sllx		%g4, 32, %g4			! high protection TTE bits
-	or		%g4, (KTTE_LOW_BITS), %g4	! low protection TTE bits
-	srlx		%g1, 41, %g1			! put physpage into place
-	/* I-cache line 3 */
-	or		%g4, %g1, %g1			! finish TTE computation
-	stxa		%g1, [%g0] ASI_DTLB_DATA_IN	! load TTE into DTLB
-	retry						! return from trap
+/* The basic algorithm is:
+ *
+ * if(faulting_context != 0) {
+ *		pgd = pgd_offset(current->mm->pgd, fault_address);
+ * page_table_walk_continue:
+ *		pmd = pmd_offset(pgd, fault_address);
+ *		pte = pte_offset(pmd, fault_address);
+ *		if(pte & _PAGE_VALID) {
+ *			tlb_load(pte, fault_address);
+ *			return_from_trap();
+ *		}
+ *		goto longer_processing;
+ * } else {
+ *		if(fault_address >= KERNBASE &&
+ *		   fault_address < VMALLOC_START) {
+ *			tlb_load(__pa(fault_address) | PAGE_KERNEL, fault_address);
+ *			return_from_trap();
+ *		} else {
+ *			pgd = pgd_offset(swapper_pg_dir, fault_address);
+ *			goto page_table_walk_continue;
+ *		}
+ * }
+ *
+ * This is optimized for user TLB misses on purpose.
+ */
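
Rendered as ordinary C, the dispatch above is a three-way
classification.  A hedged sketch, compilable in user space: KERNBASE
is the usual sparc64 value of this era, the sllx-by-43 test further
down suggests VMALLOC_START sits 2^42 above it, and the classify()
helper itself is purely illustrative; all three are assumptions.

#include <stdio.h>

typedef unsigned long long u64;

#define KERNBASE	0xfffff80000000000ULL	/* assumed: -(1ULL << 43) */

enum miss_path { USER_WALK, KERNEL_LINEAR, KERNEL_WALK };

static enum miss_path classify(u64 ctx, u64 va, u64 vmalloc_start)
{
	if (ctx != 0)
		return USER_WALK;	/* fast path: walk current->mm->pgd */
	if (va >= KERNBASE && va < vmalloc_start)
		return KERNEL_LINEAR;	/* synthesize a 4MB TTE, no walk */
	return KERNEL_WALK;		/* vmalloc/PROM: walk swapper_pg_dir */
}

int main(void)
{
	u64 vmalloc_start = KERNBASE + (1ULL << 42);	/* inferred, see above */
	printf("%d\n", classify(0, KERNBASE + 0x400000, vmalloc_start));
	return 0;
}
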
 
-	nop; nop; nop; nop; nop;
+#define KERN_HIGHBITS	(_PAGE_VALID | _PAGE_SZ4MB)
+#define KERN_LOWBITS	(_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W)
+#define KERN_LOWBITS_IO	(_PAGE_E | _PAGE_P | _PAGE_W)
+
+				/* ICACHE line 1 */
+  /*0x00*/	ldxa		[%g0] ASI_DMMU, %g1		! Get TAG_TARGET
+  /*0x04*/	srlx		%g1, 8, %g3			! Position PGD offset
+  /*0x08*/	srlx		%g1, 48, %g5			! Shift down CONTEXT bits
+  /*0x0c*/	and		%g3, %g2, %g3			! Mask PGD offset
+  /*0x10*/	sllx		%g1, 2, %g4			! Position PMD offset
+  /*0x14*/	brz,pn		%g5, 3f				! Context 0 == kernel
+  /*0x18*/	 and		%g4, %g2, %g4			! Mask PMD offset
+  /*0x1c*/	ldxa		[%g0] ASI_DMMU_TSB_8KB_PTR, %g1	! For PTE offset
+
+				/* ICACHE line 2 */
+  /*0x20*/	ldxa		[%g7 + %g3] ASI_PHYS_USE_EC, %g5	! Load PGD
+  /*0x24*/	srlx		%g1, 1, %g1				! PTE offset
+2:/*0x28*/	ldxa		[%g5 + %g4] ASI_PHYS_USE_EC, %g3	! Load PMD
+  /*0x2c*/	ldxa		[%g3 + %g1] ASI_PHYS_USE_EC, %g5	! Load PTE
+  /*0x30*/	brlz,a,pt	%g5, 1f					! Valid set?
+  /*0x34*/	 stxa		%g5, [%g0] ASI_DTLB_DATA_IN		! TLB load
+  /*0x38*/	ba,a,pt		%xcc, sparc64_dtlb_refbit_catch		! Nope...
+1:/*0x3c*/	retry							! Trap return
+
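
The first two cache lines extract all three table offsets from a
single TAG_TARGET read, plus the PTE offset from the hardware 8KB
TSB pointer (shifted right once, apparently to turn 16-byte TSB
indexing into 8-byte PTE indexing).  The register layout assumed
below (context in bits 63:48, VA>>22 in the low bits) and the %g2
value (0x1ff8, a 10-bit table index pre-shifted for 8-byte entries)
are inferred from the shifts; the asserts check that the shift/mask
games match straightforward index arithmetic.

#include <assert.h>

typedef unsigned long long u64;

/* Assumed TAG_TARGET layout: context in bits 63:48, VA>>22 below. */
static u64 tag_target(u64 ctx, u64 va)
{
	return (ctx << 48) | (va >> 22);
}

int main(void)
{
	u64 va  = 0x0000001234566000ULL;
	u64 tag = tag_target(5, va);
	u64 g2  = 0x1ff8;	/* assumed: 10-bit index << 3 */

	/* srlx %g1,8 ; and ...,%g2  ==  pgd offset */
	assert(((tag >> 8) & g2) == (((va >> 33) & 0x3ff) << 3));

	/* sllx %g1,2 ; and ...,%g2  ==  pmd offset */
	assert(((tag << 2) & g2) == (((va >> 23) & 0x3ff) << 3));

	/* srlx %g1,48  ==  context; zero selects the kernel path */
	assert((tag >> 48) == 5);
	return 0;
}
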
+3:				/* ICACHE line 3 */
+  /*0x40*/	sllx		%g1, 43, %g5		! This gets >= VMALLOC_START...
+  /*0x44*/	brlz,pn		%g5, 4f			! ...if now less than zero.
+  /*0x48*/	 andncc		%g1, 0x3ff, %g0		! Slick trick: VA below 4GB?
+  /*0x4c*/	be,pn		%xcc, 4f		! Yes, it is some PROM mapping
+  /*0x50*/	 srlx		%g5, 21, %g5		! This is now physical page
+  /*0x54*/	sethi		%uhi(KERN_HIGHBITS), %g1	! Construct PTE
+  /*0x58*/	sllx		%g1, 32, %g1			! Move priv bits up
+  /*0x5c*/	or		%g1, %g5, %g1			! Or in the page
+
+				/* ICACHE line 4 */
+  /*0x60*/	or		%g1, (KERN_LOWBITS), %g1	! Set low priv bits
+  /*0x64*/	stxa		%g1, [%g0] ASI_DTLB_DATA_IN	! TLB load
+  /*0x68*/	retry						! Trap return
+4:/*0x6c*/	ldxa		[%g0] ASI_DMMU_TSB_8KB_PTR, %g1	! For PTE offset
+  /*0x70*/	ldxa		[%g6 + %g3] ASI_PHYS_USE_EC, %g5	! Load kern PGD
+  /*0x74*/	ba,pt		%xcc, 2b			! Go back up top
+  /*0x78*/	 srlx		%g1, 1, %g1			! PTE offset (delay slot)
+  /*0x7c*/	nop
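
For the linear mapping the handler touches no memory at all: the
4MB TTE is built entirely in registers.  A sketch of that
composition; the _PAGE_* values are the UltraSPARC hardware TTE
bits as assumed here, and the assert checks that the sllx 43 /
srlx 21 pair really is __pa() rounded down to a 4MB boundary when
KERNBASE == -2^43.

#include <assert.h>

typedef unsigned long long u64;

/* Assumed TTE bit values: V = bit 63, size = bits 62:61 (3 == 4MB),
 * CP/CV/P/W in the low byte. */
#define _PAGE_VALID	0x8000000000000000ULL
#define _PAGE_SZ4MB	0x6000000000000000ULL
#define _PAGE_CP	0x0000000000000020ULL
#define _PAGE_CV	0x0000000000000010ULL
#define _PAGE_P		0x0000000000000004ULL
#define _PAGE_W		0x0000000000000002ULL

#define KERN_HIGHBITS	(_PAGE_VALID | _PAGE_SZ4MB)
#define KERN_LOWBITS	(_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W)

#define KERNBASE	0xfffff80000000000ULL	/* assumed: -(1ULL << 43) */

static u64 kern_tte(u64 va)
{
	/* sllx 43 / srlx 21 isolates VA bits 42:22, i.e. the 4MB-aligned
	 * physical page for the KERNBASE linear mapping: */
	u64 phys = ((va >> 22) << 43) >> 21;

	assert(phys == ((va - KERNBASE) & ~0x3fffffULL));
	return KERN_HIGHBITS | phys | KERN_LOWBITS;
}

int main(void)
{
	return kern_tte(KERNBASE + 0x12345678ULL) ? 0 : 1;
}
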
 
-#undef KTTE_HIGH_BITS
-#undef KTTE_LOW_BITS
+#undef KERN_HIGHBITS
+#undef KERN_LOWBITS
