patch-2.4.19 linux-2.4.19/arch/ppc64/mm/init.c
- Lines: 657
- Date: Fri Aug 2 17:39:43 2002
- Orig file: linux-2.4.18/arch/ppc64/mm/init.c
- Orig date: Wed Dec 31 16:00:00 1969
diff -urN linux-2.4.18/arch/ppc64/mm/init.c linux-2.4.19/arch/ppc64/mm/init.c
@@ -0,0 +1,656 @@
+/*
+ *
+ *
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+ * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+ *
+ * Dave Engebretsen <engebret@us.ibm.com>
+ * Rework for PPC64 port.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/highmem.h>
+#ifdef CONFIG_BLK_DEV_INITRD
+#include <linux/blk.h> /* for initrd_* */
+#endif
+
+#include <asm/pgalloc.h>
+#include <asm/page.h>
+#include <asm/abs_addr.h>
+#include <asm/prom.h>
+#include <asm/lmb.h>
+#include <asm/rtas.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/uaccess.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/tlb.h>
+#include <asm/naca.h>
+#include <asm/eeh.h>
+
+#include <asm/ppcdebug.h>
+
+#define PGTOKB(pages) (((pages) * PAGE_SIZE) >> 10)
+
+#ifdef CONFIG_PPC_ISERIES
+#include <asm/iSeries/iSeries_dma.h>
+#endif
+
+struct mmu_context_queue_t mmu_context_queue;
+int mem_init_done;
+unsigned long ioremap_bot = IMALLOC_BASE;
+
+static int boot_mapsize;
+static unsigned long totalram_pages;
+
+extern pgd_t swapper_pg_dir[];
+extern char __init_begin, __init_end;
+extern char __chrp_begin, __chrp_end;
+extern char __openfirmware_begin, __openfirmware_end;
+extern struct _of_tce_table of_tce_table[];
+extern char _start[], _end[];
+extern char _stext[], etext[];
+extern struct task_struct *current_set[NR_CPUS];
+
+void mm_init_ppc64(void);
+
+unsigned long *pmac_find_end_of_memory(void);
+extern unsigned long *find_end_of_memory(void);
+
+extern pgd_t ioremap_dir[];
+pgd_t * ioremap_pgd = (pgd_t *)&ioremap_dir;
+
+static void map_io_page(unsigned long va, unsigned long pa, int flags);
+extern void die_if_kernel(char *,struct pt_regs *,long);
+
+unsigned long klimit = (unsigned long)_end;
+
+HPTE *Hash=0;
+unsigned long Hash_size=0;
+unsigned long _SDR1=0;
+unsigned long _ASR=0;
+
+/* max amount of RAM to use */
+unsigned long __max_memory;
+
+/* This is declared as we are using the more or less generic
+ * include/asm-ppc64/tlb.h file -- tgall
+ */
+mmu_gather_t mmu_gathers[NR_CPUS];
+
+int do_check_pgt_cache(int low, int high)
+{
+ int freed = 0;
+
+ if (pgtable_cache_size > high) {
+ do {
+ if (pgd_quicklist)
+ free_page((unsigned long)pgd_alloc_one_fast(0)), ++freed;
+ if (pmd_quicklist)
+ free_page((unsigned long)pmd_alloc_one_fast(0, 0)), ++freed;
+ if (pte_quicklist)
+ free_page((unsigned long)pte_alloc_one_fast(0, 0)), ++freed;
+ } while (pgtable_cache_size > low);
+ }
+ return freed;
+}
+
+void show_mem(void)
+{
+ int i,free = 0,total = 0,reserved = 0;
+ int shared = 0, cached = 0;
+
+ printk("Mem-info:\n");
+ show_free_areas();
+ printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
+ i = max_mapnr;
+ while (i-- > 0) {
+ total++;
+ if (PageReserved(mem_map+i))
+ reserved++;
+ else if (PageSwapCache(mem_map+i))
+ cached++;
+ else if (!atomic_read(&mem_map[i].count))
+ free++;
+ else
+ shared += atomic_read(&mem_map[i].count) - 1;
+ }
+ printk("%d pages of RAM\n",total);
+ printk("%d free pages\n",free);
+ printk("%d reserved pages\n",reserved);
+ printk("%d pages shared\n",shared);
+ printk("%d pages swap cached\n",cached);
+ printk("%d pages in page table cache\n",(int)pgtable_cache_size);
+ show_buffers();
+}
+
+void si_meminfo(struct sysinfo *val)
+{
+ val->totalram = totalram_pages;
+ val->sharedram = 0;
+ val->freeram = nr_free_pages();
+ val->bufferram = atomic_read(&buffermem_pages);
+ val->totalhigh = 0;
+ val->freehigh = 0;
+ val->mem_unit = PAGE_SIZE;
+}
+
+void *
+ioremap(unsigned long addr, unsigned long size)
+{
+#ifdef CONFIG_PPC_ISERIES
+ return (void*)addr;
+#else
+ if(mem_init_done && (addr >> 60UL)) {
+ if (IS_EEH_TOKEN_DISABLED(addr))
+ return IO_TOKEN_TO_ADDR(addr);
+ return (void*)addr; /* already mapped address or EEH token. */
+ }
+ return __ioremap(addr, size, _PAGE_NO_CACHE);
+#endif
+}
+
+extern struct vm_struct * get_im_area( unsigned long size );
+
+void *
+__ioremap(unsigned long addr, unsigned long size, unsigned long flags)
+{
+ unsigned long pa, ea, i;
+
+ /*
+ * Choose an address to map it to.
+ * Once the imalloc system is running, we use it.
+ * Before that, we map using addresses going
+ * up from ioremap_bot. imalloc will use
+ * the addresses from ioremap_bot through
+ * IMALLOC_END (0xE000001fffffffff)
+ *
+ */
+ pa = addr & PAGE_MASK;
+ size = PAGE_ALIGN(addr + size) - pa;
+
+ if (size == 0)
+ return NULL;
+
+ if (mem_init_done) {
+ struct vm_struct *area;
+ area = get_im_area(size);
+ if (area == 0)
+ return NULL;
+ ea = (unsigned long)(area->addr);
+ }
+ else {
+ ea = ioremap_bot;
+ ioremap_bot += size;
+ }
+
+ if ((flags & _PAGE_PRESENT) == 0)
+ flags |= pgprot_val(PAGE_KERNEL);
+ if (flags & (_PAGE_NO_CACHE | _PAGE_WRITETHRU))
+ flags |= _PAGE_GUARDED;
+
+ for (i = 0; i < size; i += PAGE_SIZE) {
+ map_io_page(ea+i, pa+i, flags);
+ }
+
+ return (void *) (ea + (addr & ~PAGE_MASK));
+}
+
+void iounmap(void *addr)
+{
+#ifdef CONFIG_PPC_ISERIES
+ /* iSeries I/O Remap is a noop */
+ return;
+#else
+ /* DRENG / PPPBBB todo */
+ return;
+#endif
+}
+
+/*
+ * map_io_page currently only called by __ioremap
+ * map_io_page adds an entry to the ioremap page table
+ * and adds an entry to the HPT, possibly bolting it
+ */
+static void map_io_page(unsigned long ea, unsigned long pa, int flags)
+{
+ pgd_t *pgdp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+ unsigned long vsid;
+
+ if (mem_init_done) {
+ spin_lock(&ioremap_mm.page_table_lock);
+ pgdp = pgd_offset_i(ea);
+ pmdp = pmd_alloc(&ioremap_mm, pgdp, ea);
+ ptep = pte_alloc(&ioremap_mm, pmdp, ea);
+
+ pa = absolute_to_phys(pa);
+ set_pte(ptep, mk_pte_phys(pa & PAGE_MASK, __pgprot(flags)));
+ spin_unlock(&ioremap_mm.page_table_lock);
+ } else {
+ /* If the mm subsystem is not fully up, we cannot create a
+ * linux page table entry for this mapping. Simply bolt an
+ * entry in the hardware page table.
+ */
+ vsid = get_kernel_vsid(ea);
+ make_pte(htab_data.htab,
+ (vsid << 28) | (ea & 0xFFFFFFF), // va (NOT the ea)
+ pa,
+ _PAGE_NO_CACHE | _PAGE_GUARDED | PP_RWXX,
+ htab_data.htab_hash_mask, 0);
+ }
+}
+
+void
+local_flush_tlb_all(void)
+{
+ /* Implemented to just flush the vmalloc area.
+ * vmalloc is the only user of flush_tlb_all.
+ */
+ local_flush_tlb_range( NULL, VMALLOC_START, VMALLOC_END );
+}
+
+void
+local_flush_tlb_mm(struct mm_struct *mm)
+{
+ if ( mm->map_count ) {
+ struct vm_area_struct *mp;
+ for ( mp = mm->mmap; mp != NULL; mp = mp->vm_next )
+ local_flush_tlb_range( mm, mp->vm_start, mp->vm_end );
+ }
+ else /* MIKEC: It is not clear why this is needed */
+ /* paulus: it is needed to clear out stale HPTEs
+ * when an address space (represented by an mm_struct)
+ * is being destroyed. */
+ local_flush_tlb_range( mm, USER_START, USER_END );
+}
+
+/*
+ * Callers should hold the mm->page_table_lock
+ */
+void
+local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+ unsigned long context = 0;
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *ptep;
+
+ switch( REGION_ID(vmaddr) ) {
+ case VMALLOC_REGION_ID:
+ pgd = pgd_offset_k( vmaddr );
+ break;
+ case IO_REGION_ID:
+ pgd = pgd_offset_i( vmaddr );
+ break;
+ case USER_REGION_ID:
+ pgd = pgd_offset( vma->vm_mm, vmaddr );
+ context = vma->vm_mm->context;
+ break;
+ default:
+ panic("local_flush_tlb_page: invalid region 0x%016lx", vmaddr);
+
+ }
+
+ if (!pgd_none(*pgd)) {
+ pmd = pmd_offset(pgd, vmaddr);
+ if (!pmd_none(*pmd)) {
+ ptep = pte_offset(pmd, vmaddr);
+ /* Check if HPTE might exist and flush it if so */
+ if (pte_val(*ptep) & _PAGE_HASHPTE)
+ flush_hash_page(context, vmaddr, ptep);
+ }
+ }
+}
+
+void
+local_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *ptep;
+ unsigned long pgd_end, pmd_end;
+ unsigned long context;
+
+ if ( start >= end )
+ panic("flush_tlb_range: start (%016lx) greater than end (%016lx)\n", start, end );
+
+ if ( REGION_ID(start) != REGION_ID(end) )
+ panic("flush_tlb_range: start (%016lx) and end (%016lx) not in same region\n", start, end );
+
+ context = 0;
+
+ switch( REGION_ID(start) ) {
+ case VMALLOC_REGION_ID:
+ pgd = pgd_offset_k( start );
+ break;
+ case IO_REGION_ID:
+ pgd = pgd_offset_i( start );
+ break;
+ case USER_REGION_ID:
+ pgd = pgd_offset( mm, start );
+ context = mm->context;
+ break;
+ default:
+ panic("flush_tlb_range: invalid region for start (%016lx) and end (%016lx)\n", start, end);
+
+ }
+
+ do {
+ pgd_end = (start + PGDIR_SIZE) & PGDIR_MASK;
+ if ( pgd_end > end )
+ pgd_end = end;
+ if ( !pgd_none( *pgd ) ) {
+ pmd = pmd_offset( pgd, start );
+ do {
+ pmd_end = ( start + PMD_SIZE ) & PMD_MASK;
+ if ( pmd_end > end )
+ pmd_end = end;
+ if ( !pmd_none( *pmd ) ) {
+ ptep = pte_offset( pmd, start );
+ do {
+ if ( pte_val(*ptep) & _PAGE_HASHPTE )
+ flush_hash_page( context, start, ptep );
+ start += PAGE_SIZE;
+ ++ptep;
+ } while ( start < pmd_end );
+ }
+ else
+ start = pmd_end;
+ ++pmd;
+ } while ( start < pgd_end );
+ }
+ else
+ start = pgd_end;
+ ++pgd;
+ } while ( start < end );
+}
+
+
+void __init free_initmem(void)
+{
+ unsigned long a;
+ unsigned long num_freed_pages = 0;
+#define FREESEC(START,END,CNT) do { \
+ a = (unsigned long)(&START); \
+ for (; a < (unsigned long)(&END); a += PAGE_SIZE) { \
+ clear_bit(PG_reserved, &mem_map[MAP_NR(a)].flags); \
+ set_page_count(mem_map+MAP_NR(a), 1); \
+ free_page(a); \
+ CNT++; \
+ } \
+} while (0)
+
+ FREESEC(__init_begin,__init_end,num_freed_pages);
+
+ printk ("Freeing unused kernel memory: %ldk init\n",
+ PGTOKB(num_freed_pages));
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+ unsigned long xstart = start;
+ for (; start < end; start += PAGE_SIZE) {
+ ClearPageReserved(mem_map + MAP_NR(start));
+ set_page_count(mem_map+MAP_NR(start), 1);
+ free_page(start);
+ totalram_pages++;
+ }
+ printk ("Freeing initrd memory: %ldk freed\n", (end - xstart) >> 10);
+}
+#endif
+
+
+
+/*
+ * Do very early mm setup.
+ */
+void __init mm_init_ppc64(void) {
+ struct paca_struct *lpaca;
+ unsigned long guard_page, index;
+
+ ppc_md.progress("MM:init", 0);
+
+ /* Reserve all contexts < FIRST_USER_CONTEXT for kernel use.
+ * The range of contexts [FIRST_USER_CONTEXT, NUM_USER_CONTEXT)
+ * are stored on a stack/queue for easy allocation and deallocation.
+ */
+ mmu_context_queue.lock = SPIN_LOCK_UNLOCKED;
+ mmu_context_queue.head = 0;
+ mmu_context_queue.tail = NUM_USER_CONTEXT-1;
+ mmu_context_queue.size = NUM_USER_CONTEXT;
+ for(index=0; index < NUM_USER_CONTEXT ;index++) {
+ mmu_context_queue.elements[index] = index+FIRST_USER_CONTEXT;
+ }
+
+ /* Setup guard pages for the Paca's */
+ for (index = 0; index < NR_CPUS; index++) {
+ lpaca = &paca[index];
+ guard_page = ((unsigned long)lpaca) + 0x1000;
+ ppc_md.hpte_updateboltedpp(PP_RXRX, guard_page);
+ }
+
+ ppc_md.progress("MM:exit", 0x211);
+}
+
+
+
+/*
+ * Initialize the bootmem system and give it all the memory we
+ * have available.
+ */
+void __init do_init_bootmem(void)
+{
+ unsigned long i;
+ unsigned long start, bootmap_pages;
+ unsigned long total_pages = lmb_end_of_DRAM() >> PAGE_SHIFT;
+
+ PPCDBG(PPCDBG_MMINIT, "do_init_bootmem: start\n");
+ /*
+ * Find an area to use for the bootmem bitmap. Calculate the size of
+ * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE.
+ * Add 1 additional page in case the address isn't page-aligned.
+ */
+ bootmap_pages = bootmem_bootmap_pages(total_pages);
+
+ start = (unsigned long)__a2p(lmb_alloc(bootmap_pages<<PAGE_SHIFT, PAGE_SIZE));
+ if( start == 0 ) {
+ udbg_printf("do_init_bootmem: failed to allocate a bitmap.\n");
+ udbg_printf("\tbootmap_pages = 0x%lx.\n", bootmap_pages);
+ PPCDBG_ENTER_DEBUGGER();
+ }
+
+ PPCDBG(PPCDBG_MMINIT, "\tstart = 0x%lx\n", start);
+ PPCDBG(PPCDBG_MMINIT, "\tbootmap_pages = 0x%lx\n", bootmap_pages);
+ PPCDBG(PPCDBG_MMINIT, "\tphysicalMemorySize = 0x%lx\n", naca->physicalMemorySize);
+
+ boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages);
+ PPCDBG(PPCDBG_MMINIT, "\tboot_mapsize = 0x%lx\n", boot_mapsize);
+
+ /* add all physical memory to the bootmem map */
+ for (i=0; i < lmb.memory.cnt ;i++) {
+ unsigned long physbase, size;
+ unsigned long type = lmb.memory.region[i].type;
+
+ if ( type != LMB_MEMORY_AREA )
+ continue;
+
+ physbase = lmb.memory.region[i].physbase;
+ size = lmb.memory.region[i].size;
+ free_bootmem(physbase, size);
+ }
+ /* reserve the sections we're already using */
+ for (i=0; i < lmb.reserved.cnt ;i++) {
+ unsigned long physbase = lmb.reserved.region[i].physbase;
+ unsigned long size = lmb.reserved.region[i].size;
+#if 0 /* PPPBBB */
+ if ( (physbase == 0) && (size < (16<<20)) ) {
+ size = 16 << 20;
+ }
+#endif
+ reserve_bootmem(physbase, size);
+ }
+
+ PPCDBG(PPCDBG_MMINIT, "do_init_bootmem: end\n");
+}
+
+/*
+ * paging_init() sets up the page tables - in fact we've already done this.
+ */
+void __init paging_init(void)
+{
+ unsigned long zones_size[MAX_NR_ZONES], i;
+
+ /*
+ * All pages are DMA-able so we put them all in the DMA zone.
+ */
+ zones_size[0] = lmb_end_of_DRAM() >> PAGE_SHIFT;
+ for (i = 1; i < MAX_NR_ZONES; i++)
+ zones_size[i] = 0;
+ free_area_init(zones_size);
+}
+
+extern unsigned long prof_shift;
+extern unsigned long prof_len;
+extern unsigned int * prof_buffer;
+extern unsigned long dprof_shift;
+extern unsigned long dprof_len;
+extern unsigned int * dprof_buffer;
+
+void initialize_paca_hardware_interrupt_stack(void);
+
+void __init mem_init(void)
+{
+ extern char *sysmap;
+ extern unsigned long sysmap_size;
+ unsigned long addr;
+ int codepages = 0;
+ int datapages = 0;
+ int initpages = 0;
+ unsigned long va_rtas_base = (unsigned long)__va(rtas.base);
+
+ max_mapnr = max_low_pfn;
+ high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
+ num_physpages = max_mapnr; /* RAM is assumed contiguous */
+
+ totalram_pages += free_all_bootmem();
+
+ ifppcdebug(PPCDBG_MMINIT) {
+ udbg_printf("mem_init: totalram_pages = 0x%lx\n", totalram_pages);
+ udbg_printf("mem_init: va_rtas_base = 0x%lx\n", va_rtas_base);
+ udbg_printf("mem_init: va_rtas_end = 0x%lx\n", PAGE_ALIGN(va_rtas_base+rtas.size));
+ udbg_printf("mem_init: pinned start = 0x%lx\n", __va(0));
+ udbg_printf("mem_init: pinned end = 0x%lx\n", PAGE_ALIGN(klimit));
+ }
+
+ if ( sysmap_size )
+ for (addr = (unsigned long)sysmap;
+ addr < PAGE_ALIGN((unsigned long)sysmap+sysmap_size) ;
+ addr += PAGE_SIZE)
+ SetPageReserved(mem_map + MAP_NR(addr));
+
+ for (addr = KERNELBASE; addr <= (unsigned long)__va(lmb_end_of_DRAM());
+ addr += PAGE_SIZE) {
+ if (!PageReserved(mem_map + MAP_NR(addr)))
+ continue;
+ if (addr < (ulong) etext)
+ codepages++;
+
+ else if (addr >= (unsigned long)&__init_begin
+ && addr < (unsigned long)&__init_end)
+ initpages++;
+ else if (addr < klimit)
+ datapages++;
+ }
+
+ printk("Memory: %luk available (%dk kernel code, %dk data, %dk init) [%08lx,%08lx]\n",
+ (unsigned long)nr_free_pages()<< (PAGE_SHIFT-10),
+ codepages<< (PAGE_SHIFT-10), datapages<< (PAGE_SHIFT-10),
+ initpages<< (PAGE_SHIFT-10),
+ PAGE_OFFSET, (unsigned long)__va(lmb_end_of_DRAM()));
+ mem_init_done = 1;
+
+ /* set the last page of each hardware interrupt stack to be protected */
+ initialize_paca_hardware_interrupt_stack();
+
+#ifdef CONFIG_PPC_ISERIES
+ create_virtual_bus_tce_table();
+ /* HACK HACK This allows the iSeries profiling to use /proc/profile */
+ prof_shift = dprof_shift;
+ prof_len = dprof_len;
+ prof_buffer = dprof_buffer;
+#endif
+}
+
+/*
+ * This is called when a page has been modified by the kernel.
+ * It just marks the page as not i-cache clean. We do the i-cache
+ * flush later when the page is given to a user process, if necessary.
+ */
+void flush_dcache_page(struct page *page)
+{
+ clear_bit(PG_arch_1, &page->flags);
+}
+
+void flush_icache_page(struct vm_area_struct *vma, struct page *page)
+{
+ if (page->mapping && !PageReserved(page)
+ && !test_bit(PG_arch_1, &page->flags)) {
+ __flush_dcache_icache(page_address(page));
+ set_bit(PG_arch_1, &page->flags);
+ }
+}
+
+void clear_user_page(void *page, unsigned long vaddr)
+{
+ clear_page(page);
+}
+
+void copy_user_page(void *vto, void *vfrom, unsigned long vaddr)
+{
+ copy_page(vto, vfrom);
+ __flush_dcache_icache(vto);
+}
+
+void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
+ unsigned long addr, int len)
+{
+ unsigned long maddr;
+
+ maddr = (unsigned long)page_address(page) + (addr & ~PAGE_MASK);
+ flush_icache_range(maddr, maddr + len);
+}
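For reference, a minimal sketch of the page-rounding arithmetic used at the top of __ioremap() above, rewritten as a standalone userspace program (hypothetical and not part of the patch; it assumes a 4K page size and redefines PAGE_SIZE, PAGE_MASK and PAGE_ALIGN locally). The physical address is rounded down to a page boundary, the size is rounded up so the whole requested range is covered, and the caller gets back a pointer that preserves the original offset within the page:

/*
 * Hypothetical userspace sketch (not part of the patch) of the
 * rounding done by __ioremap() before it maps an I/O region.
 */
#include <stdio.h>

#define PAGE_SIZE      4096UL
#define PAGE_MASK      (~(PAGE_SIZE - 1))
#define PAGE_ALIGN(x)  (((x) + PAGE_SIZE - 1) & PAGE_MASK)

int main(void)
{
	unsigned long addr = 0x80001234UL;	/* example MMIO address */
	unsigned long size = 0x100UL;		/* example region size  */

	unsigned long pa = addr & PAGE_MASK;			/* 0x80001000 */
	unsigned long map_size = PAGE_ALIGN(addr + size) - pa;	/* 0x1000     */
	unsigned long offset = addr & ~PAGE_MASK;		/* 0x234      */

	printf("map %#lx bytes at pa %#lx, caller sees offset %#lx\n",
	       map_size, pa, offset);
	return 0;
}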