patch-2.4.4 linux/arch/ia64/sn/sn1/mm.c
Next file: linux/arch/ia64/sn/sn1/probe.c
Previous file: linux/arch/ia64/sn/sn1/machvec.c
Back to the patch index
Back to the overall index
- Lines: 508
- Date: Thu Apr 5 12:51:47 2001
- Orig file: v2.4.3/linux/arch/ia64/sn/sn1/mm.c
- Orig date: Thu Jan 4 15:25:55 2001
diff -u --recursive --new-file v2.4.3/linux/arch/ia64/sn/sn1/mm.c linux/arch/ia64/sn/sn1/mm.c
@@ -1,7 +1,7 @@
/*
- * Copyright, 2000, Silicon Graphics.
+ * Copyright, 2000-2001, Silicon Graphics.
* Copyright Srinivasa Thirumalachar (sprasad@engr.sgi.com)
- * Copyright 2000 Kanoj Sarcar (kanoj@sgi.com)
+ * Copyright 2000-2001 Kanoj Sarcar (kanoj@sgi.com)
*/
#include <linux/config.h>
@@ -11,32 +11,23 @@
#include <asm/efi.h>
#include <asm/sn/mmzone_sn1.h>
-# define MIN(a,b) ((a) < (b) ? (a) : (b))
-# define MAX(a,b) ((a) > (b) ? (a) : (b))
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+
+#define DONE_NOTHING 0
+#define DONE_FINDING 1
+#define DONE_BUILDING 2
-/*
- * Note that the nodemem[] data structure does not support arbitrary
- * memory types and memory descriptors inside the node. For example,
- * you can not have multiple efi-mem-type segments in the node and
- * expect the OS not to use specific mem-types. Currently, the
- * assumption is that "start" is the start of virtual/physical memory
- * on the node. PROM can reserve some memory _only_ at the beginning.
- * This is tracked via the "usable" field, that maintains where the
- * os can start using memory from on a node (ie end of PROM memory).
- * setup_node_bootmem() is passed the above "usable" value, and is
- * expected to make bootmem calls that ensure lower memory is not used.
- * Note that the bootmem for a node is initialized on the entire node,
- * without regards to any holes - then we reserve the holes in
- * setup_sn1_bootmem(), to make sure the holes are not handed out by
- * alloc_bootmem, as well as the corresponding mem_map entries are not
- * considered allocatable by the page_alloc routines.
- */
struct nodemem_s {
- u64 start ;
- u64 end ;
- u64 hole[SN1_MAX_BANK_PER_NODE] ;
- u64 usable;
-} nodemem[MAXNODES] ;
+ u64 start; /* start of kernel usable memory */
+ u64 end; /* end of kernel usable memory */
+ u64 mtot; /* total kernel usable memory */
+ u64 done; /* state of bootmem initialization */
+ u64 bstart; /* where should the bootmem area be */
+ u64 bsize; /* bootmap size */
+ u64 hole[SN1_MAX_BANK_PER_NODE];
+} nodemem[MAXNODES];
+
static int nodemem_valid = 0;
static int __init
@@ -46,7 +37,7 @@
unsigned long count = 0;
if (start >= end)
- return 0 ;
+ return 0;
/*
* Get the memmap ptrs to the start and end of the holes.
@@ -54,31 +45,33 @@
* Can we do virt_to_page(end), if end is on the next node?
*/
- page = virt_to_page(start-1);
- page++ ;
- pageend = virt_to_page(end) ;
+ page = virt_to_page(start - 1);
+ page++;
+ pageend = virt_to_page(end);
printk("hpage=0x%lx, hpageend=0x%lx\n", (u64)page, (u64)pageend) ;
free_bootmem_node(NODE_DATA(nid), __pa(page), (u64)pageend - (u64)page);
- return count ;
+ return count;
}
-void
+static void __init
free_unused_memmap_node(int nid)
{
- u64 i = 0 ;
- u64 holestart = -1 ;
+ u64 i = 0;
+ u64 holestart = -1;
+ u64 start = nodemem[nid].start;
+ start = ((start >> SN1_NODE_ADDR_SHIFT) << SN1_NODE_ADDR_SHIFT);
do {
- holestart = nodemem[nid].hole[i] ;
- i++ ;
+ holestart = nodemem[nid].hole[i];
+ i++;
while ((i < SN1_MAX_BANK_PER_NODE) &&
- (nodemem[nid].hole[i] == (u64)-1))
- i++ ;
+ (nodemem[nid].hole[i] == (u64)-1))
+ i++;
if (i < SN1_MAX_BANK_PER_NODE)
free_unused_memmap_hole(nid, holestart,
- nodemem[nid].start + (i<<SN1_BANK_ADDR_SHIFT));
+ start + (i<<SN1_BANK_ADDR_SHIFT));
} while (i<SN1_MAX_BANK_PER_NODE);
}
@@ -98,7 +91,6 @@
cnodeid = NASID_TO_CNODEID(nasid);
bankid = GetBankId(__pa(vaddr));
nodemem[cnodeid].start = MIN(nodemem[cnodeid].start, vaddr);
- nodemem[cnodeid].usable = MIN(nodemem[cnodeid].usable, vaddr);
nvaddr = (unsigned long)__va((unsigned long)(++nasid) <<
SN1_NODE_ADDR_SHIFT);
nodemem[cnodeid].end = MAX(nodemem[cnodeid].end, MIN(end, nvaddr));
@@ -118,11 +110,14 @@
pgtbl_size_ok(int nid)
{
unsigned long numpfn, bank0size, nodesize ;
+ unsigned long start = nodemem[nid].start;
+
+ start = ((start >> SN1_NODE_ADDR_SHIFT) << SN1_NODE_ADDR_SHIFT);
- nodesize = nodemem[nid].end - nodemem[nid].start ;
+ nodesize = nodemem[nid].end - start ;
numpfn = nodesize >> PAGE_SHIFT;
- bank0size = nodemem[nid].hole[0] - nodemem[nid].start ;
+ bank0size = nodemem[nid].hole[0] - start ;
/* If nid == master node && no kernel text replication */
bank0size -= 0xA00000 ; /* Kernel text + stuff */
bank0size -= ((numpfn + 7) >> 3);
@@ -163,198 +158,198 @@
#ifdef CONFIG_DISCONTIGMEM
-extern bootmem_data_t bdata[] ;
-static int curnodeid ;
+extern bootmem_data_t bdata[];
+/*
+ * This assumes there will be a hole in kernel-usable memory between nodes
+ * (due to prom). The memory descriptors invoked via efi_memmap_walk are
+ * in increasing order. It tries to identify first suitable free area to
+ * put the bootmem for the node in. When presented with the md holding
+ * the kernel, it only searches at the end of the kernel area.
+ */
static int __init
-setup_node_bootmem(unsigned long start, unsigned long end, unsigned long nodefree)
+find_node_bootmem(unsigned long start, unsigned long end, void *arg)
{
+ int nasid = GetNasId(__pa(start));
+ int cnodeid = NASID_TO_CNODEID(nasid);
+ unsigned long nodesize;
extern char _end;
- int i;
- unsigned long kernelend = PAGE_ALIGN((unsigned long)(&_end));
- unsigned long pkernelend = __pa(kernelend);
- unsigned long bootmap_start, bootmap_size;
- unsigned long pstart, pend;
-
- pstart = __pa(start) ;
- pend = __pa(end) ;
-
- /* If we are past a node mem boundary, on simulated dig numa
- * increment current node id. */
-
- curnodeid = NASID_TO_CNODEID(GetNasId(pstart)) ;
-
- /*
- * Make sure we are being passed page aligned addresses.
- */
- if ((start & (PAGE_SIZE - 1)) || (end & (PAGE_SIZE - 1)))
- panic("setup_node_bootmem:align");
+ unsigned long kaddr = (unsigned long)&_end;
-
- /* For now, just go to the lower CHUNK alignment so that
- * chunktonid of 0-8MB and other lower mem pages get initted. */
-
- pstart &= CHUNKMASK ;
- pend = (pend+CHUNKSZ-1) & CHUNKMASK;
-
- /* If pend == 0, both addrs below 8 MB, special case it
- * FIX: CHUNKNUM(pend-1) broken if pend == 0
- * both addrs within 8MB */
-
- if (pend == 0) {
- chunktonid[0] = 0;
- return 0;
- }
-
- /* Fill up the chunktonid array first. */
-
- for (i = PCHUNKNUM(pstart); i <= PCHUNKNUM(pend-1); i++)
- chunktonid[i] = curnodeid;
-
- /* This check is bogus for now till MAXCHUNKS is properly
- * defined to say if it includes holes or not. */
-
- if ((CHUNKTONID(PCHUNKNUM(pend)) > MAXCHUNKS) ||
- (PCHUNKNUM(pstart) >= PCHUNKNUM(pend))) {
- printk("Ign 0x%lx-0x%lx, ", __pa(start), __pa(end));
+ /*
+ * Track memory available to kernel.
+ */
+ nodemem[cnodeid].mtot += ((end - start) >> PAGE_SHIFT);
+ if (nodemem[cnodeid].done != DONE_NOTHING)
return(0);
- }
+ nodesize = nodemem[cnodeid].end - ((nodemem[cnodeid].start >>
+ SN1_NODE_ADDR_SHIFT) << SN1_NODE_ADDR_SHIFT);
+ nodesize >>= PAGE_SHIFT;
- /* This routine gets called many times in node 0.
- * The first one to reach here would be the one after
- * kernelend to end of first node. */
-
- NODE_DATA(curnodeid)->bdata = &(bdata[curnodeid]);
-
- if (curnodeid == 0) {
- /* for master node, forcibly assign these values
- * This gets called many times on dig but we
- * want these exact values
- * Also on softsdv, the memdesc for 0 is missing */
- NODE_START(curnodeid) = PAGE_OFFSET;
- NODE_SIZE(curnodeid) = (end - PAGE_OFFSET);
- } else {
- /* This gets called only once for non zero nodes
- * If it does not, then NODE_STARt should be
- * LOCAL_BASE(nid) */
+ /*
+ * Adjust limits for the md holding the kernel.
+ */
+ if ((start < kaddr) && (end > kaddr))
+ start = PAGE_ALIGN(kaddr);
- NODE_START(curnodeid) = start;
- NODE_SIZE(curnodeid) = (end - start);
+ /*
+ * We need space for mem_map, bootmem map plus a few more pages
+ * to satisfy alloc_bootmems out of node 0.
+ */
+ if ((end - start) > ((nodesize * sizeof(struct page)) + (nodesize/8)
+ + (10 * PAGE_SIZE))) {
+ nodemem[cnodeid].bstart = start;
+ nodemem[cnodeid].done = DONE_FINDING;
}
+ return(0);
+}
- /* if end < kernelend do not do anything below this */
- if (pend < pkernelend)
- return 0 ;
+/*
+ * This assumes there will be a hole in kernel-usable memory between nodes
+ * (due to prom). The memory descriptors invoked via efi_memmap_walk are
+ * in increasing order.
+ */
+static int __init
+build_node_bootmem(unsigned long start, unsigned long end, void *arg)
+{
+ int nasid = GetNasId(__pa(start));
+ int curnodeid = NASID_TO_CNODEID(nasid);
+ int i;
+ unsigned long pstart, pend;
+ extern char _end, _stext;
+ unsigned long kaddr = (unsigned long)&_end;
- /*
- * Handle the node that contains kernel text/data. It would
- * be nice if the loader loads the kernel at a "chunk", ie
- * not in memory that the kernel will ignore (else free_initmem
- * has to worry about not freeing memory that the kernel ignores).
- * Note that we assume the space from the node start to
- * KERNEL_START can not hold all the bootmem data, but from kernel
- * end to node end can.
- */
-
- /* TBD: This may be bogus in light of the above check. */
-
- if ((pstart < pkernelend) && (pend >= pkernelend)) {
- bootmap_start = pkernelend;
- } else {
- bootmap_start = __pa(start); /* chunk & page aligned */
+ if (nodemem[curnodeid].done == DONE_FINDING) {
+ /*
+ * This is where we come to know the node is present.
+ * Do node wide tasks.
+ */
+ nodemem[curnodeid].done = DONE_BUILDING;
+ NODE_DATA(curnodeid)->bdata = &(bdata[curnodeid]);
+
+ /*
+ * Update the chunktonid array as a node wide task. There
+ * are too many smalls mds on first node to do this per md.
+ */
+ pstart = __pa(nodemem[curnodeid].start);
+ pend = __pa(nodemem[curnodeid].end);
+ pstart &= CHUNKMASK;
+ pend = (pend + CHUNKSZ - 1) & CHUNKMASK;
+ /* Possible check point to enforce minimum node size */
+ if (nodemem[curnodeid].bstart == -1) {
+ printk("No valid bootmem area on node %d\n", curnodeid);
+ while(1);
+ }
+ for (i = PCHUNKNUM(pstart); i <= PCHUNKNUM(pend - 1); i++)
+ chunktonid[i] = curnodeid;
+ if ((CHUNKTONID(PCHUNKNUM(pend)) > MAXCHUNKS) ||
+ (PCHUNKNUM(pstart) >= PCHUNKNUM(pend))) {
+ printk("Ign 0x%lx-0x%lx, ", __pa(start), __pa(end));
+ return(0);
+ }
+
+ /*
+ * NODE_START and NODE_SIZE determine the physical range
+ * on the node that mem_map array needs to be set up for.
+ */
+ NODE_START(curnodeid) = ((nodemem[curnodeid].start >>
+ SN1_NODE_ADDR_SHIFT) << SN1_NODE_ADDR_SHIFT);
+ NODE_SIZE(curnodeid) = (nodemem[curnodeid].end -
+ NODE_START(curnodeid));
+
+ nodemem[curnodeid].bsize =
+ init_bootmem_node(NODE_DATA(curnodeid),
+ (__pa(nodemem[curnodeid].bstart) >> PAGE_SHIFT),
+ (__pa((nodemem[curnodeid].start >> SN1_NODE_ADDR_SHIFT)
+ << SN1_NODE_ADDR_SHIFT) >> PAGE_SHIFT),
+ (__pa(nodemem[curnodeid].end) >> PAGE_SHIFT));
+
+ } else if (nodemem[curnodeid].done == DONE_NOTHING) {
+ printk("build_node_bootmem: node %d weirdness\n", curnodeid);
+ while(1); /* Paranoia */
}
/*
- * Low memory is reserved for PROM use on SN1. The current node
- * memory model is [PROM mem ... kernel ... free], where the
- * first two components are optional on a node.
+ * Free the entire md.
*/
- if (bootmap_start < __pa(nodefree))
- bootmap_start = __pa(nodefree);
-
-/* XXX TBD */
-/* For curnodeid of 0, this gets called many times because of many
- * < 8MB segments. start gets bumped each time. We want to fix it
- * to 0 now.
- */
- if (curnodeid == 0)
- start=PAGE_OFFSET;
-/*
- * This makes sure that in free_area_init_core - paging_init
- * idx is the entire node page range and for loop goes thro
- * all pages. test_bit for kernel pages should remain reserved
- * because free available mem takes care of kernel_start and end
- */
-
- bootmap_size = init_bootmem_node(NODE_DATA(curnodeid),
- (bootmap_start >> PAGE_SHIFT),
- (__pa(start) >> PAGE_SHIFT), (__pa(end) >> PAGE_SHIFT));
+ free_bootmem_node(NODE_DATA(curnodeid), __pa(start), (end - start));
- free_bootmem_node(NODE_DATA(curnodeid), bootmap_start + bootmap_size,
- __pa(end) - (bootmap_start + bootmap_size));
+ /*
+ * Reclaim back the bootmap and kernel areas.
+ */
+ if ((start <= nodemem[curnodeid].bstart) && (end >
+ nodemem[curnodeid].bstart))
+ reserve_bootmem_node(NODE_DATA(curnodeid),
+ __pa(nodemem[curnodeid].bstart), nodemem[curnodeid].bsize);
+ if ((start <= kaddr) && (end > kaddr))
+ reserve_bootmem_node(NODE_DATA(curnodeid),
+ __pa(&_stext), (&_end - &_stext));
return(0);
}
-void
+void __init
setup_sn1_bootmem(int maxnodes)
{
int i;
- for (i=0;i<MAXNODES;i++) {
- nodemem[i].usable = nodemem[i].start = -1 ;
- nodemem[i].end = 0 ;
- memset(&nodemem[i].hole, -1, sizeof(nodemem[i].hole)) ;
+ for (i = 0; i < MAXNODES; i++) {
+ nodemem[i].start = nodemem[i].bstart = -1;
+ nodemem[i].end = nodemem[i].bsize = nodemem[i].mtot = 0;
+ nodemem[i].done = DONE_NOTHING;
+ memset(&nodemem[i].hole, -1, sizeof(nodemem[i].hole));
}
- efi_memmap_walk(build_nodemem_map, 0) ;
+ efi_memmap_walk(build_nodemem_map, 0);
- /*
- * Run thru all the nodes, adjusting their starts. This is needed
- * because efi_memmap_walk() might not process certain mds that
- * are marked reserved for PROM at node low memory.
- */
- for (i = 0; i < maxnodes; i++)
- nodemem[i].start = ((nodemem[i].start >> SN1_NODE_ADDR_SHIFT) <<
- SN1_NODE_ADDR_SHIFT);
- nodemem_valid = 1 ;
+ nodemem_valid = 1;
- /* After building the nodemem map, check if the page table
+ /*
+ * After building the nodemem map, check if the node memmap
* will fit in the first bank of each node. If not change
- * the node end addr till it fits. We dont want to do this
- * in mm/page_alloc.c
+ * the node end addr till it fits.
*/
- for (i=0;i<maxnodes;i++)
- check_pgtbl_size(i) ;
-
- for (i=0;i<maxnodes;i++)
- setup_node_bootmem(nodemem[i].start, nodemem[i].end, nodemem[i].usable);
+ for (i = 0; i < maxnodes; i++)
+ check_pgtbl_size(i);
- /*
- * Mark the holes as reserved, so the corresponding mem_map
- * entries will not be marked allocatable in free_all_bootmem*().
- */
- for (i = 0; i < maxnodes; i++) {
- int j = 0 ;
- u64 holestart = -1 ;
-
- do {
- holestart = nodemem[i].hole[j++];
- while ((j < SN1_MAX_BANK_PER_NODE) &&
- (nodemem[i].hole[j] == (u64)-1))
- j++;
- if (j < SN1_MAX_BANK_PER_NODE)
- reserve_bootmem_node(NODE_DATA(i),
- __pa(holestart), (nodemem[i].start +
- ((long)j << SN1_BANK_ADDR_SHIFT) -
- holestart));
- } while (j < SN1_MAX_BANK_PER_NODE);
- }
+ dump_nodemem_map(maxnodes);
- dump_nodemem_map(maxnodes) ;
+ efi_memmap_walk(find_node_bootmem, 0);
+ efi_memmap_walk(build_node_bootmem, 0);
}
#endif
+void __init
+discontig_paging_init(void)
+{
+ int i;
+ unsigned long max_dma, zones_size[MAX_NR_ZONES], holes_size[MAX_NR_ZONES];
+ extern void dump_node_data(void);
+
+ max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+ for (i = 0; i < numnodes; i++) {
+ unsigned long startpfn = __pa((void *)NODE_START(i)) >> PAGE_SHIFT;
+ unsigned long numpfn = NODE_SIZE(i) >> PAGE_SHIFT;
+ memset(zones_size, 0, sizeof(zones_size));
+ memset(holes_size, 0, sizeof(holes_size));
+ holes_size[ZONE_DMA] = numpfn - nodemem[i].mtot;
+
+ if ((startpfn + numpfn) < max_dma) {
+ zones_size[ZONE_DMA] = numpfn;
+ } else if (startpfn > max_dma) {
+ zones_size[ZONE_NORMAL] = numpfn;
+ panic("discontig_paging_init: %d\n", i);
+ } else {
+ zones_size[ZONE_DMA] = (max_dma - startpfn);
+ zones_size[ZONE_NORMAL] = numpfn - zones_size[ZONE_DMA];
+ panic("discontig_paging_init: %d\n", i);
+ }
+ free_area_init_node(i, NODE_DATA(i), NULL, zones_size, startpfn<<PAGE_SHIFT, holes_size);
+ free_unused_memmap_node(i);
+ }
+ dump_node_data();
+}
+
/*
* This used to be invoked from an SN1 specific hack in efi_memmap_walk.
* It tries to ignore banks which the kernel is ignoring because bank 0
@@ -386,10 +381,10 @@
int i,j;
printk("NODEMEM_S info ....\n") ;
- printk("Node start end usable\n");
+ printk("Node start end\n");
for (i=0;i<maxnodes;i++) {
- printk("%d 0x%lx 0x%lx 0x%lx\n",
- i, nodemem[i].start, nodemem[i].end, nodemem[i].usable);
+ printk("%d 0x%lx 0x%lx\n",
+ i, nodemem[i].start, nodemem[i].end);
printk("Holes -> ") ;
for (j=0;j<SN1_MAX_BANK_PER_NODE;j++)
printk("0x%lx ", nodemem[i].hole[j]) ;
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)