Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 arch/x86_64/mm/init.c      |   65 +++++++++++++++++++++++++++++----------------
 arch/x86_64/mm/numa.c      |   25 +++--------------
 include/asm-x86_64/dma.h   |   11 ++++++-
 include/asm-x86_64/proto.h |    2 +
 include/linux/gfp.h        |    9 ++++++
 include/linux/mmzone.h     |   12 ++++----
 mm/page_alloc.c            |   19 ++++++++++---
 7 files changed, 89 insertions(+), 54 deletions(-)

diff -puN arch/x86_64/mm/init.c~x86_64-dma32 arch/x86_64/mm/init.c
--- devel/arch/x86_64/mm/init.c~x86_64-dma32	2005-09-07 20:10:24.000000000 -0700
+++ devel-akpm/arch/x86_64/mm/init.c	2005-09-07 20:10:24.000000000 -0700
@@ -318,32 +318,51 @@ void zap_low_mappings(void)
 	flush_tlb_all();
 }
 
+/* Compute zone sizes z[] (in pages) and e820 hole sizes h[] for one node. */
+void __init size_zones(unsigned long *z, unsigned long *h,
+		       unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long dma_pfn = MAX_DMA_PFN, dma32_pfn = MAX_DMA32_PFN;
+	unsigned long w;
+	int i;
+
+	for (i = 0; i < MAX_NR_ZONES; i++)
+		z[i] = 0;
+
+	/* Clamp the zone limits so a node ending early cannot overstate one. */
+	if (dma_pfn > end_pfn)
+		dma_pfn = end_pfn;
+	if (dma32_pfn > end_pfn)
+		dma32_pfn = end_pfn;
+
+	/* Cumulative sizes: every zone is first measured from start_pfn. */
+	if (start_pfn < dma_pfn)
+		z[ZONE_DMA] = dma_pfn - start_pfn;
+	if (start_pfn < dma32_pfn)
+		z[ZONE_DMA32] = dma32_pfn - start_pfn;
+	z[ZONE_NORMAL] = end_pfn - start_pfn;
+
+	/* Remove lower zones from higher ones; w is the running total. */
+	for (w = 0, i = 0; i < MAX_NR_ZONES; i++) {
+		if (z[i])
+			z[i] -= w;
+		w += z[i];
+	}
+
+	/* Hole lookup is by absolute pfn, so walk from the node's first pfn. */
+	for (w = start_pfn, i = 0; i < MAX_NR_ZONES; i++) {
+		h[i] = e820_hole_size(w, w + z[i]);
+		w += z[i];
+	}
+}
+
 #ifndef CONFIG_NUMA
 void __init paging_init(void)
 {
-	{
-		unsigned long zones_size[MAX_NR_ZONES];
-		unsigned long holes[MAX_NR_ZONES];
-		unsigned int max_dma;
-
-		memset(zones_size, 0, sizeof(zones_size));
-		memset(holes, 0, sizeof(holes));
-
-		max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-
-		if (end_pfn < max_dma) {
-			zones_size[ZONE_DMA] = end_pfn;
-			holes[ZONE_DMA] = e820_hole_size(0, end_pfn);
-		} else {
-			zones_size[ZONE_DMA] = max_dma;
-			holes[ZONE_DMA] = e820_hole_size(0, max_dma);
-			zones_size[ZONE_NORMAL] = end_pfn - max_dma;
-			holes[ZONE_NORMAL] = e820_hole_size(max_dma, end_pfn);
-		}
-		free_area_init_node(0, NODE_DATA(0), zones_size,
-                        __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
-	}
-	return;
+	unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
+	size_zones(zones, holes, 0, end_pfn);	/* node 0 spans 0..end_pfn */
+	free_area_init_node(0, NODE_DATA(0), zones,
+			    __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
 }
 #endif
 
diff -puN arch/x86_64/mm/numa.c~x86_64-dma32 arch/x86_64/mm/numa.c
--- devel/arch/x86_64/mm/numa.c~x86_64-dma32	2005-09-07 20:10:24.000000000 -0700
+++ devel-akpm/arch/x86_64/mm/numa.c	2005-09-07 20:10:24.000000000 -0700
@@ -130,29 +130,14 @@ void __init setup_node_zones(int nodeid)
 	unsigned long start_pfn, end_pfn; 
 	unsigned long zones[MAX_NR_ZONES];
 	unsigned long holes[MAX_NR_ZONES];
-	unsigned long dma_end_pfn;
 
-	memset(zones, 0, sizeof(unsigned long) * MAX_NR_ZONES); 
-	memset(holes, 0, sizeof(unsigned long) * MAX_NR_ZONES);
+ 	start_pfn = node_start_pfn(nodeid);
+ 	end_pfn = node_end_pfn(nodeid);
 
-	start_pfn = node_start_pfn(nodeid);
-	end_pfn = node_end_pfn(nodeid);
+	Dprintk(KERN_INFO "setting up node %d %lx-%lx\n", 
+		nodeid, start_pfn, end_pfn);
 
-	Dprintk(KERN_INFO "setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn);
-	
-	/* All nodes > 0 have a zero length zone DMA */ 
-	dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT; 
-	if (start_pfn < dma_end_pfn) { 
-		zones[ZONE_DMA] = dma_end_pfn - start_pfn;
-		holes[ZONE_DMA] = e820_hole_size(start_pfn, dma_end_pfn);
-		zones[ZONE_NORMAL] = end_pfn - dma_end_pfn; 
-		holes[ZONE_NORMAL] = e820_hole_size(dma_end_pfn, end_pfn);
-
-	} else { 
-		zones[ZONE_NORMAL] = end_pfn - start_pfn; 
-		holes[ZONE_NORMAL] = e820_hole_size(start_pfn, end_pfn);
-	} 
-    
+	size_zones(zones, holes, start_pfn, end_pfn);
 	free_area_init_node(nodeid, NODE_DATA(nodeid), zones,
 			    start_pfn, holes);
 } 
diff -puN include/asm-x86_64/dma.h~x86_64-dma32 include/asm-x86_64/dma.h
--- devel/include/asm-x86_64/dma.h~x86_64-dma32	2005-09-07 20:10:24.000000000 -0700
+++ devel-akpm/include/asm-x86_64/dma.h	2005-09-07 20:10:24.000000000 -0700
@@ -72,8 +72,15 @@
 
 #define MAX_DMA_CHANNELS	8
 
-/* The maximum address that we can perform a DMA transfer to on this platform */
-#define MAX_DMA_ADDRESS      (PAGE_OFFSET+0x1000000)
+
+/* 16MB ISA DMA zone */
+#define MAX_DMA_PFN   ((16*1024*1024) >> PAGE_SHIFT)
+
+/* 4GB broken PCI/AGP hardware bus master zone */
+#define MAX_DMA32_PFN ((4UL*1024*1024*1024) >> PAGE_SHIFT)
+
+/* Compat define for old dma zone */
+#define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT))
 
 /* 8237 DMA controllers */
 #define IO_DMA1_BASE	0x00	/* 8 bit slave DMA, channels 0..3 */
diff -puN include/asm-x86_64/proto.h~x86_64-dma32 include/asm-x86_64/proto.h
--- devel/include/asm-x86_64/proto.h~x86_64-dma32	2005-09-07 20:10:24.000000000 -0700
+++ devel-akpm/include/asm-x86_64/proto.h	2005-09-07 20:10:24.000000000 -0700
@@ -23,6 +23,8 @@ extern void mtrr_bp_init(void);
 #define mtrr_bp_init() do {} while (0)
 #endif
 extern void init_memory_mapping(unsigned long start, unsigned long end);
+extern void size_zones(unsigned long *z, unsigned long *h, 
+			unsigned long start_pfn, unsigned long end_pfn);
 
 extern void system_call(void); 
 extern int kernel_syscall(void);
diff -puN include/linux/gfp.h~x86_64-dma32 include/linux/gfp.h
--- devel/include/linux/gfp.h~x86_64-dma32	2005-09-07 20:10:24.000000000 -0700
+++ devel-akpm/include/linux/gfp.h	2005-09-07 20:10:24.000000000 -0700
@@ -14,6 +14,13 @@ struct vm_area_struct;
 /* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low two bits) */
 #define __GFP_DMA	0x01u
 #define __GFP_HIGHMEM	0x02u
+#ifdef CONFIG_DMA_IS_DMA32
+#define __GFP_DMA32	0x01	/* ZONE_DMA is ZONE_DMA32 */
+#elif BITS_PER_LONG < 64
+#define __GFP_DMA32	0x00	/* ZONE_NORMAL is ZONE_DMA32 */
+#else
+#define __GFP_DMA32	0x04	/* Has own ZONE_DMA32 */
+#endif
 
 /*
  * Action modifiers - doesn't change the zoning
@@ -64,6 +71,8 @@ struct vm_area_struct;
 
 #define GFP_DMA		__GFP_DMA
 
+/* 4GB DMA on some platforms */
+#define GFP_DMA32	__GFP_DMA32
 
 /*
  * There is only one page-allocator function, and two main namespaces to
diff -puN include/linux/mmzone.h~x86_64-dma32 include/linux/mmzone.h
--- devel/include/linux/mmzone.h~x86_64-dma32	2005-09-07 20:10:24.000000000 -0700
+++ devel-akpm/include/linux/mmzone.h	2005-09-07 20:10:24.000000000 -0700
@@ -70,11 +70,12 @@ struct per_cpu_pageset {
 #endif
 
 #define ZONE_DMA		0
-#define ZONE_NORMAL		1
-#define ZONE_HIGHMEM		2
+#define ZONE_DMA32		1
+#define ZONE_NORMAL		2
+#define ZONE_HIGHMEM		3
 
-#define MAX_NR_ZONES		3	/* Sync this with ZONES_SHIFT */
-#define ZONES_SHIFT		2	/* ceil(log2(MAX_NR_ZONES)) */
+#define MAX_NR_ZONES		4	/* Sync this with ZONES_SHIFT */
+#define ZONES_SHIFT		3	/* ceil(log2(MAX_NR_ZONES)) */
 
 
 /*
@@ -90,7 +91,7 @@ struct per_cpu_pageset {
  * be 8 (2 ** 3) zonelists.  GFP_ZONETYPES defines the number of possible
  * combinations of zone modifiers in "zone modifier space".
  */
-#define GFP_ZONEMASK	0x03
+#define GFP_ZONEMASK	0x07
 /*
  * As an optimisation any zone modifier bits which are only valid when
  * no other zone modifier bits are set (loners) should be placed in
@@ -110,6 +111,7 @@ struct per_cpu_pageset {
- * into multiple physical zones. On a PC we have 3 zones:
+ * into multiple physical zones. On a PC we have 4 zones:
  *
  * ZONE_DMA	  < 16 MB	ISA DMA capable memory
+ * ZONE_DMA32	     0 MB 	Empty
  * ZONE_NORMAL	16-896 MB	direct mapped by the kernel
  * ZONE_HIGHMEM	 > 896 MB	only page cache and user processes
  */
diff -puN mm/page_alloc.c~x86_64-dma32 mm/page_alloc.c
--- devel/mm/page_alloc.c~x86_64-dma32	2005-09-07 20:10:24.000000000 -0700
+++ devel-akpm/mm/page_alloc.c	2005-09-07 20:10:24.000000000 -0700
@@ -58,8 +58,11 @@ long nr_swap_pages;
  *	NORMAL allocation will leave 784M/256 of ram reserved in the ZONE_DMA
  *	HIGHMEM allocation will leave 224M/32 of ram reserved in ZONE_NORMAL
  *	HIGHMEM allocation will (224M+784M)/256 of ram reserved in ZONE_DMA
+ *
+ * TBD: should special case ZONE_DMA32 machines here - in those we normally
+ * don't need any ZONE_NORMAL reservation
  */
-int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 32 };
+int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 };
 
 EXPORT_SYMBOL(totalram_pages);
 EXPORT_SYMBOL(nr_swap_pages);
@@ -71,7 +74,7 @@ EXPORT_SYMBOL(nr_swap_pages);
 struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly;
 EXPORT_SYMBOL(zone_table);
 
-static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
+static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" };
 int min_free_kbytes = 1024;
 
 unsigned long __initdata nr_kernel_pages;
@@ -1418,6 +1421,10 @@ static int __init build_zonelists_node(p
 		zone = pgdat->node_zones + ZONE_NORMAL;
 		if (zone->present_pages)
 			zonelist->zones[j++] = zone;
+	case ZONE_DMA32:
+		zone = pgdat->node_zones + ZONE_DMA32;
+		if (zone->present_pages)
+			zonelist->zones[j++] = zone;
 	case ZONE_DMA:
 		zone = pgdat->node_zones + ZONE_DMA;
 		if (zone->present_pages)
@@ -1526,6 +1533,8 @@ static void __init build_zonelists(pg_da
 			k = ZONE_NORMAL;
 			if (i & __GFP_HIGHMEM)
 				k = ZONE_HIGHMEM;
+			if (i & __GFP_DMA32)
+				k = ZONE_DMA32;
 			if (i & __GFP_DMA)
 				k = ZONE_DMA;
 
@@ -1550,7 +1559,9 @@ static void __init build_zonelists(pg_da
 		j = 0;
 		k = ZONE_NORMAL;
 		if (i & __GFP_HIGHMEM)
-			k = ZONE_HIGHMEM;
+			k = ZONE_HIGHMEM;		
+		if (i & __GFP_DMA32)
+			k = ZONE_DMA32;
 		if (i & __GFP_DMA)
 			k = ZONE_DMA;
 
@@ -1895,7 +1906,7 @@ static void __init free_area_init_core(s
 		if (zholes_size)
 			realsize -= zholes_size[j];
 
-		if (j == ZONE_DMA || j == ZONE_NORMAL)
+		if (j < ZONE_HIGHMEM) 
 			nr_kernel_pages += realsize;
 		nr_all_pages += realsize;
 
_