The compound page logic is a little fragile: it relies on additional
metadata in the page frames, and some other kernel code likes to stomp on
that metadata (xfs was doing this).

Also, because we're treating all higher-order pages as compound pages, it
is no longer possible to free individual lower-order pages from the middle
of a higher-order page.  At least one ARM driver insists on doing this.
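To illustrate the pattern which breaks (a hypothetical driver fragment, not
the actual ARM code): the driver takes a higher-order allocation and then
hands individual pages back to the page allocator one at a time.  Once the
block has been given compound metadata, the per-page frees are no longer
legal:

	struct page *page;
	int i;

	page = alloc_pages(GFP_KERNEL, 2);	/* one order-2 block: 4 pages */

	/* Keep page[0], release the rest a page at a time.  This only
	 * works if the block was *not* prepped as a compound page. */
	for (i = 1; i < 4; i++)
		__free_page(page + i);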

We only really need the compound page logic for higher-order pages which can
be mapped into user pagetables and placed under direct-io.  This covers
hugetlb pages and, conceivably, soundcard DMA buffers which were allocated
with a higher-order allocation but which weren't marked PageReserved.
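The problem the metadata solves: direct-io takes page references against
whatever struct page the user pte points at, and that may be a tail page of
the higher-order block.  The compound metadata lets get_page()/put_page()
redirect such references to the head page, paraphrasing the get_page() path
from the include/linux/mm.h hunk below:

	/* Tail pages point back at the head page via ->private;
	 * a reference taken against a tail page must be moved to
	 * the head so the whole block lives and dies together. */
	if (PageCompound(page))
		page = (struct page *)page->private;	/* head page */
	atomic_inc(&page->count);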

The patch arranges for the hugetlb implementations to allocate their pages
with compound page metadata, and all other higher-order allocations go back
to the old way.
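Any other code which maps a higher-order allocation into userspace and lets
it go under direct-io must now opt in explicitly, the same way the hugetlb
allocators do in the hunks below.  A minimal sketch (the variable names here
are illustrative):

	struct page *buf;
	int order = 4;		/* illustrative */

	/* A higher-order buffer which will be mmapped into userspace
	 * (and possibly used as a direct-io target) must now request
	 * the compound metadata itself: */
	buf = alloc_pages(GFP_KERNEL | __GFP_COMP, order);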

(Andrea supplied the GFP_LEVEL_MASK fix)
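GFP_LEVEL_MASK gathers every __GFP_ modifier which callers may legitimately
pass through an allocator entry point, so a new modifier such as __GFP_COMP
only needs adding in one place; slab's SLAB_LEVEL_MASK now simply reuses it.
A sketch of the kind of sanity check this supports (illustrative, not the
exact slab code):

	/* Any bit outside the mask means the caller passed a flag
	 * which this entry point does not understand: */
	BUG_ON(flags & ~GFP_LEVEL_MASK);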


---

 25-akpm/arch/i386/mm/hugetlbpage.c    |    3 ++-
 25-akpm/arch/ia64/mm/hugetlbpage.c    |    3 ++-
 25-akpm/arch/ppc64/mm/hugetlbpage.c   |    3 ++-
 25-akpm/arch/sh/mm/hugetlbpage.c      |    3 ++-
 25-akpm/arch/sparc64/mm/hugetlbpage.c |    3 ++-
 25-akpm/include/linux/gfp.h           |    6 ++++++
 25-akpm/include/linux/mm.h            |    4 ++--
 25-akpm/include/linux/slab.h          |    4 +---
 25-akpm/mm/page_alloc.c               |   22 +++++++++++-----------
 include/linux/page-flags.h            |    0 
 10 files changed, 30 insertions(+), 21 deletions(-)

diff -puN include/linux/page-flags.h~use-compound-pages-for-hugetlb-only include/linux/page-flags.h
diff -puN mm/page_alloc.c~use-compound-pages-for-hugetlb-only mm/page_alloc.c
--- 25/mm/page_alloc.c~use-compound-pages-for-hugetlb-only	Thu Apr  8 14:28:32 2004
+++ 25-akpm/mm/page_alloc.c	Thu Apr  8 14:43:06 2004
@@ -130,6 +130,9 @@ static void destroy_compound_page(struct
 	int i;
 	int nr_pages = 1 << order;
 
+	if (!PageCompound(page))
+		return;
+
 	if (page[1].index != order)
 		bad_page(__FUNCTION__, page);
 
@@ -487,10 +490,12 @@ void fastcall free_cold_page(struct page
  * or two.
  */
 
-static struct page *buffered_rmqueue(struct zone *zone, int order, int cold)
+static struct page *
+buffered_rmqueue(struct zone *zone, int order, int gfp_flags)
 {
 	unsigned long flags;
 	struct page *page = NULL;
+	int cold = !!(gfp_flags & __GFP_COLD);
 
 	if (order == 0) {
 		struct per_cpu_pages *pcp;
@@ -519,7 +524,7 @@ static struct page *buffered_rmqueue(str
 		BUG_ON(bad_range(zone, page));
 		mod_page_state_zone(zone, pgalloc, 1 << order);
 		prep_new_page(page, order);
-		if (order)
+		if (order && (gfp_flags & __GFP_COMP))
 			prep_compound_page(page, order);
 	}
 	return page;
@@ -552,16 +557,11 @@ __alloc_pages(unsigned int gfp_mask, uns
 	struct reclaim_state reclaim_state;
 	struct task_struct *p = current;
 	int i;
-	int cold;
 	int alloc_type;
 	int do_retry;
 
 	might_sleep_if(wait);
 
-	cold = 0;
-	if (gfp_mask & __GFP_COLD)
-		cold = 1;
-
 	zones = zonelist->zones;  /* the list of zones suitable for gfp_mask */
 	if (zones[0] == NULL)     /* no zones in the zonelist */
 		return NULL;
@@ -583,7 +583,7 @@ __alloc_pages(unsigned int gfp_mask, uns
 
 		if (z->free_pages >= min ||
 				(!wait && z->free_pages >= z->pages_high)) {
-			page = buffered_rmqueue(z, order, cold);
+			page = buffered_rmqueue(z, order, gfp_mask);
 			if (page)
 				goto got_pg;
 		}
@@ -606,7 +606,7 @@ __alloc_pages(unsigned int gfp_mask, uns
 
 		if (z->free_pages >= min ||
 				(!wait && z->free_pages >= z->pages_high)) {
-			page = buffered_rmqueue(z, order, cold);
+			page = buffered_rmqueue(z, order, gfp_mask);
 			if (page)
 				goto got_pg;
 		}
@@ -620,7 +620,7 @@ rebalance:
 		for (i = 0; zones[i] != NULL; i++) {
 			struct zone *z = zones[i];
 
-			page = buffered_rmqueue(z, order, cold);
+			page = buffered_rmqueue(z, order, gfp_mask);
 			if (page)
 				goto got_pg;
 		}
@@ -648,7 +648,7 @@ rebalance:
 
 		if (z->free_pages >= min ||
 				(!wait && z->free_pages >= z->pages_high)) {
-			page = buffered_rmqueue(z, order, cold);
+			page = buffered_rmqueue(z, order, gfp_mask);
 			if (page)
 				goto got_pg;
 		}
diff -puN arch/i386/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only arch/i386/mm/hugetlbpage.c
--- 25/arch/i386/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only	Thu Apr  8 14:28:32 2004
+++ 25-akpm/arch/i386/mm/hugetlbpage.c	Thu Apr  8 15:01:26 2004
@@ -54,7 +54,8 @@ static struct page *alloc_fresh_huge_pag
 {
 	static int nid = 0;
 	struct page *page;
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+				HUGETLB_PAGE_ORDER);
 	nid = (nid + 1) % numnodes;
 	return page;
 }
diff -puN arch/ia64/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only arch/ia64/mm/hugetlbpage.c
--- 25/arch/ia64/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only	Thu Apr  8 14:28:32 2004
+++ 25-akpm/arch/ia64/mm/hugetlbpage.c	Thu Apr  8 15:01:26 2004
@@ -58,7 +58,8 @@ static struct page *alloc_fresh_huge_pag
 {
 	static int nid = 0;
 	struct page *page;
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+					HUGETLB_PAGE_ORDER);
 	nid = (nid + 1) % numnodes;
 	return page;
 }
diff -puN arch/ppc64/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only arch/ppc64/mm/hugetlbpage.c
--- 25/arch/ppc64/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only	Thu Apr  8 14:28:32 2004
+++ 25-akpm/arch/ppc64/mm/hugetlbpage.c	Thu Apr  8 15:01:26 2004
@@ -78,7 +78,8 @@ static struct page *alloc_fresh_huge_pag
 	static int nid = 0;
 	struct page *page;
 
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+					HUGETLB_PAGE_ORDER);
 	if (!page)
 		return NULL;
 
diff -puN arch/sh/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only arch/sh/mm/hugetlbpage.c
--- 25/arch/sh/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only	Thu Apr  8 14:28:32 2004
+++ 25-akpm/arch/sh/mm/hugetlbpage.c	Thu Apr  8 15:01:26 2004
@@ -60,7 +60,8 @@ static struct page *alloc_fresh_huge_pag
 {
 	static int nid = 0;
 	struct page *page;
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+					HUGETLB_PAGE_ORDER);
 	nid = (nid + 1) % numnodes;
 	return page;
 }
diff -puN arch/sparc64/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only arch/sparc64/mm/hugetlbpage.c
--- 25/arch/sparc64/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only	Thu Apr  8 14:28:32 2004
+++ 25-akpm/arch/sparc64/mm/hugetlbpage.c	Thu Apr  8 15:01:34 2004
@@ -56,7 +56,8 @@ static struct page *alloc_fresh_huge_pag
 {
 	static int nid = 0;
 	struct page *page;
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+					HUGETLB_PAGE_ORDER);
 	nid = (nid + 1) % numnodes;
 	return page;
 }
diff -puN include/linux/gfp.h~use-compound-pages-for-hugetlb-only include/linux/gfp.h
--- 25/include/linux/gfp.h~use-compound-pages-for-hugetlb-only	Thu Apr  8 14:28:32 2004
+++ 25-akpm/include/linux/gfp.h	Thu Apr  8 15:01:40 2004
@@ -32,10 +32,16 @@
 #define __GFP_NOFAIL	0x800	/* Retry for ever.  Cannot fail */
 #define __GFP_NORETRY	0x1000	/* Do not retry.  Might fail */
 #define __GFP_NO_GROW	0x2000	/* Slab internal usage */
+#define __GFP_COMP	0x4000	/* Add compound page metadata */
 
 #define __GFP_BITS_SHIFT 16	/* Room for 16 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
 
+/* if you forget to add the bitmask here kernel will crash, period */
+#define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
+			__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
+			__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP)
+
 #define GFP_ATOMIC	(__GFP_HIGH)
 #define GFP_NOIO	(__GFP_WAIT)
 #define GFP_NOFS	(__GFP_WAIT | __GFP_IO)
diff -puN include/linux/mm.h~use-compound-pages-for-hugetlb-only include/linux/mm.h
--- 25/include/linux/mm.h~use-compound-pages-for-hugetlb-only	Thu Apr  8 14:28:32 2004
+++ 25-akpm/include/linux/mm.h	Thu Apr  8 15:01:35 2004
@@ -247,14 +247,14 @@ static inline int page_count(struct page
 
 static inline void get_page(struct page *page)
 {
-	if (PageCompound(page))
+	if (unlikely(PageCompound(page)))
 		page = (struct page *)page->private;
 	atomic_inc(&page->count);
 }
 
 static inline void put_page(struct page *page)
 {
-	if (PageCompound(page)) {
+	if (unlikely(PageCompound(page))) {
 		page = (struct page *)page->private;
 		if (put_page_testzero(page)) {
 			if (page[1].mapping) {	/* destructor? */
diff -puN include/linux/slab.h~use-compound-pages-for-hugetlb-only include/linux/slab.h
--- 25/include/linux/slab.h~use-compound-pages-for-hugetlb-only	Thu Apr  8 15:01:40 2004
+++ 25-akpm/include/linux/slab.h	Thu Apr  8 15:01:40 2004
@@ -25,9 +25,7 @@ typedef struct kmem_cache_s kmem_cache_t
 #define	SLAB_KERNEL		GFP_KERNEL
 #define	SLAB_DMA		GFP_DMA
 
-#define SLAB_LEVEL_MASK		(__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\
-				__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT|\
-				__GFP_NOFAIL|__GFP_NORETRY)
+#define SLAB_LEVEL_MASK		GFP_LEVEL_MASK
 
 #define	SLAB_NO_GROW		__GFP_NO_GROW	/* don't grow a cache */
 
