The compound page logic is a little fragile: it relies on additional
metadata in the page frames, which some other kernel code likes to stomp on
(xfs was doing this).

Also, because we're treating all higher-order pages as compound pages, it
is no longer possible to free individual lower-order pages from the middle
of a higher-order allocation.  At least one ARM driver insists on doing
this.
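
For illustration, a minimal sketch of that pattern - roughly what such a
driver does with a plain (non-compound) higher-order allocation.  The
function name and the order are made up; this is not the actual ARM code:

	#include <linux/mm.h>
	#include <linux/gfp.h>

	static void sub_page_free_sketch(void)
	{
		int order = 2;				/* illustrative order */
		struct page *page, *p;

		page = alloc_pages(GFP_KERNEL, order);	/* no compound metadata */
		if (!page)
			return;

		/* give each constituent page its own reference... */
		for (p = page + 1; p < page + (1 << order); p++)
			set_page_count(p, 1);

		/* ...so an unwanted page in the middle can be freed alone,
		 * while the driver keeps using the rest */
		__free_page(page + 1);
	}

Once every higher-order page carries compound metadata, freeing page + 1
like this redirects to the head page's refcount and trips the destructor
checks, which is what broke.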

We only really need the compound page logic for higher-order pages which can
be mapped into user pagetables and placed under direct-io.  This covers
hugetlb pages and, conceivably, soundcard DMA buffers which were allocated
with a higher-order allocation but which weren't marked PageReserved.
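
For context, this is roughly what the compound metadata looks like once it
has been set up.  The sketch below is reconstructed from the fields which
destroy_compound_page(), get_page() and put_page() inspect in the hunks
further down; it is not lifted verbatim from prep_compound_page():

	/*
	 * Every constituent page gets PG_compound and a pointer back to the
	 * head page, so get_page()/put_page() on any sub-page (e.g. via
	 * get_user_pages() for direct-io) pin the head.  page[1] also
	 * records the allocation order and an optional destructor.
	 */
	static void compound_layout_sketch(struct page *page, unsigned long order)
	{
		int i;

		page[1].mapping = NULL;		/* destructor slot */
		page[1].index = order;		/* checked when freeing */
		for (i = 0; i < (1 << order); i++) {
			struct page *p = page + i;

			SetPageCompound(p);
			p->private = (unsigned long)page;	/* -> head page */
		}
	}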

The patch arranges for the hugetlb implementations to allocate their pages with
compound page metadata, and all other higher-order allocations go back to the
old way.
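
The net effect on callers, as a sketch (the variables are illustrative; only
the __GFP_COMP flag itself is added by this patch):

	struct page *huge, *buf;

	/* hugetlb-style user: asks for compound metadata explicitly */
	huge = alloc_pages(GFP_HIGHUSER | __GFP_COMP, HUGETLB_PAGE_ORDER);

	/* ordinary higher-order allocation: no __GFP_COMP, old behaviour */
	buf = alloc_pages(GFP_KERNEL, 2);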


---

 25-akpm/arch/i386/mm/hugetlbpage.c    |    3 ++-
 25-akpm/arch/ia64/mm/hugetlbpage.c    |    3 ++-
 25-akpm/arch/ppc64/mm/hugetlbpage.c   |    3 ++-
 25-akpm/arch/sh/mm/hugetlbpage.c      |    3 ++-
 25-akpm/arch/sparc64/mm/hugetlbpage.c |    3 ++-
 25-akpm/include/linux/gfp.h           |    1 +
 25-akpm/include/linux/mm.h            |    4 ++--
 25-akpm/mm/page_alloc.c               |   22 +++++++++++-----------
 include/linux/page-flags.h            |    0 
 9 files changed, 24 insertions(+), 18 deletions(-)

diff -puN include/linux/page-flags.h~use-compound-pages-for-hugetlb-only include/linux/page-flags.h
diff -puN mm/page_alloc.c~use-compound-pages-for-hugetlb-only mm/page_alloc.c
--- 25/mm/page_alloc.c~use-compound-pages-for-hugetlb-only	2004-04-06 18:43:04.860918880 -0700
+++ 25-akpm/mm/page_alloc.c	2004-04-06 18:46:55.820807640 -0700
@@ -130,6 +130,9 @@ static void destroy_compound_page(struct
 	int i;
 	int nr_pages = 1 << order;
 
+	if (!PageCompound(page))
+		return;
+
 	if (page[1].index != order)
 		bad_page(__FUNCTION__, page);
 
@@ -487,10 +490,12 @@ void fastcall free_cold_page(struct page
  * or two.
  */
 
-static struct page *buffered_rmqueue(struct zone *zone, int order, int cold)
+static struct page *
+buffered_rmqueue(struct zone *zone, int order, int gfp_flags)
 {
 	unsigned long flags;
 	struct page *page = NULL;
+	int cold = !!(gfp_flags & __GFP_COLD);
 
 	if (order == 0) {
 		struct per_cpu_pages *pcp;
@@ -519,7 +524,7 @@ static struct page *buffered_rmqueue(str
 		BUG_ON(bad_range(zone, page));
 		mod_page_state_zone(zone, pgalloc, 1 << order);
 		prep_new_page(page, order);
-		if (order)
+		if (order && (gfp_flags & __GFP_COMP))
 			prep_compound_page(page, order);
 	}
 	return page;
@@ -552,16 +557,11 @@ __alloc_pages(unsigned int gfp_mask, uns
 	struct reclaim_state reclaim_state;
 	struct task_struct *p = current;
 	int i;
-	int cold;
 	int alloc_type;
 	int do_retry;
 
 	might_sleep_if(wait);
 
-	cold = 0;
-	if (gfp_mask & __GFP_COLD)
-		cold = 1;
-
 	zones = zonelist->zones;  /* the list of zones suitable for gfp_mask */
 	if (zones[0] == NULL)     /* no zones in the zonelist */
 		return NULL;
@@ -583,7 +583,7 @@ __alloc_pages(unsigned int gfp_mask, uns
 
 		if (z->free_pages >= min ||
 				(!wait && z->free_pages >= z->pages_high)) {
-			page = buffered_rmqueue(z, order, cold);
+			page = buffered_rmqueue(z, order, gfp_mask);
 			if (page)
 				goto got_pg;
 		}
@@ -606,7 +606,7 @@ __alloc_pages(unsigned int gfp_mask, uns
 
 		if (z->free_pages >= min ||
 				(!wait && z->free_pages >= z->pages_high)) {
-			page = buffered_rmqueue(z, order, cold);
+			page = buffered_rmqueue(z, order, gfp_mask);
 			if (page)
 				goto got_pg;
 		}
@@ -620,7 +620,7 @@ rebalance:
 		for (i = 0; zones[i] != NULL; i++) {
 			struct zone *z = zones[i];
 
-			page = buffered_rmqueue(z, order, cold);
+			page = buffered_rmqueue(z, order, gfp_mask);
 			if (page)
 				goto got_pg;
 		}
@@ -648,7 +648,7 @@ rebalance:
 
 		if (z->free_pages >= min ||
 				(!wait && z->free_pages >= z->pages_high)) {
-			page = buffered_rmqueue(z, order, cold);
+			page = buffered_rmqueue(z, order, gfp_mask);
 			if (page)
 				goto got_pg;
 		}
diff -puN arch/i386/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only arch/i386/mm/hugetlbpage.c
--- 25/arch/i386/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only	2004-04-06 18:43:04.877916296 -0700
+++ 25-akpm/arch/i386/mm/hugetlbpage.c	2004-04-06 18:47:19.432218160 -0700
@@ -54,7 +54,8 @@ static struct page *alloc_fresh_huge_pag
 {
 	static int nid = 0;
 	struct page *page;
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+				HUGETLB_PAGE_ORDER);
 	nid = (nid + 1) % numnodes;
 	return page;
 }
diff -puN arch/ia64/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only arch/ia64/mm/hugetlbpage.c
--- 25/arch/ia64/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only	2004-04-06 18:43:04.894913712 -0700
+++ 25-akpm/arch/ia64/mm/hugetlbpage.c	2004-04-06 18:47:29.318715184 -0700
@@ -58,7 +58,8 @@ static struct page *alloc_fresh_huge_pag
 {
 	static int nid = 0;
 	struct page *page;
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+					HUGETLB_PAGE_ORDER);
 	nid = (nid + 1) % numnodes;
 	return page;
 }
diff -puN arch/ppc64/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only arch/ppc64/mm/hugetlbpage.c
--- 25/arch/ppc64/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only	2004-04-06 18:43:04.911911128 -0700
+++ 25-akpm/arch/ppc64/mm/hugetlbpage.c	2004-04-06 18:47:39.397183024 -0700
@@ -78,7 +78,8 @@ static struct page *alloc_fresh_huge_pag
 	static int nid = 0;
 	struct page *page;
 
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+					HUGETLB_PAGE_ORDER);
 	if (!page)
 		return NULL;
 
diff -puN arch/sh/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only arch/sh/mm/hugetlbpage.c
--- 25/arch/sh/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only	2004-04-06 18:43:04.928908544 -0700
+++ 25-akpm/arch/sh/mm/hugetlbpage.c	2004-04-06 18:47:47.763911088 -0700
@@ -60,7 +60,8 @@ static struct page *alloc_fresh_huge_pag
 {
 	static int nid = 0;
 	struct page *page;
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+					HUGETLB_PAGE_ORDER);
 	nid = (nid + 1) % numnodes;
 	return page;
 }
diff -puN arch/sparc64/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only arch/sparc64/mm/hugetlbpage.c
--- 25/arch/sparc64/mm/hugetlbpage.c~use-compound-pages-for-hugetlb-only	2004-04-06 18:43:04.945905960 -0700
+++ 25-akpm/arch/sparc64/mm/hugetlbpage.c	2004-04-06 18:47:57.778388656 -0700
@@ -56,7 +56,8 @@ static struct page *alloc_fresh_huge_pag
 {
 	static int nid = 0;
 	struct page *page;
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+					HUGETLB_PAGE_ORDER);
 	nid = (nid + 1) % numnodes;
 	return page;
 }
diff -puN include/linux/gfp.h~use-compound-pages-for-hugetlb-only include/linux/gfp.h
--- 25/include/linux/gfp.h~use-compound-pages-for-hugetlb-only	2004-04-06 18:43:13.707573984 -0700
+++ 25-akpm/include/linux/gfp.h	2004-04-06 18:48:51.463227320 -0700
@@ -32,6 +32,7 @@
 #define __GFP_NOFAIL	0x800	/* Retry for ever.  Cannot fail */
 #define __GFP_NORETRY	0x1000	/* Do not retry.  Might fail */
 #define __GFP_NO_GROW	0x2000	/* Slab internal usage */
+#define __GFP_COMP	0x4000	/* Add compound page metadata */
 
 #define __GFP_BITS_SHIFT 16	/* Room for 16 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
diff -puN include/linux/mm.h~use-compound-pages-for-hugetlb-only include/linux/mm.h
--- 25/include/linux/mm.h~use-compound-pages-for-hugetlb-only	2004-04-06 18:49:06.323968144 -0700
+++ 25-akpm/include/linux/mm.h	2004-04-06 18:49:36.783337616 -0700
@@ -247,14 +247,14 @@ static inline int page_count(struct page
 
 static inline void get_page(struct page *page)
 {
-	if (PageCompound(page))
+	if (unlikely(PageCompound(page)))
 		page = (struct page *)page->private;
 	atomic_inc(&page->count);
 }
 
 static inline void put_page(struct page *page)
 {
-	if (PageCompound(page)) {
+	if (unlikely(PageCompound(page))) {
 		page = (struct page *)page->private;
 		if (put_page_testzero(page)) {
 			if (page[1].mapping) {	/* destructor? */

_