patch-2.4.23 linux-2.4.23/mm/page_alloc.c
- Lines: 302
- Date: 2003-11-28 10:26:21.000000000 -0800
- Orig file: linux-2.4.22/mm/page_alloc.c
- Orig date: 2002-11-28 15:53:15.000000000 -0800
diff -urN linux-2.4.22/mm/page_alloc.c linux-2.4.23/mm/page_alloc.c
@@ -42,6 +42,9 @@
static int zone_balance_ratio[MAX_NR_ZONES] __initdata = { 128, 128, 128, };
static int zone_balance_min[MAX_NR_ZONES] __initdata = { 20 , 20, 20, };
static int zone_balance_max[MAX_NR_ZONES] __initdata = { 255 , 255, 255, };
+static int lower_zone_reserve_ratio[MAX_NR_ZONES-1] = { 256, 32 };
+
+int vm_gfp_debug = 0;
/*
* Temporary debugging check.
@@ -106,7 +109,8 @@
BUG();
if (PageActive(page))
BUG();
- page->flags &= ~((1<<PG_referenced) | (1<<PG_dirty));
+ ClearPageReferenced(page);
+ ClearPageDirty(page);
if (current->flags & PF_FREE_PAGES)
goto local_freelist;
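The open-coded mask clear on the free path is replaced with the page-flag wrappers. In 2.4 these macros expand to atomic clear_bit() operations on page->flags, whereas the removed line was a plain non-atomic read-modify-write; the wrappers are also self-documenting. A minimal userspace sketch of what the two calls accomplish (bit numbers and the non-atomic modeling here are illustrative only):

    /* Userspace model of the two flag clears (illustrative bit numbers;
     * the real kernel macros use atomic clear_bit()). */
    #include <stdio.h>

    enum { PG_referenced = 2, PG_dirty = 4 };
    struct page { unsigned long flags; };

    #define ClearPageReferenced(p) ((p)->flags &= ~(1UL << PG_referenced))
    #define ClearPageDirty(p)      ((p)->flags &= ~(1UL << PG_dirty))

    int main(void)
    {
        struct page pg = { .flags = (1UL << PG_referenced) | (1UL << PG_dirty) };
        ClearPageReferenced(&pg);
        ClearPageDirty(&pg);
        printf("flags after free-path clears: %#lx\n", pg.flags); /* 0 */
        return 0;
    }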
@@ -253,10 +257,8 @@
static struct page * balance_classzone(zone_t * classzone, unsigned int gfp_mask, unsigned int order, int * freed)
{
struct page * page = NULL;
- int __freed = 0;
+ int __freed;
- if (!(gfp_mask & __GFP_WAIT))
- goto out;
if (in_interrupt())
BUG();
@@ -316,33 +318,36 @@
}
current->nr_local_pages = 0;
}
- out:
+
*freed = __freed;
return page;
}
+static inline unsigned long zone_free_pages(zone_t * zone, unsigned int order)
+{
+ long free = zone->free_pages - (1UL << order);
+ return free >= 0 ? free : 0;
+}
+
/*
* This is the 'heart' of the zoned buddy allocator:
*/
struct page * __alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist)
{
- unsigned long min;
zone_t **zone, * classzone;
struct page * page;
- int freed;
+ int freed, class_idx;
zone = zonelist->zones;
classzone = *zone;
- if (classzone == NULL)
- return NULL;
- min = 1UL << order;
+ class_idx = zone_idx(classzone);
+
for (;;) {
zone_t *z = *(zone++);
if (!z)
break;
- min += z->pages_low;
- if (z->free_pages > min) {
+ if (zone_free_pages(z, order) > z->watermarks[class_idx].low) {
page = rmqueue(z, order);
if (page)
return page;
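The first scan now asks each zone a sharper question via the new zone_free_pages() helper: not "how many pages are free?" but "how many would remain after this 2^order block is taken?", clamped at zero so the unsigned result cannot wrap. That remainder is compared against the zone's low watermark for the caller's classzone; the per-classzone watermarks[class_idx] table replaces the old accumulating min += z->pages_low walk. A small userspace sketch of the helper's arithmetic:

    /* What zone_free_pages() reports for a zone with 300 free pages. */
    #include <stdio.h>

    static unsigned long zone_free_pages(long free_pages, unsigned int order)
    {
        long free = free_pages - (1L << order);
        return free >= 0 ? free : 0;    /* clamp: never wrap below zero */
    }

    int main(void)
    {
        for (unsigned int order = 0; order <= 9; order++)
            printf("order %u: %lu pages would remain\n",
                   order, zone_free_pages(300, order));
        return 0;   /* order 9 (512 pages) prints 0: cannot be satisfied */
    }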
@@ -355,18 +360,16 @@
wake_up_interruptible(&kswapd_wait);
zone = zonelist->zones;
- min = 1UL << order;
for (;;) {
- unsigned long local_min;
+ unsigned long min;
zone_t *z = *(zone++);
if (!z)
break;
- local_min = z->pages_min;
+ min = z->watermarks[class_idx].min;
if (!(gfp_mask & __GFP_WAIT))
- local_min >>= 2;
- min += local_min;
- if (z->free_pages > min) {
+ min >>= 2;
+ if (zone_free_pages(z, order) > min) {
page = rmqueue(z, order);
if (page)
return page;
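Putting the first two scans together: a zone passes the fast path while it would stay above its low watermark; otherwise kswapd is woken and a second scan retries against the min watermark, which callers that cannot sleep (no __GFP_WAIT, e.g. GFP_ATOMIC) may quarter via min >>= 2. A compact one-zone model of the ladder (all numbers assumed):

    /* One-zone model of the two watermark scans (illustrative values). */
    #include <stdio.h>

    struct zone { long free; long min, low; };

    static long left_after(const struct zone *z, unsigned int order)
    {
        long free = z->free - (1L << order);     /* zone_free_pages() */
        return free >= 0 ? free : 0;
    }

    int main(void)
    {
        struct zone z = { .free = 260, .min = 128, .low = 256 };
        unsigned int order = 3;                  /* an 8-page block */
        int can_wait = 0;                        /* GFP_ATOMIC-style caller */

        if (left_after(&z, order) > z.low) {
            puts("fast path: allocate immediately");
        } else {
            puts("below low watermark: kswapd is woken");
            long min = can_wait ? z.min : z.min >> 2;   /* min >>= 2 */
            if (left_after(&z, order) > min)
                puts("second scan succeeds against the quartered min");
            else
                puts("fall through to the slow path");
        }
        return 0;
    }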
@@ -375,8 +378,7 @@
/* here we're in the low on memory slow path */
-rebalance:
- if (current->flags & (PF_MEMALLOC | PF_MEMDIE)) {
+ if (current->flags & PF_MEMALLOC && !in_interrupt()) {
zone = zonelist->zones;
for (;;) {
zone_t *z = *(zone++);
@@ -392,34 +394,51 @@
/* Atomic allocations - we can't balance anything */
if (!(gfp_mask & __GFP_WAIT))
- return NULL;
+ goto out;
+ rebalance:
page = balance_classzone(classzone, gfp_mask, order, &freed);
if (page)
return page;
zone = zonelist->zones;
- min = 1UL << order;
- for (;;) {
- zone_t *z = *(zone++);
- if (!z)
- break;
+ if (likely(freed)) {
+ for (;;) {
+ zone_t *z = *(zone++);
+ if (!z)
+ break;
- min += z->pages_min;
- if (z->free_pages > min) {
- page = rmqueue(z, order);
- if (page)
- return page;
+ if (zone_free_pages(z, order) > z->watermarks[class_idx].min) {
+ page = rmqueue(z, order);
+ if (page)
+ return page;
+ }
}
- }
+ goto rebalance;
+ } else {
+ /*
+ * Check whether another task was killed in the meantime;
+ * if so, the memory it freed can satisfy this allocation.
+ */
+ for (;;) {
+ zone_t *z = *(zone++);
+ if (!z)
+ break;
- /* Don't let big-order allocations loop */
- if (order > 3)
- return NULL;
+ if (zone_free_pages(z, order) > z->watermarks[class_idx].high) {
+ page = rmqueue(z, order);
+ if (page)
+ return page;
+ }
+ }
+ }
- /* Yield for kswapd, and try again */
- yield();
- goto rebalance;
+ out:
+ printk(KERN_NOTICE "__alloc_pages: %u-order allocation failed (gfp=0x%x/%i)\n",
+ order, gfp_mask, !!(current->flags & PF_MEMALLOC));
+ if (unlikely(vm_gfp_debug))
+ dump_stack();
+ return NULL;
}
/*
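The slow-path rewrite distinguishes whether balance_classzone() actually freed anything. If it did, the min-watermark scan is retried and the code loops back to rebalance, so even large-order allocations keep trying as long as reclaim makes progress (the old "order > 3 gives up" and yield()-and-retry escape hatches are gone). If nothing was freed, one final scan against the high watermark catches memory released concurrently, say by a task the OOM killer terminated, before the allocation fails. Atomic callers now jump straight to out instead of ever reaching balance_classzone(), which is why its !__GFP_WAIT early-out above could be deleted. Failures are no longer silent: each one logs the order, the gfp mask, and PF_MEMALLOC status, and setting vm_gfp_debug adds a stack dump. With 2.4 flag values, where GFP_ATOMIC is 0x20, a failed atomic order-0 allocation from an ordinary task would produce a line like:

    __alloc_pages: 0-order allocation failed (gfp=0x20/0)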
@@ -481,17 +500,22 @@
{
pg_data_t *pgdat;
unsigned int sum = 0;
+ zonelist_t *zonelist;
+ zone_t **zonep, *zone;
for_each_pgdat(pgdat) {
- zonelist_t *zonelist = pgdat->node_zonelists + (GFP_USER & GFP_ZONEMASK);
- zone_t **zonep = zonelist->zones;
- zone_t *zone;
-
- for (zone = *zonep++; zone; zone = *zonep++) {
- unsigned long size = zone->size;
- unsigned long high = zone->pages_high;
- if (size > high)
- sum += size - high;
+ int class_idx;
+ zonelist = pgdat->node_zonelists + (GFP_USER & GFP_ZONEMASK);
+ zonep = zonelist->zones;
+ zone = *zonep;
+ class_idx = zone_idx(zone);
+
+ sum += zone->nr_cache_pages;
+ for (zone = pgdat->node_zones; zone < pgdat->node_zones + MAX_NR_ZONES; zone++) {
+ int free = zone->free_pages - zone->watermarks[class_idx].high;
+ if (free <= 0)
+ continue;
+ sum += free;
}
}
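The free-memory estimate for the GFP_USER classzone changes shape here: instead of summing "zone size above pages_high" over the zonelist, it now counts the classzone's cached pages plus, for every zone in the node, whatever free memory sits above that zone's high watermark for the classzone. A one-node sketch of the new sum (all counts hypothetical):

    /* One-node sketch of the reworked estimate (illustrative numbers). */
    #include <stdio.h>

    int main(void)
    {
        long nr_cache_pages = 5000;            /* classzone's page cache      */
        long free[3] = {  900, 12000, 2000 };  /* free pages: DMA/Normal/High */
        long high[3] = { 1008,   384,  384 };  /* watermarks[class_idx].high  */

        long sum = nr_cache_pages;
        for (int i = 0; i < 3; i++) {
            long excess = free[i] - high[i];
            if (excess > 0)
                sum += excess;                 /* count only the surplus      */
        }
        printf("estimated freeable pages: %ld\n", sum);  /* 5000+11616+1616 */
        return 0;
    }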
@@ -532,13 +556,9 @@
zone_t *zone;
for (zone = tmpdat->node_zones;
zone < tmpdat->node_zones + MAX_NR_ZONES; zone++)
- printk("Zone:%s freepages:%6lukB min:%6lukB low:%6lukB "
- "high:%6lukB\n",
+ printk("Zone:%s freepages:%6lukB\n",
zone->name,
- K(zone->free_pages),
- K(zone->pages_min),
- K(zone->pages_low),
- K(zone->pages_high));
+ K(zone->free_pages));
tmpdat = tmpdat->node_next;
}
@@ -729,6 +749,7 @@
zone_t *zone = pgdat->node_zones + j;
unsigned long mask;
unsigned long size, realsize;
+ int idx;
zone_table[nid * MAX_NR_ZONES + j] = zone;
realsize = size = zones_size[j];
@@ -737,11 +758,15 @@
printk("zone(%lu): %lu pages.\n", j, size);
zone->size = size;
+ zone->realsize = realsize;
zone->name = zone_names[j];
zone->lock = SPIN_LOCK_UNLOCKED;
zone->zone_pgdat = pgdat;
zone->free_pages = 0;
zone->need_balance = 0;
+ zone->nr_active_pages = zone->nr_inactive_pages = 0;
+
+
if (!size)
continue;
@@ -766,9 +791,29 @@
mask = zone_balance_min[j];
else if (mask > zone_balance_max[j])
mask = zone_balance_max[j];
- zone->pages_min = mask;
- zone->pages_low = mask*2;
- zone->pages_high = mask*3;
+ zone->watermarks[j].min = mask;
+ zone->watermarks[j].low = mask*2;
+ zone->watermarks[j].high = mask*3;
+ /* now set the watermarks of the lower zones in the "j" classzone */
+ for (idx = j-1; idx >= 0; idx--) {
+ zone_t * lower_zone = pgdat->node_zones + idx;
+ unsigned long lower_zone_reserve;
+ if (!lower_zone->size)
+ continue;
+
+ mask = lower_zone->watermarks[idx].min;
+ lower_zone->watermarks[j].min = mask;
+ lower_zone->watermarks[j].low = mask*2;
+ lower_zone->watermarks[j].high = mask*3;
+
+ /* now the clever part */
+ lower_zone_reserve = realsize / lower_zone_reserve_ratio[idx];
+ lower_zone->watermarks[j].min += lower_zone_reserve;
+ lower_zone->watermarks[j].low += lower_zone_reserve;
+ lower_zone->watermarks[j].high += lower_zone_reserve;
+
+ realsize += lower_zone->realsize;
+ }
zone->zone_mem_map = mem_map + offset;
zone->zone_start_mapnr = offset;
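This loop is the core of the rework: every lower zone receives a full min/low/high triple per classzone, plus a reserve that scales with the amount of memory that can fall back onto it: reserve = realsize / lower_zone_reserve_ratio[idx], where realsize accumulates the classzone's pages plus every zone already walked on the way down. Note the division: a zero ratio would crash at boot, so the tunable must stay nonzero. A worked userspace sketch with an assumed i386-like layout (16MB DMA, 880MB Normal, 128MB HighMem in 4KB pages; all numbers hypothetical):

    /* Mirrors the setup loop above for an assumed three-zone layout. */
    #include <stdio.h>

    int main(void)
    {
        long size[3]  = { 4096, 225280, 32768 };  /* DMA, Normal, HighMem     */
        int  ratio[2] = { 256, 32 };              /* lower_zone_reserve_ratio */

        for (int j = 2; j >= 0; j--) {            /* each classzone */
            long realsize = size[j];
            for (int idx = j - 1; idx >= 0; idx--) {
                long reserve = realsize / ratio[idx];
                printf("classzone %d: zone %d holds back %ld extra pages\n",
                       j, idx, reserve);
                realsize += size[idx];            /* accumulate downward */
            }
        }
        return 0;
    }

For the HighMem classzone this yields 1024 extra pages reserved in Normal and 1008 in DMA, so user allocations that could have been served by HighMem cannot drain the zones that DMA and kernel allocations depend on.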
@@ -852,3 +897,16 @@
}
__setup("memfrac=", setup_mem_frac);
+
+static int __init setup_lower_zone_reserve(char *str)
+{
+ int j = 0;
+
+ while (get_option(&str, &lower_zone_reserve_ratio[j++]) == 2);
+ printk("setup_lower_zone_reserve: ");
+ for (j = 0; j < MAX_NR_ZONES-1; j++) printk("%d ", lower_zone_reserve_ratio[j]);
+ printk("\n");
+ return 1;
+}
+
+__setup("lower_zone_reserve=", setup_lower_zone_reserve);
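The companion boot parameter lets the ratios be tuned without recompiling; get_option() walks a comma-separated list, and since reserve = realsize / ratio, raising a ratio shrinks the corresponding reserve. For example, doubling both defaults halves the reserves:

    lower_zone_reserve=512,64

Note that the loop as written does not bound j, so it trusts the administrator not to pass more than MAX_NR_ZONES-1 values.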