patch-2.4.23 linux-2.4.23/mm/vmscan.c
- Lines: 342
- Date: 2003-11-28 10:26:21.000000000 -0800
- Orig file: linux-2.4.22/mm/vmscan.c
- Orig date: 2002-11-28 15:53:15.000000000 -0800
diff -urN linux-2.4.22/mm/vmscan.c linux-2.4.23/mm/vmscan.c
@@ -27,12 +27,42 @@
#include <asm/pgalloc.h>
/*
- * The "priority" of VM scanning is how much of the queues we
- * will scan in one go. A value of 6 for DEF_PRIORITY implies
- * that we'll scan 1/64th of the queues ("queue_length >> 6")
- * during a normal aging round.
+ * "vm_passes" is the number of vm passes before failing the
+ * memory balancing. Take into account 3 passes are needed
+ * for a flush/wait/free cycle and that we only scan 1/vm_cache_scan_ratio
+ * of the inactive list at each pass.
*/
-#define DEF_PRIORITY (6)
+int vm_passes = 60;
+
+/*
+ * "vm_cache_scan_ratio" is how much of the inactive LRU queue we will scan
+ * in one go. A value of 6 for vm_cache_scan_ratio implies that we'll
+ * scan 1/6 of the inactive lists during a normal aging round.
+ */
+int vm_cache_scan_ratio = 6;
+
+/*
+ * "vm_mapped_ratio" controls the pageout rate, the smaller, the earlier
+ * we'll start to pageout.
+ */
+int vm_mapped_ratio = 100;
+
+/*
+ * "vm_lru_balance_ratio" controls the balance between active and
+ * inactive cache. The bigger vm_balance is, the easier the
+ * active cache will grow, because we'll rotate the active list
+ * slowly. A value of 2 means we'll go towards a balance of
+ * 1/3 of the cache being inactive.
+ */
+int vm_lru_balance_ratio = 2;
+
+/*
+ * "vm_vfs_scan_ratio" is what proportion of the VFS queues we will scan
+ * in one go. A value of 6 for vm_vfs_scan_ratio implies that 1/6th of
+ * the unused-inode, dentry and dquot caches will be freed during a normal
+ * aging round.
+ */
+int vm_vfs_scan_ratio = 6;
/*
* The swap-out function returns 1 if it successfully
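The five knobs above replace the single DEF_PRIORITY constant (the "vm_balance" mentioned in the vm_lru_balance_ratio comment is that same knob). As a rough feel for the defaults, here is a minimal userspace sketch, not kernel code, of the arithmetic they imply; the page counts are invented for illustration:

/*
 * Userspace sketch only, not kernel code: rough arithmetic implied by the
 * new tunables, with invented page counts.
 */
#include <stdio.h>

int main(void)
{
        /* invented example zone: 120000 active + 60000 inactive pages */
        unsigned long nr_active = 120000, nr_inactive = 60000;
        int vm_passes = 60, vm_lru_balance_ratio = 2, vm_vfs_scan_ratio = 6;

        /* a flush/wait/free cycle takes 3 passes, so vm_passes = 60 allows
         * roughly 20 such cycles before memory balancing is failed */
        printf("flush/wait/free cycles: %d\n", vm_passes / 3);

        /* vm_lru_balance_ratio = 2 steers towards active = 2 * inactive,
         * i.e. about one third of the cache on the inactive list */
        printf("inactive-list target:   %lu of %lu pages\n",
               (nr_active + nr_inactive) / (vm_lru_balance_ratio + 1),
               nr_active + nr_inactive);

        /* each VFS aging round frees about 1/vm_vfs_scan_ratio of the
         * unused dentry/inode/dquot entries, here 1/6th */
        printf("VFS fraction per round: 1/%d\n", vm_vfs_scan_ratio);
        return 0;
}
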
@@ -292,13 +322,13 @@
return count;
}
-static int FASTCALL(swap_out(unsigned int priority, unsigned int gfp_mask, zone_t * classzone));
-static int swap_out(unsigned int priority, unsigned int gfp_mask, zone_t * classzone)
+static int FASTCALL(swap_out(zone_t * classzone));
+static int swap_out(zone_t * classzone)
{
int counter, nr_pages = SWAP_CLUSTER_MAX;
struct mm_struct *mm;
- counter = mmlist_nr;
+ counter = mmlist_nr << 1;
do {
if (unlikely(current->need_resched)) {
__set_current_state(TASK_RUNNING);
@@ -334,15 +364,15 @@
return 0;
}
-static int FASTCALL(shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int priority));
-static int shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int priority)
+static void FASTCALL(refill_inactive(int nr_pages, zone_t * classzone));
+static int FASTCALL(shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int * failed_swapout));
+static int shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int * failed_swapout)
{
struct list_head * entry;
- int max_scan = nr_inactive_pages / priority;
- int max_mapped = min((nr_pages << (10 - priority)), max_scan / 10);
+ int max_scan = (classzone->nr_inactive_pages + classzone->nr_active_pages) / vm_cache_scan_ratio;
+ int max_mapped = vm_mapped_ratio * nr_pages;
- spin_lock(&pagemap_lru_lock);
- while (--max_scan >= 0 && (entry = inactive_list.prev) != &inactive_list) {
+ while (max_scan && classzone->nr_inactive_pages && (entry = inactive_list.prev) != &inactive_list) {
struct page * page;
if (unlikely(current->need_resched)) {
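With the priority argument gone, shrink_cache() derives its budgets directly from the tunables: max_scan is a fixed fraction of the zone's whole cache and max_mapped is proportional to the number of pages still wanted. A small userspace sketch of the two bounds, reusing the invented zone sizes from above and assuming SWAP_CLUSTER_MAX is 32 as in stock 2.4 headers:

/* Userspace sketch of the new shrink_cache() budgets; zone sizes invented,
 * SWAP_CLUSTER_MAX assumed to be 32 as in stock 2.4 headers. */
#include <stdio.h>

#define SWAP_CLUSTER_MAX 32

int main(void)
{
        unsigned long nr_active = 120000, nr_inactive = 60000;
        int vm_cache_scan_ratio = 6, vm_mapped_ratio = 100;
        int nr_pages = SWAP_CLUSTER_MAX;

        /* scan budget: a fixed fraction of the whole per-zone cache instead
         * of the old nr_inactive_pages / priority with priority counting down */
        unsigned long max_scan =
                (nr_inactive + nr_active) / vm_cache_scan_ratio;

        /* mapped-page tolerance: vm_mapped_ratio mapped pages may be seen for
         * every page still to be freed before the fallback path kicks in */
        long max_mapped = (long)vm_mapped_ratio * nr_pages;

        printf("max_scan   = %lu\n", max_scan);   /* prints 30000 */
        printf("max_mapped = %ld\n", max_mapped); /* prints 3200  */
        return 0;
}
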
@@ -371,6 +401,8 @@
if (!memclass(page_zone(page), classzone))
continue;
+ max_scan--;
+
/* Racy check to avoid trylocking when not worthwhile */
if (!page->buffers && (page_count(page) != 1 || !page->mapping))
goto page_mapped;
@@ -468,34 +500,49 @@
spin_lock(&pagecache_lock);
/*
- * this is the non-racy check for busy page.
+ * This is the non-racy check for busy page.
+ * It is critical to check PageDirty _after_ we made sure
+ * the page is freeable so not in use by anybody.
+ * At this point we're guaranteed that page->buffers is NULL,
+ * nobody can refill page->buffers under us because we still
+ * hold the page lock.
*/
- if (!page->mapping || !is_page_cache_freeable(page)) {
+ if (!page->mapping || page_count(page) > 1) {
spin_unlock(&pagecache_lock);
UnlockPage(page);
page_mapped:
- if (--max_mapped >= 0)
- continue;
+ if (--max_mapped < 0) {
+ spin_unlock(&pagemap_lru_lock);
- /*
- * Alert! We've found too many mapped pages on the
- * inactive list, so we start swapping out now!
- */
- spin_unlock(&pagemap_lru_lock);
- swap_out(priority, gfp_mask, classzone);
- return nr_pages;
- }
+ nr_pages -= kmem_cache_reap(gfp_mask);
+ if (nr_pages <= 0)
+ goto out;
- /*
- * It is critical to check PageDirty _after_ we made sure
- * the page is freeable* so not in use by anybody.
- */
+ shrink_dcache_memory(vm_vfs_scan_ratio, gfp_mask);
+ shrink_icache_memory(vm_vfs_scan_ratio, gfp_mask);
+#ifdef CONFIG_QUOTA
+ shrink_dqcache_memory(vm_vfs_scan_ratio, gfp_mask);
+#endif
+
+ if (!*failed_swapout)
+ *failed_swapout = !swap_out(classzone);
+
+ max_mapped = nr_pages * vm_mapped_ratio;
+
+ spin_lock(&pagemap_lru_lock);
+ refill_inactive(nr_pages, classzone);
+ }
+ continue;
+
+ }
if (PageDirty(page)) {
spin_unlock(&pagecache_lock);
UnlockPage(page);
continue;
}
+ __lru_cache_del(page);
+
/* point of no return */
if (likely(!PageSwapCache(page))) {
__remove_inode_page(page);
@@ -508,7 +555,6 @@
swap_free(swap);
}
- __lru_cache_del(page);
UnlockPage(page);
/* effectively free the page here */
@@ -520,6 +566,7 @@
}
spin_unlock(&pagemap_lru_lock);
+ out:
return nr_pages;
}
@@ -530,13 +577,15 @@
* We move them the other way when we see the
* reference bit on the page.
*/
-static void refill_inactive(int nr_pages)
+static void refill_inactive(int nr_pages, zone_t * classzone)
{
struct list_head * entry;
+ unsigned long ratio;
+
+ ratio = (unsigned long) nr_pages * classzone->nr_active_pages / (((unsigned long) classzone->nr_inactive_pages * vm_lru_balance_ratio) + 1);
- spin_lock(&pagemap_lru_lock);
entry = active_list.prev;
- while (nr_pages && entry != &active_list) {
+ while (ratio && entry != &active_list) {
struct page * page;
page = list_entry(entry, struct page, lru);
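The ratio computed above replaces the old global "keep the active list at 2/3 of the cache" heuristic with a per-zone one. A runnable worked example of the formula, using invented page counts and assuming nr_pages is SWAP_CLUSTER_MAX (32): rotation speeds up when the active list is far beyond vm_lru_balance_ratio times the inactive list, and almost stops once the inactive list is already large.

/*
 * Userspace sketch: how refill_inactive()'s ratio behaves around the
 * vm_lru_balance_ratio target.  Page counts are invented.
 */
#include <stdio.h>

static unsigned long refill_ratio(unsigned long nr_pages,
                                  unsigned long active,
                                  unsigned long inactive,
                                  unsigned long vm_lru_balance_ratio)
{
        return nr_pages * active / (inactive * vm_lru_balance_ratio + 1);
}

int main(void)
{
        unsigned long nr_pages = 32;    /* SWAP_CLUSTER_MAX in stock 2.4 */

        /* active list far beyond 2x inactive: rotate aggressively */
        printf("active=160000 inactive=20000  -> move %lu pages\n",
               refill_ratio(nr_pages, 160000, 20000, 2));      /* prints 127 */

        /* at the target balance (active = 2 * inactive): move ~nr_pages */
        printf("active=120000 inactive=60000  -> move %lu pages\n",
               refill_ratio(nr_pages, 120000, 60000, 2));      /* prints 31 */

        /* inactive already large: barely rotate, let the active cache grow */
        printf("active=40000  inactive=140000 -> move %lu pages\n",
               refill_ratio(nr_pages, 40000, 140000, 2));      /* prints 4 */

        return 0;
}
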
@@ -547,60 +596,68 @@
continue;
}
- nr_pages--;
+ ratio--;
del_page_from_active_list(page);
add_page_to_inactive_list(page);
SetPageReferenced(page);
}
- spin_unlock(&pagemap_lru_lock);
+
+ if (entry != &active_list) {
+ list_del(&active_list);
+ list_add(&active_list, entry);
+ }
}
-static int FASTCALL(shrink_caches(zone_t * classzone, int priority, unsigned int gfp_mask, int nr_pages));
-static int shrink_caches(zone_t * classzone, int priority, unsigned int gfp_mask, int nr_pages)
+static int FASTCALL(shrink_caches(zone_t * classzone, unsigned int gfp_mask, int nr_pages, int * failed_swapout));
+static int shrink_caches(zone_t * classzone, unsigned int gfp_mask, int nr_pages, int * failed_swapout)
{
- int chunk_size = nr_pages;
- unsigned long ratio;
-
nr_pages -= kmem_cache_reap(gfp_mask);
if (nr_pages <= 0)
- return 0;
+ goto out;
- nr_pages = chunk_size;
- /* try to keep the active list 2/3 of the size of the cache */
- ratio = (unsigned long) nr_pages * nr_active_pages / ((nr_inactive_pages + 1) * 2);
- refill_inactive(ratio);
-
- nr_pages = shrink_cache(nr_pages, classzone, gfp_mask, priority);
- if (nr_pages <= 0)
- return 0;
+ spin_lock(&pagemap_lru_lock);
+ refill_inactive(nr_pages, classzone);
- shrink_dcache_memory(priority, gfp_mask);
- shrink_icache_memory(priority, gfp_mask);
-#ifdef CONFIG_QUOTA
- shrink_dqcache_memory(DEF_PRIORITY, gfp_mask);
-#endif
+ nr_pages = shrink_cache(nr_pages, classzone, gfp_mask, failed_swapout);
- return nr_pages;
+out:
+ return nr_pages;
}
+static int check_classzone_need_balance(zone_t * classzone);
+
int try_to_free_pages_zone(zone_t *classzone, unsigned int gfp_mask)
{
- int priority = DEF_PRIORITY;
- int nr_pages = SWAP_CLUSTER_MAX;
-
gfp_mask = pf_gfp_mask(gfp_mask);
- do {
- nr_pages = shrink_caches(classzone, priority, gfp_mask, nr_pages);
- if (nr_pages <= 0)
- return 1;
- } while (--priority);
- /*
- * Hmm.. Cache shrink failed - time to kill something?
- * Mhwahahhaha! This is the part I really like. Giggle.
- */
- out_of_memory();
+ for (;;) {
+ int tries = vm_passes;
+ int failed_swapout = !(gfp_mask & __GFP_IO);
+ int nr_pages = SWAP_CLUSTER_MAX;
+
+ do {
+ nr_pages = shrink_caches(classzone, gfp_mask, nr_pages, &failed_swapout);
+ if (nr_pages <= 0)
+ return 1;
+ shrink_dcache_memory(vm_vfs_scan_ratio, gfp_mask);
+ shrink_icache_memory(vm_vfs_scan_ratio, gfp_mask);
+#ifdef CONFIG_QUOTA
+ shrink_dqcache_memory(vm_vfs_scan_ratio, gfp_mask);
+#endif
+ if (!failed_swapout)
+ failed_swapout = !swap_out(classzone);
+ } while (--tries);
+
+ if (likely(current->pid != 1))
+ break;
+ if (!check_classzone_need_balance(classzone))
+ break;
+
+ __set_current_state(TASK_RUNNING);
+ yield();
+ }
+
return 0;
}
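The rewritten entry point retries shrink_caches() up to vm_passes times, interleaving VFS-cache shrinking and swap_out(), and only init (pid 1) is allowed to loop for another round while the zone still needs balancing; everyone else returns 0 instead of hitting the old out_of_memory() path. Below is a stubbed, runnable sketch of just that control flow; the *_stub helpers are invented stand-ins for the real reclaim primitives, and failed_swapout starts out true whenever __GFP_IO is clear so swap is never attempted for allocations that cannot do I/O.

/*
 * Userspace sketch of the new try_to_free_pages_zone() control flow.
 * All reclaim primitives are stubs; only the loop structure is real.
 */
#include <stdio.h>

#define SWAP_CLUSTER_MAX 32
static int vm_passes = 60, vm_vfs_scan_ratio = 6;

/* invented stubs standing in for the real reclaim primitives */
static int shrink_caches_stub(int nr_pages) { return nr_pages - 5; }
static void shrink_vfs_stub(int ratio)      { (void)ratio; }
static int swap_out_stub(void)              { return 1; /* made progress */ }

static int try_to_free_pages_zone_sketch(int gfp_allows_io)
{
        for (;;) {
                int tries = vm_passes;
                int failed_swapout = !gfp_allows_io;  /* no __GFP_IO: never swap */
                int nr_pages = SWAP_CLUSTER_MAX;

                do {
                        nr_pages = shrink_caches_stub(nr_pages);
                        if (nr_pages <= 0)
                                return 1;             /* freed enough */
                        shrink_vfs_stub(vm_vfs_scan_ratio);
                        if (!failed_swapout)
                                failed_swapout = !swap_out_stub();
                } while (--tries);

                /* in the real code only init (pid 1) loops and yields here
                 * while the zone still needs balancing; everyone else gives
                 * up after vm_passes tries */
                break;
        }
        return 0;
}

int main(void)
{
        printf("result: %d\n", try_to_free_pages_zone_sketch(1));
        return 0;
}
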
@@ -627,11 +684,12 @@
static int check_classzone_need_balance(zone_t * classzone)
{
- zone_t * first_classzone;
+ zone_t * first_zone;
+ int class_idx = zone_idx(classzone);
- first_classzone = classzone->zone_pgdat->node_zones;
- while (classzone >= first_classzone) {
- if (classzone->free_pages > classzone->pages_high)
+ first_zone = classzone->zone_pgdat->node_zones;
+ while (classzone >= first_zone) {
+ if (classzone->free_pages > classzone->watermarks[class_idx].high)
return 0;
classzone--;
}
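The balance check now walks from the class zone down to the node's lowest zone and compares each zone's free pages against the high watermark stored per class index, instead of the old single pages_high field. A simplified, runnable illustration follows; the struct layout and the MAX_NR_ZONES value are invented stand-ins for this example, and only the walk-and-compare logic mirrors the hunk above.

/*
 * Userspace illustration of the per-class watermark check.
 * The types are simplified stand-ins, not the real 2.4.23 structures.
 */
#include <stdio.h>

#define MAX_NR_ZONES 3          /* DMA, Normal, HighMem in a typical node */

struct watermark { unsigned long low, high; };

struct zone_stub {
        unsigned long free_pages;
        struct watermark watermarks[MAX_NR_ZONES];
};

/* returns 0 if any zone at or below class_idx already has free pages above
 * the high mark for that class, 1 if all of them are below it */
static int need_balance(struct zone_stub *zones, int class_idx)
{
        int i;

        for (i = class_idx; i >= 0; i--)
                if (zones[i].free_pages > zones[i].watermarks[class_idx].high)
                        return 0;
        return 1;
}

int main(void)
{
        struct zone_stub zones[MAX_NR_ZONES] = {
                { .free_pages = 500,  .watermarks = {{64,128},{128,256},{256,512}} },
                { .free_pages = 2000, .watermarks = {{64,128},{128,256},{256,512}} },
                { .free_pages = 100,  .watermarks = {{64,128},{128,256},{256,512}} },
        };

        /* a HighMem-class request (class_idx 2): the Normal zone's 2000 free
         * pages clear the 512-page high mark, so no balancing is needed */
        printf("need_balance(HighMem) = %d\n", need_balance(zones, 2));
        return 0;
}
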
@@ -647,12 +705,12 @@
zone = pgdat->node_zones + i;
if (unlikely(current->need_resched))
schedule();
- if (!zone->need_balance)
+ if (!zone->need_balance || !zone->size)
continue;
if (!try_to_free_pages_zone(zone, GFP_KSWAPD)) {
zone->need_balance = 0;
__set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(HZ);
+ schedule_timeout(HZ*5);
continue;
}
if (check_classzone_need_balance(zone))
@@ -684,7 +742,7 @@
for (i = pgdat->nr_zones-1; i >= 0; i--) {
zone = pgdat->node_zones + i;
- if (!zone->need_balance)
+ if (!zone->need_balance || !zone->size)
continue;
return 0;
}