patch-2.4.1 linux/mm/vmscan.c
- Lines: 625
- Date: Mon Jan 15 12:36:49 2001
- Orig file: v2.4.0/linux/mm/vmscan.c
- Orig date: Wed Jan 3 20:45:26 2001
diff -u --recursive --new-file v2.4.0/linux/mm/vmscan.c linux/mm/vmscan.c
@@ -35,45 +35,21 @@
* using a process that no longer actually exists (it might
* have died while we slept).
*/
-static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, int gfp_mask)
+static void try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, struct page *page)
{
pte_t pte;
swp_entry_t entry;
- struct page * page;
- int onlist;
-
- pte = *page_table;
- if (!pte_present(pte))
- goto out_failed;
- page = pte_page(pte);
- if ((!VALID_PAGE(page)) || PageReserved(page))
- goto out_failed;
-
- if (!mm->swap_cnt)
- return 1;
-
- mm->swap_cnt--;
- onlist = PageActive(page);
/* Don't look at this pte if it's been accessed recently. */
if (ptep_test_and_clear_young(page_table)) {
- age_page_up(page);
- goto out_failed;
+ page->age += PAGE_AGE_ADV;
+ if (page->age > PAGE_AGE_MAX)
+ page->age = PAGE_AGE_MAX;
+ return;
}
- if (!onlist)
- /* The page is still mapped, so it can't be freeable... */
- age_page_down_ageonly(page);
-
- /*
- * If the page is in active use by us, or if the page
- * is in active use by others, don't unmap it or
- * (worse) start unneeded IO.
- */
- if (page->age > 0)
- goto out_failed;
if (TryLockPage(page))
- goto out_failed;
+ return;
/* From this point on, the odds are that we're going to
* nuke this pte, so read and clear the pte. This hook
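The hunk above drops the per-mm swap_cnt throttle and the age_page_up()/age_page_down_ageonly() helpers: the pte-present and page-validity checks move out to the caller, the function returns void, and aging up on a referenced pte is done inline with saturation. A minimal standalone sketch of that saturating update (field type simplified to int, and the PAGE_AGE_ADV/PAGE_AGE_MAX values assumed for illustration; the real constants live in include/linux/swap.h):

    #define PAGE_AGE_ADV 3      /* assumed: bump applied on reference */
    #define PAGE_AGE_MAX 64     /* assumed: saturation ceiling */

    static void age_up_saturating(int *age)
    {
            int new_age = *age + PAGE_AGE_ADV;
            /* clamp so a hot page cannot age past the ceiling */
            *age = new_age > PAGE_AGE_MAX ? PAGE_AGE_MAX : new_age;
    }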
@@ -87,9 +63,6 @@
* Is the page already in the swap cache? If so, then
* we can just drop our reference to it without doing
* any IO - it's already up-to-date on disk.
- *
- * Return 0, as we didn't actually free any real
- * memory, and we should just continue our scan.
*/
if (PageSwapCache(page)) {
entry.val = page->index;
@@ -99,12 +72,12 @@
swap_duplicate(entry);
set_pte(page_table, swp_entry_to_pte(entry));
drop_pte:
- UnlockPage(page);
mm->rss--;
- deactivate_page(page);
+ if (!page->age)
+ deactivate_page(page);
+ UnlockPage(page);
page_cache_release(page);
-out_failed:
- return 0;
+ return;
}
/*
@@ -153,34 +126,20 @@
out_unlock_restore:
set_pte(page_table, pte);
UnlockPage(page);
- return 0;
+ return;
}
-/*
- * A new implementation of swap_out(). We do not swap complete processes,
- * but only a small number of blocks, before we continue with the next
- * process. The number of blocks actually swapped is determined on the
- * number of page faults, that this process actually had in the last time,
- * so we won't swap heavily used processes all the time ...
- *
- * Note: the priority argument is a hint on much CPU to waste with the
- * swap block search, not a hint, of how much blocks to swap with
- * each process.
- *
- * (C) 1993 Kai Petzke, wpp@marie.physik.tu-berlin.de
- */
-
-static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int gfp_mask)
+static int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int count)
{
pte_t * pte;
unsigned long pmd_end;
if (pmd_none(*dir))
- return 0;
+ return count;
if (pmd_bad(*dir)) {
pmd_ERROR(*dir);
pmd_clear(dir);
- return 0;
+ return count;
}
pte = pte_offset(dir, address);
@@ -190,28 +149,33 @@
end = pmd_end;
do {
- int result;
- mm->swap_address = address + PAGE_SIZE;
- result = try_to_swap_out(mm, vma, address, pte, gfp_mask);
- if (result)
- return result;
+ if (pte_present(*pte)) {
+ struct page *page = pte_page(*pte);
+
+ if (VALID_PAGE(page) && !PageReserved(page)) {
+ try_to_swap_out(mm, vma, address, pte, page);
+ if (!--count)
+ break;
+ }
+ }
address += PAGE_SIZE;
pte++;
} while (address && (address < end));
- return 0;
+ mm->swap_address = address + PAGE_SIZE;
+ return count;
}
-static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int gfp_mask)
+static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int count)
{
pmd_t * pmd;
unsigned long pgd_end;
if (pgd_none(*dir))
- return 0;
+ return count;
if (pgd_bad(*dir)) {
pgd_ERROR(*dir);
pgd_clear(dir);
- return 0;
+ return count;
}
pmd = pmd_offset(dir, address);
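From here on the walkers thread a page budget through every level instead of passing a gfp_mask: each level consumes from count, returns whatever is left, and the caller stops descending the moment the budget hits zero. A toy two-level walk showing the same convention (simplified types, nothing kernel-specific):

    #include <stdio.h>

    /* leaf scan: pretend each pte examined costs one unit of budget */
    static int scan_leaf(int count)
    {
            return count - 1;
    }

    static int scan_table(int entries, int count)
    {
            int i;
            for (i = 0; i < entries; i++) {
                    count = scan_leaf(count);
                    if (!count)
                            break;  /* budget spent: unwind immediately */
            }
            return count;           /* remaining budget for the caller */
    }

    int main(void)
    {
            /* a budget of 8 halts the walk after 8 leaves, no matter
             * how many tables remain */
            printf("budget left: %d\n", scan_table(32, 8));
            return 0;
    }

One subtlety in the real hunk: mm->swap_address is now advanced once per pmd, after the loop, rather than once per pte before each swap-out attempt.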
@@ -221,23 +185,23 @@
end = pgd_end;
do {
- int result = swap_out_pmd(mm, vma, pmd, address, end, gfp_mask);
- if (result)
- return result;
+ count = swap_out_pmd(mm, vma, pmd, address, end, count);
+ if (!count)
+ break;
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
} while (address && (address < end));
- return 0;
+ return count;
}
-static int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int gfp_mask)
+static int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int count)
{
pgd_t *pgdir;
unsigned long end;
/* Don't swap out areas which are locked down */
if (vma->vm_flags & (VM_LOCKED|VM_RESERVED))
- return 0;
+ return count;
pgdir = pgd_offset(mm, address);
@@ -245,18 +209,17 @@
if (address >= end)
BUG();
do {
- int result = swap_out_pgd(mm, vma, pgdir, address, end, gfp_mask);
- if (result)
- return result;
+ count = swap_out_pgd(mm, vma, pgdir, address, end, count);
+ if (!count)
+ break;
address = (address + PGDIR_SIZE) & PGDIR_MASK;
pgdir++;
} while (address && (address < end));
- return 0;
+ return count;
}
-static int swap_out_mm(struct mm_struct * mm, int gfp_mask)
+static int swap_out_mm(struct mm_struct * mm, int count)
{
- int result = 0;
unsigned long address;
struct vm_area_struct* vma;
@@ -276,8 +239,8 @@
address = vma->vm_start;
for (;;) {
- result = swap_out_vma(mm, vma, address, gfp_mask);
- if (result)
+ count = swap_out_vma(mm, vma, address, count);
+ if (!count)
goto out_unlock;
vma = vma->vm_next;
if (!vma)
@@ -287,94 +250,63 @@
}
/* Reset to 0 when we reach the end of address space */
mm->swap_address = 0;
- mm->swap_cnt = 0;
out_unlock:
spin_unlock(&mm->page_table_lock);
- return result;
+ return !count;
}
/*
- * Select the task with maximal swap_cnt and try to swap out a page.
* N.B. This function returns only 0 or 1. Return values != 1 from
* the lower level routines result in continued processing.
*/
#define SWAP_SHIFT 5
#define SWAP_MIN 8
+static inline int swap_amount(struct mm_struct *mm)
+{
+ int nr = mm->rss >> SWAP_SHIFT;
+ return nr < SWAP_MIN ? SWAP_MIN : nr;
+}
+
static int swap_out(unsigned int priority, int gfp_mask)
{
int counter;
- int __ret = 0;
+ int retval = 0;
+ struct mm_struct *mm = current->mm;
- /*
- * We make one or two passes through the task list, indexed by
- * assign = {0, 1}:
- * Pass 1: select the swappable task with maximal RSS that has
- * not yet been swapped out.
- * Pass 2: re-assign rss swap_cnt values, then select as above.
- *
- * With this approach, there's no need to remember the last task
- * swapped out. If the swap-out fails, we clear swap_cnt so the
- * task won't be selected again until all others have been tried.
- *
- * Think of swap_cnt as a "shadow rss" - it tells us which process
- * we want to page out (always try largest first).
- */
- counter = (nr_threads << SWAP_SHIFT) >> priority;
- if (counter < 1)
- counter = 1;
+ /* Always start by trying to penalize the process that is allocating memory */
+ if (mm)
+ retval = swap_out_mm(mm, swap_amount(mm));
- for (; counter >= 0; counter--) {
+ /* Then, look at the other mm's */
+ counter = mmlist_nr >> priority;
+ do {
struct list_head *p;
- unsigned long max_cnt = 0;
- struct mm_struct *best = NULL;
- int assign = 0;
- int found_task = 0;
- select:
+
spin_lock(&mmlist_lock);
p = init_mm.mmlist.next;
- for (; p != &init_mm.mmlist; p = p->next) {
- struct mm_struct *mm = list_entry(p, struct mm_struct, mmlist);
- if (mm->rss <= 0)
- continue;
- found_task++;
- /* Refresh swap_cnt? */
- if (assign == 1) {
- mm->swap_cnt = (mm->rss >> SWAP_SHIFT);
- if (mm->swap_cnt < SWAP_MIN)
- mm->swap_cnt = SWAP_MIN;
- }
- if (mm->swap_cnt > max_cnt) {
- max_cnt = mm->swap_cnt;
- best = mm;
- }
- }
+ if (p == &init_mm.mmlist)
+ goto empty;
+
+ /* Move it to the back of the queue.. */
+ list_del(p);
+ list_add_tail(p, &init_mm.mmlist);
+ mm = list_entry(p, struct mm_struct, mmlist);
- /* Make sure it doesn't disappear */
- if (best)
- atomic_inc(&best->mm_users);
+ /* Make sure the mm doesn't disappear when we drop the lock.. */
+ atomic_inc(&mm->mm_users);
spin_unlock(&mmlist_lock);
- /*
- * We have dropped the tasklist_lock, but we
- * know that "mm" still exists: we are running
- * with the big kernel lock, and exit_mm()
- * cannot race with us.
- */
- if (!best) {
- if (!assign && found_task > 0) {
- assign = 1;
- goto select;
- }
- break;
- } else {
- __ret = swap_out_mm(best, gfp_mask);
- mmput(best);
- break;
- }
- }
- return __ret;
+ /* Walk about 6% of the address space each time */
+ retval |= swap_out_mm(mm, swap_amount(mm));
+ mmput(mm);
+ } while (--counter >= 0);
+ return retval;
+
+empty:
+ spin_unlock(&mmlist_lock);
+ return 0;
}
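swap_out() no longer hunts for the task with the largest swap_cnt; it penalizes the allocating process first, then rotates through init_mm.mmlist round-robin, moving each victim to the back of the queue and pinning it with atomic_inc(&mm->mm_users) before dropping mmlist_lock. The rotation itself is plain doubly-linked-list surgery; a self-contained userspace equivalent of the list_del()/list_add_tail() pair (toy node type, no kernel headers):

    #include <stdio.h>

    struct node { struct node *prev, *next; int id; };

    /* move the first real element to the tail, so that repeated
     * calls visit every element in round-robin order */
    static struct node *rotate(struct node *head)
    {
            struct node *p = head->next;
            if (p == head)
                    return NULL;            /* empty list */
            /* list_del(p) */
            p->prev->next = p->next;
            p->next->prev = p->prev;
            /* list_add_tail(p, head) */
            p->prev = head->prev;
            p->next = head;
            p->prev->next = p;
            head->prev = p;
            return p;
    }

    int main(void)
    {
            struct node head = { &head, &head, 0 };
            struct node n1 = { &head, &head, 1 };
            head.next = head.prev = &n1;    /* one-element list */
            printf("visited %d\n", rotate(&head)->id);
            return 0;
    }

The per-pass budget comes from swap_amount(): at least SWAP_MIN (8) ptes, otherwise rss >> SWAP_SHIFT, i.e. about 1/32 of the resident set per pass.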
@@ -540,7 +472,6 @@
*/
if (PageDirty(page)) {
int (*writepage)(struct page *) = page->mapping->a_ops->writepage;
- int result;
if (!writepage)
goto page_active;
@@ -558,16 +489,12 @@
page_cache_get(page);
spin_unlock(&pagemap_lru_lock);
- result = writepage(page);
+ writepage(page);
page_cache_release(page);
/* And re-start the thing.. */
spin_lock(&pagemap_lru_lock);
- if (result != 1)
- continue;
- /* writepage refused to do anything */
- set_page_dirty(page);
- goto page_active;
+ continue;
}
/*
@@ -808,6 +735,9 @@
int inactive_shortage(void)
{
int shortage = 0;
+ pg_data_t *pgdat = pgdat_list;
+
+ /* Is the inactive dirty list too small? */
shortage += freepages.high;
shortage += inactive_target;
@@ -818,7 +748,27 @@
if (shortage > 0)
return shortage;
- return 0;
+ /* If not, do we have enough per-zone pages on the inactive list? */
+
+ shortage = 0;
+
+ do {
+ int i;
+ for(i = 0; i < MAX_NR_ZONES; i++) {
+ int zone_shortage;
+ zone_t *zone = pgdat->node_zones+ i;
+
+ zone_shortage = zone->pages_high;
+ zone_shortage -= zone->inactive_dirty_pages;
+ zone_shortage -= zone->inactive_clean_pages;
+ zone_shortage -= zone->free_pages;
+ if (zone_shortage > 0)
+ shortage += zone_shortage;
+ }
+ pgdat = pgdat->node_next;
+ } while (pgdat);
+
+ return shortage;
}
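inactive_shortage() now falls through to a per-zone check: a zone is short when its high watermark exceeds the sum of its inactive-dirty, inactive-clean and free pages, and only positive per-zone deficits accumulate, so a surplus in one zone cannot paper over a deficit in another. The arithmetic, reduced to a sketch over the four counters the loop actually reads (hypothetical struct, not the kernel's zone_t):

    struct zone_counters {
            long pages_high;
            long inactive_dirty_pages;
            long inactive_clean_pages;
            long free_pages;
    };

    static long zone_deficit(const struct zone_counters *z)
    {
            long s = z->pages_high
                   - z->inactive_dirty_pages
                   - z->inactive_clean_pages
                   - z->free_pages;
            return s > 0 ? s : 0;   /* never let a surplus subtract */
    }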
/*
@@ -833,72 +783,35 @@
* really care about latency. In that case we don't try
* to free too many pages.
*/
+#define DEF_PRIORITY (6)
static int refill_inactive(unsigned int gfp_mask, int user)
{
- int priority, count, start_count, made_progress;
+ int count, start_count, maxtry;
count = inactive_shortage() + free_shortage();
if (user)
count = (1 << page_cluster);
start_count = count;
- /* Always trim SLAB caches when memory gets low. */
- kmem_cache_reap(gfp_mask);
-
- priority = 6;
+ maxtry = 6;
do {
- made_progress = 0;
-
if (current->need_resched) {
__set_current_state(TASK_RUNNING);
schedule();
}
- while (refill_inactive_scan(priority, 1)) {
- made_progress = 1;
+ while (refill_inactive_scan(DEF_PRIORITY, 1)) {
if (--count <= 0)
goto done;
}
- /*
- * don't be too light against the d/i cache since
- * refill_inactive() almost never fail when there's
- * really plenty of memory free.
- */
- shrink_dcache_memory(priority, gfp_mask);
- shrink_icache_memory(priority, gfp_mask);
+ /* If refill_inactive_scan failed, try to page stuff out.. */
+ swap_out(DEF_PRIORITY, gfp_mask);
- /*
- * Then, try to page stuff out..
- */
- while (swap_out(priority, gfp_mask)) {
- made_progress = 1;
- if (--count <= 0)
- goto done;
- }
-
- /*
- * If we either have enough free memory, or if
- * page_launder() will be able to make enough
- * free memory, then stop.
- */
- if (!inactive_shortage() || !free_shortage())
- goto done;
-
- /*
- * Only switch to a lower "priority" if we
- * didn't make any useful progress in the
- * last loop.
- */
- if (!made_progress)
- priority--;
- } while (priority >= 0);
-
- /* Always end on a refill_inactive.., may sleep... */
- while (refill_inactive_scan(0, 1)) {
- if (--count <= 0)
- goto done;
- }
+ if (--maxtry <= 0)
+ return 0;
+
+ } while (inactive_shortage());
done:
return (count < start_count);
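refill_inactive() trades the old decaying-priority loop for a fixed DEF_PRIORITY scan with a hard retry cap: scan, fall back to swap_out() when the scan stops producing, and give up after six rounds even if inactive_shortage() persists. A skeleton of that bounded-retry shape (toy predicate standing in for the real scan):

    #include <stdio.h>

    static int shortage = 5;

    /* stand-in for refill_inactive_scan(): returns nonzero while it
     * still finds pages to move */
    static int scan_once(void)
    {
            if (shortage > 0) {
                    shortage--;
                    return 1;
            }
            return 0;
    }

    int main(void)
    {
            int maxtry = 6;
            do {
                    while (scan_once())
                            ;               /* consume what we can */
                    if (--maxtry <= 0)
                            break;          /* bounded: never loop forever */
            } while (shortage > 0);
            printf("shortage %d, tries left %d\n", shortage, maxtry);
            return 0;
    }

The slab/dcache/icache shrinking that used to happen here moves into do_try_to_free_pages(), as the hunks below show.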
@@ -922,20 +835,29 @@
/*
* If needed, we move pages from the active list
- * to the inactive list. We also "eat" pages from
- * the inode and dentry cache whenever we do this.
+ * to the inactive list.
*/
- if (free_shortage() || inactive_shortage()) {
- shrink_dcache_memory(6, gfp_mask);
- shrink_icache_memory(6, gfp_mask);
+ if (inactive_shortage())
ret += refill_inactive(gfp_mask, user);
+
+ /*
+ * Delete pages from the inode and dentry caches and
+ * reclaim unused slab cache if memory is low.
+ */
+ if (free_shortage()) {
+ shrink_dcache_memory(DEF_PRIORITY, gfp_mask);
+ shrink_icache_memory(DEF_PRIORITY, gfp_mask);
} else {
/*
- * Reclaim unused slab cache memory.
+ * Illogical, but true. At least for now.
+ *
+ * If we're _not_ under shortage any more, we
+ * reap the caches. Why? Because a noticeable
+ * part of the caches are the buffer-heads,
+ * which we'll want to keep if under shortage.
*/
kmem_cache_reap(gfp_mask);
- ret = 1;
- }
+ }
return ret;
}
@@ -988,13 +910,8 @@
static int recalc = 0;
/* If needed, try to free some memory. */
- if (inactive_shortage() || free_shortage()) {
- int wait = 0;
- /* Do we need to do some synchronous flushing? */
- if (waitqueue_active(&kswapd_done))
- wait = 1;
- do_try_to_free_pages(GFP_KSWAPD, wait);
- }
+ if (inactive_shortage() || free_shortage())
+ do_try_to_free_pages(GFP_KSWAPD, 0);
/*
* Do some (very minimal) background scanning. This
@@ -1002,7 +919,7 @@
* every minute. This clears old referenced bits
* and moves unused pages to the inactive list.
*/
- refill_inactive_scan(6, 0);
+ refill_inactive_scan(DEF_PRIORITY, 0);
/* Once a second, recalculate some VM stats. */
if (time_after(jiffies, recalc + HZ)) {
@@ -1010,11 +927,6 @@
recalculate_vm_stats();
}
- /*
- * Wake up everybody waiting for free memory
- * and unplug the disk queue.
- */
- wake_up_all(&kswapd_done);
run_task_queue(&tq_disk);
/*
@@ -1045,33 +957,10 @@
}
}
-void wakeup_kswapd(int block)
+void wakeup_kswapd(void)
{
- DECLARE_WAITQUEUE(wait, current);
-
- if (current == kswapd_task)
- return;
-
- if (!block) {
- if (waitqueue_active(&kswapd_wait))
- wake_up(&kswapd_wait);
- return;
- }
-
- /*
- * Kswapd could wake us up before we get a chance
- * to sleep, so we have to be very careful here to
- * prevent SMP races...
- */
- __set_current_state(TASK_UNINTERRUPTIBLE);
- add_wait_queue(&kswapd_done, &wait);
-
- if (waitqueue_active(&kswapd_wait))
- wake_up(&kswapd_wait);
- schedule();
-
- remove_wait_queue(&kswapd_done, &wait);
- __set_current_state(TASK_RUNNING);
+ if (current != kswapd_task)
+ wake_up_process(kswapd_task);
}
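wakeup_kswapd() loses its blocking mode: callers can no longer sleep on kswapd_done waiting for kswapd to make progress, they simply poke the thread with wake_up_process(). The deleted code was careful for a reason: a waiter must publish its sleeping state before issuing the wakeup, or a wakeup arriving between the check and the sleep is lost. The same discipline in a self-contained userspace analogue (pthreads condition variable, where the mutex enforces the ordering):

    #include <pthread.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t progress = PTHREAD_COND_INITIALIZER;
    static int work_done;

    /* waiter: the mutex is held across both the test and the sleep,
     * so a wakeup cannot slip in between them */
    static void wait_for_progress(void)
    {
            pthread_mutex_lock(&lock);
            while (!work_done)
                    pthread_cond_wait(&progress, &lock);
            pthread_mutex_unlock(&lock);
    }

    /* waker: set the condition before signalling */
    static void report_progress(void)
    {
            pthread_mutex_lock(&lock);
            work_done = 1;
            pthread_cond_signal(&progress);
            pthread_mutex_unlock(&lock);
    }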
/*
@@ -1096,7 +985,7 @@
/*
* Kreclaimd will move pages from the inactive_clean list to the
* free list, in order to keep atomic allocations possible under
- * all circumstances. Even when kswapd is blocked on IO.
+ * all circumstances.
*/
int kreclaimd(void *unused)
{