patch-2.4.10 linux/mm/swapfile.c
- Lines: 1005
- Date: Sat Sep 22 20:37:35 2001
- Orig file: v2.4.9/linux/mm/swapfile.c
- Orig date: Sun Aug 12 13:28:01 2001
diff -u --recursive --new-file v2.4.9/linux/mm/swapfile.c linux/mm/swapfile.c
@@ -14,39 +14,27 @@
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/shm.h>
+#include <linux/compiler.h>
#include <asm/pgtable.h>
spinlock_t swaplock = SPIN_LOCK_UNLOCKED;
unsigned int nr_swapfiles;
int total_swap_pages;
+static int swap_overflow;
+
+static const char Bad_file[] = "Bad swap file entry ";
+static const char Unused_file[] = "Unused swap file entry ";
+static const char Bad_offset[] = "Bad swap offset entry ";
+static const char Unused_offset[] = "Unused swap offset entry ";
struct swap_list_t swap_list = {-1, -1};
struct swap_info_struct swap_info[MAX_SWAPFILES];
-/*
- * When swap space gets filled up, we will set this flag.
- * This will make do_swap_page(), in the page fault path,
- * free swap entries on swapin so we'll reclaim swap space
- * in order to be able to swap something out.
- *
- * At the moment we start reclaiming when swap usage goes
- * over 80% of swap space.
- *
- * XXX: Random numbers, fixme.
- */
-#define SWAP_FULL_PCT 80
-int vm_swap_full (void)
-{
- int swap_used = total_swap_pages - nr_swap_pages;
-
- return swap_used * 100 > total_swap_pages * SWAP_FULL_PCT;
-}
-
#define SWAPFILE_CLUSTER 256
-static inline int scan_swap_map(struct swap_info_struct *si, unsigned short count)
+static inline int scan_swap_map(struct swap_info_struct *si)
{
unsigned long offset;
/*
@@ -89,20 +77,33 @@
for (offset = si->lowest_bit; offset <= si->highest_bit ; offset++) {
if (si->swap_map[offset])
continue;
+ si->lowest_bit = offset+1;
got_page:
if (offset == si->lowest_bit)
si->lowest_bit++;
if (offset == si->highest_bit)
si->highest_bit--;
- si->swap_map[offset] = count;
+ if (si->lowest_bit > si->highest_bit) {
+ si->lowest_bit = si->max;
+ si->highest_bit = 0;
+ }
+ /* Initial count 1 for user reference + 1 for swap cache */
+ si->swap_map[offset] = 2;
nr_swap_pages--;
si->cluster_next = offset+1;
return offset;
}
+ si->lowest_bit = si->max;
+ si->highest_bit = 0;
return 0;
}
-swp_entry_t __get_swap_page(unsigned short count)
+/*
+ * Callers of get_swap_page must hold swap_list_lock across the call,
+ * and across the following add_to_swap_cache, to guard against races
+ * with read_swap_cache_async.
+ */
+swp_entry_t get_swap_page(void)
{
struct swap_info_struct * p;
unsigned long offset;
@@ -110,20 +111,17 @@
int type, wrapped = 0;
entry.val = 0; /* Out of memory */
- if (count >= SWAP_MAP_MAX)
- goto bad_count;
- swap_list_lock();
type = swap_list.next;
if (type < 0)
goto out;
- if (nr_swap_pages == 0)
+ if (nr_swap_pages <= 0)
goto out;
while (1) {
p = &swap_info[type];
if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
swap_device_lock(p);
- offset = scan_swap_map(p, count);
+ offset = scan_swap_map(p);
swap_device_unlock(p);
if (offset) {
entry = SWP_ENTRY(type,offset);
@@ -148,21 +146,14 @@
goto out; /* out of swap space */
}
out:
- swap_list_unlock();
- return entry;
-
-bad_count:
- printk(KERN_ERR "get_swap_page: bad count %hd from %p\n",
- count, __builtin_return_address(0));
return entry;
}
-
/*
* Caller has made sure that the swapdevice corresponding to entry
* is still around or has not been recycled.
*/
-void __swap_free(swp_entry_t entry, unsigned short count)
+void swap_free(swp_entry_t entry)
{
struct swap_info_struct * p;
unsigned long offset, type;
@@ -186,9 +177,7 @@
swap_list.next = type;
swap_device_lock(p);
if (p->swap_map[offset] < SWAP_MAP_MAX) {
- if (p->swap_map[offset] < count)
- goto bad_count;
- if (!(p->swap_map[offset] -= count)) {
+ if (!--(p->swap_map[offset])) {
if (offset < p->lowest_bit)
p->lowest_bit = offset;
if (offset > p->highest_bit)
@@ -202,21 +191,16 @@
return;
bad_nofile:
- printk("swap_free: Trying to free nonexistent swap-page\n");
+ printk(KERN_ERR "swap_free: %s%08lx\n", Bad_file, entry.val);
goto out;
bad_device:
- printk("swap_free: Trying to free swap from unused swap-device\n");
+ printk(KERN_ERR "swap_free: %s%08lx\n", Unused_file, entry.val);
goto out;
bad_offset:
- printk("swap_free: offset exceeds max\n");
+ printk(KERN_ERR "swap_free: %s%08lx\n", Bad_offset, entry.val);
goto out;
bad_free:
- printk("VM: Bad swap entry %08lx\n", entry.val);
- goto out;
-bad_count:
- swap_device_unlock(p);
- swap_list_unlock();
- printk(KERN_ERR "VM: Bad count %hd current count %hd\n", count, p->swap_map[offset]);
+ printk(KERN_ERR "swap_free: %s%08lx\n", Unused_offset, entry.val);
goto out;
}
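The allocation and free paths above cooperate through the lowest_bit/highest_bit window: scan_swap_map() narrows it as slots fill (poisoning it to max/0 once nothing is free), and swap_free() re-opens it when a count drops to zero. A minimal userspace sketch of that bookkeeping, with invented names and sizes, and none of the kernel's locking or cluster logic:

/* Toy model of the free-slot window.  0 in map[] means free; slot 0
 * stands in for the swap header and is never handed out. */
#include <stdio.h>

#define TOY_MAX     8
#define TOY_MAP_MAX 0x7fff

static unsigned short map[TOY_MAX];
static int lowest_bit = 1, highest_bit = TOY_MAX - 1;

static int toy_scan_swap_map(void)
{
        int offset;

        for (offset = lowest_bit; offset <= highest_bit; offset++) {
                if (map[offset])
                        continue;
                if (offset == lowest_bit)
                        lowest_bit++;
                if (offset == highest_bit)
                        highest_bit--;
                if (lowest_bit > highest_bit) { /* window now empty */
                        lowest_bit = TOY_MAX;
                        highest_bit = 0;
                }
                map[offset] = 2;        /* 1 user ref + 1 swap cache ref */
                return offset;
        }
        lowest_bit = TOY_MAX;           /* nothing free at all */
        highest_bit = 0;
        return 0;
}

static void toy_swap_free(int offset)
{
        if (map[offset] < TOY_MAP_MAX && !--map[offset]) {
                if (offset < lowest_bit)        /* widen the window */
                        lowest_bit = offset;
                if (offset > highest_bit)
                        highest_bit = offset;
        }
}

int main(void)
{
        int slot = toy_scan_swap_map();
        printf("got slot %d\n", slot);
        toy_swap_free(slot);            /* count 2 -> 1: still in use */
        toy_swap_free(slot);            /* count 1 -> 0: window re-opens */
        printf("got slot %d again? %s\n", slot,
               toy_scan_swap_map() == slot ? "yes" : "no");
        return 0;
}

The second scan finds the same slot only because the final free widened the window back over it.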
@@ -229,33 +213,23 @@
* share this swap entry, so be cautious and let do_wp_page work out
* what to do if a write is requested later.
*/
-/* tasklist_lock and vma->vm_mm->page_table_lock are held */
+/* BKL, mmlist_lock and vma->vm_mm->page_table_lock are held */
static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address,
pte_t *dir, swp_entry_t entry, struct page* page)
{
pte_t pte = *dir;
- if (pte_none(pte))
- return;
- if (pte_present(pte)) {
- /* If this entry is swap-cached, then page must already
- hold the right address for any copies in physical
- memory */
- if (pte_page(pte) != page)
- return;
- /* We will be removing the swap cache in a moment, so... */
- ptep_mkdirty(dir);
+ if (likely(pte_to_swp_entry(pte).val != entry.val))
return;
- }
- if (pte_to_swp_entry(pte).val != entry.val)
+ if (unlikely(pte_none(pte) || pte_present(pte)))
return;
- set_pte(dir, pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
- swap_free(entry);
get_page(page);
+ set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
+ swap_free(entry);
++vma->vm_mm->rss;
}
-/* tasklist_lock and vma->vm_mm->page_table_lock are held */
+/* BKL, mmlist_lock and vma->vm_mm->page_table_lock are held */
static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
unsigned long address, unsigned long size, unsigned long offset,
swp_entry_t entry, struct page* page)
@@ -283,7 +257,7 @@
} while (address && (address < end));
}
-/* tasklist_lock and vma->vm_mm->page_table_lock are held */
+/* BKL, mmlist_lock and vma->vm_mm->page_table_lock are held */
static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
unsigned long address, unsigned long size,
swp_entry_t entry, struct page* page)
@@ -314,7 +288,7 @@
} while (address && (address < end));
}
-/* tasklist_lock and vma->vm_mm->page_table_lock are held */
+/* BKL, mmlist_lock and vma->vm_mm->page_table_lock are held */
static void unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
swp_entry_t entry, struct page* page)
{
@@ -337,8 +311,6 @@
/*
* Go through process' page directory.
*/
- if (!mm)
- return;
spin_lock(&mm->page_table_lock);
for (vma = mm->mmap; vma; vma = vma->vm_next) {
pgd_t * pgd = pgd_offset(mm, vma->vm_start);
@@ -349,53 +321,42 @@
}
/*
- * this is called when we find a page in the swap list
- * all the locks have been dropped at this point which
- * isn't a problem because we rescan the swap map
- * and we _don't_ clear the refrence count if for
- * some reason it isn't 0
+ * Scan swap_map from current position to next entry still in use.
+ * Recycle to start on reaching the end, returning 0 when empty.
*/
-
-static inline int free_found_swap_entry(unsigned int type, int i)
+static int find_next_to_unuse(struct swap_info_struct *si, int prev)
{
- struct task_struct *p;
- struct page *page;
- swp_entry_t entry;
-
- entry = SWP_ENTRY(type, i);
+ int max = si->max;
+ int i = prev;
+ int count;
- /*
- * Get a page for the entry, using the existing swap
- * cache page if there is one. Otherwise, get a clean
- * page and read the swap into it.
- */
- page = read_swap_cache_async(entry);
- if (!page) {
- swap_free(entry);
- return -ENOMEM;
- }
- lock_page(page);
- if (PageSwapCache(page))
- delete_from_swap_cache_nolock(page);
- UnlockPage(page);
- read_lock(&tasklist_lock);
- for_each_task(p)
- unuse_process(p->mm, entry, page);
- read_unlock(&tasklist_lock);
- shmem_unuse(entry, page);
- /*
- * Now get rid of the extra reference to the temporary
- * page we've been using.
- */
- page_cache_release(page);
/*
- * Check for and clear any overflowed swap map counts.
+ * No need for swap_device_lock(si) here: we're just looking
+ * for whether an entry is in use, not modifying it; false
+ * hits are okay, and sys_swapoff() has already prevented new
+ * allocations from this area (while holding swap_list_lock()).
*/
- swap_free(entry);
- return 0;
+ for (;;) {
+ if (++i >= max) {
+ if (!prev) {
+ i = 0;
+ break;
+ }
+ /*
+ * No entries in use at top of swap_map,
+ * loop back to start and recheck there.
+ */
+ max = prev + 1;
+ prev = 0;
+ i = 1;
+ }
+ count = si->swap_map[i];
+ if (count && count != SWAP_MAP_BAD)
+ break;
+ }
+ return i;
}
-
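find_next_to_unuse() above is a cyclic scan: start just past prev, wrap once to recheck the low end, and return 0 when no entry is left in use. A self-contained userspace toy of the same control flow (toy_find_next_to_unuse, TOY_MAP_BAD and the sample map are invented for illustration):

#include <stdio.h>

#define TOY_MAX      8
#define TOY_MAP_BAD  0x8000

static unsigned short map[TOY_MAX] = { TOY_MAP_BAD, 0, 3, 0, 1, 0, 0, 2 };

static int toy_find_next_to_unuse(int prev)
{
        int max = TOY_MAX;
        int i = prev;
        int count;

        for (;;) {
                if (++i >= max) {
                        if (!prev) {
                                i = 0;          /* nothing in use at all */
                                break;
                        }
                        max = prev + 1;         /* recheck below prev */
                        prev = 0;
                        i = 1;                  /* slot 0 is the header */
                }
                count = map[i];
                if (count && count != TOY_MAP_BAD)
                        break;
        }
        return i;
}

int main(void)
{
        int i = 0;
        while ((i = toy_find_next_to_unuse(i))) {
                printf("entry %d in use (count %d)\n", i, map[i]);
                map[i] = 0;                     /* pretend we drained it */
        }
        return 0;
}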
/*
* We completely avoid races by reading each swap page in advance,
* and then search for the process using it. All the necessary
@@ -404,85 +365,181 @@
static int try_to_unuse(unsigned int type)
{
struct swap_info_struct * si = &swap_info[type];
- int ret, foundpage;
+ struct mm_struct *start_mm;
+ unsigned short *swap_map;
+ unsigned short swcount;
+ struct page *page;
+ swp_entry_t entry;
+ int i = 0;
+ int retval = 0;
+ int reset_overflow = 0;
- do {
- int i;
+ /*
+ * When searching mms for an entry, a good strategy is to
+ * start at the first mm we freed the previous entry from
+ * (though actually we don't notice whether we or coincidence
+ * freed the entry). Initialize this start_mm with a hold.
+ *
+ * A simpler strategy would be to start at the last mm we
+ * freed the previous entry from; but that would take less
+ * advantage of mmlist ordering (now preserved by swap_out()),
+ * which clusters forked address spaces together, most recent
+ * child immediately after parent. If we race with dup_mmap(),
+ * we very much want to resolve parent before child, otherwise
+ * we may miss some entries: using last mm would invert that.
+ */
+ start_mm = &init_mm;
+ atomic_inc(&init_mm.mm_users);
- /*
- * The algorithm is inefficient but seldomly used
- *
- * Find a swap page in use and read it in.
+ /*
+ * Keep on scanning until all entries have gone. Usually,
+ * one pass through swap_map is enough, but not necessarily:
+ * mmput() removes mm from mmlist before exit_mmap() and its
+ * zap_page_range(). That's not too bad, those entries are
+ * on their way out, and handled faster there than here.
+ * do_munmap() behaves similarly, taking the range out of mm's
+ * vma list before zap_page_range(). But unfortunately, when
+ * unmapping a part of a vma, it takes the whole out first,
+ * then reinserts what's left after (might even reschedule if
+ * open() method called) - so swap entries may be invisible
+ * to swapoff for a while, then reappear - but that is rare.
+ */
+ while ((i = find_next_to_unuse(si, i))) {
+ /*
+ * Get a page for the entry, using the existing swap
+ * cache page if there is one. Otherwise, get a clean
+ * page and read the swap into it.
*/
- foundpage = 0;
- swap_device_lock(si);
- for (i = 1; i < si->max ; i++) {
- int count = si->swap_map[i];
- if (!count || count == SWAP_MAP_BAD)
- continue;
-
+ swap_map = &si->swap_map[i];
+ entry = SWP_ENTRY(type, i);
+ page = read_swap_cache_async(entry);
+ if (!page) {
/*
- * Prevent swaphandle from being completely
- * unused by swap_free while we are trying
- * to read in the page - this prevents warning
- * messages from rw_swap_page_base.
+ * Either swap_duplicate() failed because entry
+ * has been freed independently, and will not be
+ * reused since sys_swapoff() already disabled
+ * allocation from here, or alloc_page() failed.
*/
- foundpage = 1;
- if (count != SWAP_MAP_MAX)
- si->swap_map[i] = count + 1;
+ if (!*swap_map)
+ continue;
+ retval = -ENOMEM;
+ break;
+ }
- swap_device_unlock(si);
- ret = free_found_swap_entry(type,i);
- if (ret)
- return ret;
+ /*
+ * Don't hold on to start_mm if it looks like exiting.
+ * Can mmput ever block? if so, then we cannot risk
+ * it between deleting the page from the swap cache,
+ * and completing the search through mms (and cannot
+ * use it to avoid the long hold on mmlist_lock there).
+ */
+ if (atomic_read(&start_mm->mm_users) == 1) {
+ mmput(start_mm);
+ start_mm = &init_mm;
+ atomic_inc(&init_mm.mm_users);
+ }
- /*
- * we pick up the swap_list_lock() to guard the nr_swap_pages,
- * si->swap_map[] should only be changed if it is SWAP_MAP_MAX
- * otherwise ugly stuff can happen with other people who are in
- * the middle of a swap operation to this device. This kind of
- * operation can sometimes be detected with the undead swap
- * check. Don't worry about these 'undead' entries for now
- * they will be caught the next time though the top loop.
- * Do worry, about the weak locking that allows this to happen
- * because if it happens to a page that is SWAP_MAP_MAX
- * then bad stuff can happen.
- */
- swap_list_lock();
- swap_device_lock(si);
- if (si->swap_map[i] > 0) {
- /* normally this would just kill the swap page if
- * it still existed, it appears though that the locks
- * are a little fuzzy
- */
- if (si->swap_map[i] != SWAP_MAP_MAX) {
- printk("VM: Undead swap entry %08lx\n",
- SWP_ENTRY(type, i).val);
- } else {
- nr_swap_pages++;
- si->swap_map[i] = 0;
+ /*
+ * Wait for and lock page. Remove it from swap cache
+ * so try_to_swap_out won't bump swap count. Mark dirty
+ * so try_to_swap_out will preserve it without us having
+ * to mark any present ptes as dirty: so we can skip
+ * searching processes once swap count has all gone.
+ */
+ lock_page(page);
+ if (PageSwapCache(page))
+ delete_from_swap_cache(page);
+ SetPageDirty(page);
+ UnlockPage(page);
+ flush_page_to_ram(page);
+
+ /*
+ * Remove all references to entry, without blocking.
+ * Whenever we reach init_mm, there's no address space
+ * to search, but use it as a reminder to search shmem.
+ */
+ swcount = *swap_map;
+ if (swcount) {
+ if (start_mm == &init_mm)
+ shmem_unuse(entry, page);
+ else
+ unuse_process(start_mm, entry, page);
+ }
+ if (*swap_map) {
+ int set_start_mm = (*swap_map >= swcount);
+ struct list_head *p = &start_mm->mmlist;
+ struct mm_struct *new_start_mm = start_mm;
+ struct mm_struct *mm;
+
+ spin_lock(&mmlist_lock);
+ while (*swap_map && (p = p->next) != &start_mm->mmlist) {
+ mm = list_entry(p, struct mm_struct, mmlist);
+ swcount = *swap_map;
+ if (mm == &init_mm) {
+ set_start_mm = 1;
+ shmem_unuse(entry, page);
+ } else
+ unuse_process(mm, entry, page);
+ if (set_start_mm && *swap_map < swcount) {
+ new_start_mm = mm;
+ set_start_mm = 0;
}
}
+ atomic_inc(&new_start_mm->mm_users);
+ spin_unlock(&mmlist_lock);
+ mmput(start_mm);
+ start_mm = new_start_mm;
+ }
+ page_cache_release(page);
+
+ /*
+ * How could swap count reach 0x7fff when the maximum
+ * pid is 0x7fff, and there's no way to repeat a swap
+ * page within an mm (except in shmem, where it's the
+ * shared object which takes the reference count)?
+ * We believe SWAP_MAP_MAX cannot occur in Linux 2.4.
+ *
+ * If that's wrong, then we should worry more about
+ * exit_mmap() and do_munmap() cases described above:
+ * we might be resetting SWAP_MAP_MAX too early here.
+ * We know "Undead"s can happen, they're okay, so don't
+ * report them; but do report if we reset SWAP_MAP_MAX.
+ */
+ if (*swap_map == SWAP_MAP_MAX) {
+ swap_list_lock();
+ swap_device_lock(si);
+ nr_swap_pages++;
+ *swap_map = 0;
swap_device_unlock(si);
swap_list_unlock();
+ reset_overflow = 1;
+ }
- /*
- * This lock stuff is ulgy!
- * Make sure that we aren't completely killing
- * interactive performance.
- */
- if (current->need_resched)
- schedule();
- swap_device_lock(si);
+ /*
+ * Make sure that we aren't completely killing
+ * interactive performance. Interruptible check on
+ * signal_pending() would be nice, but changes the spec?
+ */
+ if (current->need_resched)
+ schedule();
+ else {
+ unlock_kernel();
+ lock_kernel();
}
- swap_device_unlock(si);
- } while (foundpage);
- return 0;
+ }
+
+ mmput(start_mm);
+ if (reset_overflow) {
+ printk(KERN_WARNING "swapoff: cleared swap entry overflow\n");
+ swap_overflow = 0;
+ }
+ return retval;
}
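The start_mm heuristic described in the comments, begin each search at the mm which first freed the previous entry, can be modelled outside the kernel. A hedged, array-based toy (the real code walks mmlist under mmlist_lock and tracks references through swap_map; all names and data here are invented):

#include <stdio.h>

#define NMM    4
#define NENTRY 4

/* refs[m][e]: how many references "mm" m holds on swap entry e */
static int refs[NMM][NENTRY] = {
        { 0, 1, 0, 0 },
        { 0, 1, 1, 0 },
        { 0, 0, 1, 1 },
        { 0, 0, 0, 1 },
};
static int swap_map[NENTRY] = { 0, 2, 2, 2 };

static void toy_unuse_process(int mm, int entry)
{
        swap_map[entry] -= refs[mm][entry];     /* drop this mm's refs */
        refs[mm][entry] = 0;
}

int main(void)
{
        int start_mm = 0, entry;

        for (entry = 1; entry < NENTRY; entry++) {
                int mm = start_mm, new_start_mm = start_mm;
                int set_start_mm = 1, n;

                for (n = 0; swap_map[entry] && n < NMM; n++) {
                        int before = swap_map[entry];
                        toy_unuse_process(mm, entry);
                        if (set_start_mm && swap_map[entry] < before) {
                                new_start_mm = mm; /* first mm that freed */
                                set_start_mm = 0;
                        }
                        mm = (mm + 1) % NMM;       /* next on "mmlist" */
                }
                start_mm = new_start_mm;
                printf("entry %d drained, next start_mm = %d\n",
                       entry, start_mm);
        }
        return 0;
}

Because forked address spaces sit adjacent on the list, starting the next search where the last entry was first freed tends to hit the owning mm immediately.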
asmlinkage long sys_swapoff(const char * specialfile)
{
struct swap_info_struct * p = NULL;
+ unsigned short *swap_map;
struct nameidata nd;
int i, type, prev;
int err;
@@ -500,14 +557,8 @@
for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
p = swap_info + type;
if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
- if (p->swap_file) {
- if (p->swap_file == nd.dentry)
- break;
- } else {
- if (S_ISBLK(nd.dentry->d_inode->i_mode)
- && (p->swap_device == nd.dentry->d_inode->i_rdev))
- break;
- }
+ if (p->swap_file == nd.dentry)
+ break;
}
prev = type;
}
@@ -528,8 +579,8 @@
}
nr_swap_pages -= p->pages;
total_swap_pages -= p->pages;
- swap_list_unlock();
p->flags = SWP_USED;
+ swap_list_unlock();
err = try_to_unuse(type);
if (err) {
/* re-insert swap space back into swap_list */
@@ -544,22 +595,28 @@
swap_info[prev].next = p - swap_info;
nr_swap_pages += p->pages;
total_swap_pages += p->pages;
- swap_list_unlock();
p->flags = SWP_WRITEOK;
+ swap_list_unlock();
goto out_dput;
}
if (p->swap_device)
- blkdev_put(nd.dentry->d_inode->i_bdev, BDEV_SWAP);
+ blkdev_put(p->swap_file->d_inode->i_bdev, BDEV_SWAP);
path_release(&nd);
- nd.dentry = p->swap_file;
- p->swap_file = NULL;
+ swap_list_lock();
+ swap_device_lock(p);
nd.mnt = p->swap_vfsmnt;
+ nd.dentry = p->swap_file;
p->swap_vfsmnt = NULL;
+ p->swap_file = NULL;
p->swap_device = 0;
- vfree(p->swap_map);
+ p->max = 0;
+ swap_map = p->swap_map;
p->swap_map = NULL;
p->flags = 0;
+ swap_device_unlock(p);
+ swap_list_unlock();
+ vfree(swap_map);
err = 0;
out_dput:
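Note the shape of the teardown above: the map pointer is detached while swap_list_lock/swap_device_lock are held, and vfree() runs only after both are dropped, so the potentially expensive free never happens under a spinlock. The same idiom in generic, runnable pthread form (all names invented):

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned short *swap_map;        /* protected by lock */

static void toy_swapoff_teardown(void)
{
        unsigned short *map;

        pthread_mutex_lock(&lock);
        map = swap_map;                 /* detach under the lock */
        swap_map = NULL;                /* lookers now see it gone */
        pthread_mutex_unlock(&lock);

        free(map);                      /* possibly slow: lock dropped */
}

int main(void)
{
        swap_map = calloc(1024, sizeof(*swap_map));
        toy_swapoff_teardown();
        return 0;
}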
@@ -637,20 +694,24 @@
union swap_header *swap_header = 0;
int swap_header_version;
int nr_good_pages = 0;
- unsigned long maxpages;
+ unsigned long maxpages = 1;
int swapfilesize;
struct block_device *bdev = NULL;
+ unsigned short *swap_map;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
lock_kernel();
+ swap_list_lock();
p = swap_info;
for (type = 0 ; type < nr_swapfiles ; type++,p++)
if (!(p->flags & SWP_USED))
break;
error = -EPERM;
- if (type >= MAX_SWAPFILES)
+ if (type >= MAX_SWAPFILES) {
+ swap_list_unlock();
goto out;
+ }
if (type >= nr_swapfiles)
nr_swapfiles = type+1;
p->flags = SWP_USED;
@@ -662,7 +723,6 @@
p->highest_bit = 0;
p->cluster_nr = 0;
p->sdev_lock = SPIN_LOCK_UNLOCKED;
- p->max = 1;
p->next = -1;
if (swap_flags & SWAP_FLAG_PREFER) {
p->prio =
@@ -670,6 +730,7 @@
} else {
p->prio = --least_priority;
}
+ swap_list_unlock();
error = user_path_walk(specialfile, &nd);
if (error)
goto bad_swap_2;
@@ -686,6 +747,7 @@
p->swap_device = dev;
set_blocksize(dev, PAGE_SIZE);
+ bd_acquire(swap_inode);
bdev = swap_inode->i_bdev;
bdops = devfs_get_ops(devfs_get_handle_from_inode(swap_inode));
if (bdops) bdev->bd_op = bdops;
@@ -698,29 +760,24 @@
if (!dev || (blk_size[MAJOR(dev)] &&
!blk_size[MAJOR(dev)][MINOR(dev)]))
goto bad_swap;
- error = -EBUSY;
- for (i = 0 ; i < nr_swapfiles ; i++) {
- if (i == type)
- continue;
- if (dev == swap_info[i].swap_device)
- goto bad_swap;
- }
swapfilesize = 0;
if (blk_size[MAJOR(dev)])
swapfilesize = blk_size[MAJOR(dev)][MINOR(dev)]
>> (PAGE_SHIFT - 10);
- } else if (S_ISREG(swap_inode->i_mode)) {
- error = -EBUSY;
- for (i = 0 ; i < nr_swapfiles ; i++) {
- if (i == type || !swap_info[i].swap_file)
- continue;
- if (swap_inode == swap_info[i].swap_file->d_inode)
- goto bad_swap;
- }
+ } else if (S_ISREG(swap_inode->i_mode))
swapfilesize = swap_inode->i_size >> PAGE_SHIFT;
- } else
+ else
goto bad_swap;
+ error = -EBUSY;
+ for (i = 0 ; i < nr_swapfiles ; i++) {
+ struct swap_info_struct *q = &swap_info[i];
+ if (i == type || !q->swap_file)
+ continue;
+ if (swap_inode->i_mapping == q->swap_file->d_inode->i_mapping)
+ goto bad_swap;
+ }
+
swap_header = (void *) __get_free_page(GFP_USER);
if (!swap_header) {
printk("Unable to start swapping: out of memory :-)\n");
@@ -752,17 +809,17 @@
if (!p->lowest_bit)
p->lowest_bit = i;
p->highest_bit = i;
- p->max = i+1;
+ maxpages = i+1;
j++;
}
}
nr_good_pages = j;
- p->swap_map = vmalloc(p->max * sizeof(short));
+ p->swap_map = vmalloc(maxpages * sizeof(short));
if (!p->swap_map) {
error = -ENOMEM;
goto bad_swap;
}
- for (i = 1 ; i < p->max ; i++) {
+ for (i = 1 ; i < maxpages ; i++) {
if (test_bit(i,(char *) swap_header))
p->swap_map[i] = 0;
else
@@ -782,25 +839,23 @@
}
p->lowest_bit = 1;
- p->highest_bit = swap_header->info.last_page - 1;
- p->max = swap_header->info.last_page;
-
- maxpages = SWP_OFFSET(SWP_ENTRY(0,~0UL));
- if (p->max >= maxpages)
- p->max = maxpages-1;
+ maxpages = SWP_OFFSET(SWP_ENTRY(0,~0UL)) - 1;
+ if (maxpages > swap_header->info.last_page)
+ maxpages = swap_header->info.last_page;
+ p->highest_bit = maxpages - 1;
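The clamp above exists because a swp_entry_t can only encode offsets up to SWP_OFFSET(SWP_ENTRY(0,~0UL)); a swap area whose header claims more pages than that is silently truncated. A standalone sketch of the arithmetic, using an invented 8-bit type field (the real type/offset split is architecture-specific):

#include <stdio.h>

#define TOY_TYPE_BITS 8
#define TOY_ENTRY(type, off) (((unsigned long)(off) << TOY_TYPE_BITS) | (type))
#define TOY_OFFSET(e)        ((e) >> TOY_TYPE_BITS)

int main(void)
{
        unsigned long last_page = 1UL << 20;    /* from the swap header */
        /* largest encodable offset, minus one for safety at the top */
        unsigned long maxpages = TOY_OFFSET(TOY_ENTRY(0, ~0UL)) - 1;

        if (maxpages > last_page)
                maxpages = last_page;           /* header is the limit */
        printf("usable pages: %lu\n", maxpages);
        return 0;
}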
error = -EINVAL;
if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
goto bad_swap;
/* OK, set up the swap map and apply the bad block list */
- if (!(p->swap_map = vmalloc (p->max * sizeof(short)))) {
+ if (!(p->swap_map = vmalloc(maxpages * sizeof(short)))) {
error = -ENOMEM;
goto bad_swap;
}
error = 0;
- memset(p->swap_map, 0, p->max * sizeof(short));
+ memset(p->swap_map, 0, maxpages * sizeof(short));
for (i=0; i<swap_header->info.nr_badpages; i++) {
int page = swap_header->info.badpages[i];
if (page <= 0 || page >= swap_header->info.last_page)
@@ -815,7 +870,7 @@
goto bad_swap;
}
- if (swapfilesize && p->max > swapfilesize) {
+ if (swapfilesize && maxpages > swapfilesize) {
printk(KERN_WARNING
"Swap area shorter than signature indicates\n");
error = -EINVAL;
@@ -827,9 +882,11 @@
goto bad_swap;
}
p->swap_map[0] = SWAP_MAP_BAD;
+ swap_list_lock();
+ swap_device_lock(p);
+ p->max = maxpages;
p->flags = SWP_WRITEOK;
p->pages = nr_good_pages;
- swap_list_lock();
nr_swap_pages += nr_good_pages;
total_swap_pages += nr_good_pages;
printk(KERN_INFO "Adding Swap: %dk swap-space (priority %d)\n",
@@ -849,6 +906,7 @@
} else {
swap_info[prev].next = p - swap_info;
}
+ swap_device_unlock(p);
swap_list_unlock();
error = 0;
goto out;
@@ -856,8 +914,8 @@
if (bdev)
blkdev_put(bdev, BDEV_SWAP);
bad_swap_2:
- if (p->swap_map)
- vfree(p->swap_map);
+ swap_list_lock();
+ swap_map = p->swap_map;
nd.mnt = p->swap_vfsmnt;
nd.dentry = p->swap_file;
p->swap_device = 0;
@@ -867,6 +925,9 @@
p->flags = 0;
if (!(swap_flags & SWAP_FLAG_PREFER))
++least_priority;
+ swap_list_unlock();
+ if (swap_map)
+ vfree(swap_map);
path_release(&nd);
out:
if (swap_header)
@@ -878,32 +939,30 @@
void si_swapinfo(struct sysinfo *val)
{
unsigned int i;
- unsigned long freeswap = 0;
- unsigned long totalswap = 0;
+ unsigned long nr_to_be_unused = 0;
+ swap_list_lock();
for (i = 0; i < nr_swapfiles; i++) {
unsigned int j;
- if ((swap_info[i].flags & SWP_WRITEOK) != SWP_WRITEOK)
+ if (swap_info[i].flags != SWP_USED)
continue;
for (j = 0; j < swap_info[i].max; ++j) {
switch (swap_info[i].swap_map[j]) {
+ case 0:
case SWAP_MAP_BAD:
continue;
- case 0:
- freeswap++;
default:
- totalswap++;
+ nr_to_be_unused++;
}
}
}
- val->freeswap = freeswap;
- val->totalswap = totalswap;
- return;
+ val->freeswap = nr_swap_pages + nr_to_be_unused;
+ val->totalswap = total_swap_pages + nr_to_be_unused;
+ swap_list_unlock();
}
/*
* Verify that a swap entry is valid and increment its swap map count.
- * Kernel_lock is held, which guarantees existance of swap device.
*
* Note: if swap_map[] reaches SWAP_MAP_MAX the entries are treated as
* "permanent", but will be reclaimed by the next swapoff.
@@ -914,43 +973,30 @@
unsigned long offset, type;
int result = 0;
- /* Swap entry 0 is illegal */
- if (!entry.val)
- goto out;
type = SWP_TYPE(entry);
if (type >= nr_swapfiles)
goto bad_file;
p = type + swap_info;
offset = SWP_OFFSET(entry);
- if (offset >= p->max)
- goto bad_offset;
- if (!p->swap_map[offset])
- goto bad_unused;
- /*
- * Entry is valid, so increment the map count.
- */
+
swap_device_lock(p);
- if (p->swap_map[offset] < SWAP_MAP_MAX)
- p->swap_map[offset]++;
- else {
- static int overflow = 0;
- if (overflow++ < 5)
- printk("VM: swap entry overflow\n");
- p->swap_map[offset] = SWAP_MAP_MAX;
+ if (offset < p->max && p->swap_map[offset]) {
+ if (p->swap_map[offset] < SWAP_MAP_MAX - 1) {
+ p->swap_map[offset]++;
+ result = 1;
+ } else if (p->swap_map[offset] <= SWAP_MAP_MAX) {
+ if (swap_overflow++ < 5)
+ printk(KERN_WARNING "swap_dup: swap entry overflow\n");
+ p->swap_map[offset] = SWAP_MAP_MAX;
+ result = 1;
+ }
}
swap_device_unlock(p);
- result = 1;
out:
return result;
bad_file:
- printk("Bad swap file entry %08lx\n", entry.val);
- goto out;
-bad_offset:
- printk("Bad swap offset entry %08lx\n", entry.val);
- goto out;
-bad_unused:
- printk("Unused swap offset entry in swap_dup %08lx\n", entry.val);
+ printk(KERN_ERR "swap_dup: %s%08lx\n", Bad_file, entry.val);
goto out;
}
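swap_duplicate() now saturates rather than wraps: counts below SWAP_MAP_MAX-1 increment normally, anything at or near the cap is pinned to SWAP_MAP_MAX ("permanent" until the next swapoff) and reported at most five times via swap_overflow. A userspace toy of that clamp (TOY_MAP_MAX and the function name are invented):

#include <stdio.h>

#define TOY_MAP_MAX 0x7fff

static int overflow_reports;

static int toy_swap_duplicate(unsigned short *count)
{
        if (!*count)
                return 0;                       /* unused entry: invalid */
        if (*count < TOY_MAP_MAX - 1) {
                (*count)++;                     /* the normal case */
                return 1;
        }
        if (*count <= TOY_MAP_MAX) {            /* clamp, never wrap */
                if (overflow_reports++ < 5)
                        fprintf(stderr, "toy: swap entry overflow\n");
                *count = TOY_MAP_MAX;
                return 1;
        }
        return 0;
}

int main(void)
{
        unsigned short count = TOY_MAP_MAX - 1;
        toy_swap_duplicate(&count);             /* pins at the cap */
        printf("count = %#x\n", count);
        return 0;
}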
@@ -985,18 +1031,18 @@
printk(KERN_ERR "swap_count: null entry!\n");
goto out;
bad_file:
- printk("Bad swap file entry %08lx\n", entry.val);
+ printk(KERN_ERR "swap_count: %s%08lx\n", Bad_file, entry.val);
goto out;
bad_offset:
- printk("Bad swap offset entry %08lx\n", entry.val);
+ printk(KERN_ERR "swap_count: %s%08lx\n", Bad_offset, entry.val);
goto out;
bad_unused:
- printk("Unused swap offset entry in swap_count %08lx\n", entry.val);
+ printk(KERN_ERR "swap_count: %s%08lx\n", Unused_offset, entry.val);
goto out;
}
/*
- * Kernel_lock protects against swap device deletion.
+ * Prior swap_duplicate protects against swap device deletion.
*/
void get_swaphandle_info(swp_entry_t entry, unsigned long *offset,
kdev_t *dev, struct inode **swapf)
@@ -1006,23 +1052,22 @@
type = SWP_TYPE(entry);
if (type >= nr_swapfiles) {
- printk("Internal error: bad swap-device\n");
+ printk(KERN_ERR "rw_swap_page: %s%08lx\n", Bad_file, entry.val);
return;
}
p = &swap_info[type];
*offset = SWP_OFFSET(entry);
- if (*offset >= p->max) {
- printk("rw_swap_page: weirdness\n");
+ if (*offset >= p->max && *offset != 0) {
+ printk(KERN_ERR "rw_swap_page: %s%08lx\n", Bad_offset, entry.val);
return;
}
if (p->swap_map && !p->swap_map[*offset]) {
- printk("VM: Bad swap entry %08lx\n", entry.val);
+ printk(KERN_ERR "rw_swap_page: %s%08lx\n", Unused_offset, entry.val);
return;
}
if (!(p->flags & SWP_USED)) {
- printk(KERN_ERR "rw_swap_page: "
- "Trying to swap to unused swap-device\n");
+ printk(KERN_ERR "rw_swap_page: %s%08lx\n", Unused_file, entry.val);
return;
}
@@ -1037,8 +1082,8 @@
}
/*
- * Kernel_lock protects against swap device deletion. Grab an extra
- * reference on the swaphandle so that it dos not become unused.
+ * swap_device_lock prevents swap_map being freed. Don't grab an extra
+ * reference on the swaphandle, it doesn't matter if it becomes unused.
*/
int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
{
@@ -1046,20 +1091,23 @@
unsigned long toff;
struct swap_info_struct *swapdev = SWP_TYPE(entry) + swap_info;
- *offset = SWP_OFFSET(entry);
- toff = *offset = (*offset >> page_cluster) << page_cluster;
+ if (!page_cluster) /* no readahead */
+ return 0;
+ toff = (SWP_OFFSET(entry) >> page_cluster) << page_cluster;
+ if (!toff) /* first page is swap header */
+ toff++, i--;
+ *offset = toff;
swap_device_lock(swapdev);
do {
/* Don't read-ahead past the end of the swap area */
if (toff >= swapdev->max)
break;
- /* Don't read in bad or busy pages */
+ /* Don't read in free or bad pages */
if (!swapdev->swap_map[toff])
break;
if (swapdev->swap_map[toff] == SWAP_MAP_BAD)
break;
- swapdev->swap_map[toff]++;
toff++;
ret++;
} while (--i);
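The reworked valid_swaphandles() rounds the faulting offset down to a 2^page_cluster boundary, steps past offset 0 (the header page), and stops at the end of the area or at the first free or bad slot, without bumping any reference counts, per the updated comment. A userspace toy of the window computation (the map contents and names are invented):

#include <stdio.h>

#define TOY_MAX     64
#define TOY_MAP_BAD 0x8000

static unsigned short map[TOY_MAX];

static int toy_valid_swaphandles(unsigned long entry_off,
                                 unsigned long *offset, int page_cluster)
{
        int ret = 0, i = 1 << page_cluster;
        unsigned long toff;

        if (!page_cluster)              /* readahead disabled */
                return 0;
        toff = (entry_off >> page_cluster) << page_cluster;
        if (!toff) {                    /* first page is the swap header */
                toff++;
                i--;
        }
        *offset = toff;

        do {
                if (toff >= TOY_MAX)    /* never past the end of the area */
                        break;
                if (!map[toff])         /* free slot: stop */
                        break;
                if (map[toff] == TOY_MAP_BAD)
                        break;
                toff++;
                ret++;
        } while (--i);
        return ret;
}

int main(void)
{
        unsigned long start;
        int n, j;

        for (j = 1; j < 20; j++)
                map[j] = 1;             /* slots 1..19 in use */
        n = toy_valid_swaphandles(5, &start, 3 /* 8-page cluster */);
        printf("read %d pages starting at offset %lu\n", n, start);
        return 0;
}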