patch-2.4.6 linux/include/linux/mm.h
Next file: linux/include/linux/mmzone.h
Previous file: linux/include/linux/mii.h
Back to the patch index
Back to the overall index
- Lines: 426
- Date:
Tue Jul 3 15:42:55 2001
- Orig file:
v2.4.5/linux/include/linux/mm.h
- Orig date:
Fri May 25 18:01:28 2001
diff -u --recursive --new-file v2.4.5/linux/include/linux/mm.h linux/include/linux/mm.h
@@ -10,6 +10,7 @@
#include <linux/string.h>
#include <linux/list.h>
#include <linux/mmzone.h>
+#include <linux/swap.h>
extern unsigned long max_mapnr;
extern unsigned long num_physpages;
@@ -39,32 +40,37 @@
* library, the executable area etc).
*/
struct vm_area_struct {
- struct mm_struct * vm_mm; /* VM area parameters */
- unsigned long vm_start;
- unsigned long vm_end;
+ struct mm_struct * vm_mm; /* The address space we belong to. */
+ unsigned long vm_start; /* Our start address within vm_mm. */
+ unsigned long vm_end; /* Our end address within vm_mm. */
/* linked list of VM areas per task, sorted by address */
struct vm_area_struct *vm_next;
- pgprot_t vm_page_prot;
- unsigned long vm_flags;
+ pgprot_t vm_page_prot; /* Access permissions of this VMA. */
+ unsigned long vm_flags; /* Flags, listed below. */
/* AVL tree of VM areas per task, sorted by address */
short vm_avl_height;
struct vm_area_struct * vm_avl_left;
struct vm_area_struct * vm_avl_right;
- /* For areas with an address space and backing store,
+ /*
+ * For areas with an address space and backing store,
* one of the address_space->i_mmap{,shared} lists,
* for shm areas, the list of attaches, otherwise unused.
*/
struct vm_area_struct *vm_next_share;
struct vm_area_struct **vm_pprev_share;
+ /* Function pointers to deal with this struct. */
struct vm_operations_struct * vm_ops;
- unsigned long vm_pgoff; /* offset in PAGE_SIZE units, *not* PAGE_CACHE_SIZE */
- struct file * vm_file;
- unsigned long vm_raend;
+
+ /* Information about our backing store: */
+ unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE
+ units, *not* PAGE_CACHE_SIZE */
+ struct file * vm_file; /* File we map to (can be NULL). */
+ unsigned long vm_raend; /* XXX: put full readahead info here. */
void * vm_private_data; /* was vm_pte (shared mem) */
};
@@ -90,6 +96,7 @@
#define VM_LOCKED 0x00002000
#define VM_IO 0x00004000 /* Memory mapped I/O or similar */
+ /* Used by sys_madvise() */
#define VM_SEQ_READ 0x00008000 /* App will access data sequentially */
#define VM_RAND_READ 0x00010000 /* App will not benefit from clustered reads */
@@ -124,37 +131,144 @@
};
/*
+ * Each physical page in the system has a struct page associated with
+ * it to keep track of whatever it is we are using the page for at the
+ * moment. Note that we have no way to track which tasks are using
+ * a page.
+ *
* Try to keep the most commonly accessed fields in single cache lines
* here (16 bytes or greater). This ordering should be particularly
* beneficial on 32-bit processors.
*
* The first line is data used in page cache lookup, the second line
* is used for linear searches (eg. clock algorithm scans).
+ *
+ * TODO: make this structure smaller, it could be as small as 32 bytes.
*/
typedef struct page {
- struct list_head list;
- struct address_space *mapping;
- unsigned long index;
- struct page *next_hash;
- atomic_t count;
- unsigned long flags; /* atomic flags, some possibly updated asynchronously */
- struct list_head lru;
- unsigned long age;
- wait_queue_head_t wait;
- struct page **pprev_hash;
- struct buffer_head * buffers;
- void *virtual; /* non-NULL if kmapped */
- struct zone_struct *zone;
+ struct list_head list; /* ->mapping has some page lists. */
+ struct address_space *mapping; /* The inode (or ...) we belong to. */
+ unsigned long index; /* Our offset within mapping. */
+ struct page *next_hash; /* Next page sharing our hash bucket in
+ the pagecache hash table. */
+ atomic_t count; /* Usage count, see below. */
+ unsigned long flags; /* atomic flags, some possibly
+ updated asynchronously */
+ struct list_head lru; /* Pageout list, eg. active_list;
+ protected by pagemap_lru_lock !! */
+ unsigned long age; /* Page aging counter. */
+ wait_queue_head_t wait; /* Page locked? Stand in line... */
+ struct page **pprev_hash; /* Complement to *next_hash. */
+ struct buffer_head * buffers; /* Buffer maps us to a disk block. */
+ void *virtual; /* Kernel virtual address (NULL if
+ not kmapped, ie. highmem) */
+ struct zone_struct *zone; /* Memory zone we are in. */
} mem_map_t;
+/*
+ * Methods to modify the page usage count.
+ *
+ * What counts for a page usage:
+ * - cache mapping (page->mapping)
+ * - disk mapping (page->buffers)
+ * - page mapped in a task's page tables, each mapping
+ * is counted separately
+ *
+ * Also, many kernel routines increase the page count before a critical
+ * routine so they can be sure the page doesn't go away from under them.
+ */
#define get_page(p) atomic_inc(&(p)->count)
#define put_page(p) __free_page(p)
#define put_page_testzero(p) atomic_dec_and_test(&(p)->count)
#define page_count(p) atomic_read(&(p)->count)
#define set_page_count(p,v) atomic_set(&(p)->count, v)
-/* Page flag bit values */
-#define PG_locked 0
+/*
+ * Various page->flags bits:
+ *
+ * PG_reserved is set for special pages, which can never be swapped
+ * out. Some of them might not even exist (eg. empty_bad_page)...
+ *
+ * Multiple processes may "see" the same page. E.g. for untouched
+ * mappings of /dev/null, all processes see the same page full of
+ * zeroes, and text pages of executables and shared libraries have
+ * only one copy in memory, at most, normally.
+ *
+ * For the non-reserved pages, page->count denotes a reference count.
+ * page->count == 0 means the page is free.
+ * page->count == 1 means the page is used for exactly one purpose
+ * (e.g. a private data page of one process).
+ *
+ * A page may be used for kmalloc() or anyone else who does a
+ * __get_free_page(). In this case the page->count is at least 1, and
+ * all other fields are unused but should be 0 or NULL. The
+ * management of this page is the responsibility of the one who uses
+ * it.
+ *
+ * The other pages (we may call them "process pages") are completely
+ * managed by the Linux memory manager: I/O, buffers, swapping etc.
+ * The following discussion applies only to them.
+ *
+ * A page may belong to an inode's memory mapping. In this case,
+ * page->mapping is the pointer to the inode, and page->offset is the
+ * file offset of the page (not necessarily a multiple of PAGE_SIZE).
+ *
+ * A page may have buffers allocated to it. In this case,
+ * page->buffers is a circular list of these buffer heads. Else,
+ * page->buffers == NULL.
+ *
+ * For pages belonging to inodes, the page->count is the number of
+ * attaches, plus 1 if buffers are allocated to the page, plus one
+ * for the page cache itself.
+ *
+ * All pages belonging to an inode are in these doubly linked lists:
+ * mapping->clean_pages, mapping->dirty_pages and mapping->locked_pages;
+ * using the page->list list_head. These fields are also used for
+ * freelist managemet (when page->count==0).
+ *
+ * There is also a hash table mapping (inode,offset) to the page
+ * in memory if present. The lists for this hash table use the fields
+ * page->next_hash and page->pprev_hash.
+ *
+ * All process pages can do I/O:
+ * - inode pages may need to be read from disk,
+ * - inode pages which have been modified and are MAP_SHARED may need
+ * to be written to disk,
+ * - private pages which have been modified may need to be swapped out
+ * to swap space and (later) to be read back into memory.
+ * During disk I/O, PG_locked is used. This bit is set before I/O
+ * and reset when I/O completes. page->wait is a wait queue of all
+ * tasks waiting for the I/O on this page to complete.
+ * PG_uptodate tells whether the page's contents is valid.
+ * When a read completes, the page becomes uptodate, unless a disk I/O
+ * error happened.
+ *
+ * For choosing which pages to swap out, inode pages carry a
+ * PG_referenced bit, which is set any time the system accesses
+ * that page through the (inode,offset) hash table. This referenced
+ * bit, together with the referenced bit in the page tables, is used
+ * to manipulate page->age and move the page across the active,
+ * inactive_dirty and inactive_clean lists.
+ *
+ * Note that the referenced bit, the page->lru list_head and the
+ * active, inactive_dirty and inactive_clean lists are protected by
+ * the pagemap_lru_lock, and *NOT* by the usual PG_locked bit!
+ *
+ * PG_skip is used on sparc/sparc64 architectures to "skip" certain
+ * parts of the address space.
+ *
+ * PG_error is set to indicate that an I/O error occurred on this page.
+ *
+ * PG_arch_1 is an architecture specific page state bit. The generic
+ * code guarentees that this bit is cleared for a page when it first
+ * is entered into the page cache.
+ *
+ * PG_highmem pages are not permanently mapped into the kernel virtual
+ * address space, they need to be kmapped separately for doing IO on
+ * the pages. The struct page (these bits with information) are always
+ * mapped into kernel address space...
+ */
+#define PG_locked 0 /* Page is locked. Don't touch. */
#define PG_error 1
#define PG_referenced 2
#define PG_uptodate 3
@@ -167,6 +281,7 @@
#define PG_skip 10
#define PG_inactive_clean 11
#define PG_highmem 12
+#define PG_checked 13 /* kill me in 2.5.<early>. */
/* bits 21-29 unused */
#define PG_arch_1 30
#define PG_reserved 31
@@ -181,6 +296,8 @@
#define PageLocked(page) test_bit(PG_locked, &(page)->flags)
#define LockPage(page) set_bit(PG_locked, &(page)->flags)
#define TryLockPage(page) test_and_set_bit(PG_locked, &(page)->flags)
+#define PageChecked(page) test_bit(PG_checked, &(page)->flags)
+#define SetPageChecked(page) set_bit(PG_checked, &(page)->flags)
extern void __set_page_dirty(struct page *);
@@ -254,81 +371,7 @@
#define NOPAGE_SIGBUS (NULL)
#define NOPAGE_OOM ((struct page *) (-1))
-
-/*
- * Various page->flags bits:
- *
- * PG_reserved is set for a page which must never be accessed (which
- * may not even be present).
- *
- * PG_DMA has been removed, page->zone now tells exactly wether the
- * page is suited to do DMAing into.
- *
- * Multiple processes may "see" the same page. E.g. for untouched
- * mappings of /dev/null, all processes see the same page full of
- * zeroes, and text pages of executables and shared libraries have
- * only one copy in memory, at most, normally.
- *
- * For the non-reserved pages, page->count denotes a reference count.
- * page->count == 0 means the page is free.
- * page->count == 1 means the page is used for exactly one purpose
- * (e.g. a private data page of one process).
- *
- * A page may be used for kmalloc() or anyone else who does a
- * __get_free_page(). In this case the page->count is at least 1, and
- * all other fields are unused but should be 0 or NULL. The
- * management of this page is the responsibility of the one who uses
- * it.
- *
- * The other pages (we may call them "process pages") are completely
- * managed by the Linux memory manager: I/O, buffers, swapping etc.
- * The following discussion applies only to them.
- *
- * A page may belong to an inode's memory mapping. In this case,
- * page->inode is the pointer to the inode, and page->offset is the
- * file offset of the page (not necessarily a multiple of PAGE_SIZE).
- *
- * A page may have buffers allocated to it. In this case,
- * page->buffers is a circular list of these buffer heads. Else,
- * page->buffers == NULL.
- *
- * For pages belonging to inodes, the page->count is the number of
- * attaches, plus 1 if buffers are allocated to the page.
- *
- * All pages belonging to an inode make up a doubly linked list
- * inode->i_pages, using the fields page->next and page->prev. (These
- * fields are also used for freelist management when page->count==0.)
- * There is also a hash table mapping (inode,offset) to the page
- * in memory if present. The lists for this hash table use the fields
- * page->next_hash and page->pprev_hash.
- *
- * All process pages can do I/O:
- * - inode pages may need to be read from disk,
- * - inode pages which have been modified and are MAP_SHARED may need
- * to be written to disk,
- * - private pages which have been modified may need to be swapped out
- * to swap space and (later) to be read back into memory.
- * During disk I/O, PG_locked is used. This bit is set before I/O
- * and reset when I/O completes. page->wait is a wait queue of all
- * tasks waiting for the I/O on this page to complete.
- * PG_uptodate tells whether the page's contents is valid.
- * When a read completes, the page becomes uptodate, unless a disk I/O
- * error happened.
- *
- * For choosing which pages to swap out, inode pages carry a
- * PG_referenced bit, which is set any time the system accesses
- * that page through the (inode,offset) hash table.
- *
- * PG_skip is used on sparc/sparc64 architectures to "skip" certain
- * parts of the address space.
- *
- * PG_error is set to indicate that an I/O error occurred on this page.
- *
- * PG_arch_1 is an architecture specific page state bit. The generic
- * code guarentees that this bit is cleared for a page when it first
- * is entered into the page cache.
- */
-
+/* The array of struct pages */
extern mem_map_t * mem_map;
/*
@@ -337,10 +380,10 @@
* can allocate highmem pages, the *get*page*() variants return
* virtual kernel addresses to the allocated page(s).
*/
-extern struct page * FASTCALL(__alloc_pages(zonelist_t *zonelist, unsigned long order));
+extern struct page * FASTCALL(_alloc_pages(unsigned int gfp_mask, unsigned long order));
+extern struct page * FASTCALL(__alloc_pages(unsigned int gfp_mask, unsigned long order, zonelist_t *zonelist));
extern struct page * alloc_pages_node(int nid, int gfp_mask, unsigned long order);
-#ifndef CONFIG_DISCONTIGMEM
static inline struct page * alloc_pages(int gfp_mask, unsigned long order)
{
/*
@@ -348,11 +391,8 @@
*/
if (order >= MAX_ORDER)
return NULL;
- return __alloc_pages(contig_page_data.node_zonelists+(gfp_mask), order);
+ return _alloc_pages(gfp_mask, order);
}
-#else /* !CONFIG_DISCONTIGMEM */
-extern struct page * alloc_pages(int gfp_mask, unsigned long order);
-#endif /* !CONFIG_DISCONTIGMEM */
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
@@ -427,6 +467,23 @@
extern void si_meminfo(struct sysinfo * val);
extern void swapin_readahead(swp_entry_t);
+/*
+ * Work out if there are any other processes sharing this
+ * swap cache page. Never mind the buffers.
+ */
+static inline int exclusive_swap_page(struct page *page)
+{
+ unsigned int count;
+
+ if (!PageLocked(page))
+ BUG();
+ if (!PageSwapCache(page))
+ return 0;
+ count = page_count(page) - !!page->buffers; /* 2: us + swap cache */
+ count += swap_count(page); /* +1: just swap cache */
+ return count == 3; /* =3: total */
+}
+
/* mmap.c */
extern void lock_vma_mappings(struct vm_area_struct *);
extern void unlock_vma_mappings(struct vm_area_struct *);
@@ -471,24 +528,24 @@
/*
* GFP bitmasks..
*/
-#define __GFP_WAIT 0x01
-#define __GFP_HIGH 0x02
-#define __GFP_IO 0x04
-#define __GFP_DMA 0x08
-#ifdef CONFIG_HIGHMEM
-#define __GFP_HIGHMEM 0x10
-#else
-#define __GFP_HIGHMEM 0x0 /* noop */
-#endif
+/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low four bits) */
+#define __GFP_DMA 0x01
+#define __GFP_HIGHMEM 0x02
+
+/* Action modifiers - doesn't change the zoning */
+#define __GFP_WAIT 0x10 /* Can wait and reschedule? */
+#define __GFP_HIGH 0x20 /* Should access emergency pools? */
+#define __GFP_IO 0x40 /* Can start physical IO? */
+#define __GFP_FS 0x80 /* Can call down to low-level FS? */
-
-#define GFP_BUFFER (__GFP_HIGH | __GFP_WAIT)
+#define GFP_NOIO (__GFP_HIGH | __GFP_WAIT)
+#define GFP_NOFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO)
#define GFP_ATOMIC (__GFP_HIGH)
-#define GFP_USER ( __GFP_WAIT | __GFP_IO)
-#define GFP_HIGHUSER ( __GFP_WAIT | __GFP_IO | __GFP_HIGHMEM)
-#define GFP_KERNEL (__GFP_HIGH | __GFP_WAIT | __GFP_IO)
-#define GFP_NFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO)
-#define GFP_KSWAPD ( __GFP_IO)
+#define GFP_USER ( __GFP_WAIT | __GFP_IO | __GFP_FS)
+#define GFP_HIGHUSER ( __GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HIGHMEM)
+#define GFP_KERNEL (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_FS)
+#define GFP_NFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_FS)
+#define GFP_KSWAPD ( __GFP_IO | __GFP_FS)
/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some
platforms, used as appropriate on others */
@@ -533,11 +590,6 @@
}
extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr);
-
-#define buffer_under_min() (atomic_read(&buffermem_pages) * 100 < \
- buffer_mem.min_percent * num_physpages)
-#define pgcache_under_min() (atomic_read(&page_cache_size) * 100 < \
- page_cache.min_percent * num_physpages)
#endif /* __KERNEL__ */
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)