From: Rik van Riel <riel@redhat.com>

The patch below (softly) enforces RLIMIT_RSS in the 2.6 kernel.  It has been
tested by Pavel and seems to work OK for his workload.



---

 include/linux/init_task.h |    2 ++
 include/linux/sched.h     |    1 +
 include/linux/swap.h      |    4 ++--
 kernel/sys.c              |    8 ++++++++
 mm/rmap.c                 |   18 +++++++++++++++++-
 mm/vmscan.c               |   12 ++++++++----
 6 files changed, 38 insertions(+), 7 deletions(-)

diff -puN include/linux/init_task.h~vm-rss-limit-enforcement include/linux/init_task.h
--- 25/include/linux/init_task.h~vm-rss-limit-enforcement	2004-01-29 17:44:04.000000000 -0800
+++ 25-akpm/include/linux/init_task.h	2004-01-29 19:11:49.000000000 -0800
@@ -2,6 +2,7 @@
 #define _LINUX__INIT_TASK_H
 
 #include <linux/file.h>
+#include <linux/resource.h>
 
 #define INIT_FILES \
 { 							\
@@ -41,6 +42,7 @@
 	.page_table_lock =  SPIN_LOCK_UNLOCKED, 		\
 	.mmlist		= LIST_HEAD_INIT(name.mmlist),		\
 	.default_kioctx = INIT_KIOCTX(name.default_kioctx, name),	\
+	.rlimit_rss	= RLIM_INFINITY			\
 }
 
 #define INIT_SIGNALS(sig) {	\
diff -puN include/linux/sched.h~vm-rss-limit-enforcement include/linux/sched.h
--- 25/include/linux/sched.h~vm-rss-limit-enforcement	2004-01-29 17:44:04.000000000 -0800
+++ 25-akpm/include/linux/sched.h	2004-01-29 19:12:09.000000000 -0800
@@ -205,6 +205,7 @@ struct mm_struct {
 	unsigned long arg_start, arg_end, env_start, env_end;
 	unsigned long rss, total_vm, locked_vm;
 	unsigned long def_flags;
+	unsigned long rlimit_rss;
 	cpumask_t cpu_vm_mask;
 
 	unsigned long saved_auxv[40]; /* for /proc/PID/auxv */
diff -puN include/linux/swap.h~vm-rss-limit-enforcement include/linux/swap.h
--- 25/include/linux/swap.h~vm-rss-limit-enforcement	2004-01-29 17:44:04.000000000 -0800
+++ 25-akpm/include/linux/swap.h	2004-01-29 17:44:04.000000000 -0800
@@ -179,7 +179,7 @@ extern int vm_swappiness;
 
 /* linux/mm/rmap.c */
 #ifdef CONFIG_MMU
-int FASTCALL(page_referenced(struct page *));
+int FASTCALL(page_referenced(struct page *, int *));
 struct pte_chain *FASTCALL(page_add_rmap(struct page *, pte_t *,
 					struct pte_chain *));
 void FASTCALL(page_remove_rmap(struct page *, pte_t *));
@@ -188,7 +188,7 @@ int FASTCALL(try_to_unmap(struct page *)
 /* linux/mm/shmem.c */
 extern int shmem_unuse(swp_entry_t entry, struct page *page);
 #else
-#define page_referenced(page)	TestClearPageReferenced(page)
+#define page_referenced(page, _x)	TestClearPageReferenced(page)
 #define try_to_unmap(page)	SWAP_FAIL
 #endif /* CONFIG_MMU */
 
diff -puN kernel/sys.c~vm-rss-limit-enforcement kernel/sys.c
--- 25/kernel/sys.c~vm-rss-limit-enforcement	2004-01-29 17:44:04.000000000 -0800
+++ 25-akpm/kernel/sys.c	2004-01-29 19:11:51.000000000 -0800
@@ -1308,6 +1308,14 @@ asmlinkage long sys_setrlimit(unsigned i
 	if (retval)
 		return retval;
 
+	/* The rlimit is specified in bytes, convert to pages for mm. */
+	if (resource == RLIMIT_RSS && current->mm) {
+		unsigned long pages = RLIM_INFINITY;
+		if (new_rlim.rlim_cur != RLIM_INFINITY)
+			pages = new_rlim.rlim_cur >> PAGE_SHIFT;
+		current->mm->rlimit_rss = pages;
+	}
+
 	*old_rlim = new_rlim;
 	return 0;
 }
diff -puN mm/rmap.c~vm-rss-limit-enforcement mm/rmap.c
--- 25/mm/rmap.c~vm-rss-limit-enforcement	2004-01-29 17:44:04.000000000 -0800
+++ 25-akpm/mm/rmap.c	2004-01-29 19:12:20.000000000 -0800
@@ -104,6 +104,7 @@ pte_chain_encode(struct pte_chain *pte_c
 /**
  * page_referenced - test if the page was referenced
  * @page: the page to test
+ * @rsslimit: set if the process(es) mapping the page are over their RSS limit.
  *
  * Quick test_and_clear_referenced for all mappings to a page,
  * returns the number of processes which referenced the page.
@@ -111,9 +112,13 @@ pte_chain_encode(struct pte_chain *pte_c
  *
  * If the page has a single-entry pte_chain, collapse that back to a PageDirect
  * representation.  This way, it's only done under memory pressure.
+ *
+ * The pte_chain_lock() is sufficient to pin down mm_structs while we examine
+ * them.
  */
-int page_referenced(struct page * page)
+int page_referenced(struct page *page, int *rsslimit)
 {
+	struct mm_struct * mm;
 	struct pte_chain *pc;
 	int referenced = 0;
 
@@ -127,10 +132,17 @@ int page_referenced(struct page * page)
 		pte_t *pte = rmap_ptep_map(page->pte.direct);
 		if (ptep_test_and_clear_young(pte))
 			referenced++;
+
+		mm = ptep_to_mm(pte);
+		if (mm->rss > mm->rlimit_rss)
+			*rsslimit = 1;
 		rmap_ptep_unmap(pte);
 	} else {
 		int nr_chains = 0;
 
+		/* We clear it if any task using the page is under its limit. */
+		*rsslimit = 1;
+
 		/* Check all the page tables mapping this page. */
 		for (pc = page->pte.chain; pc; pc = pte_chain_next(pc)) {
 			int i;
@@ -142,6 +154,10 @@ int page_referenced(struct page * page)
 				p = rmap_ptep_map(pte_paddr);
 				if (ptep_test_and_clear_young(p))
 					referenced++;
+
+				mm = ptep_to_mm(p);
+				if (mm->rss < mm->rlimit_rss)
+					*rsslimit = 0;
 				rmap_ptep_unmap(p);
 				nr_chains++;
 			}
diff -puN mm/vmscan.c~vm-rss-limit-enforcement mm/vmscan.c
--- 25/mm/vmscan.c~vm-rss-limit-enforcement	2004-01-29 17:44:04.000000000 -0800
+++ 25-akpm/mm/vmscan.c	2004-01-29 19:12:39.000000000 -0800
@@ -250,6 +250,7 @@ shrink_list(struct list_head *page_list,
 	LIST_HEAD(ret_pages);
 	struct pagevec freed_pvec;
 	int pgactivate = 0;
+	int over_rsslimit = 0;
 	int ret = 0;
 
 	cond_resched();
@@ -278,8 +279,8 @@ shrink_list(struct list_head *page_list,
 			goto keep_locked;
 
 		pte_chain_lock(page);
-		referenced = page_referenced(page);
-		if (referenced && page_mapping_inuse(page)) {
+		referenced = page_referenced(page, &over_rsslimit);
+		if (referenced && page_mapping_inuse(page) && !over_rsslimit) {
 			/* In active use or really unfreeable.  Activate it. */
 			pte_chain_unlock(page);
 			goto activate_locked;
@@ -597,6 +598,7 @@ refill_inactive_zone(struct zone *zone, 
 	long mapped_ratio;
 	long distress;
 	long swap_tendency;
+	int over_rsslimit = 0;
 
 	lru_add_drain();
 	pgmoved = 0;
@@ -657,13 +659,15 @@ refill_inactive_zone(struct zone *zone, 
 		list_del(&page->lru);
 		if (page_mapped(page)) {
 			pte_chain_lock(page);
-			if (page_mapped(page) && page_referenced(page)) {
+			if (page_mapped(page) &&
+					page_referenced(page, &over_rsslimit) &&
+					!over_rsslimit) {
 				pte_chain_unlock(page);
 				list_add(&page->lru, &l_active);
 				continue;
 			}
 			pte_chain_unlock(page);
-			if (!reclaim_mapped) {
+			if (!reclaim_mapped && !over_rsslimit) {
 				list_add(&page->lru, &l_active);
 				continue;
 			}

_