From: William Lee Irwin III <wli@holomorphy.com>

Merely removing down_read(&mm->mmap_sem) from task_vsize() is only half
of a fix. The following patch removes the vma iteration as well as the
down_read(&mm->mmap_sem) from both task_mem() and task_statm() (and
from their callers) for the CONFIG_MMU=y case, in favor of accounting
the various stats reported at the times of vma creation, destruction,
and modification. Unlike the 2.4.x patches of the same name, this has
no per-pte-modification overhead whatsoever.

This patch quashes end user complaints of top(1) being slow as well as
kernel hacker complaints of per-pte accounting overhead simultaneously.

Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-sparc64-akpm/arch/ia64/ia32/binfmt_elf32.c  |    2 
 25-sparc64-akpm/arch/ia64/kernel/perfmon.c     |    2 
 25-sparc64-akpm/arch/ia64/mm/fault.c           |    1 
 25-sparc64-akpm/arch/s390/kernel/compat_exec.c |    2 
 25-sparc64-akpm/arch/x86_64/ia32/ia32_binfmt.c |    2 
 25-sparc64-akpm/fs/exec.c                      |    2 
 25-sparc64-akpm/fs/proc/array.c                |    5 --
 25-sparc64-akpm/fs/proc/task_mmu.c             |   54 ++++---------------------
 25-sparc64-akpm/fs/proc/task_nommu.c           |    5 +-
 25-sparc64-akpm/include/linux/mm.h             |   13 ++++++
 25-sparc64-akpm/include/linux/proc_fs.h        |    2 
 25-sparc64-akpm/include/linux/sched.h          |    4 -
 25-sparc64-akpm/mm/mmap.c                      |   26 ++++++++++++
 25-sparc64-akpm/mm/mprotect.c                  |    2 
 25-sparc64-akpm/mm/mremap.c                    |    3 +
 15 files changed, 67 insertions(+), 58 deletions(-)

diff -puN arch/ia64/ia32/binfmt_elf32.c~o1-proc_pid_statm arch/ia64/ia32/binfmt_elf32.c
--- 25-sparc64/arch/ia64/ia32/binfmt_elf32.c~o1-proc_pid_statm	2004-08-24 23:18:25.740001328 -0700
+++ 25-sparc64-akpm/arch/ia64/ia32/binfmt_elf32.c	2004-08-24 23:18:25.765997376 -0700
@@ -187,7 +187,7 @@ ia32_setup_arg_pages (struct linux_binpr
 		mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC)?
 					PAGE_COPY_EXEC: PAGE_COPY;
 		insert_vm_struct(current->mm, mpnt);
-		current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+		current->mm->stack_vm = current->mm->total_vm = vma_pages(mpnt);
 	}
 
 	for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
diff -puN arch/ia64/kernel/perfmon.c~o1-proc_pid_statm arch/ia64/kernel/perfmon.c
--- 25-sparc64/arch/ia64/kernel/perfmon.c~o1-proc_pid_statm	2004-08-24 23:18:25.742001024 -0700
+++ 25-sparc64-akpm/arch/ia64/kernel/perfmon.c	2004-08-24 23:18:25.770996616 -0700
@@ -2352,7 +2352,7 @@ pfm_smpl_buffer_alloc(struct task_struct
 	insert_vm_struct(mm, vma);
 
 	mm->total_vm  += size >> PAGE_SHIFT;
-
+	vm_stat_account(vma);
 	up_write(&task->mm->mmap_sem);
 
 	/*
diff -puN arch/ia64/mm/fault.c~o1-proc_pid_statm arch/ia64/mm/fault.c
--- 25-sparc64/arch/ia64/mm/fault.c~o1-proc_pid_statm	2004-08-24 23:18:25.744000720 -0700
+++ 25-sparc64-akpm/arch/ia64/mm/fault.c	2004-08-24 23:18:25.771996464 -0700
@@ -41,6 +41,7 @@ expand_backing_store (struct vm_area_str
 	vma->vm_mm->total_vm += grow;
 	if (vma->vm_flags & VM_LOCKED)
 		vma->vm_mm->locked_vm += grow;
+	__vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow);
 	return 0;
 }
 
diff -puN arch/s390/kernel/compat_exec.c~o1-proc_pid_statm arch/s390/kernel/compat_exec.c
--- 25-sparc64/arch/s390/kernel/compat_exec.c~o1-proc_pid_statm	2004-08-24 23:18:25.746000416 -0700
+++ 25-sparc64-akpm/arch/s390/kernel/compat_exec.c	2004-08-24 23:18:25.772996312 -0700
@@ -69,7 +69,7 @@ int setup_arg_pages32(struct linux_binpr
 		mpnt->vm_page_prot = PAGE_COPY;
 		mpnt->vm_flags = VM_STACK_FLAGS;
 		insert_vm_struct(mm, mpnt);
-		mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+		mm->stack_vm = mm->total_vm = vma_pages(mpnt);
 	} 
 
 	for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
diff -puN arch/x86_64/ia32/ia32_binfmt.c~o1-proc_pid_statm arch/x86_64/ia32/ia32_binfmt.c
--- 25-sparc64/arch/x86_64/ia32/ia32_binfmt.c~o1-proc_pid_statm	2004-08-24 23:18:25.747000264 -0700
+++ 25-sparc64-akpm/arch/x86_64/ia32/ia32_binfmt.c	2004-08-24 23:18:25.772996312 -0700
@@ -368,7 +368,7 @@ int setup_arg_pages(struct linux_binprm 
  		mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC) ? 
  			PAGE_COPY_EXEC : PAGE_COPY;
 		insert_vm_struct(mm, mpnt);
-		mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+		mm->stack_vm = mm->total_vm = vma_pages(mpnt);
 	} 
 
 	for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
diff -puN fs/exec.c~o1-proc_pid_statm fs/exec.c
--- 25-sparc64/fs/exec.c~o1-proc_pid_statm	2004-08-24 23:18:25.748999960 -0700
+++ 25-sparc64-akpm/fs/exec.c	2004-08-24 23:18:25.774996008 -0700
@@ -434,7 +434,7 @@ int setup_arg_pages(struct linux_binprm 
 		mpnt->vm_flags |= mm->def_flags;
 		mpnt->vm_page_prot = protection_map[mpnt->vm_flags & 0x7];
 		insert_vm_struct(mm, mpnt);
-		mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+		mm->stack_vm = mm->total_vm = vma_pages(mpnt);
 	}
 
 	for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
diff -puN fs/proc/array.c~o1-proc_pid_statm fs/proc/array.c
--- 25-sparc64/fs/proc/array.c~o1-proc_pid_statm	2004-08-24 23:18:25.750999656 -0700
+++ 25-sparc64-akpm/fs/proc/array.c	2004-08-24 23:18:25.774996008 -0700
@@ -279,7 +279,6 @@ static inline char *task_cap(struct task
 			    cap_t(p->cap_effective));
 }
 
-extern char *task_mem(struct mm_struct *, char *);
 int proc_pid_status(struct task_struct *task, char * buffer)
 {
 	char * orig = buffer;
@@ -415,17 +414,13 @@ int proc_pid_stat(struct task_struct *ta
 	return res;
 }
 
-extern int task_statm(struct mm_struct *, int *, int *, int *, int *);
 int proc_pid_statm(struct task_struct *task, char *buffer)
 {
 	int size = 0, resident = 0, shared = 0, text = 0, lib = 0, data = 0;
 	struct mm_struct *mm = get_task_mm(task);
 	
 	if (mm) {
-		down_read(&mm->mmap_sem);
 		size = task_statm(mm, &shared, &text, &data, &resident);
-		up_read(&mm->mmap_sem);
-
 		mmput(mm);
 	}
 
diff -puN fs/proc/task_mmu.c~o1-proc_pid_statm fs/proc/task_mmu.c
--- 25-sparc64/fs/proc/task_mmu.c~o1-proc_pid_statm	2004-08-24 23:18:25.751999504 -0700
+++ 25-sparc64-akpm/fs/proc/task_mmu.c	2004-08-24 23:18:25.775995856 -0700
@@ -6,27 +6,11 @@
 
 char *task_mem(struct mm_struct *mm, char *buffer)
 {
-	unsigned long data = 0, stack = 0, exec = 0, lib = 0;
-	struct vm_area_struct *vma;
+	unsigned long data, text, lib;
 
-	down_read(&mm->mmap_sem);
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		unsigned long len = (vma->vm_end - vma->vm_start) >> 10;
-		if (!vma->vm_file) {
-			data += len;
-			if (vma->vm_flags & VM_GROWSDOWN)
-				stack += len;
-			continue;
-		}
-		if (vma->vm_flags & VM_WRITE)
-			continue;
-		if (vma->vm_flags & VM_EXEC) {
-			exec += len;
-			if (vma->vm_flags & VM_EXECUTABLE)
-				continue;
-			lib += len;
-		}
-	}
+	data = mm->total_vm - mm->shared_vm - mm->stack_vm;
+	text = (mm->end_code - mm->start_code) >> 10;
+	lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
 	buffer += sprintf(buffer,
 		"VmSize:\t%8lu kB\n"
 		"VmLck:\t%8lu kB\n"
@@ -38,9 +22,8 @@ char *task_mem(struct mm_struct *mm, cha
 		mm->total_vm << (PAGE_SHIFT-10),
 		mm->locked_vm << (PAGE_SHIFT-10),
 		mm->rss << (PAGE_SHIFT-10),
-		data - stack, stack,
-		exec - lib, lib);
-	up_read(&mm->mmap_sem);
+		data << (PAGE_SHIFT-10),
+		mm->stack_vm << (PAGE_SHIFT-10), text, lib);
 	return buffer;
 }
 
@@ -52,28 +35,11 @@ unsigned long task_vsize(struct mm_struc
 int task_statm(struct mm_struct *mm, int *shared, int *text,
 	       int *data, int *resident)
 {
-	struct vm_area_struct *vma;
-	int size = 0;
-
+	*shared = mm->shared_vm;	/* maintained by __vm_stat_account() */
+	*text = (mm->end_code - mm->start_code) >> PAGE_SHIFT;	/* not libs */
+	*data = mm->total_vm - mm->shared_vm;
 	*resident = mm->rss;
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		int pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
-
-		size += pages;
-		if (is_vm_hugetlb_page(vma)) {
-			if (!(vma->vm_flags & VM_DONTCOPY))
-				*shared += pages;
-			continue;
-		}
-		if (vma->vm_file)
-			*shared += pages;
-		if (vma->vm_flags & VM_EXECUTABLE)
-			*text += pages;
-		else
-			*data += pages;
-	}
-
-	return size;
+	return mm->total_vm;		/* total mapping size, in pages */
 }
 
 static int show_map(struct seq_file *m, void *v)
diff -puN fs/proc/task_nommu.c~o1-proc_pid_statm fs/proc/task_nommu.c
--- 25-sparc64/fs/proc/task_nommu.c~o1-proc_pid_statm	2004-08-24 23:18:25.752999352 -0700
+++ 25-sparc64-akpm/fs/proc/task_nommu.c	2004-08-24 23:18:25.776995704 -0700
@@ -82,7 +82,8 @@ int task_statm(struct mm_struct *mm, int
 {
 	struct mm_tblock_struct *tbp;
 	int size = kobjsize(mm);
-	
+
+	down_read(&mm->mmap_sem);
 	for (tbp = &mm->context.tblock; tbp; tbp = tbp->next) {
 		if (tbp->next)
 			size += kobjsize(tbp->next);
@@ -94,7 +95,7 @@ int task_statm(struct mm_struct *mm, int
 
 	size += (*text = mm->end_code - mm->start_code);
 	size += (*data = mm->start_stack - mm->start_data);
-
+	up_read(&mm->mmap_sem);
 	*resident = size;
 	return size;
 }
diff -puN include/linux/mm.h~o1-proc_pid_statm include/linux/mm.h
--- 25-sparc64/include/linux/mm.h~o1-proc_pid_statm	2004-08-24 23:18:25.754999048 -0700
+++ 25-sparc64-akpm/include/linux/mm.h	2004-08-24 23:18:25.777995552 -0700
@@ -754,6 +754,19 @@ extern struct page * follow_page(struct 
 		int write);
 extern int remap_page_range(struct vm_area_struct *vma, unsigned long from,
 		unsigned long to, unsigned long size, pgprot_t prot);
+void __vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
+
+static inline void vm_stat_account(struct vm_area_struct *vma)
+{
+	__vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
+							vma_pages(vma));
+}
+
+static inline void vm_stat_unaccount(struct vm_area_struct *vma)
+{
+	__vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
+							-vma_pages(vma));
+}
 
 #ifndef CONFIG_DEBUG_PAGEALLOC
 static inline void
diff -puN include/linux/proc_fs.h~o1-proc_pid_statm include/linux/proc_fs.h
--- 25-sparc64/include/linux/proc_fs.h~o1-proc_pid_statm	2004-08-24 23:18:25.756998744 -0700
+++ 25-sparc64-akpm/include/linux/proc_fs.h	2004-08-24 23:18:25.778995400 -0700
@@ -93,6 +93,8 @@ struct dentry *proc_pid_unhash(struct ta
 void proc_pid_flush(struct dentry *proc_dentry);
 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
 unsigned long task_vsize(struct mm_struct *);
+int task_statm(struct mm_struct *, int *, int *, int *, int *);
+char *task_mem(struct mm_struct *, char *);
 
 extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
 						struct proc_dir_entry *parent);
diff -puN include/linux/sched.h~o1-proc_pid_statm include/linux/sched.h
--- 25-sparc64/include/linux/sched.h~o1-proc_pid_statm	2004-08-24 23:18:25.758998440 -0700
+++ 25-sparc64-akpm/include/linux/sched.h	2004-08-24 23:18:25.779995248 -0700
@@ -225,8 +225,8 @@ struct mm_struct {
 	unsigned long start_code, end_code, start_data, end_data;
 	unsigned long start_brk, brk, start_stack;
 	unsigned long arg_start, arg_end, env_start, env_end;
-	unsigned long rlimit_rss, rss, total_vm, locked_vm;
-	unsigned long def_flags;
+	unsigned long rlimit_rss, rss, total_vm, locked_vm, shared_vm;
+	unsigned long exec_vm, stack_vm, def_flags;
 
 	unsigned long saved_auxv[40]; /* for /proc/PID/auxv */
 
diff -puN mm/mmap.c~o1-proc_pid_statm mm/mmap.c
--- 25-sparc64/mm/mmap.c~o1-proc_pid_statm	2004-08-24 23:18:25.759998288 -0700
+++ 25-sparc64-akpm/mm/mmap.c	2004-08-24 23:18:25.781994944 -0700
@@ -729,6 +729,28 @@ none:
 	return NULL;
 }
 
+void __vm_stat_account(struct mm_struct *mm, unsigned long flags,
+						struct file *file, long pages)
+{
+	const unsigned long stack_flags
+		= VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
+
+#ifdef CONFIG_HUGETLB_PAGE
+	if (flags & VM_HUGETLB) {	/* hugetlb: shared unless DONTCOPY */
+		if (!(flags & VM_DONTCOPY))
+			mm->shared_vm += pages;
+		return;			/* never stack_vm or exec_vm */
+	}
+#endif /* CONFIG_HUGETLB_PAGE */
+
+	if (file)			/* file-backed mapping */
+		mm->shared_vm += pages;
+	else if (flags & stack_flags)	/* anonymous and stack-growing */
+		mm->stack_vm += pages;
+	if (flags & VM_EXEC)		/* counted on top of the above */
+		mm->exec_vm += pages;	/* pages < 0 when unaccounting */
+}
+
 /*
  * The caller must hold down_write(current->mm->mmap_sem).
  */
@@ -987,6 +1009,7 @@ out:	
 					pgoff, flags & MAP_NONBLOCK);
 		down_write(&mm->mmap_sem);
 	}
+	__vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
 	return addr;
 
 unmap_and_free_vma:
@@ -1330,6 +1353,7 @@ int expand_stack(struct vm_area_struct *
 	vma->vm_mm->total_vm += grow;
 	if (vma->vm_flags & VM_LOCKED)
 		vma->vm_mm->locked_vm += grow;
+	__vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow);
 	anon_vma_unlock(vma);
 	return 0;
 }
@@ -1392,6 +1416,7 @@ int expand_stack(struct vm_area_struct *
 	vma->vm_mm->total_vm += grow;
 	if (vma->vm_flags & VM_LOCKED)
 		vma->vm_mm->locked_vm += grow;
+	__vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow);
 	anon_vma_unlock(vma);
 	return 0;
 }
@@ -1497,6 +1522,7 @@ static void unmap_vma(struct mm_struct *
 	area->vm_mm->total_vm -= len >> PAGE_SHIFT;
 	if (area->vm_flags & VM_LOCKED)
 		area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
+	vm_stat_unaccount(area);
 	area->vm_mm->unmap_area(area);
 	remove_vm_struct(area);
 }
diff -puN mm/mprotect.c~o1-proc_pid_statm mm/mprotect.c
--- 25-sparc64/mm/mprotect.c~o1-proc_pid_statm	2004-08-24 23:18:25.761997984 -0700
+++ 25-sparc64-akpm/mm/mprotect.c	2004-08-24 23:18:25.781994944 -0700
@@ -175,9 +175,11 @@ success:
 	 * vm_flags and vm_page_prot are protected by the mmap_sem
 	 * held in write mode.
 	 */
+	vm_stat_unaccount(vma);
 	vma->vm_flags = newflags;
 	vma->vm_page_prot = newprot;
 	change_protection(vma, start, end, newprot);
+	vm_stat_account(vma);
 	return 0;
 
 fail:
diff -puN mm/mremap.c~o1-proc_pid_statm mm/mremap.c
--- 25-sparc64/mm/mremap.c~o1-proc_pid_statm	2004-08-24 23:18:25.762997832 -0700
+++ 25-sparc64-akpm/mm/mremap.c	2004-08-24 23:18:25.783994640 -0700
@@ -224,6 +224,7 @@ static unsigned long move_vma(struct vm_
 	}
 
 	mm->total_vm += new_len >> PAGE_SHIFT;
+	__vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT);
 	if (vm_flags & VM_LOCKED) {
 		mm->locked_vm += new_len >> PAGE_SHIFT;
 		if (new_len > old_len)
@@ -360,6 +361,8 @@ unsigned long do_mremap(unsigned long ad
 				addr + new_len, vma->vm_pgoff, NULL);
 
 			current->mm->total_vm += pages;
+			__vm_stat_account(vma->vm_mm, vma->vm_flags,
+							vma->vm_file, pages);
 			if (vma->vm_flags & VM_LOCKED) {
 				current->mm->locked_vm += pages;
 				make_pages_present(addr + old_len,
_