From: "V. Rajesh" <vrajesh@eecs.umich.edu>

If a vma is already present in an i_mmap list of a mapping,
then it is racy to update the vm_start, vm_end, and vm_pgoff 
members of the vma without holding the mapping's i_shared_sem. 
This is because the updates can race with invalidate_mmap_range_list.

I audited all the places that assign vm_start, vm_end, and vm_pgoff.
AFAIK, the following is the list of questionable places:

1) This patch fixes the racy split_vma. Kernel 2.4 does the
   right thing, but the following changesets introduced a race.

   http://linux.bkbits.net:8080/linux-2.5/patch@1.536.34.4
   http://linux.bkbits.net:8080/linux-2.5/patch@1.536.34.5

   You can use the patch and programs in the following URL to 
   trigger the race.

  http://www-personal.engin.umich.edu/~vrajesh/linux/truncate-race/

2) This patch also locks a small racy window in vma_merge.

3) In few cases vma_merge and do_mremap expand a vma by adding 
   extra length to vm_end without holding i_shared_sem. I think 
   that's fine.

4) In arch/sparc64, vm_end is updated without holding i_shared_sem.
   Check make_hugetlb_page_present.  I hope that is fine, but 
   I am not sure. 




 mm/filemap.c |    3 ++
 mm/mmap.c    |   65 +++++++++++++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 57 insertions(+), 11 deletions(-)

diff -puN mm/mmap.c~vma-split-truncate-race-fix-2 mm/mmap.c
--- 25/mm/mmap.c~vma-split-truncate-race-fix-2	2003-10-17 17:52:12.000000000 -0700
+++ 25-akpm/mm/mmap.c	2003-10-18 16:26:29.000000000 -0700
@@ -280,6 +280,26 @@ static void vma_link(struct mm_struct *m
 }
 
 /*
+ * Insert vm structure into process list sorted by address and into the inode's
+ * i_mmap ring. The caller should hold mm->page_table_lock and
+ * ->f_mappping->i_shared_sem if vm_file is non-NULL.
+ */
+static void
+__insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
+{
+	struct vm_area_struct * __vma, * prev;
+	struct rb_node ** rb_link, * rb_parent;
+
+	__vma = find_vma_prepare(mm, vma->vm_start,&prev, &rb_link, &rb_parent);
+	if (__vma && __vma->vm_start < vma->vm_end)
+		BUG();
+	__vma_link(mm, vma, prev, rb_link, rb_parent);
+	mark_mm_hugetlb(mm, vma);
+	mm->map_count++;
+	validate_mm(mm);
+}
+
+/*
  * If the vma has a ->close operation then the driver probably needs to release
  * per-vma resources, so we don't attempt to merge those.
  */
@@ -351,7 +371,9 @@ static int vma_merge(struct mm_struct *m
 			unsigned long end, unsigned long vm_flags,
 			struct file *file, unsigned long pgoff)
 {
-	spinlock_t * lock = &mm->page_table_lock;
+	spinlock_t *lock = &mm->page_table_lock;
+	struct inode *inode = file ? file->f_dentry->d_inode : NULL;
+	struct semaphore *i_shared_sem;
 
 	/*
 	 * We later require that vma->vm_flags == vm_flags, so this tests
@@ -360,6 +382,8 @@ static int vma_merge(struct mm_struct *m
 	if (vm_flags & VM_SPECIAL)
 		return 0;
 
+	i_shared_sem = file ? &inode->i_mapping->i_shared_sem : NULL;
+
 	if (!prev) {
 		prev = rb_entry(rb_parent, struct vm_area_struct, vm_rb);
 		goto merge_next;
@@ -372,12 +396,11 @@ static int vma_merge(struct mm_struct *m
 			is_mergeable_vma(prev, file, vm_flags) &&
 			can_vma_merge_after(prev, vm_flags, file, pgoff)) {
 		struct vm_area_struct *next;
-		struct inode *inode = file ? file->f_dentry->d_inode : NULL;
 		int need_up = 0;
 
 		if (unlikely(file && prev->vm_next &&
 				prev->vm_next->vm_file == file)) {
-			down(&inode->i_mapping->i_shared_sem);
+			down(i_shared_sem);
 			need_up = 1;
 		}
 		spin_lock(lock);
@@ -395,7 +418,7 @@ static int vma_merge(struct mm_struct *m
 			__remove_shared_vm_struct(next, inode);
 			spin_unlock(lock);
 			if (need_up)
-				up(&inode->i_mapping->i_shared_sem);
+				up(i_shared_sem);
 			if (file)
 				fput(file);
 
@@ -405,7 +428,7 @@ static int vma_merge(struct mm_struct *m
 		}
 		spin_unlock(lock);
 		if (need_up)
-			up(&inode->i_mapping->i_shared_sem);
+			up(i_shared_sem);
 		return 1;
 	}
 
@@ -419,10 +442,14 @@ static int vma_merge(struct mm_struct *m
 				pgoff, (end - addr) >> PAGE_SHIFT))
 			return 0;
 		if (end == prev->vm_start) {
+			if (file)
+				down(i_shared_sem);
 			spin_lock(lock);
 			prev->vm_start = addr;
 			prev->vm_pgoff -= (end - addr) >> PAGE_SHIFT;
 			spin_unlock(lock);
+			if (file)
+				up(i_shared_sem);
 			return 1;
 		}
 	}
@@ -1142,6 +1169,7 @@ int split_vma(struct mm_struct * mm, str
 	      unsigned long addr, int new_below)
 {
 	struct vm_area_struct *new;
+	struct address_space *mapping = NULL;
 
 	if (mm->map_count >= MAX_MAP_COUNT)
 		return -ENOMEM;
@@ -1155,12 +1183,9 @@ int split_vma(struct mm_struct * mm, str
 
 	INIT_LIST_HEAD(&new->shared);
 
-	if (new_below) {
+	if (new_below)
 		new->vm_end = addr;
-		vma->vm_start = addr;
-		vma->vm_pgoff += ((addr - new->vm_start) >> PAGE_SHIFT);
-	} else {
-		vma->vm_end = addr;
+	else {
 		new->vm_start = addr;
 		new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
 	}
@@ -1171,7 +1196,25 @@ int split_vma(struct mm_struct * mm, str
 	if (new->vm_ops && new->vm_ops->open)
 		new->vm_ops->open(new);
 
-	insert_vm_struct(mm, new);
+	if (vma->vm_file)
+		 mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
+
+	if (mapping)
+		down(&mapping->i_shared_sem);
+	spin_lock(&mm->page_table_lock);
+
+	if (new_below) {
+		vma->vm_start = addr;
+		vma->vm_pgoff += ((addr - new->vm_start) >> PAGE_SHIFT);
+	} else
+		vma->vm_end = addr;
+
+	__insert_vm_struct(mm, new);
+
+	spin_unlock(&mm->page_table_lock);
+	if (mapping)
+		up(&mapping->i_shared_sem);
+
 	return 0;
 }
 
diff -puN mm/filemap.c~vma-split-truncate-race-fix-2 mm/filemap.c
--- 25/mm/filemap.c~vma-split-truncate-race-fix-2	2003-10-17 17:52:12.000000000 -0700
+++ 25-akpm/mm/filemap.c	2003-10-18 16:26:15.000000000 -0700
@@ -61,6 +61,9 @@
  *        ->swap_device_lock	(exclusive_swap_page, others)
  *          ->mapping->page_lock
  *
+ *  ->i_sem
+ *    ->i_shared_sem		(truncate->invalidate_mmap_range)
+ *
  *  ->mmap_sem
  *    ->i_shared_sem		(various places)
  *

_