From: "V. Rajesh" If a vma is already present in an i_mmap list of a mapping, then it is racy to update the vm_start, vm_end, and vm_pgoff members of the vma without holding the mapping's i_shared_sem. This is because the updates can race with invalidate_mmap_range_list. I audited all the places that assign vm_start, vm_end, and vm_pgoff. AFAIK, the following is the list of questionable places: 1) This patch fixes the racy split_vma. Kernel 2.4 does the right thing, but the following changesets introduced a race. http://linux.bkbits.net:8080/linux-2.5/patch@1.536.34.4 http://linux.bkbits.net:8080/linux-2.5/patch@1.536.34.5 You can use the patch and programs in the following URL to trigger the race. http://www-personal.engin.umich.edu/~vrajesh/linux/truncate-race/ 2) This patch also locks a small racy window in vma_merge. 3) In few cases vma_merge and do_mremap expand a vma by adding extra length to vm_end without holding i_shared_sem. I think that's fine. 4) In arch/sparc64, vm_end is updated without holding i_shared_sem. Check make_hugetlb_page_present. I hope that is fine, but I am not sure. 25-akpm/mm/mmap.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 files changed, 46 insertions(+), 6 deletions(-) diff -puN mm/mmap.c~vma-split-truncate-race-fix mm/mmap.c --- 25/mm/mmap.c~vma-split-truncate-race-fix Thu Oct 9 01:50:09 2003 +++ 25-akpm/mm/mmap.c Thu Oct 9 01:50:09 2003 @@ -278,6 +278,26 @@ static void vma_link(struct mm_struct *m } /* + * Insert vm structure into process list sorted by address and into the inode's + * i_mmap ring. The caller should hold mm->page_table_lock and + * ->f_mappping->i_shared_sem if vm_file is non-NULL. + */ +static void +__insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma) +{ + struct vm_area_struct * __vma, * prev; + struct rb_node ** rb_link, * rb_parent; + + __vma = find_vma_prepare(mm, vma->vm_start,&prev, &rb_link, &rb_parent); + if (__vma && __vma->vm_start < vma->vm_end) + BUG(); + __vma_link(mm, vma, prev, rb_link, rb_parent); + mark_mm_hugetlb(mm, vma); + mm->map_count++; + validate_mm(mm); +} + +/* * If the vma has a ->close operation then the driver probably needs to release * per-vma resources, so we don't attempt to merge those. */ @@ -417,10 +437,14 @@ static int vma_merge(struct mm_struct *m pgoff, (end - addr) >> PAGE_SHIFT)) return 0; if (end == prev->vm_start) { + if (file) + down(&file->f_mapping->i_shared_sem); spin_lock(lock); prev->vm_start = addr; prev->vm_pgoff -= (end - addr) >> PAGE_SHIFT; spin_unlock(lock); + if (file) + up(&file->f_mapping->i_shared_sem); return 1; } } @@ -1140,6 +1164,7 @@ int split_vma(struct mm_struct * mm, str unsigned long addr, int new_below) { struct vm_area_struct *new; + struct address_space *mapping = NULL; if (mm->map_count >= MAX_MAP_COUNT) return -ENOMEM; @@ -1153,12 +1178,9 @@ int split_vma(struct mm_struct * mm, str INIT_LIST_HEAD(&new->shared); - if (new_below) { + if (new_below) new->vm_end = addr; - vma->vm_start = addr; - vma->vm_pgoff += ((addr - new->vm_start) >> PAGE_SHIFT); - } else { - vma->vm_end = addr; + else { new->vm_start = addr; new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT); } @@ -1169,7 +1191,25 @@ int split_vma(struct mm_struct * mm, str if (new->vm_ops && new->vm_ops->open) new->vm_ops->open(new); - insert_vm_struct(mm, new); + if (vma->vm_file) + mapping = vma->vm_file->f_mapping; + + if (mapping) + down(&mapping->i_shared_sem); + spin_lock(&mm->page_table_lock); + + if (new_below) { + vma->vm_start = addr; + vma->vm_pgoff += ((addr - new->vm_start) >> PAGE_SHIFT); + } else + vma->vm_end = addr; + + __insert_vm_struct(mm, new); + + spin_unlock(&mm->page_table_lock); + if (mapping) + up(&mapping->i_shared_sem); + return 0; } _