Instead, use a radix-tree walk of the pages which are tagged as being under writeback. The new function wait_on_page_writeback_range() was generalised out of filemap_fdatawait(). We can later use this to provide concurrent fsync of just a section of a file. DESC stop-using-locked-pages fix EDESC From: Chris Mason [ data not getting flushed ] Ummm, this might help: DESC wait_on_page_writeback_range fix EDESC wait_on_page_writeback_range() calls pagevec_lookup_tag() with a page count of min(end - index + 1, (pgoff_t)PAGEVEC_SIZE). For the whole-file case used by filemap_fdatawait() (start = 0, end = -1), end - index + 1 evaluates to (unsigned long)-1 - 0 + 1, which wraps to 0, so the lookup is asked for zero pages. Use min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1 instead, which cannot wrap. --- 25-akpm/fs/inode.c | 1 25-akpm/include/linux/fs.h | 1 25-akpm/include/linux/mm.h | 2 - 25-akpm/mm/filemap.c | 74 ++++++++++++++++++++++----------------------- 25-akpm/mm/swap_state.c | 1 25-akpm/mm/truncate.c | 1 mm/page-writeback.c | 0 mm/vmscan.c | 0 8 files changed, 38 insertions(+), 42 deletions(-) diff -puN fs/inode.c~stop-using-locked-pages fs/inode.c --- 25/fs/inode.c~stop-using-locked-pages 2004-03-26 12:36:39.097155952 -0800 +++ 25-akpm/fs/inode.c 2004-03-26 12:36:39.110153976 -0800 @@ -177,7 +177,6 @@ void inode_init_once(struct inode *inode memset(inode, 0, sizeof(*inode)); INIT_HLIST_NODE(&inode->i_hash); INIT_LIST_HEAD(&inode->i_data.clean_pages); - INIT_LIST_HEAD(&inode->i_data.locked_pages); INIT_LIST_HEAD(&inode->i_dentry); INIT_LIST_HEAD(&inode->i_devices); sema_init(&inode->i_sem, 1); diff -puN mm/filemap.c~stop-using-locked-pages mm/filemap.c --- 25/mm/filemap.c~stop-using-locked-pages 2004-03-26 12:36:39.099155648 -0800 +++ 25-akpm/mm/filemap.c 2004-03-26 12:36:39.112153672 -0800 @@ -156,7 +156,6 @@ int filemap_fdatawrite(struct address_sp { return __filemap_fdatawrite(mapping, WB_SYNC_ALL); } - EXPORT_SYMBOL(filemap_fdatawrite); /* @@ -167,51 +166,40 @@ int filemap_flush(struct address_space * { return __filemap_fdatawrite(mapping, WB_SYNC_NONE); } - EXPORT_SYMBOL(filemap_flush); -/** - * filemap_fdatawait - walk the list of locked pages of the given address - * space and wait 
for all of them. - * @mapping: address space structure to wait for +/* + * Wait for writeback to complete against pages indexed by start->end + * inclusive */ -int filemap_fdatawait(struct address_space * mapping) +static int wait_on_page_writeback_range(struct address_space *mapping, + pgoff_t start, pgoff_t end) { + struct pagevec pvec; + int nr_pages; int ret = 0; - int progress; - -restart: - progress = 0; - spin_lock_irq(&mapping->tree_lock); - while (!list_empty(&mapping->locked_pages)) { - struct page *page; + pgoff_t index; - page = list_entry(mapping->locked_pages.next,struct page,list); - list_del_init(&page->list); + if (end < start) + return 0; - if (!PageWriteback(page)) { - if (++progress > 32) { - if (need_resched()) { - spin_unlock_irq(&mapping->tree_lock); - __cond_resched(); - goto restart; - } - } - continue; + pagevec_init(&pvec, 0); + index = start; + while ((nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_WRITEBACK, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + unsigned i; + + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + wait_on_page_writeback(page); + if (PageError(page)) + ret = -EIO; } - - progress = 0; - page_cache_get(page); - spin_unlock_irq(&mapping->tree_lock); - - wait_on_page_writeback(page); - if (PageError(page)) - ret = -EIO; - - page_cache_release(page); - spin_lock_irq(&mapping->tree_lock); + pagevec_release(&pvec); + cond_resched(); } - spin_unlock_irq(&mapping->tree_lock); /* Check for outstanding write errors */ if (test_and_clear_bit(AS_ENOSPC, &mapping->flags)) @@ -221,6 +209,18 @@ restart: return ret; } + +/** + * filemap_fdatawait - walk the list of under-writeback pages of the given + * address space and wait for all of them. 
+ * + * @mapping: address space structure to wait for + */ +int filemap_fdatawait(struct address_space *mapping) +{ + return wait_on_page_writeback_range(mapping, 0, -1); +} + EXPORT_SYMBOL(filemap_fdatawait); int filemap_write_and_wait(struct address_space *mapping) diff -puN mm/page-writeback.c~stop-using-locked-pages mm/page-writeback.c diff -puN mm/swap_state.c~stop-using-locked-pages mm/swap_state.c --- 25/mm/swap_state.c~stop-using-locked-pages 2004-03-26 12:36:39.102155192 -0800 +++ 25-akpm/mm/swap_state.c 2004-03-26 12:36:39.112153672 -0800 @@ -28,7 +28,6 @@ struct address_space swapper_space = { .page_tree = RADIX_TREE_INIT(GFP_ATOMIC), .tree_lock = SPIN_LOCK_UNLOCKED, .clean_pages = LIST_HEAD_INIT(swapper_space.clean_pages), - .locked_pages = LIST_HEAD_INIT(swapper_space.locked_pages), .a_ops = &swap_aops, .backing_dev_info = &swap_backing_dev_info, .i_mmap = LIST_HEAD_INIT(swapper_space.i_mmap), diff -puN mm/truncate.c~stop-using-locked-pages mm/truncate.c --- 25/mm/truncate.c~stop-using-locked-pages 2004-03-26 12:36:39.103155040 -0800 +++ 25-akpm/mm/truncate.c 2004-03-26 12:36:39.113153520 -0800 @@ -178,7 +178,6 @@ void truncate_inode_pages(struct address if (lstart == 0) { WARN_ON(mapping->nrpages); WARN_ON(!list_empty(&mapping->clean_pages)); - WARN_ON(!list_empty(&mapping->locked_pages)); } } diff -puN mm/vmscan.c~stop-using-locked-pages mm/vmscan.c diff -puN include/linux/fs.h~stop-using-locked-pages include/linux/fs.h --- 25/include/linux/fs.h~stop-using-locked-pages 2004-03-26 12:36:39.106154584 -0800 +++ 25-akpm/include/linux/fs.h 2004-03-26 12:36:39.114153368 -0800 @@ -327,7 +327,6 @@ struct address_space { struct radix_tree_root page_tree; /* radix tree of all pages */ spinlock_t tree_lock; /* and spinlock protecting it */ struct list_head clean_pages; /* list of clean pages */ - struct list_head locked_pages; /* list of locked pages */ unsigned long nrpages; /* number of total pages */ struct address_space_operations *a_ops; /* methods */ 
struct list_head i_mmap; /* list of private mappings */ diff -puN include/linux/mm.h~stop-using-locked-pages include/linux/mm.h --- 25/include/linux/mm.h~stop-using-locked-pages 2004-03-26 12:36:39.108154280 -0800 +++ 25-akpm/include/linux/mm.h 2004-03-26 12:36:39.115153216 -0800 @@ -182,7 +182,7 @@ struct page { atomic_t count; /* Usage count, see below. */ struct list_head list; /* ->mapping has some page lists. */ struct address_space *mapping; /* The inode (or ...) we belong to. */ - unsigned long index; /* Our offset within mapping. */ + pgoff_t index; /* Our offset within mapping. */ struct list_head lru; /* Pageout list, eg. active_list; protected by zone->lru_lock !! */ union { _