The radix-tree walk for writeback has a couple of problems:

a) It always scans a file from its first dirty page, so if someone
   is repeatedly dirtying the front part of a file, pages near the end
   may be starved of writeout.  (Well, not completely: the `kupdate'
   function will write an entire file once the file's dirty timestamp
   has expired.)

b) When the disk queues are huge (e.g. 10000 requests), there can be a
   very large number of locked pages.  Scanning past these in writeback
   consumes a significant amount of CPU time.

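So in each address_space we record the index at which the last batch of
writeout terminated and start the next batch of writeback from that
point.  If no dirty pages are found from that point onward, the scan
wraps back to the start of the file once.  This applies only to
WB_SYNC_NONE writeback; other sync modes still sweep the whole file
from index zero.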



---

 25-akpm/fs/mpage.c         |   20 +++++++++++++++++++-
 25-akpm/include/linux/fs.h |    1 +
 2 files changed, 20 insertions(+), 1 deletion(-)

diff -puN fs/mpage.c~writeback-search-start fs/mpage.c
--- 25/fs/mpage.c~writeback-search-start	2004-03-23 23:29:02.868978784 -0800
+++ 25-akpm/fs/mpage.c	2004-03-23 23:30:28.099021848 -0800
@@ -610,6 +610,7 @@ mpage_writepages(struct address_space *m
 	struct pagevec pvec;
 	int nr_pages;
 	pgoff_t index;
+	int scanned = 0;
 
 	if (wbc->nonblocking && bdi_write_congested(bdi)) {
 		wbc->encountered_congestion = 1;
@@ -621,11 +622,18 @@ mpage_writepages(struct address_space *m
 		writepage = mapping->a_ops->writepage;
 
 	pagevec_init(&pvec, 0);
-	index = 0;
+	if (wbc->sync_mode == WB_SYNC_NONE) {
+		index = mapping->writeback_index; /* Start from prev offset */
+	} else {
+		index = 0;			  /* whole-file sweep */
+		scanned = 1;
+	}
+retry:
 	while (!done && (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
 					PAGECACHE_TAG_DIRTY, PAGEVEC_SIZE))) {
 		unsigned i;
 
+		scanned = 1;
 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
 
@@ -672,6 +680,16 @@ mpage_writepages(struct address_space *m
 		}
 		pagevec_release(&pvec);
 	}
+	if (!scanned && !done) {
+		/*
+		 * We hit the last page and there is more work to be done: wrap
+		 * back to the start of the file
+		 */
+		scanned = 1;
+		index = 0;
+		goto retry;
+	}
+	mapping->writeback_index = index;
 	if (bio)
 		mpage_bio_submit(WRITE, bio);
 	return ret;
diff -puN include/linux/fs.h~writeback-search-start include/linux/fs.h
--- 25/include/linux/fs.h~writeback-search-start	2004-03-23 23:29:02.870978480 -0800
+++ 25-akpm/include/linux/fs.h	2004-03-23 23:29:02.873978024 -0800
@@ -327,6 +327,7 @@ struct address_space {
 	struct radix_tree_root	page_tree;	/* radix tree of all pages */
 	spinlock_t		tree_lock;	/* and spinlock protecting it */
 	unsigned long		nrpages;	/* number of total pages */
+	pgoff_t			writeback_index;/* writeback starts here */
 	struct address_space_operations *a_ops;	/* methods */
 	struct list_head	i_mmap;		/* list of private mappings */
 	struct list_head	i_mmap_shared;	/* list of shared mappings */

_