From: Suparna Bhattacharya <suparna@in.ibm.com>

The open-coded readahead logic which was added in aio_pread is best avoided
if possible.  Duplicating similar checks across the sync and aio paths (e.g.
checking for O_DIRECT) and the divergence of logic between these paths
isn't good from a long-term maintainability standpoint.

Secondly, this logic really belongs in the generic fops methods for
aio_read rather than in the high-level aio handlers; it should be possible
for a filesystem to override the logic with its own if suitable.

So, this patch moves the readahead out of aio_pread, and instead modifies
do_generic_mapping_read to readahead _all_ the pages in the range requested
upfront before it starts waiting for any of the pages to become uptodate. 
This leads to sane readahead behaviour for the kind of i/o patterns
generated by streaming aio reads.  It also takes care not to repeatedly
issue readaheads for subsequent AIO retries for the same request.



 fs/aio.c            |   19 +------------------
 include/linux/aio.h |    3 +++
 mm/filemap.c        |   30 +++++++++++++++++++++++-------
 3 files changed, 27 insertions(+), 25 deletions(-)

diff -puN fs/aio.c~aio-readahead-rework fs/aio.c
--- 25/fs/aio.c~aio-readahead-rework	2003-08-30 15:42:34.000000000 -0700
+++ 25-akpm/fs/aio.c	2003-08-30 15:42:34.000000000 -0700
@@ -1363,25 +1363,8 @@ ssize_t aio_setup_iocb(struct kiocb *kio
 			kiocb->ki_left)))
 			break;
 		ret = -EINVAL;
-		if (file->f_op->aio_read) {
-			/*
-			 * Do not do readahead for DIRECT i/o
-			 */
-			if (!(file->f_flags & O_DIRECT)) {
-				struct address_space *mapping;
-				unsigned long index;
-				unsigned long end;
-
-				mapping = file->f_dentry->d_inode->i_mapping;
-				index = kiocb->ki_pos >> PAGE_CACHE_SHIFT;
-				end = (kiocb->ki_pos + kiocb->ki_left) >>
-						PAGE_CACHE_SHIFT;
-				for (; index < end; index++)
-					page_cache_readahead(mapping,
-						&file->f_ra, file, index);
-			}
+		if (file->f_op->aio_read)
 			kiocb->ki_retry = aio_pread;
-		}
 		break;
 	case IOCB_CMD_PWRITE:
 		ret = -EBADF;
diff -puN include/linux/aio.h~aio-readahead-rework include/linux/aio.h
--- 25/include/linux/aio.h~aio-readahead-rework	2003-08-30 15:42:34.000000000 -0700
+++ 25-akpm/include/linux/aio.h	2003-08-30 15:42:34.000000000 -0700
@@ -179,6 +179,9 @@ int FASTCALL(io_submit_one(struct kioctx
 	dump_stack(); \
 	}
 
+#define io_wait_to_kiocb(wait) container_of(wait, struct kiocb, ki_wait)
+#define is_retried_kiocb(iocb) ((iocb)->ki_retried > 1)
+
 #include <linux/aio_abi.h>
 
 static inline struct kiocb *list_kiocb(struct list_head *h)
diff -puN mm/filemap.c~aio-readahead-rework mm/filemap.c
--- 25/mm/filemap.c~aio-readahead-rework	2003-08-30 15:42:34.000000000 -0700
+++ 25-akpm/mm/filemap.c	2003-08-30 15:42:34.000000000 -0700
@@ -617,21 +617,39 @@ void do_generic_mapping_read(struct addr
 			     read_actor_t actor)
 {
 	struct inode *inode = mapping->host;
-	unsigned long index, offset;
+	unsigned long index, offset, last, end_index;
 	struct page *cached_page;
+	loff_t isize = i_size_read(inode);
 	int error;
 
 	cached_page = NULL;
 	index = *ppos >> PAGE_CACHE_SHIFT;
 	offset = *ppos & ~PAGE_CACHE_MASK;
 
+	last = (*ppos + desc->count) >> PAGE_CACHE_SHIFT;
+	end_index = isize >> PAGE_CACHE_SHIFT;
+	if (last > end_index)
+		last = end_index;
+
+	/* Don't repeat the readahead if we are executing aio retries */
+	if (in_aio()) {
+		if (is_retried_kiocb(io_wait_to_kiocb(current->io_wait)))
+			goto done_readahead;
+	}
+
+	/*
+	 * Let the readahead logic know upfront about all
+	 * the pages we'll need to satisfy this request
+	 */
+	for (; index < last; index++)
+		page_cache_readahead(mapping, ra, filp, index);
+	index = *ppos >> PAGE_CACHE_SHIFT;
+
+done_readahead:
 	for (;;) {
 		struct page *page;
-		unsigned long end_index, nr, ret;
-		loff_t isize = i_size_read(inode);
+		unsigned long nr, ret;
 
-		end_index = isize >> PAGE_CACHE_SHIFT;
-			
 		if (index > end_index)
 			break;
 		nr = PAGE_CACHE_SIZE;
@@ -642,8 +660,6 @@ void do_generic_mapping_read(struct addr
 		}
 
 		cond_resched();
-		if (is_sync_wait(current->io_wait))
-			page_cache_readahead(mapping, ra, filp, index);
 
 		nr = nr - offset;
 find_page:

_