From: Jens Axboe <axboe@suse.de>


Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/drivers/block/ll_rw_blk.c |   14 ++++--
 25-akpm/drivers/ide/ide-disk.c    |   31 ++++++++++-----
 25-akpm/drivers/ide/ide-io.c      |   78 ++++++++++++++++++++++++--------------
 25-akpm/include/linux/blkdev.h    |    8 +++
 25-akpm/include/linux/ide.h       |    7 +++
 5 files changed, 98 insertions(+), 40 deletions(-)

diff -puN drivers/block/ll_rw_blk.c~barrier-update drivers/block/ll_rw_blk.c
--- 25/drivers/block/ll_rw_blk.c~barrier-update	2004-06-07 00:02:23.048267904 -0700
+++ 25-akpm/drivers/block/ll_rw_blk.c	2004-06-07 00:02:23.060266080 -0700
@@ -2700,10 +2700,17 @@ void blk_recalc_rq_sectors(struct reques
 static int __end_that_request_first(struct request *req, int uptodate,
 				    int nr_bytes)
 {
-	int total_bytes, bio_nbytes, error = 0, next_idx = 0;
+	int total_bytes, bio_nbytes, error, next_idx = 0;
 	struct bio *bio;
 
 	/*
+	 * extend uptodate bool to allow < 0 value to be direct io error
+	 */
+	error = 0;
+	if (end_io_error(uptodate))
+		error = !uptodate ? -EIO : uptodate;
+
+	/*
 	 * for a REQ_BLOCK_PC request, we want to carry any eventual
 	 * sense key with us all the way through
 	 */
@@ -2711,7 +2718,6 @@ static int __end_that_request_first(stru
 		req->errors = 0;
 
 	if (!uptodate) {
-		error = -EIO;
 		if (blk_fs_request(req) && !(req->flags & REQ_QUIET))
 			printk("end_request: I/O error, dev %s, sector %llu\n",
 				req->rq_disk ? req->rq_disk->disk_name : "?",
@@ -2794,7 +2800,7 @@ static int __end_that_request_first(stru
 /**
  * end_that_request_first - end I/O on a request
  * @req:      the request being processed
- * @uptodate: 0 for I/O error
+ * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error
  * @nr_sectors: number of sectors to end I/O on
  *
  * Description:
@@ -2815,7 +2821,7 @@ EXPORT_SYMBOL(end_that_request_first);
 /**
  * end_that_request_chunk - end I/O on a request
  * @req:      the request being processed
- * @uptodate: 0 for I/O error
+ * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error
  * @nr_bytes: number of bytes to complete
  *
  * Description:
diff -puN drivers/ide/ide-disk.c~barrier-update drivers/ide/ide-disk.c
--- 25/drivers/ide/ide-disk.c~barrier-update	2004-06-07 00:02:23.050267600 -0700
+++ 25-akpm/drivers/ide/ide-disk.c	2004-06-07 00:02:23.062265776 -0700
@@ -1266,13 +1266,6 @@ static int set_nowerr(ide_drive_t *drive
 	return 0;
 }
 
-/* check if CACHE FLUSH (EXT) command is supported (bits defined in ATA-6) */
-#define ide_id_has_flush_cache(id)	((id)->cfs_enable_2 & 0x3000)
-
-/* some Maxtor disks have bit 13 defined incorrectly so check bit 10 too */
-#define ide_id_has_flush_cache_ext(id)	\
-	(((id)->cfs_enable_2 & 0x2400) == 0x2400)
-
 static int write_cache (ide_drive_t *drive, int arg)
 {
 	ide_task_t args;
@@ -1440,6 +1433,7 @@ static void idedisk_setup (ide_drive_t *
 {
 	struct hd_driveid *id = drive->id;
 	unsigned long long capacity;
+	int barrier;
 
 	idedisk_add_settings(drive);
 
@@ -1572,8 +1566,27 @@ static void idedisk_setup (ide_drive_t *
 
 	write_cache(drive, 1);
 
-	blk_queue_ordered(drive->queue, 1);
-	blk_queue_issue_flush_fn(drive->queue, idedisk_issue_flush);
+	/*
+	 * decide if we can sanely support flushes and barriers on
+	 * this drive. unfortunately not all drives advertise FLUSH_CACHE
+	 * support even if they support it. So assume FLUSH_CACHE is there
+	 * always. LBA48 drives are newer, so expect it to flag support
+	 * properly. We can safely support FLUSH_CACHE on lba48, if capacity
+	 * doesn't exceed lba28
+	 */
+	barrier = 1;
+	if (drive->addressing == 1) {
+		barrier = ide_id_has_flush_cache(id);
+		if (capacity > (1ULL << 28) && !ide_id_has_flush_cache_ext(id))
+			barrier = 0;
+	}
+
+	printk("%s: cache flushes %ssupported\n",
+		drive->name, barrier ? "" : "not ");
+	if (barrier) {
+		blk_queue_ordered(drive->queue, 1);
+		blk_queue_issue_flush_fn(drive->queue, idedisk_issue_flush);
+	}
 }
 
 static void ide_cacheflush_p(ide_drive_t *drive)
diff -puN drivers/ide/ide-io.c~barrier-update drivers/ide/ide-io.c
--- 25/drivers/ide/ide-io.c~barrier-update	2004-06-07 00:02:23.052267296 -0700
+++ 25-akpm/drivers/ide/ide-io.c	2004-06-07 00:02:23.064265472 -0700
@@ -67,7 +67,7 @@ static void ide_fill_flush_cmd(ide_drive
 	rq->buffer = buf;
 	rq->buffer[0] = WIN_FLUSH_CACHE;
 
-	if (drive->id->cfs_enable_2 & 0x2400)
+	if (ide_id_has_flush_cache_ext(drive->id))
 		rq->buffer[0] = WIN_FLUSH_CACHE_EXT;
 }
 
@@ -115,10 +115,10 @@ static int __ide_end_request(ide_drive_t
 	 * if failfast is set on a request, override number of sectors and
 	 * complete the whole request right now
 	 */
-	if (blk_noretry_request(rq) && !uptodate)
+	if (blk_noretry_request(rq) && end_io_error(uptodate))
 		nr_sectors = rq->hard_nr_sectors;
 
-	if (!blk_fs_request(rq) && !uptodate && !rq->errors)
+	if (!blk_fs_request(rq) && end_io_error(uptodate) && !rq->errors)
 		rq->errors = -EIO;
 
 	/*
@@ -227,7 +227,7 @@ u64 ide_get_error_location(ide_drive_t *
 	lcyl = args[4];
 	sect = args[3];
 
-	if (drive->id->cfs_enable_2 & 0x2400) {
+	if (ide_id_has_flush_cache_ext(drive->id)) {
 		low = (hcyl << 16) | (lcyl << 8) | sect;
 		HWIF(drive)->OUTB(drive->ctl|0x80, IDE_CONTROL_REG);
 		high = ide_read_24(drive);
@@ -275,32 +275,49 @@ static void ide_complete_barrier(ide_dri
 	}
 
 	/*
-	 * bummer, flush failed. if it was the pre-flush, fail the barrier.
-	 * if it was the post-flush, complete the succesful part of the request
-	 * and fail the rest
+	 * we need to end real_rq, but it's not on the queue currently.
+	 * put it back on the queue, so we don't have to special case
+	 * anything else for completing it
 	 */
-	good_sectors = 0;
-	if (blk_barrier_postflush(rq)) {
-		sector = ide_get_error_location(drive, rq->buffer);
-
-		if ((sector >= real_rq->hard_sector) &&
-		    (sector < real_rq->hard_sector + real_rq->hard_nr_sectors))
-			good_sectors = sector - real_rq->hard_sector;
-	} else
-		sector = real_rq->hard_sector;
+	if (!blk_barrier_postflush(rq))
+		elv_requeue_request(drive->queue, real_rq);
 
-	bad_sectors = real_rq->hard_nr_sectors - good_sectors;
-	if (good_sectors)
-		__ide_end_request(drive, real_rq, 1, good_sectors);
-	if (bad_sectors)
-		__ide_end_request(drive, real_rq, 0, bad_sectors);
+	/*
+	 * drive aborted flush command, assume FLUSH_CACHE_* doesn't
+	 * work and disable barrier support
+	 */
+	if (error & ABRT_ERR) {
+		printk(KERN_ERR "%s: barrier support doesn't work\n", drive->name);
+		__ide_end_request(drive, real_rq, -EOPNOTSUPP, real_rq->hard_nr_sectors);
+		blk_queue_ordered(drive->queue, 0);
+		blk_queue_issue_flush_fn(drive->queue, NULL);
+	} else {
+		/*
+		 * find out what part of the request failed
+		 */
+		good_sectors = 0;
+		if (blk_barrier_postflush(rq)) {
+			sector = ide_get_error_location(drive, rq->buffer);
 
-	drive->doing_barrier = 0;
+			if ((sector >= real_rq->hard_sector) &&
+			    (sector < real_rq->hard_sector + real_rq->hard_nr_sectors))
+				good_sectors = sector - real_rq->hard_sector;
+		} else
+			sector = real_rq->hard_sector;
+
+		bad_sectors = real_rq->hard_nr_sectors - good_sectors;
+		if (good_sectors)
+			__ide_end_request(drive, real_rq, 1, good_sectors);
+		if (bad_sectors)
+			__ide_end_request(drive, real_rq, 0, bad_sectors);
+
+		printk(KERN_ERR "%s: failed barrier write: "
+				"sector=%Lx(good=%d/bad=%d)\n",
+				drive->name, (unsigned long long)sector,
+				good_sectors, bad_sectors);
+	}
 
-	printk(KERN_ERR "%s: failed barrier write: "
-			"sector=%Lx(good=%d/bad=%d)\n",
-			drive->name, (unsigned long long)sector,
-			good_sectors, bad_sectors);
+	drive->doing_barrier = 0;
 }
 
 /**
@@ -888,8 +905,15 @@ repeat:	
 	 * though that is 3 requests, it must be seen as a single transaction.
 	 * we must not preempt this drive until that is complete
 	 */
-	if (drive->doing_barrier)
+	if (drive->doing_barrier) {
+		/*
+		 * small race where queue could get replugged during
+		 * the 3-request flush cycle, just yank the plug since
+		 * we want it to finish asap
+		 */
+		blk_remove_plug(drive->queue);
 		return drive;
+	}
 
 	do {
 		if ((!drive->sleep || time_after_eq(jiffies, drive->sleep))
diff -puN include/linux/blkdev.h~barrier-update include/linux/blkdev.h
--- 25/include/linux/blkdev.h~barrier-update	2004-06-07 00:02:23.054266992 -0700
+++ 25-akpm/include/linux/blkdev.h	2004-06-07 00:02:23.065265320 -0700
@@ -571,6 +571,14 @@ extern void end_that_request_last(struct
 extern int process_that_request_first(struct request *, unsigned int);
 extern void end_request(struct request *req, int uptodate);
 
+/*
+ * end_that_request_first/chunk() takes an uptodate argument. we account
+ * any value <= as an io error. 0 means -EIO for compatability reasons,
+ * any other < 0 value is the direct error type. An uptodate value of
+ * 1 indicates successful io completion
+ */
+#define end_io_error(uptodate)	(unlikely((uptodate) <= 0))
+
 static inline void blkdev_dequeue_request(struct request *req)
 {
 	BUG_ON(list_empty(&req->queuelist));
diff -puN include/linux/ide.h~barrier-update include/linux/ide.h
--- 25/include/linux/ide.h~barrier-update	2004-06-07 00:02:23.055266840 -0700
+++ 25-akpm/include/linux/ide.h	2004-06-07 00:02:23.066265168 -0700
@@ -1697,4 +1697,11 @@ extern struct semaphore ide_cfg_sem;
 
 extern struct bus_type ide_bus_type;
 
+/* check if CACHE FLUSH (EXT) command is supported (bits defined in ATA-6) */
+#define ide_id_has_flush_cache(id)	((id)->cfs_enable_2 & 0x3000)
+
+/* some Maxtor disks have bit 13 defined incorrectly so check bit 10 too */
+#define ide_id_has_flush_cache_ext(id)	\
+	(((id)->cfs_enable_2 & 0x2400) == 0x2400)
+
 #endif /* _IDE_H */
_