From: Neil Brown <neilb@cse.unsw.edu.au>

I've made a bunch of changes to the 'md' bits - largely moving the
unplugging into the individual personalities, which know more about
which member drives are actually in use.

Each personality now sets its own unplug_fn on the array's request
queue and unplugs just the queues of its active member devices,
skipping faulty ones where that matters.  md_unplug_mddev() is gone;
md_do_sync() unplugs through the array's own unplug_fn instead.
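
The waiters (raid1's barrier code, raid5/raid6's get_active_stripe())
use a new 'cmd' argument to wait_event_lock_irq(), which is executed
each time round the wait loop.  Roughly (a sketch of the md_k.h hunk
at the end, with the wait-queue setup elided; 'cmd' is whatever
statement the caller passes, e.g. unplug_slaves(conf->mddev)):

	/* caller enters with 'lock' held */
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (condition)
			break;
		spin_unlock_irq(&lock);
		cmd;		/* runs with the lock dropped */
		schedule();
		spin_lock_irq(&lock);
	}
	current->state = TASK_RUNNING;
	/* 'lock' is held again on return */

so anyone about to sleep waiting for array resources first pushes any
queued requests down to the member devices.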


---

 25-akpm/drivers/md/linear.c       |   15 ++++++++++
 25-akpm/drivers/md/md.c           |   35 +++--------------------
 25-akpm/drivers/md/multipath.c    |   23 +++++++++++++++
 25-akpm/drivers/md/raid0.c        |   17 +++++++++++
 25-akpm/drivers/md/raid1.c        |   56 +++++++++++++++++++++++++++++++-------
 25-akpm/drivers/md/raid5.c        |   36 +++++++++++++++++++++---
 25-akpm/drivers/md/raid6main.c    |   36 ++++++++++++++++++++++--
 25-akpm/include/linux/raid/md_k.h |    7 ++--
 8 files changed, 175 insertions(+), 50 deletions(-)

diff -puN drivers/md/linear.c~md-unplug-update drivers/md/linear.c
--- 25/drivers/md/linear.c~md-unplug-update	2004-04-06 19:04:25.157284248 -0700
+++ 25-akpm/drivers/md/linear.c	2004-04-06 19:04:25.172281968 -0700
@@ -80,6 +80,20 @@ static int linear_mergeable_bvec(request
 	return maxsectors << 9;
 }
 
+static void linear_unplug(request_queue_t *q)
+{
+	mddev_t *mddev = q->queuedata;
+	linear_conf_t *conf = mddev_to_conf(mddev);
+	int i;
+
+	for (i=0; i < mddev->raid_disks; i++) {
+		request_queue_t *r_queue = bdev_get_queue(conf->disks[i].rdev->bdev);
+		if (r_queue->unplug_fn)
+			r_queue->unplug_fn(r_queue);
+	}
+}
+
+
 static int linear_run (mddev_t *mddev)
 {
 	linear_conf_t *conf;
@@ -185,6 +199,7 @@ static int linear_run (mddev_t *mddev)
 		BUG();
 
 	blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
+	mddev->queue->unplug_fn = linear_unplug;
 	return 0;
 
 out:
diff -puN drivers/md/md.c~md-unplug-update drivers/md/md.c
--- 25/drivers/md/md.c~md-unplug-update	2004-04-06 19:04:25.159283944 -0700
+++ 25-akpm/drivers/md/md.c	2004-04-06 19:04:25.174281664 -0700
@@ -160,30 +160,6 @@ static int md_fail_request (request_queu
 	return 0;
 }
 
-void md_unplug_mddev(mddev_t *mddev)
-{
-	struct list_head *tmp;
-	mdk_rdev_t *rdev;
-
-	/*
-	 * this list iteration is done without any locking in md?!
-	 */
-	ITERATE_RDEV(mddev, rdev, tmp) {
-		request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
-
-		if (r_queue->unplug_fn)
-			r_queue->unplug_fn(r_queue);
-	}
-}
-EXPORT_SYMBOL(md_unplug_mddev);
-
-static void md_unplug_all(request_queue_t *q)
-{
-	mddev_t *mddev = q->queuedata;
-
-	md_unplug_mddev(mddev);
-}
-
 static inline mddev_t *mddev_get(mddev_t *mddev)
 {
 	atomic_inc(&mddev->active);
@@ -1669,7 +1645,6 @@ static int do_md_run(mddev_t * mddev)
 	 */
 	mddev->queue->queuedata = mddev;
 	mddev->queue->make_request_fn = mddev->pers->make_request;
-	mddev->queue->unplug_fn = md_unplug_all;
 
 	mddev->changed = 1;
 	return 0;
@@ -2742,10 +2717,9 @@ int md_thread(void * arg)
 		clear_bit(THREAD_WAKEUP, &thread->flags);
 
 		run = thread->run;
-		if (run) {
+		if (run)
 			run(thread->mddev);
-			md_unplug_mddev(thread->mddev);
-		}
+
 		if (signal_pending(current))
 			flush_signals(current);
 	}
@@ -3313,8 +3287,6 @@ static void md_do_sync(mddev_t *mddev)
 		    test_bit(MD_RECOVERY_ERR, &mddev->recovery))
 			break;
 
-		md_unplug_mddev(mddev);
-
 	repeat:
 		if (jiffies >= mark[last_mark] + SYNC_MARK_STEP ) {
 			/* step marks */
@@ -3347,6 +3319,7 @@ static void md_do_sync(mddev_t *mddev)
 		 * about not overloading the IO subsystem. (things like an
 		 * e2fsck being done on the RAID array should execute fast)
 		 */
+		mddev->queue->unplug_fn(mddev->queue);
 		cond_resched();
 
 		currspeed = ((unsigned long)(j-mddev->resync_mark_cnt))/2/((jiffies-mddev->resync_mark)/HZ +1) +1;
@@ -3365,6 +3338,8 @@ static void md_do_sync(mddev_t *mddev)
 	 * this also signals 'finished resyncing' to md_stop
 	 */
  out:
+	mddev->queue->unplug_fn(mddev->queue);
+
 	wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
 
 	/* tell personality that we are finished */
diff -puN drivers/md/multipath.c~md-unplug-update drivers/md/multipath.c
--- 25/drivers/md/multipath.c~md-unplug-update	2004-04-06 19:04:25.160283792 -0700
+++ 25-akpm/drivers/md/multipath.c	2004-04-06 19:04:25.175281512 -0700
@@ -155,6 +155,27 @@ static int multipath_read_balance (multi
 	return 0;
 }
 
+static void unplug_slaves(mddev_t *mddev)
+{
+	multipath_conf_t *conf = mddev_to_conf(mddev);
+	int i;
+
+	for (i=0; i<mddev->raid_disks; i++) {
+		mdk_rdev_t *rdev = conf->multipaths[i].rdev;
+		if (rdev && !rdev->faulty) {
+			request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
+
+			if (r_queue->unplug_fn)
+				r_queue->unplug_fn(r_queue);
+		}
+	}
+}
+
+static void multipath_unplug(request_queue_t *q)
+{
+	unplug_slaves(q->queuedata);
+}
+
 static int multipath_make_request (request_queue_t *q, struct bio * bio)
 {
 	mddev_t *mddev = q->queuedata;
@@ -419,6 +440,8 @@ static int multipath_run (mddev_t *mddev
 	}
 	memset(conf->multipaths, 0, sizeof(struct multipath_info)*mddev->raid_disks);
 
+	mddev->queue->unplug_fn = multipath_unplug;
+
 	conf->working_disks = 0;
 	ITERATE_RDEV(mddev,rdev,tmp) {
 		disk_idx = rdev->raid_disk;
diff -puN drivers/md/raid0.c~md-unplug-update drivers/md/raid0.c
--- 25/drivers/md/raid0.c~md-unplug-update	2004-04-06 19:04:25.162283488 -0700
+++ 25-akpm/drivers/md/raid0.c	2004-04-06 19:04:25.176281360 -0700
@@ -25,6 +25,21 @@
 #define MD_DRIVER
 #define MD_PERSONALITY
 
+static void raid0_unplug(request_queue_t *q)
+{
+	mddev_t *mddev = q->queuedata;
+	raid0_conf_t *conf = mddev_to_conf(mddev);
+	mdk_rdev_t **devlist = conf->strip_zone[0].dev;
+	int i;
+
+	for (i=0; i<mddev->raid_disks; i++) {
+		request_queue_t *r_queue = bdev_get_queue(devlist[i]->bdev);
+
+		if (r_queue->unplug_fn)
+			r_queue->unplug_fn(r_queue);
+	}
+}
+
 static int create_strip_zones (mddev_t *mddev)
 {
 	int i, c, j;
@@ -202,6 +217,8 @@ static int create_strip_zones (mddev_t *
 			conf->hash_spacing = sz;
 	}
 
+	mddev->queue->unplug_fn = raid0_unplug;
+
 	printk("raid0: done.\n");
 	return 0;
  abort:
diff -puN drivers/md/raid1.c~md-unplug-update drivers/md/raid1.c
--- 25/drivers/md/raid1.c~md-unplug-update	2004-04-06 19:04:25.164283184 -0700
+++ 25-akpm/drivers/md/raid1.c	2004-04-06 19:04:25.178281056 -0700
@@ -37,6 +37,9 @@ static mdk_personality_t raid1_personali
 static spinlock_t retry_list_lock = SPIN_LOCK_UNLOCKED;
 static LIST_HEAD(retry_list_head);
 
+static void unplug_slaves(mddev_t *mddev);
+
+
 static void * r1bio_pool_alloc(int gfp_flags, void *data)
 {
 	mddev_t *mddev = data;
@@ -47,6 +50,8 @@ static void * r1bio_pool_alloc(int gfp_f
 			 gfp_flags);
 	if (r1_bio)
 		memset(r1_bio, 0, sizeof(*r1_bio) + sizeof(struct bio*)*mddev->raid_disks);
+	else
+		unplug_slaves(mddev);
 
 	return r1_bio;
 }
@@ -71,8 +76,10 @@ static void * r1buf_pool_alloc(int gfp_f
 	int i, j;
 
 	r1_bio = r1bio_pool_alloc(gfp_flags, conf->mddev);
-	if (!r1_bio)
+	if (!r1_bio) {
+		unplug_slaves(conf->mddev);
 		return NULL;
+	}
 
 	/*
 	 * Allocate bios : 1 for reading, n-1 for writing
@@ -443,6 +450,29 @@ rb_out:
 	return new_disk;
 }
 
+static void unplug_slaves(mddev_t *mddev)
+{
+	conf_t *conf = mddev_to_conf(mddev);
+	int i;
+	unsigned long flags;
+
+	spin_lock_irqsave(&conf->device_lock, flags);
+	for (i=0; i<mddev->raid_disks; i++) {
+		mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+		if (rdev && !rdev->faulty) {
+			request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
+
+			if (r_queue->unplug_fn)
+				r_queue->unplug_fn(r_queue);
+		}
+	}
+	spin_unlock_irqrestore(&conf->device_lock, flags);
+}
+
+static void raid1_unplug(request_queue_t *q)
+{
+	unplug_slaves(q->queuedata);
+}
 /*
  * Throttle resync depth, so that we can both get proper overlapping of
  * requests, but are still able to handle normal requests quickly.
@@ -451,16 +481,18 @@ rb_out:
 
 static void device_barrier(conf_t *conf, sector_t sect)
 {
-	md_unplug_mddev(conf->mddev);
 	spin_lock_irq(&conf->resync_lock);
-	wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume), conf->resync_lock);
+	wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume),
+			    conf->resync_lock, unplug_slaves(conf->mddev));
 	
 	if (!conf->barrier++) {
-		wait_event_lock_irq(conf->wait_idle, !conf->nr_pending, conf->resync_lock);
+		wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
+				    conf->resync_lock, unplug_slaves(conf->mddev));
 		if (conf->nr_pending)
 			BUG();
 	}
-	wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH, conf->resync_lock);
+	wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH,
+			    conf->resync_lock, unplug_slaves(conf->mddev));
 	conf->next_resync = sect;
 	spin_unlock_irq(&conf->resync_lock);
 }
@@ -479,9 +511,8 @@ static int make_request(request_queue_t 
 	 * thread has put up a bar for new requests.
 	 * Continue immediately if no resync is active currently.
 	 */
-	md_unplug_mddev(conf->mddev);
 	spin_lock_irq(&conf->resync_lock);
-	wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock);
+	wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
 	conf->nr_pending++;
 	spin_unlock_irq(&conf->resync_lock);
 
@@ -646,9 +677,9 @@ static void print_conf(conf_t *conf)
 
 static void close_sync(conf_t *conf)
 {
-	md_unplug_mddev(conf->mddev);
 	spin_lock_irq(&conf->resync_lock);
-	wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock);
+	wait_event_lock_irq(conf->wait_resume, !conf->barrier,
+			    conf->resync_lock, unplug_slaves(conf->mddev));
 	spin_unlock_irq(&conf->resync_lock);
 
 	if (conf->barrier) BUG();
@@ -862,6 +893,7 @@ static void raid1d(mddev_t *mddev)
 	struct bio *bio;
 	unsigned long flags;
 	conf_t *conf = mddev_to_conf(mddev);
+	int unplug = 0;
 	mdk_rdev_t *rdev;
 
 	md_check_recovery(mddev);
@@ -881,6 +913,7 @@ static void raid1d(mddev_t *mddev)
 		bio = r1_bio->master_bio;
 		if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
 			sync_request_write(mddev, r1_bio);
+			unplug = 1;
 		} else {
 			if (map(mddev, &rdev) == -1) {
 				printk(KERN_ALERT "raid1: %s: unrecoverable I/O"
@@ -896,12 +929,14 @@ static void raid1d(mddev_t *mddev)
 				bio->bi_bdev = rdev->bdev;
 				bio->bi_sector = r1_bio->sector + rdev->data_offset;
 				bio->bi_rw = READ;
-
+				unplug = 1;
 				generic_make_request(bio);
 			}
 		}
 	}
 	spin_unlock_irqrestore(&retry_list_lock, flags);
+	if (unplug)
+		unplug_slaves(mddev);
 }
 
 
@@ -1104,6 +1139,7 @@ static int run(mddev_t *mddev)
 			mdname(mddev));
 		goto out_free_conf;
 	}
+	mddev->queue->unplug_fn = raid1_unplug;
 
 
 	ITERATE_RDEV(mddev, rdev, tmp) {
diff -puN drivers/md/raid5.c~md-unplug-update drivers/md/raid5.c
--- 25/drivers/md/raid5.c~md-unplug-update	2004-04-06 19:04:25.166282880 -0700
+++ 25-akpm/drivers/md/raid5.c	2004-04-06 19:04:25.179280904 -0700
@@ -231,6 +231,8 @@ static struct stripe_head *__find_stripe
 	return NULL;
 }
 
+static void unplug_slaves(mddev_t *mddev);
+
 static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector,
 					     int pd_idx, int noblock) 
 {
@@ -249,12 +251,13 @@ static struct stripe_head *get_active_st
 				break;
 			if (!sh) {
 				conf->inactive_blocked = 1;
-				md_unplug_mddev(conf->mddev);
 				wait_event_lock_irq(conf->wait_for_stripe,
 						    !list_empty(&conf->inactive_list) &&
 						    (atomic_read(&conf->active_stripes) < (NR_STRIPES *3/4)
 						     || !conf->inactive_blocked),
-						    conf->device_lock);
+						    conf->device_lock,
+						    unplug_slaves(conf->mddev)
+					);
 				conf->inactive_blocked = 0;
 			} else
 				init_stripe(sh, sector, pd_idx);
@@ -1293,6 +1296,25 @@ static inline void raid5_activate_delaye
 		}
 	}
 }
+
+static void unplug_slaves(mddev_t *mddev)
+{
+	raid5_conf_t *conf = mddev_to_conf(mddev);
+	int i;
+
+	for (i=0; i<mddev->raid_disks; i++) {
+		mdk_rdev_t *rdev = conf->disks[i].rdev;
+		if (rdev && !rdev->faulty) {
+			struct block_device *bdev = rdev->bdev;
+			if (bdev) {
+				request_queue_t *r_queue = bdev_get_queue(bdev);
+				if (r_queue && r_queue->unplug_fn)
+					r_queue->unplug_fn(r_queue);
+			}
+		}
+	}
+}
+
 static void raid5_unplug_device(request_queue_t *q)
 {
 	mddev_t *mddev = q->queuedata;
@@ -1306,6 +1328,8 @@ static void raid5_unplug_device(request_
 	md_wakeup_thread(mddev->thread);
 
 	spin_unlock_irqrestore(&conf->device_lock, flags);
+
+	unplug_slaves(mddev);
 }
 
 static inline void raid5_plug_device(raid5_conf_t *conf)
@@ -1392,9 +1416,11 @@ static int sync_request (mddev_t *mddev,
 	int raid_disks = conf->raid_disks;
 	int data_disks = raid_disks-1;
 
-	if (sector_nr >= mddev->size <<1)
-		/* just being told to finish up .. nothing to do */
+	if (sector_nr >= mddev->size <<1) {
+		/* just being told to finish up .. nothing much to do */
+		unplug_slaves(mddev);
 		return 0;
+	}
 
 	x = sector_nr;
 	chunk_offset = sector_div(x, sectors_per_chunk);
@@ -1474,6 +1500,8 @@ static void raid5d (mddev_t *mddev)
 
 	spin_unlock_irq(&conf->device_lock);
 
+	unplug_slaves(mddev);
+
 	PRINTK("--- raid5d inactive\n");
 }
 
diff -puN drivers/md/raid6main.c~md-unplug-update drivers/md/raid6main.c
--- 25/drivers/md/raid6main.c~md-unplug-update	2004-04-06 19:04:25.167282728 -0700
+++ 25-akpm/drivers/md/raid6main.c	2004-04-06 19:04:25.181280600 -0700
@@ -250,6 +250,8 @@ static struct stripe_head *__find_stripe
 	return NULL;
 }
 
+static void unplug_slaves(mddev_t *mddev);
+
 static struct stripe_head *get_active_stripe(raid6_conf_t *conf, sector_t sector,
 					     int pd_idx, int noblock)
 {
@@ -272,7 +274,9 @@ static struct stripe_head *get_active_st
 						    !list_empty(&conf->inactive_list) &&
 						    (atomic_read(&conf->active_stripes) < (NR_STRIPES *3/4)
 						     || !conf->inactive_blocked),
-						    conf->device_lock);
+						    conf->device_lock,
+						    unplug_slaves(conf->mddev)
+					);
 				conf->inactive_blocked = 0;
 			} else
 				init_stripe(sh, sector, pd_idx);
@@ -1454,6 +1458,26 @@ static inline void raid6_activate_delaye
 		}
 	}
 }
+
+static void unplug_slaves(mddev_t *mddev)
+{
+	/* note: this is always called without device_lock held:
+	 * the wait_event_lock_irq 'cmd' runs with the lock dropped */
+	raid6_conf_t *conf = mddev_to_conf(mddev);
+	int i;
+
+	for (i=0; i<mddev->raid_disks; i++) {
+		mdk_rdev_t *rdev = conf->disks[i].rdev;
+		if (rdev && !rdev->faulty) {
+			struct block_device *bdev = rdev->bdev;
+			if (bdev) {
+				request_queue_t *r_queue = bdev_get_queue(bdev);
+				if (r_queue && r_queue->unplug_fn)
+					r_queue->unplug_fn(r_queue);
+			}
+		}
+	}
+
 static void raid6_unplug_device(request_queue_t *q)
 {
 	mddev_t *mddev = q->queuedata;
@@ -1467,6 +1491,8 @@ static void raid6_unplug_device(request_
 	md_wakeup_thread(mddev->thread);
 
 	spin_unlock_irqrestore(&conf->device_lock, flags);
+
+	unplug_slaves(mddev);
 }
 
 static inline void raid6_plug_device(raid6_conf_t *conf)
@@ -1553,9 +1579,11 @@ static int sync_request (mddev_t *mddev,
 	int raid_disks = conf->raid_disks;
 	int data_disks = raid_disks - 2;
 
-	if (sector_nr >= mddev->size <<1)
-		/* just being told to finish up .. nothing to do */
+	if (sector_nr >= mddev->size <<1) {
+		/* just being told to finish up .. nothing much to do */
+		unplug_slaves(mddev);
 		return 0;
+	}
 
 	x = sector_nr;
 	chunk_offset = sector_div(x, sectors_per_chunk);
@@ -1635,6 +1663,8 @@ static void raid6d (mddev_t *mddev)
 
 	spin_unlock_irq(&conf->device_lock);
 
+	unplug_slaves(mddev);
+
 	PRINTK("--- raid6d inactive\n");
 }
 
diff -puN include/linux/raid/md_k.h~md-unplug-update include/linux/raid/md_k.h
--- 25/include/linux/raid/md_k.h~md-unplug-update	2004-04-06 19:04:25.168282576 -0700
+++ 25-akpm/include/linux/raid/md_k.h	2004-04-06 19:04:25.182280448 -0700
@@ -315,7 +315,7 @@ typedef struct mdk_thread_s {
 
 #define THREAD_WAKEUP  0
 
-#define __wait_event_lock_irq(wq, condition, lock) 			\
+#define __wait_event_lock_irq(wq, condition, lock, cmd) 		\
 do {									\
 	wait_queue_t __wait;						\
 	init_waitqueue_entry(&__wait, current);				\
@@ -326,6 +326,7 @@ do {									\
 		if (condition)						\
 			break;						\
 		spin_unlock_irq(&lock);					\
+		cmd;							\
 		schedule();						\
 		spin_lock_irq(&lock);					\
 	}								\
@@ -333,11 +334,11 @@ do {									\
 	remove_wait_queue(&wq, &__wait);				\
 } while (0)
 
-#define wait_event_lock_irq(wq, condition, lock) 			\
+#define wait_event_lock_irq(wq, condition, lock, cmd) 			\
 do {									\
 	if (condition)	 						\
 		break;							\
-	__wait_event_lock_irq(wq, condition, lock);			\
+	__wait_event_lock_irq(wq, condition, lock, cmd);		\
 } while (0)
 
 #endif

_