From: Alasdair G Kergon <agk@redhat.com>

Each multipath instance can use a Hardware Handler with hooks for the
particular hardware you're using.

This patch provides the hw_handler infrastructure.

So far 3 hooks are available:

  A status function invoked by device-mapper table and status requests.

  An initialisation function called when a Priority Group is selected for
  use but before any I/O is sent to it.  This function should return straight
  away, and I/O is queued until dm_pg_init_complete() is called indicating
  whether or not the initialisation was successful.  There are three error
  flags, any or all of which may be set: MP_FAIL_PATH, MP_BYPASS_PG,
  MP_ERROR_IO.

  An error handler which gets the opportunity to decode any error that a bio
  generated.  Patches are pending to make scsi error details available for
  dm_scsi_err_handler() to decode.

Signed-Off-By: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/drivers/md/Makefile        |    2 
 25-akpm/drivers/md/dm-hw-handler.c |  216 +++++++++++++++++++++++++++++++++++++
 25-akpm/drivers/md/dm-hw-handler.h |   68 +++++++++++
 25-akpm/drivers/md/dm-mpath.c      |  122 ++++++++++++++++++++
 25-akpm/drivers/md/dm-mpath.h      |    4 
 5 files changed, 408 insertions(+), 4 deletions(-)

diff -puN /dev/null drivers/md/dm-hw-handler.c
--- /dev/null	2003-09-15 06:40:47.000000000 -0700
+++ 25-akpm/drivers/md/dm-hw-handler.c	2005-02-22 18:19:25.000000000 -0800
@@ -0,0 +1,216 @@
+/*
+ * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+ *
+ * This file is released under the GPL.
+ *
+ * Multipath hardware handler registration.
+ */
+
+#include "dm.h"
+#include "dm-hw-handler.h"
+
+#include <linux/slab.h>
+
+struct hwh_internal {
+	struct hw_handler_type hwht;	/* private copy of the registered type */
+
+	struct list_head list;		/* link in _hw_handlers */
+	long use;			/* users taken via get_hw_handler() */
+};
+
+#define hwht_to_hwhi(__hwht) container_of((__hwht), struct hwh_internal, hwht)
+
+static LIST_HEAD(_hw_handlers);
+static DECLARE_RWSEM(_hwh_lock);
+
+static struct hwh_internal *__find_hw_handler_type(const char *name)
+{
+	struct hwh_internal *hwhi;
+
+	list_for_each_entry(hwhi, &_hw_handlers, list) { /* under _hwh_lock */
+		if (!strcmp(name, hwhi->hwht.name))
+			return hwhi;
+	}
+
+	return NULL;	/* no handler registered under this name */
+}
+
+static struct hwh_internal *get_hw_handler(const char *name)
+{
+	struct hwh_internal *hwhi;
+
+	down_write(&_hwh_lock);	/* exclusive: hwhi->use is modified below */
+	hwhi = __find_hw_handler_type(name);
+	if (hwhi) {
+		if ((hwhi->use == 0) && !try_module_get(hwhi->hwht.module))
+			hwhi = NULL;	/* could not pin the module */
+		else
+			hwhi->use++;
+	}
+	up_write(&_hwh_lock);
+
+	return hwhi;
+}
+
+struct hw_handler_type *dm_get_hw_handler(const char *name)
+{
+	struct hwh_internal *hwhi;
+
+	if (!name)
+		return NULL;
+
+	hwhi = get_hw_handler(name);
+	if (!hwhi) {
+		request_module("dm-%s", name);	/* try to load the module */
+		hwhi = get_hw_handler(name);	/* then look again */
+	}
+
+	return hwhi ? &hwhi->hwht : NULL;
+}
+
+void dm_put_hw_handler(struct hw_handler_type *hwht)
+{
+	struct hwh_internal *hwhi;
+
+	if (!hwht)
+		return;
+
+	down_write(&_hwh_lock);	/* exclusive: hwhi->use is modified below */
+	hwhi = __find_hw_handler_type(hwht->name);
+	if (!hwhi)
+		goto out;
+
+	if (--hwhi->use == 0)
+		module_put(hwhi->hwht.module);	/* last user is gone */
+
+	if (hwhi->use < 0)
+		BUG();
+
+      out:
+	up_write(&_hwh_lock);
+}
+
+static struct hwh_internal *_alloc_hw_handler(struct hw_handler_type *hwht)
+{
+	struct hwh_internal *hwhi = kmalloc(sizeof(*hwhi), GFP_KERNEL);
+
+	if (hwhi) {
+		memset(hwhi, 0, sizeof(*hwhi));	/* starts with use == 0 */
+		hwhi->hwht = *hwht;		/* keep our own copy of the type */
+	}
+
+	return hwhi;	/* NULL if the allocation failed */
+}
+
+int dm_register_hw_handler(struct hw_handler_type *hwht)
+{
+	int r = 0;
+	struct hwh_internal *hwhi = _alloc_hw_handler(hwht);
+
+	if (!hwhi)
+		return -ENOMEM;
+
+	down_write(&_hwh_lock);
+
+	if (__find_hw_handler_type(hwht->name)) {
+		kfree(hwhi);	/* name already taken: drop our copy */
+		r = -EEXIST;
+	} else
+		list_add(&hwhi->list, &_hw_handlers);
+
+	up_write(&_hwh_lock);
+
+	return r;
+}
+
+int dm_unregister_hw_handler(struct hw_handler_type *hwht)
+{
+	struct hwh_internal *hwhi;
+
+	down_write(&_hwh_lock);
+
+	hwhi = __find_hw_handler_type(hwht->name);
+	if (!hwhi) {
+		up_write(&_hwh_lock);
+		return -EINVAL;		/* no such handler registered */
+	}
+
+	if (hwhi->use) {
+		up_write(&_hwh_lock);
+		return -ETXTBSY;	/* still referenced by a user */
+	}
+
+	list_del(&hwhi->list);
+
+	up_write(&_hwh_lock);
+
+	kfree(hwhi);	/* already off the list, so freed outside the lock */
+
+	return 0;
+}
+
+unsigned dm_scsi_err_handler(struct hw_handler *hwh, struct bio *bio)
+{
+#if 0	/* disabled until bios carry SCSI sense data to decode */
+	int sense_key, asc, ascq;
+
+	if (bio->bi_error & BIO_SENSE) {
+		/* FIXME: This is just an initial guess. */
+		/* key / asc / ascq */
+		sense_key = (bio->bi_error >> 16) & 0xff;
+		asc = (bio->bi_error >> 8) & 0xff;
+		ascq = bio->bi_error & 0xff;
+
+		switch (sense_key) {
+			/* This block as a whole comes from the device.
+			 * So no point retrying on another path. */
+		case 0x03:	/* Medium error */
+		case 0x05:	/* Illegal request */
+		case 0x07:	/* Data protect */
+		case 0x08:	/* Blank check */
+		case 0x0a:	/* copy aborted */
+		case 0x0c:	/* obsolete - no clue ;-) */
+		case 0x0d:	/* volume overflow */
+		case 0x0e:	/* data miscompare */
+		case 0x0f:	/* reserved - no idea either. */
+			return MP_ERROR_IO;
+
+			/* For these errors it's unclear whether they
+			 * come from the device or the controller.
+			 * So let's just try a different path, and if
+			 * it eventually succeeds, user-space will clear
+			 * the paths again... */
+		case 0x02:	/* Not ready */
+		case 0x04:	/* Hardware error */
+		case 0x09:	/* vendor specific */
+		case 0x0b:	/* Aborted command */
+			return MP_FAIL_PATH;
+
+		case 0x06:	/* Unit attention - might want to decode */
+			if (asc == 0x04 && ascq == 0x01)
+				/* "Unit in the process of
+				 * becoming ready" */
+				return 0;
+			return MP_FAIL_PATH;
+
+			/* FIXME: For Unit Not Ready we may want
+			 * to have a generic pg activation
+			 * feature (START_UNIT). */
+
+			/* Should these two ever end up in the
+			 * error path? I don't think so. */
+		case 0x00:	/* No sense */
+		case 0x01:	/* Recovered error */
+			return 0;
+		}
+	}
+#endif
+
+	/* We got no idea how to decode the other kinds of errors ->
+	 * assume generic error condition. */
+	return MP_FAIL_PATH;
+}
+
+EXPORT_SYMBOL(dm_register_hw_handler);
+EXPORT_SYMBOL(dm_unregister_hw_handler);
+EXPORT_SYMBOL(dm_scsi_err_handler);
diff -puN /dev/null drivers/md/dm-hw-handler.h
--- /dev/null	2003-09-15 06:40:47.000000000 -0700
+++ 25-akpm/drivers/md/dm-hw-handler.h	2005-02-22 18:19:25.000000000 -0800
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+ *
+ * This file is released under the GPL.
+ *
+ * Multipath hardware handler registration.
+ */
+
+#ifndef	DM_HW_HANDLER_H
+#define	DM_HW_HANDLER_H
+
+#include <linux/device-mapper.h>
+
+#include "dm-mpath.h"
+
+struct hw_handler_type;
+struct hw_handler {
+	struct hw_handler_type *type;	/* NULL when no handler is configured */
+	void *context;	/* presumably the handler's private data - confirm */
+};
+
+/*
+ * Constructs a hardware handler object from argv[0..argc-1]; 0 on success.
+ */
+typedef int (*hwh_ctr_fn) (struct hw_handler *hwh, unsigned argc, char **argv);
+typedef void (*hwh_dtr_fn) (struct hw_handler *hwh);
+
+typedef void (*hwh_pg_init_fn) (struct hw_handler *hwh, unsigned bypassed,
+				struct path *path);
+typedef unsigned (*hwh_err_fn) (struct hw_handler *hwh, struct bio *bio);
+typedef	int (*hwh_status_fn) (struct hw_handler *hwh,
+			      status_type_t type,
+			      char *result, unsigned int maxlen);
+
+/* Information about a hardware handler type */
+struct hw_handler_type {
+	char *name;		/* looked up by dm_get_hw_handler() */
+	struct module *module;	/* pinned while the type is in use */
+
+	hwh_ctr_fn ctr;
+	hwh_dtr_fn dtr;
+
+	hwh_pg_init_fn pg_init;	/* optional; must end in dm_pg_init_complete() */
+	hwh_err_fn err;		/* optional; returns MP_* error flags */
+	hwh_status_fn status;	/* optional */
+};
+
+/* Register a hardware handler */
+int dm_register_hw_handler(struct hw_handler_type *type);
+
+/* Unregister a hardware handler */
+int dm_unregister_hw_handler(struct hw_handler_type *type);
+
+/* Returns a registered hardware handler type */
+struct hw_handler_type *dm_get_hw_handler(const char *name);
+
+/* Releases a hardware handler  */
+void dm_put_hw_handler(struct hw_handler_type *hwht);
+
+/* Default hwh_err_fn */
+unsigned dm_scsi_err_handler(struct hw_handler *hwh, struct bio *bio);
+
+/* Error flags for hwh_err_fn and dm_pg_init_complete */
+#define MP_FAIL_PATH 1
+#define MP_BYPASS_PG 2
+#define MP_ERROR_IO  4	/* Don't retry this I/O */
+
+#endif
diff -puN drivers/md/dm-mpath.c~device-mapper-multipath-hardware-handler drivers/md/dm-mpath.c
--- 25/drivers/md/dm-mpath.c~device-mapper-multipath-hardware-handler	2005-02-22 18:19:25.000000000 -0800
+++ 25-akpm/drivers/md/dm-mpath.c	2005-02-22 18:19:25.000000000 -0800
@@ -7,6 +7,7 @@
 
 #include "dm.h"
 #include "dm-path-selector.h"
+#include "dm-hw-handler.h"
 #include "dm-bio-list.h"
 #include "dm-bio-record.h"
 
@@ -58,8 +59,10 @@ struct multipath {
 
 	spinlock_t lock;
 
+	struct hw_handler hw_handler;
 	unsigned nr_priority_groups;
 	struct list_head priority_groups;
+	unsigned pg_init_required;	/* pg_init needs calling? */
 
 	unsigned nr_valid_paths;	/* Total number of usable paths */
 	struct pgpath *current_pgpath;
@@ -188,12 +191,18 @@ static struct multipath *alloc_multipath
 static void free_multipath(struct multipath *m)
 {
 	struct priority_group *pg, *tmp;
+	struct hw_handler *hwh = &m->hw_handler;
 
 	list_for_each_entry_safe (pg, tmp, &m->priority_groups, list) {
 		list_del(&pg->list);
 		free_priority_group(pg, m->ti);
 	}
 
+	if (hwh->type) {
+		hwh->type->dtr(hwh);
+		dm_put_hw_handler(hwh->type);
+	}
+
 	mempool_destroy(m->mpio_pool);
 	kfree(m);
 }
@@ -205,8 +214,18 @@ static void free_multipath(struct multip
 
 static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
 {
+	struct hw_handler *hwh = &m->hw_handler;
+
 	m->current_pg = pgpath->pg;
-	m->queue_io = 0;
+
+	/* Must we initialise the PG first, and queue I/O till it's ready? */
+	if (hwh->type && hwh->type->pg_init) {
+		m->pg_init_required = 1;
+		m->queue_io = 1;
+	} else {
+		m->pg_init_required = 0;
+		m->queue_io = 0;
+	}
 }
 
 static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg)
@@ -288,7 +307,7 @@ static int map_io(struct multipath *m, s
 		/* Queue for the daemon to resubmit */
 		bio_list_add(&m->queued_ios, bio);
 		m->queue_size++;
-		if (!m->queue_io)
+		if (m->pg_init_required || !m->queue_io)
 			schedule_work(&m->process_queued_ios);
 		pgpath = NULL;
 		r = 0;
@@ -358,8 +377,9 @@ static void dispatch_queued_ios(struct m
 static void process_queued_ios(void *data)
 {
 	struct multipath *m = (struct multipath *) data;
+	struct hw_handler *hwh = &m->hw_handler;
 	struct pgpath *pgpath;
-	unsigned must_queue = 0;
+	unsigned init_required, must_queue = 0;
 	unsigned long flags;
 
 	spin_lock_irqsave(&m->lock, flags);
@@ -373,8 +393,15 @@ static void process_queued_ios(void *dat
 	    (!pgpath && m->queue_if_no_path && !m->suspended))
 		must_queue = 1;
 
+	init_required = m->pg_init_required;
+	if (init_required)
+		m->pg_init_required = 0;
+
 	spin_unlock_irqrestore(&m->lock, flags);
 
+	if (init_required)
+		hwh->type->pg_init(hwh, pgpath->pg->bypassed, &pgpath->path);
+
 	if (!must_queue)
 		dispatch_queued_ios(m);
 }
@@ -393,6 +420,7 @@ static void trigger_event(void *data)
 /*-----------------------------------------------------------------
  * Constructor/argument parsing:
  * <#multipath feature args> [<arg>]*
+ * <#hw_handler args> [hw_handler [<arg>]*]
  * <#priority groups>
  * <initial priority group>
  *     [<selector> <#selector args> [<arg>]*
@@ -584,6 +612,43 @@ static struct priority_group *parse_prio
 	return NULL;
 }
 
+static int parse_hw_handler(struct arg_set *as, struct multipath *m,
+			    struct dm_target *ti)
+{
+	int r;
+	struct hw_handler_type *hwht;
+	unsigned hw_argc;
+
+	static struct param _params[] = {
+		{0, 1024, ESTR("invalid number of hardware handler args")},
+	};
+
+	r = read_param(_params, shift(as), &hw_argc, &ti->error);
+	if (r)
+		return -EINVAL;
+
+	if (!hw_argc)
+		return 0;	/* no hardware handler requested */
+
+	hwht = dm_get_hw_handler(shift(as));	/* first arg names the type */
+	if (!hwht) {
+		ti->error = ESTR("unknown hardware handler type");
+		return -EINVAL;
+	}
+	/* NOTE(review): no visible check that 'as' holds hw_argc args - confirm */
+	r = hwht->ctr(&m->hw_handler, hw_argc - 1, as->argv);
+	if (r) {
+		dm_put_hw_handler(hwht);	/* drop the reference taken above */
+		ti->error = ESTR("hardware handler constructor failed");
+		return r;
+	}
+
+	m->hw_handler.type = hwht;
+	consume(as, hw_argc - 1);	/* skip the handler's own arguments */
+
+	return 0;
+}
+
 static int parse_features(struct arg_set *as, struct multipath *m,
 			  struct dm_target *ti)
 {
@@ -637,6 +702,10 @@ static int multipath_ctr(struct dm_targe
 	if (r)
 		goto bad;
 
+	r = parse_hw_handler(&as, m, ti);
+	if (r)
+		goto bad;
+
 	r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error);
 	if (r)
 		goto bad;
@@ -873,11 +942,43 @@ static int bypass_pg_num(struct multipat
 }
 
 /*
+ * pg_init must call this when it has completed its initialisation
+ */
+void dm_pg_init_complete(struct path *path, unsigned err_flags)
+{
+	struct pgpath *pgpath = path_to_pgpath(path);
+	struct priority_group *pg = pgpath->pg;
+	struct multipath *m = pg->m;
+	unsigned long flags;
+
+	/* We insist on failing the path if the PG is already bypassed. */
+	if (err_flags && pg->bypassed)
+		err_flags |= MP_FAIL_PATH;
+
+	if (err_flags & MP_FAIL_PATH)
+		fail_path(pgpath);
+
+	if (err_flags & MP_BYPASS_PG)
+		bypass_pg(m, pg, 1);
+
+	spin_lock_irqsave(&m->lock, flags);
+	if (!err_flags)
+		m->queue_io = 0;	/* init succeeded: stop holding I/O back */
+	else {
+		m->current_pgpath = NULL;	/* init failed: drop the path */
+		m->current_pg = NULL;
+	}
+	schedule_work(&m->process_queued_ios);	/* kick the queued I/O worker */
+	spin_unlock_irqrestore(&m->lock, flags);
+}
+
+/*
  * end_io handling
  */
 static int do_end_io(struct multipath *m, struct bio *bio,
 		     int error, struct mpath_io *mpio)
 {
+	struct hw_handler *hwh = &m->hw_handler;
 	unsigned err_flags = MP_FAIL_PATH;	/* Default behavior */
 
 	if (!error)
@@ -895,6 +996,9 @@ static int do_end_io(struct multipath *m
 	}
 	spin_unlock(&m->lock);
 
+	if (hwh->type && hwh->type->err)
+		err_flags = hwh->type->err(hwh, bio);
+
 	if (mpio->pgpath) {
 		if (err_flags & MP_FAIL_PATH)
 			fail_path(mpio->pgpath);
@@ -970,6 +1074,7 @@ static void multipath_resume(struct dm_t
 /*
  * Info output has the following format:
  * num_multipath_feature_args [multipath_feature_args]*
+ * num_handler_status_args [handler_status_args]*
  * num_groups init_group_number
  *            [A|D|E num_ps_status_args [ps_status_args]*
  *             num_paths num_selector_args
@@ -977,6 +1082,7 @@ static void multipath_resume(struct dm_t
  *
  * Table output has the following format (identical to the constructor string):
  * num_feature_args [features_args]*
+ * num_handler_args hw_handler [hw_handler_args]*
  * num_groups init_group_number
  *     [priority selector-name num_ps_args [ps_args]*
  *      num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
@@ -987,6 +1093,7 @@ static int multipath_status(struct dm_ta
 	int sz = 0;
 	unsigned long flags;
 	struct multipath *m = (struct multipath *) ti->private;
+	struct hw_handler *hwh = &m->hw_handler;
 	struct priority_group *pg;
 	struct pgpath *p;
 	unsigned pg_num;
@@ -1002,6 +1109,13 @@ static int multipath_status(struct dm_ta
 	else
 		DMEMIT("0 ");
 
+	if (hwh->type && hwh->type->status)
+		sz += hwh->type->status(hwh, type, result + sz, maxlen - sz);
+	else if (!hwh->type || type == STATUSTYPE_INFO)
+		DMEMIT("0 ");
+	else
+		DMEMIT("1 %s ", hwh->type->name);
+
 	DMEMIT("%u ", m->nr_priority_groups);
 
 	if (m->next_pg)
@@ -1178,6 +1292,8 @@ static void __exit dm_multipath_exit(voi
 	kmem_cache_destroy(_mpio_cache);
 }
 
+EXPORT_SYMBOL(dm_pg_init_complete);
+
 module_init(dm_multipath_init);
 module_exit(dm_multipath_exit);
 
diff -puN drivers/md/dm-mpath.h~device-mapper-multipath-hardware-handler drivers/md/dm-mpath.h
--- 25/drivers/md/dm-mpath.h~device-mapper-multipath-hardware-handler	2005-02-22 18:19:25.000000000 -0800
+++ 25-akpm/drivers/md/dm-mpath.h	2005-02-22 18:19:25.000000000 -0800
@@ -16,6 +16,10 @@ struct path {
 	unsigned is_active;	/* Read-only */
 
 	void *pscontext;	/* For path-selector use */
+	void *hwhcontext;	/* For hw-handler use */
 };
 
+/* Callback for hwh_pg_init_fn to use when complete */
+void dm_pg_init_complete(struct path *path, unsigned err_flags);
+
 #endif
diff -puN drivers/md/Makefile~device-mapper-multipath-hardware-handler drivers/md/Makefile
--- 25/drivers/md/Makefile~device-mapper-multipath-hardware-handler	2005-02-22 18:19:25.000000000 -0800
+++ 25-akpm/drivers/md/Makefile	2005-02-22 18:19:25.000000000 -0800
@@ -4,7 +4,7 @@
 
 dm-mod-objs	:= dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
 		   dm-ioctl.o dm-io.o kcopyd.o
-dm-multipath-objs := dm-path-selector.o dm-mpath.o
+dm-multipath-objs := dm-hw-handler.o dm-path-selector.o dm-mpath.o
 dm-snapshot-objs := dm-snap.o dm-exception-store.o
 dm-mirror-objs	:= dm-log.o dm-raid1.o
 md-mod-objs     := md.o bitmap.o
_