From: Jens Axboe <axboe@suse.de>

This update:

- Removes some debugging code and the dprintk() stuff

- Detects when tagging is used or not, defines a threshold of
  CFQ_MAX_TAG (default: 4) for when we change from non-tagged to tagged
  operations. This affects accounting.

- Fix a bug where the rr_list was sorted badly. Also make sure we always
  keep it sorted, and add some logic to detect when to resort. This
  could trigger a harmless WARN_ON at line 428.

- Don't set ->service_start when queue is removed from rr_list, this
  could starve a queue if it was quickly readded to the rr_list again.
  In that case, we never would hit the 1 second interval for aging the
  service_used down.

- Cache jiffies in some function instead of re-reading it 2-3 times.

Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/drivers/block/cfq-iosched.c |  177 +++++++++++++++++++++---------------
 1 files changed, 104 insertions(+), 73 deletions(-)

diff -puN drivers/block/cfq-iosched.c~cfq-v2-update drivers/block/cfq-iosched.c
--- 25/drivers/block/cfq-iosched.c~cfq-v2-update	Tue Sep 14 16:17:24 2004
+++ 25-akpm/drivers/block/cfq-iosched.c	Tue Sep 14 16:17:24 2004
@@ -22,14 +22,6 @@
 #include <linux/rbtree.h>
 #include <linux/mempool.h>
 
-#undef CFQ_DEBUG
-
-#ifdef CFQ_DEBUG
-#define dprintk(fmt, args...) printk(KERN_ERR "cfq: " fmt, ##args)
-#else
-#define dprintk(fmt, args...)
-#endif
-
 static unsigned long max_elapsed_crq;
 static unsigned long max_elapsed_dispatch;
 
@@ -84,6 +76,11 @@ static int cfq_back_penalty = 2;	/* pena
 #define rq_rb_key(rq)		(rq)->sector
 
 /*
+ * threshold for switching off non-tag accounting
+ */
+#define CFQ_MAX_TAG		(4)
+
+/*
  * sort key types and names
  */
 enum {
@@ -125,18 +122,21 @@ struct cfq_data {
 
 	sector_t last_sector;
 
+	int rq_in_driver;
+
 	/*
 	 * tunables, see top of file
 	 */
 	unsigned int cfq_quantum;
 	unsigned int cfq_queued;
-	unsigned int cfq_tagged;
 	unsigned int cfq_fifo_expire_r;
 	unsigned int cfq_fifo_expire_w;
 	unsigned int cfq_fifo_batch_expire;
 	unsigned int cfq_back_penalty;
 	unsigned int cfq_back_max;
 	unsigned int find_best_crq;
+
+	unsigned int cfq_tagged;
 };
 
 struct cfq_queue {
@@ -170,14 +170,12 @@ struct cfq_queue {
 	unsigned long service_start;
 	unsigned long service_used;
 
+	unsigned int max_rate;
+
 	/* number of requests that have been handed to the driver */
 	int in_flight;
 	/* number of currently allocated requests */
 	int alloc_limit[2];
-
-#ifdef CFQ_DEBUG
-	char name[16];
-#endif
 };
 
 struct cfq_rq {
@@ -404,11 +402,42 @@ static void cfq_update_next_crq(struct c
 		cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq);
 }
 
-static inline void
-cfq_sort_rr_list(struct cfq_queue *cfqq)
+static int cfq_check_sort_rr_list(struct cfq_queue *cfqq)
+{
+	struct list_head *head = &cfqq->cfqd->rr_list;
+	struct list_head *next, *prev;
+
+	/*
+	 * list might still be ordered
+	 */
+	next = cfqq->cfq_list.next;
+	if (next != head) {
+		struct cfq_queue *cnext = list_entry_cfqq(next);
+
+		if (cfqq->service_used > cnext->service_used)
+			return 1;
+	}
+
+	prev = cfqq->cfq_list.prev;
+	if (prev != head) {
+		struct cfq_queue *cprev = list_entry_cfqq(prev);
+
+		if (cfqq->service_used < cprev->service_used)
+			return 1;
+	}
+
+	return 0;
+}
+
+static void cfq_sort_rr_list(struct cfq_queue *cfqq, int new_queue)
 {
 	struct list_head *entry = &cfqq->cfqd->rr_list;
 
+	if (!cfqq->on_rr)
+		return;
+	if (!new_queue && !cfq_check_sort_rr_list(cfqq))
+		return;
+
 	list_del(&cfqq->cfq_list);
 
 	/*
@@ -446,21 +475,16 @@ cfq_sort_rr_list(struct cfq_queue *cfqq)
 static inline void
 cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
-	BUG_ON(cfqq->on_rr);
-
 	/*
 	 * it's currently on the empty list
 	 */
-	cfq_sort_rr_list(cfqq);
 	cfqq->on_rr = 1;
 	cfqd->busy_queues++;
 
-	/*
-	 * if the queue is on the empty_list, service_start was the time
-	 * where it was deleted from the rr_list.
-	 */
 	if (time_after(jiffies, cfqq->service_start + cfq_service))
 		cfqq->service_used >>= 3;
+
+	cfq_sort_rr_list(cfqq, 1);
 }
 
 static inline void
@@ -468,7 +492,6 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, s
 {
 	list_move(&cfqq->cfq_list, &cfqd->empty_list);
 	cfqq->on_rr = 0;
-	cfqq->service_start = jiffies;
 
 	BUG_ON(!cfqd->busy_queues);
 	cfqd->busy_queues--;
@@ -492,10 +515,8 @@ static inline void cfq_del_crq_rb(struct
 		rb_erase(&crq->rb_node, &cfqq->sort_list);
 		RB_CLEAR_COLOR(&crq->rb_node);
 
-		if (RB_EMPTY(&cfqq->sort_list) && cfqq->on_rr) {
-			dprintk("moving 0x%p empty_list\n", cfqq);
+		if (RB_EMPTY(&cfqq->sort_list) && cfqq->on_rr)
 			cfq_del_cfqq_rr(cfqd, cfqq);
-		}
 	}
 }
 
@@ -541,11 +562,8 @@ static void cfq_add_crq_rb(struct cfq_rq
 
 	rb_insert_color(&crq->rb_node, &cfqq->sort_list);
 
-	if (!cfqq->on_rr) {
+	if (!cfqq->on_rr)
 		cfq_add_cfqq_rr(cfqd, cfqq);
-		dprintk("moving to rr list %d\n", cfqd->busy_queues);
-	} else
-		dprintk("already on rr list %d\n", cfqd->busy_queues);
 
 	/*
 	 * check if this request is a better next-serve candidate
@@ -590,11 +608,30 @@ out:
 	return NULL;
 }
 
-static void cfq_remove_request(request_queue_t *q, struct request *rq)
+/*
+ * make sure the service time gets corrected on reissue of this request
+ */
+static void cfq_requeue_request(request_queue_t *q, struct request *rq)
 {
 	struct cfq_rq *crq = RQ_DATA(rq);
 
-	dprintk("removing 0x%p\n", rq);
+	if (crq) {
+		struct cfq_queue *cfqq = crq->cfq_queue;
+
+		if (cfqq->cfqd->cfq_tagged) {
+			cfqq->service_used--;
+			cfq_sort_rr_list(cfqq, 0);
+		}
+
+		crq->accounted = 0;
+		cfqq->cfqd->rq_in_driver--;
+	}
+	list_add(&rq->queuelist, &q->queue_head);
+}
+
+static void cfq_remove_request(request_queue_t *q, struct request *rq)
+{
+	struct cfq_rq *crq = RQ_DATA(rq);
 
 	if (crq) {
 		cfq_remove_merge_hints(q, crq);
@@ -730,20 +767,21 @@ static inline struct cfq_rq *cfq_check_f
 	struct cfq_data *cfqd = cfqq->cfqd;
 	const int reads = !list_empty(&cfqq->fifo[0]);
 	const int writes = !list_empty(&cfqq->fifo[1]);
+	unsigned long now = jiffies;
 	struct cfq_rq *crq;
 
-	if (jiffies - cfqq->last_fifo_expire < cfqd->cfq_fifo_batch_expire)
+	if (time_before(now, cfqq->last_fifo_expire + cfqd->cfq_fifo_batch_expire))
 		return NULL;
 
 	crq = RQ_DATA(list_entry(cfqq->fifo[0].next, struct request, queuelist));
-	if (reads && time_after(jiffies, crq->queue_start + cfqd->cfq_fifo_expire_r)) {
-		cfqq->last_fifo_expire = jiffies;
+	if (reads && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_r)) {
+		cfqq->last_fifo_expire = now;
 		return crq;
 	}
 
 	crq = RQ_DATA(list_entry(cfqq->fifo[1].next, struct request, queuelist));
-	if (writes && time_after(jiffies, crq->queue_start + cfqd->cfq_fifo_expire_w)) {
-		cfqq->last_fifo_expire = jiffies;
+	if (writes && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_w)) {
+		cfqq->last_fifo_expire = now;
 		return crq;
 	}
 
@@ -822,7 +860,8 @@ restart:
 static inline void cfq_account_dispatch(struct cfq_rq *crq)
 {
 	struct cfq_queue *cfqq = crq->cfq_queue;
-	unsigned long elapsed = jiffies - crq->queue_start;
+	struct cfq_data *cfqd = cfqq->cfqd;
+	unsigned long now, elapsed;
 
 	/*
 	 * accounted bit is necessary since some drivers will call
@@ -831,52 +870,62 @@ static inline void cfq_account_dispatch(
 	if (crq->accounted)
 		return;
 
+	now = jiffies;
+	if (cfqq->service_start == ~0UL)
+		cfqq->service_start = now;
+
 	/*
 	 * on drives with tagged command queueing, command turn-around time
 	 * doesn't necessarily reflect the time spent processing this very
 	 * command inside the drive. so do the accounting differently there,
 	 * by just sorting on the number of requests
 	 */
-	if (cfqq->cfqd->cfq_tagged) {
-		if (time_after(jiffies, cfqq->service_start + cfq_service)) {
-			cfqq->service_start = jiffies;
+	if (cfqd->cfq_tagged) {
+		if (time_after(now, cfqq->service_start + cfq_service)) {
+			cfqq->service_start = now;
 			cfqq->service_used /= 10;
 		}
 
 		cfqq->service_used++;
+		cfq_sort_rr_list(cfqq, 0);
 	}
 
+	elapsed = now - crq->queue_start;
 	if (elapsed > max_elapsed_dispatch)
 		max_elapsed_dispatch = elapsed;
 
 	crq->accounted = 1;
-	crq->service_start = jiffies;
+	crq->service_start = now;
+
+	if (++cfqd->rq_in_driver >= CFQ_MAX_TAG && !cfqd->cfq_tagged) {
+		cfqq->cfqd->cfq_tagged = 1;
+		printk("cfq: depth %d reached, tagging now on\n", CFQ_MAX_TAG);
+	}
 }
 
 static inline void
 cfq_account_completion(struct cfq_queue *cfqq, struct cfq_rq *crq)
 {
-	unsigned long start_val = cfqq->service_used;
+	struct cfq_data *cfqd = cfqq->cfqd;
 
-	if (!cfqq->cfqd->cfq_tagged) {
-		unsigned long duration = jiffies - crq->service_start;
+	WARN_ON(!cfqd->rq_in_driver);
+	cfqd->rq_in_driver--;
 
-		if (time_after(jiffies, cfqq->service_start + cfq_service)) {
-			cfqq->service_start = jiffies;
+	if (!cfqd->cfq_tagged) {
+		unsigned long now = jiffies;
+		unsigned long duration = now - crq->service_start;
+
+		if (time_after(now, cfqq->service_start + cfq_service)) {
+			cfqq->service_start = now;
 			cfqq->service_used >>= 3;
 		}
 
 		cfqq->service_used += duration;
+		cfq_sort_rr_list(cfqq, 0);
 
 		if (duration > max_elapsed_crq)
 			max_elapsed_crq = duration;
 	}
-
-	/*
-	 * make sure list stays properly sorted, but only do so if necessary
-	 */
-	if (cfqq->on_rr && cfqq->service_used != start_val)
-		cfq_sort_rr_list(cfqq);
 }
 
 static struct request *cfq_next_request(request_queue_t *q)
@@ -913,13 +962,9 @@ static void cfq_put_queue(struct cfq_que
 {
 	BUG_ON(!atomic_read(&cfqq->ref));
 
-	dprintk("cfq_put_queue 0x%p, ref\n", atomic_read(&cfqq->ref));
-
 	if (!atomic_dec_and_test(&cfqq->ref))
 		return;
 
-	dprintk("killing queue 0x%p/%s\n", cfqq, cfqq->name);
-
 	BUG_ON(rb_first(&cfqq->sort_list));
 	BUG_ON(cfqq->on_rr);
 
@@ -1163,11 +1208,8 @@ retry:
 		hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
 		atomic_set(&cfqq->ref, 0);
 		cfqq->cfqd = cfqd;
-#ifdef CFQ_DEBUG
-		strncpy(cfqq->name, current->comm, sizeof(cfqq->name)-1);
-#endif
-		dprintk("cfqq set up for 0x%p/%s\n", cfqq, cfqq->name);
 		cfqq->key_type = cfqd->key_type;
+		cfqq->service_start = ~0UL;
 	}
 
 	if (new_cfqq)
@@ -1212,17 +1254,14 @@ cfq_insert_request(request_queue_t *q, s
 
 	switch (where) {
 		case ELEVATOR_INSERT_BACK:
-			dprintk("adding back 0x%p\n", rq);
 			while (cfq_dispatch_requests(q, cfqd->cfq_quantum))
 				;
 			list_add_tail(&rq->queuelist, &q->queue_head);
 			break;
 		case ELEVATOR_INSERT_FRONT:
-			dprintk("adding front 0x%p\n", rq);
 			list_add(&rq->queuelist, &q->queue_head);
 			break;
 		case ELEVATOR_INSERT_SORT:
-			dprintk("adding sort 0x%p\n", rq);
 			BUG_ON(!blk_fs_request(rq));
 			cfq_enqueue(cfqd, crq);
 			break;
@@ -1511,7 +1550,6 @@ static int cfq_init(request_queue_t *q, 
 	cfqd->cfq_back_max = cfq_back_max;
 	cfqd->cfq_back_penalty = cfq_back_penalty;
 
-	dprintk("cfq on queue 0x%p\n", q);
 	return 0;
 out_spare:
 	mempool_destroy(cfqd->crq_pool);
@@ -1654,7 +1692,6 @@ static ssize_t __FUNC(struct cfq_data *c
 }
 SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum);
 SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued);
-SHOW_FUNCTION(cfq_tagged_show, cfqd->cfq_tagged);
 SHOW_FUNCTION(cfq_fifo_expire_r_show, cfqd->cfq_fifo_expire_r);
 SHOW_FUNCTION(cfq_fifo_expire_w_show, cfqd->cfq_fifo_expire_w);
 SHOW_FUNCTION(cfq_fifo_batch_expire_show, cfqd->cfq_fifo_batch_expire);
@@ -1675,7 +1712,6 @@ static ssize_t __FUNC(struct cfq_data *c
 }
 STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX);
 STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX);
-STORE_FUNCTION(cfq_tagged_store, &cfqd->cfq_tagged, 0, 1);
 STORE_FUNCTION(cfq_fifo_expire_r_store, &cfqd->cfq_fifo_expire_r, 1, UINT_MAX);
 STORE_FUNCTION(cfq_fifo_expire_w_store, &cfqd->cfq_fifo_expire_w, 1, UINT_MAX);
 STORE_FUNCTION(cfq_fifo_batch_expire_store, &cfqd->cfq_fifo_batch_expire, 0, UINT_MAX);
@@ -1694,11 +1730,6 @@ static struct cfq_fs_entry cfq_queued_en
 	.show = cfq_queued_show,
 	.store = cfq_queued_store,
 };
-static struct cfq_fs_entry cfq_tagged_entry = {
-	.attr = {.name = "tagged", .mode = S_IRUGO | S_IWUSR },
-	.show = cfq_tagged_show,
-	.store = cfq_tagged_store,
-};
 static struct cfq_fs_entry cfq_fifo_expire_r_entry = {
 	.attr = {.name = "fifo_expire_sync", .mode = S_IRUGO | S_IWUSR },
 	.show = cfq_fifo_expire_r_show,
@@ -1746,7 +1777,6 @@ static struct cfq_fs_entry cfq_key_type_
 static struct attribute *default_attrs[] = {
 	&cfq_quantum_entry.attr,
 	&cfq_queued_entry.attr,
-	&cfq_tagged_entry.attr,
 	&cfq_fifo_expire_r_entry.attr,
 	&cfq_fifo_expire_w_entry.attr,
 	&cfq_fifo_batch_expire_entry.attr,
@@ -1805,6 +1835,7 @@ elevator_t iosched_cfq = {
 	.elevator_next_req_fn =		cfq_next_request,
 	.elevator_add_req_fn =		cfq_insert_request,
 	.elevator_remove_req_fn =	cfq_remove_request,
+	.elevator_requeue_req_fn =	cfq_requeue_request,
 	.elevator_queue_empty_fn =	cfq_queue_empty,
 	.elevator_completed_req_fn =	cfq_completed_request,
 	.elevator_former_req_fn =	cfq_former_request,
_