Merge ../linux-2.6 by hand

author: Paul Mackerras <paulus@samba.org> 2005-10-31 13:37:12 +1100
committer: Paul Mackerras <paulus@samba.org> 2005-10-31 13:37:12 +1100
commit: 23fd07750a789a66fe88cf173d52a18f1a387da4 (patch)
tree: 06fdd6df35fdb835abdaa9b754d62f6b84b97250 /drivers/block
parent: bd787d438a59266af3c9f6351644c85ef1dd21fe (diff)
parent: ed28f96ac1960f30f818374d65be71d2fdf811b0 (diff)
20 files changed, 613 insertions, 1023 deletions
diff --git a/drivers/block/Kconfig.iosched b/drivers/block/Kconfig.iosched
index 6070a480600..5b90d2fa63b 100644
--- a/drivers/block/Kconfig.iosched
+++ b/drivers/block/Kconfig.iosched
@@ -38,4 +38,32 @@ config IOSCHED_CFQ
 	  among all processes in the system. It should provide a fair
 	  working environment, suitable for desktop systems.
 
+choice
+	prompt "Default I/O scheduler"
+	default DEFAULT_AS
+	help
+	  Select the I/O scheduler which will be used by default for all
+	  block devices.
+
+	config DEFAULT_AS
+		bool "Anticipatory" if IOSCHED_AS
+
+	config DEFAULT_DEADLINE
+		bool "Deadline" if IOSCHED_DEADLINE
+
+	config DEFAULT_CFQ
+		bool "CFQ" if IOSCHED_CFQ
+
+	config DEFAULT_NOOP
+		bool "No-op"
+
+endchoice
+
+config DEFAULT_IOSCHED
+	string
+	default "anticipatory" if DEFAULT_AS
+	default "deadline" if DEFAULT_DEADLINE
+	default "cfq" if DEFAULT_CFQ
+	default "noop" if DEFAULT_NOOP
+
 endmenu
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 0e9e586e9ba..881c48d941b 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -1,5 +1,5 @@
 /* Copyright (c) 2004 Coraid, Inc.  See COPYING for GPL terms. */
-#define VERSION "12"
+#define VERSION "14"
 #define AOE_MAJOR 152
 #define DEVICE_NAME "aoe"
 
diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
index 45a24309618..41ae0ede619 100644
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -224,7 +224,7 @@ aoechr_init(void)
 		return PTR_ERR(aoe_class);
 	}
 	for (i = 0; i < ARRAY_SIZE(chardevs); ++i)
-		class_device_create(aoe_class,
+		class_device_create(aoe_class, NULL,
 					MKDEV(AOE_MAJOR, chardevs[i].minor),
 					NULL, chardevs[i].name);
 
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index b5be4b7d7b5..5c9c7c1a3d4 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -8,6 +8,7 @@
 #include <linux/blkdev.h>
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
+#include <asm/unaligned.h>
 #include "aoe.h"
 
 #define TIMERTICK (HZ / 10)
@@ -311,16 +312,16 @@ ataid_complete(struct aoedev *d, unsigned char *id)
 	u16 n;
 
 	/* word 83: command set supported */
-	n = le16_to_cpup((__le16 *) &id[83<<1]);
+	n = le16_to_cpu(get_unaligned((__le16 *) &id[83<<1]));
 
 	/* word 86: command set/feature enabled */
-	n |= le16_to_cpup((__le16 *) &id[86<<1]);
+	n |= le16_to_cpu(get_unaligned((__le16 *) &id[86<<1]));
 
 	if (n & (1<<10)) {	/* bit 10: LBA 48 */
 		d->flags |= DEVFL_EXT;
 
 		/* word 100: number lba48 sectors */
-		ssize = le64_to_cpup((__le64 *) &id[100<<1]);
+		ssize = le64_to_cpu(get_unaligned((__le64 *) &id[100<<1]));
 
 		/* set as in ide-disk.c:init_idedisk_capacity */
 		d->geo.cylinders = ssize;
@@ -331,12 +332,12 @@ ataid_complete(struct aoedev *d, unsigned char *id)
 		d->flags &= ~DEVFL_EXT;
 
 		/* number lba28 sectors */
-		ssize = le32_to_cpup((__le32 *) &id[60<<1]);
+		ssize = le32_to_cpu(get_unaligned((__le32 *) &id[60<<1]));
 
 		/* NOTE: obsolete in ATA 6 */
-		d->geo.cylinders = le16_to_cpup((__le16 *) &id[54<<1]);
-		d->geo.heads = le16_to_cpup((__le16 *) &id[55<<1]);
-		d->geo.sectors = le16_to_cpup((__le16 *) &id[56<<1]);
+		d->geo.cylinders = le16_to_cpu(get_unaligned((__le16 *) &id[54<<1]));
+		d->geo.heads = le16_to_cpu(get_unaligned((__le16 *) &id[55<<1]));
+		d->geo.sectors = le16_to_cpu(get_unaligned((__le16 *) &id[56<<1]));
 	}
 	d->ssize = ssize;
 	d->geo.start = 0;
diff --git a/drivers/block/as-iosched.c b/drivers/block/as-iosched.c
index 95c0a3690b0..c6744ff3829 100644
--- a/drivers/block/as-iosched.c
+++ b/drivers/block/as-iosched.c
@@ -98,7 +98,6 @@ struct as_data {
 
 	struct as_rq *next_arq[2];	/* next in sort order */
 	sector_t last_sector[2];	/* last REQ_SYNC & REQ_ASYNC sectors */
-	struct list_head *dispatch;	/* driver dispatch queue */
 	struct list_head *hash;		/* request hash */
 
 	unsigned long exit_prob;	/* probability a task will exit while
@@ -239,6 +238,25 @@ static struct io_context *as_get_io_context(void)
 	return ioc;
 }
 
+static void as_put_io_context(struct as_rq *arq)
+{
+	struct as_io_context *aic;
+
+	if (unlikely(!arq->io_context))
+		return;
+
+	aic = arq->io_context->aic;
+
+	if (arq->is_sync == REQ_SYNC && aic) {
+		spin_lock(&aic->lock);
+		set_bit(AS_TASK_IORUNNING, &aic->state);
+		aic->last_end_request = jiffies;
+		spin_unlock(&aic->lock);
+	}
+
+	put_io_context(arq->io_context);
+}
+
 /*
  * the back merge hash support functions
  */
@@ -261,14 +279,6 @@ static inline void as_del_arq_hash(struct as_rq *arq)
 		__as_del_arq_hash(arq);
 }
 
-static void as_remove_merge_hints(request_queue_t *q, struct as_rq *arq)
-{
-	as_del_arq_hash(arq);
-
-	if (q->last_merge == arq->request)
-		q->last_merge = NULL;
-}
-
 static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq)
 {
 	struct request *rq = arq->request;
@@ -312,7 +322,7 @@ static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset)
 		BUG_ON(!arq->on_hash);
 
 		if (!rq_mergeable(__rq)) {
-			as_remove_merge_hints(ad->q, arq);
+			as_del_arq_hash(arq);
 			continue;
 		}
 
@@ -950,23 +960,12 @@ static void as_completed_request(request_queue_t *q, struct request *rq)
 
 	WARN_ON(!list_empty(&rq->queuelist));
 
-	if (arq->state == AS_RQ_PRESCHED) {
-		WARN_ON(arq->io_context);
-		goto out;
-	}
-
-	if (arq->state == AS_RQ_MERGED)
-		goto out_ioc;
-
 	if (arq->state != AS_RQ_REMOVED) {
 		printk("arq->state %d\n", arq->state);
 		WARN_ON(1);
 		goto out;
 	}
 
-	if (!blk_fs_request(rq))
-		goto out;
-
 	if (ad->changed_batch && ad->nr_dispatched == 1) {
 		kblockd_schedule_work(&ad->antic_work);
 		ad->changed_batch = 0;
@@ -1001,21 +1000,7 @@ static void as_completed_request(request_queue_t *q, struct request *rq)
 		}
 	}
 
-out_ioc:
-	if (!arq->io_context)
-		goto out;
-
-	if (arq->is_sync == REQ_SYNC) {
-		struct as_io_context *aic = arq->io_context->aic;
-		if (aic) {
-			spin_lock(&aic->lock);
-			set_bit(AS_TASK_IORUNNING, &aic->state);
-			aic->last_end_request = jiffies;
-			spin_unlock(&aic->lock);
-		}
-	}
-
-	put_io_context(arq->io_context);
+	as_put_io_context(arq);
 out:
 	arq->state = AS_RQ_POSTSCHED;
 }
@@ -1047,73 +1032,11 @@ static void as_remove_queued_request(request_queue_t *q, struct request *rq)
 		ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
 
 	list_del_init(&arq->fifo);
-	as_remove_merge_hints(q, arq);
+	as_del_arq_hash(arq);
 	as_del_arq_rb(ad, arq);
 }
 
 /*
- * as_remove_dispatched_request is called to remove a request which has gone
- * to the dispatch list.
- */
-static void as_remove_dispatched_request(request_queue_t *q, struct request *rq)
-{
-	struct as_rq *arq = RQ_DATA(rq);
-	struct as_io_context *aic;
-
-	if (!arq) {
-		WARN_ON(1);
-		return;
-	}
-
-	WARN_ON(arq->state != AS_RQ_DISPATCHED);
-	WARN_ON(ON_RB(&arq->rb_node));
-	if (arq->io_context && arq->io_context->aic) {
-		aic = arq->io_context->aic;
-		if (aic) {
-			WARN_ON(!atomic_read(&aic->nr_dispatched));
-			atomic_dec(&aic->nr_dispatched);
-		}
-	}
-}
-
-/*
- * as_remove_request is called when a driver has finished with a request.
- * This should be only called for dispatched requests, but for some reason
- * a POWER4 box running hwscan it does not.
- */
-static void as_remove_request(request_queue_t *q, struct request *rq)
-{
-	struct as_rq *arq = RQ_DATA(rq);
-
-	if (unlikely(arq->state == AS_RQ_NEW))
-		goto out;
-
-	if (ON_RB(&arq->rb_node)) {
-		if (arq->state != AS_RQ_QUEUED) {
-			printk("arq->state %d\n", arq->state);
-			WARN_ON(1);
-			goto out;
-		}
-		/*
-		 * We'll lose the aliased request(s) here. I don't think this
-		 * will ever happen, but if it does, hopefully someone will
-		 * report it.
-		 */
-		WARN_ON(!list_empty(&rq->queuelist));
-		as_remove_queued_request(q, rq);
-	} else {
-		if (arq->state != AS_RQ_DISPATCHED) {
-			printk("arq->state %d\n", arq->state);
-			WARN_ON(1);
-			goto out;
-		}
-		as_remove_dispatched_request(q, rq);
-	}
-out:
-	arq->state = AS_RQ_REMOVED;
-}
-
-/*
  * as_fifo_expired returns 0 if there are no expired reads on the fifo,
  * 1 otherwise.  It is ratelimited so that we only perform the check once per
  * `fifo_expire' interval.  Otherwise a large number of expired requests
@@ -1165,7 +1088,6 @@ static inline int as_batch_expired(struct as_data *ad)
 static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
 {
 	struct request *rq = arq->request;
-	struct list_head *insert;
 	const int data_dir = arq->is_sync;
 
 	BUG_ON(!ON_RB(&arq->rb_node));
@@ -1198,13 +1120,13 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
 	/*
 	 * take it off the sort and fifo list, add to dispatch queue
 	 */
-	insert = ad->dispatch->prev;
-
 	while (!list_empty(&rq->queuelist)) {
 		struct request *__rq = list_entry_rq(rq->queuelist.next);
 		struct as_rq *__arq = RQ_DATA(__rq);
 
-		list_move_tail(&__rq->queuelist, ad->dispatch);
+		list_del(&__rq->queuelist);
+
+		elv_dispatch_add_tail(ad->q, __rq);
 
 		if (__arq->io_context && __arq->io_context->aic)
 			atomic_inc(&__arq->io_context->aic->nr_dispatched);
@@ -1218,7 +1140,8 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
 	as_remove_queued_request(ad->q, rq);
 	WARN_ON(arq->state != AS_RQ_QUEUED);
 
-	list_add(&rq->queuelist, insert);
+	elv_dispatch_sort(ad->q, rq);
+
 	arq->state = AS_RQ_DISPATCHED;
 	if (arq->io_context && arq->io_context->aic)
 		atomic_inc(&arq->io_context->aic->nr_dispatched);
@@ -1230,12 +1153,42 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
  * read/write expire, batch expire, etc, and moves it to the dispatch
  * queue. Returns 1 if a request was found, 0 otherwise.
  */
-static int as_dispatch_request(struct as_data *ad)
+static int as_dispatch_request(request_queue_t *q, int force)
 {
+	struct as_data *ad = q->elevator->elevator_data;
 	struct as_rq *arq;
 	const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]);
 	const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]);
 
+	if (unlikely(force)) {
+		/*
+		 * Forced dispatch, accounting is useless.  Reset
+		 * accounting states and dump fifo_lists.  Note that
+		 * batch_data_dir is reset to REQ_SYNC to avoid
+		 * screwing write batch accounting as write batch
+		 * accounting occurs on W->R transition.
+		 */
+		int dispatched = 0;
+
+		ad->batch_data_dir = REQ_SYNC;
+		ad->changed_batch = 0;
+		ad->new_batch = 0;
+
+		while (ad->next_arq[REQ_SYNC]) {
+			as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]);
+			dispatched++;
+		}
+		ad->last_check_fifo[REQ_SYNC] = jiffies;
+
+		while (ad->next_arq[REQ_ASYNC]) {
+			as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]);
+			dispatched++;
+		}
+		ad->last_check_fifo[REQ_ASYNC] = jiffies;
+
+		return dispatched;
+	}
+
 	/* Signal that the write batch was uncontended, so we can't time it */
 	if (ad->batch_data_dir == REQ_ASYNC && !reads) {
 		if (ad->current_write_count == 0 || !writes)
@@ -1359,20 +1312,6 @@ fifo_expired:
 	return 1;
 }
 
-static struct request *as_next_request(request_queue_t *q)
-{
-	struct as_data *ad = q->elevator->elevator_data;
-	struct request *rq = NULL;
-
-	/*
-	 * if there are still requests on the dispatch queue, grab the first
-	 */
-	if (!list_empty(ad->dispatch) || as_dispatch_request(ad))
-		rq = list_entry_rq(ad->dispatch->next);
-
-	return rq;
-}
-
 /*
  * Add arq to a list behind alias
  */
@@ -1404,17 +1343,26 @@ as_add_aliased_request(struct as_data *ad, struct as_rq *arq, struct as_rq *alia
 	/*
 	 * Don't want to have to handle merges.
 	 */
-	as_remove_merge_hints(ad->q, arq);
+	as_del_arq_hash(arq);
+	arq->request->flags |= REQ_NOMERGE;
 }
 
 /*
  * add arq to rbtree and fifo
  */
-static void as_add_request(struct as_data *ad, struct as_rq *arq)
+static void as_add_request(request_queue_t *q, struct request *rq)
 {
+	struct as_data *ad = q->elevator->elevator_data;
+	struct as_rq *arq = RQ_DATA(rq);
 	struct as_rq *alias;
 	int data_dir;
 
+	if (arq->state != AS_RQ_PRESCHED) {
+		printk("arq->state: %d\n", arq->state);
+		WARN_ON(1);
+	}
+	arq->state = AS_RQ_NEW;
+
 	if (rq_data_dir(arq->request) == READ
 			|| current->flags&PF_SYNCWRITE)
 		arq->is_sync = 1;
@@ -1437,12 +1385,8 @@ static void as_add_request(struct as_data *ad, struct as_rq *arq)
 		arq->expires = jiffies + ad->fifo_expire[data_dir];
 		list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
 
-		if (rq_mergeable(arq->request)) {
+		if (rq_mergeable(arq->request))
 			as_add_arq_hash(ad, arq);
-
-			if (!ad->q->last_merge)
-				ad->q->last_merge = arq->request;
-		}
 		as_update_arq(ad, arq); /* keep state machine up to date */
 
 	} else {
@@ -1463,96 +1407,24 @@ static void as_add_request(struct as_data *ad, struct as_rq *arq)
 	arq->state = AS_RQ_QUEUED;
 }
 
-static void as_deactivate_request(request_queue_t *q, struct request *rq)
+static void as_activate_request(request_queue_t *q, struct request *rq)
 {
-	struct as_data *ad = q->elevator->elevator_data;
 	struct as_rq *arq = RQ_DATA(rq);
 
-	if (arq) {
-		if (arq->state == AS_RQ_REMOVED) {
-			arq->state = AS_RQ_DISPATCHED;
-			if (arq->io_context && arq->io_context->aic)
-				atomic_inc(&arq->io_context->aic->nr_dispatched);
-		}
-	} else
-		WARN_ON(blk_fs_request(rq)
-			&& (!(rq->flags & (REQ_HARDBARRIER|REQ_SOFTBARRIER))) );
-
-	/* Stop anticipating - let this request get through */
-	as_antic_stop(ad);
-}
-
-/*
- * requeue the request. The request has not been completed, nor is it a
- * new request, so don't touch accounting.
- */
-static void as_requeue_request(request_queue_t *q, struct request *rq)
-{
-	as_deactivate_request(q, rq);
-	list_add(&rq->queuelist, &q->queue_head);
-}
-
-/*
- * Account a request that is inserted directly onto the dispatch queue.
- * arq->io_context->aic->nr_dispatched should not need to be incremented
- * because only new requests should come through here: requeues go through
- * our explicit requeue handler.
- */
-static void as_account_queued_request(struct as_data *ad, struct request *rq)
-{
-	if (blk_fs_request(rq)) {
-		struct as_rq *arq = RQ_DATA(rq);
-		arq->state = AS_RQ_DISPATCHED;
-		ad->nr_dispatched++;
-	}
+	WARN_ON(arq->state != AS_RQ_DISPATCHED);
+	arq->state = AS_RQ_REMOVED;
+	if (arq->io_context && arq->io_context->aic)
+		atomic_dec(&arq->io_context->aic->nr_dispatched);
 }
 
-static void
-as_insert_request(request_queue_t *q, struct request *rq, int where)
+static void as_deactivate_request(request_queue_t *q, struct request *rq)
 {
-	struct as_data *ad = q->elevator->elevator_data;
 	struct as_rq *arq = RQ_DATA(rq);
 
-	if (arq) {
-		if (arq->state != AS_RQ_PRESCHED) {
-			printk("arq->state: %d\n", arq->state);
-			WARN_ON(1);
-		}
-		arq->state = AS_RQ_NEW;
-	}
-
-	/* barriers must flush the reorder queue */
-	if (unlikely(rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)
-			&& where == ELEVATOR_INSERT_SORT)) {
-		WARN_ON(1);
-		where = ELEVATOR_INSERT_BACK;
-	}
-
-	switch (where) {
-		case ELEVATOR_INSERT_BACK:
-			while (ad->next_arq[REQ_SYNC])
-				as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]);
-
-			while (ad->next_arq[REQ_ASYNC])
-				as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]);
-
-			list_add_tail(&rq->queuelist, ad->dispatch);
-			as_account_queued_request(ad, rq);
-			as_antic_stop(ad);
-			break;
-		case ELEVATOR_INSERT_FRONT:
-			list_add(&rq->queuelist, ad->dispatch);
-			as_account_queued_request(ad, rq);
-			as_antic_stop(ad);
-			break;
-		case ELEVATOR_INSERT_SORT:
-			BUG_ON(!blk_fs_request(rq));
-			as_add_request(ad, arq);
-			break;
-		default:
-			BUG();
-			return;
-	}
+	WARN_ON(arq->state != AS_RQ_REMOVED);
+	arq->state = AS_RQ_DISPATCHED;
+	if (arq->io_context && arq->io_context->aic)
+		atomic_inc(&arq->io_context->aic->nr_dispatched);
 }
 
 /*
@@ -1565,12 +1437,8 @@ static int as_queue_empty(request_queue_t *q)
 {
 	struct as_data *ad = q->elevator->elevator_data;
 
-	if (!list_empty(&ad->fifo_list[REQ_ASYNC])
-		|| !list_empty(&ad->fifo_list[REQ_SYNC])
-		|| !list_empty(ad->dispatch))
-			return 0;
-
-	return 1;
+	return list_empty(&ad->fifo_list[REQ_ASYNC])
+		&& list_empty(&ad->fifo_list[REQ_SYNC]);
 }
 
 static struct request *
@@ -1608,15 +1476,6 @@ as_merge(request_queue_t *q, struct request **req, struct bio *bio)
 	int ret;
 
 	/*
-	 * try last_merge to avoid going to hash
-	 */
-	ret = elv_try_last_merge(q, bio);
-	if (ret != ELEVATOR_NO_MERGE) {
-		__rq = q->last_merge;
-		goto out_insert;
-	}
-
-	/*
 	 * see if the merge hash can satisfy a back merge
 	 */
 	__rq = as_find_arq_hash(ad, bio->bi_sector);
@@ -1644,9 +1503,6 @@ as_merge(request_queue_t *q, struct request **req, struct bio *bio)
 
 	return ELEVATOR_NO_MERGE;
 out:
-	if (rq_mergeable(__rq))
-		q->last_merge = __rq;
-out_insert:
 	if (ret) {
 		if (rq_mergeable(__rq))
 			as_hot_arq_hash(ad, RQ_DATA(__rq));
@@ -1693,9 +1549,6 @@ static void as_merged_request(request_queue_t *q, struct request *req)
 		 * behind the disk head. We currently don't bother adjusting.
 		 */
 	}
-
-	if (arq->on_hash)
-		q->last_merge = req;
 }
 
 static void
@@ -1763,6 +1616,7 @@ as_merged_requests(request_queue_t *q, struct request *req,
 	 * kill knowledge of next, this one is a goner
 	 */
 	as_remove_queued_request(q, next);
+	as_put_io_context(anext);
 
 	anext->state = AS_RQ_MERGED;
 }
@@ -1782,7 +1636,7 @@ static void as_work_handler(void *data)
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	if (as_next_request(q))
+	if (!as_queue_empty(q))
 		q->request_fn(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
@@ -1797,7 +1651,9 @@ static void as_put_request(request_queue_t *q, struct request *rq)
 		return;
 	}
 
-	if (arq->state != AS_RQ_POSTSCHED && arq->state != AS_RQ_PRESCHED) {
+	if (unlikely(arq->state != AS_RQ_POSTSCHED &&
+		     arq->state != AS_RQ_PRESCHED &&
+		     arq->state != AS_RQ_MERGED)) {
 		printk("arq->state %d\n", arq->state);
 		WARN_ON(1);
 	}
@@ -1807,7 +1663,7 @@ static void as_put_request(request_queue_t *q, struct request *rq)
 }
 
 static int as_set_request(request_queue_t *q, struct request *rq,
-			  struct bio *bio, int gfp_mask)
+			  struct bio *bio, gfp_t gfp_mask)
 {
 	struct as_data *ad = q->elevator->elevator_data;
 	struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask);
@@ -1907,7 +1763,6 @@ static int as_init_queue(request_queue_t *q, elevator_t *e)
 	INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
 	ad->sort_list[REQ_SYNC] = RB_ROOT;
 	ad->sort_list[REQ_ASYNC] = RB_ROOT;
-	ad->dispatch = &q->queue_head;
 	ad->fifo_expire[REQ_SYNC] = default_read_expire;
 	ad->fifo_expire[REQ_ASYNC] = default_write_expire;
 	ad->antic_expire = default_antic_expire;
@@ -2072,10 +1927,9 @@ static struct elevator_type iosched_as = {
 		.elevator_merge_fn = 		as_merge,
 		.elevator_merged_fn =		as_merged_request,
 		.elevator_merge_req_fn =	as_merged_requests,
-		.elevator_next_req_fn =		as_next_request,
-		.elevator_add_req_fn =		as_insert_request,
-		.elevator_remove_req_fn =	as_remove_request,
-		.elevator_requeue_req_fn = 	as_requeue_request,
+		.elevator_dispatch_fn =		as_dispatch_request,
+		.elevator_add_req_fn =		as_add_request,
+		.elevator_activate_req_fn =	as_activate_request,
 		.elevator_deactivate_req_fn = 	as_deactivate_request,
 		.elevator_queue_empty_fn =	as_queue_empty,
 		.elevator_completed_req_fn =	as_completed_request,
@@ -2119,8 +1973,8 @@ static int __init as_init(void)
 
 static void __exit as_exit(void)
 {
-	kmem_cache_destroy(arq_pool);
 	elv_unregister(&iosched_as);
+	kmem_cache_destroy(arq_pool);
 }
 
 module_init(as_init);
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index e183a3ef783..ec27976a57d 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -28,13 +28,17 @@
    through the array controller.  Note in particular, neither 
    physical nor logical disks are presented through the scsi layer. */
 
+#include <linux/timer.h>
+#include <linux/completion.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include <asm/atomic.h>
+
 #include <scsi/scsi.h> 
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h> 
-#include <asm/atomic.h>
-#include <linux/timer.h>
-#include <linux/completion.h>
 
 #include "cciss_scsi.h"
 
diff --git a/drivers/block/cfq-iosched.c b/drivers/block/cfq-iosched.c
index cd056e7e64e..5281f8e7051 100644
--- a/drivers/block/cfq-iosched.c
+++ b/drivers/block/cfq-iosched.c
@@ -84,7 +84,6 @@ static int cfq_max_depth = 2;
 	(node)->rb_left = NULL;		\
 } while (0)
 #define RB_CLEAR_ROOT(root)	((root)->rb_node = NULL)
-#define ON_RB(node)		((node)->rb_color != RB_NONE)
 #define rb_entry_crq(node)	rb_entry((node), struct cfq_rq, rb_node)
 #define rq_rb_key(rq)		(rq)->sector
 
@@ -271,10 +270,7 @@ CFQ_CFQQ_FNS(expired);
 #undef CFQ_CFQQ_FNS
 
 enum cfq_rq_state_flags {
-	CFQ_CRQ_FLAG_in_flight = 0,
-	CFQ_CRQ_FLAG_in_driver,
-	CFQ_CRQ_FLAG_is_sync,
-	CFQ_CRQ_FLAG_requeued,
+	CFQ_CRQ_FLAG_is_sync = 0,
 };
 
 #define CFQ_CRQ_FNS(name)						\
@@ -291,14 +287,11 @@ static inline int cfq_crq_##name(const struct cfq_rq *crq)		\
 	return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0;	\
 }
 
-CFQ_CRQ_FNS(in_flight);
-CFQ_CRQ_FNS(in_driver);
 CFQ_CRQ_FNS(is_sync);
-CFQ_CRQ_FNS(requeued);
 #undef CFQ_CRQ_FNS
 
 static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short);
-static void cfq_dispatch_sort(request_queue_t *, struct cfq_rq *);
+static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *);
 static void cfq_put_cfqd(struct cfq_data *cfqd);
 
 #define process_sync(tsk)	((tsk)->flags & PF_SYNCWRITE)
@@ -311,14 +304,6 @@ static inline void cfq_del_crq_hash(struct cfq_rq *crq)
 	hlist_del_init(&crq->hash);
 }
 
-static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq)
-{
-	cfq_del_crq_hash(crq);
-
-	if (q->last_merge == crq->request)
-		q->last_merge = NULL;
-}
-
 static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
 {
 	const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request));
@@ -347,18 +332,13 @@ static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
 	return NULL;
 }
 
-static inline int cfq_pending_requests(struct cfq_data *cfqd)
-{
-	return !list_empty(&cfqd->queue->queue_head) || cfqd->busy_queues;
-}
-
 /*
  * scheduler run of queue, if there are requests pending and no one in the
  * driver that will restart queueing
  */
 static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
 {
-	if (!cfqd->rq_in_driver && cfq_pending_requests(cfqd))
+	if (!cfqd->rq_in_driver && cfqd->busy_queues)
 		kblockd_schedule_work(&cfqd->unplug_work);
 }
 
@@ -366,7 +346,7 @@ static int cfq_queue_empty(request_queue_t *q)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 
-	return !cfq_pending_requests(cfqd);
+	return !cfqd->busy_queues;
 }
 
 /*
@@ -386,11 +366,6 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
 	if (crq2 == NULL)
 		return crq1;
 
-	if (cfq_crq_requeued(crq1) && !cfq_crq_requeued(crq2))
-		return crq1;
-	else if (cfq_crq_requeued(crq2) && !cfq_crq_requeued(crq1))
-		return crq2;
-
 	if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2))
 		return crq1;
 	else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1))
@@ -461,10 +436,7 @@ cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	struct cfq_rq *crq_next = NULL, *crq_prev = NULL;
 	struct rb_node *rbnext, *rbprev;
 
-	rbnext = NULL;
-	if (ON_RB(&last->rb_node))
-		rbnext = rb_next(&last->rb_node);
-	if (!rbnext) {
+	if (!(rbnext = rb_next(&last->rb_node))) {
 		rbnext = rb_first(&cfqq->sort_list);
 		if (rbnext == &last->rb_node)
 			rbnext = NULL;
@@ -545,13 +517,13 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
  * the pending list according to last request service
  */
 static inline void
-cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq, int requeue)
+cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
 	BUG_ON(cfq_cfqq_on_rr(cfqq));
 	cfq_mark_cfqq_on_rr(cfqq);
 	cfqd->busy_queues++;
 
-	cfq_resort_rr_list(cfqq, requeue);
+	cfq_resort_rr_list(cfqq, 0);
 }
 
 static inline void
@@ -571,22 +543,19 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 static inline void cfq_del_crq_rb(struct cfq_rq *crq)
 {
 	struct cfq_queue *cfqq = crq->cfq_queue;
+	struct cfq_data *cfqd = cfqq->cfqd;
+	const int sync = cfq_crq_is_sync(crq);
 
-	if (ON_RB(&crq->rb_node)) {
-		struct cfq_data *cfqd = cfqq->cfqd;
-		const int sync = cfq_crq_is_sync(crq);
-
-		BUG_ON(!cfqq->queued[sync]);
-		cfqq->queued[sync]--;
+	BUG_ON(!cfqq->queued[sync]);
+	cfqq->queued[sync]--;
 
-		cfq_update_next_crq(crq);
+	cfq_update_next_crq(crq);
 
-		rb_erase(&crq->rb_node, &cfqq->sort_list);
-		RB_CLEAR_COLOR(&crq->rb_node);
+	rb_erase(&crq->rb_node, &cfqq->sort_list);
+	RB_CLEAR_COLOR(&crq->rb_node);
 
-		if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list))
-			cfq_del_cfqq_rr(cfqd, cfqq);
-	}
+	if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list))
+		cfq_del_cfqq_rr(cfqd, cfqq);
 }
 
 static struct cfq_rq *
@@ -627,12 +596,12 @@ static void cfq_add_crq_rb(struct cfq_rq *crq)
 	 * if that happens, put the alias on the dispatch list
 	 */
 	while ((__alias = __cfq_add_crq_rb(crq)) != NULL)
-		cfq_dispatch_sort(cfqd->queue, __alias);
+		cfq_dispatch_insert(cfqd->queue, __alias);
 
 	rb_insert_color(&crq->rb_node, &cfqq->sort_list);
 
 	if (!cfq_cfqq_on_rr(cfqq))
-		cfq_add_cfqq_rr(cfqd, cfqq, cfq_crq_requeued(crq));
+		cfq_add_cfqq_rr(cfqd, cfqq);
 
 	/*
 	 * check if this request is a better next-serve candidate
@@ -643,10 +612,8 @@ static void cfq_add_crq_rb(struct cfq_rq *crq)
 static inline void
 cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
 {
-	if (ON_RB(&crq->rb_node)) {
-		rb_erase(&crq->rb_node, &cfqq->sort_list);
-		cfqq->queued[cfq_crq_is_sync(crq)]--;
-	}
+	rb_erase(&crq->rb_node, &cfqq->sort_list);
+	cfqq->queued[cfq_crq_is_sync(crq)]--;
 
 	cfq_add_crq_rb(crq);
 }
@@ -676,49 +643,28 @@ out:
 	return NULL;
 }
 
-static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
+static void cfq_activate_request(request_queue_t *q, struct request *rq)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
-	struct cfq_rq *crq = RQ_DATA(rq);
-
-	if (crq) {
-		struct cfq_queue *cfqq = crq->cfq_queue;
 
-		if (cfq_crq_in_driver(crq)) {
-			cfq_clear_crq_in_driver(crq);
-			WARN_ON(!cfqd->rq_in_driver);
-			cfqd->rq_in_driver--;
-		}
-		if (cfq_crq_in_flight(crq)) {
-			const int sync = cfq_crq_is_sync(crq);
-
-			cfq_clear_crq_in_flight(crq);
-			WARN_ON(!cfqq->on_dispatch[sync]);
-			cfqq->on_dispatch[sync]--;
-		}
-		cfq_mark_crq_requeued(crq);
-	}
+	cfqd->rq_in_driver++;
 }
 
-/*
- * make sure the service time gets corrected on reissue of this request
- */
-static void cfq_requeue_request(request_queue_t *q, struct request *rq)
+static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
 {
-	cfq_deactivate_request(q, rq);
-	list_add(&rq->queuelist, &q->queue_head);
+	struct cfq_data *cfqd = q->elevator->elevator_data;
+
+	WARN_ON(!cfqd->rq_in_driver);
+	cfqd->rq_in_driver--;
 }
 
-static void cfq_remove_request(request_queue_t *q, struct request *rq)
+static void cfq_remove_request(struct request *rq)
 {
 	struct cfq_rq *crq = RQ_DATA(rq);
 
-	if (crq) {
-		list_del_init(&rq->queuelist);
-		cfq_del_crq_rb(crq);
-		cfq_remove_merge_hints(q, crq);
-
-	}
+	list_del_init(&rq->queuelist);
+	cfq_del_crq_rb(crq);
+	cfq_del_crq_hash(crq);
 }
 
 static int
@@ -728,12 +674,6 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
 	struct request *__rq;
 	int ret;
 
-	ret = elv_try_last_merge(q, bio);
-	if (ret != ELEVATOR_NO_MERGE) {
-		__rq = q->last_merge;
-		goto out_insert;
-	}
-
 	__rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
 	if (__rq && elv_rq_merge_ok(__rq, bio)) {
 		ret = ELEVATOR_BACK_MERGE;
@@ -748,8 +688,6 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
 
 	return ELEVATOR_NO_MERGE;
 out:
-	q->last_merge = __rq;
-out_insert:
 	*req = __rq;
 	return ret;
 }
@@ -762,14 +700,12 @@ static void cfq_merged_request(request_queue_t *q, struct request *req)
 	cfq_del_crq_hash(crq);
 	cfq_add_crq_hash(cfqd, crq);
 
-	if (ON_RB(&crq->rb_node) && (rq_rb_key(req) != crq->rb_key)) {
+	if (rq_rb_key(req) != crq->rb_key) {
 		struct cfq_queue *cfqq = crq->cfq_queue;
 
 		cfq_update_next_crq(crq);
 		cfq_reposition_crq_rb(cfqq, crq);
 	}
-
-	q->last_merge = req;
 }
 
 static void
@@ -785,7 +721,7 @@ cfq_merged_requests(request_queue_t *q, struct request *rq,
 	    time_before(next->start_time, rq->start_time))
 		list_move(&rq->queuelist, &next->queuelist);
 
-	cfq_remove_request(q, next);
+	cfq_remove_request(next);
 }
 
 static inline void
@@ -992,53 +928,15 @@ static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	return 1;
 }
 
-/*
- * we dispatch cfqd->cfq_quantum requests in total from the rr_list queues,
- * this function sector sorts the selected request to minimize seeks. we start
- * at cfqd->last_sector, not 0.
- */
-static void cfq_dispatch_sort(request_queue_t *q, struct cfq_rq *crq)
+static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_queue *cfqq = crq->cfq_queue;
-	struct list_head *head = &q->queue_head, *entry = head;
-	struct request *__rq;
-	sector_t last;
-
-	list_del(&crq->request->queuelist);
-
-	last = cfqd->last_sector;
-	list_for_each_entry_reverse(__rq, head, queuelist) {
-		struct cfq_rq *__crq = RQ_DATA(__rq);
-
-		if (blk_barrier_rq(__rq))
-			break;
-		if (!blk_fs_request(__rq))
-			break;
-		if (cfq_crq_requeued(__crq))
-			break;
-
-		if (__rq->sector <= crq->request->sector)
-			break;
-		if (__rq->sector > last && crq->request->sector < last) {
-			last = crq->request->sector + crq->request->nr_sectors;
-			break;
-		}
-		entry = &__rq->queuelist;
-	}
-
-	cfqd->last_sector = last;
 
 	cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq);
-
-	cfq_del_crq_rb(crq);
-	cfq_remove_merge_hints(q, crq);
-
-	cfq_mark_crq_in_flight(crq);
-	cfq_clear_crq_requeued(crq);
-
+	cfq_remove_request(crq->request);
 	cfqq->on_dispatch[cfq_crq_is_sync(crq)]++;
-	list_add_tail(&crq->request->queuelist, entry);
+	elv_dispatch_sort(q, crq->request);
 }
 
 /*
@@ -1159,7 +1057,7 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		/*
 		 * finally, insert request into driver dispatch list
 		 */
-		cfq_dispatch_sort(cfqd->queue, crq);
+		cfq_dispatch_insert(cfqd->queue, crq);
 
 		cfqd->dispatch_slice++;
 		dispatched++;
@@ -1194,7 +1092,7 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 }
 
 static int
-cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force)
+cfq_dispatch_requests(request_queue_t *q, int force)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_queue *cfqq;
@@ -1204,12 +1102,25 @@ cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force)
 
 	cfqq = cfq_select_queue(cfqd, force);
 	if (cfqq) {
+		int max_dispatch;
+
+		/*
+		 * if idle window is disabled, allow queue buildup
+		 */
+		if (!cfq_cfqq_idle_window(cfqq) &&
+		    cfqd->rq_in_driver >= cfqd->cfq_max_depth)
+			return 0;
+
 		cfq_clear_cfqq_must_dispatch(cfqq);
 		cfq_clear_cfqq_wait_request(cfqq);
 		del_timer(&cfqd->idle_slice_timer);
 
-		if (cfq_class_idle(cfqq))
-			max_dispatch = 1;
+		if (!force) {
+			max_dispatch = cfqd->cfq_quantum;
+			if (cfq_class_idle(cfqq))
+				max_dispatch = 1;
+		} else
+			max_dispatch = INT_MAX;
 
 		return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch);
 	}
@@ -1217,93 +1128,6 @@ cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force)
 	return 0;
 }
 
-static inline void cfq_account_dispatch(struct cfq_rq *crq)
-{
-	struct cfq_queue *cfqq = crq->cfq_queue;
-	struct cfq_data *cfqd = cfqq->cfqd;
-
-	if (unlikely(!blk_fs_request(crq->request)))
-		return;
-
-	/*
-	 * accounted bit is necessary since some drivers will call
-	 * elv_next_request() many times for the same request (eg ide)
-	 */
-	if (cfq_crq_in_driver(crq))
-		return;
-
-	cfq_mark_crq_in_driver(crq);
-	cfqd->rq_in_driver++;
-}
-
-static inline void
-cfq_account_completion(struct cfq_queue *cfqq, struct cfq_rq *crq)
-{
-	struct cfq_data *cfqd = cfqq->cfqd;
-	unsigned long now;
-
-	if (!cfq_crq_in_driver(crq))
-		return;
-
-	now = jiffies;
-
-	WARN_ON(!cfqd->rq_in_driver);
-	cfqd->rq_in_driver--;
-
-	if (!cfq_class_idle(cfqq))
-		cfqd->last_end_request = now;
-
-	if (!cfq_cfqq_dispatched(cfqq)) {
-		if (cfq_cfqq_on_rr(cfqq)) {
-			cfqq->service_last = now;
-			cfq_resort_rr_list(cfqq, 0);
-		}
-		if (cfq_cfqq_expired(cfqq)) {
-			__cfq_slice_expired(cfqd, cfqq, 0);
-			cfq_schedule_dispatch(cfqd);
-		}
-	}
-
-	if (cfq_crq_is_sync(crq))
-		crq->io_context->last_end_request = now;
-}
-
-static struct request *cfq_next_request(request_queue_t *q)
-{
-	struct cfq_data *cfqd = q->elevator->elevator_data;
-	struct request *rq;
-
-	if (!list_empty(&q->queue_head)) {
-		struct cfq_rq *crq;
-dispatch:
-		rq = list_entry_rq(q->queue_head.next);
-
-		crq = RQ_DATA(rq);
-		if (crq) {
-			struct cfq_queue *cfqq = crq->cfq_queue;
-
-			/*
-			 * if idle window is disabled, allow queue buildup
-			 */
-			if (!cfq_crq_in_driver(crq) &&
-			    !cfq_cfqq_idle_window(cfqq) &&
-			    !blk_barrier_rq(rq) &&
-			    cfqd->rq_in_driver >= cfqd->cfq_max_depth)
-				return NULL;
-
-			cfq_remove_merge_hints(q, crq);
-			cfq_account_dispatch(crq);
-		}
-
-		return rq;
-	}
-
-	if (cfq_dispatch_requests(q, cfqd->cfq_quantum, 0))
-		goto dispatch;
-
-	return NULL;
-}
-
 /*
  * task holds one reference to the queue, dropped when task exits. each crq
  * in-flight on this queue also holds a reference, dropped when crq is freed.
@@ -1422,7 +1246,7 @@ static void cfq_exit_io_context(struct cfq_io_context *cic)
 }
 
 static struct cfq_io_context *
-cfq_alloc_io_context(struct cfq_data *cfqd, int gfp_mask)
+cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 {
 	struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask);
 
@@ -1517,7 +1341,7 @@ static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
 
 static struct cfq_queue *
 cfq_get_queue(struct cfq_data *cfqd, unsigned int key, unsigned short ioprio,
-	      int gfp_mask)
+	      gfp_t gfp_mask)
 {
 	const int hashval = hash_long(key, CFQ_QHASH_SHIFT);
 	struct cfq_queue *cfqq, *new_cfqq = NULL;
@@ -1578,7 +1402,7 @@ out:
  * cfqq, so we don't need to worry about it disappearing
  */
 static struct cfq_io_context *
-cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, int gfp_mask)
+cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask)
 {
 	struct io_context *ioc = NULL;
 	struct cfq_io_context *cic;
@@ -1816,8 +1640,9 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	}
 }
 
-static void cfq_enqueue(struct cfq_data *cfqd, struct request *rq)
+static void cfq_insert_request(request_queue_t *q, struct request *rq)
 {
+	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_rq *crq = RQ_DATA(rq);
 	struct cfq_queue *cfqq = crq->cfq_queue;
 
@@ -1827,66 +1652,43 @@ static void cfq_enqueue(struct cfq_data *cfqd, struct request *rq)
 
 	list_add_tail(&rq->queuelist, &cfqq->fifo);
 
-	if (rq_mergeable(rq)) {
+	if (rq_mergeable(rq))
 		cfq_add_crq_hash(cfqd, crq);
 
-		if (!cfqd->queue->last_merge)
-			cfqd->queue->last_merge = rq;
-	}
-
 	cfq_crq_enqueued(cfqd, cfqq, crq);
 }
 
-static void
-cfq_insert_request(request_queue_t *q, struct request *rq, int where)
-{
-	struct cfq_data *cfqd = q->elevator->elevator_data;
-
-	switch (where) {
-		case ELEVATOR_INSERT_BACK:
-			while (cfq_dispatch_requests(q, INT_MAX, 1))
-				;
-			list_add_tail(&rq->queuelist, &q->queue_head);
-			/*
-			 * If we were idling with pending requests on
-			 * inactive cfqqs, force dispatching will
-			 * remove the idle timer and the queue won't
-			 * be kicked by __make_request() afterward.
-			 * Kick it here.
-			 */
-			cfq_schedule_dispatch(cfqd);
-			break;
-		case ELEVATOR_INSERT_FRONT:
-			list_add(&rq->queuelist, &q->queue_head);
-			break;
-		case ELEVATOR_INSERT_SORT:
-			BUG_ON(!blk_fs_request(rq));
-			cfq_enqueue(cfqd, rq);
-			break;
-		default:
-			printk("%s: bad insert point %d\n", __FUNCTION__,where);
-			return;
-	}
-}
-
 static void cfq_completed_request(request_queue_t *q, struct request *rq)
 {
 	struct cfq_rq *crq = RQ_DATA(rq);
-	struct cfq_queue *cfqq;
+	struct cfq_queue *cfqq = crq->cfq_queue;
+	struct cfq_data *cfqd = cfqq->cfqd;
+	const int sync = cfq_crq_is_sync(crq);
+	unsigned long now;
 
-	if (unlikely(!blk_fs_request(rq)))
-		return;
+	now = jiffies;
 
-	cfqq = crq->cfq_queue;
+	WARN_ON(!cfqd->rq_in_driver);
+	WARN_ON(!cfqq->on_dispatch[sync]);
+	cfqd->rq_in_driver--;
+	cfqq->on_dispatch[sync]--;
 
-	if (cfq_crq_in_flight(crq)) {
-		const int sync = cfq_crq_is_sync(crq);
+	if (!cfq_class_idle(cfqq))
+		cfqd->last_end_request = now;
 
-		WARN_ON(!cfqq->on_dispatch[sync]);
-		cfqq->on_dispatch[sync]--;
+	if (!cfq_cfqq_dispatched(cfqq)) {
+		if (cfq_cfqq_on_rr(cfqq)) {
+			cfqq->service_last = now;
+			cfq_resort_rr_list(cfqq, 0);
+		}
+		if (cfq_cfqq_expired(cfqq)) {
+			__cfq_slice_expired(cfqd, cfqq, 0);
+			cfq_schedule_dispatch(cfqd);
+		}
 	}
 
-	cfq_account_completion(cfqq, crq);
+	if (cfq_crq_is_sync(crq))
+		crq->io_context->last_end_request = now;
 }
 
 static struct request *
@@ -2075,7 +1877,7 @@ static void cfq_put_request(request_queue_t *q, struct request *rq)
  */
 static int
 cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
-		int gfp_mask)
+		gfp_t gfp_mask)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct task_struct *tsk = current;
@@ -2118,9 +1920,6 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
 		INIT_HLIST_NODE(&crq->hash);
 		crq->cfq_queue = cfqq;
 		crq->io_context = cic;
-		cfq_clear_crq_in_flight(crq);
-		cfq_clear_crq_in_driver(crq);
-		cfq_clear_crq_requeued(crq);
 
 		if (rw == READ || process_sync(tsk))
 			cfq_mark_crq_is_sync(crq);
@@ -2201,7 +2000,7 @@ static void cfq_idle_slice_timer(unsigned long data)
 		 * only expire and reinvoke request handler, if there are
 		 * other queues with pending requests
 		 */
-		if (!cfq_pending_requests(cfqd)) {
+		if (!cfqd->busy_queues) {
 			cfqd->idle_slice_timer.expires = min(now + cfqd->cfq_slice_idle, cfqq->slice_end);
 			add_timer(&cfqd->idle_slice_timer);
 			goto out_cont;
@@ -2576,10 +2375,9 @@ static struct elevator_type iosched_cfq = {
 		.elevator_merge_fn = 		cfq_merge,
 		.elevator_merged_fn =		cfq_merged_request,
 		.elevator_merge_req_fn =	cfq_merged_requests,
-		.elevator_next_req_fn =		cfq_next_request,
+		.elevator_dispatch_fn =		cfq_dispatch_requests,
 		.elevator_add_req_fn =		cfq_insert_request,
-		.elevator_remove_req_fn =	cfq_remove_request,
-		.elevator_requeue_req_fn =	cfq_requeue_request,
+		.elevator_activate_req_fn =	cfq_activate_request,
 		.elevator_deactivate_req_fn =	cfq_deactivate_request,
 		.elevator_queue_empty_fn =	cfq_queue_empty,
 		.elevator_completed_req_fn =	cfq_completed_request,
@@ -2620,28 +2418,8 @@ static int __init cfq_init(void)
 
 static void __exit cfq_exit(void)
 {
-	struct task_struct *g, *p;
-	unsigned long flags;
-
-	read_lock_irqsave(&tasklist_lock, flags);
-
-	/*
-	 * iterate each process in the system, removing our io_context
-	 */
-	do_each_thread(g, p) {
-		struct io_context *ioc = p->io_context;
-
-		if (ioc && ioc->cic) {
-			ioc->cic->exit(ioc->cic);
-			cfq_free_io_context(ioc->cic);
-			ioc->cic = NULL;
-		}
-	} while_each_thread(g, p);
-
-	read_unlock_irqrestore(&tasklist_lock, flags);
-
-	cfq_slab_kill();
 	elv_unregister(&iosched_cfq);
+	cfq_slab_kill();
 }
 
 module_init(cfq_init);
diff --git a/drivers/block/deadline-iosched.c b/drivers/block/deadline-iosched.c
index 52a3ae5289a..7929471d7df 100644
--- a/drivers/block/deadline-iosched.c
+++ b/drivers/block/deadline-iosched.c
@@ -50,7 +50,6 @@ struct deadline_data {
 	 * next in sort order. read, write or both are NULL
 	 */
 	struct deadline_rq *next_drq[2];
-	struct list_head *dispatch;	/* driver dispatch queue */
 	struct list_head *hash;		/* request hash */
 	unsigned int batching;		/* number of sequential requests made */
 	sector_t last_sector;		/* head position */
@@ -113,15 +112,6 @@ static inline void deadline_del_drq_hash(struct deadline_rq *drq)
 		__deadline_del_drq_hash(drq);
 }
 
-static void
-deadline_remove_merge_hints(request_queue_t *q, struct deadline_rq *drq)
-{
-	deadline_del_drq_hash(drq);
-
-	if (q->last_merge == drq->request)
-		q->last_merge = NULL;
-}
-
 static inline void
 deadline_add_drq_hash(struct deadline_data *dd, struct deadline_rq *drq)
 {
@@ -239,10 +229,9 @@ deadline_del_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
 			dd->next_drq[data_dir] = rb_entry_drq(rbnext);
 	}
 
-	if (ON_RB(&drq->rb_node)) {
-		rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
-		RB_CLEAR(&drq->rb_node);
-	}
+	BUG_ON(!ON_RB(&drq->rb_node));
+	rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
+	RB_CLEAR(&drq->rb_node);
 }
 
 static struct request *
@@ -286,7 +275,7 @@ deadline_find_first_drq(struct deadline_data *dd, int data_dir)
 /*
  * add drq to rbtree and fifo
  */
-static inline void
+static void
 deadline_add_request(struct request_queue *q, struct request *rq)
 {
 	struct deadline_data *dd = q->elevator->elevator_data;
@@ -301,12 +290,8 @@ deadline_add_request(struct request_queue *q, struct request *rq)
 	drq->expires = jiffies + dd->fifo_expire[data_dir];
 	list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]);
 
-	if (rq_mergeable(rq)) {
+	if (rq_mergeable(rq))
 		deadline_add_drq_hash(dd, drq);
-
-		if (!q->last_merge)
-			q->last_merge = rq;
-	}
 }
 
 /*
@@ -315,14 +300,11 @@ deadline_add_request(struct request_queue *q, struct request *rq)
 static void deadline_remove_request(request_queue_t *q, struct request *rq)
 {
 	struct deadline_rq *drq = RQ_DATA(rq);
+	struct deadline_data *dd = q->elevator->elevator_data;
 
-	if (drq) {
-		struct deadline_data *dd = q->elevator->elevator_data;
-
-		list_del_init(&drq->fifo);
-		deadline_remove_merge_hints(q, drq);
-		deadline_del_drq_rb(dd, drq);
-	}
+	list_del_init(&drq->fifo);
+	deadline_del_drq_rb(dd, drq);
+	deadline_del_drq_hash(drq);
 }
 
 static int
@@ -333,15 +315,6 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
 	int ret;
 
 	/*
-	 * try last_merge to avoid going to hash
-	 */
-	ret = elv_try_last_merge(q, bio);
-	if (ret != ELEVATOR_NO_MERGE) {
-		__rq = q->last_merge;
-		goto out_insert;
-	}
-
-	/*
 	 * see if the merge hash can satisfy a back merge
 	 */
 	__rq = deadline_find_drq_hash(dd, bio->bi_sector);
@@ -373,8 +346,6 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
 
 	return ELEVATOR_NO_MERGE;
 out:
-	q->last_merge = __rq;
-out_insert:
 	if (ret)
 		deadline_hot_drq_hash(dd, RQ_DATA(__rq));
 	*req = __rq;
@@ -399,8 +370,6 @@ static void deadline_merged_request(request_queue_t *q, struct request *req)
 		deadline_del_drq_rb(dd, drq);
 		deadline_add_drq_rb(dd, drq);
 	}
-
-	q->last_merge = req;
 }
 
 static void
@@ -452,7 +421,7 @@ deadline_move_to_dispatch(struct deadline_data *dd, struct deadline_rq *drq)
 	request_queue_t *q = drq->request->q;
 
 	deadline_remove_request(q, drq->request);
-	list_add_tail(&drq->request->queuelist, dd->dispatch);
+	elv_dispatch_add_tail(q, drq->request);
 }
 
 /*
@@ -502,8 +471,9 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
  * deadline_dispatch_requests selects the best request according to
  * read/write expire, fifo_batch, etc
  */
-static int deadline_dispatch_requests(struct deadline_data *dd)
+static int deadline_dispatch_requests(request_queue_t *q, int force)
 {
+	struct deadline_data *dd = q->elevator->elevator_data;
 	const int reads = !list_empty(&dd->fifo_list[READ]);
 	const int writes = !list_empty(&dd->fifo_list[WRITE]);
 	struct deadline_rq *drq;
@@ -597,65 +567,12 @@ dispatch_request:
 	return 1;
 }
 
-static struct request *deadline_next_request(request_queue_t *q)
-{
-	struct deadline_data *dd = q->elevator->elevator_data;
-	struct request *rq;
-
-	/*
-	 * if there are still requests on the dispatch queue, grab the first one
-	 */
-	if (!list_empty(dd->dispatch)) {
-dispatch:
-		rq = list_entry_rq(dd->dispatch->next);
-		return rq;
-	}
-
-	if (deadline_dispatch_requests(dd))
-		goto dispatch;
-
-	return NULL;
-}
-
-static void
-deadline_insert_request(request_queue_t *q, struct request *rq, int where)
-{
-	struct deadline_data *dd = q->elevator->elevator_data;
-
-	/* barriers must flush the reorder queue */
-	if (unlikely(rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)
-			&& where == ELEVATOR_INSERT_SORT))
-		where = ELEVATOR_INSERT_BACK;
-
-	switch (where) {
-		case ELEVATOR_INSERT_BACK:
-			while (deadline_dispatch_requests(dd))
-				;
-			list_add_tail(&rq->queuelist, dd->dispatch);
-			break;
-		case ELEVATOR_INSERT_FRONT:
-			list_add(&rq->queuelist, dd->dispatch);
-			break;
-		case ELEVATOR_INSERT_SORT:
-			BUG_ON(!blk_fs_request(rq));
-			deadline_add_request(q, rq);
-			break;
-		default:
-			printk("%s: bad insert point %d\n", __FUNCTION__,where);
-			return;
-	}
-}
-
 static int deadline_queue_empty(request_queue_t *q)
 {
 	struct deadline_data *dd = q->elevator->elevator_data;
 
-	if (!list_empty(&dd->fifo_list[WRITE])
-	    || !list_empty(&dd->fifo_list[READ])
-	    || !list_empty(dd->dispatch))
-		return 0;
-
-	return 1;
+	return list_empty(&dd->fifo_list[WRITE])
+		&& list_empty(&dd->fifo_list[READ]);
 }
 
 static struct request *
@@ -733,7 +650,6 @@ static int deadline_init_queue(request_queue_t *q, elevator_t *e)
 	INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
 	dd->sort_list[READ] = RB_ROOT;
 	dd->sort_list[WRITE] = RB_ROOT;
-	dd->dispatch = &q->queue_head;
 	dd->fifo_expire[READ] = read_expire;
 	dd->fifo_expire[WRITE] = write_expire;
 	dd->writes_starved = writes_starved;
@@ -748,15 +664,13 @@ static void deadline_put_request(request_queue_t *q, struct request *rq)
 	struct deadline_data *dd = q->elevator->elevator_data;
 	struct deadline_rq *drq = RQ_DATA(rq);
 
-	if (drq) {
-		mempool_free(drq, dd->drq_pool);
-		rq->elevator_private = NULL;
-	}
+	mempool_free(drq, dd->drq_pool);
+	rq->elevator_private = NULL;
 }
 
 static int
 deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
-		     int gfp_mask)
+		     gfp_t gfp_mask)
 {
 	struct deadline_data *dd = q->elevator->elevator_data;
 	struct deadline_rq *drq;
@@ -917,9 +831,8 @@ static struct elevator_type iosched_deadline = {
 		.elevator_merge_fn = 		deadline_merge,
 		.elevator_merged_fn =		deadline_merged_request,
 		.elevator_merge_req_fn =	deadline_merged_requests,
-		.elevator_next_req_fn =		deadline_next_request,
-		.elevator_add_req_fn =		deadline_insert_request,
-		.elevator_remove_req_fn =	deadline_remove_request,
+		.elevator_dispatch_fn =		deadline_dispatch_requests,
+		.elevator_add_req_fn =		deadline_add_request,
 		.elevator_queue_empty_fn =	deadline_queue_empty,
 		.elevator_former_req_fn =	deadline_former_request,
 		.elevator_latter_req_fn =	deadline_latter_request,
diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c
index 98f0126a2de..36f1057084b 100644
--- a/drivers/block/elevator.c
+++ b/drivers/block/elevator.c
@@ -34,6 +34,7 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/compiler.h>
+#include <linux/delay.h>
 
 #include <asm/uaccess.h>
 
@@ -83,21 +84,11 @@ inline int elv_try_merge(struct request *__rq, struct bio *bio)
 }
 EXPORT_SYMBOL(elv_try_merge);
 
-inline int elv_try_last_merge(request_queue_t *q, struct bio *bio)
-{
-	if (q->last_merge)
-		return elv_try_merge(q->last_merge, bio);
-
-	return ELEVATOR_NO_MERGE;
-}
-EXPORT_SYMBOL(elv_try_last_merge);
-
 static struct elevator_type *elevator_find(const char *name)
 {
 	struct elevator_type *e = NULL;
 	struct list_head *entry;
 
-	spin_lock_irq(&elv_list_lock);
 	list_for_each(entry, &elv_list) {
 		struct elevator_type *__e;
 
@@ -108,7 +99,6 @@ static struct elevator_type *elevator_find(const char *name)
 			break;
 		}
 	}
-	spin_unlock_irq(&elv_list_lock);
 
 	return e;
 }
@@ -120,12 +110,15 @@ static void elevator_put(struct elevator_type *e)
 
 static struct elevator_type *elevator_get(const char *name)
 {
-	struct elevator_type *e = elevator_find(name);
+	struct elevator_type *e;
 
-	if (!e)
-		return NULL;
-	if (!try_module_get(e->elevator_owner))
-		return NULL;
+	spin_lock_irq(&elv_list_lock);
+
+	e = elevator_find(name);
+	if (e && !try_module_get(e->elevator_owner))
+		e = NULL;
+
+	spin_unlock_irq(&elv_list_lock);
 
 	return e;
 }
@@ -139,8 +132,6 @@ static int elevator_attach(request_queue_t *q, struct elevator_type *e,
 	eq->ops = &e->ops;
 	eq->elevator_type = e;
 
-	INIT_LIST_HEAD(&q->queue_head);
-	q->last_merge = NULL;
 	q->elevator = eq;
 
 	if (eq->ops->elevator_init_fn)
@@ -153,23 +144,20 @@ static char chosen_elevator[16];
 
 static void elevator_setup_default(void)
 {
+	struct elevator_type *e;
+
 	/*
-	 * check if default is set and exists
+	 * If default has not been set, use the compiled-in selection.
 	 */
-	if (chosen_elevator[0] && elevator_find(chosen_elevator))
-		return;
-
-#if defined(CONFIG_IOSCHED_AS)
-	strcpy(chosen_elevator, "anticipatory");
-#elif defined(CONFIG_IOSCHED_DEADLINE)
-	strcpy(chosen_elevator, "deadline");
-#elif defined(CONFIG_IOSCHED_CFQ)
-	strcpy(chosen_elevator, "cfq");
-#elif defined(CONFIG_IOSCHED_NOOP)
-	strcpy(chosen_elevator, "noop");
-#else
-#error "You must build at least 1 IO scheduler into the kernel"
-#endif
+	if (!chosen_elevator[0])
+		strcpy(chosen_elevator, CONFIG_DEFAULT_IOSCHED);
+
+ 	/*
+ 	 * If the given scheduler is not available, fall back to no-op.
+ 	 */
+ 	if (!(e = elevator_find(chosen_elevator)))
+ 		strcpy(chosen_elevator, "noop");
+	elevator_put(e);
 }
 
 static int __init elevator_setup(char *str)
@@ -186,6 +174,11 @@ int elevator_init(request_queue_t *q, char *name)
 	struct elevator_queue *eq;
 	int ret = 0;
 
+	INIT_LIST_HEAD(&q->queue_head);
+	q->last_merge = NULL;
+	q->end_sector = 0;
+	q->boundary_rq = NULL;
+
 	elevator_setup_default();
 
 	if (!name)
@@ -220,9 +213,52 @@ void elevator_exit(elevator_t *e)
 	kfree(e);
 }
 
+/*
+ * Insert rq into dispatch queue of q.  Queue lock must be held on
+ * entry.  If sort != 0, rq is sort-inserted; otherwise, rq will be
+ * appended to the dispatch queue.  To be used by specific elevators.
+ */
+void elv_dispatch_sort(request_queue_t *q, struct request *rq)
+{
+	sector_t boundary;
+	struct list_head *entry;
+
+	if (q->last_merge == rq)
+		q->last_merge = NULL;
+
+	boundary = q->end_sector;
+
+	list_for_each_prev(entry, &q->queue_head) {
+		struct request *pos = list_entry_rq(entry);
+
+		if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
+			break;
+		if (rq->sector >= boundary) {
+			if (pos->sector < boundary)
+				continue;
+		} else {
+			if (pos->sector >= boundary)
+				break;
+		}
+		if (rq->sector >= pos->sector)
+			break;
+	}
+
+	list_add(&rq->queuelist, entry);
+}
+
 int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
 {
 	elevator_t *e = q->elevator;
+	int ret;
+
+	if (q->last_merge) {
+		ret = elv_try_merge(q->last_merge, bio);
+		if (ret != ELEVATOR_NO_MERGE) {
+			*req = q->last_merge;
+			return ret;
+		}
+	}
 
 	if (e->ops->elevator_merge_fn)
 		return e->ops->elevator_merge_fn(q, req, bio);
@@ -236,6 +272,8 @@ void elv_merged_request(request_queue_t *q, struct request *rq)
 
 	if (e->ops->elevator_merged_fn)
 		e->ops->elevator_merged_fn(q, rq);
+
+	q->last_merge = rq;
 }
 
 void elv_merge_requests(request_queue_t *q, struct request *rq,
@@ -243,20 +281,13 @@ void elv_merge_requests(request_queue_t *q, struct request *rq,
 {
 	elevator_t *e = q->elevator;
 
-	if (q->last_merge == next)
-		q->last_merge = NULL;
-
 	if (e->ops->elevator_merge_req_fn)
 		e->ops->elevator_merge_req_fn(q, rq, next);
+
+	q->last_merge = rq;
 }
 
-/*
- * For careful internal use by the block layer. Essentially the same as
- * a requeue in that it tells the io scheduler that this request is not
- * active in the driver or hardware anymore, but we don't want the request
- * added back to the scheduler. Function is not exported.
- */
-void elv_deactivate_request(request_queue_t *q, struct request *rq)
+void elv_requeue_request(request_queue_t *q, struct request *rq)
 {
 	elevator_t *e = q->elevator;
 
@@ -264,19 +295,14 @@ void elv_deactivate_request(request_queue_t *q, struct request *rq)
 	 * it already went through dequeue, we need to decrement the
 	 * in_flight count again
 	 */
-	if (blk_account_rq(rq))
+	if (blk_account_rq(rq)) {
 		q->in_flight--;
+		if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn)
+			e->ops->elevator_deactivate_req_fn(q, rq);
+	}
 
 	rq->flags &= ~REQ_STARTED;
 
-	if (e->ops->elevator_deactivate_req_fn)
-		e->ops->elevator_deactivate_req_fn(q, rq);
-}
-
-void elv_requeue_request(request_queue_t *q, struct request *rq)
-{
-	elv_deactivate_request(q, rq);
-
 	/*
 	 * if this is the flush, requeue the original instead and drop the flush
 	 */
@@ -285,31 +311,27 @@ void elv_requeue_request(request_queue_t *q, struct request *rq)
 		rq = rq->end_io_data;
 	}
 
-	/*
-	 * the request is prepped and may have some resources allocated.
-	 * allowing unprepped requests to pass this one may cause resource
-	 * deadlock.  turn on softbarrier.
-	 */
-	rq->flags |= REQ_SOFTBARRIER;
-
-	/*
-	 * if iosched has an explicit requeue hook, then use that. otherwise
-	 * just put the request at the front of the queue
-	 */
-	if (q->elevator->ops->elevator_requeue_req_fn)
-		q->elevator->ops->elevator_requeue_req_fn(q, rq);
-	else
-		__elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
+	__elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
 }
 
 void __elv_add_request(request_queue_t *q, struct request *rq, int where,
 		       int plug)
 {
-	/*
-	 * barriers implicitly indicate back insertion
-	 */
-	if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER) &&
-	    where == ELEVATOR_INSERT_SORT)
+	if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
+		/*
+		 * barriers implicitly indicate back insertion
+		 */
+		if (where == ELEVATOR_INSERT_SORT)
+			where = ELEVATOR_INSERT_BACK;
+
+		/*
+		 * this request is scheduling boundary, update end_sector
+		 */
+		if (blk_fs_request(rq)) {
+			q->end_sector = rq_end_sector(rq);
+			q->boundary_rq = rq;
+		}
+	} else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
 		where = ELEVATOR_INSERT_BACK;
 
 	if (plug)
@@ -317,23 +339,54 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
 
 	rq->q = q;
 
-	if (!test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)) {
-		q->elevator->ops->elevator_add_req_fn(q, rq, where);
+	switch (where) {
+	case ELEVATOR_INSERT_FRONT:
+		rq->flags |= REQ_SOFTBARRIER;
 
-		if (blk_queue_plugged(q)) {
-			int nrq = q->rq.count[READ] + q->rq.count[WRITE]
-				  - q->in_flight;
+		list_add(&rq->queuelist, &q->queue_head);
+		break;
 
-			if (nrq >= q->unplug_thresh)
-				__generic_unplug_device(q);
-		}
-	} else
+	case ELEVATOR_INSERT_BACK:
+		rq->flags |= REQ_SOFTBARRIER;
+
+		while (q->elevator->ops->elevator_dispatch_fn(q, 1))
+			;
+		list_add_tail(&rq->queuelist, &q->queue_head);
 		/*
-		 * if drain is set, store the request "locally". when the drain
-		 * is finished, the requests will be handed ordered to the io
-		 * scheduler
+		 * We kick the queue here for the following reasons.
+		 * - The elevator might have returned NULL previously
+		 *   to delay requests and returned them now.  As the
+		 *   queue wasn't empty before this request, ll_rw_blk
+		 *   won't run the queue on return, resulting in hang.
+		 * - Usually, back inserted requests won't be merged
+		 *   with anything.  There's no point in delaying queue
+		 *   processing.
 		 */
-		list_add_tail(&rq->queuelist, &q->drain_list);
+		blk_remove_plug(q);
+		q->request_fn(q);
+		break;
+
+	case ELEVATOR_INSERT_SORT:
+		BUG_ON(!blk_fs_request(rq));
+		rq->flags |= REQ_SORTED;
+		q->elevator->ops->elevator_add_req_fn(q, rq);
+		if (q->last_merge == NULL && rq_mergeable(rq))
+			q->last_merge = rq;
+		break;
+
+	default:
+		printk(KERN_ERR "%s: bad insertion point %d\n",
+		       __FUNCTION__, where);
+		BUG();
+	}
+
+	if (blk_queue_plugged(q)) {
+		int nrq = q->rq.count[READ] + q->rq.count[WRITE]
+			- q->in_flight;
+
+		if (nrq >= q->unplug_thresh)
+			__generic_unplug_device(q);
+	}
 }
 
 void elv_add_request(request_queue_t *q, struct request *rq, int where,
@@ -348,13 +401,19 @@ void elv_add_request(request_queue_t *q, struct request *rq, int where,
 
 static inline struct request *__elv_next_request(request_queue_t *q)
 {
-	struct request *rq = q->elevator->ops->elevator_next_req_fn(q);
+	struct request *rq;
+
+	if (unlikely(list_empty(&q->queue_head) &&
+		     !q->elevator->ops->elevator_dispatch_fn(q, 0)))
+		return NULL;
+
+	rq = list_entry_rq(q->queue_head.next);
 
 	/*
 	 * if this is a barrier write and the device has to issue a
 	 * flush sequence to support it, check how far we are
 	 */
-	if (rq && blk_fs_request(rq) && blk_barrier_rq(rq)) {
+	if (blk_fs_request(rq) && blk_barrier_rq(rq)) {
 		BUG_ON(q->ordered == QUEUE_ORDERED_NONE);
 
 		if (q->ordered == QUEUE_ORDERED_FLUSH &&
@@ -371,15 +430,30 @@ struct request *elv_next_request(request_queue_t *q)
 	int ret;
 
 	while ((rq = __elv_next_request(q)) != NULL) {
-		/*
-		 * just mark as started even if we don't start it, a request
-		 * that has been delayed should not be passed by new incoming
-		 * requests
-		 */
-		rq->flags |= REQ_STARTED;
+		if (!(rq->flags & REQ_STARTED)) {
+			elevator_t *e = q->elevator;
+
+			/*
+			 * This is the first time the device driver
+			 * sees this request (possibly after
+			 * requeueing).  Notify IO scheduler.
+			 */
+			if (blk_sorted_rq(rq) &&
+			    e->ops->elevator_activate_req_fn)
+				e->ops->elevator_activate_req_fn(q, rq);
+
+			/*
+			 * just mark as started even if we don't start
+			 * it, a request that has been delayed should
+			 * not be passed by new incoming requests
+			 */
+			rq->flags |= REQ_STARTED;
+		}
 
-		if (rq == q->last_merge)
-			q->last_merge = NULL;
+		if (!q->boundary_rq || q->boundary_rq == rq) {
+			q->end_sector = rq_end_sector(rq);
+			q->boundary_rq = NULL;
+		}
 
 		if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn)
 			break;
@@ -391,9 +465,9 @@ struct request *elv_next_request(request_queue_t *q)
 			/*
 			 * the request may have been (partially) prepped.
 			 * we need to keep this request in the front to
-			 * avoid resource deadlock.  turn on softbarrier.
+			 * avoid resource deadlock.  REQ_STARTED will
+			 * prevent other fs requests from passing this one.
 			 */
-			rq->flags |= REQ_SOFTBARRIER;
 			rq = NULL;
 			break;
 		} else if (ret == BLKPREP_KILL) {
@@ -416,42 +490,32 @@ struct request *elv_next_request(request_queue_t *q)
 	return rq;
 }
 
-void elv_remove_request(request_queue_t *q, struct request *rq)
+void elv_dequeue_request(request_queue_t *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	BUG_ON(list_empty(&rq->queuelist));
+
+	list_del_init(&rq->queuelist);
 
 	/*
 	 * the time frame between a request being removed from the lists
 	 * and to it is freed is accounted as io that is in progress at
-	 * the driver side. note that we only account requests that the
-	 * driver has seen (REQ_STARTED set), to avoid false accounting
-	 * for request-request merges
+	 * the driver side.
 	 */
 	if (blk_account_rq(rq))
 		q->in_flight++;
-
-	/*
-	 * the main clearing point for q->last_merge is on retrieval of
-	 * request by driver (it calls elv_next_request()), but it _can_
-	 * also happen here if a request is added to the queue but later
-	 * deleted without ever being given to driver (merged with another
-	 * request).
-	 */
-	if (rq == q->last_merge)
-		q->last_merge = NULL;
-
-	if (e->ops->elevator_remove_req_fn)
-		e->ops->elevator_remove_req_fn(q, rq);
 }
 
 int elv_queue_empty(request_queue_t *q)
 {
 	elevator_t *e = q->elevator;
 
+	if (!list_empty(&q->queue_head))
+		return 0;
+
 	if (e->ops->elevator_queue_empty_fn)
 		return e->ops->elevator_queue_empty_fn(q);
 
-	return list_empty(&q->queue_head);
+	return 1;
 }
 
 struct request *elv_latter_request(request_queue_t *q, struct request *rq)
@@ -487,7 +551,7 @@ struct request *elv_former_request(request_queue_t *q, struct request *rq)
 }
 
 int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
-		    int gfp_mask)
+		    gfp_t gfp_mask)
 {
 	elevator_t *e = q->elevator;
 
@@ -523,11 +587,11 @@ void elv_completed_request(request_queue_t *q, struct request *rq)
 	/*
 	 * request is released from the driver, io must be done
 	 */
-	if (blk_account_rq(rq))
+	if (blk_account_rq(rq)) {
 		q->in_flight--;
-
-	if (e->ops->elevator_completed_req_fn)
-		e->ops->elevator_completed_req_fn(q, rq);
+		if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
+			e->ops->elevator_completed_req_fn(q, rq);
+	}
 }
 
 int elv_register_queue(struct request_queue *q)
@@ -555,10 +619,9 @@ void elv_unregister_queue(struct request_queue *q)
 
 int elv_register(struct elevator_type *e)
 {
+	spin_lock_irq(&elv_list_lock);
 	if (elevator_find(e->elevator_name))
 		BUG();
-
-	spin_lock_irq(&elv_list_lock);
 	list_add_tail(&e->list, &elv_list);
 	spin_unlock_irq(&elv_list_lock);
 
@@ -572,6 +635,27 @@ EXPORT_SYMBOL_GPL(elv_register);
 
 void elv_unregister(struct elevator_type *e)
 {
+	struct task_struct *g, *p;
+
+	/*
+	 * Iterate every thread in the process to remove the io contexts.
+	 */
+	read_lock(&tasklist_lock);
+	do_each_thread(g, p) {
+		struct io_context *ioc = p->io_context;
+		if (ioc && ioc->cic) {
+			ioc->cic->exit(ioc->cic);
+			ioc->cic->dtor(ioc->cic);
+			ioc->cic = NULL;
+		}
+		if (ioc && ioc->aic) {
+			ioc->aic->exit(ioc->aic);
+			ioc->aic->dtor(ioc->aic);
+			ioc->aic = NULL;
+		}
+	} while_each_thread(g, p);
+	read_unlock(&tasklist_lock);
+
 	spin_lock_irq(&elv_list_lock);
 	list_del_init(&e->list);
 	spin_unlock_irq(&elv_list_lock);
@@ -582,25 +666,36 @@ EXPORT_SYMBOL_GPL(elv_unregister);
  * switch to new_e io scheduler. be careful not to introduce deadlocks -
  * we don't free the old io scheduler, before we have allocated what we
  * need for the new one. this way we have a chance of going back to the old
- * one, if the new one fails init for some reason. we also do an intermediate
- * switch to noop to ensure safety with stack-allocated requests, since they
- * don't originate from the block layer allocator. noop is safe here, because
- * it never needs to touch the elevator itself for completion events. DRAIN
- * flags will make sure we don't touch it for additions either.
+ * one, if the new one fails init for some reason.
  */
 static void elevator_switch(request_queue_t *q, struct elevator_type *new_e)
 {
-	elevator_t *e = kmalloc(sizeof(elevator_t), GFP_KERNEL);
-	struct elevator_type *noop_elevator = NULL;
-	elevator_t *old_elevator;
+	elevator_t *old_elevator, *e;
 
+	/*
+	 * Allocate new elevator
+	 */
+	e = kmalloc(sizeof(elevator_t), GFP_KERNEL);
 	if (!e)
 		goto error;
 
 	/*
-	 * first step, drain requests from the block freelist
+	 * Turn on BYPASS and drain all requests w/ elevator private data
 	 */
-	blk_wait_queue_drained(q, 0);
+	spin_lock_irq(q->queue_lock);
+
+	set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
+
+	while (q->elevator->ops->elevator_dispatch_fn(q, 1))
+		;
+
+	while (q->rq.elvpriv) {
+		spin_unlock_irq(q->queue_lock);
+		msleep(10);
+		spin_lock_irq(q->queue_lock);
+	}
+
+	spin_unlock_irq(q->queue_lock);
 
 	/*
 	 * unregister old elevator data
@@ -609,18 +704,6 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e)
 	old_elevator = q->elevator;
 
 	/*
- 	 * next step, switch to noop since it uses no private rq structures
-	 * and doesn't allocate any memory for anything. then wait for any
-	 * non-fs requests in-flight
- 	 */
-	noop_elevator = elevator_get("noop");
-	spin_lock_irq(q->queue_lock);
-	elevator_attach(q, noop_elevator, e);
-	spin_unlock_irq(q->queue_lock);
-
-	blk_wait_queue_drained(q, 1);
-
-	/*
 	 * attach and start new elevator
 	 */
 	if (elevator_attach(q, new_e, e))
@@ -630,11 +713,10 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e)
 		goto fail_register;
 
 	/*
-	 * finally exit old elevator and start queue again
+	 * finally exit old elevator and turn off BYPASS.
 	 */
 	elevator_exit(old_elevator);
-	blk_finish_queue_drain(q);
-	elevator_put(noop_elevator);
+	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
 	return;
 
 fail_register:
@@ -643,13 +725,13 @@ fail_register:
 	 * one again (along with re-adding the sysfs dir)
 	 */
 	elevator_exit(e);
+	e = NULL;
 fail:
 	q->elevator = old_elevator;
 	elv_register_queue(q);
-	blk_finish_queue_drain(q);
+	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
+	kfree(e);
 error:
-	if (noop_elevator)
-		elevator_put(noop_elevator);
 	elevator_put(new_e);
 	printk(KERN_ERR "elevator: switch to %s failed\n",new_e->elevator_name);
 }
@@ -671,8 +753,10 @@ ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count)
 		return -EINVAL;
 	}
 
-	if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name))
+	if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
+		elevator_put(e);
 		return count;
+	}
 
 	elevator_switch(q, e);
 	return count;
@@ -701,11 +785,12 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name)
 	return len;
 }
 
+EXPORT_SYMBOL(elv_dispatch_sort);
 EXPORT_SYMBOL(elv_add_request);
 EXPORT_SYMBOL(__elv_add_request);
 EXPORT_SYMBOL(elv_requeue_request);
 EXPORT_SYMBOL(elv_next_request);
-EXPORT_SYMBOL(elv_remove_request);
+EXPORT_SYMBOL(elv_dequeue_request);
 EXPORT_SYMBOL(elv_queue_empty);
 EXPORT_SYMBOL(elv_completed_request);
 EXPORT_SYMBOL(elevator_exit);
diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c
index d42840cc0d1..486ce1fdeb8 100644
--- a/drivers/block/genhd.c
+++ b/drivers/block/genhd.c
@@ -337,10 +337,30 @@ static ssize_t disk_attr_show(struct kobject *kobj, struct attribute *attr,
 	return ret;
 }
 
+static ssize_t disk_attr_store(struct kobject * kobj, struct attribute * attr,
+			       const char *page, size_t count)
+{
+	struct gendisk *disk = to_disk(kobj);
+	struct disk_attribute *disk_attr =
+		container_of(attr,struct disk_attribute,attr);
+	ssize_t ret = 0;
+
+	if (disk_attr->store)
+		ret = disk_attr->store(disk, page, count);
+	return ret;
+}
+
 static struct sysfs_ops disk_sysfs_ops = {
 	.show	= &disk_attr_show,
+	.store	= &disk_attr_store,
 };
 
+static ssize_t disk_uevent_store(struct gendisk * disk,
+				 const char *buf, size_t count)
+{
+	kobject_hotplug(&disk->kobj, KOBJ_ADD);
+	return count;
+}
 static ssize_t disk_dev_read(struct gendisk * disk, char *page)
 {
 	dev_t base = MKDEV(disk->major, disk->first_minor); 
@@ -382,6 +402,10 @@ static ssize_t disk_stats_read(struct gendisk * disk, char *page)
 		jiffies_to_msecs(disk_stat_read(disk, io_ticks)),
 		jiffies_to_msecs(disk_stat_read(disk, time_in_queue)));
 }
+static struct disk_attribute disk_attr_uevent = {
+	.attr = {.name = "uevent", .mode = S_IWUSR },
+	.store	= disk_uevent_store
+};
 static struct disk_attribute disk_attr_dev = {
 	.attr = {.name = "dev", .mode = S_IRUGO },
 	.show	= disk_dev_read
@@ -404,6 +428,7 @@ static struct disk_attribute disk_attr_stat = {
 };
 
 static struct attribute * default_attrs[] = {
+	&disk_attr_uevent.attr,
 	&disk_attr_dev.attr,
 	&disk_attr_range.attr,
 	&disk_attr_removable.attr,
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index baedac52294..0af73512b9a 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -263,8 +263,6 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
 	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
 
 	blk_queue_activity_fn(q, NULL, NULL);
-
-	INIT_LIST_HEAD(&q->drain_list);
 }
 
 EXPORT_SYMBOL(blk_queue_make_request);
@@ -353,6 +351,8 @@ static void blk_pre_flush_end_io(struct request *flush_rq)
 	struct request *rq = flush_rq->end_io_data;
 	request_queue_t *q = rq->q;
 
+	elv_completed_request(q, flush_rq);
+
 	rq->flags |= REQ_BAR_PREFLUSH;
 
 	if (!flush_rq->errors)
@@ -369,6 +369,8 @@ static void blk_post_flush_end_io(struct request *flush_rq)
 	struct request *rq = flush_rq->end_io_data;
 	request_queue_t *q = rq->q;
 
+	elv_completed_request(q, flush_rq);
+
 	rq->flags |= REQ_BAR_POSTFLUSH;
 
 	q->end_flush_fn(q, flush_rq);
@@ -408,8 +410,6 @@ struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq)
 	if (!list_empty(&rq->queuelist))
 		blkdev_dequeue_request(rq);
 
-	elv_deactivate_request(q, rq);
-
 	flush_rq->end_io_data = rq;
 	flush_rq->end_io = blk_pre_flush_end_io;
 
@@ -1040,6 +1040,7 @@ EXPORT_SYMBOL(blk_queue_invalidate_tags);
 static char *rq_flags[] = {
 	"REQ_RW",
 	"REQ_FAILFAST",
+	"REQ_SORTED",
 	"REQ_SOFTBARRIER",
 	"REQ_HARDBARRIER",
 	"REQ_CMD",
@@ -1047,6 +1048,7 @@ static char *rq_flags[] = {
 	"REQ_STARTED",
 	"REQ_DONTPREP",
 	"REQ_QUEUED",
+	"REQ_ELVPRIV",
 	"REQ_PC",
 	"REQ_BLOCK_PC",
 	"REQ_SENSE",
@@ -1637,9 +1639,9 @@ static int blk_init_free_list(request_queue_t *q)
 
 	rl->count[READ] = rl->count[WRITE] = 0;
 	rl->starved[READ] = rl->starved[WRITE] = 0;
+	rl->elvpriv = 0;
 	init_waitqueue_head(&rl->wait[READ]);
 	init_waitqueue_head(&rl->wait[WRITE]);
-	init_waitqueue_head(&rl->drain);
 
 	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
 				mempool_free_slab, request_cachep, q->node);
@@ -1652,13 +1654,13 @@ static int blk_init_free_list(request_queue_t *q)
 
 static int __make_request(request_queue_t *, struct bio *);
 
-request_queue_t *blk_alloc_queue(int gfp_mask)
+request_queue_t *blk_alloc_queue(gfp_t gfp_mask)
 {
 	return blk_alloc_queue_node(gfp_mask, -1);
 }
 EXPORT_SYMBOL(blk_alloc_queue);
 
-request_queue_t *blk_alloc_queue_node(int gfp_mask, int node_id)
+request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 {
 	request_queue_t *q;
 
@@ -1782,12 +1784,14 @@ EXPORT_SYMBOL(blk_get_queue);
 
 static inline void blk_free_request(request_queue_t *q, struct request *rq)
 {
-	elv_put_request(q, rq);
+	if (rq->flags & REQ_ELVPRIV)
+		elv_put_request(q, rq);
 	mempool_free(rq, q->rq.rq_pool);
 }
 
 static inline struct request *
-blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask)
+blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
+		  int priv, gfp_t gfp_mask)
 {
 	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
@@ -1800,11 +1804,15 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask)
 	 */
 	rq->flags = rw;
 
-	if (!elv_set_request(q, rq, bio, gfp_mask))
-		return rq;
+	if (priv) {
+		if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
+			mempool_free(rq, q->rq.rq_pool);
+			return NULL;
+		}
+		rq->flags |= REQ_ELVPRIV;
+	}
 
-	mempool_free(rq, q->rq.rq_pool);
-	return NULL;
+	return rq;
 }
 
 /*
@@ -1860,22 +1868,18 @@ static void __freed_request(request_queue_t *q, int rw)
  * A request has just been released.  Account for it, update the full and
  * congestion status, wake up any waiters.   Called under q->queue_lock.
  */
-static void freed_request(request_queue_t *q, int rw)
+static void freed_request(request_queue_t *q, int rw, int priv)
 {
 	struct request_list *rl = &q->rq;
 
 	rl->count[rw]--;
+	if (priv)
+		rl->elvpriv--;
 
 	__freed_request(q, rw);
 
 	if (unlikely(rl->starved[rw ^ 1]))
 		__freed_request(q, rw ^ 1);
-
-	if (!rl->count[READ] && !rl->count[WRITE]) {
-		smp_mb();
-		if (unlikely(waitqueue_active(&rl->drain)))
-			wake_up(&rl->drain);
-	}
 }
 
 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
@@ -1885,14 +1889,12 @@ static void freed_request(request_queue_t *q, int rw)
  * Returns !NULL on success, with queue_lock *not held*.
  */
 static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
-				   int gfp_mask)
+				   gfp_t gfp_mask)
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
 	struct io_context *ioc = current_io_context(GFP_ATOMIC);
-
-	if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)))
-		goto out;
+	int priv;
 
 	if (rl->count[rw]+1 >= q->nr_requests) {
 		/*
@@ -1937,9 +1939,14 @@ get_rq:
 	rl->starved[rw] = 0;
 	if (rl->count[rw] >= queue_congestion_on_threshold(q))
 		set_queue_congested(q, rw);
+
+	priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
+	if (priv)
+		rl->elvpriv++;
+
 	spin_unlock_irq(q->queue_lock);
 
-	rq = blk_alloc_request(q, rw, bio, gfp_mask);
+	rq = blk_alloc_request(q, rw, bio, priv, gfp_mask);
 	if (!rq) {
 		/*
 		 * Allocation failed presumably due to memory. Undo anything
@@ -1949,7 +1956,7 @@ get_rq:
 		 * wait queue, but this is pretty rare.
 		 */
 		spin_lock_irq(q->queue_lock);
-		freed_request(q, rw);
+		freed_request(q, rw, priv);
 
 		/*
 		 * in the very unlikely event that allocation failed and no
@@ -2019,7 +2026,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw,
 	return rq;
 }
 
-struct request *blk_get_request(request_queue_t *q, int rw, int gfp_mask)
+struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask)
 {
 	struct request *rq;
 
@@ -2251,7 +2258,7 @@ EXPORT_SYMBOL(blk_rq_unmap_user);
  * @gfp_mask:	memory allocation flags
  */
 int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf,
-		    unsigned int len, unsigned int gfp_mask)
+		    unsigned int len, gfp_t gfp_mask)
 {
 	struct bio *bio;
 
@@ -2433,13 +2440,15 @@ void disk_round_stats(struct gendisk *disk)
 {
 	unsigned long now = jiffies;
 
-	__disk_stat_add(disk, time_in_queue,
-			disk->in_flight * (now - disk->stamp));
-	disk->stamp = now;
+	if (now == disk->stamp)
+		return;
 
-	if (disk->in_flight)
-		__disk_stat_add(disk, io_ticks, (now - disk->stamp_idle));
-	disk->stamp_idle = now;
+	if (disk->in_flight) {
+		__disk_stat_add(disk, time_in_queue,
+				disk->in_flight * (now - disk->stamp));
+		__disk_stat_add(disk, io_ticks, (now - disk->stamp));
+	}
+	disk->stamp = now;
 }
 
 /*
@@ -2454,6 +2463,8 @@ static void __blk_put_request(request_queue_t *q, struct request *req)
 	if (unlikely(--req->ref_count))
 		return;
 
+	elv_completed_request(q, req);
+
 	req->rq_status = RQ_INACTIVE;
 	req->rl = NULL;
 
@@ -2463,26 +2474,25 @@ static void __blk_put_request(request_queue_t *q, struct request *req)
 	 */
 	if (rl) {
 		int rw = rq_data_dir(req);
-
-		elv_completed_request(q, req);
+		int priv = req->flags & REQ_ELVPRIV;
 
 		BUG_ON(!list_empty(&req->queuelist));
 
 		blk_free_request(q, req);
-		freed_request(q, rw);
+		freed_request(q, rw, priv);
 	}
 }
 
 void blk_put_request(struct request *req)
 {
+	unsigned long flags;
+	request_queue_t *q = req->q;
+
 	/*
-	 * if req->rl isn't set, this request didnt originate from the
-	 * block layer, so it's safe to just disregard it
+	 * Gee, IDE calls in w/ NULL q.  Fix IDE and remove the
+	 * following if (q) test.
 	 */
-	if (req->rl) {
-		unsigned long flags;
-		request_queue_t *q = req->q;
-
+	if (q) {
 		spin_lock_irqsave(q->queue_lock, flags);
 		__blk_put_request(q, req);
 		spin_unlock_irqrestore(q->queue_lock, flags);
@@ -2797,97 +2807,6 @@ static inline void blk_partition_remap(struct bio *bio)
 	}
 }
 
-void blk_finish_queue_drain(request_queue_t *q)
-{
-	struct request_list *rl = &q->rq;
-	struct request *rq;
-	int requeued = 0;
-
-	spin_lock_irq(q->queue_lock);
-	clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
-
-	while (!list_empty(&q->drain_list)) {
-		rq = list_entry_rq(q->drain_list.next);
-
-		list_del_init(&rq->queuelist);
-		elv_requeue_request(q, rq);
-		requeued++;
-	}
-
-	if (requeued)
-		q->request_fn(q);
-
-	spin_unlock_irq(q->queue_lock);
-
-	wake_up(&rl->wait[0]);
-	wake_up(&rl->wait[1]);
-	wake_up(&rl->drain);
-}
-
-static int wait_drain(request_queue_t *q, struct request_list *rl, int dispatch)
-{
-	int wait = rl->count[READ] + rl->count[WRITE];
-
-	if (dispatch)
-		wait += !list_empty(&q->queue_head);
-
-	return wait;
-}
-
-/*
- * We rely on the fact that only requests allocated through blk_alloc_request()
- * have io scheduler private data structures associated with them. Any other
- * type of request (allocated on stack or through kmalloc()) should not go
- * to the io scheduler core, but be attached to the queue head instead.
- */
-void blk_wait_queue_drained(request_queue_t *q, int wait_dispatch)
-{
-	struct request_list *rl = &q->rq;
-	DEFINE_WAIT(wait);
-
-	spin_lock_irq(q->queue_lock);
-	set_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
-
-	while (wait_drain(q, rl, wait_dispatch)) {
-		prepare_to_wait(&rl->drain, &wait, TASK_UNINTERRUPTIBLE);
-
-		if (wait_drain(q, rl, wait_dispatch)) {
-			__generic_unplug_device(q);
-			spin_unlock_irq(q->queue_lock);
-			io_schedule();
-			spin_lock_irq(q->queue_lock);
-		}
-
-		finish_wait(&rl->drain, &wait);
-	}
-
-	spin_unlock_irq(q->queue_lock);
-}
-
-/*
- * block waiting for the io scheduler being started again.
- */
-static inline void block_wait_queue_running(request_queue_t *q)
-{
-	DEFINE_WAIT(wait);
-
-	while (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) {
-		struct request_list *rl = &q->rq;
-
-		prepare_to_wait_exclusive(&rl->drain, &wait,
-				TASK_UNINTERRUPTIBLE);
-
-		/*
-		 * re-check the condition. avoids using prepare_to_wait()
-		 * in the fast path (queue is running)
-		 */
-		if (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))
-			io_schedule();
-
-		finish_wait(&rl->drain, &wait);
-	}
-}
-
 static void handle_bad_sector(struct bio *bio)
 {
 	char b[BDEVNAME_SIZE];
@@ -2983,8 +2902,6 @@ end_io:
 		if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
 			goto end_io;
 
-		block_wait_queue_running(q);
-
 		/*
 		 * If this device has partitions, remap block n
 		 * of partition p to block n+start(p) of the disk.
@@ -3393,7 +3310,7 @@ void exit_io_context(void)
  * but since the current task itself holds a reference, the context can be
  * used in general code, so long as it stays within `current` context.
  */
-struct io_context *current_io_context(int gfp_flags)
+struct io_context *current_io_context(gfp_t gfp_flags)
 {
 	struct task_struct *tsk = current;
 	struct io_context *ret;
@@ -3424,7 +3341,7 @@ EXPORT_SYMBOL(current_io_context);
  *
  * This is always called in the context of the task which submitted the I/O.
  */
-struct io_context *get_io_context(int gfp_flags)
+struct io_context *get_io_context(gfp_t gfp_flags)
 {
 	struct io_context *ret;
 	ret = current_io_context(gfp_flags);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index b35e08876dd..96c664af8d0 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -881,7 +881,7 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
 static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
 {
 	struct file *filp = lo->lo_backing_file;
-	int gfp = lo->old_gfp_mask;
+	gfp_t gfp = lo->old_gfp_mask;
 
 	if (lo->lo_state != Lo_bound)
 		return -ENXIO;
diff --git a/drivers/block/noop-iosched.c b/drivers/block/noop-iosched.c
index b1730b62c37..f56b8edb06e 100644
--- a/drivers/block/noop-iosched.c
+++ b/drivers/block/noop-iosched.c
@@ -7,57 +7,19 @@
 #include <linux/module.h>
 #include <linux/init.h>
 
-/*
- * See if we can find a request that this buffer can be coalesced with.
- */
-static int elevator_noop_merge(request_queue_t *q, struct request **req,
-			       struct bio *bio)
-{
-	int ret;
-
-	ret = elv_try_last_merge(q, bio);
-	if (ret != ELEVATOR_NO_MERGE)
-		*req = q->last_merge;
-
-	return ret;
-}
-
-static void elevator_noop_merge_requests(request_queue_t *q, struct request *req,
-					 struct request *next)
-{
-	list_del_init(&next->queuelist);
-}
-
-static void elevator_noop_add_request(request_queue_t *q, struct request *rq,
-				      int where)
+static void elevator_noop_add_request(request_queue_t *q, struct request *rq)
 {
-	if (where == ELEVATOR_INSERT_FRONT)
-		list_add(&rq->queuelist, &q->queue_head);
-	else
-		list_add_tail(&rq->queuelist, &q->queue_head);
-
-	/*
-	 * new merges must not precede this barrier
-	 */
-	if (rq->flags & REQ_HARDBARRIER)
-		q->last_merge = NULL;
-	else if (!q->last_merge)
-		q->last_merge = rq;
+	elv_dispatch_add_tail(q, rq);
 }
 
-static struct request *elevator_noop_next_request(request_queue_t *q)
+static int elevator_noop_dispatch(request_queue_t *q, int force)
 {
-	if (!list_empty(&q->queue_head))
-		return list_entry_rq(q->queue_head.next);
-
-	return NULL;
+	return 0;
 }
 
 static struct elevator_type elevator_noop = {
 	.ops = {
-		.elevator_merge_fn		= elevator_noop_merge,
-		.elevator_merge_req_fn		= elevator_noop_merge_requests,
-		.elevator_next_req_fn		= elevator_noop_next_request,
+		.elevator_dispatch_fn		= elevator_noop_dispatch,
 		.elevator_add_req_fn		= elevator_noop_add_request,
 	},
 	.elevator_name = "noop",
diff --git a/drivers/block/paride/paride.c b/drivers/block/paride/paride.c
index 1fef136c0e4..ce94aa11f6a 100644
--- a/drivers/block/paride/paride.c
+++ b/drivers/block/paride/paride.c
@@ -29,6 +29,7 @@
 #include <linux/string.h>
 #include <linux/spinlock.h>
 #include <linux/wait.h>
+#include <linux/sched.h>	/* TASK_* */
 
 #ifdef CONFIG_PARPORT_MODULE
 #define CONFIG_PARPORT
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index 94af920465b..e9746af29b9 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -807,10 +807,6 @@ static int pf_next_buf(void)
 		return 1;
 	spin_lock_irqsave(&pf_spin_lock, saved_flags);
 	pf_end_request(1);
-	if (pf_req) {
-		pf_count = pf_req->current_nr_sectors;
-		pf_buf = pf_req->buffer;
-	}
 	spin_unlock_irqrestore(&pf_spin_lock, saved_flags);
 	return 1;
 }
diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c
index b3982395f22..6f5df0fad70 100644
--- a/drivers/block/paride/pg.c
+++ b/drivers/block/paride/pg.c
@@ -162,6 +162,8 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_SLV, D_DLY};
 #include <linux/mtio.h>
 #include <linux/pg.h>
 #include <linux/device.h>
+#include <linux/sched.h>	/* current, TASK_* */
+#include <linux/jiffies.h>
 
 #include <asm/uaccess.h>
 
@@ -674,7 +676,7 @@ static int __init pg_init(void)
 	for (unit = 0; unit < PG_UNITS; unit++) {
 		struct pg *dev = &devices[unit];
 		if (dev->present) {
-			class_device_create(pg_class, MKDEV(major, unit),
+			class_device_create(pg_class, NULL, MKDEV(major, unit),
 					NULL, "pg%u", unit);
 			err = devfs_mk_cdev(MKDEV(major, unit),
 				      S_IFCHR | S_IRUSR | S_IWUSR, "pg/%u",
diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c
index d8d35233cf4..715ae5dc88f 100644
--- a/drivers/block/paride/pt.c
+++ b/drivers/block/paride/pt.c
@@ -146,6 +146,7 @@ static int (*drives[4])[6] = {&drive0, &drive1, &drive2, &drive3};
 #include <linux/slab.h>
 #include <linux/mtio.h>
 #include <linux/device.h>
+#include <linux/sched.h>	/* current, TASK_*, schedule_timeout() */
 
 #include <asm/uaccess.h>
 
@@ -971,7 +972,7 @@ static int __init pt_init(void)
 	devfs_mk_dir("pt");
 	for (unit = 0; unit < PT_UNITS; unit++)
 		if (pt[unit].present) {
-			class_device_create(pt_class, MKDEV(major, unit),
+			class_device_create(pt_class, NULL, MKDEV(major, unit),
 					NULL, "pt%d", unit);
 			err = devfs_mk_cdev(MKDEV(major, unit),
 				      S_IFCHR | S_IRUSR | S_IWUSR,
@@ -980,7 +981,7 @@ static int __init pt_init(void)
 				class_device_destroy(pt_class, MKDEV(major, unit));
 				goto out_class;
 			}
-			class_device_create(pt_class, MKDEV(major, unit + 128),
+			class_device_create(pt_class, NULL, MKDEV(major, unit + 128),
 					NULL, "pt%dn", unit);
 			err = devfs_mk_cdev(MKDEV(major, unit + 128),
 				      S_IFCHR | S_IRUSR | S_IWUSR,
diff --git a/drivers/block/rd.c b/drivers/block/rd.c
index 145c1fbffe0..68c60a5bcda 100644
--- a/drivers/block/rd.c
+++ b/drivers/block/rd.c
@@ -348,7 +348,7 @@ static int rd_open(struct inode *inode, struct file *filp)
 		struct block_device *bdev = inode->i_bdev;
 		struct address_space *mapping;
 		unsigned bsize;
-		int gfp_mask;
+		gfp_t gfp_mask;
 
 		inode = igrab(bdev->bd_inode);
 		rd_bdev[unit] = bdev;
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index d57007b92f7..1ded3b43345 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -1,7 +1,7 @@
 /*
  *  sx8.c: Driver for Promise SATA SX8 looks-like-I2O hardware
  *
- *  Copyright 2004 Red Hat, Inc.
+ *  Copyright 2004-2005 Red Hat, Inc.
  *
  *  Author/maintainer:  Jeff Garzik <jgarzik@pobox.com>
  *
@@ -31,10 +31,6 @@
 #include <asm/semaphore.h>
 #include <asm/uaccess.h>
 
-MODULE_AUTHOR("Jeff Garzik");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Promise SATA SX8 block driver");
-
 #if 0
 #define CARM_DEBUG
 #define CARM_VERBOSE_DEBUG
@@ -45,9 +41,35 @@ MODULE_DESCRIPTION("Promise SATA SX8 block driver");
 #undef CARM_NDEBUG
 
 #define DRV_NAME "sx8"
-#define DRV_VERSION "0.8"
+#define DRV_VERSION "1.0"
 #define PFX DRV_NAME ": "
 
+MODULE_AUTHOR("Jeff Garzik");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Promise SATA SX8 block driver");
+MODULE_VERSION(DRV_VERSION);
+
+/*
+ * SX8 hardware has a single message queue for all ATA ports.
+ * When this driver was written, the hardware (firmware?) would
+ * corrupt data eventually, if more than one request was outstanding.
+ * As one can imagine, having 8 ports bottlenecking on a single
+ * command hurts performance.
+ *
+ * Based on user reports, later versions of the hardware (firmware?)
+ * seem to be able to survive with more than one command queued.
+ *
+ * Therefore, we default to the safe option -- 1 command -- but
+ * allow the user to increase this.
+ *
+ * SX8 should be able to support up to ~60 queued commands (CARM_MAX_REQ),
+ * but problems seem to occur when you exceed ~30, even on newer hardware.
+ */
+static int max_queue = 1;
+module_param(max_queue, int, 0444);
+MODULE_PARM_DESC(max_queue, "Maximum number of queued commands. (min==1, max==30, safe==1)");
+
+
 #define NEXT_RESP(idx)	((idx + 1) % RMSG_Q_LEN)
 
 /* 0xf is just arbitrary, non-zero noise; this is sorta like poisoning */
@@ -90,12 +112,10 @@ enum {
 
 	/* command message queue limits */
 	CARM_MAX_REQ		= 64,	       /* max command msgs per host */
-	CARM_MAX_Q		= 1,		   /* one command at a time */
 	CARM_MSG_LOW_WATER	= (CARM_MAX_REQ / 4),	     /* refill mark */
 
 	/* S/G limits, host-wide and per-request */
 	CARM_MAX_REQ_SG		= 32,	     /* max s/g entries per request */
-	CARM_SG_BOUNDARY	= 0xffffUL,	    /* s/g segment boundary */
 	CARM_MAX_HOST_SG	= 600,		/* max s/g entries per host */
 	CARM_SG_LOW_WATER	= (CARM_MAX_HOST_SG / 4),   /* re-fill mark */
 
@@ -181,6 +201,10 @@ enum {
 	FL_DYN_MAJOR		= (1 << 17),
 };
 
+enum {
+	CARM_SG_BOUNDARY	= 0xffffUL,	    /* s/g segment boundary */
+};
+
 enum scatter_gather_types {
 	SGT_32BIT		= 0,
 	SGT_64BIT		= 1,
@@ -218,7 +242,6 @@ static const char *state_name[] = {
 
 struct carm_port {
 	unsigned int			port_no;
-	unsigned int			n_queued;
 	struct gendisk			*disk;
 	struct carm_host		*host;
 
@@ -448,7 +471,7 @@ static inline int carm_lookup_bucket(u32 msg_size)
 	for (i = 0; i < ARRAY_SIZE(msg_sizes); i++)
 		if (msg_size <= msg_sizes[i])
 			return i;
-	
+
 	return -ENOENT;
 }
 
@@ -509,7 +532,7 @@ static struct carm_request *carm_get_request(struct carm_host *host)
 	if (host->hw_sg_used >= (CARM_MAX_HOST_SG - CARM_MAX_REQ_SG))
 		return NULL;
 
-	for (i = 0; i < CARM_MAX_Q; i++)
+	for (i = 0; i < max_queue; i++)
 		if ((host->msg_alloc & (1ULL << i)) == 0) {
 			struct carm_request *crq = &host->req[i];
 			crq->port = NULL;
@@ -521,14 +544,14 @@ static struct carm_request *carm_get_request(struct carm_host *host)
 			assert(host->n_msgs <= CARM_MAX_REQ);
 			return crq;
 		}
-	
+
 	DPRINTK("no request available, returning NULL\n");
 	return NULL;
 }
 
 static int carm_put_request(struct carm_host *host, struct carm_request *crq)
 {
-	assert(crq->tag < CARM_MAX_Q);
+	assert(crq->tag < max_queue);
 
 	if (unlikely((host->msg_alloc & (1ULL << crq->tag)) == 0))
 		return -EINVAL; /* tried to clear a tag that was not active */
@@ -791,7 +814,7 @@ static inline void carm_end_rq(struct carm_host *host, struct carm_request *crq,
 			int is_ok)
 {
 	carm_end_request_queued(host, crq, is_ok);
-	if (CARM_MAX_Q == 1)
+	if (max_queue == 1)
 		carm_round_robin(host);
 	else if ((host->n_msgs <= CARM_MSG_LOW_WATER) &&
 		 (host->hw_sg_used <= CARM_SG_LOW_WATER)) {
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index ed4d5006fe6..bfb23d543ff 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -1512,7 +1512,7 @@ static void ub_state_sense(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
 	scmd->nsg = 1;
 	sg = &scmd->sgv[0];
 	sg->page = virt_to_page(sc->top_sense);
-	sg->offset = (unsigned int)sc->top_sense & (PAGE_SIZE-1);
+	sg->offset = (unsigned long)sc->top_sense & (PAGE_SIZE-1);
 	sg->length = UB_SENSE_SIZE;
 	scmd->len = UB_SENSE_SIZE;
 	scmd->lun = cmd->lun;
@@ -1891,7 +1891,7 @@ static int ub_sync_read_cap(struct ub_dev *sc, struct ub_lun *lun,
 	cmd->nsg = 1;
 	sg = &cmd->sgv[0];
 	sg->page = virt_to_page(p);
-	sg->offset = (unsigned int)p & (PAGE_SIZE-1);
+	sg->offset = (unsigned long)p & (PAGE_SIZE-1);
 	sg->length = 8;
 	cmd->len = 8;
 	cmd->lun = lun;
author	Paul Mackerras <paulus@samba.org>	2005-10-31 13:37:12 +1100
committer	Paul Mackerras <paulus@samba.org>	2005-10-31 13:37:12 +1100
commit	23fd07750a789a66fe88cf173d52a18f1a387da4 (patch)
tree	06fdd6df35fdb835abdaa9b754d62f6b84b97250 /drivers/block
parent	bd787d438a59266af3c9f6351644c85ef1dd21fe (diff)
parent	ed28f96ac1960f30f818374d65be71d2fdf811b0 (diff)