diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/Kconfig | 20 | ||||
-rw-r--r-- | block/Kconfig.iosched | 3 | ||||
-rw-r--r-- | block/Makefile | 2 | ||||
-rw-r--r-- | block/as-iosched.c | 672 | ||||
-rw-r--r-- | block/blktrace.c | 26 | ||||
-rw-r--r-- | block/cfq-iosched.c | 865 | ||||
-rw-r--r-- | block/deadline-iosched.c | 464 | ||||
-rw-r--r-- | block/elevator.c | 315 | ||||
-rw-r--r-- | block/ll_rw_blk.c | 236 | ||||
-rw-r--r-- | block/noop-iosched.c | 2 | ||||
-rw-r--r-- | block/scsi_ioctl.c | 6 |
11 files changed, 919 insertions, 1692 deletions
diff --git a/block/Kconfig b/block/Kconfig index b6f5f0a7965..9af6c614dfd 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -1,6 +1,24 @@ # # Block layer core configuration # +config BLOCK + bool "Enable the block layer" + default y + help + This permits the block layer to be removed from the kernel if it's not + needed (on some embedded devices for example). If this option is + disabled, then blockdev files will become unusable and some + filesystems (such as ext3) will become unavailable. + + This option will also disable SCSI character devices and USB storage + since they make use of various block layer definitions and + facilities. + + Say Y here unless you know you really don't want to mount disks and + suchlike. + +if BLOCK + #XXX - it makes sense to enable this only for 32-bit subarch's, not for x86_64 #for instance. config LBD @@ -33,4 +51,6 @@ config LSF If unsure, say Y. +endif + source block/Kconfig.iosched diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 48d090e266f..903f0d3b685 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -1,3 +1,4 @@ +if BLOCK menu "IO Schedulers" @@ -67,3 +68,5 @@ config DEFAULT_IOSCHED default "noop" if DEFAULT_NOOP endmenu + +endif diff --git a/block/Makefile b/block/Makefile index c05de0e0037..4b84d0d5947 100644 --- a/block/Makefile +++ b/block/Makefile @@ -2,7 +2,7 @@ # Makefile for the kernel block layer # -obj-y := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o +obj-$(CONFIG_BLOCK) := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_AS) += as-iosched.o diff --git a/block/as-iosched.c b/block/as-iosched.c index 5da56d48fbd..165509e8659 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -1,7 +1,7 @@ /* * Anticipatory & deadline i/o scheduler. * - * Copyright (C) 2002 Jens Axboe <axboe@suse.de> + * Copyright (C) 2002 Jens Axboe <axboe@kernel.dk> * Nick Piggin <nickpiggin@yahoo.com.au> * */ @@ -14,7 +14,6 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/compiler.h> -#include <linux/hash.h> #include <linux/rbtree.h> #include <linux/interrupt.h> @@ -93,9 +92,8 @@ struct as_data { struct rb_root sort_list[2]; struct list_head fifo_list[2]; - struct as_rq *next_arq[2]; /* next in sort order */ + struct request *next_rq[2]; /* next in sort order */ sector_t last_sector[2]; /* last REQ_SYNC & REQ_ASYNC sectors */ - struct hlist_head *hash; /* request hash */ unsigned long exit_prob; /* probability a task will exit while being waited on */ @@ -115,7 +113,6 @@ struct as_data { int write_batch_count; /* max # of reqs in a write batch */ int current_write_count; /* how many requests left this batch */ int write_batch_idled; /* has the write batch gone idle? */ - mempool_t *arq_pool; enum anticipation_status antic_status; unsigned long antic_start; /* jiffies: when it started */ @@ -133,8 +130,6 @@ struct as_data { unsigned long antic_expire; }; -#define list_entry_fifo(ptr) list_entry((ptr), struct as_rq, fifo) - /* * per-request data. */ @@ -150,40 +145,14 @@ enum arq_state { AS_RQ_POSTSCHED, /* when they shouldn't be */ }; -struct as_rq { - /* - * rbtree index, key is the starting offset - */ - struct rb_node rb_node; - sector_t rb_key; - - struct request *request; - - struct io_context *io_context; /* The submitting task */ - - /* - * request hash, key is the ending offset (for back merge lookup) - */ - struct hlist_node hash; - - /* - * expire fifo - */ - struct list_head fifo; - unsigned long expires; +#define RQ_IOC(rq) ((struct io_context *) (rq)->elevator_private) +#define RQ_STATE(rq) ((enum arq_state)(rq)->elevator_private2) +#define RQ_SET_STATE(rq, state) ((rq)->elevator_private2 = (void *) state) - unsigned int is_sync; - enum arq_state state; -}; - -#define RQ_DATA(rq) ((struct as_rq *) (rq)->elevator_private) - -static kmem_cache_t *arq_pool; - -static atomic_t ioc_count = ATOMIC_INIT(0); +static DEFINE_PER_CPU(unsigned long, ioc_count); static struct completion *ioc_gone; -static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq); +static void as_move_to_dispatch(struct as_data *ad, struct request *rq); static void as_antic_stop(struct as_data *ad); /* @@ -194,7 +163,8 @@ static void as_antic_stop(struct as_data *ad); static void free_as_io_context(struct as_io_context *aic) { kfree(aic); - if (atomic_dec_and_test(&ioc_count) && ioc_gone) + elv_ioc_count_dec(ioc_count); + if (ioc_gone && !elv_ioc_count_read(ioc_count)) complete(ioc_gone); } @@ -230,7 +200,7 @@ static struct as_io_context *alloc_as_io_context(void) ret->seek_total = 0; ret->seek_samples = 0; ret->seek_mean = 0; - atomic_inc(&ioc_count); + elv_ioc_count_inc(ioc_count); } return ret; @@ -240,9 +210,9 @@ static struct as_io_context *alloc_as_io_context(void) * If the current task has no AS IO context then create one and initialise it. * Then take a ref on the task's io context and return it. */ -static struct io_context *as_get_io_context(void) +static struct io_context *as_get_io_context(int node) { - struct io_context *ioc = get_io_context(GFP_ATOMIC); + struct io_context *ioc = get_io_context(GFP_ATOMIC, node); if (ioc && !ioc->aic) { ioc->aic = alloc_as_io_context(); if (!ioc->aic) { @@ -253,194 +223,43 @@ static struct io_context *as_get_io_context(void) return ioc; } -static void as_put_io_context(struct as_rq *arq) +static void as_put_io_context(struct request *rq) { struct as_io_context *aic; - if (unlikely(!arq->io_context)) + if (unlikely(!RQ_IOC(rq))) return; - aic = arq->io_context->aic; + aic = RQ_IOC(rq)->aic; - if (arq->is_sync == REQ_SYNC && aic) { + if (rq_is_sync(rq) && aic) { spin_lock(&aic->lock); set_bit(AS_TASK_IORUNNING, &aic->state); aic->last_end_request = jiffies; spin_unlock(&aic->lock); } - put_io_context(arq->io_context); -} - -/* - * the back merge hash support functions - */ -static const int as_hash_shift = 6; -#define AS_HASH_BLOCK(sec) ((sec) >> 3) -#define AS_HASH_FN(sec) (hash_long(AS_HASH_BLOCK((sec)), as_hash_shift)) -#define AS_HASH_ENTRIES (1 << as_hash_shift) -#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) - -static inline void __as_del_arq_hash(struct as_rq *arq) -{ - hlist_del_init(&arq->hash); -} - -static inline void as_del_arq_hash(struct as_rq *arq) -{ - if (!hlist_unhashed(&arq->hash)) - __as_del_arq_hash(arq); -} - -static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq) -{ - struct request *rq = arq->request; - - BUG_ON(!hlist_unhashed(&arq->hash)); - - hlist_add_head(&arq->hash, &ad->hash[AS_HASH_FN(rq_hash_key(rq))]); -} - -/* - * move hot entry to front of chain - */ -static inline void as_hot_arq_hash(struct as_data *ad, struct as_rq *arq) -{ - struct request *rq = arq->request; - struct hlist_head *head = &ad->hash[AS_HASH_FN(rq_hash_key(rq))]; - - if (hlist_unhashed(&arq->hash)) { - WARN_ON(1); - return; - } - - if (&arq->hash != head->first) { - hlist_del(&arq->hash); - hlist_add_head(&arq->hash, head); - } -} - -static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset) -{ - struct hlist_head *hash_list = &ad->hash[AS_HASH_FN(offset)]; - struct hlist_node *entry, *next; - struct as_rq *arq; - - hlist_for_each_entry_safe(arq, entry, next, hash_list, hash) { - struct request *__rq = arq->request; - - BUG_ON(hlist_unhashed(&arq->hash)); - - if (!rq_mergeable(__rq)) { - as_del_arq_hash(arq); - continue; - } - - if (rq_hash_key(__rq) == offset) - return __rq; - } - - return NULL; + put_io_context(RQ_IOC(rq)); } /* * rb tree support functions */ -#define rb_entry_arq(node) rb_entry((node), struct as_rq, rb_node) -#define ARQ_RB_ROOT(ad, arq) (&(ad)->sort_list[(arq)->is_sync]) -#define rq_rb_key(rq) (rq)->sector - -/* - * as_find_first_arq finds the first (lowest sector numbered) request - * for the specified data_dir. Used to sweep back to the start of the disk - * (1-way elevator) after we process the last (highest sector) request. - */ -static struct as_rq *as_find_first_arq(struct as_data *ad, int data_dir) -{ - struct rb_node *n = ad->sort_list[data_dir].rb_node; - - if (n == NULL) - return NULL; - - for (;;) { - if (n->rb_left == NULL) - return rb_entry_arq(n); - - n = n->rb_left; - } -} - -/* - * Add the request to the rb tree if it is unique. If there is an alias (an - * existing request against the same sector), which can happen when using - * direct IO, then return the alias. - */ -static struct as_rq *__as_add_arq_rb(struct as_data *ad, struct as_rq *arq) -{ - struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node; - struct rb_node *parent = NULL; - struct as_rq *__arq; - struct request *rq = arq->request; - - arq->rb_key = rq_rb_key(rq); - - while (*p) { - parent = *p; - __arq = rb_entry_arq(parent); - - if (arq->rb_key < __arq->rb_key) - p = &(*p)->rb_left; - else if (arq->rb_key > __arq->rb_key) - p = &(*p)->rb_right; - else - return __arq; - } - - rb_link_node(&arq->rb_node, parent, p); - rb_insert_color(&arq->rb_node, ARQ_RB_ROOT(ad, arq)); - - return NULL; -} +#define RQ_RB_ROOT(ad, rq) (&(ad)->sort_list[rq_is_sync((rq))]) -static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq) +static void as_add_rq_rb(struct as_data *ad, struct request *rq) { - struct as_rq *alias; + struct request *alias; - while ((unlikely(alias = __as_add_arq_rb(ad, arq)))) { + while ((unlikely(alias = elv_rb_add(RQ_RB_ROOT(ad, rq), rq)))) { as_move_to_dispatch(ad, alias); as_antic_stop(ad); } } -static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq) -{ - if (!RB_EMPTY_NODE(&arq->rb_node)) { - WARN_ON(1); - return; - } - - rb_erase(&arq->rb_node, ARQ_RB_ROOT(ad, arq)); - RB_CLEAR_NODE(&arq->rb_node); -} - -static struct request * -as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir) +static inline void as_del_rq_rb(struct as_data *ad, struct request *rq) { - struct rb_node *n = ad->sort_list[data_dir].rb_node; - struct as_rq *arq; - - while (n) { - arq = rb_entry_arq(n); - - if (sector < arq->rb_key) - n = n->rb_left; - else if (sector > arq->rb_key) - n = n->rb_right; - else - return arq->request; - } - - return NULL; + elv_rb_del(RQ_RB_ROOT(ad, rq), rq); } /* @@ -458,26 +277,26 @@ as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir) * as_choose_req selects the preferred one of two requests of the same data_dir * ignoring time - eg. timeouts, which is the job of as_dispatch_request */ -static struct as_rq * -as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2) +static struct request * +as_choose_req(struct as_data *ad, struct request *rq1, struct request *rq2) { int data_dir; sector_t last, s1, s2, d1, d2; int r1_wrap=0, r2_wrap=0; /* requests are behind the disk head */ const sector_t maxback = MAXBACK; - if (arq1 == NULL || arq1 == arq2) - return arq2; - if (arq2 == NULL) - return arq1; + if (rq1 == NULL || rq1 == rq2) + return rq2; + if (rq2 == NULL) + return rq1; - data_dir = arq1->is_sync; + data_dir = rq_is_sync(rq1); last = ad->last_sector[data_dir]; - s1 = arq1->request->sector; - s2 = arq2->request->sector; + s1 = rq1->sector; + s2 = rq2->sector; - BUG_ON(data_dir != arq2->is_sync); + BUG_ON(data_dir != rq_is_sync(rq2)); /* * Strict one way elevator _except_ in the case where we allow @@ -504,61 +323,58 @@ as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2) /* Found required data */ if (!r1_wrap && r2_wrap) - return arq1; + return rq1; else if (!r2_wrap && r1_wrap) - return arq2; + return rq2; else if (r1_wrap && r2_wrap) { /* both behind the head */ if (s1 <= s2) - return arq1; + return rq1; else - return arq2; + return rq2; } /* Both requests in front of the head */ if (d1 < d2) - return arq1; + return rq1; else if (d2 < d1) - return arq2; + return rq2; else { if (s1 >= s2) - return arq1; + return rq1; else - return arq2; + return rq2; } } /* - * as_find_next_arq finds the next request after @prev in elevator order. + * as_find_next_rq finds the next request after @prev in elevator order. * this with as_choose_req form the basis for how the scheduler chooses * what request to process next. Anticipation works on top of this. */ -static struct as_rq *as_find_next_arq(struct as_data *ad, struct as_rq *last) +static struct request * +as_find_next_rq(struct as_data *ad, struct request *last) { - const int data_dir = last->is_sync; - struct as_rq *ret; struct rb_node *rbnext = rb_next(&last->rb_node); struct rb_node *rbprev = rb_prev(&last->rb_node); - struct as_rq *arq_next, *arq_prev; + struct request *next = NULL, *prev = NULL; - BUG_ON(!RB_EMPTY_NODE(&last->rb_node)); + BUG_ON(RB_EMPTY_NODE(&last->rb_node)); if (rbprev) - arq_prev = rb_entry_arq(rbprev); - else - arq_prev = NULL; + prev = rb_entry_rq(rbprev); if (rbnext) - arq_next = rb_entry_arq(rbnext); + next = rb_entry_rq(rbnext); else { - arq_next = as_find_first_arq(ad, data_dir); - if (arq_next == last) - arq_next = NULL; - } + const int data_dir = rq_is_sync(last); - ret = as_choose_req(ad, arq_next, arq_prev); + rbnext = rb_first(&ad->sort_list[data_dir]); + if (rbnext && rbnext != &last->rb_node) + next = rb_entry_rq(rbnext); + } - return ret; + return as_choose_req(ad, next, prev); } /* @@ -712,8 +528,7 @@ static void as_update_seekdist(struct as_data *ad, struct as_io_context *aic, static void as_update_iohist(struct as_data *ad, struct as_io_context *aic, struct request *rq) { - struct as_rq *arq = RQ_DATA(rq); - int data_dir = arq->is_sync; + int data_dir = rq_is_sync(rq); unsigned long thinktime = 0; sector_t seek_dist; @@ -752,11 +567,11 @@ static void as_update_iohist(struct as_data *ad, struct as_io_context *aic, * previous one issued. */ static int as_close_req(struct as_data *ad, struct as_io_context *aic, - struct as_rq *arq) + struct request *rq) { unsigned long delay; /* milliseconds */ sector_t last = ad->last_sector[ad->batch_data_dir]; - sector_t next = arq->request->sector; + sector_t next = rq->sector; sector_t delta; /* acceptable close offset (in sectors) */ sector_t s; @@ -813,7 +628,7 @@ static int as_close_req(struct as_data *ad, struct as_io_context *aic, * * If this task has queued some other IO, do not enter enticipation. */ -static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) +static int as_can_break_anticipation(struct as_data *ad, struct request *rq) { struct io_context *ioc; struct as_io_context *aic; @@ -821,7 +636,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) ioc = ad->io_context; BUG_ON(!ioc); - if (arq && ioc == arq->io_context) { + if (rq && ioc == RQ_IOC(rq)) { /* request from same process */ return 1; } @@ -848,7 +663,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) return 1; } - if (arq && arq->is_sync == REQ_SYNC && as_close_req(ad, aic, arq)) { + if (rq && rq_is_sync(rq) && as_close_req(ad, aic, rq)) { /* * Found a close request that is not one of ours. * @@ -864,7 +679,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) ad->exit_no_coop = (7*ad->exit_no_coop)/8; } - as_update_iohist(ad, aic, arq->request); + as_update_iohist(ad, aic, rq); return 1; } @@ -891,10 +706,10 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) } /* - * as_can_anticipate indicates whether we should either run arq + * as_can_anticipate indicates whether we should either run rq * or keep anticipating a better request. */ -static int as_can_anticipate(struct as_data *ad, struct as_rq *arq) +static int as_can_anticipate(struct as_data *ad, struct request *rq) { if (!ad->io_context) /* @@ -908,7 +723,7 @@ static int as_can_anticipate(struct as_data *ad, struct as_rq *arq) */ return 0; - if (as_can_break_anticipation(ad, arq)) + if (as_can_break_anticipation(ad, rq)) /* * This request is a good candidate. Don't keep anticipating, * run it. @@ -926,16 +741,16 @@ static int as_can_anticipate(struct as_data *ad, struct as_rq *arq) } /* - * as_update_arq must be called whenever a request (arq) is added to + * as_update_rq must be called whenever a request (rq) is added to * the sort_list. This function keeps caches up to date, and checks if the * request might be one we are "anticipating" */ -static void as_update_arq(struct as_data *ad, struct as_rq *arq) +static void as_update_rq(struct as_data *ad, struct request *rq) { - const int data_dir = arq->is_sync; + const int data_dir = rq_is_sync(rq); - /* keep the next_arq cache up to date */ - ad->next_arq[data_dir] = as_choose_req(ad, arq, ad->next_arq[data_dir]); + /* keep the next_rq cache up to date */ + ad->next_rq[data_dir] = as_choose_req(ad, rq, ad->next_rq[data_dir]); /* * have we been anticipating this request? @@ -944,7 +759,7 @@ static void as_update_arq(struct as_data *ad, struct as_rq *arq) */ if (ad->antic_status == ANTIC_WAIT_REQ || ad->antic_status == ANTIC_WAIT_NEXT) { - if (as_can_break_anticipation(ad, arq)) + if (as_can_break_anticipation(ad, rq)) as_antic_stop(ad); } } @@ -984,12 +799,11 @@ static void update_write_batch(struct as_data *ad) static void as_completed_request(request_queue_t *q, struct request *rq) { struct as_data *ad = q->elevator->elevator_data; - struct as_rq *arq = RQ_DATA(rq); WARN_ON(!list_empty(&rq->queuelist)); - if (arq->state != AS_RQ_REMOVED) { - printk("arq->state %d\n", arq->state); + if (RQ_STATE(rq) != AS_RQ_REMOVED) { + printk("rq->state %d\n", RQ_STATE(rq)); WARN_ON(1); goto out; } @@ -1009,14 +823,14 @@ static void as_completed_request(request_queue_t *q, struct request *rq) * actually serviced. This should help devices with big TCQ windows * and writeback caches */ - if (ad->new_batch && ad->batch_data_dir == arq->is_sync) { + if (ad->new_batch && ad->batch_data_dir == rq_is_sync(rq)) { update_write_batch(ad); ad->current_batch_expires = jiffies + ad->batch_expire[REQ_SYNC]; ad->new_batch = 0; } - if (ad->io_context == arq->io_context && ad->io_context) { + if (ad->io_context == RQ_IOC(rq) && ad->io_context) { ad->antic_start = jiffies; ad->ioc_finished = 1; if (ad->antic_status == ANTIC_WAIT_REQ) { @@ -1028,9 +842,9 @@ static void as_completed_request(request_queue_t *q, struct request *rq) } } - as_put_io_context(arq); + as_put_io_context(rq); out: - arq->state = AS_RQ_POSTSCHED; + RQ_SET_STATE(rq, AS_RQ_POSTSCHED); } /* @@ -1041,27 +855,27 @@ out: */ static void as_remove_queued_request(request_queue_t *q, struct request *rq) { - struct as_rq *arq = RQ_DATA(rq); - const int data_dir = arq->is_sync; + const int data_dir = rq_is_sync(rq); struct as_data *ad = q->elevator->elevator_data; + struct io_context *ioc; - WARN_ON(arq->state != AS_RQ_QUEUED); + WARN_ON(RQ_STATE(rq) != AS_RQ_QUEUED); - if (arq->io_context && arq->io_context->aic) { - BUG_ON(!atomic_read(&arq->io_context->aic->nr_queued)); - atomic_dec(&arq->io_context->aic->nr_queued); + ioc = RQ_IOC(rq); + if (ioc && ioc->aic) { + BUG_ON(!atomic_read(&ioc->aic->nr_queued)); + atomic_dec(&ioc->aic->nr_queued); } /* - * Update the "next_arq" cache if we are about to remove its + * Update the "next_rq" cache if we are about to remove its * entry */ - if (ad->next_arq[data_dir] == arq) - ad->next_arq[data_dir] = as_find_next_arq(ad, arq); + if (ad->next_rq[data_dir] == rq) + ad->next_rq[data_dir] = as_find_next_rq(ad, rq); - list_del_init(&arq->fifo); - as_del_arq_hash(arq); - as_del_arq_rb(ad, arq); + rq_fifo_clear(rq); + as_del_rq_rb(ad, rq); } /* @@ -1074,7 +888,7 @@ static void as_remove_queued_request(request_queue_t *q, struct request *rq) */ static int as_fifo_expired(struct as_data *ad, int adir) { - struct as_rq *arq; + struct request *rq; long delta_jif; delta_jif = jiffies - ad->last_check_fifo[adir]; @@ -1088,9 +902,9 @@ static int as_fifo_expired(struct as_data *ad, int adir) if (list_empty(&ad->fifo_list[adir])) return 0; - arq = list_entry_fifo(ad->fifo_list[adir].next); + rq = rq_entry_fifo(ad->fifo_list[adir].next); - return time_after(jiffies, arq->expires); + return time_after(jiffies, rq_fifo_time(rq)); } /* @@ -1113,25 +927,25 @@ static inline int as_batch_expired(struct as_data *ad) /* * move an entry to dispatch queue */ -static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) +static void as_move_to_dispatch(struct as_data *ad, struct request *rq) { - struct request *rq = arq->request; - const int data_dir = arq->is_sync; + const int data_dir = rq_is_sync(rq); - BUG_ON(!RB_EMPTY_NODE(&arq->rb_node)); + BUG_ON(RB_EMPTY_NODE(&rq->rb_node)); as_antic_stop(ad); ad->antic_status = ANTIC_OFF; /* * This has to be set in order to be correctly updated by - * as_find_next_arq + * as_find_next_rq */ ad->last_sector[data_dir] = rq->sector + rq->nr_sectors; if (data_dir == REQ_SYNC) { + struct io_context *ioc = RQ_IOC(rq); /* In case we have to anticipate after this */ - copy_io_context(&ad->io_context, &arq->io_context); + copy_io_context(&ad->io_context, &ioc); } else { if (ad->io_context) { put_io_context(ad->io_context); @@ -1143,19 +957,19 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) } ad->ioc_finished = 0; - ad->next_arq[data_dir] = as_find_next_arq(ad, arq); + ad->next_rq[data_dir] = as_find_next_rq(ad, rq); /* * take it off the sort and fifo list, add to dispatch queue */ as_remove_queued_request(ad->q, rq); - WARN_ON(arq->state != AS_RQ_QUEUED); + WARN_ON(RQ_STATE(rq) != AS_RQ_QUEUED); elv_dispatch_sort(ad->q, rq); - arq->state = AS_RQ_DISPATCHED; - if (arq->io_context && arq->io_context->aic) - atomic_inc(&arq->io_context->aic->nr_dispatched); + RQ_SET_STATE(rq, AS_RQ_DISPATCHED); + if (RQ_IOC(rq) && RQ_IOC(rq)->aic) + atomic_inc(&RQ_IOC(rq)->aic->nr_dispatched); ad->nr_dispatched++; } @@ -1167,9 +981,9 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) static int as_dispatch_request(request_queue_t *q, int force) { struct as_data *ad = q->elevator->elevator_data; - struct as_rq *arq; const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]); const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]); + struct request *rq; if (unlikely(force)) { /* @@ -1185,14 +999,14 @@ static int as_dispatch_request(request_queue_t *q, int force) ad->changed_batch = 0; ad->new_batch = 0; - while (ad->next_arq[REQ_SYNC]) { - as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]); + while (ad->next_rq[REQ_SYNC]) { + as_move_to_dispatch(ad, ad->next_rq[REQ_SYNC]); dispatched++; } ad->last_check_fifo[REQ_SYNC] = jiffies; - while (ad->next_arq[REQ_ASYNC]) { - as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]); + while (ad->next_rq[REQ_ASYNC]) { + as_move_to_dispatch(ad, ad->next_rq[REQ_ASYNC]); dispatched++; } ad->last_check_fifo[REQ_ASYNC] = jiffies; @@ -1216,19 +1030,19 @@ static int as_dispatch_request(request_queue_t *q, int force) /* * batch is still running or no reads or no writes */ - arq = ad->next_arq[ad->batch_data_dir]; + rq = ad->next_rq[ad->batch_data_dir]; if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) { if (as_fifo_expired(ad, REQ_SYNC)) goto fifo_expired; - if (as_can_anticipate(ad, arq)) { + if (as_can_anticipate(ad, rq)) { as_antic_waitreq(ad); return 0; } } - if (arq) { + if (rq) { /* we have a "next request" */ if (reads && !writes) ad->current_batch_expires = @@ -1256,7 +1070,7 @@ static int as_dispatch_request(request_queue_t *q, int force) ad->changed_batch = 1; } ad->batch_data_dir = REQ_SYNC; - arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next); + rq = rq_entry_fifo(ad->fifo_list[REQ_SYNC].next); ad->last_check_fifo[ad->batch_data_dir] = jiffies; goto dispatch_request; } @@ -1282,7 +1096,7 @@ dispatch_writes: ad->batch_data_dir = REQ_ASYNC; ad->current_write_count = ad->write_batch_count; ad->write_batch_idled = 0; - arq = ad->next_arq[ad->batch_data_dir]; + rq = ad->next_rq[ad->batch_data_dir]; goto dispatch_request; } @@ -1296,8 +1110,7 @@ dispatch_request: if (as_fifo_expired(ad, ad->batch_data_dir)) { fifo_expired: - arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next); - BUG_ON(arq == NULL); + rq = rq_entry_fifo(ad->fifo_list[ad->batch_data_dir].next); } if (ad->changed_batch) { @@ -1316,70 +1129,58 @@ fifo_expired: } /* - * arq is the selected appropriate request. + * rq is the selected appropriate request. */ - as_move_to_dispatch(ad, arq); + as_move_to_dispatch(ad, rq); return 1; } /* - * add arq to rbtree and fifo + * add rq to rbtree and fifo */ static void as_add_request(request_queue_t *q, struct request *rq) { struct as_data *ad = q->elevator->elevator_data; - struct as_rq *arq = RQ_DATA(rq); int data_dir; - arq->state = AS_RQ_NEW; + RQ_SET_STATE(rq, AS_RQ_NEW); - if (rq_data_dir(arq->request) == READ - || (arq->request->flags & REQ_RW_SYNC)) - arq->is_sync = 1; - else - arq->is_sync = 0; - data_dir = arq->is_sync; + data_dir = rq_is_sync(rq); - arq->io_context = as_get_io_context(); + rq->elevator_private = as_get_io_context(q->node); - if (arq->io_context) { - as_update_iohist(ad, arq->io_context->aic, arq->request); - atomic_inc(&arq->io_context->aic->nr_queued); + if (RQ_IOC(rq)) { + as_update_iohist(ad, RQ_IOC(rq)->aic, rq); + atomic_inc(&RQ_IOC(rq)->aic->nr_queued); } - as_add_arq_rb(ad, arq); - if (rq_mergeable(arq->request)) - as_add_arq_hash(ad, arq); + as_add_rq_rb(ad, rq); /* * set expire time (only used for reads) and add to fifo list */ - arq->expires = jiffies + ad->fifo_expire[data_dir]; - list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]); + rq_set_fifo_time(rq, jiffies + ad->fifo_expire[data_dir]); + list_add_tail(&rq->queuelist, &ad->fifo_list[data_dir]); - as_update_arq(ad, arq); /* keep state machine up to date */ - arq->state = AS_RQ_QUEUED; + as_update_rq(ad, rq); /* keep state machine up to date */ + RQ_SET_STATE(rq, AS_RQ_QUEUED); } static void as_activate_request(request_queue_t *q, struct request *rq) { - struct as_rq *arq = RQ_DATA(rq); - - WARN_ON(arq->state != AS_RQ_DISPATCHED); - arq->state = AS_RQ_REMOVED; - if (arq->io_context && arq->io_context->aic) - atomic_dec(&arq->io_context->aic->nr_dispatched); + WARN_ON(RQ_STATE(rq) != AS_RQ_DISPATCHED); + RQ_SET_STATE(rq, AS_RQ_REMOVED); + if (RQ_IOC(rq) && RQ_IOC(rq)->aic) + atomic_dec(&RQ_IOC(rq)->aic->nr_dispatched); } static void as_deactivate_request(request_queue_t *q, struct request *rq) { - struct as_rq *arq = RQ_DATA(rq); - - WARN_ON(arq->state != AS_RQ_REMOVED); - arq->state = AS_RQ_DISPATCHED; - if (arq->io_context && arq->io_context->aic) - atomic_inc(&arq->io_context->aic->nr_dispatched); + WARN_ON(RQ_STATE(rq) != AS_RQ_REMOVED); + RQ_SET_STATE(rq, AS_RQ_DISPATCHED); + if (RQ_IOC(rq) && RQ_IOC(rq)->aic) + atomic_inc(&RQ_IOC(rq)->aic->nr_dispatched); } /* @@ -1396,93 +1197,35 @@ static int as_queue_empty(request_queue_t *q) && list_empty(&ad->fifo_list[REQ_SYNC]); } -static struct request *as_former_request(request_queue_t *q, - struct request *rq) -{ - struct as_rq *arq = RQ_DATA(rq); - struct rb_node *rbprev = rb_prev(&arq->rb_node); - struct request *ret = NULL; - - if (rbprev) - ret = rb_entry_arq(rbprev)->request; - - return ret; -} - -static struct request *as_latter_request(request_queue_t *q, - struct request *rq) -{ - struct as_rq *arq = RQ_DATA(rq); - struct rb_node *rbnext = rb_next(&arq->rb_node); - struct request *ret = NULL; - - if (rbnext) - ret = rb_entry_arq(rbnext)->request; - - return ret; -} - static int as_merge(request_queue_t *q, struct request **req, struct bio *bio) { struct as_data *ad = q->elevator->elevator_data; sector_t rb_key = bio->bi_sector + bio_sectors(bio); struct request *__rq; - int ret; - - /* - * see if the merge hash can satisfy a back merge - */ - __rq = as_find_arq_hash(ad, bio->bi_sector); - if (__rq) { - BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector); - - if (elv_rq_merge_ok(__rq, bio)) { - ret = ELEVATOR_BACK_MERGE; - goto out; - } - } /* * check for front merge */ - __rq = as_find_arq_rb(ad, rb_key, bio_data_dir(bio)); - if (__rq) { - BUG_ON(rb_key != rq_rb_key(__rq)); - - if (elv_rq_merge_ok(__rq, bio)) { - ret = ELEVATOR_FRONT_MERGE; - goto out; - } + __rq = elv_rb_find(&ad->sort_list[bio_data_dir(bio)], rb_key); + if (__rq && elv_rq_merge_ok(__rq, bio)) { + *req = __rq; + return ELEVATOR_FRONT_MERGE; } return ELEVATOR_NO_MERGE; -out: - if (ret) { - if (rq_mergeable(__rq)) - as_hot_arq_hash(ad, RQ_DATA(__rq)); - } - *req = __rq; - return ret; } -static void as_merged_request(request_queue_t *q, struct request *req) +static void as_merged_request(request_queue_t *q, struct request *req, int type) { struct as_data *ad = q->elevator->elevator_data; - struct as_rq *arq = RQ_DATA(req); - - /* - * hash always needs to be repositioned, key is end sector - */ - as_del_arq_hash(arq); - as_add_arq_hash(ad, arq); /* * if the merge was a front merge, we need to reposition request */ - if (rq_rb_key(req) != arq->rb_key) { - as_del_arq_rb(ad, arq); - as_add_arq_rb(ad, arq); + if (type == ELEVATOR_FRONT_MERGE) { + as_del_rq_rb(ad, req); + as_add_rq_rb(ad, req); /* * Note! At this stage of this and the next function, our next * request may not be optimal - eg the request may have "grown" @@ -1494,38 +1237,22 @@ static void as_merged_request(request_queue_t *q, struct request *req) static void as_merged_requests(request_queue_t *q, struct request *req, struct request *next) { - struct as_data *ad = q->elevator->elevator_data; - struct as_rq *arq = RQ_DATA(req); - struct as_rq *anext = RQ_DATA(next); - - BUG_ON(!arq); - BUG_ON(!anext); - /* - * reposition arq (this is the merged request) in hash, and in rbtree - * in case of a front merge + * if next expires before rq, assign its expire time to arq + * and move into next position (next will be deleted) in fifo */ - as_del_arq_hash(arq); - as_add_arq_hash(ad, arq); - - if (rq_rb_key(req) != arq->rb_key) { - as_del_arq_rb(ad, arq); - as_add_arq_rb(ad, arq); - } + if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) { + if (time_before(rq_fifo_time(next), rq_fifo_time(req))) { + struct io_context *rioc = RQ_IOC(req); + struct io_context *nioc = RQ_IOC(next); - /* - * if anext expires before arq, assign its expire time to arq - * and move into anext position (anext will be deleted) in fifo - */ - if (!list_empty(&arq->fifo) && !list_empty(&anext->fifo)) { - if (time_before(anext->expires, arq->expires)) { - list_move(&arq->fifo, &anext->fifo); - arq->expires = anext->expires; + list_move(&req->queuelist, &next->queuelist); + rq_set_fifo_time(req, rq_fifo_time(next)); /* * Don't copy here but swap, because when anext is * removed below, it must contain the unused context */ - swap_io_context(&arq->io_context, &anext->io_context); + swap_io_context(&rioc, &nioc); } } @@ -1533,9 +1260,9 @@ static void as_merged_requests(request_queue_t *q, struct request *req, * kill knowledge of next, this one is a goner */ as_remove_queued_request(q, next); - as_put_io_context(anext); + as_put_io_context(next); - anext->state = AS_RQ_MERGED; + RQ_SET_STATE(next, AS_RQ_MERGED); } /* @@ -1553,61 +1280,18 @@ static void as_work_handler(void *data) unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); - if (!as_queue_empty(q)) - q->request_fn(q); + blk_start_queueing(q); spin_unlock_irqrestore(q->queue_lock, flags); } -static void as_put_request(request_queue_t *q, struct request *rq) -{ - struct as_data *ad = q->elevator->elevator_data; - struct as_rq *arq = RQ_DATA(rq); - - if (!arq) { - WARN_ON(1); - return; - } - - if (unlikely(arq->state != AS_RQ_POSTSCHED && - arq->state != AS_RQ_PRESCHED && - arq->state != AS_RQ_MERGED)) { - printk("arq->state %d\n", arq->state); - WARN_ON(1); - } - - mempool_free(arq, ad->arq_pool); - rq->elevator_private = NULL; -} - -static int as_set_request(request_queue_t *q, struct request *rq, - struct bio *bio, gfp_t gfp_mask) -{ - struct as_data *ad = q->elevator->elevator_data; - struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask); - - if (arq) { - memset(arq, 0, sizeof(*arq)); - RB_CLEAR_NODE(&arq->rb_node); - arq->request = rq; - arq->state = AS_RQ_PRESCHED; - arq->io_context = NULL; - INIT_HLIST_NODE(&arq->hash); - INIT_LIST_HEAD(&arq->fifo); - rq->elevator_private = arq; - return 0; - } - - return 1; -} - -static int as_may_queue(request_queue_t *q, int rw, struct bio *bio) +static int as_may_queue(request_queue_t *q, int rw) { int ret = ELV_MQUEUE_MAY; struct as_data *ad = q->elevator->elevator_data; struct io_context *ioc; if (ad->antic_status == ANTIC_WAIT_REQ || ad->antic_status == ANTIC_WAIT_NEXT) { - ioc = as_get_io_context(); + ioc = as_get_io_context(q->node); if (ad->io_context == ioc) ret = ELV_MQUEUE_MUST; put_io_context(ioc); @@ -1626,23 +1310,16 @@ static void as_exit_queue(elevator_t *e) BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC])); BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC])); - mempool_destroy(ad->arq_pool); put_io_context(ad->io_context); - kfree(ad->hash); kfree(ad); } /* - * initialize elevator private data (as_data), and alloc a arq for - * each request on the free lists + * initialize elevator private data (as_data). */ static void *as_init_queue(request_queue_t *q, elevator_t *e) { struct as_data *ad; - int i; - - if (!arq_pool) - return NULL; ad = kmalloc_node(sizeof(*ad), GFP_KERNEL, q->node); if (!ad) @@ -1651,30 +1328,12 @@ static void *as_init_queue(request_queue_t *q, elevator_t *e) ad->q = q; /* Identify what queue the data belongs to */ - ad->hash = kmalloc_node(sizeof(struct hlist_head)*AS_HASH_ENTRIES, - GFP_KERNEL, q->node); - if (!ad->hash) { - kfree(ad); - return NULL; - } - - ad->arq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, - mempool_free_slab, arq_pool, q->node); - if (!ad->arq_pool) { - kfree(ad->hash); - kfree(ad); - return NULL; - } - /* anticipatory scheduling helpers */ ad->antic_timer.function = as_antic_timeout; ad->antic_timer.data = (unsigned long)q; init_timer(&ad->antic_timer); INIT_WORK(&ad->antic_work, as_work_handler, q); - for (i = 0; i < AS_HASH_ENTRIES; i++) - INIT_HLIST_HEAD(&ad->hash[i]); - INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]); INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]); ad->sort_list[REQ_SYNC] = RB_ROOT; @@ -1787,10 +1446,8 @@ static struct elevator_type iosched_as = { .elevator_deactivate_req_fn = as_deactivate_request, .elevator_queue_empty_fn = as_queue_empty, .elevator_completed_req_fn = as_completed_request, - .elevator_former_req_fn = as_former_request, - .elevator_latter_req_fn = as_latter_request, - .elevator_set_req_fn = as_set_request, - .elevator_put_req_fn = as_put_request, + .elevator_former_req_fn = elv_rb_former_request, + .elevator_latter_req_fn = elv_rb_latter_request, .elevator_may_queue_fn = as_may_queue, .elevator_init_fn = as_init_queue, .elevator_exit_fn = as_exit_queue, @@ -1806,11 +1463,6 @@ static int __init as_init(void) { int ret; - arq_pool = kmem_cache_create("as_arq", sizeof(struct as_rq), - 0, 0, NULL, NULL); - if (!arq_pool) - return -ENOMEM; - ret = elv_register(&iosched_as); if (!ret) { /* @@ -1822,7 +1474,6 @@ static int __init as_init(void) return 0; } - kmem_cache_destroy(arq_pool); return ret; } @@ -1833,10 +1484,9 @@ static void __exit as_exit(void) ioc_gone = &all_gone; /* ioc_gone's update must be visible before reading ioc_count */ smp_wmb(); - if (atomic_read(&ioc_count)) + if (elv_ioc_count_read(ioc_count)) wait_for_completion(ioc_gone); synchronize_rcu(); - kmem_cache_destroy(arq_pool); } module_init(as_init); diff --git a/block/blktrace.c b/block/blktrace.c index 8ff33441d8a..135593c8e45 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006 Jens Axboe <axboe@suse.de> + * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -69,7 +69,7 @@ static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK /* * Bio action bits of interest */ -static u32 bio_act[5] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD) }; +static u32 bio_act[9] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD), 0, 0, 0, BLK_TC_ACT(BLK_TC_META) }; /* * More could be added as needed, taking care to increment the decrementer @@ -81,6 +81,8 @@ static u32 bio_act[5] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_AC (((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1)) #define trace_ahead_bit(rw) \ (((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD)) +#define trace_meta_bit(rw) \ + (((rw) & (1 << BIO_RW_META)) >> (BIO_RW_META - 3)) /* * The worker for the various blk_add_trace*() types. Fills out a @@ -103,6 +105,7 @@ void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, what |= bio_act[trace_barrier_bit(rw)]; what |= bio_act[trace_sync_bit(rw)]; what |= bio_act[trace_ahead_bit(rw)]; + what |= bio_act[trace_meta_bit(rw)]; pid = tsk->pid; if (unlikely(act_log_check(bt, what, sector, pid))) @@ -473,6 +476,9 @@ static void blk_check_time(unsigned long long *t) *t -= (a + b) / 2; } +/* + * calibrate our inter-CPU timings + */ static void blk_trace_check_cpu_time(void *data) { unsigned long long *t; @@ -490,20 +496,6 @@ static void blk_trace_check_cpu_time(void *data) put_cpu(); } -/* - * Call blk_trace_check_cpu_time() on each CPU to calibrate our inter-CPU - * timings - */ -static void blk_trace_calibrate_offsets(void) -{ - unsigned long flags; - - smp_call_function(blk_trace_check_cpu_time, NULL, 1, 1); - local_irq_save(flags); - blk_trace_check_cpu_time(NULL); - local_irq_restore(flags); -} - static void blk_trace_set_ht_offsets(void) { #if defined(CONFIG_SCHED_SMT) @@ -532,7 +524,7 @@ static void blk_trace_set_ht_offsets(void) static __init int blk_trace_init(void) { mutex_init(&blk_tree_mutex); - blk_trace_calibrate_offsets(); + on_each_cpu(blk_trace_check_cpu_time, NULL, 1, 1); blk_trace_set_ht_offsets(); return 0; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 3a3aee08ec5..99116e2a310 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -4,7 +4,7 @@ * Based on ideas from a previously unfinished io * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli. * - * Copyright (C) 2003 Jens Axboe <axboe@suse.de> + * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> */ #include <linux/module.h> #include <linux/blkdev.h> @@ -17,7 +17,6 @@ * tunables */ static const int cfq_quantum = 4; /* max queue in one round of service */ -static const int cfq_queued = 8; /* minimum rq allocate limit per-queue*/ static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; static const int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ static const int cfq_back_penalty = 2; /* penalty of a backwards seek */ @@ -32,8 +31,6 @@ static int cfq_slice_idle = HZ / 125; #define CFQ_KEY_ASYNC (0) -static DEFINE_SPINLOCK(cfq_exit_lock); - /* * for the hash of cfqq inside the cfqd */ @@ -41,37 +38,19 @@ static DEFINE_SPINLOCK(cfq_exit_lock); #define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT) #define list_entry_qhash(entry) hlist_entry((entry), struct cfq_queue, cfq_hash) -/* - * for the hash of crq inside the cfqq - */ -#define CFQ_MHASH_SHIFT 6 -#define CFQ_MHASH_BLOCK(sec) ((sec) >> 3) -#define CFQ_MHASH_ENTRIES (1 << CFQ_MHASH_SHIFT) -#define CFQ_MHASH_FN(sec) hash_long(CFQ_MHASH_BLOCK(sec), CFQ_MHASH_SHIFT) -#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) -#define list_entry_hash(ptr) hlist_entry((ptr), struct cfq_rq, hash) - #define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list) -#define list_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) -#define RQ_DATA(rq) (rq)->elevator_private +#define RQ_CIC(rq) ((struct cfq_io_context*)(rq)->elevator_private) +#define RQ_CFQQ(rq) ((rq)->elevator_private2) -/* - * rb-tree defines - */ -#define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node) -#define rq_rb_key(rq) (rq)->sector - -static kmem_cache_t *crq_pool; static kmem_cache_t *cfq_pool; static kmem_cache_t *cfq_ioc_pool; -static atomic_t ioc_count = ATOMIC_INIT(0); +static DEFINE_PER_CPU(unsigned long, ioc_count); static struct completion *ioc_gone; #define CFQ_PRIO_LISTS IOPRIO_BE_NR #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) -#define cfq_class_be(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_BE) #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) #define ASYNC (0) @@ -103,29 +82,14 @@ struct cfq_data { unsigned int busy_queues; /* - * non-ordered list of empty cfqq's - */ - struct list_head empty_list; - - /* * cfqq lookup hash */ struct hlist_head *cfq_hash; - /* - * global crq hash for all queues - */ - struct hlist_head *crq_hash; - - mempool_t *crq_pool; - int rq_in_driver; int hw_tag; /* - * schedule slice state info - */ - /* * idle window management */ struct timer_list idle_slice_timer; @@ -141,13 +105,10 @@ struct cfq_data { sector_t last_sector; unsigned long last_end_request; - unsigned int rq_starved; - /* * tunables, see top of file */ unsigned int cfq_quantum; - unsigned int cfq_queued; unsigned int cfq_fifo_expire[2]; unsigned int cfq_back_penalty; unsigned int cfq_back_max; @@ -170,23 +131,24 @@ struct cfq_queue { struct hlist_node cfq_hash; /* hash key */ unsigned int key; - /* on either rr or empty list of cfqd */ + /* member of the rr/busy/cur/idle cfqd list */ struct list_head cfq_list; /* sorted list of pending requests */ struct rb_root sort_list; /* if fifo isn't expired, next request to serve */ - struct cfq_rq *next_crq; + struct request *next_rq; /* requests queued in sort_list */ int queued[2]; /* currently allocated requests */ int allocated[2]; + /* pending metadata requests */ + int meta_pending; /* fifo list of requests in sort_list */ struct list_head fifo; unsigned long slice_start; unsigned long slice_end; unsigned long slice_left; - unsigned long service_last; /* number of requests that are on the dispatch list */ int on_dispatch[2]; @@ -199,18 +161,6 @@ struct cfq_queue { unsigned int flags; }; -struct cfq_rq { - struct rb_node rb_node; - sector_t rb_key; - struct request *request; - struct hlist_node hash; - - struct cfq_queue *cfq_queue; - struct cfq_io_context *io_context; - - unsigned int crq_flags; -}; - enum cfqq_state_flags { CFQ_CFQQ_FLAG_on_rr = 0, CFQ_CFQQ_FLAG_wait_request, @@ -220,6 +170,7 @@ enum cfqq_state_flags { CFQ_CFQQ_FLAG_fifo_expire, CFQ_CFQQ_FLAG_idle_window, CFQ_CFQQ_FLAG_prio_changed, + CFQ_CFQQ_FLAG_queue_new, }; #define CFQ_CFQQ_FNS(name) \ @@ -244,70 +195,14 @@ CFQ_CFQQ_FNS(must_dispatch); CFQ_CFQQ_FNS(fifo_expire); CFQ_CFQQ_FNS(idle_window); CFQ_CFQQ_FNS(prio_changed); +CFQ_CFQQ_FNS(queue_new); #undef CFQ_CFQQ_FNS -enum cfq_rq_state_flags { - CFQ_CRQ_FLAG_is_sync = 0, -}; - -#define CFQ_CRQ_FNS(name) \ -static inline void cfq_mark_crq_##name(struct cfq_rq *crq) \ -{ \ - crq->crq_flags |= (1 << CFQ_CRQ_FLAG_##name); \ -} \ -static inline void cfq_clear_crq_##name(struct cfq_rq *crq) \ -{ \ - crq->crq_flags &= ~(1 << CFQ_CRQ_FLAG_##name); \ -} \ -static inline int cfq_crq_##name(const struct cfq_rq *crq) \ -{ \ - return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0; \ -} - -CFQ_CRQ_FNS(is_sync); -#undef CFQ_CRQ_FNS - static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short); -static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *); +static void cfq_dispatch_insert(request_queue_t *, struct request *); static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask); /* - * lots of deadline iosched dupes, can be abstracted later... - */ -static inline void cfq_del_crq_hash(struct cfq_rq *crq) -{ - hlist_del_init(&crq->hash); -} - -static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq) -{ - const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request)); - - hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]); -} - -static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset) -{ - struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)]; - struct hlist_node *entry, *next; - - hlist_for_each_safe(entry, next, hash_list) { - struct cfq_rq *crq = list_entry_hash(entry); - struct request *__rq = crq->request; - - if (!rq_mergeable(__rq)) { - cfq_del_crq_hash(crq); - continue; - } - - if (rq_hash_key(__rq) == offset) - return __rq; - } - - return NULL; -} - -/* * scheduler run of queue, if there are requests pending and no one in the * driver that will restart queueing */ @@ -333,12 +228,12 @@ static inline pid_t cfq_queue_pid(struct task_struct *task, int rw) } /* - * Lifted from AS - choose which of crq1 and crq2 that is best served now. + * Lifted from AS - choose which of rq1 and rq2 that is best served now. * We choose the request that is closest to the head right now. Distance * behind the head is penalized and only allowed to a certain extent. */ -static struct cfq_rq * -cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) +static struct request * +cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2) { sector_t last, s1, s2, d1 = 0, d2 = 0; unsigned long back_max; @@ -346,18 +241,22 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) #define CFQ_RQ2_WRAP 0x02 /* request 2 wraps */ unsigned wrap = 0; /* bit mask: requests behind the disk head? */ - if (crq1 == NULL || crq1 == crq2) - return crq2; - if (crq2 == NULL) - return crq1; + if (rq1 == NULL || rq1 == rq2) + return rq2; + if (rq2 == NULL) + return rq1; - if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2)) - return crq1; - else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1)) - return crq2; + if (rq_is_sync(rq1) && !rq_is_sync(rq2)) + return rq1; + else if (rq_is_sync(rq2) && !rq_is_sync(rq1)) + return rq2; + if (rq_is_meta(rq1) && !rq_is_meta(rq2)) + return rq1; + else if (rq_is_meta(rq2) && !rq_is_meta(rq1)) + return rq2; - s1 = crq1->request->sector; - s2 = crq2->request->sector; + s1 = rq1->sector; + s2 = rq2->sector; last = cfqd->last_sector; @@ -392,23 +291,23 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) * check two variables for all permutations: --> faster! */ switch (wrap) { - case 0: /* common case for CFQ: crq1 and crq2 not wrapped */ + case 0: /* common case for CFQ: rq1 and rq2 not wrapped */ if (d1 < d2) - return crq1; + return rq1; else if (d2 < d1) - return crq2; + return rq2; else { if (s1 >= s2) - return crq1; + return rq1; else - return crq2; + return rq2; } case CFQ_RQ2_WRAP: - return crq1; + return rq1; case CFQ_RQ1_WRAP: - return crq2; - case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both crqs wrapped */ + return rq2; + case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both rqs wrapped */ default: /* * Since both rqs are wrapped, @@ -417,50 +316,43 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) * since back seek takes more time than forward. */ if (s1 <= s2) - return crq1; + return rq1; else - return crq2; + return rq2; } } /* * would be nice to take fifo expire time into account as well */ -static struct cfq_rq * -cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq, - struct cfq_rq *last) +static struct request * +cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq, + struct request *last) { - struct cfq_rq *crq_next = NULL, *crq_prev = NULL; - struct rb_node *rbnext, *rbprev; - - if (!(rbnext = rb_next(&last->rb_node))) { - rbnext = rb_first(&cfqq->sort_list); - if (rbnext == &last->rb_node) - rbnext = NULL; - } + struct rb_node *rbnext = rb_next(&last->rb_node); + struct rb_node *rbprev = rb_prev(&last->rb_node); + struct request *next = NULL, *prev = NULL; - rbprev = rb_prev(&last->rb_node); + BUG_ON(RB_EMPTY_NODE(&last->rb_node)); if (rbprev) - crq_prev = rb_entry_crq(rbprev); - if (rbnext) - crq_next = rb_entry_crq(rbnext); - - return cfq_choose_req(cfqd, crq_next, crq_prev); -} + prev = rb_entry_rq(rbprev); -static void cfq_update_next_crq(struct cfq_rq *crq) -{ - struct cfq_queue *cfqq = crq->cfq_queue; + if (rbnext) + next = rb_entry_rq(rbnext); + else { + rbnext = rb_first(&cfqq->sort_list); + if (rbnext && rbnext != &last->rb_node) + next = rb_entry_rq(rbnext); + } - if (cfqq->next_crq == crq) - cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq); + return cfq_choose_req(cfqd, next, prev); } static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted) { struct cfq_data *cfqd = cfqq->cfqd; - struct list_head *list, *entry; + struct list_head *list; BUG_ON(!cfq_cfqq_on_rr(cfqq)); @@ -485,31 +377,26 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted) } /* - * if queue was preempted, just add to front to be fair. busy_rr - * isn't sorted, but insert at the back for fairness. + * If this queue was preempted or is new (never been serviced), let + * it be added first for fairness but beind other new queues. + * Otherwise, just add to the back of the list. */ - if (preempted || list == &cfqd->busy_rr) { - if (preempted) - list = list->prev; + if (preempted || cfq_cfqq_queue_new(cfqq)) { + struct list_head *n = list; + struct cfq_queue *__cfqq; - list_add_tail(&cfqq->cfq_list, list); - return; - } + while (n->next != list) { + __cfqq = list_entry_cfqq(n->next); + if (!cfq_cfqq_queue_new(__cfqq)) + break; - /* - * sort by when queue was last serviced - */ - entry = list; - while ((entry = entry->prev) != list) { - struct cfq_queue *__cfqq = list_entry_cfqq(entry); + n = n->next; + } - if (!__cfqq->service_last) - break; - if (time_before(__cfqq->service_last, cfqq->service_last)) - break; + list = n; } - list_add(&cfqq->cfq_list, entry); + list_add_tail(&cfqq->cfq_list, list); } /* @@ -531,7 +418,7 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) { BUG_ON(!cfq_cfqq_on_rr(cfqq)); cfq_clear_cfqq_on_rr(cfqq); - list_move(&cfqq->cfq_list, &cfqd->empty_list); + list_del_init(&cfqq->cfq_list); BUG_ON(!cfqd->busy_queues); cfqd->busy_queues--; @@ -540,81 +427,43 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) /* * rb tree support functions */ -static inline void cfq_del_crq_rb(struct cfq_rq *crq) +static inline void cfq_del_rq_rb(struct request *rq) { - struct cfq_queue *cfqq = crq->cfq_queue; + struct cfq_queue *cfqq = RQ_CFQQ(rq); struct cfq_data *cfqd = cfqq->cfqd; - const int sync = cfq_crq_is_sync(crq); + const int sync = rq_is_sync(rq); BUG_ON(!cfqq->queued[sync]); cfqq->queued[sync]--; - cfq_update_next_crq(crq); - - rb_erase(&crq->rb_node, &cfqq->sort_list); + elv_rb_del(&cfqq->sort_list, rq); if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) cfq_del_cfqq_rr(cfqd, cfqq); } -static struct cfq_rq * -__cfq_add_crq_rb(struct cfq_rq *crq) +static void cfq_add_rq_rb(struct request *rq) { - struct rb_node **p = &crq->cfq_queue->sort_list.rb_node; - struct rb_node *parent = NULL; - struct cfq_rq *__crq; - - while (*p) { - parent = *p; - __crq = rb_entry_crq(parent); - - if (crq->rb_key < __crq->rb_key) - p = &(*p)->rb_left; - else if (crq->rb_key > __crq->rb_key) - p = &(*p)->rb_right; - else - return __crq; - } - - rb_link_node(&crq->rb_node, parent, p); - return NULL; -} - -static void cfq_add_crq_rb(struct cfq_rq *crq) -{ - struct cfq_queue *cfqq = crq->cfq_queue; + struct cfq_queue *cfqq = RQ_CFQQ(rq); struct cfq_data *cfqd = cfqq->cfqd; - struct request *rq = crq->request; - struct cfq_rq *__alias; + struct request *__alias; - crq->rb_key = rq_rb_key(rq); - cfqq->queued[cfq_crq_is_sync(crq)]++; + cfqq->queued[rq_is_sync(rq)]++; /* * looks a little odd, but the first insert might return an alias. * if that happens, put the alias on the dispatch list */ - while ((__alias = __cfq_add_crq_rb(crq)) != NULL) + while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL) cfq_dispatch_insert(cfqd->queue, __alias); - - rb_insert_color(&crq->rb_node, &cfqq->sort_list); - - if (!cfq_cfqq_on_rr(cfqq)) - cfq_add_cfqq_rr(cfqd, cfqq); - - /* - * check if this request is a better next-serve candidate - */ - cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq); } static inline void -cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq) +cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) { - rb_erase(&crq->rb_node, &cfqq->sort_list); - cfqq->queued[cfq_crq_is_sync(crq)]--; - - cfq_add_crq_rb(crq); + elv_rb_del(&cfqq->sort_list, rq); + cfqq->queued[rq_is_sync(rq)]--; + cfq_add_rq_rb(rq); } static struct request * @@ -623,27 +472,14 @@ cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio) struct task_struct *tsk = current; pid_t key = cfq_queue_pid(tsk, bio_data_dir(bio)); struct cfq_queue *cfqq; - struct rb_node *n; - sector_t sector; cfqq = cfq_find_cfq_hash(cfqd, key, tsk->ioprio); - if (!cfqq) - goto out; - - sector = bio->bi_sector + bio_sectors(bio); - n = cfqq->sort_list.rb_node; - while (n) { - struct cfq_rq *crq = rb_entry_crq(n); + if (cfqq) { + sector_t sector = bio->bi_sector + bio_sectors(bio); - if (sector < crq->rb_key) - n = n->rb_left; - else if (sector > crq->rb_key) - n = n->rb_right; - else - return crq->request; + return elv_rb_find(&cfqq->sort_list, sector); } -out: return NULL; } @@ -673,11 +509,18 @@ static void cfq_deactivate_request(request_queue_t *q, struct request *rq) static void cfq_remove_request(struct request *rq) { - struct cfq_rq *crq = RQ_DATA(rq); + struct cfq_queue *cfqq = RQ_CFQQ(rq); + + if (cfqq->next_rq == rq) + cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq); list_del_init(&rq->queuelist); - cfq_del_crq_rb(crq); - cfq_del_crq_hash(crq); + cfq_del_rq_rb(rq); + + if (rq_is_meta(rq)) { + WARN_ON(!cfqq->meta_pending); + cfqq->meta_pending--; + } } static int @@ -685,39 +528,23 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio) { struct cfq_data *cfqd = q->elevator->elevator_data; struct request *__rq; - int ret; - - __rq = cfq_find_rq_hash(cfqd, bio->bi_sector); - if (__rq && elv_rq_merge_ok(__rq, bio)) { - ret = ELEVATOR_BACK_MERGE; - goto out; - } __rq = cfq_find_rq_fmerge(cfqd, bio); if (__rq && elv_rq_merge_ok(__rq, bio)) { - ret = ELEVATOR_FRONT_MERGE; - goto out; + *req = __rq; + return ELEVATOR_FRONT_MERGE; } return ELEVATOR_NO_MERGE; -out: - *req = __rq; - return ret; } -static void cfq_merged_request(request_queue_t *q, struct request *req) +static void cfq_merged_request(request_queue_t *q, struct request *req, + int type) { - struct cfq_data *cfqd = q->elevator->elevator_data; - struct cfq_rq *crq = RQ_DATA(req); - - cfq_del_crq_hash(crq); - cfq_add_crq_hash(cfqd, crq); - - if (rq_rb_key(req) != crq->rb_key) { - struct cfq_queue *cfqq = crq->cfq_queue; + if (type == ELEVATOR_FRONT_MERGE) { + struct cfq_queue *cfqq = RQ_CFQQ(req); - cfq_update_next_crq(crq); - cfq_reposition_crq_rb(cfqq, crq); + cfq_reposition_rq_rb(cfqq, req); } } @@ -725,8 +552,6 @@ static void cfq_merged_requests(request_queue_t *q, struct request *rq, struct request *next) { - cfq_merged_request(q, rq); - /* * reposition in fifo if next is older than rq */ @@ -768,13 +593,12 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, if (cfq_cfqq_wait_request(cfqq)) del_timer(&cfqd->idle_slice_timer); - if (!preempted && !cfq_cfqq_dispatched(cfqq)) { - cfqq->service_last = now; + if (!preempted && !cfq_cfqq_dispatched(cfqq)) cfq_schedule_dispatch(cfqd); - } cfq_clear_cfqq_must_dispatch(cfqq); cfq_clear_cfqq_wait_request(cfqq); + cfq_clear_cfqq_queue_new(cfqq); /* * store what was left of this slice, if the queue idled out @@ -868,26 +692,25 @@ static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd) { struct cfq_queue *cfqq = NULL; - /* - * if current list is non-empty, grab first entry. if it is empty, - * get next prio level and grab first entry then if any are spliced - */ - if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1) + if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1) { + /* + * if current list is non-empty, grab first entry. if it is + * empty, get next prio level and grab first entry then if any + * are spliced + */ cfqq = list_entry_cfqq(cfqd->cur_rr.next); - - /* - * If no new queues are available, check if the busy list has some - * before falling back to idle io. - */ - if (!cfqq && !list_empty(&cfqd->busy_rr)) + } else if (!list_empty(&cfqd->busy_rr)) { + /* + * If no new queues are available, check if the busy list has + * some before falling back to idle io. + */ cfqq = list_entry_cfqq(cfqd->busy_rr.next); - - /* - * if we have idle queues and no rt or be queues had pending - * requests, either allow immediate service if the grace period - * has passed or arm the idle grace timer - */ - if (!cfqq && !list_empty(&cfqd->idle_rr)) { + } else if (!list_empty(&cfqd->idle_rr)) { + /* + * if we have idle queues and no rt or be queues had pending + * requests, either allow immediate service if the grace period + * has passed or arm the idle grace timer + */ unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE; if (time_after_eq(jiffies, end)) @@ -942,16 +765,14 @@ static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) return 1; } -static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq) +static void cfq_dispatch_insert(request_queue_t *q, struct request *rq) { struct cfq_data *cfqd = q->elevator->elevator_data; - struct cfq_queue *cfqq = crq->cfq_queue; - struct request *rq; + struct cfq_queue *cfqq = RQ_CFQQ(rq); - cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq); - cfq_remove_request(crq->request); - cfqq->on_dispatch[cfq_crq_is_sync(crq)]++; - elv_dispatch_sort(q, crq->request); + cfq_remove_request(rq); + cfqq->on_dispatch[rq_is_sync(rq)]++; + elv_dispatch_sort(q, rq); rq = list_entry(q->queue_head.prev, struct request, queuelist); cfqd->last_sector = rq->sector + rq->nr_sectors; @@ -960,24 +781,23 @@ static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq) /* * return expired entry, or NULL to just start from scratch in rbtree */ -static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq) +static inline struct request *cfq_check_fifo(struct cfq_queue *cfqq) { struct cfq_data *cfqd = cfqq->cfqd; struct request *rq; - struct cfq_rq *crq; + int fifo; if (cfq_cfqq_fifo_expire(cfqq)) return NULL; + if (list_empty(&cfqq->fifo)) + return NULL; - if (!list_empty(&cfqq->fifo)) { - int fifo = cfq_cfqq_class_sync(cfqq); + fifo = cfq_cfqq_class_sync(cfqq); + rq = rq_entry_fifo(cfqq->fifo.next); - crq = RQ_DATA(list_entry_fifo(cfqq->fifo.next)); - rq = crq->request; - if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) { - cfq_mark_cfqq_fifo_expire(cfqq); - return crq; - } + if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) { + cfq_mark_cfqq_fifo_expire(cfqq); + return rq; } return NULL; @@ -1063,25 +883,25 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq, BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list)); do { - struct cfq_rq *crq; + struct request *rq; /* * follow expired path, else get first next available */ - if ((crq = cfq_check_fifo(cfqq)) == NULL) - crq = cfqq->next_crq; + if ((rq = cfq_check_fifo(cfqq)) == NULL) + rq = cfqq->next_rq; /* * finally, insert request into driver dispatch list */ - cfq_dispatch_insert(cfqd->queue, crq); + cfq_dispatch_insert(cfqd->queue, rq); cfqd->dispatch_slice++; dispatched++; if (!cfqd->active_cic) { - atomic_inc(&crq->io_context->ioc->refcount); - cfqd->active_cic = crq->io_context; + atomic_inc(&RQ_CIC(rq)->ioc->refcount); + cfqd->active_cic = RQ_CIC(rq); } if (RB_EMPTY_ROOT(&cfqq->sort_list)) @@ -1112,13 +932,12 @@ static int cfq_forced_dispatch_cfqqs(struct list_head *list) { struct cfq_queue *cfqq, *next; - struct cfq_rq *crq; int dispatched; dispatched = 0; list_for_each_entry_safe(cfqq, next, list, cfq_list) { - while ((crq = cfqq->next_crq)) { - cfq_dispatch_insert(cfqq->cfqd->queue, crq); + while (cfqq->next_rq) { + cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq); dispatched++; } BUG_ON(!list_empty(&cfqq->fifo)); @@ -1194,8 +1013,8 @@ cfq_dispatch_requests(request_queue_t *q, int force) } /* - * task holds one reference to the queue, dropped when task exits. each crq - * in-flight on this queue also holds a reference, dropped when crq is freed. + * task holds one reference to the queue, dropped when task exits. each rq + * in-flight on this queue also holds a reference, dropped when rq is freed. * * queue lock must be held here. */ @@ -1223,7 +1042,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq) kmem_cache_free(cfq_pool, cfqq); } -static inline struct cfq_queue * +static struct cfq_queue * __cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio, const int hashval) { @@ -1260,62 +1079,63 @@ static void cfq_free_io_context(struct io_context *ioc) freed++; } - if (atomic_sub_and_test(freed, &ioc_count) && ioc_gone) + elv_ioc_count_mod(ioc_count, -freed); + + if (ioc_gone && !elv_ioc_count_read(ioc_count)) complete(ioc_gone); } -static void cfq_trim(struct io_context *ioc) +static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) { - ioc->set_ioprio = NULL; - cfq_free_io_context(ioc); + if (unlikely(cfqq == cfqd->active_queue)) + __cfq_slice_expired(cfqd, cfqq, 0); + + cfq_put_queue(cfqq); } -/* - * Called with interrupts disabled - */ -static void cfq_exit_single_io_context(struct cfq_io_context *cic) +static void __cfq_exit_single_io_context(struct cfq_data *cfqd, + struct cfq_io_context *cic) { - struct cfq_data *cfqd = cic->key; - request_queue_t *q; - - if (!cfqd) - return; - - q = cfqd->queue; - - WARN_ON(!irqs_disabled()); - - spin_lock(q->queue_lock); + list_del_init(&cic->queue_list); + smp_wmb(); + cic->key = NULL; if (cic->cfqq[ASYNC]) { - if (unlikely(cic->cfqq[ASYNC] == cfqd->active_queue)) - __cfq_slice_expired(cfqd, cic->cfqq[ASYNC], 0); - cfq_put_queue(cic->cfqq[ASYNC]); + cfq_exit_cfqq(cfqd, cic->cfqq[ASYNC]); cic->cfqq[ASYNC] = NULL; } if (cic->cfqq[SYNC]) { - if (unlikely(cic->cfqq[SYNC] == cfqd->active_queue)) - __cfq_slice_expired(cfqd, cic->cfqq[SYNC], 0); - cfq_put_queue(cic->cfqq[SYNC]); + cfq_exit_cfqq(cfqd, cic->cfqq[SYNC]); cic->cfqq[SYNC] = NULL; } +} - cic->key = NULL; - list_del_init(&cic->queue_list); - spin_unlock(q->queue_lock); + +/* + * Called with interrupts disabled + */ +static void cfq_exit_single_io_context(struct cfq_io_context *cic) +{ + struct cfq_data *cfqd = cic->key; + + if (cfqd) { + request_queue_t *q = cfqd->queue; + + spin_lock_irq(q->queue_lock); + __cfq_exit_single_io_context(cfqd, cic); + spin_unlock_irq(q->queue_lock); + } } static void cfq_exit_io_context(struct io_context *ioc) { struct cfq_io_context *__cic; - unsigned long flags; struct rb_node *n; /* * put the reference this task is holding to the various queues */ - spin_lock_irqsave(&cfq_exit_lock, flags); n = rb_first(&ioc->cic_root); while (n != NULL) { @@ -1324,22 +1144,21 @@ static void cfq_exit_io_context(struct io_context *ioc) cfq_exit_single_io_context(__cic); n = rb_next(n); } - - spin_unlock_irqrestore(&cfq_exit_lock, flags); } static struct cfq_io_context * cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) { - struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask); + struct cfq_io_context *cic; + cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask, cfqd->queue->node); if (cic) { memset(cic, 0, sizeof(*cic)); cic->last_end_request = jiffies; INIT_LIST_HEAD(&cic->queue_list); cic->dtor = cfq_free_io_context; cic->exit = cfq_exit_io_context; - atomic_inc(&ioc_count); + elv_ioc_count_inc(ioc_count); } return cic; @@ -1420,15 +1239,12 @@ static inline void changed_ioprio(struct cfq_io_context *cic) spin_unlock(cfqd->queue->queue_lock); } -/* - * callback from sys_ioprio_set, irqs are disabled - */ -static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) +static void cfq_ioc_set_ioprio(struct io_context *ioc) { struct cfq_io_context *cic; struct rb_node *n; - spin_lock(&cfq_exit_lock); + ioc->ioprio_changed = 0; n = rb_first(&ioc->cic_root); while (n != NULL) { @@ -1437,10 +1253,6 @@ static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) changed_ioprio(cic); n = rb_next(n); } - - spin_unlock(&cfq_exit_lock); - - return 0; } static struct cfq_queue * @@ -1460,12 +1272,18 @@ retry: cfqq = new_cfqq; new_cfqq = NULL; } else if (gfp_mask & __GFP_WAIT) { + /* + * Inform the allocator of the fact that we will + * just repeat this allocation if it fails, to allow + * the allocator to do whatever it needs to attempt to + * free memory. + */ spin_unlock_irq(cfqd->queue->queue_lock); - new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); + new_cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask|__GFP_NOFAIL, cfqd->queue->node); spin_lock_irq(cfqd->queue->queue_lock); goto retry; } else { - cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); + cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask, cfqd->queue->node); if (!cfqq) goto out; } @@ -1480,13 +1298,13 @@ retry: hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]); atomic_set(&cfqq->ref, 0); cfqq->cfqd = cfqd; - cfqq->service_last = 0; /* * set ->slice_left to allow preemption for a new process */ cfqq->slice_left = 2 * cfqd->cfq_slice_idle; cfq_mark_cfqq_idle_window(cfqq); cfq_mark_cfqq_prio_changed(cfqq); + cfq_mark_cfqq_queue_new(cfqq); cfq_init_prio_data(cfqq); } @@ -1502,12 +1320,10 @@ out: static void cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic) { - spin_lock(&cfq_exit_lock); + WARN_ON(!list_empty(&cic->queue_list)); rb_erase(&cic->rb_node, &ioc->cic_root); - list_del_init(&cic->queue_list); - spin_unlock(&cfq_exit_lock); kmem_cache_free(cfq_ioc_pool, cic); - atomic_dec(&ioc_count); + elv_ioc_count_dec(ioc_count); } static struct cfq_io_context * @@ -1551,7 +1367,6 @@ cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, cic->ioc = ioc; cic->key = cfqd; - ioc->set_ioprio = cfq_ioc_set_ioprio; restart: parent = NULL; p = &ioc->cic_root.rb_node; @@ -1573,11 +1388,12 @@ restart: BUG(); } - spin_lock(&cfq_exit_lock); rb_link_node(&cic->rb_node, parent, p); rb_insert_color(&cic->rb_node, &ioc->cic_root); + + spin_lock_irq(cfqd->queue->queue_lock); list_add(&cic->queue_list, &cfqd->cic_list); - spin_unlock(&cfq_exit_lock); + spin_unlock_irq(cfqd->queue->queue_lock); } /* @@ -1593,7 +1409,7 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) might_sleep_if(gfp_mask & __GFP_WAIT); - ioc = get_io_context(gfp_mask); + ioc = get_io_context(gfp_mask, cfqd->queue->node); if (!ioc) return NULL; @@ -1607,6 +1423,10 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) cfq_cic_link(cfqd, ioc, cic); out: + smp_read_barrier_depends(); + if (unlikely(ioc->ioprio_changed)) + cfq_ioc_set_ioprio(ioc); + return cic; err: put_io_context(ioc); @@ -1640,15 +1460,15 @@ cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic) static void cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic, - struct cfq_rq *crq) + struct request *rq) { sector_t sdist; u64 total; - if (cic->last_request_pos < crq->request->sector) - sdist = crq->request->sector - cic->last_request_pos; + if (cic->last_request_pos < rq->sector) + sdist = rq->sector - cic->last_request_pos; else - sdist = cic->last_request_pos - crq->request->sector; + sdist = cic->last_request_pos - rq->sector; /* * Don't allow the seek distance to get too large from the @@ -1699,7 +1519,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, */ static int cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, - struct cfq_rq *crq) + struct request *rq) { struct cfq_queue *cfqq = cfqd->active_queue; @@ -1718,7 +1538,17 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, */ if (new_cfqq->slice_left < cfqd->cfq_slice_idle) return 0; - if (cfq_crq_is_sync(crq) && !cfq_cfqq_sync(cfqq)) + /* + * if the new request is sync, but the currently running queue is + * not, let the sync request have priority. + */ + if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq)) + return 1; + /* + * So both queues are sync. Let the new request get disk time if + * it's a metadata request and the current queue is doing regular IO. + */ + if (rq_is_meta(rq) && !cfqq->meta_pending) return 1; return 0; @@ -1730,47 +1560,45 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, */ static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) { - struct cfq_queue *__cfqq, *next; - - list_for_each_entry_safe(__cfqq, next, &cfqd->cur_rr, cfq_list) - cfq_resort_rr_list(__cfqq, 1); + cfq_slice_expired(cfqd, 1); if (!cfqq->slice_left) cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2; - cfqq->slice_end = cfqq->slice_left + jiffies; - cfq_slice_expired(cfqd, 1); - __cfq_set_active_queue(cfqd, cfqq); -} - -/* - * should really be a ll_rw_blk.c helper - */ -static void cfq_start_queueing(struct cfq_data *cfqd, struct cfq_queue *cfqq) -{ - request_queue_t *q = cfqd->queue; + /* + * Put the new queue at the front of the of the current list, + * so we know that it will be selected next. + */ + BUG_ON(!cfq_cfqq_on_rr(cfqq)); + list_move(&cfqq->cfq_list, &cfqd->cur_rr); - if (!blk_queue_plugged(q)) - q->request_fn(q); - else - __generic_unplug_device(q); + cfqq->slice_end = cfqq->slice_left + jiffies; } /* - * Called when a new fs request (crq) is added (to cfqq). Check if there's + * Called when a new fs request (rq) is added (to cfqq). Check if there's * something we should do about it */ static void -cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, - struct cfq_rq *crq) +cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, + struct request *rq) { - struct cfq_io_context *cic = crq->io_context; + struct cfq_io_context *cic = RQ_CIC(rq); + + if (rq_is_meta(rq)) + cfqq->meta_pending++; + + /* + * check if this request is a better next-serve candidate)) { + */ + cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq); + BUG_ON(!cfqq->next_rq); /* * we never wait for an async request and we don't allow preemption * of an async request. so just return early */ - if (!cfq_crq_is_sync(crq)) { + if (!rq_is_sync(rq)) { /* * sync process issued an async request, if it's waiting * then expire it and kick rq handling. @@ -1778,17 +1606,17 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, if (cic == cfqd->active_cic && del_timer(&cfqd->idle_slice_timer)) { cfq_slice_expired(cfqd, 0); - cfq_start_queueing(cfqd, cfqq); + blk_start_queueing(cfqd->queue); } return; } cfq_update_io_thinktime(cfqd, cic); - cfq_update_io_seektime(cfqd, cic, crq); + cfq_update_io_seektime(cfqd, cic, rq); cfq_update_idle_window(cfqd, cfqq, cic); cic->last_queue = jiffies; - cic->last_request_pos = crq->request->sector + crq->request->nr_sectors; + cic->last_request_pos = rq->sector + rq->nr_sectors; if (cfqq == cfqd->active_queue) { /* @@ -1799,9 +1627,9 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, if (cfq_cfqq_wait_request(cfqq)) { cfq_mark_cfqq_must_dispatch(cfqq); del_timer(&cfqd->idle_slice_timer); - cfq_start_queueing(cfqd, cfqq); + blk_start_queueing(cfqd->queue); } - } else if (cfq_should_preempt(cfqd, cfqq, crq)) { + } else if (cfq_should_preempt(cfqd, cfqq, rq)) { /* * not the active queue - expire current slice if it is * idle and has expired it's mean thinktime or this new queue @@ -1809,34 +1637,32 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, */ cfq_preempt_queue(cfqd, cfqq); cfq_mark_cfqq_must_dispatch(cfqq); - cfq_start_queueing(cfqd, cfqq); + blk_start_queueing(cfqd->queue); } } static void cfq_insert_request(request_queue_t *q, struct request *rq) { struct cfq_data *cfqd = q->elevator->elevator_data; - struct cfq_rq *crq = RQ_DATA(rq); - struct cfq_queue *cfqq = crq->cfq_queue; + struct cfq_queue *cfqq = RQ_CFQQ(rq); cfq_init_prio_data(cfqq); - cfq_add_crq_rb(crq); + cfq_add_rq_rb(rq); - list_add_tail(&rq->queuelist, &cfqq->fifo); + if (!cfq_cfqq_on_rr(cfqq)) + cfq_add_cfqq_rr(cfqd, cfqq); - if (rq_mergeable(rq)) - cfq_add_crq_hash(cfqd, crq); + list_add_tail(&rq->queuelist, &cfqq->fifo); - cfq_crq_enqueued(cfqd, cfqq, crq); + cfq_rq_enqueued(cfqd, cfqq, rq); } static void cfq_completed_request(request_queue_t *q, struct request *rq) { - struct cfq_rq *crq = RQ_DATA(rq); - struct cfq_queue *cfqq = crq->cfq_queue; + struct cfq_queue *cfqq = RQ_CFQQ(rq); struct cfq_data *cfqd = cfqq->cfqd; - const int sync = cfq_crq_is_sync(crq); + const int sync = rq_is_sync(rq); unsigned long now; now = jiffies; @@ -1849,15 +1675,11 @@ static void cfq_completed_request(request_queue_t *q, struct request *rq) if (!cfq_class_idle(cfqq)) cfqd->last_end_request = now; - if (!cfq_cfqq_dispatched(cfqq)) { - if (cfq_cfqq_on_rr(cfqq)) { - cfqq->service_last = now; - cfq_resort_rr_list(cfqq, 0); - } - } + if (!cfq_cfqq_dispatched(cfqq) && cfq_cfqq_on_rr(cfqq)) + cfq_resort_rr_list(cfqq, 0); if (sync) - crq->io_context->last_end_request = now; + RQ_CIC(rq)->last_end_request = now; /* * If this is the active queue, check if it needs to be expired, @@ -1873,30 +1695,6 @@ static void cfq_completed_request(request_queue_t *q, struct request *rq) } } -static struct request * -cfq_former_request(request_queue_t *q, struct request *rq) -{ - struct cfq_rq *crq = RQ_DATA(rq); - struct rb_node *rbprev = rb_prev(&crq->rb_node); - - if (rbprev) - return rb_entry_crq(rbprev)->request; - - return NULL; -} - -static struct request * -cfq_latter_request(request_queue_t *q, struct request *rq) -{ - struct cfq_rq *crq = RQ_DATA(rq); - struct rb_node *rbnext = rb_next(&crq->rb_node); - - if (rbnext) - return rb_entry_crq(rbnext)->request; - - return NULL; -} - /* * we temporarily boost lower priority queues if they are holding fs exclusive * resources. they are boosted to normal prio (CLASS_BE/4) @@ -1933,9 +1731,7 @@ static void cfq_prio_boost(struct cfq_queue *cfqq) cfq_resort_rr_list(cfqq, 0); } -static inline int -__cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq, - struct task_struct *task, int rw) +static inline int __cfq_may_queue(struct cfq_queue *cfqq) { if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) && !cfq_cfqq_must_alloc_slice(cfqq)) { @@ -1946,7 +1742,7 @@ __cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq, return ELV_MQUEUE_MAY; } -static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio) +static int cfq_may_queue(request_queue_t *q, int rw) { struct cfq_data *cfqd = q->elevator->elevator_data; struct task_struct *tsk = current; @@ -1963,48 +1759,30 @@ static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio) cfq_init_prio_data(cfqq); cfq_prio_boost(cfqq); - return __cfq_may_queue(cfqd, cfqq, tsk, rw); + return __cfq_may_queue(cfqq); } return ELV_MQUEUE_MAY; } -static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq) -{ - struct cfq_data *cfqd = q->elevator->elevator_data; - - if (unlikely(cfqd->rq_starved)) { - struct request_list *rl = &q->rq; - - smp_mb(); - if (waitqueue_active(&rl->wait[READ])) - wake_up(&rl->wait[READ]); - if (waitqueue_active(&rl->wait[WRITE])) - wake_up(&rl->wait[WRITE]); - } -} - /* * queue lock held here */ static void cfq_put_request(request_queue_t *q, struct request *rq) { - struct cfq_data *cfqd = q->elevator->elevator_data; - struct cfq_rq *crq = RQ_DATA(rq); + struct cfq_queue *cfqq = RQ_CFQQ(rq); - if (crq) { - struct cfq_queue *cfqq = crq->cfq_queue; + if (cfqq) { const int rw = rq_data_dir(rq); BUG_ON(!cfqq->allocated[rw]); cfqq->allocated[rw]--; - put_io_context(crq->io_context->ioc); + put_io_context(RQ_CIC(rq)->ioc); - mempool_free(crq, cfqd->crq_pool); rq->elevator_private = NULL; + rq->elevator_private2 = NULL; - cfq_check_waiters(q, cfqq); cfq_put_queue(cfqq); } } @@ -2013,8 +1791,7 @@ static void cfq_put_request(request_queue_t *q, struct request *rq) * Allocate cfq data structures associated with this request. */ static int -cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, - gfp_t gfp_mask) +cfq_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask) { struct cfq_data *cfqd = q->elevator->elevator_data; struct task_struct *tsk = current; @@ -2022,7 +1799,6 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, const int rw = rq_data_dir(rq); pid_t key = cfq_queue_pid(tsk, rw); struct cfq_queue *cfqq; - struct cfq_rq *crq; unsigned long flags; int is_sync = key != CFQ_KEY_ASYNC; @@ -2046,42 +1822,18 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, cfqq->allocated[rw]++; cfq_clear_cfqq_must_alloc(cfqq); - cfqd->rq_starved = 0; atomic_inc(&cfqq->ref); - spin_unlock_irqrestore(q->queue_lock, flags); - crq = mempool_alloc(cfqd->crq_pool, gfp_mask); - if (crq) { - RB_CLEAR_NODE(&crq->rb_node); - crq->rb_key = 0; - crq->request = rq; - INIT_HLIST_NODE(&crq->hash); - crq->cfq_queue = cfqq; - crq->io_context = cic; - - if (is_sync) - cfq_mark_crq_is_sync(crq); - else - cfq_clear_crq_is_sync(crq); + spin_unlock_irqrestore(q->queue_lock, flags); - rq->elevator_private = crq; - return 0; - } + rq->elevator_private = cic; + rq->elevator_private2 = cfqq; + return 0; - spin_lock_irqsave(q->queue_lock, flags); - cfqq->allocated[rw]--; - if (!(cfqq->allocated[0] + cfqq->allocated[1])) - cfq_mark_cfqq_must_alloc(cfqq); - cfq_put_queue(cfqq); queue_fail: if (cic) put_io_context(cic->ioc); - /* - * mark us rq allocation starved. we need to kickstart the process - * ourselves if there are no pending requests that can do it for us. - * that would be an extremely rare OOM situation - */ - cfqd->rq_starved = 1; + cfq_schedule_dispatch(cfqd); spin_unlock_irqrestore(q->queue_lock, flags); return 1; @@ -2090,27 +1842,10 @@ queue_fail: static void cfq_kick_queue(void *data) { request_queue_t *q = data; - struct cfq_data *cfqd = q->elevator->elevator_data; unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); - - if (cfqd->rq_starved) { - struct request_list *rl = &q->rq; - - /* - * we aren't guaranteed to get a request after this, but we - * have to be opportunistic - */ - smp_mb(); - if (waitqueue_active(&rl->wait[READ])) - wake_up(&rl->wait[READ]); - if (waitqueue_active(&rl->wait[WRITE])) - wake_up(&rl->wait[WRITE]); - } - - blk_remove_plug(q); - q->request_fn(q); + blk_start_queueing(q); spin_unlock_irqrestore(q->queue_lock, flags); } @@ -2193,7 +1928,6 @@ static void cfq_exit_queue(elevator_t *e) cfq_shutdown_timer_wq(cfqd); - spin_lock(&cfq_exit_lock); spin_lock_irq(q->queue_lock); if (cfqd->active_queue) @@ -2203,25 +1937,14 @@ static void cfq_exit_queue(elevator_t *e) struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, struct cfq_io_context, queue_list); - if (cic->cfqq[ASYNC]) { - cfq_put_queue(cic->cfqq[ASYNC]); - cic->cfqq[ASYNC] = NULL; - } - if (cic->cfqq[SYNC]) { - cfq_put_queue(cic->cfqq[SYNC]); - cic->cfqq[SYNC] = NULL; - } - cic->key = NULL; - list_del_init(&cic->queue_list); + + __cfq_exit_single_io_context(cfqd, cic); } spin_unlock_irq(q->queue_lock); - spin_unlock(&cfq_exit_lock); cfq_shutdown_timer_wq(cfqd); - mempool_destroy(cfqd->crq_pool); - kfree(cfqd->crq_hash); kfree(cfqd->cfq_hash); kfree(cfqd); } @@ -2231,7 +1954,7 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e) struct cfq_data *cfqd; int i; - cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL); + cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL, q->node); if (!cfqd) return NULL; @@ -2243,23 +1966,12 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e) INIT_LIST_HEAD(&cfqd->busy_rr); INIT_LIST_HEAD(&cfqd->cur_rr); INIT_LIST_HEAD(&cfqd->idle_rr); - INIT_LIST_HEAD(&cfqd->empty_list); INIT_LIST_HEAD(&cfqd->cic_list); - cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL); - if (!cfqd->crq_hash) - goto out_crqhash; - - cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL); + cfqd->cfq_hash = kmalloc_node(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL, q->node); if (!cfqd->cfq_hash) - goto out_cfqhash; - - cfqd->crq_pool = mempool_create_slab_pool(BLKDEV_MIN_RQ, crq_pool); - if (!cfqd->crq_pool) - goto out_crqpool; + goto out_free; - for (i = 0; i < CFQ_MHASH_ENTRIES; i++) - INIT_HLIST_HEAD(&cfqd->crq_hash[i]); for (i = 0; i < CFQ_QHASH_ENTRIES; i++) INIT_HLIST_HEAD(&cfqd->cfq_hash[i]); @@ -2275,7 +1987,6 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e) INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q); - cfqd->cfq_queued = cfq_queued; cfqd->cfq_quantum = cfq_quantum; cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0]; cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1]; @@ -2287,19 +1998,13 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e) cfqd->cfq_slice_idle = cfq_slice_idle; return cfqd; -out_crqpool: - kfree(cfqd->cfq_hash); -out_cfqhash: - kfree(cfqd->crq_hash); -out_crqhash: +out_free: kfree(cfqd); return NULL; } static void cfq_slab_kill(void) { - if (crq_pool) - kmem_cache_destroy(crq_pool); if (cfq_pool) kmem_cache_destroy(cfq_pool); if (cfq_ioc_pool) @@ -2308,11 +2013,6 @@ static void cfq_slab_kill(void) static int __init cfq_slab_setup(void) { - crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0, - NULL, NULL); - if (!crq_pool) - goto fail; - cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0, NULL, NULL); if (!cfq_pool) @@ -2358,7 +2058,6 @@ static ssize_t __FUNC(elevator_t *e, char *page) \ return cfq_var_show(__data, (page)); \ } SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); -SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued, 0); SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1); SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1); SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0); @@ -2386,7 +2085,6 @@ static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \ return ret; \ } STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0); -STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX, 0); STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1); STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1); STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0); @@ -2402,7 +2100,6 @@ STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, static struct elv_fs_entry cfq_attrs[] = { CFQ_ATTR(quantum), - CFQ_ATTR(queued), CFQ_ATTR(fifo_expire_sync), CFQ_ATTR(fifo_expire_async), CFQ_ATTR(back_seek_max), @@ -2425,14 +2122,14 @@ static struct elevator_type iosched_cfq = { .elevator_deactivate_req_fn = cfq_deactivate_request, .elevator_queue_empty_fn = cfq_queue_empty, .elevator_completed_req_fn = cfq_completed_request, - .elevator_former_req_fn = cfq_former_request, - .elevator_latter_req_fn = cfq_latter_request, + .elevator_former_req_fn = elv_rb_former_request, + .elevator_latter_req_fn = elv_rb_latter_request, .elevator_set_req_fn = cfq_set_request, .elevator_put_req_fn = cfq_put_request, .elevator_may_queue_fn = cfq_may_queue, .elevator_init_fn = cfq_init_queue, .elevator_exit_fn = cfq_exit_queue, - .trim = cfq_trim, + .trim = cfq_free_io_context, }, .elevator_attrs = cfq_attrs, .elevator_name = "cfq", @@ -2468,7 +2165,7 @@ static void __exit cfq_exit(void) ioc_gone = &all_gone; /* ioc_gone's update must be visible before reading ioc_count */ smp_wmb(); - if (atomic_read(&ioc_count)) + if (elv_ioc_count_read(ioc_count)) wait_for_completion(ioc_gone); synchronize_rcu(); cfq_slab_kill(); diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index c7ca9f0b649..b7c5b34cb7b 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -1,7 +1,7 @@ /* * Deadline i/o scheduler. * - * Copyright (C) 2002 Jens Axboe <axboe@suse.de> + * Copyright (C) 2002 Jens Axboe <axboe@kernel.dk> */ #include <linux/kernel.h> #include <linux/fs.h> @@ -12,7 +12,6 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/compiler.h> -#include <linux/hash.h> #include <linux/rbtree.h> /* @@ -24,13 +23,6 @@ static const int writes_starved = 2; /* max times reads can starve a write */ static const int fifo_batch = 16; /* # of sequential requests treated as one by the above parameters. For throughput. */ -static const int deadline_hash_shift = 5; -#define DL_HASH_BLOCK(sec) ((sec) >> 3) -#define DL_HASH_FN(sec) (hash_long(DL_HASH_BLOCK((sec)), deadline_hash_shift)) -#define DL_HASH_ENTRIES (1 << deadline_hash_shift) -#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) -#define ON_HASH(drq) (!hlist_unhashed(&(drq)->hash)) - struct deadline_data { /* * run time data @@ -45,8 +37,7 @@ struct deadline_data { /* * next in sort order. read, write or both are NULL */ - struct deadline_rq *next_drq[2]; - struct hlist_head *hash; /* request hash */ + struct request *next_rq[2]; unsigned int batching; /* number of sequential requests made */ sector_t last_sector; /* head position */ unsigned int starved; /* times reads have starved writes */ @@ -58,240 +49,69 @@ struct deadline_data { int fifo_batch; int writes_starved; int front_merges; - - mempool_t *drq_pool; }; -/* - * pre-request data. - */ -struct deadline_rq { - /* - * rbtree index, key is the starting offset - */ - struct rb_node rb_node; - sector_t rb_key; - - struct request *request; - - /* - * request hash, key is the ending offset (for back merge lookup) - */ - struct hlist_node hash; - - /* - * expire fifo - */ - struct list_head fifo; - unsigned long expires; -}; - -static void deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq); - -static kmem_cache_t *drq_pool; - -#define RQ_DATA(rq) ((struct deadline_rq *) (rq)->elevator_private) +static void deadline_move_request(struct deadline_data *, struct request *); -/* - * the back merge hash support functions - */ -static inline void __deadline_del_drq_hash(struct deadline_rq *drq) -{ - hlist_del_init(&drq->hash); -} - -static inline void deadline_del_drq_hash(struct deadline_rq *drq) -{ - if (ON_HASH(drq)) - __deadline_del_drq_hash(drq); -} - -static inline void -deadline_add_drq_hash(struct deadline_data *dd, struct deadline_rq *drq) -{ - struct request *rq = drq->request; - - BUG_ON(ON_HASH(drq)); - - hlist_add_head(&drq->hash, &dd->hash[DL_HASH_FN(rq_hash_key(rq))]); -} - -/* - * move hot entry to front of chain - */ -static inline void -deadline_hot_drq_hash(struct deadline_data *dd, struct deadline_rq *drq) -{ - struct request *rq = drq->request; - struct hlist_head *head = &dd->hash[DL_HASH_FN(rq_hash_key(rq))]; - - if (ON_HASH(drq) && &drq->hash != head->first) { - hlist_del(&drq->hash); - hlist_add_head(&drq->hash, head); - } -} - -static struct request * -deadline_find_drq_hash(struct deadline_data *dd, sector_t offset) -{ - struct hlist_head *hash_list = &dd->hash[DL_HASH_FN(offset)]; - struct hlist_node *entry, *next; - struct deadline_rq *drq; - - hlist_for_each_entry_safe(drq, entry, next, hash_list, hash) { - struct request *__rq = drq->request; - - BUG_ON(!ON_HASH(drq)); - - if (!rq_mergeable(__rq)) { - __deadline_del_drq_hash(drq); - continue; - } - - if (rq_hash_key(__rq) == offset) - return __rq; - } - - return NULL; -} - -/* - * rb tree support functions - */ -#define rb_entry_drq(node) rb_entry((node), struct deadline_rq, rb_node) -#define DRQ_RB_ROOT(dd, drq) (&(dd)->sort_list[rq_data_dir((drq)->request)]) -#define rq_rb_key(rq) (rq)->sector - -static struct deadline_rq * -__deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) -{ - struct rb_node **p = &DRQ_RB_ROOT(dd, drq)->rb_node; - struct rb_node *parent = NULL; - struct deadline_rq *__drq; - - while (*p) { - parent = *p; - __drq = rb_entry_drq(parent); - - if (drq->rb_key < __drq->rb_key) - p = &(*p)->rb_left; - else if (drq->rb_key > __drq->rb_key) - p = &(*p)->rb_right; - else - return __drq; - } - - rb_link_node(&drq->rb_node, parent, p); - return NULL; -} +#define RQ_RB_ROOT(dd, rq) (&(dd)->sort_list[rq_data_dir((rq))]) static void -deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) +deadline_add_rq_rb(struct deadline_data *dd, struct request *rq) { - struct deadline_rq *__alias; - - drq->rb_key = rq_rb_key(drq->request); + struct rb_root *root = RQ_RB_ROOT(dd, rq); + struct request *__alias; retry: - __alias = __deadline_add_drq_rb(dd, drq); - if (!__alias) { - rb_insert_color(&drq->rb_node, DRQ_RB_ROOT(dd, drq)); - return; + __alias = elv_rb_add(root, rq); + if (unlikely(__alias)) { + deadline_move_request(dd, __alias); + goto retry; } - - deadline_move_request(dd, __alias); - goto retry; } static inline void -deadline_del_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) +deadline_del_rq_rb(struct deadline_data *dd, struct request *rq) { - const int data_dir = rq_data_dir(drq->request); + const int data_dir = rq_data_dir(rq); - if (dd->next_drq[data_dir] == drq) { - struct rb_node *rbnext = rb_next(&drq->rb_node); + if (dd->next_rq[data_dir] == rq) { + struct rb_node *rbnext = rb_next(&rq->rb_node); - dd->next_drq[data_dir] = NULL; + dd->next_rq[data_dir] = NULL; if (rbnext) - dd->next_drq[data_dir] = rb_entry_drq(rbnext); - } - - BUG_ON(!RB_EMPTY_NODE(&drq->rb_node)); - rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq)); - RB_CLEAR_NODE(&drq->rb_node); -} - -static struct request * -deadline_find_drq_rb(struct deadline_data *dd, sector_t sector, int data_dir) -{ - struct rb_node *n = dd->sort_list[data_dir].rb_node; - struct deadline_rq *drq; - - while (n) { - drq = rb_entry_drq(n); - - if (sector < drq->rb_key) - n = n->rb_left; - else if (sector > drq->rb_key) - n = n->rb_right; - else - return drq->request; + dd->next_rq[data_dir] = rb_entry_rq(rbnext); } - return NULL; + elv_rb_del(RQ_RB_ROOT(dd, rq), rq); } /* - * deadline_find_first_drq finds the first (lowest sector numbered) request - * for the specified data_dir. Used to sweep back to the start of the disk - * (1-way elevator) after we process the last (highest sector) request. - */ -static struct deadline_rq * -deadline_find_first_drq(struct deadline_data *dd, int data_dir) -{ - struct rb_node *n = dd->sort_list[data_dir].rb_node; - - for (;;) { - if (n->rb_left == NULL) - return rb_entry_drq(n); - - n = n->rb_left; - } -} - -/* - * add drq to rbtree and fifo + * add rq to rbtree and fifo */ static void deadline_add_request(struct request_queue *q, struct request *rq) { struct deadline_data *dd = q->elevator->elevator_data; - struct deadline_rq *drq = RQ_DATA(rq); + const int data_dir = rq_data_dir(rq); - const int data_dir = rq_data_dir(drq->request); + deadline_add_rq_rb(dd, rq); - deadline_add_drq_rb(dd, drq); /* * set expire time (only used for reads) and add to fifo list */ - drq->expires = jiffies + dd->fifo_expire[data_dir]; - list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]); - - if (rq_mergeable(rq)) - deadline_add_drq_hash(dd, drq); + rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]); + list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]); } /* - * remove rq from rbtree, fifo, and hash + * remove rq from rbtree and fifo. */ static void deadline_remove_request(request_queue_t *q, struct request *rq) { - struct deadline_rq *drq = RQ_DATA(rq); struct deadline_data *dd = q->elevator->elevator_data; - list_del_init(&drq->fifo); - deadline_del_drq_rb(dd, drq); - deadline_del_drq_hash(drq); + rq_fifo_clear(rq); + deadline_del_rq_rb(dd, rq); } static int @@ -302,27 +122,14 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio) int ret; /* - * see if the merge hash can satisfy a back merge - */ - __rq = deadline_find_drq_hash(dd, bio->bi_sector); - if (__rq) { - BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector); - - if (elv_rq_merge_ok(__rq, bio)) { - ret = ELEVATOR_BACK_MERGE; - goto out; - } - } - - /* * check for front merge */ if (dd->front_merges) { - sector_t rb_key = bio->bi_sector + bio_sectors(bio); + sector_t sector = bio->bi_sector + bio_sectors(bio); - __rq = deadline_find_drq_rb(dd, rb_key, bio_data_dir(bio)); + __rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector); if (__rq) { - BUG_ON(rb_key != rq_rb_key(__rq)); + BUG_ON(sector != __rq->sector); if (elv_rq_merge_ok(__rq, bio)) { ret = ELEVATOR_FRONT_MERGE; @@ -333,29 +140,21 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio) return ELEVATOR_NO_MERGE; out: - if (ret) - deadline_hot_drq_hash(dd, RQ_DATA(__rq)); *req = __rq; return ret; } -static void deadline_merged_request(request_queue_t *q, struct request *req) +static void deadline_merged_request(request_queue_t *q, struct request *req, + int type) { struct deadline_data *dd = q->elevator->elevator_data; - struct deadline_rq *drq = RQ_DATA(req); - - /* - * hash always needs to be repositioned, key is end sector - */ - deadline_del_drq_hash(drq); - deadline_add_drq_hash(dd, drq); /* * if the merge was a front merge, we need to reposition request */ - if (rq_rb_key(req) != drq->rb_key) { - deadline_del_drq_rb(dd, drq); - deadline_add_drq_rb(dd, drq); + if (type == ELEVATOR_FRONT_MERGE) { + elv_rb_del(RQ_RB_ROOT(dd, req), req); + deadline_add_rq_rb(dd, req); } } @@ -363,33 +162,14 @@ static void deadline_merged_requests(request_queue_t *q, struct request *req, struct request *next) { - struct deadline_data *dd = q->elevator->elevator_data; - struct deadline_rq *drq = RQ_DATA(req); - struct deadline_rq *dnext = RQ_DATA(next); - - BUG_ON(!drq); - BUG_ON(!dnext); - /* - * reposition drq (this is the merged request) in hash, and in rbtree - * in case of a front merge + * if next expires before rq, assign its expire time to rq + * and move into next position (next will be deleted) in fifo */ - deadline_del_drq_hash(drq); - deadline_add_drq_hash(dd, drq); - - if (rq_rb_key(req) != drq->rb_key) { - deadline_del_drq_rb(dd, drq); - deadline_add_drq_rb(dd, drq); - } - - /* - * if dnext expires before drq, assign its expire time to drq - * and move into dnext position (dnext will be deleted) in fifo - */ - if (!list_empty(&drq->fifo) && !list_empty(&dnext->fifo)) { - if (time_before(dnext->expires, drq->expires)) { - list_move(&drq->fifo, &dnext->fifo); - drq->expires = dnext->expires; + if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) { + if (time_before(rq_fifo_time(next), rq_fifo_time(req))) { + list_move(&req->queuelist, &next->queuelist); + rq_set_fifo_time(req, rq_fifo_time(next)); } } @@ -403,52 +183,50 @@ deadline_merged_requests(request_queue_t *q, struct request *req, * move request from sort list to dispatch queue. */ static inline void -deadline_move_to_dispatch(struct deadline_data *dd, struct deadline_rq *drq) +deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq) { - request_queue_t *q = drq->request->q; + request_queue_t *q = rq->q; - deadline_remove_request(q, drq->request); - elv_dispatch_add_tail(q, drq->request); + deadline_remove_request(q, rq); + elv_dispatch_add_tail(q, rq); } /* * move an entry to dispatch queue */ static void -deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq) +deadline_move_request(struct deadline_data *dd, struct request *rq) { - const int data_dir = rq_data_dir(drq->request); - struct rb_node *rbnext = rb_next(&drq->rb_node); + const int data_dir = rq_data_dir(rq); + struct rb_node *rbnext = rb_next(&rq->rb_node); - dd->next_drq[READ] = NULL; - dd->next_drq[WRITE] = NULL; + dd->next_rq[READ] = NULL; + dd->next_rq[WRITE] = NULL; if (rbnext) - dd->next_drq[data_dir] = rb_entry_drq(rbnext); + dd->next_rq[data_dir] = rb_entry_rq(rbnext); - dd->last_sector = drq->request->sector + drq->request->nr_sectors; + dd->last_sector = rq->sector + rq->nr_sectors; /* * take it off the sort and fifo list, move * to dispatch queue */ - deadline_move_to_dispatch(dd, drq); + deadline_move_to_dispatch(dd, rq); } -#define list_entry_fifo(ptr) list_entry((ptr), struct deadline_rq, fifo) - /* * deadline_check_fifo returns 0 if there are no expired reads on the fifo, * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir]) */ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) { - struct deadline_rq *drq = list_entry_fifo(dd->fifo_list[ddir].next); + struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next); /* - * drq is expired! + * rq is expired! */ - if (time_after(jiffies, drq->expires)) + if (time_after(jiffies, rq_fifo_time(rq))) return 1; return 0; @@ -463,21 +241,21 @@ static int deadline_dispatch_requests(request_queue_t *q, int force) struct deadline_data *dd = q->elevator->elevator_data; const int reads = !list_empty(&dd->fifo_list[READ]); const int writes = !list_empty(&dd->fifo_list[WRITE]); - struct deadline_rq *drq; + struct request *rq; int data_dir; /* * batches are currently reads XOR writes */ - if (dd->next_drq[WRITE]) - drq = dd->next_drq[WRITE]; + if (dd->next_rq[WRITE]) + rq = dd->next_rq[WRITE]; else - drq = dd->next_drq[READ]; + rq = dd->next_rq[READ]; - if (drq) { + if (rq) { /* we have a "next request" */ - if (dd->last_sector != drq->request->sector) + if (dd->last_sector != rq->sector) /* end the batch on a non sequential request */ dd->batching += dd->fifo_batch; @@ -526,30 +304,33 @@ dispatch_find_request: if (deadline_check_fifo(dd, data_dir)) { /* An expired request exists - satisfy it */ dd->batching = 0; - drq = list_entry_fifo(dd->fifo_list[data_dir].next); + rq = rq_entry_fifo(dd->fifo_list[data_dir].next); - } else if (dd->next_drq[data_dir]) { + } else if (dd->next_rq[data_dir]) { /* * The last req was the same dir and we have a next request in * sort order. No expired requests so continue on from here. */ - drq = dd->next_drq[data_dir]; + rq = dd->next_rq[data_dir]; } else { + struct rb_node *node; /* * The last req was the other direction or we have run out of * higher-sectored requests. Go back to the lowest sectored * request (1 way elevator) and start a new batch. */ dd->batching = 0; - drq = deadline_find_first_drq(dd, data_dir); + node = rb_first(&dd->sort_list[data_dir]); + if (node) + rq = rb_entry_rq(node); } dispatch_request: /* - * drq is the selected appropriate request. + * rq is the selected appropriate request. */ dd->batching++; - deadline_move_request(dd, drq); + deadline_move_request(dd, rq); return 1; } @@ -562,30 +343,6 @@ static int deadline_queue_empty(request_queue_t *q) && list_empty(&dd->fifo_list[READ]); } -static struct request * -deadline_former_request(request_queue_t *q, struct request *rq) -{ - struct deadline_rq *drq = RQ_DATA(rq); - struct rb_node *rbprev = rb_prev(&drq->rb_node); - - if (rbprev) - return rb_entry_drq(rbprev)->request; - - return NULL; -} - -static struct request * -deadline_latter_request(request_queue_t *q, struct request *rq) -{ - struct deadline_rq *drq = RQ_DATA(rq); - struct rb_node *rbnext = rb_next(&drq->rb_node); - - if (rbnext) - return rb_entry_drq(rbnext)->request; - - return NULL; -} - static void deadline_exit_queue(elevator_t *e) { struct deadline_data *dd = e->elevator_data; @@ -593,46 +350,21 @@ static void deadline_exit_queue(elevator_t *e) BUG_ON(!list_empty(&dd->fifo_list[READ])); BUG_ON(!list_empty(&dd->fifo_list[WRITE])); - mempool_destroy(dd->drq_pool); - kfree(dd->hash); kfree(dd); } /* - * initialize elevator private data (deadline_data), and alloc a drq for - * each request on the free lists + * initialize elevator private data (deadline_data). */ static void *deadline_init_queue(request_queue_t *q, elevator_t *e) { struct deadline_data *dd; - int i; - - if (!drq_pool) - return NULL; dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node); if (!dd) return NULL; memset(dd, 0, sizeof(*dd)); - dd->hash = kmalloc_node(sizeof(struct hlist_head)*DL_HASH_ENTRIES, - GFP_KERNEL, q->node); - if (!dd->hash) { - kfree(dd); - return NULL; - } - - dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, - mempool_free_slab, drq_pool, q->node); - if (!dd->drq_pool) { - kfree(dd->hash); - kfree(dd); - return NULL; - } - - for (i = 0; i < DL_HASH_ENTRIES; i++) - INIT_HLIST_HEAD(&dd->hash[i]); - INIT_LIST_HEAD(&dd->fifo_list[READ]); INIT_LIST_HEAD(&dd->fifo_list[WRITE]); dd->sort_list[READ] = RB_ROOT; @@ -645,39 +377,6 @@ static void *deadline_init_queue(request_queue_t *q, elevator_t *e) return dd; } -static void deadline_put_request(request_queue_t *q, struct request *rq) -{ - struct deadline_data *dd = q->elevator->elevator_data; - struct deadline_rq *drq = RQ_DATA(rq); - - mempool_free(drq, dd->drq_pool); - rq->elevator_private = NULL; -} - -static int -deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio, - gfp_t gfp_mask) -{ - struct deadline_data *dd = q->elevator->elevator_data; - struct deadline_rq *drq; - - drq = mempool_alloc(dd->drq_pool, gfp_mask); - if (drq) { - memset(drq, 0, sizeof(*drq)); - RB_CLEAR_NODE(&drq->rb_node); - drq->request = rq; - - INIT_HLIST_NODE(&drq->hash); - - INIT_LIST_HEAD(&drq->fifo); - - rq->elevator_private = drq; - return 0; - } - - return 1; -} - /* * sysfs parts below */ @@ -757,10 +456,8 @@ static struct elevator_type iosched_deadline = { .elevator_dispatch_fn = deadline_dispatch_requests, .elevator_add_req_fn = deadline_add_request, .elevator_queue_empty_fn = deadline_queue_empty, - .elevator_former_req_fn = deadline_former_request, - .elevator_latter_req_fn = deadline_latter_request, - .elevator_set_req_fn = deadline_set_request, - .elevator_put_req_fn = deadline_put_request, + .elevator_former_req_fn = elv_rb_former_request, + .elevator_latter_req_fn = elv_rb_latter_request, .elevator_init_fn = deadline_init_queue, .elevator_exit_fn = deadline_exit_queue, }, @@ -772,24 +469,11 @@ static struct elevator_type iosched_deadline = { static int __init deadline_init(void) { - int ret; - - drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq), - 0, 0, NULL, NULL); - - if (!drq_pool) - return -ENOMEM; - - ret = elv_register(&iosched_deadline); - if (ret) - kmem_cache_destroy(drq_pool); - - return ret; + return elv_register(&iosched_deadline); } static void __exit deadline_exit(void) { - kmem_cache_destroy(drq_pool); elv_unregister(&iosched_deadline); } diff --git a/block/elevator.c b/block/elevator.c index 9b72dc7c8a5..487dd3da885 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -3,7 +3,7 @@ * * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE * - * 30042000 Jens Axboe <axboe@suse.de> : + * 30042000 Jens Axboe <axboe@kernel.dk> : * * Split the elevator a bit so that it is possible to choose a different * one or even write a new "plug in". There are three pieces: @@ -33,6 +33,7 @@ #include <linux/compiler.h> #include <linux/delay.h> #include <linux/blktrace_api.h> +#include <linux/hash.h> #include <asm/uaccess.h> @@ -40,6 +41,16 @@ static DEFINE_SPINLOCK(elv_list_lock); static LIST_HEAD(elv_list); /* + * Merge hash stuff. + */ +static const int elv_hash_shift = 6; +#define ELV_HASH_BLOCK(sec) ((sec) >> 3) +#define ELV_HASH_FN(sec) (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift)) +#define ELV_HASH_ENTRIES (1 << elv_hash_shift) +#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) +#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) + +/* * can we safely merge with this request? */ inline int elv_rq_merge_ok(struct request *rq, struct bio *bio) @@ -56,8 +67,7 @@ inline int elv_rq_merge_ok(struct request *rq, struct bio *bio) /* * same device and no special stuff set, merge is ok */ - if (rq->rq_disk == bio->bi_bdev->bd_disk && - !rq->waiting && !rq->special) + if (rq->rq_disk == bio->bi_bdev->bd_disk && !rq->special) return 1; return 0; @@ -151,27 +161,44 @@ __setup("elevator=", elevator_setup); static struct kobj_type elv_ktype; -static elevator_t *elevator_alloc(struct elevator_type *e) -{ - elevator_t *eq = kmalloc(sizeof(elevator_t), GFP_KERNEL); - if (eq) { - memset(eq, 0, sizeof(*eq)); - eq->ops = &e->ops; - eq->elevator_type = e; - kobject_init(&eq->kobj); - snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched"); - eq->kobj.ktype = &elv_ktype; - mutex_init(&eq->sysfs_lock); - } else { - elevator_put(e); - } +static elevator_t *elevator_alloc(request_queue_t *q, struct elevator_type *e) +{ + elevator_t *eq; + int i; + + eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL, q->node); + if (unlikely(!eq)) + goto err; + + memset(eq, 0, sizeof(*eq)); + eq->ops = &e->ops; + eq->elevator_type = e; + kobject_init(&eq->kobj); + snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched"); + eq->kobj.ktype = &elv_ktype; + mutex_init(&eq->sysfs_lock); + + eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES, + GFP_KERNEL, q->node); + if (!eq->hash) + goto err; + + for (i = 0; i < ELV_HASH_ENTRIES; i++) + INIT_HLIST_HEAD(&eq->hash[i]); + return eq; +err: + kfree(eq); + elevator_put(e); + return NULL; } static void elevator_release(struct kobject *kobj) { elevator_t *e = container_of(kobj, elevator_t, kobj); + elevator_put(e->elevator_type); + kfree(e->hash); kfree(e); } @@ -198,7 +225,7 @@ int elevator_init(request_queue_t *q, char *name) e = elevator_get("noop"); } - eq = elevator_alloc(e); + eq = elevator_alloc(q, e); if (!eq) return -ENOMEM; @@ -212,6 +239,8 @@ int elevator_init(request_queue_t *q, char *name) return ret; } +EXPORT_SYMBOL(elevator_init); + void elevator_exit(elevator_t *e) { mutex_lock(&e->sysfs_lock); @@ -223,10 +252,118 @@ void elevator_exit(elevator_t *e) kobject_put(&e->kobj); } +EXPORT_SYMBOL(elevator_exit); + +static inline void __elv_rqhash_del(struct request *rq) +{ + hlist_del_init(&rq->hash); +} + +static void elv_rqhash_del(request_queue_t *q, struct request *rq) +{ + if (ELV_ON_HASH(rq)) + __elv_rqhash_del(rq); +} + +static void elv_rqhash_add(request_queue_t *q, struct request *rq) +{ + elevator_t *e = q->elevator; + + BUG_ON(ELV_ON_HASH(rq)); + hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]); +} + +static void elv_rqhash_reposition(request_queue_t *q, struct request *rq) +{ + __elv_rqhash_del(rq); + elv_rqhash_add(q, rq); +} + +static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset) +{ + elevator_t *e = q->elevator; + struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)]; + struct hlist_node *entry, *next; + struct request *rq; + + hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) { + BUG_ON(!ELV_ON_HASH(rq)); + + if (unlikely(!rq_mergeable(rq))) { + __elv_rqhash_del(rq); + continue; + } + + if (rq_hash_key(rq) == offset) + return rq; + } + + return NULL; +} + +/* + * RB-tree support functions for inserting/lookup/removal of requests + * in a sorted RB tree. + */ +struct request *elv_rb_add(struct rb_root *root, struct request *rq) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct request *__rq; + + while (*p) { + parent = *p; + __rq = rb_entry(parent, struct request, rb_node); + + if (rq->sector < __rq->sector) + p = &(*p)->rb_left; + else if (rq->sector > __rq->sector) + p = &(*p)->rb_right; + else + return __rq; + } + + rb_link_node(&rq->rb_node, parent, p); + rb_insert_color(&rq->rb_node, root); + return NULL; +} + +EXPORT_SYMBOL(elv_rb_add); + +void elv_rb_del(struct rb_root *root, struct request *rq) +{ + BUG_ON(RB_EMPTY_NODE(&rq->rb_node)); + rb_erase(&rq->rb_node, root); + RB_CLEAR_NODE(&rq->rb_node); +} + +EXPORT_SYMBOL(elv_rb_del); + +struct request *elv_rb_find(struct rb_root *root, sector_t sector) +{ + struct rb_node *n = root->rb_node; + struct request *rq; + + while (n) { + rq = rb_entry(n, struct request, rb_node); + + if (sector < rq->sector) + n = n->rb_left; + else if (sector > rq->sector) + n = n->rb_right; + else + return rq; + } + + return NULL; +} + +EXPORT_SYMBOL(elv_rb_find); + /* * Insert rq into dispatch queue of q. Queue lock must be held on - * entry. If sort != 0, rq is sort-inserted; otherwise, rq will be - * appended to the dispatch queue. To be used by specific elevators. + * entry. rq is sort insted into the dispatch queue. To be used by + * specific elevators. */ void elv_dispatch_sort(request_queue_t *q, struct request *rq) { @@ -235,6 +372,9 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq) if (q->last_merge == rq) q->last_merge = NULL; + + elv_rqhash_del(q, rq); + q->nr_sorted--; boundary = q->end_sector; @@ -242,7 +382,7 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq) list_for_each_prev(entry, &q->queue_head) { struct request *pos = list_entry_rq(entry); - if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED)) + if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED)) break; if (rq->sector >= boundary) { if (pos->sector < boundary) @@ -258,11 +398,38 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq) list_add(&rq->queuelist, entry); } +EXPORT_SYMBOL(elv_dispatch_sort); + +/* + * Insert rq into dispatch queue of q. Queue lock must be held on + * entry. rq is added to the back of the dispatch queue. To be used by + * specific elevators. + */ +void elv_dispatch_add_tail(struct request_queue *q, struct request *rq) +{ + if (q->last_merge == rq) + q->last_merge = NULL; + + elv_rqhash_del(q, rq); + + q->nr_sorted--; + + q->end_sector = rq_end_sector(rq); + q->boundary_rq = rq; + list_add_tail(&rq->queuelist, &q->queue_head); +} + +EXPORT_SYMBOL(elv_dispatch_add_tail); + int elv_merge(request_queue_t *q, struct request **req, struct bio *bio) { elevator_t *e = q->elevator; + struct request *__rq; int ret; + /* + * First try one-hit cache. + */ if (q->last_merge) { ret = elv_try_merge(q->last_merge, bio); if (ret != ELEVATOR_NO_MERGE) { @@ -271,18 +438,30 @@ int elv_merge(request_queue_t *q, struct request **req, struct bio *bio) } } + /* + * See if our hash lookup can find a potential backmerge. + */ + __rq = elv_rqhash_find(q, bio->bi_sector); + if (__rq && elv_rq_merge_ok(__rq, bio)) { + *req = __rq; + return ELEVATOR_BACK_MERGE; + } + if (e->ops->elevator_merge_fn) return e->ops->elevator_merge_fn(q, req, bio); return ELEVATOR_NO_MERGE; } -void elv_merged_request(request_queue_t *q, struct request *rq) +void elv_merged_request(request_queue_t *q, struct request *rq, int type) { elevator_t *e = q->elevator; if (e->ops->elevator_merged_fn) - e->ops->elevator_merged_fn(q, rq); + e->ops->elevator_merged_fn(q, rq, type); + + if (type == ELEVATOR_BACK_MERGE) + elv_rqhash_reposition(q, rq); q->last_merge = rq; } @@ -294,8 +473,11 @@ void elv_merge_requests(request_queue_t *q, struct request *rq, if (e->ops->elevator_merge_req_fn) e->ops->elevator_merge_req_fn(q, rq, next); - q->nr_sorted--; + elv_rqhash_reposition(q, rq); + elv_rqhash_del(q, next); + + q->nr_sorted--; q->last_merge = rq; } @@ -313,7 +495,7 @@ void elv_requeue_request(request_queue_t *q, struct request *rq) e->ops->elevator_deactivate_req_fn(q, rq); } - rq->flags &= ~REQ_STARTED; + rq->cmd_flags &= ~REQ_STARTED; elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); } @@ -344,13 +526,13 @@ void elv_insert(request_queue_t *q, struct request *rq, int where) switch (where) { case ELEVATOR_INSERT_FRONT: - rq->flags |= REQ_SOFTBARRIER; + rq->cmd_flags |= REQ_SOFTBARRIER; list_add(&rq->queuelist, &q->queue_head); break; case ELEVATOR_INSERT_BACK: - rq->flags |= REQ_SOFTBARRIER; + rq->cmd_flags |= REQ_SOFTBARRIER; elv_drain_elevator(q); list_add_tail(&rq->queuelist, &q->queue_head); /* @@ -369,10 +551,14 @@ void elv_insert(request_queue_t *q, struct request *rq, int where) case ELEVATOR_INSERT_SORT: BUG_ON(!blk_fs_request(rq)); - rq->flags |= REQ_SORTED; + rq->cmd_flags |= REQ_SORTED; q->nr_sorted++; - if (q->last_merge == NULL && rq_mergeable(rq)) - q->last_merge = rq; + if (rq_mergeable(rq)) { + elv_rqhash_add(q, rq); + if (!q->last_merge) + q->last_merge = rq; + } + /* * Some ioscheds (cfq) run q->request_fn directly, so * rq cannot be accessed after calling @@ -387,7 +573,7 @@ void elv_insert(request_queue_t *q, struct request *rq, int where) * insertion; otherwise, requests should be requeued * in ordseq order. */ - rq->flags |= REQ_SOFTBARRIER; + rq->cmd_flags |= REQ_SOFTBARRIER; if (q->ordseq == 0) { list_add(&rq->queuelist, &q->queue_head); @@ -429,9 +615,9 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where, int plug) { if (q->ordcolor) - rq->flags |= REQ_ORDERED_COLOR; + rq->cmd_flags |= REQ_ORDERED_COLOR; - if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { + if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { /* * toggle ordered color */ @@ -452,7 +638,7 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where, q->end_sector = rq_end_sector(rq); q->boundary_rq = rq; } - } else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT) + } else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT) where = ELEVATOR_INSERT_BACK; if (plug) @@ -461,6 +647,8 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where, elv_insert(q, rq, where); } +EXPORT_SYMBOL(__elv_add_request); + void elv_add_request(request_queue_t *q, struct request *rq, int where, int plug) { @@ -471,6 +659,8 @@ void elv_add_request(request_queue_t *q, struct request *rq, int where, spin_unlock_irqrestore(q->queue_lock, flags); } +EXPORT_SYMBOL(elv_add_request); + static inline struct request *__elv_next_request(request_queue_t *q) { struct request *rq; @@ -493,7 +683,7 @@ struct request *elv_next_request(request_queue_t *q) int ret; while ((rq = __elv_next_request(q)) != NULL) { - if (!(rq->flags & REQ_STARTED)) { + if (!(rq->cmd_flags & REQ_STARTED)) { elevator_t *e = q->elevator; /* @@ -510,7 +700,7 @@ struct request *elv_next_request(request_queue_t *q) * it, a request that has been delayed should * not be passed by new incoming requests */ - rq->flags |= REQ_STARTED; + rq->cmd_flags |= REQ_STARTED; blk_add_trace_rq(q, rq, BLK_TA_ISSUE); } @@ -519,7 +709,7 @@ struct request *elv_next_request(request_queue_t *q) q->boundary_rq = NULL; } - if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn) + if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn) break; ret = q->prep_rq_fn(q, rq); @@ -541,7 +731,7 @@ struct request *elv_next_request(request_queue_t *q) nr_bytes = rq->data_len; blkdev_dequeue_request(rq); - rq->flags |= REQ_QUIET; + rq->cmd_flags |= REQ_QUIET; end_that_request_chunk(rq, 0, nr_bytes); end_that_request_last(rq, 0); } else { @@ -554,9 +744,12 @@ struct request *elv_next_request(request_queue_t *q) return rq; } +EXPORT_SYMBOL(elv_next_request); + void elv_dequeue_request(request_queue_t *q, struct request *rq) { BUG_ON(list_empty(&rq->queuelist)); + BUG_ON(ELV_ON_HASH(rq)); list_del_init(&rq->queuelist); @@ -569,6 +762,8 @@ void elv_dequeue_request(request_queue_t *q, struct request *rq) q->in_flight++; } +EXPORT_SYMBOL(elv_dequeue_request); + int elv_queue_empty(request_queue_t *q) { elevator_t *e = q->elevator; @@ -582,6 +777,8 @@ int elv_queue_empty(request_queue_t *q) return 1; } +EXPORT_SYMBOL(elv_queue_empty); + struct request *elv_latter_request(request_queue_t *q, struct request *rq) { elevator_t *e = q->elevator; @@ -600,13 +797,12 @@ struct request *elv_former_request(request_queue_t *q, struct request *rq) return NULL; } -int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio, - gfp_t gfp_mask) +int elv_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask) { elevator_t *e = q->elevator; if (e->ops->elevator_set_req_fn) - return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask); + return e->ops->elevator_set_req_fn(q, rq, gfp_mask); rq->elevator_private = NULL; return 0; @@ -620,12 +816,12 @@ void elv_put_request(request_queue_t *q, struct request *rq) e->ops->elevator_put_req_fn(q, rq); } -int elv_may_queue(request_queue_t *q, int rw, struct bio *bio) +int elv_may_queue(request_queue_t *q, int rw) { elevator_t *e = q->elevator; if (e->ops->elevator_may_queue_fn) - return e->ops->elevator_may_queue_fn(q, rw, bio); + return e->ops->elevator_may_queue_fn(q, rw); return ELV_MQUEUE_MAY; } @@ -792,7 +988,7 @@ static int elevator_switch(request_queue_t *q, struct elevator_type *new_e) /* * Allocate new elevator */ - e = elevator_alloc(new_e); + e = elevator_alloc(q, new_e); if (!e) return 0; @@ -908,11 +1104,26 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name) return len; } -EXPORT_SYMBOL(elv_dispatch_sort); -EXPORT_SYMBOL(elv_add_request); -EXPORT_SYMBOL(__elv_add_request); -EXPORT_SYMBOL(elv_next_request); -EXPORT_SYMBOL(elv_dequeue_request); -EXPORT_SYMBOL(elv_queue_empty); -EXPORT_SYMBOL(elevator_exit); -EXPORT_SYMBOL(elevator_init); +struct request *elv_rb_former_request(request_queue_t *q, struct request *rq) +{ + struct rb_node *rbprev = rb_prev(&rq->rb_node); + + if (rbprev) + return rb_entry_rq(rbprev); + + return NULL; +} + +EXPORT_SYMBOL(elv_rb_former_request); + +struct request *elv_rb_latter_request(request_queue_t *q, struct request *rq) +{ + struct rb_node *rbnext = rb_next(&rq->rb_node); + + if (rbnext) + return rb_entry_rq(rbnext); + + return NULL; +} + +EXPORT_SYMBOL(elv_rb_latter_request); diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 51dc0edf76e..83425fb3c8d 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -39,6 +39,7 @@ static void blk_unplug_timeout(unsigned long data); static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); static void init_request_from_bio(struct request *req, struct bio *bio); static int __make_request(request_queue_t *q, struct bio *bio); +static struct io_context *current_io_context(gfp_t gfp_flags, int node); /* * For the allocated request tables @@ -277,19 +278,19 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) EXPORT_SYMBOL(blk_queue_make_request); -static inline void rq_init(request_queue_t *q, struct request *rq) +static void rq_init(request_queue_t *q, struct request *rq) { INIT_LIST_HEAD(&rq->queuelist); INIT_LIST_HEAD(&rq->donelist); rq->errors = 0; - rq->rq_status = RQ_ACTIVE; rq->bio = rq->biotail = NULL; + INIT_HLIST_NODE(&rq->hash); + RB_CLEAR_NODE(&rq->rb_node); rq->ioprio = 0; rq->buffer = NULL; rq->ref_count = 1; rq->q = q; - rq->waiting = NULL; rq->special = NULL; rq->data_len = 0; rq->data = NULL; @@ -382,8 +383,8 @@ unsigned blk_ordered_req_seq(struct request *rq) if (rq == &q->post_flush_rq) return QUEUE_ORDSEQ_POSTFLUSH; - if ((rq->flags & REQ_ORDERED_COLOR) == - (q->orig_bar_rq->flags & REQ_ORDERED_COLOR)) + if ((rq->cmd_flags & REQ_ORDERED_COLOR) == + (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR)) return QUEUE_ORDSEQ_DRAIN; else return QUEUE_ORDSEQ_DONE; @@ -446,11 +447,11 @@ static void queue_flush(request_queue_t *q, unsigned which) end_io = post_flush_end_io; } + rq->cmd_flags = REQ_HARDBARRIER; rq_init(q, rq); - rq->flags = REQ_HARDBARRIER; rq->elevator_private = NULL; + rq->elevator_private2 = NULL; rq->rq_disk = q->bar_rq.rq_disk; - rq->rl = NULL; rq->end_io = end_io; q->prepare_flush_fn(q, rq); @@ -471,11 +472,13 @@ static inline struct request *start_ordered(request_queue_t *q, blkdev_dequeue_request(rq); q->orig_bar_rq = rq; rq = &q->bar_rq; + rq->cmd_flags = 0; rq_init(q, rq); - rq->flags = bio_data_dir(q->orig_bar_rq->bio); - rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0; + if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) + rq->cmd_flags |= REQ_RW; + rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0; rq->elevator_private = NULL; - rq->rl = NULL; + rq->elevator_private2 = NULL; init_request_from_bio(rq, q->orig_bar_rq->bio); rq->end_io = bar_end_io; @@ -587,8 +590,8 @@ static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error) return 0; } -static inline int ordered_bio_endio(struct request *rq, struct bio *bio, - unsigned int nbytes, int error) +static int ordered_bio_endio(struct request *rq, struct bio *bio, + unsigned int nbytes, int error) { request_queue_t *q = rq->q; bio_end_io_t *endio; @@ -1124,7 +1127,7 @@ void blk_queue_end_tag(request_queue_t *q, struct request *rq) } list_del_init(&rq->queuelist); - rq->flags &= ~REQ_QUEUED; + rq->cmd_flags &= ~REQ_QUEUED; rq->tag = -1; if (unlikely(bqt->tag_index[tag] == NULL)) @@ -1160,7 +1163,7 @@ int blk_queue_start_tag(request_queue_t *q, struct request *rq) struct blk_queue_tag *bqt = q->queue_tags; int tag; - if (unlikely((rq->flags & REQ_QUEUED))) { + if (unlikely((rq->cmd_flags & REQ_QUEUED))) { printk(KERN_ERR "%s: request %p for device [%s] already tagged %d", __FUNCTION__, rq, @@ -1168,13 +1171,18 @@ int blk_queue_start_tag(request_queue_t *q, struct request *rq) BUG(); } - tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); - if (tag >= bqt->max_depth) - return 1; + /* + * Protect against shared tag maps, as we may not have exclusive + * access to the tag map. + */ + do { + tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); + if (tag >= bqt->max_depth) + return 1; - __set_bit(tag, bqt->tag_map); + } while (test_and_set_bit(tag, bqt->tag_map)); - rq->flags |= REQ_QUEUED; + rq->cmd_flags |= REQ_QUEUED; rq->tag = tag; bqt->tag_index[tag] = rq; blkdev_dequeue_request(rq); @@ -1210,65 +1218,31 @@ void blk_queue_invalidate_tags(request_queue_t *q) printk(KERN_ERR "%s: bad tag found on list\n", __FUNCTION__); list_del_init(&rq->queuelist); - rq->flags &= ~REQ_QUEUED; + rq->cmd_flags &= ~REQ_QUEUED; } else blk_queue_end_tag(q, rq); - rq->flags &= ~REQ_STARTED; + rq->cmd_flags &= ~REQ_STARTED; __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); } } EXPORT_SYMBOL(blk_queue_invalidate_tags); -static const char * const rq_flags[] = { - "REQ_RW", - "REQ_FAILFAST", - "REQ_SORTED", - "REQ_SOFTBARRIER", - "REQ_HARDBARRIER", - "REQ_FUA", - "REQ_CMD", - "REQ_NOMERGE", - "REQ_STARTED", - "REQ_DONTPREP", - "REQ_QUEUED", - "REQ_ELVPRIV", - "REQ_PC", - "REQ_BLOCK_PC", - "REQ_SENSE", - "REQ_FAILED", - "REQ_QUIET", - "REQ_SPECIAL", - "REQ_DRIVE_CMD", - "REQ_DRIVE_TASK", - "REQ_DRIVE_TASKFILE", - "REQ_PREEMPT", - "REQ_PM_SUSPEND", - "REQ_PM_RESUME", - "REQ_PM_SHUTDOWN", - "REQ_ORDERED_COLOR", -}; - void blk_dump_rq_flags(struct request *rq, char *msg) { int bit; - printk("%s: dev %s: flags = ", msg, - rq->rq_disk ? rq->rq_disk->disk_name : "?"); - bit = 0; - do { - if (rq->flags & (1 << bit)) - printk("%s ", rq_flags[bit]); - bit++; - } while (bit < __REQ_NR_BITS); + printk("%s: dev %s: type=%x, flags=%x\n", msg, + rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, + rq->cmd_flags); printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector, rq->nr_sectors, rq->current_nr_sectors); printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len); - if (rq->flags & (REQ_BLOCK_PC | REQ_PC)) { + if (blk_pc_request(rq)) { printk("cdb: "); for (bit = 0; bit < sizeof(rq->cmd); bit++) printk("%02x ", rq->cmd[bit]); @@ -1441,7 +1415,7 @@ static inline int ll_new_mergeable(request_queue_t *q, int nr_phys_segs = bio_phys_segments(q, bio); if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { - req->flags |= REQ_NOMERGE; + req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; return 0; @@ -1464,7 +1438,7 @@ static inline int ll_new_hw_segment(request_queue_t *q, if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { - req->flags |= REQ_NOMERGE; + req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; return 0; @@ -1491,7 +1465,7 @@ static int ll_back_merge_fn(request_queue_t *q, struct request *req, max_sectors = q->max_sectors; if (req->nr_sectors + bio_sectors(bio) > max_sectors) { - req->flags |= REQ_NOMERGE; + req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; return 0; @@ -1530,7 +1504,7 @@ static int ll_front_merge_fn(request_queue_t *q, struct request *req, if (req->nr_sectors + bio_sectors(bio) > max_sectors) { - req->flags |= REQ_NOMERGE; + req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; return 0; @@ -2029,14 +2003,13 @@ EXPORT_SYMBOL(blk_get_queue); static inline void blk_free_request(request_queue_t *q, struct request *rq) { - if (rq->flags & REQ_ELVPRIV) + if (rq->cmd_flags & REQ_ELVPRIV) elv_put_request(q, rq); mempool_free(rq, q->rq.rq_pool); } -static inline struct request * -blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, - int priv, gfp_t gfp_mask) +static struct request * +blk_alloc_request(request_queue_t *q, int rw, int priv, gfp_t gfp_mask) { struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); @@ -2044,17 +2017,17 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, return NULL; /* - * first three bits are identical in rq->flags and bio->bi_rw, + * first three bits are identical in rq->cmd_flags and bio->bi_rw, * see bio.h and blkdev.h */ - rq->flags = rw; + rq->cmd_flags = rw | REQ_ALLOCED; if (priv) { - if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) { + if (unlikely(elv_set_request(q, rq, gfp_mask))) { mempool_free(rq, q->rq.rq_pool); return NULL; } - rq->flags |= REQ_ELVPRIV; + rq->cmd_flags |= REQ_ELVPRIV; } return rq; @@ -2141,13 +2114,13 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, struct io_context *ioc = NULL; int may_queue, priv; - may_queue = elv_may_queue(q, rw, bio); + may_queue = elv_may_queue(q, rw); if (may_queue == ELV_MQUEUE_NO) goto rq_starved; if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) { if (rl->count[rw]+1 >= q->nr_requests) { - ioc = current_io_context(GFP_ATOMIC); + ioc = current_io_context(GFP_ATOMIC, q->node); /* * The queue will fill after this allocation, so set * it as full, and mark this process as "batching". @@ -2189,7 +2162,7 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, spin_unlock_irq(q->queue_lock); - rq = blk_alloc_request(q, rw, bio, priv, gfp_mask); + rq = blk_alloc_request(q, rw, priv, gfp_mask); if (unlikely(!rq)) { /* * Allocation failed presumably due to memory. Undo anything @@ -2225,7 +2198,6 @@ rq_starved: ioc->nr_batch_requests--; rq_init(q, rq); - rq->rl = rl; blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); out: @@ -2268,7 +2240,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw, * up to a big batch of them for a small period time. * See ioc_batching, ioc_set_batching */ - ioc = current_io_context(GFP_NOIO); + ioc = current_io_context(GFP_NOIO, q->node); ioc_set_batching(q, ioc); spin_lock_irq(q->queue_lock); @@ -2300,6 +2272,25 @@ struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask) EXPORT_SYMBOL(blk_get_request); /** + * blk_start_queueing - initiate dispatch of requests to device + * @q: request queue to kick into gear + * + * This is basically a helper to remove the need to know whether a queue + * is plugged or not if someone just wants to initiate dispatch of requests + * for this queue. + * + * The queue lock must be held with interrupts disabled. + */ +void blk_start_queueing(request_queue_t *q) +{ + if (!blk_queue_plugged(q)) + q->request_fn(q); + else + __generic_unplug_device(q); +} +EXPORT_SYMBOL(blk_start_queueing); + +/** * blk_requeue_request - put a request back on queue * @q: request queue where request should be inserted * @rq: request to be inserted @@ -2351,7 +2342,8 @@ void blk_insert_request(request_queue_t *q, struct request *rq, * must not attempt merges on this) and that it acts as a soft * barrier */ - rq->flags |= REQ_SPECIAL | REQ_SOFTBARRIER; + rq->cmd_type = REQ_TYPE_SPECIAL; + rq->cmd_flags |= REQ_SOFTBARRIER; rq->special = data; @@ -2365,11 +2357,7 @@ void blk_insert_request(request_queue_t *q, struct request *rq, drive_stat_acct(rq, rq->nr_sectors, 1); __elv_add_request(q, rq, where, 0); - - if (blk_queue_plugged(q)) - __generic_unplug_device(q); - else - q->request_fn(q); + blk_start_queueing(q); spin_unlock_irqrestore(q->queue_lock, flags); } @@ -2558,7 +2546,7 @@ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; rq->rq_disk = bd_disk; - rq->flags |= REQ_NOMERGE; + rq->cmd_flags |= REQ_NOMERGE; rq->end_io = done; WARN_ON(irqs_disabled()); spin_lock_irq(q->queue_lock); @@ -2598,10 +2586,9 @@ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, rq->sense_len = 0; } - rq->waiting = &wait; + rq->end_io_data = &wait; blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); wait_for_completion(&wait); - rq->waiting = NULL; if (rq->errors) err = -EIO; @@ -2710,8 +2697,6 @@ EXPORT_SYMBOL_GPL(disk_round_stats); */ void __blk_put_request(request_queue_t *q, struct request *req) { - struct request_list *rl = req->rl; - if (unlikely(!q)) return; if (unlikely(--req->ref_count)) @@ -2719,18 +2704,16 @@ void __blk_put_request(request_queue_t *q, struct request *req) elv_completed_request(q, req); - req->rq_status = RQ_INACTIVE; - req->rl = NULL; - /* * Request may not have originated from ll_rw_blk. if not, * it didn't come out of our reserved rq pools */ - if (rl) { + if (req->cmd_flags & REQ_ALLOCED) { int rw = rq_data_dir(req); - int priv = req->flags & REQ_ELVPRIV; + int priv = req->cmd_flags & REQ_ELVPRIV; BUG_ON(!list_empty(&req->queuelist)); + BUG_ON(!hlist_unhashed(&req->hash)); blk_free_request(q, req); freed_request(q, rw, priv); @@ -2764,9 +2747,9 @@ EXPORT_SYMBOL(blk_put_request); */ void blk_end_sync_rq(struct request *rq, int error) { - struct completion *waiting = rq->waiting; + struct completion *waiting = rq->end_io_data; - rq->waiting = NULL; + rq->end_io_data = NULL; __blk_put_request(rq->q, rq); /* @@ -2829,7 +2812,7 @@ static int attempt_merge(request_queue_t *q, struct request *req, if (rq_data_dir(req) != rq_data_dir(next) || req->rq_disk != next->rq_disk - || next->waiting || next->special) + || next->special) return 0; /* @@ -2890,22 +2873,24 @@ static inline int attempt_front_merge(request_queue_t *q, struct request *rq) static void init_request_from_bio(struct request *req, struct bio *bio) { - req->flags |= REQ_CMD; + req->cmd_type = REQ_TYPE_FS; /* * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) */ if (bio_rw_ahead(bio) || bio_failfast(bio)) - req->flags |= REQ_FAILFAST; + req->cmd_flags |= REQ_FAILFAST; /* * REQ_BARRIER implies no merging, but lets make it explicit */ if (unlikely(bio_barrier(bio))) - req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE); + req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); if (bio_sync(bio)) - req->flags |= REQ_RW_SYNC; + req->cmd_flags |= REQ_RW_SYNC; + if (bio_rw_meta(bio)) + req->cmd_flags |= REQ_RW_META; req->errors = 0; req->hard_sector = req->sector = bio->bi_sector; @@ -2914,7 +2899,6 @@ static void init_request_from_bio(struct request *req, struct bio *bio) req->nr_phys_segments = bio_phys_segments(req->q, bio); req->nr_hw_segments = bio_hw_segments(req->q, bio); req->buffer = bio_data(bio); /* see ->buffer comment above */ - req->waiting = NULL; req->bio = req->biotail = bio; req->ioprio = bio_prio(bio); req->rq_disk = bio->bi_bdev->bd_disk; @@ -2924,17 +2908,11 @@ static void init_request_from_bio(struct request *req, struct bio *bio) static int __make_request(request_queue_t *q, struct bio *bio) { struct request *req; - int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync; - unsigned short prio; - sector_t sector; + int el_ret, nr_sectors, barrier, err; + const unsigned short prio = bio_prio(bio); + const int sync = bio_sync(bio); - sector = bio->bi_sector; nr_sectors = bio_sectors(bio); - cur_nr_sectors = bio_cur_sectors(bio); - prio = bio_prio(bio); - - rw = bio_data_dir(bio); - sync = bio_sync(bio); /* * low level driver can indicate that it wants pages above a @@ -2943,8 +2921,6 @@ static int __make_request(request_queue_t *q, struct bio *bio) */ blk_queue_bounce(q, &bio); - spin_lock_prefetch(q->queue_lock); - barrier = bio_barrier(bio); if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) { err = -EOPNOTSUPP; @@ -2972,7 +2948,7 @@ static int __make_request(request_queue_t *q, struct bio *bio) req->ioprio = ioprio_best(req->ioprio, prio); drive_stat_acct(req, nr_sectors, 0); if (!attempt_back_merge(q, req)) - elv_merged_request(q, req); + elv_merged_request(q, req, el_ret); goto out; case ELEVATOR_FRONT_MERGE: @@ -2992,14 +2968,14 @@ static int __make_request(request_queue_t *q, struct bio *bio) * not touch req->buffer either... */ req->buffer = bio_data(bio); - req->current_nr_sectors = cur_nr_sectors; - req->hard_cur_sectors = cur_nr_sectors; - req->sector = req->hard_sector = sector; + req->current_nr_sectors = bio_cur_sectors(bio); + req->hard_cur_sectors = req->current_nr_sectors; + req->sector = req->hard_sector = bio->bi_sector; req->nr_sectors = req->hard_nr_sectors += nr_sectors; req->ioprio = ioprio_best(req->ioprio, prio); drive_stat_acct(req, nr_sectors, 0); if (!attempt_front_merge(q, req)) - elv_merged_request(q, req); + elv_merged_request(q, req, el_ret); goto out; /* ELV_NO_MERGE: elevator says don't/can't merge. */ @@ -3012,7 +2988,7 @@ get_rq: * Grab a free request. This is might sleep but can not fail. * Returns with the queue unlocked. */ - req = get_request_wait(q, rw, bio); + req = get_request_wait(q, bio_data_dir(bio), bio); /* * After dropping the lock and possibly sleeping here, our request @@ -3306,7 +3282,7 @@ static int __end_that_request_first(struct request *req, int uptodate, req->errors = 0; if (!uptodate) { - if (blk_fs_request(req) && !(req->flags & REQ_QUIET)) + if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET)) printk("end_request: I/O error, dev %s, sector %llu\n", req->rq_disk ? req->rq_disk->disk_name : "?", (unsigned long long)req->sector); @@ -3569,8 +3545,8 @@ EXPORT_SYMBOL(end_request); void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio) { - /* first two bits are identical in rq->flags and bio->bi_rw */ - rq->flags |= (bio->bi_rw & 3); + /* first two bits are identical in rq->cmd_flags and bio->bi_rw */ + rq->cmd_flags |= (bio->bi_rw & 3); rq->nr_phys_segments = bio_phys_segments(q, bio); rq->nr_hw_segments = bio_hw_segments(q, bio); @@ -3658,25 +3634,22 @@ EXPORT_SYMBOL(put_io_context); /* Called by the exitting task */ void exit_io_context(void) { - unsigned long flags; struct io_context *ioc; struct cfq_io_context *cic; - local_irq_save(flags); task_lock(current); ioc = current->io_context; current->io_context = NULL; - ioc->task = NULL; task_unlock(current); - local_irq_restore(flags); + ioc->task = NULL; if (ioc->aic && ioc->aic->exit) ioc->aic->exit(ioc->aic); if (ioc->cic_root.rb_node != NULL) { cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node); cic->exit(ioc); } - + put_io_context(ioc); } @@ -3688,7 +3661,7 @@ void exit_io_context(void) * but since the current task itself holds a reference, the context can be * used in general code, so long as it stays within `current` context. */ -struct io_context *current_io_context(gfp_t gfp_flags) +static struct io_context *current_io_context(gfp_t gfp_flags, int node) { struct task_struct *tsk = current; struct io_context *ret; @@ -3697,11 +3670,11 @@ struct io_context *current_io_context(gfp_t gfp_flags) if (likely(ret)) return ret; - ret = kmem_cache_alloc(iocontext_cachep, gfp_flags); + ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node); if (ret) { atomic_set(&ret->refcount, 1); ret->task = current; - ret->set_ioprio = NULL; + ret->ioprio_changed = 0; ret->last_waited = jiffies; /* doesn't matter... */ ret->nr_batch_requests = 0; /* because this is 0 */ ret->aic = NULL; @@ -3721,10 +3694,10 @@ EXPORT_SYMBOL(current_io_context); * * This is always called in the context of the task which submitted the I/O. */ -struct io_context *get_io_context(gfp_t gfp_flags) +struct io_context *get_io_context(gfp_t gfp_flags, int node) { struct io_context *ret; - ret = current_io_context(gfp_flags); + ret = current_io_context(gfp_flags, node); if (likely(ret)) atomic_inc(&ret->refcount); return ret; @@ -3837,9 +3810,6 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count) ssize_t ret = queue_var_store(&ra_kb, page, count); spin_lock_irq(q->queue_lock); - if (ra_kb > (q->max_sectors >> 1)) - ra_kb = (q->max_sectors >> 1); - q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); spin_unlock_irq(q->queue_lock); diff --git a/block/noop-iosched.c b/block/noop-iosched.c index 56a7c620574..79af4317942 100644 --- a/block/noop-iosched.c +++ b/block/noop-iosched.c @@ -69,7 +69,7 @@ static void *noop_init_queue(request_queue_t *q, elevator_t *e) { struct noop_data *nd; - nd = kmalloc(sizeof(*nd), GFP_KERNEL); + nd = kmalloc_node(sizeof(*nd), GFP_KERNEL, q->node); if (!nd) return NULL; INIT_LIST_HEAD(&nd->queue); diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index b33eda26e20..2dc326421a2 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -294,7 +294,7 @@ static int sg_io(struct file *file, request_queue_t *q, rq->sense = sense; rq->sense_len = 0; - rq->flags |= REQ_BLOCK_PC; + rq->cmd_type = REQ_TYPE_BLOCK_PC; bio = rq->bio; /* @@ -470,7 +470,7 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q, memset(sense, 0, sizeof(sense)); rq->sense = sense; rq->sense_len = 0; - rq->flags |= REQ_BLOCK_PC; + rq->cmd_type = REQ_TYPE_BLOCK_PC; blk_execute_rq(q, disk, rq, 0); @@ -502,7 +502,7 @@ static int __blk_send_generic(request_queue_t *q, struct gendisk *bd_disk, int c int err; rq = blk_get_request(q, WRITE, __GFP_WAIT); - rq->flags |= REQ_BLOCK_PC; + rq->cmd_type = REQ_TYPE_BLOCK_PC; rq->data = NULL; rq->data_len = 0; rq->timeout = BLK_DEFAULT_TIMEOUT; |