diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-05-28 08:00:51 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-05-28 08:00:51 -0700 |
commit | b4412323cc954bd0a2144b1c2ed573dd2eddb32c (patch) | |
tree | a0dd14e6d46efbb36a0898c158e8efb49e4a22ef | |
parent | dc1d60a014aa9614518f9856ff661716d0969ffd (diff) | |
parent | d6de8be711b28049a5cb93c954722c311c7d3f7f (diff) |
Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block

* 'for-linus' of git://git.kernel.dk/linux-2.6-block:
  cfq-iosched: fix RCU problem in cfq_cic_lookup()
  block: make blktrace use per-cpu buffers for message notes
  Added in elevator switch message to blktrace stream
  Added in MESSAGE notes for blktraces
  block: reorder cfq_queue to save space on 64bit builds
  block: Move the second call to get_request to the end of the loop
  splice: handle try_to_release_page() failure
  splice: fix sendfile() issue with relay
-rw-r--r-- | block/blk-core.c | 37 | ||||
-rw-r--r-- | block/blktrace.c | 23 | ||||
-rw-r--r-- | block/cfq-iosched.c | 36 | ||||
-rw-r--r-- | block/elevator.c | 2 | ||||
-rw-r--r-- | fs/splice.c | 17 | ||||
-rw-r--r-- | include/linux/blktrace_api.h | 26 | ||||
-rw-r--r-- | kernel/relay.c | 2 |
7 files changed, 110 insertions, 33 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index 6a9cc0d22a6..1905aaba49f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -806,35 +806,32 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags, rq = get_request(q, rw_flags, bio, GFP_NOIO); while (!rq) { DEFINE_WAIT(wait); + struct io_context *ioc; struct request_list *rl = &q->rq; prepare_to_wait_exclusive(&rl->wait[rw], &wait, TASK_UNINTERRUPTIBLE); - rq = get_request(q, rw_flags, bio, GFP_NOIO); - - if (!rq) { - struct io_context *ioc; + blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ); - blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ); - - __generic_unplug_device(q); - spin_unlock_irq(q->queue_lock); - io_schedule(); + __generic_unplug_device(q); + spin_unlock_irq(q->queue_lock); + io_schedule(); - /* - * After sleeping, we become a "batching" process and - * will be able to allocate at least one request, and - * up to a big batch of them for a small period time. - * See ioc_batching, ioc_set_batching - */ - ioc = current_io_context(GFP_NOIO, q->node); - ioc_set_batching(q, ioc); + /* + * After sleeping, we become a "batching" process and + * will be able to allocate at least one request, and + * up to a big batch of them for a small period time. + * See ioc_batching, ioc_set_batching + */ + ioc = current_io_context(GFP_NOIO, q->node); + ioc_set_batching(q, ioc); - spin_lock_irq(q->queue_lock); - } + spin_lock_irq(q->queue_lock); finish_wait(&rl->wait[rw], &wait); - } + + rq = get_request(q, rw_flags, bio, GFP_NOIO); + }; return rq; } diff --git a/block/blktrace.c b/block/blktrace.c index b2cbb4e5d76..7ae87cc4a16 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -75,6 +75,23 @@ static void trace_note_time(struct blk_trace *bt) local_irq_restore(flags); } +void __trace_note_message(struct blk_trace *bt, const char *fmt, ...) 
+{ + int n; + va_list args; + char *buf; + + preempt_disable(); + buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); + va_start(args, fmt); + n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args); + va_end(args); + + trace_note(bt, 0, BLK_TN_MESSAGE, buf, n); + preempt_enable(); +} +EXPORT_SYMBOL_GPL(__trace_note_message); + static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, pid_t pid) { @@ -232,6 +249,7 @@ static void blk_trace_cleanup(struct blk_trace *bt) debugfs_remove(bt->dropped_file); blk_remove_tree(bt->dir); free_percpu(bt->sequence); + free_percpu(bt->msg_data); kfree(bt); } @@ -346,6 +364,10 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (!bt->sequence) goto err; + bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG); + if (!bt->msg_data) + goto err; + ret = -ENOENT; dir = blk_create_tree(buts->name); if (!dir) @@ -392,6 +414,7 @@ err: if (bt->dropped_file) debugfs_remove(bt->dropped_file); free_percpu(bt->sequence); + free_percpu(bt->msg_data); if (bt->rchan) relay_close(bt->rchan); kfree(bt); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index b399c62936e..d01b411c72f 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -124,6 +124,8 @@ struct cfq_data { struct cfq_queue { /* reference count */ atomic_t ref; + /* various state flags, see below */ + unsigned int flags; /* parent cfq_data */ struct cfq_data *cfqd; /* service_tree member */ @@ -138,14 +140,14 @@ struct cfq_queue { int queued[2]; /* currently allocated requests */ int allocated[2]; - /* pending metadata requests */ - int meta_pending; /* fifo list of requests in sort_list */ struct list_head fifo; unsigned long slice_end; long slice_resid; + /* pending metadata requests */ + int meta_pending; /* number of requests that are on the dispatch list or inside driver */ int dispatched; @@ -153,8 +155,6 @@ struct cfq_queue { unsigned short ioprio, org_ioprio; unsigned short ioprio_class, org_ioprio_class; - /* various state flags, see 
below */ - unsigned int flags; }; enum cfqq_state_flags { @@ -1142,6 +1142,9 @@ static void cfq_put_queue(struct cfq_queue *cfqq) kmem_cache_free(cfq_pool, cfqq); } +/* + * Must always be called with the rcu_read_lock() held + */ static void __call_for_each_cic(struct io_context *ioc, void (*func)(struct io_context *, struct cfq_io_context *)) @@ -1197,6 +1200,11 @@ static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) cfq_cic_free(cic); } +/* + * Must be called with rcu_read_lock() held or preemption otherwise disabled. + * Only two callers of this - ->dtor() which is called with the rcu_read_lock(), + * and ->trim() which is called with the task lock held + */ static void cfq_free_io_context(struct io_context *ioc) { /* @@ -1502,20 +1510,24 @@ static struct cfq_io_context * cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc) { struct cfq_io_context *cic; + unsigned long flags; void *k; if (unlikely(!ioc)) return NULL; + rcu_read_lock(); + /* * we maintain a last-hit cache, to avoid browsing over the tree */ cic = rcu_dereference(ioc->ioc_data); - if (cic && cic->key == cfqd) + if (cic && cic->key == cfqd) { + rcu_read_unlock(); return cic; + } do { - rcu_read_lock(); cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd); rcu_read_unlock(); if (!cic) @@ -1524,10 +1536,13 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc) k = cic->key; if (unlikely(!k)) { cfq_drop_dead_cic(cfqd, ioc, cic); + rcu_read_lock(); continue; } + spin_lock_irqsave(&ioc->lock, flags); rcu_assign_pointer(ioc->ioc_data, cic); + spin_unlock_irqrestore(&ioc->lock, flags); break; } while (1); @@ -2134,6 +2149,10 @@ static void *cfq_init_queue(struct request_queue *q) static void cfq_slab_kill(void) { + /* + * Caller already ensured that pending RCU callbacks are completed, + * so we should have no busy allocations at this point. 
+ */ if (cfq_pool) kmem_cache_destroy(cfq_pool); if (cfq_ioc_pool) @@ -2292,6 +2311,11 @@ static void __exit cfq_exit(void) ioc_gone = &all_gone; /* ioc_gone's update must be visible before reading ioc_count */ smp_wmb(); + + /* + * this also protects us from entering cfq_slab_kill() with + * pending RCU callbacks + */ if (elv_ioc_count_read(ioc_count)) wait_for_completion(ioc_gone); cfq_slab_kill(); diff --git a/block/elevator.c b/block/elevator.c index 980f8ae147b..902dd1344d5 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -1110,6 +1110,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); spin_unlock_irq(q->queue_lock); + blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name); + return 1; fail_register: diff --git a/fs/splice.c b/fs/splice.c index 78150038b58..aa5f6f60b30 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -58,8 +58,8 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, */ wait_on_page_writeback(page); - if (PagePrivate(page)) - try_to_release_page(page, GFP_KERNEL); + if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL)) + goto out_unlock; /* * If we succeeded in removing the mapping, set LRU flag @@ -75,6 +75,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, * Raced with truncate or failed to remove page from current * address space, unlock and return failure. 
*/ +out_unlock: unlock_page(page); return 1; } @@ -983,7 +984,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, while (len) { size_t read_len; - loff_t pos = sd->pos; + loff_t pos = sd->pos, prev_pos = pos; ret = do_splice_to(in, &pos, pipe, len, flags); if (unlikely(ret <= 0)) @@ -998,15 +999,19 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, * could get stuck data in the internal pipe: */ ret = actor(pipe, sd); - if (unlikely(ret <= 0)) + if (unlikely(ret <= 0)) { + sd->pos = prev_pos; goto out_release; + } bytes += ret; len -= ret; sd->pos = pos; - if (ret < read_len) + if (ret < read_len) { + sd->pos = prev_pos + ret; goto out_release; + } } done: @@ -1072,7 +1077,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, ret = splice_direct_to_actor(in, &sd, direct_splice_actor); if (ret > 0) - *ppos += ret; + *ppos = sd.pos; return ret; } diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index cfc3147e5cf..e3ef903aae8 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -55,6 +55,7 @@ enum blktrace_act { enum blktrace_notify { __BLK_TN_PROCESS = 0, /* establish pid/name mapping */ __BLK_TN_TIMESTAMP, /* include system clock */ + __BLK_TN_MESSAGE, /* Character string message */ }; @@ -79,6 +80,7 @@ enum blktrace_notify { #define BLK_TN_PROCESS (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY)) #define BLK_TN_TIMESTAMP (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY)) +#define BLK_TN_MESSAGE (__BLK_TN_MESSAGE | BLK_TC_ACT(BLK_TC_NOTIFY)) #define BLK_IO_TRACE_MAGIC 0x65617400 #define BLK_IO_TRACE_VERSION 0x07 @@ -119,6 +121,7 @@ struct blk_trace { int trace_state; struct rchan *rchan; unsigned long *sequence; + unsigned char *msg_data; u16 act_mask; u64 start_lba; u64 end_lba; @@ -149,7 +152,28 @@ extern void blk_trace_shutdown(struct request_queue *); extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *); 
extern int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct blk_user_trace_setup *buts); +extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); +/** + * blk_add_trace_msg - Add a (simple) message to the blktrace stream + * @q: queue the io is for + * @fmt: format to print message in + * args... Variable argument list for format + * + * Description: + * Records a (simple) message onto the blktrace stream. + * + * NOTE: BLK_TN_MAX_MSG characters are output at most. + * NOTE: Can not use 'static inline' due to presence of var args... + * + **/ +#define blk_add_trace_msg(q, fmt, ...) \ + do { \ + struct blk_trace *bt = (q)->blk_trace; \ + if (unlikely(bt)) \ + __trace_note_message(bt, fmt, ##__VA_ARGS__); \ + } while (0) +#define BLK_TN_MAX_MSG 128 /** * blk_add_trace_rq - Add a trace for a request oriented action @@ -299,6 +323,8 @@ extern int blk_trace_remove(struct request_queue *q); #define blk_trace_setup(q, name, dev, arg) (-ENOTTY) #define blk_trace_startstop(q, start) (-ENOTTY) #define blk_trace_remove(q) (-ENOTTY) +#define blk_add_trace_msg(q, fmt, ...) do { } while (0) + #endif /* CONFIG_BLK_DEV_IO_TRACE */ #endif /* __KERNEL__ */ #endif diff --git a/kernel/relay.c b/kernel/relay.c index bc24dcdc570..7de644cdec4 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1191,7 +1191,7 @@ static ssize_t relay_file_splice_read(struct file *in, ret = 0; spliced = 0; - while (len) { + while (len && !spliced) { ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret); if (ret < 0) break; |