From a82afdfcb8c0df09776b6458af6b68fc58b2e87b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Jul 2009 17:48:16 +0900 Subject: block: use the same failfast bits for bio and request bio and request use the same set of failfast bits. This patch makes the following changes to simplify things. * enumify BIO_RW* bits and reorder bits such that BIOS_RW_FAILFAST_* bits coincide with __REQ_FAILFAST_* bits. * The above pushes BIO_RW_AHEAD out of sync with __REQ_FAILFAST_DEV but the matching is useless anyway. init_request_from_bio() is responsible for setting FAILFAST bits on FS requests and non-FS requests never use BIO_RW_AHEAD. Drop the code and comment from blk_rq_bio_prep(). * Define REQ_FAILFAST_MASK which is OR of all FAILFAST bits and simplify FAILFAST flags handling in init_request_from_bio(). Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 69103e053c9..c3015736d81 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -93,6 +93,7 @@ enum rq_flag_bits { __REQ_FAILFAST_DEV, /* no driver retries of device errors */ __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ + /* above flags must match BIO_RW_* */ __REQ_DISCARD, /* request to discard sectors */ __REQ_SORTED, /* elevator knows about this request */ __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ @@ -143,6 +144,9 @@ enum rq_flag_bits { #define REQ_NOIDLE (1 << __REQ_NOIDLE) #define REQ_IO_STAT (1 << __REQ_IO_STAT) +#define REQ_FAILFAST_MASK (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | \ + REQ_FAILFAST_DRIVER) + #define BLK_MAX_CDB 16 /* -- cgit v1.2.3 From 80a761fd33cf812f771e212139157bf8f58d4b3f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Jul 2009 17:48:17 +0900 Subject: block: implement mixed merge of different failfast requests Failfast has characteristics from other attributes. When issuing, executing and successuflly completing requests, failfast doesn't make any difference. It only affects how a request is handled on failure. Allowing requests with different failfast settings to be merged cause normal IOs to fail prematurely while not allowing has performance penalties as failfast is used for read aheads which are likely to be located near in-flight or to-be-issued normal IOs. This patch introduces the concept of 'mixed merge'. A request is a mixed merge if it is merge of segments which require different handling on failure. Currently the only mixable attributes are failfast ones (or lack thereof). When a bio with different failfast settings is added to an existing request or requests of different failfast settings are merged, the merged request is marked mixed. Each bio carries failfast settings and the request always tracks failfast state of the first bio. When the request fails, blk_rq_err_bytes() can be used to determine how many bytes can be safely failed without crossing into an area which requires further retrials. This allows request merging regardless of failfast settings while keeping the failure handling correct. This patch only implements mixed merge but doesn't enable it. The next one will update SCSI to make use of mixed merge. Signed-off-by: Tejun Heo Cc: Niel Lambrechts Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c3015736d81..650b6a9cb67 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -115,6 +115,7 @@ enum rq_flag_bits { __REQ_INTEGRITY, /* integrity metadata has been remapped */ __REQ_NOIDLE, /* Don't anticipate more IO after this one */ __REQ_IO_STAT, /* account I/O stat */ + __REQ_MIXED_MERGE, /* merge of different types, fail separately */ __REQ_NR_BITS, /* stops here */ }; @@ -143,6 +144,7 @@ enum rq_flag_bits { #define REQ_INTEGRITY (1 << __REQ_INTEGRITY) #define REQ_NOIDLE (1 << __REQ_NOIDLE) #define REQ_IO_STAT (1 << __REQ_IO_STAT) +#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) #define REQ_FAILFAST_MASK (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | \ REQ_FAILFAST_DRIVER) @@ -832,11 +834,13 @@ static inline void blk_run_address_space(struct address_space *mapping) } /* - * blk_rq_pos() : the current sector - * blk_rq_bytes() : bytes left in the entire request - * blk_rq_cur_bytes() : bytes left in the current segment - * blk_rq_sectors() : sectors left in the entire request - * blk_rq_cur_sectors() : sectors left in the current segment + * blk_rq_pos() : the current sector + * blk_rq_bytes() : bytes left in the entire request + * blk_rq_cur_bytes() : bytes left in the current segment + * blk_rq_err_bytes() : bytes left till the next error boundary + * blk_rq_sectors() : sectors left in the entire request + * blk_rq_cur_sectors() : sectors left in the current segment + * blk_rq_err_sectors() : sectors left till the next error boundary */ static inline sector_t blk_rq_pos(const struct request *rq) { @@ -853,6 +857,8 @@ static inline int blk_rq_cur_bytes(const struct request *rq) return rq->bio ? bio_cur_bytes(rq->bio) : 0; } +extern unsigned int blk_rq_err_bytes(const struct request *rq); + static inline unsigned int blk_rq_sectors(const struct request *rq) { return blk_rq_bytes(rq) >> 9; @@ -863,6 +869,11 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) return blk_rq_cur_bytes(rq) >> 9; } +static inline unsigned int blk_rq_err_sectors(const struct request *rq) +{ + return blk_rq_err_bytes(rq) >> 9; +} + /* * Request issue related functions. */ @@ -889,10 +900,12 @@ extern bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes); extern void blk_end_request_all(struct request *rq, int error); extern bool blk_end_request_cur(struct request *rq, int error); +extern bool blk_end_request_err(struct request *rq, int error); extern bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes); extern void __blk_end_request_all(struct request *rq, int error); extern bool __blk_end_request_cur(struct request *rq, int error); +extern bool __blk_end_request_err(struct request *rq, int error); extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); -- cgit v1.2.3 From 1f98a13f623e0ef666690a18c1250335fc6d7ef1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 11 Sep 2009 14:32:04 +0200 Subject: bio: first step in sanitizing the bio->bi_rw flag testing Get rid of any functions that test for these bits and make callers use bio_rw_flagged() directly. Then it is at least directly apparent what variable and flag they check. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 650b6a9cb67..88edb62def8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -86,7 +86,7 @@ enum { }; /* - * request type modified bits. first two bits match BIO_RW* bits, important + * request type modified bits. first four bits match BIO_RW* bits, important */ enum rq_flag_bits { __REQ_RW, /* not set, read. set, write */ -- cgit v1.2.3 From fb1e75389bd06fd5987e9cda1b4e0305c782f854 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 30 Jul 2009 08:18:24 +0200 Subject: block: improve queue_should_plug() by looking at IO depths Instead of just checking whether this device uses block layer tagging, we can improve the detection by looking at the maximum queue depth it has reached. If that crosses 4, then deem it a queuing device. This is important on high IOPS devices, since plugging hurts the performance there (it can be as much as 10-15% of the sys time). Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 88edb62def8..98b45633a27 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -459,6 +459,7 @@ struct request_queue #define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */ #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ +#define QUEUE_FLAG_CQ 16 /* hardware does queuing */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_CLUSTER) | \ @@ -581,6 +582,7 @@ enum { #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) +#define blk_queue_queuing(q) test_bit(QUEUE_FLAG_CQ, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) -- cgit v1.2.3 From 01e97f6b897bf06ec83375d691f2f4d57f5b3a09 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 3 Sep 2009 20:06:47 +0200 Subject: block: enable rq CPU completion affinity by default Test results here look good, and on big OLTP runs it has also shown to significantly increase cycles attributed to the database and cause a performance boost. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 98b45633a27..8bf1a10e4d8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -463,7 +463,8 @@ struct request_queue #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_CLUSTER) | \ - (1 << QUEUE_FLAG_STACKABLE)) + (1 << QUEUE_FLAG_STACKABLE) | \ + (1 << QUEUE_FLAG_SAME_COMP)) static inline int queue_is_locked(struct request_queue *q) { -- cgit v1.2.3 From 3c5820c743479285ce2678fd3c12b1fd39fe998f Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 11 Sep 2009 21:54:52 +0200 Subject: block: Optimal I/O limit wrapper Implement blk_limits_io_opt() and make blk_queue_io_opt() a wrapper around it. DM needs this to avoid poking at the queue_limits directly. Signed-off-by: Martin K. Petersen Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8bf1a10e4d8..86c2bdff3b8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -935,6 +935,7 @@ extern void blk_queue_alignment_offset(struct request_queue *q, unsigned int alignment); extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); extern void blk_queue_io_min(struct request_queue *q, unsigned int min); +extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); extern void blk_set_default_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, -- cgit v1.2.3 From 746cd1e7e4a555ddaee53b19a46e05c9c61eaf09 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 12 Sep 2009 07:35:43 +0200 Subject: block: use blkdev_issue_discard in blk_ioctl_discard blk_ioctl_discard duplicates large amounts of code from blkdev_issue_discard, the only difference between the two is that blkdev_issue_discard needs to send a barrier discard request and blk_ioctl_discard a non-barrier one, and blk_ioctl_discard needs to wait on the request. To facilitates this add a flags argument to blkdev_issue_discard to control both aspects of the behaviour. This will be very useful later on for using the waiting funcitonality for other callers. Based on an earlier patch from Matthew Wilcox . Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 86c2bdff3b8..e23a86cae5a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -998,15 +998,18 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, } extern int blkdev_issue_flush(struct block_device *, sector_t *); -extern int blkdev_issue_discard(struct block_device *, - sector_t sector, sector_t nr_sects, gfp_t); +#define DISCARD_FL_WAIT 0x01 /* wait for completion */ +#define DISCARD_FL_BARRIER 0x02 /* issue DISCARD_BARRIER request */ +extern int blkdev_issue_discard(struct block_device *, sector_t sector, + sector_t nr_sects, gfp_t, int flags); static inline int sb_issue_discard(struct super_block *sb, sector_t block, sector_t nr_blocks) { block <<= (sb->s_blocksize_bits - 9); nr_blocks <<= (sb->s_blocksize_bits - 9); - return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL); + return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL, + DISCARD_FL_BARRIER); } extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); -- cgit v1.2.3