aboutsummaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/Makefile6
-rw-r--r--drivers/md/dm-emc.c2
-rw-r--r--drivers/md/dm-exception-store.c10
-rw-r--r--drivers/md/dm-io.c38
-rw-r--r--drivers/md/dm-io.h79
-rw-r--r--drivers/md/dm-kcopyd.c (renamed from drivers/md/kcopyd.c)298
-rw-r--r--drivers/md/dm-log.c254
-rw-r--r--drivers/md/dm-log.h131
-rw-r--r--drivers/md/dm-mpath-hp-sw.c1
-rw-r--r--drivers/md/dm-mpath-rdac.c1
-rw-r--r--drivers/md/dm-raid1.c132
-rw-r--r--drivers/md/dm-snap.c22
-rw-r--r--drivers/md/dm-snap.h4
-rw-r--r--drivers/md/dm-table.c47
-rw-r--r--drivers/md/dm-uevent.c22
-rw-r--r--drivers/md/dm.c16
-rw-r--r--drivers/md/dm.h98
-rw-r--r--drivers/md/kcopyd.h42
-rw-r--r--drivers/md/md.c129
-rw-r--r--drivers/md/multipath.c3
-rw-r--r--drivers/md/raid1.c31
-rw-r--r--drivers/md/raid10.c33
-rw-r--r--drivers/md/raid5.c191
-rw-r--r--drivers/md/raid6algos.c3
24 files changed, 748 insertions, 845 deletions
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index d9aa7edb878..7be09eeea29 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -3,10 +3,10 @@
#
dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
- dm-ioctl.o dm-io.o kcopyd.o
+ dm-ioctl.o dm-io.o dm-kcopyd.o
dm-multipath-objs := dm-hw-handler.o dm-path-selector.o dm-mpath.o
dm-snapshot-objs := dm-snap.o dm-exception-store.o
-dm-mirror-objs := dm-log.o dm-raid1.o
+dm-mirror-objs := dm-raid1.o
dm-rdac-objs := dm-mpath-rdac.o
dm-hp-sw-objs := dm-mpath-hp-sw.o
md-mod-objs := md.o bitmap.o
@@ -39,7 +39,7 @@ obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc.o
obj-$(CONFIG_DM_MULTIPATH_HP) += dm-hp-sw.o
obj-$(CONFIG_DM_MULTIPATH_RDAC) += dm-rdac.o
obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
-obj-$(CONFIG_DM_MIRROR) += dm-mirror.o
+obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o
obj-$(CONFIG_DM_ZERO) += dm-zero.o
quiet_cmd_unroll = UNROLL $@
diff --git a/drivers/md/dm-emc.c b/drivers/md/dm-emc.c
index 6b91b9ab1d4..3ea5ad4b780 100644
--- a/drivers/md/dm-emc.c
+++ b/drivers/md/dm-emc.c
@@ -110,8 +110,6 @@ static struct request *get_failover_req(struct emc_handler *h,
memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
rq->sense_len = 0;
- memset(&rq->cmd, 0, BLK_MAX_CDB);
-
rq->timeout = EMC_FAILOVER_TIMEOUT;
rq->cmd_type = REQ_TYPE_BLOCK_PC;
rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE;
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index 5bbce29f143..41f408068a7 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -9,13 +9,13 @@
#include "dm.h"
#include "dm-snap.h"
-#include "dm-io.h"
-#include "kcopyd.h"
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
+#include <linux/dm-io.h>
+#include <linux/dm-kcopyd.h>
#define DM_MSG_PREFIX "snapshots"
#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */
@@ -131,7 +131,7 @@ struct pstore {
static unsigned sectors_to_pages(unsigned sectors)
{
- return sectors / (PAGE_SIZE >> 9);
+ return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9);
}
static int alloc_area(struct pstore *ps)
@@ -159,7 +159,7 @@ static void free_area(struct pstore *ps)
}
struct mdata_req {
- struct io_region *where;
+ struct dm_io_region *where;
struct dm_io_request *io_req;
struct work_struct work;
int result;
@@ -177,7 +177,7 @@ static void do_metadata(struct work_struct *work)
*/
static int chunk_io(struct pstore *ps, uint32_t chunk, int rw, int metadata)
{
- struct io_region where = {
+ struct dm_io_region where = {
.bdev = ps->snap->cow->bdev,
.sector = ps->snap->chunk_size * chunk,
.count = ps->snap->chunk_size,
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 8f25f628ef1..4789c42d9a3 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -5,13 +5,14 @@
* This file is released under the GPL.
*/
-#include "dm-io.h"
+#include "dm.h"
#include <linux/bio.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/dm-io.h>
struct dm_io_client {
mempool_t *pool;
@@ -20,7 +21,7 @@ struct dm_io_client {
/* FIXME: can we shrink this ? */
struct io {
- unsigned long error;
+ unsigned long error_bits;
atomic_t count;
struct task_struct *sleeper;
struct dm_io_client *client;
@@ -107,14 +108,14 @@ static inline unsigned bio_get_region(struct bio *bio)
static void dec_count(struct io *io, unsigned int region, int error)
{
if (error)
- set_bit(region, &io->error);
+ set_bit(region, &io->error_bits);
if (atomic_dec_and_test(&io->count)) {
if (io->sleeper)
wake_up_process(io->sleeper);
else {
- unsigned long r = io->error;
+ unsigned long r = io->error_bits;
io_notify_fn fn = io->callback;
void *context = io->context;
@@ -271,7 +272,7 @@ static void km_dp_init(struct dpages *dp, void *data)
/*-----------------------------------------------------------------
* IO routines that accept a list of pages.
*---------------------------------------------------------------*/
-static void do_region(int rw, unsigned int region, struct io_region *where,
+static void do_region(int rw, unsigned region, struct dm_io_region *where,
struct dpages *dp, struct io *io)
{
struct bio *bio;
@@ -320,7 +321,7 @@ static void do_region(int rw, unsigned int region, struct io_region *where,
}
static void dispatch_io(int rw, unsigned int num_regions,
- struct io_region *where, struct dpages *dp,
+ struct dm_io_region *where, struct dpages *dp,
struct io *io, int sync)
{
int i;
@@ -347,17 +348,17 @@ static void dispatch_io(int rw, unsigned int num_regions,
}
static int sync_io(struct dm_io_client *client, unsigned int num_regions,
- struct io_region *where, int rw, struct dpages *dp,
+ struct dm_io_region *where, int rw, struct dpages *dp,
unsigned long *error_bits)
{
struct io io;
- if (num_regions > 1 && rw != WRITE) {
+ if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
WARN_ON(1);
return -EIO;
}
- io.error = 0;
+ io.error_bits = 0;
atomic_set(&io.count, 1); /* see dispatch_io() */
io.sleeper = current;
io.client = client;
@@ -378,25 +379,25 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
return -EINTR;
if (error_bits)
- *error_bits = io.error;
+ *error_bits = io.error_bits;
- return io.error ? -EIO : 0;
+ return io.error_bits ? -EIO : 0;
}
static int async_io(struct dm_io_client *client, unsigned int num_regions,
- struct io_region *where, int rw, struct dpages *dp,
+ struct dm_io_region *where, int rw, struct dpages *dp,
io_notify_fn fn, void *context)
{
struct io *io;
- if (num_regions > 1 && rw != WRITE) {
+ if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
WARN_ON(1);
fn(1, context);
return -EIO;
}
io = mempool_alloc(client->pool, GFP_NOIO);
- io->error = 0;
+ io->error_bits = 0;
atomic_set(&io->count, 1); /* see dispatch_io() */
io->sleeper = NULL;
io->client = client;
@@ -435,10 +436,15 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp)
}
/*
- * New collapsed (a)synchronous interface
+ * New collapsed (a)synchronous interface.
+ *
+ * If the IO is asynchronous (i.e. it has notify.fn), you must either unplug
+ * the queue with blk_unplug() some time later or set the BIO_RW_SYNC bit in
+ * io_req->bi_rw. If you fail to do one of these, the IO will be submitted to
+ * the disk after q->unplug_delay, which defaults to 3ms in blk-settings.c.
*/
int dm_io(struct dm_io_request *io_req, unsigned num_regions,
- struct io_region *where, unsigned long *sync_error_bits)
+ struct dm_io_region *where, unsigned long *sync_error_bits)
{
int r;
struct dpages dp;
diff --git a/drivers/md/dm-io.h b/drivers/md/dm-io.h
deleted file mode 100644
index f647e2cceaa..00000000000
--- a/drivers/md/dm-io.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (C) 2003 Sistina Software
- *
- * This file is released under the GPL.
- */
-
-#ifndef _DM_IO_H
-#define _DM_IO_H
-
-#include "dm.h"
-
-struct io_region {
- struct block_device *bdev;
- sector_t sector;
- sector_t count; /* If this is zero the region is ignored. */
-};
-
-struct page_list {
- struct page_list *next;
- struct page *page;
-};
-
-typedef void (*io_notify_fn)(unsigned long error, void *context);
-
-enum dm_io_mem_type {
- DM_IO_PAGE_LIST,/* Page list */
- DM_IO_BVEC, /* Bio vector */
- DM_IO_VMA, /* Virtual memory area */
- DM_IO_KMEM, /* Kernel memory */
-};
-
-struct dm_io_memory {
- enum dm_io_mem_type type;
-
- union {
- struct page_list *pl;
- struct bio_vec *bvec;
- void *vma;
- void *addr;
- } ptr;
-
- unsigned offset;
-};
-
-struct dm_io_notify {
- io_notify_fn fn; /* Callback for asynchronous requests */
- void *context; /* Passed to callback */
-};
-
-/*
- * IO request structure
- */
-struct dm_io_client;
-struct dm_io_request {
- int bi_rw; /* READ|WRITE - not READA */
- struct dm_io_memory mem; /* Memory to use for io */
- struct dm_io_notify notify; /* Synchronous if notify.fn is NULL */
- struct dm_io_client *client; /* Client memory handler */
-};
-
-/*
- * For async io calls, users can alternatively use the dm_io() function below
- * and dm_io_client_create() to create private mempools for the client.
- *
- * Create/destroy may block.
- */
-struct dm_io_client *dm_io_client_create(unsigned num_pages);
-int dm_io_client_resize(unsigned num_pages, struct dm_io_client *client);
-void dm_io_client_destroy(struct dm_io_client *client);
-
-/*
- * IO interface using private per-client pools.
- * Each bit in the optional 'sync_error_bits' bitset indicates whether an
- * error occurred doing io to the corresponding region.
- */
-int dm_io(struct dm_io_request *io_req, unsigned num_regions,
- struct io_region *region, unsigned long *sync_error_bits);
-
-#endif
diff --git a/drivers/md/kcopyd.c b/drivers/md/dm-kcopyd.c
index e76b52ade69..996802b8a45 100644
--- a/drivers/md/kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -9,9 +9,8 @@
* completion notification.
*/
-#include <asm/types.h>
+#include <linux/types.h>
#include <asm/atomic.h>
-
#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/init.h>
@@ -23,24 +22,15 @@
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
+#include <linux/dm-kcopyd.h>
-#include "kcopyd.h"
-
-static struct workqueue_struct *_kcopyd_wq;
-static struct work_struct _kcopyd_work;
-
-static void wake(void)
-{
- queue_work(_kcopyd_wq, &_kcopyd_work);
-}
+#include "dm.h"
/*-----------------------------------------------------------------
* Each kcopyd client has its own little pool of preallocated
* pages for kcopyd io.
*---------------------------------------------------------------*/
-struct kcopyd_client {
- struct list_head list;
-
+struct dm_kcopyd_client {
spinlock_t lock;
struct page_list *pages;
unsigned int nr_pages;
@@ -50,8 +40,32 @@ struct kcopyd_client {
wait_queue_head_t destroyq;
atomic_t nr_jobs;
+
+ mempool_t *job_pool;
+
+ struct workqueue_struct *kcopyd_wq;
+ struct work_struct kcopyd_work;
+
+/*
+ * We maintain three lists of jobs:
+ *
+ * i) jobs waiting for pages
+ * ii) jobs that have pages, and are waiting for the io to be issued.
+ * iii) jobs that have completed.
+ *
+ * All three of these are protected by job_lock.
+ */
+ spinlock_t job_lock;
+ struct list_head complete_jobs;
+ struct list_head io_jobs;
+ struct list_head pages_jobs;
};
+static void wake(struct dm_kcopyd_client *kc)
+{
+ queue_work(kc->kcopyd_wq, &kc->kcopyd_work);
+}
+
static struct page_list *alloc_pl(void)
{
struct page_list *pl;
@@ -75,7 +89,7 @@ static void free_pl(struct page_list *pl)
kfree(pl);
}
-static int kcopyd_get_pages(struct kcopyd_client *kc,
+static int kcopyd_get_pages(struct dm_kcopyd_client *kc,
unsigned int nr, struct page_list **pages)
{
struct page_list *pl;
@@ -98,7 +112,7 @@ static int kcopyd_get_pages(struct kcopyd_client *kc,
return 0;
}
-static void kcopyd_put_pages(struct kcopyd_client *kc, struct page_list *pl)
+static void kcopyd_put_pages(struct dm_kcopyd_client *kc, struct page_list *pl)
{
struct page_list *cursor;
@@ -126,7 +140,7 @@ static void drop_pages(struct page_list *pl)
}
}
-static int client_alloc_pages(struct kcopyd_client *kc, unsigned int nr)
+static int client_alloc_pages(struct dm_kcopyd_client *kc, unsigned int nr)
{
unsigned int i;
struct page_list *pl = NULL, *next;
@@ -147,7 +161,7 @@ static int client_alloc_pages(struct kcopyd_client *kc, unsigned int nr)
return 0;
}
-static void client_free_pages(struct kcopyd_client *kc)
+static void client_free_pages(struct dm_kcopyd_client *kc)
{
BUG_ON(kc->nr_free_pages != kc->nr_pages);
drop_pages(kc->pages);
@@ -161,7 +175,7 @@ static void client_free_pages(struct kcopyd_client *kc)
* ever having to do io (which could cause a deadlock).
*---------------------------------------------------------------*/
struct kcopyd_job {
- struct kcopyd_client *kc;
+ struct dm_kcopyd_client *kc;
struct list_head list;
unsigned long flags;
@@ -175,13 +189,13 @@ struct kcopyd_job {
* Either READ or WRITE
*/
int rw;
- struct io_region source;
+ struct dm_io_region source;
/*
* The destinations for the transfer.
*/
unsigned int num_dests;
- struct io_region dests[KCOPYD_MAX_REGIONS];
+ struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS];
sector_t offset;
unsigned int nr_pages;
@@ -191,7 +205,7 @@ struct kcopyd_job {
* Set this to ensure you are notified when the job has
* completed. 'context' is for callback to use.
*/
- kcopyd_notify_fn fn;
+ dm_kcopyd_notify_fn fn;
void *context;
/*
@@ -207,47 +221,19 @@ struct kcopyd_job {
#define MIN_JOBS 512
static struct kmem_cache *_job_cache;
-static mempool_t *_job_pool;
-/*
- * We maintain three lists of jobs:
- *
- * i) jobs waiting for pages
- * ii) jobs that have pages, and are waiting for the io to be issued.
- * iii) jobs that have completed.
- *
- * All three of these are protected by job_lock.
- */
-static DEFINE_SPINLOCK(_job_lock);
-
-static LIST_HEAD(_complete_jobs);
-static LIST_HEAD(_io_jobs);
-static LIST_HEAD(_pages_jobs);
-
-static int jobs_init(void)
+int __init dm_kcopyd_init(void)
{
_job_cache = KMEM_CACHE(kcopyd_job, 0);
if (!_job_cache)
return -ENOMEM;
- _job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache);
- if (!_job_pool) {
- kmem_cache_destroy(_job_cache);
- return -ENOMEM;
- }
-
return 0;
}
-static void jobs_exit(void)
+void dm_kcopyd_exit(void)
{
- BUG_ON(!list_empty(&_complete_jobs));
- BUG_ON(!list_empty(&_io_jobs));
- BUG_ON(!list_empty(&_pages_jobs));
-
- mempool_destroy(_job_pool);
kmem_cache_destroy(_job_cache);
- _job_pool = NULL;
_job_cache = NULL;
}
@@ -255,18 +241,19 @@ static void jobs_exit(void)
* Functions to push and pop a job onto the head of a given job
* list.
*/
-static struct kcopyd_job *pop(struct list_head *jobs)
+static struct kcopyd_job *pop(struct list_head *jobs,
+ struct dm_kcopyd_client *kc)
{
struct kcopyd_job *job = NULL;
unsigned long flags;
- spin_lock_irqsave(&_job_lock, flags);
+ spin_lock_irqsave(&kc->job_lock, flags);
if (!list_empty(jobs)) {
job = list_entry(jobs->next, struct kcopyd_job, list);
list_del(&job->list);
}
- spin_unlock_irqrestore(&_job_lock, flags);
+ spin_unlock_irqrestore(&kc->job_lock, flags);
return job;
}
@@ -274,10 +261,11 @@ static struct kcopyd_job *pop(struct list_head *jobs)
static void push(struct list_head *jobs, struct kcopyd_job *job)
{
unsigned long flags;
+ struct dm_kcopyd_client *kc = job->kc;
- spin_lock_irqsave(&_job_lock, flags);
+ spin_lock_irqsave(&kc->job_lock, flags);
list_add_tail(&job->list, jobs);
- spin_unlock_irqrestore(&_job_lock, flags);
+ spin_unlock_irqrestore(&kc->job_lock, flags);
}
/*
@@ -294,11 +282,11 @@ static int run_complete_job(struct kcopyd_job *job)
void *context = job->context;
int read_err = job->read_err;
unsigned long write_err = job->write_err;
- kcopyd_notify_fn fn = job->fn;
- struct kcopyd_client *kc = job->kc;
+ dm_kcopyd_notify_fn fn = job->fn;
+ struct dm_kcopyd_client *kc = job->kc;
kcopyd_put_pages(kc, job->pages);
- mempool_free(job, _job_pool);
+ mempool_free(job, kc->job_pool);
fn(read_err, write_err, context);
if (atomic_dec_and_test(&kc->nr_jobs))
@@ -310,6 +298,7 @@ static int run_complete_job(struct kcopyd_job *job)
static void complete_io(unsigned long error, void *context)
{
struct kcopyd_job *job = (struct kcopyd_job *) context;
+ struct dm_kcopyd_client *kc = job->kc;
if (error) {
if (job->rw == WRITE)
@@ -317,22 +306,22 @@ static void complete_io(unsigned long error, void *context)
else
job->read_err = 1;
- if (!test_bit(KCOPYD_IGNORE_ERROR, &job->flags)) {
- push(&_complete_jobs, job);
- wake();
+ if (!test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags)) {
+ push(&kc->complete_jobs, job);
+ wake(kc);
return;
}
}
if (job->rw == WRITE)
- push(&_complete_jobs, job);
+ push(&kc->complete_jobs, job);
else {
job->rw = WRITE;
- push(&_io_jobs, job);
+ push(&kc->io_jobs, job);
}
- wake();
+ wake(kc);
}
/*
@@ -343,7 +332,7 @@ static int run_io_job(struct kcopyd_job *job)
{
int r;
struct dm_io_request io_req = {
- .bi_rw = job->rw,
+ .bi_rw = job->rw | (1 << BIO_RW_SYNC),
.mem.type = DM_IO_PAGE_LIST,
.mem.ptr.pl = job->pages,
.mem.offset = job->offset,
@@ -369,7 +358,7 @@ static int run_pages_job(struct kcopyd_job *job)
r = kcopyd_get_pages(job->kc, job->nr_pages, &job->pages);
if (!r) {
/* this job is ready for io */
- push(&_io_jobs, job);
+ push(&job->kc->io_jobs, job);
return 0;
}
@@ -384,12 +373,13 @@ static int run_pages_job(struct kcopyd_job *job)
* Run through a list for as long as possible. Returns the count
* of successful jobs.
*/
-static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *))
+static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc,
+ int (*fn) (struct kcopyd_job *))
{
struct kcopyd_job *job;
int r, count = 0;
- while ((job = pop(jobs))) {
+ while ((job = pop(jobs, kc))) {
r = fn(job);
@@ -399,7 +389,7 @@ static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *))
job->write_err = (unsigned long) -1L;
else
job->read_err = 1;
- push(&_complete_jobs, job);
+ push(&kc->complete_jobs, job);
break;
}
@@ -421,8 +411,11 @@ static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *))
/*
* kcopyd does this every time it's woken up.
*/
-static void do_work(struct work_struct *ignored)
+static void do_work(struct work_struct *work)
{
+ struct dm_kcopyd_client *kc = container_of(work,
+ struct dm_kcopyd_client, kcopyd_work);
+
/*
* The order that these are called is *very* important.
* complete jobs can free some pages for pages jobs.
@@ -430,9 +423,9 @@ static void do_work(struct work_struct *ignored)
* list. io jobs call wake when they complete and it all
* starts again.
*/
- process_jobs(&_complete_jobs, run_complete_job);
- process_jobs(&_pages_jobs, run_pages_job);
- process_jobs(&_io_jobs, run_io_job);
+ process_jobs(&kc->complete_jobs, kc, run_complete_job);
+ process_jobs(&kc->pages_jobs, kc, run_pages_job);
+ process_jobs(&kc->io_jobs, kc, run_io_job);
}
/*
@@ -442,9 +435,10 @@ static void do_work(struct work_struct *ignored)
*/
static void dispatch_job(struct kcopyd_job *job)
{
- atomic_inc(&job->kc->nr_jobs);
- push(&_pages_jobs, job);
- wake();
+ struct dm_kcopyd_client *kc = job->kc;
+ atomic_inc(&kc->nr_jobs);
+ push(&kc->pages_jobs, job);
+ wake(kc);
}
#define SUB_JOB_SIZE 128
@@ -469,7 +463,7 @@ static void segment_complete(int read_err, unsigned long write_err,
* Only dispatch more work if there hasn't been an error.
*/
if ((!job->read_err && !job->write_err) ||
- test_bit(KCOPYD_IGNORE_ERROR, &job->flags)) {
+ test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags)) {
/* get the next chunk of work */
progress = job->progress;
count = job->source.count - progress;
@@ -484,7 +478,8 @@ static void segment_complete(int read_err, unsigned long write_err,
if (count) {
int i;
- struct kcopyd_job *sub_job = mempool_alloc(_job_pool, GFP_NOIO);
+ struct kcopyd_job *sub_job = mempool_alloc(job->kc->job_pool,
+ GFP_NOIO);
*sub_job = *job;
sub_job->source.sector += progress;
@@ -508,7 +503,7 @@ static void segment_complete(int read_err, unsigned long write_err,
* after we've completed.
*/
job->fn(read_err, write_err, job->context);
- mempool_free(job, _job_pool);
+ mempool_free(job, job->kc->job_pool);
}
}
@@ -526,16 +521,16 @@ static void split_job(struct kcopyd_job *job)
segment_complete(0, 0u, job);
}
-int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from,
- unsigned int num_dests, struct io_region *dests,
- unsigned int flags, kcopyd_notify_fn fn, void *context)
+int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
+ unsigned int num_dests, struct dm_io_region *dests,
+ unsigned int flags, dm_kcopyd_notify_fn fn, void *context)
{
struct kcopyd_job *job;
/*
* Allocate a new job.
*/
- job = mempool_alloc(_job_pool, GFP_NOIO);
+ job = mempool_alloc(kc->job_pool, GFP_NOIO);
/*
* set up for the read.
@@ -569,6 +564,7 @@ int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from,
return 0;
}
+EXPORT_SYMBOL(dm_kcopyd_copy);
/*
* Cancels a kcopyd job, eg. someone might be deactivating a
@@ -583,126 +579,76 @@ int kcopyd_cancel(struct kcopyd_job *job, int block)
#endif /* 0 */
/*-----------------------------------------------------------------
- * Unit setup
+ * Client setup
*---------------------------------------------------------------*/
-static DEFINE_MUTEX(_client_lock);
-static LIST_HEAD(_clients);
-
-static void client_add(struct kcopyd_client *kc)
+int dm_kcopyd_client_create(unsigned int nr_pages,
+ struct dm_kcopyd_client **result)
{
- mutex_lock(&_client_lock);
- list_add(&kc->list, &_clients);
- mutex_unlock(&_client_lock);
-}
-
-static void client_del(struct kcopyd_client *kc)
-{
- mutex_lock(&_client_lock);
- list_del(&kc->list);
- mutex_unlock(&_client_lock);
-}
-
-static DEFINE_MUTEX(kcopyd_init_lock);
-static int kcopyd_clients = 0;
+ int r = -ENOMEM;
+ struct dm_kcopyd_client *kc;
-static int kcopyd_init(void)
-{
- int r;
-
- mutex_lock(&kcopyd_init_lock);
-
- if (kcopyd_clients) {
- /* Already initialized. */
- kcopyd_clients++;
- mutex_unlock(&kcopyd_init_lock);
- return 0;
- }
-
- r = jobs_init();
- if (r) {
- mutex_unlock(&kcopyd_init_lock);
- return r;
- }
-
- _kcopyd_wq = create_singlethread_workqueue("kcopyd");
- if (!_kcopyd_wq) {
- jobs_exit();
- mutex_unlock(&kcopyd_init_lock);
+ kc = kmalloc(sizeof(*kc), GFP_KERNEL);
+ if (!kc)
return -ENOMEM;
- }
-
- kcopyd_clients++;
- INIT_WORK(&_kcopyd_work, do_work);
- mutex_unlock(&kcopyd_init_lock);
- return 0;
-}
-static void kcopyd_exit(void)
-{
- mutex_lock(&kcopyd_init_lock);
- kcopyd_clients--;
- if (!kcopyd_clients) {
- jobs_exit();
- destroy_workqueue(_kcopyd_wq);
- _kcopyd_wq = NULL;
- }
- mutex_unlock(&kcopyd_init_lock);
-}
-
-int kcopyd_client_create(unsigned int nr_pages, struct kcopyd_client **result)
-{
- int r = 0;
- struct kcopyd_client *kc;
+ spin_lock_init(&kc->lock);
+ spin_lock_init(&kc->job_lock);
+ INIT_LIST_HEAD(&kc->complete_jobs);
+ INIT_LIST_HEAD(&kc->io_jobs);
+ INIT_LIST_HEAD(&kc->pages_jobs);
- r = kcopyd_init();
- if (r)
- return r;
+ kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache);
+ if (!kc->job_pool)
+ goto bad_slab;
- kc = kmalloc(sizeof(*kc), GFP_KERNEL);
- if (!kc) {
- kcopyd_exit();
- return -ENOMEM;
- }
+ INIT_WORK(&kc->kcopyd_work, do_work);
+ kc->kcopyd_wq = create_singlethread_workqueue("kcopyd");
+ if (!kc->kcopyd_wq)
+ goto bad_workqueue;
- spin_lock_init(&kc->lock);
kc->pages = NULL;
kc->nr_pages = kc->nr_free_pages = 0;
r = client_alloc_pages(kc, nr_pages);
- if (r) {
- kfree(kc);
- kcopyd_exit();
- return r;
- }
+ if (r)
+ goto bad_client_pages;
kc->io_client = dm_io_client_create(nr_pages);
if (IS_ERR(kc->io_client)) {
r = PTR_ERR(kc->io_client);
- client_free_pages(kc);
- kfree(kc);
- kcopyd_exit();
- return r;
+ goto bad_io_client;
}
init_waitqueue_head(&kc->destroyq);
atomic_set(&kc->nr_jobs, 0);
- client_add(kc);
*result = kc;
return 0;
+
+bad_io_client:
+ client_free_pages(kc);
+bad_client_pages:
+ destroy_workqueue(kc->kcopyd_wq);
+bad_workqueue:
+ mempool_destroy(kc->job_pool);
+bad_slab:
+ kfree(kc);
+
+ return r;
}
+EXPORT_SYMBOL(dm_kcopyd_client_create);
-void kcopyd_client_destroy(struct kcopyd_client *kc)
+void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc)
{
/* Wait for completion of all jobs submitted by this client. */
wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs));
+ BUG_ON(!list_empty(&kc->complete_jobs));
+ BUG_ON(!list_empty(&kc->io_jobs));
+ BUG_ON(!list_empty(&kc->pages_jobs));
+ destroy_workqueue(kc->kcopyd_wq);
dm_io_client_destroy(kc->io_client);
client_free_pages(kc);
- client_del(kc);
+ mempool_destroy(kc->job_pool);
kfree(kc);
- kcopyd_exit();
}
-
-EXPORT_SYMBOL(kcopyd_client_create);
-EXPORT_SYMBOL(kcopyd_client_destroy);
-EXPORT_SYMBOL(kcopyd_copy);
+EXPORT_SYMBOL(dm_kcopyd_client_destroy);
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 2a74b2142f5..67a6f31b7fc 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2003 Sistina Software
+ * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
*
* This file is released under the LGPL.
*/
@@ -8,64 +9,58 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
+#include <linux/dm-io.h>
+#include <linux/dm-dirty-log.h>
-#include "dm-log.h"
-#include "dm-io.h"
+#include "dm.h"
-#define DM_MSG_PREFIX "mirror log"
+#define DM_MSG_PREFIX "dirty region log"
-static LIST_HEAD(_log_types);
-static DEFINE_SPINLOCK(_lock);
+struct dm_dirty_log_internal {
+ struct dm_dirty_log_type *type;
-int dm_register_dirty_log_type(struct dirty_log_type *type)
-{
- spin_lock(&_lock);
- type->use_count = 0;
- list_add(&type->list, &_log_types);
- spin_unlock(&_lock);
+ struct list_head list;
+ long use;
+};
- return 0;
-}
+static LIST_HEAD(_log_types);
+static DEFINE_SPINLOCK(_lock);
-int dm_unregister_dirty_log_type(struct dirty_log_type *type)
+static struct dm_dirty_log_internal *__find_dirty_log_type(const char *name)
{
- spin_lock(&_lock);
-
- if (type->use_count)
- DMWARN("Attempt to unregister a log type that is still in use");
- else
- list_del(&type->list);
+ struct dm_dirty_log_internal *log_type;
- spin_unlock(&_lock);
+ list_for_each_entry(log_type, &_log_types, list)
+ if (!strcmp(name, log_type->type->name))
+ return log_type;
- return 0;
+ return NULL;
}
-static struct dirty_log_type *_get_type(const char *type_name)
+static struct dm_dirty_log_internal *_get_dirty_log_type(const char *name)
{
- struct dirty_log_type *type;
+ struct dm_dirty_log_internal *log_type;
spin_lock(&_lock);
- list_for_each_entry (type, &_log_types, list)
- if (!strcmp(type_name, type->name)) {
- if (!type->use_count && !try_module_get(type->module)){
- spin_unlock(&_lock);
- return NULL;
- }
- type->use_count++;
- spin_unlock(&_lock);
- return type;
- }
+
+ log_type = __find_dirty_log_type(name);
+ if (log_type) {
+ if (!log_type->use && !try_module_get(log_type->type->module))
+ log_type = NULL;
+ else
+ log_type->use++;
+ }
spin_unlock(&_lock);
- return NULL;
+
+ return log_type;
}
/*
* get_type
* @type_name
*
- * Attempt to retrieve the dirty_log_type by name. If not already
+ * Attempt to retrieve the dm_dirty_log_type by name. If not already
* available, attempt to load the appropriate module.
*
* Log modules are named "dm-log-" followed by the 'type_name'.
@@ -78,14 +73,17 @@ static struct dirty_log_type *_get_type(const char *type_name)
*
* Returns: dirty_log_type* on success, NULL on failure
*/
-static struct dirty_log_type *get_type(const char *type_name)
+static struct dm_dirty_log_type *get_type(const char *type_name)
{
char *p, *type_name_dup;
- struct dirty_log_type *type;
+ struct dm_dirty_log_internal *log_type;
+
+ if (!type_name)
+ return NULL;
- type = _get_type(type_name);
- if (type)
- return type;
+ log_type = _get_dirty_log_type(type_name);
+ if (log_type)
+ return log_type->type;
type_name_dup = kstrdup(type_name, GFP_KERNEL);
if (!type_name_dup) {
@@ -95,34 +93,106 @@ static struct dirty_log_type *get_type(const char *type_name)
}
while (request_module("dm-log-%s", type_name_dup) ||
- !(type = _get_type(type_name))) {
+ !(log_type = _get_dirty_log_type(type_name))) {
p = strrchr(type_name_dup, '-');
if (!p)
break;
p[0] = '\0';
}
- if (!type)
+ if (!log_type)
DMWARN("Module for logging type \"%s\" not found.", type_name);
kfree(type_name_dup);
- return type;
+ return log_type ? log_type->type : NULL;
}
-static void put_type(struct dirty_log_type *type)
+static void put_type(struct dm_dirty_log_type *type)
{
+ struct dm_dirty_log_internal *log_type;
+
+ if (!type)
+ return;
+
spin_lock(&_lock);
- if (!--type->use_count)
+ log_type = __find_dirty_log_type(type->name);
+ if (!log_type)
+ goto out;
+
+ if (!--log_type->use)
module_put(type->module);
+
+ BUG_ON(log_type->use < 0);
+
+out:
spin_unlock(&_lock);
}
-struct dirty_log *dm_create_dirty_log(const char *type_name, struct dm_target *ti,
- unsigned int argc, char **argv)
+static struct dm_dirty_log_internal *_alloc_dirty_log_type(struct dm_dirty_log_type *type)
{
- struct dirty_log_type *type;
- struct dirty_log *log;
+ struct dm_dirty_log_internal *log_type = kzalloc(sizeof(*log_type),
+ GFP_KERNEL);
+
+ if (log_type)
+ log_type->type = type;
+
+ return log_type;
+}
+
+int dm_dirty_log_type_register(struct dm_dirty_log_type *type)
+{
+ struct dm_dirty_log_internal *log_type = _alloc_dirty_log_type(type);
+ int r = 0;
+
+ if (!log_type)
+ return -ENOMEM;
+
+ spin_lock(&_lock);
+ if (!__find_dirty_log_type(type->name))
+ list_add(&log_type->list, &_log_types);
+ else {
+ kfree(log_type);
+ r = -EEXIST;
+ }
+ spin_unlock(&_lock);
+
+ return r;
+}
+EXPORT_SYMBOL(dm_dirty_log_type_register);
+
+int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type)
+{
+ struct dm_dirty_log_internal *log_type;
+
+ spin_lock(&_lock);
+
+ log_type = __find_dirty_log_type(type->name);
+ if (!log_type) {
+ spin_unlock(&_lock);
+ return -EINVAL;
+ }
+
+ if (log_type->use) {
+ spin_unlock(&_lock);
+ return -ETXTBSY;
+ }
+
+ list_del(&log_type->list);
+
+ spin_unlock(&_lock);
+ kfree(log_type);
+
+ return 0;
+}
+EXPORT_SYMBOL(dm_dirty_log_type_unregister);
+
+struct dm_dirty_log *dm_dirty_log_create(const char *type_name,
+ struct dm_target *ti,
+ unsigned int argc, char **argv)
+{
+ struct dm_dirty_log_type *type;
+ struct dm_dirty_log *log;
log = kmalloc(sizeof(*log), GFP_KERNEL);
if (!log)
@@ -143,13 +213,15 @@ struct dirty_log *dm_create_dirty_log(const char *type_name, struct dm_target *t
return log;
}
+EXPORT_SYMBOL(dm_dirty_log_create);
-void dm_destroy_dirty_log(struct dirty_log *log)
+void dm_dirty_log_destroy(struct dm_dirty_log *log)
{
log->type->dtr(log);
put_type(log->type);
kfree(log);
}
+EXPORT_SYMBOL(dm_dirty_log_destroy);
/*-----------------------------------------------------------------
* Persistent and core logs share a lot of their implementation.
@@ -207,7 +279,7 @@ struct log_c {
struct dm_dev *log_dev;
struct log_header header;
- struct io_region header_location;
+ struct dm_io_region header_location;
struct log_header *disk_header;
};
@@ -215,7 +287,7 @@ struct log_c {
* The touched member needs to be updated every time we access
* one of the bitsets.
*/
-static inline int log_test_bit(uint32_t *bs, unsigned bit)
+static inline int log_test_bit(uint32_t *bs, unsigned bit)
{
return ext2_test_bit(bit, (unsigned long *) bs) ? 1 : 0;
}
@@ -302,7 +374,7 @@ static inline int write_header(struct log_c *log)
* argv contains region_size followed optionally by [no]sync
*--------------------------------------------------------------*/
#define BYTE_SHIFT 3
-static int create_log_context(struct dirty_log *log, struct dm_target *ti,
+static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
unsigned int argc, char **argv,
struct dm_dev *dev)
{
@@ -315,7 +387,7 @@ static int create_log_context(struct dirty_log *log, struct dm_target *ti,
int r;
if (argc < 1 || argc > 2) {
- DMWARN("wrong number of arguments to mirror log");
+ DMWARN("wrong number of arguments to dirty region log");
return -EINVAL;
}
@@ -325,8 +397,8 @@ static int create_log_context(struct dirty_log *log, struct dm_target *ti,
else if (!strcmp(argv[1], "nosync"))
sync = NOSYNC;
else {
- DMWARN("unrecognised sync argument to mirror log: %s",
- argv[1]);
+ DMWARN("unrecognised sync argument to "
+ "dirty region log: %s", argv[1]);
return -EINVAL;
}
}
@@ -434,7 +506,7 @@ static int create_log_context(struct dirty_log *log, struct dm_target *ti,
return 0;
}
-static int core_ctr(struct dirty_log *log, struct dm_target *ti,
+static int core_ctr(struct dm_dirty_log *log, struct dm_target *ti,
unsigned int argc, char **argv)
{
return create_log_context(log, ti, argc, argv, NULL);
@@ -447,7 +519,7 @@ static void destroy_log_context(struct log_c *lc)
kfree(lc);
}
-static void core_dtr(struct dirty_log *log)
+static void core_dtr(struct dm_dirty_log *log)
{
struct log_c *lc = (struct log_c *) log->context;
@@ -460,14 +532,14 @@ static void core_dtr(struct dirty_log *log)
*
* argv contains log_device region_size followed optionally by [no]sync
*--------------------------------------------------------------*/
-static int disk_ctr(struct dirty_log *log, struct dm_target *ti,
+static int disk_ctr(struct dm_dirty_log *log, struct dm_target *ti,
unsigned int argc, char **argv)
{
int r;
struct dm_dev *dev;
if (argc < 2 || argc > 3) {
- DMWARN("wrong number of arguments to disk mirror log");
+ DMWARN("wrong number of arguments to disk dirty region log");
return -EINVAL;
}
@@ -485,7 +557,7 @@ static int disk_ctr(struct dirty_log *log, struct dm_target *ti,
return 0;
}
-static void disk_dtr(struct dirty_log *log)
+static void disk_dtr(struct dm_dirty_log *log)
{
struct log_c *lc = (struct log_c *) log->context;
@@ -514,7 +586,7 @@ static void fail_log_device(struct log_c *lc)
dm_table_event(lc->ti->table);
}
-static int disk_resume(struct dirty_log *log)
+static int disk_resume(struct dm_dirty_log *log)
{
int r;
unsigned i;
@@ -524,7 +596,7 @@ static int disk_resume(struct dirty_log *log)
/* read the disk header */
r = read_header(lc);
if (r) {
- DMWARN("%s: Failed to read header on mirror log device",
+ DMWARN("%s: Failed to read header on dirty region log device",
lc->log_dev->name);
fail_log_device(lc);
/*
@@ -562,7 +634,7 @@ static int disk_resume(struct dirty_log *log)
/* write the new header */
r = write_header(lc);
if (r) {
- DMWARN("%s: Failed to write header on mirror log device",
+ DMWARN("%s: Failed to write header on dirty region log device",
lc->log_dev->name);
fail_log_device(lc);
}
@@ -570,38 +642,38 @@ static int disk_resume(struct dirty_log *log)
return r;
}
-static uint32_t core_get_region_size(struct dirty_log *log)
+static uint32_t core_get_region_size(struct dm_dirty_log *log)
{
struct log_c *lc = (struct log_c *) log->context;
return lc->region_size;
}
-static int core_resume(struct dirty_log *log)
+static int core_resume(struct dm_dirty_log *log)
{
struct log_c *lc = (struct log_c *) log->context;
lc->sync_search = 0;
return 0;
}
-static int core_is_clean(struct dirty_log *log, region_t region)
+static int core_is_clean(struct dm_dirty_log *log, region_t region)
{
struct log_c *lc = (struct log_c *) log->context;
return log_test_bit(lc->clean_bits, region);
}
-static int core_in_sync(struct dirty_log *log, region_t region, int block)
+static int core_in_sync(struct dm_dirty_log *log, region_t region, int block)
{
struct log_c *lc = (struct log_c *) log->context;
return log_test_bit(lc->sync_bits, region);
}
-static int core_flush(struct dirty_log *log)
+static int core_flush(struct dm_dirty_log *log)
{
/* no op */
return 0;
}
-static int disk_flush(struct dirty_log *log)
+static int disk_flush(struct dm_dirty_log *log)
{
int r;
struct log_c *lc = (struct log_c *) log->context;
@@ -619,19 +691,19 @@ static int disk_flush(struct dirty_log *log)
return r;
}
-static void core_mark_region(struct dirty_log *log, region_t region)
+static void core_mark_region(struct dm_dirty_log *log, region_t region)
{
struct log_c *lc = (struct log_c *) log->context;
log_clear_bit(lc, lc->clean_bits, region);
}
-static void core_clear_region(struct dirty_log *log, region_t region)
+static void core_clear_region(struct dm_dirty_log *log, region_t region)
{
struct log_c *lc = (struct log_c *) log->context;
log_set_bit(lc, lc->clean_bits, region);
}
-static int core_get_resync_work(struct dirty_log *log, region_t *region)
+static int core_get_resync_work(struct dm_dirty_log *log, region_t *region)
{
struct log_c *lc = (struct log_c *) log->context;
@@ -654,7 +726,7 @@ static int core_get_resync_work(struct dirty_log *log, region_t *region)
return 1;
}
-static void core_set_region_sync(struct dirty_log *log, region_t region,
+static void core_set_region_sync(struct dm_dirty_log *log, region_t region,
int in_sync)
{
struct log_c *lc = (struct log_c *) log->context;
@@ -669,7 +741,7 @@ static void core_set_region_sync(struct dirty_log *log, region_t region,
}
}
-static region_t core_get_sync_count(struct dirty_log *log)
+static region_t core_get_sync_count(struct dm_dirty_log *log)
{
struct log_c *lc = (struct log_c *) log->context;
@@ -680,7 +752,7 @@ static region_t core_get_sync_count(struct dirty_log *log)
if (lc->sync != DEFAULTSYNC) \
DMEMIT("%ssync ", lc->sync == NOSYNC ? "no" : "")
-static int core_status(struct dirty_log *log, status_type_t status,
+static int core_status(struct dm_dirty_log *log, status_type_t status,
char *result, unsigned int maxlen)
{
int sz = 0;
@@ -700,7 +772,7 @@ static int core_status(struct dirty_log *log, status_type_t status,
return sz;
}
-static int disk_status(struct dirty_log *log, status_type_t status,
+static int disk_status(struct dm_dirty_log *log, status_type_t status,
char *result, unsigned int maxlen)
{
int sz = 0;
@@ -722,7 +794,7 @@ static int disk_status(struct dirty_log *log, status_type_t status,
return sz;
}
-static struct dirty_log_type _core_type = {
+static struct dm_dirty_log_type _core_type = {
.name = "core",
.module = THIS_MODULE,
.ctr = core_ctr,
@@ -740,7 +812,7 @@ static struct dirty_log_type _core_type = {
.status = core_status,
};
-static struct dirty_log_type _disk_type = {
+static struct dm_dirty_log_type _disk_type = {
.name = "disk",
.module = THIS_MODULE,
.ctr = disk_ctr,
@@ -763,26 +835,28 @@ int __init dm_dirty_log_init(void)
{
int r;
- r = dm_register_dirty_log_type(&_core_type);
+ r = dm_dirty_log_type_register(&_core_type);
if (r)
DMWARN("couldn't register core log");
- r = dm_register_dirty_log_type(&_disk_type);
+ r = dm_dirty_log_type_register(&_disk_type);
if (r) {
DMWARN("couldn't register disk type");
- dm_unregister_dirty_log_type(&_core_type);
+ dm_dirty_log_type_unregister(&_core_type);
}
return r;
}
-void dm_dirty_log_exit(void)
+void __exit dm_dirty_log_exit(void)
{
- dm_unregister_dirty_log_type(&_disk_type);
- dm_unregister_dirty_log_type(&_core_type);
+ dm_dirty_log_type_unregister(&_disk_type);
+ dm_dirty_log_type_unregister(&_core_type);
}
-EXPORT_SYMBOL(dm_register_dirty_log_type);
-EXPORT_SYMBOL(dm_unregister_dirty_log_type);
-EXPORT_SYMBOL(dm_create_dirty_log);
-EXPORT_SYMBOL(dm_destroy_dirty_log);
+module_init(dm_dirty_log_init);
+module_exit(dm_dirty_log_exit);
+
+MODULE_DESCRIPTION(DM_NAME " dirty region log");
+MODULE_AUTHOR("Joe Thornber, Heinz Mauelshagen <dm-devel@redhat.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-log.h b/drivers/md/dm-log.h
deleted file mode 100644
index 3fae87eb596..00000000000
--- a/drivers/md/dm-log.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright (C) 2003 Sistina Software
- *
- * This file is released under the LGPL.
- */
-
-#ifndef DM_DIRTY_LOG
-#define DM_DIRTY_LOG
-
-#include "dm.h"
-
-typedef sector_t region_t;
-
-struct dirty_log_type;
-
-struct dirty_log {
- struct dirty_log_type *type;
- void *context;
-};
-
-struct dirty_log_type {
- struct list_head list;
- const char *name;
- struct module *module;
- unsigned int use_count;
-
- int (*ctr)(struct dirty_log *log, struct dm_target *ti,
- unsigned int argc, char **argv);
- void (*dtr)(struct dirty_log *log);
-
- /*
- * There are times when we don't want the log to touch
- * the disk.
- */
- int (*presuspend)(struct dirty_log *log);
- int (*postsuspend)(struct dirty_log *log);
- int (*resume)(struct dirty_log *log);
-
- /*
- * Retrieves the smallest size of region that the log can
- * deal with.
- */
- uint32_t (*get_region_size)(struct dirty_log *log);
-
- /*
- * A predicate to say whether a region is clean or not.
- * May block.
- */
- int (*is_clean)(struct dirty_log *log, region_t region);
-
- /*
- * Returns: 0, 1, -EWOULDBLOCK, < 0
- *
- * A predicate function to check the area given by
- * [sector, sector + len) is in sync.
- *
- * If -EWOULDBLOCK is returned the state of the region is
- * unknown, typically this will result in a read being
- * passed to a daemon to deal with, since a daemon is
- * allowed to block.
- */
- int (*in_sync)(struct dirty_log *log, region_t region, int can_block);
-
- /*
- * Flush the current log state (eg, to disk). This
- * function may block.
- */
- int (*flush)(struct dirty_log *log);
-
- /*
- * Mark an area as clean or dirty. These functions may
- * block, though for performance reasons blocking should
- * be extremely rare (eg, allocating another chunk of
- * memory for some reason).
- */
- void (*mark_region)(struct dirty_log *log, region_t region);
- void (*clear_region)(struct dirty_log *log, region_t region);
-
- /*
- * Returns: <0 (error), 0 (no region), 1 (region)
- *
- * The mirrord will need perform recovery on regions of
- * the mirror that are in the NOSYNC state. This
- * function asks the log to tell the caller about the
- * next region that this machine should recover.
- *
- * Do not confuse this function with 'in_sync()', one
- * tells you if an area is synchronised, the other
- * assigns recovery work.
- */
- int (*get_resync_work)(struct dirty_log *log, region_t *region);
-
- /*
- * This notifies the log that the resync status of a region
- * has changed. It also clears the region from the recovering
- * list (if present).
- */
- void (*set_region_sync)(struct dirty_log *log,
- region_t region, int in_sync);
-
- /*
- * Returns the number of regions that are in sync.
- */
- region_t (*get_sync_count)(struct dirty_log *log);
-
- /*
- * Support function for mirror status requests.
- */
- int (*status)(struct dirty_log *log, status_type_t status_type,
- char *result, unsigned int maxlen);
-};
-
-int dm_register_dirty_log_type(struct dirty_log_type *type);
-int dm_unregister_dirty_log_type(struct dirty_log_type *type);
-
-
-/*
- * Make sure you use these two functions, rather than calling
- * type->constructor/destructor() directly.
- */
-struct dirty_log *dm_create_dirty_log(const char *type_name, struct dm_target *ti,
- unsigned int argc, char **argv);
-void dm_destroy_dirty_log(struct dirty_log *log);
-
-/*
- * init/exit functions.
- */
-int dm_dirty_log_init(void);
-void dm_dirty_log_exit(void);
-
-#endif
diff --git a/drivers/md/dm-mpath-hp-sw.c b/drivers/md/dm-mpath-hp-sw.c
index 204bf42c944..b63a0ab37c5 100644
--- a/drivers/md/dm-mpath-hp-sw.c
+++ b/drivers/md/dm-mpath-hp-sw.c
@@ -137,7 +137,6 @@ static struct request *hp_sw_get_request(struct dm_path *path)
req->sense = h->sense;
memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE);
- memset(&req->cmd, 0, BLK_MAX_CDB);
req->cmd[0] = START_STOP;
req->cmd[4] = 1;
req->cmd_len = COMMAND_SIZE(req->cmd[0]);
diff --git a/drivers/md/dm-mpath-rdac.c b/drivers/md/dm-mpath-rdac.c
index e04eb5c697f..95e77734880 100644
--- a/drivers/md/dm-mpath-rdac.c
+++ b/drivers/md/dm-mpath-rdac.c
@@ -284,7 +284,6 @@ static struct request *get_rdac_req(struct rdac_handler *h,
return NULL;
}
- memset(&rq->cmd, 0, BLK_MAX_CDB);
rq->sense = h->sense;
memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
rq->sense_len = 0;
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 762cb086bb7..ff05fe89308 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -7,9 +7,6 @@
#include "dm.h"
#include "dm-bio-list.h"
#include "dm-bio-record.h"
-#include "dm-io.h"
-#include "dm-log.h"
-#include "kcopyd.h"
#include <linux/ctype.h>
#include <linux/init.h>
@@ -22,6 +19,9 @@
#include <linux/workqueue.h>
#include <linux/log2.h>
#include <linux/hardirq.h>
+#include <linux/dm-io.h>
+#include <linux/dm-dirty-log.h>
+#include <linux/dm-kcopyd.h>
#define DM_MSG_PREFIX "raid1"
#define DM_IO_PAGES 64
@@ -74,7 +74,7 @@ struct region_hash {
unsigned region_shift;
/* holds persistent region state */
- struct dirty_log *log;
+ struct dm_dirty_log *log;
/* hash table */
rwlock_t hash_lock;
@@ -133,7 +133,7 @@ struct mirror_set {
struct dm_target *ti;
struct list_head list;
struct region_hash rh;
- struct kcopyd_client *kcopyd_client;
+ struct dm_kcopyd_client *kcopyd_client;
uint64_t features;
spinlock_t lock; /* protects the lists */
@@ -154,6 +154,9 @@ struct mirror_set {
struct workqueue_struct *kmirrord_wq;
struct work_struct kmirrord_work;
+ struct timer_list timer;
+ unsigned long timer_pending;
+
struct work_struct trigger_event;
unsigned int nr_mirrors;
@@ -178,13 +181,32 @@ static void wake(struct mirror_set *ms)
queue_work(ms->kmirrord_wq, &ms->kmirrord_work);
}
+static void delayed_wake_fn(unsigned long data)
+{
+ struct mirror_set *ms = (struct mirror_set *) data;
+
+ clear_bit(0, &ms->timer_pending);
+ wake(ms);
+}
+
+static void delayed_wake(struct mirror_set *ms)
+{
+ if (test_and_set_bit(0, &ms->timer_pending))
+ return;
+
+ ms->timer.expires = jiffies + HZ / 5;
+ ms->timer.data = (unsigned long) ms;
+ ms->timer.function = delayed_wake_fn;
+ add_timer(&ms->timer);
+}
+
/* FIXME move this */
static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw);
#define MIN_REGIONS 64
#define MAX_RECOVERY 1
static int rh_init(struct region_hash *rh, struct mirror_set *ms,
- struct dirty_log *log, uint32_t region_size,
+ struct dm_dirty_log *log, uint32_t region_size,
region_t nr_regions)
{
unsigned int nr_buckets, max_buckets;
@@ -249,7 +271,7 @@ static void rh_exit(struct region_hash *rh)
}
if (rh->log)
- dm_destroy_dirty_log(rh->log);
+ dm_dirty_log_destroy(rh->log);
if (rh->region_pool)
mempool_destroy(rh->region_pool);
vfree(rh->buckets);
@@ -405,24 +427,22 @@ static void rh_update_states(struct region_hash *rh)
write_lock_irq(&rh->hash_lock);
spin_lock(&rh->region_lock);
if (!list_empty(&rh->clean_regions)) {
- list_splice(&rh->clean_regions, &clean);
- INIT_LIST_HEAD(&rh->clean_regions);
+ list_splice_init(&rh->clean_regions, &clean);
list_for_each_entry(reg, &clean, list)
list_del(&reg->hash_list);
}
if (!list_empty(&rh->recovered_regions)) {
- list_splice(&rh->recovered_regions, &recovered);
- INIT_LIST_HEAD(&rh->recovered_regions);
+ list_splice_init(&rh->recovered_regions, &recovered);
list_for_each_entry (reg, &recovered, list)
list_del(&reg->hash_list);
}
if (!list_empty(&rh->failed_recovered_regions)) {
- list_splice(&rh->failed_recovered_regions, &failed_recovered);
- INIT_LIST_HEAD(&rh->failed_recovered_regions);
+ list_splice_init(&rh->failed_recovered_regions,
+ &failed_recovered);
list_for_each_entry(reg, &failed_recovered, list)
list_del(&reg->hash_list);
@@ -790,7 +810,7 @@ static int recover(struct mirror_set *ms, struct region *reg)
{
int r;
unsigned int i;
- struct io_region from, to[KCOPYD_MAX_REGIONS], *dest;
+ struct dm_io_region from, to[DM_KCOPYD_MAX_REGIONS], *dest;
struct mirror *m;
unsigned long flags = 0;
@@ -822,9 +842,9 @@ static int recover(struct mirror_set *ms, struct region *reg)
}
/* hand to kcopyd */
- set_bit(KCOPYD_IGNORE_ERROR, &flags);
- r = kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to, flags,
- recovery_complete, reg);
+ set_bit(DM_KCOPYD_IGNORE_ERROR, &flags);
+ r = dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to,
+ flags, recovery_complete, reg);
return r;
}
@@ -833,7 +853,7 @@ static void do_recovery(struct mirror_set *ms)
{
int r;
struct region *reg;
- struct dirty_log *log = ms->rh.log;
+ struct dm_dirty_log *log = ms->rh.log;
/*
* Start quiescing some regions.
@@ -909,7 +929,7 @@ static void map_bio(struct mirror *m, struct bio *bio)
bio->bi_sector = map_sector(m, bio);
}
-static void map_region(struct io_region *io, struct mirror *m,
+static void map_region(struct dm_io_region *io, struct mirror *m,
struct bio *bio)
{
io->bdev = m->dev->bdev;
@@ -951,7 +971,7 @@ static void read_callback(unsigned long error, void *context)
/* Asynchronous read. */
static void read_async_bio(struct mirror *m, struct bio *bio)
{
- struct io_region io;
+ struct dm_io_region io;
struct dm_io_request io_req = {
.bi_rw = READ,
.mem.type = DM_IO_BVEC,
@@ -1019,7 +1039,7 @@ static void __bio_mark_nosync(struct mirror_set *ms,
{
unsigned long flags;
struct region_hash *rh = &ms->rh;
- struct dirty_log *log = ms->rh.log;
+ struct dm_dirty_log *log = ms->rh.log;
struct region *reg;
region_t region = bio_to_region(rh, bio);
int recovering = 0;
@@ -1107,7 +1127,7 @@ out:
static void do_write(struct mirror_set *ms, struct bio *bio)
{
unsigned int i;
- struct io_region io[ms->nr_mirrors], *dest = io;
+ struct dm_io_region io[ms->nr_mirrors], *dest = io;
struct mirror *m;
struct dm_io_request io_req = {
.bi_rw = WRITE,
@@ -1182,6 +1202,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
spin_lock_irq(&ms->lock);
bio_list_merge(&ms->failures, &sync);
spin_unlock_irq(&ms->lock);
+ wake(ms);
} else
while ((bio = bio_list_pop(&sync)))
do_write(ms, bio);
@@ -1241,7 +1262,7 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures)
bio_list_merge(&ms->failures, failures);
spin_unlock_irq(&ms->lock);
- wake(ms);
+ delayed_wake(ms);
}
static void trigger_event(struct work_struct *work)
@@ -1255,7 +1276,7 @@ static void trigger_event(struct work_struct *work)
/*-----------------------------------------------------------------
* kmirrord
*---------------------------------------------------------------*/
-static int _do_mirror(struct work_struct *work)
+static void do_mirror(struct work_struct *work)
{
struct mirror_set *ms =container_of(work, struct mirror_set,
kmirrord_work);
@@ -1277,23 +1298,7 @@ static int _do_mirror(struct work_struct *work)
do_writes(ms, &writes);
do_failures(ms, &failures);
- return (ms->failures.head) ? 1 : 0;
-}
-
-static void do_mirror(struct work_struct *work)
-{
- /*
- * If _do_mirror returns 1, we give it
- * another shot. This helps for cases like
- * 'suspend' where we call flush_workqueue
- * and expect all work to be finished. If
- * a failure happens during a suspend, we
- * couldn't issue a 'wake' because it would
- * not be honored. Therefore, we return '1'
- * from _do_mirror, and retry here.
- */
- while (_do_mirror(work))
- schedule();
+ dm_table_unplug_all(ms->ti->table);
}
@@ -1303,7 +1308,7 @@ static void do_mirror(struct work_struct *work)
static struct mirror_set *alloc_context(unsigned int nr_mirrors,
uint32_t region_size,
struct dm_target *ti,
- struct dirty_log *dl)
+ struct dm_dirty_log *dl)
{
size_t len;
struct mirror_set *ms = NULL;
@@ -1403,12 +1408,12 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
/*
* Create dirty log: log_type #log_params <log_params>
*/
-static struct dirty_log *create_dirty_log(struct dm_target *ti,
+static struct dm_dirty_log *create_dirty_log(struct dm_target *ti,
unsigned int argc, char **argv,
unsigned int *args_used)
{
unsigned int param_count;
- struct dirty_log *dl;
+ struct dm_dirty_log *dl;
if (argc < 2) {
ti->error = "Insufficient mirror log arguments";
@@ -1427,7 +1432,7 @@ static struct dirty_log *create_dirty_log(struct dm_target *ti,
return NULL;
}
- dl = dm_create_dirty_log(argv[0], ti, param_count, argv + 2);
+ dl = dm_dirty_log_create(argv[0], ti, param_count, argv + 2);
if (!dl) {
ti->error = "Error creating mirror dirty log";
return NULL;
@@ -1435,7 +1440,7 @@ static struct dirty_log *create_dirty_log(struct dm_target *ti,
if (!_check_region_size(ti, dl->type->get_region_size(dl))) {
ti->error = "Invalid region size";
- dm_destroy_dirty_log(dl);
+ dm_dirty_log_destroy(dl);
return NULL;
}
@@ -1496,7 +1501,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
int r;
unsigned int nr_mirrors, m, args_used;
struct mirror_set *ms;
- struct dirty_log *dl;
+ struct dm_dirty_log *dl;
dl = create_dirty_log(ti, argc, argv, &args_used);
if (!dl)
@@ -1506,9 +1511,9 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
argc -= args_used;
if (!argc || sscanf(argv[0], "%u", &nr_mirrors) != 1 ||
- nr_mirrors < 2 || nr_mirrors > KCOPYD_MAX_REGIONS + 1) {
+ nr_mirrors < 2 || nr_mirrors > DM_KCOPYD_MAX_REGIONS + 1) {
ti->error = "Invalid number of mirrors";
- dm_destroy_dirty_log(dl);
+ dm_dirty_log_destroy(dl);
return -EINVAL;
}
@@ -1516,13 +1521,13 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (argc < nr_mirrors * 2) {
ti->error = "Too few mirror arguments";
- dm_destroy_dirty_log(dl);
+ dm_dirty_log_destroy(dl);
return -EINVAL;
}
ms = alloc_context(nr_mirrors, dl->type->get_region_size(dl), ti, dl);
if (!ms) {
- dm_destroy_dirty_log(dl);
+ dm_dirty_log_destroy(dl);
return -ENOMEM;
}
@@ -1547,6 +1552,8 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto err_free_context;
}
INIT_WORK(&ms->kmirrord_work, do_mirror);
+ init_timer(&ms->timer);
+ ms->timer_pending = 0;
INIT_WORK(&ms->trigger_event, trigger_event);
r = parse_features(ms, argc, argv, &args_used);
@@ -1571,7 +1578,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto err_destroy_wq;
}
- r = kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client);
+ r = dm_kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client);
if (r)
goto err_destroy_wq;
@@ -1589,8 +1596,9 @@ static void mirror_dtr(struct dm_target *ti)
{
struct mirror_set *ms = (struct mirror_set *) ti->private;
+ del_timer_sync(&ms->timer);
flush_workqueue(ms->kmirrord_wq);
- kcopyd_client_destroy(ms->kcopyd_client);
+ dm_kcopyd_client_destroy(ms->kcopyd_client);
destroy_workqueue(ms->kmirrord_wq);
free_context(ms, ti, ms->nr_mirrors);
}
@@ -1734,7 +1742,7 @@ out:
static void mirror_presuspend(struct dm_target *ti)
{
struct mirror_set *ms = (struct mirror_set *) ti->private;
- struct dirty_log *log = ms->rh.log;
+ struct dm_dirty_log *log = ms->rh.log;
atomic_set(&ms->suspend, 1);
@@ -1763,7 +1771,7 @@ static void mirror_presuspend(struct dm_target *ti)
static void mirror_postsuspend(struct dm_target *ti)
{
struct mirror_set *ms = ti->private;
- struct dirty_log *log = ms->rh.log;
+ struct dm_dirty_log *log = ms->rh.log;
if (log->type->postsuspend && log->type->postsuspend(log))
/* FIXME: need better error handling */
@@ -1773,7 +1781,7 @@ static void mirror_postsuspend(struct dm_target *ti)
static void mirror_resume(struct dm_target *ti)
{
struct mirror_set *ms = ti->private;
- struct dirty_log *log = ms->rh.log;
+ struct dm_dirty_log *log = ms->rh.log;
atomic_set(&ms->suspend, 0);
if (log->type->resume && log->type->resume(log))
@@ -1811,7 +1819,7 @@ static int mirror_status(struct dm_target *ti, status_type_t type,
{
unsigned int m, sz = 0;
struct mirror_set *ms = (struct mirror_set *) ti->private;
- struct dirty_log *log = ms->rh.log;
+ struct dm_dirty_log *log = ms->rh.log;
char buffer[ms->nr_mirrors + 1];
switch (type) {
@@ -1864,15 +1872,9 @@ static int __init dm_mirror_init(void)
{
int r;
- r = dm_dirty_log_init();
- if (r)
- return r;
-
r = dm_register_target(&mirror_target);
- if (r < 0) {
+ if (r < 0)
DMERR("Failed to register mirror target");
- dm_dirty_log_exit();
- }
return r;
}
@@ -1884,8 +1886,6 @@ static void __exit dm_mirror_exit(void)
r = dm_unregister_target(&mirror_target);
if (r < 0)
DMERR("unregister failed %d", r);
-
- dm_dirty_log_exit();
}
/* Module hooks */
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 4dc8a43c034..1ba8a47d61b 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -18,10 +18,10 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/log2.h>
+#include <linux/dm-kcopyd.h>
#include "dm-snap.h"
#include "dm-bio-list.h"
-#include "kcopyd.h"
#define DM_MSG_PREFIX "snapshots"
@@ -36,9 +36,9 @@
#define SNAPSHOT_COPY_PRIORITY 2
/*
- * Each snapshot reserves this many pages for io
+ * Reserve 1MB for each snapshot initially (with minimum of 1 page).
*/
-#define SNAPSHOT_PAGES 256
+#define SNAPSHOT_PAGES (((1UL << 20) >> PAGE_SHIFT) ? : 1)
static struct workqueue_struct *ksnapd;
static void flush_queued_bios(struct work_struct *work);
@@ -536,7 +536,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
s->last_percent = 0;
init_rwsem(&s->lock);
spin_lock_init(&s->pe_lock);
- s->table = ti->table;
+ s->ti = ti;
/* Allocate hash table for COW data */
if (init_hash_tables(s)) {
@@ -558,7 +558,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad4;
}
- r = kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client);
+ r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client);
if (r) {
ti->error = "Could not create kcopyd client";
goto bad5;
@@ -591,7 +591,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
return 0;
bad6:
- kcopyd_client_destroy(s->kcopyd_client);
+ dm_kcopyd_client_destroy(s->kcopyd_client);
bad5:
s->store.destroy(&s->store);
@@ -613,7 +613,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
static void __free_exceptions(struct dm_snapshot *s)
{
- kcopyd_client_destroy(s->kcopyd_client);
+ dm_kcopyd_client_destroy(s->kcopyd_client);
s->kcopyd_client = NULL;
exit_exception_table(&s->pending, pending_cache);
@@ -699,7 +699,7 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err)
s->valid = 0;
- dm_table_event(s->table);
+ dm_table_event(s->ti->table);
}
static void get_pending_exception(struct dm_snap_pending_exception *pe)
@@ -824,7 +824,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context)
static void start_copy(struct dm_snap_pending_exception *pe)
{
struct dm_snapshot *s = pe->snap;
- struct io_region src, dest;
+ struct dm_io_region src, dest;
struct block_device *bdev = s->origin->bdev;
sector_t dev_size;
@@ -839,7 +839,7 @@ static void start_copy(struct dm_snap_pending_exception *pe)
dest.count = src.count;
/* Hand over to kcopyd */
- kcopyd_copy(s->kcopyd_client,
+ dm_kcopyd_copy(s->kcopyd_client,
&src, 1, &dest, 0, copy_callback, pe);
}
@@ -1060,7 +1060,7 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
goto next_snapshot;
/* Nothing to do if writing beyond end of snapshot */
- if (bio->bi_sector >= dm_table_get_size(snap->table))
+ if (bio->bi_sector >= dm_table_get_size(snap->ti->table))
goto next_snapshot;
/*
diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h
index 93bce5d4974..24f9fb73b98 100644
--- a/drivers/md/dm-snap.h
+++ b/drivers/md/dm-snap.h
@@ -132,7 +132,7 @@ struct exception_store {
struct dm_snapshot {
struct rw_semaphore lock;
- struct dm_table *table;
+ struct dm_target *ti;
struct dm_dev *origin;
struct dm_dev *cow;
@@ -169,7 +169,7 @@ struct dm_snapshot {
/* The on disk metadata handler */
struct exception_store store;
- struct kcopyd_client *kcopyd_client;
+ struct dm_kcopyd_client *kcopyd_client;
/* Queue of snapshot writes for ksnapd to flush */
struct bio_list queued_bios;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index e75b1437b58..94116eaf470 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -245,44 +245,6 @@ int dm_table_create(struct dm_table **result, int mode,
return 0;
}
-int dm_create_error_table(struct dm_table **result, struct mapped_device *md)
-{
- struct dm_table *t;
- sector_t dev_size = 1;
- int r;
-
- /*
- * Find current size of device.
- * Default to 1 sector if inactive.
- */
- t = dm_get_table(md);
- if (t) {
- dev_size = dm_table_get_size(t);
- dm_table_put(t);
- }
-
- r = dm_table_create(&t, FMODE_READ, 1, md);
- if (r)
- return r;
-
- r = dm_table_add_target(t, "error", 0, dev_size, NULL);
- if (r)
- goto out;
-
- r = dm_table_complete(t);
- if (r)
- goto out;
-
- *result = t;
-
-out:
- if (r)
- dm_table_put(t);
-
- return r;
-}
-EXPORT_SYMBOL_GPL(dm_create_error_table);
-
static void free_devices(struct list_head *devices)
{
struct list_head *tmp, *next;
@@ -911,10 +873,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
q->max_hw_sectors = t->limits.max_hw_sectors;
q->seg_boundary_mask = t->limits.seg_boundary_mask;
q->bounce_pfn = t->limits.bounce_pfn;
+
if (t->limits.no_cluster)
- q->queue_flags &= ~(1 << QUEUE_FLAG_CLUSTER);
+ queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
else
- q->queue_flags |= (1 << QUEUE_FLAG_CLUSTER);
+ queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
}
@@ -954,7 +917,7 @@ void dm_table_presuspend_targets(struct dm_table *t)
if (!t)
return;
- return suspend_targets(t, 0);
+ suspend_targets(t, 0);
}
void dm_table_postsuspend_targets(struct dm_table *t)
@@ -962,7 +925,7 @@ void dm_table_postsuspend_targets(struct dm_table *t)
if (!t)
return;
- return suspend_targets(t, 1);
+ suspend_targets(t, 1);
}
int dm_table_resume_targets(struct dm_table *t)
diff --git a/drivers/md/dm-uevent.c b/drivers/md/dm-uevent.c
index 50377e5dc2a..6f65883aef1 100644
--- a/drivers/md/dm-uevent.c
+++ b/drivers/md/dm-uevent.c
@@ -78,7 +78,7 @@ static struct dm_uevent *dm_build_path_uevent(struct mapped_device *md,
event = dm_uevent_alloc(md);
if (!event) {
- DMERR("%s: dm_uevent_alloc() failed", __FUNCTION__);
+ DMERR("%s: dm_uevent_alloc() failed", __func__);
goto err_nomem;
}
@@ -86,32 +86,32 @@ static struct dm_uevent *dm_build_path_uevent(struct mapped_device *md,
if (add_uevent_var(&event->ku_env, "DM_TARGET=%s", ti->type->name)) {
DMERR("%s: add_uevent_var() for DM_TARGET failed",
- __FUNCTION__);
+ __func__);
goto err_add;
}
if (add_uevent_var(&event->ku_env, "DM_ACTION=%s", dm_action)) {
DMERR("%s: add_uevent_var() for DM_ACTION failed",
- __FUNCTION__);
+ __func__);
goto err_add;
}
if (add_uevent_var(&event->ku_env, "DM_SEQNUM=%u",
dm_next_uevent_seq(md))) {
DMERR("%s: add_uevent_var() for DM_SEQNUM failed",
- __FUNCTION__);
+ __func__);
goto err_add;
}
if (add_uevent_var(&event->ku_env, "DM_PATH=%s", path)) {
- DMERR("%s: add_uevent_var() for DM_PATH failed", __FUNCTION__);
+ DMERR("%s: add_uevent_var() for DM_PATH failed", __func__);
goto err_add;
}
if (add_uevent_var(&event->ku_env, "DM_NR_VALID_PATHS=%d",
nr_valid_paths)) {
DMERR("%s: add_uevent_var() for DM_NR_VALID_PATHS failed",
- __FUNCTION__);
+ __func__);
goto err_add;
}
@@ -146,25 +146,25 @@ void dm_send_uevents(struct list_head *events, struct kobject *kobj)
if (dm_copy_name_and_uuid(event->md, event->name,
event->uuid)) {
DMERR("%s: dm_copy_name_and_uuid() failed",
- __FUNCTION__);
+ __func__);
goto uevent_free;
}
if (add_uevent_var(&event->ku_env, "DM_NAME=%s", event->name)) {
DMERR("%s: add_uevent_var() for DM_NAME failed",
- __FUNCTION__);
+ __func__);
goto uevent_free;
}
if (add_uevent_var(&event->ku_env, "DM_UUID=%s", event->uuid)) {
DMERR("%s: add_uevent_var() for DM_UUID failed",
- __FUNCTION__);
+ __func__);
goto uevent_free;
}
r = kobject_uevent_env(kobj, event->action, event->ku_env.envp);
if (r)
- DMERR("%s: kobject_uevent_env failed", __FUNCTION__);
+ DMERR("%s: kobject_uevent_env failed", __func__);
uevent_free:
dm_uevent_free(event);
}
@@ -187,7 +187,7 @@ void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti,
struct dm_uevent *event;
if (event_type >= ARRAY_SIZE(_dm_uevent_type_names)) {
- DMERR("%s: Invalid event_type %d", __FUNCTION__, event_type);
+ DMERR("%s: Invalid event_type %d", __func__, event_type);
goto out;
}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6617ce4af09..372369b1cc2 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -204,6 +204,7 @@ static int (*_inits[])(void) __initdata = {
dm_target_init,
dm_linear_init,
dm_stripe_init,
+ dm_kcopyd_init,
dm_interface_init,
};
@@ -212,6 +213,7 @@ static void (*_exits[])(void) = {
dm_target_exit,
dm_linear_exit,
dm_stripe_exit,
+ dm_kcopyd_exit,
dm_interface_exit,
};
@@ -922,7 +924,7 @@ static void free_minor(int minor)
/*
* See if the device with a specific minor # is free.
*/
-static int specific_minor(struct mapped_device *md, int minor)
+static int specific_minor(int minor)
{
int r, m;
@@ -955,7 +957,7 @@ out:
return r;
}
-static int next_free_minor(struct mapped_device *md, int *minor)
+static int next_free_minor(int *minor)
{
int r, m;
@@ -966,9 +968,8 @@ static int next_free_minor(struct mapped_device *md, int *minor)
spin_lock(&_minor_lock);
r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m);
- if (r) {
+ if (r)
goto out;
- }
if (m >= (1 << MINORBITS)) {
idr_remove(&_minor_idr, m);
@@ -991,7 +992,7 @@ static struct block_device_operations dm_blk_dops;
static struct mapped_device *alloc_dev(int minor)
{
int r;
- struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL);
+ struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL);
void *old_md;
if (!md) {
@@ -1004,13 +1005,12 @@ static struct mapped_device *alloc_dev(int minor)
/* get a minor number for the dev */
if (minor == DM_ANY_MINOR)
- r = next_free_minor(md, &minor);
+ r = next_free_minor(&minor);
else
- r = specific_minor(md, minor);
+ r = specific_minor(minor);
if (r < 0)
goto bad_minor;
- memset(md, 0, sizeof(*md));
init_rwsem(&md->io_lock);
mutex_init(&md->suspend_lock);
spin_lock_init(&md->pushback_lock);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index b4584a39383..8c03b634e62 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -16,67 +16,6 @@
#include <linux/blkdev.h>
#include <linux/hdreg.h>
-#define DM_NAME "device-mapper"
-
-#define DMERR(f, arg...) \
- printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
-#define DMERR_LIMIT(f, arg...) \
- do { \
- if (printk_ratelimit()) \
- printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " \
- f "\n", ## arg); \
- } while (0)
-
-#define DMWARN(f, arg...) \
- printk(KERN_WARNING DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
-#define DMWARN_LIMIT(f, arg...) \
- do { \
- if (printk_ratelimit()) \
- printk(KERN_WARNING DM_NAME ": " DM_MSG_PREFIX ": " \
- f "\n", ## arg); \
- } while (0)
-
-#define DMINFO(f, arg...) \
- printk(KERN_INFO DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
-#define DMINFO_LIMIT(f, arg...) \
- do { \
- if (printk_ratelimit()) \
- printk(KERN_INFO DM_NAME ": " DM_MSG_PREFIX ": " f \
- "\n", ## arg); \
- } while (0)
-
-#ifdef CONFIG_DM_DEBUG
-# define DMDEBUG(f, arg...) \
- printk(KERN_DEBUG DM_NAME ": " DM_MSG_PREFIX " DEBUG: " f "\n", ## arg)
-# define DMDEBUG_LIMIT(f, arg...) \
- do { \
- if (printk_ratelimit()) \
- printk(KERN_DEBUG DM_NAME ": " DM_MSG_PREFIX ": " f \
- "\n", ## arg); \
- } while (0)
-#else
-# define DMDEBUG(f, arg...) do {} while (0)
-# define DMDEBUG_LIMIT(f, arg...) do {} while (0)
-#endif
-
-#define DMEMIT(x...) sz += ((sz >= maxlen) ? \
- 0 : scnprintf(result + sz, maxlen - sz, x))
-
-#define SECTOR_SHIFT 9
-
-/*
- * Definitions of return values from target end_io function.
- */
-#define DM_ENDIO_INCOMPLETE 1
-#define DM_ENDIO_REQUEUE 2
-
-/*
- * Definitions of return values from target map function.
- */
-#define DM_MAPIO_SUBMITTED 0
-#define DM_MAPIO_REMAPPED 1
-#define DM_MAPIO_REQUEUE DM_ENDIO_REQUEUE
-
/*
* Suspend feature flags
*/
@@ -136,34 +75,6 @@ static inline int array_too_big(unsigned long fixed, unsigned long obj,
return (num > (ULONG_MAX - fixed) / obj);
}
-/*
- * Ceiling(n / sz)
- */
-#define dm_div_up(n, sz) (((n) + (sz) - 1) / (sz))
-
-#define dm_sector_div_up(n, sz) ( \
-{ \
- sector_t _r = ((n) + (sz) - 1); \
- sector_div(_r, (sz)); \
- _r; \
-} \
-)
-
-/*
- * ceiling(n / size) * size
- */
-#define dm_round_up(n, sz) (dm_div_up((n), (sz)) * (sz))
-
-static inline sector_t to_sector(unsigned long n)
-{
- return (n >> 9);
-}
-
-static inline unsigned long to_bytes(sector_t n)
-{
- return (n << 9);
-}
-
int dm_split_args(int *argc, char ***argvp, char *input);
/*
@@ -189,4 +100,13 @@ int dm_lock_for_deletion(struct mapped_device *md);
void dm_kobject_uevent(struct mapped_device *md);
+/*
+ * Dirty log
+ */
+int dm_dirty_log_init(void);
+void dm_dirty_log_exit(void);
+
+int dm_kcopyd_init(void);
+void dm_kcopyd_exit(void);
+
#endif
diff --git a/drivers/md/kcopyd.h b/drivers/md/kcopyd.h
deleted file mode 100644
index 4845f2a0c67..00000000000
--- a/drivers/md/kcopyd.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (C) 2001 Sistina Software
- *
- * This file is released under the GPL.
- *
- * Kcopyd provides a simple interface for copying an area of one
- * block-device to one or more other block-devices, with an asynchronous
- * completion notification.
- */
-
-#ifndef DM_KCOPYD_H
-#define DM_KCOPYD_H
-
-#include "dm-io.h"
-
-/* FIXME: make this configurable */
-#define KCOPYD_MAX_REGIONS 8
-
-#define KCOPYD_IGNORE_ERROR 1
-
-/*
- * To use kcopyd you must first create a kcopyd client object.
- */
-struct kcopyd_client;
-int kcopyd_client_create(unsigned int num_pages, struct kcopyd_client **result);
-void kcopyd_client_destroy(struct kcopyd_client *kc);
-
-/*
- * Submit a copy job to kcopyd. This is built on top of the
- * previous three fns.
- *
- * read_err is a boolean,
- * write_err is a bitset, with 1 bit for each destination region
- */
-typedef void (*kcopyd_notify_fn)(int read_err, unsigned long write_err,
- void *context);
-
-int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from,
- unsigned int num_dests, struct io_region *dests,
- unsigned int flags, kcopyd_notify_fn fn, void *context);
-
-#endif
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5ebfb4d7990..83eb78b0013 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -276,13 +276,15 @@ static mddev_t * mddev_find(dev_t unit)
init_waitqueue_head(&new->sb_wait);
new->reshape_position = MaxSector;
new->resync_max = MaxSector;
+ new->level = LEVEL_NONE;
new->queue = blk_alloc_queue(GFP_KERNEL);
if (!new->queue) {
kfree(new);
return NULL;
}
- set_bit(QUEUE_FLAG_CLUSTER, &new->queue->queue_flags);
+ /* Can be unlocked because the queue is new: no concurrency */
+ queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, new->queue);
blk_queue_make_request(new->queue, md_fail_request);
@@ -731,9 +733,9 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
else
rdev->desc_nr = sb->this_disk.number;
- if (refdev == 0)
+ if (!refdev) {
ret = 1;
- else {
+ } else {
__u64 ev1, ev2;
mdp_super_t *refsb = (mdp_super_t*)page_address(refdev->sb_page);
if (!uuid_equal(refsb, sb)) {
@@ -1116,9 +1118,9 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
else
rdev->desc_nr = le32_to_cpu(sb->dev_number);
- if (refdev == 0)
+ if (!refdev) {
ret = 1;
- else {
+ } else {
__u64 ev1, ev2;
struct mdp_superblock_1 *refsb =
(struct mdp_superblock_1*)page_address(refdev->sb_page);
@@ -1368,6 +1370,11 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
MD_BUG();
return -EINVAL;
}
+
+ /* prevent duplicates */
+ if (find_rdev(mddev, rdev->bdev->bd_dev))
+ return -EEXIST;
+
/* make sure rdev->size exceeds mddev->size */
if (rdev->size && (mddev->size == 0 || rdev->size < mddev->size)) {
if (mddev->pers) {
@@ -1651,6 +1658,8 @@ static void md_update_sb(mddev_t * mddev, int force_change)
int sync_req;
int nospares = 0;
+ if (mddev->external)
+ return;
repeat:
spin_lock_irq(&mddev->write_lock);
@@ -1819,6 +1828,10 @@ state_show(mdk_rdev_t *rdev, char *page)
len += sprintf(page+len, "%swrite_mostly",sep);
sep = ",";
}
+ if (test_bit(Blocked, &rdev->flags)) {
+ len += sprintf(page+len, "%sblocked", sep);
+ sep = ",";
+ }
if (!test_bit(Faulty, &rdev->flags) &&
!test_bit(In_sync, &rdev->flags)) {
len += sprintf(page+len, "%sspare", sep);
@@ -1835,6 +1848,8 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
* remove - disconnects the device
* writemostly - sets write_mostly
* -writemostly - clears write_mostly
+ * blocked - sets the Blocked flag
+ * -blocked - clears the Blocked flag
*/
int err = -EINVAL;
if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
@@ -1857,6 +1872,16 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
} else if (cmd_match(buf, "-writemostly")) {
clear_bit(WriteMostly, &rdev->flags);
err = 0;
+ } else if (cmd_match(buf, "blocked")) {
+ set_bit(Blocked, &rdev->flags);
+ err = 0;
+ } else if (cmd_match(buf, "-blocked")) {
+ clear_bit(Blocked, &rdev->flags);
+ wake_up(&rdev->blocked_wait);
+ set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
+ md_wakeup_thread(rdev->mddev->thread);
+
+ err = 0;
}
return err ? err : len;
}
@@ -2096,7 +2121,7 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr,
rv = -EBUSY;
else
rv = entry->store(rdev, page, length);
- mddev_unlock(rdev->mddev);
+ mddev_unlock(mddev);
}
return rv;
}
@@ -2185,7 +2210,9 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
goto abort_free;
}
}
+
INIT_LIST_HEAD(&rdev->same_set);
+ init_waitqueue_head(&rdev->blocked_wait);
return rdev;
@@ -2456,7 +2483,6 @@ resync_start_show(mddev_t *mddev, char *page)
static ssize_t
resync_start_store(mddev_t *mddev, const char *buf, size_t len)
{
- /* can only set chunk_size if array is not yet active */
char *e;
unsigned long long n = simple_strtoull(buf, &e, 10);
@@ -2590,15 +2616,20 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
err = do_md_stop(mddev, 1);
else {
mddev->ro = 1;
+ set_disk_ro(mddev->gendisk, 1);
err = do_md_run(mddev);
}
break;
case read_auto:
- /* stopping an active array */
if (mddev->pers) {
- err = do_md_stop(mddev, 1);
- if (err == 0)
- mddev->ro = 2; /* FIXME mark devices writable */
+ if (mddev->ro != 1)
+ err = do_md_stop(mddev, 1);
+ else
+ err = restart_array(mddev);
+ if (err == 0) {
+ mddev->ro = 2;
+ set_disk_ro(mddev->gendisk, 0);
+ }
} else {
mddev->ro = 2;
err = do_md_run(mddev);
@@ -2611,6 +2642,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
if (atomic_read(&mddev->writes_pending) == 0) {
if (mddev->in_sync == 0) {
mddev->in_sync = 1;
+ if (mddev->safemode == 1)
+ mddev->safemode = 0;
if (mddev->persistent)
set_bit(MD_CHANGE_CLEAN,
&mddev->flags);
@@ -2634,6 +2667,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
err = 0;
} else {
mddev->ro = 0;
+ set_disk_ro(mddev->gendisk, 0);
err = do_md_run(mddev);
}
break;
@@ -3711,6 +3745,30 @@ static int do_md_stop(mddev_t * mddev, int mode)
mddev->reshape_position = MaxSector;
mddev->external = 0;
mddev->persistent = 0;
+ mddev->level = LEVEL_NONE;
+ mddev->clevel[0] = 0;
+ mddev->flags = 0;
+ mddev->ro = 0;
+ mddev->metadata_type[0] = 0;
+ mddev->chunk_size = 0;
+ mddev->ctime = mddev->utime = 0;
+ mddev->layout = 0;
+ mddev->max_disks = 0;
+ mddev->events = 0;
+ mddev->delta_disks = 0;
+ mddev->new_level = LEVEL_NONE;
+ mddev->new_layout = 0;
+ mddev->new_chunk = 0;
+ mddev->curr_resync = 0;
+ mddev->resync_mismatches = 0;
+ mddev->suspend_lo = mddev->suspend_hi = 0;
+ mddev->sync_speed_min = mddev->sync_speed_max = 0;
+ mddev->recovery = 0;
+ mddev->in_sync = 0;
+ mddev->changed = 0;
+ mddev->degraded = 0;
+ mddev->barriers_work = 0;
+ mddev->safemode = 0;
} else if (mddev->pers)
printk(KERN_INFO "md: %s switched to read-only mode.\n",
@@ -4918,6 +4976,9 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
if (!rdev || test_bit(Faulty, &rdev->flags))
return;
+
+ if (mddev->external)
+ set_bit(Blocked, &rdev->flags);
/*
dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n",
mdname(mddev),
@@ -5364,6 +5425,8 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
md_wakeup_thread(mddev->sync_thread);
}
atomic_inc(&mddev->writes_pending);
+ if (mddev->safemode == 1)
+ mddev->safemode = 0;
if (mddev->in_sync) {
spin_lock_irq(&mddev->write_lock);
if (mddev->in_sync) {
@@ -5718,7 +5781,7 @@ static int remove_and_add_spares(mddev_t *mddev)
rdev_for_each(rdev, rtmp, mddev)
if (rdev->raid_disk >= 0 &&
- !mddev->external &&
+ !test_bit(Blocked, &rdev->flags) &&
(test_bit(Faulty, &rdev->flags) ||
! test_bit(In_sync, &rdev->flags)) &&
atomic_read(&rdev->nr_pending)==0) {
@@ -5788,7 +5851,7 @@ void md_check_recovery(mddev_t *mddev)
return;
if (signal_pending(current)) {
- if (mddev->pers->sync_request) {
+ if (mddev->pers->sync_request && !mddev->external) {
printk(KERN_INFO "md: %s in immediate safe mode\n",
mdname(mddev));
mddev->safemode = 2;
@@ -5800,7 +5863,7 @@ void md_check_recovery(mddev_t *mddev)
(mddev->flags && !mddev->external) ||
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
- (mddev->safemode == 1) ||
+ (mddev->external == 0 && mddev->safemode == 1) ||
(mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
&& !mddev->in_sync && mddev->recovery_cp == MaxSector)
))
@@ -5809,16 +5872,20 @@ void md_check_recovery(mddev_t *mddev)
if (mddev_trylock(mddev)) {
int spares = 0;
- spin_lock_irq(&mddev->write_lock);
- if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
- !mddev->in_sync && mddev->recovery_cp == MaxSector) {
- mddev->in_sync = 1;
- if (mddev->persistent)
- set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+ if (!mddev->external) {
+ spin_lock_irq(&mddev->write_lock);
+ if (mddev->safemode &&
+ !atomic_read(&mddev->writes_pending) &&
+ !mddev->in_sync &&
+ mddev->recovery_cp == MaxSector) {
+ mddev->in_sync = 1;
+ if (mddev->persistent)
+ set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+ }
+ if (mddev->safemode == 1)
+ mddev->safemode = 0;
+ spin_unlock_irq(&mddev->write_lock);
}
- if (mddev->safemode == 1)
- mddev->safemode = 0;
- spin_unlock_irq(&mddev->write_lock);
if (mddev->flags)
md_update_sb(mddev, 0);
@@ -5913,6 +5980,16 @@ void md_check_recovery(mddev_t *mddev)
}
}
+void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
+{
+ sysfs_notify(&rdev->kobj, NULL, "state");
+ wait_event_timeout(rdev->blocked_wait,
+ !test_bit(Blocked, &rdev->flags),
+ msecs_to_jiffies(5000));
+ rdev_dec_pending(rdev, mddev);
+}
+EXPORT_SYMBOL(md_wait_for_blocked_rdev);
+
static int md_notify_reboot(struct notifier_block *this,
unsigned long code, void *x)
{
@@ -5947,13 +6024,9 @@ static struct notifier_block md_notifier = {
static void md_geninit(void)
{
- struct proc_dir_entry *p;
-
dprintk("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
- p = create_proc_entry("mdstat", S_IRUGO, NULL);
- if (p)
- p->proc_fops = &md_seq_fops;
+ proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
}
static int __init md_init(void)
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 3f299d835a2..42ee1a2dc14 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -244,7 +244,8 @@ static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev)
conf->working_disks--;
mddev->degraded++;
printk(KERN_ALERT "multipath: IO failure on %s,"
- " disabling IO path. \n Operation continuing"
+ " disabling IO path.\n"
+ "multipath: Operation continuing"
" on %d IO paths.\n",
bdevname (rdev->bdev,b),
conf->working_disks);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index ff61b309129..6778b7cb39b 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -773,7 +773,6 @@ static int make_request(struct request_queue *q, struct bio * bio)
r1bio_t *r1_bio;
struct bio *read_bio;
int i, targets = 0, disks;
- mdk_rdev_t *rdev;
struct bitmap *bitmap = mddev->bitmap;
unsigned long flags;
struct bio_list bl;
@@ -781,6 +780,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
const int rw = bio_data_dir(bio);
const int do_sync = bio_sync(bio);
int do_barriers;
+ mdk_rdev_t *blocked_rdev;
/*
* Register the new request and wait if the reconstruction
@@ -862,10 +862,17 @@ static int make_request(struct request_queue *q, struct bio * bio)
first = 0;
}
#endif
+ retry_write:
+ blocked_rdev = NULL;
rcu_read_lock();
for (i = 0; i < disks; i++) {
- if ((rdev=rcu_dereference(conf->mirrors[i].rdev)) != NULL &&
- !test_bit(Faulty, &rdev->flags)) {
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
+ if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+ atomic_inc(&rdev->nr_pending);
+ blocked_rdev = rdev;
+ break;
+ }
+ if (rdev && !test_bit(Faulty, &rdev->flags)) {
atomic_inc(&rdev->nr_pending);
if (test_bit(Faulty, &rdev->flags)) {
rdev_dec_pending(rdev, mddev);
@@ -878,6 +885,20 @@ static int make_request(struct request_queue *q, struct bio * bio)
}
rcu_read_unlock();
+ if (unlikely(blocked_rdev)) {
+ /* Wait for this device to become unblocked */
+ int j;
+
+ for (j = 0; j < i; j++)
+ if (r1_bio->bios[j])
+ rdev_dec_pending(conf->mirrors[j].rdev, mddev);
+
+ allow_barrier(conf);
+ md_wait_for_blocked_rdev(blocked_rdev, mddev);
+ wait_barrier(conf);
+ goto retry_write;
+ }
+
BUG_ON(targets == 0); /* we never fail the last device */
if (targets < conf->raid_disks) {
@@ -1008,8 +1029,8 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
} else
set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
- printk(KERN_ALERT "raid1: Disk failure on %s, disabling device. \n"
- " Operation continuing on %d devices\n",
+ printk(KERN_ALERT "raid1: Disk failure on %s, disabling device.\n"
+ "raid1: Operation continuing on %d devices.\n",
bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded);
}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 32389d2f18f..5938fa96292 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -790,6 +790,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
const int do_sync = bio_sync(bio);
struct bio_list bl;
unsigned long flags;
+ mdk_rdev_t *blocked_rdev;
if (unlikely(bio_barrier(bio))) {
bio_endio(bio, -EOPNOTSUPP);
@@ -879,17 +880,23 @@ static int make_request(struct request_queue *q, struct bio * bio)
/*
* WRITE:
*/
- /* first select target devices under spinlock and
+ /* first select target devices under rcu_lock and
* inc refcount on their rdev. Record them by setting
* bios[x] to bio
*/
raid10_find_phys(conf, r10_bio);
+ retry_write:
+ blocked_rdev = 0;
rcu_read_lock();
for (i = 0; i < conf->copies; i++) {
int d = r10_bio->devs[i].devnum;
mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[d].rdev);
- if (rdev &&
- !test_bit(Faulty, &rdev->flags)) {
+ if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+ atomic_inc(&rdev->nr_pending);
+ blocked_rdev = rdev;
+ break;
+ }
+ if (rdev && !test_bit(Faulty, &rdev->flags)) {
atomic_inc(&rdev->nr_pending);
r10_bio->devs[i].bio = bio;
} else {
@@ -899,6 +906,22 @@ static int make_request(struct request_queue *q, struct bio * bio)
}
rcu_read_unlock();
+ if (unlikely(blocked_rdev)) {
+ /* Have to wait for this device to get unblocked, then retry */
+ int j;
+ int d;
+
+ for (j = 0; j < i; j++)
+ if (r10_bio->devs[j].bio) {
+ d = r10_bio->devs[j].devnum;
+ rdev_dec_pending(conf->mirrors[d].rdev, mddev);
+ }
+ allow_barrier(conf);
+ md_wait_for_blocked_rdev(blocked_rdev, mddev);
+ wait_barrier(conf);
+ goto retry_write;
+ }
+
atomic_set(&r10_bio->remaining, 0);
bio_list_init(&bl);
@@ -1001,8 +1024,8 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
}
set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
- printk(KERN_ALERT "raid10: Disk failure on %s, disabling device. \n"
- " Operation continuing on %d devices\n",
+ printk(KERN_ALERT "raid10: Disk failure on %s, disabling device.\n"
+ "raid10: Operation continuing on %d devices.\n",
bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded);
}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index b162b839a66..087eee0cb80 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -63,6 +63,7 @@
#define STRIPE_SHIFT (PAGE_SHIFT - 9)
#define STRIPE_SECTORS (STRIPE_SIZE>>9)
#define IO_THRESHOLD 1
+#define BYPASS_THRESHOLD 1
#define NR_HASH (PAGE_SIZE / sizeof(struct hlist_head))
#define HASH_MASK (NR_HASH - 1)
@@ -398,6 +399,7 @@ static void ops_run_io(struct stripe_head *sh)
might_sleep();
+ set_bit(STRIPE_IO_STARTED, &sh->state);
for (i = disks; i--; ) {
int rw;
struct bio *bi;
@@ -433,7 +435,7 @@ static void ops_run_io(struct stripe_head *sh)
bi->bi_bdev = rdev->bdev;
pr_debug("%s: for %llu schedule op %ld on disc %d\n",
- __FUNCTION__, (unsigned long long)sh->sector,
+ __func__, (unsigned long long)sh->sector,
bi->bi_rw, i);
atomic_inc(&sh->count);
bi->bi_sector = sh->sector + rdev->data_offset;
@@ -520,7 +522,7 @@ static void ops_complete_biofill(void *stripe_head_ref)
raid5_conf_t *conf = sh->raid_conf;
int i;
- pr_debug("%s: stripe %llu\n", __FUNCTION__,
+ pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector);
/* clear completed biofills */
@@ -569,7 +571,7 @@ static void ops_run_biofill(struct stripe_head *sh)
raid5_conf_t *conf = sh->raid_conf;
int i;
- pr_debug("%s: stripe %llu\n", __FUNCTION__,
+ pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector);
for (i = sh->disks; i--; ) {
@@ -600,7 +602,7 @@ static void ops_complete_compute5(void *stripe_head_ref)
int target = sh->ops.target;
struct r5dev *tgt = &sh->dev[target];
- pr_debug("%s: stripe %llu\n", __FUNCTION__,
+ pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector);
set_bit(R5_UPTODATE, &tgt->flags);
@@ -625,7 +627,7 @@ ops_run_compute5(struct stripe_head *sh, unsigned long pending)
int i;
pr_debug("%s: stripe %llu block: %d\n",
- __FUNCTION__, (unsigned long long)sh->sector, target);
+ __func__, (unsigned long long)sh->sector, target);
BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
for (i = disks; i--; )
@@ -653,7 +655,7 @@ static void ops_complete_prexor(void *stripe_head_ref)
{
struct stripe_head *sh = stripe_head_ref;
- pr_debug("%s: stripe %llu\n", __FUNCTION__,
+ pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector);
set_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
@@ -670,7 +672,7 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
/* existing parity data subtracted */
struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
- pr_debug("%s: stripe %llu\n", __FUNCTION__,
+ pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector);
for (i = disks; i--; ) {
@@ -699,7 +701,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
*/
int prexor = test_bit(STRIPE_OP_PREXOR, &pending);
- pr_debug("%s: stripe %llu\n", __FUNCTION__,
+ pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector);
for (i = disks; i--; ) {
@@ -744,7 +746,7 @@ static void ops_complete_postxor(void *stripe_head_ref)
{
struct stripe_head *sh = stripe_head_ref;
- pr_debug("%s: stripe %llu\n", __FUNCTION__,
+ pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector);
set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
@@ -757,7 +759,7 @@ static void ops_complete_write(void *stripe_head_ref)
struct stripe_head *sh = stripe_head_ref;
int disks = sh->disks, i, pd_idx = sh->pd_idx;
- pr_debug("%s: stripe %llu\n", __FUNCTION__,
+ pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector);
for (i = disks; i--; ) {
@@ -787,7 +789,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
unsigned long flags;
dma_async_tx_callback callback;
- pr_debug("%s: stripe %llu\n", __FUNCTION__,
+ pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector);
/* check if prexor is active which means only process blocks
@@ -837,7 +839,7 @@ static void ops_complete_check(void *stripe_head_ref)
struct stripe_head *sh = stripe_head_ref;
int pd_idx = sh->pd_idx;
- pr_debug("%s: stripe %llu\n", __FUNCTION__,
+ pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector);
if (test_and_clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending) &&
@@ -859,7 +861,7 @@ static void ops_run_check(struct stripe_head *sh)
int count = 0, pd_idx = sh->pd_idx, i;
struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
- pr_debug("%s: stripe %llu\n", __FUNCTION__,
+ pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector);
for (i = disks; i--; ) {
@@ -1260,8 +1262,8 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
}
set_bit(Faulty, &rdev->flags);
printk (KERN_ALERT
- "raid5: Disk failure on %s, disabling device."
- " Operation continuing on %d devices\n",
+ "raid5: Disk failure on %s, disabling device.\n"
+ "raid5: Operation continuing on %d devices.\n",
bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded);
}
}
@@ -1720,6 +1722,9 @@ handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
locked++;
}
}
+ if (locked + 1 == disks)
+ if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
+ atomic_inc(&sh->raid_conf->pending_full_writes);
} else {
BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
@@ -1759,7 +1764,7 @@ handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
locked++;
pr_debug("%s: stripe %llu locked: %d pending: %lx\n",
- __FUNCTION__, (unsigned long long)sh->sector,
+ __func__, (unsigned long long)sh->sector,
locked, sh->ops.pending);
return locked;
@@ -1947,6 +1952,9 @@ handle_requests_to_failed_array(raid5_conf_t *conf, struct stripe_head *sh,
STRIPE_SECTORS, 0, 0);
}
+ if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
+ if (atomic_dec_and_test(&conf->pending_full_writes))
+ md_wakeup_thread(conf->mddev->thread);
}
/* __handle_issuing_new_read_requests5 - returns 0 if there are no more disks
@@ -2149,6 +2157,10 @@ static void handle_completed_write_requests(raid5_conf_t *conf,
0);
}
}
+
+ if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
+ if (atomic_dec_and_test(&conf->pending_full_writes))
+ md_wakeup_thread(conf->mddev->thread);
}
static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
@@ -2333,6 +2345,9 @@ static void handle_issuing_new_write_requests6(raid5_conf_t *conf,
s->locked++;
set_bit(R5_Wantwrite, &sh->dev[i].flags);
}
+ if (s->locked == disks)
+ if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
+ atomic_inc(&conf->pending_full_writes);
/* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
set_bit(STRIPE_INSYNC, &sh->state);
@@ -2592,6 +2607,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
}
}
+
/*
* handle_stripe - do things to a stripe.
*
@@ -2617,6 +2633,7 @@ static void handle_stripe5(struct stripe_head *sh)
struct stripe_head_state s;
struct r5dev *dev;
unsigned long pending = 0;
+ mdk_rdev_t *blocked_rdev = NULL;
memset(&s, 0, sizeof(s));
pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d "
@@ -2676,6 +2693,11 @@ static void handle_stripe5(struct stripe_head *sh)
if (dev->written)
s.written++;
rdev = rcu_dereference(conf->disks[i].rdev);
+ if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+ blocked_rdev = rdev;
+ atomic_inc(&rdev->nr_pending);
+ break;
+ }
if (!rdev || !test_bit(In_sync, &rdev->flags)) {
/* The ReadError flag will just be confusing now */
clear_bit(R5_ReadError, &dev->flags);
@@ -2690,6 +2712,11 @@ static void handle_stripe5(struct stripe_head *sh)
}
rcu_read_unlock();
+ if (unlikely(blocked_rdev)) {
+ set_bit(STRIPE_HANDLE, &sh->state);
+ goto unlock;
+ }
+
if (s.to_fill && !test_and_set_bit(STRIPE_OP_BIOFILL, &sh->ops.pending))
sh->ops.count++;
@@ -2879,8 +2906,13 @@ static void handle_stripe5(struct stripe_head *sh)
if (sh->ops.count)
pending = get_stripe_work(sh);
+ unlock:
spin_unlock(&sh->lock);
+ /* wait for this device to become unblocked */
+ if (unlikely(blocked_rdev))
+ md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
+
if (pending)
raid5_run_ops(sh, pending);
@@ -2897,6 +2929,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
struct stripe_head_state s;
struct r6_state r6s;
struct r5dev *dev, *pdev, *qdev;
+ mdk_rdev_t *blocked_rdev = NULL;
r6s.qd_idx = raid6_next_disk(pd_idx, disks);
pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
@@ -2960,6 +2993,11 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
if (dev->written)
s.written++;
rdev = rcu_dereference(conf->disks[i].rdev);
+ if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+ blocked_rdev = rdev;
+ atomic_inc(&rdev->nr_pending);
+ break;
+ }
if (!rdev || !test_bit(In_sync, &rdev->flags)) {
/* The ReadError flag will just be confusing now */
clear_bit(R5_ReadError, &dev->flags);
@@ -2974,6 +3012,11 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
set_bit(R5_Insync, &dev->flags);
}
rcu_read_unlock();
+
+ if (unlikely(blocked_rdev)) {
+ set_bit(STRIPE_HANDLE, &sh->state);
+ goto unlock;
+ }
pr_debug("locked=%d uptodate=%d to_read=%d"
" to_write=%d failed=%d failed_num=%d,%d\n",
s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
@@ -3079,8 +3122,13 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
!test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending))
handle_stripe_expansion(conf, sh, &r6s);
+ unlock:
spin_unlock(&sh->lock);
+ /* wait for this device to become unblocked */
+ if (unlikely(blocked_rdev))
+ md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
+
return_io(return_bi);
for (i=disks; i-- ;) {
@@ -3094,6 +3142,8 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
else
continue;
+ set_bit(STRIPE_IO_STARTED, &sh->state);
+
bi = &sh->dev[i].req;
bi->bi_rw = rw;
@@ -3164,7 +3214,7 @@ static void raid5_activate_delayed(raid5_conf_t *conf)
clear_bit(STRIPE_DELAYED, &sh->state);
if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
atomic_inc(&conf->preread_active_stripes);
- list_add_tail(&sh->lru, &conf->handle_list);
+ list_add_tail(&sh->lru, &conf->hold_list);
}
} else
blk_plug_device(conf->mddev->queue);
@@ -3442,6 +3492,58 @@ static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio)
}
}
+/* __get_priority_stripe - get the next stripe to process
+ *
+ * Full stripe writes are allowed to pass preread active stripes up until
+ * the bypass_threshold is exceeded. In general the bypass_count
+ * increments when the handle_list is handled before the hold_list; however, it
+ * will not be incremented when STRIPE_IO_STARTED is sampled set signifying a
+ * stripe with in flight i/o. The bypass_count will be reset when the
+ * head of the hold_list has changed, i.e. the head was promoted to the
+ * handle_list.
+ */
+static struct stripe_head *__get_priority_stripe(raid5_conf_t *conf)
+{
+ struct stripe_head *sh;
+
+ pr_debug("%s: handle: %s hold: %s full_writes: %d bypass_count: %d\n",
+ __func__,
+ list_empty(&conf->handle_list) ? "empty" : "busy",
+ list_empty(&conf->hold_list) ? "empty" : "busy",
+ atomic_read(&conf->pending_full_writes), conf->bypass_count);
+
+ if (!list_empty(&conf->handle_list)) {
+ sh = list_entry(conf->handle_list.next, typeof(*sh), lru);
+
+ if (list_empty(&conf->hold_list))
+ conf->bypass_count = 0;
+ else if (!test_bit(STRIPE_IO_STARTED, &sh->state)) {
+ if (conf->hold_list.next == conf->last_hold)
+ conf->bypass_count++;
+ else {
+ conf->last_hold = conf->hold_list.next;
+ conf->bypass_count -= conf->bypass_threshold;
+ if (conf->bypass_count < 0)
+ conf->bypass_count = 0;
+ }
+ }
+ } else if (!list_empty(&conf->hold_list) &&
+ ((conf->bypass_threshold &&
+ conf->bypass_count > conf->bypass_threshold) ||
+ atomic_read(&conf->pending_full_writes) == 0)) {
+ sh = list_entry(conf->hold_list.next,
+ typeof(*sh), lru);
+ conf->bypass_count -= conf->bypass_threshold;
+ if (conf->bypass_count < 0)
+ conf->bypass_count = 0;
+ } else
+ return NULL;
+
+ list_del_init(&sh->lru);
+ atomic_inc(&sh->count);
+ BUG_ON(atomic_read(&sh->count) != 1);
+ return sh;
+}
static int make_request(struct request_queue *q, struct bio * bi)
{
@@ -3914,7 +4016,6 @@ static void raid5d(mddev_t *mddev)
handled = 0;
spin_lock_irq(&conf->device_lock);
while (1) {
- struct list_head *first;
struct bio *bio;
if (conf->seq_flush != conf->seq_write) {
@@ -3936,17 +4037,12 @@ static void raid5d(mddev_t *mddev)
handled++;
}
- if (list_empty(&conf->handle_list)) {
+ sh = __get_priority_stripe(conf);
+
+ if (!sh) {
async_tx_issue_pending_all();
break;
}
-
- first = conf->handle_list.next;
- sh = list_entry(first, struct stripe_head, lru);
-
- list_del_init(first);
- atomic_inc(&sh->count);
- BUG_ON(atomic_read(&sh->count)!= 1);
spin_unlock_irq(&conf->device_lock);
handled++;
@@ -3978,15 +4074,13 @@ static ssize_t
raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
{
raid5_conf_t *conf = mddev_to_conf(mddev);
- char *end;
- int new;
+ unsigned long new;
if (len >= PAGE_SIZE)
return -EINVAL;
if (!conf)
return -ENODEV;
- new = simple_strtoul(page, &end, 10);
- if (!*page || (*end && *end != '\n') )
+ if (strict_strtoul(page, 10, &new))
return -EINVAL;
if (new <= 16 || new > 32768)
return -EINVAL;
@@ -4011,6 +4105,40 @@ raid5_stripecache_size = __ATTR(stripe_cache_size, S_IRUGO | S_IWUSR,
raid5_store_stripe_cache_size);
static ssize_t
+raid5_show_preread_threshold(mddev_t *mddev, char *page)
+{ /*
+ * sysfs show handler bound to preread_bypass_threshold: formats
+ * conf->bypass_threshold as "%d\n" into page and returns sprintf()'s
+ * result, or 0 when mddev_to_conf() yields NULL.
+ */
+ raid5_conf_t *conf = mddev_to_conf(mddev);
+ if (conf)
+ return sprintf(page, "%d\n", conf->bypass_threshold);
+ else
+ return 0;
+}
+
+static ssize_t
+raid5_store_preread_threshold(mddev_t *mddev, const char *page, size_t len)
+{ /*
+ * sysfs store handler bound to preread_bypass_threshold: parses page as
+ * a base-10 unsigned long and assigns it to conf->bypass_threshold.
+ * Returns len on success; -EINVAL when len >= PAGE_SIZE, when
+ * strict_strtoul() returns non-zero, or when the value exceeds
+ * conf->max_nr_stripes; -ENODEV when mddev_to_conf() yields NULL.
+ */
+ raid5_conf_t *conf = mddev_to_conf(mddev);
+ unsigned long new;
+ if (len >= PAGE_SIZE)
+ return -EINVAL;
+ if (!conf)
+ return -ENODEV;
+
+ if (strict_strtoul(page, 10, &new))
+ return -EINVAL; /* strict_strtoul() returned non-zero for this input */
+ if (new > conf->max_nr_stripes)
+ return -EINVAL; /* upper bound is conf->max_nr_stripes; 0 is accepted */
+ conf->bypass_threshold = new;
+ return len;
+}
+
+static struct md_sysfs_entry
+raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold, /* attribute name */
+ S_IRUGO | S_IWUSR, /* readable by everyone, writable by the owner */
+ raid5_show_preread_threshold, /* show callback */
+ raid5_store_preread_threshold); /* store callback */
+
+static ssize_t
stripe_cache_active_show(mddev_t *mddev, char *page)
{
raid5_conf_t *conf = mddev_to_conf(mddev);
@@ -4026,6 +4154,7 @@ raid5_stripecache_active = __ATTR_RO(stripe_cache_active);
static struct attribute *raid5_attrs[] = { /* NULL-terminated list of the raid5 sysfs attributes */
&raid5_stripecache_size.attr,
&raid5_stripecache_active.attr,
+ &raid5_preread_bypass_threshold.attr,
NULL,
};
static struct attribute_group raid5_attrs_group = {
@@ -4130,12 +4259,14 @@ static int run(mddev_t *mddev)
init_waitqueue_head(&conf->wait_for_stripe);
init_waitqueue_head(&conf->wait_for_overlap);
INIT_LIST_HEAD(&conf->handle_list);
+ INIT_LIST_HEAD(&conf->hold_list);
INIT_LIST_HEAD(&conf->delayed_list);
INIT_LIST_HEAD(&conf->bitmap_list);
INIT_LIST_HEAD(&conf->inactive_list);
atomic_set(&conf->active_stripes, 0);
atomic_set(&conf->preread_active_stripes, 0);
atomic_set(&conf->active_aligned_reads, 0);
+ conf->bypass_threshold = BYPASS_THRESHOLD;
pr_debug("raid5: run(%s) called.\n", mdname(mddev));
diff --git a/drivers/md/raid6algos.c b/drivers/md/raid6algos.c
index 77a6e4bf503..21987e3dbe6 100644
--- a/drivers/md/raid6algos.c
+++ b/drivers/md/raid6algos.c
@@ -121,7 +121,8 @@ int __init raid6_select_algo(void)
j0 = jiffies;
while ( (j1 = jiffies) == j0 )
cpu_relax();
- while ( (jiffies-j1) < (1 << RAID6_TIME_JIFFIES_LG2) ) {
+ while (time_before(jiffies,
+ j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
(*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs);
perf++;
}