struct dm_io {
struct mapped_device *md;
int error;
- struct bio *bio;
atomic_t io_count;
+ struct bio *bio;
unsigned long start_time;
};
#define DMF_DELETING 4
#define DMF_NOFLUSH_SUSPENDING 5
+/*
+ * Work processed by per-device workqueue.
+ *
+ * A request is filled in by dm_wq_queue() and handled by dm_wq_work(),
+ * which recovers it from the embedded work_struct with container_of().
+ */
+struct dm_wq_req {
+ enum {
+ DM_WQ_FLUSH_ALL, /* merge the pushback list, then flush deferred bios */
+ DM_WQ_FLUSH_DEFERRED, /* flush deferred bios only */
+ } type;
+ struct work_struct work; /* item handed to queue_work() on md->wq */
+ struct mapped_device *md; /* device whose io_lock the handler takes */
+ void *context; /* stored by dm_wq_queue(); callers visible here pass NULL */
+};
+
struct mapped_device {
struct rw_semaphore io_lock;
struct mutex suspend_lock;
struct bio_list deferred;
struct bio_list pushback;
+ /*
+ * Processing queue (flush/barriers)
+ */
+ struct workqueue_struct *wq;
+
/*
* The current mapping.
*/
dm_target_init,
dm_linear_init,
dm_stripe_init,
+ dm_kcopyd_init,
dm_interface_init,
};
dm_target_exit,
dm_linear_exit,
dm_stripe_exit,
+ dm_kcopyd_exit,
dm_interface_exit,
};
* CRUD END
*---------------------------------------------------------------*/
+/*
+ * merge_bvec_fn for the mapped device's request queue.
+ *
+ * @q:      queue whose queuedata is the struct mapped_device
+ * @bvm:    describes the bio being built (bi_sector, bi_size)
+ * @biovec: the segment the caller wants to append
+ *
+ * Returns the number of bytes that may be added at this offset without
+ * the resulting bio needing to be split.  An empty bio is always allowed
+ * to take the whole of @biovec, including when no table is loaded: in
+ * that case this function no longer answers 0, so callers that rely on
+ * the first page being accepted keep working.
+ */
+static int dm_merge_bvec(struct request_queue *q,
+			 struct bvec_merge_data *bvm,
+			 struct bio_vec *biovec)
+{
+	struct mapped_device *md = q->queuedata;
+	struct dm_table *map = dm_get_table(md);
+	struct dm_target *ti;
+	sector_t max_sectors;
+	int max_size = 0;
+
+	/* No table: nothing to put, but the first-page rule still applies */
+	if (unlikely(!map))
+		goto out;
+
+	/*
+	 * NOTE(review): the target returned here is used unchecked, as in
+	 * the original; confirm dm_table_find_target() cannot hand back an
+	 * entry past the end of the table for this sector.
+	 */
+	ti = dm_table_find_target(map, bvm->bi_sector);
+
+	/*
+	 * Find maximum amount of I/O that won't need splitting
+	 */
+	max_sectors = min(max_io_len(md, bvm->bi_sector, ti),
+			  (sector_t) BIO_MAX_SECTORS);
+	max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
+	if (max_size < 0)
+		max_size = 0;
+
+	/*
+	 * merge_bvec_fn() returns number of bytes
+	 * it can accept at this offset
+	 * max is precomputed maximal io size
+	 */
+	if (max_size && ti->type->merge)
+		max_size = ti->type->merge(ti, bvm, biovec, max_size);
+
+	dm_table_put(map);
+
+out:
+	/*
+	 * Always allow an entire first page
+	 */
+	if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT))
+		max_size = biovec->bv_len;
+
+	return max_size;
+}
+
/*
* The request function that just remaps the bio built up by
* dm_merge_bvec.
/*
* See if the device with a specific minor # is free.
*/
-static int specific_minor(struct mapped_device *md, int minor)
+static int specific_minor(int minor)
{
int r, m;
return r;
}
-static int next_free_minor(struct mapped_device *md, int *minor)
+static int next_free_minor(int *minor)
{
int r, m;
spin_lock(&_minor_lock);
r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m);
- if (r) {
+ if (r)
goto out;
- }
if (m >= (1 << MINORBITS)) {
idr_remove(&_minor_idr, m);
static struct mapped_device *alloc_dev(int minor)
{
int r;
- struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL);
+ struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL);
void *old_md;
if (!md) {
/* get a minor number for the dev */
if (minor == DM_ANY_MINOR)
- r = next_free_minor(md, &minor);
+ r = next_free_minor(&minor);
else
- r = specific_minor(md, minor);
+ r = specific_minor(minor);
if (r < 0)
goto bad_minor;
- memset(md, 0, sizeof(*md));
init_rwsem(&md->io_lock);
mutex_init(&md->suspend_lock);
spin_lock_init(&md->pushback_lock);
blk_queue_make_request(md->queue, dm_request);
blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
md->queue->unplug_fn = dm_unplug_all;
+ blk_queue_merge_bvec(md->queue, dm_merge_bvec);
md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache);
if (!md->io_pool)
add_disk(md->disk);
format_dev_t(md->name, MKDEV(_major, minor));
+ md->wq = create_singlethread_workqueue("kdmflush");
+ if (!md->wq)
+ goto bad_thread;
+
/* Populate the mapping, nobody knows we exist yet */
spin_lock(&_minor_lock);
old_md = idr_replace(&_minor_idr, md, minor);
return md;
+bad_thread:
+ put_disk(md->disk);
bad_disk:
bioset_free(md->bs);
bad_no_bioset:
unlock_fs(md);
bdput(md->suspended_bdev);
}
+ destroy_workqueue(md->wq);
mempool_destroy(md->tio_pool);
mempool_destroy(md->io_pool);
bioset_free(md->bs);
}
EXPORT_SYMBOL_GPL(dm_put);
+/*
+ * Sleep until md->pending drops to zero or a signal arrives.
+ *
+ * Returns 0 once no I/O is pending, or -EINTR if a signal is pending
+ * while I/O is still outstanding.  The task is left in TASK_RUNNING.
+ */
+static int dm_wait_for_completion(struct mapped_device *md)
+{
+	int ret = 0;
+
+	for (;;) {
+		/* Set the task state before sampling the counter */
+		set_current_state(TASK_INTERRUPTIBLE);
+		smp_mb();
+
+		if (atomic_read(&md->pending) == 0)
+			break;
+
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+
+		io_schedule();
+	}
+
+	set_current_state(TASK_RUNNING);
+	return ret;
+}
+
/*
* Process the deferred bios
*/
spin_unlock_irqrestore(&md->pushback_lock, flags);
}
+/*
+ * Workqueue handler: carry out one struct dm_wq_req.
+ *
+ * Runs with md->io_lock held for writing.  DM_WQ_FLUSH_ALL merges the
+ * pushback list and then flushes deferred I/O; DM_WQ_FLUSH_DEFERRED only
+ * flushes deferred I/O.  Any other type is reported and hits BUG().
+ */
+static void dm_wq_work(struct work_struct *work)
+{
+	struct dm_wq_req *request = container_of(work, struct dm_wq_req, work);
+	struct mapped_device *dev = request->md;
+
+	down_write(&dev->io_lock);
+
+	if (request->type == DM_WQ_FLUSH_ALL ||
+	    request->type == DM_WQ_FLUSH_DEFERRED) {
+		/* FLUSH_ALL folds the pushback list in before flushing */
+		if (request->type == DM_WQ_FLUSH_ALL)
+			__merge_pushback_list(dev);
+		__flush_deferred_io(dev);
+	} else {
+		DMERR("dm_wq_work: unrecognised work type %d", request->type);
+		BUG();
+	}
+
+	up_write(&dev->io_lock);
+}
+
+/*
+ * Fill in @req and queue it on the device's workqueue.
+ *
+ * @md:      device the work applies to; its md->wq receives the item
+ * @type:    one of the DM_WQ_* request types
+ * @context: opaque pointer stored in the request
+ * @req:     caller-provided storage; must stay valid until the work ran
+ */
+static void dm_wq_queue(struct mapped_device *md, int type, void *context,
+			struct dm_wq_req *req)
+{
+	req->md = md;
+	req->context = context;
+	req->type = type;
+
+	/* The work item must be initialised before it is queued */
+	INIT_WORK(&req->work, dm_wq_work);
+	queue_work(md->wq, &req->work);
+}
+
+/*
+ * Queue a flush request of the given @type on md->wq and wait for the
+ * workqueue to drain before returning.
+ */
+static void dm_queue_flush(struct mapped_device *md, int type, void *context)
+{
+	/*
+	 * The request lives on this stack frame; flush_workqueue() below is
+	 * called before the frame is left.
+	 */
+	struct dm_wq_req flush_req;
+
+	dm_wq_queue(md, type, context, &flush_req);
+	flush_workqueue(md->wq);
+}
+
/*
* Swap in a new table (destroying old one).
*/
{
struct dm_table *map = NULL;
DECLARE_WAITQUEUE(wait, current);
- int pending, r = 0;
+ int r = 0;
int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
dm_table_unplug_all(map);
/*
- * Then we wait for the already mapped ios to
- * complete.
+ * Wait for the already-mapped ios to complete.
*/
- while (1) {
- set_current_state(TASK_INTERRUPTIBLE);
-
- smp_mb();
- pending = atomic_read(&md->pending);
- if (!pending || signal_pending(current))
- break;
-
- io_schedule();
- }
- set_current_state(TASK_RUNNING);
+ r = dm_wait_for_completion(md);
down_write(&md->io_lock);
remove_wait_queue(&md->wait, &wait);
if (noflush)
__merge_pushback_list(md);
+ up_write(&md->io_lock);
/* were we interrupted ? */
- if (pending) {
- __flush_deferred_io(md);
- up_write(&md->io_lock);
+ if (r < 0) {
+ dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
unlock_fs(md);
- r = -EINTR;
goto out; /* pushback list is already flushed, so skip flush */
}
- up_write(&md->io_lock);
dm_table_postsuspend_targets(map);
set_bit(DMF_SUSPENDED, &md->flags);
flush_and_out:
- if (r && noflush) {
+ if (r && noflush)
/*
* Because there may be already I/Os in the pushback list,
* flush them before return.
*/
- down_write(&md->io_lock);
- __merge_pushback_list(md);
- __flush_deferred_io(md);
- up_write(&md->io_lock);
- }
+ dm_queue_flush(md, DM_WQ_FLUSH_ALL, NULL);
out:
if (r && md->suspended_bdev) {
if (r)
goto out;
- down_write(&md->io_lock);
- __flush_deferred_io(md);
- up_write(&md->io_lock);
+ dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
unlock_fs(md);