/*
 * Copyright (C) 2003 Sistina Software
 * Copyright (C) 2006 Red Hat GmbH
 *
 * This file is released under the GPL.
 */
#include "dm-io.h"

#include <linux/bio.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
static struct bio_set *_bios;

struct dm_io_client {
	mempool_t *pool;
	struct bio_set *bios;
};

/* FIXME: can we shrink this ? */
struct io {
	unsigned long error;
	atomic_t count;
	struct task_struct *sleeper;
	struct dm_io_client *client;
	io_notify_fn callback;
	void *context;
};
/*
 * io contexts are only dynamically allocated for asynchronous
 * io.  Since async io is likely to be the majority of io we'll
 * have the same number of io contexts as bios! (FIXME: must reduce this).
 */
static unsigned _num_ios;
static mempool_t *_io_pool;
/*
 * Temporary functions to allow old and new interfaces to co-exist.
 */
static struct bio_set *bios(struct dm_io_client *client)
{
	return client ? client->bios : _bios;
}
static mempool_t *io_pool(struct dm_io_client *client)
{
	return client ? client->pool : _io_pool;
}
static unsigned int pages_to_ios(unsigned int pages)
{
	return 4 * pages;	/* too many ? */
}
static int resize_pool(unsigned int new_ios)
{
	int r = 0;

	if (_io_pool) {
		if (new_ios == 0) {
			/* free off the pool */
			mempool_destroy(_io_pool);
			_io_pool = NULL;
			bioset_free(_bios);
			_bios = NULL;
		} else
			/* resize the existing pool */
			r = mempool_resize(_io_pool, new_ios, GFP_KERNEL);
	} else {
		/* create a new pool and bioset */
		_io_pool = mempool_create_kmalloc_pool(new_ios,
						       sizeof(struct io));
		if (!_io_pool)
			return -ENOMEM;

		_bios = bioset_create(16, 16);
		if (!_bios) {
			mempool_destroy(_io_pool);
			_io_pool = NULL;
			return -ENOMEM;
		}
	}

	if (!r)
		_num_ios = new_ios;

	return r;
}
int dm_io_get(unsigned int num_pages)
{
	return resize_pool(_num_ios + pages_to_ios(num_pages));
}

void dm_io_put(unsigned int num_pages)
{
	resize_pool(_num_ios - pages_to_ios(num_pages));
}
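
/*
 * Illustrative usage (added commentary, not part of the original file):
 * a client expecting at most 64 pages of io in flight reserves io
 * contexts up front and releases them on teardown.
 *
 *	if (dm_io_get(64))	// grows _io_pool by pages_to_ios(64)
 *		return -ENOMEM;
 *	...issue dm_io_sync()/dm_io_async() calls...
 *	dm_io_put(64);		// shrinks the pool back down
 */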
/*-----------------------------------------------------------------
 * We need to keep track of which region a bio is doing io for.
 * In order to save a memory allocation we store this in the last
 * bvec, which we know is unused (blech).
 * XXX This is ugly and can OOPS with some configs... find another way.
 *---------------------------------------------------------------*/
static inline void bio_set_region(struct bio *bio, unsigned region)
{
	bio->bi_io_vec[bio->bi_max_vecs].bv_len = region;
}

static inline unsigned bio_get_region(struct bio *bio)
{
	return bio->bi_io_vec[bio->bi_max_vecs].bv_len;
}
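
/*
 * How the spare bvec works (added commentary): do_region() allocates a
 * bio with one bvec more than it needs, then decrements bi_max_vecs so
 * bio_add_page() never touches the final slot.  bi_io_vec[bi_max_vecs]
 * is therefore free to carry the region number until endio() restores
 * bi_max_vecs before bio_put().
 */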
/*-----------------------------------------------------------------
 * We need an io object to keep track of the number of bios that
 * have been dispatched for a particular io.
 *---------------------------------------------------------------*/
static void dec_count(struct io *io, unsigned int region, int error)
{
	if (error)
		set_bit(region, &io->error);

	if (atomic_dec_and_test(&io->count)) {
		if (io->sleeper)
			wake_up_process(io->sleeper);

		else {
			int r = io->error;
			io_notify_fn fn = io->callback;
			void *context = io->context;

			mempool_free(io, io_pool(io->client));
			fn(r, context);
		}
	}
}
static int endio(struct bio *bio, unsigned int done, int error)
{
	struct io *io;
	unsigned region;

	/* keep going until we've finished */
	if (bio->bi_size)
		return 1;

	if (error && bio_data_dir(bio) == READ)
		zero_fill_bio(bio);

	/*
	 * The bio destructor in bio_put() may use the io object.
	 */
	io = bio->bi_private;
	region = bio_get_region(bio);

	bio->bi_max_vecs++;
	bio_put(bio);

	dec_count(io, region, error);

	return 0;
}
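
/*
 * Note (added commentary): with the 2.6 bi_end_io convention used
 * above, the completion handler may be called for partial progress;
 * returning 1 while bio->bi_size is still non-zero tells the block
 * layer we are not done yet, and the real teardown only runs on the
 * final call.
 */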
/*-----------------------------------------------------------------
 * These little objects provide an abstraction for getting a new
 * destination page for io.
 *---------------------------------------------------------------*/
struct dpages {
	void (*get_page)(struct dpages *dp,
			 struct page **p, unsigned long *len, unsigned *offset);
	void (*next_page)(struct dpages *dp);

	unsigned context_u;
	void *context_ptr;
};
/*
 * Functions for getting the pages from a list.
 */
static void list_get_page(struct dpages *dp,
		  struct page **p, unsigned long *len, unsigned *offset)
{
	unsigned o = dp->context_u;
	struct page_list *pl = (struct page_list *) dp->context_ptr;

	*p = pl->page;
	*len = PAGE_SIZE - o;
	*offset = o;
}
static void list_next_page(struct dpages *dp)
{
	struct page_list *pl = (struct page_list *) dp->context_ptr;
	dp->context_ptr = pl->next;
	dp->context_u = 0;
}
static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offset)
{
	dp->get_page = list_get_page;
	dp->next_page = list_next_page;
	dp->context_u = offset;
	dp->context_ptr = pl;
}
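
/*
 * Consumption protocol (added commentary): callers repeatedly fetch
 * the current page and advance, e.g.
 *
 *	struct page *page;
 *	unsigned long len;
 *	unsigned offset;
 *
 *	dp->get_page(dp, &page, &len, &offset);
 *	...add up to len bytes of page, starting at offset, to a bio...
 *	dp->next_page(dp);
 *
 * get_page() may be called any number of times for the same page;
 * only next_page() moves the iterator on.
 */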
/*
 * Functions for getting the pages from a bvec.
 */
static void bvec_get_page(struct dpages *dp,
		  struct page **p, unsigned long *len, unsigned *offset)
{
	struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr;
	*p = bvec->bv_page;
	*len = bvec->bv_len;
	*offset = bvec->bv_offset;
}
static void bvec_next_page(struct dpages *dp)
{
	struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr;
	dp->context_ptr = bvec + 1;
}
static void bvec_dp_init(struct dpages *dp, struct bio_vec *bvec)
{
	dp->get_page = bvec_get_page;
	dp->next_page = bvec_next_page;
	dp->context_ptr = bvec;
}
/*
 * Functions for getting the pages from a VMA.
 */
static void vm_get_page(struct dpages *dp,
		 struct page **p, unsigned long *len, unsigned *offset)
{
	*p = vmalloc_to_page(dp->context_ptr);
	*offset = dp->context_u;
	*len = PAGE_SIZE - dp->context_u;
}
static void vm_next_page(struct dpages *dp)
{
	dp->context_ptr += PAGE_SIZE - dp->context_u;
	dp->context_u = 0;
}
static void vm_dp_init(struct dpages *dp, void *data)
{
	dp->get_page = vm_get_page;
	dp->next_page = vm_next_page;
	dp->context_u = ((unsigned long) data) & (PAGE_SIZE - 1);
	dp->context_ptr = data;
}
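
/*
 * Worked example (added commentary): with 4K pages, vm_dp_init() on
 * data == ...0x1200 stores context_u = 0x200, so the first get_page()
 * yields offset 0x200 and len PAGE_SIZE - 0x200; after next_page()
 * resets context_u, every subsequent page is used from offset 0 for a
 * full PAGE_SIZE.
 */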
static void dm_bio_destructor(struct bio *bio)
{
	struct io *io = bio->bi_private;

	bio_free(bio, bios(io->client));
}
/*-----------------------------------------------------------------
 * IO routines that accept a list of pages.
 *---------------------------------------------------------------*/
static void do_region(int rw, unsigned int region, struct io_region *where,
		      struct dpages *dp, struct io *io)
{
	struct bio *bio;
	struct page *page;
	unsigned long len;
	unsigned offset;
	unsigned num_bvecs;
	sector_t remaining = where->count;

	while (remaining) {
		/*
		 * Allocate a suitably sized bio: we add an extra
		 * bvec for bio_get/set_region() and decrement bi_max_vecs
		 * to hide it from bio_add_page().
		 */
		num_bvecs = (remaining / (PAGE_SIZE >> SECTOR_SHIFT)) + 2;
		bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, bios(io->client));
		bio->bi_sector = where->sector + (where->count - remaining);
		bio->bi_bdev = where->bdev;
		bio->bi_end_io = endio;
		bio->bi_private = io;
		bio->bi_destructor = dm_bio_destructor;
		bio->bi_max_vecs--;
		bio_set_region(bio, region);

		/*
		 * Try and add as many pages as possible.
		 */
		while (remaining) {
			dp->get_page(dp, &page, &len, &offset);
			len = min(len, to_bytes(remaining));
			if (!bio_add_page(bio, page, len, offset))
				break;

			offset = 0;
			remaining -= to_sector(len);
			dp->next_page(dp);
		}

		atomic_inc(&io->count);
		submit_bio(rw, bio);
	}
}
static void dispatch_io(int rw, unsigned int num_regions,
			struct io_region *where, struct dpages *dp,
			struct io *io, int sync)
{
	int i;
	struct dpages old_pages = *dp;

	if (sync)
		rw |= (1 << BIO_RW_SYNC);

	/*
	 * For multiple regions we need to be careful to rewind
	 * the dp object for each call to do_region.
	 */
	for (i = 0; i < num_regions; i++) {
		*dp = old_pages;
		if (where[i].count)
			do_region(rw, i, where + i, dp, io);
	}

	/*
	 * Drop the extra reference that we were holding to avoid
	 * the io being completed too early.
	 */
	dec_count(io, 0, 0);
}
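
/*
 * Reference counting recap (added commentary): io->count starts at 1
 * so no completion can fire while bios are still being issued;
 * do_region() takes one reference per bio it submits, each endio()
 * drops one, and the dec_count() above releases the initial hold.
 */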
static int sync_io(struct dm_io_client *client, unsigned int num_regions,
		   struct io_region *where, int rw, struct dpages *dp,
		   unsigned long *error_bits)
{
	struct io io;

	if (num_regions > 1 && rw != WRITE) {
		WARN_ON(1);
		return -EIO;
	}

	io.error = 0;
	atomic_set(&io.count, 1); /* see dispatch_io() */
	io.sleeper = current;
	io.client = client;

	dispatch_io(rw, num_regions, where, dp, &io, 1);

	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		if (!atomic_read(&io.count) || signal_pending(current))
			break;

		io_schedule();
	}
	set_current_state(TASK_RUNNING);

	if (atomic_read(&io.count))
		return -EINTR;

	if (error_bits)
		*error_bits = io.error;

	return io.error ? -EIO : 0;
}
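
/*
 * Illustrative caller (added commentary, not part of the original
 * file): reading one region synchronously into a vmalloc'd buffer via
 * the old interface.  "bdev" and "buffer" are hypothetical names.
 *
 *	struct io_region where = {
 *		.bdev	= bdev,
 *		.sector	= 0,
 *		.count	= 8,	// 4K, in 512-byte sectors
 *	};
 *	unsigned long error_bits;
 *	int r = dm_io_sync_vm(1, &where, READ, buffer, &error_bits);
 */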
static int async_io(struct dm_io_client *client, unsigned int num_regions,
		    struct io_region *where, int rw, struct dpages *dp,
		    io_notify_fn fn, void *context)
{
	struct io *io;

	if (num_regions > 1 && rw != WRITE) {
		WARN_ON(1);
		fn(1, context);
		return -EIO;
	}

	io = mempool_alloc(io_pool(client), GFP_NOIO);
	io->error = 0;
	atomic_set(&io->count, 1); /* see dispatch_io() */
	io->sleeper = NULL;
	io->client = client;
	io->callback = fn;
	io->context = context;

	dispatch_io(rw, num_regions, where, dp, io, 0);
	return 0;
}
int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw,
	       struct page_list *pl, unsigned int offset,
	       unsigned long *error_bits)
{
	struct dpages dp;
	list_dp_init(&dp, pl, offset);
	return sync_io(NULL, num_regions, where, rw, &dp, error_bits);
}
int dm_io_sync_bvec(unsigned int num_regions, struct io_region *where, int rw,
		    struct bio_vec *bvec, unsigned long *error_bits)
{
	struct dpages dp;
	bvec_dp_init(&dp, bvec);
	return sync_io(NULL, num_regions, where, rw, &dp, error_bits);
}
int dm_io_sync_vm(unsigned int num_regions, struct io_region *where, int rw,
		  void *data, unsigned long *error_bits)
{
	struct dpages dp;
	vm_dp_init(&dp, data);
	return sync_io(NULL, num_regions, where, rw, &dp, error_bits);
}
int dm_io_async(unsigned int num_regions, struct io_region *where, int rw,
		struct page_list *pl, unsigned int offset,
		io_notify_fn fn, void *context)
{
	struct dpages dp;
	list_dp_init(&dp, pl, offset);
	return async_io(NULL, num_regions, where, rw, &dp, fn, context);
}
int dm_io_async_bvec(unsigned int num_regions, struct io_region *where, int rw,
		     struct bio_vec *bvec, io_notify_fn fn, void *context)
{
	struct dpages dp;
	bvec_dp_init(&dp, bvec);
	return async_io(NULL, num_regions, where, rw, &dp, fn, context);
}
int dm_io_async_vm(unsigned int num_regions, struct io_region *where, int rw,
		   void *data, io_notify_fn fn, void *context)
{
	struct dpages dp;
	vm_dp_init(&dp, data);
	return async_io(NULL, num_regions, where, rw, &dp, fn, context);
}
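
/*
 * Illustrative caller (added commentary, not part of the original
 * file): an asynchronous write with a completion callback.
 * "my_complete" and its context are hypothetical names.
 *
 *	static void my_complete(unsigned long error, void *context)
 *	{
 *		// error is the per-region error bitset from io->error
 *	}
 *
 *	dm_io_async_bvec(1, &where, WRITE, bvec, my_complete, context);
 */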
EXPORT_SYMBOL(dm_io_get);
EXPORT_SYMBOL(dm_io_put);
EXPORT_SYMBOL(dm_io_sync);
EXPORT_SYMBOL(dm_io_async);
EXPORT_SYMBOL(dm_io_sync_bvec);
EXPORT_SYMBOL(dm_io_async_bvec);
EXPORT_SYMBOL(dm_io_sync_vm);
EXPORT_SYMBOL(dm_io_async_vm);