X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=fs%2Fblock_dev.c;h=7d822fae7765b2bfdf0978e8ecf1296875073e85;hb=f12c03722045882a50c270f6332cf2c7b16a83d2;hp=b3e9bfa748cf99971567913492428524aa847e2b;hpb=64b853aa328f34dd58e4e617cded91e2ddbcac13;p=linux-2.6 diff --git a/fs/block_dev.c b/fs/block_dev.c index b3e9bfa748..7d822fae77 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -31,6 +31,8 @@ struct bdev_inode { struct inode vfs_inode; }; +static const struct address_space_operations def_blk_aops; + static inline struct bdev_inode *BDEV_I(struct inode *inode) { return container_of(inode, struct bdev_inode, vfs_inode); @@ -171,203 +173,6 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, iov, offset, nr_segs, blkdev_get_blocks, NULL); } -#if 0 -static int blk_end_aio(struct bio *bio, unsigned int bytes_done, int error) -{ - struct kiocb *iocb = bio->bi_private; - atomic_t *bio_count = &iocb->ki_bio_count; - - if (bio_data_dir(bio) == READ) - bio_check_pages_dirty(bio); - else { - bio_release_pages(bio); - bio_put(bio); - } - - /* iocb->ki_nbytes stores error code from LLDD */ - if (error) - iocb->ki_nbytes = -EIO; - - if (atomic_dec_and_test(bio_count)) { - if ((long)iocb->ki_nbytes < 0) - aio_complete(iocb, iocb->ki_nbytes, 0); - else - aio_complete(iocb, iocb->ki_left, 0); - } - - return 0; -} - -#define VEC_SIZE 16 -struct pvec { - unsigned short nr; - unsigned short idx; - struct page *page[VEC_SIZE]; -}; - -#define PAGES_SPANNED(addr, len) \ - (DIV_ROUND_UP((addr) + (len), PAGE_SIZE) - (addr) / PAGE_SIZE); - -/* - * get page pointer for user addr, we internally cache struct page array for - * (addr, count) range in pvec to avoid frequent call to get_user_pages. If - * internal page list is exhausted, a batch count of up to VEC_SIZE is used - * to get next set of page struct. 
- */ -static struct page *blk_get_page(unsigned long addr, size_t count, int rw, - struct pvec *pvec) -{ - int ret, nr_pages; - if (pvec->idx == pvec->nr) { - nr_pages = PAGES_SPANNED(addr, count); - nr_pages = min(nr_pages, VEC_SIZE); - down_read(&current->mm->mmap_sem); - ret = get_user_pages(current, current->mm, addr, nr_pages, - rw == READ, 0, pvec->page, NULL); - up_read(&current->mm->mmap_sem); - if (ret < 0) - return ERR_PTR(ret); - pvec->nr = ret; - pvec->idx = 0; - } - return pvec->page[pvec->idx++]; -} - -/* return a page back to pvec array */ -static void blk_unget_page(struct page *page, struct pvec *pvec) -{ - pvec->page[--pvec->idx] = page; -} - -static ssize_t -blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t pos, unsigned long nr_segs) -{ - struct inode *inode = iocb->ki_filp->f_mapping->host; - unsigned blkbits = blksize_bits(bdev_hardsect_size(I_BDEV(inode))); - unsigned blocksize_mask = (1 << blkbits) - 1; - unsigned long seg = 0; /* iov segment iterator */ - unsigned long nvec; /* number of bio vec needed */ - unsigned long cur_off; /* offset into current page */ - unsigned long cur_len; /* I/O len of current page, up to PAGE_SIZE */ - - unsigned long addr; /* user iovec address */ - size_t count; /* user iovec len */ - size_t nbytes = iocb->ki_nbytes = iocb->ki_left; /* total xfer size */ - loff_t size; /* size of block device */ - struct bio *bio; - atomic_t *bio_count = &iocb->ki_bio_count; - struct page *page; - struct pvec pvec; - - pvec.nr = 0; - pvec.idx = 0; - - if (pos & blocksize_mask) - return -EINVAL; - - size = i_size_read(inode); - if (pos + nbytes > size) { - nbytes = size - pos; - iocb->ki_left = nbytes; - } - - /* - * check first non-zero iov alignment, the remaining - * iov alignment is checked inside bio loop below. 
- */ - do { - addr = (unsigned long) iov[seg].iov_base; - count = min(iov[seg].iov_len, nbytes); - if (addr & blocksize_mask || count & blocksize_mask) - return -EINVAL; - } while (!count && ++seg < nr_segs); - atomic_set(bio_count, 1); - - while (nbytes) { - /* roughly estimate number of bio vec needed */ - nvec = (nbytes + PAGE_SIZE - 1) / PAGE_SIZE; - nvec = max(nvec, nr_segs - seg); - nvec = min(nvec, (unsigned long) BIO_MAX_PAGES); - - /* bio_alloc should not fail with GFP_KERNEL flag */ - bio = bio_alloc(GFP_KERNEL, nvec); - bio->bi_bdev = I_BDEV(inode); - bio->bi_end_io = blk_end_aio; - bio->bi_private = iocb; - bio->bi_sector = pos >> blkbits; -same_bio: - cur_off = addr & ~PAGE_MASK; - cur_len = PAGE_SIZE - cur_off; - if (count < cur_len) - cur_len = count; - - page = blk_get_page(addr, count, rw, &pvec); - if (unlikely(IS_ERR(page))) - goto backout; - - if (bio_add_page(bio, page, cur_len, cur_off)) { - pos += cur_len; - addr += cur_len; - count -= cur_len; - nbytes -= cur_len; - - if (count) - goto same_bio; - while (++seg < nr_segs) { - addr = (unsigned long) iov[seg].iov_base; - count = iov[seg].iov_len; - if (!count) - continue; - if (unlikely(addr & blocksize_mask || - count & blocksize_mask)) { - page = ERR_PTR(-EINVAL); - goto backout; - } - count = min(count, nbytes); - goto same_bio; - } - } else { - blk_unget_page(page, &pvec); - } - - /* bio is ready, submit it */ - if (rw == READ) - bio_set_pages_dirty(bio); - atomic_inc(bio_count); - submit_bio(rw, bio); - } - -completion: - iocb->ki_left -= nbytes; - nbytes = iocb->ki_left; - iocb->ki_pos += nbytes; - - blk_run_address_space(inode->i_mapping); - if (atomic_dec_and_test(bio_count)) - aio_complete(iocb, nbytes, 0); - - return -EIOCBQUEUED; - -backout: - /* - * back out nbytes count constructed so far for this bio, - * we will throw away current bio. - */ - nbytes += bio->bi_size; - bio_release_pages(bio); - bio_put(bio); - - /* - * if no bio was submmitted, return the error code. 
- * otherwise, proceed with pending I/O completion. - */ - if (atomic_read(bio_count) == 1) - return PTR_ERR(page); - goto completion; -} -#endif - static int blkdev_writepage(struct page *page, struct writeback_control *wbc) { return block_write_full_page(page, blkdev_get_block, wbc); @@ -378,14 +183,26 @@ static int blkdev_readpage(struct file * file, struct page * page) return block_read_full_page(page, blkdev_get_block); } -static int blkdev_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) +static int blkdev_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return block_prepare_write(page, from, to, blkdev_get_block); + *pagep = NULL; + return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + blkdev_get_block); } -static int blkdev_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) +static int blkdev_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { - return block_commit_write(page, from, to); + int ret; + ret = block_write_end(file, mapping, pos, len, copied, page, fsdata); + + unlock_page(page); + page_cache_release(page); + + return ret; } /* @@ -453,7 +270,7 @@ static void bdev_destroy_inode(struct inode *inode) kmem_cache_free(bdev_cachep, bdi); } -static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) +static void init_once(struct kmem_cache * cachep, void *foo) { struct bdev_inode *ei = (struct bdev_inode *) foo; struct block_device *bdev = &ei->bdev; @@ -517,12 +334,11 @@ void __init bdev_cache_init(void) bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD|SLAB_PANIC), - init_once, NULL); + init_once); err = register_filesystem(&bd_type); if (err) panic("Cannot register bdev 
pseudo-fs"); bd_mnt = kern_mount(&bd_type); - err = PTR_ERR(bd_mnt); if (IS_ERR(bd_mnt)) panic("Cannot create bdev pseudo-fs"); blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ @@ -588,12 +404,10 @@ EXPORT_SYMBOL(bdget); long nr_blockdev_pages(void) { - struct list_head *p; + struct block_device *bdev; long ret = 0; spin_lock(&bdev_lock); - list_for_each(p, &all_bdevs) { - struct block_device *bdev; - bdev = list_entry(p, struct block_device, bd_list); + list_for_each_entry(bdev, &all_bdevs, bd_list) { ret += bdev->bd_inode->i_mapping->nrpages; } spin_unlock(&bdev_lock); @@ -728,9 +542,9 @@ EXPORT_SYMBOL(bd_release); static struct kobject *bdev_get_kobj(struct block_device *bdev) { if (bdev->bd_contains != bdev) - return kobject_get(&bdev->bd_part->kobj); + return kobject_get(&bdev->bd_part->dev.kobj); else - return kobject_get(&bdev->bd_disk->kobj); + return kobject_get(&bdev->bd_disk->dev.kobj); } static struct kobject *bdev_get_holder(struct block_device *bdev) @@ -874,7 +688,7 @@ static struct bd_holder *find_bd_holder(struct block_device *bdev, */ static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) { - int ret; + int err; if (!bo) return -EINVAL; @@ -882,15 +696,18 @@ static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) if (!bd_holder_grab_dirs(bdev, bo)) return -EBUSY; - ret = add_symlink(bo->sdir, bo->sdev); - if (ret == 0) { - ret = add_symlink(bo->hdir, bo->hdev); - if (ret) - del_symlink(bo->sdir, bo->sdev); + err = add_symlink(bo->sdir, bo->sdev); + if (err) + return err; + + err = add_symlink(bo->hdir, bo->hdev); + if (err) { + del_symlink(bo->sdir, bo->sdev); + return err; } - if (ret == 0) - list_add_tail(&bo->list, &bdev->bd_holder_list); - return ret; + + list_add_tail(&bo->list, &bdev->bd_holder_list); + return 0; } /** @@ -948,7 +765,7 @@ static struct bd_holder *del_bd_holder(struct block_device *bdev, static int bd_claim_by_kobject(struct block_device *bdev, void *holder, struct kobject *kobj) 
{ - int res; + int err; struct bd_holder *bo, *found; if (!kobj) @@ -959,21 +776,24 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder, return -ENOMEM; mutex_lock(&bdev->bd_mutex); - res = bd_claim(bdev, holder); - if (res == 0) { - found = find_bd_holder(bdev, bo); - if (found == NULL) { - res = add_bd_holder(bdev, bo); - if (res) - bd_release(bdev); - } - } - if (res || found) - free_bd_holder(bo); - mutex_unlock(&bdev->bd_mutex); + err = bd_claim(bdev, holder); + if (err) + goto fail; - return res; + found = find_bd_holder(bdev, bo); + if (found) + goto fail; + + err = add_bd_holder(bdev, bo); + if (err) + bd_release(bdev); + else + bo = NULL; +fail: + mutex_unlock(&bdev->bd_mutex); + free_bd_holder(bo); + return err; } /** @@ -987,15 +807,12 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder, static void bd_release_from_kobject(struct block_device *bdev, struct kobject *kobj) { - struct bd_holder *bo; - if (!kobj) return; mutex_lock(&bdev->bd_mutex); bd_release(bdev); - if ((bo = del_bd_holder(bdev, kobj))) - free_bd_holder(bo); + free_bd_holder(del_bd_holder(bdev, kobj)); mutex_unlock(&bdev->bd_mutex); } @@ -1163,7 +980,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) ret = -ENXIO; goto out_first; } - kobject_get(&p->kobj); + kobject_get(&p->dev.kobj); bdev->bd_part = p; bd_set_size(bdev, (loff_t) p->nr_sects << 9); } @@ -1286,7 +1103,7 @@ static int __blkdev_put(struct block_device *bdev, int for_part) module_put(owner); if (bdev->bd_contains != bdev) { - kobject_put(&bdev->bd_part->kobj); + kobject_put(&bdev->bd_part->dev.kobj); bdev->bd_part = NULL; } bdev->bd_disk = NULL; @@ -1322,12 +1139,12 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) return blkdev_ioctl(file->f_mapping->host, file, cmd, arg); } -const struct address_space_operations def_blk_aops = { +static const struct address_space_operations def_blk_aops = { .readpage = blkdev_readpage, 
.writepage = blkdev_writepage, .sync_page = block_sync_page, - .prepare_write = blkdev_prepare_write, - .commit_write = blkdev_commit_write, + .write_begin = blkdev_write_begin, + .write_end = blkdev_write_end, .writepages = generic_writepages, .direct_IO = blkdev_direct_IO, }; @@ -1385,19 +1202,19 @@ struct block_device *lookup_bdev(const char *path) if (error) return ERR_PTR(error); - inode = nd.dentry->d_inode; + inode = nd.path.dentry->d_inode; error = -ENOTBLK; if (!S_ISBLK(inode->i_mode)) goto fail; error = -EACCES; - if (nd.mnt->mnt_flags & MNT_NODEV) + if (nd.path.mnt->mnt_flags & MNT_NODEV) goto fail; error = -ENOMEM; bdev = bd_acquire(inode); if (!bdev) goto fail; out: - path_release(&nd); + path_put(&nd.path); return bdev; fail: bdev = ERR_PTR(error);