X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=fs%2Fbuffer.c;h=6f0bddddcf4aadb69c6b1de52eedb97e5735bba6;hb=4b11111aba6c80cc2969fd1806d2a869bfc9f357;hp=1f157749041773e9e0e467a72439e190f1952cc5;hpb=89e107877b65bf6eff1d63a1302dee9a091586f5;p=linux-2.6 diff --git a/fs/buffer.c b/fs/buffer.c index 1f15774904..6f0bddddcf 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -67,14 +67,14 @@ static int sync_buffer(void *word) return 0; } -void fastcall __lock_buffer(struct buffer_head *bh) +void __lock_buffer(struct buffer_head *bh) { wait_on_bit_lock(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(__lock_buffer); -void fastcall unlock_buffer(struct buffer_head *bh) +void unlock_buffer(struct buffer_head *bh) { smp_mb__before_clear_bit(); clear_buffer_locked(bh); @@ -710,6 +710,8 @@ static int __set_page_dirty(struct page *page, if (mapping_cap_account_dirty(mapping)) { __inc_zone_page_state(page, NR_FILE_DIRTY); + __inc_bdi_stat(mapping->backing_dev_info, + BDI_RECLAIMABLE); task_io_account_write(PAGE_CACHE_SIZE); } radix_tree_tag_set(&mapping->page_tree, @@ -1162,7 +1164,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock, * mapping->tree_lock and the global inode_lock. */ -void fastcall mark_buffer_dirty(struct buffer_head *bh) +void mark_buffer_dirty(struct buffer_head *bh) { WARN_ON_ONCE(!buffer_uptodate(bh)); if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh)) @@ -1434,6 +1436,7 @@ void invalidate_bh_lrus(void) { on_each_cpu(invalidate_bh_lru, NULL, 1, 1); } +EXPORT_SYMBOL_GPL(invalidate_bh_lrus); void set_bh_page(struct buffer_head *bh, struct page *page, unsigned long offset) @@ -1728,7 +1731,6 @@ done: * The page and buffer_heads can be released at any time from * here on. */ - wbc->pages_skipped++; /* We didn't write this page */ } return err; @@ -1797,7 +1799,7 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to) start = max(from, block_start); size = min(to, block_end) - start; - zero_user_page(page, start, size, KM_USER0); + zero_user(page, start, size); set_buffer_uptodate(bh); } @@ -1860,19 +1862,10 @@ static int __block_prepare_write(struct inode *inode, struct page *page, mark_buffer_dirty(bh); continue; } - if (block_end > to || block_start < from) { - void *kaddr; - - kaddr = kmap_atomic(page, KM_USER0); - if (block_end > to) - memset(kaddr+to, 0, - block_end-to); - if (block_start < from) - memset(kaddr+block_start, - 0, from-block_start); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - } + if (block_end > to || block_start < from) + zero_user_segments(page, + to, block_end, + block_start, from); continue; } } @@ -2103,8 +2096,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block) SetPageError(page); } if (!buffer_mapped(bh)) { - zero_user_page(page, i * blocksize, blocksize, - KM_USER0); + zero_user(page, i * blocksize, blocksize); if (!err) set_buffer_uptodate(bh); continue; @@ -2189,25 +2181,6 @@ out: return err; } -int generic_cont_expand(struct inode *inode, loff_t size) -{ - unsigned int offset; - - offset = (size & (PAGE_CACHE_SIZE - 1)); /* Within page */ - - /* ugh. in prepare/commit_write, if from==to==start of block, we - * skip the prepare. make sure we never send an offset for the start - * of a block. - * XXX: actually, this should be handled in those filesystems by - * checking for the AOP_FLAG_CONT_EXPAND flag. 
- */ - if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) { - /* caller must handle this extra byte. */ - size++; - } - return generic_cont_expand_simple(inode, size); -} - int cont_expand_zero(struct file *file, struct address_space *mapping, loff_t pos, loff_t *bytes) { @@ -2236,7 +2209,7 @@ int cont_expand_zero(struct file *file, struct address_space *mapping, &page, &fsdata); if (err) goto out; - zero_user_page(page, zerofrom, len, KM_USER0); + zero_user(page, zerofrom, len); err = pagecache_write_end(file, mapping, curpos, len, len, page, fsdata); if (err < 0) @@ -2263,7 +2236,7 @@ int cont_expand_zero(struct file *file, struct address_space *mapping, &page, &fsdata); if (err) goto out; - zero_user_page(page, zerofrom, len, KM_USER0); + zero_user(page, zerofrom, len); err = pagecache_write_end(file, mapping, curpos, len, len, page, fsdata); if (err < 0) @@ -2388,7 +2361,7 @@ out_unlock: } /* - * nobh_prepare_write()'s prereads are special: the buffer_heads are freed + * nobh_write_begin()'s prereads are special: the buffer_heads are freed * immediately, while under the page lock. So it needs a special end_io * handler which does not touch the bh after unlocking it. */ @@ -2397,27 +2370,70 @@ static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate) __end_buffer_read_notouch(bh, uptodate); } +/* + * Attach the singly-linked list of buffers created by nobh_write_begin, to + * the page (converting it to circular linked list and taking care of page + * dirty races). + */ +static void attach_nobh_buffers(struct page *page, struct buffer_head *head) +{ + struct buffer_head *bh; + + BUG_ON(!PageLocked(page)); + + spin_lock(&page->mapping->private_lock); + bh = head; + do { + if (PageDirty(page)) + set_buffer_dirty(bh); + if (!bh->b_this_page) + bh->b_this_page = head; + bh = bh->b_this_page; + } while (bh != head); + attach_page_buffers(page, head); + spin_unlock(&page->mapping->private_lock); +} + /* * On entry, the page is fully not uptodate. * On exit the page is fully uptodate in the areas outside (from,to) */ -int nobh_prepare_write(struct page *page, unsigned from, unsigned to, +int nobh_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata, get_block_t *get_block) { - struct inode *inode = page->mapping->host; + struct inode *inode = mapping->host; const unsigned blkbits = inode->i_blkbits; const unsigned blocksize = 1 << blkbits; struct buffer_head *head, *bh; + struct page *page; + pgoff_t index; + unsigned from, to; unsigned block_in_page; unsigned block_start, block_end; sector_t block_in_file; - char *kaddr; int nr_reads = 0; int ret = 0; int is_mapped_to_disk = 1; - if (page_has_buffers(page)) - return block_prepare_write(page, from, to, get_block); + index = pos >> PAGE_CACHE_SHIFT; + from = pos & (PAGE_CACHE_SIZE - 1); + to = from + len; + + page = __grab_cache_page(mapping, index); + if (!page) + return -ENOMEM; + *pagep = page; + *fsdata = NULL; + + if (page_has_buffers(page)) { + unlock_page(page); + page_cache_release(page); + *pagep = NULL; + return block_write_begin(file, mapping, pos, len, flags, pagep, + fsdata, get_block); + } if (PageMappedToDisk(page)) return 0; @@ -2432,8 +2448,10 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to, * than the circular one we're used to. 
*/ head = alloc_page_buffers(page, blocksize, 0); - if (!head) - return -ENOMEM; + if (!head) { + ret = -ENOMEM; + goto out_release; + } block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits); @@ -2465,13 +2483,8 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to, continue; } if (buffer_new(bh) || !buffer_mapped(bh)) { - kaddr = kmap_atomic(page, KM_USER0); - if (block_start < from) - memset(kaddr+block_start, 0, from-block_start); - if (block_end > to) - memset(kaddr + to, 0, block_end - to); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_segments(page, block_start, from, + to, block_end); continue; } if (buffer_uptodate(bh)) @@ -2502,15 +2515,12 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to, if (is_mapped_to_disk) SetPageMappedToDisk(page); - do { - bh = head; - head = head->b_this_page; - free_buffer_head(bh); - } while (head); + *fsdata = head; /* to be released by nobh_write_end */ return 0; failed: + BUG_ON(!ret); /* * Error recovery is a bit difficult. We need to zero out blocks that * were newly allocated, and dirty them to ensure they get written out. @@ -2518,64 +2528,56 @@ failed: * the handling of potential IO errors during writeout would be hard * (could try doing synchronous writeout, but what if that fails too?) */ - spin_lock(&page->mapping->private_lock); - bh = head; - block_start = 0; - do { - if (PageUptodate(page)) - set_buffer_uptodate(bh); - if (PageDirty(page)) - set_buffer_dirty(bh); + attach_nobh_buffers(page, head); + page_zero_new_buffers(page, from, to); - block_end = block_start+blocksize; - if (block_end <= from) - goto next; - if (block_start >= to) - goto next; +out_release: + unlock_page(page); + page_cache_release(page); + *pagep = NULL; - if (buffer_new(bh)) { - clear_buffer_new(bh); - if (!buffer_uptodate(bh)) { - zero_user_page(page, block_start, bh->b_size, KM_USER0); - set_buffer_uptodate(bh); - } - mark_buffer_dirty(bh); - } -next: - block_start = block_end; - if (!bh->b_this_page) - bh->b_this_page = head; - bh = bh->b_this_page; - } while (bh != head); - attach_page_buffers(page, head); - spin_unlock(&page->mapping->private_lock); + if (pos + len > inode->i_size) + vmtruncate(inode, inode->i_size); return ret; } -EXPORT_SYMBOL(nobh_prepare_write); +EXPORT_SYMBOL(nobh_write_begin); -/* - * Make sure any changes to nobh_commit_write() are reflected in - * nobh_truncate_page(), since it doesn't call commit_write(). 
- */ -int nobh_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) +int nobh_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { struct inode *inode = page->mapping->host; - loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + struct buffer_head *head = fsdata; + struct buffer_head *bh; - if (page_has_buffers(page)) - return generic_commit_write(file, page, from, to); + if (!PageMappedToDisk(page)) { + if (unlikely(copied < len) && !page_has_buffers(page)) + attach_nobh_buffers(page, head); + if (page_has_buffers(page)) + return generic_write_end(file, mapping, pos, len, + copied, page, fsdata); + } SetPageUptodate(page); set_page_dirty(page); - if (pos > inode->i_size) { - i_size_write(inode, pos); + if (pos+copied > inode->i_size) { + i_size_write(inode, pos+copied); mark_inode_dirty(inode); } - return 0; + + unlock_page(page); + page_cache_release(page); + + while (head) { + bh = head; + head = head->b_this_page; + free_buffer_head(bh); + } + + return copied; } -EXPORT_SYMBOL(nobh_commit_write); +EXPORT_SYMBOL(nobh_write_end); /* * nobh_writepage() - based on block_full_write_page() except @@ -2619,7 +2621,7 @@ int nobh_writepage(struct page *page, get_block_t *get_block, * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ - zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0); + zero_user_segment(page, offset, PAGE_CACHE_SIZE); out: ret = mpage_writepage(page, get_block, wbc); if (ret == -EAGAIN) @@ -2628,44 +2630,79 @@ out: } EXPORT_SYMBOL(nobh_writepage); -/* - * This function assumes that ->prepare_write() uses nobh_prepare_write(). - */ -int nobh_truncate_page(struct address_space *mapping, loff_t from) +int nobh_truncate_page(struct address_space *mapping, + loff_t from, get_block_t *get_block) { - struct inode *inode = mapping->host; - unsigned blocksize = 1 << inode->i_blkbits; pgoff_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); - unsigned to; + unsigned blocksize; + sector_t iblock; + unsigned length, pos; + struct inode *inode = mapping->host; struct page *page; - const struct address_space_operations *a_ops = mapping->a_ops; - int ret = 0; + struct buffer_head map_bh; + int err; - if ((offset & (blocksize - 1)) == 0) - goto out; + blocksize = 1 << inode->i_blkbits; + length = offset & (blocksize - 1); + + /* Block boundary? Nothing to do */ + if (!length) + return 0; + + length = blocksize - length; + iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - ret = -ENOMEM; page = grab_cache_page(mapping, index); + err = -ENOMEM; if (!page) goto out; - to = (offset + blocksize) & ~(blocksize - 1); - ret = a_ops->prepare_write(NULL, page, offset, to); - if (ret == 0) { - zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, - KM_USER0); - /* - * It would be more correct to call aops->commit_write() - * here, but this is more efficient. - */ - SetPageUptodate(page); - set_page_dirty(page); + if (page_has_buffers(page)) { +has_buffers: + unlock_page(page); + page_cache_release(page); + return block_truncate_page(mapping, from, get_block); + } + + /* Find the buffer that contains "offset" */ + pos = blocksize; + while (offset >= pos) { + iblock++; + pos += blocksize; + } + + err = get_block(inode, iblock, &map_bh, 0); + if (err) + goto unlock; + /* unmapped? 
It's a hole - nothing to do */ + if (!buffer_mapped(&map_bh)) + goto unlock; + + /* Ok, it's mapped. Make sure it's up-to-date */ + if (!PageUptodate(page)) { + err = mapping->a_ops->readpage(NULL, page); + if (err) { + page_cache_release(page); + goto out; + } + lock_page(page); + if (!PageUptodate(page)) { + err = -EIO; + goto unlock; + } + if (page_has_buffers(page)) + goto has_buffers; } + zero_user(page, offset, length); + set_page_dirty(page); + err = 0; + +unlock: unlock_page(page); page_cache_release(page); out: - return ret; + return err; } EXPORT_SYMBOL(nobh_truncate_page); @@ -2733,7 +2770,7 @@ int block_truncate_page(struct address_space *mapping, goto unlock; } - zero_user_page(page, offset, length, KM_USER0); + zero_user(page, offset, length); mark_buffer_dirty(bh); err = 0; @@ -2779,7 +2816,7 @@ int block_write_full_page(struct page *page, get_block_t *get_block, * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ - zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0); + zero_user_segment(page, offset, PAGE_CACHE_SIZE); return __block_write_full_page(inode, page, get_block, wbc); } @@ -3117,7 +3154,8 @@ static void recalc_bh_state(void) struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) { - struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); + struct buffer_head *ret = kmem_cache_alloc(bh_cachep, + set_migrateflags(gfp_flags, __GFP_RECLAIMABLE)); if (ret) { INIT_LIST_HEAD(&ret->b_assoc_buffers); get_cpu_var(bh_accounting).nr++; @@ -3160,12 +3198,68 @@ static int buffer_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } +/** + * bh_uptodate_or_lock: Test whether the buffer is uptodate + * @bh: struct buffer_head + * + * Return true if the buffer is up-to-date and false, + * with the buffer locked, if not. + */ +int bh_uptodate_or_lock(struct buffer_head *bh) +{ + if (!buffer_uptodate(bh)) { + lock_buffer(bh); + if (!buffer_uptodate(bh)) + return 0; + unlock_buffer(bh); + } + return 1; +} +EXPORT_SYMBOL(bh_uptodate_or_lock); + +/** + * bh_submit_read: Submit a locked buffer for reading + * @bh: struct buffer_head + * + * Returns zero on success and -EIO on error. + */ +int bh_submit_read(struct buffer_head *bh) +{ + BUG_ON(!buffer_locked(bh)); + + if (buffer_uptodate(bh)) { + unlock_buffer(bh); + return 0; + } + + get_bh(bh); + bh->b_end_io = end_buffer_read_sync; + submit_bh(READ, bh); + wait_on_buffer(bh); + if (buffer_uptodate(bh)) + return 0; + return -EIO; +} +EXPORT_SYMBOL(bh_submit_read); + +static void +init_buffer_head(struct kmem_cache *cachep, void *data) +{ + struct buffer_head *bh = data; + + memset(bh, 0, sizeof(*bh)); + INIT_LIST_HEAD(&bh->b_assoc_buffers); +} + void __init buffer_init(void) { int nrpages; - bh_cachep = KMEM_CACHE(buffer_head, - SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD); + bh_cachep = kmem_cache_create("buffer_head", + sizeof(struct buffer_head), 0, + (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| + SLAB_MEM_SPREAD), + init_buffer_head); /* * Limit the bh occupancy to 10% of ZONE_NORMAL @@ -3192,7 +3286,6 @@ EXPORT_SYMBOL(file_fsync); EXPORT_SYMBOL(fsync_bdev); EXPORT_SYMBOL(generic_block_bmap); EXPORT_SYMBOL(generic_commit_write); -EXPORT_SYMBOL(generic_cont_expand); EXPORT_SYMBOL(generic_cont_expand_simple); EXPORT_SYMBOL(init_buffer); EXPORT_SYMBOL(invalidate_bdev);
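
A note on the zero_user*() conversions above: the patch replaces zero_user_page(page, start, size, KM_USER0) and several open-coded kmap_atomic()/memset() pairs with zero_user(), zero_user_segment() and zero_user_segments(). The following is a minimal sketch of what those helpers amount to, modelled directly on the open-coded sequence removed from __block_prepare_write(); it is an illustration, not the real highmem implementation, and the sketch_* names are made up.

	/* Sketch only: approximates zero_user_segments(), following the
	 * kmap_atomic()/memset()/flush_dcache_page() sequence this patch
	 * removes.  Zeroes byte ranges [start1, end1) and [start2, end2). */
	static void sketch_zero_user_segments(struct page *page,
			unsigned start1, unsigned end1,
			unsigned start2, unsigned end2)
	{
		void *kaddr = kmap_atomic(page, KM_USER0);

		if (end1 > start1)
			memset(kaddr + start1, 0, end1 - start1);
		if (end2 > start2)
			memset(kaddr + start2, 0, end2 - start2);
		flush_dcache_page(page);
		kunmap_atomic(kaddr, KM_USER0);
	}

	/* zero_user(page, start, size) is then the single-range case;
	 * zero_user_segment(page, start, end) takes an end offset instead. */
	static void sketch_zero_user(struct page *page, unsigned start, unsigned size)
	{
		sketch_zero_user_segments(page, start, start + size, 0, 0);
	}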
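
The bulk of the patch converts the nobh path from nobh_prepare_write()/nobh_commit_write() to the write_begin/write_end address_space operations, with nobh_write_begin() grabbing the page itself and passing its private buffer_head list back through *fsdata. As a sketch of how a filesystem would now hook these up (all myfs_* identifiers, including myfs_get_block(), are hypothetical; the nobh_* entry points and signatures are the ones in the patch):

	static int myfs_get_block(struct inode *inode, sector_t iblock,
				  struct buffer_head *bh_result, int create);

	static int myfs_write_begin(struct file *file, struct address_space *mapping,
				    loff_t pos, unsigned len, unsigned flags,
				    struct page **pagep, void **fsdata)
	{
		/* nobh_write_begin() locks the page and returns the private
		 * buffer_head list via *fsdata for nobh_write_end(). */
		return nobh_write_begin(file, mapping, pos, len, flags,
					pagep, fsdata, myfs_get_block);
	}

	static int myfs_writepage(struct page *page, struct writeback_control *wbc)
	{
		return nobh_writepage(page, myfs_get_block, wbc);
	}

	static const struct address_space_operations myfs_aops = {
		.writepage	= myfs_writepage,
		.write_begin	= myfs_write_begin,
		.write_end	= nobh_write_end,	/* matches ->write_end directly */
		/* other operations omitted from this sketch */
	};

	/* The filesystem's truncate path would call the reworked helper, e.g.
	 *	nobh_truncate_page(inode->i_mapping, inode->i_size, myfs_get_block);
	 */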
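
The patch also adds two small helpers, bh_uptodate_or_lock() and bh_submit_read(), whose contract is given in their kerneldoc above: the former returns 1 if the buffer is already uptodate and otherwise returns 0 with the buffer locked; the latter expects a locked buffer and reads it synchronously. The fragment below is only a usage sketch of the intended calling pattern; the function name is invented for illustration.

	/* Sketch only: typical use of the two new helpers. */
	static int example_read_bh(struct buffer_head *bh)
	{
		if (bh_uptodate_or_lock(bh))
			return 0;		/* already uptodate, nothing to read */

		/* Locked and not uptodate: issue a synchronous read. */
		return bh_submit_read(bh);	/* 0 on success, -EIO on error */
	}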