From: Anton Altaparmakov Date: Tue, 15 Feb 2005 10:08:43 +0000 (+0000) Subject: NTFS: - Split ntfs_map_runlist() into ntfs_map_runlist() and a non-locking X-Git-Tag: v2.6.13-rc4~89^2~42 X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b6ad6c52fe36ab35d0fe28c064f59de2ba670c2a;p=linux-2.6 NTFS: - Split ntfs_map_runlist() into ntfs_map_runlist() and a non-locking helper ntfs_map_runlist_nolock() which is used by ntfs_map_runlist(). This allows us to map runlist fragments with the runlist lock already held without having to drop and reacquire it around the call. Adapt all callers. - Change ntfs_find_vcn() to ntfs_find_vcn_nolock() which takes a locked runlist. This allows us to find runlist elements with the runlist lock already held without having to drop and reacquire it around the call. Adapt all callers. Signed-off-by: Anton Altaparmakov --- diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index b40c334e61..9d42393c16 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -63,6 +63,15 @@ ToDo/Notes: - Fix a bug in fs/ntfs/runlist.c::ntfs_mapping_pairs_decompress() in the creation of the unmapped runlist element for the base attribute extent. + - Split ntfs_map_runlist() into ntfs_map_runlist() and a non-locking + helper ntfs_map_runlist_nolock() which is used by ntfs_map_runlist(). + This allows us to map runlist fragments with the runlist lock already + held without having to drop and reacquire it around the call. Adapt + all callers. + - Change ntfs_find_vcn() to ntfs_find_vcn_nolock() which takes a locked + runlist. This allows us to find runlist elements with the runlist + lock already held without having to drop and reacquire it around the + call. Adapt all callers. 2.1.22 - Many bug and race fixes and error handling improvements. diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 812d53e933..2b4b8b9e87 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -2,7 +2,7 @@ * aops.c - NTFS kernel address space operations and page cache handling. * Part of the Linux-NTFS project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2005 Anton Altaparmakov * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -135,7 +135,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) i * rec_size), rec_size); flush_dcache_page(page); kunmap_atomic(addr, KM_BIO_SRC_IRQ); - if (likely(!PageError(page) && page_uptodate)) + if (likely(page_uptodate && !PageError(page))) SetPageUptodate(page); } unlock_page(page); @@ -347,11 +347,11 @@ handle_zblock: */ static int ntfs_readpage(struct file *file, struct page *page) { - loff_t i_size; ntfs_inode *ni, *base_ni; u8 *kaddr; ntfs_attr_search_ctx *ctx; MFT_RECORD *mrec; + unsigned long flags; u32 attr_len; int err = 0; @@ -389,9 +389,9 @@ static int ntfs_readpage(struct file *file, struct page *page) * Attribute is resident, implying it is not compressed or encrypted. * This also means the attribute is smaller than an mft record and * hence smaller than a page, so can simply zero out any pages with - * index above 0. We can also do this if the file size is 0. + * index above 0. */ - if (unlikely(page->index > 0 || !i_size_read(VFS_I(ni)))) { + if (unlikely(page->index > 0)) { kaddr = kmap_atomic(page, KM_USER0); memset(kaddr, 0, PAGE_CACHE_SIZE); flush_dcache_page(page); @@ -418,9 +418,10 @@ static int ntfs_readpage(struct file *file, struct page *page) if (unlikely(err)) goto put_unm_err_out; attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); - i_size = i_size_read(VFS_I(ni)); - if (unlikely(attr_len > i_size)) - attr_len = i_size; + read_lock_irqsave(&ni->size_lock, flags); + if (unlikely(attr_len > ni->initialized_size)) + attr_len = ni->initialized_size; + read_unlock_irqrestore(&ni->size_lock, flags); kaddr = kmap_atomic(page, KM_USER0); /* Copy the data to the page. */ memcpy(kaddr, (u8*)ctx->attr + @@ -1247,20 +1248,6 @@ static int ntfs_writepage(struct page *page, struct writeback_control *wbc) int err; BUG_ON(!PageLocked(page)); - /* - * If a previous ntfs_truncate() failed, repeat it and abort if it - * fails again. - */ - if (unlikely(NInoTruncateFailed(ni))) { - down_write(&vi->i_alloc_sem); - err = ntfs_truncate(vi); - up_write(&vi->i_alloc_sem); - if (err || NInoTruncateFailed(ni)) { - if (!err) - err = -EIO; - goto err_out; - } - } i_size = i_size_read(vi); /* Is the page fully outside i_size? (truncate in progress) */ if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >> @@ -1490,13 +1477,12 @@ static int ntfs_prepare_nonresident_write(struct page *page, read_lock_irqsave(&ni->size_lock, flags); /* - * The first out of bounds block for the allocated size. No need to + * The first out of bounds block for the allocated size. No need to * round up as allocated_size is in multiples of cluster size and the * minimum cluster size is 512 bytes, which is equal to the smallest * blocksize. */ ablock = ni->allocated_size >> blocksize_bits; - i_size = i_size_read(vi); initialized_size = ni->initialized_size; read_unlock_irqrestore(&ni->size_lock, flags); diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index 7d668466dc..7a16f7ca76 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -1,7 +1,7 @@ /** * attrib.c - NTFS attribute operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2005 Anton Altaparmakov * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -30,7 +30,7 @@ #include "types.h" /** - * ntfs_map_runlist - map (a part of) a runlist of an ntfs inode + * ntfs_map_runlist_nolock - map (a part of) a runlist of an ntfs inode * @ni: ntfs inode for which to map (part of) a runlist * @vcn: map runlist part containing this vcn * @@ -38,24 +38,23 @@ * * Return 0 on success and -errno on error. * - * Locking: - The runlist must be unlocked on entry and is unlocked on return. - * - This function takes the lock for writing and modifies the runlist. + * Locking: - The runlist must be locked for writing. + * - This function modifies the runlist. */ -int ntfs_map_runlist(ntfs_inode *ni, VCN vcn) +int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn) { ntfs_inode *base_ni; - ntfs_attr_search_ctx *ctx; MFT_RECORD *mrec; + ntfs_attr_search_ctx *ctx; + runlist_element *rl; int err = 0; ntfs_debug("Mapping runlist part containing vcn 0x%llx.", (unsigned long long)vcn); - if (!NInoAttr(ni)) base_ni = ni; else base_ni = ni->ext.base_ntfs_ino; - mrec = map_mft_record(base_ni); if (IS_ERR(mrec)) return PTR_ERR(mrec); @@ -66,15 +65,7 @@ int ntfs_map_runlist(ntfs_inode *ni, VCN vcn) } err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, CASE_SENSITIVE, vcn, NULL, 0, ctx); - if (unlikely(err)) - goto put_err_out; - - down_write(&ni->runlist.lock); - /* Make sure someone else didn't do the work while we were sleeping. */ - if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <= - LCN_RL_NOT_MAPPED)) { - runlist_element *rl; - + if (likely(!err)) { rl = ntfs_mapping_pairs_decompress(ni->vol, ctx->attr, ni->runlist.rl); if (IS_ERR(rl)) @@ -82,9 +73,6 @@ int ntfs_map_runlist(ntfs_inode *ni, VCN vcn) else ni->runlist.rl = rl; } - up_write(&ni->runlist.lock); - -put_err_out: ntfs_attr_put_search_ctx(ctx); err_out: unmap_mft_record(base_ni); @@ -92,17 +80,45 @@ err_out: } /** - * ntfs_find_vcn - find a vcn in the runlist described by an ntfs inode - * @ni: ntfs inode describing the runlist to search - * @vcn: vcn to find - * @need_write: if false, lock for reading and if true, lock for writing + * ntfs_map_runlist - map (a part of) a runlist of an ntfs inode + * @ni: ntfs inode for which to map (part of) a runlist + * @vcn: map runlist part containing this vcn + * + * Map the part of a runlist containing the @vcn of the ntfs inode @ni. + * + * Return 0 on success and -errno on error. + * + * Locking: - The runlist must be unlocked on entry and is unlocked on return. + * - This function takes the runlist lock for writing and modifies the + * runlist. + */ +int ntfs_map_runlist(ntfs_inode *ni, VCN vcn) +{ + int err = 0; + + down_write(&ni->runlist.lock); + /* Make sure someone else didn't do the work while we were sleeping. */ + if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <= + LCN_RL_NOT_MAPPED)) + err = ntfs_map_runlist_nolock(ni, vcn); + up_write(&ni->runlist.lock); + return err; +} + +/** + * ntfs_find_vcn_nolock - find a vcn in the runlist described by an ntfs inode + * @ni: ntfs inode describing the runlist to search + * @vcn: vcn to find + * @write_locked: true if the runlist is locked for writing * * Find the virtual cluster number @vcn in the runlist described by the ntfs * inode @ni and return the address of the runlist element containing the @vcn. - * The runlist is left locked and the caller has to unlock it. If @need_write - * is true, the runlist is locked for writing and if @need_write is false, the - * runlist is locked for reading. In the error case, the runlist is not left - * locked. + * The runlist is left locked and the caller has to unlock it. In the error + * case, the runlist is left in the same locking state as on entry. + * + * Note if @write_locked is FALSE the lock may be dropped inside the function + * so you cannot rely on the runlist still being the same when this function + * returns. * * Note you need to distinguish between the lcn of the returned runlist element * being >= 0 and LCN_HOLE. In the later case you have to return zeroes on @@ -124,28 +140,24 @@ err_out: * true, it is locked for writing. Otherwise is is locked for * reading. */ -runlist_element *ntfs_find_vcn(ntfs_inode *ni, const VCN vcn, - const BOOL need_write) +runlist_element *ntfs_find_vcn_nolock(ntfs_inode *ni, const VCN vcn, + const BOOL write_locked) { runlist_element *rl; int err = 0; BOOL is_retry = FALSE; - ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, lock for %sing.", + ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.", ni->mft_no, (unsigned long long)vcn, - !need_write ? "read" : "writ"); + write_locked ? "write" : "read"); BUG_ON(!ni); BUG_ON(!NInoNonResident(ni)); BUG_ON(vcn < 0); -lock_retry_remap: - if (!need_write) - down_read(&ni->runlist.lock); - else - down_write(&ni->runlist.lock); +retry_remap: rl = ni->runlist.rl; if (likely(rl && vcn >= rl[0].vcn)) { while (likely(rl->length)) { - if (likely(vcn < rl[1].vcn)) { + if (unlikely(vcn < rl[1].vcn)) { if (likely(rl->lcn >= LCN_HOLE)) { ntfs_debug("Done."); return rl; @@ -161,19 +173,23 @@ lock_retry_remap: err = -EIO; } } - if (!need_write) - up_read(&ni->runlist.lock); - else - up_write(&ni->runlist.lock); if (!err && !is_retry) { /* * The @vcn is in an unmapped region, map the runlist and * retry. */ - err = ntfs_map_runlist(ni, vcn); + if (!write_locked) { + up_read(&ni->runlist.lock); + down_write(&ni->runlist.lock); + } + err = ntfs_map_runlist_nolock(ni, vcn); + if (!write_locked) { + up_write(&ni->runlist.lock); + down_read(&ni->runlist.lock); + } if (likely(!err)) { is_retry = TRUE; - goto lock_retry_remap; + goto retry_remap; } /* * -EINVAL and -ENOENT coming from a failed mapping attempt are @@ -184,7 +200,8 @@ lock_retry_remap: err = -EIO; } else if (!err) err = -EIO; - ntfs_error(ni->vol->sb, "Failed with error code %i.", err); + if (err != -ENOENT) + ntfs_error(ni->vol->sb, "Failed with error code %i.", err); return ERR_PTR(err); } diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h index e0c2c6c81b..3eb4516570 100644 --- a/fs/ntfs/attrib.h +++ b/fs/ntfs/attrib.h @@ -60,10 +60,11 @@ typedef struct { ATTR_RECORD *base_attr; } ntfs_attr_search_ctx; +extern int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn); extern int ntfs_map_runlist(ntfs_inode *ni, VCN vcn); -extern runlist_element *ntfs_find_vcn(ntfs_inode *ni, const VCN vcn, - const BOOL need_write); +extern runlist_element *ntfs_find_vcn_nolock(ntfs_inode *ni, const VCN vcn, + const BOOL write_locked); int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name, const u32 name_len, const IGNORE_CASE_BOOL ic, diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c index 5346596fa8..8db4492b13 100644 --- a/fs/ntfs/lcnalloc.c +++ b/fs/ntfs/lcnalloc.c @@ -849,7 +849,8 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, total_freed = real_freed = 0; /* This returns with ni->runlist locked for reading on success. */ - rl = ntfs_find_vcn(ni, start_vcn, FALSE); + down_read(&ni->runlist.lock); + rl = ntfs_find_vcn_nolock(ni, start_vcn, FALSE); if (IS_ERR(rl)) { if (!is_rollback) ntfs_error(vol->sb, "Failed to find first runlist " @@ -863,7 +864,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, ntfs_error(vol->sb, "First runlist element has " "invalid lcn, aborting."); err = -EIO; - goto unl_err_out; + goto err_out; } /* Find the starting cluster inside the run that needs freeing. */ delta = start_vcn - rl->vcn; @@ -881,7 +882,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, if (!is_rollback) ntfs_error(vol->sb, "Failed to clear first run " "(error %i), aborting.", err); - goto unl_err_out; + goto err_out; } /* We have freed @to_free real clusters. */ real_freed = to_free; @@ -901,30 +902,15 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, if (unlikely(rl->lcn < LCN_HOLE)) { VCN vcn; - /* - * Attempt to map runlist, dropping runlist lock for - * the duration. - */ + /* Attempt to map runlist. */ vcn = rl->vcn; - up_read(&ni->runlist.lock); - err = ntfs_map_runlist(ni, vcn); - if (err) { - if (!is_rollback) - ntfs_error(vol->sb, "Failed to map " - "runlist fragment."); - if (err == -EINVAL || err == -ENOENT) - err = -EIO; - goto err_out; - } - /* - * This returns with ni->runlist locked for reading on - * success. - */ - rl = ntfs_find_vcn(ni, vcn, FALSE); + rl = ntfs_find_vcn_nolock(ni, vcn, FALSE); if (IS_ERR(rl)) { err = PTR_ERR(rl); if (!is_rollback) - ntfs_error(vol->sb, "Failed to find " + ntfs_error(vol->sb, "Failed to map " + "runlist fragment or " + "failed to find " "subsequent runlist " "element."); goto err_out; @@ -937,7 +923,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, (unsigned long long) rl->lcn); err = -EIO; - goto unl_err_out; + goto err_out; } } /* The number of clusters in this run that need freeing. */ @@ -953,7 +939,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, if (!is_rollback) ntfs_error(vol->sb, "Failed to clear " "subsequent run."); - goto unl_err_out; + goto err_out; } /* We have freed @to_free real clusters. */ real_freed += to_free; @@ -974,9 +960,8 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, /* We are done. Return the number of actually freed clusters. */ ntfs_debug("Done."); return real_freed; -unl_err_out: - up_read(&ni->runlist.lock); err_out: + up_read(&ni->runlist.lock); if (is_rollback) return err; /* If no real clusters were freed, no need to rollback. */ diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 4e0bf39426..0975d73883 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -1,7 +1,7 @@ /** * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2005 Anton Altaparmakov * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -287,7 +287,7 @@ MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref, } unmap_mft_record(ni); ntfs_error(base_ni->vol->sb, "Found stale extent mft " - "reference! Corrupt file system. " + "reference! Corrupt filesystem. " "Run chkdsk."); return ERR_PTR(-EIO); } @@ -318,7 +318,7 @@ map_err_out: /* Verify the sequence number if it is present. */ if (seq_no && (le16_to_cpu(m->sequence_number) != seq_no)) { ntfs_error(base_ni->vol->sb, "Found stale extent mft " - "reference! Corrupt file system. Run chkdsk."); + "reference! Corrupt filesystem. Run chkdsk."); destroy_ni = TRUE; m = ERR_PTR(-EIO); goto unm_err_out; @@ -1292,19 +1292,20 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol) /* * Determine the last lcn of the mft bitmap. The allocated size of the * mft bitmap cannot be zero so we are ok to do this. - * ntfs_find_vcn() returns the runlist locked on success. */ + down_write(&mftbmp_ni->runlist.lock); read_lock_irqsave(&mftbmp_ni->size_lock, flags); ll = mftbmp_ni->allocated_size; read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); - rl = ntfs_find_vcn(mftbmp_ni, (ll - 1) >> vol->cluster_size_bits, TRUE); + rl = ntfs_find_vcn_nolock(mftbmp_ni, + (ll - 1) >> vol->cluster_size_bits, TRUE); if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) { + up_write(&mftbmp_ni->runlist.lock); ntfs_error(vol->sb, "Failed to determine last allocated " "cluster of mft bitmap attribute."); - if (!IS_ERR(rl)) { - up_write(&mftbmp_ni->runlist.lock); + if (!IS_ERR(rl)) ret = -EIO; - } else + else ret = PTR_ERR(rl); return ret; } @@ -1428,6 +1429,8 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol) // TODO: Deal with this by moving this extent to a new mft // record or by starting a new extent in a new mft record or by // moving other attributes out of this mft record. + // Note: It will need to be a special mft record and if none of + // those are available it gets rather complicated... ntfs_error(vol->sb, "Not enough space in this mft record to " "accomodate extended mft bitmap attribute " "extent. Cannot handle this yet."); @@ -1719,19 +1722,20 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol) * Determine the preferred allocation location, i.e. the last lcn of * the mft data attribute. The allocated size of the mft data * attribute cannot be zero so we are ok to do this. - * ntfs_find_vcn() returns the runlist locked on success. */ + down_write(&mft_ni->runlist.lock); read_lock_irqsave(&mft_ni->size_lock, flags); ll = mft_ni->allocated_size; read_unlock_irqrestore(&mft_ni->size_lock, flags); - rl = ntfs_find_vcn(mft_ni, (ll - 1) >> vol->cluster_size_bits, TRUE); + rl = ntfs_find_vcn_nolock(mft_ni, (ll - 1) >> vol->cluster_size_bits, + TRUE); if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) { + up_write(&mft_ni->runlist.lock); ntfs_error(vol->sb, "Failed to determine last allocated " "cluster of mft data attribute."); - if (!IS_ERR(rl)) { - up_write(&mft_ni->runlist.lock); + if (!IS_ERR(rl)) ret = -EIO; - } else + else ret = PTR_ERR(rl); return ret; } @@ -1858,7 +1862,11 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol) // moving other attributes out of this mft record. // Note: Use the special reserved mft records and ensure that // this extent is not required to find the mft record in - // question. + // question. If no free special records left we would need to + // move an existing record away, insert ours in its place, and + // then place the moved record into the newly allocated space + // and we would then need to update all references to this mft + // record appropriately. This is rather complicated... ntfs_error(vol->sb, "Not enough space in this mft record to " "accomodate extended mft data attribute " "extent. Cannot handle this yet."); @@ -2021,7 +2029,7 @@ static int ntfs_mft_record_layout(const ntfs_volume *vol, const s64 mft_no, "reports this as corruption, please email " "linux-ntfs-dev@lists.sourceforge.net stating " "that you saw this message and that the " - "modified file system created was corrupt. " + "modified filesystem created was corrupt. " "Thank you."); } /* Set the update sequence number to 1. */