if (unlikely(page == NULL)) {
if (flags & XBF_READ_AHEAD) {
bp->b_page_count = i;
+ for (i = 0; i < bp->b_page_count; i++)
+ unlock_page(bp->b_pages[i]);
return -ENOMEM;
}
ASSERT(!PagePrivate(page));
if (!PageUptodate(page)) {
page_count--;
- if (blocksize < PAGE_CACHE_SIZE && !PagePrivate(page)) {
+ if (blocksize >= PAGE_CACHE_SIZE) {
+ if (flags & XBF_READ)
+ bp->b_flags |= _XBF_PAGE_LOCKED;
+ } else if (!PagePrivate(page)) {
if (test_page_region(page, offset, nbytes))
page_count++;
}
}
- unlock_page(page);
bp->b_pages[i] = page;
offset = 0;
}
+ if (!(bp->b_flags & _XBF_PAGE_LOCKED)) {
+ for (i = 0; i < bp->b_page_count; i++)
+ unlock_page(bp->b_pages[i]);
+ }
+
if (page_count == bp->b_page_count)
bp->b_flags |= XBF_DONE;
bp->b_count_desired = len;
bp->b_buffer_length = buflen;
bp->b_flags |= XBF_MAPPED;
+ bp->b_flags &= ~_XBF_PAGE_LOCKED;
return 0;
}
xfs_buf_t *bp,
int schedule)
{
- if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
+ if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
+ bp->b_flags &= ~_XBF_PAGE_LOCKED;
xfs_buf_ioend(bp, schedule);
+ }
}
STATIC void
if (--bvec >= bio->bi_io_vec)
prefetchw(&bvec->bv_page->flags);
+
+ if (bp->b_flags & _XBF_PAGE_LOCKED)
+ unlock_page(page);
} while (bvec >= bio->bi_io_vec);
_xfs_buf_ioend(bp, 1);
* filesystem block size is not smaller than the page size.
*/
if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
- (bp->b_flags & XBF_READ) &&
+ ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) ==
+ (XBF_READ|_XBF_PAGE_LOCKED)) &&
(blocksize >= PAGE_CACHE_SIZE)) {
bio = bio_alloc(GFP_NOIO, 1);
_XBF_PAGES = (1 << 18), /* backed by refcounted pages */
_XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */
_XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */
+
+ /*
+ * Special flag for supporting metadata blocks smaller than a FSB.
+ *
+ * In this case we can have multiple xfs_buf_t on a single page and
+ * need to lock out concurrent xfs_buf_t readers as they only
+ * serialise access to the buffer.
+ *
+ * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
+ * between reads of the page. Hence we can have one thread read the
+ * page and modify it, but then race with another thread that thinks
+ * the page is not up-to-date and hence reads it again.
+ *
+ * The result is that the first modifcation to the page is lost.
+ * This sort of AGF/AGI reading race can happen when unlinking inodes
+ * that require truncation and results in the AGI unlinked list
+ * modifications being lost.
+ */
+ _XBF_PAGE_LOCKED = (1 << 22),
} xfs_buf_flags_t;
typedef enum {