]> err.no Git - linux-2.6/blobdiff - mm/filemap.c
move mm_struct and vm_area_struct
[linux-2.6] / mm / filemap.c
index 5d5449f3d41c83810e002d7fc41cf0cb55b88536..7989c44cb293d09c5c6b306128aceafdb369d311 100644 (file)
@@ -593,7 +593,7 @@ void fastcall __lock_page_nosync(struct page *page)
  * Is there a pagecache struct page at the given (mapping, offset) tuple?
  * If yes, increment its refcount and return it; if no, return NULL.
  */
-struct page * find_get_page(struct address_space *mapping, unsigned long offset)
+struct page * find_get_page(struct address_space *mapping, pgoff_t offset)
 {
        struct page *page;
 
@@ -617,30 +617,31 @@ EXPORT_SYMBOL(find_get_page);
  * Returns zero if the page was not present. find_lock_page() may sleep.
  */
 struct page *find_lock_page(struct address_space *mapping,
-                               unsigned long offset)
+                               pgoff_t offset)
 {
        struct page *page;
 
-       read_lock_irq(&mapping->tree_lock);
 repeat:
+       read_lock_irq(&mapping->tree_lock);
        page = radix_tree_lookup(&mapping->page_tree, offset);
        if (page) {
                page_cache_get(page);
                if (TestSetPageLocked(page)) {
                        read_unlock_irq(&mapping->tree_lock);
                        __lock_page(page);
-                       read_lock_irq(&mapping->tree_lock);
 
                        /* Has the page been truncated while we slept? */
-                       if (unlikely(page->mapping != mapping ||
-                                    page->index != offset)) {
+                       if (unlikely(page->mapping != mapping)) {
                                unlock_page(page);
                                page_cache_release(page);
                                goto repeat;
                        }
+                       VM_BUG_ON(page->index != offset);
+                       goto out;
                }
        }
        read_unlock_irq(&mapping->tree_lock);
+out:
        return page;
 }
 EXPORT_SYMBOL(find_lock_page);
@@ -663,7 +664,7 @@ EXPORT_SYMBOL(find_lock_page);
  * memory exhaustion.
  */
 struct page *find_or_create_page(struct address_space *mapping,
-               unsigned long index, gfp_t gfp_mask)
+               pgoff_t index, gfp_t gfp_mask)
 {
        struct page *page, *cached_page = NULL;
        int err;
@@ -797,7 +798,7 @@ EXPORT_SYMBOL(find_get_pages_tag);
  * and deadlock against the caller's locked page.
  */
 struct page *
-grab_cache_page_nowait(struct address_space *mapping, unsigned long index)
+grab_cache_page_nowait(struct address_space *mapping, pgoff_t index)
 {
        struct page *page = find_get_page(mapping, index);
 
@@ -859,47 +860,49 @@ static void shrink_readahead_size_eio(struct file *filp,
  * It may be NULL.
  */
 void do_generic_mapping_read(struct address_space *mapping,
-                            struct file_ra_state *_ra,
+                            struct file_ra_state *ra,
                             struct file *filp,
                             loff_t *ppos,
                             read_descriptor_t *desc,
                             read_actor_t actor)
 {
        struct inode *inode = mapping->host;
-       unsigned long index;
-       unsigned long offset;
-       unsigned long last_index;
-       unsigned long next_index;
-       unsigned long prev_index;
+       pgoff_t index;
+       pgoff_t last_index;
+       pgoff_t prev_index;
+       unsigned long offset;      /* offset into pagecache page */
        unsigned int prev_offset;
        struct page *cached_page;
        int error;
-       struct file_ra_state ra = *_ra;
 
        cached_page = NULL;
        index = *ppos >> PAGE_CACHE_SHIFT;
-       next_index = index;
-       prev_index = ra.prev_index;
-       prev_offset = ra.prev_offset;
+       prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT;
+       prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1);
        last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
        offset = *ppos & ~PAGE_CACHE_MASK;
 
        for (;;) {
                struct page *page;
-               unsigned long end_index;
+               pgoff_t end_index;
                loff_t isize;
                unsigned long nr, ret;
 
                cond_resched();
-               if (index == next_index)
-                       next_index = page_cache_readahead(mapping, &ra, filp,
-                                       index, last_index - index);
-
 find_page:
                page = find_get_page(mapping, index);
-               if (unlikely(page == NULL)) {
-                       handle_ra_miss(mapping, &ra, index);
-                       goto no_cached_page;
+               if (!page) {
+                       page_cache_sync_readahead(mapping,
+                                       ra, filp,
+                                       index, last_index - index);
+                       page = find_get_page(mapping, index);
+                       if (unlikely(page == NULL))
+                               goto no_cached_page;
+               }
+               if (PageReadahead(page)) {
+                       page_cache_async_readahead(mapping,
+                                       ra, filp, page,
+                                       index, last_index - index);
                }
                if (!PageUptodate(page))
                        goto page_not_up_to_date;
@@ -961,7 +964,6 @@ page_ok:
                index += offset >> PAGE_CACHE_SHIFT;
                offset &= ~PAGE_CACHE_MASK;
                prev_offset = offset;
-               ra.prev_offset = offset;
 
                page_cache_release(page);
                if (ret == nr && desc->count)
@@ -1010,7 +1012,7 @@ readpage:
                                }
                                unlock_page(page);
                                error = -EIO;
-                               shrink_readahead_size_eio(filp, &ra);
+                               shrink_readahead_size_eio(filp, ra);
                                goto readpage_error;
                        }
                        unlock_page(page);
@@ -1050,9 +1052,11 @@ no_cached_page:
        }
 
 out:
-       *_ra = ra;
+       ra->prev_pos = prev_index;
+       ra->prev_pos <<= PAGE_CACHE_SHIFT;
+       ra->prev_pos |= prev_offset;
 
-       *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
+       *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset;
        if (cached_page)
                page_cache_release(cached_page);
        if (filp)
@@ -1212,29 +1216,9 @@ out:
 }
 EXPORT_SYMBOL(generic_file_aio_read);
 
-int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
-{
-       ssize_t written;
-       unsigned long count = desc->count;
-       struct file *file = desc->arg.data;
-
-       if (size > count)
-               size = count;
-
-       written = file->f_op->sendpage(file, page, offset,
-                                      size, &file->f_pos, size<count);
-       if (written < 0) {
-               desc->error = written;
-               written = 0;
-       }
-       desc->count = count - written;
-       desc->written += written;
-       return written;
-}
-
 static ssize_t
 do_readahead(struct address_space *mapping, struct file *filp,
-            unsigned long index, unsigned long nr)
+            pgoff_t index, unsigned long nr)
 {
        if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage)
                return -EINVAL;
@@ -1254,8 +1238,8 @@ asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count)
        if (file) {
                if (file->f_mode & FMODE_READ) {
                        struct address_space *mapping = file->f_mapping;
-                       unsigned long start = offset >> PAGE_CACHE_SHIFT;
-                       unsigned long end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
+                       pgoff_t start = offset >> PAGE_CACHE_SHIFT;
+                       pgoff_t end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
                        unsigned long len = end - start + 1;
                        ret = do_readahead(mapping, file, start, len);
                }
@@ -1265,7 +1249,6 @@ asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count)
 }
 
 #ifdef CONFIG_MMU
-static int FASTCALL(page_cache_read(struct file * file, unsigned long offset));
 /**
  * page_cache_read - adds requested page to the page cache if not already there
  * @file:      file to read
@@ -1274,7 +1257,7 @@ static int FASTCALL(page_cache_read(struct file * file, unsigned long offset));
  * This adds the requested page to the page cache if it isn't already there,
  * and schedules an I/O to read in its contents from disk.
  */
-static int fastcall page_cache_read(struct file * file, unsigned long offset)
+static int fastcall page_cache_read(struct file * file, pgoff_t offset)
 {
        struct address_space *mapping = file->f_mapping;
        struct page *page; 
@@ -1301,69 +1284,69 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
 #define MMAP_LOTSAMISS  (100)
 
 /**
- * filemap_nopage - read in file data for page fault handling
- * @area:      the applicable vm_area
- * @address:   target address to read in
- * @type:      returned with VM_FAULT_{MINOR,MAJOR} if not %NULL
+ * filemap_fault - read in file data for page fault handling
+ * @vma:       vma in which the fault was taken
+ * @vmf:       struct vm_fault containing details of the fault
  *
- * filemap_nopage() is invoked via the vma operations vector for a
+ * filemap_fault() is invoked via the vma operations vector for a
  * mapped memory region to read in file data during a page fault.
  *
  * The goto's are kind of ugly, but this streamlines the normal case of having
  * it in the page cache, and handles the special cases reasonably without
  * having a lot of duplicated code.
  */
-struct page *filemap_nopage(struct vm_area_struct *area,
-                               unsigned long address, int *type)
+int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
        int error;
-       struct file *file = area->vm_file;
+       struct file *file = vma->vm_file;
        struct address_space *mapping = file->f_mapping;
        struct file_ra_state *ra = &file->f_ra;
        struct inode *inode = mapping->host;
        struct page *page;
-       unsigned long size, pgoff;
-       int did_readaround = 0, majmin = VM_FAULT_MINOR;
-
-       pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
+       unsigned long size;
+       int did_readaround = 0;
+       int ret = 0;
 
-retry_all:
        size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-       if (pgoff >= size)
+       if (vmf->pgoff >= size)
                goto outside_data_content;
 
        /* If we don't want any read-ahead, don't bother */
-       if (VM_RandomReadHint(area))
+       if (VM_RandomReadHint(vma))
                goto no_cached_page;
 
-       /*
-        * The readahead code wants to be told about each and every page
-        * so it can build and shrink its windows appropriately
-        *
-        * For sequential accesses, we use the generic readahead logic.
-        */
-       if (VM_SequentialReadHint(area))
-               page_cache_readahead(mapping, ra, file, pgoff, 1);
-
        /*
         * Do we have something in the page cache already?
         */
 retry_find:
-       page = find_get_page(mapping, pgoff);
+       page = find_lock_page(mapping, vmf->pgoff);
+       /*
+        * For sequential accesses, we use the generic readahead logic.
+        */
+       if (VM_SequentialReadHint(vma)) {
+               if (!page) {
+                       page_cache_sync_readahead(mapping, ra, file,
+                                                          vmf->pgoff, 1);
+                       page = find_lock_page(mapping, vmf->pgoff);
+                       if (!page)
+                               goto no_cached_page;
+               }
+               if (PageReadahead(page)) {
+                       page_cache_async_readahead(mapping, ra, file, page,
+                                                          vmf->pgoff, 1);
+               }
+       }
+
        if (!page) {
                unsigned long ra_pages;
 
-               if (VM_SequentialReadHint(area)) {
-                       handle_ra_miss(mapping, ra, pgoff);
-                       goto no_cached_page;
-               }
                ra->mmap_miss++;
 
                /*
                 * Do we miss much more than hit in this file? If so,
                 * stop bothering with read-ahead. It will only hurt.
                 */
-               if (ra->mmap_miss > ra->mmap_hit + MMAP_LOTSAMISS)
+               if (ra->mmap_miss > MMAP_LOTSAMISS)
                        goto no_cached_page;
 
                /*
@@ -1371,7 +1354,7 @@ retry_find:
                 * check did_readaround, as this is an inner loop.
                 */
                if (!did_readaround) {
-                       majmin = VM_FAULT_MAJOR;
+                       ret = VM_FAULT_MAJOR;
                        count_vm_event(PGMAJFAULT);
                }
                did_readaround = 1;
@@ -1379,48 +1362,56 @@ retry_find:
                if (ra_pages) {
                        pgoff_t start = 0;
 
-                       if (pgoff > ra_pages / 2)
-                               start = pgoff - ra_pages / 2;
+                       if (vmf->pgoff > ra_pages / 2)
+                               start = vmf->pgoff - ra_pages / 2;
                        do_page_cache_readahead(mapping, file, start, ra_pages);
                }
-               page = find_get_page(mapping, pgoff);
+               page = find_lock_page(mapping, vmf->pgoff);
                if (!page)
                        goto no_cached_page;
        }
 
        if (!did_readaround)
-               ra->mmap_hit++;
+               ra->mmap_miss--;
 
        /*
-        * Ok, found a page in the page cache, now we need to check
-        * that it's up-to-date.
+        * We have a locked page in the page cache, now we need to check
+        * that it's up-to-date. If not, it is going to be due to an error.
         */
-       if (!PageUptodate(page))
+       if (unlikely(!PageUptodate(page)))
                goto page_not_uptodate;
 
-success:
+       /* Must recheck i_size under page lock */
+       size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+       if (unlikely(vmf->pgoff >= size)) {
+               unlock_page(page);
+               page_cache_release(page);
+               goto outside_data_content;
+       }
+
        /*
         * Found the page and have a reference on it.
         */
        mark_page_accessed(page);
-       if (type)
-               *type = majmin;
-       return page;
+       ra->prev_pos = (loff_t)page->index << PAGE_CACHE_SHIFT;
+       vmf->page = page;
+       return ret | VM_FAULT_LOCKED;
 
 outside_data_content:
        /*
         * An external ptracer can access pages that normally aren't
         * accessible..
         */
-       if (area->vm_mm == current->mm)
-               return NOPAGE_SIGBUS;
+       if (vma->vm_mm == current->mm)
+               return VM_FAULT_SIGBUS;
+
        /* Fall through to the non-read-ahead case */
 no_cached_page:
        /*
         * We're only likely to ever get here if MADV_RANDOM is in
         * effect.
         */
-       error = page_cache_read(file, pgoff);
+       error = page_cache_read(file, vmf->pgoff);
 
        /*
         * The page we want has now been added to the page cache.
@@ -1436,12 +1427,13 @@ no_cached_page:
         * to schedule I/O.
         */
        if (error == -ENOMEM)
-               return NOPAGE_OOM;
-       return NOPAGE_SIGBUS;
+               return VM_FAULT_OOM;
+       return VM_FAULT_SIGBUS;
 
 page_not_uptodate:
+       /* IO error path */
        if (!did_readaround) {
-               majmin = VM_FAULT_MAJOR;
+               ret = VM_FAULT_MAJOR;
                count_vm_event(PGMAJFAULT);
        }
 
@@ -1451,217 +1443,21 @@ page_not_uptodate:
         * because there really aren't any performance issues here
         * and we need to check for errors.
         */
-       lock_page(page);
-
-       /* Somebody truncated the page on us? */
-       if (!page->mapping) {
-               unlock_page(page);
-               page_cache_release(page);
-               goto retry_all;
-       }
-
-       /* Somebody else successfully read it in? */
-       if (PageUptodate(page)) {
-               unlock_page(page);
-               goto success;
-       }
        ClearPageError(page);
        error = mapping->a_ops->readpage(file, page);
-       if (!error) {
-               wait_on_page_locked(page);
-               if (PageUptodate(page))
-                       goto success;
-       } else if (error == AOP_TRUNCATED_PAGE) {
-               page_cache_release(page);
-               goto retry_find;
-       }
-
-       /*
-        * Things didn't work out. Return zero to tell the
-        * mm layer so, possibly freeing the page cache page first.
-        */
-       shrink_readahead_size_eio(file, ra);
        page_cache_release(page);
-       return NOPAGE_SIGBUS;
-}
-EXPORT_SYMBOL(filemap_nopage);
-
-static struct page * filemap_getpage(struct file *file, unsigned long pgoff,
-                                       int nonblock)
-{
-       struct address_space *mapping = file->f_mapping;
-       struct page *page;
-       int error;
-
-       /*
-        * Do we have something in the page cache already?
-        */
-retry_find:
-       page = find_get_page(mapping, pgoff);
-       if (!page) {
-               if (nonblock)
-                       return NULL;
-               goto no_cached_page;
-       }
-
-       /*
-        * Ok, found a page in the page cache, now we need to check
-        * that it's up-to-date.
-        */
-       if (!PageUptodate(page)) {
-               if (nonblock) {
-                       page_cache_release(page);
-                       return NULL;
-               }
-               goto page_not_uptodate;
-       }
 
-success:
-       /*
-        * Found the page and have a reference on it.
-        */
-       mark_page_accessed(page);
-       return page;
-
-no_cached_page:
-       error = page_cache_read(file, pgoff);
-
-       /*
-        * The page we want has now been added to the page cache.
-        * In the unlikely event that someone removed it in the
-        * meantime, we'll just come back here and read it again.
-        */
-       if (error >= 0)
+       if (!error || error == AOP_TRUNCATED_PAGE)
                goto retry_find;
 
-       /*
-        * An error return from page_cache_read can result if the
-        * system is low on memory, or a problem occurs while trying
-        * to schedule I/O.
-        */
-       return NULL;
-
-page_not_uptodate:
-       lock_page(page);
-
-       /* Did it get truncated while we waited for it? */
-       if (!page->mapping) {
-               unlock_page(page);
-               goto err;
-       }
-
-       /* Did somebody else get it up-to-date? */
-       if (PageUptodate(page)) {
-               unlock_page(page);
-               goto success;
-       }
-
-       error = mapping->a_ops->readpage(file, page);
-       if (!error) {
-               wait_on_page_locked(page);
-               if (PageUptodate(page))
-                       goto success;
-       } else if (error == AOP_TRUNCATED_PAGE) {
-               page_cache_release(page);
-               goto retry_find;
-       }
-
-       /*
-        * Umm, take care of errors if the page isn't up-to-date.
-        * Try to re-read it _once_. We do this synchronously,
-        * because there really aren't any performance issues here
-        * and we need to check for errors.
-        */
-       lock_page(page);
-
-       /* Somebody truncated the page on us? */
-       if (!page->mapping) {
-               unlock_page(page);
-               goto err;
-       }
-       /* Somebody else successfully read it in? */
-       if (PageUptodate(page)) {
-               unlock_page(page);
-               goto success;
-       }
-
-       ClearPageError(page);
-       error = mapping->a_ops->readpage(file, page);
-       if (!error) {
-               wait_on_page_locked(page);
-               if (PageUptodate(page))
-                       goto success;
-       } else if (error == AOP_TRUNCATED_PAGE) {
-               page_cache_release(page);
-               goto retry_find;
-       }
-
-       /*
-        * Things didn't work out. Return zero to tell the
-        * mm layer so, possibly freeing the page cache page first.
-        */
-err:
-       page_cache_release(page);
-
-       return NULL;
-}
-
-int filemap_populate(struct vm_area_struct *vma, unsigned long addr,
-               unsigned long len, pgprot_t prot, unsigned long pgoff,
-               int nonblock)
-{
-       struct file *file = vma->vm_file;
-       struct address_space *mapping = file->f_mapping;
-       struct inode *inode = mapping->host;
-       unsigned long size;
-       struct mm_struct *mm = vma->vm_mm;
-       struct page *page;
-       int err;
-
-       if (!nonblock)
-               force_page_cache_readahead(mapping, vma->vm_file,
-                                       pgoff, len >> PAGE_CACHE_SHIFT);
-
-repeat:
-       size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-       if (pgoff + (len >> PAGE_CACHE_SHIFT) > size)
-               return -EINVAL;
-
-       page = filemap_getpage(file, pgoff, nonblock);
-
-       /* XXX: This is wrong, a filesystem I/O error may have happened. Fix that as
-        * done in shmem_populate calling shmem_getpage */
-       if (!page && !nonblock)
-               return -ENOMEM;
-
-       if (page) {
-               err = install_page(mm, vma, addr, page, prot);
-               if (err) {
-                       page_cache_release(page);
-                       return err;
-               }
-       } else if (vma->vm_flags & VM_NONLINEAR) {
-               /* No page was found just because we can't read it in now (being
-                * here implies nonblock != 0), but the page may exist, so set
-                * the PTE to fault it in later. */
-               err = install_file_pte(mm, vma, addr, pgoff, prot);
-               if (err)
-                       return err;
-       }
-
-       len -= PAGE_SIZE;
-       addr += PAGE_SIZE;
-       pgoff++;
-       if (len)
-               goto repeat;
-
-       return 0;
+       /* Things didn't work out. Return zero to tell the mm layer so. */
+       shrink_readahead_size_eio(file, ra);
+       return VM_FAULT_SIGBUS;
 }
-EXPORT_SYMBOL(filemap_populate);
+EXPORT_SYMBOL(filemap_fault);
 
 struct vm_operations_struct generic_file_vm_ops = {
-       .nopage         = filemap_nopage,
-       .populate       = filemap_populate,
+       .fault          = filemap_fault,
 };
 
 /* This is used for a general mmap of a disk file */
@@ -1674,6 +1470,7 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
                return -ENOEXEC;
        file_accessed(file);
        vma->vm_ops = &generic_file_vm_ops;
+       vma->vm_flags |= VM_CAN_NONLINEAR;
        return 0;
 }
 
@@ -1701,7 +1498,7 @@ EXPORT_SYMBOL(generic_file_mmap);
 EXPORT_SYMBOL(generic_file_readonly_mmap);
 
 static struct page *__read_cache_page(struct address_space *mapping,
-                               unsigned long index,
+                               pgoff_t index,
                                int (*filler)(void *,struct page*),
                                void *data)
 {
@@ -1742,7 +1539,7 @@ repeat:
  * after submitting it to the filler.
  */
 struct page *read_cache_page_async(struct address_space *mapping,
-                               unsigned long index,
+                               pgoff_t index,
                                int (*filler)(void *,struct page*),
                                void *data)
 {
@@ -1790,7 +1587,7 @@ EXPORT_SYMBOL(read_cache_page_async);
  * If the page does not get brought uptodate, return -EIO.
  */
 struct page *read_cache_page(struct address_space *mapping,
-                               unsigned long index,
+                               pgoff_t index,
                                int (*filler)(void *,struct page*),
                                void *data)
 {