X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Fmemory.c;h=ca8cac11bd2cf4930533f306a0209bbbea0e9c31;hb=d1496c39e500857b8949cdb91af24e0eb8aae4d0;hp=7abd3899848bf9ff4f032fda6a48ad86e1f92d18;hpb=6967614761fd305b3414d9485d89dc2e0a407410;p=linux-2.6 diff --git a/mm/memory.c b/mm/memory.c index 7abd389984..ca8cac11bd 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1068,31 +1068,30 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, cond_resched(); while (!(page = follow_page(vma, start, foll_flags))) { int ret; - ret = __handle_mm_fault(mm, vma, start, + ret = handle_mm_fault(mm, vma, start, foll_flags & FOLL_WRITE); + if (ret & VM_FAULT_ERROR) { + if (ret & VM_FAULT_OOM) + return i ? i : -ENOMEM; + else if (ret & VM_FAULT_SIGBUS) + return i ? i : -EFAULT; + BUG(); + } + if (ret & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; + /* - * The VM_FAULT_WRITE bit tells us that do_wp_page has - * broken COW when necessary, even if maybe_mkwrite - * decided not to set pte_write. We can thus safely do - * subsequent page lookups as if they were reads. + * The VM_FAULT_WRITE bit tells us that + * do_wp_page has broken COW when necessary, + * even if maybe_mkwrite decided not to set + * pte_write. We can thus safely do subsequent + * page lookups as if they were reads. */ if (ret & VM_FAULT_WRITE) foll_flags &= ~FOLL_WRITE; - - switch (ret & ~VM_FAULT_WRITE) { - case VM_FAULT_MINOR: - tsk->min_flt++; - break; - case VM_FAULT_MAJOR: - tsk->maj_flt++; - break; - case VM_FAULT_SIGBUS: - return i ? i : -EFAULT; - case VM_FAULT_OOM: - return i ? i : -ENOMEM; - default: - BUG(); - } + cond_resched(); } if (pages) { @@ -1639,7 +1638,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, { struct page *old_page, *new_page; pte_t entry; - int reuse = 0, ret = VM_FAULT_MINOR; + int reuse = 0, ret = 0; struct page *dirty_page = NULL; old_page = vm_normal_page(vma, address, orig_pte); @@ -1766,6 +1765,15 @@ gotten: unlock: pte_unmap_unlock(page_table, ptl); if (dirty_page) { + /* + * Yes, Virginia, this is actually required to prevent a race + * with clear_page_dirty_for_io() from clearing the page dirty + * bit after it clear all dirty ptes, but before a racing + * do_wp_page installs a dirty pte. + * + * do_no_page is protected similarly. + */ + wait_on_page_locked(dirty_page); set_page_dirty_balance(dirty_page); put_page(dirty_page); } @@ -1834,10 +1842,10 @@ static int unmap_mapping_range_vma(struct vm_area_struct *vma, /* * files that support invalidating or truncating portions of the - * file from under mmaped areas must set the VM_CAN_INVALIDATE flag, and - * have their .nopage function return the page locked. + * file from under mmaped areas must have their ->fault function + * return a locked page (and set VM_FAULT_LOCKED in the return). + * This provides synchronisation against concurrent unmapping here. */ - BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE)); again: restart_addr = vma->vm_truncate_count; @@ -2140,7 +2148,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page; swp_entry_t entry; pte_t pte; - int ret = VM_FAULT_MINOR; + int ret = 0; if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) goto out; @@ -2208,8 +2216,9 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, unlock_page(page); if (write_access) { + /* XXX: We could OR the do_wp_page code with this one? */ if (do_wp_page(mm, vma, address, - page_table, pmd, ptl, pte) == VM_FAULT_OOM) + page_table, pmd, ptl, pte) & VM_FAULT_OOM) ret = VM_FAULT_OOM; goto out; } @@ -2280,7 +2289,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, lazy_mmu_prot_update(entry); unlock: pte_unmap_unlock(page_table, ptl); - return VM_FAULT_MINOR; + return 0; release: page_cache_release(page); goto unlock; @@ -2306,63 +2315,63 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, pgoff_t pgoff, unsigned int flags, pte_t orig_pte) { spinlock_t *ptl; - struct page *page, *faulted_page; + struct page *page; pte_t entry; int anon = 0; struct page *dirty_page = NULL; - struct fault_data fdata; + struct vm_fault vmf; + int ret; - fdata.address = address & PAGE_MASK; - fdata.pgoff = pgoff; - fdata.flags = flags; + vmf.virtual_address = (void __user *)(address & PAGE_MASK); + vmf.pgoff = pgoff; + vmf.flags = flags; + vmf.page = NULL; pte_unmap(page_table); BUG_ON(vma->vm_flags & VM_PFNMAP); if (likely(vma->vm_ops->fault)) { - fdata.type = -1; - faulted_page = vma->vm_ops->fault(vma, &fdata); - WARN_ON(fdata.type == -1); - if (unlikely(!faulted_page)) - return fdata.type; + ret = vma->vm_ops->fault(vma, &vmf); + if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) + return ret; } else { /* Legacy ->nopage path */ - fdata.type = VM_FAULT_MINOR; - faulted_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, - &fdata.type); + ret = 0; + vmf.page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret); /* no page was available -- either SIGBUS or OOM */ - if (unlikely(faulted_page == NOPAGE_SIGBUS)) + if (unlikely(vmf.page == NOPAGE_SIGBUS)) return VM_FAULT_SIGBUS; - else if (unlikely(faulted_page == NOPAGE_OOM)) + else if (unlikely(vmf.page == NOPAGE_OOM)) return VM_FAULT_OOM; } /* - * For consistency in subsequent calls, make the faulted_page always + * For consistency in subsequent calls, make the faulted page always * locked. */ - if (unlikely(!(vma->vm_flags & VM_CAN_INVALIDATE))) - lock_page(faulted_page); + if (unlikely(!(ret & VM_FAULT_LOCKED))) + lock_page(vmf.page); else - BUG_ON(!PageLocked(faulted_page)); + VM_BUG_ON(!PageLocked(vmf.page)); /* * Should we do an early C-O-W break? */ - page = faulted_page; + page = vmf.page; if (flags & FAULT_FLAG_WRITE) { if (!(vma->vm_flags & VM_SHARED)) { anon = 1; if (unlikely(anon_vma_prepare(vma))) { - fdata.type = VM_FAULT_OOM; + ret = VM_FAULT_OOM; goto out; } - page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); + page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, + vma, address); if (!page) { - fdata.type = VM_FAULT_OOM; + ret = VM_FAULT_OOM; goto out; } - copy_user_highpage(page, faulted_page, address, vma); + copy_user_highpage(page, vmf.page, address, vma); } else { /* * If the page will be shareable, see if the backing @@ -2372,11 +2381,23 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (vma->vm_ops->page_mkwrite) { unlock_page(page); if (vma->vm_ops->page_mkwrite(vma, page) < 0) { - fdata.type = VM_FAULT_SIGBUS; - anon = 1; /* no anon but release faulted_page */ + ret = VM_FAULT_SIGBUS; + anon = 1; /* no anon but release vmf.page */ goto out_unlocked; } lock_page(page); + /* + * XXX: this is not quite right (racy vs + * invalidate) to unlock and relock the page + * like this, however a better fix requires + * reworking page_mkwrite locking API, which + * is better done later. + */ + if (!page->mapping) { + ret = 0; + anon = 1; /* no anon but release vmf.page */ + goto out; + } } } @@ -2427,16 +2448,16 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, pte_unmap_unlock(page_table, ptl); out: - unlock_page(faulted_page); + unlock_page(vmf.page); out_unlocked: if (anon) - page_cache_release(faulted_page); + page_cache_release(vmf.page); else if (dirty_page) { set_page_dirty_balance(dirty_page); put_page(dirty_page); } - return fdata.type; + return ret; } static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, @@ -2447,18 +2468,10 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff; unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0); - return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte); + return __do_fault(mm, vma, address, page_table, pmd, pgoff, + flags, orig_pte); } -static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, pte_t *page_table, pmd_t *pmd, - int write_access, pgoff_t pgoff, pte_t orig_pte) -{ - unsigned int flags = FAULT_FLAG_NONLINEAR | - (write_access ? FAULT_FLAG_WRITE : 0); - - return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte); -} /* * do_no_pfn() tries to create a new page mapping for a page without @@ -2483,7 +2496,6 @@ static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma, spinlock_t *ptl; pte_t entry; unsigned long pfn; - int ret = VM_FAULT_MINOR; pte_unmap(page_table); BUG_ON(!(vma->vm_flags & VM_PFNMAP)); @@ -2495,7 +2507,7 @@ static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma, else if (unlikely(pfn == NOPFN_SIGBUS)) return VM_FAULT_SIGBUS; else if (unlikely(pfn == NOPFN_REFAULT)) - return VM_FAULT_MINOR; + return 0; page_table = pte_offset_map_lock(mm, pmd, address, &ptl); @@ -2507,7 +2519,7 @@ static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma, set_pte_at(mm, address, page_table, entry); } pte_unmap_unlock(page_table, ptl); - return ret; + return 0; } /* @@ -2519,17 +2531,19 @@ static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma, * but allow concurrent faults), and pte mapped but not yet locked. * We return with mmap_sem still held, but pte unmapped and unlocked. */ -static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma, +static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *page_table, pmd_t *pmd, int write_access, pte_t orig_pte) { + unsigned int flags = FAULT_FLAG_NONLINEAR | + (write_access ? FAULT_FLAG_WRITE : 0); pgoff_t pgoff; - int err; if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) - return VM_FAULT_MINOR; + return 0; - if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) { + if (unlikely(!(vma->vm_flags & VM_NONLINEAR) || + !(vma->vm_flags & VM_CAN_NONLINEAR))) { /* * Page table corrupted: show pte and kill process. */ @@ -2539,18 +2553,8 @@ static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma, pgoff = pte_to_pgoff(orig_pte); - if (vma->vm_ops && vma->vm_ops->fault) - return do_nonlinear_fault(mm, vma, address, page_table, pmd, - write_access, pgoff, orig_pte); - - /* We can then assume vm->vm_ops && vma->vm_ops->populate */ - err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE, - vma->vm_page_prot, pgoff, 0); - if (err == -ENOMEM) - return VM_FAULT_OOM; - if (err) - return VM_FAULT_SIGBUS; - return VM_FAULT_MAJOR; + return __do_fault(mm, vma, address, page_table, pmd, pgoff, + flags, orig_pte); } /* @@ -2588,7 +2592,7 @@ static inline int handle_pte_fault(struct mm_struct *mm, pte, pmd, write_access); } if (pte_file(entry)) - return do_file_page(mm, vma, address, + return do_nonlinear_fault(mm, vma, address, pte, pmd, write_access, entry); return do_swap_page(mm, vma, address, pte, pmd, write_access, entry); @@ -2620,13 +2624,13 @@ static inline int handle_pte_fault(struct mm_struct *mm, } unlock: pte_unmap_unlock(pte, ptl); - return VM_FAULT_MINOR; + return 0; } /* * By the time we get here, we already hold the mm semaphore */ -int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, +int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access) { pgd_t *pgd; @@ -2655,8 +2659,6 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, return handle_pte_fault(mm, vma, address, pte, pmd, write_access); } -EXPORT_SYMBOL_GPL(__handle_mm_fault); - #ifndef __PAGETABLE_PUD_FOLDED /* * Allocate page upper directory. @@ -2861,3 +2863,4 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in return buf - old_buf; } +EXPORT_SYMBOL_GPL(access_process_vm);