X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Fmemory.c;h=f82b359b27452dd1f7e682197479fa1391d19a46;hb=5707708111ca6c4e9a1160acffdc98a98d95e462;hp=23c870479b3e83d69d8a5f3d55016fb2fb55b816;hpb=d0217ac04ca6591841e5665f518e38064f4e65bd;p=linux-2.6 diff --git a/mm/memory.c b/mm/memory.c index 23c870479b..f82b359b27 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1068,31 +1068,30 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, cond_resched(); while (!(page = follow_page(vma, start, foll_flags))) { int ret; - ret = __handle_mm_fault(mm, vma, start, + ret = handle_mm_fault(mm, vma, start, foll_flags & FOLL_WRITE); + if (ret & VM_FAULT_ERROR) { + if (ret & VM_FAULT_OOM) + return i ? i : -ENOMEM; + else if (ret & VM_FAULT_SIGBUS) + return i ? i : -EFAULT; + BUG(); + } + if (ret & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; + /* - * The VM_FAULT_WRITE bit tells us that do_wp_page has - * broken COW when necessary, even if maybe_mkwrite - * decided not to set pte_write. We can thus safely do - * subsequent page lookups as if they were reads. + * The VM_FAULT_WRITE bit tells us that + * do_wp_page has broken COW when necessary, + * even if maybe_mkwrite decided not to set + * pte_write. We can thus safely do subsequent + * page lookups as if they were reads. */ if (ret & VM_FAULT_WRITE) foll_flags &= ~FOLL_WRITE; - - switch (ret & ~VM_FAULT_WRITE) { - case VM_FAULT_MINOR: - tsk->min_flt++; - break; - case VM_FAULT_MAJOR: - tsk->maj_flt++; - break; - case VM_FAULT_SIGBUS: - return i ? i : -EFAULT; - case VM_FAULT_OOM: - return i ? i : -ENOMEM; - default: - BUG(); - } + cond_resched(); } if (pages) { @@ -1639,7 +1638,8 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, { struct page *old_page, *new_page; pte_t entry; - int reuse = 0, ret = VM_FAULT_MINOR; + int reuse = 0, ret = 0; + int page_mkwrite = 0; struct page *dirty_page = NULL; old_page = vm_normal_page(vma, address, orig_pte); @@ -1688,6 +1688,8 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, page_cache_release(old_page); if (!pte_same(*page_table, orig_pte)) goto unlock; + + page_mkwrite = 1; } dirty_page = old_page; get_page(dirty_page); @@ -1766,7 +1768,16 @@ gotten: unlock: pte_unmap_unlock(page_table, ptl); if (dirty_page) { - set_page_dirty_balance(dirty_page); + /* + * Yes, Virginia, this is actually required to prevent a race + * with clear_page_dirty_for_io() from clearing the page dirty + * bit after it clear all dirty ptes, but before a racing + * do_wp_page installs a dirty pte. + * + * do_no_page is protected similarly. + */ + wait_on_page_locked(dirty_page); + set_page_dirty_balance(dirty_page, page_mkwrite); put_page(dirty_page); } return ret; @@ -1835,8 +1846,8 @@ static int unmap_mapping_range_vma(struct vm_area_struct *vma, /* * files that support invalidating or truncating portions of the * file from under mmaped areas must have their ->fault function - * return a locked page (and FAULT_RET_LOCKED code). This provides - * synchronisation against concurrent unmapping here. + * return a locked page (and set VM_FAULT_LOCKED in the return). + * This provides synchronisation against concurrent unmapping here. */ again: @@ -2140,7 +2151,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page; swp_entry_t entry; pte_t pte; - int ret = VM_FAULT_MINOR; + int ret = 0; if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) goto out; @@ -2208,8 +2219,9 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, unlock_page(page); if (write_access) { + /* XXX: We could OR the do_wp_page code with this one? */ if (do_wp_page(mm, vma, address, - page_table, pmd, ptl, pte) == VM_FAULT_OOM) + page_table, pmd, ptl, pte) & VM_FAULT_OOM) ret = VM_FAULT_OOM; goto out; } @@ -2280,7 +2292,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, lazy_mmu_prot_update(entry); unlock: pte_unmap_unlock(page_table, ptl); - return VM_FAULT_MINOR; + return 0; release: page_cache_release(page); goto unlock; @@ -2298,13 +2310,14 @@ oom: * do not need to flush old virtual caches or the TLB. * * We enter with non-exclusive mmap_sem (to exclude vma changes, - * but allow concurrent faults), and pte mapped but not yet locked. + * but allow concurrent faults), and pte neither mapped nor locked. * We return with mmap_sem still held, but pte unmapped and unlocked. */ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, pte_t *page_table, pmd_t *pmd, + unsigned long address, pmd_t *pmd, pgoff_t pgoff, unsigned int flags, pte_t orig_pte) { + pte_t *page_table; spinlock_t *ptl; struct page *page; pte_t entry; @@ -2312,22 +2325,22 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, struct page *dirty_page = NULL; struct vm_fault vmf; int ret; + int page_mkwrite = 0; vmf.virtual_address = (void __user *)(address & PAGE_MASK); vmf.pgoff = pgoff; vmf.flags = flags; vmf.page = NULL; - pte_unmap(page_table); BUG_ON(vma->vm_flags & VM_PFNMAP); if (likely(vma->vm_ops->fault)) { ret = vma->vm_ops->fault(vma, &vmf); - if (unlikely(ret & (VM_FAULT_ERROR | FAULT_RET_NOPAGE))) - return (ret & VM_FAULT_MASK); + if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) + return ret; } else { /* Legacy ->nopage path */ - ret = VM_FAULT_MINOR; + ret = 0; vmf.page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret); /* no page was available -- either SIGBUS or OOM */ if (unlikely(vmf.page == NOPAGE_SIGBUS)) @@ -2340,7 +2353,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, * For consistency in subsequent calls, make the faulted page always * locked. */ - if (unlikely(!(ret & FAULT_RET_LOCKED))) + if (unlikely(!(ret & VM_FAULT_LOCKED))) lock_page(vmf.page); else VM_BUG_ON(!PageLocked(vmf.page)); @@ -2356,7 +2369,8 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, ret = VM_FAULT_OOM; goto out; } - page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); + page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, + vma, address); if (!page) { ret = VM_FAULT_OOM; goto out; @@ -2384,10 +2398,11 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, * is better done later. */ if (!page->mapping) { - ret = VM_FAULT_MINOR; + ret = 0; anon = 1; /* no anon but release vmf.page */ goto out; } + page_mkwrite = 1; } } @@ -2443,11 +2458,11 @@ out_unlocked: if (anon) page_cache_release(vmf.page); else if (dirty_page) { - set_page_dirty_balance(dirty_page); + set_page_dirty_balance(dirty_page, page_mkwrite); put_page(dirty_page); } - return (ret & VM_FAULT_MASK); + return ret; } static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, @@ -2458,8 +2473,8 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff; unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0); - return __do_fault(mm, vma, address, page_table, pmd, pgoff, - flags, orig_pte); + pte_unmap(page_table); + return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte); } @@ -2486,7 +2501,6 @@ static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma, spinlock_t *ptl; pte_t entry; unsigned long pfn; - int ret = VM_FAULT_MINOR; pte_unmap(page_table); BUG_ON(!(vma->vm_flags & VM_PFNMAP)); @@ -2498,7 +2512,7 @@ static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma, else if (unlikely(pfn == NOPFN_SIGBUS)) return VM_FAULT_SIGBUS; else if (unlikely(pfn == NOPFN_REFAULT)) - return VM_FAULT_MINOR; + return 0; page_table = pte_offset_map_lock(mm, pmd, address, &ptl); @@ -2510,7 +2524,7 @@ static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma, set_pte_at(mm, address, page_table, entry); } pte_unmap_unlock(page_table, ptl); - return ret; + return 0; } /* @@ -2531,7 +2545,7 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, pgoff_t pgoff; if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) - return VM_FAULT_MINOR; + return 0; if (unlikely(!(vma->vm_flags & VM_NONLINEAR) || !(vma->vm_flags & VM_CAN_NONLINEAR))) { @@ -2543,9 +2557,7 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, } pgoff = pte_to_pgoff(orig_pte); - - return __do_fault(mm, vma, address, page_table, pmd, pgoff, - flags, orig_pte); + return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte); } /* @@ -2615,13 +2627,13 @@ static inline int handle_pte_fault(struct mm_struct *mm, } unlock: pte_unmap_unlock(pte, ptl); - return VM_FAULT_MINOR; + return 0; } /* * By the time we get here, we already hold the mm semaphore */ -int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, +int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access) { pgd_t *pgd; @@ -2650,8 +2662,6 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, return handle_pte_fault(mm, vma, address, pte, pmd, write_access); } -EXPORT_SYMBOL_GPL(__handle_mm_fault); - #ifndef __PAGETABLE_PUD_FOLDED /* * Allocate page upper directory. @@ -2856,3 +2866,4 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in return buf - old_buf; } +EXPORT_SYMBOL_GPL(access_process_vm);