X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Fmemory.c;h=156861fcac436e4716537c7e5dff565dded43224;hb=d73468533451fd896324058d9ba649c11ba3e3ee;hp=92a3ebd8d7951daff767f409836c8490cf283028;hpb=dd77a4ee0f3981693d4229aa1d57cea9e526ff47;p=linux-2.6 diff --git a/mm/memory.c b/mm/memory.c index 92a3ebd8d7..156861fcac 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -467,7 +467,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, */ if (is_cow_mapping(vm_flags)) { ptep_set_wrprotect(src_mm, addr, src_pte); - pte = *src_pte; + pte = pte_wrprotect(pte); } /* @@ -506,6 +506,7 @@ again: src_pte = pte_offset_map_nested(src_pmd, addr); src_ptl = pte_lockptr(src_mm, src_pmd); spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); + arch_enter_lazy_mmu_mode(); do { /* @@ -527,6 +528,7 @@ again: progress += 8; } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); + arch_leave_lazy_mmu_mode(); spin_unlock(src_ptl); pte_unmap_nested(src_pte - 1); add_mm_rss(dst_mm, rss[0], rss[1]); @@ -628,6 +630,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, int anon_rss = 0; pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + arch_enter_lazy_mmu_mode(); do { pte_t ptent = *pte; if (pte_none(ptent)) { @@ -690,10 +693,11 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, continue; if (!pte_file(ptent)) free_swap_and_cache(pte_to_swp_entry(ptent)); - pte_clear_full(mm, addr, pte, tlb->fullmm); + pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0)); add_mm_rss(mm, file_rss, anon_rss); + arch_leave_lazy_mmu_mode(); pte_unmap_unlock(pte - 1, ptl); return addr; @@ -1082,6 +1086,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, default: BUG(); } + cond_resched(); } if (pages) { pages[i] = page; @@ -1109,6 +1114,7 @@ static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd, pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; + arch_enter_lazy_mmu_mode(); do { struct page *page = ZERO_PAGE(addr); pte_t zero_pte = pte_wrprotect(mk_pte(page, prot)); @@ -1118,6 +1124,7 @@ static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd, BUG_ON(!pte_none(*pte)); set_pte_at(mm, addr, pte, zero_pte); } while (pte++, addr += PAGE_SIZE, addr != end); + arch_leave_lazy_mmu_mode(); pte_unmap_unlock(pte - 1, ptl); return 0; } @@ -1275,11 +1282,13 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; + arch_enter_lazy_mmu_mode(); do { BUG_ON(!pte_none(*pte)); set_pte_at(mm, addr, pte, pfn_pte(pfn, prot)); pfn++; } while (pte++, addr += PAGE_SIZE, addr != end); + arch_leave_lazy_mmu_mode(); pte_unmap_unlock(pte - 1, ptl); return 0; } @@ -1443,6 +1452,7 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) memset(kaddr, 0, PAGE_SIZE); kunmap_atomic(kaddr, KM_USER0); + flush_dcache_page(dst); return; } @@ -1577,7 +1587,14 @@ gotten: entry = mk_pte(new_page, vma->vm_page_prot); entry = maybe_mkwrite(pte_mkdirty(entry), vma); lazy_mmu_prot_update(entry); - ptep_establish(vma, address, page_table, entry); + /* + * Clear the pte entry and flush it first, before updating the + * pte with the new entry. This will avoid a race condition + * seen in the presence of one thread doing SMC and another + * thread doing COW. + */ + ptep_clear_flush(vma, address, page_table); + set_pte_at(mm, address, page_table, entry); update_mmu_cache(vma, address, entry); lru_cache_add_active(new_page); page_add_new_anon_rmap(new_page, vma, address); @@ -2154,11 +2171,13 @@ retry: * after the next truncate_count read. */ - /* no page was available -- either SIGBUS or OOM */ - if (new_page == NOPAGE_SIGBUS) + /* no page was available -- either SIGBUS, OOM or REFAULT */ + if (unlikely(new_page == NOPAGE_SIGBUS)) return VM_FAULT_SIGBUS; - if (new_page == NOPAGE_OOM) + else if (unlikely(new_page == NOPAGE_OOM)) return VM_FAULT_OOM; + else if (unlikely(new_page == NOPAGE_REFAULT)) + return VM_FAULT_MINOR; /* * Should we do an early C-O-W break? @@ -2255,6 +2274,54 @@ oom: return VM_FAULT_OOM; } +/* + * do_no_pfn() tries to create a new page mapping for a page without + * a struct_page backing it + * + * As this is called only for pages that do not currently exist, we + * do not need to flush old virtual caches or the TLB. + * + * We enter with non-exclusive mmap_sem (to exclude vma changes, + * but allow concurrent faults), and pte mapped but not yet locked. + * We return with mmap_sem still held, but pte unmapped and unlocked. + * + * It is expected that the ->nopfn handler always returns the same pfn + * for a given virtual mapping. + * + * Mark this `noinline' to prevent it from bloating the main pagefault code. + */ +static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, pte_t *page_table, pmd_t *pmd, + int write_access) +{ + spinlock_t *ptl; + pte_t entry; + unsigned long pfn; + int ret = VM_FAULT_MINOR; + + pte_unmap(page_table); + BUG_ON(!(vma->vm_flags & VM_PFNMAP)); + BUG_ON(is_cow_mapping(vma->vm_flags)); + + pfn = vma->vm_ops->nopfn(vma, address & PAGE_MASK); + if (pfn == NOPFN_OOM) + return VM_FAULT_OOM; + if (pfn == NOPFN_SIGBUS) + return VM_FAULT_SIGBUS; + + page_table = pte_offset_map_lock(mm, pmd, address, &ptl); + + /* Only go through if we didn't race with anybody else... */ + if (pte_none(*page_table)) { + entry = pfn_pte(pfn, vma->vm_page_prot); + if (write_access) + entry = maybe_mkwrite(pte_mkdirty(entry), vma); + set_pte_at(mm, address, page_table, entry); + } + pte_unmap_unlock(page_table, ptl); + return ret; +} + /* * Fault of a previously existing named mapping. Repopulate the pte * from the encoded file_pte if possible. This enables swappable @@ -2317,11 +2384,17 @@ static inline int handle_pte_fault(struct mm_struct *mm, old_entry = entry = *pte; if (!pte_present(entry)) { if (pte_none(entry)) { - if (!vma->vm_ops || !vma->vm_ops->nopage) - return do_anonymous_page(mm, vma, address, - pte, pmd, write_access); - return do_no_page(mm, vma, address, - pte, pmd, write_access); + if (vma->vm_ops) { + if (vma->vm_ops->nopage) + return do_no_page(mm, vma, address, + pte, pmd, + write_access); + if (unlikely(vma->vm_ops->nopfn)) + return do_no_pfn(mm, vma, address, pte, + pmd, write_access); + } + return do_anonymous_page(mm, vma, address, + pte, pmd, write_access); } if (pte_file(entry)) return do_file_page(mm, vma, address, @@ -2550,3 +2623,56 @@ int in_gate_area_no_task(unsigned long addr) } #endif /* __HAVE_ARCH_GATE_AREA */ + +/* + * Access another process' address space. + * Source/target buffer must be kernel space, + * Do not walk the page table directly, use get_user_pages + */ +int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write) +{ + struct mm_struct *mm; + struct vm_area_struct *vma; + struct page *page; + void *old_buf = buf; + + mm = get_task_mm(tsk); + if (!mm) + return 0; + + down_read(&mm->mmap_sem); + /* ignore errors, just check how much was sucessfully transfered */ + while (len) { + int bytes, ret, offset; + void *maddr; + + ret = get_user_pages(tsk, mm, addr, 1, + write, 1, &page, &vma); + if (ret <= 0) + break; + + bytes = len; + offset = addr & (PAGE_SIZE-1); + if (bytes > PAGE_SIZE-offset) + bytes = PAGE_SIZE-offset; + + maddr = kmap(page); + if (write) { + copy_to_user_page(vma, page, addr, + maddr + offset, buf, bytes); + set_page_dirty_lock(page); + } else { + copy_from_user_page(vma, page, addr, + buf, maddr + offset, bytes); + } + kunmap(page); + page_cache_release(page); + len -= bytes; + buf += bytes; + addr += bytes; + } + up_read(&mm->mmap_sem); + mmput(mm); + + return buf - old_buf; +}