X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Frmap.c;h=450f5241b5a525282ae7eb150fe322423b01e570;hb=242e54686257493f0b10ac557e730419d9af7d24;hp=884d6d1928bce0b438ec7d43d7b33918e690be9c;hpb=1da177e4c3f41524e886b7f1b8a0c1fc7321cac2;p=linux-2.6

diff --git a/mm/rmap.c b/mm/rmap.c
index 884d6d1928..450f5241b5 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -34,9 +34,8 @@
  *       anon_vma->lock
  *         mm->page_table_lock
  *           zone->lru_lock (in mark_page_accessed)
- *           swap_list_lock (in swap_free etc's swap_info_get)
+ *           swap_lock (in swap_duplicate, swap_info_get)
  *             mmlist_lock (in mmput, drain_mmlist and others)
- *             swap_device_lock (in swap_duplicate, swap_info_get)
  *             mapping->private_lock (in __set_page_dirty_buffers)
  *             inode_lock (in set_page_dirty's __mark_inode_dirty)
  *               sb_lock (within inode_lock in fs/fs-writeback.c)
@@ -242,6 +241,42 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 	return vma_address(page, vma);
 }
 
+/*
+ * Check that @page is mapped at @address into @mm.
+ *
+ * On success returns with mapped pte and locked mm->page_table_lock.
+ */
+pte_t *page_check_address(struct page *page, struct mm_struct *mm,
+			  unsigned long address)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	/*
+	 * We need the page_table_lock to protect us from page faults,
+	 * munmap, fork, etc...
+	 */
+	spin_lock(&mm->page_table_lock);
+	pgd = pgd_offset(mm, address);
+	if (likely(pgd_present(*pgd))) {
+		pud = pud_offset(pgd, address);
+		if (likely(pud_present(*pud))) {
+			pmd = pmd_offset(pud, address);
+			if (likely(pmd_present(*pmd))) {
+				pte = pte_offset_map(pmd, address);
+				if (likely(pte_present(*pte) &&
+					   page_to_pfn(page) == pte_pfn(*pte)))
+					return pte;
+				pte_unmap(pte);
+			}
+		}
+	}
+	spin_unlock(&mm->page_table_lock);
+	return ERR_PTR(-ENOENT);
+}
+
 /*
  * Subfunctions of page_referenced: page_referenced_one called
  * repeatedly from either page_referenced_anon or page_referenced_file.
@@ -251,51 +286,25 @@ static int page_referenced_one(struct page *page,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
 	pte_t *pte;
 	int referenced = 0;
 
-	if (!get_mm_counter(mm, rss))
-		goto out;
 	address = vma_address(page, vma);
 	if (address == -EFAULT)
 		goto out;
 
-	spin_lock(&mm->page_table_lock);
-
-	pgd = pgd_offset(mm, address);
-	if (!pgd_present(*pgd))
-		goto out_unlock;
-
-	pud = pud_offset(pgd, address);
-	if (!pud_present(*pud))
-		goto out_unlock;
-
-	pmd = pmd_offset(pud, address);
-	if (!pmd_present(*pmd))
-		goto out_unlock;
-
-	pte = pte_offset_map(pmd, address);
-	if (!pte_present(*pte))
-		goto out_unmap;
-
-	if (page_to_pfn(page) != pte_pfn(*pte))
-		goto out_unmap;
-
-	if (ptep_clear_flush_young(vma, address, pte))
-		referenced++;
-
-	if (mm != current->mm && !ignore_token && has_swap_token(mm))
-		referenced++;
+	pte = page_check_address(page, mm, address);
+	if (!IS_ERR(pte)) {
+		if (ptep_clear_flush_young(vma, address, pte))
+			referenced++;
 
-	(*mapcount)--;
+		if (mm != current->mm && !ignore_token && has_swap_token(mm))
+			referenced++;
 
-out_unmap:
-	pte_unmap(pte);
-out_unlock:
-	spin_unlock(&mm->page_table_lock);
+		(*mapcount)--;
+		pte_unmap(pte);
+		spin_unlock(&mm->page_table_lock);
+	}
 out:
 	return referenced;
 }
@@ -430,22 +439,19 @@ int page_referenced(struct page *page, int is_locked, int ignore_token)
 void page_add_anon_rmap(struct page *page,
 	struct vm_area_struct *vma, unsigned long address)
 {
-	struct anon_vma *anon_vma = vma->anon_vma;
-	pgoff_t index;
-
 	BUG_ON(PageReserved(page));
-	BUG_ON(!anon_vma);
 
 	inc_mm_counter(vma->vm_mm, anon_rss);
 
-	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
-	index = (address - vma->vm_start) >> PAGE_SHIFT;
-	index += vma->vm_pgoff;
-	index >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
-
 	if (atomic_inc_and_test(&page->_mapcount)) {
-		page->index = index;
+		struct anon_vma *anon_vma = vma->anon_vma;
+
+		BUG_ON(!anon_vma);
+		anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
 		page->mapping = (struct address_space *) anon_vma;
+
+		page->index = linear_page_index(vma, address);
+
 		inc_page_state(nr_mapped);
 	}
 	/* else checking page index and mapping is racy */
@@ -502,48 +508,24 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
 	pte_t *pte;
 	pte_t pteval;
 	int ret = SWAP_AGAIN;
 
-	if (!get_mm_counter(mm, rss))
-		goto out;
 	address = vma_address(page, vma);
 	if (address == -EFAULT)
 		goto out;
 
-	/*
-	 * We need the page_table_lock to protect us from page faults,
-	 * munmap, fork, etc...
-	 */
-	spin_lock(&mm->page_table_lock);
-
-	pgd = pgd_offset(mm, address);
-	if (!pgd_present(*pgd))
-		goto out_unlock;
-
-	pud = pud_offset(pgd, address);
-	if (!pud_present(*pud))
-		goto out_unlock;
-
-	pmd = pmd_offset(pud, address);
-	if (!pmd_present(*pmd))
-		goto out_unlock;
-
-	pte = pte_offset_map(pmd, address);
-	if (!pte_present(*pte))
-		goto out_unmap;
-
-	if (page_to_pfn(page) != pte_pfn(*pte))
-		goto out_unmap;
+	pte = page_check_address(page, mm, address);
+	if (IS_ERR(pte))
+		goto out;
 
 	/*
 	 * If the page is mlock()d, we cannot swap it out.
 	 * If it's recently referenced (perhaps page_referenced
 	 * skipped over this mm) then we should reactivate it.
+	 *
+	 * Pages belonging to VM_RESERVED regions should not happen here.
 	 */
 	if ((vma->vm_flags & (VM_LOCKED|VM_RESERVED)) ||
 			ptep_clear_flush_young(vma, address, pte)) {
@@ -551,27 +533,6 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
 		goto out_unmap;
 	}
 
-	/*
-	 * Don't pull an anonymous page out from under get_user_pages.
-	 * GUP carefully breaks COW and raises page count (while holding
-	 * page_table_lock, as we have here) to make sure that the page
-	 * cannot be freed. If we unmap that page here, a user write
-	 * access to the virtual address will bring back the page, but
-	 * its raised count will (ironically) be taken to mean it's not
-	 * an exclusive swap page, do_wp_page will replace it by a copy
-	 * page, and the user never get to see the data GUP was holding
-	 * the original page for.
-	 *
-	 * This test is also useful for when swapoff (unuse_process) has
-	 * to drop page lock: its reference to the page stops existing
-	 * ptes from being unmapped, so swapoff can make progress.
-	 */
-	if (PageSwapCache(page) &&
-	    page_count(page) != page_mapcount(page) + 2) {
-		ret = SWAP_FAIL;
-		goto out_unmap;
-	}
-
 	/* Nuke the page table entry. */
 	flush_cache_page(vma, address, page_to_pfn(page));
 	pteval = ptep_clear_flush(vma, address, pte);
@@ -598,13 +559,12 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
 		dec_mm_counter(mm, anon_rss);
 	}
 
-	inc_mm_counter(mm, rss);
+	dec_mm_counter(mm, rss);
 	page_remove_rmap(page);
 	page_cache_release(page);
 
 out_unmap:
 	pte_unmap(pte);
-out_unlock:
 	spin_unlock(&mm->page_table_lock);
 out:
 	return ret;
@@ -639,7 +599,7 @@ static void try_to_unmap_cluster(unsigned long cursor,
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
-	pte_t *pte;
+	pte_t *pte, *original_pte;
 	pte_t pteval;
 	struct page *page;
 	unsigned long address;
@@ -671,7 +631,7 @@ static void try_to_unmap_cluster(unsigned long cursor,
 	if (!pmd_present(*pmd))
 		goto out_unlock;
 
-	for (pte = pte_offset_map(pmd, address);
+	for (original_pte = pte = pte_offset_map(pmd, address);
 			address < end; pte++, address += PAGE_SIZE) {
 
 		if (!pte_present(*pte))
@@ -707,8 +667,7 @@ static void try_to_unmap_cluster(unsigned long cursor,
 		(*mapcount)--;
 	}
 
-	pte_unmap(pte);
-
+	pte_unmap(original_pte);
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
 }
@@ -802,8 +761,7 @@ static int try_to_unmap_file(struct page *page)
 		if (vma->vm_flags & (VM_LOCKED|VM_RESERVED))
 			continue;
 		cursor = (unsigned long) vma->vm_private_data;
-		while (get_mm_counter(vma->vm_mm, rss) &&
-				cursor < max_nl_cursor &&
+		while ( cursor < max_nl_cursor &&
 			cursor < vma->vm_end - vma->vm_start) {
 			try_to_unmap_cluster(cursor, &mapcount, vma);
 			cursor += CLUSTER_SIZE;
@@ -860,3 +818,4 @@ int try_to_unmap(struct page *page)
 		ret = SWAP_SUCCESS;
 	return ret;
 }
+
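
For reference, a minimal sketch of the calling convention this patch introduces with page_check_address(): on success the helper returns with the pte mapped and mm->page_table_lock held, so the caller does its per-pte work and then drops the mapping and the lock itself, exactly as the reworked page_referenced_one() and try_to_unmap_one() above do. The sketch is not part of the patch; the function name example_clear_young() is hypothetical, and it assumes page_check_address() is visible to the caller (in this tree it is defined in mm/rmap.c).

#include <linux/mm.h>
#include <linux/rmap.h>
#include <linux/err.h>

/*
 * Hypothetical caller showing the page_check_address() contract:
 * a non-IS_ERR return means the pte is mapped and mm->page_table_lock
 * is held, so the caller must pte_unmap() and unlock when done.
 */
static int example_clear_young(struct page *page, struct vm_area_struct *vma,
			       unsigned long address)
{
	struct mm_struct *mm = vma->vm_mm;
	pte_t *pte;
	int young = 0;

	pte = page_check_address(page, mm, address);
	if (IS_ERR(pte))
		return 0;	/* page is not mapped at this address */

	/* Per-pte work happens here, under the page table lock. */
	if (ptep_clear_flush_young(vma, address, pte))
		young = 1;

	pte_unmap(pte);
	spin_unlock(&mm->page_table_lock);
	return young;
}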