From c0718806cf955d5eb51ea77bffb5b21d9bba4972 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 29 Oct 2005 18:16:31 -0700 Subject: [PATCH] [PATCH] mm: rmap with inner ptlock rmap's page_check_address descend without page_table_lock. First just pte_offset_map in case there's no pte present worth locking for, then take page_table_lock for the full check, and pass ptl back to caller in the same style as pte_offset_map_lock. __xip_unmap, page_referenced_one and try_to_unmap_one use pte_unmap_unlock. try_to_unmap_cluster also. page_check_address reformatted to avoid progressive indentation. No use is made of its one error code, return NULL when it fails. Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 4 +- mm/filemap_xip.c | 12 ++--- mm/rmap.c | 109 +++++++++++++++++++++---------------------- 3 files changed, 60 insertions(+), 65 deletions(-) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index e80fb7ee6e..35b30e6c8c 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -95,8 +95,8 @@ int try_to_unmap(struct page *); /* * Called from mm/filemap_xip.c to unmap empty zero page */ -pte_t *page_check_address(struct page *, struct mm_struct *, unsigned long); - +pte_t *page_check_address(struct page *, struct mm_struct *, + unsigned long, spinlock_t **); /* * Used by swapoff to help locate where page is expected in vma. diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 4e74ad6033..9cf687e4a2 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -174,6 +174,7 @@ __xip_unmap (struct address_space * mapping, unsigned long address; pte_t *pte; pte_t pteval; + spinlock_t *ptl; struct page *page; spin_lock(&mapping->i_mmap_lock); @@ -183,20 +184,15 @@ __xip_unmap (struct address_space * mapping, ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); BUG_ON(address < vma->vm_start || address >= vma->vm_end); page = ZERO_PAGE(address); - /* - * We need the page_table_lock to protect us from page faults, - * munmap, fork, etc... - */ - pte = page_check_address(page, mm, address); - if (!IS_ERR(pte)) { + pte = page_check_address(page, mm, address, &ptl); + if (pte) { /* Nuke the page table entry. */ flush_cache_page(vma, address, pte_pfn(*pte)); pteval = ptep_clear_flush(vma, address, pte); page_remove_rmap(page); dec_mm_counter(mm, file_rss); BUG_ON(pte_dirty(pteval)); - pte_unmap(pte); - spin_unlock(&mm->page_table_lock); + pte_unmap_unlock(pte, ptl); page_cache_release(page); } } diff --git a/mm/rmap.c b/mm/rmap.c index 4c52c56c99..a84bdfe582 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -247,34 +247,41 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) * On success returns with mapped pte and locked mm->page_table_lock. */ pte_t *page_check_address(struct page *page, struct mm_struct *mm, - unsigned long address) + unsigned long address, spinlock_t **ptlp) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t *pte; + spinlock_t *ptl; - /* - * We need the page_table_lock to protect us from page faults, - * munmap, fork, etc... - */ - spin_lock(&mm->page_table_lock); pgd = pgd_offset(mm, address); - if (likely(pgd_present(*pgd))) { - pud = pud_offset(pgd, address); - if (likely(pud_present(*pud))) { - pmd = pmd_offset(pud, address); - if (likely(pmd_present(*pmd))) { - pte = pte_offset_map(pmd, address); - if (likely(pte_present(*pte) && - page_to_pfn(page) == pte_pfn(*pte))) - return pte; - pte_unmap(pte); - } - } + if (!pgd_present(*pgd)) + return NULL; + + pud = pud_offset(pgd, address); + if (!pud_present(*pud)) + return NULL; + + pmd = pmd_offset(pud, address); + if (!pmd_present(*pmd)) + return NULL; + + pte = pte_offset_map(pmd, address); + /* Make a quick check before getting the lock */ + if (!pte_present(*pte)) { + pte_unmap(pte); + return NULL; } - spin_unlock(&mm->page_table_lock); - return ERR_PTR(-ENOENT); + + ptl = &mm->page_table_lock; + spin_lock(ptl); + if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) { + *ptlp = ptl; + return pte; + } + pte_unmap_unlock(pte, ptl); + return NULL; } /* @@ -287,28 +294,28 @@ static int page_referenced_one(struct page *page, struct mm_struct *mm = vma->vm_mm; unsigned long address; pte_t *pte; + spinlock_t *ptl; int referenced = 0; address = vma_address(page, vma); if (address == -EFAULT) goto out; - pte = page_check_address(page, mm, address); - if (!IS_ERR(pte)) { - if (ptep_clear_flush_young(vma, address, pte)) - referenced++; + pte = page_check_address(page, mm, address, &ptl); + if (!pte) + goto out; - /* Pretend the page is referenced if the task has the - swap token and is in the middle of a page fault. */ - if (mm != current->mm && !ignore_token && - has_swap_token(mm) && - rwsem_is_locked(&mm->mmap_sem)) - referenced++; + if (ptep_clear_flush_young(vma, address, pte)) + referenced++; - (*mapcount)--; - pte_unmap(pte); - spin_unlock(&mm->page_table_lock); - } + /* Pretend the page is referenced if the task has the + swap token and is in the middle of a page fault. */ + if (mm != current->mm && !ignore_token && has_swap_token(mm) && + rwsem_is_locked(&mm->mmap_sem)) + referenced++; + + (*mapcount)--; + pte_unmap_unlock(pte, ptl); out: return referenced; } @@ -507,14 +514,15 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma) unsigned long address; pte_t *pte; pte_t pteval; + spinlock_t *ptl; int ret = SWAP_AGAIN; address = vma_address(page, vma); if (address == -EFAULT) goto out; - pte = page_check_address(page, mm, address); - if (IS_ERR(pte)) + pte = page_check_address(page, mm, address, &ptl); + if (!pte) goto out; /* @@ -564,8 +572,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma) page_cache_release(page); out_unmap: - pte_unmap(pte); - spin_unlock(&mm->page_table_lock); + pte_unmap_unlock(pte, ptl); out: return ret; } @@ -599,19 +606,14 @@ static void try_to_unmap_cluster(unsigned long cursor, pgd_t *pgd; pud_t *pud; pmd_t *pmd; - pte_t *pte, *original_pte; + pte_t *pte; pte_t pteval; + spinlock_t *ptl; struct page *page; unsigned long address; unsigned long end; unsigned long pfn; - /* - * We need the page_table_lock to protect us from page faults, - * munmap, fork, etc... - */ - spin_lock(&mm->page_table_lock); - address = (vma->vm_start + cursor) & CLUSTER_MASK; end = address + CLUSTER_SIZE; if (address < vma->vm_start) @@ -621,22 +623,22 @@ static void try_to_unmap_cluster(unsigned long cursor, pgd = pgd_offset(mm, address); if (!pgd_present(*pgd)) - goto out_unlock; + return; pud = pud_offset(pgd, address); if (!pud_present(*pud)) - goto out_unlock; + return; pmd = pmd_offset(pud, address); if (!pmd_present(*pmd)) - goto out_unlock; + return; + + pte = pte_offset_map_lock(mm, pmd, address, &ptl); /* Update high watermark before we lower rss */ update_hiwater_rss(mm); - for (original_pte = pte = pte_offset_map(pmd, address); - address < end; pte++, address += PAGE_SIZE) { - + for (; address < end; pte++, address += PAGE_SIZE) { if (!pte_present(*pte)) continue; @@ -669,10 +671,7 @@ static void try_to_unmap_cluster(unsigned long cursor, dec_mm_counter(mm, file_rss); (*mapcount)--; } - - pte_unmap(original_pte); -out_unlock: - spin_unlock(&mm->page_table_lock); + pte_unmap_unlock(pte - 1, ptl); } static int try_to_unmap_anon(struct page *page) -- 2.39.5