mm: fix race in COW logic

author Nick Piggin <npiggin@suse.de>

Mon, 23 Jun 2008 12:30:30 +0000 (14:30 +0200)

committer Linus Torvalds <torvalds@linux-foundation.org>

Mon, 23 Jun 2008 18:28:32 +0000 (11:28 -0700)
author Nick Piggin <npiggin@suse.de>
Mon, 23 Jun 2008 12:30:30 +0000 (14:30 +0200)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 23 Jun 2008 18:28:32 +0000 (11:28 -0700)
diff --git a/mm/memory.c b/mm/memory.c

index 423e0e7c2f73b851a970d8746470df9c373b5095..d14b251a25a638dec1b5327a6261a67660ba67d5 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1785,7 +1785,6 @@ gotten:
         page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
         if (likely(pte_same(*page_table, orig_pte))) {
                 if (old_page) {
-                       page_remove_rmap(old_page, vma);
                         if (!PageAnon(old_page)) {
                                 dec_mm_counter(mm, file_rss);
                                 inc_mm_counter(mm, anon_rss);
@@ -1807,6 +1806,32 @@ gotten:
                 lru_cache_add_active(new_page);
                 page_add_new_anon_rmap(new_page, vma, address);
  
+               if (old_page) {
+                       /*
+                        * Only after switching the pte to the new page may
+                        * we remove the mapcount here. Otherwise another
+                        * process may come and find the rmap count decremented
+                        * before the pte is switched to the new page, and
+                        * "reuse" the old page writing into it while our pte
+                        * here still points into it and can be read by other
+                        * threads.
+                        *
+                        * The critical issue is to order this
+                        * page_remove_rmap with the ptep_clear_flush above.
+                        * Those stores are ordered by (if nothing else)
+                        * the barrier present in the atomic_add_negative
+                        * in page_remove_rmap.
+                        *
+                        * Then the TLB flush in ptep_clear_flush ensures that
+                        * no process can access the old page before the
+                        * decremented mapcount is visible. And the old page
+                        * cannot be reused until after the decremented
+                        * mapcount is visible. So transitively, TLBs to
+                        * old page will be flushed before it can be reused.
+                        */
+                       page_remove_rmap(old_page, vma);
+               }
+
                 /* Free the old page.. */
                 new_page = old_page;
                 ret |= VM_FAULT_WRITE;
author	Nick Piggin <npiggin@suse.de>
	Mon, 23 Jun 2008 12:30:30 +0000 (14:30 +0200)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 23 Jun 2008 18:28:32 +0000 (11:28 -0700)