KVM: MMU: Simplify page table walker
author    Avi Kivity <avi@qumranet.com>
          Wed, 17 Oct 2007 10:18:47 +0000 (12:18 +0200)
committer Avi Kivity <avi@qumranet.com>
          Wed, 30 Jan 2008 15:52:53 +0000 (17:52 +0200)
Simplify the walker level loop so that it does not carry so much state from
one iteration to the next.  Besides being complex, the old structure made
kmap_atomic() critical sections difficult to manage.

As a result of this change, kmap_atomic() sections are limited to actually
touching the guest pte, which allows the other functions called from the
walker to perform operations that may sleep.  This will happen when we
enable swapping.

Signed-off-by: Avi Kivity <avi@qumranet.com>
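
The access pattern the patch converges on is worth spelling out: the walker
maps the guest page table page, copies the pte of interest into a local
variable, and unmaps again immediately, so everything downstream operates on
the copy.  A minimal sketch of the read side, using the kmap_atomic()/
kunmap_atomic() interface as it existed at the time (the helper name is
hypothetical; the patch open-codes this inside FNAME(walk_addr)):

/*
 * Hypothetical helper, not part of the patch: the patch open-codes this
 * pattern in FNAME(walk_addr).  Map the guest page table page, copy out
 * the pte we care about, and unmap immediately; everything after the
 * copy may sleep because no atomic mapping is held.
 */
static pt_element_t FNAME(read_guest_pte)(struct page *page, unsigned index)
{
	pt_element_t *table;
	pt_element_t pte;

	table = kmap_atomic(page, KM_USER0);	/* begin atomic section */
	pte = table[index];			/* only touch the guest pte */
	kunmap_atomic(table, KM_USER0);		/* end atomic section */

	return pte;
}

Because the rest of the loop inspects the local copy rather than a pointer
into the mapping, calls such as gfn_to_memslot() and, later, anything that
may sleep can safely sit between two such sections.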
drivers/kvm/paging_tmpl.h

index a9e687b5c1e932277eb5ced9650e80a80626c6e8..bab1b7f8d7057789b4b31c7d8c93ecc6cf054c10 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
 struct guest_walker {
        int level;
        gfn_t table_gfn[PT_MAX_FULL_LEVELS];
-       pt_element_t *table;
        pt_element_t pte;
-       pt_element_t *ptep;
-       struct page *page;
-       int index;
        pt_element_t inherited_ar;
        gfn_t gfn;
        u32 error_code;
 };
 
-static void FNAME(update_dirty_bit)(struct kvm_vcpu *vcpu,
-                                   int write_fault,
-                                   pt_element_t *ptep,
-                                   gfn_t table_gfn)
-{
-       gpa_t pte_gpa;
-
-       if (write_fault && !is_dirty_pte(*ptep)) {
-               mark_page_dirty(vcpu->kvm, table_gfn);
-               *ptep |= PT_DIRTY_MASK;
-               pte_gpa = ((gpa_t)table_gfn << PAGE_SHIFT);
-               pte_gpa += offset_in_page(ptep);
-               kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)ptep, sizeof(*ptep));
-       }
-}
-
 /*
  * Fetch a guest pte for a guest virtual address
  */
@@ -94,105 +74,99 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
 {
        hpa_t hpa;
        struct kvm_memory_slot *slot;
-       pt_element_t *ptep;
-       pt_element_t root;
+       struct page *page;
+       pt_element_t *table;
+       pt_element_t pte;
        gfn_t table_gfn;
+       unsigned index;
+       gpa_t pte_gpa;
 
        pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
        walker->level = vcpu->mmu.root_level;
-       walker->table = NULL;
-       walker->page = NULL;
-       walker->ptep = NULL;
-       root = vcpu->cr3;
+       pte = vcpu->cr3;
 #if PTTYPE == 64
        if (!is_long_mode(vcpu)) {
-               walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3];
-               root = *walker->ptep;
-               walker->pte = root;
-               if (!(root & PT_PRESENT_MASK))
+               pte = vcpu->pdptrs[(addr >> 30) & 3];
+               if (!is_present_pte(pte))
                        goto not_present;
                --walker->level;
        }
 #endif
-       table_gfn = (root & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
-       walker->table_gfn[walker->level - 1] = table_gfn;
-       pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
-                walker->level - 1, table_gfn);
-       slot = gfn_to_memslot(vcpu->kvm, table_gfn);
-       hpa = safe_gpa_to_hpa(vcpu->kvm, root & PT64_BASE_ADDR_MASK);
-       walker->page = pfn_to_page(hpa >> PAGE_SHIFT);
-       walker->table = kmap_atomic(walker->page, KM_USER0);
-
        ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
               (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
 
        walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK;
 
        for (;;) {
-               int index = PT_INDEX(addr, walker->level);
-               hpa_t paddr;
+               index = PT_INDEX(addr, walker->level);
 
-               ptep = &walker->table[index];
-               walker->index = index;
-               ASSERT(((unsigned long)walker->table & PAGE_MASK) ==
-                      ((unsigned long)ptep & PAGE_MASK));
+               table_gfn = (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
+               walker->table_gfn[walker->level - 1] = table_gfn;
+               pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
+                        walker->level - 1, table_gfn);
+
+               slot = gfn_to_memslot(vcpu->kvm, table_gfn);
+               hpa = safe_gpa_to_hpa(vcpu->kvm, pte & PT64_BASE_ADDR_MASK);
+               page = pfn_to_page(hpa >> PAGE_SHIFT);
 
-               if (!is_present_pte(*ptep))
+               table = kmap_atomic(page, KM_USER0);
+               pte = table[index];
+               kunmap_atomic(table, KM_USER0);
+
+               if (!is_present_pte(pte))
                        goto not_present;
 
-               if (write_fault && !is_writeble_pte(*ptep))
+               if (write_fault && !is_writeble_pte(pte))
                        if (user_fault || is_write_protection(vcpu))
                                goto access_error;
 
-               if (user_fault && !(*ptep & PT_USER_MASK))
+               if (user_fault && !(pte & PT_USER_MASK))
                        goto access_error;
 
 #if PTTYPE == 64
-               if (fetch_fault && is_nx(vcpu) && (*ptep & PT64_NX_MASK))
+               if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK))
                        goto access_error;
 #endif
 
-               if (!(*ptep & PT_ACCESSED_MASK)) {
+               if (!(pte & PT_ACCESSED_MASK)) {
                        mark_page_dirty(vcpu->kvm, table_gfn);
-                       *ptep |= PT_ACCESSED_MASK;
+                       pte |= PT_ACCESSED_MASK;
+                       table = kmap_atomic(page, KM_USER0);
+                       table[index] = pte;
+                       kunmap_atomic(table, KM_USER0);
                }
 
                if (walker->level == PT_PAGE_TABLE_LEVEL) {
-                       walker->gfn = (*ptep & PT_BASE_ADDR_MASK)
-                               >> PAGE_SHIFT;
-                       FNAME(update_dirty_bit)(vcpu, write_fault, ptep,
-                                               table_gfn);
+                       walker->gfn = (pte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
                        break;
                }
 
                if (walker->level == PT_DIRECTORY_LEVEL
-                   && (*ptep & PT_PAGE_SIZE_MASK)
+                   && (pte & PT_PAGE_SIZE_MASK)
                    && (PTTYPE == 64 || is_pse(vcpu))) {
-                       walker->gfn = (*ptep & PT_DIR_BASE_ADDR_MASK)
+                       walker->gfn = (pte & PT_DIR_BASE_ADDR_MASK)
                                >> PAGE_SHIFT;
                        walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
-                       FNAME(update_dirty_bit)(vcpu, write_fault, ptep,
-                                               table_gfn);
                        break;
                }
 
-               walker->inherited_ar &= walker->table[index];
-               table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
-               kunmap_atomic(walker->table, KM_USER0);
-               paddr = safe_gpa_to_hpa(vcpu->kvm, table_gfn << PAGE_SHIFT);
-               walker->page = pfn_to_page(paddr >> PAGE_SHIFT);
-               walker->table = kmap_atomic(walker->page, KM_USER0);
+               walker->inherited_ar &= pte;
                --walker->level;
-               walker->table_gfn[walker->level - 1] = table_gfn;
-               pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
-                        walker->level - 1, table_gfn);
        }
-       walker->pte = *ptep;
-       if (walker->page)
-               walker->ptep = NULL;
-       if (walker->table)
-               kunmap_atomic(walker->table, KM_USER0);
-       pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep);
+
+       if (write_fault && !is_dirty_pte(pte)) {
+               mark_page_dirty(vcpu->kvm, table_gfn);
+               pte |= PT_DIRTY_MASK;
+               table = kmap_atomic(page, KM_USER0);
+               table[index] = pte;
+               kunmap_atomic(table, KM_USER0);
+               pte_gpa = table_gfn << PAGE_SHIFT;
+               pte_gpa += index * sizeof(pt_element_t);
+               kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte));
+       }
+
+       walker->pte = pte;
+       pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)pte);
        return 1;
 
 not_present:
@@ -209,8 +183,6 @@ err:
                walker->error_code |= PFERR_USER_MASK;
        if (fetch_fault)
                walker->error_code |= PFERR_FETCH_MASK;
-       if (walker->table)
-               kunmap_atomic(walker->table, KM_USER0);
        return 0;
 }
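
Write-backs follow the mirror-image pattern: when the walker needs to set the
accessed bit inside the loop, or the dirty bit on a write fault after it, the
page is re-mapped just long enough to store the updated value; in the
dirty-bit case the patch then computes pte_gpa from table_gfn and the index
and calls kvm_mmu_pte_write() so shadow pages tracking that guest page table
stay coherent.  A sketch of the write side under the same assumptions
(hypothetical helper, open-coded in the patch):

/*
 * Hypothetical helper mirroring the read side: re-map the guest page
 * table page briefly, store the updated pte, and unmap.  The caller
 * keeps using its local copy; no pointer into the mapping escapes
 * the atomic section.
 */
static void FNAME(write_guest_pte)(struct page *page, unsigned index,
				   pt_element_t pte)
{
	pt_element_t *table;

	table = kmap_atomic(page, KM_USER0);
	table[index] = pte;
	kunmap_atomic(table, KM_USER0);
}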