From: Avi Kivity Date: Wed, 17 Oct 2007 10:18:47 +0000 (+0200) Subject: KVM: MMU: Simplify page table walker X-Git-Tag: v2.6.25-rc1~1138^2~206 X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=42bf3f0a1f5a25be26e6bb53162cdee82410310d;p=linux-2.6 KVM: MMU: Simplify page table walker Simplify the walker level loop not to carry so much information from one loop to the next. In addition to being complex, this made kmap_atomic() critical sections difficult to manage. As a result of this change, kmap_atomic() sections are limited to actually touching the guest pte, which allows the other functions called from the walker to do sleepy operations. This will happen when we enable swapping. Signed-off-by: Avi Kivity --- diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index a9e687b5c1..bab1b7f8d7 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -59,32 +59,12 @@ struct guest_walker { int level; gfn_t table_gfn[PT_MAX_FULL_LEVELS]; - pt_element_t *table; pt_element_t pte; - pt_element_t *ptep; - struct page *page; - int index; pt_element_t inherited_ar; gfn_t gfn; u32 error_code; }; -static void FNAME(update_dirty_bit)(struct kvm_vcpu *vcpu, - int write_fault, - pt_element_t *ptep, - gfn_t table_gfn) -{ - gpa_t pte_gpa; - - if (write_fault && !is_dirty_pte(*ptep)) { - mark_page_dirty(vcpu->kvm, table_gfn); - *ptep |= PT_DIRTY_MASK; - pte_gpa = ((gpa_t)table_gfn << PAGE_SHIFT); - pte_gpa += offset_in_page(ptep); - kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)ptep, sizeof(*ptep)); - } -} - /* * Fetch a guest pte for a guest virtual address */ @@ -94,105 +74,99 @@ static int FNAME(walk_addr)(struct guest_walker *walker, { hpa_t hpa; struct kvm_memory_slot *slot; - pt_element_t *ptep; - pt_element_t root; + struct page *page; + pt_element_t *table; + pt_element_t pte; gfn_t table_gfn; + unsigned index; + gpa_t pte_gpa; pgprintk("%s: addr %lx\n", __FUNCTION__, addr); walker->level = vcpu->mmu.root_level; - walker->table = NULL; - walker->page = NULL; - walker->ptep = NULL; - root = vcpu->cr3; + pte = vcpu->cr3; #if PTTYPE == 64 if (!is_long_mode(vcpu)) { - walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3]; - root = *walker->ptep; - walker->pte = root; - if (!(root & PT_PRESENT_MASK)) + pte = vcpu->pdptrs[(addr >> 30) & 3]; + if (!is_present_pte(pte)) goto not_present; --walker->level; } #endif - table_gfn = (root & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; - walker->table_gfn[walker->level - 1] = table_gfn; - pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__, - walker->level - 1, table_gfn); - slot = gfn_to_memslot(vcpu->kvm, table_gfn); - hpa = safe_gpa_to_hpa(vcpu->kvm, root & PT64_BASE_ADDR_MASK); - walker->page = pfn_to_page(hpa >> PAGE_SHIFT); - walker->table = kmap_atomic(walker->page, KM_USER0); - ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0); walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK; for (;;) { - int index = PT_INDEX(addr, walker->level); - hpa_t paddr; + index = PT_INDEX(addr, walker->level); - ptep = &walker->table[index]; - walker->index = index; - ASSERT(((unsigned long)walker->table & PAGE_MASK) == - ((unsigned long)ptep & PAGE_MASK)); + table_gfn = (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; + walker->table_gfn[walker->level - 1] = table_gfn; + pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__, + walker->level - 1, table_gfn); + + slot = gfn_to_memslot(vcpu->kvm, table_gfn); + hpa = safe_gpa_to_hpa(vcpu->kvm, pte & PT64_BASE_ADDR_MASK); + page = pfn_to_page(hpa >> PAGE_SHIFT); - if (!is_present_pte(*ptep)) + table = kmap_atomic(page, KM_USER0); + pte = table[index]; + kunmap_atomic(table, KM_USER0); + + if (!is_present_pte(pte)) goto not_present; - if (write_fault && !is_writeble_pte(*ptep)) + if (write_fault && !is_writeble_pte(pte)) if (user_fault || is_write_protection(vcpu)) goto access_error; - if (user_fault && !(*ptep & PT_USER_MASK)) + if (user_fault && !(pte & PT_USER_MASK)) goto access_error; #if PTTYPE == 64 - if (fetch_fault && is_nx(vcpu) && (*ptep & PT64_NX_MASK)) + if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK)) goto access_error; #endif - if (!(*ptep & PT_ACCESSED_MASK)) { + if (!(pte & PT_ACCESSED_MASK)) { mark_page_dirty(vcpu->kvm, table_gfn); - *ptep |= PT_ACCESSED_MASK; + pte |= PT_ACCESSED_MASK; + table = kmap_atomic(page, KM_USER0); + table[index] = pte; + kunmap_atomic(table, KM_USER0); } if (walker->level == PT_PAGE_TABLE_LEVEL) { - walker->gfn = (*ptep & PT_BASE_ADDR_MASK) - >> PAGE_SHIFT; - FNAME(update_dirty_bit)(vcpu, write_fault, ptep, - table_gfn); + walker->gfn = (pte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; break; } if (walker->level == PT_DIRECTORY_LEVEL - && (*ptep & PT_PAGE_SIZE_MASK) + && (pte & PT_PAGE_SIZE_MASK) && (PTTYPE == 64 || is_pse(vcpu))) { - walker->gfn = (*ptep & PT_DIR_BASE_ADDR_MASK) + walker->gfn = (pte & PT_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT; walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL); - FNAME(update_dirty_bit)(vcpu, write_fault, ptep, - table_gfn); break; } - walker->inherited_ar &= walker->table[index]; - table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; - kunmap_atomic(walker->table, KM_USER0); - paddr = safe_gpa_to_hpa(vcpu->kvm, table_gfn << PAGE_SHIFT); - walker->page = pfn_to_page(paddr >> PAGE_SHIFT); - walker->table = kmap_atomic(walker->page, KM_USER0); + walker->inherited_ar &= pte; --walker->level; - walker->table_gfn[walker->level - 1] = table_gfn; - pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__, - walker->level - 1, table_gfn); } - walker->pte = *ptep; - if (walker->page) - walker->ptep = NULL; - if (walker->table) - kunmap_atomic(walker->table, KM_USER0); - pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep); + + if (write_fault && !is_dirty_pte(pte)) { + mark_page_dirty(vcpu->kvm, table_gfn); + pte |= PT_DIRTY_MASK; + table = kmap_atomic(page, KM_USER0); + table[index] = pte; + kunmap_atomic(table, KM_USER0); + pte_gpa = table_gfn << PAGE_SHIFT; + pte_gpa += index * sizeof(pt_element_t); + kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte)); + } + + walker->pte = pte; + pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)pte); return 1; not_present: @@ -209,8 +183,6 @@ err: walker->error_code |= PFERR_USER_MASK; if (fetch_fault) walker->error_code |= PFERR_FETCH_MASK; - if (walker->table) - kunmap_atomic(walker->table, KM_USER0); return 0; }