]> err.no Git - linux-2.6/blobdiff - arch/x86/kvm/mmu.c
[POWERPC] 4xx: Add Canyonlands and Yosemite to multi-board defconfig
[linux-2.6] / arch / x86 / kvm / mmu.c
index 8f12ec52ad8601ed4b657f5f6858e27f2f40fc09..d8172aabc660de7503c43b697fea470d81f7eb9a 100644 (file)
@@ -291,7 +291,6 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
 {
        int r;
 
-       kvm_mmu_free_some_pages(vcpu);
        r = mmu_topup_memory_cache(&vcpu->arch.mmu_pte_chain_cache,
                                   pte_chain_cache, 4);
        if (r)
@@ -569,9 +568,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 {
        struct kvm_mmu_page *sp;
 
-       if (!vcpu->kvm->arch.n_free_mmu_pages)
-               return NULL;
-
        sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp);
        sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
        sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
@@ -685,8 +681,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
                                             unsigned level,
                                             int metaphysical,
                                             unsigned access,
-                                            u64 *parent_pte,
-                                            bool *new_page)
+                                            u64 *parent_pte)
 {
        union kvm_mmu_page_role role;
        unsigned index;
@@ -726,8 +721,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
        vcpu->arch.mmu.prefetch_page(vcpu, sp);
        if (!metaphysical)
                rmap_write_protect(vcpu->kvm, gfn);
-       if (new_page)
-               *new_page = 1;
        return sp;
 }
 
@@ -880,21 +873,28 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
 
 struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
 {
+       struct page *page;
+
        gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
 
        if (gpa == UNMAPPED_GVA)
                return NULL;
-       return gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+
+       down_read(&current->mm->mmap_sem);
+       page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+       up_read(&current->mm->mmap_sem);
+
+       return page;
 }
 
 static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
                         unsigned pt_access, unsigned pte_access,
                         int user_fault, int write_fault, int dirty,
-                        int *ptwrite, gfn_t gfn)
+                        int *ptwrite, gfn_t gfn, struct page *page)
 {
        u64 spte;
        int was_rmapped = is_rmap_pte(*shadow_pte);
-       struct page *page;
+       int was_writeble = is_writeble_pte(*shadow_pte);
 
        pgprintk("%s: spte %llx access %x write_fault %d"
                 " user_fault %d gfn %lx\n",
@@ -912,8 +912,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
        if (!(pte_access & ACC_EXEC_MASK))
                spte |= PT64_NX_MASK;
 
-       page = gfn_to_page(vcpu->kvm, gfn);
-
        spte |= PT_PRESENT_MASK;
        if (pte_access & ACC_USER_MASK)
                spte |= PT_USER_MASK;
@@ -963,9 +961,12 @@ unshadowed:
                rmap_add(vcpu, shadow_pte, gfn);
                if (!is_rmap_pte(*shadow_pte))
                        kvm_release_page_clean(page);
+       } else {
+               if (was_writeble)
+                       kvm_release_page_dirty(page);
+               else
+                       kvm_release_page_clean(page);
        }
-       else
-               kvm_release_page_clean(page);
        if (!ptwrite || !*ptwrite)
                vcpu->arch.last_pte_updated = shadow_pte;
 }
@@ -974,7 +975,8 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
 {
 }
 
-static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
+static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
+                          gfn_t gfn, struct page *page)
 {
        int level = PT32E_ROOT_LEVEL;
        hpa_t table_addr = vcpu->arch.mmu.root_hpa;
@@ -989,7 +991,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 
                if (level == 1) {
                        mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
-                                    0, write, 1, &pt_write, gfn);
+                                    0, write, 1, &pt_write, gfn, page);
                        return pt_write || is_io_pte(table[index]);
                }
 
@@ -1001,10 +1003,10 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
                                >> PAGE_SHIFT;
                        new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
                                                     v, level - 1,
-                                                    1, ACC_ALL, &table[index],
-                                                    NULL);
+                                                    1, ACC_ALL, &table[index]);
                        if (!new_table) {
                                pgprintk("nonpaging_map: ENOMEM\n");
+                               kvm_release_page_clean(page);
                                return -ENOMEM;
                        }
 
@@ -1015,6 +1017,29 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
        }
 }
 
+static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
+{
+       int r;
+
+       struct page *page;
+
+       down_read(&vcpu->kvm->slots_lock);
+
+       down_read(&current->mm->mmap_sem);
+       page = gfn_to_page(vcpu->kvm, gfn);
+       up_read(&current->mm->mmap_sem);
+
+       spin_lock(&vcpu->kvm->mmu_lock);
+       kvm_mmu_free_some_pages(vcpu);
+       r = __nonpaging_map(vcpu, v, write, gfn, page);
+       spin_unlock(&vcpu->kvm->mmu_lock);
+
+       up_read(&vcpu->kvm->slots_lock);
+
+       return r;
+}
+
+
 static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu,
                                    struct kvm_mmu_page *sp)
 {
@@ -1031,6 +1056,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 
        if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
                return;
+       spin_lock(&vcpu->kvm->mmu_lock);
 #ifdef CONFIG_X86_64
        if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
                hpa_t root = vcpu->arch.mmu.root_hpa;
@@ -1038,6 +1064,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
                sp = page_header(root);
                --sp->root_count;
                vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+               spin_unlock(&vcpu->kvm->mmu_lock);
                return;
        }
 #endif
@@ -1051,6 +1078,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
                }
                vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
        }
+       spin_unlock(&vcpu->kvm->mmu_lock);
        vcpu->arch.mmu.root_hpa = INVALID_PAGE;
 }
 
@@ -1068,7 +1096,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 
                ASSERT(!VALID_PAGE(root));
                sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
-                                     PT64_ROOT_LEVEL, 0, ACC_ALL, NULL, NULL);
+                                     PT64_ROOT_LEVEL, 0, ACC_ALL, NULL);
                root = __pa(sp->spt);
                ++sp->root_count;
                vcpu->arch.mmu.root_hpa = root;
@@ -1089,7 +1117,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
                        root_gfn = 0;
                sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
                                      PT32_ROOT_LEVEL, !is_paging(vcpu),
-                                     ACC_ALL, NULL, NULL);
+                                     ACC_ALL, NULL);
                root = __pa(sp->spt);
                ++sp->root_count;
                vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
@@ -1150,7 +1178,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
 
 static void paging_new_cr3(struct kvm_vcpu *vcpu)
 {
-       pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3);
+       pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->arch.cr3);
        mmu_free_roots(vcpu);
 }
 
@@ -1250,15 +1278,16 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
 {
        int r;
 
-       mutex_lock(&vcpu->kvm->lock);
        r = mmu_topup_memory_caches(vcpu);
        if (r)
                goto out;
+       spin_lock(&vcpu->kvm->mmu_lock);
+       kvm_mmu_free_some_pages(vcpu);
        mmu_alloc_roots(vcpu);
+       spin_unlock(&vcpu->kvm->mmu_lock);
        kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
        kvm_mmu_flush_tlb(vcpu);
 out:
-       mutex_unlock(&vcpu->kvm->lock);
        return r;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_load);
@@ -1333,6 +1362,49 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
        return !!(spte && (*spte & PT_ACCESSED_MASK));
 }
 
+static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+                                         const u8 *new, int bytes)
+{
+       gfn_t gfn;
+       int r;
+       u64 gpte = 0;
+       struct page *page;
+
+       if (bytes != 4 && bytes != 8)
+               return;
+
+       /*
+        * Assume that the pte write on a page table of the same type
+        * as the current vcpu paging mode.  This is nearly always true
+        * (might be false while changing modes).  Note it is verified later
+        * by update_pte().
+        */
+       if (is_pae(vcpu)) {
+               /* Handle a 32-bit guest writing two halves of a 64-bit gpte */
+               if ((bytes == 4) && (gpa % 4 == 0)) {
+                       r = kvm_read_guest(vcpu->kvm, gpa & ~(u64)7, &gpte, 8);
+                       if (r)
+                               return;
+                       memcpy((void *)&gpte + (gpa % 8), new, 4);
+               } else if ((bytes == 8) && (gpa % 8 == 0)) {
+                       memcpy((void *)&gpte, new, 8);
+               }
+       } else {
+               if ((bytes == 4) && (gpa % 4 == 0))
+                       memcpy((void *)&gpte, new, 4);
+       }
+       if (!is_present_pte(gpte))
+               return;
+       gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
+
+       down_read(&current->mm->mmap_sem);
+       page = gfn_to_page(vcpu->kvm, gfn);
+       up_read(&current->mm->mmap_sem);
+
+       vcpu->arch.update_pte.gfn = gfn;
+       vcpu->arch.update_pte.page = gfn_to_page(vcpu->kvm, gfn);
+}
+
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                       const u8 *new, int bytes)
 {
@@ -1353,6 +1425,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
        int npte;
 
        pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
+       mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes);
+       spin_lock(&vcpu->kvm->mmu_lock);
+       kvm_mmu_free_some_pages(vcpu);
        ++vcpu->kvm->stat.mmu_pte_write;
        kvm_mmu_audit(vcpu, "pre pte write");
        if (gfn == vcpu->arch.last_pt_write_gfn
@@ -1421,13 +1496,26 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                }
        }
        kvm_mmu_audit(vcpu, "post pte write");
+       spin_unlock(&vcpu->kvm->mmu_lock);
+       if (vcpu->arch.update_pte.page) {
+               kvm_release_page_clean(vcpu->arch.update_pte.page);
+               vcpu->arch.update_pte.page = NULL;
+       }
 }
 
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 {
-       gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
+       gpa_t gpa;
+       int r;
+
+       down_read(&vcpu->kvm->slots_lock);
+       gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
+       up_read(&vcpu->kvm->slots_lock);
 
-       return kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+       spin_lock(&vcpu->kvm->mmu_lock);
+       r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+       spin_unlock(&vcpu->kvm->mmu_lock);
+       return r;
 }
 
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
@@ -1447,7 +1535,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
        int r;
        enum emulation_result er;
 
-       mutex_lock(&vcpu->kvm->lock);
        r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code);
        if (r < 0)
                goto out;
@@ -1462,7 +1549,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
                goto out;
 
        er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0);
-       mutex_unlock(&vcpu->kvm->lock);
 
        switch (er) {
        case EMULATE_DONE:
@@ -1477,7 +1563,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
                BUG();
        }
 out:
-       mutex_unlock(&vcpu->kvm->lock);
        return r;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
@@ -1574,8 +1659,10 @@ void kvm_mmu_zap_all(struct kvm *kvm)
 {
        struct kvm_mmu_page *sp, *node;
 
+       spin_lock(&kvm->mmu_lock);
        list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link)
                kvm_mmu_zap_page(kvm, sp);
+       spin_unlock(&kvm->mmu_lock);
 
        kvm_flush_remote_tlbs(kvm);
 }