X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=drivers%2Fkvm%2Fx86.c;h=7237cb25f77d27030ffe3f160320a52346938a3b;hb=0771671749b59a507b6da4efb931c44d9691e248;hp=4902b35060f5bff642b3e6938f0a3bac4b998a48;hpb=e9b11c17552afe684e9e5d0444309a3ddf410116;p=linux-2.6 diff --git a/drivers/kvm/x86.c b/drivers/kvm/x86.c index 4902b35060..7237cb25f7 100644 --- a/drivers/kvm/x86.c +++ b/drivers/kvm/x86.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -42,26 +43,37 @@ #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) #define EFER_RESERVED_BITS 0xfffffffffffff2fe -#define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x) +#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM +#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU struct kvm_x86_ops *kvm_x86_ops; struct kvm_stats_debugfs_item debugfs_entries[] = { - { "pf_fixed", STAT_OFFSET(pf_fixed) }, - { "pf_guest", STAT_OFFSET(pf_guest) }, - { "tlb_flush", STAT_OFFSET(tlb_flush) }, - { "invlpg", STAT_OFFSET(invlpg) }, - { "exits", STAT_OFFSET(exits) }, - { "io_exits", STAT_OFFSET(io_exits) }, - { "mmio_exits", STAT_OFFSET(mmio_exits) }, - { "signal_exits", STAT_OFFSET(signal_exits) }, - { "irq_window", STAT_OFFSET(irq_window_exits) }, - { "halt_exits", STAT_OFFSET(halt_exits) }, - { "halt_wakeup", STAT_OFFSET(halt_wakeup) }, - { "request_irq", STAT_OFFSET(request_irq_exits) }, - { "irq_exits", STAT_OFFSET(irq_exits) }, - { "light_exits", STAT_OFFSET(light_exits) }, - { "efer_reload", STAT_OFFSET(efer_reload) }, + { "pf_fixed", VCPU_STAT(pf_fixed) }, + { "pf_guest", VCPU_STAT(pf_guest) }, + { "tlb_flush", VCPU_STAT(tlb_flush) }, + { "invlpg", VCPU_STAT(invlpg) }, + { "exits", VCPU_STAT(exits) }, + { "io_exits", VCPU_STAT(io_exits) }, + { "mmio_exits", VCPU_STAT(mmio_exits) }, + { "signal_exits", VCPU_STAT(signal_exits) }, + { "irq_window", VCPU_STAT(irq_window_exits) }, + { "halt_exits", VCPU_STAT(halt_exits) }, + { "halt_wakeup", VCPU_STAT(halt_wakeup) }, + { "request_irq", VCPU_STAT(request_irq_exits) }, + { "irq_exits", VCPU_STAT(irq_exits) }, + { "host_state_reload", VCPU_STAT(host_state_reload) }, + { "efer_reload", VCPU_STAT(efer_reload) }, + { "fpu_reload", VCPU_STAT(fpu_reload) }, + { "insn_emulation", VCPU_STAT(insn_emulation) }, + { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, + { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, + { "mmu_pte_write", VM_STAT(mmu_pte_write) }, + { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, + { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) }, + { "mmu_flooded", VM_STAT(mmu_flooded) }, + { "mmu_recycled", VM_STAT(mmu_recycled) }, + { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, { NULL } }; @@ -154,6 +166,26 @@ out: return ret; } +static bool pdptrs_changed(struct kvm_vcpu *vcpu) +{ + u64 pdpte[ARRAY_SIZE(vcpu->pdptrs)]; + bool changed = true; + int r; + + if (is_long_mode(vcpu) || !is_pae(vcpu)) + return false; + + mutex_lock(&vcpu->kvm->lock); + r = kvm_read_guest(vcpu->kvm, vcpu->cr3 & ~31u, pdpte, sizeof(pdpte)); + if (r < 0) + goto out; + changed = memcmp(pdpte, vcpu->pdptrs, sizeof(pdpte)) != 0; +out: + mutex_unlock(&vcpu->kvm->lock); + + return changed; +} + void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { if (cr0 & CR0_RESERVED_BITS) { @@ -259,6 +291,11 @@ EXPORT_SYMBOL_GPL(set_cr4); void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { + if (cr3 == vcpu->cr3 && !pdptrs_changed(vcpu)) { + kvm_mmu_flush_tlb(vcpu); + return; + } + if (is_long_mode(vcpu)) { if (cr3 & CR3_L_MODE_RESERVED_BITS) { printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); @@ -599,6 +636,27 @@ void decache_vcpus_on_cpu(int cpu) spin_unlock(&kvm_lock); } +int kvm_dev_ioctl_check_extension(long ext) +{ + int r; + + switch (ext) { + case KVM_CAP_IRQCHIP: + case KVM_CAP_HLT: + case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: + case KVM_CAP_USER_MEMORY: + case KVM_CAP_SET_TSS_ADDR: + case KVM_CAP_EXT_CPUID: + r = 1; + break; + default: + r = 0; + break; + } + return r; + +} + long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -648,15 +706,22 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { kvm_x86_ops->vcpu_put(vcpu); + kvm_put_guest_fpu(vcpu); } -static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) +static int is_efer_nx(void) { u64 efer; - int i; - struct kvm_cpuid_entry *e, *entry; rdmsrl(MSR_EFER, efer); + return efer & EFER_NX; +} + +static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) +{ + int i; + struct kvm_cpuid_entry2 *e, *entry; + entry = NULL; for (i = 0; i < vcpu->cpuid_nent; ++i) { e = &vcpu->cpuid_entries[i]; @@ -665,15 +730,56 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) break; } } - if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) { + if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) { entry->edx &= ~(1 << 20); printk(KERN_INFO "kvm: guest NX capability removed\n"); } } +/* when an old userspace process fills a new kernel module */ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid *cpuid, struct kvm_cpuid_entry __user *entries) +{ + int r, i; + struct kvm_cpuid_entry *cpuid_entries; + + r = -E2BIG; + if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) + goto out; + r = -ENOMEM; + cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent); + if (!cpuid_entries) + goto out; + r = -EFAULT; + if (copy_from_user(cpuid_entries, entries, + cpuid->nent * sizeof(struct kvm_cpuid_entry))) + goto out_free; + for (i = 0; i < cpuid->nent; i++) { + vcpu->cpuid_entries[i].function = cpuid_entries[i].function; + vcpu->cpuid_entries[i].eax = cpuid_entries[i].eax; + vcpu->cpuid_entries[i].ebx = cpuid_entries[i].ebx; + vcpu->cpuid_entries[i].ecx = cpuid_entries[i].ecx; + vcpu->cpuid_entries[i].edx = cpuid_entries[i].edx; + vcpu->cpuid_entries[i].index = 0; + vcpu->cpuid_entries[i].flags = 0; + vcpu->cpuid_entries[i].padding[0] = 0; + vcpu->cpuid_entries[i].padding[1] = 0; + vcpu->cpuid_entries[i].padding[2] = 0; + } + vcpu->cpuid_nent = cpuid->nent; + cpuid_fix_nx_cap(vcpu); + r = 0; + +out_free: + vfree(cpuid_entries); +out: + return r; +} + +static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, + struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries) { int r; @@ -682,16 +788,198 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, goto out; r = -EFAULT; if (copy_from_user(&vcpu->cpuid_entries, entries, - cpuid->nent * sizeof(struct kvm_cpuid_entry))) + cpuid->nent * sizeof(struct kvm_cpuid_entry2))) goto out; vcpu->cpuid_nent = cpuid->nent; - cpuid_fix_nx_cap(vcpu); return 0; out: return r; } +static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, + struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries) +{ + int r; + + r = -E2BIG; + if (cpuid->nent < vcpu->cpuid_nent) + goto out; + r = -EFAULT; + if (copy_to_user(entries, &vcpu->cpuid_entries, + vcpu->cpuid_nent * sizeof(struct kvm_cpuid_entry2))) + goto out; + return 0; + +out: + cpuid->nent = vcpu->cpuid_nent; + return r; +} + +static inline u32 bit(int bitno) +{ + return 1 << (bitno & 31); +} + +static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, + u32 index) +{ + entry->function = function; + entry->index = index; + cpuid_count(entry->function, entry->index, + &entry->eax, &entry->ebx, &entry->ecx, &entry->edx); + entry->flags = 0; +} + +static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, + u32 index, int *nent, int maxnent) +{ + const u32 kvm_supported_word0_x86_features = bit(X86_FEATURE_FPU) | + bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) | + bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) | + bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) | + bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) | + bit(X86_FEATURE_SEP) | bit(X86_FEATURE_PGE) | + bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) | + bit(X86_FEATURE_CLFLSH) | bit(X86_FEATURE_MMX) | + bit(X86_FEATURE_FXSR) | bit(X86_FEATURE_XMM) | + bit(X86_FEATURE_XMM2) | bit(X86_FEATURE_SELFSNOOP); + const u32 kvm_supported_word1_x86_features = bit(X86_FEATURE_FPU) | + bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) | + bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) | + bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) | + bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) | + bit(X86_FEATURE_PGE) | + bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) | + bit(X86_FEATURE_MMX) | bit(X86_FEATURE_FXSR) | + bit(X86_FEATURE_SYSCALL) | + (bit(X86_FEATURE_NX) && is_efer_nx()) | +#ifdef CONFIG_X86_64 + bit(X86_FEATURE_LM) | +#endif + bit(X86_FEATURE_MMXEXT) | + bit(X86_FEATURE_3DNOWEXT) | + bit(X86_FEATURE_3DNOW); + const u32 kvm_supported_word3_x86_features = + bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16); + const u32 kvm_supported_word6_x86_features = + bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY); + + /* all func 2 cpuid_count() should be called on the same cpu */ + get_cpu(); + do_cpuid_1_ent(entry, function, index); + ++*nent; + + switch (function) { + case 0: + entry->eax = min(entry->eax, (u32)0xb); + break; + case 1: + entry->edx &= kvm_supported_word0_x86_features; + entry->ecx &= kvm_supported_word3_x86_features; + break; + /* function 2 entries are STATEFUL. That is, repeated cpuid commands + * may return different values. This forces us to get_cpu() before + * issuing the first command, and also to emulate this annoying behavior + * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */ + case 2: { + int t, times = entry->eax & 0xff; + + entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; + for (t = 1; t < times && *nent < maxnent; ++t) { + do_cpuid_1_ent(&entry[t], function, 0); + entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; + ++*nent; + } + break; + } + /* function 4 and 0xb have additional index. */ + case 4: { + int index, cache_type; + + entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + /* read more entries until cache_type is zero */ + for (index = 1; *nent < maxnent; ++index) { + cache_type = entry[index - 1].eax & 0x1f; + if (!cache_type) + break; + do_cpuid_1_ent(&entry[index], function, index); + entry[index].flags |= + KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + ++*nent; + } + break; + } + case 0xb: { + int index, level_type; + + entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + /* read more entries until level_type is zero */ + for (index = 1; *nent < maxnent; ++index) { + level_type = entry[index - 1].ecx & 0xff; + if (!level_type) + break; + do_cpuid_1_ent(&entry[index], function, index); + entry[index].flags |= + KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + ++*nent; + } + break; + } + case 0x80000000: + entry->eax = min(entry->eax, 0x8000001a); + break; + case 0x80000001: + entry->edx &= kvm_supported_word1_x86_features; + entry->ecx &= kvm_supported_word6_x86_features; + break; + } + put_cpu(); +} + +static int kvm_vm_ioctl_get_supported_cpuid(struct kvm *kvm, + struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries) +{ + struct kvm_cpuid_entry2 *cpuid_entries; + int limit, nent = 0, r = -E2BIG; + u32 func; + + if (cpuid->nent < 1) + goto out; + r = -ENOMEM; + cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); + if (!cpuid_entries) + goto out; + + do_cpuid_ent(&cpuid_entries[0], 0, 0, &nent, cpuid->nent); + limit = cpuid_entries[0].eax; + for (func = 1; func <= limit && nent < cpuid->nent; ++func) + do_cpuid_ent(&cpuid_entries[nent], func, 0, + &nent, cpuid->nent); + r = -E2BIG; + if (nent >= cpuid->nent) + goto out_free; + + do_cpuid_ent(&cpuid_entries[nent], 0x80000000, 0, &nent, cpuid->nent); + limit = cpuid_entries[nent - 1].eax; + for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) + do_cpuid_ent(&cpuid_entries[nent], func, 0, + &nent, cpuid->nent); + r = -EFAULT; + if (copy_to_user(entries, cpuid_entries, + nent * sizeof(struct kvm_cpuid_entry2))) + goto out_free; + cpuid->nent = nent; + r = 0; + +out_free: + vfree(cpuid_entries); +out: + return r; +} + static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { @@ -758,6 +1046,36 @@ long kvm_arch_vcpu_ioctl(struct file *filp, goto out; break; } + case KVM_SET_CPUID2: { + struct kvm_cpuid2 __user *cpuid_arg = argp; + struct kvm_cpuid2 cpuid; + + r = -EFAULT; + if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) + goto out; + r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid, + cpuid_arg->entries); + if (r) + goto out; + break; + } + case KVM_GET_CPUID2: { + struct kvm_cpuid2 __user *cpuid_arg = argp; + struct kvm_cpuid2 cpuid; + + r = -EFAULT; + if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) + goto out; + r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid, + cpuid_arg->entries); + if (r) + goto out; + r = -EFAULT; + if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid)) + goto out; + r = 0; + break; + } case KVM_GET_MSRS: r = msr_io(vcpu, argp, kvm_get_msr, 1); break; @@ -906,6 +1224,37 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) return r; } +/* + * Get (and clear) the dirty memory log for a memory slot. + */ +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, + struct kvm_dirty_log *log) +{ + int r; + int n; + struct kvm_memory_slot *memslot; + int is_dirty = 0; + + mutex_lock(&kvm->lock); + + r = kvm_get_dirty_log(kvm, log, &is_dirty); + if (r) + goto out; + + /* If nothing is dirty, don't bother messing with page tables. */ + if (is_dirty) { + kvm_mmu_slot_remove_write_access(kvm, log->slot); + kvm_flush_remote_tlbs(kvm); + memslot = &kvm->memslots[log->slot]; + n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + memset(memslot->dirty_bitmap, 0, n); + } + r = 0; +out: + mutex_unlock(&kvm->lock); + return r; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1022,6 +1371,24 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_GET_SUPPORTED_CPUID: { + struct kvm_cpuid2 __user *cpuid_arg = argp; + struct kvm_cpuid2 cpuid; + + r = -EFAULT; + if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) + goto out; + r = kvm_vm_ioctl_get_supported_cpuid(kvm, &cpuid, + cpuid_arg->entries); + if (r) + goto out; + + r = -EFAULT; + if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid)) + goto out; + r = 0; + break; + } default: ; } @@ -1029,7 +1396,7 @@ out: return r; } -static __init void kvm_init_msr_list(void) +static void kvm_init_msr_list(void) { u32 dummy[2]; unsigned i, j; @@ -1100,15 +1467,6 @@ int emulator_read_std(unsigned long addr, } EXPORT_SYMBOL_GPL(emulator_read_std); -static int emulator_write_std(unsigned long addr, - const void *val, - unsigned int bytes, - struct kvm_vcpu *vcpu) -{ - pr_unimpl(vcpu, "emulator_write_std: addr %lx n %d\n", addr, bytes); - return X86EMUL_UNHANDLEABLE; -} - static int emulator_read_emulated(unsigned long addr, void *val, unsigned int bytes, @@ -1305,7 +1663,6 @@ EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); struct x86_emulate_ops emulate_ops = { .read_std = emulator_read_std, - .write_std = emulator_write_std, .read_emulated = emulator_read_emulated, .write_emulated = emulator_write_emulated, .cmpxchg_emulated = emulator_cmpxchg_emulated, @@ -1360,7 +1717,9 @@ int emulate_instruction(struct kvm_vcpu *vcpu, get_segment_base(vcpu, VCPU_SREG_FS); r = x86_decode_insn(&vcpu->emulate_ctxt, &emulate_ops); + ++vcpu->stat.insn_emulation; if (r) { + ++vcpu->stat.insn_emulation_fail; if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) return EMULATE_DONE; return EMULATE_FAIL; @@ -1408,7 +1767,7 @@ static void free_pio_guest_pages(struct kvm_vcpu *vcpu) for (i = 0; i < ARRAY_SIZE(vcpu->pio.guest_pages); ++i) if (vcpu->pio.guest_pages[i]) { - kvm_release_page(vcpu->pio.guest_pages[i]); + kvm_release_page_dirty(vcpu->pio.guest_pages[i]); vcpu->pio.guest_pages[i] = NULL; } } @@ -1645,9 +2004,48 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, } EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); -__init void kvm_arch_init(void) +int kvm_arch_init(void *opaque) { + int r; + struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; + + r = kvm_mmu_module_init(); + if (r) + goto out_fail; + kvm_init_msr_list(); + + if (kvm_x86_ops) { + printk(KERN_ERR "kvm: already loaded the other module\n"); + r = -EEXIST; + goto out; + } + + if (!ops->cpu_has_kvm_support()) { + printk(KERN_ERR "kvm: no hardware support\n"); + r = -EOPNOTSUPP; + goto out; + } + if (ops->disabled_by_bios()) { + printk(KERN_ERR "kvm: disabled by bios\n"); + r = -EOPNOTSUPP; + goto out; + } + + kvm_x86_ops = ops; + kvm_mmu_set_nonpresent_ptes(0ull, 0ull); + return 0; + +out: + kvm_mmu_module_exit(); +out_fail: + return r; +} + +void kvm_arch_exit(void) +{ + kvm_x86_ops = NULL; + kvm_mmu_module_exit(); } int kvm_emulate_halt(struct kvm_vcpu *vcpu) @@ -1788,14 +2186,47 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, } } +static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) +{ + struct kvm_cpuid_entry2 *e = &vcpu->cpuid_entries[i]; + int j, nent = vcpu->cpuid_nent; + + e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; + /* when no next entry is found, the current entry[i] is reselected */ + for (j = i + 1; j == i; j = (j + 1) % nent) { + struct kvm_cpuid_entry2 *ej = &vcpu->cpuid_entries[j]; + if (ej->function == e->function) { + ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; + return j; + } + } + return 0; /* silence gcc, even though control never reaches here */ +} + +/* find an entry with matching function, matching index (if needed), and that + * should be read next (if it's stateful) */ +static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e, + u32 function, u32 index) +{ + if (e->function != function) + return 0; + if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index) + return 0; + if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) && + !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT)) + return 0; + return 1; +} + void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) { int i; - u32 function; - struct kvm_cpuid_entry *e, *best; + u32 function, index; + struct kvm_cpuid_entry2 *e, *best; kvm_x86_ops->cache_regs(vcpu); function = vcpu->regs[VCPU_REGS_RAX]; + index = vcpu->regs[VCPU_REGS_RCX]; vcpu->regs[VCPU_REGS_RAX] = 0; vcpu->regs[VCPU_REGS_RBX] = 0; vcpu->regs[VCPU_REGS_RCX] = 0; @@ -1803,7 +2234,9 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) best = NULL; for (i = 0; i < vcpu->cpuid_nent; ++i) { e = &vcpu->cpuid_entries[i]; - if (e->function == function) { + if (is_matching_cpuid_entry(e, function, index)) { + if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) + move_to_next_stateful_cpuid_entry(vcpu, i); best = e; break; } @@ -1943,10 +2376,8 @@ again: ++vcpu->stat.request_irq_exits; goto out; } - if (!need_resched()) { - ++vcpu->stat.light_exits; + if (!need_resched()) goto again; - } } out: @@ -2273,6 +2704,28 @@ struct fxsave { #endif }; +/* + * Translate a guest virtual address to a guest physical address. + */ +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + unsigned long vaddr = tr->linear_address; + gpa_t gpa; + + vcpu_load(vcpu); + mutex_lock(&vcpu->kvm->lock); + gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr); + tr->physical_address = gpa; + tr->valid = gpa != UNMAPPED_GVA; + tr->writeable = 1; + tr->usermode = 0; + mutex_unlock(&vcpu->kvm->lock); + vcpu_put(vcpu); + + return 0; +} + int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image; @@ -2352,6 +2805,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) vcpu->guest_fpu_loaded = 0; fx_save(&vcpu->guest_fx_image); fx_restore(&vcpu->host_fx_image); + ++vcpu->stat.fpu_reload; } EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); @@ -2363,13 +2817,12 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { - int r; - struct kvm_vcpu *vcpu = kvm_x86_ops->vcpu_create(kvm, id); + return kvm_x86_ops->vcpu_create(kvm, id); +} - if (IS_ERR(vcpu)) { - r = -ENOMEM; - goto fail; - } +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + int r; /* We do fxsave: this must be aligned. */ BUG_ON((unsigned long)&vcpu->host_fx_image & 0xF); @@ -2382,14 +2835,13 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, if (r < 0) goto free_vcpu; - return vcpu; + return 0; free_vcpu: kvm_x86_ops->vcpu_free(vcpu); -fail: - return ERR_PTR(r); + return r; } -void kvm_arch_vcpu_destory(struct kvm_vcpu *vcpu) +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { vcpu_load(vcpu); kvm_mmu_unload(vcpu); @@ -2476,3 +2928,100 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) kvm_mmu_destroy(vcpu); free_page((unsigned long)vcpu->pio_data); } + +struct kvm *kvm_arch_create_vm(void) +{ + struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); + + if (!kvm) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&kvm->active_mmu_pages); + + return kvm; +} + +static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) +{ + vcpu_load(vcpu); + kvm_mmu_unload(vcpu); + vcpu_put(vcpu); +} + +static void kvm_free_vcpus(struct kvm *kvm) +{ + unsigned int i; + + /* + * Unpin any mmu pages first. + */ + for (i = 0; i < KVM_MAX_VCPUS; ++i) + if (kvm->vcpus[i]) + kvm_unload_vcpu_mmu(kvm->vcpus[i]); + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (kvm->vcpus[i]) { + kvm_arch_vcpu_free(kvm->vcpus[i]); + kvm->vcpus[i] = NULL; + } + } + +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ + kfree(kvm->vpic); + kfree(kvm->vioapic); + kvm_free_vcpus(kvm); + kvm_free_physmem(kvm); + kfree(kvm); +} + +int kvm_arch_set_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old, + int user_alloc) +{ + int npages = mem->memory_size >> PAGE_SHIFT; + struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; + + /*To keep backward compatibility with older userspace, + *x86 needs to hanlde !user_alloc case. + */ + if (!user_alloc) { + if (npages && !old.rmap) { + down_write(¤t->mm->mmap_sem); + memslot->userspace_addr = do_mmap(NULL, 0, + npages * PAGE_SIZE, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, + 0); + up_write(¤t->mm->mmap_sem); + + if (IS_ERR((void *)memslot->userspace_addr)) + return PTR_ERR((void *)memslot->userspace_addr); + } else { + if (!old.user_alloc && old.rmap) { + int ret; + + down_write(¤t->mm->mmap_sem); + ret = do_munmap(current->mm, old.userspace_addr, + old.npages * PAGE_SIZE); + up_write(¤t->mm->mmap_sem); + if (ret < 0) + printk(KERN_WARNING + "kvm_vm_ioctl_set_memory_region: " + "failed to munmap memory\n"); + } + } + } + + if (!kvm->n_requested_mmu_pages) { + unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); + kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); + } + + kvm_mmu_slot_remove_write_access(kvm, mem->slot); + kvm_flush_remote_tlbs(kvm); + + return 0; +}