diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 0e6bc8c649ce9ada43835a155be4c40fd439365d..83da4ea150a335c3543315a7adef7855b1f047ce 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
+#include <linux/profile.h>
 #include <asm/desc.h>
 
 #include "kvm_svm.h"
@@ -166,11 +167,6 @@ static inline void write_dr7(unsigned long val)
        asm volatile ("mov %0, %%dr7" :: "r" (val));
 }
 
-static inline int svm_is_long_mode(struct kvm_vcpu *vcpu)
-{
-       return vcpu->svm->vmcb->save.efer & KVM_EFER_LMA;
-}
-
 static inline void force_new_asid(struct kvm_vcpu *vcpu)
 {
        vcpu->svm->asid_generation--;
@@ -240,13 +236,15 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 
        vcpu->rip = vcpu->svm->vmcb->save.rip = vcpu->svm->next_rip;
        vcpu->svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
+
+       vcpu->interrupt_window_open = 1;
 }
 
 static int has_svm(void)
 {
        uint32_t eax, ebx, ecx, edx;
 
-       if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+       if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
                printk(KERN_INFO "has_svm: not amd\n");
                return 0;
        }
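
The boot_cpu_data switch above makes the vendor check read the boot processor's cpuinfo; current_cpu_data expands to a per-CPU lookup, which is both unnecessary here and unsafe in preemptible context. For reference, a minimal user-space sketch of the same probe (GCC inline asm standing in for the kernel's cpuid helpers; CPUID leaf 0x80000001, ECX bit 2 is the SVM feature flag):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static void cpuid(uint32_t leaf, uint32_t *a, uint32_t *b,
                      uint32_t *c, uint32_t *d)
    {
            __asm__ volatile ("cpuid"
                              : "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d)
                              : "a"(leaf), "c"(0));
    }

    int main(void)
    {
            uint32_t a, b, c, d;
            char vendor[13];

            cpuid(0, &a, &b, &c, &d);
            memcpy(vendor, &b, 4);          /* vendor string is EBX, EDX, ECX */
            memcpy(vendor + 4, &d, 4);
            memcpy(vendor + 8, &c, 4);
            vendor[12] = '\0';
            if (strcmp(vendor, "AuthenticAMD") != 0) {
                    puts("not amd");
                    return 1;
            }

            cpuid(0x80000001, &a, &b, &c, &d);
            printf("SVM %ssupported\n", (c & (1 << 2)) ? "" : "not ");
            return 0;
    }
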
@@ -276,7 +274,7 @@ static void svm_hardware_disable(void *garbage)
                wrmsrl(MSR_VM_HSAVE_PA, 0);
                rdmsrl(MSR_EFER, efer);
                wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
-               per_cpu(svm_data, raw_smp_processor_id()) = 0;
+               per_cpu(svm_data, raw_smp_processor_id()) = NULL;
                __free_page(svm_data->save_area);
                kfree(svm_data);
        }
@@ -402,11 +400,11 @@ static __init int svm_hardware_setup(void)
        set_msr_interception(msrpm_va, MSR_GS_BASE, 1, 1);
        set_msr_interception(msrpm_va, MSR_FS_BASE, 1, 1);
        set_msr_interception(msrpm_va, MSR_KERNEL_GS_BASE, 1, 1);
-       set_msr_interception(msrpm_va, MSR_STAR, 1, 1);
        set_msr_interception(msrpm_va, MSR_LSTAR, 1, 1);
        set_msr_interception(msrpm_va, MSR_CSTAR, 1, 1);
        set_msr_interception(msrpm_va, MSR_SYSCALL_MASK, 1, 1);
 #endif
+       set_msr_interception(msrpm_va, MSR_K6_STAR, 1, 1);
        set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_CS, 1, 1);
        set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_ESP, 1, 1);
        set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_EIP, 1, 1);
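
MSR_STAR moves out of the CONFIG_X86_64 block because AMD processors implement SYSCALL, and therefore the STAR MSR, on 32-bit kernels too; MSR_K6_STAR is the name the 32-bit headers use for it. A hedged sketch of the bookkeeping set_msr_interception() performs — two intercept bits (read, write) per MSR across three architected MSR ranges; the constants are assumptions mirroring the SVM spec:

    #include <stdint.h>

    static const uint32_t msrpm_ranges[] = { 0, 0xc0000000, 0xc0010000 };
    #define MSRS_IN_RANGE 0x2000    /* assumed MSRs covered per range */

    /* What set_msr_interception(msrpm, msr, 1, 1) amounts to: clear
     * both bits so the guest reads and writes the MSR directly. */
    static void msr_allow_direct(uint32_t *msrpm, uint32_t msr)
    {
            unsigned int i;

            for (i = 0; i < 3; i++) {
                    uint32_t off;

                    if (msr < msrpm_ranges[i] ||
                        msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
                            continue;
                    off = (i * MSRS_IN_RANGE + (msr - msrpm_ranges[i])) * 2;
                    msrpm[off / 32] &= ~(3u << (off % 32)); /* 00 = pass through */
                    return;
            }
    }
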
@@ -500,11 +498,11 @@ static void init_vmcb(struct vmcb *vmcb)
                /*              (1ULL << INTERCEPT_SELECTIVE_CR0) | */
                                (1ULL << INTERCEPT_CPUID) |
                                (1ULL << INTERCEPT_HLT) |
-                               (1ULL << INTERCEPT_INVLPG) |
                                (1ULL << INTERCEPT_INVLPGA) |
                                (1ULL << INTERCEPT_IOIO_PROT) |
                                (1ULL << INTERCEPT_MSR_PROT) |
                                (1ULL << INTERCEPT_TASK_SWITCH) |
+                               (1ULL << INTERCEPT_SHUTDOWN) |
                                (1ULL << INTERCEPT_VMRUN) |
                                (1ULL << INTERCEPT_VMMCALL) |
                                (1ULL << INTERCEPT_VMLOAD) |
@@ -530,7 +528,13 @@ static void init_vmcb(struct vmcb *vmcb)
        save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
                SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
        save->cs.limit = 0xffff;
-       save->cs.base = 0xffff0000;
+       /*
+        * cs.base should really be 0xffff0000, but vmx can't handle that, so
+        * be consistent with it.
+        *
+        * Replace when we have real mode working for vmx.
+        */
+       save->cs.base = 0xf0000;
 
        save->gdtr.limit = 0xffff;
        save->idtr.limit = 0xffff;
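
The arithmetic behind the new comment, for reference: at reset IP is 0xfff0, so the first instruction fetch happens at cs.base + 0xfff0. A self-contained sketch:

    #include <stdio.h>

    int main(void)
    {
            unsigned long ip = 0xfff0;      /* IP at reset */

            /* Architectural CS base: fetch from just below 4 GB. */
            printf("%#lx\n", 0xffff0000UL + ip);    /* 0xfffffff0 */

            /* Compatibility value used here: lands at the top of the
             * conventional BIOS area instead. */
            printf("%#lx\n", 0xf0000UL + ip);       /* 0xffff0 */
            return 0;
    }
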
@@ -575,6 +579,8 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
        memset(vcpu->svm->db_regs, 0, sizeof(vcpu->svm->db_regs));
        init_vmcb(vcpu->svm->vmcb);
 
+       fx_init(vcpu);
+
        return 0;
 
 out2:
@@ -603,6 +609,10 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
        put_cpu();
 }
 
+static void svm_vcpu_decache(struct kvm_vcpu *vcpu)
+{
+}
+
 static void svm_cache_regs(struct kvm_vcpu *vcpu)
 {
        vcpu->regs[VCPU_REGS_RAX] = vcpu->svm->vmcb->save.rax;
@@ -642,7 +652,7 @@ static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
        case VCPU_SREG_LDTR: return &save->ldtr;
        }
        BUG();
-       return 0;
+       return NULL;
 }
 
 static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg)
@@ -681,14 +691,14 @@ static void svm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
 
 static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
 {
-       dt->limit = vcpu->svm->vmcb->save.ldtr.limit;
-       dt->base = vcpu->svm->vmcb->save.ldtr.base;
+       dt->limit = vcpu->svm->vmcb->save.idtr.limit;
+       dt->base = vcpu->svm->vmcb->save.idtr.base;
 }
 
 static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
 {
-       vcpu->svm->vmcb->save.ldtr.limit = dt->limit;
-       vcpu->svm->vmcb->save.ldtr.base = dt->base ;
+       vcpu->svm->vmcb->save.idtr.limit = dt->limit;
+       vcpu->svm->vmcb->save.idtr.base = dt->base;
 }
 
 static void svm_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
@@ -703,6 +713,10 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
        vcpu->svm->vmcb->save.gdtr.base = dt->base ;
 }
 
+static void svm_decache_cr0_cr4_guest_bits(struct kvm_vcpu *vcpu)
+{
+}
+
 static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
 #ifdef CONFIG_X86_64
@@ -719,7 +733,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
        }
 #endif
        vcpu->svm->cr0 = cr0;
-       vcpu->svm->vmcb->save.cr0 = cr0 | CR0_PG_MASK;
+       vcpu->svm->vmcb->save.cr0 = cr0 | CR0_PG_MASK | CR0_WP_MASK;
        vcpu->cr0 = cr0;
 }
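
Forcing CR0_WP_MASK alongside CR0_PG_MASK in the hardware-visible CR0 keeps shadow paging honest: with WP set, supervisor-mode guest writes to pages the shadow MMU has write-protected (its shadowed page tables above all) still fault into KVM even when the guest runs with WP clear; the guest's own CR0 stays in vcpu->svm->cr0. The bit arithmetic as a sketch, with the architectural bit positions spelled out (svm_hw_cr0() is a hypothetical name):

    /* CR0 bit positions per the x86 architecture. */
    #define CR0_WP_MASK (1UL << 16)
    #define CR0_PG_MASK (1UL << 31)

    /* CR0 value actually handed to the hardware for the guest. */
    static unsigned long svm_hw_cr0(unsigned long guest_cr0)
    {
            return guest_cr0 | CR0_PG_MASK | CR0_WP_MASK;
    }
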
 
@@ -850,6 +864,7 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        u64 fault_address;
        u32 error_code;
        enum emulation_result er;
+       int r;
 
        if (is_external_interrupt(exit_int_info))
                push_irq(vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
@@ -858,7 +873,12 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
        fault_address  = vcpu->svm->vmcb->control.exit_info_2;
        error_code = vcpu->svm->vmcb->control.exit_info_1;
-       if (!vcpu->mmu.page_fault(vcpu, fault_address, error_code)) {
+       r = kvm_mmu_page_fault(vcpu, fault_address, error_code);
+       if (r < 0) {
+               spin_unlock(&vcpu->kvm->lock);
+               return r;
+       }
+       if (!r) {
                spin_unlock(&vcpu->kvm->lock);
                return 1;
        }
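
The call site now distinguishes three outcomes from kvm_mmu_page_fault() instead of the old boolean: a negative value is a host error propagated out of the run loop, zero means the shadow MMU resolved the fault and the guest is simply resumed, and a positive value falls through to the instruction-emulation path later in the function. The convention, spelled out as a hypothetical enum (the real code just uses a bare int):

    enum {
            PF_HOST_ERROR = -1,     /* r < 0: abort, return to userspace */
            PF_FIXED      =  0,     /* shadow fault resolved: resume     */
            PF_EMULATE    =  1,     /* e.g. MMIO: emulate the insn       */
    };
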
@@ -883,6 +903,19 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        return 0;
 }
 
+static int shutdown_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+       /*
+        * The VMCB is undefined after a SHUTDOWN intercept,
+        * so reinitialize it.
+        */
+       memset(vcpu->svm->vmcb, 0, PAGE_SIZE);
+       init_vmcb(vcpu->svm->vmcb);
+
+       kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
+       return 0;
+}
+
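
SVM raises the SHUTDOWN intercept when the guest triple-faults, and the VMCB contents are architecturally undefined at that point, hence the memset()/init_vmcb() before reporting KVM_EXIT_SHUTDOWN. A guest-side sketch of the classic way to get there:

    #include <stdint.h>

    /* Load an empty IDT, then take any exception: the missing gate
     * escalates fault -> double fault -> triple fault -> SHUTDOWN. */
    static void guest_triple_fault(void)
    {
            struct __attribute__((packed)) {
                    uint16_t limit;
                    uint64_t base;
            } idtr = { 0, 0 };

            __asm__ volatile ("lidt %0; int3" :: "m"(idtr));
    }
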
 static int io_get_override(struct kvm_vcpu *vcpu,
                          struct vmcb_seg **seg,
                          int *addr_override)
@@ -911,7 +944,7 @@ static int io_get_override(struct kvm_vcpu *vcpu,
                return 0;
 
        *addr_override = 0;
-       *seg = 0;
+       *seg = NULL;
        for (i = 0; i < ins_length; i++)
                switch (inst[i]) {
                case 0xf0:
@@ -1034,10 +1067,11 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
        vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1;
        skip_emulated_instruction(vcpu);
-       if (vcpu->irq_summary && (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF))
+       if (vcpu->irq_summary)
                return 1;
 
        kvm_run->exit_reason = KVM_EXIT_HLT;
+       ++kvm_stat.halt_exits;
        return 0;
 }
 
@@ -1063,7 +1097,7 @@ static int cpuid_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 static int emulate_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-       if (emulate_instruction(vcpu, 0, 0, 0) != EMULATE_DONE)
+       if (emulate_instruction(vcpu, NULL, 0, 0) != EMULATE_DONE)
                printk(KERN_ERR "%s: failed\n", __FUNCTION__);
        return 1;
 }
@@ -1071,20 +1105,6 @@ static int emulate_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 {
        switch (ecx) {
-       case MSR_IA32_MC0_CTL:
-       case MSR_IA32_MCG_STATUS:
-       case MSR_IA32_MCG_CAP:
-       case MSR_IA32_MC0_MISC:
-       case MSR_IA32_MC0_MISC+4:
-       case MSR_IA32_MC0_MISC+8:
-       case MSR_IA32_MC0_MISC+12:
-       case MSR_IA32_MC0_MISC+16:
-       case MSR_IA32_UCODE_REV:
-               /* MTRR registers */
-       case 0xfe:
-       case 0x200 ... 0x2ff:
-               *data = 0;
-               break;
        case MSR_IA32_TIME_STAMP_COUNTER: {
                u64 tsc;
 
@@ -1092,16 +1112,10 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
                *data = vcpu->svm->vmcb->control.tsc_offset + tsc;
                break;
        }
-       case MSR_EFER:
-               *data = vcpu->shadow_efer;
-               break;
-       case MSR_IA32_APICBASE:
-               *data = vcpu->apic_base;
-               break;
-#ifdef CONFIG_X86_64
-       case MSR_STAR:
+       case MSR_K6_STAR:
                *data = vcpu->svm->vmcb->save.star;
                break;
+#ifdef CONFIG_X86_64
        case MSR_LSTAR:
                *data = vcpu->svm->vmcb->save.lstar;
                break;
@@ -1125,8 +1139,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
                *data = vcpu->svm->vmcb->save.sysenter_esp;
                break;
        default:
-               printk(KERN_ERR "kvm: unhandled rdmsr: 0x%x\n", ecx);
-               return 1;
+               return kvm_get_msr_common(vcpu, ecx, data);
        }
        return 0;
 }
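
Unrecognized MSRs now fall through to kvm_get_msr_common() (and, below, kvm_set_msr_common()) instead of just printing "unhandled rdmsr" and failing, so the machine-check, microcode and MTRR stubs removed above live in one place shared with vmx. For reference, the guest-side instruction that lands in this handler (sketch, guest-kernel context):

    #include <stdint.h>

    /* rdmsr: MSR index in ECX, result in EDX:EAX. */
    static inline uint64_t rdmsr64(uint32_t msr)
    {
            uint32_t lo, hi;

            __asm__ volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(msr));
            return ((uint64_t)hi << 32) | lo;
    }
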
@@ -1150,15 +1163,6 @@ static int rdmsr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 {
        switch (ecx) {
-#ifdef CONFIG_X86_64
-       case MSR_EFER:
-               set_efer(vcpu, data);
-               break;
-#endif
-       case MSR_IA32_MC0_STATUS:
-               printk(KERN_WARNING "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n"
-                           , __FUNCTION__, data);
-               break;
        case MSR_IA32_TIME_STAMP_COUNTER: {
                u64 tsc;
 
@@ -1166,17 +1170,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
                vcpu->svm->vmcb->control.tsc_offset = data - tsc;
                break;
        }
-       case MSR_IA32_UCODE_REV:
-       case MSR_IA32_UCODE_WRITE:
-       case 0x200 ... 0x2ff: /* MTRRs */
-               break;
-       case MSR_IA32_APICBASE:
-               vcpu->apic_base = data;
-               break;
-#ifdef CONFIG_X86_64_
-       case MSR_STAR:
+       case MSR_K6_STAR:
                vcpu->svm->vmcb->save.star = data;
                break;
+#ifdef CONFIG_X86_64
        case MSR_LSTAR:
                vcpu->svm->vmcb->save.lstar = data;
                break;
@@ -1200,8 +1197,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
                vcpu->svm->vmcb->save.sysenter_esp = data;
                break;
        default:
-               printk(KERN_ERR "kvm: unhandled wrmsr: %x\n", ecx);
-               return 1;
+               return kvm_set_msr_common(vcpu, ecx, data);
        }
        return 0;
 }
@@ -1227,6 +1223,23 @@ static int msr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                return rdmsr_interception(vcpu, kvm_run);
 }
 
+static int interrupt_window_interception(struct kvm_vcpu *vcpu,
+                                  struct kvm_run *kvm_run)
+{
+       /*
+        * If userspace is waiting to inject interrupts, exit as soon as
+        * possible.
+        */
+       if (kvm_run->request_interrupt_window &&
+           !vcpu->irq_summary) {
+               ++kvm_stat.irq_window_exits;
+               kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
+               return 0;
+       }
+
+       return 1;
+}
+
 static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu,
                                      struct kvm_run *kvm_run) = {
        [SVM_EXIT_READ_CR0]                     = emulate_on_interception,
@@ -1251,6 +1264,7 @@ static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu,
        [SVM_EXIT_NMI]                          = nop_on_interception,
        [SVM_EXIT_SMI]                          = nop_on_interception,
        [SVM_EXIT_INIT]                         = nop_on_interception,
+       [SVM_EXIT_VINTR]                        = interrupt_window_interception,
        /* [SVM_EXIT_CR0_SEL_WRITE]             = emulate_on_interception, */
        [SVM_EXIT_CPUID]                        = cpuid_interception,
        [SVM_EXIT_HLT]                          = halt_interception,
@@ -1259,6 +1273,7 @@ static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu,
        [SVM_EXIT_IOIO]                         = io_interception,
        [SVM_EXIT_MSR]                          = msr_interception,
        [SVM_EXIT_TASK_SWITCH]                  = task_switch_interception,
+       [SVM_EXIT_SHUTDOWN]                     = shutdown_interception,
        [SVM_EXIT_VMRUN]                        = invalid_op_interception,
        [SVM_EXIT_VMMCALL]                      = invalid_op_interception,
        [SVM_EXIT_VMLOAD]                       = invalid_op_interception,
@@ -1319,15 +1334,11 @@ static void pre_svm_run(struct kvm_vcpu *vcpu)
 }
 
 
-static inline void kvm_try_inject_irq(struct kvm_vcpu *vcpu)
+static inline void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 {
        struct vmcb_control_area *control;
 
-       if (!vcpu->irq_summary)
-               return;
-
        control = &vcpu->svm->vmcb->control;
-
        control->int_vector = pop_irq(vcpu);
        control->int_ctl &= ~V_INTR_PRIO_MASK;
        control->int_ctl |= V_IRQ_MASK |
@@ -1342,6 +1353,59 @@ static void kvm_reput_irq(struct kvm_vcpu *vcpu)
                control->int_ctl &= ~V_IRQ_MASK;
                push_irq(vcpu, control->int_vector);
        }
+
+       vcpu->interrupt_window_open =
+               !(control->int_state & SVM_INTERRUPT_SHADOW_MASK);
+}
+
+static void do_interrupt_requests(struct kvm_vcpu *vcpu,
+                                      struct kvm_run *kvm_run)
+{
+       struct vmcb_control_area *control = &vcpu->svm->vmcb->control;
+
+       vcpu->interrupt_window_open =
+               (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
+                (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
+
+       if (vcpu->interrupt_window_open && vcpu->irq_summary)
+               /*
+                * Interrupts are enabled and not blocked by sti or mov ss:
+                * inject the pending interrupt now.
+                */
+               kvm_do_inject_irq(vcpu);
+
+       /*
+        * Interrupts blocked.  Wait for unblock.
+        */
+       if (!vcpu->interrupt_window_open &&
+           (vcpu->irq_summary || kvm_run->request_interrupt_window)) {
+               control->intercept |= 1ULL << INTERCEPT_VINTR;
+       } else
+               control->intercept &= ~(1ULL << INTERCEPT_VINTR);
+}
+
+static void post_kvm_run_save(struct kvm_vcpu *vcpu,
+                             struct kvm_run *kvm_run)
+{
+       kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
+                                                 vcpu->irq_summary == 0);
+       kvm_run->if_flag = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0;
+       kvm_run->cr8 = vcpu->cr8;
+       kvm_run->apic_base = vcpu->apic_base;
+}
+
+/*
+ * Check whether userspace has requested an interrupt window and whether
+ * the interrupt window is open.
+ *
+ * No need to exit to userspace if we already have an interrupt queued.
+ */
+static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
+                                         struct kvm_run *kvm_run)
+{
+       return (!vcpu->irq_summary &&
+               kvm_run->request_interrupt_window &&
+               vcpu->interrupt_window_open &&
+               (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
 }
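
Taken together, do_interrupt_requests(), post_kvm_run_save() and dm_request_for_irq_injection() implement the interrupt-window protocol with userspace: userspace sets request_interrupt_window when it has an interrupt to deliver, KVM arms the VINTR intercept until the guest can accept it, and KVM_RUN returns with ready_for_interrupt_injection set so userspace can inject and re-enter. A hedged sketch of the userspace side, using the ioctl-argument kvm_run of this era; kvm_pic_has_irq() and kvm_pic_pop_irq() are hypothetical stand-ins for an emulated interrupt controller:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    extern int kvm_pic_has_irq(void);       /* hypothetical PIC model */
    extern int kvm_pic_pop_irq(void);

    static void vcpu_loop(int vcpu_fd, struct kvm_run *run)
    {
            for (;;) {
                    run->request_interrupt_window = kvm_pic_has_irq();

                    ioctl(vcpu_fd, KVM_RUN, run);

                    if (run->ready_for_interrupt_injection && kvm_pic_has_irq()) {
                            struct kvm_interrupt irq = {
                                    .irq = kvm_pic_pop_irq(),
                            };

                            ioctl(vcpu_fd, KVM_INTERRUPT, &irq);
                    }

                    if (run->exit_reason == KVM_EXIT_IRQ_WINDOW_OPEN)
                            continue;       /* injected above; just re-enter */

                    /* ... handle the remaining exit reasons ... */
            }
    }
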
 
 static void save_db_regs(unsigned long *db_regs)
@@ -1365,9 +1429,11 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        u16 fs_selector;
        u16 gs_selector;
        u16 ldt_selector;
+       int r;
 
 again:
-       kvm_try_inject_irq(vcpu);
+       if (!vcpu->mmio_read_completed)
+               do_interrupt_requests(vcpu, kvm_run);
 
        clgi();
 
@@ -1387,6 +1453,10 @@ again:
                save_db_regs(vcpu->svm->host_db_regs);
                load_db_regs(vcpu->svm->db_regs);
        }
+
+       fx_save(vcpu->host_fx_image);
+       fx_restore(vcpu->guest_fx_image);
+
        asm volatile (
 #ifdef CONFIG_X86_64
                "push %%rbx; push %%rcx; push %%rdx;"
@@ -1496,6 +1566,9 @@ again:
 #endif
                : "cc", "memory" );
 
+       fx_save(vcpu->guest_fx_image);
+       fx_restore(vcpu->host_fx_image);
+
        if ((vcpu->svm->vmcb->save.dr7 & 0xff))
                load_db_regs(vcpu->svm->host_db_regs);
 
@@ -1512,6 +1585,13 @@ again:
 
        reload_tss(vcpu);
 
+       /*
+        * Profile KVM exit RIPs:
+        */
+       if (unlikely(prof_on == KVM_PROFILING))
+               profile_hit(KVM_PROFILING,
+                       (void *)(unsigned long)vcpu->svm->vmcb->save.rip);
+
        stgi();
 
        kvm_reput_irq(vcpu);
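
This is what the new linux/profile.h include at the top of the file is for: profile_hit() records the guest rip on every exit into the kernel's profiling buffer. Assuming the matching "kvm" profile type in kernel/profile.c (added alongside this work), booting the host with profile=kvm and reading the buffer with readprofile(1) yields a histogram of where the guest was when it exited.
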
@@ -1521,18 +1601,28 @@ again:
        if (vcpu->svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
                kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY;
                kvm_run->exit_reason = vcpu->svm->vmcb->control.exit_code;
+               post_kvm_run_save(vcpu, kvm_run);
                return 0;
        }
 
-       if (handle_exit(vcpu, kvm_run)) {
+       r = handle_exit(vcpu, kvm_run);
+       if (r > 0) {
                if (signal_pending(current)) {
                        ++kvm_stat.signal_exits;
+                       post_kvm_run_save(vcpu, kvm_run);
+                       return -EINTR;
+               }
+
+               if (dm_request_for_irq_injection(vcpu, kvm_run)) {
+                       ++kvm_stat.request_irq_exits;
+                       post_kvm_run_save(vcpu, kvm_run);
                        return -EINTR;
                }
                kvm_resched(vcpu);
                goto again;
        }
-       return 0;
+       post_kvm_run_save(vcpu, kvm_run);
+       return r;
 }
 
 static void svm_flush_tlb(struct kvm_vcpu *vcpu)
@@ -1591,6 +1681,7 @@ static struct kvm_arch_ops svm_arch_ops = {
 
        .vcpu_load = svm_vcpu_load,
        .vcpu_put = svm_vcpu_put,
+       .vcpu_decache = svm_vcpu_decache,
 
        .set_guest_debug = svm_guest_debug,
        .get_msr = svm_get_msr,
@@ -1598,8 +1689,8 @@ static struct kvm_arch_ops svm_arch_ops = {
        .get_segment_base = svm_get_segment_base,
        .get_segment = svm_get_segment,
        .set_segment = svm_set_segment,
-       .is_long_mode = svm_is_long_mode,
        .get_cs_db_l_bits = svm_get_cs_db_l_bits,
+       .decache_cr0_cr4_guest_bits = svm_decache_cr0_cr4_guest_bits,
        .set_cr0 = svm_set_cr0,
        .set_cr0_no_modeswitch = svm_set_cr0,
        .set_cr3 = svm_set_cr3,