X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=drivers%2Fkvm%2Fkvm_main.c;h=7aeaaba79c548fffaca68b6800faf3b0e187a529;hb=11ec2804711896546ee3c945f3786c7f9fdd175a;hp=cd0557954e50ef3614c03c334b38f06104ca0551;hpb=6ae26fa468533c86aaa6936fd366142fcf01386f;p=linux-2.6 diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index cd0557954e..7aeaaba79c 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -54,7 +54,7 @@ static cpumask_t cpus_hardware_enabled; struct kvm_arch_ops *kvm_arch_ops; -static void hardware_disable(void *ignored); +static __read_mostly struct preempt_ops kvm_preempt_ops; #define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x) @@ -84,10 +84,17 @@ static struct dentry *debugfs_dir; #define MAX_IO_MSRS 256 -#define CR0_RESEVED_BITS 0xffffffff1ffaffc0ULL -#define LMSW_GUEST_MASK 0x0eULL -#define CR4_RESEVED_BITS (~((1ULL << 11) - 1)) -#define CR8_RESEVED_BITS (~0x0fULL) +#define CR0_RESERVED_BITS \ + (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ + | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ + | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) +#define CR4_RESERVED_BITS \ + (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ + | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ + | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ + | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) + +#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) #define EFER_RESERVED_BITS 0xfffffffffffff2fe #ifdef CONFIG_X86_64 @@ -234,13 +241,21 @@ EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); */ static void vcpu_load(struct kvm_vcpu *vcpu) { + int cpu; + mutex_lock(&vcpu->mutex); - kvm_arch_ops->vcpu_load(vcpu); + cpu = get_cpu(); + preempt_notifier_register(&vcpu->preempt_notifier); + kvm_arch_ops->vcpu_load(vcpu, cpu); + put_cpu(); } static void vcpu_put(struct kvm_vcpu *vcpu) { + preempt_disable(); kvm_arch_ops->vcpu_put(vcpu); + preempt_notifier_unregister(&vcpu->preempt_notifier); + preempt_enable(); mutex_unlock(&vcpu->mutex); } @@ -261,8 +276,10 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) atomic_set(&completed, 0); cpus_clear(cpus); needed = 0; - for (i = 0; i < kvm->nvcpus; ++i) { - vcpu = &kvm->vcpus[i]; + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + vcpu = kvm->vcpus[i]; + if (!vcpu) + continue; if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests)) continue; cpu = vcpu->cpu; @@ -286,26 +303,69 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) } } +int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) +{ + struct page *page; + int r; + + mutex_init(&vcpu->mutex); + vcpu->cpu = -1; + vcpu->mmu.root_hpa = INVALID_PAGE; + vcpu->kvm = kvm; + vcpu->vcpu_id = id; + + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) { + r = -ENOMEM; + goto fail; + } + vcpu->run = page_address(page); + + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) { + r = -ENOMEM; + goto fail_free_run; + } + vcpu->pio_data = page_address(page); + + vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, + FX_IMAGE_ALIGN); + vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; + + r = kvm_mmu_create(vcpu); + if (r < 0) + goto fail_free_pio_data; + + return 0; + +fail_free_pio_data: + free_page((unsigned long)vcpu->pio_data); +fail_free_run: + free_page((unsigned long)vcpu->run); +fail: + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(kvm_vcpu_init); + +void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + kvm_mmu_destroy(vcpu); + free_page((unsigned long)vcpu->pio_data); + free_page((unsigned long)vcpu->run); +} +EXPORT_SYMBOL_GPL(kvm_vcpu_uninit); + static struct kvm *kvm_create_vm(void) { struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); - int i; if (!kvm) return ERR_PTR(-ENOMEM); kvm_io_bus_init(&kvm->pio_bus); - spin_lock_init(&kvm->lock); + mutex_init(&kvm->lock); INIT_LIST_HEAD(&kvm->active_mmu_pages); kvm_io_bus_init(&kvm->mmio_bus); - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - struct kvm_vcpu *vcpu = &kvm->vcpus[i]; - - mutex_init(&vcpu->mutex); - vcpu->cpu = -1; - vcpu->kvm = kvm; - vcpu->mmu.root_hpa = INVALID_PAGE; - } spin_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); spin_unlock(&kvm_lock); @@ -362,30 +422,11 @@ static void free_pio_guest_pages(struct kvm_vcpu *vcpu) static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) { - if (!vcpu->vmcs) - return; - vcpu_load(vcpu); kvm_mmu_unload(vcpu); vcpu_put(vcpu); } -static void kvm_free_vcpu(struct kvm_vcpu *vcpu) -{ - if (!vcpu->vmcs) - return; - - vcpu_load(vcpu); - kvm_mmu_destroy(vcpu); - vcpu_put(vcpu); - kvm_arch_ops->vcpu_free(vcpu); - free_page((unsigned long)vcpu->run); - vcpu->run = NULL; - free_page((unsigned long)vcpu->pio_data); - vcpu->pio_data = NULL; - free_pio_guest_pages(vcpu); -} - static void kvm_free_vcpus(struct kvm *kvm) { unsigned int i; @@ -394,9 +435,15 @@ static void kvm_free_vcpus(struct kvm *kvm) * Unpin any mmu pages first. */ for (i = 0; i < KVM_MAX_VCPUS; ++i) - kvm_unload_vcpu_mmu(&kvm->vcpus[i]); - for (i = 0; i < KVM_MAX_VCPUS; ++i) - kvm_free_vcpu(&kvm->vcpus[i]); + if (kvm->vcpus[i]) + kvm_unload_vcpu_mmu(kvm->vcpus[i]); + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (kvm->vcpus[i]) { + kvm_arch_ops->vcpu_free(kvm->vcpus[i]); + kvm->vcpus[i] = NULL; + } + } + } static int kvm_dev_release(struct inode *inode, struct file *filp) @@ -437,58 +484,60 @@ static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT; unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2; int i; - u64 pdpte; u64 *pdpt; int ret; struct page *page; + u64 pdpte[ARRAY_SIZE(vcpu->pdptrs)]; - spin_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->lock); page = gfn_to_page(vcpu->kvm, pdpt_gfn); - /* FIXME: !page - emulate? 0xff? */ + if (!page) { + ret = 0; + goto out; + } + pdpt = kmap_atomic(page, KM_USER0); + memcpy(pdpte, pdpt+offset, sizeof(pdpte)); + kunmap_atomic(pdpt, KM_USER0); - ret = 1; - for (i = 0; i < 4; ++i) { - pdpte = pdpt[offset + i]; - if ((pdpte & 1) && (pdpte & 0xfffffff0000001e6ull)) { + for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { + if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) { ret = 0; goto out; } } + ret = 1; - for (i = 0; i < 4; ++i) - vcpu->pdptrs[i] = pdpt[offset + i]; - + memcpy(vcpu->pdptrs, pdpte, sizeof(vcpu->pdptrs)); out: - kunmap_atomic(pdpt, KM_USER0); - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); return ret; } void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { - if (cr0 & CR0_RESEVED_BITS) { + if (cr0 & CR0_RESERVED_BITS) { printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", cr0, vcpu->cr0); inject_gp(vcpu); return; } - if ((cr0 & CR0_NW_MASK) && !(cr0 & CR0_CD_MASK)) { + if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n"); inject_gp(vcpu); return; } - if ((cr0 & CR0_PG_MASK) && !(cr0 & CR0_PE_MASK)) { + if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { printk(KERN_DEBUG "set_cr0: #GP, set PG flag " "and a clear PE flag\n"); inject_gp(vcpu); return; } - if (!is_paging(vcpu) && (cr0 & CR0_PG_MASK)) { + if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { #ifdef CONFIG_X86_64 if ((vcpu->shadow_efer & EFER_LME)) { int cs_db, cs_l; @@ -521,9 +570,9 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) kvm_arch_ops->set_cr0(vcpu, cr0); vcpu->cr0 = cr0; - spin_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->lock); kvm_mmu_reset_context(vcpu); - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); return; } EXPORT_SYMBOL_GPL(set_cr0); @@ -536,62 +585,72 @@ EXPORT_SYMBOL_GPL(lmsw); void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { - if (cr4 & CR4_RESEVED_BITS) { + if (cr4 & CR4_RESERVED_BITS) { printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); inject_gp(vcpu); return; } if (is_long_mode(vcpu)) { - if (!(cr4 & CR4_PAE_MASK)) { + if (!(cr4 & X86_CR4_PAE)) { printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while " "in long mode\n"); inject_gp(vcpu); return; } - } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & CR4_PAE_MASK) + } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE) && !load_pdptrs(vcpu, vcpu->cr3)) { printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n"); inject_gp(vcpu); + return; } - if (cr4 & CR4_VMXE_MASK) { + if (cr4 & X86_CR4_VMXE) { printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n"); inject_gp(vcpu); return; } kvm_arch_ops->set_cr4(vcpu, cr4); - spin_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->lock); kvm_mmu_reset_context(vcpu); - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); } EXPORT_SYMBOL_GPL(set_cr4); void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { if (is_long_mode(vcpu)) { - if (cr3 & CR3_L_MODE_RESEVED_BITS) { + if (cr3 & CR3_L_MODE_RESERVED_BITS) { printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); inject_gp(vcpu); return; } } else { - if (cr3 & CR3_RESEVED_BITS) { - printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); - inject_gp(vcpu); - return; - } - if (is_paging(vcpu) && is_pae(vcpu) && - !load_pdptrs(vcpu, cr3)) { - printk(KERN_DEBUG "set_cr3: #GP, pdptrs " - "reserved bits\n"); - inject_gp(vcpu); - return; + if (is_pae(vcpu)) { + if (cr3 & CR3_PAE_RESERVED_BITS) { + printk(KERN_DEBUG + "set_cr3: #GP, reserved bits\n"); + inject_gp(vcpu); + return; + } + if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { + printk(KERN_DEBUG "set_cr3: #GP, pdptrs " + "reserved bits\n"); + inject_gp(vcpu); + return; + } + } else { + if (cr3 & CR3_NONPAE_RESERVED_BITS) { + printk(KERN_DEBUG + "set_cr3: #GP, reserved bits\n"); + inject_gp(vcpu); + return; + } } } vcpu->cr3 = cr3; - spin_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->lock); /* * Does the new cr3 value map to physical memory? (Note, we * catch an invalid cr3 even in real-mode, because it would @@ -605,13 +664,13 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) inject_gp(vcpu); else vcpu->mmu.new_cr3(vcpu); - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); } EXPORT_SYMBOL_GPL(set_cr3); void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) { - if ( cr8 & CR8_RESEVED_BITS) { + if (cr8 & CR8_RESERVED_BITS) { printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8); inject_gp(vcpu); return; @@ -682,7 +741,7 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; raced: - spin_lock(&kvm->lock); + mutex_lock(&kvm->lock); memory_config_version = kvm->memory_config_version; new = old = *memslot; @@ -711,7 +770,7 @@ raced: * Do memory allocations outside lock. memory_config_version will * detect any races. */ - spin_unlock(&kvm->lock); + mutex_unlock(&kvm->lock); /* Deallocate if slot is being removed */ if (!npages) @@ -750,10 +809,10 @@ raced: memset(new.dirty_bitmap, 0, dirty_bytes); } - spin_lock(&kvm->lock); + mutex_lock(&kvm->lock); if (memory_config_version != kvm->memory_config_version) { - spin_unlock(&kvm->lock); + mutex_unlock(&kvm->lock); kvm_free_physmem_slot(&new, &old); goto raced; } @@ -771,13 +830,13 @@ raced: kvm_mmu_slot_remove_write_access(kvm, mem->slot); kvm_flush_remote_tlbs(kvm); - spin_unlock(&kvm->lock); + mutex_unlock(&kvm->lock); kvm_free_physmem_slot(&old, &new); return 0; out_unlock: - spin_unlock(&kvm->lock); + mutex_unlock(&kvm->lock); out_free: kvm_free_physmem_slot(&new, &old); out: @@ -795,14 +854,14 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, int n; unsigned long any = 0; - spin_lock(&kvm->lock); + mutex_lock(&kvm->lock); /* * Prevent changes to guest memory configuration even while the lock * is not taken. */ ++kvm->busy; - spin_unlock(&kvm->lock); + mutex_unlock(&kvm->lock); r = -EINVAL; if (log->slot >= KVM_MEMORY_SLOTS) goto out; @@ -821,18 +880,18 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) goto out; - spin_lock(&kvm->lock); + mutex_lock(&kvm->lock); kvm_mmu_slot_remove_write_access(kvm, log->slot); kvm_flush_remote_tlbs(kvm); memset(memslot->dirty_bitmap, 0, n); - spin_unlock(&kvm->lock); + mutex_unlock(&kvm->lock); r = 0; out: - spin_lock(&kvm->lock); + mutex_lock(&kvm->lock); --kvm->busy; - spin_unlock(&kvm->lock); + mutex_unlock(&kvm->lock); return r; } @@ -862,7 +921,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, < alias->target_phys_addr) goto out; - spin_lock(&kvm->lock); + mutex_lock(&kvm->lock); p = &kvm->aliases[alias->slot]; p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; @@ -876,7 +935,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, kvm_mmu_zap_all(kvm); - spin_unlock(&kvm->lock); + mutex_unlock(&kvm->lock); return 0; @@ -1061,7 +1120,6 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, { struct page *page; void *virt; - unsigned offset = offset_in_page(gpa); if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT)) return 0; @@ -1070,7 +1128,7 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, return 0; mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT); virt = kmap_atomic(page, KM_USER0); - kvm_mmu_pte_write(vcpu, gpa, virt + offset, val, bytes); + kvm_mmu_pte_write(vcpu, gpa, val, bytes); memcpy(virt + offset_in_page(gpa), val, bytes); kunmap_atomic(virt, KM_USER0); return 1; @@ -1160,7 +1218,7 @@ int emulate_clts(struct kvm_vcpu *vcpu) { unsigned long cr0; - cr0 = vcpu->cr0 & ~CR0_TS_MASK; + cr0 = vcpu->cr0 & ~X86_CR0_TS; kvm_arch_ops->set_cr0(vcpu, cr0); return X86EMUL_CONTINUE; } @@ -1262,6 +1320,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, r = x86_emulate_memop(&emulate_ctxt, &emulate_ops); if ((r || vcpu->mmio_is_write) && run) { + run->exit_reason = KVM_EXIT_MMIO; run->mmio.phys_addr = vcpu->mmio_phys_addr; memcpy(run->mmio.data, vcpu->mmio_data, 8); run->mmio.len = vcpu->mmio_size; @@ -1329,6 +1388,7 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) } switch (nr) { default: + run->hypercall.nr = nr; run->hypercall.args[0] = a0; run->hypercall.args[1] = a1; run->hypercall.args[2] = a2; @@ -1439,7 +1499,7 @@ static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa) mark_page_dirty(vcpu->kvm, para_state_gpa >> PAGE_SHIFT); para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT); - para_state = kmap_atomic(para_state_page, KM_USER0); + para_state = kmap(para_state_page); printk(KERN_DEBUG ".... guest version: %d\n", para_state->guest_version); printk(KERN_DEBUG ".... size: %d\n", para_state->size); @@ -1475,7 +1535,7 @@ static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa) para_state->ret = 0; err_kunmap_skip: - kunmap_atomic(para_state, KM_USER0); + kunmap(para_state_page); return 0; err_gp: return 1; @@ -1622,30 +1682,10 @@ void kvm_resched(struct kvm_vcpu *vcpu) { if (!need_resched()) return; - vcpu_put(vcpu); cond_resched(); - vcpu_load(vcpu); } EXPORT_SYMBOL_GPL(kvm_resched); -void load_msrs(struct vmx_msr_entry *e, int n) -{ - int i; - - for (i = 0; i < n; ++i) - wrmsrl(e[i].index, e[i].data); -} -EXPORT_SYMBOL_GPL(load_msrs); - -void save_msrs(struct vmx_msr_entry *e, int n) -{ - int i; - - for (i = 0; i < n; ++i) - rdmsrl(e[i].index, e[i].data); -} -EXPORT_SYMBOL_GPL(save_msrs); - void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) { int i; @@ -1690,11 +1730,9 @@ static int pio_copy_data(struct kvm_vcpu *vcpu) unsigned bytes; int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1; - kvm_arch_ops->vcpu_put(vcpu); q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE, PAGE_KERNEL); if (!q) { - kvm_arch_ops->vcpu_load(vcpu); free_pio_guest_pages(vcpu); return -ENOMEM; } @@ -1706,7 +1744,6 @@ static int pio_copy_data(struct kvm_vcpu *vcpu) memcpy(p, q, bytes); q -= vcpu->pio.guest_page_offset; vunmap(q); - kvm_arch_ops->vcpu_load(vcpu); free_pio_guest_pages(vcpu); return 0; } @@ -1760,18 +1797,35 @@ static int complete_pio(struct kvm_vcpu *vcpu) return 0; } -void kernel_pio(struct kvm_io_device *pio_dev, struct kvm_vcpu *vcpu) +static void kernel_pio(struct kvm_io_device *pio_dev, + struct kvm_vcpu *vcpu, + void *pd) { /* TODO: String I/O for in kernel device */ if (vcpu->pio.in) kvm_iodevice_read(pio_dev, vcpu->pio.port, vcpu->pio.size, - vcpu->pio_data); + pd); else kvm_iodevice_write(pio_dev, vcpu->pio.port, vcpu->pio.size, - vcpu->pio_data); + pd); +} + +static void pio_string_write(struct kvm_io_device *pio_dev, + struct kvm_vcpu *vcpu) +{ + struct kvm_pio_request *io = &vcpu->pio; + void *pd = vcpu->pio_data; + int i; + + for (i = 0; i < io->cur_count; i++) { + kvm_iodevice_write(pio_dev, io->port, + io->size, + pd); + pd += io->size; + } } int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, @@ -1779,7 +1833,7 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, gva_t address, int rep, unsigned port) { unsigned now, in_page; - int i; + int i, ret = 0; int nr_pages = 1; struct page *page; struct kvm_io_device *pio_dev; @@ -1806,15 +1860,12 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4); kvm_arch_ops->decache_regs(vcpu); if (pio_dev) { - kernel_pio(pio_dev, vcpu); + kernel_pio(pio_dev, vcpu, vcpu->pio_data); complete_pio(vcpu); return 1; } return 0; } - /* TODO: String I/O for in kernel device */ - if (pio_dev) - printk(KERN_ERR "kvm_setup_pio: no string io support\n"); if (!count) { kvm_arch_ops->skip_emulated_instruction(vcpu); @@ -1849,12 +1900,12 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, vcpu->pio.cur_count = now; for (i = 0; i < nr_pages; ++i) { - spin_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->lock); page = gva_to_page(vcpu, address + i * PAGE_SIZE); if (page) get_page(page); vcpu->pio.guest_pages[i] = page; - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); if (!page) { inject_gp(vcpu); free_pio_guest_pages(vcpu); @@ -1862,9 +1913,21 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, } } - if (!vcpu->pio.in) - return pio_copy_data(vcpu); - return 0; + if (!vcpu->pio.in) { + /* string PIO write */ + ret = pio_copy_data(vcpu); + if (ret >= 0 && pio_dev) { + pio_string_write(pio_dev, vcpu); + complete_pio(vcpu); + if (vcpu->pio.count == 0) + ret = 1; + } + } else if (pio_dev) + printk(KERN_ERR "no string pio read support yet, " + "port %x size %d count %ld\n", + port, size, count); + + return ret; } EXPORT_SYMBOL_GPL(kvm_setup_pio); @@ -1897,7 +1960,6 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) /* * Read-modify-write. Back to userspace. */ - kvm_run->exit_reason = KVM_EXIT_MMIO; r = 0; goto out; } @@ -2090,7 +2152,7 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, memcpy(vcpu->irq_pending, sregs->interrupt_bitmap, sizeof vcpu->irq_pending); vcpu->irq_summary = 0; - for (i = 0; i < NR_IRQ_WORDS; ++i) + for (i = 0; i < ARRAY_SIZE(vcpu->irq_pending); ++i) if (vcpu->irq_pending[i]) __set_bit(i, &vcpu->irq_summary); @@ -2236,13 +2298,13 @@ static int kvm_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, gpa_t gpa; vcpu_load(vcpu); - spin_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->lock); gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr); tr->physical_address = gpa; tr->valid = gpa != UNMAPPED_GVA; tr->writeable = 1; tr->usermode = 0; - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); vcpu_put(vcpu); return 0; @@ -2285,7 +2347,6 @@ static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma, unsigned long pgoff; struct page *page; - *type = VM_FAULT_MINOR; pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; if (pgoff == 0) page = virt_to_page(vcpu->run); @@ -2294,6 +2355,9 @@ static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma, else return NOPAGE_SIGBUS; get_page(page); + if (type != NULL) + *type = VM_FAULT_MINOR; + return page; } @@ -2346,74 +2410,49 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) { int r; struct kvm_vcpu *vcpu; - struct page *page; - r = -EINVAL; if (!valid_vcpu(n)) - goto out; - - vcpu = &kvm->vcpus[n]; - - mutex_lock(&vcpu->mutex); - - if (vcpu->vmcs) { - mutex_unlock(&vcpu->mutex); - return -EEXIST; - } - - page = alloc_page(GFP_KERNEL | __GFP_ZERO); - r = -ENOMEM; - if (!page) - goto out_unlock; - vcpu->run = page_address(page); - - page = alloc_page(GFP_KERNEL | __GFP_ZERO); - r = -ENOMEM; - if (!page) - goto out_free_run; - vcpu->pio_data = page_address(page); + return -EINVAL; - vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, - FX_IMAGE_ALIGN); - vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; - vcpu->cr0 = 0x10; + vcpu = kvm_arch_ops->vcpu_create(kvm, n); + if (IS_ERR(vcpu)) + return PTR_ERR(vcpu); - r = kvm_arch_ops->vcpu_create(vcpu); - if (r < 0) - goto out_free_vcpus; + preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); - r = kvm_mmu_create(vcpu); - if (r < 0) - goto out_free_vcpus; - - kvm_arch_ops->vcpu_load(vcpu); + vcpu_load(vcpu); r = kvm_mmu_setup(vcpu); - if (r >= 0) - r = kvm_arch_ops->vcpu_setup(vcpu); vcpu_put(vcpu); - if (r < 0) - goto out_free_vcpus; + goto free_vcpu; + + mutex_lock(&kvm->lock); + if (kvm->vcpus[n]) { + r = -EEXIST; + mutex_unlock(&kvm->lock); + goto mmu_unload; + } + kvm->vcpus[n] = vcpu; + mutex_unlock(&kvm->lock); + /* Now it's all set up, let userspace reach it */ r = create_vcpu_fd(vcpu); if (r < 0) - goto out_free_vcpus; + goto unlink; + return r; - spin_lock(&kvm_lock); - if (n >= kvm->nvcpus) - kvm->nvcpus = n + 1; - spin_unlock(&kvm_lock); +unlink: + mutex_lock(&kvm->lock); + kvm->vcpus[n] = NULL; + mutex_unlock(&kvm->lock); - return r; +mmu_unload: + vcpu_load(vcpu); + kvm_mmu_unload(vcpu); + vcpu_put(vcpu); -out_free_vcpus: - kvm_free_vcpu(vcpu); -out_free_run: - free_page((unsigned long)vcpu->run); - vcpu->run = NULL; -out_unlock: - mutex_unlock(&vcpu->mutex); -out: +free_vcpu: + kvm_arch_ops->vcpu_free(vcpu); return r; } @@ -2768,12 +2807,14 @@ static struct page *kvm_vm_nopage(struct vm_area_struct *vma, unsigned long pgoff; struct page *page; - *type = VM_FAULT_MINOR; pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; page = gfn_to_page(kvm, pgoff); if (!page) return NOPAGE_SIGBUS; get_page(page); + if (type != NULL) + *type = VM_FAULT_MINOR; + return page; } @@ -2893,25 +2934,6 @@ static struct miscdevice kvm_dev = { &kvm_chardev_ops, }; -static int kvm_reboot(struct notifier_block *notifier, unsigned long val, - void *v) -{ - if (val == SYS_RESTART) { - /* - * Some (well, at least mine) BIOSes hang on reboot if - * in vmx root mode. - */ - printk(KERN_INFO "kvm: exiting hardware virtualization\n"); - on_each_cpu(hardware_disable, NULL, 0, 1); - } - return NOTIFY_OK; -} - -static struct notifier_block kvm_reboot_notifier = { - .notifier_call = kvm_reboot, - .priority = 0, -}; - /* * Make sure that a cpu that is being hot-unplugged does not have any vcpus * cached on it. @@ -2925,7 +2947,9 @@ static void decache_vcpus_on_cpu(int cpu) spin_lock(&kvm_lock); list_for_each_entry(vm, &vm_list, vm_list) for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = &vm->vcpus[i]; + vcpu = vm->vcpus[i]; + if (!vcpu) + continue; /* * If the vcpu is locked, then it is running on some * other cpu and therefore it is not cached on the @@ -2994,6 +3018,25 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, return NOTIFY_OK; } +static int kvm_reboot(struct notifier_block *notifier, unsigned long val, + void *v) +{ + if (val == SYS_RESTART) { + /* + * Some (well, at least mine) BIOSes hang on reboot if + * in vmx root mode. + */ + printk(KERN_INFO "kvm: exiting hardware virtualization\n"); + on_each_cpu(hardware_disable, NULL, 0, 1); + } + return NOTIFY_OK; +} + +static struct notifier_block kvm_reboot_notifier = { + .notifier_call = kvm_reboot, + .priority = 0, +}; + void kvm_io_bus_init(struct kvm_io_bus *bus) { memset(bus, 0, sizeof(*bus)); @@ -3047,8 +3090,9 @@ static u64 stat_get(void *_offset) spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = &kvm->vcpus[i]; - total += *(u32 *)((void *)vcpu + offset); + vcpu = kvm->vcpus[i]; + if (vcpu) + total += *(u32 *)((void *)vcpu + offset); } spin_unlock(&kvm_lock); return total; @@ -3105,6 +3149,27 @@ static struct sys_device kvm_sysdev = { hpa_t bad_page_address; +static inline +struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) +{ + return container_of(pn, struct kvm_vcpu, preempt_notifier); +} + +static void kvm_sched_in(struct preempt_notifier *pn, int cpu) +{ + struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + + kvm_arch_ops->vcpu_load(vcpu, cpu); +} + +static void kvm_sched_out(struct preempt_notifier *pn, + struct task_struct *next) +{ + struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + + kvm_arch_ops->vcpu_put(vcpu); +} + int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) { int r; @@ -3151,6 +3216,9 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) goto out_free; } + kvm_preempt_ops.sched_in = kvm_sched_in; + kvm_preempt_ops.sched_out = kvm_sched_out; + return r; out_free: