X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=drivers%2Fkvm%2Fkvm_main.c;h=7aeaaba79c548fffaca68b6800faf3b0e187a529;hb=11ec2804711896546ee3c945f3786c7f9fdd175a;hp=e157e282fcffd1159c4a63a02a96f9b4b2b6982c;hpb=2eeb2e94eb6232f0895da696c10e6636093ff72b;p=linux-2.6 diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index e157e282fc..7aeaaba79c 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -16,36 +16,33 @@ */ #include "kvm.h" +#include "x86_emulate.h" +#include "segment_descriptor.h" #include #include #include -#include -#include #include #include -#include #include #include #include -#include #include -#include #include #include #include -#include #include #include -#include -#include -#include #include #include #include +#include -#include "x86_emulate.h" -#include "segment_descriptor.h" +#include +#include +#include +#include +#include MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); @@ -53,8 +50,12 @@ MODULE_LICENSE("GPL"); static DEFINE_SPINLOCK(kvm_lock); static LIST_HEAD(vm_list); +static cpumask_t cpus_hardware_enabled; + struct kvm_arch_ops *kvm_arch_ops; +static __read_mostly struct preempt_ops kvm_preempt_ops; + #define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x) static struct kvm_stats_debugfs_item { @@ -81,14 +82,19 @@ static struct kvm_stats_debugfs_item { static struct dentry *debugfs_dir; -struct vfsmount *kvmfs_mnt; - #define MAX_IO_MSRS 256 -#define CR0_RESEVED_BITS 0xffffffff1ffaffc0ULL -#define LMSW_GUEST_MASK 0x0eULL -#define CR4_RESEVED_BITS (~((1ULL << 11) - 1)) -#define CR8_RESEVED_BITS (~0x0fULL) +#define CR0_RESERVED_BITS \ + (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ + | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ + | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) +#define CR4_RESERVED_BITS \ + (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ + | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ + | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ + | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) + +#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) #define EFER_RESERVED_BITS 0xfffffffffffff2fe #ifdef CONFIG_X86_64 @@ -104,55 +110,6 @@ struct segment_descriptor_64 { static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); -static struct inode *kvmfs_inode(struct file_operations *fops) -{ - int error = -ENOMEM; - struct inode *inode = new_inode(kvmfs_mnt->mnt_sb); - - if (!inode) - goto eexit_1; - - inode->i_fop = fops; - - /* - * Mark the inode dirty from the very beginning, - * that way it will never be moved to the dirty - * list because mark_inode_dirty() will think - * that it already _is_ on the dirty list. - */ - inode->i_state = I_DIRTY; - inode->i_mode = S_IRUSR | S_IWUSR; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - return inode; - -eexit_1: - return ERR_PTR(error); -} - -static struct file *kvmfs_file(struct inode *inode, void *private_data) -{ - struct file *file = get_empty_filp(); - - if (!file) - return ERR_PTR(-ENFILE); - - file->f_path.mnt = mntget(kvmfs_mnt); - file->f_path.dentry = d_alloc_anon(inode); - if (!file->f_path.dentry) - return ERR_PTR(-ENOMEM); - file->f_mapping = inode->i_mapping; - - file->f_pos = 0; - file->f_flags = O_RDWR; - file->f_op = inode->i_fop; - file->f_mode = FMODE_READ | FMODE_WRITE; - file->f_version = 0; - file->private_data = private_data; - return file; -} - unsigned long segment_base(u16 selector) { struct descriptor_table gdt; @@ -284,30 +241,21 @@ EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); */ static void vcpu_load(struct kvm_vcpu *vcpu) { - mutex_lock(&vcpu->mutex); - kvm_arch_ops->vcpu_load(vcpu); -} - -/* - * Switches to specified vcpu, until a matching vcpu_put(). Will return NULL - * if the slot is not populated. - */ -static struct kvm_vcpu *vcpu_load_slot(struct kvm *kvm, int slot) -{ - struct kvm_vcpu *vcpu = &kvm->vcpus[slot]; + int cpu; mutex_lock(&vcpu->mutex); - if (!vcpu->vmcs) { - mutex_unlock(&vcpu->mutex); - return NULL; - } - kvm_arch_ops->vcpu_load(vcpu); - return vcpu; + cpu = get_cpu(); + preempt_notifier_register(&vcpu->preempt_notifier); + kvm_arch_ops->vcpu_load(vcpu, cpu); + put_cpu(); } static void vcpu_put(struct kvm_vcpu *vcpu) { + preempt_disable(); kvm_arch_ops->vcpu_put(vcpu); + preempt_notifier_unregister(&vcpu->preempt_notifier); + preempt_enable(); mutex_unlock(&vcpu->mutex); } @@ -328,8 +276,10 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) atomic_set(&completed, 0); cpus_clear(cpus); needed = 0; - for (i = 0; i < kvm->nvcpus; ++i) { - vcpu = &kvm->vcpus[i]; + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + vcpu = kvm->vcpus[i]; + if (!vcpu) + continue; if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests)) continue; cpu = vcpu->cpu; @@ -353,28 +303,72 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) } } +int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) +{ + struct page *page; + int r; + + mutex_init(&vcpu->mutex); + vcpu->cpu = -1; + vcpu->mmu.root_hpa = INVALID_PAGE; + vcpu->kvm = kvm; + vcpu->vcpu_id = id; + + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) { + r = -ENOMEM; + goto fail; + } + vcpu->run = page_address(page); + + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) { + r = -ENOMEM; + goto fail_free_run; + } + vcpu->pio_data = page_address(page); + + vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, + FX_IMAGE_ALIGN); + vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; + + r = kvm_mmu_create(vcpu); + if (r < 0) + goto fail_free_pio_data; + + return 0; + +fail_free_pio_data: + free_page((unsigned long)vcpu->pio_data); +fail_free_run: + free_page((unsigned long)vcpu->run); +fail: + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(kvm_vcpu_init); + +void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + kvm_mmu_destroy(vcpu); + free_page((unsigned long)vcpu->pio_data); + free_page((unsigned long)vcpu->run); +} +EXPORT_SYMBOL_GPL(kvm_vcpu_uninit); + static struct kvm *kvm_create_vm(void) { struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); - int i; if (!kvm) return ERR_PTR(-ENOMEM); - spin_lock_init(&kvm->lock); + kvm_io_bus_init(&kvm->pio_bus); + mutex_init(&kvm->lock); INIT_LIST_HEAD(&kvm->active_mmu_pages); + kvm_io_bus_init(&kvm->mmio_bus); spin_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); spin_unlock(&kvm_lock); - kvm_io_bus_init(&kvm->mmio_bus); - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - struct kvm_vcpu *vcpu = &kvm->vcpus[i]; - - mutex_init(&vcpu->mutex); - vcpu->cpu = -1; - vcpu->kvm = kvm; - vcpu->mmu.root_hpa = INVALID_PAGE; - } return kvm; } @@ -428,30 +422,11 @@ static void free_pio_guest_pages(struct kvm_vcpu *vcpu) static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) { - if (!vcpu->vmcs) - return; - vcpu_load(vcpu); kvm_mmu_unload(vcpu); vcpu_put(vcpu); } -static void kvm_free_vcpu(struct kvm_vcpu *vcpu) -{ - if (!vcpu->vmcs) - return; - - vcpu_load(vcpu); - kvm_mmu_destroy(vcpu); - vcpu_put(vcpu); - kvm_arch_ops->vcpu_free(vcpu); - free_page((unsigned long)vcpu->run); - vcpu->run = NULL; - free_page((unsigned long)vcpu->pio_data); - vcpu->pio_data = NULL; - free_pio_guest_pages(vcpu); -} - static void kvm_free_vcpus(struct kvm *kvm) { unsigned int i; @@ -460,9 +435,15 @@ static void kvm_free_vcpus(struct kvm *kvm) * Unpin any mmu pages first. */ for (i = 0; i < KVM_MAX_VCPUS; ++i) - kvm_unload_vcpu_mmu(&kvm->vcpus[i]); - for (i = 0; i < KVM_MAX_VCPUS; ++i) - kvm_free_vcpu(&kvm->vcpus[i]); + if (kvm->vcpus[i]) + kvm_unload_vcpu_mmu(kvm->vcpus[i]); + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (kvm->vcpus[i]) { + kvm_arch_ops->vcpu_free(kvm->vcpus[i]); + kvm->vcpus[i] = NULL; + } + } + } static int kvm_dev_release(struct inode *inode, struct file *filp) @@ -475,6 +456,7 @@ static void kvm_destroy_vm(struct kvm *kvm) spin_lock(&kvm_lock); list_del(&kvm->vm_list); spin_unlock(&kvm_lock); + kvm_io_bus_destroy(&kvm->pio_bus); kvm_io_bus_destroy(&kvm->mmio_bus); kvm_free_vcpus(kvm); kvm_free_physmem(kvm); @@ -502,58 +484,60 @@ static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT; unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2; int i; - u64 pdpte; u64 *pdpt; int ret; struct page *page; + u64 pdpte[ARRAY_SIZE(vcpu->pdptrs)]; - spin_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->lock); page = gfn_to_page(vcpu->kvm, pdpt_gfn); - /* FIXME: !page - emulate? 0xff? */ + if (!page) { + ret = 0; + goto out; + } + pdpt = kmap_atomic(page, KM_USER0); + memcpy(pdpte, pdpt+offset, sizeof(pdpte)); + kunmap_atomic(pdpt, KM_USER0); - ret = 1; - for (i = 0; i < 4; ++i) { - pdpte = pdpt[offset + i]; - if ((pdpte & 1) && (pdpte & 0xfffffff0000001e6ull)) { + for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { + if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) { ret = 0; goto out; } } + ret = 1; - for (i = 0; i < 4; ++i) - vcpu->pdptrs[i] = pdpt[offset + i]; - + memcpy(vcpu->pdptrs, pdpte, sizeof(vcpu->pdptrs)); out: - kunmap_atomic(pdpt, KM_USER0); - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); return ret; } void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { - if (cr0 & CR0_RESEVED_BITS) { + if (cr0 & CR0_RESERVED_BITS) { printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", cr0, vcpu->cr0); inject_gp(vcpu); return; } - if ((cr0 & CR0_NW_MASK) && !(cr0 & CR0_CD_MASK)) { + if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n"); inject_gp(vcpu); return; } - if ((cr0 & CR0_PG_MASK) && !(cr0 & CR0_PE_MASK)) { + if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { printk(KERN_DEBUG "set_cr0: #GP, set PG flag " "and a clear PE flag\n"); inject_gp(vcpu); return; } - if (!is_paging(vcpu) && (cr0 & CR0_PG_MASK)) { + if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { #ifdef CONFIG_X86_64 if ((vcpu->shadow_efer & EFER_LME)) { int cs_db, cs_l; @@ -586,9 +570,9 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) kvm_arch_ops->set_cr0(vcpu, cr0); vcpu->cr0 = cr0; - spin_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->lock); kvm_mmu_reset_context(vcpu); - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); return; } EXPORT_SYMBOL_GPL(set_cr0); @@ -601,62 +585,72 @@ EXPORT_SYMBOL_GPL(lmsw); void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { - if (cr4 & CR4_RESEVED_BITS) { + if (cr4 & CR4_RESERVED_BITS) { printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); inject_gp(vcpu); return; } if (is_long_mode(vcpu)) { - if (!(cr4 & CR4_PAE_MASK)) { + if (!(cr4 & X86_CR4_PAE)) { printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while " "in long mode\n"); inject_gp(vcpu); return; } - } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & CR4_PAE_MASK) + } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE) && !load_pdptrs(vcpu, vcpu->cr3)) { printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n"); inject_gp(vcpu); + return; } - if (cr4 & CR4_VMXE_MASK) { + if (cr4 & X86_CR4_VMXE) { printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n"); inject_gp(vcpu); return; } kvm_arch_ops->set_cr4(vcpu, cr4); - spin_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->lock); kvm_mmu_reset_context(vcpu); - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); } EXPORT_SYMBOL_GPL(set_cr4); void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { if (is_long_mode(vcpu)) { - if (cr3 & CR3_L_MODE_RESEVED_BITS) { + if (cr3 & CR3_L_MODE_RESERVED_BITS) { printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); inject_gp(vcpu); return; } } else { - if (cr3 & CR3_RESEVED_BITS) { - printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); - inject_gp(vcpu); - return; - } - if (is_paging(vcpu) && is_pae(vcpu) && - !load_pdptrs(vcpu, cr3)) { - printk(KERN_DEBUG "set_cr3: #GP, pdptrs " - "reserved bits\n"); - inject_gp(vcpu); - return; + if (is_pae(vcpu)) { + if (cr3 & CR3_PAE_RESERVED_BITS) { + printk(KERN_DEBUG + "set_cr3: #GP, reserved bits\n"); + inject_gp(vcpu); + return; + } + if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { + printk(KERN_DEBUG "set_cr3: #GP, pdptrs " + "reserved bits\n"); + inject_gp(vcpu); + return; + } + } else { + if (cr3 & CR3_NONPAE_RESERVED_BITS) { + printk(KERN_DEBUG + "set_cr3: #GP, reserved bits\n"); + inject_gp(vcpu); + return; + } } } vcpu->cr3 = cr3; - spin_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->lock); /* * Does the new cr3 value map to physical memory? (Note, we * catch an invalid cr3 even in real-mode, because it would @@ -670,13 +664,13 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) inject_gp(vcpu); else vcpu->mmu.new_cr3(vcpu); - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); } EXPORT_SYMBOL_GPL(set_cr3); void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) { - if ( cr8 & CR8_RESEVED_BITS) { + if (cr8 & CR8_RESERVED_BITS) { printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8); inject_gp(vcpu); return; @@ -711,13 +705,6 @@ void fx_init(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(fx_init); -static void do_remove_write_access(struct kvm_vcpu *vcpu, int slot) -{ - spin_lock(&vcpu->kvm->lock); - kvm_mmu_slot_remove_write_access(vcpu, slot); - spin_unlock(&vcpu->kvm->lock); -} - /* * Allocate some memory and give it an address in the guest physical address * space. @@ -754,7 +741,7 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; raced: - spin_lock(&kvm->lock); + mutex_lock(&kvm->lock); memory_config_version = kvm->memory_config_version; new = old = *memslot; @@ -783,7 +770,7 @@ raced: * Do memory allocations outside lock. memory_config_version will * detect any races. */ - spin_unlock(&kvm->lock); + mutex_unlock(&kvm->lock); /* Deallocate if slot is being removed */ if (!npages) @@ -822,10 +809,10 @@ raced: memset(new.dirty_bitmap, 0, dirty_bytes); } - spin_lock(&kvm->lock); + mutex_lock(&kvm->lock); if (memory_config_version != kvm->memory_config_version) { - spin_unlock(&kvm->lock); + mutex_unlock(&kvm->lock); kvm_free_physmem_slot(&new, &old); goto raced; } @@ -840,25 +827,16 @@ raced: *memslot = new; ++kvm->memory_config_version; - spin_unlock(&kvm->lock); - - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - struct kvm_vcpu *vcpu; + kvm_mmu_slot_remove_write_access(kvm, mem->slot); + kvm_flush_remote_tlbs(kvm); - vcpu = vcpu_load_slot(kvm, i); - if (!vcpu) - continue; - if (new.flags & KVM_MEM_LOG_DIRTY_PAGES) - do_remove_write_access(vcpu, mem->slot); - kvm_mmu_reset_context(vcpu); - vcpu_put(vcpu); - } + mutex_unlock(&kvm->lock); kvm_free_physmem_slot(&old, &new); return 0; out_unlock: - spin_unlock(&kvm->lock); + mutex_unlock(&kvm->lock); out_free: kvm_free_physmem_slot(&new, &old); out: @@ -874,17 +852,16 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot; int r, i; int n; - int cleared; unsigned long any = 0; - spin_lock(&kvm->lock); + mutex_lock(&kvm->lock); /* * Prevent changes to guest memory configuration even while the lock * is not taken. */ ++kvm->busy; - spin_unlock(&kvm->lock); + mutex_unlock(&kvm->lock); r = -EINVAL; if (log->slot >= KVM_MEMORY_SLOTS) goto out; @@ -903,30 +880,18 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) goto out; - if (any) { - cleared = 0; - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - struct kvm_vcpu *vcpu; - - vcpu = vcpu_load_slot(kvm, i); - if (!vcpu) - continue; - if (!cleared) { - do_remove_write_access(vcpu, log->slot); - memset(memslot->dirty_bitmap, 0, n); - cleared = 1; - } - kvm_arch_ops->tlb_flush(vcpu); - vcpu_put(vcpu); - } - } + mutex_lock(&kvm->lock); + kvm_mmu_slot_remove_write_access(kvm, log->slot); + kvm_flush_remote_tlbs(kvm); + memset(memslot->dirty_bitmap, 0, n); + mutex_unlock(&kvm->lock); r = 0; out: - spin_lock(&kvm->lock); + mutex_lock(&kvm->lock); --kvm->busy; - spin_unlock(&kvm->lock); + mutex_unlock(&kvm->lock); return r; } @@ -956,7 +921,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, < alias->target_phys_addr) goto out; - spin_lock(&kvm->lock); + mutex_lock(&kvm->lock); p = &kvm->aliases[alias->slot]; p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; @@ -968,13 +933,9 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, break; kvm->naliases = n; - spin_unlock(&kvm->lock); + kvm_mmu_zap_all(kvm); - vcpu_load(&kvm->vcpus[0]); - spin_lock(&kvm->lock); - kvm_mmu_zap_all(&kvm->vcpus[0]); - spin_unlock(&kvm->lock); - vcpu_put(&kvm->vcpus[0]); + mutex_unlock(&kvm->lock); return 0; @@ -1110,6 +1071,12 @@ static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, return kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); } +static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu, + gpa_t addr) +{ + return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr); +} + static int emulator_read_emulated(unsigned long addr, void *val, unsigned int bytes, @@ -1153,7 +1120,6 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, { struct page *page; void *virt; - unsigned offset = offset_in_page(gpa); if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT)) return 0; @@ -1162,16 +1128,16 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, return 0; mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT); virt = kmap_atomic(page, KM_USER0); - kvm_mmu_pte_write(vcpu, gpa, virt + offset, val, bytes); + kvm_mmu_pte_write(vcpu, gpa, val, bytes); memcpy(virt + offset_in_page(gpa), val, bytes); kunmap_atomic(virt, KM_USER0); return 1; } -static int emulator_write_emulated(unsigned long addr, - const void *val, - unsigned int bytes, - struct x86_emulate_ctxt *ctxt) +static int emulator_write_emulated_onepage(unsigned long addr, + const void *val, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) { struct kvm_vcpu *vcpu = ctxt->vcpu; struct kvm_io_device *mmio_dev; @@ -1203,6 +1169,26 @@ static int emulator_write_emulated(unsigned long addr, return X86EMUL_CONTINUE; } +static int emulator_write_emulated(unsigned long addr, + const void *val, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) +{ + /* Crossing a page boundary? */ + if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { + int rc, now; + + now = -addr & ~PAGE_MASK; + rc = emulator_write_emulated_onepage(addr, val, now, ctxt); + if (rc != X86EMUL_CONTINUE) + return rc; + addr += now; + val += now; + bytes -= now; + } + return emulator_write_emulated_onepage(addr, val, bytes, ctxt); +} + static int emulator_cmpxchg_emulated(unsigned long addr, const void *old, const void *new, @@ -1232,7 +1218,7 @@ int emulate_clts(struct kvm_vcpu *vcpu) { unsigned long cr0; - cr0 = vcpu->cr0 & ~CR0_TS_MASK; + cr0 = vcpu->cr0 & ~X86_CR0_TS; kvm_arch_ops->set_cr0(vcpu, cr0); return X86EMUL_CONTINUE; } @@ -1334,6 +1320,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, r = x86_emulate_memop(&emulate_ctxt, &emulate_ops); if ((r || vcpu->mmio_is_write) && run) { + run->exit_reason = KVM_EXIT_MMIO; run->mmio.phys_addr = vcpu->mmio_phys_addr; memcpy(run->mmio.data, vcpu->mmio_data, 8); run->mmio.len = vcpu->mmio_size; @@ -1401,6 +1388,7 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) } switch (nr) { default: + run->hypercall.nr = nr; run->hypercall.args[0] = a0; run->hypercall.args[1] = a1; run->hypercall.args[2] = a2; @@ -1511,7 +1499,7 @@ static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa) mark_page_dirty(vcpu->kvm, para_state_gpa >> PAGE_SHIFT); para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT); - para_state = kmap_atomic(para_state_page, KM_USER0); + para_state = kmap(para_state_page); printk(KERN_DEBUG ".... guest version: %d\n", para_state->guest_version); printk(KERN_DEBUG ".... size: %d\n", para_state->size); @@ -1547,7 +1535,7 @@ static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa) para_state->ret = 0; err_kunmap_skip: - kunmap_atomic(para_state, KM_USER0); + kunmap(para_state_page); return 0; err_gp: return 1; @@ -1607,7 +1595,7 @@ EXPORT_SYMBOL_GPL(kvm_get_msr_common); * Returns 0 on success, non-0 otherwise. * Assumes vcpu_load() was already called. */ -static int get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) +int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) { return kvm_arch_ops->get_msr(vcpu, msr_index, pdata); } @@ -1685,7 +1673,7 @@ EXPORT_SYMBOL_GPL(kvm_set_msr_common); * Returns 0 on success, non-0 otherwise. * Assumes vcpu_load() was already called. */ -static int set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) +int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) { return kvm_arch_ops->set_msr(vcpu, msr_index, data); } @@ -1694,30 +1682,10 @@ void kvm_resched(struct kvm_vcpu *vcpu) { if (!need_resched()) return; - vcpu_put(vcpu); cond_resched(); - vcpu_load(vcpu); } EXPORT_SYMBOL_GPL(kvm_resched); -void load_msrs(struct vmx_msr_entry *e, int n) -{ - int i; - - for (i = 0; i < n; ++i) - wrmsrl(e[i].index, e[i].data); -} -EXPORT_SYMBOL_GPL(load_msrs); - -void save_msrs(struct vmx_msr_entry *e, int n) -{ - int i; - - for (i = 0; i < n; ++i) - rdmsrl(e[i].index, e[i].data); -} -EXPORT_SYMBOL_GPL(save_msrs); - void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) { int i; @@ -1762,11 +1730,9 @@ static int pio_copy_data(struct kvm_vcpu *vcpu) unsigned bytes; int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1; - kvm_arch_ops->vcpu_put(vcpu); q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE, PAGE_KERNEL); if (!q) { - kvm_arch_ops->vcpu_load(vcpu); free_pio_guest_pages(vcpu); return -ENOMEM; } @@ -1778,7 +1744,6 @@ static int pio_copy_data(struct kvm_vcpu *vcpu) memcpy(p, q, bytes); q -= vcpu->pio.guest_page_offset; vunmap(q); - kvm_arch_ops->vcpu_load(vcpu); free_pio_guest_pages(vcpu); return 0; } @@ -1832,14 +1797,46 @@ static int complete_pio(struct kvm_vcpu *vcpu) return 0; } +static void kernel_pio(struct kvm_io_device *pio_dev, + struct kvm_vcpu *vcpu, + void *pd) +{ + /* TODO: String I/O for in kernel device */ + + if (vcpu->pio.in) + kvm_iodevice_read(pio_dev, vcpu->pio.port, + vcpu->pio.size, + pd); + else + kvm_iodevice_write(pio_dev, vcpu->pio.port, + vcpu->pio.size, + pd); +} + +static void pio_string_write(struct kvm_io_device *pio_dev, + struct kvm_vcpu *vcpu) +{ + struct kvm_pio_request *io = &vcpu->pio; + void *pd = vcpu->pio_data; + int i; + + for (i = 0; i < io->cur_count; i++) { + kvm_iodevice_write(pio_dev, io->port, + io->size, + pd); + pd += io->size; + } +} + int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, int size, unsigned long count, int string, int down, gva_t address, int rep, unsigned port) { unsigned now, in_page; - int i; + int i, ret = 0; int nr_pages = 1; struct page *page; + struct kvm_io_device *pio_dev; vcpu->run->exit_reason = KVM_EXIT_IO; vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; @@ -1851,15 +1848,22 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, vcpu->pio.cur_count = count; vcpu->pio.size = size; vcpu->pio.in = in; + vcpu->pio.port = port; vcpu->pio.string = string; vcpu->pio.down = down; vcpu->pio.guest_page_offset = offset_in_page(address); vcpu->pio.rep = rep; + pio_dev = vcpu_find_pio_dev(vcpu, port); if (!string) { kvm_arch_ops->cache_regs(vcpu); memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4); kvm_arch_ops->decache_regs(vcpu); + if (pio_dev) { + kernel_pio(pio_dev, vcpu, vcpu->pio_data); + complete_pio(vcpu); + return 1; + } return 0; } @@ -1896,12 +1900,12 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, vcpu->pio.cur_count = now; for (i = 0; i < nr_pages; ++i) { - spin_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->lock); page = gva_to_page(vcpu, address + i * PAGE_SIZE); if (page) get_page(page); vcpu->pio.guest_pages[i] = page; - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); if (!page) { inject_gp(vcpu); free_pio_guest_pages(vcpu); @@ -1909,9 +1913,21 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, } } - if (!vcpu->pio.in) - return pio_copy_data(vcpu); - return 0; + if (!vcpu->pio.in) { + /* string PIO write */ + ret = pio_copy_data(vcpu); + if (ret >= 0 && pio_dev) { + pio_string_write(pio_dev, vcpu); + complete_pio(vcpu); + if (vcpu->pio.count == 0) + ret = 1; + } + } else if (pio_dev) + printk(KERN_ERR "no string pio read support yet, " + "port %x size %d count %ld\n", + port, size, count); + + return ret; } EXPORT_SYMBOL_GPL(kvm_setup_pio); @@ -1944,7 +1960,6 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) /* * Read-modify-write. Back to userspace. */ - kvm_run->exit_reason = KVM_EXIT_MMIO; r = 0; goto out; } @@ -2137,7 +2152,7 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, memcpy(vcpu->irq_pending, sregs->interrupt_bitmap, sizeof vcpu->irq_pending); vcpu->irq_summary = 0; - for (i = 0; i < NR_IRQ_WORDS; ++i) + for (i = 0; i < ARRAY_SIZE(vcpu->irq_pending); ++i) if (vcpu->irq_pending[i]) __set_bit(i, &vcpu->irq_summary); @@ -2198,7 +2213,7 @@ static __init void kvm_init_msr_list(void) */ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) { - return set_msr(vcpu, index, *data); + return kvm_set_msr(vcpu, index, *data); } /* @@ -2283,13 +2298,13 @@ static int kvm_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, gpa_t gpa; vcpu_load(vcpu); - spin_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->lock); gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr); tr->physical_address = gpa; tr->valid = gpa != UNMAPPED_GVA; tr->writeable = 1; tr->usermode = 0; - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); vcpu_put(vcpu); return 0; @@ -2332,7 +2347,6 @@ static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma, unsigned long pgoff; struct page *page; - *type = VM_FAULT_MINOR; pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; if (pgoff == 0) page = virt_to_page(vcpu->run); @@ -2341,6 +2355,9 @@ static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma, else return NOPAGE_SIGBUS; get_page(page); + if (type != NULL) + *type = VM_FAULT_MINOR; + return page; } @@ -2378,34 +2395,12 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu) struct inode *inode; struct file *file; + r = anon_inode_getfd(&fd, &inode, &file, + "kvm-vcpu", &kvm_vcpu_fops, vcpu); + if (r) + return r; atomic_inc(&vcpu->kvm->filp->f_count); - inode = kvmfs_inode(&kvm_vcpu_fops); - if (IS_ERR(inode)) { - r = PTR_ERR(inode); - goto out1; - } - - file = kvmfs_file(inode, vcpu); - if (IS_ERR(file)) { - r = PTR_ERR(file); - goto out2; - } - - r = get_unused_fd(); - if (r < 0) - goto out3; - fd = r; - fd_install(fd, file); - return fd; - -out3: - fput(file); -out2: - iput(inode); -out1: - fput(vcpu->kvm->filp); - return r; } /* @@ -2415,74 +2410,49 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) { int r; struct kvm_vcpu *vcpu; - struct page *page; - r = -EINVAL; if (!valid_vcpu(n)) - goto out; - - vcpu = &kvm->vcpus[n]; - - mutex_lock(&vcpu->mutex); - - if (vcpu->vmcs) { - mutex_unlock(&vcpu->mutex); - return -EEXIST; - } - - page = alloc_page(GFP_KERNEL | __GFP_ZERO); - r = -ENOMEM; - if (!page) - goto out_unlock; - vcpu->run = page_address(page); - - page = alloc_page(GFP_KERNEL | __GFP_ZERO); - r = -ENOMEM; - if (!page) - goto out_free_run; - vcpu->pio_data = page_address(page); - - vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, - FX_IMAGE_ALIGN); - vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; - vcpu->cr0 = 0x10; + return -EINVAL; - r = kvm_arch_ops->vcpu_create(vcpu); - if (r < 0) - goto out_free_vcpus; + vcpu = kvm_arch_ops->vcpu_create(kvm, n); + if (IS_ERR(vcpu)) + return PTR_ERR(vcpu); - r = kvm_mmu_create(vcpu); - if (r < 0) - goto out_free_vcpus; + preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); - kvm_arch_ops->vcpu_load(vcpu); + vcpu_load(vcpu); r = kvm_mmu_setup(vcpu); - if (r >= 0) - r = kvm_arch_ops->vcpu_setup(vcpu); vcpu_put(vcpu); - if (r < 0) - goto out_free_vcpus; + goto free_vcpu; + mutex_lock(&kvm->lock); + if (kvm->vcpus[n]) { + r = -EEXIST; + mutex_unlock(&kvm->lock); + goto mmu_unload; + } + kvm->vcpus[n] = vcpu; + mutex_unlock(&kvm->lock); + + /* Now it's all set up, let userspace reach it */ r = create_vcpu_fd(vcpu); if (r < 0) - goto out_free_vcpus; + goto unlink; + return r; - spin_lock(&kvm_lock); - if (n >= kvm->nvcpus) - kvm->nvcpus = n + 1; - spin_unlock(&kvm_lock); +unlink: + mutex_lock(&kvm->lock); + kvm->vcpus[n] = NULL; + mutex_unlock(&kvm->lock); - return r; +mmu_unload: + vcpu_load(vcpu); + kvm_mmu_unload(vcpu); + vcpu_put(vcpu); -out_free_vcpus: - kvm_free_vcpu(vcpu); -out_free_run: - free_page((unsigned long)vcpu->run); - vcpu->run = NULL; -out_unlock: - mutex_unlock(&vcpu->mutex); -out: +free_vcpu: + kvm_arch_ops->vcpu_free(vcpu); return r; } @@ -2501,9 +2471,9 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) break; } } - if (entry && (entry->edx & EFER_NX) && !(efer & EFER_NX)) { + if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) { entry->edx &= ~(1 << 20); - printk(KERN_INFO ": guest NX capability removed\n"); + printk(KERN_INFO "kvm: guest NX capability removed\n"); } } @@ -2704,7 +2674,7 @@ static long kvm_vcpu_ioctl(struct file *filp, break; } case KVM_GET_MSRS: - r = msr_io(vcpu, argp, get_msr, 1); + r = msr_io(vcpu, argp, kvm_get_msr, 1); break; case KVM_SET_MSRS: r = msr_io(vcpu, argp, do_set_msr, 0); @@ -2837,12 +2807,14 @@ static struct page *kvm_vm_nopage(struct vm_area_struct *vma, unsigned long pgoff; struct page *page; - *type = VM_FAULT_MINOR; pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; page = gfn_to_page(kvm, pgoff); if (!page) return NOPAGE_SIGBUS; get_page(page); + if (type != NULL) + *type = VM_FAULT_MINOR; + return page; } @@ -2870,41 +2842,18 @@ static int kvm_dev_ioctl_create_vm(void) struct file *file; struct kvm *kvm; - inode = kvmfs_inode(&kvm_vm_fops); - if (IS_ERR(inode)) { - r = PTR_ERR(inode); - goto out1; - } - kvm = kvm_create_vm(); - if (IS_ERR(kvm)) { - r = PTR_ERR(kvm); - goto out2; + if (IS_ERR(kvm)) + return PTR_ERR(kvm); + r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm); + if (r) { + kvm_destroy_vm(kvm); + return r; } - file = kvmfs_file(inode, kvm); - if (IS_ERR(file)) { - r = PTR_ERR(file); - goto out3; - } kvm->filp = file; - r = get_unused_fd(); - if (r < 0) - goto out4; - fd = r; - fd_install(fd, file); - return fd; - -out4: - fput(file); -out3: - kvm_destroy_vm(kvm); -out2: - iput(inode); -out1: - return r; } static long kvm_dev_ioctl(struct file *filp, @@ -2985,25 +2934,6 @@ static struct miscdevice kvm_dev = { &kvm_chardev_ops, }; -static int kvm_reboot(struct notifier_block *notifier, unsigned long val, - void *v) -{ - if (val == SYS_RESTART) { - /* - * Some (well, at least mine) BIOSes hang on reboot if - * in vmx root mode. - */ - printk(KERN_INFO "kvm: exiting hardware virtualization\n"); - on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1); - } - return NOTIFY_OK; -} - -static struct notifier_block kvm_reboot_notifier = { - .notifier_call = kvm_reboot, - .priority = 0, -}; - /* * Make sure that a cpu that is being hot-unplugged does not have any vcpus * cached on it. @@ -3017,7 +2947,9 @@ static void decache_vcpus_on_cpu(int cpu) spin_lock(&kvm_lock); list_for_each_entry(vm, &vm_list, vm_list) for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = &vm->vcpus[i]; + vcpu = vm->vcpus[i]; + if (!vcpu) + continue; /* * If the vcpu is locked, then it is running on some * other cpu and therefore it is not cached on the @@ -3037,33 +2969,74 @@ static void decache_vcpus_on_cpu(int cpu) spin_unlock(&kvm_lock); } +static void hardware_enable(void *junk) +{ + int cpu = raw_smp_processor_id(); + + if (cpu_isset(cpu, cpus_hardware_enabled)) + return; + cpu_set(cpu, cpus_hardware_enabled); + kvm_arch_ops->hardware_enable(NULL); +} + +static void hardware_disable(void *junk) +{ + int cpu = raw_smp_processor_id(); + + if (!cpu_isset(cpu, cpus_hardware_enabled)) + return; + cpu_clear(cpu, cpus_hardware_enabled); + decache_vcpus_on_cpu(cpu); + kvm_arch_ops->hardware_disable(NULL); +} + static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, void *v) { int cpu = (long)v; switch (val) { - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: + case CPU_DYING: + case CPU_DYING_FROZEN: + printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", + cpu); + hardware_disable(NULL); + break; case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", cpu); - decache_vcpus_on_cpu(cpu); - smp_call_function_single(cpu, kvm_arch_ops->hardware_disable, - NULL, 0, 1); + smp_call_function_single(cpu, hardware_disable, NULL, 0, 1); break; case CPU_ONLINE: case CPU_ONLINE_FROZEN: printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", cpu); - smp_call_function_single(cpu, kvm_arch_ops->hardware_enable, - NULL, 0, 1); + smp_call_function_single(cpu, hardware_enable, NULL, 0, 1); break; } return NOTIFY_OK; } +static int kvm_reboot(struct notifier_block *notifier, unsigned long val, + void *v) +{ + if (val == SYS_RESTART) { + /* + * Some (well, at least mine) BIOSes hang on reboot if + * in vmx root mode. + */ + printk(KERN_INFO "kvm: exiting hardware virtualization\n"); + on_each_cpu(hardware_disable, NULL, 0, 1); + } + return NOTIFY_OK; +} + +static struct notifier_block kvm_reboot_notifier = { + .notifier_call = kvm_reboot, + .priority = 0, +}; + void kvm_io_bus_init(struct kvm_io_bus *bus) { memset(bus, 0, sizeof(*bus)); @@ -3117,8 +3090,9 @@ static u64 stat_get(void *_offset) spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = &kvm->vcpus[i]; - total += *(u32 *)((void *)vcpu + offset); + vcpu = kvm->vcpus[i]; + if (vcpu) + total += *(u32 *)((void *)vcpu + offset); } spin_unlock(&kvm_lock); return total; @@ -3152,14 +3126,13 @@ static void kvm_exit_debug(void) static int kvm_suspend(struct sys_device *dev, pm_message_t state) { - decache_vcpus_on_cpu(raw_smp_processor_id()); - on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1); + hardware_disable(NULL); return 0; } static int kvm_resume(struct sys_device *dev) { - on_each_cpu(kvm_arch_ops->hardware_enable, NULL, 0, 1); + hardware_enable(NULL); return 0; } @@ -3176,17 +3149,26 @@ static struct sys_device kvm_sysdev = { hpa_t bad_page_address; -static int kvmfs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) +static inline +struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) { - return get_sb_pseudo(fs_type, "kvm:", NULL, KVMFS_SUPER_MAGIC, mnt); + return container_of(pn, struct kvm_vcpu, preempt_notifier); } -static struct file_system_type kvm_fs_type = { - .name = "kvmfs", - .get_sb = kvmfs_get_sb, - .kill_sb = kill_anon_super, -}; +static void kvm_sched_in(struct preempt_notifier *pn, int cpu) +{ + struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + + kvm_arch_ops->vcpu_load(vcpu, cpu); +} + +static void kvm_sched_out(struct preempt_notifier *pn, + struct task_struct *next) +{ + struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + + kvm_arch_ops->vcpu_put(vcpu); +} int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) { @@ -3212,7 +3194,7 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) if (r < 0) goto out; - on_each_cpu(kvm_arch_ops->hardware_enable, NULL, 0, 1); + on_each_cpu(hardware_enable, NULL, 0, 1); r = register_cpu_notifier(&kvm_cpu_notifier); if (r) goto out_free_1; @@ -3234,6 +3216,9 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) goto out_free; } + kvm_preempt_ops.sched_in = kvm_sched_in; + kvm_preempt_ops.sched_out = kvm_sched_out; + return r; out_free: @@ -3244,7 +3229,7 @@ out_free_2: unregister_reboot_notifier(&kvm_reboot_notifier); unregister_cpu_notifier(&kvm_cpu_notifier); out_free_1: - on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1); + on_each_cpu(hardware_disable, NULL, 0, 1); kvm_arch_ops->hardware_unsetup(); out: kvm_arch_ops = NULL; @@ -3258,7 +3243,7 @@ void kvm_exit_arch(void) sysdev_class_unregister(&kvm_sysdev_class); unregister_reboot_notifier(&kvm_reboot_notifier); unregister_cpu_notifier(&kvm_cpu_notifier); - on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1); + on_each_cpu(hardware_disable, NULL, 0, 1); kvm_arch_ops->hardware_unsetup(); kvm_arch_ops = NULL; } @@ -3272,14 +3257,6 @@ static __init int kvm_init(void) if (r) goto out4; - r = register_filesystem(&kvm_fs_type); - if (r) - goto out3; - - kvmfs_mnt = kern_mount(&kvm_fs_type); - r = PTR_ERR(kvmfs_mnt); - if (IS_ERR(kvmfs_mnt)) - goto out2; kvm_init_debug(); kvm_init_msr_list(); @@ -3296,10 +3273,6 @@ static __init int kvm_init(void) out: kvm_exit_debug(); - mntput(kvmfs_mnt); -out2: - unregister_filesystem(&kvm_fs_type); -out3: kvm_mmu_module_exit(); out4: return r; @@ -3309,8 +3282,6 @@ static __exit void kvm_exit(void) { kvm_exit_debug(); __free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT)); - mntput(kvmfs_mnt); - unregister_filesystem(&kvm_fs_type); kvm_mmu_module_exit(); }