KVM: Portability: Move pio emulation functions to x86.c
[linux-2.6] drivers/kvm/x86.c
1 /*
2  * Kernel-based Virtual Machine driver for Linux
3  *
4  * derived from drivers/kvm/kvm_main.c
5  *
6  * Copyright (C) 2006 Qumranet, Inc.
7  *
8  * Authors:
9  *   Avi Kivity   <avi@qumranet.com>
10  *   Yaniv Kamay  <yaniv@qumranet.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2.  See
13  * the COPYING file in the top-level directory.
14  *
15  */
16
17 #include "kvm.h"
18 #include "x86.h"
19 #include "segment_descriptor.h"
20 #include "irq.h"
21
22 #include <linux/kvm.h>
23 #include <linux/fs.h>
24 #include <linux/vmalloc.h>
25 #include <linux/module.h>
26
27 #include <asm/uaccess.h>
28
29 #define MAX_IO_MSRS 256
30 #define CR0_RESERVED_BITS                                               \
31         (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
32                           | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
33                           | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
34 #define CR4_RESERVED_BITS                                               \
35         (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
36                           | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
37                           | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR  \
38                           | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
39
40 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
41 #define EFER_RESERVED_BITS 0xfffffffffffff2fe
42
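/*
 * Resolve the linear base address of a host segment by walking the host
 * GDT (or, for an LDT selector, the current LDT).
 */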
43 unsigned long segment_base(u16 selector)
44 {
45         struct descriptor_table gdt;
46         struct segment_descriptor *d;
47         unsigned long table_base;
48         unsigned long v;
49
50         if (selector == 0)
51                 return 0;
52
53         asm("sgdt %0" : "=m"(gdt));
54         table_base = gdt.base;
55
56         if (selector & 4) {           /* from ldt */
57                 u16 ldt_selector;
58
59                 asm("sldt %0" : "=g"(ldt_selector));
60                 table_base = segment_base(ldt_selector);
61         }
62         d = (struct segment_descriptor *)(table_base + (selector & ~7));
63         v = d->base_low | ((unsigned long)d->base_mid << 16) |
64                 ((unsigned long)d->base_high << 24);
65 #ifdef CONFIG_X86_64
66         if (d->system == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
67                 v |= ((unsigned long) \
68                       ((struct segment_descriptor_64 *)d)->base_higher) << 32;
69 #endif
70         return v;
71 }
72 EXPORT_SYMBOL_GPL(segment_base);
73
74 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
75 {
 76         /* apic_base is shadowed in vcpu->apic_base for both the in-kernel
 77          * and the userspace irqchip, so it can be returned directly.
 78          */
 79         return vcpu->apic_base;
80 }
81 EXPORT_SYMBOL_GPL(kvm_get_apic_base);
82
83 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
84 {
85         /* TODO: reserve bits check */
86         if (irqchip_in_kernel(vcpu->kvm))
87                 kvm_lapic_set_base(vcpu, data);
88         else
89                 vcpu->apic_base = data;
90 }
91 EXPORT_SYMBOL_GPL(kvm_set_apic_base);
92
93 static void inject_gp(struct kvm_vcpu *vcpu)
94 {
95         kvm_x86_ops->inject_gp(vcpu, 0);
96 }
97
98 /*
 99  * Load the pae pdptrs.  Return true if they are all valid.
100  */
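/*
 * In PAE mode cr3 bits 31:5 hold the 32-byte aligned physical address of
 * the four-entry page-directory-pointer table, so 'offset' below is the
 * table's position within its page counted in u64 entries and a single
 * kvm_read_guest_page() fetches all four PDPTEs.  Present entries must
 * have every architecturally reserved bit clear.
 */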
101 int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
102 {
103         gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
104         unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
105         int i;
106         int ret;
107         u64 pdpte[ARRAY_SIZE(vcpu->pdptrs)];
108
109         mutex_lock(&vcpu->kvm->lock);
110         ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
111                                   offset * sizeof(u64), sizeof(pdpte));
112         if (ret < 0) {
113                 ret = 0;
114                 goto out;
115         }
116         for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
117                 if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) {
118                         ret = 0;
119                         goto out;
120                 }
121         }
122         ret = 1;
123
124         memcpy(vcpu->pdptrs, pdpte, sizeof(vcpu->pdptrs));
125 out:
126         mutex_unlock(&vcpu->kvm->lock);
127
128         return ret;
129 }
130
131 void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
132 {
133         if (cr0 & CR0_RESERVED_BITS) {
134                 printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
135                        cr0, vcpu->cr0);
136                 inject_gp(vcpu);
137                 return;
138         }
139
140         if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
141                 printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
142                 inject_gp(vcpu);
143                 return;
144         }
145
146         if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
147                 printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
148                        "and a clear PE flag\n");
149                 inject_gp(vcpu);
150                 return;
151         }
152
153         if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
154 #ifdef CONFIG_X86_64
155                 if ((vcpu->shadow_efer & EFER_LME)) {
156                         int cs_db, cs_l;
157
158                         if (!is_pae(vcpu)) {
159                                 printk(KERN_DEBUG "set_cr0: #GP, start paging "
160                                        "in long mode while PAE is disabled\n");
161                                 inject_gp(vcpu);
162                                 return;
163                         }
164                         kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
165                         if (cs_l) {
166                                 printk(KERN_DEBUG "set_cr0: #GP, start paging "
167                                        "in long mode while CS.L == 1\n");
168                                 inject_gp(vcpu);
169                                 return;
170
171                         }
172                 } else
173 #endif
174                 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->cr3)) {
175                         printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
176                                "reserved bits\n");
177                         inject_gp(vcpu);
178                         return;
179                 }
180
181         }
182
183         kvm_x86_ops->set_cr0(vcpu, cr0);
184         vcpu->cr0 = cr0;
185
186         mutex_lock(&vcpu->kvm->lock);
187         kvm_mmu_reset_context(vcpu);
188         mutex_unlock(&vcpu->kvm->lock);
189         return;
190 }
191 EXPORT_SYMBOL_GPL(set_cr0);
192
193 void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
194 {
195         set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f));
196 }
197 EXPORT_SYMBOL_GPL(lmsw);
198
199 void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
200 {
201         if (cr4 & CR4_RESERVED_BITS) {
202                 printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
203                 inject_gp(vcpu);
204                 return;
205         }
206
207         if (is_long_mode(vcpu)) {
208                 if (!(cr4 & X86_CR4_PAE)) {
209                         printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
210                                "in long mode\n");
211                         inject_gp(vcpu);
212                         return;
213                 }
214         } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE)
215                    && !load_pdptrs(vcpu, vcpu->cr3)) {
216                 printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
217                 inject_gp(vcpu);
218                 return;
219         }
220
221         if (cr4 & X86_CR4_VMXE) {
222                 printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
223                 inject_gp(vcpu);
224                 return;
225         }
226         kvm_x86_ops->set_cr4(vcpu, cr4);
227         vcpu->cr4 = cr4;
228         mutex_lock(&vcpu->kvm->lock);
229         kvm_mmu_reset_context(vcpu);
230         mutex_unlock(&vcpu->kvm->lock);
231 }
232 EXPORT_SYMBOL_GPL(set_cr4);
233
234 void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
235 {
236         if (is_long_mode(vcpu)) {
237                 if (cr3 & CR3_L_MODE_RESERVED_BITS) {
238                         printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
239                         inject_gp(vcpu);
240                         return;
241                 }
242         } else {
243                 if (is_pae(vcpu)) {
244                         if (cr3 & CR3_PAE_RESERVED_BITS) {
245                                 printk(KERN_DEBUG
246                                        "set_cr3: #GP, reserved bits\n");
247                                 inject_gp(vcpu);
248                                 return;
249                         }
250                         if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
251                                 printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
252                                        "reserved bits\n");
253                                 inject_gp(vcpu);
254                                 return;
255                         }
256                 }
257                 /*
258                  * We don't check reserved bits in nonpae mode, because
259                  * this isn't enforced, and VMware depends on this.
260                  */
261         }
262
263         mutex_lock(&vcpu->kvm->lock);
264         /*
265          * Does the new cr3 value map to physical memory? (Note, we
266          * catch an invalid cr3 even in real-mode, because it would
267          * cause trouble later on when we turn on paging anyway.)
268          *
269          * A real CPU would silently accept an invalid cr3 and would
270          * attempt to use it - with largely undefined (and often hard
271          * to debug) behavior on the guest side.
272          */
273         if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
274                 inject_gp(vcpu);
275         else {
276                 vcpu->cr3 = cr3;
277                 vcpu->mmu.new_cr3(vcpu);
278         }
279         mutex_unlock(&vcpu->kvm->lock);
280 }
281 EXPORT_SYMBOL_GPL(set_cr3);
282
283 void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
284 {
285         if (cr8 & CR8_RESERVED_BITS) {
286                 printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
287                 inject_gp(vcpu);
288                 return;
289         }
290         if (irqchip_in_kernel(vcpu->kvm))
291                 kvm_lapic_set_tpr(vcpu, cr8);
292         else
293                 vcpu->cr8 = cr8;
294 }
295 EXPORT_SYMBOL_GPL(set_cr8);
296
297 unsigned long get_cr8(struct kvm_vcpu *vcpu)
298 {
299         if (irqchip_in_kernel(vcpu->kvm))
300                 return kvm_lapic_get_cr8(vcpu);
301         else
302                 return vcpu->cr8;
303 }
304 EXPORT_SYMBOL_GPL(get_cr8);
305
306 /*
307  * List of msr numbers which we expose to userspace through KVM_GET_MSRS
308  * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
309  *
310  * This list is modified at module load time to reflect the
311  * capabilities of the host cpu.
312  */
313 static u32 msrs_to_save[] = {
314         MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
315         MSR_K6_STAR,
316 #ifdef CONFIG_X86_64
317         MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
318 #endif
319         MSR_IA32_TIME_STAMP_COUNTER,
320 };
321
322 static unsigned num_msrs_to_save;
323
324 static u32 emulated_msrs[] = {
325         MSR_IA32_MISC_ENABLE,
326 };
327
328 #ifdef CONFIG_X86_64
329
330 static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
331 {
332         if (efer & EFER_RESERVED_BITS) {
333                 printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
334                        efer);
335                 inject_gp(vcpu);
336                 return;
337         }
338
339         if (is_paging(vcpu)
340             && (vcpu->shadow_efer & EFER_LME) != (efer & EFER_LME)) {
341                 printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
342                 inject_gp(vcpu);
343                 return;
344         }
345
346         kvm_x86_ops->set_efer(vcpu, efer);
347
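        /*
         * EFER.LMA is controlled by the processor when long mode is
         * activated; preserve the current shadow value rather than
         * letting the guest toggle it directly through a wrmsr.
         */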
348         efer &= ~EFER_LMA;
349         efer |= vcpu->shadow_efer & EFER_LMA;
350
351         vcpu->shadow_efer = efer;
352 }
353
354 #endif
355
356 /*
357  * Writes msr value into the appropriate "register".
358  * Returns 0 on success, non-0 otherwise.
359  * Assumes vcpu_load() was already called.
360  */
361 int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
362 {
363         return kvm_x86_ops->set_msr(vcpu, msr_index, data);
364 }
365
366 /*
367  * Adapt set_msr() to msr_io()'s calling convention
368  */
369 static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
370 {
371         return kvm_set_msr(vcpu, index, *data);
372 }
373
374
375 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
376 {
377         switch (msr) {
378 #ifdef CONFIG_X86_64
379         case MSR_EFER:
380                 set_efer(vcpu, data);
381                 break;
382 #endif
383         case MSR_IA32_MC0_STATUS:
384                 pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
385                        __FUNCTION__, data);
386                 break;
387         case MSR_IA32_MCG_STATUS:
388                 pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
389                         __FUNCTION__, data);
390                 break;
391         case MSR_IA32_UCODE_REV:
392         case MSR_IA32_UCODE_WRITE:
393         case 0x200 ... 0x2ff: /* MTRRs */
394                 break;
395         case MSR_IA32_APICBASE:
396                 kvm_set_apic_base(vcpu, data);
397                 break;
398         case MSR_IA32_MISC_ENABLE:
399                 vcpu->ia32_misc_enable_msr = data;
400                 break;
401         default:
402                 pr_unimpl(vcpu, "unhandled wrmsr: 0x%x\n", msr);
403                 return 1;
404         }
405         return 0;
406 }
407 EXPORT_SYMBOL_GPL(kvm_set_msr_common);
408
409
410 /*
411  * Reads an msr value (of 'msr_index') into 'pdata'.
412  * Returns 0 on success, non-0 otherwise.
413  * Assumes vcpu_load() was already called.
414  */
415 int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
416 {
417         return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
418 }
419
420 int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
421 {
422         u64 data;
423
424         switch (msr) {
425         case 0xc0010010: /* SYSCFG */
426         case 0xc0010015: /* HWCR */
427         case MSR_IA32_PLATFORM_ID:
428         case MSR_IA32_P5_MC_ADDR:
429         case MSR_IA32_P5_MC_TYPE:
430         case MSR_IA32_MC0_CTL:
431         case MSR_IA32_MCG_STATUS:
432         case MSR_IA32_MCG_CAP:
433         case MSR_IA32_MC0_MISC:
434         case MSR_IA32_MC0_MISC+4:
435         case MSR_IA32_MC0_MISC+8:
436         case MSR_IA32_MC0_MISC+12:
437         case MSR_IA32_MC0_MISC+16:
438         case MSR_IA32_UCODE_REV:
439         case MSR_IA32_PERF_STATUS:
440         case MSR_IA32_EBL_CR_POWERON:
441                 /* MTRR registers */
442         case 0xfe:
443         case 0x200 ... 0x2ff:
444                 data = 0;
445                 break;
446         case 0xcd: /* fsb frequency */
447                 data = 3;
448                 break;
449         case MSR_IA32_APICBASE:
450                 data = kvm_get_apic_base(vcpu);
451                 break;
452         case MSR_IA32_MISC_ENABLE:
453                 data = vcpu->ia32_misc_enable_msr;
454                 break;
455 #ifdef CONFIG_X86_64
456         case MSR_EFER:
457                 data = vcpu->shadow_efer;
458                 break;
459 #endif
460         default:
461                 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
462                 return 1;
463         }
464         *pdata = data;
465         return 0;
466 }
467 EXPORT_SYMBOL_GPL(kvm_get_msr_common);
468
469 /*
470  * Read or write a bunch of msrs. All parameters are kernel addresses.
471  *
472  * @return number of msrs set successfully.
473  */
474 static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
475                     struct kvm_msr_entry *entries,
476                     int (*do_msr)(struct kvm_vcpu *vcpu,
477                                   unsigned index, u64 *data))
478 {
479         int i;
480
481         vcpu_load(vcpu);
482
483         for (i = 0; i < msrs->nmsrs; ++i)
484                 if (do_msr(vcpu, entries[i].index, &entries[i].data))
485                         break;
486
487         vcpu_put(vcpu);
488
489         return i;
490 }
491
492 /*
493  * Read or write a bunch of msrs. Parameters are user addresses.
494  *
495  * @return number of msrs set successfully.
496  */
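/*
 * 'writeback' is set for KVM_GET_MSRS so the values that were read are
 * copied back to userspace; KVM_SET_MSRS passes 0 and only consumes the
 * entries.
 */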
497 static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
498                   int (*do_msr)(struct kvm_vcpu *vcpu,
499                                 unsigned index, u64 *data),
500                   int writeback)
501 {
502         struct kvm_msrs msrs;
503         struct kvm_msr_entry *entries;
504         int r, n;
505         unsigned size;
506
507         r = -EFAULT;
508         if (copy_from_user(&msrs, user_msrs, sizeof msrs))
509                 goto out;
510
511         r = -E2BIG;
512         if (msrs.nmsrs >= MAX_IO_MSRS)
513                 goto out;
514
515         r = -ENOMEM;
516         size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
517         entries = vmalloc(size);
518         if (!entries)
519                 goto out;
520
521         r = -EFAULT;
522         if (copy_from_user(entries, user_msrs->entries, size))
523                 goto out_free;
524
525         r = n = __msr_io(vcpu, &msrs, entries, do_msr);
526         if (r < 0)
527                 goto out_free;
528
529         r = -EFAULT;
530         if (writeback && copy_to_user(user_msrs->entries, entries, size))
531                 goto out_free;
532
533         r = n;
534
535 out_free:
536         vfree(entries);
537 out:
538         return r;
539 }
540
541 long kvm_arch_dev_ioctl(struct file *filp,
542                         unsigned int ioctl, unsigned long arg)
543 {
544         void __user *argp = (void __user *)arg;
545         long r;
546
547         switch (ioctl) {
548         case KVM_GET_MSR_INDEX_LIST: {
549                 struct kvm_msr_list __user *user_msr_list = argp;
550                 struct kvm_msr_list msr_list;
551                 unsigned n;
552
553                 r = -EFAULT;
554                 if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
555                         goto out;
556                 n = msr_list.nmsrs;
557                 msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
558                 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
559                         goto out;
560                 r = -E2BIG;
561                 if (n < num_msrs_to_save)
562                         goto out;
563                 r = -EFAULT;
564                 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
565                                  num_msrs_to_save * sizeof(u32)))
566                         goto out;
567                 if (copy_to_user(user_msr_list->indices
568                                  + num_msrs_to_save * sizeof(u32),
569                                  &emulated_msrs,
570                                  ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
571                         goto out;
572                 r = 0;
573                 break;
574         }
575         default:
576                 r = -EINVAL;
577         }
578 out:
579         return r;
580 }
581
582 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
583 {
584         kvm_x86_ops->vcpu_load(vcpu, cpu);
585 }
586
587 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
588 {
589         kvm_x86_ops->vcpu_put(vcpu);
590 }
591
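/*
 * If the host has NX disabled in EFER, hide the NX capability
 * (CPUID.80000001H:EDX bit 20) from the guest cpuid as well.
 */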
592 static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
593 {
594         u64 efer;
595         int i;
596         struct kvm_cpuid_entry *e, *entry;
597
598         rdmsrl(MSR_EFER, efer);
599         entry = NULL;
600         for (i = 0; i < vcpu->cpuid_nent; ++i) {
601                 e = &vcpu->cpuid_entries[i];
602                 if (e->function == 0x80000001) {
603                         entry = e;
604                         break;
605                 }
606         }
607         if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) {
608                 entry->edx &= ~(1 << 20);
609                 printk(KERN_INFO "kvm: guest NX capability removed\n");
610         }
611 }
612
613 static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
614                                     struct kvm_cpuid *cpuid,
615                                     struct kvm_cpuid_entry __user *entries)
616 {
617         int r;
618
619         r = -E2BIG;
620         if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
621                 goto out;
622         r = -EFAULT;
623         if (copy_from_user(&vcpu->cpuid_entries, entries,
624                            cpuid->nent * sizeof(struct kvm_cpuid_entry)))
625                 goto out;
626         vcpu->cpuid_nent = cpuid->nent;
627         cpuid_fix_nx_cap(vcpu);
628         return 0;
629
630 out:
631         return r;
632 }
633
634 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
635                                     struct kvm_lapic_state *s)
636 {
637         vcpu_load(vcpu);
638         memcpy(s->regs, vcpu->apic->regs, sizeof *s);
639         vcpu_put(vcpu);
640
641         return 0;
642 }
643
644 static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
645                                     struct kvm_lapic_state *s)
646 {
647         vcpu_load(vcpu);
648         memcpy(vcpu->apic->regs, s->regs, sizeof *s);
649         kvm_apic_post_state_restore(vcpu);
650         vcpu_put(vcpu);
651
652         return 0;
653 }
654
655 long kvm_arch_vcpu_ioctl(struct file *filp,
656                          unsigned int ioctl, unsigned long arg)
657 {
658         struct kvm_vcpu *vcpu = filp->private_data;
659         void __user *argp = (void __user *)arg;
660         int r;
661
662         switch (ioctl) {
663         case KVM_GET_LAPIC: {
664                 struct kvm_lapic_state lapic;
665
666                 memset(&lapic, 0, sizeof lapic);
667                 r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic);
668                 if (r)
669                         goto out;
670                 r = -EFAULT;
671                 if (copy_to_user(argp, &lapic, sizeof lapic))
672                         goto out;
673                 r = 0;
674                 break;
675         }
676         case KVM_SET_LAPIC: {
677                 struct kvm_lapic_state lapic;
678
679                 r = -EFAULT;
680                 if (copy_from_user(&lapic, argp, sizeof lapic))
681                         goto out;
682                 r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);
683                 if (r)
684                         goto out;
685                 r = 0;
686                 break;
687         }
688         case KVM_SET_CPUID: {
689                 struct kvm_cpuid __user *cpuid_arg = argp;
690                 struct kvm_cpuid cpuid;
691
692                 r = -EFAULT;
693                 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
694                         goto out;
695                 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
696                 if (r)
697                         goto out;
698                 break;
699         }
700         case KVM_GET_MSRS:
701                 r = msr_io(vcpu, argp, kvm_get_msr, 1);
702                 break;
703         case KVM_SET_MSRS:
704                 r = msr_io(vcpu, argp, do_set_msr, 0);
705                 break;
706         default:
707                 r = -EINVAL;
708         }
709 out:
710         return r;
711 }
712
713 static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
714 {
715         int ret;
716
717         if (addr > (unsigned int)(-3 * PAGE_SIZE))
718                 return -1;
719         ret = kvm_x86_ops->set_tss_addr(kvm, addr);
720         return ret;
721 }
722
723 static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
724                                           u32 kvm_nr_mmu_pages)
725 {
726         if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
727                 return -EINVAL;
728
729         mutex_lock(&kvm->lock);
730
731         kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
732         kvm->n_requested_mmu_pages = kvm_nr_mmu_pages;
733
734         mutex_unlock(&kvm->lock);
735         return 0;
736 }
737
738 static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
739 {
740         return kvm->n_alloc_mmu_pages;
741 }
742
743 /*
744  * Set a new alias region.  Aliases map a portion of physical memory into
745  * another portion.  This is useful for memory windows, for example the PC
746  * VGA region.
747  */
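/*
 * For example (hypothetical values), userspace could alias the legacy
 * VGA window onto a chunk of ordinary guest RAM with something like:
 *
 *	struct kvm_memory_alias alias = {
 *		.slot             = 0,
 *		.guest_phys_addr  = 0xa0000,
 *		.memory_size      = 0x20000,
 *		.target_phys_addr = 0x10000000,
 *	};
 *	ioctl(vm_fd, KVM_SET_MEMORY_ALIAS, &alias);
 */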
748 static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
749                                          struct kvm_memory_alias *alias)
750 {
751         int r, n;
752         struct kvm_mem_alias *p;
753
754         r = -EINVAL;
755         /* General sanity checks */
756         if (alias->memory_size & (PAGE_SIZE - 1))
757                 goto out;
758         if (alias->guest_phys_addr & (PAGE_SIZE - 1))
759                 goto out;
760         if (alias->slot >= KVM_ALIAS_SLOTS)
761                 goto out;
762         if (alias->guest_phys_addr + alias->memory_size
763             < alias->guest_phys_addr)
764                 goto out;
765         if (alias->target_phys_addr + alias->memory_size
766             < alias->target_phys_addr)
767                 goto out;
768
769         mutex_lock(&kvm->lock);
770
771         p = &kvm->aliases[alias->slot];
772         p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
773         p->npages = alias->memory_size >> PAGE_SHIFT;
774         p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
775
776         for (n = KVM_ALIAS_SLOTS; n > 0; --n)
777                 if (kvm->aliases[n - 1].npages)
778                         break;
779         kvm->naliases = n;
780
781         kvm_mmu_zap_all(kvm);
782
783         mutex_unlock(&kvm->lock);
784
785         return 0;
786
787 out:
788         return r;
789 }
790
791 static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
792 {
793         int r;
794
795         r = 0;
796         switch (chip->chip_id) {
797         case KVM_IRQCHIP_PIC_MASTER:
798                 memcpy(&chip->chip.pic,
799                         &pic_irqchip(kvm)->pics[0],
800                         sizeof(struct kvm_pic_state));
801                 break;
802         case KVM_IRQCHIP_PIC_SLAVE:
803                 memcpy(&chip->chip.pic,
804                         &pic_irqchip(kvm)->pics[1],
805                         sizeof(struct kvm_pic_state));
806                 break;
807         case KVM_IRQCHIP_IOAPIC:
808                 memcpy(&chip->chip.ioapic,
809                         ioapic_irqchip(kvm),
810                         sizeof(struct kvm_ioapic_state));
811                 break;
812         default:
813                 r = -EINVAL;
814                 break;
815         }
816         return r;
817 }
818
819 static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
820 {
821         int r;
822
823         r = 0;
824         switch (chip->chip_id) {
825         case KVM_IRQCHIP_PIC_MASTER:
826                 memcpy(&pic_irqchip(kvm)->pics[0],
827                         &chip->chip.pic,
828                         sizeof(struct kvm_pic_state));
829                 break;
830         case KVM_IRQCHIP_PIC_SLAVE:
831                 memcpy(&pic_irqchip(kvm)->pics[1],
832                         &chip->chip.pic,
833                         sizeof(struct kvm_pic_state));
834                 break;
835         case KVM_IRQCHIP_IOAPIC:
836                 memcpy(ioapic_irqchip(kvm),
837                         &chip->chip.ioapic,
838                         sizeof(struct kvm_ioapic_state));
839                 break;
840         default:
841                 r = -EINVAL;
842                 break;
843         }
844         kvm_pic_update_irq(pic_irqchip(kvm));
845         return r;
846 }
847
848 long kvm_arch_vm_ioctl(struct file *filp,
849                        unsigned int ioctl, unsigned long arg)
850 {
851         struct kvm *kvm = filp->private_data;
852         void __user *argp = (void __user *)arg;
853         int r = -EINVAL;
854
855         switch (ioctl) {
856         case KVM_SET_TSS_ADDR:
857                 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
858                 if (r < 0)
859                         goto out;
860                 break;
861         case KVM_SET_MEMORY_REGION: {
862                 struct kvm_memory_region kvm_mem;
863                 struct kvm_userspace_memory_region kvm_userspace_mem;
864
865                 r = -EFAULT;
866                 if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
867                         goto out;
868                 kvm_userspace_mem.slot = kvm_mem.slot;
869                 kvm_userspace_mem.flags = kvm_mem.flags;
870                 kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
871                 kvm_userspace_mem.memory_size = kvm_mem.memory_size;
872                 r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
873                 if (r)
874                         goto out;
875                 break;
876         }
877         case KVM_SET_NR_MMU_PAGES:
878                 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
879                 if (r)
880                         goto out;
881                 break;
882         case KVM_GET_NR_MMU_PAGES:
883                 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
884                 break;
885         case KVM_SET_MEMORY_ALIAS: {
886                 struct kvm_memory_alias alias;
887
888                 r = -EFAULT;
889                 if (copy_from_user(&alias, argp, sizeof alias))
890                         goto out;
891                 r = kvm_vm_ioctl_set_memory_alias(kvm, &alias);
892                 if (r)
893                         goto out;
894                 break;
895         }
896         case KVM_CREATE_IRQCHIP:
897                 r = -ENOMEM;
898                 kvm->vpic = kvm_create_pic(kvm);
899                 if (kvm->vpic) {
900                         r = kvm_ioapic_init(kvm);
901                         if (r) {
902                                 kfree(kvm->vpic);
903                                 kvm->vpic = NULL;
904                                 goto out;
905                         }
906                 } else
907                         goto out;
908                 break;
909         case KVM_IRQ_LINE: {
910                 struct kvm_irq_level irq_event;
911
912                 r = -EFAULT;
913                 if (copy_from_user(&irq_event, argp, sizeof irq_event))
914                         goto out;
915                 if (irqchip_in_kernel(kvm)) {
916                         mutex_lock(&kvm->lock);
917                         if (irq_event.irq < 16)
918                                 kvm_pic_set_irq(pic_irqchip(kvm),
919                                         irq_event.irq,
920                                         irq_event.level);
921                         kvm_ioapic_set_irq(kvm->vioapic,
922                                         irq_event.irq,
923                                         irq_event.level);
924                         mutex_unlock(&kvm->lock);
925                         r = 0;
926                 }
927                 break;
928         }
929         case KVM_GET_IRQCHIP: {
930                 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
931                 struct kvm_irqchip chip;
932
933                 r = -EFAULT;
934                 if (copy_from_user(&chip, argp, sizeof chip))
935                         goto out;
936                 r = -ENXIO;
937                 if (!irqchip_in_kernel(kvm))
938                         goto out;
939                 r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
940                 if (r)
941                         goto out;
942                 r = -EFAULT;
943                 if (copy_to_user(argp, &chip, sizeof chip))
944                         goto out;
945                 r = 0;
946                 break;
947         }
948         case KVM_SET_IRQCHIP: {
949                 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
950                 struct kvm_irqchip chip;
951
952                 r = -EFAULT;
953                 if (copy_from_user(&chip, argp, sizeof chip))
954                         goto out;
955                 r = -ENXIO;
956                 if (!irqchip_in_kernel(kvm))
957                         goto out;
958                 r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
959                 if (r)
960                         goto out;
961                 r = 0;
962                 break;
963         }
964         default:
965                 ;
966         }
967 out:
968         return r;
969 }
970
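/*
 * Probe each MSR in msrs_to_save with rdmsr_safe() and compact the list
 * so that it only advertises MSRs the host cpu actually implements.
 */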
971 static __init void kvm_init_msr_list(void)
972 {
973         u32 dummy[2];
974         unsigned i, j;
975
976         for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
977                 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
978                         continue;
979                 if (j < i)
980                         msrs_to_save[j] = msrs_to_save[i];
981                 j++;
982         }
983         num_msrs_to_save = j;
984 }
985
986 /*
987  * Only the apic needs an MMIO device hook, so shortcut now..
988  */
989 static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
990                                                 gpa_t addr)
991 {
992         struct kvm_io_device *dev;
993
994         if (vcpu->apic) {
995                 dev = &vcpu->apic->dev;
996                 if (dev->in_range(dev, addr))
997                         return dev;
998         }
999         return NULL;
1000 }
1001
1002
1003 static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
1004                                                 gpa_t addr)
1005 {
1006         struct kvm_io_device *dev;
1007
1008         dev = vcpu_find_pervcpu_dev(vcpu, addr);
1009         if (dev == NULL)
1010                 dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
1011         return dev;
1012 }
1013
1014 int emulator_read_std(unsigned long addr,
1015                              void *val,
1016                              unsigned int bytes,
1017                              struct kvm_vcpu *vcpu)
1018 {
1019         void *data = val;
1020
1021         while (bytes) {
1022                 gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
1023                 unsigned offset = addr & (PAGE_SIZE-1);
1024                 unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset);
1025                 int ret;
1026
1027                 if (gpa == UNMAPPED_GVA)
1028                         return X86EMUL_PROPAGATE_FAULT;
1029                 ret = kvm_read_guest(vcpu->kvm, gpa, data, tocopy);
1030                 if (ret < 0)
1031                         return X86EMUL_UNHANDLEABLE;
1032
1033                 bytes -= tocopy;
1034                 data += tocopy;
1035                 addr += tocopy;
1036         }
1037
1038         return X86EMUL_CONTINUE;
1039 }
1040 EXPORT_SYMBOL_GPL(emulator_read_std);
1041
1042 static int emulator_write_std(unsigned long addr,
1043                               const void *val,
1044                               unsigned int bytes,
1045                               struct kvm_vcpu *vcpu)
1046 {
1047         pr_unimpl(vcpu, "emulator_write_std: addr %lx n %d\n", addr, bytes);
1048         return X86EMUL_UNHANDLEABLE;
1049 }
1050
1051 static int emulator_read_emulated(unsigned long addr,
1052                                   void *val,
1053                                   unsigned int bytes,
1054                                   struct kvm_vcpu *vcpu)
1055 {
1056         struct kvm_io_device *mmio_dev;
1057         gpa_t                 gpa;
1058
1059         if (vcpu->mmio_read_completed) {
1060                 memcpy(val, vcpu->mmio_data, bytes);
1061                 vcpu->mmio_read_completed = 0;
1062                 return X86EMUL_CONTINUE;
1063         }
1064
1065         gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
1066
1067         /* For APIC access vmexit */
1068         if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
1069                 goto mmio;
1070
1071         if (emulator_read_std(addr, val, bytes, vcpu)
1072                         == X86EMUL_CONTINUE)
1073                 return X86EMUL_CONTINUE;
1074         if (gpa == UNMAPPED_GVA)
1075                 return X86EMUL_PROPAGATE_FAULT;
1076
1077 mmio:
1078         /*
1079          * Is this MMIO handled locally?
1080          */
1081         mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
1082         if (mmio_dev) {
1083                 kvm_iodevice_read(mmio_dev, gpa, bytes, val);
1084                 return X86EMUL_CONTINUE;
1085         }
1086
1087         vcpu->mmio_needed = 1;
1088         vcpu->mmio_phys_addr = gpa;
1089         vcpu->mmio_size = bytes;
1090         vcpu->mmio_is_write = 0;
1091
1092         return X86EMUL_UNHANDLEABLE;
1093 }
1094
1095 static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
1096                                const void *val, int bytes)
1097 {
1098         int ret;
1099
1100         ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
1101         if (ret < 0)
1102                 return 0;
1103         kvm_mmu_pte_write(vcpu, gpa, val, bytes);
1104         return 1;
1105 }
1106
1107 static int emulator_write_emulated_onepage(unsigned long addr,
1108                                            const void *val,
1109                                            unsigned int bytes,
1110                                            struct kvm_vcpu *vcpu)
1111 {
1112         struct kvm_io_device *mmio_dev;
1113         gpa_t                 gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
1114
1115         if (gpa == UNMAPPED_GVA) {
1116                 kvm_x86_ops->inject_page_fault(vcpu, addr, 2);
1117                 return X86EMUL_PROPAGATE_FAULT;
1118         }
1119
1120         /* For APIC access vmexit */
1121         if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
1122                 goto mmio;
1123
1124         if (emulator_write_phys(vcpu, gpa, val, bytes))
1125                 return X86EMUL_CONTINUE;
1126
1127 mmio:
1128         /*
1129          * Is this MMIO handled locally?
1130          */
1131         mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
1132         if (mmio_dev) {
1133                 kvm_iodevice_write(mmio_dev, gpa, bytes, val);
1134                 return X86EMUL_CONTINUE;
1135         }
1136
1137         vcpu->mmio_needed = 1;
1138         vcpu->mmio_phys_addr = gpa;
1139         vcpu->mmio_size = bytes;
1140         vcpu->mmio_is_write = 1;
1141         memcpy(vcpu->mmio_data, val, bytes);
1142
1143         return X86EMUL_CONTINUE;
1144 }
1145
1146 int emulator_write_emulated(unsigned long addr,
1147                                    const void *val,
1148                                    unsigned int bytes,
1149                                    struct kvm_vcpu *vcpu)
1150 {
1151         /* Crossing a page boundary? */
1152         if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
1153                 int rc, now;
1154
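                /* 'now' is the number of bytes from addr up to the next page boundary. */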
1155                 now = -addr & ~PAGE_MASK;
1156                 rc = emulator_write_emulated_onepage(addr, val, now, vcpu);
1157                 if (rc != X86EMUL_CONTINUE)
1158                         return rc;
1159                 addr += now;
1160                 val += now;
1161                 bytes -= now;
1162         }
1163         return emulator_write_emulated_onepage(addr, val, bytes, vcpu);
1164 }
1165 EXPORT_SYMBOL_GPL(emulator_write_emulated);
1166
1167 static int emulator_cmpxchg_emulated(unsigned long addr,
1168                                      const void *old,
1169                                      const void *new,
1170                                      unsigned int bytes,
1171                                      struct kvm_vcpu *vcpu)
1172 {
1173         static int reported;
1174
1175         if (!reported) {
1176                 reported = 1;
1177                 printk(KERN_WARNING "kvm: emulating exchange as write\n");
1178         }
1179         return emulator_write_emulated(addr, new, bytes, vcpu);
1180 }
1181
1182 static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
1183 {
1184         return kvm_x86_ops->get_segment_base(vcpu, seg);
1185 }
1186
1187 int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
1188 {
1189         return X86EMUL_CONTINUE;
1190 }
1191
1192 int emulate_clts(struct kvm_vcpu *vcpu)
1193 {
1194         kvm_x86_ops->set_cr0(vcpu, vcpu->cr0 & ~X86_CR0_TS);
1195         return X86EMUL_CONTINUE;
1196 }
1197
1198 int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
1199 {
1200         struct kvm_vcpu *vcpu = ctxt->vcpu;
1201
1202         switch (dr) {
1203         case 0 ... 3:
1204                 *dest = kvm_x86_ops->get_dr(vcpu, dr);
1205                 return X86EMUL_CONTINUE;
1206         default:
1207                 pr_unimpl(vcpu, "%s: unexpected dr %u\n", __FUNCTION__, dr);
1208                 return X86EMUL_UNHANDLEABLE;
1209         }
1210 }
1211
1212 int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
1213 {
1214         unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
1215         int exception;
1216
1217         kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception);
1218         if (exception) {
1219                 /* FIXME: better handling */
1220                 return X86EMUL_UNHANDLEABLE;
1221         }
1222         return X86EMUL_CONTINUE;
1223 }
1224
1225 void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
1226 {
1227         static int reported;
1228         u8 opcodes[4];
1229         unsigned long rip = vcpu->rip;
1230         unsigned long rip_linear;
1231
1232         rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
1233
1234         if (reported)
1235                 return;
1236
1237         emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);
1238
1239         printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
1240                context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
1241         reported = 1;
1242 }
1243 EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
1244
1245 struct x86_emulate_ops emulate_ops = {
1246         .read_std            = emulator_read_std,
1247         .write_std           = emulator_write_std,
1248         .read_emulated       = emulator_read_emulated,
1249         .write_emulated      = emulator_write_emulated,
1250         .cmpxchg_emulated    = emulator_cmpxchg_emulated,
1251 };
1252
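/*
 * Returns EMULATE_DONE when the instruction was handled completely,
 * EMULATE_DO_MMIO when the access has to be completed by userspace,
 * and EMULATE_FAIL when the instruction could not be emulated.
 */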
1253 int emulate_instruction(struct kvm_vcpu *vcpu,
1254                         struct kvm_run *run,
1255                         unsigned long cr2,
1256                         u16 error_code,
1257                         int no_decode)
1258 {
1259         int r;
1260
1261         vcpu->mmio_fault_cr2 = cr2;
1262         kvm_x86_ops->cache_regs(vcpu);
1263
1264         vcpu->mmio_is_write = 0;
1265         vcpu->pio.string = 0;
1266
1267         if (!no_decode) {
1268                 int cs_db, cs_l;
1269                 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
1270
1271                 vcpu->emulate_ctxt.vcpu = vcpu;
1272                 vcpu->emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
1273                 vcpu->emulate_ctxt.cr2 = cr2;
1274                 vcpu->emulate_ctxt.mode =
1275                         (vcpu->emulate_ctxt.eflags & X86_EFLAGS_VM)
1276                         ? X86EMUL_MODE_REAL : cs_l
1277                         ? X86EMUL_MODE_PROT64 : cs_db
1278                         ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
1279
1280                 if (vcpu->emulate_ctxt.mode == X86EMUL_MODE_PROT64) {
1281                         vcpu->emulate_ctxt.cs_base = 0;
1282                         vcpu->emulate_ctxt.ds_base = 0;
1283                         vcpu->emulate_ctxt.es_base = 0;
1284                         vcpu->emulate_ctxt.ss_base = 0;
1285                 } else {
1286                         vcpu->emulate_ctxt.cs_base =
1287                                         get_segment_base(vcpu, VCPU_SREG_CS);
1288                         vcpu->emulate_ctxt.ds_base =
1289                                         get_segment_base(vcpu, VCPU_SREG_DS);
1290                         vcpu->emulate_ctxt.es_base =
1291                                         get_segment_base(vcpu, VCPU_SREG_ES);
1292                         vcpu->emulate_ctxt.ss_base =
1293                                         get_segment_base(vcpu, VCPU_SREG_SS);
1294                 }
1295
1296                 vcpu->emulate_ctxt.gs_base =
1297                                         get_segment_base(vcpu, VCPU_SREG_GS);
1298                 vcpu->emulate_ctxt.fs_base =
1299                                         get_segment_base(vcpu, VCPU_SREG_FS);
1300
1301                 r = x86_decode_insn(&vcpu->emulate_ctxt, &emulate_ops);
1302                 if (r)  {
1303                         if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
1304                                 return EMULATE_DONE;
1305                         return EMULATE_FAIL;
1306                 }
1307         }
1308
1309         r = x86_emulate_insn(&vcpu->emulate_ctxt, &emulate_ops);
1310
1311         if (vcpu->pio.string)
1312                 return EMULATE_DO_MMIO;
1313
1314         if ((r || vcpu->mmio_is_write) && run) {
1315                 run->exit_reason = KVM_EXIT_MMIO;
1316                 run->mmio.phys_addr = vcpu->mmio_phys_addr;
1317                 memcpy(run->mmio.data, vcpu->mmio_data, 8);
1318                 run->mmio.len = vcpu->mmio_size;
1319                 run->mmio.is_write = vcpu->mmio_is_write;
1320         }
1321
1322         if (r) {
1323                 if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
1324                         return EMULATE_DONE;
1325                 if (!vcpu->mmio_needed) {
1326                         kvm_report_emulation_failure(vcpu, "mmio");
1327                         return EMULATE_FAIL;
1328                 }
1329                 return EMULATE_DO_MMIO;
1330         }
1331
1332         kvm_x86_ops->decache_regs(vcpu);
1333         kvm_x86_ops->set_rflags(vcpu, vcpu->emulate_ctxt.eflags);
1334
1335         if (vcpu->mmio_is_write) {
1336                 vcpu->mmio_needed = 0;
1337                 return EMULATE_DO_MMIO;
1338         }
1339
1340         return EMULATE_DONE;
1341 }
1342 EXPORT_SYMBOL_GPL(emulate_instruction);
1343
1344 static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
1345 {
1346         int i;
1347
1348         for (i = 0; i < ARRAY_SIZE(vcpu->pio.guest_pages); ++i)
1349                 if (vcpu->pio.guest_pages[i]) {
1350                         kvm_release_page(vcpu->pio.guest_pages[i]);
1351                         vcpu->pio.guest_pages[i] = NULL;
1352                 }
1353 }
1354
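/*
 * Copy between the vcpu's pio_data page (shared with userspace) and the
 * pinned guest pages: for INS the data in pio_data is written into guest
 * memory, for OUTS the guest data is gathered into pio_data.
 */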
1355 static int pio_copy_data(struct kvm_vcpu *vcpu)
1356 {
1357         void *p = vcpu->pio_data;
1358         void *q;
1359         unsigned bytes;
1360         int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1;
1361
1362         q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
1363                  PAGE_KERNEL);
1364         if (!q) {
1365                 free_pio_guest_pages(vcpu);
1366                 return -ENOMEM;
1367         }
1368         q += vcpu->pio.guest_page_offset;
1369         bytes = vcpu->pio.size * vcpu->pio.cur_count;
1370         if (vcpu->pio.in)
1371                 memcpy(q, p, bytes);
1372         else
1373                 memcpy(p, q, bytes);
1374         q -= vcpu->pio.guest_page_offset;
1375         vunmap(q);
1376         free_pio_guest_pages(vcpu);
1377         return 0;
1378 }
1379
1380 int complete_pio(struct kvm_vcpu *vcpu)
1381 {
1382         struct kvm_pio_request *io = &vcpu->pio;
1383         long delta;
1384         int r;
1385
1386         kvm_x86_ops->cache_regs(vcpu);
1387
1388         if (!io->string) {
1389                 if (io->in)
1390                         memcpy(&vcpu->regs[VCPU_REGS_RAX], vcpu->pio_data,
1391                                io->size);
1392         } else {
1393                 if (io->in) {
1394                         r = pio_copy_data(vcpu);
1395                         if (r) {
1396                                 kvm_x86_ops->cache_regs(vcpu);
1397                                 return r;
1398                         }
1399                 }
1400
1401                 delta = 1;
1402                 if (io->rep) {
1403                         delta *= io->cur_count;
1404                         /*
1405                          * The size of the register should really depend on
1406                          * current address size.
1407                          */
1408                         vcpu->regs[VCPU_REGS_RCX] -= delta;
1409                 }
1410                 if (io->down)
1411                         delta = -delta;
1412                 delta *= io->size;
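                /* INS advances RDI (the destination); OUTS advances RSI (the source). */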
1413                 if (io->in)
1414                         vcpu->regs[VCPU_REGS_RDI] += delta;
1415                 else
1416                         vcpu->regs[VCPU_REGS_RSI] += delta;
1417         }
1418
1419         kvm_x86_ops->decache_regs(vcpu);
1420
1421         io->count -= io->cur_count;
1422         io->cur_count = 0;
1423
1424         return 0;
1425 }
1426
1427 static void kernel_pio(struct kvm_io_device *pio_dev,
1428                        struct kvm_vcpu *vcpu,
1429                        void *pd)
1430 {
1431         /* TODO: String I/O for in kernel device */
1432
1433         mutex_lock(&vcpu->kvm->lock);
1434         if (vcpu->pio.in)
1435                 kvm_iodevice_read(pio_dev, vcpu->pio.port,
1436                                   vcpu->pio.size,
1437                                   pd);
1438         else
1439                 kvm_iodevice_write(pio_dev, vcpu->pio.port,
1440                                    vcpu->pio.size,
1441                                    pd);
1442         mutex_unlock(&vcpu->kvm->lock);
1443 }
1444
1445 static void pio_string_write(struct kvm_io_device *pio_dev,
1446                              struct kvm_vcpu *vcpu)
1447 {
1448         struct kvm_pio_request *io = &vcpu->pio;
1449         void *pd = vcpu->pio_data;
1450         int i;
1451
1452         mutex_lock(&vcpu->kvm->lock);
1453         for (i = 0; i < io->cur_count; i++) {
1454                 kvm_iodevice_write(pio_dev, io->port,
1455                                    io->size,
1456                                    pd);
1457                 pd += io->size;
1458         }
1459         mutex_unlock(&vcpu->kvm->lock);
1460 }
1461
1462 static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
1463                                                gpa_t addr)
1464 {
1465         return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr);
1466 }
1467
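/*
 * Emulate a single (non-string) IN/OUT.  Returns 1 if an in-kernel device
 * handled the port, 0 if the access must be completed in userspace via
 * KVM_EXIT_IO.
 */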
1468 int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
1469                   int size, unsigned port)
1470 {
1471         struct kvm_io_device *pio_dev;
1472
1473         vcpu->run->exit_reason = KVM_EXIT_IO;
1474         vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
1475         vcpu->run->io.size = vcpu->pio.size = size;
1476         vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
1477         vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = 1;
1478         vcpu->run->io.port = vcpu->pio.port = port;
1479         vcpu->pio.in = in;
1480         vcpu->pio.string = 0;
1481         vcpu->pio.down = 0;
1482         vcpu->pio.guest_page_offset = 0;
1483         vcpu->pio.rep = 0;
1484
1485         kvm_x86_ops->cache_regs(vcpu);
1486         memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4);
1487         kvm_x86_ops->decache_regs(vcpu);
1488
1489         kvm_x86_ops->skip_emulated_instruction(vcpu);
1490
1491         pio_dev = vcpu_find_pio_dev(vcpu, port);
1492         if (pio_dev) {
1493                 kernel_pio(pio_dev, vcpu, vcpu->pio_data);
1494                 complete_pio(vcpu);
1495                 return 1;
1496         }
1497         return 0;
1498 }
1499 EXPORT_SYMBOL_GPL(kvm_emulate_pio);
1500
1501 int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
1502                   int size, unsigned long count, int down,
1503                   gva_t address, int rep, unsigned port)
1504 {
1505         unsigned now, in_page;
1506         int i, ret = 0;
1507         int nr_pages = 1;
1508         struct page *page;
1509         struct kvm_io_device *pio_dev;
1510
1511         vcpu->run->exit_reason = KVM_EXIT_IO;
1512         vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
1513         vcpu->run->io.size = vcpu->pio.size = size;
1514         vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
1515         vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = count;
1516         vcpu->run->io.port = vcpu->pio.port = port;
1517         vcpu->pio.in = in;
1518         vcpu->pio.string = 1;
1519         vcpu->pio.down = down;
1520         vcpu->pio.guest_page_offset = offset_in_page(address);
1521         vcpu->pio.rep = rep;
1522
1523         if (!count) {
1524                 kvm_x86_ops->skip_emulated_instruction(vcpu);
1525                 return 1;
1526         }
1527
1528         if (!down)
1529                 in_page = PAGE_SIZE - offset_in_page(address);
1530         else
1531                 in_page = offset_in_page(address) + size;
1532         now = min(count, (unsigned long)in_page / size);
1533         if (!now) {
1534                 /*
1535                  * String I/O straddles page boundary.  Pin two guest pages
1536                  * so that we satisfy atomicity constraints.  Do just one
1537                  * transaction to avoid complexity.
1538                  */
1539                 nr_pages = 2;
1540                 now = 1;
1541         }
1542         if (down) {
1543                 /*
1544                  * String I/O in reverse.  Yuck.  Kill the guest, fix later.
1545                  */
1546                 pr_unimpl(vcpu, "guest string pio down\n");
1547                 inject_gp(vcpu);
1548                 return 1;
1549         }
1550         vcpu->run->io.count = now;
1551         vcpu->pio.cur_count = now;
1552
1553         if (vcpu->pio.cur_count == vcpu->pio.count)
1554                 kvm_x86_ops->skip_emulated_instruction(vcpu);
1555
1556         for (i = 0; i < nr_pages; ++i) {
1557                 mutex_lock(&vcpu->kvm->lock);
1558                 page = gva_to_page(vcpu, address + i * PAGE_SIZE);
1559                 vcpu->pio.guest_pages[i] = page;
1560                 mutex_unlock(&vcpu->kvm->lock);
1561                 if (!page) {
1562                         inject_gp(vcpu);
1563                         free_pio_guest_pages(vcpu);
1564                         return 1;
1565                 }
1566         }
1567
1568         pio_dev = vcpu_find_pio_dev(vcpu, port);
1569         if (!vcpu->pio.in) {
1570                 /* string PIO write */
1571                 ret = pio_copy_data(vcpu);
1572                 if (ret >= 0 && pio_dev) {
1573                         pio_string_write(pio_dev, vcpu);
1574                         complete_pio(vcpu);
1575                         if (vcpu->pio.count == 0)
1576                                 ret = 1;
1577                 }
1578         } else if (pio_dev)
1579                 pr_unimpl(vcpu, "no string pio read support yet, "
1580                        "port %x size %d count %ld\n",
1581                         port, size, count);
1582
1583         return ret;
1584 }
1585 EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
1586
1587 __init void kvm_arch_init(void)
1588 {
1589         kvm_init_msr_list();
1590 }