KVM: Portability: Move kvm_get/set_msr[_common] to x86.c
[linux-2.6] / drivers / kvm / x86.c
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * derived from drivers/kvm/kvm_main.c
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "kvm.h"
#include "x86.h"
#include "segment_descriptor.h"
#include "irq.h"

#include <linux/kvm.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
#include <linux/module.h>

#include <asm/uaccess.h>

#define MAX_IO_MSRS 256
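
/*
 * The *_RESERVED_BITS masks below are built by OR-ing together every
 * architecturally defined bit of the register and inverting the result,
 * so "value & FOO_RESERVED_BITS" is nonzero exactly when the guest tries
 * to set a bit we do not know about.
 */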
#define CR0_RESERVED_BITS                                               \
        (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
                          | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
                          | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
#define CR4_RESERVED_BITS                                               \
        (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
                          | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
                          | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR  \
                          | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))

#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
#define EFER_RESERVED_BITS 0xfffffffffffff2fe
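
/*
 * Compute the linear base address of the segment named by @selector by
 * walking the host descriptor tables: bit 2 of a selector picks the LDT
 * over the GDT, and the low three bits (RPL plus the table-indicator
 * bit) are masked off to form the byte offset of the descriptor.  The
 * 32-bit base is scattered over three descriptor fields (bits 0-15,
 * 16-23 and 24-31); 64-bit system descriptors such as the TSS carry an
 * extra high dword.
 */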

unsigned long segment_base(u16 selector)
{
        struct descriptor_table gdt;
        struct segment_descriptor *d;
        unsigned long table_base;
        unsigned long v;

        if (selector == 0)
                return 0;

        asm("sgdt %0" : "=m"(gdt));
        table_base = gdt.base;

        if (selector & 4) {           /* from ldt */
                u16 ldt_selector;

                asm("sldt %0" : "=g"(ldt_selector));
                table_base = segment_base(ldt_selector);
        }
        d = (struct segment_descriptor *)(table_base + (selector & ~7));
        v = d->base_low | ((unsigned long)d->base_mid << 16) |
                ((unsigned long)d->base_high << 24);
#ifdef CONFIG_X86_64
        if (d->system == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
                v |= ((unsigned long)
                      ((struct segment_descriptor_64 *)d)->base_higher) << 32;
#endif
        return v;
}
EXPORT_SYMBOL_GPL(segment_base);

u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
{
        /*
         * The current base is cached in vcpu->apic_base whether the
         * local APIC is emulated in the kernel or in userspace, so
         * both cases read the same field.
         */
        return vcpu->apic_base;
}
EXPORT_SYMBOL_GPL(kvm_get_apic_base);

void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
{
        /* TODO: reserve bits check */
        if (irqchip_in_kernel(vcpu->kvm))
                kvm_lapic_set_base(vcpu, data);
        else
                vcpu->apic_base = data;
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);

static void inject_gp(struct kvm_vcpu *vcpu)
{
        kvm_x86_ops->inject_gp(vcpu, 0);
}

/*
 * Load the pae pdptrs.  Return true if they are all valid.
 */
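/*
 * Note on the validity check below: for a present PDPTE (bit 0 set),
 * bits 1-2, 5-8 and everything above the physical address width must
 * be zero.  The 0xfffffff0000001e6 mask encodes exactly those bits,
 * assuming a 36-bit physical address space.
 */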
int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
{
        gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
        unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
        int i;
        int ret;
        u64 pdpte[ARRAY_SIZE(vcpu->pdptrs)];

        mutex_lock(&vcpu->kvm->lock);
        ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
                                  offset * sizeof(u64), sizeof(pdpte));
        if (ret < 0) {
                ret = 0;
                goto out;
        }
        for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
                if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) {
                        ret = 0;
                        goto out;
                }
        }
        ret = 1;

        memcpy(vcpu->pdptrs, pdpte, sizeof(vcpu->pdptrs));
out:
        mutex_unlock(&vcpu->kvm->lock);

        return ret;
}
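
/*
 * The checks in set_cr0() mirror the #GP conditions a real CPU raises
 * on a mov to cr0: no not-write-through without cache-disable, no
 * paging without protected mode, and, when EFER.LME is set, long mode
 * may only be activated with CR4.PAE enabled and CS.L clear.
 */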

void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
        if (cr0 & CR0_RESERVED_BITS) {
                printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits set "
                       "(old cr0 0x%lx)\n", cr0, vcpu->cr0);
                inject_gp(vcpu);
                return;
        }

        if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
                printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
                inject_gp(vcpu);
                return;
        }

        if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
                printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
                       "and a clear PE flag\n");
                inject_gp(vcpu);
                return;
        }

        if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
#ifdef CONFIG_X86_64
                if ((vcpu->shadow_efer & EFER_LME)) {
                        int cs_db, cs_l;

                        if (!is_pae(vcpu)) {
                                printk(KERN_DEBUG "set_cr0: #GP, start paging "
                                       "in long mode while PAE is disabled\n");
                                inject_gp(vcpu);
                                return;
                        }
                        kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
                        if (cs_l) {
                                printk(KERN_DEBUG "set_cr0: #GP, start paging "
                                       "in long mode while CS.L == 1\n");
                                inject_gp(vcpu);
                                return;
                        }
                } else
#endif
                if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->cr3)) {
                        printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
                               "reserved bits\n");
                        inject_gp(vcpu);
                        return;
                }
        }

        kvm_x86_ops->set_cr0(vcpu, cr0);
        vcpu->cr0 = cr0;

        mutex_lock(&vcpu->kvm->lock);
        kvm_mmu_reset_context(vcpu);
        mutex_unlock(&vcpu->kvm->lock);
}
EXPORT_SYMBOL_GPL(set_cr0);

void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
{
        set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f));
}
EXPORT_SYMBOL_GPL(lmsw);

void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
        if (cr4 & CR4_RESERVED_BITS) {
                printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
                inject_gp(vcpu);
                return;
        }

        if (is_long_mode(vcpu)) {
                if (!(cr4 & X86_CR4_PAE)) {
                        printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
                               "in long mode\n");
                        inject_gp(vcpu);
                        return;
                }
        } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE)
                   && !load_pdptrs(vcpu, vcpu->cr3)) {
                printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
                inject_gp(vcpu);
                return;
        }

        if (cr4 & X86_CR4_VMXE) {
                printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
                inject_gp(vcpu);
                return;
        }
        kvm_x86_ops->set_cr4(vcpu, cr4);
        vcpu->cr4 = cr4;
        mutex_lock(&vcpu->kvm->lock);
        kvm_mmu_reset_context(vcpu);
        mutex_unlock(&vcpu->kvm->lock);
}
EXPORT_SYMBOL_GPL(set_cr4);

void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
        if (is_long_mode(vcpu)) {
                if (cr3 & CR3_L_MODE_RESERVED_BITS) {
                        printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
                        inject_gp(vcpu);
                        return;
                }
        } else {
                if (is_pae(vcpu)) {
                        if (cr3 & CR3_PAE_RESERVED_BITS) {
                                printk(KERN_DEBUG
                                       "set_cr3: #GP, reserved bits\n");
                                inject_gp(vcpu);
                                return;
                        }
                        if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
                                printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
                                       "reserved bits\n");
                                inject_gp(vcpu);
                                return;
                        }
                }
                /*
                 * We don't check reserved bits in nonpae mode, because
                 * this isn't enforced, and VMware depends on this.
                 */
        }

        mutex_lock(&vcpu->kvm->lock);
        /*
         * Does the new cr3 value map to physical memory? (Note, we
         * catch an invalid cr3 even in real-mode, because it would
         * cause trouble later on when we turn on paging anyway.)
         *
         * A real CPU would silently accept an invalid cr3 and would
         * attempt to use it - with largely undefined (and often hard
         * to debug) behavior on the guest side.
         */
        if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
                inject_gp(vcpu);
        else {
                vcpu->cr3 = cr3;
                vcpu->mmu.new_cr3(vcpu);
        }
        mutex_unlock(&vcpu->kvm->lock);
}
EXPORT_SYMBOL_GPL(set_cr3);

void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
{
        if (cr8 & CR8_RESERVED_BITS) {
                printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
                inject_gp(vcpu);
                return;
        }
        if (irqchip_in_kernel(vcpu->kvm))
                kvm_lapic_set_tpr(vcpu, cr8);
        else
                vcpu->cr8 = cr8;
}
EXPORT_SYMBOL_GPL(set_cr8);

unsigned long get_cr8(struct kvm_vcpu *vcpu)
{
        if (irqchip_in_kernel(vcpu->kvm))
                return kvm_lapic_get_cr8(vcpu);
        else
                return vcpu->cr8;
}
EXPORT_SYMBOL_GPL(get_cr8);

/*
 * List of msr numbers which we expose to userspace through KVM_GET_MSRS,
 * KVM_SET_MSRS and KVM_GET_MSR_INDEX_LIST.
 *
 * This list is modified at module load time to reflect the
 * capabilities of the host cpu.
 */
static u32 msrs_to_save[] = {
        MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
        MSR_K6_STAR,
#ifdef CONFIG_X86_64
        MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
        MSR_IA32_TIME_STAMP_COUNTER,
};

static unsigned num_msrs_to_save;

static u32 emulated_msrs[] = {
        MSR_IA32_MISC_ENABLE,
};
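
/*
 * Unlike the msrs_to_save list, emulated_msrs never reach host
 * hardware: the values live entirely in the vcpu, so these MSRs are
 * reported to userspace unconditionally, without the rdmsr probe
 * applied in kvm_init_msr_list().
 */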

#ifdef CONFIG_X86_64

static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
        if (efer & EFER_RESERVED_BITS) {
                printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
                       efer);
                inject_gp(vcpu);
                return;
        }

        if (is_paging(vcpu)
            && (vcpu->shadow_efer & EFER_LME) != (efer & EFER_LME)) {
                printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
                inject_gp(vcpu);
                return;
        }

        kvm_x86_ops->set_efer(vcpu, efer);

        efer &= ~EFER_LMA;
        efer |= vcpu->shadow_efer & EFER_LMA;

        vcpu->shadow_efer = efer;
}

#endif

/*
 * Writes msr value into the appropriate "register".
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
{
        return kvm_x86_ops->set_msr(vcpu, msr_index, data);
}

/*
 * Adapt set_msr() to msr_io()'s calling convention
 */
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
        return kvm_set_msr(vcpu, index, *data);
}

int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
        switch (msr) {
#ifdef CONFIG_X86_64
        case MSR_EFER:
                set_efer(vcpu, data);
                break;
#endif
        case MSR_IA32_MC0_STATUS:
                pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
                          __FUNCTION__, data);
                break;
        case MSR_IA32_MCG_STATUS:
                pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
                          __FUNCTION__, data);
                break;
        case MSR_IA32_UCODE_REV:
        case MSR_IA32_UCODE_WRITE:
        case 0x200 ... 0x2ff: /* MTRRs */
                break;
        case MSR_IA32_APICBASE:
                kvm_set_apic_base(vcpu, data);
                break;
        case MSR_IA32_MISC_ENABLE:
                vcpu->ia32_misc_enable_msr = data;
                break;
        default:
                pr_unimpl(vcpu, "unhandled wrmsr: 0x%x\n", msr);
                return 1;
        }
        return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_msr_common);

/*
 * Reads an msr value (of 'msr_index') into 'pdata'.
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
{
        return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
}

int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
        u64 data;

        switch (msr) {
        case 0xc0010010: /* SYSCFG */
        case 0xc0010015: /* HWCR */
        case MSR_IA32_PLATFORM_ID:
        case MSR_IA32_P5_MC_ADDR:
        case MSR_IA32_P5_MC_TYPE:
        case MSR_IA32_MC0_CTL:
        case MSR_IA32_MCG_STATUS:
        case MSR_IA32_MCG_CAP:
        case MSR_IA32_MC0_MISC:
        case MSR_IA32_MC0_MISC+4:
        case MSR_IA32_MC0_MISC+8:
        case MSR_IA32_MC0_MISC+12:
        case MSR_IA32_MC0_MISC+16:
        case MSR_IA32_UCODE_REV:
        case MSR_IA32_PERF_STATUS:
        case MSR_IA32_EBL_CR_POWERON:
                /* MTRR registers */
        case 0xfe:
        case 0x200 ... 0x2ff:
                data = 0;
                break;
        case 0xcd: /* fsb frequency */
                data = 3;
                break;
        case MSR_IA32_APICBASE:
                data = kvm_get_apic_base(vcpu);
                break;
        case MSR_IA32_MISC_ENABLE:
                data = vcpu->ia32_misc_enable_msr;
                break;
#ifdef CONFIG_X86_64
        case MSR_EFER:
                data = vcpu->shadow_efer;
                break;
#endif
        default:
                pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
                return 1;
        }
        *pdata = data;
        return 0;
}
EXPORT_SYMBOL_GPL(kvm_get_msr_common);

/*
 * Read or write a bunch of msrs. All parameters are kernel addresses.
 *
 * @return number of msrs set successfully.
 */
static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
                    struct kvm_msr_entry *entries,
                    int (*do_msr)(struct kvm_vcpu *vcpu,
                                  unsigned index, u64 *data))
{
        int i;

        vcpu_load(vcpu);

        for (i = 0; i < msrs->nmsrs; ++i)
                if (do_msr(vcpu, entries[i].index, &entries[i].data))
                        break;

        vcpu_put(vcpu);

        return i;
}

/*
 * Read or write a bunch of msrs. Parameters are user addresses.
 *
 * @return number of msrs set successfully.
 */
static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
                  int (*do_msr)(struct kvm_vcpu *vcpu,
                                unsigned index, u64 *data),
                  int writeback)
{
        struct kvm_msrs msrs;
        struct kvm_msr_entry *entries;
        int r, n;
        unsigned size;

        r = -EFAULT;
        if (copy_from_user(&msrs, user_msrs, sizeof msrs))
                goto out;

        r = -E2BIG;
        if (msrs.nmsrs >= MAX_IO_MSRS)
                goto out;

        r = -ENOMEM;
        size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
        entries = vmalloc(size);
        if (!entries)
                goto out;

        r = -EFAULT;
        if (copy_from_user(entries, user_msrs->entries, size))
                goto out_free;

        r = n = __msr_io(vcpu, &msrs, entries, do_msr);
        if (r < 0)
                goto out_free;

        r = -EFAULT;
        if (writeback && copy_to_user(user_msrs->entries, entries, size))
                goto out_free;

        r = n;

out_free:
        vfree(entries);
out:
        return r;
}
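
/*
 * Userspace sketch (illustrative only, not part of this file): reading
 * a single MSR through KVM_GET_MSRS on an open vcpu fd.  The variable
 * names are hypothetical.
 *
 *      struct {
 *              struct kvm_msrs hdr;
 *              struct kvm_msr_entry entry;
 *      } req = {
 *              .hdr.nmsrs   = 1,
 *              .entry.index = MSR_IA32_TIME_STAMP_COUNTER,
 *      };
 *
 *      if (ioctl(vcpu_fd, KVM_GET_MSRS, &req) == 1)
 *              printf("tsc = %llu\n", req.entry.data);
 */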

long kvm_arch_dev_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
{
        void __user *argp = (void __user *)arg;
        long r;

        switch (ioctl) {
        case KVM_GET_MSR_INDEX_LIST: {
                struct kvm_msr_list __user *user_msr_list = argp;
                struct kvm_msr_list msr_list;
                unsigned n;

                r = -EFAULT;
                if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
                        goto out;
                n = msr_list.nmsrs;
                msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
                if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
                        goto out;
                r = -E2BIG;
                if (n < msr_list.nmsrs)
                        goto out;
                r = -EFAULT;
                if (copy_to_user(user_msr_list->indices, &msrs_to_save,
                                 num_msrs_to_save * sizeof(u32)))
                        goto out;
                /*
                 * indices is an array of u32, so the pointer arithmetic
                 * below is in u32-sized elements, not bytes.
                 */
                if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
                                 &emulated_msrs,
                                 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
                        goto out;
                r = 0;
                break;
        }
        default:
                r = -EINVAL;
        }
out:
        return r;
}
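
/*
 * Userspace consumes KVM_GET_MSR_INDEX_LIST in two steps (sketch, not
 * part of this file): first call with nmsrs = 0 to learn the count
 * (the ioctl fails with E2BIG but writes the real nmsrs back), then
 * allocate room for nmsrs indices and call again.
 */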

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
        kvm_x86_ops->vcpu_load(vcpu, cpu);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
        kvm_x86_ops->vcpu_put(vcpu);
}
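
/*
 * CPUID leaf 0x80000001, EDX bit 20 advertises NX (no-execute).  If
 * the host kernel runs with EFER.NX clear, the feature is hidden from
 * the guest as well, since the shadow page tables could not honour
 * guest NX bits.
 */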

static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
{
        u64 efer;
        int i;
        struct kvm_cpuid_entry *e, *entry;

        rdmsrl(MSR_EFER, efer);
        entry = NULL;
        for (i = 0; i < vcpu->cpuid_nent; ++i) {
                e = &vcpu->cpuid_entries[i];
                if (e->function == 0x80000001) {
                        entry = e;
                        break;
                }
        }
        if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) {
                entry->edx &= ~(1 << 20);
                printk(KERN_INFO "kvm: guest NX capability removed\n");
        }
}

static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
                                    struct kvm_cpuid *cpuid,
                                    struct kvm_cpuid_entry __user *entries)
{
        int r;

        r = -E2BIG;
        if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
                goto out;
        r = -EFAULT;
        if (copy_from_user(&vcpu->cpuid_entries, entries,
                           cpuid->nent * sizeof(struct kvm_cpuid_entry)))
                goto out;
        vcpu->cpuid_nent = cpuid->nent;
        cpuid_fix_nx_cap(vcpu);
        return 0;

out:
        return r;
}

static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
                                    struct kvm_lapic_state *s)
{
        vcpu_load(vcpu);
        memcpy(s->regs, vcpu->apic->regs, sizeof *s);
        vcpu_put(vcpu);

        return 0;
}

static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
                                    struct kvm_lapic_state *s)
{
        vcpu_load(vcpu);
        memcpy(vcpu->apic->regs, s->regs, sizeof *s);
        kvm_apic_post_state_restore(vcpu);
        vcpu_put(vcpu);

        return 0;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
                         unsigned int ioctl, unsigned long arg)
{
        struct kvm_vcpu *vcpu = filp->private_data;
        void __user *argp = (void __user *)arg;
        int r;

        switch (ioctl) {
        case KVM_GET_LAPIC: {
                struct kvm_lapic_state lapic;

                memset(&lapic, 0, sizeof lapic);
                r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic);
                if (r)
                        goto out;
                r = -EFAULT;
                if (copy_to_user(argp, &lapic, sizeof lapic))
                        goto out;
                r = 0;
                break;
        }
        case KVM_SET_LAPIC: {
                struct kvm_lapic_state lapic;

                r = -EFAULT;
                if (copy_from_user(&lapic, argp, sizeof lapic))
                        goto out;
                r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);
                if (r)
                        goto out;
                r = 0;
                break;
        }
        case KVM_SET_CPUID: {
                struct kvm_cpuid __user *cpuid_arg = argp;
                struct kvm_cpuid cpuid;

                r = -EFAULT;
                if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
                        goto out;
                r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
                if (r)
                        goto out;
                break;
        }
        case KVM_GET_MSRS:
                r = msr_io(vcpu, argp, kvm_get_msr, 1);
                break;
        case KVM_SET_MSRS:
                r = msr_io(vcpu, argp, do_set_msr, 0);
                break;
        default:
                r = -EINVAL;
        }
out:
        return r;
}

static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
{
        int ret;

        /* The real-mode TSS needs three pages below the 4G boundary. */
        if (addr > (unsigned int)(-3 * PAGE_SIZE))
                return -EINVAL;
        ret = kvm_x86_ops->set_tss_addr(kvm, addr);
        return ret;
}

static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
                                         u32 kvm_nr_mmu_pages)
{
        if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
                return -EINVAL;

        mutex_lock(&kvm->lock);

        kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
        kvm->n_requested_mmu_pages = kvm_nr_mmu_pages;

        mutex_unlock(&kvm->lock);
        return 0;
}

static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
{
        return kvm->n_alloc_mmu_pages;
}

/*
 * Set a new alias region.  Aliases map a portion of physical memory into
 * another portion.  This is useful for memory windows, for example the PC
 * VGA region.
 */
static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
                                         struct kvm_memory_alias *alias)
{
        int r, n;
        struct kvm_mem_alias *p;

        r = -EINVAL;
        /* General sanity checks */
        if (alias->memory_size & (PAGE_SIZE - 1))
                goto out;
        if (alias->guest_phys_addr & (PAGE_SIZE - 1))
                goto out;
        if (alias->slot >= KVM_ALIAS_SLOTS)
                goto out;
        if (alias->guest_phys_addr + alias->memory_size
            < alias->guest_phys_addr)
                goto out;
        if (alias->target_phys_addr + alias->memory_size
            < alias->target_phys_addr)
                goto out;

        mutex_lock(&kvm->lock);

        p = &kvm->aliases[alias->slot];
        p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
        p->npages = alias->memory_size >> PAGE_SHIFT;
        p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;

        for (n = KVM_ALIAS_SLOTS; n > 0; --n)
                if (kvm->aliases[n - 1].npages)
                        break;
        kvm->naliases = n;

        kvm_mmu_zap_all(kvm);

        mutex_unlock(&kvm->lock);

        return 0;

out:
        return r;
}
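
/*
 * Userspace sketch (illustrative only; the values are hypothetical):
 * redirect the 128K VGA window so that guest-physical 0xa0000 is
 * backed by the memory at guest-physical 0x10000000:
 *
 *      struct kvm_memory_alias alias = {
 *              .slot             = 0,
 *              .guest_phys_addr  = 0xa0000,
 *              .memory_size      = 0x20000,
 *              .target_phys_addr = 0x10000000,
 *      };
 *
 *      ioctl(vm_fd, KVM_SET_MEMORY_ALIAS, &alias);
 */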

static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
{
        int r;

        r = 0;
        switch (chip->chip_id) {
        case KVM_IRQCHIP_PIC_MASTER:
                memcpy(&chip->chip.pic,
                       &pic_irqchip(kvm)->pics[0],
                       sizeof(struct kvm_pic_state));
                break;
        case KVM_IRQCHIP_PIC_SLAVE:
                memcpy(&chip->chip.pic,
                       &pic_irqchip(kvm)->pics[1],
                       sizeof(struct kvm_pic_state));
                break;
        case KVM_IRQCHIP_IOAPIC:
                memcpy(&chip->chip.ioapic,
                       ioapic_irqchip(kvm),
                       sizeof(struct kvm_ioapic_state));
                break;
        default:
                r = -EINVAL;
                break;
        }
        return r;
}

static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
{
        int r;

        r = 0;
        switch (chip->chip_id) {
        case KVM_IRQCHIP_PIC_MASTER:
                memcpy(&pic_irqchip(kvm)->pics[0],
                       &chip->chip.pic,
                       sizeof(struct kvm_pic_state));
                break;
        case KVM_IRQCHIP_PIC_SLAVE:
                memcpy(&pic_irqchip(kvm)->pics[1],
                       &chip->chip.pic,
                       sizeof(struct kvm_pic_state));
                break;
        case KVM_IRQCHIP_IOAPIC:
                memcpy(ioapic_irqchip(kvm),
                       &chip->chip.ioapic,
                       sizeof(struct kvm_ioapic_state));
                break;
        default:
                r = -EINVAL;
                break;
        }
        kvm_pic_update_irq(pic_irqchip(kvm));
        return r;
}

long kvm_arch_vm_ioctl(struct file *filp,
                       unsigned int ioctl, unsigned long arg)
{
        struct kvm *kvm = filp->private_data;
        void __user *argp = (void __user *)arg;
        int r = -EINVAL;

        switch (ioctl) {
        case KVM_SET_TSS_ADDR:
                r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
                if (r < 0)
                        goto out;
                break;
        case KVM_SET_MEMORY_REGION: {
                struct kvm_memory_region kvm_mem;
                struct kvm_userspace_memory_region kvm_userspace_mem;

                r = -EFAULT;
                if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
                        goto out;
                kvm_userspace_mem.slot = kvm_mem.slot;
                kvm_userspace_mem.flags = kvm_mem.flags;
                kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
                kvm_userspace_mem.memory_size = kvm_mem.memory_size;
                r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
                if (r)
                        goto out;
                break;
        }
        case KVM_SET_NR_MMU_PAGES:
                r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
                if (r)
                        goto out;
                break;
        case KVM_GET_NR_MMU_PAGES:
                r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
                break;
        case KVM_SET_MEMORY_ALIAS: {
                struct kvm_memory_alias alias;

                r = -EFAULT;
                if (copy_from_user(&alias, argp, sizeof alias))
                        goto out;
                r = kvm_vm_ioctl_set_memory_alias(kvm, &alias);
                if (r)
                        goto out;
                break;
        }
        case KVM_CREATE_IRQCHIP:
                r = -ENOMEM;
                kvm->vpic = kvm_create_pic(kvm);
                if (kvm->vpic) {
                        r = kvm_ioapic_init(kvm);
                        if (r) {
                                kfree(kvm->vpic);
                                kvm->vpic = NULL;
                                goto out;
                        }
                } else
                        goto out;
                break;
        case KVM_IRQ_LINE: {
                struct kvm_irq_level irq_event;

                r = -EFAULT;
                if (copy_from_user(&irq_event, argp, sizeof irq_event))
                        goto out;
                if (irqchip_in_kernel(kvm)) {
                        mutex_lock(&kvm->lock);
                        /*
                         * ISA interrupts (< 16) are routed to both the
                         * PIC pair and the IOAPIC; the rest go to the
                         * IOAPIC only.
                         */
                        if (irq_event.irq < 16)
                                kvm_pic_set_irq(pic_irqchip(kvm),
                                                irq_event.irq,
                                                irq_event.level);
                        kvm_ioapic_set_irq(kvm->vioapic,
                                           irq_event.irq,
                                           irq_event.level);
                        mutex_unlock(&kvm->lock);
                        r = 0;
                }
                break;
        }
        case KVM_GET_IRQCHIP: {
                /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
                struct kvm_irqchip chip;

                r = -EFAULT;
                if (copy_from_user(&chip, argp, sizeof chip))
                        goto out;
                r = -ENXIO;
                if (!irqchip_in_kernel(kvm))
                        goto out;
                r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
                if (r)
                        goto out;
                r = -EFAULT;
                if (copy_to_user(argp, &chip, sizeof chip))
                        goto out;
                r = 0;
                break;
        }
        case KVM_SET_IRQCHIP: {
                /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
                struct kvm_irqchip chip;

                r = -EFAULT;
                if (copy_from_user(&chip, argp, sizeof chip))
                        goto out;
                r = -ENXIO;
                if (!irqchip_in_kernel(kvm))
                        goto out;
                r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
                if (r)
                        goto out;
                r = 0;
                break;
        }
        default:
                ;
        }
out:
        return r;
}
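
/*
 * Probe each MSR in msrs_to_save on the host with rdmsr_safe() and
 * compact the list in place, so that only MSRs the host actually
 * implements are advertised to userspace.
 */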

static __init void kvm_init_msr_list(void)
{
        u32 dummy[2];
        unsigned i, j;

        for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
                if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
                        continue;
                if (j < i)
                        msrs_to_save[j] = msrs_to_save[i];
                j++;
        }
        num_msrs_to_save = j;
}

__init void kvm_arch_init(void)
{
        kvm_init_msr_list();
}