/*
 * s390host.c -- hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 */
15 #include <linux/compiler.h>
16 #include <linux/err.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/module.h>
22 #include <linux/slab.h>
23 #include <linux/timer.h>
24 #include <asm/lowcore.h>
25 #include <asm/pgtable.h>
30 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
32 struct kvm_stats_debugfs_item debugfs_entries[] = {
33 { "userspace_handled", VCPU_STAT(exit_userspace) },
34 { "exit_validity", VCPU_STAT(exit_validity) },
35 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
36 { "exit_external_request", VCPU_STAT(exit_external_request) },
37 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
38 { "exit_instruction", VCPU_STAT(exit_instruction) },
39 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
40 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
41 { "instruction_lctg", VCPU_STAT(instruction_lctg) },
42 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
43 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
44 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
45 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
46 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
47 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
48 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
49 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
50 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
51 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
52 { "instruction_spx", VCPU_STAT(instruction_spx) },
53 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
54 { "instruction_stap", VCPU_STAT(instruction_stap) },
55 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
56 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
57 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
58 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
59 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
/* Section: not file related */

/*
 * Hardware enable/disable and setup hooks required by common KVM code.
 * SIE is always available on s390, so these are no-ops here.
 */
void kvm_arch_hardware_enable(void *garbage)
{
	/* every s390 is virtualization enabled ;-) */
}

void kvm_arch_hardware_disable(void *garbage)
{
}

void decache_vcpus_on_cpu(int cpu)
{
}

int kvm_arch_hardware_setup(void)
{
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
}

void kvm_arch_check_processor_compat(void *rtn)
{
}

int kvm_arch_init(void *opaque)
{
	return 0;
}

void kvm_arch_exit(void)
{
}
100 /* Section: device related */
101 long kvm_arch_dev_ioctl(struct file *filp,
102 unsigned int ioctl, unsigned long arg)
104 if (ioctl == KVM_S390_ENABLE_SIE)
105 return s390_enable_sie();
109 int kvm_dev_ioctl_check_extension(long ext)
/* Section: vm related */

/*
 * Get (and clear) the dirty memory log for a memory slot.
 * Dirty logging is not implemented on s390; report success with an
 * empty log so userspace keeps working.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	return 0;
}
124 long kvm_arch_vm_ioctl(struct file *filp,
125 unsigned int ioctl, unsigned long arg)
127 struct kvm *kvm = filp->private_data;
128 void __user *argp = (void __user *)arg;
132 case KVM_S390_INTERRUPT: {
133 struct kvm_s390_interrupt s390int;
136 if (copy_from_user(&s390int, argp, sizeof(s390int)))
138 r = kvm_s390_inject_vm(kvm, &s390int);
148 struct kvm *kvm_arch_create_vm(void)
154 rc = s390_enable_sie();
159 kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
163 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
167 sprintf(debug_name, "kvm-%u", current->pid);
169 kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
173 spin_lock_init(&kvm->arch.float_int.lock);
174 INIT_LIST_HEAD(&kvm->arch.float_int.list);
176 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
177 VM_EVENT(kvm, 3, "%s", "vm created");
179 try_module_get(THIS_MODULE);
183 free_page((unsigned long)(kvm->arch.sca));
190 void kvm_arch_destroy_vm(struct kvm *kvm)
192 debug_unregister(kvm->arch.dbf);
193 free_page((unsigned long)(kvm->arch.sca));
195 module_put(THIS_MODULE);
/* Section: vcpu related */

/* Nothing arch-specific to do at common-code vcpu init time. */
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	return 0;
}

void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but doesn't call it */
}
210 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
212 save_fp_regs(&vcpu->arch.host_fpregs);
213 save_access_regs(vcpu->arch.host_acrs);
214 vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
215 restore_fp_regs(&vcpu->arch.guest_fpregs);
216 restore_access_regs(vcpu->arch.guest_acrs);
218 if (signal_pending(current))
219 atomic_set_mask(CPUSTAT_STOP_INT,
220 &vcpu->arch.sie_block->cpuflags);
223 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
225 save_fp_regs(&vcpu->arch.guest_fpregs);
226 save_access_regs(vcpu->arch.guest_acrs);
227 restore_fp_regs(&vcpu->arch.host_fpregs);
228 restore_access_regs(vcpu->arch.host_acrs);
231 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
233 /* this equals initial cpu reset in pop, but we don't switch to ESA */
234 vcpu->arch.sie_block->gpsw.mask = 0UL;
235 vcpu->arch.sie_block->gpsw.addr = 0UL;
236 vcpu->arch.sie_block->prefix = 0UL;
237 vcpu->arch.sie_block->ihcpu = 0xffff;
238 vcpu->arch.sie_block->cputm = 0UL;
239 vcpu->arch.sie_block->ckc = 0UL;
240 vcpu->arch.sie_block->todpr = 0;
241 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
242 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
243 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
244 vcpu->arch.guest_fpregs.fpc = 0;
245 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
246 vcpu->arch.sie_block->gbea = 1;
249 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
251 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
252 vcpu->arch.sie_block->gmslm = 0xffffffffffUL;
253 vcpu->arch.sie_block->gmsor = 0x000000000000;
254 vcpu->arch.sie_block->ecb = 2;
255 vcpu->arch.sie_block->eca = 0xC1002001U;
256 setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
257 (unsigned long) vcpu);
258 get_cpu_id(&vcpu->arch.cpu_id);
259 vcpu->arch.cpu_id.version = 0xfe;
263 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
266 struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
272 vcpu->arch.sie_block = (struct sie_block *) get_zeroed_page(GFP_KERNEL);
274 if (!vcpu->arch.sie_block)
277 vcpu->arch.sie_block->icpua = id;
278 BUG_ON(!kvm->arch.sca);
279 BUG_ON(kvm->arch.sca->cpu[id].sda);
280 kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
281 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
282 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
284 spin_lock_init(&vcpu->arch.local_int.lock);
285 INIT_LIST_HEAD(&vcpu->arch.local_int.list);
286 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
287 spin_lock_bh(&kvm->arch.float_int.lock);
288 kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
289 init_waitqueue_head(&vcpu->arch.local_int.wq);
290 spin_unlock_bh(&kvm->arch.float_int.lock);
292 rc = kvm_vcpu_init(vcpu, kvm, id);
295 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
296 vcpu->arch.sie_block);
298 try_module_get(THIS_MODULE);
307 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
309 VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
310 free_page((unsigned long)(vcpu->arch.sie_block));
312 module_put(THIS_MODULE);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	return 0;
}

/* KVM_S390_INITIAL_RESET ioctl backend: architected initial cpu reset. */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}
330 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
333 memcpy(&vcpu->arch.guest_gprs, ®s->gprs, sizeof(regs->gprs));
338 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
341 memcpy(®s->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
346 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
347 struct kvm_sregs *sregs)
350 memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
351 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
356 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
357 struct kvm_sregs *sregs)
360 memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
361 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
366 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
369 memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
370 vcpu->arch.guest_fpregs.fpc = fpu->fpc;
375 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
378 memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
379 fpu->fpc = vcpu->arch.guest_fpregs.fpc;
384 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
389 if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
392 vcpu->arch.sie_block->gpsw = psw;
397 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
398 struct kvm_translation *tr)
400 return -EINVAL; /* not implemented yet */
403 int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
404 struct kvm_debug_guest *dbg)
406 return -EINVAL; /* not implemented yet */
409 static void __vcpu_run(struct kvm_vcpu *vcpu)
411 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
416 vcpu->arch.sie_block->icptcode = 0;
420 VCPU_EVENT(vcpu, 6, "entering sie flags %x",
421 atomic_read(&vcpu->arch.sie_block->cpuflags));
422 sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs);
423 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
424 vcpu->arch.sie_block->icptcode);
429 memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
432 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
439 if (vcpu->sigset_active)
440 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
442 atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
444 BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
446 switch (kvm_run->exit_reason) {
447 case KVM_EXIT_S390_SIEIC:
448 vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
449 vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
451 case KVM_EXIT_UNKNOWN:
452 case KVM_EXIT_S390_RESET:
461 kvm_s390_deliver_pending_interrupts(vcpu);
463 rc = kvm_handle_sie_intercept(vcpu);
464 } while (!signal_pending(current) && !rc);
466 if (signal_pending(current) && !rc)
469 if (rc == -ENOTSUPP) {
470 /* intercept cannot be handled in-kernel, prepare kvm-run */
471 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
472 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
473 kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask;
474 kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr;
475 kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
476 kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
480 if (rc == -EREMOTE) {
481 /* intercept was handled, but userspace support is needed
482 * kvm_run has been prepared by the handler */
486 if (vcpu->sigset_active)
487 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
491 vcpu->stat.exit_userspace++;
495 static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
496 unsigned long n, int prefix)
499 return copy_to_guest(vcpu, guestdest, from, n);
501 return copy_to_guest_absolute(vcpu, guestdest, from, n);
505 * store status at address
506 * we use have two special cases:
507 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
508 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
510 int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
512 const unsigned char archmode = 1;
515 if (addr == KVM_S390_STORE_STATUS_NOADDR) {
516 if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
518 addr = SAVE_AREA_BASE;
520 } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
521 if (copy_to_guest(vcpu, 163ul, &archmode, 1))
523 addr = SAVE_AREA_BASE;
528 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs),
529 vcpu->arch.guest_fpregs.fprs, 128, prefix))
532 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs),
533 vcpu->arch.guest_gprs, 128, prefix))
536 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw),
537 &vcpu->arch.sie_block->gpsw, 16, prefix))
540 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg),
541 &vcpu->arch.sie_block->prefix, 4, prefix))
544 if (__guestcopy(vcpu,
545 addr + offsetof(struct save_area_s390x, fp_ctrl_reg),
546 &vcpu->arch.guest_fpregs.fpc, 4, prefix))
549 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg),
550 &vcpu->arch.sie_block->todpr, 4, prefix))
553 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer),
554 &vcpu->arch.sie_block->cputm, 8, prefix))
557 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp),
558 &vcpu->arch.sie_block->ckc, 8, prefix))
561 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs),
562 &vcpu->arch.guest_acrs, 64, prefix))
565 if (__guestcopy(vcpu,
566 addr + offsetof(struct save_area_s390x, ctrl_regs),
567 &vcpu->arch.sie_block->gcr, 128, prefix))
/* Wrapper around __kvm_s390_vcpu_store_status returning its result. */
static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	int rc;

	rc = __kvm_s390_vcpu_store_status(vcpu, addr);
	return rc;
}
582 long kvm_arch_vcpu_ioctl(struct file *filp,
583 unsigned int ioctl, unsigned long arg)
585 struct kvm_vcpu *vcpu = filp->private_data;
586 void __user *argp = (void __user *)arg;
589 case KVM_S390_INTERRUPT: {
590 struct kvm_s390_interrupt s390int;
592 if (copy_from_user(&s390int, argp, sizeof(s390int)))
594 return kvm_s390_inject_vcpu(vcpu, &s390int);
596 case KVM_S390_STORE_STATUS:
597 return kvm_s390_vcpu_store_status(vcpu, arg);
598 case KVM_S390_SET_INITIAL_PSW: {
601 if (copy_from_user(&psw, argp, sizeof(psw)))
603 return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
605 case KVM_S390_INITIAL_RESET:
606 return kvm_arch_vcpu_ioctl_initial_reset(vcpu);
613 /* Section: memory related */
614 int kvm_arch_set_memory_region(struct kvm *kvm,
615 struct kvm_userspace_memory_region *mem,
616 struct kvm_memory_slot old,
619 /* A few sanity checks. We can have exactly one memory slot which has
620 to start at guest virtual zero and which has to be located at a
621 page boundary in userland and which has to end at a page boundary.
622 The memory in userland is ok to be fragmented into various different
623 vmas. It is okay to mmap() and munmap() stuff in this slot after
624 doing this call at any time */
629 if (mem->guest_phys_addr)
632 if (mem->userspace_addr & (PAGE_SIZE - 1))
635 if (mem->memory_size & (PAGE_SIZE - 1))
638 kvm->arch.guest_origin = mem->userspace_addr;
639 kvm->arch.guest_memsize = mem->memory_size;
641 /* FIXME: we do want to interrupt running CPUs and update their memory
642 configuration now to avoid race conditions. But hey, changing the
643 memory layout while virtual CPUs are running is usually bad
644 programming practice. */
649 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
654 static int __init kvm_s390_init(void)
656 return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
659 static void __exit kvm_s390_exit(void)
664 module_init(kvm_s390_init);
665 module_exit(kvm_s390_exit);