X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=drivers%2Flguest%2Finterrupts_and_traps.c;h=a1039068f95c378cd85b2d6ff2f0f6729c003db2;hb=4fb8af10d0fd09372d52966b76922b9e82bbc950;hp=2b66f79c208b519d19937af625ae64a8b8fc70a5;hpb=37e58df30063e229ee5157f9d1c1fa1d749917c2;p=linux-2.6 diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 2b66f79c20..a1039068f9 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -41,11 +41,11 @@ static int idt_present(u32 lo, u32 hi) /* We need a helper to "push" a value onto the Guest's stack, since that's a * big part of what delivering an interrupt does. */ -static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val) +static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val) { /* Stack grows upwards: move stack then write value. */ *gstack -= 4; - lgwrite(lg, *gstack, u32, val); + lgwrite(cpu, *gstack, u32, val); } /*H:210 The set_guest_interrupt() routine actually delivers the interrupt or @@ -60,7 +60,7 @@ static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val) * We set up the stack just like the CPU does for a real interrupt, so it's * identical for the Guest (and the standard "iret" instruction will undo * it). */ -static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) +static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, int has_err) { unsigned long gstack, origstack; u32 eflags, ss, irq_enable; @@ -69,59 +69,59 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) /* There are two cases for interrupts: one where the Guest is already * in the kernel, and a more complex one where the Guest is in * userspace. We check the privilege level to find out. */ - if ((lg->regs->ss&0x3) != GUEST_PL) { + if ((cpu->regs->ss&0x3) != GUEST_PL) { /* The Guest told us their kernel stack with the SET_STACK * hypercall: both the virtual address and the segment */ - virtstack = lg->esp1; - ss = lg->ss1; + virtstack = cpu->esp1; + ss = cpu->ss1; - origstack = gstack = guest_pa(lg, virtstack); + origstack = gstack = guest_pa(cpu, virtstack); /* We push the old stack segment and pointer onto the new * stack: when the Guest does an "iret" back from the interrupt * handler the CPU will notice they're dropping privilege * levels and expect these here. */ - push_guest_stack(lg, &gstack, lg->regs->ss); - push_guest_stack(lg, &gstack, lg->regs->esp); + push_guest_stack(cpu, &gstack, cpu->regs->ss); + push_guest_stack(cpu, &gstack, cpu->regs->esp); } else { /* We're staying on the same Guest (kernel) stack. */ - virtstack = lg->regs->esp; - ss = lg->regs->ss; + virtstack = cpu->regs->esp; + ss = cpu->regs->ss; - origstack = gstack = guest_pa(lg, virtstack); + origstack = gstack = guest_pa(cpu, virtstack); } /* Remember that we never let the Guest actually disable interrupts, so * the "Interrupt Flag" bit is always set. We copy that bit from the * Guest's "irq_enabled" field into the eflags word: we saw the Guest * copy it back in "lguest_iret". */ - eflags = lg->regs->eflags; - if (get_user(irq_enable, &lg->lguest_data->irq_enabled) == 0 + eflags = cpu->regs->eflags; + if (get_user(irq_enable, &cpu->lg->lguest_data->irq_enabled) == 0 && !(irq_enable & X86_EFLAGS_IF)) eflags &= ~X86_EFLAGS_IF; /* An interrupt is expected to push three things on the stack: the old * "eflags" word, the old code segment, and the old instruction * pointer. */ - push_guest_stack(lg, &gstack, eflags); - push_guest_stack(lg, &gstack, lg->regs->cs); - push_guest_stack(lg, &gstack, lg->regs->eip); + push_guest_stack(cpu, &gstack, eflags); + push_guest_stack(cpu, &gstack, cpu->regs->cs); + push_guest_stack(cpu, &gstack, cpu->regs->eip); /* For the six traps which supply an error code, we push that, too. */ if (has_err) - push_guest_stack(lg, &gstack, lg->regs->errcode); + push_guest_stack(cpu, &gstack, cpu->regs->errcode); /* Now we've pushed all the old state, we change the stack, the code * segment and the address to execute. */ - lg->regs->ss = ss; - lg->regs->esp = virtstack + (gstack - origstack); - lg->regs->cs = (__KERNEL_CS|GUEST_PL); - lg->regs->eip = idt_address(lo, hi); + cpu->regs->ss = ss; + cpu->regs->esp = virtstack + (gstack - origstack); + cpu->regs->cs = (__KERNEL_CS|GUEST_PL); + cpu->regs->eip = idt_address(lo, hi); /* There are two kinds of interrupt handlers: 0xE is an "interrupt * gate" which expects interrupts to be disabled on entry. */ if (idt_type(lo, hi) == 0xE) - if (put_user(0, &lg->lguest_data->irq_enabled)) - kill_guest(lg, "Disabling interrupts"); + if (put_user(0, &cpu->lg->lguest_data->irq_enabled)) + kill_guest(cpu, "Disabling interrupts"); } /*H:205 @@ -129,23 +129,22 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) * * maybe_do_interrupt() gets called before every entry to the Guest, to see if * we should divert the Guest to running an interrupt handler. */ -void maybe_do_interrupt(struct lguest *lg) +void maybe_do_interrupt(struct lg_cpu *cpu) { unsigned int irq; DECLARE_BITMAP(blk, LGUEST_IRQS); struct desc_struct *idt; /* If the Guest hasn't even initialized yet, we can do nothing. */ - if (!lg->lguest_data) + if (!cpu->lg->lguest_data) return; /* Take our "irqs_pending" array and remove any interrupts the Guest * wants blocked: the result ends up in "blk". */ - if (copy_from_user(&blk, lg->lguest_data->blocked_interrupts, + if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts, sizeof(blk))) return; - - bitmap_andnot(blk, lg->irqs_pending, blk, LGUEST_IRQS); + bitmap_andnot(blk, cpu->irqs_pending, blk, LGUEST_IRQS); /* Find the first interrupt. */ irq = find_first_bit(blk, LGUEST_IRQS); @@ -155,19 +154,20 @@ void maybe_do_interrupt(struct lguest *lg) /* They may be in the middle of an iret, where they asked us never to * deliver interrupts. */ - if (lg->regs->eip >= lg->noirq_start && lg->regs->eip < lg->noirq_end) + if (cpu->regs->eip >= cpu->lg->noirq_start && + (cpu->regs->eip < cpu->lg->noirq_end)) return; /* If they're halted, interrupts restart them. */ - if (lg->halted) { + if (cpu->halted) { /* Re-enable interrupts. */ - if (put_user(X86_EFLAGS_IF, &lg->lguest_data->irq_enabled)) - kill_guest(lg, "Re-enabling interrupts"); - lg->halted = 0; + if (put_user(X86_EFLAGS_IF, &cpu->lg->lguest_data->irq_enabled)) + kill_guest(cpu, "Re-enabling interrupts"); + cpu->halted = 0; } else { /* Otherwise we check if they have interrupts disabled. */ u32 irq_enabled; - if (get_user(irq_enabled, &lg->lguest_data->irq_enabled)) + if (get_user(irq_enabled, &cpu->lg->lguest_data->irq_enabled)) irq_enabled = 0; if (!irq_enabled) return; @@ -176,15 +176,15 @@ void maybe_do_interrupt(struct lguest *lg) /* Look at the IDT entry the Guest gave us for this interrupt. The * first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip * over them. */ - idt = &lg->arch.idt[FIRST_EXTERNAL_VECTOR+irq]; + idt = &cpu->arch.idt[FIRST_EXTERNAL_VECTOR+irq]; /* If they don't have a handler (yet?), we just ignore it */ if (idt_present(idt->a, idt->b)) { /* OK, mark it no longer pending and deliver it. */ - clear_bit(irq, lg->irqs_pending); + clear_bit(irq, cpu->irqs_pending); /* set_guest_interrupt() takes the interrupt descriptor and a * flag to say whether this interrupt pushes an error code onto * the stack as well: virtual interrupts never do. */ - set_guest_interrupt(lg, idt->a, idt->b, 0); + set_guest_interrupt(cpu, idt->a, idt->b, 0); } /* Every time we deliver an interrupt, we update the timestamp in the @@ -192,7 +192,7 @@ void maybe_do_interrupt(struct lguest *lg) * did this more often, but it can actually be quite slow: doing it * here is a compromise which means at least it gets updated every * timer interrupt. */ - write_timestamp(lg); + write_timestamp(cpu); } /*:*/ @@ -236,28 +236,28 @@ void free_interrupts(void) clear_bit(syscall_vector, used_vectors); } -/*H:220 Now we've got the routines to deliver interrupts, delivering traps - * like page fault is easy. The only trick is that Intel decided that some - * traps should have error codes: */ +/*H:220 Now we've got the routines to deliver interrupts, delivering traps like + * page fault is easy. The only trick is that Intel decided that some traps + * should have error codes: */ static int has_err(unsigned int trap) { return (trap == 8 || (trap >= 10 && trap <= 14) || trap == 17); } /* deliver_trap() returns true if it could deliver the trap. */ -int deliver_trap(struct lguest *lg, unsigned int num) +int deliver_trap(struct lg_cpu *cpu, unsigned int num) { /* Trap numbers are always 8 bit, but we set an impossible trap number * for traps inside the Switcher, so check that here. */ - if (num >= ARRAY_SIZE(lg->arch.idt)) + if (num >= ARRAY_SIZE(cpu->arch.idt)) return 0; /* Early on the Guest hasn't set the IDT entries (or maybe it put a * bogus one in): if we fail here, the Guest will be killed. */ - if (!idt_present(lg->arch.idt[num].a, lg->arch.idt[num].b)) + if (!idt_present(cpu->arch.idt[num].a, cpu->arch.idt[num].b)) return 0; - set_guest_interrupt(lg, lg->arch.idt[num].a, lg->arch.idt[num].b, - has_err(num)); + set_guest_interrupt(cpu, cpu->arch.idt[num].a, + cpu->arch.idt[num].b, has_err(num)); return 1; } @@ -309,18 +309,18 @@ static int direct_trap(unsigned int num) * the Guest. * * Which is deeply unfair, because (literally!) it wasn't the Guests' fault. */ -void pin_stack_pages(struct lguest *lg) +void pin_stack_pages(struct lg_cpu *cpu) { unsigned int i; /* Depending on the CONFIG_4KSTACKS option, the Guest can have one or * two pages of stack space. */ - for (i = 0; i < lg->stack_pages; i++) + for (i = 0; i < cpu->lg->stack_pages; i++) /* The stack grows *upwards*, so the address we're given is the * start of the page after the kernel stack. Subtract one to * get back onto the first stack page, and keep subtracting to * get to the rest of the stack pages. */ - pin_page(lg, lg->esp1 - 1 - i * PAGE_SIZE); + pin_page(cpu, cpu->esp1 - 1 - i * PAGE_SIZE); } /* Direct traps also mean that we need to know whenever the Guest wants to use @@ -331,21 +331,21 @@ void pin_stack_pages(struct lguest *lg) * * In Linux each process has its own kernel stack, so this happens a lot: we * change stacks on each context switch. */ -void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages) +void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages) { /* You are not allowed have a stack segment with privilege level 0: bad * Guest! */ if ((seg & 0x3) != GUEST_PL) - kill_guest(lg, "bad stack segment %i", seg); + kill_guest(cpu, "bad stack segment %i", seg); /* We only expect one or two stack pages. */ if (pages > 2) - kill_guest(lg, "bad stack pages %u", pages); + kill_guest(cpu, "bad stack pages %u", pages); /* Save where the stack is, and how many pages */ - lg->ss1 = seg; - lg->esp1 = esp; - lg->stack_pages = pages; + cpu->ss1 = seg; + cpu->esp1 = esp; + cpu->lg->stack_pages = pages; /* Make sure the new stack pages are mapped */ - pin_stack_pages(lg); + pin_stack_pages(cpu); } /* All this reference to mapping stacks leads us neatly into the other complex @@ -353,7 +353,7 @@ void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages) /*H:235 This is the routine which actually checks the Guest's IDT entry and * transfers it into the entry in "struct lguest": */ -static void set_trap(struct lguest *lg, struct desc_struct *trap, +static void set_trap(struct lg_cpu *cpu, struct desc_struct *trap, unsigned int num, u32 lo, u32 hi) { u8 type = idt_type(lo, hi); @@ -366,7 +366,7 @@ static void set_trap(struct lguest *lg, struct desc_struct *trap, /* We only support interrupt and trap gates. */ if (type != 0xE && type != 0xF) - kill_guest(lg, "bad IDT type %i", type); + kill_guest(cpu, "bad IDT type %i", type); /* We only copy the handler address, present bit, privilege level and * type. The privilege level controls where the trap can be triggered @@ -383,7 +383,7 @@ static void set_trap(struct lguest *lg, struct desc_struct *trap, * * We saw the Guest setting Interrupt Descriptor Table (IDT) entries with the * LHCALL_LOAD_IDT_ENTRY hypercall before: that comes here. */ -void load_guest_idt_entry(struct lguest *lg, unsigned int num, u32 lo, u32 hi) +void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi) { /* Guest never handles: NMI, doublefault, spurious interrupt or * hypercall. We ignore when it tries to set them. */ @@ -392,13 +392,13 @@ void load_guest_idt_entry(struct lguest *lg, unsigned int num, u32 lo, u32 hi) /* Mark the IDT as changed: next time the Guest runs we'll know we have * to copy this again. */ - lg->changed |= CHANGED_IDT; + cpu->changed |= CHANGED_IDT; /* Check that the Guest doesn't try to step outside the bounds. */ - if (num >= ARRAY_SIZE(lg->arch.idt)) - kill_guest(lg, "Setting idt entry %u", num); + if (num >= ARRAY_SIZE(cpu->arch.idt)) + kill_guest(cpu, "Setting idt entry %u", num); else - set_trap(lg, &lg->arch.idt[num], num, lo, hi); + set_trap(cpu, &cpu->arch.idt[num], num, lo, hi); } /* The default entry for each interrupt points into the Switcher routines which @@ -406,7 +406,8 @@ void load_guest_idt_entry(struct lguest *lg, unsigned int num, u32 lo, u32 hi) * deliver_trap() to bounce it back into the Guest. */ static void default_idt_entry(struct desc_struct *idt, int trap, - const unsigned long handler) + const unsigned long handler, + const struct desc_struct *base) { /* A present interrupt gate. */ u32 flags = 0x8e00; @@ -415,6 +416,10 @@ static void default_idt_entry(struct desc_struct *idt, * the Guest to use the "int" instruction to trigger it. */ if (trap == LGUEST_TRAP_ENTRY) flags |= (GUEST_PL << 13); + else if (base) + /* Copy priv. level from what Guest asked for. This allows + * debug (int 3) traps from Guest userspace, for example. */ + flags |= (base->b & 0x6000); /* Now pack it into the IDT entry in its weird format. */ idt->a = (LGUEST_CS<<16) | (handler&0x0000FFFF); @@ -428,20 +433,22 @@ void setup_default_idt_entries(struct lguest_ro_state *state, unsigned int i; for (i = 0; i < ARRAY_SIZE(state->guest_idt); i++) - default_idt_entry(&state->guest_idt[i], i, def[i]); + default_idt_entry(&state->guest_idt[i], i, def[i], NULL); } /*H:240 We don't use the IDT entries in the "struct lguest" directly, instead * we copy them into the IDT which we've set up for Guests on this CPU, just * before we run the Guest. This routine does that copy. */ -void copy_traps(const struct lguest *lg, struct desc_struct *idt, +void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, const unsigned long *def) { unsigned int i; /* We can simply copy the direct traps, otherwise we use the default * ones in the Switcher: they will return to the Host. */ - for (i = 0; i < ARRAY_SIZE(lg->arch.idt); i++) { + for (i = 0; i < ARRAY_SIZE(cpu->arch.idt); i++) { + const struct desc_struct *gidt = &cpu->arch.idt[i]; + /* If no Guest can ever override this trap, leave it alone. */ if (!direct_trap(i)) continue; @@ -449,12 +456,15 @@ void copy_traps(const struct lguest *lg, struct desc_struct *idt, /* Only trap gates (type 15) can go direct to the Guest. * Interrupt gates (type 14) disable interrupts as they are * entered, which we never let the Guest do. Not present - * entries (type 0x0) also can't go direct, of course. */ - if (idt_type(lg->arch.idt[i].a, lg->arch.idt[i].b) == 0xF) - idt[i] = lg->arch.idt[i]; + * entries (type 0x0) also can't go direct, of course. + * + * If it can't go direct, we still need to copy the priv. level: + * they might want to give userspace access to a software + * interrupt. */ + if (idt_type(gidt->a, gidt->b) == 0xF) + idt[i] = *gidt; else - /* Reset it to the default. */ - default_idt_entry(&idt[i], i, def[i]); + default_idt_entry(&idt[i], i, def[i], gidt); } } @@ -470,13 +480,13 @@ void copy_traps(const struct lguest *lg, struct desc_struct *idt, * infrastructure to set a callback at that time. * * 0 means "turn off the clock". */ -void guest_set_clockevent(struct lguest *lg, unsigned long delta) +void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta) { ktime_t expires; if (unlikely(delta == 0)) { /* Clock event device is shutting down. */ - hrtimer_cancel(&lg->hrt); + hrtimer_cancel(&cpu->hrt); return; } @@ -484,25 +494,25 @@ void guest_set_clockevent(struct lguest *lg, unsigned long delta) * all the time between now and the timer interrupt it asked for. This * is almost always the right thing to do. */ expires = ktime_add_ns(ktime_get_real(), delta); - hrtimer_start(&lg->hrt, expires, HRTIMER_MODE_ABS); + hrtimer_start(&cpu->hrt, expires, HRTIMER_MODE_ABS); } /* This is the function called when the Guest's timer expires. */ static enum hrtimer_restart clockdev_fn(struct hrtimer *timer) { - struct lguest *lg = container_of(timer, struct lguest, hrt); + struct lg_cpu *cpu = container_of(timer, struct lg_cpu, hrt); /* Remember the first interrupt is the timer interrupt. */ - set_bit(0, lg->irqs_pending); + set_bit(0, cpu->irqs_pending); /* If the Guest is actually stopped, we need to wake it up. */ - if (lg->halted) - wake_up_process(lg->tsk); + if (cpu->halted) + wake_up_process(cpu->tsk); return HRTIMER_NORESTART; } /* This sets up the timer for this Guest. */ -void init_clockdev(struct lguest *lg) +void init_clockdev(struct lg_cpu *cpu) { - hrtimer_init(&lg->hrt, CLOCK_REALTIME, HRTIMER_MODE_ABS); - lg->hrt.function = clockdev_fn; + hrtimer_init(&cpu->hrt, CLOCK_REALTIME, HRTIMER_MODE_ABS); + cpu->hrt.function = clockdev_fn; }