X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=drivers%2Flguest%2Flguest.c;h=6e135ac0834f9c867ae2e475fee47ae2bb05405c;hb=2b56fec64faae9fc5c3e61bbfb851b7985292cd5;hp=6dfe568523a2d0cf0e319fd83bd818886f7fd1cd;hpb=9f5577d8158d8190174d95cbf21713251cc8a044;p=linux-2.6

diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c
index 6dfe568523..6e135ac083 100644
--- a/drivers/lguest/lguest.c
+++ b/drivers/lguest/lguest.c
@@ -323,9 +323,12 @@ static void lguest_write_gdt_entry(struct desc_struct *dt,
  * __thread variables). So we have a hypercall specifically for this case. */
 static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
 {
+        /* There's one problem which normal hardware doesn't have: the Host
+         * can't handle us removing entries we're currently using. So we clear
+         * the GS register here: if it's needed it'll be reloaded anyway. */
+        loadsegment(gs, 0);
         lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0);
 }
-/*:*/

 /*G:038 That's enough excitement for now, back to ploughing through each of
  * the paravirt_ops (we're about 1/3 of the way through).
@@ -643,21 +646,42 @@ static void __init lguest_init_IRQ(void)
  * Time.
  *
  * It would be far better for everyone if the Guest had its own clock, but
- * until then it must ask the Host for the time.
+ * until then the Host gives us the time on every interrupt.
  */
 static unsigned long lguest_get_wallclock(void)
 {
-        return hcall(LHCALL_GET_WALLCLOCK, 0, 0, 0);
+        return lguest_data.time.tv_sec;
 }

-/* If the Host tells us we can trust the TSC, we use that, otherwise we simply
- * use the imprecise but reliable "jiffies" counter. */
 static cycle_t lguest_clock_read(void)
 {
+        unsigned long sec, nsec;
+
+        /* If the Host tells us the TSC speed, we can trust that. */
         if (lguest_data.tsc_khz)
                 return native_read_tsc();
-        else
-                return jiffies;
+
+        /* If we can't use the TSC, we read the time value written by the Host.
+         * Since it's in two parts (seconds and nanoseconds), we risk reading
+         * it just as it's changing from 99 & 0.999999999 to 100 and 0, and
+         * getting 99 and 0. As Linux tends to come apart under the stress of
+         * time travel, we must be careful: */
+        do {
+                /* First we read the seconds part. */
+                sec = lguest_data.time.tv_sec;
+                /* This read memory barrier tells the compiler and the CPU that
+                 * this can't be reordered: we have to complete the above
+                 * before going on. */
+                rmb();
+                /* Now we read the nanoseconds part. */
+                nsec = lguest_data.time.tv_nsec;
+                /* Make sure we've done that. */
+                rmb();
+                /* Now if the seconds part has changed, try again. */
+        } while (unlikely(lguest_data.time.tv_sec != sec));
+
+        /* Our non-TSC clock is in real nanoseconds. */
+        return sec*1000000000ULL + nsec;
 }

 /* This is what we tell the kernel is our clocksource. */
@@ -665,8 +689,12 @@ static struct clocksource lguest_clock = {
         .name           = "lguest",
         .rating         = 400,
         .read           = lguest_clock_read,
+        .mask           = CLOCKSOURCE_MASK(64),
+        .mult           = 1 << 22,
+        .shift          = 22,
 };

+/* The "scheduler clock" is just our real clock, adjusted to start at zero */
 static unsigned long long lguest_sched_clock(void)
 {
         return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base);
 }
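The retry loop added to lguest_clock_read() above is a lock-free technique worth seeing on its own: the two halves of a value updated by another party (here the Host) are read back-to-back, and the read is retried if the first half changed in the meantime. Below is a minimal, self-contained C sketch of the same idea, assuming a writer that updates a seconds/nanoseconds pair; the names (host_time, read_host_time_ns) and the GCC-style compiler barrier are illustrative stand-ins, not the kernel's lguest_data.time or rmb().

/* Sketch: tear-free read of a two-part timestamp, as in the loop above.
 * Assumes x86-like ordering, where loads are not reordered with other
 * loads, so a compiler barrier suffices where the kernel uses rmb(). */
#include <stdint.h>
#include <time.h>

/* Stand-in for the Host-updated lguest_data.time field. */
static volatile struct timespec host_time;

#define read_barrier() __asm__ __volatile__("" ::: "memory")

static uint64_t read_host_time_ns(void)
{
        time_t sec;
        long nsec;

        do {
                /* Seconds first... */
                sec = host_time.tv_sec;
                read_barrier(); /* ...and don't let the reads swap places. */
                nsec = host_time.tv_nsec;
                read_barrier();
                /* If the seconds rolled over while we read the nanoseconds,
                 * the pair may be torn (99 paired with second 100's nsec):
                 * go around again. */
        } while (host_time.tv_sec != sec);

        return (uint64_t)sec * 1000000000ULL + (uint64_t)nsec;
}

A full seqlock would use a separate sequence counter; the two-read trick gets away without one because the seconds value itself acts as the sequence number.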
@@ -742,24 +770,20 @@ static void lguest_time_init(void)
         set_irq_handler(0, lguest_time_irq);

         /* Our clock structure look like arch/i386/kernel/tsc.c if we can use
-         * the TSC, otherwise it looks like kernel/time/jiffies.c.  Either way,
-         * the "rating" is initialized so high that it's always chosen over any
-         * other clocksource. */
+         * the TSC, otherwise it's a dumb nanosecond-resolution clock. Either
+         * way, the "rating" is initialized so high that it's always chosen
+         * over any other clocksource. */
         if (lguest_data.tsc_khz) {
-                lguest_clock.shift = 22;
                 lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz,
                                                          lguest_clock.shift);
-                lguest_clock.mask = CLOCKSOURCE_MASK(64);
                 lguest_clock.flags = CLOCK_SOURCE_IS_CONTINUOUS;
-        } else {
-                /* To understand this, start at kernel/time/jiffies.c... */
-                lguest_clock.shift = 8;
-                lguest_clock.mult = (((u64)NSEC_PER_SEC<<8)/ACTHZ) << 8;
-                lguest_clock.mask = CLOCKSOURCE_MASK(32);
         }
         clock_base = lguest_clock_read();
         clocksource_register(&lguest_clock);

+        /* Now we've set up our clock, we can use it as the scheduler clock */
+        paravirt_ops.sched_clock = lguest_sched_clock;
+
         /* We can't set cpumask in the initializer: damn C limitations! Set it
          * here and register our timer device. */
         lguest_clockevent.cpumask = cpumask_of_cpu(0);
@@ -912,23 +936,24 @@ static const struct lguest_insns
 /* Now our patch routine is fairly simple (based on the native one in
  * paravirt.c). If we have a replacement, we copy it in and return how much of
  * the available space we used. */
-static unsigned lguest_patch(u8 type, u16 clobber, void *insns, unsigned len)
+static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf,
+                             unsigned long addr, unsigned len)
 {
         unsigned int insn_len;

         /* Don't do anything special if we don't have a replacement */
         if (type >= ARRAY_SIZE(lguest_insns) || !lguest_insns[type].start)
-                return paravirt_patch_default(type, clobber, insns, len);
+                return paravirt_patch_default(type, clobber, ibuf, addr, len);

         insn_len = lguest_insns[type].end - lguest_insns[type].start;

         /* Similarly if we can't fit replacement (shouldn't happen, but let's
          * be thorough). */
         if (len < insn_len)
-                return paravirt_patch_default(type, clobber, insns, len);
+                return paravirt_patch_default(type, clobber, ibuf, addr, len);

         /* Copy in our instructions. */
-        memcpy(insns, lguest_insns[type].start, insn_len);
+        memcpy(ibuf, lguest_insns[type].start, insn_len);
         return insn_len;
 }
@@ -996,7 +1021,6 @@ __init void lguest_init(void *boot)
         paravirt_ops.time_init = lguest_time_init;
         paravirt_ops.set_lazy_mode = lguest_lazy_mode;
         paravirt_ops.wbinvd = lguest_wbinvd;
-        paravirt_ops.sched_clock = lguest_sched_clock;

         /* Now is a good time to look at the implementations of these functions
          * before returning to the rest of lguest_init(). */
@@ -1019,6 +1043,11 @@ __init void lguest_init(void *boot)
          * the normal data segment to get through booting. */
         asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory");

+        /* Clear the part of the kernel data which is expected to be zero.
+         * Normally it will be anyway, but if we're loading from a bzImage with
+         * CONFIG_RELOCATABLE=y, the relocations will be sitting here. */
+        memset(__bss_start, 0, __bss_stop - __bss_start);
+
         /* The Host uses the top of the Guest's virtual address space for the
          * Host<->Guest Switcher, and it tells us how much it needs in
          * lguest_data.reserve_mem, set up on the LGUEST_INIT hypercall. */
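One detail the clocksource hunks above leave implicit is the mult/shift fixed-point arithmetic: the kernel converts a raw counter delta to nanoseconds as ns = (cycles * mult) >> shift. That is why the non-TSC path can preset .mult = 1 << 22 and .shift = 22 (the clock already counts nanoseconds, so the conversion must be an identity), while the TSC path recomputes mult from lguest_data.tsc_khz. The sketch below works through both cases; khz_to_mult mirrors the rounding in the kernel's clocksource_khz2mult() but is a local stand-in, and the 2 GHz TSC is a made-up example.

/* Sketch: the clocksource fixed-point conversion used above. */
#include <stdint.h>
#include <stdio.h>

/* ns = (cycles * mult) >> shift: multiply by a scaled reciprocal of the
 * counter frequency, then shift the scale factor back out. */
static uint64_t cyc_to_ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
        return (cycles * mult) >> shift;
}

/* A khz counter ticks khz times per millisecond, so one tick is
 * 1000000/khz nanoseconds; scale that by 2^shift, rounding. */
static uint32_t khz_to_mult(uint32_t khz, uint32_t shift)
{
        uint64_t tmp = ((uint64_t)1000000 << shift) + khz / 2;
        return (uint32_t)(tmp / khz);
}

int main(void)
{
        /* Non-TSC clock: already in ns, so mult = 1 << 22, shift = 22
         * leaves the value unchanged. */
        printf("%llu\n",
               (unsigned long long)cyc_to_ns(123456789ULL, 1 << 22, 22));

        /* Hypothetical 2 GHz TSC (tsc_khz = 2000000): two billion cycles
         * should convert to one billion nanoseconds. */
        uint32_t mult = khz_to_mult(2000000, 22);
        printf("%llu\n",
               (unsigned long long)cyc_to_ns(2000000000ULL, mult, 22));
        return 0;
}

The kernel additionally bounds how large a delta may get before (cycles * mult) would overflow 64 bits; the sketch ignores that housekeeping.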