X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=drivers%2Flguest%2Flguest.c;h=6e135ac0834f9c867ae2e475fee47ae2bb05405c;hb=2b56fec64faae9fc5c3e61bbfb851b7985292cd5;hp=6dfe568523a2d0cf0e319fd83bd818886f7fd1cd;hpb=9f5577d8158d8190174d95cbf21713251cc8a044;p=linux-2.6

diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c
index 6dfe568523..6e135ac083 100644
--- a/drivers/lguest/lguest.c
+++ b/drivers/lguest/lguest.c
@@ -323,9 +323,12 @@ static void lguest_write_gdt_entry(struct desc_struct *dt,
  * __thread variables). So we have a hypercall specifically for this case. */
 static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
 {
+        /* There's one problem which normal hardware doesn't have: the Host
+         * can't handle us removing entries we're currently using. So we clear
+         * the GS register here: if it's needed it'll be reloaded anyway. */
+        loadsegment(gs, 0);
         lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0);
 }
-/*:*/

 /*G:038 That's enough excitement for now, back to ploughing through each of
  * the paravirt_ops (we're about 1/3 of the way through).
@@ -643,21 +646,42 @@ static void __init lguest_init_IRQ(void)
  * Time.
  *
  * It would be far better for everyone if the Guest had its own clock, but
- * until then it must ask the Host for the time.
+ * until then the Host gives us the time on every interrupt.
  */
 static unsigned long lguest_get_wallclock(void)
 {
-        return hcall(LHCALL_GET_WALLCLOCK, 0, 0, 0);
+        return lguest_data.time.tv_sec;
 }

-/* If the Host tells us we can trust the TSC, we use that, otherwise we simply
- * use the imprecise but reliable "jiffies" counter. */
 static cycle_t lguest_clock_read(void)
 {
+        unsigned long sec, nsec;
+
+        /* If the Host tells us the TSC speed, we can trust that. */
         if (lguest_data.tsc_khz)
                 return native_read_tsc();
-        else
-                return jiffies;
+
+        /* If we can't use the TSC, we read the time value written by the Host.
+         * Since it's in two parts (seconds and nanoseconds), we risk reading
+         * it just as it's changing from 99 & 0.999999999 to 100 and 0, and
+         * getting 99 and 0. As Linux tends to come apart under the stress of
+         * time travel, we must be careful: */
+        do {
+                /* First we read the seconds part. */
+                sec = lguest_data.time.tv_sec;
+                /* This read memory barrier tells the compiler and the CPU that
+                 * this can't be reordered: we have to complete the above
+                 * before going on. */
+                rmb();
+                /* Now we read the nanoseconds part. */
+                nsec = lguest_data.time.tv_nsec;
+                /* Make sure we've done that. */
+                rmb();
+                /* Now if the seconds part has changed, try again. */
+        } while (unlikely(lguest_data.time.tv_sec != sec));
+
+        /* Our non-TSC clock is in real nanoseconds. */
+        return sec*1000000000ULL + nsec;
 }

 /* This is what we tell the kernel is our clocksource. */
@@ -665,8 +689,12 @@ static struct clocksource lguest_clock = {
         .name           = "lguest",
         .rating         = 400,
         .read           = lguest_clock_read,
+        .mask           = CLOCKSOURCE_MASK(64),
+        .mult           = 1 << 22,
+        .shift          = 22,
 };

+/* The "scheduler clock" is just our real clock, adjusted to start at zero */
 static unsigned long long lguest_sched_clock(void)
 {
         return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base);
 }
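The retry loop added to lguest_clock_read() above is a lock-free technique worth seeing on its own: the two halves of a value updated by another party (here the Host) are read back-to-back, and the read is retried if the first half changed in the meantime. Below is a minimal, self-contained C sketch of the same idea, assuming a writer that updates a seconds/nanoseconds pair; the names (host_time, read_host_time_ns) and the GCC-style compiler barrier are illustrative stand-ins, not the kernel's lguest_data.time or rmb().

/* Sketch: tear-free read of a two-part timestamp, as in the loop above.
 * Assumes x86-like ordering, where loads are not reordered with other
 * loads, so a compiler barrier suffices where the kernel uses rmb(). */
#include <stdint.h>
#include <time.h>

/* Stand-in for the Host-updated lguest_data.time field. */
static volatile struct timespec host_time;

#define read_barrier() __asm__ __volatile__("" ::: "memory")

static uint64_t read_host_time_ns(void)
{
        time_t sec;
        long nsec;

        do {
                /* Seconds first... */
                sec = host_time.tv_sec;
                read_barrier(); /* ...and don't let the reads swap places. */
                nsec = host_time.tv_nsec;
                read_barrier();
                /* If the seconds rolled over while we read the nanoseconds,
                 * the pair may be torn (99 paired with second 100's nsec):
                 * go around again. */
        } while (host_time.tv_sec != sec);

        return (uint64_t)sec * 1000000000ULL + (uint64_t)nsec;
}

A full seqlock would use a separate sequence counter; the two-read trick gets away without one because the seconds value itself acts as the sequence number.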
@@ -742,24 +770,20 @@ static void lguest_time_init(void)
         set_irq_handler(0, lguest_time_irq);

         /* Our clock structure look like arch/i386/kernel/tsc.c if we can use
-         * the TSC, otherwise it looks like kernel/time/jiffies.c.  Either way,
-         * the "rating" is initialized so high that it's always chosen over any
-         * other clocksource. */
+         * the TSC, otherwise it's a dumb nanosecond-resolution clock. Either
+         * way, the "rating" is initialized so high that it's always chosen
+         * over any other clocksource. */
         if (lguest_data.tsc_khz) {
-                lguest_clock.shift = 22;
                 lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz,
                                                          lguest_clock.shift);
-                lguest_clock.mask = CLOCKSOURCE_MASK(64);
                 lguest_clock.flags = CLOCK_SOURCE_IS_CONTINUOUS;
-        } else {
-                /* To understand this, start at kernel/time/jiffies.c... */
-                lguest_clock.shift = 8;
-                lguest_clock.mult = (((u64)NSEC_PER_SEC<<8)/ACTHZ) << 8;
-                lguest_clock.mask = CLOCKSOURCE_MASK(32);
         }
         clock_base = lguest_clock_read();
         clocksource_register(&lguest_clock);

+        /* Now we've set up our clock, we can use it as the scheduler clock */
+        paravirt_ops.sched_clock = lguest_sched_clock;
+
         /* We can't set cpumask in the initializer: damn C limitations! Set it
          * here and register our timer device. */
         lguest_clockevent.cpumask = cpumask_of_cpu(0);
@@ -912,23 +936,24 @@ static const struct lguest_insns
 /* Now our patch routine is fairly simple (based on the native one in
  * paravirt.c). If we have a replacement, we copy it in and return how much of
  * the available space we used. */
-static unsigned lguest_patch(u8 type, u16 clobber, void *insns, unsigned len)
+static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf,
+                             unsigned long addr, unsigned len)
 {
         unsigned int insn_len;

         /* Don't do anything special if we don't have a replacement */
         if (type >= ARRAY_SIZE(lguest_insns) || !lguest_insns[type].start)
-                return paravirt_patch_default(type, clobber, insns, len);
+                return paravirt_patch_default(type, clobber, ibuf, addr, len);

         insn_len = lguest_insns[type].end - lguest_insns[type].start;

         /* Similarly if we can't fit replacement (shouldn't happen, but let's
          * be thorough). */
         if (len < insn_len)
-                return paravirt_patch_default(type, clobber, insns, len);
+                return paravirt_patch_default(type, clobber, ibuf, addr, len);

         /* Copy in our instructions. */
-        memcpy(insns, lguest_insns[type].start, insn_len);
+        memcpy(ibuf, lguest_insns[type].start, insn_len);
         return insn_len;
 }
@@ -996,7 +1021,6 @@ __init void lguest_init(void *boot)
         paravirt_ops.time_init = lguest_time_init;
         paravirt_ops.set_lazy_mode = lguest_lazy_mode;
         paravirt_ops.wbinvd = lguest_wbinvd;
-        paravirt_ops.sched_clock = lguest_sched_clock;

         /* Now is a good time to look at the implementations of these functions
          * before returning to the rest of lguest_init(). */
@@ -1019,6 +1043,11 @@ __init void lguest_init(void *boot)
          * the normal data segment to get through booting. */
         asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory");

+        /* Clear the part of the kernel data which is expected to be zero.
+         * Normally it will be anyway, but if we're loading from a bzImage with
+         * CONFIG_RELOCATABLE=y, the relocations will be sitting here. */
+        memset(__bss_start, 0, __bss_stop - __bss_start);
+
         /* The Host uses the top of the Guest's virtual address space for the
          * Host<->Guest Switcher, and it tells us how much it needs in
          * lguest_data.reserve_mem, set up on the LGUEST_INIT hypercall. */
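One detail the clocksource hunks above leave implicit is the mult/shift fixed-point arithmetic: the kernel converts a raw counter delta to nanoseconds as ns = (cycles * mult) >> shift. That is why the non-TSC path can preset .mult = 1 << 22 and .shift = 22 (the clock already counts nanoseconds, so the conversion must be an identity), while the TSC path recomputes mult from lguest_data.tsc_khz. The sketch below works through both cases; khz_to_mult mirrors the rounding in the kernel's clocksource_khz2mult() but is a local stand-in, and the 2 GHz TSC is a made-up example.

/* Sketch: the clocksource fixed-point conversion used above. */
#include <stdint.h>
#include <stdio.h>

/* ns = (cycles * mult) >> shift: multiply by a scaled reciprocal of the
 * counter frequency, then shift the scale factor back out. */
static uint64_t cyc_to_ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
        return (cycles * mult) >> shift;
}

/* A khz counter ticks khz times per millisecond, so one tick is
 * 1000000/khz nanoseconds; scale that by 2^shift, rounding. */
static uint32_t khz_to_mult(uint32_t khz, uint32_t shift)
{
        uint64_t tmp = ((uint64_t)1000000 << shift) + khz / 2;
        return (uint32_t)(tmp / khz);
}

int main(void)
{
        /* Non-TSC clock: already in ns, so mult = 1 << 22, shift = 22
         * leaves the value unchanged. */
        printf("%llu\n",
               (unsigned long long)cyc_to_ns(123456789ULL, 1 << 22, 22));

        /* Hypothetical 2 GHz TSC (tsc_khz = 2000000): two billion cycles
         * should convert to one billion nanoseconds. */
        uint32_t mult = khz_to_mult(2000000, 22);
        printf("%llu\n",
               (unsigned long long)cyc_to_ns(2000000000ULL, mult, 22));
        return 0;
}

The kernel additionally bounds how large a delta may get before (cycles * mult) would overflow 64 bits; the sketch ignores that housekeeping.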