xen: Add support for preemption

author Jeremy Fitzhardinge <jeremy@xensource.com>

Wed, 18 Jul 2007 01:37:06 +0000 (18:37 -0700)

committer Jeremy Fitzhardinge <jeremy@goop.org>

Wed, 18 Jul 2007 15:47:44 +0000 (08:47 -0700)
author Jeremy Fitzhardinge <jeremy@xensource.com>
Wed, 18 Jul 2007 01:37:06 +0000 (18:37 -0700)
committer Jeremy Fitzhardinge <jeremy@goop.org>
Wed, 18 Jul 2007 15:47:44 +0000 (08:47 -0700)
diff --git a/arch/i386/xen/Kconfig b/arch/i386/xen/Kconfig

index b7697ff223616fa608e80a1eac7261c3abce6f71..9df99e1885a43ccd90eba7834df1dab259046083 100644 (file)
--- a/arch/i386/xen/Kconfig
+++ b/arch/i386/xen/Kconfig
@@ -4,7 +4,7 @@
  
  config XEN
         bool "Enable support for Xen hypervisor"
  
  config XEN
         bool "Enable support for Xen hypervisor"
-       depends on PARAVIRT && X86_CMPXCHG && X86_TSC && !(PREEMPT || NEED_MULTIPLE_NODES)
+       depends on PARAVIRT && X86_CMPXCHG && X86_TSC && !NEED_MULTIPLE_NODES
         help
           This is the Linux Xen port.  Enabling this will allow the
           kernel to boot in a paravirtualized environment under the
         help
           This is the Linux Xen port.  Enabling this will allow the
           kernel to boot in a paravirtualized environment under the
diff --git a/arch/i386/xen/enlighten.c b/arch/i386/xen/enlighten.c

index de62d66e08934172658f8aa3ebbe4fc126e7d34d..a1124b7f1d14e02e74567f4c12003b017db33244 100644 (file)
--- a/arch/i386/xen/enlighten.c
+++ b/arch/i386/xen/enlighten.c
@@ -15,6 +15,7 @@
  #include <linux/init.h>
  #include <linux/smp.h>
  #include <linux/preempt.h>
  #include <linux/init.h>
  #include <linux/smp.h>
  #include <linux/preempt.h>
+#include <linux/hardirq.h>
  #include <linux/percpu.h>
  #include <linux/delay.h>
  #include <linux/start_kernel.h>
  #include <linux/percpu.h>
  #include <linux/delay.h>
  #include <linux/start_kernel.h>
@@ -108,11 +109,10 @@ static unsigned long xen_save_fl(void)
         struct vcpu_info *vcpu;
         unsigned long flags;
  
         struct vcpu_info *vcpu;
         unsigned long flags;
  
-       preempt_disable();
         vcpu = x86_read_percpu(xen_vcpu);
         vcpu = x86_read_percpu(xen_vcpu);
+
         /* flag has opposite sense of mask */
         flags = !vcpu->evtchn_upcall_mask;
         /* flag has opposite sense of mask */
         flags = !vcpu->evtchn_upcall_mask;
-       preempt_enable();
  
         /* convert to IF type flag
            -0 -> 0x00000000
  
         /* convert to IF type flag
            -0 -> 0x00000000
@@ -125,32 +125,35 @@ static void xen_restore_fl(unsigned long flags)
  {
         struct vcpu_info *vcpu;
  
  {
         struct vcpu_info *vcpu;
  
-       preempt_disable();
-
         /* convert from IF type flag */
         flags = !(flags & X86_EFLAGS_IF);
         /* convert from IF type flag */
         flags = !(flags & X86_EFLAGS_IF);
+
+       /* There's a one instruction preempt window here.  We need to
+          make sure we don't switch CPUs between getting the vcpu
+          pointer and updating the mask. */
+       preempt_disable();
         vcpu = x86_read_percpu(xen_vcpu);
         vcpu->evtchn_upcall_mask = flags;
         vcpu = x86_read_percpu(xen_vcpu);
         vcpu->evtchn_upcall_mask = flags;
+       preempt_enable_no_resched();
  
  
-       if (flags == 0) {
-               /* Unmask then check (avoid races).  We're only protecting
-                  against updates by this CPU, so there's no need for
-                  anything stronger. */
-               barrier();
+       /* Doesn't matter if we get preempted here, because any
+          pending event will get dealt with anyway. */
  
  
+       if (flags == 0) {
+               preempt_check_resched();
+               barrier(); /* unmask then check (avoid races) */
                 if (unlikely(vcpu->evtchn_upcall_pending))
                         force_evtchn_callback();
                 if (unlikely(vcpu->evtchn_upcall_pending))
                         force_evtchn_callback();
-               preempt_enable();
-       } else
-               preempt_enable_no_resched();
+       }
  }
  
  static void xen_irq_disable(void)
  {
  }
  
  static void xen_irq_disable(void)
  {
-       struct vcpu_info *vcpu;
+       /* There's a one instruction preempt window here.  We need to
+          make sure we don't switch CPUs between getting the vcpu
+          pointer and updating the mask. */
         preempt_disable();
         preempt_disable();
-       vcpu = x86_read_percpu(xen_vcpu);
-       vcpu->evtchn_upcall_mask = 1;
+       x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
         preempt_enable_no_resched();
  }
  
         preempt_enable_no_resched();
  }
  
@@ -158,18 +161,20 @@ static void xen_irq_enable(void)
  {
         struct vcpu_info *vcpu;
  
  {
         struct vcpu_info *vcpu;
  
+       /* There's a one instruction preempt window here.  We need to
+          make sure we don't switch CPUs between getting the vcpu
+          pointer and updating the mask. */
         preempt_disable();
         vcpu = x86_read_percpu(xen_vcpu);
         vcpu->evtchn_upcall_mask = 0;
         preempt_disable();
         vcpu = x86_read_percpu(xen_vcpu);
         vcpu->evtchn_upcall_mask = 0;
+       preempt_enable_no_resched();
  
  
-       /* Unmask then check (avoid races).  We're only protecting
-          against updates by this CPU, so there's no need for
-          anything stronger. */
-       barrier();
+       /* Doesn't matter if we get preempted here, because any
+          pending event will get dealt with anyway. */
  
  
+       barrier(); /* unmask then check (avoid races) */
         if (unlikely(vcpu->evtchn_upcall_pending))
                 force_evtchn_callback();
         if (unlikely(vcpu->evtchn_upcall_pending))
                 force_evtchn_callback();
-       preempt_enable();
  }
  
  static void xen_safe_halt(void)
  }
  
  static void xen_safe_halt(void)
@@ -189,6 +194,8 @@ static void xen_halt(void)
  
  static void xen_set_lazy_mode(enum paravirt_lazy_mode mode)
  {
  
  static void xen_set_lazy_mode(enum paravirt_lazy_mode mode)
  {
+       BUG_ON(preemptible());
+
         switch (mode) {
         case PARAVIRT_LAZY_NONE:
                 BUG_ON(x86_read_percpu(xen_lazy_mode) == PARAVIRT_LAZY_NONE);
         switch (mode) {
         case PARAVIRT_LAZY_NONE:
                 BUG_ON(x86_read_percpu(xen_lazy_mode) == PARAVIRT_LAZY_NONE);
@@ -293,9 +300,13 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
         xmaddr_t mach_lp = virt_to_machine(lp);
         u64 entry = (u64)high << 32 | low;
  
         xmaddr_t mach_lp = virt_to_machine(lp);
         u64 entry = (u64)high << 32 | low;
  
+       preempt_disable();
+
         xen_mc_flush();
         if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry))
                 BUG();
         xen_mc_flush();
         if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry))
                 BUG();
+
+       preempt_enable();
  }
  
  static int cvt_gate_to_trap(int vector, u32 low, u32 high,
  }
  
  static int cvt_gate_to_trap(int vector, u32 low, u32 high,
@@ -328,11 +339,13 @@ static DEFINE_PER_CPU(struct Xgt_desc_struct, idt_desc);
  static void xen_write_idt_entry(struct desc_struct *dt, int entrynum,
                                 u32 low, u32 high)
  {
  static void xen_write_idt_entry(struct desc_struct *dt, int entrynum,
                                 u32 low, u32 high)
  {
-
-       int cpu = smp_processor_id();
         unsigned long p = (unsigned long)&dt[entrynum];
         unsigned long p = (unsigned long)&dt[entrynum];
-       unsigned long start = per_cpu(idt_desc, cpu).address;
-       unsigned long end = start + per_cpu(idt_desc, cpu).size + 1;
+       unsigned long start, end;
+
+       preempt_disable();
+
+       start = __get_cpu_var(idt_desc).address;
+       end = start + __get_cpu_var(idt_desc).size + 1;
  
         xen_mc_flush();
  
  
         xen_mc_flush();
  
@@ -347,6 +360,8 @@ static void xen_write_idt_entry(struct desc_struct *dt, int entrynum,
                         if (HYPERVISOR_set_trap_table(info))
                                 BUG();
         }
                         if (HYPERVISOR_set_trap_table(info))
                                 BUG();
         }
+
+       preempt_enable();
  }
  
  static void xen_convert_trap_info(const struct Xgt_desc_struct *desc,
  }
  
  static void xen_convert_trap_info(const struct Xgt_desc_struct *desc,
@@ -368,11 +383,9 @@ static void xen_convert_trap_info(const struct Xgt_desc_struct *desc,
  
  void xen_copy_trap_info(struct trap_info *traps)
  {
  
  void xen_copy_trap_info(struct trap_info *traps)
  {
-       const struct Xgt_desc_struct *desc = &get_cpu_var(idt_desc);
+       const struct Xgt_desc_struct *desc = &__get_cpu_var(idt_desc);
  
         xen_convert_trap_info(desc, traps);
  
         xen_convert_trap_info(desc, traps);
-
-       put_cpu_var(idt_desc);
  }
  
  /* Load a new IDT into Xen.  In principle this can be per-CPU, so we
  }
  
  /* Load a new IDT into Xen.  In principle this can be per-CPU, so we
@@ -382,12 +395,11 @@ static void xen_load_idt(const struct Xgt_desc_struct *desc)
  {
         static DEFINE_SPINLOCK(lock);
         static struct trap_info traps[257];
  {
         static DEFINE_SPINLOCK(lock);
         static struct trap_info traps[257];
-       int cpu = smp_processor_id();
-
-       per_cpu(idt_desc, cpu) = *desc;
  
         spin_lock(&lock);
  
  
         spin_lock(&lock);
  
+       __get_cpu_var(idt_desc) = *desc;
+
         xen_convert_trap_info(desc, traps);
  
         xen_mc_flush();
         xen_convert_trap_info(desc, traps);
  
         xen_mc_flush();
@@ -402,6 +414,8 @@ static void xen_load_idt(const struct Xgt_desc_struct *desc)
  static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
                                 u32 low, u32 high)
  {
  static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
                                 u32 low, u32 high)
  {
+       preempt_disable();
+
         switch ((high >> 8) & 0xff) {
         case DESCTYPE_LDT:
         case DESCTYPE_TSS:
         switch ((high >> 8) & 0xff) {
         case DESCTYPE_LDT:
         case DESCTYPE_TSS:
@@ -418,10 +432,12 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
         }
  
         }
         }
  
         }
+
+       preempt_enable();
  }
  
  static void xen_load_esp0(struct tss_struct *tss,
  }
  
  static void xen_load_esp0(struct tss_struct *tss,
-                                  struct thread_struct *thread)
+                         struct thread_struct *thread)
  {
         struct multicall_space mcs = xen_mc_entry(0);
         MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->esp0);
  {
         struct multicall_space mcs = xen_mc_entry(0);
         MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->esp0);
@@ -525,6 +541,8 @@ static unsigned long xen_read_cr3(void)
  
  static void xen_write_cr3(unsigned long cr3)
  {
  
  static void xen_write_cr3(unsigned long cr3)
  {
+       BUG_ON(preemptible());
+
         if (cr3 == x86_read_percpu(xen_cr3)) {
                 /* just a simple tlb flush */
                 xen_flush_tlb();
         if (cr3 == x86_read_percpu(xen_cr3)) {
                 /* just a simple tlb flush */
                 xen_flush_tlb();
diff --git a/arch/i386/xen/mmu.c b/arch/i386/xen/mmu.c

index bc49ef846203c0495b3d477ee0c56f6e8c463163..f431cf14e6442e0b66baa8faf45259896a621b24 100644 (file)
--- a/arch/i386/xen/mmu.c
+++ b/arch/i386/xen/mmu.c
@@ -38,6 +38,7 @@
   *
   * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
   */
   *
   * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
   */
+#include <linux/sched.h>
  #include <linux/highmem.h>
  #include <linux/bug.h>
  #include <linux/sched.h>
  #include <linux/highmem.h>
  #include <linux/bug.h>
  #include <linux/sched.h>
@@ -531,5 +532,7 @@ void xen_exit_mmap(struct mm_struct *mm)
         drop_mm_ref(mm);
         put_cpu();
  
         drop_mm_ref(mm);
         put_cpu();
  
+       spin_lock(&mm->page_table_lock);
         xen_pgd_unpin(mm->pgd);
         xen_pgd_unpin(mm->pgd);
+       spin_unlock(&mm->page_table_lock);
  }
  }
diff --git a/arch/i386/xen/multicalls.c b/arch/i386/xen/multicalls.c

index 869f9833f08fa888055ea41b5aabffe37a1899e4..d4015a9ed46c04bf7f85b224491aca85be0d4cdc 100644 (file)
--- a/arch/i386/xen/multicalls.c
+++ b/arch/i386/xen/multicalls.c
@@ -20,6 +20,7 @@
   * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
   */
  #include <linux/percpu.h>
   * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
   */
  #include <linux/percpu.h>
+#include <linux/hardirq.h>
  
  #include <asm/xen/hypercall.h>
  
  
  #include <asm/xen/hypercall.h>
  
@@ -39,10 +40,12 @@ DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags);
  
  void xen_mc_flush(void)
  {
  
  void xen_mc_flush(void)
  {
-       struct mc_buffer *b = &get_cpu_var(mc_buffer);
+       struct mc_buffer *b = &__get_cpu_var(mc_buffer);
         int ret = 0;
         unsigned long flags;
  
         int ret = 0;
         unsigned long flags;
  
+       BUG_ON(preemptible());
+
         /* Disable interrupts in case someone comes in and queues
            something in the middle */
         local_irq_save(flags);
         /* Disable interrupts in case someone comes in and queues
            something in the middle */
         local_irq_save(flags);
@@ -60,7 +63,6 @@ void xen_mc_flush(void)
         } else
                 BUG_ON(b->argidx != 0);
  
         } else
                 BUG_ON(b->argidx != 0);
  
-       put_cpu_var(mc_buffer);
         local_irq_restore(flags);
  
         BUG_ON(ret);
         local_irq_restore(flags);
  
         BUG_ON(ret);
@@ -68,10 +70,11 @@ void xen_mc_flush(void)
  
  struct multicall_space __xen_mc_entry(size_t args)
  {
  
  struct multicall_space __xen_mc_entry(size_t args)
  {
-       struct mc_buffer *b = &get_cpu_var(mc_buffer);
+       struct mc_buffer *b = &__get_cpu_var(mc_buffer);
         struct multicall_space ret;
         unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64);
  
         struct multicall_space ret;
         unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64);
  
+       BUG_ON(preemptible());
         BUG_ON(argspace > MC_ARGS);
  
         if (b->mcidx == MC_BATCH ||
         BUG_ON(argspace > MC_ARGS);
  
         if (b->mcidx == MC_BATCH ||
@@ -83,7 +86,5 @@ struct multicall_space __xen_mc_entry(size_t args)
         ret.args = &b->args[b->argidx];
         b->argidx += argspace;
  
         ret.args = &b->args[b->argidx];
         b->argidx += argspace;
  
-       put_cpu_var(mc_buffer);
-
         return ret;
  }
         return ret;
  }
diff --git a/arch/i386/xen/time.c b/arch/i386/xen/time.c

index aeb04cf5dbf1cd4ed7139d3aea2a5a9074f06800..51fdabf1fd4dd813645737cbab05c6894227d0ae 100644 (file)
--- a/arch/i386/xen/time.c
+++ b/arch/i386/xen/time.c
@@ -88,7 +88,7 @@ static void get_runstate_snapshot(struct vcpu_runstate_info *res)
         u64 state_time;
         struct vcpu_runstate_info *state;
  
         u64 state_time;
         struct vcpu_runstate_info *state;
  
-       preempt_disable();
+       BUG_ON(preemptible());
  
         state = &__get_cpu_var(runstate);
  
  
         state = &__get_cpu_var(runstate);
  
@@ -103,8 +103,6 @@ static void get_runstate_snapshot(struct vcpu_runstate_info *res)
                 *res = *state;
                 barrier();
         } while (get64(&state->state_entry_time) != state_time);
                 *res = *state;
                 barrier();
         } while (get64(&state->state_entry_time) != state_time);
-
-       preempt_enable();
  }
  
  static void setup_runstate_info(int cpu)
  }
  
  static void setup_runstate_info(int cpu)
@@ -179,9 +177,19 @@ static void do_stolen_accounting(void)
  unsigned long long xen_sched_clock(void)
  {
         struct vcpu_runstate_info state;
  unsigned long long xen_sched_clock(void)
  {
         struct vcpu_runstate_info state;
-       cycle_t now = xen_clocksource_read();
+       cycle_t now;
+       u64 ret;
         s64 offset;
  
         s64 offset;
  
+       /*
+        * Ideally sched_clock should be called on a per-cpu basis
+        * anyway, so preempt should already be disabled, but that's
+        * not current practice at the moment.
+        */
+       preempt_disable();
+
+       now = xen_clocksource_read();
+
         get_runstate_snapshot(&state);
  
         WARN_ON(state.state != RUNSTATE_running);
         get_runstate_snapshot(&state);
  
         WARN_ON(state.state != RUNSTATE_running);
@@ -190,9 +198,13 @@ unsigned long long xen_sched_clock(void)
         if (offset < 0)
                 offset = 0;
  
         if (offset < 0)
                 offset = 0;
  
-       return state.time[RUNSTATE_blocked] +
+       ret = state.time[RUNSTATE_blocked] +
                 state.time[RUNSTATE_running] +
                 offset;
                 state.time[RUNSTATE_running] +
                 offset;
+
+       preempt_enable();
+
+       return ret;
  }
  
  
  }
author	Jeremy Fitzhardinge <jeremy@xensource.com>
	Wed, 18 Jul 2007 01:37:06 +0000 (18:37 -0700)
committer	Jeremy Fitzhardinge <jeremy@goop.org>
	Wed, 18 Jul 2007 15:47:44 +0000 (08:47 -0700)
arch/i386/xen/Kconfig		patch \| blob \| history
arch/i386/xen/enlighten.c		patch \| blob \| history
arch/i386/xen/mmu.c		patch \| blob \| history
arch/i386/xen/multicalls.c		patch \| blob \| history
arch/i386/xen/time.c		patch \| blob \| history