Add Xen support for preemption. This mostly cleans up the existing
preempt_enable/disable calls, and otherwise just adds comments that
explain the current usage.
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Chris Wright <chrisw@sous-sol.org>
config XEN
bool "Enable support for Xen hypervisor"
config XEN
bool "Enable support for Xen hypervisor"
- depends on PARAVIRT && X86_CMPXCHG && X86_TSC && !(PREEMPT || NEED_MULTIPLE_NODES)
+ depends on PARAVIRT && X86_CMPXCHG && X86_TSC && !NEED_MULTIPLE_NODES
help
This is the Linux Xen port. Enabling this will allow the
kernel to boot in a paravirtualized environment under the
help
This is the Linux Xen port. Enabling this will allow the
kernel to boot in a paravirtualized environment under the
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/preempt.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/preempt.h>
+#include <linux/hardirq.h>
#include <linux/percpu.h>
#include <linux/delay.h>
#include <linux/start_kernel.h>
#include <linux/percpu.h>
#include <linux/delay.h>
#include <linux/start_kernel.h>
struct vcpu_info *vcpu;
unsigned long flags;
struct vcpu_info *vcpu;
unsigned long flags;
vcpu = x86_read_percpu(xen_vcpu);
vcpu = x86_read_percpu(xen_vcpu);
/* flag has opposite sense of mask */
flags = !vcpu->evtchn_upcall_mask;
/* flag has opposite sense of mask */
flags = !vcpu->evtchn_upcall_mask;
/* convert to IF type flag
-0 -> 0x00000000
/* convert to IF type flag
-0 -> 0x00000000
{
struct vcpu_info *vcpu;
{
struct vcpu_info *vcpu;
/* convert from IF type flag */
flags = !(flags & X86_EFLAGS_IF);
/* convert from IF type flag */
flags = !(flags & X86_EFLAGS_IF);
+
+ /* There's a one-instruction preempt window here. We need to
+ make sure we don't switch CPUs between getting the vcpu
+ pointer and updating the mask. */
+ preempt_disable();
vcpu = x86_read_percpu(xen_vcpu);
vcpu->evtchn_upcall_mask = flags;
vcpu = x86_read_percpu(xen_vcpu);
vcpu->evtchn_upcall_mask = flags;
+ preempt_enable_no_resched();
- if (flags == 0) {
- /* Unmask then check (avoid races). We're only protecting
- against updates by this CPU, so there's no need for
- anything stronger. */
- barrier();
+ /* Doesn't matter if we get preempted here, because any
+ pending event will get dealt with anyway. */
+ if (flags == 0) {
+ preempt_check_resched();
+ barrier(); /* unmask then check (avoid races) */
if (unlikely(vcpu->evtchn_upcall_pending))
force_evtchn_callback();
if (unlikely(vcpu->evtchn_upcall_pending))
force_evtchn_callback();
- preempt_enable();
- } else
- preempt_enable_no_resched();
}
static void xen_irq_disable(void)
{
}
static void xen_irq_disable(void)
{
- struct vcpu_info *vcpu;
+ /* There's a one-instruction preempt window here. We need to
+ make sure we don't switch CPUs between getting the vcpu
+ pointer and updating the mask. */
- vcpu = x86_read_percpu(xen_vcpu);
- vcpu->evtchn_upcall_mask = 1;
+ x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
preempt_enable_no_resched();
}
preempt_enable_no_resched();
}
{
struct vcpu_info *vcpu;
{
struct vcpu_info *vcpu;
+ /* There's a one-instruction preempt window here. We need to
+ make sure we don't switch CPUs between getting the vcpu
+ pointer and updating the mask. */
preempt_disable();
vcpu = x86_read_percpu(xen_vcpu);
vcpu->evtchn_upcall_mask = 0;
preempt_disable();
vcpu = x86_read_percpu(xen_vcpu);
vcpu->evtchn_upcall_mask = 0;
+ preempt_enable_no_resched();
- /* Unmask then check (avoid races). We're only protecting
- against updates by this CPU, so there's no need for
- anything stronger. */
- barrier();
+ /* Doesn't matter if we get preempted here, because any
+ pending event will get dealt with anyway. */
+ barrier(); /* unmask then check (avoid races) */
if (unlikely(vcpu->evtchn_upcall_pending))
force_evtchn_callback();
if (unlikely(vcpu->evtchn_upcall_pending))
force_evtchn_callback();
}
static void xen_safe_halt(void)
}
static void xen_safe_halt(void)
static void xen_set_lazy_mode(enum paravirt_lazy_mode mode)
{
static void xen_set_lazy_mode(enum paravirt_lazy_mode mode)
{
+ BUG_ON(preemptible());
+
switch (mode) {
case PARAVIRT_LAZY_NONE:
BUG_ON(x86_read_percpu(xen_lazy_mode) == PARAVIRT_LAZY_NONE);
switch (mode) {
case PARAVIRT_LAZY_NONE:
BUG_ON(x86_read_percpu(xen_lazy_mode) == PARAVIRT_LAZY_NONE);
xmaddr_t mach_lp = virt_to_machine(lp);
u64 entry = (u64)high << 32 | low;
xmaddr_t mach_lp = virt_to_machine(lp);
u64 entry = (u64)high << 32 | low;
xen_mc_flush();
if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry))
BUG();
xen_mc_flush();
if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry))
BUG();
}
static int cvt_gate_to_trap(int vector, u32 low, u32 high,
}
static int cvt_gate_to_trap(int vector, u32 low, u32 high,
static void xen_write_idt_entry(struct desc_struct *dt, int entrynum,
u32 low, u32 high)
{
static void xen_write_idt_entry(struct desc_struct *dt, int entrynum,
u32 low, u32 high)
{
-
- int cpu = smp_processor_id();
unsigned long p = (unsigned long)&dt[entrynum];
unsigned long p = (unsigned long)&dt[entrynum];
- unsigned long start = per_cpu(idt_desc, cpu).address;
- unsigned long end = start + per_cpu(idt_desc, cpu).size + 1;
+ unsigned long start, end;
+
+ preempt_disable();
+
+ start = __get_cpu_var(idt_desc).address;
+ end = start + __get_cpu_var(idt_desc).size + 1;
if (HYPERVISOR_set_trap_table(info))
BUG();
}
if (HYPERVISOR_set_trap_table(info))
BUG();
}
}
static void xen_convert_trap_info(const struct Xgt_desc_struct *desc,
}
static void xen_convert_trap_info(const struct Xgt_desc_struct *desc,
void xen_copy_trap_info(struct trap_info *traps)
{
void xen_copy_trap_info(struct trap_info *traps)
{
- const struct Xgt_desc_struct *desc = &get_cpu_var(idt_desc);
+ const struct Xgt_desc_struct *desc = &__get_cpu_var(idt_desc);
xen_convert_trap_info(desc, traps);
xen_convert_trap_info(desc, traps);
-
- put_cpu_var(idt_desc);
}
/* Load a new IDT into Xen. In principle this can be per-CPU, so we
}
/* Load a new IDT into Xen. In principle this can be per-CPU, so we
{
static DEFINE_SPINLOCK(lock);
static struct trap_info traps[257];
{
static DEFINE_SPINLOCK(lock);
static struct trap_info traps[257];
- int cpu = smp_processor_id();
-
- per_cpu(idt_desc, cpu) = *desc;
+ __get_cpu_var(idt_desc) = *desc;
+
xen_convert_trap_info(desc, traps);
xen_mc_flush();
xen_convert_trap_info(desc, traps);
xen_mc_flush();
static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
u32 low, u32 high)
{
static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
u32 low, u32 high)
{
switch ((high >> 8) & 0xff) {
case DESCTYPE_LDT:
case DESCTYPE_TSS:
switch ((high >> 8) & 0xff) {
case DESCTYPE_LDT:
case DESCTYPE_TSS:
}
static void xen_load_esp0(struct tss_struct *tss,
}
static void xen_load_esp0(struct tss_struct *tss,
- struct thread_struct *thread)
+ struct thread_struct *thread)
{
struct multicall_space mcs = xen_mc_entry(0);
MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->esp0);
{
struct multicall_space mcs = xen_mc_entry(0);
MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->esp0);
static void xen_write_cr3(unsigned long cr3)
{
static void xen_write_cr3(unsigned long cr3)
{
+ BUG_ON(preemptible());
+
if (cr3 == x86_read_percpu(xen_cr3)) {
/* just a simple tlb flush */
xen_flush_tlb();
if (cr3 == x86_read_percpu(xen_cr3)) {
/* just a simple tlb flush */
xen_flush_tlb();
*
* Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
*/
*
* Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
*/
+#include <linux/sched.h>
#include <linux/highmem.h>
#include <linux/bug.h>
#include <linux/sched.h>
#include <linux/highmem.h>
#include <linux/bug.h>
#include <linux/sched.h>
drop_mm_ref(mm);
put_cpu();
drop_mm_ref(mm);
put_cpu();
+ spin_lock(&mm->page_table_lock);
+ spin_unlock(&mm->page_table_lock);
* Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
*/
#include <linux/percpu.h>
* Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
*/
#include <linux/percpu.h>
+#include <linux/hardirq.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypercall.h>
void xen_mc_flush(void)
{
void xen_mc_flush(void)
{
- struct mc_buffer *b = &get_cpu_var(mc_buffer);
+ struct mc_buffer *b = &__get_cpu_var(mc_buffer);
int ret = 0;
unsigned long flags;
int ret = 0;
unsigned long flags;
+ BUG_ON(preemptible());
+
/* Disable interrupts in case someone comes in and queues
something in the middle */
local_irq_save(flags);
/* Disable interrupts in case someone comes in and queues
something in the middle */
local_irq_save(flags);
} else
BUG_ON(b->argidx != 0);
} else
BUG_ON(b->argidx != 0);
- put_cpu_var(mc_buffer);
local_irq_restore(flags);
BUG_ON(ret);
local_irq_restore(flags);
BUG_ON(ret);
struct multicall_space __xen_mc_entry(size_t args)
{
struct multicall_space __xen_mc_entry(size_t args)
{
- struct mc_buffer *b = &get_cpu_var(mc_buffer);
+ struct mc_buffer *b = &__get_cpu_var(mc_buffer);
struct multicall_space ret;
unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64);
struct multicall_space ret;
unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64);
BUG_ON(argspace > MC_ARGS);
if (b->mcidx == MC_BATCH ||
BUG_ON(argspace > MC_ARGS);
if (b->mcidx == MC_BATCH ||
ret.args = &b->args[b->argidx];
b->argidx += argspace;
ret.args = &b->args[b->argidx];
b->argidx += argspace;
- put_cpu_var(mc_buffer);
-
u64 state_time;
struct vcpu_runstate_info *state;
u64 state_time;
struct vcpu_runstate_info *state;
state = &__get_cpu_var(runstate);
state = &__get_cpu_var(runstate);
*res = *state;
barrier();
} while (get64(&state->state_entry_time) != state_time);
*res = *state;
barrier();
} while (get64(&state->state_entry_time) != state_time);
}
static void setup_runstate_info(int cpu)
}
static void setup_runstate_info(int cpu)
unsigned long long xen_sched_clock(void)
{
struct vcpu_runstate_info state;
unsigned long long xen_sched_clock(void)
{
struct vcpu_runstate_info state;
- cycle_t now = xen_clocksource_read();
+ cycle_t now;
+ u64 ret;
+ /*
+ * Ideally sched_clock should be called on a per-cpu basis
+ * anyway, so preempt should already be disabled, but that's
+ * not current practice.
+ */
+ preempt_disable();
+
+ now = xen_clocksource_read();
+
get_runstate_snapshot(&state);
WARN_ON(state.state != RUNSTATE_running);
get_runstate_snapshot(&state);
WARN_ON(state.state != RUNSTATE_running);
if (offset < 0)
offset = 0;
if (offset < 0)
offset = 0;
- return state.time[RUNSTATE_blocked] +
+ ret = state.time[RUNSTATE_blocked] +
state.time[RUNSTATE_running] +
offset;
state.time[RUNSTATE_running] +
offset;
+
+ preempt_enable();
+
+ return ret;