From: Jeremy Fitzhardinge Date: Tue, 16 Oct 2007 18:51:29 +0000 (-0700) Subject: paravirt: clean up lazy mode handling X-Git-Tag: v2.6.24-rc1~478^2~10 X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8965c1c0950d459d99b8b81dfc1ab02e3d2cfb08;p=linux-2.6 paravirt: clean up lazy mode handling Currently, the set_lazy_mode pv_op is overloaded with 5 functions: 1. enter lazy cpu mode 2. leave lazy cpu mode 3. enter lazy mmu mode 4. leave lazy mmu mode 5. flush pending batched operations This complicates each paravirt backend, since it needs to deal with all the possible state transitions, handling flushing, etc. In particular, flushing is quite distinct from the other 4 functions, and seems to just cause complication. This patch removes the set_lazy_mode operation, and adds "enter" and "leave" lazy mode operations on mmu_ops and cpu_ops. All the logic associated with enter and leaving lazy states is now in common code (basically BUG_ONs to make sure that no mode is current when entering a lazy mode, and make sure that the mode is current when leaving). Also, flush is handled in a common way, by simply leaving and re-entering the lazy mode. The result is that the Xen, lguest and VMI lazy mode implementations are much simpler. Signed-off-by: Jeremy Fitzhardinge Cc: Andi Kleen Cc: Zach Amsden Cc: Rusty Russell Cc: Avi Kivity Cc: Anthony Liguory Cc: "Glauber de Oliveira Costa" Cc: Jun Nakajima --- diff --git a/arch/x86/kernel/paravirt_32.c b/arch/x86/kernel/paravirt_32.c index fa412515af..6a80d67c21 100644 --- a/arch/x86/kernel/paravirt_32.c +++ b/arch/x86/kernel/paravirt_32.c @@ -164,7 +164,6 @@ static void *get_call_destination(u8 type) { struct paravirt_patch_template tmpl = { .pv_init_ops = pv_init_ops, - .pv_misc_ops = pv_misc_ops, .pv_time_ops = pv_time_ops, .pv_cpu_ops = pv_cpu_ops, .pv_irq_ops = pv_irq_ops, @@ -282,6 +281,49 @@ int paravirt_disable_iospace(void) return ret; } +static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; + +static inline void enter_lazy(enum paravirt_lazy_mode mode) +{ + BUG_ON(x86_read_percpu(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); + BUG_ON(preemptible()); + + x86_write_percpu(paravirt_lazy_mode, mode); +} + +void paravirt_leave_lazy(enum paravirt_lazy_mode mode) +{ + BUG_ON(x86_read_percpu(paravirt_lazy_mode) != mode); + BUG_ON(preemptible()); + + x86_write_percpu(paravirt_lazy_mode, PARAVIRT_LAZY_NONE); +} + +void paravirt_enter_lazy_mmu(void) +{ + enter_lazy(PARAVIRT_LAZY_MMU); +} + +void paravirt_leave_lazy_mmu(void) +{ + paravirt_leave_lazy(PARAVIRT_LAZY_MMU); +} + +void paravirt_enter_lazy_cpu(void) +{ + enter_lazy(PARAVIRT_LAZY_CPU); +} + +void paravirt_leave_lazy_cpu(void) +{ + paravirt_leave_lazy(PARAVIRT_LAZY_CPU); +} + +enum paravirt_lazy_mode paravirt_get_lazy_mode(void) +{ + return x86_read_percpu(paravirt_lazy_mode); +} + struct pv_info pv_info = { .name = "bare hardware", .paravirt_enabled = 0, @@ -347,6 +389,11 @@ struct pv_cpu_ops pv_cpu_ops = { .set_iopl_mask = native_set_iopl_mask, .io_delay = native_io_delay, + + .lazy_mode = { + .enter = paravirt_nop, + .leave = paravirt_nop, + }, }; struct pv_apic_ops pv_apic_ops = { @@ -360,10 +407,6 @@ struct pv_apic_ops pv_apic_ops = { #endif }; -struct pv_misc_ops pv_misc_ops = { - .set_lazy_mode = paravirt_nop, -}; - struct pv_mmu_ops pv_mmu_ops = { .pagetable_setup_start = native_pagetable_setup_start, .pagetable_setup_done = native_pagetable_setup_done, @@ -414,6 +457,11 @@ struct pv_mmu_ops pv_mmu_ops = { .dup_mmap = paravirt_nop, .exit_mmap = paravirt_nop, .activate_mm = paravirt_nop, + + .lazy_mode = { + .enter = paravirt_nop, + .leave = paravirt_nop, + }, }; EXPORT_SYMBOL_GPL(pv_time_ops); diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 67cea5c2e3..f02bad68ab 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -552,24 +552,22 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, } #endif -static void vmi_set_lazy_mode(enum paravirt_lazy_mode mode) +static void vmi_enter_lazy_cpu(void) { - static DEFINE_PER_CPU(enum paravirt_lazy_mode, lazy_mode); - - if (!vmi_ops.set_lazy_mode) - return; + paravirt_enter_lazy_cpu(); + vmi_ops.set_lazy_mode(2); +} - /* Modes should never nest or overlap */ - BUG_ON(__get_cpu_var(lazy_mode) && !(mode == PARAVIRT_LAZY_NONE || - mode == PARAVIRT_LAZY_FLUSH)); +static void vmi_enter_lazy_mmu(void) +{ + paravirt_enter_lazy_mmu(); + vmi_ops.set_lazy_mode(1); +} - if (mode == PARAVIRT_LAZY_FLUSH) { - vmi_ops.set_lazy_mode(0); - vmi_ops.set_lazy_mode(__get_cpu_var(lazy_mode)); - } else { - vmi_ops.set_lazy_mode(mode); - __get_cpu_var(lazy_mode) = mode; - } +static void vmi_leave_lazy(void) +{ + paravirt_leave_lazy(paravirt_get_lazy_mode()); + vmi_ops.set_lazy_mode(0); } static inline int __init check_vmi_rom(struct vrom_header *rom) @@ -798,7 +796,16 @@ static inline int __init activate_vmi(void) para_wrap(pv_cpu_ops.load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack); para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask); para_fill(pv_cpu_ops.io_delay, IODelay); - para_wrap(pv_misc_ops.set_lazy_mode, vmi_set_lazy_mode, set_lazy_mode, SetLazyMode); + + para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu, + set_lazy_mode, SetLazyMode); + para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy, + set_lazy_mode, SetLazyMode); + + para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu, + set_lazy_mode, SetLazyMode); + para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy, + set_lazy_mode, SetLazyMode); /* user and kernel flush are just handled with different flags to FlushTLB */ para_wrap(pv_mmu_ops.flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 3d3bf05dec..7171a07360 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -52,8 +52,6 @@ EXPORT_SYMBOL_GPL(hypercall_page); -DEFINE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode); - DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); DEFINE_PER_CPU(unsigned long, xen_cr3); @@ -249,29 +247,10 @@ static void xen_halt(void) xen_safe_halt(); } -static void xen_set_lazy_mode(enum paravirt_lazy_mode mode) +static void xen_leave_lazy(void) { - BUG_ON(preemptible()); - - switch (mode) { - case PARAVIRT_LAZY_NONE: - BUG_ON(x86_read_percpu(xen_lazy_mode) == PARAVIRT_LAZY_NONE); - break; - - case PARAVIRT_LAZY_MMU: - case PARAVIRT_LAZY_CPU: - BUG_ON(x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE); - break; - - case PARAVIRT_LAZY_FLUSH: - /* flush if necessary, but don't change state */ - if (x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE) - xen_mc_flush(); - return; - } - + paravirt_leave_lazy(paravirt_get_lazy_mode()); xen_mc_flush(); - x86_write_percpu(xen_lazy_mode, mode); } static unsigned long xen_store_tr(void) @@ -358,7 +337,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu) * loaded properly. This will go away as soon as Xen has been * modified to not save/restore %gs for normal hypercalls. */ - if (xen_get_lazy_mode() == PARAVIRT_LAZY_CPU) + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) loadsegment(gs, 0); } @@ -962,6 +941,11 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .set_iopl_mask = xen_set_iopl_mask, .io_delay = xen_io_delay, + + .lazy_mode = { + .enter = paravirt_enter_lazy_cpu, + .leave = xen_leave_lazy, + }, }; static const struct pv_irq_ops xen_irq_ops __initdata = { @@ -1037,10 +1021,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .activate_mm = xen_activate_mm, .dup_mmap = xen_dup_mmap, .exit_mmap = xen_exit_mmap, -}; -static const struct pv_misc_ops xen_misc_ops __initdata = { - .set_lazy_mode = xen_set_lazy_mode, + .lazy_mode = { + .enter = paravirt_enter_lazy_mmu, + .leave = xen_leave_lazy, + }, }; #ifdef CONFIG_SMP @@ -1114,7 +1099,6 @@ asmlinkage void __init xen_start_kernel(void) pv_irq_ops = xen_irq_ops; pv_apic_ops = xen_apic_ops; pv_mmu_ops = xen_mmu_ops; - pv_misc_ops = xen_misc_ops; machine_ops = xen_machine_ops; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 874db0cd1d..2061bdd3e7 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -155,7 +155,7 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval) { if (mm == current->mm || mm == &init_mm) { - if (xen_get_lazy_mode() == PARAVIRT_LAZY_MMU) { + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { struct multicall_space mcs; mcs = xen_mc_entry(0); diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index e6f7530b15..5d96a5fa21 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h @@ -35,7 +35,7 @@ void xen_mc_flush(void); /* Issue a multicall if we're not in a lazy mode */ static inline void xen_mc_issue(unsigned mode) { - if ((xen_get_lazy_mode() & mode) == 0) + if ((paravirt_get_lazy_mode() & mode) == 0) xen_mc_flush(); /* restore flags saved in xen_mc_batch */ diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index b9aaea45f0..b5697bae52 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -29,13 +29,6 @@ unsigned long long xen_sched_clock(void); void xen_mark_init_mm_pinned(void); -DECLARE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode); - -static inline unsigned xen_get_lazy_mode(void) -{ - return x86_read_percpu(xen_lazy_mode); -} - void __init xen_fill_possible_map(void); void __init xen_setup_vcpu_info_placement(void); diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c index ca9b844f37..c302629e08 100644 --- a/drivers/lguest/lguest.c +++ b/drivers/lguest/lguest.c @@ -97,29 +97,17 @@ static cycle_t clock_base; * them as a batch when lazy_mode is eventually turned off. Because hypercalls * are reasonably expensive, batching them up makes sense. For example, a * large mmap might update dozens of page table entries: that code calls - * lguest_lazy_mode(PARAVIRT_LAZY_MMU), does the dozen updates, then calls - * lguest_lazy_mode(PARAVIRT_LAZY_NONE). + * paravirt_enter_lazy_mmu(), does the dozen updates, then calls + * lguest_leave_lazy_mode(). * * So, when we're in lazy mode, we call async_hypercall() to store the call for * future processing. When lazy mode is turned off we issue a hypercall to * flush the stored calls. - * - * There's also a hack where "mode" is set to "PARAVIRT_LAZY_FLUSH" which - * indicates we're to flush any outstanding calls immediately. This is used - * when an interrupt handler does a kmap_atomic(): the page table changes must - * happen immediately even if we're in the middle of a batch. Usually we're - * not, though, so there's nothing to do. */ -static enum paravirt_lazy_mode lazy_mode; /* Note: not SMP-safe! */ -static void lguest_lazy_mode(enum paravirt_lazy_mode mode) + */ +static void lguest_leave_lazy_mode(void) { - if (mode == PARAVIRT_LAZY_FLUSH) { - if (unlikely(lazy_mode != PARAVIRT_LAZY_NONE)) - hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0); - } else { - lazy_mode = mode; - if (mode == PARAVIRT_LAZY_NONE) - hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0); - } + paravirt_leave_lazy(paravirt_get_lazy_mode()); + hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0); } static void lazy_hcall(unsigned long call, @@ -127,7 +115,7 @@ static void lazy_hcall(unsigned long call, unsigned long arg2, unsigned long arg3) { - if (lazy_mode == PARAVIRT_LAZY_NONE) + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) hcall(call, arg1, arg2, arg3); else async_hcall(call, arg1, arg2, arg3); @@ -1011,6 +999,8 @@ __init void lguest_init(void *boot) pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry; pv_cpu_ops.write_idt_entry = lguest_write_idt_entry; pv_cpu_ops.wbinvd = lguest_wbinvd; + pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu; + pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode; /* pagetable management */ pv_mmu_ops.write_cr3 = lguest_write_cr3; @@ -1022,6 +1012,8 @@ __init void lguest_init(void *boot) pv_mmu_ops.set_pmd = lguest_set_pmd; pv_mmu_ops.read_cr2 = lguest_read_cr2; pv_mmu_ops.read_cr3 = lguest_read_cr3; + pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu; + pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode; #ifdef CONFIG_X86_LOCAL_APIC /* apic read/write intercepts */ @@ -1034,8 +1026,6 @@ __init void lguest_init(void *boot) pv_time_ops.get_wallclock = lguest_get_wallclock; pv_time_ops.time_init = lguest_time_init; - pv_misc_ops.set_lazy_mode = lguest_lazy_mode; - /* Now is a good time to look at the implementations of these functions * before returning to the rest of lguest_init(). */ diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index 19726e1205..f59d370c5d 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -25,15 +25,6 @@ struct tss_struct; struct mm_struct; struct desc_struct; -/* Lazy mode for batching updates / context switch */ -enum paravirt_lazy_mode { - PARAVIRT_LAZY_NONE = 0, - PARAVIRT_LAZY_MMU = 1, - PARAVIRT_LAZY_CPU = 2, - PARAVIRT_LAZY_FLUSH = 3, -}; - - /* general info */ struct pv_info { unsigned int kernel_rpl; @@ -64,9 +55,10 @@ struct pv_init_ops { }; -struct pv_misc_ops { +struct pv_lazy_ops { /* Set deferred update mode, used for batching operations. */ - void (*set_lazy_mode)(enum paravirt_lazy_mode mode); + void (*enter)(void); + void (*leave)(void); }; struct pv_time_ops { @@ -131,6 +123,8 @@ struct pv_cpu_ops { /* These two are jmp to, not actually called. */ void (*irq_enable_sysexit)(void); void (*iret)(void); + + struct pv_lazy_ops lazy_mode; }; struct pv_irq_ops { @@ -244,6 +238,8 @@ struct pv_mmu_ops { #ifdef CONFIG_HIGHPTE void *(*kmap_atomic_pte)(struct page *page, enum km_type type); #endif + + struct pv_lazy_ops lazy_mode; }; /* This contains all the paravirt structures: we get a convenient @@ -252,7 +248,6 @@ struct pv_mmu_ops { struct paravirt_patch_template { struct pv_init_ops pv_init_ops; - struct pv_misc_ops pv_misc_ops; struct pv_time_ops pv_time_ops; struct pv_cpu_ops pv_cpu_ops; struct pv_irq_ops pv_irq_ops; @@ -262,7 +257,6 @@ struct paravirt_patch_template extern struct pv_info pv_info; extern struct pv_init_ops pv_init_ops; -extern struct pv_misc_ops pv_misc_ops; extern struct pv_time_ops pv_time_ops; extern struct pv_cpu_ops pv_cpu_ops; extern struct pv_irq_ops pv_irq_ops; @@ -953,37 +947,57 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval) } #endif /* CONFIG_X86_PAE */ +/* Lazy mode for batching updates / context switch */ +enum paravirt_lazy_mode { + PARAVIRT_LAZY_NONE, + PARAVIRT_LAZY_MMU, + PARAVIRT_LAZY_CPU, +}; + +enum paravirt_lazy_mode paravirt_get_lazy_mode(void); +void paravirt_enter_lazy_cpu(void); +void paravirt_leave_lazy_cpu(void); +void paravirt_enter_lazy_mmu(void); +void paravirt_leave_lazy_mmu(void); +void paravirt_leave_lazy(enum paravirt_lazy_mode mode); + #define __HAVE_ARCH_ENTER_LAZY_CPU_MODE static inline void arch_enter_lazy_cpu_mode(void) { - PVOP_VCALL1(pv_misc_ops.set_lazy_mode, PARAVIRT_LAZY_CPU); + PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter); } static inline void arch_leave_lazy_cpu_mode(void) { - PVOP_VCALL1(pv_misc_ops.set_lazy_mode, PARAVIRT_LAZY_NONE); + PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); } static inline void arch_flush_lazy_cpu_mode(void) { - PVOP_VCALL1(pv_misc_ops.set_lazy_mode, PARAVIRT_LAZY_FLUSH); + if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) { + arch_leave_lazy_cpu_mode(); + arch_enter_lazy_cpu_mode(); + } } #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE static inline void arch_enter_lazy_mmu_mode(void) { - PVOP_VCALL1(pv_misc_ops.set_lazy_mode, PARAVIRT_LAZY_MMU); + PVOP_VCALL0(pv_mmu_ops.lazy_mode.enter); } static inline void arch_leave_lazy_mmu_mode(void) { - PVOP_VCALL1(pv_misc_ops.set_lazy_mode, PARAVIRT_LAZY_NONE); + PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave); } static inline void arch_flush_lazy_mmu_mode(void) { - PVOP_VCALL1(pv_misc_ops.set_lazy_mode, PARAVIRT_LAZY_FLUSH); + if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) { + arch_leave_lazy_mmu_mode(); + arch_enter_lazy_mmu_mode(); + } } void _paravirt_nop(void);