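lguest: per-vcpu lguest pgdir management

Make the choice of shadow page directory per-vcpu.  The pool of shadow
pgdirs (pgdirs[4]) stays in struct lguest and is still shared by the whole
guest, but the guest-wide pgdidx index is removed; instead each struct
lg_cpu records which pgdir it is currently using in a new cpu_pgd field.
Functions that need the current pgdir (demand_page, pin_page, guest_pa,
guest_pagetable_flush_user, and the static helpers gpgd_addr,
page_writable and new_pgdir) now take a struct lg_cpu * instead of a
struct lguest *.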

author Glauber de Oliveira Costa <gcosta@redhat.com>

Mon, 7 Jan 2008 13:05:37 +0000 (11:05 -0200)

committer Rusty Russell <rusty@rustcorp.com.au>

Wed, 30 Jan 2008 11:50:14 +0000 (22:50 +1100)
author Glauber de Oliveira Costa <gcosta@redhat.com>
Mon, 7 Jan 2008 13:05:37 +0000 (11:05 -0200)
committer Rusty Russell <rusty@rustcorp.com.au>
Wed, 30 Jan 2008 11:50:14 +0000 (22:50 +1100)
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c

index be8f0468576723bc2f20aa08f77116cdc05dec88..0471018d700db290e649e4a051d7634880c9abfd 100644 (file)
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -62,7 +62,7 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
                 if (args->arg1)
                         guest_pagetable_clear_all(cpu);
                 else
-                       guest_pagetable_flush_user(lg);
+                       guest_pagetable_flush_user(cpu);
                 break;
  
         /* All these calls simply pass the arguments through to the right
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c

index b87d9d6c36a47171a63c47724dad7dadf21eae91..6bbfce4e5987ba9eb0548504329b34268ba83f45 100644 (file)
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -76,7 +76,7 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, int has_err)
                 virtstack = cpu->esp1;
                 ss = cpu->ss1;
  
-               origstack = gstack = guest_pa(lg, virtstack);
+               origstack = gstack = guest_pa(cpu, virtstack);
                 /* We push the old stack segment and pointer onto the new
                  * stack: when the Guest does an "iret" back from the interrupt
                  * handler the CPU will notice they're dropping privilege
@@ -88,7 +88,7 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, int has_err)
                 virtstack = cpu->regs->esp;
                 ss = cpu->regs->ss;
  
-               origstack = gstack = guest_pa(lg, virtstack);
+               origstack = gstack = guest_pa(cpu, virtstack);
         }
  
         /* Remember that we never let the Guest actually disable interrupts, so
@@ -323,7 +323,7 @@ void pin_stack_pages(struct lg_cpu *cpu)
                  * start of the page after the kernel stack.  Subtract one to
                  * get back onto the first stack page, and keep subtracting to
                  * get to the rest of the stack pages. */
-               pin_page(lg, cpu->esp1 - 1 - i * PAGE_SIZE);
+               pin_page(cpu, cpu->esp1 - 1 - i * PAGE_SIZE);
  }
  
  /* Direct traps also mean that we need to know whenever the Guest wants to use
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h

index 95b473cdd0e05ff047c7e96846bd5a460fef3f26..94e518da9aa82a5432073cb5d64a85a3ad7b647d 100644 (file)
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -57,6 +57,8 @@ struct lg_cpu {
         unsigned long regs_page;
         struct lguest_regs *regs;
  
+       int cpu_pgd; /* which pgd this cpu is currently using */
+
         /* If a hypercall was asked for, this points to the arguments. */
         struct hcall_args *hcall;
         u32 next_hcall;
@@ -92,8 +94,6 @@ struct lguest
         int changed;
         struct lguest_pages *last_pages;
  
-       /* We keep a small number of these. */
-       u32 pgdidx;
         struct pgdir pgdirs[4];
  
         unsigned long noirq_start, noirq_end;
@@ -169,13 +169,13 @@ void free_guest_pagetable(struct lguest *lg);
  void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable);
  void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 i);
  void guest_pagetable_clear_all(struct lg_cpu *cpu);
-void guest_pagetable_flush_user(struct lguest *lg);
+void guest_pagetable_flush_user(struct lg_cpu *cpu);
  void guest_set_pte(struct lguest *lg, unsigned long gpgdir,
                    unsigned long vaddr, pte_t val);
  void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages);
-int demand_page(struct lguest *info, unsigned long cr2, int errcode);
-void pin_page(struct lguest *lg, unsigned long vaddr);
-unsigned long guest_pa(struct lguest *lg, unsigned long vaddr);
+int demand_page(struct lg_cpu *cpu, unsigned long cr2, int errcode);
+void pin_page(struct lg_cpu *cpu, unsigned long vaddr);
+unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr);
  void page_table_guest_data_init(struct lguest *lg);
  
  /* <arch>/core.c: */
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c

index e34c81636a8c4fac3951bc9ef55f7479088004e2..fb665611ccc24a5c8c73dda7e4b4b7846aa35255 100644 (file)
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -94,10 +94,10 @@ static pte_t *spte_addr(struct lguest *lg, pgd_t spgd, unsigned long vaddr)
  
  /* These two functions just like the above two, except they access the Guest
   * page tables.  Hence they return a Guest address. */
-static unsigned long gpgd_addr(struct lguest *lg, unsigned long vaddr)
+static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr)
  {
         unsigned int index = vaddr >> (PGDIR_SHIFT);
-       return lg->pgdirs[lg->pgdidx].gpgdir + index * sizeof(pgd_t);
+       return cpu->lg->pgdirs[cpu->cpu_pgd].gpgdir + index * sizeof(pgd_t);
  }
  
  static unsigned long gpte_addr(struct lguest *lg,
@@ -200,22 +200,23 @@ static void check_gpgd(struct lguest *lg, pgd_t gpgd)
   *
   * If we fixed up the fault (ie. we mapped the address), this routine returns
   * true.  Otherwise, it was a real fault and we need to tell the Guest. */
-int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
+int demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
  {
         pgd_t gpgd;
         pgd_t *spgd;
         unsigned long gpte_ptr;
         pte_t gpte;
         pte_t *spte;
+       struct lguest *lg = cpu->lg;
  
         /* First step: get the top-level Guest page table entry. */
-       gpgd = lgread(lg, gpgd_addr(lg, vaddr), pgd_t);
+       gpgd = lgread(lg, gpgd_addr(cpu, vaddr), pgd_t);
         /* Toplevel not present?  We can't map it in. */
         if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
                 return 0;
  
         /* Now look at the matching shadow entry. */
-       spgd = spgd_addr(lg, lg->pgdidx, vaddr);
+       spgd = spgd_addr(lg, cpu->cpu_pgd, vaddr);
         if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) {
                 /* No shadow entry: allocate a new shadow PTE page. */
                 unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
@@ -297,19 +298,19 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
   *
   * This is a quick version which answers the question: is this virtual address
   * mapped by the shadow page tables, and is it writable? */
-static int page_writable(struct lguest *lg, unsigned long vaddr)
+static int page_writable(struct lg_cpu *cpu, unsigned long vaddr)
  {
         pgd_t *spgd;
         unsigned long flags;
  
         /* Look at the current top level entry: is it present? */
-       spgd = spgd_addr(lg, lg->pgdidx, vaddr);
+       spgd = spgd_addr(cpu->lg, cpu->cpu_pgd, vaddr);
         if (!(pgd_flags(*spgd) & _PAGE_PRESENT))
                 return 0;
  
         /* Check the flags on the pte entry itself: it must be present and
          * writable. */
-       flags = pte_flags(*(spte_addr(lg, *spgd, vaddr)));
+       flags = pte_flags(*(spte_addr(cpu->lg, *spgd, vaddr)));
  
         return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW);
  }
@@ -317,10 +318,10 @@ static int page_writable(struct lguest *lg, unsigned long vaddr)
  /* So, when pin_stack_pages() asks us to pin a page, we check if it's already
   * in the page tables, and if not, we call demand_page() with error code 2
   * (meaning "write"). */
-void pin_page(struct lguest *lg, unsigned long vaddr)
+void pin_page(struct lg_cpu *cpu, unsigned long vaddr)
  {
-       if (!page_writable(lg, vaddr) && !demand_page(lg, vaddr, 2))
-               kill_guest(lg, "bad stack page %#lx", vaddr);
+       if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2))
+               kill_guest(cpu->lg, "bad stack page %#lx", vaddr);
  }
  
  /*H:450 If we chase down the release_pgd() code, it looks like this: */
@@ -358,28 +359,28 @@ static void flush_user_mappings(struct lguest *lg, int idx)
   *
   * The Guest has a hypercall to throw away the page tables: it's used when a
   * large number of mappings have been changed. */
-void guest_pagetable_flush_user(struct lguest *lg)
+void guest_pagetable_flush_user(struct lg_cpu *cpu)
  {
         /* Drop the userspace part of the current page table. */
-       flush_user_mappings(lg, lg->pgdidx);
+       flush_user_mappings(cpu->lg, cpu->cpu_pgd);
  }
  /*:*/
  
  /* We walk down the guest page tables to get a guest-physical address */
-unsigned long guest_pa(struct lguest *lg, unsigned long vaddr)
+unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
  {
         pgd_t gpgd;
         pte_t gpte;
  
         /* First step: get the top-level Guest page table entry. */
-       gpgd = lgread(lg, gpgd_addr(lg, vaddr), pgd_t);
+       gpgd = lgread(cpu->lg, gpgd_addr(cpu, vaddr), pgd_t);
         /* Toplevel not present?  We can't map it in. */
         if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
-               kill_guest(lg, "Bad address %#lx", vaddr);
+               kill_guest(cpu->lg, "Bad address %#lx", vaddr);
  
-       gpte = lgread(lg, gpte_addr(lg, gpgd, vaddr), pte_t);
+       gpte = lgread(cpu->lg, gpte_addr(cpu->lg, gpgd, vaddr), pte_t);
         if (!(pte_flags(gpte) & _PAGE_PRESENT))
-               kill_guest(lg, "Bad address %#lx", vaddr);
+               kill_guest(cpu->lg, "Bad address %#lx", vaddr);
  
         return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK);
  }
@@ -399,11 +400,12 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable)
  /*H:435 And this is us, creating the new page directory.  If we really do
   * allocate a new one (and so the kernel parts are not there), we set
   * blank_pgdir. */
-static unsigned int new_pgdir(struct lguest *lg,
+static unsigned int new_pgdir(struct lg_cpu *cpu,
                               unsigned long gpgdir,
                               int *blank_pgdir)
  {
         unsigned int next;
+       struct lguest *lg = cpu->lg;
  
         /* We pick one entry at random to throw out.  Choosing the Least
          * Recently Used might be better, but this is easy. */
@@ -413,7 +415,7 @@ static unsigned int new_pgdir(struct lguest *lg,
                 lg->pgdirs[next].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL);
                 /* If the allocation fails, just keep using the one we have */
                 if (!lg->pgdirs[next].pgdir)
-                       next = lg->pgdidx;
+                       next = cpu->cpu_pgd;
                 else
                         /* This is a blank page, so there are no kernel
                          * mappings: caller must map the stack! */
@@ -442,9 +444,9 @@ void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
         /* If not, we allocate or mug an existing one: if it's a fresh one,
          * repin gets set to 1. */
         if (newpgdir == ARRAY_SIZE(lg->pgdirs))
-               newpgdir = new_pgdir(lg, pgtable, &repin);
+               newpgdir = new_pgdir(cpu, pgtable, &repin);
         /* Change the current pgd index to the new one. */
-       lg->pgdidx = newpgdir;
+       cpu->cpu_pgd = newpgdir;
         /* If it was completely blank, we map in the Guest kernel stack */
         if (repin)
                 pin_stack_pages(cpu);
@@ -591,11 +593,11 @@ int init_guest_pagetable(struct lguest *lg, unsigned long pgtable)
  {
         /* We start on the first shadow page table, and give it a blank PGD
          * page. */
-       lg->pgdidx = 0;
-       lg->pgdirs[lg->pgdidx].gpgdir = pgtable;
-       lg->pgdirs[lg->pgdidx].pgdir = (pgd_t*)get_zeroed_page(GFP_KERNEL);
-       if (!lg->pgdirs[lg->pgdidx].pgdir)
+       lg->pgdirs[0].gpgdir = pgtable;
+       lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL);
+       if (!lg->pgdirs[0].pgdir)
                 return -ENOMEM;
+       lg->cpus[0].cpu_pgd = 0;
         return 0;
  }
  
@@ -607,7 +609,7 @@ void page_table_guest_data_init(struct lguest *lg)
             /* We tell the Guest that it can't use the top 4MB of virtual
              * addresses used by the Switcher. */
             || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem)
-           || put_user(lg->pgdirs[lg->pgdidx].gpgdir,&lg->lguest_data->pgdir))
+           || put_user(lg->pgdirs[0].gpgdir, &lg->lguest_data->pgdir))
                 kill_guest(lg, "bad guest page %p", lg->lguest_data);
  
         /* In flush_user_mappings() we loop from 0 to
@@ -637,7 +639,6 @@ void free_guest_pagetable(struct lguest *lg)
   * Guest is about to run on this CPU. */
  void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
  {
-       struct lguest *lg = cpu->lg;
         pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages);
         pgd_t switcher_pgd;
         pte_t regs_pte;
@@ -647,7 +648,7 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
          * page for this CPU (with appropriate flags). */
         switcher_pgd = __pgd(__pa(switcher_pte_page) | _PAGE_KERNEL);
  
-       lg->pgdirs[lg->pgdidx].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;
+       cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;
  
         /* We also change the Switcher PTE page.  When we're running the Guest,
          * we want the Guest's "regs" page to appear where the first Switcher
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c

index 65f2e3809475c74804e7506f5d932c3db0ad84d0..8c723555ffb39cf396ddf19272ae909e83fce08c 100644 (file)
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -145,7 +145,7 @@ static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages)
                       * 0-th argument above, ie "a").  %ebx contains the
                       * physical address of the Guest's top-level page
                       * directory. */
-                    : "0"(pages), "1"(__pa(lg->pgdirs[lg->pgdidx].pgdir))
+                    : "0"(pages), "1"(__pa(lg->pgdirs[cpu->cpu_pgd].pgdir))
                      /* We tell gcc that all these registers could change,
                       * which means we don't have to save and restore them in
                       * the Switcher. */
@@ -223,7 +223,7 @@ static int emulate_insn(struct lg_cpu *cpu)
         unsigned int insnlen = 0, in = 0, shift = 0;
         /* The eip contains the *virtual* address of the Guest's instruction:
          * guest_pa just subtracts the Guest's page_offset. */
-       unsigned long physaddr = guest_pa(lg, cpu->regs->eip);
+       unsigned long physaddr = guest_pa(cpu, cpu->regs->eip);
  
         /* This must be the Guest kernel trying to do something, not userspace!
          * The bottom two bits of the CS segment register are the privilege
@@ -305,7 +305,8 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
                  *
                  * The errcode tells whether this was a read or a write, and
                  * whether kernel or userspace code. */
-               if (demand_page(lg,cpu->arch.last_pagefault,cpu->regs->errcode))
+               if (demand_page(cpu, cpu->arch.last_pagefault,
+                               cpu->regs->errcode))
                         return;
  
                 /* OK, it's really not there (or not OK): the Guest needs to
author	Glauber de Oliveira Costa <gcosta@redhat.com>
	Mon, 7 Jan 2008 13:05:37 +0000 (11:05 -0200)
committer	Rusty Russell <rusty@rustcorp.com.au>
	Wed, 30 Jan 2008 11:50:14 +0000 (22:50 +1100)
drivers/lguest/hypercalls.c		patch \| blob \| history
drivers/lguest/interrupts_and_traps.c		patch \| blob \| history
drivers/lguest/lg.h		patch \| blob \| history
drivers/lguest/page_tables.c		patch \| blob \| history
drivers/lguest/x86/core.c		patch \| blob \| history