debuggers.hg

changeset 22838:e9277ab43947

p2m: Allow non-leaf entries to be replaced by leaf entries

Allow l2 and l3 p2m entries that point to lower-level tables to be
replaced with 2MB and 1GB superpage entries respectively, freeing the
replaced p2m table pages properly. This allows, for example, a sequence
of 512 singleton zero pages to be replaced with a single superpage
populate-on-demand entry.

Changes:
* Add a p2m_free_ptp() corresponding to p2m_alloc_ptp(), which will
handle everything related to the freeing properly.
* Add p2m_free_entry(), based on ept_free_entry(), which will free
intermediate tables recursively.
* For both ept and p2m, when replacing a non-leaf entry with a leaf
entry, keep a copy of the old entry and call *_free_entry() only
after the new entry has been written and the proper flushes have
been done.

Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>
Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>
author George Dunlap <george.dunlap@eu.citrix.com>
date Fri Jan 21 15:37:36 2011 +0000 (2011-01-21)
parents 003acf02d416
children ca64bd257a6f
files xen/arch/x86/mm/hap/hap.c xen/arch/x86/mm/hap/p2m-ept.c xen/arch/x86/mm/p2m.c xen/include/asm-x86/p2m.h
line diff
     1.1 --- a/xen/arch/x86/mm/hap/hap.c	Thu Jan 20 17:04:06 2011 +0000
     1.2 +++ b/xen/arch/x86/mm/hap/hap.c	Fri Jan 21 15:37:36 2011 +0000
     1.3 @@ -333,9 +333,11 @@ static void hap_free_p2m_page(struct dom
     1.4  
     1.5      ASSERT(page_get_owner(pg) == d);
     1.6      /* Should have just the one ref we gave it in alloc_p2m_page() */
     1.7 -    if ( (pg->count_info & PGC_count_mask) != 1 )
     1.8 -        HAP_ERROR("Odd p2m page count c=%#lx t=%"PRtype_info"\n",
     1.9 -                  pg->count_info, pg->u.inuse.type_info);
    1.10 +    if ( (pg->count_info & PGC_count_mask) != 1 ) {
    1.11 +        HAP_ERROR("Odd p2m page %p count c=%#lx t=%"PRtype_info"\n",
    1.12 +                     pg, pg->count_info, pg->u.inuse.type_info);
    1.13 +        WARN();
    1.14 +    }
    1.15      pg->count_info &= ~PGC_count_mask;
    1.16      /* Free should not decrement domain's total allocation, since
    1.17       * these pages were allocated without an owner. */
     2.1 --- a/xen/arch/x86/mm/hap/p2m-ept.c	Thu Jan 20 17:04:06 2011 +0000
     2.2 +++ b/xen/arch/x86/mm/hap/p2m-ept.c	Fri Jan 21 15:37:36 2011 +0000
     2.3 @@ -166,8 +166,6 @@ static int ept_set_middle_entry(struct p
     2.4  /* free ept sub tree behind an entry */
     2.5  void ept_free_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry, int level)
     2.6  {
     2.7 -    struct domain *d = p2m->domain;
     2.8 -
     2.9      /* End if the entry is a leaf entry. */
    2.10      if ( level == 0 || !is_epte_present(ept_entry) ||
    2.11           is_epte_superpage(ept_entry) )
    2.12 @@ -180,8 +178,8 @@ void ept_free_entry(struct p2m_domain *p
    2.13              ept_free_entry(p2m, epte + i, level - 1);
    2.14          unmap_domain_page(epte);
    2.15      }
    2.16 -
    2.17 -    d->arch.paging.free_page(d, mfn_to_page(ept_entry->mfn));
    2.18 +    
    2.19 +    p2m_free_ptp(p2m, mfn_to_page(ept_entry->mfn));
    2.20  }
    2.21  
    2.22  static int ept_split_super_page(struct p2m_domain *p2m, ept_entry_t *ept_entry,
    2.23 @@ -317,6 +315,7 @@ ept_set_entry(struct p2m_domain *p2m, un
    2.24      int vtd_pte_present = 0;
    2.25      int needs_sync = 1;
    2.26      struct domain *d = p2m->domain;
    2.27 +    ept_entry_t old_entry = { .epte = 0 };
    2.28  
    2.29      /*
    2.30       * the caller must make sure:
    2.31 @@ -357,8 +356,12 @@ ept_set_entry(struct p2m_domain *p2m, un
    2.32      vtd_pte_present = is_epte_present(ept_entry) ? 1 : 0;
    2.33  
    2.34      /*
    2.35 -     * When we are here, we must be on a leaf ept entry
    2.36 -     * with i == target or i > target.
    2.37 +     * If we're here with i > target, we must be at a leaf node, and
    2.38 +     * we need to break up the superpage.
    2.39 +     *
    2.40 +     * If we're here with i == target and i > 0, we need to check to see
    2.41 +     * if we're replacing a non-leaf entry (i.e., pointing to an N-1 table)
    2.42 +     * with a leaf entry (a 1GiB or 2MiB page), and handle things appropriately.
    2.43       */
    2.44  
    2.45      if ( i == target )
    2.46 @@ -370,6 +373,10 @@ ept_set_entry(struct p2m_domain *p2m, un
    2.47          if ( !is_epte_present(ept_entry) )
    2.48              needs_sync = 0;
    2.49  
    2.50 +        /* If we're replacing a non-leaf entry with a leaf entry (1GiB or 2MiB),
    2.51 +         * the intermediate tables will be freed below after the ept flush */
    2.52 +        old_entry = *ept_entry;
    2.53 +
    2.54          if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) ||
    2.55               (p2mt == p2m_ram_paging_in_start) )
    2.56          {
    2.57 @@ -487,6 +494,13 @@ out:
    2.58          }
    2.59      }
    2.60  
    2.61 +    /* Release the old intermediate tables, if any.  This has to be the
    2.62 +       last thing we do, after the ept_sync_domain() and removal
    2.63 +       from the iommu tables, so as to avoid a potential
    2.64 +       use-after-free. */
    2.65 +    if ( is_epte_present(&old_entry) )
    2.66 +        ept_free_entry(p2m, &old_entry, target);
    2.67 +
    2.68      return rv;
    2.69  }
    2.70  
     3.1 --- a/xen/arch/x86/mm/p2m.c	Thu Jan 20 17:04:06 2011 +0000
     3.2 +++ b/xen/arch/x86/mm/p2m.c	Fri Jan 21 15:37:36 2011 +0000
     3.3 @@ -153,11 +153,45 @@ p2m_alloc_ptp(struct p2m_domain *p2m, un
     3.4  
     3.5      page_list_add_tail(pg, &p2m->pages);
     3.6      pg->u.inuse.type_info = type | 1 | PGT_validated;
     3.7 -    pg->count_info |= 1;
     3.8  
     3.9      return pg;
    3.10  }
    3.11  
    3.12 +void
    3.13 +p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg)
    3.14 +{
    3.15 +    ASSERT(pg);
    3.16 +    ASSERT(p2m);
    3.17 +    ASSERT(p2m->domain);
    3.18 +    ASSERT(p2m->domain->arch.paging.free_page);
    3.19 +
    3.20 +    page_list_del(pg, &p2m->pages);
    3.21 +    p2m->domain->arch.paging.free_page(p2m->domain, pg);
    3.22 +
    3.23 +    return;
    3.24 +}
    3.25 +
    3.26 +/* Free intermediate tables from a p2m sub-tree */
    3.27 +void
    3.28 +p2m_free_entry(struct p2m_domain *p2m, l1_pgentry_t *p2m_entry, int page_order)
    3.29 +{
    3.30 +    /* End if the entry is a leaf entry. */
    3.31 +    if ( page_order == 0
    3.32 +         || !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT)
    3.33 +         || (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
    3.34 +        return;
    3.35 +
    3.36 +    if ( page_order > 9 )
    3.37 +    {
    3.38 +        l1_pgentry_t *l3_table = map_domain_page(l1e_get_pfn(*p2m_entry));
    3.39 +        for ( int i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
    3.40 +            p2m_free_entry(p2m, l3_table + i, page_order - 9);
    3.41 +        unmap_domain_page(l3_table);
    3.42 +    }
    3.43 +
    3.44 +    p2m_free_ptp(p2m, mfn_to_page(_mfn(l1e_get_pfn(*p2m_entry))));
    3.45 +}
    3.46 +
    3.47  // Walk one level of the P2M table, allocating a new table if required.
    3.48  // Returns 0 on error.
    3.49  //
    3.50 @@ -1316,6 +1350,7 @@ p2m_set_entry(struct p2m_domain *p2m, un
    3.51       */
    3.52      if ( page_order == 18 )
    3.53      {
    3.54 +        l1_pgentry_t old_entry = l1e_empty();
    3.55          p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
    3.56                                     L3_PAGETABLE_SHIFT - PAGE_SHIFT,
    3.57                                     L3_PAGETABLE_ENTRIES);
    3.58 @@ -1323,10 +1358,11 @@ p2m_set_entry(struct p2m_domain *p2m, un
    3.59          if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
    3.60               !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
    3.61          {
    3.62 -            P2M_ERROR("configure P2M table L3 entry with large page\n");
    3.63 -            domain_crash(p2m->domain);
    3.64 -            goto out;
    3.65 +            /* We're replacing a non-SP page with a superpage.  Make sure to
    3.66 +             * handle freeing the table properly. */
    3.67 +            old_entry = *p2m_entry;
    3.68          }
    3.69 +
    3.70          ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
    3.71          l3e_content = mfn_valid(mfn) 
    3.72              ? l3e_from_pfn(mfn_x(mfn),
    3.73 @@ -1335,7 +1371,11 @@ p2m_set_entry(struct p2m_domain *p2m, un
    3.74          entry_content.l1 = l3e_content.l3;
    3.75          paging_write_p2m_entry(p2m->domain, gfn, p2m_entry,
    3.76                                 table_mfn, entry_content, 3);
    3.77 -
    3.78 +        /* NB: paging_write_p2m_entry() handles tlb flushes properly */
    3.79 +
    3.80 +        /* Free old intermediate tables if necessary */
    3.81 +        if ( l1e_get_flags(old_entry) & _PAGE_PRESENT )
    3.82 +            p2m_free_entry(p2m, &old_entry, page_order);
    3.83      }
    3.84      /*
    3.85       * When using PAE Xen, we only allow 33 bits of pseudo-physical
    3.86 @@ -1372,9 +1412,11 @@ p2m_set_entry(struct p2m_domain *p2m, un
    3.87          /* level 1 entry */
    3.88          paging_write_p2m_entry(p2m->domain, gfn, p2m_entry,
    3.89                                 table_mfn, entry_content, 1);
    3.90 +        /* NB: paging_write_p2m_entry() handles tlb flushes properly */
    3.91      }
    3.92      else if ( page_order == 9 )
    3.93      {
    3.94 +        l1_pgentry_t old_entry = l1e_empty();
    3.95          p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
    3.96                                     L2_PAGETABLE_SHIFT - PAGE_SHIFT,
    3.97                                     L2_PAGETABLE_ENTRIES);
    3.98 @@ -1384,9 +1426,9 @@ p2m_set_entry(struct p2m_domain *p2m, un
    3.99          if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
   3.100               !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
   3.101          {
   3.102 -            P2M_ERROR("configure P2M table 4KB L2 entry with large page\n");
   3.103 -            domain_crash(p2m->domain);
   3.104 -            goto out;
   3.105 +            /* We're replacing a non-SP page with a superpage.  Make sure to
   3.106 +             * handle freeing the table properly. */
   3.107 +            old_entry = *p2m_entry;
   3.108          }
   3.109          
   3.110          ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
   3.111 @@ -1400,6 +1442,11 @@ p2m_set_entry(struct p2m_domain *p2m, un
   3.112          entry_content.l1 = l2e_content.l2;
   3.113          paging_write_p2m_entry(p2m->domain, gfn, p2m_entry,
   3.114                                 table_mfn, entry_content, 2);
   3.115 +        /* NB: paging_write_p2m_entry() handles tlb flushes properly */
   3.116 +
   3.117 +        /* Free old intermediate tables if necessary */
   3.118 +        if ( l1e_get_flags(old_entry) & _PAGE_PRESENT )
   3.119 +            p2m_free_entry(p2m, &old_entry, page_order);
   3.120      }
   3.121  
   3.122      /* Track the highest gfn for which we have ever had a valid mapping */
     4.1 --- a/xen/include/asm-x86/p2m.h	Thu Jan 20 17:04:06 2011 +0000
     4.2 +++ b/xen/include/asm-x86/p2m.h	Fri Jan 21 15:37:36 2011 +0000
     4.3 @@ -541,6 +541,7 @@ static inline void p2m_mem_access_check(
     4.4  #endif
     4.5  
     4.6  struct page_info *p2m_alloc_ptp(struct p2m_domain *p2m, unsigned long type);
     4.7 +void p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg);
     4.8  
     4.9  #endif /* _XEN_P2M_H */
    4.10