debuggers.hg

changeset 21147:1ea7b73b3061

1GB Page Table Support for HVM Guest 2/3

This patch changes the P2M code to work with 1GB pages.
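
For readers unfamiliar with the page-order convention used throughout the patch: with 4KB base pages, order 9 covers a 2MB superpage and order 18 covers a 1GB superpage, and the changed set_p2m_entry() only picks the 1GB order when gfn, mfn and the remaining length are all 1GB-aligned. The following is a minimal standalone sketch, not part of the changeset (pick_order() is a hypothetical helper), mirroring that arithmetic:

#include <stdio.h>

/* Hypothetical helper mirroring the order-selection check added to
 * set_p2m_entry(): use a 1GB (order 18) mapping only when gfn, mfn and the
 * remaining number of pages are all 1GB-aligned; otherwise fall back to
 * 2MB (order 9) or 4KB (order 0). */
static unsigned int pick_order(unsigned long gfn, unsigned long mfn,
                               unsigned long todo)
{
    if ( ((gfn | mfn | todo) & ((1ul << 18) - 1)) == 0 )
        return 18;                         /* 1GB = 2^18 x 4KB */
    if ( ((gfn | mfn | todo) & ((1ul << 9) - 1)) == 0 )
        return 9;                          /* 2MB = 2^9  x 4KB */
    return 0;                              /* 4KB */
}

int main(void)
{
    printf("order 9  covers %lu KB\n", (1ul << 9) * 4);          /* 2048 KB */
    printf("order 18 covers %lu MB\n", (1ul << 18) * 4 / 1024);  /* 1024 MB */
    printf("1GB-aligned range -> order %u\n",
           pick_order(0x40000, 0x40000, 0x40000));               /* 18 */
    printf("2MB-aligned range -> order %u\n",
           pick_order(0x200, 0x200, 0x200));                     /* 9 */
    return 0;
}

Compiled on its own this prints the 2MB/1GB coverage of the two orders and shows that only fully 1GB-aligned ranges get order 18, matching the alignment test added to set_p2m_entry() below.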

Signed-off-by: Wei Huang <wei.huang2@amd.com>
Acked-by: Dongxiao Xu <dongxiao.xu@intel.com>
Acked-by: Tim Deegan <tim.deegan@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Apr 06 07:07:37 2010 +0100 (2010-04-06)
parents 6868816898bd
children 44bef2b4a075
files xen/arch/x86/mm/p2m.c
line diff
     1.1 --- a/xen/arch/x86/mm/p2m.c	Tue Apr 06 07:02:17 2010 +0100
     1.2 +++ b/xen/arch/x86/mm/p2m.c	Tue Apr 06 07:07:37 2010 +0100
     1.3 @@ -187,7 +187,36 @@ p2m_next_level(struct domain *d, mfn_t *
     1.4  
     1.5      ASSERT(l1e_get_flags(*p2m_entry) & (_PAGE_PRESENT|_PAGE_PSE));
     1.6  
     1.7 -    /* split single large page into 4KB page in P2M table */
     1.8 +    /* split 1GB pages into 2MB pages */
     1.9 +    if ( type == PGT_l2_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
    1.10 +    {
    1.11 +        unsigned long flags, pfn;
    1.12 +        struct page_info *pg = d->arch.p2m->alloc_page(d);
    1.13 +        if ( pg == NULL )
    1.14 +            return 0;
    1.15 +        page_list_add_tail(pg, &d->arch.p2m->pages);
    1.16 +        pg->u.inuse.type_info = PGT_l2_page_table | 1 | PGT_validated;
    1.17 +        pg->count_info = 1;
    1.18 +        
    1.19 +        flags = l1e_get_flags(*p2m_entry);
    1.20 +        pfn = l1e_get_pfn(*p2m_entry);
    1.21 +        
    1.22 +        l1_entry = map_domain_page(mfn_x(page_to_mfn(pg)));
    1.23 +        for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
    1.24 +        {
    1.25 +            new_entry = l1e_from_pfn(pfn + (i * L1_PAGETABLE_ENTRIES), flags);
    1.26 +            paging_write_p2m_entry(d, gfn, l1_entry+i, *table_mfn, new_entry,
    1.27 +                                   2);
    1.28 +        }
    1.29 +        unmap_domain_page(l1_entry);
    1.30 +        new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
    1.31 +                                 __PAGE_HYPERVISOR|_PAGE_USER); //disable PSE
    1.32 +        paging_write_p2m_entry(d, gfn,
    1.33 +                               p2m_entry, *table_mfn, new_entry, 3);
    1.34 +    }
    1.35 +
    1.36 +
    1.37 +    /* split single 2MB large page into 4KB page in P2M table */
    1.38      if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
    1.39      {
    1.40          unsigned long flags, pfn;
    1.41 @@ -1064,6 +1093,23 @@ p2m_pod_demand_populate(struct domain *d
    1.42      if ( unlikely(d->is_dying) )
    1.43          goto out_fail;
    1.44  
     1.45 +    /* Because PoD does not have a cache list for 1GB pages, it has to remap
     1.46 +     * the 1GB region as 2MB chunks for a retry. */
    1.47 +    if ( order == 18 )
    1.48 +    {
    1.49 +        gfn_aligned = (gfn >> order) << order;
     1.50 +        /* Note that we are supposed to call set_p2m_entry() 512 times to
     1.51 +         * split 1GB into 512 2MB pages here. But we only do it once because
     1.52 +         * set_p2m_entry() automatically shatters the 1GB page into
     1.53 +         * 512 2MB pages; the remaining 511 calls are unnecessary.
     1.54 +         */
    1.55 +        set_p2m_entry(d, gfn_aligned, _mfn(POPULATE_ON_DEMAND_MFN), 9,
    1.56 +                      p2m_populate_on_demand);
    1.57 +        audit_p2m(d);
    1.58 +        p2m_unlock(p2md);
    1.59 +        return 0;
    1.60 +    }
    1.61 +
    1.62      /* If we're low, start a sweep */
    1.63      if ( order == 9 && page_list_empty(&p2md->pod.super) )
    1.64          p2m_pod_emergency_sweep_super(d);
    1.65 @@ -1196,6 +1242,7 @@ p2m_set_entry(struct domain *d, unsigned
    1.66      l1_pgentry_t *p2m_entry;
    1.67      l1_pgentry_t entry_content;
    1.68      l2_pgentry_t l2e_content;
    1.69 +    l3_pgentry_t l3e_content;
    1.70      int rv=0;
    1.71  
    1.72      if ( tb_init_done )
    1.73 @@ -1222,18 +1269,41 @@ p2m_set_entry(struct domain *d, unsigned
    1.74          goto out;
    1.75  #endif
    1.76      /*
     1.77 +     * Try to install a 1GB page mapping if this feature is supported.
    1.78 +     */
    1.79 +    if ( page_order == 18 )
    1.80 +    {
    1.81 +        p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
    1.82 +                                   L3_PAGETABLE_SHIFT - PAGE_SHIFT,
    1.83 +                                   L3_PAGETABLE_ENTRIES);
    1.84 +        ASSERT(p2m_entry);
    1.85 +        if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
    1.86 +             !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
    1.87 +        {
    1.88 +            P2M_ERROR("configure P2M table L3 entry with large page\n");
    1.89 +            domain_crash(d);
    1.90 +            goto out;
    1.91 +        }
    1.92 +        l3e_content = mfn_valid(mfn) 
    1.93 +            ? l3e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt) | _PAGE_PSE)
    1.94 +            : l3e_empty();
    1.95 +        entry_content.l1 = l3e_content.l3;
    1.96 +        paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 3);
    1.97 +
    1.98 +    }
    1.99 +    /*
   1.100       * When using PAE Xen, we only allow 33 bits of pseudo-physical
   1.101       * address in translated guests (i.e. 8 GBytes).  This restriction
   1.102       * comes from wanting to map the P2M table into the 16MB RO_MPT hole
   1.103       * in Xen's address space for translated PV guests.
   1.104       * When using AMD's NPT on PAE Xen, we are restricted to 4GB.
   1.105       */
   1.106 -    if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
   1.107 -                         L3_PAGETABLE_SHIFT - PAGE_SHIFT,
   1.108 -                         ((CONFIG_PAGING_LEVELS == 3)
   1.109 -                          ? (paging_mode_hap(d) ? 4 : 8)
   1.110 -                          : L3_PAGETABLE_ENTRIES),
   1.111 -                         PGT_l2_page_table) )
   1.112 +    else if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
   1.113 +                              L3_PAGETABLE_SHIFT - PAGE_SHIFT,
   1.114 +                              ((CONFIG_PAGING_LEVELS == 3)
   1.115 +                               ? (paging_mode_hap(d) ? 4 : 8)
   1.116 +                               : L3_PAGETABLE_ENTRIES),
   1.117 +                              PGT_l2_page_table) )
   1.118          goto out;
   1.119  
   1.120      if ( page_order == 0 )
   1.121 @@ -1255,7 +1325,7 @@ p2m_set_entry(struct domain *d, unsigned
   1.122          /* level 1 entry */
   1.123          paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);
   1.124      }
   1.125 -    else 
   1.126 +    else if ( page_order == 9 )
   1.127      {
   1.128          p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
   1.129                                     L2_PAGETABLE_SHIFT - PAGE_SHIFT,
   1.130 @@ -1352,11 +1422,34 @@ p2m_gfn_to_mfn(struct domain *d, unsigne
   1.131  #else
   1.132          l3e += l3_table_offset(addr);
   1.133  #endif
   1.134 +pod_retry_l3:
   1.135          if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
   1.136          {
   1.137 +            if ( p2m_flags_to_type(l3e_get_flags(*l3e)) == p2m_populate_on_demand )
   1.138 +            {
   1.139 +                if ( q != p2m_query )
   1.140 +                {
   1.141 +                    if ( !p2m_pod_demand_populate(d, gfn, 18, q) )
   1.142 +                        goto pod_retry_l3;
   1.143 +                }
   1.144 +                else
   1.145 +                    *t = p2m_populate_on_demand;
   1.146 +            }
   1.147              unmap_domain_page(l3e);
   1.148              return _mfn(INVALID_MFN);
   1.149          }
   1.150 +        else if ( (l3e_get_flags(*l3e) & _PAGE_PSE) )
   1.151 +        {
   1.152 +            mfn = _mfn(l3e_get_pfn(*l3e) +
   1.153 +                       l2_table_offset(addr) * L1_PAGETABLE_ENTRIES +
   1.154 +                       l1_table_offset(addr));
   1.155 +            *t = p2m_flags_to_type(l3e_get_flags(*l3e));
   1.156 +            unmap_domain_page(l3e);
   1.157 +
   1.158 +            ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
   1.159 +            return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
   1.160 +        }
   1.161 +
   1.162          mfn = _mfn(l3e_get_pfn(*l3e));
   1.163          unmap_domain_page(l3e);
   1.164      }
   1.165 @@ -1437,12 +1530,59 @@ static mfn_t p2m_gfn_to_mfn_current(unsi
   1.166      {
   1.167          l1_pgentry_t l1e = l1e_empty(), *p2m_entry;
   1.168          l2_pgentry_t l2e = l2e_empty();
   1.169 +        l3_pgentry_t l3e = l3e_empty();
   1.170          int ret;
   1.171  
   1.172          ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) 
   1.173                 / sizeof(l1_pgentry_t));
   1.174  
   1.175          /*
   1.176 +         * Read & process L3
   1.177 +         */
   1.178 +        p2m_entry = (l1_pgentry_t *)
   1.179 +            &__linear_l2_table[l2_linear_offset(RO_MPT_VIRT_START)
   1.180 +                               + l3_linear_offset(addr)];
   1.181 +    pod_retry_l3:
   1.182 +        ret = __copy_from_user(&l3e, p2m_entry, sizeof(l3e));
   1.183 +
   1.184 +        if ( ret != 0 || !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
   1.185 +        {
   1.186 +            if ( (l3e_get_flags(l3e) & _PAGE_PSE) &&
   1.187 +                 (p2m_flags_to_type(l3e_get_flags(l3e)) == p2m_populate_on_demand) )
   1.188 +            {
   1.189 +                /* The read has succeeded, so we know that mapping exists */
   1.190 +                if ( q != p2m_query )
   1.191 +                {
   1.192 +                    if ( !p2m_pod_demand_populate(current->domain, gfn, 18, q) )
   1.193 +                        goto pod_retry_l3;
   1.194 +                    p2mt = p2m_invalid;
   1.195 +                    printk("%s: Allocate 1GB failed!\n", __func__);
   1.196 +                    goto out;
   1.197 +                }
   1.198 +                else
   1.199 +                {
   1.200 +                    p2mt = p2m_populate_on_demand;
   1.201 +                    goto out;
   1.202 +                }
   1.203 +            }
   1.204 +            goto pod_retry_l2;
   1.205 +        }
   1.206 +
   1.207 +        if ( l3e_get_flags(l3e) & _PAGE_PSE )
   1.208 +        {
   1.209 +            p2mt = p2m_flags_to_type(l3e_get_flags(l3e));
   1.210 +            ASSERT(l3e_get_pfn(l3e) != INVALID_MFN || !p2m_is_ram(p2mt));
   1.211 +            if (p2m_is_valid(p2mt) )
   1.212 +                mfn = _mfn(l3e_get_pfn(l3e) + 
   1.213 +                           l2_table_offset(addr) * L1_PAGETABLE_ENTRIES + 
   1.214 +                           l1_table_offset(addr));
   1.215 +            else
   1.216 +                p2mt = p2m_mmio_dm;
   1.217 +            
   1.218 +            goto out;
   1.219 +        }
   1.220 +
   1.221 +        /*
   1.222           * Read & process L2
   1.223           */
   1.224          p2m_entry = &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START)
   1.225 @@ -1596,10 +1736,18 @@ int set_p2m_entry(struct domain *d, unsi
   1.226      while ( todo )
   1.227      {
   1.228          if ( is_hvm_domain(d) && paging_mode_hap(d) )
   1.229 -            order = (((gfn | mfn_x(mfn) | todo) & (SUPERPAGE_PAGES - 1)) == 0) ?
   1.230 -                9 : 0;
   1.231 +            order = ( (((gfn | mfn_x(mfn) | todo) & ((1ul << 18) - 1)) == 0) ) ?
   1.232 +                18 :
   1.233 +            (((gfn | mfn_x(mfn) | todo) & ((1ul << 9) - 1)) == 0) ? 9 : 0;
   1.234          else
   1.235              order = 0;
   1.236 +
    1.237 +        /* Note that we only enable hap_1gb_pgtb when CONFIG_PAGING_LEVELS==4,
    1.238 +         * so 1GB pages should never be enabled under 32-bit or PAE modes. For
    1.239 +         * safety's sake, we double-check the page order again.
    1.240 +         */
   1.241 +        BUG_ON(order == 18 && CONFIG_PAGING_LEVELS < 4);
   1.242 +
   1.243          if ( !d->arch.p2m->set_entry(d, gfn, mfn, order, p2mt) )
   1.244              rc = 0;
   1.245          gfn += 1ul << order;
   1.246 @@ -1867,6 +2015,31 @@ static void audit_p2m(struct domain *d)
   1.247                      gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
   1.248                      continue;
   1.249                  }
   1.250 +
   1.251 +                /* check for 1GB super page */
   1.252 +                if ( l3e_get_flags(l3e[i3]) & _PAGE_PSE )
   1.253 +                {
   1.254 +                    mfn = l3e_get_pfn(l3e[i3]);
   1.255 +                    ASSERT(mfn_valid(_mfn(mfn)));
   1.256 +                    /* we have to cover 512x512 4K pages */
   1.257 +                    for ( i2 = 0; 
   1.258 +                          i2 < (L2_PAGETABLE_ENTRIES * L1_PAGETABLE_ENTRIES);
   1.259 +                          i2++)
   1.260 +                    {
   1.261 +                        m2pfn = get_gpfn_from_mfn(mfn+i2);
   1.262 +                        if ( m2pfn != (gfn + i2) )
   1.263 +                        {
   1.264 +                            pmbad++;
   1.265 +                            P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
   1.266 +                                       " -> gfn %#lx\n", gfn+i2, mfn+i2,
   1.267 +                                       m2pfn);
   1.268 +                            BUG();
   1.269 +                        }
   1.270 +                        gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
   1.271 +                        continue;
   1.272 +                    }
   1.273 +                }
   1.274 +
   1.275                  l2e = map_domain_page(mfn_x(_mfn(l3e_get_pfn(l3e[i3]))));
   1.276                  for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
   1.277                  {
   1.278 @@ -2224,7 +2397,7 @@ void p2m_change_type_global(struct domai
   1.279      l1_pgentry_t l1e_content;
   1.280      l1_pgentry_t *l1e;
   1.281      l2_pgentry_t *l2e;
   1.282 -    mfn_t l1mfn, l2mfn;
   1.283 +    mfn_t l1mfn, l2mfn, l3mfn;
   1.284      unsigned long i1, i2, i3;
   1.285      l3_pgentry_t *l3e;
   1.286  #if CONFIG_PAGING_LEVELS == 4
   1.287 @@ -2245,6 +2418,7 @@ void p2m_change_type_global(struct domai
   1.288  #if CONFIG_PAGING_LEVELS == 4
   1.289      l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
   1.290  #else /* CONFIG_PAGING_LEVELS == 3 */
   1.291 +    l3mfn = _mfn(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
   1.292      l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
   1.293  #endif
   1.294  
   1.295 @@ -2255,6 +2429,7 @@ void p2m_change_type_global(struct domai
   1.296          {
   1.297              continue;
   1.298          }
   1.299 +        l3mfn = _mfn(l4e_get_pfn(l4e[i4]));
   1.300          l3e = map_domain_page(l4e_get_pfn(l4e[i4]));
   1.301  #endif
   1.302          for ( i3 = 0;
   1.303 @@ -2265,6 +2440,20 @@ void p2m_change_type_global(struct domai
   1.304              {
   1.305                  continue;
   1.306              }
   1.307 +            if ( (l3e_get_flags(l3e[i3]) & _PAGE_PSE) )
   1.308 +            {
   1.309 +                flags = l3e_get_flags(l3e[i3]);
   1.310 +                if ( p2m_flags_to_type(flags) != ot )
   1.311 +                    continue;
   1.312 +                mfn = l3e_get_pfn(l3e[i3]);
   1.313 +                gfn = get_gpfn_from_mfn(mfn);
   1.314 +                flags = p2m_type_to_flags(nt);
   1.315 +                l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE);
   1.316 +                paging_write_p2m_entry(d, gfn, (l1_pgentry_t *)&l3e[i3],
   1.317 +                                       l3mfn, l1e_content, 3);
   1.318 +                continue;
   1.319 +            }
   1.320 +
   1.321              l2mfn = _mfn(l3e_get_pfn(l3e[i3]));
   1.322              l2e = map_domain_page(l3e_get_pfn(l3e[i3]));
   1.323              for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
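
For reference, and not part of the changeset, the following standalone sketch mirrors the arithmetic the new 1GB paths use in p2m_gfn_to_mfn() and p2m_gfn_to_mfn_current(): when an L3 entry has _PAGE_PSE set, the target pfn is the entry's base pfn plus the address's L2 offset times 512 plus its L1 offset. pfn_in_1gb_mapping() is a hypothetical helper; the constants assume 4KB base pages with 512 entries per paging level.

#include <stdio.h>

#define PAGE_SHIFT   12                  /* 4KB base pages           */
#define PT_ENTRIES   512ul               /* entries per paging level */
#define L1_SHIFT     PAGE_SHIFT          /* 4KB granularity          */
#define L2_SHIFT     (PAGE_SHIFT + 9)    /* 2MB granularity          */

/* Hypothetical helper: resolve a guest-physical address that falls inside a
 * 1GB PSE mapping whose L3 entry carries l3e_pfn, by folding the L2 and L1
 * offsets into the base pfn -- the same arithmetic as the new _PAGE_PSE
 * branches in p2m_gfn_to_mfn(). */
static unsigned long pfn_in_1gb_mapping(unsigned long l3e_pfn,
                                        unsigned long addr)
{
    unsigned long l2_off = (addr >> L2_SHIFT) & (PT_ENTRIES - 1);
    unsigned long l1_off = (addr >> L1_SHIFT) & (PT_ENTRIES - 1);
    return l3e_pfn + l2_off * PT_ENTRIES + l1_off;
}

int main(void)
{
    unsigned long base_pfn = 0x100000;   /* hypothetical pfn from the L3 entry */
    unsigned long addr = (5ul << L2_SHIFT) + (7ul << L1_SHIFT); /* 5*2MB + 7*4KB in */
    printf("resolved pfn: %#lx\n", pfn_in_1gb_mapping(base_pfn, addr));
    /* expected: 0x100000 + 5*512 + 7 = 0x100a07 */
    return 0;
}

The same folding explains why shattering a 1GB entry in p2m_next_level() writes 512 L2 entries whose pfns step by L1_PAGETABLE_ENTRIES (512) each.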