debuggers.hg

changeset 22288:7831b8e5aae2

x86 guest pagetable walker: check for invalid bits in pagetable
entries.

Some bits are reserved in x86 pagetable entries and must be zero; the
MMU should raise a pagefault if it sees them, with bit 3 set in the
error code. Xen's software pagetable walker hasn't been doing this,
which has been OK because no guest OSes actually have invalid bits set
except in error cases where things are already very wrong.

Xen's shadow pagetable code deliberately sets these bits as part of
the not-present-entry fast path, so if we're to support
shadow-on-shadow nested HVM, we need to start checking them.

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>
author Keir Fraser <keir@xen.org>
date Mon Oct 18 11:30:10 2010 +0100 (2010-10-18)
parents 3a5755249361
children fc2242ac90e1
files xen/arch/x86/cpu/mtrr/main.c xen/arch/x86/mm/guest_walk.c xen/arch/x86/mm/hap/guest_walk.c xen/arch/x86/mm/shadow/multi.c xen/include/asm-x86/guest_pt.h xen/include/asm-x86/processor.h
line diff
     1.1 --- a/xen/arch/x86/cpu/mtrr/main.c	Thu Oct 14 12:46:29 2010 +0100
     1.2 +++ b/xen/arch/x86/cpu/mtrr/main.c	Mon Oct 18 11:30:10 2010 +0100
     1.3 @@ -600,6 +600,8 @@ struct mtrr_value {
     1.4  	unsigned long	lsize;
     1.5  };
     1.6  
     1.7 +unsigned int paddr_bits __read_mostly = 36;
     1.8 +
     1.9  /**
    1.10   * mtrr_bp_init - initialize mtrrs on the boot CPU
    1.11   *
    1.12 @@ -620,17 +622,16 @@ void __init mtrr_bp_init(void)
    1.13  		   Intel will implement it to when they extend the address
    1.14  		   bus of the Xeon. */
    1.15  		if (cpuid_eax(0x80000000) >= 0x80000008) {
    1.16 -			u32 phys_addr;
    1.17 -			phys_addr = cpuid_eax(0x80000008) & 0xff;
    1.18 +			paddr_bits = cpuid_eax(0x80000008) & 0xff;
    1.19  			/* CPUID workaround for Intel 0F33/0F34 CPU */
    1.20  			if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
    1.21  			    boot_cpu_data.x86 == 0xF &&
    1.22  			    boot_cpu_data.x86_model == 0x3 &&
    1.23  			    (boot_cpu_data.x86_mask == 0x3 ||
    1.24  			     boot_cpu_data.x86_mask == 0x4))
    1.25 -				phys_addr = 36;
    1.26 +				paddr_bits = 36;
    1.27  
    1.28 -			size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1);
    1.29 +			size_or_mask = ~((1ULL << (paddr_bits - PAGE_SHIFT)) - 1);
    1.30  			size_and_mask = ~size_or_mask & 0xfffff00000ULL;
    1.31  		} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
    1.32  			   boot_cpu_data.x86 == 6) {
     2.1 --- a/xen/arch/x86/mm/guest_walk.c	Thu Oct 14 12:46:29 2010 +0100
     2.2 +++ b/xen/arch/x86/mm/guest_walk.c	Mon Oct 18 11:30:10 2010 +0100
     2.3 @@ -63,7 +63,7 @@ static uint32_t mandatory_flags(struct v
     2.4           && !(pfec & PFEC_user_mode) )
     2.5          pfec &= ~PFEC_write_access;
     2.6  
     2.7 -    return flags[(pfec & 0x1f) >> 1];
     2.8 +    return flags[(pfec & 0x1f) >> 1] | _PAGE_INVALID_BITS;
     2.9  }
    2.10  
    2.11  /* Modify a guest pagetable entry to set the Accessed and Dirty bits.
    2.12 @@ -131,17 +131,19 @@ guest_walk_tables(struct vcpu *v, struct
    2.13      guest_l3e_t *l3p = NULL;
    2.14      guest_l4e_t *l4p;
    2.15  #endif
    2.16 -    uint32_t gflags, mflags, rc = 0;
    2.17 +    uint32_t gflags, mflags, iflags, rc = 0;
    2.18      int pse;
    2.19  
    2.20      perfc_incr(guest_walk);
    2.21      memset(gw, 0, sizeof(*gw));
    2.22      gw->va = va;
    2.23  
    2.24 -    /* Mandatory bits that must be set in every entry.  We invert NX, to
    2.25 -     * calculate as if there were an "X" bit that allowed access. 
    2.26 -     * We will accumulate, in rc, the set of flags that are missing. */
    2.27 +    /* Mandatory bits that must be set in every entry.  We invert NX and
    2.28 +     * the invalid bits, to calculate as if there were an "X" bit that
    2.29 +     * allowed access.  We will accumulate, in rc, the set of flags that
    2.30 +     * are missing/unwanted. */
    2.31      mflags = mandatory_flags(v, pfec);
    2.32 +    iflags = (_PAGE_NX_BIT | _PAGE_INVALID_BITS);
    2.33  
    2.34  #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
    2.35  #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
    2.36 @@ -150,7 +152,7 @@ guest_walk_tables(struct vcpu *v, struct
    2.37      gw->l4mfn = top_mfn;
    2.38      l4p = (guest_l4e_t *) top_map;
    2.39      gw->l4e = l4p[guest_l4_table_offset(va)];
    2.40 -    gflags = guest_l4e_get_flags(gw->l4e) ^ _PAGE_NX_BIT;
    2.41 +    gflags = guest_l4e_get_flags(gw->l4e) ^ iflags;
    2.42      rc |= ((gflags & mflags) ^ mflags);
    2.43      if ( rc & _PAGE_PRESENT ) goto out;
    2.44  
    2.45 @@ -164,7 +166,7 @@ guest_walk_tables(struct vcpu *v, struct
    2.46          goto out;
    2.47      /* Get the l3e and check its flags*/
    2.48      gw->l3e = l3p[guest_l3_table_offset(va)];
    2.49 -    gflags = guest_l3e_get_flags(gw->l3e) ^ _PAGE_NX_BIT;
    2.50 +    gflags = guest_l3e_get_flags(gw->l3e) ^ iflags;
    2.51      rc |= ((gflags & mflags) ^ mflags);
    2.52      if ( rc & _PAGE_PRESENT )
    2.53          goto out;
    2.54 @@ -201,7 +203,7 @@ guest_walk_tables(struct vcpu *v, struct
    2.55  
    2.56  #endif /* All levels... */
    2.57  
    2.58 -    gflags = guest_l2e_get_flags(gw->l2e) ^ _PAGE_NX_BIT;
    2.59 +    gflags = guest_l2e_get_flags(gw->l2e) ^ iflags;
    2.60      rc |= ((gflags & mflags) ^ mflags);
    2.61      if ( rc & _PAGE_PRESENT )
    2.62          goto out;
    2.63 @@ -246,7 +248,7 @@ guest_walk_tables(struct vcpu *v, struct
    2.64          if(l1p == NULL)
    2.65              goto out;
    2.66          gw->l1e = l1p[guest_l1_table_offset(va)];
    2.67 -        gflags = guest_l1e_get_flags(gw->l1e) ^ _PAGE_NX_BIT;
    2.68 +        gflags = guest_l1e_get_flags(gw->l1e) ^ iflags;
    2.69          rc |= ((gflags & mflags) ^ mflags);
    2.70      }
    2.71  
     3.1 --- a/xen/arch/x86/mm/hap/guest_walk.c	Thu Oct 14 12:46:29 2010 +0100
     3.2 +++ b/xen/arch/x86/mm/hap/guest_walk.c	Mon Oct 18 11:30:10 2010 +0100
     3.3 @@ -99,6 +99,9 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
     3.4      if ( missing & _PAGE_PRESENT )
     3.5          pfec[0] &= ~PFEC_page_present;
     3.6  
     3.7 +    if ( missing & _PAGE_INVALID_BITS ) 
     3.8 +        pfec[0] |= PFEC_reserved_bit;
     3.9 +
    3.10      if ( missing & _PAGE_PAGED )
    3.11          pfec[0] = PFEC_page_paged;
    3.12  
     4.1 --- a/xen/arch/x86/mm/shadow/multi.c	Thu Oct 14 12:46:29 2010 +0100
     4.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Mon Oct 18 11:30:10 2010 +0100
     4.3 @@ -3181,6 +3181,8 @@ static int sh_page_fault(struct vcpu *v,
     4.4          perfc_incr(shadow_fault_bail_real_fault);
     4.5          SHADOW_PRINTK("not a shadow fault\n");
     4.6          reset_early_unshadow(v);
     4.7 +        if ( (rc & _PAGE_INVALID_BITS) )
     4.8 +            regs->error_code |= PFEC_reserved_bit;
     4.9          goto propagate;
    4.10      }
    4.11  
    4.12 @@ -3772,6 +3774,7 @@ sh_gva_to_gfn(struct vcpu *v, unsigned l
    4.13  {
    4.14      walk_t gw;
    4.15      gfn_t gfn;
    4.16 +    uint32_t missing;
    4.17  
    4.18  #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
    4.19      /* Check the vTLB cache first */
    4.20 @@ -3780,10 +3783,12 @@ sh_gva_to_gfn(struct vcpu *v, unsigned l
    4.21          return vtlb_gfn;
    4.22  #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
    4.23  
    4.24 -    if ( sh_walk_guest_tables(v, va, &gw, pfec[0]) != 0 )
    4.25 +    if ( (missing = sh_walk_guest_tables(v, va, &gw, pfec[0])) != 0 )
    4.26      {
    4.27 -        if ( !(guest_l1e_get_flags(gw.l1e) & _PAGE_PRESENT) )
    4.28 +        if ( (missing & _PAGE_PRESENT) )
    4.29              pfec[0] &= ~PFEC_page_present;
    4.30 +        if ( missing & _PAGE_INVALID_BITS )
    4.31 +            pfec[0] |= PFEC_reserved_bit;
    4.32          return INVALID_GFN;
    4.33      }
    4.34      gfn = guest_walk_to_gfn(&gw);
     5.1 --- a/xen/include/asm-x86/guest_pt.h	Thu Oct 14 12:46:29 2010 +0100
     5.2 +++ b/xen/include/asm-x86/guest_pt.h	Mon Oct 18 11:30:10 2010 +0100
     5.3 @@ -204,6 +204,17 @@ guest_supports_nx(struct vcpu *v)
     5.4  }
     5.5  
     5.6  
     5.7 +/* Some bits are invalid in any pagetable entry. */
     5.8 +#if GUEST_PAGING_LEVELS == 2
     5.9 +#define _PAGE_INVALID_BITS (0)
    5.10 +#elif GUEST_PAGING_LEVELS == 3
    5.11 +#define _PAGE_INVALID_BITS \
    5.12 +    get_pte_flags(((1ull<<63) - 1) & ~((1ull<<paddr_bits) - 1))
    5.13 +#else /* GUEST_PAGING_LEVELS == 4 */
    5.14 +#define _PAGE_INVALID_BITS \
    5.15 +    get_pte_flags(((1ull<<52) - 1) & ~((1ull<<paddr_bits) - 1))
    5.16 +#endif
    5.17 +
    5.18  
    5.19  /* Type used for recording a walk through guest pagetables.  It is
    5.20   * filled in by the pagetable walk function, and also used as a cache
     6.1 --- a/xen/include/asm-x86/processor.h	Thu Oct 14 12:46:29 2010 +0100
     6.2 +++ b/xen/include/asm-x86/processor.h	Mon Oct 18 11:30:10 2010 +0100
     6.3 @@ -196,6 +196,9 @@ extern int phys_proc_id[NR_CPUS];
     6.4  extern int cpu_core_id[NR_CPUS];
     6.5  extern int opt_cpu_info;
     6.6  
     6.7 +/* Maximum width of physical addresses supported by the hardware */
     6.8 +extern unsigned int paddr_bits;
     6.9 +
    6.10  extern void identify_cpu(struct cpuinfo_x86 *);
    6.11  extern void setup_clear_cpu_cap(unsigned int);
    6.12  extern void print_cpu_info(unsigned int cpu);