debuggers.hg

changeset 20908:7310235f74f8

x86 mca: Do not inject a #GP fault when the guest writes a value other than all 0s or all 1s to the MCA CTL MSRs.

a) For the MCi_CTL MSRs, the guest can write any value. When read back,
the value is ANDed with the physical value. Some bits in the physical
value can be 0, either because they are read-only in hardware (e.g.
masked by AMD's MCi_CTL_MASK) or because Xen didn't enable them.
If the guest writes some bit as 0 while that bit is 1 in the host, we
do not inject MCEs for that bank into the guest, as we can't tell
whether the MCE was caused by the guest-cleared bit.

b) For the MCG_CTL MSR, the guest can write any value. When read back,
the value is ANDed with the physical value.
If the guest does not write all 1s, mca_ctl_conflict() simply declines
to inject any vMCE into the guest when some bit is set in the physical
MSR but cleared in the guest's vMCG_CTL MSR.

Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jan 29 06:48:00 2010 +0000 (2010-01-29)
parents f85120520509
children 805eae786b50
files xen/arch/x86/cpu/mcheck/mce.c xen/arch/x86/cpu/mcheck/mce.h xen/arch/x86/cpu/mcheck/mce_intel.c
line diff
     1.1 --- a/xen/arch/x86/cpu/mcheck/mce.c	Fri Jan 29 06:47:24 2010 +0000
     1.2 +++ b/xen/arch/x86/cpu/mcheck/mce.c	Fri Jan 29 06:48:00 2010 +0000
     1.3 @@ -31,6 +31,11 @@ unsigned int nr_mce_banks;
     1.4  
     1.5  static uint64_t g_mcg_cap;
     1.6  
     1.7 +/* Real value in physical CTL MSR */
     1.8 +static uint64_t h_mcg_ctl = 0UL;
     1.9 +static uint64_t *h_mci_ctrl;
    1.10 +int firstbank;
    1.11 +
    1.12  static void intpose_init(void);
    1.13  static void mcinfo_clear(struct mc_info *);
    1.14  
    1.15 @@ -642,6 +647,21 @@ void mcheck_init(struct cpuinfo_x86 *c)
    1.16  		break;
    1.17  	}
    1.18  
    1.19 +    if ( !h_mci_ctrl )
    1.20 +    {
    1.21 +        h_mci_ctrl = xmalloc_array(uint64_t, nr_mce_banks);
    1.22 +        if (!h_mci_ctrl)
    1.23 +        {
    1.24 +            dprintk(XENLOG_INFO, "Failed to alloc h_mci_ctrl\n");
    1.25 +            return;
    1.26 +        }
    1.27 +        /* Don't care banks before firstbank */
    1.28 +        memset(h_mci_ctrl, 0xff, sizeof(h_mci_ctrl));
    1.29 +        for (i = firstbank; i < nr_mce_banks; i++)
    1.30 +            rdmsrl(MSR_IA32_MC0_CTL + 4*i, h_mci_ctrl[i]);
    1.31 +    }
    1.32 +    if (g_mcg_cap & MCG_CTL_P)
    1.33 +        rdmsrl(MSR_IA32_MCG_CTL, h_mcg_ctl);
    1.34      set_poll_bankmask(c);
    1.35  	if (!inited)
    1.36  		printk(XENLOG_INFO "CPU%i: No machine check initialization\n",
    1.37 @@ -708,7 +728,8 @@ int mce_rdmsr(uint32_t msr, uint64_t *va
    1.38              *val);
    1.39          break;
    1.40      case MSR_IA32_MCG_CTL:
    1.41 -        *val = d->arch.vmca_msrs.mcg_ctl;
    1.42 +        /* Always 0 if no CTL support */
    1.43 +        *val = d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl;
    1.44          mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL 0x%"PRIx64"\n",
    1.45              *val);
    1.46          break;
    1.47 @@ -723,7 +744,8 @@ int mce_rdmsr(uint32_t msr, uint64_t *va
    1.48          switch (msr & (MSR_IA32_MC0_CTL | 3))
    1.49          {
    1.50          case MSR_IA32_MC0_CTL:
    1.51 -            *val = d->arch.vmca_msrs.mci_ctl[bank];
    1.52 +            *val = d->arch.vmca_msrs.mci_ctl[bank] &
    1.53 +                    (h_mci_ctrl ? h_mci_ctrl[bank] : ~0UL);
    1.54              mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL 0x%"PRIx64"\n",
    1.55                       bank, *val);
    1.56              break;
    1.57 @@ -805,13 +827,6 @@ int mce_wrmsr(u32 msr, u64 val)
    1.58      switch ( msr )
    1.59      {
    1.60      case MSR_IA32_MCG_CTL:
    1.61 -        if ( val && (val + 1) )
    1.62 -        {
    1.63 -            mce_printk(MCE_QUIET, "MCE: val \"%"PRIx64"\" written "
    1.64 -                     "to MCG_CTL should be all 0s or 1s\n", val);
    1.65 -            ret = -1;
    1.66 -            break;
    1.67 -        }
    1.68          d->arch.vmca_msrs.mcg_ctl = val;
    1.69          break;
    1.70      case MSR_IA32_MCG_STATUS:
    1.71 @@ -855,14 +870,6 @@ int mce_wrmsr(u32 msr, u64 val)
    1.72          switch ( msr & (MSR_IA32_MC0_CTL | 3) )
    1.73          {
    1.74          case MSR_IA32_MC0_CTL:
    1.75 -            if ( val && (val + 1) )
    1.76 -            {
    1.77 -                mce_printk(MCE_QUIET, "MCE: val written to MC%u_CTL "
    1.78 -                         "should be all 0s or 1s (is %"PRIx64")\n",
    1.79 -                         bank, val);
    1.80 -                ret = -1;
    1.81 -                break;
    1.82 -            }
    1.83              d->arch.vmca_msrs.mci_ctl[bank] = val;
    1.84              break;
    1.85          case MSR_IA32_MC0_STATUS:
    1.86 @@ -1162,6 +1169,23 @@ void intpose_inval(unsigned int cpu_nr, 
    1.87      (r) <= MSR_IA32_MC0_MISC + (nr_mce_banks - 1) * 4 && \
    1.88      ((r) - MSR_IA32_MC0_CTL) % 4 != 0)	/* excludes MCi_CTL */
    1.89  
    1.90 +int mca_ctl_conflict(struct mcinfo_bank *bank, struct domain *d)
    1.91 +{
    1.92 +    int bank_nr;
    1.93 +
    1.94 +    if ( !bank || !d || !h_mci_ctrl )
    1.95 +        return 1;
    1.96 +
    1.97 +    /* Will MCE happen in host if If host mcg_ctl is 0? */
    1.98 +    if ( ~d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl )
    1.99 +        return 1;
   1.100 +
   1.101 +    bank_nr = bank->mc_bank;
   1.102 +    if (~d->arch.vmca_msrs.mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] )
   1.103 +        return 1;
   1.104 +    return 0;
   1.105 +}
   1.106 +
   1.107  static int x86_mc_msrinject_verify(struct xen_mc_msrinject *mci)
   1.108  {
   1.109  	struct cpuinfo_x86 *c;
     2.1 --- a/xen/arch/x86/cpu/mcheck/mce.h	Fri Jan 29 06:47:24 2010 +0000
     2.2 +++ b/xen/arch/x86/cpu/mcheck/mce.h	Fri Jan 29 06:48:00 2010 +0000
     2.3 @@ -39,6 +39,8 @@ void mce_intel_feature_init(struct cpuin
     2.4  void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c);
     2.5  
     2.6  u64 mce_cap_init(void);
     2.7 +extern int firstbank;
     2.8 +int mca_ctl_conflict(struct mcinfo_bank *bank, struct domain *d);
     2.9  
    2.10  int intel_mce_rdmsr(uint32_t msr, uint64_t *val);
    2.11  int intel_mce_wrmsr(uint32_t msr, uint64_t val);
     3.1 --- a/xen/arch/x86/cpu/mcheck/mce_intel.c	Fri Jan 29 06:47:24 2010 +0000
     3.2 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c	Fri Jan 29 06:48:00 2010 +0000
     3.3 @@ -20,7 +20,6 @@ int cmci_support = 0;
     3.4  int ser_support = 0;
     3.5  
     3.6  static int nr_intel_ext_msrs = 0;
     3.7 -static int firstbank;
     3.8  
     3.9  /* Below are for MCE handling */
    3.10  struct mce_softirq_barrier {
    3.11 @@ -361,7 +360,15 @@ static void intel_UCR_handler(struct mci
    3.12                         *  the mfn in question) */
    3.13                        BUG_ON( result->owner == DOMID_COW );
    3.14                        if ( result->owner != DOMID_XEN ) {
    3.15 +
    3.16                            d = get_domain_by_id(result->owner);
    3.17 +                          if ( mca_ctl_conflict(bank, d) )
    3.18 +                          {
    3.19 +                              /* Guest has different MCE ctl with hypervisor */
    3.20 +                              put_domain(d);
    3.21 +                              return;
    3.22 +                          }
    3.23 +
    3.24                            gfn =
    3.25                                mfn_to_gmfn(d, ((bank->mc_addr) >> PAGE_SHIFT));
    3.26                            bank->mc_addr =