debuggers.hg

changeset 19988:06893fe1c399

x86: extend some of Intel's recent MCE work to also support AMD

At least the MSR handling for guests can easily be made shared between
the two vendors; likely a lot of the other code in mce_intel.c could
also be made common. The goal here, however, is to eliminate the
annoying guest-tried-to-modify-msr messages that result from enabling
the MCE code on the Linux side.

Additionally (in order to avoid having to make the same change twice to
basically identical code) the patch also merges
amd_{fam10,k8}_mcheck_init(), enables the former to also be used for
Fam11 (I'd suppose that Fam12 would also need to go here, but I have
no data to confirm that), and does some minor (mostly coding style for
the code moved around) adjustments.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Jul 15 16:21:12 2009 +0100 (2009-07-15)
parents 10d806ca5952
children 8368aff1d759
files xen/arch/x86/cpu/mcheck/amd_f10.c xen/arch/x86/cpu/mcheck/amd_k8.c xen/arch/x86/cpu/mcheck/mce.c xen/arch/x86/cpu/mcheck/mce.h xen/arch/x86/cpu/mcheck/mce_intel.c xen/arch/x86/domain.c xen/arch/x86/hvm/hvm.c xen/arch/x86/traps.c xen/include/asm-x86/traps.h
line diff
     1.1 --- a/xen/arch/x86/cpu/mcheck/amd_f10.c	Wed Jul 15 15:33:05 2009 +0100
     1.2 +++ b/xen/arch/x86/cpu/mcheck/amd_f10.c	Wed Jul 15 16:21:12 2009 +0100
     1.3 @@ -82,45 +82,16 @@ amd_f10_handler(struct mc_info *mi, uint
     1.4  	return MCA_EXTINFO_LOCAL;
     1.5  }
     1.6  
     1.7 -
     1.8 -extern void k8_machine_check(struct cpu_user_regs *regs, long error_code);
     1.9 -
    1.10  /* AMD Family10 machine check */
    1.11  int amd_f10_mcheck_init(struct cpuinfo_x86 *c) 
    1.12  { 
    1.13 -	uint64_t value;
    1.14 -	uint32_t i;
    1.15 -	int cpu_nr;
    1.16 -
    1.17 -	if (!cpu_has(c, X86_FEATURE_MCA))
    1.18 +	if (!amd_k8_mcheck_init(c))
    1.19  		return 0;
    1.20  
    1.21 -	x86_mce_vector_register(k8_machine_check);
    1.22  	x86_mce_callback_register(amd_f10_handler);
    1.23 -	cpu_nr = smp_processor_id();
    1.24 -
    1.25 -	rdmsrl(MSR_IA32_MCG_CAP, value);
    1.26 -	if (value & MCG_CTL_P)	/* Control register present ? */
    1.27 -		wrmsrl (MSR_IA32_MCG_CTL, 0xffffffffffffffffULL);
    1.28 -	nr_mce_banks = value & MCG_CAP_COUNT;
    1.29  
    1.30 -	for (i = 0; i < nr_mce_banks; i++) {
    1.31 -		switch (i) {
    1.32 -		case 4: /* Northbridge */
    1.33 -			/* Enable error reporting of all errors */
    1.34 -			wrmsrl(MSR_IA32_MC4_CTL, 0xffffffffffffffffULL);
    1.35 -			wrmsrl(MSR_IA32_MC4_STATUS, 0x0ULL);
    1.36 -			break;
    1.37 +	printk("CPU%i: AMD Family%xh machine check reporting enabled\n",
    1.38 +	       smp_processor_id(), c->x86);
    1.39  
    1.40 -		default:
    1.41 -			/* Enable error reporting of all errors */
    1.42 -			wrmsrl(MSR_IA32_MC0_CTL + 4 * i, 0xffffffffffffffffULL);
    1.43 -			wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL);
    1.44 -			break;
    1.45 -		}
    1.46 -	}
    1.47 -
    1.48 -	set_in_cr4(X86_CR4_MCE);
    1.49 -	printk("CPU%i: AMD Family10h machine check reporting enabled.\n", cpu_nr);
    1.50  	return 1;
    1.51  }
     2.1 --- a/xen/arch/x86/cpu/mcheck/amd_k8.c	Wed Jul 15 15:33:05 2009 +0100
     2.2 +++ b/xen/arch/x86/cpu/mcheck/amd_k8.c	Wed Jul 15 16:21:12 2009 +0100
     2.3 @@ -70,7 +70,7 @@
     2.4  
     2.5  
     2.6  /* Machine Check Handler for AMD K8 family series */
     2.7 -void k8_machine_check(struct cpu_user_regs *regs, long error_code)
     2.8 +static void k8_machine_check(struct cpu_user_regs *regs, long error_code)
     2.9  {
    2.10  	mcheck_cmn_handler(regs, error_code, mca_allbanks);
    2.11  }
    2.12 @@ -78,29 +78,30 @@ void k8_machine_check(struct cpu_user_re
    2.13  /* AMD K8 machine check */
    2.14  int amd_k8_mcheck_init(struct cpuinfo_x86 *c)
    2.15  {
    2.16 -	uint64_t value;
    2.17  	uint32_t i;
    2.18 -	int cpu_nr;
    2.19  
    2.20  	/* Check for PPro style MCA; our caller has confirmed MCE support. */
    2.21  	if (!cpu_has(c, X86_FEATURE_MCA))
    2.22  		return 0;
    2.23  
    2.24 +	mce_cap_init();
    2.25  	x86_mce_vector_register(k8_machine_check);
    2.26 -	cpu_nr = smp_processor_id();
    2.27 -
    2.28 -	rdmsrl(MSR_IA32_MCG_CAP, value);
    2.29 -	if (value & MCG_CTL_P)	/* Control register present ? */
    2.30 -		wrmsrl (MSR_IA32_MCG_CTL, 0xffffffffffffffffULL);
    2.31 -	nr_mce_banks = value & MCG_CAP_COUNT;
    2.32  
    2.33  	for (i = 0; i < nr_mce_banks; i++) {
    2.34  		switch (i) {
    2.35  		case 4: /* Northbridge */
    2.36 -			/* Enable error reporting of all errors */
    2.37 -			wrmsrl(MSR_IA32_MC4_CTL, 0xffffffffffffffffULL);
    2.38 -			wrmsrl(MSR_IA32_MC4_STATUS, 0x0ULL);
    2.39 -			break;
    2.40 +			if (c->x86 == 0xf) {
    2.41 +				/*
    2.42 +				 * Enable error reporting of all errors except
    2.43 +				 * for GART TBL walk error reporting, which
    2.44 +				 * trips off incorrectly with IOMMU & 3ware &
    2.45 +				 * Cerberus.
    2.46 +				 */
    2.47 +				wrmsrl(MSR_IA32_MC4_CTL, ~(1ULL << 10));
    2.48 +				wrmsrl(MSR_IA32_MC4_STATUS, 0x0ULL);
    2.49 +				break;
    2.50 +			}
    2.51 +			/* fall through */
    2.52  
    2.53  		default:
    2.54  			/* Enable error reporting of all errors */
    2.55 @@ -111,7 +112,9 @@ int amd_k8_mcheck_init(struct cpuinfo_x8
    2.56  	}
    2.57  
    2.58  	set_in_cr4(X86_CR4_MCE);
    2.59 -	printk("CPU%i: AMD K8 machine check reporting enabled.\n", cpu_nr);
    2.60 +	if (c->x86 < 0x10 || c->x86 > 0x11)
    2.61 +		printk("CPU%i: AMD K8 machine check reporting enabled\n",
    2.62 +		       smp_processor_id());
    2.63  
    2.64  	return 1;
    2.65  }
     3.1 --- a/xen/arch/x86/cpu/mcheck/mce.c	Wed Jul 15 15:33:05 2009 +0100
     3.2 +++ b/xen/arch/x86/cpu/mcheck/mce.c	Wed Jul 15 16:21:12 2009 +0100
     3.3 @@ -23,10 +23,12 @@
     3.4  #include "mce.h"
     3.5  
     3.6  int mce_disabled = 0;
     3.7 +invbool_param("mce", mce_disabled);
     3.8 +
     3.9  int is_mc_panic = 0;
    3.10  unsigned int nr_mce_banks;
    3.11  
    3.12 -EXPORT_SYMBOL_GPL(nr_mce_banks);	/* non-fatal.o */
    3.13 +static uint64_t g_mcg_cap;
    3.14  
    3.15  static void intpose_init(void);
    3.16  static void mcinfo_clear(struct mc_info *);
    3.17 @@ -545,18 +547,17 @@ static int amd_mcheck_init(struct cpuinf
    3.18  		rc = amd_k7_mcheck_init(ci);
    3.19  		break;
    3.20  
    3.21 +	default:
    3.22 +		/* Assume that machine check support is available.
    3.23 +		 * The minimum provided support is at least the K8. */
    3.24  	case 0xf:
    3.25  		rc = amd_k8_mcheck_init(ci);
    3.26  		break;
    3.27  
    3.28  	case 0x10:
    3.29 +	case 0x11:
    3.30  		rc = amd_f10_mcheck_init(ci);
    3.31  		break;
    3.32 -
    3.33 -	default:
    3.34 -		/* Assume that machine check support is available.
    3.35 -		 * The minimum provided support is at least the K8. */
    3.36 -		rc = amd_k8_mcheck_init(ci);
    3.37  	}
    3.38  
    3.39  	return rc;
    3.40 @@ -633,19 +634,273 @@ void mcheck_init(struct cpuinfo_x86 *c)
    3.41  		    smp_processor_id());
    3.42  }
    3.43  
    3.44 +u64 mce_cap_init(void)
    3.45 +{
    3.46 +    u32 l, h;
    3.47 +    u64 value;
    3.48  
    3.49 -static void __init mcheck_disable(char *str)
    3.50 +    rdmsr(MSR_IA32_MCG_CAP, l, h);
    3.51 +    value = ((u64)h << 32) | l;
    3.52 +    /* For Guest vMCE usage */
    3.53 +    g_mcg_cap = value & ~MCG_CMCI_P;
    3.54 +
    3.55 +    if (l & MCG_CTL_P) /* Control register present ? */
    3.56 +        wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
    3.57 +
    3.58 +    nr_mce_banks = l & MCG_CAP_COUNT;
    3.59 +    if ( nr_mce_banks > MAX_NR_BANKS )
    3.60 +    {
    3.61 +        printk(KERN_WARNING "MCE: exceed max mce banks\n");
    3.62 +        g_mcg_cap = (g_mcg_cap & ~MCG_CAP_COUNT) | MAX_NR_BANKS;
    3.63 +    }
    3.64 +
    3.65 +    return value;
    3.66 +}
    3.67 +
    3.68 +/* Guest vMCE# MSRs virtualization ops (rdmsr/wrmsr) */
    3.69 +void mce_init_msr(struct domain *d)
    3.70  {
    3.71 -	mce_disabled = 1;
    3.72 +    d->arch.vmca_msrs.mcg_status = 0x0;
    3.73 +    d->arch.vmca_msrs.mcg_cap = g_mcg_cap;
    3.74 +    d->arch.vmca_msrs.mcg_ctl = ~(uint64_t)0x0;
    3.75 +    d->arch.vmca_msrs.nr_injection = 0;
    3.76 +    memset(d->arch.vmca_msrs.mci_ctl, ~0,
    3.77 +           sizeof(d->arch.vmca_msrs.mci_ctl));
    3.78 +    INIT_LIST_HEAD(&d->arch.vmca_msrs.impact_header);
    3.79 +    spin_lock_init(&d->arch.vmca_msrs.lock);
    3.80  }
    3.81  
    3.82 -static void __init mcheck_enable(char *str)
    3.83 +int mce_rdmsr(u32 msr, u32 *lo, u32 *hi)
    3.84  {
    3.85 -	mce_disabled = 0;
    3.86 +    struct domain *d = current->domain;
    3.87 +    int ret = 1;
    3.88 +    unsigned int bank;
    3.89 +    struct bank_entry *entry = NULL;
    3.90 +
    3.91 +    *lo = *hi = 0x0;
    3.92 +    spin_lock(&d->arch.vmca_msrs.lock);
    3.93 +
    3.94 +    switch ( msr )
    3.95 +    {
    3.96 +    case MSR_IA32_MCG_STATUS:
    3.97 +        *lo = (u32)d->arch.vmca_msrs.mcg_status;
    3.98 +        *hi = (u32)(d->arch.vmca_msrs.mcg_status >> 32);
    3.99 +        gdprintk(XENLOG_DEBUG, "MCE: rd MCG_STATUS lo %x hi %x\n", *lo, *hi);
   3.100 +        break;
   3.101 +    case MSR_IA32_MCG_CAP:
   3.102 +        *lo = (u32)d->arch.vmca_msrs.mcg_cap;
   3.103 +        *hi = (u32)(d->arch.vmca_msrs.mcg_cap >> 32);
   3.104 +        gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCG_CAP lo %x hi %x\n", *lo, *hi);
   3.105 +        break;
   3.106 +    case MSR_IA32_MCG_CTL:
   3.107 +        *lo = (u32)d->arch.vmca_msrs.mcg_ctl;
   3.108 +        *hi = (u32)(d->arch.vmca_msrs.mcg_ctl >> 32);
   3.109 +        gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCG_CTL lo %x hi %x\n", *lo, *hi);
   3.110 +        break;
   3.111 +    case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1:
   3.112 +        bank = (msr - MSR_IA32_MC0_CTL) / 4;
   3.113 +        if ( bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT) )
   3.114 +        {
   3.115 +            gdprintk(XENLOG_WARNING, "MCE: bank %u does not exist\n", bank);
   3.116 +            ret = -1;
   3.117 +            break;
   3.118 +        }
   3.119 +        switch (msr & (MSR_IA32_MC0_CTL | 3))
   3.120 +        {
   3.121 +        case MSR_IA32_MC0_CTL:
   3.122 +            *lo = (u32)d->arch.vmca_msrs.mci_ctl[bank];
   3.123 +            *hi = (u32)(d->arch.vmca_msrs.mci_ctl[bank] >> 32);
   3.124 +            gdprintk(XENLOG_DEBUG, "MCE: rd MC%u_CTL lo %x hi %x\n",
   3.125 +                     bank, *lo, *hi);
   3.126 +            break;
   3.127 +        case MSR_IA32_MC0_STATUS:
   3.128 +            /* Only error bank is read. Non-error banks simply return. */
   3.129 +            if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
   3.130 +            {
   3.131 +                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
   3.132 +                                   struct bank_entry, list);
   3.133 +                if (entry->bank == bank) {
   3.134 +                    *lo = entry->mci_status;
   3.135 +                    *hi = entry->mci_status >> 32;
   3.136 +                    gdprintk(XENLOG_DEBUG,
   3.137 +                             "MCE: rd MC%u_STATUS in vMCE# context "
   3.138 +                             "lo %x hi %x\n", bank, *lo, *hi);
   3.139 +                }
   3.140 +                else
   3.141 +                    entry = NULL;
   3.142 +            }
   3.143 +            if ( !entry )
   3.144 +                gdprintk(XENLOG_DEBUG, "MCE: rd MC%u_STATUS\n", bank);
   3.145 +            break;
   3.146 +        case MSR_IA32_MC0_ADDR:
   3.147 +            if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
   3.148 +            {
   3.149 +                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
   3.150 +                                   struct bank_entry, list);
   3.151 +                if ( entry->bank == bank )
   3.152 +                {
   3.153 +                    *lo = entry->mci_addr;
   3.154 +                    *hi = entry->mci_addr >> 32;
   3.155 +                    gdprintk(XENLOG_DEBUG,
   3.156 +                             "MCE: rd MC%u_ADDR in vMCE# context lo %x hi %x\n",
   3.157 +                             bank, *lo, *hi);
   3.158 +                }
   3.159 +            }
   3.160 +            break;
   3.161 +        case MSR_IA32_MC0_MISC:
   3.162 +            if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
   3.163 +            {
   3.164 +                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
   3.165 +                                   struct bank_entry, list);
   3.166 +                if ( entry->bank == bank )
   3.167 +                {
   3.168 +                    *lo = entry->mci_misc;
   3.169 +                    *hi = entry->mci_misc >> 32;
   3.170 +                    gdprintk(XENLOG_DEBUG,
   3.171 +                             "MCE: rd MC%u_MISC in vMCE# context lo %x hi %x\n",
   3.172 +                             bank, *lo, *hi);
   3.173 +                }
   3.174 +            }
   3.175 +            break;
   3.176 +        }
   3.177 +        break;
   3.178 +    default:
   3.179 +        switch ( boot_cpu_data.x86_vendor )
   3.180 +        {
   3.181 +        case X86_VENDOR_INTEL:
   3.182 +            ret = intel_mce_rdmsr(msr, lo, hi);
   3.183 +            break;
   3.184 +        default:
   3.185 +            ret = 0;
   3.186 +            break;
   3.187 +        }
   3.188 +        break;
   3.189 +    }
   3.190 +
   3.191 +    spin_unlock(&d->arch.vmca_msrs.lock);
   3.192 +    return ret;
   3.193  }
   3.194  
   3.195 -custom_param("nomce", mcheck_disable);
   3.196 -custom_param("mce", mcheck_enable);
   3.197 +int mce_wrmsr(u32 msr, u64 value)
   3.198 +{
   3.199 +    struct domain *d = current->domain;
   3.200 +    struct bank_entry *entry = NULL;
   3.201 +    unsigned int bank;
   3.202 +    int ret = 1;
   3.203 +
   3.204 +    if ( !g_mcg_cap )
   3.205 +        return 0;
   3.206 +
   3.207 +    spin_lock(&d->arch.vmca_msrs.lock);
   3.208 +
   3.209 +    switch ( msr )
   3.210 +    {
   3.211 +    case MSR_IA32_MCG_CTL:
   3.212 +        if ( value && (value + 1) )
   3.213 +        {
   3.214 +            gdprintk(XENLOG_WARNING, "MCE: value written to MCG_CTL"
   3.215 +                     "should be all 0s or 1s\n");
   3.216 +            ret = -1;
   3.217 +            break;
   3.218 +        }
   3.219 +        d->arch.vmca_msrs.mcg_ctl = value;
   3.220 +        break;
   3.221 +    case MSR_IA32_MCG_STATUS:
   3.222 +        d->arch.vmca_msrs.mcg_status = value;
   3.223 +        gdprintk(XENLOG_DEBUG, "MCE: wrmsr MCG_STATUS %"PRIx64"\n", value);
   3.224 +        /* For HVM guest, this is the point for deleting vMCE injection node */
   3.225 +        if ( d->is_hvm && (d->arch.vmca_msrs.nr_injection > 0) )
   3.226 +        {
   3.227 +            d->arch.vmca_msrs.nr_injection--; /* Should be 0 */
   3.228 +            if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
   3.229 +            {
   3.230 +                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
   3.231 +                    struct bank_entry, list);
   3.232 +                if ( entry->mci_status & MCi_STATUS_VAL )
   3.233 +                    gdprintk(XENLOG_ERR, "MCE: MCi_STATUS MSR should have "
   3.234 +                                "been cleared before write MCG_STATUS MSR\n");
   3.235 +
   3.236 +                gdprintk(XENLOG_DEBUG, "MCE: Delete HVM last injection "
   3.237 +                                "Node, nr_injection %u\n",
   3.238 +                                d->arch.vmca_msrs.nr_injection);
   3.239 +                list_del(&entry->list);
   3.240 +            }
   3.241 +            else
   3.242 +                gdprintk(XENLOG_DEBUG, "MCE: Not found HVM guest"
   3.243 +                    " last injection Node, something Wrong!\n");
   3.244 +        }
   3.245 +        break;
   3.246 +    case MSR_IA32_MCG_CAP:
   3.247 +        gdprintk(XENLOG_WARNING, "MCE: MCG_CAP is read-only\n");
   3.248 +        ret = -1;
   3.249 +        break;
   3.250 +    case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1:
   3.251 +        bank = (msr - MSR_IA32_MC0_CTL) / 4;
   3.252 +        if ( bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT) )
   3.253 +        {
   3.254 +            gdprintk(XENLOG_WARNING, "MCE: bank %u does not exist\n", bank);
   3.255 +            ret = -1;
   3.256 +            break;
   3.257 +        }
   3.258 +        switch ( msr & (MSR_IA32_MC0_CTL | 3) )
   3.259 +        {
   3.260 +        case MSR_IA32_MC0_CTL:
   3.261 +            if ( value && (value + 1) )
   3.262 +            {
   3.263 +                gdprintk(XENLOG_WARNING, "MCE: value written to MC%u_CTL"
   3.264 +                         "should be all 0s or 1s (is %"PRIx64")\n",
   3.265 +                         bank, value);
   3.266 +                ret = -1;
   3.267 +                break;
   3.268 +            }
   3.269 +            d->arch.vmca_msrs.mci_ctl[bank] = value;
   3.270 +            break;
   3.271 +        case MSR_IA32_MC0_STATUS:
   3.272 +            /* Give the first entry of the list, it corresponds to current
   3.273 +             * vMCE# injection. When vMCE# is finished processing by the
   3.274 +             * the guest, this node will be deleted.
   3.275 +             * Only error bank is written. Non-error banks simply return.
   3.276 +             */
   3.277 +            if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
   3.278 +            {
   3.279 +                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
   3.280 +                                   struct bank_entry, list);
   3.281 +                if ( entry->bank == bank )
   3.282 +                    entry->mci_status = value;
   3.283 +                gdprintk(XENLOG_DEBUG,
   3.284 +                         "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n",
   3.285 +                         bank, value);
   3.286 +            }
   3.287 +            else
   3.288 +                gdprintk(XENLOG_DEBUG,
   3.289 +                         "MCE: wr MC%u_STATUS %"PRIx64"\n", bank, value);
   3.290 +            break;
   3.291 +        case MSR_IA32_MC0_ADDR:
   3.292 +            gdprintk(XENLOG_WARNING, "MCE: MC%u_ADDR is read-only\n", bank);
   3.293 +            ret = -1;
   3.294 +            break;
   3.295 +        case MSR_IA32_MC0_MISC:
   3.296 +            gdprintk(XENLOG_WARNING, "MCE: MC%u_MISC is read-only\n", bank);
   3.297 +            ret = -1;
   3.298 +            break;
   3.299 +        }
   3.300 +        break;
   3.301 +    default:
   3.302 +        switch ( boot_cpu_data.x86_vendor )
   3.303 +        {
   3.304 +        case X86_VENDOR_INTEL:
   3.305 +            ret = intel_mce_wrmsr(msr, value);
   3.306 +            break;
   3.307 +        default:
   3.308 +            ret = 0;
   3.309 +            break;
   3.310 +        }
   3.311 +        break;
   3.312 +    }
   3.313 +
   3.314 +    spin_unlock(&d->arch.vmca_msrs.lock);
   3.315 +    return ret;
   3.316 +}
   3.317  
   3.318  static void mcinfo_clear(struct mc_info *mi)
   3.319  {
     4.1 --- a/xen/arch/x86/cpu/mcheck/mce.h	Wed Jul 15 15:33:05 2009 +0100
     4.2 +++ b/xen/arch/x86/cpu/mcheck/mce.h	Wed Jul 15 16:21:12 2009 +0100
     4.3 @@ -23,6 +23,11 @@ void intel_mcheck_timer(struct cpuinfo_x
     4.4  void mce_intel_feature_init(struct cpuinfo_x86 *c);
     4.5  void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c);
     4.6  
     4.7 +u64 mce_cap_init(void);
     4.8 +
     4.9 +int intel_mce_rdmsr(u32 msr, u32 *lo, u32 *hi);
    4.10 +int intel_mce_wrmsr(u32 msr, u64 value);
    4.11 +
    4.12  int mce_available(struct cpuinfo_x86 *c);
    4.13  int mce_firstbank(struct cpuinfo_x86 *c);
    4.14  /* Helper functions used for collecting error telemetry */
     5.1 --- a/xen/arch/x86/cpu/mcheck/mce_intel.c	Wed Jul 15 15:33:05 2009 +0100
     5.2 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c	Wed Jul 15 16:21:12 2009 +0100
     5.3 @@ -995,14 +995,9 @@ void mce_intel_feature_init(struct cpuin
     5.4      intel_init_cmci(c);
     5.5  }
     5.6  
     5.7 -static uint64_t g_mcg_cap;
     5.8 -static void mce_cap_init(struct cpuinfo_x86 *c)
     5.9 +static void _mce_cap_init(struct cpuinfo_x86 *c)
    5.10  {
    5.11 -    u32 l, h;
    5.12 -
    5.13 -    rdmsr (MSR_IA32_MCG_CAP, l, h);
    5.14 -    /* For Guest vMCE usage */
    5.15 -    g_mcg_cap = ((u64)h << 32 | l) & (~MCG_CMCI_P);
    5.16 +    u32 l = mce_cap_init();
    5.17  
    5.18      if ((l & MCG_CMCI_P) && cpu_has_apic)
    5.19          cmci_support = 1;
    5.20 @@ -1011,12 +1006,6 @@ static void mce_cap_init(struct cpuinfo_
    5.21      if (l & MCG_SER_P)
    5.22          ser_support = 1;
    5.23  
    5.24 -    nr_mce_banks = l & MCG_CAP_COUNT;
    5.25 -    if (nr_mce_banks > MAX_NR_BANKS)
    5.26 -    {
    5.27 -        printk(KERN_WARNING "MCE: exceed max mce banks\n");
    5.28 -        g_mcg_cap = (g_mcg_cap & ~MCG_CAP_COUNT) | MAX_NR_BANKS;
    5.29 -    }
    5.30      if (l & MCG_EXT_P)
    5.31      {
    5.32          nr_intel_ext_msrs = (l >> MCG_EXT_CNT) & 0xff;
    5.33 @@ -1052,9 +1041,6 @@ static void mce_init(void)
    5.34      }
    5.35  
    5.36      set_in_cr4(X86_CR4_MCE);
    5.37 -    rdmsr (MSR_IA32_MCG_CAP, l, h);
    5.38 -    if (l & MCG_CTL_P) /* Control register present ? */
    5.39 -        wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
    5.40  
    5.41      for (i = firstbank; i < nr_mce_banks; i++)
    5.42      {
    5.43 @@ -1076,7 +1062,7 @@ static void mce_init(void)
    5.44  /* p4/p6 family have similar MCA initialization process */
    5.45  int intel_mcheck_init(struct cpuinfo_x86 *c)
    5.46  {
    5.47 -    mce_cap_init(c);
    5.48 +    _mce_cap_init(c);
    5.49      printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
    5.50              smp_processor_id());
    5.51  
    5.52 @@ -1094,220 +1080,39 @@ int intel_mcheck_init(struct cpuinfo_x86
    5.53      return 1;
    5.54  }
    5.55  
    5.56 -/* Guest vMCE# MSRs virtualization ops (rdmsr/wrmsr) */
    5.57 -void intel_mce_init_msr(struct domain *d)
    5.58 -{
    5.59 -    d->arch.vmca_msrs.mcg_status = 0x0;
    5.60 -    d->arch.vmca_msrs.mcg_cap = g_mcg_cap;
    5.61 -    d->arch.vmca_msrs.mcg_ctl = (uint64_t)~0x0;
    5.62 -    d->arch.vmca_msrs.nr_injection = 0;
    5.63 -    memset(d->arch.vmca_msrs.mci_ctl, ~0,
    5.64 -           sizeof(d->arch.vmca_msrs.mci_ctl));
    5.65 -    INIT_LIST_HEAD(&d->arch.vmca_msrs.impact_header);
    5.66 -    spin_lock_init(&d->arch.vmca_msrs.lock);
    5.67 -}
    5.68 -
    5.69  int intel_mce_wrmsr(u32 msr, u64 value)
    5.70  {
    5.71 -    struct domain *d = current->domain;
    5.72 -    struct bank_entry *entry = NULL;
    5.73 -    unsigned int bank;
    5.74      int ret = 1;
    5.75  
    5.76 -    spin_lock(&d->arch.vmca_msrs.lock);
    5.77 -    switch(msr)
    5.78 +    switch ( msr )
    5.79      {
    5.80 -    case MSR_IA32_MCG_CTL:
    5.81 -        if (value != (u64)~0x0 && value != 0x0) {
    5.82 -            gdprintk(XENLOG_WARNING, "MCE: value written to MCG_CTL"
    5.83 -                     "should be all 0s or 1s\n");
    5.84 -            ret = -1;
    5.85 -            break;
    5.86 -        }
    5.87 -        d->arch.vmca_msrs.mcg_ctl = value;
    5.88 -        break;
    5.89 -    case MSR_IA32_MCG_STATUS:
    5.90 -        d->arch.vmca_msrs.mcg_status = value;
    5.91 -        gdprintk(XENLOG_DEBUG, "MCE: wrmsr MCG_STATUS %"PRIx64"\n", value);
    5.92 -        /* For HVM guest, this is the point for deleting vMCE injection node */
    5.93 -        if ( (d->is_hvm) && (d->arch.vmca_msrs.nr_injection >0) )
    5.94 -        {
    5.95 -            d->arch.vmca_msrs.nr_injection--; /* Should be 0 */
    5.96 -            if (!list_empty(&d->arch.vmca_msrs.impact_header)) {
    5.97 -                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
    5.98 -                    struct bank_entry, list);
    5.99 -                if (entry->mci_status & MCi_STATUS_VAL)
   5.100 -                    gdprintk(XENLOG_ERR, "MCE: MCi_STATUS MSR should have "
   5.101 -                                "been cleared before write MCG_STATUS MSR\n");
   5.102 -
   5.103 -                gdprintk(XENLOG_DEBUG, "MCE: Delete HVM last injection "
   5.104 -                                "Node, nr_injection %u\n",
   5.105 -                                d->arch.vmca_msrs.nr_injection);
   5.106 -                list_del(&entry->list);
   5.107 -            }
   5.108 -            else
   5.109 -                gdprintk(XENLOG_DEBUG, "MCE: Not found HVM guest"
   5.110 -                    " last injection Node, something Wrong!\n");
   5.111 -        }
   5.112 -        break;
   5.113 -    case MSR_IA32_MCG_CAP:
   5.114 -        gdprintk(XENLOG_WARNING, "MCE: MCG_CAP is read-only\n");
   5.115 -        ret = -1;
   5.116 -        break;
   5.117      case MSR_IA32_MC0_CTL2 ... MSR_IA32_MC0_CTL2 + MAX_NR_BANKS - 1:
   5.118          gdprintk(XENLOG_WARNING, "We have disabled CMCI capability, "
   5.119                   "Guest should not write this MSR!\n");
   5.120          break;
   5.121 -    case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1:
   5.122 -        bank = (msr - MSR_IA32_MC0_CTL) / 4;
   5.123 -        if (bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT)) {
   5.124 -            gdprintk(XENLOG_WARNING, "MCE: bank %u does not exist\n", bank);
   5.125 -            ret = -1;
   5.126 -            break;
   5.127 -        }
   5.128 -        switch (msr & (MSR_IA32_MC0_CTL | 3))
   5.129 -        {
   5.130 -        case MSR_IA32_MC0_CTL:
   5.131 -            if (value != (u64)~0x0 && value != 0x0) {
   5.132 -                gdprintk(XENLOG_WARNING, "MCE: value written to MC%u_CTL"
   5.133 -                         "should be all 0s or 1s (is %"PRIx64")\n",
   5.134 -                         bank, value);
   5.135 -                ret = -1;
   5.136 -                break;
   5.137 -            }
   5.138 -            d->arch.vmca_msrs.mci_ctl[(msr - MSR_IA32_MC0_CTL)/4] = value;
   5.139 -            break;
   5.140 -        case MSR_IA32_MC0_STATUS:
   5.141 -            /* Give the first entry of the list, it corresponds to current
   5.142 -             * vMCE# injection. When vMCE# is finished processing by the
   5.143 -             * the guest, this node will be deleted.
   5.144 -             * Only error bank is written. Non-error banks simply return.
   5.145 -             */
   5.146 -            if (!list_empty(&d->arch.vmca_msrs.impact_header)) {
   5.147 -                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
   5.148 -                                   struct bank_entry, list);
   5.149 -                if ( entry->bank == bank )
   5.150 -                    entry->mci_status = value;
   5.151 -                gdprintk(XENLOG_DEBUG,
   5.152 -                         "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n",
   5.153 -                         bank, value);
   5.154 -            } else
   5.155 -                gdprintk(XENLOG_DEBUG,
   5.156 -                         "MCE: wr MC%u_STATUS %"PRIx64"\n", bank, value);
   5.157 -            break;
   5.158 -        case MSR_IA32_MC0_ADDR:
   5.159 -            gdprintk(XENLOG_WARNING, "MCE: MC%u_ADDR is read-only\n", bank);
   5.160 -            ret = -1;
   5.161 -            break;
   5.162 -        case MSR_IA32_MC0_MISC:
   5.163 -            gdprintk(XENLOG_WARNING, "MCE: MC%u_MISC is read-only\n", bank);
   5.164 -            ret = -1;
   5.165 -            break;
   5.166 -        }
   5.167 -        break;
   5.168      default:
   5.169          ret = 0;
   5.170          break;
   5.171      }
   5.172 -    spin_unlock(&d->arch.vmca_msrs.lock);
   5.173 +
   5.174      return ret;
   5.175  }
   5.176  
   5.177  int intel_mce_rdmsr(u32 msr, u32 *lo, u32 *hi)
   5.178  {
   5.179 -    struct domain *d = current->domain;
   5.180      int ret = 1;
   5.181 -    unsigned int bank;
   5.182 -    struct bank_entry *entry = NULL;
   5.183  
   5.184 -    *lo = *hi = 0x0;
   5.185 -    spin_lock(&d->arch.vmca_msrs.lock);
   5.186 -    switch(msr)
   5.187 +    switch ( msr )
   5.188      {
   5.189 -    case MSR_IA32_MCG_STATUS:
   5.190 -        *lo = (u32)d->arch.vmca_msrs.mcg_status;
   5.191 -        *hi = (u32)(d->arch.vmca_msrs.mcg_status >> 32);
   5.192 -        gdprintk(XENLOG_DEBUG, "MCE: rd MCG_STATUS lo %x hi %x\n", *lo, *hi);
   5.193 -        break;
   5.194 -    case MSR_IA32_MCG_CAP:
   5.195 -        *lo = (u32)d->arch.vmca_msrs.mcg_cap;
   5.196 -        *hi = (u32)(d->arch.vmca_msrs.mcg_cap >> 32);
   5.197 -        gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCG_CAP lo %x hi %x\n", *lo, *hi);
   5.198 -        break;
   5.199 -    case MSR_IA32_MCG_CTL:
   5.200 -        *lo = (u32)d->arch.vmca_msrs.mcg_ctl;
   5.201 -        *hi = (u32)(d->arch.vmca_msrs.mcg_ctl >> 32);
   5.202 -        gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCG_CTL lo %x hi %x\n", *lo, *hi);
   5.203 -        break;
   5.204      case MSR_IA32_MC0_CTL2 ... MSR_IA32_MC0_CTL2 + MAX_NR_BANKS - 1:
   5.205          gdprintk(XENLOG_WARNING, "We have disabled CMCI capability, "
   5.206                   "Guest should not read this MSR!\n");
   5.207          break;
   5.208 -    case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1:
   5.209 -        bank = (msr - MSR_IA32_MC0_CTL) / 4;
   5.210 -        if (bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT)) {
   5.211 -            gdprintk(XENLOG_WARNING, "MCE: bank %u does not exist\n", bank);
   5.212 -            ret = -1;
   5.213 -            break;
   5.214 -        }
   5.215 -        switch (msr & (MSR_IA32_MC0_CTL | 3))
   5.216 -        {
   5.217 -        case MSR_IA32_MC0_CTL:
   5.218 -            *lo = (u32)d->arch.vmca_msrs.mci_ctl[bank];
   5.219 -            *hi = (u32)(d->arch.vmca_msrs.mci_ctl[bank] >> 32);
   5.220 -            gdprintk(XENLOG_DEBUG, "MCE: rd MC%u_CTL lo %x hi %x\n",
   5.221 -                     bank, *lo, *hi);
   5.222 -            break;
   5.223 -        case MSR_IA32_MC0_STATUS:
   5.224 -            /* Only error bank is read. Non-error banks simply return. */
   5.225 -            if (!list_empty(&d->arch.vmca_msrs.impact_header)) {
   5.226 -                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
   5.227 -                                   struct bank_entry, list);
   5.228 -                if (entry->bank == bank) {
   5.229 -                    *lo = entry->mci_status;
   5.230 -                    *hi = entry->mci_status >> 32;
   5.231 -                    gdprintk(XENLOG_DEBUG,
   5.232 -                             "MCE: rd MC%u_STATUS in vmCE# context "
   5.233 -                             "lo %x hi %x\n", bank, *lo, *hi);
   5.234 -                } else
   5.235 -                    entry = NULL;
   5.236 -            }
   5.237 -            if (!entry)
   5.238 -                gdprintk(XENLOG_DEBUG, "MCE: rd MC%u_STATUS\n", bank);
   5.239 -            break;
   5.240 -        case MSR_IA32_MC0_ADDR:
   5.241 -            if (!list_empty(&d->arch.vmca_msrs.impact_header)) {
   5.242 -                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
   5.243 -                                   struct bank_entry, list);
   5.244 -                if (entry->bank == bank) {
   5.245 -                    *lo = entry->mci_addr;
   5.246 -                    *hi = entry->mci_addr >> 32;
   5.247 -                    gdprintk(XENLOG_DEBUG,
   5.248 -                             "MCE: rd MC%u_ADDR in vMCE# context lo %x hi %x\n",
   5.249 -                             bank, *lo, *hi);
   5.250 -                }
   5.251 -            }
   5.252 -            break;
   5.253 -        case MSR_IA32_MC0_MISC:
   5.254 -            if (!list_empty(&d->arch.vmca_msrs.impact_header)) {
   5.255 -                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
   5.256 -                                   struct bank_entry, list);
   5.257 -                if (entry->bank == bank) {
   5.258 -                    *lo = entry->mci_misc;
   5.259 -                    *hi = entry->mci_misc >> 32;
   5.260 -                    gdprintk(XENLOG_DEBUG,
   5.261 -                             "MCE: rd MC%u_MISC in vMCE# context lo %x hi %x\n",
   5.262 -                             bank, *lo, *hi);
   5.263 -                }
   5.264 -            }
   5.265 -            break;
   5.266 -        }
   5.267 -        break;
   5.268      default:
   5.269          ret = 0;
   5.270          break;
   5.271      }
   5.272 -    spin_unlock(&d->arch.vmca_msrs.lock);
   5.273 +
   5.274      return ret;
   5.275  }
   5.276  
     6.1 --- a/xen/arch/x86/domain.c	Wed Jul 15 15:33:05 2009 +0100
     6.2 +++ b/xen/arch/x86/domain.c	Wed Jul 15 16:21:12 2009 +0100
     6.3 @@ -484,8 +484,7 @@ int arch_domain_create(struct domain *d,
     6.4              goto fail;
     6.5  
     6.6          /* For Guest vMCE MSRs virtualization */
     6.7 -        if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
     6.8 -            intel_mce_init_msr(d);
     6.9 +        mce_init_msr(d);
    6.10      }
    6.11  
    6.12      if ( is_hvm_domain(d) )
     7.1 --- a/xen/arch/x86/hvm/hvm.c	Wed Jul 15 15:33:05 2009 +0100
     7.2 +++ b/xen/arch/x86/hvm/hvm.c	Wed Jul 15 16:21:12 2009 +0100
     7.3 @@ -43,6 +43,7 @@
     7.4  #include <asm/processor.h>
     7.5  #include <asm/types.h>
     7.6  #include <asm/msr.h>
     7.7 +#include <asm/traps.h>
     7.8  #include <asm/mc146818rtc.h>
     7.9  #include <asm/spinlock.h>
    7.10  #include <asm/hvm/hvm.h>
    7.11 @@ -1773,8 +1774,6 @@ void hvm_rdtsc_intercept(struct cpu_user
    7.12      regs->edx = (uint32_t)(tsc >> 32);
    7.13  }
    7.14  
    7.15 -extern int intel_mce_rdmsr(u32 msr, u32 *lo, u32 *hi);
    7.16 -extern int intel_mce_wrmsr(u32 msr, u64 value);
    7.17  int hvm_msr_read_intercept(struct cpu_user_regs *regs)
    7.18  {
    7.19      uint32_t ecx = regs->ecx;
    7.20 @@ -1852,7 +1851,7 @@ int hvm_msr_read_intercept(struct cpu_us
    7.21           break;
    7.22  
    7.23      default:
    7.24 -        ret = intel_mce_rdmsr(ecx, &lo, &hi);
    7.25 +        ret = mce_rdmsr(ecx, &lo, &hi);
    7.26          if ( ret < 0 )
    7.27              goto gp_fault;
    7.28          else if ( ret )
    7.29 @@ -1951,7 +1950,7 @@ int hvm_msr_write_intercept(struct cpu_u
    7.30          break;
    7.31  
    7.32      default:
    7.33 -        ret = intel_mce_wrmsr(ecx, msr_content);
    7.34 +        ret = mce_wrmsr(ecx, msr_content);
    7.35          if ( ret < 0 )
    7.36              goto gp_fault;
    7.37          else if ( ret )
     8.1 --- a/xen/arch/x86/traps.c	Wed Jul 15 15:33:05 2009 +0100
     8.2 +++ b/xen/arch/x86/traps.c	Wed Jul 15 16:21:12 2009 +0100
     8.3 @@ -1680,7 +1680,8 @@ static int emulate_privileged_op(struct 
     8.4      unsigned long *reg, eip = regs->eip, res;
     8.5      u8 opcode, modrm_reg = 0, modrm_rm = 0, rep_prefix = 0, lock = 0, rex = 0;
     8.6      enum { lm_seg_none, lm_seg_fs, lm_seg_gs } lm_ovr = lm_seg_none;
     8.7 -    unsigned int port, i, data_sel, ar, data, rc, bpmatch = 0;
     8.8 +    int rc;
     8.9 +    unsigned int port, i, data_sel, ar, data, bpmatch = 0;
    8.10      unsigned int op_bytes, op_default, ad_bytes, ad_default;
    8.11  #define rd_ad(reg) (ad_bytes >= sizeof(regs->reg) \
    8.12                      ? regs->reg \
    8.13 @@ -2245,14 +2246,12 @@ static int emulate_privileged_op(struct 
    8.14          default:
    8.15              if ( wrmsr_hypervisor_regs(regs->ecx, eax, edx) )
    8.16                  break;
    8.17 -            if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
    8.18 -            {
    8.19 -                int rc = intel_mce_wrmsr(regs->ecx, res);
    8.20 -                if ( rc < 0 )
    8.21 -                    goto fail;
    8.22 -                if ( rc )
    8.23 -                    break;
    8.24 -            }
    8.25 +
    8.26 +            rc = mce_wrmsr(regs->ecx, res);
    8.27 +            if ( rc < 0 )
    8.28 +                goto fail;
    8.29 +            if ( rc )
    8.30 +                break;
    8.31  
    8.32              if ( (rdmsr_safe(regs->ecx, l, h) != 0) ||
    8.33                   (eax != l) || (edx != h) )
    8.34 @@ -2334,15 +2333,11 @@ static int emulate_privileged_op(struct 
    8.35                  break;
    8.36              }
    8.37  
    8.38 -            if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
    8.39 -            {
    8.40 -                int rc = intel_mce_rdmsr(regs->ecx, &l, &h);
    8.41 -
    8.42 -                if ( rc < 0 )
    8.43 -                    goto fail;
    8.44 -                if ( rc )
    8.45 -                    goto rdmsr_writeback;
    8.46 -            }
    8.47 +            rc = mce_rdmsr(regs->ecx, &l, &h);
    8.48 +            if ( rc < 0 )
    8.49 +                goto fail;
    8.50 +            if ( rc )
    8.51 +                goto rdmsr_writeback;
    8.52  
    8.53              /* Everyone can read the MSR space. */
    8.54              /* gdprintk(XENLOG_WARNING,"Domain attempted RDMSR %p.\n",
     9.1 --- a/xen/include/asm-x86/traps.h	Wed Jul 15 15:33:05 2009 +0100
     9.2 +++ b/xen/include/asm-x86/traps.h	Wed Jul 15 16:21:12 2009 +0100
     9.3 @@ -47,9 +47,9 @@ extern int guest_has_trap_callback(struc
     9.4  extern int send_guest_trap(struct domain *d, uint16_t vcpuid,
     9.5  				unsigned int trap_nr);
     9.6  
     9.7 -/* Intel vMCE MSRs virtualization */
     9.8 -extern void intel_mce_init_msr(struct domain *d);
     9.9 -extern int intel_mce_wrmsr(u32 msr, u64 value);
    9.10 -extern int intel_mce_rdmsr(u32 msr, u32 *lo, u32 *hi);
    9.11 +/* Guest vMCE MSRs virtualization */
    9.12 +extern void mce_init_msr(struct domain *d);
    9.13 +extern int mce_wrmsr(u32 msr, u64 value);
    9.14 +extern int mce_rdmsr(u32 msr, u32 *lo, u32 *hi);
    9.15  
    9.16  #endif /* ASM_TRAP_H */