debuggers.hg
changeset 18976:4d5203f95498
Enable CMCI for Intel CPUs
Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
Signed-off-by: Liping Ke <liping.ke@intel.com>
author | Keir Fraser <keir.fraser@citrix.com> |
---|---|
date | Mon Dec 22 08:12:33 2008 +0000 (2008-12-22) |
parents | 2dffa6ceb0af |
children | aa0fee8a6ef5 |
files | xen/arch/x86/apic.c xen/arch/x86/cpu/mcheck/Makefile xen/arch/x86/cpu/mcheck/k7.c xen/arch/x86/cpu/mcheck/mce.c xen/arch/x86/cpu/mcheck/mce.h xen/arch/x86/cpu/mcheck/mce_intel.c xen/arch/x86/cpu/mcheck/non-fatal.c xen/arch/x86/cpu/mcheck/p4.c xen/arch/x86/cpu/mcheck/p6.c xen/arch/x86/cpu/mcheck/x86_mca.h xen/arch/x86/hvm/vmx/vmx.c xen/arch/x86/i8259.c xen/arch/x86/smpboot.c xen/common/stop_machine.c xen/include/asm-x86/apicdef.h xen/include/asm-x86/config.h xen/include/asm-x86/irq.h xen/include/asm-x86/mach-default/irq_vectors.h xen/include/asm-x86/msr-index.h xen/include/asm-x86/smp.h xen/include/public/arch-x86/xen-mca.h xen/include/xen/stop_machine.h |
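
For orientation before the per-file diffs: CMCI (Corrected Machine Check Interrupt) lets a machine-check bank raise a local APIC interrupt when it logs a corrected error, instead of leaving such errors entirely to the periodic poller. The bulk of the change is the new xen/arch/x86/cpu/mcheck/mce_intel.c, which detects CMCI support via IA32_MCG_CAP, has each CPU claim ownership of the banks it will report (so banks shared between CPUs are reported only once), and programs the new CMCI LVT entry and vector. The snippet below is only a minimal sketch of the per-bank discovery step, mirroring what do_cmci_discover() in the diff does; the constants are the ones this patch adds, and the helper name is illustrative, not part of the patch.

```c
#define MSR_IA32_MCG_CAP   0x00000179
#define MSR_IA32_MC0_CTL2  0x00000280   /* bank i's CTL2 MSR is MC0_CTL2 + i */
#define MCG_CMCI_P         (1UL << 10)  /* CPU supports CMCI */
#define CMCI_EN            (1UL << 30)  /* enable CMCI for this bank */
#define CMCI_THRESHOLD     0x2          /* corrected-error count that raises the interrupt */

/* Illustrative helper: try to claim CMCI ownership of one MC bank.
 * Returns 1 if this CPU now owns the bank, 0 otherwise. */
static int try_claim_cmci_bank(int bank)
{
    uint64_t cap, val;

    rdmsrl(MSR_IA32_MCG_CAP, cap);
    if (!(cap & MCG_CMCI_P))
        return 0;                       /* no CMCI support on this CPU */

    rdmsrl(MSR_IA32_MC0_CTL2 + bank, val);
    if (val & CMCI_EN)
        return 0;                       /* some other CPU already owns the bank */

    /* Write CMCI_EN and read it back: if the bit does not stick, the bank
     * has no CMCI capability and stays with the polling timer. */
    wrmsrl(MSR_IA32_MC0_CTL2 + bank, val | CMCI_EN | CMCI_THRESHOLD);
    rdmsrl(MSR_IA32_MC0_CTL2 + bank, val);
    return !!(val & CMCI_EN);
}
```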
line diff
1.1 --- a/xen/arch/x86/apic.c Fri Dec 19 14:56:36 2008 +0000 1.2 +++ b/xen/arch/x86/apic.c Mon Dec 22 08:12:33 2008 +0000 1.3 @@ -99,8 +99,11 @@ void __init apic_intr_init(void) 1.4 /* Performance Counters Interrupt */ 1.5 set_intr_gate(PMU_APIC_VECTOR, pmu_apic_interrupt); 1.6 1.7 - /* thermal monitor LVT interrupt */ 1.8 -#ifdef CONFIG_X86_MCE_P4THERMAL 1.9 + /* CMCI Correctable Machine Check Interrupt */ 1.10 + set_intr_gate(CMCI_APIC_VECTOR, cmci_interrupt); 1.11 + 1.12 + /* thermal monitor LVT interrupt, for P4 and latest Intel CPU*/ 1.13 +#ifdef CONFIG_X86_MCE_THERMAL 1.14 set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); 1.15 #endif 1.16 } 1.17 @@ -172,12 +175,17 @@ void clear_local_APIC(void) 1.18 } 1.19 1.20 /* lets not touch this if we didn't frob it */ 1.21 -#ifdef CONFIG_X86_MCE_P4THERMAL 1.22 +#ifdef CONFIG_X86_MCE_THERMAL 1.23 if (maxlvt >= 5) { 1.24 v = apic_read(APIC_LVTTHMR); 1.25 apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED); 1.26 } 1.27 #endif 1.28 + 1.29 + if (maxlvt >= 6) { 1.30 + v = apic_read(APIC_CMCI); 1.31 + apic_write_around(APIC_CMCI, v | APIC_LVT_MASKED); 1.32 + } 1.33 /* 1.34 * Clean APIC state for other OSs: 1.35 */ 1.36 @@ -189,10 +197,13 @@ void clear_local_APIC(void) 1.37 if (maxlvt >= 4) 1.38 apic_write_around(APIC_LVTPC, APIC_LVT_MASKED); 1.39 1.40 -#ifdef CONFIG_X86_MCE_P4THERMAL 1.41 +#ifdef CONFIG_X86_MCE_THERMAL 1.42 if (maxlvt >= 5) 1.43 apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED); 1.44 #endif 1.45 + if (maxlvt >= 6) 1.46 + apic_write_around(APIC_CMCI, APIC_LVT_MASKED); 1.47 + 1.48 v = GET_APIC_VERSION(apic_read(APIC_LVR)); 1.49 if (APIC_INTEGRATED(v)) { /* !82489DX */ 1.50 if (maxlvt > 3) /* Due to Pentium errata 3AP and 11AP. */ 1.51 @@ -597,6 +608,7 @@ static struct { 1.52 unsigned int apic_spiv; 1.53 unsigned int apic_lvtt; 1.54 unsigned int apic_lvtpc; 1.55 + unsigned int apic_lvtcmci; 1.56 unsigned int apic_lvt0; 1.57 unsigned int apic_lvt1; 1.58 unsigned int apic_lvterr; 1.59 @@ -608,7 +620,7 @@ static struct { 1.60 int lapic_suspend(void) 1.61 { 1.62 unsigned long flags; 1.63 - 1.64 + int maxlvt = get_maxlvt(); 1.65 if (!apic_pm_state.active) 1.66 return 0; 1.67 1.68 @@ -620,6 +632,11 @@ int lapic_suspend(void) 1.69 apic_pm_state.apic_spiv = apic_read(APIC_SPIV); 1.70 apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); 1.71 apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); 1.72 + 1.73 + if (maxlvt >= 6) { 1.74 + apic_pm_state.apic_lvtcmci = apic_read(APIC_CMCI); 1.75 + } 1.76 + 1.77 apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); 1.78 apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); 1.79 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); 1.80 @@ -637,6 +654,7 @@ int lapic_resume(void) 1.81 { 1.82 unsigned int l, h; 1.83 unsigned long flags; 1.84 + int maxlvt = get_maxlvt(); 1.85 1.86 if (!apic_pm_state.active) 1.87 return 0; 1.88 @@ -669,6 +687,11 @@ int lapic_resume(void) 1.89 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); 1.90 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); 1.91 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); 1.92 + 1.93 + if (maxlvt >= 6) { 1.94 + apic_write(APIC_CMCI, apic_pm_state.apic_lvtcmci); 1.95 + } 1.96 + 1.97 apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); 1.98 apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); 1.99 apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
2.1 --- a/xen/arch/x86/cpu/mcheck/Makefile Fri Dec 19 14:56:36 2008 +0000 2.2 +++ b/xen/arch/x86/cpu/mcheck/Makefile Mon Dec 22 08:12:33 2008 +0000 2.3 @@ -3,8 +3,7 @@ obj-y += k7.o 2.4 obj-y += amd_k8.o 2.5 obj-y += amd_f10.o 2.6 obj-y += mce.o 2.7 +obj-y += mce_intel.o 2.8 obj-y += non-fatal.o 2.9 -obj-y += p4.o 2.10 obj-$(x86_32) += p5.o 2.11 -obj-$(x86_32) += p6.o 2.12 obj-$(x86_32) += winchip.o
3.1 --- a/xen/arch/x86/cpu/mcheck/k7.c Fri Dec 19 14:56:36 2008 +0000 3.2 +++ b/xen/arch/x86/cpu/mcheck/k7.c Mon Dec 22 08:12:33 2008 +0000 3.3 @@ -14,6 +14,7 @@ 3.4 #include <asm/msr.h> 3.5 3.6 #include "mce.h" 3.7 +#include "x86_mca.h" 3.8 3.9 /* Machine Check Handler For AMD Athlon/Duron */ 3.10 static fastcall void k7_machine_check(struct cpu_user_regs * regs, long error_code)
4.1 --- a/xen/arch/x86/cpu/mcheck/mce.c Fri Dec 19 14:56:36 2008 +0000 4.2 +++ b/xen/arch/x86/cpu/mcheck/mce.c Mon Dec 22 08:12:33 2008 +0000 4.3 @@ -27,7 +27,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks); /* non- 4.4 * to physical cpus present in the machine. 4.5 * The more physical cpus are available, the more entries you need. 4.6 */ 4.7 -#define MAX_MCINFO 10 4.8 +#define MAX_MCINFO 20 4.9 4.10 struct mc_machine_notify { 4.11 struct mc_info mc; 4.12 @@ -110,6 +110,22 @@ static void amd_mcheck_init(struct cpuin 4.13 } 4.14 } 4.15 4.16 +/*check the existence of Machine Check*/ 4.17 +int mce_available(struct cpuinfo_x86 *c) 4.18 +{ 4.19 + return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); 4.20 +} 4.21 + 4.22 +/*Make sure there are no machine check on offlined or suspended CPUs*/ 4.23 +void mce_disable_cpu(void) 4.24 +{ 4.25 + if (!mce_available(¤t_cpu_data) || mce_disabled == 1) 4.26 + return; 4.27 + printk(KERN_DEBUG "MCE: disable mce on CPU%d\n", smp_processor_id()); 4.28 + clear_in_cr4(X86_CR4_MCE); 4.29 +} 4.30 + 4.31 + 4.32 /* This has to be run for each processor */ 4.33 void mcheck_init(struct cpuinfo_x86 *c) 4.34 { 4.35 @@ -135,11 +151,13 @@ void mcheck_init(struct cpuinfo_x86 *c) 4.36 #ifndef CONFIG_X86_64 4.37 if (c->x86==5) 4.38 intel_p5_mcheck_init(c); 4.39 - if (c->x86==6) 4.40 - intel_p6_mcheck_init(c); 4.41 #endif 4.42 - if (c->x86==15) 4.43 - intel_p4_mcheck_init(c); 4.44 + /*If it is P6 or P4 family, including CORE 2 DUO series*/ 4.45 + if (c->x86 == 6 || c->x86==15) 4.46 + { 4.47 + printk(KERN_DEBUG "MCE: Intel newly family MC Init\n"); 4.48 + intel_mcheck_init(c); 4.49 + } 4.50 break; 4.51 4.52 #ifndef CONFIG_X86_64 4.53 @@ -413,7 +431,7 @@ void x86_mcinfo_dump(struct mc_info *mi) 4.54 if (mic == NULL) 4.55 return; 4.56 if (mic->type != MC_TYPE_BANK) 4.57 - continue; 4.58 + goto next; 4.59 4.60 mc_bank = (struct mcinfo_bank *)mic; 4.61 4.62 @@ -426,6 +444,7 @@ void x86_mcinfo_dump(struct mc_info *mi) 4.63 printk(" at %16"PRIx64, mc_bank->mc_addr); 4.64 4.65 printk("\n"); 4.66 +next: 4.67 mic = x86_mcinfo_next(mic); /* next entry */ 4.68 if ((mic == NULL) || (mic->size == 0)) 4.69 break;
5.1 --- a/xen/arch/x86/cpu/mcheck/mce.h Fri Dec 19 14:56:36 2008 +0000 5.2 +++ b/xen/arch/x86/cpu/mcheck/mce.h Mon Dec 22 08:12:33 2008 +0000 5.3 @@ -1,14 +1,22 @@ 5.4 #include <xen/init.h> 5.5 +#include <asm/types.h> 5.6 #include <asm/traps.h> 5.7 +#include <asm/atomic.h> 5.8 +#include <asm/percpu.h> 5.9 + 5.10 5.11 /* Init functions */ 5.12 void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c); 5.13 void amd_k7_mcheck_init(struct cpuinfo_x86 *c); 5.14 void amd_k8_mcheck_init(struct cpuinfo_x86 *c); 5.15 void amd_f10_mcheck_init(struct cpuinfo_x86 *c); 5.16 -void intel_p4_mcheck_init(struct cpuinfo_x86 *c); 5.17 + 5.18 + 5.19 +void intel_mcheck_timer(struct cpuinfo_x86 *c); 5.20 void intel_p5_mcheck_init(struct cpuinfo_x86 *c); 5.21 -void intel_p6_mcheck_init(struct cpuinfo_x86 *c); 5.22 +void intel_mcheck_init(struct cpuinfo_x86 *c); 5.23 +void mce_intel_feature_init(struct cpuinfo_x86 *c); 5.24 + 5.25 void winchip_mcheck_init(struct cpuinfo_x86 *c); 5.26 5.27 /* Function pointer used in the handlers to collect additional information 5.28 @@ -19,6 +27,7 @@ extern int (*mc_callback_bank_extended)( 5.29 uint16_t bank, uint64_t status); 5.30 5.31 5.32 +int mce_available(struct cpuinfo_x86 *c); 5.33 /* Helper functions used for collecting error telemetry */ 5.34 struct mc_info *x86_mcinfo_getptr(void); 5.35 void x86_mcinfo_clear(struct mc_info *mi); 5.36 @@ -26,6 +35,3 @@ int x86_mcinfo_add(struct mc_info *mi, v 5.37 void x86_mcinfo_dump(struct mc_info *mi); 5.38 void mc_panic(char *s); 5.39 5.40 -/* Global variables */ 5.41 -extern int mce_disabled; 5.42 -extern unsigned int nr_mce_banks;
6.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 6.2 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Dec 22 08:12:33 2008 +0000 6.3 @@ -0,0 +1,681 @@ 6.4 +#include <xen/init.h> 6.5 +#include <xen/types.h> 6.6 +#include <xen/irq.h> 6.7 +#include <xen/event.h> 6.8 +#include <xen/kernel.h> 6.9 +#include <xen/smp.h> 6.10 +#include <asm/processor.h> 6.11 +#include <asm/system.h> 6.12 +#include <asm/msr.h> 6.13 +#include "mce.h" 6.14 +#include "x86_mca.h" 6.15 + 6.16 +DEFINE_PER_CPU(cpu_banks_t, mce_banks_owned); 6.17 + 6.18 +static int nr_intel_ext_msrs = 0; 6.19 +static int cmci_support = 0; 6.20 +extern int firstbank; 6.21 + 6.22 +#ifdef CONFIG_X86_MCE_THERMAL 6.23 +static void unexpected_thermal_interrupt(struct cpu_user_regs *regs) 6.24 +{ 6.25 + printk(KERN_ERR "Thermal: CPU%d: Unexpected LVT TMR interrupt!\n", 6.26 + smp_processor_id()); 6.27 + add_taint(TAINT_MACHINE_CHECK); 6.28 +} 6.29 + 6.30 +/* P4/Xeon Thermal transition interrupt handler */ 6.31 +static void intel_thermal_interrupt(struct cpu_user_regs *regs) 6.32 +{ 6.33 + u32 l, h; 6.34 + unsigned int cpu = smp_processor_id(); 6.35 + static s_time_t next[NR_CPUS]; 6.36 + 6.37 + ack_APIC_irq(); 6.38 + if (NOW() < next[cpu]) 6.39 + return; 6.40 + 6.41 + next[cpu] = NOW() + MILLISECS(5000); 6.42 + rdmsr(MSR_IA32_THERM_STATUS, l, h); 6.43 + if (l & 0x1) { 6.44 + printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu); 6.45 + printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n", 6.46 + cpu); 6.47 + add_taint(TAINT_MACHINE_CHECK); 6.48 + } else { 6.49 + printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); 6.50 + } 6.51 +} 6.52 + 6.53 +/* Thermal interrupt handler for this CPU setup */ 6.54 +static void (*vendor_thermal_interrupt)(struct cpu_user_regs *regs) 6.55 + = unexpected_thermal_interrupt; 6.56 + 6.57 +fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs) 6.58 +{ 6.59 + irq_enter(); 6.60 + vendor_thermal_interrupt(regs); 6.61 + irq_exit(); 6.62 +} 6.63 + 6.64 +/* P4/Xeon Thermal regulation detect and init */ 6.65 +static void intel_init_thermal(struct cpuinfo_x86 *c) 6.66 +{ 6.67 + u32 l, h; 6.68 + int tm2 = 0; 6.69 + unsigned int cpu = smp_processor_id(); 6.70 + 6.71 + /* Thermal monitoring */ 6.72 + if (!cpu_has(c, X86_FEATURE_ACPI)) 6.73 + return; /* -ENODEV */ 6.74 + 6.75 + /* Clock modulation */ 6.76 + if (!cpu_has(c, X86_FEATURE_ACC)) 6.77 + return; /* -ENODEV */ 6.78 + 6.79 + /* first check if its enabled already, in which case there might 6.80 + * be some SMM goo which handles it, so we can't even put a handler 6.81 + * since it might be delivered via SMI already -zwanem. 6.82 + */ 6.83 + rdmsr (MSR_IA32_MISC_ENABLE, l, h); 6.84 + h = apic_read(APIC_LVTTHMR); 6.85 + if ((l & (1<<3)) && (h & APIC_DM_SMI)) { 6.86 + printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",cpu); 6.87 + return; /* -EBUSY */ 6.88 + } 6.89 + 6.90 + if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13))) 6.91 + tm2 = 1; 6.92 + 6.93 + /* check whether a vector already exists, temporarily masked? 
*/ 6.94 + if (h & APIC_VECTOR_MASK) { 6.95 + printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already installed\n", 6.96 + cpu, (h & APIC_VECTOR_MASK)); 6.97 + return; /* -EBUSY */ 6.98 + } 6.99 + 6.100 + /* The temperature transition interrupt handler setup */ 6.101 + h = THERMAL_APIC_VECTOR; /* our delivery vector */ 6.102 + h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ 6.103 + apic_write_around(APIC_LVTTHMR, h); 6.104 + 6.105 + rdmsr (MSR_IA32_THERM_INTERRUPT, l, h); 6.106 + wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); 6.107 + 6.108 + /* ok we're good to go... */ 6.109 + vendor_thermal_interrupt = intel_thermal_interrupt; 6.110 + 6.111 + rdmsr (MSR_IA32_MISC_ENABLE, l, h); 6.112 + wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); 6.113 + 6.114 + l = apic_read (APIC_LVTTHMR); 6.115 + apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); 6.116 + printk (KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", 6.117 + cpu, tm2 ? "TM2" : "TM1"); 6.118 + return; 6.119 +} 6.120 +#endif /* CONFIG_X86_MCE_THERMAL */ 6.121 + 6.122 +static inline void intel_get_extended_msrs(struct mcinfo_extended *mc_ext) 6.123 +{ 6.124 + if (nr_intel_ext_msrs == 0) 6.125 + return; 6.126 + 6.127 + /*this function will called when CAP(9).MCG_EXT_P = 1*/ 6.128 + memset(mc_ext, 0, sizeof(struct mcinfo_extended)); 6.129 + mc_ext->common.type = MC_TYPE_EXTENDED; 6.130 + mc_ext->common.size = sizeof(mc_ext); 6.131 + mc_ext->mc_msrs = 10; 6.132 + 6.133 + mc_ext->mc_msr[0].reg = MSR_IA32_MCG_EAX; 6.134 + rdmsrl(MSR_IA32_MCG_EAX, mc_ext->mc_msr[0].value); 6.135 + mc_ext->mc_msr[1].reg = MSR_IA32_MCG_EBX; 6.136 + rdmsrl(MSR_IA32_MCG_EBX, mc_ext->mc_msr[1].value); 6.137 + mc_ext->mc_msr[2].reg = MSR_IA32_MCG_ECX; 6.138 + rdmsrl(MSR_IA32_MCG_ECX, mc_ext->mc_msr[2].value); 6.139 + 6.140 + mc_ext->mc_msr[3].reg = MSR_IA32_MCG_EDX; 6.141 + rdmsrl(MSR_IA32_MCG_EDX, mc_ext->mc_msr[3].value); 6.142 + mc_ext->mc_msr[4].reg = MSR_IA32_MCG_ESI; 6.143 + rdmsrl(MSR_IA32_MCG_ESI, mc_ext->mc_msr[4].value); 6.144 + mc_ext->mc_msr[5].reg = MSR_IA32_MCG_EDI; 6.145 + rdmsrl(MSR_IA32_MCG_EDI, mc_ext->mc_msr[5].value); 6.146 + 6.147 + mc_ext->mc_msr[6].reg = MSR_IA32_MCG_EBP; 6.148 + rdmsrl(MSR_IA32_MCG_EBP, mc_ext->mc_msr[6].value); 6.149 + mc_ext->mc_msr[7].reg = MSR_IA32_MCG_ESP; 6.150 + rdmsrl(MSR_IA32_MCG_ESP, mc_ext->mc_msr[7].value); 6.151 + mc_ext->mc_msr[8].reg = MSR_IA32_MCG_EFLAGS; 6.152 + rdmsrl(MSR_IA32_MCG_EFLAGS, mc_ext->mc_msr[8].value); 6.153 + mc_ext->mc_msr[9].reg = MSR_IA32_MCG_EIP; 6.154 + rdmsrl(MSR_IA32_MCG_EIP, mc_ext->mc_msr[9].value); 6.155 +} 6.156 + 6.157 +/* machine_check_poll might be called by following types: 6.158 + * 1. called when do mcheck_init. 6.159 + * 2. called in cmci interrupt handler 6.160 + * 3. called in polling handler 6.161 + * It will generate a new mc_info item if found CE/UC errors. DOM0 is the 6.162 + * consumer. 
6.163 +*/ 6.164 +static int machine_check_poll(struct mc_info *mi, int calltype) 6.165 +{ 6.166 + int exceptions = (read_cr4() & X86_CR4_MCE); 6.167 + int i, nr_unit = 0, uc = 0, pcc = 0; 6.168 + uint64_t status, addr; 6.169 + struct mcinfo_global mcg; 6.170 + struct mcinfo_extended mce; 6.171 + unsigned int cpu; 6.172 + struct domain *d; 6.173 + 6.174 + cpu = smp_processor_id(); 6.175 + 6.176 + if (!mi) { 6.177 + printk(KERN_ERR "mcheck_poll: Failed to get mc_info entry\n"); 6.178 + return 0; 6.179 + } 6.180 + x86_mcinfo_clear(mi); 6.181 + 6.182 + memset(&mcg, 0, sizeof(mcg)); 6.183 + mcg.common.type = MC_TYPE_GLOBAL; 6.184 + mcg.common.size = sizeof(mcg); 6.185 + /*If called from cpu-reset check, don't need to fill them. 6.186 + *If called from cmci context, we'll try to fill domid by memory addr 6.187 + */ 6.188 + mcg.mc_domid = -1; 6.189 + mcg.mc_vcpuid = -1; 6.190 + if (calltype == MC_FLAG_POLLED || calltype == MC_FLAG_RESET) 6.191 + mcg.mc_flags = MC_FLAG_POLLED; 6.192 + else if (calltype == MC_FLAG_CMCI) 6.193 + mcg.mc_flags = MC_FLAG_CMCI; 6.194 + mcg.mc_socketid = phys_proc_id[cpu]; 6.195 + mcg.mc_coreid = cpu_core_id[cpu]; 6.196 + mcg.mc_apicid = cpu_physical_id(cpu); 6.197 + mcg.mc_core_threadid = mcg.mc_apicid & ( 1 << (smp_num_siblings - 1)); 6.198 + rdmsrl(MSR_IA32_MCG_STATUS, mcg.mc_gstatus); 6.199 + 6.200 + for ( i = 0; i < nr_mce_banks; i++ ) { 6.201 + struct mcinfo_bank mcb; 6.202 + /*For CMCI, only owners checks the owned MSRs*/ 6.203 + if ( !test_bit(i, __get_cpu_var(mce_banks_owned)) && 6.204 + (calltype & MC_FLAG_CMCI) ) 6.205 + continue; 6.206 + rdmsrl(MSR_IA32_MC0_STATUS + 4 * i, status); 6.207 + 6.208 + if (! (status & MCi_STATUS_VAL) ) 6.209 + continue; 6.210 + /* 6.211 + * Uncorrected events are handled by the exception 6.212 + * handler when it is enabled. But when the exception 6.213 + * is disabled such as when mcheck_init, log everything. 
6.214 + */ 6.215 + if ((status & MCi_STATUS_UC) && exceptions) 6.216 + continue; 6.217 + 6.218 + if (status & MCi_STATUS_UC) 6.219 + uc = 1; 6.220 + if (status & MCi_STATUS_PCC) 6.221 + pcc = 1; 6.222 + 6.223 + memset(&mcb, 0, sizeof(mcb)); 6.224 + mcb.common.type = MC_TYPE_BANK; 6.225 + mcb.common.size = sizeof(mcb); 6.226 + mcb.mc_bank = i; 6.227 + mcb.mc_status = status; 6.228 + if (status & MCi_STATUS_MISCV) 6.229 + rdmsrl(MSR_IA32_MC0_MISC + 4 * i, mcb.mc_misc); 6.230 + if (status & MCi_STATUS_ADDRV) { 6.231 + rdmsrl(MSR_IA32_MC0_ADDR + 4 * i, addr); 6.232 + d = maddr_get_owner(addr); 6.233 + if ( d && (calltype == MC_FLAG_CMCI || calltype == MC_FLAG_POLLED) ) 6.234 + mcb.mc_domid = d->domain_id; 6.235 + } 6.236 + if (cmci_support) 6.237 + rdmsrl(MSR_IA32_MC0_CTL2 + i, mcb.mc_ctrl2); 6.238 + if (calltype == MC_FLAG_CMCI) 6.239 + rdtscll(mcb.mc_tsc); 6.240 + x86_mcinfo_add(mi, &mcb); 6.241 + nr_unit++; 6.242 + add_taint(TAINT_MACHINE_CHECK); 6.243 + /*Clear state for this bank */ 6.244 + wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0); 6.245 + printk(KERN_DEBUG "mcheck_poll: bank%i CPU%d status[%lx]\n", 6.246 + i, cpu, status); 6.247 + printk(KERN_DEBUG "mcheck_poll: CPU%d, SOCKET%d, CORE%d, APICID[%d], " 6.248 + "thread[%d]\n", cpu, mcg.mc_socketid, 6.249 + mcg.mc_coreid, mcg.mc_apicid, mcg.mc_core_threadid); 6.250 + 6.251 + } 6.252 + /*if pcc = 1, uc must be 1*/ 6.253 + if (pcc) 6.254 + mcg.mc_flags |= MC_FLAG_UNCORRECTABLE; 6.255 + else if (uc) 6.256 + mcg.mc_flags |= MC_FLAG_RECOVERABLE; 6.257 + else /*correctable*/ 6.258 + mcg.mc_flags |= MC_FLAG_CORRECTABLE; 6.259 + 6.260 + if (nr_unit && nr_intel_ext_msrs && 6.261 + (mcg.mc_gstatus & MCG_STATUS_EIPV)) { 6.262 + intel_get_extended_msrs(&mce); 6.263 + x86_mcinfo_add(mi, &mce); 6.264 + } 6.265 + if (nr_unit) 6.266 + x86_mcinfo_add(mi, &mcg); 6.267 + /*Clear global state*/ 6.268 + return nr_unit; 6.269 +} 6.270 + 6.271 +static fastcall void intel_machine_check(struct cpu_user_regs * regs, long error_code) 6.272 +{ 6.273 + /* MACHINE CHECK Error handler will be sent in another patch, 6.274 + * simply copy old solutions here. This code will be replaced 6.275 + * by upcoming machine check patches 6.276 + */ 6.277 + 6.278 + int recover=1; 6.279 + u32 alow, ahigh, high, low; 6.280 + u32 mcgstl, mcgsth; 6.281 + int i; 6.282 + 6.283 + rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); 6.284 + if (mcgstl & (1<<0)) /* Recoverable ? 
*/ 6.285 + recover=0; 6.286 + 6.287 + printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", 6.288 + smp_processor_id(), mcgsth, mcgstl); 6.289 + 6.290 + for (i=0; i<nr_mce_banks; i++) { 6.291 + rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); 6.292 + if (high & (1<<31)) { 6.293 + if (high & (1<<29)) 6.294 + recover |= 1; 6.295 + if (high & (1<<25)) 6.296 + recover |= 2; 6.297 + printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low); 6.298 + high &= ~(1<<31); 6.299 + if (high & (1<<27)) { 6.300 + rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); 6.301 + printk ("[%08x%08x]", ahigh, alow); 6.302 + } 6.303 + if (high & (1<<26)) { 6.304 + rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); 6.305 + printk (" at %08x%08x", ahigh, alow); 6.306 + } 6.307 + printk ("\n"); 6.308 + } 6.309 + } 6.310 + 6.311 + if (recover & 2) 6.312 + mc_panic ("CPU context corrupt"); 6.313 + if (recover & 1) 6.314 + mc_panic ("Unable to continue"); 6.315 + 6.316 + printk(KERN_EMERG "Attempting to continue.\n"); 6.317 + /* 6.318 + * Do not clear the MSR_IA32_MCi_STATUS if the error is not 6.319 + * recoverable/continuable.This will allow BIOS to look at the MSRs 6.320 + * for errors if the OS could not log the error. 6.321 + */ 6.322 + for (i=0; i<nr_mce_banks; i++) { 6.323 + u32 msr; 6.324 + msr = MSR_IA32_MC0_STATUS+i*4; 6.325 + rdmsr (msr, low, high); 6.326 + if (high&(1<<31)) { 6.327 + /* Clear it */ 6.328 + wrmsr(msr, 0UL, 0UL); 6.329 + /* Serialize */ 6.330 + wmb(); 6.331 + add_taint(TAINT_MACHINE_CHECK); 6.332 + } 6.333 + } 6.334 + mcgstl &= ~(1<<2); 6.335 + wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); 6.336 +} 6.337 + 6.338 +extern void (*cpu_down_handler)(int down_cpu); 6.339 +extern void (*cpu_down_rollback_handler)(int down_cpu); 6.340 +extern void mce_disable_cpu(void); 6.341 +static bool_t cmci_clear_lock = 0; 6.342 +static DEFINE_SPINLOCK(cmci_discover_lock); 6.343 +static DEFINE_PER_CPU(cpu_banks_t, no_cmci_banks); 6.344 + 6.345 +/* 6.346 + * Discover bank sharing using the algorithm recommended in the SDM. 6.347 + */ 6.348 +static int do_cmci_discover(int i) 6.349 +{ 6.350 + unsigned msr = MSR_IA32_MC0_CTL2 + i; 6.351 + u64 val; 6.352 + 6.353 + rdmsrl(msr, val); 6.354 + /* Some other CPU already owns this bank. */ 6.355 + if (val & CMCI_EN) { 6.356 + clear_bit(i, __get_cpu_var(mce_banks_owned)); 6.357 + goto out; 6.358 + } 6.359 + wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD); 6.360 + rdmsrl(msr, val); 6.361 + 6.362 + if (!(val & CMCI_EN)) { 6.363 + /* 6.364 + * This bank does not support CMCI. The polling 6.365 + * timer has to handle it. 6.366 + */ 6.367 + set_bit(i, __get_cpu_var(no_cmci_banks)); 6.368 + return 0; 6.369 + } 6.370 + set_bit(i, __get_cpu_var(mce_banks_owned)); 6.371 +out: 6.372 + clear_bit(i, __get_cpu_var(no_cmci_banks)); 6.373 + return 1; 6.374 +} 6.375 + 6.376 +void cmci_discover(void) 6.377 +{ 6.378 + int i; 6.379 + 6.380 + printk(KERN_DEBUG "CMCI: find owner on CPU%d\n", smp_processor_id()); 6.381 + spin_lock(&cmci_discover_lock); 6.382 + for (i = 0; i < nr_mce_banks; i++) { 6.383 + /*If the cpu is the bank owner, need not re-discover*/ 6.384 + if (test_bit(i, __get_cpu_var(mce_banks_owned))) 6.385 + continue; 6.386 + do_cmci_discover(i); 6.387 + } 6.388 + spin_unlock(&cmci_discover_lock); 6.389 + printk(KERN_DEBUG "CMCI: CPU%d owner_map[%lx], no_cmci_map[%lx]\n", 6.390 + smp_processor_id(), 6.391 + *((unsigned long *)__get_cpu_var(mce_banks_owned)), 6.392 + *((unsigned long *)__get_cpu_var(no_cmci_banks))); 6.393 +} 6.394 + 6.395 +/* 6.396 + * Define an owner for each bank. 
Banks can be shared between CPUs 6.397 + * and to avoid reporting events multiple times always set up one 6.398 + * CPU as owner. 6.399 + * 6.400 + * The assignment has to be redone when CPUs go offline and 6.401 + * any of the owners goes away. Also pollers run in parallel so we 6.402 + * have to be careful to update the banks in a way that doesn't 6.403 + * lose or duplicate events. 6.404 + */ 6.405 + 6.406 +static void mce_set_owner(void) 6.407 +{ 6.408 + 6.409 + if (!cmci_support || mce_disabled == 1) 6.410 + return; 6.411 + 6.412 + cmci_discover(); 6.413 +} 6.414 + 6.415 +static void clear_cmci(void) 6.416 +{ 6.417 + int i; 6.418 + 6.419 + if (!cmci_support || mce_disabled == 1) 6.420 + return; 6.421 + 6.422 + printk(KERN_DEBUG "CMCI: clear_cmci support on CPU%d\n", 6.423 + smp_processor_id()); 6.424 + 6.425 + for (i = 0; i < nr_mce_banks; i++) { 6.426 + unsigned msr = MSR_IA32_MC0_CTL2 + i; 6.427 + u64 val; 6.428 + if (!test_bit(i, __get_cpu_var(mce_banks_owned))) 6.429 + continue; 6.430 + rdmsrl(msr, val); 6.431 + if (val & (CMCI_EN|CMCI_THRESHOLD_MASK)) 6.432 + wrmsrl(msr, val & ~(CMCI_EN|CMCI_THRESHOLD_MASK)); 6.433 + clear_bit(i, __get_cpu_var(mce_banks_owned)); 6.434 + } 6.435 +} 6.436 + 6.437 +/*we need to re-set cmci owners when cpu_down fail or cpu_up*/ 6.438 +static void cmci_reenable_cpu(void *h) 6.439 +{ 6.440 + if (!mce_available(¤t_cpu_data) || mce_disabled == 1) 6.441 + return; 6.442 + printk(KERN_DEBUG "CMCI: reenable mce on CPU%d\n", smp_processor_id()); 6.443 + mce_set_owner(); 6.444 + set_in_cr4(X86_CR4_MCE); 6.445 +} 6.446 + 6.447 +/* When take cpu_down, we need to execute the impacted cmci_owner judge algorithm 6.448 + * First, we need to clear the ownership on the dead CPU 6.449 + * Then, other CPUs will check whether to take the bank's ownership from down_cpu 6.450 + * CPU0 need not and "never" execute this path 6.451 +*/ 6.452 +void __cpu_clear_cmci( int down_cpu) 6.453 +{ 6.454 + int cpu = smp_processor_id(); 6.455 + 6.456 + if (!cmci_support && mce_disabled == 1) 6.457 + return; 6.458 + 6.459 + if (cpu == 0) { 6.460 + printk(KERN_DEBUG "CMCI: CPU0 need not be cleared\n"); 6.461 + return; 6.462 + } 6.463 + 6.464 + local_irq_disable(); 6.465 + if (cpu == down_cpu){ 6.466 + mce_disable_cpu(); 6.467 + clear_cmci(); 6.468 + wmb(); 6.469 + test_and_set_bool(cmci_clear_lock); 6.470 + return; 6.471 + } 6.472 + while (!cmci_clear_lock) 6.473 + cpu_relax(); 6.474 + if (cpu != down_cpu) 6.475 + mce_set_owner(); 6.476 + 6.477 + test_and_clear_bool(cmci_clear_lock); 6.478 + local_irq_enable(); 6.479 + 6.480 +} 6.481 + 6.482 +void __cpu_clear_cmci_rollback( int down_cpu) 6.483 +{ 6.484 + cpumask_t down_map; 6.485 + if (!cmci_support || mce_disabled == 1) 6.486 + return; 6.487 + 6.488 + cpus_clear(down_map); 6.489 + cpu_set(down_cpu, down_map); 6.490 + printk(KERN_ERR "CMCI: cpu_down fail. 
" 6.491 + "Reenable cmci on CPU%d\n", down_cpu); 6.492 + on_selected_cpus(down_map, cmci_reenable_cpu, NULL, 1, 1); 6.493 +} 6.494 + 6.495 +static void intel_init_cmci(struct cpuinfo_x86 *c) 6.496 +{ 6.497 + u32 l, apic; 6.498 + int cpu = smp_processor_id(); 6.499 + 6.500 + if (!mce_available(c) || !cmci_support) { 6.501 + printk(KERN_DEBUG "CMCI: CPU%d has no CMCI support\n", cpu); 6.502 + return; 6.503 + } 6.504 + 6.505 + apic = apic_read(APIC_CMCI); 6.506 + if ( apic & APIC_VECTOR_MASK ) 6.507 + { 6.508 + printk(KERN_WARNING "CPU%d CMCI LVT vector (%#x) already installed\n", 6.509 + cpu, ( apic & APIC_VECTOR_MASK )); 6.510 + return; 6.511 + } 6.512 + 6.513 + apic = CMCI_APIC_VECTOR; 6.514 + apic |= (APIC_DM_FIXED | APIC_LVT_MASKED); 6.515 + apic_write_around(APIC_CMCI, apic); 6.516 + 6.517 + /*now clear mask flag*/ 6.518 + l = apic_read(APIC_CMCI); 6.519 + apic_write_around(APIC_CMCI, l & ~APIC_LVT_MASKED); 6.520 + cpu_down_handler = __cpu_clear_cmci; 6.521 + cpu_down_rollback_handler = __cpu_clear_cmci_rollback; 6.522 +} 6.523 + 6.524 +fastcall void smp_cmci_interrupt(struct cpu_user_regs *regs) 6.525 +{ 6.526 + int nr_unit; 6.527 + struct mc_info *mi = x86_mcinfo_getptr(); 6.528 + int cpu = smp_processor_id(); 6.529 + 6.530 + ack_APIC_irq(); 6.531 + irq_enter(); 6.532 + printk(KERN_DEBUG "CMCI: cmci_intr happen on CPU%d\n", cpu); 6.533 + nr_unit = machine_check_poll(mi, MC_FLAG_CMCI); 6.534 + if (nr_unit) { 6.535 + x86_mcinfo_dump(mi); 6.536 + if (dom0 && guest_enabled_event(dom0->vcpu[0], VIRQ_MCA)) 6.537 + send_guest_global_virq(dom0, VIRQ_MCA); 6.538 + } 6.539 + irq_exit(); 6.540 +} 6.541 + 6.542 +void mce_intel_feature_init(struct cpuinfo_x86 *c) 6.543 +{ 6.544 + 6.545 +#ifdef CONFIG_X86_MCE_THERMAL 6.546 + intel_init_thermal(c); 6.547 +#endif 6.548 + intel_init_cmci(c); 6.549 +} 6.550 + 6.551 +static void mce_cap_init(struct cpuinfo_x86 *c) 6.552 +{ 6.553 + u32 l, h; 6.554 + 6.555 + rdmsr (MSR_IA32_MCG_CAP, l, h); 6.556 + if ((l & MCG_CMCI_P) && cpu_has_apic) 6.557 + cmci_support = 1; 6.558 + 6.559 + nr_mce_banks = l & 0xff; 6.560 + if (nr_mce_banks > MAX_NR_BANKS) 6.561 + printk(KERN_WARNING "MCE: exceed max mce banks\n"); 6.562 + if (l & MCG_EXT_P) 6.563 + { 6.564 + nr_intel_ext_msrs = (l >> MCG_EXT_CNT) & 0xff; 6.565 + printk (KERN_INFO "CPU%d: Intel Extended MCE MSRs (%d) available\n", 6.566 + smp_processor_id(), nr_intel_ext_msrs); 6.567 + } 6.568 + /* for most of p6 family, bank 0 is an alias bios MSR. 6.569 + * But after model>1a, bank 0 is available*/ 6.570 + if ( c->x86 == 6 && c->x86_vendor == X86_VENDOR_INTEL 6.571 + && c->x86_model < 0x1A) 6.572 + firstbank = 1; 6.573 + else 6.574 + firstbank = 0; 6.575 +} 6.576 + 6.577 +static void mce_init(void) 6.578 +{ 6.579 + u32 l, h; 6.580 + int i, nr_unit; 6.581 + struct mc_info *mi = x86_mcinfo_getptr(); 6.582 + clear_in_cr4(X86_CR4_MCE); 6.583 + /* log the machine checks left over from the previous reset. 6.584 + * This also clears all registers*/ 6.585 + 6.586 + nr_unit = machine_check_poll(mi, MC_FLAG_RESET); 6.587 + /*in the boot up stage, not expect inject to DOM0, but go print out 6.588 + */ 6.589 + if (nr_unit > 0) 6.590 + x86_mcinfo_dump(mi); 6.591 + 6.592 + set_in_cr4(X86_CR4_MCE); 6.593 + rdmsr (MSR_IA32_MCG_CAP, l, h); 6.594 + if (l & MCG_CTL_P) /* Control register present ? 
*/ 6.595 + wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); 6.596 + 6.597 + for (i = firstbank; i < nr_mce_banks; i++) 6.598 + { 6.599 + /*Some banks are shared across cores, use MCi_CTRL to judge whether 6.600 + * this bank has been initialized by other cores already.*/ 6.601 + rdmsr(MSR_IA32_MC0_CTL + 4*i, l, h); 6.602 + if (!l & !h) 6.603 + { 6.604 + /*if ctl is 0, this bank is never initialized*/ 6.605 + printk(KERN_DEBUG "mce_init: init bank%d\n", i); 6.606 + wrmsr (MSR_IA32_MC0_CTL + 4*i, 0xffffffff, 0xffffffff); 6.607 + wrmsr (MSR_IA32_MC0_STATUS + 4*i, 0x0, 0x0); 6.608 + } 6.609 + } 6.610 + if (firstbank) /*if cmci enabled, firstbank = 0*/ 6.611 + wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0); 6.612 +} 6.613 + 6.614 +/*p4/p6 faimily has similar MCA initialization process*/ 6.615 +void intel_mcheck_init(struct cpuinfo_x86 *c) 6.616 +{ 6.617 + 6.618 + mce_cap_init(c); 6.619 + printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", 6.620 + smp_processor_id()); 6.621 + /* machine check is available */ 6.622 + machine_check_vector = intel_machine_check; 6.623 + mce_init(); 6.624 + mce_intel_feature_init(c); 6.625 + mce_set_owner(); 6.626 +} 6.627 + 6.628 +/* 6.629 + * Periodic polling timer for "silent" machine check errors. If the 6.630 + * poller finds an MCE, poll faster. When the poller finds no more 6.631 + * errors, poll slower 6.632 +*/ 6.633 +static struct timer mce_timer; 6.634 + 6.635 +#define MCE_PERIOD 4000 6.636 +#define MCE_MIN 2000 6.637 +#define MCE_MAX 32000 6.638 + 6.639 +static u64 period = MCE_PERIOD; 6.640 +static int adjust = 0; 6.641 + 6.642 +static void mce_intel_checkregs(void *info) 6.643 +{ 6.644 + int nr_unit; 6.645 + struct mc_info *mi = x86_mcinfo_getptr(); 6.646 + 6.647 + if( !mce_available(¤t_cpu_data)) 6.648 + return; 6.649 + nr_unit = machine_check_poll(mi, MC_FLAG_POLLED); 6.650 + if (nr_unit) 6.651 + { 6.652 + x86_mcinfo_dump(mi); 6.653 + adjust++; 6.654 + if (dom0 && guest_enabled_event(dom0->vcpu[0], VIRQ_MCA)) 6.655 + send_guest_global_virq(dom0, VIRQ_MCA); 6.656 + } 6.657 +} 6.658 + 6.659 +static void mce_intel_work_fn(void *data) 6.660 +{ 6.661 + on_each_cpu(mce_intel_checkregs, data, 1, 1); 6.662 + if (adjust) { 6.663 + period = period / (adjust + 1); 6.664 + printk(KERN_DEBUG "mcheck_poll: Find error, shorten interval to %ld", 6.665 + period); 6.666 + } 6.667 + else { 6.668 + period *= 2; 6.669 + } 6.670 + if (period > MCE_MAX) 6.671 + period = MCE_MAX; 6.672 + if (period < MCE_MIN) 6.673 + period = MCE_MIN; 6.674 + set_timer(&mce_timer, NOW() + MILLISECS(period)); 6.675 + adjust = 0; 6.676 +} 6.677 + 6.678 +void intel_mcheck_timer(struct cpuinfo_x86 *c) 6.679 +{ 6.680 + printk(KERN_DEBUG "mcheck_poll: Init_mcheck_timer\n"); 6.681 + init_timer(&mce_timer, mce_intel_work_fn, NULL, 0); 6.682 + set_timer(&mce_timer, NOW() + MILLISECS(MCE_PERIOD)); 6.683 +} 6.684 +
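
A small addressing detail in machine_check_poll() and mce_init() above is easy to miss: the legacy per-bank MSR block (IA32_MCi_CTL/STATUS/ADDR/MISC) starts at 0x400 with four MSRs per bank, while the new IA32_MCi_CTL2 registers form a flat array starting at 0x280, so the same bank index is scaled by 4 for one family and by 1 for the other. A sketch of that layout follows; the MSR values match msr-index.h, but the MCx_*() wrapper macros are illustrative names, not something the patch adds.

```c
/* Per-bank machine-check MSR addressing (values as in msr-index.h). */
#define MSR_IA32_MC0_CTL     0x00000400
#define MSR_IA32_MC0_STATUS  0x00000401
#define MSR_IA32_MC0_ADDR    0x00000402
#define MSR_IA32_MC0_MISC    0x00000403
#define MSR_IA32_MC0_CTL2    0x00000280

/* Illustrative wrappers: the legacy block strides by 4, CTL2 strides by 1. */
#define MCx_CTL(i)     (MSR_IA32_MC0_CTL    + 4 * (i))  /* 0x400, 0x404, 0x408, ... */
#define MCx_STATUS(i)  (MSR_IA32_MC0_STATUS + 4 * (i))  /* 0x401, 0x405, 0x409, ... */
#define MCx_CTL2(i)    (MSR_IA32_MC0_CTL2   +     (i))  /* 0x280, 0x281, 0x282, ... */
```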
7.1 --- a/xen/arch/x86/cpu/mcheck/non-fatal.c Fri Dec 19 14:56:36 2008 +0000 7.2 +++ b/xen/arch/x86/cpu/mcheck/non-fatal.c Mon Dec 22 08:12:33 2008 +0000 7.3 @@ -19,8 +19,8 @@ 7.4 #include <asm/msr.h> 7.5 7.6 #include "mce.h" 7.7 - 7.8 -static int firstbank; 7.9 +#include "x86_mca.h" 7.10 +int firstbank = 0; 7.11 static struct timer mce_timer; 7.12 7.13 #define MCE_PERIOD MILLISECS(15000) 7.14 @@ -61,13 +61,8 @@ static int __init init_nonfatal_mce_chec 7.15 struct cpuinfo_x86 *c = &boot_cpu_data; 7.16 7.17 /* Check for MCE support */ 7.18 - if (!cpu_has(c, X86_FEATURE_MCE)) 7.19 + if (!mce_available(c)) 7.20 return -ENODEV; 7.21 - 7.22 - /* Check for PPro style MCA */ 7.23 - if (!cpu_has(c, X86_FEATURE_MCA)) 7.24 - return -ENODEV; 7.25 - 7.26 /* 7.27 * Check for non-fatal errors every MCE_RATE s 7.28 */ 7.29 @@ -85,12 +80,20 @@ static int __init init_nonfatal_mce_chec 7.30 break; 7.31 7.32 case X86_VENDOR_INTEL: 7.33 - init_timer(&mce_timer, mce_work_fn, NULL, 0); 7.34 - set_timer(&mce_timer, NOW() + MCE_PERIOD); 7.35 + /* p5 family is different. P4/P6 and latest CPUs shares the 7.36 + * same polling methods 7.37 + */ 7.38 + if ( c->x86 != 5 ) 7.39 + { 7.40 + /* some CPUs or banks don't support cmci, we need to 7.41 + * enable this feature anyway 7.42 + */ 7.43 + intel_mcheck_timer(c); 7.44 + } 7.45 break; 7.46 } 7.47 7.48 - printk(KERN_INFO "MCA: Machine check polling timer started.\n"); 7.49 + printk(KERN_INFO "mcheck_poll: Machine check polling timer started.\n"); 7.50 return 0; 7.51 } 7.52 __initcall(init_nonfatal_mce_checker);
8.1 --- a/xen/arch/x86/cpu/mcheck/p4.c Fri Dec 19 14:56:36 2008 +0000 8.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 8.3 @@ -1,270 +0,0 @@ 8.4 -/* 8.5 - * P4 specific Machine Check Exception Reporting 8.6 - */ 8.7 - 8.8 -#include <xen/init.h> 8.9 -#include <xen/types.h> 8.10 -#include <xen/kernel.h> 8.11 -#include <xen/config.h> 8.12 -#include <xen/smp.h> 8.13 -#include <xen/irq.h> 8.14 -#include <xen/time.h> 8.15 -#include <asm/processor.h> 8.16 -#include <asm/system.h> 8.17 -#include <asm/msr.h> 8.18 -#include <asm/apic.h> 8.19 - 8.20 -#include "mce.h" 8.21 - 8.22 -/* as supported by the P4/Xeon family */ 8.23 -struct intel_mce_extended_msrs { 8.24 - u32 eax; 8.25 - u32 ebx; 8.26 - u32 ecx; 8.27 - u32 edx; 8.28 - u32 esi; 8.29 - u32 edi; 8.30 - u32 ebp; 8.31 - u32 esp; 8.32 - u32 eflags; 8.33 - u32 eip; 8.34 - /* u32 *reserved[]; */ 8.35 -}; 8.36 - 8.37 -static int mce_num_extended_msrs = 0; 8.38 - 8.39 - 8.40 -#ifdef CONFIG_X86_MCE_P4THERMAL 8.41 -static void unexpected_thermal_interrupt(struct cpu_user_regs *regs) 8.42 -{ 8.43 - printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", 8.44 - smp_processor_id()); 8.45 - add_taint(TAINT_MACHINE_CHECK); 8.46 -} 8.47 - 8.48 -/* P4/Xeon Thermal transition interrupt handler */ 8.49 -static void intel_thermal_interrupt(struct cpu_user_regs *regs) 8.50 -{ 8.51 - u32 l, h; 8.52 - unsigned int cpu = smp_processor_id(); 8.53 - static s_time_t next[NR_CPUS]; 8.54 - 8.55 - ack_APIC_irq(); 8.56 - 8.57 - if (NOW() < next[cpu]) 8.58 - return; 8.59 - 8.60 - next[cpu] = NOW() + MILLISECS(5000); 8.61 - rdmsr(MSR_IA32_THERM_STATUS, l, h); 8.62 - if (l & 0x1) { 8.63 - printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu); 8.64 - printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n", 8.65 - cpu); 8.66 - add_taint(TAINT_MACHINE_CHECK); 8.67 - } else { 8.68 - printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); 8.69 - } 8.70 -} 8.71 - 8.72 -/* Thermal interrupt handler for this CPU setup */ 8.73 -static void (*vendor_thermal_interrupt)(struct cpu_user_regs *regs) = unexpected_thermal_interrupt; 8.74 - 8.75 -fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs) 8.76 -{ 8.77 - irq_enter(); 8.78 - vendor_thermal_interrupt(regs); 8.79 - irq_exit(); 8.80 -} 8.81 - 8.82 -/* P4/Xeon Thermal regulation detect and init */ 8.83 -static void intel_init_thermal(struct cpuinfo_x86 *c) 8.84 -{ 8.85 - u32 l, h; 8.86 - unsigned int cpu = smp_processor_id(); 8.87 - 8.88 - /* Thermal monitoring */ 8.89 - if (!cpu_has(c, X86_FEATURE_ACPI)) 8.90 - return; /* -ENODEV */ 8.91 - 8.92 - /* Clock modulation */ 8.93 - if (!cpu_has(c, X86_FEATURE_ACC)) 8.94 - return; /* -ENODEV */ 8.95 - 8.96 - /* first check if its enabled already, in which case there might 8.97 - * be some SMM goo which handles it, so we can't even put a handler 8.98 - * since it might be delivered via SMI already -zwanem. 8.99 - */ 8.100 - rdmsr (MSR_IA32_MISC_ENABLE, l, h); 8.101 - h = apic_read(APIC_LVTTHMR); 8.102 - if ((l & (1<<3)) && (h & APIC_DM_SMI)) { 8.103 - printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", 8.104 - cpu); 8.105 - return; /* -EBUSY */ 8.106 - } 8.107 - 8.108 - /* check whether a vector already exists, temporarily masked? 
*/ 8.109 - if (h & APIC_VECTOR_MASK) { 8.110 - printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " 8.111 - "installed\n", 8.112 - cpu, (h & APIC_VECTOR_MASK)); 8.113 - return; /* -EBUSY */ 8.114 - } 8.115 - 8.116 - /* The temperature transition interrupt handler setup */ 8.117 - h = THERMAL_APIC_VECTOR; /* our delivery vector */ 8.118 - h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ 8.119 - apic_write_around(APIC_LVTTHMR, h); 8.120 - 8.121 - rdmsr (MSR_IA32_THERM_INTERRUPT, l, h); 8.122 - wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); 8.123 - 8.124 - /* ok we're good to go... */ 8.125 - vendor_thermal_interrupt = intel_thermal_interrupt; 8.126 - 8.127 - rdmsr (MSR_IA32_MISC_ENABLE, l, h); 8.128 - wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); 8.129 - 8.130 - l = apic_read (APIC_LVTTHMR); 8.131 - apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); 8.132 - printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); 8.133 - return; 8.134 -} 8.135 -#endif /* CONFIG_X86_MCE_P4THERMAL */ 8.136 - 8.137 - 8.138 -/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ 8.139 -static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r) 8.140 -{ 8.141 - u32 h; 8.142 - 8.143 - if (mce_num_extended_msrs == 0) 8.144 - goto done; 8.145 - 8.146 - rdmsr (MSR_IA32_MCG_EAX, r->eax, h); 8.147 - rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); 8.148 - rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); 8.149 - rdmsr (MSR_IA32_MCG_EDX, r->edx, h); 8.150 - rdmsr (MSR_IA32_MCG_ESI, r->esi, h); 8.151 - rdmsr (MSR_IA32_MCG_EDI, r->edi, h); 8.152 - rdmsr (MSR_IA32_MCG_EBP, r->ebp, h); 8.153 - rdmsr (MSR_IA32_MCG_ESP, r->esp, h); 8.154 - rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); 8.155 - rdmsr (MSR_IA32_MCG_EIP, r->eip, h); 8.156 - 8.157 - /* can we rely on kmalloc to do a dynamic 8.158 - * allocation for the reserved registers? 8.159 - */ 8.160 -done: 8.161 - return mce_num_extended_msrs; 8.162 -} 8.163 - 8.164 -static fastcall void intel_machine_check(struct cpu_user_regs * regs, long error_code) 8.165 -{ 8.166 - int recover=1; 8.167 - u32 alow, ahigh, high, low; 8.168 - u32 mcgstl, mcgsth; 8.169 - int i; 8.170 - struct intel_mce_extended_msrs dbg; 8.171 - 8.172 - rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); 8.173 - if (mcgstl & (1<<0)) /* Recoverable ? 
*/ 8.174 - recover=0; 8.175 - 8.176 - printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", 8.177 - smp_processor_id(), mcgsth, mcgstl); 8.178 - 8.179 - if (intel_get_extended_msrs(&dbg)) { 8.180 - printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n", 8.181 - smp_processor_id(), dbg.eip, dbg.eflags); 8.182 - printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n", 8.183 - dbg.eax, dbg.ebx, dbg.ecx, dbg.edx); 8.184 - printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", 8.185 - dbg.esi, dbg.edi, dbg.ebp, dbg.esp); 8.186 - } 8.187 - 8.188 - for (i=0; i<nr_mce_banks; i++) { 8.189 - rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); 8.190 - if (high & (1<<31)) { 8.191 - if (high & (1<<29)) 8.192 - recover |= 1; 8.193 - if (high & (1<<25)) 8.194 - recover |= 2; 8.195 - printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low); 8.196 - high &= ~(1<<31); 8.197 - if (high & (1<<27)) { 8.198 - rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); 8.199 - printk ("[%08x%08x]", ahigh, alow); 8.200 - } 8.201 - if (high & (1<<26)) { 8.202 - rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); 8.203 - printk (" at %08x%08x", ahigh, alow); 8.204 - } 8.205 - printk ("\n"); 8.206 - } 8.207 - } 8.208 - 8.209 - if (recover & 2) 8.210 - mc_panic ("CPU context corrupt"); 8.211 - if (recover & 1) 8.212 - mc_panic ("Unable to continue"); 8.213 - 8.214 - printk(KERN_EMERG "Attempting to continue.\n"); 8.215 - /* 8.216 - * Do not clear the MSR_IA32_MCi_STATUS if the error is not 8.217 - * recoverable/continuable.This will allow BIOS to look at the MSRs 8.218 - * for errors if the OS could not log the error. 8.219 - */ 8.220 - for (i=0; i<nr_mce_banks; i++) { 8.221 - u32 msr; 8.222 - msr = MSR_IA32_MC0_STATUS+i*4; 8.223 - rdmsr (msr, low, high); 8.224 - if (high&(1<<31)) { 8.225 - /* Clear it */ 8.226 - wrmsr(msr, 0UL, 0UL); 8.227 - /* Serialize */ 8.228 - wmb(); 8.229 - add_taint(TAINT_MACHINE_CHECK); 8.230 - } 8.231 - } 8.232 - mcgstl &= ~(1<<2); 8.233 - wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); 8.234 -} 8.235 - 8.236 - 8.237 -void intel_p4_mcheck_init(struct cpuinfo_x86 *c) 8.238 -{ 8.239 - u32 l, h; 8.240 - int i; 8.241 - 8.242 - machine_check_vector = intel_machine_check; 8.243 - wmb(); 8.244 - 8.245 - printk (KERN_INFO "Intel machine check architecture supported.\n"); 8.246 - rdmsr (MSR_IA32_MCG_CAP, l, h); 8.247 - if (l & (1<<8)) /* Control register present ? */ 8.248 - wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); 8.249 - nr_mce_banks = l & 0xff; 8.250 - 8.251 - for (i=0; i<nr_mce_banks; i++) { 8.252 - wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); 8.253 - wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); 8.254 - } 8.255 - 8.256 - set_in_cr4 (X86_CR4_MCE); 8.257 - printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", 8.258 - smp_processor_id()); 8.259 - 8.260 - /* Check for P4/Xeon extended MCE MSRs */ 8.261 - rdmsr (MSR_IA32_MCG_CAP, l, h); 8.262 - if (l & (1<<9)) {/* MCG_EXT_P */ 8.263 - mce_num_extended_msrs = (l >> 16) & 0xff; 8.264 - printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" 8.265 - " available\n", 8.266 - smp_processor_id(), mce_num_extended_msrs); 8.267 - 8.268 -#ifdef CONFIG_X86_MCE_P4THERMAL 8.269 - /* Check for P4/Xeon Thermal monitor */ 8.270 - intel_init_thermal(c); 8.271 -#endif 8.272 - } 8.273 -}
9.1 --- a/xen/arch/x86/cpu/mcheck/p6.c Fri Dec 19 14:56:36 2008 +0000 9.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 9.3 @@ -1,118 +0,0 @@ 9.4 -/* 9.5 - * P6 specific Machine Check Exception Reporting 9.6 - * (C) Copyright 2002 Alan Cox <alan@redhat.com> 9.7 - */ 9.8 - 9.9 -#include <xen/init.h> 9.10 -#include <xen/types.h> 9.11 -#include <xen/kernel.h> 9.12 -#include <xen/smp.h> 9.13 - 9.14 -#include <asm/processor.h> 9.15 -#include <asm/system.h> 9.16 -#include <asm/msr.h> 9.17 - 9.18 -#include "mce.h" 9.19 - 9.20 -/* Machine Check Handler For PII/PIII */ 9.21 -static fastcall void intel_machine_check(struct cpu_user_regs * regs, long error_code) 9.22 -{ 9.23 - int recover=1; 9.24 - u32 alow, ahigh, high, low; 9.25 - u32 mcgstl, mcgsth; 9.26 - int i; 9.27 - 9.28 - rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); 9.29 - if (mcgstl & (1<<0)) /* Recoverable ? */ 9.30 - recover=0; 9.31 - 9.32 - printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", 9.33 - smp_processor_id(), mcgsth, mcgstl); 9.34 - 9.35 - for (i=0; i<nr_mce_banks; i++) { 9.36 - rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); 9.37 - if (high & (1<<31)) { 9.38 - if (high & (1<<29)) 9.39 - recover |= 1; 9.40 - if (high & (1<<25)) 9.41 - recover |= 2; 9.42 - printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low); 9.43 - high &= ~(1<<31); 9.44 - if (high & (1<<27)) { 9.45 - rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); 9.46 - printk ("[%08x%08x]", ahigh, alow); 9.47 - } 9.48 - if (high & (1<<26)) { 9.49 - rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); 9.50 - printk (" at %08x%08x", ahigh, alow); 9.51 - } 9.52 - printk ("\n"); 9.53 - } 9.54 - } 9.55 - 9.56 - if (recover & 2) 9.57 - mc_panic ("CPU context corrupt"); 9.58 - if (recover & 1) 9.59 - mc_panic ("Unable to continue"); 9.60 - 9.61 - printk (KERN_EMERG "Attempting to continue.\n"); 9.62 - /* 9.63 - * Do not clear the MSR_IA32_MCi_STATUS if the error is not 9.64 - * recoverable/continuable.This will allow BIOS to look at the MSRs 9.65 - * for errors if the OS could not log the error. 9.66 - */ 9.67 - for (i=0; i<nr_mce_banks; i++) { 9.68 - unsigned int msr; 9.69 - msr = MSR_IA32_MC0_STATUS+i*4; 9.70 - rdmsr (msr,low, high); 9.71 - if (high & (1<<31)) { 9.72 - /* Clear it */ 9.73 - wrmsr (msr, 0UL, 0UL); 9.74 - /* Serialize */ 9.75 - wmb(); 9.76 - add_taint(TAINT_MACHINE_CHECK); 9.77 - } 9.78 - } 9.79 - mcgstl &= ~(1<<2); 9.80 - wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); 9.81 -} 9.82 - 9.83 -/* Set up machine check reporting for processors with Intel style MCE */ 9.84 -void intel_p6_mcheck_init(struct cpuinfo_x86 *c) 9.85 -{ 9.86 - u32 l, h; 9.87 - int i; 9.88 - 9.89 - /* Check for MCE support */ 9.90 - if (!cpu_has(c, X86_FEATURE_MCE)) 9.91 - return; 9.92 - 9.93 - /* Check for PPro style MCA */ 9.94 - if (!cpu_has(c, X86_FEATURE_MCA)) 9.95 - return; 9.96 - 9.97 - /* Ok machine check is available */ 9.98 - machine_check_vector = intel_machine_check; 9.99 - wmb(); 9.100 - 9.101 - printk (KERN_INFO "Intel machine check architecture supported.\n"); 9.102 - rdmsr (MSR_IA32_MCG_CAP, l, h); 9.103 - if (l & (1<<8)) /* Control register present ? 
*/ 9.104 - wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); 9.105 - nr_mce_banks = l & 0xff; 9.106 - 9.107 - /* 9.108 - * Following the example in IA-32 SDM Vol 3: 9.109 - * - MC0_CTL should not be written 9.110 - * - Status registers on all banks should be cleared on reset 9.111 - */ 9.112 - for (i=1; i<nr_mce_banks; i++) 9.113 - wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); 9.114 - 9.115 - for (i=0; i<nr_mce_banks; i++) 9.116 - wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); 9.117 - 9.118 - set_in_cr4 (X86_CR4_MCE); 9.119 - printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", 9.120 - smp_processor_id()); 9.121 -}
10.1 --- a/xen/arch/x86/cpu/mcheck/x86_mca.h Fri Dec 19 14:56:36 2008 +0000 10.2 +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Mon Dec 22 08:12:33 2008 +0000 10.3 @@ -28,7 +28,10 @@ 10.4 /* Bitfield of the MSR_IA32_MCG_CAP register */ 10.5 #define MCG_CAP_COUNT 0x00000000000000ffULL 10.6 #define MCG_CTL_P 0x0000000000000100ULL 10.7 -/* Bits 9-63 are reserved */ 10.8 +#define MCG_EXT_P (1UL<<9) 10.9 +#define MCG_EXT_CNT (16) 10.10 +#define MCG_CMCI_P (1UL<<10) 10.11 +/* Other bits are reserved */ 10.12 10.13 /* Bitfield of the MSR_IA32_MCG_STATUS register */ 10.14 #define MCG_STATUS_RIPV 0x0000000000000001ULL 10.15 @@ -70,3 +73,17 @@ 10.16 /* reserved bits */ 10.17 #define MCi_STATUS_OTHER_RESERVED2 0x0180000000000000ULL 10.18 10.19 +/*Intel Specific bitfield*/ 10.20 +#define CMCI_THRESHOLD 0x2 10.21 + 10.22 + 10.23 +#define MAX_NR_BANKS 128 10.24 + 10.25 +typedef DECLARE_BITMAP(cpu_banks_t, MAX_NR_BANKS); 10.26 +DECLARE_PER_CPU(cpu_banks_t, mce_banks_owned); 10.27 + 10.28 +/* Global variables */ 10.29 +extern int mce_disabled; 10.30 +extern unsigned int nr_mce_banks; 10.31 +extern int firstbank; 10.32 +
11.1 --- a/xen/arch/x86/hvm/vmx/vmx.c Fri Dec 19 14:56:36 2008 +0000 11.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c Mon Dec 22 08:12:33 2008 +0000 11.3 @@ -2030,7 +2030,8 @@ static void vmx_do_extint(struct cpu_use 11.4 fastcall void smp_spurious_interrupt(struct cpu_user_regs *regs); 11.5 fastcall void smp_error_interrupt(struct cpu_user_regs *regs); 11.6 fastcall void smp_pmu_apic_interrupt(struct cpu_user_regs *regs); 11.7 -#ifdef CONFIG_X86_MCE_P4THERMAL 11.8 + fastcall void smp_cmci_interrupt(struct cpu_user_regs *regs); 11.9 +#ifdef CONFIG_X86_MCE_THERMAL 11.10 fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs); 11.11 #endif 11.12 11.13 @@ -2060,10 +2061,13 @@ static void vmx_do_extint(struct cpu_use 11.14 case ERROR_APIC_VECTOR: 11.15 smp_error_interrupt(regs); 11.16 break; 11.17 + case CMCI_APIC_VECTOR: 11.18 + smp_cmci_interrupt(regs); 11.19 + break; 11.20 case PMU_APIC_VECTOR: 11.21 smp_pmu_apic_interrupt(regs); 11.22 break; 11.23 -#ifdef CONFIG_X86_MCE_P4THERMAL 11.24 +#ifdef CONFIG_X86_MCE_THERMAL 11.25 case THERMAL_APIC_VECTOR: 11.26 smp_thermal_interrupt(regs); 11.27 break;
12.1 --- a/xen/arch/x86/i8259.c Fri Dec 19 14:56:36 2008 +0000 12.2 +++ b/xen/arch/x86/i8259.c Mon Dec 22 08:12:33 2008 +0000 12.3 @@ -74,6 +74,7 @@ BUILD_SMP_INTERRUPT(error_interrupt,ERRO 12.4 BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) 12.5 BUILD_SMP_INTERRUPT(pmu_apic_interrupt,PMU_APIC_VECTOR) 12.6 BUILD_SMP_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) 12.7 +BUILD_SMP_INTERRUPT(cmci_interrupt, CMCI_APIC_VECTOR) 12.8 12.9 #define IRQ(x,y) \ 12.10 IRQ##x##y##_interrupt
13.1 --- a/xen/arch/x86/smpboot.c Fri Dec 19 14:56:36 2008 +0000 13.2 +++ b/xen/arch/x86/smpboot.c Mon Dec 22 08:12:33 2008 +0000 13.3 @@ -1237,11 +1237,25 @@ remove_siblinginfo(int cpu) 13.4 } 13.5 13.6 extern void fixup_irqs(cpumask_t map); 13.7 -int __cpu_disable(void) 13.8 + 13.9 +/* 13.10 + * Functions called when offline cpu. 13.11 + * We need to process some new feature such as 13.12 + * CMCI owner change when do cpu hotplug in latest 13.13 + * Intel CPU families 13.14 +*/ 13.15 +void (*cpu_down_handler)(int down_cpu) = NULL; 13.16 +void (*cpu_down_rollback_handler)(int down_cpu) = NULL; 13.17 + 13.18 + 13.19 +int __cpu_disable(int down_cpu) 13.20 { 13.21 cpumask_t map = cpu_online_map; 13.22 int cpu = smp_processor_id(); 13.23 13.24 + /*Only down_cpu need to execute this function*/ 13.25 + if (cpu != down_cpu) 13.26 + return 0; 13.27 /* 13.28 * Perhaps use cpufreq to drop frequency, but that could go 13.29 * into generic code. 13.30 @@ -1293,10 +1307,14 @@ void __cpu_die(unsigned int cpu) 13.31 } 13.32 printk(KERN_ERR "CPU %u didn't die...\n", cpu); 13.33 } 13.34 +static int take_cpu_down(void *down_cpu) 13.35 +{ 13.36 13.37 -static int take_cpu_down(void *unused) 13.38 -{ 13.39 - return __cpu_disable(); 13.40 + if (cpu_down_handler) 13.41 + cpu_down_handler(*(int *)down_cpu); 13.42 + wmb(); 13.43 + 13.44 + return __cpu_disable(*(int *)down_cpu); 13.45 } 13.46 13.47 int cpu_down(unsigned int cpu) 13.48 @@ -1322,7 +1340,7 @@ int cpu_down(unsigned int cpu) 13.49 13.50 printk("Prepare to bring CPU%d down...\n", cpu); 13.51 13.52 - err = stop_machine_run(take_cpu_down, NULL, cpu); 13.53 + err = stop_machine_run(take_cpu_down, &cpu, cpu_online_map); 13.54 if ( err < 0 ) 13.55 goto out; 13.56 13.57 @@ -1333,6 +1351,10 @@ int cpu_down(unsigned int cpu) 13.58 err = -EBUSY; 13.59 } 13.60 out: 13.61 + /*if cpu_offline failed, re-check cmci_owner*/ 13.62 + 13.63 + if ( err < 0 && cpu_down_rollback_handler) 13.64 + cpu_down_rollback_handler(cpu); 13.65 spin_unlock(&cpu_add_remove_lock); 13.66 return err; 13.67 }
14.1 --- a/xen/common/stop_machine.c Fri Dec 19 14:56:36 2008 +0000 14.2 +++ b/xen/common/stop_machine.c Mon Dec 22 08:12:33 2008 +0000 14.3 @@ -45,7 +45,7 @@ struct stopmachine_data { 14.4 enum stopmachine_state state; 14.5 atomic_t done; 14.6 14.7 - unsigned int fn_cpu; 14.8 + cpumask_t fn_cpus; 14.9 int fn_result; 14.10 int (*fn)(void *); 14.11 void *fn_data; 14.12 @@ -63,21 +63,22 @@ static void stopmachine_set_state(enum s 14.13 cpu_relax(); 14.14 } 14.15 14.16 -int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu) 14.17 +int stop_machine_run(int (*fn)(void *), void *data, cpumask_t cpus) 14.18 { 14.19 cpumask_t allbutself; 14.20 unsigned int i, nr_cpus; 14.21 - int ret; 14.22 + int cur_cpu, ret; 14.23 14.24 BUG_ON(!local_irq_is_enabled()); 14.25 14.26 allbutself = cpu_online_map; 14.27 - cpu_clear(smp_processor_id(), allbutself); 14.28 + cur_cpu = smp_processor_id(); 14.29 + cpu_clear(cur_cpu, allbutself); 14.30 nr_cpus = cpus_weight(allbutself); 14.31 14.32 if ( nr_cpus == 0 ) 14.33 { 14.34 - BUG_ON(cpu != smp_processor_id()); 14.35 + BUG_ON(!cpu_isset(cur_cpu, cpus)); 14.36 return (*fn)(data); 14.37 } 14.38 14.39 @@ -91,7 +92,8 @@ int stop_machine_run(int (*fn)(void *), 14.40 stopmachine_data.fn = fn; 14.41 stopmachine_data.fn_data = data; 14.42 stopmachine_data.nr_cpus = nr_cpus; 14.43 - stopmachine_data.fn_cpu = cpu; 14.44 + stopmachine_data.fn_cpus = cpus; 14.45 + stopmachine_data.fn_result = 0; 14.46 atomic_set(&stopmachine_data.done, 0); 14.47 stopmachine_data.state = STOPMACHINE_START; 14.48 14.49 @@ -105,8 +107,13 @@ int stop_machine_run(int (*fn)(void *), 14.50 local_irq_disable(); 14.51 stopmachine_set_state(STOPMACHINE_DISABLE_IRQ); 14.52 14.53 - if ( cpu == smp_processor_id() ) 14.54 - stopmachine_data.fn_result = (*fn)(data); 14.55 + /* callback will run on each cpu of the input map. 14.56 + * If callback fails on any CPU, the stop_machine_run 14.57 + * will return the *ORed* the failure 14.58 + */ 14.59 + if ( cpu_isset(cur_cpu, cpus) ){ 14.60 + stopmachine_data.fn_result |= (*fn)(data); 14.61 + } 14.62 stopmachine_set_state(STOPMACHINE_INVOKE); 14.63 ret = stopmachine_data.fn_result; 14.64 14.65 @@ -121,7 +128,6 @@ int stop_machine_run(int (*fn)(void *), 14.66 static void stopmachine_softirq(void) 14.67 { 14.68 enum stopmachine_state state = STOPMACHINE_START; 14.69 - 14.70 smp_mb(); 14.71 14.72 while ( state != STOPMACHINE_EXIT ) 14.73 @@ -136,10 +142,11 @@ static void stopmachine_softirq(void) 14.74 local_irq_disable(); 14.75 break; 14.76 case STOPMACHINE_INVOKE: 14.77 - if ( stopmachine_data.fn_cpu == smp_processor_id() ) 14.78 - stopmachine_data.fn_result = 14.79 + if ( cpu_isset(smp_processor_id(), stopmachine_data.fn_cpus )) { 14.80 + stopmachine_data.fn_result |= 14.81 stopmachine_data.fn(stopmachine_data.fn_data); 14.82 - break; 14.83 + } 14.84 + break; 14.85 default: 14.86 break; 14.87 }
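
The stop_machine_run() interface also changes here: instead of naming a single CPU to run the callback, the caller now passes a cpumask, and every CPU in the mask executes the callback while the machine is frozen, with the individual return values ORed into the result. That is what lets the CMCI ownership hand-off run on the dying CPU and on all survivors in the same stop-machine window. Below is a minimal usage sketch of the new signature, modelled on the cpu_down() call in smpboot.c above; both function names are illustrative, not part of the patch.

```c
#include <xen/smp.h>
#include <xen/stop_machine.h>

/* Illustrative callback: runs on every CPU selected by the mask. */
static int do_per_cpu_work(void *arg)
{
    int down_cpu = *(int *)arg;

    if (smp_processor_id() == down_cpu)
        return 0;   /* the dying CPU would drop its CMCI bank ownership here */

    return 0;       /* the surviving CPUs would rediscover ownership here */
}

/* Illustrative caller: freeze the machine and run the callback everywhere. */
static int freeze_and_run_everywhere(unsigned int cpu)
{
    int target = cpu;

    /* Every CPU in cpu_online_map runs do_per_cpu_work();
     * stop_machine_run() returns the ORed results. */
    return stop_machine_run(do_per_cpu_work, &target, cpu_online_map);
}
```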
15.1 --- a/xen/include/asm-x86/apicdef.h Fri Dec 19 14:56:36 2008 +0000 15.2 +++ b/xen/include/asm-x86/apicdef.h Mon Dec 22 08:12:33 2008 +0000 15.3 @@ -80,6 +80,8 @@ 15.4 #define APIC_LVTTHMR 0x330 15.5 #define APIC_LVTPC 0x340 15.6 #define APIC_LVT0 0x350 15.7 +#define APIC_CMCI 0x2F0 15.8 + 15.9 #define APIC_LVT_TIMER_BASE_MASK (0x3<<18) 15.10 #define GET_APIC_TIMER_BASE(x) (((x)>>18)&0x3) 15.11 #define SET_APIC_TIMER_BASE(x) (((x)<<18))
16.1 --- a/xen/include/asm-x86/config.h Fri Dec 19 14:56:36 2008 +0000 16.2 +++ b/xen/include/asm-x86/config.h Mon Dec 22 08:12:33 2008 +0000 16.3 @@ -22,7 +22,7 @@ 16.4 #define CONFIG_X86_IO_APIC 1 16.5 #define CONFIG_X86_PM_TIMER 1 16.6 #define CONFIG_HPET_TIMER 1 16.7 -#define CONFIG_X86_MCE_P4THERMAL 1 16.8 +#define CONFIG_X86_MCE_THERMAL 1 16.9 #define CONFIG_NUMA 1 16.10 #define CONFIG_DISCONTIGMEM 1 16.11 #define CONFIG_NUMA_EMU 1
17.1 --- a/xen/include/asm-x86/irq.h Fri Dec 19 14:56:36 2008 +0000 17.2 +++ b/xen/include/asm-x86/irq.h Mon Dec 22 08:12:33 2008 +0000 17.3 @@ -33,6 +33,7 @@ fastcall void error_interrupt(void); 17.4 fastcall void pmu_apic_interrupt(void); 17.5 fastcall void spurious_interrupt(void); 17.6 fastcall void thermal_interrupt(void); 17.7 +fastcall void cmci_interrupt(void); 17.8 17.9 void disable_8259A_irq(unsigned int irq); 17.10 void enable_8259A_irq(unsigned int irq);
18.1 --- a/xen/include/asm-x86/mach-default/irq_vectors.h Fri Dec 19 14:56:36 2008 +0000 18.2 +++ b/xen/include/asm-x86/mach-default/irq_vectors.h Mon Dec 22 08:12:33 2008 +0000 18.3 @@ -10,13 +10,13 @@ 18.4 #define THERMAL_APIC_VECTOR 0xfa 18.5 #define LOCAL_TIMER_VECTOR 0xf9 18.6 #define PMU_APIC_VECTOR 0xf8 18.7 - 18.8 +#define CMCI_APIC_VECTOR 0xf7 18.9 /* 18.10 * High-priority dynamically-allocated vectors. For interrupts that 18.11 * must be higher priority than any guest-bound interrupt. 18.12 */ 18.13 #define FIRST_HIPRIORITY_VECTOR 0xf0 18.14 -#define LAST_HIPRIORITY_VECTOR 0xf7 18.15 +#define LAST_HIPRIORITY_VECTOR 0xf6 18.16 18.17 /* Legacy PIC uses vectors 0xe0-0xef. */ 18.18 #define FIRST_LEGACY_VECTOR 0xe0
19.1 --- a/xen/include/asm-x86/msr-index.h Fri Dec 19 14:56:36 2008 +0000 19.2 +++ b/xen/include/asm-x86/msr-index.h Mon Dec 22 08:12:33 2008 +0000 19.3 @@ -92,8 +92,10 @@ 19.4 #define MSR_IA32_MC0_STATUS 0x00000401 19.5 #define MSR_IA32_MC0_ADDR 0x00000402 19.6 #define MSR_IA32_MC0_MISC 0x00000403 19.7 +#define MSR_IA32_MC0_CTL2 0x00000280 19.8 +#define CMCI_EN (1UL<<30) 19.9 +#define CMCI_THRESHOLD_MASK 0x7FFF 19.10 19.11 -#define MSR_IA32_MC1_CTL 0x00000404 19.12 #define MSR_IA32_MC1_STATUS 0x00000405 19.13 #define MSR_IA32_MC1_ADDR 0x00000406 19.14 #define MSR_IA32_MC1_MISC 0x00000407
20.1 --- a/xen/include/asm-x86/smp.h Fri Dec 19 14:56:36 2008 +0000 20.2 +++ b/xen/include/asm-x86/smp.h Mon Dec 22 08:12:33 2008 +0000 20.3 @@ -101,7 +101,7 @@ static __inline int logical_smp_processo 20.4 20.5 #endif 20.6 20.7 -extern int __cpu_disable(void); 20.8 +extern int __cpu_disable(int down_cpu); 20.9 extern void __cpu_die(unsigned int cpu); 20.10 #endif /* !__ASSEMBLY__ */ 20.11
21.1 --- a/xen/include/public/arch-x86/xen-mca.h Fri Dec 19 14:56:36 2008 +0000 21.2 +++ b/xen/include/public/arch-x86/xen-mca.h Mon Dec 22 08:12:33 2008 +0000 21.3 @@ -106,7 +106,10 @@ struct mcinfo_common { 21.4 21.5 #define MC_FLAG_CORRECTABLE (1 << 0) 21.6 #define MC_FLAG_UNCORRECTABLE (1 << 1) 21.7 - 21.8 +#define MC_FLAG_RECOVERABLE (1 << 2) 21.9 +#define MC_FLAG_POLLED (1 << 3) 21.10 +#define MC_FLAG_RESET (1 << 4) 21.11 +#define MC_FLAG_CMCI (1 << 5) 21.12 /* contains global x86 mc information */ 21.13 struct mcinfo_global { 21.14 struct mcinfo_common common; 21.15 @@ -115,6 +118,7 @@ struct mcinfo_global { 21.16 uint16_t mc_domid; 21.17 uint32_t mc_socketid; /* physical socket of the physical core */ 21.18 uint16_t mc_coreid; /* physical impacted core */ 21.19 + uint8_t mc_apicid; 21.20 uint16_t mc_core_threadid; /* core thread of physical core */ 21.21 uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */ 21.22 uint64_t mc_gstatus; /* global status */ 21.23 @@ -132,6 +136,8 @@ struct mcinfo_bank { 21.24 uint64_t mc_addr; /* bank address, only valid 21.25 * if addr bit is set in mc_status */ 21.26 uint64_t mc_misc; 21.27 + uint64_t mc_ctrl2; 21.28 + uint64_t mc_tsc; 21.29 }; 21.30 21.31 21.32 @@ -150,7 +156,12 @@ struct mcinfo_extended { 21.33 * multiple times. */ 21.34 21.35 uint32_t mc_msrs; /* Number of msr with valid values. */ 21.36 - struct mcinfo_msr mc_msr[5]; 21.37 + /* 21.38 + * Currently Intel extended MSR (32/64) including all gp registers 21.39 + * and E(R)DI, E(R)BP, E(R)SP, E(R)FLAGS, E(R)IP, E(R)MISC, only 10 21.40 + * of them might be useful. So expend this array to 10. 21.41 + */ 21.42 + struct mcinfo_msr mc_msr[10]; 21.43 }; 21.44 21.45 #define MCINFO_HYPERCALLSIZE 1024
22.1 --- a/xen/include/xen/stop_machine.h Fri Dec 19 14:56:36 2008 +0000 22.2 +++ b/xen/include/xen/stop_machine.h Mon Dec 22 08:12:33 2008 +0000 22.3 @@ -5,7 +5,7 @@ 22.4 * stop_machine_run: freeze the machine on all CPUs and run this function 22.5 * @fn: the function to run 22.6 * @data: the data ptr for the @fn() 22.7 - * @cpu: the cpu to run @fn() on (or any, if @cpu == NR_CPUS). 22.8 + * @cpus: cpus to run @fn() on. 22.9 * 22.10 * Description: This causes every other cpu to enter a safe point, with 22.11 * each of which disables interrupts, and finally interrupts are disabled 22.12 @@ -14,6 +14,6 @@ 22.13 * 22.14 * This can be thought of as a very heavy write lock, equivalent to 22.15 * grabbing every spinlock in the kernel. */ 22.16 -int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu); 22.17 +int stop_machine_run(int (*fn)(void *), void *data, cpumask_t cpu); 22.18 22.19 #endif /* __XEN_STOP_MACHINE_H__ */