/root/src/xen/xen/arch/x86/cpu/mcheck/mce_intel.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include <xen/init.h> |
2 | | #include <xen/types.h> |
3 | | #include <xen/irq.h> |
4 | | #include <xen/event.h> |
5 | | #include <xen/kernel.h> |
6 | | #include <xen/delay.h> |
7 | | #include <xen/smp.h> |
8 | | #include <xen/mm.h> |
9 | | #include <xen/cpu.h> |
10 | | #include <asm/processor.h> |
11 | | #include <public/sysctl.h> |
12 | | #include <asm/system.h> |
13 | | #include <asm/msr.h> |
14 | | #include <asm/p2m.h> |
15 | | #include <asm/mce.h> |
16 | | #include <asm/apic.h> |
17 | | #include "mce.h" |
18 | | #include "x86_mca.h" |
19 | | #include "barrier.h" |
20 | | #include "util.h" |
21 | | #include "vmce.h" |
22 | | #include "mcaction.h" |
23 | | |
24 | | static DEFINE_PER_CPU_READ_MOSTLY(struct mca_banks *, mce_banks_owned); |
25 | | bool __read_mostly cmci_support; |
26 | | static bool __read_mostly ser_support; |
27 | | static bool __read_mostly mce_force_broadcast; |
28 | | boolean_param("mce_fb", mce_force_broadcast); |
29 | | |
30 | | static int __read_mostly nr_intel_ext_msrs; |
31 | | |
32 | | /* If mce_force_broadcast == 1, lmce_support will be disabled forcibly. */ |
33 | | bool __read_mostly lmce_support; |
34 | | |
35 | | /* Intel SDM define bit15~bit0 of IA32_MCi_STATUS as the MC error code */ |
36 | 0 | #define INTEL_MCCOD_MASK 0xFFFF |
37 | | |
38 | | /* |
39 | | * Currently Intel SDM define 2 kinds of srao errors: |
40 | | * 1). Memory scrubbing error, error code = 0xC0 ~ 0xCF |
41 | | * 2). L3 explicit writeback error, error code = 0x17A |
42 | | */ |
43 | 0 | #define INTEL_SRAO_MEM_SCRUB 0xC0 ... 0xCF |
44 | 0 | #define INTEL_SRAO_L3_EWB 0x17A |
45 | | |
46 | | /* |
47 | | * Currently Intel SDM define 2 kinds of srar errors: |
48 | | * 1). Data Load error, error code = 0x134 |
49 | | * 2). Instruction Fetch error, error code = 0x150 |
50 | | */ |
51 | 0 | #define INTEL_SRAR_DATA_LOAD 0x134 |
52 | 0 | #define INTEL_SRAR_INSTR_FETCH 0x150 |
53 | | |
54 | | #ifdef CONFIG_X86_MCE_THERMAL |
55 | 0 | #define MCE_RING 0x1 |
56 | | static DEFINE_PER_CPU(int, last_state); |
57 | | |
58 | | static void intel_thermal_interrupt(struct cpu_user_regs *regs) |
59 | 0 | { |
60 | 0 | uint64_t msr_content; |
61 | 0 | unsigned int cpu = smp_processor_id(); |
62 | 0 | static DEFINE_PER_CPU(s_time_t, next); |
63 | 0 | int *this_last_state; |
64 | 0 |
|
65 | 0 | ack_APIC_irq(); |
66 | 0 |
|
67 | 0 | if ( NOW() < per_cpu(next, cpu) ) |
68 | 0 | return; |
69 | 0 |
|
70 | 0 | per_cpu(next, cpu) = NOW() + MILLISECS(5000); |
71 | 0 | rdmsrl(MSR_IA32_THERM_STATUS, msr_content); |
72 | 0 | this_last_state = &per_cpu(last_state, cpu); |
73 | 0 | if ( *this_last_state == (msr_content & MCE_RING) ) |
74 | 0 | return; |
75 | 0 | *this_last_state = msr_content & MCE_RING; |
76 | 0 | if ( msr_content & MCE_RING ) |
77 | 0 | { |
78 | 0 | printk(KERN_EMERG "CPU%u: Temperature above threshold\n", cpu); |
79 | 0 | printk(KERN_EMERG "CPU%u: Running in modulated clock mode\n", cpu); |
80 | 0 | add_taint(TAINT_MACHINE_CHECK); |
81 | 0 | } else |
82 | 0 | printk(KERN_INFO "CPU%u: Temperature/speed normal\n", cpu); |
83 | 0 | } |
84 | | |
85 | | /* Thermal monitoring depends on APIC, ACPI and clock modulation */ |
86 | | static bool intel_thermal_supported(struct cpuinfo_x86 *c) |
87 | 13 | { |
88 | 13 | if ( !cpu_has_apic ) |
89 | 0 | return false; |
90 | 13 | if ( !cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_TM1) ) |
91 | 0 | return false; |
92 | 13 | return true; |
93 | 13 | } |
94 | | |
95 | | static u32 __read_mostly lvtthmr_init; |
96 | | |
97 | | static void __init mcheck_intel_therm_init(void) |
98 | 1 | { |
99 | 1 | /* |
100 | 1 | * This function is only called on boot CPU. Save the init thermal |
101 | 1 | * LVT value on BSP and use that value to restore APs' thermal LVT |
102 | 1 | * entry BIOS programmed later |
103 | 1 | */ |
104 | 1 | if ( intel_thermal_supported(&boot_cpu_data) ) |
105 | 1 | lvtthmr_init = apic_read(APIC_LVTTHMR); |
106 | 1 | } |
107 | | |
108 | | /* P4/Xeon Thermal regulation detect and init */ |
109 | | static void intel_init_thermal(struct cpuinfo_x86 *c) |
110 | 12 | { |
111 | 12 | uint64_t msr_content; |
112 | 12 | uint32_t val; |
113 | 12 | int tm2 = 0; |
114 | 12 | unsigned int cpu = smp_processor_id(); |
115 | 12 | static uint8_t thermal_apic_vector; |
116 | 12 | |
117 | 12 | if ( !intel_thermal_supported(c) ) |
118 | 0 | return; /* -ENODEV */ |
119 | 12 | |
120 | 12 | /* first check if its enabled already, in which case there might |
121 | 12 | * be some SMM goo which handles it, so we can't even put a handler |
122 | 12 | * since it might be delivered via SMI already -zwanem. |
123 | 12 | */ |
124 | 12 | rdmsrl(MSR_IA32_MISC_ENABLE, msr_content); |
125 | 12 | val = lvtthmr_init; |
126 | 12 | /* |
127 | 12 | * The initial value of thermal LVT entries on all APs always reads |
128 | 12 | * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI |
129 | 12 | * sequence to them and LVT registers are reset to 0s except for |
130 | 12 | * the mask bits which are set to 1s when APs receive INIT IPI. |
131 | 12 | * If BIOS takes over the thermal interrupt and sets its interrupt |
132 | 12 | * delivery mode to SMI (not fixed), it restores the value that the |
133 | 12 | * BIOS has programmed on AP based on BSP's info we saved (since BIOS |
134 | 12 | * is required to set the same value for all threads/cores). |
135 | 12 | */ |
136 | 12 | if ( (val & APIC_MODE_MASK) != APIC_DM_FIXED |
137 | 12 | || (val & APIC_VECTOR_MASK) > 0xf ) |
138 | 0 | apic_write(APIC_LVTTHMR, val); |
139 | 12 | |
140 | 12 | if ( (msr_content & (1ULL<<3)) |
141 | 12 | && (val & APIC_MODE_MASK) == APIC_DM_SMI ) |
142 | 0 | { |
143 | 0 | if ( c == &boot_cpu_data ) |
144 | 0 | printk(KERN_DEBUG "Thermal monitoring handled by SMI\n"); |
145 | 0 | return; /* -EBUSY */ |
146 | 0 | } |
147 | 12 | |
148 | 12 | if ( cpu_has(c, X86_FEATURE_TM2) && (msr_content & (1ULL << 13)) ) |
149 | 0 | tm2 = 1; |
150 | 12 | |
151 | 12 | /* check whether a vector already exists, temporarily masked? */ |
152 | 12 | if ( val & APIC_VECTOR_MASK ) |
153 | 0 | { |
154 | 0 | if ( c == &boot_cpu_data ) |
155 | 0 | printk(KERN_DEBUG "Thermal LVT vector (%#x) already installed\n", |
156 | 0 | val & APIC_VECTOR_MASK); |
157 | 0 | return; /* -EBUSY */ |
158 | 0 | } |
159 | 12 | |
160 | 12 | alloc_direct_apic_vector(&thermal_apic_vector, intel_thermal_interrupt); |
161 | 12 | |
162 | 12 | /* The temperature transition interrupt handler setup */ |
163 | 12 | val = thermal_apic_vector; /* our delivery vector */ |
164 | 12 | val |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ |
165 | 12 | apic_write(APIC_LVTTHMR, val); |
166 | 12 | |
167 | 12 | rdmsrl(MSR_IA32_THERM_INTERRUPT, msr_content); |
168 | 12 | wrmsrl(MSR_IA32_THERM_INTERRUPT, msr_content | 0x03); |
169 | 12 | |
170 | 12 | rdmsrl(MSR_IA32_MISC_ENABLE, msr_content); |
171 | 12 | wrmsrl(MSR_IA32_MISC_ENABLE, msr_content | (1ULL<<3)); |
172 | 12 | |
173 | 12 | apic_write(APIC_LVTTHMR, val & ~APIC_LVT_MASKED); |
174 | 12 | if ( opt_cpu_info ) |
175 | 0 | printk(KERN_INFO "CPU%u: Thermal monitoring enabled (%s)\n", |
176 | 0 | cpu, tm2 ? "TM2" : "TM1"); |
177 | 12 | return; |
178 | 12 | } |
179 | | #endif /* CONFIG_X86_MCE_THERMAL */ |
180 | | |
181 | | /* Intel MCE handler */ |
182 | | static inline void intel_get_extended_msr(struct mcinfo_extended *ext, u32 msr) |
183 | 0 | { |
184 | 0 | if ( ext->mc_msrs < ARRAY_SIZE(ext->mc_msr) |
185 | 0 | && msr < MSR_IA32_MCG_EAX + nr_intel_ext_msrs ) |
186 | 0 | { |
187 | 0 | ext->mc_msr[ext->mc_msrs].reg = msr; |
188 | 0 | rdmsrl(msr, ext->mc_msr[ext->mc_msrs].value); |
189 | 0 | ++ext->mc_msrs; |
190 | 0 | } |
191 | 0 | } |
192 | | |
193 | | |
194 | | struct mcinfo_extended * |
195 | | intel_get_extended_msrs(struct mcinfo_global *mig, struct mc_info *mi) |
196 | 0 | { |
197 | 0 | struct mcinfo_extended *mc_ext; |
198 | 0 | int i; |
199 | 0 |
|
200 | 0 | /* |
201 | 0 | * According to spec, processor _support_ 64 bit will always |
202 | 0 | * have MSR beyond IA32_MCG_MISC |
203 | 0 | */ |
204 | 0 | if ( !mi|| !mig || nr_intel_ext_msrs == 0 || |
205 | 0 | !(mig->mc_gstatus & MCG_STATUS_EIPV) ) |
206 | 0 | return NULL; |
207 | 0 |
|
208 | 0 | mc_ext = x86_mcinfo_reserve(mi, sizeof(*mc_ext), MC_TYPE_EXTENDED); |
209 | 0 | if ( !mc_ext ) |
210 | 0 | { |
211 | 0 | mi->flags |= MCINFO_FLAGS_UNCOMPLETE; |
212 | 0 | return NULL; |
213 | 0 | } |
214 | 0 |
|
215 | 0 | for ( i = MSR_IA32_MCG_EAX; i <= MSR_IA32_MCG_MISC; i++ ) |
216 | 0 | intel_get_extended_msr(mc_ext, i); |
217 | 0 |
|
218 | 0 | for ( i = MSR_IA32_MCG_R8; i <= MSR_IA32_MCG_R15; i++ ) |
219 | 0 | intel_get_extended_msr(mc_ext, i); |
220 | 0 |
|
221 | 0 | return mc_ext; |
222 | 0 | } |
223 | | |
224 | | enum intel_mce_type |
225 | | { |
226 | | intel_mce_invalid, |
227 | | intel_mce_fatal, |
228 | | intel_mce_corrected, |
229 | | intel_mce_ucr_ucna, |
230 | | intel_mce_ucr_srao, |
231 | | intel_mce_ucr_srar, |
232 | | }; |
233 | | |
234 | | static enum intel_mce_type intel_check_mce_type(uint64_t status) |
235 | 0 | { |
236 | 0 | if ( !(status & MCi_STATUS_VAL) ) |
237 | 0 | return intel_mce_invalid; |
238 | 0 |
|
239 | 0 | if ( status & MCi_STATUS_PCC ) |
240 | 0 | return intel_mce_fatal; |
241 | 0 |
|
242 | 0 | /* Corrected error? */ |
243 | 0 | if ( !(status & MCi_STATUS_UC) ) |
244 | 0 | return intel_mce_corrected; |
245 | 0 |
|
246 | 0 | if ( !ser_support ) |
247 | 0 | return intel_mce_fatal; |
248 | 0 |
|
249 | 0 | if ( status & MCi_STATUS_S ) |
250 | 0 | { |
251 | 0 | if ( status & MCi_STATUS_AR ) |
252 | 0 | { |
253 | 0 | if ( status & MCi_STATUS_OVER ) |
254 | 0 | return intel_mce_fatal; |
255 | 0 | else |
256 | 0 | return intel_mce_ucr_srar; |
257 | 0 | } else |
258 | 0 | return intel_mce_ucr_srao; |
259 | 0 | } |
260 | 0 | else |
261 | 0 | return intel_mce_ucr_ucna; |
262 | 0 |
|
263 | 0 | /* Any type not included abovoe ? */ |
264 | 0 | return intel_mce_fatal; |
265 | 0 | } |
266 | | |
267 | | static void intel_memerr_dhandler( |
268 | | struct mca_binfo *binfo, |
269 | | enum mce_result *result, |
270 | | const struct cpu_user_regs *regs) |
271 | 0 | { |
272 | 0 | mce_printk(MCE_VERBOSE, "MCE: Enter UCR recovery action\n"); |
273 | 0 | mc_memerr_dhandler(binfo, result, regs); |
274 | 0 | } |
275 | | |
276 | | static bool intel_srar_check(uint64_t status) |
277 | 0 | { |
278 | 0 | return (intel_check_mce_type(status) == intel_mce_ucr_srar); |
279 | 0 | } |
280 | | |
281 | | static bool intel_checkaddr(uint64_t status, uint64_t misc, int addrtype) |
282 | 0 | { |
283 | 0 | if ( !(status & MCi_STATUS_ADDRV) || |
284 | 0 | !(status & MCi_STATUS_MISCV) || |
285 | 0 | ((misc & MCi_MISC_ADDRMOD_MASK) != MCi_MISC_PHYSMOD) ) |
286 | 0 | /* addr is virtual */ |
287 | 0 | return (addrtype == MC_ADDR_VIRTUAL); |
288 | 0 |
|
289 | 0 | return (addrtype == MC_ADDR_PHYSICAL); |
290 | 0 | } |
291 | | |
292 | | static void intel_srar_dhandler( |
293 | | struct mca_binfo *binfo, |
294 | | enum mce_result *result, |
295 | | const struct cpu_user_regs *regs) |
296 | 0 | { |
297 | 0 | uint64_t status = binfo->mib->mc_status; |
298 | 0 |
|
299 | 0 | /* For unknown srar error code, reset system */ |
300 | 0 | *result = MCER_RESET; |
301 | 0 |
|
302 | 0 | switch ( status & INTEL_MCCOD_MASK ) |
303 | 0 | { |
304 | 0 | case INTEL_SRAR_DATA_LOAD: |
305 | 0 | case INTEL_SRAR_INSTR_FETCH: |
306 | 0 | intel_memerr_dhandler(binfo, result, regs); |
307 | 0 | break; |
308 | 0 | } |
309 | 0 | } |
310 | | |
311 | | static bool intel_srao_check(uint64_t status) |
312 | 0 | { |
313 | 0 | return (intel_check_mce_type(status) == intel_mce_ucr_srao); |
314 | 0 | } |
315 | | |
316 | | static void intel_srao_dhandler( |
317 | | struct mca_binfo *binfo, |
318 | | enum mce_result *result, |
319 | | const struct cpu_user_regs *regs) |
320 | 0 | { |
321 | 0 | uint64_t status = binfo->mib->mc_status; |
322 | 0 |
|
323 | 0 | /* For unknown srao error code, no action required */ |
324 | 0 | *result = MCER_CONTINUE; |
325 | 0 |
|
326 | 0 | if ( status & MCi_STATUS_VAL ) |
327 | 0 | { |
328 | 0 | switch ( status & INTEL_MCCOD_MASK ) |
329 | 0 | { |
330 | 0 | case INTEL_SRAO_MEM_SCRUB: |
331 | 0 | case INTEL_SRAO_L3_EWB: |
332 | 0 | intel_memerr_dhandler(binfo, result, regs); |
333 | 0 | break; |
334 | 0 | } |
335 | 0 | } |
336 | 0 | } |
337 | | |
338 | | static bool intel_default_check(uint64_t status) |
339 | 0 | { |
340 | 0 | return true; |
341 | 0 | } |
342 | | |
343 | | static void intel_default_mce_dhandler( |
344 | | struct mca_binfo *binfo, |
345 | | enum mce_result *result, |
346 | | const struct cpu_user_regs * regs) |
347 | 0 | { |
348 | 0 | uint64_t status = binfo->mib->mc_status; |
349 | 0 | enum intel_mce_type type; |
350 | 0 |
|
351 | 0 | type = intel_check_mce_type(status); |
352 | 0 |
|
353 | 0 | if ( type == intel_mce_fatal ) |
354 | 0 | *result = MCER_RESET; |
355 | 0 | else |
356 | 0 | *result = MCER_CONTINUE; |
357 | 0 | } |
358 | | |
359 | | static const struct mca_error_handler intel_mce_dhandlers[] = { |
360 | | {intel_srao_check, intel_srao_dhandler}, |
361 | | {intel_srar_check, intel_srar_dhandler}, |
362 | | {intel_default_check, intel_default_mce_dhandler} |
363 | | }; |
364 | | |
365 | | static void intel_default_mce_uhandler( |
366 | | struct mca_binfo *binfo, |
367 | | enum mce_result *result, |
368 | | const struct cpu_user_regs *regs) |
369 | 0 | { |
370 | 0 | uint64_t status = binfo->mib->mc_status; |
371 | 0 | enum intel_mce_type type; |
372 | 0 |
|
373 | 0 | type = intel_check_mce_type(status); |
374 | 0 |
|
375 | 0 | switch ( type ) |
376 | 0 | { |
377 | 0 | case intel_mce_fatal: |
378 | 0 | *result = MCER_RESET; |
379 | 0 | break; |
380 | 0 |
|
381 | 0 | default: |
382 | 0 | *result = MCER_CONTINUE; |
383 | 0 | break; |
384 | 0 | } |
385 | 0 | } |
386 | | |
387 | | static const struct mca_error_handler intel_mce_uhandlers[] = { |
388 | | {intel_default_check, intel_default_mce_uhandler} |
389 | | }; |
390 | | |
391 | | /* According to MCA OS writer guide, CMCI handler need to clear bank when |
392 | | * 1) CE (UC = 0) |
393 | | * 2) ser_support = 1, Superious error, OVER = 0, EN = 0, [UC = 1] |
394 | | * 3) ser_support = 1, UCNA, OVER = 0, S = 1, AR = 0, PCC = 0, [UC = 1, EN = 1] |
395 | | * MCA handler need to clear bank when |
396 | | * 1) ser_support = 1, Superious error, OVER = 0, EN = 0, UC = 1 |
397 | | * 2) ser_support = 1, SRAR, UC = 1, OVER = 0, S = 1, AR = 1, [EN = 1] |
398 | | * 3) ser_support = 1, SRAO, UC = 1, S = 1, AR = 0, [EN = 1] |
399 | | */ |
400 | | |
401 | | static bool intel_need_clearbank_scan(enum mca_source who, u64 status) |
402 | 0 | { |
403 | 0 | if ( who == MCA_CMCI_HANDLER ) |
404 | 0 | { |
405 | 0 | /* CMCI need clear bank */ |
406 | 0 | if ( !(status & MCi_STATUS_UC) ) |
407 | 0 | return true; |
408 | 0 | /* Spurious need clear bank */ |
409 | 0 | else if ( ser_support && !(status & MCi_STATUS_OVER) |
410 | 0 | && !(status & MCi_STATUS_EN) ) |
411 | 0 | return true; |
412 | 0 | /* UCNA OVER = 0 need clear bank */ |
413 | 0 | else if ( ser_support && !(status & MCi_STATUS_OVER) |
414 | 0 | && !(status & MCi_STATUS_PCC) && !(status & MCi_STATUS_S) |
415 | 0 | && !(status & MCi_STATUS_AR) ) |
416 | 0 | return true; |
417 | 0 | /* Only Log, no clear */ |
418 | 0 | else return false; |
419 | 0 | } |
420 | 0 | else if ( who == MCA_MCE_SCAN ) |
421 | 0 | { |
422 | 0 | if ( !ser_support ) |
423 | 0 | return false; |
424 | 0 | /* |
425 | 0 | * For fatal error, it shouldn't be cleared so that sticky bank |
426 | 0 | * have chance to be handled after reboot by polling |
427 | 0 | */ |
428 | 0 | if ( (status & MCi_STATUS_UC) && (status & MCi_STATUS_PCC) ) |
429 | 0 | return false; |
430 | 0 | /* Spurious need clear bank */ |
431 | 0 | else if ( !(status & MCi_STATUS_OVER) |
432 | 0 | && (status & MCi_STATUS_UC) && !(status & MCi_STATUS_EN) ) |
433 | 0 | return true; |
434 | 0 | /* SRAR OVER=0 clear bank. OVER = 1 have caused reset */ |
435 | 0 | else if ( (status & MCi_STATUS_UC) |
436 | 0 | && (status & MCi_STATUS_S) && (status & MCi_STATUS_AR) |
437 | 0 | && !(status & MCi_STATUS_OVER) ) |
438 | 0 | return true; |
439 | 0 | /* SRAO need clear bank */ |
440 | 0 | else if ( !(status & MCi_STATUS_AR) |
441 | 0 | && (status & MCi_STATUS_S) && (status & MCi_STATUS_UC) ) |
442 | 0 | return true; |
443 | 0 | else |
444 | 0 | return false; |
445 | 0 | } |
446 | 0 |
|
447 | 0 | return true; |
448 | 0 | } |
449 | | |
450 | | /* |
451 | | * MCE continues/is recoverable when |
452 | | * 1) CE UC = 0 |
453 | | * 2) Supious ser_support = 1, OVER = 0, En = 0 [UC = 1] |
454 | | * 3) SRAR ser_support = 1, OVER = 0, PCC = 0, S = 1, AR = 1 [UC =1, EN = 1] |
455 | | * 4) SRAO ser_support = 1, PCC = 0, S = 1, AR = 0, EN = 1 [UC = 1] |
456 | | * 5) UCNA ser_support = 1, OVER = 0, EN = 1, PCC = 0, S = 0, AR = 0, [UC = 1] |
457 | | */ |
458 | | static bool intel_recoverable_scan(uint64_t status) |
459 | 0 | { |
460 | 0 |
|
461 | 0 | if ( !(status & MCi_STATUS_UC ) ) |
462 | 0 | return true; |
463 | 0 | else if ( ser_support && !(status & MCi_STATUS_EN) |
464 | 0 | && !(status & MCi_STATUS_OVER) ) |
465 | 0 | return true; |
466 | 0 | /* SRAR error */ |
467 | 0 | else if ( ser_support && !(status & MCi_STATUS_OVER) |
468 | 0 | && !(status & MCi_STATUS_PCC) && (status & MCi_STATUS_S) |
469 | 0 | && (status & MCi_STATUS_AR) && (status & MCi_STATUS_EN) ) |
470 | 0 | return true; |
471 | 0 | /* SRAO error */ |
472 | 0 | else if ( ser_support && !(status & MCi_STATUS_PCC) |
473 | 0 | && (status & MCi_STATUS_S) && !(status & MCi_STATUS_AR) |
474 | 0 | && (status & MCi_STATUS_EN) ) |
475 | 0 | return true; |
476 | 0 | /* UCNA error */ |
477 | 0 | else if ( ser_support && !(status & MCi_STATUS_OVER) |
478 | 0 | && (status & MCi_STATUS_EN) && !(status & MCi_STATUS_PCC) |
479 | 0 | && !(status & MCi_STATUS_S) && !(status & MCi_STATUS_AR) ) |
480 | 0 | return true; |
481 | 0 | return false; |
482 | 0 | } |
483 | | |
484 | | /* CMCI */ |
485 | | static DEFINE_SPINLOCK(cmci_discover_lock); |
486 | | |
487 | | /* |
488 | | * Discover bank sharing using the algorithm recommended in the SDM. |
489 | | */ |
490 | | static int do_cmci_discover(int i) |
491 | 276 | { |
492 | 276 | unsigned msr = MSR_IA32_MCx_CTL2(i); |
493 | 276 | u64 val; |
494 | 276 | unsigned int threshold, max_threshold; |
495 | 276 | static unsigned int cmci_threshold = 2; |
496 | 276 | integer_param("cmci-threshold", cmci_threshold); |
497 | 276 | |
498 | 276 | rdmsrl(msr, val); |
499 | 276 | /* Some other CPU already owns this bank. */ |
500 | 276 | if ( val & CMCI_EN ) |
501 | 161 | { |
502 | 161 | mcabanks_clear(i, __get_cpu_var(mce_banks_owned)); |
503 | 161 | goto out; |
504 | 161 | } |
505 | 276 | |
506 | 115 | if ( cmci_threshold ) |
507 | 115 | { |
508 | 115 | wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD_MASK); |
509 | 115 | rdmsrl(msr, val); |
510 | 115 | } |
511 | 115 | |
512 | 115 | if ( !(val & CMCI_EN) ) |
513 | 84 | { |
514 | 84 | /* This bank does not support CMCI. Polling timer has to handle it. */ |
515 | 84 | mcabanks_set(i, __get_cpu_var(no_cmci_banks)); |
516 | 84 | wrmsrl(msr, val & ~CMCI_THRESHOLD_MASK); |
517 | 84 | return 0; |
518 | 84 | } |
519 | 31 | max_threshold = MASK_EXTR(val, CMCI_THRESHOLD_MASK); |
520 | 31 | threshold = cmci_threshold; |
521 | 31 | if ( threshold > max_threshold ) |
522 | 0 | { |
523 | 0 | mce_printk(MCE_QUIET, |
524 | 0 | "CMCI: threshold %#x too large for CPU%u bank %u, using %#x\n", |
525 | 0 | threshold, smp_processor_id(), i, max_threshold); |
526 | 0 | threshold = max_threshold; |
527 | 0 | } |
528 | 31 | wrmsrl(msr, (val & ~CMCI_THRESHOLD_MASK) | CMCI_EN | threshold); |
529 | 31 | mcabanks_set(i, __get_cpu_var(mce_banks_owned)); |
530 | 192 | out: |
531 | 192 | mcabanks_clear(i, __get_cpu_var(no_cmci_banks)); |
532 | 192 | return 1; |
533 | 31 | } |
534 | | |
535 | | static void cmci_discover(void) |
536 | 12 | { |
537 | 12 | unsigned long flags; |
538 | 12 | int i; |
539 | 12 | mctelem_cookie_t mctc; |
540 | 12 | struct mca_summary bs; |
541 | 12 | |
542 | 12 | mce_printk(MCE_VERBOSE, "CMCI: find owner on CPU%d\n", smp_processor_id()); |
543 | 12 | |
544 | 12 | spin_lock_irqsave(&cmci_discover_lock, flags); |
545 | 12 | |
546 | 288 | for ( i = 0; i < nr_mce_banks; i++ ) |
547 | 276 | if ( !mcabanks_test(i, __get_cpu_var(mce_banks_owned)) ) |
548 | 276 | do_cmci_discover(i); |
549 | 12 | |
550 | 12 | spin_unlock_irqrestore(&cmci_discover_lock, flags); |
551 | 12 | |
552 | 12 | /* |
553 | 12 | * In case CMCI happended when do owner change. |
554 | 12 | * If CMCI happened yet not processed immediately, |
555 | 12 | * MCi_status (error_count bit 38~52) is not cleared, |
556 | 12 | * the CMCI interrupt will never be triggered again. |
557 | 12 | */ |
558 | 12 | |
559 | 12 | mctc = mcheck_mca_logout( |
560 | 12 | MCA_CMCI_HANDLER, __get_cpu_var(mce_banks_owned), &bs, NULL); |
561 | 12 | |
562 | 12 | if ( bs.errcnt && mctc != NULL ) |
563 | 0 | { |
564 | 0 | if ( dom0_vmce_enabled() ) |
565 | 0 | { |
566 | 0 | mctelem_commit(mctc); |
567 | 0 | send_global_virq(VIRQ_MCA); |
568 | 0 | } |
569 | 0 | else |
570 | 0 | { |
571 | 0 | x86_mcinfo_dump(mctelem_dataptr(mctc)); |
572 | 0 | mctelem_dismiss(mctc); |
573 | 0 | } |
574 | 0 | } |
575 | 12 | else if ( mctc != NULL ) |
576 | 0 | mctelem_dismiss(mctc); |
577 | 12 | |
578 | 12 | mce_printk(MCE_VERBOSE, "CMCI: CPU%d owner_map[%lx], no_cmci_map[%lx]\n", |
579 | 0 | smp_processor_id(), |
580 | 0 | *((unsigned long *)__get_cpu_var(mce_banks_owned)->bank_map), |
581 | 0 | *((unsigned long *)__get_cpu_var(no_cmci_banks)->bank_map)); |
582 | 12 | } |
583 | | |
584 | | /* |
585 | | * Define an owner for each bank. Banks can be shared between CPUs |
586 | | * and to avoid reporting events multiple times always set up one |
587 | | * CPU as owner. |
588 | | * |
589 | | * The assignment has to be redone when CPUs go offline and |
590 | | * any of the owners goes away. Also pollers run in parallel so we |
591 | | * have to be careful to update the banks in a way that doesn't |
592 | | * lose or duplicate events. |
593 | | */ |
594 | | |
595 | | static void mce_set_owner(void) |
596 | 12 | { |
597 | 12 | if ( !cmci_support || !opt_mce ) |
598 | 0 | return; |
599 | 12 | |
600 | 12 | cmci_discover(); |
601 | 12 | } |
602 | | |
603 | | static void __cpu_mcheck_distribute_cmci(void *unused) |
604 | 0 | { |
605 | 0 | cmci_discover(); |
606 | 0 | } |
607 | | |
608 | | static void cpu_mcheck_distribute_cmci(void) |
609 | 0 | { |
610 | 0 | if ( cmci_support && opt_mce ) |
611 | 0 | on_each_cpu(__cpu_mcheck_distribute_cmci, NULL, 0); |
612 | 0 | } |
613 | | |
614 | | static void clear_cmci(void) |
615 | 0 | { |
616 | 0 | int i; |
617 | 0 |
|
618 | 0 | if ( !cmci_support || !opt_mce ) |
619 | 0 | return; |
620 | 0 |
|
621 | 0 | mce_printk(MCE_VERBOSE, "CMCI: clear_cmci support on CPU%d\n", |
622 | 0 | smp_processor_id()); |
623 | 0 |
|
624 | 0 | for ( i = 0; i < nr_mce_banks; i++ ) |
625 | 0 | { |
626 | 0 | unsigned msr = MSR_IA32_MCx_CTL2(i); |
627 | 0 | u64 val; |
628 | 0 | if ( !mcabanks_test(i, __get_cpu_var(mce_banks_owned)) ) |
629 | 0 | continue; |
630 | 0 | rdmsrl(msr, val); |
631 | 0 | if ( val & (CMCI_EN|CMCI_THRESHOLD_MASK) ) |
632 | 0 | wrmsrl(msr, val & ~(CMCI_EN|CMCI_THRESHOLD_MASK)); |
633 | 0 | mcabanks_clear(i, __get_cpu_var(mce_banks_owned)); |
634 | 0 | } |
635 | 0 | } |
636 | | |
637 | | static void cpu_mcheck_disable(void) |
638 | 0 | { |
639 | 0 | clear_in_cr4(X86_CR4_MCE); |
640 | 0 |
|
641 | 0 | if ( cmci_support && opt_mce ) |
642 | 0 | clear_cmci(); |
643 | 0 | } |
644 | | |
645 | | static void cmci_interrupt(struct cpu_user_regs *regs) |
646 | 0 | { |
647 | 0 | mctelem_cookie_t mctc; |
648 | 0 | struct mca_summary bs; |
649 | 0 |
|
650 | 0 | ack_APIC_irq(); |
651 | 0 |
|
652 | 0 | mctc = mcheck_mca_logout( |
653 | 0 | MCA_CMCI_HANDLER, __get_cpu_var(mce_banks_owned), &bs, NULL); |
654 | 0 |
|
655 | 0 | if ( bs.errcnt && mctc != NULL ) |
656 | 0 | { |
657 | 0 | if ( dom0_vmce_enabled() ) |
658 | 0 | { |
659 | 0 | mctelem_commit(mctc); |
660 | 0 | mce_printk(MCE_VERBOSE, "CMCI: send CMCI to DOM0 through virq\n"); |
661 | 0 | send_global_virq(VIRQ_MCA); |
662 | 0 | } |
663 | 0 | else |
664 | 0 | { |
665 | 0 | x86_mcinfo_dump(mctelem_dataptr(mctc)); |
666 | 0 | mctelem_dismiss(mctc); |
667 | 0 | } |
668 | 0 | } |
669 | 0 | else if ( mctc != NULL ) |
670 | 0 | mctelem_dismiss(mctc); |
671 | 0 | } |
672 | | |
673 | | static void intel_init_cmci(struct cpuinfo_x86 *c) |
674 | 12 | { |
675 | 12 | u32 l, apic; |
676 | 12 | int cpu = smp_processor_id(); |
677 | 12 | |
678 | 12 | if ( !mce_available(c) || !cmci_support ) |
679 | 0 | { |
680 | 0 | if ( opt_cpu_info ) |
681 | 0 | mce_printk(MCE_QUIET, "CMCI: CPU%d has no CMCI support\n", cpu); |
682 | 0 | return; |
683 | 0 | } |
684 | 12 | |
685 | 12 | apic = apic_read(APIC_CMCI); |
686 | 12 | if ( apic & APIC_VECTOR_MASK ) |
687 | 0 | { |
688 | 0 | mce_printk(MCE_QUIET, "CPU%d CMCI LVT vector (%#x) already installed\n", |
689 | 0 | cpu, ( apic & APIC_VECTOR_MASK )); |
690 | 0 | return; |
691 | 0 | } |
692 | 12 | |
693 | 12 | alloc_direct_apic_vector(&cmci_apic_vector, cmci_interrupt); |
694 | 12 | |
695 | 12 | apic = cmci_apic_vector; |
696 | 12 | apic |= (APIC_DM_FIXED | APIC_LVT_MASKED); |
697 | 12 | apic_write(APIC_CMCI, apic); |
698 | 12 | |
699 | 12 | l = apic_read(APIC_CMCI); |
700 | 12 | apic_write(APIC_CMCI, l & ~APIC_LVT_MASKED); |
701 | 12 | |
702 | 12 | mce_set_owner(); |
703 | 12 | } |
704 | | |
705 | | /* MCA */ |
706 | | |
707 | | static bool mce_is_broadcast(struct cpuinfo_x86 *c) |
708 | 12 | { |
709 | 12 | if ( mce_force_broadcast ) |
710 | 0 | return true; |
711 | 12 | |
712 | 12 | /* |
713 | 12 | * According to Intel SDM Dec, 2009, 15.10.4.1, For processors with |
714 | 12 | * DisplayFamily_DisplayModel encoding of 06H_EH and above, |
715 | 12 | * a MCA signal is broadcast to all logical processors in the system |
716 | 12 | */ |
717 | 12 | if ( c->x86_vendor == X86_VENDOR_INTEL && c->x86 == 6 && |
718 | 12 | c->x86_model >= 0xe ) |
719 | 12 | return true; |
720 | 0 | return false; |
721 | 12 | } |
722 | | |
723 | | static bool intel_enable_lmce(void) |
724 | 0 | { |
725 | 0 | uint64_t msr_content; |
726 | 0 |
|
727 | 0 | /* |
728 | 0 | * Section "Enabling Local Machine Check" in Intel SDM Vol 3 |
729 | 0 | * requires software must ensure the LOCK bit and LMCE_ON bit |
730 | 0 | * of MSR_IA32_FEATURE_CONTROL are set before setting |
731 | 0 | * MSR_IA32_MCG_EXT_CTL.LMCE_EN. |
732 | 0 | */ |
733 | 0 |
|
734 | 0 | if ( rdmsr_safe(MSR_IA32_FEATURE_CONTROL, msr_content) ) |
735 | 0 | return false; |
736 | 0 |
|
737 | 0 | if ( (msr_content & IA32_FEATURE_CONTROL_LOCK) && |
738 | 0 | (msr_content & IA32_FEATURE_CONTROL_LMCE_ON) ) |
739 | 0 | { |
740 | 0 | wrmsrl(MSR_IA32_MCG_EXT_CTL, MCG_EXT_CTL_LMCE_EN); |
741 | 0 | return true; |
742 | 0 | } |
743 | 0 |
|
744 | 0 | return false; |
745 | 0 | } |
746 | | |
747 | | /* Check and init MCA */ |
748 | | static void intel_init_mca(struct cpuinfo_x86 *c) |
749 | 12 | { |
750 | 12 | bool broadcast, cmci = false, ser = false, lmce = false; |
751 | 12 | int ext_num = 0, first; |
752 | 12 | uint64_t msr_content; |
753 | 12 | |
754 | 12 | broadcast = mce_is_broadcast(c); |
755 | 12 | |
756 | 12 | rdmsrl(MSR_IA32_MCG_CAP, msr_content); |
757 | 12 | |
758 | 12 | if ( (msr_content & MCG_CMCI_P) && cpu_has_apic ) |
759 | 12 | cmci = true; |
760 | 12 | |
761 | 12 | /* Support Software Error Recovery */ |
762 | 12 | if ( msr_content & MCG_SER_P ) |
763 | 12 | ser = true; |
764 | 12 | |
765 | 12 | if ( msr_content & MCG_EXT_P ) |
766 | 0 | ext_num = (msr_content >> MCG_EXT_CNT) & 0xff; |
767 | 12 | |
768 | 12 | first = mce_firstbank(c); |
769 | 12 | |
770 | 12 | if ( !mce_force_broadcast && (msr_content & MCG_LMCE_P) ) |
771 | 0 | lmce = intel_enable_lmce(); |
772 | 12 | |
773 | 4 | #define CAP(enabled, name) ((enabled) ? ", " name : "") |
774 | 12 | if ( smp_processor_id() == 0 ) |
775 | 1 | { |
776 | 1 | dprintk(XENLOG_INFO, |
777 | 1 | "MCA Capability: firstbank %d, extended MCE MSR %d%s%s%s%s\n", |
778 | 1 | first, ext_num, |
779 | 1 | CAP(broadcast, "BCAST"), |
780 | 1 | CAP(ser, "SER"), |
781 | 1 | CAP(cmci, "CMCI"), |
782 | 1 | CAP(lmce, "LMCE")); |
783 | 1 | |
784 | 1 | mce_broadcast = broadcast; |
785 | 1 | cmci_support = cmci; |
786 | 1 | ser_support = ser; |
787 | 1 | lmce_support = lmce; |
788 | 1 | nr_intel_ext_msrs = ext_num; |
789 | 1 | firstbank = first; |
790 | 1 | } |
791 | 11 | else if ( cmci != cmci_support || ser != ser_support || |
792 | 11 | broadcast != mce_broadcast || |
793 | 11 | first != firstbank || ext_num != nr_intel_ext_msrs || |
794 | 11 | lmce != lmce_support ) |
795 | 0 | dprintk(XENLOG_WARNING, |
796 | 12 | "CPU%u has different MCA capability " |
797 | 12 | "(firstbank %d, extended MCE MSR %d%s%s%s%s)" |
798 | 12 | " than BSP, may cause undetermined result!!!\n", |
799 | 0 | smp_processor_id(), first, ext_num, |
800 | 0 | CAP(broadcast, "BCAST"), |
801 | 0 | CAP(ser, "SER"), |
802 | 0 | CAP(cmci, "CMCI"), |
803 | 0 | CAP(lmce, "LMCE")); |
804 | 12 | #undef CAP |
805 | 12 | } |
806 | | |
807 | | static void intel_mce_post_reset(void) |
808 | 12 | { |
809 | 12 | mctelem_cookie_t mctc; |
810 | 12 | struct mca_summary bs; |
811 | 12 | |
812 | 12 | mctc = mcheck_mca_logout(MCA_RESET, mca_allbanks, &bs, NULL); |
813 | 12 | |
814 | 12 | /* in the boot up stage, print out and also log in DOM0 boot process */ |
815 | 12 | if ( bs.errcnt && mctc != NULL ) |
816 | 0 | { |
817 | 0 | x86_mcinfo_dump(mctelem_dataptr(mctc)); |
818 | 0 | mctelem_commit(mctc); |
819 | 0 | } |
820 | 12 | return; |
821 | 12 | } |
822 | | |
823 | | static void intel_init_mce(void) |
824 | 12 | { |
825 | 12 | uint64_t msr_content; |
826 | 12 | int i; |
827 | 12 | |
828 | 12 | intel_mce_post_reset(); |
829 | 12 | |
830 | 12 | /* clear all banks */ |
831 | 288 | for ( i = firstbank; i < nr_mce_banks; i++ ) |
832 | 276 | { |
833 | 276 | /* |
834 | 276 | * Some banks are shared across cores, use MCi_CTRL to judge whether |
835 | 276 | * this bank has been initialized by other cores already. |
836 | 276 | */ |
837 | 276 | rdmsrl(MSR_IA32_MCx_CTL(i), msr_content); |
838 | 276 | if ( !msr_content ) |
839 | 60 | { |
840 | 60 | /* if ctl is 0, this bank is never initialized */ |
841 | 60 | mce_printk(MCE_VERBOSE, "mce_init: init bank%d\n", i); |
842 | 60 | wrmsrl(MSR_IA32_MCx_CTL(i), 0xffffffffffffffffULL); |
843 | 60 | wrmsrl(MSR_IA32_MCx_STATUS(i), 0x0ULL); |
844 | 60 | } |
845 | 276 | } |
846 | 12 | if ( firstbank ) /* if cmci enabled, firstbank = 0 */ |
847 | 0 | wrmsrl(MSR_IA32_MC0_STATUS, 0x0ULL); |
848 | 12 | |
849 | 12 | x86_mce_vector_register(mcheck_cmn_handler); |
850 | 12 | mce_recoverable_register(intel_recoverable_scan); |
851 | 12 | mce_need_clearbank_register(intel_need_clearbank_scan); |
852 | 12 | mce_register_addrcheck(intel_checkaddr); |
853 | 12 | |
854 | 12 | mce_dhandlers = intel_mce_dhandlers; |
855 | 12 | mce_dhandler_num = ARRAY_SIZE(intel_mce_dhandlers); |
856 | 12 | mce_uhandlers = intel_mce_uhandlers; |
857 | 12 | mce_uhandler_num = ARRAY_SIZE(intel_mce_uhandlers); |
858 | 12 | } |
859 | | |
860 | | static void cpu_mcabank_free(unsigned int cpu) |
861 | 0 | { |
862 | 0 | struct mca_banks *cmci = per_cpu(no_cmci_banks, cpu); |
863 | 0 | struct mca_banks *owned = per_cpu(mce_banks_owned, cpu); |
864 | 0 |
|
865 | 0 | mcabanks_free(cmci); |
866 | 0 | mcabanks_free(owned); |
867 | 0 | } |
868 | | |
869 | | static int cpu_mcabank_alloc(unsigned int cpu) |
870 | 12 | { |
871 | 12 | struct mca_banks *cmci = mcabanks_alloc(); |
872 | 12 | struct mca_banks *owned = mcabanks_alloc(); |
873 | 12 | |
874 | 12 | if ( !cmci || !owned ) |
875 | 0 | goto out; |
876 | 12 | |
877 | 12 | per_cpu(no_cmci_banks, cpu) = cmci; |
878 | 12 | per_cpu(mce_banks_owned, cpu) = owned; |
879 | 12 | per_cpu(last_state, cpu) = -1; |
880 | 12 | |
881 | 12 | return 0; |
882 | 0 | out: |
883 | 0 | mcabanks_free(cmci); |
884 | 0 | mcabanks_free(owned); |
885 | 0 | return -ENOMEM; |
886 | 12 | } |
887 | | |
888 | | static int cpu_callback( |
889 | | struct notifier_block *nfb, unsigned long action, void *hcpu) |
890 | 33 | { |
891 | 33 | unsigned int cpu = (unsigned long)hcpu; |
892 | 33 | int rc = 0; |
893 | 33 | |
894 | 33 | switch ( action ) |
895 | 33 | { |
896 | 11 | case CPU_UP_PREPARE: |
897 | 11 | rc = cpu_mcabank_alloc(cpu); |
898 | 11 | break; |
899 | 11 | |
900 | 0 | case CPU_DYING: |
901 | 0 | cpu_mcheck_disable(); |
902 | 0 | break; |
903 | 11 | |
904 | 0 | case CPU_UP_CANCELED: |
905 | 0 | case CPU_DEAD: |
906 | 0 | cpu_mcheck_distribute_cmci(); |
907 | 0 | cpu_mcabank_free(cpu); |
908 | 0 | break; |
909 | 33 | } |
910 | 33 | |
911 | 33 | return !rc ? NOTIFY_DONE : notifier_from_errno(rc); |
912 | 33 | } |
913 | | |
914 | | static struct notifier_block cpu_nfb = { |
915 | | .notifier_call = cpu_callback |
916 | | }; |
917 | | |
918 | | /* p4/p6 family have similar MCA initialization process */ |
919 | | enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c, bool bsp) |
920 | 12 | { |
921 | 12 | if ( bsp ) |
922 | 1 | { |
923 | 1 | /* Early MCE initialisation for BSP. */ |
924 | 1 | if ( cpu_mcabank_alloc(0) ) |
925 | 0 | BUG(); |
926 | 1 | register_cpu_notifier(&cpu_nfb); |
927 | 1 | mcheck_intel_therm_init(); |
928 | 1 | } |
929 | 12 | |
930 | 12 | intel_init_mca(c); |
931 | 12 | |
932 | 12 | mce_handler_init(); |
933 | 12 | |
934 | 12 | intel_init_mce(); |
935 | 12 | |
936 | 12 | intel_init_cmci(c); |
937 | 12 | #ifdef CONFIG_X86_MCE_THERMAL |
938 | 12 | intel_init_thermal(c); |
939 | 12 | #endif |
940 | 12 | |
941 | 12 | return mcheck_intel; |
942 | 12 | } |
943 | | |
944 | | /* intel specific MCA MSR */ |
945 | | int vmce_intel_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) |
946 | 72 | { |
947 | 72 | unsigned int bank = msr - MSR_IA32_MC0_CTL2; |
948 | 72 | |
949 | 72 | if ( bank < GUEST_MC_BANK_NUM ) |
950 | 72 | { |
951 | 72 | v->arch.vmce.bank[bank].mci_ctl2 = val; |
952 | 72 | mce_printk(MCE_VERBOSE, "MCE: wr MC%u_CTL2 %#"PRIx64"\n", bank, val); |
953 | 72 | } |
954 | 72 | |
955 | 72 | return 1; |
956 | 72 | } |
957 | | |
958 | | int vmce_intel_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val) |
959 | 71 | { |
960 | 71 | unsigned int bank = msr - MSR_IA32_MC0_CTL2; |
961 | 71 | |
962 | 71 | if ( bank < GUEST_MC_BANK_NUM ) |
963 | 71 | { |
964 | 71 | *val = v->arch.vmce.bank[bank].mci_ctl2; |
965 | 71 | mce_printk(MCE_VERBOSE, "MCE: rd MC%u_CTL2 %#"PRIx64"\n", bank, *val); |
966 | 71 | } |
967 | 71 | |
968 | 71 | return 1; |
969 | 71 | } |
970 | | |
971 | | bool vmce_has_lmce(const struct vcpu *v) |
972 | 0 | { |
973 | 0 | return v->arch.vmce.mcg_cap & MCG_LMCE_P; |
974 | 0 | } |