/root/src/xen/xen/arch/x86/cpu/mcheck/mce.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * mce.c - x86 Machine Check Exception Reporting |
3 | | * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@codemonkey.org.uk> |
4 | | */ |
5 | | |
6 | | #include <xen/init.h> |
7 | | #include <xen/types.h> |
8 | | #include <xen/kernel.h> |
9 | | #include <xen/smp.h> |
10 | | #include <xen/errno.h> |
11 | | #include <xen/console.h> |
12 | | #include <xen/sched.h> |
13 | | #include <xen/sched-if.h> |
14 | | #include <xen/cpumask.h> |
15 | | #include <xen/event.h> |
16 | | #include <xen/guest_access.h> |
17 | | #include <xen/hypercall.h> /* for do_mca */ |
18 | | #include <xen/cpu.h> |
19 | | |
20 | | #include <asm/processor.h> |
21 | | #include <asm/setup.h> |
22 | | #include <asm/system.h> |
23 | | #include <asm/apic.h> |
24 | | #include <asm/msr.h> |
25 | | #include <asm/p2m.h> |
26 | | |
27 | | #include "mce.h" |
28 | | #include "barrier.h" |
29 | | #include "mcaction.h" |
30 | | #include "util.h" |
31 | | #include "vmce.h" |
32 | | |
33 | | bool __read_mostly opt_mce = true; |
34 | | boolean_param("mce", opt_mce); |
35 | | bool __read_mostly mce_broadcast; |
36 | | bool is_mc_panic; |
37 | | unsigned int __read_mostly nr_mce_banks; |
38 | | unsigned int __read_mostly firstbank; |
39 | | uint8_t __read_mostly cmci_apic_vector; |
40 | | |
41 | | DEFINE_PER_CPU_READ_MOSTLY(struct mca_banks *, poll_bankmask); |
42 | | DEFINE_PER_CPU_READ_MOSTLY(struct mca_banks *, no_cmci_banks); |
43 | | DEFINE_PER_CPU_READ_MOSTLY(struct mca_banks *, mce_clear_banks); |
44 | | |
45 | | static void intpose_init(void); |
46 | | static void mcinfo_clear(struct mc_info *); |
47 | | struct mca_banks *mca_allbanks; |
48 | | |
49 | | #define SEG_PL(segsel) ((segsel) & 0x3) |
50 | 0 | #define _MC_MSRINJ_F_REQ_HWCR_WREN (1 << 16) |
51 | | |
52 | | #if 0 |
53 | | #define x86_mcerr(fmt, err, args...) \ |
54 | | ({ \ |
55 | | int _err = (err); \ |
56 | | gdprintk(XENLOG_WARNING, "x86_mcerr: " fmt ", returning %d\n", \ |
57 | | ## args, _err); \ |
58 | | _err; \ |
59 | | }) |
60 | | #else |
61 | 0 | #define x86_mcerr(fmt, err, args...) (err) |
62 | | #endif |
63 | | |
64 | | int mce_verbosity; |
65 | | static int __init mce_set_verbosity(const char *str) |
66 | 0 | { |
67 | 0 | if ( strcmp("verbose", str) == 0 ) |
68 | 0 | mce_verbosity = MCE_VERBOSE; |
69 | 0 | else |
70 | 0 | return -EINVAL; |
71 | 0 |
|
72 | 0 | return 0; |
73 | 0 | } |
74 | | custom_param("mce_verbosity", mce_set_verbosity); |
75 | | |
76 | | /* Handle unconfigured int18 (should never happen) */ |
77 | | static void unexpected_machine_check(const struct cpu_user_regs *regs) |
78 | 0 | { |
79 | 0 | console_force_unlock(); |
80 | 0 | printk("Unexpected Machine Check Exception\n"); |
81 | 0 | fatal_trap(regs, 1); |
82 | 0 | } |
83 | | |
84 | | static x86_mce_vector_t _machine_check_vector = unexpected_machine_check; |
85 | | |
86 | | void x86_mce_vector_register(x86_mce_vector_t hdlr) |
87 | 12 | { |
88 | 12 | _machine_check_vector = hdlr; |
89 | 12 | wmb(); |
90 | 12 | } |
91 | | |
92 | | /* Call the installed machine check handler for this CPU setup. */ |
93 | | |
94 | | void do_machine_check(const struct cpu_user_regs *regs) |
95 | 0 | { |
96 | 0 | _machine_check_vector(regs); |
97 | 0 | } |
98 | | |
99 | | /* |
100 | | * Init machine check callback handler |
101 | | * It is used to collect additional information provided by newer |
102 | | * CPU families/models without the need to duplicate the whole handler. |
103 | | * This avoids having many handlers doing almost nearly the same and each |
104 | | * with its own tweaks ands bugs. |
105 | | */ |
106 | | static x86_mce_callback_t mc_callback_bank_extended = NULL; |
107 | | |
108 | | void x86_mce_callback_register(x86_mce_callback_t cbfunc) |
109 | 0 | { |
110 | 0 | mc_callback_bank_extended = cbfunc; |
111 | 0 | } |
112 | | |
113 | | /* |
114 | | * Machine check recoverable judgement callback handler |
115 | | * It is used to judge whether an UC error is recoverable by software |
116 | | */ |
117 | | static mce_recoverable_t mc_recoverable_scan = NULL; |
118 | | |
119 | | void mce_recoverable_register(mce_recoverable_t cbfunc) |
120 | 12 | { |
121 | 12 | mc_recoverable_scan = cbfunc; |
122 | 12 | } |
123 | | |
124 | | struct mca_banks *mcabanks_alloc(void) |
125 | 49 | { |
126 | 49 | struct mca_banks *mb; |
127 | 49 | |
128 | 49 | mb = xmalloc(struct mca_banks); |
129 | 49 | if ( !mb ) |
130 | 0 | return NULL; |
131 | 49 | |
132 | 49 | mb->bank_map = xzalloc_array(unsigned long, |
133 | 49 | BITS_TO_LONGS(nr_mce_banks)); |
134 | 49 | if ( !mb->bank_map ) |
135 | 0 | { |
136 | 0 | xfree(mb); |
137 | 0 | return NULL; |
138 | 0 | } |
139 | 49 | |
140 | 49 | mb->num = nr_mce_banks; |
141 | 49 | |
142 | 49 | return mb; |
143 | 49 | } |
144 | | |
145 | | void mcabanks_free(struct mca_banks *banks) |
146 | 0 | { |
147 | 0 | if ( banks == NULL ) |
148 | 0 | return; |
149 | 0 | if ( banks->bank_map ) |
150 | 0 | xfree(banks->bank_map); |
151 | 0 | xfree(banks); |
152 | 0 | } |
153 | | |
154 | | static void mcabank_clear(int banknum) |
155 | 0 | { |
156 | 0 | uint64_t status; |
157 | 0 |
|
158 | 0 | status = mca_rdmsr(MSR_IA32_MCx_STATUS(banknum)); |
159 | 0 |
|
160 | 0 | if ( status & MCi_STATUS_ADDRV ) |
161 | 0 | mca_wrmsr(MSR_IA32_MCx_ADDR(banknum), 0x0ULL); |
162 | 0 | if ( status & MCi_STATUS_MISCV ) |
163 | 0 | mca_wrmsr(MSR_IA32_MCx_MISC(banknum), 0x0ULL); |
164 | 0 |
|
165 | 0 | mca_wrmsr(MSR_IA32_MCx_STATUS(banknum), 0x0ULL); |
166 | 0 | } |
167 | | |
168 | | /* |
169 | | * Judging whether to Clear Machine Check error bank callback handler |
170 | | * According to Intel latest MCA OS Recovery Writer's Guide, |
171 | | * whether the error MCA bank needs to be cleared is decided by the mca_source |
172 | | * and MCi_status bit value. |
173 | | */ |
174 | | static mce_need_clearbank_t mc_need_clearbank_scan = NULL; |
175 | | |
176 | | void mce_need_clearbank_register(mce_need_clearbank_t cbfunc) |
177 | 12 | { |
178 | 12 | mc_need_clearbank_scan = cbfunc; |
179 | 12 | } |
180 | | |
181 | | /* |
182 | | * mce_logout_lock should only be used in the trap handler, |
183 | | * while MCIP has not been cleared yet in the global status |
184 | | * register. Other use is not safe, since an MCE trap can |
185 | | * happen at any moment, which would cause lock recursion. |
186 | | */ |
187 | | static DEFINE_SPINLOCK(mce_logout_lock); |
188 | | |
189 | | const struct mca_error_handler *__read_mostly mce_dhandlers; |
190 | | const struct mca_error_handler *__read_mostly mce_uhandlers; |
191 | | unsigned int __read_mostly mce_dhandler_num; |
192 | | unsigned int __read_mostly mce_uhandler_num; |
193 | | |
194 | | static void mca_init_bank(enum mca_source who, struct mc_info *mi, int bank) |
195 | 0 | { |
196 | 0 | struct mcinfo_bank *mib; |
197 | 0 |
|
198 | 0 | if ( !mi ) |
199 | 0 | return; |
200 | 0 |
|
201 | 0 | mib = x86_mcinfo_reserve(mi, sizeof(*mib), MC_TYPE_BANK); |
202 | 0 | if ( !mib ) |
203 | 0 | { |
204 | 0 | mi->flags |= MCINFO_FLAGS_UNCOMPLETE; |
205 | 0 | return; |
206 | 0 | } |
207 | 0 |
|
208 | 0 | mib->mc_status = mca_rdmsr(MSR_IA32_MCx_STATUS(bank)); |
209 | 0 |
|
210 | 0 | mib->mc_bank = bank; |
211 | 0 | mib->mc_domid = DOMID_INVALID; |
212 | 0 |
|
213 | 0 | if ( mib->mc_status & MCi_STATUS_MISCV ) |
214 | 0 | mib->mc_misc = mca_rdmsr(MSR_IA32_MCx_MISC(bank)); |
215 | 0 |
|
216 | 0 | if ( mib->mc_status & MCi_STATUS_ADDRV ) |
217 | 0 | mib->mc_addr = mca_rdmsr(MSR_IA32_MCx_ADDR(bank)); |
218 | 0 |
|
219 | 0 | if ( (mib->mc_status & MCi_STATUS_MISCV) && |
220 | 0 | (mib->mc_status & MCi_STATUS_ADDRV) && |
221 | 0 | (mc_check_addr(mib->mc_status, mib->mc_misc, MC_ADDR_PHYSICAL)) && |
222 | 0 | (who == MCA_POLLER || who == MCA_CMCI_HANDLER) && |
223 | 0 | (mfn_valid(_mfn(paddr_to_pfn(mib->mc_addr)))) ) |
224 | 0 | { |
225 | 0 | struct domain *d; |
226 | 0 |
|
227 | 0 | d = maddr_get_owner(mib->mc_addr); |
228 | 0 | if ( d ) |
229 | 0 | mib->mc_domid = d->domain_id; |
230 | 0 | } |
231 | 0 |
|
232 | 0 | if ( who == MCA_CMCI_HANDLER ) |
233 | 0 | { |
234 | 0 | mib->mc_ctrl2 = mca_rdmsr(MSR_IA32_MC0_CTL2 + bank); |
235 | 0 | mib->mc_tsc = rdtsc(); |
236 | 0 | } |
237 | 0 | } |
238 | | |
239 | | static int mca_init_global(uint32_t flags, struct mcinfo_global *mig) |
240 | 0 | { |
241 | 0 | uint64_t status; |
242 | 0 | int cpu_nr; |
243 | 0 | const struct vcpu *curr = current; |
244 | 0 |
|
245 | 0 | /* Set global information */ |
246 | 0 | status = mca_rdmsr(MSR_IA32_MCG_STATUS); |
247 | 0 | mig->mc_gstatus = status; |
248 | 0 | mig->mc_domid = DOMID_INVALID; |
249 | 0 | mig->mc_vcpuid = XEN_MC_VCPUID_INVALID; |
250 | 0 | mig->mc_flags = flags; |
251 | 0 | cpu_nr = smp_processor_id(); |
252 | 0 | /* Retrieve detector information */ |
253 | 0 | x86_mc_get_cpu_info(cpu_nr, &mig->mc_socketid, |
254 | 0 | &mig->mc_coreid, &mig->mc_core_threadid, |
255 | 0 | &mig->mc_apicid, NULL, NULL, NULL); |
256 | 0 |
|
257 | 0 | if ( curr != INVALID_VCPU ) |
258 | 0 | { |
259 | 0 | mig->mc_domid = curr->domain->domain_id; |
260 | 0 | mig->mc_vcpuid = curr->vcpu_id; |
261 | 0 | } |
262 | 0 |
|
263 | 0 | return 0; |
264 | 0 | } |
265 | | |
266 | | /* |
267 | | * Utility function to perform MCA bank telemetry readout and to push that |
268 | | * telemetry towards an interested dom0 for logging and diagnosis. |
269 | | * The caller - #MC handler or MCA poll function - must arrange that we |
270 | | * do not migrate cpus. |
271 | | */ |
272 | | |
273 | | /* XXFM Could add overflow counting? */ |
274 | | |
275 | | /* |
276 | | * Add out_param clear_bank for Machine Check Handler Caller. |
277 | | * For Intel latest CPU, whether to clear the error bank status needs to |
278 | | * be judged by the callback function defined above. |
279 | | */ |
280 | | mctelem_cookie_t |
281 | | mcheck_mca_logout(enum mca_source who, struct mca_banks *bankmask, |
282 | | struct mca_summary *sp, struct mca_banks *clear_bank) |
283 | 73 | { |
284 | 73 | uint64_t gstatus, status; |
285 | 73 | struct mcinfo_global *mig = NULL; /* on stack */ |
286 | 73 | mctelem_cookie_t mctc = NULL; |
287 | 73 | bool uc = false, pcc = false, recover = true, need_clear = true; |
288 | 73 | uint32_t mc_flags = 0; |
289 | 73 | struct mc_info *mci = NULL; |
290 | 73 | mctelem_class_t which = MC_URGENT; /* XXXgcc */ |
291 | 73 | int errcnt = 0; |
292 | 73 | int i; |
293 | 73 | |
294 | 73 | gstatus = mca_rdmsr(MSR_IA32_MCG_STATUS); |
295 | 73 | switch ( who ) |
296 | 73 | { |
297 | 0 | case MCA_MCE_SCAN: |
298 | 0 | mc_flags = MC_FLAG_MCE; |
299 | 0 | which = MC_URGENT; |
300 | 0 | break; |
301 | 0 |
|
302 | 53 | case MCA_POLLER: |
303 | 53 | case MCA_RESET: |
304 | 53 | mc_flags = MC_FLAG_POLLED; |
305 | 53 | which = MC_NONURGENT; |
306 | 53 | break; |
307 | 53 | |
308 | 12 | case MCA_CMCI_HANDLER: |
309 | 12 | mc_flags = MC_FLAG_CMCI; |
310 | 12 | which = MC_NONURGENT; |
311 | 12 | break; |
312 | 53 | |
313 | 0 | default: |
314 | 0 | BUG(); |
315 | 73 | } |
316 | 73 | |
317 | 73 | /* |
318 | 73 | * If no mc_recovery_scan callback handler registered, |
319 | 73 | * this error is not recoverable |
320 | 73 | */ |
321 | 76 | recover = mc_recoverable_scan ? 1 : 0; |
322 | 76 | |
323 | 1.62k | for ( i = 0; i < nr_mce_banks; i++ ) |
324 | 1.55k | { |
325 | 1.55k | /* Skip bank if corresponding bit in bankmask is clear */ |
326 | 1.55k | if ( !mcabanks_test(i, bankmask) ) |
327 | 1.01k | continue; |
328 | 1.55k | |
329 | 539 | status = mca_rdmsr(MSR_IA32_MCx_STATUS(i)); |
330 | 539 | if ( !(status & MCi_STATUS_VAL) ) |
331 | 755 | continue; /* this bank has no valid telemetry */ |
332 | 539 | |
333 | 539 | /* |
334 | 539 | * For Intel Latest CPU CMCI/MCE Handler caller, we need to |
335 | 539 | * decide whether to clear bank by MCi_STATUS bit value such as |
336 | 539 | * OVER/UC/EN/PCC/S/AR |
337 | 539 | */ |
338 | 18.4E | if ( mc_need_clearbank_scan ) |
339 | 0 | need_clear = mc_need_clearbank_scan(who, status); |
340 | 18.4E | |
341 | 18.4E | /* |
342 | 18.4E | * If this is the first bank with valid MCA DATA, then |
343 | 18.4E | * try to reserve an entry from the urgent/nonurgent queue |
344 | 18.4E | * depending on whether we are called from an exception or |
345 | 18.4E | * a poller; this can fail (for example dom0 may not |
346 | 18.4E | * yet have consumed past telemetry). |
347 | 18.4E | */ |
348 | 18.4E | if ( errcnt++ == 0 ) |
349 | 0 | { |
350 | 0 | mctc = mctelem_reserve(which); |
351 | 0 | if ( mctc ) |
352 | 0 | { |
353 | 0 | mci = mctelem_dataptr(mctc); |
354 | 0 | mcinfo_clear(mci); |
355 | 0 | mig = x86_mcinfo_reserve(mci, sizeof(*mig), MC_TYPE_GLOBAL); |
356 | 0 | /* mc_info should at least hold up the global information */ |
357 | 0 | ASSERT(mig); |
358 | 0 | mca_init_global(mc_flags, mig); |
359 | 0 | /* A hook here to get global extended msrs */ |
360 | 0 | if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ) |
361 | 0 | intel_get_extended_msrs(mig, mci); |
362 | 0 | } |
363 | 0 | } |
364 | 18.4E | |
365 | 18.4E | /* flag for uncorrected errors */ |
366 | 18.4E | if ( !uc && ((status & MCi_STATUS_UC) != 0) ) |
367 | 0 | uc = true; |
368 | 18.4E | |
369 | 18.4E | /* flag processor context corrupt */ |
370 | 18.4E | if ( !pcc && ((status & MCi_STATUS_PCC) != 0) ) |
371 | 0 | pcc = true; |
372 | 18.4E | |
373 | 18.4E | if ( recover && uc ) |
374 | 18.4E | /* uc = true, recover = true, we need not panic. */ |
375 | 0 | recover = mc_recoverable_scan(status); |
376 | 18.4E | |
377 | 18.4E | mca_init_bank(who, mci, i); |
378 | 18.4E | |
379 | 18.4E | if ( mc_callback_bank_extended ) |
380 | 0 | mc_callback_bank_extended(mci, i, status); |
381 | 18.4E | |
382 | 18.4E | /* By default, need_clear = true */ |
383 | 18.4E | if ( who != MCA_MCE_SCAN && need_clear ) |
384 | 18.4E | /* Clear bank */ |
385 | 0 | mcabank_clear(i); |
386 | 18.4E | else if ( who == MCA_MCE_SCAN && need_clear ) |
387 | 0 | mcabanks_set(i, clear_bank); |
388 | 18.4E | |
389 | 18.4E | wmb(); |
390 | 18.4E | } |
391 | 76 | |
392 | 76 | if ( mig && errcnt > 0 ) |
393 | 0 | { |
394 | 0 | if ( pcc ) |
395 | 0 | mig->mc_flags |= MC_FLAG_UNCORRECTABLE; |
396 | 0 | else if ( uc ) |
397 | 0 | mig->mc_flags |= MC_FLAG_RECOVERABLE; |
398 | 0 | else |
399 | 0 | mig->mc_flags |= MC_FLAG_CORRECTABLE; |
400 | 0 | } |
401 | 76 | |
402 | 76 | if ( sp ) |
403 | 87 | { |
404 | 87 | sp->errcnt = errcnt; |
405 | 87 | sp->ripv = (gstatus & MCG_STATUS_RIPV) != 0; |
406 | 87 | sp->eipv = (gstatus & MCG_STATUS_EIPV) != 0; |
407 | 87 | sp->lmce = (gstatus & MCG_STATUS_LMCE) != 0; |
408 | 87 | sp->uc = uc; |
409 | 87 | sp->pcc = pcc; |
410 | 87 | sp->recoverable = recover; |
411 | 87 | } |
412 | 76 | |
413 | 76 | return mci != NULL ? mctc : NULL; /* may be NULL */ |
414 | 73 | } |
415 | | |
416 | | static void mce_spin_lock(spinlock_t *lk) |
417 | 0 | { |
418 | 0 | while ( !spin_trylock(lk) ) |
419 | 0 | { |
420 | 0 | cpu_relax(); |
421 | 0 | mce_panic_check(); |
422 | 0 | } |
423 | 0 | } |
424 | | |
425 | | static void mce_spin_unlock(spinlock_t *lk) |
426 | 0 | { |
427 | 0 | spin_unlock(lk); |
428 | 0 | } |
429 | | |
430 | | static enum mce_result mce_action(const struct cpu_user_regs *regs, |
431 | | mctelem_cookie_t mctc); |
432 | | |
433 | | /* |
434 | | * Return: |
435 | | * -1: if system can't be recovered |
436 | | * 0: Continue to next step |
437 | | */ |
438 | | static int mce_urgent_action(const struct cpu_user_regs *regs, |
439 | | mctelem_cookie_t mctc) |
440 | 0 | { |
441 | 0 | uint64_t gstatus; |
442 | 0 |
|
443 | 0 | if ( mctc == NULL ) |
444 | 0 | return 0; |
445 | 0 |
|
446 | 0 | gstatus = mca_rdmsr(MSR_IA32_MCG_STATUS); |
447 | 0 |
|
448 | 0 | /* |
449 | 0 | * FIXME: When RIPV = EIPV = 0, it's a little bit tricky. It may be an |
450 | 0 | * asynchronic error, currently we have no way to precisely locate |
451 | 0 | * whether the error occur at guest or hypervisor. |
452 | 0 | * To avoid handling error in wrong way, we treat it as unrecovered. |
453 | 0 | * |
454 | 0 | * Another unrecovered case is RIPV = 0 while in hypervisor |
455 | 0 | * since Xen is not pre-emptible. |
456 | 0 | */ |
457 | 0 | if ( !(gstatus & MCG_STATUS_RIPV) && |
458 | 0 | (!(gstatus & MCG_STATUS_EIPV) || !guest_mode(regs)) ) |
459 | 0 | return -1; |
460 | 0 |
|
461 | 0 | return mce_action(regs, mctc) == MCER_RESET ? -1 : 0; |
462 | 0 | } |
463 | | |
464 | | /* Shared #MC handler. */ |
465 | | void mcheck_cmn_handler(const struct cpu_user_regs *regs) |
466 | 0 | { |
467 | 0 | static DEFINE_MCE_BARRIER(mce_trap_bar); |
468 | 0 | static atomic_t severity_cpu = ATOMIC_INIT(-1); |
469 | 0 | static atomic_t found_error = ATOMIC_INIT(0); |
470 | 0 | static cpumask_t mce_fatal_cpus; |
471 | 0 | struct mca_banks *bankmask = mca_allbanks; |
472 | 0 | struct mca_banks *clear_bank = __get_cpu_var(mce_clear_banks); |
473 | 0 | uint64_t gstatus; |
474 | 0 | mctelem_cookie_t mctc = NULL; |
475 | 0 | struct mca_summary bs; |
476 | 0 | bool bcast, lmce; |
477 | 0 |
|
478 | 0 | mce_spin_lock(&mce_logout_lock); |
479 | 0 |
|
480 | 0 | if ( clear_bank != NULL ) |
481 | 0 | memset(clear_bank->bank_map, 0x0, |
482 | 0 | sizeof(long) * BITS_TO_LONGS(clear_bank->num)); |
483 | 0 | mctc = mcheck_mca_logout(MCA_MCE_SCAN, bankmask, &bs, clear_bank); |
484 | 0 | lmce = bs.lmce; |
485 | 0 | bcast = mce_broadcast && !lmce; |
486 | 0 |
|
487 | 0 | if ( bs.errcnt ) |
488 | 0 | { |
489 | 0 | /* |
490 | 0 | * Uncorrected errors must be dealt with in softirq context. |
491 | 0 | */ |
492 | 0 | if ( bs.uc || bs.pcc ) |
493 | 0 | { |
494 | 0 | add_taint(TAINT_MACHINE_CHECK); |
495 | 0 | if ( mctc ) |
496 | 0 | mctelem_defer(mctc, lmce); |
497 | 0 | /* |
498 | 0 | * For PCC=1 and can't be recovered, context is lost, so |
499 | 0 | * reboot now without clearing the banks, and deal with |
500 | 0 | * the telemetry after reboot (the MSRs are sticky) |
501 | 0 | */ |
502 | 0 | if ( bs.pcc || !bs.recoverable ) |
503 | 0 | cpumask_set_cpu(smp_processor_id(), &mce_fatal_cpus); |
504 | 0 | } |
505 | 0 | else if ( mctc != NULL ) |
506 | 0 | mctelem_commit(mctc); |
507 | 0 | atomic_set(&found_error, 1); |
508 | 0 |
|
509 | 0 | /* The last CPU will be take check/clean-up etc */ |
510 | 0 | atomic_set(&severity_cpu, smp_processor_id()); |
511 | 0 |
|
512 | 0 | mce_printk(MCE_CRITICAL, "MCE: clear_bank map %lx on CPU%d\n", |
513 | 0 | *((unsigned long *)clear_bank), smp_processor_id()); |
514 | 0 | if ( clear_bank != NULL ) |
515 | 0 | mcheck_mca_clearbanks(clear_bank); |
516 | 0 | } |
517 | 0 | else if ( mctc != NULL ) |
518 | 0 | mctelem_dismiss(mctc); |
519 | 0 | mce_spin_unlock(&mce_logout_lock); |
520 | 0 |
|
521 | 0 | mce_barrier_enter(&mce_trap_bar, bcast); |
522 | 0 | if ( mctc != NULL && mce_urgent_action(regs, mctc) ) |
523 | 0 | cpumask_set_cpu(smp_processor_id(), &mce_fatal_cpus); |
524 | 0 | mce_barrier_exit(&mce_trap_bar, bcast); |
525 | 0 |
|
526 | 0 | /* |
527 | 0 | * Wait until everybody has processed the trap. |
528 | 0 | */ |
529 | 0 | mce_barrier_enter(&mce_trap_bar, bcast); |
530 | 0 | if ( lmce || atomic_read(&severity_cpu) == smp_processor_id() ) |
531 | 0 | { |
532 | 0 | /* |
533 | 0 | * According to SDM, if no error bank found on any cpus, |
534 | 0 | * something unexpected happening, we can't do any |
535 | 0 | * recovery job but to reset the system. |
536 | 0 | */ |
537 | 0 | if ( atomic_read(&found_error) == 0 ) |
538 | 0 | mc_panic("MCE: No CPU found valid MCE, need reset"); |
539 | 0 | if ( !cpumask_empty(&mce_fatal_cpus) ) |
540 | 0 | { |
541 | 0 | char *ebufp, ebuf[96] = "MCE: Fatal error happened on CPUs "; |
542 | 0 | ebufp = ebuf + strlen(ebuf); |
543 | 0 | cpumask_scnprintf(ebufp, 95 - strlen(ebuf), &mce_fatal_cpus); |
544 | 0 | mc_panic(ebuf); |
545 | 0 | } |
546 | 0 | atomic_set(&found_error, 0); |
547 | 0 | atomic_set(&severity_cpu, -1); |
548 | 0 | } |
549 | 0 | mce_barrier_exit(&mce_trap_bar, bcast); |
550 | 0 |
|
551 | 0 | /* Clear flags after above fatal check */ |
552 | 0 | mce_barrier_enter(&mce_trap_bar, bcast); |
553 | 0 | gstatus = mca_rdmsr(MSR_IA32_MCG_STATUS); |
554 | 0 | if ( (gstatus & MCG_STATUS_MCIP) != 0 ) |
555 | 0 | { |
556 | 0 | mce_printk(MCE_CRITICAL, "MCE: Clear MCIP@ last step"); |
557 | 0 | mca_wrmsr(MSR_IA32_MCG_STATUS, 0); |
558 | 0 | } |
559 | 0 | mce_barrier_exit(&mce_trap_bar, bcast); |
560 | 0 |
|
561 | 0 | raise_softirq(MACHINE_CHECK_SOFTIRQ); |
562 | 0 | } |
563 | | |
564 | | void mcheck_mca_clearbanks(struct mca_banks *bankmask) |
565 | 0 | { |
566 | 0 | int i; |
567 | 0 |
|
568 | 0 | for ( i = 0; i < nr_mce_banks; i++ ) |
569 | 0 | { |
570 | 0 | if ( !mcabanks_test(i, bankmask) ) |
571 | 0 | continue; |
572 | 0 | mcabank_clear(i); |
573 | 0 | } |
574 | 0 | } |
575 | | |
576 | | /*check the existence of Machine Check*/ |
577 | | bool mce_available(const struct cpuinfo_x86 *c) |
578 | 25 | { |
579 | 25 | return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); |
580 | 25 | } |
581 | | |
582 | | /* |
583 | | * Check if bank 0 is usable for MCE. It isn't for Intel P6 family |
584 | | * before model 0x1a. |
585 | | */ |
586 | | unsigned int mce_firstbank(struct cpuinfo_x86 *c) |
587 | 12 | { |
588 | 12 | return c->x86 == 6 && |
589 | 12 | c->x86_vendor == X86_VENDOR_INTEL && c->x86_model < 0x1a; |
590 | 12 | } |
591 | | |
592 | | int show_mca_info(int inited, struct cpuinfo_x86 *c) |
593 | 12 | { |
594 | 12 | static enum mcheck_type g_type = mcheck_unset; |
595 | 12 | |
596 | 12 | if ( inited != g_type ) |
597 | 1 | { |
598 | 1 | char prefix[20]; |
599 | 1 | static const char *const type_str[] = { |
600 | 1 | [mcheck_amd_famXX] = "AMD", |
601 | 1 | [mcheck_amd_k8] = "AMD K8", |
602 | 1 | [mcheck_intel] = "Intel" |
603 | 1 | }; |
604 | 1 | |
605 | 1 | snprintf(prefix, ARRAY_SIZE(prefix), "%sCPU%u: ", |
606 | 1 | g_type != mcheck_unset ? XENLOG_WARNING : XENLOG_INFO, |
607 | 1 | smp_processor_id()); |
608 | 1 | BUG_ON(inited >= ARRAY_SIZE(type_str)); |
609 | 1 | switch ( inited ) |
610 | 1 | { |
611 | 1 | default: |
612 | 1 | printk("%s%s machine check reporting enabled\n", |
613 | 1 | prefix, type_str[inited]); |
614 | 1 | break; |
615 | 1 | |
616 | 0 | case mcheck_amd_famXX: |
617 | 0 | printk("%s%s Fam%xh machine check reporting enabled\n", |
618 | 0 | prefix, type_str[inited], c->x86); |
619 | 0 | break; |
620 | 1 | |
621 | 0 | case mcheck_none: |
622 | 0 | printk("%sNo machine check initialization\n", prefix); |
623 | 0 | break; |
624 | 1 | } |
625 | 1 | g_type = inited; |
626 | 1 | } |
627 | 12 | |
628 | 12 | return 0; |
629 | 12 | } |
630 | | |
631 | | static void set_poll_bankmask(struct cpuinfo_x86 *c) |
632 | 12 | { |
633 | 12 | int cpu = smp_processor_id(); |
634 | 12 | struct mca_banks *mb; |
635 | 12 | |
636 | 12 | mb = per_cpu(poll_bankmask, cpu); |
637 | 12 | BUG_ON(!mb); |
638 | 12 | |
639 | 12 | if ( cmci_support && opt_mce ) |
640 | 12 | { |
641 | 12 | mb->num = per_cpu(no_cmci_banks, cpu)->num; |
642 | 12 | bitmap_copy(mb->bank_map, per_cpu(no_cmci_banks, cpu)->bank_map, |
643 | 12 | nr_mce_banks); |
644 | 12 | } |
645 | 12 | else |
646 | 0 | { |
647 | 0 | bitmap_copy(mb->bank_map, mca_allbanks->bank_map, nr_mce_banks); |
648 | 0 | if ( mce_firstbank(c) ) |
649 | 0 | mcabanks_clear(0, mb); |
650 | 0 | } |
651 | 12 | } |
652 | | |
653 | | /* The perbank ctl/status init is platform specific because of AMD's quirk */ |
654 | | int mca_cap_init(void) |
655 | 12 | { |
656 | 12 | uint64_t msr_content; |
657 | 12 | |
658 | 12 | rdmsrl(MSR_IA32_MCG_CAP, msr_content); |
659 | 12 | |
660 | 12 | if ( msr_content & MCG_CTL_P ) /* Control register present ? */ |
661 | 0 | wrmsrl(MSR_IA32_MCG_CTL, 0xffffffffffffffffULL); |
662 | 12 | |
663 | 12 | if ( nr_mce_banks && (msr_content & MCG_CAP_COUNT) != nr_mce_banks ) |
664 | 0 | { |
665 | 0 | dprintk(XENLOG_WARNING, "Different bank number on cpu %x\n", |
666 | 0 | smp_processor_id()); |
667 | 0 | return -ENODEV; |
668 | 0 | } |
669 | 12 | nr_mce_banks = msr_content & MCG_CAP_COUNT; |
670 | 12 | |
671 | 12 | if ( !nr_mce_banks ) |
672 | 0 | { |
673 | 0 | printk(XENLOG_INFO "CPU%u: No MCE banks present. " |
674 | 0 | "Machine check support disabled\n", smp_processor_id()); |
675 | 0 | return -ENODEV; |
676 | 0 | } |
677 | 12 | |
678 | 12 | /* mcabanks_alloc depends on nr_mce_banks */ |
679 | 12 | if ( !mca_allbanks ) |
680 | 1 | { |
681 | 1 | int i; |
682 | 1 | |
683 | 1 | mca_allbanks = mcabanks_alloc(); |
684 | 24 | for ( i = 0; i < nr_mce_banks; i++ ) |
685 | 23 | mcabanks_set(i, mca_allbanks); |
686 | 1 | } |
687 | 12 | |
688 | 12 | return mca_allbanks ? 0 : -ENOMEM; |
689 | 12 | } |
690 | | |
691 | | static void cpu_bank_free(unsigned int cpu) |
692 | 0 | { |
693 | 0 | struct mca_banks *poll = per_cpu(poll_bankmask, cpu); |
694 | 0 | struct mca_banks *clr = per_cpu(mce_clear_banks, cpu); |
695 | 0 |
|
696 | 0 | mcabanks_free(poll); |
697 | 0 | mcabanks_free(clr); |
698 | 0 | } |
699 | | |
700 | | static int cpu_bank_alloc(unsigned int cpu) |
701 | 12 | { |
702 | 12 | struct mca_banks *poll = mcabanks_alloc(); |
703 | 12 | struct mca_banks *clr = mcabanks_alloc(); |
704 | 12 | |
705 | 12 | if ( !poll || !clr ) |
706 | 0 | { |
707 | 0 | mcabanks_free(poll); |
708 | 0 | mcabanks_free(clr); |
709 | 0 | return -ENOMEM; |
710 | 0 | } |
711 | 12 | |
712 | 12 | per_cpu(poll_bankmask, cpu) = poll; |
713 | 12 | per_cpu(mce_clear_banks, cpu) = clr; |
714 | 12 | return 0; |
715 | 12 | } |
716 | | |
717 | | static int cpu_callback( |
718 | | struct notifier_block *nfb, unsigned long action, void *hcpu) |
719 | 33 | { |
720 | 33 | unsigned int cpu = (unsigned long)hcpu; |
721 | 33 | int rc = 0; |
722 | 33 | |
723 | 33 | switch ( action ) |
724 | 33 | { |
725 | 11 | case CPU_UP_PREPARE: |
726 | 11 | rc = cpu_bank_alloc(cpu); |
727 | 11 | break; |
728 | 11 | |
729 | 0 | case CPU_UP_CANCELED: |
730 | 0 | case CPU_DEAD: |
731 | 0 | cpu_bank_free(cpu); |
732 | 0 | break; |
733 | 33 | } |
734 | 33 | |
735 | 33 | return !rc ? NOTIFY_DONE : notifier_from_errno(rc); |
736 | 33 | } |
737 | | |
738 | | static struct notifier_block cpu_nfb = { |
739 | | .notifier_call = cpu_callback |
740 | | }; |
741 | | |
742 | | /* This has to be run for each processor */ |
743 | | void mcheck_init(struct cpuinfo_x86 *c, bool bsp) |
744 | 12 | { |
745 | 12 | enum mcheck_type inited = mcheck_none; |
746 | 12 | |
747 | 12 | if ( !opt_mce ) |
748 | 0 | { |
749 | 0 | if ( bsp ) |
750 | 0 | printk(XENLOG_INFO "MCE support disabled by bootparam\n"); |
751 | 0 | return; |
752 | 0 | } |
753 | 12 | |
754 | 12 | if ( !mce_available(c) ) |
755 | 0 | { |
756 | 0 | printk(XENLOG_INFO "CPU%i: No machine check support available\n", |
757 | 0 | smp_processor_id()); |
758 | 0 | return; |
759 | 0 | } |
760 | 12 | |
761 | 12 | /*Hardware Enable */ |
762 | 12 | if ( mca_cap_init() ) |
763 | 0 | return; |
764 | 12 | |
765 | 12 | /* Early MCE initialisation for BSP. */ |
766 | 12 | if ( bsp && cpu_bank_alloc(smp_processor_id()) ) |
767 | 0 | BUG(); |
768 | 12 | |
769 | 12 | switch ( c->x86_vendor ) |
770 | 12 | { |
771 | 0 | case X86_VENDOR_AMD: |
772 | 0 | inited = amd_mcheck_init(c); |
773 | 0 | break; |
774 | 0 |
|
775 | 12 | case X86_VENDOR_INTEL: |
776 | 12 | switch ( c->x86 ) |
777 | 12 | { |
778 | 12 | case 6: |
779 | 12 | case 15: |
780 | 12 | inited = intel_mcheck_init(c, bsp); |
781 | 12 | break; |
782 | 12 | } |
783 | 12 | break; |
784 | 12 | |
785 | 0 | default: |
786 | 0 | break; |
787 | 12 | } |
788 | 12 | |
789 | 12 | show_mca_info(inited, c); |
790 | 12 | if ( inited == mcheck_none || inited == mcheck_unset ) |
791 | 0 | goto out; |
792 | 12 | |
793 | 12 | intpose_init(); |
794 | 12 | |
795 | 12 | if ( bsp ) |
796 | 1 | { |
797 | 1 | mctelem_init(sizeof(struct mc_info)); |
798 | 1 | register_cpu_notifier(&cpu_nfb); |
799 | 1 | } |
800 | 12 | |
801 | 12 | /* Turn on MCE now */ |
802 | 12 | set_in_cr4(X86_CR4_MCE); |
803 | 12 | |
804 | 12 | set_poll_bankmask(c); |
805 | 12 | |
806 | 12 | return; |
807 | 0 | out: |
808 | 0 | if ( bsp ) |
809 | 0 | { |
810 | 0 | cpu_bank_free(smp_processor_id()); |
811 | 0 | mcabanks_free(mca_allbanks); |
812 | 0 | mca_allbanks = NULL; |
813 | 0 | } |
814 | 0 | } |
815 | | |
816 | | static void mcinfo_clear(struct mc_info *mi) |
817 | 0 | { |
818 | 0 | memset(mi, 0, sizeof(struct mc_info)); |
819 | 0 | x86_mcinfo_nentries(mi) = 0; |
820 | 0 | } |
821 | | |
822 | | void *x86_mcinfo_reserve(struct mc_info *mi, |
823 | | unsigned int size, unsigned int type) |
824 | 0 | { |
825 | 0 | int i; |
826 | 0 | unsigned long end1, end2; |
827 | 0 | struct mcinfo_common *mic_base, *mic_index; |
828 | 0 |
|
829 | 0 | mic_index = mic_base = x86_mcinfo_first(mi); |
830 | 0 |
|
831 | 0 | /* go to first free entry */ |
832 | 0 | for ( i = 0; i < x86_mcinfo_nentries(mi); i++ ) |
833 | 0 | mic_index = x86_mcinfo_next(mic_index); |
834 | 0 |
|
835 | 0 | /* check if there is enough size */ |
836 | 0 | end1 = (unsigned long)((uint8_t *)mic_base + sizeof(struct mc_info)); |
837 | 0 | end2 = (unsigned long)((uint8_t *)mic_index + size); |
838 | 0 |
|
839 | 0 | if ( end1 < end2 ) |
840 | 0 | { |
841 | 0 | mce_printk(MCE_CRITICAL, |
842 | 0 | "mcinfo_add: No space left in mc_info\n"); |
843 | 0 | return NULL; |
844 | 0 | } |
845 | 0 |
|
846 | 0 | /* there's enough space. add entry. */ |
847 | 0 | x86_mcinfo_nentries(mi)++; |
848 | 0 |
|
849 | 0 | memset(mic_index, 0, size); |
850 | 0 | mic_index->size = size; |
851 | 0 | mic_index->type = type; |
852 | 0 |
|
853 | 0 | return mic_index; |
854 | 0 | } |
855 | | |
856 | | static void x86_mcinfo_apei_save( |
857 | | struct mcinfo_global *mc_global, struct mcinfo_bank *mc_bank) |
858 | 0 | { |
859 | 0 | struct mce m; |
860 | 0 |
|
861 | 0 | memset(&m, 0, sizeof(struct mce)); |
862 | 0 |
|
863 | 0 | m.cpu = mc_global->mc_coreid; |
864 | 0 | m.cpuvendor = boot_cpu_data.x86_vendor; |
865 | 0 | m.cpuid = cpuid_eax(1); |
866 | 0 | m.socketid = mc_global->mc_socketid; |
867 | 0 | m.apicid = mc_global->mc_apicid; |
868 | 0 |
|
869 | 0 | m.mcgstatus = mc_global->mc_gstatus; |
870 | 0 | m.status = mc_bank->mc_status; |
871 | 0 | m.misc = mc_bank->mc_misc; |
872 | 0 | m.addr = mc_bank->mc_addr; |
873 | 0 | m.bank = mc_bank->mc_bank; |
874 | 0 |
|
875 | 0 | apei_write_mce(&m); |
876 | 0 | } |
877 | | |
878 | | /* |
879 | | * Dump machine check information in a format, |
880 | | * mcelog can parse. This is used only when |
881 | | * Dom0 does not take the notification. |
882 | | */ |
883 | | void x86_mcinfo_dump(struct mc_info *mi) |
884 | 0 | { |
885 | 0 | struct mcinfo_common *mic = NULL; |
886 | 0 | struct mcinfo_global *mc_global; |
887 | 0 | struct mcinfo_bank *mc_bank; |
888 | 0 |
|
889 | 0 | /* first print the global info */ |
890 | 0 | x86_mcinfo_lookup(mic, mi, MC_TYPE_GLOBAL); |
891 | 0 | if ( mic == NULL ) |
892 | 0 | return; |
893 | 0 | mc_global = (struct mcinfo_global *)mic; |
894 | 0 | if ( mc_global->mc_flags & MC_FLAG_MCE ) |
895 | 0 | printk(XENLOG_WARNING |
896 | 0 | "CPU%d: Machine Check Exception: %16"PRIx64"\n", |
897 | 0 | mc_global->mc_coreid, mc_global->mc_gstatus); |
898 | 0 | else if ( mc_global->mc_flags & MC_FLAG_CMCI ) |
899 | 0 | printk(XENLOG_WARNING "CMCI occurred on CPU %d.\n", |
900 | 0 | mc_global->mc_coreid); |
901 | 0 | else if ( mc_global->mc_flags & MC_FLAG_POLLED ) |
902 | 0 | printk(XENLOG_WARNING "POLLED occurred on CPU %d.\n", |
903 | 0 | mc_global->mc_coreid); |
904 | 0 |
|
905 | 0 | /* then the bank information */ |
906 | 0 | x86_mcinfo_lookup(mic, mi, MC_TYPE_BANK); /* finds the first entry */ |
907 | 0 | do { |
908 | 0 | if ( mic == NULL ) |
909 | 0 | return; |
910 | 0 | if ( mic->type != MC_TYPE_BANK ) |
911 | 0 | goto next; |
912 | 0 |
|
913 | 0 | mc_bank = (struct mcinfo_bank *)mic; |
914 | 0 |
|
915 | 0 | printk(XENLOG_WARNING "Bank %d: %16"PRIx64, |
916 | 0 | mc_bank->mc_bank, |
917 | 0 | mc_bank->mc_status); |
918 | 0 | if ( mc_bank->mc_status & MCi_STATUS_MISCV ) |
919 | 0 | printk("[%16"PRIx64"]", mc_bank->mc_misc); |
920 | 0 | if ( mc_bank->mc_status & MCi_STATUS_ADDRV ) |
921 | 0 | printk(" at %16"PRIx64, mc_bank->mc_addr); |
922 | 0 | printk("\n"); |
923 | 0 |
|
924 | 0 | if ( is_mc_panic ) |
925 | 0 | x86_mcinfo_apei_save(mc_global, mc_bank); |
926 | 0 |
|
927 | 0 | next: |
928 | 0 | mic = x86_mcinfo_next(mic); /* next entry */ |
929 | 0 | if ( (mic == NULL) || (mic->size == 0) ) |
930 | 0 | break; |
931 | 0 | } while ( 1 ); |
932 | 0 | } |
933 | | |
934 | | static void do_mc_get_cpu_info(void *v) |
935 | 0 | { |
936 | 0 | int cpu = smp_processor_id(); |
937 | 0 | int cindex, cpn; |
938 | 0 | struct cpuinfo_x86 *c; |
939 | 0 | xen_mc_logical_cpu_t *log_cpus, *xcp; |
940 | 0 | uint32_t junk, ebx; |
941 | 0 |
|
942 | 0 | log_cpus = v; |
943 | 0 | c = &cpu_data[cpu]; |
944 | 0 | cindex = 0; |
945 | 0 | cpn = cpu - 1; |
946 | 0 |
|
947 | 0 | /* |
948 | 0 | * Deal with sparse masks, condensed into a contig array. |
949 | 0 | */ |
950 | 0 | while ( cpn >= 0 ) |
951 | 0 | { |
952 | 0 | if ( cpu_online(cpn) ) |
953 | 0 | cindex++; |
954 | 0 | cpn--; |
955 | 0 | } |
956 | 0 |
|
957 | 0 | xcp = &log_cpus[cindex]; |
958 | 0 | c = &cpu_data[cpu]; |
959 | 0 | xcp->mc_cpunr = cpu; |
960 | 0 | x86_mc_get_cpu_info(cpu, &xcp->mc_chipid, |
961 | 0 | &xcp->mc_coreid, &xcp->mc_threadid, |
962 | 0 | &xcp->mc_apicid, &xcp->mc_ncores, |
963 | 0 | &xcp->mc_ncores_active, &xcp->mc_nthreads); |
964 | 0 | xcp->mc_cpuid_level = c->cpuid_level; |
965 | 0 | xcp->mc_family = c->x86; |
966 | 0 | xcp->mc_vendor = c->x86_vendor; |
967 | 0 | xcp->mc_model = c->x86_model; |
968 | 0 | xcp->mc_step = c->x86_mask; |
969 | 0 | xcp->mc_cache_size = c->x86_cache_size; |
970 | 0 | xcp->mc_cache_alignment = c->x86_cache_alignment; |
971 | 0 | memcpy(xcp->mc_vendorid, c->x86_vendor_id, sizeof xcp->mc_vendorid); |
972 | 0 | memcpy(xcp->mc_brandid, c->x86_model_id, sizeof xcp->mc_brandid); |
973 | 0 | memcpy(xcp->mc_cpu_caps, c->x86_capability, sizeof xcp->mc_cpu_caps); |
974 | 0 |
|
975 | 0 | /* |
976 | 0 | * This part needs to run on the CPU itself. |
977 | 0 | */ |
978 | 0 | xcp->mc_nmsrvals = __MC_NMSRS; |
979 | 0 | xcp->mc_msrvalues[0].reg = MSR_IA32_MCG_CAP; |
980 | 0 | rdmsrl(MSR_IA32_MCG_CAP, xcp->mc_msrvalues[0].value); |
981 | 0 |
|
982 | 0 | if ( c->cpuid_level >= 1 ) |
983 | 0 | { |
984 | 0 | cpuid(1, &junk, &ebx, &junk, &junk); |
985 | 0 | xcp->mc_clusterid = (ebx >> 24) & 0xff; |
986 | 0 | } |
987 | 0 | else |
988 | 0 | xcp->mc_clusterid = get_apic_id(); |
989 | 0 | } |
990 | | |
991 | | void x86_mc_get_cpu_info(unsigned cpu, uint32_t *chipid, uint16_t *coreid, |
992 | | uint16_t *threadid, uint32_t *apicid, |
993 | | unsigned *ncores, unsigned *ncores_active, |
994 | | unsigned *nthreads) |
995 | 0 | { |
996 | 0 | struct cpuinfo_x86 *c; |
997 | 0 |
|
998 | 0 | *apicid = cpu_physical_id(cpu); |
999 | 0 | c = &cpu_data[cpu]; |
1000 | 0 | if ( c->apicid == BAD_APICID ) |
1001 | 0 | { |
1002 | 0 | *chipid = cpu; |
1003 | 0 | *coreid = 0; |
1004 | 0 | *threadid = 0; |
1005 | 0 | if ( ncores != NULL ) |
1006 | 0 | *ncores = 1; |
1007 | 0 | if ( ncores_active != NULL ) |
1008 | 0 | *ncores_active = 1; |
1009 | 0 | if ( nthreads != NULL ) |
1010 | 0 | *nthreads = 1; |
1011 | 0 | } |
1012 | 0 | else |
1013 | 0 | { |
1014 | 0 | *chipid = c->phys_proc_id; |
1015 | 0 | if ( c->x86_max_cores > 1 ) |
1016 | 0 | *coreid = c->cpu_core_id; |
1017 | 0 | else |
1018 | 0 | *coreid = 0; |
1019 | 0 | *threadid = c->apicid & ((1 << (c->x86_num_siblings - 1)) - 1); |
1020 | 0 | if ( ncores != NULL ) |
1021 | 0 | *ncores = c->x86_max_cores; |
1022 | 0 | if ( ncores_active != NULL ) |
1023 | 0 | *ncores_active = c->booted_cores; |
1024 | 0 | if ( nthreads != NULL ) |
1025 | 0 | *nthreads = c->x86_num_siblings; |
1026 | 0 | } |
1027 | 0 | } |
1028 | | |
1029 | 34.2k | #define INTPOSE_NENT 50 |
1030 | | |
1031 | | static struct intpose_ent { |
1032 | | unsigned int cpu_nr; |
1033 | | uint64_t msr; |
1034 | | uint64_t val; |
1035 | | } intpose_arr[INTPOSE_NENT]; |
1036 | | |
1037 | | static void intpose_init(void) |
1038 | 12 | { |
1039 | 12 | static int done; |
1040 | 12 | int i; |
1041 | 12 | |
1042 | 12 | if ( done++ > 0 ) |
1043 | 11 | return; |
1044 | 12 | |
1045 | 51 | for ( i = 0; i < INTPOSE_NENT; i++ ) |
1046 | 50 | intpose_arr[i].cpu_nr = -1; |
1047 | 1 | |
1048 | 1 | } |
1049 | | |
1050 | | struct intpose_ent *intpose_lookup(unsigned int cpu_nr, uint64_t msr, |
1051 | | uint64_t *valp) |
1052 | 694 | { |
1053 | 694 | int i; |
1054 | 694 | |
1055 | 34.1k | for ( i = 0; i < INTPOSE_NENT; i++ ) |
1056 | 33.5k | { |
1057 | 33.5k | if ( intpose_arr[i].cpu_nr == cpu_nr && intpose_arr[i].msr == msr ) |
1058 | 0 | { |
1059 | 0 | if ( valp != NULL ) |
1060 | 0 | *valp = intpose_arr[i].val; |
1061 | 0 | return &intpose_arr[i]; |
1062 | 0 | } |
1063 | 33.5k | } |
1064 | 694 | |
1065 | 694 | return NULL; |
1066 | 694 | } |
1067 | | |
1068 | | static void intpose_add(unsigned int cpu_nr, uint64_t msr, uint64_t val) |
1069 | 0 | { |
1070 | 0 | struct intpose_ent *ent = intpose_lookup(cpu_nr, msr, NULL); |
1071 | 0 | int i; |
1072 | 0 |
|
1073 | 0 | if ( ent ) |
1074 | 0 | { |
1075 | 0 | ent->val = val; |
1076 | 0 | return; |
1077 | 0 | } |
1078 | 0 |
|
1079 | 0 | for ( i = 0, ent = &intpose_arr[0]; i < INTPOSE_NENT; i++, ent++ ) |
1080 | 0 | { |
1081 | 0 | if ( ent->cpu_nr == -1 ) |
1082 | 0 | { |
1083 | 0 | ent->cpu_nr = cpu_nr; |
1084 | 0 | ent->msr = msr; |
1085 | 0 | ent->val = val; |
1086 | 0 | return; |
1087 | 0 | } |
1088 | 0 | } |
1089 | 0 |
|
1090 | 0 | printk("intpose_add: interpose array full - request dropped\n"); |
1091 | 0 | } |
1092 | | |
1093 | | bool intpose_inval(unsigned int cpu_nr, uint64_t msr) |
1094 | 0 | { |
1095 | 0 | struct intpose_ent *ent = intpose_lookup(cpu_nr, msr, NULL); |
1096 | 0 |
|
1097 | 0 | if ( !ent ) |
1098 | 0 | return false; |
1099 | 0 |
|
1100 | 0 | ent->cpu_nr = -1; |
1101 | 0 | return true; |
1102 | 0 | } |
1103 | | |
1104 | | #define IS_MCA_BANKREG(r) \ |
1105 | 0 | ((r) >= MSR_IA32_MC0_CTL && \ |
1106 | 0 | (r) <= MSR_IA32_MCx_MISC(nr_mce_banks - 1) && \ |
1107 | 0 | ((r) - MSR_IA32_MC0_CTL) % 4 != 0) /* excludes MCi_CTL */ |
1108 | | |
1109 | | static bool x86_mc_msrinject_verify(struct xen_mc_msrinject *mci) |
1110 | 0 | { |
1111 | 0 | struct cpuinfo_x86 *c; |
1112 | 0 | int i, errs = 0; |
1113 | 0 |
|
1114 | 0 | c = &cpu_data[smp_processor_id()]; |
1115 | 0 |
|
1116 | 0 | for ( i = 0; i < mci->mcinj_count; i++ ) |
1117 | 0 | { |
1118 | 0 | uint64_t reg = mci->mcinj_msr[i].reg; |
1119 | 0 | const char *reason = NULL; |
1120 | 0 |
|
1121 | 0 | if ( IS_MCA_BANKREG(reg) ) |
1122 | 0 | { |
1123 | 0 | if ( c->x86_vendor == X86_VENDOR_AMD ) |
1124 | 0 | { |
1125 | 0 | /* |
1126 | 0 | * On AMD we can set MCi_STATUS_WREN in the |
1127 | 0 | * HWCR MSR to allow non-zero writes to banks |
1128 | 0 | * MSRs not to #GP. The injector in dom0 |
1129 | 0 | * should set that bit, but we detect when it |
1130 | 0 | * is necessary and set it as a courtesy to |
1131 | 0 | * avoid #GP in the hypervisor. |
1132 | 0 | */ |
1133 | 0 | mci->mcinj_flags |= |
1134 | 0 | _MC_MSRINJ_F_REQ_HWCR_WREN; |
1135 | 0 | continue; |
1136 | 0 | } |
1137 | 0 | else |
1138 | 0 | { |
1139 | 0 | /* |
1140 | 0 | * No alternative but to interpose, so require |
1141 | 0 | * that the injector specified as such. |
1142 | 0 | */ |
1143 | 0 | if ( !(mci->mcinj_flags & MC_MSRINJ_F_INTERPOSE) ) |
1144 | 0 | reason = "must specify interposition"; |
1145 | 0 | } |
1146 | 0 | } |
1147 | 0 | else |
1148 | 0 | { |
1149 | 0 | switch ( reg ) |
1150 | 0 | { |
1151 | 0 | /* MSRs acceptable on all x86 cpus */ |
1152 | 0 | case MSR_IA32_MCG_STATUS: |
1153 | 0 | break; |
1154 | 0 |
|
1155 | 0 | case MSR_F10_MC4_MISC1: |
1156 | 0 | case MSR_F10_MC4_MISC2: |
1157 | 0 | case MSR_F10_MC4_MISC3: |
1158 | 0 | if ( c->x86_vendor != X86_VENDOR_AMD ) |
1159 | 0 | reason = "only supported on AMD"; |
1160 | 0 | else if ( c->x86 < 0x10 ) |
1161 | 0 | reason = "only supported on AMD Fam10h+"; |
1162 | 0 | break; |
1163 | 0 |
|
1164 | 0 | /* MSRs that the HV will take care of */ |
1165 | 0 | case MSR_K8_HWCR: |
1166 | 0 | if ( c->x86_vendor == X86_VENDOR_AMD ) |
1167 | 0 | reason = "HV will operate HWCR"; |
1168 | 0 | else |
1169 | 0 | reason = "only supported on AMD"; |
1170 | 0 | break; |
1171 | 0 |
|
1172 | 0 | default: |
1173 | 0 | reason = "not a recognized MCA MSR"; |
1174 | 0 | break; |
1175 | 0 | } |
1176 | 0 | } |
1177 | 0 |
|
1178 | 0 | if ( reason != NULL ) |
1179 | 0 | { |
1180 | 0 | printk("HV MSR INJECT ERROR: MSR %#Lx %s\n", |
1181 | 0 | (unsigned long long)mci->mcinj_msr[i].reg, reason); |
1182 | 0 | errs++; |
1183 | 0 | } |
1184 | 0 | } |
1185 | 0 |
|
1186 | 0 | return !errs; |
1187 | 0 | } |
1188 | | |
1189 | | static uint64_t x86_mc_hwcr_wren(void) |
1190 | 0 | { |
1191 | 0 | uint64_t old; |
1192 | 0 |
|
1193 | 0 | rdmsrl(MSR_K8_HWCR, old); |
1194 | 0 |
|
1195 | 0 | if ( !(old & K8_HWCR_MCi_STATUS_WREN) ) |
1196 | 0 | { |
1197 | 0 | uint64_t new = old | K8_HWCR_MCi_STATUS_WREN; |
1198 | 0 | wrmsrl(MSR_K8_HWCR, new); |
1199 | 0 | } |
1200 | 0 |
|
1201 | 0 | return old; |
1202 | 0 | } |
1203 | | |
1204 | | static void x86_mc_hwcr_wren_restore(uint64_t hwcr) |
1205 | 0 | { |
1206 | 0 | if ( !(hwcr & K8_HWCR_MCi_STATUS_WREN) ) |
1207 | 0 | wrmsrl(MSR_K8_HWCR, hwcr); |
1208 | 0 | } |
1209 | | |
1210 | | static void x86_mc_msrinject(void *data) |
1211 | 0 | { |
1212 | 0 | struct xen_mc_msrinject *mci = data; |
1213 | 0 | struct mcinfo_msr *msr; |
1214 | 0 | uint64_t hwcr = 0; |
1215 | 0 | int intpose; |
1216 | 0 | int i; |
1217 | 0 |
|
1218 | 0 | if ( mci->mcinj_flags & _MC_MSRINJ_F_REQ_HWCR_WREN ) |
1219 | 0 | hwcr = x86_mc_hwcr_wren(); |
1220 | 0 |
|
1221 | 0 | intpose = (mci->mcinj_flags & MC_MSRINJ_F_INTERPOSE) != 0; |
1222 | 0 |
|
1223 | 0 | for ( i = 0, msr = &mci->mcinj_msr[0]; i < mci->mcinj_count; i++, msr++ ) |
1224 | 0 | { |
1225 | 0 | printk("HV MSR INJECT (%s) target %u actual %u MSR %#Lx <-- %#Lx\n", |
1226 | 0 | intpose ? "interpose" : "hardware", |
1227 | 0 | mci->mcinj_cpunr, smp_processor_id(), |
1228 | 0 | (unsigned long long)msr->reg, |
1229 | 0 | (unsigned long long)msr->value); |
1230 | 0 |
|
1231 | 0 | if ( intpose ) |
1232 | 0 | intpose_add(mci->mcinj_cpunr, msr->reg, msr->value); |
1233 | 0 | else |
1234 | 0 | wrmsrl(msr->reg, msr->value); |
1235 | 0 | } |
1236 | 0 |
|
1237 | 0 | if ( mci->mcinj_flags & _MC_MSRINJ_F_REQ_HWCR_WREN ) |
1238 | 0 | x86_mc_hwcr_wren_restore(hwcr); |
1239 | 0 | } |
1240 | | |
1241 | | /*ARGSUSED*/ |
1242 | | static void x86_mc_mceinject(void *data) |
1243 | 0 | { |
1244 | 0 | printk("Simulating #MC on cpu %d\n", smp_processor_id()); |
1245 | 0 | __asm__ __volatile__("int $0x12"); |
1246 | 0 | } |
1247 | | |
1248 | | #if BITS_PER_LONG == 64 |
1249 | | |
1250 | 0 | #define ID2COOKIE(id) ((mctelem_cookie_t)(id)) |
1251 | 0 | #define COOKIE2ID(c) ((uint64_t)(c)) |
1252 | | |
1253 | | #elif defined(BITS_PER_LONG) |
1254 | | #error BITS_PER_LONG has unexpected value |
1255 | | #else |
1256 | | #error BITS_PER_LONG definition absent |
1257 | | #endif |
1258 | | |
1259 | | # include <compat/arch-x86/xen-mca.h> |
1260 | | |
1261 | | # define xen_mcinfo_msr mcinfo_msr |
1262 | | CHECK_mcinfo_msr; |
1263 | | # undef xen_mcinfo_msr |
1264 | | # undef CHECK_mcinfo_msr |
1265 | | # define CHECK_mcinfo_msr struct mcinfo_msr |
1266 | | |
1267 | | # define xen_mcinfo_common mcinfo_common |
1268 | | CHECK_mcinfo_common; |
1269 | | # undef xen_mcinfo_common |
1270 | | # undef CHECK_mcinfo_common |
1271 | | # define CHECK_mcinfo_common struct mcinfo_common |
1272 | | |
1273 | | CHECK_FIELD_(struct, mc_fetch, flags); |
1274 | | CHECK_FIELD_(struct, mc_fetch, fetch_id); |
1275 | | # define CHECK_compat_mc_fetch struct mc_fetch |
1276 | | |
1277 | | CHECK_FIELD_(struct, mc_physcpuinfo, ncpus); |
1278 | | # define CHECK_compat_mc_physcpuinfo struct mc_physcpuinfo |
1279 | | |
1280 | | #define CHECK_compat_mc_inject_v2 struct mc_inject_v2 |
1281 | | CHECK_mc; |
1282 | | # undef CHECK_compat_mc_fetch |
1283 | | # undef CHECK_compat_mc_physcpuinfo |
1284 | | |
1285 | | # define xen_mc_info mc_info |
1286 | | CHECK_mc_info; |
1287 | | # undef xen_mc_info |
1288 | | |
1289 | | # define xen_mcinfo_global mcinfo_global |
1290 | | CHECK_mcinfo_global; |
1291 | | # undef xen_mcinfo_global |
1292 | | |
1293 | | # define xen_mcinfo_bank mcinfo_bank |
1294 | | CHECK_mcinfo_bank; |
1295 | | # undef xen_mcinfo_bank |
1296 | | |
1297 | | # define xen_mcinfo_extended mcinfo_extended |
1298 | | CHECK_mcinfo_extended; |
1299 | | # undef xen_mcinfo_extended |
1300 | | |
1301 | | # define xen_mcinfo_recovery mcinfo_recovery |
1302 | | # define xen_cpu_offline_action cpu_offline_action |
1303 | | # define xen_page_offline_action page_offline_action |
1304 | | CHECK_mcinfo_recovery; |
1305 | | # undef xen_cpu_offline_action |
1306 | | # undef xen_page_offline_action |
1307 | | # undef xen_mcinfo_recovery |
1308 | | |
1309 | | /* Machine Check Architecture Hypercall */ |
1310 | | long do_mca(XEN_GUEST_HANDLE_PARAM(xen_mc_t) u_xen_mc) |
1311 | 0 | { |
1312 | 0 | long ret = 0; |
1313 | 0 | struct xen_mc curop, *op = &curop; |
1314 | 0 | struct vcpu *v = current; |
1315 | 0 | union { |
1316 | 0 | struct xen_mc_fetch *nat; |
1317 | 0 | struct compat_mc_fetch *cmp; |
1318 | 0 | } mc_fetch; |
1319 | 0 | union { |
1320 | 0 | struct xen_mc_physcpuinfo *nat; |
1321 | 0 | struct compat_mc_physcpuinfo *cmp; |
1322 | 0 | } mc_physcpuinfo; |
1323 | 0 | uint32_t flags, cmdflags; |
1324 | 0 | int nlcpu; |
1325 | 0 | xen_mc_logical_cpu_t *log_cpus = NULL; |
1326 | 0 | mctelem_cookie_t mctc; |
1327 | 0 | mctelem_class_t which; |
1328 | 0 | unsigned int target; |
1329 | 0 | struct xen_mc_msrinject *mc_msrinject; |
1330 | 0 | struct xen_mc_mceinject *mc_mceinject; |
1331 | 0 |
|
1332 | 0 | ret = xsm_do_mca(XSM_PRIV); |
1333 | 0 | if ( ret ) |
1334 | 0 | return x86_mcerr("", ret); |
1335 | 0 |
|
1336 | 0 | if ( copy_from_guest(op, u_xen_mc, 1) ) |
1337 | 0 | return x86_mcerr("do_mca: failed copyin of xen_mc_t", -EFAULT); |
1338 | 0 |
|
1339 | 0 | if ( op->interface_version != XEN_MCA_INTERFACE_VERSION ) |
1340 | 0 | return x86_mcerr("do_mca: interface version mismatch", -EACCES); |
1341 | 0 |
|
1342 | 0 | switch ( op->cmd ) |
1343 | 0 | { |
1344 | 0 | case XEN_MC_fetch: |
1345 | 0 | mc_fetch.nat = &op->u.mc_fetch; |
1346 | 0 | cmdflags = mc_fetch.nat->flags; |
1347 | 0 |
|
1348 | 0 | switch ( cmdflags & (XEN_MC_NONURGENT | XEN_MC_URGENT) ) |
1349 | 0 | { |
1350 | 0 | case XEN_MC_NONURGENT: |
1351 | 0 | which = MC_NONURGENT; |
1352 | 0 | break; |
1353 | 0 |
|
1354 | 0 | case XEN_MC_URGENT: |
1355 | 0 | which = MC_URGENT; |
1356 | 0 | break; |
1357 | 0 |
|
1358 | 0 | default: |
1359 | 0 | return x86_mcerr("do_mca fetch: bad cmdflags", -EINVAL); |
1360 | 0 | } |
1361 | 0 |
|
1362 | 0 | flags = XEN_MC_OK; |
1363 | 0 |
|
1364 | 0 | if ( cmdflags & XEN_MC_ACK ) |
1365 | 0 | { |
1366 | 0 | mctelem_cookie_t cookie = ID2COOKIE(mc_fetch.nat->fetch_id); |
1367 | 0 | mctelem_ack(which, cookie); |
1368 | 0 | } |
1369 | 0 | else |
1370 | 0 | { |
1371 | 0 | if ( !is_pv_32bit_vcpu(v) |
1372 | 0 | ? guest_handle_is_null(mc_fetch.nat->data) |
1373 | 0 | : compat_handle_is_null(mc_fetch.cmp->data) ) |
1374 | 0 | return x86_mcerr("do_mca fetch: guest buffer " |
1375 | 0 | "invalid", -EINVAL); |
1376 | 0 |
|
1377 | 0 | mctc = mctelem_consume_oldest_begin(which); |
1378 | 0 | if ( mctc ) |
1379 | 0 | { |
1380 | 0 | struct mc_info *mcip = mctelem_dataptr(mctc); |
1381 | 0 | if ( !is_pv_32bit_vcpu(v) |
1382 | 0 | ? copy_to_guest(mc_fetch.nat->data, mcip, 1) |
1383 | 0 | : copy_to_compat(mc_fetch.cmp->data, mcip, 1) ) |
1384 | 0 | { |
1385 | 0 | ret = -EFAULT; |
1386 | 0 | flags |= XEN_MC_FETCHFAILED; |
1387 | 0 | mc_fetch.nat->fetch_id = 0; |
1388 | 0 | } |
1389 | 0 | else |
1390 | 0 | mc_fetch.nat->fetch_id = COOKIE2ID(mctc); |
1391 | 0 | mctelem_consume_oldest_end(mctc); |
1392 | 0 | } |
1393 | 0 | else |
1394 | 0 | { |
1395 | 0 | /* There is no data */ |
1396 | 0 | flags |= XEN_MC_NODATA; |
1397 | 0 | mc_fetch.nat->fetch_id = 0; |
1398 | 0 | } |
1399 | 0 |
|
1400 | 0 | mc_fetch.nat->flags = flags; |
1401 | 0 | if (copy_to_guest(u_xen_mc, op, 1) != 0) |
1402 | 0 | ret = -EFAULT; |
1403 | 0 | } |
1404 | 0 |
|
1405 | 0 | break; |
1406 | 0 |
|
1407 | 0 | case XEN_MC_notifydomain: |
1408 | 0 | return x86_mcerr("do_mca notify unsupported", -EINVAL); |
1409 | 0 |
|
1410 | 0 | case XEN_MC_physcpuinfo: |
1411 | 0 | mc_physcpuinfo.nat = &op->u.mc_physcpuinfo; |
1412 | 0 | nlcpu = num_online_cpus(); |
1413 | 0 |
|
1414 | 0 | if ( !is_pv_32bit_vcpu(v) |
1415 | 0 | ? !guest_handle_is_null(mc_physcpuinfo.nat->info) |
1416 | 0 | : !compat_handle_is_null(mc_physcpuinfo.cmp->info) ) |
1417 | 0 | { |
1418 | 0 | if ( mc_physcpuinfo.nat->ncpus <= 0 ) |
1419 | 0 | return x86_mcerr("do_mca cpuinfo: ncpus <= 0", |
1420 | 0 | -EINVAL); |
1421 | 0 | nlcpu = min(nlcpu, (int)mc_physcpuinfo.nat->ncpus); |
1422 | 0 | log_cpus = xmalloc_array(xen_mc_logical_cpu_t, nlcpu); |
1423 | 0 | if ( log_cpus == NULL ) |
1424 | 0 | return x86_mcerr("do_mca cpuinfo", -ENOMEM); |
1425 | 0 | on_each_cpu(do_mc_get_cpu_info, log_cpus, 1); |
1426 | 0 | if ( !is_pv_32bit_vcpu(v) |
1427 | 0 | ? copy_to_guest(mc_physcpuinfo.nat->info, log_cpus, nlcpu) |
1428 | 0 | : copy_to_compat(mc_physcpuinfo.cmp->info, log_cpus, nlcpu) ) |
1429 | 0 | ret = -EFAULT; |
1430 | 0 | xfree(log_cpus); |
1431 | 0 | } |
1432 | 0 |
|
1433 | 0 | mc_physcpuinfo.nat->ncpus = nlcpu; |
1434 | 0 |
|
1435 | 0 | if ( copy_to_guest(u_xen_mc, op, 1) ) |
1436 | 0 | return x86_mcerr("do_mca cpuinfo", -EFAULT); |
1437 | 0 |
|
1438 | 0 | break; |
1439 | 0 |
|
1440 | 0 | case XEN_MC_msrinject: |
1441 | 0 | if ( nr_mce_banks == 0 ) |
1442 | 0 | return x86_mcerr("do_mca inject", -ENODEV); |
1443 | 0 |
|
1444 | 0 | mc_msrinject = &op->u.mc_msrinject; |
1445 | 0 | target = mc_msrinject->mcinj_cpunr; |
1446 | 0 |
|
1447 | 0 | if ( target >= nr_cpu_ids ) |
1448 | 0 | return x86_mcerr("do_mca inject: bad target", -EINVAL); |
1449 | 0 |
|
1450 | 0 | if ( !cpu_online(target) ) |
1451 | 0 | return x86_mcerr("do_mca inject: target offline", |
1452 | 0 | -EINVAL); |
1453 | 0 |
|
1454 | 0 | if ( mc_msrinject->mcinj_count == 0 ) |
1455 | 0 | return 0; |
1456 | 0 |
|
1457 | 0 | if ( mc_msrinject->mcinj_flags & MC_MSRINJ_F_GPADDR ) |
1458 | 0 | { |
1459 | 0 | domid_t domid; |
1460 | 0 | struct domain *d; |
1461 | 0 | struct mcinfo_msr *msr; |
1462 | 0 | unsigned int i; |
1463 | 0 | paddr_t gaddr; |
1464 | 0 | unsigned long gfn, mfn; |
1465 | 0 | p2m_type_t t; |
1466 | 0 |
|
1467 | 0 | domid = (mc_msrinject->mcinj_domid == DOMID_SELF) ? |
1468 | 0 | current->domain->domain_id : mc_msrinject->mcinj_domid; |
1469 | 0 | if ( domid >= DOMID_FIRST_RESERVED ) |
1470 | 0 | return x86_mcerr("do_mca inject: incompatible flag " |
1471 | 0 | "MC_MSRINJ_F_GPADDR with domain %d", |
1472 | 0 | -EINVAL, domid); |
1473 | 0 |
|
1474 | 0 | d = get_domain_by_id(domid); |
1475 | 0 | if ( d == NULL ) |
1476 | 0 | return x86_mcerr("do_mca inject: bad domain id %d", |
1477 | 0 | -EINVAL, domid); |
1478 | 0 |
|
1479 | 0 | for ( i = 0, msr = &mc_msrinject->mcinj_msr[0]; |
1480 | 0 | i < mc_msrinject->mcinj_count; |
1481 | 0 | i++, msr++ ) |
1482 | 0 | { |
1483 | 0 | gaddr = msr->value; |
1484 | 0 | gfn = PFN_DOWN(gaddr); |
1485 | 0 | mfn = mfn_x(get_gfn(d, gfn, &t)); |
1486 | 0 |
|
1487 | 0 | if ( mfn == mfn_x(INVALID_MFN) ) |
1488 | 0 | { |
1489 | 0 | put_gfn(d, gfn); |
1490 | 0 | put_domain(d); |
1491 | 0 | return x86_mcerr("do_mca inject: bad gfn %#lx of domain %d", |
1492 | 0 | -EINVAL, gfn, domid); |
1493 | 0 | } |
1494 | 0 |
|
1495 | 0 | msr->value = pfn_to_paddr(mfn) | (gaddr & (PAGE_SIZE - 1)); |
1496 | 0 |
|
1497 | 0 | put_gfn(d, gfn); |
1498 | 0 | } |
1499 | 0 |
|
1500 | 0 | put_domain(d); |
1501 | 0 | } |
1502 | 0 |
|
1503 | 0 | if ( !x86_mc_msrinject_verify(mc_msrinject) ) |
1504 | 0 | return x86_mcerr("do_mca inject: illegal MSR", -EINVAL); |
1505 | 0 |
|
1506 | 0 | add_taint(TAINT_ERROR_INJECT); |
1507 | 0 |
|
1508 | 0 | on_selected_cpus(cpumask_of(target), x86_mc_msrinject, |
1509 | 0 | mc_msrinject, 1); |
1510 | 0 |
|
1511 | 0 | break; |
1512 | 0 |
|
1513 | 0 | case XEN_MC_mceinject: |
1514 | 0 | if ( nr_mce_banks == 0 ) |
1515 | 0 | return x86_mcerr("do_mca #MC", -ENODEV); |
1516 | 0 |
|
1517 | 0 | mc_mceinject = &op->u.mc_mceinject; |
1518 | 0 | target = mc_mceinject->mceinj_cpunr; |
1519 | 0 |
|
1520 | 0 | if ( target >= nr_cpu_ids ) |
1521 | 0 | return x86_mcerr("do_mca #MC: bad target", -EINVAL); |
1522 | 0 |
|
1523 | 0 | if ( !cpu_online(target) ) |
1524 | 0 | return x86_mcerr("do_mca #MC: target offline", -EINVAL); |
1525 | 0 |
|
1526 | 0 | add_taint(TAINT_ERROR_INJECT); |
1527 | 0 |
|
1528 | 0 | if ( mce_broadcast ) |
1529 | 0 | on_each_cpu(x86_mc_mceinject, mc_mceinject, 1); |
1530 | 0 | else |
1531 | 0 | on_selected_cpus(cpumask_of(target), x86_mc_mceinject, |
1532 | 0 | mc_mceinject, 1); |
1533 | 0 | break; |
1534 | 0 |
|
1535 | 0 | case XEN_MC_inject_v2: |
1536 | 0 | { |
1537 | 0 | const cpumask_t *cpumap; |
1538 | 0 | cpumask_var_t cmv; |
1539 | 0 | bool broadcast = op->u.mc_inject_v2.flags & XEN_MC_INJECT_CPU_BROADCAST; |
1540 | 0 |
|
1541 | 0 | if ( nr_mce_banks == 0 ) |
1542 | 0 | return x86_mcerr("do_mca #MC", -ENODEV); |
1543 | 0 |
|
1544 | 0 | if ( broadcast ) |
1545 | 0 | cpumap = &cpu_online_map; |
1546 | 0 | else |
1547 | 0 | { |
1548 | 0 | ret = xenctl_bitmap_to_cpumask(&cmv, &op->u.mc_inject_v2.cpumap); |
1549 | 0 | if ( ret ) |
1550 | 0 | break; |
1551 | 0 | cpumap = cmv; |
1552 | 0 | if ( !cpumask_intersects(cpumap, &cpu_online_map) ) |
1553 | 0 | { |
1554 | 0 | free_cpumask_var(cmv); |
1555 | 0 | ret = x86_mcerr("No online CPU passed\n", -EINVAL); |
1556 | 0 | break; |
1557 | 0 | } |
1558 | 0 | if ( !cpumask_subset(cpumap, &cpu_online_map) ) |
1559 | 0 | dprintk(XENLOG_INFO, |
1560 | 0 | "Not all required CPUs are online\n"); |
1561 | 0 | } |
1562 | 0 |
|
1563 | 0 | switch ( op->u.mc_inject_v2.flags & XEN_MC_INJECT_TYPE_MASK ) |
1564 | 0 | { |
1565 | 0 | case XEN_MC_INJECT_TYPE_MCE: |
1566 | 0 | if ( mce_broadcast && |
1567 | 0 | !cpumask_equal(cpumap, &cpu_online_map) ) |
1568 | 0 | printk("Not trigger MCE on all CPUs, may HANG!\n"); |
1569 | 0 | on_selected_cpus(cpumap, x86_mc_mceinject, NULL, 1); |
1570 | 0 | break; |
1571 | 0 |
|
1572 | 0 | case XEN_MC_INJECT_TYPE_CMCI: |
1573 | 0 | if ( !cmci_apic_vector ) |
1574 | 0 | ret = x86_mcerr("No CMCI supported in platform\n", -EINVAL); |
1575 | 0 | else |
1576 | 0 | { |
1577 | 0 | if ( cpumask_test_cpu(smp_processor_id(), cpumap) ) |
1578 | 0 | send_IPI_self(cmci_apic_vector); |
1579 | 0 | send_IPI_mask(cpumap, cmci_apic_vector); |
1580 | 0 | } |
1581 | 0 | break; |
1582 | 0 |
|
1583 | 0 | case XEN_MC_INJECT_TYPE_LMCE: |
1584 | 0 | if ( !lmce_support ) |
1585 | 0 | { |
1586 | 0 | ret = x86_mcerr("No LMCE support", -EINVAL); |
1587 | 0 | break; |
1588 | 0 | } |
1589 | 0 | if ( broadcast ) |
1590 | 0 | { |
1591 | 0 | ret = x86_mcerr("Broadcast cannot be used with LMCE", -EINVAL); |
1592 | 0 | break; |
1593 | 0 | } |
1594 | 0 | /* Ensure at most one CPU is specified. */ |
1595 | 0 | if ( nr_cpu_ids > cpumask_next(cpumask_first(cpumap), cpumap) ) |
1596 | 0 | { |
1597 | 0 | ret = x86_mcerr("More than one CPU specified for LMCE", |
1598 | 0 | -EINVAL); |
1599 | 0 | break; |
1600 | 0 | } |
1601 | 0 | on_selected_cpus(cpumap, x86_mc_mceinject, NULL, 1); |
1602 | 0 | break; |
1603 | 0 |
|
1604 | 0 | default: |
1605 | 0 | ret = x86_mcerr("Wrong mca type\n", -EINVAL); |
1606 | 0 | break; |
1607 | 0 | } |
1608 | 0 |
|
1609 | 0 | if ( cpumap != &cpu_online_map ) |
1610 | 0 | free_cpumask_var(cmv); |
1611 | 0 |
|
1612 | 0 | break; |
1613 | 0 | } |
1614 | 0 |
|
1615 | 0 | default: |
1616 | 0 | return x86_mcerr("do_mca: bad command", -EINVAL); |
1617 | 0 | } |
1618 | 0 |
|
1619 | 0 | return ret; |
1620 | 0 | } |
1621 | | |
1622 | | int mcinfo_dumpped; |
1623 | | static int x86_mcinfo_dump_panic(mctelem_cookie_t mctc) |
1624 | 0 | { |
1625 | 0 | struct mc_info *mcip = mctelem_dataptr(mctc); |
1626 | 0 |
|
1627 | 0 | x86_mcinfo_dump(mcip); |
1628 | 0 | mcinfo_dumpped++; |
1629 | 0 |
|
1630 | 0 | return 0; |
1631 | 0 | } |
1632 | | |
1633 | | /* XXX shall we dump commited mc_info?? */ |
1634 | | static void mc_panic_dump(void) |
1635 | 0 | { |
1636 | 0 | int cpu; |
1637 | 0 |
|
1638 | 0 | dprintk(XENLOG_ERR, "Begin dump mc_info\n"); |
1639 | 0 | for_each_online_cpu(cpu) |
1640 | 0 | mctelem_process_deferred(cpu, x86_mcinfo_dump_panic, |
1641 | 0 | mctelem_has_deferred_lmce(cpu)); |
1642 | 0 | dprintk(XENLOG_ERR, "End dump mc_info, %x mcinfo dumped\n", mcinfo_dumpped); |
1643 | 0 | } |
1644 | | |
1645 | | void mc_panic(char *s) |
1646 | 0 | { |
1647 | 0 | is_mc_panic = true; |
1648 | 0 | console_force_unlock(); |
1649 | 0 |
|
1650 | 0 | printk("Fatal machine check: %s\n", s); |
1651 | 0 | printk("\n" |
1652 | 0 | "****************************************\n" |
1653 | 0 | "\n" |
1654 | 0 | " The processor has reported a hardware error which cannot\n" |
1655 | 0 | " be recovered from. Xen will now reboot the machine.\n"); |
1656 | 0 | mc_panic_dump(); |
1657 | 0 | panic("HARDWARE ERROR"); |
1658 | 0 | } |
1659 | | |
1660 | | /* |
1661 | | * Machine Check owner judge algorithm: |
1662 | | * When error happens, all cpus serially read its msr banks. |
1663 | | * The first CPU who fetches the error bank's info will clear |
1664 | | * this bank. Later readers can't get any information again. |
1665 | | * The first CPU is the actual mce_owner |
1666 | | * |
1667 | | * For Fatal (pcc=1) error, it might cause machine crash |
1668 | | * before we're able to log. For avoiding log missing, we adopt two |
1669 | | * round scanning: |
1670 | | * Round1: simply scan. If found pcc = 1 or ripv = 0, simply reset. |
1671 | | * All MCE banks are sticky, when boot up, MCE polling mechanism |
1672 | | * will help to collect and log those MCE errors. |
1673 | | * Round2: Do all MCE processing logic as normal. |
1674 | | */ |
1675 | | |
1676 | | /* Maybe called in MCE context, no lock, no printk */ |
1677 | | static enum mce_result mce_action(const struct cpu_user_regs *regs, |
1678 | | mctelem_cookie_t mctc) |
1679 | 0 | { |
1680 | 0 | struct mc_info *local_mi; |
1681 | 0 | enum mce_result bank_result = MCER_NOERROR; |
1682 | 0 | enum mce_result worst_result = MCER_NOERROR; |
1683 | 0 | struct mcinfo_common *mic = NULL; |
1684 | 0 | struct mca_binfo binfo; |
1685 | 0 | const struct mca_error_handler *handlers = mce_dhandlers; |
1686 | 0 | unsigned int i, handler_num = mce_dhandler_num; |
1687 | 0 |
|
1688 | 0 | /* When in mce context, regs is valid */ |
1689 | 0 | if ( regs ) |
1690 | 0 | { |
1691 | 0 | handler_num = mce_uhandler_num; |
1692 | 0 | handlers = mce_uhandlers; |
1693 | 0 | } |
1694 | 0 |
|
1695 | 0 | local_mi = (struct mc_info *)mctelem_dataptr(mctc); |
1696 | 0 | x86_mcinfo_lookup(mic, local_mi, MC_TYPE_GLOBAL); |
1697 | 0 | if ( mic == NULL ) |
1698 | 0 | { |
1699 | 0 | printk(KERN_ERR "MCE: get local buffer entry failed\n "); |
1700 | 0 | return MCER_CONTINUE; |
1701 | 0 | } |
1702 | 0 |
|
1703 | 0 | memset(&binfo, 0, sizeof(binfo)); |
1704 | 0 | binfo.mig = (struct mcinfo_global *)mic; |
1705 | 0 | binfo.mi = local_mi; |
1706 | 0 |
|
1707 | 0 | /* Processing bank information */ |
1708 | 0 | x86_mcinfo_lookup(mic, local_mi, MC_TYPE_BANK); |
1709 | 0 |
|
1710 | 0 | for ( ; bank_result != MCER_RESET && mic && mic->size; |
1711 | 0 | mic = x86_mcinfo_next(mic) ) |
1712 | 0 | { |
1713 | 0 | if ( mic->type != MC_TYPE_BANK ) |
1714 | 0 | { |
1715 | 0 | continue; |
1716 | 0 | } |
1717 | 0 | binfo.mib = (struct mcinfo_bank *)mic; |
1718 | 0 | binfo.bank = binfo.mib->mc_bank; |
1719 | 0 | bank_result = MCER_NOERROR; |
1720 | 0 | for ( i = 0; i < handler_num; i++ ) |
1721 | 0 | { |
1722 | 0 | if ( handlers[i].owned_error(binfo.mib->mc_status) ) |
1723 | 0 | { |
1724 | 0 | handlers[i].recovery_handler(&binfo, &bank_result, regs); |
1725 | 0 | if ( worst_result < bank_result ) |
1726 | 0 | worst_result = bank_result; |
1727 | 0 | break; |
1728 | 0 | } |
1729 | 0 | } |
1730 | 0 | } |
1731 | 0 |
|
1732 | 0 | return worst_result; |
1733 | 0 | } |
1734 | | |
1735 | | /* |
1736 | | * Called from mctelem_process_deferred. Return 1 if the telemetry |
1737 | | * should be committed for dom0 consumption, 0 if it should be |
1738 | | * dismissed. |
1739 | | */ |
1740 | | static int mce_delayed_action(mctelem_cookie_t mctc) |
1741 | 0 | { |
1742 | 0 | enum mce_result result; |
1743 | 0 | int ret = 0; |
1744 | 0 |
|
1745 | 0 | result = mce_action(NULL, mctc); |
1746 | 0 |
|
1747 | 0 | switch ( result ) |
1748 | 0 | { |
1749 | 0 | case MCER_RESET: |
1750 | 0 | dprintk(XENLOG_ERR, "MCE delayed action failed\n"); |
1751 | 0 | is_mc_panic = true; |
1752 | 0 | x86_mcinfo_dump(mctelem_dataptr(mctc)); |
1753 | 0 | panic("MCE: Software recovery failed for the UCR"); |
1754 | 0 | break; |
1755 | 0 |
|
1756 | 0 | case MCER_RECOVERED: |
1757 | 0 | dprintk(XENLOG_INFO, "MCE: Error is successfully recovered\n"); |
1758 | 0 | ret = 1; |
1759 | 0 | break; |
1760 | 0 |
|
1761 | 0 | case MCER_CONTINUE: |
1762 | 0 | dprintk(XENLOG_INFO, "MCE: Error can't be recovered, " |
1763 | 0 | "system is tainted\n"); |
1764 | 0 | x86_mcinfo_dump(mctelem_dataptr(mctc)); |
1765 | 0 | ret = 1; |
1766 | 0 | break; |
1767 | 0 |
|
1768 | 0 | default: |
1769 | 0 | ret = 0; |
1770 | 0 | break; |
1771 | 0 | } |
1772 | 0 | return ret; |
1773 | 0 | } |
1774 | | |
1775 | | /* Softirq Handler for this MCE# processing */ |
1776 | | static void mce_softirq(void) |
1777 | 0 | { |
1778 | 0 | static DEFINE_MCE_BARRIER(mce_inside_bar); |
1779 | 0 | static DEFINE_MCE_BARRIER(mce_severity_bar); |
1780 | 0 | static atomic_t severity_cpu; |
1781 | 0 | int cpu = smp_processor_id(); |
1782 | 0 | unsigned int workcpu; |
1783 | 0 | bool lmce = mctelem_has_deferred_lmce(cpu); |
1784 | 0 | bool bcast = mce_broadcast && !lmce; |
1785 | 0 |
|
1786 | 0 | mce_printk(MCE_VERBOSE, "CPU%d enter softirq\n", cpu); |
1787 | 0 |
|
1788 | 0 | mce_barrier_enter(&mce_inside_bar, bcast); |
1789 | 0 |
|
1790 | 0 | if ( !lmce ) |
1791 | 0 | { |
1792 | 0 | /* |
1793 | 0 | * Everybody is here. Now let's see who gets to do the |
1794 | 0 | * recovery work. Right now we just see if there's a CPU |
1795 | 0 | * that did not have any problems, and pick that one. |
1796 | 0 | * |
1797 | 0 | * First, just set a default value: the last CPU who reaches this |
1798 | 0 | * will overwrite the value and become the default. |
1799 | 0 | */ |
1800 | 0 |
|
1801 | 0 | atomic_set(&severity_cpu, cpu); |
1802 | 0 |
|
1803 | 0 | mce_barrier_enter(&mce_severity_bar, bcast); |
1804 | 0 | if ( !mctelem_has_deferred(cpu) ) |
1805 | 0 | atomic_set(&severity_cpu, cpu); |
1806 | 0 | mce_barrier_exit(&mce_severity_bar, bcast); |
1807 | 0 | } |
1808 | 0 |
|
1809 | 0 | /* We choose severity_cpu for further processing */ |
1810 | 0 | if ( lmce || atomic_read(&severity_cpu) == cpu ) |
1811 | 0 | { |
1812 | 0 |
|
1813 | 0 | mce_printk(MCE_VERBOSE, "CPU%d handling errors\n", cpu); |
1814 | 0 |
|
1815 | 0 | /* |
1816 | 0 | * Step1: Fill DOM0 LOG buffer, vMCE injection buffer and |
1817 | 0 | * vMCE MSRs virtualization buffer |
1818 | 0 | */ |
1819 | 0 |
|
1820 | 0 | if ( lmce ) |
1821 | 0 | mctelem_process_deferred(cpu, mce_delayed_action, true); |
1822 | 0 | else |
1823 | 0 | for_each_online_cpu(workcpu) |
1824 | 0 | mctelem_process_deferred(workcpu, mce_delayed_action, false); |
1825 | 0 |
|
1826 | 0 | /* Step2: Send Log to DOM0 through vIRQ */ |
1827 | 0 | if ( dom0_vmce_enabled() ) |
1828 | 0 | { |
1829 | 0 | mce_printk(MCE_VERBOSE, "MCE: send MCE# to DOM0 through virq\n"); |
1830 | 0 | send_global_virq(VIRQ_MCA); |
1831 | 0 | } |
1832 | 0 | } |
1833 | 0 |
|
1834 | 0 | mce_barrier_exit(&mce_inside_bar, bcast); |
1835 | 0 | } |
1836 | | |
1837 | | /* |
1838 | | * Machine Check owner judge algorithm: |
1839 | | * When error happens, all cpus serially read its msr banks. |
1840 | | * The first CPU who fetches the error bank's info will clear |
1841 | | * this bank. Later readers can't get any infor again. |
1842 | | * The first CPU is the actual mce_owner |
1843 | | * |
1844 | | * For Fatal (pcc=1) error, it might cause machine crash |
1845 | | * before we're able to log. For avoiding log missing, we adopt two |
1846 | | * round scanning: |
1847 | | * Round1: simply scan. If found pcc = 1 or ripv = 0, simply reset. |
1848 | | * All MCE banks are sticky, when boot up, MCE polling mechanism |
1849 | | * will help to collect and log those MCE errors. |
1850 | | * Round2: Do all MCE processing logic as normal. |
1851 | | */ |
1852 | | void mce_handler_init(void) |
1853 | 12 | { |
1854 | 12 | if ( smp_processor_id() != 0 ) |
1855 | 11 | return; |
1856 | 12 | |
1857 | 12 | /* callback register, do we really need so many callback? */ |
1858 | 12 | /* mce handler data initialization */ |
1859 | 1 | spin_lock_init(&mce_logout_lock); |
1860 | 1 | open_softirq(MACHINE_CHECK_SOFTIRQ, mce_softirq); |
1861 | 1 | } |