debuggers.hg

view xen/arch/x86/cpu/mcheck/mce.c @ 22906:700ac6445812

Now add KDB to the non-kdb tree
author Mukesh Rathor
date Thu Feb 03 15:42:41 2011 -0800 (2011-02-03)
parents e8acb9753ff1
line source
1 /*
2 * mce.c - x86 Machine Check Exception Reporting
3 * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@codemonkey.org.uk>
4 */
6 #include <xen/init.h>
7 #include <xen/types.h>
8 #include <xen/kernel.h>
9 #include <xen/config.h>
10 #include <xen/smp.h>
11 #include <xen/errno.h>
12 #include <xen/console.h>
13 #include <xen/sched.h>
14 #include <xen/sched-if.h>
15 #include <xen/cpumask.h>
16 #include <xen/event.h>
17 #include <xen/guest_access.h>
18 #include <xen/hypercall.h> /* for do_mca */
20 #include <asm/processor.h>
21 #include <asm/system.h>
22 #include <asm/msr.h>
24 #include "mce.h"
26 bool_t __read_mostly mce_disabled;
27 invbool_param("mce", mce_disabled);
28 bool_t __read_mostly mce_broadcast = 0;
29 bool_t is_mc_panic;
30 unsigned int __read_mostly nr_mce_banks;
31 int __read_mostly firstbank;
33 static void intpose_init(void);
34 static void mcinfo_clear(struct mc_info *);
35 struct mca_banks *mca_allbanks;
37 #define SEG_PL(segsel) ((segsel) & 0x3)
38 #define _MC_MSRINJ_F_REQ_HWCR_WREN (1 << 16)
40 #if 0
41 static int x86_mcerr(const char *msg, int err)
42 {
43 gdprintk(XENLOG_WARNING, "x86_mcerr: %s, returning %d\n",
44 msg != NULL ? msg : "", err);
45 return err;
46 }
47 #else
48 #define x86_mcerr(msg, err) (err)
49 #endif
51 int mce_verbosity;
52 static void __init mce_set_verbosity(char *str)
53 {
54 if (strcmp("verbose", str) == 0)
55 mce_verbosity = MCE_VERBOSE;
56 else
57 printk(KERN_DEBUG "Machine Check verbosity level %s not recognised, "
58 "use mce_verbosity=verbose\n", str);
59 }
60 custom_param("mce_verbosity", mce_set_verbosity);
62 /* Handle unconfigured int18 (should never happen) */
63 static void unexpected_machine_check(struct cpu_user_regs *regs, long error_code)
64 {
65 printk(XENLOG_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
66 smp_processor_id());
67 }
70 static x86_mce_vector_t _machine_check_vector = unexpected_machine_check;
72 void x86_mce_vector_register(x86_mce_vector_t hdlr)
73 {
74 _machine_check_vector = hdlr;
75 wmb();
76 }
78 /* Call the installed machine check handler for this CPU setup. */
80 void machine_check_vector(struct cpu_user_regs *regs, long error_code)
81 {
82 _machine_check_vector(regs, error_code);
83 }
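For illustration, a minimal sketch (with hypothetical function names, not part of this file) of how a family-specific setup routine would install its own #MC handler through the hook above:

    /* Illustrative sketch only: a hypothetical family-specific init path
     * replacing unexpected_machine_check as the int18 entry point. */
    static void my_family_machine_check(struct cpu_user_regs *regs, long error_code)
    {
        /* ... family-specific #MC handling ... */
    }

    static void my_family_mcheck_init(void)
    {
        x86_mce_vector_register(my_family_machine_check);
    }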
85 /* Init machine check callback handler
86 * It is used to collect additional information provided by newer
87 * CPU families/models without the need to duplicate the whole handler.
88 * This avoids having many handlers doing almost the same thing, each
89 * with its own tweaks and bugs. */
90 static x86_mce_callback_t mc_callback_bank_extended = NULL;
92 void x86_mce_callback_register(x86_mce_callback_t cbfunc)
93 {
94 mc_callback_bank_extended = cbfunc;
95 }
97 /* Machine check recoverable judgement callback handler
98 * It is used to judge whether a UC error is recoverable by software
99 */
100 static mce_recoverable_t mc_recoverable_scan = NULL;
102 void mce_recoverable_register(mce_recoverable_t cbfunc)
103 {
104 mc_recoverable_scan = cbfunc;
105 }
107 struct mca_banks *mcabanks_alloc(void)
108 {
109 struct mca_banks *mb;
111 mb = xmalloc(struct mca_banks);
112 if (!mb)
113 return NULL;
115 mb->bank_map = xmalloc_array(unsigned long,
116 BITS_TO_LONGS(nr_mce_banks));
117 if (!mb->bank_map)
118 {
119 xfree(mb);
120 return NULL;
121 }
123 mb->num = nr_mce_banks;
124 memset(mb->bank_map, 0, sizeof(long) * BITS_TO_LONGS(nr_mce_banks));
126 return mb;
127 }
129 void mcabanks_free(struct mca_banks *banks)
130 {
131 if (banks == NULL)
132 return;
133 if (banks->bank_map)
134 xfree(banks->bank_map);
135 xfree(banks);
136 }
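As a usage sketch of the struct mca_banks bitmap above (illustrative only; mcabanks_set/mcabanks_clear are the accessors already used elsewhere in this file):

    /* Illustrative only: build a mask covering every bank except bank 0. */
    static struct mca_banks *example_mask_without_bank0(void)
    {
        int i;
        struct mca_banks *mb = mcabanks_alloc();

        if (mb == NULL)
            return NULL;
        for (i = 0; i < nr_mce_banks; i++)
            mcabanks_set(i, mb);
        mcabanks_clear(0, mb);   /* e.g. when mce_firstbank() says bank 0 is unusable */
        return mb;               /* caller releases it with mcabanks_free() */
    }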
137 /* Callback handler for judging whether to clear a Machine Check error bank.
138 * According to the latest Intel MCA OS Recovery Writer's Guide, whether the
139 * error MCA bank needs to be cleared is decided by the mca_source and the
140 * MCi_STATUS bit values.
141 */
142 static mce_need_clearbank_t mc_need_clearbank_scan = NULL;
144 void mce_need_clearbank_register(mce_need_clearbank_t cbfunc)
145 {
146 mc_need_clearbank_scan = cbfunc;
147 }
149 static struct mcinfo_bank *mca_init_bank(enum mca_source who,
150 struct mc_info *mi, int bank)
151 {
152 struct mcinfo_bank *mib;
153 uint64_t addr=0, misc = 0;
155 if (!mi)
156 return NULL;
158 mib = x86_mcinfo_reserve(mi, sizeof(struct mcinfo_bank));
159 if (!mib)
160 {
161 mi->flags |= MCINFO_FLAGS_UNCOMPLETE;
162 return NULL;
163 }
165 memset(mib, 0, sizeof (struct mcinfo_bank));
166 mib->mc_status = mca_rdmsr(MSR_IA32_MCx_STATUS(bank));
168 mib->common.type = MC_TYPE_BANK;
169 mib->common.size = sizeof (struct mcinfo_bank);
170 mib->mc_bank = bank;
172 addr = misc = 0;
173 if (mib->mc_status & MCi_STATUS_MISCV)
174 mib->mc_misc = mca_rdmsr(MSR_IA32_MCx_MISC(bank));
176 if (mib->mc_status & MCi_STATUS_ADDRV)
177 {
178 mib->mc_addr = mca_rdmsr(MSR_IA32_MCx_ADDR(bank));
180 if (mfn_valid(paddr_to_pfn(mib->mc_addr))) {
181 struct domain *d;
183 d = maddr_get_owner(mib->mc_addr);
184 if (d != NULL && (who == MCA_POLLER ||
185 who == MCA_CMCI_HANDLER))
186 mib->mc_domid = d->domain_id;
187 }
188 }
190 if (who == MCA_CMCI_HANDLER) {
191 mib->mc_ctrl2 = mca_rdmsr(MSR_IA32_MC0_CTL2 + bank);
192 rdtscll(mib->mc_tsc);
193 }
195 return mib;
196 }
198 static int mca_init_global(uint32_t flags, struct mcinfo_global *mig)
199 {
200 uint64_t status;
201 int cpu_nr;
202 struct vcpu *v = current;
203 struct domain *d;
205 /* Set global information */
206 memset(mig, 0, sizeof (struct mcinfo_global));
207 mig->common.type = MC_TYPE_GLOBAL;
208 mig->common.size = sizeof (struct mcinfo_global);
209 status = mca_rdmsr(MSR_IA32_MCG_STATUS);
210 mig->mc_gstatus = status;
211 mig->mc_domid = mig->mc_vcpuid = -1;
212 mig->mc_flags = flags;
213 cpu_nr = smp_processor_id();
214 /* Retrieve detector information */
215 x86_mc_get_cpu_info(cpu_nr, &mig->mc_socketid,
216 &mig->mc_coreid, &mig->mc_core_threadid,
217 &mig->mc_apicid, NULL, NULL, NULL);
219 /* This is really meaningless */
220 if (v != NULL && ((d = v->domain) != NULL)) {
221 mig->mc_domid = d->domain_id;
222 mig->mc_vcpuid = v->vcpu_id;
223 } else {
224 mig->mc_domid = -1;
225 mig->mc_vcpuid = -1;
226 }
228 return 0;
229 }
231 /* Utility function to perform MCA bank telemetry readout and to push that
232 * telemetry towards an interested dom0 for logging and diagnosis.
233 * The caller - #MC handler or MCA poll function - must arrange that we
234 * do not migrate cpus. */
236 /* XXFM Could add overflow counting? */
238 /* Add out_param clear_bank for Machine Check Handler Caller.
239 * For Intel latest CPU, whether to clear the error bank status needs to
240 * be judged by the callback function defined above.
241 */
242 mctelem_cookie_t mcheck_mca_logout(enum mca_source who, struct mca_banks *bankmask,
243 struct mca_summary *sp, struct mca_banks* clear_bank)
244 {
245 uint64_t gstatus, status;
246 struct mcinfo_global *mig = NULL; /* on stack */
247 mctelem_cookie_t mctc = NULL;
248 uint32_t uc = 0, pcc = 0, recover, need_clear = 1, mc_flags = 0;
249 struct mc_info *mci = NULL;
250 mctelem_class_t which = MC_URGENT; /* XXXgcc */
251 int errcnt = 0;
252 int i;
254 gstatus = mca_rdmsr(MSR_IA32_MCG_STATUS);
255 switch (who) {
256 case MCA_MCE_HANDLER:
257 case MCA_MCE_SCAN:
258 mc_flags = MC_FLAG_MCE;
259 which = MC_URGENT;
260 break;
262 case MCA_POLLER:
263 case MCA_RESET:
264 mc_flags = MC_FLAG_POLLED;
265 which = MC_NONURGENT;
266 break;
268 case MCA_CMCI_HANDLER:
269 mc_flags = MC_FLAG_CMCI;
270 which = MC_NONURGENT;
271 break;
273 default:
274 BUG();
275 }
277 /* If no mc_recoverable_scan callback handler is registered,
278 * this error is not recoverable.
279 */
280 recover = (mc_recoverable_scan)? 1: 0;
282 for (i = 0; i < 32 && i < nr_mce_banks; i++) {
283 struct mcinfo_bank *mib; /* on stack */
285 /* Skip bank if corresponding bit in bankmask is clear */
286 if (!mcabanks_test(i, bankmask))
287 continue;
289 status = mca_rdmsr(MSR_IA32_MCx_STATUS(i));
290 if (!(status & MCi_STATUS_VAL))
291 continue; /* this bank has no valid telemetry */
293 /* For the CMCI/MCE handler on recent Intel CPUs, we need to
294 * decide whether to clear the bank based on MCi_STATUS bits such
295 * as OVER/UC/EN/PCC/S/AR.
296 */
297 if ( mc_need_clearbank_scan )
298 need_clear = mc_need_clearbank_scan(who, status);
300 /* If this is the first bank with valid MCA DATA, then
301 * try to reserve an entry from the urgent/nonurgent queue
302 * depending on whether we are called from an exception or
303 * a poller; this can fail (for example dom0 may not
304 * yet have consumed past telemetry). */
305 if (errcnt++ == 0) {
306 if ( (mctc = mctelem_reserve(which)) != NULL ) {
307 mci = mctelem_dataptr(mctc);
308 mcinfo_clear(mci);
309 mig = (struct mcinfo_global*)x86_mcinfo_reserve
310 (mci, sizeof(struct mcinfo_global));
311 /* mc_info should at least hold the global information */
312 ASSERT(mig);
313 mca_init_global(mc_flags, mig);
314 /* A hook here to get global extended msrs */
315 {
316 struct mcinfo_extended *intel_get_extended_msrs(
317 struct mcinfo_global *mig, struct mc_info *mi);
319 if (boot_cpu_data.x86_vendor ==
320 X86_VENDOR_INTEL)
321 intel_get_extended_msrs(mig, mci);
322 }
323 }
324 }
326 /* form a mask of which banks have logged uncorrected errors */
327 if ((status & MCi_STATUS_UC) != 0)
328 uc |= (1 << i);
330 /* likewise for those with processor context corrupt */
331 if ((status & MCi_STATUS_PCC) != 0)
332 pcc |= (1 << i);
334 if (recover && uc)
335 /* uc = 1, recover = 1, we need not panic.
336 */
337 recover = mc_recoverable_scan(status);
339 mib = mca_init_bank(who, mci, i);
341 if (mc_callback_bank_extended)
342 mc_callback_bank_extended(mci, i, status);
344 /* By default, need_clear = 1 */
345 if (who != MCA_MCE_SCAN && need_clear)
346 /* Clear status */
347 mca_wrmsr(MSR_IA32_MCx_STATUS(i), 0x0ULL);
348 else if ( who == MCA_MCE_SCAN && need_clear)
349 mcabanks_set(i, clear_bank);
351 wmb();
352 }
354 if (mig && errcnt > 0) {
355 if (pcc)
356 mig->mc_flags |= MC_FLAG_UNCORRECTABLE;
357 else if (uc)
358 mig->mc_flags |= MC_FLAG_RECOVERABLE;
359 else
360 mig->mc_flags |= MC_FLAG_CORRECTABLE;
361 }
364 if (sp) {
365 sp->errcnt = errcnt;
366 sp->ripv = (gstatus & MCG_STATUS_RIPV) != 0;
367 sp->eipv = (gstatus & MCG_STATUS_EIPV) != 0;
368 sp->uc = uc;
369 sp->pcc = pcc;
370 sp->recoverable = recover;
371 }
373 return mci != NULL ? mctc : NULL; /* may be NULL */
374 }
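The cookie returned above must either be committed (so the telemetry can be fetched later) or dismissed, and must not be referenced afterwards. A minimal sketch of that contract, modelled on the handlers below in this file (the function name is hypothetical):

    /* Illustrative only: a polling-style caller honouring the
     * commit-or-dismiss contract for the telemetry cookie. */
    static void example_poll_once(struct mca_banks *bankmask)
    {
        struct mca_summary bs;
        mctelem_cookie_t mctc;

        mctc = mcheck_mca_logout(MCA_POLLER, bankmask, &bs, NULL);
        if (mctc == NULL)
            return;                        /* no valid telemetry this pass */
        if (bs.errcnt && dom0_vmce_enabled()) {
            mctelem_commit(mctc);          /* hand it to dom0 for logging */
            send_guest_global_virq(dom0, VIRQ_MCA);
        } else {
            mctelem_dismiss(mctc);         /* drop it; cookie must not be reused */
        }
    }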
376 #define DOM_NORMAL 0
377 #define DOM0_TRAP 1
378 #define DOMU_TRAP 2
379 #define DOMU_KILLED 4
381 /* Shared #MC handler. */
382 void mcheck_cmn_handler(struct cpu_user_regs *regs, long error_code,
383 struct mca_banks *bankmask)
384 {
385 int xen_state_lost, dom0_state_lost, domU_state_lost;
386 struct vcpu *v = current;
387 struct domain *curdom = v->domain;
388 domid_t domid = curdom->domain_id;
389 int ctx_xen, ctx_dom0, ctx_domU;
390 uint32_t dom_state = DOM_NORMAL;
391 mctelem_cookie_t mctc = NULL;
392 struct mca_summary bs;
393 struct mc_info *mci = NULL;
394 int irqlocked = 0;
395 uint64_t gstatus;
396 int ripv;
398 /* This handler runs as an interrupt gate, so IPIs from the
399 * polling service routine are deferred until we're finished.
400 */
402 /* Disable interrupts for the _vcpu_. It must not be re-scheduled to
403 * another physical CPU. */
404 vcpu_schedule_lock_irq(v);
405 irqlocked = 1;
407 /* Read global status; if it does not indicate machine check
408 * in progress then bail as long as we have a valid ip to return to. */
409 gstatus = mca_rdmsr(MSR_IA32_MCG_STATUS);
410 ripv = ((gstatus & MCG_STATUS_RIPV) != 0);
411 if (!(gstatus & MCG_STATUS_MCIP) && ripv) {
412 add_taint(TAINT_MACHINE_CHECK); /* questionable */
413 vcpu_schedule_unlock_irq(v);
414 irqlocked = 0;
415 goto cmn_handler_done;
416 }
418 /* Go and grab error telemetry. We must choose whether to commit
419 * for logging or dismiss the cookie that is returned, and must not
420 * reference the cookie after that action.
421 */
422 mctc = mcheck_mca_logout(MCA_MCE_HANDLER, bankmask, &bs, NULL);
423 if (mctc != NULL)
424 mci = (struct mc_info *)mctelem_dataptr(mctc);
426 /* Clear MCIP or another #MC will enter shutdown state */
427 gstatus &= ~MCG_STATUS_MCIP;
428 mca_wrmsr(MSR_IA32_MCG_STATUS, gstatus);
429 wmb();
431 /* If no valid errors and our stack is intact, we're done */
432 if (ripv && bs.errcnt == 0) {
433 vcpu_schedule_unlock_irq(v);
434 irqlocked = 0;
435 goto cmn_handler_done;
436 }
438 if (bs.uc || bs.pcc)
439 add_taint(TAINT_MACHINE_CHECK);
441 /* Machine check exceptions will usually be for UC and/or PCC errors,
442 * but it is possible to configure machine check for some classes
443 * of corrected error.
444 *
445 * UC errors could compromise any domain or the hypervisor
446 * itself - for example a cache writeback of modified data that
447 * turned out to be bad could be for data belonging to anyone, not
448 * just the current domain. In the absence of known data poisoning
449 * to prevent consumption of such bad data in the system we regard
450 * all UC errors as terminal. It may be possible to attempt some
451 * heuristics based on the address affected, which guests have
452 * mappings to that mfn etc.
453 *
454 * PCC errors apply to the current context.
455 *
456 * If MCG_STATUS indicates !RIPV then even a #MC that is not UC
457 * and not PCC is terminal - the return instruction pointer
458 * pushed onto the stack is bogus. If the interrupt context is
459 * the hypervisor or dom0 the game is over, otherwise we can
460 * limit the impact to a single domU but only if we trampoline
461 * somewhere safely - we can't return and unwind the stack.
462 * Since there is no trampoline in place we will treat !RIPV
463 * as terminal for any context.
464 */
465 ctx_xen = SEG_PL(regs->cs) == 0;
466 ctx_dom0 = !ctx_xen && (domid == 0);
467 ctx_domU = !ctx_xen && !ctx_dom0;
469 xen_state_lost = bs.uc != 0 || (ctx_xen && (bs.pcc || !ripv)) ||
470 !ripv;
471 dom0_state_lost = bs.uc != 0 || (ctx_dom0 && (bs.pcc || !ripv));
472 domU_state_lost = bs.uc != 0 || (ctx_domU && (bs.pcc || !ripv));
474 if (xen_state_lost) {
475 /* Now we are going to panic anyway. Allow interrupts, so that
476 * printk on serial console can work. */
477 vcpu_schedule_unlock_irq(v);
478 irqlocked = 0;
480 printk("Terminal machine check exception occurred in "
481 "hypervisor context.\n");
483 /* If MCG_STATUS_EIPV indicates that the IP on the stack is related
484 * to the error, then it makes sense to print a stack trace.
485 * That can be useful for more detailed error analysis and/or
486 * error case studies to figure out whether we can clear
487 * xen_impacted and kill a DomU instead
488 * (i.e. if a guest-only control structure is affected, but then
489 * we must ensure the bad pages are not re-used again).
490 */
491 if (bs.eipv) {
492 printk("MCE: Instruction Pointer is related to the "
493 "error, therefore print the execution state.\n");
494 show_execution_state(regs);
495 }
497 /* Commit the telemetry so that panic flow can find it. */
498 if (mctc != NULL) {
499 x86_mcinfo_dump(mci);
500 mctelem_commit(mctc);
501 }
502 mc_panic("Hypervisor state lost due to machine check "
503 "exception.\n");
504 /*NOTREACHED*/
505 }
507 /*
508 * Xen hypervisor state is intact. If dom0 state is lost then
509 * give it a chance to decide what to do if it has registered
510 * a handler for this event, otherwise panic.
511 *
512 * XXFM Could add some Solaris dom0 contract kill here?
513 */
514 if (dom0_state_lost) {
515 if (dom0 && dom0->max_vcpus && dom0->vcpu[0] &&
516 guest_has_trap_callback(dom0, 0, TRAP_machine_check)) {
517 dom_state = DOM0_TRAP;
518 send_guest_trap(dom0, 0, TRAP_machine_check);
519 /* XXFM case of return with !ripv ??? */
520 } else {
521 /* Commit telemetry for panic flow. */
522 if (mctc != NULL) {
523 x86_mcinfo_dump(mci);
524 mctelem_commit(mctc);
525 }
526 mc_panic("Dom0 state lost due to machine check "
527 "exception\n");
528 /*NOTREACHED*/
529 }
530 }
532 /*
533 * If a domU has lost state then send it a trap if it has registered
534 * a handler, otherwise crash the domain.
535 * XXFM Revisit this functionality.
536 */
537 if (domU_state_lost) {
538 if (guest_has_trap_callback(v->domain, v->vcpu_id,
539 TRAP_machine_check)) {
540 dom_state = DOMU_TRAP;
541 send_guest_trap(curdom, v->vcpu_id,
542 TRAP_machine_check);
543 } else {
544 dom_state = DOMU_KILLED;
545 /* Enable interrupts. This basically results in
546 * calling sti on the *physical* cpu. But after
547 * domain_crash() the vcpu pointer is invalid.
548 * Therefore, we must unlock the irqs before killing
549 * it. */
550 vcpu_schedule_unlock_irq(v);
551 irqlocked = 0;
553 /* DomU is impacted. Kill it and continue. */
554 domain_crash(curdom);
555 }
556 }
558 switch (dom_state) {
559 case DOM0_TRAP:
560 case DOMU_TRAP:
561 /* Enable interrupts. */
562 vcpu_schedule_unlock_irq(v);
563 irqlocked = 0;
565 /* guest softirqs and event callbacks are scheduled
566 * immediately after this handler exits. */
567 break;
568 case DOMU_KILLED:
569 /* Nothing to do here. */
570 break;
572 case DOM_NORMAL:
573 vcpu_schedule_unlock_irq(v);
574 irqlocked = 0;
575 break;
576 }
578 cmn_handler_done:
579 BUG_ON(irqlocked);
580 BUG_ON(!ripv);
582 if (bs.errcnt) {
583 /* Not panicking, so forward telemetry to dom0 now if it
584 * is interested. */
585 if (dom0_vmce_enabled()) {
586 if (mctc != NULL)
587 mctelem_commit(mctc);
588 send_guest_global_virq(dom0, VIRQ_MCA);
589 } else {
590 x86_mcinfo_dump(mci);
591 if (mctc != NULL)
592 mctelem_dismiss(mctc);
593 }
594 } else if (mctc != NULL) {
595 mctelem_dismiss(mctc);
596 }
597 }
599 void mcheck_mca_clearbanks(struct mca_banks *bankmask)
600 {
601 int i;
602 uint64_t status;
604 for (i = 0; i < 32 && i < nr_mce_banks; i++) {
605 if (!mcabanks_test(i, bankmask))
606 continue;
607 status = mca_rdmsr(MSR_IA32_MCx_STATUS(i));
608 if (!(status & MCi_STATUS_VAL))
609 continue;
610 mca_wrmsr(MSR_IA32_MCx_STATUS(i), 0x0ULL);
611 }
612 }
614 static enum mcheck_type amd_mcheck_init(struct cpuinfo_x86 *ci)
615 {
616 enum mcheck_type rc = mcheck_none;
618 switch (ci->x86) {
619 case 6:
620 rc = amd_k7_mcheck_init(ci);
621 break;
623 default:
624 /* Assume that machine check support is available.
625 * The minimum provided support is at least the K8. */
626 case 0xf:
627 rc = amd_k8_mcheck_init(ci);
628 break;
630 case 0x10 ... 0x17:
631 rc = amd_f10_mcheck_init(ci);
632 break;
633 }
635 return rc;
636 }
638 /* Check for the existence of Machine Check support */
639 int mce_available(struct cpuinfo_x86 *c)
640 {
641 return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
642 }
644 /*
645 * Check if bank 0 is usable for MCE. It isn't for AMD K7,
646 * and Intel P6 family before model 0x1a.
647 */
648 int mce_firstbank(struct cpuinfo_x86 *c)
649 {
650 if (c->x86 == 6) {
651 if (c->x86_vendor == X86_VENDOR_AMD)
652 return 1;
654 if (c->x86_vendor == X86_VENDOR_INTEL && c->x86_model < 0x1a)
655 return 1;
656 }
658 return 0;
659 }
661 int show_mca_info(int inited, struct cpuinfo_x86 *c)
662 {
663 static enum mcheck_type g_type = mcheck_unset;
665 if (inited != g_type) {
666 char prefix[20];
667 static const char *const type_str[] = {
668 [mcheck_amd_famXX] = "AMD",
669 [mcheck_amd_k7] = "AMD K7",
670 [mcheck_amd_k8] = "AMD K8",
671 [mcheck_intel] = "Intel"
672 };
674 snprintf(prefix, ARRAY_SIZE(prefix),
675 g_type != mcheck_unset ? XENLOG_WARNING "CPU%i: "
676 : XENLOG_INFO,
677 smp_processor_id());
678 BUG_ON(inited >= ARRAY_SIZE(type_str));
679 switch (inited) {
680 default:
681 printk("%s%s machine check reporting enabled\n",
682 prefix, type_str[inited]);
683 break;
684 case mcheck_amd_famXX:
685 printk("%s%s Fam%xh machine check reporting enabled\n",
686 prefix, type_str[inited], c->x86);
687 break;
688 case mcheck_none:
689 printk("%sNo machine check initialization\n", prefix);
690 break;
691 }
692 g_type = inited;
693 }
695 return 0;
696 }
698 int set_poll_bankmask(struct cpuinfo_x86 *c)
699 {
700 int cpu = smp_processor_id();
701 struct mca_banks *mb;
703 mb = mcabanks_alloc();
704 if (!mb)
705 return -ENOMEM;
707 if (cmci_support && !mce_disabled) {
708 mb->num = per_cpu(no_cmci_banks, cpu)->num;
709 bitmap_copy(mb->bank_map, per_cpu(no_cmci_banks, cpu)->bank_map,
710 nr_mce_banks);
711 }
712 else {
713 bitmap_copy(mb->bank_map, mca_allbanks->bank_map, nr_mce_banks);
714 if (mce_firstbank(c))
715 mcabanks_clear(0, mb);
716 }
717 per_cpu(poll_bankmask, cpu) = mb;
719 return 0;
720 }
722 /* The per-bank ctl/status init is platform-specific because of AMD's quirks */
723 int mca_cap_init(void)
724 {
725 uint64_t msr_content;
727 rdmsrl(MSR_IA32_MCG_CAP, msr_content);
729 if (msr_content & MCG_CTL_P) /* Control register present ? */
730 wrmsrl(MSR_IA32_MCG_CTL, 0xffffffffffffffffULL);
732 if (nr_mce_banks && (msr_content & MCG_CAP_COUNT) != nr_mce_banks)
733 {
734 dprintk(XENLOG_WARNING, "Different bank number on cpu %x\n",
735 smp_processor_id());
736 return -ENODEV;
737 }
738 nr_mce_banks = msr_content & MCG_CAP_COUNT;
740 /* mcabanks_alloc depends on nr_mce_banks */
741 if (!mca_allbanks)
742 {
743 int i;
745 mca_allbanks = mcabanks_alloc();
746 for ( i = 0; i < nr_mce_banks; i++)
747 mcabanks_set(i, mca_allbanks);
748 }
750 return mca_allbanks ? 0:-ENOMEM;
751 }
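mca_cap_init() uses two fields of IA32_MCG_CAP: the low byte (MCG_CAP_COUNT) holds the bank count and MCG_CTL_P advertises the global MCG_CTL register. A minimal decoding sketch, for reference only:

    /* Illustrative only: the two MCG_CAP fields consumed by mca_cap_init(). */
    static void example_decode_mcg_cap(uint64_t cap)
    {
        unsigned int banks = cap & MCG_CAP_COUNT;   /* bits 7:0, bank count */
        int has_mcg_ctl = (cap & MCG_CTL_P) != 0;   /* MCG_CTL register present */

        printk("MCA: %u banks, MCG_CTL %spresent\n",
               banks, has_mcg_ctl ? "" : "not ");
    }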
753 /* This has to be run for each processor */
754 void mcheck_init(struct cpuinfo_x86 *c)
755 {
756 enum mcheck_type inited = mcheck_none;
758 if (mce_disabled == 1) {
759 dprintk(XENLOG_INFO, "MCE support disabled by bootparam\n");
760 return;
761 }
763 if (!mce_available(c))
764 {
765 printk(XENLOG_INFO "CPU%i: No machine check support available\n",
766 smp_processor_id());
767 return;
768 }
770 /* Hardware enable */
771 if (mca_cap_init())
772 return;
774 switch (c->x86_vendor) {
775 case X86_VENDOR_AMD:
776 inited = amd_mcheck_init(c);
777 break;
779 case X86_VENDOR_INTEL:
780 switch (c->x86) {
781 case 6:
782 case 15:
783 inited = intel_mcheck_init(c);
784 break;
785 }
786 break;
788 default:
789 break;
790 }
792 show_mca_info(inited, c);
793 if (inited == mcheck_none || inited == mcheck_unset)
794 goto out;
796 intpose_init();
798 mctelem_init(sizeof(struct mc_info));
800 vmce_init(c);
802 /* Turn on MCE now */
803 set_in_cr4(X86_CR4_MCE);
805 set_poll_bankmask(c);
807 return;
808 out:
809 if (smp_processor_id() == 0)
810 {
811 mcabanks_free(mca_allbanks);
812 mca_allbanks = NULL;
813 }
814 }
816 static void mcinfo_clear(struct mc_info *mi)
817 {
818 memset(mi, 0, sizeof(struct mc_info));
819 x86_mcinfo_nentries(mi) = 0;
820 }
822 void *x86_mcinfo_reserve(struct mc_info *mi, int size)
823 {
824 int i;
825 unsigned long end1, end2;
826 struct mcinfo_common *mic_base, *mic_index;
828 mic_index = mic_base = x86_mcinfo_first(mi);
830 /* go to first free entry */
831 for (i = 0; i < x86_mcinfo_nentries(mi); i++) {
832 mic_index = x86_mcinfo_next(mic_index);
833 }
835 /* check if there is enough size */
836 end1 = (unsigned long)((uint8_t *)mic_base + sizeof(struct mc_info));
837 end2 = (unsigned long)((uint8_t *)mic_index + size);
839 if (end1 < end2)
840 {
841 mce_printk(MCE_CRITICAL,
842 "mcinfo_add: No space left in mc_info\n");
843 return NULL;
844 }
846 /* there's enough space. add entry. */
847 x86_mcinfo_nentries(mi)++;
849 return mic_index;
850 }
852 void *x86_mcinfo_add(struct mc_info *mi, void *mcinfo)
853 {
854 struct mcinfo_common *mic, *buf;
856 mic = (struct mcinfo_common *)mcinfo;
857 buf = x86_mcinfo_reserve(mi, mic->size);
859 if ( !buf )
860 mce_printk(MCE_CRITICAL,
861 "mcinfo_add: No space left in mc_info\n");
862 else
863 memcpy(buf, mic, mic->size);
865 return buf;
866 }
868 /* Dump machine check information in a format that
869 * mcelog can parse. This is used only when
870 * Dom0 does not take the notification. */
871 void x86_mcinfo_dump(struct mc_info *mi)
872 {
873 struct mcinfo_common *mic = NULL;
874 struct mcinfo_global *mc_global;
875 struct mcinfo_bank *mc_bank;
877 /* first print the global info */
878 x86_mcinfo_lookup(mic, mi, MC_TYPE_GLOBAL);
879 if (mic == NULL)
880 return;
881 mc_global = (struct mcinfo_global *)mic;
882 if (mc_global->mc_flags & MC_FLAG_MCE) {
883 printk(XENLOG_WARNING
884 "CPU%d: Machine Check Exception: %16"PRIx64"\n",
885 mc_global->mc_coreid, mc_global->mc_gstatus);
886 } else {
887 printk(XENLOG_WARNING "MCE: The hardware reports a non-"
888 "fatal, correctable incident occurred on "
889 "CPU %d.\n",
890 mc_global->mc_coreid);
891 }
893 /* then the bank information */
894 x86_mcinfo_lookup(mic, mi, MC_TYPE_BANK); /* finds the first entry */
895 do {
896 if (mic == NULL)
897 return;
898 if (mic->type != MC_TYPE_BANK)
899 goto next;
901 mc_bank = (struct mcinfo_bank *)mic;
903 printk(XENLOG_WARNING "Bank %d: %16"PRIx64,
904 mc_bank->mc_bank,
905 mc_bank->mc_status);
906 if (mc_bank->mc_status & MCi_STATUS_MISCV)
907 printk("[%16"PRIx64"]", mc_bank->mc_misc);
908 if (mc_bank->mc_status & MCi_STATUS_ADDRV)
909 printk(" at %16"PRIx64, mc_bank->mc_addr);
911 printk("\n");
912 next:
913 mic = x86_mcinfo_next(mic); /* next entry */
914 if ((mic == NULL) || (mic->size == 0))
915 break;
916 } while (1);
917 }
919 static void do_mc_get_cpu_info(void *v)
920 {
921 int cpu = smp_processor_id();
922 int cindex, cpn;
923 struct cpuinfo_x86 *c;
924 xen_mc_logical_cpu_t *log_cpus, *xcp;
925 uint32_t junk, ebx;
927 log_cpus = v;
928 c = &cpu_data[cpu];
929 cindex = 0;
930 cpn = cpu - 1;
932 /*
933 * Deal with sparse masks, condensed into a contiguous array.
934 */
935 while (cpn >= 0) {
936 if (cpu_online(cpn))
937 cindex++;
938 cpn--;
939 }
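/* Illustrative example: with CPUs 0, 2 and 5 online, CPU 5 has two
 * online CPUs below it, so it lands at condensed index 2. */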
941 xcp = &log_cpus[cindex];
942 c = &cpu_data[cpu];
943 xcp->mc_cpunr = cpu;
944 x86_mc_get_cpu_info(cpu, &xcp->mc_chipid,
945 &xcp->mc_coreid, &xcp->mc_threadid,
946 &xcp->mc_apicid, &xcp->mc_ncores,
947 &xcp->mc_ncores_active, &xcp->mc_nthreads);
948 xcp->mc_cpuid_level = c->cpuid_level;
949 xcp->mc_family = c->x86;
950 xcp->mc_vendor = c->x86_vendor;
951 xcp->mc_model = c->x86_model;
952 xcp->mc_step = c->x86_mask;
953 xcp->mc_cache_size = c->x86_cache_size;
954 xcp->mc_cache_alignment = c->x86_cache_alignment;
955 memcpy(xcp->mc_vendorid, c->x86_vendor_id, sizeof xcp->mc_vendorid);
956 memcpy(xcp->mc_brandid, c->x86_model_id, sizeof xcp->mc_brandid);
957 memcpy(xcp->mc_cpu_caps, c->x86_capability, sizeof xcp->mc_cpu_caps);
959 /*
960 * This part needs to run on the CPU itself.
961 */
962 xcp->mc_nmsrvals = __MC_NMSRS;
963 xcp->mc_msrvalues[0].reg = MSR_IA32_MCG_CAP;
964 rdmsrl(MSR_IA32_MCG_CAP, xcp->mc_msrvalues[0].value);
966 if (c->cpuid_level >= 1) {
967 cpuid(1, &junk, &ebx, &junk, &junk);
968 xcp->mc_clusterid = (ebx >> 24) & 0xff;
969 } else
970 xcp->mc_clusterid = hard_smp_processor_id();
971 }
974 void x86_mc_get_cpu_info(unsigned cpu, uint32_t *chipid, uint16_t *coreid,
975 uint16_t *threadid, uint32_t *apicid,
976 unsigned *ncores, unsigned *ncores_active,
977 unsigned *nthreads)
978 {
979 struct cpuinfo_x86 *c;
981 *apicid = cpu_physical_id(cpu);
982 c = &cpu_data[cpu];
983 if (c->apicid == BAD_APICID) {
984 *chipid = cpu;
985 *coreid = 0;
986 *threadid = 0;
987 if (ncores != NULL)
988 *ncores = 1;
989 if (ncores_active != NULL)
990 *ncores_active = 1;
991 if (nthreads != NULL)
992 *nthreads = 1;
993 } else {
994 *chipid = phys_proc_id[cpu];
995 if (c->x86_max_cores > 1)
996 *coreid = cpu_core_id[cpu];
997 else
998 *coreid = 0;
999 *threadid = c->apicid & ((1 << (c->x86_num_siblings - 1)) - 1);
1000 if (ncores != NULL)
1001 *ncores = c->x86_max_cores;
1002 if (ncores_active != NULL)
1003 *ncores_active = c->booted_cores;
1004 if (nthreads != NULL)
1005 *nthreads = c->x86_num_siblings;
1006 }
1007 }
1009 #define INTPOSE_NENT 50
1011 static struct intpose_ent {
1012 unsigned int cpu_nr;
1013 uint64_t msr;
1014 uint64_t val;
1015 } intpose_arr[INTPOSE_NENT];
1017 static void intpose_init(void)
1018 {
1019 static int done;
1020 int i;
1022 if (done++ > 0)
1023 return;
1025 for (i = 0; i < INTPOSE_NENT; i++) {
1026 intpose_arr[i].cpu_nr = -1;
1027 }
1028 }
1031 struct intpose_ent *intpose_lookup(unsigned int cpu_nr, uint64_t msr,
1032 uint64_t *valp)
1033 {
1034 int i;
1036 for (i = 0; i < INTPOSE_NENT; i++) {
1037 if (intpose_arr[i].cpu_nr == cpu_nr &&
1038 intpose_arr[i].msr == msr) {
1039 if (valp != NULL)
1040 *valp = intpose_arr[i].val;
1041 return &intpose_arr[i];
1042 }
1043 }
1045 return NULL;
1046 }
1048 static void intpose_add(unsigned int cpu_nr, uint64_t msr, uint64_t val)
1049 {
1050 struct intpose_ent *ent;
1051 int i;
1053 if ((ent = intpose_lookup(cpu_nr, msr, NULL)) != NULL) {
1054 ent->val = val;
1055 return;
1056 }
1058 for (i = 0, ent = &intpose_arr[0]; i < INTPOSE_NENT; i++, ent++) {
1059 if (ent->cpu_nr == -1) {
1060 ent->cpu_nr = cpu_nr;
1061 ent->msr = msr;
1062 ent->val = val;
1063 return;
1064 }
1065 }
1067 printk("intpose_add: interpose array full - request dropped\n");
1070 void intpose_inval(unsigned int cpu_nr, uint64_t msr)
1071 {
1072 struct intpose_ent *ent;
1074 if ((ent = intpose_lookup(cpu_nr, msr, NULL)) != NULL) {
1075 ent->cpu_nr = -1;
1076 }
1077 }
1079 #define IS_MCA_BANKREG(r) \
1080 ((r) >= MSR_IA32_MC0_CTL && \
1081 (r) <= MSR_IA32_MCx_MISC(nr_mce_banks - 1) && \
1082 ((r) - MSR_IA32_MC0_CTL) % 4 != 0) /* excludes MCi_CTL */
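/*
 * Illustrative note: bank MSRs are laid out CTL, STATUS, ADDR, MISC at a
 * stride of 4 per bank starting at MSR_IA32_MC0_CTL, so for bank 0 the
 * macro above accepts MC0_STATUS/MC0_ADDR/MC0_MISC (offsets 1..3, i.e.
 * % 4 != 0) and rejects MC0_CTL (offset 0).
 */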
1084 static int x86_mc_msrinject_verify(struct xen_mc_msrinject *mci)
1085 {
1086 struct cpuinfo_x86 *c;
1087 int i, errs = 0;
1089 c = &cpu_data[smp_processor_id()];
1091 for (i = 0; i < mci->mcinj_count; i++) {
1092 uint64_t reg = mci->mcinj_msr[i].reg;
1093 const char *reason = NULL;
1095 if (IS_MCA_BANKREG(reg)) {
1096 if (c->x86_vendor == X86_VENDOR_AMD) {
1097 /* On AMD we can set MCi_STATUS_WREN in the
1098 * HWCR MSR to allow non-zero writes to bank
1099 * MSRs without a #GP. The injector in dom0
1100 * should set that bit, but we detect when it
1101 * is necessary and set it as a courtesy to
1102 * avoid #GP in the hypervisor. */
1103 mci->mcinj_flags |=
1104 _MC_MSRINJ_F_REQ_HWCR_WREN;
1105 continue;
1106 } else {
1107 /* No alternative but to interpose, so require
1108 * that the injector specify interposition. */
1109 if (!(mci->mcinj_flags &
1110 MC_MSRINJ_F_INTERPOSE)) {
1111 reason = "must specify interposition";
1114 } else {
1115 switch (reg) {
1116 /* MSRs acceptable on all x86 cpus */
1117 case MSR_IA32_MCG_STATUS:
1118 break;
1120 /* MSRs that the HV will take care of */
1121 case MSR_K8_HWCR:
1122 if (c->x86_vendor == X86_VENDOR_AMD)
1123 reason = "HV will operate HWCR";
1124 else
1125 reason ="only supported on AMD";
1126 break;
1128 default:
1129 reason = "not a recognized MCA MSR";
1130 break;
1131 }
1132 }
1134 if (reason != NULL) {
1135 printk("HV MSR INJECT ERROR: MSR 0x%llx %s\n",
1136 (unsigned long long)mci->mcinj_msr[i].reg, reason);
1137 errs++;
1138 }
1139 }
1141 return !errs;
1142 }
1144 static uint64_t x86_mc_hwcr_wren(void)
1145 {
1146 uint64_t old;
1148 rdmsrl(MSR_K8_HWCR, old);
1150 if (!(old & K8_HWCR_MCi_STATUS_WREN)) {
1151 uint64_t new = old | K8_HWCR_MCi_STATUS_WREN;
1152 wrmsrl(MSR_K8_HWCR, new);
1153 }
1155 return old;
1156 }
1158 static void x86_mc_hwcr_wren_restore(uint64_t hwcr)
1159 {
1160 if (!(hwcr & K8_HWCR_MCi_STATUS_WREN))
1161 wrmsrl(MSR_K8_HWCR, hwcr);
1162 }
1164 static void x86_mc_msrinject(void *data)
1165 {
1166 struct xen_mc_msrinject *mci = data;
1167 struct mcinfo_msr *msr;
1168 struct cpuinfo_x86 *c;
1169 uint64_t hwcr = 0;
1170 int intpose;
1171 int i;
1173 c = &cpu_data[smp_processor_id()];
1175 if (mci->mcinj_flags & _MC_MSRINJ_F_REQ_HWCR_WREN)
1176 hwcr = x86_mc_hwcr_wren();
1178 intpose = (mci->mcinj_flags & MC_MSRINJ_F_INTERPOSE) != 0;
1180 for (i = 0, msr = &mci->mcinj_msr[0];
1181 i < mci->mcinj_count; i++, msr++) {
1182 printk("HV MSR INJECT (%s) target %u actual %u MSR 0x%llx "
1183 "<-- 0x%llx\n",
1184 intpose ? "interpose" : "hardware",
1185 mci->mcinj_cpunr, smp_processor_id(),
1186 (unsigned long long)msr->reg,
1187 (unsigned long long)msr->value);
1189 if (intpose)
1190 intpose_add(mci->mcinj_cpunr, msr->reg, msr->value);
1191 else
1192 wrmsrl(msr->reg, msr->value);
1193 }
1195 if (mci->mcinj_flags & _MC_MSRINJ_F_REQ_HWCR_WREN)
1196 x86_mc_hwcr_wren_restore(hwcr);
1197 }
1199 /*ARGSUSED*/
1200 static void x86_mc_mceinject(void *data)
1201 {
1202 printk("Simulating #MC on cpu %d\n", smp_processor_id());
1203 __asm__ __volatile__("int $0x12");
1206 static void x86_cmci_inject(void *data)
1207 {
1208 printk("Simulating CMCI on cpu %d\n", smp_processor_id());
1209 __asm__ __volatile__("int $0xf7");
1212 #if BITS_PER_LONG == 64
1214 #define ID2COOKIE(id) ((mctelem_cookie_t)(id))
1215 #define COOKIE2ID(c) ((uint64_t)(c))
1217 #elif BITS_PER_LONG == 32
1219 #define ID2COOKIE(id) ((mctelem_cookie_t)(uint32_t)((id) & 0xffffffffU))
1220 #define COOKIE2ID(c) ((uint64_t)(uint32_t)(c))
1222 #elif defined(BITS_PER_LONG)
1223 #error BITS_PER_LONG has unexpected value
1224 #else
1225 #error BITS_PER_LONG definition absent
1226 #endif
1228 #ifdef CONFIG_COMPAT
1229 # include <compat/arch-x86/xen-mca.h>
1231 # define xen_mcinfo_msr mcinfo_msr
1232 CHECK_mcinfo_msr;
1233 # undef xen_mcinfo_msr
1234 # undef CHECK_mcinfo_msr
1235 # define CHECK_mcinfo_msr struct mcinfo_msr
1237 # define xen_mcinfo_common mcinfo_common
1238 CHECK_mcinfo_common;
1239 # undef xen_mcinfo_common
1240 # undef CHECK_mcinfo_common
1241 # define CHECK_mcinfo_common struct mcinfo_common
1243 CHECK_FIELD_(struct, mc_fetch, flags);
1244 CHECK_FIELD_(struct, mc_fetch, fetch_id);
1245 # define CHECK_compat_mc_fetch struct mc_fetch
1247 CHECK_FIELD_(struct, mc_physcpuinfo, ncpus);
1248 # define CHECK_compat_mc_physcpuinfo struct mc_physcpuinfo
1250 #define CHECK_compat_mc_inject_v2 struct mc_inject_v2
1251 CHECK_mc;
1252 # undef CHECK_compat_mc_fetch
1253 # undef CHECK_compat_mc_physcpuinfo
1255 # define xen_mc_info mc_info
1256 CHECK_mc_info;
1257 # undef xen_mc_info
1259 # define xen_mcinfo_global mcinfo_global
1260 CHECK_mcinfo_global;
1261 # undef xen_mcinfo_global
1263 # define xen_mcinfo_bank mcinfo_bank
1264 CHECK_mcinfo_bank;
1265 # undef xen_mcinfo_bank
1267 # define xen_mcinfo_extended mcinfo_extended
1268 CHECK_mcinfo_extended;
1269 # undef xen_mcinfo_extended
1271 # define xen_mcinfo_recovery mcinfo_recovery
1272 # define xen_cpu_offline_action cpu_offline_action
1273 # define xen_page_offline_action page_offline_action
1274 CHECK_mcinfo_recovery;
1275 # undef xen_cpu_offline_action
1276 # undef xen_page_offline_action
1277 # undef xen_mcinfo_recovery
1278 #else
1279 # define compat_mc_fetch xen_mc_fetch
1280 # define compat_mc_physcpuinfo xen_mc_physcpuinfo
1281 # define compat_handle_is_null guest_handle_is_null
1282 # define copy_to_compat copy_to_guest
1283 #endif
1285 /* Machine Check Architecture Hypercall */
1286 long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u_xen_mc)
1287 {
1288 long ret = 0;
1289 struct xen_mc curop, *op = &curop;
1290 struct vcpu *v = current;
1291 union {
1292 struct xen_mc_fetch *nat;
1293 struct compat_mc_fetch *cmp;
1294 } mc_fetch;
1295 union {
1296 struct xen_mc_physcpuinfo *nat;
1297 struct compat_mc_physcpuinfo *cmp;
1298 } mc_physcpuinfo;
1299 uint32_t flags, cmdflags;
1300 int nlcpu;
1301 xen_mc_logical_cpu_t *log_cpus = NULL;
1302 mctelem_cookie_t mctc;
1303 mctelem_class_t which;
1304 unsigned int target;
1305 struct xen_mc_msrinject *mc_msrinject;
1306 struct xen_mc_mceinject *mc_mceinject;
1308 if (!IS_PRIV(v->domain) )
1309 return x86_mcerr(NULL, -EPERM);
1311 if ( copy_from_guest(op, u_xen_mc, 1) )
1312 return x86_mcerr("do_mca: failed copyin of xen_mc_t", -EFAULT);
1314 if ( op->interface_version != XEN_MCA_INTERFACE_VERSION )
1315 return x86_mcerr("do_mca: interface version mismatch", -EACCES);
1317 switch (op->cmd) {
1318 case XEN_MC_fetch:
1319 mc_fetch.nat = &op->u.mc_fetch;
1320 cmdflags = mc_fetch.nat->flags;
1322 switch (cmdflags & (XEN_MC_NONURGENT | XEN_MC_URGENT)) {
1323 case XEN_MC_NONURGENT:
1324 which = MC_NONURGENT;
1325 break;
1327 case XEN_MC_URGENT:
1328 which = MC_URGENT;
1329 break;
1331 default:
1332 return x86_mcerr("do_mca fetch: bad cmdflags", -EINVAL);
1335 flags = XEN_MC_OK;
1337 if (cmdflags & XEN_MC_ACK) {
1338 mctelem_cookie_t cookie = ID2COOKIE(mc_fetch.nat->fetch_id);
1339 mctelem_ack(which, cookie);
1340 } else {
1341 if (!is_pv_32on64_vcpu(v)
1342 ? guest_handle_is_null(mc_fetch.nat->data)
1343 : compat_handle_is_null(mc_fetch.cmp->data))
1344 return x86_mcerr("do_mca fetch: guest buffer "
1345 "invalid", -EINVAL);
1347 if ((mctc = mctelem_consume_oldest_begin(which))) {
1348 struct mc_info *mcip = mctelem_dataptr(mctc);
1349 if (!is_pv_32on64_vcpu(v)
1350 ? copy_to_guest(mc_fetch.nat->data, mcip, 1)
1351 : copy_to_compat(mc_fetch.cmp->data,
1352 mcip, 1)) {
1353 ret = -EFAULT;
1354 flags |= XEN_MC_FETCHFAILED;
1355 mc_fetch.nat->fetch_id = 0;
1356 } else {
1357 mc_fetch.nat->fetch_id = COOKIE2ID(mctc);
1358 }
1359 mctelem_consume_oldest_end(mctc);
1360 } else {
1361 /* There is no data */
1362 flags |= XEN_MC_NODATA;
1363 mc_fetch.nat->fetch_id = 0;
1364 }
1366 mc_fetch.nat->flags = flags;
1367 if (copy_to_guest(u_xen_mc, op, 1) != 0)
1368 ret = -EFAULT;
1369 }
1371 break;
1373 case XEN_MC_notifydomain:
1374 return x86_mcerr("do_mca notify unsupported", -EINVAL);
1376 case XEN_MC_physcpuinfo:
1377 mc_physcpuinfo.nat = &op->u.mc_physcpuinfo;
1378 nlcpu = num_online_cpus();
1380 if (!is_pv_32on64_vcpu(v)
1381 ? !guest_handle_is_null(mc_physcpuinfo.nat->info)
1382 : !compat_handle_is_null(mc_physcpuinfo.cmp->info)) {
1383 if (mc_physcpuinfo.nat->ncpus <= 0)
1384 return x86_mcerr("do_mca cpuinfo: ncpus <= 0",
1385 -EINVAL);
1386 nlcpu = min(nlcpu, (int)mc_physcpuinfo.nat->ncpus);
1387 log_cpus = xmalloc_array(xen_mc_logical_cpu_t, nlcpu);
1388 if (log_cpus == NULL)
1389 return x86_mcerr("do_mca cpuinfo", -ENOMEM);
1390 on_each_cpu(do_mc_get_cpu_info, log_cpus, 1);
1391 if (!is_pv_32on64_vcpu(v)
1392 ? copy_to_guest(mc_physcpuinfo.nat->info,
1393 log_cpus, nlcpu)
1394 : copy_to_compat(mc_physcpuinfo.cmp->info,
1395 log_cpus, nlcpu))
1396 ret = -EFAULT;
1397 xfree(log_cpus);
1398 }
1400 mc_physcpuinfo.nat->ncpus = nlcpu;
1402 if (copy_to_guest(u_xen_mc, op, 1))
1403 return x86_mcerr("do_mca cpuinfo", -EFAULT);
1405 break;
1407 case XEN_MC_msrinject:
1408 if (nr_mce_banks == 0)
1409 return x86_mcerr("do_mca inject", -ENODEV);
1411 mc_msrinject = &op->u.mc_msrinject;
1412 target = mc_msrinject->mcinj_cpunr;
1414 if (target >= NR_CPUS)
1415 return x86_mcerr("do_mca inject: bad target", -EINVAL);
1417 if (!cpu_online(target))
1418 return x86_mcerr("do_mca inject: target offline",
1419 -EINVAL);
1421 if (mc_msrinject->mcinj_count == 0)
1422 return 0;
1424 if (!x86_mc_msrinject_verify(mc_msrinject))
1425 return x86_mcerr("do_mca inject: illegal MSR", -EINVAL);
1427 add_taint(TAINT_ERROR_INJECT);
1429 on_selected_cpus(cpumask_of(target), x86_mc_msrinject,
1430 mc_msrinject, 1);
1432 break;
1434 case XEN_MC_mceinject:
1435 if (nr_mce_banks == 0)
1436 return x86_mcerr("do_mca #MC", -ENODEV);
1438 mc_mceinject = &op->u.mc_mceinject;
1439 target = mc_mceinject->mceinj_cpunr;
1441 if (target >= NR_CPUS)
1442 return x86_mcerr("do_mca #MC: bad target", -EINVAL);
1444 if (!cpu_online(target))
1445 return x86_mcerr("do_mca #MC: target offline", -EINVAL);
1447 add_taint(TAINT_ERROR_INJECT);
1449 if ( mce_broadcast )
1450 on_each_cpu(x86_mc_mceinject, mc_mceinject, 1);
1451 else
1452 on_selected_cpus(cpumask_of(target), x86_mc_mceinject,
1453 mc_mceinject, 1);
1454 break;
1456 case XEN_MC_inject_v2:
1457 {
1458 cpumask_t cpumap;
1460 if (nr_mce_banks == 0)
1461 return x86_mcerr("do_mca #MC", -ENODEV);
1463 if ( op->u.mc_inject_v2.flags & XEN_MC_INJECT_CPU_BROADCAST )
1464 cpus_copy(cpumap, cpu_online_map);
1465 else
1466 {
1467 int gcw;
1469 cpus_clear(cpumap);
1470 xenctl_cpumap_to_cpumask(&cpumap,
1471 &op->u.mc_inject_v2.cpumap);
1472 gcw = cpus_weight(cpumap);
1473 cpus_and(cpumap, cpu_online_map, cpumap);
1475 if ( cpus_empty(cpumap) )
1476 return x86_mcerr("No online CPU passed\n", -EINVAL);
1477 else if ( gcw != cpus_weight(cpumap) )
1478 dprintk(XENLOG_INFO,
1479 "Not all required CPUs are online\n");
1482 switch (op->u.mc_inject_v2.flags & XEN_MC_INJECT_TYPE_MASK)
1483 {
1484 case XEN_MC_INJECT_TYPE_MCE:
1485 if ( mce_broadcast &&
1486 !cpus_equal(cpumap, cpu_online_map) )
1487 printk("Not trigger MCE on all CPUs, may HANG!\n");
1488 on_selected_cpus(&cpumap, x86_mc_mceinject, NULL, 1);
1489 break;
1490 case XEN_MC_INJECT_TYPE_CMCI:
1491 if ( !cmci_support )
1492 return x86_mcerr(
1493 "No CMCI supported in platform\n", -EINVAL);
1494 on_selected_cpus(&cpumap, x86_cmci_inject, NULL, 1);
1495 break;
1496 default:
1497 return x86_mcerr("Wrong mca type\n", -EINVAL);
1499 break;
1500 }
1502 default:
1503 return x86_mcerr("do_mca: bad command", -EINVAL);
1506 return ret;
1507 }
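For context, a hedged sketch of how a privileged dom0 tool might drive the XEN_MC_fetch/XEN_MC_ACK cycle; xen_mc_hypercall() is a hypothetical wrapper for whatever hypercall transport the tool uses and is not defined in this file:

    /* Illustrative only (hypothetical transport): fetch one urgent record,
     * consume it, then acknowledge it so the hypervisor can reuse the slot. */
    static int example_fetch_and_ack_urgent(struct mc_info *buf)
    {
        struct xen_mc mc;

        memset(&mc, 0, sizeof(mc));
        mc.cmd = XEN_MC_fetch;
        mc.interface_version = XEN_MCA_INTERFACE_VERSION;
        mc.u.mc_fetch.flags = XEN_MC_URGENT;
        set_xen_guest_handle(mc.u.mc_fetch.data, buf);

        if (xen_mc_hypercall(&mc))              /* hypothetical wrapper */
            return -1;
        if (mc.u.mc_fetch.flags & XEN_MC_NODATA)
            return 0;                           /* nothing queued */

        /* ... parse *buf ..., then acknowledge using the returned fetch_id */
        mc.u.mc_fetch.flags = XEN_MC_URGENT | XEN_MC_ACK;
        return xen_mc_hypercall(&mc);
    }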
1509 int mcinfo_dumpped;
1510 static int x86_mcinfo_dump_panic(mctelem_cookie_t mctc)
1511 {
1512 struct mc_info *mcip = mctelem_dataptr(mctc);
1514 x86_mcinfo_dump(mcip);
1515 mcinfo_dumpped++;
1517 return 0;
1518 }
1520 /* XXX shall we dump committed mc_info?? */
1521 static void mc_panic_dump(void)
1522 {
1523 int cpu;
1525 dprintk(XENLOG_ERR, "Begin dump mc_info\n");
1526 for_each_online_cpu(cpu)
1527 mctelem_process_deferred(cpu, x86_mcinfo_dump_panic);
1528 dprintk(XENLOG_ERR, "End dump mc_info, %x mcinfo dumped\n", mcinfo_dumpped);
1531 void mc_panic(char *s)
1532 {
1533 is_mc_panic = 1;
1534 console_force_unlock();
1536 printk("Fatal machine check: %s\n", s);
1537 printk("\n"
1538 "****************************************\n"
1539 "\n"
1540 " The processor has reported a hardware error which cannot\n"
1541 " be recovered from. Xen will now reboot the machine.\n");
1542 mc_panic_dump();
1543 panic("HARDWARE ERROR");