
annotate xen/arch/x86/cpu/mcheck/mce.c @ 20963:da7ae6d8838a

x86: MCE fixes

- fill_vmsr_data() leaked a domain reference; since the caller already
obtained one, there's no need to obtain another one here
- intel_UCR_handler() could call put_domain() with a NULL pointer
- mcheck_mca_logout() updated a local data structure that wasn't used
after the update

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Feb 10 09:18:11 2010 +0000 (2010-02-10)
parents ebd2495ec073
children 50ea24db1f88
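The second item in the changeset description above (put_domain() called with a NULL pointer) comes down to guarding the error path of a reference-counted domain lookup. The sketch below is illustrative only, not the actual mce_intel.c hunk: the fill_vmsr_data() call signature, the mc_bank/gstatus variables and the vmce_failed label are assumptions made for the example. The point is that put_domain() is only called on a pointer for which a reference is actually held, and that no second reference is taken when the caller already holds one (the first item above).

    /* Sketch only: names and the fill_vmsr_data() signature are assumptions. */
    struct domain *d = get_domain_by_id(mc_bank->mc_domid); /* takes a reference */

    if (d == NULL)                      /* no such domain: nothing to release */
        goto vmce_failed;

    /* The caller already holds the reference; fill_vmsr_data() must not
     * take another one, or that reference is leaked on the success path. */
    if (fill_vmsr_data(mc_bank, d, gstatus) == -1)
        goto vmce_failed;

    put_domain(d);                      /* drop the reference taken above */
    return;

vmce_failed:
    if (d != NULL)                      /* guard: put_domain(NULL) would crash */
        put_domain(d);
    mc_panic("Failed to inject vMCE into Dom0\n");
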
rev   line source
kaf24@8869 1 /*
kaf24@8869 2 * mce.c - x86 Machine Check Exception Reporting
kaf24@8869 3 * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@codemonkey.org.uk>
kaf24@8869 4 */
kaf24@8869 5
kaf24@8869 6 #include <xen/init.h>
kaf24@8869 7 #include <xen/types.h>
kaf24@8869 8 #include <xen/kernel.h>
kaf24@8869 9 #include <xen/config.h>
kaf24@8869 10 #include <xen/smp.h>
keir@18006 11 #include <xen/errno.h>
keir@18934 12 #include <xen/console.h>
keir@19405 13 #include <xen/sched.h>
keir@19405 14 #include <xen/sched-if.h>
keir@19405 15 #include <xen/cpumask.h>
keir@19405 16 #include <xen/event.h>
keir@19405 17 #include <xen/guest_access.h>
keir@20323 18 #include <xen/hypercall.h> /* for do_mca */
kaf24@8869 19
keir@19405 20 #include <asm/processor.h>
kaf24@8869 21 #include <asm/system.h>
keir@19405 22 #include <asm/msr.h>
kaf24@8869 23
kaf24@8869 24 #include "mce.h"
kaf24@8869 25
keir@20288 26 int mce_disabled;
keir@19988 27 invbool_param("mce", mce_disabled);
keir@20911 28 static int mce_force_broadcast;
keir@20911 29 boolean_param("mce_fb", mce_force_broadcast);
keir@20288 30 int is_mc_panic;
keir@18006 31 unsigned int nr_mce_banks;
kaf24@8869 32
keir@20911 33 int mce_broadcast = 0;
keir@19988 34 static uint64_t g_mcg_cap;
keir@16131 35
keir@20908 36 /* Real value in physical CTL MSR */
keir@20908 37 static uint64_t h_mcg_ctl = 0UL;
keir@20908 38 static uint64_t *h_mci_ctrl;
keir@20908 39 int firstbank;
keir@20908 40
keir@19406 41 static void intpose_init(void);
keir@19405 42 static void mcinfo_clear(struct mc_info *);
keir@19405 43
keir@19406 44 #define SEG_PL(segsel) ((segsel) & 0x3)
keir@19406 45 #define _MC_MSRINJ_F_REQ_HWCR_WREN (1 << 16)
keir@18006 46
keir@19558 47 #if 0
keir@19558 48 static int x86_mcerr(const char *msg, int err)
keir@19405 49 {
keir@19558 50 gdprintk(XENLOG_WARNING, "x86_mcerr: %s, returning %d\n",
keir@19558 51 msg != NULL ? msg : "", err);
keir@19558 52 return err;
keir@19405 53 }
keir@19405 54 #else
keir@19558 55 #define x86_mcerr(msg, err) (err)
keir@19405 56 #endif
keir@18006 57
keir@19405 58 cpu_banks_t mca_allbanks;
keir@18006 59
keir@20288 60 int mce_verbosity;
keir@20288 61 static void __init mce_set_verbosity(char *str)
keir@20288 62 {
keir@20288 63 if (strcmp("verbose", str) == 0)
keir@20288 64 mce_verbosity = MCE_VERBOSE;
keir@20288 65 else
keir@20288 66 printk(KERN_DEBUG "Machine Check verbosity level %s not recognised, "
keir@20288 67 "use mce_verbosity=verbose\n", str);
keir@20288 68 }
keir@20288 69 custom_param("mce_verbosity", mce_set_verbosity);
keir@20288 70
kaf24@8869 71 /* Handle unconfigured int18 (should never happen) */
keir@18006 72 static void unexpected_machine_check(struct cpu_user_regs *regs, long error_code)
keir@19405 73 {
keir@18006 74 printk(XENLOG_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
keir@18006 75 smp_processor_id());
kaf24@8869 76 }
kaf24@8869 77
keir@18006 78
keir@19405 79 static x86_mce_vector_t _machine_check_vector = unexpected_machine_check;
keir@19405 80
keir@19405 81 void x86_mce_vector_register(x86_mce_vector_t hdlr)
keir@19405 82 {
keir@19405 83 _machine_check_vector = hdlr;
keir@19405 84 wmb();
keir@19405 85 }
keir@19405 86
kaf24@8869 87 /* Call the installed machine check handler for this CPU setup. */
keir@19405 88
keir@19405 89 void machine_check_vector(struct cpu_user_regs *regs, long error_code)
keir@19405 90 {
keir@19405 91 _machine_check_vector(regs, error_code);
keir@19405 92 }
keir@18006 93
keir@18006 94 /* Init machine check callback handler
keir@18006 95 * It is used to collect additional information provided by newer
keir@18006 96 * CPU families/models without the need to duplicate the whole handler.
keir@18006 97 * This avoids having many handlers doing nearly the same thing, each
keir@18006 98 * with its own tweaks and bugs. */
keir@19405 99 static x86_mce_callback_t mc_callback_bank_extended = NULL;
keir@19405 100
keir@19405 101 void x86_mce_callback_register(x86_mce_callback_t cbfunc)
keir@19405 102 {
keir@19405 103 mc_callback_bank_extended = cbfunc;
keir@19405 104 }
keir@19405 105
keir@19781 106 /* Machine check recoverability judgement callback handler
keir@19781 107 * It is used to judge whether a UC error is recoverable by software
keir@19781 108 */
keir@19781 109 static mce_recoverable_t mc_recoverable_scan = NULL;
keir@19781 110
keir@19781 111 void mce_recoverable_register(mce_recoverable_t cbfunc)
keir@19781 112 {
keir@19781 113 mc_recoverable_scan = cbfunc;
keir@19781 114 }
keir@19781 115
keir@19781 116 /* Callback handler for judging whether to clear a Machine Check error bank.
keir@19781 117 * According to the latest Intel MCA OS Recovery Writer's Guide,
keir@19781 118 * whether the error MCA bank needs to be cleared is decided by the mca_source
keir@19781 119 * and the MCi_STATUS bit values.
keir@19781 120 */
keir@19781 121 static mce_need_clearbank_t mc_need_clearbank_scan = NULL;
keir@19781 122
keir@19781 123 void mce_need_clearbank_register(mce_need_clearbank_t cbfunc)
keir@19781 124 {
keir@19781 125 mc_need_clearbank_scan = cbfunc;
keir@19781 126 }
keir@19781 127
keir@19405 128 /* Utility function to perform MCA bank telemetry readout and to push that
keir@19405 129 * telemetry towards an interested dom0 for logging and diagnosis.
keir@19405 130 * The caller - #MC handler or MCA poll function - must arrange that we
keir@19405 131 * do not migrate cpus. */
keir@19405 132
keir@19405 133 /* XXFM Could add overflow counting? */
keir@19781 134
keir@19781 135 /* Add out_param clear_bank for the Machine Check handler caller.
keir@19781 136 * For the latest Intel CPUs, whether to clear the error bank status needs
keir@19781 137 * to be judged by the callback function defined above.
keir@19781 138 */
keir@19405 139 mctelem_cookie_t mcheck_mca_logout(enum mca_source who, cpu_banks_t bankmask,
keir@19781 140 struct mca_summary *sp, cpu_banks_t* clear_bank)
keir@19405 141 {
keir@19405 142 struct vcpu *v = current;
keir@19405 143 struct domain *d;
keir@19405 144 uint64_t gstatus, status, addr, misc;
keir@19405 145 struct mcinfo_global mcg; /* on stack */
keir@19405 146 struct mcinfo_common *mic;
keir@19405 147 struct mcinfo_global *mig; /* on stack */
keir@19405 148 mctelem_cookie_t mctc = NULL;
keir@19781 149 uint32_t uc = 0, pcc = 0, recover, need_clear = 1;
keir@19405 150 struct mc_info *mci = NULL;
keir@19405 151 mctelem_class_t which = MC_URGENT; /* XXXgcc */
keir@19405 152 unsigned int cpu_nr;
keir@19405 153 int errcnt = 0;
keir@19405 154 int i;
keir@19405 155 enum mca_extinfo cbret = MCA_EXTINFO_IGNORED;
keir@19405 156
keir@19405 157 cpu_nr = smp_processor_id();
keir@19405 158 BUG_ON(cpu_nr != v->processor);
keir@19405 159
keir@19406 160 mca_rdmsrl(MSR_IA32_MCG_STATUS, gstatus);
keir@19405 161
keir@19405 162 memset(&mcg, 0, sizeof (mcg));
keir@19405 163 mcg.common.type = MC_TYPE_GLOBAL;
keir@19405 164 mcg.common.size = sizeof (mcg);
keir@19405 165 if (v != NULL && ((d = v->domain) != NULL)) {
keir@19405 166 mcg.mc_domid = d->domain_id;
keir@19405 167 mcg.mc_vcpuid = v->vcpu_id;
keir@19405 168 } else {
keir@19405 169 mcg.mc_domid = -1;
keir@19405 170 mcg.mc_vcpuid = -1;
keir@19405 171 }
keir@19405 172 mcg.mc_gstatus = gstatus; /* MCG_STATUS */
keir@19405 173
keir@19405 174 switch (who) {
keir@19405 175 case MCA_MCE_HANDLER:
keir@19563 176 case MCA_MCE_SCAN:
keir@19405 177 mcg.mc_flags = MC_FLAG_MCE;
keir@19405 178 which = MC_URGENT;
keir@19405 179 break;
keir@19405 180
keir@19405 181 case MCA_POLLER:
keir@19405 182 case MCA_RESET:
keir@19405 183 mcg.mc_flags = MC_FLAG_POLLED;
keir@19405 184 which = MC_NONURGENT;
keir@19405 185 break;
keir@19405 186
keir@19405 187 case MCA_CMCI_HANDLER:
keir@19405 188 mcg.mc_flags = MC_FLAG_CMCI;
keir@19405 189 which = MC_NONURGENT;
keir@19405 190 break;
keir@19405 191
keir@19405 192 default:
keir@19405 193 BUG();
keir@19405 194 }
keir@19405 195
keir@19405 196 /* Retrieve detector information */
keir@19405 197 x86_mc_get_cpu_info(cpu_nr, &mcg.mc_socketid,
keir@19405 198 &mcg.mc_coreid, &mcg.mc_core_threadid,
keir@19405 199 &mcg.mc_apicid, NULL, NULL, NULL);
keir@19405 200
keir@19781 201 /* If no mc_recoverable_scan callback handler is registered,
keir@19781 202 * this error is not recoverable
keir@19781 203 */
keir@19781 204 recover = (mc_recoverable_scan)? 1: 0;
keir@19781 205
keir@19405 206 for (i = 0; i < 32 && i < nr_mce_banks; i++) {
keir@19405 207 struct mcinfo_bank mcb; /* on stack */
keir@19405 208
keir@19405 209 /* Skip bank if corresponding bit in bankmask is clear */
keir@19405 210 if (!test_bit(i, bankmask))
keir@19405 211 continue;
keir@19405 212
keir@19406 213 mca_rdmsrl(MSR_IA32_MC0_STATUS + i * 4, status);
keir@19405 214 if (!(status & MCi_STATUS_VAL))
keir@19405 215 continue; /* this bank has no valid telemetry */
keir@19405 216
keir@19781 217 /* For the latest Intel CPUs' CMCI/MCE handler callers, we need to
keir@19781 218 * decide whether to clear the bank based on MCi_STATUS bit values
keir@19781 219 * such as OVER/UC/EN/PCC/S/AR
keir@19781 220 */
keir@19781 221 if ( mc_need_clearbank_scan )
keir@19781 222 need_clear = mc_need_clearbank_scan(who, status);
keir@19781 223
keir@19405 224 /* If this is the first bank with valid MCA DATA, then
keir@19405 225 * try to reserve an entry from the urgent/nonurgent queue
keir@19405 226 * depending on whether we are called from an exception or
keir@19405 227 * a poller; this can fail (for example dom0 may not
keir@19405 228 * yet have consumed past telemetry). */
keir@19405 229 if (errcnt == 0) {
keir@19405 230 if ((mctc = mctelem_reserve(which)) != NULL) {
keir@19405 231 mci = mctelem_dataptr(mctc);
keir@19405 232 mcinfo_clear(mci);
keir@19405 233 }
keir@19405 234 }
keir@19405 235
keir@19405 236 memset(&mcb, 0, sizeof (mcb));
keir@19405 237 mcb.common.type = MC_TYPE_BANK;
keir@19405 238 mcb.common.size = sizeof (mcb);
keir@19405 239 mcb.mc_bank = i;
keir@19405 240 mcb.mc_status = status;
keir@19405 241
keir@19405 242 /* form a mask of which banks have logged uncorrected errors */
keir@19405 243 if ((status & MCi_STATUS_UC) != 0)
keir@19405 244 uc |= (1 << i);
keir@19405 245
keir@19405 246 /* likewise for those with processor context corrupt */
keir@19405 247 if ((status & MCi_STATUS_PCC) != 0)
keir@19405 248 pcc |= (1 << i);
keir@19405 249
keir@19781 250 if (recover && uc)
keir@19781 251 /* uc = 1, recover = 1, we need not panic.
keir@19781 252 */
keir@19781 253 recover = mc_recoverable_scan(status);
keir@19781 254
keir@19405 255 addr = misc = 0;
keir@19405 256
keir@19405 257 if (status & MCi_STATUS_ADDRV) {
keir@19406 258 mca_rdmsrl(MSR_IA32_MC0_ADDR + 4 * i, addr);
keir@20242 259 if (mfn_valid(paddr_to_pfn(addr))) {
keir@20242 260 d = maddr_get_owner(addr);
keir@20242 261 if (d != NULL && (who == MCA_POLLER ||
keir@20242 262 who == MCA_CMCI_HANDLER))
keir@20242 263 mcb.mc_domid = d->domain_id;
keir@20242 264 }
keir@19405 265 }
keir@19405 266
keir@19405 267 if (status & MCi_STATUS_MISCV)
keir@19406 268 mca_rdmsrl(MSR_IA32_MC0_MISC + 4 * i, misc);
keir@19405 269
keir@19405 270 mcb.mc_addr = addr;
keir@19405 271 mcb.mc_misc = misc;
keir@19405 272
keir@19405 273 if (who == MCA_CMCI_HANDLER) {
keir@19406 274 mca_rdmsrl(MSR_IA32_MC0_CTL2 + i, mcb.mc_ctrl2);
keir@19405 275 rdtscll(mcb.mc_tsc);
keir@19405 276 }
keir@19405 277
keir@19405 278 /* Increment the error count; if this is the first bank
keir@19405 279 * with a valid error then add the global info to the mcinfo. */
keir@19405 280 if (errcnt++ == 0 && mci != NULL)
keir@19405 281 x86_mcinfo_add(mci, &mcg);
keir@19405 282
keir@19405 283 /* Add the bank data */
keir@19405 284 if (mci != NULL)
keir@19405 285 x86_mcinfo_add(mci, &mcb);
keir@19405 286
keir@19405 287 if (mc_callback_bank_extended && cbret != MCA_EXTINFO_GLOBAL) {
keir@19405 288 cbret = mc_callback_bank_extended(mci, i, status);
keir@19405 289 }
keir@19405 290
keir@19781 291 /* By default, need_clear = 1 */
keir@19781 292 if (who != MCA_MCE_SCAN && need_clear)
keir@19563 293 /* Clear status */
keir@19563 294 mca_wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL);
keir@19781 295 else if ( who == MCA_MCE_SCAN && need_clear)
keir@19781 296 set_bit(i, clear_bank);
keir@19781 297
keir@19405 298 wmb();
keir@19405 299 }
keir@19405 300
keir@19405 301 if (mci != NULL && errcnt > 0) {
keir@19405 302 x86_mcinfo_lookup(mic, mci, MC_TYPE_GLOBAL);
keir@20963 303 mig = container_of(mic, struct mcinfo_global, common);
keir@20963 304 if (mic == NULL)
keir@20963 305 ;
keir@20963 306 else if (pcc)
keir@20963 307 mig->mc_flags |= MC_FLAG_UNCORRECTABLE;
keir@19405 308 else if (uc)
keir@20963 309 mig->mc_flags |= MC_FLAG_RECOVERABLE;
keir@19405 310 else
keir@20963 311 mig->mc_flags |= MC_FLAG_CORRECTABLE;
keir@19405 312 }
keir@18006 313
keir@18006 314
keir@19405 315 if (sp) {
keir@19405 316 sp->errcnt = errcnt;
keir@19405 317 sp->ripv = (gstatus & MCG_STATUS_RIPV) != 0;
keir@19405 318 sp->eipv = (gstatus & MCG_STATUS_EIPV) != 0;
keir@19405 319 sp->uc = uc;
keir@19405 320 sp->pcc = pcc;
keir@19781 321 sp->recoverable = recover;
keir@19405 322 }
keir@19405 323
keir@19405 324 return mci != NULL ? mctc : NULL; /* may be NULL */
keir@19405 325 }
keir@19405 326
keir@19405 327 #define DOM_NORMAL 0
keir@19405 328 #define DOM0_TRAP 1
keir@19405 329 #define DOMU_TRAP 2
keir@19405 330 #define DOMU_KILLED 4
keir@19405 331
keir@19405 332 /* Shared #MC handler. */
keir@19405 333 void mcheck_cmn_handler(struct cpu_user_regs *regs, long error_code,
keir@19405 334 cpu_banks_t bankmask)
keir@18006 335 {
keir@19405 336 int xen_state_lost, dom0_state_lost, domU_state_lost;
keir@19405 337 struct vcpu *v = current;
keir@19405 338 struct domain *curdom = v->domain;
keir@19405 339 domid_t domid = curdom->domain_id;
keir@19405 340 int ctx_xen, ctx_dom0, ctx_domU;
keir@19405 341 uint32_t dom_state = DOM_NORMAL;
keir@19405 342 mctelem_cookie_t mctc = NULL;
keir@19405 343 struct mca_summary bs;
keir@19405 344 struct mc_info *mci = NULL;
keir@19405 345 int irqlocked = 0;
keir@19405 346 uint64_t gstatus;
keir@19405 347 int ripv;
keir@19405 348
keir@19405 349 /* This handler runs as an interrupt gate. So IPIs from the
keir@19405 350 * polling service routine are deferred until we're finished.
keir@19405 351 */
keir@19405 352
keir@19405 353 /* Disable interrupts for the _vcpu_. It may not be re-scheduled to
keir@19405 354 * another physical CPU. */
keir@19405 355 vcpu_schedule_lock_irq(v);
keir@19405 356 irqlocked = 1;
keir@19405 357
keir@19405 358 /* Read global status; if it does not indicate machine check
keir@19405 359 * in progress then bail as long as we have a valid ip to return to. */
keir@19406 360 mca_rdmsrl(MSR_IA32_MCG_STATUS, gstatus);
keir@19405 361 ripv = ((gstatus & MCG_STATUS_RIPV) != 0);
keir@19405 362 if (!(gstatus & MCG_STATUS_MCIP) && ripv) {
keir@19405 363 add_taint(TAINT_MACHINE_CHECK); /* questionable */
keir@19405 364 vcpu_schedule_unlock_irq(v);
keir@19405 365 irqlocked = 0;
keir@19405 366 goto cmn_handler_done;
keir@19405 367 }
keir@19405 368
keir@19405 369 /* Go and grab error telemetry. We must choose whether to commit
keir@19405 370 * for logging or dismiss the cookie that is returned, and must not
keir@19405 371 * reference the cookie after that action.
keir@19405 372 */
keir@19781 373 mctc = mcheck_mca_logout(MCA_MCE_HANDLER, bankmask, &bs, NULL);
keir@19405 374 if (mctc != NULL)
keir@19405 375 mci = (struct mc_info *)mctelem_dataptr(mctc);
keir@19405 376
keir@19405 377 /* Clear MCIP or another #MC will enter shutdown state */
keir@19405 378 gstatus &= ~MCG_STATUS_MCIP;
keir@19406 379 mca_wrmsrl(MSR_IA32_MCG_STATUS, gstatus);
keir@19405 380 wmb();
keir@19405 381
keir@19405 382 /* If no valid errors and our stack is intact, we're done */
keir@19405 383 if (ripv && bs.errcnt == 0) {
keir@19405 384 vcpu_schedule_unlock_irq(v);
keir@19405 385 irqlocked = 0;
keir@19405 386 goto cmn_handler_done;
keir@19405 387 }
keir@19405 388
keir@19405 389 if (bs.uc || bs.pcc)
keir@19405 390 add_taint(TAINT_MACHINE_CHECK);
keir@19405 391
keir@19405 392 /* Machine check exceptions will usually be for UC and/or PCC errors,
keir@19405 393 * but it is possible to configure machine check for some classes
keir@19405 394 * of corrected error.
keir@19405 395 *
keir@19405 396 * UC errors could compromise any domain or the hypervisor
keir@19405 397 * itself - for example a cache writeback of modified data that
keir@19405 398 * turned out to be bad could be for data belonging to anyone, not
keir@19405 399 * just the current domain. In the absence of known data poisoning
keir@19405 400 * to prevent consumption of such bad data in the system we regard
keir@19405 401 * all UC errors as terminal. It may be possible to attempt some
keir@19405 402 * heuristics based on the address affected, which guests have
keir@19405 403 * mappings to that mfn etc.
keir@19405 404 *
keir@19405 405 * PCC errors apply to the current context.
keir@19405 406 *
keir@19405 407 * If MCG_STATUS indicates !RIPV then even a #MC that is not UC
keir@19405 408 * and not PCC is terminal - the return instruction pointer
keir@19405 409 * pushed onto the stack is bogus. If the interrupt context is
keir@19405 410 * the hypervisor or dom0 the game is over, otherwise we can
keir@19405 411 * limit the impact to a single domU but only if we trampoline
keir@19405 412 * somewhere safely - we can't return and unwind the stack.
keir@19405 413 * Since there is no trampoline in place we will treat !RIPV
keir@19405 414 * as terminal for any context.
keir@19405 415 */
keir@19405 416 ctx_xen = SEG_PL(regs->cs) == 0;
keir@19405 417 ctx_dom0 = !ctx_xen && (domid == dom0->domain_id);
keir@19405 418 ctx_domU = !ctx_xen && !ctx_dom0;
keir@19405 419
keir@19405 420 xen_state_lost = bs.uc != 0 || (ctx_xen && (bs.pcc || !ripv)) ||
keir@19405 421 !ripv;
keir@19405 422 dom0_state_lost = bs.uc != 0 || (ctx_dom0 && (bs.pcc || !ripv));
keir@19405 423 domU_state_lost = bs.uc != 0 || (ctx_domU && (bs.pcc || !ripv));
keir@19405 424
keir@19405 425 if (xen_state_lost) {
keir@19405 426 /* Now we are going to panic anyway. Allow interrupts, so that
keir@19405 427 * printk on serial console can work. */
keir@19405 428 vcpu_schedule_unlock_irq(v);
keir@19405 429 irqlocked = 0;
keir@19405 430
keir@19743 431 printk("Terminal machine check exception occurred in "
keir@19405 432 "hypervisor context.\n");
keir@19405 433
keir@19405 434 /* If MCG_STATUS_EIPV indicates that the IP on the stack is related
keir@19405 435 * to the error, then it makes sense to print a stack trace.
keir@19405 436 * That can be useful for more detailed error analysis and/or
keir@19405 437 * error case studies to figure out if we can clear
keir@19405 438 * xen_impacted and kill a DomU instead
keir@19405 439 * (i.e. if a guest-only control structure is affected, but then
keir@19405 440 * we must ensure the bad pages are not re-used).
keir@19405 441 */
keir@19405 442 if (bs.eipv & MCG_STATUS_EIPV) {
keir@19405 443 printk("MCE: Instruction Pointer is related to the "
keir@19405 444 "error, therefore print the execution state.\n");
keir@19405 445 show_execution_state(regs);
keir@19405 446 }
keir@19405 447
keir@19405 448 /* Commit the telemetry so that panic flow can find it. */
keir@19405 449 if (mctc != NULL) {
keir@19405 450 x86_mcinfo_dump(mci);
keir@19405 451 mctelem_commit(mctc);
keir@19405 452 }
keir@19405 453 mc_panic("Hypervisor state lost due to machine check "
keir@19405 454 "exception.\n");
keir@19405 455 /*NOTREACHED*/
keir@19405 456 }
keir@19405 457
keir@19405 458 /*
keir@19405 459 * Xen hypervisor state is intact. If dom0 state is lost then
keir@19405 460 * give it a chance to decide what to do if it has registered
keir@19405 461 * a handler for this event, otherwise panic.
keir@19405 462 *
keir@19405 463 * XXFM Could add some Solaris dom0 contract kill here?
keir@19405 464 */
keir@19405 465 if (dom0_state_lost) {
keir@19405 466 if (guest_has_trap_callback(dom0, 0, TRAP_machine_check)) {
keir@19405 467 dom_state = DOM0_TRAP;
keir@19405 468 send_guest_trap(dom0, 0, TRAP_machine_check);
keir@19405 469 /* XXFM case of return with !ripv ??? */
keir@19405 470 } else {
keir@19405 471 /* Commit telemetry for panic flow. */
keir@19405 472 if (mctc != NULL) {
keir@19405 473 x86_mcinfo_dump(mci);
keir@19405 474 mctelem_commit(mctc);
keir@19405 475 }
keir@19405 476 mc_panic("Dom0 state lost due to machine check "
keir@19405 477 "exception\n");
keir@19405 478 /*NOTREACHED*/
keir@19405 479 }
keir@19405 480 }
keir@19405 481
keir@19405 482 /*
keir@19405 483 * If a domU has lost state then send it a trap if it has registered
keir@19405 484 * a handler, otherwise crash the domain.
keir@19405 485 * XXFM Revisit this functionality.
keir@19405 486 */
keir@19405 487 if (domU_state_lost) {
keir@19405 488 if (guest_has_trap_callback(v->domain, v->vcpu_id,
keir@19405 489 TRAP_machine_check)) {
keir@19405 490 dom_state = DOMU_TRAP;
keir@19405 491 send_guest_trap(curdom, v->vcpu_id,
keir@19405 492 TRAP_machine_check);
keir@19405 493 } else {
keir@19405 494 dom_state = DOMU_KILLED;
keir@19405 495 /* Enable interrupts. This basically results in
keir@19405 496 * calling sti on the *physical* cpu. But after
keir@19405 497 * domain_crash() the vcpu pointer is invalid.
keir@19405 498 * Therefore, we must unlock the irqs before killing
keir@19405 499 * it. */
keir@19405 500 vcpu_schedule_unlock_irq(v);
keir@19405 501 irqlocked = 0;
keir@19405 502
keir@19405 503 /* DomU is impacted. Kill it and continue. */
keir@19405 504 domain_crash(curdom);
keir@19405 505 }
keir@19405 506 }
keir@19405 507
keir@19405 508 switch (dom_state) {
keir@19405 509 case DOM0_TRAP:
keir@19405 510 case DOMU_TRAP:
keir@19405 511 /* Enable interrupts. */
keir@19405 512 vcpu_schedule_unlock_irq(v);
keir@19405 513 irqlocked = 0;
keir@19405 514
keir@19405 515 /* guest softirqs and event callbacks are scheduled
keir@19405 516 * immediately after this handler exits. */
keir@19405 517 break;
keir@19405 518 case DOMU_KILLED:
keir@19405 519 /* Nothing to do here. */
keir@19405 520 break;
keir@19405 521
keir@19405 522 case DOM_NORMAL:
keir@19405 523 vcpu_schedule_unlock_irq(v);
keir@19405 524 irqlocked = 0;
keir@19405 525 break;
keir@19405 526 }
keir@19405 527
keir@19405 528 cmn_handler_done:
keir@19405 529 BUG_ON(irqlocked);
keir@19405 530 BUG_ON(!ripv);
keir@19405 531
keir@19405 532 if (bs.errcnt) {
keir@19405 533 /* Not panicking, so forward telemetry to dom0 now if it
keir@19405 534 * is interested. */
keir@19405 535 if (guest_enabled_event(dom0->vcpu[0], VIRQ_MCA)) {
keir@19405 536 if (mctc != NULL)
keir@19405 537 mctelem_commit(mctc);
keir@19405 538 send_guest_global_virq(dom0, VIRQ_MCA);
keir@19405 539 } else {
keir@19405 540 x86_mcinfo_dump(mci);
keir@19405 541 if (mctc != NULL)
keir@19405 542 mctelem_dismiss(mctc);
keir@19405 543 }
keir@19405 544 } else if (mctc != NULL) {
keir@19405 545 mctelem_dismiss(mctc);
keir@19405 546 }
keir@19405 547 }
keir@19405 548
keir@19563 549 void mcheck_mca_clearbanks(cpu_banks_t bankmask)
keir@19563 550 {
keir@19563 551 int i;
keir@19563 552 uint64_t status;
keir@19563 553
keir@19563 554 for (i = 0; i < 32 && i < nr_mce_banks; i++) {
keir@19563 555 if (!test_bit(i, bankmask))
keir@19563 556 continue;
keir@19563 557 mca_rdmsrl(MSR_IA32_MC0_STATUS + i * 4, status);
keir@19563 558 if (!(status & MCi_STATUS_VAL))
keir@19563 559 continue;
keir@19563 560 mca_wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL);
keir@19563 561 }
keir@19563 562 }
keir@19563 563
keir@19405 564 static int amd_mcheck_init(struct cpuinfo_x86 *ci)
keir@19405 565 {
keir@19405 566 int rc = 0;
keir@18006 567
keir@18006 568 switch (ci->x86) {
keir@18006 569 case 6:
keir@19405 570 rc = amd_k7_mcheck_init(ci);
keir@18006 571 break;
keir@18006 572
keir@19988 573 default:
keir@19988 574 /* Assume that machine check support is available.
keir@19988 575 * The minimum provided support is at least the K8. */
keir@18006 576 case 0xf:
keir@19405 577 rc = amd_k8_mcheck_init(ci);
keir@18006 578 break;
keir@18006 579
keir@18006 580 case 0x10:
keir@19988 581 case 0x11:
keir@19405 582 rc = amd_f10_mcheck_init(ci);
keir@18006 583 break;
keir@18006 584 }
keir@19405 585
keir@19405 586 return rc;
keir@18006 587 }
kaf24@8869 588
keir@18976 589 /* Check for the existence of Machine Check support */
keir@18976 590 int mce_available(struct cpuinfo_x86 *c)
keir@18976 591 {
keir@18976 592 return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
keir@18976 593 }
keir@18976 594
keir@20911 595 static int mce_is_broadcast(struct cpuinfo_x86 *c)
keir@20911 596 {
keir@20911 597 if (mce_force_broadcast)
keir@20911 598 return 1;
keir@20911 599
keir@20911 600 /* According to the Intel SDM (Dec. 2009), section 15.10.4.1: for processors
keir@20911 601 * with a DisplayFamily_DisplayModel encoding of 06H_0EH and above,
keir@20911 602 * an MCA signal is broadcast to all logical processors in the system
keir@20911 603 */
keir@20911 604 if (c->x86_vendor == X86_VENDOR_INTEL && c->x86 == 6 &&
keir@20911 605 c->x86_model >= 0xe)
keir@20911 606 return 1;
keir@20911 607 return 0;
keir@20911 608 }
keir@20911 609
keir@19405 610 /*
keir@19405 611 * Check if bank 0 is usable for MCE. It isn't for AMD K7,
keir@19405 612 * and Intel P6 family before model 0x1a.
keir@19405 613 */
keir@19405 614 int mce_firstbank(struct cpuinfo_x86 *c)
keir@19405 615 {
keir@19405 616 if (c->x86 == 6) {
keir@19405 617 if (c->x86_vendor == X86_VENDOR_AMD)
keir@19405 618 return 1;
keir@19405 619
keir@19405 620 if (c->x86_vendor == X86_VENDOR_INTEL && c->x86_model < 0x1a)
keir@19405 621 return 1;
keir@19405 622 }
keir@19405 623
keir@19405 624 return 0;
keir@19405 625 }
keir@19405 626
kaf24@8869 627 /* This has to be run for each processor */
kaf24@8869 628 void mcheck_init(struct cpuinfo_x86 *c)
kaf24@8869 629 {
keir@20911 630 int inited = 0, i, broadcast;
keir@20911 631 static int broadcast_check;
keir@19405 632
keir@18006 633 if (mce_disabled == 1) {
keir@20911 634 dprintk(XENLOG_INFO, "MCE support disabled by bootparam\n");
kaf24@8869 635 return;
keir@18006 636 }
keir@18006 637
keir@20911 638 broadcast = mce_is_broadcast(c);
keir@20911 639 if (broadcast_check && (broadcast != mce_broadcast) )
keir@20911 640 dprintk(XENLOG_INFO,
keir@20911 641 "CPUs have mixed broadcast support, "
keir@20911 642 "which may cause undetermined results!\n");
keir@20911 643
keir@20911 644 broadcast_check = 1;
keir@20911 645 if (broadcast)
keir@20911 646 mce_broadcast = broadcast;
keir@20911 647
keir@19405 648 for (i = 0; i < MAX_NR_BANKS; i++)
keir@19405 649 set_bit(i,mca_allbanks);
keir@19405 650
keir@19405 651 /* Enforce at least MCE support in CPUID information. Individual
keir@19405 652 * families may also need to enforce a check for MCA support. */
keir@18006 653 if (!cpu_has(c, X86_FEATURE_MCE)) {
keir@18006 654 printk(XENLOG_INFO "CPU%i: No machine check support available\n",
keir@18006 655 smp_processor_id());
keir@18006 656 return;
keir@18006 657 }
keir@18006 658
keir@19406 659 intpose_init();
keir@19405 660 mctelem_init(sizeof (struct mc_info));
kaf24@8869 661
kaf24@8869 662 switch (c->x86_vendor) {
keir@18006 663 case X86_VENDOR_AMD:
keir@19405 664 inited = amd_mcheck_init(c);
keir@18006 665 break;
kaf24@8869 666
keir@18006 667 case X86_VENDOR_INTEL:
keir@19405 668 switch (c->x86) {
keir@19405 669 case 6:
keir@19405 670 case 15:
keir@19405 671 inited = intel_mcheck_init(c);
keir@19405 672 break;
keir@18976 673 }
keir@18006 674 break;
kaf24@8869 675
keir@18006 676 default:
keir@18006 677 break;
kaf24@8869 678 }
keir@19405 679
keir@20908 680 if ( !h_mci_ctrl )
keir@20908 681 {
keir@20908 682 h_mci_ctrl = xmalloc_array(uint64_t, nr_mce_banks);
keir@20908 683 if (!h_mci_ctrl)
keir@20908 684 {
keir@20908 685 dprintk(XENLOG_INFO, "Failed to alloc h_mci_ctrl\n");
keir@20908 686 return;
keir@20908 687 }
keir@20908 688 /* Don't care about banks before firstbank */
keir@20908 689 memset(h_mci_ctrl, 0xff, nr_mce_banks * sizeof(*h_mci_ctrl));
keir@20908 690 for (i = firstbank; i < nr_mce_banks; i++)
keir@20908 691 rdmsrl(MSR_IA32_MC0_CTL + 4*i, h_mci_ctrl[i]);
keir@20908 692 }
keir@20908 693 if (g_mcg_cap & MCG_CTL_P)
keir@20908 694 rdmsrl(MSR_IA32_MCG_CTL, h_mcg_ctl);
keir@19539 695 set_poll_bankmask(c);
keir@19405 696 if (!inited)
keir@19405 697 printk(XENLOG_INFO "CPU%i: No machine check initialization\n",
keir@19405 698 smp_processor_id());
kaf24@8869 699 }
kaf24@8869 700
keir@19988 701 u64 mce_cap_init(void)
keir@19988 702 {
keir@19988 703 u32 l, h;
keir@19988 704 u64 value;
keir@16131 705
keir@19988 706 rdmsr(MSR_IA32_MCG_CAP, l, h);
keir@19988 707 value = ((u64)h << 32) | l;
keir@19988 708 /* For Guest vMCE usage */
keir@19988 709 g_mcg_cap = value & ~MCG_CMCI_P;
keir@19988 710
keir@19988 711 if (l & MCG_CTL_P) /* Control register present ? */
keir@19988 712 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
keir@19988 713
keir@19988 714 nr_mce_banks = l & MCG_CAP_COUNT;
keir@19988 715 if ( nr_mce_banks > MAX_NR_BANKS )
keir@19988 716 {
keir@19988 717 printk(KERN_WARNING "MCE: exceed max mce banks\n");
keir@19988 718 g_mcg_cap = (g_mcg_cap & ~MCG_CAP_COUNT) | MAX_NR_BANKS;
keir@19988 719 }
keir@19988 720
keir@19988 721 return value;
keir@19988 722 }
keir@19988 723
keir@19988 724 /* Guest vMCE# MSRs virtualization ops (rdmsr/wrmsr) */
keir@19988 725 void mce_init_msr(struct domain *d)
kaf24@8869 726 {
keir@19988 727 d->arch.vmca_msrs.mcg_status = 0x0;
keir@19988 728 d->arch.vmca_msrs.mcg_cap = g_mcg_cap;
keir@19988 729 d->arch.vmca_msrs.mcg_ctl = ~(uint64_t)0x0;
keir@19988 730 d->arch.vmca_msrs.nr_injection = 0;
keir@19988 731 memset(d->arch.vmca_msrs.mci_ctl, ~0,
keir@19988 732 sizeof(d->arch.vmca_msrs.mci_ctl));
keir@19988 733 INIT_LIST_HEAD(&d->arch.vmca_msrs.impact_header);
keir@19988 734 spin_lock_init(&d->arch.vmca_msrs.lock);
kaf24@8869 735 }
kaf24@8869 736
keir@20099 737 int mce_rdmsr(uint32_t msr, uint64_t *val)
kaf24@8869 738 {
keir@19988 739 struct domain *d = current->domain;
keir@19988 740 int ret = 1;
keir@19988 741 unsigned int bank;
keir@19988 742 struct bank_entry *entry = NULL;
keir@19988 743
keir@20099 744 *val = 0;
keir@19988 745 spin_lock(&d->arch.vmca_msrs.lock);
keir@19988 746
keir@19988 747 switch ( msr )
keir@19988 748 {
keir@19988 749 case MSR_IA32_MCG_STATUS:
keir@20099 750 *val = d->arch.vmca_msrs.mcg_status;
keir@20099 751 if (*val)
keir@20288 752 mce_printk(MCE_VERBOSE,
keir@20099 753 "MCE: rdmsr MCG_STATUS 0x%"PRIx64"\n", *val);
keir@19988 754 break;
keir@19988 755 case MSR_IA32_MCG_CAP:
keir@20099 756 *val = d->arch.vmca_msrs.mcg_cap;
keir@20288 757 mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CAP 0x%"PRIx64"\n",
keir@20099 758 *val);
keir@19988 759 break;
keir@19988 760 case MSR_IA32_MCG_CTL:
keir@20908 761 /* Always 0 if no CTL support */
keir@20908 762 *val = d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl;
keir@20288 763 mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL 0x%"PRIx64"\n",
keir@20099 764 *val);
keir@19988 765 break;
keir@19988 766 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1:
keir@19988 767 bank = (msr - MSR_IA32_MC0_CTL) / 4;
keir@19988 768 if ( bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT) )
keir@19988 769 {
keir@20907 770 mce_printk(MCE_QUIET, "MCE: MSR %x is not MCA MSR\n", msr);
keir@20907 771 ret = 0;
keir@19988 772 break;
keir@19988 773 }
keir@19988 774 switch (msr & (MSR_IA32_MC0_CTL | 3))
keir@19988 775 {
keir@19988 776 case MSR_IA32_MC0_CTL:
keir@20908 777 *val = d->arch.vmca_msrs.mci_ctl[bank] &
keir@20908 778 (h_mci_ctrl ? h_mci_ctrl[bank] : ~0UL);
keir@20288 779 mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL 0x%"PRIx64"\n",
keir@20099 780 bank, *val);
keir@19988 781 break;
keir@19988 782 case MSR_IA32_MC0_STATUS:
keir@19988 783 /* Only error bank is read. Non-error banks simply return. */
keir@19988 784 if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
keir@19988 785 {
keir@19988 786 entry = list_entry(d->arch.vmca_msrs.impact_header.next,
keir@19988 787 struct bank_entry, list);
keir@19988 788 if (entry->bank == bank) {
keir@20099 789 *val = entry->mci_status;
keir@20288 790 mce_printk(MCE_VERBOSE,
keir@19988 791 "MCE: rd MC%u_STATUS in vMCE# context "
keir@20099 792 "value 0x%"PRIx64"\n", bank, *val);
keir@19988 793 }
keir@19988 794 else
keir@19988 795 entry = NULL;
keir@19988 796 }
keir@19988 797 break;
keir@19988 798 case MSR_IA32_MC0_ADDR:
keir@19988 799 if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
keir@19988 800 {
keir@19988 801 entry = list_entry(d->arch.vmca_msrs.impact_header.next,
keir@19988 802 struct bank_entry, list);
keir@19988 803 if ( entry->bank == bank )
keir@19988 804 {
keir@20099 805 *val = entry->mci_addr;
keir@20288 806 mce_printk(MCE_VERBOSE,
keir@20099 807 "MCE: rdmsr MC%u_ADDR in vMCE# context "
keir@20099 808 "0x%"PRIx64"\n", bank, *val);
keir@19988 809 }
keir@19988 810 }
keir@19988 811 break;
keir@19988 812 case MSR_IA32_MC0_MISC:
keir@19988 813 if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
keir@19988 814 {
keir@19988 815 entry = list_entry(d->arch.vmca_msrs.impact_header.next,
keir@19988 816 struct bank_entry, list);
keir@19988 817 if ( entry->bank == bank )
keir@19988 818 {
keir@20099 819 *val = entry->mci_misc;
keir@20288 820 mce_printk(MCE_VERBOSE,
keir@20099 821 "MCE: rd MC%u_MISC in vMCE# context "
keir@20099 822 "0x%"PRIx64"\n", bank, *val);
keir@19988 823 }
keir@19988 824 }
keir@19988 825 break;
keir@19988 826 }
keir@19988 827 break;
keir@19988 828 default:
keir@19988 829 switch ( boot_cpu_data.x86_vendor )
keir@19988 830 {
keir@19988 831 case X86_VENDOR_INTEL:
keir@20099 832 ret = intel_mce_rdmsr(msr, val);
keir@19988 833 break;
keir@19988 834 default:
keir@19988 835 ret = 0;
keir@19988 836 break;
keir@19988 837 }
keir@19988 838 break;
keir@19988 839 }
keir@19988 840
keir@19988 841 spin_unlock(&d->arch.vmca_msrs.lock);
keir@19988 842 return ret;
kaf24@8869 843 }
kaf24@8869 844
keir@20099 845 int mce_wrmsr(u32 msr, u64 val)
keir@19988 846 {
keir@19988 847 struct domain *d = current->domain;
keir@19988 848 struct bank_entry *entry = NULL;
keir@19988 849 unsigned int bank;
keir@19988 850 int ret = 1;
keir@19988 851
keir@19988 852 if ( !g_mcg_cap )
keir@19988 853 return 0;
keir@19988 854
keir@19988 855 spin_lock(&d->arch.vmca_msrs.lock);
keir@19988 856
keir@19988 857 switch ( msr )
keir@19988 858 {
keir@19988 859 case MSR_IA32_MCG_CTL:
keir@20099 860 d->arch.vmca_msrs.mcg_ctl = val;
keir@19988 861 break;
keir@19988 862 case MSR_IA32_MCG_STATUS:
keir@20099 863 d->arch.vmca_msrs.mcg_status = val;
keir@20288 864 mce_printk(MCE_VERBOSE, "MCE: wrmsr MCG_STATUS %"PRIx64"\n", val);
keir@19988 865 /* For HVM guest, this is the point for deleting vMCE injection node */
keir@19988 866 if ( d->is_hvm && (d->arch.vmca_msrs.nr_injection > 0) )
keir@19988 867 {
keir@19988 868 d->arch.vmca_msrs.nr_injection--; /* Should be 0 */
keir@19988 869 if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
keir@19988 870 {
keir@19988 871 entry = list_entry(d->arch.vmca_msrs.impact_header.next,
keir@19988 872 struct bank_entry, list);
keir@19988 873 if ( entry->mci_status & MCi_STATUS_VAL )
keir@20288 874 mce_printk(MCE_QUIET, "MCE: MCi_STATUS MSR should have "
keir@19988 875 "been cleared before writing the MCG_STATUS MSR\n");
keir@19988 876
keir@20288 877 mce_printk(MCE_QUIET, "MCE: Deleting HVM guest's last injection "
keir@19988 878 "node, nr_injection %u\n",
keir@19988 879 d->arch.vmca_msrs.nr_injection);
keir@19988 880 list_del(&entry->list);
keir@20268 881 xfree(entry);
keir@19988 882 }
keir@19988 883 else
keir@20288 884 mce_printk(MCE_QUIET, "MCE: Could not find the HVM guest's"
keir@19988 885 " last injection node, something is wrong!\n");
keir@19988 886 }
keir@19988 887 break;
keir@19988 888 case MSR_IA32_MCG_CAP:
keir@20288 889 mce_printk(MCE_QUIET, "MCE: MCG_CAP is read-only\n");
keir@19988 890 ret = -1;
keir@19988 891 break;
keir@19988 892 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1:
keir@19988 893 bank = (msr - MSR_IA32_MC0_CTL) / 4;
keir@19988 894 if ( bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT) )
keir@19988 895 {
keir@20907 896 mce_printk(MCE_QUIET, "MCE: MSR %x is not MCA MSR\n", msr);
keir@20907 897 ret = 0;
keir@19988 898 break;
keir@19988 899 }
keir@19988 900 switch ( msr & (MSR_IA32_MC0_CTL | 3) )
keir@19988 901 {
keir@19988 902 case MSR_IA32_MC0_CTL:
keir@20099 903 d->arch.vmca_msrs.mci_ctl[bank] = val;
keir@19988 904 break;
keir@19988 905 case MSR_IA32_MC0_STATUS:
keir@19988 906 /* Use the first entry of the list; it corresponds to the current
keir@19988 907 * vMCE# injection. When the guest has finished processing the
keir@19988 908 * vMCE#, this node will be deleted.
keir@19988 909 * Only error bank is written. Non-error banks simply return.
keir@19988 910 */
keir@19988 911 if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
keir@19988 912 {
keir@19988 913 entry = list_entry(d->arch.vmca_msrs.impact_header.next,
keir@19988 914 struct bank_entry, list);
keir@19988 915 if ( entry->bank == bank )
keir@20099 916 entry->mci_status = val;
keir@20288 917 mce_printk(MCE_VERBOSE,
keir@19988 918 "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n",
keir@20099 919 bank, val);
keir@19988 920 }
keir@19988 921 else
keir@20288 922 mce_printk(MCE_VERBOSE,
keir@20099 923 "MCE: wr MC%u_STATUS %"PRIx64"\n", bank, val);
keir@19988 924 break;
keir@19988 925 case MSR_IA32_MC0_ADDR:
keir@20288 926 mce_printk(MCE_QUIET, "MCE: MC%u_ADDR is read-only\n", bank);
keir@19988 927 ret = -1;
keir@19988 928 break;
keir@19988 929 case MSR_IA32_MC0_MISC:
keir@20288 930 mce_printk(MCE_QUIET, "MCE: MC%u_MISC is read-only\n", bank);
keir@19988 931 ret = -1;
keir@19988 932 break;
keir@19988 933 }
keir@19988 934 break;
keir@19988 935 default:
keir@19988 936 switch ( boot_cpu_data.x86_vendor )
keir@19988 937 {
keir@19988 938 case X86_VENDOR_INTEL:
keir@20099 939 ret = intel_mce_wrmsr(msr, val);
keir@19988 940 break;
keir@19988 941 default:
keir@19988 942 ret = 0;
keir@19988 943 break;
keir@19988 944 }
keir@19988 945 break;
keir@19988 946 }
keir@19988 947
keir@19988 948 spin_unlock(&d->arch.vmca_msrs.lock);
keir@19988 949 return ret;
keir@19988 950 }
keir@18006 951
keir@19405 952 static void mcinfo_clear(struct mc_info *mi)
keir@18006 953 {
keir@18006 954 memset(mi, 0, sizeof(struct mc_info));
keir@18006 955 x86_mcinfo_nentries(mi) = 0;
keir@18006 956 }
keir@18006 957
keir@18006 958 int x86_mcinfo_add(struct mc_info *mi, void *mcinfo)
keir@18006 959 {
keir@18006 960 int i;
keir@18006 961 unsigned long end1, end2;
keir@18006 962 struct mcinfo_common *mic, *mic_base, *mic_index;
keir@18006 963
keir@18006 964 mic = (struct mcinfo_common *)mcinfo;
keir@18006 965 mic_index = mic_base = x86_mcinfo_first(mi);
keir@18006 966
keir@18006 967 /* go to first free entry */
keir@18006 968 for (i = 0; i < x86_mcinfo_nentries(mi); i++) {
keir@18006 969 mic_index = x86_mcinfo_next(mic_index);
keir@18006 970 }
keir@18006 971
keir@18006 972 /* check if there is enough size */
keir@18006 973 end1 = (unsigned long)((uint8_t *)mic_base + sizeof(struct mc_info));
keir@18006 974 end2 = (unsigned long)((uint8_t *)mic_index + mic->size);
keir@18006 975
keir@18006 976 if (end1 < end2)
keir@19405 977 return x86_mcerr("mcinfo_add: no more space", -ENOSPC);
keir@18006 978
keir@18006 979 /* there's enough space. add entry. */
keir@18006 980 memcpy(mic_index, mic, mic->size);
keir@18006 981 x86_mcinfo_nentries(mi)++;
keir@18006 982
keir@18006 983 return 0;
keir@18006 984 }
keir@18006 985
keir@18006 986 /* Dump machine check information in a format
keir@18006 987 * mcelog can parse. This is used only when
keir@18006 988 * Dom0 does not take the notification. */
keir@18006 989 void x86_mcinfo_dump(struct mc_info *mi)
keir@18006 990 {
keir@18006 991 struct mcinfo_common *mic = NULL;
keir@18006 992 struct mcinfo_global *mc_global;
keir@18006 993 struct mcinfo_bank *mc_bank;
keir@18006 994
keir@18006 995 /* first print the global info */
keir@18006 996 x86_mcinfo_lookup(mic, mi, MC_TYPE_GLOBAL);
keir@18006 997 if (mic == NULL)
keir@18006 998 return;
keir@18006 999 mc_global = (struct mcinfo_global *)mic;
keir@19405 1000 if (mc_global->mc_flags & MC_FLAG_MCE) {
keir@18006 1001 printk(XENLOG_WARNING
keir@18006 1002 "CPU%d: Machine Check Exception: %16"PRIx64"\n",
keir@18006 1003 mc_global->mc_coreid, mc_global->mc_gstatus);
keir@18006 1004 } else {
keir@18006 1005 printk(XENLOG_WARNING "MCE: The hardware reports a non-"
keir@19743 1006 "fatal, correctable incident occurred on "
keir@18006 1007 "CPU %d.\n",
keir@18006 1008 mc_global->mc_coreid);
keir@18006 1009 }
keir@18006 1010
keir@18006 1011 /* then the bank information */
keir@18006 1012 x86_mcinfo_lookup(mic, mi, MC_TYPE_BANK); /* finds the first entry */
keir@18006 1013 do {
keir@18006 1014 if (mic == NULL)
keir@18006 1015 return;
keir@18006 1016 if (mic->type != MC_TYPE_BANK)
keir@18976 1017 goto next;
keir@18006 1018
keir@18006 1019 mc_bank = (struct mcinfo_bank *)mic;
keir@19405 1020
keir@18006 1021 printk(XENLOG_WARNING "Bank %d: %16"PRIx64,
keir@18006 1022 mc_bank->mc_bank,
keir@18006 1023 mc_bank->mc_status);
keir@18006 1024 if (mc_bank->mc_status & MCi_STATUS_MISCV)
keir@18006 1025 printk("[%16"PRIx64"]", mc_bank->mc_misc);
keir@18006 1026 if (mc_bank->mc_status & MCi_STATUS_ADDRV)
keir@18006 1027 printk(" at %16"PRIx64, mc_bank->mc_addr);
keir@18006 1028
keir@18006 1029 printk("\n");
keir@18976 1030 next:
keir@18006 1031 mic = x86_mcinfo_next(mic); /* next entry */
keir@18006 1032 if ((mic == NULL) || (mic->size == 0))
keir@18006 1033 break;
keir@18006 1034 } while (1);
keir@18006 1035 }
keir@18006 1036
keir@19225 1037 static void do_mc_get_cpu_info(void *v)
keir@19225 1038 {
keir@19225 1039 int cpu = smp_processor_id();
keir@19225 1040 int cindex, cpn;
keir@19225 1041 struct cpuinfo_x86 *c;
keir@19225 1042 xen_mc_logical_cpu_t *log_cpus, *xcp;
keir@19225 1043 uint32_t junk, ebx;
keir@19225 1044
keir@19225 1045 log_cpus = v;
keir@19225 1046 c = &cpu_data[cpu];
keir@19225 1047 cindex = 0;
keir@19225 1048 cpn = cpu - 1;
keir@19225 1049
keir@19225 1050 /*
keir@19225 1051 * Deal with sparse masks, condensed into a contiguous array.
keir@19225 1052 */
keir@19225 1053 while (cpn >= 0) {
keir@19225 1054 if (cpu_isset(cpn, cpu_online_map))
keir@19225 1055 cindex++;
keir@19225 1056 cpn--;
keir@19225 1057 }
keir@19225 1058
keir@19225 1059 xcp = &log_cpus[cindex];
keir@19225 1060 c = &cpu_data[cpu];
keir@19225 1061 xcp->mc_cpunr = cpu;
keir@19225 1062 x86_mc_get_cpu_info(cpu, &xcp->mc_chipid,
keir@19225 1063 &xcp->mc_coreid, &xcp->mc_threadid,
keir@19225 1064 &xcp->mc_apicid, &xcp->mc_ncores,
keir@19225 1065 &xcp->mc_ncores_active, &xcp->mc_nthreads);
keir@19225 1066 xcp->mc_cpuid_level = c->cpuid_level;
keir@19225 1067 xcp->mc_family = c->x86;
keir@19225 1068 xcp->mc_vendor = c->x86_vendor;
keir@19225 1069 xcp->mc_model = c->x86_model;
keir@19225 1070 xcp->mc_step = c->x86_mask;
keir@19225 1071 xcp->mc_cache_size = c->x86_cache_size;
keir@19225 1072 xcp->mc_cache_alignment = c->x86_cache_alignment;
keir@19225 1073 memcpy(xcp->mc_vendorid, c->x86_vendor_id, sizeof xcp->mc_vendorid);
keir@19225 1074 memcpy(xcp->mc_brandid, c->x86_model_id, sizeof xcp->mc_brandid);
keir@19225 1075 memcpy(xcp->mc_cpu_caps, c->x86_capability, sizeof xcp->mc_cpu_caps);
keir@19225 1076
keir@19225 1077 /*
keir@19225 1078 * This part needs to run on the CPU itself.
keir@19225 1079 */
keir@19225 1080 xcp->mc_nmsrvals = __MC_NMSRS;
keir@19225 1081 xcp->mc_msrvalues[0].reg = MSR_IA32_MCG_CAP;
keir@19225 1082 rdmsrl(MSR_IA32_MCG_CAP, xcp->mc_msrvalues[0].value);
keir@19225 1083
keir@19225 1084 if (c->cpuid_level >= 1) {
keir@19225 1085 cpuid(1, &junk, &ebx, &junk, &junk);
keir@19225 1086 xcp->mc_clusterid = (ebx >> 24) & 0xff;
keir@19225 1087 } else
keir@19225 1088 xcp->mc_clusterid = hard_smp_processor_id();
keir@19225 1089 }
keir@19225 1090
keir@19225 1091
keir@19225 1092 void x86_mc_get_cpu_info(unsigned cpu, uint32_t *chipid, uint16_t *coreid,
keir@19225 1093 uint16_t *threadid, uint32_t *apicid,
keir@19225 1094 unsigned *ncores, unsigned *ncores_active,
keir@19225 1095 unsigned *nthreads)
keir@19225 1096 {
keir@19225 1097 struct cpuinfo_x86 *c;
keir@19225 1098
keir@19225 1099 *apicid = cpu_physical_id(cpu);
keir@19225 1100 c = &cpu_data[cpu];
keir@19225 1101 if (c->apicid == BAD_APICID) {
keir@19225 1102 *chipid = cpu;
keir@19225 1103 *coreid = 0;
keir@19225 1104 *threadid = 0;
keir@19225 1105 if (ncores != NULL)
keir@19225 1106 *ncores = 1;
keir@19225 1107 if (ncores_active != NULL)
keir@19225 1108 *ncores_active = 1;
keir@19225 1109 if (nthreads != NULL)
keir@19225 1110 *nthreads = 1;
keir@19225 1111 } else {
keir@19225 1112 *chipid = phys_proc_id[cpu];
keir@19225 1113 if (c->x86_max_cores > 1)
keir@19225 1114 *coreid = cpu_core_id[cpu];
keir@19225 1115 else
keir@19225 1116 *coreid = 0;
keir@19225 1117 *threadid = c->apicid & ((1 << (c->x86_num_siblings - 1)) - 1);
keir@19225 1118 if (ncores != NULL)
keir@19225 1119 *ncores = c->x86_max_cores;
keir@19225 1120 if (ncores_active != NULL)
keir@19225 1121 *ncores_active = c->booted_cores;
keir@19225 1122 if (nthreads != NULL)
keir@19225 1123 *nthreads = c->x86_num_siblings;
keir@19225 1124 }
keir@19225 1125 }
keir@19225 1126
keir@19406 1127 #define INTPOSE_NENT 50
keir@19406 1128
keir@19406 1129 static struct intpose_ent {
keir@19406 1130 unsigned int cpu_nr;
keir@19406 1131 uint64_t msr;
keir@19406 1132 uint64_t val;
keir@19406 1133 } intpose_arr[INTPOSE_NENT];
keir@19406 1134
keir@19406 1135 static void intpose_init(void)
keir@19406 1136 {
keir@19406 1137 static int done;
keir@19406 1138 int i;
keir@19406 1139
keir@19406 1140 if (done++ > 0)
keir@19406 1141 return;
keir@19406 1142
keir@19406 1143 for (i = 0; i < INTPOSE_NENT; i++) {
keir@19406 1144 intpose_arr[i].cpu_nr = -1;
keir@19406 1145 }
keir@19406 1146
keir@19406 1147 }
keir@19406 1148
keir@19406 1149 struct intpose_ent *intpose_lookup(unsigned int cpu_nr, uint64_t msr,
keir@19406 1150 uint64_t *valp)
keir@19406 1151 {
keir@19406 1152 int i;
keir@19406 1153
keir@19406 1154 for (i = 0; i < INTPOSE_NENT; i++) {
keir@19406 1155 if (intpose_arr[i].cpu_nr == cpu_nr &&
keir@19406 1156 intpose_arr[i].msr == msr) {
keir@19406 1157 if (valp != NULL)
keir@19406 1158 *valp = intpose_arr[i].val;
keir@19406 1159 return &intpose_arr[i];
keir@19406 1160 }
keir@19406 1161 }
keir@19406 1162
keir@19406 1163 return NULL;
keir@19406 1164 }
keir@19406 1165
keir@19406 1166 static void intpose_add(unsigned int cpu_nr, uint64_t msr, uint64_t val)
keir@19406 1167 {
keir@19406 1168 struct intpose_ent *ent;
keir@19406 1169 int i;
keir@19406 1170
keir@19406 1171 if ((ent = intpose_lookup(cpu_nr, msr, NULL)) != NULL) {
keir@19406 1172 ent->val = val;
keir@19406 1173 return;
keir@19406 1174 }
keir@19406 1175
keir@19406 1176 for (i = 0, ent = &intpose_arr[0]; i < INTPOSE_NENT; i++, ent++) {
keir@19406 1177 if (ent->cpu_nr == -1) {
keir@19406 1178 ent->cpu_nr = cpu_nr;
keir@19406 1179 ent->msr = msr;
keir@19406 1180 ent->val = val;
keir@19406 1181 return;
keir@19406 1182 }
keir@19406 1183 }
keir@19406 1184
keir@19406 1185 printk("intpose_add: interpose array full - request dropped\n");
keir@19406 1186 }
keir@19406 1187
keir@19406 1188 void intpose_inval(unsigned int cpu_nr, uint64_t msr)
keir@19406 1189 {
keir@19406 1190 struct intpose_ent *ent;
keir@19406 1191
keir@19406 1192 if ((ent = intpose_lookup(cpu_nr, msr, NULL)) != NULL) {
keir@19406 1193 ent->cpu_nr = -1;
keir@19406 1194 }
keir@19406 1195 }
keir@19406 1196
keir@19406 1197 #define IS_MCA_BANKREG(r) \
keir@19406 1198 ((r) >= MSR_IA32_MC0_CTL && \
keir@19406 1199 (r) <= MSR_IA32_MC0_MISC + (nr_mce_banks - 1) * 4 && \
keir@19406 1200 ((r) - MSR_IA32_MC0_CTL) % 4 != 0) /* excludes MCi_CTL */
keir@19406 1201
keir@20908 1202 int mca_ctl_conflict(struct mcinfo_bank *bank, struct domain *d)
keir@20908 1203 {
keir@20908 1204 int bank_nr;
keir@20908 1205
keir@20908 1206 if ( !bank || !d || !h_mci_ctrl )
keir@20908 1207 return 1;
keir@20908 1208
keir@20908 1209 /* Will an MCE happen in the host if the host mcg_ctl is 0? */
keir@20908 1210 if ( ~d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl )
keir@20908 1211 return 1;
keir@20908 1212
keir@20908 1213 bank_nr = bank->mc_bank;
keir@20908 1214 if (~d->arch.vmca_msrs.mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] )
keir@20908 1215 return 1;
keir@20908 1216 return 0;
keir@20908 1217 }
keir@20908 1218
keir@19406 1219 static int x86_mc_msrinject_verify(struct xen_mc_msrinject *mci)
keir@19406 1220 {
keir@19406 1221 struct cpuinfo_x86 *c;
keir@19406 1222 int i, errs = 0;
keir@19406 1223
keir@19406 1224 c = &cpu_data[smp_processor_id()];
keir@19406 1225
keir@19406 1226 for (i = 0; i < mci->mcinj_count; i++) {
keir@19406 1227 uint64_t reg = mci->mcinj_msr[i].reg;
keir@19406 1228 const char *reason = NULL;
keir@19406 1229
keir@19406 1230 if (IS_MCA_BANKREG(reg)) {
keir@19406 1231 if (c->x86_vendor == X86_VENDOR_AMD) {
keir@19406 1232 /* On AMD we can set MCi_STATUS_WREN in the
keir@19406 1233 * HWCR MSR to allow non-zero writes to bank
keir@19406 1234 * MSRs without a #GP. The injector in dom0
keir@19406 1235 * should set that bit, but we detect when it
keir@19406 1236 * is necessary and set it as a courtesy to
keir@19406 1237 * avoid #GP in the hypervisor. */
keir@19406 1238 mci->mcinj_flags |=
keir@19406 1239 _MC_MSRINJ_F_REQ_HWCR_WREN;
keir@19406 1240 continue;
keir@19406 1241 } else {
keir@19406 1242 /* No alternative but to interpose, so require
keir@19406 1243 * that the injector specified as such. */
keir@19406 1244 if (!(mci->mcinj_flags &
keir@19406 1245 MC_MSRINJ_F_INTERPOSE)) {
keir@19406 1246 reason = "must specify interposition";
keir@19406 1247 }
keir@19406 1248 }
keir@19406 1249 } else {
keir@19406 1250 switch (reg) {
keir@19406 1251 /* MSRs acceptable on all x86 cpus */
keir@19406 1252 case MSR_IA32_MCG_STATUS:
keir@19406 1253 break;
keir@19406 1254
keir@19406 1255 /* MSRs that the HV will take care of */
keir@19406 1256 case MSR_K8_HWCR:
keir@19406 1257 if (c->x86_vendor == X86_VENDOR_AMD)
keir@19406 1258 reason = "HV will operate HWCR";
keir@19406 1259 else
keir@19406 1260 reason = "only supported on AMD";
keir@19406 1261 break;
keir@19406 1262
keir@19406 1263 default:
keir@19406 1264 reason = "not a recognized MCA MSR";
keir@19406 1265 break;
keir@19406 1266 }
keir@19406 1267 }
keir@19406 1268
keir@19406 1269 if (reason != NULL) {
keir@19406 1270 printk("HV MSR INJECT ERROR: MSR 0x%llx %s\n",
keir@19406 1271 (unsigned long long)mci->mcinj_msr[i].reg, reason);
keir@19406 1272 errs++;
keir@19406 1273 }
keir@19406 1274 }
keir@19406 1275
keir@19406 1276 return !errs;
keir@19406 1277 }
keir@19406 1278
keir@19406 1279 static uint64_t x86_mc_hwcr_wren(void)
keir@19406 1280 {
keir@19406 1281 uint64_t old;
keir@19406 1282
keir@19406 1283 rdmsrl(MSR_K8_HWCR, old);
keir@19406 1284
keir@19406 1285 if (!(old & K8_HWCR_MCi_STATUS_WREN)) {
keir@19406 1286 uint64_t new = old | K8_HWCR_MCi_STATUS_WREN;
keir@19406 1287 wrmsrl(MSR_K8_HWCR, new);
keir@19406 1288 }
keir@19406 1289
keir@19406 1290 return old;
keir@19406 1291 }
keir@19406 1292
keir@19406 1293 static void x86_mc_hwcr_wren_restore(uint64_t hwcr)
keir@19406 1294 {
keir@19406 1295 if (!(hwcr & K8_HWCR_MCi_STATUS_WREN))
keir@19406 1296 wrmsrl(MSR_K8_HWCR, hwcr);
keir@19406 1297 }
keir@19406 1298
keir@19406 1299 static void x86_mc_msrinject(void *data)
keir@19406 1300 {
keir@19406 1301 struct xen_mc_msrinject *mci = data;
keir@19406 1302 struct mcinfo_msr *msr;
keir@19406 1303 struct cpuinfo_x86 *c;
keir@19406 1304 uint64_t hwcr = 0;
keir@19406 1305 int intpose;
keir@19406 1306 int i;
keir@19406 1307
keir@19406 1308 c = &cpu_data[smp_processor_id()];
keir@19406 1309
keir@19406 1310 if (mci->mcinj_flags & _MC_MSRINJ_F_REQ_HWCR_WREN)
keir@19406 1311 hwcr = x86_mc_hwcr_wren();
keir@19406 1312
keir@19406 1313 intpose = (mci->mcinj_flags & MC_MSRINJ_F_INTERPOSE) != 0;
keir@19406 1314
keir@19406 1315 for (i = 0, msr = &mci->mcinj_msr[0];
keir@19406 1316 i < mci->mcinj_count; i++, msr++) {
keir@19406 1317 printk("HV MSR INJECT (%s) target %u actual %u MSR 0x%llx "
keir@19406 1318 "<-- 0x%llx\n",
keir@19406 1319 intpose ? "interpose" : "hardware",
keir@19406 1320 mci->mcinj_cpunr, smp_processor_id(),
keir@19406 1321 (unsigned long long)msr->reg,
keir@19406 1322 (unsigned long long)msr->value);
keir@19406 1323
keir@19406 1324 if (intpose)
keir@19406 1325 intpose_add(mci->mcinj_cpunr, msr->reg, msr->value);
keir@19406 1326 else
keir@19406 1327 wrmsrl(msr->reg, msr->value);
keir@19406 1328 }
keir@19406 1329
keir@19406 1330 if (mci->mcinj_flags & _MC_MSRINJ_F_REQ_HWCR_WREN)
keir@19406 1331 x86_mc_hwcr_wren_restore(hwcr);
keir@19406 1332 }
keir@19406 1333
keir@19406 1334 /*ARGSUSED*/
keir@19406 1335 static void x86_mc_mceinject(void *data)
keir@19406 1336 {
keir@19406 1337 printk("Simulating #MC on cpu %d\n", smp_processor_id());
keir@19406 1338 __asm__ __volatile__("int $0x12");
keir@19406 1339 }
keir@19406 1340
keir@19405 1341 #if BITS_PER_LONG == 64
keir@19405 1342
keir@19405 1343 #define ID2COOKIE(id) ((mctelem_cookie_t)(id))
keir@19405 1344 #define COOKIE2ID(c) ((uint64_t)(c))
keir@19405 1345
keir@19405 1346 #elif BITS_PER_LONG == 32
keir@19405 1347
keir@19405 1348 #define ID2COOKIE(id) ((mctelem_cookie_t)(uint32_t)((id) & 0xffffffffU))
keir@19405 1349 #define COOKIE2ID(c) ((uint64_t)(uint32_t)(c))
keir@19405 1350
keir@19405 1351 #elif defined(BITS_PER_LONG)
keir@19405 1352 #error BITS_PER_LONG has unexpected value
keir@19405 1353 #else
keir@19405 1354 #error BITS_PER_LONG definition absent
keir@19405 1355 #endif
keir@19405 1356
keir@19502 1357 #ifdef CONFIG_COMPAT
keir@19502 1358 # include <compat/arch-x86/xen-mca.h>
keir@19502 1359
keir@19502 1360 # define xen_mcinfo_msr mcinfo_msr
keir@19502 1361 CHECK_mcinfo_msr;
keir@19502 1362 # undef xen_mcinfo_msr
keir@19502 1363 # undef CHECK_mcinfo_msr
keir@19502 1364 # define CHECK_mcinfo_msr struct mcinfo_msr
keir@19502 1365
keir@19502 1366 # define xen_mcinfo_common mcinfo_common
keir@19502 1367 CHECK_mcinfo_common;
keir@19502 1368 # undef xen_mcinfo_common
keir@19502 1369 # undef CHECK_mcinfo_common
keir@19502 1370 # define CHECK_mcinfo_common struct mcinfo_common
keir@19502 1371
keir@19502 1372 CHECK_FIELD_(struct, mc_fetch, flags);
keir@19502 1373 CHECK_FIELD_(struct, mc_fetch, fetch_id);
keir@19502 1374 # define CHECK_compat_mc_fetch struct mc_fetch
keir@19502 1375
keir@19502 1376 CHECK_FIELD_(struct, mc_physcpuinfo, ncpus);
keir@19502 1377 # define CHECK_compat_mc_physcpuinfo struct mc_physcpuinfo
keir@19502 1378
keir@19502 1379 CHECK_mc;
keir@19502 1380 # undef CHECK_compat_mc_fetch
keir@19502 1381 # undef CHECK_compat_mc_physcpuinfo
keir@19502 1382
keir@19502 1383 # define xen_mc_info mc_info
keir@19502 1384 CHECK_mc_info;
keir@19502 1385 # undef xen_mc_info
keir@19502 1386
keir@19502 1387 # define xen_mcinfo_global mcinfo_global
keir@19502 1388 CHECK_mcinfo_global;
keir@19502 1389 # undef xen_mcinfo_global
keir@19502 1390
keir@19502 1391 # define xen_mcinfo_bank mcinfo_bank
keir@19502 1392 CHECK_mcinfo_bank;
keir@19502 1393 # undef xen_mcinfo_bank
keir@19502 1394
keir@19502 1395 # define xen_mcinfo_extended mcinfo_extended
keir@19502 1396 CHECK_mcinfo_extended;
keir@19502 1397 # undef xen_mcinfo_extended
keir@19502 1398
keir@19502 1399 # define xen_mcinfo_recovery mcinfo_recovery
keir@19502 1400 # define xen_cpu_offline_action cpu_offline_action
keir@19502 1401 # define xen_page_offline_action page_offline_action
keir@19502 1402 CHECK_mcinfo_recovery;
keir@19502 1403 # undef xen_cpu_offline_action
keir@19502 1404 # undef xen_page_offline_action
keir@19502 1405 # undef xen_mcinfo_recovery
keir@19502 1406 #else
keir@19502 1407 # define compat_mc_fetch xen_mc_fetch
keir@19502 1408 # define compat_mc_physcpuinfo xen_mc_physcpuinfo
keir@19502 1409 # define compat_handle_is_null guest_handle_is_null
keir@19502 1410 # define copy_to_compat copy_to_guest
keir@19502 1411 #endif
keir@19502 1412
keir@18006 1413 /* Machine Check Architecture Hypercall */
keir@18006 1414 long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u_xen_mc)
keir@18006 1415 {
keir@18006 1416 long ret = 0;
keir@18006 1417 struct xen_mc curop, *op = &curop;
keir@18006 1418 struct vcpu *v = current;
keir@19502 1419 union {
keir@19502 1420 struct xen_mc_fetch *nat;
keir@19502 1421 struct compat_mc_fetch *cmp;
keir@19502 1422 } mc_fetch;
keir@19502 1423 union {
keir@19502 1424 struct xen_mc_physcpuinfo *nat;
keir@19502 1425 struct compat_mc_physcpuinfo *cmp;
keir@19502 1426 } mc_physcpuinfo;
keir@19405 1427 uint32_t flags, cmdflags;
keir@19225 1428 int nlcpu;
keir@19225 1429 xen_mc_logical_cpu_t *log_cpus = NULL;
keir@19405 1430 mctelem_cookie_t mctc;
keir@19405 1431 mctelem_class_t which;
keir@19406 1432 unsigned int target;
keir@19406 1433 struct xen_mc_msrinject *mc_msrinject;
keir@19406 1434 struct xen_mc_mceinject *mc_mceinject;
keir@18006 1435
keir@19558 1436 if ( !IS_PRIV(v->domain) )
keir@19558 1437 return x86_mcerr(NULL, -EPERM);
keir@19558 1438
keir@18006 1439 if ( copy_from_guest(op, u_xen_mc, 1) )
keir@19405 1440 return x86_mcerr("do_mca: failed copyin of xen_mc_t", -EFAULT);
keir@18006 1441
keir@18006 1442 if ( op->interface_version != XEN_MCA_INTERFACE_VERSION )
keir@19405 1443 return x86_mcerr("do_mca: interface version mismatch", -EACCES);
keir@18006 1444
keir@19405 1445 switch (op->cmd) {
keir@18006 1446 case XEN_MC_fetch:
keir@19502 1447 mc_fetch.nat = &op->u.mc_fetch;
keir@19502 1448 cmdflags = mc_fetch.nat->flags;
keir@18006 1449
keir@19405 1450 switch (cmdflags & (XEN_MC_NONURGENT | XEN_MC_URGENT)) {
keir@19405 1451 case XEN_MC_NONURGENT:
keir@19405 1452 which = MC_NONURGENT;
keir@18006 1453 break;
keir@18006 1454
keir@19405 1455 case XEN_MC_URGENT:
keir@19405 1456 which = MC_URGENT;
keir@18006 1457 break;
keir@19405 1458
keir@18006 1459 default:
keir@19405 1460 return x86_mcerr("do_mca fetch: bad cmdflags", -EINVAL);
keir@18006 1461 }
keir@18006 1462
keir@18006 1463 flags = XEN_MC_OK;
keir@18006 1464
keir@19405 1465 if (cmdflags & XEN_MC_ACK) {
keir@19502 1466 mctelem_cookie_t cookie = ID2COOKIE(mc_fetch.nat->fetch_id);
keir@19405 1467 mctelem_ack(which, cookie);
keir@18006 1468 } else {
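/* The guest handle lives in either the native or the compat view of the
 * fetch structure, depending on whether the caller is a 32-on-64 PV guest. */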
keir@19502 1469 if (!is_pv_32on64_vcpu(v)
keir@19502 1470 ? guest_handle_is_null(mc_fetch.nat->data)
keir@19502 1471 : compat_handle_is_null(mc_fetch.cmp->data))
keir@19405 1472 return x86_mcerr("do_mca fetch: guest buffer "
keir@19405 1473 "invalid", -EINVAL);
keir@19405 1474
keir@19405 1475 if ((mctc = mctelem_consume_oldest_begin(which))) {
keir@19405 1476 struct mc_info *mcip = mctelem_dataptr(mctc);
keir@19502 1477 if (!is_pv_32on64_vcpu(v)
keir@19502 1478 ? copy_to_guest(mc_fetch.nat->data, mcip, 1)
keir@19502 1479 : copy_to_compat(mc_fetch.cmp->data,
keir@19502 1480 mcip, 1)) {
keir@19405 1481 ret = -EFAULT;
keir@19405 1482 flags |= XEN_MC_FETCHFAILED;
keir@19502 1483 mc_fetch.nat->fetch_id = 0;
keir@19405 1484 } else {
keir@19502 1485 mc_fetch.nat->fetch_id = COOKIE2ID(mctc);
keir@19405 1486 }
keir@19405 1487 mctelem_consume_oldest_end(mctc);
keir@18006 1488 } else {
keir@19405 1489 /* There is no data */
keir@19405 1490 flags |= XEN_MC_NODATA;
keir@19502 1491 mc_fetch.nat->fetch_id = 0;
keir@18006 1492 }
keir@19405 1493
keir@19502 1494 mc_fetch.nat->flags = flags;
keir@19405 1495 if (copy_to_guest(u_xen_mc, op, 1) != 0)
keir@19405 1496 ret = -EFAULT;
keir@18006 1497 }
keir@18006 1498
keir@18006 1499 break;
keir@18006 1500
keir@18006 1501 case XEN_MC_notifydomain:
keir@19405 1502 return x86_mcerr("do_mca notify unsupported", -EINVAL);
keir@18006 1503
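/* XEN_MC_physcpuinfo: report identification data for each online (logical)
 * CPU into a caller-supplied array, or just return the CPU count. */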
keir@19405 1504 case XEN_MC_physcpuinfo:
keir@19502 1505 mc_physcpuinfo.nat = &op->u.mc_physcpuinfo;
keir@19405 1506 nlcpu = num_online_cpus();
keir@18006 1507
keir@19502 1508 if (!is_pv_32on64_vcpu(v)
keir@19502 1509 ? !guest_handle_is_null(mc_physcpuinfo.nat->info)
keir@19502 1510 : !compat_handle_is_null(mc_physcpuinfo.cmp->info)) {
keir@19502 1511 if (mc_physcpuinfo.nat->ncpus <= 0)
keir@19405 1512 return x86_mcerr("do_mca cpuinfo: ncpus <= 0",
keir@19405 1513 -EINVAL);
keir@19502 1514 nlcpu = min(nlcpu, (int)mc_physcpuinfo.nat->ncpus);
keir@19405 1515 log_cpus = xmalloc_array(xen_mc_logical_cpu_t, nlcpu);
keir@19405 1516 if (log_cpus == NULL)
keir@19405 1517 return x86_mcerr("do_mca cpuinfo", -ENOMEM);
keir@18006 1518
keir@19690 1519 if (on_each_cpu(do_mc_get_cpu_info, log_cpus, 1)) {
keir@19405 1520 xfree(log_cpus);
keir@19405 1521 return x86_mcerr("do_mca cpuinfo", -EIO);
keir@19405 1522 }
keir@19502 1523 if (!is_pv_32on64_vcpu(v)
keir@19502 1524 ? copy_to_guest(mc_physcpuinfo.nat->info,
keir@19502 1525 log_cpus, nlcpu)
keir@19502 1526 : copy_to_compat(mc_physcpuinfo.cmp->info,
keir@19502 1527 log_cpus, nlcpu))
keir@19405 1528 ret = -EFAULT;
keir@19405 1529 xfree(log_cpus);
keir@19405 1530 }
keir@19502 1531
keir@19502 1532 mc_physcpuinfo.nat->ncpus = nlcpu;
keir@19502 1533
keir@19502 1534 if (copy_to_guest(u_xen_mc, op, 1))
keir@19502 1535 return x86_mcerr("do_mca cpuinfo", -EFAULT);
keir@19502 1536
keir@18006 1537 break;
keir@19225 1538
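/* XEN_MC_msrinject: write caller-supplied MCA MSR values on a chosen online
 * CPU (error-injection testing only; taints the hypervisor). */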
keir@19406 1539 case XEN_MC_msrinject:
keir@19406 1540 if (nr_mce_banks == 0)
keir@19406 1541 return x86_mcerr("do_mca inject", -ENODEV);
keir@19406 1542
keir@19406 1543 mc_msrinject = &op->u.mc_msrinject;
keir@19406 1544 target = mc_msrinject->mcinj_cpunr;
keir@19406 1545
keir@19406 1546 if (target >= NR_CPUS)
keir@19406 1547 return x86_mcerr("do_mca inject: bad target", -EINVAL);
keir@19406 1548
keir@19406 1549 if (!cpu_isset(target, cpu_online_map))
keir@19406 1550 return x86_mcerr("do_mca inject: target offline",
keir@19406 1551 -EINVAL);
keir@19406 1552
keir@19406 1553 if (mc_msrinject->mcinj_count == 0)
keir@19406 1554 return 0;
keir@19406 1555
keir@19406 1556 if (!x86_mc_msrinject_verify(mc_msrinject))
keir@19406 1557 return x86_mcerr("do_mca inject: illegal MSR", -EINVAL);
keir@19406 1558
keir@19406 1559 add_taint(TAINT_ERROR_INJECT);
keir@19406 1560
keir@19689 1561 on_selected_cpus(cpumask_of(target), x86_mc_msrinject,
keir@19690 1562 mc_msrinject, 1);
keir@19406 1563
keir@19406 1564 break;
keir@19406 1565
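/* XEN_MC_mceinject: raise a machine-check exception on the target CPU, or on
 * all CPUs when MCE broadcast is in effect. */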
keir@19406 1566 case XEN_MC_mceinject:
keir@19406 1567 if (nr_mce_banks == 0)
keir@19406 1568 return x86_mcerr("do_mca #MC", -ENODEV);
keir@19406 1569
keir@19406 1570 mc_mceinject = &op->u.mc_mceinject;
keir@19406 1571 target = mc_mceinject->mceinj_cpunr;
keir@19406 1572
keir@19406 1573 if (target >= NR_CPUS)
keir@19406 1574 return x86_mcerr("do_mca #MC: bad target", -EINVAL);
keir@20911 1575
keir@19406 1576 if (!cpu_isset(target, cpu_online_map))
keir@19406 1577 return x86_mcerr("do_mca #MC: target offline", -EINVAL);
keir@19406 1578
keir@19406 1579 add_taint(TAINT_ERROR_INJECT);
keir@19406 1580
keir@20911 1581 if ( mce_broadcast )
keir@20911 1582 on_each_cpu(x86_mc_mceinject, mc_mceinject, 0);
keir@20911 1583 else
keir@20911 1584 on_selected_cpus(cpumask_of(target), x86_mc_mceinject,
keir@20911 1585 mc_mceinject, 1);
keir@19406 1586 break;
keir@19406 1587
keir@19405 1588 default:
keir@19405 1589 return x86_mcerr("do_mca: bad command", -EINVAL);
keir@18006 1590 }
keir@18006 1591
keir@18006 1592 return ret;
keir@18006 1593 }
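A minimal sketch (not part of the original file) of how a privileged dom0 caller
could drive the XEN_MC_fetch sub-command handled above. The HYPERVISOR_mca()
wrapper and the include path are assumptions about the guest kernel's hypercall
glue; the structure fields, flags, and acknowledgement protocol mirror the
handler code in do_mca().

/* Sketch only: fetch one non-urgent MCA telemetry record and acknowledge it.
 * HYPERVISOR_mca() and the header path below are assumed to be provided by the
 * guest kernel's Xen glue; field and flag names come from the public xen-mca.h
 * interface consumed by do_mca() above. */
#include <string.h>
#include <xen/interface/arch-x86/xen-mca.h>   /* assumed include path */

static int fetch_one_mca_record(struct mc_info *buf)
{
    struct xen_mc mc;
    int rc;

    memset(&mc, 0, sizeof(mc));
    mc.interface_version = XEN_MCA_INTERFACE_VERSION;
    mc.cmd = XEN_MC_fetch;
    mc.u.mc_fetch.flags = XEN_MC_NONURGENT;
    set_xen_guest_handle(mc.u.mc_fetch.data, buf);

    rc = HYPERVISOR_mca(&mc);                 /* assumed hypercall wrapper */
    if (rc != 0)
        return rc;                            /* -EPERM, -EFAULT, ... from do_mca() */

    if (mc.u.mc_fetch.flags & XEN_MC_NODATA)
        return 0;                             /* nothing queued */
    if (mc.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
        return -1;                            /* copy-out to *buf failed */

    /* The record is now in *buf; the hypervisor wrote fetch_id back into mc,
     * so reuse it to acknowledge (release) the telemetry entry. */
    mc.u.mc_fetch.flags = XEN_MC_NONURGENT | XEN_MC_ACK;
    return HYPERVISOR_mca(&mc);
}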
keir@19539 1594 void set_poll_bankmask(struct cpuinfo_x86 *c)
keir@19539 1595 {
keir@18934 1596
keir@19539 1597 if (cmci_support && !mce_disabled) {
keir@19539 1598 memcpy(&(__get_cpu_var(poll_bankmask)),
keir@19539 1599 &(__get_cpu_var(no_cmci_banks)), sizeof(cpu_banks_t));
keir@19539 1600 }
keir@19539 1601 else {
keir@19539 1602 memcpy(&(get_cpu_var(poll_bankmask)), &mca_allbanks, sizeof(cpu_banks_t));
keir@19539 1603 if (mce_firstbank(c))
keir@19539 1604 clear_bit(0, get_cpu_var(poll_bankmask));
keir@19539 1605 }
keir@19539 1606 }
keir@18934 1607 void mc_panic(char *s)
keir@18934 1608 {
keir@19563 1609 is_mc_panic = 1;
keir@20912 1610 console_force_unlock();
keir@18934 1611 printk("Fatal machine check: %s\n", s);
keir@18934 1612 printk("\n"
keir@18934 1613 "****************************************\n"
keir@18934 1614 "\n"
keir@18934 1615 " The processor has reported a hardware error which cannot\n"
keir@18934 1616 " be recovered from. Xen will now reboot the machine.\n");
keir@18934 1617 panic("HARDWARE ERROR");
keir@18934 1618 }