
annotate xen/arch/x86/cpu/mcheck/mce_intel.c @ 20963:da7ae6d8838a

x86: MCE fixes

- fill_vmsr_data() leaked a domain reference; since the caller already
obtained one, there's no need to obtain another one here
- intel_UCR_handler() could call put_domain() with a NULL pointer
- mcheck_mca_logout() updated a local data structure that wasn't used
after the update

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Feb 10 09:18:11 2010 +0000 (2010-02-10)
parents 5b895c3f4386
children 50ea24db1f88
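
As background for the fixes listed above, here is a minimal standalone sketch (illustrative stand-ins only — struct domain, dom_table and handle_error_for are not the hypervisor's actual types or call sites) of the reference-counting pattern the first two items converge on: get_domain_by_id() may return NULL, and a reference it hands out must be dropped exactly once on every exit path — no second get, no missing put.

    #include <stdio.h>
    #include <stddef.h>

    /* Illustrative stand-ins for the hypervisor's domain handling. */
    struct domain { int domid; int refcnt; };

    static struct domain dom_table[] = { { 0, 1 }, { 7, 1 } };

    static struct domain *get_domain_by_id(int domid)
    {
        size_t i;

        for (i = 0; i < sizeof(dom_table) / sizeof(dom_table[0]); i++)
            if (dom_table[i].domid == domid) {
                dom_table[i].refcnt++;        /* take one reference */
                return &dom_table[i];
            }
        return NULL;                          /* the domain may not exist */
    }

    static void put_domain(struct domain *d)
    {
        d->refcnt--;                          /* drop the reference */
    }

    /* The pattern: check for NULL before any put_domain(), and drop the
     * single reference on every exit path (no double get, no leak). */
    static int handle_error_for(int domid)
    {
        struct domain *d = get_domain_by_id(domid);

        if (d == NULL)
            return -1;                        /* nothing to put */

        /* ... act on the domain ... */

        put_domain(d);                        /* exactly one put per get */
        return 0;
    }

    int main(void)
    {
        printf("existing domain: %d\n", handle_error_for(7));
        printf("missing domain:  %d\n", handle_error_for(3));
        return 0;
    }
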
rev   line source
keir@18976 1 #include <xen/init.h>
keir@18976 2 #include <xen/types.h>
keir@18976 3 #include <xen/irq.h>
keir@18976 4 #include <xen/event.h>
keir@18976 5 #include <xen/kernel.h>
keir@19460 6 #include <xen/delay.h>
keir@18976 7 #include <xen/smp.h>
keir@19781 8 #include <xen/mm.h>
keir@18976 9 #include <asm/processor.h>
keir@19781 10 #include <public/sysctl.h>
keir@18976 11 #include <asm/system.h>
keir@18976 12 #include <asm/msr.h>
keir@19909 13 #include <asm/p2m.h>
keir@18976 14 #include "mce.h"
keir@18976 15 #include "x86_mca.h"
keir@18976 16
keir@18976 17 DEFINE_PER_CPU(cpu_banks_t, mce_banks_owned);
keir@19539 18 DEFINE_PER_CPU(cpu_banks_t, no_cmci_banks);
keir@19539 19 int cmci_support = 0;
keir@19781 20 int ser_support = 0;
keir@18976 21
keir@18976 22 static int nr_intel_ext_msrs = 0;
keir@18976 23
keir@19563 24 /* Below are for MCE handling */
keir@19563 25 struct mce_softirq_barrier {
keir@19563 26 atomic_t val;
keir@19563 27 atomic_t ingen;
keir@19563 28 atomic_t outgen;
keir@19563 29 };
keir@19563 30
keir@19563 31 static struct mce_softirq_barrier mce_inside_bar, mce_severity_bar;
keir@19563 32 static struct mce_softirq_barrier mce_trap_bar;
keir@19563 33
keir@19563 34 /*
keir@19563 35 * mce_logout_lock should only be used in the trap handler,
keir@19563 36 * while MCIP has not been cleared yet in the global status
keir@19563 37 * register. Other use is not safe, since an MCE trap can
keir@19563 38 * happen at any moment, which would cause lock recursion.
keir@19563 39 */
keir@19563 40 static DEFINE_SPINLOCK(mce_logout_lock);
keir@19563 41
keir@19563 42 static atomic_t severity_cpu = ATOMIC_INIT(-1);
keir@19781 43 static atomic_t found_error = ATOMIC_INIT(0);
keir@19563 44
keir@19563 45 static void mce_barrier_enter(struct mce_softirq_barrier *);
keir@19563 46 static void mce_barrier_exit(struct mce_softirq_barrier *);
keir@19563 47
keir@18976 48 #ifdef CONFIG_X86_MCE_THERMAL
keir@18976 49 static void unexpected_thermal_interrupt(struct cpu_user_regs *regs)
keir@18977 50 {
keir@18976 51 printk(KERN_ERR "Thermal: CPU%d: Unexpected LVT TMR interrupt!\n",
keir@18976 52 smp_processor_id());
keir@18976 53 add_taint(TAINT_MACHINE_CHECK);
keir@18976 54 }
keir@18976 55
keir@18976 56 /* P4/Xeon Thermal transition interrupt handler */
keir@18976 57 static void intel_thermal_interrupt(struct cpu_user_regs *regs)
keir@18976 58 {
keir@18976 59 u32 l, h;
keir@18976 60 unsigned int cpu = smp_processor_id();
keir@18976 61 static s_time_t next[NR_CPUS];
keir@18976 62
keir@18976 63 ack_APIC_irq();
keir@18976 64 if (NOW() < next[cpu])
keir@18976 65 return;
keir@18976 66
keir@18976 67 next[cpu] = NOW() + MILLISECS(5000);
keir@18976 68 rdmsr(MSR_IA32_THERM_STATUS, l, h);
keir@18976 69 if (l & 0x1) {
keir@18976 70 printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
keir@18976 71 printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
keir@18976 72 cpu);
keir@18976 73 add_taint(TAINT_MACHINE_CHECK);
keir@18976 74 } else {
keir@18976 75 printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
keir@18976 76 }
keir@18976 77 }
keir@18976 78
keir@18976 79 /* Thermal interrupt handler for this CPU setup */
keir@18976 80 static void (*vendor_thermal_interrupt)(struct cpu_user_regs *regs)
keir@18976 81 = unexpected_thermal_interrupt;
keir@18976 82
keir@18976 83 fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs)
keir@18976 84 {
keir@20111 85 struct cpu_user_regs *old_regs = set_irq_regs(regs);
keir@18976 86 irq_enter();
keir@18976 87 vendor_thermal_interrupt(regs);
keir@18976 88 irq_exit();
keir@20111 89 set_irq_regs(old_regs);
keir@18976 90 }
keir@18976 91
keir@18976 92 /* P4/Xeon Thermal regulation detect and init */
keir@18976 93 static void intel_init_thermal(struct cpuinfo_x86 *c)
keir@18976 94 {
keir@18976 95 u32 l, h;
keir@18976 96 int tm2 = 0;
keir@18976 97 unsigned int cpu = smp_processor_id();
keir@18976 98
keir@18976 99 /* Thermal monitoring */
keir@18976 100 if (!cpu_has(c, X86_FEATURE_ACPI))
keir@18977 101 return; /* -ENODEV */
keir@18976 102
keir@18976 103 /* Clock modulation */
keir@18976 104 if (!cpu_has(c, X86_FEATURE_ACC))
keir@18977 105 return; /* -ENODEV */
keir@18976 106
keir@18976 107 /* first check if it's enabled already, in which case there might
keir@18976 108 * be some SMM goo which handles it, so we can't even install a handler
keir@18976 109 * since it might be delivered via SMI already -zwanem.
keir@18976 110 */
keir@18976 111 rdmsr (MSR_IA32_MISC_ENABLE, l, h);
keir@18976 112 h = apic_read(APIC_LVTTHMR);
keir@18976 113 if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
keir@18976 114 printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",cpu);
keir@18976 115 return; /* -EBUSY */
keir@18976 116 }
keir@18976 117
keir@18976 118 if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13)))
keir@18976 119 tm2 = 1;
keir@18976 120
keir@18977 121 /* check whether a vector already exists, temporarily masked? */
keir@18976 122 if (h & APIC_VECTOR_MASK) {
keir@18976 123 printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already installed\n",
keir@18976 124 cpu, (h & APIC_VECTOR_MASK));
keir@18976 125 return; /* -EBUSY */
keir@18976 126 }
keir@18976 127
keir@18976 128 /* The temperature transition interrupt handler setup */
keir@18977 129 h = THERMAL_APIC_VECTOR; /* our delivery vector */
keir@18977 130 h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */
keir@18976 131 apic_write_around(APIC_LVTTHMR, h);
keir@18976 132
keir@18976 133 rdmsr (MSR_IA32_THERM_INTERRUPT, l, h);
keir@18976 134 wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
keir@18976 135
keir@18976 136 /* ok we're good to go... */
keir@18976 137 vendor_thermal_interrupt = intel_thermal_interrupt;
keir@18976 138
keir@18976 139 rdmsr (MSR_IA32_MISC_ENABLE, l, h);
keir@18976 140 wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
keir@18976 141
keir@18976 142 l = apic_read (APIC_LVTTHMR);
keir@18976 143 apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
keir@18976 144 printk (KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
keir@18976 145 cpu, tm2 ? "TM2" : "TM1");
keir@18976 146 return;
keir@18976 147 }
keir@18976 148 #endif /* CONFIG_X86_MCE_THERMAL */
keir@18976 149
keir@19503 150 static inline void intel_get_extended_msr(struct mcinfo_extended *ext, u32 msr)
keir@19503 151 {
keir@19503 152 if ( ext->mc_msrs < ARRAY_SIZE(ext->mc_msr)
keir@19503 153 && msr < MSR_IA32_MCG_EAX + nr_intel_ext_msrs ) {
keir@19503 154 ext->mc_msr[ext->mc_msrs].reg = msr;
keir@19563 155 mca_rdmsrl(msr, ext->mc_msr[ext->mc_msrs].value);
keir@19503 156 ++ext->mc_msrs;
keir@19503 157 }
keir@19503 158 }
keir@19503 159
keir@19405 160 static enum mca_extinfo
keir@19405 161 intel_get_extended_msrs(struct mc_info *mci, uint16_t bank, uint64_t status)
keir@18976 162 {
keir@19405 163 struct mcinfo_extended mc_ext;
keir@19405 164
keir@19405 165 if (mci == NULL || nr_intel_ext_msrs == 0 || !(status & MCG_STATUS_EIPV))
keir@19405 166 return MCA_EXTINFO_IGNORED;
keir@18976 167
keir@19137 168 /* this function will be called when CAP(9).MCG_EXT_P = 1 */
keir@19405 169 memset(&mc_ext, 0, sizeof(struct mcinfo_extended));
keir@19405 170 mc_ext.common.type = MC_TYPE_EXTENDED;
keir@19405 171 mc_ext.common.size = sizeof(mc_ext);
keir@18976 172
keir@19503 173 intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_EAX);
keir@19503 174 intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_EBX);
keir@19503 175 intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_ECX);
keir@19503 176 intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_EDX);
keir@19503 177 intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_ESI);
keir@19503 178 intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_EDI);
keir@19503 179 intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_EBP);
keir@19503 180 intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_ESP);
keir@19503 181 intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_EFLAGS);
keir@19503 182 intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_EIP);
keir@19503 183 intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_MISC);
keir@18976 184
keir@19503 185 #ifdef __x86_64__
keir@19503 186 intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_R8);
keir@19503 187 intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_R9);
keir@19503 188 intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_R10);
keir@19503 189 intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_R11);
keir@19503 190 intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_R12);
keir@19503 191 intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_R13);
keir@19503 192 intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_R14);
keir@19503 193 intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_R15);
keir@19503 194 #endif
keir@19405 195
keir@19405 196 x86_mcinfo_add(mci, &mc_ext);
keir@19405 197
keir@19405 198 return MCA_EXTINFO_GLOBAL;
keir@18976 199 }
keir@18976 200
keir@19460 201 /* This node list records errors impacting a domain. When one
keir@19460 202 * MCE# happens, one error bank impacts a domain. This error node
keir@19460 203 * will be inserted at the tail of the per-domain data for vMCE# MSR
keir@19460 204 * virtualization. When one vMCE# injection has been fully
keir@19460 205 * processed by the guest, the corresponding node will be deleted.
keir@19460 206 * This node list is for GUEST vMCE# MSR virtualization.
keir@19460 207 */
keir@19460 208 static struct bank_entry* alloc_bank_entry(void) {
keir@19460 209 struct bank_entry *entry;
keir@19460 210
keir@19460 211 entry = xmalloc(struct bank_entry);
keir@19460 212 if (!entry) {
keir@19460 213 printk(KERN_ERR "MCE: malloc bank_entry failed\n");
keir@19460 214 return NULL;
keir@19460 215 }
keir@19460 216 memset(entry, 0x0, sizeof(*entry));
keir@19460 217 INIT_LIST_HEAD(&entry->list);
keir@19460 218 return entry;
keir@19460 219 }
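
As an aside, a minimal userspace sketch of the node-list lifecycle described above (struct entry, dom_vmce, record_bank and consume_one are illustrative names, not the Xen structures): one node is appended at the per-domain tail for each impacting bank, and the head node is removed once the guest has finished processing the corresponding vMCE#.

    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>

    /* Illustrative stand-in for a per-domain bank_entry node. */
    struct entry {
        int bank;
        uint64_t status;
        struct entry *next;
    };

    struct dom_vmce {
        struct entry *head, *tail;   /* per-domain impact list */
        int nr_injection;            /* vMCE# injections still pending */
    };

    /* An MCE# bank impacted the domain: append a node at the tail. */
    static void record_bank(struct dom_vmce *v, int bank, uint64_t status)
    {
        struct entry *e = calloc(1, sizeof(*e));

        if (!e)
            return;
        e->bank = bank;
        e->status = status;
        if (v->tail)
            v->tail->next = e;
        else
            v->head = e;
        v->tail = e;
        v->nr_injection++;
    }

    /* The guest finished processing one vMCE#: delete the head node. */
    static void consume_one(struct dom_vmce *v)
    {
        struct entry *e = v->head;

        if (!e)
            return;
        v->head = e->next;
        if (!v->head)
            v->tail = NULL;
        v->nr_injection--;
        free(e);
    }

    int main(void)
    {
        struct dom_vmce v = { NULL, NULL, 0 };

        record_bank(&v, 8, 0xbd00000000000134ULL);
        record_bank(&v, 9, 0x8c00004000010005ULL);
        printf("pending injections: %d\n", v.nr_injection);  /* 2 */
        consume_one(&v);
        printf("pending injections: %d\n", v.nr_injection);  /* 1 */
        return 0;
    }
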
keir@19460 220
keir@19460 221 /* Fill error bank info for vMCE# injection and GUEST vMCE#
keir@19460 222 * MSR virtualization data:
keir@19460 223 * 1) Log how many injections (nr_injection) the impacted domain has pending.
keir@19460 224 * 2) Copy the MCE# error bank to the impacted domain's node list,
keir@19460 225 * for vMCE# MSR virtualization.
keir@19460 226 */
keir@19460 227
keir@20963 228 static int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
keir@19460 229 uint64_t gstatus) {
keir@19460 230 struct bank_entry *entry;
keir@19460 231
keir@19460 232 /* This error bank impacts one domain, we need to fill domain related
keir@19460 233 * data for vMCE MSRs virtualization and vMCE# injection */
keir@19460 234 if (mc_bank->mc_domid != (uint16_t)~0) {
keir@19909 235 /* For an HVM guest, only when the first vMCE has been consumed successfully by
keir@19909 236 * the guest will we generate another node and inject another vMCE.
keir@19909 237 */
keir@19909 238 if ( (d->is_hvm) && (d->arch.vmca_msrs.nr_injection > 0) )
keir@19909 239 {
keir@20288 240 mce_printk(MCE_QUIET, "MCE: HVM guest has not handled previous"
keir@19909 241 " vMCE yet!\n");
keir@19909 242 return -1;
keir@19909 243 }
keir@19460 244 entry = alloc_bank_entry();
keir@19563 245 if (entry == NULL)
keir@19909 246 return -1;
keir@19909 247
keir@19460 248 entry->mci_status = mc_bank->mc_status;
keir@19460 249 entry->mci_addr = mc_bank->mc_addr;
keir@19460 250 entry->mci_misc = mc_bank->mc_misc;
keir@19460 251 entry->bank = mc_bank->mc_bank;
keir@19460 252
keir@19909 253 spin_lock(&d->arch.vmca_msrs.lock);
keir@19460 254 /* New error Node, insert to the tail of the per_dom data */
keir@19460 255 list_add_tail(&entry->list, &d->arch.vmca_msrs.impact_header);
keir@19460 256 /* Fill MSR global status */
keir@19460 257 d->arch.vmca_msrs.mcg_status = gstatus;
keir@19460 258 /* New node impact the domain, need another vMCE# injection*/
keir@19460 259 d->arch.vmca_msrs.nr_injection++;
keir@19909 260 spin_unlock(&d->arch.vmca_msrs.lock);
keir@19909 261
keir@20288 262 mce_printk(MCE_VERBOSE,"MCE: Found error @[BANK%d "
keir@19909 263 "status %"PRIx64" addr %"PRIx64" domid %d]\n ",
keir@19909 264 mc_bank->mc_bank, mc_bank->mc_status, mc_bank->mc_addr,
keir@19909 265 mc_bank->mc_domid);
keir@19909 266 }
keir@19909 267 return 0;
keir@19909 268 }
keir@19909 269
keir@19909 270 static int inject_mce(struct domain *d)
keir@19909 271 {
keir@19909 272 int cpu = smp_processor_id();
keir@19909 273 cpumask_t affinity;
keir@19909 274
keir@19909 275 /* PV guest and HVM guest have different vMCE# injection
keir@19909 276 * methods*/
keir@19460 277
keir@19909 278 if ( !test_and_set_bool(d->vcpu[0]->mce_pending) )
keir@19909 279 {
keir@19909 280 if (d->is_hvm)
keir@19909 281 {
keir@20288 282 mce_printk(MCE_VERBOSE, "MCE: inject vMCE to HVM DOM %d\n",
keir@19909 283 d->domain_id);
keir@19909 284 vcpu_kick(d->vcpu[0]);
keir@19909 285 }
keir@19909 286 /* PV guest including DOM0 */
keir@19909 287 else
keir@19909 288 {
keir@20288 289 mce_printk(MCE_VERBOSE, "MCE: inject vMCE to PV DOM%d\n",
keir@19909 290 d->domain_id);
keir@19909 291 if (guest_has_trap_callback
keir@19909 292 (d, 0, TRAP_machine_check))
keir@19909 293 {
keir@19909 294 d->vcpu[0]->cpu_affinity_tmp =
keir@19909 295 d->vcpu[0]->cpu_affinity;
keir@19909 296 cpus_clear(affinity);
keir@19909 297 cpu_set(cpu, affinity);
keir@20288 298 mce_printk(MCE_VERBOSE, "MCE: CPU%d set affinity, old %d\n", cpu,
keir@19909 299 d->vcpu[0]->processor);
keir@19909 300 vcpu_set_affinity(d->vcpu[0], &affinity);
keir@19909 301 vcpu_kick(d->vcpu[0]);
keir@19909 302 }
keir@19909 303 else
keir@19909 304 {
keir@20288 305 mce_printk(MCE_VERBOSE, "MCE: Kill PV guest with No MCE handler\n");
keir@19909 306 domain_crash(d);
keir@19909 307 }
keir@19909 308 }
keir@19909 309 }
keir@19909 310 else {
keir@19909 311 /* A new vMCE arrived while the first one has not been injected yet;
keir@19909 312 * in this case the injection fails. [We can't lose this vMCE, for
keir@19909 313 * the sake of the MCE node list's consistency.]
keir@19909 314 */
keir@20288 315 mce_printk(MCE_QUIET, "There's a pending vMCE waiting to be injected "
keir@19909 316 " to this DOM%d!\n", d->domain_id);
keir@19909 317 return -1;
keir@19460 318 }
keir@19460 319 return 0;
keir@19460 320 }
keir@19460 321
keir@20300 322 static void intel_UCR_handler(struct mcinfo_bank *bank,
keir@19781 323 struct mcinfo_global *global,
keir@19781 324 struct mcinfo_extended *extension,
keir@19781 325 struct mca_handle_result *result)
keir@19781 326 {
keir@19781 327 struct domain *d;
keir@19909 328 unsigned long mfn, gfn;
keir@19781 329 uint32_t status;
keir@19781 330
keir@20288 331 mce_printk(MCE_VERBOSE, "MCE: Enter UCR recovery action\n");
keir@19781 332 result->result = MCA_NEED_RESET;
keir@19781 333 if (bank->mc_addr != 0) {
keir@19781 334 mfn = bank->mc_addr >> PAGE_SHIFT;
keir@19781 335 if (!offline_page(mfn, 1, &status)) {
keir@19909 336 /* This is free page */
keir@19781 337 if (status & PG_OFFLINE_OFFLINED)
keir@19781 338 result->result = MCA_RECOVERED;
keir@19781 339 else if (status & PG_OFFLINE_PENDING) {
keir@19781 340 /* This page has owner */
keir@19781 341 if (status & PG_OFFLINE_OWNED) {
keir@19781 342 result->result |= MCA_OWNER;
keir@19781 343 result->owner = status >> PG_OFFLINE_OWNER_SHIFT;
keir@20288 344 mce_printk(MCE_QUIET, "MCE: This error page is ownded"
keir@19781 345 " by DOM %d\n", result->owner);
keir@19909 346 /* Fill vMCE# injection and vMCE# MSR virtualization
keir@19909 347 * related data */
keir@19909 348 bank->mc_domid = result->owner;
keir@20726 349 /* XXX: Cannot handle shared pages yet
keir@20726 350 * (this should identify all domains and gfn mapping to
keir@20726 351 * the mfn in question) */
keir@20726 352 BUG_ON( result->owner == DOMID_COW );
keir@19909 353 if ( result->owner != DOMID_XEN ) {
keir@20908 354
keir@19781 355 d = get_domain_by_id(result->owner);
keir@20908 356 if ( mca_ctl_conflict(bank, d) )
keir@20908 357 {
keir@20908 358 /* Guest has different MCE ctl with hypervisor */
keir@20963 359 if ( d )
keir@20963 360 put_domain(d);
keir@20908 361 return;
keir@20908 362 }
keir@20908 363
keir@20963 364 ASSERT(d);
keir@19909 365 gfn =
keir@20959 366 get_gpfn_from_mfn((bank->mc_addr) >> PAGE_SHIFT);
keir@20910 367 bank->mc_addr = gfn << PAGE_SHIFT |
keir@20910 368 (bank->mc_addr & (PAGE_SIZE -1 ));
keir@20963 369 if ( fill_vmsr_data(bank, d,
keir@20963 370 global->mc_gstatus) == -1 )
keir@19909 371 {
keir@20288 372 mce_printk(MCE_QUIET, "Fill vMCE# data for DOM%d "
keir@19909 373 "failed\n", result->owner);
keir@20909 374 put_domain(d);
keir@19909 375 domain_crash(d);
keir@19909 376 return;
keir@19909 377 }
keir@19909 378 /* We will inject vMCE to DOMU*/
keir@19909 379 if ( inject_mce(d) < 0 )
keir@19909 380 {
keir@20288 381 mce_printk(MCE_QUIET, "inject vMCE to DOM%d"
keir@19909 382 " failed\n", d->domain_id);
keir@20909 383 put_domain(d);
keir@19909 384 domain_crash(d);
keir@19909 385 return;
keir@19909 386 }
keir@19909 387 /* The impacted domain goes on with its own recovery job
keir@19909 388 * if it has its own MCA handler.
keir@19909 389 * As for Xen, it has contained the error and finished
keir@19909 390 * its own recovery job.
keir@19909 391 */
keir@19781 392 result->result = MCA_RECOVERED;
keir@20909 393 put_domain(d);
keir@19781 394 }
keir@19781 395 }
keir@19781 396 }
keir@19781 397 }
keir@19781 398 }
keir@19781 399 }
keir@19781 400
keir@19781 401 #define INTEL_MAX_RECOVERY 2
keir@19781 402 struct mca_error_handler intel_recovery_handler[INTEL_MAX_RECOVERY] =
keir@19781 403 {{0x017A, intel_UCR_handler}, {0x00C0, intel_UCR_handler}};
keir@19781 404
keir@19563 405 /*
keir@19563 406 * Called from mctelem_process_deferred. Return 1 if the telemetry
keir@19563 407 * should be committed for dom0 consumption, 0 if it should be
keir@19563 408 * dismissed.
keir@19563 409 */
keir@19909 410 static int mce_action(mctelem_cookie_t mctc)
keir@19563 411 {
keir@19460 412 struct mc_info *local_mi;
keir@19781 413 uint32_t i;
keir@19460 414 struct mcinfo_common *mic = NULL;
keir@19460 415 struct mcinfo_global *mc_global;
keir@19460 416 struct mcinfo_bank *mc_bank;
keir@19781 417 struct mca_handle_result mca_res;
keir@19460 418
keir@19563 419 local_mi = (struct mc_info*)mctelem_dataptr(mctc);
keir@19563 420 x86_mcinfo_lookup(mic, local_mi, MC_TYPE_GLOBAL);
keir@19563 421 if (mic == NULL) {
keir@19563 422 printk(KERN_ERR "MCE: get local buffer entry failed\n ");
keir@19563 423 return 0;
keir@19460 424 }
keir@19460 425
keir@19563 426 mc_global = (struct mcinfo_global *)mic;
keir@19563 427
keir@19563 428 /* Processing bank information */
keir@19563 429 x86_mcinfo_lookup(mic, local_mi, MC_TYPE_BANK);
keir@19563 430
keir@19563 431 for ( ; mic && mic->size; mic = x86_mcinfo_next(mic) ) {
keir@19563 432 if (mic->type != MC_TYPE_BANK) {
keir@19563 433 continue;
keir@19563 434 }
keir@19563 435 mc_bank = (struct mcinfo_bank*)mic;
keir@19563 436
keir@19781 437 /* TODO: Add recovery actions here, such as page-offline, etc */
keir@19781 438 memset(&mca_res, 0x0f, sizeof(mca_res));
keir@19781 439 for ( i = 0; i < INTEL_MAX_RECOVERY; i++ ) {
keir@20281 440 if ( ((mc_bank->mc_status & 0xffff) ==
keir@20281 441 intel_recovery_handler[i].mca_code) ||
keir@20281 442 ((mc_bank->mc_status & 0xfff0) ==
keir@20281 443 intel_recovery_handler[i].mca_code)) {
keir@19781 444 /* For SRAR, OVER = 1 should already have caused a reset.
keir@19781 445 * For SRAO, OVER = 1 skips the recovery action; execution continues.
keir@19781 446 */
keir@19781 447 if (!(mc_bank->mc_status & MCi_STATUS_OVER))
keir@19781 448 intel_recovery_handler[i].recovery_handler
keir@19781 449 (mc_bank, mc_global, NULL, &mca_res);
keir@19781 450 else {
keir@20281 451 if (!(mc_global->mc_gstatus & MCG_STATUS_RIPV))
keir@19781 452 mca_res.result = MCA_NEED_RESET;
keir@19781 453 else
keir@20281 454 mca_res.result = MCA_NO_ACTION;
keir@19781 455 }
keir@19781 456 if (mca_res.result & MCA_OWNER)
keir@19781 457 mc_bank->mc_domid = mca_res.owner;
keir@19781 458 if (mca_res.result == MCA_NEED_RESET)
keir@19781 459 /* DOMID_XEN*/
keir@19781 460 mc_panic("MCE: Software recovery failed for the UCR "
keir@19781 461 "error\n");
keir@19781 462 else if (mca_res.result == MCA_RECOVERED)
keir@20288 463 mce_printk(MCE_VERBOSE, "MCE: The UCR error is "
keir@20288 464 "successfully recovered by software!\n");
keir@19781 465 else if (mca_res.result == MCA_NO_ACTION)
keir@20288 466 mce_printk(MCE_VERBOSE, "MCE: Overwritten SRAO error can't "
keir@20288 467 "do recovery action; RIPV=1, let it be.\n");
keir@19781 468 break;
keir@19781 469 }
keir@19781 470 }
keir@20281 471 /* For SRAR with no defined recovery action, the MCA handler
keir@20281 472 * should already have caused a reset.
keir@20281 473 */
keir@20281 474 if ( i >= INTEL_MAX_RECOVERY )
keir@20288 475 mce_printk(MCE_VERBOSE, "MCE: No software recovery action"
keir@20288 476 " found for this SRAO error\n");
keir@20281 477
keir@19563 478 }
keir@19563 479 return 1;
keir@19460 480 }
keir@19460 481
keir@19460 482 /* Softirq Handler for this MCE# processing */
keir@19460 483 static void mce_softirq(void)
keir@19460 484 {
keir@19460 485 int cpu = smp_processor_id();
keir@19563 486 unsigned int workcpu;
keir@19460 487
keir@20288 488 mce_printk(MCE_VERBOSE, "CPU%d enter softirq\n", cpu);
keir@19563 489
keir@19563 490 mce_barrier_enter(&mce_inside_bar);
keir@19563 491
keir@19563 492 /*
keir@19563 493 * Everybody is here. Now let's see who gets to do the
keir@19563 494 * recovery work. Right now we just see if there's a CPU
keir@19563 495 * that did not have any problems, and pick that one.
keir@19563 496 *
keir@19563 497 * First, just set a default value: the last CPU who reaches this
keir@19563 498 * will overwrite the value and become the default.
keir@19563 499 */
keir@19563 500
keir@19563 501 atomic_set(&severity_cpu, cpu);
keir@19563 502
keir@19563 503 mce_barrier_enter(&mce_severity_bar);
keir@19563 504 if (!mctelem_has_deferred(cpu))
keir@19563 505 atomic_set(&severity_cpu, cpu);
keir@19563 506 mce_barrier_exit(&mce_severity_bar);
keir@19563 507
keir@19460 508 /* We choose severity_cpu for further processing */
keir@19563 509 if (atomic_read(&severity_cpu) == cpu) {
keir@19563 510
keir@20288 511 mce_printk(MCE_VERBOSE, "CPU%d handling errors\n", cpu);
keir@19460 512
keir@19460 513 /* Step1: Fill DOM0 LOG buffer, vMCE injection buffer and
keir@19460 514 * vMCE MSRs virtualization buffer
keir@19460 515 */
keir@19563 516 for_each_online_cpu(workcpu) {
keir@19909 517 mctelem_process_deferred(workcpu, mce_action);
keir@19563 518 }
keir@19460 519
keir@19460 520 /* Step2: Send Log to DOM0 through vIRQ */
keir@19460 521 if (dom0 && guest_enabled_event(dom0->vcpu[0], VIRQ_MCA)) {
keir@20288 522 mce_printk(MCE_VERBOSE, "MCE: send MCE# to DOM0 through virq\n");
keir@19460 523 send_guest_global_virq(dom0, VIRQ_MCA);
keir@19460 524 }
keir@19460 525 }
keir@19460 526
keir@19563 527 mce_barrier_exit(&mce_inside_bar);
keir@19460 528 }
keir@19460 529
keir@19460 530 /* Machine Check owner judging algorithm:
keir@19460 531 * When an error happens, all CPUs serially read their MSR banks.
keir@19460 532 * The first CPU that fetches the error bank's info will clear
keir@19460 533 * this bank; later readers can't get any info from it again.
keir@19460 534 * That first CPU is the actual mce_owner.
keir@19460 535 *
keir@19460 536 * A fatal (pcc=1) error might crash the machine before we're
keir@19460 537 * able to log it. To avoid losing the log, we adopt two-round
keir@19460 538 * scanning:
keir@19460 539 * Round1: simply scan. If pcc = 1 or ripv = 0 is found, simply reset.
keir@19460 540 * All MCE banks are sticky; on boot, the MCE polling mechanism
keir@19460 541 * will help to collect and log those MCE errors.
keir@19460 542 * Round2: Do all MCE processing logic as normal.
keir@19460 543 */
keir@19460 544 */
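
A compact standalone sketch of the two-round idea just described (simulated bank data, not real MSR accesses; the ST_VAL and ST_PCC bit positions are assumed from the MCi_STATUS layout, and fatal_scan/process_scan are illustrative names): round 1 only looks for fatal conditions and would reset immediately, leaving the sticky banks for the post-reboot poller; round 2 does the normal logging and clearing.

    #include <stdio.h>
    #include <stdint.h>

    #define ST_VAL  (1ULL << 63)   /* MCi_STATUS: valid */
    #define ST_PCC  (1ULL << 57)   /* MCi_STATUS: processor context corrupt */

    /* Simulated sticky bank status registers for one CPU. */
    static uint64_t bank_status[] = { 0, ST_VAL, ST_VAL | ST_PCC };
    #define NR_BANKS (sizeof(bank_status) / sizeof(bank_status[0]))

    /* Round 1: a cheap scan for conditions that force a reset. */
    static int fatal_scan(void)
    {
        unsigned int i;

        for (i = 0; i < NR_BANKS; i++)
            if (bank_status[i] & ST_PCC)
                return 1;           /* would reset; banks stay sticky */
        return 0;
    }

    /* Round 2: normal processing (here just log, then clear). */
    static void process_scan(void)
    {
        unsigned int i;

        for (i = 0; i < NR_BANKS; i++) {
            if (!(bank_status[i] & ST_VAL))
                continue;
            printf("bank %u: status %#llx logged\n",
                   i, (unsigned long long)bank_status[i]);
            bank_status[i] = 0;     /* clear only after logging */
        }
    }

    int main(void)
    {
        if (fatal_scan())
            printf("fatal: reset now, poll the sticky banks after reboot\n");
        else
            process_scan();
        return 0;
    }
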
keir@19563 545 static void mce_panic_check(void)
keir@19563 546 {
keir@19563 547 if (is_mc_panic) {
keir@19563 548 local_irq_enable();
keir@19563 549 for ( ; ; )
keir@19563 550 halt();
keir@19563 551 }
keir@19563 552 }
keir@19563 553
keir@19563 554 /*
keir@19563 555 * Initialize a barrier. Just set it to 0.
keir@19460 556 */
keir@19563 557 static void mce_barrier_init(struct mce_softirq_barrier *bar)
keir@19460 558 {
keir@19563 559 atomic_set(&bar->val, 0);
keir@19563 560 atomic_set(&bar->ingen, 0);
keir@19563 561 atomic_set(&bar->outgen, 0);
keir@19563 562 }
keir@19460 563
keir@19563 564 #if 0
keir@19563 565 /*
keir@19563 566 * This function will need to be used when offlining a CPU in the
keir@19563 567 * recovery actions.
keir@19563 568 *
keir@19563 569 * Decrement a barrier only. Needed for cases where the CPU
keir@19563 570 * in question can't do it itself (e.g. it is being offlined).
keir@19563 571 */
keir@19563 572 static void mce_barrier_dec(struct mce_softirq_barrier *bar)
keir@19563 573 {
keir@19563 574 atomic_inc(&bar->outgen);
keir@19563 575 wmb();
keir@19563 576 atomic_dec(&bar->val);
keir@19563 577 }
keir@19563 578 #endif
keir@19460 579
keir@19563 580 static void mce_spin_lock(spinlock_t *lk)
keir@19563 581 {
keir@19563 582 while (!spin_trylock(lk)) {
keir@19563 583 cpu_relax();
keir@19563 584 mce_panic_check();
keir@19563 585 }
keir@19563 586 }
keir@19460 587
keir@19563 588 static void mce_spin_unlock(spinlock_t *lk)
keir@19563 589 {
keir@19563 590 spin_unlock(lk);
keir@19460 591 }
keir@19460 592
keir@19563 593 /*
keir@19563 594 * Increment the generation number and the value. The generation number
keir@19563 595 * is incremented when entering a barrier. This way, it can be checked
keir@19563 596 * on exit if a CPU is trying to re-enter the barrier. This can happen
keir@19563 597 * if the first CPU to make it out immediately exits or re-enters, while
keir@19563 598 * another CPU that is still in the loop becomes otherwise occupied
keir@19563 599 * (e.g. it needs to service an interrupt, etc), missing the value
keir@19563 600 * it's waiting for.
keir@19563 601 *
keir@19563 602 * These barrier functions should always be paired, so that the
keir@19563 603 * counter value will reach 0 again after all CPUs have exited.
keir@19563 604 */
keir@19563 605 static void mce_barrier_enter(struct mce_softirq_barrier *bar)
keir@19563 606 {
keir@19563 607 int gen;
keir@19563 608
keir@20911 609 if (!mce_broadcast)
keir@20911 610 return;
keir@19563 611 atomic_inc(&bar->ingen);
keir@19563 612 gen = atomic_read(&bar->outgen);
keir@19563 613 mb();
keir@19563 614 atomic_inc(&bar->val);
keir@19563 615 while ( atomic_read(&bar->val) != num_online_cpus() &&
keir@19563 616 atomic_read(&bar->outgen) == gen) {
keir@19563 617 mb();
keir@19563 618 mce_panic_check();
keir@19563 619 }
keir@19563 620 }
keir@19563 621
keir@19563 622 static void mce_barrier_exit(struct mce_softirq_barrier *bar)
keir@19563 623 {
keir@19563 624 int gen;
keir@19563 625
keir@20911 626 if (!mce_broadcast)
keir@20911 627 return;
keir@19563 628 atomic_inc(&bar->outgen);
keir@19563 629 gen = atomic_read(&bar->ingen);
keir@19563 630 mb();
keir@19563 631 atomic_dec(&bar->val);
keir@19563 632 while ( atomic_read(&bar->val) != 0 &&
keir@19563 633 atomic_read(&bar->ingen) == gen ) {
keir@19563 634 mb();
keir@19563 635 mce_panic_check();
keir@19563 636 }
keir@19563 637 }
keir@19563 638
keir@19781 639 #if 0
keir@19563 640 static void mce_barrier(struct mce_softirq_barrier *bar)
keir@19563 641 {
keir@19563 642 mce_barrier_enter(bar);
keir@19563 643 mce_barrier_exit(bar);
keir@19563 644 }
keir@19781 645 #endif
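
For illustration, the generation-counter scheme documented above can be exercised outside the hypervisor; the following pthread/stdatomic simulation mirrors the enter/exit pairing (sim_barrier, barrier_enter and barrier_exit are illustrative stand-ins, not the Xen primitives, and this is a sketch of the idea rather than the real softirq path). The generation number is what lets a waiter notice that the barrier has already been satisfied and moved on. Build with a C11 compiler and -pthread.

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    #define NR_CPUS 4
    #define ROUNDS  1000

    /* Same shape as mce_softirq_barrier, using C11 atomics. */
    struct sim_barrier {
        atomic_int val;
        atomic_int ingen;
        atomic_int outgen;
    };

    static struct sim_barrier bar;

    static void barrier_enter(struct sim_barrier *b)
    {
        int gen;

        atomic_fetch_add(&b->ingen, 1);
        gen = atomic_load(&b->outgen);
        atomic_fetch_add(&b->val, 1);
        /* Wait for everybody, unless someone already started leaving
         * (outgen changed), which means the barrier was satisfied. */
        while (atomic_load(&b->val) != NR_CPUS &&
               atomic_load(&b->outgen) == gen)
            ;
    }

    static void barrier_exit(struct sim_barrier *b)
    {
        int gen;

        atomic_fetch_add(&b->outgen, 1);
        gen = atomic_load(&b->ingen);
        atomic_fetch_sub(&b->val, 1);
        /* Wait for the count to drain, unless someone already re-entered. */
        while (atomic_load(&b->val) != 0 &&
               atomic_load(&b->ingen) == gen)
            ;
    }

    static void *cpu_thread(void *arg)
    {
        long id = (long)arg;
        int i;

        for (i = 0; i < ROUNDS; i++) {
            barrier_enter(&bar);
            /* all simulated CPUs are rendezvoused here */
            barrier_exit(&bar);
        }
        printf("cpu %ld done\n", id);
        return NULL;
    }

    int main(void)
    {
        pthread_t t[NR_CPUS];
        long i;

        for (i = 0; i < NR_CPUS; i++)
            pthread_create(&t[i], NULL, cpu_thread, (void *)i);
        for (i = 0; i < NR_CPUS; i++)
            pthread_join(t[i], NULL);
        return 0;
    }
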
keir@19460 646
keir@19405 647 static void intel_machine_check(struct cpu_user_regs * regs, long error_code)
keir@18976 648 {
keir@19460 649 uint64_t gstatus;
keir@19460 650 mctelem_cookie_t mctc = NULL;
keir@19460 651 struct mca_summary bs;
keir@20281 652 cpu_banks_t clear_bank;
keir@19460 653
keir@19563 654 mce_spin_lock(&mce_logout_lock);
keir@19563 655
keir@19781 656 memset( &clear_bank, 0x0, sizeof(cpu_banks_t));
keir@19781 657 mctc = mcheck_mca_logout(MCA_MCE_SCAN, mca_allbanks, &bs, &clear_bank);
keir@19460 658
keir@19563 659 if (bs.errcnt) {
keir@20959 660 /* dump MCE error */
keir@20959 661 if (mctc != NULL)
keir@20959 662 x86_mcinfo_dump(mctelem_dataptr(mctc));
keir@20959 663
keir@19563 664 /*
keir@19563 665 * Uncorrected errors must be dealt with in softirq context.
keir@19563 666 */
keir@19563 667 if (bs.uc || bs.pcc) {
keir@19563 668 add_taint(TAINT_MACHINE_CHECK);
keir@19563 669 if (mctc != NULL)
keir@19563 670 mctelem_defer(mctc);
keir@19563 671 /*
keir@19781 672 * For PCC=1 errors that can't be recovered, context is lost, so reboot now
keir@19781 673 * without clearing the banks, and deal with the telemetry after reboot
keir@19563 674 * (the MSRs are sticky)
keir@19563 675 */
keir@19563 676 if (bs.pcc)
keir@19563 677 mc_panic("State lost due to machine check exception.\n");
keir@19781 678 if (!bs.ripv)
keir@19781 679 mc_panic("RIPV =0 can't resume execution!\n");
keir@19781 680 if (!bs.recoverable)
keir@19781 681 mc_panic("Machine check exception software recovery fail.\n");
keir@19563 682 } else {
keir@19563 683 if (mctc != NULL)
keir@19563 684 mctelem_commit(mctc);
keir@19563 685 }
keir@19781 686 atomic_set(&found_error, 1);
keir@19781 687
keir@20912 688 mce_printk(MCE_CRITICAL, "MCE: clear_bank map %lx on CPU%d\n",
keir@20281 689 *((unsigned long*)clear_bank), smp_processor_id());
keir@19781 690 mcheck_mca_clearbanks(clear_bank);
keir@19563 691 } else {
keir@19460 692 if (mctc != NULL)
keir@19460 693 mctelem_dismiss(mctc);
keir@19460 694 }
keir@19563 695 mce_spin_unlock(&mce_logout_lock);
keir@19563 696
keir@19563 697 /*
keir@19563 698 * Wait until everybody has processed the trap.
keir@19563 699 */
keir@19781 700 mce_barrier_enter(&mce_trap_bar);
keir@19781 701 /* According to the latest MCA OS writer's guide, if no error bank is found
keir@20281 702 * on any CPU, something unexpected is happening; we can't do any
keir@19781 703 * recovery job but reset the system.
keir@19781 704 */
keir@19781 705 if (atomic_read(&found_error) == 0)
keir@19781 706 mc_panic("Unexpected condition for the MCE handler, need reset\n");
keir@19781 707 mce_barrier_exit(&mce_trap_bar);
keir@19460 708
keir@20281 709 /* Clear the error-found flag after all CPUs finish the above judgement */
keir@20281 710 mce_barrier_enter(&mce_trap_bar);
keir@20281 711 if (atomic_read(&found_error)) {
keir@20912 712 mce_printk(MCE_CRITICAL, "MCE: Choose one CPU "
keir@20281 713 "to clear error finding flag\n ");
keir@20281 714 atomic_set(&found_error, 0);
keir@19460 715 }
keir@20281 716 mca_rdmsrl(MSR_IA32_MCG_STATUS, gstatus);
keir@20281 717 if ((gstatus & MCG_STATUS_MCIP) != 0) {
keir@20912 718 mce_printk(MCE_CRITICAL, "MCE: Clear MCIP@ last step");
keir@20281 719 mca_wrmsrl(MSR_IA32_MCG_STATUS, gstatus & ~MCG_STATUS_MCIP);
keir@20281 720 }
keir@20281 721 mce_barrier_exit(&mce_trap_bar);
keir@19460 722
keir@19460 723 raise_softirq(MACHINE_CHECK_SOFTIRQ);
keir@18976 724 }
keir@18976 725
keir@19781 726 /* According to the MCA OS writer's guide, the CMCI handler needs to clear a bank when
keir@19781 727 * 1) CE (UC = 0)
keir@19781 728 * 2) ser_support = 1, Spurious error, OVER = 0, EN = 0, [UC = 1]
keir@19781 729 * 3) ser_support = 1, UCNA, OVER = 0, S = 0, AR = 0, PCC = 0, [UC = 1, EN = 1]
keir@19781 730 * The MCA handler needs to clear a bank when
keir@19781 731 * 1) ser_support = 1, Spurious error, OVER = 0, EN = 0, UC = 1
keir@19781 732 * 2) ser_support = 1, SRAR, UC = 1, OVER = 0, S = 1, AR = 1, [EN = 1]
keir@19781 733 * 3) ser_support = 1, SRAO, UC = 1, S = 1, AR = 0, [EN = 1]
keir@19781 734 */
keir@19781 735
keir@19781 736 static int intel_need_clearbank_scan(enum mca_source who, u64 status)
keir@19781 737 {
keir@19781 738 if ( who == MCA_CMCI_HANDLER) {
keir@19781 739 /* CMCI need clear bank */
keir@19781 740 if ( !(status & MCi_STATUS_UC) )
keir@19781 741 return 1;
keir@19781 742 /* Spurious need clear bank */
keir@19781 743 else if ( ser_support && !(status & MCi_STATUS_OVER)
keir@19781 744 && !(status & MCi_STATUS_EN) )
keir@19781 745 return 1;
keir@19781 746 /* UCNA OVER = 0 need clear bank */
keir@19781 747 else if ( ser_support && !(status & MCi_STATUS_OVER)
keir@19781 748 && !(status & MCi_STATUS_PCC) && !(status & MCi_STATUS_S)
keir@19781 749 && !(status & MCi_STATUS_AR))
keir@19781 750 return 1;
keir@19781 751 /* Only Log, no clear */
keir@19781 752 else return 0;
keir@19781 753 }
keir@19781 754 else if ( who == MCA_MCE_SCAN) {
keir@19781 755 /* Spurious need clear bank */
keir@19781 756 if ( ser_support && !(status & MCi_STATUS_OVER)
keir@19781 757 && (status & MCi_STATUS_UC) && !(status & MCi_STATUS_EN))
keir@19781 758 return 1;
keir@19781 759 /* SRAR OVER=0: clear bank. OVER = 1 should have caused a reset */
keir@19781 760 else if ( ser_support && (status & MCi_STATUS_UC)
keir@19781 761 && (status & MCi_STATUS_S) && (status & MCi_STATUS_AR )
keir@19781 762 && (status & MCi_STATUS_OVER) )
keir@19781 763 return 1;
keir@19781 764 /* SRAO need clear bank */
keir@19781 765 else if ( ser_support && !(status & MCi_STATUS_AR)
keir@19781 766 && (status & MCi_STATUS_S) && (status & MCi_STATUS_UC))
keir@19781 767 return 1;
keir@19781 768 else
keir@19781 769 return 0;
keir@19781 770 }
keir@19781 771
keir@19781 772 return 1;
keir@19781 773 }
keir@19781 774
keir@19781 775 /* MCE continues/is recoverable when
keir@19781 776 * 1) CE UC = 0
keir@19781 777 * 2) Spurious ser_support = 1, OVER = 0, EN = 0 [UC = 1]
keir@19781 778 * 3) SRAR ser_support = 1, OVER = 0, PCC = 0, S = 1, AR = 1 [UC = 1, EN = 1]
keir@19781 779 * 4) SRAO ser_support = 1, PCC = 0, S = 1, AR = 0, EN = 1 [UC = 1]
keir@19781 780 * 5) UCNA ser_support = 1, OVER = 0, EN = 1, PCC = 0, S = 0, AR = 0, [UC = 1]
keir@19781 781 */
keir@19781 782 static int intel_recoverable_scan(u64 status)
keir@19781 783 {
keir@19781 784
keir@19781 785 if ( !(status & MCi_STATUS_UC ) )
keir@19781 786 return 1;
keir@19781 787 else if ( ser_support && !(status & MCi_STATUS_EN)
keir@19781 788 && !(status & MCi_STATUS_OVER) )
keir@19781 789 return 1;
keir@19781 790 /* SRAR error */
keir@19781 791 else if ( ser_support && !(status & MCi_STATUS_OVER)
keir@19781 792 && !(status & MCi_STATUS_PCC) && (status & MCi_STATUS_S)
keir@19781 793 && (status & MCi_STATUS_AR) ) {
keir@20288 794 mce_printk(MCE_VERBOSE, "MCE: No SRAR error defined currently.\n");
keir@19781 795 return 0;
keir@19781 796 }
keir@19781 797 /* SRAO error */
keir@19781 798 else if (ser_support && !(status & MCi_STATUS_PCC)
keir@19781 799 && (status & MCi_STATUS_S) && !(status & MCi_STATUS_AR)
keir@19781 800 && (status & MCi_STATUS_EN))
keir@19781 801 return 1;
keir@19781 802 /* UCNA error */
keir@19781 803 else if (ser_support && !(status & MCi_STATUS_OVER)
keir@19781 804 && (status & MCi_STATUS_EN) && !(status & MCi_STATUS_PCC)
keir@19781 805 && !(status & MCi_STATUS_S) && !(status & MCi_STATUS_AR))
keir@19781 806 return 1;
keir@19781 807 return 0;
keir@19781 808 }
keir@19781 809
keir@18976 810 static DEFINE_SPINLOCK(cmci_discover_lock);
keir@18976 811
keir@18976 812 /*
keir@18976 813 * Discover bank sharing using the algorithm recommended in the SDM.
keir@18976 814 */
keir@18976 815 static int do_cmci_discover(int i)
keir@18976 816 {
keir@18976 817 unsigned msr = MSR_IA32_MC0_CTL2 + i;
keir@18976 818 u64 val;
keir@18976 819
keir@18976 820 rdmsrl(msr, val);
keir@18976 821 /* Some other CPU already owns this bank. */
keir@18976 822 if (val & CMCI_EN) {
keir@18977 823 clear_bit(i, __get_cpu_var(mce_banks_owned));
keir@18977 824 goto out;
keir@18976 825 }
keir@18976 826 wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD);
keir@18976 827 rdmsrl(msr, val);
keir@18976 828
keir@18976 829 if (!(val & CMCI_EN)) {
keir@18977 830 /* This bank does not support CMCI. Polling timer has to handle it. */
keir@18977 831 set_bit(i, __get_cpu_var(no_cmci_banks));
keir@18977 832 return 0;
keir@18976 833 }
keir@18976 834 set_bit(i, __get_cpu_var(mce_banks_owned));
keir@18976 835 out:
keir@18976 836 clear_bit(i, __get_cpu_var(no_cmci_banks));
keir@18976 837 return 1;
keir@18976 838 }
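
As a side note, a tiny sequential simulation (illustrative only; a plain array stands in for the shared MC_CTL2 MSRs, claim_bank is a made-up helper, and the CMCI_EN bit position is an assumption) of the claim-or-back-off flow do_cmci_discover() implements: the first CPU whose CMCI_EN write sticks owns the bank, and later claimants see EN already set and leave it alone.

    #include <stdio.h>
    #include <stdint.h>

    #define CMCI_EN  (1ULL << 30)
    #define NR_BANKS 4
    #define NR_CPUS  2

    /* Simulated MC_CTL2 MSRs shared by the "CPUs" of one package. */
    static uint64_t mc_ctl2[NR_BANKS];

    /* Claim-or-back-off: write CMCI_EN and read it back.  The CPU that
     * sees its write stick owns the bank; a later claimant finds EN
     * already set and backs off.  (Sequential simulation; a bank with
     * no CMCI support would simply never latch the EN bit.) */
    static int claim_bank(int bank)
    {
        if (mc_ctl2[bank] & CMCI_EN)
            return 0;                  /* another CPU already owns it */
        mc_ctl2[bank] |= CMCI_EN;      /* wrmsr(..., val | CMCI_EN) */
        if (!(mc_ctl2[bank] & CMCI_EN))
            return -1;                 /* no CMCI here: leave it to polling */
        return 1;                      /* this CPU is now the owner */
    }

    int main(void)
    {
        int cpu, bank;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
            for (bank = 0; bank < NR_BANKS; bank++)
                printf("cpu%d bank%d -> %s\n", cpu, bank,
                       claim_bank(bank) == 1 ? "owner" : "not owner");
        return 0;
    }
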
keir@18976 839
keir@18977 840 static void cmci_discover(void)
keir@18976 841 {
keir@18977 842 unsigned long flags;
keir@18976 843 int i;
keir@19405 844 mctelem_cookie_t mctc;
keir@19405 845 struct mca_summary bs;
keir@18976 846
keir@20288 847 mce_printk(MCE_VERBOSE, "CMCI: find owner on CPU%d\n", smp_processor_id());
keir@18977 848
keir@18977 849 spin_lock_irqsave(&cmci_discover_lock, flags);
keir@18977 850
keir@18977 851 for (i = 0; i < nr_mce_banks; i++)
keir@18977 852 if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
keir@18977 853 do_cmci_discover(i);
keir@18977 854
keir@18977 855 spin_unlock_irqrestore(&cmci_discover_lock, flags);
keir@18977 856
keir@19287 857 /* In case a CMCI happened during the owner change:
keir@19287 858 * if a CMCI happened but was not processed immediately, and
keir@19287 859 * MCi_STATUS (error_count, bits 38~52) is not cleared,
keir@19287 860 * the CMCI interrupt will never be triggered again.
keir@19287 861 */
keir@19405 862
keir@19405 863 mctc = mcheck_mca_logout(
keir@19781 864 MCA_CMCI_HANDLER, __get_cpu_var(mce_banks_owned), &bs, NULL);
keir@19405 865
keir@19405 866 if (bs.errcnt && mctc != NULL) {
keir@19405 867 if (guest_enabled_event(dom0->vcpu[0], VIRQ_MCA)) {
keir@19405 868 mctelem_commit(mctc);
keir@19287 869 send_guest_global_virq(dom0, VIRQ_MCA);
keir@19405 870 } else {
keir@19444 871 x86_mcinfo_dump(mctelem_dataptr(mctc));
keir@19405 872 mctelem_dismiss(mctc);
keir@19460 873 }
keir@19405 874 } else if (mctc != NULL)
keir@19405 875 mctelem_dismiss(mctc);
keir@19287 876
keir@20288 877 mce_printk(MCE_VERBOSE, "CMCI: CPU%d owner_map[%lx], no_cmci_map[%lx]\n",
keir@20288 878 smp_processor_id(),
keir@20288 879 *((unsigned long *)__get_cpu_var(mce_banks_owned)),
keir@18977 880 *((unsigned long *)__get_cpu_var(no_cmci_banks)));
keir@18976 881 }
keir@18976 882
keir@18976 883 /*
keir@18976 884 * Define an owner for each bank. Banks can be shared between CPUs
keir@18976 885 * and to avoid reporting events multiple times always set up one
keir@18976 886 * CPU as owner.
keir@18976 887 *
keir@18976 888 * The assignment has to be redone when CPUs go offline and
keir@18976 889 * any of the owners goes away. Also pollers run in parallel so we
keir@18976 890 * have to be careful to update the banks in a way that doesn't
keir@18976 891 * lose or duplicate events.
keir@18976 892 */
keir@18976 893
keir@18976 894 static void mce_set_owner(void)
keir@18976 895 {
keir@18976 896 if (!cmci_support || mce_disabled == 1)
keir@18976 897 return;
keir@18976 898
keir@18976 899 cmci_discover();
keir@18976 900 }
keir@18976 901
keir@18977 902 static void __cpu_mcheck_distribute_cmci(void *unused)
keir@18977 903 {
keir@18977 904 cmci_discover();
keir@18977 905 }
keir@18977 906
keir@18977 907 void cpu_mcheck_distribute_cmci(void)
keir@18977 908 {
keir@18977 909 if (cmci_support && !mce_disabled)
keir@19690 910 on_each_cpu(__cpu_mcheck_distribute_cmci, NULL, 0);
keir@18977 911 }
keir@18977 912
keir@18976 913 static void clear_cmci(void)
keir@18976 914 {
keir@18976 915 int i;
keir@18976 916
keir@18976 917 if (!cmci_support || mce_disabled == 1)
keir@18976 918 return;
keir@18976 919
keir@20288 920 mce_printk(MCE_VERBOSE, "CMCI: clear_cmci support on CPU%d\n",
keir@18976 921 smp_processor_id());
keir@18976 922
keir@18976 923 for (i = 0; i < nr_mce_banks; i++) {
keir@18976 924 unsigned msr = MSR_IA32_MC0_CTL2 + i;
keir@18976 925 u64 val;
keir@18976 926 if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
keir@18976 927 continue;
keir@18976 928 rdmsrl(msr, val);
keir@18976 929 if (val & (CMCI_EN|CMCI_THRESHOLD_MASK))
keir@18976 930 wrmsrl(msr, val & ~(CMCI_EN|CMCI_THRESHOLD_MASK));
keir@18976 931 clear_bit(i, __get_cpu_var(mce_banks_owned));
keir@18976 932 }
keir@18976 933 }
keir@18976 934
keir@18977 935 void cpu_mcheck_disable(void)
keir@18976 936 {
keir@18977 937 clear_in_cr4(X86_CR4_MCE);
keir@18976 938
keir@18977 939 if (cmci_support && !mce_disabled)
keir@18976 940 clear_cmci();
keir@18976 941 }
keir@18976 942
keir@18976 943 static void intel_init_cmci(struct cpuinfo_x86 *c)
keir@18976 944 {
keir@18976 945 u32 l, apic;
keir@18976 946 int cpu = smp_processor_id();
keir@18976 947
keir@18976 948 if (!mce_available(c) || !cmci_support) {
keir@20288 949 mce_printk(MCE_QUIET, "CMCI: CPU%d has no CMCI support\n", cpu);
keir@18976 950 return;
keir@18976 951 }
keir@18976 952
keir@18976 953 apic = apic_read(APIC_CMCI);
keir@18976 954 if ( apic & APIC_VECTOR_MASK )
keir@18976 955 {
keir@20288 956 mce_printk(MCE_QUIET, "CPU%d CMCI LVT vector (%#x) already installed\n",
keir@18976 957 cpu, ( apic & APIC_VECTOR_MASK ));
keir@18976 958 return;
keir@18976 959 }
keir@18976 960
keir@18976 961 apic = CMCI_APIC_VECTOR;
keir@18976 962 apic |= (APIC_DM_FIXED | APIC_LVT_MASKED);
keir@18976 963 apic_write_around(APIC_CMCI, apic);
keir@18976 964
keir@18976 965 l = apic_read(APIC_CMCI);
keir@18976 966 apic_write_around(APIC_CMCI, l & ~APIC_LVT_MASKED);
keir@18976 967 }
keir@18976 968
keir@18976 969 fastcall void smp_cmci_interrupt(struct cpu_user_regs *regs)
keir@18976 970 {
keir@19405 971 mctelem_cookie_t mctc;
keir@19405 972 struct mca_summary bs;
keir@20111 973 struct cpu_user_regs *old_regs = set_irq_regs(regs);
keir@18976 974
keir@18976 975 ack_APIC_irq();
keir@18976 976 irq_enter();
keir@19405 977
keir@19405 978 mctc = mcheck_mca_logout(
keir@19781 979 MCA_CMCI_HANDLER, __get_cpu_var(mce_banks_owned), &bs, NULL);
keir@19405 980
keir@19405 981 if (bs.errcnt && mctc != NULL) {
keir@19405 982 if (guest_enabled_event(dom0->vcpu[0], VIRQ_MCA)) {
keir@19405 983 mctelem_commit(mctc);
keir@20288 984 mce_printk(MCE_VERBOSE, "CMCI: send CMCI to DOM0 through virq\n");
keir@18976 985 send_guest_global_virq(dom0, VIRQ_MCA);
keir@19405 986 } else {
keir@19444 987 x86_mcinfo_dump(mctelem_dataptr(mctc));
keir@19405 988 mctelem_dismiss(mctc);
keir@19460 989 }
keir@19405 990 } else if (mctc != NULL)
keir@19405 991 mctelem_dismiss(mctc);
keir@19405 992
keir@18976 993 irq_exit();
keir@20111 994 set_irq_regs(old_regs);
keir@18976 995 }
keir@18976 996
keir@18976 997 void mce_intel_feature_init(struct cpuinfo_x86 *c)
keir@18976 998 {
keir@18976 999
keir@18976 1000 #ifdef CONFIG_X86_MCE_THERMAL
keir@18976 1001 intel_init_thermal(c);
keir@18976 1002 #endif
keir@18976 1003 intel_init_cmci(c);
keir@18976 1004 }
keir@18976 1005
keir@19988 1006 static void _mce_cap_init(struct cpuinfo_x86 *c)
keir@18976 1007 {
keir@19988 1008 u32 l = mce_cap_init();
keir@19460 1009
keir@18976 1010 if ((l & MCG_CMCI_P) && cpu_has_apic)
keir@18976 1011 cmci_support = 1;
keir@18976 1012
keir@19781 1013 /* Support Software Error Recovery */
keir@19781 1014 if (l & MCG_SER_P)
keir@19781 1015 ser_support = 1;
keir@19781 1016
keir@18976 1017 if (l & MCG_EXT_P)
keir@18976 1018 {
keir@18976 1019 nr_intel_ext_msrs = (l >> MCG_EXT_CNT) & 0xff;
keir@20288 1020 mce_printk (MCE_QUIET, "CPU%d: Intel Extended MCE MSRs (%d) available\n",
keir@18976 1021 smp_processor_id(), nr_intel_ext_msrs);
keir@18976 1022 }
keir@19405 1023 firstbank = mce_firstbank(c);
keir@18976 1024 }
keir@18976 1025
keir@18976 1026 static void mce_init(void)
keir@18976 1027 {
keir@18976 1028 u32 l, h;
keir@18986 1029 int i;
keir@19405 1030 mctelem_cookie_t mctc;
keir@19405 1031 struct mca_summary bs;
keir@19405 1032
keir@18976 1033 clear_in_cr4(X86_CR4_MCE);
keir@19405 1034
keir@19563 1035 mce_barrier_init(&mce_inside_bar);
keir@19563 1036 mce_barrier_init(&mce_severity_bar);
keir@19563 1037 mce_barrier_init(&mce_trap_bar);
keir@19563 1038 spin_lock_init(&mce_logout_lock);
keir@19563 1039
keir@18976 1040 /* log the machine checks left over from the previous reset.
keir@18976 1041 * This also clears all registers*/
keir@18976 1042
keir@19781 1043 mctc = mcheck_mca_logout(MCA_RESET, mca_allbanks, &bs, NULL);
keir@19405 1044
keir@20297 1045 /* in the boot-up stage, print out the errors and also log them for Dom0 to process */
keir@19405 1046 if (bs.errcnt && mctc != NULL) {
keir@19405 1047 x86_mcinfo_dump(mctelem_dataptr(mctc));
keir@20297 1048 mctelem_commit(mctc);
keir@19405 1049 }
keir@18976 1050
keir@18976 1051 set_in_cr4(X86_CR4_MCE);
keir@18976 1052
keir@18976 1053 for (i = firstbank; i < nr_mce_banks; i++)
keir@18976 1054 {
keir@19137 1055 /* Some banks are shared across cores; use MCi_CTL to judge whether
keir@19137 1056 * this bank has already been initialized by another core. */
keir@18976 1057 rdmsr(MSR_IA32_MC0_CTL + 4*i, l, h);
keir@19137 1058 if (!(l | h))
keir@18976 1059 {
keir@19137 1060 /* if ctl is 0, this bank has never been initialized */
keir@20288 1061 mce_printk(MCE_VERBOSE, "mce_init: init bank%d\n", i);
keir@18976 1062 wrmsr (MSR_IA32_MC0_CTL + 4*i, 0xffffffff, 0xffffffff);
keir@18976 1063 wrmsr (MSR_IA32_MC0_STATUS + 4*i, 0x0, 0x0);
keir@19137 1064 }
keir@18976 1065 }
keir@19137 1066 if (firstbank) /* if cmci enabled, firstbank = 0 */
keir@18976 1067 wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
keir@18976 1068 }
keir@18976 1069
keir@19137 1070 /* The P4/P6 families have a similar MCA initialization process */
keir@19405 1071 int intel_mcheck_init(struct cpuinfo_x86 *c)
keir@18976 1072 {
keir@19988 1073 _mce_cap_init(c);
keir@20288 1074 mce_printk(MCE_QUIET, "Intel machine check reporting enabled on CPU#%d.\n",
keir@18977 1075 smp_processor_id());
keir@19405 1076
keir@18977 1077 /* machine check is available */
keir@19405 1078 x86_mce_vector_register(intel_machine_check);
keir@19405 1079 x86_mce_callback_register(intel_get_extended_msrs);
keir@19781 1080 mce_recoverable_register(intel_recoverable_scan);
keir@19781 1081 mce_need_clearbank_register(intel_need_clearbank_scan);
keir@19405 1082
keir@18977 1083 mce_init();
keir@18977 1084 mce_intel_feature_init(c);
keir@18977 1085 mce_set_owner();
keir@19405 1086
keir@19460 1087 open_softirq(MACHINE_CHECK_SOFTIRQ, mce_softirq);
keir@19405 1088 return 1;
keir@18976 1089 }
keir@19461 1090
keir@20099 1091 int intel_mce_wrmsr(uint32_t msr, uint64_t val)
keir@19461 1092 {
keir@19463 1093 int ret = 1;
keir@19461 1094
keir@19988 1095 switch ( msr )
keir@19461 1096 {
keir@19505 1097 case MSR_IA32_MC0_CTL2 ... MSR_IA32_MC0_CTL2 + MAX_NR_BANKS - 1:
keir@20288 1098 mce_printk(MCE_QUIET, "We have disabled CMCI capability, "
keir@19463 1099 "Guest should not write this MSR!\n");
keir@19463 1100 break;
keir@19463 1101 default:
keir@19463 1102 ret = 0;
keir@19463 1103 break;
keir@19461 1104 }
keir@19988 1105
keir@19461 1106 return ret;
keir@19461 1107 }
keir@19461 1108
keir@20099 1109 int intel_mce_rdmsr(uint32_t msr, uint64_t *val)
keir@19461 1110 {
keir@19463 1111 int ret = 1;
keir@19461 1112
keir@19988 1113 switch ( msr )
keir@19461 1114 {
keir@19505 1115 case MSR_IA32_MC0_CTL2 ... MSR_IA32_MC0_CTL2 + MAX_NR_BANKS - 1:
keir@20288 1116 mce_printk(MCE_QUIET, "We have disabled CMCI capability, "
keir@19463 1117 "Guest should not read this MSR!\n");
keir@19463 1118 break;
keir@19463 1119 default:
keir@19463 1120 ret = 0;
keir@19463 1121 break;
keir@19461 1122 }
keir@19988 1123
keir@19461 1124 return ret;
keir@19461 1125 }
keir@19461 1126
keir@19461 1127