
view xen/arch/x86/cpu/mcheck/non-fatal.c @ 20998:50ea24db1f88

x86/mcheck: do not blindly de-reference dom0 et al

Since machine checks and CMCIs can happen before Dom0 even gets
constructed, the handlers of these events have to avoid de-referencing
respective pointers without checking.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author   Keir Fraser <keir.fraser@citrix.com>
date     Wed Feb 17 12:04:50 2010 +0000 (2010-02-17)
parents  c23aeb37b17f
children 3ffdb094c2c0 fa94385978e6
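
For context, the check the commit message describes is reached through the dom0_vmce_enabled() helper called from mce_checkregs() below; its body lives outside this file. A minimal sketch of what such a guard might look like follows, assuming the pre-existing guest_enabled_event() predicate and the usual dom0/vcpu fields; the exact field checks are illustrative, not the verbatim patch.

/* Hedged sketch only - not part of non-fatal.c. The point of the changeset
 * is that this predicate must tolerate a Dom0 that has not been constructed
 * yet, instead of de-referencing dom0 unconditionally. */
int dom0_vmce_enabled(void)
{
	return dom0 != NULL && dom0->max_vcpus && dom0->vcpu[0] != NULL
	       && guest_enabled_event(dom0->vcpu[0], VIRQ_MCA);
}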
/*
 * Non Fatal Machine Check Exception Reporting
 *
 * (C) Copyright 2002 Dave Jones. <davej@codemonkey.org.uk>
 *
 * This file contains routines to check for non-fatal MCEs every 15s
 *
 */
#include <xen/config.h>
#include <xen/init.h>
#include <xen/types.h>
#include <xen/kernel.h>
#include <xen/smp.h>
#include <xen/timer.h>
#include <xen/errno.h>
#include <xen/event.h>
#include <xen/sched.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/msr.h>

#include "mce.h"

/* Per-CPU mask of MCA banks inspected by the periodic poller. */
DEFINE_PER_CPU(cpu_banks_t, poll_bankmask);
static struct timer mce_timer;

/* Adaptive polling interval: 8s by default, clamped to [2s, 16s]. */
#define MCE_PERIOD MILLISECS(8000)
#define MCE_PERIOD_MIN MILLISECS(2000)
#define MCE_PERIOD_MAX MILLISECS(16000)

static uint64_t period = MCE_PERIOD;
static int adjust = 0;          /* CPUs that logged an error in the last pass */
static int variable_period = 1; /* non-zero: adapt the interval to the error rate */

static void mce_checkregs (void *info)
{
	mctelem_cookie_t mctc;
	struct mca_summary bs;
	static uint64_t dumpcount = 0;

	mctc = mcheck_mca_logout(MCA_POLLER, __get_cpu_var(poll_bankmask), &bs, NULL);

	if (bs.errcnt && mctc != NULL) {
		adjust++;

		/* If Dom0 enabled the VIRQ_MCA event, then notify it.
		 * Otherwise, if dom0 has had plenty of time to register
		 * the virq handler but still hasn't then dump telemetry
		 * to the Xen console. The call count may be incremented
		 * on multiple cpus at once and is indicative only - just
		 * a simple-minded attempt to avoid spamming the console
		 * for corrected errors in early startup.
		 */

		if (dom0_vmce_enabled()) {
			mctelem_commit(mctc);
			send_guest_global_virq(dom0, VIRQ_MCA);
		} else if (++dumpcount >= 10) {
			x86_mcinfo_dump((struct mc_info *)mctelem_dataptr(mctc));
			mctelem_dismiss(mctc);
		} else {
			mctelem_dismiss(mctc);
		}
	} else if (mctc != NULL) {
		mctelem_dismiss(mctc);
	}
}

static void mce_work_fn(void *data)
{
	on_each_cpu(mce_checkregs, NULL, 1);

	if (variable_period) {
		if (adjust)
			period /= (adjust + 1);
		else
			period *= 2;
		if (period > MCE_PERIOD_MAX)
			period = MCE_PERIOD_MAX;
		if (period < MCE_PERIOD_MIN)
			period = MCE_PERIOD_MIN;
	}

	set_timer(&mce_timer, NOW() + period);
	adjust = 0;
}

static int __init init_nonfatal_mce_checker(void)
{
	struct cpuinfo_x86 *c = &boot_cpu_data;

	/* Check for MCE support */
	if (!mce_available(c))
		return -ENODEV;

	/*
	 * Check for non-fatal errors every MCE_RATE s
	 */
	switch (c->x86_vendor) {
	case X86_VENDOR_AMD:
		if (c->x86 == 6) { /* K7 */
			init_timer(&mce_timer, mce_work_fn, NULL, 0);
			set_timer(&mce_timer, NOW() + MCE_PERIOD);
			break;
		}

		/* Assume we are on K8 or newer AMD CPU here */
		amd_nonfatal_mcheck_init(c);
		break;

	case X86_VENDOR_INTEL:
		/*
		 * The P5 family is different. P4/P6 and latest CPUs share the
		 * same polling methods.
		 */
		if ( c->x86 != 5 )
		{
			init_timer(&mce_timer, mce_work_fn, NULL, 0);
			set_timer(&mce_timer, NOW() + MCE_PERIOD);
		}
		break;
	}

	printk(KERN_INFO "mcheck_poll: Machine check polling timer started.\n");
	return 0;
}
__initcall(init_nonfatal_mce_checker);
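
The interval logic in mce_work_fn() above shrinks the polling period while corrected errors keep arriving and doubles it again once things go quiet, clamped to the 2s-16s window. The standalone sketch below is not part of the Xen tree; it is plain hosted C with hypothetical per-pass error counts that simply mirrors that arithmetic to show how the period settles.

#include <stdio.h>
#include <stdint.h>

#define PERIOD_DEFAULT  8000ULL   /* ms, mirrors MCE_PERIOD */
#define PERIOD_MIN      2000ULL   /* ms, mirrors MCE_PERIOD_MIN */
#define PERIOD_MAX     16000ULL   /* ms, mirrors MCE_PERIOD_MAX */

/* Same adjustment as mce_work_fn(): shrink while errors are seen, back off otherwise. */
static uint64_t adjust_period(uint64_t period, int adjust)
{
	if (adjust)
		period /= (adjust + 1);
	else
		period *= 2;
	if (period > PERIOD_MAX)
		period = PERIOD_MAX;
	if (period < PERIOD_MIN)
		period = PERIOD_MIN;
	return period;
}

int main(void)
{
	/* Hypothetical numbers of CPUs reporting corrected errors per polling pass. */
	int errors[] = { 0, 0, 3, 1, 0, 0, 0 };
	uint64_t period = PERIOD_DEFAULT;
	unsigned int i;

	for (i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) {
		period = adjust_period(period, errors[i]);
		printf("pass %u: adjust=%d -> next poll in %llu ms\n",
		       i, errors[i], (unsigned long long)period);
	}
	return 0;
}

Under that toy trace the period drops from 16s to 2s across the two error-bearing passes and takes three quiet passes to climb back to the 16s ceiling, which is the "poll harder while errors persist, back off when idle" behaviour the variable_period path implements.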