debuggers.hg

view xen/arch/x86/cpu/mcheck/amd_nonfatal.c @ 20998:50ea24db1f88

x86/mcheck: do not blindly de-reference dom0 et al

Since machine checks and CMCIs can happen before Dom0 even gets
constructed, the handlers of these events have to avoid de-referencing
respective pointers without checking.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Feb 17 12:04:50 2010 +0000 (2010-02-17)
parents c6b22d0d1e90
children 3ffdb094c2c0 9f49e9794596
line source
1 /*
2 * MCA implementation for AMD CPUs
3 * Copyright (c) 2007 Advanced Micro Devices, Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
21 /* K8 common MCA documentation published at
22 *
23 * AMD64 Architecture Programmer's Manual Volume 2:
24 * System Programming
25 * Publication # 24593 Revision: 3.12
26 * Issue Date: September 2006
27 *
28 * URL:
29 * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/24593.pdf
30 */
32 /* The related documentation for K8 Revisions A - E is:
33 *
34 * BIOS and Kernel Developer's Guide for
35 * AMD Athlon 64 and AMD Opteron Processors
36 * Publication # 26094 Revision: 3.30
37 * Issue Date: February 2006
38 *
39 * URL:
40 * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/26094.PDF
41 */
43 /* The related documentation for K8 Revisions F - G is:
44 *
45 * BIOS and Kernel Developer's Guide for
46 * AMD NPT Family 0Fh Processors
47 * Publication # 32559 Revision: 3.04
48 * Issue Date: December 2006
49 *
50 * URL:
51 * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/32559.pdf
52 */
54 #include <xen/config.h>
55 #include <xen/init.h>
56 #include <xen/types.h>
57 #include <xen/kernel.h>
58 #include <xen/smp.h>
59 #include <xen/timer.h>
60 #include <xen/event.h>
62 #include <asm/processor.h>
63 #include <asm/system.h>
64 #include <asm/msr.h>
66 #include "mce.h"
68 static struct timer mce_timer;
70 #define MCE_PERIOD MILLISECS(10000)
71 #define MCE_MIN MILLISECS(2000)
72 #define MCE_MAX MILLISECS(30000)
74 static s_time_t period = MCE_PERIOD;
75 static int hw_threshold = 0;
76 static int adjust = 0;
77 static int variable_period = 1;
79 /* The polling service routine:
80 * Collects information of correctable errors and notifies
81 * Dom0 via an event.
82 */
83 static void mce_amd_checkregs(void *info)
84 {
85 mctelem_cookie_t mctc;
86 struct mca_summary bs;
88 mctc = mcheck_mca_logout(MCA_POLLER, mca_allbanks, &bs, NULL);
90 if (bs.errcnt && mctc != NULL) {
91 static uint64_t dumpcount = 0;
93 /* If Dom0 enabled the VIRQ_MCA event, then notify it.
94 * Otherwise, if dom0 has had plenty of time to register
95 * the virq handler but still hasn't then dump telemetry
96 * to the Xen console. The call count may be incremented
97 * on multiple cpus at once and is indicative only - just
98 * a simple-minded attempt to avoid spamming the console
99 * for corrected errors in early startup. */
101 if (dom0_vmce_enabled()) {
102 mctelem_commit(mctc);
103 send_guest_global_virq(dom0, VIRQ_MCA);
104 } else if (++dumpcount >= 10) {
105 x86_mcinfo_dump((struct mc_info *)mctelem_dataptr(mctc));
106 mctelem_dismiss(mctc);
107 } else {
108 mctelem_dismiss(mctc);
109 }
111 } else if (mctc != NULL) {
112 mctelem_dismiss(mctc);
113 }
115 /* adjust is global and all cpus may attempt to increment it without
116 * synchronisation, so they race and the final adjust count
117 * (number of cpus seeing any error) is approximate. We can
118 * guarantee that if any cpu observes an error that the
119 * adjust count is at least 1. */
120 if (bs.errcnt)
121 adjust++;
122 }
124 /* polling service routine invoker:
125 * Adjust poll frequency at runtime. No error means slow polling frequency,
126 * an error means higher polling frequency.
127 * It uses hw threshold register introduced in AMD K8 RevF to detect
128 * multiple correctable errors between two polls. In that case,
129 * increase polling frequency higher than normal.
130 */
131 static void mce_amd_work_fn(void *data)
132 {
133 on_each_cpu(mce_amd_checkregs, data, 1);
135 if (adjust > 0) {
136 if (!dom0_vmce_enabled()) {
137 /* Dom0 did not enable VIRQ_MCA, so Xen is reporting. */
138 printk("MCE: polling routine found correctable error. "
139 " Use mcelog to parse above error output.\n");
140 }
141 }
143 if (hw_threshold) {
144 uint64_t value;
145 uint32_t counter;
147 mca_rdmsrl(MSR_IA32_MC4_MISC, value);
148 /* Only the error counter field is of interest
149 * Bit field is described in AMD K8 BKDG chapter 6.4.5.5
150 */
151 counter = (value & 0xFFF00000000ULL) >> 32U;
153 /* HW does not count *all* kinds of correctable errors.
154 * Thus it is possible, that the polling routine finds an
155 * correctable error even if the HW reports nothing.
156 * However, the other way around is not possible (= BUG).
157 */
158 if (counter > 0) {
159 /* HW reported correctable errors,
160 * the polling routine did not find...
161 */
162 BUG_ON(adjust == 0);
163 /* subtract 1 to not double count the error
164 * from the polling service routine */
165 adjust += (counter - 1);
167 /* Restart counter */
168 /* No interrupt, reset counter value */
169 value &= ~(0x60FFF00000000ULL);
170 /* Counter enable */
171 value |= (1ULL << 51);
172 mca_wrmsrl(MSR_IA32_MC4_MISC, value);
173 wmb();
174 }
175 }
177 if (variable_period && adjust > 0) {
178 /* Increase polling frequency */
179 adjust++; /* adjust == 1 must have an effect */
180 period /= adjust;
181 } else if (variable_period) {
182 /* Decrease polling frequency */
183 period *= 2;
184 }
185 if (variable_period && period > MCE_MAX) {
186 /* limit: Poll at least every 30s */
187 period = MCE_MAX;
188 }
189 if (variable_period && period < MCE_MIN) {
190 /* limit: Poll every 2s.
191 * When this is reached an uncorrectable error
192 * is expected to happen, if Dom0 does nothing.
193 */
194 period = MCE_MIN;
195 }
197 set_timer(&mce_timer, NOW() + period);
198 adjust = 0;
199 }
201 void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c)
202 {
203 if (c->x86_vendor != X86_VENDOR_AMD)
204 return;
206 /* Assume we are on K8 or newer AMD CPU here */
208 /* The threshold bitfields in MSR_IA32_MC4_MISC has
209 * been introduced along with the SVME feature bit. */
210 if (variable_period && cpu_has(c, X86_FEATURE_SVME)) {
211 uint64_t value;
213 /* hw threshold registers present */
214 hw_threshold = 1;
215 rdmsrl(MSR_IA32_MC4_MISC, value);
217 if (value & (1ULL << 61)) { /* Locked bit */
218 /* Locked by BIOS. Not available for use */
219 hw_threshold = 0;
220 }
221 if (!(value & (1ULL << 63))) { /* Valid bit */
222 /* No CtrP present */
223 hw_threshold = 0;
224 } else {
225 if (!(value & (1ULL << 62))) { /* Counter Bit */
226 /* No counter field present */
227 hw_threshold = 0;
228 }
229 }
231 if (hw_threshold) {
232 /* No interrupt, reset counter value */
233 value &= ~(0x60FFF00000000ULL);
234 /* Counter enable */
235 value |= (1ULL << 51);
236 wrmsrl(MSR_IA32_MC4_MISC, value);
237 /* serialize */
238 wmb();
239 printk(XENLOG_INFO "MCA: Use hw thresholding to adjust polling frequency\n");
240 }
241 }
243 init_timer(&mce_timer, mce_amd_work_fn, NULL, 0);
244 set_timer(&mce_timer, NOW() + period);
246 return;
247 }