/root/src/xen/xen/arch/x86/cpu/mcheck/vmce.c
Line | Count | Source |
1 | | /* |
2 | | * vmce.c - provide software emulated vMCE support to guest |
3 | | * |
4 | | * Copyright (C) 2010, 2011 Jiang, Yunhong <yunhong.jiang@intel.com> |
5 | | * Copyright (C) 2012, 2013 Liu, Jinsong <jinsong.liu@intel.com> |
6 | | * |
7 | | * This program is free software; you can redistribute it and/or modify |
8 | | * it under the terms of the GNU General Public License as published by |
9 | | * the Free Software Foundation; either version 2 of the License, or |
10 | | * (at your option) any later version. |
11 | | * |
12 | | * This program is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU General Public License |
18 | | * along with this program; If not, see <http://www.gnu.org/licenses/>. |
19 | | */ |
20 | | |
21 | | #include <xen/init.h> |
22 | | #include <xen/types.h> |
23 | | #include <xen/irq.h> |
24 | | #include <xen/event.h> |
25 | | #include <xen/kernel.h> |
26 | | #include <xen/delay.h> |
27 | | #include <xen/smp.h> |
28 | | #include <xen/mm.h> |
29 | | #include <asm/hvm/save.h> |
30 | | #include <asm/processor.h> |
31 | | #include <public/sysctl.h> |
32 | | #include <asm/system.h> |
33 | | #include <asm/msr.h> |
34 | | #include <asm/p2m.h> |
35 | | #include <asm/pv/traps.h> |
36 | | |
37 | | #include "mce.h" |
38 | | #include "x86_mca.h" |
39 | | #include "vmce.h" |
40 | | |
41 | | /* |
42 | | * MCG_SER_P: software error recovery supported |
43 | | * MCG_TES_P: avoid MCi_STATUS bits 56:53 being treated as model specific |
44 | | * MCG_CMCI_P: expose the CMCI capability, but never actually inject CMCI |
45 | | * into the guest; for performance, so the guest does not poll periodically |
46 | | */ |
47 | 12 | #define INTEL_GUEST_MCG_CAP (MCG_SER_P | \ |
48 | 12 | MCG_TES_P | \ |
49 | 12 | MCG_CMCI_P | \ |
50 | 12 | GUEST_MC_BANK_NUM) |
51 | | |
52 | 0 | #define AMD_GUEST_MCG_CAP GUEST_MC_BANK_NUM |
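 | | /* |
 | |  * Note (per the Intel SDM): IA32_MCG_CAP keeps the bank count in bits 7:0, |
 | |  * so GUEST_MC_BANK_NUM can simply be OR'ed into the capability values |
 | |  * above, while the feature flags (MCG_SER_P etc.) occupy higher bits. |
 | |  */ |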
53 | | |
54 | | void vmce_init_vcpu(struct vcpu *v) |
55 | 12 | { |
56 | 12 | int i; |
57 | 12 | |
58 | 12 | /* global MCA MSRs init */ |
59 | 12 | if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ) |
60 | 12 | v->arch.vmce.mcg_cap = INTEL_GUEST_MCG_CAP; |
61 | 12 | else |
62 | 0 | v->arch.vmce.mcg_cap = AMD_GUEST_MCG_CAP; |
63 | 12 | |
64 | 12 | v->arch.vmce.mcg_status = 0; |
65 | 12 | |
66 | 12 | /* per-bank MCA MSRs init */ |
67 | 36 | for ( i = 0; i < GUEST_MC_BANK_NUM; i++ ) |
68 | 24 | memset(&v->arch.vmce.bank[i], 0, sizeof(struct vmce_bank)); |
69 | 12 | |
70 | 12 | spin_lock_init(&v->arch.vmce.lock); |
71 | 12 | } |
72 | | |
73 | | int vmce_restore_vcpu(struct vcpu *v, const struct hvm_vmce_vcpu *ctxt) |
74 | 0 | { |
75 | 0 | unsigned long guest_mcg_cap; |
76 | 0 | |
77 | 0 | if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ) |
78 | 0 | guest_mcg_cap = INTEL_GUEST_MCG_CAP | MCG_LMCE_P; |
79 | 0 | else |
80 | 0 | guest_mcg_cap = AMD_GUEST_MCG_CAP; |
81 | 0 | |
82 | 0 | if ( ctxt->caps & ~guest_mcg_cap & ~MCG_CAP_COUNT & ~MCG_CTL_P ) |
83 | 0 | { |
84 | 0 | dprintk(XENLOG_G_ERR, "%s restore: unsupported MCA capabilities" |
85 | 0 | " %#" PRIx64 " for %pv (supported: %#Lx)\n", |
86 | 0 | is_hvm_vcpu(v) ? "HVM" : "PV", ctxt->caps, |
87 | 0 | v, guest_mcg_cap & ~MCG_CAP_COUNT); |
88 | 0 | return -EPERM; |
89 | 0 | } |
90 | 0 | |
91 | 0 | v->arch.vmce.mcg_cap = ctxt->caps; |
92 | 0 | v->arch.vmce.bank[0].mci_ctl2 = ctxt->mci_ctl2_bank0; |
93 | 0 | v->arch.vmce.bank[1].mci_ctl2 = ctxt->mci_ctl2_bank1; |
94 | 0 | v->arch.vmce.mcg_ext_ctl = ctxt->mcg_ext_ctl; |
95 | 0 | |
96 | 0 | return 0; |
97 | 0 | } |
98 | | |
99 | | /* |
100 | | * For historical reasons the bank number may be greater than GUEST_MC_BANK_NUM |
101 | | * when migrating from an old vMCE version to the new one. |
102 | | */ |
103 | | static int bank_mce_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val) |
104 | 72 | { |
105 | 72 | int ret = 1; |
106 | 72 | unsigned int bank = (msr - MSR_IA32_MC0_CTL) / 4; |
107 | 72 | |
108 | 72 | *val = 0; |
109 | 72 | |
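 | | /* |
 | |  * Each MCA bank has four consecutive MSRs starting at MSR_IA32_MC0_CTL |
 | |  * (CTL, STATUS, ADDR, MISC), hence bank = (msr - MSR_IA32_MC0_CTL) / 4. |
 | |  * Masking msr with (-MSR_IA32_MC0_CTL | 3) folds every bank onto the |
 | |  * matching bank-0 MSR, so the switch below decodes the register type |
 | |  * once for all banks (bank_mce_wrmsr() uses the same trick). |
 | |  */ |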
110 | 72 | switch ( msr & (-MSR_IA32_MC0_CTL | 3) ) |
111 | 72 | { |
112 | 0 | case MSR_IA32_MC0_CTL: |
113 | 0 | /* MCi_CTL always reads as all 1's */ |
114 | 0 | *val = ~0UL; |
115 | 0 | mce_printk(MCE_VERBOSE, "MCE: %pv: rd MC%u_CTL %#"PRIx64"\n", |
116 | 0 | v, bank, *val); |
117 | 0 | break; |
118 | 0 | |
119 | 0 | case MSR_IA32_MC0_STATUS: |
120 | 0 | if ( bank < GUEST_MC_BANK_NUM ) |
121 | 0 | { |
122 | 0 | *val = v->arch.vmce.bank[bank].mci_status; |
123 | 0 | if ( *val ) |
124 | 0 | mce_printk(MCE_VERBOSE, "MCE: %pv: rd MC%u_STATUS %#"PRIx64"\n", |
125 | 0 | v, bank, *val); |
126 | 0 | } |
127 | 0 | break; |
128 | 0 | |
129 | 0 | case MSR_IA32_MC0_ADDR: |
130 | 0 | if ( bank < GUEST_MC_BANK_NUM ) |
131 | 0 | { |
132 | 0 | *val = v->arch.vmce.bank[bank].mci_addr; |
133 | 0 | if ( *val ) |
134 | 0 | mce_printk(MCE_VERBOSE, "MCE: %pv: rd MC%u_ADDR %#"PRIx64"\n", |
135 | 0 | v, bank, *val); |
136 | 0 | } |
137 | 0 | break; |
138 | 0 | |
139 | 0 | case MSR_IA32_MC0_MISC: |
140 | 0 | if ( bank < GUEST_MC_BANK_NUM ) |
141 | 0 | { |
142 | 0 | *val = v->arch.vmce.bank[bank].mci_misc; |
143 | 0 | if ( *val ) |
144 | 0 | mce_printk(MCE_VERBOSE, "MCE: %pv: rd MC%u_MISC %#"PRIx64"\n", |
145 | 0 | v, bank, *val); |
146 | 0 | } |
147 | 0 | break; |
148 | 0 | |
149 | 72 | default: |
150 | 72 | switch ( boot_cpu_data.x86_vendor ) |
151 | 72 | { |
152 | 71 | case X86_VENDOR_INTEL: |
153 | 71 | ret = vmce_intel_rdmsr(v, msr, val); |
154 | 71 | break; |
155 | 71 | |
156 | 0 | case X86_VENDOR_AMD: |
157 | 0 | ret = vmce_amd_rdmsr(v, msr, val); |
158 | 0 | break; |
159 | 71 | |
160 | 0 | default: |
161 | 0 | ret = 0; |
162 | 0 | break; |
163 | 72 | } |
164 | 71 | break; |
165 | 72 | } |
166 | 72 | |
167 | 71 | return ret; |
168 | 72 | } |
169 | | |
170 | | /* |
171 | | * < 0: Unsupported and will #GP fault to guest |
172 | | * = 0: Not handled, should be handled by other components |
173 | | * > 0: Success |
174 | | */ |
175 | | int vmce_rdmsr(uint32_t msr, uint64_t *val) |
176 | 96 | { |
177 | 96 | struct vcpu *cur = current; |
178 | 96 | int ret = 1; |
179 | 96 | |
180 | 96 | *val = 0; |
181 | 96 | |
182 | 96 | spin_lock(&cur->arch.vmce.lock); |
183 | 96 | |
184 | 96 | switch ( msr ) |
185 | 96 | { |
186 | 0 | case MSR_IA32_MCG_STATUS: |
187 | 0 | *val = cur->arch.vmce.mcg_status; |
188 | 0 | if ( *val ) |
189 | 0 | mce_printk(MCE_VERBOSE, |
190 | 0 | "MCE: %pv: rd MCG_STATUS %#"PRIx64"\n", cur, *val); |
191 | 0 | break; |
192 | 0 | |
193 | 12 | case MSR_IA32_MCG_CAP: |
194 | 12 | *val = cur->arch.vmce.mcg_cap; |
195 | 12 | mce_printk(MCE_VERBOSE, "MCE: %pv: rd MCG_CAP %#"PRIx64"\n", cur, *val); |
196 | 12 | break; |
197 | 0 | |
198 | 0 | case MSR_IA32_MCG_CTL: |
199 | 0 | if ( cur->arch.vmce.mcg_cap & MCG_CTL_P ) |
200 | 0 | *val = ~0ULL; |
201 | 0 | mce_printk(MCE_VERBOSE, "MCE: %pv: rd MCG_CTL %#"PRIx64"\n", cur, *val); |
202 | 0 | break; |
203 | 0 | |
204 | 0 | case MSR_IA32_MCG_EXT_CTL: |
205 | 0 | /* |
206 | 0 | * If MCG_LMCE_P is present in guest MSR_IA32_MCG_CAP, the LMCE and LOCK |
207 | 0 | * bits are always set in guest MSR_IA32_FEATURE_CONTROL by Xen, so there |
208 | 0 | * is no need to check them here. |
209 | 0 | */ |
210 | 0 | if ( cur->arch.vmce.mcg_cap & MCG_LMCE_P ) |
211 | 0 | { |
212 | 0 | *val = cur->arch.vmce.mcg_ext_ctl; |
213 | 0 | mce_printk(MCE_VERBOSE, "MCE: %pv: rd MCG_EXT_CTL %#"PRIx64"\n", |
214 | 0 | cur, *val); |
215 | 0 | } |
216 | 0 | else |
217 | 0 | { |
218 | 0 | ret = -1; |
219 | 0 | mce_printk(MCE_VERBOSE, "MCE: %pv: rd MCG_EXT_CTL, not supported\n", |
220 | 0 | cur); |
221 | 0 | } |
222 | 0 | break; |
223 | 0 | |
224 | 84 | default: |
225 | 72 | ret = mce_bank_msr(cur, msr) ? bank_mce_rdmsr(cur, msr, val) : 0; |
226 | 84 | break; |
227 | 96 | } |
228 | 96 | |
229 | 96 | spin_unlock(&cur->arch.vmce.lock); |
230 | 96 | |
231 | 96 | return ret; |
232 | 96 | } |
233 | | |
234 | | /* |
235 | | * For historical reasons the bank number may be greater than GUEST_MC_BANK_NUM |
236 | | * when migrating from an old vMCE version to the new one. |
237 | | */ |
238 | | static int bank_mce_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) |
239 | 117 | { |
240 | 117 | int ret = 1; |
241 | 117 | unsigned int bank = (msr - MSR_IA32_MC0_CTL) / 4; |
242 | 117 | |
243 | 117 | switch ( msr & (-MSR_IA32_MC0_CTL | 3) ) |
244 | 117 | { |
245 | 22 | case MSR_IA32_MC0_CTL: |
246 | 22 | /* |
247 | 22 | * If the guest unexpectedly clears any bit of MCi_CTL, treat the MSR |
248 | 22 | * as not implemented and ignore the write. |
249 | 22 | */ |
250 | 22 | break; |
251 | 22 | |
252 | 24 | case MSR_IA32_MC0_STATUS: |
253 | 24 | mce_printk(MCE_VERBOSE, "MCE: %pv: wr MC%u_STATUS %#"PRIx64"\n", |
254 | 24 | v, bank, val); |
255 | 24 | if ( val ) |
256 | 0 | ret = -1; |
257 | 24 | else if ( bank < GUEST_MC_BANK_NUM ) |
258 | 24 | v->arch.vmce.bank[bank].mci_status = val; |
259 | 24 | break; |
260 | 22 | |
261 | 0 | case MSR_IA32_MC0_ADDR: |
262 | 0 | mce_printk(MCE_VERBOSE, "MCE: %pv: wr MC%u_ADDR %#"PRIx64"\n", |
263 | 0 | v, bank, val); |
264 | 0 | if ( val ) |
265 | 0 | ret = -1; |
266 | 0 | else if ( bank < GUEST_MC_BANK_NUM ) |
267 | 0 | v->arch.vmce.bank[bank].mci_addr = val; |
268 | 0 | break; |
269 | 22 | |
270 | 0 | case MSR_IA32_MC0_MISC: |
271 | 0 | mce_printk(MCE_VERBOSE, "MCE: %pv: wr MC%u_MISC %#"PRIx64"\n", |
272 | 0 | v, bank, val); |
273 | 0 | if ( val ) |
274 | 0 | ret = -1; |
275 | 0 | else if ( bank < GUEST_MC_BANK_NUM ) |
276 | 0 | v->arch.vmce.bank[bank].mci_misc = val; |
277 | 0 | break; |
278 | 22 | |
279 | 72 | default: |
280 | 72 | switch ( boot_cpu_data.x86_vendor ) |
281 | 72 | { |
282 | 72 | case X86_VENDOR_INTEL: |
283 | 72 | ret = vmce_intel_wrmsr(v, msr, val); |
284 | 72 | break; |
285 | 72 | |
286 | 0 | case X86_VENDOR_AMD: |
287 | 0 | ret = vmce_amd_wrmsr(v, msr, val); |
288 | 0 | break; |
289 | 72 | |
290 | 0 | default: |
291 | 0 | ret = 0; |
292 | 0 | break; |
293 | 72 | } |
294 | 72 | break; |
295 | 117 | } |
296 | 117 | |
297 | 118 | return ret; |
298 | 117 | } |
299 | | |
300 | | /* |
301 | | * < 0: Unsupported and will #GP fault to guest |
302 | | * = 0: Not handled, should be handled by other components |
303 | | * > 0: Success |
304 | | */ |
305 | | int vmce_wrmsr(uint32_t msr, uint64_t val) |
306 | 169 | { |
307 | 169 | struct vcpu *cur = current; |
308 | 169 | int ret = 1; |
309 | 169 | |
310 | 169 | spin_lock(&cur->arch.vmce.lock); |
311 | 169 | |
312 | 169 | switch ( msr ) |
313 | 169 | { |
314 | 0 | case MSR_IA32_MCG_CTL: |
315 | 0 | /* If MCG_CTL exists then stick to all 1's, else ignore. */ |
316 | 0 | break; |
317 | 0 | |
318 | 0 | case MSR_IA32_MCG_STATUS: |
319 | 0 | cur->arch.vmce.mcg_status = val; |
320 | 0 | mce_printk(MCE_VERBOSE, "MCE: %pv: wr MCG_STATUS %"PRIx64"\n", |
321 | 0 | cur, val); |
322 | 0 | break; |
323 | 0 | |
324 | 0 | case MSR_IA32_MCG_CAP: |
325 | 0 | /* |
326 | 0 | * According to the Intel SDM, IA32_MCG_CAP is a read-only register and |
327 | 0 | * the effect of writing to it is undefined. Here we treat a write as |
328 | 0 | * 'write but no change', which will not surprise the guest. |
329 | 0 | */ |
330 | 0 | mce_printk(MCE_VERBOSE, "MCE: %pv: MCG_CAP is r/o\n", cur); |
331 | 0 | break; |
332 | 0 | |
333 | 0 | case MSR_IA32_MCG_EXT_CTL: |
334 | 0 | if ( (cur->arch.vmce.mcg_cap & MCG_LMCE_P) && |
335 | 0 | !(val & ~MCG_EXT_CTL_LMCE_EN) ) |
336 | 0 | cur->arch.vmce.mcg_ext_ctl = val; |
337 | 0 | else |
338 | 0 | ret = -1; |
339 | 0 | mce_printk(MCE_VERBOSE, "MCE: %pv: wr MCG_EXT_CTL %"PRIx64"%s\n", |
340 | 0 | cur, val, (ret == -1) ? ", not supported" : ""); |
341 | 0 | break; |
342 | 0 | |
343 | 167 | default: |
344 | 117 | ret = mce_bank_msr(cur, msr) ? bank_mce_wrmsr(cur, msr, val) : 0; |
345 | 167 | break; |
346 | 169 | } |
347 | 169 | |
348 | 168 | spin_unlock(&cur->arch.vmce.lock); |
349 | 168 | return ret; |
350 | 169 | } |
351 | | |
352 | | static int vmce_save_vcpu_ctxt(struct domain *d, hvm_domain_context_t *h) |
353 | 0 | { |
354 | 0 | struct vcpu *v; |
355 | 0 | int err = 0; |
356 | 0 | |
357 | 0 | for_each_vcpu ( d, v ) |
358 | 0 | { |
359 | 0 | struct hvm_vmce_vcpu ctxt = { |
360 | 0 | .caps = v->arch.vmce.mcg_cap, |
361 | 0 | .mci_ctl2_bank0 = v->arch.vmce.bank[0].mci_ctl2, |
362 | 0 | .mci_ctl2_bank1 = v->arch.vmce.bank[1].mci_ctl2, |
363 | 0 | .mcg_ext_ctl = v->arch.vmce.mcg_ext_ctl, |
364 | 0 | }; |
365 | 0 | |
366 | 0 | err = hvm_save_entry(VMCE_VCPU, v->vcpu_id, h, &ctxt); |
367 | 0 | if ( err ) |
368 | 0 | break; |
369 | 0 | } |
370 | 0 | |
371 | 0 | return err; |
372 | 0 | } |
373 | | |
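 | | /* |
 | |  * hvm_load_entry_zeroextend() accepts records saved by older Xen versions |
 | |  * that lack trailing fields and zero-fills them, so e.g. mcg_ext_ctl is |
 | |  * restored as 0 from such records. |
 | |  */ |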
374 | | static int vmce_load_vcpu_ctxt(struct domain *d, hvm_domain_context_t *h) |
375 | 0 | { |
376 | 0 | unsigned int vcpuid = hvm_load_instance(h); |
377 | 0 | struct vcpu *v; |
378 | 0 | struct hvm_vmce_vcpu ctxt; |
379 | 0 | int err; |
380 | 0 | |
381 | 0 | if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL ) |
382 | 0 | { |
383 | 0 | dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no vcpu%u\n", |
384 | 0 | d->domain_id, vcpuid); |
385 | 0 | err = -EINVAL; |
386 | 0 | } |
387 | 0 | else |
388 | 0 | err = hvm_load_entry_zeroextend(VMCE_VCPU, h, &ctxt); |
389 | 0 | |
390 | 0 | return err ?: vmce_restore_vcpu(v, &ctxt); |
391 | 0 | } |
392 | | |
393 | | HVM_REGISTER_SAVE_RESTORE(VMCE_VCPU, vmce_save_vcpu_ctxt, |
394 | | vmce_load_vcpu_ctxt, 1, HVMSR_PER_VCPU); |
395 | | |
396 | | /* |
397 | | * For Intel MCE, broadcast the vMCE to all vcpus. |
398 | | * For AMD MCE, inject the vMCE only to vcpu0. |
399 | | * |
400 | | * @ d, the domain into which the vMCE is injected |
401 | | * @ vcpu, |
402 | | * -1 (VMCE_INJECT_BROADCAST), broadcast the vMCE to all vcpus |
403 | | * >= 0, the vcpu the vMCE is injected to |
404 | | */ |
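 | | /* |
 | |  * Returns 0 on success, -ESRCH if no eligible vcpu was found, or -EBUSY |
 | |  * if a target vcpu cannot take the injection (a vMCE is already pending, |
 | |  * or a PV vcpu has no #MC callback registered). |
 | |  */ |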
405 | | int inject_vmce(struct domain *d, int vcpu) |
406 | 0 | { |
407 | 0 | struct vcpu *v; |
408 | 0 | int ret = -ESRCH; |
409 | 0 | |
410 | 0 | for_each_vcpu ( d, v ) |
411 | 0 | { |
412 | 0 | if ( vcpu != VMCE_INJECT_BROADCAST && vcpu != v->vcpu_id ) |
413 | 0 | continue; |
414 | 0 | |
415 | 0 | /* Don't inject to uninitialized VCPU. */ |
416 | 0 | if ( !v->is_initialised ) |
417 | 0 | continue; |
418 | 0 | |
419 | 0 | if ( (is_hvm_domain(d) || |
420 | 0 | pv_trap_callback_registered(v, TRAP_machine_check)) && |
421 | 0 | !test_and_set_bool(v->mce_pending) ) |
422 | 0 | { |
423 | 0 | mce_printk(MCE_VERBOSE, "MCE: inject vMCE to %pv\n", v); |
424 | 0 | vcpu_kick(v); |
425 | 0 | ret = 0; |
426 | 0 | } |
427 | 0 | else |
428 | 0 | { |
429 | 0 | mce_printk(MCE_QUIET, "Failed to inject vMCE to %pv\n", v); |
430 | 0 | ret = -EBUSY; |
431 | 0 | break; |
432 | 0 | } |
433 | 0 | |
434 | 0 | if ( vcpu != VMCE_INJECT_BROADCAST ) |
435 | 0 | break; |
436 | 0 | } |
437 | 0 | |
438 | 0 | return ret; |
439 | 0 | } |
440 | | |
441 | | static int vcpu_fill_mc_msrs(struct vcpu *v, uint64_t mcg_status, |
442 | | uint64_t mci_status, uint64_t mci_addr, |
443 | | uint64_t mci_misc) |
444 | 0 | { |
445 | 0 | if ( v->arch.vmce.mcg_status & MCG_STATUS_MCIP ) |
446 | 0 | { |
447 | 0 | mce_printk(MCE_QUIET, "MCE: %pv: guest has not handled previous" |
448 | 0 | " vMCE yet!\n", v); |
449 | 0 | return -EBUSY; |
450 | 0 | } |
451 | 0 | |
452 | 0 | spin_lock(&v->arch.vmce.lock); |
453 | 0 | |
454 | 0 | v->arch.vmce.mcg_status = mcg_status; |
455 | 0 | /* |
456 | 0 | * 1. Skip bank 0 to avoid the 'bank 0 quirk' of old processors |
457 | 0 | * 2. Filter the model-specific MSCOD error code out of the MCi_STATUS given to the guest |
458 | 0 | */ |
459 | 0 | v->arch.vmce.bank[1].mci_status = mci_status & MCi_STATUS_MSCOD_MASK; |
460 | 0 | v->arch.vmce.bank[1].mci_addr = mci_addr; |
461 | 0 | v->arch.vmce.bank[1].mci_misc = mci_misc; |
462 | 0 | |
463 | 0 | spin_unlock(&v->arch.vmce.lock); |
464 | 0 | |
465 | 0 | return 0; |
466 | 0 | } |
467 | | |
468 | | int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d, |
469 | | uint64_t gstatus, int vmce_vcpuid) |
470 | 0 | { |
471 | 0 | struct vcpu *v = d->vcpu[0]; |
472 | 0 | bool broadcast = (vmce_vcpuid == VMCE_INJECT_BROADCAST); |
473 | 0 | int ret, err; |
474 | 0 | |
475 | 0 | if ( mc_bank->mc_domid == DOMID_INVALID ) |
476 | 0 | return -EINVAL; |
477 | 0 | |
478 | 0 | if ( broadcast ) |
479 | 0 | gstatus &= ~MCG_STATUS_LMCE; |
480 | 0 | else if ( gstatus & MCG_STATUS_LMCE ) |
481 | 0 | { |
482 | 0 | ASSERT(vmce_vcpuid >= 0 && vmce_vcpuid < d->max_vcpus); |
483 | 0 | v = d->vcpu[vmce_vcpuid]; |
484 | 0 | } |
485 | 0 | |
486 | 0 | /* |
487 | 0 | * vMCE with the actual error information is injected to vCPU0, |
488 | 0 | * and, if broadcast is required, we choose to inject less severe |
489 | 0 | * vMCEs to other vCPUs. Thus guest can always get the severest |
490 | 0 | * error (i.e. the actual one) on vCPU0. If guest can recover from |
491 | 0 | * the severest error on vCPU0, the less severe errors on other |
492 | 0 | * vCPUs will not prevent guest from recovering on those vCPUs. |
493 | 0 | */ |
494 | 0 | ret = vcpu_fill_mc_msrs(v, gstatus, mc_bank->mc_status, |
495 | 0 | mc_bank->mc_addr, mc_bank->mc_misc); |
496 | 0 | if ( broadcast ) |
497 | 0 | for_each_vcpu ( d, v ) |
498 | 0 | { |
499 | 0 | if ( !v->vcpu_id ) |
500 | 0 | continue; |
501 | 0 | err = vcpu_fill_mc_msrs(v, MCG_STATUS_MCIP | MCG_STATUS_RIPV, |
502 | 0 | 0, 0, 0); |
503 | 0 | if ( err ) |
504 | 0 | ret = err; |
505 | 0 | } |
506 | 0 | |
507 | 0 | return ret; |
508 | 0 | } |
509 | | |
510 | | /* Reportedly, some RAM may be set up as mmio_direct to get the UC cache attribute */ |
511 | 0 | #define P2M_UNMAP_TYPES (p2m_to_mask(p2m_ram_rw) \ |
512 | 0 | | p2m_to_mask(p2m_ram_logdirty) \ |
513 | 0 | | p2m_to_mask(p2m_ram_ro) \ |
514 | 0 | | p2m_to_mask(p2m_mmio_direct)) |
515 | | |
516 | | /* |
517 | | * Currently all CPUs are rendezvoused at the MCE softirq handler, so there |
518 | | * is no need to consider the paging p2m type. |
519 | | * Currently only HVM guests with EPT paging mode are supported. |
520 | | * XXX the following situations are not yet handled: |
521 | | * PoD, foreign mapped, granted, shared |
522 | | */ |
523 | | int unmmap_broken_page(struct domain *d, mfn_t mfn, unsigned long gfn) |
524 | 0 | { |
525 | 0 | mfn_t r_mfn; |
526 | 0 | p2m_type_t pt; |
527 | 0 | int rc; |
528 | 0 | |
529 | 0 | /* Always trust dom0's MCE handler will prevent future access */ |
530 | 0 | if ( is_hardware_domain(d) ) |
531 | 0 | return 0; |
532 | 0 | |
533 | 0 | if ( !mfn_valid(mfn) ) |
534 | 0 | return -EINVAL; |
535 | 0 | |
536 | 0 | if ( !is_hvm_domain(d) || !paging_mode_hap(d) ) |
537 | 0 | return -EOPNOTSUPP; |
538 | 0 | |
539 | 0 | rc = -1; |
540 | 0 | r_mfn = get_gfn_query(d, gfn, &pt); |
541 | 0 | if ( p2m_to_mask(pt) & P2M_UNMAP_TYPES ) |
542 | 0 | { |
543 | 0 | ASSERT(mfn_x(r_mfn) == mfn_x(mfn)); |
544 | 0 | rc = p2m_change_type_one(d, gfn, pt, p2m_ram_broken); |
545 | 0 | } |
546 | 0 | put_gfn(d, gfn); |
547 | 0 | |
548 | 0 | return rc; |
549 | 0 | } |
550 | | |
551 | | int vmce_enable_mca_cap(struct domain *d, uint64_t cap) |
552 | 0 | { |
553 | 0 | struct vcpu *v; |
554 | 0 | |
555 | 0 | if ( cap & ~XEN_HVM_MCA_CAP_MASK ) |
556 | 0 | return -EINVAL; |
557 | 0 | |
558 | 0 | if ( cap & XEN_HVM_MCA_CAP_LMCE ) |
559 | 0 | { |
560 | 0 | if ( !lmce_support ) |
561 | 0 | return -EINVAL; |
562 | 0 | for_each_vcpu(d, v) |
563 | 0 | v->arch.vmce.mcg_cap |= MCG_LMCE_P; |
564 | 0 | } |
565 | 0 | |
566 | 0 | return 0; |
567 | 0 | } |
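
A minimal caller sketch (not part of vmce.c; added for illustration) of how the
tri-state return convention of vmce_rdmsr()/vmce_wrmsr() documented above is
meant to be consumed. The function name msr_read_intercept(), the helper
handle_other_msrs(), and the use of the X86EMUL_* constants are assumptions
made for this sketch, not taken from this file.

    /* Hypothetical MSR-read intercept dispatching on vmce_rdmsr()'s result. */
    static int msr_read_intercept(uint32_t msr, uint64_t *val)
    {
        int rc = vmce_rdmsr(msr, val);

        if ( rc < 0 )
            return X86EMUL_EXCEPTION;  /* unsupported: #GP faults the guest */
        if ( rc > 0 )
            return X86EMUL_OKAY;       /* handled by the vMCE emulation */

        /* rc == 0: not an MCA MSR; let other components handle it. */
        return handle_other_msrs(msr, val);
    }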