/root/src/xen/xen/arch/x86/hvm/svm/svm.c
Line | Count | Source |
1 | | /* |
2 | | * svm.c: handling SVM architecture-related VM exits |
3 | | * Copyright (c) 2004, Intel Corporation. |
4 | | * Copyright (c) 2005-2007, Advanced Micro Devices, Inc. |
5 | | * |
6 | | * This program is free software; you can redistribute it and/or modify it |
7 | | * under the terms and conditions of the GNU General Public License, |
8 | | * version 2, as published by the Free Software Foundation. |
9 | | * |
10 | | * This program is distributed in the hope it will be useful, but WITHOUT |
11 | | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
12 | | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
13 | | * more details. |
14 | | * |
15 | | * You should have received a copy of the GNU General Public License along with |
16 | | * this program; If not, see <http://www.gnu.org/licenses/>. |
17 | | */ |
18 | | |
19 | | #include <xen/init.h> |
20 | | #include <xen/lib.h> |
21 | | #include <xen/trace.h> |
22 | | #include <xen/sched.h> |
23 | | #include <xen/irq.h> |
24 | | #include <xen/softirq.h> |
25 | | #include <xen/hypercall.h> |
26 | | #include <xen/domain_page.h> |
27 | | #include <xen/xenoprof.h> |
28 | | #include <asm/current.h> |
29 | | #include <asm/io.h> |
30 | | #include <asm/paging.h> |
31 | | #include <asm/p2m.h> |
32 | | #include <asm/mem_sharing.h> |
33 | | #include <asm/regs.h> |
34 | | #include <asm/cpufeature.h> |
35 | | #include <asm/processor.h> |
36 | | #include <asm/amd.h> |
37 | | #include <asm/guest_access.h> |
38 | | #include <asm/debugreg.h> |
39 | | #include <asm/msr.h> |
40 | | #include <asm/i387.h> |
41 | | #include <asm/iocap.h> |
42 | | #include <asm/hvm/emulate.h> |
43 | | #include <asm/hvm/hvm.h> |
44 | | #include <asm/hvm/support.h> |
45 | | #include <asm/hvm/io.h> |
46 | | #include <asm/hvm/emulate.h> |
47 | | #include <asm/hvm/svm/asid.h> |
48 | | #include <asm/hvm/svm/svm.h> |
49 | | #include <asm/hvm/svm/vmcb.h> |
50 | | #include <asm/hvm/svm/emulate.h> |
51 | | #include <asm/hvm/svm/intr.h> |
52 | | #include <asm/hvm/svm/svmdebug.h> |
53 | | #include <asm/hvm/svm/nestedsvm.h> |
54 | | #include <asm/hvm/nestedhvm.h> |
55 | | #include <asm/x86_emulate.h> |
56 | | #include <public/sched.h> |
57 | | #include <asm/hvm/vpt.h> |
58 | | #include <asm/hvm/trace.h> |
59 | | #include <asm/hap.h> |
60 | | #include <asm/apic.h> |
61 | | #include <asm/debugger.h> |
62 | | #include <asm/xstate.h> |
63 | | |
64 | | void svm_asm_do_resume(void); |
65 | | |
66 | | u32 svm_feature_flags; |
67 | | |
68 | | /* Indicates whether guests may use EFER.LMSLE. */ |
69 | | bool_t cpu_has_lmsl; |
70 | | |
71 | | static void svm_update_guest_efer(struct vcpu *); |
72 | | |
73 | | static struct hvm_function_table svm_function_table; |
74 | | |
75 | | /* |
76 | | * Physical addresses of the Host State Area (for hardware) and vmcb (for Xen) |
77 | | * which contains Xen's fs/gs/tr/ldtr and GSBASE/STAR/SYSENTER state when in |
78 | | * guest vcpu context. |
79 | | */ |
80 | | static DEFINE_PER_CPU_READ_MOSTLY(paddr_t, hsa); |
81 | | static DEFINE_PER_CPU_READ_MOSTLY(paddr_t, host_vmcb); |
82 | | |
83 | | static bool_t amd_erratum383_found __read_mostly; |
84 | | |
85 | | /* OSVW bits */ |
86 | | static uint64_t osvw_length, osvw_status; |
87 | | static DEFINE_SPINLOCK(osvw_lock); |
88 | | |
89 | | /* Only crash the guest if the problem originates in kernel mode. */ |
90 | | static void svm_crash_or_fault(struct vcpu *v) |
91 | 0 | { |
92 | 0 | if ( vmcb_get_cpl(v->arch.hvm_svm.vmcb) ) |
93 | 0 | hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC); |
94 | 0 | else |
95 | 0 | domain_crash(v->domain); |
96 | 0 | } |
97 | | |
98 | | void __update_guest_eip(struct cpu_user_regs *regs, unsigned int inst_len) |
99 | 0 | { |
100 | 0 | struct vcpu *curr = current; |
101 | 0 |
102 | 0 | if ( unlikely(inst_len == 0) ) |
103 | 0 | return; |
104 | 0 |
105 | 0 | if ( unlikely(inst_len > MAX_INST_LEN) ) |
106 | 0 | { |
107 | 0 | gdprintk(XENLOG_ERR, "Bad instruction length %u\n", inst_len); |
108 | 0 | svm_crash_or_fault(curr); |
109 | 0 | return; |
110 | 0 | } |
111 | 0 |
112 | 0 | ASSERT(regs == guest_cpu_user_regs()); |
113 | 0 |
114 | 0 | regs->rip += inst_len; |
115 | 0 | regs->eflags &= ~X86_EFLAGS_RF; |
116 | 0 |
117 | 0 | curr->arch.hvm_svm.vmcb->interrupt_shadow = 0; |
118 | 0 |
119 | 0 | if ( regs->eflags & X86_EFLAGS_TF ) |
120 | 0 | hvm_inject_hw_exception(TRAP_debug, X86_EVENT_NO_EC); |
121 | 0 | } |
122 | | |
123 | | static void svm_cpu_down(void) |
124 | 0 | { |
125 | 0 | write_efer(read_efer() & ~EFER_SVME); |
126 | 0 | } |
127 | | |
128 | | unsigned long * |
129 | | svm_msrbit(unsigned long *msr_bitmap, uint32_t msr) |
130 | 0 | { |
131 | 0 | unsigned long *msr_bit = NULL; |
132 | 0 |
133 | 0 | /* |
134 | 0 | * See AMD64 Programmer's Manual, Vol 2, Section 15.10 (MSR-Bitmap Address). |
135 | 0 | */ |
136 | 0 | if ( msr <= 0x1fff ) |
137 | 0 | msr_bit = msr_bitmap + 0x0000 / BYTES_PER_LONG; |
138 | 0 | else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) ) |
139 | 0 | msr_bit = msr_bitmap + 0x0800 / BYTES_PER_LONG; |
140 | 0 | else if ( (msr >= 0xc0010000) && (msr <= 0xc0011fff) ) |
141 | 0 | msr_bit = msr_bitmap + 0x1000 / BYTES_PER_LONG; |
142 | 0 |
143 | 0 | return msr_bit; |
144 | 0 | } |
145 | | |
146 | | void svm_intercept_msr(struct vcpu *v, uint32_t msr, int flags) |
147 | 0 | { |
148 | 0 | unsigned long *msr_bit; |
149 | 0 |
150 | 0 | msr_bit = svm_msrbit(v->arch.hvm_svm.msrpm, msr); |
151 | 0 | BUG_ON(msr_bit == NULL); |
152 | 0 | msr &= 0x1fff; |
153 | 0 |
154 | 0 | if ( flags & MSR_INTERCEPT_READ ) |
155 | 0 | __set_bit(msr * 2, msr_bit); |
156 | 0 | else |
157 | 0 | __clear_bit(msr * 2, msr_bit); |
158 | 0 |
159 | 0 | if ( flags & MSR_INTERCEPT_WRITE ) |
160 | 0 | __set_bit(msr * 2 + 1, msr_bit); |
161 | 0 | else |
162 | 0 | __clear_bit(msr * 2 + 1, msr_bit); |
163 | 0 | } |
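/*
 * Editor's aside (not part of svm.c): a minimal sketch of how an MSR
 * number maps onto its pair of intercept bits in the 8KiB MSR permission
 * map, mirroring svm_msrbit()/svm_intercept_msr() above.  Each of the
 * three architectural MSR ranges owns a 2KiB block, and each MSR owns two
 * adjacent bits: bit 2n intercepts reads, bit 2n+1 intercepts writes.
 * msrpm_bit_of() is a hypothetical helper name.
 */
static inline unsigned int msrpm_bit_of(uint32_t msr, bool write)
{
    unsigned int base;

    if ( msr <= 0x1fff )
        base = 0x0000 * 8;           /* block 0: 0000_0000 - 0000_1fff */
    else if ( msr >= 0xc0000000 && msr <= 0xc0001fff )
        base = 0x0800 * 8;           /* block 1: c000_0000 - c000_1fff */
    else
        base = 0x1000 * 8;           /* block 2: c001_0000 - c001_1fff */

    return base + (msr & 0x1fff) * 2 + write;
}
/* e.g. MSR 0xc0000080 (EFER), write: 0x800*8 + 0x80*2 + 1 == bit 0x4101. */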
164 | | |
165 | | static void svm_save_dr(struct vcpu *v) |
166 | 0 | { |
167 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
168 | 0 | unsigned int flag_dr_dirty = v->arch.hvm_vcpu.flag_dr_dirty; |
169 | 0 |
170 | 0 | if ( !flag_dr_dirty ) |
171 | 0 | return; |
172 | 0 |
173 | 0 | /* Clear the DR dirty flag and re-enable intercepts for DR accesses. */ |
174 | 0 | v->arch.hvm_vcpu.flag_dr_dirty = 0; |
175 | 0 | vmcb_set_dr_intercepts(vmcb, ~0u); |
176 | 0 |
177 | 0 | if ( v->domain->arch.cpuid->extd.dbext ) |
178 | 0 | { |
179 | 0 | svm_intercept_msr(v, MSR_AMD64_DR0_ADDRESS_MASK, MSR_INTERCEPT_RW); |
180 | 0 | svm_intercept_msr(v, MSR_AMD64_DR1_ADDRESS_MASK, MSR_INTERCEPT_RW); |
181 | 0 | svm_intercept_msr(v, MSR_AMD64_DR2_ADDRESS_MASK, MSR_INTERCEPT_RW); |
182 | 0 | svm_intercept_msr(v, MSR_AMD64_DR3_ADDRESS_MASK, MSR_INTERCEPT_RW); |
183 | 0 |
184 | 0 | rdmsrl(MSR_AMD64_DR0_ADDRESS_MASK, v->arch.hvm_svm.dr_mask[0]); |
185 | 0 | rdmsrl(MSR_AMD64_DR1_ADDRESS_MASK, v->arch.hvm_svm.dr_mask[1]); |
186 | 0 | rdmsrl(MSR_AMD64_DR2_ADDRESS_MASK, v->arch.hvm_svm.dr_mask[2]); |
187 | 0 | rdmsrl(MSR_AMD64_DR3_ADDRESS_MASK, v->arch.hvm_svm.dr_mask[3]); |
188 | 0 | } |
189 | 0 |
190 | 0 | v->arch.debugreg[0] = read_debugreg(0); |
191 | 0 | v->arch.debugreg[1] = read_debugreg(1); |
192 | 0 | v->arch.debugreg[2] = read_debugreg(2); |
193 | 0 | v->arch.debugreg[3] = read_debugreg(3); |
194 | 0 | v->arch.debugreg[6] = vmcb_get_dr6(vmcb); |
195 | 0 | v->arch.debugreg[7] = vmcb_get_dr7(vmcb); |
196 | 0 | } |
197 | | |
198 | | static void __restore_debug_registers(struct vmcb_struct *vmcb, struct vcpu *v) |
199 | 0 | { |
200 | 0 | if ( v->arch.hvm_vcpu.flag_dr_dirty ) |
201 | 0 | return; |
202 | 0 |
203 | 0 | v->arch.hvm_vcpu.flag_dr_dirty = 1; |
204 | 0 | vmcb_set_dr_intercepts(vmcb, 0); |
205 | 0 |
206 | 0 | ASSERT(v == current); |
207 | 0 |
208 | 0 | if ( v->domain->arch.cpuid->extd.dbext ) |
209 | 0 | { |
210 | 0 | svm_intercept_msr(v, MSR_AMD64_DR0_ADDRESS_MASK, MSR_INTERCEPT_NONE); |
211 | 0 | svm_intercept_msr(v, MSR_AMD64_DR1_ADDRESS_MASK, MSR_INTERCEPT_NONE); |
212 | 0 | svm_intercept_msr(v, MSR_AMD64_DR2_ADDRESS_MASK, MSR_INTERCEPT_NONE); |
213 | 0 | svm_intercept_msr(v, MSR_AMD64_DR3_ADDRESS_MASK, MSR_INTERCEPT_NONE); |
214 | 0 |
215 | 0 | wrmsrl(MSR_AMD64_DR0_ADDRESS_MASK, v->arch.hvm_svm.dr_mask[0]); |
216 | 0 | wrmsrl(MSR_AMD64_DR1_ADDRESS_MASK, v->arch.hvm_svm.dr_mask[1]); |
217 | 0 | wrmsrl(MSR_AMD64_DR2_ADDRESS_MASK, v->arch.hvm_svm.dr_mask[2]); |
218 | 0 | wrmsrl(MSR_AMD64_DR3_ADDRESS_MASK, v->arch.hvm_svm.dr_mask[3]); |
219 | 0 | } |
220 | 0 |
221 | 0 | write_debugreg(0, v->arch.debugreg[0]); |
222 | 0 | write_debugreg(1, v->arch.debugreg[1]); |
223 | 0 | write_debugreg(2, v->arch.debugreg[2]); |
224 | 0 | write_debugreg(3, v->arch.debugreg[3]); |
225 | 0 | vmcb_set_dr6(vmcb, v->arch.debugreg[6]); |
226 | 0 | vmcb_set_dr7(vmcb, v->arch.debugreg[7]); |
227 | 0 | } |
228 | | |
229 | | /* |
230 | | * DR7 is saved and restored on every vmexit. Other debug registers only |
231 | | * need to be restored if their value is going to affect execution -- i.e., |
232 | | * if one of the breakpoints is enabled. So mask out all bits that don't |
233 | | * enable some breakpoint functionality. |
234 | | */ |
235 | | static void svm_restore_dr(struct vcpu *v) |
236 | 0 | { |
237 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
238 | 0 | if ( unlikely(v->arch.debugreg[7] & DR7_ACTIVE_MASK) ) |
239 | 0 | __restore_debug_registers(vmcb, v); |
240 | 0 | } |
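/*
 * Editor's note (illustrative summary, hedged): svm_save_dr() and
 * __restore_debug_registers() implement a lazy debug-register scheme
 * around flag_dr_dirty.  While the flag is clear, all DR accesses are
 * intercepted and nothing needs saving on context switch.  Once DR7 has
 * active breakpoints (or a DR intercept fires elsewhere in svm.c), the
 * intercepts are dropped, the guest's DR0-3 (and, with DBEXT, the
 * address-mask MSRs) go live on the CPU, and the next svm_save_dr()
 * snapshots them back into v->arch.debugreg[] and re-arms the intercepts.
 */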
241 | | |
242 | | static int svm_vmcb_save(struct vcpu *v, struct hvm_hw_cpu *c) |
243 | 0 | { |
244 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
245 | 0 |
246 | 0 | c->cr0 = v->arch.hvm_vcpu.guest_cr[0]; |
247 | 0 | c->cr2 = v->arch.hvm_vcpu.guest_cr[2]; |
248 | 0 | c->cr3 = v->arch.hvm_vcpu.guest_cr[3]; |
249 | 0 | c->cr4 = v->arch.hvm_vcpu.guest_cr[4]; |
250 | 0 |
251 | 0 | c->sysenter_cs = v->arch.hvm_svm.guest_sysenter_cs; |
252 | 0 | c->sysenter_esp = v->arch.hvm_svm.guest_sysenter_esp; |
253 | 0 | c->sysenter_eip = v->arch.hvm_svm.guest_sysenter_eip; |
254 | 0 |
255 | 0 | c->pending_event = 0; |
256 | 0 | c->error_code = 0; |
257 | 0 | if ( vmcb->eventinj.fields.v && |
258 | 0 | hvm_event_needs_reinjection(vmcb->eventinj.fields.type, |
259 | 0 | vmcb->eventinj.fields.vector) ) |
260 | 0 | { |
261 | 0 | c->pending_event = (uint32_t)vmcb->eventinj.bytes; |
262 | 0 | c->error_code = vmcb->eventinj.fields.errorcode; |
263 | 0 | } |
264 | 0 |
265 | 0 | return 1; |
266 | 0 | } |
267 | | |
268 | | static int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c) |
269 | 0 | { |
270 | 0 | struct page_info *page = NULL; |
271 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
272 | 0 | struct p2m_domain *p2m = p2m_get_hostp2m(v->domain); |
273 | 0 |
274 | 0 | if ( c->pending_valid ) |
275 | 0 | { |
276 | 0 | if ( (c->pending_type == 1) || (c->pending_type > 4) || |
277 | 0 | (c->pending_reserved != 0) ) |
278 | 0 | { |
279 | 0 | dprintk(XENLOG_ERR, "%pv: Invalid pending event %#"PRIx32"\n", |
280 | 0 | v, c->pending_event); |
281 | 0 | return -EINVAL; |
282 | 0 | } |
283 | 0 |
284 | 0 | if ( c->pending_error_valid && |
285 | 0 | c->error_code != (uint16_t)c->error_code ) |
286 | 0 | { |
287 | 0 | dprintk(XENLOG_ERR, "%pv: Invalid error code %#"PRIx32"\n", |
288 | 0 | v, c->error_code); |
289 | 0 | return -EINVAL; |
290 | 0 | } |
291 | 0 | } |
292 | 0 |
293 | 0 | if ( !paging_mode_hap(v->domain) ) |
294 | 0 | { |
295 | 0 | if ( c->cr0 & X86_CR0_PG ) |
296 | 0 | { |
297 | 0 | page = get_page_from_gfn(v->domain, c->cr3 >> PAGE_SHIFT, |
298 | 0 | NULL, P2M_ALLOC); |
299 | 0 | if ( !page ) |
300 | 0 | { |
301 | 0 | gdprintk(XENLOG_ERR, "Invalid CR3 value=%#"PRIx64"\n", |
302 | 0 | c->cr3); |
303 | 0 | return -EINVAL; |
304 | 0 | } |
305 | 0 | } |
306 | 0 |
307 | 0 | if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG ) |
308 | 0 | put_page(pagetable_get_page(v->arch.guest_table)); |
309 | 0 |
|
310 | 0 | v->arch.guest_table = |
311 | 0 | page ? pagetable_from_page(page) : pagetable_null(); |
312 | 0 | } |
313 | 0 |
314 | 0 | v->arch.hvm_vcpu.guest_cr[0] = c->cr0 | X86_CR0_ET; |
315 | 0 | v->arch.hvm_vcpu.guest_cr[2] = c->cr2; |
316 | 0 | v->arch.hvm_vcpu.guest_cr[3] = c->cr3; |
317 | 0 | v->arch.hvm_vcpu.guest_cr[4] = c->cr4; |
318 | 0 | svm_update_guest_cr(v, 0); |
319 | 0 | svm_update_guest_cr(v, 2); |
320 | 0 | svm_update_guest_cr(v, 4); |
321 | 0 |
322 | 0 | /* Load sysenter MSRs into both VMCB save area and VCPU fields. */ |
323 | 0 | vmcb->sysenter_cs = v->arch.hvm_svm.guest_sysenter_cs = c->sysenter_cs; |
324 | 0 | vmcb->sysenter_esp = v->arch.hvm_svm.guest_sysenter_esp = c->sysenter_esp; |
325 | 0 | vmcb->sysenter_eip = v->arch.hvm_svm.guest_sysenter_eip = c->sysenter_eip; |
326 | 0 | |
327 | 0 | if ( paging_mode_hap(v->domain) ) |
328 | 0 | { |
329 | 0 | vmcb_set_np_enable(vmcb, 1); |
330 | 0 | vmcb_set_g_pat(vmcb, MSR_IA32_CR_PAT_RESET /* guest PAT */); |
331 | 0 | vmcb_set_h_cr3(vmcb, pagetable_get_paddr(p2m_get_pagetable(p2m))); |
332 | 0 | } |
333 | 0 |
334 | 0 | if ( c->pending_valid && |
335 | 0 | hvm_event_needs_reinjection(c->pending_type, c->pending_vector) ) |
336 | 0 | { |
337 | 0 | gdprintk(XENLOG_INFO, "Re-injecting %#"PRIx32", %#"PRIx32"\n", |
338 | 0 | c->pending_event, c->error_code); |
339 | 0 | vmcb->eventinj.bytes = c->pending_event; |
340 | 0 | vmcb->eventinj.fields.errorcode = c->error_code; |
341 | 0 | } |
342 | 0 | else |
343 | 0 | vmcb->eventinj.bytes = 0; |
344 | 0 |
345 | 0 | vmcb->cleanbits.bytes = 0; |
346 | 0 | paging_update_paging_modes(v); |
347 | 0 |
348 | 0 | return 0; |
349 | 0 | } |
350 | | |
351 | | |
352 | | static void svm_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data) |
353 | 0 | { |
354 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
355 | 0 |
356 | 0 | data->shadow_gs = vmcb->kerngsbase; |
357 | 0 | data->msr_lstar = vmcb->lstar; |
358 | 0 | data->msr_star = vmcb->star; |
359 | 0 | data->msr_cstar = vmcb->cstar; |
360 | 0 | data->msr_syscall_mask = vmcb->sfmask; |
361 | 0 | data->msr_efer = v->arch.hvm_vcpu.guest_efer; |
362 | 0 | data->msr_flags = 0; |
363 | 0 | } |
364 | | |
365 | | |
366 | | static void svm_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data) |
367 | 0 | { |
368 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
369 | 0 |
370 | 0 | vmcb->kerngsbase = data->shadow_gs; |
371 | 0 | vmcb->lstar = data->msr_lstar; |
372 | 0 | vmcb->star = data->msr_star; |
373 | 0 | vmcb->cstar = data->msr_cstar; |
374 | 0 | vmcb->sfmask = data->msr_syscall_mask; |
375 | 0 | v->arch.hvm_vcpu.guest_efer = data->msr_efer; |
376 | 0 | svm_update_guest_efer(v); |
377 | 0 | } |
378 | | |
379 | | static void svm_save_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt) |
380 | 0 | { |
381 | 0 | svm_save_cpu_state(v, ctxt); |
382 | 0 | svm_vmcb_save(v, ctxt); |
383 | 0 | } |
384 | | |
385 | | static int svm_load_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt) |
386 | 0 | { |
387 | 0 | svm_load_cpu_state(v, ctxt); |
388 | 0 | if (svm_vmcb_restore(v, ctxt)) { |
389 | 0 | gdprintk(XENLOG_ERR, "svm_vmcb restore failed!\n"); |
390 | 0 | domain_crash(v->domain); |
391 | 0 | return -EINVAL; |
392 | 0 | } |
393 | 0 |
394 | 0 | return 0; |
395 | 0 | } |
396 | | |
397 | | static unsigned int __init svm_init_msr(void) |
398 | 0 | { |
399 | 0 | return boot_cpu_has(X86_FEATURE_DBEXT) ? 4 : 0; |
400 | 0 | } |
401 | | |
402 | | static void svm_save_msr(struct vcpu *v, struct hvm_msr *ctxt) |
403 | 0 | { |
404 | 0 | if ( boot_cpu_has(X86_FEATURE_DBEXT) ) |
405 | 0 | { |
406 | 0 | ctxt->msr[ctxt->count].val = v->arch.hvm_svm.dr_mask[0]; |
407 | 0 | if ( ctxt->msr[ctxt->count].val ) |
408 | 0 | ctxt->msr[ctxt->count++].index = MSR_AMD64_DR0_ADDRESS_MASK; |
409 | 0 |
410 | 0 | ctxt->msr[ctxt->count].val = v->arch.hvm_svm.dr_mask[1]; |
411 | 0 | if ( ctxt->msr[ctxt->count].val ) |
412 | 0 | ctxt->msr[ctxt->count++].index = MSR_AMD64_DR1_ADDRESS_MASK; |
413 | 0 |
414 | 0 | ctxt->msr[ctxt->count].val = v->arch.hvm_svm.dr_mask[2]; |
415 | 0 | if ( ctxt->msr[ctxt->count].val ) |
416 | 0 | ctxt->msr[ctxt->count++].index = MSR_AMD64_DR2_ADDRESS_MASK; |
417 | 0 |
418 | 0 | ctxt->msr[ctxt->count].val = v->arch.hvm_svm.dr_mask[3]; |
419 | 0 | if ( ctxt->msr[ctxt->count].val ) |
420 | 0 | ctxt->msr[ctxt->count++].index = MSR_AMD64_DR3_ADDRESS_MASK; |
421 | 0 | } |
422 | 0 | } |
423 | | |
424 | | static int svm_load_msr(struct vcpu *v, struct hvm_msr *ctxt) |
425 | 0 | { |
426 | 0 | unsigned int i, idx; |
427 | 0 | int err = 0; |
428 | 0 |
429 | 0 | for ( i = 0; i < ctxt->count; ++i ) |
430 | 0 | { |
431 | 0 | switch ( idx = ctxt->msr[i].index ) |
432 | 0 | { |
433 | 0 | case MSR_AMD64_DR0_ADDRESS_MASK: |
434 | 0 | if ( !boot_cpu_has(X86_FEATURE_DBEXT) ) |
435 | 0 | err = -ENXIO; |
436 | 0 | else if ( ctxt->msr[i].val >> 32 ) |
437 | 0 | err = -EDOM; |
438 | 0 | else |
439 | 0 | v->arch.hvm_svm.dr_mask[0] = ctxt->msr[i].val; |
440 | 0 | break; |
441 | 0 |
442 | 0 | case MSR_AMD64_DR1_ADDRESS_MASK ... MSR_AMD64_DR3_ADDRESS_MASK: |
443 | 0 | if ( !boot_cpu_has(X86_FEATURE_DBEXT) ) |
444 | 0 | err = -ENXIO; |
445 | 0 | else if ( ctxt->msr[i].val >> 32 ) |
446 | 0 | err = -EDOM; |
447 | 0 | else |
448 | 0 | v->arch.hvm_svm.dr_mask[idx - MSR_AMD64_DR1_ADDRESS_MASK + 1] = |
449 | 0 | ctxt->msr[i].val; |
450 | 0 | break; |
451 | 0 |
452 | 0 | default: |
453 | 0 | continue; |
454 | 0 | } |
455 | 0 | if ( err ) |
456 | 0 | break; |
457 | 0 | ctxt->msr[i]._rsvd = 1; |
458 | 0 | } |
459 | 0 |
460 | 0 | return err; |
461 | 0 | } |
462 | | |
463 | | static void svm_fpu_enter(struct vcpu *v) |
464 | 0 | { |
465 | 0 | struct vmcb_struct *n1vmcb = vcpu_nestedhvm(v).nv_n1vmcx; |
466 | 0 |
467 | 0 | vcpu_restore_fpu_lazy(v); |
468 | 0 | vmcb_set_exception_intercepts( |
469 | 0 | n1vmcb, |
470 | 0 | vmcb_get_exception_intercepts(n1vmcb) & ~(1U << TRAP_no_device)); |
471 | 0 | } |
472 | | |
473 | | static void svm_fpu_leave(struct vcpu *v) |
474 | 0 | { |
475 | 0 | struct vmcb_struct *n1vmcb = vcpu_nestedhvm(v).nv_n1vmcx; |
476 | 0 |
477 | 0 | ASSERT(!v->fpu_dirtied); |
478 | 0 | ASSERT(read_cr0() & X86_CR0_TS); |
479 | 0 |
480 | 0 | /* |
481 | 0 | * If the guest does not have TS enabled then we must cause and handle an |
482 | 0 | * exception on first use of the FPU. If the guest *does* have TS enabled |
483 | 0 | * then this is not necessary: no FPU activity can occur until the guest |
484 | 0 | * clears CR0.TS, and we will initialise the FPU when that happens. |
485 | 0 | */ |
486 | 0 | if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) ) |
487 | 0 | { |
488 | 0 | vmcb_set_exception_intercepts( |
489 | 0 | n1vmcb, |
490 | 0 | vmcb_get_exception_intercepts(n1vmcb) | (1U << TRAP_no_device)); |
491 | 0 | vmcb_set_cr0(n1vmcb, vmcb_get_cr0(n1vmcb) | X86_CR0_TS); |
492 | 0 | } |
493 | 0 | } |
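/*
 * Editor's note (hedged summary): svm_fpu_enter()/svm_fpu_leave() form
 * the usual lazy-FPU pair.  On deschedule, if the guest runs with CR0.TS
 * clear, the #NM intercept is armed and TS forced on in the VMCB; the
 * guest's first FPU instruction then faults into Xen, which restores the
 * FPU state via svm_fpu_enter() and drops the intercept again until the
 * next context switch.
 */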
494 | | |
495 | | static unsigned int svm_get_interrupt_shadow(struct vcpu *v) |
496 | 0 | { |
497 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
498 | 0 | unsigned int intr_shadow = 0; |
499 | 0 |
500 | 0 | if ( vmcb->interrupt_shadow ) |
501 | 0 | intr_shadow |= HVM_INTR_SHADOW_MOV_SS | HVM_INTR_SHADOW_STI; |
502 | 0 |
503 | 0 | if ( vmcb_get_general1_intercepts(vmcb) & GENERAL1_INTERCEPT_IRET ) |
504 | 0 | intr_shadow |= HVM_INTR_SHADOW_NMI; |
505 | 0 |
506 | 0 | return intr_shadow; |
507 | 0 | } |
508 | | |
509 | | static void svm_set_interrupt_shadow(struct vcpu *v, unsigned int intr_shadow) |
510 | 0 | { |
511 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
512 | 0 | u32 general1_intercepts = vmcb_get_general1_intercepts(vmcb); |
513 | 0 |
514 | 0 | vmcb->interrupt_shadow = |
515 | 0 | !!(intr_shadow & (HVM_INTR_SHADOW_MOV_SS|HVM_INTR_SHADOW_STI)); |
516 | 0 |
517 | 0 | general1_intercepts &= ~GENERAL1_INTERCEPT_IRET; |
518 | 0 | if ( intr_shadow & HVM_INTR_SHADOW_NMI ) |
519 | 0 | general1_intercepts |= GENERAL1_INTERCEPT_IRET; |
520 | 0 | vmcb_set_general1_intercepts(vmcb, general1_intercepts); |
521 | 0 | } |
522 | | |
523 | | static int svm_guest_x86_mode(struct vcpu *v) |
524 | 0 | { |
525 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
526 | 0 |
|
527 | 0 | if ( unlikely(!(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE)) ) |
528 | 0 | return 0; |
529 | 0 | if ( unlikely(guest_cpu_user_regs()->eflags & X86_EFLAGS_VM) ) |
530 | 0 | return 1; |
531 | 0 | if ( hvm_long_mode_active(v) && likely(vmcb->cs.l) ) |
532 | 0 | return 8; |
533 | 0 | return likely(vmcb->cs.db) ? 4 : 2; |
534 | 0 | } |
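/*
 * Editor's note: the return value above follows the hvm_guest_x86_mode()
 * convention -- 0 for real mode, 1 for virtual-8086, and otherwise the
 * default operand/address width in bytes (2, 4 or 8 for 16-, 32- and
 * 64-bit code segments respectively).
 */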
535 | | |
536 | | void svm_update_guest_cr(struct vcpu *v, unsigned int cr) |
537 | 0 | { |
538 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
539 | 0 | uint64_t value; |
540 | 0 |
541 | 0 | switch ( cr ) |
542 | 0 | { |
543 | 0 | case 0: { |
544 | 0 | unsigned long hw_cr0_mask = 0; |
545 | 0 |
546 | 0 | if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) ) |
547 | 0 | { |
548 | 0 | if ( v != current ) |
549 | 0 | hw_cr0_mask |= X86_CR0_TS; |
550 | 0 | else if ( vmcb_get_cr0(vmcb) & X86_CR0_TS ) |
551 | 0 | svm_fpu_enter(v); |
552 | 0 | } |
553 | 0 |
554 | 0 | value = v->arch.hvm_vcpu.guest_cr[0] | hw_cr0_mask; |
555 | 0 | if ( !paging_mode_hap(v->domain) ) |
556 | 0 | value |= X86_CR0_PG | X86_CR0_WP; |
557 | 0 | vmcb_set_cr0(vmcb, value); |
558 | 0 | break; |
559 | 0 | } |
560 | 0 | case 2: |
561 | 0 | vmcb_set_cr2(vmcb, v->arch.hvm_vcpu.guest_cr[2]); |
562 | 0 | break; |
563 | 0 | case 3: |
564 | 0 | vmcb_set_cr3(vmcb, v->arch.hvm_vcpu.hw_cr[3]); |
565 | 0 | if ( !nestedhvm_enabled(v->domain) ) |
566 | 0 | hvm_asid_flush_vcpu(v); |
567 | 0 | else if ( nestedhvm_vmswitch_in_progress(v) ) |
568 | 0 | ; /* CR3 switches during VMRUN/VMEXIT do not flush the TLB. */ |
569 | 0 | else |
570 | 0 | hvm_asid_flush_vcpu_asid( |
571 | 0 | nestedhvm_vcpu_in_guestmode(v) |
572 | 0 | ? &vcpu_nestedhvm(v).nv_n2asid : &v->arch.hvm_vcpu.n1asid); |
573 | 0 | break; |
574 | 0 | case 4: |
575 | 0 | value = HVM_CR4_HOST_MASK; |
576 | 0 | if ( paging_mode_hap(v->domain) ) |
577 | 0 | value &= ~X86_CR4_PAE; |
578 | 0 | value |= v->arch.hvm_vcpu.guest_cr[4]; |
579 | 0 |
580 | 0 | if ( !hvm_paging_enabled(v) ) |
581 | 0 | { |
582 | 0 | /* |
583 | 0 | * When the guest thinks paging is disabled, Xen may need to hide |
584 | 0 | * the effects of shadow paging, as hardware runs with the host |
585 | 0 | * paging settings, rather than the guest's settings. |
586 | 0 | * |
587 | 0 | * Without CR0.PG, all memory accesses are user mode, so |
588 | 0 | * _PAGE_USER must be set in the shadow pagetables for guest |
589 | 0 | * userspace to function. This in turn trips up guest supervisor |
590 | 0 | * mode if SMEP/SMAP are left active in context. They wouldn't |
591 | 0 | * have any effect if paging was actually disabled, so hide them |
592 | 0 | * behind the back of the guest. |
593 | 0 | */ |
594 | 0 | value &= ~(X86_CR4_SMEP | X86_CR4_SMAP); |
595 | 0 | } |
596 | 0 |
597 | 0 | vmcb_set_cr4(vmcb, value); |
598 | 0 | break; |
599 | 0 | default: |
600 | 0 | BUG(); |
601 | 0 | } |
602 | 0 | } |
603 | | |
604 | | static void svm_update_guest_efer(struct vcpu *v) |
605 | 0 | { |
606 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
607 | 0 | bool_t lma = !!(v->arch.hvm_vcpu.guest_efer & EFER_LMA); |
608 | 0 | uint64_t new_efer; |
609 | 0 |
610 | 0 | new_efer = (v->arch.hvm_vcpu.guest_efer | EFER_SVME) & ~EFER_LME; |
611 | 0 | if ( lma ) |
612 | 0 | new_efer |= EFER_LME; |
613 | 0 | vmcb_set_efer(vmcb, new_efer); |
614 | 0 | } |
615 | | |
616 | | static void svm_update_guest_vendor(struct vcpu *v) |
617 | 0 | { |
618 | 0 | struct arch_svm_struct *arch_svm = &v->arch.hvm_svm; |
619 | 0 | struct vmcb_struct *vmcb = arch_svm->vmcb; |
620 | 0 | u32 bitmap = vmcb_get_exception_intercepts(vmcb); |
621 | 0 |
622 | 0 | if ( opt_hvm_fep || |
623 | 0 | (v->domain->arch.cpuid->x86_vendor != boot_cpu_data.x86_vendor) ) |
624 | 0 | bitmap |= (1U << TRAP_invalid_op); |
625 | 0 | else |
626 | 0 | bitmap &= ~(1U << TRAP_invalid_op); |
627 | 0 |
628 | 0 | vmcb_set_exception_intercepts(vmcb, bitmap); |
629 | 0 | } |
630 | | |
631 | | static void svm_sync_vmcb(struct vcpu *v) |
632 | 0 | { |
633 | 0 | struct arch_svm_struct *arch_svm = &v->arch.hvm_svm; |
634 | 0 |
635 | 0 | if ( arch_svm->vmcb_in_sync ) |
636 | 0 | return; |
637 | 0 |
638 | 0 | arch_svm->vmcb_in_sync = 1; |
639 | 0 |
640 | 0 | svm_vmsave(arch_svm->vmcb); |
641 | 0 | } |
642 | | |
643 | | static unsigned int svm_get_cpl(struct vcpu *v) |
644 | 0 | { |
645 | 0 | return vmcb_get_cpl(v->arch.hvm_svm.vmcb); |
646 | 0 | } |
647 | | |
648 | | static void svm_get_segment_register(struct vcpu *v, enum x86_segment seg, |
649 | | struct segment_register *reg) |
650 | 0 | { |
651 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
652 | 0 |
653 | 0 | ASSERT((v == current) || !vcpu_runnable(v)); |
654 | 0 |
655 | 0 | switch ( seg ) |
656 | 0 | { |
657 | 0 | case x86_seg_fs ... x86_seg_gs: |
658 | 0 | svm_sync_vmcb(v); |
659 | 0 |
660 | 0 | /* Fallthrough. */ |
661 | 0 | case x86_seg_es ... x86_seg_ds: |
662 | 0 | *reg = vmcb->sreg[seg]; |
663 | 0 |
664 | 0 | if ( seg == x86_seg_ss ) |
665 | 0 | reg->dpl = vmcb_get_cpl(vmcb); |
666 | 0 | break; |
667 | 0 |
668 | 0 | case x86_seg_tr: |
669 | 0 | svm_sync_vmcb(v); |
670 | 0 | *reg = vmcb->tr; |
671 | 0 | break; |
672 | 0 |
673 | 0 | case x86_seg_gdtr: |
674 | 0 | *reg = vmcb->gdtr; |
675 | 0 | break; |
676 | 0 |
677 | 0 | case x86_seg_idtr: |
678 | 0 | *reg = vmcb->idtr; |
679 | 0 | break; |
680 | 0 |
681 | 0 | case x86_seg_ldtr: |
682 | 0 | svm_sync_vmcb(v); |
683 | 0 | *reg = vmcb->ldtr; |
684 | 0 | break; |
685 | 0 |
686 | 0 | default: |
687 | 0 | ASSERT_UNREACHABLE(); |
688 | 0 | domain_crash(v->domain); |
689 | 0 | *reg = (struct segment_register){}; |
690 | 0 | } |
691 | 0 | } |
692 | | |
693 | | static void svm_set_segment_register(struct vcpu *v, enum x86_segment seg, |
694 | | struct segment_register *reg) |
695 | 0 | { |
696 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
697 | 0 | bool sync = false; |
698 | 0 |
699 | 0 | ASSERT((v == current) || !vcpu_runnable(v)); |
700 | 0 |
701 | 0 | switch ( seg ) |
702 | 0 | { |
703 | 0 | case x86_seg_cs: |
704 | 0 | case x86_seg_ds: |
705 | 0 | case x86_seg_es: |
706 | 0 | case x86_seg_ss: /* cpl */ |
707 | 0 | vmcb->cleanbits.fields.seg = 0; |
708 | 0 | break; |
709 | 0 |
710 | 0 | case x86_seg_gdtr: |
711 | 0 | case x86_seg_idtr: |
712 | 0 | vmcb->cleanbits.fields.dt = 0; |
713 | 0 | break; |
714 | 0 |
715 | 0 | case x86_seg_fs: |
716 | 0 | case x86_seg_gs: |
717 | 0 | case x86_seg_tr: |
718 | 0 | case x86_seg_ldtr: |
719 | 0 | sync = (v == current); |
720 | 0 | break; |
721 | 0 |
722 | 0 | default: |
723 | 0 | ASSERT_UNREACHABLE(); |
724 | 0 | domain_crash(v->domain); |
725 | 0 | return; |
726 | 0 | } |
727 | 0 |
728 | 0 | if ( sync ) |
729 | 0 | svm_sync_vmcb(v); |
730 | 0 |
731 | 0 | switch ( seg ) |
732 | 0 | { |
733 | 0 | case x86_seg_ss: |
734 | 0 | vmcb_set_cpl(vmcb, reg->dpl); |
735 | 0 |
736 | 0 | /* Fallthrough */ |
737 | 0 | case x86_seg_es ... x86_seg_cs: |
738 | 0 | case x86_seg_ds ... x86_seg_gs: |
739 | 0 | vmcb->sreg[seg] = *reg; |
740 | 0 | break; |
741 | 0 |
742 | 0 | case x86_seg_tr: |
743 | 0 | vmcb->tr = *reg; |
744 | 0 | break; |
745 | 0 |
746 | 0 | case x86_seg_gdtr: |
747 | 0 | vmcb->gdtr.base = reg->base; |
748 | 0 | vmcb->gdtr.limit = reg->limit; |
749 | 0 | break; |
750 | 0 |
751 | 0 | case x86_seg_idtr: |
752 | 0 | vmcb->idtr.base = reg->base; |
753 | 0 | vmcb->idtr.limit = reg->limit; |
754 | 0 | break; |
755 | 0 |
756 | 0 | case x86_seg_ldtr: |
757 | 0 | vmcb->ldtr = *reg; |
758 | 0 | break; |
759 | 0 |
760 | 0 | case x86_seg_none: |
761 | 0 | ASSERT_UNREACHABLE(); |
762 | 0 | break; |
763 | 0 | } |
764 | 0 |
765 | 0 | if ( sync ) |
766 | 0 | svm_vmload(vmcb); |
767 | 0 | } |
768 | | |
769 | | static unsigned long svm_get_shadow_gs_base(struct vcpu *v) |
770 | 0 | { |
771 | 0 | return v->arch.hvm_svm.vmcb->kerngsbase; |
772 | 0 | } |
773 | | |
774 | | static int svm_set_guest_pat(struct vcpu *v, u64 gpat) |
775 | 0 | { |
776 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
777 | 0 |
778 | 0 | if ( !paging_mode_hap(v->domain) ) |
779 | 0 | return 0; |
780 | 0 |
781 | 0 | vmcb_set_g_pat(vmcb, gpat); |
782 | 0 | return 1; |
783 | 0 | } |
784 | | |
785 | | static int svm_get_guest_pat(struct vcpu *v, u64 *gpat) |
786 | 0 | { |
787 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
788 | 0 |
789 | 0 | if ( !paging_mode_hap(v->domain) ) |
790 | 0 | return 0; |
791 | 0 |
792 | 0 | *gpat = vmcb_get_g_pat(vmcb); |
793 | 0 | return 1; |
794 | 0 | } |
795 | | |
796 | | static uint64_t scale_tsc(uint64_t host_tsc, uint64_t ratio) |
797 | 0 | { |
798 | 0 | uint64_t mult, frac, scaled_host_tsc; |
799 | 0 |
800 | 0 | if ( ratio == DEFAULT_TSC_RATIO ) |
801 | 0 | return host_tsc; |
802 | 0 |
803 | 0 | /* |
804 | 0 | * Suppose the most significant 32 bits of host_tsc and ratio are |
805 | 0 | * tsc_h and mult, and the least significant 32 bits are tsc_l and frac, |
806 | 0 | * then |
807 | 0 | * host_tsc * ratio * 2^-32 |
808 | 0 | * = host_tsc * (mult * 2^32 + frac) * 2^-32 |
809 | 0 | * = host_tsc * mult + (tsc_h * 2^32 + tsc_l) * frac * 2^-32 |
810 | 0 | * = host_tsc * mult + tsc_h * frac + ((tsc_l * frac) >> 32) |
811 | 0 | * |
812 | 0 | * Multiplications in the last two terms are between 32-bit integers, |
813 | 0 | * so both of them can fit in 64-bit integers. |
814 | 0 | * |
815 | 0 | * Because mult is usually less than 10 in practice, it's very rare |
816 | 0 | * that host_tsc * mult can overflow a 64-bit integer. |
817 | 0 | */ |
818 | 0 | mult = ratio >> 32; |
819 | 0 | frac = ratio & ((1ULL << 32) - 1); |
820 | 0 | scaled_host_tsc = host_tsc * mult; |
821 | 0 | scaled_host_tsc += (host_tsc >> 32) * frac; |
822 | 0 | scaled_host_tsc += ((host_tsc & ((1ULL << 32) - 1)) * frac) >> 32; |
823 | 0 |
824 | 0 | return scaled_host_tsc; |
825 | 0 | } |
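/*
 * Worked example for scale_tsc() (editor's illustration; numbers made
 * up): ratio = 1.5 in 32.32 fixed point is 0x0000000180000000, so
 * mult = 0x1 and frac = 0x80000000.  For host_tsc = 1ULL << 34:
 *   host_tsc * mult                          = 0x0000000400000000
 *   (host_tsc >> 32) * frac    = 4 * 2^31    = 0x0000000200000000
 *   ((host_tsc & 0xffffffff) * frac) >> 32   = 0
 * summing to 0x0000000600000000, i.e. exactly 1.5 * host_tsc.
 */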
826 | | |
827 | | static uint64_t svm_get_tsc_offset(uint64_t host_tsc, uint64_t guest_tsc, |
828 | | uint64_t ratio) |
829 | 0 | { |
830 | 0 | return guest_tsc - scale_tsc(host_tsc, ratio); |
831 | 0 | } |
832 | | |
833 | | static void svm_set_tsc_offset(struct vcpu *v, u64 offset, u64 at_tsc) |
834 | 0 | { |
835 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
836 | 0 | struct vmcb_struct *n1vmcb, *n2vmcb; |
837 | 0 | uint64_t n2_tsc_offset = 0; |
838 | 0 | struct domain *d = v->domain; |
839 | 0 |
840 | 0 | if ( !nestedhvm_enabled(d) ) { |
841 | 0 | vmcb_set_tsc_offset(vmcb, offset); |
842 | 0 | return; |
843 | 0 | } |
844 | 0 |
845 | 0 | n1vmcb = vcpu_nestedhvm(v).nv_n1vmcx; |
846 | 0 | n2vmcb = vcpu_nestedhvm(v).nv_n2vmcx; |
847 | 0 |
848 | 0 | if ( nestedhvm_vcpu_in_guestmode(v) ) { |
849 | 0 | struct nestedsvm *svm = &vcpu_nestedsvm(v); |
850 | 0 |
851 | 0 | n2_tsc_offset = vmcb_get_tsc_offset(n2vmcb) - |
852 | 0 | vmcb_get_tsc_offset(n1vmcb); |
853 | 0 | if ( svm->ns_tscratio != DEFAULT_TSC_RATIO ) { |
854 | 0 | uint64_t guest_tsc = hvm_get_guest_tsc_fixed(v, at_tsc); |
855 | 0 |
856 | 0 | n2_tsc_offset = svm_get_tsc_offset(guest_tsc, |
857 | 0 | guest_tsc + n2_tsc_offset, |
858 | 0 | svm->ns_tscratio); |
859 | 0 | } |
860 | 0 | vmcb_set_tsc_offset(n1vmcb, offset); |
861 | 0 | } |
862 | 0 |
863 | 0 | vmcb_set_tsc_offset(vmcb, offset + n2_tsc_offset); |
864 | 0 | } |
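/*
 * Editor's sketch of what svm_set_tsc_offset() composes when an L2 guest
 * is in context (a hedged reading of the code above, not a formula taken
 * from the manuals):
 *
 *   L1's TSC = host TSC (scaled by the host ratio) + l1 offset   (n1vmcb)
 *   L2's TSC = L1's TSC scaled by ns_tscratio      + l2 offset   (n2vmcb)
 *
 * so the live VMCB receives offset + n2_tsc_offset, where n2_tsc_offset
 * is the L1->L2 delta, re-derived through ns_tscratio whenever L1 runs
 * L2 with a non-default TSC ratio.
 */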
865 | | |
866 | | static void svm_set_rdtsc_exiting(struct vcpu *v, bool_t enable) |
867 | 0 | { |
868 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
869 | 0 | u32 general1_intercepts = vmcb_get_general1_intercepts(vmcb); |
870 | 0 | u32 general2_intercepts = vmcb_get_general2_intercepts(vmcb); |
871 | 0 |
872 | 0 | general1_intercepts &= ~GENERAL1_INTERCEPT_RDTSC; |
873 | 0 | general2_intercepts &= ~GENERAL2_INTERCEPT_RDTSCP; |
874 | 0 |
875 | 0 | if ( enable ) |
876 | 0 | { |
877 | 0 | general1_intercepts |= GENERAL1_INTERCEPT_RDTSC; |
878 | 0 | general2_intercepts |= GENERAL2_INTERCEPT_RDTSCP; |
879 | 0 | } |
880 | 0 |
881 | 0 | vmcb_set_general1_intercepts(vmcb, general1_intercepts); |
882 | 0 | vmcb_set_general2_intercepts(vmcb, general2_intercepts); |
883 | 0 | } |
884 | | |
885 | | static void svm_set_descriptor_access_exiting(struct vcpu *v, bool enable) |
886 | 0 | { |
887 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
888 | 0 | u32 general1_intercepts = vmcb_get_general1_intercepts(vmcb); |
889 | 0 | u32 mask = GENERAL1_INTERCEPT_IDTR_READ | GENERAL1_INTERCEPT_GDTR_READ |
890 | 0 | | GENERAL1_INTERCEPT_LDTR_READ | GENERAL1_INTERCEPT_TR_READ |
891 | 0 | | GENERAL1_INTERCEPT_IDTR_WRITE | GENERAL1_INTERCEPT_GDTR_WRITE |
892 | 0 | | GENERAL1_INTERCEPT_LDTR_WRITE | GENERAL1_INTERCEPT_TR_WRITE; |
893 | 0 |
894 | 0 | if ( enable ) |
895 | 0 | general1_intercepts |= mask; |
896 | 0 | else |
897 | 0 | general1_intercepts &= ~mask; |
898 | 0 |
899 | 0 | vmcb_set_general1_intercepts(vmcb, general1_intercepts); |
900 | 0 | } |
901 | | |
902 | | static unsigned int svm_get_insn_bytes(struct vcpu *v, uint8_t *buf) |
903 | 0 | { |
904 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
905 | 0 | unsigned int len = v->arch.hvm_svm.cached_insn_len; |
906 | 0 |
907 | 0 | if ( len != 0 ) |
908 | 0 | { |
909 | 0 | /* Latch and clear the cached instruction. */ |
910 | 0 | memcpy(buf, vmcb->guest_ins, MAX_INST_LEN); |
911 | 0 | v->arch.hvm_svm.cached_insn_len = 0; |
912 | 0 | } |
913 | 0 |
914 | 0 | return len; |
915 | 0 | } |
916 | | |
917 | | static void svm_init_hypercall_page(struct domain *d, void *hypercall_page) |
918 | 0 | { |
919 | 0 | char *p; |
920 | 0 | int i; |
921 | 0 |
922 | 0 | for ( i = 0; i < (PAGE_SIZE / 32); i++ ) |
923 | 0 | { |
924 | 0 | if ( i == __HYPERVISOR_iret ) |
925 | 0 | continue; |
926 | 0 |
927 | 0 | p = (char *)(hypercall_page + (i * 32)); |
928 | 0 | *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */ |
929 | 0 | *(u32 *)(p + 1) = i; |
930 | 0 | *(u8 *)(p + 5) = 0x0f; /* vmmcall */ |
931 | 0 | *(u8 *)(p + 6) = 0x01; |
932 | 0 | *(u8 *)(p + 7) = 0xd9; |
933 | 0 | *(u8 *)(p + 8) = 0xc3; /* ret */ |
934 | 0 | } |
935 | 0 |
936 | 0 | /* Don't support HYPERVISOR_iret at the moment */ |
937 | 0 | *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */ |
938 | 0 | } |
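/*
 * For reference (editor's annotation): each 32-byte stub emitted above
 * disassembles as
 *
 *   b8 xx xx xx xx      mov  $<hypercall-number>, %eax
 *   0f 01 d9            vmmcall
 *   c3                  ret
 *
 * while the __HYPERVISOR_iret slot is stubbed with 0f 0b (ud2) instead.
 */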
939 | | |
940 | | static void svm_lwp_interrupt(struct cpu_user_regs *regs) |
941 | 0 | { |
942 | 0 | struct vcpu *curr = current; |
943 | 0 |
944 | 0 | ack_APIC_irq(); |
945 | 0 | vlapic_set_irq( |
946 | 0 | vcpu_vlapic(curr), |
947 | 0 | (curr->arch.hvm_svm.guest_lwp_cfg >> 40) & 0xff, |
948 | 0 | 0); |
949 | 0 | } |
950 | | |
951 | | static inline void svm_lwp_save(struct vcpu *v) |
952 | 0 | { |
953 | 0 | /* Don't interfere with other guests. Disable LWP for the next VCPU. */ |
954 | 0 | if ( v->arch.hvm_svm.guest_lwp_cfg ) |
955 | 0 | { |
956 | 0 | wrmsrl(MSR_AMD64_LWP_CFG, 0x0); |
957 | 0 | wrmsrl(MSR_AMD64_LWP_CBADDR, 0x0); |
958 | 0 | } |
959 | 0 | } |
960 | | |
961 | | static inline void svm_lwp_load(struct vcpu *v) |
962 | 0 | { |
963 | 0 | /* Only LWP_CFG is reloaded. LWP_CBADDR will be reloaded via xrstor. */ |
964 | 0 | if ( v->arch.hvm_svm.guest_lwp_cfg ) |
965 | 0 | wrmsrl(MSR_AMD64_LWP_CFG, v->arch.hvm_svm.cpu_lwp_cfg); |
966 | 0 | } |
967 | | |
968 | | /* Update LWP_CFG MSR (0xc0000105). Return -1 if error; otherwise returns 0. */ |
969 | | static int svm_update_lwp_cfg(struct vcpu *v, uint64_t msr_content) |
970 | 0 | { |
971 | 0 | uint32_t msr_low; |
972 | 0 | static uint8_t lwp_intr_vector; |
973 | 0 |
|
974 | 0 | if ( xsave_enabled(v) && cpu_has_lwp ) |
975 | 0 | { |
976 | 0 | msr_low = (uint32_t)msr_content; |
977 | 0 | |
978 | 0 | /* generate #GP if guest tries to turn on unsupported features. */ |
979 | 0 | if ( msr_low & ~v->domain->arch.cpuid->extd.raw[0x1c].d ) |
980 | 0 | return -1; |
981 | 0 |
982 | 0 | v->arch.hvm_svm.guest_lwp_cfg = msr_content; |
983 | 0 |
984 | 0 | /* setup interrupt handler if needed */ |
985 | 0 | if ( (msr_content & 0x80000000) && ((msr_content >> 40) & 0xff) ) |
986 | 0 | { |
987 | 0 | alloc_direct_apic_vector(&lwp_intr_vector, svm_lwp_interrupt); |
988 | 0 | v->arch.hvm_svm.cpu_lwp_cfg = (msr_content & 0xffff00ffffffffffULL) |
989 | 0 | | ((uint64_t)lwp_intr_vector << 40); |
990 | 0 | } |
991 | 0 | else |
992 | 0 | { |
993 | 0 | /* otherwise disable it */ |
994 | 0 | v->arch.hvm_svm.cpu_lwp_cfg = msr_content & 0xffff00ff7fffffffULL; |
995 | 0 | } |
996 | 0 | |
997 | 0 | wrmsrl(MSR_AMD64_LWP_CFG, v->arch.hvm_svm.cpu_lwp_cfg); |
998 | 0 |
999 | 0 | /* Track non-lazy xstate use if LWP_CFG is non-zero. */ |
1000 | 0 | v->arch.nonlazy_xstate_used = !!(msr_content); |
1001 | 0 | } |
1002 | 0 |
1003 | 0 | return 0; |
1004 | 0 | } |
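/*
 * Editor's note on the masks above (hedged against the AMD LWP spec):
 * bits 47:40 of LWP_CFG hold the threshold-interrupt vector and bit 31
 * enables interrupt delivery.  Thus 0xffff00ffffffffffULL clears just
 * the vector field before Xen substitutes its own lwp_intr_vector, and
 * 0xffff00ff7fffffffULL also clears bit 31 to leave delivery disabled.
 */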
1005 | | |
1006 | | static inline void svm_tsc_ratio_save(struct vcpu *v) |
1007 | 0 | { |
1008 | 0 | /* Other vcpus might not have vtsc enabled. So disable TSC_RATIO here. */ |
1009 | 0 | if ( cpu_has_tsc_ratio && !v->domain->arch.vtsc ) |
1010 | 0 | wrmsrl(MSR_AMD64_TSC_RATIO, DEFAULT_TSC_RATIO); |
1011 | 0 | } |
1012 | | |
1013 | | static inline void svm_tsc_ratio_load(struct vcpu *v) |
1014 | 0 | { |
1015 | 0 | if ( cpu_has_tsc_ratio && !v->domain->arch.vtsc ) |
1016 | 0 | wrmsrl(MSR_AMD64_TSC_RATIO, hvm_tsc_scaling_ratio(v->domain)); |
1017 | 0 | } |
1018 | | |
1019 | | static void svm_ctxt_switch_from(struct vcpu *v) |
1020 | 0 | { |
1021 | 0 | int cpu = smp_processor_id(); |
1022 | 0 |
1023 | 0 | /* |
1024 | 0 | * Return early if trying to do a context switch without SVM enabled; |
1025 | 0 | * this can happen when the hypervisor shuts down with HVM guests |
1026 | 0 | * still running. |
1027 | 0 | */ |
1028 | 0 | if ( unlikely((read_efer() & EFER_SVME) == 0) ) |
1029 | 0 | return; |
1030 | 0 |
1031 | 0 | svm_fpu_leave(v); |
1032 | 0 |
1033 | 0 | svm_save_dr(v); |
1034 | 0 | svm_lwp_save(v); |
1035 | 0 | svm_tsc_ratio_save(v); |
1036 | 0 |
1037 | 0 | svm_sync_vmcb(v); |
1038 | 0 | svm_vmload_pa(per_cpu(host_vmcb, cpu)); |
1039 | 0 |
1040 | 0 | /* Resume use of ISTs now that the host TR is reinstated. */ |
1041 | 0 | set_ist(&idt_tables[cpu][TRAP_double_fault], IST_DF); |
1042 | 0 | set_ist(&idt_tables[cpu][TRAP_nmi], IST_NMI); |
1043 | 0 | set_ist(&idt_tables[cpu][TRAP_machine_check], IST_MCE); |
1044 | 0 | } |
1045 | | |
1046 | | static void svm_ctxt_switch_to(struct vcpu *v) |
1047 | 0 | { |
1048 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
1049 | 0 | int cpu = smp_processor_id(); |
1050 | 0 |
1051 | 0 | /* |
1052 | 0 | * This is required because VMRUN performs consistency checks, and some |
1053 | 0 | * of the DOM0 selectors may point at invalid GDT locations, causing AMD |
1054 | 0 | * processors to shut down. |
1055 | 0 | */ |
1056 | 0 | asm volatile ("mov %0, %%ds; mov %0, %%es; mov %0, %%ss;" :: "r" (0)); |
1057 | 0 |
1058 | 0 | /* |
1059 | 0 | * Cannot use ISTs for NMI/#MC/#DF while we are running with the guest TR. |
1060 | 0 | * But this doesn't matter: the IST is only req'd to handle SYSCALL/SYSRET. |
1061 | 0 | */ |
1062 | 0 | set_ist(&idt_tables[cpu][TRAP_double_fault], IST_NONE); |
1063 | 0 | set_ist(&idt_tables[cpu][TRAP_nmi], IST_NONE); |
1064 | 0 | set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE); |
1065 | 0 |
1066 | 0 | svm_restore_dr(v); |
1067 | 0 |
1068 | 0 | svm_vmsave_pa(per_cpu(host_vmcb, cpu)); |
1069 | 0 | svm_vmload(vmcb); |
1070 | 0 | vmcb->cleanbits.bytes = 0; |
1071 | 0 | svm_lwp_load(v); |
1072 | 0 | svm_tsc_ratio_load(v); |
1073 | 0 |
1074 | 0 | if ( cpu_has_rdtscp ) |
1075 | 0 | wrmsrl(MSR_TSC_AUX, hvm_msr_tsc_aux(v)); |
1076 | 0 | } |
1077 | | |
1078 | | static void noreturn svm_do_resume(struct vcpu *v) |
1079 | 0 | { |
1080 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
1081 | 0 | bool_t debug_state = v->domain->debugger_attached; |
1082 | 0 | bool_t vcpu_guestmode = 0; |
1083 | 0 | struct vlapic *vlapic = vcpu_vlapic(v); |
1084 | 0 |
1085 | 0 | if ( nestedhvm_enabled(v->domain) && nestedhvm_vcpu_in_guestmode(v) ) |
1086 | 0 | vcpu_guestmode = 1; |
1087 | 0 |
1088 | 0 | if ( !vcpu_guestmode && |
1089 | 0 | unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) ) |
1090 | 0 | { |
1091 | 0 | uint32_t intercepts = vmcb_get_exception_intercepts(vmcb); |
1092 | 0 |
|
1093 | 0 | v->arch.hvm_vcpu.debug_state_latch = debug_state; |
1094 | 0 | vmcb_set_exception_intercepts( |
1095 | 0 | vmcb, debug_state ? (intercepts | (1U << TRAP_int3)) |
1096 | 0 | : (intercepts & ~(1U << TRAP_int3))); |
1097 | 0 | } |
1098 | 0 |
1099 | 0 | if ( v->arch.hvm_svm.launch_core != smp_processor_id() ) |
1100 | 0 | { |
1101 | 0 | v->arch.hvm_svm.launch_core = smp_processor_id(); |
1102 | 0 | hvm_migrate_timers(v); |
1103 | 0 | hvm_migrate_pirqs(v); |
1104 | 0 | /* Migrating to another ASID domain. Request a new ASID. */ |
1105 | 0 | hvm_asid_flush_vcpu(v); |
1106 | 0 | } |
1107 | 0 |
1108 | 0 | if ( !vcpu_guestmode && !vlapic_hw_disabled(vlapic) ) |
1109 | 0 | { |
1110 | 0 | vintr_t intr; |
1111 | 0 |
1112 | 0 | /* Reflect the vlapic's TPR in the hardware vtpr */ |
1113 | 0 | intr = vmcb_get_vintr(vmcb); |
1114 | 0 | intr.fields.tpr = |
1115 | 0 | (vlapic_get_reg(vlapic, APIC_TASKPRI) & 0xFF) >> 4; |
1116 | 0 | vmcb_set_vintr(vmcb, intr); |
1117 | 0 | } |
1118 | 0 |
1119 | 0 | hvm_do_resume(v); |
1120 | 0 |
1121 | 0 | reset_stack_and_jump(svm_asm_do_resume); |
1122 | 0 | } |
1123 | | |
1124 | | static void svm_guest_osvw_init(struct vcpu *vcpu) |
1125 | 0 | { |
1126 | 0 | if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD ) |
1127 | 0 | return; |
1128 | 0 |
1129 | 0 | /* |
1130 | 0 | * Guests should see errata 400 and 415 as fixed (assuming that |
1131 | 0 | * HLT and IO instructions are intercepted). |
1132 | 0 | */ |
1133 | 0 | vcpu->arch.hvm_svm.osvw.length = (osvw_length >= 3) ? osvw_length : 3; |
1134 | 0 | vcpu->arch.hvm_svm.osvw.status = osvw_status & ~(6ULL); |
1135 | 0 |
1136 | 0 | /* |
1137 | 0 | * By increasing VCPU's osvw.length to 3 we are telling the guest that |
1138 | 0 | * all osvw.status bits inside that length, including bit 0 (which is |
1139 | 0 | * reserved for erratum 298), are valid. However, if host processor's |
1140 | 0 | * osvw_len is 0 then osvw_status[0] carries no information. We need to |
1141 | 0 | * be conservative here and therefore we tell the guest that erratum 298 |
1142 | 0 | * is present (because we really don't know). |
1143 | 0 | */ |
1144 | 0 | if ( osvw_length == 0 && boot_cpu_data.x86 == 0x10 ) |
1145 | 0 | vcpu->arch.hvm_svm.osvw.status |= 1; |
1146 | 0 | } |
1147 | | |
1148 | | void svm_host_osvw_reset() |
1149 | 0 | { |
1150 | 0 | spin_lock(&osvw_lock); |
1151 | 0 |
1152 | 0 | osvw_length = 64; /* One register (MSRC001_0141) worth of errata */ |
1153 | 0 | osvw_status = 0; |
1154 | 0 |
1155 | 0 | spin_unlock(&osvw_lock); |
1156 | 0 | } |
1157 | | |
1158 | | void svm_host_osvw_init() |
1159 | 0 | { |
1160 | 0 | spin_lock(&osvw_lock); |
1161 | 0 |
1162 | 0 | /* |
1163 | 0 | * Get OSVW bits. If bits are not the same on different processors then |
1164 | 0 | * choose the worst case (i.e. if erratum is present on one processor and |
1165 | 0 | * not on another assume that the erratum is present everywhere). |
1166 | 0 | */ |
1167 | 0 | if ( test_bit(X86_FEATURE_OSVW, &boot_cpu_data.x86_capability) ) |
1168 | 0 | { |
1169 | 0 | uint64_t len, status; |
1170 | 0 |
1171 | 0 | if ( rdmsr_safe(MSR_AMD_OSVW_ID_LENGTH, len) || |
1172 | 0 | rdmsr_safe(MSR_AMD_OSVW_STATUS, status) ) |
1173 | 0 | len = status = 0; |
1174 | 0 |
1175 | 0 | if ( len < osvw_length ) |
1176 | 0 | osvw_length = len; |
1177 | 0 |
1178 | 0 | osvw_status |= status; |
1179 | 0 | osvw_status &= (1ULL << osvw_length) - 1; |
1180 | 0 | } |
1181 | 0 | else |
1182 | 0 | osvw_length = osvw_status = 0; |
1183 | 0 |
1184 | 0 | spin_unlock(&osvw_lock); |
1185 | 0 | } |
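/*
 * Editor's summary: across heterogeneous CPUs the effective OSVW state
 * is the pessimistic merge --
 *
 *   osvw_length = min(per-CPU lengths);
 *   osvw_status = bitwise OR of per-CPU status, masked to osvw_length;
 *
 * so an erratum observed on any CPU is reported as present everywhere.
 */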
1186 | | |
1187 | | static int svm_domain_initialise(struct domain *d) |
1188 | 0 | { |
1189 | 0 | static const struct arch_csw csw = { |
1190 | 0 | .from = svm_ctxt_switch_from, |
1191 | 0 | .to = svm_ctxt_switch_to, |
1192 | 0 | .tail = svm_do_resume, |
1193 | 0 | }; |
1194 | 0 |
1195 | 0 | d->arch.ctxt_switch = &csw; |
1196 | 0 |
1197 | 0 | return 0; |
1198 | 0 | } |
1199 | | |
1200 | | static void svm_domain_destroy(struct domain *d) |
1201 | 0 | { |
1202 | 0 | } |
1203 | | |
1204 | | static int svm_vcpu_initialise(struct vcpu *v) |
1205 | 0 | { |
1206 | 0 | int rc; |
1207 | 0 |
1208 | 0 | v->arch.hvm_svm.launch_core = -1; |
1209 | 0 |
1210 | 0 | if ( (rc = svm_create_vmcb(v)) != 0 ) |
1211 | 0 | { |
1212 | 0 | dprintk(XENLOG_WARNING, |
1213 | 0 | "Failed to create VMCB for vcpu %d: err=%d.\n", |
1214 | 0 | v->vcpu_id, rc); |
1215 | 0 | return rc; |
1216 | 0 | } |
1217 | 0 |
1218 | 0 | svm_guest_osvw_init(v); |
1219 | 0 |
1220 | 0 | return 0; |
1221 | 0 | } |
1222 | | |
1223 | | static void svm_vcpu_destroy(struct vcpu *v) |
1224 | 0 | { |
1225 | 0 | svm_destroy_vmcb(v); |
1226 | 0 | passive_domain_destroy(v); |
1227 | 0 | } |
1228 | | |
1229 | | /* |
1230 | | * Emulate enough of interrupt injection to cover the DPL check (omitted by |
1231 | | * hardware), and to work out whether it is safe to move %rip forwards for |
1232 | | * architectural trap vs fault semantics in the exception frame (which |
1233 | | * hardware won't cope with). |
1234 | | * |
1235 | | * The event parameter will be modified to a fault if necessary. |
1236 | | */ |
1237 | | static void svm_emul_swint_injection(struct x86_event *event) |
1238 | 0 | { |
1239 | 0 | struct vcpu *curr = current; |
1240 | 0 | const struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb; |
1241 | 0 | const struct cpu_user_regs *regs = guest_cpu_user_regs(); |
1242 | 0 | unsigned int trap = event->vector, type = event->type; |
1243 | 0 | unsigned int fault = TRAP_gp_fault, ec = 0; |
1244 | 0 | pagefault_info_t pfinfo; |
1245 | 0 | struct segment_register cs, idtr; |
1246 | 0 | unsigned int idte_size, idte_offset; |
1247 | 0 | unsigned long idte_linear_addr; |
1248 | 0 | struct { uint32_t a, b, c, d; } idte = {}; |
1249 | 0 | bool lm = vmcb_get_efer(vmcb) & EFER_LMA; |
1250 | 0 | int rc; |
1251 | 0 |
1252 | 0 | if ( !(vmcb_get_cr0(vmcb) & X86_CR0_PE) ) |
1253 | 0 | goto raise_exception; /* TODO: support real-mode injection? */ |
1254 | 0 |
1255 | 0 | idte_size = lm ? 16 : 8; |
1256 | 0 | idte_offset = trap * idte_size; |
1257 | 0 |
1258 | 0 | /* ICEBP sets the External Event bit despite being an instruction. */ |
1259 | 0 | ec = (trap << 3) | X86_XEC_IDT | |
1260 | 0 | (type == X86_EVENTTYPE_PRI_SW_EXCEPTION ? X86_XEC_EXT : 0); |
1261 | 0 |
1262 | 0 | /* |
1263 | 0 | * TODO: This does not cover the v8086 mode with CR4.VME case |
1264 | 0 | * correctly, but falls on the safe side from the point of view of a |
1265 | 0 | * 32bit OS. Someone with many TUITs can see about reading the TSS |
1266 | 0 | * Software Interrupt Redirection bitmap. |
1267 | 0 | */ |
1268 | 0 | if ( (regs->eflags & X86_EFLAGS_VM) && |
1269 | 0 | MASK_EXTR(regs->eflags, X86_EFLAGS_IOPL) != 3 ) |
1270 | 0 | goto raise_exception; |
1271 | 0 |
1272 | 0 | /* |
1273 | 0 | * Read all 8/16 bytes so the idtr limit check is applied properly to |
1274 | 0 | * this entry, even though we don't look at all the words read. |
1275 | 0 | */ |
1276 | 0 | hvm_get_segment_register(curr, x86_seg_cs, &cs); |
1277 | 0 | hvm_get_segment_register(curr, x86_seg_idtr, &idtr); |
1278 | 0 | if ( !hvm_virtual_to_linear_addr(x86_seg_idtr, &idtr, idte_offset, |
1279 | 0 | idte_size, hvm_access_read, |
1280 | 0 | &cs, &idte_linear_addr) ) |
1281 | 0 | goto raise_exception; |
1282 | 0 |
1283 | 0 | rc = hvm_copy_from_guest_linear(&idte, idte_linear_addr, idte_size, |
1284 | 0 | PFEC_implicit, &pfinfo); |
1285 | 0 | if ( rc ) |
1286 | 0 | { |
1287 | 0 | if ( rc == HVMTRANS_bad_linear_to_gfn ) |
1288 | 0 | { |
1289 | 0 | fault = TRAP_page_fault; |
1290 | 0 | ec = pfinfo.ec; |
1291 | 0 | event->cr2 = pfinfo.linear; |
1292 | 0 | } |
1293 | 0 |
1294 | 0 | goto raise_exception; |
1295 | 0 | } |
1296 | 0 |
1297 | 0 | /* This must be an interrupt, trap, or task gate. */ |
1298 | 0 | switch ( (idte.b >> 8) & 0x1f ) |
1299 | 0 | { |
1300 | 0 | case SYS_DESC_irq_gate: |
1301 | 0 | case SYS_DESC_trap_gate: |
1302 | 0 | break; |
1303 | 0 | case SYS_DESC_irq_gate16: |
1304 | 0 | case SYS_DESC_trap_gate16: |
1305 | 0 | case SYS_DESC_task_gate: |
1306 | 0 | if ( !lm ) |
1307 | 0 | break; |
1308 | 0 | /* fall through */ |
1309 | 0 | default: |
1310 | 0 | goto raise_exception; |
1311 | 0 | } |
1312 | 0 |
1313 | 0 | /* The 64-bit high half's type must be zero. */ |
1314 | 0 | if ( idte.d & 0x1f00 ) |
1315 | 0 | goto raise_exception; |
1316 | 0 |
1317 | 0 | /* ICEBP counts as a hardware event, and bypasses the dpl check. */ |
1318 | 0 | if ( type != X86_EVENTTYPE_PRI_SW_EXCEPTION && |
1319 | 0 | vmcb_get_cpl(vmcb) > ((idte.b >> 13) & 3) ) |
1320 | 0 | goto raise_exception; |
1321 | 0 |
1322 | 0 | /* Is this entry present? */ |
1323 | 0 | if ( !(idte.b & (1u << 15)) ) |
1324 | 0 | { |
1325 | 0 | fault = TRAP_no_segment; |
1326 | 0 | goto raise_exception; |
1327 | 0 | } |
1328 | 0 |
1329 | 0 | /* |
1330 | 0 | * Any further fault during injection will cause a double fault. It |
1331 | 0 | * is fine to leave this up to hardware, and software won't be in a |
1332 | 0 | * position to care about the architectural correctness of %rip in the |
1333 | 0 | * exception frame. |
1334 | 0 | */ |
1335 | 0 | return; |
1336 | 0 |
1337 | 0 | raise_exception: |
1338 | 0 | event->vector = fault; |
1339 | 0 | event->type = X86_EVENTTYPE_HW_EXCEPTION; |
1340 | 0 | event->insn_len = 0; |
1341 | 0 | event->error_code = ec; |
1342 | 0 | } |
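/*
 * Editor's reference for the idte.b tests above (standard x86 gate
 * descriptor layout): bits 12:8 hold the gate type, bits 14:13 the DPL,
 * and bit 15 the present flag.  Hence "(idte.b >> 8) & 0x1f" extracts
 * the type, "(idte.b >> 13) & 3" the DPL compared against the guest's
 * CPL, and a clear bit 15 yields #NP (TRAP_no_segment) rather than #GP.
 */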
1343 | | |
1344 | | static void svm_inject_event(const struct x86_event *event) |
1345 | 0 | { |
1346 | 0 | struct vcpu *curr = current; |
1347 | 0 | struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb; |
1348 | 0 | eventinj_t eventinj = vmcb->eventinj; |
1349 | 0 | struct x86_event _event = *event; |
1350 | 0 | struct cpu_user_regs *regs = guest_cpu_user_regs(); |
1351 | 0 |
1352 | 0 | /* |
1353 | 0 | * For hardware lacking NRips support, and always for ICEBP instructions, |
1354 | 0 | * the processor requires extra help to deliver software events. |
1355 | 0 | * |
1356 | 0 | * Xen must emulate enough of the event injection to be sure that a |
1357 | 0 | * further fault shouldn't occur during delivery. This covers the fact |
1358 | 0 | * that hardware doesn't perform DPL checking on injection. |
1359 | 0 | * |
1360 | 0 | * Also, it accounts for proper positioning of %rip for an event with trap |
1361 | 0 | * semantics (where %rip should point after the instruction) which suffers |
1362 | 0 | * a fault during injection (at which point %rip should point at the |
1363 | 0 | * instruction). |
1364 | 0 | */ |
1365 | 0 | if ( event->type == X86_EVENTTYPE_PRI_SW_EXCEPTION || |
1366 | 0 | (!cpu_has_svm_nrips && (event->type == X86_EVENTTYPE_SW_INTERRUPT || |
1367 | 0 | event->type == X86_EVENTTYPE_SW_EXCEPTION)) ) |
1368 | 0 | svm_emul_swint_injection(&_event); |
1369 | 0 |
1370 | 0 | switch ( _event.vector ) |
1371 | 0 | { |
1372 | 0 | case TRAP_debug: |
1373 | 0 | if ( regs->eflags & X86_EFLAGS_TF ) |
1374 | 0 | { |
1375 | 0 | __restore_debug_registers(vmcb, curr); |
1376 | 0 | vmcb_set_dr6(vmcb, vmcb_get_dr6(vmcb) | 0x4000); |
1377 | 0 | } |
1378 | 0 | /* fall through */ |
1379 | 0 | case TRAP_int3: |
1380 | 0 | if ( curr->domain->debugger_attached ) |
1381 | 0 | { |
1382 | 0 | /* Debug/Int3: Trap to debugger. */ |
1383 | 0 | domain_pause_for_debugger(); |
1384 | 0 | return; |
1385 | 0 | } |
1386 | 0 | } |
1387 | 0 |
1388 | 0 | if ( unlikely(eventinj.fields.v) && |
1389 | 0 | (eventinj.fields.type == X86_EVENTTYPE_HW_EXCEPTION) ) |
1390 | 0 | { |
1391 | 0 | _event.vector = hvm_combine_hw_exceptions( |
1392 | 0 | eventinj.fields.vector, _event.vector); |
1393 | 0 | if ( _event.vector == TRAP_double_fault ) |
1394 | 0 | _event.error_code = 0; |
1395 | 0 | } |
1396 | 0 |
1397 | 0 | eventinj.bytes = 0; |
1398 | 0 | eventinj.fields.v = 1; |
1399 | 0 | eventinj.fields.vector = _event.vector; |
1400 | 0 |
1401 | 0 | /* |
1402 | 0 | * Refer to AMD Vol 2: System Programming, 15.20 Event Injection. |
1403 | 0 | * |
1404 | 0 | * On hardware lacking NextRIP support, and all hardware in the case of |
1405 | 0 | * icebp, software events with trap semantics need emulating, so %rip in |
1406 | 0 | * the trap frame points after the instruction. |
1407 | 0 | * |
1408 | 0 | * The x86 emulator (if requested by the x86_swint_emulate_* choice) will |
1409 | 0 | * have performed checks such as presence/dpl/etc and believes that the |
1410 | 0 | * event injection will succeed without faulting. |
1411 | 0 | * |
1412 | 0 | * The x86 emulator will always provide fault semantics for software |
1413 | 0 | * events, with _event.insn_len set appropriately. If the injection |
1414 | 0 | * requires emulation, move %rip forwards at this point. |
1415 | 0 | */ |
1416 | 0 | switch ( _event.type ) |
1417 | 0 | { |
1418 | 0 | case X86_EVENTTYPE_SW_INTERRUPT: /* int $n */ |
1419 | 0 | if ( cpu_has_svm_nrips ) |
1420 | 0 | vmcb->nextrip = regs->rip + _event.insn_len; |
1421 | 0 | else |
1422 | 0 | regs->rip += _event.insn_len; |
1423 | 0 | eventinj.fields.type = X86_EVENTTYPE_SW_INTERRUPT; |
1424 | 0 | break; |
1425 | 0 |
1426 | 0 | case X86_EVENTTYPE_PRI_SW_EXCEPTION: /* icebp */ |
1427 | 0 | /* |
1428 | 0 | * icebp's injection must always be emulated, as hardware does not |
1429 | 0 | * special case HW_EXCEPTION with vector 1 (#DB) as having trap |
1430 | 0 | * semantics. |
1431 | 0 | */ |
1432 | 0 | regs->rip += _event.insn_len; |
1433 | 0 | if ( cpu_has_svm_nrips ) |
1434 | 0 | vmcb->nextrip = regs->rip; |
1435 | 0 | eventinj.fields.type = X86_EVENTTYPE_HW_EXCEPTION; |
1436 | 0 | break; |
1437 | 0 |
1438 | 0 | case X86_EVENTTYPE_SW_EXCEPTION: /* int3, into */ |
1439 | 0 | /* |
1440 | 0 | * Hardware special cases HW_EXCEPTION with vectors 3 and 4 as having |
1441 | 0 | * trap semantics, and will perform DPL checks. |
1442 | 0 | */ |
1443 | 0 | if ( cpu_has_svm_nrips ) |
1444 | 0 | vmcb->nextrip = regs->rip + _event.insn_len; |
1445 | 0 | else |
1446 | 0 | regs->rip += _event.insn_len; |
1447 | 0 | eventinj.fields.type = X86_EVENTTYPE_HW_EXCEPTION; |
1448 | 0 | break; |
1449 | 0 |
1450 | 0 | default: |
1451 | 0 | eventinj.fields.type = X86_EVENTTYPE_HW_EXCEPTION; |
1452 | 0 | eventinj.fields.ev = (_event.error_code != X86_EVENT_NO_EC); |
1453 | 0 | eventinj.fields.errorcode = _event.error_code; |
1454 | 0 | break; |
1455 | 0 | } |
1456 | 0 |
1457 | 0 | /* |
1458 | 0 | * If injecting an event outside of 64bit mode, zero the upper bits of the |
1459 | 0 | * %eip and nextrip after the adjustments above. |
1460 | 0 | */ |
1461 | 0 | if ( !((vmcb_get_efer(vmcb) & EFER_LMA) && vmcb->cs.l) ) |
1462 | 0 | { |
1463 | 0 | regs->rip = regs->eip; |
1464 | 0 | vmcb->nextrip = (uint32_t)vmcb->nextrip; |
1465 | 0 | } |
1466 | 0 |
1467 | 0 | ASSERT(!eventinj.fields.ev || |
1468 | 0 | eventinj.fields.errorcode == (uint16_t)eventinj.fields.errorcode); |
1469 | 0 | vmcb->eventinj = eventinj; |
1470 | 0 |
1471 | 0 | if ( _event.vector == TRAP_page_fault ) |
1472 | 0 | { |
1473 | 0 | curr->arch.hvm_vcpu.guest_cr[2] = _event.cr2; |
1474 | 0 | vmcb_set_cr2(vmcb, _event.cr2); |
1475 | 0 | HVMTRACE_LONG_2D(PF_INJECT, _event.error_code, TRC_PAR_LONG(_event.cr2)); |
1476 | 0 | } |
1477 | 0 | else |
1478 | 0 | { |
1479 | 0 | HVMTRACE_2D(INJ_EXC, _event.vector, _event.error_code); |
1480 | 0 | } |
1481 | 0 | } |
1482 | | |
1483 | | static int svm_event_pending(struct vcpu *v) |
1484 | 0 | { |
1485 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
1486 | 0 | return vmcb->eventinj.fields.v; |
1487 | 0 | } |
1488 | | |
1489 | | static void svm_cpu_dead(unsigned int cpu) |
1490 | 0 | { |
1491 | 0 | paddr_t *this_hsa = &per_cpu(hsa, cpu); |
1492 | 0 | paddr_t *this_vmcb = &per_cpu(host_vmcb, cpu); |
1493 | 0 |
1494 | 0 | if ( *this_hsa ) |
1495 | 0 | { |
1496 | 0 | free_domheap_page(maddr_to_page(*this_hsa)); |
1497 | 0 | *this_hsa = 0; |
1498 | 0 | } |
1499 | 0 |
1500 | 0 | if ( *this_vmcb ) |
1501 | 0 | { |
1502 | 0 | free_domheap_page(maddr_to_page(*this_vmcb)); |
1503 | 0 | *this_vmcb = 0; |
1504 | 0 | } |
1505 | 0 | } |
1506 | | |
1507 | | static int svm_cpu_up_prepare(unsigned int cpu) |
1508 | 0 | { |
1509 | 0 | paddr_t *this_hsa = &per_cpu(hsa, cpu); |
1510 | 0 | paddr_t *this_vmcb = &per_cpu(host_vmcb, cpu); |
1511 | 0 | nodeid_t node = cpu_to_node(cpu); |
1512 | 0 | unsigned int memflags = 0; |
1513 | 0 | struct page_info *pg; |
1514 | 0 |
1515 | 0 | if ( node != NUMA_NO_NODE ) |
1516 | 0 | memflags = MEMF_node(node); |
1517 | 0 |
1518 | 0 | if ( !*this_hsa ) |
1519 | 0 | { |
1520 | 0 | pg = alloc_domheap_page(NULL, memflags); |
1521 | 0 | if ( !pg ) |
1522 | 0 | goto err; |
1523 | 0 |
1524 | 0 | clear_domain_page(_mfn(page_to_mfn(pg))); |
1525 | 0 | *this_hsa = page_to_maddr(pg); |
1526 | 0 | } |
1527 | 0 |
1528 | 0 | if ( !*this_vmcb ) |
1529 | 0 | { |
1530 | 0 | pg = alloc_domheap_page(NULL, memflags); |
1531 | 0 | if ( !pg ) |
1532 | 0 | goto err; |
1533 | 0 |
1534 | 0 | clear_domain_page(_mfn(page_to_mfn(pg))); |
1535 | 0 | *this_vmcb = page_to_maddr(pg); |
1536 | 0 | } |
1537 | 0 |
1538 | 0 | return 0; |
1539 | 0 |
1540 | 0 | err: |
1541 | 0 | svm_cpu_dead(cpu); |
1542 | 0 | return -ENOMEM; |
1543 | 0 | } |
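/*
 * Editor's sketch (not part of svm.c): svm_cpu_up_prepare() relies on each
 * per-CPU paddr_t doubling as an "already allocated" flag, which is what
 * makes it safe to call again after a partial failure. The pattern in
 * isolation, with a hypothetical name:
 */
static int demo_alloc_once(paddr_t *slot, unsigned int memflags)
{
    struct page_info *pg;

    if ( *slot )
        return 0;                    /* already set up on this CPU */

    pg = alloc_domheap_page(NULL, memflags);
    if ( !pg )
        return -ENOMEM;

    clear_domain_page(_mfn(page_to_mfn(pg)));
    *slot = page_to_maddr(pg);       /* non-zero marks the slot as live */
    return 0;
}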
1544 | | |
1545 | | static void svm_init_erratum_383(const struct cpuinfo_x86 *c) |
1546 | 0 | { |
1547 | 0 | uint64_t msr_content; |
1548 | 0 |
1549 | 0 | /* check whether CPU is affected */ |
1550 | 0 | if ( !cpu_has_amd_erratum(c, AMD_ERRATUM_383) ) |
1551 | 0 | return; |
1552 | 0 |
1553 | 0 | /* use safe methods to be compatible with nested virtualization */ |
1554 | 0 | if (rdmsr_safe(MSR_AMD64_DC_CFG, msr_content) == 0 && |
1555 | 0 | wrmsr_safe(MSR_AMD64_DC_CFG, msr_content | (1ULL << 47)) == 0) |
1556 | 0 | { |
1557 | 0 | amd_erratum383_found = 1; |
1558 | 0 | } else { |
1559 | 0 | printk("Failed to enable erratum 383 workaround\n"); |
1560 | 0 | } |
1561 | 0 | } |
1562 | | |
1563 | | static int svm_handle_osvw(struct vcpu *v, uint32_t msr, uint64_t *val, bool_t read) |
1564 | 0 | { |
1565 | 0 | if ( !v->domain->arch.cpuid->extd.osvw ) |
1566 | 0 | return -1; |
1567 | 0 |
1568 | 0 | if ( read ) |
1569 | 0 | { |
1570 | 0 | if ( msr == MSR_AMD_OSVW_ID_LENGTH ) |
1571 | 0 | *val = v->arch.hvm_svm.osvw.length; |
1572 | 0 | else |
1573 | 0 | *val = v->arch.hvm_svm.osvw.status; |
1574 | 0 | } |
1575 | 0 | /* Writes are ignored */ |
1576 | 0 |
1577 | 0 | return 0; |
1578 | 0 | } |
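/*
 * Editor's sketch (not part of svm.c): how a guest consumes the OSVW pair
 * exposed above. Bit N of OSVW_STATUS is only defined when N is below the
 * value read from OSVW_ID_LENGTH; out-of-range errata are conventionally
 * assumed present so the workaround gets applied. Hypothetical helper:
 */
static bool demo_osvw_erratum_present(uint64_t length, uint64_t status,
                                      unsigned int erratum_id)
{
    if ( erratum_id >= length )
        return true;                 /* status unknown: assume affected */
    return (status >> erratum_id) & 1;
}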
1579 | | |
1580 | | static int _svm_cpu_up(bool bsp) |
1581 | 0 | { |
1582 | 0 | uint64_t msr_content; |
1583 | 0 | int rc; |
1584 | 0 | unsigned int cpu = smp_processor_id(); |
1585 | 0 | const struct cpuinfo_x86 *c = &cpu_data[cpu]; |
1586 | 0 | |
1587 | 0 | /* Check whether SVM feature is disabled in BIOS */ |
1588 | 0 | rdmsrl(MSR_K8_VM_CR, msr_content); |
1589 | 0 | if ( msr_content & K8_VMCR_SVME_DISABLE ) |
1590 | 0 | { |
1591 | 0 | printk("CPU%d: AMD SVM Extension is disabled in BIOS.\n", cpu); |
1592 | 0 | return -EINVAL; |
1593 | 0 | } |
1594 | 0 |
1595 | 0 | if ( bsp && (rc = svm_cpu_up_prepare(cpu)) != 0 ) |
1596 | 0 | return rc; |
1597 | 0 |
1598 | 0 | write_efer(read_efer() | EFER_SVME); |
1599 | 0 |
1600 | 0 | /* Initialize the HSA for this core. */ |
1601 | 0 | wrmsrl(MSR_K8_VM_HSAVE_PA, per_cpu(hsa, cpu)); |
1602 | 0 |
1603 | 0 | /* check for erratum 383 */ |
1604 | 0 | svm_init_erratum_383(c); |
1605 | 0 |
1606 | 0 | /* Initialize core's ASID handling. */ |
1607 | 0 | svm_asid_init(c); |
1608 | 0 |
1609 | 0 | /* |
1610 | 0 | * Check whether EFER.LMSLE can be written. |
1611 | 0 | * Unfortunately there's no feature bit defined for this. |
1612 | 0 | */ |
1613 | 0 | msr_content = read_efer(); |
1614 | 0 | if ( wrmsr_safe(MSR_EFER, msr_content | EFER_LMSLE) == 0 ) |
1615 | 0 | rdmsrl(MSR_EFER, msr_content); |
1616 | 0 | if ( msr_content & EFER_LMSLE ) |
1617 | 0 | { |
1618 | 0 | if ( 0 && /* FIXME: Migration! */ bsp ) |
1619 | 0 | cpu_has_lmsl = 1; |
1620 | 0 | wrmsrl(MSR_EFER, msr_content ^ EFER_LMSLE); |
1621 | 0 | } |
1622 | 0 | else |
1623 | 0 | { |
1624 | 0 | if ( cpu_has_lmsl ) |
1625 | 0 | printk(XENLOG_WARNING "Inconsistent LMSLE support across CPUs!\n"); |
1626 | 0 | cpu_has_lmsl = 0; |
1627 | 0 | } |
1628 | 0 |
1629 | 0 | /* Initialize OSVW bits to be used by guests */ |
1630 | 0 | svm_host_osvw_init(); |
1631 | 0 |
1632 | 0 | return 0; |
1633 | 0 | } |
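/*
 * Editor's sketch (not part of svm.c): the LMSLE detection above is an
 * instance of "probe by writing" -- with no CPUID bit defined, the only
 * test is to try setting the flag and read it back. Generic form, with a
 * hypothetical name, assuming the bit is clear before the probe:
 */
static bool demo_probe_msr_bit(unsigned int msr, uint64_t bit)
{
    uint64_t orig, val;

    if ( rdmsr_safe(msr, orig) || wrmsr_safe(msr, orig | bit) )
        return false;                /* access faulted: not supported */
    rdmsrl(msr, val);
    wrmsr_safe(msr, orig);           /* restore the pre-probe state */
    return val & bit;                /* stuck-at-zero => unsupported */
}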
1634 | | |
1635 | | static int svm_cpu_up(void) |
1636 | 0 | { |
1637 | 0 | return _svm_cpu_up(false); |
1638 | 0 | } |
1639 | | |
1640 | | const struct hvm_function_table * __init start_svm(void) |
1641 | 0 | { |
1642 | 0 | bool_t printed = 0; |
1643 | 0 |
1644 | 0 | svm_host_osvw_reset(); |
1645 | 0 |
1646 | 0 | if ( _svm_cpu_up(true) ) |
1647 | 0 | { |
1648 | 0 | printk("SVM: failed to initialise.\n"); |
1649 | 0 | return NULL; |
1650 | 0 | } |
1651 | 0 |
1652 | 0 | setup_vmcb_dump(); |
1653 | 0 |
1654 | 0 | svm_feature_flags = (current_cpu_data.extended_cpuid_level >= 0x8000000A ? |
1655 | 0 | cpuid_edx(0x8000000A) : 0); |
1656 | 0 |
1657 | 0 | printk("SVM: Supported advanced features:\n"); |
1658 | 0 |
1659 | 0 | /* DecodeAssists fast paths assume nextrip is valid for fast rIP update. */ |
1660 | 0 | if ( !cpu_has_svm_nrips ) |
1661 | 0 | clear_bit(SVM_FEATURE_DECODEASSISTS, &svm_feature_flags); |
1662 | 0 |
1663 | 0 | if ( cpu_has_tsc_ratio ) |
1664 | 0 | svm_function_table.tsc_scaling.ratio_frac_bits = 32; |
1665 | 0 |
1666 | 0 | #define P(p,s) if ( p ) { printk(" - %s\n", s); printed = 1; } |
1667 | 0 | P(cpu_has_svm_npt, "Nested Page Tables (NPT)"); |
1668 | 0 | P(cpu_has_svm_lbrv, "Last Branch Record (LBR) Virtualisation"); |
1669 | 0 | P(cpu_has_svm_nrips, "Next-RIP Saved on #VMEXIT"); |
1670 | 0 | P(cpu_has_svm_cleanbits, "VMCB Clean Bits"); |
1671 | 0 | P(cpu_has_svm_decode, "DecodeAssists"); |
1672 | 0 | P(cpu_has_pause_filter, "Pause-Intercept Filter"); |
1673 | 0 | P(cpu_has_tsc_ratio, "TSC Rate MSR"); |
1674 | 0 | #undef P |
1675 | 0 |
1676 | 0 | if ( !printed ) |
1677 | 0 | printk(" - none\n"); |
1678 | 0 |
1679 | 0 | svm_function_table.hap_supported = !!cpu_has_svm_npt; |
1680 | 0 | svm_function_table.hap_capabilities = HVM_HAP_SUPERPAGE_2MB | |
1681 | 0 | (cpu_has_page1gb ? HVM_HAP_SUPERPAGE_1GB : 0); |
1682 | 0 |
1683 | 0 | return &svm_function_table; |
1684 | 0 | } |
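/*
 * Editor's sketch (not part of svm.c): the cpu_has_svm_* predicates used in
 * start_svm() are bit tests against svm_feature_flags, i.e. against EDX of
 * CPUID leaf 0x8000000A. A hypothetical standalone equivalent:
 */
static bool demo_svm_feature(unsigned int bit)
{
    return cpuid_eax(0x80000000) >= 0x8000000A &&
           (cpuid_edx(0x8000000A) & (1u << bit));
}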
1685 | | |
1686 | | static void svm_do_nested_pgfault(struct vcpu *v, |
1687 | | struct cpu_user_regs *regs, uint64_t pfec, paddr_t gpa) |
1688 | 0 | { |
1689 | 0 | int ret; |
1690 | 0 | unsigned long gfn = gpa >> PAGE_SHIFT; |
1691 | 0 | mfn_t mfn; |
1692 | 0 | p2m_type_t p2mt; |
1693 | 0 | p2m_access_t p2ma; |
1694 | 0 | struct p2m_domain *p2m = NULL; |
1695 | 0 |
1696 | 0 | /* |
1697 | 0 | * Since HW doesn't explicitly provide a read access bit and we need to |
1698 | 0 | * somehow describe read-modify-write instructions we will conservatively |
1699 | 0 | * set read_access for all memory accesses that are not instruction fetches. |
1700 | 0 | */ |
1701 | 0 | struct npfec npfec = { |
1702 | 0 | .read_access = !(pfec & PFEC_insn_fetch), |
1703 | 0 | .write_access = !!(pfec & PFEC_write_access), |
1704 | 0 | .insn_fetch = !!(pfec & PFEC_insn_fetch), |
1705 | 0 | .present = !!(pfec & PFEC_page_present), |
1706 | 0 | }; |
1707 | 0 |
1708 | 0 | /* These bits are mutually exclusive */ |
1709 | 0 | if ( pfec & NPT_PFEC_with_gla ) |
1710 | 0 | npfec.kind = npfec_kind_with_gla; |
1711 | 0 | else if ( pfec & NPT_PFEC_in_gpt ) |
1712 | 0 | npfec.kind = npfec_kind_in_gpt; |
1713 | 0 |
1714 | 0 | ret = hvm_hap_nested_page_fault(gpa, ~0ul, npfec); |
1715 | 0 |
1716 | 0 | if ( tb_init_done ) |
1717 | 0 | { |
1718 | 0 | struct { |
1719 | 0 | uint64_t gpa; |
1720 | 0 | uint64_t mfn; |
1721 | 0 | uint32_t qualification; |
1722 | 0 | uint32_t p2mt; |
1723 | 0 | } _d; |
1724 | 0 |
1725 | 0 | p2m = p2m_get_p2m(v); |
1726 | 0 | _d.gpa = gpa; |
1727 | 0 | _d.qualification = 0; |
1728 | 0 | mfn = __get_gfn_type_access(p2m, gfn, &_d.p2mt, &p2ma, 0, NULL, 0); |
1729 | 0 | _d.mfn = mfn_x(mfn); |
1730 | 0 | |
1731 | 0 | __trace_var(TRC_HVM_NPF, 0, sizeof(_d), &_d); |
1732 | 0 | } |
1733 | 0 |
1734 | 0 | switch (ret) { |
1735 | 0 | case 0: |
1736 | 0 | break; |
1737 | 0 | case 1: |
1738 | 0 | return; |
1739 | 0 | case -1: |
1740 | 0 | ASSERT(nestedhvm_enabled(v->domain) && nestedhvm_vcpu_in_guestmode(v)); |
1741 | 0 | /* inject #VMEXIT(NPF) into guest. */ |
1742 | 0 | nestedsvm_vmexit_defer(v, VMEXIT_NPF, pfec, gpa); |
1743 | 0 | return; |
1744 | 0 | } |
1745 | 0 |
1746 | 0 | if ( p2m == NULL ) |
1747 | 0 | p2m = p2m_get_p2m(v); |
1748 | 0 | /* Everything else is an error. */ |
1749 | 0 | mfn = __get_gfn_type_access(p2m, gfn, &p2mt, &p2ma, 0, NULL, 0); |
1750 | 0 | gdprintk(XENLOG_ERR, |
1751 | 0 | "SVM violation gpa %#"PRIpaddr", mfn %#lx, type %i\n", |
1752 | 0 | gpa, mfn_x(mfn), p2mt); |
1753 | 0 | domain_crash(v->domain); |
1754 | 0 | } |
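/*
 * Editor's note (worked example, not part of svm.c): for a guest write to
 * a not-yet-populated GPA, hardware delivers exitinfo1 with
 * PFEC_write_access set and PFEC_page_present clear, so the mapping above
 * yields npfec = { .read_access = 1, .write_access = 1, .insn_fetch = 0,
 * .present = 0 } -- read_access is set even for a pure write because of the
 * conservative read-modify-write assumption described in the comment.
 */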
1755 | | |
1756 | | static void svm_fpu_dirty_intercept(void) |
1757 | 0 | { |
1758 | 0 | struct vcpu *v = current; |
1759 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
1760 | 0 | struct vmcb_struct *n1vmcb = vcpu_nestedhvm(v).nv_n1vmcx; |
1761 | 0 |
1762 | 0 | svm_fpu_enter(v); |
1763 | 0 |
1764 | 0 | if ( vmcb != n1vmcb ) |
1765 | 0 | { |
1766 | 0 | /* Check if l1 guest must make FPU ready for the l2 guest */ |
1767 | 0 | if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS ) |
1768 | 0 | hvm_inject_hw_exception(TRAP_no_device, X86_EVENT_NO_EC); |
1769 | 0 | else |
1770 | 0 | vmcb_set_cr0(n1vmcb, vmcb_get_cr0(n1vmcb) & ~X86_CR0_TS); |
1771 | 0 | return; |
1772 | 0 | } |
1773 | 0 |
1774 | 0 | if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) ) |
1775 | 0 | vmcb_set_cr0(vmcb, vmcb_get_cr0(vmcb) & ~X86_CR0_TS); |
1776 | 0 | } |
1777 | | |
1778 | | static void svm_vmexit_do_cpuid(struct cpu_user_regs *regs) |
1779 | 0 | { |
1780 | 0 | struct vcpu *curr = current; |
1781 | 0 | unsigned int inst_len; |
1782 | 0 | struct cpuid_leaf res; |
1783 | 0 |
1784 | 0 | if ( (inst_len = __get_instruction_length(curr, INSTR_CPUID)) == 0 ) |
1785 | 0 | return; |
1786 | 0 |
1787 | 0 | guest_cpuid(curr, regs->eax, regs->ecx, &res); |
1788 | 0 | HVMTRACE_5D(CPUID, regs->eax, res.a, res.b, res.c, res.d); |
1789 | 0 |
1790 | 0 | regs->rax = res.a; |
1791 | 0 | regs->rbx = res.b; |
1792 | 0 | regs->rcx = res.c; |
1793 | 0 | regs->rdx = res.d; |
1794 | 0 |
1795 | 0 | __update_guest_eip(regs, inst_len); |
1796 | 0 | } |
1797 | | |
1798 | | static void svm_vmexit_do_cr_access( |
1799 | | struct vmcb_struct *vmcb, struct cpu_user_regs *regs) |
1800 | 0 | { |
1801 | 0 | int gp, cr, dir, rc; |
1802 | 0 |
1803 | 0 | cr = vmcb->exitcode - VMEXIT_CR0_READ; |
1804 | 0 | dir = (cr > 15); |
1805 | 0 | cr &= 0xf; |
1806 | 0 | gp = vmcb->exitinfo1 & 0xf; |
1807 | 0 |
1808 | 0 | rc = dir ? hvm_mov_to_cr(cr, gp) : hvm_mov_from_cr(cr, gp); |
1809 | 0 |
1810 | 0 | if ( rc == X86EMUL_OKAY ) |
1811 | 0 | __update_guest_eip(regs, vmcb->nextrip - vmcb->rip); |
1812 | 0 | } |
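/*
 * Editor's note (worked example, not part of svm.c): the exit codes run
 * VMEXIT_CR0_READ..VMEXIT_CR15_READ and then VMEXIT_CR0_WRITE..
 * VMEXIT_CR15_WRITE, so a MOV to CR4 gives exitcode - VMEXIT_CR0_READ == 20,
 * hence dir = (20 > 15) = 1 (a write) and cr = 20 & 0xf = 4, while
 * exitinfo1[3:0] names the GPR operand.
 */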
1813 | | |
1814 | | static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs) |
1815 | 0 | { |
1816 | 0 | struct vmcb_struct *vmcb = vcpu_nestedhvm(v).nv_n1vmcx; |
1817 | 0 |
1818 | 0 | HVMTRACE_0D(DR_WRITE); |
1819 | 0 | __restore_debug_registers(vmcb, v); |
1820 | 0 | } |
1821 | | |
1822 | | static int svm_msr_read_intercept(unsigned int msr, uint64_t *msr_content) |
1823 | 0 | { |
1824 | 0 | int ret; |
1825 | 0 | struct vcpu *v = current; |
1826 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
1827 | 0 |
1828 | 0 | switch ( msr ) |
1829 | 0 | { |
1830 | 0 | case MSR_IA32_SYSENTER_CS: |
1831 | 0 | *msr_content = v->arch.hvm_svm.guest_sysenter_cs; |
1832 | 0 | break; |
1833 | 0 | case MSR_IA32_SYSENTER_ESP: |
1834 | 0 | *msr_content = v->arch.hvm_svm.guest_sysenter_esp; |
1835 | 0 | break; |
1836 | 0 | case MSR_IA32_SYSENTER_EIP: |
1837 | 0 | *msr_content = v->arch.hvm_svm.guest_sysenter_eip; |
1838 | 0 | break; |
1839 | 0 |
1840 | 0 | case MSR_IA32_MCx_MISC(4): /* Threshold register */ |
1841 | 0 | case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3: |
1842 | 0 | /* |
1843 | 0 | * MCA/MCE: We report that the threshold register is unavailable |
1844 | 0 | * for OS use (locked by the BIOS). |
1845 | 0 | */ |
1846 | 0 | *msr_content = 1ULL << 61; /* MC4_MISC.Locked */ |
1847 | 0 | break; |
1848 | 0 |
1849 | 0 | case MSR_IA32_EBC_FREQUENCY_ID: |
1850 | 0 | /* |
1851 | 0 | * This Intel-only register may be accessed if this HVM guest |
1852 | 0 | * has been migrated from an Intel host. The value zero is not |
1853 | 0 | * particularly meaningful, but at least avoids the guest crashing! |
1854 | 0 | */ |
1855 | 0 | *msr_content = 0; |
1856 | 0 | break; |
1857 | 0 |
1858 | 0 | case MSR_IA32_DEBUGCTLMSR: |
1859 | 0 | *msr_content = vmcb_get_debugctlmsr(vmcb); |
1860 | 0 | break; |
1861 | 0 |
1862 | 0 | case MSR_IA32_LASTBRANCHFROMIP: |
1863 | 0 | *msr_content = vmcb_get_lastbranchfromip(vmcb); |
1864 | 0 | break; |
1865 | 0 |
1866 | 0 | case MSR_IA32_LASTBRANCHTOIP: |
1867 | 0 | *msr_content = vmcb_get_lastbranchtoip(vmcb); |
1868 | 0 | break; |
1869 | 0 |
1870 | 0 | case MSR_IA32_LASTINTFROMIP: |
1871 | 0 | *msr_content = vmcb_get_lastintfromip(vmcb); |
1872 | 0 | break; |
1873 | 0 |
1874 | 0 | case MSR_IA32_LASTINTTOIP: |
1875 | 0 | *msr_content = vmcb_get_lastinttoip(vmcb); |
1876 | 0 | break; |
1877 | 0 |
1878 | 0 | case MSR_AMD64_LWP_CFG: |
1879 | 0 | *msr_content = v->arch.hvm_svm.guest_lwp_cfg; |
1880 | 0 | break; |
1881 | 0 |
1882 | 0 | case MSR_K7_PERFCTR0: |
1883 | 0 | case MSR_K7_PERFCTR1: |
1884 | 0 | case MSR_K7_PERFCTR2: |
1885 | 0 | case MSR_K7_PERFCTR3: |
1886 | 0 | case MSR_K7_EVNTSEL0: |
1887 | 0 | case MSR_K7_EVNTSEL1: |
1888 | 0 | case MSR_K7_EVNTSEL2: |
1889 | 0 | case MSR_K7_EVNTSEL3: |
1890 | 0 | case MSR_AMD_FAM15H_PERFCTR0: |
1891 | 0 | case MSR_AMD_FAM15H_PERFCTR1: |
1892 | 0 | case MSR_AMD_FAM15H_PERFCTR2: |
1893 | 0 | case MSR_AMD_FAM15H_PERFCTR3: |
1894 | 0 | case MSR_AMD_FAM15H_PERFCTR4: |
1895 | 0 | case MSR_AMD_FAM15H_PERFCTR5: |
1896 | 0 | case MSR_AMD_FAM15H_EVNTSEL0: |
1897 | 0 | case MSR_AMD_FAM15H_EVNTSEL1: |
1898 | 0 | case MSR_AMD_FAM15H_EVNTSEL2: |
1899 | 0 | case MSR_AMD_FAM15H_EVNTSEL3: |
1900 | 0 | case MSR_AMD_FAM15H_EVNTSEL4: |
1901 | 0 | case MSR_AMD_FAM15H_EVNTSEL5: |
1902 | 0 | if ( vpmu_do_rdmsr(msr, msr_content) ) |
1903 | 0 | goto gpf; |
1904 | 0 | break; |
1905 | 0 |
1906 | 0 | case MSR_AMD64_DR0_ADDRESS_MASK: |
1907 | 0 | if ( !v->domain->arch.cpuid->extd.dbext ) |
1908 | 0 | goto gpf; |
1909 | 0 | *msr_content = v->arch.hvm_svm.dr_mask[0]; |
1910 | 0 | break; |
1911 | 0 |
1912 | 0 | case MSR_AMD64_DR1_ADDRESS_MASK ... MSR_AMD64_DR3_ADDRESS_MASK: |
1913 | 0 | if ( !v->domain->arch.cpuid->extd.dbext ) |
1914 | 0 | goto gpf; |
1915 | 0 | *msr_content = |
1916 | 0 | v->arch.hvm_svm.dr_mask[msr - MSR_AMD64_DR1_ADDRESS_MASK + 1]; |
1917 | 0 | break; |
1918 | 0 |
1919 | 0 | case MSR_AMD_OSVW_ID_LENGTH: |
1920 | 0 | case MSR_AMD_OSVW_STATUS: |
1921 | 0 | ret = svm_handle_osvw(v, msr, msr_content, 1); |
1922 | 0 | if ( ret < 0 ) |
1923 | 0 | goto gpf; |
1924 | 0 | break; |
1925 | 0 |
1926 | 0 | default: |
1927 | 0 | ret = nsvm_rdmsr(v, msr, msr_content); |
1928 | 0 | if ( ret < 0 ) |
1929 | 0 | goto gpf; |
1930 | 0 | else if ( ret ) |
1931 | 0 | break; |
1932 | 0 |
1933 | 0 | if ( rdmsr_viridian_regs(msr, msr_content) || |
1934 | 0 | rdmsr_hypervisor_regs(msr, msr_content) ) |
1935 | 0 | break; |
1936 | 0 |
1937 | 0 | if ( rdmsr_safe(msr, *msr_content) == 0 ) |
1938 | 0 | break; |
1939 | 0 |
1940 | 0 | if ( boot_cpu_data.x86 == 0xf && msr == MSR_F10_BU_CFG ) |
1941 | 0 | { |
1942 | 0 | /* Win2k8 x64 reads this MSR on revF chips, where it |
1943 | 0 | * wasn't publicly available; it uses a magic constant |
1944 | 0 | * in %rdi as a password, which we don't have in |
1945 | 0 | * rdmsr_safe(). Since we'll ignore the later writes, |
1946 | 0 | * just use a plausible value here (the reset value from |
1947 | 0 | * rev10h chips) if the real CPU didn't provide one. */ |
1948 | 0 | *msr_content = 0x0000000010200020ull; |
1949 | 0 | break; |
1950 | 0 | } |
1951 | 0 |
1952 | 0 | goto gpf; |
1953 | 0 | } |
1954 | 0 |
1955 | 0 | HVM_DBG_LOG(DBG_LEVEL_MSR, "returns: ecx=%x, msr_value=%"PRIx64, |
1956 | 0 | msr, *msr_content); |
1957 | 0 | return X86EMUL_OKAY; |
1958 | 0 |
1959 | 0 | gpf: |
1960 | 0 | return X86EMUL_EXCEPTION; |
1961 | 0 | } |
1962 | | |
1963 | | static int svm_msr_write_intercept(unsigned int msr, uint64_t msr_content) |
1964 | 0 | { |
1965 | 0 | int ret, result = X86EMUL_OKAY; |
1966 | 0 | struct vcpu *v = current; |
1967 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
1968 | 0 | int sync = 0; |
1969 | 0 |
1970 | 0 | switch ( msr ) |
1971 | 0 | { |
1972 | 0 | case MSR_IA32_SYSENTER_CS: |
1973 | 0 | case MSR_IA32_SYSENTER_ESP: |
1974 | 0 | case MSR_IA32_SYSENTER_EIP: |
1975 | 0 | sync = 1; |
1976 | 0 | break; |
1977 | 0 | default: |
1978 | 0 | break; |
1979 | 0 | } |
1980 | 0 |
1981 | 0 | if ( sync ) |
1982 | 0 | svm_sync_vmcb(v); |
1983 | 0 |
1984 | 0 | switch ( msr ) |
1985 | 0 | { |
1986 | 0 | case MSR_IA32_SYSENTER_CS: |
1987 | 0 | vmcb->sysenter_cs = v->arch.hvm_svm.guest_sysenter_cs = msr_content; |
1988 | 0 | break; |
1989 | 0 | case MSR_IA32_SYSENTER_ESP: |
1990 | 0 | vmcb->sysenter_esp = v->arch.hvm_svm.guest_sysenter_esp = msr_content; |
1991 | 0 | break; |
1992 | 0 | case MSR_IA32_SYSENTER_EIP: |
1993 | 0 | vmcb->sysenter_eip = v->arch.hvm_svm.guest_sysenter_eip = msr_content; |
1994 | 0 | break; |
1995 | 0 |
1996 | 0 | case MSR_IA32_DEBUGCTLMSR: |
1997 | 0 | vmcb_set_debugctlmsr(vmcb, msr_content); |
1998 | 0 | if ( !msr_content || !cpu_has_svm_lbrv ) |
1999 | 0 | break; |
2000 | 0 | vmcb->lbr_control.fields.enable = 1; |
2001 | 0 | svm_disable_intercept_for_msr(v, MSR_IA32_DEBUGCTLMSR); |
2002 | 0 | svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHFROMIP); |
2003 | 0 | svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHTOIP); |
2004 | 0 | svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTFROMIP); |
2005 | 0 | svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTTOIP); |
2006 | 0 | break; |
2007 | 0 |
2008 | 0 | case MSR_IA32_LASTBRANCHFROMIP: |
2009 | 0 | vmcb_set_lastbranchfromip(vmcb, msr_content); |
2010 | 0 | break; |
2011 | 0 |
2012 | 0 | case MSR_IA32_LASTBRANCHTOIP: |
2013 | 0 | vmcb_set_lastbranchtoip(vmcb, msr_content); |
2014 | 0 | break; |
2015 | 0 |
2016 | 0 | case MSR_IA32_LASTINTFROMIP: |
2017 | 0 | vmcb_set_lastintfromip(vmcb, msr_content); |
2018 | 0 | break; |
2019 | 0 |
2020 | 0 | case MSR_IA32_LASTINTTOIP: |
2021 | 0 | vmcb_set_lastinttoip(vmcb, msr_content); |
2022 | 0 | break; |
2023 | 0 |
2024 | 0 | case MSR_AMD64_LWP_CFG: |
2025 | 0 | if ( svm_update_lwp_cfg(v, msr_content) < 0 ) |
2026 | 0 | goto gpf; |
2027 | 0 | break; |
2028 | 0 |
2029 | 0 | case MSR_K7_PERFCTR0: |
2030 | 0 | case MSR_K7_PERFCTR1: |
2031 | 0 | case MSR_K7_PERFCTR2: |
2032 | 0 | case MSR_K7_PERFCTR3: |
2033 | 0 | case MSR_K7_EVNTSEL0: |
2034 | 0 | case MSR_K7_EVNTSEL1: |
2035 | 0 | case MSR_K7_EVNTSEL2: |
2036 | 0 | case MSR_K7_EVNTSEL3: |
2037 | 0 | case MSR_AMD_FAM15H_PERFCTR0: |
2038 | 0 | case MSR_AMD_FAM15H_PERFCTR1: |
2039 | 0 | case MSR_AMD_FAM15H_PERFCTR2: |
2040 | 0 | case MSR_AMD_FAM15H_PERFCTR3: |
2041 | 0 | case MSR_AMD_FAM15H_PERFCTR4: |
2042 | 0 | case MSR_AMD_FAM15H_PERFCTR5: |
2043 | 0 | case MSR_AMD_FAM15H_EVNTSEL0: |
2044 | 0 | case MSR_AMD_FAM15H_EVNTSEL1: |
2045 | 0 | case MSR_AMD_FAM15H_EVNTSEL2: |
2046 | 0 | case MSR_AMD_FAM15H_EVNTSEL3: |
2047 | 0 | case MSR_AMD_FAM15H_EVNTSEL4: |
2048 | 0 | case MSR_AMD_FAM15H_EVNTSEL5: |
2049 | 0 | if ( vpmu_do_wrmsr(msr, msr_content, 0) ) |
2050 | 0 | goto gpf; |
2051 | 0 | break; |
2052 | 0 |
2053 | 0 | case MSR_IA32_MCx_MISC(4): /* Threshold register */ |
2054 | 0 | case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3: |
2055 | 0 | /* |
2056 | 0 | * MCA/MCE: Threshold register is reported to be locked, so we ignore |
2057 | 0 | * all write accesses. This behaviour matches real HW, so guests should |
2058 | 0 | * have no problem with this. |
2059 | 0 | */ |
2060 | 0 | break; |
2061 | 0 |
2062 | 0 | case MSR_AMD64_DR0_ADDRESS_MASK: |
2063 | 0 | if ( !v->domain->arch.cpuid->extd.dbext || (msr_content >> 32) ) |
2064 | 0 | goto gpf; |
2065 | 0 | v->arch.hvm_svm.dr_mask[0] = msr_content; |
2066 | 0 | break; |
2067 | 0 |
2068 | 0 | case MSR_AMD64_DR1_ADDRESS_MASK ... MSR_AMD64_DR3_ADDRESS_MASK: |
2069 | 0 | if ( !v->domain->arch.cpuid->extd.dbext || (msr_content >> 32) ) |
2070 | 0 | goto gpf; |
2071 | 0 | v->arch.hvm_svm.dr_mask[msr - MSR_AMD64_DR1_ADDRESS_MASK + 1] = |
2072 | 0 | msr_content; |
2073 | 0 | break; |
2074 | 0 |
2075 | 0 | case MSR_AMD_OSVW_ID_LENGTH: |
2076 | 0 | case MSR_AMD_OSVW_STATUS: |
2077 | 0 | ret = svm_handle_osvw(v, msr, &msr_content, 0); |
2078 | 0 | if ( ret < 0 ) |
2079 | 0 | goto gpf; |
2080 | 0 | break; |
2081 | 0 |
2082 | 0 | default: |
2083 | 0 | ret = nsvm_wrmsr(v, msr, msr_content); |
2084 | 0 | if ( ret < 0 ) |
2085 | 0 | goto gpf; |
2086 | 0 | else if ( ret ) |
2087 | 0 | break; |
2088 | 0 |
2089 | 0 | if ( wrmsr_viridian_regs(msr, msr_content) ) |
2090 | 0 | break; |
2091 | 0 |
2092 | 0 | switch ( wrmsr_hypervisor_regs(msr, msr_content) ) |
2093 | 0 | { |
2094 | 0 | case -ERESTART: |
2095 | 0 | result = X86EMUL_RETRY; |
2096 | 0 | break; |
2097 | 0 | case 0: |
2098 | 0 | case 1: |
2099 | 0 | break; |
2100 | 0 | default: |
2101 | 0 | goto gpf; |
2102 | 0 | } |
2103 | 0 | break; |
2104 | 0 | } |
2105 | 0 |
2106 | 0 | if ( sync ) |
2107 | 0 | svm_vmload(vmcb); |
2108 | 0 |
2109 | 0 | return result; |
2110 | 0 |
2111 | 0 | gpf: |
2112 | 0 | return X86EMUL_EXCEPTION; |
2113 | 0 | } |
2114 | | |
2115 | | static void svm_do_msr_access(struct cpu_user_regs *regs) |
2116 | 0 | { |
2117 | 0 | struct vcpu *curr = current; |
2118 | 0 | bool rdmsr = curr->arch.hvm_svm.vmcb->exitinfo1 == 0; |
2119 | 0 | int rc, inst_len = __get_instruction_length( |
2120 | 0 | curr, rdmsr ? INSTR_RDMSR : INSTR_WRMSR); |
2121 | 0 |
2122 | 0 | if ( inst_len == 0 ) |
2123 | 0 | return; |
2124 | 0 |
2125 | 0 | if ( rdmsr ) |
2126 | 0 | { |
2127 | 0 | uint64_t msr_content = 0; |
2128 | 0 |
2129 | 0 | rc = hvm_msr_read_intercept(regs->ecx, &msr_content); |
2130 | 0 | if ( rc == X86EMUL_OKAY ) |
2131 | 0 | msr_split(regs, msr_content); |
2132 | 0 | } |
2133 | 0 | else |
2134 | 0 | rc = hvm_msr_write_intercept(regs->ecx, msr_fold(regs), 1); |
2135 | 0 |
2136 | 0 | if ( rc == X86EMUL_OKAY ) |
2137 | 0 | __update_guest_eip(regs, inst_len); |
2138 | 0 | else if ( rc == X86EMUL_EXCEPTION ) |
2139 | 0 | hvm_inject_hw_exception(TRAP_gp_fault, 0); |
2140 | 0 | } |
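/*
 * Editor's sketch (not part of svm.c): msr_fold()/msr_split() used above
 * implement the architectural RDMSR/WRMSR convention that a 64-bit MSR
 * value travels in EDX:EAX. A minimal model, with hypothetical names:
 */
static inline uint64_t demo_msr_fold(uint32_t eax, uint32_t edx)
{
    return ((uint64_t)edx << 32) | eax;
}

static inline void demo_msr_split(uint64_t val, uint32_t *eax, uint32_t *edx)
{
    *eax = (uint32_t)val;            /* low half -> EAX */
    *edx = (uint32_t)(val >> 32);    /* high half -> EDX */
}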
2141 | | |
2142 | | static void svm_vmexit_do_hlt(struct vmcb_struct *vmcb, |
2143 | | struct cpu_user_regs *regs) |
2144 | 0 | { |
2145 | 0 | unsigned int inst_len; |
2146 | 0 |
2147 | 0 | if ( (inst_len = __get_instruction_length(current, INSTR_HLT)) == 0 ) |
2148 | 0 | return; |
2149 | 0 | __update_guest_eip(regs, inst_len); |
2150 | 0 |
2151 | 0 | hvm_hlt(regs->eflags); |
2152 | 0 | } |
2153 | | |
2154 | | static void svm_vmexit_do_rdtsc(struct cpu_user_regs *regs) |
2155 | 0 | { |
2156 | 0 | unsigned int inst_len; |
2157 | 0 |
2158 | 0 | if ( (inst_len = __get_instruction_length(current, INSTR_RDTSC)) == 0 ) |
2159 | 0 | return; |
2160 | 0 | __update_guest_eip(regs, inst_len); |
2161 | 0 |
2162 | 0 | hvm_rdtsc_intercept(regs); |
2163 | 0 | } |
2164 | | |
2165 | | static void svm_vmexit_do_pause(struct cpu_user_regs *regs) |
2166 | 0 | { |
2167 | 0 | unsigned int inst_len; |
2168 | 0 |
2169 | 0 | if ( (inst_len = __get_instruction_length(current, INSTR_PAUSE)) == 0 ) |
2170 | 0 | return; |
2171 | 0 | __update_guest_eip(regs, inst_len); |
2172 | 0 |
2173 | 0 | /* |
2174 | 0 | * The guest is running a contended spinlock and we've detected it. |
2175 | 0 | * Do something useful, like reschedule the guest |
2176 | 0 | */ |
2177 | 0 | perfc_incr(pauseloop_exits); |
2178 | 0 | do_sched_op(SCHEDOP_yield, guest_handle_from_ptr(NULL, void)); |
2179 | 0 | } |
2180 | | |
2181 | | static void |
2182 | | svm_vmexit_do_vmrun(struct cpu_user_regs *regs, |
2183 | | struct vcpu *v, uint64_t vmcbaddr) |
2184 | 0 | { |
2185 | 0 | if ( !nsvm_efer_svm_enabled(v) ) |
2186 | 0 | { |
2187 | 0 | gdprintk(XENLOG_ERR, "VMRUN: nestedhvm disabled, injecting #UD\n"); |
2188 | 0 | hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC); |
2189 | 0 | return; |
2190 | 0 | } |
2191 | 0 |
2192 | 0 | if ( !nestedsvm_vmcb_map(v, vmcbaddr) ) |
2193 | 0 | { |
2194 | 0 | gdprintk(XENLOG_ERR, "VMRUN: mapping vmcb failed, injecting #GP\n"); |
2195 | 0 | hvm_inject_hw_exception(TRAP_gp_fault, 0); |
2196 | 0 | return; |
2197 | 0 | } |
2198 | 0 |
2199 | 0 | vcpu_nestedhvm(v).nv_vmentry_pending = 1; |
2200 | 0 | return; |
2201 | 0 | } |
2202 | | |
2203 | | static struct page_info * |
2204 | | nsvm_get_nvmcb_page(struct vcpu *v, uint64_t vmcbaddr) |
2205 | 0 | { |
2206 | 0 | p2m_type_t p2mt; |
2207 | 0 | struct page_info *page; |
2208 | 0 | struct nestedvcpu *nv = &vcpu_nestedhvm(v); |
2209 | 0 |
2210 | 0 | if ( !nestedsvm_vmcb_map(v, vmcbaddr) ) |
2211 | 0 | return NULL; |
2212 | 0 |
2213 | 0 | /* Need to translate L1-GPA to MPA */ |
2214 | 0 | page = get_page_from_gfn(v->domain, |
2215 | 0 | nv->nv_vvmcxaddr >> PAGE_SHIFT, |
2216 | 0 | &p2mt, P2M_ALLOC | P2M_UNSHARE); |
2217 | 0 | if ( !page ) |
2218 | 0 | return NULL; |
2219 | 0 |
2220 | 0 | if ( !p2m_is_ram(p2mt) || p2m_is_readonly(p2mt) ) |
2221 | 0 | { |
2222 | 0 | put_page(page); |
2223 | 0 | return NULL; |
2224 | 0 | } |
2225 | 0 |
2226 | 0 | return page; |
2227 | 0 | } |
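/*
 * Editor's note (schematic, not part of svm.c): the p2m checks above are
 * the usual "writable guest RAM or bail" pattern -- VMLOAD/VMSAVE read and
 * write the page directly, so MMIO, read-only and shared mappings must all
 * be refused:
 *
 *   page = get_page_from_gfn(d, gfn, &t, P2M_ALLOC | P2M_UNSHARE);
 *   if ( page && p2m_is_ram(t) && !p2m_is_readonly(t) )
 *       use(page);                  // caller put_page()s when done
 *   else
 *       reject();                   // callers above turn this into #GP
 */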
2228 | | |
2229 | | static void |
2230 | | svm_vmexit_do_vmload(struct vmcb_struct *vmcb, |
2231 | | struct cpu_user_regs *regs, |
2232 | | struct vcpu *v, uint64_t vmcbaddr) |
2233 | 0 | { |
2234 | 0 | unsigned int inst_len; |
2235 | 0 | struct page_info *page; |
2236 | 0 |
2237 | 0 | if ( (inst_len = __get_instruction_length(v, INSTR_VMLOAD)) == 0 ) |
2238 | 0 | return; |
2239 | 0 |
2240 | 0 | if ( !nsvm_efer_svm_enabled(v) ) |
2241 | 0 | { |
2242 | 0 | gdprintk(XENLOG_ERR, "VMLOAD: nestedhvm disabled, injecting #UD\n"); |
2243 | 0 | hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC); |
2244 | 0 | return; |
2245 | 0 | } |
2246 | 0 |
2247 | 0 | page = nsvm_get_nvmcb_page(v, vmcbaddr); |
2248 | 0 | if ( !page ) |
2249 | 0 | { |
2250 | 0 | gdprintk(XENLOG_ERR, |
2251 | 0 | "VMLOAD: mapping failed, injecting #GP\n"); |
2252 | 0 | hvm_inject_hw_exception(TRAP_gp_fault, 0); |
2253 | 0 | return; |
2254 | 0 | } |
2255 | 0 |
2256 | 0 | svm_vmload_pa(page_to_maddr(page)); |
2257 | 0 | put_page(page); |
2258 | 0 |
2259 | 0 | /* State in L1 VMCB is stale now */ |
2260 | 0 | v->arch.hvm_svm.vmcb_in_sync = 0; |
2261 | 0 |
2262 | 0 | __update_guest_eip(regs, inst_len); |
2263 | 0 | } |
2264 | | |
2265 | | static void |
2266 | | svm_vmexit_do_vmsave(struct vmcb_struct *vmcb, |
2267 | | struct cpu_user_regs *regs, |
2268 | | struct vcpu *v, uint64_t vmcbaddr) |
2269 | 0 | { |
2270 | 0 | unsigned int inst_len; |
2271 | 0 | struct page_info *page; |
2272 | 0 |
2273 | 0 | if ( (inst_len = __get_instruction_length(v, INSTR_VMSAVE)) == 0 ) |
2274 | 0 | return; |
2275 | 0 |
2276 | 0 | if ( !nsvm_efer_svm_enabled(v) ) |
2277 | 0 | { |
2278 | 0 | gdprintk(XENLOG_ERR, "VMSAVE: nestedhvm disabled, injecting #UD\n"); |
2279 | 0 | hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC); |
2280 | 0 | return; |
2281 | 0 | } |
2282 | 0 |
2283 | 0 | page = nsvm_get_nvmcb_page(v, vmcbaddr); |
2284 | 0 | if ( !page ) |
2285 | 0 | { |
2286 | 0 | gdprintk(XENLOG_ERR, |
2287 | 0 | "VMSAVE: mapping vmcb failed, injecting #GP\n"); |
2288 | 0 | hvm_inject_hw_exception(TRAP_gp_fault, 0); |
2289 | 0 | return; |
2290 | 0 | } |
2291 | 0 |
2292 | 0 | svm_vmsave_pa(page_to_maddr(page)); |
2293 | 0 | put_page(page); |
2294 | 0 | __update_guest_eip(regs, inst_len); |
2295 | 0 | } |
2296 | | |
2297 | | static int svm_is_erratum_383(struct cpu_user_regs *regs) |
2298 | 0 | { |
2299 | 0 | uint64_t msr_content; |
2300 | 0 | uint32_t i; |
2301 | 0 | struct vcpu *v = current; |
2302 | 0 |
2303 | 0 | if ( !amd_erratum383_found ) |
2304 | 0 | return 0; |
2305 | 0 |
2306 | 0 | rdmsrl(MSR_IA32_MC0_STATUS, msr_content); |
2307 | 0 | /* Bit 62 may or may not be set for this mce */ |
2308 | 0 | msr_content &= ~(1ULL << 62); |
2309 | 0 |
2310 | 0 | if ( msr_content != 0xb600000000010015ULL ) |
2311 | 0 | return 0; |
2312 | 0 | |
2313 | 0 | /* Clear MCi_STATUS registers */ |
2314 | 0 | for (i = 0; i < nr_mce_banks; i++) |
2315 | 0 | wrmsrl(MSR_IA32_MCx_STATUS(i), 0ULL); |
2316 | 0 | |
2317 | 0 | rdmsrl(MSR_IA32_MCG_STATUS, msr_content); |
2318 | 0 | wrmsrl(MSR_IA32_MCG_STATUS, msr_content & ~(1ULL << 2)); |
2319 | 0 |
2320 | 0 | /* flush TLB */ |
2321 | 0 | flush_tlb_mask(v->domain->domain_dirty_cpumask); |
2322 | 0 |
2323 | 0 | return 1; |
2324 | 0 | } |
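/*
 * Editor's note (not part of svm.c): 0xb600000000010015 is the MC0_STATUS
 * signature of erratum 383; the high byte 0xb6 appears to decode to
 * VAL(63)|UC(61)|EN(60)|ADDRV(58)|PCC(57). Bit 62 is masked first because
 * it is MCi_STATUS.OVER, which a second, overlapping machine check may or
 * may not have set.
 */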
2325 | | |
2326 | | static void svm_vmexit_mce_intercept( |
2327 | | struct vcpu *v, struct cpu_user_regs *regs) |
2328 | 0 | { |
2329 | 0 | if ( svm_is_erratum_383(regs) ) |
2330 | 0 | { |
2331 | 0 | gdprintk(XENLOG_ERR, "SVM hits AMD erratum 383\n"); |
2332 | 0 | domain_crash(v->domain); |
2333 | 0 | } |
2334 | 0 | } |
2335 | | |
2336 | | static void svm_wbinvd_intercept(void) |
2337 | 0 | { |
2338 | 0 | if ( cache_flush_permitted(current->domain) ) |
2339 | 0 | flush_all(FLUSH_CACHE); |
2340 | 0 | } |
2341 | | |
2342 | | static void svm_vmexit_do_invalidate_cache(struct cpu_user_regs *regs) |
2343 | 0 | { |
2344 | 0 | static const enum instruction_index list[] = { INSTR_INVD, INSTR_WBINVD }; |
2345 | 0 | int inst_len; |
2346 | 0 |
2347 | 0 | inst_len = __get_instruction_length_from_list( |
2348 | 0 | current, list, ARRAY_SIZE(list)); |
2349 | 0 | if ( inst_len == 0 ) |
2350 | 0 | return; |
2351 | 0 |
2352 | 0 | svm_wbinvd_intercept(); |
2353 | 0 |
2354 | 0 | __update_guest_eip(regs, inst_len); |
2355 | 0 | } |
2356 | | |
2357 | | static void svm_invlpga_intercept( |
2358 | | struct vcpu *v, unsigned long vaddr, uint32_t asid) |
2359 | 0 | { |
2360 | 0 | svm_invlpga(vaddr, |
2361 | 0 | (asid == 0) |
2362 | 0 | ? v->arch.hvm_vcpu.n1asid.asid |
2363 | 0 | : vcpu_nestedhvm(v).nv_n2asid.asid); |
2364 | 0 | } |
2365 | | |
2366 | | static void svm_invlpg_intercept(unsigned long vaddr) |
2367 | 0 | { |
2368 | 0 | HVMTRACE_LONG_2D(INVLPG, 0, TRC_PAR_LONG(vaddr)); |
2369 | 0 | paging_invlpg(current, vaddr); |
2370 | 0 | } |
2371 | | |
2372 | | static bool is_invlpg(const struct x86_emulate_state *state, |
2373 | | const struct x86_emulate_ctxt *ctxt) |
2374 | 0 | { |
2375 | 0 | unsigned int ext; |
2376 | 0 |
2377 | 0 | return ctxt->opcode == X86EMUL_OPC(0x0f, 0x01) && |
2378 | 0 | x86_insn_modrm(state, NULL, &ext) != 3 && |
2379 | 0 | (ext & 7) == 7; |
2380 | 0 | } |
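/*
 * Editor's note (worked example, not part of svm.c): INVLPG encodes as
 * 0F 01 /7 with a memory operand. x86_insn_modrm() returning != 3 rejects
 * the register forms (mod == 3 in group 0F 01 /7 encodes e.g. SWAPGS and
 * RDTSCP), and (ext & 7) == 7 selects opcode extension /7 within the group.
 */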
2381 | | |
2382 | | static void svm_invlpg(struct vcpu *v, unsigned long vaddr) |
2383 | 0 | { |
2384 | 0 | svm_asid_g_invlpg(v, vaddr); |
2385 | 0 | } |
2386 | | |
2387 | | static bool svm_get_pending_event(struct vcpu *v, struct x86_event *info) |
2388 | 0 | { |
2389 | 0 | const struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
2390 | 0 |
2391 | 0 | if ( !vmcb->eventinj.fields.v ) |
2392 | 0 | return false; |
2393 | 0 |
2394 | 0 | info->vector = vmcb->eventinj.fields.vector; |
2395 | 0 | info->type = vmcb->eventinj.fields.type; |
2396 | 0 | info->error_code = vmcb->eventinj.fields.errorcode; |
2397 | 0 |
2398 | 0 | return true; |
2399 | 0 | } |
2400 | | |
2401 | | static struct hvm_function_table __initdata svm_function_table = { |
2402 | | .name = "SVM", |
2403 | | .cpu_up_prepare = svm_cpu_up_prepare, |
2404 | | .cpu_dead = svm_cpu_dead, |
2405 | | .cpu_up = svm_cpu_up, |
2406 | | .cpu_down = svm_cpu_down, |
2407 | | .domain_initialise = svm_domain_initialise, |
2408 | | .domain_destroy = svm_domain_destroy, |
2409 | | .vcpu_initialise = svm_vcpu_initialise, |
2410 | | .vcpu_destroy = svm_vcpu_destroy, |
2411 | | .save_cpu_ctxt = svm_save_vmcb_ctxt, |
2412 | | .load_cpu_ctxt = svm_load_vmcb_ctxt, |
2413 | | .init_msr = svm_init_msr, |
2414 | | .save_msr = svm_save_msr, |
2415 | | .load_msr = svm_load_msr, |
2416 | | .get_interrupt_shadow = svm_get_interrupt_shadow, |
2417 | | .set_interrupt_shadow = svm_set_interrupt_shadow, |
2418 | | .guest_x86_mode = svm_guest_x86_mode, |
2419 | | .get_cpl = svm_get_cpl, |
2420 | | .get_segment_register = svm_get_segment_register, |
2421 | | .set_segment_register = svm_set_segment_register, |
2422 | | .get_shadow_gs_base = svm_get_shadow_gs_base, |
2423 | | .update_guest_cr = svm_update_guest_cr, |
2424 | | .update_guest_efer = svm_update_guest_efer, |
2425 | | .update_guest_vendor = svm_update_guest_vendor, |
2426 | | .fpu_leave = svm_fpu_leave, |
2427 | | .set_guest_pat = svm_set_guest_pat, |
2428 | | .get_guest_pat = svm_get_guest_pat, |
2429 | | .set_tsc_offset = svm_set_tsc_offset, |
2430 | | .inject_event = svm_inject_event, |
2431 | | .init_hypercall_page = svm_init_hypercall_page, |
2432 | | .event_pending = svm_event_pending, |
2433 | | .get_pending_event = svm_get_pending_event, |
2434 | | .invlpg = svm_invlpg, |
2435 | | .wbinvd_intercept = svm_wbinvd_intercept, |
2436 | | .fpu_dirty_intercept = svm_fpu_dirty_intercept, |
2437 | | .msr_read_intercept = svm_msr_read_intercept, |
2438 | | .msr_write_intercept = svm_msr_write_intercept, |
2439 | | .set_rdtsc_exiting = svm_set_rdtsc_exiting, |
2440 | | .set_descriptor_access_exiting = svm_set_descriptor_access_exiting, |
2441 | | .get_insn_bytes = svm_get_insn_bytes, |
2442 | | |
2443 | | .nhvm_vcpu_initialise = nsvm_vcpu_initialise, |
2444 | | .nhvm_vcpu_destroy = nsvm_vcpu_destroy, |
2445 | | .nhvm_vcpu_reset = nsvm_vcpu_reset, |
2446 | | .nhvm_vcpu_vmexit_event = nsvm_vcpu_vmexit_event, |
2447 | | .nhvm_vcpu_p2m_base = nsvm_vcpu_hostcr3, |
2448 | | .nhvm_vmcx_guest_intercepts_event = nsvm_vmcb_guest_intercepts_event, |
2449 | | .nhvm_vmcx_hap_enabled = nsvm_vmcb_hap_enabled, |
2450 | | .nhvm_intr_blocked = nsvm_intr_blocked, |
2451 | | .nhvm_hap_walk_L1_p2m = nsvm_hap_walk_L1_p2m, |
2452 | | |
2453 | | .tsc_scaling = { |
2454 | | .max_ratio = ~TSC_RATIO_RSVD_BITS, |
2455 | | }, |
2456 | | }; |
2457 | | |
2458 | | void svm_vmexit_handler(struct cpu_user_regs *regs) |
2459 | 0 | { |
2460 | 0 | uint64_t exit_reason; |
2461 | 0 | struct vcpu *v = current; |
2462 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
2463 | 0 | eventinj_t eventinj; |
2464 | 0 | int inst_len, rc; |
2465 | 0 | vintr_t intr; |
2466 | 0 | bool_t vcpu_guestmode = 0; |
2467 | 0 | struct vlapic *vlapic = vcpu_vlapic(v); |
2468 | 0 |
2469 | 0 | hvm_invalidate_regs_fields(regs); |
2470 | 0 |
2471 | 0 | if ( paging_mode_hap(v->domain) ) |
2472 | 0 | v->arch.hvm_vcpu.guest_cr[3] = v->arch.hvm_vcpu.hw_cr[3] = |
2473 | 0 | vmcb_get_cr3(vmcb); |
2474 | 0 |
2475 | 0 | if ( nestedhvm_enabled(v->domain) && nestedhvm_vcpu_in_guestmode(v) ) |
2476 | 0 | vcpu_guestmode = 1; |
2477 | 0 |
2478 | 0 | /* |
2479 | 0 | * Before doing anything else, we need to sync up the VLAPIC's TPR with |
2480 | 0 | * SVM's vTPR. It's OK if the guest doesn't touch CR8 (e.g. 32-bit Windows) |
2481 | 0 | * because we update the vTPR on MMIO writes to the TPR. |
2482 | 0 | * NB. We need to preserve the low bits of the TPR to make checked builds |
2483 | 0 | * of Windows work, even though they don't actually do anything. |
2484 | 0 | */ |
2485 | 0 | if ( !vcpu_guestmode && !vlapic_hw_disabled(vlapic) ) |
2486 | 0 | { |
2487 | 0 | intr = vmcb_get_vintr(vmcb); |
2488 | 0 | vlapic_set_reg(vlapic, APIC_TASKPRI, |
2489 | 0 | ((intr.fields.tpr & 0x0F) << 4) | |
2490 | 0 | (vlapic_get_reg(vlapic, APIC_TASKPRI) & 0x0F)); |
2491 | 0 | } |
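/*
 * Editor's note (worked example, not part of svm.c): the vTPR is TPR[7:4],
 * so a guest CR8 of 0x9 corresponds to TASKPRI 0x90; the merge above shifts
 * the vTPR into the high nibble and keeps TASKPRI[3:0] from the vLAPIC,
 * which hardware never modifies.
 */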
2492 | 0 |
2493 | 0 | exit_reason = vmcb->exitcode; |
2494 | 0 |
2495 | 0 | if ( hvm_long_mode_active(v) ) |
2496 | 0 | HVMTRACE_ND(VMEXIT64, vcpu_guestmode ? TRC_HVM_NESTEDFLAG : 0, |
2497 | 0 | 1/*cycles*/, 3, exit_reason, |
2498 | 0 | regs->eip, regs->rip >> 32, 0, 0, 0); |
2499 | 0 | else |
2500 | 0 | HVMTRACE_ND(VMEXIT, vcpu_guestmode ? TRC_HVM_NESTEDFLAG : 0, |
2501 | 0 | 1/*cycles*/, 2, exit_reason, |
2502 | 0 | regs->eip, 0, 0, 0, 0); |
2503 | 0 |
2504 | 0 | if ( vcpu_guestmode ) { |
2505 | 0 | enum nestedhvm_vmexits nsret; |
2506 | 0 | struct nestedvcpu *nv = &vcpu_nestedhvm(v); |
2507 | 0 | struct vmcb_struct *ns_vmcb = nv->nv_vvmcx; |
2508 | 0 | uint64_t exitinfo1, exitinfo2; |
2509 | 0 |
2510 | 0 | paging_update_nestedmode(v); |
2511 | 0 |
2512 | 0 | /* Write real exitinfo1 back into virtual vmcb. |
2513 | 0 | * nestedsvm_check_intercepts() expects to have the correct |
2514 | 0 | * exitinfo1 value there. |
2515 | 0 | */ |
2516 | 0 | exitinfo1 = ns_vmcb->exitinfo1; |
2517 | 0 | ns_vmcb->exitinfo1 = vmcb->exitinfo1; |
2518 | 0 | nsret = nestedsvm_check_intercepts(v, regs, exit_reason); |
2519 | 0 | switch (nsret) { |
2520 | 0 | case NESTEDHVM_VMEXIT_CONTINUE: |
2521 | 0 | BUG(); |
2522 | 0 | break; |
2523 | 0 | case NESTEDHVM_VMEXIT_HOST: |
2524 | 0 | break; |
2525 | 0 | case NESTEDHVM_VMEXIT_INJECT: |
2526 | 0 | /* Switch vcpu from l2 to l1 guest. We must perform |
2527 | 0 | * the switch here to have svm_do_resume() working |
2528 | 0 | * as intended. |
2529 | 0 | */ |
2530 | 0 | exitinfo1 = vmcb->exitinfo1; |
2531 | 0 | exitinfo2 = vmcb->exitinfo2; |
2532 | 0 | nv->nv_vmswitch_in_progress = 1; |
2533 | 0 | nsret = nestedsvm_vmexit_n2n1(v, regs); |
2534 | 0 | nv->nv_vmswitch_in_progress = 0; |
2535 | 0 | switch (nsret) { |
2536 | 0 | case NESTEDHVM_VMEXIT_DONE: |
2537 | 0 | /* defer VMEXIT injection */ |
2538 | 0 | nestedsvm_vmexit_defer(v, exit_reason, exitinfo1, exitinfo2); |
2539 | 0 | goto out; |
2540 | 0 | case NESTEDHVM_VMEXIT_FATALERROR: |
2541 | 0 | gdprintk(XENLOG_ERR, "unexpected nestedsvm_vmexit() error\n"); |
2542 | 0 | domain_crash(v->domain); |
2543 | 0 | goto out; |
2544 | 0 | default: |
2545 | 0 | BUG(); |
2546 | 0 | case NESTEDHVM_VMEXIT_ERROR: |
2547 | 0 | break; |
2548 | 0 | } |
2549 | 0 | /* fallthrough */ |
2550 | 0 | case NESTEDHVM_VMEXIT_ERROR: |
2551 | 0 | gdprintk(XENLOG_ERR, |
2552 | 0 | "nestedsvm_check_intercepts() returned NESTEDHVM_VMEXIT_ERROR\n"); |
2553 | 0 | goto out; |
2554 | 0 | case NESTEDHVM_VMEXIT_FATALERROR: |
2555 | 0 | gdprintk(XENLOG_ERR, |
2556 | 0 | "unexpected nestedsvm_check_intercepts() error\n"); |
2557 | 0 | domain_crash(v->domain); |
2558 | 0 | goto out; |
2559 | 0 | default: |
2560 | 0 | gdprintk(XENLOG_INFO, "nestedsvm_check_intercepts() returned %i\n", |
2561 | 0 | nsret); |
2562 | 0 | domain_crash(v->domain); |
2563 | 0 | goto out; |
2564 | 0 | } |
2565 | 0 | } |
2566 | 0 |
2567 | 0 | if ( unlikely(exit_reason == VMEXIT_INVALID) ) |
2568 | 0 | { |
2569 | 0 | gdprintk(XENLOG_ERR, "invalid VMCB state:\n"); |
2570 | 0 | svm_vmcb_dump(__func__, vmcb); |
2571 | 0 | domain_crash(v->domain); |
2572 | 0 | goto out; |
2573 | 0 | } |
2574 | 0 |
2575 | 0 | perfc_incra(svmexits, exit_reason); |
2576 | 0 |
2577 | 0 | hvm_maybe_deassert_evtchn_irq(); |
2578 | 0 |
2579 | 0 | vmcb->cleanbits.bytes = cpu_has_svm_cleanbits ? ~0u : 0u; |
2580 | 0 |
2581 | 0 | /* Event delivery caused this intercept? Queue for redelivery. */ |
2582 | 0 | eventinj = vmcb->exitintinfo; |
2583 | 0 | if ( unlikely(eventinj.fields.v) && |
2584 | 0 | hvm_event_needs_reinjection(eventinj.fields.type, |
2585 | 0 | eventinj.fields.vector) ) |
2586 | 0 | vmcb->eventinj = eventinj; |
2587 | 0 |
2588 | 0 | switch ( exit_reason ) |
2589 | 0 | { |
2590 | 0 | case VMEXIT_INTR: |
2591 | 0 | /* Asynchronous event, handled when we STGI'd after the VMEXIT. */ |
2592 | 0 | HVMTRACE_0D(INTR); |
2593 | 0 | break; |
2594 | 0 |
2595 | 0 | case VMEXIT_NMI: |
2596 | 0 | /* Asynchronous event, handled when we STGI'd after the VMEXIT. */ |
2597 | 0 | HVMTRACE_0D(NMI); |
2598 | 0 | break; |
2599 | 0 |
2600 | 0 | case VMEXIT_SMI: |
2601 | 0 | /* Asynchronous event, handled when we STGI'd after the VMEXIT. */ |
2602 | 0 | HVMTRACE_0D(SMI); |
2603 | 0 | break; |
2604 | 0 |
2605 | 0 | case VMEXIT_EXCEPTION_DB: |
2606 | 0 | if ( !v->domain->debugger_attached ) |
2607 | 0 | hvm_inject_hw_exception(TRAP_debug, X86_EVENT_NO_EC); |
2608 | 0 | else |
2609 | 0 | domain_pause_for_debugger(); |
2610 | 0 | break; |
2611 | 0 |
2612 | 0 | case VMEXIT_EXCEPTION_BP: |
2613 | 0 | if ( !v->domain->debugger_attached ) |
2614 | 0 | goto unexpected_exit_type; |
2615 | 0 | /* AMD Vol2, 15.11: INT3, INTO, BOUND intercepts do not update RIP. */ |
2616 | 0 | if ( (inst_len = __get_instruction_length(v, INSTR_INT3)) == 0 ) |
2617 | 0 | break; |
2618 | 0 | __update_guest_eip(regs, inst_len); |
2619 | 0 | current->arch.gdbsx_vcpu_event = TRAP_int3; |
2620 | 0 | domain_pause_for_debugger(); |
2621 | 0 | break; |
2622 | 0 |
2623 | 0 | case VMEXIT_EXCEPTION_NM: |
2624 | 0 | svm_fpu_dirty_intercept(); |
2625 | 0 | break; |
2626 | 0 |
2627 | 0 | case VMEXIT_EXCEPTION_PF: { |
2628 | 0 | unsigned long va; |
2629 | 0 | va = vmcb->exitinfo2; |
2630 | 0 | regs->error_code = vmcb->exitinfo1; |
2631 | 0 | HVM_DBG_LOG(DBG_LEVEL_VMMU, |
2632 | 0 | "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx", |
2633 | 0 | regs->rax, regs->rbx, regs->rcx, |
2634 | 0 | regs->rdx, regs->rsi, regs->rdi); |
2635 | 0 |
2636 | 0 | if ( cpu_has_svm_decode ) |
2637 | 0 | v->arch.hvm_svm.cached_insn_len = vmcb->guest_ins_len & 0xf; |
2638 | 0 | rc = paging_fault(va, regs); |
2639 | 0 | v->arch.hvm_svm.cached_insn_len = 0; |
2640 | 0 |
2641 | 0 | if ( rc ) |
2642 | 0 | { |
2643 | 0 | if ( trace_will_trace_event(TRC_SHADOW) ) |
2644 | 0 | break; |
2645 | 0 | if ( hvm_long_mode_active(v) ) |
2646 | 0 | HVMTRACE_LONG_2D(PF_XEN, regs->error_code, TRC_PAR_LONG(va)); |
2647 | 0 | else |
2648 | 0 | HVMTRACE_2D(PF_XEN, regs->error_code, va); |
2649 | 0 | break; |
2650 | 0 | } |
2651 | 0 |
2652 | 0 | hvm_inject_page_fault(regs->error_code, va); |
2653 | 0 | break; |
2654 | 0 | } |
2655 | 0 |
2656 | 0 | case VMEXIT_EXCEPTION_AC: |
2657 | 0 | HVMTRACE_1D(TRAP, TRAP_alignment_check); |
2658 | 0 | hvm_inject_hw_exception(TRAP_alignment_check, vmcb->exitinfo1); |
2659 | 0 | break; |
2660 | 0 |
2661 | 0 | case VMEXIT_EXCEPTION_UD: |
2662 | 0 | hvm_ud_intercept(regs); |
2663 | 0 | break; |
2664 | 0 |
2665 | 0 | /* Asynchronous event, handled when we STGI'd after the VMEXIT. */ |
2666 | 0 | case VMEXIT_EXCEPTION_MC: |
2667 | 0 | HVMTRACE_0D(MCE); |
2668 | 0 | svm_vmexit_mce_intercept(v, regs); |
2669 | 0 | break; |
2670 | 0 |
2671 | 0 | case VMEXIT_VINTR: { |
2672 | 0 | u32 general1_intercepts = vmcb_get_general1_intercepts(vmcb); |
2673 | 0 | intr = vmcb_get_vintr(vmcb); |
2674 | 0 |
2675 | 0 | intr.fields.irq = 0; |
2676 | 0 | general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR; |
2677 | 0 |
2678 | 0 | vmcb_set_vintr(vmcb, intr); |
2679 | 0 | vmcb_set_general1_intercepts(vmcb, general1_intercepts); |
2680 | 0 | break; |
2681 | 0 | } |
2682 | 0 |
2683 | 0 | case VMEXIT_INVD: |
2684 | 0 | case VMEXIT_WBINVD: |
2685 | 0 | svm_vmexit_do_invalidate_cache(regs); |
2686 | 0 | break; |
2687 | 0 |
2688 | 0 | case VMEXIT_TASK_SWITCH: { |
2689 | 0 | enum hvm_task_switch_reason reason; |
2690 | 0 | int32_t errcode = -1; |
2691 | 0 | if ( (vmcb->exitinfo2 >> 36) & 1 ) |
2692 | 0 | reason = TSW_iret; |
2693 | 0 | else if ( (vmcb->exitinfo2 >> 38) & 1 ) |
2694 | 0 | reason = TSW_jmp; |
2695 | 0 | else |
2696 | 0 | reason = TSW_call_or_int; |
2697 | 0 | if ( (vmcb->exitinfo2 >> 44) & 1 ) |
2698 | 0 | errcode = (uint32_t)vmcb->exitinfo2; |
2699 | 0 |
2700 | 0 | /* |
2701 | 0 | * Some processors set the EXITINTINFO field when the task switch |
2702 | 0 | * is caused by a task gate in the IDT. In this case we will be |
2703 | 0 | * emulating the event injection, so we do not want the processor |
2704 | 0 | * to re-inject the original event! |
2705 | 0 | */ |
2706 | 0 | vmcb->eventinj.bytes = 0; |
2707 | 0 |
2708 | 0 | hvm_task_switch((uint16_t)vmcb->exitinfo1, reason, errcode); |
2709 | 0 | break; |
2710 | 0 | } |
2711 | 0 |
2712 | 0 | case VMEXIT_CPUID: |
2713 | 0 | svm_vmexit_do_cpuid(regs); |
2714 | 0 | break; |
2715 | 0 |
2716 | 0 | case VMEXIT_HLT: |
2717 | 0 | svm_vmexit_do_hlt(vmcb, regs); |
2718 | 0 | break; |
2719 | 0 |
2720 | 0 | case VMEXIT_IOIO: |
2721 | 0 | if ( (vmcb->exitinfo1 & (1u<<2)) == 0 ) |
2722 | 0 | { |
2723 | 0 | uint16_t port = (vmcb->exitinfo1 >> 16) & 0xFFFF; |
2724 | 0 | int bytes = ((vmcb->exitinfo1 >> 4) & 0x07); |
2725 | 0 | int dir = (vmcb->exitinfo1 & 1) ? IOREQ_READ : IOREQ_WRITE; |
2726 | 0 | if ( handle_pio(port, bytes, dir) ) |
2727 | 0 | __update_guest_eip(regs, vmcb->exitinfo2 - vmcb->rip); |
2728 | 0 | } |
2729 | 0 | else if ( !hvm_emulate_one_insn(x86_insn_is_portio, "port I/O") ) |
2730 | 0 | hvm_inject_hw_exception(TRAP_gp_fault, 0); |
2731 | 0 | break; |
2732 | 0 |
2733 | 0 | case VMEXIT_CR0_READ ... VMEXIT_CR15_READ: |
2734 | 0 | case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE: |
2735 | 0 | if ( cpu_has_svm_decode && (vmcb->exitinfo1 & (1ULL << 63)) ) |
2736 | 0 | svm_vmexit_do_cr_access(vmcb, regs); |
2737 | 0 | else if ( !hvm_emulate_one_insn(x86_insn_is_cr_access, "CR access") ) |
2738 | 0 | hvm_inject_hw_exception(TRAP_gp_fault, 0); |
2739 | 0 | break; |
2740 | 0 |
2741 | 0 | case VMEXIT_INVLPG: |
2742 | 0 | if ( cpu_has_svm_decode ) |
2743 | 0 | { |
2744 | 0 | svm_invlpg_intercept(vmcb->exitinfo1); |
2745 | 0 | __update_guest_eip(regs, vmcb->nextrip - vmcb->rip); |
2746 | 0 | } |
2747 | 0 | else if ( !hvm_emulate_one_insn(is_invlpg, "invlpg") ) |
2748 | 0 | hvm_inject_hw_exception(TRAP_gp_fault, 0); |
2749 | 0 | break; |
2750 | 0 |
2751 | 0 | case VMEXIT_INVLPGA: |
2752 | 0 | if ( (inst_len = __get_instruction_length(v, INSTR_INVLPGA)) == 0 ) |
2753 | 0 | break; |
2754 | 0 | svm_invlpga_intercept(v, regs->rax, regs->ecx); |
2755 | 0 | __update_guest_eip(regs, inst_len); |
2756 | 0 | break; |
2757 | 0 |
2758 | 0 | case VMEXIT_VMMCALL: |
2759 | 0 | if ( (inst_len = __get_instruction_length(v, INSTR_VMCALL)) == 0 ) |
2760 | 0 | break; |
2761 | 0 | BUG_ON(vcpu_guestmode); |
2762 | 0 | HVMTRACE_1D(VMMCALL, regs->eax); |
2763 | 0 |
2764 | 0 | if ( hvm_hypercall(regs) == HVM_HCALL_completed ) |
2765 | 0 | __update_guest_eip(regs, inst_len); |
2766 | 0 | break; |
2767 | 0 |
2768 | 0 | case VMEXIT_DR0_READ ... VMEXIT_DR7_READ: |
2769 | 0 | case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE: |
2770 | 0 | svm_dr_access(v, regs); |
2771 | 0 | break; |
2772 | 0 |
2773 | 0 | case VMEXIT_MSR: |
2774 | 0 | svm_do_msr_access(regs); |
2775 | 0 | break; |
2776 | 0 |
2777 | 0 | case VMEXIT_SHUTDOWN: |
2778 | 0 | hvm_triple_fault(); |
2779 | 0 | break; |
2780 | 0 |
2781 | 0 | case VMEXIT_RDTSCP: |
2782 | 0 | regs->rcx = hvm_msr_tsc_aux(v); |
2783 | 0 | /* fall through */ |
2784 | 0 | case VMEXIT_RDTSC: |
2785 | 0 | svm_vmexit_do_rdtsc(regs); |
2786 | 0 | break; |
2787 | 0 |
2788 | 0 | case VMEXIT_MONITOR: |
2789 | 0 | case VMEXIT_MWAIT: |
2790 | 0 | hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC); |
2791 | 0 | break; |
2792 | 0 |
2793 | 0 | case VMEXIT_VMRUN: |
2794 | 0 | svm_vmexit_do_vmrun(regs, v, regs->rax); |
2795 | 0 | break; |
2796 | 0 | case VMEXIT_VMLOAD: |
2797 | 0 | svm_vmexit_do_vmload(vmcb, regs, v, regs->rax); |
2798 | 0 | break; |
2799 | 0 | case VMEXIT_VMSAVE: |
2800 | 0 | svm_vmexit_do_vmsave(vmcb, regs, v, regs->rax); |
2801 | 0 | break; |
2802 | 0 | case VMEXIT_STGI: |
2803 | 0 | svm_vmexit_do_stgi(regs, v); |
2804 | 0 | break; |
2805 | 0 | case VMEXIT_CLGI: |
2806 | 0 | svm_vmexit_do_clgi(regs, v); |
2807 | 0 | break; |
2808 | 0 | case VMEXIT_SKINIT: |
2809 | 0 | hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC); |
2810 | 0 | break; |
2811 | 0 |
2812 | 0 | case VMEXIT_XSETBV: |
2813 | 0 | if ( vmcb_get_cpl(vmcb) ) |
2814 | 0 | hvm_inject_hw_exception(TRAP_gp_fault, 0); |
2815 | 0 | else if ( (inst_len = __get_instruction_length(v, INSTR_XSETBV)) && |
2816 | 0 | hvm_handle_xsetbv(regs->ecx, msr_fold(regs)) == 0 ) |
2817 | 0 | __update_guest_eip(regs, inst_len); |
2818 | 0 | break; |
2819 | 0 |
2820 | 0 | case VMEXIT_NPF: |
2821 | 0 | perfc_incra(svmexits, VMEXIT_NPF_PERFC); |
2822 | 0 | if ( cpu_has_svm_decode ) |
2823 | 0 | v->arch.hvm_svm.cached_insn_len = vmcb->guest_ins_len & 0xf; |
2824 | 0 | rc = vmcb->exitinfo1 & PFEC_page_present |
2825 | 0 | ? p2m_pt_handle_deferred_changes(vmcb->exitinfo2) : 0; |
2826 | 0 | if ( rc >= 0 ) |
2827 | 0 | svm_do_nested_pgfault(v, regs, vmcb->exitinfo1, vmcb->exitinfo2); |
2828 | 0 | else |
2829 | 0 | { |
2830 | 0 | printk(XENLOG_G_ERR |
2831 | 0 | "%pv: Error %d handling NPF (gpa=%08lx ec=%04lx)\n", |
2832 | 0 | v, rc, vmcb->exitinfo2, vmcb->exitinfo1); |
2833 | 0 | domain_crash(v->domain); |
2834 | 0 | } |
2835 | 0 | v->arch.hvm_svm.cached_insn_len = 0; |
2836 | 0 | break; |
2837 | 0 |
2838 | 0 | case VMEXIT_IRET: { |
2839 | 0 | u32 general1_intercepts = vmcb_get_general1_intercepts(vmcb); |
2840 | 0 |
2841 | 0 | /* |
2842 | 0 | * IRET clears the NMI mask. However because we clear the mask |
2843 | 0 | * /before/ executing IRET, we set the interrupt shadow to prevent |
2844 | 0 | * a pending NMI from being injected immediately. This will work |
2845 | 0 | * perfectly unless the IRET instruction faults: in that case we |
2846 | 0 | * may inject an NMI before the NMI handler's IRET instruction is |
2847 | 0 | * retired. |
2848 | 0 | */ |
2849 | 0 | general1_intercepts &= ~GENERAL1_INTERCEPT_IRET; |
2850 | 0 | vmcb->interrupt_shadow = 1; |
2851 | 0 |
2852 | 0 | vmcb_set_general1_intercepts(vmcb, general1_intercepts); |
2853 | 0 | break; |
2854 | 0 | } |
2855 | 0 |
2856 | 0 | case VMEXIT_PAUSE: |
2857 | 0 | svm_vmexit_do_pause(regs); |
2858 | 0 | break; |
2859 | 0 |
2860 | 0 | case VMEXIT_IDTR_READ: |
2861 | 0 | case VMEXIT_IDTR_WRITE: |
2862 | 0 | hvm_descriptor_access_intercept(vmcb->exitintinfo.bytes, 0, |
2863 | 0 | VM_EVENT_DESC_IDTR, exit_reason == VMEXIT_IDTR_WRITE); |
2864 | 0 | break; |
2865 | 0 |
2866 | 0 | case VMEXIT_GDTR_READ: |
2867 | 0 | case VMEXIT_GDTR_WRITE: |
2868 | 0 | hvm_descriptor_access_intercept(vmcb->exitintinfo.bytes, 0, |
2869 | 0 | VM_EVENT_DESC_GDTR, exit_reason == VMEXIT_GDTR_WRITE); |
2870 | 0 | break; |
2871 | 0 |
2872 | 0 | case VMEXIT_LDTR_READ: |
2873 | 0 | case VMEXIT_LDTR_WRITE: |
2874 | 0 | hvm_descriptor_access_intercept(vmcb->exitintinfo.bytes, 0, |
2875 | 0 | VM_EVENT_DESC_LDTR, exit_reason == VMEXIT_LDTR_WRITE); |
2876 | 0 | break; |
2877 | 0 |
2878 | 0 | case VMEXIT_TR_READ: |
2879 | 0 | case VMEXIT_TR_WRITE: |
2880 | 0 | hvm_descriptor_access_intercept(vmcb->exitintinfo.bytes, 0, |
2881 | 0 | VM_EVENT_DESC_TR, exit_reason == VMEXIT_TR_WRITE); |
2882 | 0 | break; |
2883 | 0 |
2884 | 0 | default: |
2885 | 0 | unexpected_exit_type: |
2886 | 0 | gprintk(XENLOG_ERR, "Unexpected vmexit: reason %#"PRIx64", " |
2887 | 0 | "exitinfo1 %#"PRIx64", exitinfo2 %#"PRIx64"\n", |
2888 | 0 | exit_reason, vmcb->exitinfo1, vmcb->exitinfo2); |
2889 | 0 | svm_crash_or_fault(v); |
2890 | 0 | break; |
2891 | 0 | } |
2892 | 0 |
2893 | 0 | out: |
2894 | 0 | if ( vcpu_guestmode || vlapic_hw_disabled(vlapic) ) |
2895 | 0 | return; |
2896 | 0 |
2897 | 0 | /* The exit may have updated the TPR: reflect this in the hardware vtpr */ |
2898 | 0 | intr = vmcb_get_vintr(vmcb); |
2899 | 0 | intr.fields.tpr = |
2900 | 0 | (vlapic_get_reg(vlapic, APIC_TASKPRI) & 0xFF) >> 4; |
2901 | 0 | vmcb_set_vintr(vmcb, intr); |
2902 | 0 | } |
2903 | | |
2904 | | void svm_trace_vmentry(void) |
2905 | 0 | { |
2906 | 0 | struct vcpu *curr = current; |
2907 | 0 | HVMTRACE_ND(VMENTRY, |
2908 | 0 | nestedhvm_vcpu_in_guestmode(curr) ? TRC_HVM_NESTEDFLAG : 0, |
2909 | 0 | 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0); |
2910 | 0 | } |
2911 | | |
2912 | | /* |
2913 | | * Local variables: |
2914 | | * mode: C |
2915 | | * c-file-style: "BSD" |
2916 | | * c-basic-offset: 4 |
2917 | | * tab-width: 4 |
2918 | | * indent-tabs-mode: nil |
2919 | | * End: |
2920 | | */ |