/root/src/xen/xen/arch/x86/hvm/svm/nestedsvm.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * nestedsvm.c: Nested Virtualization |
3 | | * Copyright (c) 2011, Advanced Micro Devices, Inc |
4 | | * |
5 | | * This program is free software; you can redistribute it and/or modify it |
6 | | * under the terms and conditions of the GNU General Public License, |
7 | | * version 2, as published by the Free Software Foundation. |
8 | | * |
9 | | * This program is distributed in the hope it will be useful, but WITHOUT |
10 | | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
11 | | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
12 | | * more details. |
13 | | * |
14 | | * You should have received a copy of the GNU General Public License along with |
15 | | * this program; If not, see <http://www.gnu.org/licenses/>. |
16 | | * |
17 | | */ |
18 | | |
19 | | #include <asm/hvm/support.h> |
20 | | #include <asm/hvm/svm/emulate.h> |
21 | | #include <asm/hvm/svm/svm.h> |
22 | | #include <asm/hvm/svm/vmcb.h> |
23 | | #include <asm/hvm/nestedhvm.h> |
24 | | #include <asm/hvm/svm/nestedsvm.h> |
25 | | #include <asm/hvm/svm/svmdebug.h> |
26 | | #include <asm/paging.h> /* paging_mode_hap */ |
27 | | #include <asm/event.h> /* for local_event_delivery_(en|dis)able */ |
28 | | #include <asm/p2m.h> /* p2m_get_pagetable, p2m_get_nestedp2m */ |
29 | | |
30 | | |
31 | 0 | #define NSVM_ERROR_VVMCB 1 |
32 | 0 | #define NSVM_ERROR_VMENTRY 2 |
33 | | |
34 | | static void |
35 | | nestedsvm_vcpu_clgi(struct vcpu *v) |
36 | 0 | { |
37 | 0 | /* clear gif flag */ |
38 | 0 | vcpu_nestedsvm(v).ns_gif = 0; |
39 | 0 | local_event_delivery_disable(); /* mask events for PV drivers */ |
40 | 0 | } |
41 | | |
42 | | static void |
43 | | nestedsvm_vcpu_stgi(struct vcpu *v) |
44 | 0 | { |
45 | 0 | /* enable gif flag */ |
46 | 0 | vcpu_nestedsvm(v).ns_gif = 1; |
47 | 0 | local_event_delivery_enable(); /* unmask events for PV drivers */ |
48 | 0 | } |
49 | | |
50 | | static int |
51 | | nestedsvm_vmcb_isvalid(struct vcpu *v, uint64_t vmcxaddr) |
52 | 0 | { |
53 | 0 | /* Address must be 4k aligned */ |
54 | 0 | if ( (vmcxaddr & ~PAGE_MASK) != 0 ) |
55 | 0 | return 0; |
56 | 0 |
|
57 | 0 | /* Maximum valid physical address. |
58 | 0 | * See AMD BKDG for HSAVE_PA MSR. |
59 | 0 | */ |
60 | 0 | if ( vmcxaddr > 0xfd00000000ULL ) |
61 | 0 | return 0; |
62 | 0 |
|
63 | 0 | return 1; |
64 | 0 | } |
65 | | |
66 | | int nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr) |
67 | 0 | { |
68 | 0 | struct nestedvcpu *nv = &vcpu_nestedhvm(v); |
69 | 0 |
|
70 | 0 | if (nv->nv_vvmcx != NULL && nv->nv_vvmcxaddr != vmcbaddr) { |
71 | 0 | ASSERT(nv->nv_vvmcxaddr != INVALID_PADDR); |
72 | 0 | hvm_unmap_guest_frame(nv->nv_vvmcx, 1); |
73 | 0 | nv->nv_vvmcx = NULL; |
74 | 0 | nv->nv_vvmcxaddr = INVALID_PADDR; |
75 | 0 | } |
76 | 0 |
|
77 | 0 | if ( !nv->nv_vvmcx ) |
78 | 0 | { |
79 | 0 | bool_t writable; |
80 | 0 | void *vvmcx = hvm_map_guest_frame_rw(paddr_to_pfn(vmcbaddr), 1, |
81 | 0 | &writable); |
82 | 0 |
|
83 | 0 | if ( !vvmcx ) |
84 | 0 | return 0; |
85 | 0 | if ( !writable ) |
86 | 0 | { |
87 | 0 | hvm_unmap_guest_frame(vvmcx, 1); |
88 | 0 | return 0; |
89 | 0 | } |
90 | 0 | nv->nv_vvmcx = vvmcx; |
91 | 0 | nv->nv_vvmcxaddr = vmcbaddr; |
92 | 0 | } |
93 | 0 |
|
94 | 0 | return 1; |
95 | 0 | } |
96 | | |
97 | | /* Interface methods */ |
98 | | int nsvm_vcpu_initialise(struct vcpu *v) |
99 | 0 | { |
100 | 0 | void *msrpm; |
101 | 0 | struct nestedvcpu *nv = &vcpu_nestedhvm(v); |
102 | 0 | struct nestedsvm *svm = &vcpu_nestedsvm(v); |
103 | 0 |
|
104 | 0 | msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE), 0); |
105 | 0 | svm->ns_cached_msrpm = msrpm; |
106 | 0 | if (msrpm == NULL) |
107 | 0 | goto err; |
108 | 0 | memset(msrpm, 0x0, MSRPM_SIZE); |
109 | 0 |
|
110 | 0 | msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE), 0); |
111 | 0 | svm->ns_merged_msrpm = msrpm; |
112 | 0 | if (msrpm == NULL) |
113 | 0 | goto err; |
114 | 0 | memset(msrpm, 0x0, MSRPM_SIZE); |
115 | 0 |
|
116 | 0 | nv->nv_n2vmcx = alloc_vmcb(); |
117 | 0 | if (nv->nv_n2vmcx == NULL) |
118 | 0 | goto err; |
119 | 0 | nv->nv_n2vmcx_pa = virt_to_maddr(nv->nv_n2vmcx); |
120 | 0 |
|
121 | 0 | return 0; |
122 | 0 |
|
123 | 0 | err: |
124 | 0 | nsvm_vcpu_destroy(v); |
125 | 0 | return -ENOMEM; |
126 | 0 | } |
127 | | |
128 | | void nsvm_vcpu_destroy(struct vcpu *v) |
129 | 0 | { |
130 | 0 | struct nestedvcpu *nv = &vcpu_nestedhvm(v); |
131 | 0 | struct nestedsvm *svm = &vcpu_nestedsvm(v); |
132 | 0 |
|
133 | 0 | /* |
134 | 0 | * When destroying the vcpu, it may be running on behalf of l2 guest. |
135 | 0 | * Therefore we need to switch the VMCB pointer back to the l1 vmcb, |
136 | 0 | * in order to avoid double free of l2 vmcb and the possible memory leak |
137 | 0 | * of l1 vmcb page. |
138 | 0 | */ |
139 | 0 | if (nv->nv_n1vmcx) |
140 | 0 | v->arch.hvm_svm.vmcb = nv->nv_n1vmcx; |
141 | 0 |
|
142 | 0 | if (svm->ns_cached_msrpm) { |
143 | 0 | free_xenheap_pages(svm->ns_cached_msrpm, |
144 | 0 | get_order_from_bytes(MSRPM_SIZE)); |
145 | 0 | svm->ns_cached_msrpm = NULL; |
146 | 0 | } |
147 | 0 | if (svm->ns_merged_msrpm) { |
148 | 0 | free_xenheap_pages(svm->ns_merged_msrpm, |
149 | 0 | get_order_from_bytes(MSRPM_SIZE)); |
150 | 0 | svm->ns_merged_msrpm = NULL; |
151 | 0 | } |
152 | 0 | hvm_unmap_guest_frame(nv->nv_vvmcx, 1); |
153 | 0 | nv->nv_vvmcx = NULL; |
154 | 0 | if (nv->nv_n2vmcx) { |
155 | 0 | free_vmcb(nv->nv_n2vmcx); |
156 | 0 | nv->nv_n2vmcx = NULL; |
157 | 0 | nv->nv_n2vmcx_pa = INVALID_PADDR; |
158 | 0 | } |
159 | 0 | if (svm->ns_iomap) |
160 | 0 | svm->ns_iomap = NULL; |
161 | 0 | } |
162 | | |
163 | | int nsvm_vcpu_reset(struct vcpu *v) |
164 | 0 | { |
165 | 0 | struct nestedsvm *svm = &vcpu_nestedsvm(v); |
166 | 0 |
|
167 | 0 | svm->ns_msr_hsavepa = INVALID_PADDR; |
168 | 0 | svm->ns_ovvmcb_pa = INVALID_PADDR; |
169 | 0 |
|
170 | 0 | svm->ns_tscratio = DEFAULT_TSC_RATIO; |
171 | 0 |
|
172 | 0 | svm->ns_cr_intercepts = 0; |
173 | 0 | svm->ns_dr_intercepts = 0; |
174 | 0 | svm->ns_exception_intercepts = 0; |
175 | 0 | svm->ns_general1_intercepts = 0; |
176 | 0 | svm->ns_general2_intercepts = 0; |
177 | 0 | svm->ns_lbr_control.bytes = 0; |
178 | 0 |
|
179 | 0 | svm->ns_hap_enabled = 0; |
180 | 0 | svm->ns_vmcb_guestcr3 = 0; |
181 | 0 | svm->ns_vmcb_hostcr3 = 0; |
182 | 0 | svm->ns_guest_asid = 0; |
183 | 0 | svm->ns_hostflags.bytes = 0; |
184 | 0 | svm->ns_vmexit.exitinfo1 = 0; |
185 | 0 | svm->ns_vmexit.exitinfo2 = 0; |
186 | 0 |
|
187 | 0 | if (svm->ns_iomap) |
188 | 0 | svm->ns_iomap = NULL; |
189 | 0 |
|
190 | 0 | nestedsvm_vcpu_stgi(v); |
191 | 0 | return 0; |
192 | 0 | } |
193 | | |
194 | | static uint64_t nestedsvm_fpu_vmentry(uint64_t n1cr0, |
195 | | struct vmcb_struct *vvmcb, |
196 | | struct vmcb_struct *n1vmcb, struct vmcb_struct *n2vmcb) |
197 | 0 | { |
198 | 0 | uint64_t vcr0; |
199 | 0 |
|
200 | 0 | vcr0 = vvmcb->_cr0; |
201 | 0 | if ( !(n1cr0 & X86_CR0_TS) && (n1vmcb->_cr0 & X86_CR0_TS) ) { |
202 | 0 | /* svm_fpu_leave() run while l1 guest was running. |
203 | 0 | * Sync FPU state with l2 guest. |
204 | 0 | */ |
205 | 0 | vcr0 |= X86_CR0_TS; |
206 | 0 | n2vmcb->_exception_intercepts |= (1U << TRAP_no_device); |
207 | 0 | } else if ( !(vcr0 & X86_CR0_TS) && (n2vmcb->_cr0 & X86_CR0_TS) ) { |
208 | 0 | /* svm_fpu_enter() run while l1 guest was running. |
209 | 0 | * Sync FPU state with l2 guest. */ |
210 | 0 | vcr0 &= ~X86_CR0_TS; |
211 | 0 | n2vmcb->_exception_intercepts &= ~(1U << TRAP_no_device); |
212 | 0 | } |
213 | 0 |
|
214 | 0 | return vcr0; |
215 | 0 | } |
216 | | |
217 | | static void nestedsvm_fpu_vmexit(struct vmcb_struct *n1vmcb, |
218 | | struct vmcb_struct *n2vmcb, uint64_t n1cr0, uint64_t guest_cr0) |
219 | 0 | { |
220 | 0 | if ( !(guest_cr0 & X86_CR0_TS) && (n2vmcb->_cr0 & X86_CR0_TS) ) { |
221 | 0 | /* svm_fpu_leave() run while l2 guest was running. |
222 | 0 | * Sync FPU state with l1 guest. */ |
223 | 0 | n1vmcb->_cr0 |= X86_CR0_TS; |
224 | 0 | n1vmcb->_exception_intercepts |= (1U << TRAP_no_device); |
225 | 0 | } else if ( !(n1cr0 & X86_CR0_TS) && (n1vmcb->_cr0 & X86_CR0_TS) ) { |
226 | 0 | /* svm_fpu_enter() run while l2 guest was running. |
227 | 0 | * Sync FPU state with l1 guest. */ |
228 | 0 | n1vmcb->_cr0 &= ~X86_CR0_TS; |
229 | 0 | n1vmcb->_exception_intercepts &= ~(1U << TRAP_no_device); |
230 | 0 | } |
231 | 0 | } |
232 | | |
233 | | static int nsvm_vcpu_hostsave(struct vcpu *v, unsigned int inst_len) |
234 | 0 | { |
235 | 0 | struct nestedsvm *svm = &vcpu_nestedsvm(v); |
236 | 0 | struct nestedvcpu *nv = &vcpu_nestedhvm(v); |
237 | 0 | struct vmcb_struct *n1vmcb; |
238 | 0 |
|
239 | 0 | n1vmcb = nv->nv_n1vmcx; |
240 | 0 | ASSERT(n1vmcb != NULL); |
241 | 0 |
|
242 | 0 | n1vmcb->rip += inst_len; |
243 | 0 |
|
244 | 0 | /* Save shadowed values. This ensures that the l1 guest |
245 | 0 | * cannot override them to break out. */ |
246 | 0 | n1vmcb->_efer = v->arch.hvm_vcpu.guest_efer; |
247 | 0 | n1vmcb->_cr0 = v->arch.hvm_vcpu.guest_cr[0]; |
248 | 0 | n1vmcb->_cr2 = v->arch.hvm_vcpu.guest_cr[2]; |
249 | 0 | n1vmcb->_cr4 = v->arch.hvm_vcpu.guest_cr[4]; |
250 | 0 |
|
251 | 0 | /* Remember the host interrupt flag */ |
252 | 0 | svm->ns_hostflags.fields.rflagsif = |
253 | 0 | (n1vmcb->rflags & X86_EFLAGS_IF) ? 1 : 0; |
254 | 0 |
|
255 | 0 | return 0; |
256 | 0 | } |
257 | | |
/*
 * Emulated #VMEXIT: restore the l1 guest ("host") state saved by
 * nsvm_vcpu_hostsave() and switch the vcpu back to running on the l1
 * vmcb.  @regs is updated with the l1 register state.  Failures of the
 * individual hvm_set_*() calls are logged but not propagated; always
 * returns 0.
 */
static int nsvm_vcpu_hostrestore(struct vcpu *v, struct cpu_user_regs *regs)
{
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
    struct vmcb_struct *n1vmcb, *n2vmcb;
    int rc;

    n1vmcb = nv->nv_n1vmcx;
    n2vmcb = nv->nv_n2vmcx;
    ASSERT(n1vmcb != NULL);
    ASSERT(n2vmcb != NULL);

    /* nsvm_vmcb_prepare4vmexit() already saved register values
     * handled by VMSAVE/VMLOAD into n1vmcb directly.
     */

    /* switch vmcb to l1 guest's vmcb */
    v->arch.hvm_svm.vmcb = n1vmcb;
    v->arch.hvm_svm.vmcb_pa = nv->nv_n1vmcx_pa;

    /* EFER */
    v->arch.hvm_vcpu.guest_efer = n1vmcb->_efer;
    rc = hvm_set_efer(n1vmcb->_efer);
    if ( rc == X86EMUL_EXCEPTION )
        hvm_inject_hw_exception(TRAP_gp_fault, 0);
    if (rc != X86EMUL_OKAY)
        gdprintk(XENLOG_ERR, "hvm_set_efer failed, rc: %u\n", rc);

    /* CR4 */
    v->arch.hvm_vcpu.guest_cr[4] = n1vmcb->_cr4;
    rc = hvm_set_cr4(n1vmcb->_cr4, 1);
    if ( rc == X86EMUL_EXCEPTION )
        hvm_inject_hw_exception(TRAP_gp_fault, 0);
    if (rc != X86EMUL_OKAY)
        gdprintk(XENLOG_ERR, "hvm_set_cr4 failed, rc: %u\n", rc);

    /* CR0 */
    nestedsvm_fpu_vmexit(n1vmcb, n2vmcb,
        svm->ns_cr0, v->arch.hvm_vcpu.guest_cr[0]);
    /*
     * CR0.PE is forced on and EFLAGS.VM cleared — presumably because the
     * l1 hypervisor must have executed VMRUN from protected mode (see the
     * AMD APM VMRUN description) — TODO confirm.
     */
    v->arch.hvm_vcpu.guest_cr[0] = n1vmcb->_cr0 | X86_CR0_PE;
    n1vmcb->rflags &= ~X86_EFLAGS_VM;
    rc = hvm_set_cr0(n1vmcb->_cr0 | X86_CR0_PE, 1);
    if ( rc == X86EMUL_EXCEPTION )
        hvm_inject_hw_exception(TRAP_gp_fault, 0);
    if (rc != X86EMUL_OKAY)
        gdprintk(XENLOG_ERR, "hvm_set_cr0 failed, rc: %u\n", rc);
    svm->ns_cr0 = v->arch.hvm_vcpu.guest_cr[0];

    /* CR2 */
    v->arch.hvm_vcpu.guest_cr[2] = n1vmcb->_cr2;
    hvm_update_guest_cr(v, 2);

    /* CR3 */
    /* Nested paging mode */
    if (nestedhvm_paging_mode_hap(v)) {
        /* host nested paging + guest nested paging. */
        /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
    } else if (paging_mode_hap(v->domain)) {
        /* host nested paging + guest shadow paging. */
        /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
    } else {
        /* host shadow paging + guest shadow paging. */

        /* Reset MMU context  -- XXX (hostrestore) not yet working*/
        if (!pagetable_is_null(v->arch.guest_table))
            put_page(pagetable_get_page(v->arch.guest_table));
        v->arch.guest_table = pagetable_null();
        /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
    }
    rc = hvm_set_cr3(n1vmcb->_cr3, 1);
    if ( rc == X86EMUL_EXCEPTION )
        hvm_inject_hw_exception(TRAP_gp_fault, 0);
    if (rc != X86EMUL_OKAY)
        gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);

    /* Hand the saved l1 register state back to the caller. */
    regs->rax = n1vmcb->rax;
    regs->rsp = n1vmcb->rsp;
    regs->rip = n1vmcb->rip;
    regs->rflags = n1vmcb->rflags;
    n1vmcb->_dr7 = 0; /* disable all breakpoints */
    n1vmcb->_cpl = 0;

    /* Clear exitintinfo to prevent a fault loop of re-injecting
     * exceptions forever.
     */
    n1vmcb->exitintinfo.bytes = 0;

    /* Cleanbits */
    n1vmcb->cleanbits.bytes = 0;

    return 0;
}
350 | | |
/*
 * Build the MSR and I/O permission maps for the l2 guest: cache the l1
 * guest's MSR permission map, merge it with the host's, and pick a
 * shadow I/O permission map depending on whether l1 intercepts ports
 * 0x80 / 0xED.  The resulting maps are installed in the host vmcb.
 * Returns 0 on success, 1 if the l1 MSR map cannot be copied.
 *
 * NOTE(review): the @viopm parameter (the iopm cleanbit) is currently
 * unused — the maps are rebuilt on every call.
 */
static int nsvm_vmrun_permissionmap(struct vcpu *v, bool_t viopm)
{
    struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
    struct vmcb_struct *ns_vmcb = nv->nv_vvmcx;
    struct vmcb_struct *host_vmcb = arch_svm->vmcb;
    unsigned long *ns_msrpm_ptr;
    unsigned int i;
    enum hvm_translation_result ret;
    unsigned long *ns_viomap;
    bool_t ioport_80 = 1, ioport_ed = 1;

    ns_msrpm_ptr = (unsigned long *)svm->ns_cached_msrpm;

    ret = hvm_copy_from_guest_phys(svm->ns_cached_msrpm,
                                   ns_vmcb->_msrpm_base_pa, MSRPM_SIZE);
    if ( ret != HVMTRANS_okay )
    {
        gdprintk(XENLOG_ERR, "hvm_copy_from_guest_phys msrpm %u\n", ret);
        return 1;
    }

    /* Check l1 guest io permission map and get a shadow one based on
     * if l1 guest intercepts io ports 0x80 and/or 0xED.
     */
    svm->ns_oiomap_pa = svm->ns_iomap_pa;
    svm->ns_iomap_pa = ns_vmcb->_iopm_base_pa;

    /* If the l1 iopm cannot be mapped, assume both ports intercepted. */
    ns_viomap = hvm_map_guest_frame_ro(svm->ns_iomap_pa >> PAGE_SHIFT, 0);
    if ( ns_viomap )
    {
        ioport_80 = test_bit(0x80, ns_viomap);
        ioport_ed = test_bit(0xed, ns_viomap);
        hvm_unmap_guest_frame(ns_viomap, 0);
    }

    svm->ns_iomap = nestedhvm_vcpu_iomap_get(ioport_80, ioport_ed);

    nv->nv_ioport80 = ioport_80;
    nv->nv_ioportED = ioport_ed;

    /* v->arch.hvm_svm.msrpm has type unsigned long, thus
     * BYTES_PER_LONG.
     */
    for (i = 0; i < MSRPM_SIZE / BYTES_PER_LONG; i++)
        svm->ns_merged_msrpm[i] = arch_svm->msrpm[i] | ns_msrpm_ptr[i];

    /* Point the host vmcb at the merged/shadow maps. */
    host_vmcb->_iopm_base_pa =
        (uint64_t)virt_to_maddr(svm->ns_iomap);
    host_vmcb->_msrpm_base_pa =
        (uint64_t)virt_to_maddr(svm->ns_merged_msrpm);

    return 0;
}
406 | | |
407 | | static void nestedsvm_vmcb_set_nestedp2m(struct vcpu *v, |
408 | | struct vmcb_struct *vvmcb, struct vmcb_struct *n2vmcb) |
409 | 0 | { |
410 | 0 | struct p2m_domain *p2m; |
411 | 0 |
|
412 | 0 | ASSERT(v != NULL); |
413 | 0 | ASSERT(vvmcb != NULL); |
414 | 0 | ASSERT(n2vmcb != NULL); |
415 | 0 |
|
416 | 0 | /* This will allow nsvm_vcpu_hostcr3() to return correct np2m_base */ |
417 | 0 | vcpu_nestedsvm(v).ns_vmcb_hostcr3 = vvmcb->_h_cr3; |
418 | 0 |
|
419 | 0 | p2m = p2m_get_nestedp2m(v); |
420 | 0 | n2vmcb->_h_cr3 = pagetable_get_paddr(p2m_get_pagetable(p2m)); |
421 | 0 | } |
422 | | |
/*
 * Emulated VMRUN: build the shadow (n2) vmcb the hardware will actually
 * run from the l1 guest's virtual vmcb (ns_vmcb) merged with the l1 host
 * vmcb (n1vmcb), and load the l2 guest's control/register state.
 * The virtual VMCB clean bits are honoured only when the guest is
 * re-entering via the same vVMCB address as last time.
 * Returns 0 on success, NSVM_ERROR_VVMCB if the virtual vmcb is invalid,
 * NSVM_ERROR_VMENTRY if the constructed n2vmcb is invalid, or a nonzero
 * value from nsvm_vmrun_permissionmap().
 */
static int nsvm_vmcb_prepare4vmrun(struct vcpu *v, struct cpu_user_regs *regs)
{
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
    struct vmcb_struct *ns_vmcb, *n1vmcb, *n2vmcb;
    bool_t vcleanbits_valid;
    int rc;
    uint64_t cr0;

    ns_vmcb = nv->nv_vvmcx;
    n1vmcb = nv->nv_n1vmcx;
    n2vmcb = nv->nv_n2vmcx;
    ASSERT(ns_vmcb != NULL);
    ASSERT(n1vmcb != NULL);
    ASSERT(n2vmcb != NULL);

    /* Check if virtual VMCB cleanbits are valid */
    vcleanbits_valid = 1;
    if ( svm->ns_ovvmcb_pa == INVALID_PADDR )
        vcleanbits_valid = 0;
    if (svm->ns_ovvmcb_pa != nv->nv_vvmcxaddr)
        vcleanbits_valid = 0;

#define vcleanbit_set(_name) \
    (vcleanbits_valid && ns_vmcb->cleanbits.fields._name)

    /* Enable l2 guest intercepts */
    if (!vcleanbit_set(intercepts)) {
        svm->ns_cr_intercepts = ns_vmcb->_cr_intercepts;
        svm->ns_dr_intercepts = ns_vmcb->_dr_intercepts;
        svm->ns_exception_intercepts = ns_vmcb->_exception_intercepts;
        svm->ns_general1_intercepts = ns_vmcb->_general1_intercepts;
        svm->ns_general2_intercepts = ns_vmcb->_general2_intercepts;
    }

    /* We could track the cleanbits of the n1vmcb from
     * last emulated #VMEXIT to this emulated VMRUN to safe the merges
     * below. Those cleanbits would be tracked in an integer field
     * in struct nestedsvm.
     * But this effort is not worth doing because:
     * - Only the intercepts bit of the n1vmcb can effectively be used here
     * - The CPU runs more instructions for the tracking than can be
     *   safed here.
     * The overhead comes from (ordered from highest to lowest):
     * - svm_ctxt_switch_to (CPU context switching)
     * - svm_fpu_enter, svm_fpu_leave (lazy FPU switching)
     * - emulated CLGI (clears VINTR intercept)
     * - host clears VINTR intercept
     * Test results show that the overhead is high enough that the
     * tracked intercepts bit of the n1vmcb is practically *always* cleared.
     */

    /* Intercept everything either l0 (Xen) or l1 wants intercepted. */
    n2vmcb->_cr_intercepts =
        n1vmcb->_cr_intercepts | ns_vmcb->_cr_intercepts;
    n2vmcb->_dr_intercepts =
        n1vmcb->_dr_intercepts | ns_vmcb->_dr_intercepts;
    n2vmcb->_exception_intercepts =
        n1vmcb->_exception_intercepts | ns_vmcb->_exception_intercepts;
    n2vmcb->_general1_intercepts =
        n1vmcb->_general1_intercepts | ns_vmcb->_general1_intercepts;
    n2vmcb->_general2_intercepts =
        n1vmcb->_general2_intercepts | ns_vmcb->_general2_intercepts;

    /* Nested Pause Filter */
    if (ns_vmcb->_general1_intercepts & GENERAL1_INTERCEPT_PAUSE)
        n2vmcb->_pause_filter_count =
            min(n1vmcb->_pause_filter_count, ns_vmcb->_pause_filter_count);
    else
        n2vmcb->_pause_filter_count = n1vmcb->_pause_filter_count;

    /* TSC offset */
    n2vmcb->_tsc_offset = n1vmcb->_tsc_offset + ns_vmcb->_tsc_offset;

    /* Nested IO permission bitmaps */
    rc = nsvm_vmrun_permissionmap(v, vcleanbit_set(iopm));
    if (rc)
        return rc;

    /* ASID - Emulation handled in hvm_asid_handle_vmenter() */

    /* TLB control */
    n2vmcb->tlb_control = ns_vmcb->tlb_control;

    /* Virtual Interrupts */
    if (!vcleanbit_set(tpr)) {
        n2vmcb->_vintr = ns_vmcb->_vintr;
        /* Always run the l2 guest with interrupt masking so that real
         * interrupts cause a #VMEXIT to l0 (Xen) rather than to l2. */
        n2vmcb->_vintr.fields.intr_masking = 1;
    }

    /* Shadow Mode */
    n2vmcb->interrupt_shadow = ns_vmcb->interrupt_shadow;

    /* Exit codes */
    n2vmcb->exitcode = ns_vmcb->exitcode;
    n2vmcb->exitinfo1 = ns_vmcb->exitinfo1;
    n2vmcb->exitinfo2 = ns_vmcb->exitinfo2;
    n2vmcb->exitintinfo = ns_vmcb->exitintinfo;

    /* Pending Interrupts */
    n2vmcb->eventinj = ns_vmcb->eventinj;

    /* LBR virtualization */
    if (!vcleanbit_set(lbr)) {
        svm->ns_lbr_control = ns_vmcb->lbr_control;
    }
    n2vmcb->lbr_control.bytes =
        n1vmcb->lbr_control.bytes | ns_vmcb->lbr_control.bytes;

    /* NextRIP - only evaluated on #VMEXIT. */

    /*
     * VMCB Save State Area
     */

    /* Segments */
    if (!vcleanbit_set(seg)) {
        n2vmcb->es = ns_vmcb->es;
        n2vmcb->cs = ns_vmcb->cs;
        n2vmcb->ss = ns_vmcb->ss;
        n2vmcb->ds = ns_vmcb->ds;
        /* CPL */
        n2vmcb->_cpl = ns_vmcb->_cpl;
    }
    if (!vcleanbit_set(dt)) {
        n2vmcb->gdtr = ns_vmcb->gdtr;
        n2vmcb->idtr = ns_vmcb->idtr;
    }

    /* EFER */
    v->arch.hvm_vcpu.guest_efer = ns_vmcb->_efer;
    rc = hvm_set_efer(ns_vmcb->_efer);
    if ( rc == X86EMUL_EXCEPTION )
        hvm_inject_hw_exception(TRAP_gp_fault, 0);
    if (rc != X86EMUL_OKAY)
        gdprintk(XENLOG_ERR, "hvm_set_efer failed, rc: %u\n", rc);

    /* CR4 */
    v->arch.hvm_vcpu.guest_cr[4] = ns_vmcb->_cr4;
    rc = hvm_set_cr4(ns_vmcb->_cr4, 1);
    if ( rc == X86EMUL_EXCEPTION )
        hvm_inject_hw_exception(TRAP_gp_fault, 0);
    if (rc != X86EMUL_OKAY)
        gdprintk(XENLOG_ERR, "hvm_set_cr4 failed, rc: %u\n", rc);

    /* CR0 */
    svm->ns_cr0 = v->arch.hvm_vcpu.guest_cr[0];
    /* nestedsvm_fpu_vmentry() reconciles lazy FPU (CR0.TS) state. */
    cr0 = nestedsvm_fpu_vmentry(svm->ns_cr0, ns_vmcb, n1vmcb, n2vmcb);
    v->arch.hvm_vcpu.guest_cr[0] = ns_vmcb->_cr0;
    rc = hvm_set_cr0(cr0, 1);
    if ( rc == X86EMUL_EXCEPTION )
        hvm_inject_hw_exception(TRAP_gp_fault, 0);
    if (rc != X86EMUL_OKAY)
        gdprintk(XENLOG_ERR, "hvm_set_cr0 failed, rc: %u\n", rc);

    /* CR2 */
    v->arch.hvm_vcpu.guest_cr[2] = ns_vmcb->_cr2;
    hvm_update_guest_cr(v, 2);

    /* Nested paging mode */
    if (nestedhvm_paging_mode_hap(v)) {
        /* host nested paging + guest nested paging. */
        n2vmcb->_np_enable = 1;

        nestedsvm_vmcb_set_nestedp2m(v, ns_vmcb, n2vmcb);

        /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
        rc = hvm_set_cr3(ns_vmcb->_cr3, 1);
        if ( rc == X86EMUL_EXCEPTION )
            hvm_inject_hw_exception(TRAP_gp_fault, 0);
        if (rc != X86EMUL_OKAY)
            gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
    } else if (paging_mode_hap(v->domain)) {
        /* host nested paging + guest shadow paging. */
        n2vmcb->_np_enable = 1;
        /* Keep h_cr3 as it is. */
        n2vmcb->_h_cr3 = n1vmcb->_h_cr3;
        /* When l1 guest does shadow paging
         * we assume it intercepts page faults.
         */
        /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
        rc = hvm_set_cr3(ns_vmcb->_cr3, 1);
        if ( rc == X86EMUL_EXCEPTION )
            hvm_inject_hw_exception(TRAP_gp_fault, 0);
        if (rc != X86EMUL_OKAY)
            gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
    } else {
        /* host shadow paging + guest shadow paging. */
        n2vmcb->_np_enable = 0;
        n2vmcb->_h_cr3 = 0x0;

        /* TODO: Once shadow-shadow paging is in place come back to here
         * and set host_vmcb->_cr3 to the shadowed shadow table.
         */
    }

    /* DRn */
    if (!vcleanbit_set(dr)) {
        n2vmcb->_dr7 = ns_vmcb->_dr7;
        n2vmcb->_dr6 = ns_vmcb->_dr6;
    }

    /* RFLAGS */
    n2vmcb->rflags = ns_vmcb->rflags;

    /* RIP */
    n2vmcb->rip = ns_vmcb->rip;

    /* RSP */
    n2vmcb->rsp = ns_vmcb->rsp;

    /* RAX */
    n2vmcb->rax = ns_vmcb->rax;

    /* Keep the host values of the fs, gs, ldtr, tr, kerngsbase,
     * star, lstar, cstar, sfmask, sysenter_cs, sysenter_esp,
     * sysenter_eip. These are handled via VMSAVE/VMLOAD emulation.
     */

    /* Page tables */
    n2vmcb->pdpe0 = ns_vmcb->pdpe0;
    n2vmcb->pdpe1 = ns_vmcb->pdpe1;
    n2vmcb->pdpe2 = ns_vmcb->pdpe2;
    n2vmcb->pdpe3 = ns_vmcb->pdpe3;

    /* PAT */
    if (!vcleanbit_set(np)) {
        n2vmcb->_g_pat = ns_vmcb->_g_pat;
    }

    if (!vcleanbit_set(lbr)) {
        /* Debug Control MSR */
        n2vmcb->_debugctlmsr = ns_vmcb->_debugctlmsr;

        /* LBR MSRs */
        n2vmcb->_lastbranchfromip = ns_vmcb->_lastbranchfromip;
        n2vmcb->_lastbranchtoip = ns_vmcb->_lastbranchtoip;
        n2vmcb->_lastintfromip = ns_vmcb->_lastintfromip;
        n2vmcb->_lastinttoip = ns_vmcb->_lastinttoip;
    }

    /* Cleanbits */
    n2vmcb->cleanbits.bytes = 0;

    /* Sanity-check both the guest-provided and the constructed vmcb. */
    rc = svm_vmcb_isvalid(__func__, ns_vmcb, v, true);
    if (rc) {
        gdprintk(XENLOG_ERR, "virtual vmcb invalid\n");
        return NSVM_ERROR_VVMCB;
    }

    rc = svm_vmcb_isvalid(__func__, n2vmcb, v, true);
    if (rc) {
        gdprintk(XENLOG_ERR, "n2vmcb invalid\n");
        return NSVM_ERROR_VMENTRY;
    }

    /* Switch guest registers to l2 guest */
    regs->rax = ns_vmcb->rax;
    regs->rip = ns_vmcb->rip;
    regs->rsp = ns_vmcb->rsp;
    regs->rflags = ns_vmcb->rflags;

#undef vcleanbit_set
    return 0;
}
687 | | |
/*
 * Core of emulated VMRUN: save the l1 ("host") state, switch the vcpu
 * to the shadow (n2) vmcb and populate it for the l2 guest.  @inst_len
 * is the length of the VMRUN instruction.  Returns 0 on success or the
 * error from nsvm_vcpu_hostsave() / nsvm_vmcb_prepare4vmrun().
 */
static int
nsvm_vcpu_vmentry(struct vcpu *v, struct cpu_user_regs *regs,
    unsigned int inst_len)
{
    int ret;
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
    struct vmcb_struct *ns_vmcb;

    ns_vmcb = nv->nv_vvmcx;
    ASSERT(ns_vmcb != NULL);
    ASSERT(nv->nv_n2vmcx != NULL);
    ASSERT(nv->nv_n2vmcx_pa != INVALID_PADDR);

    /* Save values for later use. Needed for Nested-on-Nested and
     * Shadow-on-Shadow paging.
     */
    svm->ns_vmcb_guestcr3 = ns_vmcb->_cr3;
    svm->ns_vmcb_hostcr3 = ns_vmcb->_h_cr3;

    /* Convert explicitely to boolean. Deals with l1 guests
     * that use flush-by-asid w/o checking the cpuid bits */
    nv->nv_flushp2m = !!ns_vmcb->tlb_control;
    /* A changed guest ASID invalidates the cached np2m mappings. */
    if ( svm->ns_guest_asid != ns_vmcb->_guest_asid )
    {
        nv->nv_flushp2m = 1;
        hvm_asid_flush_vcpu_asid(&vcpu_nestedhvm(v).nv_n2asid);
        svm->ns_guest_asid = ns_vmcb->_guest_asid;
    }

    /* nested paging for the guest */
    svm->ns_hap_enabled = (ns_vmcb->_np_enable) ? 1 : 0;

    /* Remember the V_INTR_MASK in hostflags */
    svm->ns_hostflags.fields.vintrmask =
        (ns_vmcb->_vintr.fields.intr_masking) ? 1 : 0;

    /* Save l1 guest state (= host state) */
    ret = nsvm_vcpu_hostsave(v, inst_len);
    if (ret) {
        gdprintk(XENLOG_ERR, "hostsave failed, ret = %i\n", ret);
        return ret;
    }

    /* switch vmcb to shadow vmcb */
    v->arch.hvm_svm.vmcb = nv->nv_n2vmcx;
    v->arch.hvm_svm.vmcb_pa = nv->nv_n2vmcx_pa;

    ret = nsvm_vmcb_prepare4vmrun(v, regs);
    if (ret) {
        gdprintk(XENLOG_ERR, "prepare4vmrun failed, ret = %i\n", ret);
        return ret;
    }

    /* VMRUN implicitly sets GIF. */
    nestedsvm_vcpu_stgi(v);
    return 0;
}
745 | | |
/*
 * Top-level VMRUN emulation entry point.
 * Returns 0 on successful entry to the l2 guest, -1 to signal a deferred
 * #VMEXIT (SHUTDOWN or INVALID) to the l1 guest, or 1 when a #UD was
 * injected instead.
 */
int
nsvm_vcpu_vmrun(struct vcpu *v, struct cpu_user_regs *regs)
{
    int ret;
    unsigned int inst_len;
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
    struct nestedsvm *svm = &vcpu_nestedsvm(v);

    inst_len = __get_instruction_length(v, INSTR_VMRUN);
    if (inst_len == 0) {
        svm->ns_vmexit.exitcode = VMEXIT_SHUTDOWN;
        return -1;
    }

    nv->nv_vmswitch_in_progress = 1;
    ASSERT(nv->nv_vvmcx != NULL);

    /* save host state */
    ret = nsvm_vcpu_vmentry(v, regs, inst_len);

    /* Switch vcpu to guest mode. In the error case
     * this ensures the host mode is restored correctly
     * and l1 guest keeps alive. */
    nestedhvm_vcpu_enter_guestmode(v);

    switch (ret) {
    case 0:
        break;
    case NSVM_ERROR_VVMCB:
        /* The l1-provided vmcb was invalid: reflect VMEXIT_INVALID. */
        gdprintk(XENLOG_ERR, "inject VMEXIT(INVALID)\n");
        svm->ns_vmexit.exitcode = VMEXIT_INVALID;
        return -1;
    case NSVM_ERROR_VMENTRY:
    default:
        gdprintk(XENLOG_ERR,
            "nsvm_vcpu_vmentry failed, injecting #UD\n");
        hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC);
        /* Must happen after hvm_inject_hw_exception or it doesn't work right. */
        nv->nv_vmswitch_in_progress = 0;
        return 1;
    }

    /* If l1 guest uses shadow paging, update the paging mode. */
    if (!nestedhvm_paging_mode_hap(v))
        paging_update_paging_modes(v);

    nv->nv_vmswitch_in_progress = 0;
    return 0;
}
795 | | |
/*
 * Write a #VMEXIT with @exitcode into the l1 guest's virtual vmcb,
 * filling in exitcode-specific exitinfo/cr2 fields and re-recording an
 * interrupted event injection in exitintinfo where required.  Must be
 * called with the virtual GIF clear.  Returns 0.
 */
static int
nsvm_vcpu_vmexit_inject(struct vcpu *v, struct cpu_user_regs *regs,
    uint64_t exitcode)
{
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
    struct vmcb_struct *ns_vmcb;

    ASSERT(svm->ns_gif == 0);
    ns_vmcb = nv->nv_vvmcx;

    if (nv->nv_vmexit_pending) {

        switch (exitcode) {
        case VMEXIT_INTR:
            /* An event l1 wanted to inject was interrupted by this exit:
             * report it back via exitintinfo so l1 can re-inject it. */
            if ( unlikely(ns_vmcb->eventinj.fields.v)
                && nv->nv_vmentry_pending
                && hvm_event_needs_reinjection(ns_vmcb->eventinj.fields.type,
                    ns_vmcb->eventinj.fields.vector) )
            {
                ns_vmcb->exitintinfo.bytes = ns_vmcb->eventinj.bytes;
            }
            break;
        case VMEXIT_EXCEPTION_PF:
            ns_vmcb->_cr2 = ns_vmcb->exitinfo2;
            /* fall through */
        case VMEXIT_NPF:
            /* PF error code */
            ns_vmcb->exitinfo1 = svm->ns_vmexit.exitinfo1;
            /* fault address */
            ns_vmcb->exitinfo2 = svm->ns_vmexit.exitinfo2;
            break;
        case VMEXIT_EXCEPTION_NP:
        case VMEXIT_EXCEPTION_SS:
        case VMEXIT_EXCEPTION_GP:
        case VMEXIT_EXCEPTION_15:
        case VMEXIT_EXCEPTION_MF:
        case VMEXIT_EXCEPTION_AC:
            /* These exceptions carry an error code in exitinfo1. */
            ns_vmcb->exitinfo1 = svm->ns_vmexit.exitinfo1;
            break;
        default:
            break;
        }
    }

    ns_vmcb->exitcode = exitcode;
    /* The event, if any, is now reflected; clear the injection field. */
    ns_vmcb->eventinj.bytes = 0;
    return 0;
}
845 | | |
846 | | int |
847 | | nsvm_vcpu_vmexit_event(struct vcpu *v, const struct x86_event *trap) |
848 | 0 | { |
849 | 0 | ASSERT(vcpu_nestedhvm(v).nv_vvmcx != NULL); |
850 | 0 |
|
851 | 0 | nestedsvm_vmexit_defer(v, VMEXIT_EXCEPTION_DE + trap->vector, |
852 | 0 | trap->error_code, trap->cr2); |
853 | 0 | return NESTEDHVM_VMEXIT_DONE; |
854 | 0 | } |
855 | | |
856 | | uint64_t nsvm_vcpu_hostcr3(struct vcpu *v) |
857 | 0 | { |
858 | 0 | return vcpu_nestedsvm(v).ns_vmcb_hostcr3; |
859 | 0 | } |
860 | | |
861 | | static int |
862 | | nsvm_vmcb_guest_intercepts_msr(unsigned long *msr_bitmap, |
863 | | uint32_t msr, bool_t write) |
864 | 0 | { |
865 | 0 | bool_t enabled; |
866 | 0 | unsigned long *msr_bit; |
867 | 0 |
|
868 | 0 | msr_bit = svm_msrbit(msr_bitmap, msr); |
869 | 0 |
|
870 | 0 | if (msr_bit == NULL) |
871 | 0 | /* MSR not in the permission map: Let the guest handle it. */ |
872 | 0 | return NESTEDHVM_VMEXIT_INJECT; |
873 | 0 |
|
874 | 0 | msr &= 0x1fff; |
875 | 0 |
|
876 | 0 | if (write) |
877 | 0 | /* write access */ |
878 | 0 | enabled = test_bit(msr * 2 + 1, msr_bit); |
879 | 0 | else |
880 | 0 | /* read access */ |
881 | 0 | enabled = test_bit(msr * 2, msr_bit); |
882 | 0 |
|
883 | 0 | if (!enabled) |
884 | 0 | return NESTEDHVM_VMEXIT_HOST; |
885 | 0 |
|
886 | 0 | return NESTEDHVM_VMEXIT_INJECT; |
887 | 0 | } |
888 | | |
/*
 * Consult the l1 guest's I/O permission map (two pages at iopm_pa) to
 * decide who handles an intercepted IN/OUT.
 *
 * exitinfo1 is the hardware IOIO exit info; the access is intercepted
 * by l1 only if *every* byte of the (1/2/4 byte) access has its bit set
 * in the bitmap, which may require crossing into the second page.
 *
 * Returns NESTEDHVM_VMEXIT_INJECT when l1 intercepts the access,
 * NESTEDHVM_VMEXIT_HOST otherwise.
 */
static int
nsvm_vmcb_guest_intercepts_ioio(paddr_t iopm_pa, uint64_t exitinfo1)
{
    unsigned long gfn = iopm_pa >> PAGE_SHIFT;
    unsigned long *io_bitmap;
    ioio_info_t ioinfo;
    uint16_t port;
    unsigned int size;
    bool_t enabled;

    ioinfo.bytes = exitinfo1;
    port = ioinfo.fields.port;
    /* Access width in bytes: one bitmap bit is checked per byte. */
    size = ioinfo.fields.sz32 ? 4 : ioinfo.fields.sz16 ? 2 : 1;

    /* Each page holds 8 * PAGE_SIZE port bits; select the page the
     * first port falls into and rebase 'port' within it. */
    switch ( port )
    {
    case 0 ... 8 * PAGE_SIZE - 1: /* first 4KB page */
        break;
    case 8 * PAGE_SIZE ... 2 * 8 * PAGE_SIZE - 1: /* second 4KB page */
        port -= 8 * PAGE_SIZE;
        ++gfn;
        break;
    default:
        BUG();
        break;
    }

    /* Walk one bit per byte of the access, remapping when the port
     * index crosses into the next bitmap page. */
    for ( io_bitmap = hvm_map_guest_frame_ro(gfn, 0); ; )
    {
        /* A failed map counts as "not intercepted". */
        enabled = io_bitmap && test_bit(port, io_bitmap);
        if ( !enabled || !--size )
            break;
        if ( unlikely(++port == 8 * PAGE_SIZE) )
        {
            hvm_unmap_guest_frame(io_bitmap, 0);
            io_bitmap = hvm_map_guest_frame_ro(++gfn, 0);
            port -= 8 * PAGE_SIZE;
        }
    }
    hvm_unmap_guest_frame(io_bitmap, 0);

    if ( !enabled )
        return NESTEDHVM_VMEXIT_HOST;

    return NESTEDHVM_VMEXIT_INJECT;
}
935 | | |
/*
 * Decide whether the l1 guest intercepts the given (native SVM)
 * exitcode, i.e. whether a nested VMEXIT must be emulated into l1.
 *
 * Stage 1 checks the intercept bit in the intercept vectors cached in
 * vcpu_nestedsvm() at VMENTRY time. Stage 2 refines MSR and IOIO exits
 * by consulting the l1 guest's permission bitmaps.
 *
 * Returns 1 if l1 intercepts the exit, 0 if the host handles it.
 */
static bool_t
nsvm_vmcb_guest_intercepts_exitcode(struct vcpu *v,
    struct cpu_user_regs *regs, uint64_t exitcode)
{
    uint64_t exit_bits;
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
    struct vmcb_struct *ns_vmcb = nv->nv_vvmcx;
    enum nestedhvm_vmexits vmexits;

    switch (exitcode) {
    case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
    case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
        exit_bits = 1ULL << (exitcode - VMEXIT_CR0_READ);
        if (svm->ns_cr_intercepts & exit_bits)
            break;
        return 0;

    case VMEXIT_DR0_READ ... VMEXIT_DR7_READ:
    case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
        exit_bits = 1ULL << (exitcode - VMEXIT_DR0_READ);
        if (svm->ns_dr_intercepts & exit_bits)
            break;
        return 0;

    case VMEXIT_EXCEPTION_DE ... VMEXIT_EXCEPTION_XF:
        exit_bits = 1ULL << (exitcode - VMEXIT_EXCEPTION_DE);
        if (svm->ns_exception_intercepts & exit_bits)
            break;
        return 0;

    case VMEXIT_INTR ... VMEXIT_SHUTDOWN:
        exit_bits = 1ULL << (exitcode - VMEXIT_INTR);
        if (svm->ns_general1_intercepts & exit_bits)
            break;
        return 0;

    case VMEXIT_VMRUN ... VMEXIT_XSETBV:
        exit_bits = 1ULL << (exitcode - VMEXIT_VMRUN);
        if (svm->ns_general2_intercepts & exit_bits)
            break;
        return 0;

    case VMEXIT_NPF:
        /* NPF is only meaningful (and then always intercepted by l1)
         * when the l1 guest itself runs with nested paging. */
        if (nestedhvm_paging_mode_hap(v))
            break;
        return 0;
    case VMEXIT_INVALID:
        /* Always intercepted */
        break;

    default:
        gdprintk(XENLOG_ERR, "Illegal exitcode %#"PRIx64"\n", exitcode);
        BUG();
        break;
    }

    /* Special cases: Do more detailed checks */
    switch (exitcode) {
    case VMEXIT_MSR:
        ASSERT(regs != NULL);
        /* If the virtual VMCB can't be mapped, fall back to "intercepted". */
        if ( !nestedsvm_vmcb_map(v, nv->nv_vvmcxaddr) )
            break;
        ns_vmcb = nv->nv_vvmcx;
        /* exitinfo1 != 0 marks a WRMSR (vs RDMSR) per the SVM exit info. */
        vmexits = nsvm_vmcb_guest_intercepts_msr(svm->ns_cached_msrpm,
            regs->ecx, ns_vmcb->exitinfo1 != 0);
        if (vmexits == NESTEDHVM_VMEXIT_HOST)
            return 0;
        break;
    case VMEXIT_IOIO:
        if ( !nestedsvm_vmcb_map(v, nv->nv_vvmcxaddr) )
            break;
        ns_vmcb = nv->nv_vvmcx;
        vmexits = nsvm_vmcb_guest_intercepts_ioio(ns_vmcb->_iopm_base_pa,
            ns_vmcb->exitinfo1);
        if (vmexits == NESTEDHVM_VMEXIT_HOST)
            return 0;
        break;
    }

    return 1;
}
1018 | | |
1019 | | bool_t |
1020 | | nsvm_vmcb_guest_intercepts_event( |
1021 | | struct vcpu *v, unsigned int vector, int errcode) |
1022 | 0 | { |
1023 | 0 | return nsvm_vmcb_guest_intercepts_exitcode(v, |
1024 | 0 | guest_cpu_user_regs(), VMEXIT_EXCEPTION_DE + vector); |
1025 | 0 | } |
1026 | | |
/*
 * Copy the l2 guest's exit state from the shadow VMCB (n2vmcb) back
 * into the l1 guest's virtual VMCB (ns_vmcb) in preparation for
 * emulating a VMEXIT to the l1 guest.
 *
 * Control fields the host virtualizes (intercepts, iopm/msrpm base,
 * TSC offset, pause filter, ASID, LBR control) are deliberately left
 * untouched so the l1 guest never sees the shadowed values.
 *
 * Always returns 0.
 */
static int
nsvm_vmcb_prepare4vmexit(struct vcpu *v, struct cpu_user_regs *regs)
{
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
    struct vmcb_struct *ns_vmcb = nv->nv_vvmcx;
    struct vmcb_struct *n2vmcb = nv->nv_n2vmcx;

    /* Flush hardware-held l1 VMCB state into memory first. */
    svm_vmsave(nv->nv_n1vmcx);

    /* Cache guest physical address of virtual vmcb
     * for VMCB Cleanbit emulation.
     */
    svm->ns_ovvmcb_pa = nv->nv_vvmcxaddr;

    /* Intercepts - keep them as they are */

    /* Pausefilter - keep it as is */

    /* Nested IO permission bitmap */
    /* Just keep the iopm_base_pa and msrpm_base_pa values.
     * The guest must not see the virtualized values.
     */

    /* TSC offset */
    /* Keep it. It's maintainted by the l1 guest. */

    /* ASID */
    /* ns_vmcb->_guest_asid = n2vmcb->_guest_asid; */

    /* TLB control */
    ns_vmcb->tlb_control = 0;

    /* Virtual Interrupts */
    ns_vmcb->_vintr = n2vmcb->_vintr;
    /* Only expose interrupt masking state if l1 asked for it. */
    if (!(svm->ns_hostflags.fields.vintrmask))
        ns_vmcb->_vintr.fields.intr_masking = 0;

    /* Shadow mode */
    ns_vmcb->interrupt_shadow = n2vmcb->interrupt_shadow;

    /* Exit codes */
    ns_vmcb->exitcode = n2vmcb->exitcode;
    ns_vmcb->exitinfo1 = n2vmcb->exitinfo1;
    ns_vmcb->exitinfo2 = n2vmcb->exitinfo2;
    ns_vmcb->exitintinfo = n2vmcb->exitintinfo;

    /* Interrupts */
    /* If we emulate a VMRUN/#VMEXIT in the same host #VMEXIT cycle we have
     * to make sure that we do not lose injected events. So check eventinj
     * here and copy it to exitintinfo if it is valid.
     * exitintinfo and eventinj can't be both valid because the case below
     * only happens on a VMRUN instruction intercept which has no valid
     * exitintinfo set.
     */
    if ( unlikely(n2vmcb->eventinj.fields.v) &&
         hvm_event_needs_reinjection(n2vmcb->eventinj.fields.type,
                                     n2vmcb->eventinj.fields.vector) )
    {
        ns_vmcb->exitintinfo = n2vmcb->eventinj;
    }

    ns_vmcb->eventinj.bytes = 0;

    /* Nested paging mode */
    if (nestedhvm_paging_mode_hap(v)) {
        /* host nested paging + guest nested paging. */
        ns_vmcb->_np_enable = n2vmcb->_np_enable;
        ns_vmcb->_cr3 = n2vmcb->_cr3;
        /* The vmcb->h_cr3 is the shadowed h_cr3. The original
         * unshadowed guest h_cr3 is kept in ns_vmcb->h_cr3,
         * hence we keep the ns_vmcb->h_cr3 value. */
    } else if (paging_mode_hap(v->domain)) {
        /* host nested paging + guest shadow paging. */
        ns_vmcb->_np_enable = 0;
        /* Throw h_cr3 away. Guest is not allowed to set it or
         * it can break out, otherwise (security hole!) */
        ns_vmcb->_h_cr3 = 0x0;
        /* Stop intercepting #PF (already done above
         * by restoring cached intercepts). */
        ns_vmcb->_cr3 = n2vmcb->_cr3;
    } else {
        /* host shadow paging + guest shadow paging. */
        ns_vmcb->_np_enable = 0;
        ns_vmcb->_h_cr3 = 0x0;
        /* The vmcb->_cr3 is the shadowed cr3. The original
         * unshadowed guest cr3 is kept in ns_vmcb->_cr3,
         * hence we keep the ns_vmcb->_cr3 value. */
    }

    /* LBR virtualization - keep lbr control as is */

    /* NextRIP */
    ns_vmcb->nextrip = n2vmcb->nextrip;

    /* Decode Assist */
    ns_vmcb->guest_ins_len = n2vmcb->guest_ins_len;
    memcpy(ns_vmcb->guest_ins, n2vmcb->guest_ins, sizeof(ns_vmcb->guest_ins));

    /*
     * VMCB Save State Area
     */

    /* Segments */
    ns_vmcb->es = n2vmcb->es;
    ns_vmcb->cs = n2vmcb->cs;
    ns_vmcb->ss = n2vmcb->ss;
    ns_vmcb->ds = n2vmcb->ds;
    ns_vmcb->gdtr = n2vmcb->gdtr;
    ns_vmcb->idtr = n2vmcb->idtr;

    /* CPL */
    ns_vmcb->_cpl = n2vmcb->_cpl;

    /* EFER */
    ns_vmcb->_efer = n2vmcb->_efer;

    /* CRn */
    ns_vmcb->_cr4 = n2vmcb->_cr4;
    ns_vmcb->_cr0 = n2vmcb->_cr0;

    /* DRn */
    ns_vmcb->_dr7 = n2vmcb->_dr7;
    ns_vmcb->_dr6 = n2vmcb->_dr6;

    /* Restore registers from regs as those values
     * can be newer than in n2vmcb (e.g. due to an
     * instruction emulation right before).
     */

    /* RFLAGS */
    ns_vmcb->rflags = n2vmcb->rflags = regs->rflags;

    /* RIP */
    ns_vmcb->rip = n2vmcb->rip = regs->rip;

    /* RSP */
    ns_vmcb->rsp = n2vmcb->rsp = regs->rsp;

    /* RAX */
    ns_vmcb->rax = n2vmcb->rax = regs->rax;

    /* Keep the l2 guest values of the fs, gs, ldtr, tr, kerngsbase,
     * star, lstar, cstar, sfmask, sysenter_cs, sysenter_esp,
     * sysenter_eip. These are handled via VMSAVE/VMLOAD emulation.
     */

    /* CR2 */
    ns_vmcb->_cr2 = n2vmcb->_cr2;

    /* Page tables */
    ns_vmcb->pdpe0 = n2vmcb->pdpe0;
    ns_vmcb->pdpe1 = n2vmcb->pdpe1;
    ns_vmcb->pdpe2 = n2vmcb->pdpe2;
    ns_vmcb->pdpe3 = n2vmcb->pdpe3;

    /* PAT */
    ns_vmcb->_g_pat = n2vmcb->_g_pat;

    /* Debug Control MSR */
    ns_vmcb->_debugctlmsr = n2vmcb->_debugctlmsr;

    /* LBR MSRs */
    ns_vmcb->_lastbranchfromip = n2vmcb->_lastbranchfromip;
    ns_vmcb->_lastbranchtoip = n2vmcb->_lastbranchtoip;
    ns_vmcb->_lastintfromip = n2vmcb->_lastintfromip;
    ns_vmcb->_lastinttoip = n2vmcb->_lastinttoip;

    return 0;
}
1197 | | |
1198 | | bool_t |
1199 | | nsvm_vmcb_hap_enabled(struct vcpu *v) |
1200 | 0 | { |
1201 | 0 | return vcpu_nestedsvm(v).ns_hap_enabled; |
1202 | 0 | } |
1203 | | |
1204 | | /* This function uses L2_gpa to walk the P2M page table in L1. If the |
1205 | | * walk is successful, the translated value is returned in |
1206 | | * L1_gpa. The result value tells what to do next. |
1207 | | */ |
1208 | | int |
1209 | | nsvm_hap_walk_L1_p2m(struct vcpu *v, paddr_t L2_gpa, paddr_t *L1_gpa, |
1210 | | unsigned int *page_order, uint8_t *p2m_acc, |
1211 | | bool_t access_r, bool_t access_w, bool_t access_x) |
1212 | 0 | { |
1213 | 0 | uint32_t pfec; |
1214 | 0 | unsigned long nested_cr3, gfn; |
1215 | 0 |
|
1216 | 0 | nested_cr3 = nhvm_vcpu_p2m_base(v); |
1217 | 0 |
|
1218 | 0 | pfec = PFEC_user_mode | PFEC_page_present; |
1219 | 0 | if ( access_w ) |
1220 | 0 | pfec |= PFEC_write_access; |
1221 | 0 | if ( access_x ) |
1222 | 0 | pfec |= PFEC_insn_fetch; |
1223 | 0 |
|
1224 | 0 | /* Walk the guest-supplied NPT table, just as if it were a pagetable */ |
1225 | 0 | gfn = paging_ga_to_gfn_cr3(v, nested_cr3, L2_gpa, &pfec, page_order); |
1226 | 0 |
|
1227 | 0 | if ( gfn == gfn_x(INVALID_GFN) ) |
1228 | 0 | return NESTEDHVM_PAGEFAULT_INJECT; |
1229 | 0 |
|
1230 | 0 | *L1_gpa = (gfn << PAGE_SHIFT) + (L2_gpa & ~PAGE_MASK); |
1231 | 0 | return NESTEDHVM_PAGEFAULT_DONE; |
1232 | 0 | } |
1233 | | |
/*
 * Determine whether interrupt delivery to this nested-SVM vcpu is
 * currently blocked, and if so why. Checked in priority order:
 * cleared virtual GIF, l2-guest-mode specific conditions (virtual
 * RFLAGS.IF, in-flight emulated I/O, unfinished event delivery), and
 * finally a pending emulated VMEXIT.
 */
enum hvm_intblk nsvm_intr_blocked(struct vcpu *v)
{
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);

    ASSERT(nestedhvm_enabled(v->domain));

    /* With GIF clear nothing may be delivered at all. */
    if ( !nestedsvm_gif_isset(v) )
        return hvm_intblk_svm_gif;

    if ( nestedhvm_vcpu_in_guestmode(v) ) {
        struct vmcb_struct *n2vmcb = nv->nv_n2vmcx;

        /* Honour the l1 guest's virtualized RFLAGS.IF only when it
         * requested interrupt masking (V_INTR_MASKING). */
        if ( svm->ns_hostflags.fields.vintrmask )
            if ( !svm->ns_hostflags.fields.rflagsif )
                return hvm_intblk_rflags_ie;

        /* when l1 guest passes its devices through to the l2 guest
         * and l2 guest does an MMIO access then we may want to
         * inject an VMEXIT(#INTR) exitcode into the l1 guest.
         * Delay the injection because this would result in delivering
         * an interrupt *within* the execution of an instruction.
         */
        if ( v->arch.hvm_vcpu.hvm_io.io_req.state != STATE_IOREQ_NONE )
            return hvm_intblk_shadow;

        if ( !nv->nv_vmexit_pending && n2vmcb->exitintinfo.bytes != 0 ) {
            /* Give the l2 guest a chance to finish the delivery of
             * the last injected interrupt or exception before we
             * emulate a VMEXIT (e.g. VMEXIT(INTR) ).
             */
            return hvm_intblk_shadow;
        }
    }

    if ( nv->nv_vmexit_pending ) {
        /* hvm_inject_hw_exception() must have run before.
         * exceptions have higher priority than interrupts.
         */
        return hvm_intblk_rflags_ie;
    }

    return hvm_intblk_none;
}
1278 | | |
1279 | | /* MSR handling */ |
1280 | | int nsvm_rdmsr(struct vcpu *v, unsigned int msr, uint64_t *msr_content) |
1281 | 0 | { |
1282 | 0 | struct nestedsvm *svm = &vcpu_nestedsvm(v); |
1283 | 0 | int ret = 1; |
1284 | 0 |
|
1285 | 0 | *msr_content = 0; |
1286 | 0 |
|
1287 | 0 | switch (msr) { |
1288 | 0 | case MSR_K8_VM_CR: |
1289 | 0 | break; |
1290 | 0 | case MSR_K8_VM_HSAVE_PA: |
1291 | 0 | *msr_content = svm->ns_msr_hsavepa; |
1292 | 0 | break; |
1293 | 0 | case MSR_AMD64_TSC_RATIO: |
1294 | 0 | *msr_content = svm->ns_tscratio; |
1295 | 0 | break; |
1296 | 0 | default: |
1297 | 0 | ret = 0; |
1298 | 0 | break; |
1299 | 0 | } |
1300 | 0 |
|
1301 | 0 | return ret; |
1302 | 0 | } |
1303 | | |
1304 | | int nsvm_wrmsr(struct vcpu *v, unsigned int msr, uint64_t msr_content) |
1305 | 0 | { |
1306 | 0 | int ret = 1; |
1307 | 0 | struct nestedsvm *svm = &vcpu_nestedsvm(v); |
1308 | 0 |
|
1309 | 0 | switch (msr) { |
1310 | 0 | case MSR_K8_VM_CR: |
1311 | 0 | /* ignore write. handle all bits as read-only. */ |
1312 | 0 | break; |
1313 | 0 | case MSR_K8_VM_HSAVE_PA: |
1314 | 0 | if (!nestedsvm_vmcb_isvalid(v, msr_content)) { |
1315 | 0 | gdprintk(XENLOG_ERR, |
1316 | 0 | "MSR_K8_VM_HSAVE_PA value invalid %#"PRIx64"\n", msr_content); |
1317 | 0 | ret = -1; /* inject #GP */ |
1318 | 0 | break; |
1319 | 0 | } |
1320 | 0 | svm->ns_msr_hsavepa = msr_content; |
1321 | 0 | break; |
1322 | 0 | case MSR_AMD64_TSC_RATIO: |
1323 | 0 | if ((msr_content & ~TSC_RATIO_RSVD_BITS) != msr_content) { |
1324 | 0 | gdprintk(XENLOG_ERR, |
1325 | 0 | "reserved bits set in MSR_AMD64_TSC_RATIO %#"PRIx64"\n", |
1326 | 0 | msr_content); |
1327 | 0 | ret = -1; /* inject #GP */ |
1328 | 0 | break; |
1329 | 0 | } |
1330 | 0 | svm->ns_tscratio = msr_content; |
1331 | 0 | break; |
1332 | 0 | default: |
1333 | 0 | ret = 0; |
1334 | 0 | break; |
1335 | 0 | } |
1336 | 0 |
|
1337 | 0 | return ret; |
1338 | 0 | } |
1339 | | |
1340 | | /* VMEXIT emulation */ |
1341 | | void |
1342 | | nestedsvm_vmexit_defer(struct vcpu *v, |
1343 | | uint64_t exitcode, uint64_t exitinfo1, uint64_t exitinfo2) |
1344 | 0 | { |
1345 | 0 | struct nestedsvm *svm = &vcpu_nestedsvm(v); |
1346 | 0 |
|
1347 | 0 | nestedsvm_vcpu_clgi(v); |
1348 | 0 | svm->ns_vmexit.exitcode = exitcode; |
1349 | 0 | svm->ns_vmexit.exitinfo1 = exitinfo1; |
1350 | 0 | svm->ns_vmexit.exitinfo2 = exitinfo2; |
1351 | 0 | vcpu_nestedhvm(v).nv_vmexit_pending = 1; |
1352 | 0 | } |
1353 | | |
/*
 * Policy decision for a hardware VMEXIT taken while the l2 guest was
 * running: should the host handle it (NESTEDHVM_VMEXIT_HOST), should a
 * nested VMEXIT be injected into the l1 guest
 * (NESTEDHVM_VMEXIT_INJECT), or is the combination impossible
 * (NESTEDHVM_VMEXIT_FATALERROR)?
 *
 * Some exitcodes (INTR, NMI, #NM) always go to the host first; NPF and
 * #PF depend on the host/guest paging-mode combination; everything
 * else follows the l1 guest's intercept settings.
 */
enum nestedhvm_vmexits
nestedsvm_check_intercepts(struct vcpu *v, struct cpu_user_regs *regs,
    uint64_t exitcode)
{
    bool_t is_intercepted;

    ASSERT(vcpu_nestedhvm(v).nv_vmexit_pending == 0);
    is_intercepted = nsvm_vmcb_guest_intercepts_exitcode(v, regs, exitcode);

    switch (exitcode) {
    case VMEXIT_INVALID:
        if (is_intercepted)
            return NESTEDHVM_VMEXIT_INJECT;
        return NESTEDHVM_VMEXIT_HOST;

    case VMEXIT_INTR:
    case VMEXIT_NMI:
        return NESTEDHVM_VMEXIT_HOST;
    case VMEXIT_EXCEPTION_NM:
        /* Host must handle lazy fpu context switching first.
         * Then inject the VMEXIT if L1 guest intercepts this.
         */
        return NESTEDHVM_VMEXIT_HOST;

    case VMEXIT_NPF:
        if (nestedhvm_paging_mode_hap(v)) {
            /* NPF must be intercepted by l1 in this mode (see
             * nsvm_vmcb_guest_intercepts_exitcode()). */
            if (!is_intercepted)
                return NESTEDHVM_VMEXIT_FATALERROR;
            /* host nested paging + guest nested paging */
            return NESTEDHVM_VMEXIT_HOST;
        }
        if (paging_mode_hap(v->domain)) {
            /* Conversely, l1 can't intercept NPF without nested
             * paging of its own. */
            if (is_intercepted)
                return NESTEDHVM_VMEXIT_FATALERROR;
            /* host nested paging + guest shadow paging */
            return NESTEDHVM_VMEXIT_HOST;
        }
        /* host shadow paging + guest shadow paging */
        /* Can this happen? */
        BUG();
        return NESTEDHVM_VMEXIT_FATALERROR;
    case VMEXIT_EXCEPTION_PF:
        if (nestedhvm_paging_mode_hap(v)) {
            /* host nested paging + guest nested paging */
            if (!is_intercepted)
                /* l1 guest intercepts #PF unnecessarily */
                return NESTEDHVM_VMEXIT_HOST;
            /* l2 guest intercepts #PF unnecessarily */
            return NESTEDHVM_VMEXIT_INJECT;
        }
        if (!paging_mode_hap(v->domain)) {
            /* host shadow paging + guest shadow paging */
            return NESTEDHVM_VMEXIT_HOST;
        }
        /* host nested paging + guest shadow paging */
        return NESTEDHVM_VMEXIT_INJECT;
    case VMEXIT_VMMCALL:
        /* Always let the guest handle VMMCALL/VMCALL */
        return NESTEDHVM_VMEXIT_INJECT;
    default:
        gprintk(XENLOG_ERR, "Unexpected nested vmexit: reason %#"PRIx64"\n",
                exitcode);
        break;
    }

    /* Default: route by the l1 guest's intercept settings. */
    if (is_intercepted)
        return NESTEDHVM_VMEXIT_INJECT;
    return NESTEDHVM_VMEXIT_HOST;
}
1423 | | |
1424 | | enum nestedhvm_vmexits |
1425 | | nestedsvm_vmexit_n2n1(struct vcpu *v, struct cpu_user_regs *regs) |
1426 | 0 | { |
1427 | 0 | int rc; |
1428 | 0 | enum nestedhvm_vmexits ret = NESTEDHVM_VMEXIT_DONE; |
1429 | 0 |
|
1430 | 0 | ASSERT(vcpu_nestedhvm(v).nv_vmswitch_in_progress); |
1431 | 0 | ASSERT(nestedhvm_vcpu_in_guestmode(v)); |
1432 | 0 |
|
1433 | 0 | rc = nsvm_vmcb_prepare4vmexit(v, regs); |
1434 | 0 | if (rc) |
1435 | 0 | ret = NESTEDHVM_VMEXIT_ERROR; |
1436 | 0 |
|
1437 | 0 | rc = nsvm_vcpu_hostrestore(v, regs); |
1438 | 0 | if (rc) |
1439 | 0 | ret = NESTEDHVM_VMEXIT_FATALERROR; |
1440 | 0 |
|
1441 | 0 | nestedhvm_vcpu_exit_guestmode(v); |
1442 | 0 | return ret; |
1443 | 0 | } |
1444 | | |
1445 | | /* The exitcode is in native SVM/VMX format. The forced exitcode |
1446 | | * is in generic format. |
1447 | | */ |
/*
 * Emulate a full VMEXIT into the l1 guest: if the vcpu is still in l2
 * guest mode, first switch back to l1 (restoring host state), then
 * write the exit information into the virtual VMCB and update the
 * paging mode if l1 runs with shadow paging.
 *
 * Returns NESTEDHVM_VMEXIT_DONE on success or
 * NESTEDHVM_VMEXIT_FATALERROR if state restore/injection failed.
 */
static enum nestedhvm_vmexits
nestedsvm_vcpu_vmexit(struct vcpu *v, struct cpu_user_regs *regs,
    uint64_t exitcode)
{
    int rc;
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);

    nv->nv_vmswitch_in_progress = 1;

    ASSERT(nv->nv_vvmcx != NULL);

    /* On special intercepts the host has to handle
     * the vcpu is still in guest mode here.
     */
    if (nestedhvm_vcpu_in_guestmode(v)) {
        enum nestedhvm_vmexits ret;

        ret = nestedsvm_vmexit_n2n1(v, regs);
        switch (ret) {
        case NESTEDHVM_VMEXIT_FATALERROR:
            gdprintk(XENLOG_ERR, "VMEXIT: fatal error\n");
            return ret;
        case NESTEDHVM_VMEXIT_HOST:
            /* n2n1 never returns HOST. */
            BUG();
            return ret;
        case NESTEDHVM_VMEXIT_ERROR:
            /* VMCB preparation failed: report VMEXIT_INVALID to l1. */
            exitcode = VMEXIT_INVALID;
            break;
        default:
            ASSERT(!nestedhvm_vcpu_in_guestmode(v));
            break;
        }

        /* host state has been restored */
    }

    ASSERT(!nestedhvm_vcpu_in_guestmode(v));

    /* Prepare for running the l1 guest. Make the actual
     * modifications to the virtual VMCB/VMCS.
     */
    rc = nsvm_vcpu_vmexit_inject(v, regs, exitcode);

    /* If l1 guest uses shadow paging, update the paging mode. */
    if (!nestedhvm_paging_mode_hap(v))
        paging_update_paging_modes(v);

    nv->nv_vmswitch_in_progress = 0;

    if (rc)
        return NESTEDHVM_VMEXIT_FATALERROR;

    return NESTEDHVM_VMEXIT_DONE;
}
1502 | | |
1503 | | /* VCPU switch */ |
/*
 * Per-VMentry nested-SVM switch point, called with the current vcpu's
 * register state: process a pending emulated VMEXIT or a pending
 * VMRUN (a failed VMRUN turns into a VMEXIT via the goto), and make
 * sure a valid nested p2m is in place before re-entering the l2 guest.
 */
void nsvm_vcpu_switch(struct cpu_user_regs *regs)
{
    struct vcpu *v = current;
    struct nestedvcpu *nv;
    struct nestedsvm *svm;

    if (!nestedhvm_enabled(v->domain))
        return;

    nv = &vcpu_nestedhvm(v);
    svm = &vcpu_nestedsvm(v);
    ASSERT(v->arch.hvm_svm.vmcb != NULL);
    ASSERT(nv->nv_n1vmcx != NULL);
    ASSERT(nv->nv_n2vmcx != NULL);
    ASSERT(nv->nv_n1vmcx_pa != INVALID_PADDR);
    ASSERT(nv->nv_n2vmcx_pa != INVALID_PADDR);

    if (nv->nv_vmexit_pending) {
 vmexit:
        nestedsvm_vcpu_vmexit(v, regs, svm->ns_vmexit.exitcode);
        /* Both flags are cleared: a VMEXIT cancels a pending VMENTRY
         * (also reached via the goto below on a failed VMRUN). */
        nv->nv_vmexit_pending = 0;
        nv->nv_vmentry_pending = 0;
        return;
    }
    if (nv->nv_vmentry_pending) {
        int ret;
        ASSERT(!nv->nv_vmexit_pending);
        ret = nsvm_vcpu_vmrun(v, regs);
        if (ret)
            /* Failed VMRUN: emulate a VMEXIT into the l1 guest. */
            goto vmexit;

        ASSERT(nestedhvm_vcpu_in_guestmode(v));
        nv->nv_vmentry_pending = 0;
    }

    if (nestedhvm_vcpu_in_guestmode(v)
       && nestedhvm_paging_mode_hap(v))
    {
        /* In case left the l2 guest due to a physical interrupt (e.g. IPI)
         * that is not for the l1 guest then we continue running the l2 guest
         * but check if the nestedp2m is still valid.
         */
        if (nv->nv_p2m == NULL)
            nestedsvm_vmcb_set_nestedp2m(v, nv->nv_vvmcx, nv->nv_n2vmcx);
    }
}
1550 | | |
1551 | | /* Interrupts, Virtual GIF */ |
1552 | | int |
1553 | | nestedsvm_vcpu_interrupt(struct vcpu *v, const struct hvm_intack intack) |
1554 | 0 | { |
1555 | 0 | int ret; |
1556 | 0 | enum hvm_intblk intr; |
1557 | 0 | uint64_t exitcode = VMEXIT_INTR; |
1558 | 0 | uint64_t exitinfo2 = 0; |
1559 | 0 | ASSERT(nestedhvm_vcpu_in_guestmode(v)); |
1560 | 0 |
|
1561 | 0 | intr = nhvm_interrupt_blocked(v); |
1562 | 0 | if ( intr != hvm_intblk_none ) |
1563 | 0 | return NSVM_INTR_MASKED; |
1564 | 0 |
|
1565 | 0 | switch (intack.source) { |
1566 | 0 | case hvm_intsrc_pic: |
1567 | 0 | case hvm_intsrc_lapic: |
1568 | 0 | case hvm_intsrc_vector: |
1569 | 0 | exitcode = VMEXIT_INTR; |
1570 | 0 | exitinfo2 = intack.vector; |
1571 | 0 | break; |
1572 | 0 | case hvm_intsrc_nmi: |
1573 | 0 | exitcode = VMEXIT_NMI; |
1574 | 0 | exitinfo2 = intack.vector; |
1575 | 0 | break; |
1576 | 0 | case hvm_intsrc_mce: |
1577 | 0 | exitcode = VMEXIT_EXCEPTION_MC; |
1578 | 0 | exitinfo2 = intack.vector; |
1579 | 0 | break; |
1580 | 0 | case hvm_intsrc_none: |
1581 | 0 | return NSVM_INTR_NOTHANDLED; |
1582 | 0 | default: |
1583 | 0 | BUG(); |
1584 | 0 | } |
1585 | 0 |
|
1586 | 0 | ret = nsvm_vmcb_guest_intercepts_exitcode(v, |
1587 | 0 | guest_cpu_user_regs(), exitcode); |
1588 | 0 | if (ret) { |
1589 | 0 | nestedsvm_vmexit_defer(v, exitcode, intack.source, exitinfo2); |
1590 | 0 | return NSVM_INTR_FORCEVMEXIT; |
1591 | 0 | } |
1592 | 0 |
|
1593 | 0 | return NSVM_INTR_NOTINTERCEPTED; |
1594 | 0 | } |
1595 | | |
1596 | | bool_t |
1597 | | nestedsvm_gif_isset(struct vcpu *v) |
1598 | 0 | { |
1599 | 0 | struct nestedsvm *svm = &vcpu_nestedsvm(v); |
1600 | 0 |
|
1601 | 0 | return (!!svm->ns_gif); |
1602 | 0 | } |
1603 | | |
1604 | | void svm_vmexit_do_stgi(struct cpu_user_regs *regs, struct vcpu *v) |
1605 | 0 | { |
1606 | 0 | unsigned int inst_len; |
1607 | 0 |
|
1608 | 0 | if ( !nestedhvm_enabled(v->domain) ) { |
1609 | 0 | hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC); |
1610 | 0 | return; |
1611 | 0 | } |
1612 | 0 |
|
1613 | 0 | if ( (inst_len = __get_instruction_length(v, INSTR_STGI)) == 0 ) |
1614 | 0 | return; |
1615 | 0 |
|
1616 | 0 | nestedsvm_vcpu_stgi(v); |
1617 | 0 |
|
1618 | 0 | __update_guest_eip(regs, inst_len); |
1619 | 0 | } |
1620 | | |
1621 | | void svm_vmexit_do_clgi(struct cpu_user_regs *regs, struct vcpu *v) |
1622 | 0 | { |
1623 | 0 | struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; |
1624 | 0 | unsigned int inst_len; |
1625 | 0 | uint32_t general1_intercepts = vmcb_get_general1_intercepts(vmcb); |
1626 | 0 | vintr_t intr; |
1627 | 0 |
|
1628 | 0 | if ( !nestedhvm_enabled(v->domain) ) { |
1629 | 0 | hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC); |
1630 | 0 | return; |
1631 | 0 | } |
1632 | 0 |
|
1633 | 0 | if ( (inst_len = __get_instruction_length(v, INSTR_CLGI)) == 0 ) |
1634 | 0 | return; |
1635 | 0 |
|
1636 | 0 | nestedsvm_vcpu_clgi(v); |
1637 | 0 |
|
1638 | 0 | /* After a CLGI no interrupts should come */ |
1639 | 0 | intr = vmcb_get_vintr(vmcb); |
1640 | 0 | intr.fields.irq = 0; |
1641 | 0 | general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR; |
1642 | 0 | vmcb_set_vintr(vmcb, intr); |
1643 | 0 | vmcb_set_general1_intercepts(vmcb, general1_intercepts); |
1644 | 0 |
|
1645 | 0 | __update_guest_eip(regs, inst_len); |
1646 | 0 | } |