Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/arch/x86/hvm/svm/nestedsvm.c
Line
Count
Source
1
/*
2
 * nestedsvm.c: Nested Virtualization
3
 * Copyright (c) 2011, Advanced Micro Devices, Inc
4
 *
5
 * This program is free software; you can redistribute it and/or modify it
6
 * under the terms and conditions of the GNU General Public License,
7
 * version 2, as published by the Free Software Foundation.
8
 *
9
 * This program is distributed in the hope it will be useful, but WITHOUT
10
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12
 * more details.
13
 *
14
 * You should have received a copy of the GNU General Public License along with
15
 * this program; If not, see <http://www.gnu.org/licenses/>.
16
 *
17
 */
18
19
#include <asm/hvm/support.h>
20
#include <asm/hvm/svm/emulate.h>
21
#include <asm/hvm/svm/svm.h>
22
#include <asm/hvm/svm/vmcb.h>
23
#include <asm/hvm/nestedhvm.h>
24
#include <asm/hvm/svm/nestedsvm.h>
25
#include <asm/hvm/svm/svmdebug.h>
26
#include <asm/paging.h> /* paging_mode_hap */
27
#include <asm/event.h> /* for local_event_delivery_(en|dis)able */
28
#include <asm/p2m.h> /* p2m_get_pagetable, p2m_get_nestedp2m */
29
30
31
0
#define NSVM_ERROR_VVMCB        1
32
0
#define NSVM_ERROR_VMENTRY      2
33
 
34
static void
35
nestedsvm_vcpu_clgi(struct vcpu *v)
36
0
{
37
0
    /* clear gif flag */
38
0
    vcpu_nestedsvm(v).ns_gif = 0;
39
0
    local_event_delivery_disable(); /* mask events for PV drivers */
40
0
}
41
42
static void
43
nestedsvm_vcpu_stgi(struct vcpu *v)
44
0
{
45
0
    /* enable gif flag */
46
0
    vcpu_nestedsvm(v).ns_gif = 1;
47
0
    local_event_delivery_enable(); /* unmask events for PV drivers */
48
0
}
49
50
static int
51
nestedsvm_vmcb_isvalid(struct vcpu *v, uint64_t vmcxaddr)
52
0
{
53
0
    /* Address must be 4k aligned */
54
0
    if ( (vmcxaddr & ~PAGE_MASK) != 0 )
55
0
        return 0;
56
0
57
0
    /* Maximum valid physical address.
58
0
     * See AMD BKDG for HSAVE_PA MSR.
59
0
     */
60
0
    if ( vmcxaddr > 0xfd00000000ULL )
61
0
        return 0;
62
0
63
0
    return 1;
64
0
}
65
66
int nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr)
67
0
{
68
0
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
69
0
70
0
    if (nv->nv_vvmcx != NULL && nv->nv_vvmcxaddr != vmcbaddr) {
71
0
        ASSERT(nv->nv_vvmcxaddr != INVALID_PADDR);
72
0
        hvm_unmap_guest_frame(nv->nv_vvmcx, 1);
73
0
        nv->nv_vvmcx = NULL;
74
0
        nv->nv_vvmcxaddr = INVALID_PADDR;
75
0
    }
76
0
77
0
    if ( !nv->nv_vvmcx )
78
0
    {
79
0
        bool_t writable;
80
0
        void *vvmcx = hvm_map_guest_frame_rw(paddr_to_pfn(vmcbaddr), 1,
81
0
                                             &writable);
82
0
83
0
        if ( !vvmcx )
84
0
            return 0;
85
0
        if ( !writable )
86
0
        {
87
0
            hvm_unmap_guest_frame(vvmcx, 1);
88
0
            return 0;
89
0
        }
90
0
        nv->nv_vvmcx = vvmcx;
91
0
        nv->nv_vvmcxaddr = vmcbaddr;
92
0
    }
93
0
94
0
    return 1;
95
0
}
96
97
/* Interface methods */
98
int nsvm_vcpu_initialise(struct vcpu *v)
99
0
{
100
0
    void *msrpm;
101
0
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
102
0
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
103
0
104
0
    msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE), 0);
105
0
    svm->ns_cached_msrpm = msrpm;
106
0
    if (msrpm == NULL)
107
0
        goto err;
108
0
    memset(msrpm, 0x0, MSRPM_SIZE);
109
0
110
0
    msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE), 0);
111
0
    svm->ns_merged_msrpm = msrpm;
112
0
    if (msrpm == NULL)
113
0
        goto err;
114
0
    memset(msrpm, 0x0, MSRPM_SIZE);
115
0
116
0
    nv->nv_n2vmcx = alloc_vmcb();
117
0
    if (nv->nv_n2vmcx == NULL)
118
0
        goto err;
119
0
    nv->nv_n2vmcx_pa = virt_to_maddr(nv->nv_n2vmcx);
120
0
121
0
    return 0;
122
0
123
0
err:
124
0
    nsvm_vcpu_destroy(v);
125
0
    return -ENOMEM;
126
0
}
127
128
void nsvm_vcpu_destroy(struct vcpu *v)
129
0
{
130
0
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
131
0
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
132
0
133
0
    /*
134
0
     * When destroying the vcpu, it may be running on behalf of l2 guest.
135
0
     * Therefore we need to switch the VMCB pointer back to the l1 vmcb,
136
0
     * in order to avoid double free of l2 vmcb and the possible memory leak
137
0
     * of l1 vmcb page.
138
0
     */
139
0
    if (nv->nv_n1vmcx)
140
0
        v->arch.hvm_svm.vmcb = nv->nv_n1vmcx;
141
0
142
0
    if (svm->ns_cached_msrpm) {
143
0
        free_xenheap_pages(svm->ns_cached_msrpm,
144
0
                           get_order_from_bytes(MSRPM_SIZE));
145
0
        svm->ns_cached_msrpm = NULL;
146
0
    }
147
0
    if (svm->ns_merged_msrpm) {
148
0
        free_xenheap_pages(svm->ns_merged_msrpm,
149
0
                           get_order_from_bytes(MSRPM_SIZE));
150
0
        svm->ns_merged_msrpm = NULL;
151
0
    }
152
0
    hvm_unmap_guest_frame(nv->nv_vvmcx, 1);
153
0
    nv->nv_vvmcx = NULL;
154
0
    if (nv->nv_n2vmcx) {
155
0
        free_vmcb(nv->nv_n2vmcx);
156
0
        nv->nv_n2vmcx = NULL;
157
0
        nv->nv_n2vmcx_pa = INVALID_PADDR;
158
0
    }
159
0
    if (svm->ns_iomap)
160
0
        svm->ns_iomap = NULL;
161
0
}
162
163
int nsvm_vcpu_reset(struct vcpu *v)
164
0
{
165
0
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
166
0
167
0
    svm->ns_msr_hsavepa = INVALID_PADDR;
168
0
    svm->ns_ovvmcb_pa = INVALID_PADDR;
169
0
170
0
    svm->ns_tscratio = DEFAULT_TSC_RATIO;
171
0
172
0
    svm->ns_cr_intercepts = 0;
173
0
    svm->ns_dr_intercepts = 0;
174
0
    svm->ns_exception_intercepts = 0;
175
0
    svm->ns_general1_intercepts = 0;
176
0
    svm->ns_general2_intercepts = 0;
177
0
    svm->ns_lbr_control.bytes = 0;
178
0
179
0
    svm->ns_hap_enabled = 0;
180
0
    svm->ns_vmcb_guestcr3 = 0;
181
0
    svm->ns_vmcb_hostcr3 = 0;
182
0
    svm->ns_guest_asid = 0;
183
0
    svm->ns_hostflags.bytes = 0;
184
0
    svm->ns_vmexit.exitinfo1 = 0;
185
0
    svm->ns_vmexit.exitinfo2 = 0;
186
0
187
0
    if (svm->ns_iomap)
188
0
        svm->ns_iomap = NULL;
189
0
190
0
    nestedsvm_vcpu_stgi(v);
191
0
    return 0;
192
0
}
193
194
static uint64_t nestedsvm_fpu_vmentry(uint64_t n1cr0,
195
    struct vmcb_struct *vvmcb,
196
    struct vmcb_struct *n1vmcb, struct vmcb_struct *n2vmcb)
197
0
{
198
0
    uint64_t vcr0;
199
0
200
0
    vcr0 = vvmcb->_cr0;
201
0
    if ( !(n1cr0 & X86_CR0_TS) && (n1vmcb->_cr0 & X86_CR0_TS) ) {
202
0
        /* svm_fpu_leave() ran while the l1 guest was running.
203
0
         * Sync FPU state with l2 guest.
204
0
         */
205
0
        vcr0 |= X86_CR0_TS;
206
0
        n2vmcb->_exception_intercepts |= (1U << TRAP_no_device);
207
0
    } else if ( !(vcr0 & X86_CR0_TS) && (n2vmcb->_cr0 & X86_CR0_TS) ) {
208
0
        /* svm_fpu_enter() ran while the l1 guest was running.
209
0
         * Sync FPU state with l2 guest. */
210
0
        vcr0 &= ~X86_CR0_TS;
211
0
        n2vmcb->_exception_intercepts &= ~(1U << TRAP_no_device);
212
0
    }
213
0
214
0
    return vcr0;
215
0
}
216
217
static void nestedsvm_fpu_vmexit(struct vmcb_struct *n1vmcb,
218
    struct vmcb_struct *n2vmcb, uint64_t n1cr0, uint64_t guest_cr0)
219
0
{
220
0
    if ( !(guest_cr0 & X86_CR0_TS) && (n2vmcb->_cr0 & X86_CR0_TS) ) {
221
0
        /* svm_fpu_leave() ran while the l2 guest was running.
222
0
         * Sync FPU state with l1 guest. */
223
0
        n1vmcb->_cr0 |= X86_CR0_TS;
224
0
        n1vmcb->_exception_intercepts |= (1U << TRAP_no_device);
225
0
    } else if ( !(n1cr0 & X86_CR0_TS) && (n1vmcb->_cr0 & X86_CR0_TS) ) {
226
0
        /* svm_fpu_enter() ran while the l2 guest was running.
227
0
         * Sync FPU state with l1 guest. */
228
0
        n1vmcb->_cr0 &= ~X86_CR0_TS;
229
0
        n1vmcb->_exception_intercepts &= ~(1U << TRAP_no_device);
230
0
    }
231
0
}
232
233
static int nsvm_vcpu_hostsave(struct vcpu *v, unsigned int inst_len)
234
0
{
235
0
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
236
0
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
237
0
    struct vmcb_struct *n1vmcb;
238
0
239
0
    n1vmcb = nv->nv_n1vmcx;
240
0
    ASSERT(n1vmcb != NULL);
241
0
242
0
    n1vmcb->rip += inst_len;
243
0
244
0
    /* Save shadowed values. This ensures that the l1 guest
245
0
     * cannot override them to break out. */
246
0
    n1vmcb->_efer = v->arch.hvm_vcpu.guest_efer;
247
0
    n1vmcb->_cr0 = v->arch.hvm_vcpu.guest_cr[0];
248
0
    n1vmcb->_cr2 = v->arch.hvm_vcpu.guest_cr[2];
249
0
    n1vmcb->_cr4 = v->arch.hvm_vcpu.guest_cr[4];
250
0
251
0
    /* Remember the host interrupt flag */
252
0
    svm->ns_hostflags.fields.rflagsif =
253
0
        (n1vmcb->rflags & X86_EFLAGS_IF) ? 1 : 0;
254
0
255
0
    return 0;
256
0
}
257
258
static int nsvm_vcpu_hostrestore(struct vcpu *v, struct cpu_user_regs *regs)
259
0
{
260
0
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
261
0
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
262
0
    struct vmcb_struct *n1vmcb, *n2vmcb;
263
0
    int rc;
264
0
265
0
    n1vmcb = nv->nv_n1vmcx;
266
0
    n2vmcb = nv->nv_n2vmcx;
267
0
    ASSERT(n1vmcb != NULL);
268
0
    ASSERT(n2vmcb != NULL);
269
0
270
0
    /* nsvm_vmcb_prepare4vmexit() already saved register values
271
0
     * handled by VMSAVE/VMLOAD into n1vmcb directly.
272
0
     */
273
0
274
0
    /* switch vmcb to l1 guest's vmcb */
275
0
    v->arch.hvm_svm.vmcb = n1vmcb;
276
0
    v->arch.hvm_svm.vmcb_pa = nv->nv_n1vmcx_pa;
277
0
278
0
    /* EFER */
279
0
    v->arch.hvm_vcpu.guest_efer = n1vmcb->_efer;
280
0
    rc = hvm_set_efer(n1vmcb->_efer);
281
0
    if ( rc == X86EMUL_EXCEPTION )
282
0
        hvm_inject_hw_exception(TRAP_gp_fault, 0);
283
0
    if (rc != X86EMUL_OKAY)
284
0
        gdprintk(XENLOG_ERR, "hvm_set_efer failed, rc: %u\n", rc);
285
0
286
0
    /* CR4 */
287
0
    v->arch.hvm_vcpu.guest_cr[4] = n1vmcb->_cr4;
288
0
    rc = hvm_set_cr4(n1vmcb->_cr4, 1);
289
0
    if ( rc == X86EMUL_EXCEPTION )
290
0
        hvm_inject_hw_exception(TRAP_gp_fault, 0);
291
0
    if (rc != X86EMUL_OKAY)
292
0
        gdprintk(XENLOG_ERR, "hvm_set_cr4 failed, rc: %u\n", rc);
293
0
294
0
    /* CR0 */
295
0
    nestedsvm_fpu_vmexit(n1vmcb, n2vmcb,
296
0
        svm->ns_cr0, v->arch.hvm_vcpu.guest_cr[0]);
297
0
    v->arch.hvm_vcpu.guest_cr[0] = n1vmcb->_cr0 | X86_CR0_PE;
298
0
    n1vmcb->rflags &= ~X86_EFLAGS_VM;
299
0
    rc = hvm_set_cr0(n1vmcb->_cr0 | X86_CR0_PE, 1);
300
0
    if ( rc == X86EMUL_EXCEPTION )
301
0
        hvm_inject_hw_exception(TRAP_gp_fault, 0);
302
0
    if (rc != X86EMUL_OKAY)
303
0
        gdprintk(XENLOG_ERR, "hvm_set_cr0 failed, rc: %u\n", rc);
304
0
    svm->ns_cr0 = v->arch.hvm_vcpu.guest_cr[0];
305
0
306
0
    /* CR2 */
307
0
    v->arch.hvm_vcpu.guest_cr[2] = n1vmcb->_cr2;
308
0
    hvm_update_guest_cr(v, 2);
309
0
310
0
    /* CR3 */
311
0
    /* Nested paging mode */
312
0
    if (nestedhvm_paging_mode_hap(v)) {
313
0
        /* host nested paging + guest nested paging. */
314
0
        /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
315
0
    } else if (paging_mode_hap(v->domain)) {
316
0
        /* host nested paging + guest shadow paging. */
317
0
        /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
318
0
    } else {
319
0
        /* host shadow paging + guest shadow paging. */
320
0
321
0
        /* Reset MMU context  -- XXX (hostrestore) not yet working */
322
0
        if (!pagetable_is_null(v->arch.guest_table))
323
0
            put_page(pagetable_get_page(v->arch.guest_table));
324
0
        v->arch.guest_table = pagetable_null();
325
0
        /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
326
0
    }
327
0
    rc = hvm_set_cr3(n1vmcb->_cr3, 1);
328
0
    if ( rc == X86EMUL_EXCEPTION )
329
0
        hvm_inject_hw_exception(TRAP_gp_fault, 0);
330
0
    if (rc != X86EMUL_OKAY)
331
0
        gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
332
0
333
0
    regs->rax = n1vmcb->rax;
334
0
    regs->rsp = n1vmcb->rsp;
335
0
    regs->rip = n1vmcb->rip;
336
0
    regs->rflags = n1vmcb->rflags;
337
0
    n1vmcb->_dr7 = 0; /* disable all breakpoints */
338
0
    n1vmcb->_cpl = 0;
339
0
340
0
    /* Clear exitintinfo to prevent a fault loop of re-injecting
341
0
     * exceptions forever.
342
0
     */
343
0
    n1vmcb->exitintinfo.bytes = 0;
344
0
345
0
    /* Cleanbits */
346
0
    n1vmcb->cleanbits.bytes = 0;
347
0
348
0
    return 0;
349
0
}
350
351
static int nsvm_vmrun_permissionmap(struct vcpu *v, bool_t viopm)
352
0
{
353
0
    struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
354
0
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
355
0
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
356
0
    struct vmcb_struct *ns_vmcb = nv->nv_vvmcx;
357
0
    struct vmcb_struct *host_vmcb = arch_svm->vmcb;
358
0
    unsigned long *ns_msrpm_ptr;
359
0
    unsigned int i;
360
0
    enum hvm_translation_result ret;
361
0
    unsigned long *ns_viomap;
362
0
    bool_t ioport_80 = 1, ioport_ed = 1;
363
0
364
0
    ns_msrpm_ptr = (unsigned long *)svm->ns_cached_msrpm;
365
0
366
0
    ret = hvm_copy_from_guest_phys(svm->ns_cached_msrpm,
367
0
                                   ns_vmcb->_msrpm_base_pa, MSRPM_SIZE);
368
0
    if ( ret != HVMTRANS_okay )
369
0
    {
370
0
        gdprintk(XENLOG_ERR, "hvm_copy_from_guest_phys msrpm %u\n", ret);
371
0
        return 1;
372
0
    }
373
0
374
0
    /* Check l1 guest io permission map and get a shadow one based on
375
0
     * whether the l1 guest intercepts io ports 0x80 and/or 0xED.
376
0
     */
377
0
    svm->ns_oiomap_pa = svm->ns_iomap_pa;
378
0
    svm->ns_iomap_pa = ns_vmcb->_iopm_base_pa;
379
0
380
0
    ns_viomap = hvm_map_guest_frame_ro(svm->ns_iomap_pa >> PAGE_SHIFT, 0);
381
0
    if ( ns_viomap )
382
0
    {
383
0
        ioport_80 = test_bit(0x80, ns_viomap);
384
0
        ioport_ed = test_bit(0xed, ns_viomap);
385
0
        hvm_unmap_guest_frame(ns_viomap, 0);
386
0
    }
387
0
388
0
    svm->ns_iomap = nestedhvm_vcpu_iomap_get(ioport_80, ioport_ed);
389
0
390
0
    nv->nv_ioport80 = ioport_80;
391
0
    nv->nv_ioportED = ioport_ed;
392
0
393
0
    /* v->arch.hvm_svm.msrpm has type unsigned long, thus
394
0
     * BYTES_PER_LONG.
395
0
     */
396
0
    for (i = 0; i < MSRPM_SIZE / BYTES_PER_LONG; i++)
397
0
        svm->ns_merged_msrpm[i] = arch_svm->msrpm[i] | ns_msrpm_ptr[i];
398
0
399
0
    host_vmcb->_iopm_base_pa =
400
0
        (uint64_t)virt_to_maddr(svm->ns_iomap);
401
0
    host_vmcb->_msrpm_base_pa =
402
0
        (uint64_t)virt_to_maddr(svm->ns_merged_msrpm);
403
0
404
0
    return 0;
405
0
}
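
The merge loop above is the heart of the MSR permission-map shadowing: an MSR access must cause a #VMEXIT if either Xen (the host map) or the l1 guest (the cached map copied from guest memory) intercepts it, so the shadow map handed to hardware is the word-wise OR of the two. A minimal standalone sketch of that idea, with hypothetical buffer names and the 8 KiB MSRPM size assumed here:

#include <stddef.h>

#define MSRPM_SIZE 0x2000 /* 8 KiB AMD MSR permission map (assumption) */

/* Hypothetical helper: a bit set in either input map is set in the shadow
 * map, so the stricter of the two intercept policies always wins. */
static void merge_msrpm(unsigned long *shadow, const unsigned long *host,
                        const unsigned long *guest)
{
    size_t i;

    for ( i = 0; i < MSRPM_SIZE / sizeof(unsigned long); i++ )
        shadow[i] = host[i] | guest[i];
}
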
406
407
static void nestedsvm_vmcb_set_nestedp2m(struct vcpu *v,
408
    struct vmcb_struct *vvmcb, struct vmcb_struct *n2vmcb)
409
0
{
410
0
    struct p2m_domain *p2m;
411
0
412
0
    ASSERT(v != NULL);
413
0
    ASSERT(vvmcb != NULL);
414
0
    ASSERT(n2vmcb != NULL);
415
0
416
0
    /* This will allow nsvm_vcpu_hostcr3() to return correct np2m_base */
417
0
    vcpu_nestedsvm(v).ns_vmcb_hostcr3 = vvmcb->_h_cr3;
418
0
419
0
    p2m = p2m_get_nestedp2m(v);
420
0
    n2vmcb->_h_cr3 = pagetable_get_paddr(p2m_get_pagetable(p2m));
421
0
}
422
423
static int nsvm_vmcb_prepare4vmrun(struct vcpu *v, struct cpu_user_regs *regs)
424
0
{
425
0
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
426
0
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
427
0
    struct vmcb_struct *ns_vmcb, *n1vmcb, *n2vmcb;
428
0
    bool_t vcleanbits_valid;
429
0
    int rc;
430
0
    uint64_t cr0;
431
0
432
0
    ns_vmcb = nv->nv_vvmcx;
433
0
    n1vmcb = nv->nv_n1vmcx;
434
0
    n2vmcb = nv->nv_n2vmcx;
435
0
    ASSERT(ns_vmcb != NULL);
436
0
    ASSERT(n1vmcb != NULL);
437
0
    ASSERT(n2vmcb != NULL);
438
0
439
0
    /* Check if virtual VMCB cleanbits are valid */
440
0
    vcleanbits_valid = 1;
441
0
    if ( svm->ns_ovvmcb_pa == INVALID_PADDR )
442
0
        vcleanbits_valid = 0;
443
0
    if (svm->ns_ovvmcb_pa != nv->nv_vvmcxaddr)
444
0
        vcleanbits_valid = 0;
445
0
446
0
#define vcleanbit_set(_name)  \
447
0
    (vcleanbits_valid && ns_vmcb->cleanbits.fields._name)
448
0
449
0
    /* Enable l2 guest intercepts */
450
0
    if (!vcleanbit_set(intercepts)) {
451
0
        svm->ns_cr_intercepts = ns_vmcb->_cr_intercepts;
452
0
        svm->ns_dr_intercepts = ns_vmcb->_dr_intercepts;
453
0
        svm->ns_exception_intercepts = ns_vmcb->_exception_intercepts;
454
0
        svm->ns_general1_intercepts = ns_vmcb->_general1_intercepts;
455
0
        svm->ns_general2_intercepts = ns_vmcb->_general2_intercepts;
456
0
    }
457
0
458
0
    /* We could track the cleanbits of the n1vmcb from
459
0
     * last emulated #VMEXIT to this emulated VMRUN to save the merges
460
0
     * below. Those cleanbits would be tracked in an integer field
461
0
     * in struct nestedsvm.
462
0
     * But this effort is not worth doing because:
463
0
     * - Only the intercepts bit of the n1vmcb can effectively be used here 
464
0
     * - The CPU runs more instructions for the tracking than can be
465
0
     *   saved here.
466
0
     * The overhead comes from (ordered from highest to lowest):
467
0
     * - svm_ctxt_switch_to (CPU context switching)
468
0
     * - svm_fpu_enter, svm_fpu_leave (lazy FPU switching)
469
0
     * - emulated CLGI (clears VINTR intercept)
470
0
     * - host clears VINTR intercept
471
0
     * Test results show that the overhead is high enough that the
472
0
     * tracked intercepts bit of the n1vmcb is practically *always* cleared.
473
0
     */
474
0
475
0
    n2vmcb->_cr_intercepts =
476
0
        n1vmcb->_cr_intercepts | ns_vmcb->_cr_intercepts;
477
0
    n2vmcb->_dr_intercepts =
478
0
        n1vmcb->_dr_intercepts | ns_vmcb->_dr_intercepts;
479
0
    n2vmcb->_exception_intercepts =
480
0
        n1vmcb->_exception_intercepts | ns_vmcb->_exception_intercepts;
481
0
    n2vmcb->_general1_intercepts =
482
0
        n1vmcb->_general1_intercepts | ns_vmcb->_general1_intercepts;
483
0
    n2vmcb->_general2_intercepts =
484
0
        n1vmcb->_general2_intercepts | ns_vmcb->_general2_intercepts;
485
0
486
0
    /* Nested Pause Filter */
487
0
    if (ns_vmcb->_general1_intercepts & GENERAL1_INTERCEPT_PAUSE)
488
0
        n2vmcb->_pause_filter_count =
489
0
            min(n1vmcb->_pause_filter_count, ns_vmcb->_pause_filter_count);
490
0
    else
491
0
        n2vmcb->_pause_filter_count = n1vmcb->_pause_filter_count;
492
0
493
0
    /* TSC offset */
494
0
    n2vmcb->_tsc_offset = n1vmcb->_tsc_offset + ns_vmcb->_tsc_offset;
495
0
496
0
    /* Nested IO permission bitmaps */
497
0
    rc = nsvm_vmrun_permissionmap(v, vcleanbit_set(iopm));
498
0
    if (rc)
499
0
        return rc;
500
0
501
0
    /* ASID - Emulation handled in hvm_asid_handle_vmenter() */
502
0
503
0
    /* TLB control */
504
0
    n2vmcb->tlb_control = ns_vmcb->tlb_control;
505
0
506
0
    /* Virtual Interrupts */
507
0
    if (!vcleanbit_set(tpr)) {
508
0
        n2vmcb->_vintr = ns_vmcb->_vintr;
509
0
        n2vmcb->_vintr.fields.intr_masking = 1;
510
0
    }
511
0
512
0
    /* Shadow Mode */
513
0
    n2vmcb->interrupt_shadow = ns_vmcb->interrupt_shadow;
514
0
515
0
    /* Exit codes */
516
0
    n2vmcb->exitcode = ns_vmcb->exitcode;
517
0
    n2vmcb->exitinfo1 = ns_vmcb->exitinfo1;
518
0
    n2vmcb->exitinfo2 = ns_vmcb->exitinfo2;
519
0
    n2vmcb->exitintinfo = ns_vmcb->exitintinfo;
520
0
521
0
    /* Pending Interrupts */
522
0
    n2vmcb->eventinj = ns_vmcb->eventinj;
523
0
524
0
    /* LBR virtualization */
525
0
    if (!vcleanbit_set(lbr)) {
526
0
        svm->ns_lbr_control = ns_vmcb->lbr_control;
527
0
    }
528
0
    n2vmcb->lbr_control.bytes =
529
0
        n1vmcb->lbr_control.bytes | ns_vmcb->lbr_control.bytes;
530
0
531
0
    /* NextRIP - only evaluated on #VMEXIT. */
532
0
533
0
    /*
534
0
     * VMCB Save State Area
535
0
     */
536
0
537
0
    /* Segments */
538
0
    if (!vcleanbit_set(seg)) {
539
0
        n2vmcb->es = ns_vmcb->es;
540
0
        n2vmcb->cs = ns_vmcb->cs;
541
0
        n2vmcb->ss = ns_vmcb->ss;
542
0
        n2vmcb->ds = ns_vmcb->ds;
543
0
        /* CPL */
544
0
        n2vmcb->_cpl = ns_vmcb->_cpl;
545
0
    }
546
0
    if (!vcleanbit_set(dt)) {
547
0
        n2vmcb->gdtr = ns_vmcb->gdtr;
548
0
        n2vmcb->idtr = ns_vmcb->idtr;
549
0
    }
550
0
551
0
    /* EFER */
552
0
    v->arch.hvm_vcpu.guest_efer = ns_vmcb->_efer;
553
0
    rc = hvm_set_efer(ns_vmcb->_efer);
554
0
    if ( rc == X86EMUL_EXCEPTION )
555
0
        hvm_inject_hw_exception(TRAP_gp_fault, 0);
556
0
    if (rc != X86EMUL_OKAY)
557
0
        gdprintk(XENLOG_ERR, "hvm_set_efer failed, rc: %u\n", rc);
558
0
559
0
    /* CR4 */
560
0
    v->arch.hvm_vcpu.guest_cr[4] = ns_vmcb->_cr4;
561
0
    rc = hvm_set_cr4(ns_vmcb->_cr4, 1);
562
0
    if ( rc == X86EMUL_EXCEPTION )
563
0
        hvm_inject_hw_exception(TRAP_gp_fault, 0);
564
0
    if (rc != X86EMUL_OKAY)
565
0
        gdprintk(XENLOG_ERR, "hvm_set_cr4 failed, rc: %u\n", rc);
566
0
567
0
    /* CR0 */
568
0
    svm->ns_cr0 = v->arch.hvm_vcpu.guest_cr[0];
569
0
    cr0 = nestedsvm_fpu_vmentry(svm->ns_cr0, ns_vmcb, n1vmcb, n2vmcb);
570
0
    v->arch.hvm_vcpu.guest_cr[0] = ns_vmcb->_cr0;
571
0
    rc = hvm_set_cr0(cr0, 1);
572
0
    if ( rc == X86EMUL_EXCEPTION )
573
0
        hvm_inject_hw_exception(TRAP_gp_fault, 0);
574
0
    if (rc != X86EMUL_OKAY)
575
0
        gdprintk(XENLOG_ERR, "hvm_set_cr0 failed, rc: %u\n", rc);
576
0
577
0
    /* CR2 */
578
0
    v->arch.hvm_vcpu.guest_cr[2] = ns_vmcb->_cr2;
579
0
    hvm_update_guest_cr(v, 2);
580
0
581
0
    /* Nested paging mode */
582
0
    if (nestedhvm_paging_mode_hap(v)) {
583
0
        /* host nested paging + guest nested paging. */
584
0
        n2vmcb->_np_enable = 1;
585
0
586
0
        nestedsvm_vmcb_set_nestedp2m(v, ns_vmcb, n2vmcb);
587
0
588
0
        /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
589
0
        rc = hvm_set_cr3(ns_vmcb->_cr3, 1);
590
0
        if ( rc == X86EMUL_EXCEPTION )
591
0
            hvm_inject_hw_exception(TRAP_gp_fault, 0);
592
0
        if (rc != X86EMUL_OKAY)
593
0
            gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
594
0
    } else if (paging_mode_hap(v->domain)) {
595
0
        /* host nested paging + guest shadow paging. */
596
0
        n2vmcb->_np_enable = 1;
597
0
        /* Keep h_cr3 as it is. */
598
0
        n2vmcb->_h_cr3 = n1vmcb->_h_cr3;
599
0
        /* When l1 guest does shadow paging
600
0
         * we assume it intercepts page faults.
601
0
         */
602
0
        /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
603
0
        rc = hvm_set_cr3(ns_vmcb->_cr3, 1);
604
0
        if ( rc == X86EMUL_EXCEPTION )
605
0
            hvm_inject_hw_exception(TRAP_gp_fault, 0);
606
0
        if (rc != X86EMUL_OKAY)
607
0
            gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
608
0
    } else {
609
0
        /* host shadow paging + guest shadow paging. */
610
0
        n2vmcb->_np_enable = 0;
611
0
        n2vmcb->_h_cr3 = 0x0;
612
0
613
0
        /* TODO: Once shadow-shadow paging is in place come back to here
614
0
         * and set host_vmcb->_cr3 to the shadowed shadow table.
615
0
         */
616
0
    }
617
0
618
0
    /* DRn */
619
0
    if (!vcleanbit_set(dr)) {
620
0
        n2vmcb->_dr7 = ns_vmcb->_dr7;
621
0
        n2vmcb->_dr6 = ns_vmcb->_dr6;
622
0
    }
623
0
624
0
    /* RFLAGS */
625
0
    n2vmcb->rflags = ns_vmcb->rflags;
626
0
627
0
    /* RIP */
628
0
    n2vmcb->rip = ns_vmcb->rip;
629
0
630
0
    /* RSP */
631
0
    n2vmcb->rsp = ns_vmcb->rsp;
632
0
633
0
    /* RAX */
634
0
    n2vmcb->rax = ns_vmcb->rax;
635
0
636
0
    /* Keep the host values of the fs, gs, ldtr, tr, kerngsbase,
637
0
     * star, lstar, cstar, sfmask, sysenter_cs, sysenter_esp,
638
0
     * sysenter_eip. These are handled via VMSAVE/VMLOAD emulation.
639
0
     */
640
0
641
0
    /* Page tables */
642
0
    n2vmcb->pdpe0 = ns_vmcb->pdpe0;
643
0
    n2vmcb->pdpe1 = ns_vmcb->pdpe1;
644
0
    n2vmcb->pdpe2 = ns_vmcb->pdpe2;
645
0
    n2vmcb->pdpe3 = ns_vmcb->pdpe3;
646
0
647
0
    /* PAT */
648
0
    if (!vcleanbit_set(np)) {
649
0
        n2vmcb->_g_pat = ns_vmcb->_g_pat;
650
0
    }
651
0
652
0
    if (!vcleanbit_set(lbr)) {
653
0
        /* Debug Control MSR */
654
0
        n2vmcb->_debugctlmsr = ns_vmcb->_debugctlmsr;
655
0
656
0
        /* LBR MSRs */
657
0
        n2vmcb->_lastbranchfromip = ns_vmcb->_lastbranchfromip;
658
0
        n2vmcb->_lastbranchtoip = ns_vmcb->_lastbranchtoip;
659
0
        n2vmcb->_lastintfromip = ns_vmcb->_lastintfromip;
660
0
        n2vmcb->_lastinttoip = ns_vmcb->_lastinttoip;
661
0
    }
662
0
663
0
    /* Cleanbits */
664
0
    n2vmcb->cleanbits.bytes = 0;
665
0
666
0
    rc = svm_vmcb_isvalid(__func__, ns_vmcb, v, true);
667
0
    if (rc) {
668
0
        gdprintk(XENLOG_ERR, "virtual vmcb invalid\n");
669
0
        return NSVM_ERROR_VVMCB;
670
0
    }
671
0
672
0
    rc = svm_vmcb_isvalid(__func__, n2vmcb, v, true);
673
0
    if (rc) {
674
0
        gdprintk(XENLOG_ERR, "n2vmcb invalid\n");
675
0
        return NSVM_ERROR_VMENTRY;
676
0
    }
677
0
678
0
    /* Switch guest registers to l2 guest */
679
0
    regs->rax = ns_vmcb->rax;
680
0
    regs->rip = ns_vmcb->rip;
681
0
    regs->rsp = ns_vmcb->rsp;
682
0
    regs->rflags = ns_vmcb->rflags;
683
0
684
0
#undef vcleanbit_set
685
0
    return 0;
686
0
}
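
The cleanbits logic at the top of nsvm_vmcb_prepare4vmrun() reduces to a single predicate: the l1 guest's VMCB clean bits may only be honoured when the VMRUN being emulated enters the very same virtual VMCB that the previous emulated #VMEXIT left; otherwise every cached field has to be treated as dirty. A compact restatement of that check, as a sketch reusing the Xen types assumed in this file (paddr_t, INVALID_PADDR):

/* Sketch: virtual cleanbits are trusted only if ns_ovvmcb_pa (the vvmcb used
 * at the last emulated #VMEXIT) matches the vvmcb address of this VMRUN. */
static inline bool vcleanbits_are_valid(paddr_t ns_ovvmcb_pa, paddr_t vvmcxaddr)
{
    return ns_ovvmcb_pa != INVALID_PADDR && ns_ovvmcb_pa == vvmcxaddr;
}
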
687
688
static int
689
nsvm_vcpu_vmentry(struct vcpu *v, struct cpu_user_regs *regs,
690
    unsigned int inst_len)
691
0
{
692
0
    int ret;
693
0
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
694
0
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
695
0
    struct vmcb_struct *ns_vmcb;
696
0
697
0
    ns_vmcb = nv->nv_vvmcx;
698
0
    ASSERT(ns_vmcb != NULL);
699
0
    ASSERT(nv->nv_n2vmcx != NULL);
700
0
    ASSERT(nv->nv_n2vmcx_pa != INVALID_PADDR);
701
0
702
0
    /* Save values for later use. Needed for Nested-on-Nested and
703
0
     * Shadow-on-Shadow paging.
704
0
     */
705
0
    svm->ns_vmcb_guestcr3 = ns_vmcb->_cr3;
706
0
    svm->ns_vmcb_hostcr3 = ns_vmcb->_h_cr3;
707
0
708
0
    /* Convert explicitly to boolean. Deals with l1 guests
709
0
     * that use flush-by-asid w/o checking the cpuid bits */
710
0
    nv->nv_flushp2m = !!ns_vmcb->tlb_control;
711
0
    if ( svm->ns_guest_asid != ns_vmcb->_guest_asid )
712
0
    {
713
0
        nv->nv_flushp2m = 1;
714
0
        hvm_asid_flush_vcpu_asid(&vcpu_nestedhvm(v).nv_n2asid);
715
0
        svm->ns_guest_asid = ns_vmcb->_guest_asid;
716
0
    }
717
0
718
0
    /* nested paging for the guest */
719
0
    svm->ns_hap_enabled = (ns_vmcb->_np_enable) ? 1 : 0;
720
0
721
0
    /* Remember the V_INTR_MASK in hostflags */
722
0
    svm->ns_hostflags.fields.vintrmask =
723
0
        (ns_vmcb->_vintr.fields.intr_masking) ? 1 : 0;
724
0
725
0
    /* Save l1 guest state (= host state) */
726
0
    ret = nsvm_vcpu_hostsave(v, inst_len);
727
0
    if (ret) {
728
0
        gdprintk(XENLOG_ERR, "hostsave failed, ret = %i\n", ret);
729
0
        return ret;
730
0
    }
731
0
732
0
    /* switch vmcb to shadow vmcb */
733
0
    v->arch.hvm_svm.vmcb = nv->nv_n2vmcx;
734
0
    v->arch.hvm_svm.vmcb_pa = nv->nv_n2vmcx_pa;
735
0
736
0
    ret = nsvm_vmcb_prepare4vmrun(v, regs);
737
0
    if (ret) {
738
0
        gdprintk(XENLOG_ERR, "prepare4vmrun failed, ret = %i\n", ret);
739
0
        return ret;
740
0
    }
741
0
742
0
    nestedsvm_vcpu_stgi(v);
743
0
    return 0;
744
0
}
745
746
int
747
nsvm_vcpu_vmrun(struct vcpu *v, struct cpu_user_regs *regs)
748
0
{
749
0
    int ret;
750
0
    unsigned int inst_len;
751
0
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
752
0
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
753
0
754
0
    inst_len = __get_instruction_length(v, INSTR_VMRUN);
755
0
    if (inst_len == 0) {
756
0
        svm->ns_vmexit.exitcode = VMEXIT_SHUTDOWN;
757
0
        return -1;
758
0
    }
759
0
760
0
    nv->nv_vmswitch_in_progress = 1;
761
0
    ASSERT(nv->nv_vvmcx != NULL);
762
0
763
0
    /* save host state */
764
0
    ret = nsvm_vcpu_vmentry(v, regs, inst_len);
765
0
766
0
    /* Switch vcpu to guest mode. In the error case
767
0
     * this ensures the host mode is restored correctly
768
0
     * and the l1 guest stays alive. */
769
0
    nestedhvm_vcpu_enter_guestmode(v);
770
0
771
0
    switch (ret) {
772
0
    case 0:
773
0
        break;
774
0
    case NSVM_ERROR_VVMCB:
775
0
        gdprintk(XENLOG_ERR, "inject VMEXIT(INVALID)\n");
776
0
        svm->ns_vmexit.exitcode = VMEXIT_INVALID;
777
0
        return -1;
778
0
    case NSVM_ERROR_VMENTRY:
779
0
    default:
780
0
        gdprintk(XENLOG_ERR,
781
0
            "nsvm_vcpu_vmentry failed, injecting #UD\n");
782
0
        hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC);
783
0
        /* Must happen after hvm_inject_hw_exception or it doesn't work right. */
784
0
        nv->nv_vmswitch_in_progress = 0;
785
0
        return 1;
786
0
    }
787
0
788
0
    /* If l1 guest uses shadow paging, update the paging mode. */
789
0
    if (!nestedhvm_paging_mode_hap(v))
790
0
        paging_update_paging_modes(v);
791
0
792
0
    nv->nv_vmswitch_in_progress = 0;
793
0
    return 0;
794
0
}
795
796
static int
797
nsvm_vcpu_vmexit_inject(struct vcpu *v, struct cpu_user_regs *regs,
798
    uint64_t exitcode)
799
0
{
800
0
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
801
0
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
802
0
    struct vmcb_struct *ns_vmcb;
803
0
804
0
    ASSERT(svm->ns_gif == 0);
805
0
    ns_vmcb = nv->nv_vvmcx;
806
0
807
0
    if (nv->nv_vmexit_pending) {
808
0
809
0
        switch (exitcode) {
810
0
        case VMEXIT_INTR:
811
0
            if ( unlikely(ns_vmcb->eventinj.fields.v)
812
0
                && nv->nv_vmentry_pending
813
0
                && hvm_event_needs_reinjection(ns_vmcb->eventinj.fields.type,
814
0
                    ns_vmcb->eventinj.fields.vector) )
815
0
            {
816
0
                ns_vmcb->exitintinfo.bytes = ns_vmcb->eventinj.bytes;
817
0
            }
818
0
            break;
819
0
        case VMEXIT_EXCEPTION_PF:
820
0
            ns_vmcb->_cr2 = ns_vmcb->exitinfo2;
821
0
            /* fall through */
822
0
        case VMEXIT_NPF:
823
0
            /* PF error code */
824
0
            ns_vmcb->exitinfo1 = svm->ns_vmexit.exitinfo1;
825
0
            /* fault address */
826
0
            ns_vmcb->exitinfo2 = svm->ns_vmexit.exitinfo2;
827
0
            break;
828
0
        case VMEXIT_EXCEPTION_NP:
829
0
        case VMEXIT_EXCEPTION_SS:
830
0
        case VMEXIT_EXCEPTION_GP:
831
0
        case VMEXIT_EXCEPTION_15:
832
0
        case VMEXIT_EXCEPTION_MF:
833
0
        case VMEXIT_EXCEPTION_AC:
834
0
            ns_vmcb->exitinfo1 = svm->ns_vmexit.exitinfo1;
835
0
            break;
836
0
        default:
837
0
            break;
838
0
        }
839
0
    }
840
0
841
0
    ns_vmcb->exitcode = exitcode;
842
0
    ns_vmcb->eventinj.bytes = 0;
843
0
    return 0;
844
0
}
845
846
int
847
nsvm_vcpu_vmexit_event(struct vcpu *v, const struct x86_event *trap)
848
0
{
849
0
    ASSERT(vcpu_nestedhvm(v).nv_vvmcx != NULL);
850
0
851
0
    nestedsvm_vmexit_defer(v, VMEXIT_EXCEPTION_DE + trap->vector,
852
0
                           trap->error_code, trap->cr2);
853
0
    return NESTEDHVM_VMEXIT_DONE;
854
0
}
855
856
uint64_t nsvm_vcpu_hostcr3(struct vcpu *v)
857
0
{
858
0
    return vcpu_nestedsvm(v).ns_vmcb_hostcr3;
859
0
}
860
861
static int
862
nsvm_vmcb_guest_intercepts_msr(unsigned long *msr_bitmap,
863
    uint32_t msr, bool_t write)
864
0
{
865
0
    bool_t enabled;
866
0
    unsigned long *msr_bit;
867
0
868
0
    msr_bit = svm_msrbit(msr_bitmap, msr);
869
0
870
0
    if (msr_bit == NULL)
871
0
        /* MSR not in the permission map: Let the guest handle it. */
872
0
        return NESTEDHVM_VMEXIT_INJECT;
873
0
874
0
    msr &= 0x1fff;
875
0
876
0
    if (write)
877
0
        /* write access */
878
0
        enabled = test_bit(msr * 2 + 1, msr_bit);
879
0
    else
880
0
        /* read access */
881
0
        enabled = test_bit(msr * 2, msr_bit);
882
0
883
0
    if (!enabled)
884
0
        return NESTEDHVM_VMEXIT_HOST;
885
0
886
0
    return NESTEDHVM_VMEXIT_INJECT;
887
0
}
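
For reference, the layout this lookup relies on: each MSR owns a pair of adjacent bits in its MSRPM region, the even bit for read intercepts and the odd bit for write intercepts, and only the low 13 bits of the MSR number index into the region (svm_msrbit() has already selected the region for the MSR's range). A small sketch of the bit-pair test under those assumptions, with a hypothetical helper name:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical helper: test the read or write intercept bit for one MSR
 * inside an already-selected MSRPM region. */
static bool msrpm_region_intercepts(const uint8_t *region, uint32_t msr,
                                    bool write)
{
    unsigned int bit = (msr & 0x1fff) * 2 + (write ? 1 : 0);

    return region[bit / 8] & (1u << (bit % 8));
}
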
888
889
static int
890
nsvm_vmcb_guest_intercepts_ioio(paddr_t iopm_pa, uint64_t exitinfo1)
891
0
{
892
0
    unsigned long gfn = iopm_pa >> PAGE_SHIFT;
893
0
    unsigned long *io_bitmap;
894
0
    ioio_info_t ioinfo;
895
0
    uint16_t port;
896
0
    unsigned int size;
897
0
    bool_t enabled;
898
0
899
0
    ioinfo.bytes = exitinfo1;
900
0
    port = ioinfo.fields.port;
901
0
    size = ioinfo.fields.sz32 ? 4 : ioinfo.fields.sz16 ? 2 : 1;
902
0
903
0
    switch ( port )
904
0
    {
905
0
    case 0 ... 8 * PAGE_SIZE - 1: /* first 4KB page */
906
0
        break;
907
0
    case 8 * PAGE_SIZE ... 2 * 8 * PAGE_SIZE - 1: /* second 4KB page */
908
0
        port -= 8 * PAGE_SIZE;
909
0
        ++gfn;
910
0
        break;
911
0
    default:
912
0
        BUG();
913
0
        break;
914
0
    }
915
0
916
0
    for ( io_bitmap = hvm_map_guest_frame_ro(gfn, 0); ; )
917
0
    {
918
0
        enabled = io_bitmap && test_bit(port, io_bitmap);
919
0
        if ( !enabled || !--size )
920
0
            break;
921
0
        if ( unlikely(++port == 8 * PAGE_SIZE) )
922
0
        {
923
0
            hvm_unmap_guest_frame(io_bitmap, 0);
924
0
            io_bitmap = hvm_map_guest_frame_ro(++gfn, 0);
925
0
            port -= 8 * PAGE_SIZE;
926
0
        }
927
0
    }
928
0
    hvm_unmap_guest_frame(io_bitmap, 0);
929
0
930
0
    if ( !enabled )
931
0
        return NESTEDHVM_VMEXIT_HOST;
932
0
933
0
    return NESTEDHVM_VMEXIT_INJECT;
934
0
}
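
The port arithmetic above follows from the IOPM using one bit per I/O port: a 4 KiB page holds 8 * PAGE_SIZE = 32768 bits, so ports 0-32767 live in the first bitmap page and 32768-65535 in the second, and an access whose bytes spill past a page boundary forces the loop to unmap the current frame and map the next gfn. A tiny sketch of just the page/bit split, with a hypothetical helper and PAGE_SIZE assumed to be 4096:

#include <stdint.h>

#define PAGE_SIZE 4096
#define PORTS_PER_PAGE (8 * PAGE_SIZE) /* one bit per port => 32768 ports/page */

/* Hypothetical helper: split a port number into the IOPM page index and the
 * bit offset inside that page, mirroring the adjustment done above. */
static void iopm_locate(unsigned int port, unsigned int *page, unsigned int *bit)
{
    *page = port / PORTS_PER_PAGE; /* 0 or 1 for 16-bit port numbers */
    *bit  = port % PORTS_PER_PAGE;
}
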
935
936
static bool_t
937
nsvm_vmcb_guest_intercepts_exitcode(struct vcpu *v,
938
    struct cpu_user_regs *regs, uint64_t exitcode)
939
0
{
940
0
    uint64_t exit_bits;
941
0
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
942
0
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
943
0
    struct vmcb_struct *ns_vmcb = nv->nv_vvmcx;
944
0
    enum nestedhvm_vmexits vmexits;
945
0
946
0
    switch (exitcode) {
947
0
    case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
948
0
    case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
949
0
        exit_bits = 1ULL << (exitcode - VMEXIT_CR0_READ);
950
0
        if (svm->ns_cr_intercepts & exit_bits)
951
0
            break;
952
0
        return 0;
953
0
954
0
    case VMEXIT_DR0_READ ... VMEXIT_DR7_READ:
955
0
    case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
956
0
        exit_bits = 1ULL << (exitcode - VMEXIT_DR0_READ);
957
0
        if (svm->ns_dr_intercepts & exit_bits)
958
0
            break;
959
0
        return 0;
960
0
961
0
    case VMEXIT_EXCEPTION_DE ... VMEXIT_EXCEPTION_XF:
962
0
        exit_bits = 1ULL << (exitcode - VMEXIT_EXCEPTION_DE);
963
0
        if (svm->ns_exception_intercepts & exit_bits)
964
0
            break;
965
0
        return 0;
966
0
967
0
    case VMEXIT_INTR ... VMEXIT_SHUTDOWN:
968
0
        exit_bits = 1ULL << (exitcode - VMEXIT_INTR);
969
0
        if (svm->ns_general1_intercepts & exit_bits)
970
0
            break;
971
0
        return 0;
972
0
973
0
    case VMEXIT_VMRUN ... VMEXIT_XSETBV:
974
0
        exit_bits = 1ULL << (exitcode - VMEXIT_VMRUN);
975
0
        if (svm->ns_general2_intercepts & exit_bits)
976
0
            break;
977
0
        return 0;
978
0
979
0
    case VMEXIT_NPF:
980
0
        if (nestedhvm_paging_mode_hap(v))
981
0
            break;
982
0
        return 0;
983
0
    case VMEXIT_INVALID:
984
0
        /* Always intercepted */
985
0
        break;
986
0
987
0
    default:
988
0
        gdprintk(XENLOG_ERR, "Illegal exitcode %#"PRIx64"\n", exitcode);
989
0
        BUG();
990
0
        break;
991
0
    }
992
0
993
0
    /* Special cases: Do more detailed checks */
994
0
    switch (exitcode) {
995
0
    case VMEXIT_MSR:
996
0
        ASSERT(regs != NULL);
997
0
        if ( !nestedsvm_vmcb_map(v, nv->nv_vvmcxaddr) )
998
0
            break;
999
0
        ns_vmcb = nv->nv_vvmcx;
1000
0
        vmexits = nsvm_vmcb_guest_intercepts_msr(svm->ns_cached_msrpm,
1001
0
            regs->ecx, ns_vmcb->exitinfo1 != 0);
1002
0
        if (vmexits == NESTEDHVM_VMEXIT_HOST)
1003
0
            return 0;
1004
0
        break;
1005
0
    case VMEXIT_IOIO:
1006
0
        if ( !nestedsvm_vmcb_map(v, nv->nv_vvmcxaddr) )
1007
0
            break;
1008
0
        ns_vmcb = nv->nv_vvmcx;
1009
0
        vmexits = nsvm_vmcb_guest_intercepts_ioio(ns_vmcb->_iopm_base_pa,
1010
0
            ns_vmcb->exitinfo1);
1011
0
        if (vmexits == NESTEDHVM_VMEXIT_HOST)
1012
0
            return 0;
1013
0
        break;
1014
0
    }
1015
0
1016
0
    return 1;
1017
0
}
1018
1019
bool_t
1020
nsvm_vmcb_guest_intercepts_event(
1021
    struct vcpu *v, unsigned int vector, int errcode)
1022
0
{
1023
0
    return nsvm_vmcb_guest_intercepts_exitcode(v,
1024
0
        guest_cpu_user_regs(), VMEXIT_EXCEPTION_DE + vector);
1025
0
}
1026
1027
static int
1028
nsvm_vmcb_prepare4vmexit(struct vcpu *v, struct cpu_user_regs *regs)
1029
0
{
1030
0
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
1031
0
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
1032
0
    struct vmcb_struct *ns_vmcb = nv->nv_vvmcx;
1033
0
    struct vmcb_struct *n2vmcb = nv->nv_n2vmcx;
1034
0
1035
0
    svm_vmsave(nv->nv_n1vmcx);
1036
0
1037
0
    /* Cache guest physical address of virtual vmcb
1038
0
     * for VMCB Cleanbit emulation.
1039
0
     */
1040
0
    svm->ns_ovvmcb_pa = nv->nv_vvmcxaddr;
1041
0
1042
0
    /* Intercepts - keep them as they are */
1043
0
1044
0
    /* Pausefilter - keep it as is */
1045
0
1046
0
    /* Nested IO permission bitmap */
1047
0
    /* Just keep the iopm_base_pa and msrpm_base_pa values.
1048
0
     * The guest must not see the virtualized values.
1049
0
     */
1050
0
1051
0
    /* TSC offset */
1052
0
    /* Keep it. It's maintained by the l1 guest. */
1053
0
1054
0
    /* ASID */
1055
0
    /* ns_vmcb->_guest_asid = n2vmcb->_guest_asid; */
1056
0
1057
0
    /* TLB control */
1058
0
    ns_vmcb->tlb_control = 0;
1059
0
1060
0
    /* Virtual Interrupts */
1061
0
    ns_vmcb->_vintr = n2vmcb->_vintr;
1062
0
    if (!(svm->ns_hostflags.fields.vintrmask))
1063
0
        ns_vmcb->_vintr.fields.intr_masking = 0;
1064
0
1065
0
    /* Shadow mode */
1066
0
    ns_vmcb->interrupt_shadow = n2vmcb->interrupt_shadow;
1067
0
1068
0
    /* Exit codes */
1069
0
    ns_vmcb->exitcode = n2vmcb->exitcode;
1070
0
    ns_vmcb->exitinfo1 = n2vmcb->exitinfo1;
1071
0
    ns_vmcb->exitinfo2 = n2vmcb->exitinfo2;
1072
0
    ns_vmcb->exitintinfo = n2vmcb->exitintinfo;
1073
0
1074
0
    /* Interrupts */
1075
0
    /* If we emulate a VMRUN/#VMEXIT in the same host #VMEXIT cycle we have
1076
0
     * to make sure that we do not lose injected events. So check eventinj
1077
0
     * here and copy it to exitintinfo if it is valid.
1078
0
     * exitintinfo and eventinj can't both be valid because the case below
1079
0
     * only happens on a VMRUN instruction intercept which has no valid
1080
0
     * exitintinfo set.
1081
0
     */
1082
0
    if ( unlikely(n2vmcb->eventinj.fields.v) &&
1083
0
         hvm_event_needs_reinjection(n2vmcb->eventinj.fields.type,
1084
0
                                     n2vmcb->eventinj.fields.vector) )
1085
0
    {
1086
0
        ns_vmcb->exitintinfo = n2vmcb->eventinj;
1087
0
    }
1088
0
1089
0
    ns_vmcb->eventinj.bytes = 0;
1090
0
1091
0
    /* Nested paging mode */
1092
0
    if (nestedhvm_paging_mode_hap(v)) {
1093
0
        /* host nested paging + guest nested paging. */
1094
0
        ns_vmcb->_np_enable = n2vmcb->_np_enable;
1095
0
        ns_vmcb->_cr3 = n2vmcb->_cr3;
1096
0
        /* The vmcb->h_cr3 is the shadowed h_cr3. The original
1097
0
         * unshadowed guest h_cr3 is kept in ns_vmcb->h_cr3,
1098
0
         * hence we keep the ns_vmcb->h_cr3 value. */
1099
0
    } else if (paging_mode_hap(v->domain)) {
1100
0
        /* host nested paging + guest shadow paging. */
1101
0
        ns_vmcb->_np_enable = 0;
1102
0
        /* Throw h_cr3 away. The guest is not allowed to set it, or
1103
0
         * it could break out otherwise (security hole!). */
1104
0
        ns_vmcb->_h_cr3 = 0x0;
1105
0
        /* Stop intercepting #PF (already done above
1106
0
         * by restoring cached intercepts). */
1107
0
        ns_vmcb->_cr3 = n2vmcb->_cr3;
1108
0
    } else {
1109
0
        /* host shadow paging + guest shadow paging. */
1110
0
        ns_vmcb->_np_enable = 0;
1111
0
        ns_vmcb->_h_cr3 = 0x0;
1112
0
        /* The vmcb->_cr3 is the shadowed cr3. The original
1113
0
         * unshadowed guest cr3 is kept in ns_vmcb->_cr3,
1114
0
         * hence we keep the ns_vmcb->_cr3 value. */
1115
0
    }
1116
0
1117
0
    /* LBR virtualization - keep lbr control as is */
1118
0
1119
0
    /* NextRIP */
1120
0
    ns_vmcb->nextrip = n2vmcb->nextrip;
1121
0
1122
0
    /* Decode Assist */
1123
0
    ns_vmcb->guest_ins_len = n2vmcb->guest_ins_len;
1124
0
    memcpy(ns_vmcb->guest_ins, n2vmcb->guest_ins, sizeof(ns_vmcb->guest_ins));
1125
0
1126
0
    /*
1127
0
     * VMCB Save State Area
1128
0
     */
1129
0
1130
0
    /* Segments */
1131
0
    ns_vmcb->es = n2vmcb->es;
1132
0
    ns_vmcb->cs = n2vmcb->cs;
1133
0
    ns_vmcb->ss = n2vmcb->ss;
1134
0
    ns_vmcb->ds = n2vmcb->ds;
1135
0
    ns_vmcb->gdtr = n2vmcb->gdtr;
1136
0
    ns_vmcb->idtr = n2vmcb->idtr;
1137
0
1138
0
    /* CPL */
1139
0
    ns_vmcb->_cpl = n2vmcb->_cpl;
1140
0
1141
0
    /* EFER */
1142
0
    ns_vmcb->_efer = n2vmcb->_efer;
1143
0
1144
0
    /* CRn */
1145
0
    ns_vmcb->_cr4 = n2vmcb->_cr4;
1146
0
    ns_vmcb->_cr0 = n2vmcb->_cr0;
1147
0
1148
0
    /* DRn */
1149
0
    ns_vmcb->_dr7 = n2vmcb->_dr7;
1150
0
    ns_vmcb->_dr6 = n2vmcb->_dr6;
1151
0
1152
0
    /* Restore registers from regs as those values
1153
0
     * can be newer than in n2vmcb (e.g. due to an
1154
0
     * instruction emulation right before).
1155
0
     */
1156
0
1157
0
    /* RFLAGS */
1158
0
    ns_vmcb->rflags = n2vmcb->rflags = regs->rflags;
1159
0
1160
0
    /* RIP */
1161
0
    ns_vmcb->rip = n2vmcb->rip = regs->rip;
1162
0
1163
0
    /* RSP */
1164
0
    ns_vmcb->rsp = n2vmcb->rsp = regs->rsp;
1165
0
1166
0
    /* RAX */
1167
0
    ns_vmcb->rax = n2vmcb->rax = regs->rax;
1168
0
1169
0
    /* Keep the l2 guest values of the fs, gs, ldtr, tr, kerngsbase,
1170
0
     * star, lstar, cstar, sfmask, sysenter_cs, sysenter_esp,
1171
0
     * sysenter_eip. These are handled via VMSAVE/VMLOAD emulation.
1172
0
     */
1173
0
1174
0
    /* CR2 */
1175
0
    ns_vmcb->_cr2 = n2vmcb->_cr2;
1176
0
1177
0
    /* Page tables */
1178
0
    ns_vmcb->pdpe0 = n2vmcb->pdpe0;
1179
0
    ns_vmcb->pdpe1 = n2vmcb->pdpe1;
1180
0
    ns_vmcb->pdpe2 = n2vmcb->pdpe2;
1181
0
    ns_vmcb->pdpe3 = n2vmcb->pdpe3;
1182
0
1183
0
    /* PAT */
1184
0
    ns_vmcb->_g_pat = n2vmcb->_g_pat;
1185
0
1186
0
    /* Debug Control MSR */
1187
0
    ns_vmcb->_debugctlmsr = n2vmcb->_debugctlmsr;
1188
0
1189
0
    /* LBR MSRs */
1190
0
    ns_vmcb->_lastbranchfromip = n2vmcb->_lastbranchfromip;
1191
0
    ns_vmcb->_lastbranchtoip = n2vmcb->_lastbranchtoip;
1192
0
    ns_vmcb->_lastintfromip = n2vmcb->_lastintfromip;
1193
0
    ns_vmcb->_lastinttoip = n2vmcb->_lastinttoip;
1194
0
1195
0
    return 0;
1196
0
}
1197
1198
bool_t
1199
nsvm_vmcb_hap_enabled(struct vcpu *v)
1200
0
{
1201
0
    return vcpu_nestedsvm(v).ns_hap_enabled;
1202
0
}
1203
1204
/* This function uses L2_gpa to walk the P2M page table in L1. If the
1205
 * walk is successful, the translated value is returned in
1206
 * L1_gpa. The result value tells what to do next.
1207
 */
1208
int
1209
nsvm_hap_walk_L1_p2m(struct vcpu *v, paddr_t L2_gpa, paddr_t *L1_gpa,
1210
                     unsigned int *page_order, uint8_t *p2m_acc,
1211
                     bool_t access_r, bool_t access_w, bool_t access_x)
1212
0
{
1213
0
    uint32_t pfec;
1214
0
    unsigned long nested_cr3, gfn;
1215
0
1216
0
    nested_cr3 = nhvm_vcpu_p2m_base(v);
1217
0
1218
0
    pfec = PFEC_user_mode | PFEC_page_present;
1219
0
    if ( access_w )
1220
0
        pfec |= PFEC_write_access;
1221
0
    if ( access_x )
1222
0
        pfec |= PFEC_insn_fetch;
1223
0
1224
0
    /* Walk the guest-supplied NPT table, just as if it were a pagetable */
1225
0
    gfn = paging_ga_to_gfn_cr3(v, nested_cr3, L2_gpa, &pfec, page_order);
1226
0
1227
0
    if ( gfn == gfn_x(INVALID_GFN) )
1228
0
        return NESTEDHVM_PAGEFAULT_INJECT;
1229
0
1230
0
    *L1_gpa = (gfn << PAGE_SHIFT) + (L2_gpa & ~PAGE_MASK);
1231
0
    return NESTEDHVM_PAGEFAULT_DONE;
1232
0
}
1233
1234
enum hvm_intblk nsvm_intr_blocked(struct vcpu *v)
1235
0
{
1236
0
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
1237
0
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
1238
0
1239
0
    ASSERT(nestedhvm_enabled(v->domain));
1240
0
1241
0
    if ( !nestedsvm_gif_isset(v) )
1242
0
        return hvm_intblk_svm_gif;
1243
0
1244
0
    if ( nestedhvm_vcpu_in_guestmode(v) ) {
1245
0
        struct vmcb_struct *n2vmcb = nv->nv_n2vmcx;
1246
0
1247
0
        if ( svm->ns_hostflags.fields.vintrmask )
1248
0
            if ( !svm->ns_hostflags.fields.rflagsif )
1249
0
                return hvm_intblk_rflags_ie;
1250
0
1251
0
        /* when l1 guest passes its devices through to the l2 guest
1252
0
         * and l2 guest does an MMIO access then we may want to
1253
0
         * inject an VMEXIT(#INTR) exitcode into the l1 guest.
1254
0
         * Delay the injection because this would result in delivering
1255
0
         * an interrupt *within* the execution of an instruction.
1256
0
         */
1257
0
        if ( v->arch.hvm_vcpu.hvm_io.io_req.state != STATE_IOREQ_NONE )
1258
0
            return hvm_intblk_shadow;
1259
0
1260
0
        if ( !nv->nv_vmexit_pending && n2vmcb->exitintinfo.bytes != 0 ) {
1261
0
            /* Give the l2 guest a chance to finish the delivery of
1262
0
             * the last injected interrupt or exception before we
1263
0
             * emulate a VMEXIT (e.g. VMEXIT(INTR) ).
1264
0
             */
1265
0
            return hvm_intblk_shadow;
1266
0
        }
1267
0
    }
1268
0
1269
0
    if ( nv->nv_vmexit_pending ) {
1270
0
        /* hvm_inject_hw_exception() must have run before.
1271
0
         * exceptions have higher priority than interrupts.
1272
0
         */
1273
0
        return hvm_intblk_rflags_ie;
1274
0
    }
1275
0
1276
0
    return hvm_intblk_none;
1277
0
}
1278
1279
/* MSR handling */
1280
int nsvm_rdmsr(struct vcpu *v, unsigned int msr, uint64_t *msr_content)
1281
0
{
1282
0
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
1283
0
    int ret = 1;
1284
0
1285
0
    *msr_content = 0;
1286
0
1287
0
    switch (msr) {
1288
0
    case MSR_K8_VM_CR:
1289
0
        break;
1290
0
    case MSR_K8_VM_HSAVE_PA:
1291
0
        *msr_content = svm->ns_msr_hsavepa;
1292
0
        break;
1293
0
    case MSR_AMD64_TSC_RATIO:
1294
0
        *msr_content = svm->ns_tscratio;
1295
0
        break;
1296
0
    default:
1297
0
        ret = 0;
1298
0
        break;
1299
0
    }
1300
0
1301
0
    return ret;
1302
0
}
1303
1304
int nsvm_wrmsr(struct vcpu *v, unsigned int msr, uint64_t msr_content)
1305
0
{
1306
0
    int ret = 1;
1307
0
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
1308
0
1309
0
    switch (msr) {
1310
0
    case MSR_K8_VM_CR:
1311
0
        /* ignore write. handle all bits as read-only. */
1312
0
        break;
1313
0
    case MSR_K8_VM_HSAVE_PA:
1314
0
        if (!nestedsvm_vmcb_isvalid(v, msr_content)) {
1315
0
            gdprintk(XENLOG_ERR,
1316
0
                "MSR_K8_VM_HSAVE_PA value invalid %#"PRIx64"\n", msr_content);
1317
0
            ret = -1; /* inject #GP */
1318
0
            break;
1319
0
        }
1320
0
        svm->ns_msr_hsavepa = msr_content;
1321
0
        break;
1322
0
    case MSR_AMD64_TSC_RATIO:
1323
0
        if ((msr_content & ~TSC_RATIO_RSVD_BITS) != msr_content) {
1324
0
            gdprintk(XENLOG_ERR,
1325
0
                "reserved bits set in MSR_AMD64_TSC_RATIO %#"PRIx64"\n",
1326
0
                msr_content);
1327
0
            ret = -1; /* inject #GP */
1328
0
            break;
1329
0
        }
1330
0
        svm->ns_tscratio = msr_content;
1331
0
        break;
1332
0
    default:
1333
0
        ret = 0;
1334
0
        break;
1335
0
    }
1336
0
1337
0
    return ret;
1338
0
}
1339
1340
/* VMEXIT emulation */
1341
void
1342
nestedsvm_vmexit_defer(struct vcpu *v,
1343
    uint64_t exitcode, uint64_t exitinfo1, uint64_t exitinfo2)
1344
0
{
1345
0
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
1346
0
1347
0
    nestedsvm_vcpu_clgi(v);
1348
0
    svm->ns_vmexit.exitcode = exitcode;
1349
0
    svm->ns_vmexit.exitinfo1 = exitinfo1;
1350
0
    svm->ns_vmexit.exitinfo2 = exitinfo2;
1351
0
    vcpu_nestedhvm(v).nv_vmexit_pending = 1;
1352
0
}
1353
1354
enum nestedhvm_vmexits
1355
nestedsvm_check_intercepts(struct vcpu *v, struct cpu_user_regs *regs,
1356
    uint64_t exitcode)
1357
0
{
1358
0
    bool_t is_intercepted;
1359
0
1360
0
    ASSERT(vcpu_nestedhvm(v).nv_vmexit_pending == 0);
1361
0
    is_intercepted = nsvm_vmcb_guest_intercepts_exitcode(v, regs, exitcode);
1362
0
1363
0
    switch (exitcode) {
1364
0
    case VMEXIT_INVALID:
1365
0
        if (is_intercepted)
1366
0
            return NESTEDHVM_VMEXIT_INJECT;
1367
0
        return NESTEDHVM_VMEXIT_HOST;
1368
0
1369
0
    case VMEXIT_INTR:
1370
0
    case VMEXIT_NMI:
1371
0
        return NESTEDHVM_VMEXIT_HOST;
1372
0
    case VMEXIT_EXCEPTION_NM:
1373
0
        /* Host must handle lazy fpu context switching first.
1374
0
         * Then inject the VMEXIT if L1 guest intercepts this.
1375
0
         */
1376
0
        return NESTEDHVM_VMEXIT_HOST;
1377
0
1378
0
    case VMEXIT_NPF:
1379
0
        if (nestedhvm_paging_mode_hap(v)) {
1380
0
            if (!is_intercepted)
1381
0
                return NESTEDHVM_VMEXIT_FATALERROR;
1382
0
            /* host nested paging + guest nested paging */
1383
0
            return NESTEDHVM_VMEXIT_HOST;
1384
0
        }
1385
0
        if (paging_mode_hap(v->domain)) {
1386
0
            if (is_intercepted)
1387
0
                return NESTEDHVM_VMEXIT_FATALERROR;
1388
0
            /* host nested paging + guest shadow paging */
1389
0
            return NESTEDHVM_VMEXIT_HOST;
1390
0
        }
1391
0
        /* host shadow paging + guest shadow paging */
1392
0
        /* Can this happen? */
1393
0
        BUG();
1394
0
        return NESTEDHVM_VMEXIT_FATALERROR;
1395
0
    case VMEXIT_EXCEPTION_PF:
1396
0
        if (nestedhvm_paging_mode_hap(v)) {
1397
0
            /* host nested paging + guest nested paging */
1398
0
            if (!is_intercepted)
1399
0
                /* l1 guest intercepts #PF unnecessarily */
1400
0
                return NESTEDHVM_VMEXIT_HOST;
1401
0
            /* l2 guest intercepts #PF unnecessarily */
1402
0
            return NESTEDHVM_VMEXIT_INJECT;
1403
0
        }
1404
0
        if (!paging_mode_hap(v->domain)) {
1405
0
            /* host shadow paging + guest shadow paging */
1406
0
            return NESTEDHVM_VMEXIT_HOST;
1407
0
        }
1408
0
        /* host nested paging + guest shadow paging */
1409
0
        return NESTEDHVM_VMEXIT_INJECT;
1410
0
    case VMEXIT_VMMCALL:
1411
0
        /* Always let the guest handle VMMCALL/VMCALL */
1412
0
        return NESTEDHVM_VMEXIT_INJECT;
1413
0
    default:
1414
0
        gprintk(XENLOG_ERR, "Unexpected nested vmexit: reason %#"PRIx64"\n",
1415
0
                exitcode);
1416
0
        break;
1417
0
    }
1418
0
1419
0
    if (is_intercepted)
1420
0
        return NESTEDHVM_VMEXIT_INJECT;
1421
0
    return NESTEDHVM_VMEXIT_HOST;
1422
0
}
1423
1424
enum nestedhvm_vmexits
1425
nestedsvm_vmexit_n2n1(struct vcpu *v, struct cpu_user_regs *regs)
1426
0
{
1427
0
    int rc;
1428
0
    enum nestedhvm_vmexits ret = NESTEDHVM_VMEXIT_DONE;
1429
0
1430
0
    ASSERT(vcpu_nestedhvm(v).nv_vmswitch_in_progress);
1431
0
    ASSERT(nestedhvm_vcpu_in_guestmode(v));
1432
0
1433
0
    rc = nsvm_vmcb_prepare4vmexit(v, regs);
1434
0
    if (rc)
1435
0
        ret = NESTEDHVM_VMEXIT_ERROR;
1436
0
1437
0
    rc = nsvm_vcpu_hostrestore(v, regs);
1438
0
    if (rc)
1439
0
        ret = NESTEDHVM_VMEXIT_FATALERROR;
1440
0
1441
0
    nestedhvm_vcpu_exit_guestmode(v);
1442
0
    return ret;
1443
0
}
1444
1445
/* The exitcode is in native SVM/VMX format. The forced exitcode
1446
 * is in generic format.
1447
 */
1448
static enum nestedhvm_vmexits
1449
nestedsvm_vcpu_vmexit(struct vcpu *v, struct cpu_user_regs *regs,
1450
    uint64_t exitcode)
1451
0
{
1452
0
    int rc;
1453
0
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
1454
0
1455
0
    nv->nv_vmswitch_in_progress = 1;
1456
0
1457
0
    ASSERT(nv->nv_vvmcx != NULL);
1458
0
1459
0
    /* On special intercepts that the host has to handle,
1460
0
     * the vcpu is still in guest mode here.
1461
0
     */
1462
0
    if (nestedhvm_vcpu_in_guestmode(v)) {
1463
0
        enum nestedhvm_vmexits ret;
1464
0
1465
0
        ret = nestedsvm_vmexit_n2n1(v, regs);
1466
0
        switch (ret) {
1467
0
        case NESTEDHVM_VMEXIT_FATALERROR:
1468
0
            gdprintk(XENLOG_ERR, "VMEXIT: fatal error\n");
1469
0
            return ret;
1470
0
        case NESTEDHVM_VMEXIT_HOST:
1471
0
            BUG();
1472
0
            return ret;
1473
0
        case NESTEDHVM_VMEXIT_ERROR:
1474
0
            exitcode = VMEXIT_INVALID;
1475
0
            break;
1476
0
        default:
1477
0
            ASSERT(!nestedhvm_vcpu_in_guestmode(v));
1478
0
            break;
1479
0
        }
1480
0
1481
0
        /* host state has been restored */
1482
0
    }
1483
0
1484
0
    ASSERT(!nestedhvm_vcpu_in_guestmode(v));
1485
0
1486
0
    /* Prepare for running the l1 guest. Make the actual
1487
0
     * modifications to the virtual VMCB/VMCS.
1488
0
     */
1489
0
    rc = nsvm_vcpu_vmexit_inject(v, regs, exitcode);
1490
0
1491
0
    /* If l1 guest uses shadow paging, update the paging mode. */
1492
0
    if (!nestedhvm_paging_mode_hap(v))
1493
0
        paging_update_paging_modes(v);
1494
0
1495
0
    nv->nv_vmswitch_in_progress = 0;
1496
0
1497
0
    if (rc)
1498
0
        return NESTEDHVM_VMEXIT_FATALERROR;
1499
0
1500
0
    return NESTEDHVM_VMEXIT_DONE;
1501
0
}
1502
1503
/* VCPU switch */
1504
void nsvm_vcpu_switch(struct cpu_user_regs *regs)
1505
0
{
1506
0
    struct vcpu *v = current;
1507
0
    struct nestedvcpu *nv;
1508
0
    struct nestedsvm *svm;
1509
0
1510
0
    if (!nestedhvm_enabled(v->domain))
1511
0
        return;
1512
0
1513
0
    nv = &vcpu_nestedhvm(v);
1514
0
    svm = &vcpu_nestedsvm(v);
1515
0
    ASSERT(v->arch.hvm_svm.vmcb != NULL);
1516
0
    ASSERT(nv->nv_n1vmcx != NULL);
1517
0
    ASSERT(nv->nv_n2vmcx != NULL);
1518
0
    ASSERT(nv->nv_n1vmcx_pa != INVALID_PADDR);
1519
0
    ASSERT(nv->nv_n2vmcx_pa != INVALID_PADDR);
1520
0
1521
0
    if (nv->nv_vmexit_pending) {
1522
0
 vmexit:
1523
0
        nestedsvm_vcpu_vmexit(v, regs, svm->ns_vmexit.exitcode);
1524
0
        nv->nv_vmexit_pending = 0;
1525
0
        nv->nv_vmentry_pending = 0;
1526
0
        return;
1527
0
    }
1528
0
    if (nv->nv_vmentry_pending) {
1529
0
        int ret;
1530
0
        ASSERT(!nv->nv_vmexit_pending);
1531
0
        ret = nsvm_vcpu_vmrun(v, regs);
1532
0
        if (ret)
1533
0
            goto vmexit;
1534
0
1535
0
        ASSERT(nestedhvm_vcpu_in_guestmode(v));
1536
0
        nv->nv_vmentry_pending = 0;
1537
0
    }
1538
0
1539
0
    if (nestedhvm_vcpu_in_guestmode(v)
1540
0
       && nestedhvm_paging_mode_hap(v))
1541
0
    {
1542
0
        /* In case we left the l2 guest due to a physical interrupt (e.g. IPI)
1543
0
         * that is not for the l1 guest, we continue running the l2 guest
1544
0
         * but check if the nestedp2m is still valid.
1545
0
         */
1546
0
        if (nv->nv_p2m == NULL)
1547
0
            nestedsvm_vmcb_set_nestedp2m(v, nv->nv_vvmcx, nv->nv_n2vmcx);
1548
0
    }
1549
0
}
1550
1551
/* Interrupts, Virtual GIF */
1552
int
1553
nestedsvm_vcpu_interrupt(struct vcpu *v, const struct hvm_intack intack)
1554
0
{
1555
0
    int ret;
1556
0
    enum hvm_intblk intr;
1557
0
    uint64_t exitcode = VMEXIT_INTR;
1558
0
    uint64_t exitinfo2 = 0;
1559
0
    ASSERT(nestedhvm_vcpu_in_guestmode(v));
1560
0
1561
0
    intr = nhvm_interrupt_blocked(v);
1562
0
    if ( intr != hvm_intblk_none )
1563
0
        return NSVM_INTR_MASKED;
1564
0
1565
0
    switch (intack.source) {
1566
0
    case hvm_intsrc_pic:
1567
0
    case hvm_intsrc_lapic:
1568
0
    case hvm_intsrc_vector:
1569
0
        exitcode = VMEXIT_INTR;
1570
0
        exitinfo2 = intack.vector;
1571
0
        break;
1572
0
    case hvm_intsrc_nmi:
1573
0
        exitcode = VMEXIT_NMI;
1574
0
        exitinfo2 = intack.vector;
1575
0
        break;
1576
0
    case hvm_intsrc_mce:
1577
0
        exitcode = VMEXIT_EXCEPTION_MC;
1578
0
        exitinfo2 = intack.vector;
1579
0
        break;
1580
0
    case hvm_intsrc_none:
1581
0
        return NSVM_INTR_NOTHANDLED;
1582
0
    default:
1583
0
        BUG();
1584
0
    }
1585
0
1586
0
    ret = nsvm_vmcb_guest_intercepts_exitcode(v,
1587
0
                                     guest_cpu_user_regs(), exitcode);
1588
0
    if (ret) {
1589
0
        nestedsvm_vmexit_defer(v, exitcode, intack.source, exitinfo2);
1590
0
        return NSVM_INTR_FORCEVMEXIT;
1591
0
    }
1592
0
1593
0
    return NSVM_INTR_NOTINTERCEPTED;
1594
0
}
1595
1596
bool_t
1597
nestedsvm_gif_isset(struct vcpu *v)
1598
0
{
1599
0
    struct nestedsvm *svm = &vcpu_nestedsvm(v);
1600
0
1601
0
    return (!!svm->ns_gif);
1602
0
}
1603
1604
void svm_vmexit_do_stgi(struct cpu_user_regs *regs, struct vcpu *v)
1605
0
{
1606
0
    unsigned int inst_len;
1607
0
1608
0
    if ( !nestedhvm_enabled(v->domain) ) {
1609
0
        hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC);
1610
0
        return;
1611
0
    }
1612
0
1613
0
    if ( (inst_len = __get_instruction_length(v, INSTR_STGI)) == 0 )
1614
0
        return;
1615
0
1616
0
    nestedsvm_vcpu_stgi(v);
1617
0
1618
0
    __update_guest_eip(regs, inst_len);
1619
0
}
1620
1621
void svm_vmexit_do_clgi(struct cpu_user_regs *regs, struct vcpu *v)
1622
0
{
1623
0
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1624
0
    unsigned int inst_len;
1625
0
    uint32_t general1_intercepts = vmcb_get_general1_intercepts(vmcb);
1626
0
    vintr_t intr;
1627
0
1628
0
    if ( !nestedhvm_enabled(v->domain) ) {
1629
0
        hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC);
1630
0
        return;
1631
0
    }
1632
0
1633
0
    if ( (inst_len = __get_instruction_length(v, INSTR_CLGI)) == 0 )
1634
0
        return;
1635
0
1636
0
    nestedsvm_vcpu_clgi(v);
1637
0
1638
0
    /* After a CLGI no interrupts should come */
1639
0
    intr = vmcb_get_vintr(vmcb);
1640
0
    intr.fields.irq = 0;
1641
0
    general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
1642
0
    vmcb_set_vintr(vmcb, intr);
1643
0
    vmcb_set_general1_intercepts(vmcb, general1_intercepts);
1644
0
1645
0
    __update_guest_eip(regs, inst_len);
1646
0
}