Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/arch/x86/hvm/vmx/vvmx.c
Line | Count | Source
1
/*
2
 * vvmx.c: Support virtual VMX for nested virtualization.
3
 *
4
 * Copyright (c) 2010, Intel Corporation.
5
 * Author: Qing He <qing.he@intel.com>
6
 *         Eddie Dong <eddie.dong@intel.com>
7
 *
8
 * This program is free software; you can redistribute it and/or modify it
9
 * under the terms and conditions of the GNU General Public License,
10
 * version 2, as published by the Free Software Foundation.
11
 *
12
 * This program is distributed in the hope it will be useful, but WITHOUT
13
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15
 * more details.
16
 *
17
 * You should have received a copy of the GNU General Public License along with
18
 * this program; If not, see <http://www.gnu.org/licenses/>.
19
 *
20
 */
21
22
#include <asm/types.h>
23
#include <asm/mtrr.h>
24
#include <asm/p2m.h>
25
#include <asm/hvm/ioreq.h>
26
#include <asm/hvm/vmx/vmx.h>
27
#include <asm/hvm/vmx/vvmx.h>
28
#include <asm/hvm/nestedhvm.h>
29
30
static DEFINE_PER_CPU(u64 *, vvmcs_buf);
31
32
static void nvmx_purge_vvmcs(struct vcpu *v);
33
34
static bool nvmx_vcpu_in_vmx(const struct vcpu *v)
35
0
{
36
0
    return vcpu_2_nvmx(v).vmxon_region_pa != INVALID_PADDR;
37
0
}
38
39
0
#define VMCS_BUF_SIZE 100
40
41
int nvmx_cpu_up_prepare(unsigned int cpu)
42
12
{
43
12
    if ( per_cpu(vvmcs_buf, cpu) != NULL )
44
0
        return 0;
45
12
46
12
    per_cpu(vvmcs_buf, cpu) = xzalloc_array(u64, VMCS_BUF_SIZE);
47
12
48
12
    if ( per_cpu(vvmcs_buf, cpu) != NULL )
49
12
        return 0;
50
12
51
0
    return -ENOMEM;
52
12
}
53
54
void nvmx_cpu_dead(unsigned int cpu)
55
0
{
56
0
    xfree(per_cpu(vvmcs_buf, cpu));
57
0
    per_cpu(vvmcs_buf, cpu) = NULL;
58
0
}
59
60
int nvmx_vcpu_initialise(struct vcpu *v)
61
0
{
62
0
    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
63
0
    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
64
0
    struct page_info *pg = alloc_domheap_page(NULL, 0);
65
0
66
0
    if ( !pg )
67
0
    {
68
0
        gdprintk(XENLOG_ERR, "nest: allocation for shadow vmcs failed\n");
69
0
        return -ENOMEM;
70
0
    }
71
0
    nvcpu->nv_n2vmcx_pa = page_to_maddr(pg);
72
0
73
0
    /* non-root VMREAD/VMWRITE bitmap. */
74
0
    if ( cpu_has_vmx_vmcs_shadowing )
75
0
    {
76
0
        struct page_info *vmread_bitmap, *vmwrite_bitmap;
77
0
        unsigned long *vw;
78
0
79
0
        vmread_bitmap = alloc_domheap_page(NULL, 0);
80
0
        if ( !vmread_bitmap )
81
0
        {
82
0
            gdprintk(XENLOG_ERR, "nest: allocation for vmread bitmap failed\n");
83
0
            return -ENOMEM;
84
0
        }
85
0
        v->arch.hvm_vmx.vmread_bitmap = vmread_bitmap;
86
0
87
0
        clear_domain_page(_mfn(page_to_mfn(vmread_bitmap)));
88
0
89
0
        vmwrite_bitmap = alloc_domheap_page(NULL, 0);
90
0
        if ( !vmwrite_bitmap )
91
0
        {
92
0
            gdprintk(XENLOG_ERR, "nest: allocation for vmwrite bitmap failed\n");
93
0
            return -ENOMEM;
94
0
        }
95
0
        v->arch.hvm_vmx.vmwrite_bitmap = vmwrite_bitmap;
96
0
97
0
        vw = __map_domain_page(vmwrite_bitmap);
98
0
        clear_page(vw);
99
0
100
0
        /*
101
0
         * For the following 6 encodings, we need to handle them in VMM.
102
0
         * Let them vmexit as usual.
103
0
         */
104
0
        set_bit(IO_BITMAP_A, vw);
105
0
        set_bit(VMCS_HIGH(IO_BITMAP_A), vw);
106
0
        set_bit(IO_BITMAP_B, vw);
107
0
        set_bit(VMCS_HIGH(IO_BITMAP_B), vw);
108
0
        set_bit(MSR_BITMAP, vw);
109
0
        set_bit(VMCS_HIGH(MSR_BITMAP), vw);
110
0
111
0
        unmap_domain_page(vw);
112
0
    }
113
0
114
0
    nvmx->ept.enabled = 0;
115
0
    nvmx->guest_vpid = 0;
116
0
    nvmx->vmxon_region_pa = INVALID_PADDR;
117
0
    nvcpu->nv_vvmcx = NULL;
118
0
    nvcpu->nv_vvmcxaddr = INVALID_PADDR;
119
0
    nvmx->intr.intr_info = 0;
120
0
    nvmx->intr.error_code = 0;
121
0
    nvmx->iobitmap[0] = NULL;
122
0
    nvmx->iobitmap[1] = NULL;
123
0
    nvmx->msrbitmap = NULL;
124
0
    INIT_LIST_HEAD(&nvmx->launched_list);
125
0
    return 0;
126
0
}
127
 
128
void nvmx_vcpu_destroy(struct vcpu *v)
129
0
{
130
0
    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
131
0
    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
132
0
    struct vvmcs_list *item, *n;
133
0
134
0
    /* 
135
0
     * When destroying the vcpu, it may be running on behalf of L2 guest.
136
0
     * Therefore we need to switch the VMCS pointer back to the L1 VMCS,
137
0
     * in order to avoid double free of L2 VMCS and the possible memory
138
0
     * leak of L1 VMCS page.
139
0
     */
140
0
    if ( nvcpu->nv_n1vmcx_pa )
141
0
        v->arch.hvm_vmx.vmcs_pa = nvcpu->nv_n1vmcx_pa;
142
0
143
0
    if ( nvcpu->nv_n2vmcx_pa )
144
0
    {
145
0
        __vmpclear(nvcpu->nv_n2vmcx_pa);
146
0
        free_domheap_page(maddr_to_page(nvcpu->nv_n2vmcx_pa));
147
0
        nvcpu->nv_n2vmcx_pa = 0;
148
0
    }
149
0
150
0
    /* Must also cope with nvmx_vcpu_initialise() not having got called. */
151
0
    if ( nvmx->launched_list.next )
152
0
        list_for_each_entry_safe(item, n, &nvmx->launched_list, node)
153
0
        {
154
0
            list_del(&item->node);
155
0
            xfree(item);
156
0
        }
157
0
158
0
    if ( v->arch.hvm_vmx.vmread_bitmap )
159
0
    {
160
0
        free_domheap_page(v->arch.hvm_vmx.vmread_bitmap);
161
0
        v->arch.hvm_vmx.vmread_bitmap = NULL;
162
0
    }
163
0
    if ( v->arch.hvm_vmx.vmwrite_bitmap )
164
0
    {
165
0
        free_domheap_page(v->arch.hvm_vmx.vmwrite_bitmap);
166
0
        v->arch.hvm_vmx.vmwrite_bitmap = NULL;
167
0
    }
168
0
}
169
 
170
void nvmx_domain_relinquish_resources(struct domain *d)
171
0
{
172
0
    struct vcpu *v;
173
0
174
0
    for_each_vcpu ( d, v )
175
0
        nvmx_purge_vvmcs(v);
176
0
}
177
178
int nvmx_vcpu_reset(struct vcpu *v)
179
0
{
180
0
    return 0;
181
0
}
182
183
uint64_t nvmx_vcpu_eptp_base(struct vcpu *v)
184
0
{
185
0
    return get_vvmcs(v, EPT_POINTER) & PAGE_MASK;
186
0
}
187
188
bool_t nvmx_ept_enabled(struct vcpu *v)
189
0
{
190
0
    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
191
0
192
0
    return !!(nvmx->ept.enabled);
193
0
}
194
195
struct vmx_inst_decoded {
196
0
#define VMX_INST_MEMREG_TYPE_MEMORY 0
197
0
#define VMX_INST_MEMREG_TYPE_REG    1
198
    int type;
199
    union {
200
        struct {
201
            unsigned long mem;
202
            unsigned int  len;
203
        };
204
        enum vmx_regs_enc reg1;
205
    };
206
207
    enum vmx_regs_enc reg2;
208
};
209
210
enum vmx_ops_result {
211
    VMSUCCEED,
212
    VMFAIL_VALID,
213
    VMFAIL_INVALID,
214
};
215
216
#define CASE_SET_REG(REG, reg)      \
217
    case VMX_REG_ ## REG: regs->reg = value; break
218
#define CASE_GET_REG(REG, reg)      \
219
    case VMX_REG_ ## REG: value = regs->reg; break
220
221
static int vvmcs_offset(u32 width, u32 type, u32 index)
222
0
{
223
0
    int offset;
224
0
225
0
    offset = (index & 0x1f) | type << 5 | width << 7;
226
0
227
0
    if ( offset == 0 )    /* vpid */
228
0
        offset = 0x3f;
229
0
230
0
    return offset;
231
0
}
232
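As a side note on the encoding arithmetic above: the following minimal, standalone sketch (not part of vvmx.c) shows how a VMCS component encoding decomposes into access type, index, type and width, and the flat-array offset that vvmcs_offset() derives from it. The bit layout follows the Intel SDM component-encoding format, and the value 0x0800 (GUEST_ES_SELECTOR) is used purely as a worked example.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint32_t encoding = 0x0800;                  /* GUEST_ES_SELECTOR */

    uint32_t access_type = encoding & 1;         /* bit 0: full/high access */
    uint32_t index = (encoding >> 1) & 0x1ff;    /* bits 1-9 */
    uint32_t type  = (encoding >> 10) & 0x3;     /* bits 10-11 */
    uint32_t width = (encoding >> 13) & 0x3;     /* bits 13-14 */

    /* Same formula as vvmcs_offset(): 5 index bits, 2 type bits, 2 width bits. */
    unsigned int offset = (index & 0x1f) | type << 5 | width << 7;

    if ( offset == 0 )    /* encoding 0 (VPID) is remapped to 0x3f */
        offset = 0x3f;

    printf("access=%u index=%u type=%u width=%u -> offset %#x\n",
           access_type, index, type, width, offset);
    return 0;
}
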
233
u64 get_vvmcs_virtual(void *vvmcs, u32 vmcs_encoding)
234
0
{
235
0
    union vmcs_encoding enc;
236
0
    u64 *content = (u64 *) vvmcs;
237
0
    int offset;
238
0
    u64 res;
239
0
240
0
    enc.word = vmcs_encoding;
241
0
    offset = vvmcs_offset(enc.width, enc.type, enc.index);
242
0
    res = content[offset];
243
0
244
0
    switch ( enc.width ) {
245
0
    case VVMCS_WIDTH_16:
246
0
        res &= 0xffff;
247
0
        break;
248
0
    case VVMCS_WIDTH_64:
249
0
        if ( enc.access_type )
250
0
            res >>= 32;
251
0
        break;
252
0
    case VVMCS_WIDTH_32:
253
0
        res &= 0xffffffff;
254
0
        break;
255
0
    case VVMCS_WIDTH_NATURAL:
256
0
    default:
257
0
        break;
258
0
    }
259
0
260
0
    return res;
261
0
}
262
263
u64 get_vvmcs_real(const struct vcpu *v, u32 encoding)
264
0
{
265
0
    return virtual_vmcs_vmread(v, encoding);
266
0
}
267
268
enum vmx_insn_errno get_vvmcs_virtual_safe(void *vvmcs, u32 encoding, u64 *val)
269
0
{
270
0
    *val = get_vvmcs_virtual(vvmcs, encoding);
271
0
272
0
    /*
273
0
     * TODO: This should not always succeed. Fields and values need to be
274
0
     * audited against the features offered to the guest in the VT-x MSRs.
275
0
     * This should be fixed when the MSR levelling work is started, at which
276
0
     * point there will be a cpuid_policy-like object.
277
0
     */
278
0
    return VMX_INSN_SUCCEED;
279
0
}
280
281
enum vmx_insn_errno get_vvmcs_real_safe(const struct vcpu *v, u32 encoding,
282
                                        u64 *val)
283
0
{
284
0
    return virtual_vmcs_vmread_safe(v, encoding, val);
285
0
}
286
287
void set_vvmcs_virtual(void *vvmcs, u32 vmcs_encoding, u64 val)
288
0
{
289
0
    union vmcs_encoding enc;
290
0
    u64 *content = (u64 *) vvmcs;
291
0
    int offset;
292
0
    u64 res;
293
0
294
0
    enc.word = vmcs_encoding;
295
0
    offset = vvmcs_offset(enc.width, enc.type, enc.index);
296
0
    res = content[offset];
297
0
298
0
    switch ( enc.width ) {
299
0
    case VVMCS_WIDTH_16:
300
0
        res = val & 0xffff;
301
0
        break;
302
0
    case VVMCS_WIDTH_64:
303
0
        if ( enc.access_type )
304
0
        {
305
0
            res &= 0xffffffff;
306
0
            res |= val << 32;
307
0
        }
308
0
        else
309
0
            res = val;
310
0
        break;
311
0
    case VVMCS_WIDTH_32:
312
0
        res = val & 0xffffffff;
313
0
        break;
314
0
    case VVMCS_WIDTH_NATURAL:
315
0
    default:
316
0
        res = val;
317
0
        break;
318
0
    }
319
0
320
0
    content[offset] = res;
321
0
}
322
323
void set_vvmcs_real(const struct vcpu *v, u32 encoding, u64 val)
324
0
{
325
0
    virtual_vmcs_vmwrite(v, encoding, val);
326
0
}
327
328
enum vmx_insn_errno set_vvmcs_virtual_safe(void *vvmcs, u32 encoding, u64 val)
329
0
{
330
0
    set_vvmcs_virtual(vvmcs, encoding, val);
331
0
332
0
    /*
333
0
     * TODO: This should not always succeed. Fields and values need to be
334
0
     * audited against the features offered to the guest in the VT-x MSRs.
335
0
     * This should be fixed when the MSR levelling work is started, at which
336
0
     * point there will be a cpuid_policy-like object.
337
0
     */
338
0
    return VMX_INSN_SUCCEED;
339
0
}
340
341
enum vmx_insn_errno set_vvmcs_real_safe(const struct vcpu *v, u32 encoding,
342
                                        u64 val)
343
0
{
344
0
    return virtual_vmcs_vmwrite_safe(v, encoding, val);
345
0
}
346
347
static unsigned long reg_read(struct cpu_user_regs *regs,
348
                              enum vmx_regs_enc index)
349
0
{
350
0
    unsigned long *pval = decode_register(index, regs, 0);
351
0
352
0
    return *pval;
353
0
}
354
355
static void reg_write(struct cpu_user_regs *regs,
356
                      enum vmx_regs_enc index,
357
                      unsigned long value)
358
0
{
359
0
    unsigned long *pval = decode_register(index, regs, 0);
360
0
361
0
    *pval = value;
362
0
}
363
364
static inline u32 __n2_pin_exec_control(struct vcpu *v)
365
0
{
366
0
    return get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
367
0
}
368
369
static inline u32 __n2_exec_control(struct vcpu *v)
370
0
{
371
0
    return get_vvmcs(v, CPU_BASED_VM_EXEC_CONTROL);
372
0
}
373
374
static inline u32 __n2_secondary_exec_control(struct vcpu *v)
375
0
{
376
0
    u64 second_ctrl = 0;
377
0
378
0
    if ( __n2_exec_control(v) & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
379
0
        second_ctrl = get_vvmcs(v, SECONDARY_VM_EXEC_CONTROL);
380
0
381
0
    return second_ctrl;
382
0
}
383
384
static int vmx_inst_check_privilege(struct cpu_user_regs *regs, int vmxop_check)
385
0
{
386
0
    struct vcpu *v = current;
387
0
388
0
    if ( vmxop_check )
389
0
    {
390
0
        if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) ||
391
0
             !(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_VMXE) )
392
0
            goto invalid_op;
393
0
    }
394
0
    else if ( !nvmx_vcpu_in_vmx(v) )
395
0
        goto invalid_op;
396
0
397
0
    if ( vmx_guest_x86_mode(v) < (hvm_long_mode_active(v) ? 8 : 2) )
398
0
        goto invalid_op;
399
0
    else if ( nestedhvm_vcpu_in_guestmode(v) )
400
0
        goto vmexit;
401
0
402
0
    if ( vmx_get_cpl() > 0 )
403
0
        goto gp_fault;
404
0
405
0
    return X86EMUL_OKAY;
406
0
407
0
vmexit:
408
0
    gdprintk(XENLOG_ERR, "vmx_inst_check_privilege: vmexit\n");
409
0
    vcpu_nestedhvm(v).nv_vmexit_pending = 1;
410
0
    return X86EMUL_EXCEPTION;
411
0
    
412
0
invalid_op:
413
0
    gdprintk(XENLOG_ERR, "vmx_inst_check_privilege: invalid_op\n");
414
0
    hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC);
415
0
    return X86EMUL_EXCEPTION;
416
0
417
0
gp_fault:
418
0
    gdprintk(XENLOG_ERR, "vmx_inst_check_privilege: gp_fault\n");
419
0
    hvm_inject_hw_exception(TRAP_gp_fault, 0);
420
0
    return X86EMUL_EXCEPTION;
421
0
}
422
423
static int decode_vmx_inst(struct cpu_user_regs *regs,
424
                           struct vmx_inst_decoded *decode,
425
                           unsigned long *poperandS, int vmxon_check)
426
0
{
427
0
    struct vcpu *v = current;
428
0
    union vmx_inst_info info;
429
0
    struct segment_register seg;
430
0
    unsigned long base, index, seg_base, disp, offset;
431
0
    int scale, size;
432
0
433
0
    if ( vmx_inst_check_privilege(regs, vmxon_check) != X86EMUL_OKAY )
434
0
        return X86EMUL_EXCEPTION;
435
0
436
0
    __vmread(VMX_INSTRUCTION_INFO, &offset);
437
0
    info.word = offset;
438
0
439
0
    if ( info.fields.memreg ) {
440
0
        decode->type = VMX_INST_MEMREG_TYPE_REG;
441
0
        decode->reg1 = info.fields.reg1;
442
0
        if ( poperandS != NULL )
443
0
            *poperandS = reg_read(regs, decode->reg1);
444
0
    }
445
0
    else
446
0
    {
447
0
        bool mode_64bit = (vmx_guest_x86_mode(v) == 8);
448
0
449
0
        decode->type = VMX_INST_MEMREG_TYPE_MEMORY;
450
0
451
0
        if ( info.fields.segment > x86_seg_gs )
452
0
            goto gp_fault;
453
0
        hvm_get_segment_register(v, info.fields.segment, &seg);
454
0
        seg_base = seg.base;
455
0
456
0
        base = info.fields.base_reg_invalid ? 0 :
457
0
            reg_read(regs, info.fields.base_reg);
458
0
459
0
        index = info.fields.index_reg_invalid ? 0 :
460
0
            reg_read(regs, info.fields.index_reg);
461
0
462
0
        scale = 1 << info.fields.scaling;
463
0
464
0
        __vmread(EXIT_QUALIFICATION, &disp);
465
0
466
0
        size = 1 << (info.fields.addr_size + 1);
467
0
468
0
        offset = base + index * scale + disp;
469
0
        base = !mode_64bit || info.fields.segment >= x86_seg_fs ?
470
0
               seg_base + offset : offset;
471
0
        if ( offset + size - 1 < offset ||
472
0
             (mode_64bit ?
473
0
              !is_canonical_address((long)base < 0 ? base :
474
0
                                    base + size - 1) :
475
0
              offset + size - 1 > seg.limit) )
476
0
            goto gp_fault;
477
0
478
0
        if ( poperandS != NULL )
479
0
        {
480
0
            pagefault_info_t pfinfo;
481
0
            int rc = hvm_copy_from_guest_linear(poperandS, base, size,
482
0
                                                0, &pfinfo);
483
0
484
0
            if ( rc == HVMTRANS_bad_linear_to_gfn )
485
0
                hvm_inject_page_fault(pfinfo.ec, pfinfo.linear);
486
0
            if ( rc != HVMTRANS_okay )
487
0
                return X86EMUL_EXCEPTION;
488
0
        }
489
0
        decode->mem = base;
490
0
        decode->len = size;
491
0
    }
492
0
493
0
    decode->reg2 = info.fields.reg2;
494
0
495
0
    return X86EMUL_OKAY;
496
0
497
0
gp_fault:
498
0
    hvm_inject_hw_exception(TRAP_gp_fault, 0);
499
0
    return X86EMUL_EXCEPTION;
500
0
}
501
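To make the memory-operand arithmetic above concrete: the operand offset is base + index * scale + displacement, and outside 64-bit mode (or for FS/GS-relative accesses) the segment base is added before the wrap, canonical-address and limit checks. A standalone numeric sketch with invented register values, not taken from any real guest state:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    /* Invented example values for a 32-bit guest. */
    uint64_t seg_base  = 0x00100000;  /* DS base from the segment register */
    uint64_t base_reg  = 0x2000;      /* e.g. EBX */
    uint64_t index_reg = 0x10;        /* e.g. ESI */
    unsigned int scaling = 2;         /* scale = 1 << scaling = 4 */
    uint64_t disp = 0x8;              /* from the exit qualification */
    int mode_64bit = 0;

    uint64_t offset = base_reg + index_reg * (1 << scaling) + disp;
    /* In 64-bit mode the segment base is only added for FS/GS. */
    uint64_t linear = !mode_64bit ? seg_base + offset : offset;

    printf("offset %#llx, linear address %#llx\n",
           (unsigned long long)offset, (unsigned long long)linear);
    return 0;
}
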
502
static void vmsucceed(struct cpu_user_regs *regs)
503
0
{
504
0
    regs->eflags &= ~X86_EFLAGS_ARITH_MASK;
505
0
}
506
507
static void vmfail_valid(struct cpu_user_regs *regs, enum vmx_insn_errno errno)
508
0
{
509
0
    struct vcpu *v = current;
510
0
    unsigned int eflags = regs->eflags;
511
0
512
0
    regs->eflags = (eflags & ~X86_EFLAGS_ARITH_MASK) | X86_EFLAGS_ZF;
513
0
    set_vvmcs(v, VM_INSTRUCTION_ERROR, errno);
514
0
}
515
516
static void vmfail_invalid(struct cpu_user_regs *regs)
517
0
{
518
0
    unsigned int eflags = regs->eflags;
519
0
520
0
    regs->eflags = (eflags & ~X86_EFLAGS_ARITH_MASK) | X86_EFLAGS_CF;
521
0
}
522
523
static void vmfail(struct cpu_user_regs *regs, enum vmx_insn_errno errno)
524
0
{
525
0
    if ( errno == VMX_INSN_SUCCEED )
526
0
        return;
527
0
528
0
    if ( vcpu_nestedhvm(current).nv_vvmcxaddr != INVALID_PADDR &&
529
0
         errno != VMX_INSN_FAIL_INVALID )
530
0
        vmfail_valid(regs, errno);
531
0
    else
532
0
        vmfail_invalid(regs);
533
0
}
534
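For reference, vmsucceed(), vmfail_valid() and vmfail_invalid() above implement the VMX instruction-outcome convention from the Intel SDM: VMsucceed clears all six arithmetic flags, VMfailInvalid sets only CF, and VMfailValid sets only ZF while reporting the error number through VM_INSTRUCTION_ERROR. A minimal standalone sketch of the flag handling, using illustrative flag constants rather than the Xen definitions:

#include <stdio.h>
#include <stdint.h>

#define FL_CF (1u << 0)
#define FL_PF (1u << 2)
#define FL_AF (1u << 4)
#define FL_ZF (1u << 6)
#define FL_SF (1u << 7)
#define FL_OF (1u << 11)
#define FL_ARITH_MASK (FL_CF | FL_PF | FL_AF | FL_ZF | FL_SF | FL_OF)

/* failed == 0: VMsucceed; failed != 0: VMfailValid if a current VMCS
 * exists, VMfailInvalid otherwise. */
static inline uint64_t vmx_outcome_rflags(uint64_t rflags, int failed,
                                          int have_current_vmcs)
{
    rflags &= ~FL_ARITH_MASK;                    /* VMsucceed */
    if ( failed )
        rflags |= have_current_vmcs ? FL_ZF      /* VMfailValid */
                                    : FL_CF;     /* VMfailInvalid */
    return rflags;
}

int main(void)
{
    /* No current VMCS, instruction failed -> only CF set (plus MBS bit 1). */
    printf("%#lx\n", (unsigned long)vmx_outcome_rflags(0x2, 1, 0));
    return 0;
}
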
535
bool_t nvmx_intercepts_exception(
536
    struct vcpu *v, unsigned int vector, int error_code)
537
0
{
538
0
    u32 exception_bitmap, pfec_match=0, pfec_mask=0;
539
0
    int r;
540
0
541
0
    ASSERT(vector < 32);
542
0
543
0
    exception_bitmap = get_vvmcs(v, EXCEPTION_BITMAP);
544
0
    r = exception_bitmap & (1 << vector) ? 1: 0;
545
0
546
0
    if ( vector == TRAP_page_fault )
547
0
    {
548
0
        pfec_match = get_vvmcs(v, PAGE_FAULT_ERROR_CODE_MATCH);
549
0
        pfec_mask  = get_vvmcs(v, PAGE_FAULT_ERROR_CODE_MASK);
550
0
        if ( (error_code & pfec_mask) != pfec_match )
551
0
            r = !r;
552
0
    }
553
0
    return r;
554
0
}
555
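The page-fault special case above follows the SDM rule: with bit 14 of the exception bitmap set, a #PF is reflected to L1 only when (error_code & PFEC_MASK) == PFEC_MATCH, and with the bit clear only when the comparison fails; the r = !r inversion encodes exactly that. A standalone sketch of the decision, assuming vector 14 for #PF as in the code above, with example error-code values that are purely illustrative:

#include <stdio.h>
#include <stdint.h>

#define PF_VECTOR 14

/* Returns non-zero if L1 wants to intercept this page fault. */
static int l1_intercepts_pf(uint32_t exception_bitmap, uint32_t error_code,
                            uint32_t pfec_match, uint32_t pfec_mask)
{
    int r = (exception_bitmap >> PF_VECTOR) & 1;

    if ( (error_code & pfec_mask) != pfec_match )
        r = !r;                      /* mismatch inverts the bitmap decision */

    return r;
}

int main(void)
{
    /* Bitmap bit set, mask selects the write bit (bit 1), match wants writes:
     * a read fault (error code 0) therefore does NOT go to L1 -> prints 0. */
    printf("%d\n", l1_intercepts_pf(1u << PF_VECTOR, 0x0, 0x2, 0x2));
    return 0;
}
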
556
/*
557
 * Nested VMX uses a "strict" condition to exit from the
558
 * L2 guest if either the L1 VMM or the L0 VMM expects to exit.
559
 */
560
static inline u32 __shadow_control(struct vcpu *v,
561
                                 unsigned int field,
562
                                 u32 host_value)
563
0
{
564
0
    return get_vvmcs(v, field) | host_value;
565
0
}
566
567
static void set_shadow_control(struct vcpu *v,
568
                               unsigned int field,
569
                               u32 host_value)
570
0
{
571
0
    __vmwrite(field, __shadow_control(v, field, host_value));
572
0
}
573
574
unsigned long *_shadow_io_bitmap(struct vcpu *v)
575
0
{
576
0
    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
577
0
    int port80, portED;
578
0
    u8 *bitmap;
579
0
580
0
    bitmap = nvmx->iobitmap[0];
581
0
    port80 = bitmap[0x80 >> 3] & (1 << (0x80 & 0x7)) ? 1 : 0;
582
0
    portED = bitmap[0xed >> 3] & (1 << (0xed & 0x7)) ? 1 : 0;
583
0
584
0
    return nestedhvm_vcpu_iomap_get(port80, portED);
585
0
}
586
587
void nvmx_update_exec_control(struct vcpu *v, u32 host_cntrl)
588
0
{
589
0
    u32 pio_cntrl = (CPU_BASED_ACTIVATE_IO_BITMAP
590
0
                     | CPU_BASED_UNCOND_IO_EXITING);
591
0
    unsigned long *bitmap; 
592
0
    u32 shadow_cntrl;
593
0
 
594
0
    shadow_cntrl = __n2_exec_control(v);
595
0
    pio_cntrl &= shadow_cntrl;
596
0
    /* Enforce the removed features */
597
0
    shadow_cntrl &= ~(CPU_BASED_ACTIVATE_MSR_BITMAP
598
0
                      | CPU_BASED_ACTIVATE_IO_BITMAP
599
0
                      | CPU_BASED_UNCOND_IO_EXITING);
600
0
    shadow_cntrl |= host_cntrl;
601
0
    if ( pio_cntrl == CPU_BASED_UNCOND_IO_EXITING ) {
602
0
        /* L1 VMM intercepts all I/O instructions */
603
0
        shadow_cntrl |= CPU_BASED_UNCOND_IO_EXITING;
604
0
        shadow_cntrl &= ~CPU_BASED_ACTIVATE_IO_BITMAP;
605
0
    }
606
0
    else {
607
0
        /* Use IO_BITMAP in shadow */
608
0
        if ( pio_cntrl == 0 ) {
609
0
            /* 
610
0
             * L1 VMM doesn't intercept I/O instructions.
611
0
             * Use host configuration and reset IO_BITMAP
612
0
             */
613
0
            bitmap = hvm_io_bitmap;
614
0
        }
615
0
        else {
616
0
            /* use IO bitmap */
617
0
            bitmap = _shadow_io_bitmap(v);
618
0
        }
619
0
        __vmwrite(IO_BITMAP_A, virt_to_maddr(bitmap));
620
0
        __vmwrite(IO_BITMAP_B, virt_to_maddr(bitmap) + PAGE_SIZE);
621
0
    }
622
0
623
0
    /* TODO: change L0 intr window to MTF or NMI window */
624
0
    __vmwrite(CPU_BASED_VM_EXEC_CONTROL, shadow_cntrl);
625
0
}
626
627
void nvmx_update_secondary_exec_control(struct vcpu *v,
628
                                        unsigned long host_cntrl)
629
0
{
630
0
    u32 shadow_cntrl;
631
0
    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
632
0
    u32 apicv_bit = SECONDARY_EXEC_APIC_REGISTER_VIRT |
633
0
                    SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
634
0
635
0
    host_cntrl &= ~apicv_bit;
636
0
    shadow_cntrl = get_vvmcs(v, SECONDARY_VM_EXEC_CONTROL);
637
0
638
0
    /* No vAPIC-v support, so it shouldn't be set in vmcs12. */
639
0
    ASSERT(!(shadow_cntrl & apicv_bit));
640
0
641
0
    nvmx->ept.enabled = !!(shadow_cntrl & SECONDARY_EXEC_ENABLE_EPT);
642
0
    shadow_cntrl |= host_cntrl;
643
0
    __vmwrite(SECONDARY_VM_EXEC_CONTROL, shadow_cntrl);
644
0
}
645
646
static void nvmx_update_pin_control(struct vcpu *v, unsigned long host_cntrl)
647
0
{
648
0
    u32 shadow_cntrl;
649
0
650
0
    host_cntrl &= ~PIN_BASED_POSTED_INTERRUPT;
651
0
    shadow_cntrl = get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
652
0
653
0
    /* No vAPIC-v support, so it shouldn't be set in vmcs12. */
654
0
    ASSERT(!(shadow_cntrl & PIN_BASED_POSTED_INTERRUPT));
655
0
656
0
    shadow_cntrl |= host_cntrl;
657
0
    __vmwrite(PIN_BASED_VM_EXEC_CONTROL, shadow_cntrl);
658
0
}
659
660
static void nvmx_update_exit_control(struct vcpu *v, unsigned long host_cntrl)
661
0
{
662
0
    u32 shadow_cntrl;
663
0
664
0
    shadow_cntrl = get_vvmcs(v, VM_EXIT_CONTROLS);
665
0
    shadow_cntrl &= ~(VM_EXIT_SAVE_DEBUG_CNTRLS 
666
0
                      | VM_EXIT_LOAD_HOST_PAT
667
0
                      | VM_EXIT_LOAD_HOST_EFER
668
0
                      | VM_EXIT_LOAD_PERF_GLOBAL_CTRL);
669
0
    shadow_cntrl |= host_cntrl;
670
0
    __vmwrite(VM_EXIT_CONTROLS, shadow_cntrl);
671
0
}
672
673
static void nvmx_update_entry_control(struct vcpu *v)
674
0
{
675
0
    u32 shadow_cntrl;
676
0
677
0
    shadow_cntrl = get_vvmcs(v, VM_ENTRY_CONTROLS);
678
0
    shadow_cntrl &= ~(VM_ENTRY_LOAD_GUEST_PAT
679
0
                      | VM_ENTRY_LOAD_GUEST_EFER
680
0
                      | VM_ENTRY_LOAD_PERF_GLOBAL_CTRL);
681
0
    __vmwrite(VM_ENTRY_CONTROLS, shadow_cntrl);
682
0
}
683
684
void nvmx_update_exception_bitmap(struct vcpu *v, unsigned long value)
685
0
{
686
0
    set_shadow_control(v, EXCEPTION_BITMAP, value);
687
0
}
688
689
static void nvmx_update_apic_access_address(struct vcpu *v)
690
0
{
691
0
    u32 ctrl;
692
0
693
0
    ctrl = __n2_secondary_exec_control(v);
694
0
    if ( ctrl & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES )
695
0
    {
696
0
        p2m_type_t p2mt;
697
0
        unsigned long apic_gpfn;
698
0
        struct page_info *apic_pg;
699
0
700
0
        apic_gpfn = get_vvmcs(v, APIC_ACCESS_ADDR) >> PAGE_SHIFT;
701
0
        apic_pg = get_page_from_gfn(v->domain, apic_gpfn, &p2mt, P2M_ALLOC);
702
0
        ASSERT(apic_pg && !p2m_is_paging(p2mt));
703
0
        __vmwrite(APIC_ACCESS_ADDR, page_to_maddr(apic_pg));
704
0
        put_page(apic_pg);
705
0
    }
706
0
    else
707
0
        __vmwrite(APIC_ACCESS_ADDR, 0);
708
0
}
709
710
static void nvmx_update_virtual_apic_address(struct vcpu *v)
711
0
{
712
0
    u32 ctrl;
713
0
714
0
    ctrl = __n2_exec_control(v);
715
0
    if ( ctrl & CPU_BASED_TPR_SHADOW )
716
0
    {
717
0
        p2m_type_t p2mt;
718
0
        unsigned long vapic_gpfn;
719
0
        struct page_info *vapic_pg;
720
0
721
0
        vapic_gpfn = get_vvmcs(v, VIRTUAL_APIC_PAGE_ADDR) >> PAGE_SHIFT;
722
0
        vapic_pg = get_page_from_gfn(v->domain, vapic_gpfn, &p2mt, P2M_ALLOC);
723
0
        ASSERT(vapic_pg && !p2m_is_paging(p2mt));
724
0
        __vmwrite(VIRTUAL_APIC_PAGE_ADDR, page_to_maddr(vapic_pg));
725
0
        put_page(vapic_pg);
726
0
    }
727
0
    else
728
0
        __vmwrite(VIRTUAL_APIC_PAGE_ADDR, 0);
729
0
}
730
731
static void nvmx_update_tpr_threshold(struct vcpu *v)
732
0
{
733
0
    u32 ctrl = __n2_exec_control(v);
734
0
735
0
    if ( ctrl & CPU_BASED_TPR_SHADOW )
736
0
        __vmwrite(TPR_THRESHOLD, get_vvmcs(v, TPR_THRESHOLD));
737
0
    else
738
0
        __vmwrite(TPR_THRESHOLD, 0);
739
0
}
740
741
static void nvmx_update_pfec(struct vcpu *v)
742
0
{
743
0
    __vmwrite(PAGE_FAULT_ERROR_CODE_MASK,
744
0
              get_vvmcs(v, PAGE_FAULT_ERROR_CODE_MASK));
745
0
    __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH,
746
0
              get_vvmcs(v, PAGE_FAULT_ERROR_CODE_MATCH));
747
0
}
748
749
static void __clear_current_vvmcs(struct vcpu *v)
750
0
{
751
0
    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
752
0
    
753
0
    if ( nvcpu->nv_n2vmcx_pa )
754
0
        __vmpclear(nvcpu->nv_n2vmcx_pa);
755
0
}
756
757
/*
758
 * Refreshes the MSR bitmap mapping for the current nested vcpu.  Returns true
759
 * for a successful mapping, and returns false for MSR_BITMAP parameter errors
760
 * or gfn mapping errors.
761
 */
762
static bool __must_check _map_msr_bitmap(struct vcpu *v)
763
0
{
764
0
    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
765
0
    uint64_t gpa;
766
0
767
0
    if ( nvmx->msrbitmap )
768
0
    {
769
0
        hvm_unmap_guest_frame(nvmx->msrbitmap, 1);
770
0
        nvmx->msrbitmap = NULL;
771
0
    }
772
0
773
0
    gpa = get_vvmcs(v, MSR_BITMAP);
774
0
775
0
    if ( !IS_ALIGNED(gpa, PAGE_SIZE) )
776
0
        return false;
777
0
778
0
    nvmx->msrbitmap = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT, 1);
779
0
780
0
    return nvmx->msrbitmap != NULL;
781
0
}
782
783
static bool_t __must_check _map_io_bitmap(struct vcpu *v, u64 vmcs_reg)
784
0
{
785
0
    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
786
0
    unsigned long gpa;
787
0
    int index;
788
0
789
0
    index = vmcs_reg == IO_BITMAP_A ? 0 : 1;
790
0
    if (nvmx->iobitmap[index])
791
0
        hvm_unmap_guest_frame(nvmx->iobitmap[index], 1);
792
0
    gpa = get_vvmcs(v, vmcs_reg);
793
0
    nvmx->iobitmap[index] = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT, 1);
794
0
795
0
    return nvmx->iobitmap[index] != NULL;
796
0
}
797
798
static inline bool_t __must_check map_io_bitmap_all(struct vcpu *v)
799
0
{
800
0
    return _map_io_bitmap(v, IO_BITMAP_A) &&
801
0
           _map_io_bitmap(v, IO_BITMAP_B);
802
0
}
803
804
static void nvmx_purge_vvmcs(struct vcpu *v)
805
0
{
806
0
    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
807
0
    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
808
0
    int i;
809
0
810
0
    __clear_current_vvmcs(v);
811
0
    if ( nvcpu->nv_vvmcxaddr != INVALID_PADDR )
812
0
        hvm_unmap_guest_frame(nvcpu->nv_vvmcx, 1);
813
0
    nvcpu->nv_vvmcx = NULL;
814
0
    nvcpu->nv_vvmcxaddr = INVALID_PADDR;
815
0
    v->arch.hvm_vmx.vmcs_shadow_maddr = 0;
816
0
    for (i=0; i<2; i++) {
817
0
        if ( nvmx->iobitmap[i] ) {
818
0
            hvm_unmap_guest_frame(nvmx->iobitmap[i], 1);
819
0
            nvmx->iobitmap[i] = NULL;
820
0
        }
821
0
    }
822
0
    if ( nvmx->msrbitmap ) {
823
0
        hvm_unmap_guest_frame(nvmx->msrbitmap, 1);
824
0
        nvmx->msrbitmap = NULL;
825
0
    }
826
0
}
827
828
u64 nvmx_get_tsc_offset(struct vcpu *v)
829
0
{
830
0
    u64 offset = 0;
831
0
832
0
    if ( get_vvmcs(v, CPU_BASED_VM_EXEC_CONTROL) &
833
0
         CPU_BASED_USE_TSC_OFFSETING )
834
0
        offset = get_vvmcs(v, TSC_OFFSET);
835
0
836
0
    return offset;
837
0
}
838
839
/*
840
 * Context synchronized between shadow and virtual VMCS.
841
 */
842
static const u16 vmcs_gstate_field[] = {
843
    /* 16 BITS */
844
    GUEST_ES_SELECTOR,
845
    GUEST_CS_SELECTOR,
846
    GUEST_SS_SELECTOR,
847
    GUEST_DS_SELECTOR,
848
    GUEST_FS_SELECTOR,
849
    GUEST_GS_SELECTOR,
850
    GUEST_LDTR_SELECTOR,
851
    GUEST_TR_SELECTOR,
852
    /* 64 BITS */
853
    VMCS_LINK_POINTER,
854
    GUEST_IA32_DEBUGCTL,
855
    GUEST_PAT,
856
    GUEST_EFER,
857
    GUEST_PERF_GLOBAL_CTRL,
858
    /* 32 BITS */
859
    GUEST_ES_LIMIT,
860
    GUEST_CS_LIMIT,
861
    GUEST_SS_LIMIT,
862
    GUEST_DS_LIMIT,
863
    GUEST_FS_LIMIT,
864
    GUEST_GS_LIMIT,
865
    GUEST_LDTR_LIMIT,
866
    GUEST_TR_LIMIT,
867
    GUEST_GDTR_LIMIT,
868
    GUEST_IDTR_LIMIT,
869
    GUEST_ES_AR_BYTES,
870
    GUEST_CS_AR_BYTES,
871
    GUEST_SS_AR_BYTES,
872
    GUEST_DS_AR_BYTES,
873
    GUEST_FS_AR_BYTES,
874
    GUEST_GS_AR_BYTES,
875
    GUEST_LDTR_AR_BYTES,
876
    GUEST_TR_AR_BYTES,
877
    GUEST_INTERRUPTIBILITY_INFO,
878
    GUEST_ACTIVITY_STATE,
879
    GUEST_SYSENTER_CS,
880
    GUEST_PREEMPTION_TIMER,
881
    /* natural */
882
    GUEST_ES_BASE,
883
    GUEST_CS_BASE,
884
    GUEST_SS_BASE,
885
    GUEST_DS_BASE,
886
    GUEST_FS_BASE,
887
    GUEST_GS_BASE,
888
    GUEST_LDTR_BASE,
889
    GUEST_TR_BASE,
890
    GUEST_GDTR_BASE,
891
    GUEST_IDTR_BASE,
892
    GUEST_DR7,
893
    /*
894
     * Following guest states are in local cache (cpu_user_regs)
895
     GUEST_RSP,
896
     GUEST_RIP,
897
     */
898
    GUEST_RFLAGS,
899
    GUEST_PENDING_DBG_EXCEPTIONS,
900
    GUEST_SYSENTER_ESP,
901
    GUEST_SYSENTER_EIP,
902
};
903
904
static const u16 gpdpte_fields[] = {
905
    GUEST_PDPTE(0),
906
    GUEST_PDPTE(1),
907
    GUEST_PDPTE(2),
908
    GUEST_PDPTE(3),
909
};
910
911
/*
912
 * Context: shadow -> virtual VMCS
913
 */
914
static const u16 vmcs_ro_field[] = {
915
    GUEST_PHYSICAL_ADDRESS,
916
    VM_INSTRUCTION_ERROR,
917
    VM_EXIT_REASON,
918
    VM_EXIT_INTR_INFO,
919
    VM_EXIT_INTR_ERROR_CODE,
920
    IDT_VECTORING_INFO,
921
    IDT_VECTORING_ERROR_CODE,
922
    VM_EXIT_INSTRUCTION_LEN,
923
    VMX_INSTRUCTION_INFO,
924
    EXIT_QUALIFICATION,
925
    GUEST_LINEAR_ADDRESS
926
};
927
928
static struct vmcs_host_to_guest {
929
    u16 host_field;
930
    u16 guest_field;
931
} const vmcs_h2g_field[] = {
932
    {HOST_ES_SELECTOR, GUEST_ES_SELECTOR},
933
    {HOST_CS_SELECTOR, GUEST_CS_SELECTOR},
934
    {HOST_SS_SELECTOR, GUEST_SS_SELECTOR},
935
    {HOST_DS_SELECTOR, GUEST_DS_SELECTOR},
936
    {HOST_FS_SELECTOR, GUEST_FS_SELECTOR},
937
    {HOST_GS_SELECTOR, GUEST_GS_SELECTOR},
938
    {HOST_TR_SELECTOR, GUEST_TR_SELECTOR},
939
    {HOST_SYSENTER_CS, GUEST_SYSENTER_CS},
940
    {HOST_FS_BASE, GUEST_FS_BASE},
941
    {HOST_GS_BASE, GUEST_GS_BASE},
942
    {HOST_TR_BASE, GUEST_TR_BASE},
943
    {HOST_GDTR_BASE, GUEST_GDTR_BASE},
944
    {HOST_IDTR_BASE, GUEST_IDTR_BASE},
945
    {HOST_SYSENTER_ESP, GUEST_SYSENTER_ESP},
946
    {HOST_SYSENTER_EIP, GUEST_SYSENTER_EIP},
947
};
948
949
static void vvmcs_to_shadow(const struct vcpu *v, unsigned int field)
950
0
{
951
0
    __vmwrite(field, get_vvmcs(v, field));
952
0
}
953
954
static void vvmcs_to_shadow_bulk(struct vcpu *v, unsigned int n,
955
                                 const u16 *field)
956
0
{
957
0
    u64 *value = this_cpu(vvmcs_buf);
958
0
    unsigned int i;
959
0
960
0
    if ( !cpu_has_vmx_vmcs_shadowing )
961
0
        goto fallback;
962
0
963
0
    if ( !value || n > VMCS_BUF_SIZE )
964
0
    {
965
0
        gdprintk(XENLOG_DEBUG, "vmcs sync fall back to non-bulk mode, "
966
0
                 "buffer: %p, buffer size: %d, fields number: %d.\n",
967
0
                 value, VMCS_BUF_SIZE, n);
968
0
        goto fallback;
969
0
    }
970
0
971
0
    virtual_vmcs_enter(v);
972
0
    for ( i = 0; i < n; i++ )
973
0
        __vmread(field[i], &value[i]);
974
0
    virtual_vmcs_exit(v);
975
0
976
0
    for ( i = 0; i < n; i++ )
977
0
        __vmwrite(field[i], value[i]);
978
0
979
0
    return;
980
0
981
0
fallback:
982
0
    for ( i = 0; i < n; i++ )
983
0
        vvmcs_to_shadow(v, field[i]);
984
0
}
985
986
static inline void shadow_to_vvmcs(const struct vcpu *v, unsigned int field)
987
0
{
988
0
    unsigned long value;
989
0
990
0
    if ( vmread_safe(field, &value) == 0 )
991
0
        set_vvmcs(v, field, value);
992
0
}
993
994
static void shadow_to_vvmcs_bulk(struct vcpu *v, unsigned int n,
995
                                 const u16 *field)
996
0
{
997
0
    u64 *value = this_cpu(vvmcs_buf);
998
0
    unsigned int i;
999
0
1000
0
    if ( !cpu_has_vmx_vmcs_shadowing )
1001
0
        goto fallback;
1002
0
1003
0
    if ( !value || n > VMCS_BUF_SIZE )
1004
0
    {
1005
0
        gdprintk(XENLOG_DEBUG, "vmcs sync fall back to non-bulk mode, "
1006
0
                 "buffer: %p, buffer size: %d, fields number: %d.\n",
1007
0
                 value, VMCS_BUF_SIZE, n);
1008
0
        goto fallback;
1009
0
    }
1010
0
1011
0
    for ( i = 0; i < n; i++ )
1012
0
        __vmread(field[i], &value[i]);
1013
0
1014
0
    virtual_vmcs_enter(v);
1015
0
    for ( i = 0; i < n; i++ )
1016
0
        __vmwrite(field[i], value[i]);
1017
0
    virtual_vmcs_exit(v);
1018
0
1019
0
    return;
1020
0
1021
0
fallback:
1022
0
    for ( i = 0; i < n; i++ )
1023
0
        shadow_to_vvmcs(v, field[i]);
1024
0
}
1025
1026
static void load_shadow_control(struct vcpu *v)
1027
0
{
1028
0
    /*
1029
0
     * Set shadow controls:  PIN_BASED, CPU_BASED, EXIT, ENTRY
1030
0
     * and EXCEPTION
1031
0
     * Enforce the removed features
1032
0
     */
1033
0
    nvmx_update_pin_control(v, vmx_pin_based_exec_control);
1034
0
    vmx_update_cpu_exec_control(v);
1035
0
    vmx_update_secondary_exec_control(v);
1036
0
    nvmx_update_exit_control(v, vmx_vmexit_control);
1037
0
    nvmx_update_entry_control(v);
1038
0
    vmx_update_exception_bitmap(v);
1039
0
    nvmx_update_apic_access_address(v);
1040
0
    nvmx_update_virtual_apic_address(v);
1041
0
    nvmx_update_tpr_threshold(v);
1042
0
    nvmx_update_pfec(v);
1043
0
}
1044
1045
static void load_shadow_guest_state(struct vcpu *v)
1046
0
{
1047
0
    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
1048
0
    u32 control;
1049
0
    u64 cr_gh_mask, cr_read_shadow;
1050
0
    int rc;
1051
0
1052
0
    static const u16 vmentry_fields[] = {
1053
0
        VM_ENTRY_INTR_INFO,
1054
0
        VM_ENTRY_EXCEPTION_ERROR_CODE,
1055
0
        VM_ENTRY_INSTRUCTION_LEN,
1056
0
    };
1057
0
1058
0
    /* vvmcs.gstate to shadow vmcs.gstate */
1059
0
    vvmcs_to_shadow_bulk(v, ARRAY_SIZE(vmcs_gstate_field),
1060
0
                         vmcs_gstate_field);
1061
0
1062
0
    nvcpu->guest_cr[0] = get_vvmcs(v, CR0_READ_SHADOW);
1063
0
    nvcpu->guest_cr[4] = get_vvmcs(v, CR4_READ_SHADOW);
1064
0
1065
0
    rc = hvm_set_cr0(get_vvmcs(v, GUEST_CR0), 1);
1066
0
    if ( rc == X86EMUL_EXCEPTION )
1067
0
        hvm_inject_hw_exception(TRAP_gp_fault, 0);
1068
0
1069
0
    rc = hvm_set_cr4(get_vvmcs(v, GUEST_CR4), 1);
1070
0
    if ( rc == X86EMUL_EXCEPTION )
1071
0
        hvm_inject_hw_exception(TRAP_gp_fault, 0);
1072
0
1073
0
    rc = hvm_set_cr3(get_vvmcs(v, GUEST_CR3), 1);
1074
0
    if ( rc == X86EMUL_EXCEPTION )
1075
0
        hvm_inject_hw_exception(TRAP_gp_fault, 0);
1076
0
1077
0
    control = get_vvmcs(v, VM_ENTRY_CONTROLS);
1078
0
    if ( control & VM_ENTRY_LOAD_GUEST_PAT )
1079
0
        hvm_set_guest_pat(v, get_vvmcs(v, GUEST_PAT));
1080
0
    if ( control & VM_ENTRY_LOAD_PERF_GLOBAL_CTRL )
1081
0
    {
1082
0
        rc = hvm_msr_write_intercept(MSR_CORE_PERF_GLOBAL_CTRL,
1083
0
                                     get_vvmcs(v, GUEST_PERF_GLOBAL_CTRL), 0);
1084
0
        if ( rc == X86EMUL_EXCEPTION )
1085
0
            hvm_inject_hw_exception(TRAP_gp_fault, 0);
1086
0
    }
1087
0
1088
0
    hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, 0);
1089
0
1090
0
    vvmcs_to_shadow_bulk(v, ARRAY_SIZE(vmentry_fields), vmentry_fields);
1091
0
1092
0
    /*
1093
0
     * While emulating CR0 and CR4 for nested virtualization, set the CR0/CR4
1094
0
     * guest host mask to 0xffffffff in shadow VMCS (follow the host L1 VMCS),
1095
0
     * then calculate the corresponding read shadow separately for CR0 and CR4.
1096
0
     */
1097
0
    cr_gh_mask = get_vvmcs(v, CR0_GUEST_HOST_MASK);
1098
0
    cr_read_shadow = (get_vvmcs(v, GUEST_CR0) & ~cr_gh_mask) |
1099
0
                     (get_vvmcs(v, CR0_READ_SHADOW) & cr_gh_mask);
1100
0
    __vmwrite(CR0_READ_SHADOW, cr_read_shadow);
1101
0
1102
0
    cr_gh_mask = get_vvmcs(v, CR4_GUEST_HOST_MASK);
1103
0
    cr_read_shadow = (get_vvmcs(v, GUEST_CR4) & ~cr_gh_mask) |
1104
0
                     (get_vvmcs(v, CR4_READ_SHADOW) & cr_gh_mask);
1105
0
    __vmwrite(CR4_READ_SHADOW, cr_read_shadow);
1106
0
1107
0
    /* TODO: CR3 target control */
1108
0
}
1109
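The CR0/CR4 read-shadow computation above combines the two sources bit by bit: bits that L1 marked as host-owned in CR*_GUEST_HOST_MASK read back from L1's read shadow, while guest-owned bits read back from the real GUEST_CR* value. A standalone sketch with made-up example values (the masks and CR values below are illustrative, not real guest state):

#include <stdio.h>
#include <stdint.h>

/* Bits set in gh_mask are "host-owned": L2 reads them from the read shadow;
 * clear bits are read from the real guest CR value. */
static uint64_t combine_read_shadow(uint64_t guest_cr, uint64_t read_shadow,
                                    uint64_t gh_mask)
{
    return (guest_cr & ~gh_mask) | (read_shadow & gh_mask);
}

int main(void)
{
    /* Illustrative only: L1 owns CR0.PE (bit 0) and CR0.PG (bit 31). */
    uint64_t gh_mask     = (1ull << 0) | (1ull << 31);
    uint64_t guest_cr0   = 0x80000033;   /* what the CPU really runs with */
    uint64_t read_shadow = 0x00000031;   /* what L2 should see for owned bits */

    printf("%#llx\n", (unsigned long long)
           combine_read_shadow(guest_cr0, read_shadow, gh_mask));
    return 0;
}
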
1110
uint64_t get_shadow_eptp(struct vcpu *v)
1111
0
{
1112
0
    struct p2m_domain *p2m = p2m_get_nestedp2m(v);
1113
0
    struct ept_data *ept = &p2m->ept;
1114
0
1115
0
    ept->mfn = pagetable_get_pfn(p2m_get_pagetable(p2m));
1116
0
    return ept->eptp;
1117
0
}
1118
1119
static uint64_t get_host_eptp(struct vcpu *v)
1120
0
{
1121
0
    return p2m_get_hostp2m(v->domain)->ept.eptp;
1122
0
}
1123
1124
static bool_t nvmx_vpid_enabled(const struct vcpu *v)
1125
0
{
1126
0
    uint32_t second_cntl;
1127
0
1128
0
    second_cntl = get_vvmcs(v, SECONDARY_VM_EXEC_CONTROL);
1129
0
    if ( second_cntl & SECONDARY_EXEC_ENABLE_VPID )
1130
0
        return 1;
1131
0
    return 0;
1132
0
}
1133
1134
static void nvmx_set_vmcs_pointer(struct vcpu *v, struct vmcs_struct *vvmcs)
1135
0
{
1136
0
    paddr_t vvmcs_maddr = v->arch.hvm_vmx.vmcs_shadow_maddr;
1137
0
1138
0
    __vmpclear(vvmcs_maddr);
1139
0
    vvmcs->vmcs_revision_id |= VMCS_RID_TYPE_MASK;
1140
0
    __vmwrite(VMCS_LINK_POINTER, vvmcs_maddr);
1141
0
    __vmwrite(VMREAD_BITMAP, page_to_maddr(v->arch.hvm_vmx.vmread_bitmap));
1142
0
    __vmwrite(VMWRITE_BITMAP, page_to_maddr(v->arch.hvm_vmx.vmwrite_bitmap));
1143
0
}
1144
1145
static void nvmx_clear_vmcs_pointer(struct vcpu *v, struct vmcs_struct *vvmcs)
1146
0
{
1147
0
    paddr_t vvmcs_maddr = v->arch.hvm_vmx.vmcs_shadow_maddr;
1148
0
1149
0
    __vmpclear(vvmcs_maddr);
1150
0
    vvmcs->vmcs_revision_id &= ~VMCS_RID_TYPE_MASK;
1151
0
    __vmwrite(VMCS_LINK_POINTER, ~0ul);
1152
0
    __vmwrite(VMREAD_BITMAP, 0);
1153
0
    __vmwrite(VMWRITE_BITMAP, 0);
1154
0
}
1155
1156
static void virtual_vmentry(struct cpu_user_regs *regs)
1157
0
{
1158
0
    struct vcpu *v = current;
1159
0
    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
1160
0
    unsigned long lm_l1, lm_l2;
1161
0
1162
0
    vmx_vmcs_switch(v->arch.hvm_vmx.vmcs_pa, nvcpu->nv_n2vmcx_pa);
1163
0
1164
0
    nestedhvm_vcpu_enter_guestmode(v);
1165
0
    nvcpu->nv_vmentry_pending = 0;
1166
0
    nvcpu->nv_vmswitch_in_progress = 1;
1167
0
1168
0
    /*
1169
0
     * EFER handling:
1170
0
     * hvm_set_efer won't work if CR0.PG = 1, so we change the value
1171
0
     * directly to make hvm_long_mode_active(v) work in L2.
1172
0
     * An additional update_paging_modes is also needed if
1173
0
     * there is 32/64 switch. v->arch.hvm_vcpu.guest_efer doesn't
1174
0
     * need to be saved, since its value on vmexit is determined by
1175
0
     * L1 exit_controls
1176
0
     */
1177
0
    lm_l1 = hvm_long_mode_active(v);
1178
0
    lm_l2 = !!(get_vvmcs(v, VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE);
1179
0
1180
0
    if ( lm_l2 )
1181
0
        v->arch.hvm_vcpu.guest_efer |= EFER_LMA | EFER_LME;
1182
0
    else
1183
0
        v->arch.hvm_vcpu.guest_efer &= ~(EFER_LMA | EFER_LME);
1184
0
1185
0
    load_shadow_control(v);
1186
0
    load_shadow_guest_state(v);
1187
0
1188
0
    if ( lm_l1 != lm_l2 )
1189
0
        paging_update_paging_modes(v);
1190
0
1191
0
    if ( nvmx_ept_enabled(v) && hvm_pae_enabled(v) &&
1192
0
         !(v->arch.hvm_vcpu.guest_efer & EFER_LMA) )
1193
0
        vvmcs_to_shadow_bulk(v, ARRAY_SIZE(gpdpte_fields), gpdpte_fields);
1194
0
1195
0
    regs->rip = get_vvmcs(v, GUEST_RIP);
1196
0
    regs->rsp = get_vvmcs(v, GUEST_RSP);
1197
0
    regs->rflags = get_vvmcs(v, GUEST_RFLAGS);
1198
0
1199
0
    /* updating host cr0 to sync TS bit */
1200
0
    __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
1201
0
1202
0
    /* Set up virtual EPT for the L2 guest */
1203
0
    if ( nestedhvm_paging_mode_hap(v) )
1204
0
        /* This will set up the initial np2m for the nested vCPU */
1205
0
        __vmwrite(EPT_POINTER, get_shadow_eptp(v));
1206
0
    else
1207
0
        __vmwrite(EPT_POINTER, get_host_eptp(v));
1208
0
1209
0
    /* nested VPID support! */
1210
0
    if ( cpu_has_vmx_vpid && nvmx_vpid_enabled(v) )
1211
0
    {
1212
0
        struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
1213
0
        uint32_t new_vpid = get_vvmcs(v, VIRTUAL_PROCESSOR_ID);
1214
0
1215
0
        if ( nvmx->guest_vpid != new_vpid )
1216
0
        {
1217
0
            hvm_asid_flush_vcpu_asid(&vcpu_nestedhvm(v).nv_n2asid);
1218
0
            nvmx->guest_vpid = new_vpid;
1219
0
        }
1220
0
    }
1221
0
1222
0
}
1223
1224
static void sync_vvmcs_guest_state(struct vcpu *v, struct cpu_user_regs *regs)
1225
0
{
1226
0
    /* copy shadow vmcs.gstate back to vvmcs.gstate */
1227
0
    shadow_to_vvmcs_bulk(v, ARRAY_SIZE(vmcs_gstate_field),
1228
0
                         vmcs_gstate_field);
1229
0
    /* RIP, RSP are in user regs */
1230
0
    set_vvmcs(v, GUEST_RIP, regs->rip);
1231
0
    set_vvmcs(v, GUEST_RSP, regs->rsp);
1232
0
1233
0
    /* CR3 sync if exec doesn't want cr3 load exiting: i.e. nested EPT */
1234
0
    if ( !(__n2_exec_control(v) & CPU_BASED_CR3_LOAD_EXITING) )
1235
0
        shadow_to_vvmcs(v, GUEST_CR3);
1236
0
}
1237
1238
static void sync_vvmcs_ro(struct vcpu *v)
1239
0
{
1240
0
    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
1241
0
1242
0
    shadow_to_vvmcs_bulk(v, ARRAY_SIZE(vmcs_ro_field), vmcs_ro_field);
1243
0
1244
0
    /* Adjust exit_reason/exit_qualification for the EPT violation case */
1245
0
    if ( get_vvmcs(v, VM_EXIT_REASON) == EXIT_REASON_EPT_VIOLATION )
1246
0
    {
1247
0
        set_vvmcs(v, EXIT_QUALIFICATION, nvmx->ept.exit_qual);
1248
0
        set_vvmcs(v, VM_EXIT_REASON, nvmx->ept.exit_reason);
1249
0
    }
1250
0
}
1251
1252
static void load_vvmcs_host_state(struct vcpu *v)
1253
0
{
1254
0
    int i, rc;
1255
0
    u64 r;
1256
0
    u32 control;
1257
0
1258
0
    for ( i = 0; i < ARRAY_SIZE(vmcs_h2g_field); i++ )
1259
0
    {
1260
0
        r = get_vvmcs(v, vmcs_h2g_field[i].host_field);
1261
0
        __vmwrite(vmcs_h2g_field[i].guest_field, r);
1262
0
    }
1263
0
1264
0
    rc = hvm_set_cr0(get_vvmcs(v, HOST_CR0), 1);
1265
0
    if ( rc == X86EMUL_EXCEPTION )
1266
0
        hvm_inject_hw_exception(TRAP_gp_fault, 0);
1267
0
1268
0
    rc = hvm_set_cr4(get_vvmcs(v, HOST_CR4), 1);
1269
0
    if ( rc == X86EMUL_EXCEPTION )
1270
0
        hvm_inject_hw_exception(TRAP_gp_fault, 0);
1271
0
1272
0
    rc = hvm_set_cr3(get_vvmcs(v, HOST_CR3), 1);
1273
0
    if ( rc == X86EMUL_EXCEPTION )
1274
0
        hvm_inject_hw_exception(TRAP_gp_fault, 0);
1275
0
1276
0
    control = get_vvmcs(v, VM_EXIT_CONTROLS);
1277
0
    if ( control & VM_EXIT_LOAD_HOST_PAT )
1278
0
        hvm_set_guest_pat(v, get_vvmcs(v, HOST_PAT));
1279
0
    if ( control & VM_EXIT_LOAD_PERF_GLOBAL_CTRL )
1280
0
    {
1281
0
        rc = hvm_msr_write_intercept(MSR_CORE_PERF_GLOBAL_CTRL,
1282
0
                                     get_vvmcs(v, HOST_PERF_GLOBAL_CTRL), 1);
1283
0
        if ( rc == X86EMUL_EXCEPTION )
1284
0
            hvm_inject_hw_exception(TRAP_gp_fault, 0);
1285
0
    }
1286
0
1287
0
    hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, 0);
1288
0
1289
0
    set_vvmcs(v, VM_ENTRY_INTR_INFO, 0);
1290
0
}
1291
1292
static void sync_exception_state(struct vcpu *v)
1293
0
{
1294
0
    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
1295
0
1296
0
    if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) )
1297
0
        return;
1298
0
1299
0
    switch ( MASK_EXTR(nvmx->intr.intr_info, INTR_INFO_INTR_TYPE_MASK) )
1300
0
    {
1301
0
    case X86_EVENTTYPE_EXT_INTR:
1302
0
        /* rename exit_reason to EXTERNAL_INTERRUPT */
1303
0
        set_vvmcs(v, VM_EXIT_REASON, EXIT_REASON_EXTERNAL_INTERRUPT);
1304
0
        set_vvmcs(v, EXIT_QUALIFICATION, 0);
1305
0
        set_vvmcs(v, VM_EXIT_INTR_INFO,
1306
0
                    nvmx->intr.intr_info);
1307
0
        break;
1308
0
1309
0
    case X86_EVENTTYPE_HW_EXCEPTION:
1310
0
    case X86_EVENTTYPE_SW_INTERRUPT:
1311
0
    case X86_EVENTTYPE_SW_EXCEPTION:
1312
0
        /* throw to L1 */
1313
0
        set_vvmcs(v, VM_EXIT_INTR_INFO, nvmx->intr.intr_info);
1314
0
        set_vvmcs(v, VM_EXIT_INTR_ERROR_CODE, nvmx->intr.error_code);
1315
0
        break;
1316
0
    case X86_EVENTTYPE_NMI:
1317
0
        set_vvmcs(v, VM_EXIT_REASON, EXIT_REASON_EXCEPTION_NMI);
1318
0
        set_vvmcs(v, EXIT_QUALIFICATION, 0);
1319
0
        set_vvmcs(v, VM_EXIT_INTR_INFO, nvmx->intr.intr_info);
1320
0
        break;
1321
0
    default:
1322
0
        gdprintk(XENLOG_ERR, "Exception state %lx not handled\n",
1323
0
               nvmx->intr.intr_info); 
1324
0
        break;
1325
0
    }
1326
0
}
1327
1328
static void nvmx_update_apicv(struct vcpu *v)
1329
0
{
1330
0
    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
1331
0
    unsigned long reason = get_vvmcs(v, VM_EXIT_REASON);
1332
0
    uint32_t intr_info = get_vvmcs(v, VM_EXIT_INTR_INFO);
1333
0
1334
0
    if ( reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
1335
0
         nvmx->intr.source == hvm_intsrc_lapic &&
1336
0
         (intr_info & INTR_INFO_VALID_MASK) )
1337
0
    {
1338
0
        uint16_t status;
1339
0
        uint32_t rvi, ppr;
1340
0
        uint32_t vector = intr_info & 0xff;
1341
0
        struct vlapic *vlapic = vcpu_vlapic(v);
1342
0
1343
0
        vlapic_ack_pending_irq(v, vector, 1);
1344
0
1345
0
        ppr = vlapic_set_ppr(vlapic);
1346
0
        WARN_ON((ppr & 0xf0) != (vector & 0xf0));
1347
0
1348
0
        status = vector << VMX_GUEST_INTR_STATUS_SVI_OFFSET;
1349
0
        rvi = vlapic_has_pending_irq(v);
1350
0
        if ( rvi != -1 )
1351
0
            status |= rvi & VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK;
1352
0
1353
0
        __vmwrite(GUEST_INTR_STATUS, status);
1354
0
    }
1355
0
}
1356
1357
static void virtual_vmexit(struct cpu_user_regs *regs)
1358
0
{
1359
0
    struct vcpu *v = current;
1360
0
    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
1361
0
    unsigned long lm_l1, lm_l2;
1362
0
1363
0
    sync_vvmcs_ro(v);
1364
0
    sync_vvmcs_guest_state(v, regs);
1365
0
    sync_exception_state(v);
1366
0
1367
0
    if ( nvmx_ept_enabled(v) && hvm_pae_enabled(v) &&
1368
0
         !(v->arch.hvm_vcpu.guest_efer & EFER_LMA) )
1369
0
        shadow_to_vvmcs_bulk(v, ARRAY_SIZE(gpdpte_fields), gpdpte_fields);
1370
0
1371
0
    /* This will clear current pCPU bit in p2m->dirty_cpumask */
1372
0
    np2m_schedule(NP2M_SCHEDLE_OUT);
1373
0
1374
0
    vmx_vmcs_switch(v->arch.hvm_vmx.vmcs_pa, nvcpu->nv_n1vmcx_pa);
1375
0
1376
0
    nestedhvm_vcpu_exit_guestmode(v);
1377
0
    nvcpu->nv_vmexit_pending = 0;
1378
0
    nvcpu->nv_vmswitch_in_progress = 1;
1379
0
1380
0
    lm_l2 = hvm_long_mode_active(v);
1381
0
    lm_l1 = !!(get_vvmcs(v, VM_EXIT_CONTROLS) & VM_EXIT_IA32E_MODE);
1382
0
1383
0
    if ( lm_l1 )
1384
0
        v->arch.hvm_vcpu.guest_efer |= EFER_LMA | EFER_LME;
1385
0
    else
1386
0
        v->arch.hvm_vcpu.guest_efer &= ~(EFER_LMA | EFER_LME);
1387
0
1388
0
    vmx_update_cpu_exec_control(v);
1389
0
    vmx_update_secondary_exec_control(v);
1390
0
    vmx_update_exception_bitmap(v);
1391
0
1392
0
    load_vvmcs_host_state(v);
1393
0
1394
0
    if ( lm_l1 != lm_l2 )
1395
0
        paging_update_paging_modes(v);
1396
0
1397
0
    regs->rip = get_vvmcs(v, HOST_RIP);
1398
0
    regs->rsp = get_vvmcs(v, HOST_RSP);
1399
0
    /* VM exit clears all bits except bit 1 */
1400
0
    regs->rflags = X86_EFLAGS_MBS;
1401
0
1402
0
    /* updating host cr0 to sync TS bit */
1403
0
    __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
1404
0
1405
0
    if ( cpu_has_vmx_virtual_intr_delivery )
1406
0
        nvmx_update_apicv(v);
1407
0
1408
0
    nvcpu->nv_vmswitch_in_progress = 0;
1409
0
    vmsucceed(regs);
1410
0
}
1411
1412
static void nvmx_eptp_update(void)
1413
9.93M
{
1414
9.93M
    struct vcpu *curr = current;
1415
9.93M
1416
9.93M
    if ( !nestedhvm_vcpu_in_guestmode(curr) ||
1417
0
          vcpu_nestedhvm(curr).nv_vmexit_pending ||
1418
0
         !vcpu_nestedhvm(curr).stale_np2m ||
1419
0
         !nestedhvm_paging_mode_hap(curr) )
1420
9.78M
        return;
1421
9.93M
1422
9.93M
    /*
1423
9.93M
     * Interrupts are enabled here, so we need to clear stale_np2m
1424
9.93M
     * before we do the vmwrite.  If we do it in the other order, an
1425
9.93M
     * IPI could come in and change the shadow eptp after the vmwrite,
1426
9.93M
     * and we'd complete the vmenter with a stale eptp value.
1427
9.93M
     */
1428
150k
    vcpu_nestedhvm(curr).stale_np2m = false;
1429
150k
    __vmwrite(EPT_POINTER, get_shadow_eptp(curr));
1430
150k
}
1431
1432
void nvmx_switch_guest(void)
1433
9.97M
{
1434
9.97M
    struct vcpu *v = current;
1435
9.97M
    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
1436
9.97M
    struct cpu_user_regs *regs = guest_cpu_user_regs();
1437
9.97M
1438
9.97M
    nvmx_eptp_update();
1439
9.97M
1440
9.97M
    /*
1441
9.97M
     * A pending IO emulation may still not be finished. In this case, no
1442
9.97M
     * virtual vmswitch is allowed. Otherwise, the following IO emulation will
1443
9.97M
     * be handled in the wrong VCPU context. If there are no IO backends - PVH
1444
9.97M
     * guest by itself or a PVH guest with an HVM guest running inside - we
1445
9.97M
     * don't want to continue as this setup is not implemented nor supported
1446
9.97M
     * as of right now.
1447
9.97M
     */
1448
9.97M
    if ( hvm_io_pending(v) )
1449
391
        return;
1450
9.97M
    /*
1451
9.97M
     * A softirq may interrupt us between the handling of a virtual
1453
9.97M
     * vmentry and the true vmentry. If an L1 virtual interrupt causes
1454
9.97M
     * another virtual vmexit during this window, we cannot let that
1455
9.97M
     * happen, or VM_ENTRY_INTR_INFO will be lost.
1455
9.97M
     */
1456
9.97M
    if ( unlikely(nvcpu->nv_vmswitch_in_progress) )
1457
0
        return;
1458
9.97M
1459
9.97M
    if ( nestedhvm_vcpu_in_guestmode(v) && nvcpu->nv_vmexit_pending )
1460
0
        virtual_vmexit(regs);
1461
9.97M
    else if ( !nestedhvm_vcpu_in_guestmode(v) && nvcpu->nv_vmentry_pending )
1462
0
        virtual_vmentry(regs);
1463
9.97M
}
1464
1465
/*
1466
 * VMX instructions handling
1467
 */
1468
1469
int nvmx_handle_vmxon(struct cpu_user_regs *regs)
1470
0
{
1471
0
    struct vcpu *v=current;
1472
0
    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
1473
0
    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
1474
0
    struct vmx_inst_decoded decode;
1475
0
    unsigned long gpa = 0;
1476
0
    uint32_t nvmcs_revid;
1477
0
    int rc;
1478
0
1479
0
    rc = decode_vmx_inst(regs, &decode, &gpa, 1);
1480
0
    if ( rc != X86EMUL_OKAY )
1481
0
        return rc;
1482
0
1483
0
    if ( nvmx_vcpu_in_vmx(v) )
1484
0
    {
1485
0
        vmfail(regs, VMX_INSN_VMXON_IN_VMX_ROOT);
1486
0
        return X86EMUL_OKAY;
1487
0
    }
1488
0
1489
0
    if ( (gpa & ~PAGE_MASK) || !gfn_valid(v->domain, _gfn(gpa >> PAGE_SHIFT)) )
1490
0
    {
1491
0
        vmfail_invalid(regs);
1492
0
        return X86EMUL_OKAY;
1493
0
    }
1494
0
1495
0
    rc = hvm_copy_from_guest_phys(&nvmcs_revid, gpa, sizeof(nvmcs_revid));
1496
0
    if ( rc != HVMTRANS_okay ||
1497
0
         (nvmcs_revid & ~VMX_BASIC_REVISION_MASK) ||
1498
0
         ((nvmcs_revid ^ vmx_basic_msr) & VMX_BASIC_REVISION_MASK) )
1499
0
    {
1500
0
        vmfail_invalid(regs);
1501
0
        return X86EMUL_OKAY;
1502
0
    }
1503
0
1504
0
    nvmx->vmxon_region_pa = gpa;
1505
0
1506
0
    /*
1507
0
     * `fork' the host vmcs to shadow_vmcs
1508
0
     * vmcs_lock is not needed since we are on current
1509
0
     */
1510
0
    nvcpu->nv_n1vmcx_pa = v->arch.hvm_vmx.vmcs_pa;
1511
0
    __vmpclear(v->arch.hvm_vmx.vmcs_pa);
1512
0
    copy_domain_page(_mfn(PFN_DOWN(nvcpu->nv_n2vmcx_pa)),
1513
0
                     _mfn(PFN_DOWN(v->arch.hvm_vmx.vmcs_pa)));
1514
0
    __vmptrld(v->arch.hvm_vmx.vmcs_pa);
1515
0
    v->arch.hvm_vmx.launched = 0;
1516
0
    vmsucceed(regs);
1517
0
1518
0
    return X86EMUL_OKAY;
1519
0
}
1520
1521
int nvmx_handle_vmxoff(struct cpu_user_regs *regs)
1522
0
{
1523
0
    struct vcpu *v=current;
1524
0
    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
1525
0
    int rc;
1526
0
1527
0
    rc = vmx_inst_check_privilege(regs, 0);
1528
0
    if ( rc != X86EMUL_OKAY )
1529
0
        return rc;
1530
0
1531
0
    nvmx_purge_vvmcs(v);
1532
0
    nvmx->vmxon_region_pa = INVALID_PADDR;
1533
0
1534
0
    vmsucceed(regs);
1535
0
    return X86EMUL_OKAY;
1536
0
}
1537
1538
static bool_t vvmcs_launched(struct list_head *launched_list,
1539
                             unsigned long vvmcs_mfn)
1540
0
{
1541
0
    struct vvmcs_list *vvmcs;
1542
0
    struct list_head *pos;
1543
0
    bool_t launched = 0;
1544
0
1545
0
    list_for_each(pos, launched_list)
1546
0
    {
1547
0
        vvmcs = list_entry(pos, struct vvmcs_list, node);
1548
0
        if ( vvmcs_mfn == vvmcs->vvmcs_mfn )
1549
0
        {
1550
0
            launched = 1;
1551
0
            break;
1552
0
        }
1553
0
    }
1554
0
1555
0
    return launched;
1556
0
}
1557
1558
static int set_vvmcs_launched(struct list_head *launched_list,
1559
                              unsigned long vvmcs_mfn)
1560
0
{
1561
0
    struct vvmcs_list *vvmcs;
1562
0
1563
0
    if ( vvmcs_launched(launched_list, vvmcs_mfn) )
1564
0
        return 0;
1565
0
1566
0
    vvmcs = xzalloc(struct vvmcs_list);
1567
0
    if ( !vvmcs )
1568
0
        return -ENOMEM;
1569
0
1570
0
    vvmcs->vvmcs_mfn = vvmcs_mfn;
1571
0
    list_add(&vvmcs->node, launched_list);
1572
0
1573
0
    return 0;
1574
0
}
1575
1576
static void clear_vvmcs_launched(struct list_head *launched_list,
1577
                                 paddr_t vvmcs_mfn)
1578
0
{
1579
0
    struct vvmcs_list *vvmcs;
1580
0
    struct list_head *pos;
1581
0
1582
0
    list_for_each(pos, launched_list)
1583
0
    {
1584
0
        vvmcs = list_entry(pos, struct vvmcs_list, node);
1585
0
        if ( vvmcs_mfn == vvmcs->vvmcs_mfn )
1586
0
        {
1587
0
            list_del(&vvmcs->node);
1588
0
            xfree(vvmcs);
1589
0
            break;
1590
0
        }
1591
0
    }
1592
0
}
1593
1594
static int nvmx_vmresume(struct vcpu *v, struct cpu_user_regs *regs)
1595
0
{
1596
0
    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
1597
0
    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
1598
0
1599
0
    /* check VMCS is valid and IO BITMAP is set */
1600
0
    if ( (nvcpu->nv_vvmcxaddr != INVALID_PADDR) &&
1601
0
            ((nvmx->iobitmap[0] && nvmx->iobitmap[1]) ||
1602
0
            !(__n2_exec_control(v) & CPU_BASED_ACTIVATE_IO_BITMAP) ) )
1603
0
        nvcpu->nv_vmentry_pending = 1;
1604
0
    else
1605
0
        vmfail_invalid(regs);
1606
0
1607
0
    return X86EMUL_OKAY;
1608
0
}
1609
1610
int nvmx_handle_vmresume(struct cpu_user_regs *regs)
1611
0
{
1612
0
    bool_t launched;
1613
0
    struct vcpu *v = current;
1614
0
    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
1615
0
    unsigned long intr_shadow;
1616
0
    int rc = vmx_inst_check_privilege(regs, 0);
1617
0
1618
0
    if ( rc != X86EMUL_OKAY )
1619
0
        return rc;
1620
0
1621
0
    if ( vcpu_nestedhvm(v).nv_vvmcxaddr == INVALID_PADDR )
1622
0
    {
1623
0
        vmfail_invalid(regs);
1624
0
        return X86EMUL_OKAY;        
1625
0
    }
1626
0
1627
0
    __vmread(GUEST_INTERRUPTIBILITY_INFO, &intr_shadow);
1628
0
    if ( intr_shadow & VMX_INTR_SHADOW_MOV_SS )
1629
0
    {
1630
0
        vmfail_valid(regs, VMX_INSN_VMENTRY_BLOCKED_BY_MOV_SS);
1631
0
        return X86EMUL_OKAY;
1632
0
    }
1633
0
1634
0
    launched = vvmcs_launched(&nvmx->launched_list,
1635
0
                              PFN_DOWN(v->arch.hvm_vmx.vmcs_shadow_maddr));
1636
0
    if ( !launched )
1637
0
    {
1638
0
        vmfail_valid(regs, VMX_INSN_VMRESUME_NONLAUNCHED_VMCS);
1639
0
        return X86EMUL_OKAY;
1640
0
    }
1641
0
    return nvmx_vmresume(v,regs);
1642
0
}
1643
1644
int nvmx_handle_vmlaunch(struct cpu_user_regs *regs)
1645
0
{
1646
0
    bool_t launched;
1647
0
    struct vcpu *v = current;
1648
0
    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
1649
0
    unsigned long intr_shadow;
1650
0
    int rc = vmx_inst_check_privilege(regs, 0);
1651
0
1652
0
    if ( rc != X86EMUL_OKAY )
1653
0
        return rc;
1654
0
1655
0
    if ( vcpu_nestedhvm(v).nv_vvmcxaddr == INVALID_PADDR )
1656
0
    {
1657
0
        vmfail_invalid(regs);
1658
0
        return X86EMUL_OKAY;
1659
0
    }
1660
0
1661
0
    __vmread(GUEST_INTERRUPTIBILITY_INFO, &intr_shadow);
1662
0
    if ( intr_shadow & VMX_INTR_SHADOW_MOV_SS )
1663
0
    {
1664
0
        vmfail_valid(regs, VMX_INSN_VMENTRY_BLOCKED_BY_MOV_SS);
1665
0
        return X86EMUL_OKAY;
1666
0
    }
1667
0
1668
0
    launched = vvmcs_launched(&nvmx->launched_list,
1669
0
                              PFN_DOWN(v->arch.hvm_vmx.vmcs_shadow_maddr));
1670
0
    if ( launched )
1671
0
    {
1672
0
        vmfail_valid(regs, VMX_INSN_VMLAUNCH_NONCLEAR_VMCS);
1673
0
        return X86EMUL_OKAY;
1674
0
    }
1675
0
    else {
1676
0
        rc = nvmx_vmresume(v, regs);
1677
0
        if ( rc == X86EMUL_OKAY )
1678
0
        {
1679
0
            if ( set_vvmcs_launched(&nvmx->launched_list,
1680
0
                                    PFN_DOWN(v->arch.hvm_vmx.vmcs_shadow_maddr)) < 0 )
1681
0
                return X86EMUL_UNHANDLEABLE;
1682
0
        }
1683
0
    }
1684
0
    return rc;
1685
0
}
1686
1687
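
After the privilege check, nvmx_handle_vmresume() and nvmx_handle_vmlaunch() above share the same sequence of tests before delegating to nvmx_vmresume(): a current (loaded) vVMCS, no MOV-SS interrupt shadow, and finally the launch state, which must be "launched" for VMRESUME and "clear" for VMLAUNCH. A compact illustration of that ordering, with hypothetical names and no claim to be the Xen code:

#include <stdbool.h>

/* Outcome of the common VMLAUNCH/VMRESUME pre-checks (illustrative only). */
enum vmentry_verdict {
    VERDICT_FAIL_INVALID,      /* no current VMCS    -> vmfail_invalid() */
    VERDICT_FAIL_MOV_SS,       /* blocked by MOV SS  -> vmfail_valid()   */
    VERDICT_FAIL_LAUNCH_STATE, /* wrong launch state -> vmfail_valid()   */
    VERDICT_PROCEED,           /* go on to nvmx_vmresume() */
};

static enum vmentry_verdict
check_nested_vmentry(bool have_current_vmcs, bool in_mov_ss_shadow,
                     bool vmcs_launched, bool is_vmlaunch)
{
    if ( !have_current_vmcs )
        return VERDICT_FAIL_INVALID;

    if ( in_mov_ss_shadow )
        return VERDICT_FAIL_MOV_SS;

    /* VMLAUNCH wants a clear VMCS, VMRESUME wants a launched one. */
    if ( is_vmlaunch ? vmcs_launched : !vmcs_launched )
        return VERDICT_FAIL_LAUNCH_STATE;

    return VERDICT_PROCEED;
}
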
int nvmx_handle_vmptrld(struct cpu_user_regs *regs)
1688
0
{
1689
0
    struct vcpu *v = current;
1690
0
    struct vmx_inst_decoded decode;
1691
0
    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
1692
0
    unsigned long gpa = 0;
1693
0
    int rc;
1694
0
1695
0
    rc = decode_vmx_inst(regs, &decode, &gpa, 0);
1696
0
    if ( rc != X86EMUL_OKAY )
1697
0
        return rc;
1698
0
1699
0
    if ( gpa == vcpu_2_nvmx(v).vmxon_region_pa || gpa & 0xfff )
1700
0
    {
1701
0
        vmfail_invalid(regs);
1702
0
        goto out;
1703
0
    }
1704
0
1705
0
    if ( nvcpu->nv_vvmcxaddr != gpa )
1706
0
        nvmx_purge_vvmcs(v);
1707
0
1708
0
    if ( nvcpu->nv_vvmcxaddr == INVALID_PADDR )
1709
0
    {
1710
0
        bool_t writable;
1711
0
        void *vvmcx = hvm_map_guest_frame_rw(paddr_to_pfn(gpa), 1, &writable);
1712
0
1713
0
        if ( vvmcx )
1714
0
        {
1715
0
            if ( writable )
1716
0
            {
1717
0
                struct vmcs_struct *vvmcs = vvmcx;
1718
0
1719
0
                if ( ((vvmcs->vmcs_revision_id ^ vmx_basic_msr) &
1720
0
                                         VMX_BASIC_REVISION_MASK) ||
1721
0
                     (!cpu_has_vmx_vmcs_shadowing &&
1722
0
                      (vvmcs->vmcs_revision_id & ~VMX_BASIC_REVISION_MASK)) )
1723
0
                {
1724
0
                    hvm_unmap_guest_frame(vvmcx, 1);
1725
0
                    vmfail(regs, VMX_INSN_VMPTRLD_INCORRECT_VMCS_ID);
1726
0
1727
0
                    return X86EMUL_OKAY;
1728
0
                }
1729
0
                nvcpu->nv_vvmcx = vvmcx;
1730
0
                nvcpu->nv_vvmcxaddr = gpa;
1731
0
                v->arch.hvm_vmx.vmcs_shadow_maddr =
1732
0
                    pfn_to_paddr(domain_page_map_to_mfn(vvmcx));
1733
0
            }
1734
0
            else
1735
0
            {
1736
0
                hvm_unmap_guest_frame(vvmcx, 1);
1737
0
                vvmcx = NULL;
1738
0
            }
1739
0
        }
1740
0
        if ( !vvmcx ||
1741
0
             !map_io_bitmap_all(v) ||
1742
0
             !_map_msr_bitmap(v) )
1743
0
        {
1744
0
            vmfail_valid(regs, VMX_INSN_VMPTRLD_INVALID_PHYADDR);
1745
0
            goto out;
1746
0
        }
1747
0
    }
1748
0
1749
0
    if ( cpu_has_vmx_vmcs_shadowing )
1750
0
        nvmx_set_vmcs_pointer(v, nvcpu->nv_vvmcx);
1751
0
1752
0
    vmsucceed(regs);
1753
0
1754
0
out:
1755
0
    return X86EMUL_OKAY;
1756
0
}
1757
1758
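
The revision-ID test in nvmx_handle_vmptrld() above follows the SDM: bits 30:0 of the candidate VMCS must equal the revision reported in IA32_VMX_BASIC, and bit 31 (the shadow-VMCS marker) is only tolerated when VMCS shadowing is available. A small sketch of just that predicate, assuming the usual 0x7fffffff mask value; names here are illustrative, not the Xen macros:

#include <stdbool.h>
#include <stdint.h>

#define REVISION_MASK 0x7fffffffu   /* bits 30:0 of IA32_VMX_BASIC */

static bool vmcs_id_acceptable(uint32_t vmcs_revision_id,
                               uint64_t ia32_vmx_basic,
                               bool has_vmcs_shadowing)
{
    /* Revision (bits 30:0) must match what the (virtual) CPU reports. */
    if ( (vmcs_revision_id ^ ia32_vmx_basic) & REVISION_MASK )
        return false;

    /* Bit 31 marks a shadow VMCS; reject it if shadowing is unsupported. */
    if ( !has_vmcs_shadowing && (vmcs_revision_id & ~REVISION_MASK) )
        return false;

    return true;
}
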
int nvmx_handle_vmptrst(struct cpu_user_regs *regs)
1759
0
{
1760
0
    struct vcpu *v = current;
1761
0
    struct vmx_inst_decoded decode;
1762
0
    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
1763
0
    pagefault_info_t pfinfo;
1764
0
    unsigned long gpa = 0;
1765
0
    int rc;
1766
0
1767
0
    rc = decode_vmx_inst(regs, &decode, &gpa, 0);
1768
0
    if ( rc != X86EMUL_OKAY )
1769
0
        return rc;
1770
0
1771
0
    gpa = nvcpu->nv_vvmcxaddr;
1772
0
1773
0
    rc = hvm_copy_to_guest_linear(decode.mem, &gpa, decode.len, 0, &pfinfo);
1774
0
    if ( rc == HVMTRANS_bad_linear_to_gfn )
1775
0
        hvm_inject_page_fault(pfinfo.ec, pfinfo.linear);
1776
0
    if ( rc != HVMTRANS_okay )
1777
0
        return X86EMUL_EXCEPTION;
1778
0
1779
0
    vmsucceed(regs);
1780
0
    return X86EMUL_OKAY;
1781
0
}
1782
1783
int nvmx_handle_vmclear(struct cpu_user_regs *regs)
1784
0
{
1785
0
    struct vcpu *v = current;
1786
0
    struct vmx_inst_decoded decode;
1787
0
    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
1788
0
    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
1789
0
    unsigned long gpa = 0;
1790
0
    void *vvmcs;
1791
0
    int rc;
1792
0
1793
0
    rc = decode_vmx_inst(regs, &decode, &gpa, 0);
1794
0
    if ( rc != X86EMUL_OKAY )
1795
0
        return rc;
1796
0
1797
0
    BUILD_BUG_ON(X86EMUL_OKAY != VMSUCCEED); /* rc = VMSUCCEED; */
1798
0
    if ( gpa & 0xfff )
1799
0
        rc = VMFAIL_INVALID;
1800
0
    else if ( gpa == nvcpu->nv_vvmcxaddr )
1801
0
    {
1802
0
        if ( cpu_has_vmx_vmcs_shadowing )
1803
0
            nvmx_clear_vmcs_pointer(v, nvcpu->nv_vvmcx);
1804
0
        clear_vvmcs_launched(&nvmx->launched_list,
1805
0
                             PFN_DOWN(v->arch.hvm_vmx.vmcs_shadow_maddr));
1806
0
        nvmx_purge_vvmcs(v);
1807
0
    }
1808
0
    else
1809
0
    {
1810
0
        /* Even if this VMCS isn't the current one, we must clear it. */
1811
0
        bool_t writable;
1812
0
1813
0
        vvmcs = hvm_map_guest_frame_rw(paddr_to_pfn(gpa), 0, &writable);
1814
0
        if ( vvmcs )
1815
0
        {
1816
0
            if ( writable )
1817
0
                clear_vvmcs_launched(&nvmx->launched_list,
1818
0
                                     domain_page_map_to_mfn(vvmcs));
1819
0
            else
1820
0
                rc = VMFAIL_VALID;
1821
0
            hvm_unmap_guest_frame(vvmcs, 0);
1822
0
        }
1823
0
    }
1824
0
1825
0
    if ( rc == VMSUCCEED )
1826
0
        vmsucceed(regs);
1827
0
    else if ( rc == VMFAIL_VALID )
1828
0
        vmfail_valid(regs, VMX_INSN_VMCLEAR_INVALID_PHYADDR);
1829
0
    else
1830
0
        vmfail_invalid(regs);
1831
0
1832
0
    return X86EMUL_OKAY;
1833
0
}
1834
1835
int nvmx_handle_vmread(struct cpu_user_regs *regs)
1836
0
{
1837
0
    struct vcpu *v = current;
1838
0
    struct vmx_inst_decoded decode;
1839
0
    pagefault_info_t pfinfo;
1840
0
    u64 value = 0;
1841
0
    int rc;
1842
0
1843
0
    rc = decode_vmx_inst(regs, &decode, NULL, 0);
1844
0
    if ( rc != X86EMUL_OKAY )
1845
0
        return rc;
1846
0
1847
0
    if ( vcpu_nestedhvm(v).nv_vvmcxaddr == INVALID_PADDR )
1848
0
    {
1849
0
        vmfail_invalid(regs);
1850
0
        return X86EMUL_OKAY;
1851
0
    }
1852
0
1853
0
    rc = get_vvmcs_safe(v, reg_read(regs, decode.reg2), &value);
1854
0
    if ( rc != VMX_INSN_SUCCEED )
1855
0
    {
1856
0
        vmfail(regs, rc);
1857
0
        return X86EMUL_OKAY;
1858
0
    }
1859
0
1860
0
    switch ( decode.type ) {
1861
0
    case VMX_INST_MEMREG_TYPE_MEMORY:
1862
0
        rc = hvm_copy_to_guest_linear(decode.mem, &value, decode.len, 0, &pfinfo);
1863
0
        if ( rc == HVMTRANS_bad_linear_to_gfn )
1864
0
            hvm_inject_page_fault(pfinfo.ec, pfinfo.linear);
1865
0
        if ( rc != HVMTRANS_okay )
1866
0
            return X86EMUL_EXCEPTION;
1867
0
        break;
1868
0
    case VMX_INST_MEMREG_TYPE_REG:
1869
0
        reg_write(regs, decode.reg1, value);
1870
0
        break;
1871
0
    }
1872
0
1873
0
    vmsucceed(regs);
1874
0
    return X86EMUL_OKAY;
1875
0
}
1876
1877
int nvmx_handle_vmwrite(struct cpu_user_regs *regs)
1878
0
{
1879
0
    struct vcpu *v = current;
1880
0
    struct vmx_inst_decoded decode;
1881
0
    unsigned long operand;
1882
0
    u64 vmcs_encoding;
1883
0
    bool_t okay = 1;
1884
0
    enum vmx_insn_errno err;
1885
0
1886
0
    if ( decode_vmx_inst(regs, &decode, &operand, 0)
1887
0
             != X86EMUL_OKAY )
1888
0
        return X86EMUL_EXCEPTION;
1889
0
1890
0
    if ( vcpu_nestedhvm(v).nv_vvmcxaddr == INVALID_PADDR )
1891
0
    {
1892
0
        vmfail_invalid(regs);
1893
0
        return X86EMUL_OKAY;
1894
0
    }
1895
0
1896
0
    vmcs_encoding = reg_read(regs, decode.reg2);
1897
0
    err = set_vvmcs_safe(v, vmcs_encoding, operand);
1898
0
    if ( err != VMX_INSN_SUCCEED )
1899
0
    {
1900
0
        vmfail(regs, err);
1901
0
        return X86EMUL_OKAY;
1902
0
    }
1903
0
1904
0
    switch ( vmcs_encoding & ~VMCS_HIGH(0) )
1905
0
    {
1906
0
    case IO_BITMAP_A:
1907
0
        okay = _map_io_bitmap(v, IO_BITMAP_A);
1908
0
        break;
1909
0
    case IO_BITMAP_B:
1910
0
        okay = _map_io_bitmap(v, IO_BITMAP_B);
1911
0
        break;
1912
0
    case MSR_BITMAP:
1913
0
        okay = _map_msr_bitmap(v);
1914
0
        break;
1915
0
    }
1916
0
1917
0
    if ( okay )
1918
0
        vmsucceed(regs);
1919
0
    else
1920
0
        vmfail_valid(regs, VMX_INSN_UNSUPPORTED_VMCS_COMPONENT);
1921
0
1922
0
    return X86EMUL_OKAY;
1923
0
}
1924
1925
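
The vmcs_encoding & ~VMCS_HIGH(0) switch in nvmx_handle_vmwrite() above relies on the SDM field-encoding layout: for a 64-bit VMCS field, bit 0 of the encoding selects full (0) versus high-dword (1) access, so clearing that bit folds writes to either half of IO_BITMAP_A/B or MSR_BITMAP onto the same case label and re-triggers the bitmap (re)mapping. A tiny illustration of the folding, using the SDM encoding of IO_BITMAP_A (0x2000); everything else is an assumption for the example:

#include <assert.h>

#define EX_IO_BITMAP_A       0x2000u  /* full-width encoding (SDM) */
#define EX_VMCS_ACCESS_HIGH  0x0001u  /* "high dword" access bit   */

static unsigned int fold_encoding(unsigned int enc)
{
    return enc & ~EX_VMCS_ACCESS_HIGH;
}

int main(void)
{
    /* A VMWRITE to either half of IO_BITMAP_A selects the same handler. */
    assert(fold_encoding(EX_IO_BITMAP_A) == EX_IO_BITMAP_A);
    assert(fold_encoding(EX_IO_BITMAP_A | EX_VMCS_ACCESS_HIGH) == EX_IO_BITMAP_A);
    return 0;
}
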
int nvmx_handle_invept(struct cpu_user_regs *regs)
1926
0
{
1927
0
    struct vmx_inst_decoded decode;
1928
0
    unsigned long eptp;
1929
0
    int ret;
1930
0
1931
0
    if ( (ret = decode_vmx_inst(regs, &decode, &eptp, 0)) != X86EMUL_OKAY )
1932
0
        return ret;
1933
0
1934
0
    switch ( reg_read(regs, decode.reg2) )
1935
0
    {
1936
0
    case INVEPT_SINGLE_CONTEXT:
1937
0
    {
1938
0
        np2m_flush_base(current, eptp);
1939
0
        break;
1940
0
    }
1941
0
    case INVEPT_ALL_CONTEXT:
1942
0
        p2m_flush_nestedp2m(current->domain);
1943
0
        __invept(INVEPT_ALL_CONTEXT, 0, 0);
1944
0
        break;
1945
0
    default:
1946
0
        vmfail_invalid(regs);
1947
0
        return X86EMUL_OKAY;
1948
0
    }
1949
0
    vmsucceed(regs);
1950
0
    return X86EMUL_OKAY;
1951
0
}
1952
1953
int nvmx_handle_invvpid(struct cpu_user_regs *regs)
1954
0
{
1955
0
    struct vmx_inst_decoded decode;
1956
0
    unsigned long vpid;
1957
0
    int ret;
1958
0
1959
0
    if ( (ret = decode_vmx_inst(regs, &decode, &vpid, 0)) != X86EMUL_OKAY )
1960
0
        return ret;
1961
0
1962
0
    switch ( reg_read(regs, decode.reg2) )
1963
0
    {
1964
0
    /* Just invalidate all tlb entries for all types! */
1965
0
    case INVVPID_INDIVIDUAL_ADDR:
1966
0
    case INVVPID_SINGLE_CONTEXT:
1967
0
    case INVVPID_ALL_CONTEXT:
1968
0
        hvm_asid_flush_vcpu_asid(&vcpu_nestedhvm(current).nv_n2asid);
1969
0
        break;
1970
0
    default:
1971
0
        vmfail_invalid(regs);
1972
0
        return X86EMUL_OKAY;
1973
0
    }
1974
0
1975
0
    vmsucceed(regs);
1976
0
    return X86EMUL_OKAY;
1977
0
}
1978
1979
#define __emul_value(enable1, default1) \
1980
0
    ((enable1 | default1) << 32 | (default1))
1981
1982
#define gen_vmx_msr(enable1, default1, host_value) \
1983
0
    (((__emul_value(enable1, default1) & host_value) & (~0ul << 32)) | \
1984
0
    ((uint32_t)(__emul_value(enable1, default1) | host_value)))
1985
1986
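
In the VMX control capability MSRs, the low 32 bits are the allowed-0 settings (a bit set there must be 1 in the control) and the high 32 bits are the allowed-1 settings (a bit set there may be 1). __emul_value() therefore builds the value to advertise from the emulated 1-settings plus the default-1 bits, and gen_vmx_msr() intersects the allowed-1 half with the host MSR while taking the union of the must-be-1 half. A worked example of the same arithmetic with made-up bit values, written as a plain standalone C program rather than hypervisor code:

#include <stdint.h>
#include <stdio.h>

static uint64_t emul_value(uint32_t enable1, uint32_t default1)
{
    return ((uint64_t)(enable1 | default1) << 32) | default1;
}

static uint64_t gen_msr(uint32_t enable1, uint32_t default1, uint64_t host)
{
    uint64_t emul = emul_value(enable1, default1);

    /* Allowed-1: only what both the emulation and the host allow.
     * Must-be-1: whatever either the emulation or the host requires. */
    return ((emul & host) & (~0ull << 32)) | (uint32_t)(emul | host);
}

int main(void)
{
    /* Expose bits 2 and 7, with bit 1 default-1, against a host that
     * allows bits 1, 2 and 7 and requires bit 1. */
    uint64_t v = gen_msr(0x84, 0x02, 0x0000008600000002ull);

    printf("%#llx\n", (unsigned long long)v); /* prints 0x8600000002 */
    return 0;
}
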
/*
1987
 * Capability reporting
1988
 */
1989
int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
1990
0
{
1991
0
    struct vcpu *v = current;
1992
0
    struct domain *d = v->domain;
1993
0
    u64 data = 0, host_data = 0;
1994
0
    int r = 1;
1995
0
1996
0
    /* VMX capability MSRs are available only when the guest supports VMX. */
1997
0
    if ( !nestedhvm_enabled(d) || !d->arch.cpuid->basic.vmx )
1998
0
        return 0;
1999
0
2000
0
    /*
2001
0
     * These MSRs are only available when flags in other MSRs are set.
2002
0
     * These prerequisites are listed in the Intel 64 and IA-32
2003
0
     * Architectures Software Developer’s Manual, Vol 3, Appendix A.
2004
0
     */
2005
0
    switch ( msr )
2006
0
    {
2007
0
    case MSR_IA32_VMX_PROCBASED_CTLS2:
2008
0
        if ( !cpu_has_vmx_secondary_exec_control )
2009
0
            return 0;
2010
0
        break;
2011
0
2012
0
    case MSR_IA32_VMX_EPT_VPID_CAP:
2013
0
        if ( !(cpu_has_vmx_ept || cpu_has_vmx_vpid) )
2014
0
            return 0;
2015
0
        break;
2016
0
2017
0
    case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
2018
0
    case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
2019
0
    case MSR_IA32_VMX_TRUE_EXIT_CTLS:
2020
0
    case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
2021
0
        if ( !(vmx_basic_msr & VMX_BASIC_DEFAULT1_ZERO) )
2022
0
            return 0;
2023
0
        break;
2024
0
2025
0
    case MSR_IA32_VMX_VMFUNC:
2026
0
        if ( !cpu_has_vmx_vmfunc )
2027
0
            return 0;
2028
0
        break;
2029
0
    }
2030
0
2031
0
    rdmsrl(msr, host_data);
2032
0
2033
0
    /*
2034
0
     * Remove unsupported features from the n1 guest capability MSR
2035
0
     */
2036
0
    switch (msr) {
2037
0
    case MSR_IA32_VMX_BASIC:
2038
0
    {
2039
0
        const struct vmcs_struct *vmcs =
2040
0
            map_domain_page(_mfn(PFN_DOWN(v->arch.hvm_vmx.vmcs_pa)));
2041
0
2042
0
        data = (host_data & (~0ul << 32)) |
2043
0
               (vmcs->vmcs_revision_id & 0x7fffffff);
2044
0
        unmap_domain_page(vmcs);
2045
0
        break;
2046
0
    }
2047
0
    case MSR_IA32_VMX_PINBASED_CTLS:
2048
0
    case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
2049
0
        /* 1-settings */
2050
0
        data = PIN_BASED_EXT_INTR_MASK |
2051
0
               PIN_BASED_NMI_EXITING |
2052
0
               PIN_BASED_PREEMPT_TIMER;
2053
0
        data = gen_vmx_msr(data, VMX_PINBASED_CTLS_DEFAULT1, host_data);
2054
0
        break;
2055
0
    case MSR_IA32_VMX_PROCBASED_CTLS:
2056
0
    case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
2057
0
    {
2058
0
        u32 default1_bits = VMX_PROCBASED_CTLS_DEFAULT1;
2059
0
        /* 1-settings */
2060
0
        data = CPU_BASED_HLT_EXITING |
2061
0
               CPU_BASED_VIRTUAL_INTR_PENDING |
2062
0
               CPU_BASED_CR8_LOAD_EXITING |
2063
0
               CPU_BASED_CR8_STORE_EXITING |
2064
0
               CPU_BASED_INVLPG_EXITING |
2065
0
               CPU_BASED_CR3_LOAD_EXITING |
2066
0
               CPU_BASED_CR3_STORE_EXITING |
2067
0
               CPU_BASED_MONITOR_EXITING |
2068
0
               CPU_BASED_MWAIT_EXITING |
2069
0
               CPU_BASED_MOV_DR_EXITING |
2070
0
               CPU_BASED_ACTIVATE_IO_BITMAP |
2071
0
               CPU_BASED_USE_TSC_OFFSETING |
2072
0
               CPU_BASED_UNCOND_IO_EXITING |
2073
0
               CPU_BASED_RDTSC_EXITING |
2074
0
               CPU_BASED_MONITOR_TRAP_FLAG |
2075
0
               CPU_BASED_VIRTUAL_NMI_PENDING |
2076
0
               CPU_BASED_ACTIVATE_MSR_BITMAP |
2077
0
               CPU_BASED_PAUSE_EXITING |
2078
0
               CPU_BASED_RDPMC_EXITING |
2079
0
               CPU_BASED_TPR_SHADOW |
2080
0
               CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
2081
0
2082
0
        if ( msr == MSR_IA32_VMX_TRUE_PROCBASED_CTLS )
2083
0
            default1_bits &= ~(CPU_BASED_CR3_LOAD_EXITING |
2084
0
                               CPU_BASED_CR3_STORE_EXITING |
2085
0
                               CPU_BASED_INVLPG_EXITING);
2086
0
2087
0
        data = gen_vmx_msr(data, default1_bits, host_data);
2088
0
        break;
2089
0
    }
2090
0
    case MSR_IA32_VMX_PROCBASED_CTLS2:
2091
0
        /* 1-settings */
2092
0
        data = SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING |
2093
0
               SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
2094
0
               SECONDARY_EXEC_ENABLE_VPID |
2095
0
               SECONDARY_EXEC_UNRESTRICTED_GUEST |
2096
0
               SECONDARY_EXEC_ENABLE_EPT;
2097
0
        data = gen_vmx_msr(data, 0, host_data);
2098
0
        break;
2099
0
    case MSR_IA32_VMX_EXIT_CTLS:
2100
0
    case MSR_IA32_VMX_TRUE_EXIT_CTLS:
2101
0
        /* 1-settings */
2102
0
        data = VM_EXIT_ACK_INTR_ON_EXIT |
2103
0
               VM_EXIT_IA32E_MODE |
2104
0
               VM_EXIT_SAVE_PREEMPT_TIMER |
2105
0
               VM_EXIT_SAVE_GUEST_PAT |
2106
0
               VM_EXIT_LOAD_HOST_PAT |
2107
0
               VM_EXIT_SAVE_GUEST_EFER |
2108
0
               VM_EXIT_LOAD_HOST_EFER |
2109
0
               VM_EXIT_LOAD_PERF_GLOBAL_CTRL;
2110
0
        data = gen_vmx_msr(data, VMX_EXIT_CTLS_DEFAULT1, host_data);
2111
0
        break;
2112
0
    case MSR_IA32_VMX_ENTRY_CTLS:
2113
0
    case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
2114
0
        /* 1-settings */
2115
0
        data = VM_ENTRY_LOAD_GUEST_PAT |
2116
0
               VM_ENTRY_LOAD_GUEST_EFER |
2117
0
               VM_ENTRY_LOAD_PERF_GLOBAL_CTRL |
2118
0
               VM_ENTRY_IA32E_MODE;
2119
0
        data = gen_vmx_msr(data, VMX_ENTRY_CTLS_DEFAULT1, host_data);
2120
0
        break;
2121
0
2122
0
    case MSR_IA32_VMX_VMCS_ENUM:
2123
0
        /* The max index of VVMCS encoding is 0x1f. */
2124
0
        data = 0x1f << 1;
2125
0
        break;
2126
0
    case MSR_IA32_VMX_CR0_FIXED0:
2127
0
        /* PG, PE bits must be 1 in VMX operation */
2128
0
        data = X86_CR0_PE | X86_CR0_PG;
2129
0
        break;
2130
0
    case MSR_IA32_VMX_CR0_FIXED1:
2131
0
        /* allow 0-settings for all bits */
2132
0
        data = 0xffffffff;
2133
0
        break;
2134
0
    case MSR_IA32_VMX_CR4_FIXED0:
2135
0
        /* VMXE bit must be 1 in VMX operation */
2136
0
        data = X86_CR4_VMXE;
2137
0
        break;
2138
0
    case MSR_IA32_VMX_CR4_FIXED1:
2139
0
        data = hvm_cr4_guest_valid_bits(v, 0);
2140
0
        break;
2141
0
    case MSR_IA32_VMX_MISC:
2142
0
        /* The CR3-target feature is not supported yet. */
2143
0
        data = host_data & ~VMX_MISC_CR3_TARGET;
2144
0
        break;
2145
0
    case MSR_IA32_VMX_EPT_VPID_CAP:
2146
0
        data = nept_get_ept_vpid_cap();
2147
0
        break;
2148
0
    default:
2149
0
        r = 0;
2150
0
        break;
2151
0
    }
2152
0
2153
0
    *msr_content = data;
2154
0
    return r;
2155
0
}
2156
2157
/* This function uses L2_gpa to walk the P2M page table in L1. If the
2158
 * walk is successful, the translated value is returned in
2159
 * L1_gpa. The return value tells the caller what to do next.
2160
 */
2161
int
2162
nvmx_hap_walk_L1_p2m(struct vcpu *v, paddr_t L2_gpa, paddr_t *L1_gpa,
2163
                     unsigned int *page_order, uint8_t *p2m_acc,
2164
                     bool_t access_r, bool_t access_w, bool_t access_x)
2165
0
{
2166
0
    int rc;
2167
0
    unsigned long gfn;
2168
0
    uint64_t exit_qual;
2169
0
    uint32_t exit_reason = EXIT_REASON_EPT_VIOLATION;
2170
0
    uint32_t rwx_rights = (access_x << 2) | (access_w << 1) | access_r;
2171
0
    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
2172
0
2173
0
    vmx_vmcs_enter(v);
2174
0
2175
0
    __vmread(EXIT_QUALIFICATION, &exit_qual);
2176
0
    rc = nept_translate_l2ga(v, L2_gpa, page_order, rwx_rights, &gfn, p2m_acc,
2177
0
                             &exit_qual, &exit_reason);
2178
0
    switch ( rc )
2179
0
    {
2180
0
    case EPT_TRANSLATE_SUCCEED:
2181
0
        *L1_gpa = (gfn << PAGE_SHIFT) + (L2_gpa & ~PAGE_MASK);
2182
0
        rc = NESTEDHVM_PAGEFAULT_DONE;
2183
0
        break;
2184
0
    case EPT_TRANSLATE_VIOLATION:
2185
0
    case EPT_TRANSLATE_MISCONFIG:
2186
0
        rc = NESTEDHVM_PAGEFAULT_INJECT;
2187
0
        nvmx->ept.exit_reason = exit_reason;
2188
0
        nvmx->ept.exit_qual = exit_qual;
2189
0
        break;
2190
0
    case EPT_TRANSLATE_RETRY:
2191
0
        rc = NESTEDHVM_PAGEFAULT_RETRY;
2192
0
        break;
2193
0
    default:
2194
0
        gdprintk(XENLOG_ERR, "GUEST EPT translation error!:%d\n", rc);
2195
0
        BUG();
2196
0
        break;
2197
0
    }
2198
0
2199
0
    vmx_vmcs_exit(v);
2200
0
2201
0
    return rc;
2202
0
}
2203
2204
void nvmx_idtv_handling(void)
2205
0
{
2206
0
    struct vcpu *v = current;
2207
0
    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
2208
0
    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
2209
0
    unsigned long idtv_info, reason;
2210
0
2211
0
    __vmread(IDT_VECTORING_INFO, &idtv_info);
2212
0
    if ( likely(!(idtv_info & INTR_INFO_VALID_MASK)) )
2213
0
        return;
2214
0
2215
0
    /*
2216
0
     * If L0 can handle the fault that caused the IDT vectoring, the event
2217
0
     * should be reinjected; otherwise, pass it to L1.
2218
0
     */
2219
0
    __vmread(VM_EXIT_REASON, &reason);
2220
0
    if ( reason != EXIT_REASON_EPT_VIOLATION ?
2221
0
         !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) :
2222
0
         !nvcpu->nv_vmexit_pending )
2223
0
    {
2224
0
        __vmwrite(VM_ENTRY_INTR_INFO, idtv_info & ~INTR_INFO_RESVD_BITS_MASK);
2225
0
        if ( idtv_info & INTR_INFO_DELIVER_CODE_MASK )
2226
0
        {
2227
0
            __vmread(IDT_VECTORING_ERROR_CODE, &reason);
2228
0
            __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, reason);
2229
0
        }
2230
0
        /*
2231
0
         * SDM 23.2.4, if L1 tries to inject a software interrupt
2232
0
         * and the delivery fails, VM_EXIT_INSTRUCTION_LEN receives
2233
0
         * the value of previous VM_ENTRY_INSTRUCTION_LEN.
2234
0
         *
2235
0
         * This means EXIT_INSTRUCTION_LEN is always valid here, for
2236
0
         * software interrupts both injected by L1, and generated in L2.
2237
0
         */
2238
0
        __vmread(VM_EXIT_INSTRUCTION_LEN, &reason);
2239
0
        __vmwrite(VM_ENTRY_INSTRUCTION_LEN, reason);
2240
0
   }
2241
0
}
2242
2243
/*
2244
 * L2 VMExit handling
2245
 *    return 1: Done or skip the normal layer 0 hypervisor process.
2246
 *              Typically it requires layer 1 hypervisor processing
2247
 *              or it may be already processed here.
2248
 *           0: Require the normal layer 0 process.
2249
 */
2250
int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs,
2251
                               unsigned int exit_reason)
2252
0
{
2253
0
    struct vcpu *v = current;
2254
0
    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
2255
0
    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
2256
0
    u32 ctrl;
2257
0
2258
0
    nvcpu->nv_vmexit_pending = 0;
2259
0
    nvmx->intr.intr_info = 0;
2260
0
    nvmx->intr.error_code = 0;
2261
0
2262
0
    switch (exit_reason) {
2263
0
    case EXIT_REASON_EXCEPTION_NMI:
2264
0
    {
2265
0
        unsigned long intr_info;
2266
0
        u32 valid_mask = MASK_INSR(X86_EVENTTYPE_HW_EXCEPTION,
2267
0
                                  INTR_INFO_INTR_TYPE_MASK) |
2268
0
                         INTR_INFO_VALID_MASK;
2269
0
        u64 exec_bitmap;
2270
0
        int vector;
2271
0
2272
0
        __vmread(VM_EXIT_INTR_INFO, &intr_info);
2273
0
        vector = intr_info & INTR_INFO_VECTOR_MASK;
2274
0
        /*
2275
0
         * Decided by the L0 and L1 exception bitmaps: if the vector is set in
2276
0
         * both, L0 has priority on #PF and #NM; L1 has priority on the others.
2277
0
         */
2278
0
        if ( vector == TRAP_page_fault )
2279
0
        {
2280
0
            if ( paging_mode_hap(v->domain) )
2281
0
                nvcpu->nv_vmexit_pending = 1;
2282
0
        }
2283
0
        else if ( vector == TRAP_no_device )
2284
0
        {
2285
0
            if ( v->fpu_dirtied )
2286
0
                nvcpu->nv_vmexit_pending = 1;
2287
0
        }
2288
0
        else if ( (intr_info & valid_mask) == valid_mask )
2289
0
        {
2290
0
            exec_bitmap = get_vvmcs(v, EXCEPTION_BITMAP);
2291
0
2292
0
            if ( exec_bitmap & (1 << vector) )
2293
0
                nvcpu->nv_vmexit_pending = 1;
2294
0
        }
2295
0
        break;
2296
0
    }
2297
0
    case EXIT_REASON_WBINVD:
2298
0
    case EXIT_REASON_EPT_VIOLATION:
2299
0
    case EXIT_REASON_EPT_MISCONFIG:
2300
0
    case EXIT_REASON_EXTERNAL_INTERRUPT:
2301
0
        /* pass to L0 handler */
2302
0
        break;
2303
0
    case VMX_EXIT_REASONS_FAILED_VMENTRY:
2304
0
    case EXIT_REASON_TRIPLE_FAULT:
2305
0
    case EXIT_REASON_TASK_SWITCH:
2306
0
    case EXIT_REASON_CPUID:
2307
0
    case EXIT_REASON_VMCALL:
2308
0
    case EXIT_REASON_VMCLEAR:
2309
0
    case EXIT_REASON_VMLAUNCH:
2310
0
    case EXIT_REASON_VMPTRLD:
2311
0
    case EXIT_REASON_VMPTRST:
2312
0
    case EXIT_REASON_VMREAD:
2313
0
    case EXIT_REASON_VMRESUME:
2314
0
    case EXIT_REASON_VMWRITE:
2315
0
    case EXIT_REASON_VMXOFF:
2316
0
    case EXIT_REASON_VMXON:
2317
0
    case EXIT_REASON_INVEPT:
2318
0
    case EXIT_REASON_XSETBV:
2319
0
        /* inject to L1 */
2320
0
        nvcpu->nv_vmexit_pending = 1;
2321
0
        break;
2322
0
2323
0
    case EXIT_REASON_MSR_READ:
2324
0
    case EXIT_REASON_MSR_WRITE:
2325
0
        ctrl = __n2_exec_control(v);
2326
0
2327
0
        /* Without ACTIVATE_MSR_BITMAP, all MSRs are intercepted. */
2328
0
        if ( !(ctrl & CPU_BASED_ACTIVATE_MSR_BITMAP) )
2329
0
            nvcpu->nv_vmexit_pending = 1;
2330
0
        else if ( !nvmx->msrbitmap )
2331
0
            /* ACTIVATE_MSR_BITMAP set, but L2 bitmap not mapped??? */
2332
0
            domain_crash(v->domain);
2333
0
        else
2334
0
            nvcpu->nv_vmexit_pending =
2335
0
                vmx_msr_is_intercepted(nvmx->msrbitmap, regs->ecx,
2336
0
                                       exit_reason == EXIT_REASON_MSR_WRITE);
2337
0
        break;
2338
0
2339
0
    case EXIT_REASON_IO_INSTRUCTION:
2340
0
        ctrl = __n2_exec_control(v);
2341
0
        if ( ctrl & CPU_BASED_ACTIVATE_IO_BITMAP )
2342
0
        {
2343
0
            unsigned long qual;
2344
0
            u16 port, size;
2345
0
2346
0
            __vmread(EXIT_QUALIFICATION, &qual);
2347
0
            port = qual >> 16;
2348
0
            size = (qual & 7) + 1;
2349
0
            do {
2350
0
                const u8 *bitmap = nvmx->iobitmap[port >> 15];
2351
0
2352
0
                if ( bitmap[(port & 0x7fff) >> 3] & (1 << (port & 7)) )
2353
0
                    nvcpu->nv_vmexit_pending = 1;
2354
0
                if ( !--size )
2355
0
                    break;
2356
0
                if ( !++port )
2357
0
                    nvcpu->nv_vmexit_pending = 1;
2358
0
            } while ( !nvcpu->nv_vmexit_pending );
2359
0
            if ( !nvcpu->nv_vmexit_pending )
2360
0
                printk(XENLOG_G_WARNING "L0 PIO %04x\n", port);
2361
0
        }
2362
0
        else if ( ctrl & CPU_BASED_UNCOND_IO_EXITING )
2363
0
            nvcpu->nv_vmexit_pending = 1;
2364
0
        break;
2365
0
2366
0
    case EXIT_REASON_PENDING_VIRT_INTR:
2367
0
        ctrl = __n2_exec_control(v);
2368
0
        if ( ctrl & CPU_BASED_VIRTUAL_INTR_PENDING )
2369
0
            nvcpu->nv_vmexit_pending = 1;
2370
0
        break;
2371
0
    case EXIT_REASON_PENDING_VIRT_NMI:
2372
0
        ctrl = __n2_exec_control(v);
2373
0
        if ( ctrl & CPU_BASED_VIRTUAL_NMI_PENDING )
2374
0
            nvcpu->nv_vmexit_pending = 1;
2375
0
        break;
2376
0
    case EXIT_REASON_MONITOR_TRAP_FLAG:
2377
0
        ctrl = __n2_exec_control(v);
2378
0
        if ( ctrl & CPU_BASED_MONITOR_TRAP_FLAG )
2379
0
            nvcpu->nv_vmexit_pending = 1;
2380
0
        break;
2381
0
    case EXIT_REASON_ACCESS_GDTR_OR_IDTR:
2382
0
    case EXIT_REASON_ACCESS_LDTR_OR_TR:
2383
0
        ctrl = __n2_secondary_exec_control(v);
2384
0
        if ( ctrl & SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING )
2385
0
            nvcpu->nv_vmexit_pending = 1;
2386
0
        break;
2387
0
    case EXIT_REASON_VMX_PREEMPTION_TIMER_EXPIRED:
2388
0
        ctrl = __n2_pin_exec_control(v);
2389
0
        if ( ctrl & PIN_BASED_PREEMPT_TIMER )
2390
0
            nvcpu->nv_vmexit_pending = 1;
2391
0
        break;
2392
0
    /* L1 has priority in handling several other types of exits. */
2393
0
    case EXIT_REASON_HLT:
2394
0
        ctrl = __n2_exec_control(v);
2395
0
        if ( ctrl & CPU_BASED_HLT_EXITING )
2396
0
            nvcpu->nv_vmexit_pending = 1;
2397
0
        break;
2398
0
    case EXIT_REASON_RDTSC:
2399
0
        ctrl = __n2_exec_control(v);
2400
0
        if ( ctrl & CPU_BASED_RDTSC_EXITING )
2401
0
            nvcpu->nv_vmexit_pending = 1;
2402
0
        else
2403
0
        {
2404
0
            /*
2405
0
             * A special handler is needed if L1 doesn't intercept rdtsc,
2406
0
             * to avoid changing guest_tsc and messing up timekeeping in L1.
2407
0
             */
2408
0
            msr_split(regs, hvm_get_guest_tsc(v) + get_vvmcs(v, TSC_OFFSET));
2409
0
            update_guest_eip();
2410
0
2411
0
            return 1;
2412
0
        }
2413
0
        break;
2414
0
    case EXIT_REASON_RDPMC:
2415
0
        ctrl = __n2_exec_control(v);
2416
0
        if ( ctrl & CPU_BASED_RDPMC_EXITING )
2417
0
            nvcpu->nv_vmexit_pending = 1;
2418
0
        break;
2419
0
    case EXIT_REASON_MWAIT_INSTRUCTION:
2420
0
        ctrl = __n2_exec_control(v);
2421
0
        if ( ctrl & CPU_BASED_MWAIT_EXITING )
2422
0
            nvcpu->nv_vmexit_pending = 1;
2423
0
        break;
2424
0
    case EXIT_REASON_PAUSE_INSTRUCTION:
2425
0
        ctrl = __n2_exec_control(v);
2426
0
        if ( ctrl & CPU_BASED_PAUSE_EXITING )
2427
0
            nvcpu->nv_vmexit_pending = 1;
2428
0
        break;
2429
0
    case EXIT_REASON_MONITOR_INSTRUCTION:
2430
0
        ctrl = __n2_exec_control(v);
2431
0
        if ( ctrl & CPU_BASED_MONITOR_EXITING )
2432
0
            nvcpu->nv_vmexit_pending = 1;
2433
0
        break;
2434
0
    case EXIT_REASON_DR_ACCESS:
2435
0
        ctrl = __n2_exec_control(v);
2436
0
        if ( (ctrl & CPU_BASED_MOV_DR_EXITING) &&
2437
0
            v->arch.hvm_vcpu.flag_dr_dirty )
2438
0
            nvcpu->nv_vmexit_pending = 1;
2439
0
        break;
2440
0
    case EXIT_REASON_INVLPG:
2441
0
        ctrl = __n2_exec_control(v);
2442
0
        if ( ctrl & CPU_BASED_INVLPG_EXITING )
2443
0
            nvcpu->nv_vmexit_pending = 1;
2444
0
        break;
2445
0
    case EXIT_REASON_CR_ACCESS:
2446
0
    {
2447
0
        unsigned long exit_qualification;
2448
0
        int cr, write;
2449
0
        u32 mask = 0;
2450
0
2451
0
        __vmread(EXIT_QUALIFICATION, &exit_qualification);
2452
0
        cr = VMX_CONTROL_REG_ACCESS_NUM(exit_qualification);
2453
0
        write = VMX_CONTROL_REG_ACCESS_TYPE(exit_qualification);
2454
0
        /* also according to guest exec_control */
2455
0
        ctrl = __n2_exec_control(v);
2456
0
2457
0
        if ( cr == 3 )
2458
0
        {
2459
0
            mask = write ? CPU_BASED_CR3_STORE_EXITING :
2460
0
                          CPU_BASED_CR3_LOAD_EXITING;
2461
0
            if ( ctrl & mask )
2462
0
                nvcpu->nv_vmexit_pending = 1;
2463
0
        }
2464
0
        else if ( cr == 8 )
2465
0
        {
2466
0
            mask = write ? CPU_BASED_CR8_STORE_EXITING :
2467
0
                          CPU_BASED_CR8_LOAD_EXITING;
2468
0
            if ( ctrl & mask )
2469
0
                nvcpu->nv_vmexit_pending = 1;
2470
0
        }
2471
0
        else  /* CR0, CR4, CLTS, LMSW */
2472
0
        {
2473
0
            /*
2474
0
             * While getting the VM exit for CR0/CR4 access, check if L1 VMM owns
2475
0
             * the bit.
2476
0
             * If so, inject the VM exit to L1 VMM.
2477
0
             * Otherwise, L0 will handle it and sync the value to L1 virtual VMCS.
2478
0
             */
2479
0
            unsigned long old_val, val, changed_bits;
2480
0
            switch ( VMX_CONTROL_REG_ACCESS_TYPE(exit_qualification) )
2481
0
            {
2482
0
            case VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR:
2483
0
            {
2484
0
                unsigned long gp = VMX_CONTROL_REG_ACCESS_GPR(exit_qualification);
2485
0
                unsigned long *reg;
2486
0
2487
0
                if ( (reg = decode_register(gp, guest_cpu_user_regs(), 0)) == NULL )
2488
0
                {
2489
0
                    gdprintk(XENLOG_ERR, "invalid gpr: %lx\n", gp);
2490
0
                    break;
2491
0
                }
2492
0
                val = *reg;
2493
0
                if ( cr == 0 )
2494
0
                {
2495
0
                    u64 cr0_gh_mask = get_vvmcs(v, CR0_GUEST_HOST_MASK);
2496
0
2497
0
                    __vmread(CR0_READ_SHADOW, &old_val);
2498
0
                    changed_bits = old_val ^ val;
2499
0
                    if ( changed_bits & cr0_gh_mask )
2500
0
                        nvcpu->nv_vmexit_pending = 1;
2501
0
                    else
2502
0
                    {
2503
0
                        u64 guest_cr0 = get_vvmcs(v, GUEST_CR0);
2504
0
2505
0
                        set_vvmcs(v, GUEST_CR0,
2506
0
                                  (guest_cr0 & cr0_gh_mask) | (val & ~cr0_gh_mask));
2507
0
                    }
2508
0
                }
2509
0
                else if ( cr == 4 )
2510
0
                {
2511
0
                    u64 cr4_gh_mask = get_vvmcs(v, CR4_GUEST_HOST_MASK);
2512
0
2513
0
                    __vmread(CR4_READ_SHADOW, &old_val);
2514
0
                    changed_bits = old_val ^ val;
2515
0
                    if ( changed_bits & cr4_gh_mask )
2516
0
                        nvcpu->nv_vmexit_pending = 1;
2517
0
                    else
2518
0
                    {
2519
0
                        u64 guest_cr4 = get_vvmcs(v, GUEST_CR4);
2520
0
2521
0
                        set_vvmcs(v, GUEST_CR4,
2522
0
                                  (guest_cr4 & cr4_gh_mask) | (val & ~cr4_gh_mask));
2523
0
                    }
2524
0
                }
2525
0
                else
2526
0
                    nvcpu->nv_vmexit_pending = 1;
2527
0
                break;
2528
0
            }
2529
0
            case VMX_CONTROL_REG_ACCESS_TYPE_CLTS:
2530
0
            {
2531
0
                u64 cr0_gh_mask = get_vvmcs(v, CR0_GUEST_HOST_MASK);
2532
0
2533
0
                if ( cr0_gh_mask & X86_CR0_TS )
2534
0
                    nvcpu->nv_vmexit_pending = 1;
2535
0
                else
2536
0
                {
2537
0
                    u64 guest_cr0 = get_vvmcs(v, GUEST_CR0);
2538
0
2539
0
                    set_vvmcs(v, GUEST_CR0, (guest_cr0 & ~X86_CR0_TS));
2540
0
                }
2541
0
                break;
2542
0
            }
2543
0
            case VMX_CONTROL_REG_ACCESS_TYPE_LMSW:
2544
0
            {
2545
0
                u64 cr0_gh_mask = get_vvmcs(v, CR0_GUEST_HOST_MASK);
2546
0
2547
0
                __vmread(CR0_READ_SHADOW, &old_val);
2548
0
                old_val &= X86_CR0_PE|X86_CR0_MP|X86_CR0_EM|X86_CR0_TS;
2549
0
                val = VMX_CONTROL_REG_ACCESS_DATA(exit_qualification) &
2550
0
                      (X86_CR0_PE|X86_CR0_MP|X86_CR0_EM|X86_CR0_TS);
2551
0
                changed_bits = old_val ^ val;
2552
0
                if ( changed_bits & cr0_gh_mask )
2553
0
                    nvcpu->nv_vmexit_pending = 1;
2554
0
                else
2555
0
                {
2556
0
                    u64 guest_cr0 = get_vvmcs(v, GUEST_CR0);
2557
0
2558
0
                    set_vvmcs(v, GUEST_CR0, (guest_cr0 & cr0_gh_mask) | (val & ~cr0_gh_mask));
2559
0
                }
2560
0
                break;
2561
0
            }
2562
0
            default:
2563
0
                break;
2564
0
            }
2565
0
        }
2566
0
        break;
2567
0
    }
2568
0
    case EXIT_REASON_APIC_ACCESS:
2569
0
        ctrl = __n2_secondary_exec_control(v);
2570
0
        if ( ctrl & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES )
2571
0
            nvcpu->nv_vmexit_pending = 1;
2572
0
        break;
2573
0
    case EXIT_REASON_TPR_BELOW_THRESHOLD:
2574
0
        ctrl = __n2_exec_control(v);
2575
0
        if ( ctrl & CPU_BASED_TPR_SHADOW )
2576
0
            nvcpu->nv_vmexit_pending = 1;
2577
0
        break;
2578
0
    default:
2579
0
        gprintk(XENLOG_ERR, "Unexpected nested vmexit: reason %u\n",
2580
0
                exit_reason);
2581
0
    }
2582
0
2583
0
    return ( nvcpu->nv_vmexit_pending == 1 );
2584
0
}
2585
2586
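
The port loop under EXIT_REASON_IO_INSTRUCTION above consults the two 4KiB I/O bitmaps supplied by L1: bitmap 0 covers ports 0x0000-0x7fff and bitmap 1 covers 0x8000-0xffff, one bit per port, and a multi-byte access is forwarded to L1 if any covered port has its bit set or if the access wraps past port 0xffff. A self-contained sketch of that walk, with hypothetical names:

#include <stdbool.h>
#include <stdint.h>

/* Illustrative only: bitmap[0] covers ports 0x0000-0x7fff,
 * bitmap[1] covers ports 0x8000-0xffff, one bit per port. */
static bool io_access_intercepted(const uint8_t *bitmap[2],
                                  uint16_t port, unsigned int bytes)
{
    do {
        const uint8_t *bm = bitmap[port >> 15];

        if ( bm[(port & 0x7fff) >> 3] & (1u << (port & 7)) )
            return true;          /* this port's bit is set: exit to L1 */
        if ( !--bytes )
            break;
        if ( ++port == 0 )        /* wrapped around port 0xffff */
            return true;
    } while ( true );

    return false;
}
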
void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr)
2587
0
{
2588
0
    unsigned long cr_field, read_shadow_field, mask_field;
2589
0
2590
0
    switch ( cr )
2591
0
    {
2592
0
    case 0:
2593
0
        cr_field = GUEST_CR0;
2594
0
        read_shadow_field = CR0_READ_SHADOW;
2595
0
        mask_field = CR0_GUEST_HOST_MASK;
2596
0
        break;
2597
0
    case 4:
2598
0
        cr_field = GUEST_CR4;
2599
0
        read_shadow_field = CR4_READ_SHADOW;
2600
0
        mask_field = CR4_GUEST_HOST_MASK;
2601
0
        break;
2602
0
    default:
2603
0
        gdprintk(XENLOG_WARNING, "Set read shadow for CR%d.\n", cr);
2604
0
        return;
2605
0
    }
2606
0
2607
0
    if ( !nestedhvm_vmswitch_in_progress(v) )
2608
0
    {
2609
0
        unsigned long virtual_cr_mask =
2610
0
            get_vvmcs(v, mask_field);
2611
0
2612
0
        /*
2613
0
         * We get here when L2 changed cr in a way that did not change
2614
0
         * any of L1's shadowed bits (see nvmx_n2_vmexit_handler),
2615
0
         * but did change L0 shadowed bits. So we first calculate the
2616
0
         * effective cr value that L1 would like to write into the
2617
0
         * hardware. It consists of the L2-owned bits from the new
2618
0
         * value combined with the L1-owned bits from L1's guest cr.
2619
0
         */
2620
0
        v->arch.hvm_vcpu.guest_cr[cr] &= ~virtual_cr_mask;
2621
0
        v->arch.hvm_vcpu.guest_cr[cr] |= virtual_cr_mask &
2622
0
            get_vvmcs(v, cr_field);
2623
0
    }
2624
0
2625
0
    /* nvcpu.guest_cr holds the value L2 actually wrote to the CR. */
2626
0
    __vmwrite(read_shadow_field, v->arch.hvm_vcpu.nvcpu.guest_cr[cr]);
2627
0
}
2628
2629
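
nvmx_set_cr_read_shadow() above recombines the control register from two sources: bits covered by the guest/host mask are owned by L1 and taken from L1's virtual GUEST_CR0/GUEST_CR4, while the remaining bits come from the value L2 just wrote. A worked example of that combination with made-up numbers, not the Xen helper itself:

#include <stdint.h>
#include <stdio.h>

static uint64_t effective_cr(uint64_t l2_written, uint64_t l1_guest_cr,
                             uint64_t guest_host_mask)
{
    /* L2-owned bits from the new value, L1-owned bits from L1's guest CR. */
    return (l2_written & ~guest_host_mask) | (l1_guest_cr & guest_host_mask);
}

int main(void)
{
    /* L1 owns TS (bit 3); L2 writes a value with TS clear, so the
     * L1-owned TS bit is preserved from L1's virtual GUEST_CR0. */
    uint64_t cr0 = effective_cr(/* L2 wrote      */ 0x33,
                                /* L1 GUEST_CR0  */ 0x3b,
                                /* mask: TS only */ 0x08);

    printf("%#llx\n", (unsigned long long)cr0); /* prints 0x3b */
    return 0;
}
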
/*
2630
 * Local variables:
2631
 * mode: C
2632
 * c-file-style: "BSD"
2633
 * c-basic-offset: 4
2634
 * tab-width: 4
2635
 * indent-tabs-mode: nil
2636
 * End:
2637
 */