Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/arch/x86/hvm/vmx/vmcs.c
Line
Count
Source
1
/*
2
 * vmcs.c: VMCS management
3
 * Copyright (c) 2004, Intel Corporation.
4
 *
5
 * This program is free software; you can redistribute it and/or modify it
6
 * under the terms and conditions of the GNU General Public License,
7
 * version 2, as published by the Free Software Foundation.
8
 *
9
 * This program is distributed in the hope it will be useful, but WITHOUT
10
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12
 * more details.
13
 *
14
 * You should have received a copy of the GNU General Public License along with
15
 * this program; If not, see <http://www.gnu.org/licenses/>.
16
 */
17
18
#include <xen/init.h>
19
#include <xen/mm.h>
20
#include <xen/lib.h>
21
#include <xen/errno.h>
22
#include <xen/domain_page.h>
23
#include <xen/event.h>
24
#include <xen/kernel.h>
25
#include <xen/keyhandler.h>
26
#include <xen/vm_event.h>
27
#include <asm/current.h>
28
#include <asm/cpufeature.h>
29
#include <asm/processor.h>
30
#include <asm/msr.h>
31
#include <asm/xstate.h>
32
#include <asm/hvm/hvm.h>
33
#include <asm/hvm/io.h>
34
#include <asm/hvm/support.h>
35
#include <asm/hvm/vmx/vmx.h>
36
#include <asm/hvm/vmx/vvmx.h>
37
#include <asm/hvm/vmx/vmcs.h>
38
#include <asm/flushtlb.h>
39
#include <asm/monitor.h>
40
#include <asm/shadow.h>
41
#include <asm/tboot.h>
42
#include <asm/apic.h>
43
44
static bool_t __read_mostly opt_vpid_enabled = 1;
45
boolean_param("vpid", opt_vpid_enabled);
46
47
static bool_t __read_mostly opt_unrestricted_guest_enabled = 1;
48
boolean_param("unrestricted_guest", opt_unrestricted_guest_enabled);
49
50
static bool_t __read_mostly opt_apicv_enabled = 1;
51
boolean_param("apicv", opt_apicv_enabled);
52
53
/*
54
 * These two parameters are used to configure the controls for Pause-Loop Exiting:
55
 * ple_gap:    upper bound on the amount of time between two successive
56
 *             executions of PAUSE in a loop.
57
 * ple_window: upper bound on the amount of time a guest is allowed to execute
58
 *             in a PAUSE loop.
59
 * Time is measured based on a counter that runs at the same rate as the TSC,
60
 * refer to SDM volume 3b, sections 21.6.13 & 22.1.3.
61
 */
62
static unsigned int __read_mostly ple_gap = 128;
63
integer_param("ple_gap", ple_gap);
64
static unsigned int __read_mostly ple_window = 4096;
65
integer_param("ple_window", ple_window);
66
67
static bool_t __read_mostly opt_pml_enabled = 1;
68
static s8 __read_mostly opt_ept_ad = -1;
69
70
/*
71
 * The 'ept' parameter controls functionalities that depend on, or impact the
72
 * EPT mechanism. Optional comma separated value may contain:
73
 *
74
 *  pml                 Enable PML
75
 *  ad                  Use A/D bits
76
 */
77
static int __init parse_ept_param(const char *s)
78
0
{
79
0
    const char *ss;
80
0
    int rc = 0;
81
0
82
0
    do {
83
0
        bool_t val = !!strncmp(s, "no-", 3);
84
0
85
0
        if ( !val )
86
0
            s += 3;
87
0
88
0
        ss = strchr(s, ',');
89
0
        if ( !ss )
90
0
            ss = strchr(s, '\0');
91
0
92
0
        if ( !strncmp(s, "pml", ss - s) )
93
0
            opt_pml_enabled = val;
94
0
        else if ( !strncmp(s, "ad", ss - s) )
95
0
            opt_ept_ad = val;
96
0
        else
97
0
            rc = -EINVAL;
98
0
99
0
        s = ss + 1;
100
0
    } while ( *ss );
101
0
102
0
    return rc;
103
0
}
104
custom_param("ept", parse_ept_param);
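The parsing pattern above (comma-separated tokens, with a "no-" prefix negating an option) can be illustrated with a stand-alone reduction to plain libc; the demo_* names and the sample string "pml,no-ad" below are invented for illustration and are not part of vmcs.c.

/*
 * Stand-alone sketch of the same "no-"-prefix, comma-separated parsing
 * pattern as parse_ept_param(); hypothetical demo code only.
 */
#include <stdio.h>
#include <string.h>

static int demo_pml = 1, demo_ad = -1;

static int demo_parse_ept(const char *s)
{
    const char *ss;
    int rc = 0;

    do {
        int val = strncmp(s, "no-", 3) != 0;   /* "no-<opt>" disables <opt> */

        if ( !val )
            s += 3;

        ss = strchr(s, ',');
        if ( !ss )
            ss = strchr(s, '\0');

        if ( !strncmp(s, "pml", ss - s) )
            demo_pml = val;
        else if ( !strncmp(s, "ad", ss - s) )
            demo_ad = val;
        else
            rc = -1;                           /* unknown token */

        s = ss + 1;
    } while ( *ss );

    return rc;
}

int main(void)
{
    demo_parse_ept("pml,no-ad");
    printf("pml=%d ad=%d\n", demo_pml, demo_ad); /* prints "pml=1 ad=0" */
    return 0;
}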
105
106
/* Dynamic (run-time adjusted) execution control flags. */
107
u32 vmx_pin_based_exec_control __read_mostly;
108
u32 vmx_cpu_based_exec_control __read_mostly;
109
u32 vmx_secondary_exec_control __read_mostly;
110
u32 vmx_vmexit_control __read_mostly;
111
u32 vmx_vmentry_control __read_mostly;
112
u64 vmx_ept_vpid_cap __read_mostly;
113
u64 vmx_vmfunc __read_mostly;
114
bool_t vmx_virt_exception __read_mostly;
115
116
static DEFINE_PER_CPU_READ_MOSTLY(paddr_t, vmxon_region);
117
static DEFINE_PER_CPU(paddr_t, current_vmcs);
118
static DEFINE_PER_CPU(struct list_head, active_vmcs_list);
119
DEFINE_PER_CPU(bool_t, vmxon);
120
121
static u32 vmcs_revision_id __read_mostly;
122
u64 __read_mostly vmx_basic_msr;
123
124
static void __init vmx_display_features(void)
125
1
{
126
1
    int printed = 0;
127
1
128
1
    printk("VMX: Supported advanced features:\n");
129
1
130
15
#define P(p,s) if ( p ) { printk(" - %s\n", s); printed = 1; }
131
1
    P(cpu_has_vmx_virtualize_apic_accesses, "APIC MMIO access virtualisation");
132
1
    P(cpu_has_vmx_tpr_shadow, "APIC TPR shadow");
133
1
    P(cpu_has_vmx_ept, "Extended Page Tables (EPT)");
134
1
    P(cpu_has_vmx_vpid, "Virtual-Processor Identifiers (VPID)");
135
1
    P(cpu_has_vmx_vnmi, "Virtual NMI");
136
1
    P(cpu_has_vmx_msr_bitmap, "MSR direct-access bitmap");
137
1
    P(cpu_has_vmx_unrestricted_guest, "Unrestricted Guest");
138
1
    P(cpu_has_vmx_apic_reg_virt, "APIC Register Virtualization");
139
1
    P(cpu_has_vmx_virtual_intr_delivery, "Virtual Interrupt Delivery");
140
1
    P(cpu_has_vmx_posted_intr_processing, "Posted Interrupt Processing");
141
1
    P(cpu_has_vmx_vmcs_shadowing, "VMCS shadowing");
142
1
    P(cpu_has_vmx_vmfunc, "VM Functions");
143
1
    P(cpu_has_vmx_virt_exceptions, "Virtualisation Exceptions");
144
1
    P(cpu_has_vmx_pml, "Page Modification Logging");
145
1
    P(cpu_has_vmx_tsc_scaling, "TSC Scaling");
146
1
#undef P
147
1
148
1
    if ( !printed )
149
0
        printk(" - none\n");
150
1
}
151
152
static u32 adjust_vmx_controls(
153
    const char *name, u32 ctl_min, u32 ctl_opt, u32 msr, bool_t *mismatch)
154
60
{
155
60
    u32 vmx_msr_low, vmx_msr_high, ctl = ctl_min | ctl_opt;
156
60
157
60
    rdmsr(msr, vmx_msr_low, vmx_msr_high);
158
60
159
60
    ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */
160
60
    ctl |= vmx_msr_low;  /* bit == 1 in low word  ==> must be one  */
161
60
162
60
    /* Ensure minimum (required) set of control bits are supported. */
163
60
    if ( ctl_min & ~ctl )
164
0
    {
165
0
        *mismatch = 1;
166
0
        printk("VMX: CPU%d has insufficient %s (%08x; requires %08x)\n",
167
0
               smp_processor_id(), name, ctl, ctl_min);
168
0
    }
169
60
170
60
    return ctl;
171
60
}
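The low/high word handling above follows the VMX capability-MSR convention: the low word reports bits that must be 1, the high word bits that are allowed to be 1. A small self-contained example with made-up MSR and control values shows the resulting control word.

/*
 * Worked example of adjust_vmx_controls() with hypothetical values;
 * none of the constants below are real MSR contents.
 */
#include <stdint.h>
#include <stdio.h>
#include <assert.h>

int main(void)
{
    uint32_t ctl_min  = 0x00000016;  /* bits the caller requires     */
    uint32_t ctl_opt  = 0x00000040;  /* bits the caller would like   */
    uint32_t msr_low  = 0x00000016;  /* must-be-1 bits               */
    uint32_t msr_high = 0x00000056;  /* allowed-to-be-1 bits         */

    uint32_t ctl = ctl_min | ctl_opt;

    ctl &= msr_high; /* bit == 0 in high word ==> must be zero */
    ctl |= msr_low;  /* bit == 1 in low word  ==> must be one  */

    assert(!(ctl_min & ~ctl));       /* all required bits survived */
    printf("resulting control word: %#x\n", ctl); /* prints 0x56 */
    return 0;
}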
172
173
static bool_t cap_check(const char *name, u32 expected, u32 saw)
174
88
{
175
88
    if ( saw != expected )
176
0
        printk("VMX %s: saw %#x expected %#x\n", name, saw, expected);
177
88
    return saw != expected;
178
88
}
179
180
static int vmx_init_vmcs_config(void)
181
12
{
182
12
    u32 vmx_basic_msr_low, vmx_basic_msr_high, min, opt;
183
12
    u32 _vmx_pin_based_exec_control;
184
12
    u32 _vmx_cpu_based_exec_control;
185
12
    u32 _vmx_secondary_exec_control = 0;
186
12
    u64 _vmx_ept_vpid_cap = 0;
187
12
    u64 _vmx_misc_cap = 0;
188
12
    u32 _vmx_vmexit_control;
189
12
    u32 _vmx_vmentry_control;
190
12
    u64 _vmx_vmfunc = 0;
191
12
    bool_t mismatch = 0;
192
12
193
12
    rdmsr(MSR_IA32_VMX_BASIC, vmx_basic_msr_low, vmx_basic_msr_high);
194
12
195
12
    min = (PIN_BASED_EXT_INTR_MASK |
196
12
           PIN_BASED_NMI_EXITING);
197
12
    opt = (PIN_BASED_VIRTUAL_NMIS |
198
12
           PIN_BASED_POSTED_INTERRUPT);
199
12
    _vmx_pin_based_exec_control = adjust_vmx_controls(
200
12
        "Pin-Based Exec Control", min, opt,
201
12
        MSR_IA32_VMX_PINBASED_CTLS, &mismatch);
202
12
203
12
    min = (CPU_BASED_HLT_EXITING |
204
12
           CPU_BASED_VIRTUAL_INTR_PENDING |
205
12
           CPU_BASED_CR8_LOAD_EXITING |
206
12
           CPU_BASED_CR8_STORE_EXITING |
207
12
           CPU_BASED_INVLPG_EXITING |
208
12
           CPU_BASED_CR3_LOAD_EXITING |
209
12
           CPU_BASED_CR3_STORE_EXITING |
210
12
           CPU_BASED_MONITOR_EXITING |
211
12
           CPU_BASED_MWAIT_EXITING |
212
12
           CPU_BASED_MOV_DR_EXITING |
213
12
           CPU_BASED_ACTIVATE_IO_BITMAP |
214
12
           CPU_BASED_USE_TSC_OFFSETING |
215
12
           CPU_BASED_RDTSC_EXITING);
216
12
    opt = (CPU_BASED_ACTIVATE_MSR_BITMAP |
217
12
           CPU_BASED_TPR_SHADOW |
218
12
           CPU_BASED_MONITOR_TRAP_FLAG |
219
12
           CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
220
12
    _vmx_cpu_based_exec_control = adjust_vmx_controls(
221
12
        "CPU-Based Exec Control", min, opt,
222
12
        MSR_IA32_VMX_PROCBASED_CTLS, &mismatch);
223
12
    _vmx_cpu_based_exec_control &= ~CPU_BASED_RDTSC_EXITING;
224
12
    if ( _vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW )
225
12
        _vmx_cpu_based_exec_control &=
226
12
            ~(CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING);
227
12
228
12
    if ( _vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
229
12
    {
230
12
        min = 0;
231
12
        opt = (SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
232
12
               SECONDARY_EXEC_WBINVD_EXITING |
233
12
               SECONDARY_EXEC_ENABLE_EPT |
234
12
               SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING |
235
12
               SECONDARY_EXEC_ENABLE_RDTSCP |
236
12
               SECONDARY_EXEC_PAUSE_LOOP_EXITING |
237
12
               SECONDARY_EXEC_ENABLE_INVPCID |
238
12
               SECONDARY_EXEC_ENABLE_VM_FUNCTIONS |
239
12
               SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS |
240
12
               SECONDARY_EXEC_XSAVES |
241
12
               SECONDARY_EXEC_TSC_SCALING);
242
12
        rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
243
12
        if ( _vmx_misc_cap & VMX_MISC_VMWRITE_ALL )
244
0
            opt |= SECONDARY_EXEC_ENABLE_VMCS_SHADOWING;
245
12
        if ( opt_vpid_enabled )
246
12
            opt |= SECONDARY_EXEC_ENABLE_VPID;
247
12
        if ( opt_unrestricted_guest_enabled )
248
12
            opt |= SECONDARY_EXEC_UNRESTRICTED_GUEST;
249
12
        if ( opt_pml_enabled )
250
1
            opt |= SECONDARY_EXEC_ENABLE_PML;
251
12
252
12
        /*
253
12
         * "APIC Register Virtualization" and "Virtual Interrupt Delivery"
254
12
         * can be set only when "use TPR shadow" is set
255
12
         */
256
12
        if ( (_vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW) &&
257
12
             opt_apicv_enabled )
258
12
            opt |= SECONDARY_EXEC_APIC_REGISTER_VIRT |
259
12
                   SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
260
12
                   SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
261
12
262
12
        _vmx_secondary_exec_control = adjust_vmx_controls(
263
12
            "Secondary Exec Control", min, opt,
264
12
            MSR_IA32_VMX_PROCBASED_CTLS2, &mismatch);
265
12
    }
266
12
267
12
    /* The IA32_VMX_EPT_VPID_CAP MSR exists only when EPT or VPID is available */
268
12
    if ( _vmx_secondary_exec_control & (SECONDARY_EXEC_ENABLE_EPT |
269
12
                                        SECONDARY_EXEC_ENABLE_VPID) )
270
12
    {
271
12
        rdmsrl(MSR_IA32_VMX_EPT_VPID_CAP, _vmx_ept_vpid_cap);
272
12
273
12
        if ( !opt_ept_ad )
274
0
            _vmx_ept_vpid_cap &= ~VMX_EPT_AD_BIT;
275
12
        else if ( /* Work around Erratum AVR41 on Avoton processors. */
276
12
                  boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x4d &&
277
0
                  opt_ept_ad < 0 )
278
0
            _vmx_ept_vpid_cap &= ~VMX_EPT_AD_BIT;
279
12
280
12
        /*
281
12
         * Additional sanity checking before using EPT:
282
12
         * 1) the CPU we are running on must support EPT WB, as we will set
283
12
         *    ept paging structures memory type to WB;
284
12
         * 2) the CPU must support the EPT page-walk length of 4 according to
285
12
         *    Intel SDM 25.2.2.
286
12
         * 3) the CPU must support INVEPT all context invalidation, because we
287
12
         *    will use it as a final resort if other types are not supported.
288
12
         *
289
12
         * Or we just don't use EPT.
290
12
         */
291
12
        if ( !(_vmx_ept_vpid_cap & VMX_EPT_MEMORY_TYPE_WB) ||
292
12
             !(_vmx_ept_vpid_cap & VMX_EPT_WALK_LENGTH_4_SUPPORTED) ||
293
12
             !(_vmx_ept_vpid_cap & VMX_EPT_INVEPT_ALL_CONTEXT) )
294
0
            _vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
295
12
296
12
        /*
297
12
         * The CPU must support INVVPID all context invalidation, because we
298
12
         * will use it as a final resort if other types are not supported.
299
12
         *
300
12
         * Or we just don't use VPID.
301
12
         */
302
12
        if ( !(_vmx_ept_vpid_cap & VMX_VPID_INVVPID_ALL_CONTEXT) )
303
0
            _vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
304
12
305
12
        /* EPT A/D bits are required for PML */
306
12
        if ( !(_vmx_ept_vpid_cap & VMX_EPT_AD_BIT) )
307
12
            _vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
308
12
    }
309
12
310
12
    if ( _vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT )
311
12
    {
312
12
        /*
313
12
         * To use EPT we expect to be able to clear certain intercepts.
314
12
         * We check VMX_BASIC_MSR[55] to correctly handle default controls.
315
12
         */
316
12
        uint32_t must_be_one, must_be_zero, msr = MSR_IA32_VMX_PROCBASED_CTLS;
317
12
        if ( vmx_basic_msr_high & (VMX_BASIC_DEFAULT1_ZERO >> 32) )
318
12
            msr = MSR_IA32_VMX_TRUE_PROCBASED_CTLS;
319
12
        rdmsr(msr, must_be_one, must_be_zero);
320
12
        if ( must_be_one & (CPU_BASED_INVLPG_EXITING |
321
12
                            CPU_BASED_CR3_LOAD_EXITING |
322
12
                            CPU_BASED_CR3_STORE_EXITING) )
323
0
            _vmx_secondary_exec_control &=
324
0
                ~(SECONDARY_EXEC_ENABLE_EPT |
325
0
                  SECONDARY_EXEC_UNRESTRICTED_GUEST);
326
12
    }
327
12
328
12
    /* PML cannot be supported if EPT is not used */
329
12
    if ( !(_vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) )
330
0
        _vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
331
12
332
12
    /* Turn off opt_pml_enabled if PML feature is not present */
333
12
    if ( !(_vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_PML) )
334
12
        opt_pml_enabled = 0;
335
12
336
12
    if ( (_vmx_secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) &&
337
12
          ple_gap == 0 )
338
0
    {
339
0
        if ( !vmx_pin_based_exec_control )
340
0
            printk(XENLOG_INFO "Disable Pause-Loop Exiting.\n");
341
0
        _vmx_secondary_exec_control &= ~ SECONDARY_EXEC_PAUSE_LOOP_EXITING;
342
0
    }
343
12
344
12
    min = VM_EXIT_ACK_INTR_ON_EXIT;
345
12
    opt = VM_EXIT_SAVE_GUEST_PAT | VM_EXIT_LOAD_HOST_PAT |
346
12
          VM_EXIT_CLEAR_BNDCFGS;
347
12
    min |= VM_EXIT_IA32E_MODE;
348
12
    _vmx_vmexit_control = adjust_vmx_controls(
349
12
        "VMExit Control", min, opt, MSR_IA32_VMX_EXIT_CTLS, &mismatch);
350
12
351
12
    /*
352
12
     * "Process posted interrupt" can be set only when "virtual-interrupt
353
12
     * delivery" and "acknowledge interrupt on exit" is set. For the latter
354
12
     * is a minimal requirement, only check the former, which is optional.
355
12
     */
356
12
    if ( !(_vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) )
357
0
        _vmx_pin_based_exec_control &= ~PIN_BASED_POSTED_INTERRUPT;
358
12
359
12
    if ( iommu_intpost &&
360
0
         !(_vmx_pin_based_exec_control & PIN_BASED_POSTED_INTERRUPT) )
361
0
    {
362
0
        printk("Intel VT-d Posted Interrupt is disabled for CPU-side Posted "
363
0
               "Interrupt is not enabled\n");
364
0
        iommu_intpost = 0;
365
0
    }
366
12
367
12
    /* The IA32_VMX_VMFUNC MSR exists only when VMFUNC is available */
368
12
    if ( _vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_VM_FUNCTIONS )
369
0
    {
370
0
        rdmsrl(MSR_IA32_VMX_VMFUNC, _vmx_vmfunc);
371
0
372
0
        /*
373
0
         * VMFUNC leaf 0 (EPTP switching) must be supported.
374
0
         *
375
0
         * Or we just don't use VMFUNC.
376
0
         */
377
0
        if ( !(_vmx_vmfunc & VMX_VMFUNC_EPTP_SWITCHING) )
378
0
            _vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_VM_FUNCTIONS;
379
0
    }
380
12
381
12
    /* Virtualization exceptions are only enabled if VMFUNC is enabled */
382
12
    if ( !(_vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_VM_FUNCTIONS) )
383
12
        _vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS;
384
12
385
12
    min = 0;
386
12
    opt = VM_ENTRY_LOAD_GUEST_PAT | VM_ENTRY_LOAD_BNDCFGS;
387
12
    _vmx_vmentry_control = adjust_vmx_controls(
388
12
        "VMEntry Control", min, opt, MSR_IA32_VMX_ENTRY_CTLS, &mismatch);
389
12
390
12
    if ( mismatch )
391
0
        return -EINVAL;
392
12
393
12
    if ( !vmx_pin_based_exec_control )
394
1
    {
395
1
        /* First time through. */
396
1
        vmcs_revision_id           = vmx_basic_msr_low & VMX_BASIC_REVISION_MASK;
397
1
        vmx_pin_based_exec_control = _vmx_pin_based_exec_control;
398
1
        vmx_cpu_based_exec_control = _vmx_cpu_based_exec_control;
399
1
        vmx_secondary_exec_control = _vmx_secondary_exec_control;
400
1
        vmx_ept_vpid_cap           = _vmx_ept_vpid_cap;
401
1
        vmx_vmexit_control         = _vmx_vmexit_control;
402
1
        vmx_vmentry_control        = _vmx_vmentry_control;
403
1
        vmx_basic_msr              = ((u64)vmx_basic_msr_high << 32) |
404
1
                                     vmx_basic_msr_low;
405
1
        vmx_vmfunc                 = _vmx_vmfunc;
406
1
        vmx_virt_exception         = !!(_vmx_secondary_exec_control &
407
1
                                       SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS);
408
1
        vmx_display_features();
409
1
410
1
        /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
411
1
        if ( (vmx_basic_msr_high & (VMX_BASIC_VMCS_SIZE_MASK >> 32)) >
412
1
             PAGE_SIZE )
413
0
        {
414
0
            printk("VMX: CPU%d VMCS size is too big (%Lu bytes)\n",
415
0
                   smp_processor_id(),
416
0
                   vmx_basic_msr_high & (VMX_BASIC_VMCS_SIZE_MASK >> 32));
417
0
            return -EINVAL;
418
0
        }
419
1
    }
420
12
    else
421
11
    {
422
11
        /* Globals are already initialised: re-check them. */
423
11
        mismatch |= cap_check(
424
11
            "VMCS revision ID",
425
11
            vmcs_revision_id, vmx_basic_msr_low & VMX_BASIC_REVISION_MASK);
426
11
        mismatch |= cap_check(
427
11
            "Pin-Based Exec Control",
428
11
            vmx_pin_based_exec_control, _vmx_pin_based_exec_control);
429
11
        mismatch |= cap_check(
430
11
            "CPU-Based Exec Control",
431
11
            vmx_cpu_based_exec_control, _vmx_cpu_based_exec_control);
432
11
        mismatch |= cap_check(
433
11
            "Secondary Exec Control",
434
11
            vmx_secondary_exec_control, _vmx_secondary_exec_control);
435
11
        mismatch |= cap_check(
436
11
            "VMExit Control",
437
11
            vmx_vmexit_control, _vmx_vmexit_control);
438
11
        mismatch |= cap_check(
439
11
            "VMEntry Control",
440
11
            vmx_vmentry_control, _vmx_vmentry_control);
441
11
        mismatch |= cap_check(
442
11
            "EPT and VPID Capability",
443
11
            vmx_ept_vpid_cap, _vmx_ept_vpid_cap);
444
11
        mismatch |= cap_check(
445
11
            "VMFUNC Capability",
446
11
            vmx_vmfunc, _vmx_vmfunc);
447
11
        if ( cpu_has_vmx_ins_outs_instr_info !=
448
11
             !!(vmx_basic_msr_high & (VMX_BASIC_INS_OUT_INFO >> 32)) )
449
0
        {
450
0
            printk("VMX INS/OUTS Instruction Info: saw %d expected %d\n",
451
0
                   !!(vmx_basic_msr_high & (VMX_BASIC_INS_OUT_INFO >> 32)),
452
0
                   cpu_has_vmx_ins_outs_instr_info);
453
0
            mismatch = 1;
454
0
        }
455
11
        if ( (vmx_basic_msr_high & (VMX_BASIC_VMCS_SIZE_MASK >> 32)) !=
456
11
             ((vmx_basic_msr & VMX_BASIC_VMCS_SIZE_MASK) >> 32) )
457
0
        {
458
0
            printk("VMX: CPU%d unexpected VMCS size %Lu\n",
459
0
                   smp_processor_id(),
460
0
                   vmx_basic_msr_high & (VMX_BASIC_VMCS_SIZE_MASK >> 32));
461
0
            mismatch = 1;
462
0
        }
463
11
        if ( mismatch )
464
0
        {
465
0
            printk("VMX: Capabilities fatally differ between CPU%d and CPU0\n",
466
0
                   smp_processor_id());
467
0
            return -EINVAL;
468
0
        }
469
11
    }
470
12
471
12
    /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */
472
12
    if ( vmx_basic_msr_high & (VMX_BASIC_32BIT_ADDRESSES >> 32) )
473
0
    {
474
0
        printk("VMX: CPU%d limits VMX structure pointers to 32 bits\n",
475
0
               smp_processor_id());
476
0
        return -EINVAL;
477
0
    }
478
12
479
12
    /* Require Write-Back (WB) memory type for VMCS accesses. */
480
12
    opt = (vmx_basic_msr_high & (VMX_BASIC_MEMORY_TYPE_MASK >> 32)) /
481
12
          ((VMX_BASIC_MEMORY_TYPE_MASK & -VMX_BASIC_MEMORY_TYPE_MASK) >> 32);
482
12
    if ( opt != MTRR_TYPE_WRBACK )
483
0
    {
484
0
        printk("VMX: CPU%d has unexpected VMCS access type %u\n",
485
0
               smp_processor_id(), opt);
486
0
        return -EINVAL;
487
0
    }
488
12
489
12
    return 0;
490
12
}
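One detail worth calling out from the function above: the memory-type check extracts the field by dividing the masked value by the mask's lowest set bit, which is equivalent to shifting right by the field's offset. A stand-alone sketch with an invented mask value:

/*
 * Field-extraction idiom used for the VMCS memory type above: dividing
 * by (MASK & -MASK) shifts the masked value right by the position of
 * the mask's lowest set bit.  The mask below is made up for the demo.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_FIELD_MASK 0x003c0000u              /* bits 21:18, hypothetical */

int main(void)
{
    uint32_t reg = 0x00180000;                   /* field holds the value 6 */
    uint32_t field = (reg & DEMO_FIELD_MASK) /
                     (DEMO_FIELD_MASK & -DEMO_FIELD_MASK);

    printf("extracted field: %u\n", (unsigned)field); /* prints 6 */
    return 0;
}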
491
492
static paddr_t vmx_alloc_vmcs(void)
493
24
{
494
24
    struct page_info *pg;
495
24
    struct vmcs_struct *vmcs;
496
24
497
24
    if ( (pg = alloc_domheap_page(NULL, 0)) == NULL )
498
0
    {
499
0
        gdprintk(XENLOG_WARNING, "Failed to allocate VMCS.\n");
500
0
        return 0;
501
0
    }
502
24
503
24
    vmcs = __map_domain_page(pg);
504
24
    clear_page(vmcs);
505
24
    vmcs->vmcs_revision_id = vmcs_revision_id;
506
24
    unmap_domain_page(vmcs);
507
24
508
24
    return page_to_maddr(pg);
509
24
}
510
511
static void vmx_free_vmcs(paddr_t pa)
512
0
{
513
0
    free_domheap_page(maddr_to_page(pa));
514
0
}
515
516
static void __vmx_clear_vmcs(void *info)
517
807
{
518
807
    struct vcpu *v = info;
519
807
    struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx;
520
807
521
807
    /* Otherwise we can nest (vmx_cpu_down() vs. vmx_clear_vmcs()). */
522
807
    ASSERT(!local_irq_is_enabled());
523
807
524
807
    if ( arch_vmx->active_cpu == smp_processor_id() )
525
807
    {
526
807
        __vmpclear(arch_vmx->vmcs_pa);
527
807
        if ( arch_vmx->vmcs_shadow_maddr )
528
0
            __vmpclear(arch_vmx->vmcs_shadow_maddr);
529
807
530
807
        arch_vmx->active_cpu = -1;
531
807
        arch_vmx->launched   = 0;
532
807
533
807
        list_del(&arch_vmx->active_list);
534
807
535
807
        if ( arch_vmx->vmcs_pa == this_cpu(current_vmcs) )
536
301
            this_cpu(current_vmcs) = 0;
537
807
    }
538
807
}
539
540
static void vmx_clear_vmcs(struct vcpu *v)
541
1.09k
{
542
1.09k
    int cpu = v->arch.hvm_vmx.active_cpu;
543
1.09k
544
1.09k
    if ( cpu != -1 )
545
807
        on_selected_cpus(cpumask_of(cpu), __vmx_clear_vmcs, v, 1);
546
1.09k
}
547
548
static void vmx_load_vmcs(struct vcpu *v)
549
34.7k
{
550
34.7k
    unsigned long flags;
551
34.7k
552
34.7k
    local_irq_save(flags);
553
34.7k
554
34.7k
    if ( v->arch.hvm_vmx.active_cpu == -1 )
555
819
    {
556
819
        list_add(&v->arch.hvm_vmx.active_list, &this_cpu(active_vmcs_list));
557
819
        v->arch.hvm_vmx.active_cpu = smp_processor_id();
558
819
    }
559
34.7k
560
34.7k
    ASSERT(v->arch.hvm_vmx.active_cpu == smp_processor_id());
561
34.7k
562
34.7k
    __vmptrld(v->arch.hvm_vmx.vmcs_pa);
563
34.7k
    this_cpu(current_vmcs) = v->arch.hvm_vmx.vmcs_pa;
564
34.7k
565
34.7k
    local_irq_restore(flags);
566
34.7k
}
567
568
void vmx_vmcs_reload(struct vcpu *v)
569
4.57M
{
570
4.57M
    /*
571
4.57M
     * As we may be running with interrupts disabled, we can't acquire
572
4.57M
     * v->arch.hvm_vmx.vmcs_lock here. However, with interrupts disabled
573
4.57M
     * the VMCS can't be taken away from us anymore if we still own it.
574
4.57M
     */
575
4.57M
    ASSERT(v->is_running || !local_irq_is_enabled());
576
4.57M
    if ( v->arch.hvm_vmx.vmcs_pa == this_cpu(current_vmcs) )
577
4.56M
        return;
578
4.57M
579
11.0k
    vmx_load_vmcs(v);
580
11.0k
}
581
582
int vmx_cpu_up_prepare(unsigned int cpu)
583
12
{
584
12
    /*
585
12
     * If nvmx_cpu_up_prepare() fails, do not return failure; just fall back
586
12
     * to legacy mode for vvmcs synchronization.
587
12
     */
588
12
    if ( nvmx_cpu_up_prepare(cpu) != 0 )
589
0
        printk("CPU%d: Could not allocate virtual VMCS buffer.\n", cpu);
590
12
591
12
    if ( per_cpu(vmxon_region, cpu) )
592
0
        return 0;
593
12
594
12
    per_cpu(vmxon_region, cpu) = vmx_alloc_vmcs();
595
12
    if ( per_cpu(vmxon_region, cpu) )
596
12
        return 0;
597
12
598
0
    printk("CPU%d: Could not allocate host VMCS\n", cpu);
599
0
    nvmx_cpu_dead(cpu);
600
0
    return -ENOMEM;
601
12
}
602
603
void vmx_cpu_dead(unsigned int cpu)
604
0
{
605
0
    vmx_free_vmcs(per_cpu(vmxon_region, cpu));
606
0
    per_cpu(vmxon_region, cpu) = 0;
607
0
    nvmx_cpu_dead(cpu);
608
0
    vmx_pi_desc_fixup(cpu);
609
0
}
610
611
int _vmx_cpu_up(bool bsp)
612
12
{
613
12
    u32 eax, edx;
614
12
    int rc, bios_locked, cpu = smp_processor_id();
615
12
    u64 cr0, vmx_cr0_fixed0, vmx_cr0_fixed1;
616
12
617
12
    BUG_ON(!(read_cr4() & X86_CR4_VMXE));
618
12
619
12
    /* 
620
12
     * Ensure the current processor operating mode meets
621
12
     * the required CR0 fixed bits in VMX operation.
622
12
     */
623
12
    cr0 = read_cr0();
624
12
    rdmsrl(MSR_IA32_VMX_CR0_FIXED0, vmx_cr0_fixed0);
625
12
    rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx_cr0_fixed1);
626
12
    if ( (~cr0 & vmx_cr0_fixed0) || (cr0 & ~vmx_cr0_fixed1) )
627
0
    {
628
0
        printk("CPU%d: some settings of host CR0 are " 
629
0
               "not allowed in VMX operation.\n", cpu);
630
0
        return -EINVAL;
631
0
    }
632
12
633
12
    rdmsr(MSR_IA32_FEATURE_CONTROL, eax, edx);
634
12
635
12
    bios_locked = !!(eax & IA32_FEATURE_CONTROL_LOCK);
636
12
    if ( bios_locked )
637
12
    {
638
12
        if ( !(eax & (tboot_in_measured_env()
639
0
                      ? IA32_FEATURE_CONTROL_ENABLE_VMXON_INSIDE_SMX
640
12
                      : IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX)) )
641
0
        {
642
0
            printk("CPU%d: VMX disabled by BIOS.\n", cpu);
643
0
            return -EINVAL;
644
0
        }
645
12
    }
646
12
    else
647
0
    {
648
0
        eax  = IA32_FEATURE_CONTROL_LOCK;
649
0
        eax |= IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX;
650
0
        if ( test_bit(X86_FEATURE_SMX, &boot_cpu_data.x86_capability) )
651
0
            eax |= IA32_FEATURE_CONTROL_ENABLE_VMXON_INSIDE_SMX;
652
0
        wrmsr(MSR_IA32_FEATURE_CONTROL, eax, 0);
653
0
    }
654
12
655
12
    if ( (rc = vmx_init_vmcs_config()) != 0 )
656
0
        return rc;
657
12
658
12
    INIT_LIST_HEAD(&this_cpu(active_vmcs_list));
659
12
660
12
    if ( bsp && (rc = vmx_cpu_up_prepare(cpu)) != 0 )
661
0
        return rc;
662
12
663
12
    switch ( __vmxon(this_cpu(vmxon_region)) )
664
12
    {
665
0
    case -2: /* #UD or #GP */
666
0
        if ( bios_locked &&
667
0
             test_bit(X86_FEATURE_SMX, &boot_cpu_data.x86_capability) &&
668
0
             (!(eax & IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX) ||
669
0
              !(eax & IA32_FEATURE_CONTROL_ENABLE_VMXON_INSIDE_SMX)) )
670
0
        {
671
0
            printk("CPU%d: VMXON failed: perhaps because of TXT settings "
672
0
                   "in your BIOS configuration?\n", cpu);
673
0
            printk(" --> Disable TXT in your BIOS unless using a secure "
674
0
                   "bootloader.\n");
675
0
            return -EINVAL;
676
0
        }
677
0
        /* fall through */
678
0
    case -1: /* CF==1 or ZF==1 */
679
0
        printk("CPU%d: unexpected VMXON failure\n", cpu);
680
0
        return -EINVAL;
681
12
    case 0: /* success */
682
12
        this_cpu(vmxon) = 1;
683
12
        break;
684
0
    default:
685
0
        BUG();
686
12
    }
687
12
688
12
    hvm_asid_init(cpu_has_vmx_vpid ? (1u << VMCS_VPID_WIDTH) : 0);
689
12
690
12
    if ( cpu_has_vmx_ept )
691
12
        ept_sync_all();
692
12
693
12
    if ( cpu_has_vmx_vpid )
694
12
        vpid_sync_all();
695
12
696
12
    vmx_pi_per_cpu_init(cpu);
697
12
698
12
    return 0;
699
12
}
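The CR0 validation near the top of _vmx_cpu_up() follows the FIXED0/FIXED1 convention: a bit set in MSR_IA32_VMX_CR0_FIXED0 must be set in CR0, and a bit clear in MSR_IA32_VMX_CR0_FIXED1 must be clear. A stand-alone sketch with invented register values:

/*
 * FIXED0/FIXED1 check as used by _vmx_cpu_up(); the MSR values below
 * are invented for the demonstration, not real hardware contents.
 */
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

static bool cr0_valid_for_vmx(uint64_t cr0, uint64_t fixed0, uint64_t fixed1)
{
    return !(~cr0 & fixed0) && !(cr0 & ~fixed1);
}

int main(void)
{
    uint64_t fixed0 = 0x80000021;   /* PE, NE, PG must be set (typical) */
    uint64_t fixed1 = 0xffffffff;   /* every low bit may be set         */

    printf("%d\n", cr0_valid_for_vmx(0x80050033, fixed0, fixed1)); /* 1 */
    printf("%d\n", cr0_valid_for_vmx(0x00000011, fixed0, fixed1)); /* 0: PG and NE clear */
    return 0;
}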
700
701
int vmx_cpu_up()
702
11
{
703
11
    return _vmx_cpu_up(false);
704
11
}
705
706
void vmx_cpu_down(void)
707
0
{
708
0
    struct list_head *active_vmcs_list = &this_cpu(active_vmcs_list);
709
0
    unsigned long flags;
710
0
711
0
    if ( !this_cpu(vmxon) )
712
0
        return;
713
0
714
0
    local_irq_save(flags);
715
0
716
0
    while ( !list_empty(active_vmcs_list) )
717
0
        __vmx_clear_vmcs(list_entry(active_vmcs_list->next,
718
0
                                    struct vcpu, arch.hvm_vmx.active_list));
719
0
720
0
    BUG_ON(!(read_cr4() & X86_CR4_VMXE));
721
0
    this_cpu(vmxon) = 0;
722
0
    __vmxoff();
723
0
724
0
    local_irq_restore(flags);
725
0
}
726
727
struct foreign_vmcs {
728
    struct vcpu *v;
729
    unsigned int count;
730
};
731
static DEFINE_PER_CPU(struct foreign_vmcs, foreign_vmcs);
732
733
bool_t vmx_vmcs_try_enter(struct vcpu *v)
734
492k
{
735
492k
    struct foreign_vmcs *fv;
736
492k
737
492k
    /*
738
492k
     * NB. We must *always* run an HVM VCPU on its own VMCS, except for
739
492k
     * vmx_vmcs_enter/exit and scheduling tail critical regions.
740
492k
     */
741
492k
    if ( likely(v == current) )
742
492k
        return v->arch.hvm_vmx.vmcs_pa == this_cpu(current_vmcs);
743
492k
744
305
    fv = &this_cpu(foreign_vmcs);
745
305
746
305
    if ( fv->v == v )
747
24
    {
748
24
        BUG_ON(fv->count == 0);
749
24
    }
750
305
    else
751
281
    {
752
281
        BUG_ON(fv->v != NULL);
753
281
        BUG_ON(fv->count != 0);
754
281
755
281
        vcpu_pause(v);
756
281
        spin_lock(&v->arch.hvm_vmx.vmcs_lock);
757
281
758
281
        vmx_clear_vmcs(v);
759
281
        vmx_load_vmcs(v);
760
281
761
281
        fv->v = v;
762
281
    }
763
305
764
305
    fv->count++;
765
305
766
305
    return 1;
767
492k
}
768
769
void vmx_vmcs_enter(struct vcpu *v)
770
312k
{
771
312k
    bool_t okay = vmx_vmcs_try_enter(v);
772
312k
773
312k
    ASSERT(okay);
774
312k
}
775
776
void vmx_vmcs_exit(struct vcpu *v)
777
493k
{
778
493k
    struct foreign_vmcs *fv;
779
493k
780
493k
    if ( likely(v == current) )
781
492k
        return;
782
493k
783
258
    fv = &this_cpu(foreign_vmcs);
784
258
    BUG_ON(fv->v != v);
785
258
    BUG_ON(fv->count == 0);
786
258
787
258
    if ( --fv->count == 0 )
788
271
    {
789
271
        /* Don't confuse vmx_do_resume (for @v or @current!) */
790
271
        vmx_clear_vmcs(v);
791
271
        if ( is_hvm_vcpu(current) )
792
0
            vmx_load_vmcs(current);
793
271
794
271
        spin_unlock(&v->arch.hvm_vmx.vmcs_lock);
795
271
        vcpu_unpause(v);
796
271
797
271
        fv->v = NULL;
798
271
    }
799
258
}
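A sketch of how these two helpers are intended to be paired by callers that need to touch another vCPU's VMCS; the wrapper function and the particular field read below are hypothetical, while vmx_vmcs_enter(), vmx_vmcs_exit(), __vmread() and GUEST_RIP are taken from the Xen sources.

/*
 * Hypothetical caller showing the intended pairing of
 * vmx_vmcs_enter()/vmx_vmcs_exit() around remote-VMCS accesses.
 */
static unsigned long inspect_vcpu(struct vcpu *v)
{
    unsigned long rip;

    vmx_vmcs_enter(v);          /* pauses v and loads its VMCS when v is foreign */
    __vmread(GUEST_RIP, &rip);  /* VMCS accesses are valid in this window        */
    vmx_vmcs_exit(v);           /* clears the VMCS and unpauses v when foreign   */

    return rip;
}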
800
801
static void vmx_set_host_env(struct vcpu *v)
802
548
{
803
548
    unsigned int cpu = smp_processor_id();
804
548
805
548
    __vmwrite(HOST_GDTR_BASE,
806
548
              (unsigned long)(this_cpu(gdt_table) - FIRST_RESERVED_GDT_ENTRY));
807
548
    __vmwrite(HOST_IDTR_BASE, (unsigned long)idt_tables[cpu]);
808
548
809
548
    __vmwrite(HOST_TR_SELECTOR, TSS_ENTRY << 3);
810
548
    __vmwrite(HOST_TR_BASE, (unsigned long)&per_cpu(init_tss, cpu));
811
548
812
548
    __vmwrite(HOST_SYSENTER_ESP, get_stack_bottom());
813
548
814
548
    /*
815
548
     * Skip end of cpu_user_regs when entering the hypervisor because the
816
548
     * CPU does not save context onto the stack. SS,RSP,CS,RIP,RFLAGS,etc
817
548
     * all get saved into the VMCS instead.
818
548
     */
819
548
    __vmwrite(HOST_RSP,
820
548
              (unsigned long)&get_cpu_info()->guest_cpu_user_regs.error_code);
821
548
}
822
823
void vmx_clear_msr_intercept(struct vcpu *v, unsigned int msr,
824
                             enum vmx_msr_intercept_type type)
825
1.51k
{
826
1.51k
    struct vmx_msr_bitmap *msr_bitmap = v->arch.hvm_vmx.msr_bitmap;
827
1.51k
    struct domain *d = v->domain;
828
1.51k
829
1.51k
    /* VMX MSR bitmap supported? */
830
1.51k
    if ( msr_bitmap == NULL )
831
0
        return;
832
1.51k
833
1.51k
    if ( unlikely(monitored_msr(d, msr)) )
834
0
        return;
835
1.51k
836
1.51k
    if ( msr <= 0x1fff )
837
1.42k
    {
838
1.42k
        if ( type & VMX_MSR_R )
839
1.40k
            clear_bit(msr, msr_bitmap->read_low);
840
1.42k
        if ( type & VMX_MSR_W )
841
104
            clear_bit(msr, msr_bitmap->write_low);
842
1.42k
    }
843
94
    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
844
36
    {
845
36
        msr &= 0x1fff;
846
36
        if ( type & VMX_MSR_R )
847
36
            clear_bit(msr, msr_bitmap->read_high);
848
36
        if ( type & VMX_MSR_W )
849
36
            clear_bit(msr, msr_bitmap->write_high);
850
36
    }
851
94
    else
852
58
        ASSERT(!"MSR out of range for interception\n");
853
1.51k
}
854
855
void vmx_set_msr_intercept(struct vcpu *v, unsigned int msr,
856
                           enum vmx_msr_intercept_type type)
857
3.12k
{
858
3.12k
    struct vmx_msr_bitmap *msr_bitmap = v->arch.hvm_vmx.msr_bitmap;
859
3.12k
860
3.12k
    /* VMX MSR bitmap supported? */
861
3.12k
    if ( msr_bitmap == NULL )
862
0
        return;
863
3.12k
864
3.12k
    if ( msr <= 0x1fff )
865
3.12k
    {
866
3.12k
        if ( type & VMX_MSR_R )
867
3.12k
            set_bit(msr, msr_bitmap->read_low);
868
3.12k
        if ( type & VMX_MSR_W )
869
3.09k
            set_bit(msr, msr_bitmap->write_low);
870
3.12k
    }
871
18.4E
    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
872
0
    {
873
0
        msr &= 0x1fff;
874
0
        if ( type & VMX_MSR_R )
875
0
            set_bit(msr, msr_bitmap->read_high);
876
0
        if ( type & VMX_MSR_W )
877
0
            set_bit(msr, msr_bitmap->write_high);
878
0
    }
879
18.4E
    else
880
18.4E
        ASSERT(!"MSR out of range for interception\n");
881
3.12k
}
882
883
bool vmx_msr_is_intercepted(struct vmx_msr_bitmap *msr_bitmap,
884
                            unsigned int msr, bool is_write)
885
0
{
886
0
    if ( msr <= 0x1fff )
887
0
        return test_bit(msr, is_write ? msr_bitmap->write_low
888
0
                                      : msr_bitmap->read_low);
889
0
    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
890
0
        return test_bit(msr & 0x1fff, is_write ? msr_bitmap->write_high
891
0
                                               : msr_bitmap->read_high);
892
0
    else
893
0
        /* MSRs outside the bitmap ranges are always intercepted. */
894
0
        return true;
895
0
}
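The three functions above all use the same two-range MSR-bitmap addressing; a reduced, stand-alone model with invented array names behaves the same way.

/*
 * Reduced model of the MSR-bitmap addressing: MSRs 0x0-0x1fff index the
 * "low" bitmaps, MSRs 0xc0000000-0xc0001fff index the "high" bitmaps
 * after masking with 0x1fff; everything else is always intercepted.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint8_t demo_read_low[0x2000 / 8], demo_read_high[0x2000 / 8];

static bool demo_read_intercepted(uint32_t msr)
{
    if ( msr <= 0x1fff )
        return demo_read_low[msr / 8] & (1u << (msr % 8));
    if ( msr >= 0xc0000000 && msr <= 0xc0001fff )
    {
        msr &= 0x1fff;
        return demo_read_high[msr / 8] & (1u << (msr % 8));
    }
    return true;                       /* outside both ranges */
}

int main(void)
{
    demo_read_high[(0xc0000080 & 0x1fff) / 8] |= 1u << (0xc0000080 % 8);

    printf("%d\n", demo_read_intercepted(0xc0000080)); /* 1: MSR_EFER, set above */
    printf("%d\n", demo_read_intercepted(0x00000010)); /* 0: TSC MSR, not set    */
    return 0;
}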
896
897
898
/*
899
 * Switch VMCS between layer 1 & 2 guest
900
 */
901
void vmx_vmcs_switch(paddr_t from, paddr_t to)
902
0
{
903
0
    struct arch_vmx_struct *vmx = &current->arch.hvm_vmx;
904
0
    spin_lock(&vmx->vmcs_lock);
905
0
906
0
    __vmpclear(from);
907
0
    if ( vmx->vmcs_shadow_maddr )
908
0
        __vmpclear(vmx->vmcs_shadow_maddr);
909
0
    __vmptrld(to);
910
0
911
0
    vmx->vmcs_pa = to;
912
0
    vmx->launched = 0;
913
0
    this_cpu(current_vmcs) = to;
914
0
915
0
    if ( vmx->hostenv_migrated )
916
0
    {
917
0
        vmx->hostenv_migrated = 0;
918
0
        vmx_set_host_env(current);
919
0
    }
920
0
921
0
    spin_unlock(&vmx->vmcs_lock);
922
0
}
923
924
void virtual_vmcs_enter(const struct vcpu *v)
925
0
{
926
0
    __vmptrld(v->arch.hvm_vmx.vmcs_shadow_maddr);
927
0
}
928
929
void virtual_vmcs_exit(const struct vcpu *v)
930
0
{
931
0
    paddr_t cur = this_cpu(current_vmcs);
932
0
933
0
    __vmpclear(v->arch.hvm_vmx.vmcs_shadow_maddr);
934
0
    if ( cur )
935
0
        __vmptrld(cur);
936
0
}
937
938
u64 virtual_vmcs_vmread(const struct vcpu *v, u32 vmcs_encoding)
939
0
{
940
0
    u64 res;
941
0
942
0
    virtual_vmcs_enter(v);
943
0
    __vmread(vmcs_encoding, &res);
944
0
    virtual_vmcs_exit(v);
945
0
946
0
    return res;
947
0
}
948
949
enum vmx_insn_errno virtual_vmcs_vmread_safe(const struct vcpu *v,
950
                                             u32 vmcs_encoding, u64 *val)
951
0
{
952
0
    enum vmx_insn_errno ret;
953
0
954
0
    virtual_vmcs_enter(v);
955
0
    ret = vmread_safe(vmcs_encoding, val);
956
0
    virtual_vmcs_exit(v);
957
0
958
0
    return ret;
959
0
}
960
961
void virtual_vmcs_vmwrite(const struct vcpu *v, u32 vmcs_encoding, u64 val)
962
0
{
963
0
    virtual_vmcs_enter(v);
964
0
    __vmwrite(vmcs_encoding, val);
965
0
    virtual_vmcs_exit(v);
966
0
}
967
968
enum vmx_insn_errno virtual_vmcs_vmwrite_safe(const struct vcpu *v,
969
                                              u32 vmcs_encoding, u64 val)
970
0
{
971
0
    enum vmx_insn_errno ret;
972
0
973
0
    virtual_vmcs_enter(v);
974
0
    ret = vmwrite_safe(vmcs_encoding, val);
975
0
    virtual_vmcs_exit(v);
976
0
977
0
    return ret;
978
0
}
979
980
/*
981
 * This function is only called in a vCPU's initialization phase,
982
 * so we can update the posted-interrupt descriptor in a non-atomic way.
983
 */
984
static void pi_desc_init(struct vcpu *v)
985
0
{
986
0
    v->arch.hvm_vmx.pi_desc.nv = posted_intr_vector;
987
0
988
0
    /*
989
0
     * Mark NDST as invalid, then we can use this invalid value as a
990
0
     * marker to whether update NDST or not in vmx_pi_hooks_assign().
991
0
     */
992
0
    v->arch.hvm_vmx.pi_desc.ndst = APIC_INVALID_DEST;
993
0
}
994
995
static int construct_vmcs(struct vcpu *v)
996
12
{
997
12
    struct domain *d = v->domain;
998
12
    uint16_t sysenter_cs;
999
12
    unsigned long sysenter_eip;
1000
12
    u32 vmexit_ctl = vmx_vmexit_control;
1001
12
    u32 vmentry_ctl = vmx_vmentry_control;
1002
12
1003
12
    vmx_vmcs_enter(v);
1004
12
1005
12
    /* VMCS controls. */
1006
12
    __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
1007
12
1008
12
    v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control;
1009
12
    if ( d->arch.vtsc && !cpu_has_vmx_tsc_scaling )
1010
0
        v->arch.hvm_vmx.exec_control |= CPU_BASED_RDTSC_EXITING;
1011
12
1012
12
    v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control;
1013
12
1014
12
    /*
1015
12
     * Disable descriptor table exiting: It's controlled by the VM event
1016
12
     * monitor requesting it.
1017
12
     */
1018
12
    v->arch.hvm_vmx.secondary_exec_control &=
1019
12
        ~SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING;
1020
12
1021
12
    /* Disable VPID for now: we decide when to enable it on VMENTER. */
1022
12
    v->arch.hvm_vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
1023
12
1024
12
    if ( paging_mode_hap(d) )
1025
12
    {
1026
12
        v->arch.hvm_vmx.exec_control &= ~(CPU_BASED_INVLPG_EXITING |
1027
12
                                          CPU_BASED_CR3_LOAD_EXITING |
1028
12
                                          CPU_BASED_CR3_STORE_EXITING);
1029
12
    }
1030
12
    else
1031
0
    {
1032
0
        v->arch.hvm_vmx.secondary_exec_control &= 
1033
0
            ~(SECONDARY_EXEC_ENABLE_EPT | 
1034
0
              SECONDARY_EXEC_UNRESTRICTED_GUEST |
1035
0
              SECONDARY_EXEC_ENABLE_INVPCID);
1036
0
        vmexit_ctl &= ~(VM_EXIT_SAVE_GUEST_PAT |
1037
0
                        VM_EXIT_LOAD_HOST_PAT);
1038
0
        vmentry_ctl &= ~VM_ENTRY_LOAD_GUEST_PAT;
1039
0
    }
1040
12
1041
12
    /* Disable Virtualize x2APIC mode by default. */
1042
12
    v->arch.hvm_vmx.secondary_exec_control &=
1043
12
        ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
1044
12
1045
12
    /* Do not enable Monitor Trap Flag unless starting single-step debugging */
1046
12
    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
1047
12
1048
12
    /* Disable VMFUNC and #VE for now: they may be enabled later by altp2m. */
1049
12
    v->arch.hvm_vmx.secondary_exec_control &=
1050
12
        ~(SECONDARY_EXEC_ENABLE_VM_FUNCTIONS |
1051
12
          SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS);
1052
12
1053
12
    if ( !has_vlapic(d) )
1054
0
    {
1055
0
        /* Disable virtual apics, TPR */
1056
0
        v->arch.hvm_vmx.secondary_exec_control &=
1057
0
            ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES
1058
0
              | SECONDARY_EXEC_APIC_REGISTER_VIRT
1059
0
              | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
1060
0
        v->arch.hvm_vmx.exec_control &= ~CPU_BASED_TPR_SHADOW;
1061
0
1062
0
        /* In turn, disable posted interrupts. */
1063
0
        __vmwrite(PIN_BASED_VM_EXEC_CONTROL,
1064
0
                  vmx_pin_based_exec_control & ~PIN_BASED_POSTED_INTERRUPT);
1065
0
    }
1066
12
1067
12
    vmx_update_cpu_exec_control(v);
1068
12
1069
12
    __vmwrite(VM_EXIT_CONTROLS, vmexit_ctl);
1070
12
    __vmwrite(VM_ENTRY_CONTROLS, vmentry_ctl);
1071
12
1072
12
    if ( cpu_has_vmx_ple )
1073
12
    {
1074
12
        __vmwrite(PLE_GAP, ple_gap);
1075
12
        __vmwrite(PLE_WINDOW, ple_window);
1076
12
    }
1077
12
1078
12
    if ( cpu_has_vmx_secondary_exec_control )
1079
12
        __vmwrite(SECONDARY_VM_EXEC_CONTROL,
1080
12
                  v->arch.hvm_vmx.secondary_exec_control);
1081
12
1082
12
    /* MSR access bitmap. */
1083
12
    if ( cpu_has_vmx_msr_bitmap )
1084
12
    {
1085
12
        struct vmx_msr_bitmap *msr_bitmap = alloc_xenheap_page();
1086
12
1087
12
        if ( msr_bitmap == NULL )
1088
0
        {
1089
0
            vmx_vmcs_exit(v);
1090
0
            return -ENOMEM;
1091
0
        }
1092
12
1093
12
        memset(msr_bitmap, ~0, PAGE_SIZE);
1094
12
        v->arch.hvm_vmx.msr_bitmap = msr_bitmap;
1095
12
        __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));
1096
12
1097
12
        vmx_clear_msr_intercept(v, MSR_FS_BASE, VMX_MSR_RW);
1098
12
        vmx_clear_msr_intercept(v, MSR_GS_BASE, VMX_MSR_RW);
1099
12
        vmx_clear_msr_intercept(v, MSR_SHADOW_GS_BASE, VMX_MSR_RW);
1100
12
        vmx_clear_msr_intercept(v, MSR_IA32_SYSENTER_CS, VMX_MSR_RW);
1101
12
        vmx_clear_msr_intercept(v, MSR_IA32_SYSENTER_ESP, VMX_MSR_RW);
1102
12
        vmx_clear_msr_intercept(v, MSR_IA32_SYSENTER_EIP, VMX_MSR_RW);
1103
12
        if ( paging_mode_hap(d) && (!iommu_enabled || iommu_snoop) )
1104
12
            vmx_clear_msr_intercept(v, MSR_IA32_CR_PAT, VMX_MSR_RW);
1105
12
        if ( (vmexit_ctl & VM_EXIT_CLEAR_BNDCFGS) &&
1106
0
             (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) )
1107
0
            vmx_clear_msr_intercept(v, MSR_IA32_BNDCFGS, VMX_MSR_RW);
1108
12
    }
1109
12
1110
12
    /* I/O access bitmap. */
1111
12
    __vmwrite(IO_BITMAP_A, __pa(d->arch.hvm_domain.io_bitmap));
1112
12
    __vmwrite(IO_BITMAP_B, __pa(d->arch.hvm_domain.io_bitmap) + PAGE_SIZE);
1113
12
1114
12
    if ( cpu_has_vmx_virtual_intr_delivery )
1115
12
    {
1116
12
        unsigned int i;
1117
12
1118
12
        /* EOI-exit bitmap */
1119
12
        bitmap_zero(v->arch.hvm_vmx.eoi_exit_bitmap, NR_VECTORS);
1120
60
        for ( i = 0; i < ARRAY_SIZE(v->arch.hvm_vmx.eoi_exit_bitmap); ++i )
1121
48
            __vmwrite(EOI_EXIT_BITMAP(i), 0);
1122
12
1123
12
        /* Initialise Guest Interrupt Status (RVI and SVI) to 0 */
1124
12
        __vmwrite(GUEST_INTR_STATUS, 0);
1125
12
    }
1126
12
1127
12
    if ( cpu_has_vmx_posted_intr_processing )
1128
12
    {
1129
12
        if ( iommu_intpost )
1130
0
            pi_desc_init(v);
1131
12
1132
12
        __vmwrite(PI_DESC_ADDR, virt_to_maddr(&v->arch.hvm_vmx.pi_desc));
1133
12
        __vmwrite(POSTED_INTR_NOTIFICATION_VECTOR, posted_intr_vector);
1134
12
    }
1135
12
1136
12
    /* Disable PML here anyway, as it will only be enabled in log-dirty mode */
1137
12
    v->arch.hvm_vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
1138
12
1139
12
    /* Host data selectors. */
1140
12
    __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
1141
12
    __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
1142
12
    __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS);
1143
12
    __vmwrite(HOST_FS_SELECTOR, 0);
1144
12
    __vmwrite(HOST_GS_SELECTOR, 0);
1145
12
    __vmwrite(HOST_FS_BASE, 0);
1146
12
    __vmwrite(HOST_GS_BASE, 0);
1147
12
1148
12
    /* Host control registers. */
1149
12
    v->arch.hvm_vmx.host_cr0 = read_cr0() | X86_CR0_TS;
1150
12
    __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
1151
12
    __vmwrite(HOST_CR4, mmu_cr4_features);
1152
12
1153
12
    /* Host CS:RIP. */
1154
12
    __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
1155
12
    __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler);
1156
12
1157
12
    /* Host SYSENTER CS:RIP. */
1158
12
    rdmsrl(MSR_IA32_SYSENTER_CS, sysenter_cs);
1159
12
    __vmwrite(HOST_SYSENTER_CS, sysenter_cs);
1160
12
    rdmsrl(MSR_IA32_SYSENTER_EIP, sysenter_eip);
1161
12
    __vmwrite(HOST_SYSENTER_EIP, sysenter_eip);
1162
12
1163
12
    /* MSR intercepts. */
1164
12
    __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
1165
12
    __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
1166
12
    __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
1167
12
1168
12
    __vmwrite(VM_ENTRY_INTR_INFO, 0);
1169
12
1170
12
    __vmwrite(CR0_GUEST_HOST_MASK, ~0UL);
1171
12
    __vmwrite(CR4_GUEST_HOST_MASK, ~0UL);
1172
12
1173
12
    __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
1174
12
    __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);
1175
12
1176
12
    __vmwrite(CR3_TARGET_COUNT, 0);
1177
12
1178
12
    __vmwrite(GUEST_ACTIVITY_STATE, 0);
1179
12
1180
12
    /* Guest segment bases. */
1181
12
    __vmwrite(GUEST_ES_BASE, 0);
1182
12
    __vmwrite(GUEST_SS_BASE, 0);
1183
12
    __vmwrite(GUEST_DS_BASE, 0);
1184
12
    __vmwrite(GUEST_FS_BASE, 0);
1185
12
    __vmwrite(GUEST_GS_BASE, 0);
1186
12
    __vmwrite(GUEST_CS_BASE, 0);
1187
12
1188
12
    /* Guest segment limits. */
1189
12
    __vmwrite(GUEST_ES_LIMIT, ~0u);
1190
12
    __vmwrite(GUEST_SS_LIMIT, ~0u);
1191
12
    __vmwrite(GUEST_DS_LIMIT, ~0u);
1192
12
    __vmwrite(GUEST_FS_LIMIT, ~0u);
1193
12
    __vmwrite(GUEST_GS_LIMIT, ~0u);
1194
12
    __vmwrite(GUEST_CS_LIMIT, ~0u);
1195
12
1196
12
    /* Guest segment AR bytes. */
1197
12
    __vmwrite(GUEST_ES_AR_BYTES, 0xc093); /* read/write, accessed */
1198
12
    __vmwrite(GUEST_SS_AR_BYTES, 0xc093);
1199
12
    __vmwrite(GUEST_DS_AR_BYTES, 0xc093);
1200
12
    __vmwrite(GUEST_FS_AR_BYTES, 0xc093);
1201
12
    __vmwrite(GUEST_GS_AR_BYTES, 0xc093);
1202
12
    __vmwrite(GUEST_CS_AR_BYTES, 0xc09b); /* exec/read, accessed */
1203
12
1204
12
    /* Guest IDT. */
1205
12
    __vmwrite(GUEST_IDTR_BASE, 0);
1206
12
    __vmwrite(GUEST_IDTR_LIMIT, 0);
1207
12
1208
12
    /* Guest GDT. */
1209
12
    __vmwrite(GUEST_GDTR_BASE, 0);
1210
12
    __vmwrite(GUEST_GDTR_LIMIT, 0);
1211
12
1212
12
    /* Guest LDT. */
1213
12
    __vmwrite(GUEST_LDTR_AR_BYTES, 0x0082); /* LDT */
1214
12
    __vmwrite(GUEST_LDTR_SELECTOR, 0);
1215
12
    __vmwrite(GUEST_LDTR_BASE, 0);
1216
12
    __vmwrite(GUEST_LDTR_LIMIT, 0);
1217
12
1218
12
    /* Guest TSS. */
1219
12
    __vmwrite(GUEST_TR_AR_BYTES, 0x008b); /* 32-bit TSS (busy) */
1220
12
    __vmwrite(GUEST_TR_BASE, 0);
1221
12
    __vmwrite(GUEST_TR_LIMIT, 0xff);
1222
12
1223
12
    __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
1224
12
    __vmwrite(GUEST_DR7, 0);
1225
12
    __vmwrite(VMCS_LINK_POINTER, ~0UL);
1226
12
1227
12
    v->arch.hvm_vmx.exception_bitmap = HVM_TRAP_MASK
1228
12
              | (paging_mode_hap(d) ? 0 : (1U << TRAP_page_fault))
1229
12
              | (1U << TRAP_no_device);
1230
12
    vmx_update_exception_bitmap(v);
1231
12
1232
12
    v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_ET;
1233
12
    hvm_update_guest_cr(v, 0);
1234
12
1235
12
    v->arch.hvm_vcpu.guest_cr[4] = 0;
1236
12
    hvm_update_guest_cr(v, 4);
1237
12
1238
12
    if ( cpu_has_vmx_tpr_shadow )
1239
12
    {
1240
12
        __vmwrite(VIRTUAL_APIC_PAGE_ADDR,
1241
12
                  page_to_maddr(vcpu_vlapic(v)->regs_page));
1242
12
        __vmwrite(TPR_THRESHOLD, 0);
1243
12
    }
1244
12
1245
12
    if ( paging_mode_hap(d) )
1246
12
    {
1247
12
        struct p2m_domain *p2m = p2m_get_hostp2m(d);
1248
12
        struct ept_data *ept = &p2m->ept;
1249
12
1250
12
        ept->mfn = pagetable_get_pfn(p2m_get_pagetable(p2m));
1251
12
        __vmwrite(EPT_POINTER, ept->eptp);
1252
12
    }
1253
12
1254
12
    if ( paging_mode_hap(d) )
1255
12
    {
1256
12
        u64 host_pat, guest_pat;
1257
12
1258
12
        rdmsrl(MSR_IA32_CR_PAT, host_pat);
1259
12
        guest_pat = MSR_IA32_CR_PAT_RESET;
1260
12
1261
12
        __vmwrite(HOST_PAT, host_pat);
1262
12
        __vmwrite(GUEST_PAT, guest_pat);
1263
12
    }
1264
12
    if ( cpu_has_vmx_mpx )
1265
0
        __vmwrite(GUEST_BNDCFGS, 0);
1266
12
    if ( cpu_has_vmx_xsaves )
1267
0
        __vmwrite(XSS_EXIT_BITMAP, 0);
1268
12
1269
12
    if ( cpu_has_vmx_tsc_scaling )
1270
0
        __vmwrite(TSC_MULTIPLIER, d->arch.hvm_domain.tsc_scaling_ratio);
1271
12
1272
12
    vmx_vmcs_exit(v);
1273
12
1274
12
    /* Will update HOST_CR3 and GUEST_CR3 as required. */
1275
12
    paging_update_paging_modes(v);
1276
12
1277
12
    vmx_vlapic_msr_changed(v);
1278
12
1279
12
    return 0;
1280
12
}
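The guest segment access-rights values written by construct_vmcs() (0xc093, 0xc09b, 0x008b, 0x0082) follow the VMCS AR-byte encoding; the small illustrative decoder below (not part of vmcs.c) spells the fields out.

/*
 * Decoder for the VMX segment access-rights words written above.
 * Field layout follows the VMCS guest segment AR-byte format.
 */
#include <stdint.h>
#include <stdio.h>

static void decode_ar(uint32_t ar)
{
    printf("%#06x: type=%#x s=%u dpl=%u p=%u l=%u db=%u g=%u unusable=%u\n",
           ar, ar & 0xf, (ar >> 4) & 1, (ar >> 5) & 3, (ar >> 7) & 1,
           (ar >> 13) & 1, (ar >> 14) & 1, (ar >> 15) & 1, (ar >> 16) & 1);
}

int main(void)
{
    decode_ar(0xc093); /* data seg: read/write, accessed, present, 32-bit, 4K gran */
    decode_ar(0xc09b); /* code seg: exec/read, accessed, present, 32-bit, 4K gran  */
    decode_ar(0x008b); /* TR: 32-bit busy TSS, present                             */
    decode_ar(0x0082); /* LDTR: LDT, present                                       */
    return 0;
}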
1281
1282
static int vmx_msr_entry_key_cmp(const void *key, const void *elt)
1283
0
{
1284
0
    const u32 *msr = key;
1285
0
    const struct vmx_msr_entry *entry = elt;
1286
0
1287
0
    if ( *msr > entry->index )
1288
0
        return 1;
1289
0
    if ( *msr < entry->index )
1290
0
        return -1;
1291
0
1292
0
    return 0;
1293
0
}
1294
1295
struct vmx_msr_entry *vmx_find_msr(u32 msr, int type)
1296
2
{
1297
2
    struct vcpu *curr = current;
1298
2
    unsigned int msr_count;
1299
2
    struct vmx_msr_entry *msr_area;
1300
2
1301
2
    if ( type == VMX_GUEST_MSR )
1302
2
    {
1303
2
        msr_count = curr->arch.hvm_vmx.msr_count;
1304
2
        msr_area = curr->arch.hvm_vmx.msr_area;
1305
2
    }
1306
2
    else
1307
0
    {
1308
0
        ASSERT(type == VMX_HOST_MSR);
1309
0
        msr_count = curr->arch.hvm_vmx.host_msr_count;
1310
0
        msr_area = curr->arch.hvm_vmx.host_msr_area;
1311
0
    }
1312
2
1313
2
    if ( msr_area == NULL )
1314
2
        return NULL;
1315
2
1316
0
    return bsearch(&msr, msr_area, msr_count, sizeof(struct vmx_msr_entry),
1317
0
                   vmx_msr_entry_key_cmp);
1318
2
}
1319
1320
int vmx_read_guest_msr(u32 msr, u64 *val)
1321
0
{
1322
0
    struct vmx_msr_entry *ent;
1323
0
1324
0
    if ( (ent = vmx_find_msr(msr, VMX_GUEST_MSR)) != NULL )
1325
0
    {
1326
0
        *val = ent->data;
1327
0
        return 0;
1328
0
    }
1329
0
1330
0
    return -ESRCH;
1331
0
}
1332
1333
int vmx_write_guest_msr(u32 msr, u64 val)
1334
2
{
1335
2
    struct vmx_msr_entry *ent;
1336
2
1337
2
    if ( (ent = vmx_find_msr(msr, VMX_GUEST_MSR)) != NULL )
1338
0
    {
1339
0
        ent->data = val;
1340
0
        return 0;
1341
0
    }
1342
2
1343
2
    return -ESRCH;
1344
2
}
1345
1346
int vmx_add_msr(u32 msr, int type)
1347
0
{
1348
0
    struct vcpu *curr = current;
1349
0
    unsigned int idx, *msr_count;
1350
0
    struct vmx_msr_entry **msr_area, *msr_area_elem;
1351
0
1352
0
    if ( type == VMX_GUEST_MSR )
1353
0
    {
1354
0
        msr_count = &curr->arch.hvm_vmx.msr_count;
1355
0
        msr_area = &curr->arch.hvm_vmx.msr_area;
1356
0
    }
1357
0
    else
1358
0
    {
1359
0
        ASSERT(type == VMX_HOST_MSR);
1360
0
        msr_count = &curr->arch.hvm_vmx.host_msr_count;
1361
0
        msr_area = &curr->arch.hvm_vmx.host_msr_area;
1362
0
    }
1363
0
1364
0
    if ( *msr_area == NULL )
1365
0
    {
1366
0
        if ( (*msr_area = alloc_xenheap_page()) == NULL )
1367
0
            return -ENOMEM;
1368
0
1369
0
        if ( type == VMX_GUEST_MSR )
1370
0
        {
1371
0
            __vmwrite(VM_EXIT_MSR_STORE_ADDR, virt_to_maddr(*msr_area));
1372
0
            __vmwrite(VM_ENTRY_MSR_LOAD_ADDR, virt_to_maddr(*msr_area));
1373
0
        }
1374
0
        else
1375
0
            __vmwrite(VM_EXIT_MSR_LOAD_ADDR, virt_to_maddr(*msr_area));
1376
0
    }
1377
0
1378
0
    for ( idx = 0; idx < *msr_count && (*msr_area)[idx].index <= msr; idx++ )
1379
0
        if ( (*msr_area)[idx].index == msr )
1380
0
            return 0;
1381
0
1382
0
    if ( *msr_count == (PAGE_SIZE / sizeof(struct vmx_msr_entry)) )
1383
0
        return -ENOSPC;
1384
0
1385
0
    memmove(*msr_area + idx + 1, *msr_area + idx,
1386
0
            sizeof(*msr_area_elem) * (*msr_count - idx));
1387
0
1388
0
    msr_area_elem = *msr_area + idx;
1389
0
    msr_area_elem->index = msr;
1390
0
    msr_area_elem->mbz = 0;
1391
0
1392
0
    ++*msr_count;
1393
0
1394
0
    if ( type == VMX_GUEST_MSR )
1395
0
    {
1396
0
        msr_area_elem->data = 0;
1397
0
        __vmwrite(VM_EXIT_MSR_STORE_COUNT, *msr_count);
1398
0
        __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, *msr_count);
1399
0
    }
1400
0
    else
1401
0
    {
1402
0
        rdmsrl(msr, msr_area_elem->data);
1403
0
        __vmwrite(VM_EXIT_MSR_LOAD_COUNT, *msr_count);
1404
0
    }
1405
0
1406
0
    return 0;
1407
0
}
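vmx_add_msr() keeps the MSR area sorted by index so that vmx_find_msr() can use bsearch(); the reduced stand-alone model below (demo_* names and sizes are invented) reproduces the same insert-sorted/bsearch pattern.

/*
 * Reduced model of the sorted MSR-area handling above; simplified
 * stand-ins, not the real VMCS load/store area layout.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct demo_entry { uint32_t index; uint64_t data; };

static struct demo_entry demo_area[16];
static unsigned int demo_count;

static int demo_cmp(const void *key, const void *elt)
{
    const uint32_t *msr = key;
    const struct demo_entry *e = elt;

    return *msr > e->index ? 1 : *msr < e->index ? -1 : 0;
}

static void demo_add(uint32_t msr)
{
    unsigned int idx;

    for ( idx = 0; idx < demo_count && demo_area[idx].index <= msr; idx++ )
        if ( demo_area[idx].index == msr )
            return;                               /* already present */

    memmove(&demo_area[idx + 1], &demo_area[idx],
            sizeof(demo_area[0]) * (demo_count - idx));
    demo_area[idx] = (struct demo_entry){ .index = msr };
    demo_count++;
}

int main(void)
{
    uint32_t key = 0xc0000080;                    /* MSR_EFER */

    demo_add(0xc0000100);
    demo_add(0xc0000080);
    demo_add(0x00000277);

    struct demo_entry *e = bsearch(&key, demo_area, demo_count,
                                   sizeof(demo_area[0]), demo_cmp);
    printf("found %#x at slot %td\n", e->index, e - demo_area); /* slot 1 */
    return 0;
}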
1408
1409
void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector)
1410
296
{
1411
296
    if ( !test_and_set_bit(vector, v->arch.hvm_vmx.eoi_exit_bitmap) )
1412
3
        set_bit(vector / BITS_PER_LONG,
1413
296
                &v->arch.hvm_vmx.eoi_exitmap_changed);
1414
296
}
1415
1416
void vmx_clear_eoi_exit_bitmap(struct vcpu *v, u8 vector)
1417
3.84k
{
1418
3.84k
    if ( test_and_clear_bit(vector, v->arch.hvm_vmx.eoi_exit_bitmap) )
1419
0
        set_bit(vector / BITS_PER_LONG,
1420
3.84k
                &v->arch.hvm_vmx.eoi_exitmap_changed);
1421
3.84k
}
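Both helpers above record, per 64-bit bitmap word, whether the EOI-exit bitmap changed, so that only dirty words need to be rewritten into the VMCS later; a reduced stand-alone model with invented names and sizes:

/*
 * Reduced model of the EOI-exit-bitmap bookkeeping: one dirty bit per
 * 64-bit bitmap word.  Names and sizes are stand-ins for the demo.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_BITS_PER_LONG 64

static uint64_t demo_eoi_exit_bitmap[256 / DEMO_BITS_PER_LONG];
static uint64_t demo_exitmap_changed;

static void demo_set_eoi_exit(uint8_t vector)
{
    uint64_t mask = 1ULL << (vector % DEMO_BITS_PER_LONG);
    unsigned int word = vector / DEMO_BITS_PER_LONG;

    if ( !(demo_eoi_exit_bitmap[word] & mask) )   /* only mark real changes */
    {
        demo_eoi_exit_bitmap[word] |= mask;
        demo_exitmap_changed |= 1ULL << word;
    }
}

int main(void)
{
    demo_set_eoi_exit(0x31);   /* vector 49  -> word 0 */
    demo_set_eoi_exit(0x91);   /* vector 145 -> word 2 */
    printf("changed words: %#llx\n",
           (unsigned long long)demo_exitmap_changed); /* prints 0x5 */
    return 0;
}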
1422
1423
bool_t vmx_vcpu_pml_enabled(const struct vcpu *v)
1424
0
{
1425
0
    return !!(v->arch.hvm_vmx.secondary_exec_control &
1426
0
              SECONDARY_EXEC_ENABLE_PML);
1427
0
}
1428
1429
int vmx_vcpu_enable_pml(struct vcpu *v)
1430
0
{
1431
0
    if ( vmx_vcpu_pml_enabled(v) )
1432
0
        return 0;
1433
0
1434
0
    v->arch.hvm_vmx.pml_pg = v->domain->arch.paging.alloc_page(v->domain);
1435
0
    if ( !v->arch.hvm_vmx.pml_pg )
1436
0
        return -ENOMEM;
1437
0
1438
0
    vmx_vmcs_enter(v);
1439
0
1440
0
    __vmwrite(PML_ADDRESS, page_to_mfn(v->arch.hvm_vmx.pml_pg) << PAGE_SHIFT);
1441
0
    __vmwrite(GUEST_PML_INDEX, NR_PML_ENTRIES - 1);
1442
0
1443
0
    v->arch.hvm_vmx.secondary_exec_control |= SECONDARY_EXEC_ENABLE_PML;
1444
0
1445
0
    __vmwrite(SECONDARY_VM_EXEC_CONTROL,
1446
0
              v->arch.hvm_vmx.secondary_exec_control);
1447
0
1448
0
    vmx_vmcs_exit(v);
1449
0
1450
0
    return 0;
1451
0
}
1452
1453
void vmx_vcpu_disable_pml(struct vcpu *v)
1454
0
{
1455
0
    if ( !vmx_vcpu_pml_enabled(v) )
1456
0
        return;
1457
0
1458
0
    /* Make sure we don't lose any logged GPAs. */
1459
0
    vmx_vcpu_flush_pml_buffer(v);
1460
0
1461
0
    vmx_vmcs_enter(v);
1462
0
1463
0
    v->arch.hvm_vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
1464
0
    __vmwrite(SECONDARY_VM_EXEC_CONTROL,
1465
0
              v->arch.hvm_vmx.secondary_exec_control);
1466
0
1467
0
    vmx_vmcs_exit(v);
1468
0
1469
0
    v->domain->arch.paging.free_page(v->domain, v->arch.hvm_vmx.pml_pg);
1470
0
    v->arch.hvm_vmx.pml_pg = NULL;
1471
0
}
1472
1473
void vmx_vcpu_flush_pml_buffer(struct vcpu *v)
1474
0
{
1475
0
    uint64_t *pml_buf;
1476
0
    unsigned long pml_idx;
1477
0
1478
0
    ASSERT((v == current) || (!vcpu_runnable(v) && !v->is_running));
1479
0
    ASSERT(vmx_vcpu_pml_enabled(v));
1480
0
1481
0
    vmx_vmcs_enter(v);
1482
0
1483
0
    __vmread(GUEST_PML_INDEX, &pml_idx);
1484
0
1485
0
    /* Do nothing if PML buffer is empty. */
1486
0
    if ( pml_idx == (NR_PML_ENTRIES - 1) )
1487
0
        goto out;
1488
0
1489
0
    pml_buf = __map_domain_page(v->arch.hvm_vmx.pml_pg);
1490
0
1491
0
    /*
1492
0
     * The PML index can be either 2^16-1 (buffer is full) or 0 ~ NR_PML_ENTRIES-1
1493
0
     * (buffer is not full); in the latter case the PML index always points to the
1494
0
     * next available entry.
1495
0
     */
1496
0
    if ( pml_idx >= NR_PML_ENTRIES )
1497
0
        pml_idx = 0;
1498
0
    else
1499
0
        pml_idx++;
1500
0
1501
0
    for ( ; pml_idx < NR_PML_ENTRIES; pml_idx++ )
1502
0
    {
1503
0
        unsigned long gfn = pml_buf[pml_idx] >> PAGE_SHIFT;
1504
0
1505
0
        /*
1506
0
         * Need to change type from log-dirty to normal memory for logged GFN.
1507
0
         * hap_track_dirty_vram depends on it to work. We also mark all logged
1508
0
         * GFNs as dirty, as we cannot be sure whether it's safe to ignore
1509
0
         * GFNs on which p2m_change_type_one returns failure. The failure cases
1510
0
         * are very rare, and additional cost is negligible, but a missing mark
1511
0
         * is extremely difficult to debug.
1512
0
         */
1513
0
        p2m_change_type_one(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
1514
0
1515
0
        /* HVM guest: pfn == gfn */
1516
0
        paging_mark_pfn_dirty(v->domain, _pfn(gfn));
1517
0
    }
1518
0
1519
0
    unmap_domain_page(pml_buf);
1520
0
1521
0
    /* Reset PML index */
1522
0
    __vmwrite(GUEST_PML_INDEX, NR_PML_ENTRIES - 1);
1523
0
1524
0
 out:
1525
0
    vmx_vmcs_exit(v);
1526
0
}
1527
1528
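/*
 * Worked example of the normalisation above: with two GPAs logged the index
 * reads back as 509, is bumped to 510, and the loop consumes entries 510 and
 * 511.  After an overflow it reads back as 0xffff (>= NR_PML_ENTRIES), is
 * reset to 0, and all 512 entries are consumed.
 */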
bool_t vmx_domain_pml_enabled(const struct domain *d)
1529
12
{
1530
12
    return !!(d->arch.hvm_domain.vmx.status & VMX_DOMAIN_PML_ENABLED);
1531
12
}
1532
1533
/*
1534
 * This function enables PML for a particular domain. It should be called when
1535
 * the domain is paused.
1536
 *
1537
 * PML needs to be enabled globally for all vcpus of the domain, as the PML buffer
1538
 * and PML index are per-vcpu, but the EPT table is shared by all vcpus, so
1539
 * enabling PML on only a subset of the vcpus won't work.
1540
 */
1541
int vmx_domain_enable_pml(struct domain *d)
1542
0
{
1543
0
    struct vcpu *v;
1544
0
    int rc;
1545
0
1546
0
    ASSERT(atomic_read(&d->pause_count));
1547
0
1548
0
    if ( vmx_domain_pml_enabled(d) )
1549
0
        return 0;
1550
0
1551
0
    for_each_vcpu ( d, v )
1552
0
        if ( (rc = vmx_vcpu_enable_pml(v)) != 0 )
1553
0
            goto error;
1554
0
1555
0
    d->arch.hvm_domain.vmx.status |= VMX_DOMAIN_PML_ENABLED;
1556
0
1557
0
    return 0;
1558
0
1559
0
 error:
1560
0
    for_each_vcpu ( d, v )
1561
0
        if ( vmx_vcpu_pml_enabled(v) )
1562
0
            vmx_vcpu_disable_pml(v);
1563
0
    return rc;
1564
0
}
1565
1566
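/*
 * Hypothetical caller sketch (not code from this file) of the pause
 * discipline the ASSERT() above relies on; in practice this is reached via
 * the EPT/HAP log-dirty enable path:
 *
 *     domain_pause(d);
 *     rc = vmx_domain_enable_pml(d);
 *     domain_unpause(d);
 */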
/*
1567
 * Disable PML for a particular domain. Called when the domain is paused.
1568
 *
1569
 * As with enabling PML for a domain, disabling PML should be done for all
1570
 * vcpus at once.
1571
 */
1572
void vmx_domain_disable_pml(struct domain *d)
1573
0
{
1574
0
    struct vcpu *v;
1575
0
1576
0
    ASSERT(atomic_read(&d->pause_count));
1577
0
1578
0
    if ( !vmx_domain_pml_enabled(d) )
1579
0
        return;
1580
0
1581
0
    for_each_vcpu ( d, v )
1582
0
        vmx_vcpu_disable_pml(v);
1583
0
1584
0
    d->arch.hvm_domain.vmx.status &= ~VMX_DOMAIN_PML_ENABLED;
1585
0
}
1586
1587
/*
1588
 * Flush the PML buffers of all vcpus, and update the logged dirty pages in the
1589
 * log-dirty radix tree. Called when the domain is paused.
1590
 */
1591
void vmx_domain_flush_pml_buffers(struct domain *d)
1592
0
{
1593
0
    struct vcpu *v;
1594
0
1595
0
    ASSERT(atomic_read(&d->pause_count));
1596
0
1597
0
    if ( !vmx_domain_pml_enabled(d) )
1598
0
        return;
1599
0
1600
0
    for_each_vcpu ( d, v )
1601
0
        vmx_vcpu_flush_pml_buffer(v);
1602
0
}
1603
1604
static void vmx_vcpu_update_eptp(struct vcpu *v, u64 eptp)
1605
0
{
1606
0
    vmx_vmcs_enter(v);
1607
0
    __vmwrite(EPT_POINTER, eptp);
1608
0
    vmx_vmcs_exit(v);
1609
0
}
1610
1611
/*
1612
 * Update the EPTP in the VMCS of all vcpus of the domain. Must be called when the
1613
 * domain is paused.
1614
 */
1615
void vmx_domain_update_eptp(struct domain *d)
1616
0
{
1617
0
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
1618
0
    struct vcpu *v;
1619
0
1620
0
    ASSERT(atomic_read(&d->pause_count));
1621
0
1622
0
    for_each_vcpu ( d, v )
1623
0
        vmx_vcpu_update_eptp(v, p2m->ept.eptp);
1624
0
1625
0
    ept_sync_domain(p2m);
1626
0
}
1627
1628
int vmx_create_vmcs(struct vcpu *v)
1629
12
{
1630
12
    struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx;
1631
12
    int rc;
1632
12
1633
12
    if ( (arch_vmx->vmcs_pa = vmx_alloc_vmcs()) == 0 )
1634
0
        return -ENOMEM;
1635
12
1636
12
    INIT_LIST_HEAD(&arch_vmx->active_list);
1637
12
    __vmpclear(arch_vmx->vmcs_pa);
1638
12
    arch_vmx->active_cpu = -1;
1639
12
    arch_vmx->launched   = 0;
1640
12
1641
12
    if ( (rc = construct_vmcs(v)) != 0 )
1642
0
    {
1643
0
        vmx_free_vmcs(arch_vmx->vmcs_pa);
1644
0
        return rc;
1645
0
    }
1646
12
1647
12
    return 0;
1648
12
}
1649
1650
void vmx_destroy_vmcs(struct vcpu *v)
1651
0
{
1652
0
    struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx;
1653
0
1654
0
    vmx_clear_vmcs(v);
1655
0
1656
0
    vmx_free_vmcs(arch_vmx->vmcs_pa);
1657
0
1658
0
    free_xenheap_page(v->arch.hvm_vmx.host_msr_area);
1659
0
    free_xenheap_page(v->arch.hvm_vmx.msr_area);
1660
0
    free_xenheap_page(v->arch.hvm_vmx.msr_bitmap);
1661
0
}
1662
1663
void vmx_vmentry_failure(void)
1664
0
{
1665
0
    struct vcpu *curr = current;
1666
0
    unsigned long error;
1667
0
1668
0
    __vmread(VM_INSTRUCTION_ERROR, &error);
1669
0
    gprintk(XENLOG_ERR, "VM%s error: %#lx\n",
1670
0
            curr->arch.hvm_vmx.launched ? "RESUME" : "LAUNCH", error);
1671
0
1672
0
    if ( error == VMX_INSN_INVALID_CONTROL_STATE ||
1673
0
         error == VMX_INSN_INVALID_HOST_STATE )
1674
0
        vmcs_dump_vcpu(curr);
1675
0
1676
0
    domain_crash_synchronous();
1677
0
}
1678
1679
void vmx_do_resume(struct vcpu *v)
1680
4.52M
{
1681
4.52M
    bool_t debug_state;
1682
4.52M
1683
4.52M
    if ( v->arch.hvm_vmx.active_cpu == smp_processor_id() )
1684
4.53M
        vmx_vmcs_reload(v);
1685
4.52M
    else
1686
18.4E
    {
1687
18.4E
        /*
1688
18.4E
         * For a pass-through domain, the guest's PCIe device driver may leverage
1689
18.4E
         * "Non-Snoop" I/O, and explicitly WBINVD or CLFLUSH to a RAM space.
1690
18.4E
         * Since vCPU migration may occur before the WBINVD or CLFLUSH, we need to
1691
18.4E
         * maintain data consistency either by:
1692
18.4E
         *  1: flushing cache (wbinvd) when the guest is scheduled out if
1693
18.4E
         *     there is no wbinvd exit, or
1694
18.4E
         *  2: executing wbinvd on all dirty pCPUs when a guest wbinvd exits.
1695
18.4E
         * If the VT-d engine can force snooping, neither is needed.
1696
18.4E
         */
1697
18.4E
        if ( has_arch_pdevs(v->domain) && !iommu_snoop
1698
0
                && !cpu_has_wbinvd_exiting )
1699
0
        {
1700
0
            int cpu = v->arch.hvm_vmx.active_cpu;
1701
0
            if ( cpu != -1 )
1702
0
                flush_mask(cpumask_of(cpu), FLUSH_CACHE);
1703
0
        }
1704
18.4E
1705
18.4E
        vmx_clear_vmcs(v);
1706
18.4E
        vmx_load_vmcs(v);
1707
18.4E
        hvm_migrate_timers(v);
1708
18.4E
        hvm_migrate_pirqs(v);
1709
18.4E
        vmx_set_host_env(v);
1710
18.4E
        /*
1711
18.4E
         * Both the n1 and the n2 VMCS need their host environment updated after
1712
18.4E
         * VCPU migration. The environment of the current VMCS is updated in place,
1713
18.4E
         * but the update of the other VMCS is deferred until it is switched in.
1714
18.4E
         */
1715
18.4E
        v->arch.hvm_vmx.hostenv_migrated = 1;
1716
18.4E
1717
18.4E
        hvm_asid_flush_vcpu(v);
1718
18.4E
    }
1719
4.52M
1720
4.52M
    debug_state = v->domain->debugger_attached
1721
4.60M
                  || v->domain->arch.monitor.software_breakpoint_enabled
1722
4.60M
                  || v->domain->arch.monitor.singlestep_enabled;
1723
4.52M
1724
4.52M
    if ( unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
1725
0
    {
1726
0
        v->arch.hvm_vcpu.debug_state_latch = debug_state;
1727
0
        vmx_update_debug_state(v);
1728
0
    }
1729
4.52M
1730
4.52M
    hvm_do_resume(v);
1731
4.52M
    reset_stack_and_jump(vmx_asm_do_vmentry);
1732
4.52M
}
1733
1734
static inline unsigned long vmr(unsigned long field)
1735
0
{
1736
0
    unsigned long val;
1737
0
1738
0
    return vmread_safe(field, &val) ? 0 : val;
1739
0
}
1740
1741
0
#define vmr16(fld) ({             \
1742
0
    BUILD_BUG_ON((fld) & 0x6001); \
1743
0
    (uint16_t)vmr(fld);           \
1744
0
})
1745
1746
0
#define vmr32(fld) ({                         \
1747
0
    BUILD_BUG_ON(((fld) & 0x6001) != 0x4000); \
1748
0
    (uint32_t)vmr(fld);                       \
1749
0
})
1750
1751
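/*
 * The masks in the BUILD_BUG_ON()s above follow the VMCS field encoding:
 * bit 0 is the access type (set only for the high half of a 64-bit field)
 * and bits 14:13 are the width (0 = 16-bit, 1 = 64-bit, 2 = 32-bit,
 * 3 = natural).  Hence vmr16() requires (fld & 0x6001) == 0, and vmr32()
 * requires the width bits to read 2 (i.e. 0x4000).
 */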
static void vmx_dump_sel(char *name, uint32_t selector)
1752
0
{
1753
0
    uint32_t sel, attr, limit;
1754
0
    uint64_t base;
1755
0
    sel = vmr(selector);
1756
0
    attr = vmr(selector + (GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR));
1757
0
    limit = vmr(selector + (GUEST_ES_LIMIT - GUEST_ES_SELECTOR));
1758
0
    base = vmr(selector + (GUEST_ES_BASE - GUEST_ES_SELECTOR));
1759
0
    printk("%s: %04x %05x %08x %016"PRIx64"\n", name, sel, attr, limit, base);
1760
0
}
1761
1762
static void vmx_dump_sel2(char *name, uint32_t lim)
1763
0
{
1764
0
    uint32_t limit;
1765
0
    uint64_t base;
1766
0
    limit = vmr(lim);
1767
0
    base = vmr(lim + (GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
1768
0
    printk("%s:            %08x %016"PRIx64"\n", name, limit, base);
1769
0
}
1770
1771
void vmcs_dump_vcpu(struct vcpu *v)
1772
0
{
1773
0
    struct cpu_user_regs *regs = &v->arch.user_regs;
1774
0
    uint32_t vmentry_ctl, vmexit_ctl;
1775
0
    unsigned long cr4;
1776
0
    uint64_t efer;
1777
0
    unsigned int i, n;
1778
0
1779
0
    if ( v == current )
1780
0
        regs = guest_cpu_user_regs();
1781
0
1782
0
    vmx_vmcs_enter(v);
1783
0
1784
0
    vmentry_ctl = vmr32(VM_ENTRY_CONTROLS);
1785
0
    vmexit_ctl = vmr32(VM_EXIT_CONTROLS);
1786
0
    cr4 = vmr(GUEST_CR4);
1787
0
    efer = vmr(GUEST_EFER);
1788
0
1789
0
    printk("*** Guest State ***\n");
1790
0
    printk("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
1791
0
           vmr(GUEST_CR0), vmr(CR0_READ_SHADOW), vmr(CR0_GUEST_HOST_MASK));
1792
0
    printk("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
1793
0
           cr4, vmr(CR4_READ_SHADOW), vmr(CR4_GUEST_HOST_MASK));
1794
0
    printk("CR3 = 0x%016lx\n", vmr(GUEST_CR3));
1795
0
    if ( (v->arch.hvm_vmx.secondary_exec_control &
1796
0
          SECONDARY_EXEC_ENABLE_EPT) &&
1797
0
         (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA) )
1798
0
    {
1799
0
        printk("PDPTE0 = 0x%016lx  PDPTE1 = 0x%016lx\n",
1800
0
               vmr(GUEST_PDPTE(0)), vmr(GUEST_PDPTE(1)));
1801
0
        printk("PDPTE2 = 0x%016lx  PDPTE3 = 0x%016lx\n",
1802
0
               vmr(GUEST_PDPTE(2)), vmr(GUEST_PDPTE(3)));
1803
0
    }
1804
0
    printk("RSP = 0x%016lx (0x%016lx)  RIP = 0x%016lx (0x%016lx)\n",
1805
0
           vmr(GUEST_RSP), regs->rsp,
1806
0
           vmr(GUEST_RIP), regs->rip);
1807
0
    printk("RFLAGS=0x%08lx (0x%08lx)  DR7 = 0x%016lx\n",
1808
0
           vmr(GUEST_RFLAGS), regs->rflags,
1809
0
           vmr(GUEST_DR7));
1810
0
    printk("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
1811
0
           vmr(GUEST_SYSENTER_ESP),
1812
0
           vmr32(GUEST_SYSENTER_CS), vmr(GUEST_SYSENTER_EIP));
1813
0
    printk("       sel  attr  limit   base\n");
1814
0
    vmx_dump_sel("  CS", GUEST_CS_SELECTOR);
1815
0
    vmx_dump_sel("  DS", GUEST_DS_SELECTOR);
1816
0
    vmx_dump_sel("  SS", GUEST_SS_SELECTOR);
1817
0
    vmx_dump_sel("  ES", GUEST_ES_SELECTOR);
1818
0
    vmx_dump_sel("  FS", GUEST_FS_SELECTOR);
1819
0
    vmx_dump_sel("  GS", GUEST_GS_SELECTOR);
1820
0
    vmx_dump_sel2("GDTR", GUEST_GDTR_LIMIT);
1821
0
    vmx_dump_sel("LDTR", GUEST_LDTR_SELECTOR);
1822
0
    vmx_dump_sel2("IDTR", GUEST_IDTR_LIMIT);
1823
0
    vmx_dump_sel("  TR", GUEST_TR_SELECTOR);
1824
0
    if ( (vmexit_ctl & (VM_EXIT_SAVE_GUEST_PAT | VM_EXIT_SAVE_GUEST_EFER)) ||
1825
0
         (vmentry_ctl & (VM_ENTRY_LOAD_GUEST_PAT | VM_ENTRY_LOAD_GUEST_EFER)) )
1826
0
        printk("EFER = 0x%016lx  PAT = 0x%016lx\n", efer, vmr(GUEST_PAT));
1827
0
    printk("PreemptionTimer = 0x%08x  SM Base = 0x%08x\n",
1828
0
           vmr32(GUEST_PREEMPTION_TIMER), vmr32(GUEST_SMBASE));
1829
0
    printk("DebugCtl = 0x%016lx  DebugExceptions = 0x%016lx\n",
1830
0
           vmr(GUEST_IA32_DEBUGCTL), vmr(GUEST_PENDING_DBG_EXCEPTIONS));
1831
0
    if ( vmentry_ctl & (VM_ENTRY_LOAD_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_BNDCFGS) )
1832
0
        printk("PerfGlobCtl = 0x%016lx  BndCfgS = 0x%016lx\n",
1833
0
               vmr(GUEST_PERF_GLOBAL_CTRL), vmr(GUEST_BNDCFGS));
1834
0
    printk("Interruptibility = %08x  ActivityState = %08x\n",
1835
0
           vmr32(GUEST_INTERRUPTIBILITY_INFO), vmr32(GUEST_ACTIVITY_STATE));
1836
0
    if ( v->arch.hvm_vmx.secondary_exec_control &
1837
0
         SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY )
1838
0
        printk("InterruptStatus = %04x\n", vmr16(GUEST_INTR_STATUS));
1839
0
1840
0
    printk("*** Host State ***\n");
1841
0
    printk("RIP = 0x%016lx (%ps)  RSP = 0x%016lx\n",
1842
0
           vmr(HOST_RIP), (void *)vmr(HOST_RIP), vmr(HOST_RSP));
1843
0
    printk("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n",
1844
0
           vmr16(HOST_CS_SELECTOR), vmr16(HOST_SS_SELECTOR),
1845
0
           vmr16(HOST_DS_SELECTOR), vmr16(HOST_ES_SELECTOR),
1846
0
           vmr16(HOST_FS_SELECTOR), vmr16(HOST_GS_SELECTOR),
1847
0
           vmr16(HOST_TR_SELECTOR));
1848
0
    printk("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n",
1849
0
           vmr(HOST_FS_BASE), vmr(HOST_GS_BASE), vmr(HOST_TR_BASE));
1850
0
    printk("GDTBase=%016lx IDTBase=%016lx\n",
1851
0
           vmr(HOST_GDTR_BASE), vmr(HOST_IDTR_BASE));
1852
0
    printk("CR0=%016lx CR3=%016lx CR4=%016lx\n",
1853
0
           vmr(HOST_CR0), vmr(HOST_CR3), vmr(HOST_CR4));
1854
0
    printk("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
1855
0
           vmr(HOST_SYSENTER_ESP),
1856
0
           vmr32(HOST_SYSENTER_CS), vmr(HOST_SYSENTER_EIP));
1857
0
    if ( vmexit_ctl & (VM_EXIT_LOAD_HOST_PAT | VM_EXIT_LOAD_HOST_EFER) )
1858
0
        printk("EFER = 0x%016lx  PAT = 0x%016lx\n", vmr(HOST_EFER), vmr(HOST_PAT));
1859
0
    if ( vmexit_ctl & VM_EXIT_LOAD_PERF_GLOBAL_CTRL )
1860
0
        printk("PerfGlobCtl = 0x%016lx\n",
1861
0
               vmr(HOST_PERF_GLOBAL_CTRL));
1862
0
1863
0
    printk("*** Control State ***\n");
1864
0
    printk("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
1865
0
           vmr32(PIN_BASED_VM_EXEC_CONTROL),
1866
0
           vmr32(CPU_BASED_VM_EXEC_CONTROL),
1867
0
           vmr32(SECONDARY_VM_EXEC_CONTROL));
1868
0
    printk("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl);
1869
0
    printk("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n",
1870
0
           vmr32(EXCEPTION_BITMAP),
1871
0
           vmr32(PAGE_FAULT_ERROR_CODE_MASK),
1872
0
           vmr32(PAGE_FAULT_ERROR_CODE_MATCH));
1873
0
    printk("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n",
1874
0
           vmr32(VM_ENTRY_INTR_INFO),
1875
0
           vmr32(VM_ENTRY_EXCEPTION_ERROR_CODE),
1876
0
           vmr32(VM_ENTRY_INSTRUCTION_LEN));
1877
0
    printk("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n",
1878
0
           vmr32(VM_EXIT_INTR_INFO),
1879
0
           vmr32(VM_EXIT_INTR_ERROR_CODE),
1880
0
           vmr32(VM_EXIT_INSTRUCTION_LEN));
1881
0
    printk("        reason=%08x qualification=%016lx\n",
1882
0
           vmr32(VM_EXIT_REASON), vmr(EXIT_QUALIFICATION));
1883
0
    printk("IDTVectoring: info=%08x errcode=%08x\n",
1884
0
           vmr32(IDT_VECTORING_INFO), vmr32(IDT_VECTORING_ERROR_CODE));
1885
0
    printk("TSC Offset = 0x%016lx  TSC Multiplier = 0x%016lx\n",
1886
0
           vmr(TSC_OFFSET), vmr(TSC_MULTIPLIER));
1887
0
    if ( (v->arch.hvm_vmx.exec_control & CPU_BASED_TPR_SHADOW) ||
1888
0
         (vmx_pin_based_exec_control & PIN_BASED_POSTED_INTERRUPT) )
1889
0
        printk("TPR Threshold = 0x%02x  PostedIntrVec = 0x%02x\n",
1890
0
               vmr32(TPR_THRESHOLD), vmr16(POSTED_INTR_NOTIFICATION_VECTOR));
1891
0
    if ( (v->arch.hvm_vmx.secondary_exec_control &
1892
0
          SECONDARY_EXEC_ENABLE_EPT) )
1893
0
        printk("EPT pointer = 0x%016lx  EPTP index = 0x%04x\n",
1894
0
               vmr(EPT_POINTER), vmr16(EPTP_INDEX));
1895
0
    n = vmr32(CR3_TARGET_COUNT);
1896
0
    for ( i = 0; i + 1 < n; i += 2 )
1897
0
        printk("CR3 target%u=%016lx target%u=%016lx\n",
1898
0
               i, vmr(CR3_TARGET_VALUE(i)),
1899
0
               i + 1, vmr(CR3_TARGET_VALUE(i + 1)));
1900
0
    if ( i < n )
1901
0
        printk("CR3 target%u=%016lx\n", i, vmr(CR3_TARGET_VALUE(i)));
1902
0
    if ( v->arch.hvm_vmx.secondary_exec_control &
1903
0
         SECONDARY_EXEC_PAUSE_LOOP_EXITING )
1904
0
        printk("PLE Gap=%08x Window=%08x\n",
1905
0
               vmr32(PLE_GAP), vmr32(PLE_WINDOW));
1906
0
    if ( v->arch.hvm_vmx.secondary_exec_control &
1907
0
         (SECONDARY_EXEC_ENABLE_VPID | SECONDARY_EXEC_ENABLE_VM_FUNCTIONS) )
1908
0
        printk("Virtual processor ID = 0x%04x VMfunc controls = %016lx\n",
1909
0
               vmr16(VIRTUAL_PROCESSOR_ID), vmr(VM_FUNCTION_CONTROL));
1910
0
1911
0
    vmx_vmcs_exit(v);
1912
0
}
1913
1914
static void vmcs_dump(unsigned char ch)
1915
0
{
1916
0
    struct domain *d;
1917
0
    struct vcpu *v;
1918
0
    
1919
0
    printk("*********** VMCS Areas **************\n");
1920
0
1921
0
    rcu_read_lock(&domlist_read_lock);
1922
0
1923
0
    for_each_domain ( d )
1924
0
    {
1925
0
        if ( !is_hvm_domain(d) )
1926
0
            continue;
1927
0
        printk("\n>>> Domain %d <<<\n", d->domain_id);
1928
0
        for_each_vcpu ( d, v )
1929
0
        {
1930
0
            printk("\tVCPU %d\n", v->vcpu_id);
1931
0
            vmcs_dump_vcpu(v);
1932
0
        }
1933
0
    }
1934
0
1935
0
    rcu_read_unlock(&domlist_read_lock);
1936
0
1937
0
    printk("**************************************\n");
1938
0
}
1939
1940
void __init setup_vmcs_dump(void)
1941
1
{
1942
1
    register_keyhandler('v', vmcs_dump, "dump VT-x VMCSs", 1);
1943
1
}
1944
1945
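/*
 * Usage note: once registered, the dump can be triggered at run time with
 * `xl debug-keys v`, or by entering 'v' through the hypervisor console's
 * debug-key mechanism; the output is emitted via printk().
 */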
static void __init __maybe_unused build_assertions(void)
1946
0
{
1947
0
    struct vmx_msr_bitmap bitmap;
1948
0
1949
0
    /* Check vmx_msr_bitmap layout against hardware expectations. */
1950
0
    BUILD_BUG_ON(sizeof(bitmap)            != PAGE_SIZE);
1951
0
    BUILD_BUG_ON(sizeof(bitmap.read_low)   != 1024);
1952
0
    BUILD_BUG_ON(sizeof(bitmap.read_high)  != 1024);
1953
0
    BUILD_BUG_ON(sizeof(bitmap.write_low)  != 1024);
1954
0
    BUILD_BUG_ON(sizeof(bitmap.write_high) != 1024);
1955
0
    BUILD_BUG_ON(offsetof(struct vmx_msr_bitmap, read_low)   != 0);
1956
0
    BUILD_BUG_ON(offsetof(struct vmx_msr_bitmap, read_high)  != 1024);
1957
0
    BUILD_BUG_ON(offsetof(struct vmx_msr_bitmap, write_low)  != 2048);
1958
0
    BUILD_BUG_ON(offsetof(struct vmx_msr_bitmap, write_high) != 3072);
1959
0
}
1960
1961
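/*
 * The asserted layout mirrors the hardware MSR bitmap: four 1024-byte
 * regions in one 4K page, covering reads of MSRs 0x00000000-0x00001fff,
 * reads of 0xc0000000-0xc0001fff, and then the two corresponding write
 * bitmaps.
 */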
/*
1962
 * Local variables:
1963
 * mode: C
1964
 * c-file-style: "BSD"
1965
 * c-basic-offset: 4
1966
 * tab-width: 4
1967
 * indent-tabs-mode: nil
1968
 * End:
1969
 */