/root/src/xen/xen/arch/x86/hvm/vmx/vmcs.c
Line | Count | Source |
1 | | /* |
2 | | * vmcs.c: VMCS management |
3 | | * Copyright (c) 2004, Intel Corporation. |
4 | | * |
5 | | * This program is free software; you can redistribute it and/or modify it |
6 | | * under the terms and conditions of the GNU General Public License, |
7 | | * version 2, as published by the Free Software Foundation. |
8 | | * |
9 | | * This program is distributed in the hope it will be useful, but WITHOUT |
10 | | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
11 | | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
12 | | * more details. |
13 | | * |
14 | | * You should have received a copy of the GNU General Public License along with |
15 | | * this program; If not, see <http://www.gnu.org/licenses/>. |
16 | | */ |
17 | | |
18 | | #include <xen/init.h> |
19 | | #include <xen/mm.h> |
20 | | #include <xen/lib.h> |
21 | | #include <xen/errno.h> |
22 | | #include <xen/domain_page.h> |
23 | | #include <xen/event.h> |
24 | | #include <xen/kernel.h> |
25 | | #include <xen/keyhandler.h> |
26 | | #include <xen/vm_event.h> |
27 | | #include <asm/current.h> |
28 | | #include <asm/cpufeature.h> |
29 | | #include <asm/processor.h> |
30 | | #include <asm/msr.h> |
31 | | #include <asm/xstate.h> |
32 | | #include <asm/hvm/hvm.h> |
33 | | #include <asm/hvm/io.h> |
34 | | #include <asm/hvm/support.h> |
35 | | #include <asm/hvm/vmx/vmx.h> |
36 | | #include <asm/hvm/vmx/vvmx.h> |
37 | | #include <asm/hvm/vmx/vmcs.h> |
38 | | #include <asm/flushtlb.h> |
39 | | #include <asm/monitor.h> |
40 | | #include <asm/shadow.h> |
41 | | #include <asm/tboot.h> |
42 | | #include <asm/apic.h> |
43 | | |
44 | | static bool_t __read_mostly opt_vpid_enabled = 1; |
45 | | boolean_param("vpid", opt_vpid_enabled); |
46 | | |
47 | | static bool_t __read_mostly opt_unrestricted_guest_enabled = 1; |
48 | | boolean_param("unrestricted_guest", opt_unrestricted_guest_enabled); |
49 | | |
50 | | static bool_t __read_mostly opt_apicv_enabled = 1; |
51 | | boolean_param("apicv", opt_apicv_enabled); |
52 | | |
53 | | /* |
54 | | * These two parameters are used to configure the controls for Pause-Loop Exiting: |
55 | | * ple_gap: upper bound on the amount of time between two successive |
56 | | * executions of PAUSE in a loop. |
57 | | * ple_window: upper bound on the amount of time a guest is allowed to execute |
58 | | * in a PAUSE loop. |
59 | | * Time is measured based on a counter that runs at the same rate as the TSC, |
60 | | * refer to SDM volume 3B, sections 21.6.13 & 22.1.3. |
61 | | */ |
62 | | static unsigned int __read_mostly ple_gap = 128; |
63 | | integer_param("ple_gap", ple_gap); |
64 | | static unsigned int __read_mostly ple_window = 4096; |
65 | | integer_param("ple_window", ple_window); |
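A worked example of the two defaults above: with ple_gap = 128 and ple_window =
4096, two PAUSE executions within 128 TSC-rate ticks of each other are treated
as part of the same spin loop, and once such a loop has been spinning for 4096
ticks the CPU forces a VM exit. These values are programmed into the PLE_GAP
and PLE_WINDOW VMCS fields in construct_vmcs() below.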
66 | | |
67 | | static bool_t __read_mostly opt_pml_enabled = 1; |
68 | | static s8 __read_mostly opt_ept_ad = -1; |
69 | | |
70 | | /* |
71 | | * The 'ept' parameter controls functionalities that depend on, or impact, |
72 | | * the EPT mechanism. The optional comma-separated value may contain: |
73 | | * |
74 | | * pml Enable PML |
75 | | * ad Use A/D bits |
76 | | */ |
77 | | static int __init parse_ept_param(const char *s) |
78 | 0 | { |
79 | 0 | const char *ss; |
80 | 0 | int rc = 0; |
81 | 0 |
82 | 0 | do { |
83 | 0 | bool_t val = !!strncmp(s, "no-", 3); |
84 | 0 |
85 | 0 | if ( !val ) |
86 | 0 | s += 3; |
87 | 0 |
88 | 0 | ss = strchr(s, ','); |
89 | 0 | if ( !ss ) |
90 | 0 | ss = strchr(s, '\0'); |
91 | 0 |
92 | 0 | if ( !strncmp(s, "pml", ss - s) ) |
93 | 0 | opt_pml_enabled = val; |
94 | 0 | else if ( !strncmp(s, "ad", ss - s) ) |
95 | 0 | opt_ept_ad = val; |
96 | 0 | else |
97 | 0 | rc = -EINVAL; |
98 | 0 |
99 | 0 | s = ss + 1; |
100 | 0 | } while ( *ss ); |
101 | 0 |
102 | 0 | return rc; |
103 | 0 | } |
104 | | custom_param("ept", parse_ept_param); |
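A usage sketch of the option format parsed above; these command lines are
illustrative, not taken from a real configuration:

    /* Xen boot option          resulting state                        */
    /* ept=pml,no-ad       ->   opt_pml_enabled = 1, opt_ept_ad = 0    */
    /* ept=no-pml          ->   opt_pml_enabled = 0                    */
    /* ept=bogus           ->   parse_ept_param() returns -EINVAL      */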
105 | | |
106 | | /* Dynamic (run-time adjusted) execution control flags. */ |
107 | | u32 vmx_pin_based_exec_control __read_mostly; |
108 | | u32 vmx_cpu_based_exec_control __read_mostly; |
109 | | u32 vmx_secondary_exec_control __read_mostly; |
110 | | u32 vmx_vmexit_control __read_mostly; |
111 | | u32 vmx_vmentry_control __read_mostly; |
112 | | u64 vmx_ept_vpid_cap __read_mostly; |
113 | | u64 vmx_vmfunc __read_mostly; |
114 | | bool_t vmx_virt_exception __read_mostly; |
115 | | |
116 | | static DEFINE_PER_CPU_READ_MOSTLY(paddr_t, vmxon_region); |
117 | | static DEFINE_PER_CPU(paddr_t, current_vmcs); |
118 | | static DEFINE_PER_CPU(struct list_head, active_vmcs_list); |
119 | | DEFINE_PER_CPU(bool_t, vmxon); |
120 | | |
121 | | static u32 vmcs_revision_id __read_mostly; |
122 | | u64 __read_mostly vmx_basic_msr; |
123 | | |
124 | | static void __init vmx_display_features(void) |
125 | 1 | { |
126 | 1 | int printed = 0; |
127 | 1 | |
128 | 1 | printk("VMX: Supported advanced features:\n"); |
129 | 1 | |
130 | 15 | #define P(p,s) if ( p ) { printk(" - %s\n", s); printed = 1; } |
131 | 1 | P(cpu_has_vmx_virtualize_apic_accesses, "APIC MMIO access virtualisation"); |
132 | 1 | P(cpu_has_vmx_tpr_shadow, "APIC TPR shadow"); |
133 | 1 | P(cpu_has_vmx_ept, "Extended Page Tables (EPT)"); |
134 | 1 | P(cpu_has_vmx_vpid, "Virtual-Processor Identifiers (VPID)"); |
135 | 1 | P(cpu_has_vmx_vnmi, "Virtual NMI"); |
136 | 1 | P(cpu_has_vmx_msr_bitmap, "MSR direct-access bitmap"); |
137 | 1 | P(cpu_has_vmx_unrestricted_guest, "Unrestricted Guest"); |
138 | 1 | P(cpu_has_vmx_apic_reg_virt, "APIC Register Virtualization"); |
139 | 1 | P(cpu_has_vmx_virtual_intr_delivery, "Virtual Interrupt Delivery"); |
140 | 1 | P(cpu_has_vmx_posted_intr_processing, "Posted Interrupt Processing"); |
141 | 1 | P(cpu_has_vmx_vmcs_shadowing, "VMCS shadowing"); |
142 | 1 | P(cpu_has_vmx_vmfunc, "VM Functions"); |
143 | 1 | P(cpu_has_vmx_virt_exceptions, "Virtualisation Exceptions"); |
144 | 1 | P(cpu_has_vmx_pml, "Page Modification Logging"); |
145 | 1 | P(cpu_has_vmx_tsc_scaling, "TSC Scaling"); |
146 | 1 | #undef P |
147 | 1 | |
148 | 1 | if ( !printed ) |
149 | 0 | printk(" - none\n"); |
150 | 1 | } |
151 | | |
152 | | static u32 adjust_vmx_controls( |
153 | | const char *name, u32 ctl_min, u32 ctl_opt, u32 msr, bool_t *mismatch) |
154 | 60 | { |
155 | 60 | u32 vmx_msr_low, vmx_msr_high, ctl = ctl_min | ctl_opt; |
156 | 60 | |
157 | 60 | rdmsr(msr, vmx_msr_low, vmx_msr_high); |
158 | 60 | |
159 | 60 | ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */ |
160 | 60 | ctl |= vmx_msr_low; /* bit == 1 in low word ==> must be one */ |
161 | 60 | |
162 | 60 | /* Ensure minimum (required) set of control bits are supported. */ |
163 | 60 | if ( ctl_min & ~ctl ) |
164 | 0 | { |
165 | 0 | *mismatch = 1; |
166 | 0 | printk("VMX: CPU%d has insufficient %s (%08x; requires %08x)\n", |
167 | 0 | smp_processor_id(), name, ctl, ctl_min); |
168 | 0 | } |
169 | 60 | |
170 | 60 | return ctl; |
171 | 60 | } |
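A worked example of the adjustment above, using hypothetical MSR values. Each
capability MSR reports allowed-0 settings in its low word (bits that must be 1)
and allowed-1 settings in its high word (bits that may be 1):

    u32 ctl = 0x84006172;   /* min | opt: everything we would like     */
    ctl &= 0xfff9fffe;      /* drop bits the CPU cannot set to 1       */
    ctl |= 0x00000016;      /* force bits the CPU requires to be 1     */
    /* ctl == 0x84006176.  If a dropped bit had been part of ctl_min,
     * the mismatch path above would fire instead. */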
172 | | |
173 | | static bool_t cap_check(const char *name, u32 expected, u32 saw) |
174 | 88 | { |
175 | 88 | if ( saw != expected ) |
176 | 0 | printk("VMX %s: saw %#x expected %#x\n", name, saw, expected); |
177 | 88 | return saw != expected; |
178 | 88 | } |
179 | | |
180 | | static int vmx_init_vmcs_config(void) |
181 | 12 | { |
182 | 12 | u32 vmx_basic_msr_low, vmx_basic_msr_high, min, opt; |
183 | 12 | u32 _vmx_pin_based_exec_control; |
184 | 12 | u32 _vmx_cpu_based_exec_control; |
185 | 12 | u32 _vmx_secondary_exec_control = 0; |
186 | 12 | u64 _vmx_ept_vpid_cap = 0; |
187 | 12 | u64 _vmx_misc_cap = 0; |
188 | 12 | u32 _vmx_vmexit_control; |
189 | 12 | u32 _vmx_vmentry_control; |
190 | 12 | u64 _vmx_vmfunc = 0; |
191 | 12 | bool_t mismatch = 0; |
192 | 12 | |
193 | 12 | rdmsr(MSR_IA32_VMX_BASIC, vmx_basic_msr_low, vmx_basic_msr_high); |
194 | 12 | |
195 | 12 | min = (PIN_BASED_EXT_INTR_MASK | |
196 | 12 | PIN_BASED_NMI_EXITING); |
197 | 12 | opt = (PIN_BASED_VIRTUAL_NMIS | |
198 | 12 | PIN_BASED_POSTED_INTERRUPT); |
199 | 12 | _vmx_pin_based_exec_control = adjust_vmx_controls( |
200 | 12 | "Pin-Based Exec Control", min, opt, |
201 | 12 | MSR_IA32_VMX_PINBASED_CTLS, &mismatch); |
202 | 12 | |
203 | 12 | min = (CPU_BASED_HLT_EXITING | |
204 | 12 | CPU_BASED_VIRTUAL_INTR_PENDING | |
205 | 12 | CPU_BASED_CR8_LOAD_EXITING | |
206 | 12 | CPU_BASED_CR8_STORE_EXITING | |
207 | 12 | CPU_BASED_INVLPG_EXITING | |
208 | 12 | CPU_BASED_CR3_LOAD_EXITING | |
209 | 12 | CPU_BASED_CR3_STORE_EXITING | |
210 | 12 | CPU_BASED_MONITOR_EXITING | |
211 | 12 | CPU_BASED_MWAIT_EXITING | |
212 | 12 | CPU_BASED_MOV_DR_EXITING | |
213 | 12 | CPU_BASED_ACTIVATE_IO_BITMAP | |
214 | 12 | CPU_BASED_USE_TSC_OFFSETING | |
215 | 12 | CPU_BASED_RDTSC_EXITING); |
216 | 12 | opt = (CPU_BASED_ACTIVATE_MSR_BITMAP | |
217 | 12 | CPU_BASED_TPR_SHADOW | |
218 | 12 | CPU_BASED_MONITOR_TRAP_FLAG | |
219 | 12 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS); |
220 | 12 | _vmx_cpu_based_exec_control = adjust_vmx_controls( |
221 | 12 | "CPU-Based Exec Control", min, opt, |
222 | 12 | MSR_IA32_VMX_PROCBASED_CTLS, &mismatch); |
223 | 12 | _vmx_cpu_based_exec_control &= ~CPU_BASED_RDTSC_EXITING; |
224 | 12 | if ( _vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW ) |
225 | 12 | _vmx_cpu_based_exec_control &= |
226 | 12 | ~(CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING); |
227 | 12 | |
228 | 12 | if ( _vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS ) |
229 | 12 | { |
230 | 12 | min = 0; |
231 | 12 | opt = (SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
232 | 12 | SECONDARY_EXEC_WBINVD_EXITING | |
233 | 12 | SECONDARY_EXEC_ENABLE_EPT | |
234 | 12 | SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING | |
235 | 12 | SECONDARY_EXEC_ENABLE_RDTSCP | |
236 | 12 | SECONDARY_EXEC_PAUSE_LOOP_EXITING | |
237 | 12 | SECONDARY_EXEC_ENABLE_INVPCID | |
238 | 12 | SECONDARY_EXEC_ENABLE_VM_FUNCTIONS | |
239 | 12 | SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS | |
240 | 12 | SECONDARY_EXEC_XSAVES | |
241 | 12 | SECONDARY_EXEC_TSC_SCALING); |
242 | 12 | rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap); |
243 | 12 | if ( _vmx_misc_cap & VMX_MISC_VMWRITE_ALL ) |
244 | 0 | opt |= SECONDARY_EXEC_ENABLE_VMCS_SHADOWING; |
245 | 12 | if ( opt_vpid_enabled ) |
246 | 12 | opt |= SECONDARY_EXEC_ENABLE_VPID; |
247 | 12 | if ( opt_unrestricted_guest_enabled ) |
248 | 12 | opt |= SECONDARY_EXEC_UNRESTRICTED_GUEST; |
249 | 12 | if ( opt_pml_enabled ) |
250 | 1 | opt |= SECONDARY_EXEC_ENABLE_PML; |
251 | 12 | |
252 | 12 | /* |
253 | 12 | * "APIC Register Virtualization" and "Virtual Interrupt Delivery" |
254 | 12 | * can be set only when "use TPR shadow" is set |
255 | 12 | */ |
256 | 12 | if ( (_vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW) && |
257 | 12 | opt_apicv_enabled ) |
258 | 12 | opt |= SECONDARY_EXEC_APIC_REGISTER_VIRT | |
259 | 12 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | |
260 | 12 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; |
261 | 12 | |
262 | 12 | _vmx_secondary_exec_control = adjust_vmx_controls( |
263 | 12 | "Secondary Exec Control", min, opt, |
264 | 12 | MSR_IA32_VMX_PROCBASED_CTLS2, &mismatch); |
265 | 12 | } |
266 | 12 | |
267 | 12 | /* The IA32_VMX_EPT_VPID_CAP MSR exists only when EPT or VPID available */ |
268 | 12 | if ( _vmx_secondary_exec_control & (SECONDARY_EXEC_ENABLE_EPT | |
269 | 12 | SECONDARY_EXEC_ENABLE_VPID) ) |
270 | 12 | { |
271 | 12 | rdmsrl(MSR_IA32_VMX_EPT_VPID_CAP, _vmx_ept_vpid_cap); |
272 | 12 | |
273 | 12 | if ( !opt_ept_ad ) |
274 | 0 | _vmx_ept_vpid_cap &= ~VMX_EPT_AD_BIT; |
275 | 12 | else if ( /* Work around Erratum AVR41 on Avoton processors. */ |
276 | 12 | boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x4d && |
277 | 0 | opt_ept_ad < 0 ) |
278 | 0 | _vmx_ept_vpid_cap &= ~VMX_EPT_AD_BIT; |
279 | 12 | |
280 | 12 | /* |
281 | 12 | * Additional sanity checking before using EPT: |
282 | 12 | * 1) the CPU we are running on must support EPT WB, as we will set |
283 | 12 | * ept paging structures memory type to WB; |
284 | 12 | * 2) the CPU must support the EPT page-walk length of 4 according to |
285 | 12 | * Intel SDM 25.2.2; |
286 | 12 | * 3) the CPU must support INVEPT all-context invalidation, because we |
287 | 12 | * will use it as a final resort if other types are not supported. |
288 | 12 | * |
289 | 12 | * Or we just don't use EPT. |
290 | 12 | */ |
291 | 12 | if ( !(_vmx_ept_vpid_cap & VMX_EPT_MEMORY_TYPE_WB) || |
292 | 12 | !(_vmx_ept_vpid_cap & VMX_EPT_WALK_LENGTH_4_SUPPORTED) || |
293 | 12 | !(_vmx_ept_vpid_cap & VMX_EPT_INVEPT_ALL_CONTEXT) ) |
294 | 0 | _vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; |
295 | 12 | |
296 | 12 | /* |
297 | 12 | * The CPU must support INVVPID all-context invalidation, because we |
298 | 12 | * will use it as a final resort if other types are not supported. |
299 | 12 | * |
300 | 12 | * Or we just don't use VPID. |
301 | 12 | */ |
302 | 12 | if ( !(_vmx_ept_vpid_cap & VMX_VPID_INVVPID_ALL_CONTEXT) ) |
303 | 0 | _vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; |
304 | 12 | |
305 | 12 | /* EPT A/D bits are required for PML */ |
306 | 12 | if ( !(_vmx_ept_vpid_cap & VMX_EPT_AD_BIT) ) |
307 | 12 | _vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_PML; |
308 | 12 | } |
309 | 12 | |
310 | 12 | if ( _vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT ) |
311 | 12 | { |
312 | 12 | /* |
313 | 12 | * To use EPT we expect to be able to clear certain intercepts. |
314 | 12 | * We check VMX_BASIC_MSR[55] to correctly handle default controls. |
315 | 12 | */ |
316 | 12 | uint32_t must_be_one, must_be_zero, msr = MSR_IA32_VMX_PROCBASED_CTLS; |
317 | 12 | if ( vmx_basic_msr_high & (VMX_BASIC_DEFAULT1_ZERO >> 32) ) |
318 | 12 | msr = MSR_IA32_VMX_TRUE_PROCBASED_CTLS; |
319 | 12 | rdmsr(msr, must_be_one, must_be_zero); |
320 | 12 | if ( must_be_one & (CPU_BASED_INVLPG_EXITING | |
321 | 12 | CPU_BASED_CR3_LOAD_EXITING | |
322 | 12 | CPU_BASED_CR3_STORE_EXITING) ) |
323 | 0 | _vmx_secondary_exec_control &= |
324 | 0 | ~(SECONDARY_EXEC_ENABLE_EPT | |
325 | 0 | SECONDARY_EXEC_UNRESTRICTED_GUEST); |
326 | 12 | } |
327 | 12 | |
328 | 12 | /* PML cannot be supported if EPT is not used */ |
329 | 12 | if ( !(_vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) ) |
330 | 0 | _vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_PML; |
331 | 12 | |
332 | 12 | /* Turn off opt_pml_enabled if PML feature is not present */ |
333 | 12 | if ( !(_vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_PML) ) |
334 | 12 | opt_pml_enabled = 0; |
335 | 12 | |
336 | 12 | if ( (_vmx_secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) && |
337 | 12 | ple_gap == 0 ) |
338 | 0 | { |
339 | 0 | if ( !vmx_pin_based_exec_control ) |
340 | 0 | printk(XENLOG_INFO "Disable Pause-Loop Exiting.\n"); |
341 | 0 | _vmx_secondary_exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; |
342 | 0 | } |
343 | 12 | |
344 | 12 | min = VM_EXIT_ACK_INTR_ON_EXIT; |
345 | 12 | opt = VM_EXIT_SAVE_GUEST_PAT | VM_EXIT_LOAD_HOST_PAT | |
346 | 12 | VM_EXIT_CLEAR_BNDCFGS; |
347 | 12 | min |= VM_EXIT_IA32E_MODE; |
348 | 12 | _vmx_vmexit_control = adjust_vmx_controls( |
349 | 12 | "VMExit Control", min, opt, MSR_IA32_VMX_EXIT_CTLS, &mismatch); |
350 | 12 | |
351 | 12 | /* |
352 | 12 | * "Process posted interrupt" can be set only when "virtual-interrupt |
353 | 12 | * delivery" and "acknowledge interrupt on exit" are set. Since the latter |
354 | 12 | * is a minimum requirement, only check the former, which is optional. |
355 | 12 | */ |
356 | 12 | if ( !(_vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) ) |
357 | 0 | _vmx_pin_based_exec_control &= ~PIN_BASED_POSTED_INTERRUPT; |
358 | 12 | |
359 | 12 | if ( iommu_intpost && |
360 | 0 | !(_vmx_pin_based_exec_control & PIN_BASED_POSTED_INTERRUPT) ) |
361 | 0 | { |
362 | 0 | printk("Intel VT-d Posted Interrupt is disabled for CPU-side Posted " |
363 | 0 | "Interrupt is not enabled\n"); |
364 | 0 | iommu_intpost = 0; |
365 | 0 | } |
366 | 12 | |
367 | 12 | /* The IA32_VMX_VMFUNC MSR exists only when VMFUNC is available */ |
368 | 12 | if ( _vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_VM_FUNCTIONS ) |
369 | 0 | { |
370 | 0 | rdmsrl(MSR_IA32_VMX_VMFUNC, _vmx_vmfunc); |
371 | 0 |
372 | 0 | /* |
373 | 0 | * VMFUNC leaf 0 (EPTP switching) must be supported. |
374 | 0 | * |
375 | 0 | * Or we just don't use VMFUNC. |
376 | 0 | */ |
377 | 0 | if ( !(_vmx_vmfunc & VMX_VMFUNC_EPTP_SWITCHING) ) |
378 | 0 | _vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_VM_FUNCTIONS; |
379 | 0 | } |
380 | 12 | |
381 | 12 | /* Virtualization exceptions are only enabled if VMFUNC is enabled */ |
382 | 12 | if ( !(_vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_VM_FUNCTIONS) ) |
383 | 12 | _vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS; |
384 | 12 | |
385 | 12 | min = 0; |
386 | 12 | opt = VM_ENTRY_LOAD_GUEST_PAT | VM_ENTRY_LOAD_BNDCFGS; |
387 | 12 | _vmx_vmentry_control = adjust_vmx_controls( |
388 | 12 | "VMEntry Control", min, opt, MSR_IA32_VMX_ENTRY_CTLS, &mismatch); |
389 | 12 | |
390 | 12 | if ( mismatch ) |
391 | 0 | return -EINVAL; |
392 | 12 | |
393 | 12 | if ( !vmx_pin_based_exec_control ) |
394 | 1 | { |
395 | 1 | /* First time through. */ |
396 | 1 | vmcs_revision_id = vmx_basic_msr_low & VMX_BASIC_REVISION_MASK; |
397 | 1 | vmx_pin_based_exec_control = _vmx_pin_based_exec_control; |
398 | 1 | vmx_cpu_based_exec_control = _vmx_cpu_based_exec_control; |
399 | 1 | vmx_secondary_exec_control = _vmx_secondary_exec_control; |
400 | 1 | vmx_ept_vpid_cap = _vmx_ept_vpid_cap; |
401 | 1 | vmx_vmexit_control = _vmx_vmexit_control; |
402 | 1 | vmx_vmentry_control = _vmx_vmentry_control; |
403 | 1 | vmx_basic_msr = ((u64)vmx_basic_msr_high << 32) | |
404 | 1 | vmx_basic_msr_low; |
405 | 1 | vmx_vmfunc = _vmx_vmfunc; |
406 | 1 | vmx_virt_exception = !!(_vmx_secondary_exec_control & |
407 | 1 | SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS); |
408 | 1 | vmx_display_features(); |
409 | 1 | |
410 | 1 | /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */ |
411 | 1 | if ( (vmx_basic_msr_high & (VMX_BASIC_VMCS_SIZE_MASK >> 32)) > |
412 | 1 | PAGE_SIZE ) |
413 | 0 | { |
414 | 0 | printk("VMX: CPU%d VMCS size is too big (%Lu bytes)\n", |
415 | 0 | smp_processor_id(), |
416 | 0 | vmx_basic_msr_high & (VMX_BASIC_VMCS_SIZE_MASK >> 32)); |
417 | 0 | return -EINVAL; |
418 | 0 | } |
419 | 1 | } |
420 | 12 | else |
421 | 11 | { |
422 | 11 | /* Globals are already initialised: re-check them. */ |
423 | 11 | mismatch |= cap_check( |
424 | 11 | "VMCS revision ID", |
425 | 11 | vmcs_revision_id, vmx_basic_msr_low & VMX_BASIC_REVISION_MASK); |
426 | 11 | mismatch |= cap_check( |
427 | 11 | "Pin-Based Exec Control", |
428 | 11 | vmx_pin_based_exec_control, _vmx_pin_based_exec_control); |
429 | 11 | mismatch |= cap_check( |
430 | 11 | "CPU-Based Exec Control", |
431 | 11 | vmx_cpu_based_exec_control, _vmx_cpu_based_exec_control); |
432 | 11 | mismatch |= cap_check( |
433 | 11 | "Secondary Exec Control", |
434 | 11 | vmx_secondary_exec_control, _vmx_secondary_exec_control); |
435 | 11 | mismatch |= cap_check( |
436 | 11 | "VMExit Control", |
437 | 11 | vmx_vmexit_control, _vmx_vmexit_control); |
438 | 11 | mismatch |= cap_check( |
439 | 11 | "VMEntry Control", |
440 | 11 | vmx_vmentry_control, _vmx_vmentry_control); |
441 | 11 | mismatch |= cap_check( |
442 | 11 | "EPT and VPID Capability", |
443 | 11 | vmx_ept_vpid_cap, _vmx_ept_vpid_cap); |
444 | 11 | mismatch |= cap_check( |
445 | 11 | "VMFUNC Capability", |
446 | 11 | vmx_vmfunc, _vmx_vmfunc); |
447 | 11 | if ( cpu_has_vmx_ins_outs_instr_info != |
448 | 11 | !!(vmx_basic_msr_high & (VMX_BASIC_INS_OUT_INFO >> 32)) ) |
449 | 0 | { |
450 | 0 | printk("VMX INS/OUTS Instruction Info: saw %d expected %d\n", |
451 | 0 | !!(vmx_basic_msr_high & (VMX_BASIC_INS_OUT_INFO >> 32)), |
452 | 0 | cpu_has_vmx_ins_outs_instr_info); |
453 | 0 | mismatch = 1; |
454 | 0 | } |
455 | 11 | if ( (vmx_basic_msr_high & (VMX_BASIC_VMCS_SIZE_MASK >> 32)) != |
456 | 11 | ((vmx_basic_msr & VMX_BASIC_VMCS_SIZE_MASK) >> 32) ) |
457 | 0 | { |
458 | 0 | printk("VMX: CPU%d unexpected VMCS size %Lu\n", |
459 | 0 | smp_processor_id(), |
460 | 0 | vmx_basic_msr_high & (VMX_BASIC_VMCS_SIZE_MASK >> 32)); |
461 | 0 | mismatch = 1; |
462 | 0 | } |
463 | 11 | if ( mismatch ) |
464 | 0 | { |
465 | 0 | printk("VMX: Capabilities fatally differ between CPU%d and CPU0\n", |
466 | 0 | smp_processor_id()); |
467 | 0 | return -EINVAL; |
468 | 0 | } |
469 | 11 | } |
470 | 12 | |
471 | 12 | /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */ |
472 | 12 | if ( vmx_basic_msr_high & (VMX_BASIC_32BIT_ADDRESSES >> 32) ) |
473 | 0 | { |
474 | 0 | printk("VMX: CPU%d limits VMX structure pointers to 32 bits\n", |
475 | 0 | smp_processor_id()); |
476 | 0 | return -EINVAL; |
477 | 0 | } |
478 | 12 | |
479 | 12 | /* Require Write-Back (WB) memory type for VMCS accesses. */ |
480 | 12 | opt = (vmx_basic_msr_high & (VMX_BASIC_MEMORY_TYPE_MASK >> 32)) / |
481 | 12 | ((VMX_BASIC_MEMORY_TYPE_MASK & -VMX_BASIC_MEMORY_TYPE_MASK) >> 32); |
482 | 12 | if ( opt != MTRR_TYPE_WRBACK ) |
483 | 0 | { |
484 | 0 | printk("VMX: CPU%d has unexpected VMCS access type %u\n", |
485 | 0 | smp_processor_id(), opt); |
486 | 0 | return -EINVAL; |
487 | 0 | } |
488 | 12 | |
489 | 12 | return 0; |
490 | 12 | } |
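The memory-type check near the end of the function above uses a mask-extraction
idiom worth spelling out; a minimal sketch with a hypothetical helper name (Xen
has a comparable MASK_EXTR() macro):

    /* (m & -m) isolates the lowest set bit of m, so the division shifts
     * the masked field down to bit 0. */
    #define FIELD_EXTRACT(v, m) (((v) & (m)) / ((m) & -(m)))

    /* VMX_BASIC_MEMORY_TYPE_MASK >> 32 covers bits 21:18 of the high
     * dword; with WB (6) encoded there:
     * FIELD_EXTRACT(0x00180000, 0x003c0000) == 6 == MTRR_TYPE_WRBACK. */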
491 | | |
492 | | static paddr_t vmx_alloc_vmcs(void) |
493 | 24 | { |
494 | 24 | struct page_info *pg; |
495 | 24 | struct vmcs_struct *vmcs; |
496 | 24 | |
497 | 24 | if ( (pg = alloc_domheap_page(NULL, 0)) == NULL ) |
498 | 0 | { |
499 | 0 | gdprintk(XENLOG_WARNING, "Failed to allocate VMCS.\n"); |
500 | 0 | return 0; |
501 | 0 | } |
502 | 24 | |
503 | 24 | vmcs = __map_domain_page(pg); |
504 | 24 | clear_page(vmcs); |
505 | 24 | vmcs->vmcs_revision_id = vmcs_revision_id; |
506 | 24 | unmap_domain_page(vmcs); |
507 | 24 | |
508 | 24 | return page_to_maddr(pg); |
509 | 24 | } |
510 | | |
511 | | static void vmx_free_vmcs(paddr_t pa) |
512 | 0 | { |
513 | 0 | free_domheap_page(maddr_to_page(pa)); |
514 | 0 | } |
515 | | |
516 | | static void __vmx_clear_vmcs(void *info) |
517 | 807 | { |
518 | 807 | struct vcpu *v = info; |
519 | 807 | struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx; |
520 | 807 | |
521 | 807 | /* Otherwise we can nest (vmx_cpu_down() vs. vmx_clear_vmcs()). */ |
522 | 807 | ASSERT(!local_irq_is_enabled()); |
523 | 807 | |
524 | 807 | if ( arch_vmx->active_cpu == smp_processor_id() ) |
525 | 807 | { |
526 | 807 | __vmpclear(arch_vmx->vmcs_pa); |
527 | 807 | if ( arch_vmx->vmcs_shadow_maddr ) |
528 | 0 | __vmpclear(arch_vmx->vmcs_shadow_maddr); |
529 | 807 | |
530 | 807 | arch_vmx->active_cpu = -1; |
531 | 807 | arch_vmx->launched = 0; |
532 | 807 | |
533 | 807 | list_del(&arch_vmx->active_list); |
534 | 807 | |
535 | 807 | if ( arch_vmx->vmcs_pa == this_cpu(current_vmcs) ) |
536 | 301 | this_cpu(current_vmcs) = 0; |
537 | 807 | } |
538 | 807 | } |
539 | | |
540 | | static void vmx_clear_vmcs(struct vcpu *v) |
541 | 1.09k | { |
542 | 1.09k | int cpu = v->arch.hvm_vmx.active_cpu; |
543 | 1.09k | |
544 | 1.09k | if ( cpu != -1 ) |
545 | 807 | on_selected_cpus(cpumask_of(cpu), __vmx_clear_vmcs, v, 1); |
546 | 1.09k | } |
547 | | |
548 | | static void vmx_load_vmcs(struct vcpu *v) |
549 | 34.7k | { |
550 | 34.7k | unsigned long flags; |
551 | 34.7k | |
552 | 34.7k | local_irq_save(flags); |
553 | 34.7k | |
554 | 34.7k | if ( v->arch.hvm_vmx.active_cpu == -1 ) |
555 | 819 | { |
556 | 819 | list_add(&v->arch.hvm_vmx.active_list, &this_cpu(active_vmcs_list)); |
557 | 819 | v->arch.hvm_vmx.active_cpu = smp_processor_id(); |
558 | 819 | } |
559 | 34.7k | |
560 | 34.7k | ASSERT(v->arch.hvm_vmx.active_cpu == smp_processor_id()); |
561 | 34.7k | |
562 | 34.7k | __vmptrld(v->arch.hvm_vmx.vmcs_pa); |
563 | 34.7k | this_cpu(current_vmcs) = v->arch.hvm_vmx.vmcs_pa; |
564 | 34.7k | |
565 | 34.7k | local_irq_restore(flags); |
566 | 34.7k | } |
567 | | |
568 | | void vmx_vmcs_reload(struct vcpu *v) |
569 | 4.57M | { |
570 | 4.57M | /* |
571 | 4.57M | * As we may be running with interrupts disabled, we can't acquire |
572 | 4.57M | * v->arch.hvm_vmx.vmcs_lock here. However, with interrupts disabled |
573 | 4.57M | * the VMCS can't be taken away from us anymore if we still own it. |
574 | 4.57M | */ |
575 | 4.57M | ASSERT(v->is_running || !local_irq_is_enabled()); |
576 | 4.57M | if ( v->arch.hvm_vmx.vmcs_pa == this_cpu(current_vmcs) ) |
577 | 4.56M | return; |
578 | 4.57M | |
579 | 11.0k | vmx_load_vmcs(v); |
580 | 11.0k | } |
581 | | |
582 | | int vmx_cpu_up_prepare(unsigned int cpu) |
583 | 12 | { |
584 | 12 | /* |
585 | 12 | * If nvmx_cpu_up_prepare() fails, do not return failure; just fall back |
586 | 12 | * to legacy mode for vvmcs synchronization. |
587 | 12 | */ |
588 | 12 | if ( nvmx_cpu_up_prepare(cpu) != 0 ) |
589 | 0 | printk("CPU%d: Could not allocate virtual VMCS buffer.\n", cpu); |
590 | 12 | |
591 | 12 | if ( per_cpu(vmxon_region, cpu) ) |
592 | 0 | return 0; |
593 | 12 | |
594 | 12 | per_cpu(vmxon_region, cpu) = vmx_alloc_vmcs(); |
595 | 12 | if ( per_cpu(vmxon_region, cpu) ) |
596 | 12 | return 0; |
597 | 12 | |
598 | 0 | printk("CPU%d: Could not allocate host VMCS\n", cpu); |
599 | 0 | nvmx_cpu_dead(cpu); |
600 | 0 | return -ENOMEM; |
601 | 12 | } |
602 | | |
603 | | void vmx_cpu_dead(unsigned int cpu) |
604 | 0 | { |
605 | 0 | vmx_free_vmcs(per_cpu(vmxon_region, cpu)); |
606 | 0 | per_cpu(vmxon_region, cpu) = 0; |
607 | 0 | nvmx_cpu_dead(cpu); |
608 | 0 | vmx_pi_desc_fixup(cpu); |
609 | 0 | } |
610 | | |
611 | | int _vmx_cpu_up(bool bsp) |
612 | 12 | { |
613 | 12 | u32 eax, edx; |
614 | 12 | int rc, bios_locked, cpu = smp_processor_id(); |
615 | 12 | u64 cr0, vmx_cr0_fixed0, vmx_cr0_fixed1; |
616 | 12 | |
617 | 12 | BUG_ON(!(read_cr4() & X86_CR4_VMXE)); |
618 | 12 | |
619 | 12 | /* |
620 | 12 | * Ensure the current processor's operating mode satisfies |
621 | 12 | * the required CR0 fixed bits for VMX operation. |
622 | 12 | */ |
623 | 12 | cr0 = read_cr0(); |
624 | 12 | rdmsrl(MSR_IA32_VMX_CR0_FIXED0, vmx_cr0_fixed0); |
625 | 12 | rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx_cr0_fixed1); |
626 | 12 | if ( (~cr0 & vmx_cr0_fixed0) || (cr0 & ~vmx_cr0_fixed1) ) |
627 | 0 | { |
628 | 0 | printk("CPU%d: some settings of host CR0 are " |
629 | 0 | "not allowed in VMX operation.\n", cpu); |
630 | 0 | return -EINVAL; |
631 | 0 | } |
632 | 12 | |
633 | 12 | rdmsr(MSR_IA32_FEATURE_CONTROL, eax, edx); |
634 | 12 | |
635 | 12 | bios_locked = !!(eax & IA32_FEATURE_CONTROL_LOCK); |
636 | 12 | if ( bios_locked ) |
637 | 12 | { |
638 | 12 | if ( !(eax & (tboot_in_measured_env() |
639 | 0 | ? IA32_FEATURE_CONTROL_ENABLE_VMXON_INSIDE_SMX |
640 | 12 | : IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX)) ) |
641 | 0 | { |
642 | 0 | printk("CPU%d: VMX disabled by BIOS.\n", cpu); |
643 | 0 | return -EINVAL; |
644 | 0 | } |
645 | 12 | } |
646 | 12 | else |
647 | 0 | { |
648 | 0 | eax = IA32_FEATURE_CONTROL_LOCK; |
649 | 0 | eax |= IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX; |
650 | 0 | if ( test_bit(X86_FEATURE_SMX, &boot_cpu_data.x86_capability) ) |
651 | 0 | eax |= IA32_FEATURE_CONTROL_ENABLE_VMXON_INSIDE_SMX; |
652 | 0 | wrmsr(MSR_IA32_FEATURE_CONTROL, eax, 0); |
653 | 0 | } |
654 | 12 | |
655 | 12 | if ( (rc = vmx_init_vmcs_config()) != 0 ) |
656 | 0 | return rc; |
657 | 12 | |
658 | 12 | INIT_LIST_HEAD(&this_cpu(active_vmcs_list)); |
659 | 12 | |
660 | 12 | if ( bsp && (rc = vmx_cpu_up_prepare(cpu)) != 0 ) |
661 | 0 | return rc; |
662 | 12 | |
663 | 12 | switch ( __vmxon(this_cpu(vmxon_region)) ) |
664 | 12 | { |
665 | 0 | case -2: /* #UD or #GP */ |
666 | 0 | if ( bios_locked && |
667 | 0 | test_bit(X86_FEATURE_SMX, &boot_cpu_data.x86_capability) && |
668 | 0 | (!(eax & IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX) || |
669 | 0 | !(eax & IA32_FEATURE_CONTROL_ENABLE_VMXON_INSIDE_SMX)) ) |
670 | 0 | { |
671 | 0 | printk("CPU%d: VMXON failed: perhaps because of TXT settings " |
672 | 0 | "in your BIOS configuration?\n", cpu); |
673 | 0 | printk(" --> Disable TXT in your BIOS unless using a secure " |
674 | 0 | "bootloader.\n"); |
675 | 0 | return -EINVAL; |
676 | 0 | } |
677 | 0 | /* fall through */ |
678 | 0 | case -1: /* CF==1 or ZF==1 */ |
679 | 0 | printk("CPU%d: unexpected VMXON failure\n", cpu); |
680 | 0 | return -EINVAL; |
681 | 12 | case 0: /* success */ |
682 | 12 | this_cpu(vmxon) = 1; |
683 | 12 | break; |
684 | 0 | default: |
685 | 0 | BUG(); |
686 | 12 | } |
687 | 12 | |
688 | 12 | hvm_asid_init(cpu_has_vmx_vpid ? (1u << VMCS_VPID_WIDTH) : 0); |
689 | 12 | |
690 | 12 | if ( cpu_has_vmx_ept ) |
691 | 12 | ept_sync_all(); |
692 | 12 | |
693 | 12 | if ( cpu_has_vmx_vpid ) |
694 | 12 | vpid_sync_all(); |
695 | 12 | |
696 | 12 | vmx_pi_per_cpu_init(cpu); |
697 | 12 | |
698 | 12 | return 0; |
699 | 12 | } |
700 | | |
701 | | int vmx_cpu_up(void) |
702 | 11 | { |
703 | 11 | return _vmx_cpu_up(false); |
704 | 11 | } |
705 | | |
706 | | void vmx_cpu_down(void) |
707 | 0 | { |
708 | 0 | struct list_head *active_vmcs_list = &this_cpu(active_vmcs_list); |
709 | 0 | unsigned long flags; |
710 | 0 |
711 | 0 | if ( !this_cpu(vmxon) ) |
712 | 0 | return; |
713 | 0 |
714 | 0 | local_irq_save(flags); |
715 | 0 |
716 | 0 | while ( !list_empty(active_vmcs_list) ) |
717 | 0 | __vmx_clear_vmcs(list_entry(active_vmcs_list->next, |
718 | 0 | struct vcpu, arch.hvm_vmx.active_list)); |
719 | 0 |
720 | 0 | BUG_ON(!(read_cr4() & X86_CR4_VMXE)); |
721 | 0 | this_cpu(vmxon) = 0; |
722 | 0 | __vmxoff(); |
723 | 0 |
724 | 0 | local_irq_restore(flags); |
725 | 0 | } |
726 | | |
727 | | struct foreign_vmcs { |
728 | | struct vcpu *v; |
729 | | unsigned int count; |
730 | | }; |
731 | | static DEFINE_PER_CPU(struct foreign_vmcs, foreign_vmcs); |
732 | | |
733 | | bool_t vmx_vmcs_try_enter(struct vcpu *v) |
734 | 492k | { |
735 | 492k | struct foreign_vmcs *fv; |
736 | 492k | |
737 | 492k | /* |
738 | 492k | * NB. We must *always* run an HVM VCPU on its own VMCS, except for |
739 | 492k | * vmx_vmcs_enter/exit and scheduling tail critical regions. |
740 | 492k | */ |
741 | 492k | if ( likely(v == current) ) |
742 | 492k | return v->arch.hvm_vmx.vmcs_pa == this_cpu(current_vmcs); |
743 | 492k | |
744 | 305 | fv = &this_cpu(foreign_vmcs); |
745 | 305 | |
746 | 305 | if ( fv->v == v ) |
747 | 24 | { |
748 | 24 | BUG_ON(fv->count == 0); |
749 | 24 | } |
750 | 305 | else |
751 | 281 | { |
752 | 281 | BUG_ON(fv->v != NULL); |
753 | 281 | BUG_ON(fv->count != 0); |
754 | 281 | |
755 | 281 | vcpu_pause(v); |
756 | 281 | spin_lock(&v->arch.hvm_vmx.vmcs_lock); |
757 | 281 | |
758 | 281 | vmx_clear_vmcs(v); |
759 | 281 | vmx_load_vmcs(v); |
760 | 281 | |
761 | 281 | fv->v = v; |
762 | 281 | } |
763 | 305 | |
764 | 305 | fv->count++; |
765 | 305 | |
766 | 305 | return 1; |
767 | 492k | } |
768 | | |
769 | | void vmx_vmcs_enter(struct vcpu *v) |
770 | 312k | { |
771 | 312k | bool_t okay = vmx_vmcs_try_enter(v); |
772 | 312k | |
773 | 312k | ASSERT(okay); |
774 | 312k | } |
775 | | |
776 | | void vmx_vmcs_exit(struct vcpu *v) |
777 | 493k | { |
778 | 493k | struct foreign_vmcs *fv; |
779 | 493k | |
780 | 493k | if ( likely(v == current) ) |
781 | 492k | return; |
782 | 493k | |
783 | 258 | fv = &this_cpu(foreign_vmcs); |
784 | 258 | BUG_ON(fv->v != v); |
785 | 258 | BUG_ON(fv->count == 0); |
786 | 258 | |
787 | 258 | if ( --fv->count == 0 ) |
788 | 271 | { |
789 | 271 | /* Don't confuse vmx_do_resume (for @v or @current!) */ |
790 | 271 | vmx_clear_vmcs(v); |
791 | 271 | if ( is_hvm_vcpu(current) ) |
792 | 0 | vmx_load_vmcs(current); |
793 | 271 | |
794 | 271 | spin_unlock(&v->arch.hvm_vmx.vmcs_lock); |
795 | 271 | vcpu_unpause(v); |
796 | 271 | |
797 | 271 | fv->v = NULL; |
798 | 271 | } |
799 | 258 | } |
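A usage sketch of the enter/exit pairing above (hypothetical helper; the field
read is only an example). For a foreign vCPU, vmx_vmcs_enter() pauses @v and
loads its VMCS, so the VMREAD below targets @v rather than current:

    void example_dump_rip(struct vcpu *v)   /* hypothetical */
    {
        unsigned long rip;

        vmx_vmcs_enter(v);
        __vmread(GUEST_RIP, &rip);
        vmx_vmcs_exit(v);

        printk("vCPU%d RIP = %lx\n", v->vcpu_id, rip);
    }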
800 | | |
801 | | static void vmx_set_host_env(struct vcpu *v) |
802 | 548 | { |
803 | 548 | unsigned int cpu = smp_processor_id(); |
804 | 548 | |
805 | 548 | __vmwrite(HOST_GDTR_BASE, |
806 | 548 | (unsigned long)(this_cpu(gdt_table) - FIRST_RESERVED_GDT_ENTRY)); |
807 | 548 | __vmwrite(HOST_IDTR_BASE, (unsigned long)idt_tables[cpu]); |
808 | 548 | |
809 | 548 | __vmwrite(HOST_TR_SELECTOR, TSS_ENTRY << 3); |
810 | 548 | __vmwrite(HOST_TR_BASE, (unsigned long)&per_cpu(init_tss, cpu)); |
811 | 548 | |
812 | 548 | __vmwrite(HOST_SYSENTER_ESP, get_stack_bottom()); |
813 | 548 | |
814 | 548 | /* |
815 | 548 | * Skip end of cpu_user_regs when entering the hypervisor because the |
816 | 548 | * CPU does not save context onto the stack. SS,RSP,CS,RIP,RFLAGS,etc |
817 | 548 | * all get saved into the VMCS instead. |
818 | 548 | */ |
819 | 548 | __vmwrite(HOST_RSP, |
820 | 548 | (unsigned long)&get_cpu_info()->guest_cpu_user_regs.error_code); |
821 | 548 | } |
822 | | |
823 | | void vmx_clear_msr_intercept(struct vcpu *v, unsigned int msr, |
824 | | enum vmx_msr_intercept_type type) |
825 | 1.51k | { |
826 | 1.51k | struct vmx_msr_bitmap *msr_bitmap = v->arch.hvm_vmx.msr_bitmap; |
827 | 1.51k | struct domain *d = v->domain; |
828 | 1.51k | |
829 | 1.51k | /* VMX MSR bitmap supported? */ |
830 | 1.51k | if ( msr_bitmap == NULL ) |
831 | 0 | return; |
832 | 1.51k | |
833 | 1.51k | if ( unlikely(monitored_msr(d, msr)) ) |
834 | 0 | return; |
835 | 1.51k | |
836 | 1.51k | if ( msr <= 0x1fff ) |
837 | 1.42k | { |
838 | 1.42k | if ( type & VMX_MSR_R ) |
839 | 1.40k | clear_bit(msr, msr_bitmap->read_low); |
840 | 1.42k | if ( type & VMX_MSR_W ) |
841 | 104 | clear_bit(msr, msr_bitmap->write_low); |
842 | 1.42k | } |
843 | 94 | else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) ) |
844 | 36 | { |
845 | 36 | msr &= 0x1fff; |
846 | 36 | if ( type & VMX_MSR_R ) |
847 | 36 | clear_bit(msr, msr_bitmap->read_high); |
848 | 36 | if ( type & VMX_MSR_W ) |
849 | 36 | clear_bit(msr, msr_bitmap->write_high); |
850 | 36 | } |
851 | 94 | else |
852 | 58 | ASSERT(!"MSR out of range for interception\n"); |
853 | 1.51k | } |
854 | | |
855 | | void vmx_set_msr_intercept(struct vcpu *v, unsigned int msr, |
856 | | enum vmx_msr_intercept_type type) |
857 | 3.12k | { |
858 | 3.12k | struct vmx_msr_bitmap *msr_bitmap = v->arch.hvm_vmx.msr_bitmap; |
859 | 3.12k | |
860 | 3.12k | /* VMX MSR bitmap supported? */ |
861 | 3.12k | if ( msr_bitmap == NULL ) |
862 | 0 | return; |
863 | 3.12k | |
864 | 3.12k | if ( msr <= 0x1fff ) |
865 | 3.12k | { |
866 | 3.12k | if ( type & VMX_MSR_R ) |
867 | 3.12k | set_bit(msr, msr_bitmap->read_low); |
868 | 3.12k | if ( type & VMX_MSR_W ) |
869 | 3.09k | set_bit(msr, msr_bitmap->write_low); |
870 | 3.12k | } |
871 | 18.4E | else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) ) |
872 | 0 | { |
873 | 0 | msr &= 0x1fff; |
874 | 0 | if ( type & VMX_MSR_R ) |
875 | 0 | set_bit(msr, msr_bitmap->read_high); |
876 | 0 | if ( type & VMX_MSR_W ) |
877 | 0 | set_bit(msr, msr_bitmap->write_high); |
878 | 0 | } |
879 | 18.4E | else |
880 | 18.4E | ASSERT(!"MSR out of range for interception\n"); |
881 | 3.12k | } |
882 | | |
883 | | bool vmx_msr_is_intercepted(struct vmx_msr_bitmap *msr_bitmap, |
884 | | unsigned int msr, bool is_write) |
885 | 0 | { |
886 | 0 | if ( msr <= 0x1fff ) |
887 | 0 | return test_bit(msr, is_write ? msr_bitmap->write_low |
888 | 0 | : msr_bitmap->read_low); |
889 | 0 | else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) ) |
890 | 0 | return test_bit(msr & 0x1fff, is_write ? msr_bitmap->write_high |
891 | 0 | : msr_bitmap->read_high); |
892 | 0 | else |
893 | 0 | /* MSRs outside the bitmap ranges are always intercepted. */ |
894 | 0 | return true; |
895 | 0 | } |
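The quadrant arithmetic in the three functions above follows the MSR bitmap
layout defined by the Intel SDM: one 4KiB page split into four 1KiB quadrants
of 8192 bits each, onto which the field names map directly:

    /* read_low    MSRs 0x00000000-0x00001fff, read intercepts   */
    /* read_high   MSRs 0xc0000000-0xc0001fff, read intercepts   */
    /* write_low   MSRs 0x00000000-0x00001fff, write intercepts  */
    /* write_high  MSRs 0xc0000000-0xc0001fff, write intercepts  */

A set bit means "intercept"; construct_vmcs() below memsets the page to ~0 so
every MSR is intercepted by default, and vmx_clear_msr_intercept() selectively
opens fast paths.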
896 | | |
897 | | |
898 | | /* |
899 | | * Switch VMCS between layer 1 & 2 guest |
900 | | */ |
901 | | void vmx_vmcs_switch(paddr_t from, paddr_t to) |
902 | 0 | { |
903 | 0 | struct arch_vmx_struct *vmx = ¤t->arch.hvm_vmx; |
904 | 0 | spin_lock(&vmx->vmcs_lock); |
905 | 0 |
906 | 0 | __vmpclear(from); |
907 | 0 | if ( vmx->vmcs_shadow_maddr ) |
908 | 0 | __vmpclear(vmx->vmcs_shadow_maddr); |
909 | 0 | __vmptrld(to); |
910 | 0 |
911 | 0 | vmx->vmcs_pa = to; |
912 | 0 | vmx->launched = 0; |
913 | 0 | this_cpu(current_vmcs) = to; |
914 | 0 |
915 | 0 | if ( vmx->hostenv_migrated ) |
916 | 0 | { |
917 | 0 | vmx->hostenv_migrated = 0; |
918 | 0 | vmx_set_host_env(current); |
919 | 0 | } |
920 | 0 |
921 | 0 | spin_unlock(&vmx->vmcs_lock); |
922 | 0 | } |
923 | | |
924 | | void virtual_vmcs_enter(const struct vcpu *v) |
925 | 0 | { |
926 | 0 | __vmptrld(v->arch.hvm_vmx.vmcs_shadow_maddr); |
927 | 0 | } |
928 | | |
929 | | void virtual_vmcs_exit(const struct vcpu *v) |
930 | 0 | { |
931 | 0 | paddr_t cur = this_cpu(current_vmcs); |
932 | 0 |
933 | 0 | __vmpclear(v->arch.hvm_vmx.vmcs_shadow_maddr); |
934 | 0 | if ( cur ) |
935 | 0 | __vmptrld(cur); |
936 | 0 | } |
937 | | |
938 | | u64 virtual_vmcs_vmread(const struct vcpu *v, u32 vmcs_encoding) |
939 | 0 | { |
940 | 0 | u64 res; |
941 | 0 |
942 | 0 | virtual_vmcs_enter(v); |
943 | 0 | __vmread(vmcs_encoding, &res); |
944 | 0 | virtual_vmcs_exit(v); |
945 | 0 |
946 | 0 | return res; |
947 | 0 | } |
948 | | |
949 | | enum vmx_insn_errno virtual_vmcs_vmread_safe(const struct vcpu *v, |
950 | | u32 vmcs_encoding, u64 *val) |
951 | 0 | { |
952 | 0 | enum vmx_insn_errno ret; |
953 | 0 |
954 | 0 | virtual_vmcs_enter(v); |
955 | 0 | ret = vmread_safe(vmcs_encoding, val); |
956 | 0 | virtual_vmcs_exit(v); |
957 | 0 |
958 | 0 | return ret; |
959 | 0 | } |
960 | | |
961 | | void virtual_vmcs_vmwrite(const struct vcpu *v, u32 vmcs_encoding, u64 val) |
962 | 0 | { |
963 | 0 | virtual_vmcs_enter(v); |
964 | 0 | __vmwrite(vmcs_encoding, val); |
965 | 0 | virtual_vmcs_exit(v); |
966 | 0 | } |
967 | | |
968 | | enum vmx_insn_errno virtual_vmcs_vmwrite_safe(const struct vcpu *v, |
969 | | u32 vmcs_encoding, u64 val) |
970 | 0 | { |
971 | 0 | enum vmx_insn_errno ret; |
972 | 0 |
973 | 0 | virtual_vmcs_enter(v); |
974 | 0 | ret = vmwrite_safe(vmcs_encoding, val); |
975 | 0 | virtual_vmcs_exit(v); |
976 | 0 |
977 | 0 | return ret; |
978 | 0 | } |
979 | | |
980 | | /* |
981 | | * This function is only called in a vCPU's initialization phase, |
982 | | * so we can update the posted-interrupt descriptor in a non-atomic way. |
983 | | */ |
984 | | static void pi_desc_init(struct vcpu *v) |
985 | 0 | { |
986 | 0 | v->arch.hvm_vmx.pi_desc.nv = posted_intr_vector; |
987 | 0 |
988 | 0 | /* |
989 | 0 | * Mark NDST as invalid, so that this invalid value can be used as a |
990 | 0 | * marker of whether NDST needs updating in vmx_pi_hooks_assign(). |
991 | 0 | */ |
992 | 0 | v->arch.hvm_vmx.pi_desc.ndst = APIC_INVALID_DEST; |
993 | 0 | } |
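The two fields written above live in the 64-byte posted-interrupt descriptor
shared between the CPU and the IOMMU. A conceptual layout per the VT-d
specification (field names follow the spec, not necessarily Xen's exact
struct pi_desc declaration):

    /* u32 pir[8];  256-bit Posted Interrupt Requests, one bit/vector    */
    /* bit on;      Outstanding Notification: notification event pending */
    /* bit sn;      Suppress Notification (vCPU not runnable)            */
    /* u8  nv;      Notification Vector -> posted_intr_vector            */
    /* u32 ndst;    Notification Destination (APIC ID); set to           */
    /*              APIC_INVALID_DEST here until vmx_pi_hooks_assign()   */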
994 | | |
995 | | static int construct_vmcs(struct vcpu *v) |
996 | 12 | { |
997 | 12 | struct domain *d = v->domain; |
998 | 12 | uint16_t sysenter_cs; |
999 | 12 | unsigned long sysenter_eip; |
1000 | 12 | u32 vmexit_ctl = vmx_vmexit_control; |
1001 | 12 | u32 vmentry_ctl = vmx_vmentry_control; |
1002 | 12 | |
1003 | 12 | vmx_vmcs_enter(v); |
1004 | 12 | |
1005 | 12 | /* VMCS controls. */ |
1006 | 12 | __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control); |
1007 | 12 | |
1008 | 12 | v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control; |
1009 | 12 | if ( d->arch.vtsc && !cpu_has_vmx_tsc_scaling ) |
1010 | 0 | v->arch.hvm_vmx.exec_control |= CPU_BASED_RDTSC_EXITING; |
1011 | 12 | |
1012 | 12 | v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control; |
1013 | 12 | |
1014 | 12 | /* |
1015 | 12 | * Disable descriptor-table exiting: it is enabled only when a VM event |
1016 | 12 | * monitor requests it. |
1017 | 12 | */ |
1018 | 12 | v->arch.hvm_vmx.secondary_exec_control &= |
1019 | 12 | ~SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING; |
1020 | 12 | |
1021 | 12 | /* Disable VPID for now: we decide when to enable it on VMENTER. */ |
1022 | 12 | v->arch.hvm_vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; |
1023 | 12 | |
1024 | 12 | if ( paging_mode_hap(d) ) |
1025 | 12 | { |
1026 | 12 | v->arch.hvm_vmx.exec_control &= ~(CPU_BASED_INVLPG_EXITING | |
1027 | 12 | CPU_BASED_CR3_LOAD_EXITING | |
1028 | 12 | CPU_BASED_CR3_STORE_EXITING); |
1029 | 12 | } |
1030 | 12 | else |
1031 | 0 | { |
1032 | 0 | v->arch.hvm_vmx.secondary_exec_control &= |
1033 | 0 | ~(SECONDARY_EXEC_ENABLE_EPT | |
1034 | 0 | SECONDARY_EXEC_UNRESTRICTED_GUEST | |
1035 | 0 | SECONDARY_EXEC_ENABLE_INVPCID); |
1036 | 0 | vmexit_ctl &= ~(VM_EXIT_SAVE_GUEST_PAT | |
1037 | 0 | VM_EXIT_LOAD_HOST_PAT); |
1038 | 0 | vmentry_ctl &= ~VM_ENTRY_LOAD_GUEST_PAT; |
1039 | 0 | } |
1040 | 12 | |
1041 | 12 | /* Disable Virtualize x2APIC mode by default. */ |
1042 | 12 | v->arch.hvm_vmx.secondary_exec_control &= |
1043 | 12 | ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; |
1044 | 12 | |
1045 | 12 | /* Do not enable Monitor Trap Flag unless single-step debugging is started. */ |
1046 | 12 | v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG; |
1047 | 12 | |
1048 | 12 | /* Disable VMFUNC and #VE for now: they may be enabled later by altp2m. */ |
1049 | 12 | v->arch.hvm_vmx.secondary_exec_control &= |
1050 | 12 | ~(SECONDARY_EXEC_ENABLE_VM_FUNCTIONS | |
1051 | 12 | SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS); |
1052 | 12 | |
1053 | 12 | if ( !has_vlapic(d) ) |
1054 | 0 | { |
1055 | 0 | /* Disable virtual apics, TPR */ |
1056 | 0 | v->arch.hvm_vmx.secondary_exec_control &= |
1057 | 0 | ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
1058 | 0 | | SECONDARY_EXEC_APIC_REGISTER_VIRT |
1059 | 0 | | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); |
1060 | 0 | v->arch.hvm_vmx.exec_control &= ~CPU_BASED_TPR_SHADOW; |
1061 | 0 |
|
1062 | 0 | /* In turn, disable posted interrupts. */ |
1063 | 0 | __vmwrite(PIN_BASED_VM_EXEC_CONTROL, |
1064 | 0 | vmx_pin_based_exec_control & ~PIN_BASED_POSTED_INTERRUPT); |
1065 | 0 | } |
1066 | 12 | |
1067 | 12 | vmx_update_cpu_exec_control(v); |
1068 | 12 | |
1069 | 12 | __vmwrite(VM_EXIT_CONTROLS, vmexit_ctl); |
1070 | 12 | __vmwrite(VM_ENTRY_CONTROLS, vmentry_ctl); |
1071 | 12 | |
1072 | 12 | if ( cpu_has_vmx_ple ) |
1073 | 12 | { |
1074 | 12 | __vmwrite(PLE_GAP, ple_gap); |
1075 | 12 | __vmwrite(PLE_WINDOW, ple_window); |
1076 | 12 | } |
1077 | 12 | |
1078 | 12 | if ( cpu_has_vmx_secondary_exec_control ) |
1079 | 12 | __vmwrite(SECONDARY_VM_EXEC_CONTROL, |
1080 | 12 | v->arch.hvm_vmx.secondary_exec_control); |
1081 | 12 | |
1082 | 12 | /* MSR access bitmap. */ |
1083 | 12 | if ( cpu_has_vmx_msr_bitmap ) |
1084 | 12 | { |
1085 | 12 | struct vmx_msr_bitmap *msr_bitmap = alloc_xenheap_page(); |
1086 | 12 | |
1087 | 12 | if ( msr_bitmap == NULL ) |
1088 | 0 | { |
1089 | 0 | vmx_vmcs_exit(v); |
1090 | 0 | return -ENOMEM; |
1091 | 0 | } |
1092 | 12 | |
1093 | 12 | memset(msr_bitmap, ~0, PAGE_SIZE); |
1094 | 12 | v->arch.hvm_vmx.msr_bitmap = msr_bitmap; |
1095 | 12 | __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap)); |
1096 | 12 | |
1097 | 12 | vmx_clear_msr_intercept(v, MSR_FS_BASE, VMX_MSR_RW); |
1098 | 12 | vmx_clear_msr_intercept(v, MSR_GS_BASE, VMX_MSR_RW); |
1099 | 12 | vmx_clear_msr_intercept(v, MSR_SHADOW_GS_BASE, VMX_MSR_RW); |
1100 | 12 | vmx_clear_msr_intercept(v, MSR_IA32_SYSENTER_CS, VMX_MSR_RW); |
1101 | 12 | vmx_clear_msr_intercept(v, MSR_IA32_SYSENTER_ESP, VMX_MSR_RW); |
1102 | 12 | vmx_clear_msr_intercept(v, MSR_IA32_SYSENTER_EIP, VMX_MSR_RW); |
1103 | 12 | if ( paging_mode_hap(d) && (!iommu_enabled || iommu_snoop) ) |
1104 | 12 | vmx_clear_msr_intercept(v, MSR_IA32_CR_PAT, VMX_MSR_RW); |
1105 | 12 | if ( (vmexit_ctl & VM_EXIT_CLEAR_BNDCFGS) && |
1106 | 0 | (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) ) |
1107 | 0 | vmx_clear_msr_intercept(v, MSR_IA32_BNDCFGS, VMX_MSR_RW); |
1108 | 12 | } |
1109 | 12 | |
1110 | 12 | /* I/O access bitmap. */ |
1111 | 12 | __vmwrite(IO_BITMAP_A, __pa(d->arch.hvm_domain.io_bitmap)); |
1112 | 12 | __vmwrite(IO_BITMAP_B, __pa(d->arch.hvm_domain.io_bitmap) + PAGE_SIZE); |
1113 | 12 | |
1114 | 12 | if ( cpu_has_vmx_virtual_intr_delivery ) |
1115 | 12 | { |
1116 | 12 | unsigned int i; |
1117 | 12 | |
1118 | 12 | /* EOI-exit bitmap */ |
1119 | 12 | bitmap_zero(v->arch.hvm_vmx.eoi_exit_bitmap, NR_VECTORS); |
1120 | 60 | for ( i = 0; i < ARRAY_SIZE(v->arch.hvm_vmx.eoi_exit_bitmap); ++i ) |
1121 | 48 | __vmwrite(EOI_EXIT_BITMAP(i), 0); |
1122 | 12 | |
1123 | 12 | /* Initialise Guest Interrupt Status (RVI and SVI) to 0 */ |
1124 | 12 | __vmwrite(GUEST_INTR_STATUS, 0); |
1125 | 12 | } |
1126 | 12 | |
1127 | 12 | if ( cpu_has_vmx_posted_intr_processing ) |
1128 | 12 | { |
1129 | 12 | if ( iommu_intpost ) |
1130 | 0 | pi_desc_init(v); |
1131 | 12 | |
1132 | 12 | __vmwrite(PI_DESC_ADDR, virt_to_maddr(&v->arch.hvm_vmx.pi_desc)); |
1133 | 12 | __vmwrite(POSTED_INTR_NOTIFICATION_VECTOR, posted_intr_vector); |
1134 | 12 | } |
1135 | 12 | |
1136 | 12 | /* Disable PML here regardless; it is only enabled in log-dirty mode. */ |
1137 | 12 | v->arch.hvm_vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_PML; |
1138 | 12 | |
1139 | 12 | /* Host data selectors. */ |
1140 | 12 | __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS); |
1141 | 12 | __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS); |
1142 | 12 | __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS); |
1143 | 12 | __vmwrite(HOST_FS_SELECTOR, 0); |
1144 | 12 | __vmwrite(HOST_GS_SELECTOR, 0); |
1145 | 12 | __vmwrite(HOST_FS_BASE, 0); |
1146 | 12 | __vmwrite(HOST_GS_BASE, 0); |
1147 | 12 | |
1148 | 12 | /* Host control registers. */ |
1149 | 12 | v->arch.hvm_vmx.host_cr0 = read_cr0() | X86_CR0_TS; |
1150 | 12 | __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0); |
1151 | 12 | __vmwrite(HOST_CR4, mmu_cr4_features); |
1152 | 12 | |
1153 | 12 | /* Host CS:RIP. */ |
1154 | 12 | __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS); |
1155 | 12 | __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler); |
1156 | 12 | |
1157 | 12 | /* Host SYSENTER CS:RIP. */ |
1158 | 12 | rdmsrl(MSR_IA32_SYSENTER_CS, sysenter_cs); |
1159 | 12 | __vmwrite(HOST_SYSENTER_CS, sysenter_cs); |
1160 | 12 | rdmsrl(MSR_IA32_SYSENTER_EIP, sysenter_eip); |
1161 | 12 | __vmwrite(HOST_SYSENTER_EIP, sysenter_eip); |
1162 | 12 | |
1163 | 12 | /* MSR intercepts. */ |
1164 | 12 | __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0); |
1165 | 12 | __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0); |
1166 | 12 | __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0); |
1167 | 12 | |
1168 | 12 | __vmwrite(VM_ENTRY_INTR_INFO, 0); |
1169 | 12 | |
1170 | 12 | __vmwrite(CR0_GUEST_HOST_MASK, ~0UL); |
1171 | 12 | __vmwrite(CR4_GUEST_HOST_MASK, ~0UL); |
1172 | 12 | |
1173 | 12 | __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0); |
1174 | 12 | __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0); |
1175 | 12 | |
1176 | 12 | __vmwrite(CR3_TARGET_COUNT, 0); |
1177 | 12 | |
1178 | 12 | __vmwrite(GUEST_ACTIVITY_STATE, 0); |
1179 | 12 | |
1180 | 12 | /* Guest segment bases. */ |
1181 | 12 | __vmwrite(GUEST_ES_BASE, 0); |
1182 | 12 | __vmwrite(GUEST_SS_BASE, 0); |
1183 | 12 | __vmwrite(GUEST_DS_BASE, 0); |
1184 | 12 | __vmwrite(GUEST_FS_BASE, 0); |
1185 | 12 | __vmwrite(GUEST_GS_BASE, 0); |
1186 | 12 | __vmwrite(GUEST_CS_BASE, 0); |
1187 | 12 | |
1188 | 12 | /* Guest segment limits. */ |
1189 | 12 | __vmwrite(GUEST_ES_LIMIT, ~0u); |
1190 | 12 | __vmwrite(GUEST_SS_LIMIT, ~0u); |
1191 | 12 | __vmwrite(GUEST_DS_LIMIT, ~0u); |
1192 | 12 | __vmwrite(GUEST_FS_LIMIT, ~0u); |
1193 | 12 | __vmwrite(GUEST_GS_LIMIT, ~0u); |
1194 | 12 | __vmwrite(GUEST_CS_LIMIT, ~0u); |
1195 | 12 | |
1196 | 12 | /* Guest segment AR bytes. */ |
1197 | 12 | __vmwrite(GUEST_ES_AR_BYTES, 0xc093); /* read/write, accessed */ |
1198 | 12 | __vmwrite(GUEST_SS_AR_BYTES, 0xc093); |
1199 | 12 | __vmwrite(GUEST_DS_AR_BYTES, 0xc093); |
1200 | 12 | __vmwrite(GUEST_FS_AR_BYTES, 0xc093); |
1201 | 12 | __vmwrite(GUEST_GS_AR_BYTES, 0xc093); |
1202 | 12 | __vmwrite(GUEST_CS_AR_BYTES, 0xc09b); /* exec/read, accessed */ |
1203 | 12 | |
1204 | 12 | /* Guest IDT. */ |
1205 | 12 | __vmwrite(GUEST_IDTR_BASE, 0); |
1206 | 12 | __vmwrite(GUEST_IDTR_LIMIT, 0); |
1207 | 12 | |
1208 | 12 | /* Guest GDT. */ |
1209 | 12 | __vmwrite(GUEST_GDTR_BASE, 0); |
1210 | 12 | __vmwrite(GUEST_GDTR_LIMIT, 0); |
1211 | 12 | |
1212 | 12 | /* Guest LDT. */ |
1213 | 12 | __vmwrite(GUEST_LDTR_AR_BYTES, 0x0082); /* LDT */ |
1214 | 12 | __vmwrite(GUEST_LDTR_SELECTOR, 0); |
1215 | 12 | __vmwrite(GUEST_LDTR_BASE, 0); |
1216 | 12 | __vmwrite(GUEST_LDTR_LIMIT, 0); |
1217 | 12 | |
1218 | 12 | /* Guest TSS. */ |
1219 | 12 | __vmwrite(GUEST_TR_AR_BYTES, 0x008b); /* 32-bit TSS (busy) */ |
1220 | 12 | __vmwrite(GUEST_TR_BASE, 0); |
1221 | 12 | __vmwrite(GUEST_TR_LIMIT, 0xff); |
1222 | 12 | |
1223 | 12 | __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0); |
1224 | 12 | __vmwrite(GUEST_DR7, 0); |
1225 | 12 | __vmwrite(VMCS_LINK_POINTER, ~0UL); |
1226 | 12 | |
1227 | 12 | v->arch.hvm_vmx.exception_bitmap = HVM_TRAP_MASK |
1228 | 12 | | (paging_mode_hap(d) ? 0 : (1U << TRAP_page_fault)) |
1229 | 12 | | (1U << TRAP_no_device); |
1230 | 12 | vmx_update_exception_bitmap(v); |
1231 | 12 | |
1232 | 12 | v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_ET; |
1233 | 12 | hvm_update_guest_cr(v, 0); |
1234 | 12 | |
1235 | 12 | v->arch.hvm_vcpu.guest_cr[4] = 0; |
1236 | 12 | hvm_update_guest_cr(v, 4); |
1237 | 12 | |
1238 | 12 | if ( cpu_has_vmx_tpr_shadow ) |
1239 | 12 | { |
1240 | 12 | __vmwrite(VIRTUAL_APIC_PAGE_ADDR, |
1241 | 12 | page_to_maddr(vcpu_vlapic(v)->regs_page)); |
1242 | 12 | __vmwrite(TPR_THRESHOLD, 0); |
1243 | 12 | } |
1244 | 12 | |
1245 | 12 | if ( paging_mode_hap(d) ) |
1246 | 12 | { |
1247 | 12 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
1248 | 12 | struct ept_data *ept = &p2m->ept; |
1249 | 12 | |
1250 | 12 | ept->mfn = pagetable_get_pfn(p2m_get_pagetable(p2m)); |
1251 | 12 | __vmwrite(EPT_POINTER, ept->eptp); |
1252 | 12 | } |
1253 | 12 | |
1254 | 12 | if ( paging_mode_hap(d) ) |
1255 | 12 | { |
1256 | 12 | u64 host_pat, guest_pat; |
1257 | 12 | |
1258 | 12 | rdmsrl(MSR_IA32_CR_PAT, host_pat); |
1259 | 12 | guest_pat = MSR_IA32_CR_PAT_RESET; |
1260 | 12 | |
1261 | 12 | __vmwrite(HOST_PAT, host_pat); |
1262 | 12 | __vmwrite(GUEST_PAT, guest_pat); |
1263 | 12 | } |
1264 | 12 | if ( cpu_has_vmx_mpx ) |
1265 | 0 | __vmwrite(GUEST_BNDCFGS, 0); |
1266 | 12 | if ( cpu_has_vmx_xsaves ) |
1267 | 0 | __vmwrite(XSS_EXIT_BITMAP, 0); |
1268 | 12 | |
1269 | 12 | if ( cpu_has_vmx_tsc_scaling ) |
1270 | 0 | __vmwrite(TSC_MULTIPLIER, d->arch.hvm_domain.tsc_scaling_ratio); |
1271 | 12 | |
1272 | 12 | vmx_vmcs_exit(v); |
1273 | 12 | |
1274 | 12 | /* Will update HOST & GUEST_CR3 as required. */ |
1275 | 12 | paging_update_paging_modes(v); |
1276 | 12 | |
1277 | 12 | vmx_vlapic_msr_changed(v); |
1278 | 12 | |
1279 | 12 | return 0; |
1280 | 12 | } |
1281 | | |
1282 | | static int vmx_msr_entry_key_cmp(const void *key, const void *elt) |
1283 | 0 | { |
1284 | 0 | const u32 *msr = key; |
1285 | 0 | const struct vmx_msr_entry *entry = elt; |
1286 | 0 |
1287 | 0 | if ( *msr > entry->index ) |
1288 | 0 | return 1; |
1289 | 0 | if ( *msr < entry->index ) |
1290 | 0 | return -1; |
1291 | 0 |
1292 | 0 | return 0; |
1293 | 0 | } |
1294 | | |
1295 | | struct vmx_msr_entry *vmx_find_msr(u32 msr, int type) |
1296 | 2 | { |
1297 | 2 | struct vcpu *curr = current; |
1298 | 2 | unsigned int msr_count; |
1299 | 2 | struct vmx_msr_entry *msr_area; |
1300 | 2 | |
1301 | 2 | if ( type == VMX_GUEST_MSR ) |
1302 | 2 | { |
1303 | 2 | msr_count = curr->arch.hvm_vmx.msr_count; |
1304 | 2 | msr_area = curr->arch.hvm_vmx.msr_area; |
1305 | 2 | } |
1306 | 2 | else |
1307 | 0 | { |
1308 | 0 | ASSERT(type == VMX_HOST_MSR); |
1309 | 0 | msr_count = curr->arch.hvm_vmx.host_msr_count; |
1310 | 0 | msr_area = curr->arch.hvm_vmx.host_msr_area; |
1311 | 0 | } |
1312 | 2 | |
1313 | 2 | if ( msr_area == NULL ) |
1314 | 2 | return NULL; |
1315 | 2 | |
1316 | 0 | return bsearch(&msr, msr_area, msr_count, sizeof(struct vmx_msr_entry), |
1317 | 0 | vmx_msr_entry_key_cmp); |
1318 | 2 | } |
1319 | | |
1320 | | int vmx_read_guest_msr(u32 msr, u64 *val) |
1321 | 0 | { |
1322 | 0 | struct vmx_msr_entry *ent; |
1323 | 0 |
1324 | 0 | if ( (ent = vmx_find_msr(msr, VMX_GUEST_MSR)) != NULL ) |
1325 | 0 | { |
1326 | 0 | *val = ent->data; |
1327 | 0 | return 0; |
1328 | 0 | } |
1329 | 0 |
1330 | 0 | return -ESRCH; |
1331 | 0 | } |
1332 | | |
1333 | | int vmx_write_guest_msr(u32 msr, u64 val) |
1334 | 2 | { |
1335 | 2 | struct vmx_msr_entry *ent; |
1336 | 2 | |
1337 | 2 | if ( (ent = vmx_find_msr(msr, VMX_GUEST_MSR)) != NULL ) |
1338 | 0 | { |
1339 | 0 | ent->data = val; |
1340 | 0 | return 0; |
1341 | 0 | } |
1342 | 2 | |
1343 | 2 | return -ESRCH; |
1344 | 2 | } |
1345 | | |
1346 | | int vmx_add_msr(u32 msr, int type) |
1347 | 0 | { |
1348 | 0 | struct vcpu *curr = current; |
1349 | 0 | unsigned int idx, *msr_count; |
1350 | 0 | struct vmx_msr_entry **msr_area, *msr_area_elem; |
1351 | 0 |
1352 | 0 | if ( type == VMX_GUEST_MSR ) |
1353 | 0 | { |
1354 | 0 | msr_count = &curr->arch.hvm_vmx.msr_count; |
1355 | 0 | msr_area = &curr->arch.hvm_vmx.msr_area; |
1356 | 0 | } |
1357 | 0 | else |
1358 | 0 | { |
1359 | 0 | ASSERT(type == VMX_HOST_MSR); |
1360 | 0 | msr_count = &curr->arch.hvm_vmx.host_msr_count; |
1361 | 0 | msr_area = &curr->arch.hvm_vmx.host_msr_area; |
1362 | 0 | } |
1363 | 0 |
1364 | 0 | if ( *msr_area == NULL ) |
1365 | 0 | { |
1366 | 0 | if ( (*msr_area = alloc_xenheap_page()) == NULL ) |
1367 | 0 | return -ENOMEM; |
1368 | 0 |
1369 | 0 | if ( type == VMX_GUEST_MSR ) |
1370 | 0 | { |
1371 | 0 | __vmwrite(VM_EXIT_MSR_STORE_ADDR, virt_to_maddr(*msr_area)); |
1372 | 0 | __vmwrite(VM_ENTRY_MSR_LOAD_ADDR, virt_to_maddr(*msr_area)); |
1373 | 0 | } |
1374 | 0 | else |
1375 | 0 | __vmwrite(VM_EXIT_MSR_LOAD_ADDR, virt_to_maddr(*msr_area)); |
1376 | 0 | } |
1377 | 0 |
1378 | 0 | for ( idx = 0; idx < *msr_count && (*msr_area)[idx].index <= msr; idx++ ) |
1379 | 0 | if ( (*msr_area)[idx].index == msr ) |
1380 | 0 | return 0; |
1381 | 0 |
1382 | 0 | if ( *msr_count == (PAGE_SIZE / sizeof(struct vmx_msr_entry)) ) |
1383 | 0 | return -ENOSPC; |
1384 | 0 |
1385 | 0 | memmove(*msr_area + idx + 1, *msr_area + idx, |
1386 | 0 | sizeof(*msr_area_elem) * (*msr_count - idx)); |
1387 | 0 |
1388 | 0 | msr_area_elem = *msr_area + idx; |
1389 | 0 | msr_area_elem->index = msr; |
1390 | 0 | msr_area_elem->mbz = 0; |
1391 | 0 |
1392 | 0 | ++*msr_count; |
1393 | 0 |
1394 | 0 | if ( type == VMX_GUEST_MSR ) |
1395 | 0 | { |
1396 | 0 | msr_area_elem->data = 0; |
1397 | 0 | __vmwrite(VM_EXIT_MSR_STORE_COUNT, *msr_count); |
1398 | 0 | __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, *msr_count); |
1399 | 0 | } |
1400 | 0 | else |
1401 | 0 | { |
1402 | 0 | rdmsrl(msr, msr_area_elem->data); |
1403 | 0 | __vmwrite(VM_EXIT_MSR_LOAD_COUNT, *msr_count); |
1404 | 0 | } |
1405 | 0 |
1406 | 0 | return 0; |
1407 | 0 | } |
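A usage sketch for the MSR-area API above (hypothetical caller; MSR_STAR is
chosen only for illustration). vmx_add_msr() keeps the page sorted by index,
which is what lets vmx_find_msr() use bsearch():

    /* Make an MSR guest-switched, then seed its guest-visible value. */
    if ( vmx_add_msr(MSR_STAR, VMX_GUEST_MSR) == 0 )
        vmx_write_guest_msr(MSR_STAR, 0);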
1408 | | |
1409 | | void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector) |
1410 | 296 | { |
1411 | 296 | if ( !test_and_set_bit(vector, v->arch.hvm_vmx.eoi_exit_bitmap) ) |
1412 | 3 | set_bit(vector / BITS_PER_LONG, |
1413 | 296 | &v->arch.hvm_vmx.eoi_exitmap_changed); |
1414 | 296 | } |
1415 | | |
1416 | | void vmx_clear_eoi_exit_bitmap(struct vcpu *v, u8 vector) |
1417 | 3.84k | { |
1418 | 3.84k | if ( test_and_clear_bit(vector, v->arch.hvm_vmx.eoi_exit_bitmap) ) |
1419 | 0 | set_bit(vector / BITS_PER_LONG, |
1420 | 3.84k | &v->arch.hvm_vmx.eoi_exitmap_changed); |
1421 | 3.84k | } |
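The eoi_exitmap_changed bookkeeping above allows the VMCS copy to be refreshed
lazily; a minimal consumer sketch under that assumption (the real
synchronisation is done elsewhere, before VM entry):

    /* For each 64-bit word marked dirty, rewrite the corresponding
     * EOI_EXIT_BITMAP(n) field and clear the mark. */
    unsigned int n;

    for ( n = 0; n < ARRAY_SIZE(v->arch.hvm_vmx.eoi_exit_bitmap); n++ )
        if ( test_and_clear_bit(n, &v->arch.hvm_vmx.eoi_exitmap_changed) )
            __vmwrite(EOI_EXIT_BITMAP(n), v->arch.hvm_vmx.eoi_exit_bitmap[n]);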
1422 | | |
1423 | | bool_t vmx_vcpu_pml_enabled(const struct vcpu *v) |
1424 | 0 | { |
1425 | 0 | return !!(v->arch.hvm_vmx.secondary_exec_control & |
1426 | 0 | SECONDARY_EXEC_ENABLE_PML); |
1427 | 0 | } |
1428 | | |
1429 | | int vmx_vcpu_enable_pml(struct vcpu *v) |
1430 | 0 | { |
1431 | 0 | if ( vmx_vcpu_pml_enabled(v) ) |
1432 | 0 | return 0; |
1433 | 0 |
1434 | 0 | v->arch.hvm_vmx.pml_pg = v->domain->arch.paging.alloc_page(v->domain); |
1435 | 0 | if ( !v->arch.hvm_vmx.pml_pg ) |
1436 | 0 | return -ENOMEM; |
1437 | 0 |
1438 | 0 | vmx_vmcs_enter(v); |
1439 | 0 |
1440 | 0 | __vmwrite(PML_ADDRESS, page_to_mfn(v->arch.hvm_vmx.pml_pg) << PAGE_SHIFT); |
1441 | 0 | __vmwrite(GUEST_PML_INDEX, NR_PML_ENTRIES - 1); |
1442 | 0 |
1443 | 0 | v->arch.hvm_vmx.secondary_exec_control |= SECONDARY_EXEC_ENABLE_PML; |
1444 | 0 |
1445 | 0 | __vmwrite(SECONDARY_VM_EXEC_CONTROL, |
1446 | 0 | v->arch.hvm_vmx.secondary_exec_control); |
1447 | 0 |
1448 | 0 | vmx_vmcs_exit(v); |
1449 | 0 |
1450 | 0 | return 0; |
1451 | 0 | } |
1452 | | |
1453 | | void vmx_vcpu_disable_pml(struct vcpu *v) |
1454 | 0 | { |
1455 | 0 | if ( !vmx_vcpu_pml_enabled(v) ) |
1456 | 0 | return; |
1457 | 0 |
1458 | 0 | /* Make sure we don't lose any logged GPAs. */ |
1459 | 0 | vmx_vcpu_flush_pml_buffer(v); |
1460 | 0 |
1461 | 0 | vmx_vmcs_enter(v); |
1462 | 0 |
1463 | 0 | v->arch.hvm_vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_PML; |
1464 | 0 | __vmwrite(SECONDARY_VM_EXEC_CONTROL, |
1465 | 0 | v->arch.hvm_vmx.secondary_exec_control); |
1466 | 0 |
1467 | 0 | vmx_vmcs_exit(v); |
1468 | 0 |
1469 | 0 | v->domain->arch.paging.free_page(v->domain, v->arch.hvm_vmx.pml_pg); |
1470 | 0 | v->arch.hvm_vmx.pml_pg = NULL; |
1471 | 0 | } |
1472 | | |
1473 | | void vmx_vcpu_flush_pml_buffer(struct vcpu *v) |
1474 | 0 | { |
1475 | 0 | uint64_t *pml_buf; |
1476 | 0 | unsigned long pml_idx; |
1477 | 0 |
1478 | 0 | ASSERT((v == current) || (!vcpu_runnable(v) && !v->is_running)); |
1479 | 0 | ASSERT(vmx_vcpu_pml_enabled(v)); |
1480 | 0 |
|
1481 | 0 | vmx_vmcs_enter(v); |
1482 | 0 |
|
1483 | 0 | __vmread(GUEST_PML_INDEX, &pml_idx); |
1484 | 0 |
|
1485 | 0 | /* Do nothing if PML buffer is empty. */ |
1486 | 0 | if ( pml_idx == (NR_PML_ENTRIES - 1) ) |
1487 | 0 | goto out; |
1488 | 0 |
|
1489 | 0 | pml_buf = __map_domain_page(v->arch.hvm_vmx.pml_pg); |
1490 | 0 |
|
1491 | 0 | /* |
1492 | 0 | * The PML index can be either 2^16-1 (buffer full) or 0 ~ NR_PML_ENTRIES-1
1493 | 0 | * (buffer not full); in the latter case it always points to the next
1494 | 0 | * available entry.
1495 | 0 | */ |
1496 | 0 | if ( pml_idx >= NR_PML_ENTRIES ) |
1497 | 0 | pml_idx = 0; |
1498 | 0 | else |
1499 | 0 | pml_idx++; |
1500 | 0 |
|
1501 | 0 | for ( ; pml_idx < NR_PML_ENTRIES; pml_idx++ ) |
1502 | 0 | { |
1503 | 0 | unsigned long gfn = pml_buf[pml_idx] >> PAGE_SHIFT; |
1504 | 0 |
|
1505 | 0 | /* |
1506 | 0 | * The logged GFN's type needs changing from log-dirty back to normal
1507 | 0 | * memory; hap_track_dirty_vram() depends on this to work.  We mark all
1508 | 0 | * logged GFNs dirty, as we cannot be sure it is safe to ignore GFNs for
1509 | 0 | * which p2m_change_type_one() returns failure.  Such failures are very
1510 | 0 | * rare and the extra cost is negligible, whereas a missing dirty mark
1511 | 0 | * is extremely difficult to debug.
1512 | 0 | */ |
1513 | 0 | p2m_change_type_one(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw); |
1514 | 0 |
|
1515 | 0 | /* HVM guest: pfn == gfn */ |
1516 | 0 | paging_mark_pfn_dirty(v->domain, _pfn(gfn)); |
1517 | 0 | } |
1518 | 0 |
|
1519 | 0 | unmap_domain_page(pml_buf); |
1520 | 0 |
|
1521 | 0 | /* Reset PML index */ |
1522 | 0 | __vmwrite(GUEST_PML_INDEX, NR_PML_ENTRIES - 1); |
1523 | 0 |
|
1524 | 0 | out: |
1525 | 0 | vmx_vmcs_exit(v); |
1526 | 0 | } |
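
/*
 * A worked example of the index handling above, assuming NR_PML_ENTRIES
 * is 512: hardware fills the buffer from the top down, decrementing
 * GUEST_PML_INDEX as each GPA is logged.  An index of 511 means the
 * buffer is empty, an index of 508 means entries 509..511 are valid, and
 * a wrapped index of 0xFFFF (>= NR_PML_ENTRIES) means all 512 entries
 * are valid -- hence the loop above starting at pml_idx + 1, or at 0
 * when the buffer is full.
 */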
1527 | | |
1528 | | bool_t vmx_domain_pml_enabled(const struct domain *d) |
1529 | 12 | { |
1530 | 12 | return !!(d->arch.hvm_domain.vmx.status & VMX_DOMAIN_PML_ENABLED); |
1531 | 12 | } |
1532 | | |
1533 | | /* |
1534 | | * This function enables PML for a particular domain.  It should be called
1535 | | * while the domain is paused.
1536 | | *
1537 | | * PML needs to be enabled for all vcpus of the domain at once, as the PML
1538 | | * buffer and PML index are per-vcpu while the EPT table is shared by all
1539 | | * vcpus, so enabling PML on only a subset of vcpus won't work.
1540 | | */ |
1541 | | int vmx_domain_enable_pml(struct domain *d) |
1542 | 0 | { |
1543 | 0 | struct vcpu *v; |
1544 | 0 | int rc; |
1545 | 0 |
|
1546 | 0 | ASSERT(atomic_read(&d->pause_count)); |
1547 | 0 |
|
1548 | 0 | if ( vmx_domain_pml_enabled(d) ) |
1549 | 0 | return 0; |
1550 | 0 |
|
1551 | 0 | for_each_vcpu ( d, v ) |
1552 | 0 | if ( (rc = vmx_vcpu_enable_pml(v)) != 0 ) |
1553 | 0 | goto error; |
1554 | 0 |
|
1555 | 0 | d->arch.hvm_domain.vmx.status |= VMX_DOMAIN_PML_ENABLED; |
1556 | 0 |
|
1557 | 0 | return 0; |
1558 | 0 |
|
1559 | 0 | error: |
1560 | 0 | for_each_vcpu ( d, v ) |
1561 | 0 | if ( vmx_vcpu_pml_enabled(v) ) |
1562 | 0 | vmx_vcpu_disable_pml(v); |
1563 | 0 | return rc; |
1564 | 0 | } |
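
/*
 * A usage sketch under the stated pause requirement (hypothetical
 * caller): toggling PML is only valid while the domain's pause count is
 * non-zero, and on failure the rollback above leaves every vcpu with
 * PML disabled.
 */
static int example_enable_pml(struct domain *d)
{
    int rc;

    domain_pause(d);
    rc = vmx_domain_enable_pml(d);
    domain_unpause(d);

    return rc;
}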
1565 | | |
1566 | | /* |
1567 | | * Disable PML for a particular domain.  Called while the domain is paused.
1568 | | *
1569 | | * As with enabling PML for a domain, disabling it must be done for all
1570 | | * vcpus at once.
1571 | | */ |
1572 | | void vmx_domain_disable_pml(struct domain *d) |
1573 | 0 | { |
1574 | 0 | struct vcpu *v; |
1575 | 0 |
|
1576 | 0 | ASSERT(atomic_read(&d->pause_count)); |
1577 | 0 |
|
1578 | 0 | if ( !vmx_domain_pml_enabled(d) ) |
1579 | 0 | return; |
1580 | 0 |
|
1581 | 0 | for_each_vcpu ( d, v ) |
1582 | 0 | vmx_vcpu_disable_pml(v); |
1583 | 0 |
|
1584 | 0 | d->arch.hvm_domain.vmx.status &= ~VMX_DOMAIN_PML_ENABLED; |
1585 | 0 | } |
1586 | | |
1587 | | /* |
1588 | | * Flush the PML buffers of all vcpus, and propagate the logged dirty pages
1589 | | * to the log-dirty radix tree.  Called while the domain is paused.
1590 | | */ |
1591 | | void vmx_domain_flush_pml_buffers(struct domain *d) |
1592 | 0 | { |
1593 | 0 | struct vcpu *v; |
1594 | 0 |
|
1595 | 0 | ASSERT(atomic_read(&d->pause_count)); |
1596 | 0 |
|
1597 | 0 | if ( !vmx_domain_pml_enabled(d) ) |
1598 | 0 | return; |
1599 | 0 |
|
1600 | 0 | for_each_vcpu ( d, v ) |
1601 | 0 | vmx_vcpu_flush_pml_buffer(v); |
1602 | 0 | } |
1603 | | |
1604 | | static void vmx_vcpu_update_eptp(struct vcpu *v, u64 eptp) |
1605 | 0 | { |
1606 | 0 | vmx_vmcs_enter(v); |
1607 | 0 | __vmwrite(EPT_POINTER, eptp); |
1608 | 0 | vmx_vmcs_exit(v); |
1609 | 0 | } |
1610 | | |
1611 | | /* |
1612 | | * Write the updated EPTP into the VMCS of every vcpu of the domain.  Must
1613 | | * be called while the domain is paused.
1614 | | */ |
1615 | | void vmx_domain_update_eptp(struct domain *d) |
1616 | 0 | { |
1617 | 0 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
1618 | 0 | struct vcpu *v; |
1619 | 0 |
|
1620 | 0 | ASSERT(atomic_read(&d->pause_count)); |
1621 | 0 |
|
1622 | 0 | for_each_vcpu ( d, v ) |
1623 | 0 | vmx_vcpu_update_eptp(v, p2m->ept.eptp); |
1624 | 0 |
|
1625 | 0 | ept_sync_domain(p2m); |
1626 | 0 | } |
1627 | | |
1628 | | int vmx_create_vmcs(struct vcpu *v) |
1629 | 12 | { |
1630 | 12 | struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx; |
1631 | 12 | int rc; |
1632 | 12 | |
1633 | 12 | if ( (arch_vmx->vmcs_pa = vmx_alloc_vmcs()) == 0 ) |
1634 | 0 | return -ENOMEM; |
1635 | 12 | |
1636 | 12 | INIT_LIST_HEAD(&arch_vmx->active_list); |
1637 | 12 | __vmpclear(arch_vmx->vmcs_pa); |
1638 | 12 | arch_vmx->active_cpu = -1; |
1639 | 12 | arch_vmx->launched = 0; |
1640 | 12 | |
1641 | 12 | if ( (rc = construct_vmcs(v)) != 0 ) |
1642 | 0 | { |
1643 | 0 | vmx_free_vmcs(arch_vmx->vmcs_pa); |
1644 | 0 | return rc; |
1645 | 0 | } |
1646 | 12 | |
1647 | 12 | return 0; |
1648 | 12 | } |
1649 | | |
1650 | | void vmx_destroy_vmcs(struct vcpu *v) |
1651 | 0 | { |
1652 | 0 | struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx; |
1653 | 0 |
|
1654 | 0 | vmx_clear_vmcs(v); |
1655 | 0 |
|
1656 | 0 | vmx_free_vmcs(arch_vmx->vmcs_pa); |
1657 | 0 |
|
1658 | 0 | free_xenheap_page(v->arch.hvm_vmx.host_msr_area); |
1659 | 0 | free_xenheap_page(v->arch.hvm_vmx.msr_area); |
1660 | 0 | free_xenheap_page(v->arch.hvm_vmx.msr_bitmap); |
1661 | 0 | } |
1662 | | |
1663 | | void vmx_vmentry_failure(void) |
1664 | 0 | { |
1665 | 0 | struct vcpu *curr = current; |
1666 | 0 | unsigned long error; |
1667 | 0 |
|
1668 | 0 | __vmread(VM_INSTRUCTION_ERROR, &error); |
1669 | 0 | gprintk(XENLOG_ERR, "VM%s error: %#lx\n", |
1670 | 0 | curr->arch.hvm_vmx.launched ? "RESUME" : "LAUNCH", error); |
1671 | 0 |
|
1672 | 0 | if ( error == VMX_INSN_INVALID_CONTROL_STATE || |
1673 | 0 | error == VMX_INSN_INVALID_HOST_STATE ) |
1674 | 0 | vmcs_dump_vcpu(curr); |
1675 | 0 |
|
1676 | 0 | domain_crash_synchronous(); |
1677 | 0 | } |
1678 | | |
1679 | | void vmx_do_resume(struct vcpu *v) |
1680 | 4.52M | { |
1681 | 4.52M | bool_t debug_state; |
1682 | 4.52M | |
1683 | 4.52M | if ( v->arch.hvm_vmx.active_cpu == smp_processor_id() ) |
1684 | 4.53M | vmx_vmcs_reload(v); |
1685 | 4.52M | else |
1686 | 18.4E | { |
1687 | 18.4E | /* |
1688 | 18.4E | * For a pass-through domain, the guest's PCI-E device driver may use
1689 | 18.4E | * non-snooped I/O and explicitly WBINVD or CLFLUSH a RAM region.
1690 | 18.4E | * Since migration may occur before the WBINVD or CLFLUSH, we need to
1691 | 18.4E | * maintain data consistency either by:
1692 | 18.4E | * 1: flushing the cache (WBINVD) when the guest is scheduled out, if
1693 | 18.4E | *    there is no WBINVD exiting, or
1694 | 18.4E | * 2: executing WBINVD on all dirty pCPUs when the guest's WBINVD exits.
1695 | 18.4E | * Neither is needed if the VT-d engine can force snooping.
1696 | 18.4E | */ |
1697 | 18.4E | if ( has_arch_pdevs(v->domain) && !iommu_snoop |
1698 | 0 | && !cpu_has_wbinvd_exiting ) |
1699 | 0 | { |
1700 | 0 | int cpu = v->arch.hvm_vmx.active_cpu; |
1701 | 0 | if ( cpu != -1 ) |
1702 | 0 | flush_mask(cpumask_of(cpu), FLUSH_CACHE); |
1703 | 0 | } |
1704 | 18.4E | |
1705 | 18.4E | vmx_clear_vmcs(v); |
1706 | 18.4E | vmx_load_vmcs(v); |
1707 | 18.4E | hvm_migrate_timers(v); |
1708 | 18.4E | hvm_migrate_pirqs(v); |
1709 | 18.4E | vmx_set_host_env(v); |
1710 | 18.4E | /* |
1711 | 18.4E | * Both the n1 and n2 VMCS need their host environment updated after
1712 | 18.4E | * vCPU migration.  The currently loaded VMCS is updated in place; the
1713 | 18.4E | * update of the other VMCS is deferred until it is switched in.
1714 | 18.4E | */ |
1715 | 18.4E | v->arch.hvm_vmx.hostenv_migrated = 1; |
1716 | 18.4E | |
1717 | 18.4E | hvm_asid_flush_vcpu(v); |
1718 | 18.4E | } |
1719 | 4.52M | |
1720 | 4.52M | debug_state = v->domain->debugger_attached |
1721 | 4.60M | || v->domain->arch.monitor.software_breakpoint_enabled |
1722 | 4.60M | || v->domain->arch.monitor.singlestep_enabled; |
1723 | 4.52M | |
1724 | 4.52M | if ( unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) ) |
1725 | 0 | { |
1726 | 0 | v->arch.hvm_vcpu.debug_state_latch = debug_state; |
1727 | 0 | vmx_update_debug_state(v); |
1728 | 0 | } |
1729 | 4.52M | |
1730 | 4.52M | hvm_do_resume(v); |
1731 | 4.52M | reset_stack_and_jump(vmx_asm_do_vmentry); |
1732 | 4.52M | } |
1733 | | |
1734 | | static inline unsigned long vmr(unsigned long field) |
1735 | 0 | { |
1736 | 0 | unsigned long val; |
1737 | 0 |
|
1738 | 0 | return vmread_safe(field, &val) ? 0 : val; |
1739 | 0 | } |
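
/*
 * vmread_safe() returns non-zero when the field cannot be read (e.g. it
 * does not exist on the current CPU), so the dump helpers below print 0
 * for such fields rather than faulting.
 */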
1740 | | |
1741 | 0 | #define vmr16(fld) ({ \ |
1742 | 0 | BUILD_BUG_ON((fld) & 0x6001); \ |
1743 | 0 | (uint16_t)vmr(fld); \ |
1744 | 0 | }) |
1745 | | |
1746 | 0 | #define vmr32(fld) ({ \ |
1747 | 0 | BUILD_BUG_ON(((fld) & 0x6001) != 0x4000); \ |
1748 | 0 | (uint32_t)vmr(fld); \ |
1749 | 0 | }) |
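
/*
 * The BUILD_BUG_ON()s above encode the VMCS field encoding layout from
 * the SDM: bits 14:13 of an encoding give the field width (0 = 16-bit,
 * 1 = 64-bit, 2 = 32-bit, 3 = natural width) and bit 0 selects the high
 * half of a 64-bit field.  Masking with 0x6001 must therefore yield 0
 * for a vmr16() argument and 0x4000 for a vmr32() one, e.g.
 * GUEST_ES_SELECTOR (0x0800) passes vmr16() while VM_EXIT_REASON
 * (0x4402) passes vmr32().
 */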
1750 | | |
1751 | | static void vmx_dump_sel(char *name, uint32_t selector) |
1752 | 0 | { |
1753 | 0 | uint32_t sel, attr, limit; |
1754 | 0 | uint64_t base; |
1755 | 0 | sel = vmr(selector); |
1756 | 0 | attr = vmr(selector + (GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR)); |
1757 | 0 | limit = vmr(selector + (GUEST_ES_LIMIT - GUEST_ES_SELECTOR)); |
1758 | 0 | base = vmr(selector + (GUEST_ES_BASE - GUEST_ES_SELECTOR)); |
1759 | 0 | printk("%s: %04x %05x %08x %016"PRIx64"\n", name, sel, attr, limit, base); |
1760 | 0 | } |
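
/*
 * The deltas above rely on the regular spacing of segment fields in the
 * VMCS encoding space: for every segment register, the AR bytes, limit
 * and base sit at the same encoding distance from the selector as they
 * do for ES, so one ES-relative offset recovers each component from any
 * segment's selector encoding.
 */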
1761 | | |
1762 | | static void vmx_dump_sel2(char *name, uint32_t lim) |
1763 | 0 | { |
1764 | 0 | uint32_t limit; |
1765 | 0 | uint64_t base; |
1766 | 0 | limit = vmr(lim); |
1767 | 0 | base = vmr(lim + (GUEST_GDTR_BASE - GUEST_GDTR_LIMIT)); |
1768 | 0 | printk("%s: %08x %016"PRIx64"\n", name, limit, base); |
1769 | 0 | } |
1770 | | |
1771 | | void vmcs_dump_vcpu(struct vcpu *v) |
1772 | 0 | { |
1773 | 0 | struct cpu_user_regs *regs = &v->arch.user_regs; |
1774 | 0 | uint32_t vmentry_ctl, vmexit_ctl; |
1775 | 0 | unsigned long cr4; |
1776 | 0 | uint64_t efer; |
1777 | 0 | unsigned int i, n; |
1778 | 0 |
|
1779 | 0 | if ( v == current ) |
1780 | 0 | regs = guest_cpu_user_regs(); |
1781 | 0 |
|
1782 | 0 | vmx_vmcs_enter(v); |
1783 | 0 |
|
1784 | 0 | vmentry_ctl = vmr32(VM_ENTRY_CONTROLS);
1785 | 0 | vmexit_ctl = vmr32(VM_EXIT_CONTROLS); |
1786 | 0 | cr4 = vmr(GUEST_CR4); |
1787 | 0 | efer = vmr(GUEST_EFER); |
1788 | 0 |
|
1789 | 0 | printk("*** Guest State ***\n"); |
1790 | 0 | printk("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", |
1791 | 0 | vmr(GUEST_CR0), vmr(CR0_READ_SHADOW), vmr(CR0_GUEST_HOST_MASK)); |
1792 | 0 | printk("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", |
1793 | 0 | cr4, vmr(CR4_READ_SHADOW), vmr(CR4_GUEST_HOST_MASK)); |
1794 | 0 | printk("CR3 = 0x%016lx\n", vmr(GUEST_CR3)); |
1795 | 0 | if ( (v->arch.hvm_vmx.secondary_exec_control & |
1796 | 0 | SECONDARY_EXEC_ENABLE_EPT) && |
1797 | 0 | (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA) ) |
1798 | 0 | { |
1799 | 0 | printk("PDPTE0 = 0x%016lx PDPTE1 = 0x%016lx\n", |
1800 | 0 | vmr(GUEST_PDPTE(0)), vmr(GUEST_PDPTE(1))); |
1801 | 0 | printk("PDPTE2 = 0x%016lx PDPTE3 = 0x%016lx\n", |
1802 | 0 | vmr(GUEST_PDPTE(2)), vmr(GUEST_PDPTE(3))); |
1803 | 0 | } |
1804 | 0 | printk("RSP = 0x%016lx (0x%016lx) RIP = 0x%016lx (0x%016lx)\n", |
1805 | 0 | vmr(GUEST_RSP), regs->rsp, |
1806 | 0 | vmr(GUEST_RIP), regs->rip); |
1807 | 0 | printk("RFLAGS=0x%08lx (0x%08lx) DR7 = 0x%016lx\n", |
1808 | 0 | vmr(GUEST_RFLAGS), regs->rflags, |
1809 | 0 | vmr(GUEST_DR7)); |
1810 | 0 | printk("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n", |
1811 | 0 | vmr(GUEST_SYSENTER_ESP), |
1812 | 0 | vmr32(GUEST_SYSENTER_CS), vmr(GUEST_SYSENTER_EIP)); |
1813 | 0 | printk(" sel attr limit base\n"); |
1814 | 0 | vmx_dump_sel(" CS", GUEST_CS_SELECTOR); |
1815 | 0 | vmx_dump_sel(" DS", GUEST_DS_SELECTOR); |
1816 | 0 | vmx_dump_sel(" SS", GUEST_SS_SELECTOR); |
1817 | 0 | vmx_dump_sel(" ES", GUEST_ES_SELECTOR); |
1818 | 0 | vmx_dump_sel(" FS", GUEST_FS_SELECTOR); |
1819 | 0 | vmx_dump_sel(" GS", GUEST_GS_SELECTOR); |
1820 | 0 | vmx_dump_sel2("GDTR", GUEST_GDTR_LIMIT); |
1821 | 0 | vmx_dump_sel("LDTR", GUEST_LDTR_SELECTOR); |
1822 | 0 | vmx_dump_sel2("IDTR", GUEST_IDTR_LIMIT); |
1823 | 0 | vmx_dump_sel(" TR", GUEST_TR_SELECTOR); |
1824 | 0 | if ( (vmexit_ctl & (VM_EXIT_SAVE_GUEST_PAT | VM_EXIT_SAVE_GUEST_EFER)) || |
1825 | 0 | (vmentry_ctl & (VM_ENTRY_LOAD_GUEST_PAT | VM_ENTRY_LOAD_GUEST_EFER)) ) |
1826 | 0 | printk("EFER = 0x%016lx PAT = 0x%016lx\n", efer, vmr(GUEST_PAT)); |
1827 | 0 | printk("PreemptionTimer = 0x%08x SM Base = 0x%08x\n", |
1828 | 0 | vmr32(GUEST_PREEMPTION_TIMER), vmr32(GUEST_SMBASE)); |
1829 | 0 | printk("DebugCtl = 0x%016lx DebugExceptions = 0x%016lx\n", |
1830 | 0 | vmr(GUEST_IA32_DEBUGCTL), vmr(GUEST_PENDING_DBG_EXCEPTIONS)); |
1831 | 0 | if ( vmentry_ctl & (VM_ENTRY_LOAD_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_BNDCFGS) ) |
1832 | 0 | printk("PerfGlobCtl = 0x%016lx BndCfgS = 0x%016lx\n", |
1833 | 0 | vmr(GUEST_PERF_GLOBAL_CTRL), vmr(GUEST_BNDCFGS)); |
1834 | 0 | printk("Interruptibility = %08x ActivityState = %08x\n", |
1835 | 0 | vmr32(GUEST_INTERRUPTIBILITY_INFO), vmr32(GUEST_ACTIVITY_STATE)); |
1836 | 0 | if ( v->arch.hvm_vmx.secondary_exec_control & |
1837 | 0 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY ) |
1838 | 0 | printk("InterruptStatus = %04x\n", vmr16(GUEST_INTR_STATUS)); |
1839 | 0 |
|
1840 | 0 | printk("*** Host State ***\n"); |
1841 | 0 | printk("RIP = 0x%016lx (%ps) RSP = 0x%016lx\n", |
1842 | 0 | vmr(HOST_RIP), (void *)vmr(HOST_RIP), vmr(HOST_RSP)); |
1843 | 0 | printk("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n", |
1844 | 0 | vmr16(HOST_CS_SELECTOR), vmr16(HOST_SS_SELECTOR), |
1845 | 0 | vmr16(HOST_DS_SELECTOR), vmr16(HOST_ES_SELECTOR), |
1846 | 0 | vmr16(HOST_FS_SELECTOR), vmr16(HOST_GS_SELECTOR), |
1847 | 0 | vmr16(HOST_TR_SELECTOR)); |
1848 | 0 | printk("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n", |
1849 | 0 | vmr(HOST_FS_BASE), vmr(HOST_GS_BASE), vmr(HOST_TR_BASE)); |
1850 | 0 | printk("GDTBase=%016lx IDTBase=%016lx\n", |
1851 | 0 | vmr(HOST_GDTR_BASE), vmr(HOST_IDTR_BASE)); |
1852 | 0 | printk("CR0=%016lx CR3=%016lx CR4=%016lx\n", |
1853 | 0 | vmr(HOST_CR0), vmr(HOST_CR3), vmr(HOST_CR4)); |
1854 | 0 | printk("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n", |
1855 | 0 | vmr(HOST_SYSENTER_ESP), |
1856 | 0 | vmr32(HOST_SYSENTER_CS), vmr(HOST_SYSENTER_EIP)); |
1857 | 0 | if ( vmexit_ctl & (VM_EXIT_LOAD_HOST_PAT | VM_EXIT_LOAD_HOST_EFER) ) |
1858 | 0 | printk("EFER = 0x%016lx PAT = 0x%016lx\n", vmr(HOST_EFER), vmr(HOST_PAT)); |
1859 | 0 | if ( vmexit_ctl & VM_EXIT_LOAD_PERF_GLOBAL_CTRL ) |
1860 | 0 | printk("PerfGlobCtl = 0x%016lx\n", |
1861 | 0 | vmr(HOST_PERF_GLOBAL_CTRL)); |
1862 | 0 |
|
1863 | 0 | printk("*** Control State ***\n"); |
1864 | 0 | printk("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n", |
1865 | 0 | vmr32(PIN_BASED_VM_EXEC_CONTROL), |
1866 | 0 | vmr32(CPU_BASED_VM_EXEC_CONTROL), |
1867 | 0 | vmr32(SECONDARY_VM_EXEC_CONTROL)); |
1868 | 0 | printk("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl); |
1869 | 0 | printk("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n", |
1870 | 0 | vmr32(EXCEPTION_BITMAP), |
1871 | 0 | vmr32(PAGE_FAULT_ERROR_CODE_MASK), |
1872 | 0 | vmr32(PAGE_FAULT_ERROR_CODE_MATCH)); |
1873 | 0 | printk("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n", |
1874 | 0 | vmr32(VM_ENTRY_INTR_INFO), |
1875 | 0 | vmr32(VM_ENTRY_EXCEPTION_ERROR_CODE), |
1876 | 0 | vmr32(VM_ENTRY_INSTRUCTION_LEN)); |
1877 | 0 | printk("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n", |
1878 | 0 | vmr32(VM_EXIT_INTR_INFO), |
1879 | 0 | vmr32(VM_EXIT_INTR_ERROR_CODE), |
1880 | 0 | vmr32(VM_EXIT_INSTRUCTION_LEN)); |
1881 | 0 | printk(" reason=%08x qualification=%016lx\n", |
1882 | 0 | vmr32(VM_EXIT_REASON), vmr(EXIT_QUALIFICATION)); |
1883 | 0 | printk("IDTVectoring: info=%08x errcode=%08x\n", |
1884 | 0 | vmr32(IDT_VECTORING_INFO), vmr32(IDT_VECTORING_ERROR_CODE)); |
1885 | 0 | printk("TSC Offset = 0x%016lx TSC Multiplier = 0x%016lx\n", |
1886 | 0 | vmr(TSC_OFFSET), vmr(TSC_MULTIPLIER)); |
1887 | 0 | if ( (v->arch.hvm_vmx.exec_control & CPU_BASED_TPR_SHADOW) || |
1888 | 0 | (vmx_pin_based_exec_control & PIN_BASED_POSTED_INTERRUPT) ) |
1889 | 0 | printk("TPR Threshold = 0x%02x PostedIntrVec = 0x%02x\n", |
1890 | 0 | vmr32(TPR_THRESHOLD), vmr16(POSTED_INTR_NOTIFICATION_VECTOR)); |
1891 | 0 | if ( (v->arch.hvm_vmx.secondary_exec_control & |
1892 | 0 | SECONDARY_EXEC_ENABLE_EPT) ) |
1893 | 0 | printk("EPT pointer = 0x%016lx EPTP index = 0x%04x\n", |
1894 | 0 | vmr(EPT_POINTER), vmr16(EPTP_INDEX)); |
1895 | 0 | n = vmr32(CR3_TARGET_COUNT); |
1896 | 0 | for ( i = 0; i + 1 < n; i += 2 ) |
1897 | 0 | printk("CR3 target%u=%016lx target%u=%016lx\n", |
1898 | 0 | i, vmr(CR3_TARGET_VALUE(i)), |
1899 | 0 | i + 1, vmr(CR3_TARGET_VALUE(i + 1))); |
1900 | 0 | if ( i < n ) |
1901 | 0 | printk("CR3 target%u=%016lx\n", i, vmr(CR3_TARGET_VALUE(i))); |
1902 | 0 | if ( v->arch.hvm_vmx.secondary_exec_control & |
1903 | 0 | SECONDARY_EXEC_PAUSE_LOOP_EXITING ) |
1904 | 0 | printk("PLE Gap=%08x Window=%08x\n", |
1905 | 0 | vmr32(PLE_GAP), vmr32(PLE_WINDOW)); |
1906 | 0 | if ( v->arch.hvm_vmx.secondary_exec_control & |
1907 | 0 | (SECONDARY_EXEC_ENABLE_VPID | SECONDARY_EXEC_ENABLE_VM_FUNCTIONS) ) |
1908 | 0 | printk("Virtual processor ID = 0x%04x VMfunc controls = %016lx\n", |
1909 | 0 | vmr16(VIRTUAL_PROCESSOR_ID), vmr(VM_FUNCTION_CONTROL)); |
1910 | 0 |
|
1911 | 0 | vmx_vmcs_exit(v); |
1912 | 0 | } |
1913 | | |
1914 | | static void vmcs_dump(unsigned char ch) |
1915 | 0 | { |
1916 | 0 | struct domain *d; |
1917 | 0 | struct vcpu *v; |
1918 | 0 | |
1919 | 0 | printk("*********** VMCS Areas **************\n"); |
1920 | 0 |
|
1921 | 0 | rcu_read_lock(&domlist_read_lock); |
1922 | 0 |
|
1923 | 0 | for_each_domain ( d ) |
1924 | 0 | { |
1925 | 0 | if ( !is_hvm_domain(d) ) |
1926 | 0 | continue; |
1927 | 0 | printk("\n>>> Domain %d <<<\n", d->domain_id); |
1928 | 0 | for_each_vcpu ( d, v ) |
1929 | 0 | { |
1930 | 0 | printk("\tVCPU %d\n", v->vcpu_id); |
1931 | 0 | vmcs_dump_vcpu(v); |
1932 | 0 | } |
1933 | 0 | } |
1934 | 0 |
|
1935 | 0 | rcu_read_unlock(&domlist_read_lock); |
1936 | 0 |
|
1937 | 0 | printk("**************************************\n"); |
1938 | 0 | } |
1939 | | |
1940 | | void __init setup_vmcs_dump(void) |
1941 | 1 | { |
1942 | 1 | register_keyhandler('v', vmcs_dump, "dump VT-x VMCSs", 1); |
1943 | 1 | } |
1944 | | |
1945 | | static void __init __maybe_unused build_assertions(void) |
1946 | 0 | { |
1947 | 0 | struct vmx_msr_bitmap bitmap; |
1948 | 0 |
|
1949 | 0 | /* Check vmx_msr_bitmap layout against hardware expectations. */
1950 | 0 | BUILD_BUG_ON(sizeof(bitmap) != PAGE_SIZE); |
1951 | 0 | BUILD_BUG_ON(sizeof(bitmap.read_low) != 1024); |
1952 | 0 | BUILD_BUG_ON(sizeof(bitmap.read_high) != 1024); |
1953 | 0 | BUILD_BUG_ON(sizeof(bitmap.write_low) != 1024); |
1954 | 0 | BUILD_BUG_ON(sizeof(bitmap.write_high) != 1024); |
1955 | 0 | BUILD_BUG_ON(offsetof(struct vmx_msr_bitmap, read_low) != 0); |
1956 | 0 | BUILD_BUG_ON(offsetof(struct vmx_msr_bitmap, read_high) != 1024); |
1957 | 0 | BUILD_BUG_ON(offsetof(struct vmx_msr_bitmap, write_low) != 2048); |
1958 | 0 | BUILD_BUG_ON(offsetof(struct vmx_msr_bitmap, write_high) != 3072); |
1959 | 0 | } |
1960 | | |
1961 | | /* |
1962 | | * Local variables: |
1963 | | * mode: C |
1964 | | * c-file-style: "BSD" |
1965 | | * c-basic-offset: 4 |
1966 | | * tab-width: 4 |
1967 | | * indent-tabs-mode: nil |
1968 | | * End: |
1969 | | */ |