/root/src/xen/xen/arch/x86/hvm/hvm.c
Line | Count | Source |
1 | | /* |
2 | | * hvm.c: Common hardware virtual machine abstractions. |
3 | | * |
4 | | * Copyright (c) 2004, Intel Corporation. |
5 | | * Copyright (c) 2005, International Business Machines Corporation. |
6 | | * Copyright (c) 2008, Citrix Systems, Inc. |
7 | | * |
8 | | * This program is free software; you can redistribute it and/or modify it |
9 | | * under the terms and conditions of the GNU General Public License, |
10 | | * version 2, as published by the Free Software Foundation. |
11 | | * |
12 | | * This program is distributed in the hope it will be useful, but WITHOUT |
13 | | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
14 | | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
15 | | * more details. |
16 | | * |
17 | | * You should have received a copy of the GNU General Public License along with |
18 | | * this program; If not, see <http://www.gnu.org/licenses/>. |
19 | | */ |
20 | | |
21 | | #include <xen/ctype.h> |
22 | | #include <xen/init.h> |
23 | | #include <xen/lib.h> |
24 | | #include <xen/trace.h> |
25 | | #include <xen/sched.h> |
26 | | #include <xen/irq.h> |
27 | | #include <xen/softirq.h> |
28 | | #include <xen/domain.h> |
29 | | #include <xen/domain_page.h> |
30 | | #include <xen/hypercall.h> |
31 | | #include <xen/guest_access.h> |
32 | | #include <xen/event.h> |
33 | | #include <xen/cpu.h> |
34 | | #include <xen/wait.h> |
35 | | #include <xen/mem_access.h> |
36 | | #include <xen/rangeset.h> |
37 | | #include <xen/monitor.h> |
38 | | #include <xen/warning.h> |
39 | | #include <xen/vpci.h> |
40 | | #include <asm/shadow.h> |
41 | | #include <asm/hap.h> |
42 | | #include <asm/current.h> |
43 | | #include <asm/e820.h> |
44 | | #include <asm/io.h> |
45 | | #include <asm/regs.h> |
46 | | #include <asm/cpufeature.h> |
47 | | #include <asm/processor.h> |
48 | | #include <asm/types.h> |
49 | | #include <asm/msr.h> |
50 | | #include <asm/i387.h> |
51 | | #include <asm/xstate.h> |
52 | | #include <asm/traps.h> |
53 | | #include <asm/mc146818rtc.h> |
54 | | #include <asm/mce.h> |
55 | | #include <asm/monitor.h> |
56 | | #include <asm/hvm/hvm.h> |
57 | | #include <asm/hvm/vpt.h> |
58 | | #include <asm/hvm/support.h> |
59 | | #include <asm/hvm/cacheattr.h> |
60 | | #include <asm/hvm/trace.h> |
61 | | #include <asm/hvm/nestedhvm.h> |
62 | | #include <asm/hvm/monitor.h> |
63 | | #include <asm/hvm/ioreq.h> |
64 | | #include <asm/hvm/vm_event.h> |
65 | | #include <asm/altp2m.h> |
66 | | #include <asm/mtrr.h> |
67 | | #include <asm/apic.h> |
68 | | #include <asm/vm_event.h> |
69 | | #include <public/sched.h> |
70 | | #include <public/hvm/ioreq.h> |
71 | | #include <public/version.h> |
72 | | #include <public/memory.h> |
73 | | #include <public/vm_event.h> |
74 | | #include <public/arch-x86/cpuid.h> |
75 | | #include <asm/cpuid.h> |
76 | | |
77 | | bool_t __read_mostly hvm_enabled; |
78 | | |
79 | | #ifdef DBG_LEVEL_0 |
80 | | unsigned int opt_hvm_debug_level __read_mostly; |
81 | | integer_param("hvm_debug", opt_hvm_debug_level); |
82 | | #endif |
83 | | |
84 | | struct hvm_function_table hvm_funcs __read_mostly; |
85 | | |
86 | | /* |
87 | | * The I/O permission bitmap is globally shared by all HVM guests except |
88 | | * the hardware domain, which needs a more permissive one. |
89 | | */ |
90 | 1 | #define HVM_IOBITMAP_SIZE (3 * PAGE_SIZE) |
91 | | unsigned long __section(".bss.page_aligned") __aligned(PAGE_SIZE) |
92 | | hvm_io_bitmap[HVM_IOBITMAP_SIZE / BYTES_PER_LONG]; |
93 | | |
94 | | /* Xen command-line option to enable HAP */ |
95 | | static bool_t __initdata opt_hap_enabled = 1; |
96 | | boolean_param("hap", opt_hap_enabled); |
97 | | |
98 | | #ifndef opt_hvm_fep |
99 | | /* Permit use of the Forced Emulation Prefix in HVM guests */ |
100 | | bool_t __read_mostly opt_hvm_fep; |
101 | | boolean_param("hvm_fep", opt_hvm_fep); |
102 | | #endif |
103 | | static const char __initconst warning_hvm_fep[] = |
104 | | "WARNING: HVM FORCED EMULATION PREFIX IS AVAILABLE\n" |
105 | | "This option is *ONLY* intended to aid testing of Xen.\n" |
106 | | "It has implications on the security of the system.\n" |
107 | | "Please *DO NOT* use this in production.\n"; |
108 | | |
109 | | /* Xen command-line option to enable altp2m */ |
110 | | static bool_t __initdata opt_altp2m_enabled = 0; |
111 | | boolean_param("altp2m", opt_altp2m_enabled); |
112 | | |
113 | | static int cpu_callback( |
114 | | struct notifier_block *nfb, unsigned long action, void *hcpu) |
115 | 33 | { |
116 | 33 | unsigned int cpu = (unsigned long)hcpu; |
117 | 33 | int rc = 0; |
118 | 33 | |
119 | 33 | switch ( action ) |
120 | 33 | { |
121 | 11 | case CPU_UP_PREPARE: |
122 | 11 | rc = hvm_funcs.cpu_up_prepare(cpu); |
123 | 11 | break; |
124 | 0 | case CPU_DYING: |
125 | 0 | hvm_cpu_down(); |
126 | 0 | break; |
127 | 0 | case CPU_UP_CANCELED: |
128 | 0 | case CPU_DEAD: |
129 | 0 | hvm_funcs.cpu_dead(cpu); |
130 | 0 | break; |
131 | 22 | default: |
132 | 22 | break; |
133 | 33 | } |
134 | 33 | |
135 | 33 | return !rc ? NOTIFY_DONE : notifier_from_errno(rc); |
136 | 33 | } |
137 | | |
138 | | static struct notifier_block cpu_nfb = { |
139 | | .notifier_call = cpu_callback |
140 | | }; |
141 | | |
142 | | static int __init hvm_enable(void) |
143 | 1 | { |
144 | 1 | const struct hvm_function_table *fns = NULL; |
145 | 1 | |
146 | 1 | if ( cpu_has_vmx ) |
147 | 1 | fns = start_vmx(); |
148 | 0 | else if ( cpu_has_svm ) |
149 | 0 | fns = start_svm(); |
150 | 1 | |
151 | 1 | if ( fns == NULL ) |
152 | 0 | return 0; |
153 | 1 | |
154 | 1 | hvm_funcs = *fns; |
155 | 1 | hvm_enabled = 1; |
156 | 1 | |
157 | 1 | printk("HVM: %s enabled\n", fns->name); |
158 | 1 | if ( !fns->hap_supported ) |
159 | 0 | printk("HVM: Hardware Assisted Paging (HAP) not detected\n"); |
160 | 1 | else if ( !opt_hap_enabled ) |
161 | 0 | { |
162 | 0 | hvm_funcs.hap_supported = 0; |
163 | 0 | printk("HVM: Hardware Assisted Paging (HAP) detected but disabled\n"); |
164 | 0 | } |
165 | 1 | else |
166 | 1 | { |
167 | 1 | printk("HVM: Hardware Assisted Paging (HAP) detected\n"); |
168 | 1 | printk("HVM: HAP page sizes: 4kB"); |
169 | 1 | if ( fns->hap_capabilities & HVM_HAP_SUPERPAGE_2MB ) |
170 | 1 | { |
171 | 1 | printk(", 2MB%s", opt_hap_2mb ? "" : " [disabled]"); |
172 | 1 | if ( !opt_hap_2mb ) |
173 | 0 | hvm_funcs.hap_capabilities &= ~HVM_HAP_SUPERPAGE_2MB; |
174 | 1 | } |
175 | 1 | if ( fns->hap_capabilities & HVM_HAP_SUPERPAGE_1GB ) |
176 | 1 | { |
177 | 1 | printk(", 1GB%s", opt_hap_1gb ? "" : " [disabled]"); |
178 | 1 | if ( !opt_hap_1gb ) |
179 | 0 | hvm_funcs.hap_capabilities &= ~HVM_HAP_SUPERPAGE_1GB; |
180 | 1 | } |
181 | 1 | printk("\n"); |
182 | 1 | } |
183 | 1 | |
184 | 1 | if ( !opt_altp2m_enabled ) |
185 | 1 | hvm_funcs.altp2m_supported = 0; |
186 | 1 | |
187 | 1 | if ( opt_hvm_fep ) |
188 | 0 | warning_add(warning_hvm_fep); |
189 | 1 | |
190 | 1 | /* |
191 | 1 | * Allow direct access to the PC debug ports 0x80 and 0xed (they are |
192 | 1 | * often used for I/O delays, but the vmexits simply slow things down). |
193 | 1 | */ |
194 | 1 | memset(hvm_io_bitmap, ~0, sizeof(hvm_io_bitmap)); |
195 | 1 | if ( hvm_port80_allowed ) |
196 | 1 | __clear_bit(0x80, hvm_io_bitmap); |
197 | 1 | __clear_bit(0xed, hvm_io_bitmap); |
198 | 1 | |
199 | 1 | register_cpu_notifier(&cpu_nfb); |
200 | 1 | |
201 | 1 | return 0; |
202 | 1 | } |
203 | | presmp_initcall(hvm_enable); |
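
The bitmap set up in hvm_enable() above follows the x86 I/O-permission-bitmap convention: one bit per port, and a set bit causes the access to be intercepted, which is why every bit is set first and then ports 0x80/0xed are cleared to allow direct access. A minimal sketch of the indexing (the helper name and macro below are illustrative, not Xen code):

    #include <stdbool.h>

    #define BITS_PER_LONG_SKETCH (8 * sizeof(unsigned long))

    /* One bit per port; a set bit means accesses to that port vmexit. */
    static bool port_intercepted(const unsigned long *bitmap, unsigned int port)
    {
        return bitmap[port / BITS_PER_LONG_SKETCH] &
               (1UL << (port % BITS_PER_LONG_SKETCH));
    }

With the clears above, port_intercepted(hvm_io_bitmap, 0xed) would be false while almost every other port remains intercepted.
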
204 | | |
205 | | /* |
206 | | * Need to re-inject a given event? We avoid re-injecting software exceptions |
207 | | * and interrupts because the faulting/trapping instruction can simply be |
208 | | * re-executed (neither VMX nor SVM updates RIP when it VMEXITs during |
209 | | * INT3/INTO/INTn). |
210 | | */ |
211 | | int hvm_event_needs_reinjection(uint8_t type, uint8_t vector) |
212 | 0 | { |
213 | 0 | switch ( type ) |
214 | 0 | { |
215 | 0 | case X86_EVENTTYPE_EXT_INTR: |
216 | 0 | case X86_EVENTTYPE_NMI: |
217 | 0 | return 1; |
218 | 0 | case X86_EVENTTYPE_HW_EXCEPTION: |
219 | 0 | /* |
220 | 0 | * SVM uses type 3 ("HW Exception") for #OF and #BP. We explicitly |
221 | 0 | * check for these vectors, as they are really SW Exceptions. SVM has |
222 | 0 | * not updated RIP to point after the trapping instruction (INT3/INTO). |
223 | 0 | */ |
224 | 0 | return (vector != 3) && (vector != 4); |
225 | 0 | default: |
226 | 0 | /* Software exceptions/interrupts can be re-executed (e.g., INT n). */ |
227 | 0 | break; |
228 | 0 | } |
229 | 0 | return 0; |
230 | 0 | } |
231 | | |
232 | | /* |
233 | | * Combine two hardware exceptions: @vec2 was raised during delivery of @vec1. |
234 | | * This means we can assume that @vec2 is contributory or a page fault. |
235 | | */ |
236 | | uint8_t hvm_combine_hw_exceptions(uint8_t vec1, uint8_t vec2) |
237 | 0 | { |
238 | 0 | const unsigned int contributory_exceptions = |
239 | 0 | (1 << TRAP_divide_error) | |
240 | 0 | (1 << TRAP_invalid_tss) | |
241 | 0 | (1 << TRAP_no_segment) | |
242 | 0 | (1 << TRAP_stack_error) | |
243 | 0 | (1 << TRAP_gp_fault); |
244 | 0 | const unsigned int page_faults = |
245 | 0 | (1 << TRAP_page_fault) | |
246 | 0 | (1 << TRAP_virtualisation); |
247 | 0 |
|
248 | 0 | /* Exception during double-fault delivery always causes a triple fault. */ |
249 | 0 | if ( vec1 == TRAP_double_fault ) |
250 | 0 | { |
251 | 0 | hvm_triple_fault(); |
252 | 0 | return TRAP_double_fault; /* dummy return */ |
253 | 0 | } |
254 | 0 |
|
255 | 0 | /* Exception during page-fault delivery always causes a double fault. */ |
256 | 0 | if ( (1u << vec1) & page_faults ) |
257 | 0 | return TRAP_double_fault; |
258 | 0 |
|
259 | 0 | /* Discard the first exception if it's benign or if we now have a #PF. */ |
260 | 0 | if ( !((1u << vec1) & contributory_exceptions) || |
261 | 0 | ((1u << vec2) & page_faults) ) |
262 | 0 | return vec2; |
263 | 0 |
|
264 | 0 | /* Cannot combine the exceptions: double fault. */ |
265 | 0 | return TRAP_double_fault; |
266 | 0 | } |
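
Concretely, the rules above work out as follows (worked examples, not code from this file): a contributory fault such as #GP raised while delivering a #PF escalates to #DF; a #PF raised while delivering #GP is delivered as the #PF, because a second fault that is a page fault always wins; two contributory faults, e.g. #NP during #GP delivery, combine to #DF; a benign first fault such as #DB is simply discarded in favour of the second; and any fault raised while delivering #DF triple-faults the guest via hvm_triple_fault().
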
267 | | |
268 | | void hvm_set_rdtsc_exiting(struct domain *d, bool_t enable) |
269 | 0 | { |
270 | 0 | struct vcpu *v; |
271 | 0 |
|
272 | 0 | for_each_vcpu ( d, v ) |
273 | 0 | hvm_funcs.set_rdtsc_exiting(v, enable); |
274 | 0 | } |
275 | | |
276 | | void hvm_get_guest_pat(struct vcpu *v, u64 *guest_pat) |
277 | 0 | { |
278 | 0 | if ( !hvm_funcs.get_guest_pat(v, guest_pat) ) |
279 | 0 | *guest_pat = v->arch.hvm_vcpu.pat_cr; |
280 | 0 | } |
281 | | |
282 | | int hvm_set_guest_pat(struct vcpu *v, u64 guest_pat) |
283 | 10 | { |
284 | 10 | int i; |
285 | 10 | uint8_t *value = (uint8_t *)&guest_pat; |
286 | 10 | |
287 | 96 | for ( i = 0; i < 8; i++ ) |
288 | 72 | switch ( value[i] ) |
289 | 72 | { |
290 | 85 | case PAT_TYPE_UC_MINUS: |
291 | 85 | case PAT_TYPE_UNCACHABLE: |
292 | 85 | case PAT_TYPE_WRBACK: |
293 | 85 | case PAT_TYPE_WRCOMB: |
294 | 85 | case PAT_TYPE_WRPROT: |
295 | 85 | case PAT_TYPE_WRTHROUGH: |
296 | 85 | break; |
297 | 0 | default: |
298 | 0 | HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid guest PAT: %"PRIx64"\n", |
299 | 0 | guest_pat); |
300 | 0 | return 0; |
301 | 72 | } |
302 | 10 | |
303 | 24 | if ( !hvm_funcs.set_guest_pat(v, guest_pat) ) |
304 | 10 | v->arch.hvm_vcpu.pat_cr = guest_pat; |
305 | 24 | |
306 | 24 | return 1; |
307 | 10 | } |
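
The loop above accepts only the architecturally defined memory types in each of the eight PAT bytes. For reference, a sketch of the encodings being matched (values per the Intel SDM; the enumerator names below are shortened stand-ins for Xen's PAT_TYPE_* constants):

    /* x86 PAT memory-type encodings accepted by hvm_set_guest_pat(). */
    enum {
        PAT_UC       = 0x00,   /* PAT_TYPE_UNCACHABLE */
        PAT_WC       = 0x01,   /* PAT_TYPE_WRCOMB     */
        PAT_WT       = 0x04,   /* PAT_TYPE_WRTHROUGH  */
        PAT_WP       = 0x05,   /* PAT_TYPE_WRPROT     */
        PAT_WB       = 0x06,   /* PAT_TYPE_WRBACK     */
        PAT_UC_MINUS = 0x07,   /* PAT_TYPE_UC_MINUS   */
    };
    /* 0x02 and 0x03 are reserved, so a PAT byte using them is rejected;
     * the power-on default value 0x0007040600070406 passes the check. */
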
308 | | |
309 | | bool hvm_set_guest_bndcfgs(struct vcpu *v, u64 val) |
310 | 0 | { |
311 | 0 | if ( !hvm_funcs.set_guest_bndcfgs || |
312 | 0 | !is_canonical_address(val) || |
313 | 0 | (val & IA32_BNDCFGS_RESERVED) ) |
314 | 0 | return false; |
315 | 0 |
|
316 | 0 | /* |
317 | 0 | * While MPX instructions are supposed to be gated on XCR0.BND*, let's |
318 | 0 | * nevertheless force the relevant XCR0 bits on when the feature is being |
319 | 0 | * enabled in BNDCFGS. |
320 | 0 | */ |
321 | 0 | if ( (val & IA32_BNDCFGS_ENABLE) && |
322 | 0 | !(v->arch.xcr0_accum & (XSTATE_BNDREGS | XSTATE_BNDCSR)) ) |
323 | 0 | { |
324 | 0 | uint64_t xcr0 = get_xcr0(); |
325 | 0 | int rc; |
326 | 0 |
|
327 | 0 | if ( v != current ) |
328 | 0 | return false; |
329 | 0 |
|
330 | 0 | rc = handle_xsetbv(XCR_XFEATURE_ENABLED_MASK, |
331 | 0 | xcr0 | XSTATE_BNDREGS | XSTATE_BNDCSR); |
332 | 0 |
|
333 | 0 | if ( rc ) |
334 | 0 | { |
335 | 0 | HVM_DBG_LOG(DBG_LEVEL_1, "Failed to force XCR0.BND*: %d", rc); |
336 | 0 | return false; |
337 | 0 | } |
338 | 0 |
|
339 | 0 | if ( handle_xsetbv(XCR_XFEATURE_ENABLED_MASK, xcr0) ) |
340 | 0 | /* nothing, best effort only */; |
341 | 0 | } |
342 | 0 |
|
343 | 0 | return hvm_funcs.set_guest_bndcfgs(v, val); |
344 | 0 | } |
345 | | |
346 | | /* |
347 | | * Get the ratio to scale host TSC frequency to gtsc_khz. zero will be |
348 | | * returned if TSC scaling is unavailable or ratio cannot be handled |
349 | | * by host CPU. Otherwise, a non-zero ratio will be returned. |
350 | | */ |
351 | | u64 hvm_get_tsc_scaling_ratio(u32 gtsc_khz) |
352 | 0 | { |
353 | 0 | u8 ratio_frac_bits = hvm_funcs.tsc_scaling.ratio_frac_bits; |
354 | 0 | u64 max_ratio = hvm_funcs.tsc_scaling.max_ratio; |
355 | 0 | u64 ratio, dummy; |
356 | 0 |
|
357 | 0 | if ( !hvm_tsc_scaling_supported ) |
358 | 0 | return 0; |
359 | 0 |
|
360 | 0 | /* |
361 | 0 | * Return early if the quotient is too large to fit in the integral |
362 | 0 | * part of TSC scaling ratio. This also avoids #DE from the following |
363 | 0 | * divq when the quotient can not fit in a 64-bit integer. |
364 | 0 | */ |
365 | 0 | if ( gtsc_khz / cpu_khz > (max_ratio >> ratio_frac_bits) ) |
366 | 0 | return 0; |
367 | 0 |
|
368 | 0 | /* ratio = (gtsc_khz << hvm_funcs.tsc_scaling.ratio_frac_bits) / cpu_khz */ |
369 | 0 | asm ( "shldq %[frac],%[gkhz],%[zero] ; " |
370 | 0 | "shlq %[frac],%[gkhz] ; " |
371 | 0 | "divq %[hkhz] " |
372 | 0 | : "=d" (dummy), "=a" (ratio) |
373 | 0 | : [frac] "c" (ratio_frac_bits), |
374 | 0 | [gkhz] "a" ((u64) gtsc_khz), |
375 | 0 | [zero] "d" (0ULL), |
376 | 0 | [hkhz] "rm" ((u64) cpu_khz) ); |
377 | 0 |
|
378 | 0 | return ratio > max_ratio ? 0 : ratio; |
379 | 0 | } |
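
The inline asm above performs a 128-by-64-bit division: the shldq/shlq pair forms the 128-bit value gtsc_khz << ratio_frac_bits in RDX:RAX, and divq divides it by cpu_khz; the earlier quotient check avoids the #DE that divq raises when the result would not fit in 64 bits. The same computation in plain C, assuming a compiler that provides unsigned __int128 (sketch, not Xen code):

    #include <stdint.h>

    /* Fixed-point ratio = (gtsc_khz << frac_bits) / host_khz. */
    static uint64_t tsc_ratio_sketch(uint32_t gtsc_khz, uint64_t host_khz,
                                     uint8_t frac_bits, uint64_t max_ratio)
    {
        unsigned __int128 ratio =
            ((unsigned __int128)gtsc_khz << frac_bits) / host_khz;

        return ratio > max_ratio ? 0 : (uint64_t)ratio;
    }
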
380 | | |
381 | | u64 hvm_scale_tsc(const struct domain *d, u64 tsc) |
382 | 0 | { |
383 | 0 | u64 ratio = d->arch.hvm_domain.tsc_scaling_ratio; |
384 | 0 | u64 dummy; |
385 | 0 |
|
386 | 0 | if ( ratio == hvm_default_tsc_scaling_ratio ) |
387 | 0 | return tsc; |
388 | 0 |
|
389 | 0 | /* tsc = (tsc * ratio) >> hvm_funcs.tsc_scaling.ratio_frac_bits */ |
390 | 0 | asm ( "mulq %[ratio]; shrdq %[frac],%%rdx,%[tsc]" |
391 | 0 | : [tsc] "+a" (tsc), "=&d" (dummy) |
392 | 0 | : [frac] "c" (hvm_funcs.tsc_scaling.ratio_frac_bits), |
393 | 0 | [ratio] "rm" (ratio) ); |
394 | 0 |
|
395 | 0 | return tsc; |
396 | 0 | } |
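
Likewise, the mulq/shrdq pair above is a 64x64-to-128-bit multiply followed by a right shift by the fractional width, matching the commented formula. In C, under the same __int128 assumption:

    /* scaled = (tsc * ratio) >> frac_bits, via a 128-bit intermediate. */
    static inline uint64_t scale_tsc_sketch(uint64_t tsc, uint64_t ratio,
                                            uint8_t frac_bits)
    {
        return (uint64_t)(((unsigned __int128)tsc * ratio) >> frac_bits);
    }
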
397 | | |
398 | | static void hvm_set_guest_tsc_fixed(struct vcpu *v, u64 guest_tsc, u64 at_tsc) |
399 | 1 | { |
400 | 1 | uint64_t tsc; |
401 | 1 | uint64_t delta_tsc; |
402 | 1 | |
403 | 1 | if ( v->domain->arch.vtsc ) |
404 | 0 | { |
405 | 0 | tsc = hvm_get_guest_time_fixed(v, at_tsc); |
406 | 0 | tsc = gtime_to_gtsc(v->domain, tsc); |
407 | 0 | } |
408 | 1 | else |
409 | 1 | { |
410 | 1 | tsc = at_tsc ?: rdtsc(); |
411 | 1 | if ( hvm_tsc_scaling_supported ) |
412 | 0 | tsc = hvm_scale_tsc(v->domain, tsc); |
413 | 1 | } |
414 | 1 | |
415 | 1 | delta_tsc = guest_tsc - tsc; |
416 | 1 | v->arch.hvm_vcpu.cache_tsc_offset = delta_tsc; |
417 | 1 | |
418 | 1 | hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, at_tsc); |
419 | 1 | } |
420 | | |
421 | 1 | #define hvm_set_guest_tsc(v, t) hvm_set_guest_tsc_fixed(v, t, 0) |
422 | | |
423 | | static void hvm_set_guest_tsc_msr(struct vcpu *v, u64 guest_tsc) |
424 | 0 | { |
425 | 0 | uint64_t tsc_offset = v->arch.hvm_vcpu.cache_tsc_offset; |
426 | 0 |
|
427 | 0 | hvm_set_guest_tsc(v, guest_tsc); |
428 | 0 | v->arch.hvm_vcpu.msr_tsc_adjust += v->arch.hvm_vcpu.cache_tsc_offset |
429 | 0 | - tsc_offset; |
430 | 0 | } |
431 | | |
432 | | static void hvm_set_guest_tsc_adjust(struct vcpu *v, u64 tsc_adjust) |
433 | 0 | { |
434 | 0 | v->arch.hvm_vcpu.cache_tsc_offset += tsc_adjust |
435 | 0 | - v->arch.hvm_vcpu.msr_tsc_adjust; |
436 | 0 | hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, 0); |
437 | 0 | v->arch.hvm_vcpu.msr_tsc_adjust = tsc_adjust; |
438 | 0 | } |
439 | | |
440 | | u64 hvm_get_guest_tsc_fixed(struct vcpu *v, uint64_t at_tsc) |
441 | 0 | { |
442 | 0 | uint64_t tsc; |
443 | 0 |
|
444 | 0 | if ( v->domain->arch.vtsc ) |
445 | 0 | { |
446 | 0 | tsc = hvm_get_guest_time_fixed(v, at_tsc); |
447 | 0 | tsc = gtime_to_gtsc(v->domain, tsc); |
448 | 0 | } |
449 | 0 | else |
450 | 0 | { |
451 | 0 | tsc = at_tsc ?: rdtsc(); |
452 | 0 | if ( hvm_tsc_scaling_supported ) |
453 | 0 | tsc = hvm_scale_tsc(v->domain, tsc); |
454 | 0 | } |
455 | 0 |
|
456 | 0 | return tsc + v->arch.hvm_vcpu.cache_tsc_offset; |
457 | 0 | } |
458 | | |
459 | | void hvm_migrate_timers(struct vcpu *v) |
460 | 548 | { |
461 | 548 | rtc_migrate_timers(v); |
462 | 548 | pt_migrate(v); |
463 | 548 | } |
464 | | |
465 | | static int hvm_migrate_pirq(struct domain *d, struct hvm_pirq_dpci *pirq_dpci, |
466 | | void *arg) |
467 | 0 | { |
468 | 0 | struct vcpu *v = arg; |
469 | 0 |
|
470 | 0 | if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) && |
471 | 0 | /* Needn't migrate the pirq if it is delivered to the guest directly. */ |
472 | 0 | !pirq_dpci->gmsi.posted && |
473 | 0 | (pirq_dpci->gmsi.dest_vcpu_id == v->vcpu_id) ) |
474 | 0 | { |
475 | 0 | struct irq_desc *desc = |
476 | 0 | pirq_spin_lock_irq_desc(dpci_pirq(pirq_dpci), NULL); |
477 | 0 |
|
478 | 0 | if ( !desc ) |
479 | 0 | return 0; |
480 | 0 | ASSERT(MSI_IRQ(desc - irq_desc)); |
481 | 0 | irq_set_affinity(desc, cpumask_of(v->processor)); |
482 | 0 | spin_unlock_irq(&desc->lock); |
483 | 0 | } |
484 | 0 |
|
485 | 0 | return 0; |
486 | 0 | } |
487 | | |
488 | | void hvm_migrate_pirqs(struct vcpu *v) |
489 | 590 | { |
490 | 590 | struct domain *d = v->domain; |
491 | 590 | |
492 | 590 | if ( !iommu_enabled || !hvm_domain_irq(d)->dpci ) |
493 | 590 | return; |
494 | 590 | |
495 | 0 | spin_lock(&d->event_lock); |
496 | 0 | pt_pirq_iterate(d, hvm_migrate_pirq, v); |
497 | 0 | spin_unlock(&d->event_lock); |
498 | 0 | } |
499 | | |
500 | | static bool hvm_get_pending_event(struct vcpu *v, struct x86_event *info) |
501 | 0 | { |
502 | 0 | info->cr2 = v->arch.hvm_vcpu.guest_cr[2]; |
503 | 0 | return hvm_funcs.get_pending_event(v, info); |
504 | 0 | } |
505 | | |
506 | | void hvm_do_resume(struct vcpu *v) |
507 | 4.57M | { |
508 | 4.57M | check_wakeup_from_wait(); |
509 | 4.57M | |
510 | 4.57M | pt_restore_timer(v); |
511 | 4.57M | |
512 | 4.57M | if ( !handle_hvm_io_completion(v) ) |
513 | 0 | return; |
514 | 4.57M | |
515 | 4.57M | if ( unlikely(v->arch.vm_event) ) |
516 | 0 | hvm_vm_event_do_resume(v); |
517 | 4.57M | |
518 | 4.57M | /* Inject pending hw/sw event */ |
519 | 4.57M | if ( v->arch.hvm_vcpu.inject_event.vector >= 0 ) |
520 | 0 | { |
521 | 0 | smp_rmb(); |
522 | 0 |
|
523 | 0 | if ( !hvm_event_pending(v) ) |
524 | 0 | hvm_inject_event(&v->arch.hvm_vcpu.inject_event); |
525 | 0 |
|
526 | 0 | v->arch.hvm_vcpu.inject_event.vector = HVM_EVENT_VECTOR_UNSET; |
527 | 0 | } |
528 | 4.57M | |
529 | 4.57M | if ( unlikely(v->arch.vm_event) && v->arch.monitor.next_interrupt_enabled ) |
530 | 0 | { |
531 | 0 | struct x86_event info; |
532 | 0 |
|
533 | 0 | if ( hvm_get_pending_event(v, &info) ) |
534 | 0 | { |
535 | 0 | hvm_monitor_interrupt(info.vector, info.type, info.error_code, |
536 | 0 | info.cr2); |
537 | 0 | v->arch.monitor.next_interrupt_enabled = false; |
538 | 0 | } |
539 | 0 | } |
540 | 4.57M | } |
541 | | |
542 | | static int hvm_print_line( |
543 | | int dir, unsigned int port, unsigned int bytes, uint32_t *val) |
544 | 0 | { |
545 | 0 | struct domain *cd = current->domain; |
546 | 0 | char c = *val; |
547 | 0 |
|
548 | 0 | BUG_ON(bytes != 1); |
549 | 0 |
|
550 | 0 | /* Accept only printable characters, newline, and horizontal tab. */ |
551 | 0 | if ( !isprint(c) && (c != '\n') && (c != '\t') ) |
552 | 0 | return X86EMUL_OKAY; |
553 | 0 |
|
554 | 0 | spin_lock(&cd->pbuf_lock); |
555 | 0 | if ( c != '\n' ) |
556 | 0 | cd->pbuf[cd->pbuf_idx++] = c; |
557 | 0 | if ( (cd->pbuf_idx == (DOMAIN_PBUF_SIZE - 1)) || (c == '\n') ) |
558 | 0 | { |
559 | 0 | cd->pbuf[cd->pbuf_idx] = '\0'; |
560 | 0 | guest_printk(cd, XENLOG_G_DEBUG "%s\n", cd->pbuf); |
561 | 0 | cd->pbuf_idx = 0; |
562 | 0 | } |
563 | 0 | spin_unlock(&cd->pbuf_lock); |
564 | 0 |
|
565 | 0 | return X86EMUL_OKAY; |
566 | 0 | } |
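
hvm_print_line() is registered on port 0xe9 further down (hvm_domain_initialise(), source line 654): the guest writes one byte per OUT, and Xen buffers the characters until a newline arrives or the per-domain buffer fills, then logs the line. A guest-side usage sketch (xen_debug_puts() and the outb() helper are illustrative, not part of this file):

    /* Emit a debug string to Xen's 0xe9 handler, one byte per OUT. */
    static inline void outb(unsigned char val, unsigned short port)
    {
        asm volatile ( "outb %0, %1" :: "a" (val), "Nd" (port) );
    }

    static void xen_debug_puts(const char *s)
    {
        while ( *s )
            outb(*s++, 0xe9);   /* flushed by Xen on '\n' or when full */
    }
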
567 | | |
568 | | int hvm_domain_initialise(struct domain *d, unsigned long domcr_flags, |
569 | | struct xen_arch_domainconfig *config) |
570 | 1 | { |
571 | 1 | unsigned int nr_gsis; |
572 | 1 | int rc; |
573 | 1 | |
574 | 1 | if ( !hvm_enabled ) |
575 | 0 | { |
576 | 0 | gdprintk(XENLOG_WARNING, "Attempt to create a HVM guest " |
577 | 0 | "on a non-VT/AMDV platform.\n"); |
578 | 0 | return -EINVAL; |
579 | 0 | } |
580 | 1 | |
581 | 1 | spin_lock_init(&d->arch.hvm_domain.irq_lock); |
582 | 1 | spin_lock_init(&d->arch.hvm_domain.uc_lock); |
583 | 1 | spin_lock_init(&d->arch.hvm_domain.write_map.lock); |
584 | 1 | rwlock_init(&d->arch.hvm_domain.mmcfg_lock); |
585 | 1 | INIT_LIST_HEAD(&d->arch.hvm_domain.write_map.list); |
586 | 1 | INIT_LIST_HEAD(&d->arch.hvm_domain.g2m_ioport_list); |
587 | 1 | INIT_LIST_HEAD(&d->arch.hvm_domain.mmcfg_regions); |
588 | 1 | INIT_LIST_HEAD(&d->arch.hvm_domain.msix_tables); |
589 | 1 | |
590 | 1 | rc = create_perdomain_mapping(d, PERDOMAIN_VIRT_START, 0, NULL, NULL); |
591 | 1 | if ( rc ) |
592 | 0 | goto fail; |
593 | 1 | |
594 | 1 | hvm_init_cacheattr_region_list(d); |
595 | 1 | |
596 | 1 | rc = paging_enable(d, PG_refcounts|PG_translate|PG_external); |
597 | 1 | if ( rc != 0 ) |
598 | 0 | goto fail0; |
599 | 1 | |
600 | 1 | nr_gsis = is_hardware_domain(d) ? nr_irqs_gsi : NR_HVM_DOMU_IRQS; |
601 | 1 | d->arch.hvm_domain.pl_time = xzalloc(struct pl_time); |
602 | 1 | d->arch.hvm_domain.params = xzalloc_array(uint64_t, HVM_NR_PARAMS); |
603 | 1 | d->arch.hvm_domain.io_handler = xzalloc_array(struct hvm_io_handler, |
604 | 1 | NR_IO_HANDLERS); |
605 | 1 | d->arch.hvm_domain.irq = xzalloc_bytes(hvm_irq_size(nr_gsis)); |
606 | 1 | |
607 | 1 | rc = -ENOMEM; |
608 | 1 | if ( !d->arch.hvm_domain.pl_time || !d->arch.hvm_domain.irq || |
609 | 1 | !d->arch.hvm_domain.params || !d->arch.hvm_domain.io_handler ) |
610 | 0 | goto fail1; |
611 | 1 | |
612 | 1 | /* Set the number of GSIs */ |
613 | 1 | hvm_domain_irq(d)->nr_gsis = nr_gsis; |
614 | 1 | |
615 | 1 | BUILD_BUG_ON(NR_HVM_DOMU_IRQS < NR_ISAIRQS); |
616 | 1 | ASSERT(hvm_domain_irq(d)->nr_gsis >= NR_ISAIRQS); |
617 | 1 | |
618 | 1 | /* need link to containing domain */ |
619 | 1 | d->arch.hvm_domain.pl_time->domain = d; |
620 | 1 | |
621 | 1 | /* Set the default IO Bitmap. */ |
622 | 1 | if ( is_hardware_domain(d) ) |
623 | 1 | { |
624 | 1 | d->arch.hvm_domain.io_bitmap = _xmalloc(HVM_IOBITMAP_SIZE, PAGE_SIZE); |
625 | 1 | if ( d->arch.hvm_domain.io_bitmap == NULL ) |
626 | 0 | { |
627 | 0 | rc = -ENOMEM; |
628 | 0 | goto fail1; |
629 | 0 | } |
630 | 1 | memset(d->arch.hvm_domain.io_bitmap, ~0, HVM_IOBITMAP_SIZE); |
631 | 1 | } |
632 | 1 | else |
633 | 0 | d->arch.hvm_domain.io_bitmap = hvm_io_bitmap; |
634 | 1 | |
635 | 1 | register_g2m_portio_handler(d); |
636 | 1 | register_vpci_portio_handler(d); |
637 | 1 | |
638 | 1 | hvm_ioreq_init(d); |
639 | 1 | |
640 | 1 | hvm_init_guest_time(d); |
641 | 1 | |
642 | 1 | d->arch.hvm_domain.params[HVM_PARAM_TRIPLE_FAULT_REASON] = SHUTDOWN_reboot; |
643 | 1 | |
644 | 1 | vpic_init(d); |
645 | 1 | |
646 | 1 | rc = vioapic_init(d); |
647 | 1 | if ( rc != 0 ) |
648 | 0 | goto fail1; |
649 | 1 | |
650 | 1 | stdvga_init(d); |
651 | 1 | |
652 | 1 | rtc_init(d); |
653 | 1 | |
654 | 1 | register_portio_handler(d, 0xe9, 1, hvm_print_line); |
655 | 1 | |
656 | 1 | if ( hvm_tsc_scaling_supported ) |
657 | 0 | d->arch.hvm_domain.tsc_scaling_ratio = hvm_default_tsc_scaling_ratio; |
658 | 1 | |
659 | 1 | rc = hvm_funcs.domain_initialise(d); |
660 | 1 | if ( rc != 0 ) |
661 | 0 | goto fail2; |
662 | 1 | |
663 | 1 | return 0; |
664 | 1 | |
665 | 0 | fail2: |
666 | 0 | rtc_deinit(d); |
667 | 0 | stdvga_deinit(d); |
668 | 0 | vioapic_deinit(d); |
669 | 0 | fail1: |
670 | 0 | if ( is_hardware_domain(d) ) |
671 | 0 | xfree(d->arch.hvm_domain.io_bitmap); |
672 | 0 | xfree(d->arch.hvm_domain.io_handler); |
673 | 0 | xfree(d->arch.hvm_domain.params); |
674 | 0 | xfree(d->arch.hvm_domain.pl_time); |
675 | 0 | xfree(d->arch.hvm_domain.irq); |
676 | 0 | fail0: |
677 | 0 | hvm_destroy_cacheattr_region_list(d); |
678 | 0 | destroy_perdomain_mapping(d, PERDOMAIN_VIRT_START, 0); |
679 | 0 | fail: |
680 | 0 | return rc; |
681 | 0 | } |
682 | | |
683 | | void hvm_domain_relinquish_resources(struct domain *d) |
684 | 0 | { |
685 | 0 | if ( hvm_funcs.nhvm_domain_relinquish_resources ) |
686 | 0 | hvm_funcs.nhvm_domain_relinquish_resources(d); |
687 | 0 |
|
688 | 0 | viridian_domain_deinit(d); |
689 | 0 |
|
690 | 0 | hvm_destroy_all_ioreq_servers(d); |
691 | 0 |
|
692 | 0 | msixtbl_pt_cleanup(d); |
693 | 0 |
|
694 | 0 | /* Stop all asynchronous timer actions. */ |
695 | 0 | rtc_deinit(d); |
696 | 0 | if ( d->vcpu != NULL && d->vcpu[0] != NULL ) |
697 | 0 | { |
698 | 0 | pmtimer_deinit(d); |
699 | 0 | hpet_deinit(d); |
700 | 0 | } |
701 | 0 | } |
702 | | |
703 | | void hvm_domain_destroy(struct domain *d) |
704 | 0 | { |
705 | 0 | struct list_head *ioport_list, *tmp; |
706 | 0 | struct g2m_ioport *ioport; |
707 | 0 |
|
708 | 0 | xfree(d->arch.hvm_domain.io_handler); |
709 | 0 | d->arch.hvm_domain.io_handler = NULL; |
710 | 0 |
|
711 | 0 | xfree(d->arch.hvm_domain.params); |
712 | 0 | d->arch.hvm_domain.params = NULL; |
713 | 0 |
|
714 | 0 | hvm_destroy_cacheattr_region_list(d); |
715 | 0 |
|
716 | 0 | hvm_funcs.domain_destroy(d); |
717 | 0 | rtc_deinit(d); |
718 | 0 | stdvga_deinit(d); |
719 | 0 | vioapic_deinit(d); |
720 | 0 |
|
721 | 0 | xfree(d->arch.hvm_domain.pl_time); |
722 | 0 | d->arch.hvm_domain.pl_time = NULL; |
723 | 0 |
|
724 | 0 | xfree(d->arch.hvm_domain.irq); |
725 | 0 | d->arch.hvm_domain.irq = NULL; |
726 | 0 |
|
727 | 0 | list_for_each_safe ( ioport_list, tmp, |
728 | 0 | &d->arch.hvm_domain.g2m_ioport_list ) |
729 | 0 | { |
730 | 0 | ioport = list_entry(ioport_list, struct g2m_ioport, list); |
731 | 0 | list_del(&ioport->list); |
732 | 0 | xfree(ioport); |
733 | 0 | } |
734 | 0 |
|
735 | 0 | destroy_vpci_mmcfg(&d->arch.hvm_domain.mmcfg_regions); |
736 | 0 | } |
737 | | |
738 | | static int hvm_save_tsc_adjust(struct domain *d, hvm_domain_context_t *h) |
739 | 0 | { |
740 | 0 | struct vcpu *v; |
741 | 0 | struct hvm_tsc_adjust ctxt; |
742 | 0 | int err = 0; |
743 | 0 |
|
744 | 0 | for_each_vcpu ( d, v ) |
745 | 0 | { |
746 | 0 | ctxt.tsc_adjust = v->arch.hvm_vcpu.msr_tsc_adjust; |
747 | 0 | err = hvm_save_entry(TSC_ADJUST, v->vcpu_id, h, &ctxt); |
748 | 0 | if ( err ) |
749 | 0 | break; |
750 | 0 | } |
751 | 0 |
|
752 | 0 | return err; |
753 | 0 | } |
754 | | |
755 | | static int hvm_load_tsc_adjust(struct domain *d, hvm_domain_context_t *h) |
756 | 0 | { |
757 | 0 | unsigned int vcpuid = hvm_load_instance(h); |
758 | 0 | struct vcpu *v; |
759 | 0 | struct hvm_tsc_adjust ctxt; |
760 | 0 |
|
761 | 0 | if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL ) |
762 | 0 | { |
763 | 0 | dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no vcpu%u\n", |
764 | 0 | d->domain_id, vcpuid); |
765 | 0 | return -EINVAL; |
766 | 0 | } |
767 | 0 |
|
768 | 0 | if ( hvm_load_entry(TSC_ADJUST, h, &ctxt) != 0 ) |
769 | 0 | return -EINVAL; |
770 | 0 |
|
771 | 0 | v->arch.hvm_vcpu.msr_tsc_adjust = ctxt.tsc_adjust; |
772 | 0 | return 0; |
773 | 0 | } |
774 | | |
775 | | HVM_REGISTER_SAVE_RESTORE(TSC_ADJUST, hvm_save_tsc_adjust, |
776 | | hvm_load_tsc_adjust, 1, HVMSR_PER_VCPU); |
777 | | |
778 | | static int hvm_save_cpu_ctxt(struct domain *d, hvm_domain_context_t *h) |
779 | 0 | { |
780 | 0 | struct vcpu *v; |
781 | 0 | struct hvm_hw_cpu ctxt; |
782 | 0 | struct segment_register seg; |
783 | 0 |
|
784 | 0 | for_each_vcpu ( d, v ) |
785 | 0 | { |
786 | 0 | /* We don't need to save state for a vcpu that is down; the restore |
787 | 0 | * code will leave it down if there is nothing saved. */ |
788 | 0 | if ( v->pause_flags & VPF_down ) |
789 | 0 | continue; |
790 | 0 |
|
791 | 0 | memset(&ctxt, 0, sizeof(ctxt)); |
792 | 0 |
|
793 | 0 | /* Architecture-specific vmcs/vmcb bits */ |
794 | 0 | hvm_funcs.save_cpu_ctxt(v, &ctxt); |
795 | 0 |
|
796 | 0 | ctxt.tsc = hvm_get_guest_tsc_fixed(v, d->arch.hvm_domain.sync_tsc); |
797 | 0 |
|
798 | 0 | ctxt.msr_tsc_aux = hvm_msr_tsc_aux(v); |
799 | 0 |
|
800 | 0 | hvm_get_segment_register(v, x86_seg_idtr, &seg); |
801 | 0 | ctxt.idtr_limit = seg.limit; |
802 | 0 | ctxt.idtr_base = seg.base; |
803 | 0 |
|
804 | 0 | hvm_get_segment_register(v, x86_seg_gdtr, &seg); |
805 | 0 | ctxt.gdtr_limit = seg.limit; |
806 | 0 | ctxt.gdtr_base = seg.base; |
807 | 0 |
|
808 | 0 | hvm_get_segment_register(v, x86_seg_cs, &seg); |
809 | 0 | ctxt.cs_sel = seg.sel; |
810 | 0 | ctxt.cs_limit = seg.limit; |
811 | 0 | ctxt.cs_base = seg.base; |
812 | 0 | ctxt.cs_arbytes = seg.attr; |
813 | 0 |
|
814 | 0 | hvm_get_segment_register(v, x86_seg_ds, &seg); |
815 | 0 | ctxt.ds_sel = seg.sel; |
816 | 0 | ctxt.ds_limit = seg.limit; |
817 | 0 | ctxt.ds_base = seg.base; |
818 | 0 | ctxt.ds_arbytes = seg.attr; |
819 | 0 |
|
820 | 0 | hvm_get_segment_register(v, x86_seg_es, &seg); |
821 | 0 | ctxt.es_sel = seg.sel; |
822 | 0 | ctxt.es_limit = seg.limit; |
823 | 0 | ctxt.es_base = seg.base; |
824 | 0 | ctxt.es_arbytes = seg.attr; |
825 | 0 |
|
826 | 0 | hvm_get_segment_register(v, x86_seg_ss, &seg); |
827 | 0 | ctxt.ss_sel = seg.sel; |
828 | 0 | ctxt.ss_limit = seg.limit; |
829 | 0 | ctxt.ss_base = seg.base; |
830 | 0 | ctxt.ss_arbytes = seg.attr; |
831 | 0 |
|
832 | 0 | hvm_get_segment_register(v, x86_seg_fs, &seg); |
833 | 0 | ctxt.fs_sel = seg.sel; |
834 | 0 | ctxt.fs_limit = seg.limit; |
835 | 0 | ctxt.fs_base = seg.base; |
836 | 0 | ctxt.fs_arbytes = seg.attr; |
837 | 0 |
|
838 | 0 | hvm_get_segment_register(v, x86_seg_gs, &seg); |
839 | 0 | ctxt.gs_sel = seg.sel; |
840 | 0 | ctxt.gs_limit = seg.limit; |
841 | 0 | ctxt.gs_base = seg.base; |
842 | 0 | ctxt.gs_arbytes = seg.attr; |
843 | 0 |
|
844 | 0 | hvm_get_segment_register(v, x86_seg_tr, &seg); |
845 | 0 | ctxt.tr_sel = seg.sel; |
846 | 0 | ctxt.tr_limit = seg.limit; |
847 | 0 | ctxt.tr_base = seg.base; |
848 | 0 | ctxt.tr_arbytes = seg.attr; |
849 | 0 |
|
850 | 0 | hvm_get_segment_register(v, x86_seg_ldtr, &seg); |
851 | 0 | ctxt.ldtr_sel = seg.sel; |
852 | 0 | ctxt.ldtr_limit = seg.limit; |
853 | 0 | ctxt.ldtr_base = seg.base; |
854 | 0 | ctxt.ldtr_arbytes = seg.attr; |
855 | 0 |
|
856 | 0 | if ( v->fpu_initialised ) |
857 | 0 | { |
858 | 0 | memcpy(ctxt.fpu_regs, v->arch.fpu_ctxt, sizeof(ctxt.fpu_regs)); |
859 | 0 | ctxt.flags = XEN_X86_FPU_INITIALISED; |
860 | 0 | } |
861 | 0 |
|
862 | 0 | ctxt.rax = v->arch.user_regs.rax; |
863 | 0 | ctxt.rbx = v->arch.user_regs.rbx; |
864 | 0 | ctxt.rcx = v->arch.user_regs.rcx; |
865 | 0 | ctxt.rdx = v->arch.user_regs.rdx; |
866 | 0 | ctxt.rbp = v->arch.user_regs.rbp; |
867 | 0 | ctxt.rsi = v->arch.user_regs.rsi; |
868 | 0 | ctxt.rdi = v->arch.user_regs.rdi; |
869 | 0 | ctxt.rsp = v->arch.user_regs.rsp; |
870 | 0 | ctxt.rip = v->arch.user_regs.rip; |
871 | 0 | ctxt.rflags = v->arch.user_regs.rflags; |
872 | 0 | ctxt.r8 = v->arch.user_regs.r8; |
873 | 0 | ctxt.r9 = v->arch.user_regs.r9; |
874 | 0 | ctxt.r10 = v->arch.user_regs.r10; |
875 | 0 | ctxt.r11 = v->arch.user_regs.r11; |
876 | 0 | ctxt.r12 = v->arch.user_regs.r12; |
877 | 0 | ctxt.r13 = v->arch.user_regs.r13; |
878 | 0 | ctxt.r14 = v->arch.user_regs.r14; |
879 | 0 | ctxt.r15 = v->arch.user_regs.r15; |
880 | 0 | ctxt.dr0 = v->arch.debugreg[0]; |
881 | 0 | ctxt.dr1 = v->arch.debugreg[1]; |
882 | 0 | ctxt.dr2 = v->arch.debugreg[2]; |
883 | 0 | ctxt.dr3 = v->arch.debugreg[3]; |
884 | 0 | ctxt.dr6 = v->arch.debugreg[6]; |
885 | 0 | ctxt.dr7 = v->arch.debugreg[7]; |
886 | 0 |
|
887 | 0 | if ( hvm_save_entry(CPU, v->vcpu_id, h, &ctxt) != 0 ) |
888 | 0 | return 1; |
889 | 0 | } |
890 | 0 | return 0; |
891 | 0 | } |
892 | | |
893 | | /* Return a string indicating the error, or NULL for valid. */ |
894 | | const char *hvm_efer_valid(const struct vcpu *v, uint64_t value, |
895 | | signed int cr0_pg) |
896 | 37 | { |
897 | 37 | const struct domain *d = v->domain; |
898 | 37 | const struct cpuid_policy *p; |
899 | 37 | |
900 | 37 | if ( cr0_pg < 0 && !is_hardware_domain(d) ) |
901 | 0 | p = d->arch.cpuid; |
902 | 37 | else |
903 | 37 | p = &host_cpuid_policy; |
904 | 37 | |
905 | 37 | if ( (value & EFER_SCE) && !p->extd.syscall ) |
906 | 0 | return "SCE without feature"; |
907 | 37 | |
908 | 37 | if ( (value & (EFER_LME | EFER_LMA)) && !p->extd.lm ) |
909 | 0 | return "LME/LMA without feature"; |
910 | 37 | |
911 | 37 | if ( (value & EFER_LMA) && (!(value & EFER_LME) || !cr0_pg) ) |
912 | 0 | return "LMA/LME/CR0.PG inconsistency"; |
913 | 37 | |
914 | 37 | if ( (value & EFER_NX) && !p->extd.nx ) |
915 | 0 | return "NX without feature"; |
916 | 37 | |
917 | 37 | if ( (value & EFER_SVME) && (!p->extd.svm || !nestedhvm_enabled(d)) ) |
918 | 0 | return "SVME without nested virt"; |
919 | 37 | |
920 | 37 | if ( (value & EFER_LMSLE) && !cpu_has_lmsl ) |
921 | 0 | return "LMSLE without support"; |
922 | 37 | |
923 | 37 | if ( (value & EFER_FFXSE) && !p->extd.ffxsr ) |
924 | 0 | return "FFXSE without feature"; |
925 | 37 | |
926 | 37 | return NULL; |
927 | 37 | } |
928 | | |
929 | | /* These reserved bits in lower 32 remain 0 after any load of CR0 */ |
930 | | #define HVM_CR0_GUEST_RESERVED_BITS \ |
931 | 3.87k | (~((unsigned long) \ |
932 | 3.87k | (X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | \ |
933 | 3.87k | X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | \ |
934 | 3.87k | X86_CR0_WP | X86_CR0_AM | X86_CR0_NW | \ |
935 | 3.87k | X86_CR0_CD | X86_CR0_PG))) |
936 | | |
937 | | /* These bits in CR4 can be set by the guest. */ |
938 | | unsigned long hvm_cr4_guest_valid_bits(const struct vcpu *v, bool restore) |
939 | 98 | { |
940 | 98 | const struct domain *d = v->domain; |
941 | 98 | const struct cpuid_policy *p; |
942 | 98 | bool mce, vmxe; |
943 | 98 | |
944 | 99 | if ( !restore && !is_hardware_domain(d) ) |
945 | 0 | p = d->arch.cpuid; |
946 | 98 | else |
947 | 98 | p = &host_cpuid_policy; |
948 | 98 | |
949 | 98 | /* Logic broken out simply to aid readability below. */ |
950 | 0 | mce = p->basic.mce || p->basic.mca; |
951 | 99 | vmxe = p->basic.vmx && (restore || nestedhvm_enabled(d)); |
952 | 98 | |
953 | 18.4E | return ((p->basic.vme ? X86_CR4_VME | X86_CR4_PVI : 0) | |
954 | 18.4E | (p->basic.tsc ? X86_CR4_TSD : 0) | |
955 | 18.4E | (p->basic.de ? X86_CR4_DE : 0) | |
956 | 18.4E | (p->basic.pse ? X86_CR4_PSE : 0) | |
957 | 18.4E | (p->basic.pae ? X86_CR4_PAE : 0) | |
958 | 18.4E | (mce ? X86_CR4_MCE : 0) | |
959 | 18.4E | (p->basic.pge ? X86_CR4_PGE : 0) | |
960 | 98 | X86_CR4_PCE | |
961 | 18.4E | (p->basic.fxsr ? X86_CR4_OSFXSR : 0) | |
962 | 18.4E | (p->basic.sse ? X86_CR4_OSXMMEXCPT : 0) | |
963 | 98 | (vmxe ? X86_CR4_VMXE : 0) | |
964 | 18.4E | (p->feat.fsgsbase ? X86_CR4_FSGSBASE : 0) | |
965 | 18.4E | (p->basic.pcid ? X86_CR4_PCIDE : 0) | |
966 | 18.4E | (p->basic.xsave ? X86_CR4_OSXSAVE : 0) | |
967 | 18.4E | (p->feat.smep ? X86_CR4_SMEP : 0) | |
968 | 98 | (p->feat.smap ? X86_CR4_SMAP : 0) | |
969 | 98 | (p->feat.umip ? X86_CR4_UMIP : 0) | |
970 | 98 | (p->feat.pku ? X86_CR4_PKE : 0)); |
971 | 98 | } |
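
A typical use of this mask, mirroring the restore-path check at source line 1007: reject any guest CR4 write that sets a bit outside the returned set. The helper below is an illustrative sketch, not a function from this file:

    /* Runtime validation of a guest CR4 write (restore = false). */
    static bool cr4_write_ok(const struct vcpu *v, unsigned long new_cr4)
    {
        return !(new_cr4 & ~hvm_cr4_guest_valid_bits(v, false));
    }
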
972 | | |
973 | | static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h) |
974 | 0 | { |
975 | 0 | int vcpuid; |
976 | 0 | struct vcpu *v; |
977 | 0 | struct hvm_hw_cpu ctxt; |
978 | 0 | struct segment_register seg; |
979 | 0 | const char *errstr; |
980 | 0 | struct xsave_struct *xsave_area; |
981 | 0 |
|
982 | 0 | /* Which vcpu is this? */ |
983 | 0 | vcpuid = hvm_load_instance(h); |
984 | 0 | if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL ) |
985 | 0 | { |
986 | 0 | dprintk(XENLOG_G_ERR, "HVM restore: dom%u has no vcpu%u\n", |
987 | 0 | d->domain_id, vcpuid); |
988 | 0 | return -EINVAL; |
989 | 0 | } |
990 | 0 |
|
991 | 0 | if ( hvm_load_entry_zeroextend(CPU, h, &ctxt) != 0 ) |
992 | 0 | return -EINVAL; |
993 | 0 |
|
994 | 0 | if ( ctxt.pad0 != 0 ) |
995 | 0 | return -EINVAL; |
996 | 0 |
|
997 | 0 | /* Sanity check some control registers. */ |
998 | 0 | if ( (ctxt.cr0 & HVM_CR0_GUEST_RESERVED_BITS) || |
999 | 0 | !(ctxt.cr0 & X86_CR0_ET) || |
1000 | 0 | ((ctxt.cr0 & (X86_CR0_PE|X86_CR0_PG)) == X86_CR0_PG) ) |
1001 | 0 | { |
1002 | 0 | printk(XENLOG_G_ERR "HVM%d restore: bad CR0 %#" PRIx64 "\n", |
1003 | 0 | d->domain_id, ctxt.cr0); |
1004 | 0 | return -EINVAL; |
1005 | 0 | } |
1006 | 0 |
|
1007 | 0 | if ( ctxt.cr4 & ~hvm_cr4_guest_valid_bits(v, 1) ) |
1008 | 0 | { |
1009 | 0 | printk(XENLOG_G_ERR "HVM%d restore: bad CR4 %#" PRIx64 "\n", |
1010 | 0 | d->domain_id, ctxt.cr4); |
1011 | 0 | return -EINVAL; |
1012 | 0 | } |
1013 | 0 |
|
1014 | 0 | errstr = hvm_efer_valid(v, ctxt.msr_efer, MASK_EXTR(ctxt.cr0, X86_CR0_PG)); |
1015 | 0 | if ( errstr ) |
1016 | 0 | { |
1017 | 0 | printk(XENLOG_G_ERR "%pv: HVM restore: bad EFER %#" PRIx64 " - %s\n", |
1018 | 0 | v, ctxt.msr_efer, errstr); |
1019 | 0 | return -EINVAL; |
1020 | 0 | } |
1021 | 0 |
|
1022 | 0 | if ( (ctxt.flags & ~XEN_X86_FPU_INITIALISED) != 0 ) |
1023 | 0 | { |
1024 | 0 | gprintk(XENLOG_ERR, "bad flags value in CPU context: %#x\n", |
1025 | 0 | ctxt.flags); |
1026 | 0 | return -EINVAL; |
1027 | 0 | } |
1028 | 0 |
|
1029 | 0 | /* Older Xen versions used to save the segment arbytes directly |
1030 | 0 | * from the VMCS on Intel hosts. Detect this and rearrange them |
1031 | 0 | * into the struct segment_register format. */ |
1032 | 0 | #define UNFOLD_ARBYTES(_r) \ |
1033 | 0 | if ( (_r & 0xf000) && !(_r & 0x0f00) ) \ |
1034 | 0 | _r = ((_r & 0xff) | ((_r >> 4) & 0xf00)) |
1035 | 0 | UNFOLD_ARBYTES(ctxt.cs_arbytes); |
1036 | 0 | UNFOLD_ARBYTES(ctxt.ds_arbytes); |
1037 | 0 | UNFOLD_ARBYTES(ctxt.es_arbytes); |
1038 | 0 | UNFOLD_ARBYTES(ctxt.fs_arbytes); |
1039 | 0 | UNFOLD_ARBYTES(ctxt.gs_arbytes); |
1040 | 0 | UNFOLD_ARBYTES(ctxt.ss_arbytes); |
1041 | 0 | UNFOLD_ARBYTES(ctxt.tr_arbytes); |
1042 | 0 | UNFOLD_ARBYTES(ctxt.ldtr_arbytes); |
1043 | 0 | #undef UNFOLD_ARBYTES |
1044 | 0 |
|
1045 | 0 | /* Architecture-specific vmcs/vmcb bits */ |
1046 | 0 | if ( hvm_funcs.load_cpu_ctxt(v, &ctxt) < 0 ) |
1047 | 0 | return -EINVAL; |
1048 | 0 |
|
1049 | 0 | if ( hvm_funcs.tsc_scaling.setup ) |
1050 | 0 | hvm_funcs.tsc_scaling.setup(v); |
1051 | 0 |
|
1052 | 0 | v->arch.hvm_vcpu.msr_tsc_aux = ctxt.msr_tsc_aux; |
1053 | 0 |
|
1054 | 0 | hvm_set_guest_tsc_fixed(v, ctxt.tsc, d->arch.hvm_domain.sync_tsc); |
1055 | 0 |
|
1056 | 0 | seg.limit = ctxt.idtr_limit; |
1057 | 0 | seg.base = ctxt.idtr_base; |
1058 | 0 | hvm_set_segment_register(v, x86_seg_idtr, &seg); |
1059 | 0 |
|
1060 | 0 | seg.limit = ctxt.gdtr_limit; |
1061 | 0 | seg.base = ctxt.gdtr_base; |
1062 | 0 | hvm_set_segment_register(v, x86_seg_gdtr, &seg); |
1063 | 0 |
|
1064 | 0 | seg.sel = ctxt.cs_sel; |
1065 | 0 | seg.limit = ctxt.cs_limit; |
1066 | 0 | seg.base = ctxt.cs_base; |
1067 | 0 | seg.attr = ctxt.cs_arbytes; |
1068 | 0 | hvm_set_segment_register(v, x86_seg_cs, &seg); |
1069 | 0 |
|
1070 | 0 | seg.sel = ctxt.ds_sel; |
1071 | 0 | seg.limit = ctxt.ds_limit; |
1072 | 0 | seg.base = ctxt.ds_base; |
1073 | 0 | seg.attr = ctxt.ds_arbytes; |
1074 | 0 | hvm_set_segment_register(v, x86_seg_ds, &seg); |
1075 | 0 |
|
1076 | 0 | seg.sel = ctxt.es_sel; |
1077 | 0 | seg.limit = ctxt.es_limit; |
1078 | 0 | seg.base = ctxt.es_base; |
1079 | 0 | seg.attr = ctxt.es_arbytes; |
1080 | 0 | hvm_set_segment_register(v, x86_seg_es, &seg); |
1081 | 0 |
|
1082 | 0 | seg.sel = ctxt.ss_sel; |
1083 | 0 | seg.limit = ctxt.ss_limit; |
1084 | 0 | seg.base = ctxt.ss_base; |
1085 | 0 | seg.attr = ctxt.ss_arbytes; |
1086 | 0 | hvm_set_segment_register(v, x86_seg_ss, &seg); |
1087 | 0 |
|
1088 | 0 | seg.sel = ctxt.fs_sel; |
1089 | 0 | seg.limit = ctxt.fs_limit; |
1090 | 0 | seg.base = ctxt.fs_base; |
1091 | 0 | seg.attr = ctxt.fs_arbytes; |
1092 | 0 | hvm_set_segment_register(v, x86_seg_fs, &seg); |
1093 | 0 |
|
1094 | 0 | seg.sel = ctxt.gs_sel; |
1095 | 0 | seg.limit = ctxt.gs_limit; |
1096 | 0 | seg.base = ctxt.gs_base; |
1097 | 0 | seg.attr = ctxt.gs_arbytes; |
1098 | 0 | hvm_set_segment_register(v, x86_seg_gs, &seg); |
1099 | 0 |
|
1100 | 0 | seg.sel = ctxt.tr_sel; |
1101 | 0 | seg.limit = ctxt.tr_limit; |
1102 | 0 | seg.base = ctxt.tr_base; |
1103 | 0 | seg.attr = ctxt.tr_arbytes; |
1104 | 0 | hvm_set_segment_register(v, x86_seg_tr, &seg); |
1105 | 0 |
|
1106 | 0 | seg.sel = ctxt.ldtr_sel; |
1107 | 0 | seg.limit = ctxt.ldtr_limit; |
1108 | 0 | seg.base = ctxt.ldtr_base; |
1109 | 0 | seg.attr = ctxt.ldtr_arbytes; |
1110 | 0 | hvm_set_segment_register(v, x86_seg_ldtr, &seg); |
1111 | 0 |
|
1112 | 0 | /* Cover xsave-absent save file restoration on xsave-capable host. */ |
1113 | 0 | xsave_area = xsave_enabled(v) ? NULL : v->arch.xsave_area; |
1114 | 0 |
|
1115 | 0 | v->fpu_initialised = !!(ctxt.flags & XEN_X86_FPU_INITIALISED); |
1116 | 0 | if ( v->fpu_initialised ) |
1117 | 0 | { |
1118 | 0 | memcpy(v->arch.fpu_ctxt, ctxt.fpu_regs, sizeof(ctxt.fpu_regs)); |
1119 | 0 | if ( xsave_area ) |
1120 | 0 | xsave_area->xsave_hdr.xstate_bv = XSTATE_FP_SSE; |
1121 | 0 | } |
1122 | 0 | else if ( xsave_area ) |
1123 | 0 | { |
1124 | 0 | xsave_area->xsave_hdr.xstate_bv = 0; |
1125 | 0 | xsave_area->fpu_sse.mxcsr = MXCSR_DEFAULT; |
1126 | 0 | } |
1127 | 0 | if ( xsave_area ) |
1128 | 0 | xsave_area->xsave_hdr.xcomp_bv = 0; |
1129 | 0 |
|
1130 | 0 | v->arch.user_regs.rax = ctxt.rax; |
1131 | 0 | v->arch.user_regs.rbx = ctxt.rbx; |
1132 | 0 | v->arch.user_regs.rcx = ctxt.rcx; |
1133 | 0 | v->arch.user_regs.rdx = ctxt.rdx; |
1134 | 0 | v->arch.user_regs.rbp = ctxt.rbp; |
1135 | 0 | v->arch.user_regs.rsi = ctxt.rsi; |
1136 | 0 | v->arch.user_regs.rdi = ctxt.rdi; |
1137 | 0 | v->arch.user_regs.rsp = ctxt.rsp; |
1138 | 0 | v->arch.user_regs.rip = ctxt.rip; |
1139 | 0 | v->arch.user_regs.rflags = ctxt.rflags | X86_EFLAGS_MBS; |
1140 | 0 | v->arch.user_regs.r8 = ctxt.r8; |
1141 | 0 | v->arch.user_regs.r9 = ctxt.r9; |
1142 | 0 | v->arch.user_regs.r10 = ctxt.r10; |
1143 | 0 | v->arch.user_regs.r11 = ctxt.r11; |
1144 | 0 | v->arch.user_regs.r12 = ctxt.r12; |
1145 | 0 | v->arch.user_regs.r13 = ctxt.r13; |
1146 | 0 | v->arch.user_regs.r14 = ctxt.r14; |
1147 | 0 | v->arch.user_regs.r15 = ctxt.r15; |
1148 | 0 | v->arch.debugreg[0] = ctxt.dr0; |
1149 | 0 | v->arch.debugreg[1] = ctxt.dr1; |
1150 | 0 | v->arch.debugreg[2] = ctxt.dr2; |
1151 | 0 | v->arch.debugreg[3] = ctxt.dr3; |
1152 | 0 | v->arch.debugreg[6] = ctxt.dr6; |
1153 | 0 | v->arch.debugreg[7] = ctxt.dr7; |
1154 | 0 |
|
1155 | 0 | v->arch.vgc_flags = VGCF_online; |
1156 | 0 |
|
1157 | 0 | /* Auxiliary processors should be woken immediately. */ |
1158 | 0 | v->is_initialised = 1; |
1159 | 0 | clear_bit(_VPF_down, &v->pause_flags); |
1160 | 0 | vcpu_wake(v); |
1161 | 0 |
|
1162 | 0 | return 0; |
1163 | 0 | } |
1164 | | |
1165 | | HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt, |
1166 | | 1, HVMSR_PER_VCPU); |
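
A worked example of the UNFOLD_ARBYTES conversion in hvm_load_cpu_ctxt() above: legacy records saved the VMCS access-rights layout, where the AVL/L/D/G flags sit at bits 12-15 with bits 8-11 zero, while the current struct segment_register attr packs them into bits 8-11.

    /* 0xc093 (legacy layout, bits 8-11 clear)
     *   -> (0xc093 & 0xff) | ((0xc093 >> 4) & 0xf00)
     *   ->  0x93           |  0xc00
     *   ->  0xc93 (compact attr layout)
     * Values with any of bits 8-11 already set are left untouched. */
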
1167 | | |
1168 | 1 | #define HVM_CPU_XSAVE_SIZE(xcr0) (offsetof(struct hvm_hw_cpu_xsave, \ |
1169 | 1 | save_area) + \ |
1170 | 1 | xstate_ctxt_size(xcr0)) |
1171 | | |
1172 | | static int hvm_save_cpu_xsave_states(struct domain *d, hvm_domain_context_t *h) |
1173 | 0 | { |
1174 | 0 | struct vcpu *v; |
1175 | 0 | struct hvm_hw_cpu_xsave *ctxt; |
1176 | 0 |
|
1177 | 0 | if ( !cpu_has_xsave ) |
1178 | 0 | return 0; /* do nothing */ |
1179 | 0 |
|
1180 | 0 | for_each_vcpu ( d, v ) |
1181 | 0 | { |
1182 | 0 | unsigned int size = HVM_CPU_XSAVE_SIZE(v->arch.xcr0_accum); |
1183 | 0 |
|
1184 | 0 | if ( !xsave_enabled(v) ) |
1185 | 0 | continue; |
1186 | 0 | if ( _hvm_init_entry(h, CPU_XSAVE_CODE, v->vcpu_id, size) ) |
1187 | 0 | return 1; |
1188 | 0 | ctxt = (struct hvm_hw_cpu_xsave *)&h->data[h->cur]; |
1189 | 0 | h->cur += size; |
1190 | 0 |
|
1191 | 0 | ctxt->xfeature_mask = xfeature_mask; |
1192 | 0 | ctxt->xcr0 = v->arch.xcr0; |
1193 | 0 | ctxt->xcr0_accum = v->arch.xcr0_accum; |
1194 | 0 | expand_xsave_states(v, &ctxt->save_area, |
1195 | 0 | size - offsetof(typeof(*ctxt), save_area)); |
1196 | 0 | } |
1197 | 0 |
|
1198 | 0 | return 0; |
1199 | 0 | } |
1200 | | |
1201 | | /* |
1202 | | * Structure layout conformity checks, documenting correctness of the cast in |
1203 | | * the invocation of validate_xstate() below. |
1204 | | * Leverage CONFIG_COMPAT machinery to perform this. |
1205 | | */ |
1206 | | #define xen_xsave_hdr xsave_hdr |
1207 | | #define compat_xsave_hdr hvm_hw_cpu_xsave_hdr |
1208 | | CHECK_FIELD_(struct, xsave_hdr, xstate_bv); |
1209 | | CHECK_FIELD_(struct, xsave_hdr, xcomp_bv); |
1210 | | CHECK_FIELD_(struct, xsave_hdr, reserved); |
1211 | | #undef compat_xsave_hdr |
1212 | | #undef xen_xsave_hdr |
1213 | | |
1214 | | static int hvm_load_cpu_xsave_states(struct domain *d, hvm_domain_context_t *h) |
1215 | 0 | { |
1216 | 0 | unsigned int vcpuid, size; |
1217 | 0 | int err; |
1218 | 0 | struct vcpu *v; |
1219 | 0 | struct hvm_hw_cpu_xsave *ctxt; |
1220 | 0 | const struct hvm_save_descriptor *desc; |
1221 | 0 | unsigned int i, desc_start, desc_length; |
1222 | 0 |
|
1223 | 0 | /* Which vcpu is this? */ |
1224 | 0 | vcpuid = hvm_load_instance(h); |
1225 | 0 | if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL ) |
1226 | 0 | { |
1227 | 0 | dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no vcpu%u\n", |
1228 | 0 | d->domain_id, vcpuid); |
1229 | 0 | return -EINVAL; |
1230 | 0 | } |
1231 | 0 |
|
1232 | 0 | /* Fail, since we can't restore an image saved on an xsave-capable host. */ |
1233 | 0 | if ( !cpu_has_xsave ) |
1234 | 0 | return -EOPNOTSUPP; |
1235 | 0 |
|
1236 | 0 | /* Customized checking for entry since our entry is of variable length */ |
1237 | 0 | desc = (struct hvm_save_descriptor *)&h->data[h->cur]; |
1238 | 0 | if ( sizeof (*desc) > h->size - h->cur) |
1239 | 0 | { |
1240 | 0 | printk(XENLOG_G_WARNING |
1241 | 0 | "HVM%d.%d restore: not enough data left to read xsave descriptor\n", |
1242 | 0 | d->domain_id, vcpuid); |
1243 | 0 | return -ENODATA; |
1244 | 0 | } |
1245 | 0 | if ( desc->length + sizeof (*desc) > h->size - h->cur) |
1246 | 0 | { |
1247 | 0 | printk(XENLOG_G_WARNING |
1248 | 0 | "HVM%d.%d restore: not enough data left to read %u xsave bytes\n", |
1249 | 0 | d->domain_id, vcpuid, desc->length); |
1250 | 0 | return -ENODATA; |
1251 | 0 | } |
1252 | 0 | if ( desc->length < offsetof(struct hvm_hw_cpu_xsave, save_area) + |
1253 | 0 | XSTATE_AREA_MIN_SIZE ) |
1254 | 0 | { |
1255 | 0 | printk(XENLOG_G_WARNING |
1256 | 0 | "HVM%d.%d restore mismatch: xsave length %u < %zu\n", |
1257 | 0 | d->domain_id, vcpuid, desc->length, |
1258 | 0 | offsetof(struct hvm_hw_cpu_xsave, |
1259 | 0 | save_area) + XSTATE_AREA_MIN_SIZE); |
1260 | 0 | return -EINVAL; |
1261 | 0 | } |
1262 | 0 | h->cur += sizeof (*desc); |
1263 | 0 | desc_start = h->cur; |
1264 | 0 |
|
1265 | 0 | ctxt = (struct hvm_hw_cpu_xsave *)&h->data[h->cur]; |
1266 | 0 | h->cur += desc->length; |
1267 | 0 |
|
1268 | 0 | err = validate_xstate(ctxt->xcr0, ctxt->xcr0_accum, |
1269 | 0 | (const void *)&ctxt->save_area.xsave_hdr); |
1270 | 0 | if ( err ) |
1271 | 0 | { |
1272 | 0 | printk(XENLOG_G_WARNING |
1273 | 0 | "HVM%d.%d restore: inconsistent xsave state (feat=%#"PRIx64 |
1274 | 0 | " accum=%#"PRIx64" xcr0=%#"PRIx64" bv=%#"PRIx64" err=%d)\n", |
1275 | 0 | d->domain_id, vcpuid, ctxt->xfeature_mask, ctxt->xcr0_accum, |
1276 | 0 | ctxt->xcr0, ctxt->save_area.xsave_hdr.xstate_bv, err); |
1277 | 0 | return err; |
1278 | 0 | } |
1279 | 0 | size = HVM_CPU_XSAVE_SIZE(ctxt->xcr0_accum); |
1280 | 0 | desc_length = desc->length; |
1281 | 0 | if ( desc_length > size ) |
1282 | 0 | { |
1283 | 0 | /* |
1284 | 0 | * Xen 4.3.0, 4.2.3 and older used to send longer-than-needed |
1285 | 0 | * xsave regions. Permit loading the record if the extra data |
1286 | 0 | * is all zero. |
1287 | 0 | */ |
1288 | 0 | for ( i = size; i < desc->length; i++ ) |
1289 | 0 | { |
1290 | 0 | if ( h->data[desc_start + i] ) |
1291 | 0 | { |
1292 | 0 | printk(XENLOG_G_WARNING |
1293 | 0 | "HVM%d.%u restore mismatch: xsave length %#x > %#x (non-zero data at %#x)\n", |
1294 | 0 | d->domain_id, vcpuid, desc->length, size, i); |
1295 | 0 | return -EOPNOTSUPP; |
1296 | 0 | } |
1297 | 0 | } |
1298 | 0 | printk(XENLOG_G_WARNING |
1299 | 0 | "HVM%d.%u restore mismatch: xsave length %#x > %#x\n", |
1300 | 0 | d->domain_id, vcpuid, desc->length, size); |
1301 | 0 | /* Rewind desc_length to ignore the extraneous zeros. */ |
1302 | 0 | desc_length = size; |
1303 | 0 | } |
1304 | 0 |
|
1305 | 0 | if ( xsave_area_compressed((const void *)&ctxt->save_area) ) |
1306 | 0 | { |
1307 | 0 | printk(XENLOG_G_WARNING |
1308 | 0 | "HVM%d.%u restore: compressed xsave state not supported\n", |
1309 | 0 | d->domain_id, vcpuid); |
1310 | 0 | return -EOPNOTSUPP; |
1311 | 0 | } |
1312 | 0 | else if ( desc_length != size ) |
1313 | 0 | { |
1314 | 0 | printk(XENLOG_G_WARNING |
1315 | 0 | "HVM%d.%u restore mismatch: xsave length %#x != %#x\n", |
1316 | 0 | d->domain_id, vcpuid, desc_length, size); |
1317 | 0 | return -EINVAL; |
1318 | 0 | } |
1319 | 0 | /* Checking finished */ |
1320 | 0 |
|
1321 | 0 | v->arch.xcr0 = ctxt->xcr0; |
1322 | 0 | v->arch.xcr0_accum = ctxt->xcr0_accum; |
1323 | 0 | if ( ctxt->xcr0_accum & XSTATE_NONLAZY ) |
1324 | 0 | v->arch.nonlazy_xstate_used = 1; |
1325 | 0 | compress_xsave_states(v, &ctxt->save_area, |
1326 | 0 | size - offsetof(struct hvm_hw_cpu_xsave, save_area)); |
1327 | 0 |
|
1328 | 0 | return 0; |
1329 | 0 | } |
1330 | | |
1331 | 0 | #define HVM_CPU_MSR_SIZE(cnt) offsetof(struct hvm_msr, msr[cnt]) |
1332 | | static unsigned int __read_mostly msr_count_max; |
1333 | | |
1334 | | static int hvm_save_cpu_msrs(struct domain *d, hvm_domain_context_t *h) |
1335 | 0 | { |
1336 | 0 | struct vcpu *v; |
1337 | 0 |
|
1338 | 0 | for_each_vcpu ( d, v ) |
1339 | 0 | { |
1340 | 0 | struct hvm_msr *ctxt; |
1341 | 0 | unsigned int i; |
1342 | 0 |
|
1343 | 0 | if ( _hvm_init_entry(h, CPU_MSR_CODE, v->vcpu_id, |
1344 | 0 | HVM_CPU_MSR_SIZE(msr_count_max)) ) |
1345 | 0 | return 1; |
1346 | 0 | ctxt = (struct hvm_msr *)&h->data[h->cur]; |
1347 | 0 | ctxt->count = 0; |
1348 | 0 |
|
1349 | 0 | if ( hvm_funcs.save_msr ) |
1350 | 0 | hvm_funcs.save_msr(v, ctxt); |
1351 | 0 |
|
1352 | 0 | ASSERT(ctxt->count <= msr_count_max); |
1353 | 0 |
|
1354 | 0 | for ( i = 0; i < ctxt->count; ++i ) |
1355 | 0 | ctxt->msr[i]._rsvd = 0; |
1356 | 0 |
|
1357 | 0 | if ( ctxt->count ) |
1358 | 0 | h->cur += HVM_CPU_MSR_SIZE(ctxt->count); |
1359 | 0 | else |
1360 | 0 | h->cur -= sizeof(struct hvm_save_descriptor); |
1361 | 0 | } |
1362 | 0 |
|
1363 | 0 | return 0; |
1364 | 0 | } |
1365 | | |
1366 | | static int hvm_load_cpu_msrs(struct domain *d, hvm_domain_context_t *h) |
1367 | 0 | { |
1368 | 0 | unsigned int i, vcpuid = hvm_load_instance(h); |
1369 | 0 | struct vcpu *v; |
1370 | 0 | const struct hvm_save_descriptor *desc; |
1371 | 0 | struct hvm_msr *ctxt; |
1372 | 0 | int err = 0; |
1373 | 0 |
|
1374 | 0 | if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL ) |
1375 | 0 | { |
1376 | 0 | dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no vcpu%u\n", |
1377 | 0 | d->domain_id, vcpuid); |
1378 | 0 | return -EINVAL; |
1379 | 0 | } |
1380 | 0 |
|
1381 | 0 | /* Customized checking for entry since our entry is of variable length */ |
1382 | 0 | desc = (struct hvm_save_descriptor *)&h->data[h->cur]; |
1383 | 0 | if ( sizeof (*desc) > h->size - h->cur) |
1384 | 0 | { |
1385 | 0 | printk(XENLOG_G_WARNING |
1386 | 0 | "HVM%d.%d restore: not enough data left to read MSR descriptor\n", |
1387 | 0 | d->domain_id, vcpuid); |
1388 | 0 | return -ENODATA; |
1389 | 0 | } |
1390 | 0 | if ( desc->length + sizeof (*desc) > h->size - h->cur) |
1391 | 0 | { |
1392 | 0 | printk(XENLOG_G_WARNING |
1393 | 0 | "HVM%d.%d restore: not enough data left to read %u MSR bytes\n", |
1394 | 0 | d->domain_id, vcpuid, desc->length); |
1395 | 0 | return -ENODATA; |
1396 | 0 | } |
1397 | 0 | if ( desc->length < HVM_CPU_MSR_SIZE(1) ) |
1398 | 0 | { |
1399 | 0 | printk(XENLOG_G_WARNING |
1400 | 0 | "HVM%d.%d restore mismatch: MSR length %u < %zu\n", |
1401 | 0 | d->domain_id, vcpuid, desc->length, HVM_CPU_MSR_SIZE(1)); |
1402 | 0 | return -EINVAL; |
1403 | 0 | } |
1404 | 0 |
|
1405 | 0 | h->cur += sizeof(*desc); |
1406 | 0 | ctxt = (struct hvm_msr *)&h->data[h->cur]; |
1407 | 0 | h->cur += desc->length; |
1408 | 0 |
|
1409 | 0 | if ( desc->length != HVM_CPU_MSR_SIZE(ctxt->count) ) |
1410 | 0 | { |
1411 | 0 | printk(XENLOG_G_WARNING |
1412 | 0 | "HVM%d.%d restore mismatch: MSR length %u != %zu\n", |
1413 | 0 | d->domain_id, vcpuid, desc->length, |
1414 | 0 | HVM_CPU_MSR_SIZE(ctxt->count)); |
1415 | 0 | return -EOPNOTSUPP; |
1416 | 0 | } |
1417 | 0 |
|
1418 | 0 | for ( i = 0; i < ctxt->count; ++i ) |
1419 | 0 | if ( ctxt->msr[i]._rsvd ) |
1420 | 0 | return -EOPNOTSUPP; |
1421 | 0 | /* Checking finished */ |
1422 | 0 |
|
1423 | 0 | if ( hvm_funcs.load_msr ) |
1424 | 0 | err = hvm_funcs.load_msr(v, ctxt); |
1425 | 0 |
|
1426 | 0 | for ( i = 0; !err && i < ctxt->count; ++i ) |
1427 | 0 | { |
1428 | 0 | switch ( ctxt->msr[i].index ) |
1429 | 0 | { |
1430 | 0 | default: |
1431 | 0 | if ( !ctxt->msr[i]._rsvd ) |
1432 | 0 | err = -ENXIO; |
1433 | 0 | break; |
1434 | 0 | } |
1435 | 0 | } |
1436 | 0 |
|
1437 | 0 | return err; |
1438 | 0 | } |
1439 | | |
1440 | | /* We need variable length data chunks for XSAVE area and MSRs, hence |
1441 | | * a custom declaration rather than HVM_REGISTER_SAVE_RESTORE. |
1442 | | */ |
1443 | | static int __init hvm_register_CPU_save_and_restore(void) |
1444 | 1 | { |
1445 | 1 | hvm_register_savevm(CPU_XSAVE_CODE, |
1446 | 1 | "CPU_XSAVE", |
1447 | 1 | hvm_save_cpu_xsave_states, |
1448 | 1 | hvm_load_cpu_xsave_states, |
1449 | 1 | HVM_CPU_XSAVE_SIZE(xfeature_mask) + |
1450 | 1 | sizeof(struct hvm_save_descriptor), |
1451 | 1 | HVMSR_PER_VCPU); |
1452 | 1 | |
1453 | 1 | if ( hvm_funcs.init_msr ) |
1454 | 1 | msr_count_max += hvm_funcs.init_msr(); |
1455 | 1 | |
1456 | 1 | if ( msr_count_max ) |
1457 | 0 | hvm_register_savevm(CPU_MSR_CODE, |
1458 | 0 | "CPU_MSR", |
1459 | 0 | hvm_save_cpu_msrs, |
1460 | 0 | hvm_load_cpu_msrs, |
1461 | 0 | HVM_CPU_MSR_SIZE(msr_count_max) + |
1462 | 0 | sizeof(struct hvm_save_descriptor), |
1463 | 0 | HVMSR_PER_VCPU); |
1464 | 1 | |
1465 | 1 | return 0; |
1466 | 1 | } |
1467 | | __initcall(hvm_register_CPU_save_and_restore); |
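
Both variable-length records above (CPU_XSAVE and CPU_MSR) size themselves with offsetof over a trailing array, so the stream carries exactly the header plus the entries in use, and the loader can recover the entry count from the descriptor length. A standalone sketch of the idiom, using a made-up record type rather than Xen's struct hvm_msr:

    #include <stddef.h>
    #include <stdint.h>

    struct msr_record_sketch {
        uint32_t count;
        struct { uint32_t index, _rsvd; uint64_t val; } msr[];  /* flexible array */
    };

    /* Size of a record holding 'cnt' entries: header (padded) + cnt entries. */
    #define MSR_RECORD_SIZE(cnt) offsetof(struct msr_record_sketch, msr[cnt])
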
1468 | | |
1469 | | int hvm_vcpu_initialise(struct vcpu *v) |
1470 | 12 | { |
1471 | 12 | int rc; |
1472 | 12 | struct domain *d = v->domain; |
1473 | 12 | |
1474 | 12 | hvm_asid_flush_vcpu(v); |
1475 | 12 | |
1476 | 12 | spin_lock_init(&v->arch.hvm_vcpu.tm_lock); |
1477 | 12 | INIT_LIST_HEAD(&v->arch.hvm_vcpu.tm_list); |
1478 | 12 | |
1479 | 12 | rc = hvm_vcpu_cacheattr_init(v); /* teardown: vcpu_cacheattr_destroy */ |
1480 | 12 | if ( rc != 0 ) |
1481 | 0 | goto fail1; |
1482 | 12 | |
1483 | 12 | /* NB: vlapic_init must be called before hvm_funcs.vcpu_initialise */ |
1484 | 12 | rc = vlapic_init(v); |
1485 | 12 | if ( rc != 0 ) /* teardown: vlapic_destroy */ |
1486 | 0 | goto fail2; |
1487 | 12 | |
1488 | 12 | if ( (rc = hvm_funcs.vcpu_initialise(v)) != 0 ) /* teardown: hvm_funcs.vcpu_destroy */ |
1489 | 0 | goto fail3; |
1490 | 12 | |
1491 | 12 | softirq_tasklet_init( |
1492 | 12 | &v->arch.hvm_vcpu.assert_evtchn_irq_tasklet, |
1493 | 12 | (void(*)(unsigned long))hvm_assert_evtchn_irq, |
1494 | 12 | (unsigned long)v); |
1495 | 12 | |
1496 | 12 | v->arch.hvm_vcpu.inject_event.vector = HVM_EVENT_VECTOR_UNSET; |
1497 | 12 | |
1498 | 12 | rc = setup_compat_arg_xlat(v); /* teardown: free_compat_arg_xlat() */ |
1499 | 12 | if ( rc != 0 ) |
1500 | 0 | goto fail4; |
1501 | 12 | |
1502 | 12 | if ( nestedhvm_enabled(d) |
1503 | 0 | && (rc = nestedhvm_vcpu_initialise(v)) < 0 ) /* teardown: nestedhvm_vcpu_destroy */ |
1504 | 0 | goto fail5; |
1505 | 12 | |
1506 | 12 | rc = hvm_all_ioreq_servers_add_vcpu(d, v); |
1507 | 12 | if ( rc != 0 ) |
1508 | 0 | goto fail6; |
1509 | 12 | |
1510 | 12 | if ( v->vcpu_id == 0 ) |
1511 | 1 | { |
1512 | 1 | /* NB. All these really belong in hvm_domain_initialise(). */ |
1513 | 1 | pmtimer_init(v); |
1514 | 1 | hpet_init(d); |
1515 | 1 | |
1516 | 1 | /* Init guest TSC to start from zero. */ |
1517 | 1 | hvm_set_guest_tsc(v, 0); |
1518 | 1 | } |
1519 | 12 | |
1520 | 12 | hvm_update_guest_vendor(v); |
1521 | 12 | |
1522 | 12 | return 0; |
1523 | 12 | |
1524 | 0 | fail6: |
1525 | 0 | nestedhvm_vcpu_destroy(v); |
1526 | 0 | fail5: |
1527 | 0 | free_compat_arg_xlat(v); |
1528 | 0 | fail4: |
1529 | 0 | hvm_funcs.vcpu_destroy(v); |
1530 | 0 | fail3: |
1531 | 0 | vlapic_destroy(v); |
1532 | 0 | fail2: |
1533 | 0 | hvm_vcpu_cacheattr_destroy(v); |
1534 | 0 | fail1: |
1535 | 0 | return rc; |
1536 | 0 | } |
1537 | | |
1538 | | void hvm_vcpu_destroy(struct vcpu *v) |
1539 | 0 | { |
1540 | 0 | viridian_vcpu_deinit(v); |
1541 | 0 |
|
1542 | 0 | hvm_all_ioreq_servers_remove_vcpu(v->domain, v); |
1543 | 0 |
|
1544 | 0 | if ( hvm_altp2m_supported() ) |
1545 | 0 | altp2m_vcpu_destroy(v); |
1546 | 0 |
|
1547 | 0 | nestedhvm_vcpu_destroy(v); |
1548 | 0 |
|
1549 | 0 | free_compat_arg_xlat(v); |
1550 | 0 |
|
1551 | 0 | tasklet_kill(&v->arch.hvm_vcpu.assert_evtchn_irq_tasklet); |
1552 | 0 | hvm_funcs.vcpu_destroy(v); |
1553 | 0 |
|
1554 | 0 | vlapic_destroy(v); |
1555 | 0 |
|
1556 | 0 | hvm_vcpu_cacheattr_destroy(v); |
1557 | 0 | } |
1558 | | |
1559 | | void hvm_vcpu_down(struct vcpu *v) |
1560 | 0 | { |
1561 | 0 | struct domain *d = v->domain; |
1562 | 0 | int online_count = 0; |
1563 | 0 |
|
1564 | 0 | /* Doesn't halt us immediately, but we'll never return to guest context. */ |
1565 | 0 | set_bit(_VPF_down, &v->pause_flags); |
1566 | 0 | vcpu_sleep_nosync(v); |
1567 | 0 |
|
1568 | 0 | /* Any other VCPUs online? ... */ |
1569 | 0 | domain_lock(d); |
1570 | 0 | for_each_vcpu ( d, v ) |
1571 | 0 | if ( !(v->pause_flags & VPF_down) ) |
1572 | 0 | online_count++; |
1573 | 0 | domain_unlock(d); |
1574 | 0 |
|
1575 | 0 | /* ... Shut down the domain if not. */ |
1576 | 0 | if ( online_count == 0 ) |
1577 | 0 | { |
1578 | 0 | gdprintk(XENLOG_INFO, "All CPUs offline -- powering off.\n"); |
1579 | 0 | domain_shutdown(d, SHUTDOWN_poweroff); |
1580 | 0 | } |
1581 | 0 | } |
1582 | | |
1583 | | void hvm_hlt(unsigned int eflags) |
1584 | 65.4k | { |
1585 | 65.4k | struct vcpu *curr = current; |
1586 | 65.4k | |
1587 | 65.4k | if ( hvm_event_pending(curr) ) |
1588 | 0 | return; |
1589 | 65.4k | |
1590 | 65.4k | /* |
1591 | 65.4k | * If we halt with interrupts disabled, that's a pretty sure sign that we |
1592 | 65.4k | * want to shut down. In a real processor, NMIs are the only way to break |
1593 | 65.4k | * out of this. |
1594 | 65.4k | */ |
1595 | 65.4k | if ( unlikely(!(eflags & X86_EFLAGS_IF)) ) |
1596 | 0 | return hvm_vcpu_down(curr); |
1597 | 65.4k | |
1598 | 65.4k | do_sched_op(SCHEDOP_block, guest_handle_from_ptr(NULL, void)); |
1599 | 65.4k | |
1600 | 65.4k | HVMTRACE_1D(HLT, /* pending = */ vcpu_runnable(curr)); |
1601 | 65.4k | } |
1602 | | |
1603 | | void hvm_triple_fault(void) |
1604 | 0 | { |
1605 | 0 | struct vcpu *v = current; |
1606 | 0 | struct domain *d = v->domain; |
1607 | 0 | u8 reason = d->arch.hvm_domain.params[HVM_PARAM_TRIPLE_FAULT_REASON]; |
1608 | 0 |
|
1609 | 0 | gprintk(XENLOG_INFO, |
1610 | 0 | "Triple fault - invoking HVM shutdown action %d\n", |
1611 | 0 | reason); |
1612 | 0 | vcpu_show_execution_state(v); |
1613 | 0 | domain_shutdown(d, reason); |
1614 | 0 | } |
1615 | | |
1616 | | void hvm_inject_event(const struct x86_event *event) |
1617 | 0 | { |
1618 | 0 | struct vcpu *curr = current; |
1619 | 0 | const uint8_t vector = event->vector; |
1620 | 0 | const bool has_ec = ((event->type == X86_EVENTTYPE_HW_EXCEPTION) && |
1621 | 0 | (vector < 32) && ((TRAP_HAVE_EC & (1u << vector)))); |
1622 | 0 |
|
1623 | 0 | ASSERT(vector == event->vector); /* Confirm no truncation. */ |
1624 | 0 | if ( has_ec ) |
1625 | 0 | ASSERT(event->error_code != X86_EVENT_NO_EC); |
1626 | 0 | else |
1627 | 0 | ASSERT(event->error_code == X86_EVENT_NO_EC); |
1628 | 0 |
|
1629 | 0 | if ( nestedhvm_enabled(curr->domain) && |
1630 | 0 | !nestedhvm_vmswitch_in_progress(curr) && |
1631 | 0 | nestedhvm_vcpu_in_guestmode(curr) && |
1632 | 0 | nhvm_vmcx_guest_intercepts_event( |
1633 | 0 | curr, event->vector, event->error_code) ) |
1634 | 0 | { |
1635 | 0 | enum nestedhvm_vmexits nsret; |
1636 | 0 |
|
1637 | 0 | nsret = nhvm_vcpu_vmexit_event(curr, event); |
1638 | 0 |
|
1639 | 0 | switch ( nsret ) |
1640 | 0 | { |
1641 | 0 | case NESTEDHVM_VMEXIT_DONE: |
1642 | 0 | case NESTEDHVM_VMEXIT_ERROR: /* L1 guest will crash L2 guest */ |
1643 | 0 | return; |
1644 | 0 | case NESTEDHVM_VMEXIT_HOST: |
1645 | 0 | case NESTEDHVM_VMEXIT_CONTINUE: |
1646 | 0 | case NESTEDHVM_VMEXIT_FATALERROR: |
1647 | 0 | default: |
1648 | 0 | gdprintk(XENLOG_ERR, "unexpected nestedhvm error %i\n", nsret); |
1649 | 0 | return; |
1650 | 0 | } |
1651 | 0 | } |
1652 | 0 |
|
1653 | 0 | hvm_funcs.inject_event(event); |
1654 | 0 | } |
1655 | | |
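/*
 * Editor's note: hvm_inject_event() above derives has_ec by testing the
 * vector against TRAP_HAVE_EC, a bitmask with one bit per exception vector
 * that architecturally pushes an error code.  The standalone sketch below
 * shows the same style of test; the mask value is an editorial assumption
 * listing #DF, #TS, #NP, #SS, #GP, #PF and #AC, not copied from Xen's
 * headers.
 */
#include <stdbool.h>
#include <stdint.h>

#define EXAMPLE_HAVE_EC ((1u << 8) | (1u << 10) | (1u << 11) | \
                         (1u << 12) | (1u << 13) | (1u << 14) | (1u << 17))

static bool example_vector_pushes_error_code(uint8_t vector)
{
    /* Only the first 32 vectors are exceptions; the rest never push one. */
    return vector < 32 && (EXAMPLE_HAVE_EC & (1u << vector));
}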
1656 | | int hvm_hap_nested_page_fault(paddr_t gpa, unsigned long gla, |
1657 | | struct npfec npfec) |
1658 | 60.1k | { |
1659 | 60.1k | unsigned long gfn = gpa >> PAGE_SHIFT; |
1660 | 60.1k | p2m_type_t p2mt; |
1661 | 60.1k | p2m_access_t p2ma; |
1662 | 60.1k | mfn_t mfn; |
1663 | 60.1k | struct vcpu *curr = current; |
1664 | 60.1k | struct domain *currd = curr->domain; |
1665 | 60.1k | struct p2m_domain *p2m, *hostp2m; |
1666 | 60.1k | int rc, fall_through = 0, paged = 0; |
1667 | 60.1k | int sharing_enomem = 0; |
1668 | 60.1k | vm_event_request_t *req_ptr = NULL; |
1669 | 60.1k | bool_t ap2m_active, sync = 0; |
1670 | 60.1k | |
1671 | 60.1k | /* On Nested Virtualization, walk the guest page table. |
1672 | 60.1k | * If this succeeds, all is fine. |
1673 | 60.1k | * If this fails, inject a nested page fault into the guest. |
1674 | 60.1k | */ |
1675 | 60.1k | if ( nestedhvm_enabled(currd) |
1676 | 0 | && nestedhvm_vcpu_in_guestmode(curr) |
1677 | 0 | && nestedhvm_paging_mode_hap(curr) ) |
1678 | 0 | { |
1679 | 0 | int rv; |
1680 | 0 |
|
1681 | 0 | /* The vcpu is in guest mode and the l1 guest |
1682 | 0 | * uses hap. That means 'gpa' is in l2 guest |
1683 | 0 | * physical address space. |
1684 | 0 | * Fix the nested p2m or inject nested page fault |
1685 | 0 | * into l1 guest if not fixable. The algorithm is |
1686 | 0 | * the same as for shadow paging. |
1687 | 0 | */ |
1688 | 0 |
|
1689 | 0 | rv = nestedhvm_hap_nested_page_fault(curr, &gpa, |
1690 | 0 | npfec.read_access, |
1691 | 0 | npfec.write_access, |
1692 | 0 | npfec.insn_fetch); |
1693 | 0 | switch (rv) { |
1694 | 0 | case NESTEDHVM_PAGEFAULT_DONE: |
1695 | 0 | case NESTEDHVM_PAGEFAULT_RETRY: |
1696 | 0 | return 1; |
1697 | 0 | case NESTEDHVM_PAGEFAULT_L1_ERROR: |
1698 | 0 | /* An error occurred while translating gpa from |
1699 | 0 | * l2 guest address to l1 guest address. */ |
1700 | 0 | return 0; |
1701 | 0 | case NESTEDHVM_PAGEFAULT_INJECT: |
1702 | 0 | return -1; |
1703 | 0 | case NESTEDHVM_PAGEFAULT_MMIO: |
1704 | 0 | if ( !handle_mmio() ) |
1705 | 0 | hvm_inject_hw_exception(TRAP_gp_fault, 0); |
1706 | 0 | return 1; |
1707 | 0 | case NESTEDHVM_PAGEFAULT_L0_ERROR: |
1708 | 0 | /* gpa is now translated to l1 guest address, update gfn. */ |
1709 | 0 | gfn = gpa >> PAGE_SHIFT; |
1710 | 0 | break; |
1711 | 0 | } |
1712 | 0 | } |
1713 | 60.1k | |
1714 | 60.1k | /* |
1715 | 60.1k | * No need to do the P2M lookup for internally handled MMIO, benefiting |
1716 | 60.1k | * - 32-bit WinXP (& older Windows) on AMD CPUs for LAPIC accesses, |
1717 | 60.1k | * - newer Windows (like Server 2012) for HPET accesses. |
1718 | 60.1k | */ |
1719 | 60.1k | if ( !nestedhvm_vcpu_in_guestmode(curr) && hvm_mmio_internal(gpa) ) |
1720 | 60.1k | { |
1721 | 60.1k | if ( !handle_mmio_with_translation(gla, gpa >> PAGE_SHIFT, npfec) ) |
1722 | 0 | hvm_inject_hw_exception(TRAP_gp_fault, 0); |
1723 | 60.1k | rc = 1; |
1724 | 60.1k | goto out; |
1725 | 60.1k | } |
1726 | 60.1k | |
1727 | 11 | ap2m_active = altp2m_active(currd); |
1728 | 11 | |
1729 | 11 | /* |
1730 | 11 | * Take a lock on the host p2m speculatively, to avoid potential |
1731 | 11 | * locking order problems later and to handle unshare etc. |
1732 | 11 | */ |
1733 | 11 | hostp2m = p2m_get_hostp2m(currd); |
1734 | 11 | mfn = get_gfn_type_access(hostp2m, gfn, &p2mt, &p2ma, |
1735 | 11 | P2M_ALLOC | (npfec.write_access ? P2M_UNSHARE : 0), |
1736 | 11 | NULL); |
1737 | 11 | |
1738 | 11 | if ( ap2m_active ) |
1739 | 0 | { |
1740 | 0 | if ( p2m_altp2m_lazy_copy(curr, gpa, gla, npfec, &p2m) ) |
1741 | 0 | { |
1742 | 0 | /* entry was lazily copied from host -- retry */ |
1743 | 0 | __put_gfn(hostp2m, gfn); |
1744 | 0 | rc = 1; |
1745 | 0 | goto out; |
1746 | 0 | } |
1747 | 0 |
|
1748 | 0 | mfn = get_gfn_type_access(p2m, gfn, &p2mt, &p2ma, 0, NULL); |
1749 | 0 | } |
1750 | 11 | else |
1751 | 11 | p2m = hostp2m; |
1752 | 11 | |
1753 | 11 | /* Check access permissions first, then handle faults */ |
1754 | 11 | if ( !mfn_eq(mfn, INVALID_MFN) ) |
1755 | 0 | { |
1756 | 0 | bool_t violation; |
1757 | 0 |
|
1758 | 0 | /* If the access violates the permissions, then send to vm_event */ |
1759 | 0 | switch (p2ma) |
1760 | 0 | { |
1761 | 0 | case p2m_access_n: |
1762 | 0 | case p2m_access_n2rwx: |
1763 | 0 | default: |
1764 | 0 | violation = npfec.read_access || npfec.write_access || npfec.insn_fetch; |
1765 | 0 | break; |
1766 | 0 | case p2m_access_r: |
1767 | 0 | violation = npfec.write_access || npfec.insn_fetch; |
1768 | 0 | break; |
1769 | 0 | case p2m_access_w: |
1770 | 0 | violation = npfec.read_access || npfec.insn_fetch; |
1771 | 0 | break; |
1772 | 0 | case p2m_access_x: |
1773 | 0 | violation = npfec.read_access || npfec.write_access; |
1774 | 0 | break; |
1775 | 0 | case p2m_access_rx: |
1776 | 0 | case p2m_access_rx2rw: |
1777 | 0 | violation = npfec.write_access; |
1778 | 0 | break; |
1779 | 0 | case p2m_access_wx: |
1780 | 0 | violation = npfec.read_access; |
1781 | 0 | break; |
1782 | 0 | case p2m_access_rw: |
1783 | 0 | violation = npfec.insn_fetch; |
1784 | 0 | break; |
1785 | 0 | case p2m_access_rwx: |
1786 | 0 | violation = 0; |
1787 | 0 | break; |
1788 | 0 | } |
1789 | 0 |
|
1790 | 0 | if ( violation ) |
1791 | 0 | { |
1792 | 0 | /* Should #VE be emulated for this fault? */ |
1793 | 0 | if ( p2m_is_altp2m(p2m) && !cpu_has_vmx_virt_exceptions ) |
1794 | 0 | { |
1795 | 0 | bool_t sve; |
1796 | 0 |
|
1797 | 0 | p2m->get_entry(p2m, _gfn(gfn), &p2mt, &p2ma, 0, NULL, &sve); |
1798 | 0 |
|
1799 | 0 | if ( !sve && altp2m_vcpu_emulate_ve(curr) ) |
1800 | 0 | { |
1801 | 0 | rc = 1; |
1802 | 0 | goto out_put_gfn; |
1803 | 0 | } |
1804 | 0 | } |
1805 | 0 |
|
1806 | 0 | sync = p2m_mem_access_check(gpa, gla, npfec, &req_ptr); |
1807 | 0 |
|
1808 | 0 | if ( !sync ) |
1809 | 0 | fall_through = 1; |
1810 | 0 | else |
1811 | 0 | { |
1812 | 0 | /* Rights not promoted (aka. sync event), work here is done */ |
1813 | 0 | rc = 1; |
1814 | 0 | goto out_put_gfn; |
1815 | 0 | } |
1816 | 0 | } |
1817 | 0 | } |
1818 | 11 | |
1819 | 11 | /* |
1820 | 11 | * If this GFN is emulated MMIO or marked as read-only, pass the fault |
1821 | 11 | * to the mmio handler. |
1822 | 11 | */ |
1823 | 11 | if ( (p2mt == p2m_mmio_dm) || |
1824 | 0 | (npfec.write_access && |
1825 | 0 | (p2m_is_discard_write(p2mt) || (p2mt == p2m_ioreq_server))) ) |
1826 | 11 | { |
1827 | 11 | if ( !handle_mmio_with_translation(gla, gpa >> PAGE_SHIFT, npfec) ) |
1828 | 0 | hvm_inject_hw_exception(TRAP_gp_fault, 0); |
1829 | 11 | rc = 1; |
1830 | 11 | goto out_put_gfn; |
1831 | 11 | } |
1832 | 11 | |
1833 | 11 | /* Check if the page has been paged out */ |
1834 | 0 | if ( p2m_is_paged(p2mt) || (p2mt == p2m_ram_paging_out) ) |
1835 | 0 | paged = 1; |
1836 | 0 |
|
1837 | 0 | /* Mem sharing: unshare the page and try again */ |
1838 | 0 | if ( npfec.write_access && (p2mt == p2m_ram_shared) ) |
1839 | 0 | { |
1840 | 0 | ASSERT(p2m_is_hostp2m(p2m)); |
1841 | 0 | sharing_enomem = |
1842 | 0 | (mem_sharing_unshare_page(currd, gfn, 0) < 0); |
1843 | 0 | rc = 1; |
1844 | 0 | goto out_put_gfn; |
1845 | 0 | } |
1846 | 0 | |
1847 | 0 | /* Spurious fault? PoD and log-dirty also take this path. */ |
1848 | 0 | if ( p2m_is_ram(p2mt) ) |
1849 | 0 | { |
1850 | 0 | rc = 1; |
1851 | 0 | /* |
1852 | 0 | * Page log dirty is always done with order 0. If this mfn resides in |
1853 | 0 | * a large page, we do not change the type of other pages within that large |
1854 | 0 | * page. |
1855 | 0 | */ |
1856 | 0 | if ( npfec.write_access ) |
1857 | 0 | { |
1858 | 0 | paging_mark_dirty(currd, mfn); |
1859 | 0 | /* |
1860 | 0 | * If p2m is really an altp2m, unlock here to avoid lock ordering |
1861 | 0 | * violation when the change below is propagated from host p2m. |
1862 | 0 | */ |
1863 | 0 | if ( ap2m_active ) |
1864 | 0 | __put_gfn(p2m, gfn); |
1865 | 0 | p2m_change_type_one(currd, gfn, p2m_ram_logdirty, p2m_ram_rw); |
1866 | 0 | __put_gfn(ap2m_active ? hostp2m : p2m, gfn); |
1867 | 0 |
|
1868 | 0 | goto out; |
1869 | 0 | } |
1870 | 0 | goto out_put_gfn; |
1871 | 0 | } |
1872 | 0 |
|
1873 | 0 | if ( (p2mt == p2m_mmio_direct) && is_hardware_domain(currd) && |
1874 | 0 | npfec.write_access && npfec.present && |
1875 | 0 | (hvm_emulate_one_mmio(mfn_x(mfn), gla) == X86EMUL_OKAY) ) |
1876 | 0 | { |
1877 | 0 | rc = 1; |
1878 | 0 | goto out_put_gfn; |
1879 | 0 | } |
1880 | 0 |
|
1881 | 0 | /* If we fell through, the vcpu will retry now that access restrictions have |
1882 | 0 | * been removed. It may fault again if the p2m entry type still requires it. |
1883 | 0 | * Otherwise, this is an error condition. */ |
1884 | 0 | rc = fall_through; |
1885 | 0 |
|
1886 | 11 | out_put_gfn: |
1887 | 11 | __put_gfn(p2m, gfn); |
1888 | 11 | if ( ap2m_active ) |
1889 | 0 | __put_gfn(hostp2m, gfn); |
1890 | 60.1k | out: |
1891 | 60.1k | /* All of these are delayed until we exit, since we might |
1892 | 60.1k | * sleep on event ring wait queues, and we must not hold |
1893 | 60.1k | * locks in such circumstances */ |
1894 | 60.1k | if ( paged ) |
1895 | 0 | p2m_mem_paging_populate(currd, gfn); |
1896 | 60.1k | if ( sharing_enomem ) |
1897 | 0 | { |
1898 | 0 | int rv; |
1899 | 0 | if ( (rv = mem_sharing_notify_enomem(currd, gfn, 1)) < 0 ) |
1900 | 0 | { |
1901 | 0 | gdprintk(XENLOG_ERR, "Domain %hu attempt to unshare " |
1902 | 0 | "gfn %lx, ENOMEM and no helper (rc %d)\n", |
1903 | 0 | currd->domain_id, gfn, rv); |
1904 | 0 | /* Crash the domain */ |
1905 | 0 | rc = 0; |
1906 | 0 | } |
1907 | 0 | } |
1908 | 60.1k | if ( req_ptr ) |
1909 | 0 | { |
1910 | 0 | if ( monitor_traps(curr, sync, req_ptr) < 0 ) |
1911 | 0 | rc = 0; |
1912 | 0 |
|
1913 | 0 | xfree(req_ptr); |
1914 | 0 | } |
1915 | 60.1k | return rc; |
1916 | 11 | } |
1917 | | |
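/*
 * Editor's note: the p2m_access_* switch in hvm_hap_nested_page_fault()
 * expands each access type into the set of npfec bits that constitutes a
 * violation.  The sketch below expresses the same rule as a read/write/
 * execute permission triple: any requested bit that is not permitted is a
 * violation.  Types and names here are illustrative, not Xen's.
 */
#include <stdbool.h>

struct example_npfec { bool read, write, exec; };   /* requested access */
struct example_perm  { bool r, w, x; };             /* permitted access */

static bool example_access_violation(struct example_perm p,
                                     struct example_npfec f)
{
    /* e.g. p2m_access_rx maps to {r=1, w=0, x=1}: only writes fault. */
    return (f.read && !p.r) || (f.write && !p.w) || (f.exec && !p.x);
}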
1918 | | int hvm_handle_xsetbv(u32 index, u64 new_bv) |
1919 | 11 | { |
1920 | 11 | int rc; |
1921 | 11 | |
1922 | 11 | hvm_monitor_crX(XCR0, new_bv, current->arch.xcr0); |
1923 | 11 | |
1924 | 11 | rc = handle_xsetbv(index, new_bv); |
1925 | 11 | if ( rc ) |
1926 | 0 | hvm_inject_hw_exception(TRAP_gp_fault, 0); |
1927 | 11 | |
1928 | 11 | return rc; |
1929 | 11 | } |
1930 | | |
1931 | | int hvm_set_efer(uint64_t value) |
1932 | 36 | { |
1933 | 36 | struct vcpu *v = current; |
1934 | 36 | const char *errstr; |
1935 | 36 | |
1936 | 36 | value &= ~EFER_LMA; |
1937 | 36 | |
1938 | 36 | errstr = hvm_efer_valid(v, value, -1); |
1939 | 36 | if ( errstr ) |
1940 | 0 | { |
1941 | 0 | printk(XENLOG_G_WARNING |
1942 | 0 | "%pv: Invalid EFER update: %#"PRIx64" -> %#"PRIx64" - %s\n", |
1943 | 0 | v, v->arch.hvm_vcpu.guest_efer, value, errstr); |
1944 | 0 | return X86EMUL_EXCEPTION; |
1945 | 0 | } |
1946 | 36 | |
1947 | 36 | if ( ((value ^ v->arch.hvm_vcpu.guest_efer) & EFER_LME) && |
1948 | 12 | hvm_paging_enabled(v) ) |
1949 | 0 | { |
1950 | 0 | gdprintk(XENLOG_WARNING, |
1951 | 0 | "Trying to change EFER.LME with paging enabled\n"); |
1952 | 0 | return X86EMUL_EXCEPTION; |
1953 | 0 | } |
1954 | 36 | |
1955 | 36 | if ( (value & EFER_LME) && !(v->arch.hvm_vcpu.guest_efer & EFER_LME) ) |
1956 | 12 | { |
1957 | 12 | struct segment_register cs; |
1958 | 12 | |
1959 | 12 | hvm_get_segment_register(v, x86_seg_cs, &cs); |
1960 | 12 | |
1961 | 12 | /* |
1962 | 12 | * %cs may be loaded with both .D and .L set in legacy mode, and both |
1963 | 12 | * are captured in the VMCS/VMCB. |
1964 | 12 | * |
1965 | 12 | * If a guest does this and then tries to transition into long mode, |
1966 | 12 | * the vmentry from setting LME fails due to invalid guest state, |
1967 | 12 | * because %cr0.PG is still clear. |
1968 | 12 | * |
1969 | 12 | * When LME becomes set, clobber %cs.L to keep the guest firmly in |
1970 | 12 | * compatibility mode until it reloads %cs itself. |
1971 | 12 | */ |
1972 | 12 | if ( cs.l ) |
1973 | 0 | { |
1974 | 0 | cs.l = 0; |
1975 | 0 | hvm_set_segment_register(v, x86_seg_cs, &cs); |
1976 | 0 | } |
1977 | 12 | } |
1978 | 36 | |
1979 | 36 | if ( nestedhvm_enabled(v->domain) && cpu_has_svm && |
1980 | 0 | ((value & EFER_SVME) == 0 ) && |
1981 | 0 | ((value ^ v->arch.hvm_vcpu.guest_efer) & EFER_SVME) ) |
1982 | 0 | { |
1983 | 0 | /* Cleared EFER.SVME: Flush all nestedp2m tables */ |
1984 | 0 | p2m_flush_nestedp2m(v->domain); |
1985 | 0 | nestedhvm_vcpu_reset(v); |
1986 | 0 | } |
1987 | 36 | |
1988 | 36 | value |= v->arch.hvm_vcpu.guest_efer & EFER_LMA; |
1989 | 36 | v->arch.hvm_vcpu.guest_efer = value; |
1990 | 36 | hvm_update_guest_efer(v); |
1991 | 36 | |
1992 | 36 | return X86EMUL_OKAY; |
1993 | 36 | } |
1994 | | |
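/*
 * Editor's note: hvm_set_efer() above never lets the guest set EFER.LMA
 * directly; the written value is masked and the currently-active LMA bit is
 * merged back in.  A small sketch of that masking step, using the usual
 * architectural bit positions (LME = bit 8, LMA = bit 10) as an assumption
 * rather than Xen's own EFER_* macros.
 */
#include <stdint.h>

#define EX_EFER_LME (1ull << 8)    /* long mode enable: guest writable   */
#define EX_EFER_LMA (1ull << 10)   /* long mode active: hardware managed */

static uint64_t example_merge_efer(uint64_t current_efer, uint64_t new_val)
{
    new_val &= ~EX_EFER_LMA;                 /* ignore guest-supplied LMA */
    new_val |= current_efer & EX_EFER_LMA;   /* keep the live LMA state   */
    return new_val;
}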
1995 | | /* Exit UC mode only if all VCPUs agree on MTRR/PAT and are not in no_fill. */ |
1996 | | static bool_t domain_exit_uc_mode(struct vcpu *v) |
1997 | 0 | { |
1998 | 0 | struct domain *d = v->domain; |
1999 | 0 | struct vcpu *vs; |
2000 | 0 |
|
2001 | 0 | for_each_vcpu ( d, vs ) |
2002 | 0 | { |
2003 | 0 | if ( (vs == v) || !vs->is_initialised ) |
2004 | 0 | continue; |
2005 | 0 | if ( (vs->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) || |
2006 | 0 | mtrr_pat_not_equal(vs, v) ) |
2007 | 0 | return 0; |
2008 | 0 | } |
2009 | 0 |
|
2010 | 0 | return 1; |
2011 | 0 | } |
2012 | | |
2013 | | static void hvm_set_uc_mode(struct vcpu *v, bool_t is_in_uc_mode) |
2014 | 0 | { |
2015 | 0 | v->domain->arch.hvm_domain.is_in_uc_mode = is_in_uc_mode; |
2016 | 0 | shadow_blow_tables_per_domain(v->domain); |
2017 | 0 | } |
2018 | | |
2019 | | int hvm_mov_to_cr(unsigned int cr, unsigned int gpr) |
2020 | 2.93k | { |
2021 | 2.93k | struct vcpu *curr = current; |
2022 | 2.93k | unsigned long val, *reg; |
2023 | 2.93k | int rc; |
2024 | 2.93k | |
2025 | 2.93k | if ( (reg = decode_register(gpr, guest_cpu_user_regs(), 0)) == NULL ) |
2026 | 0 | { |
2027 | 0 | gdprintk(XENLOG_ERR, "invalid gpr: %u\n", gpr); |
2028 | 0 | goto exit_and_crash; |
2029 | 0 | } |
2030 | 2.93k | |
2031 | 2.93k | val = *reg; |
2032 | 2.93k | HVMTRACE_LONG_2D(CR_WRITE, cr, TRC_PAR_LONG(val)); |
2033 | 2.93k | HVM_DBG_LOG(DBG_LEVEL_1, "CR%u, value = %lx", cr, val); |
2034 | 2.93k | |
2035 | 2.93k | switch ( cr ) |
2036 | 2.93k | { |
2037 | 2.83k | case 0: |
2038 | 2.83k | rc = hvm_set_cr0(val, 1); |
2039 | 2.83k | break; |
2040 | 2.83k | |
2041 | 0 | case 3: |
2042 | 0 | rc = hvm_set_cr3(val, 1); |
2043 | 0 | break; |
2044 | 2.83k | |
2045 | 98 | case 4: |
2046 | 98 | rc = hvm_set_cr4(val, 1); |
2047 | 98 | break; |
2048 | 2.83k | |
2049 | 0 | case 8: |
2050 | 0 | vlapic_set_reg(vcpu_vlapic(curr), APIC_TASKPRI, ((val & 0x0f) << 4)); |
2051 | 0 | rc = X86EMUL_OKAY; |
2052 | 0 | break; |
2053 | 2.83k | |
2054 | 0 | default: |
2055 | 0 | gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr); |
2056 | 0 | goto exit_and_crash; |
2057 | 2.93k | } |
2058 | 2.93k | |
2059 | 2.93k | if ( rc == X86EMUL_EXCEPTION ) |
2060 | 0 | hvm_inject_hw_exception(TRAP_gp_fault, 0); |
2061 | 2.93k | |
2062 | 2.93k | return rc; |
2063 | 2.93k | |
2064 | 0 | exit_and_crash: |
2065 | 0 | domain_crash(curr->domain); |
2066 | 0 | return X86EMUL_UNHANDLEABLE; |
2067 | 2.93k | } |
2068 | | |
2069 | | int hvm_mov_from_cr(unsigned int cr, unsigned int gpr) |
2070 | 0 | { |
2071 | 0 | struct vcpu *curr = current; |
2072 | 0 | unsigned long val = 0, *reg; |
2073 | 0 |
|
2074 | 0 | if ( (reg = decode_register(gpr, guest_cpu_user_regs(), 0)) == NULL ) |
2075 | 0 | { |
2076 | 0 | gdprintk(XENLOG_ERR, "invalid gpr: %u\n", gpr); |
2077 | 0 | goto exit_and_crash; |
2078 | 0 | } |
2079 | 0 |
|
2080 | 0 | switch ( cr ) |
2081 | 0 | { |
2082 | 0 | case 0: |
2083 | 0 | case 2: |
2084 | 0 | case 3: |
2085 | 0 | case 4: |
2086 | 0 | val = curr->arch.hvm_vcpu.guest_cr[cr]; |
2087 | 0 | break; |
2088 | 0 | case 8: |
2089 | 0 | val = (vlapic_get_reg(vcpu_vlapic(curr), APIC_TASKPRI) & 0xf0) >> 4; |
2090 | 0 | break; |
2091 | 0 | default: |
2092 | 0 | gdprintk(XENLOG_ERR, "invalid cr: %u\n", cr); |
2093 | 0 | goto exit_and_crash; |
2094 | 0 | } |
2095 | 0 |
|
2096 | 0 | *reg = val; |
2097 | 0 | HVMTRACE_LONG_2D(CR_READ, cr, TRC_PAR_LONG(val)); |
2098 | 0 | HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR%u, value = %lx", cr, val); |
2099 | 0 |
|
2100 | 0 | return X86EMUL_OKAY; |
2101 | 0 |
|
2102 | 0 | exit_and_crash: |
2103 | 0 | domain_crash(curr->domain); |
2104 | 0 | return X86EMUL_UNHANDLEABLE; |
2105 | 0 | } |
2106 | | |
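/*
 * Editor's note: both CR accessors above convert between CR8, which carries
 * the task priority in its low four bits, and the local APIC TASKPRI
 * register, which carries it in bits 7:4.  The two helpers below are a
 * standalone restatement of those shifts; the names are editorial.
 */
#include <stdint.h>

static uint32_t example_cr8_to_tpr(uint64_t cr8)
{
    return (cr8 & 0x0f) << 4;     /* CR8[3:0] -> TASKPRI[7:4] */
}

static uint64_t example_tpr_to_cr8(uint32_t tpr)
{
    return (tpr & 0xf0) >> 4;     /* TASKPRI[7:4] -> CR8[3:0] */
}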
2107 | | void hvm_shadow_handle_cd(struct vcpu *v, unsigned long value) |
2108 | 0 | { |
2109 | 0 | if ( value & X86_CR0_CD ) |
2110 | 0 | { |
2111 | 0 | /* Entering no fill cache mode. */ |
2112 | 0 | spin_lock(&v->domain->arch.hvm_domain.uc_lock); |
2113 | 0 | v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE; |
2114 | 0 |
|
2115 | 0 | if ( !v->domain->arch.hvm_domain.is_in_uc_mode ) |
2116 | 0 | { |
2117 | 0 | domain_pause_nosync(v->domain); |
2118 | 0 |
|
2119 | 0 | /* Flush physical caches. */ |
2120 | 0 | flush_all(FLUSH_CACHE); |
2121 | 0 | hvm_set_uc_mode(v, 1); |
2122 | 0 |
|
2123 | 0 | domain_unpause(v->domain); |
2124 | 0 | } |
2125 | 0 | spin_unlock(&v->domain->arch.hvm_domain.uc_lock); |
2126 | 0 | } |
2127 | 0 | else if ( !(value & X86_CR0_CD) && |
2128 | 0 | (v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) ) |
2129 | 0 | { |
2130 | 0 | /* Exit from no fill cache mode. */ |
2131 | 0 | spin_lock(&v->domain->arch.hvm_domain.uc_lock); |
2132 | 0 | v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE; |
2133 | 0 |
|
2134 | 0 | if ( domain_exit_uc_mode(v) ) |
2135 | 0 | hvm_set_uc_mode(v, 0); |
2136 | 0 |
|
2137 | 0 | spin_unlock(&v->domain->arch.hvm_domain.uc_lock); |
2138 | 0 | } |
2139 | 0 | } |
2140 | | |
2141 | | static void hvm_update_cr(struct vcpu *v, unsigned int cr, unsigned long value) |
2142 | 3.96k | { |
2143 | 3.96k | v->arch.hvm_vcpu.guest_cr[cr] = value; |
2144 | 3.96k | nestedhvm_set_cr(v, cr, value); |
2145 | 3.96k | hvm_update_guest_cr(v, cr); |
2146 | 3.96k | } |
2147 | | |
2148 | | int hvm_set_cr0(unsigned long value, bool_t may_defer) |
2149 | 3.87k | { |
2150 | 3.87k | struct vcpu *v = current; |
2151 | 3.87k | struct domain *d = v->domain; |
2152 | 3.87k | unsigned long gfn, old_value = v->arch.hvm_vcpu.guest_cr[0]; |
2153 | 3.87k | struct page_info *page; |
2154 | 3.87k | |
2155 | 3.87k | HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value); |
2156 | 3.87k | |
2157 | 3.87k | if ( (u32)value != value ) |
2158 | 0 | { |
2159 | 0 | HVM_DBG_LOG(DBG_LEVEL_1, |
2160 | 0 | "Guest attempts to set upper 32 bits in CR0: %lx", |
2161 | 0 | value); |
2162 | 0 | return X86EMUL_EXCEPTION; |
2163 | 0 | } |
2164 | 3.87k | |
2165 | 3.87k | value &= ~HVM_CR0_GUEST_RESERVED_BITS; |
2166 | 3.87k | |
2167 | 3.87k | /* ET is reserved and should always be 1. */ |
2168 | 3.87k | value |= X86_CR0_ET; |
2169 | 3.87k | |
2170 | 3.87k | if ( !nestedhvm_vmswitch_in_progress(v) && |
2171 | 3.87k | (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PG ) |
2172 | 0 | return X86EMUL_EXCEPTION; |
2173 | 3.87k | |
2174 | 3.87k | if ( may_defer && unlikely(v->domain->arch.monitor.write_ctrlreg_enabled & |
2175 | 3.87k | monitor_ctrlreg_bitmask(VM_EVENT_X86_CR0)) ) |
2176 | 0 | { |
2177 | 0 | ASSERT(v->arch.vm_event); |
2178 | 0 |
|
2179 | 0 | if ( hvm_monitor_crX(CR0, value, old_value) ) |
2180 | 0 | { |
2181 | 0 | /* The actual write will occur in hvm_do_resume(), if permitted. */ |
2182 | 0 | v->arch.vm_event->write_data.do_write.cr0 = 1; |
2183 | 0 | v->arch.vm_event->write_data.cr0 = value; |
2184 | 0 |
|
2185 | 0 | return X86EMUL_OKAY; |
2186 | 0 | } |
2187 | 0 | } |
2188 | 3.87k | |
2189 | 3.87k | if ( (value & X86_CR0_PG) && !(old_value & X86_CR0_PG) ) |
2190 | 12 | { |
2191 | 12 | if ( v->arch.hvm_vcpu.guest_efer & EFER_LME ) |
2192 | 12 | { |
2193 | 12 | if ( !(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PAE) && |
2194 | 0 | !nestedhvm_vmswitch_in_progress(v) ) |
2195 | 0 | { |
2196 | 0 | HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable"); |
2197 | 0 | return X86EMUL_EXCEPTION; |
2198 | 0 | } |
2199 | 12 | HVM_DBG_LOG(DBG_LEVEL_1, "Enabling long mode"); |
2200 | 12 | v->arch.hvm_vcpu.guest_efer |= EFER_LMA; |
2201 | 12 | hvm_update_guest_efer(v); |
2202 | 12 | } |
2203 | 12 | |
2204 | 12 | if ( !paging_mode_hap(d) ) |
2205 | 0 | { |
2206 | 0 | /* The guest CR3 must be pointing to the guest physical. */ |
2207 | 0 | gfn = v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT; |
2208 | 0 | page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC); |
2209 | 0 | if ( !page ) |
2210 | 0 | { |
2211 | 0 | gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx\n", |
2212 | 0 | v->arch.hvm_vcpu.guest_cr[3]); |
2213 | 0 | domain_crash(d); |
2214 | 0 | return X86EMUL_UNHANDLEABLE; |
2215 | 0 | } |
2216 | 0 |
|
2217 | 0 | /* Now arch.guest_table points to machine physical. */ |
2218 | 0 | v->arch.guest_table = pagetable_from_page(page); |
2219 | 0 |
|
2220 | 0 | HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx", |
2221 | 0 | v->arch.hvm_vcpu.guest_cr[3], page_to_mfn(page)); |
2222 | 0 | } |
2223 | 12 | } |
2224 | 3.86k | else if ( !(value & X86_CR0_PG) && (old_value & X86_CR0_PG) ) |
2225 | 0 | { |
2226 | 0 | if ( hvm_pcid_enabled(v) ) |
2227 | 0 | { |
2228 | 0 | HVM_DBG_LOG(DBG_LEVEL_1, "Guest attempts to clear CR0.PG " |
2229 | 0 | "while CR4.PCIDE=1"); |
2230 | 0 | return X86EMUL_EXCEPTION; |
2231 | 0 | } |
2232 | 0 |
|
2233 | 0 | /* When CR0.PG is cleared, LMA is cleared immediately. */ |
2234 | 0 | if ( hvm_long_mode_active(v) ) |
2235 | 0 | { |
2236 | 0 | v->arch.hvm_vcpu.guest_efer &= ~EFER_LMA; |
2237 | 0 | hvm_update_guest_efer(v); |
2238 | 0 | } |
2239 | 0 |
|
2240 | 0 | if ( !paging_mode_hap(d) ) |
2241 | 0 | { |
2242 | 0 | put_page(pagetable_get_page(v->arch.guest_table)); |
2243 | 0 | v->arch.guest_table = pagetable_null(); |
2244 | 0 | } |
2245 | 0 | } |
2246 | 3.87k | |
2247 | 3.87k | if ( ((value ^ old_value) & X86_CR0_CD) && |
2248 | 45 | iommu_enabled && hvm_funcs.handle_cd && |
2249 | 44 | (!rangeset_is_empty(d->iomem_caps) || |
2250 | 0 | !rangeset_is_empty(d->arch.ioport_caps) || |
2251 | 0 | has_arch_pdevs(d)) ) |
2252 | 45 | hvm_funcs.handle_cd(v, value); |
2253 | 3.87k | |
2254 | 3.87k | hvm_update_cr(v, 0, value); |
2255 | 3.87k | |
2256 | 3.87k | if ( (value ^ old_value) & X86_CR0_PG ) { |
2257 | 12 | if ( !nestedhvm_vmswitch_in_progress(v) && nestedhvm_vcpu_in_guestmode(v) ) |
2258 | 0 | paging_update_nestedmode(v); |
2259 | 12 | else |
2260 | 12 | paging_update_paging_modes(v); |
2261 | 12 | } |
2262 | 3.87k | |
2263 | 3.87k | return X86EMUL_OKAY; |
2264 | 3.87k | } |
2265 | | |
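/*
 * Editor's note: two of the early validity checks in hvm_set_cr0() are easy
 * to miss among the monitor and paging plumbing: a CR0 write with any bit
 * above bit 31 set faults, and enabling PG without PE faults.  The sketch
 * below isolates just those two tests; the bit positions (PE = 0, PG = 31)
 * are the architectural ones, used here as an assumption.
 */
#include <stdbool.h>
#include <stdint.h>

#define EX_CR0_PE (1u << 0)
#define EX_CR0_PG (1u << 31)

static bool example_cr0_write_faults(uint64_t value)
{
    if ( (uint32_t)value != value )
        return true;                                 /* upper 32 bits set */
    if ( (value & (EX_CR0_PE | EX_CR0_PG)) == EX_CR0_PG )
        return true;                                 /* PG without PE     */
    return false;
}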
2266 | | int hvm_set_cr3(unsigned long value, bool_t may_defer) |
2267 | 0 | { |
2268 | 0 | struct vcpu *v = current; |
2269 | 0 | struct page_info *page; |
2270 | 0 | unsigned long old = v->arch.hvm_vcpu.guest_cr[3]; |
2271 | 0 |
|
2272 | 0 | if ( may_defer && unlikely(v->domain->arch.monitor.write_ctrlreg_enabled & |
2273 | 0 | monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3)) ) |
2274 | 0 | { |
2275 | 0 | ASSERT(v->arch.vm_event); |
2276 | 0 |
|
2277 | 0 | if ( hvm_monitor_crX(CR3, value, old) ) |
2278 | 0 | { |
2279 | 0 | /* The actual write will occur in hvm_do_resume(), if permitted. */ |
2280 | 0 | v->arch.vm_event->write_data.do_write.cr3 = 1; |
2281 | 0 | v->arch.vm_event->write_data.cr3 = value; |
2282 | 0 |
|
2283 | 0 | return X86EMUL_OKAY; |
2284 | 0 | } |
2285 | 0 | } |
2286 | 0 |
|
2287 | 0 | if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) && |
2288 | 0 | (value != v->arch.hvm_vcpu.guest_cr[3]) ) |
2289 | 0 | { |
2290 | 0 | /* Shadow-mode CR3 change. Check PDBR and update refcounts. */ |
2291 | 0 | HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value); |
2292 | 0 | page = get_page_from_gfn(v->domain, value >> PAGE_SHIFT, |
2293 | 0 | NULL, P2M_ALLOC); |
2294 | 0 | if ( !page ) |
2295 | 0 | goto bad_cr3; |
2296 | 0 |
|
2297 | 0 | put_page(pagetable_get_page(v->arch.guest_table)); |
2298 | 0 | v->arch.guest_table = pagetable_from_page(page); |
2299 | 0 |
|
2300 | 0 | HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value); |
2301 | 0 | } |
2302 | 0 |
|
2303 | 0 | v->arch.hvm_vcpu.guest_cr[3] = value; |
2304 | 0 | paging_update_cr3(v); |
2305 | 0 | return X86EMUL_OKAY; |
2306 | 0 |
|
2307 | 0 | bad_cr3: |
2308 | 0 | gdprintk(XENLOG_ERR, "Invalid CR3\n"); |
2309 | 0 | domain_crash(v->domain); |
2310 | 0 | return X86EMUL_UNHANDLEABLE; |
2311 | 0 | } |
2312 | | |
2313 | | int hvm_set_cr4(unsigned long value, bool_t may_defer) |
2314 | 98 | { |
2315 | 98 | struct vcpu *v = current; |
2316 | 98 | unsigned long old_cr; |
2317 | 98 | |
2318 | 98 | if ( value & ~hvm_cr4_guest_valid_bits(v, 0) ) |
2319 | 0 | { |
2320 | 0 | HVM_DBG_LOG(DBG_LEVEL_1, |
2321 | 0 | "Guest attempts to set reserved bit in CR4: %lx", |
2322 | 0 | value); |
2323 | 0 | return X86EMUL_EXCEPTION; |
2324 | 0 | } |
2325 | 98 | |
2326 | 98 | if ( !(value & X86_CR4_PAE) ) |
2327 | 0 | { |
2328 | 0 | if ( hvm_long_mode_active(v) ) |
2329 | 0 | { |
2330 | 0 | HVM_DBG_LOG(DBG_LEVEL_1, "Guest cleared CR4.PAE while " |
2331 | 0 | "EFER.LMA is set"); |
2332 | 0 | return X86EMUL_EXCEPTION; |
2333 | 0 | } |
2334 | 0 | } |
2335 | 98 | |
2336 | 98 | old_cr = v->arch.hvm_vcpu.guest_cr[4]; |
2337 | 98 | |
2338 | 98 | if ( (value & X86_CR4_PCIDE) && !(old_cr & X86_CR4_PCIDE) && |
2339 | 12 | (!hvm_long_mode_active(v) || |
2340 | 12 | (v->arch.hvm_vcpu.guest_cr[3] & 0xfff)) ) |
2341 | 0 | { |
2342 | 0 | HVM_DBG_LOG(DBG_LEVEL_1, "Guest attempts to change CR4.PCIDE from " |
2343 | 0 | "0 to 1 while either EFER.LMA=0 or CR3[11:0]!=000H"); |
2344 | 0 | return X86EMUL_EXCEPTION; |
2345 | 0 | } |
2346 | 98 | |
2347 | 98 | if ( may_defer && unlikely(v->domain->arch.monitor.write_ctrlreg_enabled & |
2348 | 98 | monitor_ctrlreg_bitmask(VM_EVENT_X86_CR4)) ) |
2349 | 0 | { |
2350 | 0 | ASSERT(v->arch.vm_event); |
2351 | 0 |
|
2352 | 0 | if ( hvm_monitor_crX(CR4, value, old_cr) ) |
2353 | 0 | { |
2354 | 0 | /* The actual write will occur in hvm_do_resume(), if permitted. */ |
2355 | 0 | v->arch.vm_event->write_data.do_write.cr4 = 1; |
2356 | 0 | v->arch.vm_event->write_data.cr4 = value; |
2357 | 0 |
|
2358 | 0 | return X86EMUL_OKAY; |
2359 | 0 | } |
2360 | 0 | } |
2361 | 98 | |
2362 | 98 | hvm_update_cr(v, 4, value); |
2363 | 98 | |
2364 | 98 | /* |
2365 | 98 | * Modifying CR4.{PSE,PAE,PGE,SMEP}, or clearing CR4.PCIDE |
2366 | 98 | * invalidates all TLB entries. |
2367 | 98 | */ |
2368 | 98 | if ( ((old_cr ^ value) & |
2369 | 98 | (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE | X86_CR4_SMEP)) || |
2370 | 37 | (!(value & X86_CR4_PCIDE) && (old_cr & X86_CR4_PCIDE)) ) |
2371 | 61 | { |
2372 | 61 | if ( !nestedhvm_vmswitch_in_progress(v) && nestedhvm_vcpu_in_guestmode(v) ) |
2373 | 0 | paging_update_nestedmode(v); |
2374 | 61 | else |
2375 | 61 | paging_update_paging_modes(v); |
2376 | 61 | } |
2377 | 98 | |
2378 | 98 | /* |
2379 | 98 | * {RD,WR}PKRU are not gated on XCR0.PKRU and hence an oddly behaving |
2380 | 98 | * guest may enable the feature in CR4 without enabling it in XCR0. We |
2381 | 98 | * need to context switch / migrate PKRU nevertheless. |
2382 | 98 | */ |
2383 | 98 | if ( (value & X86_CR4_PKE) && !(v->arch.xcr0_accum & XSTATE_PKRU) ) |
2384 | 0 | { |
2385 | 0 | int rc = handle_xsetbv(XCR_XFEATURE_ENABLED_MASK, |
2386 | 0 | get_xcr0() | XSTATE_PKRU); |
2387 | 0 |
|
2388 | 0 | if ( rc ) |
2389 | 0 | { |
2390 | 0 | HVM_DBG_LOG(DBG_LEVEL_1, "Failed to force XCR0.PKRU: %d", rc); |
2391 | 0 | return X86EMUL_EXCEPTION; |
2392 | 0 | } |
2393 | 0 |
|
2394 | 0 | if ( handle_xsetbv(XCR_XFEATURE_ENABLED_MASK, |
2395 | 0 | get_xcr0() & ~XSTATE_PKRU) ) |
2396 | 0 | /* nothing, best effort only */; |
2397 | 0 | } |
2398 | 98 | |
2399 | 98 | return X86EMUL_OKAY; |
2400 | 98 | } |
2401 | | |
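/*
 * Editor's note: the comment in hvm_set_cr4() lists the CR4 changes that
 * require invalidating all TLB entries; the code tests an XOR of old and new
 * values against a mask.  The predicate below restates that condition on its
 * own, with SDM bit positions (PSE = 4, PAE = 5, PGE = 7, PCIDE = 17,
 * SMEP = 20) assumed rather than taken from Xen's headers.
 */
#include <stdbool.h>
#include <stdint.h>

#define EX_CR4_PSE   (1u << 4)
#define EX_CR4_PAE   (1u << 5)
#define EX_CR4_PGE   (1u << 7)
#define EX_CR4_PCIDE (1u << 17)
#define EX_CR4_SMEP  (1u << 20)

static bool example_cr4_write_needs_flush(uint32_t old_cr4, uint32_t new_cr4)
{
    uint32_t changed = old_cr4 ^ new_cr4;

    return (changed & (EX_CR4_PSE | EX_CR4_PAE | EX_CR4_PGE | EX_CR4_SMEP)) ||
           ((old_cr4 & EX_CR4_PCIDE) && !(new_cr4 & EX_CR4_PCIDE));
}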
2402 | | bool_t hvm_virtual_to_linear_addr( |
2403 | | enum x86_segment seg, |
2404 | | const struct segment_register *reg, |
2405 | | unsigned long offset, |
2406 | | unsigned int bytes, |
2407 | | enum hvm_access_type access_type, |
2408 | | const struct segment_register *active_cs, |
2409 | | unsigned long *linear_addr) |
2410 | 120k | { |
2411 | 120k | const struct vcpu *curr = current; |
2412 | 120k | unsigned long addr = offset, last_byte; |
2413 | 120k | bool_t okay = 0; |
2414 | 120k | |
2415 | 120k | /* |
2416 | 120k | * These checks are for a memory access through an active segment. |
2417 | 120k | * |
2418 | 120k | * It is expected that the access rights of reg are suitable for seg (and |
2419 | 120k | * that this is enforced at the point that seg is loaded). |
2420 | 120k | */ |
2421 | 120k | ASSERT(seg < x86_seg_none); |
2422 | 120k | |
2423 | 120k | if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) || |
2424 | 120k | (guest_cpu_user_regs()->eflags & X86_EFLAGS_VM) ) |
2425 | 0 | { |
2426 | 0 | /* |
2427 | 0 | * REAL/VM86 MODE: Don't bother with segment access checks. |
2428 | 0 | * Certain of them are not done in native real mode anyway. |
2429 | 0 | */ |
2430 | 0 | addr = (uint32_t)(addr + reg->base); |
2431 | 0 | last_byte = (uint32_t)addr + bytes - !!bytes; |
2432 | 0 | if ( last_byte < addr ) |
2433 | 0 | goto out; |
2434 | 0 | } |
2435 | 120k | else if ( hvm_long_mode_active(curr) && |
2436 | 120k | (is_x86_system_segment(seg) || active_cs->l) ) |
2437 | 120k | { |
2438 | 120k | /* |
2439 | 120k | * User segments are always treated as present. System segment may |
2440 | 120k | * not be, and also incur limit checks. |
2441 | 120k | */ |
2442 | 120k | if ( is_x86_system_segment(seg) && |
2443 | 0 | (!reg->p || (offset + bytes - !!bytes) > reg->limit) ) |
2444 | 0 | goto out; |
2445 | 120k | |
2446 | 120k | /* |
2447 | 120k | * LONG MODE: FS, GS and system segments: add segment base. All |
2448 | 120k | * addresses must be canonical. |
2449 | 120k | */ |
2450 | 120k | if ( seg >= x86_seg_fs ) |
2451 | 0 | addr += reg->base; |
2452 | 120k | |
2453 | 120k | last_byte = addr + bytes - !!bytes; |
2454 | 120k | if ( !is_canonical_address(addr) || last_byte < addr || |
2455 | 120k | !is_canonical_address(last_byte) ) |
2456 | 0 | goto out; |
2457 | 120k | } |
2458 | 120k | else |
2459 | 0 | { |
2460 | 0 | /* |
2461 | 0 | * PROTECTED/COMPATIBILITY MODE: Apply segment checks and add base. |
2462 | 0 | */ |
2463 | 0 |
|
2464 | 0 | /* |
2465 | 0 | * Hardware truncates to 32 bits in compatibility mode. |
2466 | 0 | * It does not truncate to 16 bits in 16-bit address-size mode. |
2467 | 0 | */ |
2468 | 0 | addr = (uint32_t)(addr + reg->base); |
2469 | 0 |
|
2470 | 0 | /* Segment not valid for use (cooked meaning of .p)? */ |
2471 | 0 | if ( !reg->p ) |
2472 | 0 | goto out; |
2473 | 0 |
|
2474 | 0 | /* Read/write restrictions only exist for user segments. */ |
2475 | 0 | if ( reg->s ) |
2476 | 0 | { |
2477 | 0 | switch ( access_type ) |
2478 | 0 | { |
2479 | 0 | case hvm_access_read: |
2480 | 0 | if ( (reg->type & 0xa) == 0x8 ) |
2481 | 0 | goto out; /* execute-only code segment */ |
2482 | 0 | break; |
2483 | 0 | case hvm_access_write: |
2484 | 0 | if ( (reg->type & 0xa) != 0x2 ) |
2485 | 0 | goto out; /* not a writable data segment */ |
2486 | 0 | break; |
2487 | 0 | default: |
2488 | 0 | break; |
2489 | 0 | } |
2490 | 0 | } |
2491 | 0 |
|
2492 | 0 | last_byte = (uint32_t)offset + bytes - !!bytes; |
2493 | 0 |
|
2494 | 0 | /* Is this a grows-down data segment? Special limit check if so. */ |
2495 | 0 | if ( reg->s && (reg->type & 0xc) == 0x4 ) |
2496 | 0 | { |
2497 | 0 | /* Is upper limit 0xFFFF or 0xFFFFFFFF? */ |
2498 | 0 | if ( !reg->db ) |
2499 | 0 | last_byte = (uint16_t)last_byte; |
2500 | 0 |
|
2501 | 0 | /* Check first byte and last byte against respective bounds. */ |
2502 | 0 | if ( (offset <= reg->limit) || (last_byte < offset) ) |
2503 | 0 | goto out; |
2504 | 0 | } |
2505 | 0 | else if ( (last_byte > reg->limit) || (last_byte < offset) ) |
2506 | 0 | goto out; /* last byte is beyond limit or wraps 0xFFFFFFFF */ |
2507 | 0 | } |
2508 | 120k | |
2509 | 120k | /* All checks ok. */ |
2510 | 120k | okay = 1; |
2511 | 120k | |
2512 | 120k | out: |
2513 | 120k | /* |
2514 | 120k | * Always return the correct linear address, even if a permission check |
2515 | 120k | * failed. The permissions failure is not relevant to some callers. |
2516 | 120k | */ |
2517 | 120k | *linear_addr = addr; |
2518 | 120k | return okay; |
2519 | 120k | } |
2520 | | |
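/*
 * Editor's note: in the long-mode branch of hvm_virtual_to_linear_addr()
 * the essential check is that the first and last byte of the access are
 * canonical and that the range does not wrap.  Below is a standalone
 * canonicality predicate assuming 48 implemented linear-address bits (CPUs
 * with LA57 would use 57); it is an illustration, not Xen's
 * is_canonical_address().
 */
#include <stdbool.h>
#include <stdint.h>

static bool example_is_canonical(uint64_t addr)
{
    /* Canonical iff bits 63:47 are a sign extension of bit 47 (relies on
     * the usual arithmetic right shift of signed values). */
    return ((int64_t)(addr << 16) >> 16) == (int64_t)addr;
}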
2521 | | struct hvm_write_map { |
2522 | | struct list_head list; |
2523 | | struct page_info *page; |
2524 | | }; |
2525 | | |
2526 | | /* On non-NULL return, we leave this function holding an additional |
2527 | | * ref on the underlying mfn, if any */ |
2528 | | static void *_hvm_map_guest_frame(unsigned long gfn, bool_t permanent, |
2529 | | bool_t *writable) |
2530 | 0 | { |
2531 | 0 | void *map; |
2532 | 0 | p2m_type_t p2mt; |
2533 | 0 | struct page_info *page; |
2534 | 0 | struct domain *d = current->domain; |
2535 | 0 |
|
2536 | 0 | page = get_page_from_gfn(d, gfn, &p2mt, |
2537 | 0 | writable ? P2M_UNSHARE : P2M_ALLOC); |
2538 | 0 | if ( (p2m_is_shared(p2mt) && writable) || !page ) |
2539 | 0 | { |
2540 | 0 | if ( page ) |
2541 | 0 | put_page(page); |
2542 | 0 | return NULL; |
2543 | 0 | } |
2544 | 0 | if ( p2m_is_paging(p2mt) ) |
2545 | 0 | { |
2546 | 0 | put_page(page); |
2547 | 0 | p2m_mem_paging_populate(d, gfn); |
2548 | 0 | return NULL; |
2549 | 0 | } |
2550 | 0 |
|
2551 | 0 | if ( writable ) |
2552 | 0 | { |
2553 | 0 | if ( unlikely(p2m_is_discard_write(p2mt)) ) |
2554 | 0 | *writable = 0; |
2555 | 0 | else if ( !permanent ) |
2556 | 0 | paging_mark_dirty(d, _mfn(page_to_mfn(page))); |
2557 | 0 | } |
2558 | 0 |
|
2559 | 0 | if ( !permanent ) |
2560 | 0 | return __map_domain_page(page); |
2561 | 0 |
|
2562 | 0 | if ( writable && *writable ) |
2563 | 0 | { |
2564 | 0 | struct hvm_write_map *track = xmalloc(struct hvm_write_map); |
2565 | 0 |
|
2566 | 0 | if ( !track ) |
2567 | 0 | { |
2568 | 0 | put_page(page); |
2569 | 0 | return NULL; |
2570 | 0 | } |
2571 | 0 | track->page = page; |
2572 | 0 | spin_lock(&d->arch.hvm_domain.write_map.lock); |
2573 | 0 | list_add_tail(&track->list, &d->arch.hvm_domain.write_map.list); |
2574 | 0 | spin_unlock(&d->arch.hvm_domain.write_map.lock); |
2575 | 0 | } |
2576 | 0 |
|
2577 | 0 | map = __map_domain_page_global(page); |
2578 | 0 | if ( !map ) |
2579 | 0 | put_page(page); |
2580 | 0 |
|
2581 | 0 | return map; |
2582 | 0 | } |
2583 | | |
2584 | | void *hvm_map_guest_frame_rw(unsigned long gfn, bool_t permanent, |
2585 | | bool_t *writable) |
2586 | 0 | { |
2587 | 0 | *writable = 1; |
2588 | 0 | return _hvm_map_guest_frame(gfn, permanent, writable); |
2589 | 0 | } |
2590 | | |
2591 | | void *hvm_map_guest_frame_ro(unsigned long gfn, bool_t permanent) |
2592 | 0 | { |
2593 | 0 | return _hvm_map_guest_frame(gfn, permanent, NULL); |
2594 | 0 | } |
2595 | | |
2596 | | void hvm_unmap_guest_frame(void *p, bool_t permanent) |
2597 | 0 | { |
2598 | 0 | unsigned long mfn; |
2599 | 0 | struct page_info *page; |
2600 | 0 |
|
2601 | 0 | if ( !p ) |
2602 | 0 | return; |
2603 | 0 |
|
2604 | 0 | mfn = domain_page_map_to_mfn(p); |
2605 | 0 | page = mfn_to_page(mfn); |
2606 | 0 |
|
2607 | 0 | if ( !permanent ) |
2608 | 0 | unmap_domain_page(p); |
2609 | 0 | else |
2610 | 0 | { |
2611 | 0 | struct domain *d = page_get_owner(page); |
2612 | 0 | struct hvm_write_map *track; |
2613 | 0 |
|
2614 | 0 | unmap_domain_page_global(p); |
2615 | 0 | spin_lock(&d->arch.hvm_domain.write_map.lock); |
2616 | 0 | list_for_each_entry(track, &d->arch.hvm_domain.write_map.list, list) |
2617 | 0 | if ( track->page == page ) |
2618 | 0 | { |
2619 | 0 | paging_mark_dirty(d, _mfn(mfn)); |
2620 | 0 | list_del(&track->list); |
2621 | 0 | xfree(track); |
2622 | 0 | break; |
2623 | 0 | } |
2624 | 0 | spin_unlock(&d->arch.hvm_domain.write_map.lock); |
2625 | 0 | } |
2626 | 0 |
|
2627 | 0 | put_page(page); |
2628 | 0 | } |
2629 | | |
2630 | | void hvm_mapped_guest_frames_mark_dirty(struct domain *d) |
2631 | 0 | { |
2632 | 0 | struct hvm_write_map *track; |
2633 | 0 |
|
2634 | 0 | spin_lock(&d->arch.hvm_domain.write_map.lock); |
2635 | 0 | list_for_each_entry(track, &d->arch.hvm_domain.write_map.list, list) |
2636 | 0 | paging_mark_dirty(d, _mfn(page_to_mfn(track->page))); |
2637 | 0 | spin_unlock(&d->arch.hvm_domain.write_map.lock); |
2638 | 0 | } |
2639 | | |
2640 | | static void *hvm_map_entry(unsigned long va, bool_t *writable) |
2641 | 0 | { |
2642 | 0 | unsigned long gfn; |
2643 | 0 | uint32_t pfec; |
2644 | 0 | char *v; |
2645 | 0 |
|
2646 | 0 | if ( ((va & ~PAGE_MASK) + 8) > PAGE_SIZE ) |
2647 | 0 | { |
2648 | 0 | gdprintk(XENLOG_ERR, "Descriptor table entry " |
2649 | 0 | "straddles page boundary\n"); |
2650 | 0 | goto fail; |
2651 | 0 | } |
2652 | 0 |
|
2653 | 0 | /* |
2654 | 0 | * We're mapping on behalf of the segment-load logic, which might write |
2655 | 0 | * the accessed flags in the descriptors (in 32-bit mode), but we still |
2656 | 0 | * treat it as a kernel-mode read (i.e. no access checks). |
2657 | 0 | */ |
2658 | 0 | pfec = PFEC_page_present; |
2659 | 0 | gfn = paging_gva_to_gfn(current, va, &pfec); |
2660 | 0 | if ( pfec & (PFEC_page_paged | PFEC_page_shared) ) |
2661 | 0 | goto fail; |
2662 | 0 |
|
2663 | 0 | v = hvm_map_guest_frame_rw(gfn, 0, writable); |
2664 | 0 | if ( v == NULL ) |
2665 | 0 | goto fail; |
2666 | 0 |
|
2667 | 0 | return v + (va & ~PAGE_MASK); |
2668 | 0 |
|
2669 | 0 | fail: |
2670 | 0 | domain_crash(current->domain); |
2671 | 0 | return NULL; |
2672 | 0 | } |
2673 | | |
2674 | | static void hvm_unmap_entry(void *p) |
2675 | 0 | { |
2676 | 0 | hvm_unmap_guest_frame(p, 0); |
2677 | 0 | } |
2678 | | |
2679 | | static int hvm_load_segment_selector( |
2680 | | enum x86_segment seg, uint16_t sel, unsigned int cpl, unsigned int eflags) |
2681 | 0 | { |
2682 | 0 | struct segment_register desctab, segr; |
2683 | 0 | struct desc_struct *pdesc, desc; |
2684 | 0 | u8 dpl, rpl; |
2685 | 0 | bool_t writable; |
2686 | 0 | int fault_type = TRAP_invalid_tss; |
2687 | 0 | struct vcpu *v = current; |
2688 | 0 |
|
2689 | 0 | if ( eflags & X86_EFLAGS_VM ) |
2690 | 0 | { |
2691 | 0 | segr.sel = sel; |
2692 | 0 | segr.base = (uint32_t)sel << 4; |
2693 | 0 | segr.limit = 0xffffu; |
2694 | 0 | segr.attr = 0xf3; |
2695 | 0 | hvm_set_segment_register(v, seg, &segr); |
2696 | 0 | return 0; |
2697 | 0 | } |
2698 | 0 |
|
2699 | 0 | /* NULL selector? */ |
2700 | 0 | if ( (sel & 0xfffc) == 0 ) |
2701 | 0 | { |
2702 | 0 | if ( (seg == x86_seg_cs) || (seg == x86_seg_ss) ) |
2703 | 0 | goto fail; |
2704 | 0 | memset(&segr, 0, sizeof(segr)); |
2705 | 0 | segr.sel = sel; |
2706 | 0 | hvm_set_segment_register(v, seg, &segr); |
2707 | 0 | return 0; |
2708 | 0 | } |
2709 | 0 |
|
2710 | 0 | /* LDT descriptor must be in the GDT. */ |
2711 | 0 | if ( (seg == x86_seg_ldtr) && (sel & 4) ) |
2712 | 0 | goto fail; |
2713 | 0 |
|
2714 | 0 | hvm_get_segment_register( |
2715 | 0 | v, (sel & 4) ? x86_seg_ldtr : x86_seg_gdtr, &desctab); |
2716 | 0 |
|
2717 | 0 | /* Segment not valid for use (cooked meaning of .p)? */ |
2718 | 0 | if ( !desctab.p ) |
2719 | 0 | goto fail; |
2720 | 0 |
|
2721 | 0 | /* Check against descriptor table limit. */ |
2722 | 0 | if ( ((sel & 0xfff8) + 7) > desctab.limit ) |
2723 | 0 | goto fail; |
2724 | 0 |
|
2725 | 0 | pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8), &writable); |
2726 | 0 | if ( pdesc == NULL ) |
2727 | 0 | goto hvm_map_fail; |
2728 | 0 |
|
2729 | 0 | do { |
2730 | 0 | desc = *pdesc; |
2731 | 0 |
|
2732 | 0 | /* LDT descriptor is a system segment. All others are code/data. */ |
2733 | 0 | if ( (desc.b & (1u<<12)) == ((seg == x86_seg_ldtr) << 12) ) |
2734 | 0 | goto unmap_and_fail; |
2735 | 0 |
|
2736 | 0 | dpl = (desc.b >> 13) & 3; |
2737 | 0 | rpl = sel & 3; |
2738 | 0 |
|
2739 | 0 | switch ( seg ) |
2740 | 0 | { |
2741 | 0 | case x86_seg_cs: |
2742 | 0 | /* Code segment? */ |
2743 | 0 | if ( !(desc.b & _SEGMENT_CODE) ) |
2744 | 0 | goto unmap_and_fail; |
2745 | 0 | /* Non-conforming segment: check DPL against RPL. */ |
2746 | 0 | if ( !(desc.b & _SEGMENT_EC) && (dpl != rpl) ) |
2747 | 0 | goto unmap_and_fail; |
2748 | 0 | break; |
2749 | 0 | case x86_seg_ss: |
2750 | 0 | /* Writable data segment? */ |
2751 | 0 | if ( (desc.b & (_SEGMENT_CODE|_SEGMENT_WR)) != _SEGMENT_WR ) |
2752 | 0 | goto unmap_and_fail; |
2753 | 0 | if ( (dpl != cpl) || (dpl != rpl) ) |
2754 | 0 | goto unmap_and_fail; |
2755 | 0 | break; |
2756 | 0 | case x86_seg_ldtr: |
2757 | 0 | /* LDT system segment? */ |
2758 | 0 | if ( (desc.b & _SEGMENT_TYPE) != (2u<<8) ) |
2759 | 0 | goto unmap_and_fail; |
2760 | 0 | goto skip_accessed_flag; |
2761 | 0 | default: |
2762 | 0 | /* Readable code or data segment? */ |
2763 | 0 | if ( (desc.b & (_SEGMENT_CODE|_SEGMENT_WR)) == _SEGMENT_CODE ) |
2764 | 0 | goto unmap_and_fail; |
2765 | 0 | /* |
2766 | 0 | * Data or non-conforming code segment: |
2767 | 0 | * check DPL against RPL and CPL. |
2768 | 0 | */ |
2769 | 0 | if ( ((desc.b & (_SEGMENT_EC|_SEGMENT_CODE)) != |
2770 | 0 | (_SEGMENT_EC|_SEGMENT_CODE)) |
2771 | 0 | && ((dpl < cpl) || (dpl < rpl)) ) |
2772 | 0 | goto unmap_and_fail; |
2773 | 0 | break; |
2774 | 0 | } |
2775 | 0 |
|
2776 | 0 | /* Segment present in memory? */ |
2777 | 0 | if ( !(desc.b & _SEGMENT_P) ) |
2778 | 0 | { |
2779 | 0 | fault_type = (seg != x86_seg_ss) ? TRAP_no_segment |
2780 | 0 | : TRAP_stack_error; |
2781 | 0 | goto unmap_and_fail; |
2782 | 0 | } |
2783 | 0 | } while ( !(desc.b & 0x100) && /* Ensure Accessed flag is set */ |
2784 | 0 | writable && /* except if we are to discard writes */ |
2785 | 0 | (cmpxchg(&pdesc->b, desc.b, desc.b | 0x100) != desc.b) ); |
2786 | 0 |
|
2787 | 0 | /* Force the Accessed flag in our local copy. */ |
2788 | 0 | desc.b |= 0x100; |
2789 | 0 |
|
2790 | 0 | skip_accessed_flag: |
2791 | 0 | hvm_unmap_entry(pdesc); |
2792 | 0 |
|
2793 | 0 | segr.base = (((desc.b << 0) & 0xff000000u) | |
2794 | 0 | ((desc.b << 16) & 0x00ff0000u) | |
2795 | 0 | ((desc.a >> 16) & 0x0000ffffu)); |
2796 | 0 | segr.attr = (((desc.b >> 8) & 0x00ffu) | |
2797 | 0 | ((desc.b >> 12) & 0x0f00u)); |
2798 | 0 | segr.limit = (desc.b & 0x000f0000u) | (desc.a & 0x0000ffffu); |
2799 | 0 | if ( segr.g ) |
2800 | 0 | segr.limit = (segr.limit << 12) | 0xfffu; |
2801 | 0 | segr.sel = sel; |
2802 | 0 | hvm_set_segment_register(v, seg, &segr); |
2803 | 0 |
|
2804 | 0 | return 0; |
2805 | 0 |
|
2806 | 0 | unmap_and_fail: |
2807 | 0 | hvm_unmap_entry(pdesc); |
2808 | 0 | fail: |
2809 | 0 | hvm_inject_hw_exception(fault_type, sel & 0xfffc); |
2810 | 0 | hvm_map_fail: |
2811 | 0 | return 1; |
2812 | 0 | } |
2813 | | |
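/*
 * Editor's note: the tail of hvm_load_segment_selector() reassembles the
 * segment base, attributes and limit from the two 32-bit words of a legacy
 * descriptor.  The helper below performs the same unpacking with named
 * fields; the struct layout and the 0x800 granularity-bit test are editorial
 * restatements of the shifts used above, not Xen definitions.
 */
#include <stdint.h>

struct example_desc { uint32_t a, b; };                 /* low, high word */
struct example_seg  { uint32_t base, limit; uint16_t attr; };

static struct example_seg example_decode_desc(struct example_desc d)
{
    struct example_seg s;

    s.base  = (d.b & 0xff000000u) |            /* base[31:24]            */
              ((d.b << 16) & 0x00ff0000u) |    /* base[23:16]            */
              ((d.a >> 16) & 0x0000ffffu);     /* base[15:0]             */
    s.attr  = ((d.b >> 8) & 0x00ffu) |         /* type/S/DPL/P           */
              ((d.b >> 12) & 0x0f00u);         /* AVL/L/D.B/G            */
    s.limit = (d.b & 0x000f0000u) | (d.a & 0x0000ffffu);

    if ( s.attr & 0x800 )                      /* G: 4KiB granularity    */
        s.limit = (s.limit << 12) | 0xfffu;

    return s;
}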
2814 | | struct tss32 { |
2815 | | uint16_t back_link, :16; |
2816 | | uint32_t esp0; |
2817 | | uint16_t ss0, :16; |
2818 | | uint32_t esp1; |
2819 | | uint16_t ss1, :16; |
2820 | | uint32_t esp2; |
2821 | | uint16_t ss2, :16; |
2822 | | uint32_t cr3, eip, eflags, eax, ecx, edx, ebx, esp, ebp, esi, edi; |
2823 | | uint16_t es, :16, cs, :16, ss, :16, ds, :16, fs, :16, gs, :16, ldt, :16; |
2824 | | uint16_t trace /* :1 */, iomap; |
2825 | | }; |
2826 | | |
2827 | | void hvm_prepare_vm86_tss(struct vcpu *v, uint32_t base, uint32_t limit) |
2828 | 0 | { |
2829 | 0 | /* |
2830 | 0 | * If the provided area is large enough to cover at least the ISA port |
2831 | 0 | * range, keep the bitmaps outside the base structure. For rather small |
2832 | 0 | * areas (mainly relevant for guests that have been migrated from older |
2833 | 0 | * Xen versions), maximize interrupt vector and port coverage by pointing |
2834 | 0 | * the I/O bitmap at 0x20 (which puts the interrupt redirection bitmap |
2835 | 0 | * right at zero), accepting accesses to port 0x235 (represented by bit 5 |
2836 | 0 | * of byte 0x46) to trigger #GP (which will simply result in the access |
2837 | 0 | * being handled by the emulator via a slightly different path than it |
2838 | 0 | * would be anyway). Be sure to include one extra byte at the end of the |
2839 | 0 | * I/O bitmap (hence the missing "- 1" in the comparison is not an |
2840 | 0 | * off-by-one mistake), which we deliberately don't fill with all ones. |
2841 | 0 | */ |
2842 | 0 | uint16_t iomap = (limit >= sizeof(struct tss32) + (0x100 / 8) + (0x400 / 8) |
2843 | 0 | ? sizeof(struct tss32) : 0) + (0x100 / 8); |
2844 | 0 |
|
2845 | 0 | ASSERT(limit >= sizeof(struct tss32) - 1); |
2846 | 0 | /* |
2847 | 0 | * Strictly speaking we'd have to use hvm_copy_to_guest_linear() below, |
2848 | 0 | * but since the guest is (supposed to be, unless it corrupts that setup |
2849 | 0 | * itself, which would harm only itself) running on an identmap, we can |
2850 | 0 | * use the less overhead variant below, which also allows passing a vCPU |
2851 | 0 | * argument. |
2852 | 0 | */ |
2853 | 0 | hvm_copy_to_guest_phys(base, NULL, limit + 1, v); |
2854 | 0 | hvm_copy_to_guest_phys(base + offsetof(struct tss32, iomap), |
2855 | 0 | &iomap, sizeof(iomap), v); |
2856 | 0 | } |
2857 | | |
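/*
 * Editor's note: the iomap placement rule in hvm_prepare_vm86_tss() is
 * easier to see with numbers.  The worked example below assumes the 104-byte
 * struct tss32 shown above, a 32-byte interrupt redirection bitmap
 * (0x100 / 8) and a 128-byte I/O bitmap covering the ISA ports (0x400 / 8);
 * it simply prints where the bitmaps land for a small and a large TSS area.
 */
#include <stdio.h>

int main(void)
{
    const unsigned int tss_size = 104;       /* sizeof(struct tss32)       */
    const unsigned int intr_bm  = 0x100 / 8; /* interrupt redirection map  */
    const unsigned int io_bm    = 0x400 / 8; /* I/O bitmap for ports 0-3ff */
    const unsigned int limits[] = { 0x7f, 0x3ff };
    unsigned int i;

    for ( i = 0; i < sizeof(limits) / sizeof(limits[0]); i++ )
    {
        unsigned int limit = limits[i];
        unsigned int iomap = (limit >= tss_size + intr_bm + io_bm
                              ? tss_size : 0) + intr_bm;

        printf("limit %#x: I/O bitmap at %#x, redirection bitmap at %#x\n",
               limit, iomap, iomap - intr_bm);
    }
    return 0;
}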
2858 | | void hvm_task_switch( |
2859 | | uint16_t tss_sel, enum hvm_task_switch_reason taskswitch_reason, |
2860 | | int32_t errcode) |
2861 | 0 | { |
2862 | 0 | struct vcpu *v = current; |
2863 | 0 | struct cpu_user_regs *regs = guest_cpu_user_regs(); |
2864 | 0 | struct segment_register gdt, tr, prev_tr, segr; |
2865 | 0 | struct desc_struct *optss_desc = NULL, *nptss_desc = NULL, tss_desc; |
2866 | 0 | bool_t otd_writable, ntd_writable; |
2867 | 0 | unsigned int eflags, new_cpl; |
2868 | 0 | pagefault_info_t pfinfo; |
2869 | 0 | int exn_raised, rc; |
2870 | 0 | struct tss32 tss; |
2871 | 0 |
|
2872 | 0 | hvm_get_segment_register(v, x86_seg_gdtr, &gdt); |
2873 | 0 | hvm_get_segment_register(v, x86_seg_tr, &prev_tr); |
2874 | 0 |
|
2875 | 0 | if ( ((tss_sel & 0xfff8) + 7) > gdt.limit ) |
2876 | 0 | { |
2877 | 0 | hvm_inject_hw_exception((taskswitch_reason == TSW_iret) ? |
2878 | 0 | TRAP_invalid_tss : TRAP_gp_fault, |
2879 | 0 | tss_sel & 0xfff8); |
2880 | 0 | goto out; |
2881 | 0 | } |
2882 | 0 |
|
2883 | 0 | optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8), |
2884 | 0 | &otd_writable); |
2885 | 0 | if ( optss_desc == NULL ) |
2886 | 0 | goto out; |
2887 | 0 |
|
2888 | 0 | nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8), &ntd_writable); |
2889 | 0 | if ( nptss_desc == NULL ) |
2890 | 0 | goto out; |
2891 | 0 |
|
2892 | 0 | tss_desc = *nptss_desc; |
2893 | 0 | tr.sel = tss_sel; |
2894 | 0 | tr.base = (((tss_desc.b << 0) & 0xff000000u) | |
2895 | 0 | ((tss_desc.b << 16) & 0x00ff0000u) | |
2896 | 0 | ((tss_desc.a >> 16) & 0x0000ffffu)); |
2897 | 0 | tr.attr = (((tss_desc.b >> 8) & 0x00ffu) | |
2898 | 0 | ((tss_desc.b >> 12) & 0x0f00u)); |
2899 | 0 | tr.limit = (tss_desc.b & 0x000f0000u) | (tss_desc.a & 0x0000ffffu); |
2900 | 0 | if ( tr.g ) |
2901 | 0 | tr.limit = (tr.limit << 12) | 0xfffu; |
2902 | 0 |
|
2903 | 0 | if ( tr.type != ((taskswitch_reason == TSW_iret) ? 0xb : 0x9) ) |
2904 | 0 | { |
2905 | 0 | hvm_inject_hw_exception( |
2906 | 0 | (taskswitch_reason == TSW_iret) ? TRAP_invalid_tss : TRAP_gp_fault, |
2907 | 0 | tss_sel & 0xfff8); |
2908 | 0 | goto out; |
2909 | 0 | } |
2910 | 0 |
|
2911 | 0 | if ( !tr.p ) |
2912 | 0 | { |
2913 | 0 | hvm_inject_hw_exception(TRAP_no_segment, tss_sel & 0xfff8); |
2914 | 0 | goto out; |
2915 | 0 | } |
2916 | 0 |
|
2917 | 0 | if ( tr.limit < (sizeof(tss)-1) ) |
2918 | 0 | { |
2919 | 0 | hvm_inject_hw_exception(TRAP_invalid_tss, tss_sel & 0xfff8); |
2920 | 0 | goto out; |
2921 | 0 | } |
2922 | 0 |
|
2923 | 0 | rc = hvm_copy_from_guest_linear( |
2924 | 0 | &tss, prev_tr.base, sizeof(tss), PFEC_page_present, &pfinfo); |
2925 | 0 | if ( rc == HVMTRANS_bad_linear_to_gfn ) |
2926 | 0 | hvm_inject_page_fault(pfinfo.ec, pfinfo.linear); |
2927 | 0 | if ( rc != HVMTRANS_okay ) |
2928 | 0 | goto out; |
2929 | 0 |
|
2930 | 0 | eflags = regs->eflags; |
2931 | 0 | if ( taskswitch_reason == TSW_iret ) |
2932 | 0 | eflags &= ~X86_EFLAGS_NT; |
2933 | 0 |
|
2934 | 0 | tss.eip = regs->eip; |
2935 | 0 | tss.eflags = eflags; |
2936 | 0 | tss.eax = regs->eax; |
2937 | 0 | tss.ecx = regs->ecx; |
2938 | 0 | tss.edx = regs->edx; |
2939 | 0 | tss.ebx = regs->ebx; |
2940 | 0 | tss.esp = regs->esp; |
2941 | 0 | tss.ebp = regs->ebp; |
2942 | 0 | tss.esi = regs->esi; |
2943 | 0 | tss.edi = regs->edi; |
2944 | 0 |
|
2945 | 0 | hvm_get_segment_register(v, x86_seg_es, &segr); |
2946 | 0 | tss.es = segr.sel; |
2947 | 0 | hvm_get_segment_register(v, x86_seg_cs, &segr); |
2948 | 0 | tss.cs = segr.sel; |
2949 | 0 | hvm_get_segment_register(v, x86_seg_ss, &segr); |
2950 | 0 | tss.ss = segr.sel; |
2951 | 0 | hvm_get_segment_register(v, x86_seg_ds, &segr); |
2952 | 0 | tss.ds = segr.sel; |
2953 | 0 | hvm_get_segment_register(v, x86_seg_fs, &segr); |
2954 | 0 | tss.fs = segr.sel; |
2955 | 0 | hvm_get_segment_register(v, x86_seg_gs, &segr); |
2956 | 0 | tss.gs = segr.sel; |
2957 | 0 | hvm_get_segment_register(v, x86_seg_ldtr, &segr); |
2958 | 0 | tss.ldt = segr.sel; |
2959 | 0 |
|
2960 | 0 | rc = hvm_copy_to_guest_linear(prev_tr.base + offsetof(typeof(tss), eip), |
2961 | 0 | &tss.eip, |
2962 | 0 | offsetof(typeof(tss), trace) - |
2963 | 0 | offsetof(typeof(tss), eip), |
2964 | 0 | PFEC_page_present, &pfinfo); |
2965 | 0 | if ( rc == HVMTRANS_bad_linear_to_gfn ) |
2966 | 0 | hvm_inject_page_fault(pfinfo.ec, pfinfo.linear); |
2967 | 0 | if ( rc != HVMTRANS_okay ) |
2968 | 0 | goto out; |
2969 | 0 |
|
2970 | 0 | rc = hvm_copy_from_guest_linear( |
2971 | 0 | &tss, tr.base, sizeof(tss), PFEC_page_present, &pfinfo); |
2972 | 0 | if ( rc == HVMTRANS_bad_linear_to_gfn ) |
2973 | 0 | hvm_inject_page_fault(pfinfo.ec, pfinfo.linear); |
2974 | 0 | /* |
2975 | 0 | * Note: The HVMTRANS_gfn_shared case could be optimised, if the callee |
2976 | 0 | * functions knew we want RO access. |
2977 | 0 | */ |
2978 | 0 | if ( rc != HVMTRANS_okay ) |
2979 | 0 | goto out; |
2980 | 0 |
|
2981 | 0 | new_cpl = tss.eflags & X86_EFLAGS_VM ? 3 : tss.cs & 3; |
2982 | 0 |
|
2983 | 0 | if ( hvm_load_segment_selector(x86_seg_ldtr, tss.ldt, new_cpl, 0) ) |
2984 | 0 | goto out; |
2985 | 0 |
|
2986 | 0 | rc = hvm_set_cr3(tss.cr3, 1); |
2987 | 0 | if ( rc == X86EMUL_EXCEPTION ) |
2988 | 0 | hvm_inject_hw_exception(TRAP_gp_fault, 0); |
2989 | 0 | if ( rc != X86EMUL_OKAY ) |
2990 | 0 | goto out; |
2991 | 0 |
|
2992 | 0 | regs->rip = tss.eip; |
2993 | 0 | regs->rflags = tss.eflags | X86_EFLAGS_MBS; |
2994 | 0 | regs->rax = tss.eax; |
2995 | 0 | regs->rcx = tss.ecx; |
2996 | 0 | regs->rdx = tss.edx; |
2997 | 0 | regs->rbx = tss.ebx; |
2998 | 0 | regs->rsp = tss.esp; |
2999 | 0 | regs->rbp = tss.ebp; |
3000 | 0 | regs->rsi = tss.esi; |
3001 | 0 | regs->rdi = tss.edi; |
3002 | 0 |
|
3003 | 0 | exn_raised = 0; |
3004 | 0 | if ( hvm_load_segment_selector(x86_seg_es, tss.es, new_cpl, tss.eflags) || |
3005 | 0 | hvm_load_segment_selector(x86_seg_cs, tss.cs, new_cpl, tss.eflags) || |
3006 | 0 | hvm_load_segment_selector(x86_seg_ss, tss.ss, new_cpl, tss.eflags) || |
3007 | 0 | hvm_load_segment_selector(x86_seg_ds, tss.ds, new_cpl, tss.eflags) || |
3008 | 0 | hvm_load_segment_selector(x86_seg_fs, tss.fs, new_cpl, tss.eflags) || |
3009 | 0 | hvm_load_segment_selector(x86_seg_gs, tss.gs, new_cpl, tss.eflags) ) |
3010 | 0 | exn_raised = 1; |
3011 | 0 |
|
3012 | 0 | if ( taskswitch_reason == TSW_call_or_int ) |
3013 | 0 | { |
3014 | 0 | regs->eflags |= X86_EFLAGS_NT; |
3015 | 0 | tss.back_link = prev_tr.sel; |
3016 | 0 |
|
3017 | 0 | rc = hvm_copy_to_guest_linear(tr.base + offsetof(typeof(tss), back_link), |
3018 | 0 | &tss.back_link, sizeof(tss.back_link), 0, |
3019 | 0 | &pfinfo); |
3020 | 0 | if ( rc == HVMTRANS_bad_linear_to_gfn ) |
3021 | 0 | { |
3022 | 0 | hvm_inject_page_fault(pfinfo.ec, pfinfo.linear); |
3023 | 0 | exn_raised = 1; |
3024 | 0 | } |
3025 | 0 | else if ( rc != HVMTRANS_okay ) |
3026 | 0 | goto out; |
3027 | 0 | } |
3028 | 0 |
|
3029 | 0 | tr.type = 0xb; /* busy 32-bit tss */ |
3030 | 0 | hvm_set_segment_register(v, x86_seg_tr, &tr); |
3031 | 0 |
|
3032 | 0 | v->arch.hvm_vcpu.guest_cr[0] |= X86_CR0_TS; |
3033 | 0 | hvm_update_guest_cr(v, 0); |
3034 | 0 |
|
3035 | 0 | if ( (taskswitch_reason == TSW_iret || |
3036 | 0 | taskswitch_reason == TSW_jmp) && otd_writable ) |
3037 | 0 | clear_bit(41, optss_desc); /* clear B flag of old task */ |
3038 | 0 |
|
3039 | 0 | if ( taskswitch_reason != TSW_iret && ntd_writable ) |
3040 | 0 | set_bit(41, nptss_desc); /* set B flag of new task */ |
3041 | 0 |
|
3042 | 0 | if ( errcode >= 0 ) |
3043 | 0 | { |
3044 | 0 | struct segment_register cs; |
3045 | 0 | unsigned long linear_addr; |
3046 | 0 | unsigned int opsz, sp; |
3047 | 0 |
|
3048 | 0 | hvm_get_segment_register(v, x86_seg_cs, &cs); |
3049 | 0 | opsz = cs.db ? 4 : 2; |
3050 | 0 | hvm_get_segment_register(v, x86_seg_ss, &segr); |
3051 | 0 | if ( segr.db ) |
3052 | 0 | sp = regs->esp -= opsz; |
3053 | 0 | else |
3054 | 0 | sp = regs->sp -= opsz; |
3055 | 0 | if ( hvm_virtual_to_linear_addr(x86_seg_ss, &segr, sp, opsz, |
3056 | 0 | hvm_access_write, |
3057 | 0 | &cs, &linear_addr) ) |
3058 | 0 | { |
3059 | 0 | rc = hvm_copy_to_guest_linear(linear_addr, &errcode, opsz, 0, |
3060 | 0 | &pfinfo); |
3061 | 0 | if ( rc == HVMTRANS_bad_linear_to_gfn ) |
3062 | 0 | { |
3063 | 0 | hvm_inject_page_fault(pfinfo.ec, pfinfo.linear); |
3064 | 0 | exn_raised = 1; |
3065 | 0 | } |
3066 | 0 | else if ( rc != HVMTRANS_okay ) |
3067 | 0 | goto out; |
3068 | 0 | } |
3069 | 0 | } |
3070 | 0 |
|
3071 | 0 | if ( (tss.trace & 1) && !exn_raised ) |
3072 | 0 | hvm_inject_hw_exception(TRAP_debug, X86_EVENT_NO_EC); |
3073 | 0 |
|
3074 | 0 | out: |
3075 | 0 | hvm_unmap_entry(optss_desc); |
3076 | 0 | hvm_unmap_entry(nptss_desc); |
3077 | 0 | } |
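
The clear_bit(41, ...) / set_bit(41, ...) calls above poke the mapped GDT entries directly: the TSS descriptor's 4-bit type field sits in bits 40-43 of the 8-byte descriptor, and bit 41 is exactly what distinguishes an available 32-bit TSS (type 0x9) from a busy one (type 0xb, the value written into tr.type). A minimal standalone sketch of that bit position (illustration only, not Xen code; the example descriptor value is made up):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t tss_set_busy(uint64_t desc)   { return desc |  (1ULL << 41); }
    static uint64_t tss_clear_busy(uint64_t desc) { return desc & ~(1ULL << 41); }

    int main(void)
    {
        /* Example: present, DPL 0, available 32-bit TSS (type 0x9), limit 0x67. */
        uint64_t desc = 0x0000890000000067ULL;

        printf("type %#llx\n", (unsigned long long)((desc >> 40) & 0xf)); /* 0x9 */
        desc = tss_set_busy(desc);
        printf("type %#llx\n", (unsigned long long)((desc >> 40) & 0xf)); /* 0xb */
        desc = tss_clear_busy(desc);
        printf("type %#llx\n", (unsigned long long)((desc >> 40) & 0xf)); /* 0x9 */
        return 0;
    }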
3078 | | |
3079 | | enum hvm_translation_result hvm_translate_get_page( |
3080 | | struct vcpu *v, unsigned long addr, bool linear, uint32_t pfec, |
3081 | | pagefault_info_t *pfinfo, struct page_info **page_p, |
3082 | | gfn_t *gfn_p, p2m_type_t *p2mt_p) |
3083 | 373k | { |
3084 | 373k | struct page_info *page; |
3085 | 373k | p2m_type_t p2mt; |
3086 | 373k | gfn_t gfn; |
3087 | 373k | |
3088 | 373k | if ( linear ) |
3089 | 364k | { |
3090 | 364k | gfn = _gfn(paging_gva_to_gfn(v, addr, &pfec)); |
3091 | 364k | |
3092 | 364k | if ( gfn_eq(gfn, INVALID_GFN) ) |
3093 | 0 | { |
3094 | 0 | if ( pfec & PFEC_page_paged ) |
3095 | 0 | return HVMTRANS_gfn_paged_out; |
3096 | 0 |
|
3097 | 0 | if ( pfec & PFEC_page_shared ) |
3098 | 0 | return HVMTRANS_gfn_shared; |
3099 | 0 |
|
3100 | 0 | if ( pfinfo ) |
3101 | 0 | { |
3102 | 0 | pfinfo->linear = addr; |
3103 | 0 | pfinfo->ec = pfec & ~PFEC_implicit; |
3104 | 0 | } |
3105 | 0 |
|
3106 | 0 | return HVMTRANS_bad_linear_to_gfn; |
3107 | 0 | } |
3108 | 364k | } |
3109 | 373k | else |
3110 | 8.52k | { |
3111 | 8.52k | gfn = gaddr_to_gfn(addr); |
3112 | 8.52k | ASSERT(!pfinfo); |
3113 | 8.52k | } |
3114 | 373k | |
3115 | 373k | /* |
3116 | 373k | * No need to do the P2M lookup for internally handled MMIO, benefiting |
3117 | 373k | * - 32-bit WinXP (& older Windows) on AMD CPUs for LAPIC accesses, |
3118 | 373k | * - newer Windows (like Server 2012) for HPET accesses. |
3119 | 373k | */ |
3120 | 373k | if ( v == current |
3121 | 365k | && !nestedhvm_vcpu_in_guestmode(v) |
3122 | 365k | && hvm_mmio_internal(gfn_to_gaddr(gfn)) ) |
3123 | 0 | return HVMTRANS_bad_gfn_to_mfn; |
3124 | 373k | |
3125 | 373k | page = get_page_from_gfn(v->domain, gfn_x(gfn), &p2mt, P2M_UNSHARE); |
3126 | 373k | |
3127 | 373k | if ( !page ) |
3128 | 0 | return HVMTRANS_bad_gfn_to_mfn; |
3129 | 373k | |
3130 | 373k | if ( p2m_is_paging(p2mt) ) |
3131 | 0 | { |
3132 | 0 | put_page(page); |
3133 | 0 | p2m_mem_paging_populate(v->domain, gfn_x(gfn)); |
3134 | 0 | return HVMTRANS_gfn_paged_out; |
3135 | 0 | } |
3136 | 373k | if ( p2m_is_shared(p2mt) ) |
3137 | 0 | { |
3138 | 0 | put_page(page); |
3139 | 0 | return HVMTRANS_gfn_shared; |
3140 | 0 | } |
3141 | 373k | if ( p2m_is_grant(p2mt) ) |
3142 | 0 | { |
3143 | 0 | put_page(page); |
3144 | 0 | return HVMTRANS_unhandleable; |
3145 | 0 | } |
3146 | 373k | |
3147 | 373k | *page_p = page; |
3148 | 373k | if ( gfn_p ) |
3149 | 375k | *gfn_p = gfn; |
3150 | 373k | if ( p2mt_p ) |
3151 | 375k | *p2mt_p = p2mt; |
3152 | 373k | |
3153 | 373k | return HVMTRANS_okay; |
3154 | 373k | } |
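
On success the caller of hvm_translate_get_page() is handed a page reference it must drop with put_page(); __hvm_copy() below is the canonical user. As a hedged illustration of the calling convention (the helper name demo_read_guest_byte and the errno mapping are invented for this sketch; only interfaces visible in this file are used):

    static int demo_read_guest_byte(struct vcpu *v, unsigned long lin, uint8_t *val)
    {
        struct page_info *page;
        pagefault_info_t pfinfo;
        p2m_type_t p2mt;
        gfn_t gfn;
        uint8_t *p;

        switch ( hvm_translate_get_page(v, lin, true, PFEC_page_present,
                                        &pfinfo, &page, &gfn, &p2mt) )
        {
        case HVMTRANS_okay:
            break;

        case HVMTRANS_bad_linear_to_gfn:
            /* Reflect the fault back to the guest, as the copy helpers do. */
            hvm_inject_page_fault(pfinfo.ec, pfinfo.linear);
            return -EFAULT;

        default: /* paged out, shared, grant, or no backing page */
            return -EAGAIN;
        }

        p = __map_domain_page(page);
        *val = p[lin & ~PAGE_MASK];          /* offset within the page */
        unmap_domain_page(p);
        put_page(page);                      /* drop the reference we were given */

        return 0;
    }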
3155 | | |
3156 | 363k | #define HVMCOPY_from_guest (0u<<0) |
3157 | 373k | #define HVMCOPY_to_guest (1u<<0) |
3158 | 23 | #define HVMCOPY_phys (0u<<2) |
3159 | 737k | #define HVMCOPY_linear (1u<<2) |
3160 | | static enum hvm_translation_result __hvm_copy( |
3161 | | void *buf, paddr_t addr, int size, struct vcpu *v, unsigned int flags, |
3162 | | uint32_t pfec, pagefault_info_t *pfinfo) |
3163 | 364k | { |
3164 | 364k | gfn_t gfn; |
3165 | 364k | struct page_info *page; |
3166 | 364k | p2m_type_t p2mt; |
3167 | 364k | char *p; |
3168 | 364k | int count, todo = size; |
3169 | 364k | |
3170 | 364k | ASSERT(is_hvm_vcpu(v)); |
3171 | 364k | |
3172 | 364k | /* |
3173 | 364k | * XXX Disable for 4.1.0: PV-on-HVM drivers will do grant-table ops |
3174 | 364k | * such as query_size. Grant-table code currently does copy_to/from_guest |
3175 | 364k | * accesses under the big per-domain lock, which this test would disallow. |
3176 | 364k | * The test is not needed until we implement sleeping-on-waitqueue when |
3177 | 364k | * we access a paged-out frame, and that's post 4.1.0 now. |
3178 | 364k | */ |
3179 | 364k | #if 0 |
3180 | | /* |
3181 | | * If the required guest memory is paged out, this function may sleep. |
3182 | | * Hence we bail immediately if called from atomic context. |
3183 | | */ |
3184 | | if ( in_atomic() ) |
3185 | | return HVMTRANS_unhandleable; |
3186 | | #endif |
3187 | 364k | |
3188 | 737k | while ( todo > 0 ) |
3189 | 373k | { |
3190 | 373k | enum hvm_translation_result res; |
3191 | 373k | paddr_t gpa = addr & ~PAGE_MASK; |
3192 | 373k | |
3193 | 373k | count = min_t(int, PAGE_SIZE - gpa, todo); |
3194 | 373k | |
3195 | 373k | res = hvm_translate_get_page(v, addr, flags & HVMCOPY_linear, |
3196 | 373k | pfec, pfinfo, &page, &gfn, &p2mt); |
3197 | 373k | if ( res != HVMTRANS_okay ) |
3198 | 0 | return res; |
3199 | 373k | |
3200 | 373k | p = (char *)__map_domain_page(page) + (addr & ~PAGE_MASK); |
3201 | 373k | |
3202 | 373k | if ( flags & HVMCOPY_to_guest ) |
3203 | 9.12k | { |
3204 | 9.12k | if ( p2m_is_discard_write(p2mt) ) |
3205 | 0 | { |
3206 | 0 | static unsigned long lastpage; |
3207 | 0 |
|
3208 | 0 | if ( xchg(&lastpage, gfn_x(gfn)) != gfn_x(gfn) ) |
3209 | 0 | dprintk(XENLOG_G_DEBUG, |
3210 | 0 | "%pv attempted write to read-only gfn %#lx (mfn=%#lx)\n", |
3211 | 0 | v, gfn_x(gfn), page_to_mfn(page)); |
3212 | 0 | } |
3213 | 9.12k | else |
3214 | 9.12k | { |
3215 | 9.12k | if ( buf ) |
3216 | 7.80k | memcpy(p, buf, count); |
3217 | 9.12k | else |
3218 | 1.31k | memset(p, 0, count); |
3219 | 9.12k | paging_mark_dirty(v->domain, _mfn(page_to_mfn(page))); |
3220 | 9.12k | } |
3221 | 9.12k | } |
3222 | 373k | else |
3223 | 364k | { |
3224 | 364k | memcpy(buf, p, count); |
3225 | 364k | } |
3226 | 373k | |
3227 | 373k | unmap_domain_page(p); |
3228 | 373k | |
3229 | 373k | addr += count; |
3230 | 373k | if ( buf ) |
3231 | 373k | buf += count; |
3232 | 373k | todo -= count; |
3233 | 373k | put_page(page); |
3234 | 373k | } |
3235 | 364k | |
3236 | 364k | return HVMTRANS_okay; |
3237 | 364k | } |
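
The loop above never lets a single access span a page boundary: each iteration copies at most up to the end of the current guest page and then re-translates. A standalone illustration of that chunking (the address and length are arbitrary examples):

    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define PAGE_MASK (~(PAGE_SIZE - 1))

    int main(void)
    {
        unsigned long addr = 0x10fd8;   /* 40 bytes below a page boundary */
        int todo = 100;

        while ( todo > 0 )
        {
            unsigned long offset = addr & ~PAGE_MASK;
            unsigned long room = PAGE_SIZE - offset;
            int count = room < (unsigned long)todo ? (int)room : todo;

            printf("copy %3d bytes at %#lx\n", count, addr);   /* 40, then 60 */
            addr += count;
            todo -= count;
        }
        return 0;
    }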
3238 | | |
3239 | | enum hvm_translation_result hvm_copy_to_guest_phys( |
3240 | | paddr_t paddr, void *buf, int size, struct vcpu *v) |
3241 | 23 | { |
3242 | 23 | return __hvm_copy(buf, paddr, size, v, |
3243 | 23 | HVMCOPY_to_guest | HVMCOPY_phys, 0, NULL); |
3244 | 23 | } |
3245 | | |
3246 | | enum hvm_translation_result hvm_copy_from_guest_phys( |
3247 | | void *buf, paddr_t paddr, int size) |
3248 | 0 | { |
3249 | 0 | return __hvm_copy(buf, paddr, size, current, |
3250 | 0 | HVMCOPY_from_guest | HVMCOPY_phys, 0, NULL); |
3251 | 0 | } |
3252 | | |
3253 | | enum hvm_translation_result hvm_copy_to_guest_linear( |
3254 | | unsigned long addr, void *buf, int size, uint32_t pfec, |
3255 | | pagefault_info_t *pfinfo) |
3256 | 157 | { |
3257 | 157 | return __hvm_copy(buf, addr, size, current, |
3258 | 157 | HVMCOPY_to_guest | HVMCOPY_linear, |
3259 | 157 | PFEC_page_present | PFEC_write_access | pfec, pfinfo); |
3260 | 157 | } |
3261 | | |
3262 | | enum hvm_translation_result hvm_copy_from_guest_linear( |
3263 | | void *buf, unsigned long addr, int size, uint32_t pfec, |
3264 | | pagefault_info_t *pfinfo) |
3265 | 303k | { |
3266 | 303k | return __hvm_copy(buf, addr, size, current, |
3267 | 303k | HVMCOPY_from_guest | HVMCOPY_linear, |
3268 | 303k | PFEC_page_present | pfec, pfinfo); |
3269 | 303k | } |
3270 | | |
3271 | | enum hvm_translation_result hvm_fetch_from_guest_linear( |
3272 | | void *buf, unsigned long addr, int size, uint32_t pfec, |
3273 | | pagefault_info_t *pfinfo) |
3274 | 60.1k | { |
3275 | 60.1k | return __hvm_copy(buf, addr, size, current, |
3276 | 60.1k | HVMCOPY_from_guest | HVMCOPY_linear, |
3277 | 60.1k | PFEC_page_present | PFEC_insn_fetch | pfec, pfinfo); |
3278 | 60.1k | } |
3279 | | |
3280 | | unsigned long copy_to_user_hvm(void *to, const void *from, unsigned int len) |
3281 | 157 | { |
3282 | 157 | int rc; |
3283 | 157 | |
3284 | 157 | if ( current->hcall_compat && is_compat_arg_xlat_range(to, len) ) |
3285 | 0 | { |
3286 | 0 | memcpy(to, from, len); |
3287 | 0 | return 0; |
3288 | 0 | } |
3289 | 157 | |
3290 | 157 | rc = hvm_copy_to_guest_linear((unsigned long)to, (void *)from, len, 0, NULL); |
3291 | 157 | return rc ? len : 0; /* fake a copy_to_user() return code */ |
3292 | 157 | } |
3293 | | |
3294 | | unsigned long clear_user_hvm(void *to, unsigned int len) |
3295 | 0 | { |
3296 | 0 | int rc; |
3297 | 0 |
|
3298 | 0 | if ( current->hcall_compat && is_compat_arg_xlat_range(to, len) ) |
3299 | 0 | { |
3300 | 0 | memset(to, 0x00, len); |
3301 | 0 | return 0; |
3302 | 0 | } |
3303 | 0 |
|
3304 | 0 | rc = hvm_copy_to_guest_linear((unsigned long)to, NULL, len, 0, NULL); |
3305 | 0 | return rc ? len : 0; /* fake a clear_user() return code */
3306 | 0 | } |
3307 | | |
3308 | | unsigned long copy_from_user_hvm(void *to, const void *from, unsigned len) |
3309 | 304k | { |
3310 | 304k | int rc; |
3311 | 304k | |
3312 | 304k | if ( current->hcall_compat && is_compat_arg_xlat_range(from, len) ) |
3313 | 0 | { |
3314 | 0 | memcpy(to, from, len); |
3315 | 0 | return 0; |
3316 | 0 | } |
3317 | 304k | |
3318 | 304k | rc = hvm_copy_from_guest_linear(to, (unsigned long)from, len, 0, NULL); |
3319 | 304k | return rc ? len : 0; /* fake a copy_from_user() return code */ |
3320 | 304k | } |
3321 | | |
3322 | | bool hvm_check_cpuid_faulting(struct vcpu *v) |
3323 | 2.66k | { |
3324 | 2.66k | const struct msr_vcpu_policy *vp = v->arch.msr; |
3325 | 2.66k | |
3326 | 2.66k | if ( !vp->misc_features_enables.cpuid_faulting ) |
3327 | 2.66k | return false; |
3328 | 2.66k | |
3329 | 0 | return hvm_get_cpl(v) > 0; |
3330 | 2.66k | } |
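
A hedged sketch of how an intercept path can consume this predicate (the wrapper name demo_cpuid_faulted is invented; hvm_inject_hw_exception() and TRAP_gp_fault are the same interfaces used elsewhere in this file): when faulting is enabled and the guest is executing above ring 0, the CPUID is turned into #GP(0) instead of being served.

    static bool demo_cpuid_faulted(struct vcpu *v)
    {
        if ( !hvm_check_cpuid_faulting(v) )
            return false;                        /* serve the CPUID normally */

        hvm_inject_hw_exception(TRAP_gp_fault, 0);
        return true;                             /* instruction not advanced */
    }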
3331 | | |
3332 | | static uint64_t _hvm_rdtsc_intercept(void) |
3333 | 0 | { |
3334 | 0 | struct vcpu *curr = current; |
3335 | 0 | #if !defined(NDEBUG) || defined(CONFIG_PERF_COUNTERS) |
3336 | 0 | struct domain *currd = curr->domain; |
3337 | 0 |
|
3338 | 0 | if ( currd->arch.vtsc ) |
3339 | 0 | switch ( hvm_guest_x86_mode(curr) ) |
3340 | 0 | { |
3341 | 0 | case 8: |
3342 | 0 | case 4: |
3343 | 0 | case 2: |
3344 | 0 | if ( unlikely(hvm_get_cpl(curr)) ) |
3345 | 0 | { |
3346 | 0 | case 1: |
3347 | 0 | currd->arch.vtsc_usercount++; |
3348 | 0 | break; |
3349 | 0 | } |
3350 | 0 | /* fall through */ |
3351 | 0 | case 0: |
3352 | 0 | currd->arch.vtsc_kerncount++; |
3353 | 0 | break; |
3354 | 0 | } |
3355 | 0 | #endif |
3356 | 0 |
|
3357 | 0 | return hvm_get_guest_tsc(curr); |
3358 | 0 | } |
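
The switch above folds a case label into the body of an if: modes 8/4/2 count as a user-mode RDTSC when CPL is non-zero, virtual-8086 mode (case 1) always counts as user, and everything else falls through to the kernel counter. A standalone restatement of the same accounting decision (guest mode and CPL passed as plain integers for illustration):

    #include <stdbool.h>
    #include <stdio.h>

    /* mode follows the hvm_guest_x86_mode() convention:
     * 0=real, 1=vm86, 2/4/8=16/32/64-bit protected. */
    static bool rdtsc_counts_as_user(int mode, int cpl)
    {
        return mode == 1 || (mode >= 2 && cpl != 0);
    }

    int main(void)
    {
        printf("%d\n", rdtsc_counts_as_user(8, 3));  /* 1: 64-bit, ring 3 */
        printf("%d\n", rdtsc_counts_as_user(8, 0));  /* 0: 64-bit, ring 0 */
        printf("%d\n", rdtsc_counts_as_user(1, 0));  /* 1: vm86 */
        printf("%d\n", rdtsc_counts_as_user(0, 0));  /* 0: real mode */
        return 0;
    }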
3359 | | |
3360 | | void hvm_rdtsc_intercept(struct cpu_user_regs *regs) |
3361 | 0 | { |
3362 | 0 | msr_split(regs, _hvm_rdtsc_intercept()); |
3363 | 0 |
|
3364 | 0 | HVMTRACE_2D(RDTSC, regs->eax, regs->edx); |
3365 | 0 | } |
3366 | | |
3367 | | int hvm_msr_read_intercept(unsigned int msr, uint64_t *msr_content) |
3368 | 271 | { |
3369 | 271 | struct vcpu *v = current; |
3370 | 271 | struct domain *d = v->domain; |
3371 | 271 | uint64_t *var_range_base, *fixed_range_base; |
3372 | 271 | int ret; |
3373 | 271 | |
3374 | 271 | var_range_base = (uint64_t *)v->arch.hvm_vcpu.mtrr.var_ranges; |
3375 | 271 | fixed_range_base = (uint64_t *)v->arch.hvm_vcpu.mtrr.fixed_ranges; |
3376 | 271 | |
3377 | 271 | if ( (ret = guest_rdmsr(v, msr, msr_content)) != X86EMUL_UNHANDLEABLE ) |
3378 | 0 | return ret; |
3379 | 271 | |
3380 | 271 | ret = X86EMUL_OKAY; |
3381 | 271 | |
3382 | 271 | switch ( msr ) |
3383 | 271 | { |
3384 | 0 | unsigned int index; |
3385 | 0 |
|
3386 | 36 | case MSR_EFER: |
3387 | 36 | *msr_content = v->arch.hvm_vcpu.guest_efer; |
3388 | 36 | break; |
3389 | 0 |
|
3390 | 0 | case MSR_IA32_TSC: |
3391 | 0 | *msr_content = _hvm_rdtsc_intercept(); |
3392 | 0 | break; |
3393 | 0 |
|
3394 | 0 | case MSR_IA32_TSC_ADJUST: |
3395 | 0 | *msr_content = v->arch.hvm_vcpu.msr_tsc_adjust; |
3396 | 0 | break; |
3397 | 0 |
|
3398 | 0 | case MSR_TSC_AUX: |
3399 | 0 | *msr_content = hvm_msr_tsc_aux(v); |
3400 | 0 | break; |
3401 | 0 |
|
3402 | 10 | case MSR_IA32_APICBASE: |
3403 | 10 | *msr_content = vcpu_vlapic(v)->hw.apic_base_msr; |
3404 | 10 | break; |
3405 | 0 |
|
3406 | 0 | case MSR_IA32_APICBASE_MSR ... MSR_IA32_APICBASE_MSR + 0x3ff: |
3407 | 0 | if ( hvm_x2apic_msr_read(v, msr, msr_content) ) |
3408 | 0 | goto gp_fault; |
3409 | 0 | break; |
3410 | 0 |
|
3411 | 0 | case MSR_IA32_TSC_DEADLINE: |
3412 | 0 | *msr_content = vlapic_tdt_msr_get(vcpu_vlapic(v)); |
3413 | 0 | break; |
3414 | 0 |
|
3415 | 0 | case MSR_IA32_CR_PAT: |
3416 | 0 | hvm_get_guest_pat(v, msr_content); |
3417 | 0 | break; |
3418 | 0 |
|
3419 | 1 | case MSR_MTRRcap: |
3420 | 1 | if ( !d->arch.cpuid->basic.mtrr ) |
3421 | 0 | goto gp_fault; |
3422 | 1 | *msr_content = v->arch.hvm_vcpu.mtrr.mtrr_cap; |
3423 | 1 | break; |
3424 | 24 | case MSR_MTRRdefType: |
3425 | 24 | if ( !d->arch.cpuid->basic.mtrr ) |
3426 | 0 | goto gp_fault; |
3427 | 24 | *msr_content = v->arch.hvm_vcpu.mtrr.def_type |
3428 | 24 | | (v->arch.hvm_vcpu.mtrr.enabled << 10); |
3429 | 24 | break; |
3430 | 0 | case MSR_MTRRfix64K_00000: |
3431 | 0 | if ( !d->arch.cpuid->basic.mtrr ) |
3432 | 0 | goto gp_fault; |
3433 | 0 | *msr_content = fixed_range_base[0]; |
3434 | 0 | break; |
3435 | 0 | case MSR_MTRRfix16K_80000: |
3436 | 0 | case MSR_MTRRfix16K_A0000: |
3437 | 0 | if ( !d->arch.cpuid->basic.mtrr ) |
3438 | 0 | goto gp_fault; |
3439 | 0 | index = msr - MSR_MTRRfix16K_80000; |
3440 | 0 | *msr_content = fixed_range_base[index + 1]; |
3441 | 0 | break; |
3442 | 0 | case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000: |
3443 | 0 | if ( !d->arch.cpuid->basic.mtrr ) |
3444 | 0 | goto gp_fault; |
3445 | 0 | index = msr - MSR_MTRRfix4K_C0000; |
3446 | 0 | *msr_content = fixed_range_base[index + 3]; |
3447 | 0 | break; |
3448 | 104 | case MSR_IA32_MTRR_PHYSBASE(0)...MSR_IA32_MTRR_PHYSMASK(MTRR_VCNT-1): |
3449 | 104 | if ( !d->arch.cpuid->basic.mtrr ) |
3450 | 0 | goto gp_fault; |
3451 | 104 | index = msr - MSR_IA32_MTRR_PHYSBASE(0); |
3452 | 104 | *msr_content = var_range_base[index]; |
3453 | 104 | break; |
3454 | 104 | |
3455 | 0 | case MSR_IA32_XSS: |
3456 | 0 | if ( !d->arch.cpuid->xstate.xsaves ) |
3457 | 0 | goto gp_fault; |
3458 | 0 | *msr_content = v->arch.hvm_vcpu.msr_xss; |
3459 | 0 | break; |
3460 | 0 |
|
3461 | 0 | case MSR_IA32_BNDCFGS: |
3462 | 0 | if ( !d->arch.cpuid->feat.mpx || |
3463 | 0 | !hvm_get_guest_bndcfgs(v, msr_content) ) |
3464 | 0 | goto gp_fault; |
3465 | 0 | break; |
3466 | 0 |
|
3467 | 0 | case MSR_K8_ENABLE_C1E: |
3468 | 0 | case MSR_AMD64_NB_CFG: |
3469 | 0 | /* |
3470 | 0 | * These AMD-only registers may be accessed if this HVM guest |
3471 | 0 | * has been migrated to an Intel host. This fixes a guest crash |
3472 | 0 | * in this case. |
3473 | 0 | */ |
3474 | 0 | *msr_content = 0; |
3475 | 0 | break; |
3476 | 0 |
|
3477 | 96 | default: |
3478 | 96 | if ( (ret = vmce_rdmsr(msr, msr_content)) < 0 ) |
3479 | 0 | goto gp_fault; |
3480 | 96 | /* If ret == 0 then this is not an MCE MSR, see other MSRs. */ |
3481 | 96 | ret = ((ret == 0) |
3482 | 12 | ? hvm_funcs.msr_read_intercept(msr, msr_content) |
3483 | 84 | : X86EMUL_OKAY); |
3484 | 96 | break; |
3485 | 271 | } |
3486 | 271 | |
3487 | 270 | out: |
3488 | 270 | HVMTRACE_3D(MSR_READ, msr, |
3489 | 270 | (uint32_t)*msr_content, (uint32_t)(*msr_content >> 32)); |
3490 | 270 | return ret; |
3491 | 271 | |
3492 | 0 | gp_fault: |
3493 | 0 | ret = X86EMUL_EXCEPTION; |
3494 | 0 | *msr_content = -1ull; |
3495 | 0 | goto out; |
3496 | 271 | } |
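
The "+ 1" and "+ 3" offsets in the fixed-range MTRR cases above come from the layout of the per-vCPU fixed_ranges[] array: one 64K register, two 16K registers, then eight 4K registers, eleven slots in total. A standalone illustration of that mapping using the architectural MSR numbers (a sketch, not Xen's own definitions):

    #include <stdio.h>

    /* Architectural fixed-range MTRR MSR numbers. */
    #define MSR_MTRRfix64K_00000 0x250
    #define MSR_MTRRfix16K_80000 0x258
    #define MSR_MTRRfix16K_A0000 0x259
    #define MSR_MTRRfix4K_C0000  0x268
    #define MSR_MTRRfix4K_F8000  0x26f

    /* Map a fixed-range MTRR MSR to its slot in an 11-entry fixed_ranges[]
     * array, mirroring the "+ 1" / "+ 3" offsets used by the intercepts. */
    static int fixed_mtrr_slot(unsigned int msr)
    {
        if ( msr == MSR_MTRRfix64K_00000 )
            return 0;
        if ( msr >= MSR_MTRRfix16K_80000 && msr <= MSR_MTRRfix16K_A0000 )
            return msr - MSR_MTRRfix16K_80000 + 1;   /* slots 1..2 */
        if ( msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000 )
            return msr - MSR_MTRRfix4K_C0000 + 3;    /* slots 3..10 */
        return -1;
    }

    int main(void)
    {
        printf("%d %d %d\n",
               fixed_mtrr_slot(MSR_MTRRfix64K_00000),    /* 0 */
               fixed_mtrr_slot(MSR_MTRRfix16K_A0000),    /* 2 */
               fixed_mtrr_slot(MSR_MTRRfix4K_F8000));    /* 10 */
        return 0;
    }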
3497 | | |
3498 | | int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content, |
3499 | | bool_t may_defer) |
3500 | 563 | { |
3501 | 563 | struct vcpu *v = current; |
3502 | 563 | struct domain *d = v->domain; |
3503 | 563 | int ret; |
3504 | 563 | |
3505 | 563 | HVMTRACE_3D(MSR_WRITE, msr, |
3506 | 563 | (uint32_t)msr_content, (uint32_t)(msr_content >> 32)); |
3507 | 563 | |
3508 | 565 | if ( may_defer && unlikely(monitored_msr(v->domain, msr)) ) |
3509 | 0 | { |
3510 | 0 | ASSERT(v->arch.vm_event); |
3511 | 0 |
|
3512 | 0 | /* The actual write will occur in hvm_do_resume() (if permitted). */ |
3513 | 0 | v->arch.vm_event->write_data.do_write.msr = 1; |
3514 | 0 | v->arch.vm_event->write_data.msr = msr; |
3515 | 0 | v->arch.vm_event->write_data.value = msr_content; |
3516 | 0 |
|
3517 | 0 | hvm_monitor_msr(msr, msr_content); |
3518 | 0 | return X86EMUL_OKAY; |
3519 | 0 | } |
3520 | 563 | |
3521 | 563 | if ( (ret = guest_wrmsr(v, msr, msr_content)) != X86EMUL_UNHANDLEABLE ) |
3522 | 0 | return ret; |
3523 | 563 | |
3524 | 563 | ret = X86EMUL_OKAY; |
3525 | 563 | |
3526 | 563 | switch ( msr ) |
3527 | 563 | { |
3528 | 0 | unsigned int index; |
3529 | 0 |
|
3530 | 36 | case MSR_EFER: |
3531 | 36 | if ( hvm_set_efer(msr_content) ) |
3532 | 0 | return X86EMUL_EXCEPTION; |
3533 | 36 | break; |
3534 | 36 | |
3535 | 0 | case MSR_IA32_TSC: |
3536 | 0 | hvm_set_guest_tsc_msr(v, msr_content); |
3537 | 0 | break; |
3538 | 36 | |
3539 | 0 | case MSR_IA32_TSC_ADJUST: |
3540 | 0 | hvm_set_guest_tsc_adjust(v, msr_content); |
3541 | 0 | break; |
3542 | 36 | |
3543 | 0 | case MSR_TSC_AUX: |
3544 | 0 | v->arch.hvm_vcpu.msr_tsc_aux = (uint32_t)msr_content; |
3545 | 0 | if ( cpu_has_rdtscp |
3546 | 0 | && (v->domain->arch.tsc_mode != TSC_MODE_PVRDTSCP) ) |
3547 | 0 | wrmsrl(MSR_TSC_AUX, (uint32_t)msr_content); |
3548 | 0 | break; |
3549 | 36 | |
3550 | 9 | case MSR_IA32_APICBASE: |
3551 | 9 | if ( !vlapic_msr_set(vcpu_vlapic(v), msr_content) ) |
3552 | 0 | goto gp_fault; |
3553 | 9 | break; |
3554 | 9 | |
3555 | 0 | case MSR_IA32_TSC_DEADLINE: |
3556 | 0 | vlapic_tdt_msr_set(vcpu_vlapic(v), msr_content); |
3557 | 0 | break; |
3558 | 9 | |
3559 | 129 | case MSR_IA32_APICBASE_MSR ... MSR_IA32_APICBASE_MSR + 0x3ff: |
3560 | 129 | if ( hvm_x2apic_msr_write(v, msr, msr_content) ) |
3561 | 0 | goto gp_fault; |
3562 | 129 | break; |
3563 | 129 | |
3564 | 10 | case MSR_IA32_CR_PAT: |
3565 | 10 | if ( !hvm_set_guest_pat(v, msr_content) ) |
3566 | 0 | goto gp_fault; |
3567 | 10 | break; |
3568 | 10 | |
3569 | 0 | case MSR_MTRRcap: |
3570 | 0 | goto gp_fault; |
3571 | 10 | |
3572 | 34 | case MSR_MTRRdefType: |
3573 | 34 | if ( !d->arch.cpuid->basic.mtrr ) |
3574 | 0 | goto gp_fault; |
3575 | 34 | if ( !mtrr_def_type_msr_set(v->domain, &v->arch.hvm_vcpu.mtrr, |
3576 | 34 | msr_content) ) |
3577 | 0 | goto gp_fault; |
3578 | 34 | break; |
3579 | 0 | case MSR_MTRRfix64K_00000: |
3580 | 0 | if ( !d->arch.cpuid->basic.mtrr ) |
3581 | 0 | goto gp_fault; |
3582 | 0 | if ( !mtrr_fix_range_msr_set(v->domain, &v->arch.hvm_vcpu.mtrr, 0, |
3583 | 0 | msr_content) ) |
3584 | 0 | goto gp_fault; |
3585 | 0 | break; |
3586 | 0 | case MSR_MTRRfix16K_80000: |
3587 | 0 | case MSR_MTRRfix16K_A0000: |
3588 | 0 | if ( !d->arch.cpuid->basic.mtrr ) |
3589 | 0 | goto gp_fault; |
3590 | 0 | index = msr - MSR_MTRRfix16K_80000 + 1; |
3591 | 0 | if ( !mtrr_fix_range_msr_set(v->domain, &v->arch.hvm_vcpu.mtrr, |
3592 | 0 | index, msr_content) ) |
3593 | 0 | goto gp_fault; |
3594 | 0 | break; |
3595 | 0 | case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000: |
3596 | 0 | if ( !d->arch.cpuid->basic.mtrr ) |
3597 | 0 | goto gp_fault; |
3598 | 0 | index = msr - MSR_MTRRfix4K_C0000 + 3; |
3599 | 0 | if ( !mtrr_fix_range_msr_set(v->domain, &v->arch.hvm_vcpu.mtrr, |
3600 | 0 | index, msr_content) ) |
3601 | 0 | goto gp_fault; |
3602 | 0 | break; |
3603 | 176 | case MSR_IA32_MTRR_PHYSBASE(0)...MSR_IA32_MTRR_PHYSMASK(MTRR_VCNT-1): |
3604 | 176 | if ( !d->arch.cpuid->basic.mtrr ) |
3605 | 0 | goto gp_fault; |
3606 | 176 | if ( !mtrr_var_range_msr_set(v->domain, &v->arch.hvm_vcpu.mtrr, |
3607 | 176 | msr, msr_content) ) |
3608 | 0 | goto gp_fault; |
3609 | 176 | break; |
3610 | 176 | |
3611 | 0 | case MSR_IA32_XSS: |
3612 | 0 | /* No XSS features currently supported for guests. */ |
3613 | 0 | if ( !d->arch.cpuid->xstate.xsaves || msr_content != 0 ) |
3614 | 0 | goto gp_fault; |
3615 | 0 | v->arch.hvm_vcpu.msr_xss = msr_content; |
3616 | 0 | break; |
3617 | 0 |
|
3618 | 0 | case MSR_IA32_BNDCFGS: |
3619 | 0 | if ( !d->arch.cpuid->feat.mpx || |
3620 | 0 | !hvm_set_guest_bndcfgs(v, msr_content) ) |
3621 | 0 | goto gp_fault; |
3622 | 0 | break; |
3623 | 0 |
|
3624 | 0 | case MSR_AMD64_NB_CFG: |
3625 | 0 | /* ignore the write */ |
3626 | 0 | break; |
3627 | 0 |
|
3628 | 169 | default: |
3629 | 169 | if ( (ret = vmce_wrmsr(msr, msr_content)) < 0 ) |
3630 | 0 | goto gp_fault; |
3631 | 169 | /* If ret == 0 then this is not an MCE MSR, see other MSRs. */ |
3632 | 169 | ret = ((ret == 0) |
3633 | 50 | ? hvm_funcs.msr_write_intercept(msr, msr_content) |
3634 | 119 | : X86EMUL_OKAY); |
3635 | 169 | break; |
3636 | 563 | } |
3637 | 563 | |
3638 | 564 | return ret; |
3639 | 563 | |
3640 | 0 | gp_fault: |
3641 | 0 | return X86EMUL_EXCEPTION; |
3642 | 563 | } |
3643 | | |
3644 | | static bool is_sysdesc_access(const struct x86_emulate_state *state, |
3645 | | const struct x86_emulate_ctxt *ctxt) |
3646 | 0 | { |
3647 | 0 | unsigned int ext; |
3648 | 0 | int mode = x86_insn_modrm(state, NULL, &ext); |
3649 | 0 |
|
3650 | 0 | switch ( ctxt->opcode ) |
3651 | 0 | { |
3652 | 0 | case X86EMUL_OPC(0x0f, 0x00): |
3653 | 0 | if ( !(ext & 4) ) /* SLDT / STR / LLDT / LTR */ |
3654 | 0 | return true; |
3655 | 0 | break; |
3656 | 0 |
|
3657 | 0 | case X86EMUL_OPC(0x0f, 0x01): |
3658 | 0 | if ( mode != 3 && !(ext & 4) ) /* SGDT / SIDT / LGDT / LIDT */ |
3659 | 0 | return true; |
3660 | 0 | break; |
3661 | 0 | } |
3662 | 0 |
|
3663 | 0 | return false; |
3664 | 0 | } |
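
The "ext" value here is the ModRM reg field, i.e. the /0../7 digit of the 0F 00 (group 6) and 0F 01 (group 7) opcode groups, so "!(ext & 4)" selects exactly the four descriptor-table instructions in each group (and for 0F 01 only when a memory operand is used). A standalone dump of that decoding (mnemonics per the SDM opcode groups; the reserved and mode-dependent slots are simplified):

    #include <stdio.h>

    static const char *grp6[8] = { "SLDT", "STR",  "LLDT", "LTR",
                                   "VERR", "VERW", "(resv)", "(resv)" };
    static const char *grp7[8] = { "SGDT", "SIDT", "LGDT", "LIDT",
                                   "SMSW", "(other)", "LMSW", "INVLPG" };

    int main(void)
    {
        for ( unsigned int ext = 0; ext < 8; ext++ )
            printf("/%u: 0f00=%-6s 0f01=%-7s sysdesc=%d\n",
                   ext, grp6[ext], grp7[ext], !(ext & 4));
        return 0;
    }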
3665 | | |
3666 | | int hvm_descriptor_access_intercept(uint64_t exit_info, |
3667 | | uint64_t vmx_exit_qualification, |
3668 | | unsigned int descriptor, bool is_write) |
3669 | 0 | { |
3670 | 0 | struct vcpu *curr = current; |
3671 | 0 | struct domain *currd = curr->domain; |
3672 | 0 |
|
3673 | 0 | if ( currd->arch.monitor.descriptor_access_enabled ) |
3674 | 0 | { |
3675 | 0 | ASSERT(curr->arch.vm_event); |
3676 | 0 | hvm_monitor_descriptor_access(exit_info, vmx_exit_qualification, |
3677 | 0 | descriptor, is_write); |
3678 | 0 | } |
3679 | 0 | else if ( !hvm_emulate_one_insn(is_sysdesc_access, "sysdesc access") ) |
3680 | 0 | domain_crash(currd); |
3681 | 0 |
|
3682 | 0 | return X86EMUL_OKAY; |
3683 | 0 | } |
3684 | | |
3685 | | static bool is_cross_vendor(const struct x86_emulate_state *state, |
3686 | | const struct x86_emulate_ctxt *ctxt) |
3687 | 0 | { |
3688 | 0 | switch ( ctxt->opcode ) |
3689 | 0 | { |
3690 | 0 | case X86EMUL_OPC(0x0f, 0x05): /* syscall */ |
3691 | 0 | case X86EMUL_OPC(0x0f, 0x34): /* sysenter */ |
3692 | 0 | case X86EMUL_OPC(0x0f, 0x35): /* sysexit */ |
3693 | 0 | return true; |
3694 | 0 | } |
3695 | 0 |
|
3696 | 0 | return false; |
3697 | 0 | } |
3698 | | |
3699 | | void hvm_ud_intercept(struct cpu_user_regs *regs) |
3700 | 0 | { |
3701 | 0 | struct vcpu *cur = current; |
3702 | 0 | bool should_emulate = |
3703 | 0 | cur->domain->arch.cpuid->x86_vendor != boot_cpu_data.x86_vendor; |
3704 | 0 | struct hvm_emulate_ctxt ctxt; |
3705 | 0 |
|
3706 | 0 | hvm_emulate_init_once(&ctxt, opt_hvm_fep ? NULL : is_cross_vendor, regs); |
3707 | 0 |
|
3708 | 0 | if ( opt_hvm_fep ) |
3709 | 0 | { |
3710 | 0 | const struct segment_register *cs = &ctxt.seg_reg[x86_seg_cs]; |
3711 | 0 | uint32_t walk = (ctxt.seg_reg[x86_seg_ss].dpl == 3) |
3712 | 0 | ? PFEC_user_mode : 0; |
3713 | 0 | unsigned long addr; |
3714 | 0 | char sig[5]; /* ud2; .ascii "xen" */ |
3715 | 0 |
|
3716 | 0 | if ( hvm_virtual_to_linear_addr(x86_seg_cs, cs, regs->rip, |
3717 | 0 | sizeof(sig), hvm_access_insn_fetch, |
3718 | 0 | cs, &addr) && |
3719 | 0 | (hvm_fetch_from_guest_linear(sig, addr, sizeof(sig), |
3720 | 0 | walk, NULL) == HVMTRANS_okay) && |
3721 | 0 | (memcmp(sig, "\xf\xbxen", sizeof(sig)) == 0) ) |
3722 | 0 | { |
3723 | 0 | regs->rip += sizeof(sig); |
3724 | 0 | regs->eflags &= ~X86_EFLAGS_RF; |
3725 | 0 |
|
3726 | 0 | /* Zero the upper 32 bits of %rip if not in 64bit mode. */ |
3727 | 0 | if ( !(hvm_long_mode_active(cur) && cs->l) ) |
3728 | 0 | regs->rip = regs->eip; |
3729 | 0 |
|
3730 | 0 | add_taint(TAINT_HVM_FEP); |
3731 | 0 |
|
3732 | 0 | should_emulate = true; |
3733 | 0 | } |
3734 | 0 | } |
3735 | 0 |
|
3736 | 0 | if ( !should_emulate ) |
3737 | 0 | { |
3738 | 0 | hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC); |
3739 | 0 | return; |
3740 | 0 | } |
3741 | 0 |
|
3742 | 0 | switch ( hvm_emulate_one(&ctxt) ) |
3743 | 0 | { |
3744 | 0 | case X86EMUL_UNHANDLEABLE: |
3745 | 0 | case X86EMUL_UNIMPLEMENTED: |
3746 | 0 | hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC); |
3747 | 0 | break; |
3748 | 0 | case X86EMUL_EXCEPTION: |
3749 | 0 | hvm_inject_event(&ctxt.ctxt.event); |
3750 | 0 | /* fall through */ |
3751 | 0 | default: |
3752 | 0 | hvm_emulate_writeback(&ctxt); |
3753 | 0 | break; |
3754 | 0 | } |
3755 | 0 | } |
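
The 5-byte signature matched above ("\xf\xbxen", i.e. ud2 followed by the ASCII string "xen") is Xen's test-only forced emulation prefix, honoured only when the hypervisor was booted with hvm_fep. A hedged guest-side sketch of emitting it in front of an instruction (assumes GCC inline assembly; illustration only, not part of this file):

    #include <stdint.h>

    /* Force the hypervisor's emulator to handle the CPUID that follows the
     * 0f 0b 'x' 'e' 'n' signature (only honoured with hvm_fep enabled). */
    static inline void fep_cpuid(uint32_t *eax, uint32_t *ebx,
                                 uint32_t *ecx, uint32_t *edx)
    {
        asm volatile ( ".byte 0x0f, 0x0b; .ascii \"xen\"; cpuid"
                       : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
                       : "a" (*eax), "c" (*ecx) );
    }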
3756 | | |
3757 | | enum hvm_intblk hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack) |
3758 | 2.04M | { |
3759 | 2.04M | unsigned long intr_shadow; |
3760 | 2.04M | |
3761 | 2.04M | ASSERT(v == current); |
3762 | 2.04M | |
3763 | 2.04M | if ( nestedhvm_enabled(v->domain) ) { |
3764 | 0 | enum hvm_intblk intr; |
3765 | 0 |
|
3766 | 0 | intr = nhvm_interrupt_blocked(v); |
3767 | 0 | if ( intr != hvm_intblk_none ) |
3768 | 0 | return intr; |
3769 | 0 | } |
3770 | 2.04M | |
3771 | 2.04M | if ( (intack.source != hvm_intsrc_nmi) && |
3772 | 2.03M | !(guest_cpu_user_regs()->eflags & X86_EFLAGS_IF) ) |
3773 | 1.89M | return hvm_intblk_rflags_ie; |
3774 | 2.04M | |
3775 | 152k | intr_shadow = hvm_funcs.get_interrupt_shadow(v); |
3776 | 152k | |
3777 | 152k | if ( intr_shadow & (HVM_INTR_SHADOW_STI|HVM_INTR_SHADOW_MOV_SS) ) |
3778 | 0 | return hvm_intblk_shadow; |
3779 | 152k | |
3780 | 152k | if ( intack.source == hvm_intsrc_nmi ) |
3781 | 0 | return ((intr_shadow & HVM_INTR_SHADOW_NMI) ? |
3782 | 0 | hvm_intblk_nmi_iret : hvm_intblk_none); |
3783 | 152k | |
3784 | 152k | if ( intack.source == hvm_intsrc_lapic ) |
3785 | 4.07k | { |
3786 | 4.07k | uint32_t tpr = vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xF0; |
3787 | 4.07k | if ( (tpr >> 4) >= (intack.vector >> 4) ) |
3788 | 0 | return hvm_intblk_tpr; |
3789 | 4.07k | } |
3790 | 152k | |
3791 | 152k | return hvm_intblk_none; |
3792 | 152k | } |
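
The TPR test above compares priority classes: TASKPRI bits 7:4 against the vector's upper nibble, blocking delivery unless the interrupt's class is strictly higher than the task priority class. Standalone illustration:

    #include <stdbool.h>
    #include <stdio.h>

    static bool tpr_blocks(unsigned int taskpri, unsigned int vector)
    {
        unsigned int tpr = taskpri & 0xf0;

        return (tpr >> 4) >= (vector >> 4);
    }

    int main(void)
    {
        printf("%d\n", tpr_blocks(0x30, 0x41));   /* 0: class 4 beats TPR class 3 */
        printf("%d\n", tpr_blocks(0x40, 0x41));   /* 1: class 4 does not beat 4 */
        return 0;
    }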
3793 | | |
3794 | | static void hvm_latch_shinfo_size(struct domain *d) |
3795 | 3 | { |
3796 | 3 | /* |
3797 | 3 | * Called from operations which are among the very first executed by |
3798 | 3 | * PV drivers on initialisation or after save/restore. These are sensible |
3799 | 3 | * points at which to sample the execution mode of the guest and latch |
3800 | 3 | * 32- or 64-bit format for shared state. |
3801 | 3 | */ |
3802 | 3 | if ( current->domain == d ) |
3803 | 3 | { |
3804 | 3 | d->arch.has_32bit_shinfo = (hvm_guest_x86_mode(current) != 8); |
3805 | 3 | /* |
3806 | 3 | * Make sure that the timebase in the shared info structure is correct. |
3807 | 3 | * |
3808 | 3 | * If the bit-ness changed we should arguably try to convert the other |
3809 | 3 | * fields as well, but that's much more problematic (e.g. what do you |
3810 | 3 | * do if you're going from 64 bit to 32 bit and there's an event |
3811 | 3 | * channel pending which doesn't exist in the 32 bit version?). Just |
3812 | 3 | * setting the wallclock time seems to be sufficient for everything |
3813 | 3 | * we do, even if it is a bit of a hack. |
3814 | 3 | */ |
3815 | 3 | update_domain_wallclock_time(d); |
3816 | 3 | } |
3817 | 3 | } |
3818 | | |
3819 | | /* Initialise a hypercall transfer page for an HVM domain using
3820 | | paravirtualised drivers. */
3821 | | void hvm_hypercall_page_initialise(struct domain *d, |
3822 | | void *hypercall_page) |
3823 | 2 | { |
3824 | 2 | hvm_latch_shinfo_size(d); |
3825 | 2 | hvm_funcs.init_hypercall_page(d, hypercall_page); |
3826 | 2 | } |
3827 | | |
3828 | | void hvm_vcpu_reset_state(struct vcpu *v, uint16_t cs, uint16_t ip) |
3829 | 22 | { |
3830 | 22 | struct domain *d = v->domain; |
3831 | 22 | struct segment_register reg; |
3832 | 22 | typeof(v->arch.xsave_area->fpu_sse) *fpu_ctxt = v->arch.fpu_ctxt; |
3833 | 22 | |
3834 | 22 | domain_lock(d); |
3835 | 22 | |
3836 | 22 | if ( v->is_initialised ) |
3837 | 11 | goto out; |
3838 | 22 | |
3839 | 11 | if ( !paging_mode_hap(d) ) |
3840 | 0 | { |
3841 | 0 | if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG ) |
3842 | 0 | put_page(pagetable_get_page(v->arch.guest_table)); |
3843 | 0 | v->arch.guest_table = pagetable_null(); |
3844 | 0 | } |
3845 | 11 | |
3846 | 11 | memset(fpu_ctxt, 0, sizeof(*fpu_ctxt)); |
3847 | 11 | fpu_ctxt->fcw = FCW_RESET; |
3848 | 11 | fpu_ctxt->mxcsr = MXCSR_DEFAULT; |
3849 | 11 | if ( v->arch.xsave_area ) |
3850 | 11 | { |
3851 | 11 | v->arch.xsave_area->xsave_hdr.xstate_bv = XSTATE_FP; |
3852 | 11 | v->arch.xsave_area->xsave_hdr.xcomp_bv = 0; |
3853 | 11 | } |
3854 | 11 | |
3855 | 11 | v->arch.vgc_flags = VGCF_online; |
3856 | 11 | memset(&v->arch.user_regs, 0, sizeof(v->arch.user_regs)); |
3857 | 11 | v->arch.user_regs.rflags = X86_EFLAGS_MBS; |
3858 | 11 | v->arch.user_regs.rdx = 0x00000f00; |
3859 | 11 | v->arch.user_regs.rip = ip; |
3860 | 11 | memset(&v->arch.debugreg, 0, sizeof(v->arch.debugreg)); |
3861 | 11 | |
3862 | 11 | v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_ET; |
3863 | 11 | hvm_update_guest_cr(v, 0); |
3864 | 11 | |
3865 | 11 | v->arch.hvm_vcpu.guest_cr[2] = 0; |
3866 | 11 | hvm_update_guest_cr(v, 2); |
3867 | 11 | |
3868 | 11 | v->arch.hvm_vcpu.guest_cr[3] = 0; |
3869 | 11 | hvm_update_guest_cr(v, 3); |
3870 | 11 | |
3871 | 11 | v->arch.hvm_vcpu.guest_cr[4] = 0; |
3872 | 11 | hvm_update_guest_cr(v, 4); |
3873 | 11 | |
3874 | 11 | v->arch.hvm_vcpu.guest_efer = 0; |
3875 | 11 | hvm_update_guest_efer(v); |
3876 | 11 | |
3877 | 11 | reg.sel = cs; |
3878 | 11 | reg.base = (uint32_t)reg.sel << 4; |
3879 | 11 | reg.limit = 0xffff; |
3880 | 11 | reg.attr = 0x9b; |
3881 | 11 | hvm_set_segment_register(v, x86_seg_cs, ®); |
3882 | 11 | |
3883 | 11 | reg.sel = reg.base = 0; |
3884 | 11 | reg.limit = 0xffff; |
3885 | 11 | reg.attr = 0x93; |
3886 | 11 | hvm_set_segment_register(v, x86_seg_ds, ®); |
3887 | 11 | hvm_set_segment_register(v, x86_seg_es, ®); |
3888 | 11 | hvm_set_segment_register(v, x86_seg_fs, ®); |
3889 | 11 | hvm_set_segment_register(v, x86_seg_gs, ®); |
3890 | 11 | hvm_set_segment_register(v, x86_seg_ss, ®); |
3891 | 11 | |
3892 | 11 | reg.attr = 0x82; /* LDT */ |
3893 | 11 | hvm_set_segment_register(v, x86_seg_ldtr, ®); |
3894 | 11 | |
3895 | 11 | reg.attr = 0x8b; /* 32-bit TSS (busy) */ |
3896 | 11 | hvm_set_segment_register(v, x86_seg_tr, ®); |
3897 | 11 | |
3898 | 11 | reg.attr = 0; |
3899 | 11 | hvm_set_segment_register(v, x86_seg_gdtr, ®); |
3900 | 11 | hvm_set_segment_register(v, x86_seg_idtr, ®); |
3901 | 11 | |
3902 | 11 | if ( hvm_funcs.tsc_scaling.setup ) |
3903 | 11 | hvm_funcs.tsc_scaling.setup(v); |
3904 | 11 | |
3905 | 11 | /* Sync AP's TSC with BSP's. */ |
3906 | 11 | v->arch.hvm_vcpu.cache_tsc_offset = |
3907 | 11 | v->domain->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset; |
3908 | 11 | hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, |
3909 | 11 | d->arch.hvm_domain.sync_tsc); |
3910 | 11 | |
3911 | 11 | v->arch.hvm_vcpu.msr_tsc_adjust = 0; |
3912 | 11 | |
3913 | 11 | paging_update_paging_modes(v); |
3914 | 11 | |
3915 | 11 | v->arch.flags |= TF_kernel_mode; |
3916 | 11 | v->is_initialised = 1; |
3917 | 11 | clear_bit(_VPF_down, &v->pause_flags); |
3918 | 11 | |
3919 | 22 | out: |
3920 | 22 | domain_unlock(d); |
3921 | 22 | } |
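
In the real-mode state set up above a segment base is simply the selector shifted left by 4 (reg.base = (uint32_t)reg.sel << 4), so the BSP reset values used by hvm_s3_suspend() below, CS=0xf000 and IP=0xfff0, make the first fetch come from linear address 0xffff0. Standalone illustration:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint16_t cs = 0xf000, ip = 0xfff0;
        uint32_t base = (uint32_t)cs << 4;        /* real-mode segment base */

        printf("CS base %#x, first fetch at %#x\n", base, base + ip); /* 0xffff0 */
        return 0;
    }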
3922 | | |
3923 | | static void hvm_s3_suspend(struct domain *d) |
3924 | 0 | { |
3925 | 0 | struct vcpu *v; |
3926 | 0 |
|
3927 | 0 | domain_pause(d); |
3928 | 0 | domain_lock(d); |
3929 | 0 |
|
3930 | 0 | if ( d->is_dying || (d->vcpu == NULL) || (d->vcpu[0] == NULL) || |
3931 | 0 | test_and_set_bool(d->arch.hvm_domain.is_s3_suspended) ) |
3932 | 0 | { |
3933 | 0 | domain_unlock(d); |
3934 | 0 | domain_unpause(d); |
3935 | 0 | return; |
3936 | 0 | } |
3937 | 0 |
|
3938 | 0 | for_each_vcpu ( d, v ) |
3939 | 0 | { |
3940 | 0 | int rc; |
3941 | 0 |
|
3942 | 0 | vlapic_reset(vcpu_vlapic(v)); |
3943 | 0 | rc = vcpu_reset(v); |
3944 | 0 | ASSERT(!rc); |
3945 | 0 | } |
3946 | 0 |
|
3947 | 0 | vpic_reset(d); |
3948 | 0 | vioapic_reset(d); |
3949 | 0 | pit_reset(d); |
3950 | 0 | rtc_reset(d); |
3951 | 0 | pmtimer_reset(d); |
3952 | 0 | hpet_reset(d); |
3953 | 0 |
|
3954 | 0 | hvm_vcpu_reset_state(d->vcpu[0], 0xf000, 0xfff0); |
3955 | 0 |
|
3956 | 0 | domain_unlock(d); |
3957 | 0 | } |
3958 | | |
3959 | | static void hvm_s3_resume(struct domain *d) |
3960 | 0 | { |
3961 | 0 | if ( test_and_clear_bool(d->arch.hvm_domain.is_s3_suspended) ) |
3962 | 0 | { |
3963 | 0 | struct vcpu *v; |
3964 | 0 |
|
3965 | 0 | for_each_vcpu( d, v ) |
3966 | 0 | hvm_set_guest_tsc(v, 0); |
3967 | 0 | domain_unpause(d); |
3968 | 0 | } |
3969 | 0 | } |
3970 | | |
3971 | | static int hvmop_flush_tlb_all(void) |
3972 | 0 | { |
3973 | 0 | struct domain *d = current->domain; |
3974 | 0 | struct vcpu *v; |
3975 | 0 |
|
3976 | 0 | if ( !is_hvm_domain(d) ) |
3977 | 0 | return -EINVAL; |
3978 | 0 |
|
3979 | 0 | /* Avoid deadlock if more than one vcpu tries this at the same time. */ |
3980 | 0 | if ( !spin_trylock(&d->hypercall_deadlock_mutex) ) |
3981 | 0 | return -ERESTART; |
3982 | 0 |
|
3983 | 0 | /* Pause all other vcpus. */ |
3984 | 0 | for_each_vcpu ( d, v ) |
3985 | 0 | if ( v != current ) |
3986 | 0 | vcpu_pause_nosync(v); |
3987 | 0 |
|
3988 | 0 | /* Now that all VCPUs are signalled to deschedule, we wait... */ |
3989 | 0 | for_each_vcpu ( d, v ) |
3990 | 0 | if ( v != current ) |
3991 | 0 | while ( !vcpu_runnable(v) && v->is_running ) |
3992 | 0 | cpu_relax(); |
3993 | 0 |
|
3994 | 0 | /* All other vcpus are paused, safe to unlock now. */ |
3995 | 0 | spin_unlock(&d->hypercall_deadlock_mutex); |
3996 | 0 |
|
3997 | 0 | /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */ |
3998 | 0 | for_each_vcpu ( d, v ) |
3999 | 0 | paging_update_cr3(v); |
4000 | 0 |
|
4001 | 0 | /* Flush all dirty TLBs. */ |
4002 | 0 | flush_tlb_mask(d->domain_dirty_cpumask); |
4003 | 0 |
|
4004 | 0 | /* Done. */ |
4005 | 0 | for_each_vcpu ( d, v ) |
4006 | 0 | if ( v != current ) |
4007 | 0 | vcpu_unpause(v); |
4008 | 0 |
|
4009 | 0 | return 0; |
4010 | 0 | } |
4011 | | |
4012 | | static int hvmop_set_evtchn_upcall_vector( |
4013 | | XEN_GUEST_HANDLE_PARAM(xen_hvm_evtchn_upcall_vector_t) uop) |
4014 | 0 | { |
4015 | 0 | xen_hvm_evtchn_upcall_vector_t op; |
4016 | 0 | struct domain *d = current->domain; |
4017 | 0 | struct vcpu *v; |
4018 | 0 |
|
4019 | 0 | if ( !is_hvm_domain(d) ) |
4020 | 0 | return -EINVAL; |
4021 | 0 |
|
4022 | 0 | if ( copy_from_guest(&op, uop, 1) ) |
4023 | 0 | return -EFAULT; |
4024 | 0 |
|
4025 | 0 | if ( op.vector < 0x10 ) |
4026 | 0 | return -EINVAL; |
4027 | 0 |
|
4028 | 0 | if ( op.vcpu >= d->max_vcpus || (v = d->vcpu[op.vcpu]) == NULL ) |
4029 | 0 | return -ENOENT; |
4030 | 0 |
|
4031 | 0 | printk(XENLOG_G_INFO "%pv: upcall vector %02x\n", v, op.vector); |
4032 | 0 |
|
4033 | 0 | v->arch.hvm_vcpu.evtchn_upcall_vector = op.vector; |
4034 | 0 | return 0; |
4035 | 0 | } |
4036 | | |
4037 | | static int hvm_allow_set_param(struct domain *d, |
4038 | | const struct xen_hvm_param *a) |
4039 | 1 | { |
4040 | 1 | uint64_t value = d->arch.hvm_domain.params[a->index]; |
4041 | 1 | int rc; |
4042 | 1 | |
4043 | 1 | rc = xsm_hvm_param(XSM_TARGET, d, HVMOP_set_param); |
4044 | 1 | if ( rc ) |
4045 | 0 | return rc; |
4046 | 1 | |
4047 | 1 | switch ( a->index ) |
4048 | 1 | { |
4049 | 1 | /* The following parameters can be set by the guest. */ |
4050 | 1 | case HVM_PARAM_CALLBACK_IRQ: |
4051 | 1 | case HVM_PARAM_VM86_TSS: |
4052 | 1 | case HVM_PARAM_VM86_TSS_SIZED: |
4053 | 1 | case HVM_PARAM_ACPI_IOPORTS_LOCATION: |
4054 | 1 | case HVM_PARAM_VM_GENERATION_ID_ADDR: |
4055 | 1 | case HVM_PARAM_STORE_EVTCHN: |
4056 | 1 | case HVM_PARAM_CONSOLE_EVTCHN: |
4057 | 1 | case HVM_PARAM_X87_FIP_WIDTH: |
4058 | 1 | break; |
4059 | 1 | /* |
4060 | 1 | * The following parameters must not be set by the guest |
4061 | 1 | * since the domain may need to be paused. |
4062 | 1 | */ |
4063 | 0 | case HVM_PARAM_IDENT_PT: |
4064 | 0 | case HVM_PARAM_DM_DOMAIN: |
4065 | 0 | case HVM_PARAM_ACPI_S_STATE: |
4066 | 0 | /* The remaining parameters should not be set by the guest. */ |
4067 | 0 | default: |
4068 | 0 | if ( d == current->domain ) |
4069 | 0 | rc = -EPERM; |
4070 | 0 | break; |
4071 | 1 | } |
4072 | 1 | |
4073 | 1 | if ( rc ) |
4074 | 0 | return rc; |
4075 | 1 | |
4076 | 1 | switch ( a->index ) |
4077 | 1 | { |
4078 | 1 | /* The following parameters should only be changed once. */ |
4079 | 0 | case HVM_PARAM_VIRIDIAN: |
4080 | 0 | case HVM_PARAM_IOREQ_SERVER_PFN: |
4081 | 0 | case HVM_PARAM_NR_IOREQ_SERVER_PAGES: |
4082 | 0 | case HVM_PARAM_ALTP2M: |
4083 | 0 | case HVM_PARAM_MCA_CAP: |
4084 | 0 | if ( value != 0 && a->value != value ) |
4085 | 0 | rc = -EEXIST; |
4086 | 0 | break; |
4087 | 1 | default: |
4088 | 1 | break; |
4089 | 1 | } |
4090 | 1 | |
4091 | 1 | return rc; |
4092 | 1 | } |
4093 | | |
4094 | | static int hvmop_set_param( |
4095 | | XEN_GUEST_HANDLE_PARAM(xen_hvm_param_t) arg) |
4096 | 1 | { |
4097 | 1 | struct domain *curr_d = current->domain; |
4098 | 1 | struct xen_hvm_param a; |
4099 | 1 | struct domain *d; |
4100 | 1 | struct vcpu *v; |
4101 | 1 | int rc; |
4102 | 1 | |
4103 | 1 | if ( copy_from_guest(&a, arg, 1) ) |
4104 | 0 | return -EFAULT; |
4105 | 1 | |
4106 | 1 | if ( a.index >= HVM_NR_PARAMS ) |
4107 | 0 | return -EINVAL; |
4108 | 1 | |
4109 | 1 | d = rcu_lock_domain_by_any_id(a.domid); |
4110 | 1 | if ( d == NULL ) |
4111 | 0 | return -ESRCH; |
4112 | 1 | |
4113 | 1 | rc = -EINVAL; |
4114 | 1 | if ( !is_hvm_domain(d) ) |
4115 | 0 | goto out; |
4116 | 1 | |
4117 | 1 | rc = hvm_allow_set_param(d, &a); |
4118 | 1 | if ( rc ) |
4119 | 0 | goto out; |
4120 | 1 | |
4121 | 1 | switch ( a.index ) |
4122 | 1 | { |
4123 | 1 | case HVM_PARAM_CALLBACK_IRQ: |
4124 | 1 | hvm_set_callback_via(d, a.value); |
4125 | 1 | hvm_latch_shinfo_size(d); |
4126 | 1 | break; |
4127 | 0 | case HVM_PARAM_TIMER_MODE: |
4128 | 0 | if ( a.value > HVMPTM_one_missed_tick_pending ) |
4129 | 0 | rc = -EINVAL; |
4130 | 0 | break; |
4131 | 0 | case HVM_PARAM_VIRIDIAN: |
4132 | 0 | if ( (a.value & ~HVMPV_feature_mask) || |
4133 | 0 | !(a.value & HVMPV_base_freq) ) |
4134 | 0 | rc = -EINVAL; |
4135 | 0 | break; |
4136 | 0 | case HVM_PARAM_IDENT_PT: |
4137 | 0 | /* |
4138 | 0 | * Only actually required for VT-x lacking unrestricted_guest |
4139 | 0 | * capabilities. Short circuit the pause if possible. |
4140 | 0 | */ |
4141 | 0 | if ( !paging_mode_hap(d) || !cpu_has_vmx ) |
4142 | 0 | { |
4143 | 0 | d->arch.hvm_domain.params[a.index] = a.value; |
4144 | 0 | break; |
4145 | 0 | } |
4146 | 0 |
|
4147 | 0 | /* |
4148 | 0 | * Update GUEST_CR3 in each VMCS to point at identity map. |
4149 | 0 | * All foreign updates to guest state must synchronise on |
4150 | 0 | * the domctl_lock. |
4151 | 0 | */ |
4152 | 0 | rc = -ERESTART; |
4153 | 0 | if ( !domctl_lock_acquire() ) |
4154 | 0 | break; |
4155 | 0 |
|
4156 | 0 | rc = 0; |
4157 | 0 | domain_pause(d); |
4158 | 0 | d->arch.hvm_domain.params[a.index] = a.value; |
4159 | 0 | for_each_vcpu ( d, v ) |
4160 | 0 | paging_update_cr3(v); |
4161 | 0 | domain_unpause(d); |
4162 | 0 |
|
4163 | 0 | domctl_lock_release(); |
4164 | 0 | break; |
4165 | 0 | case HVM_PARAM_DM_DOMAIN: |
4166 | 0 | if ( a.value == DOMID_SELF ) |
4167 | 0 | a.value = curr_d->domain_id; |
4168 | 0 |
|
4169 | 0 | rc = hvm_set_dm_domain(d, a.value); |
4170 | 0 | break; |
4171 | 0 | case HVM_PARAM_ACPI_S_STATE: |
4172 | 0 | rc = 0; |
4173 | 0 | if ( a.value == 3 ) |
4174 | 0 | hvm_s3_suspend(d); |
4175 | 0 | else if ( a.value == 0 ) |
4176 | 0 | hvm_s3_resume(d); |
4177 | 0 | else |
4178 | 0 | rc = -EINVAL; |
4179 | 0 |
|
4180 | 0 | break; |
4181 | 0 | case HVM_PARAM_ACPI_IOPORTS_LOCATION: |
4182 | 0 | rc = pmtimer_change_ioport(d, a.value); |
4183 | 0 | break; |
4184 | 0 | case HVM_PARAM_MEMORY_EVENT_CR0: |
4185 | 0 | case HVM_PARAM_MEMORY_EVENT_CR3: |
4186 | 0 | case HVM_PARAM_MEMORY_EVENT_CR4: |
4187 | 0 | case HVM_PARAM_MEMORY_EVENT_INT3: |
4188 | 0 | case HVM_PARAM_MEMORY_EVENT_SINGLE_STEP: |
4189 | 0 | case HVM_PARAM_MEMORY_EVENT_MSR: |
4190 | 0 | /* Deprecated */ |
4191 | 0 | rc = -EOPNOTSUPP; |
4192 | 0 | break; |
4193 | 0 | case HVM_PARAM_NESTEDHVM: |
4194 | 0 | rc = xsm_hvm_param_nested(XSM_PRIV, d); |
4195 | 0 | if ( rc ) |
4196 | 0 | break; |
4197 | 0 | if ( a.value > 1 ) |
4198 | 0 | rc = -EINVAL; |
4199 | 0 | /* |
4200 | 0 | * Remove the check below once we have |
4201 | 0 | * shadow-on-shadow. |
4202 | 0 | */ |
4203 | 0 | if ( !paging_mode_hap(d) && a.value ) |
4204 | 0 | rc = -EINVAL; |
4205 | 0 | if ( a.value && |
4206 | 0 | d->arch.hvm_domain.params[HVM_PARAM_ALTP2M] ) |
4207 | 0 | rc = -EINVAL; |
4208 | 0 | /* Set up NHVM state for any vcpus that are already up. */ |
4209 | 0 | if ( a.value && |
4210 | 0 | !d->arch.hvm_domain.params[HVM_PARAM_NESTEDHVM] ) |
4211 | 0 | for_each_vcpu(d, v) |
4212 | 0 | if ( rc == 0 ) |
4213 | 0 | rc = nestedhvm_vcpu_initialise(v); |
4214 | 0 | if ( !a.value || rc ) |
4215 | 0 | for_each_vcpu(d, v) |
4216 | 0 | nestedhvm_vcpu_destroy(v); |
4217 | 0 | break; |
4218 | 0 | case HVM_PARAM_ALTP2M: |
4219 | 0 | rc = xsm_hvm_param_altp2mhvm(XSM_PRIV, d); |
4220 | 0 | if ( rc ) |
4221 | 0 | break; |
4222 | 0 | if ( a.value > XEN_ALTP2M_limited ) |
4223 | 0 | rc = -EINVAL; |
4224 | 0 | if ( a.value && |
4225 | 0 | d->arch.hvm_domain.params[HVM_PARAM_NESTEDHVM] ) |
4226 | 0 | rc = -EINVAL; |
4227 | 0 | break; |
4228 | 0 | case HVM_PARAM_BUFIOREQ_EVTCHN: |
4229 | 0 | rc = -EINVAL; |
4230 | 0 | break; |
4231 | 0 | case HVM_PARAM_TRIPLE_FAULT_REASON: |
4232 | 0 | if ( a.value > SHUTDOWN_MAX ) |
4233 | 0 | rc = -EINVAL; |
4234 | 0 | break; |
4235 | 0 | case HVM_PARAM_IOREQ_SERVER_PFN: |
4236 | 0 | d->arch.hvm_domain.ioreq_gfn.base = a.value; |
4237 | 0 | break; |
4238 | 0 | case HVM_PARAM_NR_IOREQ_SERVER_PAGES: |
4239 | 0 | { |
4240 | 0 | unsigned int i; |
4241 | 0 |
|
4242 | 0 | if ( a.value == 0 || |
4243 | 0 | a.value > sizeof(d->arch.hvm_domain.ioreq_gfn.mask) * 8 ) |
4244 | 0 | { |
4245 | 0 | rc = -EINVAL; |
4246 | 0 | break; |
4247 | 0 | } |
4248 | 0 | for ( i = 0; i < a.value; i++ ) |
4249 | 0 | set_bit(i, &d->arch.hvm_domain.ioreq_gfn.mask); |
4250 | 0 |
|
4251 | 0 | break; |
4252 | 0 | } |
4253 | 0 | case HVM_PARAM_X87_FIP_WIDTH: |
4254 | 0 | if ( a.value != 0 && a.value != 4 && a.value != 8 ) |
4255 | 0 | { |
4256 | 0 | rc = -EINVAL; |
4257 | 0 | break; |
4258 | 0 | } |
4259 | 0 | d->arch.x87_fip_width = a.value; |
4260 | 0 | break; |
4261 | 0 |
|
4262 | 0 | case HVM_PARAM_VM86_TSS: |
4263 | 0 | /* Hardware would silently truncate high bits. */ |
4264 | 0 | if ( a.value != (uint32_t)a.value ) |
4265 | 0 | { |
4266 | 0 | if ( d == curr_d ) |
4267 | 0 | domain_crash(d); |
4268 | 0 | rc = -EINVAL; |
4269 | 0 | } |
4270 | 0 | /* Old hvmloader binaries hardcode the size to 128 bytes. */ |
4271 | 0 | if ( a.value ) |
4272 | 0 | a.value |= (128ULL << 32) | VM86_TSS_UPDATED; |
4273 | 0 | a.index = HVM_PARAM_VM86_TSS_SIZED; |
4274 | 0 | break; |
4275 | 0 |
|
4276 | 0 | case HVM_PARAM_VM86_TSS_SIZED: |
4277 | 0 | if ( (a.value >> 32) < sizeof(struct tss32) ) |
4278 | 0 | { |
4279 | 0 | if ( d == curr_d ) |
4280 | 0 | domain_crash(d); |
4281 | 0 | rc = -EINVAL; |
4282 | 0 | } |
4283 | 0 | /* |
4284 | 0 | * Cap at the theoretically useful maximum (base structure plus |
4285 | 0 | * 256 bits interrupt redirection bitmap + 64k bits I/O bitmap |
4286 | 0 | * plus one padding byte). |
4287 | 0 | */ |
4288 | 0 | if ( (a.value >> 32) > sizeof(struct tss32) + |
4289 | 0 | (0x100 / 8) + (0x10000 / 8) + 1 ) |
4290 | 0 | a.value = (uint32_t)a.value | |
4291 | 0 | ((sizeof(struct tss32) + (0x100 / 8) + |
4292 | 0 | (0x10000 / 8) + 1) << 32); |
4293 | 0 | a.value |= VM86_TSS_UPDATED; |
4294 | 0 | break; |
4295 | 0 |
|
4296 | 0 | case HVM_PARAM_MCA_CAP: |
4297 | 0 | rc = vmce_enable_mca_cap(d, a.value); |
4298 | 0 | break; |
4299 | 1 | } |
4300 | 1 | |
4301 | 1 | if ( rc != 0 ) |
4302 | 0 | goto out; |
4303 | 1 | |
4304 | 1 | d->arch.hvm_domain.params[a.index] = a.value; |
4305 | 1 | |
4306 | 1 | HVM_DBG_LOG(DBG_LEVEL_HCALL, "set param %u = %"PRIx64, |
4307 | 1 | a.index, a.value); |
4308 | 1 | |
4309 | 1 | out: |
4310 | 1 | rcu_unlock_domain(d); |
4311 | 1 | return rc; |
4312 | 1 | } |
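
HVM_PARAM_VM86_TSS_SIZED packs the TSS size into bits 63:32 and its physical address into bits 31:0, which is why the legacy HVM_PARAM_VM86_TSS path above ORs in 128ULL << 32 and why the size checks shift the value right by 32 (the VM86_TSS_UPDATED marker is ORed in separately and left out of this sketch). A standalone sketch of the encoding (the example address is made up):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t vm86_tss_sized(uint32_t addr, uint32_t size)
    {
        return (uint64_t)size << 32 | addr;
    }

    int main(void)
    {
        uint64_t v = vm86_tss_sized(0x000e9000, 128);   /* legacy 128-byte TSS */

        printf("addr %#x, size %u\n", (uint32_t)v, (uint32_t)(v >> 32));
        return 0;
    }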
4313 | | |
4314 | | static int hvm_allow_get_param(struct domain *d, |
4315 | | const struct xen_hvm_param *a) |
4316 | 3 | { |
4317 | 3 | int rc; |
4318 | 3 | |
4319 | 3 | rc = xsm_hvm_param(XSM_TARGET, d, HVMOP_get_param); |
4320 | 3 | if ( rc ) |
4321 | 0 | return rc; |
4322 | 3 | |
4323 | 3 | switch ( a->index ) |
4324 | 3 | { |
4325 | 3 | /* The following parameters can be read by the guest. */ |
4326 | 3 | case HVM_PARAM_CALLBACK_IRQ: |
4327 | 3 | case HVM_PARAM_VM86_TSS: |
4328 | 3 | case HVM_PARAM_VM86_TSS_SIZED: |
4329 | 3 | case HVM_PARAM_ACPI_IOPORTS_LOCATION: |
4330 | 3 | case HVM_PARAM_VM_GENERATION_ID_ADDR: |
4331 | 3 | case HVM_PARAM_STORE_PFN: |
4332 | 3 | case HVM_PARAM_STORE_EVTCHN: |
4333 | 3 | case HVM_PARAM_CONSOLE_PFN: |
4334 | 3 | case HVM_PARAM_CONSOLE_EVTCHN: |
4335 | 3 | case HVM_PARAM_ALTP2M: |
4336 | 3 | case HVM_PARAM_X87_FIP_WIDTH: |
4337 | 3 | break; |
4338 | 3 | /* |
4339 | 3 | * The following parameters must not be read by the guest |
4340 | 3 | * since the domain may need to be paused. |
4341 | 3 | */ |
4342 | 0 | case HVM_PARAM_IOREQ_PFN: |
4343 | 0 | case HVM_PARAM_BUFIOREQ_PFN: |
4344 | 0 | case HVM_PARAM_BUFIOREQ_EVTCHN: |
4345 | 0 | /* The remaining parameters should not be read by the guest. */ |
4346 | 0 | default: |
4347 | 0 | if ( d == current->domain ) |
4348 | 0 | rc = -EPERM; |
4349 | 0 | break; |
4350 | 3 | } |
4351 | 3 | |
4352 | 3 | return rc; |
4353 | 3 | } |
4354 | | |
4355 | | static int hvmop_get_param( |
4356 | | XEN_GUEST_HANDLE_PARAM(xen_hvm_param_t) arg) |
4357 | 3 | { |
4358 | 3 | struct xen_hvm_param a; |
4359 | 3 | struct domain *d; |
4360 | 3 | int rc; |
4361 | 3 | |
4362 | 3 | if ( copy_from_guest(&a, arg, 1) ) |
4363 | 0 | return -EFAULT; |
4364 | 3 | |
4365 | 3 | if ( a.index >= HVM_NR_PARAMS ) |
4366 | 0 | return -EINVAL; |
4367 | 3 | |
4368 | 3 | d = rcu_lock_domain_by_any_id(a.domid); |
4369 | 3 | if ( d == NULL ) |
4370 | 0 | return -ESRCH; |
4371 | 3 | |
4372 | 3 | rc = -EINVAL; |
4373 | 3 | if ( !is_hvm_domain(d) ) |
4374 | 0 | goto out; |
4375 | 3 | |
4376 | 3 | rc = hvm_allow_get_param(d, &a); |
4377 | 3 | if ( rc ) |
4378 | 0 | goto out; |
4379 | 3 | |
4380 | 3 | switch ( a.index ) |
4381 | 3 | { |
4382 | 0 | case HVM_PARAM_ACPI_S_STATE: |
4383 | 0 | a.value = d->arch.hvm_domain.is_s3_suspended ? 3 : 0; |
4384 | 0 | break; |
4385 | 0 |
|
4386 | 0 | case HVM_PARAM_VM86_TSS: |
4387 | 0 | a.value = (uint32_t)d->arch.hvm_domain.params[HVM_PARAM_VM86_TSS_SIZED]; |
4388 | 0 | break; |
4389 | 0 |
|
4390 | 0 | case HVM_PARAM_VM86_TSS_SIZED: |
4391 | 0 | a.value = d->arch.hvm_domain.params[HVM_PARAM_VM86_TSS_SIZED] & |
4392 | 0 | ~VM86_TSS_UPDATED; |
4393 | 0 | break; |
4394 | 0 |
|
4395 | 0 | case HVM_PARAM_X87_FIP_WIDTH: |
4396 | 0 | a.value = d->arch.x87_fip_width; |
4397 | 0 | break; |
4398 | 0 | case HVM_PARAM_IOREQ_PFN: |
4399 | 0 | case HVM_PARAM_BUFIOREQ_PFN: |
4400 | 0 | case HVM_PARAM_BUFIOREQ_EVTCHN: |
4401 | 0 | /* |
4402 | 0 | * It may be necessary to create a default ioreq server here, |
4403 | 0 | * because legacy versions of QEMU are not aware of the new API for |
4404 | 0 | * explicit ioreq server creation. However, if the domain is not |
4405 | 0 | * under construction then it will not be QEMU querying the |
4406 | 0 | * parameters and thus the query should not have that side-effect. |
4407 | 0 | */ |
4408 | 0 | if ( !d->creation_finished ) |
4409 | 0 | { |
4410 | 0 | domid_t domid = d->arch.hvm_domain.params[HVM_PARAM_DM_DOMAIN]; |
4411 | 0 |
|
4412 | 0 | rc = hvm_create_ioreq_server(d, domid, true, |
4413 | 0 | HVM_IOREQSRV_BUFIOREQ_LEGACY, NULL); |
4414 | 0 | if ( rc != 0 && rc != -EEXIST ) |
4415 | 0 | goto out; |
4416 | 0 | } |
4417 | 0 |
|
4418 | 0 | /*FALLTHRU*/ |
4419 | 3 | default: |
4420 | 3 | a.value = d->arch.hvm_domain.params[a.index]; |
4421 | 3 | break; |
4422 | 3 | } |
4423 | 3 | |
4424 | 3 | rc = __copy_to_guest(arg, &a, 1) ? -EFAULT : 0; |
4425 | 3 | |
4426 | 3 | HVM_DBG_LOG(DBG_LEVEL_HCALL, "get param %u = %"PRIx64, |
4427 | 3 | a.index, a.value); |
4428 | 3 | |
4429 | 3 | out: |
4430 | 3 | rcu_unlock_domain(d); |
4431 | 3 | return rc; |
4432 | 3 | } |
4433 | | |
4434 | | static int do_altp2m_op( |
4435 | | XEN_GUEST_HANDLE_PARAM(void) arg) |
4436 | 0 | { |
4437 | 0 | struct xen_hvm_altp2m_op a; |
4438 | 0 | struct domain *d = NULL; |
4439 | 0 | int rc = 0; |
4440 | 0 | uint64_t mode; |
4441 | 0 |
|
4442 | 0 | if ( !hvm_altp2m_supported() ) |
4443 | 0 | return -EOPNOTSUPP; |
4444 | 0 |
|
4445 | 0 | if ( copy_from_guest(&a, arg, 1) ) |
4446 | 0 | return -EFAULT; |
4447 | 0 |
|
4448 | 0 | if ( a.pad1 || a.pad2 || |
4449 | 0 | (a.version != HVMOP_ALTP2M_INTERFACE_VERSION) ) |
4450 | 0 | return -EINVAL; |
4451 | 0 |
|
4452 | 0 | switch ( a.cmd ) |
4453 | 0 | { |
4454 | 0 | case HVMOP_altp2m_get_domain_state: |
4455 | 0 | case HVMOP_altp2m_set_domain_state: |
4456 | 0 | case HVMOP_altp2m_vcpu_enable_notify: |
4457 | 0 | case HVMOP_altp2m_create_p2m: |
4458 | 0 | case HVMOP_altp2m_destroy_p2m: |
4459 | 0 | case HVMOP_altp2m_switch_p2m: |
4460 | 0 | case HVMOP_altp2m_set_mem_access: |
4461 | 0 | case HVMOP_altp2m_change_gfn: |
4462 | 0 | break; |
4463 | 0 | default: |
4464 | 0 | return -EOPNOTSUPP; |
4465 | 0 | } |
4466 | 0 |
|
4467 | 0 | d = ( a.cmd != HVMOP_altp2m_vcpu_enable_notify ) ? |
4468 | 0 | rcu_lock_domain_by_any_id(a.domain) : rcu_lock_current_domain(); |
4469 | 0 |
|
4470 | 0 | if ( d == NULL ) |
4471 | 0 | return -ESRCH; |
4472 | 0 |
|
4473 | 0 | if ( !is_hvm_domain(d) ) |
4474 | 0 | { |
4475 | 0 | rc = -EOPNOTSUPP; |
4476 | 0 | goto out; |
4477 | 0 | } |
4478 | 0 |
|
4479 | 0 | if ( (a.cmd != HVMOP_altp2m_get_domain_state) && |
4480 | 0 | (a.cmd != HVMOP_altp2m_set_domain_state) && |
4481 | 0 | !d->arch.altp2m_active ) |
4482 | 0 | { |
4483 | 0 | rc = -EOPNOTSUPP; |
4484 | 0 | goto out; |
4485 | 0 | } |
4486 | 0 |
|
4487 | 0 | mode = d->arch.hvm_domain.params[HVM_PARAM_ALTP2M]; |
4488 | 0 |
|
4489 | 0 | if ( XEN_ALTP2M_disabled == mode ) |
4490 | 0 | { |
4491 | 0 | rc = -EINVAL; |
4492 | 0 | goto out; |
4493 | 0 | } |
4494 | 0 |
|
4495 | 0 | if ( (rc = xsm_hvm_altp2mhvm_op(XSM_OTHER, d, mode, a.cmd)) ) |
4496 | 0 | goto out; |
4497 | 0 |
|
4498 | 0 | switch ( a.cmd ) |
4499 | 0 | { |
4500 | 0 | case HVMOP_altp2m_get_domain_state: |
4501 | 0 | a.u.domain_state.state = altp2m_active(d); |
4502 | 0 | rc = __copy_to_guest(arg, &a, 1) ? -EFAULT : 0; |
4503 | 0 | break; |
4504 | 0 |
|
4505 | 0 | case HVMOP_altp2m_set_domain_state: |
4506 | 0 | { |
4507 | 0 | struct vcpu *v; |
4508 | 0 | bool_t ostate; |
4509 | 0 |
|
4510 | 0 | if ( nestedhvm_enabled(d) ) |
4511 | 0 | { |
4512 | 0 | rc = -EINVAL; |
4513 | 0 | break; |
4514 | 0 | } |
4515 | 0 |
|
4516 | 0 | ostate = d->arch.altp2m_active; |
4517 | 0 | d->arch.altp2m_active = !!a.u.domain_state.state; |
4518 | 0 |
|
4519 | 0 | /* If the alternate p2m state has changed, handle appropriately */ |
4520 | 0 | if ( d->arch.altp2m_active != ostate && |
4521 | 0 | (ostate || !(rc = p2m_init_altp2m_by_id(d, 0))) ) |
4522 | 0 | { |
4523 | 0 | for_each_vcpu( d, v ) |
4524 | 0 | { |
4525 | 0 | if ( !ostate ) |
4526 | 0 | altp2m_vcpu_initialise(v); |
4527 | 0 | else |
4528 | 0 | altp2m_vcpu_destroy(v); |
4529 | 0 | } |
4530 | 0 |
|
4531 | 0 | if ( ostate ) |
4532 | 0 | p2m_flush_altp2m(d); |
4533 | 0 | } |
4534 | 0 | break; |
4535 | 0 | } |
4536 | 0 |
|
4537 | 0 | case HVMOP_altp2m_vcpu_enable_notify: |
4538 | 0 | { |
4539 | 0 | struct vcpu *curr = current; |
4540 | 0 | p2m_type_t p2mt; |
4541 | 0 |
|
4542 | 0 | if ( a.u.enable_notify.pad || a.domain != DOMID_SELF || |
4543 | 0 | a.u.enable_notify.vcpu_id != curr->vcpu_id ) |
4544 | 0 | rc = -EINVAL; |
4545 | 0 |
|
4546 | 0 | if ( !gfn_eq(vcpu_altp2m(curr).veinfo_gfn, INVALID_GFN) || |
4547 | 0 | mfn_eq(get_gfn_query_unlocked(curr->domain, |
4548 | 0 | a.u.enable_notify.gfn, &p2mt), INVALID_MFN) ) |
4549 | 0 | return -EINVAL; |
4550 | 0 |
|
4551 | 0 | vcpu_altp2m(curr).veinfo_gfn = _gfn(a.u.enable_notify.gfn); |
4552 | 0 | altp2m_vcpu_update_vmfunc_ve(curr); |
4553 | 0 | break; |
4554 | 0 | } |
4555 | 0 |
|
4556 | 0 | case HVMOP_altp2m_create_p2m: |
4557 | 0 | if ( !(rc = p2m_init_next_altp2m(d, &a.u.view.view)) ) |
4558 | 0 | rc = __copy_to_guest(arg, &a, 1) ? -EFAULT : 0; |
4559 | 0 | break; |
4560 | 0 |
|
4561 | 0 | case HVMOP_altp2m_destroy_p2m: |
4562 | 0 | rc = p2m_destroy_altp2m_by_id(d, a.u.view.view); |
4563 | 0 | break; |
4564 | 0 |
|
4565 | 0 | case HVMOP_altp2m_switch_p2m: |
4566 | 0 | rc = p2m_switch_domain_altp2m_by_id(d, a.u.view.view); |
4567 | 0 | break; |
4568 | 0 |
|
4569 | 0 | case HVMOP_altp2m_set_mem_access: |
4570 | 0 | if ( a.u.set_mem_access.pad ) |
4571 | 0 | rc = -EINVAL; |
4572 | 0 | else |
4573 | 0 | rc = p2m_set_mem_access(d, _gfn(a.u.set_mem_access.gfn), 1, 0, 0, |
4574 | 0 | a.u.set_mem_access.hvmmem_access, |
4575 | 0 | a.u.set_mem_access.view); |
4576 | 0 | break; |
4577 | 0 |
|
4578 | 0 | case HVMOP_altp2m_change_gfn: |
4579 | 0 | if ( a.u.change_gfn.pad1 || a.u.change_gfn.pad2 ) |
4580 | 0 | rc = -EINVAL; |
4581 | 0 | else |
4582 | 0 | rc = p2m_change_altp2m_gfn(d, a.u.change_gfn.view, |
4583 | 0 | _gfn(a.u.change_gfn.old_gfn), |
4584 | 0 | _gfn(a.u.change_gfn.new_gfn)); |
4585 | 0 | break; |
4586 | 0 | default: |
4587 | 0 | ASSERT_UNREACHABLE(); |
4588 | 0 | } |
4589 | 0 |
|
4590 | 0 | out: |
4591 | 0 | rcu_unlock_domain(d); |
4592 | 0 |
|
4593 | 0 | return rc; |
4594 | 0 | } |
4595 | | |
4596 | | static int hvmop_get_mem_type( |
4597 | | XEN_GUEST_HANDLE_PARAM(xen_hvm_get_mem_type_t) arg) |
4598 | 0 | { |
4599 | 0 | struct xen_hvm_get_mem_type a; |
4600 | 0 | struct domain *d; |
4601 | 0 | p2m_type_t t; |
4602 | 0 | int rc; |
4603 | 0 |
|
4604 | 0 | if ( copy_from_guest(&a, arg, 1) ) |
4605 | 0 | return -EFAULT; |
4606 | 0 |
|
4607 | 0 | d = rcu_lock_domain_by_any_id(a.domid); |
4608 | 0 | if ( d == NULL ) |
4609 | 0 | return -ESRCH; |
4610 | 0 |
|
4611 | 0 | rc = xsm_hvm_param(XSM_TARGET, d, HVMOP_get_mem_type); |
4612 | 0 | if ( rc ) |
4613 | 0 | goto out; |
4614 | 0 |
|
4615 | 0 | rc = -EINVAL; |
4616 | 0 | if ( !is_hvm_domain(d) ) |
4617 | 0 | goto out; |
4618 | 0 |
|
4619 | 0 | /* |
4620 | 0 | * Use get_gfn query as we are interested in the current |
4621 | 0 | * type, not in allocating or unsharing. That'll happen |
4622 | 0 | * on access. |
4623 | 0 | */ |
4624 | 0 | get_gfn_query_unlocked(d, a.pfn, &t); |
4625 | 0 | if ( p2m_is_mmio(t) ) |
4626 | 0 | a.mem_type = HVMMEM_mmio_dm; |
4627 | 0 | else if ( t == p2m_ioreq_server ) |
4628 | 0 | a.mem_type = HVMMEM_ioreq_server; |
4629 | 0 | else if ( p2m_is_readonly(t) ) |
4630 | 0 | a.mem_type = HVMMEM_ram_ro; |
4631 | 0 | else if ( p2m_is_ram(t) ) |
4632 | 0 | a.mem_type = HVMMEM_ram_rw; |
4633 | 0 | else if ( p2m_is_pod(t) ) |
4634 | 0 | a.mem_type = HVMMEM_ram_rw; |
4635 | 0 | else if ( p2m_is_grant(t) ) |
4636 | 0 | a.mem_type = HVMMEM_ram_rw; |
4637 | 0 | else |
4638 | 0 | a.mem_type = HVMMEM_mmio_dm; |
4639 | 0 |
|
4640 | 0 | rc = -EFAULT; |
4641 | 0 | if ( __copy_to_guest(arg, &a, 1) ) |
4642 | 0 | goto out; |
4643 | 0 | rc = 0; |
4644 | 0 |
|
4645 | 0 | out: |
4646 | 0 | rcu_unlock_domain(d); |
4647 | 0 |
|
4648 | 0 | return rc; |
4649 | 0 | } |
4650 | | |
4651 | | long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg) |
4652 | 4 | { |
4653 | 4 | long rc = 0; |
4654 | 4 | |
4655 | 4 | /* |
4656 | 4 | * NB: hvm_op can be part of a restarted hypercall; but at the |
4657 | 4 | * moment the only hypercalls which do continuations don't need to |
4658 | 4 | * store any iteration information (since they're just re-trying |
4659 | 4 | * the acquisition of a lock). |
4660 | 4 | */ |
4661 | 4 | |
4662 | 4 | switch ( op ) |
4663 | 4 | { |
4664 | 0 | case HVMOP_set_evtchn_upcall_vector: |
4665 | 0 | rc = hvmop_set_evtchn_upcall_vector( |
4666 | 0 | guest_handle_cast(arg, xen_hvm_evtchn_upcall_vector_t)); |
4667 | 0 | break; |
4668 | 0 | |
4669 | 1 | case HVMOP_set_param: |
4670 | 1 | rc = hvmop_set_param( |
4671 | 1 | guest_handle_cast(arg, xen_hvm_param_t)); |
4672 | 1 | break; |
4673 | 0 |
|
4674 | 3 | case HVMOP_get_param: |
4675 | 3 | rc = hvmop_get_param( |
4676 | 3 | guest_handle_cast(arg, xen_hvm_param_t)); |
4677 | 3 | break; |
4678 | 0 |
|
4679 | 0 | case HVMOP_flush_tlbs: |
4680 | 0 | rc = guest_handle_is_null(arg) ? hvmop_flush_tlb_all() : -EINVAL; |
4681 | 0 | break; |
4682 | 0 |
|
4683 | 0 | case HVMOP_get_mem_type: |
4684 | 0 | rc = hvmop_get_mem_type( |
4685 | 0 | guest_handle_cast(arg, xen_hvm_get_mem_type_t)); |
4686 | 0 | break; |
4687 | 0 |
|
4688 | 0 | case HVMOP_pagetable_dying: |
4689 | 0 | { |
4690 | 0 | struct xen_hvm_pagetable_dying a; |
4691 | 0 | struct domain *d; |
4692 | 0 |
|
4693 | 0 | if ( copy_from_guest(&a, arg, 1) ) |
4694 | 0 | return -EFAULT; |
4695 | 0 |
4696 | 0 | d = rcu_lock_domain_by_any_id(a.domid); |
4697 | 0 | if ( d == NULL ) |
4698 | 0 | return -ESRCH; |
4699 | 0 |
4700 | 0 | rc = -EINVAL; |
4701 | 0 | if ( is_hvm_domain(d) && paging_mode_shadow(d) ) |
4702 | 0 | rc = xsm_hvm_param(XSM_TARGET, d, op); |
4703 | 0 | if ( !rc ) |
4704 | 0 | pagetable_dying(d, a.gpa); |
4705 | 0 |
4706 | 0 | rcu_unlock_domain(d); |
4707 | 0 | break; |
4708 | 0 | } |
4709 | 0 |
4710 | 0 | case HVMOP_get_time: { |
4711 | 0 | xen_hvm_get_time_t gxt; |
4712 | 0 |
4713 | 0 | gxt.now = NOW(); |
4714 | 0 | if ( copy_to_guest(arg, &gxt, 1) ) |
4715 | 0 | rc = -EFAULT; |
4716 | 0 | break; |
4717 | 0 | } |
4718 | 0 |
4719 | 0 | case HVMOP_xentrace: { |
4720 | 0 | xen_hvm_xentrace_t tr; |
4721 | 0 |
4722 | 0 | if ( copy_from_guest(&tr, arg, 1 ) ) |
4723 | 0 | return -EFAULT; |
4724 | 0 |
4725 | 0 | if ( tr.extra_bytes > sizeof(tr.extra) |
4726 | 0 | || (tr.event & ~((1u<<TRC_SUBCLS_SHIFT)-1)) ) |
4727 | 0 | return -EINVAL; |
4728 | 0 |
4729 | 0 | /* Cycles will be taken at the vmexit and vmenter */ |
4730 | 0 | trace_var(tr.event | TRC_GUEST, 0 /*!cycles*/, |
4731 | 0 | tr.extra_bytes, tr.extra); |
4732 | 0 | break; |
4733 | 0 | } |
4734 | 0 |
4735 | 0 | case HVMOP_guest_request_vm_event: |
4736 | 0 | if ( guest_handle_is_null(arg) ) |
4737 | 0 | monitor_guest_request(); |
4738 | 0 | else |
4739 | 0 | rc = -EINVAL; |
4740 | 0 | break; |
4741 | 0 |
4742 | 0 | case HVMOP_altp2m: |
4743 | 0 | rc = do_altp2m_op(arg); |
4744 | 0 | break; |
4745 | 0 |
4746 | 0 | default: |
4747 | 0 | { |
4748 | 0 | gdprintk(XENLOG_DEBUG, "Bad HVM op %ld.\n", op); |
4749 | 0 | rc = -ENOSYS; |
4750 | 0 | break; |
4751 | 0 | } |
4752 | 4 | } |
4753 | 4 | |
4754 | 4 | if ( rc == -ERESTART ) |
4755 | 0 | rc = hypercall_create_continuation(__HYPERVISOR_hvm_op, "lh", |
4756 | 0 | op, arg); |
4757 | 4 | |
4758 | 4 | return rc; |
4759 | 4 | } |
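
/*
 * A minimal sketch (not part of hvm.c) of the restart pattern at the tail
 * of do_hvm_op().  Everything here is a hypothetical stand-in: Xen's
 * hypercall_create_continuation() rewrites the guest's registers so the
 * same hypercall is re-issued later; the sketch models that by looping
 * until the sub-op stops asking to be retried, which is safe precisely
 * because (as the NB comment above notes) no iteration state is kept.
 */
#include <stdio.h>

#define SKETCH_ERESTART (-256)  /* hypothetical stand-in for Xen's -ERESTART */

/* Hypothetical sub-op: hits contention twice, then succeeds. */
static long sketch_subop(void)
{
    static int contention = 2;
    return ( contention-- > 0 ) ? SKETCH_ERESTART : 0;
}

static long sketch_hvm_op(void)
{
    long rc;

    /* Guest-visible behaviour: the call is simply re-issued until it completes. */
    do {
        rc = sketch_subop();
    } while ( rc == SKETCH_ERESTART );

    return rc;
}

int main(void)
{
    printf("final rc = %ld\n", sketch_hvm_op());
    return 0;
}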
4760 | | |
4761 | | int hvm_debug_op(struct vcpu *v, int32_t op) |
4762 | 0 | { |
4763 | 0 | int rc; |
4764 | 0 |
4765 | 0 | switch ( op ) |
4766 | 0 | { |
4767 | 0 | case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON: |
4768 | 0 | case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF: |
4769 | 0 | rc = -EOPNOTSUPP; |
4770 | 0 | if ( !cpu_has_monitor_trap_flag ) |
4771 | 0 | break; |
4772 | 0 | rc = 0; |
4773 | 0 | vcpu_pause(v); |
4774 | 0 | v->arch.hvm_vcpu.single_step = |
4775 | 0 | (op == XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON); |
4776 | 0 | vcpu_unpause(v); /* guest will latch new state */ |
4777 | 0 | break; |
4778 | 0 | default: |
4779 | 0 | rc = -ENOSYS; |
4780 | 0 | break; |
4781 | 0 | } |
4782 | 0 |
4783 | 0 | return rc; |
4784 | 0 | } |
4785 | | |
4786 | | void hvm_toggle_singlestep(struct vcpu *v) |
4787 | 0 | { |
4788 | 0 | ASSERT(atomic_read(&v->pause_count)); |
4789 | 0 |
4790 | 0 | if ( !hvm_is_singlestep_supported() ) |
4791 | 0 | return; |
4792 | 0 |
4793 | 0 | v->arch.hvm_vcpu.single_step = !v->arch.hvm_vcpu.single_step; |
4794 | 0 | } |
4795 | | |
4796 | | int hvm_set_mode(struct vcpu *v, int mode) |
4797 | 0 | { |
4798 | 0 |
4799 | 0 | switch ( mode ) |
4800 | 0 | { |
4801 | 0 | case 4: |
4802 | 0 | v->arch.hvm_vcpu.guest_efer &= ~(EFER_LMA | EFER_LME); |
4803 | 0 | break; |
4804 | 0 | case 8: |
4805 | 0 | v->arch.hvm_vcpu.guest_efer |= (EFER_LMA | EFER_LME); |
4806 | 0 | break; |
4807 | 0 | default: |
4808 | 0 | return -EOPNOTSUPP; |
4809 | 0 | } |
4810 | 0 |
4811 | 0 | hvm_update_guest_efer(v); |
4812 | 0 |
4813 | 0 | if ( hvm_funcs.set_mode ) |
4814 | 0 | return hvm_funcs.set_mode(v, mode); |
4815 | 0 |
4816 | 0 | return 0; |
4817 | 0 | } |
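
/*
 * A minimal sketch (not part of hvm.c) of the EFER manipulation done by
 * hvm_set_mode() above.  The bit positions are the architectural ones
 * (EFER.LME is bit 8, EFER.LMA is bit 10); the function and macro names
 * are hypothetical stand-ins so the example is self-contained.
 */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_EFER_LME (1u << 8)   /* Long Mode Enable */
#define SKETCH_EFER_LMA (1u << 10)  /* Long Mode Active */

/* mode == 8 selects 64-bit (long) mode, mode == 4 selects 32-bit mode. */
static int sketch_set_mode(uint64_t *efer, int mode)
{
    switch ( mode )
    {
    case 4:
        *efer &= ~(uint64_t)(SKETCH_EFER_LMA | SKETCH_EFER_LME);
        return 0;
    case 8:
        *efer |= SKETCH_EFER_LMA | SKETCH_EFER_LME;
        return 0;
    default:
        return -1;                  /* stand-in for -EOPNOTSUPP */
    }
}

int main(void)
{
    uint64_t efer = 0;

    sketch_set_mode(&efer, 8);
    printf("efer after switch to 64-bit: %#llx\n", (unsigned long long)efer);
    return 0;
}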
4818 | | |
4819 | | void hvm_domain_soft_reset(struct domain *d) |
4820 | 0 | { |
4821 | 0 | hvm_destroy_all_ioreq_servers(d); |
4822 | 0 | } |
4823 | | |
4824 | | /* |
4825 | | * Segment caches in VMCB/VMCS are inconsistent about which bits are checked, |
4826 | | * important, and preserved across vmentry/exit. Cook the values to make them |
4827 | | * closer to what is architecturally expected from entries in the segment |
4828 | | * cache. |
4829 | | */ |
4830 | | void hvm_get_segment_register(struct vcpu *v, enum x86_segment seg, |
4831 | | struct segment_register *reg) |
4832 | 180k | { |
4833 | 180k | hvm_funcs.get_segment_register(v, seg, reg); |
4834 | 180k | |
4835 | 180k | switch ( seg ) |
4836 | 180k | { |
4837 | 60.1k | case x86_seg_ss: |
4838 | 60.1k | /* SVM may retain %ss.DB when %ss is loaded with a NULL selector. */ |
4839 | 60.1k | if ( !reg->p ) |
4840 | 6 | reg->db = 0; |
4841 | 60.1k | break; |
4842 | 60.1k | |
4843 | 0 | case x86_seg_tr: |
4844 | 0 | /* |
4845 | 0 | * SVM doesn't track %tr.B. Architecturally, a loaded TSS segment will |
4846 | 0 | * always be busy. |
4847 | 0 | */ |
4848 | 0 | reg->type |= 0x2; |
4849 | 0 |
4850 | 0 | /* |
4851 | 0 | * %cs and %tr are unconditionally present. SVM ignores these present |
4852 | 0 | * bits and will happily run without them set. |
4853 | 0 | */ |
4854 | 60.1k | case x86_seg_cs: |
4855 | 60.1k | reg->p = 1; |
4856 | 60.1k | break; |
4857 | 0 |
4858 | 0 | case x86_seg_gdtr: |
4859 | 0 | case x86_seg_idtr: |
4860 | 0 | /* |
4861 | 0 | * Treat GDTR/IDTR as being present system segments. This avoids them |
4862 | 0 | * needing special casing for segmentation checks. |
4863 | 0 | */ |
4864 | 0 | reg->attr = 0x80; |
4865 | 0 | break; |
4866 | 0 |
4867 | 60.1k | default: /* Avoid triggering -Werror=switch */ |
4868 | 60.1k | break; |
4869 | 180k | } |
4870 | 180k | |
4871 | 180k | if ( reg->p ) |
4872 | 180k | { |
4873 | 180k | /* |
4874 | 180k | * For segments which are present/usable, cook the system flag. SVM |
4875 | 180k | * ignores the S bit on all segments and will happily run with them in |
4876 | 180k | * any state. |
4877 | 180k | */ |
4878 | 180k | reg->s = is_x86_user_segment(seg); |
4879 | 180k | |
4880 | 180k | /* |
4881 | 180k | * SVM discards %cs.G on #VMEXIT. Other user segments do have .G |
4882 | 180k | * tracked, but Linux commit 80112c89ed87 "KVM: Synthesize G bit for |
4883 | 180k | * all segments." indicates that this isn't necessarily the case when |
4884 | 180k | * nested under ESXi. |
4885 | 180k | * |
4886 | 180k | * Unconditionally recalculate G. |
4887 | 180k | */ |
4888 | 180k | reg->g = !!(reg->limit >> 20); |
4889 | 180k | |
4890 | 180k | /* |
4891 | 180k | * SVM doesn't track the Accessed flag. It will always be set for |
4892 | 180k | * usable user segments loaded into the descriptor cache. |
4893 | 180k | */ |
4894 | 180k | if ( is_x86_user_segment(seg) ) |
4895 | 180k | reg->type |= 0x1; |
4896 | 180k | } |
4897 | 180k | } |
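
/*
 * A minimal sketch (not part of hvm.c) of why the G bit can be recomputed
 * from the cached limit alone, as hvm_get_segment_register() does above.
 * A descriptor stores a 20-bit limit; with G=1 it is scaled to bytes as
 * (raw << 12) | 0xfff, so any cached byte limit above 0xfffff can only
 * have come from a page-granular descriptor.  The recomputation is lossy
 * for small page-granular limits, but the effective byte limit does not
 * change.  The names below are hypothetical stand-ins for Xen's
 * struct segment_register fields.
 */
#include <stdint.h>
#include <stdio.h>

/* Expand a descriptor's 20-bit limit field into the cached byte limit. */
static uint32_t sketch_expand_limit(uint32_t raw_limit20, int g)
{
    return g ? (raw_limit20 << 12) | 0xfff : raw_limit20;
}

/* Recover G from the cached byte limit, mirroring reg->g = !!(reg->limit >> 20). */
static int sketch_recover_g(uint32_t byte_limit)
{
    return !!(byte_limit >> 20);
}

int main(void)
{
    uint32_t flat = sketch_expand_limit(0xfffff, 1);  /* 4GiB flat segment   */
    uint32_t tiny = sketch_expand_limit(0xfffff, 0);  /* 1MiB, byte-granular */

    printf("flat: limit=%#x g=%d\n", flat, sketch_recover_g(flat)); /* g=1 */
    printf("tiny: limit=%#x g=%d\n", tiny, sketch_recover_g(tiny)); /* g=0 */
    return 0;
}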
4898 | | |
4899 | | void hvm_set_segment_register(struct vcpu *v, enum x86_segment seg, |
4900 | | struct segment_register *reg) |
4901 | 115 | { |
4902 | 115 | /* Set G to match the limit field. VT-x cares, while SVM doesn't. */ |
4903 | 115 | if ( reg->p ) |
4904 | 92 | reg->g = !!(reg->limit >> 20); |
4905 | 115 | |
4906 | 115 | switch ( seg ) |
4907 | 115 | { |
4908 | 12 | case x86_seg_cs: |
4909 | 12 | ASSERT(reg->p); /* Usable. */ |
4910 | 12 | ASSERT(reg->s); /* User segment. */ |
4911 | 12 | ASSERT(reg->type & 0x1); /* Accessed. */ |
4912 | 12 | ASSERT((reg->base >> 32) == 0); /* Upper bits clear. */ |
4913 | 12 | break; |
4914 | 12 | |
4915 | 12 | case x86_seg_ss: |
4916 | 12 | if ( reg->p ) |
4917 | 12 | { |
4918 | 12 | ASSERT(reg->s); /* User segment. */ |
4919 | 12 | ASSERT(!(reg->type & 0x8)); /* Data segment. */ |
4920 | 12 | ASSERT(reg->type & 0x2); /* Writeable. */ |
4921 | 12 | ASSERT(reg->type & 0x1); /* Accessed. */ |
4922 | 12 | ASSERT((reg->base >> 32) == 0); /* Upper bits clear. */ |
4923 | 12 | } |
4924 | 12 | break; |
4925 | 12 | |
4926 | 46 | case x86_seg_ds: |
4927 | 46 | case x86_seg_es: |
4928 | 46 | case x86_seg_fs: |
4929 | 46 | case x86_seg_gs: |
4930 | 46 | if ( reg->p ) |
4931 | 45 | { |
4932 | 45 | ASSERT(reg->s); /* User segment. */ |
4933 | 45 | |
4934 | 45 | if ( reg->type & 0x8 ) |
4935 | 0 | ASSERT(reg->type & 0x2); /* Readable. */ |
4936 | 45 | |
4937 | 45 | ASSERT(reg->type & 0x1); /* Accessed. */ |
4938 | 45 | |
4939 | 45 | if ( seg == x86_seg_fs || seg == x86_seg_gs ) |
4940 | 22 | ASSERT(is_canonical_address(reg->base)); |
4941 | 45 | else |
4942 | 23 | ASSERT((reg->base >> 32) == 0); /* Upper bits clear. */ |
4943 | 45 | } |
4944 | 46 | break; |
4945 | 46 | |
4946 | 12 | case x86_seg_tr: |
4947 | 12 | ASSERT(reg->p); /* Usable. */ |
4948 | 12 | ASSERT(!reg->s); /* System segment. */ |
4949 | 12 | ASSERT(!(reg->sel & 0x4)); /* !TI. */ |
4950 | 12 | if ( reg->type == SYS_DESC_tss_busy ) |
4951 | 12 | ASSERT(is_canonical_address(reg->base)); |
4952 | 0 | else if ( reg->type == SYS_DESC_tss16_busy ) |
4953 | 0 | ASSERT((reg->base >> 32) == 0); |
4954 | 0 | else |
4955 | 0 | ASSERT(!"%tr typecheck failure"); |
4956 | 12 | break; |
4957 | 46 | |
4958 | 11 | case x86_seg_ldtr: |
4959 | 11 | if ( reg->p ) |
4960 | 11 | { |
4961 | 11 | ASSERT(!reg->s); /* System segment. */ |
4962 | 11 | ASSERT(!(reg->sel & 0x4)); /* !TI. */ |
4963 | 11 | ASSERT(reg->type == SYS_DESC_ldt); |
4964 | 11 | ASSERT(is_canonical_address(reg->base)); |
4965 | 11 | } |
4966 | 11 | break; |
4967 | 46 | |
4968 | 22 | case x86_seg_gdtr: |
4969 | 22 | case x86_seg_idtr: |
4970 | 22 | ASSERT(is_canonical_address(reg->base)); |
4971 | 22 | ASSERT((reg->limit >> 16) == 0); /* Upper bits clear. */ |
4972 | 22 | break; |
4973 | 22 | |
4974 | 0 | default: |
4975 | 0 | ASSERT_UNREACHABLE(); |
4976 | 0 | return; |
4977 | 115 | } |
4978 | 115 | |
4979 | 115 | hvm_funcs.set_segment_register(v, seg, reg); |
4980 | 115 | } |
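
/*
 * A minimal sketch (not part of hvm.c) decoding the 4-bit type field that
 * the ASSERT()s in hvm_set_segment_register() test with 0x1/0x2/0x8.  For
 * user segments (S=1): bit 3 distinguishes code from data, bit 0 is the
 * Accessed flag, and bit 1 means Writable for data or Readable for code.
 * The struct and function names are hypothetical stand-ins.
 */
#include <stdbool.h>
#include <stdio.h>

struct sketch_user_seg {
    unsigned int type:4;    /* descriptor type field; S=1 assumed */
};

static bool sketch_is_code(struct sketch_user_seg s)     { return s.type & 0x8; }
static bool sketch_is_accessed(struct sketch_user_seg s) { return s.type & 0x1; }

/* Writable data segment, or readable code segment. */
static bool sketch_is_rw_or_readable(struct sketch_user_seg s)
{
    return s.type & 0x2;
}

int main(void)
{
    /* 0xb = 1011b: code, readable, accessed - a typical flat %cs. */
    struct sketch_user_seg cs = { .type = 0xb };
    /* 0x3 = 0011b: data, writable, accessed - a typical flat %ss/%ds. */
    struct sketch_user_seg ds = { .type = 0x3 };

    printf("cs: code=%d acc=%d r=%d\n", sketch_is_code(cs),
           sketch_is_accessed(cs), sketch_is_rw_or_readable(cs));
    printf("ds: code=%d acc=%d w=%d\n", sketch_is_code(ds),
           sketch_is_accessed(ds), sketch_is_rw_or_readable(ds));
    return 0;
}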
4981 | | |
4982 | | /* |
4983 | | * Local variables: |
4984 | | * mode: C |
4985 | | * c-file-style: "BSD" |
4986 | | * c-basic-offset: 4 |
4987 | | * tab-width: 4 |
4988 | | * indent-tabs-mode: nil |
4989 | | * End: |
4990 | | */ |
4991 | | |