/root/src/xen/xen/arch/x86/xstate.c
Line | Count | Source |
1 | | /* |
2 | | * arch/x86/xstate.c |
3 | | * |
4 | | * x86 extended state operations |
5 | | * |
6 | | */ |
7 | | |
8 | | #include <xen/percpu.h> |
9 | | #include <xen/sched.h> |
10 | | #include <asm/current.h> |
11 | | #include <asm/processor.h> |
12 | | #include <asm/hvm/support.h> |
13 | | #include <asm/i387.h> |
14 | | #include <asm/xstate.h> |
15 | | #include <asm/asm_defns.h> |
16 | | |
17 | | /* |
18 | | * Maximum size (in bytes) of the XSAVE/XRSTOR save area required by all |
19 | | * the supported and enabled features on the processor, including the |
20 | | * XSAVE header. We only enable the features in XCNTXT_MASK that Xen knows about. |
21 | | */ |
22 | | static u32 __read_mostly xsave_cntxt_size; |
23 | | |
24 | | /* A 64-bit bitmask of the XSAVE/XRSTOR features supported by the processor. */ |
25 | | u64 __read_mostly xfeature_mask; |
26 | | |
27 | | unsigned int *__read_mostly xstate_offsets; |
28 | | unsigned int *__read_mostly xstate_sizes; |
29 | | u64 __read_mostly xstate_align; |
30 | | static unsigned int __read_mostly xstate_features; |
31 | | |
32 | | uint32_t __read_mostly mxcsr_mask = 0x0000ffbf; |
33 | | |
34 | | /* Cached xcr0 for fast read */ |
35 | | static DEFINE_PER_CPU(uint64_t, xcr0); |
36 | | |
37 | | /* Because XCR0 is cached for each CPU, xsetbv() is not exposed. Users should |
38 | | * use set_xcr0() instead. |
39 | | */ |
40 | | static inline bool xsetbv(u32 index, u64 xfeatures) |
41 | 1.19k | { |
42 | 1.19k | u32 hi = xfeatures >> 32; |
43 | 1.19k | u32 lo = (u32)xfeatures; |
44 | 1.19k | |
45 | 1.19k | asm volatile ( "1: .byte 0x0f,0x01,0xd1\n" |
46 | 1.19k | "3: \n" |
47 | 1.19k | ".section .fixup,\"ax\" \n" |
48 | 1.19k | "2: xor %0,%0 \n" |
49 | 1.19k | " jmp 3b \n" |
50 | 1.19k | ".previous \n" |
51 | 1.19k | _ASM_EXTABLE(1b, 2b) |
52 | 1.19k | : "+a" (lo) |
53 | 1.19k | : "c" (index), "d" (hi)); |
54 | 1.19k | return lo != 0; |
55 | 1.19k | } |
56 | | |
57 | | bool set_xcr0(u64 xfeatures) |
58 | 1.19k | { |
59 | 1.19k | if ( !xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures) ) |
60 | 0 | return false; |
61 | 1.19k | this_cpu(xcr0) = xfeatures; |
62 | 1.19k | return true; |
63 | 1.19k | } |
64 | | |
65 | | uint64_t get_xcr0(void) |
66 | 37.0k | { |
67 | 37.0k | return this_cpu(xcr0); |
68 | 37.0k | } |
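
set_xcr0() pairs the xsetbv instruction with a per-CPU cache so that get_xcr0() never has to execute an instruction at all. For comparison, a minimal sketch of the uncached read via xgetbv, assuming GCC-style inline asm on x86 and an OS that has set CR4.OSXSAVE (this is illustrative user-space code, not part of Xen):

    #include <stdint.h>
    #include <stdio.h>

    /* Uncached XCR0 read via the xgetbv instruction. get_xcr0() above
     * avoids this round trip by returning the per-CPU cache that
     * set_xcr0() maintains. Requires CPUID.1:ECX.OSXSAVE to be set. */
    static inline uint64_t xgetbv(uint32_t index)
    {
        uint32_t lo, hi;

        asm volatile ( "xgetbv" : "=a" (lo), "=d" (hi) : "c" (index) );
        return ((uint64_t)hi << 32) | lo;
    }

    int main(void)
    {
        /* Index 0 is XCR_XFEATURE_ENABLED_MASK, i.e. XCR0. */
        printf("XCR0 = %#llx\n", (unsigned long long)xgetbv(0));
        return 0;
    }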
69 | | |
70 | | /* Cached xss for fast read */ |
71 | | static DEFINE_PER_CPU(uint64_t, xss); |
72 | | |
73 | | void set_msr_xss(u64 xss) |
74 | 0 | { |
75 | 0 | u64 *this_xss = &this_cpu(xss); |
76 | 0 | |
77 | 0 | if ( *this_xss != xss ) |
78 | 0 | { |
79 | 0 | wrmsrl(MSR_IA32_XSS, xss); |
80 | 0 | *this_xss = xss; |
81 | 0 | } |
82 | 0 | } |
83 | | |
84 | | uint64_t get_msr_xss(void) |
85 | 0 | { |
86 | 0 | return this_cpu(xss); |
87 | 0 | } |
88 | | |
89 | | static int setup_xstate_features(bool bsp) |
90 | 12 | { |
91 | 12 | unsigned int leaf, eax, ebx, ecx, edx; |
92 | 12 | |
93 | 12 | if ( bsp ) |
94 | 1 | { |
95 | 1 | xstate_features = flsl(xfeature_mask); |
96 | 1 | xstate_offsets = xzalloc_array(unsigned int, xstate_features); |
97 | 1 | if ( !xstate_offsets ) |
98 | 0 | return -ENOMEM; |
99 | 1 | |
100 | 1 | xstate_sizes = xzalloc_array(unsigned int, xstate_features); |
101 | 1 | if ( !xstate_sizes ) |
102 | 0 | return -ENOMEM; |
103 | 1 | } |
104 | 12 | |
105 | 24 | for ( leaf = 2; leaf < xstate_features; leaf++ ) |
106 | 12 | { |
107 | 12 | if ( bsp ) |
108 | 1 | { |
109 | 1 | cpuid_count(XSTATE_CPUID, leaf, &xstate_sizes[leaf], |
110 | 1 | &xstate_offsets[leaf], &ecx, &edx); |
111 | 1 | if ( ecx & XSTATE_ALIGN64 ) |
112 | 0 | __set_bit(leaf, &xstate_align); |
113 | 1 | } |
114 | 12 | else |
115 | 11 | { |
116 | 11 | cpuid_count(XSTATE_CPUID, leaf, &eax, |
117 | 11 | &ebx, &ecx, &edx); |
118 | 11 | BUG_ON(eax != xstate_sizes[leaf]); |
119 | 11 | BUG_ON(ebx != xstate_offsets[leaf]); |
120 | 11 | BUG_ON(!(ecx & XSTATE_ALIGN64) != !test_bit(leaf, &xstate_align)); |
121 | 11 | } |
122 | 12 | } |
123 | 12 | |
124 | 12 | return 0; |
125 | 12 | } |
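
setup_xstate_features() records each component's size and non-compacted offset from CPUID leaf 0xD, with the sub-leaf equal to the component index; APs merely re-read the leaf and BUG() if they disagree with the BSP. A hedged user-space sketch of the same enumeration, assuming GCC/Clang's <cpuid.h>:

    #include <cpuid.h>
    #include <stdio.h>

    /* Minimal sketch of the enumeration setup_xstate_features() performs:
     * CPUID leaf 0xD, sub-leaf i (i >= 2) reports component i's size in
     * EAX, its non-compacted offset in EBX, and flags in ECX (bit 1 is
     * the 64-byte-alignment flag checked against XSTATE_ALIGN64 above). */
    int main(void)
    {
        unsigned int eax, ebx, ecx, edx, leaf;

        for ( leaf = 2; leaf < 63; leaf++ )
        {
            if ( !__get_cpuid_count(0xd, leaf, &eax, &ebx, &ecx, &edx) ||
                 !eax )
                continue; /* Component not supported. */
            printf("xstate[%u]: size %u, offset %u, align64 %u\n",
                   leaf, eax, ebx, (ecx & 2) ? 1 : 0);
        }
        return 0;
    }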
126 | | |
127 | | static void setup_xstate_comp(uint16_t *comp_offsets, |
128 | | const uint64_t xcomp_bv) |
129 | 0 | { |
130 | 0 | unsigned int i; |
131 | 0 | unsigned int offset; |
132 | 0 | |
133 | 0 | /* |
134 | 0 | * The FP xstates and SSE xstates are legacy states. They are always |
135 | 0 | * in the fixed offsets in the xsave area in either compacted form |
136 | 0 | * or standard form. |
137 | 0 | */ |
138 | 0 | comp_offsets[0] = 0; |
139 | 0 | comp_offsets[1] = XSAVE_SSE_OFFSET; |
140 | 0 | |
141 | 0 | comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE; |
142 | 0 | |
143 | 0 | offset = comp_offsets[2]; |
144 | 0 | for ( i = 2; i < xstate_features; i++ ) |
145 | 0 | { |
146 | 0 | if ( (1ul << i) & xcomp_bv ) |
147 | 0 | { |
148 | 0 | if ( test_bit(i, &xstate_align) ) |
149 | 0 | offset = ROUNDUP(offset, 64); |
150 | 0 | comp_offsets[i] = offset; |
151 | 0 | offset += xstate_sizes[i]; |
152 | 0 | } |
153 | 0 | } |
154 | 0 | ASSERT(offset <= xsave_cntxt_size); |
155 | 0 | } |
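
The packing rule implemented above: components 0 and 1 keep their legacy offsets, and every other enabled component is laid out in ascending index order starting right after the legacy area plus header (FXSAVE_SIZE + XSAVE_HDR_SIZE = 576 bytes), rounded up to a 64-byte boundary when flagged in xstate_align. A standalone sketch of that rule; the component sizes and alignment bits below are made-up stand-ins for what CPUID leaf 0xD reports, not values from any particular CPU:

    #include <stdint.h>
    #include <stdio.h>

    #define FXSAVE_SIZE    512
    #define XSAVE_HDR_SIZE  64
    #define ROUNDUP(x, a)  (((x) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
        /* Hypothetical components 2..4; 3 and 4 require 64-byte alignment. */
        unsigned int sizes[5] = { 0, 0, 256, 64, 512 };
        uint64_t align = (1ull << 3) | (1ull << 4);
        uint64_t xcomp_bv = (1ull << 2) | (1ull << 3) | (1ull << 4);
        unsigned int i, offset = FXSAVE_SIZE + XSAVE_HDR_SIZE; /* = 576 */

        for ( i = 2; i < 5; i++ )
        {
            if ( !(xcomp_bv & (1ull << i)) )
                continue;
            if ( align & (1ull << i) )
                offset = ROUNDUP(offset, 64);
            printf("component %u at offset %u (size %u)\n",
                   i, offset, sizes[i]);
            offset += sizes[i];
        }
        return 0;
    }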
156 | | |
157 | | /* |
158 | | * Serialise a vcpu's xsave state into a representation suitable for the |
159 | | * toolstack. |
160 | | * |
161 | | * Internally a vcpu's xsave state may be compressed or uncompressed, depending |
162 | | * on the features in use, but the ABI with the toolstack is strictly |
163 | | * uncompressed. |
164 | | * |
165 | | * It is the caller's responsibility to ensure that there is xsave state to |
166 | | * serialise, and that the provided buffer is exactly the right size. |
167 | | */ |
168 | | void expand_xsave_states(struct vcpu *v, void *dest, unsigned int size) |
169 | 0 | { |
170 | 0 | const struct xsave_struct *xsave = v->arch.xsave_area; |
171 | 0 | const void *src; |
172 | 0 | uint16_t comp_offsets[sizeof(xfeature_mask)*8]; |
173 | 0 | u64 xstate_bv = xsave->xsave_hdr.xstate_bv; |
174 | 0 | u64 valid; |
175 | 0 |
|
176 | 0 | /* Check there is state to serialise (i.e. at least an XSAVE_HDR) */ |
177 | 0 | BUG_ON(!v->arch.xcr0_accum); |
178 | 0 | /* Check there is the correct room to decompress into. */ |
179 | 0 | BUG_ON(size != xstate_ctxt_size(v->arch.xcr0_accum)); |
180 | 0 | |
181 | 0 | if ( !(xsave->xsave_hdr.xcomp_bv & XSTATE_COMPACTION_ENABLED) ) |
182 | 0 | { |
183 | 0 | memcpy(dest, xsave, size); |
184 | 0 | return; |
185 | 0 | } |
186 | 0 | |
187 | 0 | ASSERT(xsave_area_compressed(xsave)); |
188 | 0 | setup_xstate_comp(comp_offsets, xsave->xsave_hdr.xcomp_bv); |
189 | 0 | |
190 | 0 | /* |
191 | 0 | * Copy legacy XSAVE area and XSAVE hdr area. |
192 | 0 | */ |
193 | 0 | memcpy(dest, xsave, XSTATE_AREA_MIN_SIZE); |
194 | 0 | memset(dest + XSTATE_AREA_MIN_SIZE, 0, size - XSTATE_AREA_MIN_SIZE); |
195 | 0 | |
196 | 0 | ((struct xsave_struct *)dest)->xsave_hdr.xcomp_bv = 0; |
197 | 0 | |
198 | 0 | /* |
199 | 0 | * Copy each region from the possibly compacted offset to the |
200 | 0 | * non-compacted offset. |
201 | 0 | */ |
202 | 0 | src = xsave; |
203 | 0 | valid = xstate_bv & ~XSTATE_FP_SSE; |
204 | 0 | while ( valid ) |
205 | 0 | { |
206 | 0 | u64 feature = valid & -valid; |
207 | 0 | unsigned int index = fls(feature) - 1; |
208 | 0 | |
209 | 0 | /* |
210 | 0 | * We previously verified xstate_bv. If there isn't valid |
211 | 0 | * comp_offsets[] information, something is very broken. |
212 | 0 | */ |
213 | 0 | BUG_ON(!comp_offsets[index]); |
214 | 0 | BUG_ON((xstate_offsets[index] + xstate_sizes[index]) > size); |
215 | 0 | |
216 | 0 | memcpy(dest + xstate_offsets[index], src + comp_offsets[index], |
217 | 0 | xstate_sizes[index]); |
218 | 0 | |
219 | 0 | valid &= ~feature; |
220 | 0 | } |
221 | 0 | } |
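
The copy loop walks the set bits of xstate_bv with `valid & -valid`, which isolates the lowest set bit of an unsigned value; the same idiom reappears in compress_xsave_states() below. A tiny demonstration, using __builtin_ctzll where the code above uses fls() - 1 (both recover the index of a single set bit):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t valid = 0xe4; /* example: bits 2, 5, 6, 7 set */

        while ( valid )
        {
            uint64_t feature = valid & -valid;   /* lowest set bit */
            unsigned int index = __builtin_ctzll(feature);

            printf("processing component %u\n", index);
            valid &= ~feature;                   /* clear it, advance */
        }
        return 0;
    }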
222 | | |
223 | | /* |
224 | | * Deserialise a toolstack's xsave state representation suitable for a vcpu. |
225 | | * |
226 | | * Internally a vcpu's xsave state may be compressed or uncompressed, depending |
227 | | * on the features in use, but the ABI with the toolstack is strictly |
228 | | * uncompressed. |
229 | | * |
230 | | * It is the caller's responsibility to ensure that the source buffer contains |
231 | | * xsave state, is uncompressed, and is exactly the right size. |
232 | | */ |
233 | | void compress_xsave_states(struct vcpu *v, const void *src, unsigned int size) |
234 | 0 | { |
235 | 0 | struct xsave_struct *xsave = v->arch.xsave_area; |
236 | 0 | void *dest; |
237 | 0 | uint16_t comp_offsets[sizeof(xfeature_mask)*8]; |
238 | 0 | u64 xstate_bv, valid; |
239 | 0 | |
240 | 0 | BUG_ON(!v->arch.xcr0_accum); |
241 | 0 | BUG_ON(size != xstate_ctxt_size(v->arch.xcr0_accum)); |
242 | 0 | ASSERT(!xsave_area_compressed(src)); |
243 | 0 | |
244 | 0 | xstate_bv = ((const struct xsave_struct *)src)->xsave_hdr.xstate_bv; |
245 | 0 | |
246 | 0 | if ( !(v->arch.xcr0_accum & XSTATE_XSAVES_ONLY) ) |
247 | 0 | { |
248 | 0 | memcpy(xsave, src, size); |
249 | 0 | return; |
250 | 0 | } |
251 | 0 | |
252 | 0 | /* |
253 | 0 | * Copy legacy XSAVE area, to avoid complications with CPUID |
254 | 0 | * leaves 0 and 1 in the loop below. |
255 | 0 | */ |
256 | 0 | memcpy(xsave, src, FXSAVE_SIZE); |
257 | 0 | |
258 | 0 | /* Set XSTATE_BV and XCOMP_BV. */ |
259 | 0 | xsave->xsave_hdr.xstate_bv = xstate_bv; |
260 | 0 | xsave->xsave_hdr.xcomp_bv = v->arch.xcr0_accum | XSTATE_COMPACTION_ENABLED; |
261 | 0 | |
262 | 0 | setup_xstate_comp(comp_offsets, xsave->xsave_hdr.xcomp_bv); |
263 | 0 | |
264 | 0 | /* |
265 | 0 | * Copy each region from the non-compacted offset to the |
266 | 0 | * possibly compacted offset. |
267 | 0 | */ |
268 | 0 | dest = xsave; |
269 | 0 | valid = xstate_bv & ~XSTATE_FP_SSE; |
270 | 0 | while ( valid ) |
271 | 0 | { |
272 | 0 | u64 feature = valid & -valid; |
273 | 0 | unsigned int index = fls(feature) - 1; |
274 | 0 | |
275 | 0 | /* |
276 | 0 | * We previously verified xstate_bv. If we don't have valid |
277 | 0 | * comp_offsets[] information, something is very broken. |
278 | 0 | */ |
279 | 0 | BUG_ON(!comp_offsets[index]); |
280 | 0 | BUG_ON((xstate_offsets[index] + xstate_sizes[index]) > size); |
281 | 0 | |
282 | 0 | memcpy(dest + comp_offsets[index], src + xstate_offsets[index], |
283 | 0 | xstate_sizes[index]); |
284 | 0 | |
285 | 0 | valid &= ~feature; |
286 | 0 | } |
287 | 0 | } |
288 | | |
289 | | void xsave(struct vcpu *v, uint64_t mask) |
290 | 277 | { |
291 | 277 | struct xsave_struct *ptr = v->arch.xsave_area; |
292 | 277 | uint32_t hmask = mask >> 32; |
293 | 277 | uint32_t lmask = mask; |
294 | 277 | unsigned int fip_width = v->domain->arch.x87_fip_width; |
295 | 277 | #define XSAVE(pfx) \ |
296 | 277 | if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \ |
297 | 0 | asm volatile ( ".byte " pfx "0x0f,0xc7,0x2f\n" /* xsaves */ \ |
298 | 0 | : "=m" (*ptr) \ |
299 | 0 | : "a" (lmask), "d" (hmask), "D" (ptr) ); \ |
300 | 277 | else \ |
301 | 277 | alternative_io(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */ \ |
302 | 277 | ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \ |
303 | 277 | X86_FEATURE_XSAVEOPT, \ |
304 | 277 | "=m" (*ptr), \ |
305 | 277 | "a" (lmask), "d" (hmask), "D" (ptr)) |
306 | 277 | |
307 | 277 | if ( fip_width == 8 || !(mask & XSTATE_FP) ) |
308 | 0 | { |
309 | 0 | XSAVE("0x48,"); |
310 | 0 | } |
311 | 277 | else if ( fip_width == 4 ) |
312 | 0 | { |
313 | 0 | XSAVE(""); |
314 | 0 | } |
315 | 277 | else |
316 | 277 | { |
317 | 277 | /* |
318 | 277 | * FIP/FDP may not be written in some cases (e.g., if XSAVEOPT/XSAVES |
319 | 277 | * is used, or on AMD CPUs if an exception isn't pending). |
320 | 277 | * |
321 | 277 | * To tell if the hardware writes these fields, poison the FIP field. |
322 | 277 | * The poison is |
323 | 277 | * a) non-canonical |
324 | 277 | * b) non-zero for the reserved part of a 32-bit FCS:FIP |
325 | 277 | * c) random, with a vanishingly small probability (~1e-19) of matching |
326 | 277 | * a value the hardware may write, even if it did not canonicalize the |
327 | 277 | * 64-bit FIP or zero-extend the 16-bit FCS. |
328 | 277 | */ |
329 | 277 | uint64_t orig_fip = ptr->fpu_sse.fip.addr; |
330 | 277 | const uint64_t bad_fip = 0x6a3f5c4b13a533f6; |
331 | 277 | |
332 | 277 | ptr->fpu_sse.fip.addr = bad_fip; |
333 | 277 | |
334 | 277 | XSAVE("0x48,"); |
335 | 277 | |
336 | 277 | /* FIP/FDP not updated? Restore the old FIP value. */ |
337 | 277 | if ( ptr->fpu_sse.fip.addr == bad_fip ) |
338 | 12 | { |
339 | 12 | ptr->fpu_sse.fip.addr = orig_fip; |
340 | 12 | return; |
341 | 12 | } |
342 | 277 | |
343 | 277 | /* |
344 | 277 | * If the FIP/FDP[63:32] are both zero, it is safe to use the |
345 | 277 | * 32-bit restore to also restore the selectors. |
346 | 277 | */ |
347 | 265 | if ( !((ptr->fpu_sse.fip.addr | ptr->fpu_sse.fdp.addr) >> 32) ) |
348 | 265 | { |
349 | 265 | struct ix87_env fpu_env; |
350 | 265 | |
351 | 265 | asm volatile ( "fnstenv %0" : "=m" (fpu_env) ); |
352 | 265 | ptr->fpu_sse.fip.sel = fpu_env.fcs; |
353 | 265 | ptr->fpu_sse.fdp.sel = fpu_env.fds; |
354 | 265 | fip_width = 4; |
355 | 265 | } |
356 | 265 | else |
357 | 0 | fip_width = 8; |
358 | 265 | } |
359 | 277 | #undef XSAVE |
360 | 265 | if ( mask & XSTATE_FP ) |
361 | 265 | ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] = fip_width; |
362 | 265 | } |
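
The poison-and-check trick above is worth isolating: seed FIP with a value the hardware can never legitimately produce, execute the save, and conclude FIP/FDP were skipped if the sentinel survives. A deliberately simplified sketch of just that pattern; fake_area and maybe_save() are hypothetical stand-ins, not Xen code:

    #include <stdint.h>
    #include <stdio.h>

    struct fake_area { uint64_t fip; };

    /* Stand-in for XSAVE, which may or may not write the FIP field. */
    static void maybe_save(struct fake_area *a, int hw_writes_fip)
    {
        if ( hw_writes_fip )
            a->fip = 0x00007f0012345678ull; /* stand-in for a real FIP */
    }

    int main(void)
    {
        const uint64_t bad_fip = 0x6a3f5c4b13a533f6ull; /* non-canonical */
        struct fake_area a;

        a.fip = bad_fip;          /* poison before the save */
        maybe_save(&a, 0);
        printf("FIP %s written by 'hardware'\n",
               a.fip == bad_fip ? "was not" : "was");
        return 0;
    }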
363 | | |
364 | | void xrstor(struct vcpu *v, uint64_t mask) |
365 | 286 | { |
366 | 286 | uint32_t hmask = mask >> 32; |
367 | 286 | uint32_t lmask = mask; |
368 | 286 | struct xsave_struct *ptr = v->arch.xsave_area; |
369 | 286 | unsigned int faults, prev_faults; |
370 | 286 | |
371 | 286 | /* |
372 | 286 | * AMD CPUs don't save/restore FDP/FIP/FOP unless an exception |
373 | 286 | * is pending. Clear the x87 state here by setting it to fixed |
374 | 286 | * values. The hypervisor data segment can sometimes be 0 and |
375 | 286 | * sometimes the new user value. Both should be ok. Use the FPU saved |
376 | 286 | * data block as a safe address because it should be in L1. |
377 | 286 | */ |
378 | 286 | if ( (mask & ptr->xsave_hdr.xstate_bv & XSTATE_FP) && |
379 | 276 | !(ptr->fpu_sse.fsw & ~ptr->fpu_sse.fcw & 0x003f) && |
380 | 276 | boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) |
381 | 0 | asm volatile ( "fnclex\n\t" /* clear exceptions */ |
382 | 0 | "ffree %%st(7)\n\t" /* clear stack tag */ |
383 | 0 | "fildl %0" /* load to clear state */ |
384 | 0 | : : "m" (ptr->fpu_sse) ); |
385 | 286 | |
386 | 286 | /* |
387 | 286 | * XRSTOR can fault if passed a corrupted data block. We handle this |
388 | 286 | * possibility, which may occur if the block was passed to us by control |
389 | 286 | * tools or through VCPUOP_initialise, by silently adjusting state. |
390 | 286 | */ |
391 | 0 | for ( prev_faults = faults = 0; ; prev_faults = faults ) |
392 | 286 | { |
393 | 286 | switch ( __builtin_expect(ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET], 8) ) |
394 | 286 | { |
395 | 0 | BUILD_BUG_ON(sizeof(faults) != 4); /* Clang doesn't support %z in asm. */ |
396 | 0 | #define _xrstor(insn) \ |
397 | 286 | asm volatile ( "1: .byte " insn "\n" \ |
398 | 286 | "3:\n" \ |
399 | 286 | " .section .fixup,\"ax\"\n" \ |
400 | 286 | "2: incl %[faults]\n" \ |
401 | 286 | " jmp 3b\n" \ |
402 | 286 | " .previous\n" \ |
403 | 286 | _ASM_EXTABLE(1b, 2b) \ |
404 | 286 | : [mem] "+m" (*ptr), [faults] "+g" (faults) \ |
405 | 286 | : [lmask] "a" (lmask), [hmask] "d" (hmask), \ |
406 | 286 | [ptr] "D" (ptr) ) |
407 | 0 | |
408 | 0 | #define XRSTOR(pfx) \ |
409 | 286 | if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \ |
410 | 0 | { \ |
411 | 0 | if ( unlikely(!(ptr->xsave_hdr.xcomp_bv & \ |
412 | 0 | XSTATE_COMPACTION_ENABLED)) ) \ |
413 | 0 | { \ |
414 | 0 | ASSERT(!ptr->xsave_hdr.xcomp_bv); \ |
415 | 0 | ptr->xsave_hdr.xcomp_bv = ptr->xsave_hdr.xstate_bv | \ |
416 | 0 | XSTATE_COMPACTION_ENABLED; \ |
417 | 0 | } \ |
418 | 0 | _xrstor(pfx "0x0f,0xc7,0x1f"); /* xrstors */ \ |
419 | 0 | } \ |
420 | 286 | else \ |
421 | 286 | _xrstor(pfx "0x0f,0xae,0x2f") /* xrstor */ |
422 | 0 | |
423 | 31 | default: |
424 | 31 | XRSTOR("0x48,"); |
425 | 31 | break; |
426 | 255 | case 4: case 2: |
427 | 255 | XRSTOR(""); |
428 | 255 | break; |
429 | 286 | #undef XRSTOR |
430 | 286 | #undef _xrstor |
431 | 286 | } |
432 | 288 | if ( likely(faults == prev_faults) ) |
433 | 288 | break; |
434 | 288 | #ifndef NDEBUG |
435 | 0 | gprintk(XENLOG_WARNING, "fault#%u: mxcsr=%08x\n", |
436 | 0 | faults, ptr->fpu_sse.mxcsr); |
437 | 0 | gprintk(XENLOG_WARNING, "xs=%016lx xc=%016lx\n", |
438 | 0 | ptr->xsave_hdr.xstate_bv, ptr->xsave_hdr.xcomp_bv); |
439 | 0 | gprintk(XENLOG_WARNING, "r0=%016lx r1=%016lx\n", |
440 | 0 | ptr->xsave_hdr.reserved[0], ptr->xsave_hdr.reserved[1]); |
441 | 0 | gprintk(XENLOG_WARNING, "r2=%016lx r3=%016lx\n", |
442 | 0 | ptr->xsave_hdr.reserved[2], ptr->xsave_hdr.reserved[3]); |
443 | 0 | gprintk(XENLOG_WARNING, "r4=%016lx r5=%016lx\n", |
444 | 0 | ptr->xsave_hdr.reserved[4], ptr->xsave_hdr.reserved[5]); |
445 | 0 | #endif |
446 | 0 | switch ( faults ) |
447 | 0 | { |
448 | 0 | case 1: /* Stage 1: Reset state to be loaded. */ |
449 | 0 | ptr->xsave_hdr.xstate_bv &= ~mask; |
450 | 0 | /* |
451 | 0 | * Also try to eliminate fault reasons, even if this shouldn't be |
452 | 0 | * needed here (other code should ensure the sanity of the data). |
453 | 0 | */ |
454 | 0 | if ( ((mask & XSTATE_SSE) || |
455 | 0 | ((mask & XSTATE_YMM) && |
456 | 0 | !(ptr->xsave_hdr.xcomp_bv & XSTATE_COMPACTION_ENABLED))) ) |
457 | 0 | ptr->fpu_sse.mxcsr &= mxcsr_mask; |
458 | 0 | if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) |
459 | 0 | { |
460 | 0 | ptr->xsave_hdr.xcomp_bv &= this_cpu(xcr0) | this_cpu(xss); |
461 | 0 | ptr->xsave_hdr.xstate_bv &= ptr->xsave_hdr.xcomp_bv; |
462 | 0 | ptr->xsave_hdr.xcomp_bv |= XSTATE_COMPACTION_ENABLED; |
463 | 0 | } |
464 | 0 | else |
465 | 0 | { |
466 | 0 | ptr->xsave_hdr.xstate_bv &= this_cpu(xcr0); |
467 | 0 | ptr->xsave_hdr.xcomp_bv = 0; |
468 | 0 | } |
469 | 0 | memset(ptr->xsave_hdr.reserved, 0, sizeof(ptr->xsave_hdr.reserved)); |
470 | 0 | continue; |
471 | 0 |
|
472 | 0 | case 2: /* Stage 2: Reset all state. */ |
473 | 0 | ptr->fpu_sse.mxcsr = MXCSR_DEFAULT; |
474 | 0 | ptr->xsave_hdr.xstate_bv = 0; |
475 | 0 | ptr->xsave_hdr.xcomp_bv = v->arch.xcr0_accum & XSTATE_XSAVES_ONLY |
476 | 0 | ? XSTATE_COMPACTION_ENABLED : 0; |
477 | 0 | continue; |
478 | 0 | } |
479 | 0 | |
480 | 0 | domain_crash(current->domain); |
481 | 0 | return; |
482 | 0 | } |
483 | 286 | } |
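
The recovery loop's shape: retry after each fault, escalating from header sanitisation (stage 1) to a full state reset (stage 2) before giving up via domain_crash(). A generic, purely illustrative sketch of that control flow; try_restore() is a hypothetical stand-in for the XRSTOR asm and its fixup counter:

    #include <stdio.h>

    /* Pretend only a fully reset state loads cleanly. */
    static int try_restore(int quality)
    {
        return quality >= 2;
    }

    int main(void)
    {
        unsigned int faults, prev_faults;
        int quality = 0;

        for ( prev_faults = faults = 0; ; prev_faults = faults )
        {
            if ( !try_restore(quality) )
                faults++;
            if ( faults == prev_faults )
                break;                 /* no new fault: success */
            if ( faults == 1 )
                quality = 1;           /* stage 1: sanitise the header */
            else if ( faults == 2 )
                quality = 2;           /* stage 2: reset all state */
            else
            {
                puts("unrecoverable"); /* domain_crash() in Xen */
                return 1;
            }
        }
        printf("restored after %u fault(s)\n", faults);
        return 0;
    }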
484 | | |
485 | | bool xsave_enabled(const struct vcpu *v) |
486 | 0 | { |
487 | 0 | if ( !cpu_has_xsave ) |
488 | 0 | return false; |
489 | 0 |
|
490 | 0 | ASSERT(xsave_cntxt_size >= XSTATE_AREA_MIN_SIZE); |
491 | 0 | ASSERT(v->arch.xsave_area); |
492 | 0 | |
493 | 0 | return !!v->arch.xcr0_accum; |
494 | 0 | } |
495 | | |
496 | | int xstate_alloc_save_area(struct vcpu *v) |
497 | 24 | { |
498 | 24 | struct xsave_struct *save_area; |
499 | 24 | unsigned int size; |
500 | 24 | |
501 | 24 | if ( !cpu_has_xsave ) |
502 | 0 | return 0; |
503 | 24 | |
504 | 24 | if ( !is_idle_vcpu(v) || !cpu_has_xsavec ) |
505 | 24 | { |
506 | 24 | size = xsave_cntxt_size; |
507 | 24 | BUG_ON(size < XSTATE_AREA_MIN_SIZE); |
508 | 24 | } |
509 | 24 | else |
510 | 0 | { |
511 | 0 | /* |
512 | 0 | * For idle vcpus on XSAVEC-capable CPUs allocate an area large |
513 | 0 | * enough to save any individual extended state. |
514 | 0 | */ |
515 | 0 | unsigned int i; |
516 | 0 |
|
517 | 0 | for ( size = 0, i = 2; i < xstate_features; ++i ) |
518 | 0 | if ( size < xstate_sizes[i] ) |
519 | 0 | size = xstate_sizes[i]; |
520 | 0 | size += XSTATE_AREA_MIN_SIZE; |
521 | 0 | } |
522 | 24 | |
523 | 24 | /* XSAVE/XRSTOR require the save area to be 64-byte aligned. */ |
524 | 24 | BUILD_BUG_ON(__alignof(*save_area) < 64); |
525 | 24 | save_area = _xzalloc(size, __alignof(*save_area)); |
526 | 24 | if ( save_area == NULL ) |
527 | 0 | return -ENOMEM; |
528 | 24 | |
529 | 24 | /* |
530 | 24 | * Set the memory image to default values, but don't force the context |
531 | 24 | * to be loaded from memory (i.e. keep save_area->xsave_hdr.xstate_bv |
532 | 24 | * clear). |
533 | 24 | */ |
534 | 24 | save_area->fpu_sse.fcw = FCW_DEFAULT; |
535 | 24 | save_area->fpu_sse.mxcsr = MXCSR_DEFAULT; |
536 | 24 | |
537 | 24 | v->arch.xsave_area = save_area; |
538 | 24 | v->arch.xcr0 = 0; |
539 | 24 | v->arch.xcr0_accum = 0; |
540 | 24 | |
541 | 24 | return 0; |
542 | 24 | } |
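
The BUILD_BUG_ON and the alignment argument to _xzalloc() enforce the architectural requirement that XSAVE/XRSTOR operate on a 64-byte-aligned area. A user-space analogue, assuming C11 aligned_alloc() (whose size argument must be a multiple of the alignment):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int main(void)
    {
        /* e.g. legacy 512 + header 64 + a 256-byte component = 832,
         * which is conveniently a multiple of 64 as C11 requires. */
        size_t size = 832;
        void *area = aligned_alloc(64, size);

        if ( !area )
            return 1;
        memset(area, 0, size); /* _xzalloc() zeroes for us in Xen */
        printf("area %p, 64-byte aligned: %s\n", area,
               ((uintptr_t)area & 63) ? "no" : "yes");
        free(area);
        return 0;
    }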
543 | | |
544 | | void xstate_free_save_area(struct vcpu *v) |
545 | 0 | { |
546 | 0 | xfree(v->arch.xsave_area); |
547 | 0 | v->arch.xsave_area = NULL; |
548 | 0 | } |
549 | | |
550 | | static unsigned int _xstate_ctxt_size(u64 xcr0) |
551 | 12 | { |
552 | 12 | u64 act_xcr0 = get_xcr0(); |
553 | 12 | u32 eax, ebx = 0, ecx, edx; |
554 | 12 | bool ok = set_xcr0(xcr0); |
555 | 12 | |
556 | 12 | ASSERT(ok); |
557 | 12 | cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); |
558 | 12 | ASSERT(ebx <= ecx); |
559 | 12 | ok = set_xcr0(act_xcr0); |
560 | 12 | ASSERT(ok); |
561 | 12 | |
562 | 12 | return ebx; |
563 | 12 | } |
564 | | |
565 | | /* Fastpath for common xstate size requests, avoiding reloads of xcr0. */ |
566 | | unsigned int xstate_ctxt_size(u64 xcr0) |
567 | 1 | { |
568 | 1 | if ( xcr0 == xfeature_mask ) |
569 | 1 | return xsave_cntxt_size; |
570 | 1 | |
571 | 0 | if ( xcr0 == 0 ) |
572 | 0 | return 0; |
573 | 0 |
|
574 | 0 | return _xstate_ctxt_size(xcr0); |
575 | 0 | } |
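
_xstate_ctxt_size() works by temporarily loading the XCR0 of interest and re-reading CPUID leaf 0xD sub-leaf 0, where EBX reports the save-area size for the currently enabled features and ECX the size for all supported features (hence the ebx <= ecx assertion). User space cannot change XCR0, but it can observe the same leaf for the OS's current value; a sketch assuming GCC/Clang's <cpuid.h>:

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if ( !__get_cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx) )
            return 1; /* no XSAVE enumeration on this CPU */
        printf("size for current XCR0: %u, max size: %u\n", ebx, ecx);
        return 0;
    }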
576 | | |
577 | | /* Collect information about the processor's extended states. */ |
578 | | void xstate_init(struct cpuinfo_x86 *c) |
579 | 12 | { |
580 | 12 | static bool __initdata use_xsave = true; |
581 | 12 | boolean_param("xsave", use_xsave); |
582 | 12 | |
583 | 12 | bool bsp = c == &boot_cpu_data; |
584 | 12 | u32 eax, ebx, ecx, edx; |
585 | 12 | u64 feature_mask; |
586 | 12 | |
587 | 12 | if ( (bsp && !use_xsave) || |
588 | 12 | boot_cpu_data.cpuid_level < XSTATE_CPUID ) |
589 | 0 | { |
590 | 0 | BUG_ON(!bsp); |
591 | 0 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); |
592 | 0 | return; |
593 | 0 | } |
594 | 12 | |
595 | 12 | cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); |
596 | 12 | |
597 | 12 | BUG_ON((eax & XSTATE_FP_SSE) != XSTATE_FP_SSE); |
598 | 12 | BUG_ON((eax & XSTATE_YMM) && !(eax & XSTATE_SSE)); |
599 | 12 | feature_mask = (((u64)edx << 32) | eax) & XCNTXT_MASK; |
600 | 12 | |
601 | 12 | /* |
602 | 12 | * Set CR4_OSXSAVE and run "cpuid" to get xsave_cntxt_size. |
603 | 12 | */ |
604 | 12 | set_in_cr4(X86_CR4_OSXSAVE); |
605 | 12 | if ( !set_xcr0(feature_mask) ) |
606 | 0 | BUG(); |
607 | 12 | |
608 | 12 | if ( bsp ) |
609 | 1 | { |
610 | 1 | static typeof(current->arch.xsave_area->fpu_sse) __initdata ctxt; |
611 | 1 | |
612 | 1 | xfeature_mask = feature_mask; |
613 | 1 | /* |
614 | 1 | * xsave_cntxt_size is the max size required by enabled features. |
615 | 1 | * We know FP/SSE and YMM about eax, and nothing about edx at present. |
616 | 1 | */ |
617 | 1 | xsave_cntxt_size = _xstate_ctxt_size(feature_mask); |
618 | 1 | printk("xstate: size: %#x and states: %#"PRIx64"\n", |
619 | 1 | xsave_cntxt_size, xfeature_mask); |
620 | 1 | |
621 | 1 | asm ( "fxsave %0" : "=m" (ctxt) ); |
622 | 1 | if ( ctxt.mxcsr_mask ) |
623 | 1 | mxcsr_mask = ctxt.mxcsr_mask; |
624 | 1 | } |
625 | 12 | else |
626 | 11 | { |
627 | 11 | BUG_ON(xfeature_mask != feature_mask); |
628 | 11 | BUG_ON(xsave_cntxt_size != _xstate_ctxt_size(feature_mask)); |
629 | 11 | } |
630 | 12 | |
631 | 12 | /* Check extended XSAVE features. */ |
632 | 12 | cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); |
633 | 12 | |
634 | 12 | /* Mask out features not currently understood by Xen. */ |
635 | 12 | eax &= (cpufeat_mask(X86_FEATURE_XSAVEOPT) | |
636 | 12 | cpufeat_mask(X86_FEATURE_XSAVEC) | |
637 | 12 | cpufeat_mask(X86_FEATURE_XGETBV1) | |
638 | 12 | cpufeat_mask(X86_FEATURE_XSAVES)); |
639 | 12 | |
640 | 12 | c->x86_capability[cpufeat_word(X86_FEATURE_XSAVEOPT)] = eax; |
641 | 12 | |
642 | 12 | BUG_ON(eax != boot_cpu_data.x86_capability[cpufeat_word(X86_FEATURE_XSAVEOPT)]); |
643 | 12 | |
644 | 12 | if ( setup_xstate_features(bsp) && bsp ) |
645 | 0 | BUG(); |
646 | 12 | } |
647 | | |
648 | | static bool valid_xcr0(u64 xcr0) |
649 | 11 | { |
650 | 11 | /* FP must be unconditionally set. */ |
651 | 11 | if ( !(xcr0 & XSTATE_FP) ) |
652 | 0 | return false; |
653 | 11 | |
654 | 11 | /* YMM depends on SSE. */ |
655 | 11 | if ( (xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE) ) |
656 | 0 | return false; |
657 | 11 | |
658 | 11 | if ( xcr0 & (XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM) ) |
659 | 0 | { |
660 | 0 | /* OPMASK, ZMM, and HI_ZMM require YMM. */ |
661 | 0 | if ( !(xcr0 & XSTATE_YMM) ) |
662 | 0 | return false; |
663 | 0 |
|
664 | 0 | /* OPMASK, ZMM, and HI_ZMM must be the same. */ |
665 | 0 | if ( ~xcr0 & (XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM) ) |
666 | 0 | return false; |
667 | 0 | } |
668 | 11 | |
669 | 11 | /* BNDREGS and BNDCSR must be set or cleared together. */ |
670 | 11 | return !(xcr0 & XSTATE_BNDREGS) == !(xcr0 & XSTATE_BNDCSR); |
671 | 11 | } |
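
These dependency rules mirror the architectural XSETBV requirements. A self-contained restatement with a few test vectors, using the architectural bit positions (FP=0, SSE=1, YMM=2, BNDREGS=3, BNDCSR=4, OPMASK=5, ZMM=6, HI_ZMM=7):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define XSTATE_FP      (1ull << 0)
    #define XSTATE_SSE     (1ull << 1)
    #define XSTATE_YMM     (1ull << 2)
    #define XSTATE_BNDREGS (1ull << 3)
    #define XSTATE_BNDCSR  (1ull << 4)
    #define XSTATE_OPMASK  (1ull << 5)
    #define XSTATE_ZMM     (1ull << 6)
    #define XSTATE_HI_ZMM  (1ull << 7)
    #define XSTATE_AVX512  (XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)

    static bool valid_xcr0(uint64_t xcr0)
    {
        if ( !(xcr0 & XSTATE_FP) )
            return false;                     /* FP is mandatory */
        if ( (xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE) )
            return false;                     /* YMM needs SSE */
        if ( (xcr0 & XSTATE_AVX512) &&
             (!(xcr0 & XSTATE_YMM) || (~xcr0 & XSTATE_AVX512)) )
            return false;         /* AVX-512 needs YMM, and all-or-none */
        return !(xcr0 & XSTATE_BNDREGS) == !(xcr0 & XSTATE_BNDCSR);
    }

    int main(void)
    {
        printf("%d %d %d\n",
               valid_xcr0(XSTATE_FP | XSTATE_SSE | XSTATE_YMM),  /* 1 */
               valid_xcr0(XSTATE_FP | XSTATE_YMM),               /* 0 */
               valid_xcr0(XSTATE_FP | XSTATE_SSE | XSTATE_YMM |
                          XSTATE_OPMASK));                       /* 0 */
        return 0;
    }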
672 | | |
673 | | int validate_xstate(u64 xcr0, u64 xcr0_accum, const struct xsave_hdr *hdr) |
674 | 0 | { |
675 | 0 | unsigned int i; |
676 | 0 | |
677 | 0 | if ( (hdr->xstate_bv & ~xcr0_accum) || |
678 | 0 | (xcr0 & ~xcr0_accum) || |
679 | 0 | !valid_xcr0(xcr0) || |
680 | 0 | !valid_xcr0(xcr0_accum) ) |
681 | 0 | return -EINVAL; |
682 | 0 | |
683 | 0 | if ( (xcr0_accum & ~xfeature_mask) || |
684 | 0 | hdr->xcomp_bv ) |
685 | 0 | return -EOPNOTSUPP; |
686 | 0 | |
687 | 0 | for ( i = 0; i < ARRAY_SIZE(hdr->reserved); ++i ) |
688 | 0 | if ( hdr->reserved[i] ) |
689 | 0 | return -EIO; |
690 | 0 | |
691 | 0 | return 0; |
692 | 0 | } |
693 | | |
694 | | int handle_xsetbv(u32 index, u64 new_bv) |
695 | 11 | { |
696 | 11 | struct vcpu *curr = current; |
697 | 11 | u64 mask; |
698 | 11 | |
699 | 11 | if ( index != XCR_XFEATURE_ENABLED_MASK ) |
700 | 0 | return -EOPNOTSUPP; |
701 | 11 | |
702 | 11 | if ( (new_bv & ~xfeature_mask) || !valid_xcr0(new_bv) ) |
703 | 0 | return -EINVAL; |
704 | 11 | |
705 | 11 | /* XCR0.PKRU is disabled in PV mode. */ |
706 | 11 | if ( is_pv_vcpu(curr) && (new_bv & XSTATE_PKRU) ) |
707 | 0 | return -EOPNOTSUPP; |
708 | 11 | |
709 | 11 | if ( !set_xcr0(new_bv) ) |
710 | 0 | return -EFAULT; |
711 | 11 | |
712 | 11 | mask = new_bv & ~curr->arch.xcr0_accum; |
713 | 11 | curr->arch.xcr0 = new_bv; |
714 | 11 | curr->arch.xcr0_accum |= new_bv; |
715 | 11 | |
716 | 11 | /* LWP sets nonlazy_xstate_used independently. */ |
717 | 11 | if ( new_bv & (XSTATE_NONLAZY & ~XSTATE_LWP) ) |
718 | 0 | curr->arch.nonlazy_xstate_used = 1; |
719 | 11 | |
720 | 10 | mask &= curr->fpu_dirtied ? ~XSTATE_FP_SSE : XSTATE_NONLAZY; |
721 | 11 | if ( mask ) |
722 | 10 | { |
723 | 10 | unsigned long cr0 = read_cr0(); |
724 | 10 | |
725 | 10 | clts(); |
726 | 10 | if ( curr->fpu_dirtied ) |
727 | 11 | asm ( "stmxcsr %0" : "=m" (curr->arch.xsave_area->fpu_sse.mxcsr) ); |
728 | 18.4E | else if ( xstate_all(curr) ) |
729 | 0 | { |
730 | 0 | /* See the comment in i387.c:vcpu_restore_fpu_eager(). */ |
731 | 0 | mask |= XSTATE_LAZY; |
732 | 0 | curr->fpu_initialised = 1; |
733 | 0 | curr->fpu_dirtied = 1; |
734 | 0 | cr0 &= ~X86_CR0_TS; |
735 | 0 | } |
736 | 10 | xrstor(curr, mask); |
737 | 10 | if ( cr0 & X86_CR0_TS ) |
738 | 0 | write_cr0(cr0); |
739 | 10 | } |
740 | 11 | |
741 | 11 | return 0; |
742 | 11 | } |
743 | | |
744 | | uint64_t read_bndcfgu(void) |
745 | 0 | { |
746 | 0 | unsigned long cr0 = read_cr0(); |
747 | 0 | struct xsave_struct *xstate |
748 | 0 | = idle_vcpu[smp_processor_id()]->arch.xsave_area; |
749 | 0 | const struct xstate_bndcsr *bndcsr; |
750 | 0 | |
751 | 0 | ASSERT(cpu_has_mpx); |
752 | 0 | clts(); |
753 | 0 | |
754 | 0 | if ( cpu_has_xsavec ) |
755 | 0 | { |
756 | 0 | asm ( ".byte 0x0f,0xc7,0x27\n" /* xsavec */ |
757 | 0 | : "=m" (*xstate) |
758 | 0 | : "a" (XSTATE_BNDCSR), "d" (0), "D" (xstate) ); |
759 | 0 | |
760 | 0 | bndcsr = (void *)(xstate + 1); |
761 | 0 | } |
762 | 0 | else |
763 | 0 | { |
764 | 0 | asm ( ".byte 0x0f,0xae,0x27\n" /* xsave */ |
765 | 0 | : "=m" (*xstate) |
766 | 0 | : "a" (XSTATE_BNDCSR), "d" (0), "D" (xstate) ); |
767 | 0 | |
768 | 0 | bndcsr = (void *)xstate + xstate_offsets[_XSTATE_BNDCSR]; |
769 | 0 | } |
770 | 0 | |
771 | 0 | if ( cr0 & X86_CR0_TS ) |
772 | 0 | write_cr0(cr0); |
773 | 0 | |
774 | 0 | return xstate->xsave_hdr.xstate_bv & XSTATE_BNDCSR ? bndcsr->bndcfgu : 0; |
775 | 0 | } |
776 | | |
777 | | void xstate_set_init(uint64_t mask) |
778 | 0 | { |
779 | 0 | unsigned long cr0 = read_cr0(); |
780 | 0 | unsigned long xcr0 = this_cpu(xcr0); |
781 | 0 | struct vcpu *v = idle_vcpu[smp_processor_id()]; |
782 | 0 | struct xsave_struct *xstate = v->arch.xsave_area; |
783 | 0 | |
784 | 0 | if ( ~xfeature_mask & mask ) |
785 | 0 | { |
786 | 0 | ASSERT_UNREACHABLE(); |
787 | 0 | return; |
788 | 0 | } |
789 | 0 | |
790 | 0 | if ( (~xcr0 & mask) && !set_xcr0(xcr0 | mask) ) |
791 | 0 | return; |
792 | 0 | |
793 | 0 | clts(); |
794 | 0 | |
795 | 0 | memset(&xstate->xsave_hdr, 0, sizeof(xstate->xsave_hdr)); |
796 | 0 | xrstor(v, mask); |
797 | 0 | |
798 | 0 | if ( cr0 & X86_CR0_TS ) |
799 | 0 | write_cr0(cr0); |
800 | 0 | |
801 | 0 | if ( (~xcr0 & mask) && !set_xcr0(xcr0) ) |
802 | 0 | BUG(); |
803 | 0 | } |
804 | | |
805 | | /* |
806 | | * Local variables: |
807 | | * mode: C |
808 | | * c-file-style: "BSD" |
809 | | * c-basic-offset: 4 |
810 | | * tab-width: 4 |
811 | | * indent-tabs-mode: nil |
812 | | * End: |
813 | | */ |