/root/src/xen/xen/arch/x86/i387.c
Line | Count | Source |
1 | | /* |
2 | | * linux/arch/i386/kernel/i387.c |
3 | | * |
4 | | * Copyright (C) 1994 Linus Torvalds |
5 | | * |
6 | | * Pentium III FXSR, SSE support |
7 | | * General FPU state handling cleanups |
8 | | * Gareth Hughes <gareth@valinux.com>, May 2000 |
9 | | */ |
10 | | |
11 | | #include <xen/sched.h> |
12 | | #include <asm/current.h> |
13 | | #include <asm/processor.h> |
14 | | #include <asm/hvm/support.h> |
15 | | #include <asm/i387.h> |
16 | | #include <asm/xstate.h> |
17 | | #include <asm/asm_defns.h> |
18 | | |
19 | | /*******************************/ |
20 | | /* FPU Restore Functions */ |
21 | | /*******************************/ |
22 | | /* Restore x87 extended state */ |
23 | | static inline void fpu_xrstor(struct vcpu *v, uint64_t mask) |
24 | 276 | { |
25 | 276 | bool ok; |
26 | 276 | |
27 | 276 | ASSERT(v->arch.xsave_area); |
28 | 276 | /* |
29 | 276 | * XCR0 normally represents what the guest OS has set. For Xen itself, |
30 | 276 | * we set the accumulated feature mask before doing save/restore. |
31 | 276 | */ |
32 | 276 | ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE); |
33 | 276 | ASSERT(ok); |
34 | 276 | xrstor(v, mask); |
35 | 20 | ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE); |
36 | 276 | ASSERT(ok); |
37 | 276 | } |
38 | | |
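The comment above explains why xcr0_accum rather than the guest's current xcr0 is loaded for the transfer: XSAVE/XRSTOR only touch components enabled in XCR0, so a narrower XCR0 would silently skip areas the guest once enabled and could leave stale register contents behind. A minimal sketch of the invariant the bracketing maintains (get_xcr0() is the real helper from xstate.h; the assertion itself is illustrative and not present in the source):

    /* Outside fpu_xrstor()/fpu_xsave(), the active XCR0 reflects the
     * guest's own view, or plain FP|SSE before the guest first writes it. */
    ASSERT(get_xcr0() == (v->arch.xcr0 ?: XSTATE_FP_SSE));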
39 | | /* Restore x87 FPU, MMX, SSE and SSE2 state */ |
40 | | static inline void fpu_fxrstor(struct vcpu *v) |
41 | 0 | { |
42 | 0 | const typeof(v->arch.xsave_area->fpu_sse) *fpu_ctxt = v->arch.fpu_ctxt; |
43 | 0 |
44 | 0 | /* |
45 | 0 | * AMD CPUs don't save/restore FDP/FIP/FOP unless an exception |
46 | 0 | * is pending. Clear the x87 state here by setting it to fixed |
47 | 0 | * values. The hypervisor data segment can sometimes be 0 and |
48 | 0 | * sometimes the new user value; both should be fine. Use the FPU saved |
49 | 0 | * data block as a safe address because it should be in L1. |
50 | 0 | */ |
51 | 0 | if ( !(fpu_ctxt->fsw & ~fpu_ctxt->fcw & 0x003f) && |
52 | 0 | boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) |
53 | 0 | { |
54 | 0 | asm volatile ( "fnclex\n\t" |
55 | 0 | "ffree %%st(7)\n\t" /* clear stack tag */ |
56 | 0 | "fildl %0" /* load to clear state */ |
57 | 0 | : : "m" (*fpu_ctxt) ); |
58 | 0 | } |
59 | 0 |
60 | 0 | /* |
61 | 0 | * FXRSTOR can fault if passed a corrupted data block. We handle this |
62 | 0 | * possibility, which may occur if the block was passed to us by control |
63 | 0 | * tools or through VCPUOP_initialise, by silently clearing the block. |
64 | 0 | */ |
65 | 0 | switch ( __builtin_expect(fpu_ctxt->x[FPU_WORD_SIZE_OFFSET], 8) ) |
66 | 0 | { |
67 | 0 | default: |
68 | 0 | asm volatile ( |
69 | 0 | /* See below for why the operands/constraints are this way. */ |
70 | 0 | "1: " REX64_PREFIX "fxrstor (%2)\n" |
71 | 0 | ".section .fixup,\"ax\" \n" |
72 | 0 | "2: push %%"__OP"ax \n" |
73 | 0 | " push %%"__OP"cx \n" |
74 | 0 | " push %%"__OP"di \n" |
75 | 0 | " mov %2,%%"__OP"di \n" |
76 | 0 | " mov %1,%%ecx \n" |
77 | 0 | " xor %%eax,%%eax \n" |
78 | 0 | " rep ; stosl \n" |
79 | 0 | " pop %%"__OP"di \n" |
80 | 0 | " pop %%"__OP"cx \n" |
81 | 0 | " pop %%"__OP"ax \n" |
82 | 0 | " jmp 1b \n" |
83 | 0 | ".previous \n" |
84 | 0 | _ASM_EXTABLE(1b, 2b) |
85 | 0 | : |
86 | 0 | : "m" (*fpu_ctxt), "i" (sizeof(*fpu_ctxt) / 4), "R" (fpu_ctxt) ); |
87 | 0 | break; |
88 | 0 | case 4: case 2: |
89 | 0 | asm volatile ( |
90 | 0 | "1: fxrstor %0 \n" |
91 | 0 | ".section .fixup,\"ax\"\n" |
92 | 0 | "2: push %%"__OP"ax \n" |
93 | 0 | " push %%"__OP"cx \n" |
94 | 0 | " push %%"__OP"di \n" |
95 | 0 | " lea %0,%%"__OP"di \n" |
96 | 0 | " mov %1,%%ecx \n" |
97 | 0 | " xor %%eax,%%eax \n" |
98 | 0 | " rep ; stosl \n" |
99 | 0 | " pop %%"__OP"di \n" |
100 | 0 | " pop %%"__OP"cx \n" |
101 | 0 | " pop %%"__OP"ax \n" |
102 | 0 | " jmp 1b \n" |
103 | 0 | ".previous \n" |
104 | 0 | _ASM_EXTABLE(1b, 2b) |
105 | 0 | : |
106 | 0 | : "m" (*fpu_ctxt), "i" (sizeof(*fpu_ctxt) / 4) ); |
107 | 0 | break; |
108 | 0 | } |
109 | 0 | } |
110 | | |
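The fixup blocks above implement a "wipe and retry" recovery entirely in assembly, since the exception table must point at the faulting FXRSTOR instruction itself. A conceptual C rendering of what the fixup path does, with fxrstor_faulted() as a hypothetical stand-in for the fault being caught via _ASM_EXTABLE:

    while ( fxrstor_faulted(fpu_ctxt) )          /* hypothetical predicate */
    {
        /* The "rep stosl" in the fixup: clear the whole image ... */
        memset(fpu_ctxt, 0, sizeof(*fpu_ctxt));
        /* ... then "jmp 1b" retries; an all-zero image restores cleanly. */
    }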
111 | | /*******************************/ |
112 | | /* FPU Save Functions */ |
113 | | /*******************************/ |
114 | | |
115 | | static inline uint64_t vcpu_xsave_mask(const struct vcpu *v) |
116 | 276 | { |
117 | 276 | if ( v->fpu_dirtied ) |
118 | 276 | return v->arch.nonlazy_xstate_used ? XSTATE_ALL : XSTATE_LAZY; |
119 | 276 | |
120 | 0 | ASSERT(v->arch.nonlazy_xstate_used); |
121 | 0 |
122 | 0 | /* |
123 | 0 | * The offsets of components living in the extended region of a |
124 | 0 | * compact xsave area are not fixed, so the xsave area may be |
125 | 0 | * overwritten when an xsave with v->fpu_dirtied set is followed |
126 | 0 | * by one with v->fpu_dirtied clear. |
127 | 0 | * In such a case, if the hypervisor uses a compact xsave area and |
128 | 0 | * the guest has ever used lazy states (checked via xcr0_accum |
129 | 0 | * excluding XSTATE_FP_SSE), vcpu_xsave_mask() returns XSTATE_ALL; |
130 | 0 | * otherwise it returns XSTATE_NONLAZY. |
131 | 0 | */ |
132 | 0 | return xstate_all(v) ? XSTATE_ALL : XSTATE_NONLAZY; |
133 | 276 | } |
134 | | |
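xstate_all() referenced here lives in asm/xstate.h; a sketch of its logic, reconstructed to match the comment above (treat the exact expression as an approximation rather than a verbatim quote):

    static inline bool xstate_all(const struct vcpu *v)
    {
        /*
         * XSTATE_FP_SSE can be left out of the check: it lives at fixed
         * offsets in the legacy region, so compaction cannot move it.
         */
        return (v->arch.xsave_area->xsave_hdr.xcomp_bv &
                XSTATE_COMPACTION_ENABLED) &&
               (v->arch.xcr0_accum & XSTATE_LAZY & ~XSTATE_FP_SSE);
    }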
135 | | /* Save x87 extended state */ |
136 | | static inline void fpu_xsave(struct vcpu *v) |
137 | 276 | { |
138 | 276 | bool ok; |
139 | 276 | uint64_t mask = vcpu_xsave_mask(v); |
140 | 276 | |
141 | 276 | ASSERT(mask); |
142 | 276 | ASSERT(v->arch.xsave_area); |
143 | 276 | /* |
144 | 276 | * XCR0 normally represents what the guest OS has set. For Xen itself, |
145 | 276 | * we set the accumulated feature mask before doing save/restore. |
146 | 276 | */ |
147 | 276 | ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE); |
148 | 276 | ASSERT(ok); |
149 | 276 | xsave(v, mask); |
150 | 11 | ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE); |
151 | 276 | ASSERT(ok); |
152 | 276 | } |
153 | | |
154 | | /* Save x87 FPU, MMX, SSE and SSE2 state */ |
155 | | static inline void fpu_fxsave(struct vcpu *v) |
156 | 0 | { |
157 | 0 | typeof(v->arch.xsave_area->fpu_sse) *fpu_ctxt = v->arch.fpu_ctxt; |
158 | 0 | unsigned int fip_width = v->domain->arch.x87_fip_width; |
159 | 0 |
160 | 0 | if ( fip_width != 4 ) |
161 | 0 | { |
162 | 0 | /* |
163 | 0 | * The only way to force fxsaveq on a wide range of gas versions. |
164 | 0 | * On older versions the rex64 prefix works only if we force an |
165 | 0 | * addressing mode that doesn't require extended registers. |
166 | 0 | */ |
167 | 0 | asm volatile ( REX64_PREFIX "fxsave (%1)" |
168 | 0 | : "=m" (*fpu_ctxt) : "R" (fpu_ctxt) ); |
169 | 0 |
170 | 0 | /* |
171 | 0 | * AMD CPUs don't save/restore FDP/FIP/FOP unless an exception |
172 | 0 | * is pending. |
173 | 0 | */ |
174 | 0 | if ( !(fpu_ctxt->fsw & 0x0080) && |
175 | 0 | boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) |
176 | 0 | return; |
177 | 0 |
178 | 0 | /* |
179 | 0 | * If the FIP/FDP[63:32] are both zero, it is safe to use the |
180 | 0 | * 32-bit restore to also restore the selectors. |
181 | 0 | */ |
182 | 0 | if ( !fip_width && |
183 | 0 | !((fpu_ctxt->fip.addr | fpu_ctxt->fdp.addr) >> 32) ) |
184 | 0 | { |
185 | 0 | struct ix87_env fpu_env; |
186 | 0 |
187 | 0 | asm volatile ( "fnstenv %0" : "=m" (fpu_env) ); |
188 | 0 | fpu_ctxt->fip.sel = fpu_env.fcs; |
189 | 0 | fpu_ctxt->fdp.sel = fpu_env.fds; |
190 | 0 | fip_width = 4; |
191 | 0 | } |
192 | 0 | else |
193 | 0 | fip_width = 8; |
194 | 0 | } |
195 | 0 | else |
196 | 0 | { |
197 | 0 | asm volatile ( "fxsave %0" : "=m" (*fpu_ctxt) ); |
198 | 0 | fip_width = 4; |
199 | 0 | } |
200 | 0 |
201 | 0 | fpu_ctxt->x[FPU_WORD_SIZE_OFFSET] = fip_width; |
202 | 0 | } |
203 | | |
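The struct ix87_env used by fpu_fxsave() above is the 28-byte image written by FNSTENV with a 32-bit operand size. A sketch of its layout, consistent with the fcs/fds accesses in the code (padding field names are illustrative; the real definition is in asm/i387.h):

    struct ix87_env {
        uint16_t fcw, _res0;    /* x87 control word */
        uint16_t fsw, _res1;    /* x87 status word */
        uint16_t ftw, _res2;    /* tag word */
        uint32_t fip;           /* instruction pointer offset */
        uint16_t fcs;           /* instruction pointer selector */
        uint16_t fop;           /* last opcode (low 11 bits) */
        uint32_t fdp;           /* data pointer offset */
        uint16_t fds, _res3;    /* data pointer selector */
    };

fpu_fxsave() stores the chosen FIP width (4 or 8) in the otherwise unused byte at FPU_WORD_SIZE_OFFSET of the FXSAVE image, and fpu_fxrstor()'s switch statement reads that byte back to pick the matching restore form.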
204 | | /*******************************/ |
205 | | /* VCPU FPU Functions */ |
206 | | /*******************************/ |
207 | | /* Restore FPU state whenever the VCPU is scheduled in. */ |
208 | | void vcpu_restore_fpu_eager(struct vcpu *v) |
209 | 37.0k | { |
210 | 37.0k | ASSERT(!is_idle_vcpu(v)); |
211 | 37.0k | |
212 | 37.0k | /* Restore nonlazy extended state (i.e. parts not tracked by CR0.TS). */ |
213 | 37.0k | if ( !v->arch.nonlazy_xstate_used ) |
214 | 37.0k | return; |
215 | 37.0k | |
216 | 37.0k | /* Avoid recursion */ |
217 | 18.4E | clts(); |
218 | 18.4E | |
219 | 18.4E | /* |
220 | 18.4E | * When saving full state even with !v->fpu_dirtied (see vcpu_xsave_mask() |
221 | 18.4E | * above) we also need to restore full state, to prevent subsequently |
222 | 18.4E | * saving state belonging to another vCPU. |
223 | 18.4E | */ |
224 | 18.4E | if ( xstate_all(v) ) |
225 | 0 | { |
226 | 0 | fpu_xrstor(v, XSTATE_ALL); |
227 | 0 | v->fpu_initialised = 1; |
228 | 0 | v->fpu_dirtied = 1; |
229 | 0 | } |
230 | 18.4E | else |
231 | 18.4E | { |
232 | 18.4E | fpu_xrstor(v, XSTATE_NONLAZY); |
233 | 18.4E | stts(); |
234 | 18.4E | } |
235 | 18.4E | } |
236 | | |
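For orientation, a simplified sketch (not the actual __context_switch() code in domain.c) of how the scheduler-side path is expected to drive the hooks in this section:

    /* Illustrative ordering only; prev/next are the outgoing/incoming vCPUs. */
    vcpu_save_fpu(prev);             /* save dirty/non-lazy state, then stts() */
    /* ... switch address space, segment and debug state ... */
    vcpu_restore_fpu_eager(next);    /* reload the parts CR0.TS cannot guard */
    /* Lazy x87/SSE state is reloaded later by vcpu_restore_fpu_lazy(),
     * from the #NM handler, the first time 'next' touches the FPU. */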
237 | | /* |
238 | | * Restore FPU state when #NM is triggered. |
239 | | */ |
240 | | void vcpu_restore_fpu_lazy(struct vcpu *v) |
241 | 3.82k | { |
242 | 3.82k | ASSERT(!is_idle_vcpu(v)); |
243 | 3.82k | |
244 | 3.82k | /* Avoid recursion. */ |
245 | 3.82k | clts(); |
246 | 3.82k | |
247 | 3.82k | if ( v->fpu_dirtied ) |
248 | 3.54k | return; |
249 | 3.82k | |
250 | 275 | if ( cpu_has_xsave ) |
251 | 276 | fpu_xrstor(v, XSTATE_LAZY); |
252 | 275 | else |
253 | 18.4E | fpu_fxrstor(v); |
254 | 275 | |
255 | 275 | v->fpu_initialised = 1; |
256 | 275 | v->fpu_dirtied = 1; |
257 | 275 | } |
258 | | |
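This function is reached from the #NM (device-not-available) trap. A hedged sketch of that caller, simplified from traps.c (details vary between trees; the PV CR0.TS reflection logic is omitted):

    void do_device_not_available(struct cpu_user_regs *regs)   /* sketch */
    {
        BUG_ON(!guest_mode(regs));
        vcpu_restore_fpu_lazy(current);
        /* For PV guests that set their virtual CR0.TS, the #NM is
         * reflected to the guest instead of being swallowed here. */
    }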
259 | | /* |
260 | | * On each context switch, save the necessary FPU state of the VCPU being |
261 | | * switched out, dispatching the save operation based on the CPU's capabilities. |
262 | | */ |
263 | | static bool _vcpu_save_fpu(struct vcpu *v) |
264 | 36.9k | { |
265 | 36.9k | if ( !v->fpu_dirtied && !v->arch.nonlazy_xstate_used ) |
266 | 36.7k | return false; |
267 | 36.9k | |
268 | 259 | ASSERT(!is_idle_vcpu(v)); |
269 | 259 | |
270 | 259 | /* This can happen if a paravirtualised guest OS has set its CR0.TS. */ |
271 | 259 | clts(); |
272 | 259 | |
273 | 259 | if ( cpu_has_xsave ) |
274 | 276 | fpu_xsave(v); |
275 | 259 | else |
276 | 18.4E | fpu_fxsave(v); |
277 | 259 | |
278 | 259 | v->fpu_dirtied = 0; |
279 | 259 | |
280 | 259 | return true; |
281 | 36.9k | } |
282 | | |
283 | | void vcpu_save_fpu(struct vcpu *v) |
284 | 36.9k | { |
285 | 36.9k | _vcpu_save_fpu(v); |
286 | 36.9k | stts(); |
287 | 36.9k | } |
288 | | |
289 | | void save_fpu_enable(void) |
290 | 0 | { |
291 | 0 | if ( !_vcpu_save_fpu(current) ) |
292 | 0 | clts(); |
293 | 0 | } |
294 | | |
295 | | /* Initialize FPU's context save area */ |
296 | | int vcpu_init_fpu(struct vcpu *v) |
297 | 12 | { |
298 | 12 | int rc; |
299 | 12 | |
300 | 12 | if ( (rc = xstate_alloc_save_area(v)) != 0 ) |
301 | 0 | return rc; |
302 | 12 | |
303 | 12 | if ( v->arch.xsave_area ) |
304 | 12 | v->arch.fpu_ctxt = &v->arch.xsave_area->fpu_sse; |
305 | 12 | else |
306 | 0 | { |
307 | 0 | BUILD_BUG_ON(__alignof(v->arch.xsave_area->fpu_sse) < 16); |
308 | 0 | v->arch.fpu_ctxt = _xzalloc(sizeof(v->arch.xsave_area->fpu_sse), |
309 | 0 | __alignof(v->arch.xsave_area->fpu_sse)); |
310 | 0 | if ( v->arch.fpu_ctxt ) |
311 | 0 | { |
312 | 0 | typeof(v->arch.xsave_area->fpu_sse) *fpu_sse = v->arch.fpu_ctxt; |
313 | 0 |
314 | 0 | fpu_sse->fcw = FCW_DEFAULT; |
315 | 0 | fpu_sse->mxcsr = MXCSR_DEFAULT; |
316 | 0 | } |
317 | 0 | else |
318 | 0 | rc = -ENOMEM; |
319 | 0 | } |
320 | 12 | |
321 | 12 | return rc; |
322 | 12 | } |
323 | | |
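FCW_DEFAULT and MXCSR_DEFAULT used in vcpu_init_fpu() are the architectural reset values, defined in the x86 headers; quoting them from memory (verify against asm/i387.h / asm/xstate.h):

    #define FCW_DEFAULT   0x037f   /* x87: exceptions masked, 64-bit precision,
                                      round-to-nearest (the FNINIT value)       */
    #define MXCSR_DEFAULT 0x1f80   /* SSE: exceptions masked, round-to-nearest,
                                      FTZ/DAZ clear (the reset value)           */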
324 | | /* Free FPU's context save area */ |
325 | | void vcpu_destroy_fpu(struct vcpu *v) |
326 | 0 | { |
327 | 0 | if ( v->arch.xsave_area ) |
328 | 0 | xstate_free_save_area(v); |
329 | 0 | else |
330 | 0 | xfree(v->arch.fpu_ctxt); |
331 | 0 | } |
332 | | |
333 | | /* |
334 | | * Local variables: |
335 | | * mode: C |
336 | | * c-file-style: "BSD" |
337 | | * c-basic-offset: 4 |
338 | | * tab-width: 4 |
339 | | * indent-tabs-mode: nil |
340 | | * End: |
341 | | */ |