xen/arch/x86/i387.c @ 22797:58304c1cc725 (debuggers.hg)

x86 fpu: Code clean up. Eliminate per-cpu xsave init verbosity.

Signed-off-by: Keir Fraser <keir@xen.org>
Author:  Keir Fraser <keir@xen.org>
Date:    Fri Jan 14 09:11:28 2011 +0000
Parent:  4b7cb21caf0e

/*
 *  linux/arch/i386/kernel/i387.c
 *
 *  Copyright (C) 1994 Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *  General FPU state handling cleanups
 *  Gareth Hughes <gareth@valinux.com>, May 2000
 */

#include <xen/config.h>
#include <xen/sched.h>
#include <asm/current.h>
#include <asm/processor.h>
#include <asm/hvm/support.h>
#include <asm/i387.h>
#include <asm/asm_defns.h>

static bool_t __read_mostly cpu_has_xsaveopt;

static void xsave(struct vcpu *v)
{
    struct xsave_struct *ptr = v->arch.xsave_area;

    asm volatile (
        ".byte " REX_PREFIX "0x0f,0xae,0x27"
        :
        : "a" (-1), "d" (-1), "D" (ptr)
        : "memory" );
}

static void xsaveopt(struct vcpu *v)
{
    struct xsave_struct *ptr = v->arch.xsave_area;

    asm volatile (
        ".byte " REX_PREFIX "0x0f,0xae,0x37"
        :
        : "a" (-1), "d" (-1), "D" (ptr)
        : "memory" );
}

static void xrstor(struct vcpu *v)
{
    struct xsave_struct *ptr = v->arch.xsave_area;

    asm volatile (
        ".byte " REX_PREFIX "0x0f,0xae,0x2f"
        :
        : "m" (*ptr), "a" (-1), "d" (-1), "D" (ptr) );
}

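/*
 * Illustrative note (not part of the original file): the .byte sequences
 * above hand-assemble the XSAVE-family instructions for assemblers that do
 * not yet know the mnemonics. 0x0f,0xae is the opcode; the ModRM byte picks
 * the operation with a (%rdi)/(%edi) memory operand: /4 = XSAVE -> 0x27,
 * /6 = XSAVEOPT -> 0x37, /5 = XRSTOR -> 0x2f. EDX:EAX supplies the
 * requested-feature bitmask, and -1/-1 asks for every enabled component.
 * With a new enough assembler, xsave() could be spelled simply as:
 */
#if 0 /* sketch only, assuming binutils support for the mnemonic */
static void xsave_mnemonic(struct vcpu *v)
{
    struct xsave_struct *ptr = v->arch.xsave_area;

    asm volatile ( "xsave %0" : "=m" (*ptr) : "a" (-1), "d" (-1) );
}
#endif
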
static void load_mxcsr(unsigned long val)
{
    val &= 0xffbf;
    asm volatile ( "ldmxcsr %0" : : "m" (val) );
}

static void init_fpu(void);
static void restore_fpu(struct vcpu *v);

void setup_fpu(struct vcpu *v)
{
    ASSERT(!is_idle_vcpu(v));

    /* Avoid recursion. */
    clts();

    if ( v->fpu_dirtied )
        return;

    if ( cpu_has_xsave )
    {
        /*
         * XCR0 normally holds whatever the guest OS has set. Before Xen
         * itself does a save/restore, widen it to the mask of all features
         * the guest has ever enabled (xcr0_accum).
         */
        set_xcr0(v->arch.xcr0_accum);
        xrstor(v);
        set_xcr0(v->arch.xcr0);
    }
    else if ( v->fpu_initialised )
    {
        restore_fpu(v);
    }
    else
    {
        init_fpu();
    }

    v->fpu_initialised = 1;
    v->fpu_dirtied = 1;
}

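/*
 * Illustrative caller sketch (not part of this file): setup_fpu()
 * implements lazy FPU restore. CR0.TS is set on context switch, so the
 * first FPU/SSE instruction the guest executes faults with #NM, and the
 * trap handler restores state roughly like this (the handler name and
 * shape are assumptions, not quoted from Xen):
 */
#if 0
void do_device_not_available(void)
{
    setup_fpu(current); /* clears CR0.TS and marks the state dirty */
}
#endif
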
static void init_fpu(void)
{
    asm volatile ( "fninit" );
    if ( cpu_has_xmm )
        load_mxcsr(0x1f80);
}

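/*
 * Sketch (added for illustration): 0x1f80 is the architectural MXCSR reset
 * value -- all six SSE exception types masked, all status flags clear. The
 * macro names below are hypothetical, shown only to decode the constant:
 */
#if 0
#define MXCSR_IM (1u << 7)  /* invalid-operation mask */
#define MXCSR_DM (1u << 8)  /* denormal mask */
#define MXCSR_ZM (1u << 9)  /* divide-by-zero mask */
#define MXCSR_OM (1u << 10) /* overflow mask */
#define MXCSR_UM (1u << 11) /* underflow mask */
#define MXCSR_PM (1u << 12) /* precision mask */
/* 0x1f80 == MXCSR_IM|MXCSR_DM|MXCSR_ZM|MXCSR_OM|MXCSR_UM|MXCSR_PM */
#endif
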
void save_init_fpu(struct vcpu *v)
{
    unsigned long cr0;
    char *fpu_ctxt;

    if ( !v->fpu_dirtied )
        return;

    ASSERT(!is_idle_vcpu(v));

    cr0 = read_cr0();
    fpu_ctxt = v->arch.guest_context.fpu_ctxt.x;

    /* This can happen, if a paravirtualised guest OS has set its CR0.TS. */
    if ( cr0 & X86_CR0_TS )
        clts();

    if ( cpu_has_xsave )
    {
        /*
         * XCR0 normally holds whatever the guest OS has set. Before Xen
         * itself does a save/restore, widen it to the accumulated feature
         * mask so that every ever-enabled component is saved.
         */
        set_xcr0(v->arch.xcr0_accum);
        if ( cpu_has_xsaveopt )
            xsaveopt(v);
        else
            xsave(v);
        set_xcr0(v->arch.xcr0);
    }
    else if ( cpu_has_fxsr )
    {
#ifdef __i386__
        asm volatile (
            "fxsave %0"
            : "=m" (*fpu_ctxt) );
#else /* __x86_64__ */
        /*
         * The only way to force fxsaveq on a wide range of gas versions. On
         * older versions the rex64 prefix works only if we force an
         * addressing mode that doesn't require extended registers.
         */
        asm volatile (
            REX64_PREFIX "fxsave (%1)"
            : "=m" (*fpu_ctxt) : "cdaSDb" (fpu_ctxt) );
#endif

        /* Clear exception flags if FSW.ES is set. */
        if ( unlikely(fpu_ctxt[2] & 0x80) )
            asm volatile ( "fnclex" );

        /*
         * AMD CPUs don't save/restore FDP/FIP/FOP unless an exception is
         * pending. Clear the x87 state here by setting it to fixed values.
         * The hypervisor data segment can sometimes be 0 and sometimes the
         * new user value; both should be OK. Use the FPU save data block
         * as a safe address because it should be in L1.
         */
        if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
        {
            asm volatile (
                "emms\n\t"  /* clear stack tags */
                "fildl %0"  /* load to clear state */
                : : "m" (*fpu_ctxt) );
        }
    }
    else
    {
        /* FWAIT is required to make FNSAVE synchronous. */
        asm volatile ( "fnsave %0 ; fwait" : "=m" (*fpu_ctxt) );
    }

    v->fpu_dirtied = 0;
    write_cr0(cr0|X86_CR0_TS);
}

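/*
 * Illustrative note (not in the original): the fpu_ctxt[2] & 0x80 test
 * above checks FSW.ES. In the FXSAVE image the x87 status word (FSW)
 * occupies bytes 2-3, and ES (error summary) is bit 7 of that word, i.e.
 * bit 7 of byte 2. A more self-documenting spelling of the same test:
 */
#if 0 /* sketch; the explicit FSW load is an assumption, not Xen code */
uint16_t fsw = *(uint16_t *)(fpu_ctxt + 2); /* FSW at byte offset 2 */
if ( unlikely(fsw & 0x0080) )               /* ES = error summary */
    asm volatile ( "fnclex" );
#endif
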
static void restore_fpu(struct vcpu *v)
{
    char *fpu_ctxt = v->arch.guest_context.fpu_ctxt.x;

    /*
     * FXRSTOR can fault if passed a corrupted data block. We handle this
     * possibility, which may occur if the block was passed to us by control
     * tools, by silently clearing the block.
     */
    if ( cpu_has_fxsr )
    {
        asm volatile (
#ifdef __i386__
            "1: fxrstor %0            \n"
#else /* __x86_64__ */
            /* See above for why the operands/constraints are this way. */
            "1: " REX64_PREFIX "fxrstor (%2)\n"
#endif
            ".section .fixup,\"ax\"   \n"
            "2: push %%"__OP"ax       \n"
            "   push %%"__OP"cx       \n"
            "   push %%"__OP"di       \n"
            "   lea  %0,%%"__OP"di    \n"
            "   mov  %1,%%ecx         \n"
            "   xor  %%eax,%%eax      \n"
            "   rep ; stosl           \n"
            "   pop  %%"__OP"di       \n"
            "   pop  %%"__OP"cx       \n"
            "   pop  %%"__OP"ax       \n"
            "   jmp  1b               \n"
            ".previous                \n"
            _ASM_EXTABLE(1b, 2b)
            :
            : "m" (*fpu_ctxt),
              "i" (sizeof(v->arch.guest_context.fpu_ctxt)/4)
#ifdef __x86_64__
              ,"cdaSDb" (fpu_ctxt)
#endif
            );
    }
    else
    {
        asm volatile ( "frstor %0" : : "m" (v->arch.guest_context.fpu_ctxt) );
    }
}

#define XSTATE_CPUID 0xd
#define XSAVE_AREA_MIN_SIZE (512 + 64) /* FP/SSE + XSAVE.HEADER */

/*
 * Maximum size (in bytes) of the XSAVE/XRSTOR save area required by all
 * the supported and enabled features on the processor, including the
 * XSAVE.HEADER. We only enable the features covered by XCNTXT_MASK,
 * i.e. those we know about.
 */
u32 xsave_cntxt_size;

/* A 64-bit bitmask of the XSAVE/XRSTOR features supported by the processor. */
u64 xfeature_mask;

/* Cached xcr0 for fast read. */
DEFINE_PER_CPU(uint64_t, xcr0);

void xsave_init(void)
{
    u32 eax, ebx, ecx, edx;
    int cpu = smp_processor_id();
    u32 min_size;

    if ( boot_cpu_data.cpuid_level < XSTATE_CPUID )
        return;

    cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);

    BUG_ON((eax & XSTATE_FP_SSE) != XSTATE_FP_SSE);
    BUG_ON((eax & XSTATE_YMM) && !(eax & XSTATE_SSE));

    /* FP/SSE, XSAVE.HEADER, YMM. */
    min_size = XSAVE_AREA_MIN_SIZE;
    if ( eax & XSTATE_YMM )
        min_size += XSTATE_YMM_SIZE;
    BUG_ON(ecx < min_size);

    /* Set CR4_OSXSAVE and re-run CPUID to obtain xsave_cntxt_size. */
    set_in_cr4(X86_CR4_OSXSAVE);
    set_xcr0((((u64)edx << 32) | eax) & XCNTXT_MASK);
    cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);

    if ( cpu == 0 )
    {
        /*
         * xsave_cntxt_size is the maximum size required by the enabled
         * features. In eax we know about FP/SSE and YMM; nothing in edx
         * is known to us at present.
         */
        xsave_cntxt_size = ebx;
        xfeature_mask = eax + ((u64)edx << 32);
        xfeature_mask &= XCNTXT_MASK;
        printk("%s: using cntxt_size: 0x%x and states: 0x%"PRIx64"\n",
               __func__, xsave_cntxt_size, xfeature_mask);

        /* Check for the XSAVEOPT feature. */
        cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
        cpu_has_xsaveopt = !!(eax & XSAVEOPT);
    }
    else
    {
        BUG_ON(xsave_cntxt_size != ebx);
        BUG_ON(xfeature_mask != (xfeature_mask & XCNTXT_MASK));
    }
}

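/*
 * Sketch (not part of this file): set_xcr0() ultimately executes XSETBV
 * with ECX = 0 (XCR0), besides updating the per-CPU xcr0 cache declared
 * above. On assemblers without the mnemonic, XSETBV is the byte sequence
 * 0x0f,0x01,0xd1, e.g.:
 */
#if 0 /* illustration only; the real helper lives elsewhere in Xen */
static void xsetbv(u32 index, u64 value)
{
    asm volatile ( ".byte 0x0f,0x01,0xd1" /* xsetbv */
                   : : "a" ((u32)value), "d" ((u32)(value >> 32)),
                       "c" (index) );
}
#endif
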
int xsave_alloc_save_area(struct vcpu *v)
{
    void *save_area;

    if ( !cpu_has_xsave || is_idle_vcpu(v) )
        return 0;

    BUG_ON(xsave_cntxt_size < XSAVE_AREA_MIN_SIZE);

    /* XSAVE/XRSTOR requires the save area be aligned on a 64-byte boundary. */
    save_area = _xmalloc(xsave_cntxt_size, 64);
    if ( save_area == NULL )
        return -ENOMEM;

    memset(save_area, 0, xsave_cntxt_size);
    ((u32 *)save_area)[6] = 0x1f80;                 /* MXCSR */
    *(uint64_t *)(save_area + 512) = XSTATE_FP_SSE; /* XSTATE_BV */

    v->arch.xsave_area = save_area;
    v->arch.xcr0 = XSTATE_FP_SSE;
    v->arch.xcr0_accum = XSTATE_FP_SSE;

    return 0;
}

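/*
 * Layout note (added for illustration): the two writes above poke directly
 * into the architectural XSAVE image. Bytes 0-511 are the legacy FXSAVE
 * region, with MXCSR at byte offset 24 (hence ((u32 *)save_area)[6]); the
 * 64-byte XSAVE header follows at offset 512, and its first 8 bytes are
 * XSTATE_BV, the bitmap of components present in the area. A hypothetical
 * struct mirroring that layout, for reference only:
 */
#if 0
struct xsave_image {
    uint8_t  legacy[512]; /* FXSAVE area: FCW, FSW, ..., MXCSR at byte 24 */
    uint64_t xstate_bv;   /* offset 512: components present in the image */
    uint8_t  rsvd[56];    /* remainder of the 64-byte XSAVE header */
    /* extended state (e.g. YMM_Hi128) follows, per CPUID.0xD layout */
};
#endif
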
void xsave_free_save_area(struct vcpu *v)
{
    xfree(v->arch.xsave_area);
    v->arch.xsave_area = NULL;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */