Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/arch/x86/xstate.c
Line
Count
Source
1
/*
2
 *  arch/x86/xstate.c
3
 *
4
 *  x86 extended state operations
5
 *
6
 */
7
8
#include <xen/percpu.h>
9
#include <xen/sched.h>
10
#include <asm/current.h>
11
#include <asm/processor.h>
12
#include <asm/hvm/support.h>
13
#include <asm/i387.h>
14
#include <asm/xstate.h>
15
#include <asm/asm_defns.h>
16
17
/*
18
 * Maximum size (in bytes) of the XSAVE/XRSTOR save area required by all
19
 * the supported and enabled features on the processor, including the
20
 * XSAVE.HEADER. We only enable the XCNTXT_MASK features that we know about.
21
 */
22
static u32 __read_mostly xsave_cntxt_size;
23
24
/* A 64-bit bitmask of the XSAVE/XRSTOR features supported by the processor. */
25
u64 __read_mostly xfeature_mask;
26
27
unsigned int *__read_mostly xstate_offsets;
28
unsigned int *__read_mostly xstate_sizes;
29
u64 __read_mostly xstate_align;
30
static unsigned int __read_mostly xstate_features;
31
32
uint32_t __read_mostly mxcsr_mask = 0x0000ffbf;
33
34
/* Cached xcr0 for fast read */
35
static DEFINE_PER_CPU(uint64_t, xcr0);
36
37
/* Because XCR0 is cached for each CPU, xsetbv() is not exposed. Users should 
38
 * use set_xcr0() instead.
39
 */
40
static inline bool xsetbv(u32 index, u64 xfeatures)
41
1.19k
{
42
1.19k
    u32 hi = xfeatures >> 32;
43
1.19k
    u32 lo = (u32)xfeatures;
44
1.19k
45
1.19k
    asm volatile ( "1: .byte 0x0f,0x01,0xd1\n"
46
1.19k
                   "3:                     \n"
47
1.19k
                   ".section .fixup,\"ax\" \n"
48
1.19k
                   "2: xor %0,%0           \n"
49
1.19k
                   "   jmp 3b              \n"
50
1.19k
                   ".previous              \n"
51
1.19k
                   _ASM_EXTABLE(1b, 2b)
52
1.19k
                   : "+a" (lo)
53
1.19k
                   : "c" (index), "d" (hi));
54
1.19k
    return lo != 0;
55
1.19k
}
56
57
bool set_xcr0(u64 xfeatures)
58
1.19k
{
59
1.19k
    if ( !xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures) )
60
0
        return false;
61
1.19k
    this_cpu(xcr0) = xfeatures;
62
1.19k
    return true;
63
1.19k
}
64
65
uint64_t get_xcr0(void)
66
37.0k
{
67
37.0k
    return this_cpu(xcr0);
68
37.0k
}
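
The pair above hides the privileged XSETBV write behind set_xcr0() so that get_xcr0() can be answered from a per-CPU cache. Below is a minimal user-space sketch of the same caching pattern, assuming nothing beyond standard C; the hardware write is replaced by a stub and all names are illustrative, not Xen API.

/* Sketch of the set_xcr0()/get_xcr0() caching pattern: cache the value only
 * after a successful write, so the read path never needs the instruction. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t cached_xcr0;              /* stands in for the per-CPU xcr0 */

static bool stub_xsetbv(uint32_t index, uint64_t val)
{
    (void)index;
    (void)val;
    return true;                          /* pretend the register write succeeded */
}

static bool demo_set_xcr0(uint64_t xfeatures)
{
    if ( !stub_xsetbv(0, xfeatures) )     /* 0 models XCR_XFEATURE_ENABLED_MASK */
        return false;
    cached_xcr0 = xfeatures;              /* cache only after a successful write */
    return true;
}

static uint64_t demo_get_xcr0(void)
{
    return cached_xcr0;                   /* no xgetbv needed on the read path */
}

int main(void)
{
    demo_set_xcr0(0x7);                   /* x87 | SSE | YMM */
    printf("cached XCR0 = %#lx\n", (unsigned long)demo_get_xcr0());
    return 0;
}
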
69
70
/* Cached xss for fast read */
71
static DEFINE_PER_CPU(uint64_t, xss);
72
73
void set_msr_xss(u64 xss)
74
0
{
75
0
    u64 *this_xss = &this_cpu(xss);
76
0
77
0
    if ( *this_xss != xss )
78
0
    {
79
0
        wrmsrl(MSR_IA32_XSS, xss);
80
0
        *this_xss = xss;
81
0
    }
82
0
}
83
84
uint64_t get_msr_xss(void)
85
0
{
86
0
    return this_cpu(xss);
87
0
}
88
89
static int setup_xstate_features(bool bsp)
90
12
{
91
12
    unsigned int leaf, eax, ebx, ecx, edx;
92
12
93
12
    if ( bsp )
94
1
    {
95
1
        xstate_features = flsl(xfeature_mask);
96
1
        xstate_offsets = xzalloc_array(unsigned int, xstate_features);
97
1
        if ( !xstate_offsets )
98
0
            return -ENOMEM;
99
1
100
1
        xstate_sizes = xzalloc_array(unsigned int, xstate_features);
101
1
        if ( !xstate_sizes )
102
0
            return -ENOMEM;
103
1
    }
104
12
105
24
    for ( leaf = 2; leaf < xstate_features; leaf++ )
106
12
    {
107
12
        if ( bsp )
108
1
        {
109
1
            cpuid_count(XSTATE_CPUID, leaf, &xstate_sizes[leaf],
110
1
                        &xstate_offsets[leaf], &ecx, &edx);
111
1
            if ( ecx & XSTATE_ALIGN64 )
112
0
                __set_bit(leaf, &xstate_align);
113
1
        }
114
12
        else
115
11
        {
116
11
            cpuid_count(XSTATE_CPUID, leaf, &eax,
117
11
                        &ebx, &ecx, &edx);
118
11
            BUG_ON(eax != xstate_sizes[leaf]);
119
11
            BUG_ON(ebx != xstate_offsets[leaf]);
120
11
            BUG_ON(!(ecx & XSTATE_ALIGN64) != !test_bit(leaf, &xstate_align));
121
11
        }
122
12
    }
123
12
124
12
    return 0;
125
12
}
126
127
static void setup_xstate_comp(uint16_t *comp_offsets,
128
                              const uint64_t xcomp_bv)
129
0
{
130
0
    unsigned int i;
131
0
    unsigned int offset;
132
0
133
0
    /*
134
0
     * The FP xstates and SSE xstates are legacy states. They are always
135
0
     * in the fixed offsets in the xsave area in either compacted form
136
0
     * or standard form.
137
0
     */
138
0
    comp_offsets[0] = 0;
139
0
    comp_offsets[1] = XSAVE_SSE_OFFSET;
140
0
141
0
    comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE;
142
0
143
0
    offset = comp_offsets[2];
144
0
    for ( i = 2; i < xstate_features; i++ )
145
0
    {
146
0
        if ( (1ul << i) & xcomp_bv )
147
0
        {
148
0
            if ( test_bit(i, &xstate_align) )
149
0
                offset = ROUNDUP(offset, 64);
150
0
            comp_offsets[i] = offset;
151
0
            offset += xstate_sizes[i];
152
0
        }
153
0
    }
154
0
    ASSERT(offset <= xsave_cntxt_size);
155
0
}
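
setup_xstate_comp() derives compacted-format offsets by packing components 2..n in order after the legacy area plus XSAVE header, rounding up to 64 bytes wherever the component is flagged align-64. Here is a stand-alone sketch of that calculation with made-up component sizes and flags (not real CPUID data), assuming only standard C.

/* Sketch of the compacted-offset walk: legacy area + header first, then each
 * enabled component in order, 64-byte aligned where flagged. */
#include <stdint.h>
#include <stdio.h>

#define FXSAVE_SIZE    512
#define XSAVE_HDR_SIZE 64
#define ROUNDUP(x, a)  (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
    /* Hypothetical per-component sizes and align-to-64 flags for leaves 2..4. */
    unsigned int size[5]  = { 0, 0, 256, 64, 512 };
    int          align[5] = { 0, 0, 0,   1,  1   };
    unsigned int offset = FXSAVE_SIZE + XSAVE_HDR_SIZE, i;

    for ( i = 2; i < 5; i++ )
    {
        if ( align[i] )
            offset = ROUNDUP(offset, 64);
        printf("component %u -> offset %u (size %u)\n", i, offset, size[i]);
        offset += size[i];
    }
    printf("compacted area ends at %u bytes\n", offset);
    return 0;
}
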
156
157
/*
158
 * Serialise a vcpu's xsave state into a representation suitable for the
159
 * toolstack.
160
 *
161
 * Internally a vcpu's xsave state may be compressed or uncompressed, depending
162
 * on the features in use, but the ABI with the toolstack is strictly
163
 * uncompressed.
164
 *
165
 * It is the caller's responsibility to ensure that there is xsave state to
166
 * serialise, and that the provided buffer is exactly the right size.
167
 */
168
void expand_xsave_states(struct vcpu *v, void *dest, unsigned int size)
169
0
{
170
0
    const struct xsave_struct *xsave = v->arch.xsave_area;
171
0
    const void *src;
172
0
    uint16_t comp_offsets[sizeof(xfeature_mask)*8];
173
0
    u64 xstate_bv = xsave->xsave_hdr.xstate_bv;
174
0
    u64 valid;
175
0
176
0
    /* Check there is state to serialise (i.e. at least an XSAVE_HDR) */
177
0
    BUG_ON(!v->arch.xcr0_accum);
178
0
    /* Check there is exactly the right amount of room to decompress into. */
179
0
    BUG_ON(size != xstate_ctxt_size(v->arch.xcr0_accum));
180
0
181
0
    if ( !(xsave->xsave_hdr.xcomp_bv & XSTATE_COMPACTION_ENABLED) )
182
0
    {
183
0
        memcpy(dest, xsave, size);
184
0
        return;
185
0
    }
186
0
187
0
    ASSERT(xsave_area_compressed(xsave));
188
0
    setup_xstate_comp(comp_offsets, xsave->xsave_hdr.xcomp_bv);
189
0
190
0
    /*
191
0
     * Copy legacy XSAVE area and XSAVE hdr area.
192
0
     */
193
0
    memcpy(dest, xsave, XSTATE_AREA_MIN_SIZE);
194
0
    memset(dest + XSTATE_AREA_MIN_SIZE, 0, size - XSTATE_AREA_MIN_SIZE);
195
0
196
0
    ((struct xsave_struct *)dest)->xsave_hdr.xcomp_bv = 0;
197
0
198
0
    /*
199
0
     * Copy each region from the possibly compacted offset to the
200
0
     * non-compacted offset.
201
0
     */
202
0
    src = xsave;
203
0
    valid = xstate_bv & ~XSTATE_FP_SSE;
204
0
    while ( valid )
205
0
    {
206
0
        u64 feature = valid & -valid;
207
0
        unsigned int index = fls(feature) - 1;
208
0
209
0
        /*
210
0
         * We previously verified xstate_bv.  If there isn't valid
211
0
         * comp_offsets[] information, something is very broken.
212
0
         */
213
0
        BUG_ON(!comp_offsets[index]);
214
0
        BUG_ON((xstate_offsets[index] + xstate_sizes[index]) > size);
215
0
216
0
        memcpy(dest + xstate_offsets[index], src + comp_offsets[index],
217
0
               xstate_sizes[index]);
218
0
219
0
        valid &= ~feature;
220
0
    }
221
0
}
222
223
/*
224
 * Deserialise a toolstack's xsave state representation into a form suitable for a vcpu.
225
 *
226
 * Internally a vcpu's xsave state may be compressed or uncompressed, depending
227
 * on the features in use, but the ABI with the toolstack is strictly
228
 * uncompressed.
229
 *
230
 * It is the caller's responsibility to ensure that the source buffer contains
231
 * xsave state, is uncompressed, and is exactly the right size.
232
 */
233
void compress_xsave_states(struct vcpu *v, const void *src, unsigned int size)
234
0
{
235
0
    struct xsave_struct *xsave = v->arch.xsave_area;
236
0
    void *dest;
237
0
    uint16_t comp_offsets[sizeof(xfeature_mask)*8];
238
0
    u64 xstate_bv, valid;
239
0
240
0
    BUG_ON(!v->arch.xcr0_accum);
241
0
    BUG_ON(size != xstate_ctxt_size(v->arch.xcr0_accum));
242
0
    ASSERT(!xsave_area_compressed(src));
243
0
244
0
    xstate_bv = ((const struct xsave_struct *)src)->xsave_hdr.xstate_bv;
245
0
246
0
    if ( !(v->arch.xcr0_accum & XSTATE_XSAVES_ONLY) )
247
0
    {
248
0
        memcpy(xsave, src, size);
249
0
        return;
250
0
    }
251
0
252
0
    /*
253
0
     * Copy legacy XSAVE area, to avoid complications with CPUID
254
0
     * leaves 0 and 1 in the loop below.
255
0
     */
256
0
    memcpy(xsave, src, FXSAVE_SIZE);
257
0
258
0
    /* Set XSTATE_BV and XCOMP_BV.  */
259
0
    xsave->xsave_hdr.xstate_bv = xstate_bv;
260
0
    xsave->xsave_hdr.xcomp_bv = v->arch.xcr0_accum | XSTATE_COMPACTION_ENABLED;
261
0
262
0
    setup_xstate_comp(comp_offsets, xsave->xsave_hdr.xcomp_bv);
263
0
264
0
    /*
265
0
     * Copy each region from the non-compacted offset to the
266
0
     * possibly compacted offset.
267
0
     */
268
0
    dest = xsave;
269
0
    valid = xstate_bv & ~XSTATE_FP_SSE;
270
0
    while ( valid )
271
0
    {
272
0
        u64 feature = valid & -valid;
273
0
        unsigned int index = fls(feature) - 1;
274
0
275
0
        /*
276
0
         * We previously verified xstate_bv.  If we don't have valid
277
0
         * comp_offset[] information, something is very broken.
278
0
         */
279
0
        BUG_ON(!comp_offsets[index]);
280
0
        BUG_ON((xstate_offsets[index] + xstate_sizes[index]) > size);
281
0
282
0
        memcpy(dest + comp_offsets[index], src + xstate_offsets[index],
283
0
               xstate_sizes[index]);
284
0
285
0
        valid &= ~feature;
286
0
    }
287
0
}
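
Both copy loops above walk xstate_bv one set bit at a time, using valid & -valid to isolate the lowest set bit and fls() to turn it into a component index. A small user-space sketch of that idiom follows, with a hypothetical bitmap and a portable fls substitute, assuming nothing beyond standard C.

/* Sketch of the lowest-set-bit iteration used by expand/compress above. */
#include <stdint.h>
#include <stdio.h>

static unsigned int fls64(uint64_t x)
{
    unsigned int n = 0;

    while ( x )
    {
        x >>= 1;
        ++n;
    }
    return n;                       /* position of the highest set bit, 1-based */
}

int main(void)
{
    uint64_t valid = 0x2c;          /* hypothetical xstate_bv & ~XSTATE_FP_SSE */

    while ( valid )
    {
        uint64_t feature = valid & -valid;          /* isolate the lowest set bit */
        unsigned int index = fls64(feature) - 1;    /* its component index */

        printf("would copy component %u\n", index);
        valid &= ~feature;                          /* clear it and continue */
    }
    return 0;
}
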
288
289
void xsave(struct vcpu *v, uint64_t mask)
290
277
{
291
277
    struct xsave_struct *ptr = v->arch.xsave_area;
292
277
    uint32_t hmask = mask >> 32;
293
277
    uint32_t lmask = mask;
294
277
    unsigned int fip_width = v->domain->arch.x87_fip_width;
295
277
#define XSAVE(pfx) \
296
277
        if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
297
0
            asm volatile ( ".byte " pfx "0x0f,0xc7,0x2f\n" /* xsaves */ \
298
0
                           : "=m" (*ptr) \
299
0
                           : "a" (lmask), "d" (hmask), "D" (ptr) ); \
300
277
        else \
301
277
            alternative_io(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */ \
302
277
                           ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
303
277
                           X86_FEATURE_XSAVEOPT, \
304
277
                           "=m" (*ptr), \
305
277
                           "a" (lmask), "d" (hmask), "D" (ptr))
306
277
307
277
    if ( fip_width == 8 || !(mask & XSTATE_FP) )
308
0
    {
309
0
        XSAVE("0x48,");
310
0
    }
311
277
    else if ( fip_width == 4 )
312
0
    {
313
0
        XSAVE("");
314
0
    }
315
277
    else
316
277
    {
317
277
        /*
318
277
         * FIP/FDP may not be written in some cases (e.g., if XSAVEOPT/XSAVES
319
277
         * is used, or on AMD CPUs if an exception isn't pending).
320
277
         *
321
277
         * To tell if the hardware writes these fields, poison the FIP field.
322
277
         * The poison is
323
277
         * a) non-canonical
324
277
         * b) non-zero for the reserved part of a 32-bit FCS:FIP
325
277
         * c) random with a vanishingly small probability of matching a value the
326
277
         *    hardware may write (1e-19) even if it did not canonicalize the
327
277
         *    64-bit FIP or zero-extend the 16-bit FCS.
328
277
         */
329
277
        uint64_t orig_fip = ptr->fpu_sse.fip.addr;
330
277
        const uint64_t bad_fip = 0x6a3f5c4b13a533f6;
331
277
332
277
        ptr->fpu_sse.fip.addr = bad_fip;
333
277
334
277
        XSAVE("0x48,");
335
277
336
277
        /* FIP/FDP not updated? Restore the old FIP value. */
337
277
        if ( ptr->fpu_sse.fip.addr == bad_fip )
338
12
        {
339
12
            ptr->fpu_sse.fip.addr = orig_fip;
340
12
            return;
341
12
        }
342
277
343
277
        /*
344
277
         * If the FIP/FDP[63:32] are both zero, it is safe to use the
345
277
         * 32-bit restore to also restore the selectors.
346
277
         */
347
265
        if ( !((ptr->fpu_sse.fip.addr | ptr->fpu_sse.fdp.addr) >> 32) )
348
265
        {
349
265
            struct ix87_env fpu_env;
350
265
351
265
            asm volatile ( "fnstenv %0" : "=m" (fpu_env) );
352
265
            ptr->fpu_sse.fip.sel = fpu_env.fcs;
353
265
            ptr->fpu_sse.fdp.sel = fpu_env.fds;
354
265
            fip_width = 4;
355
265
        }
356
265
        else
357
0
            fip_width = 8;
358
265
    }
359
277
#undef XSAVE
360
265
    if ( mask & XSTATE_FP )
361
265
        ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] = fip_width;
362
265
}
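
The FIP handling in xsave() relies on a poison-and-check trick: seed the field with a value the hardware will never legitimately write, then restore the original if the poison survives the save. Below is a user-space sketch of the same pattern under that assumption, with a stub standing in for the hardware.

/* Sketch of poison-and-check: if the callee did not overwrite the field,
 * the sentinel is still there and the old value is put back. */
#include <stdint.h>
#include <stdio.h>

struct state { uint64_t fip; };

/* Hypothetical stand-in for hardware that sometimes skips the update. */
static void maybe_update(struct state *s, int do_update)
{
    if ( do_update )
        s->fip = 0x1234;
}

int main(void)
{
    const uint64_t poison = 0x6a3f5c4b13a533f6ULL;   /* non-canonical sentinel */
    struct state s = { .fip = 0xdead };
    uint64_t orig = s.fip;

    s.fip = poison;
    maybe_update(&s, 0);                 /* pretend FIP was not written */

    if ( s.fip == poison )
        s.fip = orig;                    /* untouched: restore the old value */

    printf("fip = %#lx\n", (unsigned long)s.fip);
    return 0;
}
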
363
364
void xrstor(struct vcpu *v, uint64_t mask)
365
286
{
366
286
    uint32_t hmask = mask >> 32;
367
286
    uint32_t lmask = mask;
368
286
    struct xsave_struct *ptr = v->arch.xsave_area;
369
286
    unsigned int faults, prev_faults;
370
286
371
286
    /*
372
286
     * AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
373
286
     * is pending. Clear the x87 state here by setting it to fixed
374
286
     * values. The hypervisor data segment can sometimes be 0 and
375
286
     * sometimes the new user value. Both should be OK. Use the FPU saved
376
286
     * data block as a safe address because it should be in L1.
377
286
     */
378
286
    if ( (mask & ptr->xsave_hdr.xstate_bv & XSTATE_FP) &&
379
276
         !(ptr->fpu_sse.fsw & ~ptr->fpu_sse.fcw & 0x003f) &&
380
276
         boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
381
0
        asm volatile ( "fnclex\n\t"        /* clear exceptions */
382
0
                       "ffree %%st(7)\n\t" /* clear stack tag */
383
0
                       "fildl %0"          /* load to clear state */
384
0
                       : : "m" (ptr->fpu_sse) );
385
286
386
286
    /*
387
286
     * XRSTOR can fault if passed a corrupted data block. We handle this
388
286
     * possibility, which may occur if the block was passed to us by control
389
286
     * tools or through VCPUOP_initialise, by silently adjusting state.
390
286
     */
391
0
    for ( prev_faults = faults = 0; ; prev_faults = faults )
392
286
    {
393
286
        switch ( __builtin_expect(ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET], 8) )
394
286
        {
395
0
            BUILD_BUG_ON(sizeof(faults) != 4); /* Clang doesn't support %z in asm. */
396
0
#define _xrstor(insn) \
397
286
        asm volatile ( "1: .byte " insn "\n" \
398
286
                       "3:\n" \
399
286
                       "   .section .fixup,\"ax\"\n" \
400
286
                       "2: incl %[faults]\n" \
401
286
                       "   jmp 3b\n" \
402
286
                       "   .previous\n" \
403
286
                       _ASM_EXTABLE(1b, 2b) \
404
286
                       : [mem] "+m" (*ptr), [faults] "+g" (faults) \
405
286
                       : [lmask] "a" (lmask), [hmask] "d" (hmask), \
406
286
                         [ptr] "D" (ptr) )
407
0
408
0
#define XRSTOR(pfx) \
409
286
        if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
410
0
        { \
411
0
            if ( unlikely(!(ptr->xsave_hdr.xcomp_bv & \
412
0
                            XSTATE_COMPACTION_ENABLED)) ) \
413
0
            { \
414
0
                ASSERT(!ptr->xsave_hdr.xcomp_bv); \
415
0
                ptr->xsave_hdr.xcomp_bv = ptr->xsave_hdr.xstate_bv | \
416
0
                                          XSTATE_COMPACTION_ENABLED; \
417
0
            } \
418
0
            _xrstor(pfx "0x0f,0xc7,0x1f"); /* xrstors */ \
419
0
        } \
420
286
        else \
421
286
            _xrstor(pfx "0x0f,0xae,0x2f") /* xrstor */
422
0
423
31
        default:
424
31
            XRSTOR("0x48,");
425
31
            break;
426
255
        case 4: case 2:
427
255
            XRSTOR("");
428
255
            break;
429
286
#undef XRSTOR
430
286
#undef _xrstor
431
286
        }
432
288
        if ( likely(faults == prev_faults) )
433
288
            break;
434
288
#ifndef NDEBUG
435
0
        gprintk(XENLOG_WARNING, "fault#%u: mxcsr=%08x\n",
436
0
                faults, ptr->fpu_sse.mxcsr);
437
0
        gprintk(XENLOG_WARNING, "xs=%016lx xc=%016lx\n",
438
0
                ptr->xsave_hdr.xstate_bv, ptr->xsave_hdr.xcomp_bv);
439
0
        gprintk(XENLOG_WARNING, "r0=%016lx r1=%016lx\n",
440
0
                ptr->xsave_hdr.reserved[0], ptr->xsave_hdr.reserved[1]);
441
0
        gprintk(XENLOG_WARNING, "r2=%016lx r3=%016lx\n",
442
0
                ptr->xsave_hdr.reserved[2], ptr->xsave_hdr.reserved[3]);
443
0
        gprintk(XENLOG_WARNING, "r4=%016lx r5=%016lx\n",
444
0
                ptr->xsave_hdr.reserved[4], ptr->xsave_hdr.reserved[5]);
445
0
#endif
446
0
        switch ( faults )
447
0
        {
448
0
        case 1: /* Stage 1: Reset state to be loaded. */
449
0
            ptr->xsave_hdr.xstate_bv &= ~mask;
450
0
            /*
451
0
             * Also try to eliminate fault reasons, even if this shouldn't be
452
0
             * needed here (other code should ensure the sanity of the data).
453
0
             */
454
0
            if ( ((mask & XSTATE_SSE) ||
455
0
                  ((mask & XSTATE_YMM) &&
456
0
                   !(ptr->xsave_hdr.xcomp_bv & XSTATE_COMPACTION_ENABLED))) )
457
0
                ptr->fpu_sse.mxcsr &= mxcsr_mask;
458
0
            if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY )
459
0
            {
460
0
                ptr->xsave_hdr.xcomp_bv &= this_cpu(xcr0) | this_cpu(xss);
461
0
                ptr->xsave_hdr.xstate_bv &= ptr->xsave_hdr.xcomp_bv;
462
0
                ptr->xsave_hdr.xcomp_bv |= XSTATE_COMPACTION_ENABLED;
463
0
            }
464
0
            else
465
0
            {
466
0
                ptr->xsave_hdr.xstate_bv &= this_cpu(xcr0);
467
0
                ptr->xsave_hdr.xcomp_bv = 0;
468
0
            }
469
0
            memset(ptr->xsave_hdr.reserved, 0, sizeof(ptr->xsave_hdr.reserved));
470
0
            continue;
471
0
472
0
        case 2: /* Stage 2: Reset all state. */
473
0
            ptr->fpu_sse.mxcsr = MXCSR_DEFAULT;
474
0
            ptr->xsave_hdr.xstate_bv = 0;
475
0
            ptr->xsave_hdr.xcomp_bv = v->arch.xcr0_accum & XSTATE_XSAVES_ONLY
476
0
                                      ? XSTATE_COMPACTION_ENABLED : 0;
477
0
            continue;
478
0
        }
479
0
480
0
        domain_crash(current->domain);
481
0
        return;
482
0
    }
483
286
}
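
xrstor() tolerates faults on corrupted images by retrying with progressively more aggressive sanitisation: stage 1 trims the requested state, stage 2 resets everything, and anything beyond that crashes the domain. A user-space sketch of that staged retry loop follows, with a stub load standing in for the real XRSTOR; the data layout is invented for illustration.

/* Sketch of the staged recovery loop: retry after each sanitisation step,
 * stop once a pass completes without bumping the fault count. */
#include <stdio.h>

struct blob { int bogus_header; int bogus_body; };

static void stub_load(const struct blob *b, unsigned int *faults)
{
    if ( b->bogus_header || b->bogus_body )
        ++*faults;                          /* models the .fixup handler */
}

int main(void)
{
    struct blob b = { .bogus_header = 1, .bogus_body = 1 };
    unsigned int faults, prev_faults;

    for ( prev_faults = faults = 0; ; prev_faults = faults )
    {
        stub_load(&b, &faults);
        if ( faults == prev_faults )
            break;                          /* loaded cleanly */
        switch ( faults )
        {
        case 1:                             /* stage 1: drop the suspect header */
            b.bogus_header = 0;
            continue;
        case 2:                             /* stage 2: reset everything */
            b.bogus_body = 0;
            continue;
        }
        puts("unrecoverable");              /* xrstor() would crash the domain */
        return 1;
    }
    puts("loaded");
    return 0;
}
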
484
485
bool xsave_enabled(const struct vcpu *v)
486
0
{
487
0
    if ( !cpu_has_xsave )
488
0
        return false;
489
0
490
0
    ASSERT(xsave_cntxt_size >= XSTATE_AREA_MIN_SIZE);
491
0
    ASSERT(v->arch.xsave_area);
492
0
493
0
    return !!v->arch.xcr0_accum;
494
0
}
495
496
int xstate_alloc_save_area(struct vcpu *v)
497
24
{
498
24
    struct xsave_struct *save_area;
499
24
    unsigned int size;
500
24
501
24
    if ( !cpu_has_xsave )
502
0
        return 0;
503
24
504
24
    if ( !is_idle_vcpu(v) || !cpu_has_xsavec )
505
24
    {
506
24
        size = xsave_cntxt_size;
507
24
        BUG_ON(size < XSTATE_AREA_MIN_SIZE);
508
24
    }
509
24
    else
510
0
    {
511
0
        /*
512
0
         * For idle vcpus on XSAVEC-capable CPUs allocate an area large
513
0
         * enough to save any individual extended state.
514
0
         */
515
0
        unsigned int i;
516
0
517
0
        for ( size = 0, i = 2; i < xstate_features; ++i )
518
0
            if ( size < xstate_sizes[i] )
519
0
                size = xstate_sizes[i];
520
0
        size += XSTATE_AREA_MIN_SIZE;
521
0
    }
522
24
523
24
    /* XSAVE/XRSTOR requires the save area to be aligned on a 64-byte boundary. */
524
24
    BUILD_BUG_ON(__alignof(*save_area) < 64);
525
24
    save_area = _xzalloc(size, __alignof(*save_area));
526
24
    if ( save_area == NULL )
527
0
        return -ENOMEM;
528
24
529
24
    /*
530
24
     * Set the memory image to default values, but don't force the context
531
24
     * to be loaded from memory (i.e. keep save_area->xsave_hdr.xstate_bv
532
24
     * clear).
533
24
     */
534
24
    save_area->fpu_sse.fcw = FCW_DEFAULT;
535
24
    save_area->fpu_sse.mxcsr = MXCSR_DEFAULT;
536
24
537
24
    v->arch.xsave_area = save_area;
538
24
    v->arch.xcr0 = 0;
539
24
    v->arch.xcr0_accum = 0;
540
24
541
24
    return 0;
542
24
}
543
544
void xstate_free_save_area(struct vcpu *v)
545
0
{
546
0
    xfree(v->arch.xsave_area);
547
0
    v->arch.xsave_area = NULL;
548
0
}
549
550
static unsigned int _xstate_ctxt_size(u64 xcr0)
551
12
{
552
12
    u64 act_xcr0 = get_xcr0();
553
12
    u32 eax, ebx = 0, ecx, edx;
554
12
    bool ok = set_xcr0(xcr0);
555
12
556
12
    ASSERT(ok);
557
12
    cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
558
12
    ASSERT(ebx <= ecx);
559
12
    ok = set_xcr0(act_xcr0);
560
12
    ASSERT(ok);
561
12
562
12
    return ebx;
563
12
}
564
565
/* Fastpath for common xstate size requests, avoiding reloads of xcr0. */
566
unsigned int xstate_ctxt_size(u64 xcr0)
567
1
{
568
1
    if ( xcr0 == xfeature_mask )
569
1
        return xsave_cntxt_size;
570
1
571
0
    if ( xcr0 == 0 )
572
0
        return 0;
573
0
574
0
    return _xstate_ctxt_size(xcr0);
575
0
}
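
Both size helpers above rely on CPUID leaf 0xD, sub-leaf 0: EBX reports the save-area size for the XCR0 value currently in force, ECX the size for every state the CPU supports. The sketch below reads the same leaf from user space via the <cpuid.h> helper shipped with GCC/Clang; it can only observe the XCR0 the OS has set, which user space cannot change.

/* Sketch of the CPUID query behind _xstate_ctxt_size()/xstate_ctxt_size(). */
#include <cpuid.h>
#include <stdio.h>

int main(void)
{
    unsigned int eax, ebx, ecx, edx;

    if ( !__get_cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx) )
    {
        puts("CPUID leaf 0xD not available");
        return 1;
    }
    printf("size for current XCR0: %u bytes\n", ebx);
    printf("size for all supported states: %u bytes\n", ecx);
    return 0;
}
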
576
577
/* Collect information about the processor's extended states. */
578
void xstate_init(struct cpuinfo_x86 *c)
579
12
{
580
12
    static bool __initdata use_xsave = true;
581
12
    boolean_param("xsave", use_xsave);
582
12
583
12
    bool bsp = c == &boot_cpu_data;
584
12
    u32 eax, ebx, ecx, edx;
585
12
    u64 feature_mask;
586
12
587
12
    if ( (bsp && !use_xsave) ||
588
12
         boot_cpu_data.cpuid_level < XSTATE_CPUID )
589
0
    {
590
0
        BUG_ON(!bsp);
591
0
        setup_clear_cpu_cap(X86_FEATURE_XSAVE);
592
0
        return;
593
0
    }
594
12
595
12
    cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
596
12
597
12
    BUG_ON((eax & XSTATE_FP_SSE) != XSTATE_FP_SSE);
598
12
    BUG_ON((eax & XSTATE_YMM) && !(eax & XSTATE_SSE));
599
12
    feature_mask = (((u64)edx << 32) | eax) & XCNTXT_MASK;
600
12
601
12
    /*
602
12
     * Set CR4_OSXSAVE and run "cpuid" to get xsave_cntxt_size.
603
12
     */
604
12
    set_in_cr4(X86_CR4_OSXSAVE);
605
12
    if ( !set_xcr0(feature_mask) )
606
0
        BUG();
607
12
608
12
    if ( bsp )
609
1
    {
610
1
        static typeof(current->arch.xsave_area->fpu_sse) __initdata ctxt;
611
1
612
1
        xfeature_mask = feature_mask;
613
1
        /*
614
1
         * xsave_cntxt_size is the max size required by enabled features.
615
1
         * We know FP/SSE and YMM about eax, and nothing about edx at present.
616
1
         */
617
1
        xsave_cntxt_size = _xstate_ctxt_size(feature_mask);
618
1
        printk("xstate: size: %#x and states: %#"PRIx64"\n",
619
1
               xsave_cntxt_size, xfeature_mask);
620
1
621
1
        asm ( "fxsave %0" : "=m" (ctxt) );
622
1
        if ( ctxt.mxcsr_mask )
623
1
            mxcsr_mask = ctxt.mxcsr_mask;
624
1
    }
625
12
    else
626
11
    {
627
11
        BUG_ON(xfeature_mask != feature_mask);
628
11
        BUG_ON(xsave_cntxt_size != _xstate_ctxt_size(feature_mask));
629
11
    }
630
12
631
12
    /* Check extended XSAVE features. */
632
12
    cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
633
12
634
12
    /* Mask out features not currently understood by Xen. */
635
12
    eax &= (cpufeat_mask(X86_FEATURE_XSAVEOPT) |
636
12
            cpufeat_mask(X86_FEATURE_XSAVEC) |
637
12
            cpufeat_mask(X86_FEATURE_XGETBV1) |
638
12
            cpufeat_mask(X86_FEATURE_XSAVES));
639
12
640
12
    c->x86_capability[cpufeat_word(X86_FEATURE_XSAVEOPT)] = eax;
641
12
642
12
    BUG_ON(eax != boot_cpu_data.x86_capability[cpufeat_word(X86_FEATURE_XSAVEOPT)]);
643
12
644
12
    if ( setup_xstate_features(bsp) && bsp )
645
0
        BUG();
646
12
}
647
648
static bool valid_xcr0(u64 xcr0)
649
11
{
650
11
    /* FP must be unconditionally set. */
651
11
    if ( !(xcr0 & XSTATE_FP) )
652
0
        return false;
653
11
654
11
    /* YMM depends on SSE. */
655
11
    if ( (xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE) )
656
0
        return false;
657
11
658
11
    if ( xcr0 & (XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM) )
659
0
    {
660
0
        /* OPMASK, ZMM, and HI_ZMM require YMM. */
661
0
        if ( !(xcr0 & XSTATE_YMM) )
662
0
            return false;
663
0
664
0
        /* OPMASK, ZMM, and HI_ZMM must be the same. */
665
0
        if ( ~xcr0 & (XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM) )
666
0
            return false;
667
0
    }
668
11
669
11
    /* BNDREGS and BNDCSR must be the same. */
670
11
    return !(xcr0 & XSTATE_BNDREGS) == !(xcr0 & XSTATE_BNDCSR);
671
11
}
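
valid_xcr0() encodes the architectural dependency rules between the XCR0 bits. The following stand-alone restatement writes the bit positions out literally (rather than taking them from Xen headers) so the rules can be exercised in user space; it is illustrative only.

/* Sketch of the XCR0 validity rules: FP mandatory, YMM needs SSE, the three
 * AVX-512 components need YMM and travel together, MPX bits travel together. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FP      (1ULL << 0)
#define SSE     (1ULL << 1)
#define YMM     (1ULL << 2)
#define BNDREGS (1ULL << 3)
#define BNDCSR  (1ULL << 4)
#define OPMASK  (1ULL << 5)
#define ZMM     (1ULL << 6)
#define HI_ZMM  (1ULL << 7)

static bool demo_valid_xcr0(uint64_t xcr0)
{
    if ( !(xcr0 & FP) )                                   /* FP is mandatory */
        return false;
    if ( (xcr0 & YMM) && !(xcr0 & SSE) )                  /* YMM depends on SSE */
        return false;
    if ( xcr0 & (OPMASK | ZMM | HI_ZMM) )
    {
        if ( !(xcr0 & YMM) )                              /* AVX-512 needs YMM */
            return false;
        if ( ~xcr0 & (OPMASK | ZMM | HI_ZMM) )            /* all three or none */
            return false;
    }
    return !(xcr0 & BNDREGS) == !(xcr0 & BNDCSR);         /* MPX: both or none */
}

int main(void)
{
    printf("%d %d %d\n",
           demo_valid_xcr0(FP | SSE | YMM),               /* 1: AVX is fine */
           demo_valid_xcr0(FP | YMM),                     /* 0: YMM without SSE */
           demo_valid_xcr0(FP | SSE | YMM | OPMASK));     /* 0: partial AVX-512 */
    return 0;
}
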
672
673
int validate_xstate(u64 xcr0, u64 xcr0_accum, const struct xsave_hdr *hdr)
674
0
{
675
0
    unsigned int i;
676
0
677
0
    if ( (hdr->xstate_bv & ~xcr0_accum) ||
678
0
         (xcr0 & ~xcr0_accum) ||
679
0
         !valid_xcr0(xcr0) ||
680
0
         !valid_xcr0(xcr0_accum) )
681
0
        return -EINVAL;
682
0
683
0
    if ( (xcr0_accum & ~xfeature_mask) ||
684
0
         hdr->xcomp_bv )
685
0
        return -EOPNOTSUPP;
686
0
687
0
    for ( i = 0; i < ARRAY_SIZE(hdr->reserved); ++i )
688
0
        if ( hdr->reserved[i] )
689
0
            return -EIO;
690
0
691
0
    return 0;
692
0
}
693
694
int handle_xsetbv(u32 index, u64 new_bv)
695
11
{
696
11
    struct vcpu *curr = current;
697
11
    u64 mask;
698
11
699
11
    if ( index != XCR_XFEATURE_ENABLED_MASK )
700
0
        return -EOPNOTSUPP;
701
11
702
11
    if ( (new_bv & ~xfeature_mask) || !valid_xcr0(new_bv) )
703
0
        return -EINVAL;
704
11
705
11
    /* XCR0.PKRU is disabled in PV mode. */
706
11
    if ( is_pv_vcpu(curr) && (new_bv & XSTATE_PKRU) )
707
0
        return -EOPNOTSUPP;
708
11
709
11
    if ( !set_xcr0(new_bv) )
710
0
        return -EFAULT;
711
11
712
11
    mask = new_bv & ~curr->arch.xcr0_accum;
713
11
    curr->arch.xcr0 = new_bv;
714
11
    curr->arch.xcr0_accum |= new_bv;
715
11
716
11
    /* LWP sets nonlazy_xstate_used independently. */
717
11
    if ( new_bv & (XSTATE_NONLAZY & ~XSTATE_LWP) )
718
0
        curr->arch.nonlazy_xstate_used = 1;
719
11
720
10
    mask &= curr->fpu_dirtied ? ~XSTATE_FP_SSE : XSTATE_NONLAZY;
721
11
    if ( mask )
722
10
    {
723
10
        unsigned long cr0 = read_cr0();
724
10
725
10
        clts();
726
10
        if ( curr->fpu_dirtied )
727
11
            asm ( "stmxcsr %0" : "=m" (curr->arch.xsave_area->fpu_sse.mxcsr) );
728
18.4E
        else if ( xstate_all(curr) )
729
0
        {
730
0
            /* See the comment in i387.c:vcpu_restore_fpu_eager(). */
731
0
            mask |= XSTATE_LAZY;
732
0
            curr->fpu_initialised = 1;
733
0
            curr->fpu_dirtied = 1;
734
0
            cr0 &= ~X86_CR0_TS;
735
0
        }
736
10
        xrstor(curr, mask);
737
10
        if ( cr0 & X86_CR0_TS )
738
0
            write_cr0(cr0);
739
10
    }
740
11
741
11
    return 0;
742
11
}
743
744
uint64_t read_bndcfgu(void)
745
0
{
746
0
    unsigned long cr0 = read_cr0();
747
0
    struct xsave_struct *xstate
748
0
        = idle_vcpu[smp_processor_id()]->arch.xsave_area;
749
0
    const struct xstate_bndcsr *bndcsr;
750
0
751
0
    ASSERT(cpu_has_mpx);
752
0
    clts();
753
0
754
0
    if ( cpu_has_xsavec )
755
0
    {
756
0
        asm ( ".byte 0x0f,0xc7,0x27\n" /* xsavec */
757
0
              : "=m" (*xstate)
758
0
              : "a" (XSTATE_BNDCSR), "d" (0), "D" (xstate) );
759
0
760
0
        bndcsr = (void *)(xstate + 1);
761
0
    }
762
0
    else
763
0
    {
764
0
        asm ( ".byte 0x0f,0xae,0x27\n" /* xsave */
765
0
              : "=m" (*xstate)
766
0
              : "a" (XSTATE_BNDCSR), "d" (0), "D" (xstate) );
767
0
768
0
        bndcsr = (void *)xstate + xstate_offsets[_XSTATE_BNDCSR];
769
0
    }
770
0
771
0
    if ( cr0 & X86_CR0_TS )
772
0
        write_cr0(cr0);
773
0
774
0
    return xstate->xsave_hdr.xstate_bv & XSTATE_BNDCSR ? bndcsr->bndcfgu : 0;
775
0
}
776
777
void xstate_set_init(uint64_t mask)
778
0
{
779
0
    unsigned long cr0 = read_cr0();
780
0
    unsigned long xcr0 = this_cpu(xcr0);
781
0
    struct vcpu *v = idle_vcpu[smp_processor_id()];
782
0
    struct xsave_struct *xstate = v->arch.xsave_area;
783
0
784
0
    if ( ~xfeature_mask & mask )
785
0
    {
786
0
        ASSERT_UNREACHABLE();
787
0
        return;
788
0
    }
789
0
790
0
    if ( (~xcr0 & mask) && !set_xcr0(xcr0 | mask) )
791
0
        return;
792
0
793
0
    clts();
794
0
795
0
    memset(&xstate->xsave_hdr, 0, sizeof(xstate->xsave_hdr));
796
0
    xrstor(v, mask);
797
0
798
0
    if ( cr0 & X86_CR0_TS )
799
0
        write_cr0(cr0);
800
0
801
0
    if ( (~xcr0 & mask) && !set_xcr0(xcr0) )
802
0
        BUG();
803
0
}
804
805
/*
806
 * Local variables:
807
 * mode: C
808
 * c-file-style: "BSD"
809
 * c-basic-offset: 4
810
 * tab-width: 4
811
 * indent-tabs-mode: nil
812
 * End:
813
 */