Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/arch/x86/cpu/vpmu_intel.c
Line
Count
Source
1
/*
2
 * vpmu_core2.c: CORE 2 specific PMU virtualization for HVM domain.
3
 *
4
 * Copyright (c) 2007, Intel Corporation.
5
 *
6
 * This program is free software; you can redistribute it and/or modify it
7
 * under the terms and conditions of the GNU General Public License,
8
 * version 2, as published by the Free Software Foundation.
9
 *
10
 * This program is distributed in the hope it will be useful, but WITHOUT
11
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13
 * more details.
14
 *
15
 * You should have received a copy of the GNU General Public License along with
16
 * this program; If not, see <http://www.gnu.org/licenses/>.
17
 *
18
 * Author: Haitao Shan <haitao.shan@intel.com>
19
 */
20
21
#include <xen/sched.h>
22
#include <xen/xenoprof.h>
23
#include <xen/irq.h>
24
#include <asm/system.h>
25
#include <asm/regs.h>
26
#include <asm/types.h>
27
#include <asm/apic.h>
28
#include <asm/traps.h>
29
#include <asm/msr.h>
30
#include <asm/msr-index.h>
31
#include <asm/vpmu.h>
32
#include <asm/hvm/support.h>
33
#include <asm/hvm/vlapic.h>
34
#include <asm/hvm/vmx/vmx.h>
35
#include <asm/hvm/vmx/vmcs.h>
36
#include <public/sched.h>
37
#include <public/hvm/save.h>
38
#include <public/pmu.h>
39
40
/*
41
 * See Intel SDM Vol 2a Instruction Set Reference chapter 3 for CPUID
42
 * instruction.
43
 * cpuid 0xa - Architectural Performance Monitoring Leaf
44
 * Register eax
45
 */
46
#define PMU_VERSION_SHIFT        0  /* Version ID */
47
#define PMU_VERSION_BITS         8  /* 8 bits 0..7 */
48
#define PMU_VERSION_MASK         (((1 << PMU_VERSION_BITS) - 1) << PMU_VERSION_SHIFT)
49
50
#define PMU_GENERAL_NR_SHIFT     8  /* Number of general pmu registers */
51
#define PMU_GENERAL_NR_BITS      8  /* 8 bits 8..15 */
52
#define PMU_GENERAL_NR_MASK      (((1 << PMU_GENERAL_NR_BITS) - 1) << PMU_GENERAL_NR_SHIFT)
53
54
#define PMU_GENERAL_WIDTH_SHIFT 16  /* Width of general pmu registers */
55
#define PMU_GENERAL_WIDTH_BITS   8  /* 8 bits 16..23 */
56
#define PMU_GENERAL_WIDTH_MASK  (((1 << PMU_GENERAL_WIDTH_BITS) - 1) << PMU_GENERAL_WIDTH_SHIFT)
57
/* Register edx */
58
#define PMU_FIXED_NR_SHIFT       0  /* Number of fixed pmu registers */
59
#define PMU_FIXED_NR_BITS        5  /* 5 bits 0..4 */
60
#define PMU_FIXED_NR_MASK        (((1 << PMU_FIXED_NR_BITS) -1) << PMU_FIXED_NR_SHIFT)
61
62
#define PMU_FIXED_WIDTH_SHIFT    5  /* Width of fixed pmu registers */
63
#define PMU_FIXED_WIDTH_BITS     8  /* 8 bits 5..12 */
64
#define PMU_FIXED_WIDTH_MASK     (((1 << PMU_FIXED_WIDTH_BITS) -1) << PMU_FIXED_WIDTH_SHIFT)
65
66
/* Alias registers (0x4c1) for full-width writes to PMCs */
67
0
#define MSR_PMC_ALIAS_MASK       (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_A_PERFCTR0))
68
static bool_t __read_mostly full_width_write;
69
70
/*
71
 * MSR_CORE_PERF_FIXED_CTR_CTRL contains the configuration of all fixed
72
 * counters. 4 bits for every counter.
73
 */
74
0
#define FIXED_CTR_CTRL_BITS 4
75
0
#define FIXED_CTR_CTRL_MASK ((1 << FIXED_CTR_CTRL_BITS) - 1)
76
0
#define FIXED_CTR_CTRL_ANYTHREAD_MASK 0x4
77
78
0
#define ARCH_CNTR_ENABLED   (1ULL << 22)
79
0
#define ARCH_CNTR_PIN_CONTROL (1ULL << 19)
80
81
/* Number of general-purpose and fixed performance counters */
82
static unsigned int __read_mostly arch_pmc_cnt, fixed_pmc_cnt;
83
84
/* Masks used for testing whether an MSR is valid */
85
0
#define ARCH_CTRL_MASK  (~((1ull << 32) - 1) | (1ull << 21) | ARCH_CNTR_PIN_CONTROL)
86
static uint64_t __read_mostly fixed_ctrl_mask, fixed_counters_mask;
87
static uint64_t __read_mostly global_ovf_ctrl_mask, global_ctrl_mask;
88
89
/* Total size of PMU registers block (copied to/from PV(H) guest) */
90
static unsigned int __read_mostly regs_sz;
91
/* Offset into context of the beginning of PMU register block */
92
static const unsigned int regs_off =
93
        sizeof(((struct xen_pmu_intel_ctxt *)0)->fixed_counters) +
94
        sizeof(((struct xen_pmu_intel_ctxt *)0)->arch_counters);
95
96
/*
97
 * QUIRK to workaround an issue on various family 6 cpus.
98
 * The issue leads to endless PMC interrupt loops on the processor.
99
 * If the interrupt handler is running and a pmc reaches the value 0, this
100
 * value remains forever and it triggers immediately a new interrupt after
101
 * finishing the handler.
102
 * A workaround is to read all flagged counters and if the value is 0 write
103
 * 1 (or another value != 0) into it.
104
 * There exist no errata and the real cause of this behaviour is unknown.
105
 */
106
bool_t __read_mostly is_pmc_quirk;
107
108
static void check_pmc_quirk(void)
109
0
{
110
0
    if ( current_cpu_data.x86 == 6 )
111
0
        is_pmc_quirk = 1;
112
0
    else
113
0
        is_pmc_quirk = 0;
114
0
}
115
116
static void handle_pmc_quirk(u64 msr_content)
117
0
{
118
0
    int i;
119
0
    u64 val;
120
0
121
0
    if ( !is_pmc_quirk )
122
0
        return;
123
0
124
0
    val = msr_content;
125
0
    for ( i = 0; i < arch_pmc_cnt; i++ )
126
0
    {
127
0
        if ( val & 0x1 )
128
0
        {
129
0
            u64 cnt;
130
0
            rdmsrl(MSR_P6_PERFCTR(i), cnt);
131
0
            if ( cnt == 0 )
132
0
                wrmsrl(MSR_P6_PERFCTR(i), 1);
133
0
        }
134
0
        val >>= 1;
135
0
    }
136
0
    val = msr_content >> 32;
137
0
    for ( i = 0; i < fixed_pmc_cnt; i++ )
138
0
    {
139
0
        if ( val & 0x1 )
140
0
        {
141
0
            u64 cnt;
142
0
            rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, cnt);
143
0
            if ( cnt == 0 )
144
0
                wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, 1);
145
0
        }
146
0
        val >>= 1;
147
0
    }
148
0
}
149
150
/*
151
 * Read the number of general counters via CPUID.EAX[0xa].EAX[8..15]
152
 */
153
static int core2_get_arch_pmc_count(void)
154
0
{
155
0
    u32 eax;
156
0
157
0
    eax = cpuid_eax(0xa);
158
0
    return MASK_EXTR(eax, PMU_GENERAL_NR_MASK);
159
0
}
160
161
/*
162
 * Read the number of fixed counters via CPUID.EDX[0xa].EDX[0..4]
163
 */
164
static int core2_get_fixed_pmc_count(void)
165
0
{
166
0
    u32 edx = cpuid_edx(0xa);
167
0
168
0
    return MASK_EXTR(edx, PMU_FIXED_NR_MASK);
169
0
}
170
171
/* edx bits 5-12: Bit width of fixed-function performance counters  */
172
static int core2_get_bitwidth_fix_count(void)
173
0
{
174
0
    u32 edx;
175
0
176
0
    edx = cpuid_edx(0xa);
177
0
    return MASK_EXTR(edx, PMU_FIXED_WIDTH_MASK);
178
0
}
179
180
static int is_core2_vpmu_msr(u32 msr_index, int *type, int *index)
181
0
{
182
0
    u32 msr_index_pmc;
183
0
184
0
    switch ( msr_index )
185
0
    {
186
0
    case MSR_CORE_PERF_FIXED_CTR_CTRL:
187
0
    case MSR_IA32_DS_AREA:
188
0
    case MSR_IA32_PEBS_ENABLE:
189
0
        *type = MSR_TYPE_CTRL;
190
0
        return 1;
191
0
192
0
    case MSR_CORE_PERF_GLOBAL_CTRL:
193
0
    case MSR_CORE_PERF_GLOBAL_STATUS:
194
0
    case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
195
0
        *type = MSR_TYPE_GLOBAL;
196
0
        return 1;
197
0
198
0
    default:
199
0
200
0
        if ( (msr_index >= MSR_CORE_PERF_FIXED_CTR0) &&
201
0
             (msr_index < MSR_CORE_PERF_FIXED_CTR0 + fixed_pmc_cnt) )
202
0
        {
203
0
            *index = msr_index - MSR_CORE_PERF_FIXED_CTR0;
204
0
            *type = MSR_TYPE_COUNTER;
205
0
            return 1;
206
0
        }
207
0
208
0
        if ( (msr_index >= MSR_P6_EVNTSEL(0)) &&
209
0
             (msr_index < MSR_P6_EVNTSEL(arch_pmc_cnt)) )
210
0
        {
211
0
            *index = msr_index - MSR_P6_EVNTSEL(0);
212
0
            *type = MSR_TYPE_ARCH_CTRL;
213
0
            return 1;
214
0
        }
215
0
216
0
        msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK;
217
0
        if ( (msr_index_pmc >= MSR_IA32_PERFCTR0) &&
218
0
             (msr_index_pmc < (MSR_IA32_PERFCTR0 + arch_pmc_cnt)) )
219
0
        {
220
0
            *type = MSR_TYPE_ARCH_COUNTER;
221
0
            *index = msr_index_pmc - MSR_IA32_PERFCTR0;
222
0
            return 1;
223
0
        }
224
0
        return 0;
225
0
    }
226
0
}
227
228
static void core2_vpmu_set_msr_bitmap(struct vcpu *v)
229
0
{
230
0
    unsigned int i;
231
0
232
0
    /* Allow Read/Write PMU Counters MSR Directly. */
233
0
    for ( i = 0; i < fixed_pmc_cnt; i++ )
234
0
        vmx_clear_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR0 + i, VMX_MSR_RW);
235
0
236
0
    for ( i = 0; i < arch_pmc_cnt; i++ )
237
0
    {
238
0
        vmx_clear_msr_intercept(v, MSR_IA32_PERFCTR0 + i, VMX_MSR_RW);
239
0
240
0
        if ( full_width_write )
241
0
            vmx_clear_msr_intercept(v, MSR_IA32_A_PERFCTR0 + i, VMX_MSR_RW);
242
0
    }
243
0
244
0
    /* Allow Read PMU Non-global Controls Directly. */
245
0
    for ( i = 0; i < arch_pmc_cnt; i++ )
246
0
        vmx_clear_msr_intercept(v, MSR_P6_EVNTSEL(i), VMX_MSR_R);
247
0
248
0
    vmx_clear_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR_CTRL, VMX_MSR_R);
249
0
    vmx_clear_msr_intercept(v, MSR_IA32_DS_AREA, VMX_MSR_R);
250
0
}
251
252
static void core2_vpmu_unset_msr_bitmap(struct vcpu *v)
253
0
{
254
0
    unsigned int i;
255
0
256
0
    for ( i = 0; i < fixed_pmc_cnt; i++ )
257
0
        vmx_set_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR0 + i, VMX_MSR_RW);
258
0
259
0
    for ( i = 0; i < arch_pmc_cnt; i++ )
260
0
    {
261
0
        vmx_set_msr_intercept(v, MSR_IA32_PERFCTR0 + i, VMX_MSR_RW);
262
0
263
0
        if ( full_width_write )
264
0
            vmx_set_msr_intercept(v, MSR_IA32_A_PERFCTR0 + i, VMX_MSR_RW);
265
0
    }
266
0
267
0
    for ( i = 0; i < arch_pmc_cnt; i++ )
268
0
        vmx_set_msr_intercept(v, MSR_P6_EVNTSEL(i), VMX_MSR_R);
269
0
270
0
    vmx_set_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR_CTRL, VMX_MSR_R);
271
0
    vmx_set_msr_intercept(v, MSR_IA32_DS_AREA, VMX_MSR_R);
272
0
}
273
274
static inline void __core2_vpmu_save(struct vcpu *v)
275
0
{
276
0
    int i;
277
0
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
278
0
    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
279
0
    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
280
0
        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
281
0
282
0
    for ( i = 0; i < fixed_pmc_cnt; i++ )
283
0
        rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);
284
0
    for ( i = 0; i < arch_pmc_cnt; i++ )
285
0
        rdmsrl(MSR_IA32_PERFCTR0 + i, xen_pmu_cntr_pair[i].counter);
286
0
287
0
    if ( !is_hvm_vcpu(v) )
288
0
        rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
289
0
}
290
291
static int core2_vpmu_save(struct vcpu *v, bool_t to_guest)
292
0
{
293
0
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
294
0
295
0
    if ( !is_hvm_vcpu(v) )
296
0
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
297
0
298
0
    if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED) )
299
0
        return 0;
300
0
301
0
    __core2_vpmu_save(v);
302
0
303
0
    /* Unset PMU MSR bitmap to trap lazy load. */
304
0
    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) && is_hvm_vcpu(v) &&
305
0
         cpu_has_vmx_msr_bitmap )
306
0
        core2_vpmu_unset_msr_bitmap(v);
307
0
308
0
    if ( to_guest )
309
0
    {
310
0
        ASSERT(!has_vlapic(v->domain));
311
0
        memcpy((void *)(&vpmu->xenpmu_data->pmu.c.intel) + regs_off,
312
0
               vpmu->context + regs_off, regs_sz);
313
0
    }
314
0
315
0
    return 1;
316
0
}
317
318
static inline void __core2_vpmu_load(struct vcpu *v)
319
0
{
320
0
    unsigned int i, pmc_start;
321
0
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
322
0
    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
323
0
    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
324
0
        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
325
0
326
0
    for ( i = 0; i < fixed_pmc_cnt; i++ )
327
0
        wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);
328
0
329
0
    if ( full_width_write )
330
0
        pmc_start = MSR_IA32_A_PERFCTR0;
331
0
    else
332
0
        pmc_start = MSR_IA32_PERFCTR0;
333
0
    for ( i = 0; i < arch_pmc_cnt; i++ )
334
0
    {
335
0
        wrmsrl(pmc_start + i, xen_pmu_cntr_pair[i].counter);
336
0
        wrmsrl(MSR_P6_EVNTSEL(i), xen_pmu_cntr_pair[i].control);
337
0
    }
338
0
339
0
    wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, core2_vpmu_cxt->fixed_ctrl);
340
0
    if ( vpmu_is_set(vcpu_vpmu(v), VPMU_CPU_HAS_DS) )
341
0
        wrmsrl(MSR_IA32_DS_AREA, core2_vpmu_cxt->ds_area);
342
0
343
0
    if ( !is_hvm_vcpu(v) )
344
0
    {
345
0
        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, core2_vpmu_cxt->global_ovf_ctrl);
346
0
        core2_vpmu_cxt->global_ovf_ctrl = 0;
347
0
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
348
0
    }
349
0
}
350
351
static int core2_vpmu_verify(struct vcpu *v)
352
0
{
353
0
    unsigned int i;
354
0
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
355
0
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
356
0
    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
357
0
    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
358
0
        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
359
0
    uint64_t fixed_ctrl;
360
0
    uint64_t *priv_context = vpmu->priv_context;
361
0
    uint64_t enabled_cntrs = 0;
362
0
363
0
    if ( core2_vpmu_cxt->global_ovf_ctrl & global_ovf_ctrl_mask )
364
0
        return -EINVAL;
365
0
    if ( core2_vpmu_cxt->global_ctrl & global_ctrl_mask )
366
0
        return -EINVAL;
367
0
    if ( core2_vpmu_cxt->pebs_enable )
368
0
        return -EINVAL;
369
0
370
0
    fixed_ctrl = core2_vpmu_cxt->fixed_ctrl;
371
0
    if ( fixed_ctrl & fixed_ctrl_mask )
372
0
        return -EINVAL;
373
0
374
0
    for ( i = 0; i < fixed_pmc_cnt; i++ )
375
0
    {
376
0
        if ( fixed_counters[i] & fixed_counters_mask )
377
0
            return -EINVAL;
378
0
        if ( (fixed_ctrl >> (i * FIXED_CTR_CTRL_BITS)) & 3 )
379
0
            enabled_cntrs |= (1ULL << i);
380
0
    }
381
0
    enabled_cntrs <<= 32;
382
0
383
0
    for ( i = 0; i < arch_pmc_cnt; i++ )
384
0
    {
385
0
        uint64_t control = xen_pmu_cntr_pair[i].control;
386
0
387
0
        if ( control & ARCH_CTRL_MASK )
388
0
            return -EINVAL;
389
0
        if ( control & ARCH_CNTR_ENABLED )
390
0
            enabled_cntrs |= (1ULL << i);
391
0
    }
392
0
393
0
    if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) &&
394
0
         !(is_hvm_vcpu(v)
395
0
           ? is_canonical_address(core2_vpmu_cxt->ds_area)
396
0
           : __addr_ok(core2_vpmu_cxt->ds_area)) )
397
0
        return -EINVAL;
398
0
399
0
    if ( (core2_vpmu_cxt->global_ctrl & enabled_cntrs) ||
400
0
         (core2_vpmu_cxt->ds_area != 0) )
401
0
        vpmu_set(vpmu, VPMU_RUNNING);
402
0
    else
403
0
        vpmu_reset(vpmu, VPMU_RUNNING);
404
0
405
0
    *priv_context = enabled_cntrs;
406
0
407
0
    return 0;
408
0
}
409
410
static int core2_vpmu_load(struct vcpu *v, bool_t from_guest)
411
0
{
412
0
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
413
0
414
0
    if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
415
0
        return 0;
416
0
417
0
    if ( from_guest )
418
0
    {
419
0
        int ret;
420
0
421
0
        ASSERT(!has_vlapic(v->domain));
422
0
423
0
        memcpy(vpmu->context + regs_off,
424
0
               (void *)&v->arch.vpmu.xenpmu_data->pmu.c.intel + regs_off,
425
0
               regs_sz);
426
0
427
0
        ret = core2_vpmu_verify(v);
428
0
        if ( ret )
429
0
        {
430
0
            /*
431
0
             * Not necessary since we should never load the context until
432
0
             * guest provides valid values. But just to be safe.
433
0
             */
434
0
            memset(vpmu->context + regs_off, 0, regs_sz);
435
0
            return ret;
436
0
        }
437
0
    }
438
0
439
0
    vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
440
0
441
0
    __core2_vpmu_load(v);
442
0
443
0
    return 0;
444
0
}
445
446
static int core2_vpmu_alloc_resource(struct vcpu *v)
447
0
{
448
0
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
449
0
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = NULL;
450
0
    uint64_t *p = NULL;
451
0
452
0
    if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
453
0
        return 0;
454
0
455
0
    if ( is_hvm_vcpu(v) )
456
0
    {
457
0
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
458
0
        if ( vmx_add_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
459
0
            goto out_err;
460
0
461
0
        if ( vmx_add_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
462
0
            goto out_err;
463
0
        vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
464
0
    }
465
0
466
0
    core2_vpmu_cxt = xzalloc_bytes(sizeof(*core2_vpmu_cxt) +
467
0
                                   sizeof(uint64_t) * fixed_pmc_cnt +
468
0
                                   sizeof(struct xen_pmu_cntr_pair) *
469
0
                                   arch_pmc_cnt);
470
0
    p = xzalloc(uint64_t);
471
0
    if ( !core2_vpmu_cxt || !p )
472
0
        goto out_err;
473
0
474
0
    core2_vpmu_cxt->fixed_counters = sizeof(*core2_vpmu_cxt);
475
0
    core2_vpmu_cxt->arch_counters = core2_vpmu_cxt->fixed_counters +
476
0
                                    sizeof(uint64_t) * fixed_pmc_cnt;
477
0
478
0
    vpmu->context = core2_vpmu_cxt;
479
0
    vpmu->priv_context = p;
480
0
481
0
    if ( !has_vlapic(v->domain) )
482
0
    {
483
0
        /* Copy fixed/arch register offsets to shared area */
484
0
        ASSERT(vpmu->xenpmu_data);
485
0
        memcpy(&vpmu->xenpmu_data->pmu.c.intel, core2_vpmu_cxt, regs_off);
486
0
    }
487
0
488
0
    vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED);
489
0
490
0
    return 1;
491
0
492
0
out_err:
493
0
    release_pmu_ownership(PMU_OWNER_HVM);
494
0
495
0
    xfree(core2_vpmu_cxt);
496
0
    xfree(p);
497
0
498
0
    printk("Failed to allocate VPMU resources for domain %u vcpu %u\n",
499
0
           v->vcpu_id, v->domain->domain_id);
500
0
501
0
    return 0;
502
0
}
503
504
static int core2_vpmu_msr_common_check(u32 msr_index, int *type, int *index)
505
0
{
506
0
    struct vpmu_struct *vpmu = vcpu_vpmu(current);
507
0
508
0
    if ( !is_core2_vpmu_msr(msr_index, type, index) )
509
0
        return 0;
510
0
511
0
    if ( unlikely(!vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED)) &&
512
0
         !core2_vpmu_alloc_resource(current) )
513
0
        return 0;
514
0
515
0
    /* Do the lazy load stuff. */
516
0
    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
517
0
    {
518
0
        __core2_vpmu_load(current);
519
0
        vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
520
0
521
0
        if ( is_hvm_vcpu(current) && cpu_has_vmx_msr_bitmap )
522
0
            core2_vpmu_set_msr_bitmap(current);
523
0
    }
524
0
    return 1;
525
0
}
526
527
static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
528
                               uint64_t supported)
529
0
{
530
0
    int i, tmp;
531
0
    int type = -1, index = -1;
532
0
    struct vcpu *v = current;
533
0
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
534
0
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt;
535
0
    uint64_t *enabled_cntrs;
536
0
537
0
    if ( !core2_vpmu_msr_common_check(msr, &type, &index) )
538
0
    {
539
0
        /* Special handling for BTS */
540
0
        if ( msr == MSR_IA32_DEBUGCTLMSR )
541
0
        {
542
0
            supported |= IA32_DEBUGCTLMSR_TR | IA32_DEBUGCTLMSR_BTS |
543
0
                         IA32_DEBUGCTLMSR_BTINT;
544
0
545
0
            if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) )
546
0
                supported |= IA32_DEBUGCTLMSR_BTS_OFF_OS |
547
0
                             IA32_DEBUGCTLMSR_BTS_OFF_USR;
548
0
            if ( !(msr_content & ~supported) &&
549
0
                 vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
550
0
                return 0;
551
0
            if ( (msr_content & supported) &&
552
0
                 !vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
553
0
                printk(XENLOG_G_WARNING
554
0
                       "%pv: Debug Store unsupported on this CPU\n",
555
0
                       current);
556
0
        }
557
0
        return -EINVAL;
558
0
    }
559
0
560
0
    ASSERT(!supported);
561
0
562
0
    if ( (type == MSR_TYPE_COUNTER) && (msr_content & fixed_counters_mask) )
563
0
        /* Writing unsupported bits to a fixed counter */
564
0
        return -EINVAL;
565
0
566
0
    core2_vpmu_cxt = vpmu->context;
567
0
    enabled_cntrs = vpmu->priv_context;
568
0
    switch ( msr )
569
0
    {
570
0
    case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
571
0
        if ( msr_content & global_ovf_ctrl_mask )
572
0
            return -EINVAL;
573
0
        core2_vpmu_cxt->global_status &= ~msr_content;
574
0
        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
575
0
        return 0;
576
0
    case MSR_CORE_PERF_GLOBAL_STATUS:
577
0
        gdprintk(XENLOG_INFO, "Can not write readonly MSR: "
578
0
                 "MSR_PERF_GLOBAL_STATUS(0x38E)!\n");
579
0
        return -EINVAL;
580
0
    case MSR_IA32_PEBS_ENABLE:
581
0
        if ( vpmu_features & (XENPMU_FEATURE_IPC_ONLY |
582
0
                              XENPMU_FEATURE_ARCH_ONLY) )
583
0
            return -EINVAL;
584
0
        if ( msr_content )
585
0
            /* PEBS is reported as unavailable in MSR_IA32_MISC_ENABLE */
586
0
            return -EINVAL;
587
0
        return 0;
588
0
    case MSR_IA32_DS_AREA:
589
0
        if ( !(vpmu_features & XENPMU_FEATURE_INTEL_BTS) )
590
0
            return -EINVAL;
591
0
        if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) )
592
0
        {
593
0
            if ( !(is_hvm_vcpu(v) ? is_canonical_address(msr_content)
594
0
                                  : __addr_ok(msr_content)) )
595
0
            {
596
0
                gdprintk(XENLOG_WARNING,
597
0
                         "Illegal address for IA32_DS_AREA: %#" PRIx64 "x\n",
598
0
                         msr_content);
599
0
                return -EINVAL;
600
0
            }
601
0
            core2_vpmu_cxt->ds_area = msr_content;
602
0
            break;
603
0
        }
604
0
        gdprintk(XENLOG_WARNING, "Guest setting of DTS is ignored.\n");
605
0
        return 0;
606
0
    case MSR_CORE_PERF_GLOBAL_CTRL:
607
0
        if ( msr_content & global_ctrl_mask )
608
0
            return -EINVAL;
609
0
        core2_vpmu_cxt->global_ctrl = msr_content;
610
0
        break;
611
0
    case MSR_CORE_PERF_FIXED_CTR_CTRL:
612
0
        if ( msr_content & fixed_ctrl_mask )
613
0
            return -EINVAL;
614
0
615
0
        if ( is_hvm_vcpu(v) )
616
0
            vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
617
0
                               &core2_vpmu_cxt->global_ctrl);
618
0
        else
619
0
            rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
620
0
        *enabled_cntrs &= ~(((1ULL << fixed_pmc_cnt) - 1) << 32);
621
0
        if ( msr_content != 0 )
622
0
        {
623
0
            u64 val = msr_content;
624
0
            for ( i = 0; i < fixed_pmc_cnt; i++ )
625
0
            {
626
0
                if ( val & 3 )
627
0
                    *enabled_cntrs |= (1ULL << 32) << i;
628
0
                val >>= FIXED_CTR_CTRL_BITS;
629
0
            }
630
0
        }
631
0
632
0
        core2_vpmu_cxt->fixed_ctrl = msr_content;
633
0
        break;
634
0
    default:
635
0
        tmp = msr - MSR_P6_EVNTSEL(0);
636
0
        if ( tmp >= 0 && tmp < arch_pmc_cnt )
637
0
        {
638
0
            bool_t blocked = 0;
639
0
            uint64_t umaskevent = msr_content & MSR_IA32_CMT_EVTSEL_UE_MASK;
640
0
            struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
641
0
                vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
642
0
643
0
            if ( msr_content & ARCH_CTRL_MASK )
644
0
                return -EINVAL;
645
0
646
0
            /* PMC filters */
647
0
            if ( vpmu_features & (XENPMU_FEATURE_IPC_ONLY |
648
0
                                  XENPMU_FEATURE_ARCH_ONLY) )
649
0
            {
650
0
                blocked = 1;
651
0
                switch ( umaskevent )
652
0
                {
653
0
                /*
654
0
                 * See the Pre-Defined Architectural Performance Events table
655
0
                 * from the Intel 64 and IA-32 Architectures Software
656
0
                 * Developer's Manual, Volume 3B, System Programming Guide,
657
0
                 * Part 2.
658
0
                 */
659
0
                case 0x003c:  /* UnHalted Core Cycles */
660
0
                case 0x013c:  /* UnHalted Reference Cycles */
661
0
                case 0x00c0:  /* Instructions Retired */
662
0
                    blocked = 0;
663
0
                    break;
664
0
                }
665
0
            }
666
0
667
0
            if ( vpmu_features & XENPMU_FEATURE_ARCH_ONLY )
668
0
            {
669
0
                /* Additional counters beyond IPC only; blocked already set. */
670
0
                switch ( umaskevent )
671
0
                {
672
0
                case 0x4f2e:  /* Last Level Cache References */
673
0
                case 0x412e:  /* Last Level Cache Misses */
674
0
                case 0x00c4:  /* Branch Instructions Retired */
675
0
                case 0x00c5:  /* All Branch Mispredict Retired */
676
0
                    blocked = 0;
677
0
                    break;
678
0
               }
679
0
            }
680
0
681
0
            if ( blocked )
682
0
                return -EINVAL;
683
0
684
0
            if ( is_hvm_vcpu(v) )
685
0
                vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
686
0
                                   &core2_vpmu_cxt->global_ctrl);
687
0
            else
688
0
                rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
689
0
690
0
            if ( msr_content & ARCH_CNTR_ENABLED )
691
0
                *enabled_cntrs |= 1ULL << tmp;
692
0
            else
693
0
                *enabled_cntrs &= ~(1ULL << tmp);
694
0
695
0
            xen_pmu_cntr_pair[tmp].control = msr_content;
696
0
        }
697
0
    }
698
0
699
0
    if ( type != MSR_TYPE_GLOBAL )
700
0
        wrmsrl(msr, msr_content);
701
0
    else
702
0
    {
703
0
        if ( is_hvm_vcpu(v) )
704
0
            vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
705
0
        else
706
0
            wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
707
0
    }
708
0
709
0
    if ( (core2_vpmu_cxt->global_ctrl & *enabled_cntrs) ||
710
0
         (core2_vpmu_cxt->ds_area != 0) )
711
0
        vpmu_set(vpmu, VPMU_RUNNING);
712
0
    else
713
0
        vpmu_reset(vpmu, VPMU_RUNNING);
714
0
715
0
    return 0;
716
0
}
717
718
static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
719
0
{
720
0
    int type = -1, index = -1;
721
0
    struct vcpu *v = current;
722
0
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
723
0
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt;
724
0
725
0
    if ( core2_vpmu_msr_common_check(msr, &type, &index) )
726
0
    {
727
0
        core2_vpmu_cxt = vpmu->context;
728
0
        switch ( msr )
729
0
        {
730
0
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
731
0
            *msr_content = 0;
732
0
            break;
733
0
        case MSR_CORE_PERF_GLOBAL_STATUS:
734
0
            *msr_content = core2_vpmu_cxt->global_status;
735
0
            break;
736
0
        case MSR_CORE_PERF_GLOBAL_CTRL:
737
0
            if ( is_hvm_vcpu(v) )
738
0
                vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
739
0
            else
740
0
                rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, *msr_content);
741
0
            break;
742
0
        default:
743
0
            rdmsrl(msr, *msr_content);
744
0
        }
745
0
    }
746
0
    else if ( msr == MSR_IA32_MISC_ENABLE )
747
0
    {
748
0
        /* Extension for BTS */
749
0
        if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
750
0
            *msr_content &= ~MSR_IA32_MISC_ENABLE_BTS_UNAVAIL;
751
0
        *msr_content |= MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;
752
0
    }
753
0
754
0
    return 0;
755
0
}
756
757
/* Dump vpmu info on console, called in the context of keyhandler 'q'. */
758
static void core2_vpmu_dump(const struct vcpu *v)
759
0
{
760
0
    const struct vpmu_struct *vpmu = vcpu_vpmu(v);
761
0
    unsigned int i;
762
0
    const struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context;
763
0
    u64 val;
764
0
    uint64_t *fixed_counters;
765
0
    struct xen_pmu_cntr_pair *cntr_pair;
766
0
767
0
    if ( !core2_vpmu_cxt || !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
768
0
        return;
769
0
770
0
    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
771
0
    {
772
0
        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
773
0
            printk("    vPMU loaded\n");
774
0
        else
775
0
            printk("    vPMU allocated\n");
776
0
        return;
777
0
    }
778
0
779
0
    printk("    vPMU running\n");
780
0
781
0
    cntr_pair = vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
782
0
    fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
783
0
784
0
    /* Print the contents of the counter and its configuration msr. */
785
0
    for ( i = 0; i < arch_pmc_cnt; i++ )
786
0
        printk("      general_%d: 0x%016lx ctrl: 0x%016lx\n",
787
0
            i, cntr_pair[i].counter, cntr_pair[i].control);
788
0
789
0
    /*
790
0
     * The configuration of the fixed counter is 4 bits each in the
791
0
     * MSR_CORE_PERF_FIXED_CTR_CTRL.
792
0
     */
793
0
    val = core2_vpmu_cxt->fixed_ctrl;
794
0
    for ( i = 0; i < fixed_pmc_cnt; i++ )
795
0
    {
796
0
        printk("      fixed_%d:   0x%016lx ctrl: %#lx\n",
797
0
               i, fixed_counters[i],
798
0
               val & FIXED_CTR_CTRL_MASK);
799
0
        val >>= FIXED_CTR_CTRL_BITS;
800
0
    }
801
0
}
802
803
static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs)
804
0
{
805
0
    struct vcpu *v = current;
806
0
    u64 msr_content;
807
0
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
808
0
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context;
809
0
810
0
    rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, msr_content);
811
0
    if ( msr_content )
812
0
    {
813
0
        if ( is_pmc_quirk )
814
0
            handle_pmc_quirk(msr_content);
815
0
        core2_vpmu_cxt->global_status |= msr_content;
816
0
        msr_content &= ~global_ovf_ctrl_mask;
817
0
        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
818
0
    }
819
0
    else
820
0
    {
821
0
        /* No PMC overflow but perhaps a Trace Message interrupt. */
822
0
        __vmread(GUEST_IA32_DEBUGCTL, &msr_content);
823
0
        if ( !(msr_content & IA32_DEBUGCTLMSR_TR) )
824
0
            return 0;
825
0
    }
826
0
827
0
    return 1;
828
0
}
829
830
static void core2_vpmu_destroy(struct vcpu *v)
831
0
{
832
0
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
833
0
834
0
    xfree(vpmu->context);
835
0
    vpmu->context = NULL;
836
0
    xfree(vpmu->priv_context);
837
0
    vpmu->priv_context = NULL;
838
0
    if ( is_hvm_vcpu(v) && cpu_has_vmx_msr_bitmap )
839
0
        core2_vpmu_unset_msr_bitmap(v);
840
0
    release_pmu_ownership(PMU_OWNER_HVM);
841
0
    vpmu_clear(vpmu);
842
0
}
843
844
static const struct arch_vpmu_ops core2_vpmu_ops = {
845
    .do_wrmsr = core2_vpmu_do_wrmsr,
846
    .do_rdmsr = core2_vpmu_do_rdmsr,
847
    .do_interrupt = core2_vpmu_do_interrupt,
848
    .arch_vpmu_destroy = core2_vpmu_destroy,
849
    .arch_vpmu_save = core2_vpmu_save,
850
    .arch_vpmu_load = core2_vpmu_load,
851
    .arch_vpmu_dump = core2_vpmu_dump
852
};
853
854
int vmx_vpmu_initialise(struct vcpu *v)
855
0
{
856
0
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
857
0
    u64 msr_content;
858
0
    static bool_t ds_warned;
859
0
860
0
    if ( vpmu_mode == XENPMU_MODE_OFF )
861
0
        return 0;
862
0
863
0
    if ( v->domain->arch.cpuid->basic.pmu_version <= 1 ||
864
0
         v->domain->arch.cpuid->basic.pmu_version >= 5 )
865
0
        return -EINVAL;
866
0
867
0
    if ( (arch_pmc_cnt + fixed_pmc_cnt) == 0 )
868
0
        return -EINVAL;
869
0
870
0
    if ( !(vpmu_features & XENPMU_FEATURE_INTEL_BTS) )
871
0
        goto func_out;
872
0
    /* Check the 'Debug Store' feature in the CPUID.EAX[1]:EDX[21] */
873
0
    while ( boot_cpu_has(X86_FEATURE_DS) )
874
0
    {
875
0
        if ( !boot_cpu_has(X86_FEATURE_DTES64) )
876
0
        {
877
0
            if ( !ds_warned )
878
0
                printk(XENLOG_G_WARNING "CPU doesn't support 64-bit DS Area"
879
0
                       " - Debug Store disabled for guests\n");
880
0
            break;
881
0
        }
882
0
        vpmu_set(vpmu, VPMU_CPU_HAS_DS);
883
0
        rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
884
0
        if ( msr_content & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL )
885
0
        {
886
0
            /* If BTS_UNAVAIL is set reset the DS feature. */
887
0
            vpmu_reset(vpmu, VPMU_CPU_HAS_DS);
888
0
            if ( !ds_warned )
889
0
                printk(XENLOG_G_WARNING "CPU has set BTS_UNAVAIL"
890
0
                       " - Debug Store disabled for guests\n");
891
0
            break;
892
0
        }
893
0
894
0
        vpmu_set(vpmu, VPMU_CPU_HAS_BTS);
895
0
        if ( !ds_warned )
896
0
        {
897
0
            if ( !boot_cpu_has(X86_FEATURE_DSCPL) )
898
0
                printk(XENLOG_G_INFO
899
0
                       "vpmu: CPU doesn't support CPL-Qualified BTS\n");
900
0
            printk("******************************************************\n");
901
0
            printk("** WARNING: Emulation of BTS Feature is switched on **\n");
902
0
            printk("** Using this processor feature in a virtualized    **\n");
903
0
            printk("** environment is not 100%% safe.                    **\n");
904
0
            printk("** Setting the DS buffer address with wrong values  **\n");
905
0
            printk("** may lead to hypervisor hangs or crashes.         **\n");
906
0
            printk("** It is NOT recommended for production use!        **\n");
907
0
            printk("******************************************************\n");
908
0
        }
909
0
        break;
910
0
    }
911
0
    ds_warned = 1;
912
0
 func_out:
913
0
914
0
    /* PV domains can allocate resources immediately */
915
0
    if ( is_pv_vcpu(v) && !core2_vpmu_alloc_resource(v) )
916
0
        return -EIO;
917
0
918
0
    vpmu->arch_vpmu_ops = &core2_vpmu_ops;
919
0
920
0
    return 0;
921
0
}
922
923
int __init core2_vpmu_init(void)
924
0
{
925
0
    u64 caps;
926
0
    unsigned int version = 0;
927
0
    unsigned int i;
928
0
929
0
    if ( current_cpu_data.cpuid_level >= 0xa )
930
0
        version = MASK_EXTR(cpuid_eax(0xa), PMU_VERSION_MASK);
931
0
932
0
    switch ( version )
933
0
    {
934
0
    case 4:
935
0
        printk(XENLOG_INFO "VPMU: PMU version 4 is not fully supported. "
936
0
               "Emulating version 3\n");
937
0
        /* FALLTHROUGH */
938
0
939
0
    case 2:
940
0
    case 3:
941
0
        break;
942
0
943
0
    default:
944
0
        printk(XENLOG_WARNING "VPMU: PMU version %u is not supported\n",
945
0
               version);
946
0
        return -EINVAL;
947
0
    }
948
0
949
0
    if ( current_cpu_data.x86 != 6 )
950
0
    {
951
0
        printk(XENLOG_WARNING "VPMU: only family 6 is supported\n");
952
0
        return -EINVAL;
953
0
    }
954
0
955
0
    arch_pmc_cnt = core2_get_arch_pmc_count();
956
0
    fixed_pmc_cnt = core2_get_fixed_pmc_count();
957
0
    rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
958
0
    full_width_write = (caps >> 13) & 1;
959
0
960
0
    fixed_ctrl_mask = ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1);
961
0
    /* mask .AnyThread bits for all fixed counters */
962
0
    for ( i = 0; i < fixed_pmc_cnt; i++ )
963
0
       fixed_ctrl_mask |=
964
0
           (FIXED_CTR_CTRL_ANYTHREAD_MASK << (FIXED_CTR_CTRL_BITS * i));
965
0
966
0
    fixed_counters_mask = ~((1ull << core2_get_bitwidth_fix_count()) - 1);
967
0
    global_ctrl_mask = ~((((1ULL << fixed_pmc_cnt) - 1) << 32) |
968
0
                         ((1ULL << arch_pmc_cnt) - 1));
969
0
    global_ovf_ctrl_mask = ~(0xC000000000000000 |
970
0
                             (((1ULL << fixed_pmc_cnt) - 1) << 32) |
971
0
                             ((1ULL << arch_pmc_cnt) - 1));
972
0
    if ( version > 2 )
973
0
        /*
974
0
         * Even though we don't support Uncore counters, guests should be
975
0
         * able to clear all available overflows.
976
0
         */
977
0
        global_ovf_ctrl_mask &= ~(1ULL << 61);
978
0
979
0
    regs_sz = (sizeof(struct xen_pmu_intel_ctxt) - regs_off) +
980
0
              sizeof(uint64_t) * fixed_pmc_cnt +
981
0
              sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt;
982
0
983
0
    check_pmc_quirk();
984
0
985
0
    if ( sizeof(struct xen_pmu_data) + sizeof(uint64_t) * fixed_pmc_cnt +
986
0
         sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt > PAGE_SIZE )
987
0
    {
988
0
        printk(XENLOG_WARNING
989
0
               "VPMU: Register bank does not fit into VPMU share page\n");
990
0
        arch_pmc_cnt = fixed_pmc_cnt = 0;
991
0
        return -ENOSPC;
992
0
    }
993
0
994
0
    return 0;
995
0
}
996
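
The counter-geometry probing above (core2_get_arch_pmc_count(), core2_get_fixed_pmc_count(), core2_get_bitwidth_fix_count()) simply decodes CPUID leaf 0xa using the PMU_* shift/mask constants defined near the top of the file. As an illustrative sketch only, not part of vpmu_intel.c: the same fields can be read from user space with GCC/Clang's <cpuid.h>. The FIELD macro and the program below are hypothetical helpers for this example; the bit layout follows the file's own PMU_* definitions for the Architectural Performance Monitoring leaf.

    /* Standalone sketch: decode CPUID leaf 0xa the way the vPMU init code does. */
    #include <stdio.h>
    #include <cpuid.h>

    /* Extract an unsigned bit field [shift .. shift+bits-1] from a register. */
    #define FIELD(reg, shift, bits)  (((reg) >> (shift)) & ((1u << (bits)) - 1))

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if ( !__get_cpuid(0xa, &eax, &ebx, &ecx, &edx) )
        {
            fprintf(stderr, "CPUID leaf 0xa not available\n");
            return 1;
        }

        /* EAX[7:0] version, EAX[15:8] general counters, EAX[23:16] their width
         * (cf. PMU_VERSION_*, PMU_GENERAL_NR_*, PMU_GENERAL_WIDTH_* above). */
        printf("version:        %u\n", FIELD(eax, 0, 8));
        printf("general ctrs:   %u\n", FIELD(eax, 8, 8));
        printf("general width:  %u\n", FIELD(eax, 16, 8));

        /* EDX[4:0] fixed counters, EDX[12:5] their bit width
         * (cf. PMU_FIXED_NR_* and PMU_FIXED_WIDTH_* above). */
        printf("fixed ctrs:     %u\n", FIELD(edx, 0, 5));
        printf("fixed width:    %u\n", FIELD(edx, 5, 8));

        return 0;
    }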