Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/arch/x86/cpu/mcheck/mce_intel.c
Line
Count
Source
1
#include <xen/init.h>
2
#include <xen/types.h>
3
#include <xen/irq.h>
4
#include <xen/event.h>
5
#include <xen/kernel.h>
6
#include <xen/delay.h>
7
#include <xen/smp.h>
8
#include <xen/mm.h>
9
#include <xen/cpu.h>
10
#include <asm/processor.h>
11
#include <public/sysctl.h>
12
#include <asm/system.h>
13
#include <asm/msr.h>
14
#include <asm/p2m.h>
15
#include <asm/mce.h>
16
#include <asm/apic.h>
17
#include "mce.h"
18
#include "x86_mca.h"
19
#include "barrier.h"
20
#include "util.h"
21
#include "vmce.h"
22
#include "mcaction.h"
23
24
static DEFINE_PER_CPU_READ_MOSTLY(struct mca_banks *, mce_banks_owned);
25
bool __read_mostly cmci_support;
26
static bool __read_mostly ser_support;
27
static bool __read_mostly mce_force_broadcast;
28
boolean_param("mce_fb", mce_force_broadcast);
29
30
static int __read_mostly nr_intel_ext_msrs;
31
32
/* If mce_force_broadcast == 1, lmce_support will be disabled forcibly. */
33
bool __read_mostly lmce_support;
34
35
/* The Intel SDM defines bits 15:0 of IA32_MCi_STATUS as the MC error code */
36
0
#define INTEL_MCCOD_MASK 0xFFFF
37
38
/*
39
 * Currently the Intel SDM defines 2 kinds of SRAO errors:
40
 * 1). Memory scrubbing error, error code = 0xC0 ~ 0xCF
41
 * 2). L3 explicit writeback error, error code = 0x17A
42
 */
43
0
#define INTEL_SRAO_MEM_SCRUB 0xC0 ... 0xCF
44
0
#define INTEL_SRAO_L3_EWB    0x17A
45
46
/*
47
 * Currently the Intel SDM defines 2 kinds of SRAR errors:
48
 * 1). Data Load error, error code = 0x134
49
 * 2). Instruction Fetch error, error code = 0x150
50
 */
51
0
#define INTEL_SRAR_DATA_LOAD  0x134
52
0
#define INTEL_SRAR_INSTR_FETCH  0x150
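The composite MCA error code sits in bits 15:0 of IA32_MCi_STATUS, which is exactly what INTEL_MCCOD_MASK extracts before the SRAO/SRAR handlers further down compare it against these codes. A minimal hedged sketch of that classification (the helper name is illustrative and not part of this file):

static bool is_known_srar_code(uint64_t status)
{
    /* Mask out the composite error code and compare it with the SRAR codes. */
    switch ( status & INTEL_MCCOD_MASK )
    {
    case INTEL_SRAR_DATA_LOAD:      /* data load error */
    case INTEL_SRAR_INSTR_FETCH:    /* instruction fetch error */
        return true;
    }

    return false;
}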
53
54
#ifdef CONFIG_X86_MCE_THERMAL
55
0
#define MCE_RING                0x1
56
static DEFINE_PER_CPU(int, last_state);
57
58
static void intel_thermal_interrupt(struct cpu_user_regs *regs)
59
0
{
60
0
    uint64_t msr_content;
61
0
    unsigned int cpu = smp_processor_id();
62
0
    static DEFINE_PER_CPU(s_time_t, next);
63
0
    int *this_last_state;
64
0
65
0
    ack_APIC_irq();
66
0
67
0
    if ( NOW() < per_cpu(next, cpu) )
68
0
        return;
69
0
70
0
    per_cpu(next, cpu) = NOW() + MILLISECS(5000);
71
0
    rdmsrl(MSR_IA32_THERM_STATUS, msr_content);
72
0
    this_last_state = &per_cpu(last_state, cpu);
73
0
    if ( *this_last_state == (msr_content & MCE_RING) )
74
0
        return;
75
0
    *this_last_state = msr_content & MCE_RING;
76
0
    if ( msr_content & MCE_RING )
77
0
    {
78
0
        printk(KERN_EMERG "CPU%u: Temperature above threshold\n", cpu);
79
0
        printk(KERN_EMERG "CPU%u: Running in modulated clock mode\n", cpu);
80
0
        add_taint(TAINT_MACHINE_CHECK);
81
0
    } else
82
0
        printk(KERN_INFO "CPU%u: Temperature/speed normal\n", cpu);
83
0
}
84
85
/* Thermal monitoring depends on APIC, ACPI and clock modulation */
86
static bool intel_thermal_supported(struct cpuinfo_x86 *c)
87
13
{
88
13
    if ( !cpu_has_apic )
89
0
        return false;
90
13
    if ( !cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_TM1) )
91
0
        return false;
92
13
    return true;
93
13
}
94
95
static u32 __read_mostly lvtthmr_init;
96
97
static void __init mcheck_intel_therm_init(void)
98
1
{
99
1
    /*
100
1
     * This function is only called on the boot CPU. Save the initial thermal
101
1
     * LVT value on the BSP and use that value to restore the APs' thermal LVT
102
1
     * entries (programmed by the BIOS) later on
103
1
     */
104
1
    if ( intel_thermal_supported(&boot_cpu_data) )
105
1
        lvtthmr_init = apic_read(APIC_LVTTHMR);
106
1
}
107
108
/* P4/Xeon thermal regulation detection and init */
109
static void intel_init_thermal(struct cpuinfo_x86 *c)
110
12
{
111
12
    uint64_t msr_content;
112
12
    uint32_t val;
113
12
    int tm2 = 0;
114
12
    unsigned int cpu = smp_processor_id();
115
12
    static uint8_t thermal_apic_vector;
116
12
117
12
    if ( !intel_thermal_supported(c) )
118
0
        return; /* -ENODEV */
119
12
120
12
    /* first check if it's enabled already, in which case there might
121
12
     * be some SMM goo which handles it, so we can't even put a handler
122
12
     * since it might be delivered via SMI already -zwanem.
123
12
     */
124
12
    rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
125
12
    val = lvtthmr_init;
126
12
    /*
127
12
     * The initial value of thermal LVT entries on all APs always reads
128
12
     * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
129
12
     * sequence to them and LVT registers are reset to 0s except for
130
12
     * the mask bits which are set to 1s when APs receive INIT IPI.
131
12
     * If BIOS takes over the thermal interrupt and sets its interrupt
132
12
     * delivery mode to SMI (not fixed), it restores the value that the
133
12
     * BIOS has programmed on AP based on BSP's info we saved (since BIOS
134
12
     * is required to set the same value for all threads/cores).
135
12
     */
136
12
    if ( (val & APIC_MODE_MASK) != APIC_DM_FIXED
137
12
         || (val & APIC_VECTOR_MASK) > 0xf )
138
0
        apic_write(APIC_LVTTHMR, val);
139
12
140
12
    if ( (msr_content & (1ULL<<3))
141
12
         && (val & APIC_MODE_MASK) == APIC_DM_SMI )
142
0
    {
143
0
        if ( c == &boot_cpu_data )
144
0
            printk(KERN_DEBUG "Thermal monitoring handled by SMI\n");
145
0
        return; /* -EBUSY */
146
0
    }
147
12
148
12
    if ( cpu_has(c, X86_FEATURE_TM2) && (msr_content & (1ULL << 13)) )
149
0
        tm2 = 1;
150
12
151
12
    /* check whether a vector already exists, temporarily masked? */
152
12
    if ( val & APIC_VECTOR_MASK )
153
0
    {
154
0
        if ( c == &boot_cpu_data )
155
0
            printk(KERN_DEBUG "Thermal LVT vector (%#x) already installed\n",
156
0
                   val & APIC_VECTOR_MASK);
157
0
        return; /* -EBUSY */
158
0
    }
159
12
160
12
    alloc_direct_apic_vector(&thermal_apic_vector, intel_thermal_interrupt);
161
12
162
12
    /* The temperature transition interrupt handler setup */
163
12
    val = thermal_apic_vector;    /* our delivery vector */
164
12
    val |= (APIC_DM_FIXED | APIC_LVT_MASKED);  /* we'll mask till we're ready */
165
12
    apic_write(APIC_LVTTHMR, val);
166
12
167
12
    rdmsrl(MSR_IA32_THERM_INTERRUPT, msr_content);
168
12
    wrmsrl(MSR_IA32_THERM_INTERRUPT, msr_content | 0x03);
169
12
170
12
    rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
171
12
    wrmsrl(MSR_IA32_MISC_ENABLE, msr_content | (1ULL<<3));
172
12
173
12
    apic_write(APIC_LVTTHMR, val & ~APIC_LVT_MASKED);
174
12
    if ( opt_cpu_info )
175
0
        printk(KERN_INFO "CPU%u: Thermal monitoring enabled (%s)\n",
176
0
               cpu, tm2 ? "TM2" : "TM1");
177
12
    return;
178
12
}
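The two MSR writes above use raw masks; for reference, a hedged sketch naming the bits involved (the constant names are illustrative, assume the usual IA32_THERM_INTERRUPT and IA32_MISC_ENABLE layouts, and are not defined in this file):

/* Assumed bit meanings, for illustration only. */
#define THERM_INT_HIGH_TEMP_EN  (1ULL << 0) /* high-temperature interrupt enable */
#define THERM_INT_LOW_TEMP_EN   (1ULL << 1) /* low-temperature interrupt enable */
#define MISC_ENABLE_TM1_EN      (1ULL << 3) /* automatic thermal control (TM1) enable */

Under that assumption, "msr_content | 0x03" enables both temperature-threshold interrupts and "msr_content | (1ULL<<3)" turns on the thermal control circuit.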
179
#endif /* CONFIG_X86_MCE_THERMAL */
180
181
/* Intel MCE handler */
182
static inline void intel_get_extended_msr(struct mcinfo_extended *ext, u32 msr)
183
0
{
184
0
    if ( ext->mc_msrs < ARRAY_SIZE(ext->mc_msr)
185
0
         && msr < MSR_IA32_MCG_EAX + nr_intel_ext_msrs )
186
0
    {
187
0
        ext->mc_msr[ext->mc_msrs].reg = msr;
188
0
        rdmsrl(msr, ext->mc_msr[ext->mc_msrs].value);
189
0
        ++ext->mc_msrs;
190
0
    }
191
0
}
192
193
194
struct mcinfo_extended *
195
intel_get_extended_msrs(struct mcinfo_global *mig, struct mc_info *mi)
196
0
{
197
0
    struct mcinfo_extended *mc_ext;
198
0
    int i;
199
0
200
0
    /*
201
0
     * According to the spec, a processor supporting 64-bit will always
202
0
     * have MSRs beyond IA32_MCG_MISC
203
0
     */
204
0
    if ( !mi || !mig || nr_intel_ext_msrs == 0 ||
205
0
         !(mig->mc_gstatus & MCG_STATUS_EIPV) )
206
0
        return NULL;
207
0
208
0
    mc_ext = x86_mcinfo_reserve(mi, sizeof(*mc_ext), MC_TYPE_EXTENDED);
209
0
    if ( !mc_ext )
210
0
    {
211
0
        mi->flags |= MCINFO_FLAGS_UNCOMPLETE;
212
0
        return NULL;
213
0
    }
214
0
215
0
    for ( i = MSR_IA32_MCG_EAX; i <= MSR_IA32_MCG_MISC; i++ )
216
0
        intel_get_extended_msr(mc_ext, i);
217
0
218
0
    for ( i = MSR_IA32_MCG_R8; i <= MSR_IA32_MCG_R15; i++ )
219
0
        intel_get_extended_msr(mc_ext, i);
220
0
221
0
    return mc_ext;
222
0
}
223
224
enum intel_mce_type
225
{
226
    intel_mce_invalid,
227
    intel_mce_fatal,
228
    intel_mce_corrected,
229
    intel_mce_ucr_ucna,
230
    intel_mce_ucr_srao,
231
    intel_mce_ucr_srar,
232
};
233
234
static enum intel_mce_type intel_check_mce_type(uint64_t status)
235
0
{
236
0
    if ( !(status & MCi_STATUS_VAL) )
237
0
        return intel_mce_invalid;
238
0
239
0
    if ( status & MCi_STATUS_PCC )
240
0
        return intel_mce_fatal;
241
0
242
0
    /* Corrected error? */
243
0
    if ( !(status & MCi_STATUS_UC) )
244
0
        return intel_mce_corrected;
245
0
246
0
    if ( !ser_support )
247
0
        return intel_mce_fatal;
248
0
249
0
    if ( status & MCi_STATUS_S )
250
0
    {
251
0
        if ( status & MCi_STATUS_AR )
252
0
        {
253
0
            if ( status & MCi_STATUS_OVER )
254
0
                return intel_mce_fatal;
255
0
            else
256
0
                return intel_mce_ucr_srar;
257
0
        } else
258
0
            return intel_mce_ucr_srao;
259
0
    }
260
0
    else
261
0
        return intel_mce_ucr_ucna;
262
0
263
0
    /* Any type not included above? */
264
0
    return intel_mce_fatal;
265
0
}
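As a worked example of the decision tree above: with SER support, a status word that has VAL, UC, EN, S and AR set while OVER and PCC stay clear classifies as an SRAR error. A hedged sketch (the helper is illustrative only; the MCi_STATUS_* masks are the ones this file already uses):

static void mce_type_example(void)
{
    uint64_t status = MCi_STATUS_VAL | MCi_STATUS_UC | MCi_STATUS_EN |
                      MCi_STATUS_S | MCi_STATUS_AR;

    /* VAL/UC set, OVER/PCC clear, S and AR set => SRAR when SER is on. */
    if ( ser_support )
        ASSERT(intel_check_mce_type(status) == intel_mce_ucr_srar);
}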
266
267
static void intel_memerr_dhandler(
268
             struct mca_binfo *binfo,
269
             enum mce_result *result,
270
             const struct cpu_user_regs *regs)
271
0
{
272
0
    mce_printk(MCE_VERBOSE, "MCE: Enter UCR recovery action\n");
273
0
    mc_memerr_dhandler(binfo, result, regs);
274
0
}
275
276
static bool intel_srar_check(uint64_t status)
277
0
{
278
0
    return (intel_check_mce_type(status) == intel_mce_ucr_srar);
279
0
}
280
281
static bool intel_checkaddr(uint64_t status, uint64_t misc, int addrtype)
282
0
{
283
0
    if ( !(status & MCi_STATUS_ADDRV) ||
284
0
         !(status & MCi_STATUS_MISCV) ||
285
0
         ((misc & MCi_MISC_ADDRMOD_MASK) != MCi_MISC_PHYSMOD) )
286
0
        /* addr is virtual */
287
0
        return (addrtype == MC_ADDR_VIRTUAL);
288
0
289
0
    return (addrtype == MC_ADDR_PHYSICAL);
290
0
}
291
292
static void intel_srar_dhandler(
293
             struct mca_binfo *binfo,
294
             enum mce_result *result,
295
             const struct cpu_user_regs *regs)
296
0
{
297
0
    uint64_t status = binfo->mib->mc_status;
298
0
299
0
    /* For an unknown SRAR error code, reset the system */
300
0
    *result = MCER_RESET;
301
0
302
0
    switch ( status & INTEL_MCCOD_MASK )
303
0
    {
304
0
    case INTEL_SRAR_DATA_LOAD:
305
0
    case INTEL_SRAR_INSTR_FETCH:
306
0
        intel_memerr_dhandler(binfo, result, regs);
307
0
        break;
308
0
    }
309
0
}
310
311
static bool intel_srao_check(uint64_t status)
312
0
{
313
0
    return (intel_check_mce_type(status) == intel_mce_ucr_srao);
314
0
}
315
316
static void intel_srao_dhandler(
317
             struct mca_binfo *binfo,
318
             enum mce_result *result,
319
             const struct cpu_user_regs *regs)
320
0
{
321
0
    uint64_t status = binfo->mib->mc_status;
322
0
323
0
    /* For an unknown SRAO error code, no action is required */
324
0
    *result = MCER_CONTINUE;
325
0
326
0
    if ( status & MCi_STATUS_VAL )
327
0
    {
328
0
        switch ( status & INTEL_MCCOD_MASK )
329
0
        {
330
0
        case INTEL_SRAO_MEM_SCRUB:
331
0
        case INTEL_SRAO_L3_EWB:
332
0
            intel_memerr_dhandler(binfo, result, regs);
333
0
            break;
334
0
        }
335
0
    }
336
0
}
337
338
static bool intel_default_check(uint64_t status)
339
0
{
340
0
    return true;
341
0
}
342
343
static void intel_default_mce_dhandler(
344
             struct mca_binfo *binfo,
345
             enum mce_result *result,
346
             const struct cpu_user_regs * regs)
347
0
{
348
0
    uint64_t status = binfo->mib->mc_status;
349
0
    enum intel_mce_type type;
350
0
351
0
    type = intel_check_mce_type(status);
352
0
353
0
    if ( type == intel_mce_fatal )
354
0
        *result = MCER_RESET;
355
0
    else
356
0
        *result = MCER_CONTINUE;
357
0
}
358
359
static const struct mca_error_handler intel_mce_dhandlers[] = {
360
    {intel_srao_check, intel_srao_dhandler},
361
    {intel_srar_check, intel_srar_dhandler},
362
    {intel_default_check, intel_default_mce_dhandler}
363
};
364
365
static void intel_default_mce_uhandler(
366
             struct mca_binfo *binfo,
367
             enum mce_result *result,
368
             const struct cpu_user_regs *regs)
369
0
{
370
0
    uint64_t status = binfo->mib->mc_status;
371
0
    enum intel_mce_type type;
372
0
373
0
    type = intel_check_mce_type(status);
374
0
375
0
    switch ( type )
376
0
    {
377
0
    case intel_mce_fatal:
378
0
        *result = MCER_RESET;
379
0
        break;
380
0
381
0
    default:
382
0
        *result = MCER_CONTINUE;
383
0
        break;
384
0
    }
385
0
}
386
387
static const struct mca_error_handler intel_mce_uhandlers[] = {
388
    {intel_default_check, intel_default_mce_uhandler}
389
};
390
391
/* According to the MCA OS writer's guide, the CMCI handler needs to clear the bank when
392
 * 1) CE (UC = 0)
393
 * 2) ser_support = 1, Spurious error, OVER = 0, EN = 0, [UC = 1]
394
 * 3) ser_support = 1, UCNA, OVER = 0, S = 1, AR = 0, PCC = 0, [UC = 1, EN = 1]
395
 * The MCA handler needs to clear the bank when
396
 * 1) ser_support = 1, Spurious error, OVER = 0, EN = 0, UC = 1
397
 * 2) ser_support = 1, SRAR, UC = 1, OVER = 0, S = 1, AR = 1, [EN = 1]
398
 * 3) ser_support = 1, SRAO, UC = 1, S = 1, AR = 0, [EN = 1]
399
 */
400
401
static bool intel_need_clearbank_scan(enum mca_source who, u64 status)
402
0
{
403
0
    if ( who == MCA_CMCI_HANDLER )
404
0
    {
405
0
        /* CE: the CMCI handler needs to clear the bank */
406
0
        if ( !(status & MCi_STATUS_UC) )
407
0
            return true;
408
0
        /* Spurious error: need to clear the bank */
409
0
        else if ( ser_support && !(status & MCi_STATUS_OVER)
410
0
                  && !(status & MCi_STATUS_EN) )
411
0
            return true;
412
0
        /* UCNA with OVER = 0: need to clear the bank */
413
0
        else if ( ser_support && !(status & MCi_STATUS_OVER)
414
0
                  && !(status & MCi_STATUS_PCC) && !(status & MCi_STATUS_S)
415
0
                  && !(status & MCi_STATUS_AR) )
416
0
            return true;
417
0
        /* Only Log, no clear */
418
0
        else return false;
419
0
    }
420
0
    else if ( who == MCA_MCE_SCAN )
421
0
    {
422
0
        if ( !ser_support )
423
0
            return false;
424
0
        /*
425
0
         * For a fatal error, it shouldn't be cleared so that the sticky bank
426
0
         * has a chance to be handled after reboot by polling
427
0
         */
428
0
        if ( (status & MCi_STATUS_UC) && (status & MCi_STATUS_PCC) )
429
0
            return false;
430
0
        /* Spurious error: need to clear the bank */
431
0
        else if ( !(status & MCi_STATUS_OVER)
432
0
                  && (status & MCi_STATUS_UC) && !(status & MCi_STATUS_EN) )
433
0
            return true;
434
0
        /* SRAR with OVER = 0: clear the bank. OVER = 1 would have caused a reset */
435
0
        else if ( (status & MCi_STATUS_UC)
436
0
                  && (status & MCi_STATUS_S) && (status & MCi_STATUS_AR)
437
0
                  && !(status & MCi_STATUS_OVER) )
438
0
            return true;
439
0
        /* SRAO: need to clear the bank */
440
0
        else if ( !(status & MCi_STATUS_AR)
441
0
                  && (status & MCi_STATUS_S) && (status & MCi_STATUS_UC) )
442
0
            return true;
443
0
        else
444
0
            return false;
445
0
    }
446
0
447
0
    return true;
448
0
}
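To make the rules above concrete: a UCNA status (UC = 1, EN = 1, OVER = 0, PCC = 0, S = 0, AR = 0) is cleared by the CMCI handler but left in place by the MCE scan. A hedged sketch, assuming ser_support is set (the helper is illustrative only):

static void clearbank_example(void)
{
    uint64_t ucna = MCi_STATUS_VAL | MCi_STATUS_UC | MCi_STATUS_EN;

    if ( ser_support )
    {
        /* CMCI handler: a UCNA with OVER = 0 gets its bank cleared. */
        ASSERT(intel_need_clearbank_scan(MCA_CMCI_HANDLER, ucna));
        /* MCE scan: the same status matches none of the clear cases. */
        ASSERT(!intel_need_clearbank_scan(MCA_MCE_SCAN, ucna));
    }
}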
449
450
/*
451
 * MCE continues/is recoverable when
452
 * 1) CE UC = 0
453
 * 2) Spurious ser_support = 1, OVER = 0, EN = 0 [UC = 1]
454
 * 3) SRAR ser_support = 1, OVER = 0, PCC = 0, S = 1, AR = 1 [UC = 1, EN = 1]
455
 * 4) SRAO ser_support = 1, PCC = 0, S = 1, AR = 0, EN = 1 [UC = 1]
456
 * 5) UCNA ser_support = 1, OVER = 0, EN = 1, PCC = 0, S = 0, AR = 0, [UC = 1]
457
 */
458
static bool intel_recoverable_scan(uint64_t status)
459
0
{
460
0
461
0
    if ( !(status & MCi_STATUS_UC) )
462
0
        return true;
463
0
    else if ( ser_support && !(status & MCi_STATUS_EN)
464
0
              && !(status & MCi_STATUS_OVER) )
465
0
        return true;
466
0
    /* SRAR error */
467
0
    else if ( ser_support && !(status & MCi_STATUS_OVER)
468
0
              && !(status & MCi_STATUS_PCC) && (status & MCi_STATUS_S)
469
0
              && (status & MCi_STATUS_AR) && (status & MCi_STATUS_EN) )
470
0
        return true;
471
0
    /* SRAO error */
472
0
    else if ( ser_support && !(status & MCi_STATUS_PCC)
473
0
              && (status & MCi_STATUS_S) && !(status & MCi_STATUS_AR)
474
0
              && (status & MCi_STATUS_EN) )
475
0
        return true;
476
0
    /* UCNA error */
477
0
    else if ( ser_support && !(status & MCi_STATUS_OVER)
478
0
              && (status & MCi_STATUS_EN) && !(status & MCi_STATUS_PCC)
479
0
              && !(status & MCi_STATUS_S) && !(status & MCi_STATUS_AR) )
480
0
        return true;
481
0
    return false;
482
0
}
483
484
/* CMCI */
485
static DEFINE_SPINLOCK(cmci_discover_lock);
486
487
/*
488
 * Discover bank sharing using the algorithm recommended in the SDM.
489
 */
490
static int do_cmci_discover(int i)
491
276
{
492
276
    unsigned msr = MSR_IA32_MCx_CTL2(i);
493
276
    u64 val;
494
276
    unsigned int threshold, max_threshold;
495
276
    static unsigned int cmci_threshold = 2;
496
276
    integer_param("cmci-threshold", cmci_threshold);
497
276
498
276
    rdmsrl(msr, val);
499
276
    /* Some other CPU already owns this bank. */
500
276
    if ( val & CMCI_EN )
501
161
    {
502
161
        mcabanks_clear(i, __get_cpu_var(mce_banks_owned));
503
161
        goto out;
504
161
    }
505
276
506
115
    if ( cmci_threshold )
507
115
    {
508
115
        wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD_MASK);
509
115
        rdmsrl(msr, val);
510
115
    }
511
115
512
115
    if ( !(val & CMCI_EN) )
513
84
    {
514
84
        /* This bank does not support CMCI. Polling timer has to handle it. */
515
84
        mcabanks_set(i, __get_cpu_var(no_cmci_banks));
516
84
        wrmsrl(msr, val & ~CMCI_THRESHOLD_MASK);
517
84
        return 0;
518
84
    }
519
31
    max_threshold = MASK_EXTR(val, CMCI_THRESHOLD_MASK);
520
31
    threshold = cmci_threshold;
521
31
    if ( threshold > max_threshold )
522
0
    {
523
0
        mce_printk(MCE_QUIET,
524
0
                   "CMCI: threshold %#x too large for CPU%u bank %u, using %#x\n",
525
0
                   threshold, smp_processor_id(), i, max_threshold);
526
0
        threshold = max_threshold;
527
0
    }
528
31
    wrmsrl(msr, (val & ~CMCI_THRESHOLD_MASK) | CMCI_EN | threshold);
529
31
    mcabanks_set(i, __get_cpu_var(mce_banks_owned));
530
192
out:
531
192
    mcabanks_clear(i, __get_cpu_var(no_cmci_banks));
532
192
    return 1;
533
31
}
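The core of the discovery is a write-then-read-back probe of IA32_MCi_CTL2: if CMCI_EN does not stick, the bank has no CMCI capability, and if it was already set, another CPU owns the bank. A hedged sketch of just that probe (the helper name is illustrative):

static bool bank_supports_cmci(unsigned int bank)
{
    unsigned int msr = MSR_IA32_MCx_CTL2(bank);
    uint64_t val;

    rdmsrl(msr, val);
    if ( val & CMCI_EN )
        return true;                  /* already enabled by another owner */

    wrmsrl(msr, val | CMCI_EN);
    rdmsrl(msr, val);

    return val & CMCI_EN;             /* didn't stick => no CMCI support */
}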
534
535
static void cmci_discover(void)
536
12
{
537
12
    unsigned long flags;
538
12
    int i;
539
12
    mctelem_cookie_t mctc;
540
12
    struct mca_summary bs;
541
12
542
12
    mce_printk(MCE_VERBOSE, "CMCI: find owner on CPU%d\n", smp_processor_id());
543
12
544
12
    spin_lock_irqsave(&cmci_discover_lock, flags);
545
12
546
288
    for ( i = 0; i < nr_mce_banks; i++ )
547
276
        if ( !mcabanks_test(i, __get_cpu_var(mce_banks_owned)) )
548
276
            do_cmci_discover(i);
549
12
550
12
    spin_unlock_irqrestore(&cmci_discover_lock, flags);
551
12
552
12
    /*
553
12
     * In case a CMCI happened during the owner change.
554
12
     * If a CMCI happened but was not processed immediately,
555
12
     * MCi_STATUS (error count, bits 38~52) is not cleared,
556
12
     * the CMCI interrupt will never be triggered again.
557
12
     */
558
12
559
12
    mctc = mcheck_mca_logout(
560
12
        MCA_CMCI_HANDLER, __get_cpu_var(mce_banks_owned), &bs, NULL);
561
12
562
12
    if ( bs.errcnt && mctc != NULL )
563
0
    {
564
0
        if ( dom0_vmce_enabled() )
565
0
        {
566
0
            mctelem_commit(mctc);
567
0
            send_global_virq(VIRQ_MCA);
568
0
        }
569
0
        else
570
0
        {
571
0
            x86_mcinfo_dump(mctelem_dataptr(mctc));
572
0
            mctelem_dismiss(mctc);
573
0
        }
574
0
    }
575
12
    else if ( mctc != NULL )
576
0
        mctelem_dismiss(mctc);
577
12
578
12
    mce_printk(MCE_VERBOSE, "CMCI: CPU%d owner_map[%lx], no_cmci_map[%lx]\n",
579
0
               smp_processor_id(),
580
0
               *((unsigned long *)__get_cpu_var(mce_banks_owned)->bank_map),
581
0
               *((unsigned long *)__get_cpu_var(no_cmci_banks)->bank_map));
582
12
}
583
584
/*
585
 * Define an owner for each bank. Banks can be shared between CPUs
586
 * and, to avoid reporting events multiple times, always set up one
587
 * CPU as owner.
588
 *
589
 * The assignment has to be redone when CPUs go offline and
590
 * any of the owners goes away. Also pollers run in parallel so we
591
 * have to be careful to update the banks in a way that doesn't
592
 * lose or duplicate events.
593
 */
594
595
static void mce_set_owner(void)
596
12
{
597
12
    if ( !cmci_support || !opt_mce )
598
0
        return;
599
12
600
12
    cmci_discover();
601
12
}
602
603
static void __cpu_mcheck_distribute_cmci(void *unused)
604
0
{
605
0
    cmci_discover();
606
0
}
607
608
static void cpu_mcheck_distribute_cmci(void)
609
0
{
610
0
    if ( cmci_support && opt_mce )
611
0
        on_each_cpu(__cpu_mcheck_distribute_cmci, NULL, 0);
612
0
}
613
614
static void clear_cmci(void)
615
0
{
616
0
    int i;
617
0
618
0
    if ( !cmci_support || !opt_mce )
619
0
        return;
620
0
621
0
    mce_printk(MCE_VERBOSE, "CMCI: clear_cmci support on CPU%d\n",
622
0
               smp_processor_id());
623
0
624
0
    for ( i = 0; i < nr_mce_banks; i++ )
625
0
    {
626
0
        unsigned msr = MSR_IA32_MCx_CTL2(i);
627
0
        u64 val;
628
0
        if ( !mcabanks_test(i, __get_cpu_var(mce_banks_owned)) )
629
0
            continue;
630
0
        rdmsrl(msr, val);
631
0
        if ( val & (CMCI_EN|CMCI_THRESHOLD_MASK) )
632
0
            wrmsrl(msr, val & ~(CMCI_EN|CMCI_THRESHOLD_MASK));
633
0
        mcabanks_clear(i, __get_cpu_var(mce_banks_owned));
634
0
    }
635
0
}
636
637
static void cpu_mcheck_disable(void)
638
0
{
639
0
    clear_in_cr4(X86_CR4_MCE);
640
0
641
0
    if ( cmci_support && opt_mce )
642
0
        clear_cmci();
643
0
}
644
645
static void cmci_interrupt(struct cpu_user_regs *regs)
646
0
{
647
0
    mctelem_cookie_t mctc;
648
0
    struct mca_summary bs;
649
0
650
0
    ack_APIC_irq();
651
0
652
0
    mctc = mcheck_mca_logout(
653
0
        MCA_CMCI_HANDLER, __get_cpu_var(mce_banks_owned), &bs, NULL);
654
0
655
0
    if ( bs.errcnt && mctc != NULL )
656
0
    {
657
0
        if ( dom0_vmce_enabled() )
658
0
        {
659
0
            mctelem_commit(mctc);
660
0
            mce_printk(MCE_VERBOSE, "CMCI: send CMCI to DOM0 through virq\n");
661
0
            send_global_virq(VIRQ_MCA);
662
0
        }
663
0
        else
664
0
        {
665
0
            x86_mcinfo_dump(mctelem_dataptr(mctc));
666
0
            mctelem_dismiss(mctc);
667
0
        }
668
0
    }
669
0
    else if ( mctc != NULL )
670
0
        mctelem_dismiss(mctc);
671
0
}
672
673
static void intel_init_cmci(struct cpuinfo_x86 *c)
674
12
{
675
12
    u32 l, apic;
676
12
    int cpu = smp_processor_id();
677
12
678
12
    if ( !mce_available(c) || !cmci_support )
679
0
    {
680
0
        if ( opt_cpu_info )
681
0
            mce_printk(MCE_QUIET, "CMCI: CPU%d has no CMCI support\n", cpu);
682
0
        return;
683
0
    }
684
12
685
12
    apic = apic_read(APIC_CMCI);
686
12
    if ( apic & APIC_VECTOR_MASK )
687
0
    {
688
0
        mce_printk(MCE_QUIET, "CPU%d CMCI LVT vector (%#x) already installed\n",
689
0
                   cpu, ( apic & APIC_VECTOR_MASK ));
690
0
        return;
691
0
    }
692
12
693
12
    alloc_direct_apic_vector(&cmci_apic_vector, cmci_interrupt);
694
12
695
12
    apic = cmci_apic_vector;
696
12
    apic |= (APIC_DM_FIXED | APIC_LVT_MASKED);
697
12
    apic_write(APIC_CMCI, apic);
698
12
699
12
    l = apic_read(APIC_CMCI);
700
12
    apic_write(APIC_CMCI, l & ~APIC_LVT_MASKED);
701
12
702
12
    mce_set_owner();
703
12
}
704
705
/* MCA */
706
707
static bool mce_is_broadcast(struct cpuinfo_x86 *c)
708
12
{
709
12
    if ( mce_force_broadcast )
710
0
        return true;
711
12
712
12
    /*
713
12
     * According to the Intel SDM (Dec. 2009), section 15.10.4.1, for processors with
714
12
     * DisplayFamily_DisplayModel encoding of 06H_EH and above,
715
12
     * an MCA signal is broadcast to all logical processors in the system
716
12
     */
717
12
    if ( c->x86_vendor == X86_VENDOR_INTEL && c->x86 == 6 &&
718
12
         c->x86_model >= 0xe )
719
12
        return true;
720
0
    return false;
721
12
}
722
723
static bool intel_enable_lmce(void)
724
0
{
725
0
    uint64_t msr_content;
726
0
727
0
    /*
728
0
     * Section "Enabling Local Machine Check" in Intel SDM Vol 3
729
0
     * requires that software ensure the LOCK bit and LMCE_ON bit
730
0
     * of MSR_IA32_FEATURE_CONTROL are set before setting
731
0
     * MSR_IA32_MCG_EXT_CTL.LMCE_EN.
732
0
     */
733
0
734
0
    if ( rdmsr_safe(MSR_IA32_FEATURE_CONTROL, msr_content) )
735
0
        return false;
736
0
737
0
    if ( (msr_content & IA32_FEATURE_CONTROL_LOCK) &&
738
0
         (msr_content & IA32_FEATURE_CONTROL_LMCE_ON) )
739
0
    {
740
0
        wrmsrl(MSR_IA32_MCG_EXT_CTL, MCG_EXT_CTL_LMCE_EN);
741
0
        return true;
742
0
    }
743
0
744
0
    return false;
745
0
}
746
747
/* Check and init MCA */
748
static void intel_init_mca(struct cpuinfo_x86 *c)
749
12
{
750
12
    bool broadcast, cmci = false, ser = false, lmce = false;
751
12
    int ext_num = 0, first;
752
12
    uint64_t msr_content;
753
12
754
12
    broadcast = mce_is_broadcast(c);
755
12
756
12
    rdmsrl(MSR_IA32_MCG_CAP, msr_content);
757
12
758
12
    if ( (msr_content & MCG_CMCI_P) && cpu_has_apic )
759
12
        cmci = true;
760
12
761
12
    /* Support Software Error Recovery */
762
12
    if ( msr_content & MCG_SER_P )
763
12
        ser = true;
764
12
765
12
    if ( msr_content & MCG_EXT_P )
766
0
        ext_num = (msr_content >> MCG_EXT_CNT) & 0xff;
767
12
768
12
    first = mce_firstbank(c);
769
12
770
12
    if ( !mce_force_broadcast && (msr_content & MCG_LMCE_P) )
771
0
        lmce = intel_enable_lmce();
772
12
773
4
#define CAP(enabled, name) ((enabled) ? ", " name : "")
774
12
    if ( smp_processor_id() == 0 )
775
1
    {
776
1
        dprintk(XENLOG_INFO,
777
1
                "MCA Capability: firstbank %d, extended MCE MSR %d%s%s%s%s\n",
778
1
                first, ext_num,
779
1
                CAP(broadcast, "BCAST"),
780
1
                CAP(ser, "SER"),
781
1
                CAP(cmci, "CMCI"),
782
1
                CAP(lmce, "LMCE"));
783
1
784
1
        mce_broadcast = broadcast;
785
1
        cmci_support = cmci;
786
1
        ser_support = ser;
787
1
        lmce_support = lmce;
788
1
        nr_intel_ext_msrs = ext_num;
789
1
        firstbank = first;
790
1
    }
791
11
    else if ( cmci != cmci_support || ser != ser_support ||
792
11
              broadcast != mce_broadcast ||
793
11
              first != firstbank || ext_num != nr_intel_ext_msrs ||
794
11
              lmce != lmce_support )
795
0
        dprintk(XENLOG_WARNING,
796
12
                "CPU%u has different MCA capability "
797
12
                "(firstbank %d, extended MCE MSR %d%s%s%s%s)"
798
12
                " than BSP, may cause undetermined result!!!\n",
799
0
                smp_processor_id(), first, ext_num,
800
0
                CAP(broadcast, "BCAST"),
801
0
                CAP(ser, "SER"),
802
0
                CAP(cmci, "CMCI"),
803
0
                CAP(lmce, "LMCE"));
804
12
#undef CAP
805
12
}
806
807
static void intel_mce_post_reset(void)
808
12
{
809
12
    mctelem_cookie_t mctc;
810
12
    struct mca_summary bs;
811
12
812
12
    mctc = mcheck_mca_logout(MCA_RESET, mca_allbanks, &bs, NULL);
813
12
814
12
    /* During boot, print the telemetry and also log it for the Dom0 boot process */
815
12
    if ( bs.errcnt && mctc != NULL )
816
0
    {
817
0
        x86_mcinfo_dump(mctelem_dataptr(mctc));
818
0
        mctelem_commit(mctc);
819
0
    }
820
12
    return;
821
12
}
822
823
static void intel_init_mce(void)
824
12
{
825
12
    uint64_t msr_content;
826
12
    int i;
827
12
828
12
    intel_mce_post_reset();
829
12
830
12
    /* clear all banks */
831
288
    for ( i = firstbank; i < nr_mce_banks; i++ )
832
276
    {
833
276
        /*
834
276
         * Some banks are shared across cores; use MCi_CTL to determine whether
835
276
         * this bank has been initialized by other cores already.
836
276
         */
837
276
        rdmsrl(MSR_IA32_MCx_CTL(i), msr_content);
838
276
        if ( !msr_content )
839
60
        {
840
60
            /* if ctl is 0, this bank has never been initialized */
841
60
            mce_printk(MCE_VERBOSE, "mce_init: init bank%d\n", i);
842
60
            wrmsrl(MSR_IA32_MCx_CTL(i), 0xffffffffffffffffULL);
843
60
            wrmsrl(MSR_IA32_MCx_STATUS(i), 0x0ULL);
844
60
        }
845
276
    }
846
12
    if ( firstbank ) /* if cmci enabled, firstbank = 0 */
847
0
        wrmsrl(MSR_IA32_MC0_STATUS, 0x0ULL);
848
12
849
12
    x86_mce_vector_register(mcheck_cmn_handler);
850
12
    mce_recoverable_register(intel_recoverable_scan);
851
12
    mce_need_clearbank_register(intel_need_clearbank_scan);
852
12
    mce_register_addrcheck(intel_checkaddr);
853
12
854
12
    mce_dhandlers = intel_mce_dhandlers;
855
12
    mce_dhandler_num = ARRAY_SIZE(intel_mce_dhandlers);
856
12
    mce_uhandlers = intel_mce_uhandlers;
857
12
    mce_uhandler_num = ARRAY_SIZE(intel_mce_uhandlers);
858
12
}
859
860
static void cpu_mcabank_free(unsigned int cpu)
861
0
{
862
0
    struct mca_banks *cmci = per_cpu(no_cmci_banks, cpu);
863
0
    struct mca_banks *owned = per_cpu(mce_banks_owned, cpu);
864
0
865
0
    mcabanks_free(cmci);
866
0
    mcabanks_free(owned);
867
0
}
868
869
static int cpu_mcabank_alloc(unsigned int cpu)
870
12
{
871
12
    struct mca_banks *cmci = mcabanks_alloc();
872
12
    struct mca_banks *owned = mcabanks_alloc();
873
12
874
12
    if ( !cmci || !owned )
875
0
        goto out;
876
12
877
12
    per_cpu(no_cmci_banks, cpu) = cmci;
878
12
    per_cpu(mce_banks_owned, cpu) = owned;
879
12
    per_cpu(last_state, cpu) = -1;
880
12
881
12
    return 0;
882
0
 out:
883
0
    mcabanks_free(cmci);
884
0
    mcabanks_free(owned);
885
0
    return -ENOMEM;
886
12
}
887
888
static int cpu_callback(
889
    struct notifier_block *nfb, unsigned long action, void *hcpu)
890
33
{
891
33
    unsigned int cpu = (unsigned long)hcpu;
892
33
    int rc = 0;
893
33
894
33
    switch ( action )
895
33
    {
896
11
    case CPU_UP_PREPARE:
897
11
        rc = cpu_mcabank_alloc(cpu);
898
11
        break;
899
11
900
0
    case CPU_DYING:
901
0
        cpu_mcheck_disable();
902
0
        break;
903
11
904
0
    case CPU_UP_CANCELED:
905
0
    case CPU_DEAD:
906
0
        cpu_mcheck_distribute_cmci();
907
0
        cpu_mcabank_free(cpu);
908
0
        break;
909
33
    }
910
33
911
33
    return !rc ? NOTIFY_DONE : notifier_from_errno(rc);
912
33
}
913
914
static struct notifier_block cpu_nfb = {
915
    .notifier_call = cpu_callback
916
};
917
918
/* The P4/P6 families have a similar MCA initialization process */
919
enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c, bool bsp)
920
12
{
921
12
    if ( bsp )
922
1
    {
923
1
        /* Early MCE initialisation for BSP. */
924
1
        if ( cpu_mcabank_alloc(0) )
925
0
            BUG();
926
1
        register_cpu_notifier(&cpu_nfb);
927
1
        mcheck_intel_therm_init();
928
1
    }
929
12
930
12
    intel_init_mca(c);
931
12
932
12
    mce_handler_init();
933
12
934
12
    intel_init_mce();
935
12
936
12
    intel_init_cmci(c);
937
12
#ifdef CONFIG_X86_MCE_THERMAL
938
12
    intel_init_thermal(c);
939
12
#endif
940
12
941
12
    return mcheck_intel;
942
12
}
943
944
/* Intel-specific MCA MSRs */
945
int vmce_intel_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
946
72
{
947
72
    unsigned int bank = msr - MSR_IA32_MC0_CTL2;
948
72
949
72
    if ( bank < GUEST_MC_BANK_NUM )
950
72
    {
951
72
        v->arch.vmce.bank[bank].mci_ctl2 = val;
952
72
        mce_printk(MCE_VERBOSE, "MCE: wr MC%u_CTL2 %#"PRIx64"\n", bank, val);
953
72
    }
954
72
955
72
    return 1;
956
72
}
957
958
int vmce_intel_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val)
959
71
{
960
71
    unsigned int bank = msr - MSR_IA32_MC0_CTL2;
961
71
962
71
    if ( bank < GUEST_MC_BANK_NUM )
963
71
    {
964
71
        *val = v->arch.vmce.bank[bank].mci_ctl2;
965
71
        mce_printk(MCE_VERBOSE, "MCE: rd MC%u_CTL2 %#"PRIx64"\n", bank, *val);
966
71
    }
967
71
968
71
    return 1;
969
71
}
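Both accessors derive the bank index from the MSR's offset against MSR_IA32_MC0_CTL2 and silently ignore out-of-range banks while still reporting the access as handled. A hedged sketch of how a generic vMCE MSR intercept might route writes here (this dispatcher is an assumption for illustration, not the actual vmce.c logic):

/* Hypothetical dispatcher, for illustration only. */
static int vmce_wrmsr_sketch(struct vcpu *v, uint32_t msr, uint64_t val)
{
    if ( msr >= MSR_IA32_MC0_CTL2 &&
         msr < MSR_IA32_MC0_CTL2 + GUEST_MC_BANK_NUM )
        return vmce_intel_wrmsr(v, msr, val);   /* Intel-specific range */

    return 0;   /* not handled here */
}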
970
971
bool vmce_has_lmce(const struct vcpu *v)
972
0
{
973
0
    return v->arch.vmce.mcg_cap & MCG_LMCE_P;
974
0
}