Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/arch/x86/cpu/mcheck/mce.c
Line
Count
Source
1
/*
2
 * mce.c - x86 Machine Check Exception Reporting
3
 * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@codemonkey.org.uk>
4
 */
5
6
#include <xen/init.h>
7
#include <xen/types.h>
8
#include <xen/kernel.h>
9
#include <xen/smp.h>
10
#include <xen/errno.h>
11
#include <xen/console.h>
12
#include <xen/sched.h>
13
#include <xen/sched-if.h>
14
#include <xen/cpumask.h>
15
#include <xen/event.h>
16
#include <xen/guest_access.h>
17
#include <xen/hypercall.h> /* for do_mca */
18
#include <xen/cpu.h>
19
20
#include <asm/processor.h>
21
#include <asm/setup.h>
22
#include <asm/system.h>
23
#include <asm/apic.h>
24
#include <asm/msr.h>
25
#include <asm/p2m.h>
26
27
#include "mce.h"
28
#include "barrier.h"
29
#include "mcaction.h"
30
#include "util.h"
31
#include "vmce.h"
32
33
bool __read_mostly opt_mce = true;
34
boolean_param("mce", opt_mce);
35
bool __read_mostly mce_broadcast;
36
bool is_mc_panic;
37
unsigned int __read_mostly nr_mce_banks;
38
unsigned int __read_mostly firstbank;
39
uint8_t __read_mostly cmci_apic_vector;
40
41
DEFINE_PER_CPU_READ_MOSTLY(struct mca_banks *, poll_bankmask);
42
DEFINE_PER_CPU_READ_MOSTLY(struct mca_banks *, no_cmci_banks);
43
DEFINE_PER_CPU_READ_MOSTLY(struct mca_banks *, mce_clear_banks);
44
45
static void intpose_init(void);
46
static void mcinfo_clear(struct mc_info *);
47
struct mca_banks *mca_allbanks;
48
49
#define SEG_PL(segsel)   ((segsel) & 0x3)
50
0
#define _MC_MSRINJ_F_REQ_HWCR_WREN (1 << 16)
51
52
#if 0
53
#define x86_mcerr(fmt, err, args...)                                    \
54
    ({                                                                  \
55
        int _err = (err);                                               \
56
        gdprintk(XENLOG_WARNING, "x86_mcerr: " fmt ", returning %d\n",  \
57
                 ## args, _err);                                        \
58
        _err;                                                           \
59
    })
60
#else
61
0
#define x86_mcerr(fmt, err, args...) (err)
62
#endif
63
64
int mce_verbosity;
65
static int __init mce_set_verbosity(const char *str)
66
0
{
67
0
    if ( strcmp("verbose", str) == 0 )
68
0
        mce_verbosity = MCE_VERBOSE;
69
0
    else
70
0
        return -EINVAL;
71
0
72
0
    return 0;
73
0
}
74
custom_param("mce_verbosity", mce_set_verbosity);
75
76
/* Handle unconfigured int18 (should never happen) */
77
static void unexpected_machine_check(const struct cpu_user_regs *regs)
78
0
{
79
0
    console_force_unlock();
80
0
    printk("Unexpected Machine Check Exception\n");
81
0
    fatal_trap(regs, 1);
82
0
}
83
84
static x86_mce_vector_t _machine_check_vector = unexpected_machine_check;
85
86
void x86_mce_vector_register(x86_mce_vector_t hdlr)
87
12
{
88
12
    _machine_check_vector = hdlr;
89
12
    wmb();
90
12
}
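/*
 * Illustrative sketch (not part of the covered file; my_machine_check and
 * my_mcheck_init are hypothetical names): a vendor-specific mcheck init
 * path installs its #MC handler via this hook, so that do_machine_check()
 * below dispatches to it instead of unexpected_machine_check().
 */
static void my_machine_check(const struct cpu_user_regs *regs)
{
    /* Scan the MCA banks, log telemetry, decide whether to panic... */
}

static void my_mcheck_init(void)
{
    x86_mce_vector_register(my_machine_check);
}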
91
92
/* Call the installed machine check handler for this CPU setup. */
93
94
void do_machine_check(const struct cpu_user_regs *regs)
95
0
{
96
0
    _machine_check_vector(regs);
97
0
}
98
99
/*
100
 * Init machine check callback handler
101
 * It is used to collect additional information provided by newer
102
 * CPU families/models without the need to duplicate the whole handler.
103
 * This avoids having many handlers doing nearly the same thing, each
104
 * with its own tweaks and bugs.
105
 */
106
static x86_mce_callback_t mc_callback_bank_extended = NULL;
107
108
void x86_mce_callback_register(x86_mce_callback_t cbfunc)
109
0
{
110
0
    mc_callback_bank_extended = cbfunc;
111
0
}
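/*
 * Illustrative sketch only (hypothetical names; the exact x86_mce_callback_t
 * typedef lives in mce.h, and the parameter types here are inferred from the
 * call site in mcheck_mca_logout() below): a newer-model init path registers
 * an extended-telemetry callback, which is then invoked per valid bank as
 * mc_callback_bank_extended(mci, bank, status).
 */
static int my_collect_extra(struct mc_info *mi, uint16_t bank, uint64_t status)
{
    /* e.g. reserve an extended record via x86_mcinfo_reserve() and fill it
     * from model-specific MSRs. */
    return 0;
}

static void my_model_init(void)
{
    x86_mce_callback_register(my_collect_extra);
}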
112
113
/*
114
 * Machine check recoverable judgement callback handler
115
 * It is used to judge whether a UC error is recoverable by software
116
 */
117
static mce_recoverable_t mc_recoverable_scan = NULL;
118
119
void mce_recoverable_register(mce_recoverable_t cbfunc)
120
12
{
121
12
    mc_recoverable_scan = cbfunc;
122
12
}
123
124
struct mca_banks *mcabanks_alloc(void)
125
49
{
126
49
    struct mca_banks *mb;
127
49
128
49
    mb = xmalloc(struct mca_banks);
129
49
    if ( !mb )
130
0
        return NULL;
131
49
132
49
    mb->bank_map = xzalloc_array(unsigned long,
133
49
                                 BITS_TO_LONGS(nr_mce_banks));
134
49
    if ( !mb->bank_map )
135
0
    {
136
0
        xfree(mb);
137
0
        return NULL;
138
0
    }
139
49
140
49
    mb->num = nr_mce_banks;
141
49
142
49
    return mb;
143
49
}
144
145
void mcabanks_free(struct mca_banks *banks)
146
0
{
147
0
    if ( banks == NULL )
148
0
        return;
149
0
    if ( banks->bank_map )
150
0
        xfree(banks->bank_map);
151
0
    xfree(banks);
152
0
}
153
154
static void mcabank_clear(int banknum)
155
0
{
156
0
    uint64_t status;
157
0
158
0
    status = mca_rdmsr(MSR_IA32_MCx_STATUS(banknum));
159
0
160
0
    if ( status & MCi_STATUS_ADDRV )
161
0
        mca_wrmsr(MSR_IA32_MCx_ADDR(banknum), 0x0ULL);
162
0
    if ( status & MCi_STATUS_MISCV )
163
0
        mca_wrmsr(MSR_IA32_MCx_MISC(banknum), 0x0ULL);
164
0
165
0
    mca_wrmsr(MSR_IA32_MCx_STATUS(banknum), 0x0ULL);
166
0
}
167
168
/*
169
 * Callback handler for judging whether to clear a Machine Check error bank.
170
 * According to Intel's latest MCA OS Recovery Writer's Guide,
171
 * whether the error MCA bank needs to be cleared is decided by the mca_source
172
 * and the MCi_STATUS bit values.
173
 */
174
static mce_need_clearbank_t mc_need_clearbank_scan = NULL;
175
176
void mce_need_clearbank_register(mce_need_clearbank_t cbfunc)
177
12
{
178
12
    mc_need_clearbank_scan = cbfunc;
179
12
}
180
181
/*
182
 * mce_logout_lock should only be used in the trap handler,
183
 * while MCIP has not been cleared yet in the global status
184
 * register. Other use is not safe, since an MCE trap can
185
 * happen at any moment, which would cause lock recursion.
186
 */
187
static DEFINE_SPINLOCK(mce_logout_lock);
188
189
const struct mca_error_handler *__read_mostly mce_dhandlers;
190
const struct mca_error_handler *__read_mostly mce_uhandlers;
191
unsigned int __read_mostly mce_dhandler_num;
192
unsigned int __read_mostly mce_uhandler_num;
193
194
static void mca_init_bank(enum mca_source who, struct mc_info *mi, int bank)
195
0
{
196
0
    struct mcinfo_bank *mib;
197
0
198
0
    if ( !mi )
199
0
        return;
200
0
201
0
    mib = x86_mcinfo_reserve(mi, sizeof(*mib), MC_TYPE_BANK);
202
0
    if ( !mib )
203
0
    {
204
0
        mi->flags |= MCINFO_FLAGS_UNCOMPLETE;
205
0
        return;
206
0
    }
207
0
208
0
    mib->mc_status = mca_rdmsr(MSR_IA32_MCx_STATUS(bank));
209
0
210
0
    mib->mc_bank = bank;
211
0
    mib->mc_domid = DOMID_INVALID;
212
0
213
0
    if ( mib->mc_status & MCi_STATUS_MISCV )
214
0
        mib->mc_misc = mca_rdmsr(MSR_IA32_MCx_MISC(bank));
215
0
216
0
    if ( mib->mc_status & MCi_STATUS_ADDRV )
217
0
        mib->mc_addr = mca_rdmsr(MSR_IA32_MCx_ADDR(bank));
218
0
219
0
    if ( (mib->mc_status & MCi_STATUS_MISCV) &&
220
0
         (mib->mc_status & MCi_STATUS_ADDRV) &&
221
0
         (mc_check_addr(mib->mc_status, mib->mc_misc, MC_ADDR_PHYSICAL)) &&
222
0
         (who == MCA_POLLER || who == MCA_CMCI_HANDLER) &&
223
0
         (mfn_valid(_mfn(paddr_to_pfn(mib->mc_addr)))) )
224
0
    {
225
0
        struct domain *d;
226
0
227
0
        d = maddr_get_owner(mib->mc_addr);
228
0
        if ( d )
229
0
            mib->mc_domid = d->domain_id;
230
0
    }
231
0
232
0
    if ( who == MCA_CMCI_HANDLER )
233
0
    {
234
0
        mib->mc_ctrl2 = mca_rdmsr(MSR_IA32_MC0_CTL2 + bank);
235
0
        mib->mc_tsc = rdtsc();
236
0
    }
237
0
}
238
239
static int mca_init_global(uint32_t flags, struct mcinfo_global *mig)
240
0
{
241
0
    uint64_t status;
242
0
    int cpu_nr;
243
0
    const struct vcpu *curr = current;
244
0
245
0
    /* Set global information */
246
0
    status = mca_rdmsr(MSR_IA32_MCG_STATUS);
247
0
    mig->mc_gstatus = status;
248
0
    mig->mc_domid = DOMID_INVALID;
249
0
    mig->mc_vcpuid = XEN_MC_VCPUID_INVALID;
250
0
    mig->mc_flags = flags;
251
0
    cpu_nr = smp_processor_id();
252
0
    /* Retrieve detector information */
253
0
    x86_mc_get_cpu_info(cpu_nr, &mig->mc_socketid,
254
0
                        &mig->mc_coreid, &mig->mc_core_threadid,
255
0
                        &mig->mc_apicid, NULL, NULL, NULL);
256
0
257
0
    if ( curr != INVALID_VCPU )
258
0
    {
259
0
        mig->mc_domid = curr->domain->domain_id;
260
0
        mig->mc_vcpuid = curr->vcpu_id;
261
0
    }
262
0
263
0
    return 0;
264
0
}
265
266
/*
267
 * Utility function to perform MCA bank telemetry readout and to push that
268
 * telemetry towards an interested dom0 for logging and diagnosis.
269
 * The caller - #MC handler or MCA poll function - must arrange that we
270
 * do not migrate cpus.
271
 */
272
273
/* XXFM Could add overflow counting? */
274
275
/*
276
 * Add out_param clear_bank for the Machine Check Handler caller.
277
 * For the latest Intel CPUs, whether to clear the error bank status needs to
278
 * be judged by the callback function defined above.
279
 */
280
mctelem_cookie_t
281
mcheck_mca_logout(enum mca_source who, struct mca_banks *bankmask,
282
                  struct mca_summary *sp, struct mca_banks *clear_bank)
283
73
{
284
73
    uint64_t gstatus, status;
285
73
    struct mcinfo_global *mig = NULL; /* on stack */
286
73
    mctelem_cookie_t mctc = NULL;
287
73
    bool uc = false, pcc = false, recover = true, need_clear = true;
288
73
    uint32_t mc_flags = 0;
289
73
    struct mc_info *mci = NULL;
290
73
    mctelem_class_t which = MC_URGENT; /* XXXgcc */
291
73
    int errcnt = 0;
292
73
    int i;
293
73
294
73
    gstatus = mca_rdmsr(MSR_IA32_MCG_STATUS);
295
73
    switch ( who )
296
73
    {
297
0
    case MCA_MCE_SCAN:
298
0
        mc_flags = MC_FLAG_MCE;
299
0
        which = MC_URGENT;
300
0
        break;
301
0
302
53
    case MCA_POLLER:
303
53
    case MCA_RESET:
304
53
        mc_flags = MC_FLAG_POLLED;
305
53
        which = MC_NONURGENT;
306
53
        break;
307
53
308
12
    case MCA_CMCI_HANDLER:
309
12
        mc_flags = MC_FLAG_CMCI;
310
12
        which = MC_NONURGENT;
311
12
        break;
312
53
313
0
    default:
314
0
        BUG();
315
73
    }
316
73
317
73
    /*
318
73
     * If no mc_recoverable_scan callback handler is registered,
319
73
     * this error is not recoverable
320
73
     */
321
76
    recover = mc_recoverable_scan ? 1 : 0;
322
76
323
1.62k
    for ( i = 0; i < nr_mce_banks; i++ )
324
1.55k
    {
325
1.55k
        /* Skip bank if corresponding bit in bankmask is clear */
326
1.55k
        if ( !mcabanks_test(i, bankmask) )
327
1.01k
            continue;
328
1.55k
329
539
        status = mca_rdmsr(MSR_IA32_MCx_STATUS(i));
330
539
        if ( !(status & MCi_STATUS_VAL) )
331
755
            continue; /* this bank has no valid telemetry */
332
539
333
539
        /*
334
539
         * For the latest Intel CPUs, the CMCI/MCE handler caller needs to
335
539
         * decide whether to clear the bank based on MCi_STATUS bit values such as
336
539
         * OVER/UC/EN/PCC/S/AR
337
539
         */
338
18.4E
        if ( mc_need_clearbank_scan )
339
0
            need_clear = mc_need_clearbank_scan(who, status);
340
18.4E
341
18.4E
        /*
342
18.4E
         * If this is the first bank with valid MCA DATA, then
343
18.4E
         * try to reserve an entry from the urgent/nonurgent queue
344
18.4E
         * depending on whether we are called from an exception or
345
18.4E
         * a poller;  this can fail (for example dom0 may not
346
18.4E
         * yet have consumed past telemetry).
347
18.4E
         */
348
18.4E
        if ( errcnt++ == 0 )
349
0
        {
350
0
            mctc = mctelem_reserve(which);
351
0
            if ( mctc )
352
0
            {
353
0
                mci = mctelem_dataptr(mctc);
354
0
                mcinfo_clear(mci);
355
0
                mig = x86_mcinfo_reserve(mci, sizeof(*mig), MC_TYPE_GLOBAL);
356
0
                /* mc_info should at least hold up the global information */
357
0
                ASSERT(mig);
358
0
                mca_init_global(mc_flags, mig);
359
0
                /* A hook here to get global extended msrs */
360
0
                if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
361
0
                    intel_get_extended_msrs(mig, mci);
362
0
            }
363
0
        }
364
18.4E
365
18.4E
        /* flag for uncorrected errors */
366
18.4E
        if ( !uc && ((status & MCi_STATUS_UC) != 0) )
367
0
            uc = true;
368
18.4E
369
18.4E
        /* flag processor context corrupt */
370
18.4E
        if ( !pcc && ((status & MCi_STATUS_PCC) != 0) )
371
0
            pcc = true;
372
18.4E
373
18.4E
        if ( recover && uc )
374
18.4E
            /* uc = true, recover = true, we need not panic. */
375
0
            recover = mc_recoverable_scan(status);
376
18.4E
377
18.4E
        mca_init_bank(who, mci, i);
378
18.4E
379
18.4E
        if ( mc_callback_bank_extended )
380
0
            mc_callback_bank_extended(mci, i, status);
381
18.4E
382
18.4E
        /* By default, need_clear = true */
383
18.4E
        if ( who != MCA_MCE_SCAN && need_clear )
384
18.4E
            /* Clear bank */
385
0
            mcabank_clear(i);
386
18.4E
        else if ( who == MCA_MCE_SCAN && need_clear )
387
0
            mcabanks_set(i, clear_bank);
388
18.4E
389
18.4E
        wmb();
390
18.4E
    }
391
76
392
76
    if ( mig && errcnt > 0 )
393
0
    {
394
0
        if ( pcc )
395
0
            mig->mc_flags |= MC_FLAG_UNCORRECTABLE;
396
0
        else if ( uc )
397
0
            mig->mc_flags |= MC_FLAG_RECOVERABLE;
398
0
        else
399
0
            mig->mc_flags |= MC_FLAG_CORRECTABLE;
400
0
    }
401
76
402
76
    if ( sp )
403
87
    {
404
87
        sp->errcnt = errcnt;
405
87
        sp->ripv = (gstatus & MCG_STATUS_RIPV) != 0;
406
87
        sp->eipv = (gstatus & MCG_STATUS_EIPV) != 0;
407
87
        sp->lmce = (gstatus & MCG_STATUS_LMCE) != 0;
408
87
        sp->uc = uc;
409
87
        sp->pcc = pcc;
410
87
        sp->recoverable = recover;
411
87
    }
412
76
413
76
    return mci != NULL ? mctc : NULL; /* may be NULL */
414
73
}
415
416
static void mce_spin_lock(spinlock_t *lk)
417
0
{
418
0
    while ( !spin_trylock(lk) )
419
0
    {
420
0
        cpu_relax();
421
0
        mce_panic_check();
422
0
    }
423
0
}
424
425
static void mce_spin_unlock(spinlock_t *lk)
426
0
{
427
0
    spin_unlock(lk);
428
0
}
429
430
static enum mce_result mce_action(const struct cpu_user_regs *regs,
431
                                  mctelem_cookie_t mctc);
432
433
/*
434
 * Return:
435
 * -1: if system can't be recovered
436
 * 0: Continue to next step
437
 */
438
static int mce_urgent_action(const struct cpu_user_regs *regs,
439
                             mctelem_cookie_t mctc)
440
0
{
441
0
    uint64_t gstatus;
442
0
443
0
    if ( mctc == NULL )
444
0
        return 0;
445
0
446
0
    gstatus = mca_rdmsr(MSR_IA32_MCG_STATUS);
447
0
448
0
    /*
449
0
     * FIXME: When RIPV = EIPV = 0, it's a little bit tricky. It may be an
450
0
     * asynchronous error; currently we have no way to precisely determine
451
0
     * whether the error occurred in the guest or in the hypervisor.
452
0
     * To avoid handling the error in the wrong way, we treat it as unrecoverable.
453
0
     *
454
0
     * Another unrecovered case is RIPV = 0 while in hypervisor
455
0
     * since Xen is not pre-emptible.
456
0
     */
457
0
    if ( !(gstatus & MCG_STATUS_RIPV) &&
458
0
         (!(gstatus & MCG_STATUS_EIPV) || !guest_mode(regs)) )
459
0
        return -1;
460
0
461
0
    return mce_action(regs, mctc) == MCER_RESET ? -1 : 0;
462
0
}
463
464
/* Shared #MC handler. */
465
void mcheck_cmn_handler(const struct cpu_user_regs *regs)
466
0
{
467
0
    static DEFINE_MCE_BARRIER(mce_trap_bar);
468
0
    static atomic_t severity_cpu = ATOMIC_INIT(-1);
469
0
    static atomic_t found_error = ATOMIC_INIT(0);
470
0
    static cpumask_t mce_fatal_cpus;
471
0
    struct mca_banks *bankmask = mca_allbanks;
472
0
    struct mca_banks *clear_bank = __get_cpu_var(mce_clear_banks);
473
0
    uint64_t gstatus;
474
0
    mctelem_cookie_t mctc = NULL;
475
0
    struct mca_summary bs;
476
0
    bool bcast, lmce;
477
0
478
0
    mce_spin_lock(&mce_logout_lock);
479
0
480
0
    if ( clear_bank != NULL )
481
0
        memset(clear_bank->bank_map, 0x0,
482
0
               sizeof(long) * BITS_TO_LONGS(clear_bank->num));
483
0
    mctc = mcheck_mca_logout(MCA_MCE_SCAN, bankmask, &bs, clear_bank);
484
0
    lmce = bs.lmce;
485
0
    bcast = mce_broadcast && !lmce;
486
0
487
0
    if ( bs.errcnt )
488
0
    {
489
0
        /*
490
0
         * Uncorrected errors must be dealt with in softirq context.
491
0
         */
492
0
        if ( bs.uc || bs.pcc )
493
0
        {
494
0
            add_taint(TAINT_MACHINE_CHECK);
495
0
            if ( mctc )
496
0
                mctelem_defer(mctc, lmce);
497
0
            /*
498
0
             * For PCC=1 and can't be recovered, context is lost, so
499
0
             * reboot now without clearing the banks, and deal with
500
0
             * the telemetry after reboot (the MSRs are sticky)
501
0
             */
502
0
            if ( bs.pcc || !bs.recoverable )
503
0
                cpumask_set_cpu(smp_processor_id(), &mce_fatal_cpus);
504
0
        }
505
0
        else if ( mctc != NULL )
506
0
            mctelem_commit(mctc);
507
0
        atomic_set(&found_error, 1);
508
0
509
0
        /* The last CPU will take care of check/clean-up etc. */
510
0
        atomic_set(&severity_cpu, smp_processor_id());
511
0
512
0
        mce_printk(MCE_CRITICAL, "MCE: clear_bank map %lx on CPU%d\n",
513
0
                   *((unsigned long *)clear_bank), smp_processor_id());
514
0
        if ( clear_bank != NULL )
515
0
            mcheck_mca_clearbanks(clear_bank);
516
0
    }
517
0
    else if ( mctc != NULL )
518
0
        mctelem_dismiss(mctc);
519
0
    mce_spin_unlock(&mce_logout_lock);
520
0
521
0
    mce_barrier_enter(&mce_trap_bar, bcast);
522
0
    if ( mctc != NULL && mce_urgent_action(regs, mctc) )
523
0
        cpumask_set_cpu(smp_processor_id(), &mce_fatal_cpus);
524
0
    mce_barrier_exit(&mce_trap_bar, bcast);
525
0
526
0
    /*
527
0
     * Wait until everybody has processed the trap.
528
0
     */
529
0
    mce_barrier_enter(&mce_trap_bar, bcast);
530
0
    if ( lmce || atomic_read(&severity_cpu) == smp_processor_id() )
531
0
    {
532
0
        /*
533
0
         * According to the SDM, if no error bank is found on any CPU,
534
0
         * something unexpected is happening; we can't do any
535
0
         * recovery job but reset the system.
536
0
         */
537
0
        if ( atomic_read(&found_error) == 0 )
538
0
            mc_panic("MCE: No CPU found valid MCE, need reset");
539
0
        if ( !cpumask_empty(&mce_fatal_cpus) )
540
0
        {
541
0
            char *ebufp, ebuf[96] = "MCE: Fatal error happened on CPUs ";
542
0
            ebufp = ebuf + strlen(ebuf);
543
0
            cpumask_scnprintf(ebufp, 95 - strlen(ebuf), &mce_fatal_cpus);
544
0
            mc_panic(ebuf);
545
0
        }
546
0
        atomic_set(&found_error, 0);
547
0
        atomic_set(&severity_cpu, -1);
548
0
    }
549
0
    mce_barrier_exit(&mce_trap_bar, bcast);
550
0
551
0
    /* Clear flags after above fatal check */
552
0
    mce_barrier_enter(&mce_trap_bar, bcast);
553
0
    gstatus = mca_rdmsr(MSR_IA32_MCG_STATUS);
554
0
    if ( (gstatus & MCG_STATUS_MCIP) != 0 )
555
0
    {
556
0
        mce_printk(MCE_CRITICAL, "MCE: Clear MCIP@ last step");
557
0
        mca_wrmsr(MSR_IA32_MCG_STATUS, 0);
558
0
    }
559
0
    mce_barrier_exit(&mce_trap_bar, bcast);
560
0
561
0
    raise_softirq(MACHINE_CHECK_SOFTIRQ);
562
0
}
563
564
void mcheck_mca_clearbanks(struct mca_banks *bankmask)
565
0
{
566
0
    int i;
567
0
568
0
    for ( i = 0; i < nr_mce_banks; i++ )
569
0
    {
570
0
        if ( !mcabanks_test(i, bankmask) )
571
0
            continue;
572
0
        mcabank_clear(i);
573
0
    }
574
0
}
575
576
/* Check the existence of Machine Check */
577
bool mce_available(const struct cpuinfo_x86 *c)
578
25
{
579
25
    return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
580
25
}
581
582
/*
583
 * Check if bank 0 is usable for MCE. It isn't for Intel P6 family
584
 * before model 0x1a.
585
 */
586
unsigned int mce_firstbank(struct cpuinfo_x86 *c)
587
12
{
588
12
    return c->x86 == 6 &&
589
12
           c->x86_vendor == X86_VENDOR_INTEL && c->x86_model < 0x1a;
590
12
}
591
592
int show_mca_info(int inited, struct cpuinfo_x86 *c)
593
12
{
594
12
    static enum mcheck_type g_type = mcheck_unset;
595
12
596
12
    if ( inited != g_type )
597
1
    {
598
1
        char prefix[20];
599
1
        static const char *const type_str[] = {
600
1
            [mcheck_amd_famXX] = "AMD",
601
1
            [mcheck_amd_k8] = "AMD K8",
602
1
            [mcheck_intel] = "Intel"
603
1
        };
604
1
605
1
        snprintf(prefix, ARRAY_SIZE(prefix), "%sCPU%u: ",
606
1
                 g_type != mcheck_unset ? XENLOG_WARNING : XENLOG_INFO,
607
1
                 smp_processor_id());
608
1
        BUG_ON(inited >= ARRAY_SIZE(type_str));
609
1
        switch ( inited )
610
1
        {
611
1
        default:
612
1
            printk("%s%s machine check reporting enabled\n",
613
1
                   prefix, type_str[inited]);
614
1
            break;
615
1
616
0
        case mcheck_amd_famXX:
617
0
            printk("%s%s Fam%xh machine check reporting enabled\n",
618
0
                   prefix, type_str[inited], c->x86);
619
0
            break;
620
1
621
0
        case mcheck_none:
622
0
            printk("%sNo machine check initialization\n", prefix);
623
0
            break;
624
1
        }
625
1
        g_type = inited;
626
1
    }
627
12
628
12
    return 0;
629
12
}
630
631
static void set_poll_bankmask(struct cpuinfo_x86 *c)
632
12
{
633
12
    int cpu = smp_processor_id();
634
12
    struct mca_banks *mb;
635
12
636
12
    mb = per_cpu(poll_bankmask, cpu);
637
12
    BUG_ON(!mb);
638
12
639
12
    if ( cmci_support && opt_mce )
640
12
    {
641
12
        mb->num = per_cpu(no_cmci_banks, cpu)->num;
642
12
        bitmap_copy(mb->bank_map, per_cpu(no_cmci_banks, cpu)->bank_map,
643
12
                    nr_mce_banks);
644
12
    }
645
12
    else
646
0
    {
647
0
        bitmap_copy(mb->bank_map, mca_allbanks->bank_map, nr_mce_banks);
648
0
        if ( mce_firstbank(c) )
649
0
            mcabanks_clear(0, mb);
650
0
    }
651
12
}
652
653
/* The perbank ctl/status init is platform specific because of AMD's quirk */
654
int mca_cap_init(void)
655
12
{
656
12
    uint64_t msr_content;
657
12
658
12
    rdmsrl(MSR_IA32_MCG_CAP, msr_content);
659
12
660
12
    if ( msr_content & MCG_CTL_P ) /* Control register present ? */
661
0
        wrmsrl(MSR_IA32_MCG_CTL, 0xffffffffffffffffULL);
662
12
663
12
    if ( nr_mce_banks && (msr_content & MCG_CAP_COUNT) != nr_mce_banks )
664
0
    {
665
0
        dprintk(XENLOG_WARNING, "Different bank number on cpu %x\n",
666
0
                smp_processor_id());
667
0
        return -ENODEV;
668
0
    }
669
12
    nr_mce_banks = msr_content & MCG_CAP_COUNT;
670
12
671
12
    if ( !nr_mce_banks )
672
0
    {
673
0
        printk(XENLOG_INFO "CPU%u: No MCE banks present. "
674
0
               "Machine check support disabled\n", smp_processor_id());
675
0
        return -ENODEV;
676
0
    }
677
12
678
12
    /* mcabanks_alloc depends on nr_mce_banks */
679
12
    if ( !mca_allbanks )
680
1
    {
681
1
        int i;
682
1
683
1
        mca_allbanks = mcabanks_alloc();
684
24
        for ( i = 0; i < nr_mce_banks; i++ )
685
23
            mcabanks_set(i, mca_allbanks);
686
1
    }
687
12
688
12
    return mca_allbanks ? 0 : -ENOMEM;
689
12
}
690
691
static void cpu_bank_free(unsigned int cpu)
692
0
{
693
0
    struct mca_banks *poll = per_cpu(poll_bankmask, cpu);
694
0
    struct mca_banks *clr = per_cpu(mce_clear_banks, cpu);
695
0
696
0
    mcabanks_free(poll);
697
0
    mcabanks_free(clr);
698
0
}
699
700
static int cpu_bank_alloc(unsigned int cpu)
701
12
{
702
12
    struct mca_banks *poll = mcabanks_alloc();
703
12
    struct mca_banks *clr = mcabanks_alloc();
704
12
705
12
    if ( !poll || !clr )
706
0
    {
707
0
        mcabanks_free(poll);
708
0
        mcabanks_free(clr);
709
0
        return -ENOMEM;
710
0
    }
711
12
712
12
    per_cpu(poll_bankmask, cpu) = poll;
713
12
    per_cpu(mce_clear_banks, cpu) = clr;
714
12
    return 0;
715
12
}
716
717
static int cpu_callback(
718
    struct notifier_block *nfb, unsigned long action, void *hcpu)
719
33
{
720
33
    unsigned int cpu = (unsigned long)hcpu;
721
33
    int rc = 0;
722
33
723
33
    switch ( action )
724
33
    {
725
11
    case CPU_UP_PREPARE:
726
11
        rc = cpu_bank_alloc(cpu);
727
11
        break;
728
11
729
0
    case CPU_UP_CANCELED:
730
0
    case CPU_DEAD:
731
0
        cpu_bank_free(cpu);
732
0
        break;
733
33
    }
734
33
735
33
    return !rc ? NOTIFY_DONE : notifier_from_errno(rc);
736
33
}
737
738
static struct notifier_block cpu_nfb = {
739
    .notifier_call = cpu_callback
740
};
741
742
/* This has to be run for each processor */
743
void mcheck_init(struct cpuinfo_x86 *c, bool bsp)
744
12
{
745
12
    enum mcheck_type inited = mcheck_none;
746
12
747
12
    if ( !opt_mce )
748
0
    {
749
0
        if ( bsp )
750
0
            printk(XENLOG_INFO "MCE support disabled by bootparam\n");
751
0
        return;
752
0
    }
753
12
754
12
    if ( !mce_available(c) )
755
0
    {
756
0
        printk(XENLOG_INFO "CPU%i: No machine check support available\n",
757
0
               smp_processor_id());
758
0
        return;
759
0
    }
760
12
761
12
    /* Hardware enable */
762
12
    if ( mca_cap_init() )
763
0
        return;
764
12
765
12
    /* Early MCE initialisation for BSP. */
766
12
    if ( bsp && cpu_bank_alloc(smp_processor_id()) )
767
0
        BUG();
768
12
769
12
    switch ( c->x86_vendor )
770
12
    {
771
0
    case X86_VENDOR_AMD:
772
0
        inited = amd_mcheck_init(c);
773
0
        break;
774
0
775
12
    case X86_VENDOR_INTEL:
776
12
        switch ( c->x86 )
777
12
        {
778
12
        case 6:
779
12
        case 15:
780
12
            inited = intel_mcheck_init(c, bsp);
781
12
            break;
782
12
        }
783
12
        break;
784
12
785
0
    default:
786
0
        break;
787
12
    }
788
12
789
12
    show_mca_info(inited, c);
790
12
    if ( inited == mcheck_none || inited == mcheck_unset )
791
0
        goto out;
792
12
793
12
    intpose_init();
794
12
795
12
    if ( bsp )
796
1
    {
797
1
        mctelem_init(sizeof(struct mc_info));
798
1
        register_cpu_notifier(&cpu_nfb);
799
1
    }
800
12
801
12
    /* Turn on MCE now */
802
12
    set_in_cr4(X86_CR4_MCE);
803
12
804
12
    set_poll_bankmask(c);
805
12
806
12
    return;
807
0
 out:
808
0
    if ( bsp )
809
0
    {
810
0
        cpu_bank_free(smp_processor_id());
811
0
        mcabanks_free(mca_allbanks);
812
0
        mca_allbanks = NULL;
813
0
    }
814
0
}
815
816
static void mcinfo_clear(struct mc_info *mi)
817
0
{
818
0
    memset(mi, 0, sizeof(struct mc_info));
819
0
    x86_mcinfo_nentries(mi) = 0;
820
0
}
821
822
void *x86_mcinfo_reserve(struct mc_info *mi,
823
                         unsigned int size, unsigned int type)
824
0
{
825
0
    int i;
826
0
    unsigned long end1, end2;
827
0
    struct mcinfo_common *mic_base, *mic_index;
828
0
829
0
    mic_index = mic_base = x86_mcinfo_first(mi);
830
0
831
0
    /* go to first free entry */
832
0
    for ( i = 0; i < x86_mcinfo_nentries(mi); i++ )
833
0
        mic_index = x86_mcinfo_next(mic_index);
834
0
835
0
    /* check if there is enough size */
836
0
    end1 = (unsigned long)((uint8_t *)mic_base + sizeof(struct mc_info));
837
0
    end2 = (unsigned long)((uint8_t *)mic_index + size);
838
0
839
0
    if ( end1 < end2 )
840
0
    {
841
0
        mce_printk(MCE_CRITICAL,
842
0
                   "mcinfo_add: No space left in mc_info\n");
843
0
        return NULL;
844
0
    }
845
0
846
0
    /* there's enough space. add entry. */
847
0
    x86_mcinfo_nentries(mi)++;
848
0
849
0
    memset(mic_index, 0, size);
850
0
    mic_index->size = size;
851
0
    mic_index->type = type;
852
0
853
0
    return mic_index;
854
0
}
855
856
static void x86_mcinfo_apei_save(
857
    struct mcinfo_global *mc_global, struct mcinfo_bank *mc_bank)
858
0
{
859
0
    struct mce m;
860
0
861
0
    memset(&m, 0, sizeof(struct mce));
862
0
863
0
    m.cpu = mc_global->mc_coreid;
864
0
    m.cpuvendor = boot_cpu_data.x86_vendor;
865
0
    m.cpuid = cpuid_eax(1);
866
0
    m.socketid = mc_global->mc_socketid;
867
0
    m.apicid = mc_global->mc_apicid;
868
0
869
0
    m.mcgstatus = mc_global->mc_gstatus;
870
0
    m.status = mc_bank->mc_status;
871
0
    m.misc = mc_bank->mc_misc;
872
0
    m.addr = mc_bank->mc_addr;
873
0
    m.bank = mc_bank->mc_bank;
874
0
875
0
    apei_write_mce(&m);
876
0
}
877
878
/*
879
 * Dump machine check information in a format
880
 * that mcelog can parse. This is used only when
881
 * Dom0 does not take the notification.
882
 */
883
void x86_mcinfo_dump(struct mc_info *mi)
884
0
{
885
0
    struct mcinfo_common *mic = NULL;
886
0
    struct mcinfo_global *mc_global;
887
0
    struct mcinfo_bank *mc_bank;
888
0
889
0
    /* first print the global info */
890
0
    x86_mcinfo_lookup(mic, mi, MC_TYPE_GLOBAL);
891
0
    if ( mic == NULL )
892
0
        return;
893
0
    mc_global = (struct mcinfo_global *)mic;
894
0
    if ( mc_global->mc_flags & MC_FLAG_MCE )
895
0
        printk(XENLOG_WARNING
896
0
               "CPU%d: Machine Check Exception: %16"PRIx64"\n",
897
0
               mc_global->mc_coreid, mc_global->mc_gstatus);
898
0
    else if ( mc_global->mc_flags & MC_FLAG_CMCI )
899
0
        printk(XENLOG_WARNING "CMCI occurred on CPU %d.\n",
900
0
               mc_global->mc_coreid);
901
0
    else if ( mc_global->mc_flags & MC_FLAG_POLLED )
902
0
        printk(XENLOG_WARNING "POLLED occurred on CPU %d.\n",
903
0
               mc_global->mc_coreid);
904
0
905
0
    /* then the bank information */
906
0
    x86_mcinfo_lookup(mic, mi, MC_TYPE_BANK); /* finds the first entry */
907
0
    do {
908
0
        if ( mic == NULL )
909
0
            return;
910
0
        if ( mic->type != MC_TYPE_BANK )
911
0
            goto next;
912
0
913
0
        mc_bank = (struct mcinfo_bank *)mic;
914
0
915
0
        printk(XENLOG_WARNING "Bank %d: %16"PRIx64,
916
0
               mc_bank->mc_bank,
917
0
               mc_bank->mc_status);
918
0
        if ( mc_bank->mc_status & MCi_STATUS_MISCV )
919
0
            printk("[%16"PRIx64"]", mc_bank->mc_misc);
920
0
        if ( mc_bank->mc_status & MCi_STATUS_ADDRV )
921
0
            printk(" at %16"PRIx64, mc_bank->mc_addr);
922
0
        printk("\n");
923
0
924
0
        if ( is_mc_panic )
925
0
            x86_mcinfo_apei_save(mc_global, mc_bank);
926
0
927
0
 next:
928
0
        mic = x86_mcinfo_next(mic); /* next entry */
929
0
        if ( (mic == NULL) || (mic->size == 0) )
930
0
            break;
931
0
    } while ( 1 );
932
0
}
933
934
static void do_mc_get_cpu_info(void *v)
935
0
{
936
0
    int cpu = smp_processor_id();
937
0
    int cindex, cpn;
938
0
    struct cpuinfo_x86 *c;
939
0
    xen_mc_logical_cpu_t *log_cpus, *xcp;
940
0
    uint32_t junk, ebx;
941
0
942
0
    log_cpus = v;
943
0
    c = &cpu_data[cpu];
944
0
    cindex = 0;
945
0
    cpn = cpu - 1;
946
0
947
0
    /*
948
0
     * Deal with sparse masks, condensed into a contig array.
949
0
     */
950
0
    while ( cpn >= 0 )
951
0
    {
952
0
        if ( cpu_online(cpn) )
953
0
            cindex++;
954
0
        cpn--;
955
0
    }
956
0
957
0
    xcp = &log_cpus[cindex];
958
0
    c = &cpu_data[cpu];
959
0
    xcp->mc_cpunr = cpu;
960
0
    x86_mc_get_cpu_info(cpu, &xcp->mc_chipid,
961
0
                        &xcp->mc_coreid, &xcp->mc_threadid,
962
0
                        &xcp->mc_apicid, &xcp->mc_ncores,
963
0
                        &xcp->mc_ncores_active, &xcp->mc_nthreads);
964
0
    xcp->mc_cpuid_level = c->cpuid_level;
965
0
    xcp->mc_family = c->x86;
966
0
    xcp->mc_vendor = c->x86_vendor;
967
0
    xcp->mc_model = c->x86_model;
968
0
    xcp->mc_step = c->x86_mask;
969
0
    xcp->mc_cache_size = c->x86_cache_size;
970
0
    xcp->mc_cache_alignment = c->x86_cache_alignment;
971
0
    memcpy(xcp->mc_vendorid, c->x86_vendor_id, sizeof xcp->mc_vendorid);
972
0
    memcpy(xcp->mc_brandid, c->x86_model_id, sizeof xcp->mc_brandid);
973
0
    memcpy(xcp->mc_cpu_caps, c->x86_capability, sizeof xcp->mc_cpu_caps);
974
0
975
0
    /*
976
0
     * This part needs to run on the CPU itself.
977
0
     */
978
0
    xcp->mc_nmsrvals = __MC_NMSRS;
979
0
    xcp->mc_msrvalues[0].reg = MSR_IA32_MCG_CAP;
980
0
    rdmsrl(MSR_IA32_MCG_CAP, xcp->mc_msrvalues[0].value);
981
0
982
0
    if ( c->cpuid_level >= 1 )
983
0
    {
984
0
        cpuid(1, &junk, &ebx, &junk, &junk);
985
0
        xcp->mc_clusterid = (ebx >> 24) & 0xff;
986
0
    }
987
0
    else
988
0
        xcp->mc_clusterid = get_apic_id();
989
0
}
990
991
void x86_mc_get_cpu_info(unsigned cpu, uint32_t *chipid, uint16_t *coreid,
992
                         uint16_t *threadid, uint32_t *apicid,
993
                         unsigned *ncores, unsigned *ncores_active,
994
                         unsigned *nthreads)
995
0
{
996
0
    struct cpuinfo_x86 *c;
997
0
998
0
    *apicid = cpu_physical_id(cpu);
999
0
    c = &cpu_data[cpu];
1000
0
    if ( c->apicid == BAD_APICID )
1001
0
    {
1002
0
        *chipid = cpu;
1003
0
        *coreid = 0;
1004
0
        *threadid = 0;
1005
0
        if ( ncores != NULL )
1006
0
            *ncores = 1;
1007
0
        if ( ncores_active != NULL )
1008
0
            *ncores_active = 1;
1009
0
        if ( nthreads != NULL )
1010
0
            *nthreads = 1;
1011
0
    }
1012
0
    else
1013
0
    {
1014
0
        *chipid = c->phys_proc_id;
1015
0
        if ( c->x86_max_cores > 1 )
1016
0
            *coreid = c->cpu_core_id;
1017
0
        else
1018
0
            *coreid = 0;
1019
0
        *threadid = c->apicid & ((1 << (c->x86_num_siblings - 1)) - 1);
1020
0
        if ( ncores != NULL )
1021
0
            *ncores = c->x86_max_cores;
1022
0
        if ( ncores_active != NULL )
1023
0
            *ncores_active = c->booted_cores;
1024
0
        if ( nthreads != NULL )
1025
0
            *nthreads = c->x86_num_siblings;
1026
0
    }
1027
0
}
1028
1029
34.2k
#define INTPOSE_NENT 50
1030
1031
static struct intpose_ent {
1032
    unsigned int cpu_nr;
1033
    uint64_t msr;
1034
    uint64_t val;
1035
} intpose_arr[INTPOSE_NENT];
1036
1037
static void intpose_init(void)
1038
12
{
1039
12
    static int done;
1040
12
    int i;
1041
12
1042
12
    if ( done++ > 0 )
1043
11
        return;
1044
12
1045
51
    for ( i = 0; i < INTPOSE_NENT; i++ )
1046
50
        intpose_arr[i].cpu_nr = -1;
1047
1
1048
1
}
1049
1050
struct intpose_ent *intpose_lookup(unsigned int cpu_nr, uint64_t msr,
1051
                                   uint64_t *valp)
1052
694
{
1053
694
    int i;
1054
694
1055
34.1k
    for ( i = 0; i < INTPOSE_NENT; i++ )
1056
33.5k
    {
1057
33.5k
        if ( intpose_arr[i].cpu_nr == cpu_nr && intpose_arr[i].msr == msr )
1058
0
        {
1059
0
            if ( valp != NULL )
1060
0
                *valp = intpose_arr[i].val;
1061
0
            return &intpose_arr[i];
1062
0
        }
1063
33.5k
    }
1064
694
1065
694
    return NULL;
1066
694
}
1067
1068
static void intpose_add(unsigned int cpu_nr, uint64_t msr, uint64_t val)
1069
0
{
1070
0
    struct intpose_ent *ent = intpose_lookup(cpu_nr, msr, NULL);
1071
0
    int i;
1072
0
1073
0
    if ( ent )
1074
0
    {
1075
0
        ent->val = val;
1076
0
        return;
1077
0
    }
1078
0
1079
0
    for ( i = 0, ent = &intpose_arr[0]; i < INTPOSE_NENT; i++, ent++ )
1080
0
    {
1081
0
        if ( ent->cpu_nr == -1 )
1082
0
        {
1083
0
            ent->cpu_nr = cpu_nr;
1084
0
            ent->msr = msr;
1085
0
            ent->val = val;
1086
0
            return;
1087
0
        }
1088
0
    }
1089
0
1090
0
    printk("intpose_add: interpose array full - request dropped\n");
1091
0
}
1092
1093
bool intpose_inval(unsigned int cpu_nr, uint64_t msr)
1094
0
{
1095
0
    struct intpose_ent *ent = intpose_lookup(cpu_nr, msr, NULL);
1096
0
1097
0
    if ( !ent )
1098
0
        return false;
1099
0
1100
0
    ent->cpu_nr = -1;
1101
0
    return true;
1102
0
}
1103
1104
#define IS_MCA_BANKREG(r) \
1105
0
    ((r) >= MSR_IA32_MC0_CTL && \
1106
0
    (r) <= MSR_IA32_MCx_MISC(nr_mce_banks - 1) && \
1107
0
    ((r) - MSR_IA32_MC0_CTL) % 4 != 0) /* excludes MCi_CTL */
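/*
 * Worked illustration of the modulo test above, assuming the conventional
 * MCA layout of four architectural MSRs per bank (as encoded by the
 * MSR_IA32_MCx_*() macros):
 *   CTL    = MSR_IA32_MC0_CTL + 4*i      -> offset % 4 == 0  (rejected)
 *   STATUS = MSR_IA32_MC0_CTL + 4*i + 1  -> offset % 4 == 1  (accepted)
 *   ADDR   = MSR_IA32_MC0_CTL + 4*i + 2  -> offset % 4 == 2  (accepted)
 *   MISC   = MSR_IA32_MC0_CTL + 4*i + 3  -> offset % 4 == 3  (accepted)
 * i.e. IS_MCA_BANKREG() matches a bank's STATUS/ADDR/MISC registers but
 * not its CTL register.
 */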
1108
1109
static bool x86_mc_msrinject_verify(struct xen_mc_msrinject *mci)
1110
0
{
1111
0
    struct cpuinfo_x86 *c;
1112
0
    int i, errs = 0;
1113
0
1114
0
    c = &cpu_data[smp_processor_id()];
1115
0
1116
0
    for ( i = 0; i < mci->mcinj_count; i++ )
1117
0
    {
1118
0
        uint64_t reg = mci->mcinj_msr[i].reg;
1119
0
        const char *reason = NULL;
1120
0
1121
0
        if ( IS_MCA_BANKREG(reg) )
1122
0
        {
1123
0
            if ( c->x86_vendor == X86_VENDOR_AMD )
1124
0
            {
1125
0
                /*
1126
0
                 * On AMD we can set MCi_STATUS_WREN in the
1127
0
                 * HWCR MSR to allow non-zero writes to bank
1128
0
                 * MSRs not to #GP.  The injector in dom0
1129
0
                 * should set that bit, but we detect when it
1130
0
                 * is necessary and set it as a courtesy to
1131
0
                 * avoid #GP in the hypervisor.
1132
0
                 */
1133
0
                mci->mcinj_flags |=
1134
0
                    _MC_MSRINJ_F_REQ_HWCR_WREN;
1135
0
                continue;
1136
0
            }
1137
0
            else
1138
0
            {
1139
0
                /*
1140
0
                 * No alternative but to interpose, so require
1141
0
                 * that the injector specify it as such.
1142
0
                 */
1143
0
                if ( !(mci->mcinj_flags & MC_MSRINJ_F_INTERPOSE) )
1144
0
                    reason = "must specify interposition";
1145
0
            }
1146
0
        }
1147
0
        else
1148
0
        {
1149
0
            switch ( reg )
1150
0
            {
1151
0
            /* MSRs acceptable on all x86 cpus */
1152
0
            case MSR_IA32_MCG_STATUS:
1153
0
                break;
1154
0
1155
0
            case MSR_F10_MC4_MISC1:
1156
0
            case MSR_F10_MC4_MISC2:
1157
0
            case MSR_F10_MC4_MISC3:
1158
0
                if ( c->x86_vendor != X86_VENDOR_AMD )
1159
0
                    reason = "only supported on AMD";
1160
0
                else if ( c->x86 < 0x10 )
1161
0
                    reason = "only supported on AMD Fam10h+";
1162
0
                break;
1163
0
1164
0
            /* MSRs that the HV will take care of */
1165
0
            case MSR_K8_HWCR:
1166
0
                if ( c->x86_vendor == X86_VENDOR_AMD )
1167
0
                    reason = "HV will operate HWCR";
1168
0
                else
1169
0
                    reason = "only supported on AMD";
1170
0
                break;
1171
0
1172
0
            default:
1173
0
                reason = "not a recognized MCA MSR";
1174
0
                break;
1175
0
            }
1176
0
        }
1177
0
1178
0
        if ( reason != NULL )
1179
0
        {
1180
0
            printk("HV MSR INJECT ERROR: MSR %#Lx %s\n",
1181
0
                   (unsigned long long)mci->mcinj_msr[i].reg, reason);
1182
0
            errs++;
1183
0
        }
1184
0
    }
1185
0
1186
0
    return !errs;
1187
0
}
1188
1189
static uint64_t x86_mc_hwcr_wren(void)
1190
0
{
1191
0
    uint64_t old;
1192
0
1193
0
    rdmsrl(MSR_K8_HWCR, old);
1194
0
1195
0
    if ( !(old & K8_HWCR_MCi_STATUS_WREN) )
1196
0
    {
1197
0
        uint64_t new = old | K8_HWCR_MCi_STATUS_WREN;
1198
0
        wrmsrl(MSR_K8_HWCR, new);
1199
0
    }
1200
0
1201
0
    return old;
1202
0
}
1203
1204
static void x86_mc_hwcr_wren_restore(uint64_t hwcr)
1205
0
{
1206
0
    if ( !(hwcr & K8_HWCR_MCi_STATUS_WREN) )
1207
0
        wrmsrl(MSR_K8_HWCR, hwcr);
1208
0
}
1209
1210
static void x86_mc_msrinject(void *data)
1211
0
{
1212
0
    struct xen_mc_msrinject *mci = data;
1213
0
    struct mcinfo_msr *msr;
1214
0
    uint64_t hwcr = 0;
1215
0
    int intpose;
1216
0
    int i;
1217
0
1218
0
    if ( mci->mcinj_flags & _MC_MSRINJ_F_REQ_HWCR_WREN )
1219
0
        hwcr = x86_mc_hwcr_wren();
1220
0
1221
0
    intpose = (mci->mcinj_flags & MC_MSRINJ_F_INTERPOSE) != 0;
1222
0
1223
0
    for ( i = 0, msr = &mci->mcinj_msr[0]; i < mci->mcinj_count; i++, msr++ )
1224
0
    {
1225
0
        printk("HV MSR INJECT (%s) target %u actual %u MSR %#Lx <-- %#Lx\n",
1226
0
               intpose ? "interpose" : "hardware",
1227
0
               mci->mcinj_cpunr, smp_processor_id(),
1228
0
               (unsigned long long)msr->reg,
1229
0
               (unsigned long long)msr->value);
1230
0
1231
0
        if ( intpose )
1232
0
            intpose_add(mci->mcinj_cpunr, msr->reg, msr->value);
1233
0
        else
1234
0
            wrmsrl(msr->reg, msr->value);
1235
0
    }
1236
0
1237
0
    if ( mci->mcinj_flags & _MC_MSRINJ_F_REQ_HWCR_WREN )
1238
0
        x86_mc_hwcr_wren_restore(hwcr);
1239
0
}
1240
1241
/*ARGSUSED*/
1242
static void x86_mc_mceinject(void *data)
1243
0
{
1244
0
    printk("Simulating #MC on cpu %d\n", smp_processor_id());
1245
0
    __asm__ __volatile__("int $0x12");
1246
0
}
1247
1248
#if BITS_PER_LONG == 64
1249
1250
0
#define ID2COOKIE(id) ((mctelem_cookie_t)(id))
1251
0
#define COOKIE2ID(c) ((uint64_t)(c))
1252
1253
#elif defined(BITS_PER_LONG)
1254
#error BITS_PER_LONG has unexpected value
1255
#else
1256
#error BITS_PER_LONG definition absent
1257
#endif
1258
1259
# include <compat/arch-x86/xen-mca.h>
1260
1261
# define xen_mcinfo_msr              mcinfo_msr
1262
CHECK_mcinfo_msr;
1263
# undef xen_mcinfo_msr
1264
# undef CHECK_mcinfo_msr
1265
# define CHECK_mcinfo_msr            struct mcinfo_msr
1266
1267
# define xen_mcinfo_common           mcinfo_common
1268
CHECK_mcinfo_common;
1269
# undef xen_mcinfo_common
1270
# undef CHECK_mcinfo_common
1271
# define CHECK_mcinfo_common         struct mcinfo_common
1272
1273
CHECK_FIELD_(struct, mc_fetch, flags);
1274
CHECK_FIELD_(struct, mc_fetch, fetch_id);
1275
# define CHECK_compat_mc_fetch       struct mc_fetch
1276
1277
CHECK_FIELD_(struct, mc_physcpuinfo, ncpus);
1278
# define CHECK_compat_mc_physcpuinfo struct mc_physcpuinfo
1279
1280
#define CHECK_compat_mc_inject_v2   struct mc_inject_v2
1281
CHECK_mc;
1282
# undef CHECK_compat_mc_fetch
1283
# undef CHECK_compat_mc_physcpuinfo
1284
1285
# define xen_mc_info                 mc_info
1286
CHECK_mc_info;
1287
# undef xen_mc_info
1288
1289
# define xen_mcinfo_global           mcinfo_global
1290
CHECK_mcinfo_global;
1291
# undef xen_mcinfo_global
1292
1293
# define xen_mcinfo_bank             mcinfo_bank
1294
CHECK_mcinfo_bank;
1295
# undef xen_mcinfo_bank
1296
1297
# define xen_mcinfo_extended         mcinfo_extended
1298
CHECK_mcinfo_extended;
1299
# undef xen_mcinfo_extended
1300
1301
# define xen_mcinfo_recovery         mcinfo_recovery
1302
# define xen_cpu_offline_action      cpu_offline_action
1303
# define xen_page_offline_action     page_offline_action
1304
CHECK_mcinfo_recovery;
1305
# undef xen_cpu_offline_action
1306
# undef xen_page_offline_action
1307
# undef xen_mcinfo_recovery
1308
1309
/* Machine Check Architecture Hypercall */
1310
long do_mca(XEN_GUEST_HANDLE_PARAM(xen_mc_t) u_xen_mc)
1311
0
{
1312
0
    long ret = 0;
1313
0
    struct xen_mc curop, *op = &curop;
1314
0
    struct vcpu *v = current;
1315
0
    union {
1316
0
        struct xen_mc_fetch *nat;
1317
0
        struct compat_mc_fetch *cmp;
1318
0
    } mc_fetch;
1319
0
    union {
1320
0
        struct xen_mc_physcpuinfo *nat;
1321
0
        struct compat_mc_physcpuinfo *cmp;
1322
0
    } mc_physcpuinfo;
1323
0
    uint32_t flags, cmdflags;
1324
0
    int nlcpu;
1325
0
    xen_mc_logical_cpu_t *log_cpus = NULL;
1326
0
    mctelem_cookie_t mctc;
1327
0
    mctelem_class_t which;
1328
0
    unsigned int target;
1329
0
    struct xen_mc_msrinject *mc_msrinject;
1330
0
    struct xen_mc_mceinject *mc_mceinject;
1331
0
1332
0
    ret = xsm_do_mca(XSM_PRIV);
1333
0
    if ( ret )
1334
0
        return x86_mcerr("", ret);
1335
0
1336
0
    if ( copy_from_guest(op, u_xen_mc, 1) )
1337
0
        return x86_mcerr("do_mca: failed copyin of xen_mc_t", -EFAULT);
1338
0
1339
0
    if ( op->interface_version != XEN_MCA_INTERFACE_VERSION )
1340
0
        return x86_mcerr("do_mca: interface version mismatch", -EACCES);
1341
0
1342
0
    switch ( op->cmd )
1343
0
    {
1344
0
    case XEN_MC_fetch:
1345
0
        mc_fetch.nat = &op->u.mc_fetch;
1346
0
        cmdflags = mc_fetch.nat->flags;
1347
0
1348
0
        switch ( cmdflags & (XEN_MC_NONURGENT | XEN_MC_URGENT) )
1349
0
        {
1350
0
        case XEN_MC_NONURGENT:
1351
0
            which = MC_NONURGENT;
1352
0
            break;
1353
0
1354
0
        case XEN_MC_URGENT:
1355
0
            which = MC_URGENT;
1356
0
            break;
1357
0
1358
0
        default:
1359
0
            return x86_mcerr("do_mca fetch: bad cmdflags", -EINVAL);
1360
0
        }
1361
0
1362
0
        flags = XEN_MC_OK;
1363
0
1364
0
        if ( cmdflags & XEN_MC_ACK )
1365
0
        {
1366
0
            mctelem_cookie_t cookie = ID2COOKIE(mc_fetch.nat->fetch_id);
1367
0
            mctelem_ack(which, cookie);
1368
0
        }
1369
0
        else
1370
0
        {
1371
0
            if ( !is_pv_32bit_vcpu(v)
1372
0
                 ? guest_handle_is_null(mc_fetch.nat->data)
1373
0
                 : compat_handle_is_null(mc_fetch.cmp->data) )
1374
0
                return x86_mcerr("do_mca fetch: guest buffer "
1375
0
                                 "invalid", -EINVAL);
1376
0
1377
0
            mctc = mctelem_consume_oldest_begin(which);
1378
0
            if ( mctc )
1379
0
            {
1380
0
                struct mc_info *mcip = mctelem_dataptr(mctc);
1381
0
                if ( !is_pv_32bit_vcpu(v)
1382
0
                     ? copy_to_guest(mc_fetch.nat->data, mcip, 1)
1383
0
                     : copy_to_compat(mc_fetch.cmp->data, mcip, 1) )
1384
0
                {
1385
0
                    ret = -EFAULT;
1386
0
                    flags |= XEN_MC_FETCHFAILED;
1387
0
                    mc_fetch.nat->fetch_id = 0;
1388
0
                }
1389
0
                else
1390
0
                    mc_fetch.nat->fetch_id = COOKIE2ID(mctc);
1391
0
                mctelem_consume_oldest_end(mctc);
1392
0
            }
1393
0
            else
1394
0
            {
1395
0
                /* There is no data */
1396
0
                flags |= XEN_MC_NODATA;
1397
0
                mc_fetch.nat->fetch_id = 0;
1398
0
            }
1399
0
1400
0
            mc_fetch.nat->flags = flags;
1401
0
            if (copy_to_guest(u_xen_mc, op, 1) != 0)
1402
0
                ret = -EFAULT;
1403
0
        }
1404
0
1405
0
        break;
1406
0
1407
0
    case XEN_MC_notifydomain:
1408
0
        return x86_mcerr("do_mca notify unsupported", -EINVAL);
1409
0
1410
0
    case XEN_MC_physcpuinfo:
1411
0
        mc_physcpuinfo.nat = &op->u.mc_physcpuinfo;
1412
0
        nlcpu = num_online_cpus();
1413
0
1414
0
        if ( !is_pv_32bit_vcpu(v)
1415
0
             ? !guest_handle_is_null(mc_physcpuinfo.nat->info)
1416
0
             : !compat_handle_is_null(mc_physcpuinfo.cmp->info) )
1417
0
        {
1418
0
            if ( mc_physcpuinfo.nat->ncpus <= 0 )
1419
0
                return x86_mcerr("do_mca cpuinfo: ncpus <= 0",
1420
0
                                 -EINVAL);
1421
0
            nlcpu = min(nlcpu, (int)mc_physcpuinfo.nat->ncpus);
1422
0
            log_cpus = xmalloc_array(xen_mc_logical_cpu_t, nlcpu);
1423
0
            if ( log_cpus == NULL )
1424
0
                return x86_mcerr("do_mca cpuinfo", -ENOMEM);
1425
0
            on_each_cpu(do_mc_get_cpu_info, log_cpus, 1);
1426
0
            if ( !is_pv_32bit_vcpu(v)
1427
0
                 ? copy_to_guest(mc_physcpuinfo.nat->info, log_cpus, nlcpu)
1428
0
                 : copy_to_compat(mc_physcpuinfo.cmp->info, log_cpus, nlcpu) )
1429
0
                ret = -EFAULT;
1430
0
            xfree(log_cpus);
1431
0
        }
1432
0
1433
0
        mc_physcpuinfo.nat->ncpus = nlcpu;
1434
0
1435
0
        if ( copy_to_guest(u_xen_mc, op, 1) )
1436
0
            return x86_mcerr("do_mca cpuinfo", -EFAULT);
1437
0
1438
0
        break;
1439
0
1440
0
    case XEN_MC_msrinject:
1441
0
        if ( nr_mce_banks == 0 )
1442
0
            return x86_mcerr("do_mca inject", -ENODEV);
1443
0
1444
0
        mc_msrinject = &op->u.mc_msrinject;
1445
0
        target = mc_msrinject->mcinj_cpunr;
1446
0
1447
0
        if ( target >= nr_cpu_ids )
1448
0
            return x86_mcerr("do_mca inject: bad target", -EINVAL);
1449
0
1450
0
        if ( !cpu_online(target) )
1451
0
            return x86_mcerr("do_mca inject: target offline",
1452
0
                             -EINVAL);
1453
0
1454
0
        if ( mc_msrinject->mcinj_count == 0 )
1455
0
            return 0;
1456
0
1457
0
        if ( mc_msrinject->mcinj_flags & MC_MSRINJ_F_GPADDR )
1458
0
        {
1459
0
            domid_t domid;
1460
0
            struct domain *d;
1461
0
            struct mcinfo_msr *msr;
1462
0
            unsigned int i;
1463
0
            paddr_t gaddr;
1464
0
            unsigned long gfn, mfn;
1465
0
            p2m_type_t t;
1466
0
1467
0
            domid = (mc_msrinject->mcinj_domid == DOMID_SELF) ?
1468
0
                    current->domain->domain_id : mc_msrinject->mcinj_domid;
1469
0
            if ( domid >= DOMID_FIRST_RESERVED )
1470
0
                return x86_mcerr("do_mca inject: incompatible flag "
1471
0
                                 "MC_MSRINJ_F_GPADDR with domain %d",
1472
0
                                 -EINVAL, domid);
1473
0
1474
0
            d = get_domain_by_id(domid);
1475
0
            if ( d == NULL )
1476
0
                return x86_mcerr("do_mca inject: bad domain id %d",
1477
0
                                 -EINVAL, domid);
1478
0
1479
0
            for ( i = 0, msr = &mc_msrinject->mcinj_msr[0];
1480
0
                  i < mc_msrinject->mcinj_count;
1481
0
                  i++, msr++ )
1482
0
            {
1483
0
                gaddr = msr->value;
1484
0
                gfn = PFN_DOWN(gaddr);
1485
0
                mfn = mfn_x(get_gfn(d, gfn, &t));
1486
0
1487
0
                if ( mfn == mfn_x(INVALID_MFN) )
1488
0
                {
1489
0
                    put_gfn(d, gfn);
1490
0
                    put_domain(d);
1491
0
                    return x86_mcerr("do_mca inject: bad gfn %#lx of domain %d",
1492
0
                                     -EINVAL, gfn, domid);
1493
0
                }
1494
0
1495
0
                msr->value = pfn_to_paddr(mfn) | (gaddr & (PAGE_SIZE - 1));
1496
0
1497
0
                put_gfn(d, gfn);
1498
0
            }
1499
0
1500
0
            put_domain(d);
1501
0
        }
1502
0
1503
0
        if ( !x86_mc_msrinject_verify(mc_msrinject) )
1504
0
            return x86_mcerr("do_mca inject: illegal MSR", -EINVAL);
1505
0
1506
0
        add_taint(TAINT_ERROR_INJECT);
1507
0
1508
0
        on_selected_cpus(cpumask_of(target), x86_mc_msrinject,
1509
0
                         mc_msrinject, 1);
1510
0
1511
0
        break;
1512
0
1513
0
    case XEN_MC_mceinject:
1514
0
        if ( nr_mce_banks == 0 )
1515
0
            return x86_mcerr("do_mca #MC", -ENODEV);
1516
0
1517
0
        mc_mceinject = &op->u.mc_mceinject;
1518
0
        target = mc_mceinject->mceinj_cpunr;
1519
0
1520
0
        if ( target >= nr_cpu_ids )
1521
0
            return x86_mcerr("do_mca #MC: bad target", -EINVAL);
1522
0
1523
0
        if ( !cpu_online(target) )
1524
0
            return x86_mcerr("do_mca #MC: target offline", -EINVAL);
1525
0
1526
0
        add_taint(TAINT_ERROR_INJECT);
1527
0
1528
0
        if ( mce_broadcast )
1529
0
            on_each_cpu(x86_mc_mceinject, mc_mceinject, 1);
1530
0
        else
1531
0
            on_selected_cpus(cpumask_of(target), x86_mc_mceinject,
1532
0
                             mc_mceinject, 1);
1533
0
        break;
1534
0
1535
0
    case XEN_MC_inject_v2:
1536
0
    {
1537
0
        const cpumask_t *cpumap;
1538
0
        cpumask_var_t cmv;
1539
0
        bool broadcast = op->u.mc_inject_v2.flags & XEN_MC_INJECT_CPU_BROADCAST;
1540
0
1541
0
        if ( nr_mce_banks == 0 )
1542
0
            return x86_mcerr("do_mca #MC", -ENODEV);
1543
0
1544
0
        if ( broadcast )
1545
0
            cpumap = &cpu_online_map;
1546
0
        else
1547
0
        {
1548
0
            ret = xenctl_bitmap_to_cpumask(&cmv, &op->u.mc_inject_v2.cpumap);
1549
0
            if ( ret )
1550
0
                break;
1551
0
            cpumap = cmv;
1552
0
            if ( !cpumask_intersects(cpumap, &cpu_online_map) )
1553
0
            {
1554
0
                free_cpumask_var(cmv);
1555
0
                ret = x86_mcerr("No online CPU passed\n", -EINVAL);
1556
0
                break;
1557
0
            }
1558
0
            if ( !cpumask_subset(cpumap, &cpu_online_map) )
1559
0
                dprintk(XENLOG_INFO,
1560
0
                        "Not all required CPUs are online\n");
1561
0
        }
1562
0
1563
0
        switch ( op->u.mc_inject_v2.flags & XEN_MC_INJECT_TYPE_MASK )
1564
0
        {
1565
0
        case XEN_MC_INJECT_TYPE_MCE:
1566
0
            if ( mce_broadcast &&
1567
0
                 !cpumask_equal(cpumap, &cpu_online_map) )
1568
0
                printk("Not trigger MCE on all CPUs, may HANG!\n");
1569
0
            on_selected_cpus(cpumap, x86_mc_mceinject, NULL, 1);
1570
0
            break;
1571
0
1572
0
        case XEN_MC_INJECT_TYPE_CMCI:
1573
0
            if ( !cmci_apic_vector )
1574
0
                ret = x86_mcerr("No CMCI supported in platform\n", -EINVAL);
1575
0
            else
1576
0
            {
1577
0
                if ( cpumask_test_cpu(smp_processor_id(), cpumap) )
1578
0
                    send_IPI_self(cmci_apic_vector);
1579
0
                send_IPI_mask(cpumap, cmci_apic_vector);
1580
0
            }
1581
0
            break;
1582
0
1583
0
        case XEN_MC_INJECT_TYPE_LMCE:
1584
0
            if ( !lmce_support )
1585
0
            {
1586
0
                ret = x86_mcerr("No LMCE support", -EINVAL);
1587
0
                break;
1588
0
            }
1589
0
            if ( broadcast )
1590
0
            {
1591
0
                ret = x86_mcerr("Broadcast cannot be used with LMCE", -EINVAL);
1592
0
                break;
1593
0
            }
1594
0
            /* Ensure at most one CPU is specified. */
1595
0
            if ( nr_cpu_ids > cpumask_next(cpumask_first(cpumap), cpumap) )
1596
0
            {
1597
0
                ret = x86_mcerr("More than one CPU specified for LMCE",
1598
0
                                -EINVAL);
1599
0
                break;
1600
0
            }
1601
0
            on_selected_cpus(cpumap, x86_mc_mceinject, NULL, 1);
1602
0
            break;
1603
0
1604
0
        default:
1605
0
            ret = x86_mcerr("Wrong mca type\n", -EINVAL);
1606
0
            break;
1607
0
        }
1608
0
1609
0
        if ( cpumap != &cpu_online_map )
1610
0
            free_cpumask_var(cmv);
1611
0
1612
0
        break;
1613
0
    }
1614
0
1615
0
    default:
1616
0
        return x86_mcerr("do_mca: bad command", -EINVAL);
1617
0
    }
1618
0
1619
0
    return ret;
1620
0
}
1621
1622
int mcinfo_dumpped;
1623
static int x86_mcinfo_dump_panic(mctelem_cookie_t mctc)
1624
0
{
1625
0
    struct mc_info *mcip = mctelem_dataptr(mctc);
1626
0
1627
0
    x86_mcinfo_dump(mcip);
1628
0
    mcinfo_dumpped++;
1629
0
1630
0
    return 0;
1631
0
}
1632
1633
/* XXX shall we dump committed mc_info?? */
1634
static void mc_panic_dump(void)
1635
0
{
1636
0
    int cpu;
1637
0
1638
0
    dprintk(XENLOG_ERR, "Begin dump mc_info\n");
1639
0
    for_each_online_cpu(cpu)
1640
0
        mctelem_process_deferred(cpu, x86_mcinfo_dump_panic,
1641
0
                                 mctelem_has_deferred_lmce(cpu));
1642
0
    dprintk(XENLOG_ERR, "End dump mc_info, %x mcinfo dumped\n", mcinfo_dumpped);
1643
0
}
1644
1645
void mc_panic(char *s)
1646
0
{
1647
0
    is_mc_panic = true;
1648
0
    console_force_unlock();
1649
0
1650
0
    printk("Fatal machine check: %s\n", s);
1651
0
    printk("\n"
1652
0
           "****************************************\n"
1653
0
           "\n"
1654
0
           "   The processor has reported a hardware error which cannot\n"
1655
0
           "   be recovered from.  Xen will now reboot the machine.\n");
1656
0
    mc_panic_dump();
1657
0
    panic("HARDWARE ERROR");
1658
0
}
1659
1660
/*
1661
 * Machine Check owner judge algorithm:
1662
 * When an error happens, all CPUs serially read their MSR banks.
1663
 * The first CPU that fetches the error bank's info will clear
1664
 * this bank. Later readers can't get any information again.
1665
 * The first CPU is the actual mce_owner.
1666
 *
1667
 * A fatal (pcc=1) error might cause a machine crash
1668
 * before we are able to log it. To avoid losing the log, we adopt
1669
 * two-round scanning:
1670
 * Round 1: simply scan. If pcc = 1 or ripv = 0 is found, simply reset.
1671
 * All MCE banks are sticky; at boot, the MCE polling mechanism
1672
 * will help to collect and log those MCE errors.
1673
 * Round 2: do all MCE processing logic as normal.
1674
 */
1675
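The comment above boils down to a first-reader-wins rule: whichever CPU fetches a bank's error data first also clears the bank, so later readers find nothing and the first reader becomes the owner. The sketch below models only that claiming step with a C11 atomic exchange on a toy bank array; real bank access goes through MSR reads and writes rather than atomics, and the two-round scan is not modelled here.

#include <stdatomic.h>
#include <stdio.h>

#define NR_BANKS 4

/* Toy model: each bank holds a raw status word; 0 means nothing logged. */
static _Atomic unsigned long bank_status[NR_BANKS];

/*
 * atomic_exchange() reads a bank's status and clears it in one step, so
 * only the first reader of a bank sees a non-zero value and thereby
 * becomes its owner; later readers find the bank already empty.
 */
static void scan_and_claim(unsigned int cpu)
{
    for (unsigned int bank = 0; bank < NR_BANKS; bank++) {
        unsigned long status = atomic_exchange(&bank_status[bank], 0);

        if (status)
            printf("CPU%u owns bank%u (status %#lx)\n", cpu, bank, status);
    }
}

int main(void)
{
    bank_status[1] = 0xb15;   /* pretend bank 1 has logged an error */

    scan_and_claim(0);        /* the first scanner claims and clears bank 1 */
    scan_and_claim(1);        /* a later scanner finds nothing left */
    return 0;
}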
1676
/* Maybe called in MCE context, no lock, no printk */
1677
static enum mce_result mce_action(const struct cpu_user_regs *regs,
1678
                                  mctelem_cookie_t mctc)
1679
0
{
1680
0
    struct mc_info *local_mi;
1681
0
    enum mce_result bank_result = MCER_NOERROR;
1682
0
    enum mce_result worst_result = MCER_NOERROR;
1683
0
    struct mcinfo_common *mic = NULL;
1684
0
    struct mca_binfo binfo;
1685
0
    const struct mca_error_handler *handlers = mce_dhandlers;
1686
0
    unsigned int i, handler_num = mce_dhandler_num;
1687
0
1688
0
    /* When in mce context, regs is valid */
1689
0
    if ( regs )
1690
0
    {
1691
0
        handler_num = mce_uhandler_num;
1692
0
        handlers = mce_uhandlers;
1693
0
    }
1694
0
1695
0
    local_mi = (struct mc_info *)mctelem_dataptr(mctc);
1696
0
    x86_mcinfo_lookup(mic, local_mi, MC_TYPE_GLOBAL);
1697
0
    if ( mic == NULL )
1698
0
    {
1699
0
        printk(KERN_ERR "MCE: get local buffer entry failed\n ");
1700
0
        return MCER_CONTINUE;
1701
0
    }
1702
0
1703
0
    memset(&binfo, 0, sizeof(binfo));
1704
0
    binfo.mig = (struct mcinfo_global *)mic;
1705
0
    binfo.mi = local_mi;
1706
0
1707
0
    /* Processing bank information */
1708
0
    x86_mcinfo_lookup(mic, local_mi, MC_TYPE_BANK);
1709
0
1710
0
    for ( ; bank_result != MCER_RESET && mic && mic->size;
1711
0
          mic = x86_mcinfo_next(mic) )
1712
0
    {
1713
0
        if ( mic->type != MC_TYPE_BANK )
1714
0
        {
1715
0
            continue;
1716
0
        }
1717
0
        binfo.mib = (struct mcinfo_bank *)mic;
1718
0
        binfo.bank = binfo.mib->mc_bank;
1719
0
        bank_result = MCER_NOERROR;
1720
0
        for ( i = 0; i < handler_num; i++ )
1721
0
        {
1722
0
            if ( handlers[i].owned_error(binfo.mib->mc_status) )
1723
0
            {
1724
0
                handlers[i].recovery_handler(&binfo, &bank_result, regs);
1725
0
                if ( worst_result < bank_result )
1726
0
                    worst_result = bank_result;
1727
0
                break;
1728
0
            }
1729
0
        }
1730
0
    }
1731
0
1732
0
    return worst_result;
1733
0
}
1734
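mce_action() above iterates the MC_TYPE_BANK records and, for each bank, runs the first handler whose owned_error() predicate claims the status, remembering the worst result and stopping early once a reset is requested. The sketch below reproduces only that dispatch shape; the severity ordering, the bit-61 (UC) split and both handlers are hypothetical stand-ins, not the mce_dhandlers/mce_uhandlers tables.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Toy severities, ordered so that a larger value means a worse outcome. */
enum result { R_NOERROR, R_RECOVERED, R_CONTINUE, R_RESET };

struct handler {
    bool (*owned_error)(uint64_t status);
    enum result (*recover)(uint64_t status);
};

/* Hypothetical predicates/handlers, only to give the table something to do. */
static bool owns_corrected(uint64_t s)   { return !(s & (1ULL << 61)); }
static bool owns_uncorrected(uint64_t s) { return  (s & (1ULL << 61)); }
static enum result handle_corrected(uint64_t s)   { (void)s; return R_RECOVERED; }
static enum result handle_uncorrected(uint64_t s) { (void)s; return R_RESET; }

static const struct handler handlers[] = {
    { owns_corrected,   handle_corrected   },
    { owns_uncorrected, handle_uncorrected },
};

/* Per bank, run the first handler that claims the status; keep the worst result. */
static enum result act_on_banks(const uint64_t *status, unsigned int nr)
{
    enum result worst = R_NOERROR, cur = R_NOERROR;

    for (unsigned int b = 0; b < nr && cur != R_RESET; b++) {
        cur = R_NOERROR;
        for (unsigned int i = 0; i < sizeof(handlers) / sizeof(handlers[0]); i++)
            if (handlers[i].owned_error(status[b])) {
                cur = handlers[i].recover(status[b]);
                if (worst < cur)
                    worst = cur;
                break;
            }
    }
    return worst;
}

int main(void)
{
    uint64_t banks[] = { 0x1ULL, 1ULL << 61 };   /* one corrected, one uncorrected */

    printf("worst result: %d\n", act_on_banks(banks, 2));
    return 0;
}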
1735
/*
1736
 * Called from mctelem_process_deferred. Return 1 if the telemetry
1737
 * should be committed for dom0 consumption, 0 if it should be
1738
 * dismissed.
1739
 */
1740
static int mce_delayed_action(mctelem_cookie_t mctc)
1741
0
{
1742
0
    enum mce_result result;
1743
0
    int ret = 0;
1744
0
1745
0
    result = mce_action(NULL, mctc);
1746
0
1747
0
    switch ( result )
1748
0
    {
1749
0
    case MCER_RESET:
1750
0
        dprintk(XENLOG_ERR, "MCE delayed action failed\n");
1751
0
        is_mc_panic = true;
1752
0
        x86_mcinfo_dump(mctelem_dataptr(mctc));
1753
0
        panic("MCE: Software recovery failed for the UCR");
1754
0
        break;
1755
0
1756
0
    case MCER_RECOVERED:
1757
0
        dprintk(XENLOG_INFO, "MCE: Error is successfully recovered\n");
1758
0
        ret = 1;
1759
0
        break;
1760
0
1761
0
    case MCER_CONTINUE:
1762
0
        dprintk(XENLOG_INFO, "MCE: Error can't be recovered, "
1763
0
                "system is tainted\n");
1764
0
        x86_mcinfo_dump(mctelem_dataptr(mctc));
1765
0
        ret = 1;
1766
0
        break;
1767
0
1768
0
    default:
1769
0
        ret = 0;
1770
0
        break;
1771
0
    }
1772
0
    return ret;
1773
0
}
1774
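mce_delayed_action() above follows a commit-or-dismiss contract: mctelem_process_deferred() hands each pending telemetry cookie to the callback, which returns 1 when the record should be committed for Dom0 and 0 when it should be dropped. The toy model below shows how such a callback is consumed; struct telem, process_deferred() and the severity policy are invented for the illustration and are not the mctelem API.

#include <stdio.h>

/* Toy telemetry entry standing in for an mctelem cookie. */
struct telem { int id; int severity; };

/* Callback contract: return 1 to commit the entry, 0 to dismiss it. */
typedef int (*deferred_fn)(struct telem *t);

static void process_deferred(struct telem *queue, unsigned int nr, deferred_fn fn)
{
    for (unsigned int i = 0; i < nr; i++) {
        if (fn(&queue[i]))
            printf("telem %d committed for the consumer\n", queue[i].id);
        else
            printf("telem %d dismissed\n", queue[i].id);
    }
}

/* Hypothetical policy: only entries below a severity threshold are kept. */
static int delayed_action(struct telem *t)
{
    return t->severity <= 1;
}

int main(void)
{
    struct telem q[] = { { 1, 0 }, { 2, 3 } };

    process_deferred(q, 2, delayed_action);
    return 0;
}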
1775
/* Softirq handler for MCE# processing */
1776
static void mce_softirq(void)
1777
0
{
1778
0
    static DEFINE_MCE_BARRIER(mce_inside_bar);
1779
0
    static DEFINE_MCE_BARRIER(mce_severity_bar);
1780
0
    static atomic_t severity_cpu;
1781
0
    int cpu = smp_processor_id();
1782
0
    unsigned int workcpu;
1783
0
    bool lmce = mctelem_has_deferred_lmce(cpu);
1784
0
    bool bcast = mce_broadcast && !lmce;
1785
0
1786
0
    mce_printk(MCE_VERBOSE, "CPU%d enter softirq\n", cpu);
1787
0
1788
0
    mce_barrier_enter(&mce_inside_bar, bcast);
1789
0
1790
0
    if ( !lmce )
1791
0
    {
1792
0
        /*
1793
0
         * Everybody is here. Now let's see who gets to do the
1794
0
         * recovery work. Right now we just see if there's a CPU
1795
0
         * that did not have any problems, and pick that one.
1796
0
         *
1797
0
         * First, just set a default value: the last CPU to reach this point
1798
0
         * will overwrite the value and become the default.
1799
0
         */
1800
0
1801
0
        atomic_set(&severity_cpu, cpu);
1802
0
1803
0
        mce_barrier_enter(&mce_severity_bar, bcast);
1804
0
        if ( !mctelem_has_deferred(cpu) )
1805
0
            atomic_set(&severity_cpu, cpu);
1806
0
        mce_barrier_exit(&mce_severity_bar, bcast);
1807
0
    }
1808
0
1809
0
    /* We choose severity_cpu for further processing */
1810
0
    if ( lmce || atomic_read(&severity_cpu) == cpu )
1811
0
    {
1812
0
1813
0
        mce_printk(MCE_VERBOSE, "CPU%d handling errors\n", cpu);
1814
0
1815
0
        /*
1816
0
         * Step1: Fill DOM0 LOG buffer, vMCE injection buffer and
1817
0
         * vMCE MSRs virtualization buffer
1818
0
         */
1819
0
1820
0
        if ( lmce )
1821
0
            mctelem_process_deferred(cpu, mce_delayed_action, true);
1822
0
        else
1823
0
            for_each_online_cpu(workcpu)
1824
0
                mctelem_process_deferred(workcpu, mce_delayed_action, false);
1825
0
1826
0
        /* Step2: Send Log to DOM0 through vIRQ */
1827
0
        if ( dom0_vmce_enabled() )
1828
0
        {
1829
0
            mce_printk(MCE_VERBOSE, "MCE: send MCE# to DOM0 through virq\n");
1830
0
            send_global_virq(VIRQ_MCA);
1831
0
        }
1832
0
    }
1833
0
1834
0
    mce_barrier_exit(&mce_inside_bar, bcast);
1835
0
}
1836
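mce_softirq() above elects a single CPU to run the recovery work: each CPU first proposes itself as a default, then, between the two severity-barrier operations, only CPUs with no deferred telemetry overwrite severity_cpu, and whichever CPU matches the final value afterwards does the processing. The POSIX-threads sketch below mimics that election pattern; the has_deferred[] flags are invented for the demo and threads stand in for CPUs (build with -pthread).

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

static pthread_barrier_t bar;
static atomic_int severity_cpu;

/* Toy per-CPU flag standing in for mctelem_has_deferred(cpu). */
static const bool has_deferred[NR_CPUS] = { true, false, true, false };

static void *cpu_thread(void *arg)
{
    int cpu = (int)(long)arg;

    /* Everybody proposes itself; the last writer becomes the default. */
    atomic_store(&severity_cpu, cpu);
    pthread_barrier_wait(&bar);

    /*
     * Only CPUs without pending telemetry overwrite the choice, so the
     * final value names a CPU that is free to do the work whenever such
     * a CPU exists (otherwise the default from above is kept).
     */
    if (!has_deferred[cpu])
        atomic_store(&severity_cpu, cpu);
    pthread_barrier_wait(&bar);

    /* Exactly one thread matches the settled value and does the work. */
    if (atomic_load(&severity_cpu) == cpu)
        printf("CPU%d elected to handle the errors\n", cpu);
    return NULL;
}

int main(void)
{
    pthread_t t[NR_CPUS];

    pthread_barrier_init(&bar, NULL, NR_CPUS);
    for (long i = 0; i < NR_CPUS; i++)
        pthread_create(&t[i], NULL, cpu_thread, (void *)i);
    for (int i = 0; i < NR_CPUS; i++)
        pthread_join(t[i], NULL);
    pthread_barrier_destroy(&bar);
    return 0;
}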
1837
/*
1838
 * Machine check owner selection algorithm:
1839
 * When an error happens, all CPUs serially read their MSR banks.
1840
 * The first CPU to fetch an error bank's info clears
1841
 * that bank, so later readers can't get any info again.
1842
 * That first CPU is the actual mce_owner.
1843
 *
1844
 * A fatal (pcc=1) error might crash the machine
1845
 * before we're able to log it. To avoid losing logs, we adopt two-
1846
 * round scanning:
1847
 * Round 1: simply scan; if pcc = 1 or ripv = 0 is found, simply reset.
1848
 * MCE banks are sticky, so at the next boot the MCE polling mechanism
1849
 * will collect and log those MCE errors.
1850
 * Round 2: do all MCE processing logic as normal.
1851
 */
1852
void mce_handler_init(void)
1853
12
{
1854
12
    if ( smp_processor_id() != 0 )
1855
11
        return;
1856
12
1857
12
    /* callback registration; do we really need so many callbacks? */
1858
12
    /* mce handler data initialization */
1859
1
    spin_lock_init(&mce_logout_lock);
1860
1
    open_softirq(MACHINE_CHECK_SOFTIRQ, mce_softirq);
1861
1
}
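mce_handler_init() above only registers mce_softirq() with the softirq machinery on the boot CPU via open_softirq(); the handler then runs whenever MACHINE_CHECK_SOFTIRQ is raised and softirqs are next processed. The toy dispatcher below models that register/raise/dispatch split; the *_model() helpers are invented for the illustration and are not Xen's softirq implementation.

#include <stdio.h>

#define NR_SOFTIRQS 8

typedef void (*softirq_handler)(void);

static softirq_handler handlers[NR_SOFTIRQS];
static unsigned long pending;

/* Registration just stores the handler in a per-softirq slot. */
static void open_softirq_model(unsigned int nr, softirq_handler fn)
{
    handlers[nr] = fn;
}

/* Raising only marks the softirq pending; nothing runs yet. */
static void raise_softirq_model(unsigned int nr)
{
    pending |= 1ul << nr;
}

/* A later dispatch pass runs and clears every pending handler. */
static void do_softirq_model(void)
{
    for (unsigned int nr = 0; nr < NR_SOFTIRQS; nr++)
        if ((pending & (1ul << nr)) && handlers[nr]) {
            pending &= ~(1ul << nr);
            handlers[nr]();
        }
}

static void toy_mce_softirq(void)
{
    printf("machine-check softirq ran\n");
}

int main(void)
{
    open_softirq_model(3, toy_mce_softirq);   /* slot 3 chosen arbitrarily */
    raise_softirq_model(3);                   /* e.g. from the #MC handler */
    do_softirq_model();                       /* later, outside the exception */
    return 0;
}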