Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/arch/x86/msi.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * File:    msi.c
3
 * Purpose: PCI Message Signaled Interrupt (MSI)
4
 *
5
 * Copyright (C) 2003-2004 Intel
6
 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
7
 */
8
9
#include <xen/lib.h>
10
#include <xen/init.h>
11
#include <xen/irq.h>
12
#include <xen/delay.h>
13
#include <xen/sched.h>
14
#include <xen/acpi.h>
15
#include <xen/cpu.h>
16
#include <xen/errno.h>
17
#include <xen/pci.h>
18
#include <xen/pci_regs.h>
19
#include <xen/iocap.h>
20
#include <xen/keyhandler.h>
21
#include <xen/pfn.h>
22
#include <asm/io.h>
23
#include <asm/smp.h>
24
#include <asm/desc.h>
25
#include <asm/msi.h>
26
#include <asm/fixmap.h>
27
#include <asm/p2m.h>
28
#include <mach_apic.h>
29
#include <io_ports.h>
30
#include <public/physdev.h>
31
#include <xen/iommu.h>
32
#include <xsm/xsm.h>
33
#include <xen/vpci.h>
34
35
static s8 __read_mostly use_msi = -1;
36
boolean_param("msi", use_msi);
37
38
static void __pci_disable_msix(struct msi_desc *);
39
40
/* bitmap indicate which fixed map is free */
41
static DEFINE_SPINLOCK(msix_fixmap_lock);
42
static DECLARE_BITMAP(msix_fixmap_pages, FIX_MSIX_MAX_PAGES);
43
44
static int msix_fixmap_alloc(void)
45
4
{
46
4
    int i, rc = -ENOMEM;
47
4
48
4
    spin_lock(&msix_fixmap_lock);
49
10
    for ( i = 0; i < FIX_MSIX_MAX_PAGES; i++ )
50
10
        if ( !test_bit(i, &msix_fixmap_pages) )
51
4
            break;
52
4
    if ( i == FIX_MSIX_MAX_PAGES )
53
0
        goto out;
54
4
    rc = FIX_MSIX_IO_RESERV_BASE + i;
55
4
    set_bit(i, &msix_fixmap_pages);
56
4
57
4
 out:
58
4
    spin_unlock(&msix_fixmap_lock);
59
4
    return rc;
60
4
}
61
62
static void msix_fixmap_free(int idx)
63
0
{
64
0
    spin_lock(&msix_fixmap_lock);
65
0
    if ( idx >= FIX_MSIX_IO_RESERV_BASE )
66
0
        clear_bit(idx - FIX_MSIX_IO_RESERV_BASE, &msix_fixmap_pages);
67
0
    spin_unlock(&msix_fixmap_lock);
68
0
}
69
70
static int msix_get_fixmap(struct arch_msix *msix, u64 table_paddr,
71
                           u64 entry_paddr)
72
36
{
73
36
    long nr_page;
74
36
    int idx;
75
36
76
36
    nr_page = (entry_paddr >> PAGE_SHIFT) - (table_paddr >> PAGE_SHIFT);
77
36
78
36
    if ( nr_page < 0 || nr_page >= MAX_MSIX_TABLE_PAGES )
79
0
        return -EINVAL;
80
36
81
36
    spin_lock(&msix->table_lock);
82
36
    if ( msix->table_refcnt[nr_page]++ == 0 )
83
4
    {
84
4
        idx = msix_fixmap_alloc();
85
4
        if ( idx < 0 )
86
0
        {
87
0
            msix->table_refcnt[nr_page]--;
88
0
            goto out;
89
0
        }
90
4
        set_fixmap_nocache(idx, entry_paddr);
91
4
        msix->table_idx[nr_page] = idx;
92
4
    }
93
36
    else
94
32
        idx = msix->table_idx[nr_page];
95
36
96
36
 out:
97
36
    spin_unlock(&msix->table_lock);
98
36
    return idx;
99
36
}
100
101
static void msix_put_fixmap(struct arch_msix *msix, int idx)
102
0
{
103
0
    int i;
104
0
105
0
    spin_lock(&msix->table_lock);
106
0
    for ( i = 0; i < MAX_MSIX_TABLE_PAGES; i++ )
107
0
    {
108
0
        if ( msix->table_idx[i] == idx )
109
0
            break;
110
0
    }
111
0
    if ( i == MAX_MSIX_TABLE_PAGES )
112
0
        goto out;
113
0
114
0
    if ( --msix->table_refcnt[i] == 0 )
115
0
    {
116
0
        clear_fixmap(idx);
117
0
        msix_fixmap_free(idx);
118
0
        msix->table_idx[i] = 0;
119
0
    }
120
0
121
0
 out:
122
0
    spin_unlock(&msix->table_lock);
123
0
}
124
125
static bool memory_decoded(const struct pci_dev *dev)
126
216
{
127
216
    u8 bus, slot, func;
128
216
129
216
    if ( !dev->info.is_virtfn )
130
216
    {
131
216
        bus = dev->bus;
132
216
        slot = PCI_SLOT(dev->devfn);
133
216
        func = PCI_FUNC(dev->devfn);
134
216
    }
135
216
    else
136
0
    {
137
0
        bus = dev->info.physfn.bus;
138
0
        slot = PCI_SLOT(dev->info.physfn.devfn);
139
0
        func = PCI_FUNC(dev->info.physfn.devfn);
140
0
    }
141
216
142
216
    return !!(pci_conf_read16(dev->seg, bus, slot, func, PCI_COMMAND) &
143
216
              PCI_COMMAND_MEMORY);
144
216
}
145
146
static bool msix_memory_decoded(const struct pci_dev *dev, unsigned int pos)
147
108
{
148
108
    u16 control = pci_conf_read16(dev->seg, dev->bus, PCI_SLOT(dev->devfn),
149
108
                                  PCI_FUNC(dev->devfn), msix_control_reg(pos));
150
108
151
108
    if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
152
0
        return false;
153
108
154
108
    return memory_decoded(dev);
155
108
}
156
157
/*
158
 * MSI message composition
159
 */
160
void msi_compose_msg(unsigned vector, const cpumask_t *cpu_mask, struct msi_msg *msg)
161
44
{
162
44
    memset(msg, 0, sizeof(*msg));
163
44
164
44
    if ( vector < FIRST_DYNAMIC_VECTOR )
165
0
        return;
166
44
167
44
    if ( cpu_mask )
168
42
    {
169
42
        cpumask_t *mask = this_cpu(scratch_cpumask);
170
42
171
42
        if ( !cpumask_intersects(cpu_mask, &cpu_online_map) )
172
0
            return;
173
42
174
42
        cpumask_and(mask, cpu_mask, &cpu_online_map);
175
42
        msg->dest32 = cpu_mask_to_apicid(mask);
176
42
    }
177
44
178
44
    msg->address_hi = MSI_ADDR_BASE_HI;
179
44
    msg->address_lo = MSI_ADDR_BASE_LO |
180
44
                      (INT_DEST_MODE ? MSI_ADDR_DESTMODE_LOGIC
181
0
                                     : MSI_ADDR_DESTMODE_PHYS) |
182
44
                      ((INT_DELIVERY_MODE != dest_LowestPrio)
183
0
                       ? MSI_ADDR_REDIRECTION_CPU
184
44
                       : MSI_ADDR_REDIRECTION_LOWPRI) |
185
44
                      MSI_ADDR_DEST_ID(msg->dest32);
186
44
187
44
    msg->data = MSI_DATA_TRIGGER_EDGE |
188
44
                MSI_DATA_LEVEL_ASSERT |
189
44
                ((INT_DELIVERY_MODE != dest_LowestPrio)
190
0
                 ? MSI_DATA_DELIVERY_FIXED
191
44
                 : MSI_DATA_DELIVERY_LOWPRI) |
192
44
                MSI_DATA_VECTOR(vector);
193
44
}
194
195
static bool read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
196
42
{
197
42
    switch ( entry->msi_attrib.type )
198
42
    {
199
6
    case PCI_CAP_ID_MSI:
200
6
    {
201
6
        struct pci_dev *dev = entry->dev;
202
6
        int pos = entry->msi_attrib.pos;
203
6
        u16 data, seg = dev->seg;
204
6
        u8 bus = dev->bus;
205
6
        u8 slot = PCI_SLOT(dev->devfn);
206
6
        u8 func = PCI_FUNC(dev->devfn);
207
6
208
6
        msg->address_lo = pci_conf_read32(seg, bus, slot, func,
209
6
                                          msi_lower_address_reg(pos));
210
6
        if ( entry->msi_attrib.is_64 )
211
5
        {
212
5
            msg->address_hi = pci_conf_read32(seg, bus, slot, func,
213
5
                                              msi_upper_address_reg(pos));
214
5
            data = pci_conf_read16(seg, bus, slot, func,
215
5
                                   msi_data_reg(pos, 1));
216
5
        }
217
6
        else
218
1
        {
219
1
            msg->address_hi = 0;
220
1
            data = pci_conf_read16(seg, bus, slot, func,
221
1
                                   msi_data_reg(pos, 0));
222
1
        }
223
6
        msg->data = data;
224
6
        break;
225
6
    }
226
36
    case PCI_CAP_ID_MSIX:
227
36
    {
228
36
        void __iomem *base = entry->mask_base;
229
36
230
36
        if ( unlikely(!msix_memory_decoded(entry->dev,
231
36
                                           entry->msi_attrib.pos)) )
232
0
            return false;
233
36
        msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
234
36
        msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
235
36
        msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET);
236
36
        break;
237
36
    }
238
0
    default:
239
0
        BUG();
240
42
    }
241
42
242
42
    if ( iommu_intremap )
243
42
        iommu_read_msi_from_ire(entry, msg);
244
42
245
42
    return true;
246
42
}
247
248
static int write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
249
84
{
250
84
    entry->msg = *msg;
251
84
252
84
    if ( iommu_intremap )
253
84
    {
254
84
        int rc;
255
84
256
84
        ASSERT(msg != &entry->msg);
257
84
        rc = iommu_update_ire_from_msi(entry, msg);
258
84
        if ( rc )
259
0
            return rc;
260
84
    }
261
84
262
84
    switch ( entry->msi_attrib.type )
263
84
    {
264
12
    case PCI_CAP_ID_MSI:
265
12
    {
266
12
        struct pci_dev *dev = entry->dev;
267
12
        int pos = entry->msi_attrib.pos;
268
12
        u16 seg = dev->seg;
269
12
        u8 bus = dev->bus;
270
12
        u8 slot = PCI_SLOT(dev->devfn);
271
12
        u8 func = PCI_FUNC(dev->devfn);
272
12
        int nr = entry->msi_attrib.entry_nr;
273
12
274
12
        ASSERT((msg->data & (entry[-nr].msi.nvec - 1)) == nr);
275
12
        if ( nr )
276
0
            return 0;
277
12
278
12
        pci_conf_write32(seg, bus, slot, func, msi_lower_address_reg(pos),
279
12
                         msg->address_lo);
280
12
        if ( entry->msi_attrib.is_64 )
281
10
        {
282
10
            pci_conf_write32(seg, bus, slot, func, msi_upper_address_reg(pos),
283
10
                             msg->address_hi);
284
10
            pci_conf_write16(seg, bus, slot, func, msi_data_reg(pos, 1),
285
10
                             msg->data);
286
10
        }
287
12
        else
288
2
            pci_conf_write16(seg, bus, slot, func, msi_data_reg(pos, 0),
289
2
                             msg->data);
290
12
        break;
291
12
    }
292
72
    case PCI_CAP_ID_MSIX:
293
72
    {
294
72
        void __iomem *base = entry->mask_base;
295
72
296
72
        if ( unlikely(!msix_memory_decoded(entry->dev,
297
72
                                           entry->msi_attrib.pos)) )
298
0
            return -ENXIO;
299
72
        writel(msg->address_lo,
300
72
               base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
301
72
        writel(msg->address_hi,
302
72
               base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
303
72
        writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
304
72
        break;
305
72
    }
306
0
    default:
307
0
        BUG();
308
84
    }
309
84
310
84
    return 0;
311
84
}
312
313
void set_msi_affinity(struct irq_desc *desc, const cpumask_t *mask)
314
42
{
315
42
    struct msi_msg msg;
316
42
    unsigned int dest;
317
42
    struct msi_desc *msi_desc = desc->msi_desc;
318
42
319
42
    dest = set_desc_affinity(desc, mask);
320
42
    if ( dest == BAD_APICID || !msi_desc )
321
0
        return;
322
42
323
42
    ASSERT(spin_is_locked(&desc->lock));
324
42
325
42
    memset(&msg, 0, sizeof(msg));
326
42
    if ( !read_msi_msg(msi_desc, &msg) )
327
0
        return;
328
42
329
42
    msg.data &= ~MSI_DATA_VECTOR_MASK;
330
42
    msg.data |= MSI_DATA_VECTOR(desc->arch.vector);
331
42
    msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
332
42
    msg.address_lo |= MSI_ADDR_DEST_ID(dest);
333
42
    msg.dest32 = dest;
334
42
335
42
    write_msi_msg(msi_desc, &msg);
336
42
}
337
338
void __msi_set_enable(u16 seg, u8 bus, u8 slot, u8 func, int pos, int enable)
339
6
{
340
6
    u16 control = pci_conf_read16(seg, bus, slot, func, pos + PCI_MSI_FLAGS);
341
6
342
6
    control &= ~PCI_MSI_FLAGS_ENABLE;
343
6
    if ( enable )
344
0
        control |= PCI_MSI_FLAGS_ENABLE;
345
6
    pci_conf_write16(seg, bus, slot, func, pos + PCI_MSI_FLAGS, control);
346
6
}
347
348
static void msi_set_enable(struct pci_dev *dev, int enable)
349
6
{
350
6
    int pos;
351
6
    u16 seg = dev->seg;
352
6
    u8 bus = dev->bus;
353
6
    u8 slot = PCI_SLOT(dev->devfn);
354
6
    u8 func = PCI_FUNC(dev->devfn);
355
6
356
6
    pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
357
6
    if ( pos )
358
6
        __msi_set_enable(seg, bus, slot, func, pos, enable);
359
6
}
360
361
static void msix_set_enable(struct pci_dev *dev, int enable)
362
0
{
363
0
    int pos;
364
0
    u16 control, seg = dev->seg;
365
0
    u8 bus = dev->bus;
366
0
    u8 slot = PCI_SLOT(dev->devfn);
367
0
    u8 func = PCI_FUNC(dev->devfn);
368
0
369
0
    pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX);
370
0
    if ( pos )
371
0
    {
372
0
        control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
373
0
        control &= ~PCI_MSIX_FLAGS_ENABLE;
374
0
        if ( enable )
375
0
            control |= PCI_MSIX_FLAGS_ENABLE;
376
0
        pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
377
0
    }
378
0
}
379
380
int msi_maskable_irq(const struct msi_desc *entry)
381
84
{
382
84
    BUG_ON(!entry);
383
84
    return entry->msi_attrib.type != PCI_CAP_ID_MSI
384
12
           || entry->msi_attrib.maskbit;
385
84
}
386
387
static bool msi_set_mask_bit(struct irq_desc *desc, bool host, bool guest)
388
72
{
389
72
    struct msi_desc *entry = desc->msi_desc;
390
72
    struct pci_dev *pdev;
391
72
    u16 seg, control;
392
72
    u8 bus, slot, func;
393
72
    bool flag = host || guest, maskall;
394
72
395
72
    ASSERT(spin_is_locked(&desc->lock));
396
72
    BUG_ON(!entry || !entry->dev);
397
72
    pdev = entry->dev;
398
72
    seg = pdev->seg;
399
72
    bus = pdev->bus;
400
72
    slot = PCI_SLOT(pdev->devfn);
401
72
    func = PCI_FUNC(pdev->devfn);
402
72
    switch ( entry->msi_attrib.type )
403
72
    {
404
0
    case PCI_CAP_ID_MSI:
405
0
        if ( entry->msi_attrib.maskbit )
406
0
        {
407
0
            u32 mask_bits;
408
0
409
0
            mask_bits = pci_conf_read32(seg, bus, slot, func, entry->msi.mpos);
410
0
            mask_bits &= ~((u32)1 << entry->msi_attrib.entry_nr);
411
0
            mask_bits |= (u32)flag << entry->msi_attrib.entry_nr;
412
0
            pci_conf_write32(seg, bus, slot, func, entry->msi.mpos, mask_bits);
413
0
        }
414
0
        break;
415
72
    case PCI_CAP_ID_MSIX:
416
72
        maskall = pdev->msix->host_maskall;
417
72
        control = pci_conf_read16(seg, bus, slot, func,
418
72
                                  msix_control_reg(entry->msi_attrib.pos));
419
72
        if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
420
0
        {
421
0
            pdev->msix->host_maskall = 1;
422
0
            pci_conf_write16(seg, bus, slot, func,
423
0
                             msix_control_reg(entry->msi_attrib.pos),
424
0
                             control | (PCI_MSIX_FLAGS_ENABLE |
425
0
                                        PCI_MSIX_FLAGS_MASKALL));
426
0
        }
427
72
        if ( likely(memory_decoded(pdev)) )
428
72
        {
429
72
            writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
430
72
            readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
431
72
432
72
            if ( likely(control & PCI_MSIX_FLAGS_ENABLE) )
433
72
                break;
434
72
435
0
            entry->msi_attrib.host_masked = host;
436
0
            entry->msi_attrib.guest_masked = guest;
437
0
438
0
            flag = true;
439
0
        }
440
0
        else if ( flag && !(control & PCI_MSIX_FLAGS_MASKALL) )
441
0
        {
442
0
            domid_t domid = pdev->domain->domain_id;
443
0
444
0
            maskall = true;
445
0
            if ( pdev->msix->warned != domid )
446
0
            {
447
0
                pdev->msix->warned = domid;
448
0
                printk(XENLOG_G_WARNING
449
0
                       "cannot mask IRQ %d: masking MSI-X on Dom%d's %04x:%02x:%02x.%u\n",
450
0
                       desc->irq, domid, pdev->seg, pdev->bus,
451
0
                       PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
452
0
            }
453
0
        }
454
0
        pdev->msix->host_maskall = maskall;
455
0
        if ( maskall || pdev->msix->guest_maskall )
456
0
            control |= PCI_MSIX_FLAGS_MASKALL;
457
0
        pci_conf_write16(seg, bus, slot, func,
458
0
                         msix_control_reg(entry->msi_attrib.pos), control);
459
0
        return flag;
460
0
    default:
461
0
        return 0;
462
72
    }
463
72
    entry->msi_attrib.host_masked = host;
464
72
    entry->msi_attrib.guest_masked = guest;
465
72
466
72
    return 1;
467
72
}
468
469
static int msi_get_mask_bit(const struct msi_desc *entry)
470
0
{
471
0
    if ( !entry->dev )
472
0
        return -1;
473
0
474
0
    switch ( entry->msi_attrib.type )
475
0
    {
476
0
    case PCI_CAP_ID_MSI:
477
0
        if ( !entry->msi_attrib.maskbit )
478
0
            break;
479
0
        return (pci_conf_read32(entry->dev->seg, entry->dev->bus,
480
0
                                PCI_SLOT(entry->dev->devfn),
481
0
                                PCI_FUNC(entry->dev->devfn),
482
0
                                entry->msi.mpos) >>
483
0
                entry->msi_attrib.entry_nr) & 1;
484
0
    case PCI_CAP_ID_MSIX:
485
0
        if ( unlikely(!msix_memory_decoded(entry->dev,
486
0
                                           entry->msi_attrib.pos)) )
487
0
            break;
488
0
        return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1;
489
0
    }
490
0
    return -1;
491
0
}
492
493
void mask_msi_irq(struct irq_desc *desc)
494
0
{
495
0
    if ( unlikely(!msi_set_mask_bit(desc, 1,
496
0
                                    desc->msi_desc->msi_attrib.guest_masked)) )
497
0
        BUG_ON(!(desc->status & IRQ_DISABLED));
498
0
}
499
500
void unmask_msi_irq(struct irq_desc *desc)
501
0
{
502
0
    if ( unlikely(!msi_set_mask_bit(desc, 0,
503
0
                                    desc->msi_desc->msi_attrib.guest_masked)) )
504
0
        WARN();
505
0
}
506
507
void guest_mask_msi_irq(struct irq_desc *desc, bool mask)
508
36
{
509
36
    msi_set_mask_bit(desc, desc->msi_desc->msi_attrib.host_masked, mask);
510
36
}
511
512
static unsigned int startup_msi_irq(struct irq_desc *desc)
513
36
{
514
36
    if ( unlikely(!msi_set_mask_bit(desc, 0, !!(desc->status & IRQ_GUEST))) )
515
36
        WARN();
516
36
    return 0;
517
36
}
518
519
static void shutdown_msi_irq(struct irq_desc *desc)
520
0
{
521
0
    if ( unlikely(!msi_set_mask_bit(desc, 1, 1)) )
522
0
        BUG_ON(!(desc->status & IRQ_DISABLED));
523
0
}
524
525
void ack_nonmaskable_msi_irq(struct irq_desc *desc)
526
3.84k
{
527
3.84k
    irq_complete_move(desc);
528
3.84k
    move_native_irq(desc);
529
3.84k
}
530
531
static void ack_maskable_msi_irq(struct irq_desc *desc)
532
300
{
533
300
    ack_nonmaskable_msi_irq(desc);
534
300
    ack_APIC_irq(); /* ACKTYPE_NONE */
535
300
}
536
537
void end_nonmaskable_msi_irq(struct irq_desc *desc, u8 vector)
538
3.54k
{
539
3.54k
    ack_APIC_irq(); /* ACKTYPE_EOI */
540
3.54k
}
541
542
/*
543
 * IRQ chip for MSI PCI/PCI-X/PCI-Express devices,
544
 * which implement the MSI or MSI-X capability structure.
545
 */
546
static hw_irq_controller pci_msi_maskable = {
547
    .typename     = "PCI-MSI/-X",
548
    .startup      = startup_msi_irq,
549
    .shutdown     = shutdown_msi_irq,
550
    .enable       = unmask_msi_irq,
551
    .disable      = mask_msi_irq,
552
    .ack          = ack_maskable_msi_irq,
553
    .set_affinity = set_msi_affinity
554
};
555
556
/* As above, but without having masking capability. */
557
static hw_irq_controller pci_msi_nonmaskable = {
558
    .typename     = "PCI-MSI",
559
    .startup      = irq_startup_none,
560
    .shutdown     = irq_shutdown_none,
561
    .enable       = irq_enable_none,
562
    .disable      = irq_disable_none,
563
    .ack          = ack_nonmaskable_msi_irq,
564
    .end          = end_nonmaskable_msi_irq,
565
    .set_affinity = set_msi_affinity
566
};
567
568
static struct msi_desc *alloc_msi_entry(unsigned int nr)
569
42
{
570
42
    struct msi_desc *entry;
571
42
572
42
    entry = xmalloc_array(struct msi_desc, nr);
573
42
    if ( !entry )
574
0
        return NULL;
575
42
576
42
    INIT_LIST_HEAD(&entry->list);
577
84
    while ( nr-- )
578
42
    {
579
42
        entry[nr].dev = NULL;
580
42
        entry[nr].irq = -1;
581
42
        entry[nr].remap_index = -1;
582
42
        entry[nr].pi_desc = NULL;
583
42
        entry[nr].irte_initialized = false;
584
42
    }
585
42
586
42
    return entry;
587
42
}
588
589
int setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc)
590
42
{
591
42
    const struct pci_dev *pdev = msidesc->dev;
592
42
    unsigned int cpos = msix_control_reg(msidesc->msi_attrib.pos);
593
42
    u16 control = ~0;
594
42
    int rc;
595
42
596
42
    if ( msidesc->msi_attrib.type == PCI_CAP_ID_MSIX )
597
36
    {
598
36
        control = pci_conf_read16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
599
36
                                  PCI_FUNC(pdev->devfn), cpos);
600
36
        if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
601
0
            pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
602
0
                             PCI_FUNC(pdev->devfn), cpos,
603
0
                             control | (PCI_MSIX_FLAGS_ENABLE |
604
0
                                        PCI_MSIX_FLAGS_MASKALL));
605
36
    }
606
42
607
42
    rc = __setup_msi_irq(desc, msidesc,
608
36
                         msi_maskable_irq(msidesc) ? &pci_msi_maskable
609
6
                                                   : &pci_msi_nonmaskable);
610
42
611
42
    if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
612
0
        pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
613
0
                         PCI_FUNC(pdev->devfn), cpos, control);
614
42
615
42
    return rc;
616
42
}
617
618
int __setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc,
619
                    hw_irq_controller *handler)
620
42
{
621
42
    struct msi_msg msg;
622
42
    int ret;
623
42
624
42
    desc->msi_desc = msidesc;
625
42
    desc->handler = handler;
626
42
    msi_compose_msg(desc->arch.vector, desc->arch.cpu_mask, &msg);
627
42
    ret = write_msi_msg(msidesc, &msg);
628
42
    if ( unlikely(ret) )
629
0
    {
630
0
        desc->handler = &no_irq_type;
631
0
        desc->msi_desc = NULL;
632
0
    }
633
42
634
42
    return ret;
635
42
}
636
637
int msi_free_irq(struct msi_desc *entry)
638
0
{
639
0
    unsigned int nr = entry->msi_attrib.type != PCI_CAP_ID_MSIX
640
0
                      ? entry->msi.nvec : 1;
641
0
642
0
    while ( nr-- )
643
0
    {
644
0
        if ( entry[nr].irq >= 0 )
645
0
            destroy_irq(entry[nr].irq);
646
0
647
0
        /* Free the unused IRTE if intr remap enabled */
648
0
        if ( iommu_intremap )
649
0
            iommu_update_ire_from_msi(entry + nr, NULL);
650
0
    }
651
0
652
0
    if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
653
0
        msix_put_fixmap(entry->dev->msix,
654
0
                        virt_to_fix((unsigned long)entry->mask_base));
655
0
656
0
    list_del(&entry->list);
657
0
    xfree(entry);
658
0
    return 0;
659
0
}
660
661
static struct msi_desc *find_msi_entry(struct pci_dev *dev,
662
                                       int irq, int cap_id)
663
88
{
664
88
    struct msi_desc *entry;
665
88
666
88
    list_for_each_entry( entry, &dev->msi_list, list )
667
288
    {
668
288
        if ( entry->msi_attrib.type == cap_id &&
669
144
             (irq == -1 || entry->irq == irq) )
670
0
            return entry;
671
288
    }
672
88
673
88
    return NULL;
674
88
}
675
676
/**
677
 * msi_capability_init - configure device's MSI capability structure
678
 * @dev: pointer to the pci_dev data structure of MSI device function
679
 *
680
 * Setup the MSI capability structure of device function with a single
681
 * MSI irq, regardless of device function is capable of handling
682
 * multiple messages. A return of zero indicates the successful setup
683
 * of an entry zero with the new MSI irq or non-zero for otherwise.
684
 **/
685
static int msi_capability_init(struct pci_dev *dev,
686
                               int irq,
687
                               struct msi_desc **desc,
688
                               unsigned int nvec)
689
6
{
690
6
    struct msi_desc *entry;
691
6
    int pos;
692
6
    unsigned int i, maxvec, mpos;
693
6
    u16 control, seg = dev->seg;
694
6
    u8 bus = dev->bus;
695
6
    u8 slot = PCI_SLOT(dev->devfn);
696
6
    u8 func = PCI_FUNC(dev->devfn);
697
6
698
6
    ASSERT(pcidevs_locked());
699
6
    pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
700
6
    if ( !pos )
701
0
        return -ENODEV;
702
6
    control = pci_conf_read16(seg, bus, slot, func, msi_control_reg(pos));
703
6
    maxvec = multi_msi_capable(control);
704
6
    if ( nvec > maxvec )
705
0
        return maxvec;
706
6
    control &= ~PCI_MSI_FLAGS_QSIZE;
707
6
    multi_msi_enable(control, nvec);
708
6
709
6
    /* MSI Entry Initialization */
710
6
    msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */
711
6
712
6
    entry = alloc_msi_entry(nvec);
713
6
    if ( !entry )
714
0
        return -ENOMEM;
715
6
716
6
    mpos = msi_mask_bits_reg(pos, is_64bit_address(control));
717
12
    for ( i = 0; i < nvec; ++i )
718
6
    {
719
6
        entry[i].msi_attrib.type = PCI_CAP_ID_MSI;
720
6
        entry[i].msi_attrib.is_64 = is_64bit_address(control);
721
6
        entry[i].msi_attrib.entry_nr = i;
722
6
        entry[i].msi_attrib.host_masked =
723
6
        entry[i].msi_attrib.maskbit = is_mask_bit_support(control);
724
6
        entry[i].msi_attrib.guest_masked = 0;
725
6
        entry[i].msi_attrib.pos = pos;
726
6
        if ( entry[i].msi_attrib.maskbit )
727
0
            entry[i].msi.mpos = mpos;
728
6
        entry[i].msi.nvec = 0;
729
6
        entry[i].dev = dev;
730
6
    }
731
6
    entry->msi.nvec = nvec;
732
6
    entry->irq = irq;
733
6
    if ( entry->msi_attrib.maskbit )
734
0
    {
735
0
        u32 maskbits;
736
0
737
0
        /* All MSIs are unmasked by default, Mask them all */
738
0
        maskbits = pci_conf_read32(seg, bus, slot, func, mpos);
739
0
        maskbits |= ~(u32)0 >> (32 - maxvec);
740
0
        pci_conf_write32(seg, bus, slot, func, mpos, maskbits);
741
0
    }
742
6
    list_add_tail(&entry->list, &dev->msi_list);
743
6
744
6
    *desc = entry;
745
6
    /* Restore the original MSI enabled bits  */
746
6
    pci_conf_write16(seg, bus, slot, func, msi_control_reg(pos), control);
747
6
748
6
    return 0;
749
6
}
750
751
static u64 read_pci_mem_bar(u16 seg, u8 bus, u8 slot, u8 func, u8 bir, int vf)
752
40
{
753
40
    u8 limit;
754
40
    u32 addr, base = PCI_BASE_ADDRESS_0;
755
40
    u64 disp = 0;
756
40
757
40
    if ( vf >= 0 )
758
0
    {
759
0
        struct pci_dev *pdev = pci_get_pdev(seg, bus, PCI_DEVFN(slot, func));
760
0
        unsigned int pos = pci_find_ext_capability(seg, bus,
761
0
                                                   PCI_DEVFN(slot, func),
762
0
                                                   PCI_EXT_CAP_ID_SRIOV);
763
0
        u16 ctrl = pci_conf_read16(seg, bus, slot, func, pos + PCI_SRIOV_CTRL);
764
0
        u16 num_vf = pci_conf_read16(seg, bus, slot, func,
765
0
                                     pos + PCI_SRIOV_NUM_VF);
766
0
        u16 offset = pci_conf_read16(seg, bus, slot, func,
767
0
                                     pos + PCI_SRIOV_VF_OFFSET);
768
0
        u16 stride = pci_conf_read16(seg, bus, slot, func,
769
0
                                     pos + PCI_SRIOV_VF_STRIDE);
770
0
771
0
        if ( !pdev || !pos ||
772
0
             !(ctrl & PCI_SRIOV_CTRL_VFE) ||
773
0
             !(ctrl & PCI_SRIOV_CTRL_MSE) ||
774
0
             !num_vf || !offset || (num_vf > 1 && !stride) ||
775
0
             bir >= PCI_SRIOV_NUM_BARS ||
776
0
             !pdev->vf_rlen[bir] )
777
0
            return 0;
778
0
        base = pos + PCI_SRIOV_BAR;
779
0
        vf -= PCI_BDF(bus, slot, func) + offset;
780
0
        if ( vf < 0 )
781
0
            return 0;
782
0
        if ( stride )
783
0
        {
784
0
            if ( vf % stride )
785
0
                return 0;
786
0
            vf /= stride;
787
0
        }
788
0
        if ( vf >= num_vf )
789
0
            return 0;
790
0
        BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS);
791
0
        disp = vf * pdev->vf_rlen[bir];
792
0
        limit = PCI_SRIOV_NUM_BARS;
793
0
    }
794
40
    else switch ( pci_conf_read8(seg, bus, slot, func,
795
40
                                 PCI_HEADER_TYPE) & 0x7f )
796
40
    {
797
40
    case PCI_HEADER_TYPE_NORMAL:
798
40
        limit = 6;
799
40
        break;
800
0
    case PCI_HEADER_TYPE_BRIDGE:
801
0
        limit = 2;
802
0
        break;
803
0
    case PCI_HEADER_TYPE_CARDBUS:
804
0
        limit = 1;
805
0
        break;
806
0
    default:
807
0
        return 0;
808
40
    }
809
40
810
40
    if ( bir >= limit )
811
0
        return 0;
812
40
    addr = pci_conf_read32(seg, bus, slot, func, base + bir * 4);
813
40
    if ( (addr & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO )
814
0
        return 0;
815
40
    if ( (addr & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64 )
816
0
    {
817
0
        addr &= PCI_BASE_ADDRESS_MEM_MASK;
818
0
        if ( ++bir >= limit )
819
0
            return 0;
820
0
        return addr + disp +
821
0
               ((u64)pci_conf_read32(seg, bus, slot, func,
822
0
                                     base + bir * 4) << 32);
823
0
    }
824
40
    return (addr & PCI_BASE_ADDRESS_MEM_MASK) + disp;
825
40
}
826
827
/**
 * msix_capability_init - configure device's MSI-X capability
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @pos: config-space offset of the MSI-X capability
 * @msi: request describing irq/entry/table base (may be NULL when only
 *       priming bookkeeping, see pci_prepare_msix())
 * @desc: output slot for the newly built msi_desc (NULL to skip building one)
 * @nr_entries: size of the device's MSI-X table
 *
 * Setup the MSI-X capability structure of device function with the requested
 * number MSI-X irqs. A return of zero indicates the successful setup of
 * requested MSI-X entries with allocated irqs or non-zero for otherwise.
 **/
static int msix_capability_init(struct pci_dev *dev,
                                unsigned int pos,
                                struct msi_info *msi,
                                struct msi_desc **desc,
                                unsigned int nr_entries)
{
    struct arch_msix *msix = dev->msix;
    struct msi_desc *entry = NULL;
    int vf;
    u16 control;
    u64 table_paddr;
    u32 table_offset;
    u8 bir, pbus, pslot, pfunc;
    u16 seg = dev->seg;
    u8 bus = dev->bus;
    u8 slot = PCI_SLOT(dev->devfn);
    u8 func = PCI_FUNC(dev->devfn);
    bool maskall = msix->host_maskall;

    ASSERT(pcidevs_locked());

    control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
    /*
     * Ensure MSI-X interrupts are masked during setup. Some devices require
     * MSI-X to be enabled before we can touch the MSI-X registers. We need
     * to mask all the vectors to prevent interrupts coming in before they're
     * fully set up.
     */
    msix->host_maskall = 1;
    pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
                     control | (PCI_MSIX_FLAGS_ENABLE |
                                PCI_MSIX_FLAGS_MASKALL));

    /* Without memory decoding the MSI-X table is unreachable - back out. */
    if ( unlikely(!memory_decoded(dev)) )
    {
        pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
                         control & ~PCI_MSIX_FLAGS_ENABLE);
        return -ENXIO;
    }

    /* Only build an msi_desc when the caller wants one back. */
    if ( desc )
    {
        entry = alloc_msi_entry(1);
        if ( !entry )
        {
            pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
                             control & ~PCI_MSIX_FLAGS_ENABLE);
            return -ENOMEM;
        }
        ASSERT(msi);
    }

    /* Locate MSI-X table region */
    table_offset = pci_conf_read32(seg, bus, slot, func,
                                   msix_table_offset_reg(pos));
    bir = (u8)(table_offset & PCI_MSIX_BIRMASK);
    table_offset &= ~PCI_MSIX_BIRMASK;

    /* For a virtual function the BAR lives in the owning physical function. */
    if ( !dev->info.is_virtfn )
    {
        pbus = bus;
        pslot = slot;
        pfunc = func;
        vf = -1;
    }
    else
    {
        pbus = dev->info.physfn.bus;
        pslot = PCI_SLOT(dev->info.physfn.devfn);
        pfunc = PCI_FUNC(dev->info.physfn.devfn);
        vf = PCI_BDF2(dev->bus, dev->devfn);
    }

    table_paddr = read_pci_mem_bar(seg, pbus, pslot, pfunc, bir, vf);
    WARN_ON(msi && msi->table_base != table_paddr);
    if ( !table_paddr )
    {
        /* BAR unreadable: fall back to the caller-supplied base, if any. */
        if ( !msi || !msi->table_base )
        {
            pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
                             control & ~PCI_MSIX_FLAGS_ENABLE);
            xfree(entry);
            return -ENXIO;
        }
        table_paddr = msi->table_base;
    }
    table_paddr += table_offset;

    /* First user of this device's MSI-X: record table/PBA page ranges. */
    if ( !msix->used_entries )
    {
        u64 pba_paddr;
        u32 pba_offset;

        msix->nr_entries = nr_entries;
        msix->table.first = PFN_DOWN(table_paddr);
        msix->table.last = PFN_DOWN(table_paddr +
                                    nr_entries * PCI_MSIX_ENTRY_SIZE - 1);
        WARN_ON(rangeset_overlaps_range(mmio_ro_ranges, msix->table.first,
                                        msix->table.last));

        pba_offset = pci_conf_read32(seg, bus, slot, func,
                                     msix_pba_offset_reg(pos));
        bir = (u8)(pba_offset & PCI_MSIX_BIRMASK);
        pba_paddr = read_pci_mem_bar(seg, pbus, pslot, pfunc, bir, vf);
        WARN_ON(!pba_paddr);
        pba_paddr += pba_offset & ~PCI_MSIX_BIRMASK;

        msix->pba.first = PFN_DOWN(pba_paddr);
        msix->pba.last = PFN_DOWN(pba_paddr +
                                  BITS_TO_LONGS(nr_entries) - 1);
        WARN_ON(rangeset_overlaps_range(mmio_ro_ranges, msix->pba.first,
                                        msix->pba.last));
    }

    if ( entry )
    {
        /* Map MSI-X table region */
        u64 entry_paddr = table_paddr + msi->entry_nr * PCI_MSIX_ENTRY_SIZE;
        int idx = msix_get_fixmap(msix, table_paddr, entry_paddr);
        void __iomem *base;

        if ( idx < 0 )
        {
            pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
                             control & ~PCI_MSIX_FLAGS_ENABLE);
            xfree(entry);
            return idx;
        }
        base = (void *)(fix_to_virt(idx) +
                        ((unsigned long)entry_paddr & (PAGE_SIZE - 1)));

        /* Mask interrupt here */
        writel(1, base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);

        entry->msi_attrib.type = PCI_CAP_ID_MSIX;
        entry->msi_attrib.is_64 = 1;
        entry->msi_attrib.entry_nr = msi->entry_nr;
        entry->msi_attrib.maskbit = 1;
        entry->msi_attrib.host_masked = 1;
        entry->msi_attrib.guest_masked = 1;
        entry->msi_attrib.pos = pos;
        entry->irq = msi->irq;
        entry->dev = dev;
        entry->mask_base = base;

        list_add_tail(&entry->list, &dev->msi_list);
        *desc = entry;
    }

    if ( !msix->used_entries )
    {
        /* First user: compute the MASKALL state to leave behind ... */
        maskall = false;
        if ( !msix->guest_maskall )
            control &= ~PCI_MSIX_FLAGS_MASKALL;
        else
            control |= PCI_MSIX_FLAGS_MASKALL;

        /* ... and make the table/PBA pages read-only to guests. */
        if ( rangeset_add_range(mmio_ro_ranges, msix->table.first,
                                msix->table.last) )
            WARN();
        if ( rangeset_add_range(mmio_ro_ranges, msix->pba.first,
                                msix->pba.last) )
            WARN();

        if ( desc )
        {
            struct domain *currd = current->domain;
            struct domain *d = dev->domain ?: currd;

            if ( !is_hardware_domain(currd) || d != currd )
                printk("%s use of MSI-X on %04x:%02x:%02x.%u by Dom%d\n",
                       is_hardware_domain(currd)
                       ? XENLOG_WARNING "Potentially insecure"
                       : XENLOG_ERR "Insecure",
                       seg, bus, slot, func, d->domain_id);
            if ( !is_hardware_domain(d) &&
                 /* Assume a domain without memory has no mappings yet. */
                 (!is_hardware_domain(currd) || d->tot_pages) )
                domain_crash(d);
            /* XXX How to deal with existing mappings? */
        }
    }
    WARN_ON(msix->nr_entries != nr_entries);
    WARN_ON(msix->table.first != (table_paddr >> PAGE_SHIFT));
    ++msix->used_entries;

    /* Restore MSI-X enabled bits */
    msix->host_maskall = maskall;
    pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);

    return 0;
}
1029
1030
/**
1031
 * pci_enable_msi - configure device's MSI capability structure
1032
 * @dev: pointer to the pci_dev data structure of MSI device function
1033
 *
1034
 * Setup the MSI capability structure of device function with
1035
 * a single MSI irq upon its software driver call to request for
1036
 * MSI mode enabled on its hardware device function. A return of zero
1037
 * indicates the successful setup of an entry zero with the new MSI
1038
 * irq or non-zero for otherwise.
1039
 **/
1040
1041
static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
1042
6
{
1043
6
    struct pci_dev *pdev;
1044
6
    struct msi_desc *old_desc;
1045
6
1046
6
    ASSERT(pcidevs_locked());
1047
6
    pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
1048
6
    if ( !pdev )
1049
0
        return -ENODEV;
1050
6
1051
6
    old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSI);
1052
6
    if ( old_desc )
1053
0
    {
1054
0
        printk(XENLOG_ERR "irq %d already mapped to MSI on %04x:%02x:%02x.%u\n",
1055
0
               msi->irq, msi->seg, msi->bus,
1056
0
               PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
1057
0
        return -EEXIST;
1058
0
    }
1059
6
1060
6
    old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSIX);
1061
6
    if ( old_desc )
1062
0
    {
1063
0
        printk(XENLOG_WARNING "MSI-X already in use on %04x:%02x:%02x.%u\n",
1064
0
               msi->seg, msi->bus,
1065
0
               PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
1066
0
        __pci_disable_msix(old_desc);
1067
0
    }
1068
6
1069
6
    return msi_capability_init(pdev, msi->irq, desc, msi->entry_nr);
1070
6
}
1071
1072
static void __pci_disable_msi(struct msi_desc *entry)
1073
0
{
1074
0
    struct pci_dev *dev;
1075
0
1076
0
    dev = entry->dev;
1077
0
    msi_set_enable(dev, 0);
1078
0
1079
0
    BUG_ON(list_empty(&dev->msi_list));
1080
0
}
1081
1082
/**
1083
 * pci_enable_msix - configure device's MSI-X capability structure
1084
 * @dev: pointer to the pci_dev data structure of MSI-X device function
1085
 * @entries: pointer to an array of MSI-X entries
1086
 * @nvec: number of MSI-X irqs requested for allocation by device driver
1087
 *
1088
 * Setup the MSI-X capability structure of device function with the number
1089
 * of requested irqs upon its software driver call to request for
1090
 * MSI-X mode enabled on its hardware device function. A return of zero
1091
 * indicates the successful configuration of MSI-X capability structure
1092
 * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
1093
 * Or a return of > 0 indicates that driver request is exceeding the number
1094
 * of irqs available. Driver should use the returned value to re-send
1095
 * its request.
1096
 **/
1097
static int __pci_enable_msix(struct msi_info *msi, struct msi_desc **desc)
1098
36
{
1099
36
    int pos, nr_entries;
1100
36
    struct pci_dev *pdev;
1101
36
    u16 control;
1102
36
    u8 slot = PCI_SLOT(msi->devfn);
1103
36
    u8 func = PCI_FUNC(msi->devfn);
1104
36
    struct msi_desc *old_desc;
1105
36
1106
36
    ASSERT(pcidevs_locked());
1107
36
    pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
1108
36
    pos = pci_find_cap_offset(msi->seg, msi->bus, slot, func, PCI_CAP_ID_MSIX);
1109
36
    if ( !pdev || !pos )
1110
0
        return -ENODEV;
1111
36
1112
36
    control = pci_conf_read16(msi->seg, msi->bus, slot, func,
1113
36
                              msix_control_reg(pos));
1114
36
    nr_entries = multi_msix_capable(control);
1115
36
    if ( msi->entry_nr >= nr_entries )
1116
0
        return -EINVAL;
1117
36
1118
36
    old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSIX);
1119
36
    if ( old_desc )
1120
0
    {
1121
0
        printk(XENLOG_ERR "irq %d already mapped to MSI-X on %04x:%02x:%02x.%u\n",
1122
0
               msi->irq, msi->seg, msi->bus,
1123
0
               PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
1124
0
        return -EEXIST;
1125
0
    }
1126
36
1127
36
    old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI);
1128
36
    if ( old_desc )
1129
0
    {
1130
0
        printk(XENLOG_WARNING "MSI already in use on %04x:%02x:%02x.%u\n",
1131
0
               msi->seg, msi->bus,
1132
0
               PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
1133
0
        __pci_disable_msi(old_desc);
1134
0
    }
1135
36
1136
36
    return msix_capability_init(pdev, pos, msi, desc, nr_entries);
1137
36
}
1138
1139
static void _pci_cleanup_msix(struct arch_msix *msix)
1140
0
{
1141
0
    if ( !--msix->used_entries )
1142
0
    {
1143
0
        if ( rangeset_remove_range(mmio_ro_ranges, msix->table.first,
1144
0
                                   msix->table.last) )
1145
0
            WARN();
1146
0
        if ( rangeset_remove_range(mmio_ro_ranges, msix->pba.first,
1147
0
                                   msix->pba.last) )
1148
0
            WARN();
1149
0
    }
1150
0
}
1151
1152
/*
 * Tear down one MSI-X vector: mask its table entry (or fall back to the
 * capability's MASKALL bit when the table is unreachable) and drop the
 * device's MSI-X usage count.
 */
static void __pci_disable_msix(struct msi_desc *entry)
{
    struct pci_dev *dev = entry->dev;
    u16 seg = dev->seg;
    u8 bus = dev->bus;
    u8 slot = PCI_SLOT(dev->devfn);
    u8 func = PCI_FUNC(dev->devfn);
    unsigned int pos = pci_find_cap_offset(seg, bus, slot, func,
                                           PCI_CAP_ID_MSIX);
    /*
     * NOTE(review): control is read via entry->msi_attrib.pos but written
     * back via the freshly looked-up pos; these should name the same
     * capability offset - confirm.
     */
    u16 control = pci_conf_read16(seg, bus, slot, func,
                                  msix_control_reg(entry->msi_attrib.pos));
    bool maskall = dev->msix->host_maskall;

    /*
     * The table entry can only be written while MSI-X is enabled; if it
     * isn't, enable it with every vector masked first.
     */
    if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
    {
        dev->msix->host_maskall = 1;
        pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
                         control | (PCI_MSIX_FLAGS_ENABLE |
                                    PCI_MSIX_FLAGS_MASKALL));
    }

    BUG_ON(list_empty(&dev->msi_list));

    if ( likely(memory_decoded(dev)) )
        writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
    else if ( !(control & PCI_MSIX_FLAGS_MASKALL) )
    {
        /* Table inaccessible and not already masked: resort to MASKALL. */
        printk(XENLOG_WARNING
               "cannot disable IRQ %d: masking MSI-X on %04x:%02x:%02x.%u\n",
               entry->irq, dev->seg, dev->bus,
               PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
        maskall = true;
    }
    /* Reflect the final host/guest MASKALL state back into the control reg. */
    dev->msix->host_maskall = maskall;
    if ( maskall || dev->msix->guest_maskall )
        control |= PCI_MSIX_FLAGS_MASKALL;
    pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);

    _pci_cleanup_msix(dev->msix);
}
1192
1193
/*
 * Prime (off == false) or release (off == true) a device's MSI-X
 * bookkeeping without binding any actual vector.  Returns 0 on success,
 * -ENODEV when the device or capability is missing, -EBUSY when the
 * current usage count does not match the requested transition.
 */
int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool off)
{
    int ret;
    struct pci_dev *pdev;
    u8 slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn);
    unsigned int cap = pci_find_cap_offset(seg, bus, slot, func,
                                           PCI_CAP_ID_MSIX);

    if ( !use_msi )
        return 0;
    if ( !cap )
        return -ENODEV;

    pcidevs_lock();
    pdev = pci_get_pdev(seg, bus, devfn);
    if ( !pdev )
        ret = -ENODEV;
    else if ( pdev->msix->used_entries != !!off )
        ret = -EBUSY;
    else if ( off )
    {
        _pci_cleanup_msix(pdev->msix);
        ret = 0;
    }
    else
    {
        u16 ctrl = pci_conf_read16(seg, bus, slot, func,
                                   msix_control_reg(cap));

        ret = msix_capability_init(pdev, cap, NULL, NULL,
                                   multi_msix_capable(ctrl));
    }
    pcidevs_unlock();

    return ret;
}
1230
1231
/*
1232
 * Notice: only construct the msi_desc
1233
 * no change to irq_desc here, and the interrupt is masked
1234
 */
1235
int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
1236
42
{
1237
42
    ASSERT(pcidevs_locked());
1238
42
1239
42
    if ( !use_msi )
1240
0
        return -EPERM;
1241
42
1242
42
    return msi->table_base ? __pci_enable_msix(msi, desc) :
1243
6
                             __pci_enable_msi(msi, desc);
1244
42
}
1245
1246
/*
1247
 * Device only, no irq_desc
1248
 */
1249
void pci_disable_msi(struct msi_desc *msi_desc)
1250
0
{
1251
0
    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
1252
0
        __pci_disable_msi(msi_desc);
1253
0
    else if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSIX )
1254
0
        __pci_disable_msix(msi_desc);
1255
0
}
1256
1257
static void msi_free_irqs(struct pci_dev* dev)
1258
0
{
1259
0
    struct msi_desc *entry, *tmp;
1260
0
1261
0
    list_for_each_entry_safe( entry, tmp, &dev->msi_list, list )
1262
0
    {
1263
0
        pci_disable_msi(entry);
1264
0
        msi_free_irq(entry);
1265
0
    }
1266
0
}
1267
1268
void pci_cleanup_msi(struct pci_dev *pdev)
{
    /* Quiesce the device: clear both the MSI and the MSI-X enable bits. */
    msi_set_enable(pdev, 0);
    msix_set_enable(pdev, 0);
    /* Then release all remaining msi_desc entries. */
    msi_free_irqs(pdev);
}
1275
1276
/*
 * Intercept a guest config-space write of @size bytes at offset @reg.
 * Returns 1 when the write was handled here (possibly with *data adjusted),
 * 0 when it is of no interest, or -EACCES for disallowed accesses.
 * Presumably the caller forwards *data to hardware on a 1 return - verify
 * against the caller.
 */
int pci_msi_conf_write_intercept(struct pci_dev *pdev, unsigned int reg,
                                 unsigned int size, uint32_t *data)
{
    u16 seg = pdev->seg;
    u8 bus = pdev->bus;
    u8 slot = PCI_SLOT(pdev->devfn);
    u8 func = PCI_FUNC(pdev->devfn);
    struct msi_desc *entry;
    unsigned int pos;

    if ( pdev->msix )
    {
        /* Prefer the tracked entry's capability offset over a fresh scan. */
        entry = find_msi_entry(pdev, -1, PCI_CAP_ID_MSIX);
        pos = entry ? entry->msi_attrib.pos
                    : pci_find_cap_offset(seg, bus, slot, func,
                                          PCI_CAP_ID_MSIX);
        ASSERT(pos);

        if ( reg >= pos && reg < msix_pba_offset_reg(pos) + 4 )
        {
            /* Only a 2-byte write to the control register is permitted. */
            if ( reg != msix_control_reg(pos) || size != 2 )
                return -EACCES;

            /* Track the guest's MASKALL wish; enforce the host's mask. */
            pdev->msix->guest_maskall = !!(*data & PCI_MSIX_FLAGS_MASKALL);
            if ( pdev->msix->host_maskall )
                *data |= PCI_MSIX_FLAGS_MASKALL;

            return 1;
        }
    }

    entry = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI);
    if ( entry && entry->msi_attrib.maskbit )
    {
        uint16_t cntl;
        uint32_t unused;

        pos = entry->msi_attrib.pos;
        /* Outside the capability up to the mask-bits dword pair: not ours. */
        if ( reg < pos || reg >= entry->msi.mpos + 8 )
            return 0;

        if ( reg == msi_control_reg(pos) )
            return size == 2 ? 1 : -EACCES;
        /* Only a full 4-byte write to the mask-bits register is allowed. */
        if ( reg < entry->msi.mpos || reg >= entry->msi.mpos + 4 || size != 4 )
            return -EACCES;

        cntl = pci_conf_read16(seg, bus, slot, func, msi_control_reg(pos));
        /* Bits for vectors the device supports but which aren't in use. */
        unused = ~(uint32_t)0 >> (32 - multi_msi_capable(cntl));
        for ( pos = 0; pos < entry->msi.nvec; ++pos, ++entry )
        {
            /* Record the guest mask bit; force host-masked vectors to 1. */
            entry->msi_attrib.guest_masked =
                *data >> entry->msi_attrib.entry_nr;
            if ( entry->msi_attrib.host_masked )
                *data |= 1 << pos;
            unused &= ~(1 << pos);
        }

        /* Keep all unused vectors masked. */
        *data |= unused;

        return 1;
    }

    return 0;
}
1340
1341
/*
 * Re-program every MSI/MSI-X vector of @pdev from the state cached in its
 * msi_desc list (e.g. after the device lost its config state).  Returns 0
 * on success or a negative error code.  Caller holds the pcidevs lock.
 */
int pci_restore_msi_state(struct pci_dev *pdev)
{
    unsigned long flags;
    int irq;
    int ret;
    struct msi_desc *entry, *tmp;
    struct irq_desc *desc;
    struct msi_msg msg;
    u8 slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
    unsigned int type = 0, pos = 0;
    u16 control = 0;

    ASSERT(pcidevs_locked());

    if ( !use_msi )
        return -EOPNOTSUPP;

    ret = xsm_resource_setup_pci(XSM_PRIV,
                                (pdev->seg << 16) | (pdev->bus << 8) |
                                pdev->devfn);
    if ( ret )
        return ret;

    list_for_each_entry_safe( entry, tmp, &pdev->msi_list, list )
    {
        unsigned int i = 0, nr = 1;

        irq = entry->irq;
        desc = &irq_desc[irq];

        spin_lock_irqsave(&desc->lock, flags);

        ASSERT(desc->msi_desc == entry);

        /*
         * Release-build fallback for the assertion above; also the target
         * of the "goto bogus" from the multi-vector loop below (with desc
         * locked and i naming the offending vector).
         */
        if (desc->msi_desc != entry)
        {
    bogus:
            dprintk(XENLOG_ERR,
                    "Restore MSI for %04x:%02x:%02x:%u entry %u not set?\n",
                    pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
                    PCI_FUNC(pdev->devfn), i);
            spin_unlock_irqrestore(&desc->lock, flags);
            if ( type == PCI_CAP_ID_MSIX )
                pci_conf_write16(pdev->seg, pdev->bus, slot, func,
                                 msix_control_reg(pos),
                                 control & ~PCI_MSIX_FLAGS_ENABLE);
            return -EINVAL;
        }

        /* All entries of one device must be of the same type. */
        ASSERT(!type || type == entry->msi_attrib.type);
        pos = entry->msi_attrib.pos;
        if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
        {
            /* Keep MSI disabled while rewriting address/data. */
            msi_set_enable(pdev, 0);
            nr = entry->msi.nvec;
        }
        else if ( !type && entry->msi_attrib.type == PCI_CAP_ID_MSIX )
        {
            /* First MSI-X entry: enable with all vectors masked. */
            control = pci_conf_read16(pdev->seg, pdev->bus, slot, func,
                                      msix_control_reg(pos));
            pci_conf_write16(pdev->seg, pdev->bus, slot, func,
                             msix_control_reg(pos),
                             control | (PCI_MSIX_FLAGS_ENABLE |
                                        PCI_MSIX_FLAGS_MASKALL));
            if ( unlikely(!memory_decoded(pdev)) )
            {
                spin_unlock_irqrestore(&desc->lock, flags);
                pci_conf_write16(pdev->seg, pdev->bus, slot, func,
                                 msix_control_reg(pos),
                                 control & ~PCI_MSIX_FLAGS_ENABLE);
                return -ENXIO;
            }
        }
        type = entry->msi_attrib.type;

        msg = entry->msg;
        write_msi_msg(entry, &msg);

        /*
         * Restore the mask state of each vector of a multi-vector MSI
         * block, handing the irq_desc lock over between vectors.
         */
        for ( i = 0; ; )
        {
            if ( unlikely(!msi_set_mask_bit(desc,
                                            entry[i].msi_attrib.host_masked,
                                            entry[i].msi_attrib.guest_masked)) )
                BUG();

            if ( !--nr )
                break;

            spin_unlock_irqrestore(&desc->lock, flags);
            desc = &irq_desc[entry[++i].irq];
            spin_lock_irqsave(&desc->lock, flags);
            if ( desc->msi_desc != entry + i )
                goto bogus;
        }

        spin_unlock_irqrestore(&desc->lock, flags);

        if ( type == PCI_CAP_ID_MSI )
        {
            unsigned int cpos = msi_control_reg(pos);

            /* Re-establish the vector count, then re-enable MSI. */
            control = pci_conf_read16(pdev->seg, pdev->bus, slot, func, cpos) &
                      ~PCI_MSI_FLAGS_QSIZE;
            multi_msi_enable(control, entry->msi.nvec);
            pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
                             PCI_FUNC(pdev->devfn), cpos, control);

            msi_set_enable(pdev, 1);
        }
    }

    /* MSI-X: drop the MASKALL state saved before the loop, re-enabling. */
    if ( type == PCI_CAP_ID_MSIX )
        pci_conf_write16(pdev->seg, pdev->bus, slot, func,
                         msix_control_reg(pos),
                         control | PCI_MSIX_FLAGS_ENABLE);

    return 0;
}
1459
1460
/*
 * Establish the effective "use MSI" setting at boot: honour an explicit
 * "msi" command line override (use_msi >= 0); otherwise derive it from the
 * FADT's ACPI_FADT_NO_MSI boot flag.
 *
 * The original trailing "if ( !use_msi ) return;" was a no-op at the end
 * of a void function and has been dropped.
 */
void __init early_msi_init(void)
{
    if ( use_msi < 0 )
        use_msi = !(acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_MSI);
}
1467
1468
/* Debug-key handler: print one line of state per MSI-capable irq. */
static void dump_msi(unsigned char key)
{
    unsigned int irq;

    printk("MSI information:\n");

    for ( irq = 0; irq < nr_irqs; irq++ )
    {
        struct irq_desc *desc = irq_to_desc(irq);
        const struct msi_desc *entry;
        u32 addr, data, dest32;
        signed char mask;
        struct msi_attrib attr;
        unsigned long flags;
        const char *type = "???";

        if ( !irq_desc_initialized(desc) )
            continue;

        spin_lock_irqsave(&desc->lock, flags);

        entry = desc->msi_desc;
        if ( !entry )
        {
            spin_unlock_irqrestore(&desc->lock, flags);
            continue;
        }

        /* Classify the entry; type 0 encodes HPET/IOMMU via the pos field. */
        switch ( entry->msi_attrib.type )
        {
        case PCI_CAP_ID_MSI: type = "MSI"; break;
        case PCI_CAP_ID_MSIX: type = "MSI-X"; break;
        case 0:
            switch ( entry->msi_attrib.pos )
            {
            case MSI_TYPE_HPET: type = "HPET"; break;
            case MSI_TYPE_IOMMU: type = "IOMMU"; break;
            }
            break;
        }

        /* Snapshot everything under the lock, print after releasing it. */
        data = entry->msg.data;
        addr = entry->msg.address_lo;
        dest32 = entry->msg.dest32;
        attr = entry->msi_attrib;
        if ( entry->msi_attrib.type )
            mask = msi_get_mask_bit(entry);
        else
            mask = -1;

        spin_unlock_irqrestore(&desc->lock, flags);

        /* Render the mask bit as '0'/'1', or '?' when unavailable. */
        if ( mask >= 0 )
            mask += '0';
        else
            mask = '?';
        printk(" %-6s%4u vec=%02x%7s%6s%3sassert%5s%7s"
               " dest=%08x mask=%d/%c%c/%c\n",
               type, irq,
               (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT,
               data & MSI_DATA_DELIVERY_LOWPRI ? "lowest" : "fixed",
               data & MSI_DATA_TRIGGER_LEVEL ? "level" : "edge",
               data & MSI_DATA_LEVEL_ASSERT ? "" : "de",
               addr & MSI_ADDR_DESTMODE_LOGIC ? "log" : "phys",
               addr & MSI_ADDR_REDIRECTION_LOWPRI ? "lowest" : "cpu",
               dest32, attr.maskbit,
               attr.host_masked ? 'H' : ' ',
               attr.guest_masked ? 'G' : ' ',
               mask);
    }

    vpci_dump_msi();
}
1541
1542
/* Register the 'M' debug key for dumping MSI state at boot. */
static int __init msi_setup_keyhandler(void)
{
    register_keyhandler('M', dump_msi, "dump MSI state", 1);
    return 0;
}
__initcall(msi_setup_keyhandler);