Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/drivers/vpci/msix.c
  Count : Source
        : /*
        :  * Handlers for accesses to the MSI-X capability structure and the memory
        :  * region.
        :  *
        :  * Copyright (C) 2017 Citrix Systems R&D
        :  *
        :  * This program is free software; you can redistribute it and/or
        :  * modify it under the terms and conditions of the GNU General Public
        :  * License, version 2, as published by the Free Software Foundation.
        :  *
        :  * This program is distributed in the hope that it will be useful,
        :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
        :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
        :  * General Public License for more details.
        :  *
        :  * You should have received a copy of the GNU General Public
        :  * License along with this program; If not, see <http://www.gnu.org/licenses/>.
        :  */
        :
        : #include <xen/sched.h>
        : #include <xen/vpci.h>
        : #include <asm/msi.h>
        : #include <xen/p2m-common.h>
        : #include <xen/keyhandler.h>
        :
        : #define VMSIX_SIZE(num) offsetof(struct vpci_msix, entries[num])
        :
        : #define VMSIX_ADDR_IN_RANGE(addr, vpci, nr)                               \
  4.82M :     ((addr) >= VMSIX_TABLE_ADDR(vpci, nr) &&                              \
   101k :      (addr) < VMSIX_TABLE_ADDR(vpci, nr) + VMSIX_TABLE_SIZE(vpci, nr))
        :
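VMSIX_SIZE computes the allocation size for a variable-length table: entries[] is the trailing array member of struct vpci_msix, so offsetof at index num yields the fixed header plus num full entries. A minimal sketch of the idiom, with an illustrative struct layout (not the real vpci_msix definition):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    struct example_msix {
        unsigned int max_entries;   /* ...plus the other fixed fields */
        struct {
            uint64_t addr;
            uint32_t data;
            bool masked, updated;
        } entries[];                /* trailing variable-length array */
    };

    /*
     * offsetof(struct example_msix, entries[n]) == size of the fixed part
     * plus n complete entries: the size init_msix() below passes to
     * xzalloc_bytes() for an n-entry device.
     */
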
        : static uint32_t control_read(const struct pci_dev *pdev, unsigned int reg,
        :                              void *data)
     13 : {
     13 :     const struct vpci_msix *msix = data;
     13 :
     13 :     return (msix->max_entries - 1) |
      9 :            (msix->enabled ? PCI_MSIX_FLAGS_ENABLE : 0) |
     13 :            (msix->masked ? PCI_MSIX_FLAGS_MASKALL : 0);
     13 : }
        :
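control_read synthesizes the message control word from vPCI state instead of reading the device. For reference, a worked decode of an example value, assuming the standard MSI-X layout (enable in bit 15, function mask in bit 14, table size minus one in bits 0-10); the 0x7ff literal is that assumption, not something this file defines:

    uint16_t control = 0xc007;                        /* example value */
    bool enabled = control & PCI_MSIX_FLAGS_ENABLE;   /* bit 15 set -> true */
    bool maskall = control & PCI_MSIX_FLAGS_MASKALL;  /* bit 14 set -> true */
    unsigned int size = (control & 0x7ff) + 1;        /* 0x007 + 1 = 8 entries */
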
        : static void control_write(const struct pci_dev *pdev, unsigned int reg,
        :                           uint32_t val, void *data)
     13 : {
     13 :     uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
     13 :     struct vpci_msix *msix = data;
     13 :     bool new_masked = val & PCI_MSIX_FLAGS_MASKALL;
     13 :     bool new_enabled = val & PCI_MSIX_FLAGS_ENABLE;
     13 :     unsigned int i;
     13 :     int rc;
     13 :
     13 :     if ( new_masked == msix->masked && new_enabled == msix->enabled )
      9 :         return;
     13 :
     13 :     /*
     13 :      * According to the PCI 3.0 specification, switching the enable bit to 1
     13 :      * or the function mask bit to 0 should cause all the cached addresses
     13 :      * and data fields to be recalculated.
     13 :      *
     13 :      * In order to avoid the overhead of disabling and enabling all the
     13 :      * entries every time the guest sets the maskall bit, Xen will only
     13 :      * perform the disable and enable sequence when the guest has written to
     13 :      * the entry.
     13 :      */
      4 :     if ( new_enabled && !new_masked && (!msix->enabled || msix->masked) )
      4 :     {
     44 :         for ( i = 0; i < msix->max_entries; i++ )
     40 :         {
     40 :             if ( msix->entries[i].masked || !msix->entries[i].updated )
     40 :                 continue;
     40 :
      0 :             rc = vpci_msix_arch_disable_entry(&msix->entries[i], pdev);
      0 :             /* Ignore ENOENT, it means the entry wasn't set up. */
      0 :             if ( rc && rc != -ENOENT )
      0 :             {
      0 :                 gprintk(XENLOG_WARNING,
      0 :                         "%04x:%02x:%02x.%u: unable to disable entry %u: %d\n",
      0 :                         pdev->seg, pdev->bus, slot, func, i, rc);
      0 :                 return;
      0 :             }
      0 :
      0 :             rc = vpci_msix_arch_enable_entry(&msix->entries[i], pdev,
      0 :                                              VMSIX_TABLE_BASE(pdev->vpci,
      0 :                                                               VPCI_MSIX_TABLE));
      0 :             if ( rc )
      0 :             {
      0 :                 gprintk(XENLOG_WARNING,
      0 :                         "%04x:%02x:%02x.%u: unable to enable entry %u: %d\n",
      0 :                         pdev->seg, pdev->bus, slot, func, i, rc);
      0 :                 /* Entry is likely not properly configured, skip it. */
      0 :                 continue;
      0 :             }
      0 :
      0 :             /*
      0 :              * At this point the PIRQ is still masked. Unmask it, or else the
      0 :              * guest won't receive interrupts. This is due to the
      0 :              * disable/enable sequence performed above.
      0 :              */
      0 :             vpci_msix_arch_mask_entry(&msix->entries[i], pdev, false);
      0 :
      0 :             msix->entries[i].updated = false;
      0 :         }
      4 :     }
      0 :     else if ( !new_enabled && msix->enabled )
      0 :     {
      0 :         /* Guest has disabled MSI-X, disable all entries. */
      0 :         for ( i = 0; i < msix->max_entries; i++ )
      0 :         {
      0 :             /*
      0 :              * NB: vpci_msix_arch_disable_entry can be called for entries
      0 :              * that are not set up, it will return -ENOENT in that case.
      0 :              */
      0 :             rc = vpci_msix_arch_disable_entry(&msix->entries[i], pdev);
      0 :             switch ( rc )
      0 :             {
      0 :             case 0:
      0 :                 /*
      0 :                  * Mark the entry successfully disabled as updated, so that on
      0 :                  * the next enable the entry is properly set up. This is done
      0 :                  * so that the following flow works correctly:
      0 :                  *
      0 :                  * mask entry -> disable MSI-X -> enable MSI-X -> unmask entry
      0 :                  *
      0 :                  * Without setting 'updated', the 'unmask entry' step will fail
      0 :                  * because the entry has not been updated, so it would not be
      0 :                  * mapped/bound at all.
      0 :                  */
      0 :                 msix->entries[i].updated = true;
      0 :                 break;
      0 :             case -ENOENT:
      0 :                 /* Ignore non-present entry. */
      0 :                 break;
      0 :             default:
      0 :                 gprintk(XENLOG_WARNING,
      0 :                         "%04x:%02x:%02x.%u: unable to disable entry %u: %d\n",
      0 :                         pdev->seg, pdev->bus, slot, func, i, rc);
      0 :                 return;
      0 :             }
      0 :         }
      0 :     }
      4 :
      4 :     msix->masked = new_masked;
      4 :     msix->enabled = new_enabled;
      4 :
      4 :     val = control_read(pdev, reg, data);
      4 :     if ( pci_msi_conf_write_intercept(msix->pdev, reg, 2, &val) >= 0 )
      4 :         pci_conf_write16(pdev->seg, pdev->bus, slot, func, reg, val);
      4 : }
        :
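The gating condition in control_write is easier to read as a transition test: entries are resynced only when the vector set goes from "not live" to "live" (enabled with the function mask clear). A sketch of the equivalence, using hypothetical locals:

    bool was_live = msix->enabled && !msix->masked;
    bool now_live = new_enabled && !new_masked;

    /* now_live && !was_live is exactly the condition used above:
     * new_enabled && !new_masked && (!msix->enabled || msix->masked) */
    if ( now_live && !was_live )
        /* re-program every unmasked entry whose 'updated' flag is set */;
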
        : static struct vpci_msix *msix_find(const struct domain *d, unsigned long addr)
   481k : {
   481k :     struct vpci_msix *msix;
   481k :
   481k :     list_for_each_entry ( msix, &d->arch.hvm_domain.msix_tables, next )
  2.41M :     {
  2.41M :         const struct vpci_bar *bars = msix->pdev->vpci->header.bars;
  2.41M :         unsigned int i;
  2.41M :
  7.23M :         for ( i = 0; i < ARRAY_SIZE(msix->tables); i++ )
  4.82M :             if ( bars[msix->tables[i] & PCI_MSIX_BIRMASK].enabled &&
  4.82M :                  VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, i) )
    880 :                 return msix;
  2.41M :     }
   481k :
   480k :     return NULL;
   481k : }
        :
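msix_find masks each raw table/PBA register with PCI_MSIX_BIRMASK because those registers pack the BAR Indicator Register (BIR) into their low three bits, with the table offset in the bits above. A worked decode with a made-up register value:

    uint32_t table = 0x00002003;                   /* raw table offset/BIR */
    unsigned int bir = table & PCI_MSIX_BIRMASK;   /* 3: table lives in BAR3 */
    uint32_t offset = table & ~PCI_MSIX_BIRMASK;   /* 0x2000 into that BAR */
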
        : static int msix_accept(struct vcpu *v, unsigned long addr)
   481k : {
   481k :     return !!msix_find(v->domain, addr);
   481k : }
        :
        : static bool access_allowed(const struct pci_dev *pdev, unsigned long addr,
        :                            unsigned int len)
    220 : {
    220 :     uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
    220 :
    220 :     /* Only allow 32/64b accesses. */
    220 :     if ( len != 4 && len != 8 )
      0 :     {
      0 :         gprintk(XENLOG_WARNING,
      0 :                 "%04x:%02x:%02x.%u: invalid MSI-X table access size: %u\n",
      0 :                 pdev->seg, pdev->bus, slot, func, len);
      0 :         return false;
      0 :     }
    220 :
    220 :     /* Only allow aligned accesses. */
    220 :     if ( (addr & (len - 1)) != 0 )
      0 :     {
      0 :         gprintk(XENLOG_WARNING,
      0 :                 "%04x:%02x:%02x.%u: MSI-X only allows aligned accesses\n",
      0 :                 pdev->seg, pdev->bus, slot, func);
      0 :         return false;
      0 :     }
    220 :
    220 :     return true;
    220 : }
        :
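The alignment check uses the usual power-of-two trick: for a len of 4 or 8, len - 1 is a mask of the low address bits, so any set bit means the access is misaligned. Two worked cases with a hypothetical address:

    unsigned long addr = 0x1000c;
    bool ok32 = !(addr & (4 - 1));   /* 0xc & 0x3 == 0x0 -> aligned */
    bool ok64 = !(addr & (8 - 1));   /* 0xc & 0x7 == 0x4 -> rejected */
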
        : static struct vpci_msix_entry *get_entry(struct vpci_msix *msix,
        :                                          paddr_t addr)
    220 : {
    220 :     paddr_t start = VMSIX_TABLE_ADDR(msix->pdev->vpci, VPCI_MSIX_TABLE);
    220 :
    220 :     return &msix->entries[(addr - start) / PCI_MSIX_ENTRY_SIZE];
    220 : }
        :
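Each MSI-X table entry is PCI_MSIX_ENTRY_SIZE (16) bytes: lower address, upper address, data and vector control at offsets 0x0, 0x4, 0x8 and 0xc per the MSI-X layout. An access address therefore decodes into an entry index plus a register offset, which is how msix_read and msix_write below use get_entry. A worked example with a hypothetical table base:

    paddr_t start = 0xf0000000;               /* hypothetical table base */
    paddr_t addr  = 0xf000004c;               /* guest access address */
    unsigned int idx = (addr - start) / 16;   /* entry 4 */
    unsigned int off = addr & 0xf;            /* 0xc: the vector control word */
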
        : static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
        :                      unsigned long *data)
     76 : {
     76 :     const struct domain *d = v->domain;
     76 :     struct vpci_msix *msix = msix_find(d, addr);
     76 :     const struct vpci_msix_entry *entry;
     76 :     unsigned int offset;
     76 :
     76 :     *data = ~0ul;
     76 :
     76 :     if ( !msix )
      0 :         return X86EMUL_RETRY;
     76 :
     76 :     if ( !access_allowed(msix->pdev, addr, len) )
      0 :         return X86EMUL_OKAY;
     76 :
     76 :     if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
      0 :     {
      0 :         /*
      0 :          * Access to PBA.
      0 :          *
      0 :          * TODO: note that this relies on having the PBA identity mapped to
      0 :          * the guest address space. If this changes, the address will need
      0 :          * to be translated.
      0 :          */
      0 :         switch ( len )
      0 :         {
      0 :         case 4:
      0 :             *data = readl(addr);
      0 :             break;
      0 :         case 8:
      0 :             *data = readq(addr);
      0 :             break;
      0 :         default:
      0 :             ASSERT_UNREACHABLE();
      0 :             break;
      0 :         }
      0 :
      0 :         return X86EMUL_OKAY;
      0 :     }
     76 :
     76 :     spin_lock(&msix->pdev->vpci->lock);
     76 :     entry = get_entry(msix, addr);
     76 :     offset = addr & (PCI_MSIX_ENTRY_SIZE - 1);
     76 :
     76 :     switch ( offset )
     76 :     {
      0 :     case PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET:
      0 :         *data = entry->addr;
      0 :         break;
      0 :     case PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET:
      0 :         *data = entry->addr >> 32;
      0 :         break;
      0 :     case PCI_MSIX_ENTRY_DATA_OFFSET:
      0 :         *data = entry->data;
      0 :         if ( len == 8 )
      0 :             *data |=
      0 :                 (uint64_t)(entry->masked ? PCI_MSIX_VECTOR_BITMASK : 0) << 32;
      0 :         break;
     76 :     case PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET:
     76 :         *data = entry->masked ? PCI_MSIX_VECTOR_BITMASK : 0;
     76 :         break;
      0 :     default:
      0 :         ASSERT_UNREACHABLE();
      0 :         break;
     76 :     }
     76 :     spin_unlock(&msix->pdev->vpci->lock);
     76 :
     76 :     return X86EMUL_OKAY;
     76 : }
        :
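Note the 64-bit read starting at the data register: the low dword is the cached data value and only the mask bit of the vector control word is synthesized into the high dword. For instance, with hypothetical cached state and assuming the mask bit is bit 0 of vector control:

    /* entry->data == 0x4021, entry->masked == true, 8-byte read at +0x8 */
    uint64_t result = 0x4021 | ((uint64_t)PCI_MSIX_VECTOR_BITMASK << 32);
    /* -> 0x0000000100004021 */
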
        : static int msix_write(struct vcpu *v, unsigned long addr, unsigned int len,
        :                       unsigned long data)
    144 : {
    144 :     const struct domain *d = v->domain;
    144 :     struct vpci_msix *msix = msix_find(d, addr);
    144 :     struct vpci_msix_entry *entry;
    144 :     unsigned int offset;
    144 :
    144 :     if ( !msix )
      0 :         return X86EMUL_RETRY;
    144 :
    144 :     if ( !access_allowed(msix->pdev, addr, len) )
      0 :         return X86EMUL_OKAY;
    144 :
    144 :     if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
      0 :     {
      0 :         /* Ignore writes to the PBA for DomUs; its behavior is undefined. */
      0 :         if ( is_hardware_domain(d) )
      0 :         {
      0 :             switch ( len )
      0 :             {
      0 :             case 4:
      0 :                 writel(data, addr);
      0 :                 break;
      0 :             case 8:
      0 :                 writeq(data, addr);
      0 :                 break;
      0 :             default:
      0 :                 ASSERT_UNREACHABLE();
      0 :                 break;
      0 :             }
      0 :         }
      0 :
      0 :         return X86EMUL_OKAY;
      0 :     }
    144 :
    144 :     spin_lock(&msix->pdev->vpci->lock);
    144 :     entry = get_entry(msix, addr);
    144 :     offset = addr & (PCI_MSIX_ENTRY_SIZE - 1);
    144 :
    144 :     /*
    144 :      * NB: Xen allows writes to the data/address registers with the entry
    144 :      * unmasked. The specification says this is undefined behavior, and Xen
    144 :      * implements it as storing the written value, which will be made effective
    144 :      * in the next mask/unmask cycle. This also mimics the implementation in
    144 :      * QEMU.
    144 :      */
    144 :     switch ( offset )
    144 :     {
     36 :     case PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET:
     36 :         entry->updated = true;
     36 :         if ( len == 8 )
      0 :         {
      0 :             entry->addr = data;
      0 :             break;
      0 :         }
     36 :         entry->addr &= ~0xffffffffULL;
     36 :         entry->addr |= data;
     36 :         break;
     36 :     case PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET:
     36 :         entry->updated = true;
     36 :         entry->addr &= 0xffffffff;
     36 :         entry->addr |= (uint64_t)data << 32;
     36 :         break;
     36 :     case PCI_MSIX_ENTRY_DATA_OFFSET:
     36 :         entry->updated = true;
     36 :         entry->data = data;
     36 :
     36 :         if ( len == 4 )
     36 :             break;
     36 :
      0 :         data >>= 32;
      0 :         /* fallthrough */
     36 :     case PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET:
     36 :     {
     36 :         bool new_masked = data & PCI_MSIX_VECTOR_BITMASK;
     36 :         const struct pci_dev *pdev = msix->pdev;
     36 :         int rc;
     36 :
     36 :         if ( entry->masked == new_masked )
     36 :             /* No change in the mask bit, nothing to do. */
      0 :             break;
     36 :
     36 :         if ( !new_masked && msix->enabled && !msix->masked && entry->updated )
     36 :         {
     36 :             /*
     36 :              * If MSI-X is enabled, the function mask is not active, the entry
     36 :              * is being unmasked and there have been changes to the address or
     36 :              * data fields, Xen needs to disable and enable the entry in order
     36 :              * to pick up the changes.
     36 :              */
     36 :             rc = vpci_msix_arch_disable_entry(entry, pdev);
     36 :             if ( rc && rc != -ENOENT )
      0 :             {
      0 :                 gprintk(XENLOG_WARNING,
      0 :                         "%04x:%02x:%02x.%u: unable to disable entry %u: %d\n",
      0 :                         pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
      0 :                         PCI_FUNC(pdev->devfn), VMSIX_ENTRY_NR(msix, entry), rc);
      0 :                 break;
      0 :             }
     36 :
     36 :             rc = vpci_msix_arch_enable_entry(entry, pdev,
     36 :                                              VMSIX_TABLE_BASE(pdev->vpci,
     36 :                                                               VPCI_MSIX_TABLE));
     36 :             if ( rc )
      0 :             {
      0 :                 gprintk(XENLOG_WARNING,
      0 :                         "%04x:%02x:%02x.%u: unable to enable entry %u: %d\n",
      0 :                         pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
      0 :                         PCI_FUNC(pdev->devfn), VMSIX_ENTRY_NR(msix, entry), rc);
      0 :                 break;
      0 :             }
     36 :             entry->updated = false;
     36 :         }
     36 :
     36 :         vpci_msix_arch_mask_entry(entry, pdev, new_masked);
     36 :         entry->masked = new_masked;
     36 :
     36 :         break;
     36 :     }
      0 :     default:
      0 :         ASSERT_UNREACHABLE();
      0 :         break;
    144 :     }
    144 :     spin_unlock(&msix->pdev->vpci->lock);
    144 :
    144 :     return X86EMUL_OKAY;
    144 : }
        :
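Taken together, the cheapest guest sequence for reprogramming a vector writes the cached fields while the entry is masked and lets the final unmask apply them; a sketch, where entry_base and mmio_write32() are hypothetical guest-side names:

    mmio_write32(entry_base + 0x0, msi_addr_lo);   /* cached, updated = true */
    mmio_write32(entry_base + 0x4, msi_addr_hi);   /* cached */
    mmio_write32(entry_base + 0x8, msi_data);      /* cached */
    mmio_write32(entry_base + 0xc, 0);             /* unmask: Xen disables and
                                                      re-enables the entry, then
                                                      unmasks it, applying the
                                                      cached address/data */
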
        : static const struct hvm_mmio_ops vpci_msix_table_ops = {
        :     .check = msix_accept,
        :     .read = msix_read,
        :     .write = msix_write,
        : };
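These ops plug the three handlers into the MMIO emulation layer: .check (msix_accept) claims an address, after which .read/.write handle the access. An illustrative dispatch, not the actual hvm_mmio code:

    if ( vpci_msix_table_ops.check(v, addr) )
        rc = dir == IOREQ_READ
             ? vpci_msix_table_ops.read(v, addr, len, &val)
             : vpci_msix_table_ops.write(v, addr, len, val);
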
        :
        : static int init_msix(struct pci_dev *pdev)
     68 : {
     68 :     struct domain *d = pdev->domain;
     68 :     uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
     68 :     struct vpci_msix *msix;
     68 :     unsigned int msix_offset, i, max_entries;
     68 :     uint16_t control;
     68 :     int rc;
     68 :
     68 :     msix_offset = pci_find_cap_offset(pdev->seg, pdev->bus, slot, func,
     68 :                                       PCI_CAP_ID_MSIX);
     68 :     if ( !msix_offset )
     63 :         return 0;
     68 :
      5 :     control = pci_conf_read16(pdev->seg, pdev->bus, slot, func,
      5 :                               msix_control_reg(msix_offset));
      5 :
      5 :     max_entries = msix_table_size(control);
      5 :
      5 :     msix = xzalloc_bytes(VMSIX_SIZE(max_entries));
      5 :     if ( !msix )
      0 :         return -ENOMEM;
      5 :
      5 :     msix->max_entries = max_entries;
      5 :     msix->pdev = pdev;
      5 :
      5 :     msix->tables[VPCI_MSIX_TABLE] =
      5 :         pci_conf_read32(pdev->seg, pdev->bus, slot, func,
      5 :                         msix_table_offset_reg(msix_offset));
      5 :     msix->tables[VPCI_MSIX_PBA] =
      5 :         pci_conf_read32(pdev->seg, pdev->bus, slot, func,
      5 :                         msix_pba_offset_reg(msix_offset));
      5 :
     53 :     for ( i = 0; i < msix->max_entries; i++ )
     48 :     {
     48 :         msix->entries[i].masked = true;
     48 :         vpci_msix_arch_init_entry(&msix->entries[i]);
     48 :     }
      5 :
      5 :     if ( list_empty(&d->arch.hvm_domain.msix_tables) )
      1 :         register_mmio_handler(d, &vpci_msix_table_ops);
      5 :
      5 :     list_add(&msix->next, &d->arch.hvm_domain.msix_tables);
      5 :
      5 :     rc = vpci_add_register(pdev->vpci, control_read, control_write,
      5 :                            msix_control_reg(msix_offset), 2, msix);
      5 :     if ( rc )
      0 :     {
        :         /* The struct was linked into msix_tables above; unlink it
        :          * before freeing, or a dangling pointer would remain. */
        :         list_del(&msix->next);
      0 :         xfree(msix);
      0 :         return rc;
      0 :     }
      5 :
      5 :     pdev->vpci->msix = msix;
      5 :
      5 :     return 0;
      5 : }
        : REGISTER_VPCI_INIT(init_msix, VPCI_PRIORITY_HIGH);
        :
        : /*
        :  * Local variables:
        :  * mode: C
        :  * c-file-style: "BSD"
        :  * c-basic-offset: 4
        :  * tab-width: 4
        :  * indent-tabs-mode: nil
        :  * End:
        :  */