Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/drivers/passthrough/io.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2006, Intel Corporation.
3
 *
4
 * This program is free software; you can redistribute it and/or modify it
5
 * under the terms and conditions of the GNU General Public License,
6
 * version 2, as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11
 * more details.
12
 *
13
 * You should have received a copy of the GNU General Public License along with
14
 * this program; If not, see <http://www.gnu.org/licenses/>.
15
 *
16
 * Copyright (C) Allen Kay <allen.m.kay@intel.com>
17
 * Copyright (C) Xiaohui Xin <xiaohui.xin@intel.com>
18
 */
19
20
#include <xen/event.h>
21
#include <xen/iommu.h>
22
#include <xen/cpu.h>
23
#include <xen/irq.h>
24
#include <asm/hvm/irq.h>
25
#include <asm/hvm/support.h>
26
#include <asm/io_apic.h>
27
28
static DEFINE_PER_CPU(struct list_head, dpci_list);
29
30
/*
31
 * These two bit states help to safely schedule, deschedule, and wait until
32
 * the softirq has finished.
33
 *
34
 * The semantics behind these two bits are as follows:
35
 *  - STATE_SCHED - whoever modifies it has to ref-count the domain (->dom).
36
 *  - STATE_RUN - only softirq is allowed to set and clear it. If it has
37
 *      been set hvm_dirq_assist will RUN with a saved value of the
38
 *      'struct domain' copied from 'pirq_dpci->dom' before STATE_RUN was set.
39
 *
40
 * The usual states are: STATE_SCHED(set) -> STATE_RUN(set) ->
41
 * STATE_SCHED(unset) -> STATE_RUN(unset).
42
 *
43
 * However the states can also diverge such as: STATE_SCHED(set) ->
44
 * STATE_SCHED(unset) -> STATE_RUN(set) -> STATE_RUN(unset). That means
45
 * the 'hvm_dirq_assist' never ran and that the softirq did not do any
46
 * ref-counting.
47
 */
48
49
enum {
50
    STATE_SCHED,
51
    STATE_RUN
52
};
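A minimal sketch of the handshake described above (editor's illustration, not part of io.c; it merely restates what raise_softirq_for() and dpci_softirq() below do with pirq_dpci->state):

    /* Scheduling side: only the caller that wins STATE_SCHED queues the entry
     * and takes the domain reference. */
    if ( !test_and_set_bit(STATE_SCHED, &pirq_dpci->state) )
    {
        get_knownalive_domain(pirq_dpci->dom); /* dropped by whoever clears STATE_SCHED */
        raise_softirq(HVM_DPCI_SOFTIRQ);
    }

    /* Softirq side: STATE_RUN brackets hvm_dirq_assist(), which uses a
     * 'struct domain *' saved before STATE_RUN was set. */
    d = pirq_dpci->dom;
    if ( !test_and_set_bit(STATE_RUN, &pirq_dpci->state) )
    {
        if ( test_and_clear_bit(STATE_SCHED, &pirq_dpci->state) )
        {
            hvm_dirq_assist(d, pirq_dpci);
            put_domain(d);
        }
        clear_bit(STATE_RUN, &pirq_dpci->state);
    }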
53
54
/*
55
 * This can be called multiple times, but the softirq is only raised once.
56
 * That is, until the STATE_SCHED state has been cleared. The state can be
57
 * cleared by: the 'dpci_softirq' (when it has executed 'hvm_dirq_assist'),
58
 * or by 'pt_pirq_softirq_reset' (which will try to clear the state before
59
 * the softirq had a chance to run).
60
 */
61
static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci)
62
4.14k
{
63
4.14k
    unsigned long flags;
64
4.14k
65
4.14k
    if ( test_and_set_bit(STATE_SCHED, &pirq_dpci->state) )
66
0
        return;
67
4.14k
68
4.14k
    get_knownalive_domain(pirq_dpci->dom);
69
4.14k
70
4.14k
    local_irq_save(flags);
71
4.14k
    list_add_tail(&pirq_dpci->softirq_list, &this_cpu(dpci_list));
72
4.14k
    local_irq_restore(flags);
73
4.14k
74
4.14k
    raise_softirq(HVM_DPCI_SOFTIRQ);
75
4.14k
}
76
77
/*
78
 * If we are racing with softirq_dpci (STATE_SCHED) we return
79
 * true. Otherwise we return false.
80
 *
81
 * If it is false, it is the caller's responsibility to make sure
82
 * that the softirq (with the event_lock dropped) has run.
83
 */
84
bool pt_pirq_softirq_active(struct hvm_pirq_dpci *pirq_dpci)
85
48
{
86
48
    if ( pirq_dpci->state & ((1 << STATE_RUN) | (1 << STATE_SCHED)) )
87
0
        return true;
88
48
89
48
    /*
90
48
     * If in the future we would call 'raise_softirq_for' right away
91
48
     * after 'pt_pirq_softirq_active' we MUST reset the list (otherwise it
92
48
     * might have stale data).
93
48
     */
94
48
    return false;
95
48
}
96
97
/*
98
 * Reset the pirq_dpci->dom parameter to NULL.
99
 *
100
 * This function checks the different states to make sure it can do it
101
 * at the right time. If it unschedules the 'hvm_dirq_assist' from running
102
 * it also drops the domain reference (which is what the softirq would have done).
103
 */
104
static void pt_pirq_softirq_reset(struct hvm_pirq_dpci *pirq_dpci)
105
0
{
106
0
    struct domain *d = pirq_dpci->dom;
107
0
108
0
    ASSERT(spin_is_locked(&d->event_lock));
109
0
110
0
    switch ( cmpxchg(&pirq_dpci->state, 1 << STATE_SCHED, 0) )
111
0
    {
112
0
    case (1 << STATE_SCHED):
113
0
        /*
114
0
         * We are going to try to de-schedule the softirq before it goes in
115
0
         * STATE_RUN. Whoever clears STATE_SCHED MUST refcount the 'dom'.
116
0
         */
117
0
        put_domain(d);
118
0
        /* fallthrough. */
119
0
    case (1 << STATE_RUN):
120
0
    case (1 << STATE_RUN) | (1 << STATE_SCHED):
121
0
        /*
122
0
         * The reason it is OK to reset 'dom' when STATE_RUN bit is set is due
123
0
         * to a shortcut the 'dpci_softirq' implements. It stashes the 'dom'
124
0
         * in a local variable before it sets STATE_RUN - and therefore will not
125
0
         * dereference '->dom' which would crash.
126
0
         */
127
0
        pirq_dpci->dom = NULL;
128
0
        break;
129
0
    }
130
0
    /*
131
0
     * Inhibit 'hvm_dirq_assist' from doing anything useful and at worst
132
0
     * calling 'set_timer' which will blow up (as we have called kill_timer
133
0
     * or never initialized it). Note that we hold the lock that
134
0
     * 'hvm_dirq_assist' could be spinning on.
135
0
     */
136
0
    pirq_dpci->masked = 0;
137
0
}
138
139
bool pt_irq_need_timer(uint32_t flags)
140
307
{
141
307
    return !(flags & (HVM_IRQ_DPCI_GUEST_MSI | HVM_IRQ_DPCI_TRANSLATE));
142
307
}
143
144
static int pt_irq_guest_eoi(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
145
                            void *arg)
146
67
{
147
67
    if ( __test_and_clear_bit(_HVM_IRQ_DPCI_EOI_LATCH_SHIFT,
148
67
                              &pirq_dpci->flags) )
149
67
    {
150
67
        pirq_dpci->masked = 0;
151
67
        pirq_dpci->pending = 0;
152
67
        pirq_guest_eoi(dpci_pirq(pirq_dpci));
153
67
    }
154
67
155
67
    return 0;
156
67
}
157
158
static void pt_irq_time_out(void *data)
159
67
{
160
67
    struct hvm_pirq_dpci *irq_map = data;
161
67
    const struct hvm_irq_dpci *dpci;
162
67
    const struct dev_intx_gsi_link *digl;
163
67
164
67
    spin_lock(&irq_map->dom->event_lock);
165
67
166
67
    if ( irq_map->flags & HVM_IRQ_DPCI_IDENTITY_GSI )
167
67
    {
168
67
        ASSERT(is_hardware_domain(irq_map->dom));
169
67
        /*
170
67
         * Identity mapped, no need to iterate over the guest GSI list to find
171
67
         * other pirqs sharing the same guest GSI.
172
67
         *
173
67
         * In the identity mapped case the EOI can also be done now; this way
174
67
         * the iteration over the list of domain pirqs is avoided.
175
67
         */
176
67
        hvm_gsi_deassert(irq_map->dom, dpci_pirq(irq_map)->pirq);
177
67
        irq_map->flags |= HVM_IRQ_DPCI_EOI_LATCH;
178
67
        pt_irq_guest_eoi(irq_map->dom, irq_map, NULL);
179
67
        spin_unlock(&irq_map->dom->event_lock);
180
67
        return;
181
67
    }
182
67
183
0
    dpci = domain_get_irq_dpci(irq_map->dom);
184
0
    if ( unlikely(!dpci) )
185
0
    {
186
0
        ASSERT_UNREACHABLE();
187
0
        spin_unlock(&irq_map->dom->event_lock);
188
0
        return;
189
0
    }
190
0
    list_for_each_entry ( digl, &irq_map->digl_list, list )
191
0
    {
192
0
        unsigned int guest_gsi = hvm_pci_intx_gsi(digl->device, digl->intx);
193
0
        const struct hvm_girq_dpci_mapping *girq;
194
0
195
0
        list_for_each_entry ( girq, &dpci->girq[guest_gsi], list )
196
0
        {
197
0
            struct pirq *pirq = pirq_info(irq_map->dom, girq->machine_gsi);
198
0
199
0
            pirq_dpci(pirq)->flags |= HVM_IRQ_DPCI_EOI_LATCH;
200
0
        }
201
0
        hvm_pci_intx_deassert(irq_map->dom, digl->device, digl->intx);
202
0
    }
203
0
204
0
    pt_pirq_iterate(irq_map->dom, pt_irq_guest_eoi, NULL);
205
0
206
0
    spin_unlock(&irq_map->dom->event_lock);
207
0
}
208
209
struct hvm_irq_dpci *domain_get_irq_dpci(const struct domain *d)
210
4.19k
{
211
4.19k
    if ( !d || !is_hvm_domain(d) )
212
0
        return NULL;
213
4.19k
214
4.19k
    return hvm_domain_irq(d)->dpci;
215
4.19k
}
216
217
void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci)
218
0
{
219
0
    xfree(dpci);
220
0
}
221
222
/*
223
 * This routine handles lowest-priority interrupts using the vector-hashing
224
 * mechanism. As an example, modern Intel CPUs use this method to handle
225
 * lowest-priority interrupts.
226
 *
227
 * Here are the details of the vector-hashing mechanism:
228
 * 1. For lowest-priority interrupts, store all the possible destination
229
 *    vCPUs in an array.
230
 * 2. Use "gvec % number of matching destination vCPUs" to find the right
231
 *    destination vCPU in the array for the lowest-priority interrupt.
232
 */
233
static struct vcpu *vector_hashing_dest(const struct domain *d,
234
                                        uint32_t dest_id,
235
                                        bool dest_mode,
236
                                        uint8_t gvec)
237
238
0
{
239
0
    unsigned long *dest_vcpu_bitmap;
240
0
    unsigned int dest_vcpus = 0;
241
0
    struct vcpu *v, *dest = NULL;
242
0
    unsigned int i;
243
0
244
0
    dest_vcpu_bitmap = xzalloc_array(unsigned long,
245
0
                                     BITS_TO_LONGS(d->max_vcpus));
246
0
    if ( !dest_vcpu_bitmap )
247
0
        return NULL;
248
0
249
0
    for_each_vcpu ( d, v )
250
0
    {
251
0
        if ( !vlapic_match_dest(vcpu_vlapic(v), NULL, APIC_DEST_NOSHORT,
252
0
                                dest_id, dest_mode) )
253
0
            continue;
254
0
255
0
        __set_bit(v->vcpu_id, dest_vcpu_bitmap);
256
0
        dest_vcpus++;
257
0
    }
258
0
259
0
    if ( dest_vcpus != 0 )
260
0
    {
261
0
        unsigned int mod = gvec % dest_vcpus;
262
0
        unsigned int idx = 0;
263
0
264
0
        for ( i = 0; i <= mod; i++ )
265
0
        {
266
0
            idx = find_next_bit(dest_vcpu_bitmap, d->max_vcpus, idx) + 1;
267
0
            BUG_ON(idx > d->max_vcpus);
268
0
        }
269
0
270
0
        dest = d->vcpu[idx - 1];
271
0
    }
272
0
273
0
    xfree(dest_vcpu_bitmap);
274
0
275
0
    return dest;
276
0
}
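A worked example of the selection rule above (editor's illustration): suppose three vCPUs match the destination, so bits 1, 4 and 7 are set in dest_vcpu_bitmap and dest_vcpus == 3, and the guest vector gvec is 0x29 (41). Then mod = 41 % 3 = 2, the loop walks past three set bits (i = 0, 1, 2, leaving idx == 8), and the interrupt is steered to d->vcpu[idx - 1], i.e. d->vcpu[7].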
277
278
int pt_irq_create_bind(
279
    struct domain *d, const struct xen_domctl_bind_pt_irq *pt_irq_bind)
280
48
{
281
48
    struct hvm_irq_dpci *hvm_irq_dpci;
282
48
    struct hvm_pirq_dpci *pirq_dpci;
283
48
    struct pirq *info;
284
48
    int rc, pirq = pt_irq_bind->machine_irq;
285
48
286
48
    if ( pirq < 0 || pirq >= d->nr_pirqs )
287
0
        return -EINVAL;
288
48
289
48
 restart:
290
48
    spin_lock(&d->event_lock);
291
48
292
48
    hvm_irq_dpci = domain_get_irq_dpci(d);
293
48
    if ( !hvm_irq_dpci && !is_hardware_domain(d) )
294
0
    {
295
0
        unsigned int i;
296
0
297
0
        /*
298
0
         * NB: the hardware domain doesn't use a hvm_irq_dpci struct because
299
0
         * it's only allowed to identity map GSIs, and so the data contained in
300
0
         * that struct (used to map guest GSIs into machine GSIs and perform
301
0
         * interrupt routing) is completely useless to it.
302
0
         */
303
0
        hvm_irq_dpci = xzalloc(struct hvm_irq_dpci);
304
0
        if ( hvm_irq_dpci == NULL )
305
0
        {
306
0
            spin_unlock(&d->event_lock);
307
0
            return -ENOMEM;
308
0
        }
309
0
        for ( i = 0; i < NR_HVM_DOMU_IRQS; i++ )
310
0
            INIT_LIST_HEAD(&hvm_irq_dpci->girq[i]);
311
0
312
0
        hvm_domain_irq(d)->dpci = hvm_irq_dpci;
313
0
    }
314
48
315
48
    info = pirq_get_info(d, pirq);
316
48
    if ( !info )
317
0
    {
318
0
        spin_unlock(&d->event_lock);
319
0
        return -ENOMEM;
320
0
    }
321
48
    pirq_dpci = pirq_dpci(info);
322
48
323
48
    /*
324
48
     * A crude 'while' loop with us dropping the spinlock and giving
325
48
     * the softirq_dpci a chance to run.
326
48
     * We MUST check for this condition as the softirq could be scheduled
327
48
     * and hasn't run yet. Note that this code replaced tasklet_kill which
328
48
     * would have spun forever and would do the same thing (wait to flush out
329
48
     * outstanding hvm_dirq_assist calls).
330
48
     */
331
48
    if ( pt_pirq_softirq_active(pirq_dpci) )
332
0
    {
333
0
        spin_unlock(&d->event_lock);
334
0
        cpu_relax();
335
0
        goto restart;
336
0
    }
337
48
338
48
    switch ( pt_irq_bind->irq_type )
339
48
    {
340
42
    case PT_IRQ_TYPE_MSI:
341
42
    {
342
42
        uint8_t dest, delivery_mode;
343
42
        bool dest_mode;
344
42
        int dest_vcpu_id;
345
42
        const struct vcpu *vcpu;
346
42
        uint32_t gflags = pt_irq_bind->u.msi.gflags &
347
42
                          ~XEN_DOMCTL_VMSI_X86_UNMASKED;
348
42
349
42
        if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
350
42
        {
351
42
            pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED | HVM_IRQ_DPCI_MACH_MSI |
352
42
                               HVM_IRQ_DPCI_GUEST_MSI;
353
42
            pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec;
354
42
            pirq_dpci->gmsi.gflags = gflags;
355
42
            /*
356
42
             * 'pt_irq_create_bind' can be called after 'pt_irq_destroy_bind'.
357
42
             * The 'pirq_cleanup_check' which would free the structure is only
358
42
             * called if the event channel for the PIRQ is active. However
359
42
             * OS-es that use event channels usually bind PIRQs to eventds
360
42
             * and unbind them before calling 'pt_irq_destroy_bind' - with the
361
42
             * result that we re-use the 'dpci' structure. This can be
362
42
             * reproduced with unloading and loading the driver for a device.
363
42
             *
364
42
             * As such on every 'pt_irq_create_bind' call we MUST set it.
365
42
             */
366
42
            pirq_dpci->dom = d;
367
42
            /* bind after hvm_irq_dpci is set up to avoid a race with the irq handler */
368
42
            rc = pirq_guest_bind(d->vcpu[0], info, 0);
369
42
            if ( rc == 0 && pt_irq_bind->u.msi.gtable )
370
0
            {
371
0
                rc = msixtbl_pt_register(d, info, pt_irq_bind->u.msi.gtable);
372
0
                if ( unlikely(rc) )
373
0
                {
374
0
                    pirq_guest_unbind(d, info);
375
0
                    /*
376
0
                     * Between 'pirq_guest_bind' and before 'pirq_guest_unbind'
377
0
                     * an interrupt can be scheduled. No more of them are going
378
0
                     * to be scheduled but we must deal with the one that may be
379
0
                     * in the queue.
380
0
                     */
381
0
                    pt_pirq_softirq_reset(pirq_dpci);
382
0
                }
383
0
            }
384
42
            if ( unlikely(rc) )
385
0
            {
386
0
                pirq_dpci->gmsi.gflags = 0;
387
0
                pirq_dpci->gmsi.gvec = 0;
388
0
                pirq_dpci->dom = NULL;
389
0
                pirq_dpci->flags = 0;
390
0
                pirq_cleanup_check(info, d);
391
0
                spin_unlock(&d->event_lock);
392
0
                return rc;
393
0
            }
394
42
        }
395
42
        else
396
0
        {
397
0
            uint32_t mask = HVM_IRQ_DPCI_MACH_MSI | HVM_IRQ_DPCI_GUEST_MSI;
398
0
399
0
            if ( (pirq_dpci->flags & mask) != mask )
400
0
            {
401
0
                spin_unlock(&d->event_lock);
402
0
                return -EBUSY;
403
0
            }
404
0
405
0
            /* If pirq is already mapped as vmsi, update guest data/addr. */
406
0
            if ( pirq_dpci->gmsi.gvec != pt_irq_bind->u.msi.gvec ||
407
0
                 pirq_dpci->gmsi.gflags != gflags )
408
0
            {
409
0
                /* Directly clear pending EOIs before enabling new MSI info. */
410
0
                pirq_guest_eoi(info);
411
0
412
0
                pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec;
413
0
                pirq_dpci->gmsi.gflags = gflags;
414
0
            }
415
0
        }
416
42
        /* Calculate dest_vcpu_id for MSI-type pirq migration. */
417
42
        dest = MASK_EXTR(pirq_dpci->gmsi.gflags,
418
42
                         XEN_DOMCTL_VMSI_X86_DEST_ID_MASK);
419
42
        dest_mode = pirq_dpci->gmsi.gflags & XEN_DOMCTL_VMSI_X86_DM_MASK;
420
42
        delivery_mode = MASK_EXTR(pirq_dpci->gmsi.gflags,
421
42
                                  XEN_DOMCTL_VMSI_X86_DELIV_MASK);
422
42
423
42
        dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
424
42
        pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id;
425
42
        spin_unlock(&d->event_lock);
426
42
427
42
        pirq_dpci->gmsi.posted = false;
428
42
        vcpu = (dest_vcpu_id >= 0) ? d->vcpu[dest_vcpu_id] : NULL;
429
42
        if ( iommu_intpost )
430
0
        {
431
0
            if ( delivery_mode == dest_LowestPrio )
432
0
                vcpu = vector_hashing_dest(d, dest, dest_mode,
433
0
                                           pirq_dpci->gmsi.gvec);
434
0
            if ( vcpu )
435
0
                pirq_dpci->gmsi.posted = true;
436
0
        }
437
42
        if ( dest_vcpu_id >= 0 )
438
42
            hvm_migrate_pirqs(d->vcpu[dest_vcpu_id]);
439
42
440
42
        /* Use interrupt posting if it is supported. */
441
42
        if ( iommu_intpost )
442
0
            pi_update_irte(vcpu ? &vcpu->arch.hvm_vmx.pi_desc : NULL,
443
0
                           info, pirq_dpci->gmsi.gvec);
444
42
445
42
        if ( pt_irq_bind->u.msi.gflags & XEN_DOMCTL_VMSI_X86_UNMASKED )
446
0
        {
447
0
            unsigned long flags;
448
0
            struct irq_desc *desc = pirq_spin_lock_irq_desc(info, &flags);
449
0
450
0
            if ( !desc )
451
0
            {
452
0
                pt_irq_destroy_bind(d, pt_irq_bind);
453
0
                return -EINVAL;
454
0
            }
455
0
456
0
            guest_mask_msi_irq(desc, false);
457
0
            spin_unlock_irqrestore(&desc->lock, flags);
458
0
        }
459
42
460
42
        break;
461
42
    }
462
42
463
6
    case PT_IRQ_TYPE_PCI:
464
6
    case PT_IRQ_TYPE_MSI_TRANSLATE:
465
6
    {
466
6
        struct dev_intx_gsi_link *digl = NULL;
467
6
        struct hvm_girq_dpci_mapping *girq = NULL;
468
6
        unsigned int guest_gsi;
469
6
470
6
        /*
471
6
         * Mapping GSIs for the hardware domain is different from doing it for
472
6
         * an unprivileged guest: the hardware domain is only allowed to
473
6
         * identity map GSIs, and as such all the data in the u.pci union is
474
6
         * discarded.
475
6
         */
476
6
        if ( hvm_irq_dpci )
477
0
        {
478
0
            unsigned int link;
479
0
480
0
            digl = xmalloc(struct dev_intx_gsi_link);
481
0
            girq = xmalloc(struct hvm_girq_dpci_mapping);
482
0
483
0
            if ( !digl || !girq )
484
0
            {
485
0
                spin_unlock(&d->event_lock);
486
0
                xfree(girq);
487
0
                xfree(digl);
488
0
                return -ENOMEM;
489
0
            }
490
0
491
0
            girq->bus = digl->bus = pt_irq_bind->u.pci.bus;
492
0
            girq->device = digl->device = pt_irq_bind->u.pci.device;
493
0
            girq->intx = digl->intx = pt_irq_bind->u.pci.intx;
494
0
            list_add_tail(&digl->list, &pirq_dpci->digl_list);
495
0
496
0
            guest_gsi = hvm_pci_intx_gsi(digl->device, digl->intx);
497
0
            link = hvm_pci_intx_link(digl->device, digl->intx);
498
0
499
0
            hvm_irq_dpci->link_cnt[link]++;
500
0
501
0
            girq->machine_gsi = pirq;
502
0
            list_add_tail(&girq->list, &hvm_irq_dpci->girq[guest_gsi]);
503
0
        }
504
6
        else
505
6
        {
506
6
            ASSERT(is_hardware_domain(d));
507
6
508
6
            /* MSI_TRANSLATE is not supported for the hardware domain. */
509
6
            if ( pt_irq_bind->irq_type != PT_IRQ_TYPE_PCI ||
510
6
                 pirq >= hvm_domain_irq(d)->nr_gsis )
511
0
            {
512
0
                spin_unlock(&d->event_lock);
513
0
514
0
                return -EINVAL;
515
0
            }
516
6
            guest_gsi = pirq;
517
6
        }
518
6
519
6
        /* Bind the same mirq once in the same domain */
520
6
        if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
521
6
        {
522
6
            unsigned int share;
523
6
524
6
            /* MUST be set, as the pirq_dpci can be re-used. */
525
6
            pirq_dpci->dom = d;
526
6
            if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI_TRANSLATE )
527
0
            {
528
0
                pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED |
529
0
                                   HVM_IRQ_DPCI_MACH_MSI |
530
0
                                   HVM_IRQ_DPCI_GUEST_PCI |
531
0
                                   HVM_IRQ_DPCI_TRANSLATE;
532
0
                share = 0;
533
0
            }
534
6
            else    /* PT_IRQ_TYPE_PCI */
535
6
            {
536
6
                pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED |
537
6
                                   HVM_IRQ_DPCI_MACH_PCI |
538
6
                                   HVM_IRQ_DPCI_GUEST_PCI;
539
6
                if ( !is_hardware_domain(d) )
540
0
                    share = BIND_PIRQ__WILL_SHARE;
541
6
                else
542
6
                {
543
6
                    int mask = vioapic_get_mask(d, guest_gsi);
544
6
                    int trigger_mode = vioapic_get_trigger_mode(d, guest_gsi);
545
6
546
6
                    if ( mask < 0 || trigger_mode < 0 )
547
0
                    {
548
0
                        spin_unlock(&d->event_lock);
549
0
550
0
                        ASSERT_UNREACHABLE();
551
0
                        return -EINVAL;
552
0
                    }
553
6
                    pirq_dpci->flags |= HVM_IRQ_DPCI_IDENTITY_GSI;
554
6
                    /*
555
6
                     * Check if the corresponding vIO APIC pin is configured
556
6
                     * as level or edge triggered; level triggered interrupts will
557
6
                     * be marked as shareable.
558
6
                     */
559
6
                    ASSERT(!mask);
560
6
                    share = trigger_mode;
561
6
                }
562
6
            }
563
6
564
6
            /* Init timer before binding */
565
6
            if ( pt_irq_need_timer(pirq_dpci->flags) )
566
6
                init_timer(&pirq_dpci->timer, pt_irq_time_out, pirq_dpci, 0);
567
6
            /* Deal with gsi for legacy devices */
568
6
            rc = pirq_guest_bind(d->vcpu[0], info, share);
569
6
            if ( unlikely(rc) )
570
0
            {
571
0
                if ( pt_irq_need_timer(pirq_dpci->flags) )
572
0
                    kill_timer(&pirq_dpci->timer);
573
0
                /*
574
0
                 * There is no path for __do_IRQ to schedule softirq as
575
0
                 * IRQ_GUEST is not set. As such we can reset 'dom' directly.
576
0
                 */
577
0
                pirq_dpci->dom = NULL;
578
0
                if ( hvm_irq_dpci )
579
0
                {
580
0
                    unsigned int link;
581
0
582
0
                    ASSERT(girq && digl);
583
0
                    list_del(&girq->list);
584
0
                    list_del(&digl->list);
585
0
                    link = hvm_pci_intx_link(digl->device, digl->intx);
586
0
                    hvm_irq_dpci->link_cnt[link]--;
587
0
                }
588
0
                pirq_dpci->flags = 0;
589
0
                pirq_cleanup_check(info, d);
590
0
                spin_unlock(&d->event_lock);
591
0
                xfree(girq);
592
0
                xfree(digl);
593
0
                return rc;
594
0
            }
595
6
        }
596
6
597
6
        spin_unlock(&d->event_lock);
598
6
599
6
        if ( iommu_verbose )
600
6
        {
601
6
            char buf[24] = "";
602
6
603
6
            if ( digl )
604
0
                snprintf(buf, ARRAY_SIZE(buf), " dev=%02x.%02x.%u intx=%u",
605
0
                         digl->bus, PCI_SLOT(digl->device),
606
0
                         PCI_FUNC(digl->device), digl->intx);
607
6
608
6
            printk(XENLOG_G_INFO "d%d: bind: m_gsi=%u g_gsi=%u%s\n",
609
6
                   d->domain_id, pirq, guest_gsi, buf);
610
6
        }
611
6
        break;
612
6
    }
613
6
614
0
    default:
615
0
        spin_unlock(&d->event_lock);
616
0
        return -EOPNOTSUPP;
617
48
    }
618
48
619
48
    return 0;
620
48
}
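For orientation, a hypothetical caller-side sketch of an MSI bind (the field and constant names are the ones this function dereferences above; everything else, including the variable names, is assumed):

    struct xen_domctl_bind_pt_irq bind = {
        .machine_irq = pirq,               /* host PIRQ: 0 <= pirq < d->nr_pirqs */
        .irq_type    = PT_IRQ_TYPE_MSI,
    };

    bind.u.msi.gvec   = 0x51;              /* guest vector */
    bind.u.msi.gflags = gflags;            /* dest id/mode/delivery, optionally
                                              XEN_DOMCTL_VMSI_X86_UNMASKED */
    bind.u.msi.gtable = gtable;            /* MSI-X table address passed to
                                              msixtbl_pt_register(), or 0 */

    rc = pt_irq_create_bind(d, &bind);     /* 0, -EINVAL, -ENOMEM or -EBUSY as above */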
621
622
int pt_irq_destroy_bind(
623
    struct domain *d, const struct xen_domctl_bind_pt_irq *pt_irq_bind)
624
0
{
625
0
    struct hvm_irq_dpci *hvm_irq_dpci;
626
0
    struct hvm_pirq_dpci *pirq_dpci;
627
0
    unsigned int machine_gsi = pt_irq_bind->machine_irq;
628
0
    struct pirq *pirq;
629
0
    const char *what = NULL;
630
0
631
0
    switch ( pt_irq_bind->irq_type )
632
0
    {
633
0
    case PT_IRQ_TYPE_PCI:
634
0
    case PT_IRQ_TYPE_MSI_TRANSLATE:
635
0
        if ( iommu_verbose )
636
0
        {
637
0
            unsigned int device = pt_irq_bind->u.pci.device;
638
0
            unsigned int intx = pt_irq_bind->u.pci.intx;
639
0
640
0
            printk(XENLOG_G_INFO
641
0
                   "d%d: unbind: m_gsi=%u g_gsi=%u dev=%02x:%02x.%u intx=%u\n",
642
0
                   d->domain_id, machine_gsi, hvm_pci_intx_gsi(device, intx),
643
0
                   pt_irq_bind->u.pci.bus,
644
0
                   PCI_SLOT(device), PCI_FUNC(device), intx);
645
0
        }
646
0
        break;
647
0
    case PT_IRQ_TYPE_MSI:
648
0
        break;
649
0
    default:
650
0
        return -EOPNOTSUPP;
651
0
    }
652
0
653
0
    spin_lock(&d->event_lock);
654
0
655
0
    hvm_irq_dpci = domain_get_irq_dpci(d);
656
0
657
0
    if ( !hvm_irq_dpci && !is_hardware_domain(d) )
658
0
    {
659
0
        spin_unlock(&d->event_lock);
660
0
        return -EINVAL;
661
0
    }
662
0
663
0
    pirq = pirq_info(d, machine_gsi);
664
0
    pirq_dpci = pirq_dpci(pirq);
665
0
666
0
    if ( hvm_irq_dpci && pt_irq_bind->irq_type != PT_IRQ_TYPE_MSI )
667
0
    {
668
0
        unsigned int bus = pt_irq_bind->u.pci.bus;
669
0
        unsigned int device = pt_irq_bind->u.pci.device;
670
0
        unsigned int intx = pt_irq_bind->u.pci.intx;
671
0
        unsigned int guest_gsi = hvm_pci_intx_gsi(device, intx);
672
0
        unsigned int link = hvm_pci_intx_link(device, intx);
673
0
        struct hvm_girq_dpci_mapping *girq;
674
0
        struct dev_intx_gsi_link *digl, *tmp;
675
0
676
0
        list_for_each_entry ( girq, &hvm_irq_dpci->girq[guest_gsi], list )
677
0
        {
678
0
            if ( girq->bus         == bus &&
679
0
                 girq->device      == device &&
680
0
                 girq->intx        == intx &&
681
0
                 girq->machine_gsi == machine_gsi )
682
0
            {
683
0
                list_del(&girq->list);
684
0
                xfree(girq);
685
0
                girq = NULL;
686
0
                break;
687
0
            }
688
0
        }
689
0
690
0
        if ( girq )
691
0
        {
692
0
            spin_unlock(&d->event_lock);
693
0
            return -EINVAL;
694
0
        }
695
0
696
0
        hvm_irq_dpci->link_cnt[link]--;
697
0
698
0
        /* clear the mirq info */
699
0
        if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
700
0
        {
701
0
            list_for_each_entry_safe ( digl, tmp, &pirq_dpci->digl_list, list )
702
0
            {
703
0
                if ( digl->bus    == bus &&
704
0
                     digl->device == device &&
705
0
                     digl->intx   == intx )
706
0
                {
707
0
                    list_del(&digl->list);
708
0
                    xfree(digl);
709
0
                }
710
0
            }
711
0
            what = list_empty(&pirq_dpci->digl_list) ? "final" : "partial";
712
0
        }
713
0
        else
714
0
            what = "bogus";
715
0
    }
716
0
    else if ( pirq_dpci && pirq_dpci->gmsi.posted )
717
0
        pi_update_irte(NULL, pirq, 0);
718
0
719
0
    if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) &&
720
0
         list_empty(&pirq_dpci->digl_list) )
721
0
    {
722
0
        pirq_guest_unbind(d, pirq);
723
0
        msixtbl_pt_unregister(d, pirq);
724
0
        if ( pt_irq_need_timer(pirq_dpci->flags) )
725
0
            kill_timer(&pirq_dpci->timer);
726
0
        pirq_dpci->flags = 0;
727
0
        /*
728
0
         * See comment in pt_irq_create_bind's PT_IRQ_TYPE_MSI before the
729
0
         * call to pt_pirq_softirq_reset.
730
0
         */
731
0
        pt_pirq_softirq_reset(pirq_dpci);
732
0
733
0
        pirq_cleanup_check(pirq, d);
734
0
    }
735
0
736
0
    spin_unlock(&d->event_lock);
737
0
738
0
    if ( what && iommu_verbose )
739
0
    {
740
0
        unsigned int device = pt_irq_bind->u.pci.device;
741
0
        char buf[24] = "";
742
0
743
0
        if ( hvm_irq_dpci )
744
0
            snprintf(buf, ARRAY_SIZE(buf), " dev=%02x.%02x.%u intx=%u",
745
0
                     pt_irq_bind->u.pci.bus, PCI_SLOT(device),
746
0
                     PCI_FUNC(device), pt_irq_bind->u.pci.intx);
747
0
748
0
        printk(XENLOG_G_INFO "d%d %s unmap: m_irq=%u%s\n",
749
0
               d->domain_id, what, machine_gsi, buf);
750
0
    }
751
0
752
0
    return 0;
753
0
}
754
755
void pt_pirq_init(struct domain *d, struct hvm_pirq_dpci *dpci)
756
48
{
757
48
    INIT_LIST_HEAD(&dpci->digl_list);
758
48
    dpci->gmsi.dest_vcpu_id = -1;
759
48
}
760
761
bool pt_pirq_cleanup_check(struct hvm_pirq_dpci *dpci)
762
0
{
763
0
    if ( !dpci->flags && !pt_pirq_softirq_active(dpci) )
764
0
    {
765
0
        dpci->dom = NULL;
766
0
        return true;
767
0
    }
768
0
    return false;
769
0
}
770
771
int pt_pirq_iterate(struct domain *d,
772
                    int (*cb)(struct domain *,
773
                              struct hvm_pirq_dpci *, void *),
774
                    void *arg)
775
0
{
776
0
    int rc = 0;
777
0
    unsigned int pirq = 0, n, i;
778
0
    struct pirq *pirqs[8];
779
0
780
0
    ASSERT(spin_is_locked(&d->event_lock));
781
0
782
0
    do {
783
0
        n = radix_tree_gang_lookup(&d->pirq_tree, (void **)pirqs, pirq,
784
0
                                   ARRAY_SIZE(pirqs));
785
0
        for ( i = 0; i < n; ++i )
786
0
        {
787
0
            struct hvm_pirq_dpci *pirq_dpci = pirq_dpci(pirqs[i]);
788
0
789
0
            pirq = pirqs[i]->pirq;
790
0
            if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
791
0
                rc = cb(d, pirq_dpci, arg);
792
0
        }
793
0
    } while ( !rc && ++pirq < d->nr_pirqs && n == ARRAY_SIZE(pirqs) );
794
0
795
0
    return rc;
796
0
}
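The iterator invokes the callback only for MAPPED entries and stops early on a non-zero return; it must be called with d->event_lock held (see the ASSERT above). A hypothetical caller, modelled on the uses of pt_pirq_iterate() elsewhere in this file (the callback name and counter are illustrative):

    static int count_mapped(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
                            void *arg)
    {
        unsigned int *count = arg;

        ++*count;
        return 0;          /* returning non-zero would stop the iteration */
    }

    /* ... at the call site, with the lock held ... */
    unsigned int count = 0;

    spin_lock(&d->event_lock);
    pt_pirq_iterate(d, count_mapped, &count);
    spin_unlock(&d->event_lock);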
797
798
int hvm_do_IRQ_dpci(struct domain *d, struct pirq *pirq)
799
4.14k
{
800
4.14k
    struct hvm_irq_dpci *dpci = domain_get_irq_dpci(d);
801
4.14k
    struct hvm_pirq_dpci *pirq_dpci = pirq_dpci(pirq);
802
4.14k
803
4.14k
    ASSERT(is_hvm_domain(d));
804
4.14k
805
4.14k
    if ( !iommu_enabled || (!is_hardware_domain(d) && !dpci) ||
806
4.14k
         !pirq_dpci || !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
807
0
        return 0;
808
4.14k
809
4.14k
    pirq_dpci->masked = 1;
810
4.14k
    raise_softirq_for(pirq_dpci);
811
4.14k
    return 1;
812
4.14k
}
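Editor's summary of the delivery path that ties the pieces of this file together (illustrative comment, not part of io.c):

    /*
     * hvm_do_IRQ_dpci()               hardware interrupt context: set
     *                                 pirq_dpci->masked
     *   -> raise_softirq_for()        set STATE_SCHED, take the domain ref,
     *                                 queue on this_cpu(dpci_list)
     *     -> dpci_softirq()           set STATE_RUN, clear STATE_SCHED and
     *                                 drop the ref once the handler is done
     *       -> hvm_dirq_assist()      deliver the vMSI, or assert the guest
     *                                 GSI/INTx and arm pirq_dpci->timer
     *         -> pt_irq_time_out()    deassert and EOI if the guest never
     *                                 EOIs the line-level interrupt
     */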
813
814
/* called with d->event_lock held */
815
static void __msi_pirq_eoi(struct hvm_pirq_dpci *pirq_dpci)
816
0
{
817
0
    irq_desc_t *desc;
818
0
819
0
    if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) &&
820
0
         (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) )
821
0
    {
822
0
        struct pirq *pirq = dpci_pirq(pirq_dpci);
823
0
824
0
        BUG_ON(!local_irq_is_enabled());
825
0
        desc = pirq_spin_lock_irq_desc(pirq, NULL);
826
0
        if ( !desc )
827
0
            return;
828
0
        desc_guest_eoi(desc, pirq);
829
0
    }
830
0
}
831
832
static int _hvm_dpci_msi_eoi(struct domain *d,
833
                             struct hvm_pirq_dpci *pirq_dpci, void *arg)
834
0
{
835
0
    int vector = (long)arg;
836
0
837
0
    if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) &&
838
0
         (pirq_dpci->gmsi.gvec == vector) )
839
0
    {
840
0
        unsigned int dest = MASK_EXTR(pirq_dpci->gmsi.gflags,
841
0
                                      XEN_DOMCTL_VMSI_X86_DEST_ID_MASK);
842
0
        bool dest_mode = pirq_dpci->gmsi.gflags & XEN_DOMCTL_VMSI_X86_DM_MASK;
843
0
844
0
        if ( vlapic_match_dest(vcpu_vlapic(current), NULL, 0, dest,
845
0
                               dest_mode) )
846
0
        {
847
0
            __msi_pirq_eoi(pirq_dpci);
848
0
            return 1;
849
0
        }
850
0
    }
851
0
852
0
    return 0;
853
0
}
854
855
void hvm_dpci_msi_eoi(struct domain *d, int vector)
856
296
{
857
296
    if ( !iommu_enabled || !hvm_domain_irq(d)->dpci )
858
296
       return;
859
296
860
0
    spin_lock(&d->event_lock);
861
0
    pt_pirq_iterate(d, _hvm_dpci_msi_eoi, (void *)(long)vector);
862
0
    spin_unlock(&d->event_lock);
863
0
}
864
865
static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
866
4.14k
{
867
4.14k
    if ( unlikely(!hvm_domain_irq(d)->dpci) && !is_hardware_domain(d) )
868
0
    {
869
0
        ASSERT_UNREACHABLE();
870
0
        return;
871
0
    }
872
4.14k
873
4.14k
    spin_lock(&d->event_lock);
874
4.14k
    if ( test_and_clear_bool(pirq_dpci->masked) )
875
4.14k
    {
876
4.14k
        struct pirq *pirq = dpci_pirq(pirq_dpci);
877
4.14k
        const struct dev_intx_gsi_link *digl;
878
4.14k
879
4.14k
        if ( hvm_domain_use_pirq(d, pirq) )
880
0
        {
881
0
            send_guest_pirq(d, pirq);
882
0
883
0
            if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI )
884
0
            {
885
0
                spin_unlock(&d->event_lock);
886
0
                return;
887
0
            }
888
0
        }
889
4.14k
890
4.14k
        if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI )
891
3.84k
        {
892
3.84k
            vmsi_deliver_pirq(d, pirq_dpci);
893
3.84k
            spin_unlock(&d->event_lock);
894
3.84k
            return;
895
3.84k
        }
896
4.14k
897
301
        list_for_each_entry ( digl, &pirq_dpci->digl_list, list )
898
0
        {
899
0
            ASSERT(!(pirq_dpci->flags & HVM_IRQ_DPCI_IDENTITY_GSI));
900
0
            hvm_pci_intx_assert(d, digl->device, digl->intx);
901
0
            pirq_dpci->pending++;
902
0
        }
903
301
904
301
        if ( pirq_dpci->flags & HVM_IRQ_DPCI_IDENTITY_GSI )
905
301
        {
906
301
            hvm_gsi_assert(d, pirq->pirq);
907
301
            pirq_dpci->pending++;
908
301
        }
909
301
910
301
        if ( pirq_dpci->flags & HVM_IRQ_DPCI_TRANSLATE )
911
0
        {
912
0
            /* for translated MSI to INTx interrupt, eoi as early as possible */
913
0
            __msi_pirq_eoi(pirq_dpci);
914
0
            spin_unlock(&d->event_lock);
915
0
            return;
916
0
        }
917
301
918
301
        /*
919
301
         * Set a timer to see if the guest can finish the interrupt or not. For
920
301
         * example, the guest OS may unmask the PIC during boot, before the
921
301
         * guest driver is loaded. hvm_pci_intx_assert() may succeed, but the
922
301
         * guest will never deal with the irq, and the physical interrupt line
923
301
         * will never be deasserted.
924
301
         */
925
301
        ASSERT(pt_irq_need_timer(pirq_dpci->flags));
926
301
        set_timer(&pirq_dpci->timer, NOW() + PT_IRQ_TIME_OUT);
927
301
    }
928
301
    spin_unlock(&d->event_lock);
929
301
}
930
931
static void hvm_pirq_eoi(struct pirq *pirq,
932
                         const union vioapic_redir_entry *ent)
933
532
{
934
532
    struct hvm_pirq_dpci *pirq_dpci;
935
532
936
532
    if ( !pirq )
937
0
    {
938
0
        ASSERT_UNREACHABLE();
939
0
        return;
940
0
    }
941
532
942
532
    pirq_dpci = pirq_dpci(pirq);
943
532
944
532
    /*
945
532
     * No need to get vector lock for timer
946
532
     * since interrupt is still not EOIed
947
532
     */
948
532
    if ( --pirq_dpci->pending ||
949
293
         (ent && ent->fields.mask) ||
950
0
         !pt_irq_need_timer(pirq_dpci->flags) )
951
532
        return;
952
532
953
0
    stop_timer(&pirq_dpci->timer);
954
0
    pirq_guest_eoi(pirq);
955
0
}
956
957
static void __hvm_dpci_eoi(struct domain *d,
958
                           const struct hvm_girq_dpci_mapping *girq,
959
                           const union vioapic_redir_entry *ent)
960
0
{
961
0
    struct pirq *pirq = pirq_info(d, girq->machine_gsi);
962
0
963
0
    if ( !hvm_domain_use_pirq(d, pirq) )
964
0
        hvm_pci_intx_deassert(d, girq->device, girq->intx);
965
0
966
0
    hvm_pirq_eoi(pirq, ent);
967
0
}
968
969
static void hvm_gsi_eoi(struct domain *d, unsigned int gsi,
970
                        const union vioapic_redir_entry *ent)
971
650
{
972
650
    struct pirq *pirq = pirq_info(d, gsi);
973
650
974
650
    /* Check if GSI is actually mapped. */
975
650
    if ( !pirq_dpci(pirq) )
976
118
        return;
977
650
978
532
    hvm_gsi_deassert(d, gsi);
979
532
    hvm_pirq_eoi(pirq, ent);
980
532
}
981
982
void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
983
                  const union vioapic_redir_entry *ent)
984
650
{
985
650
    const struct hvm_irq_dpci *hvm_irq_dpci;
986
650
    const struct hvm_girq_dpci_mapping *girq;
987
650
988
650
    if ( !iommu_enabled )
989
0
        return;
990
650
991
650
    if ( is_hardware_domain(d) )
992
650
    {
993
650
        spin_lock(&d->event_lock);
994
650
        hvm_gsi_eoi(d, guest_gsi, ent);
995
650
        goto unlock;
996
650
    }
997
650
998
0
    if ( guest_gsi < NR_ISAIRQS )
999
0
    {
1000
0
        hvm_dpci_isairq_eoi(d, guest_gsi);
1001
0
        return;
1002
0
    }
1003
0
1004
0
    spin_lock(&d->event_lock);
1005
0
    hvm_irq_dpci = domain_get_irq_dpci(d);
1006
0
1007
0
    if ( !hvm_irq_dpci )
1008
0
        goto unlock;
1009
0
1010
0
    list_for_each_entry ( girq, &hvm_irq_dpci->girq[guest_gsi], list )
1011
0
        __hvm_dpci_eoi(d, girq, ent);
1012
0
1013
650
unlock:
1014
650
    spin_unlock(&d->event_lock);
1015
650
}
1016
1017
/*
1018
 * Note: 'pt_pirq_softirq_reset' can clear the STATE_SCHED before we get to
1019
 * doing it. If that is the case we let 'pt_pirq_softirq_reset' do ref-counting.
1020
 */
1021
static void dpci_softirq(void)
1022
4.05k
{
1023
4.05k
    unsigned int cpu = smp_processor_id();
1024
4.05k
    LIST_HEAD(our_list);
1025
4.05k
1026
4.05k
    local_irq_disable();
1027
4.05k
    list_splice_init(&per_cpu(dpci_list, cpu), &our_list);
1028
4.05k
    local_irq_enable();
1029
4.05k
1030
8.20k
    while ( !list_empty(&our_list) )
1031
4.14k
    {
1032
4.14k
        struct hvm_pirq_dpci *pirq_dpci;
1033
4.14k
        struct domain *d;
1034
4.14k
1035
4.14k
        pirq_dpci = list_entry(our_list.next, struct hvm_pirq_dpci, softirq_list);
1036
4.14k
        list_del(&pirq_dpci->softirq_list);
1037
4.14k
1038
4.14k
        d = pirq_dpci->dom;
1039
4.14k
        smp_mb(); /* 'd' MUST be saved before we set/clear the bits. */
1040
4.14k
        if ( test_and_set_bit(STATE_RUN, &pirq_dpci->state) )
1041
0
        {
1042
0
            unsigned long flags;
1043
0
1044
0
            /* Put back on the list and retry. */
1045
0
            local_irq_save(flags);
1046
0
            list_add_tail(&pirq_dpci->softirq_list, &this_cpu(dpci_list));
1047
0
            local_irq_restore(flags);
1048
0
1049
0
            raise_softirq(HVM_DPCI_SOFTIRQ);
1050
0
            continue;
1051
0
        }
1052
4.14k
        /*
1053
4.14k
         * The one who clears STATE_SCHED MUST refcount the domain.
1054
4.14k
         */
1055
4.14k
        if ( test_and_clear_bit(STATE_SCHED, &pirq_dpci->state) )
1056
4.14k
        {
1057
4.14k
            hvm_dirq_assist(d, pirq_dpci);
1058
4.14k
            put_domain(d);
1059
4.14k
        }
1060
4.14k
        clear_bit(STATE_RUN, &pirq_dpci->state);
1061
4.14k
    }
1062
4.05k
}
1063
1064
static int cpu_callback(
1065
    struct notifier_block *nfb, unsigned long action, void *hcpu)
1066
0
{
1067
0
    unsigned int cpu = (unsigned long)hcpu;
1068
0
1069
0
    switch ( action )
1070
0
    {
1071
0
    case CPU_UP_PREPARE:
1072
0
        INIT_LIST_HEAD(&per_cpu(dpci_list, cpu));
1073
0
        break;
1074
0
    case CPU_UP_CANCELED:
1075
0
    case CPU_DEAD:
1076
0
        /*
1077
0
         * On CPU_DYING this callback is called (on the CPU that is dying)
1078
0
         * with a possible HVM_DPCI_SOFTIRQ pending - at which point we can
1079
0
         * clear out any outstanding domains (by the virtue of the idle loop
1080
0
         * calling the softirq later). In CPU_DEAD case the CPU is deaf and
1081
0
         * there are no pending softirqs for us to handle so we can chill.
1082
0
         */
1083
0
        ASSERT(list_empty(&per_cpu(dpci_list, cpu)));
1084
0
        break;
1085
0
    }
1086
0
1087
0
    return NOTIFY_DONE;
1088
0
}
1089
1090
static struct notifier_block cpu_nfb = {
1091
    .notifier_call = cpu_callback,
1092
};
1093
1094
static int __init setup_dpci_softirq(void)
1095
1
{
1096
1
    unsigned int cpu;
1097
1
1098
1
    for_each_online_cpu(cpu)
1099
12
        INIT_LIST_HEAD(&per_cpu(dpci_list, cpu));
1100
1
1101
1
    open_softirq(HVM_DPCI_SOFTIRQ, dpci_softirq);
1102
1
    register_cpu_notifier(&cpu_nfb);
1103
1
    return 0;
1104
1
}
1105
__initcall(setup_dpci_softirq);