/root/src/xen/xen/drivers/vpci/msix.c
Line | Count | Source |
1 | | /* |
2 | | * Handlers for accesses to the MSI-X capability structure and the memory |
3 | | * region. |
4 | | * |
5 | | * Copyright (C) 2017 Citrix Systems R&D |
6 | | * |
7 | | * This program is free software; you can redistribute it and/or |
8 | | * modify it under the terms and conditions of the GNU General Public |
9 | | * License, version 2, as published by the Free Software Foundation. |
10 | | * |
11 | | * This program is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | | * General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU General Public |
17 | | * License along with this program; If not, see <http://www.gnu.org/licenses/>. |
18 | | */ |
19 | | |
20 | | #include <xen/sched.h> |
21 | | #include <xen/vpci.h> |
22 | | #include <asm/msi.h> |
23 | | #include <xen/p2m-common.h> |
24 | | #include <xen/keyhandler.h> |
25 | | |
26 | | #define VMSIX_SIZE(num) offsetof(struct vpci_msix, entries[num]) |
27 | | |
28 | | #define VMSIX_ADDR_IN_RANGE(addr, vpci, nr) \ |
29 | 4.82M | ((addr) >= VMSIX_TABLE_ADDR(vpci, nr) && \ |
30 | 101k | (addr) < VMSIX_TABLE_ADDR(vpci, nr) + VMSIX_TABLE_SIZE(vpci, nr)) |
31 | | |
32 | | static uint32_t control_read(const struct pci_dev *pdev, unsigned int reg, |
33 | | void *data) |
34 | 13 | { |
35 | 13 | const struct vpci_msix *msix = data; |
36 | 13 | |
37 | 13 | return (msix->max_entries - 1) | |
38 | 9 | (msix->enabled ? PCI_MSIX_FLAGS_ENABLE : 0) | |
39 | 13 | (msix->masked ? PCI_MSIX_FLAGS_MASKALL : 0); |
40 | 13 | } |
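For reference, the MSI-X Message Control word that control_read() reconstructs packs the table size minus one in bits 10:0, the function (mask-all) bit in bit 14 and the MSI-X enable bit in bit 15. A minimal, self-contained sketch of decoding such a word (the macro names below are illustrative, not Xen's):

    #include <stdio.h>

    /* Field positions in the MSI-X Message Control word (from the PCI spec). */
    #define MSIX_CTRL_QSIZE_MASK  0x07ffu  /* bits 10:0 - table size minus one */
    #define MSIX_CTRL_FUNC_MASK   0x4000u  /* bit 14 - function (mask-all) bit */
    #define MSIX_CTRL_ENABLE      0x8000u  /* bit 15 - MSI-X enable bit */

    int main(void)
    {
        unsigned int ctrl = 0x8007;  /* example: enabled, not masked, 8 entries */

        printf("entries: %u\n", (ctrl & MSIX_CTRL_QSIZE_MASK) + 1);
        printf("enabled: %d\n", !!(ctrl & MSIX_CTRL_ENABLE));
        printf("masked:  %d\n", !!(ctrl & MSIX_CTRL_FUNC_MASK));
        return 0;
    }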
41 | | |
42 | | static void control_write(const struct pci_dev *pdev, unsigned int reg, |
43 | | uint32_t val, void *data) |
44 | 13 | { |
45 | 13 | uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn); |
46 | 13 | struct vpci_msix *msix = data; |
47 | 13 | bool new_masked = val & PCI_MSIX_FLAGS_MASKALL; |
48 | 13 | bool new_enabled = val & PCI_MSIX_FLAGS_ENABLE; |
49 | 13 | unsigned int i; |
50 | 13 | int rc; |
51 | 13 | |
52 | 13 | if ( new_masked == msix->masked && new_enabled == msix->enabled ) |
53 | 9 | return; |
54 | 13 | |
55 | 13 | /* |
56 | 13 | * According to the PCI 3.0 specification, switching the enable bit to 1 |
57 | 13 | * or the function mask bit to 0 should cause all the cached addresses |
58 | 13 | * and data fields to be recalculated. |
59 | 13 | * |
60 | 13 | * In order to avoid the overhead of disabling and enabling all the |
61 | 13 | * entries every time the guest sets the maskall bit, Xen will only |
62 | 13 | * perform the disable and enable sequence when the guest has written to |
63 | 13 | * the entry. |
64 | 13 | */ |
65 | 4 | if ( new_enabled && !new_masked && (!msix->enabled || msix->masked) ) |
66 | 4 | { |
67 | 44 | for ( i = 0; i < msix->max_entries; i++ ) |
68 | 40 | { |
69 | 40 | if ( msix->entries[i].masked || !msix->entries[i].updated ) |
70 | 40 | continue; |
71 | 40 | |
72 | 0 | rc = vpci_msix_arch_disable_entry(&msix->entries[i], pdev); |
73 | 0 | /* Ignore -ENOENT; it means the entry wasn't set up. */ |
74 | 0 | if ( rc && rc != -ENOENT ) |
75 | 0 | { |
76 | 0 | gprintk(XENLOG_WARNING, |
77 | 0 | "%04x:%02x:%02x.%u: unable to disable entry %u: %d\n", |
78 | 0 | pdev->seg, pdev->bus, slot, func, i, rc); |
79 | 0 | return; |
80 | 0 | } |
81 | 0 |
82 | 0 | rc = vpci_msix_arch_enable_entry(&msix->entries[i], pdev, |
83 | 0 | VMSIX_TABLE_BASE(pdev->vpci, |
84 | 0 | VPCI_MSIX_TABLE)); |
85 | 0 | if ( rc ) |
86 | 0 | { |
87 | 0 | gprintk(XENLOG_WARNING, |
88 | 0 | "%04x:%02x:%02x.%u: unable to enable entry %u: %d\n", |
89 | 0 | pdev->seg, pdev->bus, slot, func, i, rc); |
90 | 0 | /* The entry is likely not properly configured; skip it. */ |
91 | 0 | continue; |
92 | 0 | } |
93 | 0 |
94 | 0 | /* |
95 | 0 | * At this point the PIRQ is still masked. Unmask it, or else the |
96 | 0 | * guest won't receive interrupts. This is due to the |
97 | 0 | * disable/enable sequence performed above. |
98 | 0 | */ |
99 | 0 | vpci_msix_arch_mask_entry(&msix->entries[i], pdev, false); |
100 | 0 |
101 | 0 | msix->entries[i].updated = false; |
102 | 0 | } |
103 | 4 | } |
104 | 0 | else if ( !new_enabled && msix->enabled ) |
105 | 0 | { |
106 | 0 | /* Guest has disabled MSI-X; disable all entries. */ |
107 | 0 | for ( i = 0; i < msix->max_entries; i++ ) |
108 | 0 | { |
109 | 0 | /* |
110 | 0 | * NB: vpci_msix_arch_disable_entry() can be called for entries that |
111 | 0 | * are not set up; it will return -ENOENT in that case. |
112 | 0 | */ |
113 | 0 | rc = vpci_msix_arch_disable_entry(&msix->entries[i], pdev); |
114 | 0 | switch ( rc ) |
115 | 0 | { |
116 | 0 | case 0: |
117 | 0 | /* |
118 | 0 | * Mark the entry successfully disabled as updated, so that on |
119 | 0 | * the next enable the entry is properly set up. This is done |
120 | 0 | * so that the following flow works correctly: |
121 | 0 | * |
122 | 0 | * mask entry -> disable MSIX -> enable MSIX -> unmask entry |
123 | 0 | * |
124 | 0 | * Without setting 'updated', the 'unmask entry' step will fail |
125 | 0 | * because the entry has not been updated, so it would not be |
126 | 0 | * mapped/bound at all. |
127 | 0 | */ |
128 | 0 | msix->entries[i].updated = true; |
129 | 0 | break; |
130 | 0 | case -ENOENT: |
131 | 0 | /* Ignore non-present entry. */ |
132 | 0 | break; |
133 | 0 | default: |
134 | 0 | gprintk(XENLOG_WARNING, |
135 | 0 | "%04x:%02x:%02x.%u: unable to disable entry %u: %d\n", |
136 | 0 | pdev->seg, pdev->bus, slot, func, i, rc); |
137 | 0 | return; |
138 | 0 | } |
139 | 0 | } |
140 | 0 | } |
141 | 4 | |
142 | 4 | msix->masked = new_masked; |
143 | 4 | msix->enabled = new_enabled; |
144 | 4 | |
145 | 4 | val = control_read(pdev, reg, data); |
146 | 4 | if ( pci_msi_conf_write_intercept(msix->pdev, reg, 2, &val) >= 0 ) |
147 | 4 | pci_conf_write16(pdev->seg, pdev->bus, slot, func, reg, val); |
148 | 4 | } |
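The condition guarding the re-programming loop in control_write() boils down to: MSI-X is entering the "enabled and not function-masked" state from any other state. A stand-alone sketch of that predicate, assuming nothing beyond plain C:

    #include <stdbool.h>
    #include <stdio.h>

    /*
     * The transition control_write() re-programs entries for: MSI-X becomes
     * enabled and not function-masked, having previously been disabled or
     * masked. Sketch only.
     */
    static bool needs_resync(bool old_enabled, bool old_masked,
                             bool new_enabled, bool new_masked)
    {
        return new_enabled && !new_masked && (!old_enabled || old_masked);
    }

    int main(void)
    {
        printf("%d\n", needs_resync(false, false, true, false)); /* 1: enabling */
        printf("%d\n", needs_resync(true, true, true, false));   /* 1: clearing mask-all */
        printf("%d\n", needs_resync(true, false, true, false));  /* 0: already active */
        return 0;
    }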
149 | | |
150 | | static struct vpci_msix *msix_find(const struct domain *d, unsigned long addr) |
151 | 481k | { |
152 | 481k | struct vpci_msix *msix; |
153 | 481k | |
154 | 481k | list_for_each_entry ( msix, &d->arch.hvm_domain.msix_tables, next ) |
155 | 2.41M | { |
156 | 2.41M | const struct vpci_bar *bars = msix->pdev->vpci->header.bars; |
157 | 2.41M | unsigned int i; |
158 | 2.41M | |
159 | 7.23M | for ( i = 0; i < ARRAY_SIZE(msix->tables); i++ ) |
160 | 4.82M | if ( bars[msix->tables[i] & PCI_MSIX_BIRMASK].enabled && |
161 | 4.82M | VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, i) ) |
162 | 880 | return msix; |
163 | 2.41M | } |
164 | 481k | |
165 | 480k | return NULL; |
166 | 481k | } |
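msix_find() masks each table/PBA offset register with PCI_MSIX_BIRMASK before indexing the BARs: per the PCI spec, the low three bits of those registers hold the BAR Indicator Register (BIR) and the remaining bits the offset into that BAR. A tiny illustrative decode (the constant name here is made up, not Xen's):

    #include <stdio.h>

    #define DEMO_BIRMASK 0x7u  /* low 3 bits of the Table/PBA offset register */

    int main(void)
    {
        unsigned int table_reg = 0x00002003;  /* example: offset 0x2000 in BAR 3 */

        printf("BIR:    %u\n", table_reg & DEMO_BIRMASK);
        printf("offset: %#x\n", table_reg & ~DEMO_BIRMASK);
        return 0;
    }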
167 | | |
168 | | static int msix_accept(struct vcpu *v, unsigned long addr) |
169 | 481k | { |
170 | 481k | return !!msix_find(v->domain, addr); |
171 | 481k | } |
172 | | |
173 | | static bool access_allowed(const struct pci_dev *pdev, unsigned long addr, |
174 | | unsigned int len) |
175 | 220 | { |
176 | 220 | uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn); |
177 | 220 | |
178 | 220 | /* Only allow 32/64b accesses. */ |
179 | 220 | if ( len != 4 && len != 8 ) |
180 | 0 | { |
181 | 0 | gprintk(XENLOG_WARNING, |
182 | 0 | "%04x:%02x:%02x.%u: invalid MSI-X table access size: %u\n", |
183 | 0 | pdev->seg, pdev->bus, slot, func, len); |
184 | 0 | return false; |
185 | 0 | } |
186 | 220 | |
187 | 220 | /* Only allow aligned accesses. */ |
188 | 220 | if ( (addr & (len - 1)) != 0 ) |
189 | 0 | { |
190 | 0 | gprintk(XENLOG_WARNING, |
191 | 0 | "%04x:%02x:%02x.%u: MSI-X only allows aligned accesses\n", |
192 | 0 | pdev->seg, pdev->bus, slot, func); |
193 | 0 | return false; |
194 | 0 | } |
195 | 220 | |
196 | 220 | return true; |
197 | 220 | } |
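The alignment test in access_allowed() relies on len being a power of two (4 or 8): a naturally aligned address has all of its low log2(len) bits clear, so addr & (len - 1) is zero exactly for aligned accesses. A trivial sketch:

    #include <stdbool.h>
    #include <stdio.h>

    /* Alignment test as used by access_allowed(), for power-of-two lengths. */
    static bool aligned(unsigned long addr, unsigned int len)
    {
        return (addr & (len - 1)) == 0;
    }

    int main(void)
    {
        printf("%d\n", aligned(0x1008, 8)); /* 1 */
        printf("%d\n", aligned(0x100c, 8)); /* 0 */
        printf("%d\n", aligned(0x100c, 4)); /* 1 */
        return 0;
    }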
198 | | |
199 | | static struct vpci_msix_entry *get_entry(struct vpci_msix *msix, |
200 | | paddr_t addr) |
201 | 220 | { |
202 | 220 | paddr_t start = VMSIX_TABLE_ADDR(msix->pdev->vpci, VPCI_MSIX_TABLE); |
203 | 220 | |
204 | 220 | return &msix->entries[(addr - start) / PCI_MSIX_ENTRY_SIZE]; |
205 | 220 | } |
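get_entry() and the read/write handlers below split a table access into an entry index and a register offset using the fixed 16-byte entry size from the PCI spec (address low/high, data and vector control at offsets 0, 4, 8 and 0xc). A self-contained sketch of that arithmetic, with made-up addresses:

    #include <stdint.h>
    #include <stdio.h>

    #define ENTRY_SIZE 16u  /* each MSI-X table entry is 16 bytes */

    int main(void)
    {
        uint64_t table_base = 0xfe000000;        /* hypothetical table address */
        uint64_t access     = table_base + 0x2c; /* example guest access */

        /* Same arithmetic as get_entry()/msix_read(). */
        unsigned int index  = (access - table_base) / ENTRY_SIZE;  /* 2 */
        unsigned int offset = access & (ENTRY_SIZE - 1);           /* 0xc: vector control */

        printf("entry %u, register offset %#x\n", index, offset);
        return 0;
    }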
206 | | |
207 | | static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len, |
208 | | unsigned long *data) |
209 | 76 | { |
210 | 76 | const struct domain *d = v->domain; |
211 | 76 | struct vpci_msix *msix = msix_find(d, addr); |
212 | 76 | const struct vpci_msix_entry *entry; |
213 | 76 | unsigned int offset; |
214 | 76 | |
215 | 76 | *data = ~0ul; |
216 | 76 | |
217 | 76 | if ( !msix ) |
218 | 0 | return X86EMUL_RETRY; |
219 | 76 | |
220 | 76 | if ( !access_allowed(msix->pdev, addr, len) ) |
221 | 0 | return X86EMUL_OKAY; |
222 | 76 | |
223 | 76 | if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) ) |
224 | 0 | { |
225 | 0 | /* |
226 | 0 | * Access to PBA. |
227 | 0 | * |
228 | 0 | * TODO: note that this relies on having the PBA identity mapped to the |
229 | 0 | * guest address space. If this changes, the address will need to be |
230 | 0 | * translated. |
231 | 0 | */ |
232 | 0 | switch ( len ) |
233 | 0 | { |
234 | 0 | case 4: |
235 | 0 | *data = readl(addr); |
236 | 0 | break; |
237 | 0 | case 8: |
238 | 0 | *data = readq(addr); |
239 | 0 | break; |
240 | 0 | default: |
241 | 0 | ASSERT_UNREACHABLE(); |
242 | 0 | break; |
243 | 0 | } |
244 | 0 |
245 | 0 | return X86EMUL_OKAY; |
246 | 0 | } |
247 | 76 | |
248 | 76 | spin_lock(&msix->pdev->vpci->lock); |
249 | 76 | entry = get_entry(msix, addr); |
250 | 76 | offset = addr & (PCI_MSIX_ENTRY_SIZE - 1); |
251 | 76 | |
252 | 76 | switch ( offset ) |
253 | 76 | { |
254 | 0 | case PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET: |
255 | 0 | *data = entry->addr; |
256 | 0 | break; |
257 | 0 | case PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET: |
258 | 0 | *data = entry->addr >> 32; |
259 | 0 | break; |
260 | 0 | case PCI_MSIX_ENTRY_DATA_OFFSET: |
261 | 0 | *data = entry->data; |
262 | 0 | if ( len == 8 ) |
263 | 0 | *data |= |
264 | 0 | (uint64_t)(entry->masked ? PCI_MSIX_VECTOR_BITMASK : 0) << 32; |
265 | 0 | break; |
266 | 76 | case PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET: |
267 | 76 | *data = entry->masked ? PCI_MSIX_VECTOR_BITMASK : 0; |
268 | 76 | break; |
269 | 0 | default: |
270 | 0 | ASSERT_UNREACHABLE(); |
271 | 0 | break; |
272 | 76 | } |
273 | 76 | spin_unlock(&msix->pdev->vpci->lock); |
274 | 76 | |
275 | 76 | return X86EMUL_OKAY; |
276 | 76 | } |
277 | | |
278 | | static int msix_write(struct vcpu *v, unsigned long addr, unsigned int len, |
279 | | unsigned long data) |
280 | 144 | { |
281 | 144 | const struct domain *d = v->domain; |
282 | 144 | struct vpci_msix *msix = msix_find(d, addr); |
283 | 144 | struct vpci_msix_entry *entry; |
284 | 144 | unsigned int offset; |
285 | 144 | |
286 | 144 | if ( !msix ) |
287 | 0 | return X86EMUL_RETRY; |
288 | 144 | |
289 | 144 | if ( !access_allowed(msix->pdev, addr, len) ) |
290 | 0 | return X86EMUL_OKAY; |
291 | 144 | |
292 | 144 | if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) ) |
293 | 0 | { |
294 | 0 | /* Ignore writes to the PBA for DomUs; its behavior is undefined. */ |
295 | 0 | if ( is_hardware_domain(d) ) |
296 | 0 | { |
297 | 0 | switch ( len ) |
298 | 0 | { |
299 | 0 | case 4: |
300 | 0 | writel(data, addr); |
301 | 0 | break; |
302 | 0 | case 8: |
303 | 0 | writeq(data, addr); |
304 | 0 | break; |
305 | 0 | default: |
306 | 0 | ASSERT_UNREACHABLE(); |
307 | 0 | break; |
308 | 0 | } |
309 | 0 | } |
310 | 0 |
311 | 0 | return X86EMUL_OKAY; |
312 | 0 | } |
313 | 144 | |
314 | 144 | spin_lock(&msix->pdev->vpci->lock); |
315 | 144 | entry = get_entry(msix, addr); |
316 | 144 | offset = addr & (PCI_MSIX_ENTRY_SIZE - 1); |
317 | 144 | |
318 | 144 | /* |
319 | 144 | * NB: Xen allows writes to the data/address registers with the entry |
320 | 144 | * unmasked. The specification says this is undefined behavior, and Xen |
321 | 144 | * implements it as storing the written value, which will be made effective |
322 | 144 | * in the next mask/unmask cycle. This also mimics the implementation in |
323 | 144 | * QEMU. |
324 | 144 | */ |
325 | 144 | switch ( offset ) |
326 | 144 | { |
327 | 36 | case PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET: |
328 | 36 | entry->updated = true; |
329 | 36 | if ( len == 8 ) |
330 | 0 | { |
331 | 0 | entry->addr = data; |
332 | 0 | break; |
333 | 0 | } |
334 | 36 | entry->addr &= ~0xffffffff; |
335 | 36 | entry->addr |= data; |
336 | 36 | break; |
337 | 36 | case PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET: |
338 | 36 | entry->updated = true; |
339 | 36 | entry->addr &= 0xffffffff; |
340 | 36 | entry->addr |= (uint64_t)data << 32; |
341 | 36 | break; |
342 | 36 | case PCI_MSIX_ENTRY_DATA_OFFSET: |
343 | 36 | entry->updated = true; |
344 | 36 | entry->data = data; |
345 | 36 | |
346 | 36 | if ( len == 4 ) |
347 | 36 | break; |
348 | 36 | |
349 | 0 | data >>= 32; |
350 | 0 | /* fallthrough */ |
351 | 36 | case PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET: |
352 | 36 | { |
353 | 36 | bool new_masked = data & PCI_MSIX_VECTOR_BITMASK; |
354 | 36 | const struct pci_dev *pdev = msix->pdev; |
355 | 36 | int rc; |
356 | 36 | |
357 | 36 | if ( entry->masked == new_masked ) |
358 | 36 | /* No change in the mask bit; nothing to do. */ |
359 | 0 | break; |
360 | 36 | |
361 | 36 | if ( !new_masked && msix->enabled && !msix->masked && entry->updated ) |
362 | 36 | { |
363 | 36 | /* |
364 | 36 | * If MSI-X is enabled, the function mask is not active, the entry |
365 | 36 | * is being unmasked and there have been changes to the address or |
366 | 36 | * data fields, Xen needs to disable and re-enable the entry in order |
367 | 36 | * to pick up the changes. |
368 | 36 | */ |
369 | 36 | rc = vpci_msix_arch_disable_entry(entry, pdev); |
370 | 36 | if ( rc && rc != -ENOENT ) |
371 | 0 | { |
372 | 0 | gprintk(XENLOG_WARNING, |
373 | 0 | "%04x:%02x:%02x.%u: unable to disable entry %u: %d\n", |
374 | 0 | pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn), |
375 | 0 | PCI_FUNC(pdev->devfn), VMSIX_ENTRY_NR(msix, entry), rc); |
376 | 0 | break; |
377 | 0 | } |
378 | 36 | |
379 | 36 | rc = vpci_msix_arch_enable_entry(entry, pdev, |
380 | 36 | VMSIX_TABLE_BASE(pdev->vpci, |
381 | 36 | VPCI_MSIX_TABLE)); |
382 | 36 | if ( rc ) |
383 | 0 | { |
384 | 0 | gprintk(XENLOG_WARNING, |
385 | 0 | "%04x:%02x:%02x.%u: unable to enable entry %u: %d\n", |
386 | 0 | pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn), |
387 | 0 | PCI_FUNC(pdev->devfn), VMSIX_ENTRY_NR(msix, entry), rc); |
388 | 0 | break; |
389 | 0 | } |
390 | 36 | entry->updated = false; |
391 | 36 | } |
392 | 36 | |
393 | 36 | vpci_msix_arch_mask_entry(entry, pdev, new_masked); |
394 | 36 | entry->masked = new_masked; |
395 | 36 | |
396 | 36 | break; |
397 | 36 | } |
398 | 0 | default: |
399 | 0 | ASSERT_UNREACHABLE(); |
400 | 0 | break; |
401 | 144 | } |
402 | 144 | spin_unlock(&msix->pdev->vpci->lock); |
403 | 144 | |
404 | 144 | return X86EMUL_OKAY; |
405 | 144 | } |
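The comment above the switch in msix_write() describes the deferred-update model: address/data writes mark the entry as updated, and the new values are only bound when the entry is unmasked while MSI-X is enabled and not function-masked. Seen from the guest, the conventional reprogramming sequence looks like this sketch (the table is modelled as a plain array; a real guest would write through its MMIO mapping):

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for one MSI-X table entry, 16 bytes as per the PCI spec. */
    static volatile uint32_t table[4]; /* entry 0: addr lo, addr hi, data, ctrl */

    /*
     * Guest-side view of reprogramming entry 0: mask, rewrite address/data,
     * unmask. With the vPCI model above, the writes set 'updated' and the
     * final unmask triggers the disable/enable sequence that binds the new
     * values.
     */
    static void update_entry0(uint64_t addr, uint32_t data)
    {
        table[3] = 1;                      /* vector control: mask the entry */
        table[0] = (uint32_t)addr;         /* message address, low 32 bits */
        table[1] = (uint32_t)(addr >> 32); /* message address, high 32 bits */
        table[2] = data;                   /* message data */
        table[3] = 0;                      /* unmask: new values take effect */
    }

    int main(void)
    {
        update_entry0(0xfee00000, 0x4041);
        printf("addr lo %#x data %#x\n", (unsigned)table[0], (unsigned)table[2]);
        return 0;
    }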
406 | | |
407 | | static const struct hvm_mmio_ops vpci_msix_table_ops = { |
408 | | .check = msix_accept, |
409 | | .read = msix_read, |
410 | | .write = msix_write, |
411 | | }; |
412 | | |
413 | | static int init_msix(struct pci_dev *pdev) |
414 | 68 | { |
415 | 68 | struct domain *d = pdev->domain; |
416 | 68 | uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn); |
417 | 68 | struct vpci_msix *msix; |
418 | 68 | unsigned int msix_offset, i, max_entries; |
419 | 68 | uint16_t control; |
420 | 68 | int rc; |
421 | 68 | |
422 | 68 | msix_offset = pci_find_cap_offset(pdev->seg, pdev->bus, slot, func, |
423 | 68 | PCI_CAP_ID_MSIX); |
424 | 68 | if ( !msix_offset ) |
425 | 63 | return 0; |
426 | 68 | |
427 | 5 | control = pci_conf_read16(pdev->seg, pdev->bus, slot, func, |
428 | 5 | msix_control_reg(msix_offset)); |
429 | 5 | |
430 | 5 | max_entries = msix_table_size(control); |
431 | 5 | |
432 | 5 | msix = xzalloc_bytes(VMSIX_SIZE(max_entries)); |
433 | 5 | if ( !msix ) |
434 | 0 | return -ENOMEM; |
435 | 5 | |
436 | 5 | msix->max_entries = max_entries; |
437 | 5 | msix->pdev = pdev; |
438 | 5 | |
439 | 5 | msix->tables[VPCI_MSIX_TABLE] = |
440 | 5 | pci_conf_read32(pdev->seg, pdev->bus, slot, func, |
441 | 5 | msix_table_offset_reg(msix_offset)); |
442 | 5 | msix->tables[VPCI_MSIX_PBA] = |
443 | 5 | pci_conf_read32(pdev->seg, pdev->bus, slot, func, |
444 | 5 | msix_pba_offset_reg(msix_offset)); |
445 | 5 | |
446 | 53 | for ( i = 0; i < msix->max_entries; i++) |
447 | 48 | { |
448 | 48 | msix->entries[i].masked = true; |
449 | 48 | vpci_msix_arch_init_entry(&msix->entries[i]); |
450 | 48 | } |
451 | 5 | |
452 | 5 | if ( list_empty(&d->arch.hvm_domain.msix_tables) ) |
453 | 1 | register_mmio_handler(d, &vpci_msix_table_ops); |
454 | 5 | |
455 | 5 | list_add(&msix->next, &d->arch.hvm_domain.msix_tables); |
456 | 5 | |
457 | 5 | rc = vpci_add_register(pdev->vpci, control_read, control_write, |
458 | 5 | msix_control_reg(msix_offset), 2, msix); |
459 | 5 | if ( rc ) |
460 | 0 | { |
461 | 0 | xfree(msix); |
462 | 0 | return rc; |
463 | 0 | } |
464 | 5 | |
465 | 5 | pdev->vpci->msix = msix; |
466 | 5 | |
467 | 5 | return 0; |
468 | 5 | } |
469 | | REGISTER_VPCI_INIT(init_msix, VPCI_PRIORITY_HIGH); |
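The VMSIX_SIZE() macro used by init_msix() sizes the allocation with offsetof() over the flexible entries[] array, so a single xzalloc covers the header plus max_entries trailing entries. A cut-down, stand-alone version of the same trick (the structures are simplified stand-ins; offsetof() with a runtime index is a GCC/Clang extension, which Xen also relies on):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Cut-down stand-ins for the real vPCI structures, for illustration only. */
    struct demo_entry { uint64_t addr; uint32_t data; bool masked; };
    struct demo_msix {
        unsigned int max_entries;
        struct demo_entry entries[];      /* flexible array member */
    };

    /* Same sizing trick as VMSIX_SIZE(): offsetof() over the flexible array
     * gives the header size plus room for 'num' trailing entries. */
    #define DEMO_SIZE(num) offsetof(struct demo_msix, entries[num])

    int main(void)
    {
        const unsigned int max_entries = 48;
        unsigned int i;
        struct demo_msix *msix = calloc(1, DEMO_SIZE(max_entries));

        if ( !msix )
            return 1;

        msix->max_entries = max_entries;
        for ( i = 0; i < max_entries; i++ )
            msix->entries[i].masked = true; /* entries start masked, as in init_msix() */

        printf("allocation: %zu bytes\n", DEMO_SIZE(max_entries));
        free(msix);
        return 0;
    }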
470 | | |
471 | | /* |
472 | | * Local variables: |
473 | | * mode: C |
474 | | * c-file-style: "BSD" |
475 | | * c-basic-offset: 4 |
476 | | * tab-width: 4 |
477 | | * indent-tabs-mode: nil |
478 | | * End: |
479 | | */ |