
view xen/drivers/passthrough/vtd/iommu.c @ 22795:93e7bf0e1845

vt-d: quirks for Sandybridge errata workaround, WLAN, VT-d fault escalation

Add an errata workaround for the newly released Sandybridge processor
graphics, additional WLAN device IDs for the WLAN quirk, and a quirk for
masking VT-d fault escalation to the IOH, which can cause system hangs
on some OEM hardware where the BIOS erroneously escalates VT-d faults
to the platform.

Signed-off-by: Allen Kay <allen.m.kay@intel.com>
author Allen Kay <allen.m.kay@intel.com>
date Fri Jan 14 08:11:46 2011 +0000 (2011-01-14)
parents 47713825a3f9
children
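
The workarounds added here follow the bracketing pattern already used throughout
this file: each register-level VT-d operation that a platform quirk cares about
is wrapped in a preamble/postamble pair (vtd_ops_preamble_quirk /
vtd_ops_postamble_quirk, implemented in the quirks code and called below). A
minimal sketch of that pattern only, assuming just those two hook names;
do_register_based_flush() is a hypothetical stand-in for the real register
sequence, not a function in this changeset:

    static int example_flush_with_quirks(struct iommu *iommu)
    {
        int status;

        /* apply platform specific errata workarounds */
        vtd_ops_preamble_quirk(iommu);

        /* the actual register-based operation (hypothetical helper) */
        status = do_register_based_flush(iommu);

        /* undo platform specific errata workarounds */
        vtd_ops_postamble_quirk(iommu);

        return status;
    }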
line source
1 /*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) Ashok Raj <ashok.raj@intel.com>
18 * Copyright (C) Shaohua Li <shaohua.li@intel.com>
19 * Copyright (C) Allen Kay <allen.m.kay@intel.com> - adapted to xen
20 */
22 #include <xen/irq.h>
23 #include <xen/sched.h>
24 #include <xen/xmalloc.h>
25 #include <xen/domain_page.h>
26 #include <xen/iommu.h>
27 #include <asm/hvm/iommu.h>
28 #include <xen/numa.h>
29 #include <xen/softirq.h>
30 #include <xen/time.h>
31 #include <xen/pci.h>
32 #include <xen/pci_regs.h>
33 #include <xen/keyhandler.h>
34 #include <asm/msi.h>
35 #include <asm/irq.h>
36 #include <asm/hvm/vmx/vmx.h>
37 #include <asm/p2m.h>
38 #include <mach_apic.h>
39 #include "iommu.h"
40 #include "dmar.h"
41 #include "extern.h"
42 #include "vtd.h"
44 #ifdef __ia64__
45 #define nr_ioapics iosapic_get_nr_iosapics()
46 #endif
48 static int sharept = 0;
49 boolean_param("sharept", sharept);
51 int nr_iommus;
53 static void setup_dom0_devices(struct domain *d);
54 static void setup_dom0_rmrr(struct domain *d);
56 static int domain_iommu_domid(struct domain *d,
57 struct iommu *iommu)
58 {
59 unsigned long nr_dom, i;
61 nr_dom = cap_ndoms(iommu->cap);
62 i = find_first_bit(iommu->domid_bitmap, nr_dom);
63 while ( i < nr_dom )
64 {
65 if ( iommu->domid_map[i] == d->domain_id )
66 return i;
68 i = find_next_bit(iommu->domid_bitmap, nr_dom, i+1);
69 }
71 dprintk(XENLOG_ERR VTDPREFIX,
72 "Cannot get valid iommu domid: domid=%d iommu->index=%d\n",
73 d->domain_id, iommu->index);
74 return -1;
75 }
77 #define DID_FIELD_WIDTH 16
78 #define DID_HIGH_OFFSET 8
79 static int context_set_domain_id(struct context_entry *context,
80 struct domain *d,
81 struct iommu *iommu)
82 {
83 unsigned long nr_dom, i;
84 int found = 0;
86 ASSERT(spin_is_locked(&iommu->lock));
88 nr_dom = cap_ndoms(iommu->cap);
89 i = find_first_bit(iommu->domid_bitmap, nr_dom);
90 while ( i < nr_dom )
91 {
92 if ( iommu->domid_map[i] == d->domain_id )
93 {
94 found = 1;
95 break;
96 }
97 i = find_next_bit(iommu->domid_bitmap, nr_dom, i+1);
98 }
100 if ( found == 0 )
101 {
102 i = find_first_zero_bit(iommu->domid_bitmap, nr_dom);
103 if ( i >= nr_dom )
104 {
105 dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no free domain ids\n");
106 return -EFAULT;
107 }
108 iommu->domid_map[i] = d->domain_id;
109 }
111 set_bit(i, iommu->domid_bitmap);
112 context->hi |= (i & ((1 << DID_FIELD_WIDTH) - 1)) << DID_HIGH_OFFSET;
113 return 0;
114 }
116 static struct intel_iommu *__init alloc_intel_iommu(void)
117 {
118 struct intel_iommu *intel;
120 intel = xmalloc(struct intel_iommu);
121 if ( intel == NULL )
122 return NULL;
123 memset(intel, 0, sizeof(struct intel_iommu));
125 spin_lock_init(&intel->qi_ctrl.qinval_lock);
126 spin_lock_init(&intel->ir_ctrl.iremap_lock);
128 return intel;
129 }
131 static void __init free_intel_iommu(struct intel_iommu *intel)
132 {
133 xfree(intel);
134 }
136 struct qi_ctrl *iommu_qi_ctrl(struct iommu *iommu)
137 {
138 return iommu ? &iommu->intel->qi_ctrl : NULL;
139 }
141 struct ir_ctrl *iommu_ir_ctrl(struct iommu *iommu)
142 {
143 return iommu ? &iommu->intel->ir_ctrl : NULL;
144 }
146 struct iommu_flush *iommu_get_flush(struct iommu *iommu)
147 {
148 return iommu ? &iommu->intel->flush : NULL;
149 }
151 static int iommus_incoherent;
152 static void __iommu_flush_cache(void *addr, unsigned int size)
153 {
154 int i;
155 static unsigned int clflush_size = 0;
157 if ( !iommus_incoherent )
158 return;
160 if ( clflush_size == 0 )
161 clflush_size = get_cache_line_size();
163 for ( i = 0; i < size; i += clflush_size )
164 cacheline_flush((char *)addr + i);
165 }
167 void iommu_flush_cache_entry(void *addr, unsigned int size)
168 {
169 __iommu_flush_cache(addr, size);
170 }
172 void iommu_flush_cache_page(void *addr, unsigned long npages)
173 {
174 __iommu_flush_cache(addr, PAGE_SIZE * npages);
175 }
177 /* Allocate page table, return its machine address */
178 u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages)
179 {
180 struct acpi_rhsa_unit *rhsa;
181 struct page_info *pg, *cur_pg;
182 u64 *vaddr;
183 int node = -1, i;
185 rhsa = drhd_to_rhsa(drhd);
186 if ( rhsa )
187 node = pxm_to_node(rhsa->proximity_domain);
189 pg = alloc_domheap_pages(NULL, get_order_from_pages(npages),
190 (node == -1 ) ? 0 : MEMF_node(node));
191 if ( !pg )
192 return 0;
194 cur_pg = pg;
195 for ( i = 0; i < npages; i++ )
196 {
197 vaddr = __map_domain_page(cur_pg);
198 memset(vaddr, 0, PAGE_SIZE);
200 iommu_flush_cache_page(vaddr, 1);
201 unmap_domain_page(vaddr);
202 cur_pg++;
203 }
205 return page_to_maddr(pg);
206 }
208 void free_pgtable_maddr(u64 maddr)
209 {
210 if ( maddr != 0 )
211 free_domheap_page(maddr_to_page(maddr));
212 }
214 /* context entry handling */
215 static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
216 {
217 struct acpi_drhd_unit *drhd;
218 struct root_entry *root, *root_entries;
219 u64 maddr;
221 ASSERT(spin_is_locked(&iommu->lock));
222 root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
223 root = &root_entries[bus];
224 if ( !root_present(*root) )
225 {
226 drhd = iommu_to_drhd(iommu);
227 maddr = alloc_pgtable_maddr(drhd, 1);
228 if ( maddr == 0 )
229 {
230 unmap_vtd_domain_page(root_entries);
231 return 0;
232 }
233 set_root_value(*root, maddr);
234 set_root_present(*root);
235 iommu_flush_cache_entry(root, sizeof(struct root_entry));
236 }
237 maddr = (u64) get_context_addr(*root);
238 unmap_vtd_domain_page(root_entries);
239 return maddr;
240 }
242 static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
243 {
244 struct acpi_drhd_unit *drhd;
245 struct pci_dev *pdev;
246 struct hvm_iommu *hd = domain_hvm_iommu(domain);
247 int addr_width = agaw_to_width(hd->agaw);
248 struct dma_pte *parent, *pte = NULL;
249 int level = agaw_to_level(hd->agaw);
250 int offset;
251 u64 pte_maddr = 0, maddr;
252 u64 *vaddr = NULL;
254 addr &= (((u64)1) << addr_width) - 1;
255 ASSERT(spin_is_locked(&hd->mapping_lock));
256 if ( hd->pgd_maddr == 0 )
257 {
258 /*
259 * just get any passthrough device in the domain - assume the user
260 * assigns only devices from the same node to a given guest.
261 */
262 pdev = pci_get_pdev_by_domain(domain, -1, -1);
263 drhd = acpi_find_matched_drhd_unit(pdev);
264 if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr(drhd, 1)) == 0) )
265 goto out;
266 }
268 parent = (struct dma_pte *)map_vtd_domain_page(hd->pgd_maddr);
269 while ( level > 1 )
270 {
271 offset = address_level_offset(addr, level);
272 pte = &parent[offset];
274 if ( dma_pte_addr(*pte) == 0 )
275 {
276 if ( !alloc )
277 break;
279 pdev = pci_get_pdev_by_domain(domain, -1, -1);
280 drhd = acpi_find_matched_drhd_unit(pdev);
281 maddr = alloc_pgtable_maddr(drhd, 1);
282 if ( !maddr )
283 break;
285 dma_set_pte_addr(*pte, maddr);
286 vaddr = map_vtd_domain_page(maddr);
288 /*
289 * higher level tables always set r/w; the last level
290 * page table controls read/write access
291 */
292 dma_set_pte_readable(*pte);
293 dma_set_pte_writable(*pte);
294 iommu_flush_cache_entry(pte, sizeof(struct dma_pte));
295 }
296 else
297 {
298 vaddr = map_vtd_domain_page(pte->val);
299 }
301 if ( level == 2 )
302 {
303 pte_maddr = pte->val & PAGE_MASK_4K;
304 unmap_vtd_domain_page(vaddr);
305 break;
306 }
308 unmap_vtd_domain_page(parent);
309 parent = (struct dma_pte *)vaddr;
310 vaddr = NULL;
311 level--;
312 }
314 unmap_vtd_domain_page(parent);
315 out:
316 return pte_maddr;
317 }
319 static void iommu_flush_write_buffer(struct iommu *iommu)
320 {
321 u32 val;
322 unsigned long flags;
324 if ( !rwbf_quirk && !cap_rwbf(iommu->cap) )
325 return;
327 spin_lock_irqsave(&iommu->register_lock, flags);
328 val = dmar_readl(iommu->reg, DMAR_GSTS_REG);
329 dmar_writel(iommu->reg, DMAR_GCMD_REG, val | DMA_GCMD_WBF);
331 /* Make sure hardware completes it */
332 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
333 !(val & DMA_GSTS_WBFS), val);
335 spin_unlock_irqrestore(&iommu->register_lock, flags);
336 }
338 /* The return value determines whether we need a write buffer flush */
339 static int flush_context_reg(
340 void *_iommu,
341 u16 did, u16 source_id, u8 function_mask, u64 type,
342 int flush_non_present_entry)
343 {
344 struct iommu *iommu = (struct iommu *) _iommu;
345 u64 val = 0;
346 unsigned long flags;
348 /*
349 * In the non-present entry flush case: if the hardware doesn't cache
350 * non-present entries we do nothing; if it does cache them, we flush
351 * the entries of domain 0 (the domain id used to tag any non-present
352 * entries)
353 */
354 if ( flush_non_present_entry )
355 {
356 if ( !cap_caching_mode(iommu->cap) )
357 return 1;
358 else
359 did = 0;
360 }
362 /* use register invalidation */
363 switch ( type )
364 {
365 case DMA_CCMD_GLOBAL_INVL:
366 val = DMA_CCMD_GLOBAL_INVL;
367 break;
368 case DMA_CCMD_DOMAIN_INVL:
369 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
370 break;
371 case DMA_CCMD_DEVICE_INVL:
372 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
373 |DMA_CCMD_SID(source_id)|DMA_CCMD_FM(function_mask);
374 break;
375 default:
376 BUG();
377 }
378 val |= DMA_CCMD_ICC;
380 spin_lock_irqsave(&iommu->register_lock, flags);
381 dmar_writeq(iommu->reg, DMAR_CCMD_REG, val);
383 /* Make sure hardware completes it */
384 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG, dmar_readq,
385 !(val & DMA_CCMD_ICC), val);
387 spin_unlock_irqrestore(&iommu->register_lock, flags);
388 /* flush context entry will implicitly flush write buffer */
389 return 0;
390 }
392 static int iommu_flush_context_global(
393 struct iommu *iommu, int flush_non_present_entry)
394 {
395 struct iommu_flush *flush = iommu_get_flush(iommu);
396 return flush->context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
397 flush_non_present_entry);
398 }
400 static int iommu_flush_context_device(
401 struct iommu *iommu, u16 did, u16 source_id,
402 u8 function_mask, int flush_non_present_entry)
403 {
404 struct iommu_flush *flush = iommu_get_flush(iommu);
405 return flush->context(iommu, did, source_id, function_mask,
406 DMA_CCMD_DEVICE_INVL,
407 flush_non_present_entry);
408 }
410 /* The return value determines whether we need a write buffer flush */
411 static int flush_iotlb_reg(void *_iommu, u16 did,
412 u64 addr, unsigned int size_order, u64 type,
413 int flush_non_present_entry, int flush_dev_iotlb)
414 {
415 struct iommu *iommu = (struct iommu *) _iommu;
416 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
417 u64 val = 0, val_iva = 0;
418 unsigned long flags;
420 /*
421 * In the non-present entry flush case: if the hardware doesn't cache
422 * non-present entries we do nothing; if it does cache them, we flush
423 * the entries of domain 0 (the domain id used to tag any non-present
424 * entries)
425 */
426 if ( flush_non_present_entry )
427 {
428 if ( !cap_caching_mode(iommu->cap) )
429 return 1;
430 else
431 did = 0;
432 }
434 /* use register invalidation */
435 switch ( type )
436 {
437 case DMA_TLB_GLOBAL_FLUSH:
438 /* global flush doesn't need to set IVA_REG */
439 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
440 break;
441 case DMA_TLB_DSI_FLUSH:
442 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
443 break;
444 case DMA_TLB_PSI_FLUSH:
445 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
446 /* Note: always flush non-leaf currently */
447 val_iva = size_order | addr;
448 break;
449 default:
450 BUG();
451 }
452 /* Note: set drain read/write */
453 if ( cap_read_drain(iommu->cap) )
454 val |= DMA_TLB_READ_DRAIN;
455 if ( cap_write_drain(iommu->cap) )
456 val |= DMA_TLB_WRITE_DRAIN;
458 spin_lock_irqsave(&iommu->register_lock, flags);
459 /* Note: Only uses first TLB reg currently */
460 if ( val_iva )
461 dmar_writeq(iommu->reg, tlb_offset, val_iva);
462 dmar_writeq(iommu->reg, tlb_offset + 8, val);
464 /* Make sure hardware completes it */
465 IOMMU_WAIT_OP(iommu, (tlb_offset + 8), dmar_readq,
466 !(val & DMA_TLB_IVT), val);
467 spin_unlock_irqrestore(&iommu->register_lock, flags);
469 /* check IOTLB invalidation granularity */
470 if ( DMA_TLB_IAIG(val) == 0 )
471 dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: flush IOTLB failed\n");
473 /* flush iotlb entry will implicitly flush write buffer */
474 return 0;
475 }
477 static int iommu_flush_iotlb_global(struct iommu *iommu,
478 int flush_non_present_entry, int flush_dev_iotlb)
479 {
480 struct iommu_flush *flush = iommu_get_flush(iommu);
481 int status;
483 /* apply platform specific errata workarounds */
484 vtd_ops_preamble_quirk(iommu);
486 status = flush->iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
487 flush_non_present_entry, flush_dev_iotlb);
489 /* undo platform specific errata workarounds */
490 vtd_ops_postamble_quirk(iommu);
492 return status;
493 }
495 static int iommu_flush_iotlb_dsi(struct iommu *iommu, u16 did,
496 int flush_non_present_entry, int flush_dev_iotlb)
497 {
498 struct iommu_flush *flush = iommu_get_flush(iommu);
499 int status;
501 /* apply platform specific errata workarounds */
502 vtd_ops_preamble_quirk(iommu);
504 status = flush->iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
505 flush_non_present_entry, flush_dev_iotlb);
507 /* undo platform specific errata workarounds */
508 vtd_ops_postamble_quirk(iommu);
510 return status;
511 }
513 static int iommu_flush_iotlb_psi(
514 struct iommu *iommu, u16 did, u64 addr, unsigned int order,
515 int flush_non_present_entry, int flush_dev_iotlb)
516 {
517 struct iommu_flush *flush = iommu_get_flush(iommu);
518 int status;
520 ASSERT(!(addr & (~PAGE_MASK_4K)));
522 /* Fallback to domain selective flush if no PSI support */
523 if ( !cap_pgsel_inv(iommu->cap) )
524 return iommu_flush_iotlb_dsi(iommu, did, flush_non_present_entry, flush_dev_iotlb);
526 /* Fallback to domain selective flush if size is too big */
527 if ( order > cap_max_amask_val(iommu->cap) )
528 return iommu_flush_iotlb_dsi(iommu, did, flush_non_present_entry, flush_dev_iotlb);
530 addr >>= PAGE_SHIFT_4K + order;
531 addr <<= PAGE_SHIFT_4K + order;
533 /* apply platform specific errata workarounds */
534 vtd_ops_preamble_quirk(iommu);
536 status = flush->iotlb(iommu, did, addr, order, DMA_TLB_PSI_FLUSH,
537 flush_non_present_entry, flush_dev_iotlb);
539 /* undo platform specific errata workarounds */
540 vtd_ops_postamble_quirk(iommu);
542 return status;
543 }
545 static void iommu_flush_all(void)
546 {
547 struct acpi_drhd_unit *drhd;
548 struct iommu *iommu;
549 int flush_dev_iotlb;
551 flush_all_cache();
552 for_each_drhd_unit ( drhd )
553 {
554 iommu = drhd->iommu;
555 iommu_flush_context_global(iommu, 0);
556 flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0;
557 iommu_flush_iotlb_global(iommu, 0, flush_dev_iotlb);
558 }
559 }
561 /* clear one page's page table */
562 static void dma_pte_clear_one(struct domain *domain, u64 addr)
563 {
564 struct hvm_iommu *hd = domain_hvm_iommu(domain);
565 struct acpi_drhd_unit *drhd;
566 struct iommu *iommu;
567 struct dma_pte *page = NULL, *pte = NULL;
568 u64 pg_maddr;
569 int flush_dev_iotlb;
570 int iommu_domid;
571 struct list_head *rmrr_list, *tmp;
572 struct mapped_rmrr *mrmrr;
574 spin_lock(&hd->mapping_lock);
575 /* get last level pte */
576 pg_maddr = addr_to_dma_page_maddr(domain, addr, 0);
577 if ( pg_maddr == 0 )
578 {
579 spin_unlock(&hd->mapping_lock);
580 return;
581 }
583 page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
584 pte = page + address_level_offset(addr, 1);
586 if ( !dma_pte_present(*pte) )
587 {
588 spin_unlock(&hd->mapping_lock);
589 unmap_vtd_domain_page(page);
590 return;
591 }
593 dma_clear_pte(*pte);
594 spin_unlock(&hd->mapping_lock);
595 iommu_flush_cache_entry(pte, sizeof(struct dma_pte));
597 /* No need to hold pcidevs_lock here, since that is done on device assign/deassign. */
598 for_each_drhd_unit ( drhd )
599 {
600 iommu = drhd->iommu;
601 if ( test_bit(iommu->index, &hd->iommu_bitmap) )
602 {
603 flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0;
604 iommu_domid= domain_iommu_domid(domain, iommu);
605 if ( iommu_domid == -1 )
606 continue;
607 if ( iommu_flush_iotlb_psi(iommu, iommu_domid, addr,
608 0, 0, flush_dev_iotlb) )
609 iommu_flush_write_buffer(iommu);
610 }
611 }
613 unmap_vtd_domain_page(page);
615 /* if the cleared address falls within a mapped RMRR region,
616 * remove the mapped RMRR
617 */
618 spin_lock(&pcidevs_lock);
619 list_for_each_safe ( rmrr_list, tmp, &hd->mapped_rmrrs )
620 {
621 mrmrr = list_entry(rmrr_list, struct mapped_rmrr, list);
622 if ( addr >= mrmrr->base && addr <= mrmrr->end )
623 {
624 list_del(&mrmrr->list);
625 xfree(mrmrr);
626 break;
627 }
628 }
629 spin_unlock(&pcidevs_lock);
630 }
632 static void iommu_free_pagetable(u64 pt_maddr, int level)
633 {
634 int i;
635 struct dma_pte *pt_vaddr, *pte;
636 int next_level = level - 1;
638 if ( pt_maddr == 0 )
639 return;
641 pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr);
643 for ( i = 0; i < PTE_NUM; i++ )
644 {
645 pte = &pt_vaddr[i];
646 if ( !dma_pte_present(*pte) )
647 continue;
649 if ( next_level >= 1 )
650 iommu_free_pagetable(dma_pte_addr(*pte), next_level);
652 dma_clear_pte(*pte);
653 iommu_flush_cache_entry(pte, sizeof(struct dma_pte));
654 }
656 unmap_vtd_domain_page(pt_vaddr);
657 free_pgtable_maddr(pt_maddr);
658 }
660 static int iommu_set_root_entry(struct iommu *iommu)
661 {
662 struct acpi_drhd_unit *drhd;
663 u32 sts;
664 unsigned long flags;
666 spin_lock(&iommu->lock);
668 if ( iommu->root_maddr == 0 )
669 {
670 drhd = iommu_to_drhd(iommu);
671 iommu->root_maddr = alloc_pgtable_maddr(drhd, 1);
672 }
674 if ( iommu->root_maddr == 0 )
675 {
676 spin_unlock(&iommu->lock);
677 return -ENOMEM;
678 }
680 spin_unlock(&iommu->lock);
681 spin_lock_irqsave(&iommu->register_lock, flags);
682 dmar_writeq(iommu->reg, DMAR_RTADDR_REG, iommu->root_maddr);
684 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
685 dmar_writel(iommu->reg, DMAR_GCMD_REG, sts | DMA_GCMD_SRTP);
687 /* Make sure hardware completes it */
688 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
689 (sts & DMA_GSTS_RTPS), sts);
690 spin_unlock_irqrestore(&iommu->register_lock, flags);
692 return 0;
693 }
695 static void iommu_enable_translation(struct acpi_drhd_unit *drhd)
696 {
697 u32 sts;
698 unsigned long flags;
699 struct iommu *iommu = drhd->iommu;
701 if ( is_igd_drhd(drhd) && !is_igd_vt_enabled_quirk() )
702 {
703 if ( force_iommu )
704 panic("BIOS did not enable IGD for VT properly, crash Xen for security purpose!\n");
705 else
706 {
707 dprintk(XENLOG_WARNING VTDPREFIX,
708 "BIOS did not enable IGD for VT properly. Disabling IGD VT-d engine.\n");
709 return;
710 }
711 }
713 /* apply platform specific errata workarounds */
714 vtd_ops_preamble_quirk(iommu);
716 if ( iommu_verbose )
717 dprintk(VTDPREFIX,
718 "iommu_enable_translation: iommu->reg = %p\n", iommu->reg);
719 spin_lock_irqsave(&iommu->register_lock, flags);
720 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
721 dmar_writel(iommu->reg, DMAR_GCMD_REG, sts | DMA_GCMD_TE);
723 /* Make sure hardware completes it */
724 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
725 (sts & DMA_GSTS_TES), sts);
726 spin_unlock_irqrestore(&iommu->register_lock, flags);
728 /* undo platform specific errata workarounds */
729 vtd_ops_postamble_quirk(iommu);
731 /* Disable PMRs when VT-d engine takes effect per spec definition */
732 disable_pmr(iommu);
733 }
735 static void iommu_disable_translation(struct iommu *iommu)
736 {
737 u32 sts;
738 unsigned long flags;
740 /* apply platform specific errata workarounds */
741 vtd_ops_preamble_quirk(iommu);
743 spin_lock_irqsave(&iommu->register_lock, flags);
744 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
745 dmar_writel(iommu->reg, DMAR_GCMD_REG, sts & (~DMA_GCMD_TE));
747 /* Make sure hardware completes it */
748 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
749 !(sts & DMA_GSTS_TES), sts);
750 spin_unlock_irqrestore(&iommu->register_lock, flags);
752 /* undo platform specific errata workarounds */
753 vtd_ops_postamble_quirk(iommu);
754 }
756 enum faulttype {
757 DMA_REMAP,
758 INTR_REMAP,
759 UNKNOWN,
760 };
762 static const char *dma_remap_fault_reasons[] =
763 {
764 "Software",
765 "Present bit in root entry is clear",
766 "Present bit in context entry is clear",
767 "Invalid context entry",
768 "Access beyond MGAW",
769 "PTE Write access is not set",
770 "PTE Read access is not set",
771 "Next page table ptr is invalid",
772 "Root table address invalid",
773 "Context table ptr is invalid",
774 "non-zero reserved fields in RTP",
775 "non-zero reserved fields in CTP",
776 "non-zero reserved fields in PTE",
777 "Blocked a DMA translation request",
778 };
780 static const char *intr_remap_fault_reasons[] =
781 {
782 "Detected reserved fields in the decoded interrupt-remapped request",
783 "Interrupt index exceeded the interrupt-remapping table size",
784 "Present field in the IRTE entry is clear",
785 "Error accessing interrupt-remapping table pointed by IRTA_REG",
786 "Detected reserved fields in the IRTE entry",
787 "Blocked a compatibility format interrupt request",
788 "Blocked an interrupt request due to source-id verification failure",
789 };
791 static const char *iommu_get_fault_reason(u8 fault_reason, int *fault_type)
792 {
793 if ( fault_reason >= 0x20 && ( fault_reason < 0x20 +
794 ARRAY_SIZE(intr_remap_fault_reasons)) )
795 {
796 *fault_type = INTR_REMAP;
797 return intr_remap_fault_reasons[fault_reason - 0x20];
798 }
799 else if ( fault_reason < ARRAY_SIZE(dma_remap_fault_reasons) )
800 {
801 *fault_type = DMA_REMAP;
802 return dma_remap_fault_reasons[fault_reason];
803 }
804 else
805 {
806 *fault_type = UNKNOWN;
807 return "Unknown";
808 }
809 }
811 static struct iommu **irq_to_iommu;
812 static int iommu_page_fault_do_one(struct iommu *iommu, int type,
813 u8 fault_reason, u16 source_id, u64 addr)
814 {
815 const char *reason;
816 int fault_type;
817 reason = iommu_get_fault_reason(fault_reason, &fault_type);
819 if ( fault_type == DMA_REMAP )
820 {
821 dprintk(XENLOG_WARNING VTDPREFIX,
822 "DMAR:[%s] Request device [%02x:%02x.%d] "
823 "fault addr %"PRIx64", iommu reg = %p\n"
824 "DMAR:[fault reason %02xh] %s\n",
825 (type ? "DMA Read" : "DMA Write"),
826 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
827 PCI_FUNC(source_id & 0xFF), addr, iommu->reg,
828 fault_reason, reason);
829 #ifndef __i386__ /* map_domain_page() cannot be used in this context */
830 print_vtd_entries(iommu, (source_id >> 8),
831 (source_id & 0xff), (addr >> PAGE_SHIFT));
832 #endif
833 }
834 else
835 dprintk(XENLOG_WARNING VTDPREFIX,
836 "INTR-REMAP: Request device [%02x:%02x.%d] "
837 "fault index %"PRIx64", iommu reg = %p\n"
838 "INTR-REMAP:[fault reason %02xh] %s\n",
839 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
840 PCI_FUNC(source_id & 0xFF), addr >> 48, iommu->reg,
841 fault_reason, reason);
842 return 0;
844 }
846 static void iommu_fault_status(u32 fault_status)
847 {
848 if ( fault_status & DMA_FSTS_PFO )
849 dprintk(XENLOG_ERR VTDPREFIX,
850 "iommu_fault_status: Fault Overflow\n");
851 if ( fault_status & DMA_FSTS_PPF )
852 dprintk(XENLOG_ERR VTDPREFIX,
853 "iommu_fault_status: Primary Pending Fault\n");
854 if ( fault_status & DMA_FSTS_AFO )
855 dprintk(XENLOG_ERR VTDPREFIX,
856 "iommu_fault_status: Advanced Fault Overflow\n");
857 if ( fault_status & DMA_FSTS_APF )
858 dprintk(XENLOG_ERR VTDPREFIX,
859 "iommu_fault_status: Advanced Pending Fault\n");
860 if ( fault_status & DMA_FSTS_IQE )
861 dprintk(XENLOG_ERR VTDPREFIX,
862 "iommu_fault_status: Invalidation Queue Error\n");
863 if ( fault_status & DMA_FSTS_ICE )
864 dprintk(XENLOG_ERR VTDPREFIX,
865 "iommu_fault_status: Invalidation Completion Error\n");
866 if ( fault_status & DMA_FSTS_ITE )
867 dprintk(XENLOG_ERR VTDPREFIX,
868 "iommu_fault_status: Invalidation Time-out Error\n");
869 }
871 #define PRIMARY_FAULT_REG_LEN (16)
872 static void iommu_page_fault(int irq, void *dev_id,
873 struct cpu_user_regs *regs)
874 {
875 struct iommu *iommu = dev_id;
876 int reg, fault_index;
877 u32 fault_status;
878 unsigned long flags;
880 fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG);
882 iommu_fault_status(fault_status);
884 /* FIXME: ignore advanced fault log */
885 if ( !(fault_status & DMA_FSTS_PPF) )
886 goto clear_overflow;
888 fault_index = dma_fsts_fault_record_index(fault_status);
889 reg = cap_fault_reg_offset(iommu->cap);
890 while (1)
891 {
892 u8 fault_reason;
893 u16 source_id;
894 u32 data;
895 u64 guest_addr;
896 int type;
898 /* highest 32 bits */
899 spin_lock_irqsave(&iommu->register_lock, flags);
900 data = dmar_readl(iommu->reg, reg +
901 fault_index * PRIMARY_FAULT_REG_LEN + 12);
902 if ( !(data & DMA_FRCD_F) )
903 {
904 spin_unlock_irqrestore(&iommu->register_lock, flags);
905 break;
906 }
908 fault_reason = dma_frcd_fault_reason(data);
909 type = dma_frcd_type(data);
911 data = dmar_readl(iommu->reg, reg +
912 fault_index * PRIMARY_FAULT_REG_LEN + 8);
913 source_id = dma_frcd_source_id(data);
915 guest_addr = dmar_readq(iommu->reg, reg +
916 fault_index * PRIMARY_FAULT_REG_LEN);
917 guest_addr = dma_frcd_page_addr(guest_addr);
918 /* clear the fault */
919 dmar_writel(iommu->reg, reg +
920 fault_index * PRIMARY_FAULT_REG_LEN + 12, DMA_FRCD_F);
921 spin_unlock_irqrestore(&iommu->register_lock, flags);
923 iommu_page_fault_do_one(iommu, type, fault_reason,
924 source_id, guest_addr);
926 fault_index++;
927 if ( fault_index >= cap_num_fault_regs(iommu->cap) )
928 fault_index = 0;
929 }
930 clear_overflow:
931 /* clear primary fault overflow */
932 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
933 if ( fault_status & DMA_FSTS_PFO )
934 {
935 spin_lock_irqsave(&iommu->register_lock, flags);
936 dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO);
937 spin_unlock_irqrestore(&iommu->register_lock, flags);
938 }
939 }
941 static void dma_msi_unmask(unsigned int irq)
942 {
943 struct iommu *iommu = irq_to_iommu[irq];
944 unsigned long flags;
946 /* unmask it */
947 spin_lock_irqsave(&iommu->register_lock, flags);
948 dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
949 spin_unlock_irqrestore(&iommu->register_lock, flags);
950 }
952 static void dma_msi_mask(unsigned int irq)
953 {
954 unsigned long flags;
955 struct iommu *iommu = irq_to_iommu[irq];
956 struct irq_desc *desc = irq_to_desc(irq);
958 irq_complete_move(&desc);
960 /* mask it */
961 spin_lock_irqsave(&iommu->register_lock, flags);
962 dmar_writel(iommu->reg, DMAR_FECTL_REG, DMA_FECTL_IM);
963 spin_unlock_irqrestore(&iommu->register_lock, flags);
964 }
966 static unsigned int dma_msi_startup(unsigned int irq)
967 {
968 dma_msi_unmask(irq);
969 return 0;
970 }
972 static void dma_msi_end(unsigned int irq)
973 {
974 dma_msi_unmask(irq);
975 ack_APIC_irq();
976 }
978 static void dma_msi_set_affinity(unsigned int irq, cpumask_t mask)
979 {
980 struct msi_msg msg;
981 unsigned int dest;
982 unsigned long flags;
984 struct iommu *iommu = irq_to_iommu[irq];
985 struct irq_desc *desc = irq_to_desc(irq);
986 struct irq_cfg *cfg = desc->chip_data;
988 #ifdef CONFIG_X86
989 dest = set_desc_affinity(desc, &mask);
990 if (dest == BAD_APICID){
991 dprintk(XENLOG_ERR VTDPREFIX, "Set iommu interrupt affinity error!\n");
992 return;
993 }
995 memset(&msg, 0, sizeof(msg));
996 msg.data = MSI_DATA_VECTOR(cfg->vector) & 0xff;
997 msg.data |= 1 << 14;
998 msg.data |= (INT_DELIVERY_MODE != dest_LowestPrio) ?
999 MSI_DATA_DELIVERY_FIXED:
1000 MSI_DATA_DELIVERY_LOWPRI;
1002 /* Follow MSI setting */
1003 if (x2apic_enabled)
1004 msg.address_hi = dest & 0xFFFFFF00;
1005 msg.address_lo = (MSI_ADDRESS_HEADER << (MSI_ADDRESS_HEADER_SHIFT + 8));
1006 msg.address_lo |= INT_DEST_MODE ? MSI_ADDR_DESTMODE_LOGIC:
1007 MSI_ADDR_DESTMODE_PHYS;
1008 msg.address_lo |= (INT_DELIVERY_MODE != dest_LowestPrio) ?
1009 MSI_ADDR_REDIRECTION_CPU:
1010 MSI_ADDR_REDIRECTION_LOWPRI;
1011 msg.address_lo |= MSI_ADDR_DEST_ID(dest & 0xff);
1012 #else
1013 memset(&msg, 0, sizeof(msg));
1014 msg.data = cfg->vector & 0xff;
1015 msg.data |= 1 << 14;
1016 msg.address_lo = (MSI_ADDRESS_HEADER << (MSI_ADDRESS_HEADER_SHIFT + 8));
1017 msg.address_lo |= MSI_PHYSICAL_MODE << 2;
1018 msg.address_lo |= MSI_REDIRECTION_HINT_MODE << 3;
1019 dest = cpu_physical_id(first_cpu(mask));
1020 msg.address_lo |= dest << MSI_TARGET_CPU_SHIFT;
1021 #endif
1023 spin_lock_irqsave(&iommu->register_lock, flags);
1024 dmar_writel(iommu->reg, DMAR_FEDATA_REG, msg.data);
1025 dmar_writel(iommu->reg, DMAR_FEADDR_REG, msg.address_lo);
1026 dmar_writel(iommu->reg, DMAR_FEUADDR_REG, msg.address_hi);
1027 spin_unlock_irqrestore(&iommu->register_lock, flags);
1030 static hw_irq_controller dma_msi_type = {
1031 .typename = "DMA_MSI",
1032 .startup = dma_msi_startup,
1033 .shutdown = dma_msi_mask,
1034 .enable = dma_msi_unmask,
1035 .disable = dma_msi_mask,
1036 .ack = dma_msi_mask,
1037 .end = dma_msi_end,
1038 .set_affinity = dma_msi_set_affinity,
1039 };
1041 static int iommu_set_interrupt(struct iommu *iommu)
1043 int irq, ret;
1045 irq = create_irq();
1046 if ( irq <= 0 )
1048 dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no irq available!\n");
1049 return -EINVAL;
1052 irq_desc[irq].handler = &dma_msi_type;
1053 irq_to_iommu[irq] = iommu;
1054 #ifdef CONFIG_X86
1055 ret = request_irq(irq, iommu_page_fault, 0, "dmar", iommu);
1056 #else
1057 ret = request_irq_vector(irq, iommu_page_fault, 0, "dmar", iommu);
1058 #endif
1059 if ( ret )
1061 irq_desc[irq].handler = &no_irq_type;
1062 irq_to_iommu[irq] = NULL;
1063 destroy_irq(irq);
1064 dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n");
1065 return ret;
1068 return irq;
1071 int __init iommu_alloc(struct acpi_drhd_unit *drhd)
1073 struct iommu *iommu;
1074 unsigned long sagaw, nr_dom;
1075 int agaw;
1077 if ( nr_iommus >= MAX_IOMMUS )
1079 dprintk(XENLOG_ERR VTDPREFIX,
1080 "IOMMU: nr_iommus %d >= MAX_IOMMUS\n", nr_iommus);
1081 return -ENOMEM;
1084 iommu = xmalloc(struct iommu);
1085 if ( iommu == NULL )
1086 return -ENOMEM;
1087 memset(iommu, 0, sizeof(struct iommu));
1089 iommu->irq = -1; /* No irq assigned yet. */
1091 iommu->intel = alloc_intel_iommu();
1092 if ( iommu->intel == NULL )
1094 xfree(iommu);
1095 return -ENOMEM;
1097 iommu->intel->drhd = drhd;
1099 iommu->reg = map_to_nocache_virt(nr_iommus, drhd->address);
1100 iommu->index = nr_iommus++;
1102 iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
1103 iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
1105 drhd->iommu = iommu;
1107 if ( iommu_verbose )
1109 dprintk(VTDPREFIX,
1110 "drhd->address = %"PRIx64" iommu->reg = %p\n",
1111 drhd->address, iommu->reg);
1112 dprintk(VTDPREFIX,
1113 "cap = %"PRIx64" ecap = %"PRIx64"\n", iommu->cap, iommu->ecap);
1115 if ( cap_fault_reg_offset(iommu->cap) +
1116 cap_num_fault_regs(iommu->cap) * PRIMARY_FAULT_REG_LEN >= PAGE_SIZE ||
1117 ecap_iotlb_offset(iommu->ecap) >= PAGE_SIZE )
1119 dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: unsupported\n");
1120 print_iommu_regs(drhd);
1121 return -ENODEV;
1124 /* Calculate number of pagetable levels: between 2 and 4. */
1125 sagaw = cap_sagaw(iommu->cap);
1126 for ( agaw = level_to_agaw(4); agaw >= 0; agaw-- )
1127 if ( test_bit(agaw, &sagaw) )
1128 break;
1129 if ( agaw < 0 )
1131 dprintk(XENLOG_ERR VTDPREFIX,
1132 "IOMMU: unsupported sagaw %lx\n", sagaw);
1133 print_iommu_regs(drhd);
1134 return -ENODEV;
1136 iommu->nr_pt_levels = agaw_to_level(agaw);
1138 if ( !ecap_coherent(iommu->ecap) )
1139 iommus_incoherent = 1;
1141 /* allocate domain id bitmap */
1142 nr_dom = cap_ndoms(iommu->cap);
1143 iommu->domid_bitmap = xmalloc_array(unsigned long, BITS_TO_LONGS(nr_dom));
1144 if ( !iommu->domid_bitmap )
1145 return -ENOMEM;
1146 memset(iommu->domid_bitmap, 0, nr_dom / 8);
1148 /*
1149 * if Caching mode is set, then invalid translations are tagged with
1150 * domain id 0; hence reserve bit 0 for it
1151 */
1152 if ( cap_caching_mode(iommu->cap) )
1153 set_bit(0, iommu->domid_bitmap);
1155 iommu->domid_map = xmalloc_array(u16, nr_dom);
1156 if ( !iommu->domid_map )
1157 return -ENOMEM;
1158 memset(iommu->domid_map, 0, nr_dom * sizeof(*iommu->domid_map));
1160 spin_lock_init(&iommu->lock);
1161 spin_lock_init(&iommu->register_lock);
1163 return 0;
1166 void __init iommu_free(struct acpi_drhd_unit *drhd)
1168 struct iommu *iommu = drhd->iommu;
1170 if ( iommu == NULL )
1171 return;
1173 drhd->iommu = NULL;
1175 if ( iommu->root_maddr != 0 )
1177 free_pgtable_maddr(iommu->root_maddr);
1178 iommu->root_maddr = 0;
1181 if ( iommu->reg )
1182 iounmap(iommu->reg);
1184 xfree(iommu->domid_bitmap);
1185 xfree(iommu->domid_map);
1187 free_intel_iommu(iommu->intel);
1188 if ( iommu->irq >= 0 )
1189 destroy_irq(iommu->irq);
1190 xfree(iommu);
1193 #define guestwidth_to_adjustwidth(gaw) ({ \
1194 int agaw, r = (gaw - 12) % 9; \
1195 agaw = (r == 0) ? gaw : (gaw + 9 - r); \
1196 if ( agaw > 64 ) \
1197 agaw = 64; \
1198 agaw; })
1200 static int intel_iommu_domain_init(struct domain *d)
1202 struct hvm_iommu *hd = domain_hvm_iommu(d);
1204 hd->agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
1206 return 0;
1209 static void __init intel_iommu_dom0_init(struct domain *d)
1211 struct acpi_drhd_unit *drhd;
1213 if ( !iommu_passthrough && !need_iommu(d) )
1215 /* Set up 1:1 page table for dom0 */
1216 iommu_set_dom0_mapping(d);
1219 setup_dom0_devices(d);
1220 setup_dom0_rmrr(d);
1222 iommu_flush_all();
1224 for_each_drhd_unit ( drhd )
1226 iommu_enable_translation(drhd);
1230 int domain_context_mapping_one(
1231 struct domain *domain,
1232 struct iommu *iommu,
1233 u8 bus, u8 devfn)
1235 struct hvm_iommu *hd = domain_hvm_iommu(domain);
1236 struct context_entry *context, *context_entries;
1237 u64 maddr, pgd_maddr;
1238 struct pci_dev *pdev = NULL;
1239 int agaw;
1241 ASSERT(spin_is_locked(&pcidevs_lock));
1242 spin_lock(&iommu->lock);
1243 maddr = bus_to_context_maddr(iommu, bus);
1244 context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
1245 context = &context_entries[devfn];
1247 if ( context_present(*context) )
1249 int res = 0;
1251 pdev = pci_get_pdev(bus, devfn);
1252 if (!pdev)
1253 res = -ENODEV;
1254 else if (pdev->domain != domain)
1255 res = -EINVAL;
1256 unmap_vtd_domain_page(context_entries);
1257 spin_unlock(&iommu->lock);
1258 return res;
1261 if ( iommu_passthrough && (domain->domain_id == 0) )
1263 context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
1264 agaw = level_to_agaw(iommu->nr_pt_levels);
1266 else
1268 spin_lock(&hd->mapping_lock);
1270 /* Ensure we have pagetables allocated down to leaf PTE. */
1271 if ( hd->pgd_maddr == 0 )
1273 addr_to_dma_page_maddr(domain, 0, 1);
1274 if ( hd->pgd_maddr == 0 )
1276 nomem:
1277 spin_unlock(&hd->mapping_lock);
1278 spin_unlock(&iommu->lock);
1279 unmap_vtd_domain_page(context_entries);
1280 return -ENOMEM;
1284 /* Skip top levels of page tables for 2- and 3-level DRHDs. */
1285 pgd_maddr = hd->pgd_maddr;
1286 for ( agaw = level_to_agaw(4);
1287 agaw != level_to_agaw(iommu->nr_pt_levels);
1288 agaw-- )
1290 struct dma_pte *p = map_vtd_domain_page(pgd_maddr);
1291 pgd_maddr = dma_pte_addr(*p);
1292 unmap_vtd_domain_page(p);
1293 if ( pgd_maddr == 0 )
1294 goto nomem;
1297 context_set_address_root(*context, pgd_maddr);
1298 if ( ats_enabled && ecap_dev_iotlb(iommu->ecap) )
1299 context_set_translation_type(*context, CONTEXT_TT_DEV_IOTLB);
1300 else
1301 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1303 spin_unlock(&hd->mapping_lock);
1306 if ( context_set_domain_id(context, domain, iommu) )
1308 spin_unlock(&iommu->lock);
1309 unmap_vtd_domain_page(context_entries);
1310 return -EFAULT;
1313 context_set_address_width(*context, agaw);
1314 context_set_fault_enable(*context);
1315 context_set_present(*context);
1316 iommu_flush_cache_entry(context, sizeof(struct context_entry));
1317 spin_unlock(&iommu->lock);
1319 /* Context entry was previously non-present (with domid 0). */
1320 if ( iommu_flush_context_device(iommu, 0, (((u16)bus) << 8) | devfn,
1321 DMA_CCMD_MASK_NOBIT, 1) )
1322 iommu_flush_write_buffer(iommu);
1323 else
1325 int flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0;
1326 iommu_flush_iotlb_dsi(iommu, 0, 1, flush_dev_iotlb);
1329 set_bit(iommu->index, &hd->iommu_bitmap);
1331 unmap_vtd_domain_page(context_entries);
1333 me_wifi_quirk(domain, bus, devfn, MAP_ME_PHANTOM_FUNC);
1335 return 0;
1338 static int domain_context_mapping(struct domain *domain, u8 bus, u8 devfn)
1340 struct acpi_drhd_unit *drhd;
1341 int ret = 0;
1342 u32 type;
1343 u8 secbus;
1344 struct pci_dev *pdev = pci_get_pdev(bus, devfn);
1346 drhd = acpi_find_matched_drhd_unit(pdev);
1347 if ( !drhd )
1348 return -ENODEV;
1350 ASSERT(spin_is_locked(&pcidevs_lock));
1352 type = pdev_type(bus, devfn);
1353 switch ( type )
1355 case DEV_TYPE_PCIe_BRIDGE:
1356 case DEV_TYPE_PCIe2PCI_BRIDGE:
1357 case DEV_TYPE_LEGACY_PCI_BRIDGE:
1358 break;
1360 case DEV_TYPE_PCIe_ENDPOINT:
1361 if ( iommu_verbose )
1362 dprintk(VTDPREFIX, "d%d:PCIe: map bdf = %x:%x.%x\n",
1363 domain->domain_id, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1364 ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
1365 if ( !ret && ats_device(0, bus, devfn) )
1366 enable_ats_device(0, bus, devfn);
1368 break;
1370 case DEV_TYPE_PCI:
1371 if ( iommu_verbose )
1372 dprintk(VTDPREFIX, "d%d:PCI: map bdf = %x:%x.%x\n",
1373 domain->domain_id, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1375 ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
1376 if ( ret )
1377 break;
1379 if ( find_upstream_bridge(&bus, &devfn, &secbus) < 1 )
1380 break;
1382 ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
1384 /*
1385 * Devices behind PCIe-to-PCI/PCIx bridge may generate different
1386 * requester-id. It may originate from devfn=0 on the secondary bus
1387 * behind the bridge. Map that id as well if we didn't already.
1388 */
1389 if ( !ret && pdev_type(bus, devfn) == DEV_TYPE_PCIe2PCI_BRIDGE &&
1390 (secbus != pdev->bus || pdev->devfn != 0) )
1391 ret = domain_context_mapping_one(domain, drhd->iommu, secbus, 0);
1393 break;
1395 default:
1396 dprintk(XENLOG_ERR VTDPREFIX, "d%d:unknown(%u): bdf = %x:%x.%x\n",
1397 domain->domain_id, type,
1398 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1399 ret = -EINVAL;
1400 break;
1403 if ( iommu_verbose )
1404 process_pending_softirqs();
1406 return ret;
1409 int domain_context_unmap_one(
1410 struct domain *domain,
1411 struct iommu *iommu,
1412 u8 bus, u8 devfn)
1414 struct context_entry *context, *context_entries;
1415 u64 maddr;
1416 int iommu_domid;
1418 ASSERT(spin_is_locked(&pcidevs_lock));
1419 spin_lock(&iommu->lock);
1421 maddr = bus_to_context_maddr(iommu, bus);
1422 context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
1423 context = &context_entries[devfn];
1425 if ( !context_present(*context) )
1427 spin_unlock(&iommu->lock);
1428 unmap_vtd_domain_page(context_entries);
1429 return 0;
1432 context_clear_present(*context);
1433 context_clear_entry(*context);
1434 iommu_flush_cache_entry(context, sizeof(struct context_entry));
1436 iommu_domid= domain_iommu_domid(domain, iommu);
1437 if ( iommu_domid == -1 )
1439 spin_unlock(&iommu->lock);
1440 unmap_vtd_domain_page(context_entries);
1441 return -EINVAL;
1444 if ( iommu_flush_context_device(iommu, iommu_domid,
1445 (((u16)bus) << 8) | devfn,
1446 DMA_CCMD_MASK_NOBIT, 0) )
1447 iommu_flush_write_buffer(iommu);
1448 else
1450 int flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0;
1451 iommu_flush_iotlb_dsi(iommu, iommu_domid, 0, flush_dev_iotlb);
1454 spin_unlock(&iommu->lock);
1455 unmap_vtd_domain_page(context_entries);
1457 me_wifi_quirk(domain, bus, devfn, UNMAP_ME_PHANTOM_FUNC);
1459 return 0;
1462 static int domain_context_unmap(struct domain *domain, u8 bus, u8 devfn)
1464 struct acpi_drhd_unit *drhd;
1465 struct iommu *iommu;
1466 int ret = 0;
1467 u32 type;
1468 u8 tmp_bus, tmp_devfn, secbus;
1469 struct pci_dev *pdev = pci_get_pdev(bus, devfn);
1470 int found = 0;
1472 BUG_ON(!pdev);
1474 drhd = acpi_find_matched_drhd_unit(pdev);
1475 if ( !drhd )
1476 return -ENODEV;
1477 iommu = drhd->iommu;
1479 type = pdev_type(bus, devfn);
1480 switch ( type )
1482 case DEV_TYPE_PCIe_BRIDGE:
1483 case DEV_TYPE_PCIe2PCI_BRIDGE:
1484 case DEV_TYPE_LEGACY_PCI_BRIDGE:
1485 goto out;
1487 case DEV_TYPE_PCIe_ENDPOINT:
1488 if ( iommu_verbose )
1489 dprintk(VTDPREFIX, "d%d:PCIe: unmap bdf = %x:%x.%x\n",
1490 domain->domain_id, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1491 ret = domain_context_unmap_one(domain, iommu, bus, devfn);
1492 if ( !ret && ats_device(0, bus, devfn) )
1493 disable_ats_device(0, bus, devfn);
1495 break;
1497 case DEV_TYPE_PCI:
1498 if ( iommu_verbose )
1499 dprintk(VTDPREFIX, "d%d:PCI: unmap bdf = %x:%x.%x\n",
1500 domain->domain_id, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1501 ret = domain_context_unmap_one(domain, iommu, bus, devfn);
1502 if ( ret )
1503 break;
1505 tmp_bus = bus;
1506 tmp_devfn = devfn;
1507 if ( find_upstream_bridge(&tmp_bus, &tmp_devfn, &secbus) < 1 )
1508 break;
1510 /* PCIe to PCI/PCIx bridge */
1511 if ( pdev_type(tmp_bus, tmp_devfn) == DEV_TYPE_PCIe2PCI_BRIDGE )
1513 ret = domain_context_unmap_one(domain, iommu, tmp_bus, tmp_devfn);
1514 if ( ret )
1515 return ret;
1517 ret = domain_context_unmap_one(domain, iommu, secbus, 0);
1519 else /* Legacy PCI bridge */
1520 ret = domain_context_unmap_one(domain, iommu, tmp_bus, tmp_devfn);
1522 break;
1524 default:
1525 dprintk(XENLOG_ERR VTDPREFIX, "d%d:unknown(%u): bdf = %x:%x.%x\n",
1526 domain->domain_id, type,
1527 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1528 ret = -EINVAL;
1529 goto out;
1532 /*
1533 * if no other devices under the same iommu are owned by this domain,
1534 * clear the iommu from iommu_bitmap and clear the domain id from domid_bitmap
1535 */
1536 for_each_pdev ( domain, pdev )
1538 if ( pdev->bus == bus && pdev->devfn == devfn )
1539 continue;
1541 drhd = acpi_find_matched_drhd_unit(pdev);
1542 if ( drhd && drhd->iommu == iommu )
1544 found = 1;
1545 break;
1549 if ( found == 0 )
1551 struct hvm_iommu *hd = domain_hvm_iommu(domain);
1552 int iommu_domid;
1554 clear_bit(iommu->index, &hd->iommu_bitmap);
1556 iommu_domid = domain_iommu_domid(domain, iommu);
1557 if ( iommu_domid == -1 )
1559 ret = -EINVAL;
1560 goto out;
1563 clear_bit(iommu_domid, iommu->domid_bitmap);
1564 iommu->domid_map[iommu_domid] = 0;
1567 out:
1568 return ret;
1571 static int reassign_device_ownership(
1572 struct domain *source,
1573 struct domain *target,
1574 u8 bus, u8 devfn)
1576 struct pci_dev *pdev;
1577 int ret;
1579 ASSERT(spin_is_locked(&pcidevs_lock));
1580 pdev = pci_get_pdev_by_domain(source, bus, devfn);
1582 if (!pdev)
1583 return -ENODEV;
1585 ret = domain_context_unmap(source, bus, devfn);
1586 if ( ret )
1587 return ret;
1589 ret = domain_context_mapping(target, bus, devfn);
1590 if ( ret )
1591 return ret;
1593 list_move(&pdev->domain_list, &target->arch.pdev_list);
1594 pdev->domain = target;
1596 return ret;
1599 void iommu_domain_teardown(struct domain *d)
1601 struct hvm_iommu *hd = domain_hvm_iommu(d);
1603 if ( list_empty(&acpi_drhd_units) )
1604 return;
1606 if ( iommu_hap_pt_share )
1607 return;
1609 spin_lock(&hd->mapping_lock);
1610 iommu_free_pagetable(hd->pgd_maddr, agaw_to_level(hd->agaw));
1611 hd->pgd_maddr = 0;
1612 spin_unlock(&hd->mapping_lock);
1615 static int intel_iommu_map_page(
1616 struct domain *d, unsigned long gfn, unsigned long mfn,
1617 unsigned int flags)
1619 struct hvm_iommu *hd = domain_hvm_iommu(d);
1620 struct acpi_drhd_unit *drhd;
1621 struct iommu *iommu;
1622 struct dma_pte *page = NULL, *pte = NULL, old, new = { 0 };
1623 u64 pg_maddr;
1624 int flush_dev_iotlb;
1625 int iommu_domid;
1627 /* Do nothing if VT-d shares EPT page table */
1628 if ( iommu_hap_pt_share )
1629 return 0;
1631 /* do nothing for dom0 if the iommu supports pass-through */
1632 if ( iommu_passthrough && (d->domain_id == 0) )
1633 return 0;
1635 spin_lock(&hd->mapping_lock);
1637 pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K, 1);
1638 if ( pg_maddr == 0 )
1640 spin_unlock(&hd->mapping_lock);
1641 return -ENOMEM;
1643 page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
1644 pte = page + (gfn & LEVEL_MASK);
1645 old = *pte;
1646 dma_set_pte_addr(new, (paddr_t)mfn << PAGE_SHIFT_4K);
1647 dma_set_pte_prot(new,
1648 ((flags & IOMMUF_readable) ? DMA_PTE_READ : 0) |
1649 ((flags & IOMMUF_writable) ? DMA_PTE_WRITE : 0));
1651 /* Set the SNP on leaf page table if Snoop Control available */
1652 if ( iommu_snoop )
1653 dma_set_pte_snp(new);
1655 if ( old.val == new.val )
1657 spin_unlock(&hd->mapping_lock);
1658 unmap_vtd_domain_page(page);
1659 return 0;
1661 *pte = new;
1663 iommu_flush_cache_entry(pte, sizeof(struct dma_pte));
1664 spin_unlock(&hd->mapping_lock);
1665 unmap_vtd_domain_page(page);
1667 /*
1668 * No need for pcidevs_lock here because we flush
1669 * when assigning/deassigning a device
1670 */
1671 for_each_drhd_unit ( drhd )
1673 iommu = drhd->iommu;
1675 if ( !test_bit(iommu->index, &hd->iommu_bitmap) )
1676 continue;
1678 flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0;
1679 iommu_domid= domain_iommu_domid(d, iommu);
1680 if ( iommu_domid == -1 )
1681 continue;
1682 if ( iommu_flush_iotlb_psi(iommu, iommu_domid,
1683 (paddr_t)gfn << PAGE_SHIFT_4K, 0,
1684 !dma_pte_present(old), flush_dev_iotlb) )
1685 iommu_flush_write_buffer(iommu);
1688 return 0;
1691 static int intel_iommu_unmap_page(struct domain *d, unsigned long gfn)
1693 /* Do nothing for dom0 if the iommu supports pass-through. */
1694 if ( iommu_passthrough && (d->domain_id == 0) )
1695 return 0;
1697 dma_pte_clear_one(d, (paddr_t)gfn << PAGE_SHIFT_4K);
1699 return 0;
1702 void iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte,
1703 int order, int present)
1705 struct acpi_drhd_unit *drhd;
1706 struct iommu *iommu = NULL;
1707 struct hvm_iommu *hd = domain_hvm_iommu(d);
1708 int flush_dev_iotlb;
1709 int iommu_domid;
1711 iommu_flush_cache_entry(pte, sizeof(struct dma_pte));
1713 for_each_drhd_unit ( drhd )
1715 iommu = drhd->iommu;
1716 if ( !test_bit(iommu->index, &hd->iommu_bitmap) )
1717 continue;
1719 flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0;
1720 iommu_domid= domain_iommu_domid(d, iommu);
1721 if ( iommu_domid == -1 )
1722 continue;
1723 if ( iommu_flush_iotlb_psi(iommu, iommu_domid,
1724 (paddr_t)gfn << PAGE_SHIFT_4K,
1725 order, !present, flush_dev_iotlb) )
1726 iommu_flush_write_buffer(iommu);
1730 static int vtd_ept_page_compatible(struct iommu *iommu)
1732 u64 cap = iommu->cap;
1734 if ( ept_has_2mb(cpu_has_vmx_ept_2mb) != cap_sps_2mb(cap) )
1735 return 0;
1737 if ( ept_has_1gb(cpu_has_vmx_ept_1gb) != cap_sps_1gb(cap) )
1738 return 0;
1740 return 1;
1743 static bool_t vtd_ept_share(void)
1745 struct acpi_drhd_unit *drhd;
1746 struct iommu *iommu;
1747 bool_t share = TRUE;
1749 /* sharept defaults to 0 for now, default to 1 when feature matures */
1750 if ( !sharept )
1751 share = FALSE;
1753 /*
1754 * Determine whether EPT and VT-d page tables can be shared or not.
1755 */
1756 for_each_drhd_unit ( drhd )
1758 iommu = drhd->iommu;
1759 if ( !vtd_ept_page_compatible(drhd->iommu) )
1760 share = FALSE;
1762 return share;
1765 /*
1766 * set VT-d page table directory to EPT table if allowed
1767 */
1768 void iommu_set_pgd(struct domain *d)
1770 struct hvm_iommu *hd = domain_hvm_iommu(d);
1771 mfn_t pgd_mfn;
1773 ASSERT( is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled );
1775 iommu_hap_pt_share = vtd_ept_share();
1776 if ( !iommu_hap_pt_share )
1777 goto out;
1779 pgd_mfn = pagetable_get_mfn(p2m_get_pagetable(p2m_get_hostp2m(d)));
1780 hd->pgd_maddr = pagetable_get_paddr(pagetable_from_mfn(pgd_mfn));
1782 out:
1783 dprintk(XENLOG_INFO VTDPREFIX,
1784 "VT-d page table %s with EPT table\n",
1785 iommu_hap_pt_share ? "shares" : "not sharing");
1788 static int domain_rmrr_mapped(struct domain *d,
1789 struct acpi_rmrr_unit *rmrr)
1791 struct hvm_iommu *hd = domain_hvm_iommu(d);
1792 struct mapped_rmrr *mrmrr;
1794 list_for_each_entry( mrmrr, &hd->mapped_rmrrs, list )
1796 if ( mrmrr->base == rmrr->base_address &&
1797 mrmrr->end == rmrr->end_address )
1798 return 1;
1801 return 0;
1804 static int rmrr_identity_mapping(struct domain *d,
1805 struct acpi_rmrr_unit *rmrr)
1807 u64 base, end;
1808 unsigned long base_pfn, end_pfn;
1809 struct mapped_rmrr *mrmrr;
1810 struct hvm_iommu *hd = domain_hvm_iommu(d);
1812 ASSERT(spin_is_locked(&pcidevs_lock));
1813 ASSERT(rmrr->base_address < rmrr->end_address);
1815 if ( domain_rmrr_mapped(d, rmrr) )
1816 return 0;
1818 base = rmrr->base_address & PAGE_MASK_4K;
1819 base_pfn = base >> PAGE_SHIFT_4K;
1820 end = PAGE_ALIGN_4K(rmrr->end_address);
1821 end_pfn = end >> PAGE_SHIFT_4K;
1823 while ( base_pfn < end_pfn )
1825 if ( intel_iommu_map_page(d, base_pfn, base_pfn,
1826 IOMMUF_readable|IOMMUF_writable) )
1827 return -1;
1828 base_pfn++;
1831 mrmrr = xmalloc(struct mapped_rmrr);
1832 if ( !mrmrr )
1833 return -ENOMEM;
1834 mrmrr->base = rmrr->base_address;
1835 mrmrr->end = rmrr->end_address;
1836 list_add_tail(&mrmrr->list, &hd->mapped_rmrrs);
1838 return 0;
1841 static int intel_iommu_add_device(struct pci_dev *pdev)
1843 struct acpi_rmrr_unit *rmrr;
1844 u16 bdf;
1845 int ret, i;
1847 ASSERT(spin_is_locked(&pcidevs_lock));
1849 if ( !pdev->domain )
1850 return -EINVAL;
1852 ret = domain_context_mapping(pdev->domain, pdev->bus, pdev->devfn);
1853 if ( ret )
1855 dprintk(XENLOG_ERR VTDPREFIX, "d%d: context mapping failed\n",
1856 pdev->domain->domain_id);
1857 return ret;
1860 for_each_rmrr_device ( rmrr, bdf, i )
1862 if ( PCI_BUS(bdf) == pdev->bus && PCI_DEVFN2(bdf) == pdev->devfn )
1864 ret = rmrr_identity_mapping(pdev->domain, rmrr);
1865 if ( ret )
1866 dprintk(XENLOG_ERR VTDPREFIX, "d%d: RMRR mapping failed\n",
1867 pdev->domain->domain_id);
1871 return ret;
1874 static int intel_iommu_remove_device(struct pci_dev *pdev)
1876 struct acpi_rmrr_unit *rmrr;
1877 u16 bdf;
1878 int i;
1880 if ( !pdev->domain )
1881 return -EINVAL;
1883 /* If the device belongs to dom0 and it has an RMRR, don't remove it
1884 * from dom0, because the BIOS may use the RMRR at boot time.
1885 */
1886 if ( pdev->domain->domain_id == 0 )
1888 for_each_rmrr_device ( rmrr, bdf, i )
1890 if ( PCI_BUS(bdf) == pdev->bus &&
1891 PCI_DEVFN2(bdf) == pdev->devfn )
1892 return 0;
1896 return domain_context_unmap(pdev->domain, pdev->bus, pdev->devfn);
1899 static void __init setup_dom0_devices(struct domain *d)
1901 struct pci_dev *pdev;
1902 int bus, devfn;
1904 spin_lock(&pcidevs_lock);
1905 for ( bus = 0; bus < 256; bus++ )
1907 for ( devfn = 0; devfn < 256; devfn++ )
1909 pdev = pci_get_pdev(bus, devfn);
1910 if ( !pdev )
1911 continue;
1913 pdev->domain = d;
1914 list_add(&pdev->domain_list, &d->arch.pdev_list);
1915 domain_context_mapping(d, pdev->bus, pdev->devfn);
1916 pci_enable_acs(pdev);
1917 pci_vtd_quirk(pdev);
1920 spin_unlock(&pcidevs_lock);
1923 void clear_fault_bits(struct iommu *iommu)
1925 u64 val;
1926 unsigned long flags;
1928 spin_lock_irqsave(&iommu->register_lock, flags);
1929 val = dmar_readq(iommu->reg, cap_fault_reg_offset(iommu->cap) + 8);
1930 dmar_writeq(iommu->reg, cap_fault_reg_offset(iommu->cap) + 8, val);
1931 dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_FAULTS);
1932 spin_unlock_irqrestore(&iommu->register_lock, flags);
1935 static int init_vtd_hw(void)
1937 struct acpi_drhd_unit *drhd;
1938 struct iommu *iommu;
1939 struct iommu_flush *flush = NULL;
1940 int irq;
1941 int ret;
1942 unsigned long flags;
1943 struct irq_cfg *cfg;
1945 /*
1946 * Basic VT-d HW init: set VT-d interrupt, clear VT-d faults.
1947 */
1948 for_each_drhd_unit ( drhd )
1950 iommu = drhd->iommu;
1951 if ( iommu->irq < 0 )
1953 irq = iommu_set_interrupt(iommu);
1954 if ( irq < 0 )
1956 dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: interrupt setup failed\n");
1957 return irq;
1959 iommu->irq = irq;
1962 cfg = irq_cfg(iommu->irq);
1963 dma_msi_set_affinity(iommu->irq, cfg->cpu_mask);
1965 clear_fault_bits(iommu);
1967 spin_lock_irqsave(&iommu->register_lock, flags);
1968 dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
1969 spin_unlock_irqrestore(&iommu->register_lock, flags);
1972 /*
1973 * Enable queue invalidation
1974 */
1975 for_each_drhd_unit ( drhd )
1977 iommu = drhd->iommu;
1978 /*
1979 * If queued invalidation is not enabled, use register based
1980 * invalidation
1981 */
1982 if ( enable_qinval(iommu) != 0 )
1984 flush = iommu_get_flush(iommu);
1985 flush->context = flush_context_reg;
1986 flush->iotlb = flush_iotlb_reg;
1990 /*
1991 * Enable interrupt remapping
1992 */
1993 if ( iommu_intremap )
1995 int apic;
1996 for ( apic = 0; apic < nr_ioapics; apic++ )
1998 if ( ioapic_to_iommu(IO_APIC_ID(apic)) == NULL )
2000 iommu_intremap = 0;
2001 dprintk(XENLOG_ERR VTDPREFIX,
2002 "ioapic_to_iommu: ioapic 0x%x (id: 0x%x) is NULL! "
2003 "Will not try to enable Interrupt Remapping.\n",
2004 apic, IO_APIC_ID(apic));
2005 break;
2009 if ( iommu_intremap )
2011 for_each_drhd_unit ( drhd )
2013 iommu = drhd->iommu;
2014 if ( enable_intremap(iommu, 0) != 0 )
2016 dprintk(XENLOG_WARNING VTDPREFIX,
2017 "Failed to enable Interrupt Remapping!\n");
2018 break;
2023 /*
2024 * Set root entries for each VT-d engine. After setting the root entry,
2025 * we must globally invalidate the context cache, and then globally
2026 * invalidate the IOTLB
2027 */
2028 for_each_drhd_unit ( drhd )
2030 iommu = drhd->iommu;
2031 ret = iommu_set_root_entry(iommu);
2032 if ( ret )
2034 dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: set root entry failed\n");
2035 return -EIO;
2038 iommu_flush_all();
2039 return 0;
2042 static void __init setup_dom0_rmrr(struct domain *d)
2044 struct acpi_rmrr_unit *rmrr;
2045 u16 bdf;
2046 int ret, i;
2048 spin_lock(&pcidevs_lock);
2049 for_each_rmrr_device ( rmrr, bdf, i )
2051 ret = rmrr_identity_mapping(d, rmrr);
2052 if ( ret )
2053 dprintk(XENLOG_ERR VTDPREFIX,
2054 "IOMMU: mapping reserved region failed\n");
2056 spin_unlock(&pcidevs_lock);
2059 int __init intel_vtd_setup(void)
2061 struct acpi_drhd_unit *drhd;
2062 struct iommu *iommu;
2064 if ( list_empty(&acpi_drhd_units) )
2065 return -ENODEV;
2067 platform_quirks_init();
2069 irq_to_iommu = xmalloc_array(struct iommu*, nr_irqs);
2070 if ( !irq_to_iommu )
2071 return -ENOMEM;
2073 memset(irq_to_iommu, 0, nr_irqs * sizeof(struct iommu*));
2076 /* We enable the following features only if they are supported by all VT-d
2077 * engines: Snoop Control, DMA passthrough, Queued Invalidation and
2078 * Interrupt Remapping.
2079 */
2080 for_each_drhd_unit ( drhd )
2082 iommu = drhd->iommu;
2084 if ( iommu_snoop && !ecap_snp_ctl(iommu->ecap) )
2085 iommu_snoop = 0;
2087 if ( iommu_passthrough && !ecap_pass_thru(iommu->ecap) )
2088 iommu_passthrough = 0;
2090 if ( iommu_qinval && !ecap_queued_inval(iommu->ecap) )
2091 iommu_qinval = 0;
2093 if ( iommu_intremap && !ecap_intr_remap(iommu->ecap) )
2094 iommu_intremap = 0;
2097 if ( !iommu_qinval && iommu_intremap )
2099 iommu_intremap = 0;
2100 dprintk(XENLOG_WARNING VTDPREFIX, "Interrupt Remapping disabled "
2101 "since Queued Invalidation isn't supported or enabled.\n");
2104 #define P(p,s) printk("Intel VT-d %s %senabled.\n", s, (p)? "" : "not ")
2105 P(iommu_snoop, "Snoop Control");
2106 P(iommu_passthrough, "Dom0 DMA Passthrough");
2107 P(iommu_qinval, "Queued Invalidation");
2108 P(iommu_intremap, "Interrupt Remapping");
2109 #undef P
2111 scan_pci_devices();
2113 if ( init_vtd_hw() )
2114 goto error;
2116 register_keyhandler('V', &dump_iommu_info_keyhandler);
2118 return 0;
2120 error:
2121 iommu_enabled = 0;
2122 iommu_snoop = 0;
2123 iommu_passthrough = 0;
2124 iommu_qinval = 0;
2125 iommu_intremap = 0;
2126 return -ENOMEM;
2129 /*
2130 * If the device isn't owned by dom0, it has already been
2131 * assigned to another domain, or it does not exist.
2132 */
2133 int device_assigned(u8 bus, u8 devfn)
2135 struct pci_dev *pdev;
2137 spin_lock(&pcidevs_lock);
2138 pdev = pci_get_pdev_by_domain(dom0, bus, devfn);
2139 if (!pdev)
2141 spin_unlock(&pcidevs_lock);
2142 return -1;
2145 spin_unlock(&pcidevs_lock);
2146 return 0;
2149 static int intel_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
2151 struct acpi_rmrr_unit *rmrr;
2152 int ret = 0, i;
2153 struct pci_dev *pdev;
2154 u16 bdf;
2156 if ( list_empty(&acpi_drhd_units) )
2157 return -ENODEV;
2159 ASSERT(spin_is_locked(&pcidevs_lock));
2160 pdev = pci_get_pdev(bus, devfn);
2161 if (!pdev)
2162 return -ENODEV;
2164 if (pdev->domain != dom0)
2166 dprintk(XENLOG_ERR VTDPREFIX,
2167 "IOMMU: assign a assigned device\n");
2168 return -EBUSY;
2171 ret = reassign_device_ownership(dom0, d, bus, devfn);
2172 if ( ret )
2173 goto done;
2175 /* FIXME: Because USB RMRR conflicts with guest bios region,
2176 * ignore USB RMRR temporarily.
2177 */
2178 if ( is_usb_device(bus, devfn) )
2180 ret = 0;
2181 goto done;
2184 /* Setup rmrr identity mapping */
2185 for_each_rmrr_device( rmrr, bdf, i )
2187 if ( PCI_BUS(bdf) == bus && PCI_DEVFN2(bdf) == devfn )
2189 ret = rmrr_identity_mapping(d, rmrr);
2190 if ( ret )
2192 dprintk(XENLOG_ERR VTDPREFIX,
2193 "IOMMU: mapping reserved region failed\n");
2194 goto done;
2199 done:
2200 return ret;
2203 static int intel_iommu_group_id(u8 bus, u8 devfn)
2205 u8 secbus;
2206 if ( find_upstream_bridge(&bus, &devfn, &secbus) < 0 )
2207 return -1;
2208 else
2209 return PCI_BDF2(bus, devfn);
2212 static u32 iommu_state[MAX_IOMMUS][MAX_IOMMU_REGS];
2213 static void vtd_suspend(void)
2215 struct acpi_drhd_unit *drhd;
2216 struct iommu *iommu;
2217 u32 i;
2219 if ( !iommu_enabled )
2220 return;
2222 iommu_flush_all();
2224 for_each_drhd_unit ( drhd )
2226 iommu = drhd->iommu;
2227 i = iommu->index;
2229 iommu_state[i][DMAR_FECTL_REG] =
2230 (u32) dmar_readl(iommu->reg, DMAR_FECTL_REG);
2231 iommu_state[i][DMAR_FEDATA_REG] =
2232 (u32) dmar_readl(iommu->reg, DMAR_FEDATA_REG);
2233 iommu_state[i][DMAR_FEADDR_REG] =
2234 (u32) dmar_readl(iommu->reg, DMAR_FEADDR_REG);
2235 iommu_state[i][DMAR_FEUADDR_REG] =
2236 (u32) dmar_readl(iommu->reg, DMAR_FEUADDR_REG);
2238 /* don't disable VT-d engine when force_iommu is set. */
2239 if ( force_iommu )
2240 continue;
2242 iommu_disable_translation(iommu);
2244 /* If interrupt remapping is enabled, queued invalidation
2245 * will be disabled when interrupt remapping is disabled
2246 * during local apic suspend
2247 */
2248 if ( !iommu_intremap && iommu_qinval )
2249 disable_qinval(iommu);
2253 static void vtd_resume(void)
2255 struct acpi_drhd_unit *drhd;
2256 struct iommu *iommu;
2257 u32 i;
2258 unsigned long flags;
2260 if ( !iommu_enabled )
2261 return;
2263 if ( init_vtd_hw() != 0 && force_iommu )
2264 panic("IOMMU setup failed, crash Xen for security purpose!\n");
2266 for_each_drhd_unit ( drhd )
2268 iommu = drhd->iommu;
2269 i = iommu->index;
2271 spin_lock_irqsave(&iommu->register_lock, flags);
2272 dmar_writel(iommu->reg, DMAR_FECTL_REG,
2273 (u32) iommu_state[i][DMAR_FECTL_REG]);
2274 dmar_writel(iommu->reg, DMAR_FEDATA_REG,
2275 (u32) iommu_state[i][DMAR_FEDATA_REG]);
2276 dmar_writel(iommu->reg, DMAR_FEADDR_REG,
2277 (u32) iommu_state[i][DMAR_FEADDR_REG]);
2278 dmar_writel(iommu->reg, DMAR_FEUADDR_REG,
2279 (u32) iommu_state[i][DMAR_FEUADDR_REG]);
2280 spin_unlock_irqrestore(&iommu->register_lock, flags);
2282 iommu_enable_translation(drhd);
2286 const struct iommu_ops intel_iommu_ops = {
2287 .init = intel_iommu_domain_init,
2288 .dom0_init = intel_iommu_dom0_init,
2289 .add_device = intel_iommu_add_device,
2290 .remove_device = intel_iommu_remove_device,
2291 .assign_device = intel_iommu_assign_device,
2292 .teardown = iommu_domain_teardown,
2293 .map_page = intel_iommu_map_page,
2294 .unmap_page = intel_iommu_unmap_page,
2295 .reassign_device = reassign_device_ownership,
2296 .get_device_group_id = intel_iommu_group_id,
2297 .update_ire_from_apic = io_apic_write_remap_rte,
2298 .update_ire_from_msi = msi_msg_write_remap_rte,
2299 .read_apic_from_ire = io_apic_read_remap_rte,
2300 .read_msi_from_ire = msi_msg_read_remap_rte,
2301 .suspend = vtd_suspend,
2302 .resume = vtd_resume,
2303 };
2305 /*
2306 * Local variables:
2307 * mode: C
2308 * c-set-style: "BSD"
2309 * c-basic-offset: 4
2310 * tab-width: 4
2311 * indent-tabs-mode: nil
2312 * End:
2313 */