debuggers.hg

view xen/drivers/passthrough/vtd/iommu.c @ 0:7d21f7218375

Exact replica of unstable on 051908 + README-this
author Mukesh Rathor
date Mon May 19 15:34:57 2008 -0700 (2008-05-19)
children 5c0bf00e371d
1 /*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) Ashok Raj <ashok.raj@intel.com>
18 * Copyright (C) Shaohua Li <shaohua.li@intel.com>
19 * Copyright (C) Allen Kay <allen.m.kay@intel.com> - adapted to xen
20 */
22 #include <xen/irq.h>
23 #include <xen/sched.h>
24 #include <xen/xmalloc.h>
25 #include <xen/domain_page.h>
26 #include <xen/iommu.h>
27 #include <xen/numa.h>
28 #include <xen/time.h>
29 #include <xen/pci.h>
30 #include <xen/pci_regs.h>
31 #include <asm/paging.h>
32 #include <asm/msi.h>
33 #include "iommu.h"
34 #include "dmar.h"
35 #include "extern.h"
36 #include "vtd.h"
38 #define domain_iommu_domid(d) ((d)->arch.hvm_domain.hvm_iommu.iommu_domid)
40 static spinlock_t domid_bitmap_lock; /* protect domain id bitmap */
41 static int domid_bitmap_size; /* domain id bitmap size in bits */
42 static unsigned long *domid_bitmap; /* iommu domain id bitmap */
44 static void setup_dom0_devices(struct domain *d);
45 static void setup_dom0_rmrr(struct domain *d);
47 #define DID_FIELD_WIDTH 16
48 #define DID_HIGH_OFFSET 8
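/*
 * context_set_domain_id() below allocates a VT-d domain id lazily: the
 * first time a domain gets a context entry, a free bit is taken from
 * domid_bitmap (bit 0 is reserved at setup, so allocated ids are always
 * non-zero) and cached in d->arch.hvm_domain.hvm_iommu.iommu_domid; the
 * id is then written into the context entry's high word at DID_HIGH_OFFSET.
 */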
49 static void context_set_domain_id(struct context_entry *context,
50 struct domain *d)
51 {
52 unsigned long flags;
53 domid_t iommu_domid = domain_iommu_domid(d);
55 if ( iommu_domid == 0 )
56 {
57 spin_lock_irqsave(&domid_bitmap_lock, flags);
58 iommu_domid = find_first_zero_bit(domid_bitmap, domid_bitmap_size);
59 set_bit(iommu_domid, domid_bitmap);
60 spin_unlock_irqrestore(&domid_bitmap_lock, flags);
61 d->arch.hvm_domain.hvm_iommu.iommu_domid = iommu_domid;
62 }
64 context->hi &= (1 << DID_HIGH_OFFSET) - 1;
65 context->hi |= iommu_domid << DID_HIGH_OFFSET;
66 }
68 static void iommu_domid_release(struct domain *d)
69 {
70 domid_t iommu_domid = domain_iommu_domid(d);
72 if ( iommu_domid != 0 )
73 {
74 d->arch.hvm_domain.hvm_iommu.iommu_domid = 0;
75 clear_bit(iommu_domid, domid_bitmap);
76 }
77 }
79 static struct intel_iommu *alloc_intel_iommu(void)
80 {
81 struct intel_iommu *intel;
83 intel = xmalloc(struct intel_iommu);
84 if ( intel == NULL )
85 return NULL;
86 memset(intel, 0, sizeof(struct intel_iommu));
88 spin_lock_init(&intel->qi_ctrl.qinval_lock);
89 spin_lock_init(&intel->qi_ctrl.qinval_poll_lock);
90 spin_lock_init(&intel->ir_ctrl.iremap_lock);
92 return intel;
93 }
95 static void free_intel_iommu(struct intel_iommu *intel)
96 {
97 xfree(intel);
98 }
100 struct qi_ctrl *iommu_qi_ctrl(struct iommu *iommu)
101 {
102 return iommu ? &iommu->intel->qi_ctrl : NULL;
103 }
105 struct ir_ctrl *iommu_ir_ctrl(struct iommu *iommu)
106 {
107 return iommu ? &iommu->intel->ir_ctrl : NULL;
108 }
110 struct iommu_flush *iommu_get_flush(struct iommu *iommu)
111 {
112 return iommu ? &iommu->intel->flush : NULL;
113 }
115 unsigned int clflush_size;
116 void clflush_cache_range(void *adr, int size)
117 {
118 int i;
119 for ( i = 0; i < size; i += clflush_size )
120 clflush(adr + i);
121 }
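/*
 * When the IOMMU is not coherent with the CPU caches (ecap_coherent == 0),
 * software-written structures (root/context entries, page-table entries)
 * must be pushed out of the CPU caches with clflush before the IOMMU can
 * be expected to see them; that is what the helpers below are for.
 */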
123 static void __iommu_flush_cache(struct iommu *iommu, void *addr, int size)
124 {
125 if ( !ecap_coherent(iommu->ecap) )
126 clflush_cache_range(addr, size);
127 }
129 void iommu_flush_cache_entry(struct iommu *iommu, void *addr)
130 {
131 __iommu_flush_cache(iommu, addr, 8);
132 }
134 void iommu_flush_cache_page(struct iommu *iommu, void *addr)
135 {
136 __iommu_flush_cache(iommu, addr, PAGE_SIZE_4K);
137 }
139 int nr_iommus;
140 /* context entry handling */
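/*
 * Device lookup is a two-level table walk: the bus number indexes the
 * per-IOMMU root table, and each root entry points to a 256-entry context
 * table indexed by devfn. bus_to_context_maddr() returns (allocating on
 * first use) the machine address of the context table for a given bus.
 */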
141 static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
142 {
143 struct root_entry *root, *root_entries;
144 unsigned long flags;
145 u64 maddr;
147 spin_lock_irqsave(&iommu->lock, flags);
148 root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
149 root = &root_entries[bus];
150 if ( !root_present(*root) )
151 {
152 maddr = alloc_pgtable_maddr();
153 if ( maddr == 0 )
154 {
155 spin_unlock_irqrestore(&iommu->lock, flags);
156 return 0;
157 }
158 set_root_value(*root, maddr);
159 set_root_present(*root);
160 iommu_flush_cache_entry(iommu, root);
161 }
162 maddr = (u64) get_context_addr(*root);
163 unmap_vtd_domain_page(root_entries);
164 spin_unlock_irqrestore(&iommu->lock, flags);
165 return maddr;
166 }
168 static int device_context_mapped(struct iommu *iommu, u8 bus, u8 devfn)
169 {
170 struct root_entry *root, *root_entries;
171 struct context_entry *context;
172 u64 context_maddr;
173 int ret;
174 unsigned long flags;
176 spin_lock_irqsave(&iommu->lock, flags);
177 root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
178 root = &root_entries[bus];
179 if ( !root_present(*root) )
180 {
181 ret = 0;
182 goto out;
183 }
184 context_maddr = get_context_addr(*root);
185 context = (struct context_entry *)map_vtd_domain_page(context_maddr);
186 ret = context_present(context[devfn]);
187 unmap_vtd_domain_page(context);
188 out:
189 unmap_vtd_domain_page(root_entries);
190 spin_unlock_irqrestore(&iommu->lock, flags);
191 return ret;
192 }
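/*
 * addr_to_dma_page_maddr() walks the domain's multi-level VT-d page table
 * for the given DMA address, allocating intermediate table pages as it
 * goes, and returns the machine address of the last-level (level 1) page
 * table page covering the address, or 0 on allocation failure.
 */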
194 static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr)
195 {
196 struct hvm_iommu *hd = domain_hvm_iommu(domain);
197 struct acpi_drhd_unit *drhd;
198 struct iommu *iommu;
199 int addr_width = agaw_to_width(hd->agaw);
200 struct dma_pte *parent, *pte = NULL;
201 int level = agaw_to_level(hd->agaw);
202 int offset;
203 unsigned long flags;
204 u64 pte_maddr = 0;
205 u64 *vaddr = NULL;
207 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
208 iommu = drhd->iommu;
210 addr &= (((u64)1) << addr_width) - 1;
211 spin_lock_irqsave(&hd->mapping_lock, flags);
212 if ( hd->pgd_maddr == 0 )
213 {
214 hd->pgd_maddr = alloc_pgtable_maddr();
215 if ( hd->pgd_maddr == 0 )
216 { spin_unlock_irqrestore(&hd->mapping_lock, flags); return 0; }
217 }
219 parent = (struct dma_pte *)map_vtd_domain_page(hd->pgd_maddr);
220 while ( level > 1 )
221 {
222 offset = address_level_offset(addr, level);
223 pte = &parent[offset];
225 if ( dma_pte_addr(*pte) == 0 )
226 {
227 u64 maddr = alloc_pgtable_maddr();
228 dma_set_pte_addr(*pte, maddr);
229 vaddr = map_vtd_domain_page(maddr);
230 if ( !vaddr )
231 break;
233 /*
234 * Higher-level table entries always set r/w; only the last-level
235 * page-table entries control the actual read/write permission.
236 */
237 dma_set_pte_readable(*pte);
238 dma_set_pte_writable(*pte);
239 iommu_flush_cache_entry(iommu, pte);
240 }
241 else
242 {
243 vaddr = map_vtd_domain_page(pte->val);
244 if ( !vaddr )
245 break;
246 }
248 if ( level == 2 )
249 {
250 pte_maddr = pte->val & PAGE_MASK_4K;
251 unmap_vtd_domain_page(vaddr);
252 break;
253 }
255 unmap_vtd_domain_page(parent);
256 parent = (struct dma_pte *)vaddr;
257 vaddr = NULL;
258 level--;
259 }
261 unmap_vtd_domain_page(parent);
262 spin_unlock_irqrestore(&hd->mapping_lock, flags);
263 return pte_maddr;
264 }
266 /* return address's page at specific level */
267 static u64 dma_addr_level_page_maddr(
268 struct domain *domain, u64 addr, int level)
269 {
270 struct hvm_iommu *hd = domain_hvm_iommu(domain);
271 struct dma_pte *parent, *pte = NULL;
272 int total = agaw_to_level(hd->agaw);
273 int offset;
274 u64 pg_maddr = hd->pgd_maddr;
276 if ( pg_maddr == 0 )
277 return 0;
279 parent = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
280 while ( level <= total )
281 {
282 offset = address_level_offset(addr, total);
283 pte = &parent[offset];
284 if ( dma_pte_addr(*pte) == 0 )
285 break;
287 pg_maddr = pte->val & PAGE_MASK_4K;
288 unmap_vtd_domain_page(parent);
290 if ( level == total )
291 return pg_maddr;
293 parent = map_vtd_domain_page(pte->val);
294 total--;
295 }
297 unmap_vtd_domain_page(parent);
298 return 0;
299 }
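/*
 * iommu_flush_write_buffer() drains the IOMMU's internal write buffer via
 * the WBF bit in the global command register. It is only needed (and only
 * issued) when the capability register advertises RWBF; hardware without
 * that capability simply returns.
 */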
301 static void iommu_flush_write_buffer(struct iommu *iommu)
302 {
303 u32 val;
304 unsigned long flag;
305 s_time_t start_time;
307 if ( !cap_rwbf(iommu->cap) )
308 return;
309 val = iommu->gcmd | DMA_GCMD_WBF;
311 spin_lock_irqsave(&iommu->register_lock, flag);
312 dmar_writel(iommu->reg, DMAR_GCMD_REG, val);
314 /* Make sure hardware completes it */
315 start_time = NOW();
316 for ( ; ; )
317 {
318 val = dmar_readl(iommu->reg, DMAR_GSTS_REG);
319 if ( !(val & DMA_GSTS_WBFS) )
320 break;
321 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
322 panic("DMAR hardware is malfunctional,"
323 " please disable IOMMU\n");
324 cpu_relax();
325 }
326 spin_unlock_irqrestore(&iommu->register_lock, flag);
327 }
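/*
 * Context-cache invalidation through the CCMD register supports three
 * granularities: global, domain-selective (by did), and device-selective
 * (by did + source-id + function mask). flush_context_reg() issues the
 * request and spins until the hardware clears the ICC bit.
 */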
329 /* return value determines whether a write buffer flush is needed */
330 static int flush_context_reg(
331 void *_iommu,
332 u16 did, u16 source_id, u8 function_mask, u64 type,
333 int non_present_entry_flush)
334 {
335 struct iommu *iommu = (struct iommu *) _iommu;
336 u64 val = 0;
337 unsigned long flag;
338 s_time_t start_time;
340 /*
341 * Non-present entry flush: if the hardware does not cache non-present
342 * entries there is nothing to do; if it does (caching mode), flush the
343 * entries of domain 0, whose domain id is used to tag any cached
344 * non-present entries.
345 */
346 if ( non_present_entry_flush )
347 {
348 if ( !cap_caching_mode(iommu->cap) )
349 return 1;
350 else
351 did = 0;
352 }
354 /* use register invalidation */
355 switch ( type )
356 {
357 case DMA_CCMD_GLOBAL_INVL:
358 val = DMA_CCMD_GLOBAL_INVL;
359 break;
360 case DMA_CCMD_DOMAIN_INVL:
361 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
362 break;
363 case DMA_CCMD_DEVICE_INVL:
364 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
365 |DMA_CCMD_SID(source_id)|DMA_CCMD_FM(function_mask);
366 break;
367 default:
368 BUG();
369 }
370 val |= DMA_CCMD_ICC;
372 spin_lock_irqsave(&iommu->register_lock, flag);
373 dmar_writeq(iommu->reg, DMAR_CCMD_REG, val);
375 /* Make sure hardware completes it */
376 start_time = NOW();
377 for ( ; ; )
378 {
379 val = dmar_readq(iommu->reg, DMAR_CCMD_REG);
380 if ( !(val & DMA_CCMD_ICC) )
381 break;
382 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
383 panic("DMAR hardware is malfunctional, please disable IOMMU\n");
384 cpu_relax();
385 }
386 spin_unlock_irqrestore(&iommu->register_lock, flag);
387 /* flushing the context entry implicitly flushes the write buffer */
388 return 0;
389 }
391 static int inline iommu_flush_context_global(
392 struct iommu *iommu, int non_present_entry_flush)
393 {
394 struct iommu_flush *flush = iommu_get_flush(iommu);
395 return flush->context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
396 non_present_entry_flush);
397 }
399 static int inline iommu_flush_context_domain(
400 struct iommu *iommu, u16 did, int non_present_entry_flush)
401 {
402 struct iommu_flush *flush = iommu_get_flush(iommu);
403 return flush->context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
404 non_present_entry_flush);
405 }
407 static int inline iommu_flush_context_device(
408 struct iommu *iommu, u16 did, u16 source_id,
409 u8 function_mask, int non_present_entry_flush)
410 {
411 struct iommu_flush *flush = iommu_get_flush(iommu);
412 return flush->context(iommu, did, source_id, function_mask,
413 DMA_CCMD_DEVICE_INVL,
414 non_present_entry_flush);
415 }
417 /* return value determines whether a write buffer flush is needed */
418 static int flush_iotlb_reg(void *_iommu, u16 did,
419 u64 addr, unsigned int size_order, u64 type,
420 int non_present_entry_flush)
421 {
422 struct iommu *iommu = (struct iommu *) _iommu;
423 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
424 u64 val = 0, val_iva = 0;
425 unsigned long flag;
426 s_time_t start_time;
428 /*
429 * Non-present entry flush: if the hardware does not cache non-present
430 * entries there is nothing to do; if it does (caching mode), flush the
431 * entries of domain 0, whose domain id is used to tag any cached
432 * non-present entries.
433 */
434 if ( non_present_entry_flush )
435 {
436 if ( !cap_caching_mode(iommu->cap) )
437 return 1;
438 else
439 did = 0;
440 }
442 /* use register invalidation */
443 switch ( type )
444 {
445 case DMA_TLB_GLOBAL_FLUSH:
446 /* a global flush does not need to set the IVA register */
447 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
448 break;
449 case DMA_TLB_DSI_FLUSH:
450 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
451 break;
452 case DMA_TLB_PSI_FLUSH:
453 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
454 /* Note: always flush non-leaf currently */
455 val_iva = size_order | addr;
456 break;
457 default:
458 BUG();
459 }
460 /* Note: set drain read/write */
461 if ( cap_read_drain(iommu->cap) )
462 val |= DMA_TLB_READ_DRAIN;
463 if ( cap_write_drain(iommu->cap) )
464 val |= DMA_TLB_WRITE_DRAIN;
466 spin_lock_irqsave(&iommu->register_lock, flag);
467 /* Note: Only uses first TLB reg currently */
468 if ( val_iva )
469 dmar_writeq(iommu->reg, tlb_offset, val_iva);
470 dmar_writeq(iommu->reg, tlb_offset + 8, val);
472 /* Make sure hardware completes it */
473 start_time = NOW();
474 for ( ; ; )
475 {
476 val = dmar_readq(iommu->reg, tlb_offset + 8);
477 if ( !(val & DMA_TLB_IVT) )
478 break;
479 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
480 panic("DMAR hardware is malfunctional, please disable IOMMU\n");
481 cpu_relax();
482 }
483 spin_unlock_irqrestore(&iommu->register_lock, flag);
485 /* check IOTLB invalidation granularity */
486 if ( DMA_TLB_IAIG(val) == 0 )
487 printk(KERN_ERR VTDPREFIX "IOMMU: flush IOTLB failed\n");
488 if ( DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type) )
489 printk(KERN_ERR VTDPREFIX "IOMMU: tlb flush request %x, actual %x\n",
490 (u32)DMA_TLB_IIRG(type), (u32)DMA_TLB_IAIG(val));
491 /* flushing the context entry implicitly flushes the write buffer */
492 return 0;
493 }
495 static int inline iommu_flush_iotlb_global(struct iommu *iommu,
496 int non_present_entry_flush)
497 {
498 struct iommu_flush *flush = iommu_get_flush(iommu);
499 return flush->iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
500 non_present_entry_flush);
501 }
503 static int inline iommu_flush_iotlb_dsi(struct iommu *iommu, u16 did,
504 int non_present_entry_flush)
505 {
506 struct iommu_flush *flush = iommu_get_flush(iommu);
507 return flush->iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
508 non_present_entry_flush);
509 }
511 static int inline get_alignment(u64 base, unsigned int size)
512 {
513 int t = 0;
514 u64 end;
516 end = base + size - 1;
517 while ( base != end )
518 {
519 t++;
520 base >>= 1;
521 end >>= 1;
522 }
523 return t;
524 }
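/*
 * get_alignment() returns the number of low-order bits in which the first
 * and last page frame of the range differ. For example (illustrative
 * values): base pfn 0x1234 with 8 pages gives end pfn 0x123b; they differ
 * in the low 4 bits, so the result is 4 and the page-selective flush below
 * covers an aligned 2^4 = 16 page region containing the range.
 */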
526 static int inline iommu_flush_iotlb_psi(
527 struct iommu *iommu, u16 did,
528 u64 addr, unsigned int pages, int non_present_entry_flush)
529 {
530 unsigned int align;
531 struct iommu_flush *flush = iommu_get_flush(iommu);
533 BUG_ON(addr & (~PAGE_MASK_4K));
534 BUG_ON(pages == 0);
536 /* Fallback to domain selective flush if no PSI support */
537 if ( !cap_pgsel_inv(iommu->cap) )
538 return iommu_flush_iotlb_dsi(iommu, did,
539 non_present_entry_flush);
541 /*
542 * PSI requires the flush size to be a power of two (2^x pages) and the
543 * base address to be naturally aligned to that size
544 */
545 align = get_alignment(addr >> PAGE_SHIFT_4K, pages);
546 /* Fallback to domain selective flush if size is too big */
547 if ( align > cap_max_amask_val(iommu->cap) )
548 return iommu_flush_iotlb_dsi(iommu, did,
549 non_present_entry_flush);
551 addr >>= PAGE_SHIFT_4K + align;
552 addr <<= PAGE_SHIFT_4K + align;
554 return flush->iotlb(iommu, did, addr, align,
555 DMA_TLB_PSI_FLUSH, non_present_entry_flush);
556 }
558 void iommu_flush_all(void)
559 {
560 struct acpi_drhd_unit *drhd;
561 struct iommu *iommu;
563 wbinvd();
564 for_each_drhd_unit ( drhd )
565 {
566 iommu = drhd->iommu;
567 iommu_flush_context_global(iommu, 0);
568 iommu_flush_iotlb_global(iommu, 0);
569 }
570 }
572 /* clear one page's page table */
573 static void dma_pte_clear_one(struct domain *domain, u64 addr)
574 {
575 struct acpi_drhd_unit *drhd;
576 struct iommu *iommu;
577 struct dma_pte *page = NULL, *pte = NULL;
578 u64 pg_maddr;
580 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
582 /* get last level pte */
583 pg_maddr = dma_addr_level_page_maddr(domain, addr, 1);
584 if ( pg_maddr == 0 )
585 return;
586 page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
587 pte = page + address_level_offset(addr, 1);
588 if ( pte )
589 {
590 dma_clear_pte(*pte);
591 iommu_flush_cache_entry(drhd->iommu, pte);
593 for_each_drhd_unit ( drhd )
594 {
595 iommu = drhd->iommu;
596 if ( cap_caching_mode(iommu->cap) )
597 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
598 addr, 1, 0);
599 else if (cap_rwbf(iommu->cap))
600 iommu_flush_write_buffer(iommu);
601 }
602 }
603 unmap_vtd_domain_page(page);
604 }
606 /* clear last level pte, a tlb flush should be followed */
607 static void dma_pte_clear_range(struct domain *domain, u64 start, u64 end)
608 {
609 struct hvm_iommu *hd = domain_hvm_iommu(domain);
610 int addr_width = agaw_to_width(hd->agaw);
612 start &= (((u64)1) << addr_width) - 1;
613 end &= (((u64)1) << addr_width) - 1;
614 /* in case the range is not page-aligned */
615 start = PAGE_ALIGN_4K(start);
616 end &= PAGE_MASK_4K;
618 /* we don't need lock here, nobody else touches the iova range */
619 while ( start < end )
620 {
621 dma_pte_clear_one(domain, start);
622 start += PAGE_SIZE_4K;
623 }
624 }
626 static void iommu_free_next_pagetable(u64 pt_maddr, unsigned long index,
627 int level)
628 {
629 struct acpi_drhd_unit *drhd;
630 unsigned long next_index;
631 struct dma_pte *pt_vaddr, *pde;
632 int next_level;
634 if ( pt_maddr == 0 )
635 return;
637 pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr);
638 pde = &pt_vaddr[index];
639 if ( dma_pte_addr(*pde) != 0 )
640 {
641 next_level = level - 1;
642 if ( next_level > 1 )
643 {
644 next_index = 0;
645 do
646 {
647 iommu_free_next_pagetable(pde->val,
648 next_index, next_level);
649 next_index++;
650 } while ( next_index < PTE_NUM );
651 }
653 dma_clear_pte(*pde);
654 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
655 iommu_flush_cache_entry(drhd->iommu, pde);
656 free_pgtable_maddr(pde->val);
657 unmap_vtd_domain_page(pt_vaddr);
658 }
659 else
660 unmap_vtd_domain_page(pt_vaddr);
661 }
663 /* free all VT-d page tables when shut down or destroy domain. */
664 static void iommu_free_pagetable(struct domain *domain)
665 {
666 unsigned long index;
667 struct hvm_iommu *hd = domain_hvm_iommu(domain);
668 int total_level = agaw_to_level(hd->agaw);
670 if ( hd->pgd_maddr != 0 )
671 {
672 index = 0;
673 do
674 {
675 iommu_free_next_pagetable(hd->pgd_maddr,
676 index, total_level + 1);
677 index++;
678 } while ( index < PTE_NUM );
680 free_pgtable_maddr(hd->pgd_maddr);
681 hd->pgd_maddr = 0;
682 }
683 }
685 static int iommu_set_root_entry(struct iommu *iommu)
686 {
687 u32 cmd, sts;
688 unsigned long flags;
689 s_time_t start_time;
691 if ( iommu->root_maddr != 0 )
692 {
693 free_pgtable_maddr(iommu->root_maddr);
694 iommu->root_maddr = 0;
695 }
697 spin_lock_irqsave(&iommu->register_lock, flags);
699 iommu->root_maddr = alloc_pgtable_maddr();
700 if ( iommu->root_maddr == 0 )
701 { spin_unlock_irqrestore(&iommu->register_lock, flags); return -ENOMEM; }
703 dmar_writeq(iommu->reg, DMAR_RTADDR_REG, iommu->root_maddr);
704 cmd = iommu->gcmd | DMA_GCMD_SRTP;
705 dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd);
707 /* Make sure hardware completes it */
708 start_time = NOW();
709 for ( ; ; )
710 {
711 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
712 if ( sts & DMA_GSTS_RTPS )
713 break;
714 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
715 panic("DMAR hardware is malfunctional, please disable IOMMU\n");
716 cpu_relax();
717 }
719 spin_unlock_irqrestore(&iommu->register_lock, flags);
721 return 0;
722 }
724 static int iommu_enable_translation(struct iommu *iommu)
725 {
726 u32 sts;
727 unsigned long flags;
728 s_time_t start_time;
730 dprintk(XENLOG_INFO VTDPREFIX,
731 "iommu_enable_translation: iommu->reg = %p\n", iommu->reg);
732 spin_lock_irqsave(&iommu->register_lock, flags);
733 iommu->gcmd |= DMA_GCMD_TE;
734 dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
735 /* Make sure hardware completes it */
736 start_time = NOW();
737 for ( ; ; )
738 {
739 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
740 if ( sts & DMA_GSTS_TES )
741 break;
742 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
743 panic("DMAR hardware is malfunctional, please disable IOMMU\n");
744 cpu_relax();
745 }
747 /* Disable PMRs when VT-d engine takes effect per spec definition */
748 disable_pmr(iommu);
749 spin_unlock_irqrestore(&iommu->register_lock, flags);
750 return 0;
751 }
753 int iommu_disable_translation(struct iommu *iommu)
754 {
755 u32 sts;
756 unsigned long flags;
757 s_time_t start_time;
759 spin_lock_irqsave(&iommu->register_lock, flags);
760 iommu->gcmd &= ~ DMA_GCMD_TE;
761 dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
763 /* Make sure hardware completes it */
764 start_time = NOW();
765 for ( ; ; )
766 {
767 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
768 if ( !(sts & DMA_GSTS_TES) )
769 break;
770 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
771 panic("DMAR hardware is malfunctional, please disable IOMMU\n");
772 cpu_relax();
773 }
774 spin_unlock_irqrestore(&iommu->register_lock, flags);
775 return 0;
776 }
778 static struct iommu *vector_to_iommu[NR_VECTORS];
779 static int iommu_page_fault_do_one(struct iommu *iommu, int type,
780 u8 fault_reason, u16 source_id, u32 addr)
781 {
782 dprintk(XENLOG_WARNING VTDPREFIX,
783 "iommu_fault:%s: %x:%x.%x addr %x REASON %x iommu->reg = %p\n",
784 (type ? "DMA Read" : "DMA Write"), (source_id >> 8),
785 PCI_SLOT(source_id & 0xFF), PCI_FUNC(source_id & 0xFF), addr,
786 fault_reason, iommu->reg);
788 if ( fault_reason < 0x20 )
789 print_vtd_entries(current->domain, iommu, (source_id >> 8),
790 (source_id & 0xff), (addr >> PAGE_SHIFT));
792 return 0;
793 }
795 static void iommu_fault_status(u32 fault_status)
796 {
797 if ( fault_status & DMA_FSTS_PFO )
798 dprintk(XENLOG_ERR VTDPREFIX,
799 "iommu_fault_status: Fault Overflow\n");
800 else if ( fault_status & DMA_FSTS_PPF )
801 dprintk(XENLOG_ERR VTDPREFIX,
802 "iommu_fault_status: Primary Pending Fault\n");
803 else if ( fault_status & DMA_FSTS_AFO )
804 dprintk(XENLOG_ERR VTDPREFIX,
805 "iommu_fault_status: Advanced Fault Overflow\n");
806 else if ( fault_status & DMA_FSTS_APF )
807 dprintk(XENLOG_ERR VTDPREFIX,
808 "iommu_fault_status: Advanced Pending Fault\n");
809 else if ( fault_status & DMA_FSTS_IQE )
810 dprintk(XENLOG_ERR VTDPREFIX,
811 "iommu_fault_status: Invalidation Queue Error\n");
812 else if ( fault_status & DMA_FSTS_ICE )
813 dprintk(XENLOG_ERR VTDPREFIX,
814 "iommu_fault_status: Invalidation Completion Error\n");
815 else if ( fault_status & DMA_FSTS_ITE )
816 dprintk(XENLOG_ERR VTDPREFIX,
817 "iommu_fault_status: Invalidation Time-out Error\n");
818 }
820 #define PRIMARY_FAULT_REG_LEN (16)
821 static void iommu_page_fault(int vector, void *dev_id,
822 struct cpu_user_regs *regs)
823 {
824 struct iommu *iommu = dev_id;
825 int reg, fault_index;
826 u32 fault_status;
827 unsigned long flags;
829 dprintk(XENLOG_WARNING VTDPREFIX,
830 "iommu_page_fault: iommu->reg = %p\n", iommu->reg);
832 spin_lock_irqsave(&iommu->register_lock, flags);
833 fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG);
834 spin_unlock_irqrestore(&iommu->register_lock, flags);
836 iommu_fault_status(fault_status);
838 /* FIXME: ignore advanced fault log */
839 if ( !(fault_status & DMA_FSTS_PPF) )
840 return;
841 fault_index = dma_fsts_fault_record_index(fault_status);
842 reg = cap_fault_reg_offset(iommu->cap);
843 for ( ; ; )
844 {
845 u8 fault_reason;
846 u16 source_id;
847 u32 guest_addr, data;
848 int type;
850 /* highest 32 bits */
851 spin_lock_irqsave(&iommu->register_lock, flags);
852 data = dmar_readl(iommu->reg, reg +
853 fault_index * PRIMARY_FAULT_REG_LEN + 12);
854 if ( !(data & DMA_FRCD_F) )
855 {
856 spin_unlock_irqrestore(&iommu->register_lock, flags);
857 break;
858 }
860 fault_reason = dma_frcd_fault_reason(data);
861 type = dma_frcd_type(data);
863 data = dmar_readl(iommu->reg, reg +
864 fault_index * PRIMARY_FAULT_REG_LEN + 8);
865 source_id = dma_frcd_source_id(data);
867 guest_addr = dmar_readq(iommu->reg, reg +
868 fault_index * PRIMARY_FAULT_REG_LEN);
869 guest_addr = dma_frcd_page_addr(guest_addr);
870 /* clear the fault */
871 dmar_writel(iommu->reg, reg +
872 fault_index * PRIMARY_FAULT_REG_LEN + 12, DMA_FRCD_F);
873 spin_unlock_irqrestore(&iommu->register_lock, flags);
875 iommu_page_fault_do_one(iommu, type, fault_reason,
876 source_id, guest_addr);
878 fault_index++;
879 if ( fault_index > cap_num_fault_regs(iommu->cap) )
880 fault_index = 0;
881 }
883 /* clear primary fault overflow */
884 if ( fault_status & DMA_FSTS_PFO )
885 {
886 spin_lock_irqsave(&iommu->register_lock, flags);
887 dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO);
888 spin_unlock_irqrestore(&iommu->register_lock, flags);
889 }
890 }
892 static void dma_msi_unmask(unsigned int vector)
893 {
894 struct iommu *iommu = vector_to_iommu[vector];
895 unsigned long flags;
897 /* unmask it */
898 spin_lock_irqsave(&iommu->register_lock, flags);
899 dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
900 spin_unlock_irqrestore(&iommu->register_lock, flags);
901 }
903 static void dma_msi_mask(unsigned int vector)
904 {
905 unsigned long flags;
906 struct iommu *iommu = vector_to_iommu[vector];
908 /* mask it */
909 spin_lock_irqsave(&iommu->register_lock, flags);
910 dmar_writel(iommu->reg, DMAR_FECTL_REG, DMA_FECTL_IM);
911 spin_unlock_irqrestore(&iommu->register_lock, flags);
912 }
914 static unsigned int dma_msi_startup(unsigned int vector)
915 {
916 dma_msi_unmask(vector);
917 return 0;
918 }
920 static void dma_msi_end(unsigned int vector)
921 {
922 dma_msi_unmask(vector);
923 ack_APIC_irq();
924 }
926 static void dma_msi_data_init(struct iommu *iommu, int vector)
927 {
928 u32 msi_data = 0;
929 unsigned long flags;
931 /* Fixed, edge, assert mode. Follow MSI setting */
932 msi_data |= vector & 0xff;
933 msi_data |= 1 << 14;
935 spin_lock_irqsave(&iommu->register_lock, flags);
936 dmar_writel(iommu->reg, DMAR_FEDATA_REG, msi_data);
937 spin_unlock_irqrestore(&iommu->register_lock, flags);
938 }
940 static void dma_msi_addr_init(struct iommu *iommu, int phy_cpu)
941 {
942 u64 msi_address;
943 unsigned long flags;
945 /* Physical, dedicated cpu. Follow MSI setting */
946 msi_address = (MSI_ADDRESS_HEADER << (MSI_ADDRESS_HEADER_SHIFT + 8));
947 msi_address |= MSI_PHYSICAL_MODE << 2;
948 msi_address |= MSI_REDIRECTION_HINT_MODE << 3;
949 msi_address |= phy_cpu << MSI_TARGET_CPU_SHIFT;
951 spin_lock_irqsave(&iommu->register_lock, flags);
952 dmar_writel(iommu->reg, DMAR_FEADDR_REG, (u32)msi_address);
953 dmar_writel(iommu->reg, DMAR_FEUADDR_REG, (u32)(msi_address >> 32));
954 spin_unlock_irqrestore(&iommu->register_lock, flags);
955 }
957 static void dma_msi_set_affinity(unsigned int vector, cpumask_t dest)
958 {
959 struct iommu *iommu = vector_to_iommu[vector];
960 dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(dest)));
961 }
963 static struct hw_interrupt_type dma_msi_type = {
964 .typename = "DMA_MSI",
965 .startup = dma_msi_startup,
966 .shutdown = dma_msi_mask,
967 .enable = dma_msi_unmask,
968 .disable = dma_msi_mask,
969 .ack = dma_msi_mask,
970 .end = dma_msi_end,
971 .set_affinity = dma_msi_set_affinity,
972 };
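/*
 * The IOMMU reports faults through its own MSI-style interrupt: the data
 * and address are programmed directly into the FEDATA/FEADDR/FEUADDR
 * registers rather than through a PCI MSI capability, so a dedicated
 * hw_interrupt_type (above) masks/unmasks it via the FECTL register.
 */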
974 int iommu_set_interrupt(struct iommu *iommu)
975 {
976 int vector, ret;
978 vector = assign_irq_vector(AUTO_ASSIGN);
979 vector_to_iommu[vector] = iommu;
981 /* VT-d fault is a MSI, make irq == vector */
982 irq_vector[vector] = vector;
983 vector_irq[vector] = vector;
985 if ( !vector )
986 {
987 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n");
988 return -EINVAL;
989 }
991 irq_desc[vector].handler = &dma_msi_type;
992 ret = request_irq(vector, iommu_page_fault, 0, "dmar", iommu);
993 if ( ret )
994 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n");
995 return vector;
996 }
998 static int iommu_alloc(struct acpi_drhd_unit *drhd)
999 {
1000 struct iommu *iommu;
1002 if ( nr_iommus > MAX_IOMMUS )
1004 gdprintk(XENLOG_ERR VTDPREFIX,
1005 "IOMMU: nr_iommus %d > MAX_IOMMUS\n", nr_iommus);
1006 return -ENOMEM;
1009 iommu = xmalloc(struct iommu);
1010 if ( iommu == NULL )
1011 return -ENOMEM;
1012 memset(iommu, 0, sizeof(struct iommu));
1014 iommu->intel = alloc_intel_iommu();
1015 if ( iommu->intel == NULL )
1017 xfree(iommu);
1018 return -ENOMEM;
1021 set_fixmap_nocache(FIX_IOMMU_REGS_BASE_0 + nr_iommus, drhd->address);
1022 iommu->reg = (void *)fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
1023 nr_iommus++;
1025 iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
1026 iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
1028 spin_lock_init(&iommu->lock);
1029 spin_lock_init(&iommu->register_lock);
1031 drhd->iommu = iommu;
1032 return 0;
1035 static void iommu_free(struct acpi_drhd_unit *drhd)
1037 struct iommu *iommu = drhd->iommu;
1039 if ( iommu == NULL )
1040 return;
1042 if ( iommu->root_maddr != 0 )
1044 free_pgtable_maddr(iommu->root_maddr);
1045 iommu->root_maddr = 0;
1048 if ( iommu->reg )
1049 iounmap(iommu->reg);
1051 free_intel_iommu(iommu->intel);
1052 free_irq(iommu->vector);
1053 xfree(iommu);
1055 drhd->iommu = NULL;
1058 #define guestwidth_to_adjustwidth(gaw) ({ \
1059 int agaw, r = (gaw - 12) % 9; \
1060 agaw = (r == 0) ? gaw : (gaw + 9 - r); \
1061 if ( agaw > 64 ) \
1062 agaw = 64; \
1063 agaw; })
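/*
 * guestwidth_to_adjustwidth() rounds the guest address width up to
 * 12 + a multiple of 9, matching VT-d's 4KB pages with 9 address bits
 * translated per page-table level. For example, a 32-bit guest width
 * becomes 39 (3 levels) and 48 stays 48 (4 levels), capped at 64.
 */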
1065 static int intel_iommu_domain_init(struct domain *d)
1067 struct hvm_iommu *hd = domain_hvm_iommu(d);
1068 struct iommu *iommu = NULL;
1069 int guest_width = DEFAULT_DOMAIN_ADDRESS_WIDTH;
1070 int i, adjust_width, agaw;
1071 unsigned long sagaw;
1072 struct acpi_drhd_unit *drhd;
1074 INIT_LIST_HEAD(&hd->pdev_list);
1076 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1077 iommu = drhd->iommu;
1079 /* Calculate AGAW. */
1080 if ( guest_width > cap_mgaw(iommu->cap) )
1081 guest_width = cap_mgaw(iommu->cap);
1082 adjust_width = guestwidth_to_adjustwidth(guest_width);
1083 agaw = width_to_agaw(adjust_width);
1084 /* FIXME: hardware doesn't support it, choose a bigger one? */
1085 sagaw = cap_sagaw(iommu->cap);
1086 if ( !test_bit(agaw, &sagaw) )
1088 gdprintk(XENLOG_ERR VTDPREFIX,
1089 "IOMMU: hardware doesn't support the agaw\n");
1090 agaw = find_next_bit(&sagaw, 5, agaw);
1091 if ( agaw >= 5 )
1092 return -ENODEV;
1094 hd->agaw = agaw;
1096 if ( d->domain_id == 0 )
1098 /* Set up 1:1 page table for dom0. */
1099 for ( i = 0; i < max_page; i++ )
1100 iommu_map_page(d, i, i);
1102 setup_dom0_devices(d);
1103 setup_dom0_rmrr(d);
1105 iommu_flush_all();
1107 for_each_drhd_unit ( drhd )
1109 iommu = drhd->iommu;
1110 if ( iommu_enable_translation(iommu) )
1111 return -EIO;
1115 return 0;
1118 static int domain_context_mapping_one(
1119 struct domain *domain,
1120 struct iommu *iommu,
1121 u8 bus, u8 devfn)
1123 struct hvm_iommu *hd = domain_hvm_iommu(domain);
1124 struct context_entry *context, *context_entries;
1125 unsigned long flags;
1126 u64 maddr;
1128 maddr = bus_to_context_maddr(iommu, bus);
1129 context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
1130 context = &context_entries[devfn];
1132 if ( context_present(*context) )
1134 unmap_vtd_domain_page(context_entries);
1135 return 0;
1138 spin_lock_irqsave(&iommu->lock, flags);
1139 /*
1140 * domain_id 0 is not valid on Intel's IOMMU, force domain_id to
1141 * be 1 based as required by intel's iommu hw.
1142 */
1143 context_set_domain_id(context, domain);
1144 context_set_address_width(*context, hd->agaw);
1146 if ( ecap_pass_thru(iommu->ecap) )
1147 context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
1148 #ifdef CONTEXT_PASSTHRU
1149 else
1151 #endif
1152 ASSERT(hd->pgd_maddr != 0);
1153 context_set_address_root(*context, hd->pgd_maddr);
1154 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1155 #ifdef CONTEXT_PASSTHRU
1157 #endif
1159 context_set_fault_enable(*context);
1160 context_set_present(*context);
1161 iommu_flush_cache_entry(iommu, context);
1163 unmap_vtd_domain_page(context_entries);
1165 if ( iommu_flush_context_device(iommu, domain_iommu_domid(domain),
1166 (((u16)bus) << 8) | devfn,
1167 DMA_CCMD_MASK_NOBIT, 1) )
1168 iommu_flush_write_buffer(iommu);
1169 else
1170 iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);
1171 spin_unlock_irqrestore(&iommu->lock, flags);
1173 return 0;
1176 #define PCI_BASE_CLASS_BRIDGE 0x06
1177 #define PCI_CLASS_BRIDGE_PCI 0x0604
1179 #define DEV_TYPE_PCIe_ENDPOINT 1
1180 #define DEV_TYPE_PCI_BRIDGE 2
1181 #define DEV_TYPE_PCI 3
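/*
 * pdev_type() classifies a device so that domain_context_mapping() knows
 * which bus/devfn to install in the context entry: PCIe endpoints are
 * mapped under their own BDF, while devices behind a conventional PCI
 * bridge are mapped via the bridge recorded in bus2bridge[], since the
 * bridge typically forwards their transactions with its own requester id.
 */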
1183 int pdev_type(struct pci_dev *dev)
1185 u16 class_device;
1186 u16 status;
1188 class_device = pci_conf_read16(dev->bus, PCI_SLOT(dev->devfn),
1189 PCI_FUNC(dev->devfn), PCI_CLASS_DEVICE);
1190 if ( class_device == PCI_CLASS_BRIDGE_PCI )
1191 return DEV_TYPE_PCI_BRIDGE;
1193 status = pci_conf_read16(dev->bus, PCI_SLOT(dev->devfn),
1194 PCI_FUNC(dev->devfn), PCI_STATUS);
1196 if ( !(status & PCI_STATUS_CAP_LIST) )
1197 return DEV_TYPE_PCI;
1199 if ( pci_find_next_cap(dev->bus, dev->devfn,
1200 PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP) )
1201 return DEV_TYPE_PCIe_ENDPOINT;
1203 return DEV_TYPE_PCI;
1206 #define MAX_BUSES 256
1207 struct pci_dev bus2bridge[MAX_BUSES];
1209 static int domain_context_mapping(
1210 struct domain *domain,
1211 struct iommu *iommu,
1212 struct pci_dev *pdev)
1214 int ret = 0;
1215 int dev, func, sec_bus, sub_bus;
1216 u32 type;
1218 type = pdev_type(pdev);
1219 switch ( type )
1221 case DEV_TYPE_PCI_BRIDGE:
1222 sec_bus = pci_conf_read8(
1223 pdev->bus, PCI_SLOT(pdev->devfn),
1224 PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS);
1226 if ( bus2bridge[sec_bus].bus == 0 )
1228 bus2bridge[sec_bus].bus = pdev->bus;
1229 bus2bridge[sec_bus].devfn = pdev->devfn;
1232 sub_bus = pci_conf_read8(
1233 pdev->bus, PCI_SLOT(pdev->devfn),
1234 PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
1236 if ( sec_bus != sub_bus )
1237 gdprintk(XENLOG_WARNING VTDPREFIX,
1238 "context_context_mapping: nested PCI bridge not "
1239 "supported: bdf = %x:%x:%x sec_bus = %x sub_bus = %x\n",
1240 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1241 sec_bus, sub_bus);
1242 break;
1243 case DEV_TYPE_PCIe_ENDPOINT:
1244 gdprintk(XENLOG_INFO VTDPREFIX,
1245 "domain_context_mapping:PCIe : bdf = %x:%x:%x\n",
1246 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1247 ret = domain_context_mapping_one(domain, iommu,
1248 (u8)(pdev->bus), (u8)(pdev->devfn));
1249 break;
1250 case DEV_TYPE_PCI:
1251 gdprintk(XENLOG_INFO VTDPREFIX,
1252 "domain_context_mapping:PCI: bdf = %x:%x:%x\n",
1253 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1255 if ( pdev->bus == 0 )
1256 ret = domain_context_mapping_one(
1257 domain, iommu, (u8)(pdev->bus), (u8)(pdev->devfn));
1258 else
1260 if ( bus2bridge[pdev->bus].bus != 0 )
1261 gdprintk(XENLOG_WARNING VTDPREFIX,
1262 "domain_context_mapping:bus2bridge"
1263 "[%d].bus != 0\n", pdev->bus);
1265 ret = domain_context_mapping_one(
1266 domain, iommu,
1267 (u8)(bus2bridge[pdev->bus].bus),
1268 (u8)(bus2bridge[pdev->bus].devfn));
1270 /* now map everything behind the PCI bridge */
1271 for ( dev = 0; dev < 32; dev++ )
1273 for ( func = 0; func < 8; func++ )
1275 ret = domain_context_mapping_one(
1276 domain, iommu,
1277 pdev->bus, (u8)PCI_DEVFN(dev, func));
1278 if ( ret )
1279 return ret;
1283 break;
1284 default:
1285 gdprintk(XENLOG_ERR VTDPREFIX,
1286 "domain_context_mapping:unknown type : bdf = %x:%x:%x\n",
1287 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1288 ret = -EINVAL;
1289 break;
1292 return ret;
1295 static int domain_context_unmap_one(
1296 struct domain *domain,
1297 struct iommu *iommu,
1298 u8 bus, u8 devfn)
1300 struct context_entry *context, *context_entries;
1301 unsigned long flags;
1302 u64 maddr;
1304 maddr = bus_to_context_maddr(iommu, bus);
1305 context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
1306 context = &context_entries[devfn];
1308 if ( !context_present(*context) )
1310 unmap_vtd_domain_page(context_entries);
1311 return 0;
1314 spin_lock_irqsave(&iommu->lock, flags);
1315 context_clear_present(*context);
1316 context_clear_entry(*context);
1317 iommu_flush_cache_entry(iommu, context);
1318 iommu_flush_context_global(iommu, 0);
1319 iommu_flush_iotlb_global(iommu, 0);
1320 unmap_vtd_domain_page(context_entries);
1321 spin_unlock_irqrestore(&iommu->lock, flags);
1323 return 0;
1326 static int domain_context_unmap(
1327 struct domain *domain,
1328 struct iommu *iommu,
1329 struct pci_dev *pdev)
1331 int ret = 0;
1332 int dev, func, sec_bus, sub_bus;
1333 u32 type;
1335 type = pdev_type(pdev);
1336 switch ( type )
1338 case DEV_TYPE_PCI_BRIDGE:
1339 sec_bus = pci_conf_read8(
1340 pdev->bus, PCI_SLOT(pdev->devfn),
1341 PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS);
1342 sub_bus = pci_conf_read8(
1343 pdev->bus, PCI_SLOT(pdev->devfn),
1344 PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
1345 break;
1346 case DEV_TYPE_PCIe_ENDPOINT:
1347 ret = domain_context_unmap_one(domain, iommu,
1348 (u8)(pdev->bus), (u8)(pdev->devfn));
1349 break;
1350 case DEV_TYPE_PCI:
1351 if ( pdev->bus == 0 )
1352 ret = domain_context_unmap_one(
1353 domain, iommu,
1354 (u8)(pdev->bus), (u8)(pdev->devfn));
1355 else
1357 if ( bus2bridge[pdev->bus].bus != 0 )
1358 gdprintk(XENLOG_WARNING VTDPREFIX,
1359 "domain_context_unmap:"
1360 "bus2bridge[%d].bus != 0\n", pdev->bus);
1362 ret = domain_context_unmap_one(domain, iommu,
1363 (u8)(bus2bridge[pdev->bus].bus),
1364 (u8)(bus2bridge[pdev->bus].devfn));
1366 /* Unmap everything behind the PCI bridge */
1367 for ( dev = 0; dev < 32; dev++ )
1369 for ( func = 0; func < 8; func++ )
1371 ret = domain_context_unmap_one(
1372 domain, iommu,
1373 pdev->bus, (u8)PCI_DEVFN(dev, func));
1374 if ( ret )
1375 return ret;
1379 break;
1380 default:
1381 gdprintk(XENLOG_ERR VTDPREFIX,
1382 "domain_context_unmap:unknown type: bdf = %x:%x:%x\n",
1383 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1384 ret = -EINVAL;
1385 break;
1388 return ret;
1391 void reassign_device_ownership(
1392 struct domain *source,
1393 struct domain *target,
1394 u8 bus, u8 devfn)
1396 struct hvm_iommu *source_hd = domain_hvm_iommu(source);
1397 struct hvm_iommu *target_hd = domain_hvm_iommu(target);
1398 struct pci_dev *pdev;
1399 struct acpi_drhd_unit *drhd;
1400 struct iommu *iommu;
1401 int status;
1402 unsigned long flags;
1404 pdev_flr(bus, devfn);
1406 for_each_pdev( source, pdev )
1407 if ( (pdev->bus == bus) && (pdev->devfn == devfn) )
1408 goto found;
1410 return;
1412 found:
1413 drhd = acpi_find_matched_drhd_unit(pdev);
1414 iommu = drhd->iommu;
1415 domain_context_unmap(source, iommu, pdev);
1417 /* Move pci device from the source domain to target domain. */
1418 spin_lock_irqsave(&source_hd->iommu_list_lock, flags);
1419 spin_lock_irqsave(&target_hd->iommu_list_lock, flags);
1420 list_move(&pdev->list, &target_hd->pdev_list);
1421 spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags);
1422 spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags);
1424 status = domain_context_mapping(target, iommu, pdev);
1425 if ( status != 0 )
1426 gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n");
1429 void return_devices_to_dom0(struct domain *d)
1431 struct hvm_iommu *hd = domain_hvm_iommu(d);
1432 struct pci_dev *pdev;
1434 while ( !list_empty(&hd->pdev_list) )
1436 pdev = list_entry(hd->pdev_list.next, typeof(*pdev), list);
1437 pci_cleanup_msi(pdev->bus, pdev->devfn);
1438 reassign_device_ownership(d, dom0, pdev->bus, pdev->devfn);
1441 #ifdef VTD_DEBUG
1442 for_each_pdev ( dom0, pdev )
1443 dprintk(XENLOG_INFO VTDPREFIX,
1444 "return_devices_to_dom0:%x: bdf = %x:%x:%x\n",
1445 dom0->domain_id, pdev->bus,
1446 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1447 #endif
1450 void iommu_domain_teardown(struct domain *d)
1452 if ( list_empty(&acpi_drhd_units) )
1453 return;
1455 iommu_free_pagetable(d);
1456 return_devices_to_dom0(d);
1457 iommu_domid_release(d);
1460 static int domain_context_mapped(struct pci_dev *pdev)
1462 struct acpi_drhd_unit *drhd;
1463 struct iommu *iommu;
1464 int ret;
1466 for_each_drhd_unit ( drhd )
1468 iommu = drhd->iommu;
1469 ret = device_context_mapped(iommu, pdev->bus, pdev->devfn);
1470 if ( ret )
1471 return ret;
1474 return 0;
1477 int intel_iommu_map_page(
1478 struct domain *d, unsigned long gfn, unsigned long mfn)
1480 struct acpi_drhd_unit *drhd;
1481 struct iommu *iommu;
1482 struct dma_pte *page = NULL, *pte = NULL;
1483 u64 pg_maddr;
1485 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1486 iommu = drhd->iommu;
1488 #ifdef CONTEXT_PASSTHRU
1489 /* do nothing if dom0 and iommu supports pass thru */
1490 if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
1491 return 0;
1492 #endif
1494 pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K);
1495 if ( pg_maddr == 0 )
1496 return -ENOMEM;
1497 page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
1498 pte = page + (gfn & LEVEL_MASK);
1499 dma_set_pte_addr(*pte, (paddr_t)mfn << PAGE_SHIFT_4K);
1500 dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
1501 iommu_flush_cache_entry(iommu, pte);
1502 unmap_vtd_domain_page(page);
1504 for_each_drhd_unit ( drhd )
1506 iommu = drhd->iommu;
1507 if ( cap_caching_mode(iommu->cap) )
1508 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
1509 (paddr_t)gfn << PAGE_SHIFT_4K, 1, 0);
1510 else if ( cap_rwbf(iommu->cap) )
1511 iommu_flush_write_buffer(iommu);
1514 return 0;
1517 int intel_iommu_unmap_page(struct domain *d, unsigned long gfn)
1519 struct acpi_drhd_unit *drhd;
1520 struct iommu *iommu;
1522 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1523 iommu = drhd->iommu;
1525 #ifdef CONTEXT_PASSTHRU
1526 /* do nothing if dom0 and iommu supports pass thru */
1527 if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
1528 return 0;
1529 #endif
1531 dma_pte_clear_one(d, (paddr_t)gfn << PAGE_SHIFT_4K);
1533 return 0;
1536 int iommu_page_mapping(struct domain *domain, paddr_t iova,
1537 paddr_t hpa, size_t size, int prot)
1539 struct acpi_drhd_unit *drhd;
1540 struct iommu *iommu;
1541 u64 start_pfn, end_pfn;
1542 struct dma_pte *page = NULL, *pte = NULL;
1543 int index;
1544 u64 pg_maddr;
1546 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1547 iommu = drhd->iommu;
1548 if ( (prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0 )
1549 return -EINVAL;
1550 iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;
1551 start_pfn = hpa >> PAGE_SHIFT_4K;
1552 end_pfn = (PAGE_ALIGN_4K(hpa + size)) >> PAGE_SHIFT_4K;
1553 index = 0;
1554 while ( start_pfn < end_pfn )
1556 pg_maddr = addr_to_dma_page_maddr(domain, iova + PAGE_SIZE_4K * index);
1557 if ( pg_maddr == 0 )
1558 return -ENOMEM;
1559 page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
1560 pte = page + (start_pfn & LEVEL_MASK);
1561 dma_set_pte_addr(*pte, (paddr_t)start_pfn << PAGE_SHIFT_4K);
1562 dma_set_pte_prot(*pte, prot);
1563 iommu_flush_cache_entry(iommu, pte);
1564 unmap_vtd_domain_page(page);
1565 start_pfn++;
1566 index++;
1569 for_each_drhd_unit ( drhd )
1571 iommu = drhd->iommu;
1572 if ( cap_caching_mode(iommu->cap) )
1573 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
1574 iova, index, 0);
1575 else if ( cap_rwbf(iommu->cap) )
1576 iommu_flush_write_buffer(iommu);
1579 return 0;
1582 int iommu_page_unmapping(struct domain *domain, paddr_t addr, size_t size)
1584 dma_pte_clear_range(domain, addr, addr + size);
1586 return 0;
1589 void iommu_flush(struct domain *d, unsigned long gfn, u64 *p2m_entry)
1591 struct acpi_drhd_unit *drhd;
1592 struct iommu *iommu = NULL;
1593 struct dma_pte *pte = (struct dma_pte *) p2m_entry;
1595 for_each_drhd_unit ( drhd )
1597 iommu = drhd->iommu;
1598 if ( cap_caching_mode(iommu->cap) )
1599 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
1600 (paddr_t)gfn << PAGE_SHIFT_4K, 1, 0);
1601 else if ( cap_rwbf(iommu->cap) )
1602 iommu_flush_write_buffer(iommu);
1605 iommu_flush_cache_entry(iommu, pte);
1608 static int iommu_prepare_rmrr_dev(
1609 struct domain *d,
1610 struct acpi_rmrr_unit *rmrr,
1611 struct pci_dev *pdev)
1613 struct acpi_drhd_unit *drhd;
1614 unsigned long size;
1615 int ret;
1617 /* page table init */
1618 size = rmrr->end_address - rmrr->base_address + 1;
1619 ret = iommu_page_mapping(d, rmrr->base_address,
1620 rmrr->base_address, size,
1621 DMA_PTE_READ|DMA_PTE_WRITE);
1622 if ( ret )
1623 return ret;
1625 if ( domain_context_mapped(pdev) == 0 )
1627 drhd = acpi_find_matched_drhd_unit(pdev);
1628 ret = domain_context_mapping(d, drhd->iommu, pdev);
1629 if ( !ret )
1630 return 0;
1633 return ret;
1636 static void setup_dom0_devices(struct domain *d)
1638 struct hvm_iommu *hd;
1639 struct acpi_drhd_unit *drhd;
1640 struct pci_dev *pdev;
1641 int bus, dev, func, ret;
1642 u32 l;
1644 hd = domain_hvm_iommu(d);
1646 for ( bus = 0; bus < 256; bus++ )
1648 for ( dev = 0; dev < 32; dev++ )
1650 for ( func = 0; func < 8; func++ )
1652 l = pci_conf_read32(bus, dev, func, PCI_VENDOR_ID);
1653 /* some broken boards return 0 or ~0 if a slot is empty: */
1654 if ( (l == 0xffffffff) || (l == 0x00000000) ||
1655 (l == 0x0000ffff) || (l == 0xffff0000) )
1656 continue;
1657 pdev = xmalloc(struct pci_dev);
1658 pdev->bus = bus;
1659 pdev->devfn = PCI_DEVFN(dev, func);
1660 list_add_tail(&pdev->list, &hd->pdev_list);
1662 drhd = acpi_find_matched_drhd_unit(pdev);
1663 ret = domain_context_mapping(d, drhd->iommu, pdev);
1664 if ( ret != 0 )
1665 gdprintk(XENLOG_ERR VTDPREFIX,
1666 "domain_context_mapping failed\n");
1672 void clear_fault_bits(struct iommu *iommu)
1674 u64 val;
1676 val = dmar_readq(
1677 iommu->reg,
1678 cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+0x8);
1679 dmar_writeq(
1680 iommu->reg,
1681 cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+8,
1682 val);
1683 dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_FAULTS);
1686 static int init_vtd_hw(void)
1688 struct acpi_drhd_unit *drhd;
1689 struct iommu *iommu;
1690 struct iommu_flush *flush = NULL;
1691 int vector;
1692 int ret;
1694 for_each_drhd_unit ( drhd )
1696 iommu = drhd->iommu;
1697 ret = iommu_set_root_entry(iommu);
1698 if ( ret )
1700 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: set root entry failed\n");
1701 return -EIO;
1704 vector = iommu_set_interrupt(iommu);
1705 dma_msi_data_init(iommu, vector);
1706 dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
1707 iommu->vector = vector;
1708 clear_fault_bits(iommu);
1709 dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
1711 /* initialize flush functions */
1712 flush = iommu_get_flush(iommu);
1713 flush->context = flush_context_reg;
1714 flush->iotlb = flush_iotlb_reg;
1717 for_each_drhd_unit ( drhd )
1719 iommu = drhd->iommu;
1720 if ( qinval_setup(iommu) != 0 )
1721 dprintk(XENLOG_ERR VTDPREFIX,
1722 "Queued Invalidation hardware not found\n");
1725 for_each_drhd_unit ( drhd )
1727 iommu = drhd->iommu;
1728 if ( intremap_setup(iommu) != 0 )
1729 dprintk(XENLOG_ERR VTDPREFIX,
1730 "Interrupt Remapping hardware not found\n");
1733 return 0;
1736 static void setup_dom0_rmrr(struct domain *d)
1738 struct acpi_rmrr_unit *rmrr;
1739 struct pci_dev *pdev;
1740 int ret;
1742 for_each_rmrr_device ( rmrr, pdev )
1743 ret = iommu_prepare_rmrr_dev(d, rmrr, pdev);
1744 if ( ret )
1745 gdprintk(XENLOG_ERR VTDPREFIX,
1746 "IOMMU: mapping reserved region failed\n");
1747 end_for_each_rmrr_device ( rmrr, pdev )
1750 int intel_vtd_setup(void)
1752 struct acpi_drhd_unit *drhd;
1753 struct iommu *iommu;
1755 if ( !vtd_enabled )
1756 return -ENODEV;
1758 spin_lock_init(&domid_bitmap_lock);
1759 clflush_size = get_clflush_size();
1761 for_each_drhd_unit ( drhd )
1762 if ( iommu_alloc(drhd) != 0 )
1763 goto error;
1765 /* Allocate IO page directory page for the domain. */
1766 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1767 iommu = drhd->iommu;
1769 /* Allocate domain id bitmap, and set bit 0 as reserved */
1770 domid_bitmap_size = cap_ndoms(iommu->cap);
1771 domid_bitmap = xmalloc_array(unsigned long,
1772 BITS_TO_LONGS(domid_bitmap_size));
1773 if ( domid_bitmap == NULL )
1774 goto error;
1775 memset(domid_bitmap, 0, domid_bitmap_size / 8);
1776 set_bit(0, domid_bitmap);
1778 init_vtd_hw();
1780 return 0;
1782 error:
1783 for_each_drhd_unit ( drhd )
1784 iommu_free(drhd);
1785 vtd_enabled = 0;
1786 return -ENOMEM;
1789 /*
1790 * If the device isn't owned by dom0, it means it already
1791 * has been assigned to another domain, or it does not exist.
1792 */
1793 int device_assigned(u8 bus, u8 devfn)
1795 struct pci_dev *pdev;
1797 for_each_pdev( dom0, pdev )
1798 if ( (pdev->bus == bus ) && (pdev->devfn == devfn) )
1799 return 0;
1801 return 1;
1804 int intel_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
1806 struct acpi_rmrr_unit *rmrr;
1807 struct pci_dev *pdev;
1808 int ret = 0;
1810 if ( list_empty(&acpi_drhd_units) )
1811 return ret;
1813 reassign_device_ownership(dom0, d, bus, devfn);
1815 /* Set up the RMRR identity mapping */
1816 for_each_rmrr_device( rmrr, pdev )
1817 if ( pdev->bus == bus && pdev->devfn == devfn )
1819 /* FIXME: Because USB RMRR conflicts with guest bios region,
1820 * ignore USB RMRR temporarily.
1821 */
1822 if ( is_usb_device(pdev) )
1823 return 0;
1825 ret = iommu_prepare_rmrr_dev(d, rmrr, pdev);
1826 if ( ret )
1828 gdprintk(XENLOG_ERR VTDPREFIX,
1829 "IOMMU: mapping reserved region failed\n");
1830 return ret;
1833 end_for_each_rmrr_device(rmrr, pdev)
1835 return ret;
1838 u8 iommu_state[MAX_IOMMU_REGS * MAX_IOMMUS];
1839 int iommu_suspend(void)
1841 struct acpi_drhd_unit *drhd;
1842 struct iommu *iommu;
1843 int i = 0;
1845 iommu_flush_all();
1847 for_each_drhd_unit ( drhd )
1849 iommu = drhd->iommu;
1850 iommu_state[DMAR_RTADDR_REG * i] =
1851 (u64) dmar_readq(iommu->reg, DMAR_RTADDR_REG);
1852 iommu_state[DMAR_FECTL_REG * i] =
1853 (u32) dmar_readl(iommu->reg, DMAR_FECTL_REG);
1854 iommu_state[DMAR_FEDATA_REG * i] =
1855 (u32) dmar_readl(iommu->reg, DMAR_FEDATA_REG);
1856 iommu_state[DMAR_FEADDR_REG * i] =
1857 (u32) dmar_readl(iommu->reg, DMAR_FEADDR_REG);
1858 iommu_state[DMAR_FEUADDR_REG * i] =
1859 (u32) dmar_readl(iommu->reg, DMAR_FEUADDR_REG);
1860 iommu_state[DMAR_PLMBASE_REG * i] =
1861 (u32) dmar_readl(iommu->reg, DMAR_PLMBASE_REG);
1862 iommu_state[DMAR_PLMLIMIT_REG * i] =
1863 (u32) dmar_readl(iommu->reg, DMAR_PLMLIMIT_REG);
1864 iommu_state[DMAR_PHMBASE_REG * i] =
1865 (u64) dmar_readq(iommu->reg, DMAR_PHMBASE_REG);
1866 iommu_state[DMAR_PHMLIMIT_REG * i] =
1867 (u64) dmar_readq(iommu->reg, DMAR_PHMLIMIT_REG);
1868 i++;
1871 return 0;
1874 int iommu_resume(void)
1876 struct acpi_drhd_unit *drhd;
1877 struct iommu *iommu;
1878 int i = 0;
1880 iommu_flush_all();
1882 init_vtd_hw();
1883 for_each_drhd_unit ( drhd )
1885 iommu = drhd->iommu;
1886 dmar_writeq( iommu->reg, DMAR_RTADDR_REG,
1887 (u64) iommu_state[DMAR_RTADDR_REG * i]);
1888 dmar_writel(iommu->reg, DMAR_FECTL_REG,
1889 (u32) iommu_state[DMAR_FECTL_REG * i]);
1890 dmar_writel(iommu->reg, DMAR_FEDATA_REG,
1891 (u32) iommu_state[DMAR_FEDATA_REG * i]);
1892 dmar_writel(iommu->reg, DMAR_FEADDR_REG,
1893 (u32) iommu_state[DMAR_FEADDR_REG * i]);
1894 dmar_writel(iommu->reg, DMAR_FEUADDR_REG,
1895 (u32) iommu_state[DMAR_FEUADDR_REG * i]);
1896 dmar_writel(iommu->reg, DMAR_PLMBASE_REG,
1897 (u32) iommu_state[DMAR_PLMBASE_REG * i]);
1898 dmar_writel(iommu->reg, DMAR_PLMLIMIT_REG,
1899 (u32) iommu_state[DMAR_PLMLIMIT_REG * i]);
1900 dmar_writeq(iommu->reg, DMAR_PHMBASE_REG,
1901 (u64) iommu_state[DMAR_PHMBASE_REG * i]);
1902 dmar_writeq(iommu->reg, DMAR_PHMLIMIT_REG,
1903 (u64) iommu_state[DMAR_PHMLIMIT_REG * i]);
1905 if ( iommu_enable_translation(iommu) )
1906 return -EIO;
1907 i++;
1909 return 0;
1912 struct iommu_ops intel_iommu_ops = {
1913 .init = intel_iommu_domain_init,
1914 .assign_device = intel_iommu_assign_device,
1915 .teardown = iommu_domain_teardown,
1916 .map_page = intel_iommu_map_page,
1917 .unmap_page = intel_iommu_unmap_page,
1918 .reassign_device = reassign_device_ownership,
1919 };
1921 /*
1922 * Local variables:
1923 * mode: C
1924 * c-set-style: "BSD"
1925 * c-basic-offset: 4
1926 * tab-width: 4
1927 * indent-tabs-mode: nil
1928 * End:
1929 */