
view xen/arch/x86/hvm/vmx/vtd/intel-iommu.c @ 16569:32237d8517b1

vt-d: Use bitmap to solve domain-id limitation issue.

The Capability register reports the domain-id width supported by
hardware. For implementations supporting fewer than 16 domain-id bits,
the unused bits of the domain identifier field (87:72) in the context
entry are treated as reserved by hardware. For example, an
implementation supporting 4-bit domain-ids treats bits 87:76 of this
field as reserved. That leaves only 16 usable domain-ids, which are
easily exhausted. What's more, context entries programmed with the same
domain identifier must always reference the same address translation
structure (through the ASR field). So Dom16 will conflict with Dom0,
and device assignment fails.

This patch implements a domain-id bitmap to solve the above issue.
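
To make the idea concrete, here is a minimal standalone sketch of such a
bitmap allocator (not part of the changeset; the real implementation is
context_set_domain_id()/iommu_domid_release() in the listing below, which
uses find_first_zero_bit() under domid_bitmap_lock and sizes the bitmap
from cap_ndoms(iommu->cap)):

    #include <stdint.h>
    #include <stdio.h>

    /* One bit per hardware domain-id; bit 0 is reserved, ids are handed
     * out on first use and returned on domain teardown. */
    #define NR_DOMIDS 16                     /* e.g. a 4-bit domain-id width */
    static uint8_t domid_bitmap[(NR_DOMIDS + 7) / 8];

    static int alloc_domid(void)
    {
        for ( int id = 1; id < NR_DOMIDS; id++ )   /* id 0 stays reserved */
            if ( !(domid_bitmap[id / 8] & (1 << (id % 8))) )
            {
                domid_bitmap[id / 8] |= 1 << (id % 8);
                return id;
            }
        return -1;                                 /* bitmap exhausted */
    }

    static void release_domid(int id)
    {
        domid_bitmap[id / 8] &= ~(1 << (id % 8));
    }

    int main(void)
    {
        domid_bitmap[0] |= 1;            /* reserve id 0, as iommu_setup() does */
        int a = alloc_domid(), b = alloc_domid();
        printf("dom A -> did %d, dom B -> did %d\n", a, b);       /* 1 and 2 */
        release_domid(a);
        printf("after release, next -> did %d\n", alloc_domid()); /* 1 again */
        return 0;
    }

The allocated id is then written into bits 87:72 of the context entry
(context->hi above DID_HIGH_OFFSET), so two Xen domains never share a
hardware domain-id even when the id width is small.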

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Dec 05 10:53:47 2007 +0000 (2007-12-05)
parents f173cd885ffb
children 8ae3f083490a
line source
1 /*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) Ashok Raj <ashok.raj@intel.com>
18 * Copyright (C) Shaohua Li <shaohua.li@intel.com>
19 * Copyright (C) Allen Kay <allen.m.kay@intel.com> - adapted to xen
20 */
22 #include <xen/init.h>
23 #include <xen/irq.h>
24 #include <xen/spinlock.h>
25 #include <xen/sched.h>
26 #include <xen/xmalloc.h>
27 #include <xen/domain_page.h>
28 #include <asm/delay.h>
29 #include <asm/string.h>
30 #include <asm/mm.h>
31 #include <asm/iommu.h>
32 #include <asm/hvm/vmx/intel-iommu.h>
33 #include "dmar.h"
34 #include "pci-direct.h"
35 #include "pci_regs.h"
36 #include "msi.h"
38 #define domain_iommu_domid(d) ((d)->arch.hvm_domain.hvm_iommu.iommu_domid)
40 #define VTDPREFIX
41 extern void print_iommu_regs(struct acpi_drhd_unit *drhd);
42 extern void print_vtd_entries(struct domain *d, int bus, int devfn,
43 unsigned long gmfn);
45 static spinlock_t domid_bitmap_lock; /* protect domain id bitmap */
46 static int domid_bitmap_size; /* domain id bitmap size in bit */
47 static void *domid_bitmap; /* iommu domain id bitmap */
49 #define DID_FIELD_WIDTH 16
50 #define DID_HIGH_OFFSET 8
51 static void context_set_domain_id(struct context_entry *context,
52 struct domain *d)
53 {
54 unsigned long flags;
55 domid_t iommu_domid = domain_iommu_domid(d);
57 if ( iommu_domid == 0 )
58 {
59 spin_lock_irqsave(&domid_bitmap_lock, flags);
60 iommu_domid = find_first_zero_bit(domid_bitmap, domid_bitmap_size);
61 set_bit(iommu_domid, domid_bitmap);
62 spin_unlock_irqrestore(&domid_bitmap_lock, flags);
63 d->arch.hvm_domain.hvm_iommu.iommu_domid = iommu_domid;
64 }
66 context->hi &= (1 << DID_HIGH_OFFSET) - 1;
67 context->hi |= iommu_domid << DID_HIGH_OFFSET;
68 }
70 static void iommu_domid_release(struct domain *d)
71 {
72 domid_t iommu_domid = domain_iommu_domid(d);
74 if ( iommu_domid != 0 )
75 {
76 d->arch.hvm_domain.hvm_iommu.iommu_domid = 0;
77 clear_bit(iommu_domid, domid_bitmap);
78 }
79 }
81 unsigned int x86_clflush_size;
82 void clflush_cache_range(void *adr, int size)
83 {
84 int i;
85 for ( i = 0; i < size; i += x86_clflush_size )
86 clflush(adr + i);
87 }
89 static void __iommu_flush_cache(struct iommu *iommu, void *addr, int size)
90 {
91 if ( !ecap_coherent(iommu->ecap) )
92 clflush_cache_range(addr, size);
93 }
95 #define iommu_flush_cache_entry(iommu, addr) \
96 __iommu_flush_cache(iommu, addr, 8)
97 #define iommu_flush_cache_page(iommu, addr) \
98 __iommu_flush_cache(iommu, addr, PAGE_SIZE_4K)
100 int nr_iommus;
101 /* context entry handling */
102 static struct context_entry * device_to_context_entry(struct iommu *iommu,
103 u8 bus, u8 devfn)
104 {
105 struct root_entry *root;
106 struct context_entry *context;
107 unsigned long phy_addr;
108 unsigned long flags;
110 spin_lock_irqsave(&iommu->lock, flags);
111 root = &iommu->root_entry[bus];
112 if ( !root_present(*root) )
113 {
114 phy_addr = (unsigned long) alloc_xenheap_page();
115 if ( !phy_addr )
116 {
117 spin_unlock_irqrestore(&iommu->lock, flags);
118 return NULL;
119 }
120 memset((void *) phy_addr, 0, PAGE_SIZE);
121 iommu_flush_cache_page(iommu, (void *)phy_addr);
122 phy_addr = virt_to_maddr((void *)phy_addr);
123 set_root_value(*root, phy_addr);
124 set_root_present(*root);
125 iommu_flush_cache_entry(iommu, root);
126 }
127 phy_addr = (unsigned long) get_context_addr(*root);
128 context = (struct context_entry *)maddr_to_virt(phy_addr);
129 spin_unlock_irqrestore(&iommu->lock, flags);
130 return &context[devfn];
131 }
133 static int device_context_mapped(struct iommu *iommu, u8 bus, u8 devfn)
134 {
135 struct root_entry *root;
136 struct context_entry *context;
137 unsigned long phy_addr;
138 int ret;
139 unsigned long flags;
141 spin_lock_irqsave(&iommu->lock, flags);
142 root = &iommu->root_entry[bus];
143 if ( !root_present(*root) )
144 {
145 ret = 0;
146 goto out;
147 }
148 phy_addr = get_context_addr(*root);
149 context = (struct context_entry *)maddr_to_virt(phy_addr);
150 ret = context_present(context[devfn]);
151 out:
152 spin_unlock_irqrestore(&iommu->lock, flags);
153 return ret;
154 }
156 static struct page_info *addr_to_dma_page(struct domain *domain, u64 addr)
157 {
158 struct hvm_iommu *hd = domain_hvm_iommu(domain);
159 struct acpi_drhd_unit *drhd;
160 struct iommu *iommu;
161 int addr_width = agaw_to_width(hd->agaw);
162 struct dma_pte *parent, *pte = NULL, *pgd;
163 int level = agaw_to_level(hd->agaw);
164 int offset;
165 unsigned long flags;
166 struct page_info *pg = NULL;
167 u64 *vaddr = NULL;
169 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
170 iommu = drhd->iommu;
172 addr &= (((u64)1) << addr_width) - 1;
173 spin_lock_irqsave(&hd->mapping_lock, flags);
174 if ( !hd->pgd )
175 {
176 pgd = (struct dma_pte *)alloc_xenheap_page();
177 if ( !pgd )
178 {
179 spin_unlock_irqrestore(&hd->mapping_lock, flags);
180 return NULL;
181 }
182 memset(pgd, 0, PAGE_SIZE);
183 hd->pgd = pgd;
184 }
186 parent = hd->pgd;
187 while ( level > 1 )
188 {
189 offset = address_level_offset(addr, level);
190 pte = &parent[offset];
192 if ( dma_pte_addr(*pte) == 0 )
193 {
194 pg = alloc_domheap_page(NULL);
195 vaddr = map_domain_page(page_to_mfn(pg));
196 if ( !vaddr )
197 {
198 spin_unlock_irqrestore(&hd->mapping_lock, flags);
199 return NULL;
200 }
201 memset(vaddr, 0, PAGE_SIZE);
202 iommu_flush_cache_page(iommu, vaddr);
204 dma_set_pte_addr(*pte, page_to_maddr(pg));
206 /*
207 * higher level tables always set r/w; the last level
208 * page table controls read/write
209 */
210 dma_set_pte_readable(*pte);
211 dma_set_pte_writable(*pte);
212 iommu_flush_cache_entry(iommu, pte);
213 }
214 else
215 {
216 pg = maddr_to_page(pte->val);
217 vaddr = map_domain_page(page_to_mfn(pg));
218 if ( !vaddr )
219 {
220 spin_unlock_irqrestore(&hd->mapping_lock, flags);
221 return NULL;
222 }
223 }
225 if ( parent != hd->pgd )
226 unmap_domain_page(parent);
228 if ( level == 2 && vaddr )
229 {
230 unmap_domain_page(vaddr);
231 break;
232 }
234 parent = (struct dma_pte *)vaddr;
235 vaddr = NULL;
236 level--;
237 }
239 spin_unlock_irqrestore(&hd->mapping_lock, flags);
240 return pg;
241 }
243 /* return address's page at specific level */
244 static struct page_info *dma_addr_level_page(struct domain *domain,
245 u64 addr, int level)
246 {
247 struct hvm_iommu *hd = domain_hvm_iommu(domain);
248 struct dma_pte *parent, *pte = NULL;
249 int total = agaw_to_level(hd->agaw);
250 int offset;
251 struct page_info *pg = NULL;
253 parent = hd->pgd;
254 while ( level <= total )
255 {
256 offset = address_level_offset(addr, total);
257 pte = &parent[offset];
258 if ( dma_pte_addr(*pte) == 0 )
259 {
260 if ( parent != hd->pgd )
261 unmap_domain_page(parent);
262 break;
263 }
265 pg = maddr_to_page(pte->val);
266 if ( parent != hd->pgd )
267 unmap_domain_page(parent);
269 if ( level == total )
270 return pg;
272 parent = map_domain_page(page_to_mfn(pg));
273 total--;
274 }
276 return NULL;
277 }
279 static void iommu_flush_write_buffer(struct iommu *iommu)
280 {
281 u32 val;
282 unsigned long flag;
283 unsigned long start_time;
285 if ( !cap_rwbf(iommu->cap) )
286 return;
287 val = iommu->gcmd | DMA_GCMD_WBF;
289 spin_lock_irqsave(&iommu->register_lock, flag);
290 dmar_writel(iommu->reg, DMAR_GCMD_REG, val);
292 /* Make sure hardware complete it */
293 start_time = jiffies;
294 for ( ; ; )
295 {
296 val = dmar_readl(iommu->reg, DMAR_GSTS_REG);
297 if ( !(val & DMA_GSTS_WBFS) )
298 break;
299 if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
300 panic("DMAR hardware is malfunctional,"
301 " please disable IOMMU\n");
302 cpu_relax();
303 }
304 spin_unlock_irqrestore(&iommu->register_lock, flag);
305 }
307 /* return value determines whether we need a write buffer flush */
308 static int __iommu_flush_context(
309 struct iommu *iommu,
310 u16 did, u16 source_id, u8 function_mask, u64 type,
311 int non_present_entry_flush)
312 {
313 u64 val = 0;
314 unsigned long flag;
315 unsigned long start_time;
317 /*
318 * In the non-present entry flush case, if hardware doesn't cache
319 * non-present entries we do nothing; if hardware does cache non-present
320 * entries, we flush the entries of domain 0 (that domain id is used to
321 * cache any non-present entries)
322 */
323 if ( non_present_entry_flush )
324 {
325 if ( !cap_caching_mode(iommu->cap) )
326 return 1;
327 else
328 did = 0;
329 }
331 /* use register invalidation */
332 switch ( type )
333 {
334 case DMA_CCMD_GLOBAL_INVL:
335 val = DMA_CCMD_GLOBAL_INVL;
336 break;
337 case DMA_CCMD_DOMAIN_INVL:
338 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
339 break;
340 case DMA_CCMD_DEVICE_INVL:
341 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
342 |DMA_CCMD_SID(source_id)|DMA_CCMD_FM(function_mask);
343 break;
344 default:
345 BUG();
346 }
347 val |= DMA_CCMD_ICC;
349 spin_lock_irqsave(&iommu->register_lock, flag);
350 dmar_writeq(iommu->reg, DMAR_CCMD_REG, val);
352 /* Make sure hardware complete it */
353 start_time = jiffies;
354 for ( ; ; )
355 {
356 val = dmar_readq(iommu->reg, DMAR_CCMD_REG);
357 if ( !(val & DMA_CCMD_ICC) )
358 break;
359 if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
360 panic("DMAR hardware is malfunctional, please disable IOMMU\n");
361 cpu_relax();
362 }
363 spin_unlock_irqrestore(&iommu->register_lock, flag);
364 /* flushing the context entry will implicitly flush the write buffer */
365 return 0;
366 }
368 static int inline iommu_flush_context_global(
369 struct iommu *iommu, int non_present_entry_flush)
370 {
371 return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
372 non_present_entry_flush);
373 }
375 static int inline iommu_flush_context_domain(
376 struct iommu *iommu, u16 did, int non_present_entry_flush)
377 {
378 return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
379 non_present_entry_flush);
380 }
382 static int inline iommu_flush_context_device(
383 struct iommu *iommu, u16 did, u16 source_id,
384 u8 function_mask, int non_present_entry_flush)
385 {
386 return __iommu_flush_context(iommu, did, source_id, function_mask,
387 DMA_CCMD_DEVICE_INVL,
388 non_present_entry_flush);
389 }
391 /* return value determines whether we need a write buffer flush */
392 static int __iommu_flush_iotlb(struct iommu *iommu, u16 did,
393 u64 addr, unsigned int size_order, u64 type,
394 int non_present_entry_flush)
395 {
396 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
397 u64 val = 0, val_iva = 0;
398 unsigned long flag;
399 unsigned long start_time;
401 /*
402 * In the non-present entry flush case, if hardware doesn't cache
403 * non-present entries we do nothing; if hardware does cache non-present
404 * entries, we flush the entries of domain 0 (that domain id is used to
405 * cache any non-present entries)
406 */
407 if ( non_present_entry_flush )
408 {
409 if ( !cap_caching_mode(iommu->cap) )
410 return 1;
411 else
412 did = 0;
413 }
415 /* use register invalidation */
416 switch ( type )
417 {
418 case DMA_TLB_GLOBAL_FLUSH:
419 /* global flush doesn't need set IVA_REG */
420 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
421 break;
422 case DMA_TLB_DSI_FLUSH:
423 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
424 break;
425 case DMA_TLB_PSI_FLUSH:
426 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
427 /* Note: always flush non-leaf currently */
428 val_iva = size_order | addr;
429 break;
430 default:
431 BUG();
432 }
433 /* Note: set drain read/write */
434 if ( cap_read_drain(iommu->cap) )
435 val |= DMA_TLB_READ_DRAIN;
436 if ( cap_write_drain(iommu->cap) )
437 val |= DMA_TLB_WRITE_DRAIN;
439 spin_lock_irqsave(&iommu->register_lock, flag);
440 /* Note: Only uses first TLB reg currently */
441 if ( val_iva )
442 dmar_writeq(iommu->reg, tlb_offset, val_iva);
443 dmar_writeq(iommu->reg, tlb_offset + 8, val);
445 /* Make sure hardware complete it */
446 start_time = jiffies;
447 for ( ; ; )
448 {
449 val = dmar_readq(iommu->reg, tlb_offset + 8);
450 if ( !(val & DMA_TLB_IVT) )
451 break;
452 if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
453 panic("DMAR hardware is malfunctional, please disable IOMMU\n");
454 cpu_relax();
455 }
456 spin_unlock_irqrestore(&iommu->register_lock, flag);
458 /* check IOTLB invalidation granularity */
459 if ( DMA_TLB_IAIG(val) == 0 )
460 printk(KERN_ERR VTDPREFIX "IOMMU: flush IOTLB failed\n");
461 if ( DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type) )
462 printk(KERN_ERR VTDPREFIX "IOMMU: tlb flush request %x, actual %x\n",
463 (u32)DMA_TLB_IIRG(type), (u32)DMA_TLB_IAIG(val));
464 /* flushing the context entry will implicitly flush the write buffer */
465 return 0;
466 }
468 static int inline iommu_flush_iotlb_global(struct iommu *iommu,
469 int non_present_entry_flush)
470 {
471 return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
472 non_present_entry_flush);
473 }
475 static int inline iommu_flush_iotlb_dsi(struct iommu *iommu, u16 did,
476 int non_present_entry_flush)
477 {
478 return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
479 non_present_entry_flush);
480 }
482 static int inline get_alignment(u64 base, unsigned int size)
483 {
484 int t = 0;
485 u64 end;
487 end = base + size - 1;
488 while ( base != end )
489 {
490 t++;
491 base >>= 1;
492 end >>= 1;
493 }
494 return t;
495 }
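/*
 * Illustrative examples (not part of the original source): get_alignment()
 * returns the order of the smallest naturally aligned power-of-two region
 * covering [base, base+size-1].  E.g. base=8, size=4 -> end=11, and after
 * two right shifts base == end, so the order is 2 (a 4-page aligned region).
 * An unaligned request such as base=7, size=2 -> end=8 needs four shifts,
 * i.e. a 16-page aligned mask, which is why large or badly aligned ranges
 * fall back to a domain-selective flush in iommu_flush_iotlb_psi() below.
 */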
497 static int inline iommu_flush_iotlb_psi(
498 struct iommu *iommu, u16 did,
499 u64 addr, unsigned int pages, int non_present_entry_flush)
500 {
501 unsigned int align;
503 BUG_ON(addr & (~PAGE_MASK_4K));
504 BUG_ON(pages == 0);
506 /* Fallback to domain selective flush if no PSI support */
507 if ( !cap_pgsel_inv(iommu->cap) )
508 return iommu_flush_iotlb_dsi(iommu, did,
509 non_present_entry_flush);
511 /*
512 * PSI requires the region size to be 2^x pages, with the base address
513 * naturally aligned to that size
514 */
515 align = get_alignment(addr >> PAGE_SHIFT_4K, pages);
516 /* Fallback to domain selective flush if size is too big */
517 if ( align > cap_max_amask_val(iommu->cap) )
518 return iommu_flush_iotlb_dsi(iommu, did,
519 non_present_entry_flush);
521 addr >>= PAGE_SHIFT_4K + align;
522 addr <<= PAGE_SHIFT_4K + align;
524 return __iommu_flush_iotlb(iommu, did, addr, align,
525 DMA_TLB_PSI_FLUSH, non_present_entry_flush);
526 }
528 void iommu_flush_all(void)
529 {
530 struct acpi_drhd_unit *drhd;
531 struct iommu *iommu;
532 int i = 0;
534 wbinvd();
535 for_each_drhd_unit ( drhd )
536 {
537 iommu = drhd->iommu;
538 iommu_flush_context_global(iommu, 0);
539 iommu_flush_iotlb_global(iommu, 0);
540 i++;
541 }
542 }
544 /* clear one page's page table */
545 static void dma_pte_clear_one(struct domain *domain, u64 addr)
546 {
547 struct acpi_drhd_unit *drhd;
548 struct iommu *iommu;
549 struct dma_pte *pte = NULL;
550 struct page_info *pg = NULL;
552 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
554 /* get last level pte */
555 pg = dma_addr_level_page(domain, addr, 1);
556 if ( !pg )
557 return;
558 pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
559 pte += address_level_offset(addr, 1);
560 if ( pte )
561 {
562 dma_clear_pte(*pte);
563 iommu_flush_cache_entry(drhd->iommu, pte);
565 for_each_drhd_unit ( drhd )
566 {
567 iommu = drhd->iommu;
568 if ( cap_caching_mode(iommu->cap) )
569 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
570 addr, 1, 0);
571 else if (cap_rwbf(iommu->cap))
572 iommu_flush_write_buffer(iommu);
573 }
574 }
575 unmap_domain_page(pte);
576 }
578 /* clear last level pte, a tlb flush should be followed */
579 static void dma_pte_clear_range(struct domain *domain, u64 start, u64 end)
580 {
581 struct hvm_iommu *hd = domain_hvm_iommu(domain);
582 int addr_width = agaw_to_width(hd->agaw);
584 start &= (((u64)1) << addr_width) - 1;
585 end &= (((u64)1) << addr_width) - 1;
586 /* in case it's a partial page */
587 start = PAGE_ALIGN_4K(start);
588 end &= PAGE_MASK_4K;
590 /* we don't need lock here, nobody else touches the iova range */
591 while ( start < end )
592 {
593 dma_pte_clear_one(domain, start);
594 start += PAGE_SIZE_4K;
595 }
596 }
598 /* free page table pages. last level pte should already be cleared */
599 void dma_pte_free_pagetable(struct domain *domain, u64 start, u64 end)
600 {
601 struct acpi_drhd_unit *drhd;
602 struct hvm_iommu *hd = domain_hvm_iommu(domain);
603 struct iommu *iommu;
604 int addr_width = agaw_to_width(hd->agaw);
605 struct dma_pte *pte;
606 int total = agaw_to_level(hd->agaw);
607 int level;
608 u32 tmp;
609 struct page_info *pg = NULL;
611 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
612 iommu = drhd->iommu;
614 start &= (((u64)1) << addr_width) - 1;
615 end &= (((u64)1) << addr_width) - 1;
617 /* we don't need lock here, nobody else touches the iova range */
618 level = 2;
619 while ( level <= total )
620 {
621 tmp = align_to_level(start, level);
622 if ( (tmp >= end) || ((tmp + level_size(level)) > end) )
623 return;
625 while ( tmp < end )
626 {
627 pg = dma_addr_level_page(domain, tmp, level);
628 if ( !pg )
629 return;
630 pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
631 pte += address_level_offset(tmp, level);
632 dma_clear_pte(*pte);
633 iommu_flush_cache_entry(iommu, pte);
634 unmap_domain_page(pte);
635 free_domheap_page(pg);
637 tmp += level_size(level);
638 }
639 level++;
640 }
642 /* free pgd */
643 if ( start == 0 && end == ((((u64)1) << addr_width) - 1) )
644 {
645 free_xenheap_page((void *)hd->pgd);
646 hd->pgd = NULL;
647 }
648 }
650 /* iommu handling */
651 static int iommu_set_root_entry(struct iommu *iommu)
652 {
653 void *addr;
654 u32 cmd, sts;
655 struct root_entry *root;
656 unsigned long flags;
658 if ( iommu == NULL )
659 {
660 gdprintk(XENLOG_ERR VTDPREFIX,
661 "iommu_set_root_entry: iommu == NULL\n");
662 return -EINVAL;
663 }
665 if ( unlikely(!iommu->root_entry) )
666 {
667 root = (struct root_entry *)alloc_xenheap_page();
668 if ( root == NULL )
669 return -ENOMEM;
671 memset((u8*)root, 0, PAGE_SIZE);
672 iommu_flush_cache_page(iommu, root);
674 if ( cmpxchg((unsigned long *)&iommu->root_entry,
675 0, (unsigned long)root) != 0 )
676 free_xenheap_page((void *)root);
677 }
679 addr = iommu->root_entry;
681 spin_lock_irqsave(&iommu->register_lock, flags);
683 dmar_writeq(iommu->reg, DMAR_RTADDR_REG, virt_to_maddr(addr));
684 cmd = iommu->gcmd | DMA_GCMD_SRTP;
685 dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd);
687 /* Make sure hardware complete it */
688 for ( ; ; )
689 {
690 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
691 if ( sts & DMA_GSTS_RTPS )
692 break;
693 cpu_relax();
694 }
696 spin_unlock_irqrestore(&iommu->register_lock, flags);
698 return 0;
699 }
701 static int iommu_enable_translation(struct iommu *iommu)
702 {
703 u32 sts;
704 unsigned long flags;
706 dprintk(XENLOG_INFO VTDPREFIX,
707 "iommu_enable_translation: enabling vt-d translation\n");
708 spin_lock_irqsave(&iommu->register_lock, flags);
709 iommu->gcmd |= DMA_GCMD_TE;
710 dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
711 /* Make sure hardware complete it */
712 for ( ; ; )
713 {
714 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
715 if ( sts & DMA_GSTS_TES )
716 break;
717 cpu_relax();
718 }
720 /* Disable PMRs when VT-d engine takes effect per spec definition */
721 disable_pmr(iommu);
722 spin_unlock_irqrestore(&iommu->register_lock, flags);
723 return 0;
724 }
726 int iommu_disable_translation(struct iommu *iommu)
727 {
728 u32 sts;
729 unsigned long flags;
731 spin_lock_irqsave(&iommu->register_lock, flags);
732 iommu->gcmd &= ~ DMA_GCMD_TE;
733 dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
735 /* Make sure hardware complete it */
736 for ( ; ; )
737 {
738 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
739 if ( !(sts & DMA_GSTS_TES) )
740 break;
741 cpu_relax();
742 }
743 spin_unlock_irqrestore(&iommu->register_lock, flags);
744 return 0;
745 }
747 static struct iommu *vector_to_iommu[NR_VECTORS];
748 static int iommu_page_fault_do_one(struct iommu *iommu, int type,
749 u8 fault_reason, u16 source_id, u32 addr)
750 {
751 dprintk(XENLOG_WARNING VTDPREFIX,
752 "iommu_page_fault:%s: DEVICE %x:%x.%x addr %x REASON %x\n",
753 (type ? "DMA Read" : "DMA Write"),
754 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
755 PCI_FUNC(source_id & 0xFF), addr, fault_reason);
757 print_vtd_entries(current->domain, (source_id >> 8),(source_id & 0xff),
758 (addr >> PAGE_SHIFT));
759 return 0;
760 }
762 #define PRIMARY_FAULT_REG_LEN (16)
763 static void iommu_page_fault(int vector, void *dev_id,
764 struct cpu_user_regs *regs)
765 {
766 struct iommu *iommu = dev_id;
767 int reg, fault_index;
768 u32 fault_status;
769 unsigned long flags;
771 dprintk(XENLOG_WARNING VTDPREFIX,
772 "iommu_page_fault: iommu->reg = %p\n", iommu->reg);
774 spin_lock_irqsave(&iommu->register_lock, flags);
775 fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG);
776 spin_unlock_irqrestore(&iommu->register_lock, flags);
778 /* FIXME: ignore advanced fault log */
779 if ( !(fault_status & DMA_FSTS_PPF) )
780 return;
781 fault_index = dma_fsts_fault_record_index(fault_status);
782 reg = cap_fault_reg_offset(iommu->cap);
783 for ( ; ; )
784 {
785 u8 fault_reason;
786 u16 source_id;
787 u32 guest_addr, data;
788 int type;
790 /* highest 32 bits */
791 spin_lock_irqsave(&iommu->register_lock, flags);
792 data = dmar_readl(iommu->reg, reg +
793 fault_index * PRIMARY_FAULT_REG_LEN + 12);
794 if ( !(data & DMA_FRCD_F) )
795 {
796 spin_unlock_irqrestore(&iommu->register_lock, flags);
797 break;
798 }
800 fault_reason = dma_frcd_fault_reason(data);
801 type = dma_frcd_type(data);
803 data = dmar_readl(iommu->reg, reg +
804 fault_index * PRIMARY_FAULT_REG_LEN + 8);
805 source_id = dma_frcd_source_id(data);
807 guest_addr = dmar_readq(iommu->reg, reg +
808 fault_index * PRIMARY_FAULT_REG_LEN);
809 guest_addr = dma_frcd_page_addr(guest_addr);
810 /* clear the fault */
811 dmar_writel(iommu->reg, reg +
812 fault_index * PRIMARY_FAULT_REG_LEN + 12, DMA_FRCD_F);
813 spin_unlock_irqrestore(&iommu->register_lock, flags);
815 iommu_page_fault_do_one(iommu, type, fault_reason,
816 source_id, guest_addr);
818 fault_index++;
819 if ( fault_index > cap_num_fault_regs(iommu->cap) )
820 fault_index = 0;
821 }
823 /* clear primary fault overflow */
824 if ( fault_status & DMA_FSTS_PFO )
825 {
826 spin_lock_irqsave(&iommu->register_lock, flags);
827 dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO);
828 spin_unlock_irqrestore(&iommu->register_lock, flags);
829 }
830 }
832 static void dma_msi_unmask(unsigned int vector)
833 {
834 struct iommu *iommu = vector_to_iommu[vector];
835 unsigned long flags;
837 /* unmask it */
838 spin_lock_irqsave(&iommu->register_lock, flags);
839 dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
840 spin_unlock_irqrestore(&iommu->register_lock, flags);
841 }
843 static void dma_msi_mask(unsigned int vector)
844 {
845 unsigned long flags;
846 struct iommu *iommu = vector_to_iommu[vector];
848 /* mask it */
849 spin_lock_irqsave(&iommu->register_lock, flags);
850 dmar_writel(iommu->reg, DMAR_FECTL_REG, DMA_FECTL_IM);
851 spin_unlock_irqrestore(&iommu->register_lock, flags);
852 }
854 static unsigned int dma_msi_startup(unsigned int vector)
855 {
856 dma_msi_unmask(vector);
857 return 0;
858 }
860 static void dma_msi_end(unsigned int vector)
861 {
862 dma_msi_unmask(vector);
863 ack_APIC_irq();
864 }
866 static void dma_msi_data_init(struct iommu *iommu, int vector)
867 {
868 u32 msi_data = 0;
869 unsigned long flags;
871 /* Fixed, edge, assert mode. Follow MSI setting */
872 msi_data |= vector & 0xff;
873 msi_data |= 1 << 14;
875 spin_lock_irqsave(&iommu->register_lock, flags);
876 dmar_writel(iommu->reg, DMAR_FEDATA_REG, msi_data);
877 spin_unlock_irqrestore(&iommu->register_lock, flags);
878 }
880 static void dma_msi_addr_init(struct iommu *iommu, int phy_cpu)
881 {
882 u64 msi_address;
883 unsigned long flags;
885 /* Physical, dedicated cpu. Follow MSI setting */
886 msi_address = (MSI_ADDRESS_HEADER << (MSI_ADDRESS_HEADER_SHIFT + 8));
887 msi_address |= MSI_PHYSICAL_MODE << 2;
888 msi_address |= MSI_REDIRECTION_HINT_MODE << 3;
889 msi_address |= phy_cpu << MSI_TARGET_CPU_SHIFT;
891 spin_lock_irqsave(&iommu->register_lock, flags);
892 dmar_writel(iommu->reg, DMAR_FEADDR_REG, (u32)msi_address);
893 dmar_writel(iommu->reg, DMAR_FEUADDR_REG, (u32)(msi_address >> 32));
894 spin_unlock_irqrestore(&iommu->register_lock, flags);
895 }
897 static void dma_msi_set_affinity(unsigned int vector, cpumask_t dest)
898 {
899 struct iommu *iommu = vector_to_iommu[vector];
900 dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(dest)));
901 }
903 static struct hw_interrupt_type dma_msi_type = {
904 .typename = "DMA_MSI",
905 .startup = dma_msi_startup,
906 .shutdown = dma_msi_mask,
907 .enable = dma_msi_unmask,
908 .disable = dma_msi_mask,
909 .ack = dma_msi_mask,
910 .end = dma_msi_end,
911 .set_affinity = dma_msi_set_affinity,
912 };
914 int iommu_set_interrupt(struct iommu *iommu)
915 {
916 int vector, ret;
918 vector = assign_irq_vector(AUTO_ASSIGN);
919 vector_to_iommu[vector] = iommu;
921 /* The VT-d fault is an MSI, so make irq == vector */
922 irq_vector[vector] = vector;
923 vector_irq[vector] = vector;
925 if ( !vector )
926 {
927 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n");
928 return -EINVAL;
929 }
931 irq_desc[vector].handler = &dma_msi_type;
932 ret = request_irq(vector, iommu_page_fault, 0, "dmar", iommu);
933 if ( ret )
934 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n");
935 return vector;
936 }
938 struct iommu *iommu_alloc(void *hw_data)
939 {
940 struct acpi_drhd_unit *drhd = (struct acpi_drhd_unit *) hw_data;
941 struct iommu *iommu;
943 if ( nr_iommus > MAX_IOMMUS )
944 {
945 gdprintk(XENLOG_ERR VTDPREFIX,
946 "IOMMU: nr_iommus %d > MAX_IOMMUS\n", nr_iommus);
947 return NULL;
948 }
950 iommu = xmalloc(struct iommu);
951 if ( !iommu )
952 return NULL;
953 memset(iommu, 0, sizeof(struct iommu));
955 set_fixmap_nocache(FIX_IOMMU_REGS_BASE_0 + nr_iommus, drhd->address);
956 iommu->reg = (void *) fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
957 dprintk(XENLOG_ERR VTDPREFIX,
958 "iommu_alloc: iommu->reg = %p drhd->address = %lx\n",
959 iommu->reg, drhd->address);
960 nr_iommus++;
962 if ( !iommu->reg )
963 {
964 printk(KERN_ERR VTDPREFIX "IOMMU: can't mapping the region\n");
965 goto error;
966 }
968 iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
969 iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
971 spin_lock_init(&iommu->lock);
972 spin_lock_init(&iommu->register_lock);
974 drhd->iommu = iommu;
975 return iommu;
976 error:
977 xfree(iommu);
978 return NULL;
979 }
981 static void free_iommu(struct iommu *iommu)
982 {
983 if ( !iommu )
984 return;
985 if ( iommu->root_entry )
986 free_xenheap_page((void *)iommu->root_entry);
987 if ( iommu->reg )
988 iounmap(iommu->reg);
989 free_irq(iommu->vector);
990 xfree(iommu);
991 }
993 #define guestwidth_to_adjustwidth(gaw) ({ \
994 int agaw, r = (gaw - 12) % 9; \
995 agaw = (r == 0) ? gaw : (gaw + 9 - r); \
996 if ( agaw > 64 ) \
997 agaw = 64; \
998 agaw; })
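/*
 * Illustrative examples (not part of the original source): the adjusted
 * width is rounded up so that (gaw - 12) is a multiple of 9, matching the
 * 9-bit-per-level VT-d page tables.  E.g. gaw=32 -> r=2 -> agaw=39 (three
 * levels), while gaw=39 or gaw=48 are already aligned and stay unchanged.
 */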
1000 int iommu_domain_init(struct domain *domain)
1002 struct hvm_iommu *hd = domain_hvm_iommu(domain);
1003 struct iommu *iommu = NULL;
1004 int guest_width = DEFAULT_DOMAIN_ADDRESS_WIDTH;
1005 int adjust_width, agaw;
1006 unsigned long sagaw;
1007 struct acpi_drhd_unit *drhd;
1009 spin_lock_init(&hd->mapping_lock);
1010 spin_lock_init(&hd->iommu_list_lock);
1011 INIT_LIST_HEAD(&hd->pdev_list);
1012 INIT_LIST_HEAD(&hd->g2m_ioport_list);
1014 if ( !vtd_enabled || list_empty(&acpi_drhd_units) )
1015 return 0;
1017 for_each_drhd_unit ( drhd )
1018 iommu = drhd->iommu ? : iommu_alloc(drhd);
1020 /* calculate AGAW */
1021 if (guest_width > cap_mgaw(iommu->cap))
1022 guest_width = cap_mgaw(iommu->cap);
1023 adjust_width = guestwidth_to_adjustwidth(guest_width);
1024 agaw = width_to_agaw(adjust_width);
1025 /* FIXME: hardware doesn't support it, choose a bigger one? */
1026 sagaw = cap_sagaw(iommu->cap);
1027 if ( !test_bit(agaw, &sagaw) )
1029 gdprintk(XENLOG_ERR VTDPREFIX,
1030 "IOMMU: hardware doesn't support the agaw\n");
1031 agaw = find_next_bit(&sagaw, 5, agaw);
1032 if ( agaw >= 5 )
1033 return -ENODEV;
1035 hd->agaw = agaw;
1036 return 0;
1039 static int domain_context_mapping_one(
1040 struct domain *domain,
1041 struct iommu *iommu,
1042 u8 bus, u8 devfn)
1044 struct hvm_iommu *hd = domain_hvm_iommu(domain);
1045 struct context_entry *context;
1046 unsigned long flags;
1047 int ret = 0;
1049 context = device_to_context_entry(iommu, bus, devfn);
1050 if ( !context )
1052 gdprintk(XENLOG_ERR VTDPREFIX,
1053 "domain_context_mapping_one:context == NULL:"
1054 "bdf = %x:%x:%x\n",
1055 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1056 return -ENOMEM;
1059 if ( context_present(*context) )
1061 gdprintk(XENLOG_INFO VTDPREFIX,
1062 "domain_context_mapping_one:context present:bdf=%x:%x:%x\n",
1063 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1064 return 0;
1067 spin_lock_irqsave(&iommu->lock, flags);
1068 /*
1069 * domain_id 0 is not valid on Intel's IOMMU, force domain_id to
1070 * be 1 based as required by intel's iommu hw.
1071 */
1072 context_set_domain_id(context, domain);
1073 context_set_address_width(*context, hd->agaw);
1075 if ( ecap_pass_thru(iommu->ecap) )
1076 context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
1077 else
1079 if ( !hd->pgd )
1081 struct dma_pte *pgd = (struct dma_pte *)alloc_xenheap_page();
1082 if ( !pgd )
1084 spin_unlock_irqrestore(&hd->mapping_lock, flags);
1085 return -ENOMEM;
1087 memset(pgd, 0, PAGE_SIZE);
1088 hd->pgd = pgd;
1091 context_set_address_root(*context, virt_to_maddr(hd->pgd));
1092 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1095 context_set_fault_enable(*context);
1096 context_set_present(*context);
1097 iommu_flush_cache_entry(iommu, context);
1099 gdprintk(XENLOG_INFO VTDPREFIX,
1100 "context_mapping_one-%x:%x:%x-*context=%"PRIx64":%"PRIx64
1101 " hd->pgd=%p\n",
1102 bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
1103 context->hi, context->lo, hd->pgd);
1105 if ( iommu_flush_context_device(iommu, domain_iommu_domid(domain),
1106 (((u16)bus) << 8) | devfn,
1107 DMA_CCMD_MASK_NOBIT, 1) )
1108 iommu_flush_write_buffer(iommu);
1109 else
1110 iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);
1111 spin_unlock_irqrestore(&iommu->lock, flags);
1112 return ret;
1115 static int __pci_find_next_cap(u8 bus, unsigned int devfn, u8 pos, int cap)
1117 u8 id;
1118 int ttl = 48;
1120 while ( ttl-- )
1122 pos = read_pci_config_byte(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), pos);
1123 if ( pos < 0x40 )
1124 break;
1126 pos &= ~3;
1127 id = read_pci_config_byte(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
1128 pos + PCI_CAP_LIST_ID);
1130 if ( id == 0xff )
1131 break;
1132 if ( id == cap )
1133 return pos;
1135 pos += PCI_CAP_LIST_NEXT;
1137 return 0;
1140 #define PCI_BASE_CLASS_BRIDGE 0x06
1141 #define PCI_CLASS_BRIDGE_PCI 0x0604
1143 #define DEV_TYPE_PCIe_ENDPOINT 1
1144 #define DEV_TYPE_PCI_BRIDGE 2
1145 #define DEV_TYPE_PCI 3
1147 int pdev_type(struct pci_dev *dev)
1149 u16 class_device;
1150 u16 status;
1152 class_device = read_pci_config_16(dev->bus, PCI_SLOT(dev->devfn),
1153 PCI_FUNC(dev->devfn), PCI_CLASS_DEVICE);
1154 if ( class_device == PCI_CLASS_BRIDGE_PCI )
1155 return DEV_TYPE_PCI_BRIDGE;
1157 status = read_pci_config_16(dev->bus, PCI_SLOT(dev->devfn),
1158 PCI_FUNC(dev->devfn), PCI_STATUS);
1160 if ( !(status & PCI_STATUS_CAP_LIST) )
1161 return DEV_TYPE_PCI;
1163 if ( __pci_find_next_cap(dev->bus, dev->devfn,
1164 PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP) )
1165 return DEV_TYPE_PCIe_ENDPOINT;
1167 return DEV_TYPE_PCI;
1170 #define MAX_BUSES 256
1171 struct pci_dev bus2bridge[MAX_BUSES];
1173 static int domain_context_mapping(
1174 struct domain *domain,
1175 struct iommu *iommu,
1176 struct pci_dev *pdev)
1178 int ret = 0;
1179 int dev, func, sec_bus, sub_bus;
1180 u32 type;
1182 type = pdev_type(pdev);
1183 switch ( type )
1185 case DEV_TYPE_PCI_BRIDGE:
1186 sec_bus = read_pci_config_byte(
1187 pdev->bus, PCI_SLOT(pdev->devfn),
1188 PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS);
1190 if ( bus2bridge[sec_bus].bus == 0 )
1192 bus2bridge[sec_bus].bus = pdev->bus;
1193 bus2bridge[sec_bus].devfn = pdev->devfn;
1196 sub_bus = read_pci_config_byte(
1197 pdev->bus, PCI_SLOT(pdev->devfn),
1198 PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
1200 if ( sec_bus != sub_bus )
1202 dprintk(XENLOG_INFO VTDPREFIX,
1203 "context_mapping: nested PCI bridge not supported\n");
1204 dprintk(XENLOG_INFO VTDPREFIX,
1205 " bdf = %x:%x:%x sec_bus = %x sub_bus = %x\n",
1206 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1207 sec_bus, sub_bus);
1209 break;
1210 case DEV_TYPE_PCIe_ENDPOINT:
1211 gdprintk(XENLOG_INFO VTDPREFIX,
1212 "domain_context_mapping:PCIe : bdf = %x:%x:%x\n",
1213 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1214 ret = domain_context_mapping_one(domain, iommu,
1215 (u8)(pdev->bus), (u8)(pdev->devfn));
1216 break;
1217 case DEV_TYPE_PCI:
1218 gdprintk(XENLOG_INFO VTDPREFIX,
1219 "domain_context_mapping:PCI: bdf = %x:%x:%x\n",
1220 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1222 if ( pdev->bus == 0 )
1223 ret = domain_context_mapping_one(
1224 domain, iommu, (u8)(pdev->bus), (u8)(pdev->devfn));
1225 else
1227 if ( bus2bridge[pdev->bus].bus != 0 )
1228 gdprintk(XENLOG_WARNING VTDPREFIX,
1229 "domain_context_mapping:bus2bridge"
1230 "[pdev->bus].bus != 0\n");
1232 ret = domain_context_mapping_one(
1233 domain, iommu,
1234 (u8)(bus2bridge[pdev->bus].bus),
1235 (u8)(bus2bridge[pdev->bus].devfn));
1237 /* now map everything behind the PCI bridge */
1238 for ( dev = 0; dev < 32; dev++ )
1240 for ( func = 0; func < 8; func++ )
1242 ret = domain_context_mapping_one(
1243 domain, iommu,
1244 pdev->bus, (u8)PCI_DEVFN(dev, func));
1245 if ( ret )
1246 return ret;
1250 break;
1251 default:
1252 gdprintk(XENLOG_ERR VTDPREFIX,
1253 "domain_context_mapping:unknown type : bdf = %x:%x:%x\n",
1254 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1255 ret = -EINVAL;
1256 break;
1259 return ret;
1262 static int domain_context_unmap_one(
1263 struct domain *domain,
1264 struct iommu *iommu,
1265 u8 bus, u8 devfn)
1267 struct context_entry *context;
1268 unsigned long flags;
1270 context = device_to_context_entry(iommu, bus, devfn);
1271 if ( !context )
1273 gdprintk(XENLOG_ERR VTDPREFIX,
1274 "domain_context_unmap_one-%x:%x:%x- context == NULL:return\n",
1275 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1276 return -ENOMEM;
1279 if ( !context_present(*context) )
1281 gdprintk(XENLOG_WARNING VTDPREFIX,
1282 "domain_context_unmap_one-%x:%x:%x- "
1283 "context NOT present:return\n",
1284 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1285 return 0;
1288 gdprintk(XENLOG_INFO VTDPREFIX,
1289 "domain_context_unmap_one: bdf = %x:%x:%x\n",
1290 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1292 spin_lock_irqsave(&iommu->lock, flags);
1293 context_clear_present(*context);
1294 context_clear_entry(*context);
1295 iommu_flush_cache_entry(iommu, context);
1296 iommu_flush_context_global(iommu, 0);
1297 iommu_flush_iotlb_global(iommu, 0);
1298 spin_unlock_irqrestore(&iommu->lock, flags);
1300 return 0;
1303 static int domain_context_unmap(
1304 struct domain *domain,
1305 struct iommu *iommu,
1306 struct pci_dev *pdev)
1308 int ret = 0;
1309 int dev, func, sec_bus, sub_bus;
1310 u32 type;
1312 type = pdev_type(pdev);
1313 switch ( type )
1315 case DEV_TYPE_PCI_BRIDGE:
1316 sec_bus = read_pci_config_byte(
1317 pdev->bus, PCI_SLOT(pdev->devfn),
1318 PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS);
1319 sub_bus = read_pci_config_byte(
1320 pdev->bus, PCI_SLOT(pdev->devfn),
1321 PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
1323 gdprintk(XENLOG_INFO VTDPREFIX,
1324 "domain_context_unmap:BRIDGE:%x:%x:%x "
1325 "sec_bus=%x sub_bus=%x\n",
1326 pdev->bus, PCI_SLOT(pdev->devfn),
1327 PCI_FUNC(pdev->devfn), sec_bus, sub_bus);
1328 break;
1329 case DEV_TYPE_PCIe_ENDPOINT:
1330 gdprintk(XENLOG_INFO VTDPREFIX,
1331 "domain_context_unmap:PCIe : bdf = %x:%x:%x\n",
1332 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1333 ret = domain_context_unmap_one(domain, iommu,
1334 (u8)(pdev->bus), (u8)(pdev->devfn));
1335 break;
1336 case DEV_TYPE_PCI:
1337 gdprintk(XENLOG_INFO VTDPREFIX,
1338 "domain_context_unmap:PCI: bdf = %x:%x:%x\n",
1339 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1340 if ( pdev->bus == 0 )
1341 ret = domain_context_unmap_one(
1342 domain, iommu,
1343 (u8)(pdev->bus), (u8)(pdev->devfn));
1344 else
1346 if ( bus2bridge[pdev->bus].bus != 0 )
1347 gdprintk(XENLOG_WARNING VTDPREFIX,
1348 "domain_context_mapping:"
1349 "bus2bridge[pdev->bus].bus != 0\n");
1351 ret = domain_context_unmap_one(domain, iommu,
1352 (u8)(bus2bridge[pdev->bus].bus),
1353 (u8)(bus2bridge[pdev->bus].devfn));
1355 /* Unmap everything behind the PCI bridge */
1356 for ( dev = 0; dev < 32; dev++ )
1358 for ( func = 0; func < 8; func++ )
1360 ret = domain_context_unmap_one(
1361 domain, iommu,
1362 pdev->bus, (u8)PCI_DEVFN(dev, func));
1363 if ( ret )
1364 return ret;
1368 break;
1369 default:
1370 gdprintk(XENLOG_ERR VTDPREFIX,
1371 "domain_context_unmap:unknown type: bdf = %x:%x:%x\n",
1372 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1373 ret = -EINVAL;
1374 break;
1377 return ret;
1380 void reassign_device_ownership(
1381 struct domain *source,
1382 struct domain *target,
1383 u8 bus, u8 devfn)
1385 struct hvm_iommu *source_hd = domain_hvm_iommu(source);
1386 struct hvm_iommu *target_hd = domain_hvm_iommu(target);
1387 struct pci_dev *pdev;
1388 struct acpi_drhd_unit *drhd;
1389 struct iommu *iommu;
1390 int status;
1391 unsigned long flags;
1393 gdprintk(XENLOG_INFO VTDPREFIX,
1394 "reassign_device-%x:%x:%x- source = %d target = %d\n",
1395 bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
1396 source->domain_id, target->domain_id);
1398 for_each_pdev( source, pdev )
1400 if ( (pdev->bus != bus) || (pdev->devfn != devfn) )
1401 continue;
1403 drhd = acpi_find_matched_drhd_unit(pdev);
1404 iommu = drhd->iommu;
1405 domain_context_unmap(source, iommu, pdev);
1407 /* Move pci device from the source domain to target domain. */
1408 spin_lock_irqsave(&source_hd->iommu_list_lock, flags);
1409 spin_lock_irqsave(&target_hd->iommu_list_lock, flags);
1410 list_move(&pdev->list, &target_hd->pdev_list);
1411 spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags);
1412 spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags);
1414 status = domain_context_mapping(target, iommu, pdev);
1415 if ( status != 0 )
1416 gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n");
1418 break;
1422 void return_devices_to_dom0(struct domain *d)
1424 struct hvm_iommu *hd = domain_hvm_iommu(d);
1425 struct pci_dev *pdev;
1427 while ( !list_empty(&hd->pdev_list) )
1429 pdev = list_entry(hd->pdev_list.next, typeof(*pdev), list);
1430 dprintk(XENLOG_INFO VTDPREFIX,
1431 "return_devices_to_dom0: bdf = %x:%x:%x\n",
1432 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1433 reassign_device_ownership(d, dom0, pdev->bus, pdev->devfn);
1436 #ifdef VTD_DEBUG
1437 for_each_pdev ( dom0, pdev )
1438 dprintk(XENLOG_INFO VTDPREFIX,
1439 "return_devices_to_dom0:%x: bdf = %x:%x:%x\n",
1440 dom0->domain_id, pdev->bus,
1441 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1442 #endif
1445 void iommu_domain_teardown(struct domain *d)
1447 if ( list_empty(&acpi_drhd_units) )
1448 return;
1450 iommu_domid_release(d);
1452 #if CONFIG_PAGING_LEVELS == 3
1454 struct hvm_iommu *hd = domain_hvm_iommu(d);
1455 int level = agaw_to_level(hd->agaw);
1456 struct dma_pte *pgd = NULL;
1458 switch ( level )
1460 case VTD_PAGE_TABLE_LEVEL_3:
1461 if ( hd->pgd )
1462 free_xenheap_page((void *)hd->pgd);
1463 break;
1464 case VTD_PAGE_TABLE_LEVEL_4:
1465 if ( hd->pgd )
1467 pgd = hd->pgd;
1468 if ( pgd[0].val != 0 )
1469 free_xenheap_page((void*)maddr_to_virt(
1470 dma_pte_addr(pgd[0])));
1472 free_xenheap_page((void *)hd->pgd);
1474 break;
1475 default:
1476 gdprintk(XENLOG_ERR VTDPREFIX,
1477 "Unsupported p2m table sharing level!\n");
1478 break;
1481 #endif
1482 return_devices_to_dom0(d);
1485 static int domain_context_mapped(struct pci_dev *pdev)
1487 struct acpi_drhd_unit *drhd;
1488 struct iommu *iommu;
1489 int ret;
1491 for_each_drhd_unit ( drhd )
1493 iommu = drhd->iommu;
1494 ret = device_context_mapped(iommu, pdev->bus, pdev->devfn);
1495 if ( ret )
1496 return ret;
1499 return 0;
1502 int iommu_map_page(struct domain *d, paddr_t gfn, paddr_t mfn)
1504 struct acpi_drhd_unit *drhd;
1505 struct iommu *iommu;
1506 struct dma_pte *pte = NULL;
1507 struct page_info *pg = NULL;
1509 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1510 iommu = drhd->iommu;
1512 /* do nothing if dom0 and iommu supports pass thru */
1513 if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
1514 return 0;
1516 pg = addr_to_dma_page(d, gfn << PAGE_SHIFT_4K);
1517 if ( !pg )
1518 return -ENOMEM;
1519 pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
1520 pte += gfn & LEVEL_MASK;
1521 dma_set_pte_addr(*pte, mfn << PAGE_SHIFT_4K);
1522 dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
1523 iommu_flush_cache_entry(iommu, pte);
1524 unmap_domain_page(pte);
1526 for_each_drhd_unit ( drhd )
1528 iommu = drhd->iommu;
1529 if ( cap_caching_mode(iommu->cap) )
1530 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
1531 gfn << PAGE_SHIFT_4K, 1, 0);
1532 else if ( cap_rwbf(iommu->cap) )
1533 iommu_flush_write_buffer(iommu);
1536 return 0;
1539 int iommu_unmap_page(struct domain *d, dma_addr_t gfn)
1541 struct acpi_drhd_unit *drhd;
1542 struct iommu *iommu;
1544 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1545 iommu = drhd->iommu;
1547 /* do nothing if dom0 and iommu supports pass thru */
1548 if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
1549 return 0;
1551 dma_pte_clear_one(d, gfn << PAGE_SHIFT_4K);
1553 return 0;
1556 int iommu_page_mapping(struct domain *domain, dma_addr_t iova,
1557 void *hpa, size_t size, int prot)
1559 struct acpi_drhd_unit *drhd;
1560 struct iommu *iommu;
1561 unsigned long start_pfn, end_pfn;
1562 struct dma_pte *pte = NULL;
1563 int index;
1564 struct page_info *pg = NULL;
1566 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1567 iommu = drhd->iommu;
1568 if ( (prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0 )
1569 return -EINVAL;
1570 iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;
1571 start_pfn = (unsigned long)(((unsigned long) hpa) >> PAGE_SHIFT_4K);
1572 end_pfn = (unsigned long)
1573 ((PAGE_ALIGN_4K(((unsigned long)hpa) + size)) >> PAGE_SHIFT_4K);
1574 index = 0;
1575 while ( start_pfn < end_pfn )
1577 pg = addr_to_dma_page(domain, iova + PAGE_SIZE_4K * index);
1578 if ( !pg )
1579 return -ENOMEM;
1580 pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
1581 pte += start_pfn & LEVEL_MASK;
1582 dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
1583 dma_set_pte_prot(*pte, prot);
1584 iommu_flush_cache_entry(iommu, pte);
1585 unmap_domain_page(pte);
1586 start_pfn++;
1587 index++;
1590 for_each_drhd_unit ( drhd )
1592 iommu = drhd->iommu;
1593 if ( cap_caching_mode(iommu->cap) )
1594 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
1595 iova, index, 0);
1596 else if ( cap_rwbf(iommu->cap) )
1597 iommu_flush_write_buffer(iommu);
1600 return 0;
1603 int iommu_page_unmapping(struct domain *domain, dma_addr_t addr, size_t size)
1605 dma_pte_clear_range(domain, addr, addr + size);
1607 return 0;
1610 void iommu_flush(struct domain *d, dma_addr_t gfn, u64 *p2m_entry)
1612 struct acpi_drhd_unit *drhd;
1613 struct iommu *iommu = NULL;
1614 struct dma_pte *pte = (struct dma_pte *) p2m_entry;
1616 for_each_drhd_unit ( drhd )
1618 iommu = drhd->iommu;
1619 if ( cap_caching_mode(iommu->cap) )
1620 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
1621 gfn << PAGE_SHIFT_4K, 1, 0);
1622 else if ( cap_rwbf(iommu->cap) )
1623 iommu_flush_write_buffer(iommu);
1626 iommu_flush_cache_entry(iommu, pte);
1629 static int iommu_prepare_rmrr_dev(
1630 struct domain *d,
1631 struct acpi_rmrr_unit *rmrr,
1632 struct pci_dev *pdev)
1634 struct acpi_drhd_unit *drhd;
1635 unsigned long size;
1636 int ret;
1638 /* page table init */
1639 size = rmrr->end_address - rmrr->base_address + 1;
1640 ret = iommu_page_mapping(d, rmrr->base_address,
1641 (void *)rmrr->base_address, size,
1642 DMA_PTE_READ|DMA_PTE_WRITE);
1643 if ( ret )
1644 return ret;
1646 if ( domain_context_mapped(pdev) == 0 )
1648 drhd = acpi_find_matched_drhd_unit(pdev);
1649 ret = domain_context_mapping(d, drhd->iommu, pdev);
1650 if ( !ret )
1651 return 0;
1654 return ret;
1657 void __init setup_dom0_devices(void)
1659 struct hvm_iommu *hd = domain_hvm_iommu(dom0);
1660 struct acpi_drhd_unit *drhd;
1661 struct pci_dev *pdev;
1662 int bus, dev, func, ret;
1663 u32 l;
1665 #ifdef DEBUG_VTD_CONTEXT_ENTRY
1666 for ( bus = 0; bus < 256; bus++ )
1668 for ( dev = 0; dev < 32; dev++ )
1670 for ( func = 0; func < 8; func++ )
1672 struct context_entry *context;
1673 struct pci_dev device;
1675 device.bus = bus;
1676 device.devfn = PCI_DEVFN(dev, func);
1677 drhd = acpi_find_matched_drhd_unit(&device);
1678 context = device_to_context_entry(drhd->iommu,
1679 bus, PCI_DEVFN(dev, func));
1680 if ( (context->lo != 0) || (context->hi != 0) )
1681 dprintk(XENLOG_INFO VTDPREFIX,
1682 "setup_dom0_devices-%x:%x:%x- context not 0\n",
1683 bus, dev, func);
1687 #endif
1689 for ( bus = 0; bus < 256; bus++ )
1691 for ( dev = 0; dev < 32; dev++ )
1693 for ( func = 0; func < 8; func++ )
1695 l = read_pci_config(bus, dev, func, PCI_VENDOR_ID);
1696 /* some broken boards return 0 or ~0 if a slot is empty: */
1697 if ( (l == 0xffffffff) || (l == 0x00000000) ||
1698 (l == 0x0000ffff) || (l == 0xffff0000) )
1699 continue;
1700 pdev = xmalloc(struct pci_dev);
1701 pdev->bus = bus;
1702 pdev->devfn = PCI_DEVFN(dev, func);
1703 list_add_tail(&pdev->list, &hd->pdev_list);
1705 drhd = acpi_find_matched_drhd_unit(pdev);
1706 ret = domain_context_mapping(dom0, drhd->iommu, pdev);
1707 if ( ret != 0 )
1708 gdprintk(XENLOG_ERR VTDPREFIX,
1709 "domain_context_mapping failed\n");
1714 for_each_pdev ( dom0, pdev )
1715 dprintk(XENLOG_INFO VTDPREFIX,
1716 "setup_dom0_devices: bdf = %x:%x:%x\n",
1717 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1720 void clear_fault_bit(struct iommu *iommu)
1722 u64 val;
1724 val = dmar_readq(
1725 iommu->reg,
1726 cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+0x8);
1727 dmar_writeq(
1728 iommu->reg,
1729 cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+8,
1730 val);
1731 dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO);
1734 static int init_vtd_hw(void)
1736 struct acpi_drhd_unit *drhd;
1737 struct iommu *iommu;
1738 int ret;
1740 for_each_drhd_unit ( drhd )
1742 iommu = drhd->iommu;
1743 ret = iommu_set_root_entry(iommu);
1744 if ( ret )
1746 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: set root entry failed\n");
1747 return -EIO;
1751 return 0;
1754 static int enable_vtd_translation(void)
1756 struct acpi_drhd_unit *drhd;
1757 struct iommu *iommu;
1758 int vector = 0;
1760 for_each_drhd_unit ( drhd )
1762 iommu = drhd->iommu;
1763 vector = iommu_set_interrupt(iommu);
1764 dma_msi_data_init(iommu, vector);
1765 dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
1766 iommu->vector = vector;
1767 clear_fault_bit(iommu);
1768 if ( iommu_enable_translation(iommu) )
1769 return -EIO;
1772 return 0;
1775 static void setup_dom0_rmrr(void)
1777 struct acpi_rmrr_unit *rmrr;
1778 struct pci_dev *pdev;
1779 int ret;
1781 for_each_rmrr_device ( rmrr, pdev )
1782 ret = iommu_prepare_rmrr_dev(dom0, rmrr, pdev);
1783 if ( ret )
1784 gdprintk(XENLOG_ERR VTDPREFIX,
1785 "IOMMU: mapping reserved region failed\n");
1786 end_for_each_rmrr_device ( rmrr, pdev )
1789 int iommu_setup(void)
1791 struct hvm_iommu *hd = domain_hvm_iommu(dom0);
1792 struct acpi_drhd_unit *drhd;
1793 struct iommu *iommu;
1794 unsigned long i;
1796 if ( !vtd_enabled )
1797 return 0;
1799 spin_lock_init(&domid_bitmap_lock);
1800 INIT_LIST_HEAD(&hd->pdev_list);
1802 /* start from scratch */
1803 iommu_flush_all();
1805 /* setup clflush size */
1806 x86_clflush_size = ((cpuid_ebx(1) >> 8) & 0xff) * 8;
1808 /* Allocate IO page directory page for the domain. */
1809 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1810 iommu = drhd->iommu;
1812 /* Allocate domain id bitmap, and set bit 0 as reserved */
1813 domid_bitmap_size = cap_ndoms(iommu->cap);
1814 domid_bitmap = xmalloc_bytes(domid_bitmap_size / 8);
1815 if ( domid_bitmap == NULL )
1816 goto error;
1817 memset(domid_bitmap, 0, domid_bitmap_size / 8);
1818 set_bit(0, domid_bitmap);
1820 /* setup 1:1 page table for dom0 */
1821 for ( i = 0; i < max_page; i++ )
1822 iommu_map_page(dom0, i, i);
1824 if ( init_vtd_hw() )
1825 goto error;
1826 setup_dom0_devices();
1827 setup_dom0_rmrr();
1828 if ( enable_vtd_translation() )
1829 goto error;
1831 return 0;
1833 error:
1834 printk("iommu_setup() failed\n");
1835 for_each_drhd_unit ( drhd )
1837 iommu = drhd->iommu;
1838 free_iommu(iommu);
1840 return -EIO;
1843 /*
1844 * If the device isn't owned by dom0, it has already been
1845 * assigned to another domain, or it does not exist.
1846 */
1847 int device_assigned(u8 bus, u8 devfn)
1849 struct pci_dev *pdev;
1851 for_each_pdev( dom0, pdev )
1852 if ( (pdev->bus == bus ) && (pdev->devfn == devfn) )
1853 return 0;
1855 return 1;
1858 int assign_device(struct domain *d, u8 bus, u8 devfn)
1860 struct acpi_rmrr_unit *rmrr;
1861 struct pci_dev *pdev;
1862 int ret = 0;
1864 if ( list_empty(&acpi_drhd_units) )
1865 return ret;
1867 gdprintk(XENLOG_INFO VTDPREFIX,
1868 "assign_device: bus = %x dev = %x func = %x\n",
1869 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1871 reassign_device_ownership(dom0, d, bus, devfn);
1873 /* set up the rmrr identity mapping just once per domain */
1874 for_each_rmrr_device(rmrr, pdev)
1875 ret = iommu_prepare_rmrr_dev(d, rmrr, pdev);
1876 if ( ret )
1878 gdprintk(XENLOG_ERR VTDPREFIX,
1879 "IOMMU: mapping reserved region failed\n");
1880 return ret;
1882 end_for_each_rmrr_device(rmrr, pdev)
1884 return ret;
1887 void iommu_set_pgd(struct domain *d)
1889 struct hvm_iommu *hd = domain_hvm_iommu(d);
1890 unsigned long p2m_table;
1892 if ( hd->pgd )
1894 gdprintk(XENLOG_INFO VTDPREFIX,
1895 "iommu_set_pgd_1: hd->pgd = %p\n", hd->pgd);
1896 hd->pgd = NULL;
1898 p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table));
1900 #if CONFIG_PAGING_LEVELS == 3
1901 if ( !hd->pgd )
1903 int level = agaw_to_level(hd->agaw);
1904 struct dma_pte *pmd = NULL;
1905 struct dma_pte *pgd = NULL;
1906 struct dma_pte *pte = NULL;
1907 l3_pgentry_t *l3e;
1908 unsigned long flags;
1909 int i;
1911 spin_lock_irqsave(&hd->mapping_lock, flags);
1912 if ( !hd->pgd )
1914 pgd = (struct dma_pte *)alloc_xenheap_page();
1915 if ( !pgd )
1917 spin_unlock_irqrestore(&hd->mapping_lock, flags);
1918 gdprintk(XENLOG_ERR VTDPREFIX,
1919 "Allocate pgd memory failed!\n");
1920 return;
1922 memset(pgd, 0, PAGE_SIZE);
1923 hd->pgd = pgd;
1926 l3e = map_domain_page(p2m_table);
1927 switch ( level )
1929 case VTD_PAGE_TABLE_LEVEL_3: /* Weybridge */
1930 /* We only support 8 entries for the PAE L3 p2m table */
1931 for ( i = 0; i < 8 ; i++ )
1933 /* Don't create new L2 entry, use ones from p2m table */
1934 pgd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
1936 break;
1938 case VTD_PAGE_TABLE_LEVEL_4: /* Stoakley */
1939 /* We allocate one more page for the top vtd page table. */
1940 pmd = (struct dma_pte *)alloc_xenheap_page();
1941 if ( !pmd )
1943 unmap_domain_page(l3e);
1944 spin_unlock_irqrestore(&hd->mapping_lock, flags);
1945 gdprintk(XENLOG_ERR VTDPREFIX,
1946 "Allocate pmd memory failed!\n");
1947 return;
1949 memset((u8*)pmd, 0, PAGE_SIZE);
1950 pte = &pgd[0];
1951 dma_set_pte_addr(*pte, virt_to_maddr(pmd));
1952 dma_set_pte_readable(*pte);
1953 dma_set_pte_writable(*pte);
1955 for ( i = 0; i < 8; i++ )
1957 /* Don't create new L2 entry, use ones from p2m table */
1958 pmd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
1960 break;
1961 default:
1962 gdprintk(XENLOG_ERR VTDPREFIX,
1963 "iommu_set_pgd:Unsupported p2m table sharing level!\n");
1964 break;
1966 unmap_domain_page(l3e);
1967 spin_unlock_irqrestore(&hd->mapping_lock, flags);
1969 #elif CONFIG_PAGING_LEVELS == 4
1970 if ( !hd->pgd )
1972 int level = agaw_to_level(hd->agaw);
1973 l3_pgentry_t *l3e;
1974 mfn_t pgd_mfn;
1976 switch ( level )
1978 case VTD_PAGE_TABLE_LEVEL_3:
1979 l3e = map_domain_page(p2m_table);
1980 if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
1982 gdprintk(XENLOG_ERR VTDPREFIX,
1983 "iommu_set_pgd: second level wasn't there\n");
1984 unmap_domain_page(l3e);
1985 return;
1987 pgd_mfn = _mfn(l3e_get_pfn(*l3e));
1988 unmap_domain_page(l3e);
1989 hd->pgd = maddr_to_virt(pagetable_get_paddr(
1990 pagetable_from_mfn(pgd_mfn)));
1991 break;
1993 case VTD_PAGE_TABLE_LEVEL_4:
1994 pgd_mfn = _mfn(p2m_table);
1995 hd->pgd = maddr_to_virt(pagetable_get_paddr(
1996 pagetable_from_mfn(pgd_mfn)));
1997 break;
1998 default:
1999 gdprintk(XENLOG_ERR VTDPREFIX,
2000 "iommu_set_pgd:Unsupported p2m table sharing level!\n");
2001 break;
2004 #endif
2005 gdprintk(XENLOG_INFO VTDPREFIX,
2006 "iommu_set_pgd: hd->pgd = %p\n", hd->pgd);
2010 u8 iommu_state[MAX_IOMMU_REGS * MAX_IOMMUS];
2011 int iommu_suspend(void)
2013 struct acpi_drhd_unit *drhd;
2014 struct iommu *iommu;
2015 int i = 0;
2017 iommu_flush_all();
2019 for_each_drhd_unit ( drhd )
2021 iommu = drhd->iommu;
2022 iommu_state[DMAR_RTADDR_REG * i] =
2023 (u64) dmar_readq(iommu->reg, DMAR_RTADDR_REG);
2024 iommu_state[DMAR_FECTL_REG * i] =
2025 (u32) dmar_readl(iommu->reg, DMAR_FECTL_REG);
2026 iommu_state[DMAR_FEDATA_REG * i] =
2027 (u32) dmar_readl(iommu->reg, DMAR_FEDATA_REG);
2028 iommu_state[DMAR_FEADDR_REG * i] =
2029 (u32) dmar_readl(iommu->reg, DMAR_FEADDR_REG);
2030 iommu_state[DMAR_FEUADDR_REG * i] =
2031 (u32) dmar_readl(iommu->reg, DMAR_FEUADDR_REG);
2032 iommu_state[DMAR_PLMBASE_REG * i] =
2033 (u32) dmar_readl(iommu->reg, DMAR_PLMBASE_REG);
2034 iommu_state[DMAR_PLMLIMIT_REG * i] =
2035 (u32) dmar_readl(iommu->reg, DMAR_PLMLIMIT_REG);
2036 iommu_state[DMAR_PHMBASE_REG * i] =
2037 (u64) dmar_readq(iommu->reg, DMAR_PHMBASE_REG);
2038 iommu_state[DMAR_PHMLIMIT_REG * i] =
2039 (u64) dmar_readq(iommu->reg, DMAR_PHMLIMIT_REG);
2040 i++;
2043 return 0;
2046 int iommu_resume(void)
2048 struct acpi_drhd_unit *drhd;
2049 struct iommu *iommu;
2050 int i = 0;
2052 iommu_flush_all();
2054 init_vtd_hw();
2055 for_each_drhd_unit ( drhd )
2057 iommu = drhd->iommu;
2058 dmar_writeq( iommu->reg, DMAR_RTADDR_REG,
2059 (u64) iommu_state[DMAR_RTADDR_REG * i]);
2060 dmar_writel(iommu->reg, DMAR_FECTL_REG,
2061 (u32) iommu_state[DMAR_FECTL_REG * i]);
2062 dmar_writel(iommu->reg, DMAR_FEDATA_REG,
2063 (u32) iommu_state[DMAR_FEDATA_REG * i]);
2064 dmar_writel(iommu->reg, DMAR_FEADDR_REG,
2065 (u32) iommu_state[DMAR_FEADDR_REG * i]);
2066 dmar_writel(iommu->reg, DMAR_FEUADDR_REG,
2067 (u32) iommu_state[DMAR_FEUADDR_REG * i]);
2068 dmar_writel(iommu->reg, DMAR_PLMBASE_REG,
2069 (u32) iommu_state[DMAR_PLMBASE_REG * i]);
2070 dmar_writel(iommu->reg, DMAR_PLMLIMIT_REG,
2071 (u32) iommu_state[DMAR_PLMLIMIT_REG * i]);
2072 dmar_writeq(iommu->reg, DMAR_PHMBASE_REG,
2073 (u64) iommu_state[DMAR_PHMBASE_REG * i]);
2074 dmar_writeq(iommu->reg, DMAR_PHMLIMIT_REG,
2075 (u64) iommu_state[DMAR_PHMLIMIT_REG * i]);
2077 if ( iommu_enable_translation(iommu) )
2078 return -EIO;
2079 i++;
2081 return 0;
2084 /*
2085 * Local variables:
2086 * mode: C
2087 * c-set-style: "BSD"
2088 * c-basic-offset: 4
2089 * tab-width: 4
2090 * indent-tabs-mode: nil
2091 * End:
2092 */