/root/src/xen/xen/drivers/passthrough/vtd/iommu.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2006, Intel Corporation. |
3 | | * |
4 | | * This program is free software; you can redistribute it and/or modify it |
5 | | * under the terms and conditions of the GNU General Public License, |
6 | | * version 2, as published by the Free Software Foundation. |
7 | | * |
8 | | * This program is distributed in the hope it will be useful, but WITHOUT |
9 | | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
10 | | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
11 | | * more details. |
12 | | * |
13 | | * You should have received a copy of the GNU General Public License along with |
14 | | * this program; If not, see <http://www.gnu.org/licenses/>. |
15 | | * |
16 | | * Copyright (C) Ashok Raj <ashok.raj@intel.com> |
17 | | * Copyright (C) Shaohua Li <shaohua.li@intel.com> |
18 | | * Copyright (C) Allen Kay <allen.m.kay@intel.com> - adapted to xen |
19 | | */ |
20 | | |
21 | | #include <xen/irq.h> |
22 | | #include <xen/sched.h> |
23 | | #include <xen/xmalloc.h> |
24 | | #include <xen/domain_page.h> |
25 | | #include <xen/iocap.h> |
26 | | #include <xen/iommu.h> |
27 | | #include <xen/numa.h> |
28 | | #include <xen/softirq.h> |
29 | | #include <xen/time.h> |
30 | | #include <xen/pci.h> |
31 | | #include <xen/pci_regs.h> |
32 | | #include <xen/keyhandler.h> |
33 | | #include <asm/msi.h> |
34 | | #include <asm/irq.h> |
35 | | #include <asm/hvm/vmx/vmx.h> |
36 | | #include <asm/p2m.h> |
37 | | #include <mach_apic.h> |
38 | | #include "iommu.h" |
39 | | #include "dmar.h" |
40 | | #include "extern.h" |
41 | | #include "vtd.h" |
42 | | #include "../ats.h" |
43 | | |
44 | | struct mapped_rmrr { |
45 | | struct list_head list; |
46 | | u64 base, end; |
47 | | unsigned int count; |
48 | | }; |
49 | | |
50 | | /* Possible unfiltered LAPIC/MSI messages from untrusted sources? */ |
51 | | bool __read_mostly untrusted_msi; |
52 | | |
53 | | int nr_iommus; |
54 | | |
55 | | static struct tasklet vtd_fault_tasklet; |
56 | | |
57 | | static int setup_hwdom_device(u8 devfn, struct pci_dev *); |
58 | | static void setup_hwdom_rmrr(struct domain *d); |
59 | | |
60 | | static int domain_iommu_domid(struct domain *d, |
61 | | struct iommu *iommu) |
62 | 4.56M | { |
63 | 4.56M | unsigned long nr_dom, i; |
64 | 4.56M | |
65 | 4.56M | nr_dom = cap_ndoms(iommu->cap); |
66 | 4.56M | i = find_first_bit(iommu->domid_bitmap, nr_dom); |
67 | 4.56M | while ( i < nr_dom ) |
68 | 4.56M | { |
69 | 4.56M | if ( iommu->domid_map[i] == d->domain_id ) |
70 | 4.56M | return i; |
71 | 4.56M | |
72 | 0 | i = find_next_bit(iommu->domid_bitmap, nr_dom, i+1); |
73 | 0 | } |
74 | 4.56M | |
75 | 0 | dprintk(XENLOG_ERR VTDPREFIX, |
76 | 0 | "Cannot get valid iommu domid: domid=%d iommu->index=%d\n", |
77 | 0 | d->domain_id, iommu->index); |
78 | 0 | return -1; |
79 | 4.56M | } |
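The function above resolves a Xen domain id to its per-IOMMU domain-id slot by walking only the set bits of domid_bitmap. A minimal stand-alone sketch of the same lookup idiom, with a plain loop and made-up values standing in for Xen's find_first_bit()/find_next_bit() bitmap helpers:

    #include <stdio.h>

    int main(void)
    {
        unsigned long bitmap = 0x28;                  /* slots 3 and 5 in use */
        unsigned short domid_map[8] = { 0, 0, 0, 7, 0, 3, 0, 0 };
        int wanted = 7;                               /* Xen domain id */

        for ( unsigned int i = 0; i < 8; i++ )        /* nr_dom == 8 here */
            if ( ((bitmap >> i) & 1) && domid_map[i] == wanted )
            {
                printf("domid %d -> slot %u\n", wanted, i);
                break;
            }
        return 0;
    }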
80 | | |
81 | 57 | #define DID_FIELD_WIDTH 16 |
82 | 57 | #define DID_HIGH_OFFSET 8 |
83 | | static int context_set_domain_id(struct context_entry *context, |
84 | | struct domain *d, |
85 | | struct iommu *iommu) |
86 | 57 | { |
87 | 57 | unsigned long nr_dom, i; |
88 | 57 | int found = 0; |
89 | 57 | |
90 | 57 | ASSERT(spin_is_locked(&iommu->lock)); |
91 | 57 | |
92 | 57 | nr_dom = cap_ndoms(iommu->cap); |
93 | 57 | i = find_first_bit(iommu->domid_bitmap, nr_dom); |
94 | 57 | while ( i < nr_dom ) |
95 | 56 | { |
96 | 56 | if ( iommu->domid_map[i] == d->domain_id ) |
97 | 56 | { |
98 | 56 | found = 1; |
99 | 56 | break; |
100 | 56 | } |
101 | 0 | i = find_next_bit(iommu->domid_bitmap, nr_dom, i+1); |
102 | 0 | } |
103 | 57 | |
104 | 57 | if ( found == 0 ) |
105 | 1 | { |
106 | 1 | i = find_first_zero_bit(iommu->domid_bitmap, nr_dom); |
107 | 1 | if ( i >= nr_dom ) |
108 | 0 | { |
109 | 0 | dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no free domain ids\n"); |
110 | 0 | return -EFAULT; |
111 | 0 | } |
112 | 1 | iommu->domid_map[i] = d->domain_id; |
113 | 1 | } |
114 | 57 | |
115 | 57 | set_bit(i, iommu->domid_bitmap); |
116 | 57 | context->hi |= (i & ((1 << DID_FIELD_WIDTH) - 1)) << DID_HIGH_OFFSET; |
117 | 57 | return 0; |
118 | 57 | } |
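Line 116 above packs the allocated slot into the context entry's high word: the index is truncated to DID_FIELD_WIDTH bits and shifted up by DID_HIGH_OFFSET, i.e. it lands in bits 8..23 of context->hi. A self-contained sketch of that bit arithmetic (pack_did/unpack_did are illustrative names; the driver's context_domain_id() is assumed to perform the reverse extraction):

    #include <assert.h>
    #include <stdint.h>

    #define DID_FIELD_WIDTH 16
    #define DID_HIGH_OFFSET 8

    static uint64_t pack_did(uint64_t hi, unsigned long slot)
    {
        return hi | ((slot & ((1UL << DID_FIELD_WIDTH) - 1)) << DID_HIGH_OFFSET);
    }

    static unsigned long unpack_did(uint64_t hi)
    {
        return (hi >> DID_HIGH_OFFSET) & ((1UL << DID_FIELD_WIDTH) - 1);
    }

    int main(void)
    {
        uint64_t hi = pack_did(0, 42);    /* slot 42 lands in bits 8..23 */
        assert(unpack_did(hi) == 42);
        return 0;
    }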
119 | | |
120 | | static int context_get_domain_id(struct context_entry *context, |
121 | | struct iommu *iommu) |
122 | 0 | { |
123 | 0 | unsigned long dom_index, nr_dom; |
124 | 0 | int domid = -1; |
125 | 0 |
126 | 0 | if (iommu && context) |
127 | 0 | { |
128 | 0 | nr_dom = cap_ndoms(iommu->cap); |
129 | 0 |
130 | 0 | dom_index = context_domain_id(*context); |
131 | 0 |
132 | 0 | if ( dom_index < nr_dom && iommu->domid_map ) |
133 | 0 | domid = iommu->domid_map[dom_index]; |
134 | 0 | else |
135 | 0 | dprintk(XENLOG_DEBUG VTDPREFIX, |
136 | 0 | "dom_index %lu exceeds nr_dom %lu or iommu has no domid_map\n", |
137 | 0 | dom_index, nr_dom); |
138 | 0 | } |
139 | 0 | return domid; |
140 | 0 | } |
141 | | |
142 | | static struct intel_iommu *__init alloc_intel_iommu(void) |
143 | 1 | { |
144 | 1 | struct intel_iommu *intel; |
145 | 1 | |
146 | 1 | intel = xzalloc(struct intel_iommu); |
147 | 1 | if ( intel == NULL ) |
148 | 0 | return NULL; |
149 | 1 | |
150 | 1 | spin_lock_init(&intel->ir_ctrl.iremap_lock); |
151 | 1 | |
152 | 1 | return intel; |
153 | 1 | } |
154 | | |
155 | | static void __init free_intel_iommu(struct intel_iommu *intel) |
156 | 0 | { |
157 | 0 | xfree(intel); |
158 | 0 | } |
159 | | |
160 | | static int iommus_incoherent; |
161 | | static void __iommu_flush_cache(void *addr, unsigned int size) |
162 | 4.57M | { |
163 | 4.57M | int i; |
164 | 4.57M | static unsigned int clflush_size = 0; |
165 | 4.57M | |
166 | 4.57M | if ( !iommus_incoherent ) |
167 | 1 | return; |
168 | 4.57M | |
169 | 4.57M | if ( clflush_size == 0 ) |
170 | 1 | clflush_size = get_cache_line_size(); |
171 | 4.57M | |
172 | 9.68M | for ( i = 0; i < size; i += clflush_size ) |
173 | 5.10M | cacheline_flush((char *)addr + i); |
174 | 4.57M | } |
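On IOMMUs without coherent page-walk accesses, the loop above issues one flush per clflush_size bytes so every cache line overlapping the buffer reaches memory. A toy sketch of the stride calculation, with a hypothetical stub standing in for Xen's cacheline_flush() (which wraps clflush):

    #include <stdio.h>

    static void cacheline_flush_stub(char *p)     /* stands in for clflush */
    {
        printf("flush %p\n", (void *)p);
    }

    static void flush_range(void *addr, unsigned int size, unsigned int line)
    {
        for ( unsigned int i = 0; i < size; i += line )
            cacheline_flush_stub((char *)addr + i);
    }

    int main(void)
    {
        char buf[256];
        flush_range(buf, 100, 64);    /* two lines cover 100 bytes: i = 0, 64 */
        return 0;
    }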
175 | | |
176 | | void iommu_flush_cache_entry(void *addr, unsigned int size) |
177 | 4.56M | { |
178 | 4.56M | __iommu_flush_cache(addr, size); |
179 | 4.56M | } |
180 | | |
181 | | void iommu_flush_cache_page(void *addr, unsigned long npages) |
182 | 8.34k | { |
183 | 8.34k | __iommu_flush_cache(addr, PAGE_SIZE * npages); |
184 | 8.34k | } |
185 | | |
186 | | /* Allocate page table, return its machine address */ |
187 | | u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages) |
188 | 8.09k | { |
189 | 8.09k | struct acpi_rhsa_unit *rhsa; |
190 | 8.09k | struct page_info *pg, *cur_pg; |
191 | 8.09k | u64 *vaddr; |
192 | 8.09k | nodeid_t node = NUMA_NO_NODE; |
193 | 8.09k | unsigned int i; |
194 | 8.09k | |
195 | 8.09k | rhsa = drhd_to_rhsa(drhd); |
196 | 8.09k | if ( rhsa ) |
197 | 8.09k | node = pxm_to_node(rhsa->proximity_domain); |
198 | 8.09k | |
199 | 8.09k | pg = alloc_domheap_pages(NULL, get_order_from_pages(npages), |
200 | 8.09k | (node == NUMA_NO_NODE) ? 0 : MEMF_node(node)); |
201 | 8.09k | if ( !pg ) |
202 | 0 | return 0; |
203 | 8.09k | |
204 | 8.09k | cur_pg = pg; |
205 | 16.4k | for ( i = 0; i < npages; i++ ) |
206 | 8.34k | { |
207 | 8.34k | vaddr = __map_domain_page(cur_pg); |
208 | 8.34k | memset(vaddr, 0, PAGE_SIZE); |
209 | 8.34k | |
210 | 8.34k | iommu_flush_cache_page(vaddr, 1); |
211 | 8.34k | unmap_domain_page(vaddr); |
212 | 8.34k | cur_pg++; |
213 | 8.34k | } |
214 | 8.09k | |
215 | 8.09k | return page_to_maddr(pg); |
216 | 8.09k | } |
217 | | |
218 | | void free_pgtable_maddr(u64 maddr) |
219 | 0 | { |
220 | 0 | if ( maddr != 0 ) |
221 | 0 | free_domheap_page(maddr_to_page(maddr)); |
222 | 0 | } |
223 | | |
224 | | /* context entry handling */ |
225 | | static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus) |
226 | 57 | { |
227 | 57 | struct acpi_drhd_unit *drhd; |
228 | 57 | struct root_entry *root, *root_entries; |
229 | 57 | u64 maddr; |
230 | 57 | |
231 | 57 | ASSERT(spin_is_locked(&iommu->lock)); |
232 | 57 | root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr); |
233 | 57 | root = &root_entries[bus]; |
234 | 57 | if ( !root_present(*root) ) |
235 | 6 | { |
236 | 6 | drhd = iommu_to_drhd(iommu); |
237 | 6 | maddr = alloc_pgtable_maddr(drhd, 1); |
238 | 6 | if ( maddr == 0 ) |
239 | 0 | { |
240 | 0 | unmap_vtd_domain_page(root_entries); |
241 | 0 | return 0; |
242 | 0 | } |
243 | 6 | set_root_value(*root, maddr); |
244 | 6 | set_root_present(*root); |
245 | 6 | iommu_flush_cache_entry(root, sizeof(struct root_entry)); |
246 | 6 | } |
247 | 57 | maddr = (u64) get_context_addr(*root); |
248 | 57 | unmap_vtd_domain_page(root_entries); |
249 | 57 | return maddr; |
250 | 57 | } |
251 | | |
252 | | static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc) |
253 | 4.56M | { |
254 | 4.56M | struct acpi_drhd_unit *drhd; |
255 | 4.56M | struct pci_dev *pdev; |
256 | 4.56M | struct domain_iommu *hd = dom_iommu(domain); |
257 | 4.56M | int addr_width = agaw_to_width(hd->arch.agaw); |
258 | 4.56M | struct dma_pte *parent, *pte = NULL; |
259 | 4.56M | int level = agaw_to_level(hd->arch.agaw); |
260 | 4.56M | int offset; |
261 | 4.56M | u64 pte_maddr = 0; |
262 | 4.56M | |
263 | 4.56M | addr &= (((u64)1) << addr_width) - 1; |
264 | 4.56M | ASSERT(spin_is_locked(&hd->arch.mapping_lock)); |
265 | 4.56M | if ( hd->arch.pgd_maddr == 0 ) |
266 | 1 | { |
267 | 1 | /* |
268 | 1 | * just get any passthrough device in the domain - assume the user |
269 | 1 | * assigns only devices from the same node to a given guest. |
270 | 1 | */ |
271 | 1 | pdev = pci_get_pdev_by_domain(domain, -1, -1, -1); |
272 | 1 | drhd = acpi_find_matched_drhd_unit(pdev); |
273 | 1 | if ( !alloc || ((hd->arch.pgd_maddr = alloc_pgtable_maddr(drhd, 1)) == 0) ) |
274 | 0 | goto out; |
275 | 1 | } |
276 | 4.56M | |
277 | 4.56M | parent = (struct dma_pte *)map_vtd_domain_page(hd->arch.pgd_maddr); |
278 | 13.6M | while ( level > 1 ) |
279 | 13.6M | { |
280 | 13.6M | offset = address_level_offset(addr, level); |
281 | 13.6M | pte = &parent[offset]; |
282 | 13.6M | |
283 | 13.6M | pte_maddr = dma_pte_addr(*pte); |
284 | 13.6M | if ( !pte_maddr ) |
285 | 8.08k | { |
286 | 8.08k | if ( !alloc ) |
287 | 0 | break; |
288 | 8.08k | |
289 | 8.08k | pdev = pci_get_pdev_by_domain(domain, -1, -1, -1); |
290 | 8.08k | drhd = acpi_find_matched_drhd_unit(pdev); |
291 | 8.08k | pte_maddr = alloc_pgtable_maddr(drhd, 1); |
292 | 8.08k | if ( !pte_maddr ) |
293 | 0 | break; |
294 | 8.08k | |
295 | 8.08k | dma_set_pte_addr(*pte, pte_maddr); |
296 | 8.08k | |
297 | 8.08k | /* |
298 | 8.08k | * higher-level tables always set r/w; the last-level |
299 | 8.08k | * page table controls read/write. |
300 | 8.08k | */ |
301 | 8.08k | dma_set_pte_readable(*pte); |
302 | 8.08k | dma_set_pte_writable(*pte); |
303 | 8.08k | iommu_flush_cache_entry(pte, sizeof(struct dma_pte)); |
304 | 8.08k | } |
305 | 13.6M | |
306 | 13.6M | if ( level == 2 ) |
307 | 4.56M | break; |
308 | 13.6M | |
309 | 9.12M | unmap_vtd_domain_page(parent); |
310 | 9.12M | parent = map_vtd_domain_page(pte_maddr); |
311 | 9.12M | level--; |
312 | 9.12M | } |
313 | 4.56M | |
314 | 4.56M | unmap_vtd_domain_page(parent); |
315 | 4.56M | out: |
316 | 4.56M | return pte_maddr; |
317 | 4.56M | } |
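The walk above peels one 9-bit table index per level off the DMA address, stopping at level 2 so the caller gets the machine address of the leaf (level-1) table. A sketch of that decomposition, assuming the usual VT-d layout where address_level_offset(addr, level) evaluates to (addr >> (12 + 9 * (level - 1))) & 0x1ff (an assumption about the helper, which is defined elsewhere in this driver):

    #include <stdio.h>
    #include <stdint.h>

    static unsigned int level_offset(uint64_t addr, int level)
    {
        return (addr >> (12 + 9 * (level - 1))) & 0x1ff;
    }

    int main(void)
    {
        uint64_t addr = 0x12345678000ULL;    /* an example DMA address */

        for ( int level = 4; level >= 1; level-- )
            printf("level %d index %u\n", level, level_offset(addr, level));
        return 0;
    }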
318 | | |
319 | | static void iommu_flush_write_buffer(struct iommu *iommu) |
320 | 4.34M | { |
321 | 4.34M | u32 val; |
322 | 4.34M | unsigned long flags; |
323 | 4.34M | |
324 | 4.34M | if ( !rwbf_quirk && !cap_rwbf(iommu->cap) ) |
325 | 4.34M | return; |
326 | 4.34M | |
327 | 0 | spin_lock_irqsave(&iommu->register_lock, flags); |
328 | 0 | val = dmar_readl(iommu->reg, DMAR_GSTS_REG); |
329 | 0 | dmar_writel(iommu->reg, DMAR_GCMD_REG, val | DMA_GCMD_WBF); |
330 | 0 |
331 | 0 | /* Make sure the hardware completes it */ |
332 | 0 | IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl, |
333 | 0 | !(val & DMA_GSTS_WBFS), val); |
334 | 0 |
335 | 0 | spin_unlock_irqrestore(&iommu->register_lock, flags); |
336 | 0 | } |
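iommu_flush_write_buffer() above shows the register protocol used throughout this file: set a command bit in GCMD, then poll GSTS until the corresponding status bit reflects completion; IOMMU_WAIT_OP wraps that poll with a timeout. A toy model of the handshake (the register stubs and instant completion are illustrative assumptions; bit 27 is GCMD.WBF/GSTS.WBFS per the VT-d spec):

    #include <stdio.h>
    #include <stdint.h>

    #define DMA_GCMD_WBF (1u << 27)    /* GCMD.WBF / GSTS.WBFS bit position */

    static uint32_t gsts;              /* stand-in for the real GSTS register */

    static uint32_t read_gsts(void) { return gsts; }

    static void write_gcmd(uint32_t v)
    {
        gsts = v;                /* command latched; WBFS reads back as set... */
        gsts &= ~DMA_GCMD_WBF;   /* ...and clears once the flush completes */
    }

    int main(void)
    {
        write_gcmd(read_gsts() | DMA_GCMD_WBF);
        while ( read_gsts() & DMA_GCMD_WBF )
            ;    /* the real IOMMU_WAIT_OP bounds this poll with a timeout */
        printf("write buffer flush complete\n");
        return 0;
    }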
337 | | |
338 | | /* return value determines whether we need a write buffer flush */ |
339 | | static int __must_check flush_context_reg(void *_iommu, u16 did, u16 source_id, |
340 | | u8 function_mask, u64 type, |
341 | | bool_t flush_non_present_entry) |
342 | 0 | { |
343 | 0 | struct iommu *iommu = (struct iommu *) _iommu; |
344 | 0 | u64 val = 0; |
345 | 0 | unsigned long flags; |
346 | 0 |
347 | 0 | /* |
348 | 0 | * In the non-present entry flush case, if the hardware doesn't cache |
349 | 0 | * non-present entries we do nothing; if it does cache them, we flush |
350 | 0 | * the entries of domain 0 (that domain id is used to cache any |
351 | 0 | * non-present entries). |
352 | 0 | */ |
353 | 0 | if ( flush_non_present_entry ) |
354 | 0 | { |
355 | 0 | if ( !cap_caching_mode(iommu->cap) ) |
356 | 0 | return 1; |
357 | 0 | else |
358 | 0 | did = 0; |
359 | 0 | } |
360 | 0 |
361 | 0 | /* use register invalidation */ |
362 | 0 | switch ( type ) |
363 | 0 | { |
364 | 0 | case DMA_CCMD_GLOBAL_INVL: |
365 | 0 | val = DMA_CCMD_GLOBAL_INVL; |
366 | 0 | break; |
367 | 0 | case DMA_CCMD_DOMAIN_INVL: |
368 | 0 | val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did); |
369 | 0 | break; |
370 | 0 | case DMA_CCMD_DEVICE_INVL: |
371 | 0 | val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did) |
372 | 0 | |DMA_CCMD_SID(source_id)|DMA_CCMD_FM(function_mask); |
373 | 0 | break; |
374 | 0 | default: |
375 | 0 | BUG(); |
376 | 0 | } |
377 | 0 | val |= DMA_CCMD_ICC; |
378 | 0 |
379 | 0 | spin_lock_irqsave(&iommu->register_lock, flags); |
380 | 0 | dmar_writeq(iommu->reg, DMAR_CCMD_REG, val); |
381 | 0 |
382 | 0 | /* Make sure the hardware completes it */ |
383 | 0 | IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG, dmar_readq, |
384 | 0 | !(val & DMA_CCMD_ICC), val); |
385 | 0 |
386 | 0 | spin_unlock_irqrestore(&iommu->register_lock, flags); |
387 | 0 | /* flushing a context entry implicitly flushes the write buffer */ |
388 | 0 | return 0; |
389 | 0 | } |
390 | | |
391 | | static int __must_check iommu_flush_context_global(struct iommu *iommu, |
392 | | bool_t flush_non_present_entry) |
393 | 2 | { |
394 | 2 | struct iommu_flush *flush = iommu_get_flush(iommu); |
395 | 2 | return flush->context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL, |
396 | 2 | flush_non_present_entry); |
397 | 2 | } |
398 | | |
399 | | static int __must_check iommu_flush_context_device(struct iommu *iommu, |
400 | | u16 did, u16 source_id, |
401 | | u8 function_mask, |
402 | | bool_t flush_non_present_entry) |
403 | 57 | { |
404 | 57 | struct iommu_flush *flush = iommu_get_flush(iommu); |
405 | 57 | return flush->context(iommu, did, source_id, function_mask, |
406 | 57 | DMA_CCMD_DEVICE_INVL, |
407 | 57 | flush_non_present_entry); |
408 | 57 | } |
409 | | |
410 | | /* return value determines whether we need a write buffer flush */ |
411 | | static int __must_check flush_iotlb_reg(void *_iommu, u16 did, u64 addr, |
412 | | unsigned int size_order, u64 type, |
413 | | bool_t flush_non_present_entry, |
414 | | bool_t flush_dev_iotlb) |
415 | 0 | { |
416 | 0 | struct iommu *iommu = (struct iommu *) _iommu; |
417 | 0 | int tlb_offset = ecap_iotlb_offset(iommu->ecap); |
418 | 0 | u64 val = 0; |
419 | 0 | unsigned long flags; |
420 | 0 |
421 | 0 | /* |
422 | 0 | * In the non-present entry flush case, if the hardware doesn't cache |
423 | 0 | * non-present entries we do nothing; if it does cache them, we flush |
424 | 0 | * the entries of domain 0 (that domain id is used to cache any |
425 | 0 | * non-present entries). |
426 | 0 | */ |
427 | 0 | if ( flush_non_present_entry ) |
428 | 0 | { |
429 | 0 | if ( !cap_caching_mode(iommu->cap) ) |
430 | 0 | return 1; |
431 | 0 | else |
432 | 0 | did = 0; |
433 | 0 | } |
434 | 0 |
435 | 0 | /* use register invalidation */ |
436 | 0 | switch ( type ) |
437 | 0 | { |
438 | 0 | case DMA_TLB_GLOBAL_FLUSH: |
439 | 0 | val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT; |
440 | 0 | break; |
441 | 0 | case DMA_TLB_DSI_FLUSH: |
442 | 0 | val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); |
443 | 0 | break; |
444 | 0 | case DMA_TLB_PSI_FLUSH: |
445 | 0 | val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); |
446 | 0 | break; |
447 | 0 | default: |
448 | 0 | BUG(); |
449 | 0 | } |
450 | 0 | /* Note: set drain read/write */ |
451 | 0 | if ( cap_read_drain(iommu->cap) ) |
452 | 0 | val |= DMA_TLB_READ_DRAIN; |
453 | 0 | if ( cap_write_drain(iommu->cap) ) |
454 | 0 | val |= DMA_TLB_WRITE_DRAIN; |
455 | 0 |
456 | 0 | spin_lock_irqsave(&iommu->register_lock, flags); |
457 | 0 | /* Note: Only uses first TLB reg currently */ |
458 | 0 | if ( type == DMA_TLB_PSI_FLUSH ) |
459 | 0 | { |
460 | 0 | /* Note: always flush non-leaf currently. */ |
461 | 0 | dmar_writeq(iommu->reg, tlb_offset, size_order | addr); |
462 | 0 | } |
463 | 0 | dmar_writeq(iommu->reg, tlb_offset + 8, val); |
464 | 0 |
465 | 0 | /* Make sure the hardware completes it */ |
466 | 0 | IOMMU_WAIT_OP(iommu, (tlb_offset + 8), dmar_readq, |
467 | 0 | !(val & DMA_TLB_IVT), val); |
468 | 0 | spin_unlock_irqrestore(&iommu->register_lock, flags); |
469 | 0 |
470 | 0 | /* check IOTLB invalidation granularity */ |
471 | 0 | if ( DMA_TLB_IAIG(val) == 0 ) |
472 | 0 | dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: flush IOTLB failed\n"); |
473 | 0 |
474 | 0 | /* flushing an iotlb entry implicitly flushes the write buffer */ |
475 | 0 | return 0; |
476 | 0 | } |
477 | | |
478 | | static int __must_check iommu_flush_iotlb_global(struct iommu *iommu, |
479 | | bool_t flush_non_present_entry, |
480 | | bool_t flush_dev_iotlb) |
481 | 2 | { |
482 | 2 | struct iommu_flush *flush = iommu_get_flush(iommu); |
483 | 2 | int status; |
484 | 2 | |
485 | 2 | /* apply platform specific errata workarounds */ |
486 | 2 | vtd_ops_preamble_quirk(iommu); |
487 | 2 | |
488 | 2 | status = flush->iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, |
489 | 2 | flush_non_present_entry, flush_dev_iotlb); |
490 | 2 | |
491 | 2 | /* undo platform specific errata workarounds */ |
492 | 2 | vtd_ops_postamble_quirk(iommu); |
493 | 2 | |
494 | 2 | return status; |
495 | 2 | } |
496 | | |
497 | | static int __must_check iommu_flush_iotlb_dsi(struct iommu *iommu, u16 did, |
498 | | bool_t flush_non_present_entry, |
499 | | bool_t flush_dev_iotlb) |
500 | 57 | { |
501 | 57 | struct iommu_flush *flush = iommu_get_flush(iommu); |
502 | 57 | int status; |
503 | 57 | |
504 | 57 | /* apply platform specific errata workarounds */ |
505 | 57 | vtd_ops_preamble_quirk(iommu); |
506 | 57 | |
507 | 57 | status = flush->iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH, |
508 | 57 | flush_non_present_entry, flush_dev_iotlb); |
509 | 57 | |
510 | 57 | /* undo platform specific errata workarounds */ |
511 | 57 | vtd_ops_postamble_quirk(iommu); |
512 | 57 | |
513 | 57 | return status; |
514 | 57 | } |
515 | | |
516 | | static int __must_check iommu_flush_iotlb_psi(struct iommu *iommu, u16 did, |
517 | | u64 addr, unsigned int order, |
518 | | bool_t flush_non_present_entry, |
519 | | bool_t flush_dev_iotlb) |
520 | 4.56M | { |
521 | 4.56M | struct iommu_flush *flush = iommu_get_flush(iommu); |
522 | 4.56M | int status; |
523 | 4.56M | |
524 | 4.56M | ASSERT(!(addr & (~PAGE_MASK_4K))); |
525 | 4.56M | |
526 | 4.56M | /* Fall back to domain-selective flush if there is no PSI support */ |
527 | 4.56M | if ( !cap_pgsel_inv(iommu->cap) ) |
528 | 0 | return iommu_flush_iotlb_dsi(iommu, did, flush_non_present_entry, flush_dev_iotlb); |
529 | 4.56M | |
530 | 4.56M | /* Fall back to domain-selective flush if the size is too big */ |
531 | 4.56M | if ( order > cap_max_amask_val(iommu->cap) ) |
532 | 0 | return iommu_flush_iotlb_dsi(iommu, did, flush_non_present_entry, flush_dev_iotlb); |
533 | 4.56M | |
534 | 4.56M | addr >>= PAGE_SHIFT_4K + order; |
535 | 4.56M | addr <<= PAGE_SHIFT_4K + order; |
536 | 4.56M | |
537 | 4.56M | /* apply platform specific errata workarounds */ |
538 | 4.56M | vtd_ops_preamble_quirk(iommu); |
539 | 4.56M | |
540 | 4.56M | status = flush->iotlb(iommu, did, addr, order, DMA_TLB_PSI_FLUSH, |
541 | 4.56M | flush_non_present_entry, flush_dev_iotlb); |
542 | 4.56M | |
543 | 4.56M | /* undo platform specific errata workarounds */ |
544 | 4.56M | vtd_ops_postamble_quirk(iommu); |
545 | 4.56M | |
546 | 4.56M | return status; |
547 | 4.56M | } |
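The shift-down/shift-up pair at lines 534-535 rounds addr down to the 2^order-page invalidation granule before it is written to the IOTLB invalidate register. A worked example:

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT_4K 12

    int main(void)
    {
        uint64_t addr = 0x12345fffULL;
        unsigned int order = 4;    /* invalidate a 2^4-page (64K) region */
        uint64_t aligned = (addr >> (PAGE_SHIFT_4K + order))
                           << (PAGE_SHIFT_4K + order);

        printf("%#llx -> %#llx\n",
               (unsigned long long)addr, (unsigned long long)aligned);
        return 0;    /* prints 0x12345fff -> 0x12340000 */
    }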
548 | | |
549 | | static int __must_check iommu_flush_all(void) |
550 | 2 | { |
551 | 2 | struct acpi_drhd_unit *drhd; |
552 | 2 | struct iommu *iommu; |
553 | 2 | bool_t flush_dev_iotlb; |
554 | 2 | int rc = 0; |
555 | 2 | |
556 | 2 | flush_all_cache(); |
557 | 2 | for_each_drhd_unit ( drhd ) |
558 | 2 | { |
559 | 2 | int context_rc, iotlb_rc; |
560 | 2 | |
561 | 2 | iommu = drhd->iommu; |
562 | 2 | context_rc = iommu_flush_context_global(iommu, 0); |
563 | 2 | flush_dev_iotlb = !!find_ats_dev_drhd(iommu); |
564 | 2 | iotlb_rc = iommu_flush_iotlb_global(iommu, 0, flush_dev_iotlb); |
565 | 2 | |
566 | 2 | /* |
567 | 2 | * The current logic for returns: |
568 | 2 | * - positive invoke iommu_flush_write_buffer to flush cache. |
569 | 2 | * - zero on success. |
570 | 2 | * - negative on failure. Continue to flush IOMMU IOTLB on a |
571 | 2 | * best effort basis. |
572 | 2 | */ |
573 | 2 | if ( context_rc > 0 || iotlb_rc > 0 ) |
574 | 0 | iommu_flush_write_buffer(iommu); |
575 | 2 | if ( rc >= 0 ) |
576 | 2 | rc = context_rc; |
577 | 2 | if ( rc >= 0 ) |
578 | 2 | rc = iotlb_rc; |
579 | 2 | } |
580 | 2 | |
581 | 2 | if ( rc > 0 ) |
582 | 0 | rc = 0; |
583 | 2 | |
584 | 2 | return rc; |
585 | 2 | } |
586 | | |
587 | | static int __must_check iommu_flush_iotlb(struct domain *d, |
588 | | unsigned long gfn, |
589 | | bool_t dma_old_pte_present, |
590 | | unsigned int page_count) |
591 | 4.56M | { |
592 | 4.56M | struct domain_iommu *hd = dom_iommu(d); |
593 | 4.56M | struct acpi_drhd_unit *drhd; |
594 | 4.56M | struct iommu *iommu; |
595 | 4.56M | bool_t flush_dev_iotlb; |
596 | 4.56M | int iommu_domid; |
597 | 4.56M | int rc = 0; |
598 | 4.56M | |
599 | 4.56M | /* |
600 | 4.56M | * No need to hold pcidevs_lock here because we flush |
601 | 4.56M | * when a device is assigned or deassigned. |
602 | 4.56M | */ |
603 | 4.56M | for_each_drhd_unit ( drhd ) |
604 | 4.56M | { |
605 | 4.56M | iommu = drhd->iommu; |
606 | 4.56M | |
607 | 4.56M | if ( !test_bit(iommu->index, &hd->arch.iommu_bitmap) ) |
608 | 0 | continue; |
609 | 4.56M | |
610 | 4.56M | flush_dev_iotlb = !!find_ats_dev_drhd(iommu); |
611 | 4.56M | iommu_domid = domain_iommu_domid(d, iommu); |
612 | 4.56M | if ( iommu_domid == -1 ) |
613 | 0 | continue; |
614 | 4.56M | |
615 | 4.56M | if ( page_count != 1 || gfn == gfn_x(INVALID_GFN) ) |
616 | 0 | rc = iommu_flush_iotlb_dsi(iommu, iommu_domid, |
617 | 0 | 0, flush_dev_iotlb); |
618 | 4.56M | else |
619 | 4.56M | rc = iommu_flush_iotlb_psi(iommu, iommu_domid, |
620 | 4.56M | (paddr_t)gfn << PAGE_SHIFT_4K, |
621 | 4.56M | PAGE_ORDER_4K, |
622 | 4.56M | !dma_old_pte_present, |
623 | 4.56M | flush_dev_iotlb); |
624 | 4.56M | |
625 | 4.56M | if ( rc > 0 ) |
626 | 4.34M | { |
627 | 4.34M | iommu_flush_write_buffer(iommu); |
628 | 4.34M | rc = 0; |
629 | 4.34M | } |
630 | 4.56M | } |
631 | 4.56M | |
632 | 4.56M | return rc; |
633 | 4.56M | } |
634 | | |
635 | | static int __must_check iommu_flush_iotlb_pages(struct domain *d, |
636 | | unsigned long gfn, |
637 | | unsigned int page_count) |
638 | 218k | { |
639 | 218k | return iommu_flush_iotlb(d, gfn, 1, page_count); |
640 | 218k | } |
641 | | |
642 | | static int __must_check iommu_flush_iotlb_all(struct domain *d) |
643 | 0 | { |
644 | 0 | return iommu_flush_iotlb(d, gfn_x(INVALID_GFN), 0, 0); |
645 | 0 | } |
646 | | |
647 | | /* clear one page's page table */ |
648 | | static int __must_check dma_pte_clear_one(struct domain *domain, u64 addr) |
649 | 218k | { |
650 | 218k | struct domain_iommu *hd = dom_iommu(domain); |
651 | 218k | struct dma_pte *page = NULL, *pte = NULL; |
652 | 218k | u64 pg_maddr; |
653 | 218k | int rc = 0; |
654 | 218k | |
655 | 218k | spin_lock(&hd->arch.mapping_lock); |
656 | 218k | /* get last level pte */ |
657 | 218k | pg_maddr = addr_to_dma_page_maddr(domain, addr, 0); |
658 | 218k | if ( pg_maddr == 0 ) |
659 | 0 | { |
660 | 0 | spin_unlock(&hd->arch.mapping_lock); |
661 | 0 | return 0; |
662 | 0 | } |
663 | 218k | |
664 | 218k | page = (struct dma_pte *)map_vtd_domain_page(pg_maddr); |
665 | 218k | pte = page + address_level_offset(addr, 1); |
666 | 218k | |
667 | 218k | if ( !dma_pte_present(*pte) ) |
668 | 0 | { |
669 | 0 | spin_unlock(&hd->arch.mapping_lock); |
670 | 0 | unmap_vtd_domain_page(page); |
671 | 0 | return 0; |
672 | 0 | } |
673 | 218k | |
674 | 218k | dma_clear_pte(*pte); |
675 | 218k | spin_unlock(&hd->arch.mapping_lock); |
676 | 218k | iommu_flush_cache_entry(pte, sizeof(struct dma_pte)); |
677 | 218k | |
678 | 218k | if ( !this_cpu(iommu_dont_flush_iotlb) ) |
679 | 218k | rc = iommu_flush_iotlb_pages(domain, addr >> PAGE_SHIFT_4K, 1); |
680 | 218k | |
681 | 218k | unmap_vtd_domain_page(page); |
682 | 218k | |
683 | 218k | return rc; |
684 | 218k | } |
685 | | |
686 | | static void iommu_free_pagetable(u64 pt_maddr, int level) |
687 | 0 | { |
688 | 0 | struct page_info *pg = maddr_to_page(pt_maddr); |
689 | 0 |
690 | 0 | if ( pt_maddr == 0 ) |
691 | 0 | return; |
692 | 0 |
693 | 0 | PFN_ORDER(pg) = level; |
694 | 0 | spin_lock(&iommu_pt_cleanup_lock); |
695 | 0 | page_list_add_tail(pg, &iommu_pt_cleanup_list); |
696 | 0 | spin_unlock(&iommu_pt_cleanup_lock); |
697 | 0 | } |
698 | | |
699 | | static void iommu_free_page_table(struct page_info *pg) |
700 | 0 | { |
701 | 0 | unsigned int i, next_level = PFN_ORDER(pg) - 1; |
702 | 0 | u64 pt_maddr = page_to_maddr(pg); |
703 | 0 | struct dma_pte *pt_vaddr, *pte; |
704 | 0 |
705 | 0 | PFN_ORDER(pg) = 0; |
706 | 0 | pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr); |
707 | 0 |
708 | 0 | for ( i = 0; i < PTE_NUM; i++ ) |
709 | 0 | { |
710 | 0 | pte = &pt_vaddr[i]; |
711 | 0 | if ( !dma_pte_present(*pte) ) |
712 | 0 | continue; |
713 | 0 |
714 | 0 | if ( next_level >= 1 ) |
715 | 0 | iommu_free_pagetable(dma_pte_addr(*pte), next_level); |
716 | 0 |
717 | 0 | dma_clear_pte(*pte); |
718 | 0 | iommu_flush_cache_entry(pte, sizeof(struct dma_pte)); |
719 | 0 | } |
720 | 0 |
721 | 0 | unmap_vtd_domain_page(pt_vaddr); |
722 | 0 | free_pgtable_maddr(pt_maddr); |
723 | 0 | } |
724 | | |
725 | | static int iommu_set_root_entry(struct iommu *iommu) |
726 | 1 | { |
727 | 1 | u32 sts; |
728 | 1 | unsigned long flags; |
729 | 1 | |
730 | 1 | spin_lock_irqsave(&iommu->register_lock, flags); |
731 | 1 | dmar_writeq(iommu->reg, DMAR_RTADDR_REG, iommu->root_maddr); |
732 | 1 | |
733 | 1 | sts = dmar_readl(iommu->reg, DMAR_GSTS_REG); |
734 | 1 | dmar_writel(iommu->reg, DMAR_GCMD_REG, sts | DMA_GCMD_SRTP); |
735 | 1 | |
736 | 1 | /* Make sure the hardware completes it */ |
737 | 1 | IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl, |
738 | 1 | (sts & DMA_GSTS_RTPS), sts); |
739 | 1 | spin_unlock_irqrestore(&iommu->register_lock, flags); |
740 | 1 | |
741 | 1 | return 0; |
742 | 1 | } |
743 | | |
744 | | static void iommu_enable_translation(struct acpi_drhd_unit *drhd) |
745 | 1 | { |
746 | 1 | u32 sts; |
747 | 1 | unsigned long flags; |
748 | 1 | struct iommu *iommu = drhd->iommu; |
749 | 1 | |
750 | 1 | if ( is_igd_drhd(drhd) ) |
751 | 0 | { |
752 | 0 | if ( !iommu_igfx ) |
753 | 0 | { |
754 | 0 | printk(XENLOG_INFO VTDPREFIX |
755 | 0 | "Passed iommu=no-igfx option. Disabling IGD VT-d engine.\n"); |
756 | 0 | return; |
757 | 0 | } |
758 | 0 |
759 | 0 | if ( !is_igd_vt_enabled_quirk() ) |
760 | 0 | { |
761 | 0 | if ( force_iommu ) |
762 | 0 | panic("BIOS did not enable IGD for VT properly, crash Xen for security purpose"); |
763 | 0 |
764 | 0 | printk(XENLOG_WARNING VTDPREFIX |
765 | 0 | "BIOS did not enable IGD for VT properly. Disabling IGD VT-d engine.\n"); |
766 | 0 | return; |
767 | 0 | } |
768 | 0 | } |
769 | 1 | |
770 | 1 | /* apply platform specific errata workarounds */ |
771 | 1 | vtd_ops_preamble_quirk(iommu); |
772 | 1 | |
773 | 1 | if ( iommu_verbose ) |
774 | 1 | printk(VTDPREFIX "iommu_enable_translation: iommu->reg = %p\n", |
775 | 1 | iommu->reg); |
776 | 1 | spin_lock_irqsave(&iommu->register_lock, flags); |
777 | 1 | sts = dmar_readl(iommu->reg, DMAR_GSTS_REG); |
778 | 1 | dmar_writel(iommu->reg, DMAR_GCMD_REG, sts | DMA_GCMD_TE); |
779 | 1 | |
780 | 1 | /* Make sure the hardware completes it */ |
781 | 1 | IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl, |
782 | 1 | (sts & DMA_GSTS_TES), sts); |
783 | 1 | spin_unlock_irqrestore(&iommu->register_lock, flags); |
784 | 1 | |
785 | 1 | /* undo platform specific errata workarounds */ |
786 | 1 | vtd_ops_postamble_quirk(iommu); |
787 | 1 | |
788 | 1 | /* Disable PMRs when VT-d engine takes effect per spec definition */ |
789 | 1 | disable_pmr(iommu); |
790 | 1 | } |
791 | | |
792 | | static void iommu_disable_translation(struct iommu *iommu) |
793 | 0 | { |
794 | 0 | u32 sts; |
795 | 0 | unsigned long flags; |
796 | 0 |
797 | 0 | /* apply platform specific errata workarounds */ |
798 | 0 | vtd_ops_preamble_quirk(iommu); |
799 | 0 |
800 | 0 | spin_lock_irqsave(&iommu->register_lock, flags); |
801 | 0 | sts = dmar_readl(iommu->reg, DMAR_GSTS_REG); |
802 | 0 | dmar_writel(iommu->reg, DMAR_GCMD_REG, sts & (~DMA_GCMD_TE)); |
803 | 0 |
804 | 0 | /* Make sure the hardware completes it */ |
805 | 0 | IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl, |
806 | 0 | !(sts & DMA_GSTS_TES), sts); |
807 | 0 | spin_unlock_irqrestore(&iommu->register_lock, flags); |
808 | 0 |
809 | 0 | /* undo platform specific errata workarounds */ |
810 | 0 | vtd_ops_postamble_quirk(iommu); |
811 | 0 | } |
812 | | |
813 | | enum faulttype { |
814 | | DMA_REMAP, |
815 | | INTR_REMAP, |
816 | | UNKNOWN, |
817 | | }; |
818 | | |
819 | | static const char *dma_remap_fault_reasons[] = |
820 | | { |
821 | | "Software", |
822 | | "Present bit in root entry is clear", |
823 | | "Present bit in context entry is clear", |
824 | | "Invalid context entry", |
825 | | "Access beyond MGAW", |
826 | | "PTE Write access is not set", |
827 | | "PTE Read access is not set", |
828 | | "Next page table ptr is invalid", |
829 | | "Root table address invalid", |
830 | | "Context table ptr is invalid", |
831 | | "non-zero reserved fields in RTP", |
832 | | "non-zero reserved fields in CTP", |
833 | | "non-zero reserved fields in PTE", |
834 | | "Blocked a DMA translation request", |
835 | | }; |
836 | | |
837 | | static const char *intr_remap_fault_reasons[] = |
838 | | { |
839 | | "Detected reserved fields in the decoded interrupt-remapped request", |
840 | | "Interrupt index exceeded the interrupt-remapping table size", |
841 | | "Present field in the IRTE entry is clear", |
842 | | "Error accessing interrupt-remapping table pointed by IRTA_REG", |
843 | | "Detected reserved fields in the IRTE entry", |
844 | | "Blocked a compatibility format interrupt request", |
845 | | "Blocked an interrupt request due to source-id verification failure", |
846 | | }; |
847 | | |
848 | | static const char *iommu_get_fault_reason(u8 fault_reason, |
849 | | enum faulttype *fault_type) |
850 | 0 | { |
851 | 0 | if ( fault_reason >= 0x20 && ( fault_reason < 0x20 + |
852 | 0 | ARRAY_SIZE(intr_remap_fault_reasons)) ) |
853 | 0 | { |
854 | 0 | *fault_type = INTR_REMAP; |
855 | 0 | return intr_remap_fault_reasons[fault_reason - 0x20]; |
856 | 0 | } |
857 | 0 | else if ( fault_reason < ARRAY_SIZE(dma_remap_fault_reasons) ) |
858 | 0 | { |
859 | 0 | *fault_type = DMA_REMAP; |
860 | 0 | return dma_remap_fault_reasons[fault_reason]; |
861 | 0 | } |
862 | 0 | else |
863 | 0 | { |
864 | 0 | *fault_type = UNKNOWN; |
865 | 0 | return "Unknown"; |
866 | 0 | } |
867 | 0 | } |
868 | | |
869 | | static int iommu_page_fault_do_one(struct iommu *iommu, int type, |
870 | | u8 fault_reason, u16 source_id, u64 addr) |
871 | 0 | { |
872 | 0 | const char *reason, *kind; |
873 | 0 | enum faulttype fault_type; |
874 | 0 | u16 seg = iommu->intel->drhd->segment; |
875 | 0 |
876 | 0 | reason = iommu_get_fault_reason(fault_reason, &fault_type); |
877 | 0 | switch ( fault_type ) |
878 | 0 | { |
879 | 0 | case DMA_REMAP: |
880 | 0 | printk(XENLOG_G_WARNING VTDPREFIX |
881 | 0 | "DMAR:[%s] Request device [%04x:%02x:%02x.%u] " |
882 | 0 | "fault addr %"PRIx64", iommu reg = %p\n", |
883 | 0 | (type ? "DMA Read" : "DMA Write"), |
884 | 0 | seg, PCI_BUS(source_id), PCI_SLOT(source_id), |
885 | 0 | PCI_FUNC(source_id), addr, iommu->reg); |
886 | 0 | kind = "DMAR"; |
887 | 0 | break; |
888 | 0 | case INTR_REMAP: |
889 | 0 | printk(XENLOG_G_WARNING VTDPREFIX |
890 | 0 | "INTR-REMAP: Request device [%04x:%02x:%02x.%u] " |
891 | 0 | "fault index %"PRIx64", iommu reg = %p\n", |
892 | 0 | seg, PCI_BUS(source_id), PCI_SLOT(source_id), |
893 | 0 | PCI_FUNC(source_id), addr >> 48, iommu->reg); |
894 | 0 | kind = "INTR-REMAP"; |
895 | 0 | break; |
896 | 0 | default: |
897 | 0 | printk(XENLOG_G_WARNING VTDPREFIX |
898 | 0 | "UNKNOWN: Request device [%04x:%02x:%02x.%u] " |
899 | 0 | "fault addr %"PRIx64", iommu reg = %p\n", |
900 | 0 | seg, PCI_BUS(source_id), PCI_SLOT(source_id), |
901 | 0 | PCI_FUNC(source_id), addr, iommu->reg); |
902 | 0 | kind = "UNKNOWN"; |
903 | 0 | break; |
904 | 0 | } |
905 | 0 |
906 | 0 | printk(XENLOG_G_WARNING VTDPREFIX "%s: reason %02x - %s\n", |
907 | 0 | kind, fault_reason, reason); |
908 | 0 |
909 | 0 | if ( iommu_verbose && fault_type == DMA_REMAP ) |
910 | 0 | print_vtd_entries(iommu, PCI_BUS(source_id), PCI_DEVFN2(source_id), |
911 | 0 | addr >> PAGE_SHIFT); |
912 | 0 |
913 | 0 | return 0; |
914 | 0 | } |
915 | | |
916 | | static void iommu_fault_status(u32 fault_status) |
917 | 0 | { |
918 | 0 | if ( fault_status & DMA_FSTS_PFO ) |
919 | 0 | INTEL_IOMMU_DEBUG("iommu_fault_status: Fault Overflow\n"); |
920 | 0 | if ( fault_status & DMA_FSTS_PPF ) |
921 | 0 | INTEL_IOMMU_DEBUG("iommu_fault_status: Primary Pending Fault\n"); |
922 | 0 | if ( fault_status & DMA_FSTS_AFO ) |
923 | 0 | INTEL_IOMMU_DEBUG("iommu_fault_status: Advanced Fault Overflow\n"); |
924 | 0 | if ( fault_status & DMA_FSTS_APF ) |
925 | 0 | INTEL_IOMMU_DEBUG("iommu_fault_status: Advanced Pending Fault\n"); |
926 | 0 | if ( fault_status & DMA_FSTS_IQE ) |
927 | 0 | INTEL_IOMMU_DEBUG("iommu_fault_status: Invalidation Queue Error\n"); |
928 | 0 | if ( fault_status & DMA_FSTS_ICE ) |
929 | 0 | INTEL_IOMMU_DEBUG("iommu_fault_status: Invalidation Completion Error\n"); |
930 | 0 | if ( fault_status & DMA_FSTS_ITE ) |
931 | 0 | INTEL_IOMMU_DEBUG("iommu_fault_status: Invalidation Time-out Error\n"); |
932 | 0 | } |
933 | | |
934 | 1 | #define PRIMARY_FAULT_REG_LEN (16) |
935 | | static void __do_iommu_page_fault(struct iommu *iommu) |
936 | 0 | { |
937 | 0 | int reg, fault_index; |
938 | 0 | u32 fault_status; |
939 | 0 | unsigned long flags; |
940 | 0 |
941 | 0 | fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG); |
942 | 0 |
943 | 0 | iommu_fault_status(fault_status); |
944 | 0 |
945 | 0 | /* FIXME: ignore advanced fault log */ |
946 | 0 | if ( !(fault_status & DMA_FSTS_PPF) ) |
947 | 0 | goto clear_overflow; |
948 | 0 |
949 | 0 | fault_index = dma_fsts_fault_record_index(fault_status); |
950 | 0 | reg = cap_fault_reg_offset(iommu->cap); |
951 | 0 | while (1) |
952 | 0 | { |
953 | 0 | u8 fault_reason; |
954 | 0 | u16 source_id; |
955 | 0 | u32 data; |
956 | 0 | u64 guest_addr; |
957 | 0 | int type; |
958 | 0 |
959 | 0 | /* highest 32 bits */ |
960 | 0 | spin_lock_irqsave(&iommu->register_lock, flags); |
961 | 0 | data = dmar_readl(iommu->reg, reg + |
962 | 0 | fault_index * PRIMARY_FAULT_REG_LEN + 12); |
963 | 0 | if ( !(data & DMA_FRCD_F) ) |
964 | 0 | { |
965 | 0 | spin_unlock_irqrestore(&iommu->register_lock, flags); |
966 | 0 | break; |
967 | 0 | } |
968 | 0 |
969 | 0 | fault_reason = dma_frcd_fault_reason(data); |
970 | 0 | type = dma_frcd_type(data); |
971 | 0 |
972 | 0 | data = dmar_readl(iommu->reg, reg + |
973 | 0 | fault_index * PRIMARY_FAULT_REG_LEN + 8); |
974 | 0 | source_id = dma_frcd_source_id(data); |
975 | 0 |
976 | 0 | guest_addr = dmar_readq(iommu->reg, reg + |
977 | 0 | fault_index * PRIMARY_FAULT_REG_LEN); |
978 | 0 | guest_addr = dma_frcd_page_addr(guest_addr); |
979 | 0 | /* clear the fault */ |
980 | 0 | dmar_writel(iommu->reg, reg + |
981 | 0 | fault_index * PRIMARY_FAULT_REG_LEN + 12, DMA_FRCD_F); |
982 | 0 | spin_unlock_irqrestore(&iommu->register_lock, flags); |
983 | 0 |
984 | 0 | iommu_page_fault_do_one(iommu, type, fault_reason, |
985 | 0 | source_id, guest_addr); |
986 | 0 |
987 | 0 | pci_check_disable_device(iommu->intel->drhd->segment, |
988 | 0 | PCI_BUS(source_id), PCI_DEVFN2(source_id)); |
989 | 0 |
990 | 0 | fault_index++; |
991 | 0 | if ( fault_index > cap_num_fault_regs(iommu->cap) ) |
992 | 0 | fault_index = 0; |
993 | 0 | } |
994 | 0 | clear_overflow: |
995 | 0 | /* clear primary fault overflow */ |
996 | 0 | fault_status = readl(iommu->reg + DMAR_FSTS_REG); |
997 | 0 | if ( fault_status & DMA_FSTS_PFO ) |
998 | 0 | { |
999 | 0 | spin_lock_irqsave(&iommu->register_lock, flags); |
1000 | 0 | dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO); |
1001 | 0 | spin_unlock_irqrestore(&iommu->register_lock, flags); |
1002 | 0 | } |
1003 | 0 | } |
1004 | | |
1005 | | static void do_iommu_page_fault(unsigned long data) |
1006 | 0 | { |
1007 | 0 | struct acpi_drhd_unit *drhd; |
1008 | 0 |
1009 | 0 | if ( list_empty(&acpi_drhd_units) ) |
1010 | 0 | { |
1011 | 0 | INTEL_IOMMU_DEBUG("no device found, something must be very wrong!\n"); |
1012 | 0 | return; |
1013 | 0 | } |
1014 | 0 |
1015 | 0 | /* |
1016 | 0 | * No matter which IOMMU the interrupt came from, check all the |
1017 | 0 | * IOMMUs present in the system. This allows for having just one |
1018 | 0 | * tasklet (instead of one per IOMMU) and should be more than |
1019 | 0 | * fine, considering how rare the event of a fault should be. |
1020 | 0 | */ |
1021 | 0 | for_each_drhd_unit ( drhd ) |
1022 | 0 | __do_iommu_page_fault(drhd->iommu); |
1023 | 0 | } |
1024 | | |
1025 | | static void iommu_page_fault(int irq, void *dev_id, |
1026 | | struct cpu_user_regs *regs) |
1027 | 0 | { |
1028 | 0 | /* |
1029 | 0 | * Just flag the tasklet as runnable. This is fine, according to VT-d |
1030 | 0 | * specs since a new interrupt won't be generated until we clear all |
1031 | 0 | * the faults that caused this one to happen. |
1032 | 0 | */ |
1033 | 0 | tasklet_schedule(&vtd_fault_tasklet); |
1034 | 0 | } |
1035 | | |
1036 | | static void dma_msi_unmask(struct irq_desc *desc) |
1037 | 1 | { |
1038 | 1 | struct iommu *iommu = desc->action->dev_id; |
1039 | 1 | unsigned long flags; |
1040 | 1 | u32 sts; |
1041 | 1 | |
1042 | 1 | /* unmask it */ |
1043 | 1 | spin_lock_irqsave(&iommu->register_lock, flags); |
1044 | 1 | sts = dmar_readl(iommu->reg, DMAR_FECTL_REG); |
1045 | 1 | sts &= ~DMA_FECTL_IM; |
1046 | 1 | dmar_writel(iommu->reg, DMAR_FECTL_REG, sts); |
1047 | 1 | spin_unlock_irqrestore(&iommu->register_lock, flags); |
1048 | 1 | iommu->msi.msi_attrib.host_masked = 0; |
1049 | 1 | } |
1050 | | |
1051 | | static void dma_msi_mask(struct irq_desc *desc) |
1052 | 0 | { |
1053 | 0 | unsigned long flags; |
1054 | 0 | struct iommu *iommu = desc->action->dev_id; |
1055 | 0 | u32 sts; |
1056 | 0 |
1057 | 0 | /* mask it */ |
1058 | 0 | spin_lock_irqsave(&iommu->register_lock, flags); |
1059 | 0 | sts = dmar_readl(iommu->reg, DMAR_FECTL_REG); |
1060 | 0 | sts |= DMA_FECTL_IM; |
1061 | 0 | dmar_writel(iommu->reg, DMAR_FECTL_REG, sts); |
1062 | 0 | spin_unlock_irqrestore(&iommu->register_lock, flags); |
1063 | 0 | iommu->msi.msi_attrib.host_masked = 1; |
1064 | 0 | } |
1065 | | |
1066 | | static unsigned int dma_msi_startup(struct irq_desc *desc) |
1067 | 1 | { |
1068 | 1 | dma_msi_unmask(desc); |
1069 | 1 | return 0; |
1070 | 1 | } |
1071 | | |
1072 | | static void dma_msi_ack(struct irq_desc *desc) |
1073 | 0 | { |
1074 | 0 | irq_complete_move(desc); |
1075 | 0 | dma_msi_mask(desc); |
1076 | 0 | move_masked_irq(desc); |
1077 | 0 | } |
1078 | | |
1079 | | static void dma_msi_end(struct irq_desc *desc, u8 vector) |
1080 | 0 | { |
1081 | 0 | dma_msi_unmask(desc); |
1082 | 0 | ack_APIC_irq(); |
1083 | 0 | } |
1084 | | |
1085 | | static void dma_msi_set_affinity(struct irq_desc *desc, const cpumask_t *mask) |
1086 | 2 | { |
1087 | 2 | struct msi_msg msg; |
1088 | 2 | unsigned int dest; |
1089 | 2 | unsigned long flags; |
1090 | 2 | struct iommu *iommu = desc->action->dev_id; |
1091 | 2 | |
1092 | 2 | dest = set_desc_affinity(desc, mask); |
1093 | 2 | if ( dest == BAD_APICID ) { |
1094 | 0 | dprintk(XENLOG_ERR VTDPREFIX, "Set iommu interrupt affinity error!\n"); |
1095 | 0 | return; |
1096 | 0 | } |
1097 | 2 | |
1098 | 2 | msi_compose_msg(desc->arch.vector, NULL, &msg); |
1099 | 2 | msg.dest32 = dest; |
1100 | 2 | if ( x2apic_enabled ) |
1101 | 2 | msg.address_hi = dest & 0xFFFFFF00; |
1102 | 2 | ASSERT(!(msg.address_lo & MSI_ADDR_DEST_ID_MASK)); |
1103 | 2 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); |
1104 | 2 | iommu->msi.msg = msg; |
1105 | 2 | |
1106 | 2 | spin_lock_irqsave(&iommu->register_lock, flags); |
1107 | 2 | dmar_writel(iommu->reg, DMAR_FEDATA_REG, msg.data); |
1108 | 2 | dmar_writeq(iommu->reg, DMAR_FEADDR_REG, msg.address); |
1109 | 2 | spin_unlock_irqrestore(&iommu->register_lock, flags); |
1110 | 2 | } |
1111 | | |
1112 | | static hw_irq_controller dma_msi_type = { |
1113 | | .typename = "DMA_MSI", |
1114 | | .startup = dma_msi_startup, |
1115 | | .shutdown = dma_msi_mask, |
1116 | | .enable = dma_msi_unmask, |
1117 | | .disable = dma_msi_mask, |
1118 | | .ack = dma_msi_ack, |
1119 | | .end = dma_msi_end, |
1120 | | .set_affinity = dma_msi_set_affinity, |
1121 | | }; |
1122 | | |
1123 | | static int __init iommu_set_interrupt(struct acpi_drhd_unit *drhd) |
1124 | 1 | { |
1125 | 1 | int irq, ret; |
1126 | 1 | struct acpi_rhsa_unit *rhsa = drhd_to_rhsa(drhd); |
1127 | 1 | struct iommu *iommu = drhd->iommu; |
1128 | 1 | struct irq_desc *desc; |
1129 | 1 | |
1130 | 1 | irq = create_irq(rhsa ? pxm_to_node(rhsa->proximity_domain) |
1131 | 0 | : NUMA_NO_NODE); |
1132 | 1 | if ( irq <= 0 ) |
1133 | 0 | { |
1134 | 0 | dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no irq available!\n"); |
1135 | 0 | return -EINVAL; |
1136 | 0 | } |
1137 | 1 | |
1138 | 1 | desc = irq_to_desc(irq); |
1139 | 1 | desc->handler = &dma_msi_type; |
1140 | 1 | ret = request_irq(irq, 0, iommu_page_fault, "dmar", iommu); |
1141 | 1 | if ( ret ) |
1142 | 0 | { |
1143 | 0 | desc->handler = &no_irq_type; |
1144 | 0 | destroy_irq(irq); |
1145 | 0 | dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n"); |
1146 | 0 | return ret; |
1147 | 0 | } |
1148 | 1 | |
1149 | 1 | iommu->msi.irq = irq; |
1150 | 1 | iommu->msi.msi_attrib.pos = MSI_TYPE_IOMMU; |
1151 | 1 | iommu->msi.msi_attrib.maskbit = 1; |
1152 | 1 | iommu->msi.msi_attrib.is_64 = 1; |
1153 | 1 | desc->msi_desc = &iommu->msi; |
1154 | 1 | |
1155 | 1 | return 0; |
1156 | 1 | } |
1157 | | |
1158 | | int __init iommu_alloc(struct acpi_drhd_unit *drhd) |
1159 | 1 | { |
1160 | 1 | struct iommu *iommu; |
1161 | 1 | unsigned long sagaw, nr_dom; |
1162 | 1 | int agaw; |
1163 | 1 | |
1164 | 1 | if ( nr_iommus > MAX_IOMMUS ) |
1165 | 0 | { |
1166 | 0 | dprintk(XENLOG_ERR VTDPREFIX, |
1167 | 0 | "IOMMU: nr_iommus %d > MAX_IOMMUS\n", nr_iommus); |
1168 | 0 | return -ENOMEM; |
1169 | 0 | } |
1170 | 1 | |
1171 | 1 | iommu = xzalloc(struct iommu); |
1172 | 1 | if ( iommu == NULL ) |
1173 | 0 | return -ENOMEM; |
1174 | 1 | |
1175 | 1 | iommu->msi.irq = -1; /* No irq assigned yet. */ |
1176 | 1 | INIT_LIST_HEAD(&iommu->ats_devices); |
1177 | 1 | |
1178 | 1 | iommu->intel = alloc_intel_iommu(); |
1179 | 1 | if ( iommu->intel == NULL ) |
1180 | 0 | { |
1181 | 0 | xfree(iommu); |
1182 | 0 | return -ENOMEM; |
1183 | 0 | } |
1184 | 1 | iommu->intel->drhd = drhd; |
1185 | 1 | drhd->iommu = iommu; |
1186 | 1 | |
1187 | 1 | if ( !(iommu->root_maddr = alloc_pgtable_maddr(drhd, 1)) ) |
1188 | 0 | return -ENOMEM; |
1189 | 1 | |
1190 | 1 | iommu->reg = ioremap(drhd->address, PAGE_SIZE); |
1191 | 1 | if ( !iommu->reg ) |
1192 | 0 | return -ENOMEM; |
1193 | 1 | iommu->index = nr_iommus++; |
1194 | 1 | |
1195 | 1 | iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG); |
1196 | 1 | iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG); |
1197 | 1 | |
1198 | 1 | if ( iommu_verbose ) |
1199 | 1 | { |
1200 | 1 | printk(VTDPREFIX "drhd->address = %"PRIx64" iommu->reg = %p\n", |
1201 | 1 | drhd->address, iommu->reg); |
1202 | 1 | printk(VTDPREFIX "cap = %"PRIx64" ecap = %"PRIx64"\n", |
1203 | 1 | iommu->cap, iommu->ecap); |
1204 | 1 | } |
1205 | 1 | if ( !(iommu->cap + 1) || !(iommu->ecap + 1) ) |
1206 | 0 | return -ENODEV; |
1207 | 1 | |
1208 | 1 | if ( cap_fault_reg_offset(iommu->cap) + |
1209 | 1 | cap_num_fault_regs(iommu->cap) * PRIMARY_FAULT_REG_LEN >= PAGE_SIZE || |
1210 | 1 | ecap_iotlb_offset(iommu->ecap) >= PAGE_SIZE ) |
1211 | 0 | { |
1212 | 0 | printk(XENLOG_ERR VTDPREFIX "IOMMU: unsupported\n"); |
1213 | 0 | print_iommu_regs(drhd); |
1214 | 0 | return -ENODEV; |
1215 | 0 | } |
1216 | 1 | |
1217 | 1 | /* Calculate number of pagetable levels: between 2 and 4. */ |
1218 | 1 | sagaw = cap_sagaw(iommu->cap); |
1219 | 1 | for ( agaw = level_to_agaw(4); agaw >= 0; agaw-- ) |
1220 | 1 | if ( test_bit(agaw, &sagaw) ) |
1221 | 1 | break; |
1222 | 1 | if ( agaw < 0 ) |
1223 | 0 | { |
1224 | 0 | printk(XENLOG_ERR VTDPREFIX "IOMMU: unsupported sagaw %lx\n", sagaw); |
1225 | 0 | print_iommu_regs(drhd); |
1226 | 0 | return -ENODEV; |
1227 | 0 | } |
1228 | 1 | iommu->nr_pt_levels = agaw_to_level(agaw); |
1229 | 1 | |
1230 | 1 | if ( !ecap_coherent(iommu->ecap) ) |
1231 | 1 | iommus_incoherent = 1; |
1232 | 1 | |
1233 | 1 | /* allocate domain id bitmap */ |
1234 | 1 | nr_dom = cap_ndoms(iommu->cap); |
1235 | 1 | iommu->domid_bitmap = xzalloc_array(unsigned long, BITS_TO_LONGS(nr_dom)); |
1236 | 1 | if ( !iommu->domid_bitmap ) |
1237 | 0 | return -ENOMEM ; |
1238 | 1 | |
1239 | 1 | /* |
1240 | 1 | * If Caching mode is set, then invalid translations are tagged with |
1241 | 1 | * domain id 0; hence reserve bit 0 for it. |
1242 | 1 | */ |
1243 | 1 | if ( cap_caching_mode(iommu->cap) ) |
1244 | 0 | set_bit(0, iommu->domid_bitmap); |
1245 | 1 | |
1246 | 1 | iommu->domid_map = xzalloc_array(u16, nr_dom); |
1247 | 1 | if ( !iommu->domid_map ) |
1248 | 0 | return -ENOMEM ; |
1249 | 1 | |
1250 | 1 | spin_lock_init(&iommu->lock); |
1251 | 1 | spin_lock_init(&iommu->register_lock); |
1252 | 1 | |
1253 | 1 | return 0; |
1254 | 1 | } |
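The SAGAW scan at lines 1218-1221 picks the largest supported adjusted guest address width, preferring 4-level tables. A sketch of that selection, assuming this driver's encodings level_to_agaw(l) == l - 2 and agaw_to_level(a) == a + 2:

    #include <stdio.h>

    int main(void)
    {
        unsigned long sagaw = 0x4;    /* example: only bit 2 (4-level) set */
        int agaw;

        for ( agaw = 4 - 2; agaw >= 0; agaw-- )    /* start at level_to_agaw(4) */
            if ( sagaw & (1UL << agaw) )
                break;

        if ( agaw >= 0 )
            printf("nr_pt_levels = %d\n", agaw + 2);    /* prints 4 */
        return 0;
    }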
1255 | | |
1256 | | void __init iommu_free(struct acpi_drhd_unit *drhd) |
1257 | 0 | { |
1258 | 0 | struct iommu *iommu = drhd->iommu; |
1259 | 0 |
1260 | 0 | if ( iommu == NULL ) |
1261 | 0 | return; |
1262 | 0 |
1263 | 0 | drhd->iommu = NULL; |
1264 | 0 |
1265 | 0 | if ( iommu->root_maddr != 0 ) |
1266 | 0 | { |
1267 | 0 | free_pgtable_maddr(iommu->root_maddr); |
1268 | 0 | iommu->root_maddr = 0; |
1269 | 0 | } |
1270 | 0 |
1271 | 0 | if ( iommu->reg ) |
1272 | 0 | iounmap(iommu->reg); |
1273 | 0 |
1274 | 0 | xfree(iommu->domid_bitmap); |
1275 | 0 | xfree(iommu->domid_map); |
1276 | 0 |
1277 | 0 | free_intel_iommu(iommu->intel); |
1278 | 0 | if ( iommu->msi.irq >= 0 ) |
1279 | 0 | destroy_irq(iommu->msi.irq); |
1280 | 0 | xfree(iommu); |
1281 | 0 | } |
1282 | | |
1283 | | #define guestwidth_to_adjustwidth(gaw) ({ \ |
1284 | | int agaw, r = (gaw - 12) % 9; \ |
1285 | | agaw = (r == 0) ? gaw : (gaw + 9 - r); \ |
1286 | | if ( agaw > 64 ) \ |
1287 | | agaw = 64; \ |
1288 | | agaw; }) |
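guestwidth_to_adjustwidth() rounds a guest address width up to the next VT-d-supported width of the form 12 + 9*k, capped at 64. Worked values in a small stand-alone program (adjust_width is an illustrative name for the same arithmetic):

    #include <stdio.h>

    static int adjust_width(int gaw)
    {
        int r = (gaw - 12) % 9;
        int agaw = (r == 0) ? gaw : gaw + 9 - r;

        return agaw > 64 ? 64 : agaw;
    }

    int main(void)
    {
        int widths[] = { 30, 32, 39, 48 };

        for ( unsigned int i = 0; i < sizeof(widths) / sizeof(widths[0]); i++ )
            printf("%d -> %d\n", widths[i], adjust_width(widths[i]));
        return 0;    /* 30 -> 30, 32 -> 39, 39 -> 39, 48 -> 48 */
    }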
1289 | | |
1290 | | static int intel_iommu_domain_init(struct domain *d) |
1291 | 1 | { |
1292 | 1 | dom_iommu(d)->arch.agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH); |
1293 | 1 | |
1294 | 1 | return 0; |
1295 | 1 | } |
1296 | | |
1297 | | static void __hwdom_init intel_iommu_hwdom_init(struct domain *d) |
1298 | 1 | { |
1299 | 1 | struct acpi_drhd_unit *drhd; |
1300 | 1 | |
1301 | 1 | if ( !iommu_passthrough && !need_iommu(d) ) |
1302 | 0 | { |
1303 | 0 | /* Set up 1:1 page table for hardware domain. */ |
1304 | 0 | vtd_set_hwdom_mapping(d); |
1305 | 0 | } |
1306 | 1 | |
1307 | 1 | setup_hwdom_pci_devices(d, setup_hwdom_device); |
1308 | 1 | setup_hwdom_rmrr(d); |
1309 | 1 | |
1310 | 1 | if ( iommu_flush_all() ) |
1311 | 0 | printk(XENLOG_WARNING VTDPREFIX |
1312 | 0 | " IOMMU flush all failed for hardware domain\n"); |
1313 | 1 | |
1314 | 1 | for_each_drhd_unit ( drhd ) |
1315 | 1 | { |
1316 | 1 | if ( iomem_deny_access(d, PFN_DOWN(drhd->address), |
1317 | 1 | PFN_DOWN(drhd->address)) ) |
1318 | 0 | BUG(); |
1319 | 1 | iommu_enable_translation(drhd); |
1320 | 1 | } |
1321 | 1 | } |
1322 | | |
1323 | | int domain_context_mapping_one( |
1324 | | struct domain *domain, |
1325 | | struct iommu *iommu, |
1326 | | u8 bus, u8 devfn, const struct pci_dev *pdev) |
1327 | 57 | { |
1328 | 57 | struct domain_iommu *hd = dom_iommu(domain); |
1329 | 57 | struct context_entry *context, *context_entries; |
1330 | 57 | u64 maddr, pgd_maddr; |
1331 | 57 | u16 seg = iommu->intel->drhd->segment; |
1332 | 57 | int agaw, rc, ret; |
1333 | 57 | bool_t flush_dev_iotlb; |
1334 | 57 | |
1335 | 57 | ASSERT(pcidevs_locked()); |
1336 | 57 | spin_lock(&iommu->lock); |
1337 | 57 | maddr = bus_to_context_maddr(iommu, bus); |
1338 | 57 | context_entries = (struct context_entry *)map_vtd_domain_page(maddr); |
1339 | 57 | context = &context_entries[devfn]; |
1340 | 57 | |
1341 | 57 | if ( context_present(*context) ) |
1342 | 0 | { |
1343 | 0 | int res = 0; |
1344 | 0 |
1345 | 0 | /* Try to get domain ownership from device structure. If that's |
1346 | 0 | * not available, try to read it from the context itself. */ |
1347 | 0 | if ( pdev ) |
1348 | 0 | { |
1349 | 0 | if ( pdev->domain != domain ) |
1350 | 0 | { |
1351 | 0 | printk(XENLOG_G_INFO VTDPREFIX |
1352 | 0 | "d%d: %04x:%02x:%02x.%u owned by d%d!", |
1353 | 0 | domain->domain_id, |
1354 | 0 | seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), |
1355 | 0 | pdev->domain ? pdev->domain->domain_id : -1); |
1356 | 0 | res = -EINVAL; |
1357 | 0 | } |
1358 | 0 | } |
1359 | 0 | else |
1360 | 0 | { |
1361 | 0 | int cdomain; |
1362 | 0 | cdomain = context_get_domain_id(context, iommu); |
1363 | 0 | |
1364 | 0 | if ( cdomain < 0 ) |
1365 | 0 | { |
1366 | 0 | printk(XENLOG_G_WARNING VTDPREFIX |
1367 | 0 | "d%d: %04x:%02x:%02x.%u mapped, but can't find owner!\n", |
1368 | 0 | domain->domain_id, |
1369 | 0 | seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); |
1370 | 0 | res = -EINVAL; |
1371 | 0 | } |
1372 | 0 | else if ( cdomain != domain->domain_id ) |
1373 | 0 | { |
1374 | 0 | printk(XENLOG_G_INFO VTDPREFIX |
1375 | 0 | "d%d: %04x:%02x:%02x.%u already mapped to d%d!", |
1376 | 0 | domain->domain_id, |
1377 | 0 | seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), |
1378 | 0 | cdomain); |
1379 | 0 | res = -EINVAL; |
1380 | 0 | } |
1381 | 0 | } |
1382 | 0 |
1383 | 0 | unmap_vtd_domain_page(context_entries); |
1384 | 0 | spin_unlock(&iommu->lock); |
1385 | 0 | return res; |
1386 | 0 | } |
1387 | 57 | |
1388 | 57 | if ( iommu_passthrough && is_hardware_domain(domain) ) |
1389 | 0 | { |
1390 | 0 | context_set_translation_type(*context, CONTEXT_TT_PASS_THRU); |
1391 | 0 | agaw = level_to_agaw(iommu->nr_pt_levels); |
1392 | 0 | } |
1393 | 57 | else |
1394 | 57 | { |
1395 | 57 | spin_lock(&hd->arch.mapping_lock); |
1396 | 57 | |
1397 | 57 | /* Ensure we have pagetables allocated down to leaf PTE. */ |
1398 | 57 | if ( hd->arch.pgd_maddr == 0 ) |
1399 | 1 | { |
1400 | 1 | addr_to_dma_page_maddr(domain, 0, 1); |
1401 | 1 | if ( hd->arch.pgd_maddr == 0 ) |
1402 | 0 | { |
1403 | 0 | nomem: |
1404 | 0 | spin_unlock(&hd->arch.mapping_lock); |
1405 | 0 | spin_unlock(&iommu->lock); |
1406 | 0 | unmap_vtd_domain_page(context_entries); |
1407 | 0 | return -ENOMEM; |
1408 | 0 | } |
1409 | 1 | } |
1410 | 57 | |
1411 | 57 | /* Skip top levels of page tables for 2- and 3-level DRHDs. */ |
1412 | 57 | pgd_maddr = hd->arch.pgd_maddr; |
1413 | 57 | for ( agaw = level_to_agaw(4); |
1414 | 57 | agaw != level_to_agaw(iommu->nr_pt_levels); |
1415 | 0 | agaw-- ) |
1416 | 0 | { |
1417 | 0 | struct dma_pte *p = map_vtd_domain_page(pgd_maddr); |
1418 | 0 | pgd_maddr = dma_pte_addr(*p); |
1419 | 0 | unmap_vtd_domain_page(p); |
1420 | 0 | if ( pgd_maddr == 0 ) |
1421 | 0 | goto nomem; |
1422 | 0 | } |
1423 | 57 | |
1424 | 57 | context_set_address_root(*context, pgd_maddr); |
1425 | 57 | if ( ats_enabled && ecap_dev_iotlb(iommu->ecap) ) |
1426 | 0 | context_set_translation_type(*context, CONTEXT_TT_DEV_IOTLB); |
1427 | 57 | else |
1428 | 57 | context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL); |
1429 | 57 | |
1430 | 57 | spin_unlock(&hd->arch.mapping_lock); |
1431 | 57 | } |
1432 | 57 | |
1433 | 57 | if ( context_set_domain_id(context, domain, iommu) ) |
1434 | 0 | { |
1435 | 0 | spin_unlock(&iommu->lock); |
1436 | 0 | unmap_vtd_domain_page(context_entries); |
1437 | 0 | return -EFAULT; |
1438 | 0 | } |
1439 | 57 | |
1440 | 57 | context_set_address_width(*context, agaw); |
1441 | 57 | context_set_fault_enable(*context); |
1442 | 57 | context_set_present(*context); |
1443 | 57 | iommu_flush_cache_entry(context, sizeof(struct context_entry)); |
1444 | 57 | spin_unlock(&iommu->lock); |
1445 | 57 | |
1446 | 57 | /* Context entry was previously non-present (with domid 0). */ |
1447 | 57 | rc = iommu_flush_context_device(iommu, 0, PCI_BDF2(bus, devfn), |
1448 | 57 | DMA_CCMD_MASK_NOBIT, 1); |
1449 | 57 | flush_dev_iotlb = !!find_ats_dev_drhd(iommu); |
1450 | 57 | ret = iommu_flush_iotlb_dsi(iommu, 0, 1, flush_dev_iotlb); |
1451 | 57 | |
1452 | 57 | /* |
1453 | 57 | * The current logic for returns: |
1454 | 57 | * - positive invoke iommu_flush_write_buffer to flush cache. |
1455 | 57 | * - zero on success. |
1456 | 57 | * - negative on failure. Continue to flush IOMMU IOTLB on a |
1457 | 57 | * best effort basis. |
1458 | 57 | */ |
1459 | 57 | if ( rc > 0 || ret > 0 ) |
1460 | 57 | iommu_flush_write_buffer(iommu); |
1461 | 57 | if ( rc >= 0 ) |
1462 | 57 | rc = ret; |
1463 | 57 | if ( rc > 0 ) |
1464 | 57 | rc = 0; |
1465 | 57 | |
1466 | 57 | set_bit(iommu->index, &hd->arch.iommu_bitmap); |
1467 | 57 | |
1468 | 57 | unmap_vtd_domain_page(context_entries); |
1469 | 57 | |
1470 | 57 | if ( !seg && !rc ) |
1471 | 57 | rc = me_wifi_quirk(domain, bus, devfn, MAP_ME_PHANTOM_FUNC); |
1472 | 57 | |
1473 | 57 | return rc; |
1474 | 57 | } |
1475 | | |
1476 | | static int domain_context_mapping(struct domain *domain, u8 devfn, |
1477 | | struct pci_dev *pdev) |
1478 | 68 | { |
1479 | 68 | struct acpi_drhd_unit *drhd; |
1480 | 68 | int ret = 0; |
1481 | 68 | u8 seg = pdev->seg, bus = pdev->bus, secbus; |
1482 | 68 | |
1483 | 68 | drhd = acpi_find_matched_drhd_unit(pdev); |
1484 | 68 | if ( !drhd ) |
1485 | 0 | return -ENODEV; |
1486 | 68 | |
1487 | 68 | ASSERT(pcidevs_locked()); |
1488 | 68 | |
1489 | 68 | switch ( pdev->type ) |
1490 | 68 | { |
1491 | 1 | case DEV_TYPE_PCI_HOST_BRIDGE: |
1492 | 1 | if ( iommu_debug ) |
1493 | 1 | printk(VTDPREFIX "d%d:Hostbridge: skip %04x:%02x:%02x.%u map\n", |
1494 | 1 | domain->domain_id, seg, bus, |
1495 | 1 | PCI_SLOT(devfn), PCI_FUNC(devfn)); |
1496 | 1 | if ( !is_hardware_domain(domain) ) |
1497 | 0 | return -EPERM; |
1498 | 1 | break; |
1499 | 1 | |
1500 | 10 | case DEV_TYPE_PCIe_BRIDGE: |
1501 | 10 | case DEV_TYPE_PCIe2PCI_BRIDGE: |
1502 | 10 | case DEV_TYPE_LEGACY_PCI_BRIDGE: |
1503 | 10 | break; |
1504 | 10 | |
1505 | 25 | case DEV_TYPE_PCIe_ENDPOINT: |
1506 | 25 | if ( iommu_debug ) |
1507 | 25 | printk(VTDPREFIX "d%d:PCIe: map %04x:%02x:%02x.%u\n", |
1508 | 25 | domain->domain_id, seg, bus, |
1509 | 25 | PCI_SLOT(devfn), PCI_FUNC(devfn)); |
1510 | 25 | ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn, |
1511 | 25 | pdev); |
1512 | 25 | if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 ) |
1513 | 0 | enable_ats_device(pdev, &drhd->iommu->ats_devices); |
1514 | 25 | |
1515 | 25 | break; |
1516 | 10 | |
1517 | 32 | case DEV_TYPE_PCI: |
1518 | 32 | if ( iommu_debug ) |
1519 | 32 | printk(VTDPREFIX "d%d:PCI: map %04x:%02x:%02x.%u\n", |
1520 | 32 | domain->domain_id, seg, bus, |
1521 | 32 | PCI_SLOT(devfn), PCI_FUNC(devfn)); |
1522 | 32 | |
1523 | 32 | ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn, |
1524 | 32 | pdev); |
1525 | 32 | if ( ret ) |
1526 | 0 | break; |
1527 | 32 | |
1528 | 32 | if ( find_upstream_bridge(seg, &bus, &devfn, &secbus) < 1 ) |
1529 | 32 | break; |
1530 | 32 | |
1531 | 0 | ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn, |
1532 | 0 | pci_get_pdev(seg, bus, devfn)); |
1533 | 0 |
1534 | 0 | /* |
1535 | 0 | * Devices behind PCIe-to-PCI/PCIx bridge may generate different |
1536 | 0 | * requester-id. It may originate from devfn=0 on the secondary bus |
1537 | 0 | * behind the bridge. Map that id as well if we didn't already. |
1538 | 0 | */ |
1539 | 0 | if ( !ret && pdev_type(seg, bus, devfn) == DEV_TYPE_PCIe2PCI_BRIDGE && |
1540 | 0 | (secbus != pdev->bus || pdev->devfn != 0) ) |
1541 | 0 | ret = domain_context_mapping_one(domain, drhd->iommu, secbus, 0, |
1542 | 0 | pci_get_pdev(seg, secbus, 0)); |
1543 | 0 |
1544 | 0 | break; |
1545 | 32 | |
1546 | 0 | default: |
1547 | 0 | dprintk(XENLOG_ERR VTDPREFIX, "d%d:unknown(%u): %04x:%02x:%02x.%u\n", |
1548 | 0 | domain->domain_id, pdev->type, |
1549 | 0 | seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); |
1550 | 0 | ret = -EINVAL; |
1551 | 0 | break; |
1552 | 68 | } |
1553 | 68 | |
1554 | 68 | if ( !ret && devfn == pdev->devfn ) |
1555 | 68 | pci_vtd_quirk(pdev); |
1556 | 68 | |
1557 | 68 | return ret; |
1558 | 68 | } |
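
Throughout these handlers a device is identified by (seg, bus, devfn), where
devfn packs a 5-bit slot and a 3-bit function, and PCI_BDF2 combines bus and
devfn into a 16-bit BDF. A standalone sketch of that encoding, using local
stand-ins for the macros:

#include <stdint.h>
#include <stdio.h>

/* devfn = slot[7:3] | func[2:0]; bdf = bus[15:8] | devfn[7:0]. */
static unsigned int slot_of(uint8_t devfn) { return (devfn >> 3) & 0x1f; }
static unsigned int func_of(uint8_t devfn) { return devfn & 0x07; }
static uint16_t bdf_of(uint8_t bus, uint8_t devfn)
{
    return ((uint16_t)bus << 8) | devfn;
}

int main(void)
{
    uint8_t bus = 0x03, devfn = (0x1c << 3) | 0x2;   /* device 03:1c.2 */

    printf("%02x:%02x.%u -> bdf %#06x\n",
           bus, slot_of(devfn), func_of(devfn), bdf_of(bus, devfn));
    return 0;
}
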
1559 | | |
1560 | | int domain_context_unmap_one( |
1561 | | struct domain *domain, |
1562 | | struct iommu *iommu, |
1563 | | u8 bus, u8 devfn) |
1564 | 0 | { |
1565 | 0 | struct context_entry *context, *context_entries; |
1566 | 0 | u64 maddr; |
1567 | 0 | int iommu_domid, rc, ret; |
1568 | 0 | bool_t flush_dev_iotlb; |
1569 | 0 |
1570 | 0 | ASSERT(pcidevs_locked()); |
1571 | 0 | spin_lock(&iommu->lock); |
1572 | 0 |
1573 | 0 | maddr = bus_to_context_maddr(iommu, bus); |
1574 | 0 | context_entries = (struct context_entry *)map_vtd_domain_page(maddr); |
1575 | 0 | context = &context_entries[devfn]; |
1576 | 0 |
1577 | 0 | if ( !context_present(*context) ) |
1578 | 0 | { |
1579 | 0 | spin_unlock(&iommu->lock); |
1580 | 0 | unmap_vtd_domain_page(context_entries); |
1581 | 0 | return 0; |
1582 | 0 | } |
1583 | 0 |
1584 | 0 | context_clear_present(*context); |
1585 | 0 | context_clear_entry(*context); |
1586 | 0 | iommu_flush_cache_entry(context, sizeof(struct context_entry)); |
1587 | 0 |
1588 | 0 |     iommu_domid = domain_iommu_domid(domain, iommu);
1589 | 0 | if ( iommu_domid == -1 ) |
1590 | 0 | { |
1591 | 0 | spin_unlock(&iommu->lock); |
1592 | 0 | unmap_vtd_domain_page(context_entries); |
1593 | 0 | return -EINVAL; |
1594 | 0 | } |
1595 | 0 |
1596 | 0 | rc = iommu_flush_context_device(iommu, iommu_domid, |
1597 | 0 | PCI_BDF2(bus, devfn), |
1598 | 0 | DMA_CCMD_MASK_NOBIT, 0); |
1599 | 0 |
1600 | 0 | flush_dev_iotlb = !!find_ats_dev_drhd(iommu); |
1601 | 0 | ret = iommu_flush_iotlb_dsi(iommu, iommu_domid, 0, flush_dev_iotlb); |
1602 | 0 |
1603 | 0 | /* |
1604 | 0 |      * The current return-value convention:
1605 | 0 |      *  - positive: the caller should invoke iommu_flush_write_buffer.
1606 | 0 |      *  - zero: success.
1607 | 0 |      *  - negative: failure; the IOMMU IOTLB flush is still
1608 | 0 |      *    attempted on a best-effort basis.
1609 | 0 | */ |
1610 | 0 | if ( rc > 0 || ret > 0 ) |
1611 | 0 | iommu_flush_write_buffer(iommu); |
1612 | 0 | if ( rc >= 0 ) |
1613 | 0 | rc = ret; |
1614 | 0 | if ( rc > 0 ) |
1615 | 0 | rc = 0; |
1616 | 0 |
1617 | 0 | spin_unlock(&iommu->lock); |
1618 | 0 | unmap_vtd_domain_page(context_entries); |
1619 | 0 |
1620 | 0 | if ( !iommu->intel->drhd->segment && !rc ) |
1621 | 0 | rc = me_wifi_quirk(domain, bus, devfn, UNMAP_ME_PHANTOM_FUNC); |
1622 | 0 |
1623 | 0 | return rc; |
1624 | 0 | } |
1625 | | |
1626 | | static int domain_context_unmap(struct domain *domain, u8 devfn, |
1627 | | struct pci_dev *pdev) |
1628 | 0 | { |
1629 | 0 | struct acpi_drhd_unit *drhd; |
1630 | 0 | struct iommu *iommu; |
1631 | 0 | int ret = 0; |
1632 | 0 | u8 seg = pdev->seg, bus = pdev->bus, tmp_bus, tmp_devfn, secbus; |
1633 | 0 | int found = 0; |
1634 | 0 |
1635 | 0 | drhd = acpi_find_matched_drhd_unit(pdev); |
1636 | 0 | if ( !drhd ) |
1637 | 0 | return -ENODEV; |
1638 | 0 | iommu = drhd->iommu; |
1639 | 0 |
1640 | 0 | switch ( pdev->type ) |
1641 | 0 | { |
1642 | 0 | case DEV_TYPE_PCI_HOST_BRIDGE: |
1643 | 0 | if ( iommu_debug ) |
1644 | 0 | printk(VTDPREFIX "d%d:Hostbridge: skip %04x:%02x:%02x.%u unmap\n", |
1645 | 0 | domain->domain_id, seg, bus, |
1646 | 0 | PCI_SLOT(devfn), PCI_FUNC(devfn)); |
1647 | 0 | if ( !is_hardware_domain(domain) ) |
1648 | 0 | return -EPERM; |
1649 | 0 | goto out; |
1650 | 0 |
1651 | 0 | case DEV_TYPE_PCIe_BRIDGE: |
1652 | 0 | case DEV_TYPE_PCIe2PCI_BRIDGE: |
1653 | 0 | case DEV_TYPE_LEGACY_PCI_BRIDGE: |
1654 | 0 | goto out; |
1655 | 0 |
1656 | 0 | case DEV_TYPE_PCIe_ENDPOINT: |
1657 | 0 | if ( iommu_debug ) |
1658 | 0 | printk(VTDPREFIX "d%d:PCIe: unmap %04x:%02x:%02x.%u\n", |
1659 | 0 | domain->domain_id, seg, bus, |
1660 | 0 | PCI_SLOT(devfn), PCI_FUNC(devfn)); |
1661 | 0 | ret = domain_context_unmap_one(domain, iommu, bus, devfn); |
1662 | 0 | if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 ) |
1663 | 0 | disable_ats_device(pdev); |
1664 | 0 |
1665 | 0 | break; |
1666 | 0 |
1667 | 0 | case DEV_TYPE_PCI: |
1668 | 0 | if ( iommu_debug ) |
1669 | 0 | printk(VTDPREFIX "d%d:PCI: unmap %04x:%02x:%02x.%u\n", |
1670 | 0 | domain->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); |
1671 | 0 | ret = domain_context_unmap_one(domain, iommu, bus, devfn); |
1672 | 0 | if ( ret ) |
1673 | 0 | break; |
1674 | 0 |
1675 | 0 | tmp_bus = bus; |
1676 | 0 | tmp_devfn = devfn; |
1677 | 0 | if ( find_upstream_bridge(seg, &tmp_bus, &tmp_devfn, &secbus) < 1 ) |
1678 | 0 | break; |
1679 | 0 |
1680 | 0 |         /* PCIe to PCI/PCI-X bridge */
1681 | 0 | if ( pdev_type(seg, tmp_bus, tmp_devfn) == DEV_TYPE_PCIe2PCI_BRIDGE ) |
1682 | 0 | { |
1683 | 0 | ret = domain_context_unmap_one(domain, iommu, tmp_bus, tmp_devfn); |
1684 | 0 | if ( ret ) |
1685 | 0 | return ret; |
1686 | 0 |
1687 | 0 | ret = domain_context_unmap_one(domain, iommu, secbus, 0); |
1688 | 0 | } |
1689 | 0 | else /* Legacy PCI bridge */ |
1690 | 0 | ret = domain_context_unmap_one(domain, iommu, tmp_bus, tmp_devfn); |
1691 | 0 |
1692 | 0 | break; |
1693 | 0 |
1694 | 0 | default: |
1695 | 0 | dprintk(XENLOG_ERR VTDPREFIX, "d%d:unknown(%u): %04x:%02x:%02x.%u\n", |
1696 | 0 | domain->domain_id, pdev->type, |
1697 | 0 | seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); |
1698 | 0 | ret = -EINVAL; |
1699 | 0 | goto out; |
1700 | 0 | } |
1701 | 0 |
1702 | 0 | /* |
1703 | 0 |      * If this domain owns no other devices under the same iommu, clear
1704 | 0 |      * its bit in iommu_bitmap and release its id in domid_bitmap.
1705 | 0 | */ |
1706 | 0 | for_each_pdev ( domain, pdev ) |
1707 | 0 | { |
1708 | 0 | if ( pdev->seg == seg && pdev->bus == bus && pdev->devfn == devfn ) |
1709 | 0 | continue; |
1710 | 0 |
1711 | 0 | drhd = acpi_find_matched_drhd_unit(pdev); |
1712 | 0 | if ( drhd && drhd->iommu == iommu ) |
1713 | 0 | { |
1714 | 0 | found = 1; |
1715 | 0 | break; |
1716 | 0 | } |
1717 | 0 | } |
1718 | 0 |
1719 | 0 | if ( found == 0 ) |
1720 | 0 | { |
1721 | 0 | int iommu_domid; |
1722 | 0 |
1723 | 0 | clear_bit(iommu->index, &dom_iommu(domain)->arch.iommu_bitmap); |
1724 | 0 |
1725 | 0 | iommu_domid = domain_iommu_domid(domain, iommu); |
1726 | 0 | if ( iommu_domid == -1 ) |
1727 | 0 | { |
1728 | 0 | ret = -EINVAL; |
1729 | 0 | goto out; |
1730 | 0 | } |
1731 | 0 |
1732 | 0 | clear_bit(iommu_domid, iommu->domid_bitmap); |
1733 | 0 | iommu->domid_map[iommu_domid] = 0; |
1734 | 0 | } |
1735 | 0 |
1736 | 0 | out: |
1737 | 0 | return ret; |
1738 | 0 | } |
1739 | | |
1740 | | static void iommu_domain_teardown(struct domain *d) |
1741 | 0 | { |
1742 | 0 | struct domain_iommu *hd = dom_iommu(d); |
1743 | 0 | struct mapped_rmrr *mrmrr, *tmp; |
1744 | 0 |
|
1745 | 0 | if ( list_empty(&acpi_drhd_units) ) |
1746 | 0 | return; |
1747 | 0 |
1748 | 0 | list_for_each_entry_safe ( mrmrr, tmp, &hd->arch.mapped_rmrrs, list ) |
1749 | 0 | { |
1750 | 0 | list_del(&mrmrr->list); |
1751 | 0 | xfree(mrmrr); |
1752 | 0 | } |
1753 | 0 |
1754 | 0 | if ( iommu_use_hap_pt(d) ) |
1755 | 0 | return; |
1756 | 0 |
1757 | 0 | spin_lock(&hd->arch.mapping_lock); |
1758 | 0 | iommu_free_pagetable(hd->arch.pgd_maddr, agaw_to_level(hd->arch.agaw)); |
1759 | 0 | hd->arch.pgd_maddr = 0; |
1760 | 0 | spin_unlock(&hd->arch.mapping_lock); |
1761 | 0 | } |
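
The teardown above walks hd->arch.mapped_rmrrs with list_for_each_entry_safe
because each entry is freed mid-walk: the next pointer must be captured
before the current node is released. A standalone model of the same idiom on
a hand-rolled singly linked list:

#include <stdio.h>
#include <stdlib.h>

struct node { struct node *next; int v; };

int main(void)
{
    struct node *head = NULL, *cur, *tmp;

    for ( int i = 0; i < 3; i++ )   /* build a 3-element list */
    {
        struct node *n = malloc(sizeof(*n));
        n->v = i;
        n->next = head;
        head = n;
    }

    /* "Safe" traversal: fetch next before freeing the current node. */
    for ( cur = head; cur; cur = tmp )
    {
        tmp = cur->next;
        printf("freeing %d\n", cur->v);
        free(cur);
    }
    return 0;
}
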
1762 | | |
1763 | | static int __must_check intel_iommu_map_page(struct domain *d, |
1764 | | unsigned long gfn, |
1765 | | unsigned long mfn, |
1766 | | unsigned int flags) |
1767 | 4.34M | { |
1768 | 4.34M | struct domain_iommu *hd = dom_iommu(d); |
1769 | 4.34M | struct dma_pte *page = NULL, *pte = NULL, old, new = { 0 }; |
1770 | 4.34M | u64 pg_maddr; |
1771 | 4.34M | int rc = 0; |
1772 | 4.34M | |
1773 | 4.34M | /* Do nothing if VT-d shares EPT page table */ |
1774 | 4.34M | if ( iommu_use_hap_pt(d) ) |
1775 | 0 | return 0; |
1776 | 4.34M | |
1777 | 4.34M | /* Do nothing if hardware domain and iommu supports pass thru. */ |
1778 | 4.34M | if ( iommu_passthrough && is_hardware_domain(d) ) |
1779 | 0 | return 0; |
1780 | 4.34M | |
1781 | 4.34M | spin_lock(&hd->arch.mapping_lock); |
1782 | 4.34M | |
1783 | 4.34M | pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K, 1); |
1784 | 4.34M | if ( pg_maddr == 0 ) |
1785 | 0 | { |
1786 | 0 | spin_unlock(&hd->arch.mapping_lock); |
1787 | 0 | return -ENOMEM; |
1788 | 0 | } |
1789 | 4.34M | page = (struct dma_pte *)map_vtd_domain_page(pg_maddr); |
1790 | 4.34M | pte = page + (gfn & LEVEL_MASK); |
1791 | 4.34M | old = *pte; |
1792 | 4.34M | dma_set_pte_addr(new, (paddr_t)mfn << PAGE_SHIFT_4K); |
1793 | 4.34M | dma_set_pte_prot(new, |
1794 | 4.34M | ((flags & IOMMUF_readable) ? DMA_PTE_READ : 0) | |
1795 | 4.34M | ((flags & IOMMUF_writable) ? DMA_PTE_WRITE : 0)); |
1796 | 4.34M | |
1797 | 4.34M |     /* Set the SNP bit on the leaf PTE if Snoop Control is available */
1798 | 4.34M | if ( iommu_snoop ) |
1799 | 4.34M | dma_set_pte_snp(new); |
1800 | 4.34M | |
1801 | 4.34M | if ( old.val == new.val ) |
1802 | 0 | { |
1803 | 0 | spin_unlock(&hd->arch.mapping_lock); |
1804 | 0 | unmap_vtd_domain_page(page); |
1805 | 0 | return 0; |
1806 | 0 | } |
1807 | 4.34M | *pte = new; |
1808 | 4.34M | |
1809 | 4.34M | iommu_flush_cache_entry(pte, sizeof(struct dma_pte)); |
1810 | 4.34M | spin_unlock(&hd->arch.mapping_lock); |
1811 | 4.34M | unmap_vtd_domain_page(page); |
1812 | 4.34M | |
1813 | 4.34M | if ( !this_cpu(iommu_dont_flush_iotlb) ) |
1814 | 4.34M | rc = iommu_flush_iotlb(d, gfn, dma_pte_present(old), 1); |
1815 | 4.34M | |
1816 | 4.34M | return rc; |
1817 | 4.34M | } |
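
intel_iommu_map_page indexes the leaf table with gfn & LEVEL_MASK; each VT-d
paging level resolves 9 address bits, i.e. 512 entries per 4K table. A
standalone sketch of the per-level index computation, assuming that usual
9-bits-per-level stride:

#include <stdio.h>

#define LEVEL_STRIDE 9                            /* 512 entries per table */
#define LEVEL_MASK   ((1UL << LEVEL_STRIDE) - 1)

/* Index of gfn within the table at 'level' (level 1 = leaf). */
static unsigned int level_index(unsigned long gfn, int level)
{
    return (gfn >> ((level - 1) * LEVEL_STRIDE)) & LEVEL_MASK;
}

int main(void)
{
    unsigned long gfn = 0x12345;

    for ( int level = 3; level >= 1; level-- )
        printf("level %d index: %u\n", level, level_index(gfn, level));
    return 0;
}
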
1818 | | |
1819 | | static int __must_check intel_iommu_unmap_page(struct domain *d, |
1820 | | unsigned long gfn) |
1821 | 218k | { |
1822 | 218k | /* Do nothing if hardware domain and iommu supports pass thru. */ |
1823 | 218k | if ( iommu_passthrough && is_hardware_domain(d) ) |
1824 | 0 | return 0; |
1825 | 218k | |
1826 | 218k | return dma_pte_clear_one(d, (paddr_t)gfn << PAGE_SHIFT_4K); |
1827 | 218k | } |
1828 | | |
1829 | | int iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte, |
1830 | | int order, int present) |
1831 | 0 | { |
1832 | 0 | struct acpi_drhd_unit *drhd; |
1833 | 0 | struct iommu *iommu = NULL; |
1834 | 0 | struct domain_iommu *hd = dom_iommu(d); |
1835 | 0 | bool_t flush_dev_iotlb; |
1836 | 0 | int iommu_domid; |
1837 | 0 | int rc = 0; |
1838 | 0 |
1839 | 0 | iommu_flush_cache_entry(pte, sizeof(struct dma_pte)); |
1840 | 0 |
1841 | 0 | for_each_drhd_unit ( drhd ) |
1842 | 0 | { |
1843 | 0 | iommu = drhd->iommu; |
1844 | 0 | if ( !test_bit(iommu->index, &hd->arch.iommu_bitmap) ) |
1845 | 0 | continue; |
1846 | 0 |
1847 | 0 | flush_dev_iotlb = !!find_ats_dev_drhd(iommu); |
1848 | 0 |         iommu_domid = domain_iommu_domid(d, iommu);
1849 | 0 | if ( iommu_domid == -1 ) |
1850 | 0 | continue; |
1851 | 0 |
1852 | 0 | rc = iommu_flush_iotlb_psi(iommu, iommu_domid, |
1853 | 0 | (paddr_t)gfn << PAGE_SHIFT_4K, |
1854 | 0 | order, !present, flush_dev_iotlb); |
1855 | 0 | if ( rc > 0 ) |
1856 | 0 | { |
1857 | 0 | iommu_flush_write_buffer(iommu); |
1858 | 0 | rc = 0; |
1859 | 0 | } |
1860 | 0 | } |
1861 | 0 |
1862 | 0 | if ( unlikely(rc) ) |
1863 | 0 | { |
1864 | 0 | if ( !d->is_shutting_down && printk_ratelimit() ) |
1865 | 0 | printk(XENLOG_ERR VTDPREFIX |
1866 | 0 | " d%d: IOMMU pages flush failed: %d\n", |
1867 | 0 | d->domain_id, rc); |
1868 | 0 |
1869 | 0 | if ( !is_hardware_domain(d) ) |
1870 | 0 | domain_crash(d); |
1871 | 0 | } |
1872 | 0 |
1873 | 0 | return rc; |
1874 | 0 | } |
1875 | | |
1876 | | static int __init vtd_ept_page_compatible(struct iommu *iommu) |
1877 | 1 | { |
1878 | 1 | u64 ept_cap, vtd_cap = iommu->cap; |
1879 | 1 | |
1880 | 1 | /* EPT is not initialised yet, so we must check the capability in |
1881 | 1 | * the MSR explicitly rather than use cpu_has_vmx_ept_*() */ |
1882 | 1 | if ( rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP, ept_cap) != 0 ) |
1883 | 0 | return 0; |
1884 | 1 | |
1885 | 1 | return (ept_has_2mb(ept_cap) && opt_hap_2mb) == cap_sps_2mb(vtd_cap) && |
1886 | 1 | (ept_has_1gb(ept_cap) && opt_hap_1gb) == cap_sps_1gb(vtd_cap); |
1887 | 1 | } |
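
Note that the compatibility test demands exact agreement on each superpage
size: a shared page table must be walkable by both EPT and VT-d, so a size
available on only one side disqualifies sharing. A tiny standalone model of
the predicate:

#include <stdbool.h>
#include <stdio.h>

/* Shared tables need EPT and VT-d to agree on every superpage size. */
static bool can_share(bool ept_2mb, bool vtd_2mb, bool ept_1gb, bool vtd_1gb)
{
    return ept_2mb == vtd_2mb && ept_1gb == vtd_1gb;
}

int main(void)
{
    printf("%d\n", can_share(true, true, true, true));    /* 1 */
    printf("%d\n", can_share(true, true, false, true));   /* 0: VT-d-only 1G */
    return 0;
}
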
1888 | | |
1889 | | /* |
1890 | |  * Set the VT-d page table root to the EPT table if sharing is allowed.
1891 | | */ |
1892 | | static void iommu_set_pgd(struct domain *d) |
1893 | 0 | { |
1894 | 0 | mfn_t pgd_mfn; |
1895 | 0 |
1896 | 0 | pgd_mfn = pagetable_get_mfn(p2m_get_pagetable(p2m_get_hostp2m(d))); |
1897 | 0 | dom_iommu(d)->arch.pgd_maddr = |
1898 | 0 | pagetable_get_paddr(pagetable_from_mfn(pgd_mfn)); |
1899 | 0 | } |
1900 | | |
1901 | | static int rmrr_identity_mapping(struct domain *d, bool_t map, |
1902 | | const struct acpi_rmrr_unit *rmrr, |
1903 | | u32 flag) |
1904 | 2 | { |
1905 | 2 | unsigned long base_pfn = rmrr->base_address >> PAGE_SHIFT_4K; |
1906 | 2 | unsigned long end_pfn = PAGE_ALIGN_4K(rmrr->end_address) >> PAGE_SHIFT_4K; |
1907 | 2 | struct mapped_rmrr *mrmrr; |
1908 | 2 | struct domain_iommu *hd = dom_iommu(d); |
1909 | 2 | |
1910 | 2 | ASSERT(pcidevs_locked()); |
1911 | 2 | ASSERT(rmrr->base_address < rmrr->end_address); |
1912 | 2 | |
1913 | 2 | /* |
1914 | 2 | * No need to acquire hd->arch.mapping_lock: Both insertion and removal |
1915 | 2 | * get done while holding pcidevs_lock. |
1916 | 2 | */ |
1917 | 2 | list_for_each_entry( mrmrr, &hd->arch.mapped_rmrrs, list ) |
1918 | 1 | { |
1919 | 1 | if ( mrmrr->base == rmrr->base_address && |
1920 | 1 | mrmrr->end == rmrr->end_address ) |
1921 | 1 | { |
1922 | 1 | int ret = 0; |
1923 | 1 | |
1924 | 1 | if ( map ) |
1925 | 1 | { |
1926 | 1 | ++mrmrr->count; |
1927 | 1 | return 0; |
1928 | 1 | } |
1929 | 1 | |
1930 | 0 | if ( --mrmrr->count ) |
1931 | 0 | return 0; |
1932 | 0 |
|
1933 | 0 | while ( base_pfn < end_pfn ) |
1934 | 0 | { |
1935 | 0 | if ( clear_identity_p2m_entry(d, base_pfn) ) |
1936 | 0 | ret = -ENXIO; |
1937 | 0 | base_pfn++; |
1938 | 0 | } |
1939 | 0 |
1940 | 0 | list_del(&mrmrr->list); |
1941 | 0 | xfree(mrmrr); |
1942 | 0 | return ret; |
1943 | 0 | } |
1944 | 1 | } |
1945 | 2 | |
1946 | 1 | if ( !map ) |
1947 | 0 | return -ENOENT; |
1948 | 1 | |
1949 | 40 | while ( base_pfn < end_pfn ) |
1950 | 39 | { |
1951 | 39 | int err = set_identity_p2m_entry(d, base_pfn, p2m_access_rw, flag); |
1952 | 39 | |
1953 | 39 | if ( err ) |
1954 | 0 | return err; |
1955 | 39 | base_pfn++; |
1956 | 39 | } |
1957 | 1 | |
1958 | 1 | mrmrr = xmalloc(struct mapped_rmrr); |
1959 | 1 | if ( !mrmrr ) |
1960 | 0 | return -ENOMEM; |
1961 | 1 | mrmrr->base = rmrr->base_address; |
1962 | 1 | mrmrr->end = rmrr->end_address; |
1963 | 1 | mrmrr->count = 1; |
1964 | 1 | list_add_tail(&mrmrr->list, &hd->arch.mapped_rmrrs); |
1965 | 1 | |
1966 | 1 | return 0; |
1967 | 1 | } |
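
rmrr_identity_mapping reference-counts each distinct RMRR range, so a region
shared by several devices is only torn down when its last user goes away. A
minimal standalone model of that counting behaviour (hypothetical names):

#include <stdio.h>

static unsigned int count;   /* users of one shared RMRR range */

static void range_map(void)
{
    ++count;                 /* range already mapped: just take a reference */
}

static int range_unmap(void)
{
    if ( !count )
        return -1;           /* nothing mapped (-ENOENT in the code above) */
    if ( --count )
        return 0;            /* still referenced: keep the mapping */
    printf("last user gone: clearing identity mapping\n");
    return 0;
}

int main(void)
{
    range_map();
    range_map();             /* two devices share the range */
    range_unmap();           /* mapping survives */
    range_unmap();           /* now it is torn down */
    return 0;
}
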
1968 | | |
1969 | | static int intel_iommu_add_device(u8 devfn, struct pci_dev *pdev) |
1970 | 0 | { |
1971 | 0 | struct acpi_rmrr_unit *rmrr; |
1972 | 0 | u16 bdf; |
1973 | 0 | int ret, i; |
1974 | 0 |
1975 | 0 | ASSERT(pcidevs_locked()); |
1976 | 0 |
1977 | 0 | if ( !pdev->domain ) |
1978 | 0 | return -EINVAL; |
1979 | 0 |
1980 | 0 | ret = domain_context_mapping(pdev->domain, devfn, pdev); |
1981 | 0 | if ( ret ) |
1982 | 0 | { |
1983 | 0 | dprintk(XENLOG_ERR VTDPREFIX, "d%d: context mapping failed\n", |
1984 | 0 | pdev->domain->domain_id); |
1985 | 0 | return ret; |
1986 | 0 | } |
1987 | 0 |
1988 | 0 | for_each_rmrr_device ( rmrr, bdf, i ) |
1989 | 0 | { |
1990 | 0 | if ( rmrr->segment == pdev->seg && |
1991 | 0 | PCI_BUS(bdf) == pdev->bus && |
1992 | 0 | PCI_DEVFN2(bdf) == devfn ) |
1993 | 0 | { |
1994 | 0 | /* |
1995 | 0 | * iommu_add_device() is only called for the hardware |
1996 | 0 | * domain (see xen/drivers/passthrough/pci.c:pci_add_device()). |
1997 | 0 | * Since RMRRs are always reserved in the e820 map for the hardware |
1998 | 0 | * domain, there shouldn't be a conflict. |
1999 | 0 | */ |
2000 | 0 | ret = rmrr_identity_mapping(pdev->domain, 1, rmrr, 0); |
2001 | 0 | if ( ret ) |
2002 | 0 | dprintk(XENLOG_ERR VTDPREFIX, "d%d: RMRR mapping failed\n", |
2003 | 0 | pdev->domain->domain_id); |
2004 | 0 | } |
2005 | 0 | } |
2006 | 0 |
2007 | 0 | return 0; |
2008 | 0 | } |
2009 | | |
2010 | | static int intel_iommu_enable_device(struct pci_dev *pdev) |
2011 | 0 | { |
2012 | 0 | struct acpi_drhd_unit *drhd = acpi_find_matched_drhd_unit(pdev); |
2013 | 0 | int ret = drhd ? ats_device(pdev, drhd) : -ENODEV; |
2014 | 0 |
2015 | 0 | pci_vtd_quirk(pdev); |
2016 | 0 |
2017 | 0 | if ( ret <= 0 ) |
2018 | 0 | return ret; |
2019 | 0 |
2020 | 0 | ret = enable_ats_device(pdev, &drhd->iommu->ats_devices); |
2021 | 0 |
2022 | 0 | return ret >= 0 ? 0 : ret; |
2023 | 0 | } |
2024 | | |
2025 | | static int intel_iommu_remove_device(u8 devfn, struct pci_dev *pdev) |
2026 | 0 | { |
2027 | 0 | struct acpi_rmrr_unit *rmrr; |
2028 | 0 | u16 bdf; |
2029 | 0 | int i; |
2030 | 0 |
2031 | 0 | if ( !pdev->domain ) |
2032 | 0 | return -EINVAL; |
2033 | 0 |
2034 | 0 | for_each_rmrr_device ( rmrr, bdf, i ) |
2035 | 0 | { |
2036 | 0 | if ( rmrr->segment != pdev->seg || |
2037 | 0 | PCI_BUS(bdf) != pdev->bus || |
2038 | 0 | PCI_DEVFN2(bdf) != devfn ) |
2039 | 0 | continue; |
2040 | 0 |
2041 | 0 | /* |
2042 | 0 |          * The flag argument is irrelevant when clearing these
2043 | 0 |          * mappings; passing 0 is always safe.
2044 | 0 | */ |
2045 | 0 | rmrr_identity_mapping(pdev->domain, 0, rmrr, 0); |
2046 | 0 | } |
2047 | 0 |
2048 | 0 | return domain_context_unmap(pdev->domain, devfn, pdev); |
2049 | 0 | } |
2050 | | |
2051 | | static int __hwdom_init setup_hwdom_device(u8 devfn, struct pci_dev *pdev) |
2052 | 68 | { |
2053 | 68 | return domain_context_mapping(pdev->domain, devfn, pdev); |
2054 | 68 | } |
2055 | | |
2056 | | void clear_fault_bits(struct iommu *iommu) |
2057 | 2 | { |
2058 | 2 | u64 val; |
2059 | 2 | unsigned long flags; |
2060 | 2 | |
2061 | 2 | spin_lock_irqsave(&iommu->register_lock, flags); |
2062 | 2 | val = dmar_readq(iommu->reg, cap_fault_reg_offset(iommu->cap) + 8); |
2063 | 2 | dmar_writeq(iommu->reg, cap_fault_reg_offset(iommu->cap) + 8, val); |
2064 | 2 | dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_FAULTS); |
2065 | 2 | spin_unlock_irqrestore(&iommu->register_lock, flags); |
2066 | 2 | } |
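
clear_fault_bits relies on write-one-to-clear semantics: the fault recording
and status registers clear exactly the bits that are written back as 1, so
reading a register and writing the same value back acknowledges every
latched fault. A standalone model of such a RW1C register:

#include <stdint.h>
#include <stdio.h>

static uint64_t fault_reg = 0x5;   /* pretend two fault bits are latched */

static uint64_t reg_read(void) { return fault_reg; }
static void reg_write(uint64_t v) { fault_reg &= ~v; } /* 1s clear bits */

int main(void)
{
    reg_write(reg_read());         /* write back exactly what was set */
    printf("after clear: %#llx\n", (unsigned long long)fault_reg); /* 0 */
    return 0;
}
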
2067 | | |
2068 | | static void adjust_irq_affinity(struct acpi_drhd_unit *drhd) |
2069 | 2 | { |
2070 | 2 | const struct acpi_rhsa_unit *rhsa = drhd_to_rhsa(drhd); |
2071 | 2 | unsigned int node = rhsa ? pxm_to_node(rhsa->proximity_domain) |
2072 | 0 | : NUMA_NO_NODE; |
2073 | 2 | const cpumask_t *cpumask = &cpu_online_map; |
2074 | 2 | |
2075 | 2 | if ( node < MAX_NUMNODES && node_online(node) && |
2076 | 0 | cpumask_intersects(&node_to_cpumask(node), cpumask) ) |
2077 | 0 | cpumask = &node_to_cpumask(node); |
2078 | 2 | dma_msi_set_affinity(irq_to_desc(drhd->iommu->msi.irq), cpumask); |
2079 | 2 | } |
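
adjust_irq_affinity prefers CPUs on the IOMMU's NUMA node and falls back to
all online CPUs when the node is unknown, offline, or has no online CPUs. A
standalone sketch of that fallback using plain bitmasks (hypothetical
values):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t online    = 0x0f;   /* CPUs 0-3 online */
    uint32_t node_cpus = 0x30;   /* the node's CPUs (4-5) are all offline */

    /* Use the node-local mask only if it intersects the online mask. */
    uint32_t mask = (node_cpus & online) ? node_cpus : online;

    printf("affinity mask: %#x\n", mask);   /* falls back to 0x0f */
    return 0;
}
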
2080 | | |
2081 | | int adjust_vtd_irq_affinities(void) |
2082 | 1 | { |
2083 | 1 | struct acpi_drhd_unit *drhd; |
2084 | 1 | |
2085 | 1 | if ( !iommu_enabled ) |
2086 | 0 | return 0; |
2087 | 1 | |
2088 | 1 | for_each_drhd_unit ( drhd ) |
2089 | 1 | adjust_irq_affinity(drhd); |
2090 | 1 | |
2091 | 1 | return 0; |
2092 | 1 | } |
2093 | | __initcall(adjust_vtd_irq_affinities); |
2094 | | |
2095 | | static int __must_check init_vtd_hw(void) |
2096 | 1 | { |
2097 | 1 | struct acpi_drhd_unit *drhd; |
2098 | 1 | struct iommu *iommu; |
2099 | 1 | struct iommu_flush *flush = NULL; |
2100 | 1 | int ret; |
2101 | 1 | unsigned long flags; |
2102 | 1 | u32 sts; |
2103 | 1 | |
2104 | 1 | /* |
2105 | 1 | * Basic VT-d HW init: set VT-d interrupt, clear VT-d faults. |
2106 | 1 | */ |
2107 | 1 | for_each_drhd_unit ( drhd ) |
2108 | 1 | { |
2109 | 1 | adjust_irq_affinity(drhd); |
2110 | 1 | |
2111 | 1 | iommu = drhd->iommu; |
2112 | 1 | |
2113 | 1 | clear_fault_bits(iommu); |
2114 | 1 | |
2115 | 1 | spin_lock_irqsave(&iommu->register_lock, flags); |
2116 | 1 | sts = dmar_readl(iommu->reg, DMAR_FECTL_REG); |
2117 | 1 | sts &= ~DMA_FECTL_IM; |
2118 | 1 | dmar_writel(iommu->reg, DMAR_FECTL_REG, sts); |
2119 | 1 | spin_unlock_irqrestore(&iommu->register_lock, flags); |
2120 | 1 | } |
2121 | 1 | |
2122 | 1 | /* |
2123 | 1 | * Enable queue invalidation |
2124 | 1 | */ |
2125 | 1 | for_each_drhd_unit ( drhd ) |
2126 | 1 | { |
2127 | 1 | iommu = drhd->iommu; |
2128 | 1 | /* |
2129 | 1 |          * If queued invalidation could not be enabled, fall back to
2130 | 1 |          * register-based invalidation.
2131 | 1 | */ |
2132 | 1 | if ( enable_qinval(iommu) != 0 ) |
2133 | 0 | { |
2134 | 0 | flush = iommu_get_flush(iommu); |
2135 | 0 | flush->context = flush_context_reg; |
2136 | 0 | flush->iotlb = flush_iotlb_reg; |
2137 | 0 | } |
2138 | 1 | } |
2139 | 1 | |
2140 | 1 | /* |
2141 | 1 | * Enable interrupt remapping |
2142 | 1 | */ |
2143 | 1 | if ( iommu_intremap ) |
2144 | 1 | { |
2145 | 1 | int apic; |
2146 | 3 | for ( apic = 0; apic < nr_ioapics; apic++ ) |
2147 | 2 | { |
2148 | 2 | if ( ioapic_to_iommu(IO_APIC_ID(apic)) == NULL ) |
2149 | 0 | { |
2150 | 0 | iommu_intremap = 0; |
2151 | 0 | dprintk(XENLOG_ERR VTDPREFIX, |
2152 | 0 | "ioapic_to_iommu: ioapic %#x (id: %#x) is NULL! " |
2153 | 0 | "Will not try to enable Interrupt Remapping.\n", |
2154 | 0 | apic, IO_APIC_ID(apic)); |
2155 | 0 | break; |
2156 | 0 | } |
2157 | 2 | } |
2158 | 1 | } |
2159 | 1 | if ( iommu_intremap ) |
2160 | 1 | { |
2161 | 1 | for_each_drhd_unit ( drhd ) |
2162 | 1 | { |
2163 | 1 | iommu = drhd->iommu; |
2164 | 1 | if ( enable_intremap(iommu, 0) != 0 ) |
2165 | 0 | { |
2166 | 0 | iommu_intremap = 0; |
2167 | 0 | dprintk(XENLOG_WARNING VTDPREFIX, |
2168 | 0 | "Interrupt Remapping not enabled\n"); |
2169 | 0 |
2170 | 0 | break; |
2171 | 0 | } |
2172 | 1 | } |
2173 | 1 | if ( !iommu_intremap ) |
2174 | 0 | for_each_drhd_unit ( drhd ) |
2175 | 0 | disable_intremap(drhd->iommu); |
2176 | 1 | } |
2177 | 1 | |
2178 | 1 | /* |
2179 | 1 |      * Set the root entry for each VT-d engine. After setting the root
2180 | 1 |      * entry, we must globally invalidate the context cache and then
2181 | 1 |      * the IOTLB.
2182 | 1 | */ |
2183 | 1 | for_each_drhd_unit ( drhd ) |
2184 | 1 | { |
2185 | 1 | iommu = drhd->iommu; |
2186 | 1 | ret = iommu_set_root_entry(iommu); |
2187 | 1 | if ( ret ) |
2188 | 0 | { |
2189 | 0 | dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: set root entry failed\n"); |
2190 | 0 | return -EIO; |
2191 | 0 | } |
2192 | 1 | } |
2193 | 1 | |
2194 | 1 | return iommu_flush_all(); |
2195 | 1 | } |
2196 | | |
2197 | | static void __hwdom_init setup_hwdom_rmrr(struct domain *d) |
2198 | 1 | { |
2199 | 1 | struct acpi_rmrr_unit *rmrr; |
2200 | 1 | u16 bdf; |
2201 | 1 | int ret, i; |
2202 | 1 | |
2203 | 1 | pcidevs_lock(); |
2204 | 1 | for_each_rmrr_device ( rmrr, bdf, i ) |
2205 | 2 | { |
2206 | 2 | /* |
2207 | 2 |          * Here we are adding a device to the hardware domain.
2208 | 2 |          * Since RMRRs are always reserved in the e820 map for the hardware
2209 | 2 |          * domain, there shouldn't be a conflict, so passing a flag of 0
2210 | 2 |          * is always safe.
2211 | 2 | */ |
2212 | 2 | ret = rmrr_identity_mapping(d, 1, rmrr, 0); |
2213 | 2 | if ( ret ) |
2214 | 0 | dprintk(XENLOG_ERR VTDPREFIX, |
2215 | 2 | "IOMMU: mapping reserved region failed\n"); |
2216 | 2 | } |
2217 | 1 | pcidevs_unlock(); |
2218 | 1 | } |
2219 | | |
2220 | | int __init intel_vtd_setup(void) |
2221 | 1 | { |
2222 | 1 | struct acpi_drhd_unit *drhd; |
2223 | 1 | struct iommu *iommu; |
2224 | 1 | int ret; |
2225 | 1 | |
2226 | 1 | if ( list_empty(&acpi_drhd_units) ) |
2227 | 0 | { |
2228 | 0 | ret = -ENODEV; |
2229 | 0 | goto error; |
2230 | 0 | } |
2231 | 1 | |
2232 | 1 | if ( unlikely(acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_MSI) ) |
2233 | 0 | { |
2234 | 0 | ret = -EPERM; |
2235 | 0 | goto error; |
2236 | 0 | } |
2237 | 1 | |
2238 | 1 | platform_quirks_init(); |
2239 | 1 | if ( !iommu_enable ) |
2240 | 0 | { |
2241 | 0 | ret = -ENODEV; |
2242 | 0 | goto error; |
2243 | 0 | } |
2244 | 1 | |
2245 | 1 | /* We enable the following features only if they are supported by all VT-d |
2246 | 1 | * engines: Snoop Control, DMA passthrough, Queued Invalidation, Interrupt |
2247 | 1 | * Remapping, and Posted Interrupt |
2248 | 1 | */ |
2249 | 1 | for_each_drhd_unit ( drhd ) |
2250 | 1 | { |
2251 | 1 | iommu = drhd->iommu; |
2252 | 1 | |
2253 | 1 | printk("Intel VT-d iommu %"PRIu32" supported page sizes: 4kB", |
2254 | 1 | iommu->index); |
2255 | 1 |         if ( cap_sps_2mb(iommu->cap) )
2256 | 1 | printk(", 2MB"); |
2257 | 1 | |
2258 | 1 |         if ( cap_sps_1gb(iommu->cap) )
2259 | 1 | printk(", 1GB"); |
2260 | 1 | |
2261 | 1 | printk(".\n"); |
2262 | 1 | |
2263 | 1 | if ( iommu_snoop && !ecap_snp_ctl(iommu->ecap) ) |
2264 | 0 | iommu_snoop = 0; |
2265 | 1 | |
2266 | 1 | if ( iommu_passthrough && !ecap_pass_thru(iommu->ecap) ) |
2267 | 0 | iommu_passthrough = 0; |
2268 | 1 | |
2269 | 1 | if ( iommu_qinval && !ecap_queued_inval(iommu->ecap) ) |
2270 | 0 | iommu_qinval = 0; |
2271 | 1 | |
2272 | 1 | if ( iommu_intremap && !ecap_intr_remap(iommu->ecap) ) |
2273 | 0 | iommu_intremap = 0; |
2274 | 1 | |
2275 | 1 | /* |
2276 | 1 | * We cannot use posted interrupt if X86_FEATURE_CX16 is |
2277 | 1 | * not supported, since we count on this feature to |
2278 | 1 | * atomically update 16-byte IRTE in posted format. |
2279 | 1 | */ |
2280 | 1 | if ( !cap_intr_post(iommu->cap) || !cpu_has_cx16 ) |
2281 | 1 | iommu_intpost = 0; |
2282 | 1 | |
2283 | 1 | if ( !vtd_ept_page_compatible(iommu) ) |
2284 | 0 | iommu_hap_pt_share = 0; |
2285 | 1 | |
2286 | 1 | ret = iommu_set_interrupt(drhd); |
2287 | 1 | if ( ret ) |
2288 | 0 | { |
2289 | 0 | dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: interrupt setup failed\n"); |
2290 | 0 | goto error; |
2291 | 0 | } |
2292 | 1 | } |
2293 | 1 | |
2294 | 1 | softirq_tasklet_init(&vtd_fault_tasklet, do_iommu_page_fault, 0); |
2295 | 1 | |
2296 | 1 | if ( !iommu_qinval && iommu_intremap ) |
2297 | 0 | { |
2298 | 0 | iommu_intremap = 0; |
2299 | 0 | dprintk(XENLOG_WARNING VTDPREFIX, "Interrupt Remapping disabled " |
2300 | 0 | "since Queued Invalidation isn't supported or enabled.\n"); |
2301 | 0 | } |
2302 | 1 | |
2303 | 6 | #define P(p,s) printk("Intel VT-d %s %senabled.\n", s, (p)? "" : "not ") |
2304 | 1 | P(iommu_snoop, "Snoop Control"); |
2305 | 1 | P(iommu_passthrough, "Dom0 DMA Passthrough"); |
2306 | 1 | P(iommu_qinval, "Queued Invalidation"); |
2307 | 1 | P(iommu_intremap, "Interrupt Remapping"); |
2308 | 1 | P(iommu_intpost, "Posted Interrupt"); |
2309 | 1 | P(iommu_hap_pt_share, "Shared EPT tables"); |
2310 | 1 | #undef P |
2311 | 1 | |
2312 | 1 | ret = scan_pci_devices(); |
2313 | 1 | if ( ret ) |
2314 | 0 | goto error; |
2315 | 1 | |
2316 | 1 | ret = init_vtd_hw(); |
2317 | 1 | if ( ret ) |
2318 | 0 | goto error; |
2319 | 1 | |
2320 | 1 | register_keyhandler('V', vtd_dump_iommu_info, "dump iommu info", 1); |
2321 | 1 | |
2322 | 1 | return 0; |
2323 | 1 | |
2324 | 0 | error: |
2325 | 0 | iommu_enabled = 0; |
2326 | 0 | iommu_snoop = 0; |
2327 | 0 | iommu_passthrough = 0; |
2328 | 0 | iommu_qinval = 0; |
2329 | 0 | iommu_intremap = 0; |
2330 | 0 | iommu_intpost = 0; |
2331 | 0 | return ret; |
2332 | 1 | } |
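
intel_vtd_setup keeps each optional feature enabled only if every VT-d
engine advertises it, so the global flags behave like an AND across all
units. A standalone sketch of that reduction over hypothetical per-unit
capability data:

#include <stdbool.h>
#include <stdio.h>

struct unit_caps { bool snoop, qinval, intremap; };

int main(void)
{
    const struct unit_caps units[] = {
        { true, true, true },
        { true, true, false },   /* one unit lacks interrupt remapping */
    };
    bool snoop = true, qinval = true, intremap = true;

    for ( unsigned int i = 0; i < sizeof(units) / sizeof(units[0]); i++ )
    {
        snoop    &= units[i].snoop;
        qinval   &= units[i].qinval;
        intremap &= units[i].intremap;
    }
    printf("snoop=%d qinval=%d intremap=%d\n", snoop, qinval, intremap);
    return 0;
}
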
2333 | | |
2334 | | static int reassign_device_ownership( |
2335 | | struct domain *source, |
2336 | | struct domain *target, |
2337 | | u8 devfn, struct pci_dev *pdev) |
2338 | 0 | { |
2339 | 0 | int ret; |
2340 | 0 |
2341 | 0 | /* |
2342 | 0 | * Devices assigned to untrusted domains (here assumed to be any domU) |
2343 | 0 | * can attempt to send arbitrary LAPIC/MSI messages. We are unprotected |
2344 | 0 | * by the root complex unless interrupt remapping is enabled. |
2345 | 0 | */ |
2346 | 0 | if ( (target != hardware_domain) && !iommu_intremap ) |
2347 | 0 | untrusted_msi = true; |
2348 | 0 |
2349 | 0 | /* |
2350 | 0 |      * If the device belongs to the hardware domain and it has an RMRR,
2351 | 0 |      * don't remove it from the hardware domain: the BIOS may still use
2352 | 0 |      * the RMRR at boot time.
2353 | 0 | */ |
2354 | 0 | if ( !is_hardware_domain(source) ) |
2355 | 0 | { |
2356 | 0 | const struct acpi_rmrr_unit *rmrr; |
2357 | 0 | u16 bdf; |
2358 | 0 | unsigned int i; |
2359 | 0 |
2360 | 0 | for_each_rmrr_device( rmrr, bdf, i ) |
2361 | 0 | if ( rmrr->segment == pdev->seg && |
2362 | 0 | PCI_BUS(bdf) == pdev->bus && |
2363 | 0 | PCI_DEVFN2(bdf) == devfn ) |
2364 | 0 | { |
2365 | 0 | /* |
2366 | 0 |                  * The RMRR flag is always ignored when removing a
2367 | 0 |                  * device, but passing 0 is always safe.
2368 | 0 | */ |
2369 | 0 | ret = rmrr_identity_mapping(source, 0, rmrr, 0); |
2370 | 0 | if ( ret != -ENOENT ) |
2371 | 0 | return ret; |
2372 | 0 | } |
2373 | 0 | } |
2374 | 0 |
2375 | 0 | ret = domain_context_unmap(source, devfn, pdev); |
2376 | 0 | if ( ret ) |
2377 | 0 | return ret; |
2378 | 0 |
2379 | 0 | if ( !has_arch_pdevs(target) ) |
2380 | 0 | vmx_pi_hooks_assign(target); |
2381 | 0 |
2382 | 0 | ret = domain_context_mapping(target, devfn, pdev); |
2383 | 0 | if ( ret ) |
2384 | 0 | { |
2385 | 0 | if ( !has_arch_pdevs(target) ) |
2386 | 0 | vmx_pi_hooks_deassign(target); |
2387 | 0 |
2388 | 0 | return ret; |
2389 | 0 | } |
2390 | 0 |
2391 | 0 | if ( devfn == pdev->devfn ) |
2392 | 0 | { |
2393 | 0 | list_move(&pdev->domain_list, &target->arch.pdev_list); |
2394 | 0 | pdev->domain = target; |
2395 | 0 | } |
2396 | 0 |
2397 | 0 | if ( !has_arch_pdevs(source) ) |
2398 | 0 | vmx_pi_hooks_deassign(source); |
2399 | 0 |
2400 | 0 | return ret; |
2401 | 0 | } |
2402 | | |
2403 | | static int intel_iommu_assign_device( |
2404 | | struct domain *d, u8 devfn, struct pci_dev *pdev, u32 flag) |
2405 | 0 | { |
2406 | 0 | struct acpi_rmrr_unit *rmrr; |
2407 | 0 | int ret = 0, i; |
2408 | 0 | u16 bdf, seg; |
2409 | 0 | u8 bus; |
2410 | 0 |
2411 | 0 | if ( list_empty(&acpi_drhd_units) ) |
2412 | 0 | return -ENODEV; |
2413 | 0 |
2414 | 0 | seg = pdev->seg; |
2415 | 0 | bus = pdev->bus; |
2416 | 0 | /* |
2417 | 0 |      * In rare cases a given rmrr is shared by multiple devices; this
2418 | 0 |      * puts the security of the system at risk, so such device
2419 | 0 |      * assignment is refused by default. It can be permitted if the
2420 | 0 |      * user sets
2421 | 0 | * "pci = [ 'sbdf, rdm_policy=relaxed' ]" |
2422 | 0 | * |
2423 | 0 | * TODO: in the future we can introduce group device assignment |
2424 | 0 | * interface to make sure devices sharing RMRR are assigned to the |
2425 | 0 | * same domain together. |
2426 | 0 | */ |
2427 | 0 | for_each_rmrr_device( rmrr, bdf, i ) |
2428 | 0 | { |
2429 | 0 | if ( rmrr->segment == seg && |
2430 | 0 | PCI_BUS(bdf) == bus && |
2431 | 0 | PCI_DEVFN2(bdf) == devfn && |
2432 | 0 | rmrr->scope.devices_cnt > 1 ) |
2433 | 0 | { |
2434 | 0 | bool_t relaxed = !!(flag & XEN_DOMCTL_DEV_RDM_RELAXED); |
2435 | 0 |
2436 | 0 | printk(XENLOG_GUEST "%s" VTDPREFIX |
2437 | 0 | " It's %s to assign %04x:%02x:%02x.%u" |
2438 | 0 | " with shared RMRR at %"PRIx64" for Dom%d.\n", |
2439 | 0 | relaxed ? XENLOG_WARNING : XENLOG_ERR, |
2440 | 0 | relaxed ? "risky" : "disallowed", |
2441 | 0 | seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), |
2442 | 0 | rmrr->base_address, d->domain_id); |
2443 | 0 | if ( !relaxed ) |
2444 | 0 | return -EPERM; |
2445 | 0 | } |
2446 | 0 | } |
2447 | 0 |
2448 | 0 | ret = reassign_device_ownership(hardware_domain, d, devfn, pdev); |
2449 | 0 | if ( ret ) |
2450 | 0 | return ret; |
2451 | 0 |
2452 | 0 | /* Setup rmrr identity mapping */ |
2453 | 0 | for_each_rmrr_device( rmrr, bdf, i ) |
2454 | 0 | { |
2455 | 0 | if ( rmrr->segment == seg && |
2456 | 0 | PCI_BUS(bdf) == bus && |
2457 | 0 | PCI_DEVFN2(bdf) == devfn ) |
2458 | 0 | { |
2459 | 0 | ret = rmrr_identity_mapping(d, 1, rmrr, flag); |
2460 | 0 | if ( ret ) |
2461 | 0 | { |
2462 | 0 | reassign_device_ownership(d, hardware_domain, devfn, pdev); |
2463 | 0 | printk(XENLOG_G_ERR VTDPREFIX |
2464 | 0 | " cannot map reserved region (%"PRIx64",%"PRIx64"] for Dom%d (%d)\n", |
2465 | 0 | rmrr->base_address, rmrr->end_address, |
2466 | 0 | d->domain_id, ret); |
2467 | 0 | break; |
2468 | 0 | } |
2469 | 0 | } |
2470 | 0 | } |
2471 | 0 |
2472 | 0 | return ret; |
2473 | 0 | } |
2474 | | |
2475 | | static int intel_iommu_group_id(u16 seg, u8 bus, u8 devfn) |
2476 | 0 | { |
2477 | 0 | u8 secbus; |
2478 | 0 | if ( find_upstream_bridge(seg, &bus, &devfn, &secbus) < 0 ) |
2479 | 0 | return -1; |
2480 | 0 | else |
2481 | 0 | return PCI_BDF2(bus, devfn); |
2482 | 0 | } |
2483 | | |
2484 | | static u32 iommu_state[MAX_IOMMUS][MAX_IOMMU_REGS]; |
2485 | | |
2486 | | static int __must_check vtd_suspend(void) |
2487 | 0 | { |
2488 | 0 | struct acpi_drhd_unit *drhd; |
2489 | 0 | struct iommu *iommu; |
2490 | 0 | u32 i; |
2491 | 0 | int rc; |
2492 | 0 |
2493 | 0 | if ( !iommu_enabled ) |
2494 | 0 | return 0; |
2495 | 0 |
2496 | 0 | rc = iommu_flush_all(); |
2497 | 0 | if ( unlikely(rc) ) |
2498 | 0 | { |
2499 | 0 | printk(XENLOG_WARNING VTDPREFIX |
2500 | 0 | " suspend: IOMMU flush all failed: %d\n", rc); |
2501 | 0 |
2502 | 0 | return rc; |
2503 | 0 | } |
2504 | 0 |
2505 | 0 | for_each_drhd_unit ( drhd ) |
2506 | 0 | { |
2507 | 0 | iommu = drhd->iommu; |
2508 | 0 | i = iommu->index; |
2509 | 0 |
2510 | 0 | iommu_state[i][DMAR_FECTL_REG] = |
2511 | 0 | (u32) dmar_readl(iommu->reg, DMAR_FECTL_REG); |
2512 | 0 | iommu_state[i][DMAR_FEDATA_REG] = |
2513 | 0 | (u32) dmar_readl(iommu->reg, DMAR_FEDATA_REG); |
2514 | 0 | iommu_state[i][DMAR_FEADDR_REG] = |
2515 | 0 | (u32) dmar_readl(iommu->reg, DMAR_FEADDR_REG); |
2516 | 0 | iommu_state[i][DMAR_FEUADDR_REG] = |
2517 | 0 | (u32) dmar_readl(iommu->reg, DMAR_FEUADDR_REG); |
2518 | 0 |
2519 | 0 | /* don't disable VT-d engine when force_iommu is set. */ |
2520 | 0 | if ( force_iommu ) |
2521 | 0 | continue; |
2522 | 0 |
2523 | 0 | iommu_disable_translation(iommu); |
2524 | 0 |
2525 | 0 | /* If interrupt remapping is enabled, queued invalidation |
2526 | 0 |          * will be disabled when interrupt remapping is disabled
2527 | 0 |          * during local APIC suspend
2528 | 0 | */ |
2529 | 0 | if ( !iommu_intremap && iommu_qinval ) |
2530 | 0 | disable_qinval(iommu); |
2531 | 0 | } |
2532 | 0 |
2533 | 0 | return 0; |
2534 | 0 | } |
2535 | | |
2536 | | static void vtd_crash_shutdown(void) |
2537 | 0 | { |
2538 | 0 | struct acpi_drhd_unit *drhd; |
2539 | 0 | struct iommu *iommu; |
2540 | 0 |
2541 | 0 | if ( !iommu_enabled ) |
2542 | 0 | return; |
2543 | 0 |
2544 | 0 | if ( iommu_flush_all() ) |
2545 | 0 | printk(XENLOG_WARNING VTDPREFIX |
2546 | 0 | " crash shutdown: IOMMU flush all failed\n"); |
2547 | 0 |
2548 | 0 | for_each_drhd_unit ( drhd ) |
2549 | 0 | { |
2550 | 0 | iommu = drhd->iommu; |
2551 | 0 | iommu_disable_translation(iommu); |
2552 | 0 | disable_intremap(drhd->iommu); |
2553 | 0 | disable_qinval(drhd->iommu); |
2554 | 0 | } |
2555 | 0 | } |
2556 | | |
2557 | | static void vtd_resume(void) |
2558 | 0 | { |
2559 | 0 | struct acpi_drhd_unit *drhd; |
2560 | 0 | struct iommu *iommu; |
2561 | 0 | u32 i; |
2562 | 0 | unsigned long flags; |
2563 | 0 |
2564 | 0 | if ( !iommu_enabled ) |
2565 | 0 | return; |
2566 | 0 |
2567 | 0 | if ( init_vtd_hw() != 0 && force_iommu ) |
2568 | 0 | panic("IOMMU setup failed, crash Xen for security purpose"); |
2569 | 0 |
2570 | 0 | for_each_drhd_unit ( drhd ) |
2571 | 0 | { |
2572 | 0 | iommu = drhd->iommu; |
2573 | 0 | i = iommu->index; |
2574 | 0 |
2575 | 0 | spin_lock_irqsave(&iommu->register_lock, flags); |
2576 | 0 | dmar_writel(iommu->reg, DMAR_FECTL_REG, |
2577 | 0 | (u32) iommu_state[i][DMAR_FECTL_REG]); |
2578 | 0 | dmar_writel(iommu->reg, DMAR_FEDATA_REG, |
2579 | 0 | (u32) iommu_state[i][DMAR_FEDATA_REG]); |
2580 | 0 | dmar_writel(iommu->reg, DMAR_FEADDR_REG, |
2581 | 0 | (u32) iommu_state[i][DMAR_FEADDR_REG]); |
2582 | 0 | dmar_writel(iommu->reg, DMAR_FEUADDR_REG, |
2583 | 0 | (u32) iommu_state[i][DMAR_FEUADDR_REG]); |
2584 | 0 | spin_unlock_irqrestore(&iommu->register_lock, flags); |
2585 | 0 |
2586 | 0 | iommu_enable_translation(drhd); |
2587 | 0 | } |
2588 | 0 | } |
2589 | | |
2590 | | static void vtd_dump_p2m_table_level(paddr_t pt_maddr, int level, paddr_t gpa, |
2591 | | int indent) |
2592 | 0 | { |
2593 | 0 | paddr_t address; |
2594 | 0 | int i; |
2595 | 0 | struct dma_pte *pt_vaddr, *pte; |
2596 | 0 | int next_level; |
2597 | 0 |
2598 | 0 | if ( level < 1 ) |
2599 | 0 | return; |
2600 | 0 |
2601 | 0 | pt_vaddr = map_vtd_domain_page(pt_maddr); |
2602 | 0 | if ( pt_vaddr == NULL ) |
2603 | 0 | { |
2604 | 0 | printk("Failed to map VT-D domain page %"PRIpaddr"\n", pt_maddr); |
2605 | 0 | return; |
2606 | 0 | } |
2607 | 0 |
2608 | 0 | next_level = level - 1; |
2609 | 0 | for ( i = 0; i < PTE_NUM; i++ ) |
2610 | 0 | { |
2611 | 0 | if ( !(i % 2) ) |
2612 | 0 | process_pending_softirqs(); |
2613 | 0 |
2614 | 0 | pte = &pt_vaddr[i]; |
2615 | 0 | if ( !dma_pte_present(*pte) ) |
2616 | 0 | continue; |
2617 | 0 |
2618 | 0 | address = gpa + offset_level_address(i, level); |
2619 | 0 | if ( next_level >= 1 ) |
2620 | 0 | vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, |
2621 | 0 | address, indent + 1); |
2622 | 0 | else |
2623 | 0 | printk("%*sgfn: %08lx mfn: %08lx\n", |
2624 | 0 | indent, "", |
2625 | 0 | (unsigned long)(address >> PAGE_SHIFT_4K), |
2626 | 0 | (unsigned long)(dma_pte_addr(*pte) >> PAGE_SHIFT_4K)); |
2627 | 0 | } |
2628 | 0 |
2629 | 0 | unmap_vtd_domain_page(pt_vaddr); |
2630 | 0 | } |
2631 | | |
2632 | | static void vtd_dump_p2m_table(struct domain *d) |
2633 | 0 | { |
2634 | 0 | const struct domain_iommu *hd; |
2635 | 0 |
2636 | 0 | if ( list_empty(&acpi_drhd_units) ) |
2637 | 0 | return; |
2638 | 0 |
2639 | 0 | hd = dom_iommu(d); |
2640 | 0 | printk("p2m table has %d levels\n", agaw_to_level(hd->arch.agaw)); |
2641 | 0 | vtd_dump_p2m_table_level(hd->arch.pgd_maddr, agaw_to_level(hd->arch.agaw), 0, 0); |
2642 | 0 | } |
2643 | | |
2644 | | const struct iommu_ops intel_iommu_ops = { |
2645 | | .init = intel_iommu_domain_init, |
2646 | | .hwdom_init = intel_iommu_hwdom_init, |
2647 | | .add_device = intel_iommu_add_device, |
2648 | | .enable_device = intel_iommu_enable_device, |
2649 | | .remove_device = intel_iommu_remove_device, |
2650 | | .assign_device = intel_iommu_assign_device, |
2651 | | .teardown = iommu_domain_teardown, |
2652 | | .map_page = intel_iommu_map_page, |
2653 | | .unmap_page = intel_iommu_unmap_page, |
2654 | | .free_page_table = iommu_free_page_table, |
2655 | | .reassign_device = reassign_device_ownership, |
2656 | | .get_device_group_id = intel_iommu_group_id, |
2657 | | .update_ire_from_apic = io_apic_write_remap_rte, |
2658 | | .update_ire_from_msi = msi_msg_write_remap_rte, |
2659 | | .read_apic_from_ire = io_apic_read_remap_rte, |
2660 | | .read_msi_from_ire = msi_msg_read_remap_rte, |
2661 | | .setup_hpet_msi = intel_setup_hpet_msi, |
2662 | | .suspend = vtd_suspend, |
2663 | | .resume = vtd_resume, |
2664 | | .share_p2m = iommu_set_pgd, |
2665 | | .crash_shutdown = vtd_crash_shutdown, |
2666 | | .iotlb_flush = iommu_flush_iotlb_pages, |
2667 | | .iotlb_flush_all = iommu_flush_iotlb_all, |
2668 | | .get_reserved_device_memory = intel_iommu_get_reserved_device_memory, |
2669 | | .dump_p2m_table = vtd_dump_p2m_table, |
2670 | | }; |
2671 | | |
2672 | | /* |
2673 | | * Local variables: |
2674 | | * mode: C |
2675 | | * c-file-style: "BSD" |
2676 | | * c-basic-offset: 4 |
2677 | | * tab-width: 4 |
2678 | | * indent-tabs-mode: nil |
2679 | | * End: |
2680 | | */ |