Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/drivers/passthrough/amd/iommu_map.c
Every instrumented line in this file reports an execution count of 0; the file was not exercised in this run.

/*
 * Copyright (C) 2007 Advanced Micro Devices, Inc.
 * Author: Leo Duran <leo.duran@amd.com>
 * Author: Wei Wang <wei.wang2@amd.com> - adapted to xen
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/acpi.h>
#include <xen/sched.h>
#include <asm/p2m.h>
#include <asm/amd-iommu.h>
#include <asm/hvm/svm/amd-iommu-proto.h>
#include "../ats.h"
#include <xen/pci.h>

/* Given pfn and page table level, return pde index */
static unsigned int pfn_to_pde_idx(unsigned long pfn, unsigned int level)
{
    unsigned int idx;

    idx = pfn >> (PTE_PER_TABLE_SHIFT * (--level));
    idx &= ~PTE_PER_TABLE_MASK;
    return idx;
}
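
For illustration, a standalone model of the index computation above; the MODEL_* names are hypothetical, and the value 9 for PTE_PER_TABLE_SHIFT (512 entries per table) is an assumption about the usual AMD IOMMU definitions, not taken from this file:

/* Standalone sketch, not part of iommu_map.c. */
#include <stdio.h>

#define MODEL_PTE_PER_TABLE_SHIFT 9                        /* assumed value */
#define MODEL_PTE_PER_TABLE_SIZE  (1 << MODEL_PTE_PER_TABLE_SHIFT)
#define MODEL_PTE_PER_TABLE_MASK  (~(MODEL_PTE_PER_TABLE_SIZE - 1))

static unsigned int model_pfn_to_pde_idx(unsigned long pfn, unsigned int level)
{
    /* Drop the bits consumed by lower levels, keep the low 9 index bits. */
    unsigned int idx = pfn >> (MODEL_PTE_PER_TABLE_SHIFT * (level - 1));
    return idx & ~MODEL_PTE_PER_TABLE_MASK;
}

int main(void)
{
    /* pfn 0x12345 indexes entry 0x145 at level 1 and entry 0x91 at level 2. */
    printf("%#x %#x\n", model_pfn_to_pde_idx(0x12345, 1),
           model_pfn_to_pde_idx(0x12345, 2));
    return 0;
}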

void clear_iommu_pte_present(unsigned long l1_mfn, unsigned long gfn)
{
    u64 *table, *pte;

    table = map_domain_page(_mfn(l1_mfn));
    pte = table + pfn_to_pde_idx(gfn, IOMMU_PAGING_MODE_LEVEL_1);
    *pte = 0;
    unmap_domain_page(table);
}

static bool_t set_iommu_pde_present(u32 *pde, unsigned long next_mfn,
                                    unsigned int next_level,
                                    bool_t iw, bool_t ir)
{
    u64 addr_lo, addr_hi, maddr_old, maddr_next;
    u32 entry;
    bool_t need_flush = 0;

    maddr_next = (u64)next_mfn << PAGE_SHIFT;

    addr_hi = get_field_from_reg_u32(pde[1],
                                     IOMMU_PTE_ADDR_HIGH_MASK,
                                     IOMMU_PTE_ADDR_HIGH_SHIFT);
    addr_lo = get_field_from_reg_u32(pde[0],
                                     IOMMU_PTE_ADDR_LOW_MASK,
                                     IOMMU_PTE_ADDR_LOW_SHIFT);

    maddr_old = (addr_hi << 32) | (addr_lo << PAGE_SHIFT);

    if ( maddr_old != maddr_next )
        need_flush = 1;

    addr_lo = maddr_next & DMA_32BIT_MASK;
    addr_hi = maddr_next >> 32;

    /* enable read/write permissions, which will be enforced at the PTE */
    set_field_in_reg_u32((u32)addr_hi, 0,
                         IOMMU_PDE_ADDR_HIGH_MASK,
                         IOMMU_PDE_ADDR_HIGH_SHIFT, &entry);
    set_field_in_reg_u32(iw, entry,
                         IOMMU_PDE_IO_WRITE_PERMISSION_MASK,
                         IOMMU_PDE_IO_WRITE_PERMISSION_SHIFT, &entry);
    set_field_in_reg_u32(ir, entry,
                         IOMMU_PDE_IO_READ_PERMISSION_MASK,
                         IOMMU_PDE_IO_READ_PERMISSION_SHIFT, &entry);

    /* FC bit should be enabled in PTE, this helps to solve potential
     * issues with ATS devices
     */
    if ( next_level == IOMMU_PAGING_MODE_LEVEL_0 )
        set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
                             IOMMU_PTE_FC_MASK, IOMMU_PTE_FC_SHIFT, &entry);
    pde[1] = entry;

    /* mark next level as 'present' */
    set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0,
                         IOMMU_PDE_ADDR_LOW_MASK,
                         IOMMU_PDE_ADDR_LOW_SHIFT, &entry);
    set_field_in_reg_u32(next_level, entry,
                         IOMMU_PDE_NEXT_LEVEL_MASK,
                         IOMMU_PDE_NEXT_LEVEL_SHIFT, &entry);
    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
                         IOMMU_PDE_PRESENT_MASK,
                         IOMMU_PDE_PRESENT_SHIFT, &entry);
    pde[0] = entry;

    return need_flush;
}
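
The helper above writes the two 32-bit words of an entry (pde[0] and pde[1]); the note below only summarizes the two ways the rest of this file calls it, with call shapes quoted from the functions that follow:

/*
 * Leaf 4K mapping, as done by set_iommu_pte_present():
 *
 *     need_flush = set_iommu_pde_present(pde, next_mfn,
 *                                        IOMMU_PAGING_MODE_LEVEL_0, iw, ir);
 *
 * Intermediate entry pointing at a lower page table, as done by
 * iommu_pde_from_gfn() and update_paging_mode():
 *
 *     set_iommu_pde_present((u32*)pde, next_table_mfn, next_level,
 *                           !!IOMMUF_writable, !!IOMMUF_readable);
 *
 * need_flush is 1 only when the address already stored in the entry
 * differs from the one being written.
 */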

static bool_t set_iommu_pte_present(unsigned long pt_mfn, unsigned long gfn,
                                    unsigned long next_mfn, int pde_level,
                                    bool_t iw, bool_t ir)
{
    u64 *table;
    u32 *pde;
    bool_t need_flush = 0;

    table = map_domain_page(_mfn(pt_mfn));

    pde = (u32*)(table + pfn_to_pde_idx(gfn, pde_level));

    need_flush = set_iommu_pde_present(pde, next_mfn,
                                       IOMMU_PAGING_MODE_LEVEL_0, iw, ir);
    unmap_domain_page(table);
    return need_flush;
}

void amd_iommu_set_root_page_table(
    u32 *dte, u64 root_ptr, u16 domain_id, u8 paging_mode, u8 valid)
{
    u64 addr_hi, addr_lo;
    u32 entry;
    set_field_in_reg_u32(domain_id, 0,
                         IOMMU_DEV_TABLE_DOMAIN_ID_MASK,
                         IOMMU_DEV_TABLE_DOMAIN_ID_SHIFT, &entry);
    dte[2] = entry;

    addr_lo = root_ptr & DMA_32BIT_MASK;
    addr_hi = root_ptr >> 32;

    set_field_in_reg_u32((u32)addr_hi, 0,
                         IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_MASK,
                         IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_SHIFT, &entry);
    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
                         IOMMU_DEV_TABLE_IO_WRITE_PERMISSION_MASK,
                         IOMMU_DEV_TABLE_IO_WRITE_PERMISSION_SHIFT, &entry);
    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
                         IOMMU_DEV_TABLE_IO_READ_PERMISSION_MASK,
                         IOMMU_DEV_TABLE_IO_READ_PERMISSION_SHIFT, &entry);
    dte[1] = entry;

    set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0,
                         IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_MASK,
                         IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_SHIFT, &entry);
    set_field_in_reg_u32(paging_mode, entry,
                         IOMMU_DEV_TABLE_PAGING_MODE_MASK,
                         IOMMU_DEV_TABLE_PAGING_MODE_SHIFT, &entry);
    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
                         IOMMU_DEV_TABLE_TRANSLATION_VALID_MASK,
                         IOMMU_DEV_TABLE_TRANSLATION_VALID_SHIFT, &entry);
    set_field_in_reg_u32(valid ? IOMMU_CONTROL_ENABLED :
                         IOMMU_CONTROL_DISABLED, entry,
                         IOMMU_DEV_TABLE_VALID_MASK,
                         IOMMU_DEV_TABLE_VALID_SHIFT, &entry);
    dte[0] = entry;
}

void iommu_dte_set_iotlb(u32 *dte, u8 i)
{
    u32 entry;

    entry = dte[3];
    set_field_in_reg_u32(!!i, entry,
                         IOMMU_DEV_TABLE_IOTLB_SUPPORT_MASK,
                         IOMMU_DEV_TABLE_IOTLB_SUPPORT_SHIFT, &entry);
    dte[3] = entry;
}

void __init amd_iommu_set_intremap_table(
    u32 *dte, u64 intremap_ptr, u8 int_valid)
{
    u64 addr_hi, addr_lo;
    u32 entry;

    addr_lo = intremap_ptr & DMA_32BIT_MASK;
    addr_hi = intremap_ptr >> 32;

    entry = dte[5];
    set_field_in_reg_u32((u32)addr_hi, entry,
                        IOMMU_DEV_TABLE_INT_TABLE_PTR_HIGH_MASK,
                        IOMMU_DEV_TABLE_INT_TABLE_PTR_HIGH_SHIFT, &entry);
    /* Fixed and arbitrated interrupts remapped */
    set_field_in_reg_u32(2, entry,
                        IOMMU_DEV_TABLE_INT_CONTROL_MASK,
                        IOMMU_DEV_TABLE_INT_CONTROL_SHIFT, &entry);
    dte[5] = entry;

    set_field_in_reg_u32((u32)addr_lo >> 6, 0,
                        IOMMU_DEV_TABLE_INT_TABLE_PTR_LOW_MASK,
                        IOMMU_DEV_TABLE_INT_TABLE_PTR_LOW_SHIFT, &entry);
    /* 2048 entries */
    set_field_in_reg_u32(0xB, entry,
                         IOMMU_DEV_TABLE_INT_TABLE_LENGTH_MASK,
                         IOMMU_DEV_TABLE_INT_TABLE_LENGTH_SHIFT, &entry);

    /* unmapped interrupts result in IO page faults */
    set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, entry,
                         IOMMU_DEV_TABLE_INT_TABLE_IGN_UNMAPPED_MASK,
                         IOMMU_DEV_TABLE_INT_TABLE_IGN_UNMAPPED_SHIFT, &entry);
    set_field_in_reg_u32(int_valid ? IOMMU_CONTROL_ENABLED :
                         IOMMU_CONTROL_DISABLED, entry,
                         IOMMU_DEV_TABLE_INT_VALID_MASK,
                         IOMMU_DEV_TABLE_INT_VALID_SHIFT, &entry);
    dte[4] = entry;
}

void __init iommu_dte_add_device_entry(u32 *dte, struct ivrs_mappings *ivrs_dev)
{
    u32 entry;
    u8 sys_mgt, dev_ex, flags;
    u8 mask = ~(0x7 << 3);

    dte[7] = dte[6] = dte[4] = dte[2] = dte[1] = dte[0] = 0;

    flags = ivrs_dev->device_flags;
    sys_mgt = get_field_from_byte(flags, ACPI_IVHD_SYSTEM_MGMT);
    dev_ex = ivrs_dev->dte_allow_exclusion;

    flags &= mask;
    set_field_in_reg_u32(flags, 0,
                         IOMMU_DEV_TABLE_IVHD_FLAGS_MASK,
                         IOMMU_DEV_TABLE_IVHD_FLAGS_SHIFT, &entry);
    dte[5] = entry;

    set_field_in_reg_u32(sys_mgt, 0,
                         IOMMU_DEV_TABLE_SYS_MGT_MSG_ENABLE_MASK,
                         IOMMU_DEV_TABLE_SYS_MGT_MSG_ENABLE_SHIFT, &entry);
    set_field_in_reg_u32(dev_ex, entry,
                         IOMMU_DEV_TABLE_ALLOW_EXCLUSION_MASK,
                         IOMMU_DEV_TABLE_ALLOW_EXCLUSION_SHIFT, &entry);
    dte[3] = entry;
}

void iommu_dte_set_guest_cr3(u32 *dte, u16 dom_id, u64 gcr3,
                             int gv, unsigned int glx)
{
    u32 entry, gcr3_1, gcr3_2, gcr3_3;

    gcr3_3 = gcr3 >> 31;
    gcr3_2 = (gcr3 >> 15) & 0xFFFF;
    gcr3_1 = (gcr3 >> PAGE_SHIFT) & 0x7;

    /* I bit must be set when gcr3 is enabled */
    entry = dte[3];
    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
                         IOMMU_DEV_TABLE_IOTLB_SUPPORT_MASK,
                         IOMMU_DEV_TABLE_IOTLB_SUPPORT_SHIFT, &entry);
    /* update gcr3 */
    set_field_in_reg_u32(gcr3_3, entry,
                         IOMMU_DEV_TABLE_GCR3_3_MASK,
                         IOMMU_DEV_TABLE_GCR3_3_SHIFT, &entry);
    dte[3] = entry;

    set_field_in_reg_u32(dom_id, entry,
                         IOMMU_DEV_TABLE_DOMAIN_ID_MASK,
                         IOMMU_DEV_TABLE_DOMAIN_ID_SHIFT, &entry);
    /* update gcr3 */
    entry = dte[2];
    set_field_in_reg_u32(gcr3_2, entry,
                         IOMMU_DEV_TABLE_GCR3_2_MASK,
                         IOMMU_DEV_TABLE_GCR3_2_SHIFT, &entry);
    dte[2] = entry;

    entry = dte[1];
    /* Enable GV bit */
    set_field_in_reg_u32(!!gv, entry,
                         IOMMU_DEV_TABLE_GV_MASK,
                         IOMMU_DEV_TABLE_GV_SHIFT, &entry);

    /* 1 level guest cr3 table  */
    set_field_in_reg_u32(glx, entry,
                         IOMMU_DEV_TABLE_GLX_MASK,
                         IOMMU_DEV_TABLE_GLX_SHIFT, &entry);
    /* update gcr3 */
    set_field_in_reg_u32(gcr3_1, entry,
                         IOMMU_DEV_TABLE_GCR3_1_MASK,
                         IOMMU_DEV_TABLE_GCR3_1_SHIFT, &entry);
    dte[1] = entry;
}

u64 amd_iommu_get_next_table_from_pte(u32 *entry)
{
    u64 addr_lo, addr_hi, ptr;

    addr_lo = get_field_from_reg_u32(
        entry[0],
        IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_MASK,
        IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_SHIFT);

    addr_hi = get_field_from_reg_u32(
        entry[1],
        IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_MASK,
        IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_SHIFT);

    ptr = (addr_hi << 32) | (addr_lo << PAGE_SHIFT);
    return ptr;
}

/* For each pde, we use ignored bits (bit 1 - bit 8 and bit 63)
 * to save pde count; pde count = 511 is a candidate for page coalescing.
 */
static unsigned int get_pde_count(u64 pde)
{
    unsigned int count;
    u64 upper_mask = 1ULL << 63;
    u64 lower_mask = 0xFF << 1;

    count = ((pde & upper_mask) >> 55) | ((pde & lower_mask) >> 1);
    return count;
}

/* Convert pde count into iommu pte ignored bits */
static void set_pde_count(u64 *pde, unsigned int count)
{
    u64 upper_mask = 1ULL << 8;
    u64 lower_mask = 0xFF;
    u64 pte_mask = (~(1ULL << 63)) & (~(0xFF << 1));

    *pde &= pte_mask;
    *pde |= ((count & upper_mask) << 55) | ((count & lower_mask) << 1);
}
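
For illustration, a self-contained model of the count packing used by get_pde_count() and set_pde_count(): a 9-bit count is kept in the ignored PDE bits 1-8 plus bit 63. The model_* names and the standalone test are hypothetical; only the bit positions come from the code above:

/* Standalone sketch, not part of iommu_map.c. */
#include <assert.h>
#include <stdint.h>

static void model_set_count(uint64_t *pde, unsigned int count)
{
    /* Clear bits 1-8 and bit 63, then store the 9-bit count there. */
    *pde &= ~(1ULL << 63) & ~(uint64_t)(0xFF << 1);
    *pde |= ((uint64_t)(count & 0x100) << 55) |  /* count bit 8    -> pde bit 63   */
            ((uint64_t)(count & 0xFF) << 1);     /* count bits 0-7 -> pde bits 1-8 */
}

static unsigned int model_get_count(uint64_t pde)
{
    return ((pde & (1ULL << 63)) >> 55) | ((pde & 0x1FEULL) >> 1);
}

int main(void)
{
    uint64_t pde = 0;
    unsigned int count;

    /* The packing round-trips for every count a 512-entry table can need. */
    for ( count = 0; count <= 511; count++ )
    {
        model_set_count(&pde, count);
        assert(model_get_count(pde) == count);
    }
    return 0;
}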

/* Return 1 if pages are suitable for merging at merge_level;
 * otherwise increase pde count if mfn is contiguous with mfn - 1
 */
static int iommu_update_pde_count(struct domain *d, unsigned long pt_mfn,
                                  unsigned long gfn, unsigned long mfn,
                                  unsigned int merge_level)
{
    unsigned int pde_count, next_level;
    unsigned long first_mfn;
    u64 *table, *pde, *ntable;
    u64 ntable_maddr, mask;
    struct domain_iommu *hd = dom_iommu(d);
    bool_t ok = 0;

    ASSERT( spin_is_locked(&hd->arch.mapping_lock) && pt_mfn );

    next_level = merge_level - 1;

    /* get pde at merge level */
    table = map_domain_page(_mfn(pt_mfn));
    pde = table + pfn_to_pde_idx(gfn, merge_level);

    /* get page table of next level */
    ntable_maddr = amd_iommu_get_next_table_from_pte((u32*)pde);
    ntable = map_domain_page(_mfn(paddr_to_pfn(ntable_maddr)));

    /* get the first mfn of next level */
    first_mfn = amd_iommu_get_next_table_from_pte((u32*)ntable) >> PAGE_SHIFT;

    if ( first_mfn == 0 )
        goto out;

    mask = (1ULL << (PTE_PER_TABLE_SHIFT * next_level)) - 1;

    if ( ((first_mfn & mask) == 0) &&
         (((gfn & mask) | first_mfn) == mfn) )
    {
        pde_count = get_pde_count(*pde);

        if ( pde_count == (PTE_PER_TABLE_SIZE - 1) )
            ok = 1;
        else if ( pde_count < (PTE_PER_TABLE_SIZE - 1) )
        {
            pde_count++;
            set_pde_count(pde, pde_count);
        }
    }

    else
        /* non-contiguous mapping */
        set_pde_count(pde, 0);

out:
    unmap_domain_page(ntable);
    unmap_domain_page(table);

    return ok;
}

static int iommu_merge_pages(struct domain *d, unsigned long pt_mfn,
                             unsigned long gfn, unsigned int flags,
                             unsigned int merge_level)
{
    u64 *table, *pde, *ntable;
    u64 ntable_mfn;
    unsigned long first_mfn;
    struct domain_iommu *hd = dom_iommu(d);

    ASSERT( spin_is_locked(&hd->arch.mapping_lock) && pt_mfn );

    table = map_domain_page(_mfn(pt_mfn));
    pde = table + pfn_to_pde_idx(gfn, merge_level);

    /* get first mfn */
    ntable_mfn = amd_iommu_get_next_table_from_pte((u32*)pde) >> PAGE_SHIFT;

    if ( ntable_mfn == 0 )
    {
        unmap_domain_page(table);
        return 1;
    }

    ntable = map_domain_page(_mfn(ntable_mfn));
    first_mfn = amd_iommu_get_next_table_from_pte((u32*)ntable) >> PAGE_SHIFT;

    if ( first_mfn == 0 )
    {
        unmap_domain_page(ntable);
        unmap_domain_page(table);
        return 1;
    }

    /* setup super page mapping, next level = 0 */
    set_iommu_pde_present((u32*)pde, first_mfn,
                          IOMMU_PAGING_MODE_LEVEL_0,
                          !!(flags & IOMMUF_writable),
                          !!(flags & IOMMUF_readable));

    amd_iommu_flush_all_pages(d);

    unmap_domain_page(ntable);
    unmap_domain_page(table);
    return 0;
}

/* Walk io page tables and build level page tables if necessary
 * {Re, un}mapping super page frames causes re-allocation of io
 * page tables.
 */
static int iommu_pde_from_gfn(struct domain *d, unsigned long pfn,
                              unsigned long pt_mfn[])
{
    u64 *pde, *next_table_vaddr;
    unsigned long next_table_mfn;
    unsigned int level;
    struct page_info *table;
    const struct domain_iommu *hd = dom_iommu(d);

    table = hd->arch.root_table;
    level = hd->arch.paging_mode;

    BUG_ON( table == NULL || level < IOMMU_PAGING_MODE_LEVEL_1 ||
            level > IOMMU_PAGING_MODE_LEVEL_6 );

    next_table_mfn = page_to_mfn(table);

    if ( level == IOMMU_PAGING_MODE_LEVEL_1 )
    {
        pt_mfn[level] = next_table_mfn;
        return 0;
    }

    while ( level > IOMMU_PAGING_MODE_LEVEL_1 )
    {
        unsigned int next_level = level - 1;
        pt_mfn[level] = next_table_mfn;

        next_table_vaddr = map_domain_page(_mfn(next_table_mfn));
        pde = next_table_vaddr + pfn_to_pde_idx(pfn, level);

        /* Here might be a super page frame */
        next_table_mfn = amd_iommu_get_next_table_from_pte((uint32_t*)pde)
                         >> PAGE_SHIFT;

        /* Split super page frame into smaller pieces. */
        if ( iommu_is_pte_present((u32*)pde) &&
             (iommu_next_level((u32*)pde) == 0) &&
             next_table_mfn != 0 )
        {
            int i;
            unsigned long mfn, gfn;
            unsigned int page_sz;

            page_sz = 1 << (PTE_PER_TABLE_SHIFT * (next_level - 1));
            gfn = pfn & ~((1 << (PTE_PER_TABLE_SHIFT * next_level)) - 1);
            mfn = next_table_mfn;

            /* allocate lower level page table */
            table = alloc_amd_iommu_pgtable();
            if ( table == NULL )
            {
                AMD_IOMMU_DEBUG("Cannot allocate I/O page table\n");
                unmap_domain_page(next_table_vaddr);
                return 1;
            }

            next_table_mfn = page_to_mfn(table);
            set_iommu_pde_present((u32*)pde, next_table_mfn, next_level,
                                  !!IOMMUF_writable, !!IOMMUF_readable);

            for ( i = 0; i < PTE_PER_TABLE_SIZE; i++ )
            {
                set_iommu_pte_present(next_table_mfn, gfn, mfn, next_level,
                                      !!IOMMUF_writable, !!IOMMUF_readable);
                mfn += page_sz;
                gfn += page_sz;
            }

            amd_iommu_flush_all_pages(d);
        }

        /* Install lower level page table for non-present entries */
        else if ( !iommu_is_pte_present((u32*)pde) )
        {
            if ( next_table_mfn == 0 )
            {
                table = alloc_amd_iommu_pgtable();
                if ( table == NULL )
                {
                    AMD_IOMMU_DEBUG("Cannot allocate I/O page table\n");
                    unmap_domain_page(next_table_vaddr);
                    return 1;
                }
                next_table_mfn = page_to_mfn(table);
                set_iommu_pde_present((u32*)pde, next_table_mfn, next_level,
                                      !!IOMMUF_writable, !!IOMMUF_readable);
            }
            else /* should never reach here */
            {
                unmap_domain_page(next_table_vaddr);
                return 1;
            }
        }

        unmap_domain_page(next_table_vaddr);
        level--;
    }

    /* mfn of level 1 page table */
    pt_mfn[level] = next_table_mfn;
    return 0;
}

static int update_paging_mode(struct domain *d, unsigned long gfn)
{
    u16 bdf;
    void *device_entry;
    unsigned int req_id, level, offset;
    unsigned long flags;
    struct pci_dev *pdev;
    struct amd_iommu *iommu = NULL;
    struct page_info *new_root = NULL;
    struct page_info *old_root = NULL;
    void *new_root_vaddr;
    unsigned long old_root_mfn;
    struct domain_iommu *hd = dom_iommu(d);

    if ( gfn == gfn_x(INVALID_GFN) )
        return -EADDRNOTAVAIL;
    ASSERT(!(gfn >> DEFAULT_DOMAIN_ADDRESS_WIDTH));

    level = hd->arch.paging_mode;
    old_root = hd->arch.root_table;
    offset = gfn >> (PTE_PER_TABLE_SHIFT * (level - 1));

    ASSERT(spin_is_locked(&hd->arch.mapping_lock) && is_hvm_domain(d));

    while ( offset >= PTE_PER_TABLE_SIZE )
    {
        /* Allocate and install a new root table.
         * Only upper I/O page table grows, no need to fix next level bits */
        new_root = alloc_amd_iommu_pgtable();
        if ( new_root == NULL )
        {
            AMD_IOMMU_DEBUG("%s Cannot allocate I/O page table\n",
                            __func__);
            return -ENOMEM;
        }

        new_root_vaddr = __map_domain_page(new_root);
        old_root_mfn = page_to_mfn(old_root);
        set_iommu_pde_present(new_root_vaddr, old_root_mfn, level,
                              !!IOMMUF_writable, !!IOMMUF_readable);
        level++;
        old_root = new_root;
        offset >>= PTE_PER_TABLE_SHIFT;
        unmap_domain_page(new_root_vaddr);
    }

    if ( new_root != NULL )
    {
        hd->arch.paging_mode = level;
        hd->arch.root_table = new_root;

        if ( !pcidevs_locked() )
            AMD_IOMMU_DEBUG("%s Try to access pdev_list "
                            "without acquiring pcidevs_lock.\n", __func__);

        /* Update device table entries using new root table and paging mode */
        for_each_pdev( d, pdev )
        {
            bdf = PCI_BDF2(pdev->bus, pdev->devfn);
            iommu = find_iommu_for_device(pdev->seg, bdf);
            if ( !iommu )
            {
                AMD_IOMMU_DEBUG("%s Fail to find iommu.\n", __func__);
                return -ENODEV;
            }

            spin_lock_irqsave(&iommu->lock, flags);
            do {
                req_id = get_dma_requestor_id(pdev->seg, bdf);
                device_entry = iommu->dev_table.buffer +
                               (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);

                /* valid = 0 only works for dom0 passthrough mode */
                amd_iommu_set_root_page_table((u32 *)device_entry,
                                              page_to_maddr(hd->arch.root_table),
                                              d->domain_id,
                                              hd->arch.paging_mode, 1);

                amd_iommu_flush_device(iommu, req_id);
                bdf += pdev->phantom_stride;
            } while ( PCI_DEVFN2(bdf) != pdev->devfn &&
                      PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
            spin_unlock_irqrestore(&iommu->lock, flags);
        }

        /* For safety, invalidate all entries */
        amd_iommu_flush_all_pages(d);
    }
    return 0;
}

int amd_iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn,
                       unsigned int flags)
{
    bool_t need_flush = 0;
    struct domain_iommu *hd = dom_iommu(d);
    int rc;
    unsigned long pt_mfn[7];
    unsigned int merge_level;

    if ( iommu_use_hap_pt(d) )
        return 0;

    memset(pt_mfn, 0, sizeof(pt_mfn));

    spin_lock(&hd->arch.mapping_lock);

    rc = amd_iommu_alloc_root(hd);
    if ( rc )
    {
        spin_unlock(&hd->arch.mapping_lock);
        AMD_IOMMU_DEBUG("Root table alloc failed, gfn = %lx\n", gfn);
        domain_crash(d);
        return rc;
    }

    /* Since HVM domain is initialized with 2 level IO page table,
     * we might need a deeper page table for larger gfn now */
    if ( is_hvm_domain(d) )
    {
        if ( update_paging_mode(d, gfn) )
        {
            spin_unlock(&hd->arch.mapping_lock);
            AMD_IOMMU_DEBUG("Update page mode failed gfn = %lx\n", gfn);
            domain_crash(d);
            return -EFAULT;
        }
    }

    if ( iommu_pde_from_gfn(d, gfn, pt_mfn) || (pt_mfn[1] == 0) )
    {
        spin_unlock(&hd->arch.mapping_lock);
        AMD_IOMMU_DEBUG("Invalid IO pagetable entry gfn = %lx\n", gfn);
        domain_crash(d);
        return -EFAULT;
    }

    /* Install 4k mapping first */
    need_flush = set_iommu_pte_present(pt_mfn[1], gfn, mfn,
                                       IOMMU_PAGING_MODE_LEVEL_1,
                                       !!(flags & IOMMUF_writable),
                                       !!(flags & IOMMUF_readable));

    /* Do not increase pde count if io mapping has not been changed */
    if ( !need_flush )
        goto out;

    /* 4K mapping for PV guests never changes,
     * no need to flush if we trust non-present bits */
    if ( is_hvm_domain(d) )
        amd_iommu_flush_pages(d, gfn, 0);

    for ( merge_level = IOMMU_PAGING_MODE_LEVEL_2;
          merge_level <= hd->arch.paging_mode; merge_level++ )
    {
        if ( pt_mfn[merge_level] == 0 )
            break;
        if ( !iommu_update_pde_count(d, pt_mfn[merge_level],
                                     gfn, mfn, merge_level) )
            break;

        if ( iommu_merge_pages(d, pt_mfn[merge_level], gfn,
                               flags, merge_level) )
        {
            spin_unlock(&hd->arch.mapping_lock);
            AMD_IOMMU_DEBUG("Merge iommu page failed at level %d, "
                            "gfn = %lx mfn = %lx\n", merge_level, gfn, mfn);
            domain_crash(d);
            return -EFAULT;
        }

        /* Deallocate lower level page table */
        free_amd_iommu_pgtable(mfn_to_page(pt_mfn[merge_level - 1]));
    }

out:
    spin_unlock(&hd->arch.mapping_lock);
    return 0;
}
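
A hedged usage sketch of the entry point above; d, gfn, mfn and rc are placeholders, and the flag values are the IOMMUF_* bits the function itself tests:

/*
 * Illustrative call only, not part of iommu_map.c:
 *
 *     rc = amd_iommu_map_page(d, gfn, mfn,
 *                             IOMMUF_readable | IOMMUF_writable);
 *
 * The function installs the 4K mapping at level 1 first, then walks
 * merge_level upwards from 2: once a level's pde count (kept in the
 * ignored bits, see get_pde_count()/set_pde_count()) reaches 511 for
 * contiguous mappings, iommu_merge_pages() rewrites the covering PDE
 * as a superpage and the lower-level table is freed.
 */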

int amd_iommu_unmap_page(struct domain *d, unsigned long gfn)
{
    unsigned long pt_mfn[7];
    struct domain_iommu *hd = dom_iommu(d);

    if ( iommu_use_hap_pt(d) )
        return 0;

    memset(pt_mfn, 0, sizeof(pt_mfn));

    spin_lock(&hd->arch.mapping_lock);

    if ( !hd->arch.root_table )
    {
        spin_unlock(&hd->arch.mapping_lock);
        return 0;
    }

    /* Since HVM domain is initialized with 2 level IO page table,
     * we might need a deeper page table for larger gfn now */
    if ( is_hvm_domain(d) )
    {
        int rc = update_paging_mode(d, gfn);

        if ( rc )
        {
            spin_unlock(&hd->arch.mapping_lock);
            AMD_IOMMU_DEBUG("Update page mode failed gfn = %lx\n", gfn);
            if ( rc != -EADDRNOTAVAIL )
                domain_crash(d);
            return rc;
        }
    }

    if ( iommu_pde_from_gfn(d, gfn, pt_mfn) || (pt_mfn[1] == 0) )
    {
        spin_unlock(&hd->arch.mapping_lock);
        AMD_IOMMU_DEBUG("Invalid IO pagetable entry gfn = %lx\n", gfn);
        domain_crash(d);
        return -EFAULT;
    }

    /* mark PTE as 'page not present' */
    clear_iommu_pte_present(pt_mfn[1], gfn);
    spin_unlock(&hd->arch.mapping_lock);

    amd_iommu_flush_pages(d, gfn, 0);

    return 0;
}

int amd_iommu_reserve_domain_unity_map(struct domain *domain,
                                       u64 phys_addr,
                                       unsigned long size, int iw, int ir)
{
    unsigned long npages, i;
    unsigned long gfn;
    unsigned int flags = !!ir;
    int rt = 0;

    if ( iw )
        flags |= IOMMUF_writable;

    npages = region_to_pages(phys_addr, size);
    gfn = phys_addr >> PAGE_SHIFT;
    for ( i = 0; i < npages; i++ )
    {
        rt = amd_iommu_map_page(domain, gfn + i, gfn + i, flags);
        if ( rt != 0 )
            return rt;
    }
    return 0;
}
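
A hedged usage sketch of the unity-map helper above; the domain pointer, base address and size are placeholders chosen only to make the example concrete:

/*
 * Illustrative call only, not part of iommu_map.c: identity-map a 64K
 * region (gfn == mfn for every page) with both read and write allowed:
 *
 *     rc = amd_iommu_reserve_domain_unity_map(d, 0xfed00000, 0x10000,
 *                                             1, 1);    // iw = 1, ir = 1
 *
 * Internally each page is mapped through amd_iommu_map_page(), with
 * IOMMUF_writable added to the flags when iw is non-zero.
 */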

/* Share p2m table with iommu. */
void amd_iommu_share_p2m(struct domain *d)
{
    struct domain_iommu *hd = dom_iommu(d);
    struct page_info *p2m_table;
    mfn_t pgd_mfn;

    pgd_mfn = pagetable_get_mfn(p2m_get_pagetable(p2m_get_hostp2m(d)));
    p2m_table = mfn_to_page(mfn_x(pgd_mfn));

    if ( hd->arch.root_table != p2m_table )
    {
        free_amd_iommu_pgtable(hd->arch.root_table);
        hd->arch.root_table = p2m_table;

        /* When sharing p2m with iommu, paging mode = 4 */
        hd->arch.paging_mode = IOMMU_PAGING_MODE_LEVEL_4;
        AMD_IOMMU_DEBUG("Share p2m table with iommu: p2m table = %#lx\n",
                        mfn_x(pgd_mfn));
    }
}