Coverage Report

Created: 2017-10-25 09:10

File: /root/src/xen/xen/arch/x86/mm/p2m-ept.c

 Count | Source
-------+----------------------------------------------------------------------
       | /*
       |  * ept-p2m.c: use the EPT page table as p2m
       |  * Copyright (c) 2007, Intel Corporation.
       |  *
       |  * This program is free software; you can redistribute it and/or modify it
       |  * under the terms and conditions of the GNU General Public License,
       |  * version 2, as published by the Free Software Foundation.
       |  *
       |  * This program is distributed in the hope it will be useful, but WITHOUT
       |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       |  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
       |  * more details.
       |  *
       |  * You should have received a copy of the GNU General Public License along with
       |  * this program; If not, see <http://www.gnu.org/licenses/>.
       |  */
       |
       | #include <xen/domain_page.h>
       | #include <xen/sched.h>
       | #include <asm/current.h>
       | #include <asm/paging.h>
       | #include <asm/types.h>
       | #include <asm/domain.h>
       | #include <asm/p2m.h>
       | #include <asm/hvm/vmx/vmx.h>
       | #include <asm/hvm/vmx/vmcs.h>
       | #include <asm/hvm/nestedhvm.h>
       | #include <xen/iommu.h>
       | #include <asm/mtrr.h>
       | #include <asm/hvm/cacheattr.h>
       | #include <xen/keyhandler.h>
       | #include <xen/softirq.h>
       |
       | #include "mm-locks.h"
       |
       | #define atomic_read_ept_entry(__pepte)                              \
 18.6M |     ( (ept_entry_t) { .epte = read_atomic(&(__pepte)->epte) } )
       |
 20.7M | #define is_epte_present(ept_entry)      ((ept_entry)->epte & 0x7)
 14.9M | #define is_epte_superpage(ept_entry)    ((ept_entry)->sp)
       | static inline bool_t is_epte_valid(ept_entry_t *e)
 5.61M | {
 5.61M |     /* suppress_ve alone is not considered valid, so mask it off */
 4.42M |     return ((e->epte & ~(1ul << 63)) != 0 && e->sa_p2mt != p2m_invalid);
 5.61M | }
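
Note: the three helpers above all view the same 64-bit EPT entry two ways, as
the raw integer epte and as a bitfield. A minimal, self-contained sketch of
that union (field order and widths simplified from the real ept_entry_t, so
treat the layout as illustrative, not authoritative):

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified stand-in for Xen's ept_entry_t: a union of the raw 64-bit
     * EPTE and a bitfield view.  Field widths here are illustrative. */
    typedef union {
        uint64_t epte;
        struct {
            uint64_t r           : 1,   /* bits 2:0 are the r/w/x access   */
                     w           : 1,   /* bits; "present" means any of    */
                     x           : 1,   /* them is set, hence the 0x7 mask */
                     emt         : 3,   /* effective memory type           */
                     ipat        : 1,
                     sp          : 1,   /* superpage (2M/1G leaf)          */
                     a           : 1,
                     d           : 1,
                     unused      : 2,
                     mfn         : 40,
                     pad         : 11,
                     suppress_ve : 1;   /* bit 63, masked by is_epte_valid */
        };
    } epte_demo_t;

    int main(void)
    {
        epte_demo_t e = { .epte = 0 };

        e.suppress_ve = 1;                             /* bit 63 alone... */
        printf("present: %d\n", (int)(e.epte & 0x7));  /* 0 */
        printf("nonzero: %d\n", e.epte != 0);          /* 1, yet not valid */

        e.r = e.w = e.x = 1;                           /* now present */
        printf("present: %d\n", (int)((e.epte & 0x7) != 0));   /* 1 */
        return 0;
    }

The 0x7 mask works because an entry with none of r/w/x set is not-present to
the hardware, while is_epte_valid() additionally masks off bit 63 because Xen
sets suppress_ve even in otherwise-empty entries.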
       |
       | /* returns : 0 for success, -errno otherwise */
       | static int atomic_write_ept_entry(ept_entry_t *entryptr, ept_entry_t new,
       |                                   int level)
 3.08M | {
 3.08M |     int rc;
 3.08M |     unsigned long oldmfn = mfn_x(INVALID_MFN);
 3.08M |     bool_t check_foreign = (new.mfn != entryptr->mfn ||
 2.02M |                             new.sa_p2mt != entryptr->sa_p2mt);
 3.08M |
 3.08M |     if ( level )
 1.01M |     {
 1.01M |         ASSERT(!is_epte_superpage(&new) || !p2m_is_foreign(new.sa_p2mt));
 1.01M |         write_atomic(&entryptr->epte, new.epte);
 1.01M |         return 0;
 1.01M |     }
 3.08M |
 2.07M |     if ( unlikely(p2m_is_foreign(new.sa_p2mt)) )
     0 |     {
     0 |         rc = -EINVAL;
     0 |         if ( !is_epte_present(&new) )
     0 |                 goto out;
     0 |
     0 |         if ( check_foreign )
     0 |         {
     0 |             struct domain *fdom;
     0 |
     0 |             if ( !mfn_valid(_mfn(new.mfn)) )
     0 |                 goto out;
     0 |
     0 |             rc = -ESRCH;
     0 |             fdom = page_get_owner(mfn_to_page(new.mfn));
     0 |             if ( fdom == NULL )
     0 |                 goto out;
     0 |
     0 |             /* get refcount on the page */
     0 |             rc = -EBUSY;
     0 |             if ( !get_page(mfn_to_page(new.mfn), fdom) )
     0 |                 goto out;
     0 |         }
     0 |     }
 2.07M |
 2.07M |     if ( unlikely(p2m_is_foreign(entryptr->sa_p2mt)) && check_foreign )
     0 |         oldmfn = entryptr->mfn;
 2.07M |
 2.07M |     write_atomic(&entryptr->epte, new.epte);
 2.07M |
 2.07M |     if ( unlikely(oldmfn != mfn_x(INVALID_MFN)) )
     0 |         put_page(mfn_to_page(oldmfn));
 2.07M |
 2.07M |     rc = 0;
 2.07M |
 2.07M |  out:
 2.07M |     if ( rc )
     0 |         gdprintk(XENLOG_ERR, "epte o:%"PRIx64" n:%"PRIx64" rc:%d\n",
 2.07M |                  entryptr->epte, new.epte, rc);
 2.07M |     return rc;
 2.07M | }
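
Note: the crux of atomic_write_ept_entry() is that the whole 64-bit entry is
published with one single-copy-atomic store, so the hardware page-table walker
never observes a half-updated entry. A stand-alone sketch of that discipline
using C11 atomics (Xen's read_atomic()/write_atomic() are different,
architecture-specific primitives; this only models the guarantee they give):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    /* The whole 64-bit entry is published with a single atomic store, so a
     * concurrent reader sees either the old or the new entry, never a mix. */
    static _Atomic uint64_t live_epte;

    static void publish_entry(uint64_t new_epte)
    {
        atomic_store_explicit(&live_epte, new_epte, memory_order_relaxed);
    }

    static uint64_t snapshot_entry(void)
    {
        /* Read once and act on the cached value, as ept_next_level() does. */
        return atomic_load_explicit(&live_epte, memory_order_relaxed);
    }

    int main(void)
    {
        publish_entry(0x1234000ull | 0x7);      /* mfn bits + rwx */
        printf("epte = %#llx\n", (unsigned long long)snapshot_entry());
        return 0;
    }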
       |
       | static void ept_p2m_type_to_flags(struct p2m_domain *p2m, ept_entry_t *entry,
       |                                   p2m_type_t type, p2m_access_t access)
  850k | {
  850k |     /*
  850k |      * First apply type permissions.
  850k |      *
  850k |      * A/D bits are also manually set to avoid overhead of MMU having to set
  850k |      * them later. Both A/D bits are safe to be updated directly as they are
  850k |      * ignored by the processor if EPT A/D support is not turned on.
  850k |      *
  850k |      * A bit is set for all present p2m types in middle and leaf EPT entries.
  850k |      * D bit is set for all writable types in EPT leaf entry, except for
  850k |      * log-dirty type with PML.
  850k |      */
  850k |     switch(type)
  850k |     {
     0 |         case p2m_invalid:
     0 |         case p2m_mmio_dm:
     0 |         case p2m_populate_on_demand:
     0 |         case p2m_ram_paging_out:
     0 |         case p2m_ram_paged:
     0 |         case p2m_ram_paging_in:
     0 |         default:
     0 |             entry->r = entry->w = entry->x = 0;
     0 |             break;
  561k |         case p2m_ram_rw:
  561k |             entry->r = entry->w = entry->x = 1;
  561k |             entry->a = entry->d = !!cpu_has_vmx_ept_ad;
  561k |             break;
     0 |         case p2m_ioreq_server:
     0 |             entry->r = 1;
     0 |             entry->w = !(p2m->ioreq.flags & XEN_DMOP_IOREQ_MEM_ACCESS_WRITE);
     0 |             entry->x = 0;
     0 |             entry->a = !!cpu_has_vmx_ept_ad;
     0 |             entry->d = entry->w && entry->a;
     0 |             break;
  289k |         case p2m_mmio_direct:
  289k |             entry->r = entry->x = 1;
  289k |             entry->w = !rangeset_contains_singleton(mmio_ro_ranges,
  289k |                                                     entry->mfn);
  289k |             ASSERT(entry->w || !is_epte_superpage(entry));
  289k |             entry->a = !!cpu_has_vmx_ept_ad;
  289k |             entry->d = entry->w && cpu_has_vmx_ept_ad;
  289k |             break;
     0 |         case p2m_ram_logdirty:
     0 |             entry->r = entry->x = 1;
     0 |             /*
     0 |              * In case of PML, we don't have to write protect 4K page, but
     0 |              * only need to clear D-bit for it, but we still need to write
     0 |              * protect super page in order to split it to 4K pages in EPT
     0 |              * violation.
     0 |              */
     0 |             if ( vmx_domain_pml_enabled(p2m->domain) &&
     0 |                  !is_epte_superpage(entry) )
     0 |                 entry->w = 1;
     0 |             else
     0 |                 entry->w = 0;
     0 |             entry->a = !!cpu_has_vmx_ept_ad;
     0 |             /* For both PML or non-PML cases we clear D bit anyway */
     0 |             entry->d = 0;
     0 |             break;
     0 |         case p2m_ram_ro:
     0 |         case p2m_ram_shared:
     0 |             entry->r = entry->x = 1;
     0 |             entry->w = 0;
     0 |             entry->a = !!cpu_has_vmx_ept_ad;
     0 |             entry->d = 0;
     0 |             break;
     0 |         case p2m_grant_map_rw:
     0 |         case p2m_map_foreign:
     0 |             entry->r = entry->w = 1;
     0 |             entry->x = 0;
     0 |             entry->a = entry->d = !!cpu_has_vmx_ept_ad;
     0 |             break;
     0 |         case p2m_grant_map_ro:
     0 |             entry->r = 1;
     0 |             entry->w = entry->x = 0;
     0 |             entry->a = !!cpu_has_vmx_ept_ad;
     0 |             entry->d = 0;
     0 |             break;
  850k |     }
  850k |
  850k |
  850k |     /* Then restrict with access permissions */
  850k |     switch (access)
  850k |     {
     0 |         case p2m_access_n:
     0 |         case p2m_access_n2rwx:
     0 |             entry->r = entry->w = entry->x = 0;
     0 |             break;
     0 |         case p2m_access_r:
     0 |             entry->w = entry->x = 0;
     0 |             break;
     0 |         case p2m_access_w:
     0 |             entry->r = entry->x = 0;
     0 |             break;
     0 |         case p2m_access_x:
     0 |             entry->r = entry->w = 0;
     0 |             break;
     0 |         case p2m_access_rx:
     0 |         case p2m_access_rx2rw:
     0 |             entry->w = 0;
     0 |             break;
     0 |         case p2m_access_wx:
     0 |             entry->r = 0;
     0 |             break;
    39 |         case p2m_access_rw:
    39 |             entry->x = 0;
    39 |             break;
  850k |         case p2m_access_rwx:
  850k |             break;
  850k |     }
  850k |
  850k | }
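
Note: the two switches above are deliberately ordered: the p2m type sets the
baseline permissions, and the access restriction can only remove rights. A toy
restatement with hypothetical enums (not Xen's), just to show the ordering:

    #include <stdio.h>

    enum toy_type   { RAM_RW, RAM_RO };
    enum toy_access { ACC_RWX, ACC_RX, ACC_N };

    struct perms { int r, w, x; };

    static struct perms toy_flags(enum toy_type t, enum toy_access a)
    {
        struct perms p = { 0, 0, 0 };

        switch ( t )            /* stage 1: type permissions */
        {
        case RAM_RW: p.r = p.w = p.x = 1; break;
        case RAM_RO: p.r = p.x = 1;       break;
        }

        switch ( a )            /* stage 2: access may only clear bits */
        {
        case ACC_RWX: break;
        case ACC_RX:  p.w = 0;             break;
        case ACC_N:   p.r = p.w = p.x = 0; break;
        }
        return p;
    }

    int main(void)
    {
        struct perms p = toy_flags(RAM_RW, ACC_RX);
        printf("r=%d w=%d x=%d\n", p.r, p.w, p.x);   /* r=1 w=0 x=1 */
        return 0;
    }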
       |
 1.64M | #define GUEST_TABLE_MAP_FAILED  0
 14.8M | #define GUEST_TABLE_NORMAL_PAGE 1
 8.48M | #define GUEST_TABLE_SUPER_PAGE  2
 8.46M | #define GUEST_TABLE_POD_PAGE    3
       |
       | /* Fill in middle levels of ept table */
       | static int ept_set_middle_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry)
 1.27k | {
 1.27k |     mfn_t mfn;
 1.27k |     ept_entry_t *table;
 1.27k |     unsigned int i;
 1.27k |
 1.27k |     mfn = p2m_alloc_ptp(p2m, 0);
 1.27k |     if ( mfn_eq(mfn, INVALID_MFN) )
     0 |         return 0;
 1.27k |
 1.27k |     ept_entry->epte = 0;
 1.27k |     ept_entry->mfn = mfn_x(mfn);
 1.27k |     ept_entry->access = p2m->default_access;
 1.27k |
 1.27k |     ept_entry->r = ept_entry->w = ept_entry->x = 1;
 1.27k |     /* Manually set A bit to avoid overhead of MMU having to write it later. */
 1.27k |     ept_entry->a = !!cpu_has_vmx_ept_ad;
 1.27k |
 1.27k |     ept_entry->suppress_ve = 1;
 1.27k |
 1.27k |     table = map_domain_page(mfn);
 1.27k |
  654k |     for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
  653k |         table[i].suppress_ve = 1;
 1.27k |
 1.27k |     unmap_domain_page(table);
 1.27k |
 1.27k |     return 1;
 1.27k | }
       |
       | /* free ept sub tree behind an entry */
       | static void ept_free_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry, int level)
  218k | {
  218k |     /* End if the entry is a leaf entry. */
  218k |     if ( level == 0 || !is_epte_present(ept_entry) ||
     0 |          is_epte_superpage(ept_entry) )
  218k |         return;
  218k |
     0 |     if ( level > 1 )
     0 |     {
     0 |         ept_entry_t *epte = map_domain_page(_mfn(ept_entry->mfn));
     0 |         for ( int i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
     0 |             ept_free_entry(p2m, epte + i, level - 1);
     0 |         unmap_domain_page(epte);
     0 |     }
     0 |
     0 |     p2m_tlb_flush_sync(p2m);
     0 |     p2m_free_ptp(p2m, mfn_to_page(ept_entry->mfn));
     0 | }
       |
       | static bool_t ept_split_super_page(struct p2m_domain *p2m,
       |                                    ept_entry_t *ept_entry,
       |                                    unsigned int level, unsigned int target)
     0 | {
     0 |     ept_entry_t new_ept, *table;
     0 |     uint64_t trunk;
     0 |     unsigned int i;
     0 |     bool_t rv = 1;
     0 |
     0 |     /* End if the entry is a leaf entry or reaches the target level. */
     0 |     if ( level <= target )
     0 |         return 1;
     0 |
     0 |     ASSERT(is_epte_superpage(ept_entry));
     0 |
     0 |     if ( !ept_set_middle_entry(p2m, &new_ept) )
     0 |         return 0;
     0 |
     0 |     table = map_domain_page(_mfn(new_ept.mfn));
     0 |     trunk = 1UL << ((level - 1) * EPT_TABLE_ORDER);
     0 |
     0 |     for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
     0 |     {
     0 |         ept_entry_t *epte = table + i;
     0 |
     0 |         *epte = *ept_entry;
     0 |         epte->sp = (level > 1);
     0 |         epte->mfn += i * trunk;
     0 |         epte->snp = (iommu_enabled && iommu_snoop);
     0 |         epte->suppress_ve = 1;
     0 |
     0 |         ept_p2m_type_to_flags(p2m, epte, epte->sa_p2mt, epte->access);
     0 |
     0 |         if ( (level - 1) == target )
     0 |             continue;
     0 |
     0 |         ASSERT(is_epte_superpage(epte));
     0 |
     0 |         if ( !(rv = ept_split_super_page(p2m, epte, level - 1, target)) )
     0 |             break;
     0 |     }
     0 |
     0 |     unmap_domain_page(table);
     0 |
     0 |     /* Even on failure we should install the newly allocated ept page. */
     0 |     *ept_entry = new_ept;
     0 |
     0 |     return rv;
     0 | }
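
Note: in ept_split_super_page(), trunk is the number of 4K frames each child
entry covers, so child i inherits the parent's mfn plus i * trunk. A quick
stand-alone check of that arithmetic for a level-2 (1G) entry split into
level-1 (2M) children:

    #include <stdio.h>

    #define EPT_TABLE_ORDER 9

    int main(void)
    {
        unsigned int level = 2;     /* splitting a 1G superpage */
        unsigned long trunk = 1UL << ((level - 1) * EPT_TABLE_ORDER);
        unsigned int i;

        for ( i = 0; i < 4; i++ )   /* first 4 of the 512 children */
            printf("child %u covers frames [%lu, %lu)\n",
                   i, i * trunk, (i + 1) * trunk);
        return 0;
    }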
       |
       | /* Take the currently mapped table, find the corresponding gfn entry,
       |  * and map the next table, if available.  If the entry is empty and
       |  * read_only is set, the walk fails with GUEST_TABLE_MAP_FAILED.
       |  * Return values:
       |  *  0: Failed to map.  Either read_only was set and the entry was
       |  *   empty, or allocating a new page failed.
       |  *  GUEST_TABLE_NORMAL_PAGE: next level mapped normally
       |  *  GUEST_TABLE_SUPER_PAGE:
       |  *   The next entry points to a superpage, and caller indicates
       |  *   that they are going to the superpage level, or are only doing
       |  *   a read.
       |  *  GUEST_TABLE_POD_PAGE:
       |  *   The next entry is marked populate-on-demand.
       |  */
       | static int ept_next_level(struct p2m_domain *p2m, bool_t read_only,
       |                           ept_entry_t **table, unsigned long *gfn_remainder,
       |                           int next_level)
 12.2M | {
 12.2M |     unsigned long mfn;
 12.2M |     ept_entry_t *ept_entry, e;
 12.2M |     u32 shift, index;
 12.2M |
 12.2M |     shift = next_level * EPT_TABLE_ORDER;
 12.2M |
 12.2M |     index = *gfn_remainder >> shift;
 12.2M |
 12.2M |     /* The index must fall within the page */
 12.2M |     ASSERT(index < EPT_PAGETABLE_ENTRIES);
 12.2M |
 12.2M |     ept_entry = (*table) + index;
 12.2M |
 12.2M |     /* ept_next_level() is called (sometimes) without a lock.  Read
 12.2M |      * the entry once, and act on the "cached" entry after that to
 12.2M |      * avoid races. */
 12.2M |     e = atomic_read_ept_entry(ept_entry);
 12.2M |
 12.2M |     if ( !is_epte_present(&e) )
  574k |     {
  574k |         if ( e.sa_p2mt == p2m_populate_on_demand )
     0 |             return GUEST_TABLE_POD_PAGE;
  574k |
  574k |         if ( read_only )
  572k |             return GUEST_TABLE_MAP_FAILED;
  574k |
 1.27k |         if ( !ept_set_middle_entry(p2m, ept_entry) )
     0 |             return GUEST_TABLE_MAP_FAILED;
 1.27k |         else
 1.27k |             e = atomic_read_ept_entry(ept_entry); /* Refresh */
 1.27k |     }
 12.2M |
 12.2M |     /* The only time sp would be set here is if we had hit a superpage */
 11.6M |     if ( is_epte_superpage(&e) )
 19.4k |         return GUEST_TABLE_SUPER_PAGE;
 11.6M |
 11.6M |     mfn = e.mfn;
 11.6M |     unmap_domain_page(*table);
 11.6M |     *table = map_domain_page(_mfn(mfn));
 11.6M |     *gfn_remainder &= (1UL << shift) - 1;
 11.6M |     return GUEST_TABLE_NORMAL_PAGE;
 11.6M | }
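
Note: ept_next_level() consumes EPT_TABLE_ORDER (9) bits of the gfn per level:
the table index is gfn_remainder >> (level * 9), and the remainder is masked
down before descending. A stand-alone demo of that index computation for one
hypothetical gfn on a 4-level walk (wl = 3):

    #include <stdio.h>

    #define EPT_TABLE_ORDER 9

    int main(void)
    {
        unsigned long gfn_remainder = 0x12345678UL;   /* fits in 36 bits */
        int level;

        for ( level = 3; level >= 0; level-- )
        {
            unsigned int shift = level * EPT_TABLE_ORDER;
            /* The walk keeps index < 512; the real code ASSERTs this. */
            unsigned int index = gfn_remainder >> shift;

            printf("level %d: index %3u\n", level, index);
            gfn_remainder &= (1UL << shift) - 1;
        }
        return 0;
    }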
       |
       | /*
       |  * Invalidate (via setting the EMT field to an invalid value) all valid
       |  * present entries in the given page table, optionally marking the entries
       |  * also for their subtrees needing P2M type re-calculation.
       |  */
       | static bool_t ept_invalidate_emt(mfn_t mfn, bool_t recalc, int level)
 2.07k | {
 2.07k |     int rc;
 2.07k |     ept_entry_t *epte = map_domain_page(mfn);
 2.07k |     unsigned int i;
 2.07k |     bool_t changed = 0;
 2.07k |
 1.06M |     for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
 1.06M |     {
 1.06M |         ept_entry_t e = atomic_read_ept_entry(&epte[i]);
 1.06M |
 1.06M |         if ( !is_epte_valid(&e) || !is_epte_present(&e) ||
 1.02M |              (e.emt == MTRR_NUM_TYPES && (e.recalc || !recalc)) )
 51.6k |             continue;
 1.06M |
 1.01M |         e.emt = MTRR_NUM_TYPES;
 1.01M |         if ( recalc )
     0 |             e.recalc = 1;
 1.01M |         rc = atomic_write_ept_entry(&epte[i], e, level);
 1.01M |         ASSERT(rc == 0);
 1.01M |         changed = 1;
 1.01M |     }
 2.07k |
 2.07k |     unmap_domain_page(epte);
 2.07k |
 2.07k |     return changed;
 2.07k | }
       |
       | /*
       |  * Just like ept_invalidate_emt() except that
       |  * - not all entries at the targeted level may need processing,
       |  * - the re-calculation flag always gets set.
       |  * The passed in range is guaranteed to not cross a page (table)
       |  * boundary at the targeted level.
       |  */
       | static int ept_invalidate_emt_range(struct p2m_domain *p2m,
       |                                     unsigned int target,
       |                                     unsigned long first_gfn,
       |                                     unsigned long last_gfn)
     0 | {
     0 |     ept_entry_t *table;
     0 |     unsigned long gfn_remainder = first_gfn;
     0 |     unsigned int i, index;
     0 |     int wrc, rc = 0, ret = GUEST_TABLE_MAP_FAILED;
     0 |
     0 |     table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
     0 |     for ( i = p2m->ept.wl; i > target; --i )
     0 |     {
     0 |         ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i);
     0 |         if ( ret == GUEST_TABLE_MAP_FAILED )
     0 |             goto out;
     0 |         if ( ret != GUEST_TABLE_NORMAL_PAGE )
     0 |             break;
     0 |     }
     0 |
     0 |     if ( i > target )
     0 |     {
     0 |         /* We need to split the original page. */
     0 |         ept_entry_t split_ept_entry;
     0 |
     0 |         index = gfn_remainder >> (i * EPT_TABLE_ORDER);
     0 |         split_ept_entry = atomic_read_ept_entry(&table[index]);
     0 |         ASSERT(is_epte_superpage(&split_ept_entry));
     0 |         if ( !ept_split_super_page(p2m, &split_ept_entry, i, target) )
     0 |         {
     0 |             ept_free_entry(p2m, &split_ept_entry, i);
     0 |             rc = -ENOMEM;
     0 |             goto out;
     0 |         }
     0 |         wrc = atomic_write_ept_entry(&table[index], split_ept_entry, i);
     0 |         ASSERT(wrc == 0);
     0 |
     0 |         for ( ; i > target; --i )
     0 |             if ( !ept_next_level(p2m, 1, &table, &gfn_remainder, i) )
     0 |                 break;
     0 |         ASSERT(i == target);
     0 |     }
     0 |
     0 |     index = gfn_remainder >> (i * EPT_TABLE_ORDER);
     0 |     i = (last_gfn >> (i * EPT_TABLE_ORDER)) & (EPT_PAGETABLE_ENTRIES - 1);
     0 |     for ( ; index <= i; ++index )
     0 |     {
     0 |         ept_entry_t e = atomic_read_ept_entry(&table[index]);
     0 |
     0 |         if ( is_epte_valid(&e) && is_epte_present(&e) &&
     0 |              (e.emt != MTRR_NUM_TYPES || !e.recalc) )
     0 |         {
     0 |             e.emt = MTRR_NUM_TYPES;
     0 |             e.recalc = 1;
     0 |             wrc = atomic_write_ept_entry(&table[index], e, target);
     0 |             ASSERT(wrc == 0);
     0 |             rc = 1;
     0 |         }
     0 |     }
     0 |
     0 |  out:
     0 |     unmap_domain_page(table);
     0 |
     0 |     return rc;
     0 | }
       |
       | /*
       |  * Resolve deliberately mis-configured (EMT field set to an invalid value)
       |  * entries in the page table hierarchy for the given GFN:
       |  * - calculate the correct value for the EMT field,
       |  * - if marked so, re-calculate the P2M type,
       |  * - propagate EMT and re-calculation flag down to the next page table level
       |  *   for entries not involved in the translation of the given GFN.
       |  * Returns:
       |  * - negative errno values on error,
       |  * - zero if no adjustment was done,
       |  * - a positive value if at least one adjustment was done.
       |  */
       | static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
 1.07M | {
 1.07M |     struct ept_data *ept = &p2m->ept;
 1.07M |     unsigned int level = ept->wl;
 1.07M |     unsigned long mfn = ept->mfn;
 1.07M |     ept_entry_t *epte;
 1.07M |     int wrc, rc = 0;
 1.07M |
 1.07M |     if ( !mfn )
    12 |         return 0;
 1.07M |
 3.21M |     for ( ; ; --level )
 4.28M |     {
 4.28M |         ept_entry_t e;
 4.28M |         unsigned int i;
 4.28M |
 4.28M |         epte = map_domain_page(_mfn(mfn));
 4.28M |         i = (gfn >> (level * EPT_TABLE_ORDER)) & (EPT_PAGETABLE_ENTRIES - 1);
 4.28M |         e = atomic_read_ept_entry(&epte[i]);
 4.28M |
 4.28M |         if ( level == 0 || is_epte_superpage(&e) )
 1.06M |         {
 1.06M |             uint8_t ipat = 0;
 1.06M |
 1.06M |             if ( e.emt != MTRR_NUM_TYPES )
 1.06M |                 break;
 1.06M |
 2.73k |             if ( level == 0 )
 1.97k |             {
 1.01M |                 for ( gfn -= i, i = 0; i < EPT_PAGETABLE_ENTRIES; ++i )
 1.01M |                 {
 1.01M |                     p2m_type_t nt;
 1.01M |
 1.01M |                     e = atomic_read_ept_entry(&epte[i]);
 1.01M |                     if ( e.emt == MTRR_NUM_TYPES )
 1.00M |                         e.emt = 0;
 1.01M |                     if ( !is_epte_valid(&e) || !is_epte_present(&e) )
 5.02k |                         continue;
 1.00M |                     e.emt = epte_get_entry_emt(p2m->domain, gfn + i,
 1.00M |                                                _mfn(e.mfn), 0, &ipat,
 1.00M |                                                e.sa_p2mt == p2m_mmio_direct);
 1.00M |                     e.ipat = ipat;
 1.00M |
 1.00M |                     nt = p2m_recalc_type(e.recalc, e.sa_p2mt, p2m, gfn + i);
 1.00M |                     if ( nt != e.sa_p2mt )
     0 |                     {
     0 |                         if ( e.sa_p2mt == p2m_ioreq_server )
     0 |                         {
     0 |                             ASSERT(p2m->ioreq.entry_count > 0);
     0 |                             p2m->ioreq.entry_count--;
     0 |                         }
     0 |
     0 |                         e.sa_p2mt = nt;
     0 |                         ept_p2m_type_to_flags(p2m, &e, e.sa_p2mt, e.access);
     0 |                     }
 1.00M |                     e.recalc = 0;
 1.00M |                     wrc = atomic_write_ept_entry(&epte[i], e, level);
 1.00M |                     ASSERT(wrc == 0);
 1.00M |                 }
 1.97k |             }
 2.73k |             else
   755 |             {
   755 |                 int emt = epte_get_entry_emt(p2m->domain, gfn, _mfn(e.mfn),
   755 |                                              level * EPT_TABLE_ORDER, &ipat,
   755 |                                              e.sa_p2mt == p2m_mmio_direct);
   755 |                 bool_t recalc = e.recalc;
   755 |
   755 |                 if ( recalc && p2m_is_changeable(e.sa_p2mt) )
     0 |                 {
     0 |                     unsigned long mask = ~0UL << (level * EPT_TABLE_ORDER);
     0 |
     0 |                     ASSERT(e.sa_p2mt != p2m_ioreq_server);
     0 |                     switch ( p2m_is_logdirty_range(p2m, gfn & mask,
     0 |                                                    gfn | ~mask) )
     0 |                     {
     0 |                     case 0:
     0 |                          e.sa_p2mt = p2m_ram_rw;
     0 |                          e.recalc = 0;
     0 |                          break;
     0 |                     case 1:
     0 |                          e.sa_p2mt = p2m_ram_logdirty;
     0 |                          e.recalc = 0;
     0 |                          break;
     0 |                     default: /* Force split. */
     0 |                          emt = -1;
     0 |                          break;
     0 |                     }
     0 |                 }
   755 |                 if ( unlikely(emt < 0) )
     0 |                 {
     0 |                     if ( ept_split_super_page(p2m, &e, level, level - 1) )
     0 |                     {
     0 |                         wrc = atomic_write_ept_entry(&epte[i], e, level);
     0 |                         ASSERT(wrc == 0);
     0 |                         unmap_domain_page(epte);
     0 |                         mfn = e.mfn;
     0 |                         continue;
     0 |                     }
     0 |                     ept_free_entry(p2m, &e, level);
     0 |                     rc = -ENOMEM;
     0 |                     break;
     0 |                 }
   755 |                 e.emt = emt;
   755 |                 e.ipat = ipat;
   755 |                 e.recalc = 0;
   755 |                 if ( recalc && p2m_is_changeable(e.sa_p2mt) )
     0 |                     ept_p2m_type_to_flags(p2m, &e, e.sa_p2mt, e.access);
   755 |                 wrc = atomic_write_ept_entry(&epte[i], e, level);
   755 |                 ASSERT(wrc == 0);
   755 |             }
 2.73k |
 2.73k |             rc = 1;
 2.73k |             break;
 2.73k |         }
 4.28M |
 3.21M |         if ( e.emt == MTRR_NUM_TYPES )
 2.05k |         {
 2.05k |             ASSERT(is_epte_present(&e));
 2.05k |             ept_invalidate_emt(_mfn(e.mfn), e.recalc, level);
 2.05k |             smp_wmb();
 2.05k |             e.emt = 0;
 2.05k |             e.recalc = 0;
 2.05k |             wrc = atomic_write_ept_entry(&epte[i], e, level);
 2.05k |             ASSERT(wrc == 0);
 2.05k |             unmap_domain_page(epte);
 2.05k |             rc = 1;
 2.05k |         }
 3.21M |         else if ( is_epte_present(&e) && !e.emt )
 3.21M |             unmap_domain_page(epte);
 3.21M |         else
 1.93k |             break;
 3.21M |
 3.21M |         mfn = e.mfn;
 3.21M |     }
 1.07M |
 1.07M |     unmap_domain_page(epte);
 1.07M |     if ( rc )
 2.73k |     {
 2.73k |         struct vcpu *v;
 2.73k |
 2.73k |         for_each_vcpu ( p2m->domain, v )
 32.8k |             v->arch.hvm_vmx.ept_spurious_misconfig = 1;
 2.73k |     }
 1.07M |
 1.07M |     return rc;
 1.07M | }
       |
       | bool_t ept_handle_misconfig(uint64_t gpa)
 3.14k | {
 3.14k |     struct vcpu *curr = current;
 3.14k |     struct p2m_domain *p2m = p2m_get_hostp2m(curr->domain);
 3.14k |     bool_t spurious;
 3.14k |     int rc;
 3.14k |
 3.14k |     p2m_lock(p2m);
 3.14k |
 3.14k |     spurious = curr->arch.hvm_vmx.ept_spurious_misconfig;
 3.14k |     rc = resolve_misconfig(p2m, PFN_DOWN(gpa));
 3.14k |     curr->arch.hvm_vmx.ept_spurious_misconfig = 0;
 3.14k |
 3.14k |     p2m_unlock(p2m);
 3.14k |
 2.21k |     return spurious ? (rc >= 0) : (rc > 0);
 3.14k | }
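
Note: taken together, ept_invalidate_emt*() and resolve_misconfig() implement
lazy revalidation: changes are recorded by stamping entries with the invalid
EMT value MTRR_NUM_TYPES, the next guest access takes an EPT_MISCONFIG exit,
and ept_handle_misconfig() repairs only the path to the faulting GFN. A toy,
single-level model of the idea (hypothetical table, not Xen code):

    #include <stdio.h>

    #define INVALID_EMT 7               /* stand-in for MTRR_NUM_TYPES */
    #define ENTRIES     8

    static int emt[ENTRIES];

    static void invalidate_all(void)    /* cheap, one pass over the table */
    {
        for ( int i = 0; i < ENTRIES; i++ )
            emt[i] = INVALID_EMT;
    }

    static int access_gfn(int gfn)      /* "misconfig exit" fixes one entry */
    {
        if ( emt[gfn] == INVALID_EMT )
        {
            emt[gfn] = 6;               /* recompute, e.g. write-back */
            return 1;                   /* a misconfig exit happened */
        }
        return 0;
    }

    int main(void)
    {
        invalidate_all();
        printf("first access faulted: %d\n", access_gfn(3));   /* 1 */
        printf("second access faulted: %d\n", access_gfn(3));  /* 0 */
        return 0;
    }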
       |
       | /*
       |  * ept_set_entry() computes 'need_modify_vtd_table' for itself,
       |  * by observing whether any gfn->mfn translations are modified.
       |  *
       |  * Returns: 0 for success, -errno for failure
       |  */
       | static int
       | ept_set_entry(struct p2m_domain *p2m, gfn_t gfn_, mfn_t mfn,
       |               unsigned int order, p2m_type_t p2mt, p2m_access_t p2ma,
       |               int sve)
 1.06M | {
 1.06M |     ept_entry_t *table, *ept_entry = NULL;
 1.06M |     unsigned long gfn = gfn_x(gfn_);
 1.06M |     unsigned long gfn_remainder = gfn;
 1.06M |     unsigned int i, target = order / EPT_TABLE_ORDER;
 1.06M |     unsigned long fn_mask = !mfn_eq(mfn, INVALID_MFN) ? (gfn | mfn_x(mfn)) : gfn;
 1.06M |     int ret, rc = 0;
 1.06M |     bool_t entry_written = 0;
 1.06M |     bool_t direct_mmio = (p2mt == p2m_mmio_direct);
 1.06M |     uint8_t ipat = 0;
 1.06M |     bool_t need_modify_vtd_table = 1;
 1.06M |     bool_t vtd_pte_present = 0;
 1.06M |     unsigned int iommu_flags = p2m_get_iommu_flags(p2mt, mfn);
 1.06M |     bool_t needs_sync = 1;
 1.06M |     ept_entry_t old_entry = { .epte = 0 };
 1.06M |     ept_entry_t new_entry = { .epte = 0 };
 1.06M |     struct ept_data *ept = &p2m->ept;
 1.06M |     struct domain *d = p2m->domain;
 1.06M |
 1.06M |     ASSERT(ept);
 1.06M |     /*
 1.06M |      * the caller must make sure:
 1.06M |      * 1. passing valid gfn and mfn at order boundary.
 1.06M |      * 2. gfn not exceeding guest physical address width.
 1.06M |      * 3. passing a valid order.
 1.06M |      */
 1.06M |     if ( (fn_mask & ((1UL << order) - 1)) ||
 1.06M |          ((u64)gfn >> ((ept->wl + 1) * EPT_TABLE_ORDER)) ||
 1.06M |          (order % EPT_TABLE_ORDER) )
     0 |         return -EINVAL;
 1.06M |
 1.06M |     /* Carry out any eventually pending earlier changes first. */
 1.06M |     ret = resolve_misconfig(p2m, gfn);
 1.06M |     if ( ret < 0 )
     0 |         return ret;
 1.06M |
 1.06M |     ASSERT((target == 2 && hap_has_1gb) ||
 1.06M |            (target == 1 && hap_has_2mb) ||
 1.06M |            (target == 0));
 1.06M |     ASSERT(!p2m_is_foreign(p2mt) || target == 0);
 1.06M |
 1.06M |     table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
 1.06M |
 1.06M |     ret = GUEST_TABLE_MAP_FAILED;
 4.27M |     for ( i = ept->wl; i > target; i-- )
 3.20M |     {
 3.20M |         ret = ept_next_level(p2m, 0, &table, &gfn_remainder, i);
 3.20M |         if ( !ret )
     0 |         {
     0 |             rc = -ENOENT;
     0 |             goto out;
     0 |         }
 3.20M |         else if ( ret != GUEST_TABLE_NORMAL_PAGE )
     0 |             break;
 3.20M |     }
 1.06M |
 1.06M |     ASSERT(ret != GUEST_TABLE_POD_PAGE || i != target);
 1.06M |
 1.06M |     ept_entry = table + (gfn_remainder >> (i * EPT_TABLE_ORDER));
 1.06M |
 1.06M |     /* In case VT-d uses same page table, this flag is needed by VT-d */
 1.06M |     vtd_pte_present = is_epte_present(ept_entry);
 1.06M |
 1.06M |     /*
 1.06M |      * If we're here with i > target, we must be at a leaf node, and
 1.06M |      * we need to break up the superpage.
 1.06M |      *
 1.06M |      * If we're here with i == target and i > 0, we need to check to see
 1.06M |      * if we're replacing a non-leaf entry (i.e., pointing to an N-1 table)
 1.06M |      * with a leaf entry (a 1GiB or 2MiB page), and handle things appropriately.
 1.06M |      */
 1.06M |
 1.06M |     if ( i == target )
 1.06M |     {
 1.06M |         /* We reached the target level. */
 1.06M |
 1.06M |         /* No need to flush if the old entry wasn't valid */
 1.06M |         if ( !is_epte_present(ept_entry) )
  850k |             needs_sync = 0;
 1.06M |
 1.06M |         /* If we're replacing a non-leaf entry with a leaf entry (1GiB or 2MiB),
 1.06M |          * the intermediate tables will be freed below after the ept flush
 1.06M |          *
 1.06M |          * Read-then-write is OK because we hold the p2m lock. */
 1.06M |         old_entry = *ept_entry;
 1.06M |     }
 1.06M |     else
     0 |     {
     0 |         /* We need to split the original page. */
     0 |         ept_entry_t split_ept_entry;
     0 |
     0 |         ASSERT(is_epte_superpage(ept_entry));
     0 |
     0 |         split_ept_entry = atomic_read_ept_entry(ept_entry);
     0 |
     0 |         if ( !ept_split_super_page(p2m, &split_ept_entry, i, target) )
     0 |         {
     0 |             ept_free_entry(p2m, &split_ept_entry, i);
     0 |             rc = -ENOMEM;
     0 |             goto out;
     0 |         }
     0 |
     0 |         /* now install the newly split ept sub-tree */
     0 |         /* NB: please make sure the domain is paused and there is no in-flight VT-d DMA. */
     0 |         rc = atomic_write_ept_entry(ept_entry, split_ept_entry, i);
     0 |         ASSERT(rc == 0);
     0 |
     0 |         /* then move to the level we want to make real changes */
     0 |         for ( ; i > target; i-- )
     0 |             if ( !ept_next_level(p2m, 0, &table, &gfn_remainder, i) )
     0 |                 break;
     0 |         /* We just installed the pages we need. */
     0 |         ASSERT(i == target);
     0 |
     0 |         ept_entry = table + (gfn_remainder >> (i * EPT_TABLE_ORDER));
     0 |     }
 1.06M |
 1.06M |     if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) )
  850k |     {
  850k |         int emt = epte_get_entry_emt(p2m->domain, gfn, mfn,
  850k |                                      i * EPT_TABLE_ORDER, &ipat, direct_mmio);
  850k |
  850k |         if ( emt >= 0 )
  850k |             new_entry.emt = emt;
  850k |         else /* ept_handle_misconfig() will need to take care of this. */
     0 |             new_entry.emt = MTRR_NUM_TYPES;
  850k |
  850k |         new_entry.ipat = ipat;
  850k |         new_entry.sp = !!i;
  850k |         new_entry.sa_p2mt = p2mt;
  850k |         new_entry.access = p2ma;
  850k |         new_entry.snp = (iommu_enabled && iommu_snoop);
  850k |
  850k |         /* the caller should take care of the previous page */
  850k |         new_entry.mfn = mfn_x(mfn);
  850k |
  850k |         /* Safe to read-then-write because we hold the p2m lock */
  850k |         if ( ept_entry->mfn == new_entry.mfn &&
   150 |              p2m_get_iommu_flags(ept_entry->sa_p2mt, _mfn(ept_entry->mfn)) ==
   150 |              iommu_flags )
   150 |             need_modify_vtd_table = 0;
  850k |
  850k |         ept_p2m_type_to_flags(p2m, &new_entry, p2mt, p2ma);
  850k |     }
 1.06M |
 1.06M |     if ( sve != -1 )
     0 |         new_entry.suppress_ve = !!sve;
 1.06M |     else
 1.06M |         new_entry.suppress_ve = is_epte_valid(&old_entry) ?
  850k |                                     old_entry.suppress_ve : 1;
 1.06M |
 1.06M |     /*
 1.06M |      * p2m_ioreq_server is only used for 4K pages, so the
 1.06M |      * count is only done on ept page table entries.
 1.06M |      */
 1.06M |     if ( p2mt == p2m_ioreq_server )
     0 |     {
     0 |         ASSERT(i == 0);
     0 |         p2m->ioreq.entry_count++;
     0 |     }
 1.06M |
 1.06M |     if ( ept_entry->sa_p2mt == p2m_ioreq_server )
     0 |     {
     0 |         ASSERT(i == 0);
     0 |         ASSERT(p2m->ioreq.entry_count > 0);
     0 |         p2m->ioreq.entry_count--;
     0 |     }
 1.06M |
 1.06M |     rc = atomic_write_ept_entry(ept_entry, new_entry, target);
 1.06M |     if ( unlikely(rc) )
     0 |         old_entry.epte = 0;
 1.06M |     else
 1.06M |     {
 1.06M |         entry_written = 1;
 1.06M |
 1.06M |         if ( p2mt != p2m_invalid &&
  850k |              (gfn + (1UL << order) - 1 > p2m->max_mapped_pfn) )
 1.06M |             /* Track the highest gfn for which we have ever had a valid mapping */
 1.00k |             p2m->max_mapped_pfn = gfn + (1UL << order) - 1;
 1.06M |     }
 1.06M |
 1.06M | out:
 1.06M |     if ( needs_sync )
  218k |         ept_sync_domain(p2m);
 1.06M |
 1.06M |     /* For host p2m, may need to change VT-d page table.*/
 1.06M |     if ( rc == 0 && p2m_is_hostp2m(p2m) && need_iommu(d) &&
 1.06M |          need_modify_vtd_table )
 1.06M |     {
 1.06M |         if ( iommu_hap_pt_share )
     0 |             rc = iommu_pte_flush(d, gfn, &ept_entry->epte, order, vtd_pte_present);
 1.06M |         else
 1.06M |         {
 1.06M |             if ( iommu_flags )
 5.19M |                 for ( i = 0; i < (1 << order); i++ )
 4.34M |                 {
 4.34M |                     rc = iommu_map_page(d, gfn + i, mfn_x(mfn) + i, iommu_flags);
 4.34M |                     if ( unlikely(rc) )
     0 |                     {
     0 |                         while ( i-- )
     0 |                             /* If statement to satisfy __must_check. */
     0 |                             if ( iommu_unmap_page(p2m->domain, gfn + i) )
     0 |                                 continue;
     0 |
     0 |                         break;
     0 |                     }
 4.34M |                 }
 1.06M |             else
  436k |                 for ( i = 0; i < (1 << order); i++ )
  218k |                 {
  218k |                     ret = iommu_unmap_page(d, gfn + i);
  218k |                     if ( !rc )
  218k |                         rc = ret;
  218k |                 }
 1.06M |         }
 1.06M |     }
 1.06M |
 1.06M |     unmap_domain_page(table);
 1.06M |
 1.06M |     /* Release the old intermediate tables, if any.  This has to be the
 1.06M |        last thing we do, after the ept_sync_domain() and removal
 1.06M |        from the iommu tables, so as to avoid a potential
 1.06M |        use-after-free. */
 1.06M |     if ( is_epte_present(&old_entry) )
  218k |         ept_free_entry(p2m, &old_entry, target);
 1.06M |
 1.06M |     if ( entry_written && p2m_is_hostp2m(p2m) )
 1.06M |         p2m_altp2m_propagate_change(d, _gfn(gfn), mfn, order, p2mt, p2ma);
 1.06M |
 1.06M |     return rc;
 1.06M | }
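
Note: the -EINVAL check near the top of ept_set_entry() encodes the mapping
between order and leaf level: only order 0 (4K), 9 (2M) and 18 (1G) are
accepted, with gfn and mfn aligned to the order. Restated as a small
stand-alone helper (names hypothetical, frame numbers rather than addresses):

    #include <stdio.h>

    #define EPT_TABLE_ORDER 9

    static int valid_request(unsigned long gfn, unsigned long mfn,
                             unsigned int order)
    {
        if ( ((gfn | mfn) & ((1UL << order) - 1)) ||
             (order % EPT_TABLE_ORDER) )
            return -1;                      /* -EINVAL in the real code */
        return order / EPT_TABLE_ORDER;     /* the target walk level */
    }

    int main(void)
    {
        printf("4K : target %d\n", valid_request(0x1000, 0x2000, 0));  /* 0 */
        printf("2M : target %d\n", valid_request(0x200, 0x400, 9));    /* 1 */
        printf("bad: target %d\n", valid_request(0x201, 0x400, 9));    /* -1 */
        return 0;
    }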
       |
       | /* Read ept p2m entries */
       | static mfn_t ept_get_entry(struct p2m_domain *p2m,
       |                            gfn_t gfn_, p2m_type_t *t, p2m_access_t* a,
       |                            p2m_query_t q, unsigned int *page_order,
       |                            bool_t *sve)
 6.52M | {
 6.52M |     ept_entry_t *table =
 6.52M |         map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
 6.52M |     unsigned long gfn = gfn_x(gfn_);
 6.52M |     unsigned long gfn_remainder = gfn;
 6.52M |     ept_entry_t *ept_entry;
 6.52M |     u32 index;
 6.52M |     int i;
 6.52M |     int ret = 0;
 6.52M |     bool_t recalc = 0;
 6.52M |     mfn_t mfn = INVALID_MFN;
 6.52M |     struct ept_data *ept = &p2m->ept;
 6.52M |
 6.52M |     *t = p2m_mmio_dm;
 6.52M |     *a = p2m_access_n;
 6.52M |     if ( sve )
     0 |         *sve = 1;
 6.52M |
 6.52M |     /* This pfn is higher than the highest the p2m map currently holds */
 6.52M |     if ( gfn > p2m->max_mapped_pfn )
 3.47M |     {
 7.14M |         for ( i = ept->wl; i > 0; --i )
 7.14M |             if ( (gfn & ~((1UL << (i * EPT_TABLE_ORDER)) - 1)) >
 7.14M |                  p2m->max_mapped_pfn )
 3.47M |                 break;
 3.47M |         goto out;
 3.47M |     }
 6.52M |
 6.52M |     /* Should check if gfn obeys GAW here. */
 6.52M |
 11.4M |     for ( i = ept->wl; i > 0; i-- )
 9.03M |     {
 9.03M |     retry:
 9.03M |         if ( table[gfn_remainder >> (i * EPT_TABLE_ORDER)].recalc )
     0 |             recalc = 1;
 9.03M |         ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i);
 9.03M |         if ( !ret )
  572k |             goto out;
 8.46M |         else if ( ret == GUEST_TABLE_POD_PAGE )
     0 |         {
     0 |             if ( !(q & P2M_ALLOC) )
     0 |             {
     0 |                 *t = p2m_populate_on_demand;
     0 |                 goto out;
     0 |             }
     0 |
     0 |             /* Populate this superpage */
     0 |             ASSERT(i <= 2);
     0 |
     0 |             index = gfn_remainder >> ( i * EPT_TABLE_ORDER);
     0 |             ept_entry = table + index;
     0 |
     0 |             if ( p2m_pod_demand_populate(p2m, gfn_, i * EPT_TABLE_ORDER) )
     0 |                 goto retry;
     0 |             else
     0 |                 goto out;
     0 |         }
 8.46M |         else if ( ret == GUEST_TABLE_SUPER_PAGE )
 19.4k |             break;
 9.03M |     }
 3.04M |
 2.47M |     index = gfn_remainder >> (i * EPT_TABLE_ORDER);
 2.47M |     ept_entry = table + index;
 2.47M |
 2.47M |     if ( ept_entry->sa_p2mt == p2m_populate_on_demand )
     0 |     {
     0 |         if ( !(q & P2M_ALLOC) )
     0 |         {
     0 |             *t = p2m_populate_on_demand;
     0 |             goto out;
     0 |         }
     0 |
     0 |         ASSERT(i == 0);
     0 |
     0 |         if ( !p2m_pod_demand_populate(p2m, gfn_, PAGE_ORDER_4K) )
     0 |             goto out;
     0 |     }
 2.47M |
 2.47M |     if ( is_epte_valid(ept_entry) )
 2.18M |     {
 2.18M |         *t = p2m_recalc_type(recalc || ept_entry->recalc,
 2.18M |                              ept_entry->sa_p2mt, p2m, gfn);
 2.18M |         *a = ept_entry->access;
 2.18M |         if ( sve )
     0 |             *sve = ept_entry->suppress_ve;
 2.18M |
 2.18M |         mfn = _mfn(ept_entry->mfn);
 2.18M |         if ( i )
 19.4k |         {
 19.4k |             /*
 19.4k |              * We may have hit a superpage, and need to split it into
 19.4k |              * 4k pages to emulate the p2m table.
 19.4k |              */
 19.4k |             unsigned long split_mfn = mfn_x(mfn) +
 19.4k |                 (gfn_remainder &
 19.4k |                  ((1 << (i * EPT_TABLE_ORDER)) - 1));
 19.4k |             mfn = _mfn(split_mfn);
 19.4k |         }
 2.18M |     }
 2.47M |
 6.52M |  out:
 6.52M |     if ( page_order )
  507k |         *page_order = i * EPT_TABLE_ORDER;
 6.52M |
 6.52M |     unmap_domain_page(table);
 6.52M |     return mfn;
 2.47M | }
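
Note: when ept_get_entry() stops on a superpage leaf (i > 0), the entry's mfn
is the base of the whole 2M/1G region, and the 4K mfn for the queried gfn is
recovered from the unconsumed low gfn bits. The same computation in isolation,
with made-up numbers:

    #include <stdio.h>

    #define EPT_TABLE_ORDER 9

    int main(void)
    {
        unsigned long base_mfn = 0x40000;      /* hypothetical 2M leaf */
        unsigned long gfn_remainder = 0x123;   /* low 9 bits of the gfn */
        int i = 1;                             /* leaf found at level 1 */

        unsigned long mfn = base_mfn +
            (gfn_remainder & ((1UL << (i * EPT_TABLE_ORDER)) - 1));

        printf("mfn = %#lx\n", mfn);           /* 0x40123 */
        return 0;
    }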
       |
       | void ept_walk_table(struct domain *d, unsigned long gfn)
     0 | {
     0 |     struct p2m_domain *p2m = p2m_get_hostp2m(d);
     0 |     struct ept_data *ept = &p2m->ept;
     0 |     ept_entry_t *table =
     0 |         map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
     0 |     unsigned long gfn_remainder = gfn;
     0 |
     0 |     int i;
     0 |
     0 |     gprintk(XENLOG_ERR, "Walking EPT tables for GFN %lx:\n", gfn);
     0 |
     0 |     /* This pfn is higher than the highest the p2m map currently holds */
     0 |     if ( gfn > p2m->max_mapped_pfn )
     0 |     {
     0 |         gprintk(XENLOG_ERR, " gfn exceeds max_mapped_pfn %lx\n",
     0 |                 p2m->max_mapped_pfn);
     0 |         goto out;
     0 |     }
     0 |
     0 |     for ( i = ept->wl; i >= 0; i-- )
     0 |     {
     0 |         ept_entry_t *ept_entry, *next;
     0 |         u32 index;
     0 |
     0 |         /* Stolen from ept_next_level */
     0 |         index = gfn_remainder >> (i*EPT_TABLE_ORDER);
     0 |         ept_entry = table + index;
     0 |
     0 |         gprintk(XENLOG_ERR, " epte %"PRIx64"\n", ept_entry->epte);
     0 |
     0 |         if ( (i == 0) || !is_epte_present(ept_entry) ||
     0 |              is_epte_superpage(ept_entry) )
     0 |             goto out;
     0 |         else
     0 |         {
     0 |             gfn_remainder &= (1UL << (i*EPT_TABLE_ORDER)) - 1;
     0 |
     0 |             next = map_domain_page(_mfn(ept_entry->mfn));
     0 |
     0 |             unmap_domain_page(table);
     0 |
     0 |             table = next;
     0 |         }
     0 |     }
     0 |
     0 | out:
     0 |     unmap_domain_page(table);
     0 |     return;
     0 | }
       |
       | static void ept_change_entry_type_global(struct p2m_domain *p2m,
       |                                          p2m_type_t ot, p2m_type_t nt)
     0 | {
     0 |     unsigned long mfn = p2m->ept.mfn;
     0 |
     0 |     if ( !mfn )
     0 |         return;
     0 |
     0 |     if ( ept_invalidate_emt(_mfn(mfn), 1, p2m->ept.wl) )
     0 |         ept_sync_domain(p2m);
     0 | }
       |
       | static int ept_change_entry_type_range(struct p2m_domain *p2m,
       |                                        p2m_type_t ot, p2m_type_t nt,
       |                                        unsigned long first_gfn,
       |                                        unsigned long last_gfn)
     0 | {
     0 |     unsigned int i, wl = p2m->ept.wl;
     0 |     unsigned long mask = (1 << EPT_TABLE_ORDER) - 1;
     0 |     int rc = 0, sync = 0;
     0 |
     0 |     if ( !p2m->ept.mfn )
     0 |         return -EINVAL;
     0 |
     0 |     for ( i = 0; i <= wl; )
     0 |     {
     0 |         if ( first_gfn & mask )
     0 |         {
     0 |             unsigned long end_gfn = min(first_gfn | mask, last_gfn);
     0 |
     0 |             rc = ept_invalidate_emt_range(p2m, i, first_gfn, end_gfn);
     0 |             sync |= rc;
     0 |             if ( rc < 0 || end_gfn >= last_gfn )
     0 |                 break;
     0 |             first_gfn = end_gfn + 1;
     0 |         }
     0 |         else if ( (last_gfn & mask) != mask )
     0 |         {
     0 |             unsigned long start_gfn = max(first_gfn, last_gfn & ~mask);
     0 |
     0 |             rc = ept_invalidate_emt_range(p2m, i, start_gfn, last_gfn);
     0 |             sync |= rc;
     0 |             if ( rc < 0 || start_gfn <= first_gfn )
     0 |                 break;
     0 |             last_gfn = start_gfn - 1;
     0 |         }
     0 |         else
     0 |         {
     0 |             ++i;
     0 |             mask |= mask << EPT_TABLE_ORDER;
     0 |         }
     0 |     }
     0 |
     0 |     if ( sync )
     0 |         ept_sync_domain(p2m);
     0 |
     0 |     return rc < 0 ? rc : 0;
     0 | }
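
Note: ept_change_entry_type_range() peels unaligned head and tail pieces off
[first_gfn, last_gfn] at the current level, then widens the alignment mask by
another 9 bits and repeats, so large aligned middles are invalidated via a few
high-level entries. A stand-alone driver mirroring that loop, printing which
sub-range each level would handle (illustrative only):

    #include <stdio.h>

    #define EPT_TABLE_ORDER 9

    int main(void)
    {
        unsigned long first = 0x123, last = 0x40200;
        unsigned long mask = (1UL << EPT_TABLE_ORDER) - 1;
        unsigned int level = 0;

        while ( level <= 3 )                 /* wl = 3, as in the p2m */
        {
            if ( first & mask )              /* unaligned head piece */
            {
                unsigned long end = first | mask;
                if ( end > last )
                    end = last;
                printf("level %u: [%#lx, %#lx]\n", level, first, end);
                if ( end >= last )
                    break;
                first = end + 1;
            }
            else if ( (last & mask) != mask )  /* unaligned tail piece */
            {
                unsigned long start = last & ~mask;
                if ( start < first )
                    start = first;
                printf("level %u: [%#lx, %#lx]\n", level, start, last);
                if ( start <= first )
                    break;
                last = start - 1;
            }
            else                             /* fully aligned: go up a level */
            {
                ++level;
                mask |= mask << EPT_TABLE_ORDER;
            }
        }
        return 0;
    }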
       |
       | static void ept_memory_type_changed(struct p2m_domain *p2m)
    23 | {
    23 |     unsigned long mfn = p2m->ept.mfn;
    23 |
    23 |     if ( !mfn )
     0 |         return;
    23 |
    23 |     if ( ept_invalidate_emt(_mfn(mfn), 0, p2m->ept.wl) )
    23 |         ept_sync_domain(p2m);
    23 | }
       |
       | static void __ept_sync_domain(void *info)
 1.97M | {
 1.97M |     /*
 1.97M |      * The invalidation will be done before VMENTER (see
 1.97M |      * vmx_vmenter_helper()).
 1.97M |      */
 1.97M | }
       |
       | static void ept_sync_domain_prepare(struct p2m_domain *p2m)
  218k | {
  218k |     struct domain *d = p2m->domain;
  218k |     struct ept_data *ept = &p2m->ept;
  218k |
  218k |     if ( nestedhvm_enabled(d) )
     0 |     {
     0 |         if ( p2m_is_nestedp2m(p2m) )
     0 |             ept = &p2m_get_hostp2m(d)->ept;
     0 |         else
     0 |             p2m_flush_nestedp2m(d);
     0 |     }
  218k |
  218k |     /*
  218k |      * Need to invalidate on all PCPUs because either:
  218k |      *
  218k |      * a) A VCPU has run and some translations may be cached.
  218k |      * b) A VCPU has not run and the initial invalidation in case
  218k |      *    of an EP4TA reuse is still needed.
  218k |      */
  218k |     cpumask_setall(ept->invalidate);
  218k | }
       |
       | static void ept_sync_domain_mask(struct p2m_domain *p2m, const cpumask_t *mask)
  218k | {
  218k |     on_selected_cpus(mask, __ept_sync_domain, p2m, 1);
  218k | }
       |
       | void ept_sync_domain(struct p2m_domain *p2m)
  218k | {
  218k |     struct domain *d = p2m->domain;
  218k |
  218k |     /* Only if using EPT and this domain has some VCPUs to dirty. */
  218k |     if ( !paging_mode_hap(d) || !d->vcpu || !d->vcpu[0] )
     0 |         return;
  218k |
  218k |     ept_sync_domain_prepare(p2m);
  218k |
  218k |     if ( p2m->defer_flush )
  218k |     {
  218k |         p2m->need_flush = 1;
  218k |         return;
  218k |     }
  218k |
     0 |     ept_sync_domain_mask(p2m, d->domain_dirty_cpumask);
     0 | }
       |
       | static void ept_tlb_flush(struct p2m_domain *p2m)
  218k | {
  218k |     ept_sync_domain_mask(p2m, p2m->domain->domain_dirty_cpumask);
  218k | }
       |
       | static void ept_enable_pml(struct p2m_domain *p2m)
     0 | {
     0 |     /* Domain must have been paused */
     0 |     ASSERT(atomic_read(&p2m->domain->pause_count));
     0 |
     0 |     /*
     0 |      * No need to return whether vmx_domain_enable_pml has succeeded, as
     0 |      * ept_p2m_type_to_flags will do the check, and write protection will be
     0 |      * used if PML is not enabled.
     0 |      */
     0 |     if ( vmx_domain_enable_pml(p2m->domain) )
     0 |         return;
     0 |
     0 |     /* Enable EPT A/D bit for PML */
     0 |     p2m->ept.ad = 1;
     0 |     vmx_domain_update_eptp(p2m->domain);
     0 | }
       |
       | static void ept_disable_pml(struct p2m_domain *p2m)
     0 | {
     0 |     /* Domain must have been paused */
     0 |     ASSERT(atomic_read(&p2m->domain->pause_count));
     0 |
     0 |     vmx_domain_disable_pml(p2m->domain);
     0 |
     0 |     /* Disable EPT A/D bit */
     0 |     p2m->ept.ad = 0;
     0 |     vmx_domain_update_eptp(p2m->domain);
     0 | }
       |
       | static void ept_flush_pml_buffers(struct p2m_domain *p2m)
     0 | {
     0 |     /* Domain must have been paused */
     0 |     ASSERT(atomic_read(&p2m->domain->pause_count));
     0 |
     0 |     vmx_domain_flush_pml_buffers(p2m->domain);
     0 | }
       |
       | int ept_p2m_init(struct p2m_domain *p2m)
    21 | {
    21 |     struct ept_data *ept = &p2m->ept;
    21 |
    21 |     p2m->set_entry = ept_set_entry;
    21 |     p2m->get_entry = ept_get_entry;
    21 |     p2m->recalc = resolve_misconfig;
    21 |     p2m->change_entry_type_global = ept_change_entry_type_global;
    21 |     p2m->change_entry_type_range = ept_change_entry_type_range;
    21 |     p2m->memory_type_changed = ept_memory_type_changed;
    21 |     p2m->audit_p2m = NULL;
    21 |     p2m->tlb_flush = ept_tlb_flush;
    21 |
    21 |     /* Set the memory type used when accessing EPT paging structures. */
    21 |     ept->mt = EPT_DEFAULT_MT;
    21 |
    21 |     /* set EPT page-walk length, now it's actual walk length - 1, i.e. 3 */
    21 |     ept->wl = 3;
    21 |
    21 |     if ( cpu_has_vmx_pml )
     0 |     {
     0 |         p2m->enable_hardware_log_dirty = ept_enable_pml;
     0 |         p2m->disable_hardware_log_dirty = ept_disable_pml;
     0 |         p2m->flush_hardware_cached_dirty = ept_flush_pml_buffers;
     0 |     }
    21 |
    21 |     if ( !zalloc_cpumask_var(&ept->invalidate) )
     0 |         return -ENOMEM;
    21 |
    21 |     /*
    21 |      * Assume an initial invalidation is required, in case an EP4TA is
    21 |      * reused.
    21 |      */
    21 |     cpumask_setall(ept->invalidate);
    21 |
    21 |     return 0;
    21 | }
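
Note: the wl = 3 stored here is the architectural "EPT page-walk length minus
1" that, together with the paging-structure memory type (EPT_DEFAULT_MT,
write-back = 6) and the A/D-enable bit toggled by ept_enable_pml() and
ept_disable_pml(), makes up the EPTP value loaded into the VMCS. A sketch of
that composition following the Intel SDM field layout (simplified, no
reserved-bit checking; make_eptp() is a hypothetical helper, not Xen's):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t make_eptp(uint64_t pml4_mfn, unsigned int mt,
                              unsigned int wl, int ad)
    {
        return (pml4_mfn << 12) |          /* top-level table address */
               (mt & 7)         |          /* bits 2:0: memory type   */
               ((wl & 7) << 3)  |          /* bits 5:3: walk length-1 */
               ((uint64_t)!!ad << 6);      /* bit 6: A/D enable       */
    }

    int main(void)
    {
        /* hypothetical top-level table at mfn 0x1a2b3c, WB, wl=3, A/D off */
        printf("eptp = %#llx\n",
               (unsigned long long)make_eptp(0x1a2b3c, 6, 3, 0));
        return 0;
    }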
       |
       | void ept_p2m_uninit(struct p2m_domain *p2m)
     0 | {
     0 |     struct ept_data *ept = &p2m->ept;
     0 |     free_cpumask_var(ept->invalidate);
     0 | }
       |
       | static const char *memory_type_to_str(unsigned int x)
     0 | {
     0 |     static const char memory_types[8][3] = {
     0 |         [MTRR_TYPE_UNCACHABLE]     = "UC",
     0 |         [MTRR_TYPE_WRCOMB]         = "WC",
     0 |         [MTRR_TYPE_WRTHROUGH]      = "WT",
     0 |         [MTRR_TYPE_WRPROT]         = "WP",
     0 |         [MTRR_TYPE_WRBACK]         = "WB",
     0 |         [MTRR_NUM_TYPES]           = "??"
     0 |     };
     0 |
     0 |     ASSERT(x < ARRAY_SIZE(memory_types));
     0 |     return memory_types[x][0] ? memory_types[x] : "?";
     0 | }
       |
       | static void ept_dump_p2m_table(unsigned char key)
     0 | {
     0 |     struct domain *d;
     0 |     ept_entry_t *table, *ept_entry;
     0 |     int order;
     0 |     int i;
     0 |     int ret = 0;
     0 |     unsigned long gfn, gfn_remainder;
     0 |     unsigned long record_counter = 0;
     0 |     struct p2m_domain *p2m;
     0 |     struct ept_data *ept;
     0 |
     0 |     for_each_domain(d)
     0 |     {
     0 |         if ( !hap_enabled(d) )
     0 |             continue;
     0 |
     0 |         p2m = p2m_get_hostp2m(d);
     0 |         ept = &p2m->ept;
     0 |         printk("\ndomain%d EPT p2m table:\n", d->domain_id);
     0 |
     0 |         for ( gfn = 0; gfn <= p2m->max_mapped_pfn; gfn += 1UL << order )
     0 |         {
     0 |             char c = 0;
     0 |
     0 |             gfn_remainder = gfn;
     0 |             table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
     0 |
     0 |             for ( i = ept->wl; i > 0; i-- )
     0 |             {
     0 |                 ept_entry = table + (gfn_remainder >> (i * EPT_TABLE_ORDER));
     0 |                 if ( ept_entry->emt == MTRR_NUM_TYPES )
     0 |                     c = '?';
     0 |                 ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i);
     0 |                 if ( ret != GUEST_TABLE_NORMAL_PAGE )
     0 |                     break;
     0 |             }
     0 |
     0 |             order = i * EPT_TABLE_ORDER;
     0 |             ept_entry = table + (gfn_remainder >> order);
     0 |             if ( ret != GUEST_TABLE_MAP_FAILED && is_epte_valid(ept_entry) )
     0 |             {
     0 |                 if ( ept_entry->sa_p2mt == p2m_populate_on_demand )
     0 |                     printk("gfn: %13lx order: %2d PoD\n", gfn, order);
     0 |                 else
     0 |                     printk("gfn: %13lx order: %2d mfn: %13lx %c%c%c %c%c%c\n",
     0 |                            gfn, order, ept_entry->mfn + 0UL,
     0 |                            ept_entry->r ? 'r' : ' ',
     0 |                            ept_entry->w ? 'w' : ' ',
     0 |                            ept_entry->x ? 'x' : ' ',
     0 |                            memory_type_to_str(ept_entry->emt)[0],
     0 |                            memory_type_to_str(ept_entry->emt)[1]
     0 |                            ?: ept_entry->emt + '0',
     0 |                            c ?: ept_entry->ipat ? '!' : ' ');
     0 |
     0 |                 if ( !(record_counter++ % 100) )
     0 |                     process_pending_softirqs();
     0 |             }
     0 |             unmap_domain_page(table);
     0 |         }
     0 |     }
     0 | }
       |
       | void setup_ept_dump(void)
     1 | {
     1 |     register_keyhandler('D', ept_dump_p2m_table, "dump VT-x EPT tables", 0);
     1 | }
       |
       | void p2m_init_altp2m_ept(struct domain *d, unsigned int i)
     0 | {
     0 |     struct p2m_domain *p2m = d->arch.altp2m_p2m[i];
     0 |     struct ept_data *ept;
     0 |
     0 |     p2m->min_remapped_gfn = gfn_x(INVALID_GFN);
     0 |     p2m->max_remapped_gfn = 0;
     0 |     ept = &p2m->ept;
     0 |     ept->mfn = pagetable_get_pfn(p2m_get_pagetable(p2m));
     0 |     d->arch.altp2m_eptp[i] = ept->eptp;
     0 | }
       |
       | unsigned int p2m_find_altp2m_by_eptp(struct domain *d, uint64_t eptp)
     0 | {
     0 |     struct p2m_domain *p2m;
     0 |     struct ept_data *ept;
     0 |     unsigned int i;
     0 |
     0 |     altp2m_list_lock(d);
     0 |
     0 |     for ( i = 0; i < MAX_ALTP2M; i++ )
     0 |     {
     0 |         if ( d->arch.altp2m_eptp[i] == mfn_x(INVALID_MFN) )
     0 |             continue;
     0 |
     0 |         p2m = d->arch.altp2m_p2m[i];
     0 |         ept = &p2m->ept;
     0 |
     0 |         if ( eptp == ept->eptp )
     0 |             goto out;
     0 |     }
     0 |
     0 |     i = INVALID_ALTP2M;
     0 |
     0 |  out:
     0 |     altp2m_list_unlock(d);
     0 |     return i;
     0 | }
       |
       | /*
       |  * Local variables:
       |  * mode: C
       |  * c-file-style: "BSD"
       |  * c-basic-offset: 4
       |  * tab-width: 4
       |  * indent-tabs-mode: nil
       |  * End:
       |  */