Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/arch/x86/mm/guest_walk.c
Line | Count | Source
   1 |       | /******************************************************************************
   2 |       |  * arch/x86/mm/guest_walk.c
   3 |       |  *
   4 |       |  * Pagetable walker for guest memory accesses.
   5 |       |  *
   6 |       |  * Parts of this code are Copyright (c) 2006 by XenSource Inc.
   7 |       |  * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
   8 |       |  * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
   9 |       |  *
  10 |       |  * This program is free software; you can redistribute it and/or modify
  11 |       |  * it under the terms of the GNU General Public License as published by
  12 |       |  * the Free Software Foundation; either version 2 of the License, or
  13 |       |  * (at your option) any later version.
  14 |       |  *
  15 |       |  * This program is distributed in the hope that it will be useful,
  16 |       |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 |       |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 |       |  * GNU General Public License for more details.
  19 |       |  *
  20 |       |  * You should have received a copy of the GNU General Public License
  21 |       |  * along with this program; If not, see <http://www.gnu.org/licenses/>.
  22 |       |  */
  23 |       |
  24 |       | /* Allow uniquely identifying static symbols in the 3 generated objects. */
  25 |       | asm(".file \"" __OBJECT_FILE__ "\"");
  26 |       |
  27 |       | #include <xen/types.h>
  28 |       | #include <xen/mm.h>
  29 |       | #include <xen/paging.h>
  30 |       | #include <xen/domain_page.h>
  31 |       | #include <xen/sched.h>
  32 |       | #include <asm/page.h>
  33 |       | #include <asm/guest_pt.h>
  34 |       |
  35 |       | /*
  36 |       |  * Modify a guest pagetable entry to set the Accessed and Dirty bits.
  37 |       |  * Returns true if it actually writes to guest memory.
  38 |       |  */
  39 |       | static bool set_ad_bits(guest_intpte_t *guest_p, guest_intpte_t *walk_p,
  40 |       |                         bool set_dirty)
  41 | 1.23M | {
  42 | 1.23M |     guest_intpte_t new, old = *walk_p;
  43 | 1.23M |
  44 | 1.23M |     new = old | _PAGE_ACCESSED | (set_dirty ? _PAGE_DIRTY : 0);
  45 | 1.23M |     if ( old != new )
  46 |     0 |     {
  47 |     0 |         /*
  48 |     0 |          * Write the new entry into the walk, and try to write it back
  49 |     0 |          * into the guest table as well.  If the guest table has changed
  50 |     0 |          * under our feet then leave it alone.
  51 |     0 |          */
  52 |     0 |         *walk_p = new;
  53 |     0 |         if ( cmpxchg(guest_p, old, new) == old )
  54 |     0 |             return true;
  55 |     0 |     }
  56 | 1.23M |     return false;
  57 | 1.23M | }
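
The zero counts at lines 46-55 show that the write-back path of set_ad_bits() never ran: every entry this workload touched already had Accessed (and, where relevant, Dirty) set, so old == new on all 1.23M calls. A minimal standalone sketch of the same lost-update-safe pattern, using C11 atomics in place of Xen's cmpxchg() and assumed values for the two PTE bits:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define _PAGE_ACCESSED 0x20u   /* assumed x86 PTE bit values */
    #define _PAGE_DIRTY    0x40u

    /* Mirrors the shape of set_ad_bits() above: update the local walk
     * copy, then publish to the guest table only if it is unchanged. */
    static bool set_ad_bits_sketch(_Atomic uint64_t *guest_p,
                                   uint64_t *walk_p, bool set_dirty)
    {
        uint64_t new, old = *walk_p;

        new = old | _PAGE_ACCESSED | (set_dirty ? _PAGE_DIRTY : 0);
        if ( old != new )
        {
            *walk_p = new;
            /* Succeeds only if the guest entry still holds 'old'. */
            if ( atomic_compare_exchange_strong(guest_p, &old, new) )
                return true;
        }
        return false;
    }

Returning false when the compare-exchange loses the race matches the comment at lines 48-51: an entry that changed under our feet is left alone rather than retried.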
  58 |       |
  59 |       | /*
  60 |       |  * Walk the guest pagetables, after the manner of a hardware walker.
  61 |       |  *
  62 |       |  * This is a condensing of the 'Paging' chapters from Intel and AMD software
  63 |       |  * manuals.  Please refer closely to them.
  64 |       |  *
  65 |       |  * A pagetable walk consists of two parts:
  66 |       |  *   1) to find whether a translation exists, and
  67 |       |  *   2) if a translation does exist, to check whether the translation's access
  68 |       |  *      rights permit the access.
  69 |       |  *
  70 |       |  * A translation is found by following the pagetable structure (starting at
  71 |       |  * %cr3) to a leaf entry (an L1 PTE, or a higher level entry with PSE set)
  72 |       |  * which identifies the physical destination of the access.
  73 |       |  *
  74 |       |  * A translation from one level to the next exists if the PTE is both present
  75 |       |  * and has no reserved bits set.  If the pagewalk encounters a situation where a
  76 |       |  * translation does not exist, the walk stops at that point.
  77 |       |  *
  78 |       |  * The access rights (NX, User, RW bits) are collected as the walk progresses.
  79 |       |  * If a translation exists, the accumulated access rights are compared to the
  80 |       |  * requested walk, to see whether the access is permitted.
  81 |       |  */
  82 |       | bool
  83 |       | guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m,
  84 |       |                   unsigned long va, walk_t *gw,
  85 |       |                   uint32_t walk, mfn_t top_mfn, void *top_map)
  86 |  364k | {
  87 |  364k |     struct domain *d = v->domain;
  88 |  364k |     p2m_type_t p2mt;
  89 |  364k |     guest_l1e_t *l1p = NULL;
  90 |  364k |     guest_l2e_t *l2p = NULL;
  91 |  364k | #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
  92 |  364k |     guest_l3e_t *l3p = NULL;
  93 |       |     guest_l4e_t *l4p;
  94 |       | #endif
  95 |  364k |     uint32_t gflags, rc;
  96 |  364k |     unsigned int leaf_level;
  97 |  364k |     p2m_query_t qt = P2M_ALLOC | P2M_UNSHARE;
  98 |  364k |
  99 |  365k | #define AR_ACCUM_AND (_PAGE_USER | _PAGE_RW)
 100 |  365k | #define AR_ACCUM_OR  (_PAGE_NX_BIT)
 101 |  364k |     /* Start with all AND bits set, all OR bits clear. */
 102 |  364k |     uint32_t ar, ar_and = ~0u, ar_or = 0;
 103 |  364k |
 104 |  364k |     bool walk_ok = false;
 105 |  364k |
 106 |  364k |     /*
 107 |  364k |      * TODO - We should ASSERT() that only the following bits are set as
 108 |  364k |      * inputs to a guest walk, but a whole load of code currently passes in
 109 |  364k |      * other PFEC_ constants.
 110 |  364k |      */
 111 |  364k |     walk &= (PFEC_implicit | PFEC_insn_fetch | PFEC_user_mode | PFEC_write_access);
 112 |  364k |
 113 |  364k |     /* Only implicit supervisor data accesses exist. */
 114 |  364k |     ASSERT(!(walk & PFEC_implicit) ||
 115 |  364k |            !(walk & (PFEC_insn_fetch | PFEC_user_mode)));
 116 |  364k |
 117 |  364k |     perfc_incr(guest_walk);
 118 |  364k |     memset(gw, 0, sizeof(*gw));
 119 |  364k |     gw->va = va;
 120 |  364k |     gw->pfec = walk & (PFEC_user_mode | PFEC_write_access);
 121 |  364k |
 122 |  364k |     /*
 123 |  364k |      * PFEC_insn_fetch is only reported if NX or SMEP are enabled.  Hardware
 124 |  364k |      * still distinguishes instruction fetches during determination of access
 125 |  364k |      * rights.
 126 |  364k |      */
 127 |  364k |     if ( guest_nx_enabled(v) || guest_smep_enabled(v) )
 128 |  365k |         gw->pfec |= (walk & PFEC_insn_fetch);
 129 |  364k |
 130 |  364k | #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
 131 |       | #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
 132 |       |
 133 |       |     /* Get the l4e from the top level table and check its flags */
 134 |       |     gw->l4mfn = top_mfn;
 135 |       |     l4p = (guest_l4e_t *) top_map;
 136 |  364k |     gw->l4e = l4p[guest_l4_table_offset(va)];
 137 |       |     gflags = guest_l4e_get_flags(gw->l4e);
 138 |  364k |     if ( !(gflags & _PAGE_PRESENT) )
 139 |     0 |         goto out;
 140 |       |
 141 |       |     /* Check for reserved bits. */
 142 |  364k |     if ( guest_l4e_rsvd_bits(v, gw->l4e) )
 143 |     0 |     {
 144 |     0 |         gw->pfec |= PFEC_reserved_bit | PFEC_page_present;
 145 |     0 |         goto out;
 146 |     0 |     }
 147 |  364k |
 148 |  364k |     /* Accumulate l4e access rights. */
 149 |  364k |     ar_and &= gflags;
 150 |  364k |     ar_or  |= gflags;
 151 |  364k |
 152 |  364k |     /* Map the l3 table */
 153 |  364k |     l3p = map_domain_gfn(p2m,
 154 |  364k |                          guest_l4e_get_gfn(gw->l4e),
 155 |  364k |                          &gw->l3mfn,
 156 |  364k |                          &p2mt,
 157 |  364k |                          qt,
 158 |  364k |                          &rc);
 159 |  364k |     if ( l3p == NULL )
 160 |     0 |     {
 161 |     0 |         gw->pfec |= rc & PFEC_synth_mask;
 162 |     0 |         goto out;
 163 |     0 |     }
 164 |  364k |
 165 |  364k |     /* Get the l3e and check its flags */
 166 |  364k |     gw->l3e = l3p[guest_l3_table_offset(va)];
 167 |  364k |     gflags = guest_l3e_get_flags(gw->l3e);
 168 |  364k |     if ( !(gflags & _PAGE_PRESENT) )
 169 |     0 |         goto out;
 170 |  364k |
 171 |  364k |     /* Check for reserved bits, including possibly _PAGE_PSE. */
 172 |  364k |     if ( guest_l3e_rsvd_bits(v, gw->l3e) )
 173 |     0 |     {
 174 |     0 |         gw->pfec |= PFEC_reserved_bit | PFEC_page_present;
 175 |     0 |         goto out;
 176 |     0 |     }
 177 |  364k |
 178 |  364k |     /* Accumulate l3e access rights. */
 179 |  364k |     ar_and &= gflags;
 180 |  364k |     ar_or  |= gflags;
 181 |  364k |
 182 |  364k |     if ( gflags & _PAGE_PSE )
 183 |     0 |     {
 184 |     0 |         /*
 185 |     0 |          * Generate a fake l1 table entry so callers don't all
 186 |     0 |          * have to understand superpages.
 187 |     0 |          */
 188 |     0 |         gfn_t start = guest_l3e_get_gfn(gw->l3e);
 189 |     0 |         /*
 190 |     0 |          * Grant full access in the l1e, since all the guest entry's
 191 |     0 |          * access controls are enforced in the l3e.
 192 |     0 |          */
 193 |     0 |         int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
 194 |     0 |                      _PAGE_ACCESSED|_PAGE_DIRTY);
 195 |     0 |         /*
 196 |     0 |          * Import protection key and cache-control bits. Note that _PAGE_PAT
 197 |     0 |          * is actually _PAGE_PSE, and it is always set. We will clear it in
 198 |     0 |          * case _PAGE_PSE_PAT (bit 12, i.e. first bit of gfn) is clear.
 199 |     0 |          */
 200 |     0 |         flags |= (guest_l3e_get_flags(gw->l3e)
 201 |     0 |                   & (_PAGE_PKEY_BITS|_PAGE_PAT|_PAGE_PWT|_PAGE_PCD));
 202 |     0 |         if ( !(gfn_x(start) & 1) )
 203 |     0 |             /* _PAGE_PSE_PAT not set: remove _PAGE_PAT from flags. */
 204 |     0 |             flags &= ~_PAGE_PAT;
 205 |     0 |
 206 |     0 |         /* Increment the pfn by the right number of 4k pages. */
 207 |     0 |         start = _gfn((gfn_x(start) & ~GUEST_L3_GFN_MASK) +
 208 |     0 |                      ((va >> PAGE_SHIFT) & GUEST_L3_GFN_MASK));
 209 |     0 |         gw->l1e = guest_l1e_from_gfn(start, flags);
 210 |     0 |         gw->l2mfn = gw->l1mfn = INVALID_MFN;
 211 |     0 |         leaf_level = 3;
 212 |     0 |         goto leaf;
 213 |     0 |     }
 214 |  364k |
 215 |  364k | #else /* PAE only... */
 216 |       |
 217 |       |     /* Get the l3e and check its flags */
 218 |     0 |     gw->l3e = ((guest_l3e_t *) top_map)[guest_l3_table_offset(va)];
 219 |       |     gflags = guest_l3e_get_flags(gw->l3e);
 220 |     0 |     if ( !(gflags & _PAGE_PRESENT) )
 221 |     0 |         goto out;
 222 |       |
 223 |     0 |     if ( guest_l3e_rsvd_bits(v, gw->l3e) )
 224 |     0 |     {
 225 |     0 |         gw->pfec |= PFEC_reserved_bit | PFEC_page_present;
 226 |     0 |         goto out;
 227 |     0 |     }
 228 |     0 |
 229 |     0 | #endif /* PAE or 64... */
 230 |     0 |
 231 |     0 |     /* Map the l2 table */
 232 |  364k |     l2p = map_domain_gfn(p2m,
 233 |  364k |                          guest_l3e_get_gfn(gw->l3e),
 234 |  364k |                          &gw->l2mfn,
 235 |  364k |                          &p2mt,
 236 |  364k |                          qt,
 237 |  364k |                          &rc);
 238 |  364k |     if ( l2p == NULL )
 239 |     0 |     {
 240 |     0 |         gw->pfec |= rc & PFEC_synth_mask;
 241 |     0 |         goto out;
 242 |     0 |     }
 243 |  364k |
 244 |  364k |     /* Get the l2e */
 245 |  364k |     gw->l2e = l2p[guest_l2_table_offset(va)];
 246 |  364k |
 247 |  364k | #else /* 32-bit only... */
 248 |       |
 249 |       |     /* Get l2e from the top level table */
 250 |       |     gw->l2mfn = top_mfn;
 251 |       |     l2p = (guest_l2e_t *) top_map;
 252 |     0 |     gw->l2e = l2p[guest_l2_table_offset(va)];
 253 |       |
 254 |       | #endif /* All levels... */
 255 |  364k |
 256 |  364k |     /* Check the l2e flags. */
 257 |  364k |     gflags = guest_l2e_get_flags(gw->l2e);
 258 |  364k |     if ( !(gflags & _PAGE_PRESENT) )
 259 |     0 |         goto out;
 260 |  364k |
 261 |  364k |     /*
 262 |  364k |      * In 2-level paging without CR0.PSE, there are no reserved bits, and the
 263 |  364k |      * PAT/PSE bit is ignored.
 264 |  364k |      */
 265 |  364k |     if ( GUEST_PAGING_LEVELS == 2 && !guest_can_use_l2_superpages(v) )
 266 |     0 |     {
 267 |     0 |         gw->l2e.l2 &= ~_PAGE_PSE;
 268 |     0 |         gflags &= ~_PAGE_PSE;
 269 |     0 |     }
 270 |  364k |     /* else check for reserved bits, including possibly _PAGE_PSE. */
 271 |  364k |     else if ( guest_l2e_rsvd_bits(v, gw->l2e) )
 272 |     0 |     {
 273 |     0 |         gw->pfec |= PFEC_reserved_bit | PFEC_page_present;
 274 |     0 |         goto out;
 275 |     0 |     }
 276 |  364k |
 277 |  364k |     /* Accumulate l2e access rights. */
 278 |  364k |     ar_and &= gflags;
 279 |  364k |     ar_or  |= gflags;
 280 |  364k |
 281 |  364k |     if ( gflags & _PAGE_PSE )
 282 |  225k |     {
 283 |  225k |         /*
 284 |  225k |          * Special case: this guest VA is in a PSE superpage, so there's
 285 |  225k |          * no guest l1e.  We make one up so that the propagation code
 286 |  225k |          * can generate a shadow l1 table.  Start with the gfn of the
 287 |  225k |          * first 4k-page of the superpage.
 288 |  225k |          */
 289 |  225k | #if GUEST_PAGING_LEVELS == 2
 290 |     0 |         gfn_t start = _gfn(unfold_pse36(gw->l2e.l2) >> PAGE_SHIFT);
 291 |       | #else
 292 |       |         gfn_t start = guest_l2e_get_gfn(gw->l2e);
 293 |       | #endif
 294 |  225k |         /*
 295 |  225k |          * Grant full access in the l1e, since all the guest entry's
 296 |  225k |          * access controls are enforced in the shadow l2e.
 297 |  225k |          */
 298 |  225k |         int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
 299 |  225k |                      _PAGE_ACCESSED|_PAGE_DIRTY);
 300 |  225k |         /*
 301 |  225k |          * Import protection key and cache-control bits. Note that _PAGE_PAT
 302 |  225k |          * is actually _PAGE_PSE, and it is always set. We will clear it in
 303 |  225k |          * case _PAGE_PSE_PAT (bit 12, i.e. first bit of gfn) is clear.
 304 |  225k |          */
 305 |  225k |         flags |= (guest_l2e_get_flags(gw->l2e)
 306 |  225k |                   & (_PAGE_PKEY_BITS|_PAGE_PAT|_PAGE_PWT|_PAGE_PCD));
 307 |  225k |         if ( !(gfn_x(start) & 1) )
 308 |  225k |             /* _PAGE_PSE_PAT not set: remove _PAGE_PAT from flags. */
 309 |  225k |             flags &= ~_PAGE_PAT;
 310 |  225k |
 311 |  225k |         /* Increment the pfn by the right number of 4k pages. */
 312 |  225k |         start = _gfn((gfn_x(start) & ~GUEST_L2_GFN_MASK) +
 313 |  225k |                      guest_l1_table_offset(va));
 314 |  225k | #if GUEST_PAGING_LEVELS == 2
 315 |       |          /* Wider than 32 bits if PSE36 superpage. */
 316 |     0 |         gw->el1e = (gfn_x(start) << PAGE_SHIFT) | flags;
 317 |       | #else
 318 |       |         gw->l1e = guest_l1e_from_gfn(start, flags);
 319 |       | #endif
 320 |  225k |         gw->l1mfn = INVALID_MFN;
 321 |  225k |         leaf_level = 2;
 322 |  225k |         goto leaf;
 323 |  225k |     }
 324 |  364k |
 325 |  364k |     /* Map the l1 table */
 326 |  139k |     l1p = map_domain_gfn(p2m,
 327 |  139k |                          guest_l2e_get_gfn(gw->l2e),
 328 |  139k |                          &gw->l1mfn,
 329 |  139k |                          &p2mt,
 330 |  139k |                          qt,
 331 |  139k |                          &rc);
 332 |  139k |     if ( l1p == NULL )
 333 |     0 |     {
 334 |     0 |         gw->pfec |= rc & PFEC_synth_mask;
 335 |     0 |         goto out;
 336 |     0 |     }
 337 |  139k |     gw->l1e = l1p[guest_l1_table_offset(va)];
 338 |  139k |     gflags = guest_l1e_get_flags(gw->l1e);
 339 |  139k |     if ( !(gflags & _PAGE_PRESENT) )
 340 |     0 |         goto out;
 341 |  139k |
 342 |  139k |     /* Check for reserved bits. */
 343 |  139k |     if ( guest_l1e_rsvd_bits(v, gw->l1e) )
 344 |     0 |     {
 345 |     0 |         gw->pfec |= PFEC_reserved_bit | PFEC_page_present;
 346 |     0 |         goto out;
 347 |     0 |     }
 348 |  139k |
 349 |  139k |     /* Accumulate l1e access rights. */
 350 |  139k |     ar_and &= gflags;
 351 |  139k |     ar_or  |= gflags;
 352 |  139k |
 353 |  139k |     leaf_level = 1;
 354 |  139k |
 355 |  365k |  leaf:
 356 |  365k |     gw->pfec |= PFEC_page_present;
 357 |  365k |
 358 |  365k |     /*
 359 |  365k |      * The pagetable walk has returned a successful translation (i.e. all PTEs
 360 |  365k |      * are present and have no reserved bits set).  Now check access rights to
 361 |  365k |      * see whether the access should succeed.
 362 |  365k |      */
 363 |  365k |     ar = (ar_and & AR_ACCUM_AND) | (ar_or & AR_ACCUM_OR);
 364 |  365k |
 365 |  365k |     /*
 366 |  365k |      * Sanity check.  If EFER.NX is disabled, _PAGE_NX_BIT is reserved and
 367 |  365k |      * should have caused a translation failure before we get here.
 368 |  365k |      */
 369 |  365k |     if ( ar & _PAGE_NX_BIT )
 370 |   257 |         ASSERT(guest_nx_enabled(v));
 371 |  365k |
 372 |  365k | #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
 373 |       |     /*
 374 |       |      * If all access checks are thus far ok, check Protection Key for 64bit
 375 |       |      * data accesses to user mappings.
 376 |       |      *
 377 |       |      * N.B. In the case that the walk ended with a superpage, the fabricated
 378 |       |      * gw->l1e contains the appropriate leaf pkey.
 379 |       |      */
 380 |  365k |     if ( (ar & _PAGE_USER) && !(walk & PFEC_insn_fetch) &&
 381 |   282 |          guest_pku_enabled(v) )
 382 |     0 |     {
 383 |     0 |         unsigned int pkey = guest_l1e_get_pkey(gw->l1e);
 384 |     0 |         unsigned int pkru = read_pkru();
 385 |     0 |
 386 |     0 |         if ( read_pkru_ad(pkru, pkey) ||
 387 |     0 |              ((walk & PFEC_write_access) && read_pkru_wd(pkru, pkey) &&
 388 |     0 |               ((walk & PFEC_user_mode) || guest_wp_enabled(v))) )
 389 |     0 |         {
 390 |     0 |             gw->pfec |= PFEC_prot_key;
 391 |     0 |             goto out;
 392 |     0 |         }
 393 |     0 |     }
 394 |       | #endif
 395 |  365k |
 396 |  365k |     if ( (walk & PFEC_insn_fetch) && (ar & _PAGE_NX_BIT) )
 397 |  365k |         /* Requested an instruction fetch and found NX? Fail. */
 398 |     0 |         goto out;
 399 |  365k |
 400 |  365k |     if ( walk & PFEC_user_mode ) /* Requested a user access. */
 401 |     0 |     {
 402 |     0 |         if ( !(ar & _PAGE_USER) )
 403 |     0 |             /* Got a supervisor walk?  Unconditional fail. */
 404 |     0 |             goto out;
 405 |     0 |
 406 |     0 |         if ( (walk & PFEC_write_access) && !(ar & _PAGE_RW) )
 407 |     0 |             /* Requested a write and only got a read? Fail. */
 408 |     0 |             goto out;
 409 |     0 |     }
 410 |  365k |     else /* Requested a supervisor access. */
 411 |  365k |     {
 412 |  365k |         if ( ar & _PAGE_USER ) /* Got a user walk. */
 413 |   282 |         {
 414 |   282 |             if ( (walk & PFEC_insn_fetch) && guest_smep_enabled(v) )
 415 |   282 |                 /* User insn fetch and SMEP? Fail. */
 416 |     0 |                 goto out;
 417 |   282 |
 418 |   282 |             if ( !(walk & PFEC_insn_fetch) && guest_smap_enabled(v) &&
 419 |     0 |                  ((walk & PFEC_implicit) ||
 420 |     0 |                   !(guest_cpu_user_regs()->eflags & X86_EFLAGS_AC)) )
 421 |   282 |                 /* User data access and SMAP? Fail. */
 422 |     0 |                 goto out;
 423 |   282 |         }
 424 |  365k |
 425 |  365k |         if ( (walk & PFEC_write_access) && !(ar & _PAGE_RW) &&
 426 |     0 |              guest_wp_enabled(v) )
 427 |  365k |             /* Requested a write, got a read, and CR0.WP is set? Fail. */
 428 |     0 |             goto out;
 429 |  365k |     }
 430 |  365k |
 431 |  365k |     walk_ok = true;
 432 |  365k |
 433 |  365k |     /*
 434 |  365k |      * Go back and set accessed and dirty bits only if the walk was a
 435 |  365k |      * success.  Although the PRMs say higher-level _PAGE_ACCESSED bits
 436 |  365k |      * get set whenever a lower-level PT is used, at least some hardware
 437 |  365k |      * walkers behave this way.
 438 |  365k |      */
 439 |  365k |     switch ( leaf_level )
 440 |  365k |     {
 441 |     0 |     default:
 442 |     0 |         ASSERT_UNREACHABLE();
 443 |     0 |         break;
 444 |     0 |
 445 |     0 |     case 1:
 446 |  139k |         if ( set_ad_bits(&l1p[guest_l1_table_offset(va)].l1, &gw->l1e.l1,
 447 |  139k |                          (walk & PFEC_write_access)) )
 448 |     0 |             paging_mark_dirty(d, gw->l1mfn);
 449 |     0 |         /* Fallthrough */
 450 |     0 |     case 2:
 451 |  364k |         if ( set_ad_bits(&l2p[guest_l2_table_offset(va)].l2, &gw->l2e.l2,
 452 |  364k |                          (walk & PFEC_write_access) && leaf_level == 2) )
 453 |     0 |             paging_mark_dirty(d, gw->l2mfn);
 454 |  364k |         /* Fallthrough */
 455 |  364k | #if GUEST_PAGING_LEVELS == 4 /* 64-bit only... */
 456 |  364k |     case 3:
 457 |  364k |         if ( set_ad_bits(&l3p[guest_l3_table_offset(va)].l3, &gw->l3e.l3,
 458 |  364k |                          (walk & PFEC_write_access) && leaf_level == 3) )
 459 |     0 |             paging_mark_dirty(d, gw->l3mfn);
 460 |  364k |
 461 |  364k |         if ( set_ad_bits(&l4p[guest_l4_table_offset(va)].l4, &gw->l4e.l4,
 462 |  364k |                          false) )
 463 |     0 |             paging_mark_dirty(d, gw->l4mfn);
 464 |       | #endif
 465 |  364k |     }
 466 |  364k |
 467 |  364k |  out:
 468 |  364k | #if GUEST_PAGING_LEVELS == 4
 469 |  364k |     if ( l3p )
 470 |  364k |     {
 471 |  364k |         unmap_domain_page(l3p);
 472 |  364k |         put_page(mfn_to_page(mfn_x(gw->l3mfn)));
 473 |  364k |     }
 474 |  364k | #endif
 475 |  364k | #if GUEST_PAGING_LEVELS >= 3
 476 |  364k |     if ( l2p )
 477 |  364k |     {
 478 |  364k |         unmap_domain_page(l2p);
 479 |  364k |         put_page(mfn_to_page(mfn_x(gw->l2mfn)));
 480 |  364k |     }
 481 |  364k | #endif
 482 |  364k |     if ( l1p )
 483 |  364k |     {
 484 |  364k |         unmap_domain_page(l1p);
 485 |  364k |         put_page(mfn_to_page(mfn_x(gw->l1mfn)));
 486 |  364k |     }
 487 |  364k |
 488 |  364k |     return walk_ok;
 489 |  364k | }
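
The permission logic above condenses into the two accumulators declared at lines 99-102 and folded at line 363: _PAGE_USER and _PAGE_RW are AND-accumulated, so every level must grant them, while _PAGE_NX_BIT is OR-accumulated, so any single level can revoke execute. A standalone sketch of that fold (the flag values here are placeholders, not Xen's):

    #include <stdint.h>

    #define _PAGE_RW     0x002u   /* placeholder bit positions */
    #define _PAGE_USER   0x004u
    #define _PAGE_NX_BIT 0x100u

    #define AR_ACCUM_AND (_PAGE_USER | _PAGE_RW)
    #define AR_ACCUM_OR  (_PAGE_NX_BIT)

    /* Fold per-level flags as lines 149/150, 179/180, 278/279, 350/351
     * and 363 do across the real walk. */
    static uint32_t accumulate_ar(const uint32_t *gflags, unsigned int nr)
    {
        uint32_t ar_and = ~0u, ar_or = 0;

        for ( unsigned int i = 0; i < nr; i++ )
        {
            ar_and &= gflags[i];
            ar_or  |= gflags[i];
        }

        return (ar_and & AR_ACCUM_AND) | (ar_or & AR_ACCUM_OR);
    }

For example, levels granting {USER|RW, USER|RW, RW, USER|RW} fold to just _PAGE_RW: the one supervisor-only entry makes the whole translation supervisor-only, which is why the checks at lines 400-429 test the accumulated ar rather than any single entry.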
Unexecuted instantiation: guest_walk_tables_2_levels
Unexecuted instantiation: guest_walk_tables_3_levels
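
Three instantiations appear because the file is compiled once per GUEST_PAGING_LEVELS value (the asm(".file") at line 25 keeps their static symbols distinguishable), with a header renaming the walker for each level; only the 4-level variant ran in this workload, which is why the merged counts above essentially coincide with the per-instantiation counts below. A sketch of the token-pasting rename, after the scheme used in asm/guest_pt.h (macro names assumed):

    /* Compiled with GUEST_PAGING_LEVELS = 2, 3 and 4, this yields the
     * symbols guest_walk_tables_2_levels, _3_levels and _4_levels. */
    #define GPT_RENAME2(name, levels) name ## _ ## levels ## _levels
    #define GPT_RENAME(name, levels)  GPT_RENAME2(name, levels)
    #define guest_walk_tables GPT_RENAME(guest_walk_tables, GUEST_PAGING_LEVELS)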
guest_walk_tables_4_levels
Line | Count | Source
  86 |  364k | {
  87 |  364k |     struct domain *d = v->domain;
  88 |  364k |     p2m_type_t p2mt;
  89 |  364k |     guest_l1e_t *l1p = NULL;
  90 |  364k |     guest_l2e_t *l2p = NULL;
  91 |  364k | #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
  92 |  364k |     guest_l3e_t *l3p = NULL;
  93 |  364k |     guest_l4e_t *l4p;
  94 |  364k | #endif
  95 |  364k |     uint32_t gflags, rc;
  96 |  364k |     unsigned int leaf_level;
  97 |  364k |     p2m_query_t qt = P2M_ALLOC | P2M_UNSHARE;
  98 |  364k |
  99 |  364k | #define AR_ACCUM_AND (_PAGE_USER | _PAGE_RW)
 100 |  364k | #define AR_ACCUM_OR  (_PAGE_NX_BIT)
 101 |  364k |     /* Start with all AND bits set, all OR bits clear. */
 102 |  364k |     uint32_t ar, ar_and = ~0u, ar_or = 0;
 103 |  364k |
 104 |  364k |     bool walk_ok = false;
 105 |  364k |
 106 |  364k |     /*
 107 |  364k |      * TODO - We should ASSERT() that only the following bits are set as
 108 |  364k |      * inputs to a guest walk, but a whole load of code currently passes in
 109 |  364k |      * other PFEC_ constants.
 110 |  364k |      */
 111 |  364k |     walk &= (PFEC_implicit | PFEC_insn_fetch | PFEC_user_mode | PFEC_write_access);
 112 |  364k |
 113 |  364k |     /* Only implicit supervisor data accesses exist. */
 114 |  364k |     ASSERT(!(walk & PFEC_implicit) ||
 115 |  364k |            !(walk & (PFEC_insn_fetch | PFEC_user_mode)));
 116 |  364k |
 117 |  364k |     perfc_incr(guest_walk);
 118 |  364k |     memset(gw, 0, sizeof(*gw));
 119 |  364k |     gw->va = va;
 120 |  364k |     gw->pfec = walk & (PFEC_user_mode | PFEC_write_access);
 121 |  364k |
 122 |  364k |     /*
 123 |  364k |      * PFEC_insn_fetch is only reported if NX or SMEP are enabled.  Hardware
 124 |  364k |      * still distinguishes instruction fetches during determination of access
 125 |  364k |      * rights.
 126 |  364k |      */
 127 |  364k |     if ( guest_nx_enabled(v) || guest_smep_enabled(v) )
 128 |  365k |         gw->pfec |= (walk & PFEC_insn_fetch);
 129 |  364k |
 130 |  364k | #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
 131 |  364k | #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
 132 |  364k |
 133 |  364k |     /* Get the l4e from the top level table and check its flags */
 134 |  364k |     gw->l4mfn = top_mfn;
 135 |  364k |     l4p = (guest_l4e_t *) top_map;
 136 |  364k |     gw->l4e = l4p[guest_l4_table_offset(va)];
 137 |  364k |     gflags = guest_l4e_get_flags(gw->l4e);
 138 |  364k |     if ( !(gflags & _PAGE_PRESENT) )
 139 |     0 |         goto out;
 140 |  364k |
 141 |  364k |     /* Check for reserved bits. */
 142 |  364k |     if ( guest_l4e_rsvd_bits(v, gw->l4e) )
 143 |     0 |     {
 144 |     0 |         gw->pfec |= PFEC_reserved_bit | PFEC_page_present;
 145 |     0 |         goto out;
 146 |     0 |     }
 147 |  364k |
 148 |  364k |     /* Accumulate l4e access rights. */
 149 |  364k |     ar_and &= gflags;
 150 |  364k |     ar_or  |= gflags;
 151 |  364k |
 152 |  364k |     /* Map the l3 table */
 153 |  364k |     l3p = map_domain_gfn(p2m,
 154 |  364k |                          guest_l4e_get_gfn(gw->l4e),
 155 |  364k |                          &gw->l3mfn,
 156 |  364k |                          &p2mt,
 157 |  364k |                          qt,
 158 |  364k |                          &rc);
 159 |  364k |     if ( l3p == NULL )
 160 |     0 |     {
 161 |     0 |         gw->pfec |= rc & PFEC_synth_mask;
 162 |     0 |         goto out;
 163 |     0 |     }
 164 |  364k |
 165 |  364k |     /* Get the l3e and check its flags */
 166 |  364k |     gw->l3e = l3p[guest_l3_table_offset(va)];
 167 |  364k |     gflags = guest_l3e_get_flags(gw->l3e);
 168 |  364k |     if ( !(gflags & _PAGE_PRESENT) )
 169 |     0 |         goto out;
 170 |  364k |
 171 |  364k |     /* Check for reserved bits, including possibly _PAGE_PSE. */
 172 |  364k |     if ( guest_l3e_rsvd_bits(v, gw->l3e) )
 173 |     0 |     {
 174 |     0 |         gw->pfec |= PFEC_reserved_bit | PFEC_page_present;
 175 |     0 |         goto out;
 176 |     0 |     }
 177 |  364k |
 178 |  364k |     /* Accumulate l3e access rights. */
 179 |  364k |     ar_and &= gflags;
 180 |  364k |     ar_or  |= gflags;
 181 |  364k |
 182 |  364k |     if ( gflags & _PAGE_PSE )
 183 |     0 |     {
 184 |     0 |         /*
 185 |     0 |          * Generate a fake l1 table entry so callers don't all
 186 |     0 |          * have to understand superpages.
 187 |     0 |          */
 188 |     0 |         gfn_t start = guest_l3e_get_gfn(gw->l3e);
 189 |     0 |         /*
 190 |     0 |          * Grant full access in the l1e, since all the guest entry's
 191 |     0 |          * access controls are enforced in the l3e.
 192 |     0 |          */
 193 |     0 |         int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
 194 |     0 |                      _PAGE_ACCESSED|_PAGE_DIRTY);
 195 |     0 |         /*
 196 |     0 |          * Import protection key and cache-control bits. Note that _PAGE_PAT
 197 |     0 |          * is actually _PAGE_PSE, and it is always set. We will clear it in
 198 |     0 |          * case _PAGE_PSE_PAT (bit 12, i.e. first bit of gfn) is clear.
 199 |     0 |          */
 200 |     0 |         flags |= (guest_l3e_get_flags(gw->l3e)
 201 |     0 |                   & (_PAGE_PKEY_BITS|_PAGE_PAT|_PAGE_PWT|_PAGE_PCD));
 202 |     0 |         if ( !(gfn_x(start) & 1) )
 203 |     0 |             /* _PAGE_PSE_PAT not set: remove _PAGE_PAT from flags. */
 204 |     0 |             flags &= ~_PAGE_PAT;
 205 |     0 |
 206 |     0 |         /* Increment the pfn by the right number of 4k pages. */
 207 |     0 |         start = _gfn((gfn_x(start) & ~GUEST_L3_GFN_MASK) +
 208 |     0 |                      ((va >> PAGE_SHIFT) & GUEST_L3_GFN_MASK));
 209 |     0 |         gw->l1e = guest_l1e_from_gfn(start, flags);
 210 |     0 |         gw->l2mfn = gw->l1mfn = INVALID_MFN;
 211 |     0 |         leaf_level = 3;
 212 |     0 |         goto leaf;
 213 |     0 |     }
 214 |  364k |
 215 |  364k | #else /* PAE only... */
 216 |       |
 217 |       |     /* Get the l3e and check its flags */
 218 |       |     gw->l3e = ((guest_l3e_t *) top_map)[guest_l3_table_offset(va)];
 219 |       |     gflags = guest_l3e_get_flags(gw->l3e);
 220 |       |     if ( !(gflags & _PAGE_PRESENT) )
 221 |       |         goto out;
 222 |       |
 223 |       |     if ( guest_l3e_rsvd_bits(v, gw->l3e) )
 224 |       |     {
 225 |       |         gw->pfec |= PFEC_reserved_bit | PFEC_page_present;
 226 |       |         goto out;
 227 |       |     }
 228 |       |
 229 |       | #endif /* PAE or 64... */
 230 |  364k |
 231 |  364k |     /* Map the l2 table */
 232 |  364k |     l2p = map_domain_gfn(p2m,
 233 |  364k |                          guest_l3e_get_gfn(gw->l3e),
 234 |  364k |                          &gw->l2mfn,
 235 |  364k |                          &p2mt,
 236 |  364k |                          qt,
 237 |  364k |                          &rc);
 238 |  364k |     if ( l2p == NULL )
 239 |     0 |     {
 240 |     0 |         gw->pfec |= rc & PFEC_synth_mask;
 241 |     0 |         goto out;
 242 |     0 |     }
 243 |  364k |
 244 |  364k |     /* Get the l2e */
 245 |  364k |     gw->l2e = l2p[guest_l2_table_offset(va)];
 246 |  364k |
 247 |  364k | #else /* 32-bit only... */
 248 |       |
 249 |       |     /* Get l2e from the top level table */
 250 |       |     gw->l2mfn = top_mfn;
 251 |       |     l2p = (guest_l2e_t *) top_map;
 252 |       |     gw->l2e = l2p[guest_l2_table_offset(va)];
 253 |       |
 254 |       | #endif /* All levels... */
 255 |  364k |
 256 |  364k |     /* Check the l2e flags. */
 257 |  364k |     gflags = guest_l2e_get_flags(gw->l2e);
 258 |  364k |     if ( !(gflags & _PAGE_PRESENT) )
 259 |     0 |         goto out;
 260 |  364k |
 261 |  364k |     /*
 262 |  364k |      * In 2-level paging without CR0.PSE, there are no reserved bits, and the
 263 |  364k |      * PAT/PSE bit is ignored.
 264 |  364k |      */
 265 |  364k |     if ( GUEST_PAGING_LEVELS == 2 && !guest_can_use_l2_superpages(v) )
 266 |     0 |     {
 267 |     0 |         gw->l2e.l2 &= ~_PAGE_PSE;
 268 |     0 |         gflags &= ~_PAGE_PSE;
 269 |     0 |     }
 270 |  364k |     /* else check for reserved bits, including possibly _PAGE_PSE. */
 271 |  364k |     else if ( guest_l2e_rsvd_bits(v, gw->l2e) )
 272 |     0 |     {
 273 |     0 |         gw->pfec |= PFEC_reserved_bit | PFEC_page_present;
 274 |     0 |         goto out;
 275 |     0 |     }
 276 |  364k |
 277 |  364k |     /* Accumulate l2e access rights. */
 278 |  364k |     ar_and &= gflags;
 279 |  364k |     ar_or  |= gflags;
 280 |  364k |
 281 |  364k |     if ( gflags & _PAGE_PSE )
 282 |  225k |     {
 283 |  225k |         /*
 284 |  225k |          * Special case: this guest VA is in a PSE superpage, so there's
 285 |  225k |          * no guest l1e.  We make one up so that the propagation code
 286 |  225k |          * can generate a shadow l1 table.  Start with the gfn of the
 287 |  225k |          * first 4k-page of the superpage.
 288 |  225k |          */
 289 |  225k | #if GUEST_PAGING_LEVELS == 2
 290 |       |         gfn_t start = _gfn(unfold_pse36(gw->l2e.l2) >> PAGE_SHIFT);
 291 |       | #else
 292 |  225k |         gfn_t start = guest_l2e_get_gfn(gw->l2e);
 293 |  225k | #endif
 294 |  225k |         /*
 295 |  225k |          * Grant full access in the l1e, since all the guest entry's
 296 |  225k |          * access controls are enforced in the shadow l2e.
 297 |  225k |          */
 298 |  225k |         int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
 299 |  225k |                      _PAGE_ACCESSED|_PAGE_DIRTY);
 300 |  225k |         /*
 301 |  225k |          * Import protection key and cache-control bits. Note that _PAGE_PAT
 302 |  225k |          * is actually _PAGE_PSE, and it is always set. We will clear it in
 303 |  225k |          * case _PAGE_PSE_PAT (bit 12, i.e. first bit of gfn) is clear.
 304 |  225k |          */
 305 |  225k |         flags |= (guest_l2e_get_flags(gw->l2e)
 306 |  225k |                   & (_PAGE_PKEY_BITS|_PAGE_PAT|_PAGE_PWT|_PAGE_PCD));
 307 |  225k |         if ( !(gfn_x(start) & 1) )
 308 |  225k |             /* _PAGE_PSE_PAT not set: remove _PAGE_PAT from flags. */
 309 |  225k |             flags &= ~_PAGE_PAT;
 310 |  225k |
 311 |  225k |         /* Increment the pfn by the right number of 4k pages. */
 312 |  225k |         start = _gfn((gfn_x(start) & ~GUEST_L2_GFN_MASK) +
 313 |  225k |                      guest_l1_table_offset(va));
 314 |  225k | #if GUEST_PAGING_LEVELS == 2
 315 |       |          /* Wider than 32 bits if PSE36 superpage. */
 316 |       |         gw->el1e = (gfn_x(start) << PAGE_SHIFT) | flags;
 317 |       | #else
 318 |  225k |         gw->l1e = guest_l1e_from_gfn(start, flags);
 319 |  225k | #endif
 320 |  225k |         gw->l1mfn = INVALID_MFN;
 321 |  225k |         leaf_level = 2;
 322 |  225k |         goto leaf;
 323 |  225k |     }
 324 |  364k |
 325 |  364k |     /* Map the l1 table */
 326 |  139k |     l1p = map_domain_gfn(p2m,
 327 |  139k |                          guest_l2e_get_gfn(gw->l2e),
 328 |  139k |                          &gw->l1mfn,
 329 |  139k |                          &p2mt,
 330 |  139k |                          qt,
 331 |  139k |                          &rc);
 332 |  139k |     if ( l1p == NULL )
 333 |     0 |     {
 334 |     0 |         gw->pfec |= rc & PFEC_synth_mask;
 335 |     0 |         goto out;
 336 |     0 |     }
 337 |  139k |     gw->l1e = l1p[guest_l1_table_offset(va)];
 338 |  139k |     gflags = guest_l1e_get_flags(gw->l1e);
 339 |  139k |     if ( !(gflags & _PAGE_PRESENT) )
 340 |     0 |         goto out;
 341 |  139k |
 342 |  139k |     /* Check for reserved bits. */
 343 |  139k |     if ( guest_l1e_rsvd_bits(v, gw->l1e) )
 344 |     0 |     {
 345 |     0 |         gw->pfec |= PFEC_reserved_bit | PFEC_page_present;
 346 |     0 |         goto out;
 347 |     0 |     }
 348 |  139k |
 349 |  139k |     /* Accumulate l1e access rights. */
 350 |  139k |     ar_and &= gflags;
 351 |  139k |     ar_or  |= gflags;
 352 |  139k |
 353 |  139k |     leaf_level = 1;
 354 |  139k |
 355 |  365k |  leaf:
 356 |  365k |     gw->pfec |= PFEC_page_present;
 357 |  365k |
 358 |  365k |     /*
 359 |  365k |      * The pagetable walk has returned a successful translation (i.e. all PTEs
 360 |  365k |      * are present and have no reserved bits set).  Now check access rights to
 361 |  365k |      * see whether the access should succeed.
 362 |  365k |      */
 363 |  365k |     ar = (ar_and & AR_ACCUM_AND) | (ar_or & AR_ACCUM_OR);
 364 |  365k |
 365 |  365k |     /*
 366 |  365k |      * Sanity check.  If EFER.NX is disabled, _PAGE_NX_BIT is reserved and
 367 |  365k |      * should have caused a translation failure before we get here.
 368 |  365k |      */
 369 |  365k |     if ( ar & _PAGE_NX_BIT )
 370 |   257 |         ASSERT(guest_nx_enabled(v));
 371 |  365k |
 372 |  365k | #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
 373 |  365k |     /*
 374 |  365k |      * If all access checks are thus far ok, check Protection Key for 64bit
 375 |  365k |      * data accesses to user mappings.
 376 |  365k |      *
 377 |  365k |      * N.B. In the case that the walk ended with a superpage, the fabricated
 378 |  365k |      * gw->l1e contains the appropriate leaf pkey.
 379 |  365k |      */
 380 |  365k |     if ( (ar & _PAGE_USER) && !(walk & PFEC_insn_fetch) &&
 381 |   282 |          guest_pku_enabled(v) )
 382 |     0 |     {
 383 |     0 |         unsigned int pkey = guest_l1e_get_pkey(gw->l1e);
 384 |     0 |         unsigned int pkru = read_pkru();
 385 |     0 |
 386 |     0 |         if ( read_pkru_ad(pkru, pkey) ||
 387 |     0 |              ((walk & PFEC_write_access) && read_pkru_wd(pkru, pkey) &&
 388 |     0 |               ((walk & PFEC_user_mode) || guest_wp_enabled(v))) )
 389 |     0 |         {
 390 |     0 |             gw->pfec |= PFEC_prot_key;
 391 |     0 |             goto out;
 392 |     0 |         }
 393 |     0 |     }
 394 |  365k | #endif
 395 |  365k |
 396 |  365k |     if ( (walk & PFEC_insn_fetch) && (ar & _PAGE_NX_BIT) )
 397 |  365k |         /* Requested an instruction fetch and found NX? Fail. */
 398 |     0 |         goto out;
 399 |  365k |
 400 |  365k |     if ( walk & PFEC_user_mode ) /* Requested a user access. */
 401 |     0 |     {
 402 |     0 |         if ( !(ar & _PAGE_USER) )
 403 |     0 |             /* Got a supervisor walk?  Unconditional fail. */
 404 |     0 |             goto out;
 405 |     0 |
 406 |     0 |         if ( (walk & PFEC_write_access) && !(ar & _PAGE_RW) )
 407 |     0 |             /* Requested a write and only got a read? Fail. */
 408 |     0 |             goto out;
 409 |     0 |     }
 410 |  365k |     else /* Requested a supervisor access. */
 411 |  365k |     {
 412 |  365k |         if ( ar & _PAGE_USER ) /* Got a user walk. */
 413 |   282 |         {
 414 |   282 |             if ( (walk & PFEC_insn_fetch) && guest_smep_enabled(v) )
 415 |   282 |                 /* User insn fetch and SMEP? Fail. */
 416 |     0 |                 goto out;
 417 |   282 |
 418 |   282 |             if ( !(walk & PFEC_insn_fetch) && guest_smap_enabled(v) &&
 419 |     0 |                  ((walk & PFEC_implicit) ||
 420 |     0 |                   !(guest_cpu_user_regs()->eflags & X86_EFLAGS_AC)) )
 421 |   282 |                 /* User data access and SMAP? Fail. */
 422 |     0 |                 goto out;
 423 |   282 |         }
 424 |  365k |
 425 |  365k |         if ( (walk & PFEC_write_access) && !(ar & _PAGE_RW) &&
 426 |     0 |              guest_wp_enabled(v) )
 427 |  365k |             /* Requested a write, got a read, and CR0.WP is set? Fail. */
 428 |     0 |             goto out;
 429 |  365k |     }
 430 |  365k |
 431 |  365k |     walk_ok = true;
 432 |  365k |
 433 |  365k |     /*
 434 |  365k |      * Go back and set accessed and dirty bits only if the walk was a
 435 |  365k |      * success.  Although the PRMs say higher-level _PAGE_ACCESSED bits
 436 |  365k |      * get set whenever a lower-level PT is used, at least some hardware
 437 |  365k |      * walkers behave this way.
 438 |  365k |      */
 439 |  365k |     switch ( leaf_level )
 440 |  365k |     {
 441 |     0 |     default:
 442 |     0 |         ASSERT_UNREACHABLE();
 443 |     0 |         break;
 444 |     0 |
 445 |  139k |     case 1:
 446 |  139k |         if ( set_ad_bits(&l1p[guest_l1_table_offset(va)].l1, &gw->l1e.l1,
 447 |  139k |                          (walk & PFEC_write_access)) )
 448 |     0 |             paging_mark_dirty(d, gw->l1mfn);
 449 |  139k |         /* Fallthrough */
 450 |  364k |     case 2:
 451 |  364k |         if ( set_ad_bits(&l2p[guest_l2_table_offset(va)].l2, &gw->l2e.l2,
 452 |  364k |                          (walk & PFEC_write_access) && leaf_level == 2) )
 453 |     0 |             paging_mark_dirty(d, gw->l2mfn);
 454 |  364k |         /* Fallthrough */
 455 |  364k | #if GUEST_PAGING_LEVELS == 4 /* 64-bit only... */
 456 |  364k |     case 3:
 457 |  364k |         if ( set_ad_bits(&l3p[guest_l3_table_offset(va)].l3, &gw->l3e.l3,
 458 |  364k |                          (walk & PFEC_write_access) && leaf_level == 3) )
 459 |     0 |             paging_mark_dirty(d, gw->l3mfn);
 460 |  364k |
 461 |  364k |         if ( set_ad_bits(&l4p[guest_l4_table_offset(va)].l4, &gw->l4e.l4,
 462 |  364k |                          false) )
 463 |     0 |             paging_mark_dirty(d, gw->l4mfn);
 464 |  365k | #endif
 465 |  365k |     }
 466 |  365k |
 467 |  365k |  out:
 468 |  365k | #if GUEST_PAGING_LEVELS == 4
 469 |  365k |     if ( l3p )
 470 |  365k |     {
 471 |  365k |         unmap_domain_page(l3p);
 472 |  365k |         put_page(mfn_to_page(mfn_x(gw->l3mfn)));
 473 |  365k |     }
 474 |  365k | #endif
 475 |  365k | #if GUEST_PAGING_LEVELS >= 3
 476 |  365k |     if ( l2p )
 477 |  366k |     {
 478 |  366k |         unmap_domain_page(l2p);
 479 |  366k |         put_page(mfn_to_page(mfn_x(gw->l2mfn)));
 480 |  366k |     }
 481 |  365k | #endif
 482 |  365k |     if ( l1p )
 483 |  141k |     {
 484 |  141k |         unmap_domain_page(l1p);
 485 |  141k |         put_page(mfn_to_page(mfn_x(gw->l1mfn)));
 486 |  141k |     }
 487 |  365k |
 488 |  365k |     return walk_ok;
 489 |  365k | }
 490 |  364k |
 491 |  364k | /*
 492 |  364k |  * Local variables:
 493 |  364k |  * mode: C
 494 |  364k |  * c-file-style: "BSD"
 495 |  364k |  * c-basic-offset: 4
 496 |  364k |  * tab-width: 4
 497 |  364k |  * indent-tabs-mode: nil
 498 |  364k |  * End:
 499 |  364k |  */
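
A closing note on the hot path: 225k of the 365k successful walks ended at the l2 superpage branch (lines 281-323), whose core is the gfn splice at lines 312-313 that replaces the low bits of the superpage's base gfn with the 4k-page index taken from the VA. A self-contained sketch of that arithmetic, with an assumed mask for the 4-level case (a 2M superpage spans 512 4k pages):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT        12
    #define GUEST_L2_GFN_MASK 0x1ffUL   /* assumed: 512 4k pages per 2M page */

    /* Mirror of lines 312-313: clear the in-superpage bits of the base
     * gfn, then add the 4k-page index taken from the VA. */
    static uint64_t fabricate_l1_gfn(uint64_t base_gfn, uint64_t va)
    {
        return (base_gfn & ~GUEST_L2_GFN_MASK) +
               ((va >> PAGE_SHIFT) & GUEST_L2_GFN_MASK);
    }

    int main(void)
    {
        /* A 2M superpage based at gfn 0x40000, accessed 0x5000 bytes in. */
        printf("gfn = %#llx\n",
               (unsigned long long)fabricate_l1_gfn(0x40000, 0x5000));
        return 0;   /* prints gfn = 0x40005 */
    }

The 1G case at lines 207-208 is the same computation with GUEST_L3_GFN_MASK covering the 18 in-superpage bits.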