Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/arch/x86/mm/shadow/multi.c
Line  Count  Source
1
/******************************************************************************
2
 * arch/x86/mm/shadow/multi.c
3
 *
4
 * Simple, mostly-synchronous shadow page tables.
5
 * Parts of this code are Copyright (c) 2006 by XenSource Inc.
6
 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
7
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
8
 *
9
 * This program is free software; you can redistribute it and/or modify
10
 * it under the terms of the GNU General Public License as published by
11
 * the Free Software Foundation; either version 2 of the License, or
12
 * (at your option) any later version.
13
 *
14
 * This program is distributed in the hope that it will be useful,
15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
 * GNU General Public License for more details.
18
 *
19
 * You should have received a copy of the GNU General Public License
20
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
21
 */
22
23
/* Allow uniquely identifying static symbols in the 3 generated objects. */
24
asm(".file \"" __OBJECT_FILE__ "\"");
25
26
#include <xen/types.h>
27
#include <xen/mm.h>
28
#include <xen/trace.h>
29
#include <xen/sched.h>
30
#include <xen/perfc.h>
31
#include <xen/domain_page.h>
32
#include <xen/iocap.h>
33
#include <xsm/xsm.h>
34
#include <asm/page.h>
35
#include <asm/current.h>
36
#include <asm/shadow.h>
37
#include <asm/flushtlb.h>
38
#include <asm/hvm/hvm.h>
39
#include <asm/hvm/cacheattr.h>
40
#include <asm/mtrr.h>
41
#include <asm/guest_pt.h>
42
#include <public/sched.h>
43
#include "private.h"
44
#include "types.h"
45
46
/* THINGS TO DO LATER:
47
 *
48
 * TEARDOWN HEURISTICS
49
 * Also: have a heuristic for when to destroy a previous paging-mode's
50
 * shadows.  When a guest is done with its start-of-day 32-bit tables
51
 * and reuses the memory we want to drop those shadows.  Start with
52
 * shadows in a page in two modes as a hint, but beware of clever tricks
53
 * like reusing a pagetable for both PAE and 64-bit during boot...
54
 *
55
 * PAE LINEAR MAPS
56
 * Rework shadow_get_l*e() to have the option of using map_domain_page()
57
 * instead of linear maps.  Add appropriate unmap_l*e calls in the users.
58
 * Then we can test the speed difference made by linear maps.  If the
59
 * map_domain_page() version is OK on PAE, we could maybe allow a lightweight
60
 * l3-and-l2h-only shadow mode for PAE PV guests that would allow them
61
 * to share l2h pages again.
62
 *
63
 * PSE disabled / PSE36
64
 * We don't support any modes other than PSE enabled, PSE36 disabled.
65
 * Neither of those would be hard to change, but we'd need to be able to
66
 * deal with shadows made in one mode and used in another.
67
 */
68
69
#define FETCH_TYPE_PREFETCH 1
70
#define FETCH_TYPE_DEMAND   2
71
0
#define FETCH_TYPE_WRITE    4
72
typedef enum {
73
    ft_prefetch     = FETCH_TYPE_PREFETCH,
74
    ft_demand_read  = FETCH_TYPE_DEMAND,
75
    ft_demand_write = FETCH_TYPE_DEMAND | FETCH_TYPE_WRITE,
76
} fetch_type_t;
77
78
extern const char *const fetch_type_names[];
79
80
#if SHADOW_DEBUG_PROPAGATE && CONFIG_PAGING_LEVELS == GUEST_PAGING_LEVELS
81
const char *const fetch_type_names[] = {
82
    [ft_prefetch]     = "prefetch",
83
    [ft_demand_read]  = "demand read",
84
    [ft_demand_write] = "demand write",
85
};
86
#endif
87
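
The fetch type is a small bit-set: a demand write is simply a demand fetch with the write bit added, which is what lets later code (for example the log-dirty handling in _sh_propagate) test ft & FETCH_TYPE_WRITE. Below is a minimal standalone sketch of that decomposition; it mirrors the definitions above and everything else in it (the program, its output) is purely illustrative.

/* Standalone illustration (not Xen code): mirrors the FETCH_TYPE_* and
 * fetch_type_t definitions above and shows how a test such as
 * "ft & FETCH_TYPE_WRITE" singles out demand writes. */
#include <stdio.h>

#define FETCH_TYPE_PREFETCH 1
#define FETCH_TYPE_DEMAND   2
#define FETCH_TYPE_WRITE    4

typedef enum {
    ft_prefetch     = FETCH_TYPE_PREFETCH,
    ft_demand_read  = FETCH_TYPE_DEMAND,
    ft_demand_write = FETCH_TYPE_DEMAND | FETCH_TYPE_WRITE,
} fetch_type_t;

int main(void)
{
    fetch_type_t fts[] = { ft_prefetch, ft_demand_read, ft_demand_write };
    unsigned int i;

    for ( i = 0; i < sizeof(fts) / sizeof(fts[0]); i++ )
        printf("ft=%d demand=%d write=%d\n", (int)fts[i],
               !!(fts[i] & FETCH_TYPE_DEMAND), !!(fts[i] & FETCH_TYPE_WRITE));
    return 0;
}
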
88
/**************************************************************************/
89
/* Hash table mapping from guest pagetables to shadows
90
 *
91
 * Normal case: maps the mfn of a guest page to the mfn of its shadow page.
92
 * FL1's:       maps the *gfn* of the start of a superpage to the mfn of a
93
 *              shadow L1 which maps its "splinters".
94
 */
95
96
static inline mfn_t
97
get_fl1_shadow_status(struct domain *d, gfn_t gfn)
98
/* Look for FL1 shadows in the hash table */
99
0
{
100
0
    mfn_t smfn = shadow_hash_lookup(d, gfn_x(gfn), SH_type_fl1_shadow);
101
0
    ASSERT(!mfn_valid(smfn) || mfn_to_page(smfn)->u.sh.head);
102
0
    return smfn;
103
0
}
104
105
static inline mfn_t
106
get_shadow_status(struct domain *d, mfn_t gmfn, u32 shadow_type)
107
/* Look for shadows in the hash table */
108
0
{
109
0
    mfn_t smfn = shadow_hash_lookup(d, mfn_x(gmfn), shadow_type);
110
0
    ASSERT(!mfn_valid(smfn) || mfn_to_page(smfn)->u.sh.head);
111
0
    perfc_incr(shadow_get_shadow_status);
112
0
    return smfn;
113
0
}
114
115
static inline void
116
set_fl1_shadow_status(struct domain *d, gfn_t gfn, mfn_t smfn)
117
/* Put an FL1 shadow into the hash table */
118
0
{
119
0
    SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%"PRI_mfn"\n",
120
0
                   gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn));
121
0
122
0
    ASSERT(mfn_to_page(smfn)->u.sh.head);
123
0
    shadow_hash_insert(d, gfn_x(gfn), SH_type_fl1_shadow, smfn);
124
0
}
125
126
static inline void
127
set_shadow_status(struct domain *d, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
128
/* Put a shadow into the hash table */
129
0
{
130
0
    int res;
131
0
132
0
    SHADOW_PRINTK("d%d gmfn=%lx, type=%08x, smfn=%lx\n",
133
0
                  d->domain_id, mfn_x(gmfn), shadow_type, mfn_x(smfn));
134
0
135
0
    ASSERT(mfn_to_page(smfn)->u.sh.head);
136
0
137
0
    /* 32-bit PV guests don't own their l4 pages so can't get_page them */
138
0
    if ( !is_pv_32bit_domain(d) || shadow_type != SH_type_l4_64_shadow )
139
0
    {
140
0
        res = get_page(mfn_to_page(gmfn), d);
141
0
        ASSERT(res == 1);
142
0
    }
143
0
144
0
    shadow_hash_insert(d, mfn_x(gmfn), shadow_type, smfn);
145
0
}
146
147
static inline void
148
delete_fl1_shadow_status(struct domain *d, gfn_t gfn, mfn_t smfn)
149
/* Remove a shadow from the hash table */
150
0
{
151
0
    SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%"PRI_mfn"\n",
152
0
                   gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn));
153
0
    ASSERT(mfn_to_page(smfn)->u.sh.head);
154
0
    shadow_hash_delete(d, gfn_x(gfn), SH_type_fl1_shadow, smfn);
155
0
}
156
157
static inline void
158
delete_shadow_status(struct domain *d, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
159
/* Remove a shadow from the hash table */
160
0
{
161
0
    SHADOW_PRINTK("d%d gmfn=%"PRI_mfn", type=%08x, smfn=%"PRI_mfn"\n",
162
0
                  d->domain_id, mfn_x(gmfn), shadow_type, mfn_x(smfn));
163
0
    ASSERT(mfn_to_page(smfn)->u.sh.head);
164
0
    shadow_hash_delete(d, mfn_x(gmfn), shadow_type, smfn);
165
0
    /* 32-bit PV guests don't own their l4 pages; see set_shadow_status */
166
0
    if ( !is_pv_32bit_domain(d) || shadow_type != SH_type_l4_64_shadow )
167
0
        put_page(mfn_to_page(gmfn));
168
0
}
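
These helpers are thin wrappers around shadow_hash_lookup(), shadow_hash_insert() and shadow_hash_delete() in the common shadow code, keyed by gmfn plus shadow type (or by gfn for FL1 shadows). As a rough, self-contained model of the association they maintain, the toy below uses a flat array in place of the real hash table; all names, sizes and values in it are invented for illustration.

/* Toy, self-contained model (not Xen code) of the "guest mfn + shadow type
 * -> shadow mfn" association that get/set/delete_shadow_status() maintain. */
#include <stdio.h>

#define TOY_SLOTS        16
#define TOY_INVALID_MFN  (~0UL)

struct toy_entry {
    unsigned long gmfn;   /* guest page's machine frame number */
    unsigned int  type;   /* shadow type, e.g. an SH_type_* value */
    unsigned long smfn;   /* machine frame of the shadow page */
    int           in_use;
};

static struct toy_entry toy_table[TOY_SLOTS];

static void toy_set_status(unsigned long gmfn, unsigned int type,
                           unsigned long smfn)
{
    unsigned int i;

    for ( i = 0; i < TOY_SLOTS; i++ )
        if ( !toy_table[i].in_use )
        {
            toy_table[i] = (struct toy_entry){ gmfn, type, smfn, 1 };
            return;
        }
}

static unsigned long toy_get_status(unsigned long gmfn, unsigned int type)
{
    unsigned int i;

    for ( i = 0; i < TOY_SLOTS; i++ )
        if ( toy_table[i].in_use &&
             toy_table[i].gmfn == gmfn && toy_table[i].type == type )
            return toy_table[i].smfn;
    return TOY_INVALID_MFN;   /* no shadow of this type yet */
}

int main(void)
{
    toy_set_status(0x1000, 1, 0x2000);
    printf("hit:  %#lx\n", toy_get_status(0x1000, 1));
    printf("miss: %#lx\n", toy_get_status(0x1000, 2));
    return 0;
}
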
169
170
171
/**************************************************************************/
172
/* Functions for walking the guest page tables */
173
174
static inline bool
175
sh_walk_guest_tables(struct vcpu *v, unsigned long va, walk_t *gw,
176
                     uint32_t pfec)
177
0
{
178
0
    return guest_walk_tables(v, p2m_get_hostp2m(v->domain), va, gw, pfec,
179
0
#if GUEST_PAGING_LEVELS == 3 /* PAE */
180
                             INVALID_MFN,
181
                             v->arch.paging.shadow.gl3e
182
#else /* 32 or 64 */
183
0
                             pagetable_get_mfn(v->arch.guest_table),
184
0
                             v->arch.paging.shadow.guest_vtable
185
0
#endif
186
0
                             );
187
0
}
188
189
/* This validation is called with the paging lock held and after write
190
 * permission has been removed, so the check is atomic and no further
191
 * inconsistency can be observed before the lock is released.
192
 *
193
 * Returns 1 to indicate success and 0 if an inconsistency was found.
194
 */
195
static inline uint32_t
196
shadow_check_gwalk(struct vcpu *v, unsigned long va, walk_t *gw, int version)
197
0
{
198
0
    struct domain *d = v->domain;
199
0
    guest_l1e_t *l1p;
200
0
    guest_l2e_t *l2p;
201
0
#if GUEST_PAGING_LEVELS >= 4
202
    guest_l3e_t *l3p;
203
    guest_l4e_t *l4p;
204
#endif
205
0
    int mismatch = 0;
206
0
207
0
    ASSERT(paging_locked_by_me(d));
208
0
209
0
    /* No need for smp_rmb() here; taking the paging lock was enough. */
210
0
    if ( version == atomic_read(&d->arch.paging.shadow.gtable_dirty_version) )
211
0
         return 1;
212
0
213
0
    /* We could cache the guest page mappings from the last
214
0
     * guest table walk.  However, this check happens relatively
215
0
     * infrequently, so the small cost of remapping the guest
216
0
     * pages here is preferable to caching the mappings on every
217
0
     * guest table walk.
218
0
     *
219
0
     * Also, when an inconsistency is found, simply return and let
220
0
     * another fault be triggered, rather than re-validating the
221
0
     * new path; this keeps the logic simple.
222
0
     */
223
0
    perfc_incr(shadow_check_gwalk);
224
0
#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
225
#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
226
    l4p = (guest_l4e_t *)v->arch.paging.shadow.guest_vtable;
227
    mismatch |= (gw->l4e.l4 != l4p[guest_l4_table_offset(va)].l4);
228
    l3p = map_domain_page(gw->l3mfn);
229
    mismatch |= (gw->l3e.l3 != l3p[guest_l3_table_offset(va)].l3);
230
    unmap_domain_page(l3p);
231
#else
232
    mismatch |= (gw->l3e.l3 !=
233
                 v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)].l3);
234
#endif
235
    l2p = map_domain_page(gw->l2mfn);
236
    mismatch |= (gw->l2e.l2 != l2p[guest_l2_table_offset(va)].l2);
237
    unmap_domain_page(l2p);
238
#else
239
0
    l2p = (guest_l2e_t *)v->arch.paging.shadow.guest_vtable;
240
0
    mismatch |= (gw->l2e.l2 != l2p[guest_l2_table_offset(va)].l2);
241
0
#endif
242
0
    if ( !(guest_can_use_l2_superpages(v) &&
243
0
           (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)) )
244
0
    {
245
0
        l1p = map_domain_page(gw->l1mfn);
246
0
        mismatch |= (gw->l1e.l1 != l1p[guest_l1_table_offset(va)].l1);
247
0
        unmap_domain_page(l1p);
248
0
    }
249
0
250
0
    return !mismatch;
251
0
}
252
253
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
254
static int
255
shadow_check_gl1e(struct vcpu *v, walk_t *gw)
256
0
{
257
0
    guest_l1e_t *l1p, nl1e;
258
0
259
0
    if ( !mfn_valid(gw->l1mfn) )
260
0
        return 0;
261
0
262
0
    /* Can't just pull-through because mfn may have changed */
263
0
    l1p = map_domain_page(gw->l1mfn);
264
0
    nl1e.l1 = l1p[guest_l1_table_offset(gw->va)].l1;
265
0
    unmap_domain_page(l1p);
266
0
267
0
    return gw->l1e.l1 != nl1e.l1;
268
0
}
269
#endif
270
271
/* Remove write access permissions from a gwalk_t in a batch, and
272
 * return OR-ed result for TLB flush hint and need to rewalk the guest
273
 * pages.
274
 *
275
 * Syncing pages will remove write access to that page; but it may
276
 * also give write access to other pages in the path. If we resync any
277
 * pages, re-walk from the beginning.
278
 */
279
0
#define GW_RMWR_FLUSHTLB 1
280
0
#define GW_RMWR_REWALK   2
281
282
static inline uint32_t
283
gw_remove_write_accesses(struct vcpu *v, unsigned long va, walk_t *gw)
284
0
{
285
0
    struct domain *d = v->domain;
286
0
    uint32_t rc = 0;
287
0
288
0
#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
289
#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
290
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
291
    if ( mfn_is_out_of_sync(gw->l3mfn) )
292
    {
293
        sh_resync(d, gw->l3mfn);
294
        rc = GW_RMWR_REWALK;
295
    }
296
    else
297
#endif /* OOS */
298
     if ( sh_remove_write_access(d, gw->l3mfn, 3, va) )
299
         rc = GW_RMWR_FLUSHTLB;
300
#endif /* GUEST_PAGING_LEVELS >= 4 */
301
302
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
303
    if ( mfn_is_out_of_sync(gw->l2mfn) )
304
    {
305
        sh_resync(d, gw->l2mfn);
306
        rc |= GW_RMWR_REWALK;
307
    }
308
    else
309
#endif /* OOS */
310
    if ( sh_remove_write_access(d, gw->l2mfn, 2, va) )
311
        rc |= GW_RMWR_FLUSHTLB;
312
#endif /* GUEST_PAGING_LEVELS >= 3 */
313
0
314
0
    if ( !(guest_can_use_l2_superpages(v) &&
315
0
           (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE))
316
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
317
0
         && !mfn_is_out_of_sync(gw->l1mfn)
318
0
#endif /* OOS */
319
0
         && sh_remove_write_access(d, gw->l1mfn, 1, va) )
320
0
        rc |= GW_RMWR_FLUSHTLB;
321
0
322
0
    return rc;
323
0
}
324
325
/* Lightweight audit: pass all the shadows associated with this guest walk
326
 * through the audit mechanisms */
327
static void sh_audit_gw(struct vcpu *v, const walk_t *gw)
328
0
{
329
0
#if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES
330
0
    struct domain *d = v->domain;
331
0
    mfn_t smfn;
332
0
333
0
    if ( !(SHADOW_AUDIT_ENABLE) )
334
0
        return;
335
0
336
0
#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
337
    if ( mfn_valid(gw->l4mfn)
338
         && mfn_valid((smfn = get_shadow_status(d, gw->l4mfn,
339
                                                SH_type_l4_shadow))) )
340
        (void) sh_audit_l4_table(v, smfn, INVALID_MFN);
341
    if ( mfn_valid(gw->l3mfn)
342
         && mfn_valid((smfn = get_shadow_status(d, gw->l3mfn,
343
                                                SH_type_l3_shadow))) )
344
        (void) sh_audit_l3_table(v, smfn, INVALID_MFN);
345
#endif /* PAE or 64... */
346
0
    if ( mfn_valid(gw->l2mfn) )
347
0
    {
348
0
        if ( mfn_valid((smfn = get_shadow_status(d, gw->l2mfn,
349
0
                                                 SH_type_l2_shadow))) )
350
0
            (void) sh_audit_l2_table(v, smfn, INVALID_MFN);
351
0
#if GUEST_PAGING_LEVELS == 3
352
        if ( mfn_valid((smfn = get_shadow_status(d, gw->l2mfn,
353
                                                 SH_type_l2h_shadow))) )
354
            (void) sh_audit_l2_table(v, smfn, INVALID_MFN);
355
#endif
356
0
    }
357
0
    if ( mfn_valid(gw->l1mfn)
358
0
         && mfn_valid((smfn = get_shadow_status(d, gw->l1mfn,
359
0
                                                SH_type_l1_shadow))) )
360
0
        (void) sh_audit_l1_table(v, smfn, INVALID_MFN);
361
0
    else if ( (guest_l2e_get_flags(gw->l2e) & _PAGE_PRESENT)
362
0
              && (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)
363
0
              && mfn_valid(
364
0
              (smfn = get_fl1_shadow_status(d, guest_l2e_get_gfn(gw->l2e)))) )
365
0
        (void) sh_audit_fl1_table(v, smfn, INVALID_MFN);
366
0
#endif /* SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES */
367
0
}
368
369
/*
370
 * Write a new value into the guest pagetable, and update the shadows
371
 * appropriately.  Returns false if we page-faulted, true for success.
372
 */
373
static bool
374
sh_write_guest_entry(struct vcpu *v, intpte_t *p, intpte_t new, mfn_t gmfn)
375
0
{
376
0
#if CONFIG_PAGING_LEVELS == GUEST_PAGING_LEVELS
377
    int failed;
378
379
    paging_lock(v->domain);
380
    failed = __copy_to_user(p, &new, sizeof(new));
381
    if ( failed != sizeof(new) )
382
        sh_validate_guest_entry(v, gmfn, p, sizeof(new));
383
    paging_unlock(v->domain);
384
385
    return !failed;
386
#else
387
0
    return false;
388
0
#endif
389
0
}
390
391
/*
392
 * Cmpxchg a new value into the guest pagetable, and update the shadows
393
 * appropriately. Returns false if we page-faulted, true if not.
394
 * N.B. caller should check the value of "old" to see if the cmpxchg itself
395
 * was successful.
396
 */
397
static bool
398
sh_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p, intpte_t *old,
399
                       intpte_t new, mfn_t gmfn)
400
0
{
401
0
#if CONFIG_PAGING_LEVELS == GUEST_PAGING_LEVELS
402
    int failed;
403
    guest_intpte_t t = *old;
404
405
    paging_lock(v->domain);
406
    failed = cmpxchg_user(p, t, new);
407
    if ( t == *old )
408
        sh_validate_guest_entry(v, gmfn, p, sizeof(new));
409
    *old = t;
410
    paging_unlock(v->domain);
411
412
    return !failed;
413
#else
414
0
    return false;
415
0
#endif
416
0
}
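
The N.B. above is the subtle part of this interface: the boolean return only says whether the guest access worked, while success of the compare-and-swap itself must be judged by comparing *old against the value the caller expected. The sketch below is a userspace analogue of that calling convention, using the GCC/Clang __sync builtin in place of Xen's cmpxchg_user(); the variable names and values are invented.

/* Userspace analogue (not Xen code) of the sh_cmpxchg_guest_entry()
 * convention: the function reports whether the access worked, and the
 * caller compares the returned old value with what it expected in order
 * to learn whether the swap actually took place. */
#include <stdio.h>
#include <stdbool.h>

static unsigned long fake_pte = 0x1003;   /* stand-in for a guest PTE */

static bool demo_cmpxchg(unsigned long *p, unsigned long *old,
                         unsigned long new)
{
    unsigned long t = *old;

    /* Atomically: if ( *p == t ) *p = new; yields the previous value. */
    *old = __sync_val_compare_and_swap(p, t, new);

    return true;   /* in Xen, false would mean the guest access faulted */
}

int main(void)
{
    unsigned long expected = 0x1003, old = expected;

    demo_cmpxchg(&fake_pte, &old, 0x2003);

    if ( old == expected )
        printf("cmpxchg won: pte is now %#lx\n", fake_pte);
    else
        printf("cmpxchg lost: current value was %#lx\n", old);
    return 0;
}
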
417
418
/**************************************************************************/
419
/* Functions to compute the correct index into a shadow page, given an
420
 * index into the guest page (as returned by guest_get_index()).
421
 * This is trivial when the shadow and guest use the same sized PTEs, but
422
 * gets more interesting when those sizes are mismatched (e.g. 32-bit guest,
423
 * PAE- or 64-bit shadows).
424
 *
425
 * These functions also increment the shadow mfn, when necessary.  When PTE
426
 * sizes are mismatched, it takes 2 shadow L1 pages for a single guest L1
427
 * page.  In this case, we allocate 2 contiguous pages for the shadow L1, and
428
 * use simple pointer arithmetic on a pointer to the guest L1e to figure out
429
 * which shadow page we really want.  Similarly, when PTE sizes are
430
 * mismatched, we shadow a guest L2 page with 4 shadow L2 pages.  (The easiest
431
 * way to see this is: a 32-bit guest L2 page maps 4GB of virtual address
432
 * space, while a PAE- or 64-bit shadow L2 page maps 1GB of virtual address
433
 * space.)
434
 */
435
436
#if GUEST_PAGING_LEVELS == 2
437
/* From one page of a multi-page shadow, find the next one */
438
static inline mfn_t sh_next_page(mfn_t smfn)
439
0
{
440
0
    struct page_info *pg = mfn_to_page(smfn), *next;
441
0
    struct page_list_head h = PAGE_LIST_HEAD_INIT(h);
442
0
443
0
    ASSERT(pg->u.sh.type == SH_type_l1_32_shadow
444
0
           || pg->u.sh.type == SH_type_fl1_32_shadow
445
0
           || pg->u.sh.type == SH_type_l2_32_shadow);
446
0
    ASSERT(pg->u.sh.type == SH_type_l2_32_shadow || pg->u.sh.head);
447
0
448
0
    next = page_list_next(pg, &h);
449
0
450
0
    ASSERT(next);
451
0
    ASSERT(next->u.sh.type == pg->u.sh.type);
452
0
    ASSERT(!next->u.sh.head);
453
0
    return page_to_mfn(next);
454
0
}
455
#endif
456
457
static inline u32
458
guest_index(void *ptr)
459
0
{
460
0
    return (u32)((unsigned long)ptr & ~PAGE_MASK) / sizeof(guest_l1e_t);
461
0
}
462
463
static u32
464
shadow_l1_index(mfn_t *smfn, u32 guest_index)
465
0
{
466
0
#if (GUEST_PAGING_LEVELS == 2)
467
0
    ASSERT(mfn_to_page(*smfn)->u.sh.head);
468
0
    if ( guest_index >= SHADOW_L1_PAGETABLE_ENTRIES )
469
0
        *smfn = sh_next_page(*smfn);
470
0
    return (guest_index % SHADOW_L1_PAGETABLE_ENTRIES);
471
0
#else
472
    return guest_index;
473
#endif
474
0
}
475
476
static u32
477
shadow_l2_index(mfn_t *smfn, u32 guest_index)
478
0
{
479
0
#if (GUEST_PAGING_LEVELS == 2)
480
0
    int i;
481
0
    ASSERT(mfn_to_page(*smfn)->u.sh.head);
482
0
    // Because we use 2 shadow l2 entries for each guest entry, the number of
483
0
    // guest entries per shadow page is SHADOW_L2_PAGETABLE_ENTRIES/2
484
0
    for ( i = 0; i < guest_index / (SHADOW_L2_PAGETABLE_ENTRIES / 2); i++ )
485
0
        *smfn = sh_next_page(*smfn);
486
0
    // We multiply by two to get the index of the first of the two entries
487
0
    // used to shadow the specified guest entry.
488
0
    return (guest_index % (SHADOW_L2_PAGETABLE_ENTRIES / 2)) * 2;
489
0
#else
490
    return guest_index;
491
#endif
492
0
}
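
Concretely, for a 32-bit guest shadowed with PAE/64-bit page tables (1024 four-byte guest entries per page versus 512 eight-byte shadow entries), a guest L1 spills onto two shadow pages and each guest L2 entry occupies a pair of shadow slots, so only 256 guest L2 entries fit per shadow page. The standalone sketch below walks through just that arithmetic; the loop strides and printing are illustrative only.

/* Standalone sketch (not Xen code) of the shadow_l1_index()/shadow_l2_index()
 * arithmetic for a 2-level guest shadowed with 512-entry PAE/64-bit pages.
 * The "shadow page" column corresponds to the number of sh_next_page() hops. */
#include <stdio.h>

#define GUEST_ENTRIES   1024   /* 32-bit guest: 1024 x 4-byte PTEs per page */
#define SHADOW_ENTRIES   512   /* PAE/64-bit shadow: 512 x 8-byte PTEs      */

int main(void)
{
    unsigned int g;

    /* Guest L1 index -> (shadow page, index): the guest L1 needs two
     * contiguous shadow pages, the upper half landing in page 1. */
    for ( g = 0; g < GUEST_ENTRIES; g += 511 )
        printf("gl1 %4u -> shadow page %u, index %u\n",
               g, g / SHADOW_ENTRIES, g % SHADOW_ENTRIES);

    /* Guest L2 index -> (shadow page, index): each guest entry is shadowed
     * by a pair of entries, so 256 guest entries fit per shadow page and
     * the in-page index is doubled. */
    for ( g = 0; g < GUEST_ENTRIES; g += 255 )
        printf("gl2 %4u -> shadow page %u, index %u\n",
               g, g / (SHADOW_ENTRIES / 2), (g % (SHADOW_ENTRIES / 2)) * 2);

    return 0;
}
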
493
494
#if GUEST_PAGING_LEVELS >= 4
495
496
static u32
497
shadow_l3_index(mfn_t *smfn, u32 guest_index)
498
0
{
499
0
    return guest_index;
500
0
}
501
502
static u32
503
shadow_l4_index(mfn_t *smfn, u32 guest_index)
504
0
{
505
0
    return guest_index;
506
0
}
507
508
#endif // GUEST_PAGING_LEVELS >= 4
509
510
511
/**************************************************************************/
512
/* Function which computes shadow entries from their corresponding guest
513
 * entries.  This is the "heart" of the shadow code. It operates using
514
 * level-1 shadow types, but handles all levels of entry.
515
 * Don't call it directly, but use the four wrappers below.
516
 */
517
518
static always_inline void
519
_sh_propagate(struct vcpu *v,
520
              guest_intpte_t guest_intpte,
521
              mfn_t target_mfn,
522
              void *shadow_entry_ptr,
523
              int level,
524
              fetch_type_t ft,
525
              p2m_type_t p2mt)
526
0
{
527
0
    guest_l1e_t guest_entry = { guest_intpte };
528
0
    shadow_l1e_t *sp = shadow_entry_ptr;
529
0
    struct domain *d = v->domain;
530
0
    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
531
0
    gfn_t target_gfn = guest_l1e_get_gfn(guest_entry);
532
0
    u32 pass_thru_flags;
533
0
    u32 gflags, sflags;
534
0
    bool mmio_mfn;
535
0
536
0
    /* We don't shadow PAE l3s */
537
0
    ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
538
0
539
0
    /* Check there's something for the shadows to map to */
540
0
    if ( (!p2m_is_valid(p2mt) && !p2m_is_grant(p2mt))
541
0
         || !gfn_valid(d, target_gfn) )
542
0
    {
543
0
        *sp = shadow_l1e_empty();
544
0
        goto done;
545
0
    }
546
0
547
0
    gflags = guest_l1e_get_flags(guest_entry);
548
0
549
0
    if ( unlikely(!(gflags & _PAGE_PRESENT)) )
550
0
    {
551
0
#if !(SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
552
        /* If a guest l1 entry is not present, shadow with the magic
553
         * guest-not-present entry. */
554
        if ( level == 1 )
555
            *sp = sh_l1e_gnp();
556
        else
557
#endif /* !OOS */
558
0
            *sp = shadow_l1e_empty();
559
0
        goto done;
560
0
    }
561
0
562
0
    if ( level == 1 && p2mt == p2m_mmio_dm )
563
0
    {
564
0
        /* Guest l1e maps emulated MMIO space */
565
0
        *sp = sh_l1e_mmio(target_gfn, gflags);
566
0
        if ( !d->arch.paging.shadow.has_fast_mmio_entries )
567
0
            d->arch.paging.shadow.has_fast_mmio_entries = 1;
568
0
        goto done;
569
0
    }
570
0
571
0
    // Must have a valid target_mfn unless this is a prefetch or an l1
572
0
    // pointing at MMIO space.  In the case of a prefetch, an invalid
573
0
    // mfn means that we can not usefully shadow anything, and so we
574
0
    // return early.
575
0
    //
576
0
    mmio_mfn = !mfn_valid(target_mfn)
577
0
               || (level == 1
578
0
                   && page_get_owner(mfn_to_page(target_mfn)) == dom_io);
579
0
    if ( mmio_mfn
580
0
         && !(level == 1 && (!shadow_mode_refcounts(d)
581
0
                             || p2mt == p2m_mmio_direct)) )
582
0
    {
583
0
        ASSERT((ft == ft_prefetch));
584
0
        *sp = shadow_l1e_empty();
585
0
        goto done;
586
0
    }
587
0
588
0
    // Propagate bits from the guest to the shadow.
589
0
    // Some of these may be overwritten, below.
590
0
    // Since we know the guest's PRESENT bit is set, we also set the shadow's
591
0
    // SHADOW_PRESENT bit.
592
0
    //
593
0
    pass_thru_flags = (_PAGE_ACCESSED | _PAGE_USER |
594
0
                       _PAGE_RW | _PAGE_PRESENT);
595
0
    if ( guest_nx_enabled(v) )
596
0
        pass_thru_flags |= _PAGE_NX_BIT;
597
0
    if ( level == 1 && !shadow_mode_refcounts(d) && mmio_mfn )
598
0
        pass_thru_flags |= _PAGE_PAT | _PAGE_PCD | _PAGE_PWT;
599
0
    sflags = gflags & pass_thru_flags;
600
0
601
0
    /*
602
0
     * For HVM domains with direct access to MMIO areas, set the correct
603
0
     * caching attributes in the shadows to match what was asked for.
604
0
     */
605
0
    if ( (level == 1) && is_hvm_domain(d) &&
606
0
         !is_xen_heap_mfn(mfn_x(target_mfn)) )
607
0
    {
608
0
        int type;
609
0
610
0
        ASSERT(!(sflags & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)));
611
0
612
0
        /* compute the PAT index for shadow page entry when VT-d is enabled
613
0
         * and device assigned.
614
0
         * 1) direct MMIO: compute the PAT index with gMTRR=UC and gPAT.
615
0
         * 2) if enables snoop control, compute the PAT index as WB.
616
0
         * 3) if disables snoop control, compute the PAT index with
617
0
         *    gMTRR and gPAT.
618
0
         */
619
0
        if ( !mmio_mfn &&
620
0
             (type = hvm_get_mem_pinned_cacheattr(d, target_gfn, 0)) >= 0 )
621
0
            sflags |= pat_type_2_pte_flags(type);
622
0
        else if ( d->arch.hvm_domain.is_in_uc_mode )
623
0
            sflags |= pat_type_2_pte_flags(PAT_TYPE_UNCACHABLE);
624
0
        else
625
0
            if ( iomem_access_permitted(d, mfn_x(target_mfn), mfn_x(target_mfn)) )
626
0
            {
627
0
                if ( p2mt == p2m_mmio_direct )
628
0
                    sflags |= get_pat_flags(v,
629
0
                            gflags,
630
0
                            gfn_to_paddr(target_gfn),
631
0
                            pfn_to_paddr(mfn_x(target_mfn)),
632
0
                            MTRR_TYPE_UNCACHABLE);
633
0
                else if ( iommu_snoop )
634
0
                    sflags |= pat_type_2_pte_flags(PAT_TYPE_WRBACK);
635
0
                else
636
0
                    sflags |= get_pat_flags(v,
637
0
                            gflags,
638
0
                            gfn_to_paddr(target_gfn),
639
0
                            pfn_to_paddr(mfn_x(target_mfn)),
640
0
                            NO_HARDCODE_MEM_TYPE);
641
0
            }
642
0
    }
643
0
644
0
    // Set the A&D bits for higher level shadows.
645
0
    // Higher level entries do not, strictly speaking, have dirty bits, but
646
0
    // since we use shadow linear tables, each of these entries may, at some
647
0
    // point in time, also serve as a shadow L1 entry.
648
0
    // By setting both the A&D bits in each of these, we eliminate the burden
649
0
    // on the hardware to update these bits on initial accesses.
650
0
    //
651
0
    if ( (level > 1) && !((SHADOW_PAGING_LEVELS == 3) && (level == 3)) )
652
0
        sflags |= _PAGE_ACCESSED | _PAGE_DIRTY;
653
0
654
0
    // If the A or D bit has not yet been set in the guest, then we must
655
0
    // prevent the corresponding kind of access.
656
0
    //
657
0
    if ( unlikely(!(gflags & _PAGE_ACCESSED)) )
658
0
        sflags &= ~_PAGE_PRESENT;
659
0
660
0
    /* D bits exist in L1es and PSE L2es */
661
0
    if ( unlikely(((level == 1) ||
662
0
                   ((level == 2) &&
663
0
                    (gflags & _PAGE_PSE) &&
664
0
                    guest_can_use_l2_superpages(v)))
665
0
                  && !(gflags & _PAGE_DIRTY)) )
666
0
        sflags &= ~_PAGE_RW;
667
0
668
0
    // shadow_mode_log_dirty support
669
0
    //
670
0
    // Only allow the guest write access to a page a) on a demand fault,
671
0
    // or b) if the page is already marked as dirty.
672
0
    //
673
0
    // (We handle log-dirty entirely inside the shadow code, without using the
674
0
    // p2m_ram_logdirty p2m type: only HAP uses that.)
675
0
    if ( unlikely((level == 1) && shadow_mode_log_dirty(d)) )
676
0
    {
677
0
        if ( mfn_valid(target_mfn) ) {
678
0
            if ( ft & FETCH_TYPE_WRITE )
679
0
                paging_mark_dirty(d, target_mfn);
680
0
            else if ( !paging_mfn_is_dirty(d, target_mfn) )
681
0
                sflags &= ~_PAGE_RW;
682
0
        }
683
0
    }
684
0
685
0
    if ( unlikely((level == 1) && dirty_vram
686
0
            && dirty_vram->last_dirty == -1
687
0
            && gfn_x(target_gfn) >= dirty_vram->begin_pfn
688
0
            && gfn_x(target_gfn) < dirty_vram->end_pfn) )
689
0
    {
690
0
        if ( ft & FETCH_TYPE_WRITE )
691
0
            dirty_vram->last_dirty = NOW();
692
0
        else
693
0
            sflags &= ~_PAGE_RW;
694
0
    }
695
0
696
0
    /* Read-only memory */
697
0
    if ( p2m_is_readonly(p2mt) )
698
0
        sflags &= ~_PAGE_RW;
699
0
    else if ( p2mt == p2m_mmio_direct &&
700
0
              rangeset_contains_singleton(mmio_ro_ranges, mfn_x(target_mfn)) )
701
0
    {
702
0
        sflags &= ~(_PAGE_RW | _PAGE_PAT);
703
0
        sflags |= _PAGE_PCD | _PAGE_PWT;
704
0
    }
705
0
706
0
    // protect guest page tables
707
0
    //
708
0
    if ( unlikely((level == 1)
709
0
                  && sh_mfn_is_a_page_table(target_mfn)
710
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC )
711
0
                  /* Unless the page is out of sync and the guest is
712
0
                     writing to it. */
713
0
                  && !(mfn_oos_may_write(target_mfn)
714
0
                       && (ft == ft_demand_write))
715
0
#endif /* OOS */
716
0
                  ) )
717
0
        sflags &= ~_PAGE_RW;
718
0
719
0
    // PV guests in 64-bit mode use two different page tables for user vs
720
0
    // supervisor permissions, making the guest's _PAGE_USER bit irrelevant.
721
0
    // It is always shadowed as present...
722
0
    if ( (GUEST_PAGING_LEVELS == 4) && !is_pv_32bit_domain(d)
723
0
         && is_pv_domain(d) )
724
0
    {
725
0
        sflags |= _PAGE_USER;
726
0
    }
727
0
728
0
    *sp = shadow_l1e_from_mfn(target_mfn, sflags);
729
0
730
0
 done:
731
0
    SHADOW_DEBUG(PROPAGATE,
732
0
                 "%s level %u guest %" SH_PRI_gpte " shadow %" SH_PRI_pte "\n",
733
0
                 fetch_type_names[ft], level, guest_entry.l1, sp->l1);
734
0
}
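
Most of _sh_propagate() is bit arithmetic on the guest flags: pass a whitelist of bits through and then, for entries whose accessed or dirty bit the guest has not yet set, withdraw the present or write permission so that the first access faults and the shadow code can set those bits. The sketch below is a much-simplified standalone model of that part only, using the standard x86 PTE bit values; the function and its many special cases are otherwise heavily trimmed.

/* Much-simplified standalone model (not Xen code) of the flag propagation
 * done by _sh_propagate() for a level-1 entry: copy a whitelist of guest
 * bits, then clear PRESENT while the guest A bit is unset and clear RW
 * while the guest D bit is unset, so the first access/write faults and the
 * shadow code gets to set A/D.  Standard x86 PTE bit values. */
#include <stdio.h>

#define PG_PRESENT  0x001
#define PG_RW       0x002
#define PG_USER     0x004
#define PG_ACCESSED 0x020
#define PG_DIRTY    0x040

static unsigned int demo_propagate(unsigned int gflags)
{
    unsigned int pass_thru = PG_PRESENT | PG_RW | PG_USER | PG_ACCESSED;
    unsigned int sflags = gflags & pass_thru;

    if ( !(gflags & PG_ACCESSED) )
        sflags &= ~PG_PRESENT;   /* force a fault on the first access */
    if ( !(gflags & PG_DIRTY) )
        sflags &= ~PG_RW;        /* force a fault on the first write  */

    return sflags;
}

int main(void)
{
    printf("fresh    -> %#05x\n", demo_propagate(PG_PRESENT | PG_RW | PG_USER));
    printf("accessed -> %#05x\n", demo_propagate(PG_PRESENT | PG_RW | PG_USER |
                                                 PG_ACCESSED));
    printf("dirty    -> %#05x\n", demo_propagate(PG_PRESENT | PG_RW | PG_USER |
                                                 PG_ACCESSED | PG_DIRTY));
    return 0;
}
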
735
736
737
/* These four wrappers give us a little bit of type-safety back around
738
 * the use of void-* pointers and intpte types in _sh_propagate(), and
739
 * allow the compiler to optimize out some level checks. */
740
741
#if GUEST_PAGING_LEVELS >= 4
742
static void
743
l4e_propagate_from_guest(struct vcpu *v,
744
                         guest_l4e_t gl4e,
745
                         mfn_t sl3mfn,
746
                         shadow_l4e_t *sl4e,
747
                         fetch_type_t ft)
748
0
{
749
0
    if ( !mfn_eq(sl3mfn, INVALID_MFN) &&
750
0
         (guest_l4e_get_flags(gl4e) & _PAGE_PRESENT) )
751
0
        ASSERT(!guest_l4e_rsvd_bits(v, gl4e));
752
0
753
0
    _sh_propagate(v, gl4e.l4, sl3mfn, sl4e, 4, ft, p2m_ram_rw);
754
0
}
755
756
static void
757
l3e_propagate_from_guest(struct vcpu *v,
758
                         guest_l3e_t gl3e,
759
                         mfn_t sl2mfn,
760
                         shadow_l3e_t *sl3e,
761
                         fetch_type_t ft)
762
0
{
763
0
    if ( !mfn_eq(sl2mfn, INVALID_MFN) &&
764
0
         (guest_l3e_get_flags(gl3e) & _PAGE_PRESENT) )
765
0
        ASSERT(!guest_l3e_rsvd_bits(v, gl3e));
766
0
767
0
    _sh_propagate(v, gl3e.l3, sl2mfn, sl3e, 3, ft, p2m_ram_rw);
768
0
}
769
#endif // GUEST_PAGING_LEVELS >= 4
770
771
static void
772
l2e_propagate_from_guest(struct vcpu *v,
773
                         guest_l2e_t gl2e,
774
                         mfn_t sl1mfn,
775
                         shadow_l2e_t *sl2e,
776
                         fetch_type_t ft)
777
0
{
778
0
    if ( !mfn_eq(sl1mfn, INVALID_MFN) &&
779
0
         (guest_l2e_get_flags(gl2e) & _PAGE_PRESENT) )
780
0
        ASSERT(!guest_l2e_rsvd_bits(v, gl2e));
781
0
782
0
    _sh_propagate(v, gl2e.l2, sl1mfn, sl2e, 2, ft, p2m_ram_rw);
783
0
}
784
785
static void
786
l1e_propagate_from_guest(struct vcpu *v,
787
                         guest_l1e_t gl1e,
788
                         mfn_t gmfn,
789
                         shadow_l1e_t *sl1e,
790
                         fetch_type_t ft,
791
                         p2m_type_t p2mt)
792
0
{
793
0
    if ( !mfn_eq(gmfn, INVALID_MFN) &&
794
0
         (guest_l1e_get_flags(gl1e) & _PAGE_PRESENT) )
795
0
        ASSERT(!guest_l1e_rsvd_bits(v, gl1e));
796
0
797
0
    _sh_propagate(v, gl1e.l1, gmfn, sl1e, 1, ft, p2mt);
798
0
}
799
800
801
/**************************************************************************/
802
/* These functions update shadow entries (and do bookkeeping on the shadow
803
 * tables they are in).  It is intended that they are the only
804
 * functions which ever write (non-zero) data onto a shadow page.
805
 */
806
807
static inline void safe_write_entry(void *dst, void *src)
808
/* Copy one PTE safely when processors might be running on the
809
 * destination pagetable.   This does *not* give safety against
810
 * concurrent writes (that's what the paging lock is for), just
811
 * stops the hardware picking up partially written entries. */
812
0
{
813
0
    volatile unsigned long *d = dst;
814
0
    unsigned long *s = src;
815
0
    ASSERT(!((unsigned long) d & (sizeof (shadow_l1e_t) - 1)));
816
0
    /* In 64-bit, sizeof(pte) == sizeof(ulong) == 1 word,
817
0
     * which will be an atomic write, since the entry is aligned. */
818
0
    BUILD_BUG_ON(sizeof (shadow_l1e_t) != sizeof (unsigned long));
819
0
    *d = *s;
820
0
}
821
822
823
static inline void
824
shadow_write_entries(void *d, void *s, int entries, mfn_t mfn)
825
/* This function does the actual writes to shadow pages.
826
 * It must not be called directly, since it doesn't do the bookkeeping
827
 * that shadow_set_l*e() functions do. */
828
0
{
829
0
    shadow_l1e_t *dst = d;
830
0
    shadow_l1e_t *src = s;
831
0
    void *map = NULL;
832
0
    int i;
833
0
834
0
    /* Because we mirror access rights at all levels in the shadow, an
835
0
     * l2 (or higher) entry with the RW bit cleared will leave us with
836
0
     * no write access through the linear map.
837
0
     * We detect that by writing to the shadow with copy_to_user() and
838
0
     * using map_domain_page() to get a writeable mapping if we need to. */
839
0
    if ( __copy_to_user(d, d, sizeof (unsigned long)) != 0 )
840
0
    {
841
0
        perfc_incr(shadow_linear_map_failed);
842
0
        map = map_domain_page(mfn);
843
0
        dst = map + ((unsigned long)dst & (PAGE_SIZE - 1));
844
0
    }
845
0
846
0
847
0
    for ( i = 0; i < entries; i++ )
848
0
        safe_write_entry(dst++, src++);
849
0
850
0
    if ( map != NULL ) unmap_domain_page(map);
851
0
}
852
853
/* type is only used to distinguish grant map pages from ordinary RAM
854
 * i.e. non-p2m_is_grant() pages are treated as p2m_ram_rw.  */
855
static int inline
856
shadow_get_page_from_l1e(shadow_l1e_t sl1e, struct domain *d, p2m_type_t type)
857
0
{
858
0
    int res;
859
0
    mfn_t mfn;
860
0
    struct domain *owner;
861
0
862
0
    ASSERT(!sh_l1e_is_magic(sl1e));
863
0
864
0
    if ( !shadow_mode_refcounts(d) )
865
0
        return 1;
866
0
867
0
    res = get_page_from_l1e(sl1e, d, d);
868
0
869
0
    // If a privileged domain is attempting to install a map of a page it does
870
0
    // not own, we let it succeed anyway.
871
0
    //
872
0
    if ( unlikely(res < 0) &&
873
0
         !shadow_mode_translate(d) &&
874
0
         mfn_valid(mfn = shadow_l1e_get_mfn(sl1e)) &&
875
0
         (owner = page_get_owner(mfn_to_page(mfn))) &&
876
0
         (d != owner) )
877
0
    {
878
0
        res = xsm_priv_mapping(XSM_TARGET, d, owner);
879
0
        if ( !res ) {
880
0
            res = get_page_from_l1e(sl1e, d, owner);
881
0
            SHADOW_PRINTK("privileged domain %d installs map of mfn %"PRI_mfn" "
882
0
                           "which is owned by d%d: %s\n",
883
0
                           d->domain_id, mfn_x(mfn), owner->domain_id,
884
0
                           res >= 0 ? "success" : "failed");
885
0
        }
886
0
    }
887
0
888
0
    /* Okay, it might still be a grant mapping PTE.  Try it. */
889
0
    if ( unlikely(res < 0) &&
890
0
         (type == p2m_grant_map_rw ||
891
0
          (type == p2m_grant_map_ro &&
892
0
           !(shadow_l1e_get_flags(sl1e) & _PAGE_RW))) )
893
0
    {
894
0
        /* It's a grant mapping.  The grant table implementation will
895
0
           already have checked that we're supposed to have access, so
896
0
           we can just grab a reference directly. */
897
0
        mfn = shadow_l1e_get_mfn(sl1e);
898
0
        if ( mfn_valid(mfn) )
899
0
            res = get_page_from_l1e(sl1e, d, page_get_owner(mfn_to_page(mfn)));
900
0
    }
901
0
902
0
    if ( unlikely(res < 0) )
903
0
    {
904
0
        perfc_incr(shadow_get_page_fail);
905
0
        SHADOW_PRINTK("failed: l1e=" SH_PRI_pte "\n");
906
0
    }
907
0
908
0
    return res;
909
0
}
910
911
static void inline
912
shadow_put_page_from_l1e(shadow_l1e_t sl1e, struct domain *d)
913
0
{
914
0
    if ( !shadow_mode_refcounts(d) )
915
0
        return;
916
0
917
0
    put_page_from_l1e(sl1e, d);
918
0
}
919
920
#if GUEST_PAGING_LEVELS >= 4
921
static int shadow_set_l4e(struct domain *d,
922
                          shadow_l4e_t *sl4e,
923
                          shadow_l4e_t new_sl4e,
924
                          mfn_t sl4mfn)
925
0
{
926
0
    int flags = 0, ok;
927
0
    shadow_l4e_t old_sl4e;
928
0
    paddr_t paddr;
929
0
    ASSERT(sl4e != NULL);
930
0
    old_sl4e = *sl4e;
931
0
932
0
    if ( old_sl4e.l4 == new_sl4e.l4 ) return 0; /* Nothing to do */
933
0
934
0
    paddr = ((((paddr_t)mfn_x(sl4mfn)) << PAGE_SHIFT)
935
0
             | (((unsigned long)sl4e) & ~PAGE_MASK));
936
0
937
0
    if ( shadow_l4e_get_flags(new_sl4e) & _PAGE_PRESENT )
938
0
    {
939
0
        /* About to install a new reference */
940
0
        mfn_t sl3mfn = shadow_l4e_get_mfn(new_sl4e);
941
0
        ok = sh_get_ref(d, sl3mfn, paddr);
942
0
        /* Are we pinning l3 shadows to handle weird linux behaviour? */
943
0
        if ( sh_type_is_pinnable(d, SH_type_l3_64_shadow) )
944
0
            ok |= sh_pin(d, sl3mfn);
945
0
        if ( !ok )
946
0
        {
947
0
            domain_crash(d);
948
0
            return SHADOW_SET_ERROR;
949
0
        }
950
0
    }
951
0
952
0
    /* Write the new entry */
953
0
    shadow_write_entries(sl4e, &new_sl4e, 1, sl4mfn);
954
0
    flags |= SHADOW_SET_CHANGED;
955
0
956
0
    if ( shadow_l4e_get_flags(old_sl4e) & _PAGE_PRESENT )
957
0
    {
958
0
        /* We lost a reference to an old mfn. */
959
0
        mfn_t osl3mfn = shadow_l4e_get_mfn(old_sl4e);
960
0
        if ( (mfn_x(osl3mfn) != mfn_x(shadow_l4e_get_mfn(new_sl4e)))
961
0
             || !perms_strictly_increased(shadow_l4e_get_flags(old_sl4e),
962
0
                                          shadow_l4e_get_flags(new_sl4e)) )
963
0
        {
964
0
            flags |= SHADOW_SET_FLUSH;
965
0
        }
966
0
        sh_put_ref(d, osl3mfn, paddr);
967
0
    }
968
0
    return flags;
969
0
}
970
971
static int shadow_set_l3e(struct domain *d,
972
                          shadow_l3e_t *sl3e,
973
                          shadow_l3e_t new_sl3e,
974
                          mfn_t sl3mfn)
975
0
{
976
0
    int flags = 0;
977
0
    shadow_l3e_t old_sl3e;
978
0
    paddr_t paddr;
979
0
    ASSERT(sl3e != NULL);
980
0
    old_sl3e = *sl3e;
981
0
982
0
    if ( old_sl3e.l3 == new_sl3e.l3 ) return 0; /* Nothing to do */
983
0
984
0
    paddr = ((((paddr_t)mfn_x(sl3mfn)) << PAGE_SHIFT)
985
0
             | (((unsigned long)sl3e) & ~PAGE_MASK));
986
0
987
0
    if ( shadow_l3e_get_flags(new_sl3e) & _PAGE_PRESENT )
988
0
    {
989
0
        /* About to install a new reference */
990
0
        if ( !sh_get_ref(d, shadow_l3e_get_mfn(new_sl3e), paddr) )
991
0
        {
992
0
            domain_crash(d);
993
0
            return SHADOW_SET_ERROR;
994
0
        }
995
0
    }
996
0
997
0
    /* Write the new entry */
998
0
    shadow_write_entries(sl3e, &new_sl3e, 1, sl3mfn);
999
0
    flags |= SHADOW_SET_CHANGED;
1000
0
1001
0
    if ( shadow_l3e_get_flags(old_sl3e) & _PAGE_PRESENT )
1002
0
    {
1003
0
        /* We lost a reference to an old mfn. */
1004
0
        mfn_t osl2mfn = shadow_l3e_get_mfn(old_sl3e);
1005
0
        if ( (mfn_x(osl2mfn) != mfn_x(shadow_l3e_get_mfn(new_sl3e))) ||
1006
0
             !perms_strictly_increased(shadow_l3e_get_flags(old_sl3e),
1007
0
                                       shadow_l3e_get_flags(new_sl3e)) )
1008
0
        {
1009
0
            flags |= SHADOW_SET_FLUSH;
1010
0
        }
1011
0
        sh_put_ref(d, osl2mfn, paddr);
1012
0
    }
1013
0
    return flags;
1014
0
}
1015
#endif /* GUEST_PAGING_LEVELS >= 4 */
1016
1017
static int shadow_set_l2e(struct domain *d,
1018
                          shadow_l2e_t *sl2e,
1019
                          shadow_l2e_t new_sl2e,
1020
                          mfn_t sl2mfn)
1021
0
{
1022
0
    int flags = 0;
1023
0
    shadow_l2e_t old_sl2e;
1024
0
    paddr_t paddr;
1025
0
1026
0
#if GUEST_PAGING_LEVELS == 2
1027
0
    /* In 2-on-3 we work with pairs of l2es pointing at two-page
1028
0
     * shadows.  Reference counting and up-pointers track from the first
1029
0
     * page of the shadow to the first l2e, so make sure that we're
1030
0
     * working with those:
1031
0
     * Start with a pair of identical entries */
1032
0
    shadow_l2e_t pair[2] = { new_sl2e, new_sl2e };
1033
0
    /* Align the pointer down so it's pointing at the first of the pair */
1034
0
    sl2e = (shadow_l2e_t *)((unsigned long)sl2e & ~(sizeof(shadow_l2e_t)));
1035
0
#endif
1036
0
1037
0
    ASSERT(sl2e != NULL);
1038
0
    old_sl2e = *sl2e;
1039
0
1040
0
    if ( old_sl2e.l2 == new_sl2e.l2 ) return 0; /* Nothing to do */
1041
0
1042
0
    paddr = ((((paddr_t)mfn_x(sl2mfn)) << PAGE_SHIFT)
1043
0
             | (((unsigned long)sl2e) & ~PAGE_MASK));
1044
0
1045
0
    if ( shadow_l2e_get_flags(new_sl2e) & _PAGE_PRESENT )
1046
0
    {
1047
0
        mfn_t sl1mfn = shadow_l2e_get_mfn(new_sl2e);
1048
0
        ASSERT(mfn_to_page(sl1mfn)->u.sh.head);
1049
0
1050
0
        /* About to install a new reference */
1051
0
        if ( !sh_get_ref(d, sl1mfn, paddr) )
1052
0
        {
1053
0
            domain_crash(d);
1054
0
            return SHADOW_SET_ERROR;
1055
0
        }
1056
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
1057
0
        {
1058
0
            struct page_info *sp = mfn_to_page(sl1mfn);
1059
0
            mfn_t gl1mfn;
1060
0
1061
0
            ASSERT(sp->u.sh.head);
1062
0
            gl1mfn = backpointer(sp);
1063
0
            /* If the shadow is a fl1 then the backpointer contains
1064
0
               the GFN instead of the GMFN, and it's definitely not
1065
0
               OOS. */
1066
0
            if ( (sp->u.sh.type != SH_type_fl1_shadow) && mfn_valid(gl1mfn)
1067
0
                 && mfn_is_out_of_sync(gl1mfn) )
1068
0
                sh_resync(d, gl1mfn);
1069
0
        }
1070
0
#endif
1071
0
#if GUEST_PAGING_LEVELS == 2
1072
0
        /* Update the second entry to point to the second half of the l1 */
1073
0
        sl1mfn = sh_next_page(sl1mfn);
1074
0
        pair[1] = shadow_l2e_from_mfn(sl1mfn, shadow_l2e_get_flags(new_sl2e));
1075
0
#endif
1076
0
    }
1077
0
1078
0
    /* Write the new entry */
1079
0
#if GUEST_PAGING_LEVELS == 2
1080
0
    shadow_write_entries(sl2e, &pair, 2, sl2mfn);
1081
0
#else /* normal case */
1082
    shadow_write_entries(sl2e, &new_sl2e, 1, sl2mfn);
1083
#endif
1084
0
    flags |= SHADOW_SET_CHANGED;
1085
0
1086
0
    if ( shadow_l2e_get_flags(old_sl2e) & _PAGE_PRESENT )
1087
0
    {
1088
0
        /* We lost a reference to an old mfn. */
1089
0
        mfn_t osl1mfn = shadow_l2e_get_mfn(old_sl2e);
1090
0
        if ( (mfn_x(osl1mfn) != mfn_x(shadow_l2e_get_mfn(new_sl2e))) ||
1091
0
             !perms_strictly_increased(shadow_l2e_get_flags(old_sl2e),
1092
0
                                       shadow_l2e_get_flags(new_sl2e)) )
1093
0
        {
1094
0
            flags |= SHADOW_SET_FLUSH;
1095
0
        }
1096
0
        sh_put_ref(d, osl1mfn, paddr);
1097
0
    }
1098
0
    return flags;
1099
0
}
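
The 2-on-3 path above relies on a small alignment trick: because shadow L1 pairs start on 16-byte boundaries, masking an already 8-byte-aligned pointer with ~sizeof(shadow_l2e_t) clears bit 3 and rounds it down to the first entry of its pair. The standalone check below exercises just that arithmetic; the base address is a made-up, page-aligned example value.

/* Standalone check (not Xen code) of the pair-alignment trick used in
 * shadow_set_l2e(): masking off sizeof(entry) (bit 3 for 8-byte entries)
 * rounds an entry pointer down to the first entry of its pair. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uintptr_t base = 0x100000;   /* pretend the shadow page starts here */
    unsigned int i;

    for ( i = 0; i < 4; i++ )
    {
        uintptr_t e    = base + i * sizeof(uint64_t);          /* &sl2e[i] */
        uintptr_t pair = e & ~(uintptr_t)sizeof(uint64_t);     /* pair head */

        printf("entry %u at %#lx -> pair starts at entry %lu\n",
               i, (unsigned long)e,
               (unsigned long)((pair - base) / sizeof(uint64_t)));
    }
    return 0;
}
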
1100
1101
static inline void shadow_vram_get_l1e(shadow_l1e_t new_sl1e,
1102
                                       shadow_l1e_t *sl1e,
1103
                                       mfn_t sl1mfn,
1104
                                       struct domain *d)
1105
0
{
1106
0
    mfn_t mfn = shadow_l1e_get_mfn(new_sl1e);
1107
0
    int flags = shadow_l1e_get_flags(new_sl1e);
1108
0
    unsigned long gfn;
1109
0
    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
1110
0
1111
0
    if ( !dirty_vram         /* tracking disabled? */
1112
0
         || !(flags & _PAGE_RW) /* read-only mapping? */
1113
0
         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
1114
0
        return;
1115
0
1116
0
    gfn = mfn_to_gfn(d, mfn);
1117
0
    /* Page sharing not supported on shadow PTs */
1118
0
    BUG_ON(SHARED_M2P(gfn));
1119
0
1120
0
    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
1121
0
    {
1122
0
        unsigned long i = gfn - dirty_vram->begin_pfn;
1123
0
        struct page_info *page = mfn_to_page(mfn);
1124
0
1125
0
        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
1126
0
            /* Initial guest reference, record it */
1127
0
            dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn))
1128
0
                | ((unsigned long)sl1e & ~PAGE_MASK);
1129
0
    }
1130
0
}
1131
1132
static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e,
1133
                                       shadow_l1e_t *sl1e,
1134
                                       mfn_t sl1mfn,
1135
                                       struct domain *d)
1136
0
{
1137
0
    mfn_t mfn = shadow_l1e_get_mfn(old_sl1e);
1138
0
    int flags = shadow_l1e_get_flags(old_sl1e);
1139
0
    unsigned long gfn;
1140
0
    struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
1141
0
1142
0
    if ( !dirty_vram         /* tracking disabled? */
1143
0
         || !(flags & _PAGE_RW) /* read-only mapping? */
1144
0
         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
1145
0
        return;
1146
0
1147
0
    gfn = mfn_to_gfn(d, mfn);
1148
0
    /* Page sharing not supported on shadow PTs */
1149
0
    BUG_ON(SHARED_M2P(gfn));
1150
0
1151
0
    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
1152
0
    {
1153
0
        unsigned long i = gfn - dirty_vram->begin_pfn;
1154
0
        struct page_info *page = mfn_to_page(mfn);
1155
0
        int dirty = 0;
1156
0
        paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn))
1157
0
            | ((unsigned long)sl1e & ~PAGE_MASK);
1158
0
1159
0
        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
1160
0
        {
1161
0
            /* Last reference */
1162
0
            if ( dirty_vram->sl1ma[i] == INVALID_PADDR ) {
1163
0
                /* We didn't know it was that one, let's say it is dirty */
1164
0
                dirty = 1;
1165
0
            }
1166
0
            else
1167
0
            {
1168
0
                ASSERT(dirty_vram->sl1ma[i] == sl1ma);
1169
0
                dirty_vram->sl1ma[i] = INVALID_PADDR;
1170
0
                if ( flags & _PAGE_DIRTY )
1171
0
                    dirty = 1;
1172
0
            }
1173
0
        }
1174
0
        else
1175
0
        {
1176
0
            /* We had more than one reference, just consider the page dirty. */
1177
0
            dirty = 1;
1178
0
            /* Check that it's not the one we recorded. */
1179
0
            if ( dirty_vram->sl1ma[i] == sl1ma )
1180
0
            {
1181
0
                /* Too bad, we remembered the wrong one... */
1182
0
                dirty_vram->sl1ma[i] = INVALID_PADDR;
1183
0
            }
1184
0
            else
1185
0
            {
1186
0
                /* Ok, our recorded sl1e is still pointing to this page, let's
1187
0
                 * just hope it will remain. */
1188
0
            }
1189
0
        }
1190
0
        if ( dirty )
1191
0
        {
1192
0
            dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
1193
0
            dirty_vram->last_dirty = NOW();
1194
0
        }
1195
0
    }
1196
0
}
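
The dirty-VRAM bookkeeping above reduces to: a gfn inside the tracked range selects bit (gfn - begin_pfn) in a byte-array bitmap. The one-screen standalone sketch below shows that index arithmetic; the range bounds are invented example values.

/* Standalone sketch (not Xen code) of the dirty-VRAM bitmap arithmetic
 * used above: i = gfn - begin_pfn, then set bit i in a byte array. */
#include <stdio.h>

#define BEGIN_PFN 0xf0000UL
#define END_PFN   0xf0020UL   /* 32 tracked frames -> 4 bitmap bytes */

static unsigned char dirty_bitmap[(END_PFN - BEGIN_PFN + 7) / 8];

static void mark_dirty(unsigned long gfn)
{
    if ( gfn >= BEGIN_PFN && gfn < END_PFN )
    {
        unsigned long i = gfn - BEGIN_PFN;

        dirty_bitmap[i / 8] |= 1 << (i % 8);
    }
}

int main(void)
{
    unsigned int i;

    mark_dirty(0xf0003);
    mark_dirty(0xf001f);
    mark_dirty(0xf1000);   /* outside the tracked range: ignored */

    for ( i = 0; i < sizeof(dirty_bitmap); i++ )
        printf("byte %u = %#04x\n", i, (unsigned int)dirty_bitmap[i]);
    return 0;
}
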
1197
1198
static int shadow_set_l1e(struct domain *d,
1199
                          shadow_l1e_t *sl1e,
1200
                          shadow_l1e_t new_sl1e,
1201
                          p2m_type_t new_type,
1202
                          mfn_t sl1mfn)
1203
0
{
1204
0
    int flags = 0;
1205
0
    shadow_l1e_t old_sl1e;
1206
0
#if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
1207
0
    mfn_t new_gmfn = shadow_l1e_get_mfn(new_sl1e);
1208
0
#endif
1209
0
    ASSERT(sl1e != NULL);
1210
0
1211
0
#if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
1212
0
    if ( mfn_valid(new_gmfn) && mfn_oos_may_write(new_gmfn)
1213
0
         && ((shadow_l1e_get_flags(new_sl1e) & (_PAGE_RW|_PAGE_PRESENT))
1214
0
             == (_PAGE_RW|_PAGE_PRESENT)) )
1215
0
        oos_fixup_add(d, new_gmfn, sl1mfn, pgentry_ptr_to_slot(sl1e));
1216
0
#endif
1217
0
1218
0
    old_sl1e = *sl1e;
1219
0
1220
0
    if ( old_sl1e.l1 == new_sl1e.l1 ) return 0; /* Nothing to do */
1221
0
1222
0
    if ( (shadow_l1e_get_flags(new_sl1e) & _PAGE_PRESENT)
1223
0
         && !sh_l1e_is_magic(new_sl1e) )
1224
0
    {
1225
0
        /* About to install a new reference */
1226
0
        if ( shadow_mode_refcounts(d) )
1227
0
        {
1228
0
#define PAGE_FLIPPABLE (_PAGE_RW | _PAGE_PWT | _PAGE_PCD | _PAGE_PAT)
1229
0
            int rc;
1230
0
1231
0
            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_GET_REF);
1232
0
            switch ( rc = shadow_get_page_from_l1e(new_sl1e, d, new_type) )
1233
0
            {
1234
0
            default:
1235
0
                /* Doesn't look like a pagetable. */
1236
0
                flags |= SHADOW_SET_ERROR;
1237
0
                new_sl1e = shadow_l1e_empty();
1238
0
                break;
1239
0
            case PAGE_FLIPPABLE & -PAGE_FLIPPABLE ... PAGE_FLIPPABLE:
1240
0
                ASSERT(!(rc & ~PAGE_FLIPPABLE));
1241
0
                new_sl1e = shadow_l1e_flip_flags(new_sl1e, rc);
1242
0
                /* fall through */
1243
0
            case 0:
1244
0
                shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
1245
0
                break;
1246
0
            }
1247
0
#undef PAGE_FLIPPABLE
1248
0
        }
1249
0
    }
1250
0
1251
0
    /* Write the new entry */
1252
0
    shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
1253
0
    flags |= SHADOW_SET_CHANGED;
1254
0
1255
0
    if ( (shadow_l1e_get_flags(old_sl1e) & _PAGE_PRESENT)
1256
0
         && !sh_l1e_is_magic(old_sl1e) )
1257
0
    {
1258
0
        /* We lost a reference to an old mfn. */
1259
0
        /* N.B. Unlike higher-level sets, never need an extra flush
1260
0
         * when writing an l1e.  Because it points to the same guest frame
1261
0
         * as the guest l1e did, it's the guest's responsibility to
1262
0
         * trigger a flush later. */
1263
0
        if ( shadow_mode_refcounts(d) )
1264
0
        {
1265
0
            shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d);
1266
0
            shadow_put_page_from_l1e(old_sl1e, d);
1267
0
            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_PUT_REF);
1268
0
        }
1269
0
    }
1270
0
    return flags;
1271
0
}
1272
1273
1274
/**************************************************************************/
1275
/* Macros to walk pagetables.  These take the shadow of a pagetable and
1276
 * walk every "interesting" entry.  That is, they don't touch Xen mappings,
1277
 * and for 32-bit l2s shadowed onto PAE or 64-bit, they only touch every
1278
 * second entry (since pairs of entries are managed together). For multi-page
1279
 * shadows they walk all pages.
1280
 *
1281
 * Arguments are an MFN, the variable to point to each entry, a variable
1282
 * to indicate that we are done (we will shortcut to the end of the scan
1283
 * when _done != 0), a variable to indicate that we should avoid Xen mappings,
1284
 * and the code.
1285
 *
1286
 * WARNING: These macros have side-effects.  They change the values of both
1287
 * the pointer and the MFN. */
1288
1289
static inline void increment_ptr_to_guest_entry(void *ptr)
1290
0
{
1291
0
    if ( ptr )
1292
0
    {
1293
0
        guest_l1e_t **entry = ptr;
1294
0
        (*entry)++;
1295
0
    }
1296
0
}
1297
1298
/* All kinds of l1: touch all entries */
1299
0
#define _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code)        \
1300
0
do {                                                                    \
1301
0
    int _i;                                                             \
1302
0
    shadow_l1e_t *_sp = map_domain_page((_sl1mfn));                     \
1303
0
    ASSERT(mfn_to_page(_sl1mfn)->u.sh.type == SH_type_l1_shadow  \
1304
0
           || mfn_to_page(_sl1mfn)->u.sh.type == SH_type_fl1_shadow);\
1305
0
    for ( _i = 0; _i < SHADOW_L1_PAGETABLE_ENTRIES; _i++ )              \
1306
0
    {                                                                   \
1307
0
        (_sl1e) = _sp + _i;                                             \
1308
0
        if ( shadow_l1e_get_flags(*(_sl1e)) & _PAGE_PRESENT )           \
1309
0
            {_code}                                                     \
1310
0
        if ( _done ) break;                                             \
1311
0
        increment_ptr_to_guest_entry(_gl1p);                            \
1312
0
    }                                                                   \
1313
0
    unmap_domain_page(_sp);                                             \
1314
0
} while (0)
1315
1316
/* 32-bit l1, on PAE or 64-bit shadows: need to walk both pages of shadow */
1317
#if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
1318
0
#define SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done,  _code)        \
1319
0
do {                                                                    \
1320
0
    int __done = 0;                                                     \
1321
0
    _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p,                          \
1322
0
                         ({ (__done = _done); }), _code);               \
1323
0
    _sl1mfn = sh_next_page(_sl1mfn);                                    \
1324
0
    if ( !__done )                                                      \
1325
0
        _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p,                      \
1326
0
                             ({ (__done = _done); }), _code);           \
1327
0
} while (0)
1328
#else /* Everything else; l1 shadows are only one page */
1329
#define SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code)         \
1330
0
       _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code)
1331
#endif
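
These iterators take a statement block as the _code argument and honour an early-exit condition through _done, which is why the warning above about side effects matters: the macro itself advances the MFN and the guest-entry pointer as it goes. The self-contained miniature below reproduces the same pattern over a plain array; all names in it are invented for the demo and plain ints stand in for PTEs.

/* Self-contained miniature (not Xen code) of the SHADOW_FOREACH_L*E
 * pattern: a macro that walks a table, runs a caller-supplied statement
 * block for "present" entries, and stops early once the _done expression
 * becomes true. */
#include <stdio.h>

#define DEMO_PRESENT 0x1

#define DEMO_FOREACH(_tbl, _n, _ep, _done, _code)                 \
do {                                                              \
    int _i;                                                       \
    for ( _i = 0; _i < (_n); _i++ )                               \
    {                                                             \
        (_ep) = &(_tbl)[_i];                                      \
        if ( *(_ep) & DEMO_PRESENT )                              \
            {_code}                                               \
        if ( _done ) break;                                       \
    }                                                             \
} while (0)

int main(void)
{
    int table[8] = { 0x11, 0x20, 0x31, 0x41, 0x50, 0x61, 0x71, 0x81 };
    int *ep = NULL, hits = 0;

    /* Count "present" entries but stop after the third one. */
    DEMO_FOREACH(table, 8, ep, hits == 3, { hits++; });

    printf("stopped after %d present entries, last entry value %#x\n",
           hits, *ep);
    return 0;
}
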
1332
1333
1334
#if GUEST_PAGING_LEVELS == 2
1335
1336
/* 32-bit l2 on PAE/64: four pages, touch every second entry */
1337
0
#define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _dom, _code)     \
1338
0
do {                                                                      \
1339
0
    int _i, _j, __done = 0;                                               \
1340
0
    ASSERT(shadow_mode_external(_dom));                                   \
1341
0
    ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_32_shadow);      \
1342
0
    for ( _j = 0; _j < 4 && !__done; _j++ )                               \
1343
0
    {                                                                     \
1344
0
        shadow_l2e_t *_sp = map_domain_page(_sl2mfn);                     \
1345
0
        for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i += 2 )         \
1346
0
        {                                                                 \
1347
0
            (_sl2e) = _sp + _i;                                           \
1348
0
            if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT )         \
1349
0
                {_code}                                                   \
1350
0
            if ( (__done = (_done)) ) break;                              \
1351
0
            increment_ptr_to_guest_entry(_gl2p);                          \
1352
0
        }                                                                 \
1353
0
        unmap_domain_page(_sp);                                           \
1354
0
        if ( _j < 3 ) _sl2mfn = sh_next_page(_sl2mfn);                    \
1355
0
    }                                                                     \
1356
0
} while (0)
1357
1358
#elif GUEST_PAGING_LEVELS == 3
1359
1360
/* PAE: touch all entries */
1361
0
#define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _dom, _code)      \
1362
0
do {                                                                       \
1363
0
    int _i;                                                                \
1364
0
    shadow_l2e_t *_sp = map_domain_page((_sl2mfn));                        \
1365
0
    ASSERT(shadow_mode_external(_dom));                                    \
1366
0
    ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_pae_shadow        \
1367
0
           || mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2h_pae_shadow);  \
1368
0
    for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ )                 \
1369
0
    {                                                                      \
1370
0
        (_sl2e) = _sp + _i;                                                \
1371
0
        if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT )              \
1372
0
            {_code}                                                        \
1373
0
        if ( _done ) break;                                                \
1374
0
        increment_ptr_to_guest_entry(_gl2p);                               \
1375
0
    }                                                                      \
1376
0
    unmap_domain_page(_sp);                                                \
1377
0
} while (0)
1378
1379
#else
1380
1381
/* 64-bit l2: touch all entries except for PAE compat guests. */
1382
0
#define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _dom, _code)       \
1383
0
do {                                                                        \
1384
0
    int _i;                                                                 \
1385
0
    int _xen = !shadow_mode_external(_dom);                                 \
1386
0
    shadow_l2e_t *_sp = map_domain_page((_sl2mfn));                         \
1387
0
    ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_64_shadow ||\
1388
0
           mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2h_64_shadow);\
1389
0
    for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ )                  \
1390
0
    {                                                                       \
1391
0
        if ( (!(_xen))                                                      \
1392
0
             || !is_pv_32bit_domain(_dom)                                   \
1393
0
             || mfn_to_page(_sl2mfn)->u.sh.type != SH_type_l2h_64_shadow    \
1394
0
             || (_i < COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(_dom)) )           \
1395
0
        {                                                                   \
1396
0
            (_sl2e) = _sp + _i;                                             \
1397
0
            if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT )           \
1398
0
                {_code}                                                     \
1399
0
            if ( _done ) break;                                             \
1400
0
            increment_ptr_to_guest_entry(_gl2p);                            \
1401
0
        }                                                                   \
1402
0
    }                                                                       \
1403
0
    unmap_domain_page(_sp);                                                 \
1404
0
} while (0)
1405
1406
#endif /* different kinds of l2 */
1407
1408
#if GUEST_PAGING_LEVELS == 4
1409
1410
/* 64-bit l3: touch all entries */
1411
0
#define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code)         \
1412
0
do {                                                                    \
1413
0
    int _i;                                                             \
1414
0
    shadow_l3e_t *_sp = map_domain_page((_sl3mfn));                     \
1415
0
    ASSERT(mfn_to_page(_sl3mfn)->u.sh.type == SH_type_l3_64_shadow);\
1416
0
    for ( _i = 0; _i < SHADOW_L3_PAGETABLE_ENTRIES; _i++ )              \
1417
0
    {                                                                   \
1418
0
        (_sl3e) = _sp + _i;                                             \
1419
0
        if ( shadow_l3e_get_flags(*(_sl3e)) & _PAGE_PRESENT )           \
1420
0
            {_code}                                                     \
1421
0
        if ( _done ) break;                                             \
1422
0
        increment_ptr_to_guest_entry(_gl3p);                            \
1423
0
    }                                                                   \
1424
0
    unmap_domain_page(_sp);                                             \
1425
0
} while (0)
1426
1427
/* 64-bit l4: avoid Xen mappings */
1428
0
#define SHADOW_FOREACH_L4E(_sl4mfn, _sl4e, _gl4p, _done, _dom, _code)   \
1429
0
do {                                                                    \
1430
0
    shadow_l4e_t *_sp = map_domain_page((_sl4mfn));                     \
1431
0
    int _xen = !shadow_mode_external(_dom);                             \
1432
0
    int _i;                                                             \
1433
0
    ASSERT(mfn_to_page(_sl4mfn)->u.sh.type == SH_type_l4_64_shadow);\
1434
0
    for ( _i = 0; _i < SHADOW_L4_PAGETABLE_ENTRIES; _i++ )              \
1435
0
    {                                                                   \
1436
0
        if ( (!(_xen)) || is_guest_l4_slot(_dom, _i) )                  \
1437
0
        {                                                               \
1438
0
            (_sl4e) = _sp + _i;                                         \
1439
0
            if ( shadow_l4e_get_flags(*(_sl4e)) & _PAGE_PRESENT )       \
1440
0
                {_code}                                                 \
1441
0
            if ( _done ) break;                                         \
1442
0
        }                                                               \
1443
0
        increment_ptr_to_guest_entry(_gl4p);                            \
1444
0
    }                                                                   \
1445
0
    unmap_domain_page(_sp);                                             \
1446
0
} while (0)
1447
1448
#endif
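The iterator macros above share one contract: they map the shadow page(s) themselves, hand the _code block a pointer to each *present* entry in turn, and stop early once _done becomes true. A minimal, hypothetical caller sketch follows (the function and counter are illustrative and not part of this file; real callers appear in the destructors and unhook routines further down):

/* Hypothetical example only: count the present entries in an l2 shadow.
 * The macro does the map_domain_page()/unmap_domain_page() itself and
 * skips non-present entries, so the body just increments a counter. */
static int sh_count_present_l2es(struct domain *d, mfn_t sl2mfn)
{
    shadow_l2e_t *sl2e;
    int count = 0;

    /* _gl2p == 0: no guest table is walked alongside the shadow;
     * _done == 0: never terminate early. */
    SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, d, {
        count++;
    });

    return count;
}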
1449
1450
1451
/**************************************************************************/
1452
/* Create a shadow of a given guest page.
1453
 */
1454
static mfn_t
1455
sh_make_shadow(struct vcpu *v, mfn_t gmfn, u32 shadow_type)
1456
0
{
1457
0
    struct domain *d = v->domain;
1458
0
    mfn_t smfn = shadow_alloc(d, shadow_type, mfn_x(gmfn));
1459
0
    SHADOW_DEBUG(MAKE_SHADOW, "(%"PRI_mfn", %u)=>%"PRI_mfn"\n",
1460
0
                  mfn_x(gmfn), shadow_type, mfn_x(smfn));
1461
0
1462
0
    if ( sh_type_has_up_pointer(d, shadow_type) )
1463
0
        /* Lower-level shadow, not yet linked from a higher level */
1464
0
        mfn_to_page(smfn)->up = 0;
1465
0
1466
0
#if GUEST_PAGING_LEVELS == 4
1467
#if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL)
1468
    if ( shadow_type == SH_type_l4_64_shadow &&
1469
         unlikely(d->arch.paging.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL) )
1470
    {
1471
        /* We're shadowing a new l4, but we've been assuming the guest uses
1472
         * only one l4 per vcpu and context switches using an l4 entry.
1473
         * Count the number of active l4 shadows.  If there are enough
1474
         * of them, decide that this isn't an old linux guest, and stop
1475
         * pinning l3es.  This is not very quick but it doesn't happen
1476
         * very often. */
1477
        struct page_info *sp, *t;
1478
        struct vcpu *v2;
1479
        int l4count = 0, vcpus = 0;
1480
        page_list_for_each(sp, &d->arch.paging.shadow.pinned_shadows)
1481
        {
1482
            if ( sp->u.sh.type == SH_type_l4_64_shadow )
1483
                l4count++;
1484
        }
1485
        for_each_vcpu ( d, v2 )
1486
            vcpus++;
1487
        if ( l4count > 2 * vcpus )
1488
        {
1489
            /* Unpin all the pinned l3 tables, and don't pin any more. */
1490
            page_list_for_each_safe(sp, t, &d->arch.paging.shadow.pinned_shadows)
1491
            {
1492
                if ( sp->u.sh.type == SH_type_l3_64_shadow )
1493
                    sh_unpin(d, page_to_mfn(sp));
1494
            }
1495
            d->arch.paging.shadow.opt_flags &= ~SHOPT_LINUX_L3_TOPLEVEL;
1496
            sh_reset_l3_up_pointers(v);
1497
        }
1498
    }
1499
#endif
1500
#endif
1501
0
1502
0
    // Create the Xen mappings...
1503
0
    if ( !shadow_mode_external(d) )
1504
0
    {
1505
0
        switch (shadow_type)
1506
0
        {
1507
0
#if GUEST_PAGING_LEVELS == 4
1508
        case SH_type_l4_shadow:
1509
        {
1510
            shadow_l4e_t *l4t = map_domain_page(smfn);
1511
1512
            BUILD_BUG_ON(sizeof(l4_pgentry_t) != sizeof(shadow_l4e_t));
1513
1514
            init_xen_l4_slots(l4t, gmfn, d, smfn, (!is_pv_32bit_domain(d) &&
1515
                                                   VM_ASSIST(d, m2p_strict)));
1516
            unmap_domain_page(l4t);
1517
        }
1518
        break;
1519
#endif
1520
0
#if GUEST_PAGING_LEVELS >= 3
1521
        case SH_type_l2h_shadow:
1522
            BUILD_BUG_ON(sizeof(l2_pgentry_t) != sizeof(shadow_l2e_t));
1523
            if ( is_pv_32bit_domain(d) )
1524
            {
1525
                shadow_l2e_t *l2t = map_domain_page(smfn);
1526
1527
                init_xen_pae_l2_slots(l2t, d);
1528
                unmap_domain_page(l2t);
1529
            }
1530
            break;
1531
#endif
1532
0
        default: /* Do nothing */ break;
1533
0
        }
1534
0
    }
1535
0
1536
0
    shadow_promote(d, gmfn, shadow_type);
1537
0
    set_shadow_status(d, gmfn, shadow_type, smfn);
1538
0
1539
0
    return smfn;
1540
0
}
1541
1542
/* Make a splintered superpage shadow */
1543
static mfn_t
1544
make_fl1_shadow(struct domain *d, gfn_t gfn)
1545
0
{
1546
0
    mfn_t smfn = shadow_alloc(d, SH_type_fl1_shadow, gfn_x(gfn));
1547
0
1548
0
    SHADOW_DEBUG(MAKE_SHADOW, "(%" SH_PRI_gfn ")=>%" PRI_mfn "\n",
1549
0
                  gfn_x(gfn), mfn_x(smfn));
1550
0
1551
0
    set_fl1_shadow_status(d, gfn, smfn);
1552
0
    return smfn;
1553
0
}
1554
1555
1556
#if SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS
1557
mfn_t
1558
sh_make_monitor_table(struct vcpu *v)
1559
0
{
1560
0
    struct domain *d = v->domain;
1561
0
1562
0
    ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0);
1563
0
1564
0
    /* Guarantee we can get the memory we need */
1565
0
    shadow_prealloc(d, SH_type_monitor_table, CONFIG_PAGING_LEVELS);
1566
0
1567
0
    {
1568
0
        mfn_t m4mfn;
1569
0
        l4_pgentry_t *l4e;
1570
0
1571
0
        m4mfn = shadow_alloc(d, SH_type_monitor_table, 0);
1572
0
        mfn_to_page(m4mfn)->shadow_flags = 4;
1573
0
1574
0
        l4e = map_domain_page(m4mfn);
1575
0
1576
0
        /*
1577
0
         * Create a self-linear mapping, but no shadow-linear mapping.  A
1578
0
         * shadow-linear mapping will either be inserted below when creating
1579
0
         * lower level monitor tables, or later in sh_update_cr3().
1580
0
         */
1581
0
        init_xen_l4_slots(l4e, m4mfn, d, INVALID_MFN, false);
1582
0
1583
0
#if SHADOW_PAGING_LEVELS < 4
1584
        {
1585
            mfn_t m3mfn, m2mfn;
1586
            l3_pgentry_t *l3e;
1587
            /* Install an l3 table and an l2 table that will hold the shadow
1588
             * linear map entries.  This fills in the shadow-linear slot that
1589
             * init_xen_l4_slots() above deliberately left empty. */
1590
1591
0
            m3mfn = shadow_alloc(d, SH_type_monitor_table, 0);
1592
0
            mfn_to_page(m3mfn)->shadow_flags = 3;
1593
0
            l4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)]
1594
0
                = l4e_from_mfn(m3mfn, __PAGE_HYPERVISOR_RW);
1595
1596
0
            m2mfn = shadow_alloc(d, SH_type_monitor_table, 0);
1597
0
            mfn_to_page(m2mfn)->shadow_flags = 2;
1598
            l3e = map_domain_page(m3mfn);
1599
0
            l3e[0] = l3e_from_mfn(m2mfn, __PAGE_HYPERVISOR_RW);
1600
            unmap_domain_page(l3e);
1601
1602
0
            if ( is_pv_32bit_domain(d) )
1603
0
            {
1604
0
                l2_pgentry_t *l2t;
1605
0
1606
0
                /* For 32-bit PV guests, we need to map the 32-bit Xen
1607
0
                 * area into its usual VAs in the monitor tables */
1608
0
                m3mfn = shadow_alloc(d, SH_type_monitor_table, 0);
1609
0
                mfn_to_page(m3mfn)->shadow_flags = 3;
1610
0
                l4e[0] = l4e_from_mfn(m3mfn, __PAGE_HYPERVISOR_RW);
1611
0
1612
0
                m2mfn = shadow_alloc(d, SH_type_monitor_table, 0);
1613
0
                mfn_to_page(m2mfn)->shadow_flags = 2;
1614
0
                l3e = map_domain_page(m3mfn);
1615
0
                l3e[3] = l3e_from_mfn(m2mfn, _PAGE_PRESENT);
1616
0
1617
0
                l2t = map_domain_page(m2mfn);
1618
0
                init_xen_pae_l2_slots(l2t, d);
1619
0
                unmap_domain_page(l2t);
1620
0
1621
0
                unmap_domain_page(l3e);
1622
0
            }
1623
1624
        }
1625
#endif /* SHADOW_PAGING_LEVELS < 4 */
1626
0
1627
0
        unmap_domain_page(l4e);
1628
0
1629
0
        return m4mfn;
1630
0
    }
1631
0
}
Unexecuted instantiation: sh_make_monitor_table__sh_3
Unexecuted instantiation: sh_make_monitor_table__sh_4
1632
#endif /* SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS */
1633
1634
/**************************************************************************/
1635
/* These functions also take a virtual address and return the level-N
1636
 * shadow table mfn and entry, but they create the shadow pagetables if
1637
 * they are needed.  The "demand" argument is non-zero when handling
1638
 * a demand fault (so we know what to do about accessed bits &c).
1639
 * If the necessary tables are not present in the guest, they return NULL. */
1640
1641
/* N.B. The use of GUEST_PAGING_LEVELS here is correct.  If the shadow has
1642
 * more levels than the guest, the upper levels are always fixed and do not
1643
 * reflect any information from the guest, so we do not use these functions
1644
 * to access them. */
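As a usage illustration, here is a hedged sketch of how a demand-fault path might call shadow_get_and_create_l1e(); the wrapper function and its error handling are hypothetical and simplified, only the call itself follows the interface defined below:

/* Hypothetical, simplified sketch (not the real fault handler): after a
 * successful guest walk, fetch the shadow l1 entry for gw->va, creating
 * any missing intermediate shadows on the way down. */
static shadow_l1e_t *demand_get_sl1e(struct vcpu *v, walk_t *gw,
                                     int write_fault, mfn_t *sl1mfn)
{
    fetch_type_t ft = write_fault ? ft_demand_write : ft_demand_read;
    shadow_l1e_t *ptr_sl1e = shadow_get_and_create_l1e(v, gw, sl1mfn, ft);

    /* NULL means the guest tables were incomplete or a shadow could not
     * be allocated; a real caller would bail out and let the guest fault
     * again (or inject a fault). */
    return ptr_sl1e;
}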
1645
1646
#if GUEST_PAGING_LEVELS >= 4
1647
static shadow_l4e_t * shadow_get_and_create_l4e(struct vcpu *v,
1648
                                                walk_t *gw,
1649
                                                mfn_t *sl4mfn)
1650
0
{
1651
0
    /* There is always a shadow of the top level table.  Get it. */
1652
0
    *sl4mfn = pagetable_get_mfn(v->arch.shadow_table[0]);
1653
0
    /* Reading the top level table is always valid. */
1654
0
    return sh_linear_l4_table(v) + shadow_l4_linear_offset(gw->va);
1655
0
}
1656
1657
static shadow_l3e_t * shadow_get_and_create_l3e(struct vcpu *v,
1658
                                                walk_t *gw,
1659
                                                mfn_t *sl3mfn,
1660
                                                fetch_type_t ft,
1661
                                                int *resync)
1662
0
{
1663
0
    struct domain *d = v->domain;
1664
0
    mfn_t sl4mfn;
1665
0
    shadow_l4e_t *sl4e;
1666
0
    if ( !mfn_valid(gw->l3mfn) ) return NULL; /* No guest page. */
1667
0
    /* Get the l4e */
1668
0
    sl4e = shadow_get_and_create_l4e(v, gw, &sl4mfn);
1669
0
    ASSERT(sl4e != NULL);
1670
0
    if ( shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT )
1671
0
    {
1672
0
        *sl3mfn = shadow_l4e_get_mfn(*sl4e);
1673
0
        ASSERT(mfn_valid(*sl3mfn));
1674
0
    }
1675
0
    else
1676
0
    {
1677
0
        int r;
1678
0
        shadow_l4e_t new_sl4e;
1679
0
        /* No l3 shadow installed: find and install it. */
1680
0
        *sl3mfn = get_shadow_status(d, gw->l3mfn, SH_type_l3_shadow);
1681
0
        if ( !mfn_valid(*sl3mfn) )
1682
0
        {
1683
0
            /* No l3 shadow of this page exists at all: make one. */
1684
0
            *sl3mfn = sh_make_shadow(v, gw->l3mfn, SH_type_l3_shadow);
1685
0
        }
1686
0
        /* Install the new sl3 table in the sl4e */
1687
0
        l4e_propagate_from_guest(v, gw->l4e, *sl3mfn, &new_sl4e, ft);
1688
0
        r = shadow_set_l4e(d, sl4e, new_sl4e, sl4mfn);
1689
0
        ASSERT((r & SHADOW_SET_FLUSH) == 0);
1690
0
        if ( r & SHADOW_SET_ERROR )
1691
0
            return NULL;
1692
0
1693
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC )
1694
0
        *resync |= 1;
1695
0
#endif
1696
0
1697
0
    }
1698
0
    /* Now follow it down a level.  Guaranteed to succeed. */
1699
0
    return sh_linear_l3_table(v) + shadow_l3_linear_offset(gw->va);
1700
0
}
1701
#endif /* GUEST_PAGING_LEVELS >= 4 */
1702
1703
1704
static shadow_l2e_t * shadow_get_and_create_l2e(struct vcpu *v,
1705
                                                walk_t *gw,
1706
                                                mfn_t *sl2mfn,
1707
                                                fetch_type_t ft,
1708
                                                int *resync)
1709
0
{
1710
0
#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
1711
    struct domain *d = v->domain;
1712
    mfn_t sl3mfn = INVALID_MFN;
1713
    shadow_l3e_t *sl3e;
1714
    if ( !mfn_valid(gw->l2mfn) ) return NULL; /* No guest page. */
1715
    /* Get the l3e */
1716
    sl3e = shadow_get_and_create_l3e(v, gw, &sl3mfn, ft, resync);
1717
    if ( sl3e == NULL ) return NULL;
1718
    if ( shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT )
1719
    {
1720
        *sl2mfn = shadow_l3e_get_mfn(*sl3e);
1721
        ASSERT(mfn_valid(*sl2mfn));
1722
    }
1723
    else
1724
    {
1725
        int r;
1726
        shadow_l3e_t new_sl3e;
1727
        unsigned int t = SH_type_l2_shadow;
1728
1729
        /* Tag compat L2 containing hypervisor (m2p) mappings */
1730
        if ( is_pv_32bit_vcpu(v) &&
1731
             guest_l4_table_offset(gw->va) == 0 &&
1732
             guest_l3_table_offset(gw->va) == 3 )
1733
            t = SH_type_l2h_shadow;
1734
1735
        /* No l2 shadow installed: find and install it. */
1736
        *sl2mfn = get_shadow_status(d, gw->l2mfn, t);
1737
        if ( !mfn_valid(*sl2mfn) )
1738
        {
1739
            /* No l2 shadow of this page exists at all: make one. */
1740
            *sl2mfn = sh_make_shadow(v, gw->l2mfn, t);
1741
        }
1742
        /* Install the new sl2 table in the sl3e */
1743
        l3e_propagate_from_guest(v, gw->l3e, *sl2mfn, &new_sl3e, ft);
1744
        r = shadow_set_l3e(d, sl3e, new_sl3e, sl3mfn);
1745
        ASSERT((r & SHADOW_SET_FLUSH) == 0);
1746
        if ( r & SHADOW_SET_ERROR )
1747
            return NULL;
1748
1749
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC )
1750
        *resync |= 1;
1751
#endif
1752
1753
    }
1754
    /* Now follow it down a level.  Guaranteed to succeed. */
1755
    return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
1756
#elif GUEST_PAGING_LEVELS == 3 /* PAE... */
1757
    /* We never demand-shadow PAE l3es: they are only created in
1758
     * sh_update_cr3().  Check if the relevant sl3e is present. */
1759
    shadow_l3e_t *sl3e = ((shadow_l3e_t *)&v->arch.paging.shadow.l3table)
1760
        + shadow_l3_linear_offset(gw->va);
1761
    if ( !(shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) )
1762
        return NULL;
1763
    *sl2mfn = shadow_l3e_get_mfn(*sl3e);
1764
    ASSERT(mfn_valid(*sl2mfn));
1765
    return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
1766
#else /* 32bit... */
1767
0
    /* There is always a shadow of the top level table.  Get it. */
1768
0
    *sl2mfn = pagetable_get_mfn(v->arch.shadow_table[0]);
1769
0
    /* This next line is important: the guest l2 has a 16k
1770
0
     * shadow, so we need to return the right mfn of the four. This
1771
0
     * call will set it for us as a side-effect. */
1772
0
    (void) shadow_l2_index(sl2mfn, guest_l2_table_offset(gw->va));
1773
0
    /* Reading the top level table is always valid. */
1774
0
    return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
1775
0
#endif
1776
0
}
1777
1778
1779
static shadow_l1e_t * shadow_get_and_create_l1e(struct vcpu *v,
1780
                                                walk_t *gw,
1781
                                                mfn_t *sl1mfn,
1782
                                                fetch_type_t ft)
1783
0
{
1784
0
    struct domain *d = v->domain;
1785
0
    mfn_t sl2mfn;
1786
0
    int resync = 0;
1787
0
    shadow_l2e_t *sl2e;
1788
0
1789
0
    /* Get the l2e */
1790
0
    sl2e = shadow_get_and_create_l2e(v, gw, &sl2mfn, ft, &resync);
1791
0
    if ( sl2e == NULL ) return NULL;
1792
0
1793
0
    /* Install the sl1 in the l2e if it wasn't there or if we need to
1794
0
     * re-do it to fix a PSE dirty bit. */
1795
0
    if ( shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT
1796
0
         && likely(ft != ft_demand_write
1797
0
                   || (shadow_l2e_get_flags(*sl2e) & _PAGE_RW)
1798
0
                   || !(guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)) )
1799
0
    {
1800
0
        *sl1mfn = shadow_l2e_get_mfn(*sl2e);
1801
0
        ASSERT(mfn_valid(*sl1mfn));
1802
0
    }
1803
0
    else
1804
0
    {
1805
0
        shadow_l2e_t new_sl2e;
1806
0
        int r, flags = guest_l2e_get_flags(gw->l2e);
1807
0
        /* No l1 shadow installed: find and install it. */
1808
0
        if ( !(flags & _PAGE_PRESENT) )
1809
0
            return NULL; /* No guest page. */
1810
0
        if ( guest_can_use_l2_superpages(v) && (flags & _PAGE_PSE) )
1811
0
        {
1812
0
            /* Splintering a superpage */
1813
0
            gfn_t l2gfn = guest_l2e_get_gfn(gw->l2e);
1814
0
            *sl1mfn = get_fl1_shadow_status(d, l2gfn);
1815
0
            if ( !mfn_valid(*sl1mfn) )
1816
0
            {
1817
0
                /* No fl1 shadow of this superpage exists at all: make one. */
1818
0
                *sl1mfn = make_fl1_shadow(d, l2gfn);
1819
0
            }
1820
0
        }
1821
0
        else
1822
0
        {
1823
0
            /* Shadowing an actual guest l1 table */
1824
0
            if ( !mfn_valid(gw->l1mfn) ) return NULL; /* No guest page. */
1825
0
            *sl1mfn = get_shadow_status(d, gw->l1mfn, SH_type_l1_shadow);
1826
0
            if ( !mfn_valid(*sl1mfn) )
1827
0
            {
1828
0
                /* No l1 shadow of this page exists at all: make one. */
1829
0
                *sl1mfn = sh_make_shadow(v, gw->l1mfn, SH_type_l1_shadow);
1830
0
            }
1831
0
        }
1832
0
        /* Install the new sl1 table in the sl2e */
1833
0
        l2e_propagate_from_guest(v, gw->l2e, *sl1mfn, &new_sl2e, ft);
1834
0
        r = shadow_set_l2e(d, sl2e, new_sl2e, sl2mfn);
1835
0
        ASSERT((r & SHADOW_SET_FLUSH) == 0);
1836
0
        if ( r & SHADOW_SET_ERROR )
1837
0
            return NULL;
1838
0
1839
0
        /* This next line is important: in 32-on-PAE and 32-on-64 modes,
1840
0
         * the guest l1 table has an 8k shadow, and we need to return
1841
0
         * the right mfn of the pair. This call will set it for us as a
1842
0
         * side-effect.  (In all other cases, it's a no-op and will be
1843
0
         * compiled out.) */
1844
0
        (void) shadow_l1_index(sl1mfn, guest_l1_table_offset(gw->va));
1845
0
    }
1846
0
1847
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC )
1848
0
    /* All pages walked are now pagetables. Safe to resync pages
1849
0
       in case level 4 or 3 shadows were set. */
1850
0
    if ( resync )
1851
0
        shadow_resync_all(v);
1852
0
#endif
1853
0
1854
0
    /* Now follow it down a level.  Guaranteed to succeed. */
1855
0
    return sh_linear_l1_table(v) + shadow_l1_linear_offset(gw->va);
1856
0
}
1857
1858
1859
1860
/**************************************************************************/
1861
/* Destructors for shadow tables:
1862
 * Unregister the shadow, decrement refcounts of any entries present in it,
1863
 * and release the memory.
1864
 *
1865
 * N.B. These destructors do not clear the contents of the shadows.
1866
 *      This allows us to delay TLB shootdowns until the page is being reused.
1867
 *      See shadow_alloc() and shadow_free() for how this is handled.
1868
 */
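One detail worth calling out: every destructor below passes sh_put_ref() the machine address of the shadow entry that held the reference, composed from the owning shadow's MFN and the mapped pointer's offset within the page. A small illustrative helper showing that composition (hypothetical, not part of this file):

/* Illustration only: the second argument to sh_put_ref() in the loops
 * below is built exactly like this -- frame number from the shadow MFN,
 * byte offset from the mapped entry pointer. */
static paddr_t shadow_entry_maddr(mfn_t smfn, void *entry)
{
    return ((paddr_t)mfn_x(smfn) << PAGE_SHIFT) |
           ((unsigned long)entry & ~PAGE_MASK);
}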
1869
1870
#if GUEST_PAGING_LEVELS >= 4
1871
void sh_destroy_l4_shadow(struct domain *d, mfn_t smfn)
1872
0
{
1873
0
    shadow_l4e_t *sl4e;
1874
0
    struct page_info *sp = mfn_to_page(smfn);
1875
0
    u32 t = sp->u.sh.type;
1876
0
    mfn_t gmfn, sl4mfn;
1877
0
1878
0
    SHADOW_DEBUG(DESTROY_SHADOW, "%"PRI_mfn"\n", mfn_x(smfn));
1879
0
    ASSERT(t == SH_type_l4_shadow);
1880
0
    ASSERT(sp->u.sh.head);
1881
0
1882
0
    /* Record that the guest page isn't shadowed any more (in this type) */
1883
0
    gmfn = backpointer(sp);
1884
0
    delete_shadow_status(d, gmfn, t, smfn);
1885
0
    shadow_demote(d, gmfn, t);
1886
0
    /* Decrement refcounts of all the old entries */
1887
0
    sl4mfn = smfn;
1888
0
    SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, 0, d, {
1889
0
        if ( shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT )
1890
0
        {
1891
0
            sh_put_ref(d, shadow_l4e_get_mfn(*sl4e),
1892
0
                       (((paddr_t)mfn_x(sl4mfn)) << PAGE_SHIFT)
1893
0
                       | ((unsigned long)sl4e & ~PAGE_MASK));
1894
0
        }
1895
0
    });
1896
0
1897
0
    /* Put the memory back in the pool */
1898
0
    shadow_free(d, smfn);
1899
0
}
1900
1901
void sh_destroy_l3_shadow(struct domain *d, mfn_t smfn)
1902
0
{
1903
0
    shadow_l3e_t *sl3e;
1904
0
    struct page_info *sp = mfn_to_page(smfn);
1905
0
    u32 t = sp->u.sh.type;
1906
0
    mfn_t gmfn, sl3mfn;
1907
0
1908
0
    SHADOW_DEBUG(DESTROY_SHADOW, "%"PRI_mfn"\n", mfn_x(smfn));
1909
0
    ASSERT(t == SH_type_l3_shadow);
1910
0
    ASSERT(sp->u.sh.head);
1911
0
1912
0
    /* Record that the guest page isn't shadowed any more (in this type) */
1913
0
    gmfn = backpointer(sp);
1914
0
    delete_shadow_status(d, gmfn, t, smfn);
1915
0
    shadow_demote(d, gmfn, t);
1916
0
1917
0
    /* Decrement refcounts of all the old entries */
1918
0
    sl3mfn = smfn;
1919
0
    SHADOW_FOREACH_L3E(sl3mfn, sl3e, 0, 0, {
1920
0
        if ( shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT )
1921
0
            sh_put_ref(d, shadow_l3e_get_mfn(*sl3e),
1922
0
                        (((paddr_t)mfn_x(sl3mfn)) << PAGE_SHIFT)
1923
0
                        | ((unsigned long)sl3e & ~PAGE_MASK));
1924
0
    });
1925
0
1926
0
    /* Put the memory back in the pool */
1927
0
    shadow_free(d, smfn);
1928
0
}
1929
#endif /* GUEST_PAGING_LEVELS >= 4 */
1930
1931
1932
void sh_destroy_l2_shadow(struct domain *d, mfn_t smfn)
1933
0
{
1934
0
    shadow_l2e_t *sl2e;
1935
0
    struct page_info *sp = mfn_to_page(smfn);
1936
0
    u32 t = sp->u.sh.type;
1937
0
    mfn_t gmfn, sl2mfn;
1938
0
1939
0
    SHADOW_DEBUG(DESTROY_SHADOW, "%"PRI_mfn"\n", mfn_x(smfn));
1940
0
1941
0
#if GUEST_PAGING_LEVELS >= 3
1942
0
    ASSERT(t == SH_type_l2_shadow || t == SH_type_l2h_shadow);
1943
#else
1944
0
    ASSERT(t == SH_type_l2_shadow);
1945
#endif
1946
0
    ASSERT(sp->u.sh.head);
1947
0
1948
0
    /* Record that the guest page isn't shadowed any more (in this type) */
1949
0
    gmfn = backpointer(sp);
1950
0
    delete_shadow_status(d, gmfn, t, smfn);
1951
0
    shadow_demote(d, gmfn, t);
1952
0
1953
0
    /* Decrement refcounts of all the old entries */
1954
0
    sl2mfn = smfn;
1955
0
    SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, d, {
1956
0
        if ( shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT )
1957
0
            sh_put_ref(d, shadow_l2e_get_mfn(*sl2e),
1958
0
                        (((paddr_t)mfn_x(sl2mfn)) << PAGE_SHIFT)
1959
0
                        | ((unsigned long)sl2e & ~PAGE_MASK));
1960
0
    });
1961
0
1962
0
    /* Put the memory back in the pool */
1963
0
    shadow_free(d, smfn);
1964
0
}
Unexecuted instantiation: sh_destroy_l2_shadow__guest_2
Unexecuted instantiation: sh_destroy_l2_shadow__guest_3
Unexecuted instantiation: sh_destroy_l2_shadow__guest_4
1965
1966
void sh_destroy_l1_shadow(struct domain *d, mfn_t smfn)
1967
0
{
1968
0
    shadow_l1e_t *sl1e;
1969
0
    struct page_info *sp = mfn_to_page(smfn);
1970
0
    u32 t = sp->u.sh.type;
1971
0
1972
0
    SHADOW_DEBUG(DESTROY_SHADOW, "%"PRI_mfn"\n", mfn_x(smfn));
1973
0
    ASSERT(t == SH_type_l1_shadow || t == SH_type_fl1_shadow);
1974
0
    ASSERT(sp->u.sh.head);
1975
0
1976
0
    /* Record that the guest page isn't shadowed any more (in this type) */
1977
0
    if ( t == SH_type_fl1_shadow )
1978
0
    {
1979
0
        gfn_t gfn = _gfn(sp->v.sh.back);
1980
0
        delete_fl1_shadow_status(d, gfn, smfn);
1981
0
    }
1982
0
    else
1983
0
    {
1984
0
        mfn_t gmfn = backpointer(sp);
1985
0
        delete_shadow_status(d, gmfn, t, smfn);
1986
0
        shadow_demote(d, gmfn, t);
1987
0
    }
1988
0
1989
0
    if ( shadow_mode_refcounts(d) )
1990
0
    {
1991
0
        /* Decrement refcounts of all the old entries */
1992
0
        mfn_t sl1mfn = smfn;
1993
0
        SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
1994
0
            if ( (shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT)
1995
0
                 && !sh_l1e_is_magic(*sl1e) ) {
1996
0
                shadow_vram_put_l1e(*sl1e, sl1e, sl1mfn, d);
1997
0
                shadow_put_page_from_l1e(*sl1e, d);
1998
0
            }
1999
0
        });
2000
0
    }
2001
0
2002
0
    /* Put the memory back in the pool */
2003
0
    shadow_free(d, smfn);
2004
0
}
Unexecuted instantiation: sh_destroy_l1_shadow__guest_2
Unexecuted instantiation: sh_destroy_l1_shadow__guest_3
Unexecuted instantiation: sh_destroy_l1_shadow__guest_4
2005
2006
#if SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS
2007
void sh_destroy_monitor_table(struct vcpu *v, mfn_t mmfn)
2008
0
{
2009
0
    struct domain *d = v->domain;
2010
0
    ASSERT(mfn_to_page(mmfn)->u.sh.type == SH_type_monitor_table);
2011
0
2012
0
#if SHADOW_PAGING_LEVELS != 4
2013
    {
2014
        mfn_t m3mfn;
2015
        l4_pgentry_t *l4e = map_domain_page(mmfn);
2016
        l3_pgentry_t *l3e;
2017
0
        int linear_slot = shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START);
2018
2019
        /* Need to destroy the l3 and l2 monitor pages used
2020
         * for the linear map */
2021
0
        ASSERT(l4e_get_flags(l4e[linear_slot]) & _PAGE_PRESENT);
2022
0
        m3mfn = l4e_get_mfn(l4e[linear_slot]);
2023
        l3e = map_domain_page(m3mfn);
2024
0
        ASSERT(l3e_get_flags(l3e[0]) & _PAGE_PRESENT);
2025
0
        shadow_free(d, l3e_get_mfn(l3e[0]));
2026
        unmap_domain_page(l3e);
2027
        shadow_free(d, m3mfn);
2028
2029
0
        if ( is_pv_32bit_domain(d) )
2030
0
        {
2031
0
            /* Need to destroy the l3 and l2 monitor pages that map the
2032
0
             * Xen VAs at 3GB-4GB */
2033
0
            ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT);
2034
0
            m3mfn = l4e_get_mfn(l4e[0]);
2035
0
            l3e = map_domain_page(m3mfn);
2036
0
            ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
2037
0
            shadow_free(d, l3e_get_mfn(l3e[3]));
2038
0
            unmap_domain_page(l3e);
2039
0
            shadow_free(d, m3mfn);
2040
0
        }
2041
        unmap_domain_page(l4e);
2042
    }
2043
#endif
2044
0
2045
0
    /* Put the memory back in the pool */
2046
0
    shadow_free(d, mmfn);
2047
0
}
Unexecuted instantiation: sh_destroy_monitor_table__sh_3
Unexecuted instantiation: sh_destroy_monitor_table__sh_4
2048
#endif
2049
2050
/**************************************************************************/
2051
/* Functions to destroy non-Xen mappings in a pagetable hierarchy.
2052
 * These are called from common code when we are running out of shadow
2053
 * memory, and unpinning all the top-level shadows hasn't worked.
2054
 *
2055
 * With user_only == 1, we leave guest kernel-mode mappings in place too,
2056
 * unhooking only the user-mode mappings.
2057
 *
2058
 * This implementation is pretty crude and slow, but we hope that it won't
2059
 * be called very often. */
2060
2061
#if GUEST_PAGING_LEVELS == 2
2062
2063
void sh_unhook_32b_mappings(struct domain *d, mfn_t sl2mfn, int user_only)
2064
0
{
2065
0
    shadow_l2e_t *sl2e;
2066
0
    SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, d, {
2067
0
        if ( !user_only || (sl2e->l2 & _PAGE_USER) )
2068
0
            (void) shadow_set_l2e(d, sl2e, shadow_l2e_empty(), sl2mfn);
2069
0
    });
2070
0
}
2071
2072
#elif GUEST_PAGING_LEVELS == 3
2073
2074
void sh_unhook_pae_mappings(struct domain *d, mfn_t sl2mfn, int user_only)
2075
/* Walk a PAE l2 shadow, unhooking entries from all the subshadows */
2076
0
{
2077
0
    shadow_l2e_t *sl2e;
2078
0
    SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, d, {
2079
0
        if ( !user_only || (sl2e->l2 & _PAGE_USER) )
2080
0
            (void) shadow_set_l2e(d, sl2e, shadow_l2e_empty(), sl2mfn);
2081
0
    });
2082
0
}
2083
2084
#elif GUEST_PAGING_LEVELS == 4
2085
2086
void sh_unhook_64b_mappings(struct domain *d, mfn_t sl4mfn, int user_only)
2087
0
{
2088
0
    shadow_l4e_t *sl4e;
2089
0
    SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, 0, d, {
2090
0
        if ( !user_only || (sl4e->l4 & _PAGE_USER) )
2091
0
            (void) shadow_set_l4e(d, sl4e, shadow_l4e_empty(), sl4mfn);
2092
0
    });
2093
0
}
2094
2095
#endif
2096
2097
/**************************************************************************/
2098
/* Internal translation functions.
2099
 * These functions require a pointer to the shadow entry that will be updated.
2100
 */
2101
2102
/* These functions take a new guest entry, translate it to shadow and write
2103
 * the shadow entry.
2104
 *
2105
 * They return the same bitmaps as the shadow_set_lXe() functions.
2106
 */
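To make the return-value contract concrete, here is a hedged sketch of how a caller might interpret the accumulated bitmap; the handler function is hypothetical, and the comments describe what real callers do in broad terms rather than transcribing them:

/* Hypothetical sketch: rc is the OR of the shadow_set_lNe() results
 * across all entries validated, so the flags accumulate. */
static void handle_validate_result(struct domain *d, int rc)
{
    if ( rc & SHADOW_SET_ERROR )
    {
        /* Some entry could not be shadowed; real callers unshadow the
         * page and let later faults rebuild it. */
    }
    if ( rc & SHADOW_SET_FLUSH )
    {
        /* Some update left stale TLB entries; real callers flush the
         * TLBs of the CPUs that may hold them before resuming. */
    }
}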
2107
2108
#if GUEST_PAGING_LEVELS >= 4
2109
static int validate_gl4e(struct vcpu *v, void *new_ge, mfn_t sl4mfn, void *se)
2110
0
{
2111
0
    shadow_l4e_t new_sl4e;
2112
0
    guest_l4e_t new_gl4e = *(guest_l4e_t *)new_ge;
2113
0
    shadow_l4e_t *sl4p = se;
2114
0
    mfn_t sl3mfn = INVALID_MFN;
2115
0
    struct domain *d = v->domain;
2116
0
    p2m_type_t p2mt;
2117
0
    int result = 0;
2118
0
2119
0
    perfc_incr(shadow_validate_gl4e_calls);
2120
0
2121
0
    if ( (guest_l4e_get_flags(new_gl4e) & _PAGE_PRESENT) &&
2122
0
         !guest_l4e_rsvd_bits(v, new_gl4e) )
2123
0
    {
2124
0
        gfn_t gl3gfn = guest_l4e_get_gfn(new_gl4e);
2125
0
        mfn_t gl3mfn = get_gfn_query_unlocked(d, gfn_x(gl3gfn), &p2mt);
2126
0
        if ( p2m_is_ram(p2mt) )
2127
0
            sl3mfn = get_shadow_status(d, gl3mfn, SH_type_l3_shadow);
2128
0
        else if ( p2mt != p2m_populate_on_demand )
2129
0
            result |= SHADOW_SET_ERROR;
2130
0
2131
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC )
2132
0
        if ( mfn_valid(sl3mfn) )
2133
0
            shadow_resync_all(v);
2134
0
#endif
2135
0
    }
2136
0
    l4e_propagate_from_guest(v, new_gl4e, sl3mfn, &new_sl4e, ft_prefetch);
2137
0
2138
0
    // check for updates to xen reserved slots
2139
0
    if ( !shadow_mode_external(d) )
2140
0
    {
2141
0
        int shadow_index = (((unsigned long)sl4p & ~PAGE_MASK) /
2142
0
                            sizeof(shadow_l4e_t));
2143
0
        int reserved_xen_slot = !is_guest_l4_slot(d, shadow_index);
2144
0
2145
0
        if ( unlikely(reserved_xen_slot) )
2146
0
        {
2147
0
            // attempt by the guest to write to a xen reserved slot
2148
0
            //
2149
0
            SHADOW_PRINTK("out-of-range update "
2150
0
                          "sl4mfn=%"PRI_mfn" index=%#x val=%" SH_PRI_pte "\n",
2151
0
                          mfn_x(sl4mfn), shadow_index, new_sl4e.l4);
2152
0
            if ( shadow_l4e_get_flags(new_sl4e) & _PAGE_PRESENT )
2153
0
            {
2154
0
                SHADOW_ERROR("out-of-range l4e update\n");
2155
0
                result |= SHADOW_SET_ERROR;
2156
0
            }
2157
0
2158
0
            // do not call shadow_set_l4e...
2159
0
            return result;
2160
0
        }
2161
0
    }
2162
0
2163
0
    result |= shadow_set_l4e(d, sl4p, new_sl4e, sl4mfn);
2164
0
    return result;
2165
0
}
2166
2167
2168
static int validate_gl3e(struct vcpu *v, void *new_ge, mfn_t sl3mfn, void *se)
2169
0
{
2170
0
    struct domain *d = v->domain;
2171
0
    shadow_l3e_t new_sl3e;
2172
0
    guest_l3e_t new_gl3e = *(guest_l3e_t *)new_ge;
2173
0
    shadow_l3e_t *sl3p = se;
2174
0
    mfn_t sl2mfn = INVALID_MFN;
2175
0
    p2m_type_t p2mt;
2176
0
    int result = 0;
2177
0
2178
0
    perfc_incr(shadow_validate_gl3e_calls);
2179
0
2180
0
    if ( (guest_l3e_get_flags(new_gl3e) & _PAGE_PRESENT) &&
2181
0
         !guest_l3e_rsvd_bits(v, new_gl3e) )
2182
0
    {
2183
0
        gfn_t gl2gfn = guest_l3e_get_gfn(new_gl3e);
2184
0
        mfn_t gl2mfn = get_gfn_query_unlocked(d, gfn_x(gl2gfn), &p2mt);
2185
0
        if ( p2m_is_ram(p2mt) )
2186
0
            sl2mfn = get_shadow_status(d, gl2mfn, SH_type_l2_shadow);
2187
0
        else if ( p2mt != p2m_populate_on_demand )
2188
0
            result |= SHADOW_SET_ERROR;
2189
0
2190
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC )
2191
0
        if ( mfn_valid(sl2mfn) )
2192
0
            shadow_resync_all(v);
2193
0
#endif
2194
0
    }
2195
0
    l3e_propagate_from_guest(v, new_gl3e, sl2mfn, &new_sl3e, ft_prefetch);
2196
0
    result |= shadow_set_l3e(d, sl3p, new_sl3e, sl3mfn);
2197
0
2198
0
    return result;
2199
0
}
2200
#endif // GUEST_PAGING_LEVELS >= 4
2201
2202
static int validate_gl2e(struct vcpu *v, void *new_ge, mfn_t sl2mfn, void *se)
2203
0
{
2204
0
    struct domain *d = v->domain;
2205
0
    shadow_l2e_t new_sl2e;
2206
0
    guest_l2e_t new_gl2e = *(guest_l2e_t *)new_ge;
2207
0
    shadow_l2e_t *sl2p = se;
2208
0
    mfn_t sl1mfn = INVALID_MFN;
2209
0
    p2m_type_t p2mt;
2210
0
    int result = 0;
2211
0
2212
0
    perfc_incr(shadow_validate_gl2e_calls);
2213
0
2214
0
    if ( (guest_l2e_get_flags(new_gl2e) & _PAGE_PRESENT) &&
2215
0
         !guest_l2e_rsvd_bits(v, new_gl2e) )
2216
0
    {
2217
0
        gfn_t gl1gfn = guest_l2e_get_gfn(new_gl2e);
2218
0
        if ( guest_can_use_l2_superpages(v) &&
2219
0
             (guest_l2e_get_flags(new_gl2e) & _PAGE_PSE) )
2220
0
        {
2221
0
            // superpage -- need to look up the shadow L1 which holds the
2222
0
            // splitters...
2223
0
            sl1mfn = get_fl1_shadow_status(d, gl1gfn);
2224
0
#if 0
2225
            // XXX - it's possible that we want to do some kind of prefetch
2226
            // for superpage fl1's here, but this is *not* on the demand path,
2227
            // so we'll hold off trying that for now...
2228
            //
2229
            if ( !mfn_valid(sl1mfn) )
2230
                sl1mfn = make_fl1_shadow(d, gl1gfn);
2231
#endif
2232
0
        }
2233
0
        else
2234
0
        {
2235
0
            mfn_t gl1mfn = get_gfn_query_unlocked(d, gfn_x(gl1gfn), &p2mt);
2236
0
            if ( p2m_is_ram(p2mt) )
2237
0
                sl1mfn = get_shadow_status(d, gl1mfn, SH_type_l1_shadow);
2238
0
            else if ( p2mt != p2m_populate_on_demand )
2239
0
                result |= SHADOW_SET_ERROR;
2240
0
        }
2241
0
    }
2242
0
    l2e_propagate_from_guest(v, new_gl2e, sl1mfn, &new_sl2e, ft_prefetch);
2243
0
2244
0
    result |= shadow_set_l2e(d, sl2p, new_sl2e, sl2mfn);
2245
0
2246
0
    return result;
2247
0
}
2248
2249
static int validate_gl1e(struct vcpu *v, void *new_ge, mfn_t sl1mfn, void *se)
2250
0
{
2251
0
    struct domain *d = v->domain;
2252
0
    shadow_l1e_t new_sl1e;
2253
0
    guest_l1e_t new_gl1e = *(guest_l1e_t *)new_ge;
2254
0
    shadow_l1e_t *sl1p = se;
2255
0
    gfn_t gfn;
2256
0
    mfn_t gmfn = INVALID_MFN;
2257
0
    p2m_type_t p2mt = p2m_invalid;
2258
0
    int result = 0;
2259
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
2260
0
    mfn_t gl1mfn;
2261
0
#endif /* OOS */
2262
0
2263
0
    perfc_incr(shadow_validate_gl1e_calls);
2264
0
2265
0
    if ( (guest_l1e_get_flags(new_gl1e) & _PAGE_PRESENT) &&
2266
0
         !guest_l1e_rsvd_bits(v, new_gl1e) )
2267
0
    {
2268
0
        gfn = guest_l1e_get_gfn(new_gl1e);
2269
0
        gmfn = get_gfn_query_unlocked(d, gfn_x(gfn), &p2mt);
2270
0
    }
2271
0
2272
0
    l1e_propagate_from_guest(v, new_gl1e, gmfn, &new_sl1e, ft_prefetch, p2mt);
2273
0
    result |= shadow_set_l1e(d, sl1p, new_sl1e, p2mt, sl1mfn);
2274
0
2275
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
2276
0
    gl1mfn = backpointer(mfn_to_page(sl1mfn));
2277
0
    if ( mfn_valid(gl1mfn)
2278
0
         && mfn_is_out_of_sync(gl1mfn) )
2279
0
    {
2280
0
        /* Update the OOS snapshot. */
2281
0
        mfn_t snpmfn = oos_snapshot_lookup(d, gl1mfn);
2282
0
        guest_l1e_t *snp;
2283
0
2284
0
        ASSERT(mfn_valid(snpmfn));
2285
0
2286
0
        snp = map_domain_page(snpmfn);
2287
0
        snp[guest_index(new_ge)] = new_gl1e;
2288
0
        unmap_domain_page(snp);
2289
0
    }
2290
0
#endif /* OOS */
2291
0
2292
0
    return result;
2293
0
}
2294
2295
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
2296
/**************************************************************************/
2297
/* Special validation function for re-syncing out-of-sync shadows.
2298
 * Walks the *shadow* page, and for every entry that it finds,
2299
 * revalidates the guest entry that corresponds to it.
2300
 * N.B. This function is called with the vcpu that unsynced the page,
2301
 *      *not* the one that is causing it to be resynced. */
2302
void sh_resync_l1(struct vcpu *v, mfn_t gl1mfn, mfn_t snpmfn)
2303
0
{
2304
0
    struct domain *d = v->domain;
2305
0
    mfn_t sl1mfn;
2306
0
    shadow_l1e_t *sl1p;
2307
0
    guest_l1e_t *gl1p, *gp, *snp;
2308
0
    int rc = 0;
2309
0
2310
0
    ASSERT(mfn_valid(snpmfn));
2311
0
2312
0
    sl1mfn = get_shadow_status(d, gl1mfn, SH_type_l1_shadow);
2313
0
    ASSERT(mfn_valid(sl1mfn)); /* Otherwise we would not have been called */
2314
0
2315
0
    snp = map_domain_page(snpmfn);
2316
0
    gp = map_domain_page(gl1mfn);
2317
0
    gl1p = gp;
2318
0
2319
0
    SHADOW_FOREACH_L1E(sl1mfn, sl1p, &gl1p, 0, {
2320
0
        guest_l1e_t gl1e = *gl1p;
2321
0
        guest_l1e_t *snpl1p = (guest_l1e_t *)snp + guest_index(gl1p);
2322
0
2323
0
        if ( memcmp(snpl1p, &gl1e, sizeof(gl1e)) )
2324
0
        {
2325
0
            gfn_t gfn;
2326
0
            mfn_t gmfn = INVALID_MFN;
2327
0
            p2m_type_t p2mt = p2m_invalid;
2328
0
            shadow_l1e_t nsl1e;
2329
0
2330
0
            if ( (guest_l1e_get_flags(gl1e) & _PAGE_PRESENT) &&
2331
0
                 !guest_l1e_rsvd_bits(v, gl1e) )
2332
0
            {
2333
0
                gfn = guest_l1e_get_gfn(gl1e);
2334
0
                gmfn = get_gfn_query_unlocked(d, gfn_x(gfn), &p2mt);
2335
0
            }
2336
0
2337
0
            l1e_propagate_from_guest(v, gl1e, gmfn, &nsl1e, ft_prefetch, p2mt);
2338
0
            rc |= shadow_set_l1e(d, sl1p, nsl1e, p2mt, sl1mfn);
2339
0
            *snpl1p = gl1e;
2340
0
        }
2341
0
    });
2342
0
2343
0
    unmap_domain_page(gp);
2344
0
    unmap_domain_page(snp);
2345
0
2346
0
    /* Setting shadow L1 entries should never need us to flush the TLB */
2347
0
    ASSERT(!(rc & SHADOW_SET_FLUSH));
2348
0
}
Unexecuted instantiation: sh_resync_l1__guest_2
Unexecuted instantiation: sh_resync_l1__guest_4
Unexecuted instantiation: sh_resync_l1__guest_3
2349
2350
/* Figure out whether it's definitely safe not to sync this l1 table.
2351
 * That is: if we can tell that it's only used once, and that the
2352
 * toplevel shadow responsible is not one of ours.
2353
 * N.B. This function is called with the vcpu that required the resync,
2354
 *      *not* the one that originally unsynced the page, but it is
2355
 *      called in the *mode* of the vcpu that unsynced it.  Clear?  Good. */
2356
int sh_safe_not_to_sync(struct vcpu *v, mfn_t gl1mfn)
2357
0
{
2358
0
    struct domain *d = v->domain;
2359
0
    struct page_info *sp;
2360
0
    mfn_t smfn;
2361
0
2362
0
    if ( !sh_type_has_up_pointer(d, SH_type_l1_shadow) )
2363
0
        return 0;
2364
0
2365
0
    smfn = get_shadow_status(d, gl1mfn, SH_type_l1_shadow);
2366
0
    ASSERT(mfn_valid(smfn)); /* Otherwise we would not have been called */
2367
0
2368
0
    /* Up to l2 */
2369
0
    sp = mfn_to_page(smfn);
2370
0
    if ( sp->u.sh.count != 1 || !sp->up )
2371
0
        return 0;
2372
0
    smfn = maddr_to_mfn(sp->up);
2373
0
    ASSERT(mfn_valid(smfn));
2374
0
2375
0
#if (SHADOW_PAGING_LEVELS == 4)
2376
    /* up to l3 */
2377
0
    sp = mfn_to_page(smfn);
2378
0
    ASSERT(sh_type_has_up_pointer(d, SH_type_l2_shadow));
2379
0
    if ( sp->u.sh.count != 1 || !sp->up )
2380
0
        return 0;
2381
0
    smfn = maddr_to_mfn(sp->up);
2382
0
    ASSERT(mfn_valid(smfn));
2383
0
2384
0
    /* up to l4 */
2385
0
    sp = mfn_to_page(smfn);
2386
0
    if ( sp->u.sh.count != 1
2387
0
         || !sh_type_has_up_pointer(d, SH_type_l3_64_shadow) || !sp->up )
2388
0
        return 0;
2389
0
    smfn = maddr_to_mfn(sp->up);
2390
0
    ASSERT(mfn_valid(smfn));
2391
0
#endif
2392
0
2393
0
    if ( pagetable_get_pfn(v->arch.shadow_table[0]) == mfn_x(smfn)
2394
0
#if (SHADOW_PAGING_LEVELS == 3)
2395
0
         || pagetable_get_pfn(v->arch.shadow_table[1]) == mfn_x(smfn)
2396
0
         || pagetable_get_pfn(v->arch.shadow_table[2]) == mfn_x(smfn)
2397
0
         || pagetable_get_pfn(v->arch.shadow_table[3]) == mfn_x(smfn)
2398
#endif
2399
0
        )
2400
0
        return 0;
2401
0
2402
0
    /* Only in use in one toplevel shadow, and it's not the one we're
2403
0
     * running on */
2404
0
    return 1;
2405
0
}
Unexecuted instantiation: sh_safe_not_to_sync__guest_2
Unexecuted instantiation: sh_safe_not_to_sync__guest_4
Unexecuted instantiation: sh_safe_not_to_sync__guest_3
2406
0
#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */
2407
0
2408
0
2409
0
/**************************************************************************/
2410
0
/* Functions which translate and install the shadows of arbitrary guest
2411
0
 * entries that we have just seen the guest write. */
2412
0
2413
0
2414
0
static inline int
2415
0
sh_map_and_validate(struct vcpu *v, mfn_t gmfn,
2416
0
                     void *new_gp, u32 size, u32 sh_type,
2417
0
                     u32 (*shadow_index)(mfn_t *smfn, u32 idx),
2418
0
                     int (*validate_ge)(struct vcpu *v, void *ge,
2419
0
                                        mfn_t smfn, void *se))
2420
0
/* Generic function for mapping and validating. */
2421
0
{
2422
0
    struct domain *d = v->domain;
2423
0
    mfn_t smfn, smfn2, map_mfn;
2424
0
    shadow_l1e_t *sl1p;
2425
0
    u32 shadow_idx, guest_idx;
2426
0
    int result = 0;
2427
0
2428
0
    /* Align address and size to guest entry boundaries */
2429
0
    size += (unsigned long)new_gp & (sizeof (guest_l1e_t) - 1);
2430
0
    new_gp = (void *)((unsigned long)new_gp & ~(sizeof (guest_l1e_t) - 1));
2431
0
    size = (size + sizeof (guest_l1e_t) - 1) & ~(sizeof (guest_l1e_t) - 1);
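    /* Worked example (illustrative values, 8-byte guest entries): a write
     * of size 16 at an address ending in ...0x123 becomes size = 19, then
     * new_gp is rounded down to ...0x120 and size rounded up to 24, i.e.
     * three whole entries covering the original byte range. */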
2432
0
    ASSERT(size + (((unsigned long)new_gp) & ~PAGE_MASK) <= PAGE_SIZE);
2433
0
2434
0
    /* Map the shadow page */
2435
0
    smfn = get_shadow_status(d, gmfn, sh_type);
2436
0
    ASSERT(mfn_valid(smfn)); /* Otherwise we would not have been called */
2437
0
    guest_idx = guest_index(new_gp);
2438
0
    map_mfn = smfn;
2439
0
    shadow_idx = shadow_index(&map_mfn, guest_idx);
2440
0
    sl1p = map_domain_page(map_mfn);
2441
0
2442
0
    /* Validate one entry at a time */
2443
0
    while ( size )
2444
0
    {
2445
0
        smfn2 = smfn;
2446
0
        guest_idx = guest_index(new_gp);
2447
0
        shadow_idx = shadow_index(&smfn2, guest_idx);
2448
0
        if ( mfn_x(smfn2) != mfn_x(map_mfn) )
2449
0
        {
2450
0
            /* We have moved to another page of the shadow */
2451
0
            map_mfn = smfn2;
2452
0
            unmap_domain_page(sl1p);
2453
0
            sl1p = map_domain_page(map_mfn);
2454
0
        }
2455
0
        result |= validate_ge(v,
2456
0
                              new_gp,
2457
0
                              map_mfn,
2458
0
                              &sl1p[shadow_idx]);
2459
0
        size -= sizeof(guest_l1e_t);
2460
0
        new_gp += sizeof(guest_l1e_t);
2461
0
    }
2462
0
    unmap_domain_page(sl1p);
2463
0
    return result;
2464
0
}
2465
0
2466
0
2467
0
int
2468
0
sh_map_and_validate_gl4e(struct vcpu *v, mfn_t gl4mfn,
2469
0
                          void *new_gl4p, u32 size)
2470
0
{
2471
0
#if GUEST_PAGING_LEVELS >= 4
2472
0
    return sh_map_and_validate(v, gl4mfn, new_gl4p, size,
2473
0
                                SH_type_l4_shadow,
2474
0
                                shadow_l4_index,
2475
0
                                validate_gl4e);
2476
0
#else // ! GUEST_PAGING_LEVELS >= 4
2477
0
    SHADOW_ERROR("called in wrong paging mode!\n");
2478
0
    BUG();
2479
0
    return 0;
2480
0
#endif
2481
0
}
Unexecuted instantiation: sh_map_and_validate_gl4e__guest_3
Unexecuted instantiation: sh_map_and_validate_gl4e__guest_2
Unexecuted instantiation: sh_map_and_validate_gl4e__guest_4
2482
0
2483
0
int
2484
0
sh_map_and_validate_gl3e(struct vcpu *v, mfn_t gl3mfn,
2485
0
                          void *new_gl3p, u32 size)
2486
0
{
2487
0
#if GUEST_PAGING_LEVELS >= 4
2488
0
    return sh_map_and_validate(v, gl3mfn, new_gl3p, size,
2489
0
                                SH_type_l3_shadow,
2490
0
                                shadow_l3_index,
2491
0
                                validate_gl3e);
2492
0
#else // ! GUEST_PAGING_LEVELS >= 4
2493
0
    SHADOW_ERROR("called in wrong paging mode!\n");
2494
0
    BUG();
2495
0
    return 0;
2496
0
#endif
2497
0
}
Unexecuted instantiation: sh_map_and_validate_gl3e__guest_3
Unexecuted instantiation: sh_map_and_validate_gl3e__guest_4
Unexecuted instantiation: sh_map_and_validate_gl3e__guest_2
2498
0
2499
0
int
2500
0
sh_map_and_validate_gl2e(struct vcpu *v, mfn_t gl2mfn,
2501
0
                          void *new_gl2p, u32 size)
2502
0
{
2503
0
    return sh_map_and_validate(v, gl2mfn, new_gl2p, size,
2504
0
                                SH_type_l2_shadow,
2505
0
                                shadow_l2_index,
2506
0
                                validate_gl2e);
2507
0
}
Unexecuted instantiation: sh_map_and_validate_gl2e__guest_4
Unexecuted instantiation: sh_map_and_validate_gl2e__guest_3
Unexecuted instantiation: sh_map_and_validate_gl2e__guest_2
2508
0
2509
0
int
2510
0
sh_map_and_validate_gl2he(struct vcpu *v, mfn_t gl2mfn,
2511
0
                           void *new_gl2p, u32 size)
2512
0
{
2513
0
#if GUEST_PAGING_LEVELS >= 3
2514
0
    return sh_map_and_validate(v, gl2mfn, new_gl2p, size,
2515
0
                                SH_type_l2h_shadow,
2516
0
                                shadow_l2_index,
2517
0
                                validate_gl2e);
2518
0
#else /* Non-PAE guests don't have different kinds of l2 table */
2519
0
    SHADOW_ERROR("called in wrong paging mode!\n");
2520
0
    BUG();
2521
0
    return 0;
2522
0
#endif
2523
0
}
Unexecuted instantiation: sh_map_and_validate_gl2he__guest_4
Unexecuted instantiation: sh_map_and_validate_gl2he__guest_3
Unexecuted instantiation: sh_map_and_validate_gl2he__guest_2
2524
0
2525
0
int
2526
0
sh_map_and_validate_gl1e(struct vcpu *v, mfn_t gl1mfn,
2527
0
                          void *new_gl1p, u32 size)
2528
0
{
2529
0
    return sh_map_and_validate(v, gl1mfn, new_gl1p, size,
2530
0
                                SH_type_l1_shadow,
2531
0
                                shadow_l1_index,
2532
0
                                validate_gl1e);
2533
0
}
Unexecuted instantiation: sh_map_and_validate_gl1e__guest_3
Unexecuted instantiation: sh_map_and_validate_gl1e__guest_4
Unexecuted instantiation: sh_map_and_validate_gl1e__guest_2
2534
0
2535
0
2536
0
/**************************************************************************/
2537
0
/* Optimization: If we see two emulated writes of zeros to the same
2538
0
 * page-table without another kind of page fault in between, we guess
2539
0
 * that this is a batch of changes (for process destruction) and
2540
0
 * unshadow the page so we don't take a pagefault on every entry.  This
2541
0
 * should also make finding writeable mappings of pagetables much
2542
0
 * easier. */
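A minimal standalone sketch of the two-writes detection described above; the static variable and helper are illustrative only (the real state lives in v->arch.paging.shadow.last_emulated_mfn_for_unshadow, used by check_for_early_unshadow() and reset_early_unshadow() below):

/* Illustration only: remember the MFN of the last pagetable written via
 * emulation; a second consecutive emulated write to the same MFN, with
 * no other page fault observed in between, triggers the unshadow. */
static unsigned long last_pt_written = ~0UL;

static int is_second_write_in_a_row(unsigned long this_mfn)
{
    int hit = (this_mfn == last_pt_written);

    last_pt_written = this_mfn;   /* a real fault path resets this */
    return hit;
}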
2543
0
2544
0
/* Look to see if this is the second emulated write in a row to this
2545
0
 * page, and unshadow if it is */
2546
0
static inline void check_for_early_unshadow(struct vcpu *v, mfn_t gmfn)
2547
0
{
2548
0
#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
2549
0
    struct domain *d = v->domain;
2550
0
    /* If the domain has never made a "dying" op, use the two-writes
2551
0
     * heuristic; otherwise, unshadow as soon as we write a zero for a dying
2552
0
     * process.
2553
0
     *
2554
0
     * Don't bother trying to unshadow if it's not a PT, or if it's > l1.
2555
0
     */
2556
0
    if ( ( v->arch.paging.shadow.pagetable_dying
2557
0
           || ( !d->arch.paging.shadow.pagetable_dying_op
2558
0
                && v->arch.paging.shadow.last_emulated_mfn_for_unshadow == mfn_x(gmfn) ) )
2559
0
         && sh_mfn_is_a_page_table(gmfn)
2560
0
         && (!d->arch.paging.shadow.pagetable_dying_op ||
2561
0
             !(mfn_to_page(gmfn)->shadow_flags
2562
0
               & (SHF_L2_32|SHF_L2_PAE|SHF_L2H_PAE|SHF_L4_64))) )
2563
0
    {
2564
0
        perfc_incr(shadow_early_unshadow);
2565
0
        sh_remove_shadows(d, gmfn, 1, 0 /* Fast, can fail to unshadow */ );
2566
0
        TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EARLY_UNSHADOW);
2567
0
    }
2568
0
    v->arch.paging.shadow.last_emulated_mfn_for_unshadow = mfn_x(gmfn);
2569
0
#endif
2570
0
}
2571
0
2572
0
/* Stop counting towards early unshadows, as we've seen a real page fault */
2573
0
static inline void reset_early_unshadow(struct vcpu *v)
2574
0
{
2575
0
#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
2576
0
    v->arch.paging.shadow.last_emulated_mfn_for_unshadow = mfn_x(INVALID_MFN);
2577
0
#endif
2578
0
}
2579
0
2580
0
2581
0
2582
0
/**************************************************************************/
2583
0
/* Optimization: Prefetch multiple L1 entries.  This is called after we have
2584
0
 * demand-faulted a shadow l1e in the fault handler, to see if it's
2585
0
 * worth fetching some more.
2586
0
 */
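For a concrete sense of the clamping done below: the prefetch never crosses the end of the shadow l1 page and never exceeds PREFETCH_DISTANCE entries. A worked example with illustrative numbers (8-byte shadow entries):

/* Illustration only: if the faulting sl1e sits at byte offset 0xfe0 of
 * its shadow page, only (0x1000 - 0xfe0) / 8 = 4 slots remain in that
 * page, so dist is 4 even though PREFETCH_DISTANCE is 32.  Near the
 * start of a page the page-end limit is large and the cap of 32 wins. */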
2587
0
2588
0
#if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH
2589
0
2590
0
/* XXX magic number */
2591
0
#define PREFETCH_DISTANCE 32
2592
0
2593
0
static void sh_prefetch(struct vcpu *v, walk_t *gw,
2594
0
                        shadow_l1e_t *ptr_sl1e, mfn_t sl1mfn)
2595
0
{
2596
0
    struct domain *d = v->domain;
2597
0
    int i, dist;
2598
0
    gfn_t gfn;
2599
0
    mfn_t gmfn;
2600
0
    guest_l1e_t *gl1p = NULL, gl1e;
2601
0
    shadow_l1e_t sl1e;
2602
0
    u32 gflags;
2603
0
    p2m_type_t p2mt;
2604
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
2605
0
    guest_l1e_t *snpl1p = NULL;
2606
0
#endif /* OOS */
2607
0
2608
0
2609
0
    /* Prefetch no further than the end of the _shadow_ l1 MFN */
2610
0
    dist = (PAGE_SIZE - ((unsigned long)ptr_sl1e & ~PAGE_MASK)) / sizeof sl1e;
2611
0
    /* And no more than a maximum fetches-per-fault */
2612
0
    if ( dist > PREFETCH_DISTANCE )
2613
0
        dist = PREFETCH_DISTANCE;
2614
0
2615
0
    if ( mfn_valid(gw->l1mfn) )
2616
0
    {
2617
0
        /* Normal guest page; grab the next guest entry */
2618
0
        gl1p = map_domain_page(gw->l1mfn);
2619
0
        gl1p += guest_l1_table_offset(gw->va);
2620
0
2621
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
2622
0
        if ( mfn_is_out_of_sync(gw->l1mfn) )
2623
0
        {
2624
0
            mfn_t snpmfn = oos_snapshot_lookup(d, gw->l1mfn);
2625
0
2626
0
            ASSERT(mfn_valid(snpmfn));
2627
0
            snpl1p = map_domain_page(snpmfn);
2628
0
            snpl1p += guest_l1_table_offset(gw->va);
2629
0
        }
2630
0
#endif /* OOS */
2631
0
    }
2632
0
2633
0
    for ( i = 1; i < dist ; i++ )
2634
0
    {
2635
0
        /* No point in prefetching if there's already a shadow */
2636
0
        if ( ptr_sl1e[i].l1 != 0 )
2637
0
            break;
2638
0
2639
0
        if ( mfn_valid(gw->l1mfn) )
2640
0
        {
2641
0
            /* Normal guest page; grab the next guest entry */
2642
0
            gl1e = gl1p[i];
2643
0
            /* Not worth continuing if we hit an entry that will need another
2644
0
             * fault for A/D-bit propagation anyway */
2645
0
            gflags = guest_l1e_get_flags(gl1e);
2646
0
            if ( (gflags & _PAGE_PRESENT)
2647
0
                 && (!(gflags & _PAGE_ACCESSED)
2648
0
                     || ((gflags & _PAGE_RW) && !(gflags & _PAGE_DIRTY))) )
2649
0
                break;
2650
0
        }
2651
0
        else
2652
0
        {
2653
0
            /* Fragmented superpage, unless we've been called wrongly */
2654
0
            ASSERT(guest_l2e_get_flags(gw->l2e) & _PAGE_PSE);
2655
0
            /* Increment the l1e's GFN by the right number of guest pages */
2656
0
            gl1e = guest_l1e_from_gfn(
2657
0
                _gfn(gfn_x(guest_l1e_get_gfn(gw->l1e)) + i),
2658
0
                guest_l1e_get_flags(gw->l1e));
2659
0
        }
2660
0
2661
0
        /* Look at the gfn that the l1e is pointing at */
2662
0
        if ( (guest_l1e_get_flags(gl1e) & _PAGE_PRESENT) &&
2663
0
             !guest_l1e_rsvd_bits(v, gl1e) )
2664
0
        {
2665
0
            gfn = guest_l1e_get_gfn(gl1e);
2666
0
            gmfn = get_gfn_query_unlocked(d, gfn_x(gfn), &p2mt);
2667
0
        }
2668
0
        else
2669
0
        {
2670
0
            gmfn = INVALID_MFN;
2671
0
            p2mt = p2m_invalid;
2672
0
        }
2673
0
2674
0
        /* Propagate the entry.  */
2675
0
        l1e_propagate_from_guest(v, gl1e, gmfn, &sl1e, ft_prefetch, p2mt);
2676
0
        (void) shadow_set_l1e(d, ptr_sl1e + i, sl1e, p2mt, sl1mfn);
2677
0
2678
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
2679
0
        if ( snpl1p != NULL )
2680
0
            snpl1p[i] = gl1e;
2681
0
#endif /* OOS */
2682
0
    }
2683
0
    if ( gl1p != NULL )
2684
0
        unmap_domain_page(gl1p);
2685
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
2686
0
    if ( snpl1p != NULL )
2687
0
        unmap_domain_page(snpl1p);
2688
0
#endif /* OOS */
2689
0
}
2690
0
2691
0
#endif /* SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH */
2692
0
2693
0
#if GUEST_PAGING_LEVELS == 4
2694
0
typedef u64 guest_va_t;
2695
0
typedef u64 guest_pa_t;
2696
0
#elif GUEST_PAGING_LEVELS == 3
2697
0
typedef u32 guest_va_t;
2698
0
typedef u64 guest_pa_t;
2699
0
#else
2700
0
typedef u32 guest_va_t;
2701
0
typedef u32 guest_pa_t;
2702
0
#endif
2703
0
2704
0
static inline void trace_shadow_gen(u32 event, guest_va_t va)
2705
0
{
2706
0
    if ( tb_init_done )
2707
0
    {
2708
0
        event |= (GUEST_PAGING_LEVELS-2)<<8;
2709
0
        __trace_var(event, 0/*!tsc*/, sizeof(va), &va);
2710
0
    }
2711
0
}
2712
0
2713
0
static inline void trace_shadow_fixup(guest_l1e_t gl1e,
2714
0
                                      guest_va_t va)
2715
0
{
2716
0
    if ( tb_init_done )
2717
0
    {
2718
0
        struct __packed {
2719
0
            /* for PAE, guest_l1e may be 64 while guest_va may be 32;
2720
0
               so put it first for alignment sake. */
2721
0
            guest_l1e_t gl1e;
2722
0
            guest_va_t va;
2723
0
            u32 flags;
2724
0
        } d;
2725
0
        u32 event;
2726
0
2727
0
        event = TRC_SHADOW_FIXUP | ((GUEST_PAGING_LEVELS-2)<<8);
2728
0
2729
0
        d.gl1e = gl1e;
2730
0
        d.va = va;
2731
0
        d.flags = this_cpu(trace_shadow_path_flags);
2732
0
2733
0
        __trace_var(event, 0/*!tsc*/, sizeof(d), &d);
2734
0
    }
2735
0
}
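
As the comment in the record notes, the (possibly 64-bit) gl1e is placed ahead of the (possibly 32-bit) va so the packed record keeps its 64-bit field at offset 0. A small standalone illustration of how field order changes the byte layout a trace consumer would have to parse (toy structs only, not the real trace ABI):

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

/* Toy versions of the two possible layouts; __attribute__((packed)) is a
 * GCC/Clang extension, matching the __packed used in the file. */
struct __attribute__((packed)) rec_64_first {
    uint64_t gl1e;   /* 64-bit entry first */
    uint32_t va;     /* 32-bit VA (the PAE case) */
    uint32_t flags;
};

struct __attribute__((packed)) rec_32_first {
    uint32_t va;     /* putting the 32-bit field first... */
    uint64_t gl1e;   /* ...leaves the 64-bit field at offset 4 */
    uint32_t flags;
};

int main(void)
{
    printf("64-bit-first: gl1e@%zu va@%zu flags@%zu size=%zu\n",
           offsetof(struct rec_64_first, gl1e),
           offsetof(struct rec_64_first, va),
           offsetof(struct rec_64_first, flags),
           sizeof(struct rec_64_first));
    printf("32-bit-first: va@%zu gl1e@%zu flags@%zu size=%zu\n",
           offsetof(struct rec_32_first, va),
           offsetof(struct rec_32_first, gl1e),
           offsetof(struct rec_32_first, flags),
           sizeof(struct rec_32_first));
    return 0;
}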
2736
0
2737
0
static inline void trace_not_shadow_fault(guest_l1e_t gl1e,
2738
0
                                          guest_va_t va)
2739
0
{
2740
0
    if ( tb_init_done )
2741
0
    {
2742
0
        struct __packed {
2743
0
            /* for PAE, guest_l1e may be 64 while guest_va may be 32;
2744
0
               so put it first for alignment sake. */
2745
0
            guest_l1e_t gl1e;
2746
0
            guest_va_t va;
2747
0
            u32 flags;
2748
0
        } d;
2749
0
        u32 event;
2750
0
2751
0
        event = TRC_SHADOW_NOT_SHADOW | ((GUEST_PAGING_LEVELS-2)<<8);
2752
0
2753
0
        d.gl1e = gl1e;
2754
0
        d.va = va;
2755
0
        d.flags = this_cpu(trace_shadow_path_flags);
2756
0
2757
0
        __trace_var(event, 0/*!tsc*/, sizeof(d), &d);
2758
0
    }
2759
0
}
2760
0
2761
0
static inline void trace_shadow_emulate_other(u32 event,
2762
0
                                                 guest_va_t va,
2763
0
                                                 gfn_t gfn)
2764
0
{
2765
0
    if ( tb_init_done )
2766
0
    {
2767
0
        struct __packed {
2768
0
            /* for PAE, guest_l1e may be 64 while guest_va may be 32;
2769
0
               so put it first for alignment sake. */
2770
0
#if GUEST_PAGING_LEVELS == 2
2771
0
            u32 gfn;
2772
0
#else
2773
0
            u64 gfn;
2774
0
#endif
2775
0
            guest_va_t va;
2776
0
        } d;
2777
0
2778
0
        event |= ((GUEST_PAGING_LEVELS-2)<<8);
2779
0
2780
0
        d.gfn=gfn_x(gfn);
2781
0
        d.va = va;
2782
0
2783
0
        __trace_var(event, 0/*!tsc*/, sizeof(d), &d);
2784
0
    }
2785
0
}
2786
0
2787
0
#if GUEST_PAGING_LEVELS == 3
2788
0
static DEFINE_PER_CPU(guest_va_t,trace_emulate_initial_va);
2789
0
static DEFINE_PER_CPU(int,trace_extra_emulation_count);
2790
0
#endif
2791
0
static DEFINE_PER_CPU(guest_pa_t,trace_emulate_write_val);
2792
0
2793
0
static inline void trace_shadow_emulate(guest_l1e_t gl1e, unsigned long va)
2794
0
{
2795
0
    if ( tb_init_done )
2796
0
    {
2797
0
        struct __packed {
2798
0
            /* for PAE, guest_l1e may be 64 while guest_va may be 32;
2799
0
               so put it first for alignment sake. */
2800
0
            guest_l1e_t gl1e, write_val;
2801
0
            guest_va_t va;
2802
0
            unsigned flags:29, emulation_count:3;
2803
0
        } d;
2804
0
        u32 event;
2805
0
2806
0
        event = TRC_SHADOW_EMULATE | ((GUEST_PAGING_LEVELS-2)<<8);
2807
0
2808
0
        d.gl1e = gl1e;
2809
0
        d.write_val.l1 = this_cpu(trace_emulate_write_val);
2810
0
        d.va = va;
2811
0
#if GUEST_PAGING_LEVELS == 3
2812
0
        d.emulation_count = this_cpu(trace_extra_emulation_count);
2813
0
#endif
2814
0
        d.flags = this_cpu(trace_shadow_path_flags);
2815
0
2816
0
        __trace_var(event, 0/*!tsc*/, sizeof(d), &d);
2817
0
    }
2818
0
}
2819
0
2820
0
/**************************************************************************/
2821
0
/* Entry points into the shadow code */
2822
0
2823
0
/* Called from pagefault handler in Xen, and from the HVM trap handlers
2824
0
 * for pagefaults.  Returns 1 if this fault was an artefact of the
2825
0
 * shadow code (and the guest should retry) or 0 if it is not (and the
2826
0
 * fault should be handled elsewhere or passed to the guest). */
2827
0
2828
0
static int sh_page_fault(struct vcpu *v,
2829
0
                          unsigned long va,
2830
0
                          struct cpu_user_regs *regs)
2831
0
{
2832
0
    struct domain *d = v->domain;
2833
0
    walk_t gw;
2834
0
    gfn_t gfn = _gfn(0);
2835
0
    mfn_t gmfn, sl1mfn = _mfn(0);
2836
0
    shadow_l1e_t sl1e, *ptr_sl1e;
2837
0
    paddr_t gpa;
2838
0
    struct sh_emulate_ctxt emul_ctxt;
2839
0
    const struct x86_emulate_ops *emul_ops;
2840
0
    int r;
2841
0
    p2m_type_t p2mt;
2842
0
    uint32_t rc, error_code;
2843
0
    bool walk_ok;
2844
0
    int version;
2845
0
    const struct npfec access = {
2846
0
         .read_access = 1,
2847
0
         .write_access = !!(regs->error_code & PFEC_write_access),
2848
0
         .gla_valid = 1,
2849
0
         .kind = npfec_kind_with_gla
2850
0
    };
2851
0
    const fetch_type_t ft =
2852
0
        access.write_access ? ft_demand_write : ft_demand_read;
2853
0
#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
2854
0
    int fast_emul = 0;
2855
0
#endif
2856
0
2857
0
    SHADOW_PRINTK("%pv va=%#lx err=%#x, rip=%lx\n",
2858
0
                  v, va, regs->error_code, regs->rip);
2859
0
2860
0
    perfc_incr(shadow_fault);
2861
0
2862
0
#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
2863
0
    /* If the faulting frame was successfully emulated on the last shadow fault,
2864
0
     * it's highly likely that the same emulation action applies to this frame,
2865
0
     * so try to emulate early to avoid lock acquisition.
2866
0
     */
2867
0
    if ( v->arch.paging.last_write_emul_ok
2868
0
         && v->arch.paging.shadow.last_emulated_frame == (va >> PAGE_SHIFT) )
2869
0
    {
2870
0
        /* check whether error code is 3, or else fall back to normal path
2871
0
         * in case some validation is required
2872
0
         */
2873
0
        if ( regs->error_code == (PFEC_write_access | PFEC_page_present) )
2874
0
        {
2875
0
            fast_emul = 1;
2876
0
            gmfn = _mfn(v->arch.paging.shadow.last_emulated_mfn);
2877
0
2878
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
2879
0
            /* Fall back to the slow path if we're trying to emulate
2880
0
               writes to an out of sync page. */
2881
0
            if ( mfn_valid(gmfn) && mfn_is_out_of_sync(gmfn) )
2882
0
            {
2883
0
                fast_emul = 0;
2884
0
                v->arch.paging.last_write_emul_ok = 0;
2885
0
                goto page_fault_slow_path;
2886
0
            }
2887
0
#endif /* OOS */
2888
0
2889
0
            perfc_incr(shadow_fault_fast_emulate);
2890
0
            goto early_emulation;
2891
0
        }
2892
0
        else
2893
0
            v->arch.paging.last_write_emul_ok = 0;
2894
0
    }
2895
0
#endif
2896
0
2897
0
    //
2898
0
    // XXX: Need to think about eventually mapping superpages directly in the
2899
0
    //      shadow (when possible), as opposed to splintering them into a
2900
0
    //      bunch of 4K maps.
2901
0
    //
2902
0
2903
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH)
2904
0
    if ( (regs->error_code & PFEC_reserved_bit) )
2905
0
    {
2906
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
2907
0
        /* First, need to check that this isn't an out-of-sync
2908
0
         * shadow l1e.  If it is, we fall back to the slow path, which
2909
0
         * will sync it up again. */
2910
0
        {
2911
0
            shadow_l2e_t sl2e;
2912
0
            mfn_t gl1mfn;
2913
0
            if ( (__copy_from_user(&sl2e,
2914
0
                                   (sh_linear_l2_table(v)
2915
0
                                    + shadow_l2_linear_offset(va)),
2916
0
                                   sizeof(sl2e)) != 0)
2917
0
                 || !(shadow_l2e_get_flags(sl2e) & _PAGE_PRESENT)
2918
0
                 || !mfn_valid(gl1mfn = backpointer(mfn_to_page(
2919
0
                                  shadow_l2e_get_mfn(sl2e))))
2920
0
                 || unlikely(mfn_is_out_of_sync(gl1mfn)) )
2921
0
            {
2922
0
                /* Hit the slow path as if there had been no
2923
0
                 * shadow entry at all, and let it tidy up */
2924
0
                ASSERT(regs->error_code & PFEC_page_present);
2925
0
                regs->error_code ^= (PFEC_reserved_bit|PFEC_page_present);
2926
0
                goto page_fault_slow_path;
2927
0
            }
2928
0
        }
2929
0
#endif /* SHOPT_OUT_OF_SYNC */
2930
0
        /* The only reasons for reserved bits to be set in shadow entries
2931
0
         * are the two "magic" shadow_l1e entries. */
2932
0
        if ( likely((__copy_from_user(&sl1e,
2933
0
                                      (sh_linear_l1_table(v)
2934
0
                                       + shadow_l1_linear_offset(va)),
2935
0
                                      sizeof(sl1e)) == 0)
2936
0
                    && sh_l1e_is_magic(sl1e)) )
2937
0
        {
2938
0
2939
0
            if ( sh_l1e_is_gnp(sl1e) )
2940
0
            {
2941
0
                /* Not-present in a guest PT: pass to the guest as
2942
0
                 * a not-present fault (by flipping two bits). */
2943
0
                ASSERT(regs->error_code & PFEC_page_present);
2944
0
                regs->error_code ^= (PFEC_reserved_bit|PFEC_page_present);
2945
0
                reset_early_unshadow(v);
2946
0
                perfc_incr(shadow_fault_fast_gnp);
2947
0
                SHADOW_PRINTK("fast path not-present\n");
2948
0
                trace_shadow_gen(TRC_SHADOW_FAST_PROPAGATE, va);
2949
0
                return 0;
2950
0
            }
2951
0
            else
2952
0
            {
2953
0
                /* Magic MMIO marker: extract gfn for MMIO address */
2954
0
                ASSERT(sh_l1e_is_mmio(sl1e));
2955
0
                gpa = (((paddr_t)(gfn_x(sh_l1e_mmio_get_gfn(sl1e))))
2956
0
                       << PAGE_SHIFT)
2957
0
                    | (va & ~PAGE_MASK);
2958
0
            }
2959
0
            perfc_incr(shadow_fault_fast_mmio);
2960
0
            SHADOW_PRINTK("fast path mmio %#"PRIpaddr"\n", gpa);
2961
0
            reset_early_unshadow(v);
2962
0
            trace_shadow_gen(TRC_SHADOW_FAST_MMIO, va);
2963
0
            return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT, access)
2964
0
                    ? EXCRET_fault_fixed : 0);
2965
0
        }
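
Two things happen on this fast path: a "guest not present" magic entry is turned back into the fault the guest expects by flipping exactly two error-code bits, and a magic MMIO entry has its gfn recombined with the page offset of the faulting address to form the gpa handed to the device model. A standalone sketch of both calculations; the PFEC bit values are the architectural ones, while the va and gfn values are made up for the example:

#include <stdint.h>
#include <stdio.h>

/* Architectural #PF error-code bits (these values are fixed by x86). */
#define PFEC_page_present  (1u << 0)
#define PFEC_write_access  (1u << 1)
#define PFEC_reserved_bit  (1u << 3)

#define PAGE_SHIFT 12
#define PAGE_MASK  (~((uint64_t)0xfff))

int main(void)
{
    /* Hardware reported a reserved-bit fault on a present shadow entry. */
    uint32_t error_code = PFEC_write_access | PFEC_page_present |
                          PFEC_reserved_bit;

    /* "Guest not present" magic entry: hand the guest a plain not-present
     * fault by flipping exactly two bits, as the fast path does. */
    uint32_t guest_ec = error_code ^ (PFEC_reserved_bit | PFEC_page_present);
    printf("guest sees error code %#x (present=%u, rsvd=%u)\n",
           guest_ec, guest_ec & PFEC_page_present,
           !!(guest_ec & PFEC_reserved_bit));

    /* MMIO magic entry: rebuild the guest-physical address from the gfn
     * stashed in the entry plus the page offset of the faulting VA.
     * (The gfn and va values here are made up for the example.) */
    uint64_t va  = 0xfee000b0;   /* e.g. a local-APIC-style access */
    uint64_t gfn = 0xfee00;      /* pretend sh_l1e_mmio_get_gfn() said so */
    uint64_t gpa = (gfn << PAGE_SHIFT) | (va & ~PAGE_MASK);
    printf("mmio gpa = %#llx\n", (unsigned long long)gpa);
    return 0;
}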
2966
0
        else
2967
0
        {
2968
0
            /* This should be exceptionally rare: another vcpu has fixed
2969
0
             * the tables between the fault and our reading the l1e.
2970
0
             * Retry and let the hardware give us the right fault next time. */
2971
0
            perfc_incr(shadow_fault_fast_fail);
2972
0
            SHADOW_PRINTK("fast path false alarm!\n");
2973
0
            trace_shadow_gen(TRC_SHADOW_FALSE_FAST_PATH, va);
2974
0
            return EXCRET_fault_fixed;
2975
0
        }
2976
0
    }
2977
0
2978
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
2979
0
 page_fault_slow_path:
2980
0
#endif
2981
0
#endif /* SHOPT_FAST_FAULT_PATH */
2982
0
2983
0
    /* Detect if this page fault happened while we were already in Xen
2984
0
     * doing a shadow operation.  If that happens, the only thing we can
2985
0
     * do is let Xen's normal fault handlers try to fix it.  In any case,
2986
0
     * a diagnostic trace of the fault will be more useful than
2987
0
     * a BUG() when we try to take the lock again. */
2988
0
    if ( unlikely(paging_locked_by_me(d)) )
2989
0
    {
2990
0
        SHADOW_ERROR("Recursive shadow fault: lock was taken by %s\n",
2991
0
                     d->arch.paging.lock.locker_function);
2992
0
        return 0;
2993
0
    }
2994
0
2995
0
 rewalk:
2996
0
2997
0
    error_code = regs->error_code;
2998
0
2999
0
    /*
3000
0
     * When CR4.SMAP is enabled, instructions which have a side effect of
3001
0
     * accessing the system data structures (e.g. mov to %ds accessing the
3002
0
     * LDT/GDT, or int $n accessing the IDT) are known as implicit supervisor
3003
0
     * accesses.
3004
0
     *
3005
0
     * The distinction between implicit and explicit accesses forms part of the
3006
0
     * determination of access rights, controlling whether the access is
3007
0
     * successful, or raises a #PF.
3008
0
     *
3009
0
     * Unfortunately, the processor throws away the implicit/explicit
3010
0
     * distinction and does not provide it to the pagefault handler
3011
0
     * (i.e. here) in the #PF error code.  Therefore, we must try to
3012
0
     * reconstruct the lost state so it can be fed back into our pagewalk
3013
0
     * through the guest tables.
3014
0
     *
3015
0
     * User mode accesses are easy to reconstruct:
3016
0
     *
3017
0
     *   If we observe a cpl3 data fetch which was a supervisor walk, this
3018
0
     *   must have been an implicit access to a system table.
3019
0
     *
3020
0
     * Supervisor mode accesses are not easy:
3021
0
     *
3022
0
     *   In principle, we could decode the instruction under %rip and have the
3023
0
     *   instruction emulator tell us if there is an implicit access.
3024
0
     *   However, this is racy with other vcpus updating the pagetable or
3025
0
     *   rewriting the instruction stream under our feet.
3026
0
     *
3027
0
     *   Therefore, we do nothing.  (If anyone has a sensible suggestion for
3028
0
     *   how to distinguish these cases, xen-devel@ is all ears...)
3029
0
     *
3030
0
     * As a result, one specific corner case will fail.  If a guest OS with
3031
0
     * SMAP enabled ends up mapping a system table with user mappings, sets
3032
0
     * EFLAGS.AC to allow explicit accesses to user mappings, and implicitly
3033
0
     * accesses the user mapping, hardware and the shadow code will disagree
3034
0
     * on whether a #PF should be raised.
3035
0
     *
3036
0
     * Hardware raises #PF because implicit supervisor accesses to user
3037
0
     * mappings are strictly disallowed.  As we can't reconstruct the correct
3038
0
     * input, the pagewalk is performed as if it were an explicit access,
3039
0
     * which concludes that the access should have succeeded and the shadow
3040
0
     * pagetables need modifying.  The shadow pagetables are modified (to the
3041
0
     * same value), and we re-enter the guest to re-execute the instruction,
3042
0
     * which causes another #PF, and the vcpu livelocks, unable to make
3043
0
     * forward progress.
3044
0
     *
3045
0
     * In practice, this is tolerable.  No production OS will deliberately
3046
0
     * construct this corner case (as doing so would mean that a system table
3047
0
     * is directly accessible to userspace, and the OS is trivially rootable.)
3048
0
     * If this corner case comes about accidentally, then a security-relevant
3049
0
     * bug has been tickled.
3050
0
     */
3051
0
    if ( !(error_code & (PFEC_insn_fetch|PFEC_user_mode)) &&
3052
0
         (is_pv_vcpu(v) ? (regs->ss & 3) : hvm_get_cpl(v)) == 3 )
3053
0
        error_code |= PFEC_implicit;
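
A compact way to see the reconstruction described above: a data access whose error code carries neither the user-mode nor the instruction-fetch bit, taken while the vcpu is at CPL 3, can only have been an implicit supervisor access, so the walk is told so. The sketch below models just that check; PFEC_implicit's value here is an assumed software-defined bit for the sketch, not Xen's actual definition:

#include <stdint.h>
#include <stdio.h>

/* Architectural #PF error-code bits, plus a software-defined "implicit"
 * bit; the value below is illustrative, not Xen's actual PFEC_implicit. */
#define PFEC_user_mode   (1u << 2)
#define PFEC_insn_fetch  (1u << 4)
#define PFEC_implicit    (1u << 30)   /* assumed software bit for the sketch */

static uint32_t classify(uint32_t error_code, unsigned int cpl)
{
    /* A supervisor-flagged data access taken while running at CPL 3 can
     * only have been an implicit access to a system structure (GDT/LDT/
     * IDT/TSS), so feed that hint back into the guest pagewalk. */
    if ( !(error_code & (PFEC_insn_fetch | PFEC_user_mode)) && cpl == 3 )
        error_code |= PFEC_implicit;
    return error_code;
}

int main(void)
{
    printf("cpl3 data fetch, supervisor walk -> %#x\n", classify(0, 3));
    printf("cpl3 user-mode access           -> %#x\n",
           classify(PFEC_user_mode, 3));
    printf("cpl0 data fetch                 -> %#x\n", classify(0, 0));
    return 0;
}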
3054
0
3055
0
    /* The walk is done in a lock-free style, with some sanity check
3056
0
     * postponed after grabbing paging lock later. Those delayed checks
3057
0
     * will make sure no inconsistent mapping being translated into
3058
0
     * shadow page table. */
3059
0
    version = atomic_read(&d->arch.paging.shadow.gtable_dirty_version);
3060
0
    smp_rmb();
3061
0
    walk_ok = sh_walk_guest_tables(v, va, &gw, error_code);
3062
0
3063
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
3064
0
    regs->error_code &= ~PFEC_page_present;
3065
0
    if ( gw.pfec & PFEC_page_present )
3066
0
        regs->error_code |= PFEC_page_present;
3067
0
#endif
3068
0
3069
0
    if ( !walk_ok )
3070
0
    {
3071
0
        perfc_incr(shadow_fault_bail_real_fault);
3072
0
        SHADOW_PRINTK("not a shadow fault\n");
3073
0
        reset_early_unshadow(v);
3074
0
        regs->error_code = gw.pfec & PFEC_arch_mask;
3075
0
        goto propagate;
3076
0
    }
3077
0
3078
0
    /* It's possible that the guest has put pagetables in memory that it has
3079
0
     * already used for some special purpose (ioreq pages, or granted pages).
3080
0
     * If that happens we'll have killed the guest already but it's still not
3081
0
     * safe to propagate entries out of the guest PT so get out now. */
3082
0
    if ( unlikely(d->is_shutting_down && d->shutdown_code == SHUTDOWN_crash) )
3083
0
    {
3084
0
        SHADOW_PRINTK("guest is shutting down\n");
3085
0
        goto propagate;
3086
0
    }
3087
0
3088
0
    /* What mfn is the guest trying to access? */
3089
0
    gfn = guest_walk_to_gfn(&gw);
3090
0
    gmfn = get_gfn(d, gfn, &p2mt);
3091
0
3092
0
    if ( shadow_mode_refcounts(d) &&
3093
0
         ((!p2m_is_valid(p2mt) && !p2m_is_grant(p2mt)) ||
3094
0
          (!p2m_is_mmio(p2mt) && !mfn_valid(gmfn))) )
3095
0
    {
3096
0
        perfc_incr(shadow_fault_bail_bad_gfn);
3097
0
        SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n",
3098
0
                      gfn_x(gfn), mfn_x(gmfn));
3099
0
        reset_early_unshadow(v);
3100
0
        put_gfn(d, gfn_x(gfn));
3101
0
        goto propagate;
3102
0
    }
3103
0
3104
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
3105
0
    /* Remember this successful VA->GFN translation for later. */
3106
0
    vtlb_insert(v, va >> PAGE_SHIFT, gfn_x(gfn),
3107
0
                regs->error_code | PFEC_page_present);
3108
0
#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
3109
0
3110
0
    paging_lock(d);
3111
0
3112
0
    TRACE_CLEAR_PATH_FLAGS;
3113
0
3114
0
    /* Make sure there is enough free shadow memory to build a chain of
3115
0
     * shadow tables. (We never allocate a top-level shadow on this path,
3116
0
     * only a 32b l1, pae l1, or 64b l3+2+1. Note that while
3117
0
     * SH_type_l1_shadow isn't correct in the latter case, all page
3118
0
     * tables are the same size there.)
3119
0
     *
3120
0
     * Preallocate shadow pages *before* removing writable accesses
3121
0
     * otherwise an OOS L1 might be demoted and promoted again with
3122
0
     * writable mappings. */
3123
0
    shadow_prealloc(d,
3124
0
                    SH_type_l1_shadow,
3125
0
                    GUEST_PAGING_LEVELS < 4 ? 1 : GUEST_PAGING_LEVELS - 1);
3126
0
3127
0
    rc = gw_remove_write_accesses(v, va, &gw);
3128
0
3129
0
    /* First bit set: Removed write access to a page. */
3130
0
    if ( rc & GW_RMWR_FLUSHTLB )
3131
0
    {
3132
0
        /* Write permission removal is also a hint that other gwalks
3133
0
         * overlapping with this one may be inconsistent
3134
0
         */
3135
0
        perfc_incr(shadow_rm_write_flush_tlb);
3136
0
        smp_wmb();
3137
0
        atomic_inc(&d->arch.paging.shadow.gtable_dirty_version);
3138
0
        flush_tlb_mask(d->domain_dirty_cpumask);
3139
0
    }
3140
0
3141
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
3142
0
    /* Second bit set: Resynced a page. Re-walk needed. */
3143
0
    if ( rc & GW_RMWR_REWALK )
3144
0
    {
3145
0
        paging_unlock(d);
3146
0
        put_gfn(d, gfn_x(gfn));
3147
0
        goto rewalk;
3148
0
    }
3149
0
#endif /* OOS */
3150
0
3151
0
    if ( !shadow_check_gwalk(v, va, &gw, version) )
3152
0
    {
3153
0
        perfc_incr(shadow_inconsistent_gwalk);
3154
0
        paging_unlock(d);
3155
0
        put_gfn(d, gfn_x(gfn));
3156
0
        goto rewalk;
3157
0
    }
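
The pattern at work here is a version-checked lockless walk: read gtable_dirty_version, walk the guest tables without the paging lock, then re-check the version (and the cached entries) once the lock is held, retrying the walk if a writer bumped it in between. A simplified single-file model of that retry loop using C11 atomics; the names are toy stand-ins, and the resync and write-removal work done under the lock is omitted:

#include <stdatomic.h>
#include <stdio.h>

/* Simplified single-threaded model of the "dirty version" retry pattern;
 * the names below are toy stand-ins, not Xen's gtable_dirty_version API. */
static atomic_int table_version;
static int guest_table_value = 42;      /* pretend this is a pagetable */

/* Writer: change the tables, then publish by bumping the version. */
static void writer_updates_tables(void)
{
    guest_table_value = 43;
    atomic_fetch_add_explicit(&table_version, 1, memory_order_release);
}

/* Reader: walk without the lock, then verify nothing moved underneath. */
static int lockless_walk(void)
{
    for ( ;; )
    {
        int v = atomic_load_explicit(&table_version, memory_order_acquire);
        int result = guest_table_value;      /* the "walk" */
        /* ...later, with the paging lock held, re-check the version... */
        if ( atomic_load_explicit(&table_version,
                                  memory_order_acquire) == v )
            return result;                   /* walk was consistent */
        /* version changed: someone rewrote the tables; rewalk */
    }
}

int main(void)
{
    printf("walk result: %d\n", lockless_walk());
    writer_updates_tables();
    printf("walk result after update: %d\n", lockless_walk());
    return 0;
}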
3158
0
3159
0
    shadow_audit_tables(v);
3160
0
    sh_audit_gw(v, &gw);
3161
0
3162
0
    /* Acquire the shadow.  This must happen before we figure out the rights
3163
0
     * for the shadow entry, since we might promote a page here. */
3164
0
    ptr_sl1e = shadow_get_and_create_l1e(v, &gw, &sl1mfn, ft);
3165
0
    if ( unlikely(ptr_sl1e == NULL) )
3166
0
    {
3167
0
        /* Couldn't get the sl1e!  Since we know the guest entries
3168
0
         * are OK, this can only have been caused by a failed
3169
0
         * shadow_set_l*e(), which will have crashed the guest.
3170
0
         * Get out of the fault handler immediately. */
3171
0
        /* Windows 7 apparently relies on the hardware to do something
3172
0
         * it explicitly hasn't promised to do: load l3 values after
3173
0
         * the cr3 is loaded.
3174
0
         * In any case, in the PAE case, the ASSERT is not true; it can
3175
0
         * happen because of actions the guest is taking. */
3176
0
#if GUEST_PAGING_LEVELS == 3
3177
0
        v->arch.paging.mode->update_cr3(v, 0);
3178
0
#else
3179
0
        ASSERT(d->is_shutting_down);
3180
0
#endif
3181
0
        paging_unlock(d);
3182
0
        put_gfn(d, gfn_x(gfn));
3183
0
        trace_shadow_gen(TRC_SHADOW_DOMF_DYING, va);
3184
0
        return 0;
3185
0
    }
3186
0
3187
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
3188
0
    /* Always unsync when writing to L1 page tables. */
3189
0
    if ( sh_mfn_is_a_page_table(gmfn)
3190
0
         && ft == ft_demand_write )
3191
0
        sh_unsync(v, gmfn);
3192
0
3193
0
    if ( unlikely(d->is_shutting_down && d->shutdown_code == SHUTDOWN_crash) )
3194
0
    {
3195
0
        /* We might end up with a crashed domain here if
3196
0
         * sh_remove_shadows() in a previous sh_resync() call has
3197
0
         * failed. We cannot safely continue since some page is still
3198
0
         * OOS but not in the hash table anymore. */
3199
0
        paging_unlock(d);
3200
0
        put_gfn(d, gfn_x(gfn));
3201
0
        return 0;
3202
0
    }
3203
0
3204
0
    /* Final check: if someone has synced a page, it's possible that
3205
0
     * our l1e is stale.  Compare the entries, and rewalk if necessary. */
3206
0
    if ( shadow_check_gl1e(v, &gw)  )
3207
0
    {
3208
0
        perfc_incr(shadow_inconsistent_gwalk);
3209
0
        paging_unlock(d);
3210
0
        put_gfn(d, gfn_x(gfn));
3211
0
        goto rewalk;
3212
0
    }
3213
0
#endif /* OOS */
3214
0
3215
0
    /* Calculate the shadow entry and write it */
3216
0
    l1e_propagate_from_guest(v, gw.l1e, gmfn, &sl1e, ft, p2mt);
3217
0
    r = shadow_set_l1e(d, ptr_sl1e, sl1e, p2mt, sl1mfn);
3218
0
3219
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
3220
0
    if ( mfn_valid(gw.l1mfn)
3221
0
         && mfn_is_out_of_sync(gw.l1mfn) )
3222
0
    {
3223
0
        /* Update the OOS snapshot. */
3224
0
        mfn_t snpmfn = oos_snapshot_lookup(d, gw.l1mfn);
3225
0
        guest_l1e_t *snp;
3226
0
3227
0
        ASSERT(mfn_valid(snpmfn));
3228
0
3229
0
        snp = map_domain_page(snpmfn);
3230
0
        snp[guest_l1_table_offset(va)] = gw.l1e;
3231
0
        unmap_domain_page(snp);
3232
0
    }
3233
0
#endif /* OOS */
3234
0
3235
0
#if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH
3236
0
    /* Prefetch some more shadow entries */
3237
0
    sh_prefetch(v, &gw, ptr_sl1e, sl1mfn);
3238
0
#endif
3239
0
3240
0
    /* Need to emulate accesses to page tables */
3241
0
    if ( sh_mfn_is_a_page_table(gmfn)
3242
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
3243
0
         /* Unless they've been allowed to go out of sync with their
3244
0
            shadows and we don't need to unshadow it. */
3245
0
         && !(mfn_is_out_of_sync(gmfn)
3246
0
              && !(regs->error_code & PFEC_user_mode))
3247
0
#endif
3248
0
         && (ft == ft_demand_write) )
3249
0
    {
3250
0
        perfc_incr(shadow_fault_emulate_write);
3251
0
        goto emulate;
3252
0
    }
3253
0
3254
0
    /* Need to hand off device-model MMIO to the device model */
3255
0
    if ( p2mt == p2m_mmio_dm )
3256
0
    {
3257
0
        gpa = guest_walk_to_gpa(&gw);
3258
0
        goto mmio;
3259
0
    }
3260
0
3261
0
    /* Ignore attempts to write to read-only memory. */
3262
0
    if ( p2m_is_readonly(p2mt) && (ft == ft_demand_write) )
3263
0
    {
3264
0
        static unsigned long lastpage;
3265
0
        if ( xchg(&lastpage, va & PAGE_MASK) != (va & PAGE_MASK) )
3266
0
            gdprintk(XENLOG_DEBUG, "guest attempted write to read-only memory"
3267
0
                     " page. va page=%#lx, mfn=%#lx\n",
3268
0
                     va & PAGE_MASK, mfn_x(gmfn));
3269
0
        goto emulate_readonly; /* skip over the instruction */
3270
0
    }
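
The xchg(&lastpage, ...) trick above rate-limits the diagnostic: the message is printed only when the faulting page differs from the last one recorded, so a guest hammering the same read-only page does not flood the log. A standalone sketch of the same idea, with C11 atomics standing in for Xen's xchg():

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_MASK (~((uint64_t)0xfff))

/* Log a "write to read-only page" message at most once per distinct page:
 * swap in the new page address and only print if it differs from the last
 * one we saw. */
static void warn_readonly_write(uint64_t va)
{
    static _Atomic uint64_t lastpage;
    uint64_t page = va & PAGE_MASK;

    if ( atomic_exchange(&lastpage, page) != page )
        printf("guest wrote to read-only page %#llx\n",
               (unsigned long long)page);
}

int main(void)
{
    warn_readonly_write(0x7f0000001000);   /* prints */
    warn_readonly_write(0x7f0000001abc);   /* same page: silent */
    warn_readonly_write(0x7f0000002000);   /* new page: prints */
    return 0;
}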
3271
0
3272
0
    /* In HVM guests, we force CR0.WP always to be set, so that the
3273
0
     * pagetables are always write-protected.  If the guest thinks
3274
0
     * CR0.WP is clear, we must emulate faulting supervisor writes to
3275
0
     * allow the guest to write through read-only PTEs.  Emulate if the
3276
0
     * fault was a non-user write to a present page.  */
3277
0
    if ( is_hvm_domain(d)
3278
0
         && unlikely(!hvm_wp_enabled(v))
3279
0
         && regs->error_code == (PFEC_write_access|PFEC_page_present)
3280
0
         && mfn_valid(gmfn) )
3281
0
    {
3282
0
        perfc_incr(shadow_fault_emulate_wp);
3283
0
        goto emulate;
3284
0
    }
3285
0
3286
0
    perfc_incr(shadow_fault_fixed);
3287
0
    d->arch.paging.log_dirty.fault_count++;
3288
0
    reset_early_unshadow(v);
3289
0
3290
0
    trace_shadow_fixup(gw.l1e, va);
3291
0
 done:
3292
0
    sh_audit_gw(v, &gw);
3293
0
    SHADOW_PRINTK("fixed\n");
3294
0
    shadow_audit_tables(v);
3295
0
    paging_unlock(d);
3296
0
    put_gfn(d, gfn_x(gfn));
3297
0
    return EXCRET_fault_fixed;
3298
0
3299
0
 emulate:
3300
0
    if ( !shadow_mode_refcounts(d) || !guest_mode(regs) )
3301
0
        goto not_a_shadow_fault;
3302
0
3303
0
    /*
3304
0
     * We do not emulate user writes. Instead we use them as a hint that the
3305
0
     * page is no longer a page table. This behaviour differs from native, but
3306
0
     * it seems very unlikely that any OS grants user access to page tables.
3307
0
     */
3308
0
    if ( (regs->error_code & PFEC_user_mode) )
3309
0
    {
3310
0
        SHADOW_PRINTK("user-mode fault to PT, unshadowing mfn %#lx\n",
3311
0
                      mfn_x(gmfn));
3312
0
        perfc_incr(shadow_fault_emulate_failed);
3313
0
        sh_remove_shadows(d, gmfn, 0 /* thorough */, 1 /* must succeed */);
3314
0
        trace_shadow_emulate_other(TRC_SHADOW_EMULATE_UNSHADOW_USER,
3315
0
                                      va, gfn);
3316
0
        goto done;
3317
0
    }
3318
0
3319
0
    /*
3320
0
     * Write from userspace to ro-mem needs to jump here to avoid getting
3321
0
     * caught by user-mode page-table check above.
3322
0
     */
3323
0
 emulate_readonly:
3324
0
3325
0
    /* Unshadow if we are writing to a toplevel pagetable that is
3326
0
     * flagged as a dying process, and that is not currently used. */
3327
0
    if ( sh_mfn_is_a_page_table(gmfn)
3328
0
         && (mfn_to_page(gmfn)->shadow_flags & SHF_pagetable_dying) )
3329
0
    {
3330
0
        int used = 0;
3331
0
        struct vcpu *tmp;
3332
0
        for_each_vcpu(d, tmp)
3333
0
        {
3334
0
#if GUEST_PAGING_LEVELS == 3
3335
0
            int i;
3336
0
            for ( i = 0; i < 4; i++ )
3337
0
            {
3338
0
                mfn_t smfn = pagetable_get_mfn(v->arch.shadow_table[i]);
3339
0
3340
0
                if ( mfn_valid(smfn) && (mfn_x(smfn) != 0) )
3341
0
                {
3342
0
                    used |= (mfn_to_page(smfn)->v.sh.back == mfn_x(gmfn));
3343
0
3344
0
                    if ( used )
3345
0
                        break;
3346
0
                }
3347
0
            }
3348
0
#else /* 32 or 64 */
3349
0
            used = mfn_eq(pagetable_get_mfn(tmp->arch.guest_table), gmfn);
3350
0
#endif
3351
0
            if ( used )
3352
0
                break;
3353
0
        }
3354
0
3355
0
        if ( !used )
3356
0
            sh_remove_shadows(d, gmfn, 1 /* fast */, 0 /* can fail */);
3357
0
    }
3358
0
3359
0
    /*
3360
0
     * We don't need to hold the lock for the whole emulation; we will
3361
0
     * take it again when we write to the pagetables.
3362
0
     */
3363
0
    sh_audit_gw(v, &gw);
3364
0
    shadow_audit_tables(v);
3365
0
    paging_unlock(d);
3366
0
    put_gfn(d, gfn_x(gfn));
3367
0
3368
0
    this_cpu(trace_emulate_write_val) = 0;
3369
0
3370
0
#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
3371
0
 early_emulation:
3372
0
#endif
3373
0
    if ( is_hvm_domain(d) )
3374
0
    {
3375
0
        /*
3376
0
         * If we are in the middle of injecting an exception or interrupt then
3377
0
         * we should not emulate: it is not the instruction at %eip that caused
3378
0
         * the fault. Furthermore it is almost certainly the case that the handler
3379
0
         * stack is currently considered to be a page table, so we should
3380
0
         * unshadow the faulting page before exiting.
3381
0
         */
3382
0
        if ( unlikely(hvm_event_pending(v)) )
3383
0
        {
3384
0
#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
3385
0
            if ( fast_emul )
3386
0
            {
3387
0
                perfc_incr(shadow_fault_fast_emulate_fail);
3388
0
                v->arch.paging.last_write_emul_ok = 0;
3389
0
            }
3390
0
#endif
3391
0
            gdprintk(XENLOG_DEBUG, "write to pagetable during event "
3392
0
                     "injection: cr2=%#lx, mfn=%#lx\n",
3393
0
                     va, mfn_x(gmfn));
3394
0
            sh_remove_shadows(d, gmfn, 0 /* thorough */, 1 /* must succeed */);
3395
0
            trace_shadow_emulate_other(TRC_SHADOW_EMULATE_UNSHADOW_EVTINJ,
3396
0
                                       va, gfn);
3397
0
            return EXCRET_fault_fixed;
3398
0
        }
3399
0
    }
3400
0
3401
0
    SHADOW_PRINTK("emulate: eip=%#lx esp=%#lx\n", regs->rip, regs->rsp);
3402
0
3403
0
    emul_ops = shadow_init_emulation(&emul_ctxt, regs);
3404
0
3405
0
    r = x86_emulate(&emul_ctxt.ctxt, emul_ops);
3406
0
3407
0
    if ( r == X86EMUL_EXCEPTION )
3408
0
    {
3409
0
        /*
3410
0
         * This emulation covers writes to shadow pagetables.  We tolerate #PF
3411
0
         * (from accesses spanning pages, concurrent paging updated from
3412
0
         * vcpus, etc) and #GP[0]/#SS[0] (from segmentation errors).  Anything
3413
0
         * else is an emulation bug, or a guest playing with the instruction
3414
0
         * stream under Xen's feet.
3415
0
         */
3416
0
        if ( emul_ctxt.ctxt.event.type == X86_EVENTTYPE_HW_EXCEPTION &&
3417
0
             ((emul_ctxt.ctxt.event.vector == TRAP_page_fault) ||
3418
0
              (((emul_ctxt.ctxt.event.vector == TRAP_gp_fault) ||
3419
0
                (emul_ctxt.ctxt.event.vector == TRAP_stack_error)) &&
3420
0
               emul_ctxt.ctxt.event.error_code == 0)) )
3421
0
            hvm_inject_event(&emul_ctxt.ctxt.event);
3422
0
        else
3423
0
        {
3424
0
            SHADOW_PRINTK(
3425
0
                "Unexpected event (type %u, vector %#x) from emulation\n",
3426
0
                emul_ctxt.ctxt.event.type, emul_ctxt.ctxt.event.vector);
3427
0
            r = X86EMUL_UNHANDLEABLE;
3428
0
        }
3429
0
    }
3430
0
3431
0
    /*
3432
0
     * NB. We do not unshadow on X86EMUL_EXCEPTION. It's not clear that it
3433
0
     * would be a good unshadow hint. If we *do* decide to unshadow-on-fault
3434
0
     * then it must be 'failable': we cannot require the unshadow to succeed.
3435
0
     */
3436
0
    if ( r == X86EMUL_UNHANDLEABLE || r == X86EMUL_UNIMPLEMENTED )
3437
0
    {
3438
0
        perfc_incr(shadow_fault_emulate_failed);
3439
0
#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
3440
0
        if ( fast_emul )
3441
0
        {
3442
0
            perfc_incr(shadow_fault_fast_emulate_fail);
3443
0
            v->arch.paging.last_write_emul_ok = 0;
3444
0
        }
3445
0
#endif
3446
0
        SHADOW_PRINTK("emulator failure (rc=%d), unshadowing mfn %#lx\n",
3447
0
                       r, mfn_x(gmfn));
3448
0
        /* If this is actually a page table, then we have a bug, and need
3449
0
         * to support more operations in the emulator.  More likely,
3450
0
         * though, this is a hint that this page should not be shadowed. */
3451
0
        shadow_remove_all_shadows(d, gmfn);
3452
0
3453
0
        trace_shadow_emulate_other(TRC_SHADOW_EMULATE_UNSHADOW_UNHANDLED,
3454
0
                                   va, gfn);
3455
0
        goto emulate_done;
3456
0
    }
3457
0
3458
0
#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
3459
0
    /* Record successfully emulated information as a heuristic for the next
3460
0
     * fault on the same frame, for acceleration.  But be careful to verify that
3461
0
     * it is still a page table, or else the unshadow triggered
3462
0
     * in write emulation normally requires a re-sync with guest page
3463
0
     * table to recover r/w permission.  An incorrect record in that case
3464
0
     * will cause unexpectedly more shadow faults, because propagation is
3465
0
     * skipped.
3466
0
     */
3467
0
    if ( (r == X86EMUL_OKAY) && sh_mfn_is_a_page_table(gmfn) )
3468
0
    {
3469
0
        if ( !fast_emul )
3470
0
        {
3471
0
            v->arch.paging.shadow.last_emulated_frame = va >> PAGE_SHIFT;
3472
0
            v->arch.paging.shadow.last_emulated_mfn = mfn_x(gmfn);
3473
0
            v->arch.paging.last_write_emul_ok = 1;
3474
0
        }
3475
0
    }
3476
0
    else if ( fast_emul )
3477
0
        v->arch.paging.last_write_emul_ok = 0;
3478
0
#endif
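
The heuristic being recorded here is a single-entry cache: on a successful page-table write emulation, remember the faulting frame and its mfn, and let the next fault on the same frame with a plain write-to-present error code jump straight to emulation. A toy model of that cache, with illustrative names rather than Xen's fields:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

/* Toy single-entry "last emulated frame" cache modelling the heuristic
 * above; the field and function names are illustrative, not Xen's. */
struct emul_hint {
    bool     ok;
    uint64_t frame;   /* va >> PAGE_SHIFT of the last emulated write */
    uint64_t mfn;     /* backing mfn remembered for the fast path */
};

static void record_success(struct emul_hint *h, uint64_t va, uint64_t mfn)
{
    h->frame = va >> PAGE_SHIFT;
    h->mfn   = mfn;
    h->ok    = true;
}

/* On the next fault: only take the fast path for the same frame and only
 * for a plain "write to present page" error code; anything else clears
 * the hint and falls back to the slow path. */
static bool try_fast_path(struct emul_hint *h, uint64_t va, bool write_present)
{
    if ( h->ok && h->frame == (va >> PAGE_SHIFT) && write_present )
        return true;
    h->ok = false;
    return false;
}

int main(void)
{
    struct emul_hint hint = { 0 };

    record_success(&hint, 0xffff800000123456, 0x1a2b);
    printf("same frame, write|present: %d\n",
           try_fast_path(&hint, 0xffff800000123abc, true));
    printf("different frame:           %d\n",
           try_fast_path(&hint, 0xffff800000999000, true));
    return 0;
}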
3479
0
3480
0
    if ( emul_ctxt.ctxt.retire.singlestep )
3481
0
        hvm_inject_hw_exception(TRAP_debug, X86_EVENT_NO_EC);
3482
0
3483
0
#if GUEST_PAGING_LEVELS == 3 /* PAE guest */
3484
0
    /*
3485
0
     * If there are no pending actions, emulate up to four extra instructions
3486
0
     * in the hope of catching the "second half" of a 64-bit pagetable write.
3487
0
     */
3488
0
    if ( r == X86EMUL_OKAY && !emul_ctxt.ctxt.retire.raw )
3489
0
    {
3490
0
        int i, emulation_count=0;
3491
0
        this_cpu(trace_emulate_initial_va) = va;
3492
0
3493
0
        for ( i = 0 ; i < 4 ; i++ )
3494
0
        {
3495
0
            shadow_continue_emulation(&emul_ctxt, regs);
3496
0
            v->arch.paging.last_write_was_pt = 0;
3497
0
            r = x86_emulate(&emul_ctxt.ctxt, emul_ops);
3498
0
3499
0
            /*
3500
0
             * Only continue the search for the second half if there are no
3501
0
             * exceptions or pending actions.  Otherwise, give up and re-enter
3502
0
             * the guest.
3503
0
             */
3504
0
            if ( r == X86EMUL_OKAY && !emul_ctxt.ctxt.retire.raw )
3505
0
            {
3506
0
                emulation_count++;
3507
0
                if ( v->arch.paging.last_write_was_pt )
3508
0
                {
3509
0
                    perfc_incr(shadow_em_ex_pt);
3510
0
                    TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EMULATION_2ND_PT_WRITTEN);
3511
0
                    break; /* Don't emulate past the other half of the write */
3512
0
                }
3513
0
                else
3514
0
                    perfc_incr(shadow_em_ex_non_pt);
3515
0
            }
3516
0
            else
3517
0
            {
3518
0
                perfc_incr(shadow_em_ex_fail);
3519
0
                TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EMULATION_LAST_FAILED);
3520
0
3521
0
                if ( emul_ctxt.ctxt.retire.singlestep )
3522
0
                    hvm_inject_hw_exception(TRAP_debug, X86_EVENT_NO_EC);
3523
0
3524
0
                break; /* Don't emulate again if we failed! */
3525
0
            }
3526
0
        }
3527
0
        this_cpu(trace_extra_emulation_count)=emulation_count;
3528
0
    }
3529
0
#endif /* PAE guest */
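
The PAE-specific loop above keeps emulating for a bounded number of instructions in the hope of catching the second 32-bit half of a 64-bit pagetable write, stopping as soon as a pagetable write is seen or any emulation fails. A minimal standalone model of that control flow, with a scripted stand-in for x86_emulate():

#include <stdio.h>

/* Toy model of the PAE "second half" loop: after one emulated write to a
 * 64-bit pagetable entry, keep emulating a few instructions hoping the
 * guest's second 32-bit write follows immediately.  emulate_one() here is
 * a stand-in for x86_emulate(), driven by a scripted instruction stream. */
enum outcome { OK_PLAIN, OK_PT_WRITE, FAILED };

static enum outcome emulate_one(int step)
{
    /* Scripted example: a plain mov, then the second half of the PTE write. */
    static const enum outcome script[] = { OK_PLAIN, OK_PT_WRITE };
    return step < 2 ? script[step] : FAILED;
}

int main(void)
{
    int extra = 0;

    for ( int i = 0; i < 4; i++ )
    {
        enum outcome r = emulate_one(i);

        if ( r == FAILED )
            break;                 /* don't emulate again if we failed */
        extra++;
        if ( r == OK_PT_WRITE )
            break;                 /* caught the other half; stop here */
    }
    printf("emulated %d extra instruction(s)\n", extra);
    return 0;
}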
3530
0
3531
0
    trace_shadow_emulate(gw.l1e, va);
3532
0
 emulate_done:
3533
0
    SHADOW_PRINTK("emulated\n");
3534
0
    return EXCRET_fault_fixed;
3535
0
3536
0
 mmio:
3537
0
    if ( !guest_mode(regs) )
3538
0
        goto not_a_shadow_fault;
3539
0
    perfc_incr(shadow_fault_mmio);
3540
0
    sh_audit_gw(v, &gw);
3541
0
    SHADOW_PRINTK("mmio %#"PRIpaddr"\n", gpa);
3542
0
    shadow_audit_tables(v);
3543
0
    reset_early_unshadow(v);
3544
0
    paging_unlock(d);
3545
0
    put_gfn(d, gfn_x(gfn));
3546
0
    trace_shadow_gen(TRC_SHADOW_MMIO, va);
3547
0
    return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT, access)
3548
0
            ? EXCRET_fault_fixed : 0);
3549
0
3550
0
 not_a_shadow_fault:
3551
0
    sh_audit_gw(v, &gw);
3552
0
    SHADOW_PRINTK("not a shadow fault\n");
3553
0
    shadow_audit_tables(v);
3554
0
    reset_early_unshadow(v);
3555
0
    paging_unlock(d);
3556
0
    put_gfn(d, gfn_x(gfn));
3557
0
3558
0
propagate:
3559
0
    trace_not_shadow_fault(gw.l1e, va);
3560
0
3561
0
    return 0;
3562
0
}
Unexecuted instantiation: multi.c:sh_page_fault__guest_3
Unexecuted instantiation: multi.c:sh_page_fault__guest_4
Unexecuted instantiation: multi.c:sh_page_fault__guest_2
3563
0
3564
0
3565
0
/*
3566
0
 * Called when the guest requests an invlpg.  Returns true if the invlpg
3567
0
 * instruction should be issued on the hardware, or false if it's safe not
3568
0
 * to do so.
3569
0
 */
3570
0
static bool sh_invlpg(struct vcpu *v, unsigned long va)
3571
0
{
3572
0
    mfn_t sl1mfn;
3573
0
    shadow_l2e_t sl2e;
3574
0
3575
0
    perfc_incr(shadow_invlpg);
3576
0
3577
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
3578
0
    /* No longer safe to use cached gva->gfn translations */
3579
0
    vtlb_flush(v);
3580
0
#endif
3581
0
3582
0
#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
3583
0
    v->arch.paging.last_write_emul_ok = 0;
3584
0
#endif
3585
0
3586
0
    /* First check that we can safely read the shadow l2e.  SMP/PAE Linux can
3587
0
     * hit as many as 6% of invlpg calls on an l2 we haven't shadowed
3588
0
     * yet. */
3589
0
#if SHADOW_PAGING_LEVELS == 4
3590
0
    {
3591
0
        shadow_l3e_t sl3e;
3592
0
        if ( !(shadow_l4e_get_flags(
3593
0
                   sh_linear_l4_table(v)[shadow_l4_linear_offset(va)])
3594
0
               & _PAGE_PRESENT) )
3595
0
            return false;
3596
0
        /* This must still be a copy-from-user because we don't have the
3597
0
         * paging lock, and the higher-level shadows might disappear
3598
0
         * under our feet. */
3599
0
        if ( __copy_from_user(&sl3e, (sh_linear_l3_table(v)
3600
0
                                      + shadow_l3_linear_offset(va)),
3601
0
                              sizeof (sl3e)) != 0 )
3602
0
        {
3603
0
            perfc_incr(shadow_invlpg_fault);
3604
0
            return false;
3605
0
        }
3606
0
        if ( !(shadow_l3e_get_flags(sl3e) & _PAGE_PRESENT) )
3607
0
            return false;
3608
0
    }
3609
0
#else /* SHADOW_PAGING_LEVELS == 3 */
3610
0
    if ( !(l3e_get_flags(v->arch.paging.shadow.l3table[shadow_l3_linear_offset(va)])
3611
0
           & _PAGE_PRESENT) )
3612
0
        // no need to flush anything if there's no SL2...
3613
0
        return false;
3614
0
#endif
3615
0
3616
0
    /* This must still be a copy-from-user because we don't have the shadow
3617
0
     * lock, and the higher-level shadows might disappear under our feet. */
3618
0
    if ( __copy_from_user(&sl2e,
3619
0
                          sh_linear_l2_table(v) + shadow_l2_linear_offset(va),
3620
0
                          sizeof (sl2e)) != 0 )
3621
0
    {
3622
0
        perfc_incr(shadow_invlpg_fault);
3623
0
        return false;
3624
0
    }
3625
0
3626
0
    // If there's nothing shadowed for this particular sl2e, then
3627
0
    // there is no need to do an invlpg, either...
3628
0
    //
3629
0
    if ( !(shadow_l2e_get_flags(sl2e) & _PAGE_PRESENT) )
3630
0
        return false;
3631
0
3632
0
    // Check to see if the SL2 is a splintered superpage...
3633
0
    // If so, then we'll need to flush the entire TLB (because that's
3634
0
    // easier than invalidating all of the individual 4K pages).
3635
0
    //
3636
0
    sl1mfn = shadow_l2e_get_mfn(sl2e);
3637
0
    if ( mfn_to_page(sl1mfn)->u.sh.type
3638
0
         == SH_type_fl1_shadow )
3639
0
    {
3640
0
        flush_tlb_local();
3641
0
        return false;
3642
0
    }
3643
0
3644
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
3645
0
    /* Check to see if the SL1 is out of sync. */
3646
0
    {
3647
0
        struct domain *d = v->domain;
3648
0
        mfn_t gl1mfn = backpointer(mfn_to_page(sl1mfn));
3649
0
        struct page_info *pg = mfn_to_page(gl1mfn);
3650
0
        if ( mfn_valid(gl1mfn)
3651
0
             && page_is_out_of_sync(pg) )
3652
0
        {
3653
0
            /* The test above may give false positives, since we don't
3654
0
             * hold the paging lock yet.  Check again with the lock held. */
3655
0
            paging_lock(d);
3656
0
3657
0
            /* This must still be a copy-from-user because we didn't
3658
0
             * have the paging lock last time we checked, and the
3659
0
             * higher-level shadows might have disappeared under our
3660
0
             * feet. */
3661
0
            if ( __copy_from_user(&sl2e,
3662
0
                                  sh_linear_l2_table(v)
3663
0
                                  + shadow_l2_linear_offset(va),
3664
0
                                  sizeof (sl2e)) != 0 )
3665
0
            {
3666
0
                perfc_incr(shadow_invlpg_fault);
3667
0
                paging_unlock(d);
3668
0
                return false;
3669
0
            }
3670
0
3671
0
            if ( !(shadow_l2e_get_flags(sl2e) & _PAGE_PRESENT) )
3672
0
            {
3673
0
                paging_unlock(d);
3674
0
                return false;
3675
0
            }
3676
0
3677
0
            sl1mfn = shadow_l2e_get_mfn(sl2e);
3678
0
            gl1mfn = backpointer(mfn_to_page(sl1mfn));
3679
0
            pg = mfn_to_page(gl1mfn);
3680
0
3681
0
            if ( likely(sh_mfn_is_a_page_table(gl1mfn)
3682
0
                        && page_is_out_of_sync(pg) ) )
3683
0
            {
3684
0
                shadow_l1e_t *sl1;
3685
0
                sl1 = sh_linear_l1_table(v) + shadow_l1_linear_offset(va);
3686
0
                /* Remove the shadow entry that maps this VA */
3687
0
                (void) shadow_set_l1e(d, sl1, shadow_l1e_empty(),
3688
0
                                      p2m_invalid, sl1mfn);
3689
0
            }
3690
0
            paging_unlock(d);
3691
0
            /* Need the invlpg, to pick up the disappearance of the sl1e */
3692
0
            return true;
3693
0
        }
3694
0
    }
3695
0
#endif
3696
0
3697
0
    return true;
3698
0
}
Unexecuted instantiation: multi.c:sh_invlpg__guest_4
Unexecuted instantiation: multi.c:sh_invlpg__guest_2
Unexecuted instantiation: multi.c:sh_invlpg__guest_3
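
Boiled down, sh_invlpg answers one question: is there a shadow l1 mapping this VA at all, and if so can it be invalidated with a single invlpg, or does a splintered superpage (fl1 shadow) force a full TLB flush? A toy version of that decision, with simplified stand-ins for the shadow entries rather than the real linear-map accessors:

#include <stdbool.h>
#include <stdio.h>

/* Toy shadow walk for deciding whether an invlpg is needed; the types and
 * the fl1 flag are simplified stand-ins for the shadow l4/l3/l2 checks in
 * sh_invlpg, not the real linear-map accessors. */
struct toy_sl2e { bool present; bool maps_fl1_shadow; };

enum invlpg_action { SKIP_INVLPG, DO_INVLPG, FLUSH_WHOLE_TLB };

static enum invlpg_action classify(bool l4_present, bool l3_present,
                                   struct toy_sl2e sl2e)
{
    if ( !l4_present || !l3_present )
        return SKIP_INVLPG;          /* nothing shadowed above this VA */
    if ( !sl2e.present )
        return SKIP_INVLPG;          /* no sl1 for this VA either */
    if ( sl2e.maps_fl1_shadow )
        return FLUSH_WHOLE_TLB;      /* splintered superpage: cheaper to
                                        flush everything than 512 pages */
    return DO_INVLPG;
}

int main(void)
{
    struct toy_sl2e normal = { .present = true,  .maps_fl1_shadow = false };
    struct toy_sl2e fl1    = { .present = true,  .maps_fl1_shadow = true  };
    struct toy_sl2e empty  = { .present = false };

    printf("normal sl1: %d, fl1 shadow: %d, nothing shadowed: %d\n",
           classify(true, true, normal),   /* DO_INVLPG */
           classify(true, true, fl1),      /* FLUSH_WHOLE_TLB */
           classify(true, false, empty));  /* SKIP_INVLPG */
    return 0;
}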
3699
0
3700
0
3701
0
static unsigned long
3702
0
sh_gva_to_gfn(struct vcpu *v, struct p2m_domain *p2m,
3703
0
    unsigned long va, uint32_t *pfec)
3704
0
/* Called to translate a guest virtual address to what the *guest*
3705
0
 * pagetables would map it to. */
3706
0
{
3707
0
    walk_t gw;
3708
0
    gfn_t gfn;
3709
0
    bool walk_ok;
3710
0
3711
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
3712
0
    /* Check the vTLB cache first */
3713
0
    unsigned long vtlb_gfn = vtlb_lookup(v, va, *pfec);
3714
0
    if ( vtlb_gfn != gfn_x(INVALID_GFN) )
3715
0
        return vtlb_gfn;
3716
0
#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
3717
0
3718
0
    if ( !(walk_ok = sh_walk_guest_tables(v, va, &gw, *pfec)) )
3719
0
    {
3720
0
        *pfec = gw.pfec;
3721
0
        return gfn_x(INVALID_GFN);
3722
0
    }
3723
0
    gfn = guest_walk_to_gfn(&gw);
3724
0
3725
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
3726
0
    /* Remember this successful VA->GFN translation for later. */
3727
0
    vtlb_insert(v, va >> PAGE_SHIFT, gfn_x(gfn), *pfec);
3728
0
#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
3729
0
3730
0
    return gfn_x(gfn);
3731
0
}
Unexecuted instantiation: multi.c:sh_gva_to_gfn__guest_2
Unexecuted instantiation: multi.c:sh_gva_to_gfn__guest_3
Unexecuted instantiation: multi.c:sh_gva_to_gfn__guest_4
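
The vTLB used above is a small software cache of successful VA->GFN translations consulted before the full guest walk and refilled afterwards. A minimal direct-mapped sketch of that idea; the size, the indexing, and the omission of pfec matching are all simplifications, not the real SHOPT_VIRTUAL_TLB implementation:

#include <stdint.h>
#include <stdio.h>

#define VTLB_ENTRIES 64
#define INVALID_GFN  (~(uint64_t)0)

struct vtlb_entry { uint64_t vpage; uint64_t gfn; int valid; };
static struct vtlb_entry vtlb[VTLB_ENTRIES];

static uint64_t vtlb_lookup(uint64_t vpage)
{
    struct vtlb_entry *e = &vtlb[vpage % VTLB_ENTRIES];
    return (e->valid && e->vpage == vpage) ? e->gfn : INVALID_GFN;
}

static void vtlb_insert(uint64_t vpage, uint64_t gfn)
{
    vtlb[vpage % VTLB_ENTRIES] =
        (struct vtlb_entry){ .vpage = vpage, .gfn = gfn, .valid = 1 };
}

/* Stand-in for the full sh_walk_guest_tables() pagewalk. */
static uint64_t slow_walk(uint64_t vpage) { return vpage + 0x1000; }

static uint64_t gva_to_gfn(uint64_t va)
{
    uint64_t vpage = va >> 12, gfn = vtlb_lookup(vpage);

    if ( gfn != INVALID_GFN )
        return gfn;                 /* cache hit: skip the pagewalk */
    gfn = slow_walk(vpage);
    vtlb_insert(vpage, gfn);        /* remember for next time */
    return gfn;
}

int main(void)
{
    printf("first:  %#llx\n", (unsigned long long)gva_to_gfn(0x400000));
    printf("cached: %#llx\n", (unsigned long long)gva_to_gfn(0x400123));
    return 0;
}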
3732
0
3733
0
3734
0
static inline void
3735
0
sh_update_linear_entries(struct vcpu *v)
3736
0
/* Sync up all the linear mappings for this vcpu's pagetables */
3737
0
{
3738
0
    struct domain *d = v->domain;
3739
0
3740
0
    /* Linear pagetables in PV guests
3741
0
     * ------------------------------
3742
0
     *
3743
0
     * Guest linear pagetables, which map the guest pages, are at
3744
0
     * LINEAR_PT_VIRT_START.  Shadow linear pagetables, which map the
3745
0
     * shadows, are at SH_LINEAR_PT_VIRT_START.  Most of the time these
3746
0
     * are set up at shadow creation time, but (of course!) the PAE case
3747
0
     * is subtler.  Normal linear mappings are made by having an entry
3748
0
     * in the top-level table that points to itself (shadow linear) or
3749
0
     * to the guest top-level table (guest linear).  For PAE, to set up
3750
0
     * a linear map requires us to copy the four top-level entries into
3751
0
     * level-2 entries.  That means that every time we change a PAE l3e,
3752
0
     * we need to reflect the change into the copy.
3753
0
     *
3754
0
     * Linear pagetables in HVM guests
3755
0
     * -------------------------------
3756
0
     *
3757
0
     * For HVM guests, the linear pagetables are installed in the monitor
3758
0
     * tables (since we can't put them in the shadow).  Shadow linear
3759
0
     * pagetables, which map the shadows, are at SH_LINEAR_PT_VIRT_START,
3760
0
     * and we use the linear pagetable slot at LINEAR_PT_VIRT_START for
3761
0
     * a linear pagetable of the monitor tables themselves.  We have
3762
0
     * the same issue of having to re-copy PAE l3 entries whenever we use
3763
0
     * PAE shadows.
3764
0
     *
3765
0
     * Because HVM guests run on the same monitor tables regardless of the
3766
0
     * shadow tables in use, the linear mapping of the shadow tables has to
3767
0
     * be updated every time v->arch.shadow_table changes.
3768
0
     */
3769
0
3770
0
    /* Don't try to update the monitor table if it doesn't exist */
3771
0
    if ( shadow_mode_external(d)
3772
0
         && pagetable_get_pfn(v->arch.monitor_table) == 0 )
3773
0
        return;
3774
0
3775
0
#if SHADOW_PAGING_LEVELS == 4
3776
0
3777
0
    /* For PV, one l4e points at the guest l4, one points at the shadow
3778
0
     * l4.  No maintenance required.
3779
0
     * For HVM, just need to update the l4e that points to the shadow l4. */
3780
0
3781
0
    if ( shadow_mode_external(d) )
3782
0
    {
3783
0
        /* Use the linear map if we can; otherwise make a new mapping */
3784
0
        if ( v == current )
3785
0
        {
3786
0
            __linear_l4_table[l4_linear_offset(SH_LINEAR_PT_VIRT_START)] =
3787
0
                l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
3788
0
                             __PAGE_HYPERVISOR_RW);
3789
0
        }
3790
0
        else
3791
0
        {
3792
0
            l4_pgentry_t *ml4e;
3793
0
            ml4e = map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
3794
0
            ml4e[l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
3795
0
                l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
3796
0
                             __PAGE_HYPERVISOR_RW);
3797
0
            unmap_domain_page(ml4e);
3798
0
        }
3799
0
    }
3800
0
3801
0
#elif SHADOW_PAGING_LEVELS == 3
3802
0
3803
0
    /* PV: XXX
3804
0
     *
3805
0
     * HVM: To give ourselves a linear map of the  shadows, we need to
3806
0
     * extend a PAE shadow to 4 levels.  We do this by  having a monitor
3807
0
     * l3 in slot 0 of the monitor l4 table, and  copying the PAE l3
3808
0
     * entries into it.  Then, by having the monitor l4e for shadow
3809
0
     * pagetables also point to the monitor l4, we can use it to access
3810
0
     * the shadows.
3811
0
     */
3812
0
3813
0
    if ( shadow_mode_external(d) )
3814
0
    {
3815
0
        /* Install copies of the shadow l3es into the monitor l2 table
3816
0
         * that maps SH_LINEAR_PT_VIRT_START. */
3817
0
        shadow_l3e_t *sl3e;
3818
0
        l2_pgentry_t *ml2e;
3819
0
        int i;
3820
0
3821
0
        /* Use linear mappings if we can; otherwise make new mappings */
3822
0
        if ( v == current )
3823
0
            ml2e = __linear_l2_table
3824
0
                + l2_linear_offset(SH_LINEAR_PT_VIRT_START);
3825
0
        else
3826
0
        {
3827
0
            mfn_t l3mfn, l2mfn;
3828
0
            l4_pgentry_t *ml4e;
3829
0
            l3_pgentry_t *ml3e;
3830
0
            int linear_slot = shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START);
3831
0
            ml4e = map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
3832
0
3833
0
            ASSERT(l4e_get_flags(ml4e[linear_slot]) & _PAGE_PRESENT);
3834
0
            l3mfn = l4e_get_mfn(ml4e[linear_slot]);
3835
0
            ml3e = map_domain_page(l3mfn);
3836
0
            unmap_domain_page(ml4e);
3837
0
3838
0
            ASSERT(l3e_get_flags(ml3e[0]) & _PAGE_PRESENT);
3839
0
            l2mfn = l3e_get_mfn(ml3e[0]);
3840
0
            ml2e = map_domain_page(l2mfn);
3841
0
            unmap_domain_page(ml3e);
3842
0
        }
3843
0
3844
0
        /* Shadow l3 tables are made up by sh_update_cr3 */
3845
0
        sl3e = v->arch.paging.shadow.l3table;
3846
0
3847
0
        for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
3848
0
        {
3849
0
            ml2e[i] =
3850
0
                (shadow_l3e_get_flags(sl3e[i]) & _PAGE_PRESENT)
3851
0
                ? l2e_from_mfn(shadow_l3e_get_mfn(sl3e[i]),
3852
0
                               __PAGE_HYPERVISOR_RW)
3853
0
                : l2e_empty();
3854
0
        }
3855
0
3856
0
        if ( v != current )
3857
0
            unmap_domain_page(ml2e);
3858
0
    }
3859
0
    else
3860
0
        domain_crash(d); /* XXX */
3861
0
3862
0
#else
3863
0
#error this should not happen
3864
0
#endif
3865
0
3866
0
    if ( shadow_mode_external(d) )
3867
0
    {
3868
0
        /*
3869
0
         * Having modified the linear pagetable mapping, flush local host TLBs.
3870
0
         * This was not needed when vmenter/vmexit always had the side effect
3871
0
         * of flushing host TLBs but, with ASIDs, it is possible to finish
3872
0
         * this CR3 update, vmenter the guest, vmexit due to a page fault,
3873
0
         * without an intervening host TLB flush. Then the page fault code
3874
0
         * could use the linear pagetable to read a top-level shadow page
3875
0
         * table entry. But, without this change, it would fetch the wrong
3876
0
         * value due to a stale TLB.
3877
0
         */
3878
0
        flush_tlb_local();
3879
0
    }
3880
0
}
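
The machinery above maintains linear (self-referencing) pagetable maps: a top-level slot that points back at a pagetable makes every lower-level entry visible at a fixed virtual address, which is what lets the fault handler read shadow entries with a plain memory access. The arithmetic behind such a 4-level linear map, using an arbitrary example slot rather than Xen's actual layout:

#include <stdint.h>
#include <stdio.h>

/* Arithmetic behind a 4-level linear ("recursive") pagetable map: if L4
 * slot S points back at a top-level table, then every L1 entry of the
 * address space mapped by that table shows up at a fixed virtual address.
 * The slot number below is an arbitrary example, not Xen's layout. */
#define PAGE_SHIFT 12
#define PTE_SIZE   8
#define SELF_SLOT  261ULL            /* example L4 slot */

/* Sign-extend a 48-bit canonical address to 64 bits. */
static uint64_t canonical(uint64_t va)
{
    return (va & (1ULL << 47)) ? (va | 0xffff000000000000ULL) : va;
}

/* Virtual address of the L1 entry that maps 'va' (lower-half VAs only),
 * via the self-map window that SELF_SLOT opens. */
static uint64_t linear_l1e_addr(uint64_t va)
{
    uint64_t base = canonical(SELF_SLOT << 39);  /* window covering all L1s */
    return base + ((va >> PAGE_SHIFT) * PTE_SIZE);
}

int main(void)
{
    uint64_t va = 0x00007f1234567000ULL;

    printf("L1 entry for %#llx is mapped at %#llx\n",
           (unsigned long long)va,
           (unsigned long long)linear_l1e_addr(va));
    return 0;
}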
3881
0
3882
0
3883
0
/* Removes vcpu->arch.paging.shadow.guest_vtable and vcpu->arch.shadow_table[].
3884
0
 * Does all appropriate management/bookkeeping/refcounting/etc...
3885
0
 */
3886
0
static void
3887
0
sh_detach_old_tables(struct vcpu *v)
3888
0
{
3889
0
    struct domain *d = v->domain;
3890
0
    mfn_t smfn;
3891
0
    int i = 0;
3892
0
3893
0
    ////
3894
0
    //// vcpu->arch.paging.shadow.guest_vtable
3895
0
    ////
3896
0
3897
0
#if GUEST_PAGING_LEVELS == 3
3898
0
    /* PAE guests don't have a mapping of the guest top-level table */
3899
0
    ASSERT(v->arch.paging.shadow.guest_vtable == NULL);
3900
0
#else
3901
0
    if ( v->arch.paging.shadow.guest_vtable )
3902
0
    {
3903
0
        if ( shadow_mode_external(d) || shadow_mode_translate(d) )
3904
0
            unmap_domain_page_global(v->arch.paging.shadow.guest_vtable);
3905
0
        v->arch.paging.shadow.guest_vtable = NULL;
3906
0
    }
3907
0
#endif /* GUEST_PAGING_LEVELS == 3 */
3908
0
3909
0
3910
0
    ////
3911
0
    //// vcpu->arch.shadow_table[]
3912
0
    ////
3913
0
3914
0
#if GUEST_PAGING_LEVELS == 3
3915
0
    /* PAE guests have four shadow_table entries */
3916
0
    for ( i = 0 ; i < 4 ; i++ )
3917
0
#endif
3918
0
    {
3919
0
        smfn = pagetable_get_mfn(v->arch.shadow_table[i]);
3920
0
        if ( mfn_x(smfn) )
3921
0
            sh_put_ref(d, smfn, 0);
3922
0
        v->arch.shadow_table[i] = pagetable_null();
3923
0
    }
3924
0
}
Unexecuted instantiation: multi.c:sh_detach_old_tables__guest_3
Unexecuted instantiation: multi.c:sh_detach_old_tables__guest_2
Unexecuted instantiation: multi.c:sh_detach_old_tables__guest_4
3925
0
3926
0
/* Set up the top-level shadow and install it in slot 'slot' of shadow_table */
3927
0
static void
3928
0
sh_set_toplevel_shadow(struct vcpu *v,
3929
0
                       int slot,
3930
0
                       mfn_t gmfn,
3931
0
                       unsigned int root_type)
3932
0
{
3933
0
    mfn_t smfn;
3934
0
    pagetable_t old_entry, new_entry;
3935
0
3936
0
    struct domain *d = v->domain;
3937
0
3938
0
    /* Remember the old contents of this slot */
3939
0
    old_entry = v->arch.shadow_table[slot];
3940
0
3941
0
    /* Now figure out the new contents: is this a valid guest MFN? */
3942
0
    if ( !mfn_valid(gmfn) )
3943
0
    {
3944
0
        new_entry = pagetable_null();
3945
0
        goto install_new_entry;
3946
0
    }
3947
0
3948
0
    /* Guest mfn is valid: shadow it and install the shadow */
3949
0
    smfn = get_shadow_status(d, gmfn, root_type);
3950
0
    if ( !mfn_valid(smfn) )
3951
0
    {
3952
0
        /* Make sure there's enough free shadow memory. */
3953
0
        shadow_prealloc(d, root_type, 1);
3954
0
        /* Shadow the page. */
3955
0
        smfn = sh_make_shadow(v, gmfn, root_type);
3956
0
    }
3957
0
    ASSERT(mfn_valid(smfn));
3958
0
3959
0
    /* Pin the shadow and put it (back) on the list of pinned shadows */
3960
0
    if ( sh_pin(d, smfn) == 0 )
3961
0
    {
3962
0
        SHADOW_ERROR("can't pin %#lx as toplevel shadow\n", mfn_x(smfn));
3963
0
        domain_crash(d);
3964
0
    }
3965
0
3966
0
    /* Take a ref to this page: it will be released in sh_detach_old_tables()
3967
0
     * or the next call to set_toplevel_shadow() */
3968
0
    if ( !sh_get_ref(d, smfn, 0) )
3969
0
    {
3970
0
        SHADOW_ERROR("can't install %#lx as toplevel shadow\n", mfn_x(smfn));
3971
0
        domain_crash(d);
3972
0
    }
3973
0
3974
0
    new_entry = pagetable_from_mfn(smfn);
3975
0
3976
0
 install_new_entry:
3977
0
    /* Done.  Install it */
3978
0
    SHADOW_PRINTK("%u/%u [%u] gmfn %#"PRI_mfn" smfn %#"PRI_mfn"\n",
3979
0
                  GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS, slot,
3980
0
                  mfn_x(gmfn), mfn_x(pagetable_get_mfn(new_entry)));
3981
0
    v->arch.shadow_table[slot] = new_entry;
3982
0
3983
0
    /* Decrement the refcount of the old contents of this slot */
3984
0
    if ( !pagetable_is_null(old_entry) ) {
3985
0
        mfn_t old_smfn = pagetable_get_mfn(old_entry);
3986
0
        /* Need to repin the old toplevel shadow if it's been unpinned
3987
0
         * by shadow_prealloc(): in PV mode we're still running on this
3988
0
         * shadow and it's not safe to free it yet. */
3989
0
        if ( !mfn_to_page(old_smfn)->u.sh.pinned && !sh_pin(d, old_smfn) )
3990
0
        {
3991
0
            SHADOW_ERROR("can't re-pin %#lx\n", mfn_x(old_smfn));
3992
0
            domain_crash(d);
3993
0
        }
3994
0
        sh_put_ref(d, old_smfn, 0);
3995
0
    }
3996
0
}
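
The refcounting order in this function is the important part: the new shadow gets its reference and is installed in shadow_table[slot] before the reference on the old contents is dropped, so the slot never points at a table that has already been freed. A toy refcounted slot swap showing that ordering; the objects and helpers here are illustrative, not Xen's sh_get_ref/sh_put_ref:

#include <stdio.h>

/* Refcounted slot swap mirroring the get-ref / install / put-ref ordering
 * above: take a reference on the new object and install it before the old
 * one is released, so the slot never points at freed memory. */
struct obj { const char *name; int refs; };

static void get_ref(struct obj *o) { if ( o ) o->refs++; }
static void put_ref(struct obj *o)
{
    if ( o && --o->refs == 0 )
        printf("  freeing %s\n", o->name);
}

static void install(struct obj **slot, struct obj *new_obj)
{
    struct obj *old = *slot;

    get_ref(new_obj);     /* 1. new object can no longer disappear */
    *slot = new_obj;      /* 2. swap the slot */
    put_ref(old);         /* 3. only now let go of the old one */
}

int main(void)
{
    struct obj a = { "old-shadow", 1 }, b = { "new-shadow", 0 };
    struct obj *slot = &a;

    install(&slot, &b);
    printf("slot now holds %s (refs=%d)\n", slot->name, slot->refs);
    return 0;
}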
3997
0
3998
0
3999
0
static void
4000
0
sh_update_cr3(struct vcpu *v, int do_locking)
4001
0
/* Updates vcpu->arch.cr3 after the guest has changed CR3.
4002
0
 * Paravirtual guests should set v->arch.guest_table (and guest_table_user,
4003
0
 * if appropriate).
4004
0
 * HVM guests should also make sure hvm_get_guest_cntl_reg(v, 3) works;
4005
0
 * this function will call hvm_update_guest_cr(v, 3) to tell them where the
4006
0
 * shadow tables are.
4007
0
 * If do_locking != 0, assume we are being called from outside the
4008
0
 * shadow code, and must take and release the paging lock; otherwise
4009
0
 * that is the caller's responsibility.
4010
0
 */
4011
0
{
4012
0
    struct domain *d = v->domain;
4013
0
    mfn_t gmfn;
4014
0
#if GUEST_PAGING_LEVELS == 3
4015
0
    guest_l3e_t *gl3e;
4016
0
    u32 guest_idx=0;
4017
0
    int i;
4018
0
#endif
4019
0
4020
0
    /* Don't do anything on an uninitialised vcpu */
4021
0
    if ( is_pv_domain(d) && !v->is_initialised )
4022
0
    {
4023
0
        ASSERT(v->arch.cr3 == 0);
4024
0
        return;
4025
0
    }
4026
0
4027
0
    if ( do_locking ) paging_lock(v->domain);
4028
0
4029
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
4030
0
    /* Need to resync all the shadow entries on a TLB flush.  Resync
4031
0
     * the current vcpu's OOS pages before switching to the new shadow
4032
0
     * tables so that the VA hint is still valid.  */
4033
0
    shadow_resync_current_vcpu(v);
4034
0
#endif
4035
0
4036
0
    ASSERT(paging_locked_by_me(v->domain));
4037
0
    ASSERT(v->arch.paging.mode);
4038
0
4039
0
    ////
4040
0
    //// vcpu->arch.guest_table is already set
4041
0
    ////
4042
0
4043
0
#ifndef NDEBUG
4044
0
    /* Double-check that the HVM code has sent us a sane guest_table */
4045
0
    if ( is_hvm_domain(d) )
4046
0
    {
4047
0
        ASSERT(shadow_mode_external(d));
4048
0
        if ( hvm_paging_enabled(v) )
4049
0
            ASSERT(pagetable_get_pfn(v->arch.guest_table));
4050
0
        else
4051
0
            ASSERT(v->arch.guest_table.pfn
4052
0
                   == d->arch.paging.shadow.unpaged_pagetable.pfn);
4053
0
    }
4054
0
#endif
4055
0
4056
0
    SHADOW_PRINTK("%pv guest_table=%"PRI_mfn"\n",
4057
0
                  v, (unsigned long)pagetable_get_pfn(v->arch.guest_table));
4058
0
4059
0
#if GUEST_PAGING_LEVELS == 4
4060
0
    if ( !(v->arch.flags & TF_kernel_mode) && !is_pv_32bit_domain(d) )
4061
0
        gmfn = pagetable_get_mfn(v->arch.guest_table_user);
4062
0
    else
4063
0
#endif
4064
0
        gmfn = pagetable_get_mfn(v->arch.guest_table);
4065
0
4066
0
4067
0
    ////
4068
0
    //// vcpu->arch.paging.shadow.guest_vtable
4069
0
    ////
4070
0
#if GUEST_PAGING_LEVELS == 4
4071
0
    if ( shadow_mode_external(d) || shadow_mode_translate(d) )
4072
0
    {
4073
0
        if ( v->arch.paging.shadow.guest_vtable )
4074
0
            unmap_domain_page_global(v->arch.paging.shadow.guest_vtable);
4075
0
        v->arch.paging.shadow.guest_vtable = map_domain_page_global(gmfn);
4076
0
        /* PAGING_LEVELS==4 implies 64-bit, which means that
4077
0
         * map_domain_page_global can't fail */
4078
0
        BUG_ON(v->arch.paging.shadow.guest_vtable == NULL);
4079
0
    }
4080
0
    else
4081
0
        v->arch.paging.shadow.guest_vtable = __linear_l4_table;
4082
0
#elif GUEST_PAGING_LEVELS == 3
4083
0
     /* On PAE guests we don't use a mapping of the guest's own top-level
4084
0
      * table.  We cache the current state of that table and shadow that,
4085
0
      * until the next CR3 write makes us refresh our cache. */
4086
0
     ASSERT(v->arch.paging.shadow.guest_vtable == NULL);
4087
0
4088
0
     if ( shadow_mode_external(d) )
4089
0
         /* Find where in the page the l3 table is */
4090
0
         guest_idx = guest_index((void *)v->arch.hvm_vcpu.guest_cr[3]);
4091
0
     else
4092
0
         /* PV guest: l3 is at the start of a page */
4093
0
         guest_idx = 0;
4094
0
4095
0
     // Ignore the low 2 bits of guest_idx -- they are really just
4096
0
     // cache control.
4097
0
     guest_idx &= ~3;
4098
0
4099
0
     gl3e = ((guest_l3e_t *)map_domain_page(gmfn)) + guest_idx;
4100
0
     for ( i = 0; i < 4 ; i++ )
4101
0
         v->arch.paging.shadow.gl3e[i] = gl3e[i];
4102
0
     unmap_domain_page(gl3e);
4103
0
#elif GUEST_PAGING_LEVELS == 2
4104
0
    if ( shadow_mode_external(d) || shadow_mode_translate(d) )
4105
0
    {
4106
0
        if ( v->arch.paging.shadow.guest_vtable )
4107
0
            unmap_domain_page_global(v->arch.paging.shadow.guest_vtable);
4108
0
        v->arch.paging.shadow.guest_vtable = map_domain_page_global(gmfn);
4109
0
        /* Does this really need map_domain_page_global?  Handle the
4110
0
         * error properly if so. */
4111
0
        BUG_ON(v->arch.paging.shadow.guest_vtable == NULL); /* XXX */
4112
0
    }
4113
0
    else
4114
0
        v->arch.paging.shadow.guest_vtable = __linear_l2_table;
4115
0
#else
4116
0
#error this should never happen
4117
0
#endif
4118
0
4119
0
4120
0
    ////
4121
0
    //// vcpu->arch.shadow_table[]
4122
0
    ////
4123
0
4124
0
    /* We revoke write access to the new guest toplevel page(s) before we
4125
0
     * replace the old shadow pagetable(s), so that we can safely use the
4126
0
     * (old) shadow linear maps in the writeable mapping heuristics. */
4127
0
#if GUEST_PAGING_LEVELS == 2
4128
0
    if ( sh_remove_write_access(d, gmfn, 2, 0) != 0 )
4129
0
        flush_tlb_mask(d->domain_dirty_cpumask);
4130
0
    sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow);
4131
0
#elif GUEST_PAGING_LEVELS == 3
4132
0
    /* PAE guests have four shadow_table entries, based on the
4133
0
     * current values of the guest's four l3es. */
4134
0
    {
4135
0
        int flush = 0;
4136
0
        gfn_t gl2gfn;
4137
0
        mfn_t gl2mfn;
4138
0
        p2m_type_t p2mt;
4139
0
        guest_l3e_t *gl3e = (guest_l3e_t*)&v->arch.paging.shadow.gl3e;
4140
0
        /* First, make all four entries read-only. */
4141
0
        for ( i = 0; i < 4; i++ )
4142
0
        {
4143
0
            if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
4144
0
            {
4145
0
                gl2gfn = guest_l3e_get_gfn(gl3e[i]);
4146
0
                gl2mfn = get_gfn_query_unlocked(d, gfn_x(gl2gfn), &p2mt);
4147
0
                if ( p2m_is_ram(p2mt) )
4148
0
                    flush |= sh_remove_write_access(d, gl2mfn, 2, 0);
4149
0
            }
4150
0
        }
4151
0
        if ( flush )
4152
0
            flush_tlb_mask(d->domain_dirty_cpumask);
4153
0
        /* Now install the new shadows. */
4154
0
        for ( i = 0; i < 4; i++ )
4155
0
        {
4156
0
            if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
4157
0
            {
4158
0
                gl2gfn = guest_l3e_get_gfn(gl3e[i]);
4159
0
                gl2mfn = get_gfn_query_unlocked(d, gfn_x(gl2gfn), &p2mt);
4160
0
                if ( p2m_is_ram(p2mt) )
4161
0
                    sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3)
4162
0
                                           ? SH_type_l2h_shadow
4163
0
                                           : SH_type_l2_shadow);
4164
0
                else
4165
0
                    sh_set_toplevel_shadow(v, i, INVALID_MFN, 0);
4166
0
            }
4167
0
            else
4168
0
                sh_set_toplevel_shadow(v, i, INVALID_MFN, 0);
4169
0
        }
4170
0
    }
4171
0
#elif GUEST_PAGING_LEVELS == 4
4172
0
    if ( sh_remove_write_access(d, gmfn, 4, 0) != 0 )
4173
0
        flush_tlb_mask(d->domain_dirty_cpumask);
4174
0
    sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow);
4175
0
    if ( !shadow_mode_external(d) && !is_pv_32bit_domain(d) )
4176
0
    {
4177
0
        mfn_t smfn = pagetable_get_mfn(v->arch.shadow_table[0]);
4178
0
4179
0
        if ( !(v->arch.flags & TF_kernel_mode) && VM_ASSIST(d, m2p_strict) )
4180
0
            zap_ro_mpt(smfn);
4181
0
        else if ( (v->arch.flags & TF_kernel_mode) &&
4182
0
                  !VM_ASSIST(d, m2p_strict) )
4183
0
            fill_ro_mpt(smfn);
4184
0
    }
4185
0
#else
4186
0
#error This should never happen
4187
0
#endif
4188
0
4189
0
4190
0
    ///
4191
0
    /// v->arch.paging.shadow.l3table
4192
0
    ///
4193
0
#if SHADOW_PAGING_LEVELS == 3
4194
0
        {
4195
0
            mfn_t smfn = pagetable_get_mfn(v->arch.shadow_table[0]);
4196
0
            int i;
4197
0
            for ( i = 0; i < 4; i++ )
4198
0
            {
4199
0
#if GUEST_PAGING_LEVELS == 2
4200
0
                /* 2-on-3: make a PAE l3 that points at the four-page l2 */
4201
0
                if ( i != 0 )
4202
0
                    smfn = sh_next_page(smfn);
4203
0
#else
4204
0
                /* 3-on-3: make a PAE l3 that points at the four l2 pages */
4205
0
                smfn = pagetable_get_mfn(v->arch.shadow_table[i]);
4206
0
#endif
4207
0
                v->arch.paging.shadow.l3table[i] =
4208
0
                    (mfn_x(smfn) == 0)
4209
0
                    ? shadow_l3e_empty()
4210
0
                    : shadow_l3e_from_mfn(smfn, _PAGE_PRESENT);
4211
0
            }
4212
0
        }
4213
0
#endif /* SHADOW_PAGING_LEVELS == 3 */
4214
0
4215
0
4216
0
    ///
4217
0
    /// v->arch.cr3
4218
0
    ///
4219
0
    if ( shadow_mode_external(d) )
4220
0
    {
4221
0
        make_cr3(v, pagetable_get_mfn(v->arch.monitor_table));
4222
0
    }
4223
0
    else // not shadow_mode_external...
4224
0
    {
4225
0
        /* We don't support PV except guest == shadow == config levels */
4226
0
        BUG_ON(GUEST_PAGING_LEVELS != SHADOW_PAGING_LEVELS);
4227
0
#if SHADOW_PAGING_LEVELS == 3
4228
0
        /* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated.
4229
0
         * Don't use make_cr3 because (a) we know it's below 4GB, and
4230
0
         * (b) it's not necessarily page-aligned, and make_cr3 takes a pfn */
4231
0
        ASSERT(virt_to_maddr(&v->arch.paging.shadow.l3table) <= 0xffffffe0ULL);
4232
0
        v->arch.cr3 = virt_to_maddr(&v->arch.paging.shadow.l3table);
4233
0
#else
4234
0
        /* 4-on-4: Just use the shadow top-level directly */
4235
0
        make_cr3(v, pagetable_get_mfn(v->arch.shadow_table[0]));
4236
0
#endif
4237
0
    }
4238
0
4239
0
4240
0
    ///
4241
0
    /// v->arch.hvm_vcpu.hw_cr[3]
4242
0
    ///
4243
0
    if ( shadow_mode_external(d) )
4244
0
    {
4245
0
        ASSERT(is_hvm_domain(d));
4246
0
#if SHADOW_PAGING_LEVELS == 3
4247
0
        /* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated */
4248
0
        v->arch.hvm_vcpu.hw_cr[3] =
4249
0
            virt_to_maddr(&v->arch.paging.shadow.l3table);
4250
0
#else
4251
0
        /* 4-on-4: Just use the shadow top-level directly */
4252
0
        v->arch.hvm_vcpu.hw_cr[3] =
4253
0
            pagetable_get_paddr(v->arch.shadow_table[0]);
4254
0
#endif
4255
0
        hvm_update_guest_cr(v, 3);
4256
0
    }
4257
0
4258
0
    /* Fix up the linear pagetable mappings */
4259
0
    sh_update_linear_entries(v);
4260
0
4261
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
4262
0
    /* No longer safe to use cached gva->gfn translations */
4263
0
    vtlb_flush(v);
4264
0
#endif
4265
0
4266
0
#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
4267
0
    v->arch.paging.last_write_emul_ok = 0;
4268
0
#endif
4269
0
4270
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
4271
0
    /* Need to resync all the shadow entries on a TLB flush. We only
4272
0
     * update the shadows, leaving the pages out of sync. Also, we try
4273
0
     * to skip synchronization of shadows not mapped in the new
4274
0
     * tables. */
4275
0
    shadow_sync_other_vcpus(v);
4276
0
#endif
4277
0
4278
0
    /* Release the lock, if we took it (otherwise it's the caller's problem) */
4279
0
    if ( do_locking ) paging_unlock(v->domain);
4280
0
}
Unexecuted instantiation: multi.c:sh_update_cr3__guest_2
Unexecuted instantiation: multi.c:sh_update_cr3__guest_4
Unexecuted instantiation: multi.c:sh_update_cr3__guest_3
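The GUEST_PAGING_LEVELS == 3 branch of sh_update_cr3() above caches the guest's four PAE l3 entries straight out of the CR3 page, after masking off the two low cache-control bits of the index. Below is a minimal standalone sketch of that index arithmetic; the names used (pae_l3e_t, PAGE_SIZE, cache_pae_l3es) are local assumptions for the example, not the Xen types or helpers.

#include <stdint.h>
#include <string.h>

#define PAGE_SIZE 4096u

typedef uint64_t pae_l3e_t;

/* Copy the four l3 entries that a PAE CR3 value points at out of an
 * already-mapped copy of the CR3 page.  CR3 bits 5..11 give the offset
 * of the l3 table within the page; the low two bits of the resulting
 * index come from PWT/PCD cache control and are ignored. */
static void cache_pae_l3es(const void *mapped_cr3_page, uint32_t cr3,
                           pae_l3e_t cache[4])
{
    unsigned int idx = (cr3 & (PAGE_SIZE - 1)) / sizeof(pae_l3e_t);

    idx &= ~3u;                     /* drop the cache-control bits */
    memcpy(cache, (const pae_l3e_t *)mapped_cr3_page + idx,
           4 * sizeof(pae_l3e_t));
}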
4281
0
4282
0
4283
0
/**************************************************************************/
4284
0
/* Functions to revoke guest rights */
4285
0
4286
0
#if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
4287
0
int sh_rm_write_access_from_sl1p(struct domain *d, mfn_t gmfn,
4288
0
                                 mfn_t smfn, unsigned long off)
4289
0
{
4290
0
    struct vcpu *curr = current;
4291
0
    int r;
4292
0
    shadow_l1e_t *sl1p, sl1e;
4293
0
    struct page_info *sp;
4294
0
4295
0
    ASSERT(mfn_valid(gmfn));
4296
0
    ASSERT(mfn_valid(smfn));
4297
0
4298
0
    /* Remember if we've been told that this process is being torn down */
4299
0
    if ( curr->domain == d )
4300
0
        curr->arch.paging.shadow.pagetable_dying
4301
0
            = !!(mfn_to_page(gmfn)->shadow_flags & SHF_pagetable_dying);
4302
0
4303
0
    sp = mfn_to_page(smfn);
4304
0
4305
0
    if ( ((sp->count_info & PGC_count_mask) != 0)
4306
0
         || (sp->u.sh.type != SH_type_l1_shadow
4307
0
             && sp->u.sh.type != SH_type_fl1_shadow) )
4308
0
        goto fail;
4309
0
4310
0
    sl1p = map_domain_page(smfn);
4311
0
    sl1p += off;
4312
0
    sl1e = *sl1p;
4313
0
    if ( ((shadow_l1e_get_flags(sl1e) & (_PAGE_PRESENT|_PAGE_RW))
4314
0
          != (_PAGE_PRESENT|_PAGE_RW))
4315
0
         || (mfn_x(shadow_l1e_get_mfn(sl1e)) != mfn_x(gmfn)) )
4316
0
    {
4317
0
        unmap_domain_page(sl1p);
4318
0
        goto fail;
4319
0
    }
4320
0
4321
0
    /* Found it!  Need to remove its write permissions. */
4322
0
    sl1e = shadow_l1e_remove_flags(sl1e, _PAGE_RW);
4323
0
    r = shadow_set_l1e(d, sl1p, sl1e, p2m_ram_rw, smfn);
4324
0
    ASSERT( !(r & SHADOW_SET_ERROR) );
4325
0
4326
0
    unmap_domain_page(sl1p);
4327
0
    perfc_incr(shadow_writeable_h_7);
4328
0
    return 1;
4329
0
4330
0
 fail:
4331
0
    perfc_incr(shadow_writeable_h_8);
4332
0
    return 0;
4333
0
}
Unexecuted instantiation: sh_rm_write_access_from_sl1p__guest_4
Unexecuted instantiation: sh_rm_write_access_from_sl1p__guest_3
Unexecuted instantiation: sh_rm_write_access_from_sl1p__guest_2
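sh_rm_write_access_from_sl1p() above boils down to: if the candidate shadow l1 entry is present, writeable and maps the right frame, rewrite it without _PAGE_RW. Here is a self-contained sketch of that test on a raw 64-bit entry; the flag and mask constants (X_PAGE_*, X_FRAME_MASK) are assumptions local to the example, not the Xen definitions.

#include <stdbool.h>
#include <stdint.h>

#define X_PAGE_PRESENT  0x001ull
#define X_PAGE_RW       0x002ull
#define X_FRAME_MASK    0x000ffffffffff000ull

/* If *pte is a present, writeable mapping of 'mfn', make it read-only. */
static bool demote_writeable(uint64_t *pte, uint64_t mfn)
{
    uint64_t e = *pte;

    if ( (e & (X_PAGE_PRESENT | X_PAGE_RW)) != (X_PAGE_PRESENT | X_PAGE_RW) )
        return false;                       /* not a writeable mapping */
    if ( ((e & X_FRAME_MASK) >> 12) != mfn )
        return false;                       /* maps some other frame */

    *pte = e & ~X_PAGE_RW;                  /* found it: clear the write bit */
    return true;
}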
4334
0
#endif /* OOS */
4335
0
4336
0
#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
4337
0
static int sh_guess_wrmap(struct vcpu *v, unsigned long vaddr, mfn_t gmfn)
4338
0
/* Look up this vaddr in the current shadow and see if it's a writeable
4339
0
 * mapping of this gmfn.  If so, remove it.  Returns 1 if it worked. */
4340
0
{
4341
0
    struct domain *d = v->domain;
4342
0
    shadow_l1e_t sl1e, *sl1p;
4343
0
    shadow_l2e_t *sl2p;
4344
0
    shadow_l3e_t *sl3p;
4345
0
#if SHADOW_PAGING_LEVELS >= 4
4346
0
    shadow_l4e_t *sl4p;
4347
0
#endif
4348
0
    mfn_t sl1mfn;
4349
0
    int r;
4350
0
4351
0
    /* Carefully look in the shadow linear map for the l1e we expect */
4352
0
#if SHADOW_PAGING_LEVELS >= 4
4353
0
    /* Is a shadow linear map installed in the first place? */

4354
0
    sl4p  = v->arch.paging.shadow.guest_vtable;
4355
0
    sl4p += shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START);
4356
0
    if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) )
4357
0
        return 0;
4358
0
    sl4p = sh_linear_l4_table(v) + shadow_l4_linear_offset(vaddr);
4359
0
    if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) )
4360
0
        return 0;
4361
0
    sl3p = sh_linear_l3_table(v) + shadow_l3_linear_offset(vaddr);
4362
0
    if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) )
4363
0
        return 0;
4364
0
#else /* SHADOW_PAGING_LEVELS == 3 */
4365
0
    sl3p = ((shadow_l3e_t *) v->arch.paging.shadow.l3table)
4366
0
        + shadow_l3_linear_offset(vaddr);
4367
0
    if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) )
4368
0
        return 0;
4369
0
#endif
4370
0
    sl2p = sh_linear_l2_table(v) + shadow_l2_linear_offset(vaddr);
4371
0
    if ( !(shadow_l2e_get_flags(*sl2p) & _PAGE_PRESENT) )
4372
0
        return 0;
4373
0
    sl1p = sh_linear_l1_table(v) + shadow_l1_linear_offset(vaddr);
4374
0
    sl1e = *sl1p;
4375
0
    if ( ((shadow_l1e_get_flags(sl1e) & (_PAGE_PRESENT|_PAGE_RW))
4376
0
          != (_PAGE_PRESENT|_PAGE_RW))
4377
0
         || (mfn_x(shadow_l1e_get_mfn(sl1e)) != mfn_x(gmfn)) )
4378
0
        return 0;
4379
0
4380
0
    /* Found it!  Need to remove its write permissions. */
4381
0
    sl1mfn = shadow_l2e_get_mfn(*sl2p);
4382
0
    sl1e = shadow_l1e_remove_flags(sl1e, _PAGE_RW);
4383
0
    r = shadow_set_l1e(d, sl1p, sl1e, p2m_ram_rw, sl1mfn);
4384
0
    if ( r & SHADOW_SET_ERROR ) {
4385
0
        /* Can only currently happen if we found a grant-mapped
4386
0
         * page.  Just make the guess fail. */
4387
0
        return 0;
4388
0
    }
4389
0
    TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_WRMAP_GUESS_FOUND);
4390
0
    return 1;
4391
0
}
Unexecuted instantiation: multi.c:sh_guess_wrmap__guest_4
Unexecuted instantiation: multi.c:sh_guess_wrmap__guest_2
Unexecuted instantiation: multi.c:sh_guess_wrmap__guest_3
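sh_guess_wrmap() above descends the shadow linear map one level at a time, and each shadow_lN_linear_offset() is just the usual x86-64 nine-bits-per-level index extraction from the virtual address. A small sketch of that arithmetic follows; the shift and entry-count constants are the standard 4-level values, stated here as assumptions rather than taken from the Xen headers.

#include <stdint.h>

#define PT_ENTRIES  512u
#define L1_SHIFT    12
#define LEVEL_BITS  9

/* Index into the page table at 'level' (1..4) for virtual address 'vaddr'. */
static unsigned int pt_offset(uint64_t vaddr, unsigned int level)
{
    return (vaddr >> (L1_SHIFT + LEVEL_BITS * (level - 1))) & (PT_ENTRIES - 1);
}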
4392
0
#endif
4393
0
4394
0
int sh_rm_write_access_from_l1(struct domain *d, mfn_t sl1mfn,
4395
0
                               mfn_t readonly_mfn)
4396
0
/* Excises all writeable mappings to readonly_mfn from this l1 shadow table */
4397
0
{
4398
0
    shadow_l1e_t *sl1e;
4399
0
    int done = 0;
4400
0
    int flags;
4401
0
#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
4402
0
    struct vcpu *curr = current;
4403
0
    mfn_t base_sl1mfn = sl1mfn; /* Because sl1mfn changes in the foreach */
4404
0
#endif
4405
0
4406
0
    SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done,
4407
0
    {
4408
0
        flags = shadow_l1e_get_flags(*sl1e);
4409
0
        if ( (flags & _PAGE_PRESENT)
4410
0
             && (flags & _PAGE_RW)
4411
0
             && (mfn_x(shadow_l1e_get_mfn(*sl1e)) == mfn_x(readonly_mfn)) )
4412
0
        {
4413
0
            shadow_l1e_t ro_sl1e = shadow_l1e_remove_flags(*sl1e, _PAGE_RW);
4414
0
            (void) shadow_set_l1e(d, sl1e, ro_sl1e, p2m_ram_rw, sl1mfn);
4415
0
#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
4416
0
            /* Remember the last shadow that we shot a writeable mapping in */
4417
0
            if ( curr->domain == d )
4418
0
                curr->arch.paging.shadow.last_writeable_pte_smfn = mfn_x(base_sl1mfn);
4419
0
#endif
4420
0
            if ( (mfn_to_page(readonly_mfn)->u.inuse.type_info
4421
0
                  & PGT_count_mask) == 0 )
4422
0
                /* This breaks us cleanly out of the FOREACH macro */
4423
0
                done = 1;
4424
0
        }
4425
0
    });
4426
0
    return done;
4427
0
}
Unexecuted instantiation: sh_rm_write_access_from_l1__guest_3
Unexecuted instantiation: sh_rm_write_access_from_l1__guest_2
Unexecuted instantiation: sh_rm_write_access_from_l1__guest_4
4428
0
4429
0
4430
0
int sh_rm_mappings_from_l1(struct domain *d, mfn_t sl1mfn, mfn_t target_mfn)
4431
0
/* Excises all mappings to the guest frame from this shadow l1 table */
4432
0
{
4433
0
    shadow_l1e_t *sl1e;
4434
0
    int done = 0;
4435
0
    int flags;
4436
0
4437
0
    SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done,
4438
0
    {
4439
0
        flags = shadow_l1e_get_flags(*sl1e);
4440
0
        if ( (flags & _PAGE_PRESENT)
4441
0
             && (mfn_x(shadow_l1e_get_mfn(*sl1e)) == mfn_x(target_mfn)) )
4442
0
        {
4443
0
            (void) shadow_set_l1e(d, sl1e, shadow_l1e_empty(),
4444
0
                                  p2m_invalid, sl1mfn);
4445
0
            if ( sh_check_page_has_no_refs(mfn_to_page(target_mfn)) )
4446
0
                /* This breaks us cleanly out of the FOREACH macro */
4447
0
                done = 1;
4448
0
        }
4449
0
    });
4450
0
    return done;
4451
0
}
Unexecuted instantiation: sh_rm_mappings_from_l1__guest_4
Unexecuted instantiation: sh_rm_mappings_from_l1__guest_2
Unexecuted instantiation: sh_rm_mappings_from_l1__guest_3
4452
0
4453
0
/**************************************************************************/
4454
0
/* Functions to excise all pointers to shadows from higher-level shadows. */
4455
0
4456
0
void sh_clear_shadow_entry(struct domain *d, void *ep, mfn_t smfn)
4457
0
/* Blank out a single shadow entry */
4458
0
{
4459
0
    switch ( mfn_to_page(smfn)->u.sh.type )
4460
0
    {
4461
0
    case SH_type_l1_shadow:
4462
0
        (void) shadow_set_l1e(d, ep, shadow_l1e_empty(), p2m_invalid, smfn);
4463
0
        break;
4464
0
    case SH_type_l2_shadow:
4465
0
#if GUEST_PAGING_LEVELS >= 3
4466
0
    case SH_type_l2h_shadow:
4467
0
#endif
4468
0
        (void) shadow_set_l2e(d, ep, shadow_l2e_empty(), smfn);
4469
0
        break;
4470
0
#if GUEST_PAGING_LEVELS >= 4
4471
0
    case SH_type_l3_shadow:
4472
0
        (void) shadow_set_l3e(d, ep, shadow_l3e_empty(), smfn);
4473
0
        break;
4474
0
    case SH_type_l4_shadow:
4475
0
        (void) shadow_set_l4e(d, ep, shadow_l4e_empty(), smfn);
4476
0
        break;
4477
0
#endif
4478
0
    default: BUG(); /* Called with the wrong kind of shadow. */
4479
0
    }
4480
0
}
Unexecuted instantiation: sh_clear_shadow_entry__guest_2
Unexecuted instantiation: sh_clear_shadow_entry__guest_3
Unexecuted instantiation: sh_clear_shadow_entry__guest_4
4481
0
4482
0
int sh_remove_l1_shadow(struct domain *d, mfn_t sl2mfn, mfn_t sl1mfn)
4483
0
/* Remove all mappings of this l1 shadow from this l2 shadow */
4484
0
{
4485
0
    shadow_l2e_t *sl2e;
4486
0
    int done = 0;
4487
0
    int flags;
4488
0
4489
0
    SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, done, d,
4490
0
    {
4491
0
        flags = shadow_l2e_get_flags(*sl2e);
4492
0
        if ( (flags & _PAGE_PRESENT)
4493
0
             && (mfn_x(shadow_l2e_get_mfn(*sl2e)) == mfn_x(sl1mfn)) )
4494
0
        {
4495
0
            (void) shadow_set_l2e(d, sl2e, shadow_l2e_empty(), sl2mfn);
4496
0
            if ( mfn_to_page(sl1mfn)->u.sh.type == 0 )
4497
0
                /* This breaks us cleanly out of the FOREACH macro */
4498
0
                done = 1;
4499
0
        }
4500
0
    });
4501
0
    return done;
4502
0
}
Unexecuted instantiation: sh_remove_l1_shadow__guest_2
Unexecuted instantiation: sh_remove_l1_shadow__guest_4
Unexecuted instantiation: sh_remove_l1_shadow__guest_3
4503
0
4504
0
#if GUEST_PAGING_LEVELS >= 4
4505
0
int sh_remove_l2_shadow(struct domain *d, mfn_t sl3mfn, mfn_t sl2mfn)
4506
0
/* Remove all mappings of this l2 shadow from this l3 shadow */
4507
0
{
4508
0
    shadow_l3e_t *sl3e;
4509
0
    int done = 0;
4510
0
    int flags;
4511
0
4512
0
    SHADOW_FOREACH_L3E(sl3mfn, sl3e, 0, done,
4513
0
    {
4514
0
        flags = shadow_l3e_get_flags(*sl3e);
4515
0
        if ( (flags & _PAGE_PRESENT)
4516
0
             && (mfn_x(shadow_l3e_get_mfn(*sl3e)) == mfn_x(sl2mfn)) )
4517
0
        {
4518
0
            (void) shadow_set_l3e(d, sl3e, shadow_l3e_empty(), sl3mfn);
4519
0
            if ( mfn_to_page(sl2mfn)->u.sh.type == 0 )
4520
0
                /* This breaks us cleanly out of the FOREACH macro */
4521
0
                done = 1;
4522
0
        }
4523
0
    });
4524
0
    return done;
4525
0
}
4526
0
4527
0
int sh_remove_l3_shadow(struct domain *d, mfn_t sl4mfn, mfn_t sl3mfn)
4528
0
/* Remove all mappings of this l3 shadow from this l4 shadow */
4529
0
{
4530
0
    shadow_l4e_t *sl4e;
4531
0
    int done = 0;
4532
0
    int flags;
4533
0
4534
0
    SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, done, d,
4535
0
    {
4536
0
        flags = shadow_l4e_get_flags(*sl4e);
4537
0
        if ( (flags & _PAGE_PRESENT)
4538
0
             && (mfn_x(shadow_l4e_get_mfn(*sl4e)) == mfn_x(sl3mfn)) )
4539
0
        {
4540
0
            (void) shadow_set_l4e(d, sl4e, shadow_l4e_empty(), sl4mfn);
4541
0
            if ( mfn_to_page(sl3mfn)->u.sh.type == 0 )
4542
0
                /* This breaks us cleanly out of the FOREACH macro */
4543
0
                done = 1;
4544
0
        }
4545
0
    });
4546
0
    return done;
4547
0
}
4548
0
#endif /* 64bit guest */
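sh_remove_l1_shadow(), sh_remove_l2_shadow() and sh_remove_l3_shadow() above all follow the same shape: sweep a 512-entry shadow table, blank every present entry that points at the victim frame, and stop early once the victim has no references left. Below is a generic sketch of that loop; every name in it (blank_mappings, E_*, no_refs_left) is invented for the example and is not a Xen symbol.

#include <stdbool.h>
#include <stdint.h>

#define ENTRIES        512u
#define E_PRESENT      0x001ull
#define E_FRAME_MASK   0x000ffffffffff000ull

/* Clear every present entry in 'table' that maps 'victim_mfn'; stop as
 * soon as the caller reports that no references to the victim remain. */
static bool blank_mappings(uint64_t table[ENTRIES], uint64_t victim_mfn,
                           bool (*no_refs_left)(uint64_t mfn))
{
    bool done = false;

    for ( unsigned int i = 0; i < ENTRIES && !done; i++ )
    {
        uint64_t e = table[i];

        if ( (e & E_PRESENT) && ((e & E_FRAME_MASK) >> 12) == victim_mfn )
        {
            table[i] = 0;                     /* blank the entry */
            done = no_refs_left(victim_mfn);  /* break out early if possible */
        }
    }

    return done;
}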
4549
0
4550
0
/**************************************************************************/
4551
0
/* Function for the guest to inform us that a process is being torn
4552
0
 * down.  We remember that as a hint to unshadow its pagetables soon,
4553
0
 * and in the meantime we unhook its top-level user-mode entries. */
4554
0
4555
0
#if GUEST_PAGING_LEVELS == 3
4556
0
static void sh_pagetable_dying(struct vcpu *v, paddr_t gpa)
4557
0
{
4558
0
    struct domain *d = v->domain;
4559
0
    int i = 0;
4560
0
    int flush = 0;
4561
0
    int fast_path = 0;
4562
0
    paddr_t gcr3 = 0;
4563
0
    p2m_type_t p2mt;
4564
0
    char *gl3pa = NULL;
4565
0
    guest_l3e_t *gl3e = NULL;
4566
0
    unsigned long l3gfn;
4567
0
    mfn_t l3mfn;
4568
0
4569
0
    gcr3 = (v->arch.hvm_vcpu.guest_cr[3]);
4570
0
    /* fast path: the pagetable belongs to the current context */
4571
0
    if ( gcr3 == gpa )
4572
0
        fast_path = 1;
4573
0
4574
0
    l3gfn = gpa >> PAGE_SHIFT;
4575
0
    l3mfn = get_gfn_query(d, _gfn(l3gfn), &p2mt);
4576
0
    if ( !mfn_valid(l3mfn) || !p2m_is_ram(p2mt) )
4577
0
    {
4578
0
        printk(XENLOG_DEBUG "sh_pagetable_dying: gpa not valid %"PRIpaddr"\n",
4579
0
               gpa);
4580
0
        goto out_put_gfn;
4581
0
    }
4582
0
4583
0
    paging_lock(d);
4584
0
4585
0
    if ( !fast_path )
4586
0
    {
4587
0
        gl3pa = map_domain_page(l3mfn);
4588
0
        gl3e = (guest_l3e_t *)(gl3pa + ((unsigned long)gpa & ~PAGE_MASK));
4589
0
    }
4590
0
    for ( i = 0; i < 4; i++ )
4591
0
    {
4592
0
        mfn_t smfn, gmfn;
4593
0
4594
0
        if ( fast_path ) {
4595
0
            if ( pagetable_is_null(v->arch.shadow_table[i]) )
4596
0
                smfn = INVALID_MFN;
4597
0
            else
4598
0
                smfn = pagetable_get_mfn(v->arch.shadow_table[i]);
4599
0
        }
4600
0
        else
4601
0
        {
4602
0
            /* retrieving the l2s */
4603
0
            gmfn = get_gfn_query_unlocked(d, gfn_x(guest_l3e_get_gfn(gl3e[i])),
4604
0
                                          &p2mt);
4605
0
            smfn = unlikely(mfn_eq(gmfn, INVALID_MFN))
4606
0
                   ? INVALID_MFN
4607
0
                   : shadow_hash_lookup(d, mfn_x(gmfn), SH_type_l2_pae_shadow);
4608
0
        }
4609
0
4610
0
        if ( mfn_valid(smfn) )
4611
0
        {
4612
0
            gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
4613
0
            mfn_to_page(gmfn)->shadow_flags |= SHF_pagetable_dying;
4614
0
            shadow_unhook_mappings(d, smfn, 1/* user pages only */);
4615
0
            flush = 1;
4616
0
        }
4617
0
    }
4618
0
    if ( flush )
4619
0
        flush_tlb_mask(d->domain_dirty_cpumask);
4620
0
4621
0
    /* Remember that we've seen the guest use this interface, so we
4622
0
     * can rely on it using it in future, instead of guessing at
4623
0
     * when processes are being torn down. */
4624
0
    d->arch.paging.shadow.pagetable_dying_op = 1;
4625
0
4626
0
    v->arch.paging.shadow.pagetable_dying = 1;
4627
0
4628
0
    if ( !fast_path )
4629
0
        unmap_domain_page(gl3pa);
4630
0
    paging_unlock(d);
4631
0
out_put_gfn:
4632
0
    put_gfn(d, l3gfn);
4633
0
}
4634
0
#else
4635
0
static void sh_pagetable_dying(struct vcpu *v, paddr_t gpa)
4636
0
{
4637
0
    struct domain *d = v->domain;
4638
0
    mfn_t smfn, gmfn;
4639
0
    p2m_type_t p2mt;
4640
0
4641
0
    gmfn = get_gfn_query(d, _gfn(gpa >> PAGE_SHIFT), &p2mt);
4642
0
    paging_lock(d);
4643
0
4644
0
#if GUEST_PAGING_LEVELS == 2
4645
0
    smfn = shadow_hash_lookup(d, mfn_x(gmfn), SH_type_l2_32_shadow);
4646
0
#else
4647
0
    smfn = shadow_hash_lookup(d, mfn_x(gmfn), SH_type_l4_64_shadow);
4648
0
#endif
4649
0
4650
0
    if ( mfn_valid(smfn) )
4651
0
    {
4652
0
        mfn_to_page(gmfn)->shadow_flags |= SHF_pagetable_dying;
4653
0
        shadow_unhook_mappings(d, smfn, 1/* user pages only */);
4654
0
        /* Now flush the TLB: we removed toplevel mappings. */
4655
0
        flush_tlb_mask(d->domain_dirty_cpumask);
4656
0
    }
4657
0
4658
0
    /* Remember that we've seen the guest use this interface, so we
4659
0
     * can rely on it using it in future, instead of guessing at
4660
0
     * when processes are being torn down. */
4661
0
    d->arch.paging.shadow.pagetable_dying_op = 1;
4662
0
4663
0
    v->arch.paging.shadow.pagetable_dying = 1;
4664
0
4665
0
    paging_unlock(d);
4666
0
    put_gfn(d, gpa >> PAGE_SHIFT);
4667
0
}
4668
0
#endif
4669
0
4670
0
/**************************************************************************/
4671
0
/* Handling guest writes to pagetables. */
4672
0
4673
0
/* Tidy up after the emulated write: mark pages dirty, verify the new
4674
0
 * contents, and undo the mapping */
4675
0
static void emulate_unmap_dest(struct vcpu *v,
4676
0
                               void *addr,
4677
0
                               u32 bytes,
4678
0
                               struct sh_emulate_ctxt *sh_ctxt)
4679
0
{
4680
0
    ASSERT(mfn_valid(sh_ctxt->mfn[0]));
4681
0
4682
0
    /* If we are writing lots of PTE-aligned zeros, might want to unshadow */
4683
0
    if ( likely(bytes >= 4) && (*(u32 *)addr == 0) )
4684
0
    {
4685
0
        if ( ((unsigned long) addr & ((sizeof (guest_intpte_t)) - 1)) == 0 )
4686
0
            check_for_early_unshadow(v, sh_ctxt->mfn[0]);
4687
0
        /* Don't reset the heuristic if we're writing zeros at non-aligned
4688
0
         * addresses, otherwise it doesn't catch REP MOVSD on PAE guests */
4689
0
    }
4690
0
    else
4691
0
        reset_early_unshadow(v);
4692
0
4693
0
    sh_emulate_unmap_dest(v, addr, bytes, sh_ctxt);
4694
0
}
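The early-unshadow heuristic in emulate_unmap_dest() above only fires when the emulated write stored at least four bytes of zeros at a PTE-aligned address. A compact sketch of that trigger condition follows, with uint64_t standing in for guest_intpte_t (an assumption; 2-level guests use 4-byte PTEs, so the alignment check there would use 4 instead).

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

/* Does this write look like a PTE-aligned store of zeros? */
static bool zero_pte_write(const void *addr, uint32_t bytes)
{
    uint32_t val;

    if ( bytes < 4 )
        return false;
    memcpy(&val, addr, sizeof(val));    /* read the first 32 bits safely */
    return val == 0 &&
           ((uintptr_t)addr & (sizeof(uint64_t) - 1)) == 0;
}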
4695
0
4696
0
static int
4697
0
sh_x86_emulate_write(struct vcpu *v, unsigned long vaddr, void *src,
4698
0
                     u32 bytes, struct sh_emulate_ctxt *sh_ctxt)
4699
0
{
4700
0
    void *addr;
4701
0
4702
0
    /* Unaligned writes are only acceptable on HVM */
4703
0
    if ( (vaddr & (bytes - 1)) && !is_hvm_vcpu(v)  )
4704
0
        return X86EMUL_UNHANDLEABLE;
4705
0
4706
0
    addr = sh_emulate_map_dest(v, vaddr, bytes, sh_ctxt);
4707
0
    if ( IS_ERR(addr) )
4708
0
        return ~PTR_ERR(addr);
4709
0
4710
0
    paging_lock(v->domain);
4711
0
    memcpy(addr, src, bytes);
4712
0
4713
0
    if ( tb_init_done )
4714
0
    {
4715
0
#if GUEST_PAGING_LEVELS == 3
4716
0
        if ( vaddr == this_cpu(trace_emulate_initial_va) )
4717
0
            memcpy(&this_cpu(trace_emulate_write_val), src, bytes);
4718
0
        else if ( (vaddr & ~(0x7UL)) == this_cpu(trace_emulate_initial_va) )
4719
0
        {
4720
0
            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EMULATE_FULL_PT);
4721
0
            memcpy(&this_cpu(trace_emulate_write_val),
4722
0
                   (void *)(((unsigned long) addr) & ~(0x7UL)), GUEST_PTE_SIZE);
4723
0
        }
4724
0
#else
4725
0
        memcpy(&this_cpu(trace_emulate_write_val), src, bytes);
4726
0
#endif
4727
0
    }
4728
0
4729
0
    emulate_unmap_dest(v, addr, bytes, sh_ctxt);
4730
0
    shadow_audit_tables(v);
4731
0
    paging_unlock(v->domain);
4732
0
    return X86EMUL_OKAY;
4733
0
}
Unexecuted instantiation: multi.c:sh_x86_emulate_write__guest_4
Unexecuted instantiation: multi.c:sh_x86_emulate_write__guest_3
Unexecuted instantiation: multi.c:sh_x86_emulate_write__guest_2
4734
0
4735
0
static int
4736
0
sh_x86_emulate_cmpxchg(struct vcpu *v, unsigned long vaddr,
4737
0
                        unsigned long old, unsigned long new,
4738
0
                        unsigned int bytes, struct sh_emulate_ctxt *sh_ctxt)
4739
0
{
4740
0
    void *addr;
4741
0
    unsigned long prev;
4742
0
    int rv = X86EMUL_OKAY;
4743
0
4744
0
    /* Unaligned writes are only acceptable on HVM */
4745
0
    if ( (vaddr & (bytes - 1)) && !is_hvm_vcpu(v)  )
4746
0
        return X86EMUL_UNHANDLEABLE;
4747
0
4748
0
    addr = sh_emulate_map_dest(v, vaddr, bytes, sh_ctxt);
4749
0
    if ( IS_ERR(addr) )
4750
0
        return ~PTR_ERR(addr);
4751
0
4752
0
    paging_lock(v->domain);
4753
0
    switch ( bytes )
4754
0
    {
4755
0
    case 1: prev = cmpxchg(((u8 *)addr), old, new);  break;
4756
0
    case 2: prev = cmpxchg(((u16 *)addr), old, new); break;
4757
0
    case 4: prev = cmpxchg(((u32 *)addr), old, new); break;
4758
0
    case 8: prev = cmpxchg(((u64 *)addr), old, new); break;
4759
0
    default:
4760
0
        SHADOW_PRINTK("cmpxchg of size %i is not supported\n", bytes);
4761
0
        prev = ~old;
4762
0
    }
4763
0
4764
0
    if ( prev != old )
4765
0
        rv = X86EMUL_RETRY;
4766
0
4767
0
    SHADOW_DEBUG(EMULATE, "va %#lx was %#lx expected %#lx"
4768
0
                  " wanted %#lx now %#lx bytes %u\n",
4769
0
                  vaddr, prev, old, new, *(unsigned long *)addr, bytes);
4770
0
4771
0
    emulate_unmap_dest(v, addr, bytes, sh_ctxt);
4772
0
    shadow_audit_tables(v);
4773
0
    paging_unlock(v->domain);
4774
0
    return rv;
4775
0
}
Unexecuted instantiation: multi.c:sh_x86_emulate_cmpxchg__guest_2
Unexecuted instantiation: multi.c:sh_x86_emulate_cmpxchg__guest_3
Unexecuted instantiation: multi.c:sh_x86_emulate_cmpxchg__guest_4
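sh_x86_emulate_cmpxchg() above dispatches on operand size and reports X86EMUL_RETRY when the comparison fails, so the emulator re-runs the instruction against the updated memory. Here is a rough standalone sketch of that pattern using the GCC/Clang __sync_val_compare_and_swap builtin in place of Xen's cmpxchg() macro; the EMUL_* codes are local stand-ins for the x86_emulate return values.

#include <stdint.h>

#define EMUL_OKAY   0
#define EMUL_RETRY  1

static int emulate_cmpxchg(void *addr, unsigned long old, unsigned long new,
                           unsigned int bytes)
{
    unsigned long prev;

    switch ( bytes )
    {
    case 1: prev = __sync_val_compare_and_swap((uint8_t  *)addr, old, new); break;
    case 2: prev = __sync_val_compare_and_swap((uint16_t *)addr, old, new); break;
    case 4: prev = __sync_val_compare_and_swap((uint32_t *)addr, old, new); break;
    case 8: prev = __sync_val_compare_and_swap((uint64_t *)addr, old, new); break;
    default: return EMUL_RETRY;   /* unsupported size: make the caller retry */
    }

    return (prev == old) ? EMUL_OKAY : EMUL_RETRY;
}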
4776
0
4777
0
/**************************************************************************/
4778
0
/* Audit tools */
4779
0
4780
0
#if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES
4781
0
4782
0
#define AUDIT_FAIL(_level, _fmt, _a...) do {                            \
4783
0
    printk("Shadow %u-on-%u audit failed at level %i, index %i\n"       \
4784
0
           "gl" #_level "mfn = %" PRI_mfn                               \
4785
0
           " sl" #_level "mfn = %" PRI_mfn                              \
4786
0
           " &gl" #_level "e = %p &sl" #_level "e = %p"                 \
4787
0
           " gl" #_level "e = %" SH_PRI_gpte                            \
4788
0
           " sl" #_level "e = %" SH_PRI_pte "\nError: " _fmt "\n",      \
4789
0
           GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS,                   \
4790
0
               _level, guest_index(gl ## _level ## e),                  \
4791
0
               mfn_x(gl ## _level ## mfn), mfn_x(sl ## _level ## mfn),  \
4792
0
               gl ## _level ## e, sl ## _level ## e,                    \
4793
0
               gl ## _level ## e->l ## _level, sl ## _level ## e->l ## _level, \
4794
0
               ##_a);                                                   \
4795
0
        BUG();                                                          \
4796
0
        done = 1;                                                       \
4797
0
} while (0)
4798
0
4799
0
#define AUDIT_FAIL_MIN(_level, _fmt, _a...) do {                        \
4800
0
    printk("Shadow %u-on-%u audit failed at level %i\n"                 \
4801
0
           "gl" #_level "mfn = %" PRI_mfn                               \
4802
0
           " sl" #_level "mfn = %" PRI_mfn                              \
4803
0
           " Error: " _fmt "\n",                                        \
4804
0
           GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS,                   \
4805
0
           _level,                                                      \
4806
0
           mfn_x(gl ## _level ## mfn), mfn_x(sl ## _level ## mfn),      \
4807
0
           ##_a);                                                       \
4808
0
    BUG();                                                              \
4809
0
    done = 1;                                                           \
4810
0
} while (0)
4811
0
4812
0
static char * sh_audit_flags(struct vcpu *v, int level,
4813
0
                              int gflags, int sflags)
4814
0
/* Common code for auditing flag bits */
4815
0
{
4816
0
    if ( (sflags & _PAGE_PRESENT) && !(gflags & _PAGE_PRESENT) )
4817
0
        return "shadow is present but guest is not present";
4818
0
    if ( (sflags & _PAGE_GLOBAL) && !is_hvm_vcpu(v) )
4819
0
        return "global bit set in PV shadow";
4820
0
    if ( level == 2 && (sflags & _PAGE_PSE) )
4821
0
        return "PS bit set in shadow";
4822
0
#if SHADOW_PAGING_LEVELS == 3
4823
0
    if ( level == 3 ) return NULL; /* All the other bits are blank in PAEl3 */
4824
0
#endif
4825
0
    if ( (sflags & _PAGE_PRESENT) && !(gflags & _PAGE_ACCESSED) )
4826
0
        return "accessed bit not propagated";
4827
0
    if ( (level == 1 || (level == 2 && (gflags & _PAGE_PSE)))
4828
0
         && ((sflags & _PAGE_RW) && !(gflags & _PAGE_DIRTY)) )
4829
0
        return "dirty bit not propagated";
4830
0
    if ( (sflags & _PAGE_USER) != (gflags & _PAGE_USER) )
4831
0
        return "user/supervisor bit does not match";
4832
0
    if ( (sflags & _PAGE_NX_BIT) != (gflags & _PAGE_NX_BIT) )
4833
0
        return "NX bit does not match";
4834
0
    if ( (sflags & _PAGE_RW) && !(gflags & _PAGE_RW) )
4835
0
        return "shadow grants write access but guest does not";
4836
0
    return NULL;
4837
0
}
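One of the less obvious checks in sh_audit_flags() above is the dirty-bit rule for leaf entries: a shadow entry may only grant write access once the corresponding guest entry shows _PAGE_DIRTY, since the shadow code sets the guest's dirty bit before handing out a writeable shadow. A one-line sketch of that invariant follows; the flag values are assumptions local to the example.

#include <stdbool.h>
#include <stdint.h>

#define F_RW     0x002u
#define F_DIRTY  0x040u

/* If the shadow grants write access, the guest entry must look dirty. */
static bool dirty_bit_ok(uint32_t gflags, uint32_t sflags)
{
    return !(sflags & F_RW) || (gflags & F_DIRTY);
}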
4838
0
4839
0
int sh_audit_l1_table(struct vcpu *v, mfn_t sl1mfn, mfn_t x)
4840
0
{
4841
0
    guest_l1e_t *gl1e, *gp;
4842
0
    shadow_l1e_t *sl1e;
4843
0
    mfn_t mfn, gmfn, gl1mfn;
4844
0
    gfn_t gfn;
4845
0
    p2m_type_t p2mt;
4846
0
    char *s;
4847
0
    int done = 0;
4848
0
4849
0
    /* Follow the backpointer */
4850
0
    ASSERT(mfn_to_page(sl1mfn)->u.sh.head);
4851
0
    gl1mfn = backpointer(mfn_to_page(sl1mfn));
4852
0
4853
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
4854
0
    /* Out-of-sync l1 shadows can contain anything: just check the OOS hash */
4855
0
    if ( page_is_out_of_sync(mfn_to_page(gl1mfn)) )
4856
0
    {
4857
0
        oos_audit_hash_is_present(v->domain, gl1mfn);
4858
0
        return 0;
4859
0
    }
4860
0
#endif
4861
0
4862
0
    gl1e = gp = map_domain_page(gl1mfn);
4863
0
    SHADOW_FOREACH_L1E(sl1mfn, sl1e, &gl1e, done, {
4864
0
4865
0
        if ( sh_l1e_is_magic(*sl1e) )
4866
0
        {
4867
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH)
4868
0
            if ( sh_l1e_is_gnp(*sl1e) )
4869
0
            {
4870
0
                if ( guest_l1e_get_flags(*gl1e) & _PAGE_PRESENT )
4871
0
                    AUDIT_FAIL(1, "shadow is GNP magic but guest is present");
4872
0
            }
4873
0
            else
4874
0
            {
4875
0
                ASSERT(sh_l1e_is_mmio(*sl1e));
4876
0
                gfn = sh_l1e_mmio_get_gfn(*sl1e);
4877
0
                if ( gfn_x(gfn) != gfn_x(guest_l1e_get_gfn(*gl1e)) )
4878
0
                    AUDIT_FAIL(1, "shadow MMIO gfn is %" SH_PRI_gfn
4879
0
                               " but guest gfn is %" SH_PRI_gfn,
4880
0
                               gfn_x(gfn),
4881
0
                               gfn_x(guest_l1e_get_gfn(*gl1e)));
4882
0
            }
4883
0
#endif
4884
0
        }
4885
0
        else
4886
0
        {
4887
0
            s = sh_audit_flags(v, 1, guest_l1e_get_flags(*gl1e),
4888
0
                               shadow_l1e_get_flags(*sl1e));
4889
0
            if ( s ) AUDIT_FAIL(1, "%s", s);
4890
0
4891
0
            if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS )
4892
0
            {
4893
0
                gfn = guest_l1e_get_gfn(*gl1e);
4894
0
                mfn = shadow_l1e_get_mfn(*sl1e);
4895
0
                gmfn = get_gfn_query_unlocked(v->domain, gfn_x(gfn), &p2mt);
4896
0
                if ( !p2m_is_grant(p2mt) && mfn_x(gmfn) != mfn_x(mfn) )
4897
0
                    AUDIT_FAIL(1, "bad translation: gfn %" SH_PRI_gfn
4898
0
                               " --> %" PRI_mfn " != mfn %" PRI_mfn,
4899
0
                               gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn));
4900
0
            }
4901
0
        }
4902
0
    });
4903
0
    unmap_domain_page(gp);
4904
0
    return done;
4905
0
}
Unexecuted instantiation: sh_audit_l1_table__guest_2
Unexecuted instantiation: sh_audit_l1_table__guest_4
Unexecuted instantiation: sh_audit_l1_table__guest_3
4906
0
4907
0
int sh_audit_fl1_table(struct vcpu *v, mfn_t sl1mfn, mfn_t x)
4908
0
{
4909
0
    guest_l1e_t *gl1e, e;
4910
0
    shadow_l1e_t *sl1e;
4911
0
    mfn_t gl1mfn = INVALID_MFN;
4912
0
    int f;
4913
0
    int done = 0;
4914
0
4915
0
    /* fl1 has no useful backpointer: all we can check are flags */
4916
0
    e = guest_l1e_from_gfn(_gfn(0), 0); gl1e = &e; /* Needed for macro */
4917
0
    SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, {
4918
0
        f = shadow_l1e_get_flags(*sl1e);
4919
0
        f &= ~(_PAGE_AVAIL0|_PAGE_AVAIL1|_PAGE_AVAIL2);
4920
0
        if ( !(f == 0
4921
0
               || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
4922
0
                        _PAGE_ACCESSED)
4923
0
               || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_ACCESSED)
4924
0
               || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
4925
0
                        _PAGE_ACCESSED|_PAGE_DIRTY)
4926
0
               || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY)
4927
0
               || sh_l1e_is_magic(*sl1e)) )
4928
0
            AUDIT_FAIL(1, "fl1e has bad flags");
4929
0
    });
4930
0
    return 0;
4931
0
}
Unexecuted instantiation: sh_audit_fl1_table__guest_2
Unexecuted instantiation: sh_audit_fl1_table__guest_3
Unexecuted instantiation: sh_audit_fl1_table__guest_4
4932
0
4933
0
int sh_audit_l2_table(struct vcpu *v, mfn_t sl2mfn, mfn_t x)
4934
0
{
4935
0
    struct domain *d = v->domain;
4936
0
    guest_l2e_t *gl2e, *gp;
4937
0
    shadow_l2e_t *sl2e;
4938
0
    mfn_t mfn, gmfn, gl2mfn;
4939
0
    gfn_t gfn;
4940
0
    p2m_type_t p2mt;
4941
0
    char *s;
4942
0
    int done = 0;
4943
0
4944
0
    /* Follow the backpointer */
4945
0
    ASSERT(mfn_to_page(sl2mfn)->u.sh.head);
4946
0
    gl2mfn = backpointer(mfn_to_page(sl2mfn));
4947
0
4948
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
4949
0
    /* Only L1's may be out of sync. */
4950
0
    if ( page_is_out_of_sync(mfn_to_page(gl2mfn)) )
4951
0
        AUDIT_FAIL_MIN(2, "gmfn %lx is out of sync", mfn_x(gl2mfn));
4952
0
#endif
4953
0
4954
0
    gl2e = gp = map_domain_page(gl2mfn);
4955
0
    SHADOW_FOREACH_L2E(sl2mfn, sl2e, &gl2e, done, d, {
4956
0
4957
0
        s = sh_audit_flags(v, 2, guest_l2e_get_flags(*gl2e),
4958
0
                            shadow_l2e_get_flags(*sl2e));
4959
0
        if ( s ) AUDIT_FAIL(2, "%s", s);
4960
0
4961
0
        if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS )
4962
0
        {
4963
0
            gfn = guest_l2e_get_gfn(*gl2e);
4964
0
            mfn = shadow_l2e_get_mfn(*sl2e);
4965
0
            gmfn = (guest_l2e_get_flags(*gl2e) & _PAGE_PSE)
4966
0
                ? get_fl1_shadow_status(d, gfn)
4967
0
                : get_shadow_status(d,
4968
0
                    get_gfn_query_unlocked(d, gfn_x(gfn),
4969
0
                                        &p2mt), SH_type_l1_shadow);
4970
0
            if ( mfn_x(gmfn) != mfn_x(mfn) )
4971
0
                AUDIT_FAIL(2, "bad translation: gfn %" SH_PRI_gfn
4972
0
                           " (--> %" PRI_mfn ")"
4973
0
                           " --> %" PRI_mfn " != mfn %" PRI_mfn,
4974
0
                           gfn_x(gfn),
4975
0
                           (guest_l2e_get_flags(*gl2e) & _PAGE_PSE) ? 0
4976
0
                           : mfn_x(get_gfn_query_unlocked(d,
4977
0
                                   gfn_x(gfn), &p2mt)), mfn_x(gmfn), mfn_x(mfn));
4978
0
        }
4979
0
    });
4980
0
    unmap_domain_page(gp);
4981
0
    return 0;
4982
0
}
Unexecuted instantiation: sh_audit_l2_table__guest_3
Unexecuted instantiation: sh_audit_l2_table__guest_2
Unexecuted instantiation: sh_audit_l2_table__guest_4
4983
0
4984
0
#if GUEST_PAGING_LEVELS >= 4
4985
0
int sh_audit_l3_table(struct vcpu *v, mfn_t sl3mfn, mfn_t x)
4986
0
{
4987
0
    struct domain *d = v->domain;
4988
0
    guest_l3e_t *gl3e, *gp;
4989
0
    shadow_l3e_t *sl3e;
4990
0
    mfn_t mfn, gmfn, gl3mfn;
4991
0
    gfn_t gfn;
4992
0
    p2m_type_t p2mt;
4993
0
    char *s;
4994
0
    int done = 0;
4995
0
4996
0
    /* Follow the backpointer */
4997
0
    ASSERT(mfn_to_page(sl3mfn)->u.sh.head);
4998
0
    gl3mfn = backpointer(mfn_to_page(sl3mfn));
4999
0
5000
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
5001
0
    /* Only L1's may be out of sync. */
5002
0
    if ( page_is_out_of_sync(mfn_to_page(gl3mfn)) )
5003
0
        AUDIT_FAIL_MIN(3, "gmfn %lx is out of sync", mfn_x(gl3mfn));
5004
0
#endif
5005
0
5006
0
    gl3e = gp = map_domain_page(gl3mfn);
5007
0
    SHADOW_FOREACH_L3E(sl3mfn, sl3e, &gl3e, done, {
5008
0
5009
0
        s = sh_audit_flags(v, 3, guest_l3e_get_flags(*gl3e),
5010
0
                            shadow_l3e_get_flags(*sl3e));
5011
0
        if ( s ) AUDIT_FAIL(3, "%s", s);
5012
0
5013
0
        if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS )
5014
0
        {
5015
0
            gfn = guest_l3e_get_gfn(*gl3e);
5016
0
            mfn = shadow_l3e_get_mfn(*sl3e);
5017
0
            gmfn = get_shadow_status(d, get_gfn_query_unlocked(
5018
0
                                        d, gfn_x(gfn), &p2mt),
5019
0
                                     ((GUEST_PAGING_LEVELS == 3 ||
5020
0
                                       is_pv_32bit_domain(d))
5021
0
                                      && !shadow_mode_external(d)
5022
0
                                      && (guest_index(gl3e) % 4) == 3)
5023
0
                                     ? SH_type_l2h_shadow
5024
0
                                     : SH_type_l2_shadow);
5025
0
            if ( mfn_x(gmfn) != mfn_x(mfn) )
5026
0
                AUDIT_FAIL(3, "bad translation: gfn %" SH_PRI_gfn
5027
0
                           " --> %" PRI_mfn " != mfn %" PRI_mfn,
5028
0
                           gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn));
5029
0
        }
5030
0
    });
5031
0
    unmap_domain_page(gp);
5032
0
    return 0;
5033
0
}
5034
0
5035
0
int sh_audit_l4_table(struct vcpu *v, mfn_t sl4mfn, mfn_t x)
5036
0
{
5037
0
    struct domain *d = v->domain;
5038
0
    guest_l4e_t *gl4e, *gp;
5039
0
    shadow_l4e_t *sl4e;
5040
0
    mfn_t mfn, gmfn, gl4mfn;
5041
0
    gfn_t gfn;
5042
0
    p2m_type_t p2mt;
5043
0
    char *s;
5044
0
    int done = 0;
5045
0
5046
0
    /* Follow the backpointer */
5047
0
    ASSERT(mfn_to_page(sl4mfn)->u.sh.head);
5048
0
    gl4mfn = backpointer(mfn_to_page(sl4mfn));
5049
0
5050
0
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
5051
0
    /* Only L1's may be out of sync. */
5052
0
    if ( page_is_out_of_sync(mfn_to_page(gl4mfn)) )
5053
0
        AUDIT_FAIL_MIN(4, "gmfn %lx is out of sync", mfn_x(gl4mfn));
5054
0
#endif
5055
0
5056
0
    gl4e = gp = map_domain_page(gl4mfn);
5057
0
    SHADOW_FOREACH_L4E(sl4mfn, sl4e, &gl4e, done, d,
5058
0
    {
5059
0
        s = sh_audit_flags(v, 4, guest_l4e_get_flags(*gl4e),
5060
0
                            shadow_l4e_get_flags(*sl4e));
5061
0
        if ( s ) AUDIT_FAIL(4, "%s", s);
5062
0
5063
0
        if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS )
5064
0
        {
5065
0
            gfn = guest_l4e_get_gfn(*gl4e);
5066
0
            mfn = shadow_l4e_get_mfn(*sl4e);
5067
0
            gmfn = get_shadow_status(d, get_gfn_query_unlocked(
5068
0
                                     d, gfn_x(gfn), &p2mt),
5069
0
                                     SH_type_l3_shadow);
5070
0
            if ( mfn_x(gmfn) != mfn_x(mfn) )
5071
0
                AUDIT_FAIL(4, "bad translation: gfn %" SH_PRI_gfn
5072
0
                           " --> %" PRI_mfn " != mfn %" PRI_mfn,
5073
0
                           gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn));
5074
0
        }
5075
0
    });
5076
0
    unmap_domain_page(gp);
5077
0
    return 0;
5078
0
}
5079
0
#endif /* GUEST_PAGING_LEVELS >= 4 */
5080
0
5081
0
5082
0
#undef AUDIT_FAIL
5083
0
5084
0
#endif /* Audit code */
5085
0
5086
0
/**************************************************************************/
5087
0
/* Entry points into this mode of the shadow code.
5088
0
 * This will all be mangled by the preprocessor to uniquify everything. */
5089
0
const struct paging_mode sh_paging_mode = {
5090
0
    .page_fault                    = sh_page_fault,
5091
0
    .invlpg                        = sh_invlpg,
5092
0
    .gva_to_gfn                    = sh_gva_to_gfn,
5093
0
    .update_cr3                    = sh_update_cr3,
5094
0
    .update_paging_modes           = shadow_update_paging_modes,
5095
0
    .write_p2m_entry               = shadow_write_p2m_entry,
5096
0
    .guest_levels                  = GUEST_PAGING_LEVELS,
5097
0
    .shadow.detach_old_tables      = sh_detach_old_tables,
5098
0
    .shadow.x86_emulate_write      = sh_x86_emulate_write,
5099
0
    .shadow.x86_emulate_cmpxchg    = sh_x86_emulate_cmpxchg,
5100
0
    .shadow.write_guest_entry      = sh_write_guest_entry,
5101
0
    .shadow.cmpxchg_guest_entry    = sh_cmpxchg_guest_entry,
5102
0
    .shadow.make_monitor_table     = sh_make_monitor_table,
5103
0
    .shadow.destroy_monitor_table  = sh_destroy_monitor_table,
5104
0
#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
5105
0
    .shadow.guess_wrmap            = sh_guess_wrmap,
5106
0
#endif
5107
0
    .shadow.pagetable_dying        = sh_pagetable_dying,
5108
0
    .shadow.shadow_levels          = SHADOW_PAGING_LEVELS,
5109
0
};
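The sh_paging_mode structure above is the per-mode table of entry points through which the rest of the hypervisor reaches this (preprocessor-specialised) code. Below is a stripped-down sketch of the same dispatch idea, with invented operation names rather than the real paging_mode fields.

#include <stdio.h>

/* A tiny ops table in the style of sh_paging_mode: callers hold a pointer
 * to the table and never care which specialisation filled it in. */
struct pt_ops {
    void (*update_cr3)(int vcpu_id);
    unsigned int levels;
};

static void update_cr3_4level(int vcpu_id)
{
    printf("vcpu%d: rebuilding 4-level shadow\n", vcpu_id);
}

static const struct pt_ops four_level_ops = {
    .update_cr3 = update_cr3_4level,
    .levels     = 4,
};

int main(void)
{
    const struct pt_ops *ops = &four_level_ops;   /* selected per guest mode */

    ops->update_cr3(0);
    return 0;
}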
5110
0
5111
0
/*
5112
0
 * Local variables:
5113
0
 * mode: C
5114
0
 * c-file-style: "BSD"
5115
0
 * c-basic-offset: 4
5116
0
 * indent-tabs-mode: nil
5117
0
 * End:
5118
0
 */