Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/arch/x86/mm/p2m.c
Line | Count | Source
1
/******************************************************************************
2
 * arch/x86/mm/p2m.c
3
 *
4
 * physical-to-machine mappings for automatically-translated domains.
5
 *
6
 * Parts of this code are Copyright (c) 2009 by Citrix Systems, Inc. (Patrick Colp)
7
 * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
8
 * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
9
 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
10
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
24
 */
25
26
#include <xen/guest_access.h> /* copy_from_guest() */
27
#include <xen/iommu.h>
28
#include <xen/vm_event.h>
29
#include <xen/event.h>
30
#include <public/vm_event.h>
31
#include <asm/domain.h>
32
#include <asm/page.h>
33
#include <asm/paging.h>
34
#include <asm/p2m.h>
35
#include <asm/hvm/vmx/vmx.h> /* ept_p2m_init() */
36
#include <asm/mem_sharing.h>
37
#include <asm/hvm/nestedhvm.h>
38
#include <asm/altp2m.h>
39
#include <asm/hvm/svm/amd-iommu-proto.h>
40
#include <asm/vm_event.h>
41
#include <xsm/xsm.h>
42
43
#include "mm-locks.h"
44
45
/* Turn on/off host superpage page table support for hap, default on. */
46
bool_t __initdata opt_hap_1gb = 1, __initdata opt_hap_2mb = 1;
47
boolean_param("hap_1gb", opt_hap_1gb);
48
boolean_param("hap_2mb", opt_hap_2mb);
49
50
/* Override macros from asm/page.h to make them work with mfn_t */
51
#undef mfn_to_page
52
1.96M
#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
53
#undef page_to_mfn
54
868k
#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
55
56
DEFINE_PERCPU_RWLOCK_GLOBAL(p2m_percpu_rwlock);
57
58
/* Init the datastructures for later use by the p2m code */
59
static int p2m_initialise(struct domain *d, struct p2m_domain *p2m)
60
21
{
61
21
    unsigned int i;
62
21
    int ret = 0;
63
21
64
21
    mm_rwlock_init(&p2m->lock);
65
21
    mm_lock_init(&p2m->pod.lock);
66
21
    INIT_LIST_HEAD(&p2m->np2m_list);
67
21
    INIT_PAGE_LIST_HEAD(&p2m->pages);
68
21
    INIT_PAGE_LIST_HEAD(&p2m->pod.super);
69
21
    INIT_PAGE_LIST_HEAD(&p2m->pod.single);
70
21
71
21
    p2m->domain = d;
72
21
    p2m->default_access = p2m_access_rwx;
73
21
    p2m->p2m_class = p2m_host;
74
21
75
21
    p2m->np2m_base = P2M_BASE_EADDR;
76
21
    p2m->np2m_generation = 0;
77
21
78
693
    for ( i = 0; i < ARRAY_SIZE(p2m->pod.mrp.list); ++i )
79
672
        p2m->pod.mrp.list[i] = gfn_x(INVALID_GFN);
80
21
81
21
    if ( hap_enabled(d) && cpu_has_vmx )
82
21
        ret = ept_p2m_init(p2m);
83
21
    else
84
0
        p2m_pt_init(p2m);
85
21
86
21
    spin_lock_init(&p2m->ioreq.lock);
87
21
88
21
    return ret;
89
21
}
90
91
static struct p2m_domain *p2m_init_one(struct domain *d)
92
21
{
93
21
    struct p2m_domain *p2m = xzalloc(struct p2m_domain);
94
21
95
21
    if ( !p2m )
96
0
        return NULL;
97
21
98
21
    if ( !zalloc_cpumask_var(&p2m->dirty_cpumask) )
99
0
        goto free_p2m;
100
21
101
21
    if ( p2m_initialise(d, p2m) )
102
0
        goto free_cpumask;
103
21
    return p2m;
104
21
105
0
free_cpumask:
106
0
    free_cpumask_var(p2m->dirty_cpumask);
107
0
free_p2m:
108
0
    xfree(p2m);
109
0
    return NULL;
110
0
}
111
112
static void p2m_free_one(struct p2m_domain *p2m)
113
0
{
114
0
    if ( hap_enabled(p2m->domain) && cpu_has_vmx )
115
0
        ept_p2m_uninit(p2m);
116
0
    free_cpumask_var(p2m->dirty_cpumask);
117
0
    xfree(p2m);
118
0
}
119
120
static int p2m_init_hostp2m(struct domain *d)
121
1
{
122
1
    struct p2m_domain *p2m = p2m_init_one(d);
123
1
124
1
    if ( p2m )
125
1
    {
126
1
        p2m->logdirty_ranges = rangeset_new(d, "log-dirty",
127
1
                                            RANGESETF_prettyprint_hex);
128
1
        if ( p2m->logdirty_ranges )
129
1
        {
130
1
            d->arch.p2m = p2m;
131
1
            return 0;
132
1
        }
133
0
        p2m_free_one(p2m);
134
0
    }
135
0
    return -ENOMEM;
136
1
}
137
138
static void p2m_teardown_hostp2m(struct domain *d)
139
0
{
140
0
    /* Iterate over all p2m tables per domain */
141
0
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
142
0
143
0
    if ( p2m )
144
0
    {
145
0
        rangeset_destroy(p2m->logdirty_ranges);
146
0
        p2m_free_one(p2m);
147
0
        d->arch.p2m = NULL;
148
0
    }
149
0
}
150
151
static void p2m_teardown_nestedp2m(struct domain *d)
152
0
{
153
0
    unsigned int i;
154
0
    struct p2m_domain *p2m;
155
0
156
0
    for ( i = 0; i < MAX_NESTEDP2M; i++ )
157
0
    {
158
0
        if ( !d->arch.nested_p2m[i] )
159
0
            continue;
160
0
        p2m = d->arch.nested_p2m[i];
161
0
        list_del(&p2m->np2m_list);
162
0
        p2m_free_one(p2m);
163
0
        d->arch.nested_p2m[i] = NULL;
164
0
    }
165
0
}
166
167
static int p2m_init_nestedp2m(struct domain *d)
168
1
{
169
1
    unsigned int i;
170
1
    struct p2m_domain *p2m;
171
1
172
1
    mm_lock_init(&d->arch.nested_p2m_lock);
173
11
    for ( i = 0; i < MAX_NESTEDP2M; i++ )
174
10
    {
175
10
        d->arch.nested_p2m[i] = p2m = p2m_init_one(d);
176
10
        if ( p2m == NULL )
177
0
        {
178
0
            p2m_teardown_nestedp2m(d);
179
0
            return -ENOMEM;
180
0
        }
181
10
        p2m->p2m_class = p2m_nested;
182
10
        p2m->write_p2m_entry = nestedp2m_write_p2m_entry;
183
10
        list_add(&p2m->np2m_list, &p2m_get_hostp2m(d)->np2m_list);
184
10
    }
185
1
186
1
    return 0;
187
1
}
188
189
static void p2m_teardown_altp2m(struct domain *d)
190
0
{
191
0
    unsigned int i;
192
0
    struct p2m_domain *p2m;
193
0
194
0
    for ( i = 0; i < MAX_ALTP2M; i++ )
195
0
    {
196
0
        if ( !d->arch.altp2m_p2m[i] )
197
0
            continue;
198
0
        p2m = d->arch.altp2m_p2m[i];
199
0
        d->arch.altp2m_p2m[i] = NULL;
200
0
        p2m_free_one(p2m);
201
0
    }
202
0
}
203
204
static int p2m_init_altp2m(struct domain *d)
205
1
{
206
1
    unsigned int i;
207
1
    struct p2m_domain *p2m;
208
1
209
1
    mm_lock_init(&d->arch.altp2m_list_lock);
210
11
    for ( i = 0; i < MAX_ALTP2M; i++ )
211
10
    {
212
10
        d->arch.altp2m_p2m[i] = p2m = p2m_init_one(d);
213
10
        if ( p2m == NULL )
214
0
        {
215
0
            p2m_teardown_altp2m(d);
216
0
            return -ENOMEM;
217
0
        }
218
10
        p2m->p2m_class = p2m_alternate;
219
10
        p2m->access_required = 1;
220
10
        _atomic_set(&p2m->active_vcpus, 0);
221
10
    }
222
1
223
1
    return 0;
224
1
}
225
226
int p2m_init(struct domain *d)
227
1
{
228
1
    int rc;
229
1
230
1
    rc = p2m_init_hostp2m(d);
231
1
    if ( rc )
232
0
        return rc;
233
1
234
1
    /* Must initialise nestedp2m unconditionally
235
1
     * since nestedhvm_enabled(d) returns false here.
236
1
     * (p2m_init runs too early for HVM_PARAM_* options) */
237
1
    rc = p2m_init_nestedp2m(d);
238
1
    if ( rc )
239
0
    {
240
0
        p2m_teardown_hostp2m(d);
241
0
        return rc;
242
0
    }
243
1
244
1
    rc = p2m_init_altp2m(d);
245
1
    if ( rc )
246
0
    {
247
0
        p2m_teardown_hostp2m(d);
248
0
        p2m_teardown_nestedp2m(d);
249
0
    }
250
1
251
1
    return rc;
252
1
}
253
254
int p2m_is_logdirty_range(struct p2m_domain *p2m, unsigned long start,
255
                          unsigned long end)
256
0
{
257
0
    ASSERT(p2m_is_hostp2m(p2m));
258
0
    if ( p2m->global_logdirty ||
259
0
         rangeset_contains_range(p2m->logdirty_ranges, start, end) )
260
0
        return 1;
261
0
    if ( rangeset_overlaps_range(p2m->logdirty_ranges, start, end) )
262
0
        return -1;
263
0
    return 0;
264
0
}
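
A minimal sketch of how a caller might interpret the tristate result above; the handling shown is illustrative and not taken from this file:

    switch ( p2m_is_logdirty_range(p2m, start, end) )
    {
    case 1:   /* whole range is log-dirty (or global log-dirty is enabled) */
        break;
    case 0:   /* no overlap with any log-dirty range */
        break;
    case -1:  /* partial overlap: e.g. fall back to smaller mappings */
        break;
    }
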
265
266
void p2m_change_entry_type_global(struct domain *d,
267
                                  p2m_type_t ot, p2m_type_t nt)
268
0
{
269
0
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
270
0
271
0
    ASSERT(ot != nt);
272
0
    ASSERT(p2m_is_changeable(ot) && p2m_is_changeable(nt));
273
0
274
0
    p2m_lock(p2m);
275
0
    p2m->change_entry_type_global(p2m, ot, nt);
276
0
    p2m->global_logdirty = (nt == p2m_ram_logdirty);
277
0
    p2m_unlock(p2m);
278
0
}
279
280
void p2m_memory_type_changed(struct domain *d)
281
23
{
282
23
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
283
23
284
23
    if ( p2m->memory_type_changed )
285
23
    {
286
23
        p2m_lock(p2m);
287
23
        p2m->memory_type_changed(p2m);
288
23
        p2m_unlock(p2m);
289
23
    }
290
23
}
291
292
int p2m_set_ioreq_server(struct domain *d,
293
                         unsigned int flags,
294
                         struct hvm_ioreq_server *s)
295
0
{
296
0
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
297
0
    int rc;
298
0
299
0
    /*
300
0
     * Use lock to prevent concurrent setting attempts
301
0
     * from multiple ioreq servers.
302
0
     */
303
0
    spin_lock(&p2m->ioreq.lock);
304
0
305
0
    /* Unmap ioreq server from p2m type by passing flags with 0. */
306
0
    if ( flags == 0 )
307
0
    {
308
0
        rc = -EINVAL;
309
0
        if ( p2m->ioreq.server != s )
310
0
            goto out;
311
0
312
0
        p2m->ioreq.server = NULL;
313
0
        p2m->ioreq.flags = 0;
314
0
    }
315
0
    else
316
0
    {
317
0
        rc = -EBUSY;
318
0
        if ( p2m->ioreq.server != NULL )
319
0
            goto out;
320
0
321
0
        /*
322
0
         * It is possible that an ioreq server has just been unmapped,
323
0
         * released the spin lock, with some p2m_ioreq_server entries
324
0
         * in p2m table remained. We shall refuse another ioreq server
325
0
         * mapping request in such case.
326
0
         */
327
0
        if ( read_atomic(&p2m->ioreq.entry_count) )
328
0
            goto out;
329
0
330
0
        p2m->ioreq.server = s;
331
0
        p2m->ioreq.flags = flags;
332
0
    }
333
0
334
0
    rc = 0;
335
0
336
0
 out:
337
0
    spin_unlock(&p2m->ioreq.lock);
338
0
339
0
    return rc;
340
0
}
341
342
struct hvm_ioreq_server *p2m_get_ioreq_server(struct domain *d,
343
                                              unsigned int *flags)
344
0
{
345
0
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
346
0
    struct hvm_ioreq_server *s;
347
0
348
0
    spin_lock(&p2m->ioreq.lock);
349
0
350
0
    s = p2m->ioreq.server;
351
0
    *flags = p2m->ioreq.flags;
352
0
353
0
    spin_unlock(&p2m->ioreq.lock);
354
0
    return s;
355
0
}
356
357
void p2m_enable_hardware_log_dirty(struct domain *d)
358
0
{
359
0
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
360
0
361
0
    if ( p2m->enable_hardware_log_dirty )
362
0
    {
363
0
        p2m_lock(p2m);
364
0
        p2m->enable_hardware_log_dirty(p2m);
365
0
        p2m_unlock(p2m);
366
0
    }
367
0
}
368
369
void p2m_disable_hardware_log_dirty(struct domain *d)
370
0
{
371
0
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
372
0
373
0
    if ( p2m->disable_hardware_log_dirty )
374
0
    {
375
0
        p2m_lock(p2m);
376
0
        p2m->disable_hardware_log_dirty(p2m);
377
0
        p2m_unlock(p2m);
378
0
    }
379
0
}
380
381
void p2m_flush_hardware_cached_dirty(struct domain *d)
382
0
{
383
0
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
384
0
385
0
    if ( p2m->flush_hardware_cached_dirty )
386
0
    {
387
0
        p2m_lock(p2m);
388
0
        p2m->flush_hardware_cached_dirty(p2m);
389
0
        p2m_unlock(p2m);
390
0
    }
391
0
}
392
393
/*
394
 * Force a synchronous P2M TLB flush if a deferred flush is pending.
395
 *
396
 * Must be called with the p2m lock held.
397
 */
398
void p2m_tlb_flush_sync(struct p2m_domain *p2m)
399
0
{
400
0
    if ( p2m->need_flush ) {
401
0
        p2m->need_flush = 0;
402
0
        p2m->tlb_flush(p2m);
403
0
    }
404
0
}
405
406
/*
407
 * Unlock the p2m lock and do a P2M TLB flush if needed.
408
 */
409
void p2m_unlock_and_tlb_flush(struct p2m_domain *p2m)
410
510k
{
411
510k
    if ( p2m->need_flush ) {
412
218k
        p2m->need_flush = 0;
413
218k
        mm_write_unlock(&p2m->lock);
414
218k
        p2m->tlb_flush(p2m);
415
218k
    } else
416
292k
        mm_write_unlock(&p2m->lock);
417
510k
}
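
A fragmentary sketch of the intended pairing, assuming p2m_lock() takes the write side of p2m->lock as defined in mm-locks.h (illustrative only, not a caller from this file):

    p2m_lock(p2m);                      /* take the p2m write lock */
    rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_rw,
                       p2m->default_access);
    p2m_unlock_and_tlb_flush(p2m);      /* drop the lock; flush if need_flush was set */
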
418
419
mfn_t __get_gfn_type_access(struct p2m_domain *p2m, unsigned long gfn_l,
420
                    p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
421
                    unsigned int *page_order, bool_t locked)
422
1.96M
{
423
1.96M
    mfn_t mfn;
424
1.96M
    gfn_t gfn = _gfn(gfn_l);
425
1.96M
426
1.96M
    /* Unshare makes no sense without populate. */
427
1.96M
    if ( q & P2M_UNSHARE )
428
55
        q |= P2M_ALLOC;
429
1.96M
430
1.96M
    if ( !p2m || !paging_mode_translate(p2m->domain) )
431
0
    {
432
0
        /* Not necessarily true, but for non-translated guests, we claim
433
0
         * it's the most generic kind of memory */
434
0
        *t = p2m_ram_rw;
435
0
        return _mfn(gfn_l);
436
0
    }
437
1.96M
438
1.96M
    if ( locked )
439
1.96M
        /* Grab the lock here, don't release until put_gfn */
440
1.96M
        gfn_lock(p2m, gfn, 0);
441
1.96M
442
1.96M
    mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);
443
1.96M
444
1.96M
    if ( (q & P2M_UNSHARE) && p2m_is_shared(*t) )
445
0
    {
446
0
        ASSERT(p2m_is_hostp2m(p2m));
447
0
        /* Try to unshare. If we fail, communicate ENOMEM without
448
0
         * sleeping. */
449
0
        if ( mem_sharing_unshare_page(p2m->domain, gfn_l, 0) < 0 )
450
0
            (void)mem_sharing_notify_enomem(p2m->domain, gfn_l, 0);
451
0
        mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);
452
0
    }
453
1.96M
454
1.96M
    if (unlikely((p2m_is_broken(*t))))
455
0
    {
456
0
        /* Return invalid_mfn to avoid caller's access */
457
0
        mfn = INVALID_MFN;
458
0
        if ( q & P2M_ALLOC )
459
0
            domain_crash(p2m->domain);
460
0
    }
461
1.96M
462
1.96M
    return mfn;
463
1.96M
}
464
465
void __put_gfn(struct p2m_domain *p2m, unsigned long gfn)
466
168
{
467
168
    if ( !p2m || !paging_mode_translate(p2m->domain) )
468
168
        /* Nothing to do in this case */
469
0
        return;
470
168
471
168
    ASSERT(gfn_locked_by_me(p2m, gfn));
472
168
473
168
    gfn_unlock(p2m, gfn, 0);
474
168
}
475
476
/* Atomically look up a GFN and take a reference count on the backing page. */
477
struct page_info *p2m_get_page_from_gfn(
478
    struct p2m_domain *p2m, gfn_t gfn,
479
    p2m_type_t *t, p2m_access_t *a, p2m_query_t q)
480
1.95M
{
481
1.95M
    struct page_info *page = NULL;
482
1.95M
    p2m_access_t _a;
483
1.95M
    p2m_type_t _t;
484
1.95M
    mfn_t mfn;
485
1.95M
486
1.95M
    /* Allow t or a to be NULL */
487
18.4E
    t = t ?: &_t;
488
1.95M
    a = a ?: &_a;
489
1.95M
490
1.95M
    if ( likely(!p2m_locked_by_me(p2m)) )
491
1.96M
    {
492
1.96M
        /* Fast path: look up and get out */
493
1.96M
        p2m_read_lock(p2m);
494
1.96M
        mfn = __get_gfn_type_access(p2m, gfn_x(gfn), t, a, 0, NULL, 0);
495
1.96M
        if ( p2m_is_any_ram(*t) && mfn_valid(mfn)
496
1.96M
             && !((q & P2M_UNSHARE) && p2m_is_shared(*t)) )
497
1.96M
        {
498
1.96M
            page = mfn_to_page(mfn);
499
1.96M
            if ( unlikely(p2m_is_foreign(*t)) )
500
0
            {
501
0
                struct domain *fdom = page_get_owner_and_reference(page);
502
0
503
0
                ASSERT(fdom != p2m->domain);
504
0
                if ( fdom == NULL )
505
0
                    page = NULL;
506
0
            }
507
1.96M
            else if ( !get_page(page, p2m->domain) &&
508
1.96M
                      /* Page could be shared */
509
0
                      (!p2m_is_shared(*t) || !get_page(page, dom_cow)) )
510
0
                page = NULL;
511
1.96M
        }
512
1.96M
        p2m_read_unlock(p2m);
513
1.96M
514
1.96M
        if ( page )
515
1.97M
            return page;
516
1.96M
517
1.96M
        /* Error path: not a suitable GFN at all */
518
18.4E
        if ( !p2m_is_ram(*t) && !p2m_is_paging(*t) && !p2m_is_pod(*t) )
519
0
            return NULL;
520
18.4E
    }
521
1.95M
522
1.95M
    /* Slow path: take the write lock and do fixups */
523
18.4E
    mfn = get_gfn_type_access(p2m, gfn_x(gfn), t, a, q, NULL);
524
18.4E
    if ( p2m_is_ram(*t) && mfn_valid(mfn) )
525
55
    {
526
55
        page = mfn_to_page(mfn);
527
55
        if ( !get_page(page, p2m->domain) )
528
0
            page = NULL;
529
55
    }
530
18.4E
    put_gfn(p2m->domain, gfn_x(gfn));
531
18.4E
532
18.4E
    return page;
533
1.95M
}
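
A usage sketch for the lookup above; the caller owns the page reference on success and must drop it (illustrative, not taken from this file):

    p2m_type_t t;
    struct page_info *pg = p2m_get_page_from_gfn(p2m, gfn, &t, NULL, P2M_ALLOC);

    if ( pg )
    {
        /* ... use the backing frame while holding the reference ... */
        put_page(pg);   /* release the reference taken by the lookup */
    }
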
534
535
/* Returns: 0 for success, -errno for failure */
536
int p2m_set_entry(struct p2m_domain *p2m, gfn_t gfn, mfn_t mfn,
537
                  unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma)
538
507k
{
539
507k
    struct domain *d = p2m->domain;
540
507k
    unsigned long todo = 1ul << page_order;
541
507k
    unsigned int order;
542
507k
    int set_rc, rc = 0;
543
507k
544
507k
    ASSERT(gfn_locked_by_me(p2m, gfn));
545
507k
546
1.57M
    while ( todo )
547
1.06M
    {
548
1.06M
        if ( hap_enabled(d) )
549
1.06M
        {
550
1.06M
            unsigned long fn_mask = !mfn_eq(mfn, INVALID_MFN) ? mfn_x(mfn) : 0;
551
1.06M
552
1.06M
            fn_mask |= gfn_x(gfn) | todo;
553
1.06M
554
1.06M
            order = (!(fn_mask & ((1ul << PAGE_ORDER_1G) - 1)) &&
555
12
                     hap_has_1gb) ? PAGE_ORDER_1G :
556
1.06M
                    (!(fn_mask & ((1ul << PAGE_ORDER_2M) - 1)) &&
557
1.06M
                     hap_has_2mb) ? PAGE_ORDER_2M : PAGE_ORDER_4K;
558
1.06M
        }
559
1.06M
        else
560
0
            order = 0;
561
1.06M
562
1.06M
        set_rc = p2m->set_entry(p2m, gfn, mfn, order, p2mt, p2ma, -1);
563
1.06M
        if ( set_rc )
564
0
            rc = set_rc;
565
1.06M
566
1.06M
        gfn = gfn_add(gfn, 1ul << order);
567
1.06M
        if ( !mfn_eq(mfn, INVALID_MFN) )
568
850k
            mfn = mfn_add(mfn, 1ul << order);
569
1.06M
        todo -= 1ul << order;
570
1.06M
    }
571
507k
572
507k
    return rc;
573
507k
}
574
575
mfn_t p2m_alloc_ptp(struct p2m_domain *p2m, unsigned int level)
576
1.28k
{
577
1.28k
    struct page_info *pg;
578
1.28k
579
1.28k
    ASSERT(p2m);
580
1.28k
    ASSERT(p2m->domain);
581
1.28k
    ASSERT(p2m->domain->arch.paging.alloc_page);
582
1.28k
    pg = p2m->domain->arch.paging.alloc_page(p2m->domain);
583
1.28k
    if ( !pg )
584
0
        return INVALID_MFN;
585
1.28k
586
1.28k
    page_list_add_tail(pg, &p2m->pages);
587
1.28k
    BUILD_BUG_ON(PGT_l1_page_table * 2 != PGT_l2_page_table);
588
1.28k
    BUILD_BUG_ON(PGT_l1_page_table * 3 != PGT_l3_page_table);
589
1.28k
    BUILD_BUG_ON(PGT_l1_page_table * 4 != PGT_l4_page_table);
590
1.28k
    pg->u.inuse.type_info = (PGT_l1_page_table * level) | 1 | PGT_validated;
591
1.28k
592
1.28k
    return page_to_mfn(pg);
593
1.28k
}
594
595
void p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg)
596
0
{
597
0
    ASSERT(pg);
598
0
    ASSERT(p2m);
599
0
    ASSERT(p2m->domain);
600
0
    ASSERT(p2m->domain->arch.paging.free_page);
601
0
602
0
    page_list_del(pg, &p2m->pages);
603
0
    p2m->domain->arch.paging.free_page(p2m->domain, pg);
604
0
605
0
    return;
606
0
}
607
608
/*
609
 * Allocate a new p2m table for a domain.
610
 *
611
 * The structure of the p2m table is that of a pagetable for xen (i.e. it is
612
 * controlled by CONFIG_PAGING_LEVELS).
613
 *
614
 * Returns 0 for success, -errno for failure.
615
 */
616
int p2m_alloc_table(struct p2m_domain *p2m)
617
11
{
618
11
    mfn_t top_mfn;
619
11
    struct domain *d = p2m->domain;
620
11
    int rc = 0;
621
11
622
11
    p2m_lock(p2m);
623
11
624
11
    if ( p2m_is_hostp2m(p2m)
625
1
         && !page_list_empty(&d->page_list) )
626
0
    {
627
0
        P2M_ERROR("dom %d already has memory allocated\n", d->domain_id);
628
0
        p2m_unlock(p2m);
629
0
        return -EINVAL;
630
0
    }
631
11
632
11
    if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) != 0 )
633
0
    {
634
0
        P2M_ERROR("p2m already allocated for this domain\n");
635
0
        p2m_unlock(p2m);
636
0
        return -EINVAL;
637
0
    }
638
11
639
11
    P2M_PRINTK("allocating p2m table\n");
640
11
641
11
    top_mfn = p2m_alloc_ptp(p2m, 4);
642
11
    if ( mfn_eq(top_mfn, INVALID_MFN) )
643
0
    {
644
0
        p2m_unlock(p2m);
645
0
        return -ENOMEM;
646
0
    }
647
11
648
11
    p2m->phys_table = pagetable_from_mfn(top_mfn);
649
11
650
11
    if ( hap_enabled(d) )
651
11
        iommu_share_p2m_table(d);
652
11
653
11
    P2M_PRINTK("populating p2m table\n");
654
11
655
11
    /* Initialise physmap tables for slot zero. Other code assumes this. */
656
11
    p2m->defer_nested_flush = 1;
657
11
    rc = p2m_set_entry(p2m, _gfn(0), INVALID_MFN, PAGE_ORDER_4K,
658
11
                       p2m_invalid, p2m->default_access);
659
11
    p2m->defer_nested_flush = 0;
660
11
    p2m_unlock(p2m);
661
11
    if ( !rc )
662
11
        P2M_PRINTK("p2m table initialised for slot zero\n");
663
11
    else
664
0
        P2M_PRINTK("failed to initialise p2m table for slot zero (%d)\n", rc);
665
11
    return rc;
666
11
}
667
668
/*
669
 * hvm fixme: when adding support for pvh non-hardware domains, this path must
670
 * cleanup any foreign p2m types (release refcnts on them).
671
 */
672
void p2m_teardown(struct p2m_domain *p2m)
673
/* Return all the p2m pages to Xen.
674
 * We know we don't have any extra mappings to these pages */
675
0
{
676
0
    struct page_info *pg;
677
0
    struct domain *d;
678
0
679
0
    if (p2m == NULL)
680
0
        return;
681
0
682
0
    d = p2m->domain;
683
0
684
0
    p2m_lock(p2m);
685
0
    ASSERT(atomic_read(&d->shr_pages) == 0);
686
0
    p2m->phys_table = pagetable_null();
687
0
688
0
    while ( (pg = page_list_remove_head(&p2m->pages)) )
689
0
        d->arch.paging.free_page(d, pg);
690
0
    p2m_unlock(p2m);
691
0
}
692
693
void p2m_final_teardown(struct domain *d)
694
0
{
695
0
    /*
696
0
     * We must teardown both of them unconditionally because
697
0
     * we initialise them unconditionally.
698
0
     */
699
0
    p2m_teardown_altp2m(d);
700
0
    p2m_teardown_nestedp2m(d);
701
0
702
0
    /* Iterate over all p2m tables per domain */
703
0
    p2m_teardown_hostp2m(d);
704
0
}
705
706
707
static int
708
p2m_remove_page(struct p2m_domain *p2m, unsigned long gfn_l, unsigned long mfn,
709
                unsigned int page_order)
710
2
{
711
2
    unsigned long i;
712
2
    gfn_t gfn = _gfn(gfn_l);
713
2
    mfn_t mfn_return;
714
2
    p2m_type_t t;
715
2
    p2m_access_t a;
716
2
717
2
    if ( !paging_mode_translate(p2m->domain) )
718
0
    {
719
0
        int rc = 0;
720
0
721
0
        if ( need_iommu(p2m->domain) )
722
0
        {
723
0
            for ( i = 0; i < (1 << page_order); i++ )
724
0
            {
725
0
                int ret = iommu_unmap_page(p2m->domain, mfn + i);
726
0
727
0
                if ( !rc )
728
0
                    rc = ret;
729
0
            }
730
0
        }
731
0
732
0
        return rc;
733
0
    }
734
2
735
2
    ASSERT(gfn_locked_by_me(p2m, gfn));
736
2
    P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn_l, mfn);
737
2
738
2
    if ( mfn_valid(_mfn(mfn)) )
739
2
    {
740
4
        for ( i = 0; i < (1UL << page_order); i++ )
741
2
        {
742
2
            mfn_return = p2m->get_entry(p2m, gfn_add(gfn, i), &t, &a, 0,
743
2
                                        NULL, NULL);
744
2
            if ( !p2m_is_grant(t) && !p2m_is_shared(t) && !p2m_is_foreign(t) )
745
2
                set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
746
2
            ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) );
747
2
        }
748
2
    }
749
2
    return p2m_set_entry(p2m, gfn, INVALID_MFN, page_order, p2m_invalid,
750
2
                         p2m->default_access);
751
2
}
752
753
int
754
guest_physmap_remove_page(struct domain *d, gfn_t gfn,
755
                          mfn_t mfn, unsigned int page_order)
756
2
{
757
2
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
758
2
    int rc;
759
2
    gfn_lock(p2m, gfn, page_order);
760
2
    rc = p2m_remove_page(p2m, gfn_x(gfn), mfn_x(mfn), page_order);
761
2
    gfn_unlock(p2m, gfn, page_order);
762
2
    return rc;
763
2
}
764
765
int
766
guest_physmap_add_entry(struct domain *d, gfn_t gfn, mfn_t mfn,
767
                        unsigned int page_order, p2m_type_t t)
768
228
{
769
228
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
770
228
    unsigned long i;
771
228
    gfn_t ogfn;
772
228
    p2m_type_t ot;
773
228
    p2m_access_t a;
774
228
    mfn_t omfn;
775
228
    int pod_count = 0;
776
228
    int rc = 0;
777
228
778
228
    if ( !paging_mode_translate(d) )
779
0
    {
780
0
        if ( need_iommu(d) && t == p2m_ram_rw )
781
0
        {
782
0
            for ( i = 0; i < (1 << page_order); i++ )
783
0
            {
784
0
                rc = iommu_map_page(d, mfn_x(mfn_add(mfn, i)),
785
0
                                    mfn_x(mfn_add(mfn, i)),
786
0
                                    IOMMUF_readable|IOMMUF_writable);
787
0
                if ( rc != 0 )
788
0
                {
789
0
                    while ( i-- > 0 )
790
0
                        /* If statement to satisfy __must_check. */
791
0
                        if ( iommu_unmap_page(d, mfn_x(mfn_add(mfn, i))) )
792
0
                            continue;
793
0
794
0
                    return rc;
795
0
                }
796
0
            }
797
0
        }
798
0
        return 0;
799
0
    }
800
228
801
228
    /* foreign pages are added thru p2m_add_foreign */
802
228
    if ( p2m_is_foreign(t) )
803
0
        return -EINVAL;
804
228
805
228
    p2m_lock(p2m);
806
228
807
228
    P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn_x(gfn), mfn_x(mfn));
808
228
809
228
    /* First, remove m->p mappings for existing p->m mappings */
810
4.05M
    for ( i = 0; i < (1UL << page_order); i++ )
811
4.05M
    {
812
4.05M
        omfn = p2m->get_entry(p2m, gfn_add(gfn, i), &ot,
813
4.05M
                              &a, 0, NULL, NULL);
814
4.05M
        if ( p2m_is_shared(ot) )
815
0
        {
816
0
            /* Do an unshare to cleanly take care of all corner 
817
0
             * cases. */
818
0
            int rc;
819
0
            rc = mem_sharing_unshare_page(p2m->domain,
820
0
                                          gfn_x(gfn_add(gfn, i)), 0);
821
0
            if ( rc )
822
0
            {
823
0
                p2m_unlock(p2m);
824
0
                /* NOTE: Should a guest domain bring this upon itself,
825
0
                 * there is not a whole lot we can do. We are buried
826
0
                 * deep in locks from most code paths by now. So, fail
827
0
                 * the call and don't try to sleep on a wait queue
828
0
                 * while placing the mem event.
829
0
                 *
830
0
                 * However, all current (changeset 3432abcf9380) code
831
0
                 * paths avoid this unsavoury situation. For now.
832
0
                 *
833
0
                 * Foreign domains are okay to place an event as they 
834
0
                 * won't go to sleep. */
835
0
                (void)mem_sharing_notify_enomem(p2m->domain,
836
0
                                                gfn_x(gfn_add(gfn, i)),
837
0
                                                0);
838
0
                return rc;
839
0
            }
840
0
            omfn = p2m->get_entry(p2m, gfn_add(gfn, i),
841
0
                                  &ot, &a, 0, NULL, NULL);
842
0
            ASSERT(!p2m_is_shared(ot));
843
0
        }
844
4.05M
        if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
845
0
        {
846
0
            /* Really shouldn't be unmapping grant/foreign maps this way */
847
0
            domain_crash(d);
848
0
            p2m_unlock(p2m);
849
0
            
850
0
            return -EINVAL;
851
0
        }
852
4.05M
        else if ( p2m_is_ram(ot) && !p2m_is_paged(ot) )
853
0
        {
854
0
            ASSERT(mfn_valid(omfn));
855
0
            set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
856
0
        }
857
4.05M
        else if ( ot == p2m_populate_on_demand )
858
0
        {
859
0
            /* Count how many PoD entries we'll be replacing if successful */
860
0
            pod_count++;
861
0
        }
862
4.05M
        else if ( p2m_is_paging(ot) && (ot != p2m_ram_paging_out) )
863
0
        {
864
0
            /* We're plugging a hole in the physmap where a paged out page was */
865
0
            atomic_dec(&d->paged_pages);
866
0
        }
867
4.05M
    }
868
228
869
228
    /* Then, look for m->p mappings for this range and deal with them */
870
4.05M
    for ( i = 0; i < (1UL << page_order); i++ )
871
4.05M
    {
872
4.05M
        if ( page_get_owner(mfn_to_page(mfn_add(mfn, i))) == dom_cow )
873
0
        {
874
0
            /* This is no way to add a shared page to your physmap! */
875
0
            gdprintk(XENLOG_ERR, "Adding shared mfn %lx directly to dom%d physmap not allowed.\n",
876
0
                     mfn_x(mfn_add(mfn, i)), d->domain_id);
877
0
            p2m_unlock(p2m);
878
0
            return -EINVAL;
879
0
        }
880
4.05M
        if ( page_get_owner(mfn_to_page(mfn_add(mfn, i))) != d )
881
0
            continue;
882
4.05M
        ogfn = _gfn(mfn_to_gfn(d, mfn_add(mfn, i)));
883
4.05M
        if ( !gfn_eq(ogfn, _gfn(INVALID_M2P_ENTRY)) &&
884
0
             !gfn_eq(ogfn, gfn_add(gfn, i)) )
885
0
        {
886
0
            /* This machine frame is already mapped at another physical
887
0
             * address */
888
0
            P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
889
0
                      mfn_x(mfn_add(mfn, i)), gfn_x(ogfn),
890
0
                      gfn_x(gfn_add(gfn, i)));
891
0
            omfn = p2m->get_entry(p2m, ogfn, &ot, &a, 0, NULL, NULL);
892
0
            if ( p2m_is_ram(ot) && !p2m_is_paged(ot) )
893
0
            {
894
0
                ASSERT(mfn_valid(omfn));
895
0
                P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
896
0
                          gfn_x(ogfn) , mfn_x(omfn));
897
0
                if ( mfn_eq(omfn, mfn_add(mfn, i)) )
898
0
                    p2m_remove_page(p2m, gfn_x(ogfn), mfn_x(mfn_add(mfn, i)),
899
0
                                    0);
900
0
            }
901
0
        }
902
4.05M
    }
903
228
904
228
    /* Now, actually do the two-way mapping */
905
228
    if ( mfn_valid(mfn) )
906
228
    {
907
228
        rc = p2m_set_entry(p2m, gfn, mfn, page_order, t,
908
228
                           p2m->default_access);
909
228
        if ( rc )
910
0
            goto out; /* Failed to update p2m, bail without updating m2p. */
911
228
912
228
        if ( !p2m_is_grant(t) )
913
228
        {
914
4.05M
            for ( i = 0; i < (1UL << page_order); i++ )
915
4.05M
                set_gpfn_from_mfn(mfn_x(mfn_add(mfn, i)),
916
228
                                  gfn_x(gfn_add(gfn, i)));
917
228
        }
918
228
    }
919
228
    else
920
0
    {
921
0
        gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
922
0
                 gfn_x(gfn), mfn_x(mfn));
923
0
        rc = p2m_set_entry(p2m, gfn, INVALID_MFN, page_order,
924
0
                           p2m_invalid, p2m->default_access);
925
0
        if ( rc == 0 )
926
0
        {
927
0
            pod_lock(p2m);
928
0
            p2m->pod.entry_count -= pod_count;
929
0
            BUG_ON(p2m->pod.entry_count < 0);
930
0
            pod_unlock(p2m);
931
0
        }
932
0
    }
933
228
934
228
out:
935
228
    p2m_unlock(p2m);
936
228
937
228
    return rc;
938
228
}
939
940
941
/*
942
 * Modify the p2m type of a single gfn from ot to nt.
943
 * Returns: 0 for success, -errno for failure.
944
 * Resets the access permissions.
945
 */
946
int p2m_change_type_one(struct domain *d, unsigned long gfn_l,
947
                       p2m_type_t ot, p2m_type_t nt)
948
0
{
949
0
    p2m_access_t a;
950
0
    p2m_type_t pt;
951
0
    gfn_t gfn = _gfn(gfn_l);
952
0
    mfn_t mfn;
953
0
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
954
0
    int rc;
955
0
956
0
    BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
957
0
    BUG_ON(p2m_is_foreign(ot) || p2m_is_foreign(nt));
958
0
959
0
    gfn_lock(p2m, gfn, 0);
960
0
961
0
    mfn = p2m->get_entry(p2m, gfn, &pt, &a, 0, NULL, NULL);
962
0
    rc = likely(pt == ot)
963
0
         ? p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, nt,
964
0
                         p2m->default_access)
965
0
         : -EBUSY;
966
0
967
0
    gfn_unlock(p2m, gfn, 0);
968
0
969
0
    return rc;
970
0
}
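
For example (illustrative only), switching a single gfn from ordinary RAM to log-dirty, which also resets its access permissions as noted above:

    int rc = p2m_change_type_one(d, gfn_l, p2m_ram_rw, p2m_ram_logdirty);

    if ( rc == -EBUSY )
        /* the entry was not of type p2m_ram_rw when it was looked up */;
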
971
972
/* Modify the p2m type of a range of gfns from ot to nt. */
973
void p2m_change_type_range(struct domain *d, 
974
                           unsigned long start, unsigned long end,
975
                           p2m_type_t ot, p2m_type_t nt)
976
0
{
977
0
    unsigned long gfn = start;
978
0
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
979
0
    int rc = 0;
980
0
981
0
    ASSERT(ot != nt);
982
0
    ASSERT(p2m_is_changeable(ot) && p2m_is_changeable(nt));
983
0
984
0
    p2m_lock(p2m);
985
0
    p2m->defer_nested_flush = 1;
986
0
987
0
    if ( unlikely(end > p2m->max_mapped_pfn) )
988
0
    {
989
0
        if ( !gfn )
990
0
        {
991
0
            p2m->change_entry_type_global(p2m, ot, nt);
992
0
            gfn = end;
993
0
        }
994
0
        end = p2m->max_mapped_pfn + 1;
995
0
    }
996
0
    if ( gfn < end )
997
0
        rc = p2m->change_entry_type_range(p2m, ot, nt, gfn, end - 1);
998
0
    if ( rc )
999
0
    {
1000
0
        printk(XENLOG_G_ERR "Error %d changing Dom%d GFNs [%lx,%lx] from %d to %d\n",
1001
0
               rc, d->domain_id, start, end - 1, ot, nt);
1002
0
        domain_crash(d);
1003
0
    }
1004
0
1005
0
    switch ( nt )
1006
0
    {
1007
0
    case p2m_ram_rw:
1008
0
        if ( ot == p2m_ram_logdirty )
1009
0
            rc = rangeset_remove_range(p2m->logdirty_ranges, start, end - 1);
1010
0
        break;
1011
0
    case p2m_ram_logdirty:
1012
0
        if ( ot == p2m_ram_rw )
1013
0
            rc = rangeset_add_range(p2m->logdirty_ranges, start, end - 1);
1014
0
        break;
1015
0
    default:
1016
0
        break;
1017
0
    }
1018
0
    if ( rc )
1019
0
    {
1020
0
        printk(XENLOG_G_ERR "Error %d manipulating Dom%d's log-dirty ranges\n",
1021
0
               rc, d->domain_id);
1022
0
        domain_crash(d);
1023
0
    }
1024
0
1025
0
    p2m->defer_nested_flush = 0;
1026
0
    if ( nestedhvm_enabled(d) )
1027
0
        p2m_flush_nestedp2m(d);
1028
0
    p2m_unlock(p2m);
1029
0
}
1030
1031
/*
1032
 * Finish p2m type change for gfns which are marked as need_recalc in a range.
1033
 * Returns: 0/1 for success, negative for failure
1034
 */
1035
int p2m_finish_type_change(struct domain *d,
1036
                           gfn_t first_gfn, unsigned long max_nr)
1037
0
{
1038
0
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
1039
0
    unsigned long gfn = gfn_x(first_gfn);
1040
0
    unsigned long last_gfn = gfn + max_nr - 1;
1041
0
    int rc = 0;
1042
0
1043
0
    p2m_lock(p2m);
1044
0
1045
0
    last_gfn = min(last_gfn, p2m->max_mapped_pfn);
1046
0
    while ( gfn <= last_gfn )
1047
0
    {
1048
0
        rc = p2m->recalc(p2m, gfn);
1049
0
        /*
1050
0
         * ept->recalc could return 0/1/-ENOMEM. pt->recalc could return
1051
0
         * 0/-ENOMEM/-ENOENT, -ENOENT isn't an error as we are looping
1052
0
         * gfn here.
1053
0
         */
1054
0
        if ( rc == -ENOENT )
1055
0
            rc = 0;
1056
0
        else if ( rc < 0 )
1057
0
        {
1058
0
            gdprintk(XENLOG_ERR, "p2m->recalc failed! Dom%d gfn=%lx\n",
1059
0
                     d->domain_id, gfn);
1060
0
            break;
1061
0
        }
1062
0
1063
0
        gfn++;
1064
0
    }
1065
0
1066
0
    p2m_unlock(p2m);
1067
0
1068
0
    return rc;
1069
0
}
1070
1071
/*
1072
 * Returns:
1073
 *    0              for success
1074
 *    -errno         for failure
1075
 *    1 + new order  for caller to retry with smaller order (guaranteed
1076
 *                   to be smaller than order passed in)
1077
 */
1078
static int set_typed_p2m_entry(struct domain *d, unsigned long gfn_l,
1079
                               mfn_t mfn, unsigned int order,
1080
                               p2m_type_t gfn_p2mt, p2m_access_t access)
1081
289k
{
1082
289k
    int rc = 0;
1083
289k
    p2m_access_t a;
1084
289k
    p2m_type_t ot;
1085
289k
    mfn_t omfn;
1086
289k
    gfn_t gfn = _gfn(gfn_l);
1087
289k
    unsigned int cur_order = 0;
1088
289k
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
1089
289k
1090
289k
    if ( !paging_mode_translate(d) )
1091
0
        return -EIO;
1092
289k
1093
289k
    gfn_lock(p2m, gfn, order);
1094
289k
    omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, &cur_order, NULL);
1095
289k
    if ( cur_order < order )
1096
0
    {
1097
0
        gfn_unlock(p2m, gfn, order);
1098
0
        return cur_order + 1;
1099
0
    }
1100
289k
    if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
1101
0
    {
1102
0
        gfn_unlock(p2m, gfn, order);
1103
0
        domain_crash(d);
1104
0
        return -ENOENT;
1105
0
    }
1106
289k
    else if ( p2m_is_ram(ot) )
1107
0
    {
1108
0
        unsigned long i;
1109
0
1110
0
        for ( i = 0; i < (1UL << order); ++i )
1111
0
        {
1112
0
            ASSERT(mfn_valid(_mfn(mfn_x(omfn) + i)));
1113
0
            set_gpfn_from_mfn(mfn_x(omfn) + i, INVALID_M2P_ENTRY);
1114
0
        }
1115
0
    }
1116
289k
1117
289k
    P2M_DEBUG("set %d %lx %lx\n", gfn_p2mt, gfn_l, mfn_x(mfn));
1118
289k
    rc = p2m_set_entry(p2m, gfn, mfn, order, gfn_p2mt, access);
1119
289k
    if ( rc )
1120
0
        gdprintk(XENLOG_ERR, "p2m_set_entry: %#lx:%u -> %d (0x%"PRI_mfn")\n",
1121
289k
                 gfn_l, order, rc, mfn_x(mfn));
1122
289k
    else if ( p2m_is_pod(ot) )
1123
0
    {
1124
0
        pod_lock(p2m);
1125
0
        p2m->pod.entry_count -= 1UL << order;
1126
0
        BUG_ON(p2m->pod.entry_count < 0);
1127
0
        pod_unlock(p2m);
1128
0
    }
1129
289k
    gfn_unlock(p2m, gfn, order);
1130
289k
1131
289k
    return rc;
1132
289k
}
1133
1134
/* Set foreign mfn in the given guest's p2m table. */
1135
static int set_foreign_p2m_entry(struct domain *d, unsigned long gfn,
1136
                                 mfn_t mfn)
1137
0
{
1138
0
    return set_typed_p2m_entry(d, gfn, mfn, PAGE_ORDER_4K, p2m_map_foreign,
1139
0
                               p2m_get_hostp2m(d)->default_access);
1140
0
}
1141
1142
int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
1143
                       unsigned int order, p2m_access_t access)
1144
289k
{
1145
289k
    if ( order > PAGE_ORDER_4K &&
1146
0
         rangeset_overlaps_range(mmio_ro_ranges, mfn_x(mfn),
1147
0
                                 mfn_x(mfn) + (1UL << order) - 1) )
1148
0
        return PAGE_ORDER_4K + 1;
1149
289k
1150
289k
    return set_typed_p2m_entry(d, gfn, mfn, order, p2m_mmio_direct, access);
1151
289k
}
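
A simplified caller sketch of the retry convention documented above; real callers also re-check gfn/mfn alignment before retrying, so this is illustrative only:

    unsigned int order = PAGE_ORDER_2M;
    int rc;

    do {
        rc = set_mmio_p2m_entry(d, gfn, mfn, order,
                                p2m_get_hostp2m(d)->default_access);
        if ( rc > 0 )
            order = rc - 1;     /* retry with the smaller order suggested */
    } while ( rc > 0 );
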
1152
1153
int set_identity_p2m_entry(struct domain *d, unsigned long gfn_l,
1154
                           p2m_access_t p2ma, unsigned int flag)
1155
39
{
1156
39
    p2m_type_t p2mt;
1157
39
    p2m_access_t a;
1158
39
    gfn_t gfn = _gfn(gfn_l);
1159
39
    mfn_t mfn;
1160
39
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
1161
39
    int ret;
1162
39
1163
39
    if ( !paging_mode_translate(p2m->domain) )
1164
0
    {
1165
0
        if ( !need_iommu(d) )
1166
0
            return 0;
1167
0
        return iommu_map_page(d, gfn_l, gfn_l, IOMMUF_readable|IOMMUF_writable);
1168
0
    }
1169
39
1170
39
    gfn_lock(p2m, gfn, 0);
1171
39
1172
39
    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
1173
39
1174
39
    if ( p2mt == p2m_invalid || p2mt == p2m_mmio_dm )
1175
39
        ret = p2m_set_entry(p2m, gfn, _mfn(gfn_l), PAGE_ORDER_4K,
1176
39
                            p2m_mmio_direct, p2ma);
1177
0
    else if ( mfn_x(mfn) == gfn_l && p2mt == p2m_mmio_direct && a == p2ma )
1178
0
        ret = 0;
1179
0
    else
1180
0
    {
1181
0
        if ( flag & XEN_DOMCTL_DEV_RDM_RELAXED )
1182
0
            ret = 0;
1183
0
        else
1184
0
            ret = -EBUSY;
1185
0
        printk(XENLOG_G_WARNING
1186
0
               "Cannot setup identity map d%d:%lx,"
1187
0
               " gfn already mapped to %lx.\n",
1188
0
               d->domain_id, gfn_l, mfn_x(mfn));
1189
0
    }
1190
39
1191
39
    gfn_unlock(p2m, gfn, 0);
1192
39
    return ret;
1193
39
}
1194
1195
/*
1196
 * Returns:
1197
 *    0        for success
1198
 *    -errno   for failure
1199
 *    order+1  for caller to retry with order (guaranteed smaller than
1200
 *             the order value passed in)
1201
 */
1202
int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn_l, mfn_t mfn,
1203
                         unsigned int order)
1204
218k
{
1205
218k
    int rc = -EINVAL;
1206
218k
    gfn_t gfn = _gfn(gfn_l);
1207
218k
    mfn_t actual_mfn;
1208
218k
    p2m_access_t a;
1209
218k
    p2m_type_t t;
1210
218k
    unsigned int cur_order = 0;
1211
218k
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
1212
218k
1213
218k
    if ( !paging_mode_translate(d) )
1214
0
        return -EIO;
1215
218k
1216
218k
    gfn_lock(p2m, gfn, order);
1217
218k
    actual_mfn = p2m->get_entry(p2m, gfn, &t, &a, 0, &cur_order, NULL);
1218
218k
    if ( cur_order < order )
1219
0
    {
1220
0
        rc = cur_order + 1;
1221
0
        goto out;
1222
0
    }
1223
218k
1224
218k
    /* Do not use mfn_valid() here as it will usually fail for MMIO pages. */
1225
218k
    if ( mfn_eq(actual_mfn, INVALID_MFN) || (t != p2m_mmio_direct) )
1226
0
    {
1227
0
        gdprintk(XENLOG_ERR,
1228
0
                 "gfn_to_mfn failed! gfn=%08lx type:%d\n", gfn_l, t);
1229
0
        goto out;
1230
0
    }
1231
218k
    if ( mfn_x(mfn) != mfn_x(actual_mfn) )
1232
0
        gdprintk(XENLOG_WARNING,
1233
218k
                 "no mapping between mfn %08lx and gfn %08lx\n",
1234
218k
                 mfn_x(mfn), gfn_l);
1235
218k
    rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order, p2m_invalid,
1236
218k
                       p2m->default_access);
1237
218k
1238
218k
 out:
1239
218k
    gfn_unlock(p2m, gfn, order);
1240
218k
1241
218k
    return rc;
1242
218k
}
1243
1244
int clear_identity_p2m_entry(struct domain *d, unsigned long gfn_l)
1245
0
{
1246
0
    p2m_type_t p2mt;
1247
0
    p2m_access_t a;
1248
0
    gfn_t gfn = _gfn(gfn_l);
1249
0
    mfn_t mfn;
1250
0
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
1251
0
    int ret;
1252
0
1253
0
    if ( !paging_mode_translate(d) )
1254
0
    {
1255
0
        if ( !need_iommu(d) )
1256
0
            return 0;
1257
0
        return iommu_unmap_page(d, gfn_l);
1258
0
    }
1259
0
1260
0
    gfn_lock(p2m, gfn, 0);
1261
0
1262
0
    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
1263
0
    if ( p2mt == p2m_mmio_direct && mfn_x(mfn) == gfn_l )
1264
0
    {
1265
0
        ret = p2m_set_entry(p2m, gfn, INVALID_MFN, PAGE_ORDER_4K,
1266
0
                            p2m_invalid, p2m->default_access);
1267
0
        gfn_unlock(p2m, gfn, 0);
1268
0
    }
1269
0
    else
1270
0
    {
1271
0
        gfn_unlock(p2m, gfn, 0);
1272
0
        printk(XENLOG_G_WARNING
1273
0
               "non-identity map d%d:%lx not cleared (mapped to %lx)\n",
1274
0
               d->domain_id, gfn_l, mfn_x(mfn));
1275
0
        ret = 0;
1276
0
    }
1277
0
1278
0
    return ret;
1279
0
}
1280
1281
/* Returns: 0 for success, -errno for failure */
1282
int set_shared_p2m_entry(struct domain *d, unsigned long gfn_l, mfn_t mfn)
1283
0
{
1284
0
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
1285
0
    int rc = 0;
1286
0
    gfn_t gfn = _gfn(gfn_l);
1287
0
    p2m_access_t a;
1288
0
    p2m_type_t ot;
1289
0
    mfn_t omfn;
1290
0
    unsigned long pg_type;
1291
0
1292
0
    if ( !paging_mode_translate(p2m->domain) )
1293
0
        return -EIO;
1294
0
1295
0
    gfn_lock(p2m, gfn, 0);
1296
0
    omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL, NULL);
1297
0
    /* At the moment we only allow p2m change if gfn has already been made
1298
0
     * sharable first */
1299
0
    ASSERT(p2m_is_shared(ot));
1300
0
    ASSERT(mfn_valid(omfn));
1301
0
    /* Set the m2p entry to invalid only if there are no further type
1302
0
     * refs to this page as shared */
1303
0
    pg_type = read_atomic(&(mfn_to_page(omfn)->u.inuse.type_info));
1304
0
    if ( (pg_type & PGT_count_mask) == 0
1305
0
         || (pg_type & PGT_type_mask) != PGT_shared_page )
1306
0
        set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
1307
0
1308
0
    P2M_DEBUG("set shared %lx %lx\n", gfn_l, mfn_x(mfn));
1309
0
    rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_shared,
1310
0
                       p2m->default_access);
1311
0
    gfn_unlock(p2m, gfn, 0);
1312
0
    if ( rc )
1313
0
        gdprintk(XENLOG_ERR,
1314
0
                 "p2m_set_entry failed! mfn=%08lx rc:%d\n",
1315
0
                 mfn_x(get_gfn_query_unlocked(p2m->domain, gfn_l, &ot)), rc);
1316
0
    return rc;
1317
0
}
1318
1319
/**
1320
 * p2m_mem_paging_nominate - Mark a guest page as to-be-paged-out
1321
 * @d: guest domain
1322
 * @gfn: guest page to nominate
1323
 *
1324
 * Returns 0 for success or negative errno values if gfn is not pageable.
1325
 *
1326
 * p2m_mem_paging_nominate() is called by the pager and checks if a guest page
1327
 * can be paged out. If the following conditions are met the p2mt will be
1328
 * changed:
1329
 * - the gfn is backed by a mfn
1330
 * - the p2mt of the gfn is pageable
1331
 * - the mfn is not used for IO
1332
 * - the mfn has exactly one user and has no special meaning
1333
 *
1334
 * Once the p2mt is changed the page is readonly for the guest.  On success the
1335
 * pager can write the page contents to disk and later evict the page.
1336
 */
1337
int p2m_mem_paging_nominate(struct domain *d, unsigned long gfn_l)
1338
0
{
1339
0
    struct page_info *page;
1340
0
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
1341
0
    p2m_type_t p2mt;
1342
0
    p2m_access_t a;
1343
0
    gfn_t gfn = _gfn(gfn_l);
1344
0
    mfn_t mfn;
1345
0
    int ret = -EBUSY;
1346
0
1347
0
    gfn_lock(p2m, gfn, 0);
1348
0
1349
0
    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
1350
0
1351
0
    /* Check if mfn is valid */
1352
0
    if ( !mfn_valid(mfn) )
1353
0
        goto out;
1354
0
1355
0
    /* Check p2m type */
1356
0
    if ( !p2m_is_pageable(p2mt) )
1357
0
        goto out;
1358
0
1359
0
    /* Check for io memory page */
1360
0
    if ( is_iomem_page(mfn) )
1361
0
        goto out;
1362
0
1363
0
    /* Check page count and type */
1364
0
    page = mfn_to_page(mfn);
1365
0
    if ( (page->count_info & (PGC_count_mask | PGC_allocated)) !=
1366
0
         (1 | PGC_allocated) )
1367
0
        goto out;
1368
0
1369
0
    if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
1370
0
        goto out;
1371
0
1372
0
    /* Fix p2m entry */
1373
0
    ret = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_out, a);
1374
0
1375
0
 out:
1376
0
    gfn_unlock(p2m, gfn, 0);
1377
0
    return ret;
1378
0
}
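
(Taken together with the helpers that follow, the flow described by these comments is: the pager nominates a gfn with p2m_mem_paging_nominate(), writes its contents out and evicts it with p2m_mem_paging_evict(); if the guest later touches the gfn, p2m_mem_paging_populate() asks the pager to bring it back, the pager supplies the data via p2m_mem_paging_prep(), and p2m_mem_paging_resume() completes the page-in.)
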
1379
1380
/**
1381
 * p2m_mem_paging_evict - Mark a guest page as paged-out
1382
 * @d: guest domain
1383
 * @gfn: guest page to evict
1384
 *
1385
 * Returns 0 for success or negative errno values if eviction is not possible.
1386
 *
1387
 * p2m_mem_paging_evict() is called by the pager and will free a guest page and
1388
 * release it back to Xen. If the following conditions are met the page can be
1389
 * freed:
1390
 * - the gfn is backed by a mfn
1391
 * - the gfn was nominated
1392
 * - the mfn has still exactly one user and has no special meaning
1393
 *
1394
 * After successful nomination some other process could have mapped the page. In
1395
 * this case eviction can not be done. If the gfn was populated before the pager
1396
 * could evict it, eviction can not be done either. In this case the gfn is
1397
 * still backed by a mfn.
1398
 */
1399
int p2m_mem_paging_evict(struct domain *d, unsigned long gfn_l)
1400
0
{
1401
0
    struct page_info *page;
1402
0
    p2m_type_t p2mt;
1403
0
    p2m_access_t a;
1404
0
    gfn_t gfn = _gfn(gfn_l);
1405
0
    mfn_t mfn;
1406
0
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
1407
0
    int ret = -EBUSY;
1408
0
1409
0
    gfn_lock(p2m, gfn, 0);
1410
0
1411
0
    /* Get mfn */
1412
0
    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
1413
0
    if ( unlikely(!mfn_valid(mfn)) )
1414
0
        goto out;
1415
0
1416
0
    /* Allow only nominated pages */
1417
0
    if ( p2mt != p2m_ram_paging_out )
1418
0
        goto out;
1419
0
1420
0
    /* Get the page so it doesn't get modified under Xen's feet */
1421
0
    page = mfn_to_page(mfn);
1422
0
    if ( unlikely(!get_page(page, d)) )
1423
0
        goto out;
1424
0
1425
0
    /* Check page count and type once more */
1426
0
    if ( (page->count_info & (PGC_count_mask | PGC_allocated)) !=
1427
0
         (2 | PGC_allocated) )
1428
0
        goto out_put;
1429
0
1430
0
    if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
1431
0
        goto out_put;
1432
0
1433
0
    /* Decrement guest domain's ref count of the page */
1434
0
    if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
1435
0
        put_page(page);
1436
0
1437
0
    /* Remove mapping from p2m table */
1438
0
    ret = p2m_set_entry(p2m, gfn, INVALID_MFN, PAGE_ORDER_4K,
1439
0
                        p2m_ram_paged, a);
1440
0
1441
0
    /* Clear content before returning the page to Xen */
1442
0
    scrub_one_page(page);
1443
0
1444
0
    /* Track number of paged gfns */
1445
0
    atomic_inc(&d->paged_pages);
1446
0
1447
0
 out_put:
1448
0
    /* Put the page back so it gets freed */
1449
0
    put_page(page);
1450
0
1451
0
 out:
1452
0
    gfn_unlock(p2m, gfn, 0);
1453
0
    return ret;
1454
0
}
1455
1456
/**
1457
 * p2m_mem_paging_drop_page - Tell pager to drop its reference to a paged page
1458
 * @d: guest domain
1459
 * @gfn: guest page to drop
1460
 *
1461
 * p2m_mem_paging_drop_page() will notify the pager that a paged-out gfn was
1462
 * released by the guest. The pager is supposed to drop its reference of the
1463
 * gfn.
1464
 */
1465
void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn,
1466
                                p2m_type_t p2mt)
1467
0
{
1468
0
    vm_event_request_t req = {
1469
0
        .reason = VM_EVENT_REASON_MEM_PAGING,
1470
0
        .u.mem_paging.gfn = gfn
1471
0
    };
1472
0
1473
0
    /* We allow no ring in this unique case, because it won't affect
1474
0
     * correctness of the guest execution at this point.  If this is the only
1475
0
     * page that happens to be paged-out, we'll be okay, but it's likely the
1476
0
     * guest will crash shortly anyways. */
1477
0
    int rc = vm_event_claim_slot(d, d->vm_event_paging);
1478
0
    if ( rc < 0 )
1479
0
        return;
1480
0
1481
0
    /* Send release notification to pager */
1482
0
    req.u.mem_paging.flags = MEM_PAGING_DROP_PAGE;
1483
0
1484
0
    /* Update stats unless the page hasn't yet been evicted */
1485
0
    if ( p2mt != p2m_ram_paging_out )
1486
0
        atomic_dec(&d->paged_pages);
1487
0
    else
1488
0
        /* Evict will fail now, tag this request for pager */
1489
0
        req.u.mem_paging.flags |= MEM_PAGING_EVICT_FAIL;
1490
0
1491
0
    vm_event_put_request(d, d->vm_event_paging, &req);
1492
0
}
1493
1494
/**
1495
 * p2m_mem_paging_populate - Tell pager to populate a paged page
1496
 * @d: guest domain
1497
 * @gfn: guest page in paging state
1498
 *
1499
 * p2m_mem_paging_populate() will notify the pager that a page in any of the
1500
 * paging states needs to be written back into the guest.
1501
 * This function needs to be called whenever gfn_to_mfn() returns any of the p2m
1502
 * paging types because the gfn may not be backed by a mfn.
1503
 *
1504
 * The gfn can be in any of the paging states, but the pager needs only be
1505
 * notified when the gfn is in the paging-out path (paging_out or paged).  This
1506
 * function may be called more than once from several vcpus. If the vcpu belongs
1507
 * to the guest, the vcpu must be stopped and the pager notified that the vcpu
1508
 * was stopped. The pager needs to handle several requests for the same gfn.
1509
 *
1510
 * If the gfn is not in the paging-out path and the vcpu does not belong to the
1511
 * guest, nothing needs to be done and the function assumes that a request was
1512
 * already sent to the pager. In this case the caller has to try again until the
1513
 * gfn is fully paged in again.
1514
 */
1515
void p2m_mem_paging_populate(struct domain *d, unsigned long gfn_l)
1516
0
{
1517
0
    struct vcpu *v = current;
1518
0
    vm_event_request_t req = {
1519
0
        .reason = VM_EVENT_REASON_MEM_PAGING,
1520
0
        .u.mem_paging.gfn = gfn_l
1521
0
    };
1522
0
    p2m_type_t p2mt;
1523
0
    p2m_access_t a;
1524
0
    gfn_t gfn = _gfn(gfn_l);
1525
0
    mfn_t mfn;
1526
0
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
1527
0
1528
0
    /* We're paging. There should be a ring */
1529
0
    int rc = vm_event_claim_slot(d, d->vm_event_paging);
1530
0
    if ( rc == -ENOSYS )
1531
0
    {
1532
0
        gdprintk(XENLOG_ERR, "Domain %hu paging gfn %lx yet no ring "
1533
0
                             "in place\n", d->domain_id, gfn_l);
1534
0
        /* Prevent the vcpu from faulting repeatedly on the same gfn */
1535
0
        if ( v->domain == d )
1536
0
            vcpu_pause_nosync(v);
1537
0
        domain_crash(d);
1538
0
        return;
1539
0
    }
1540
0
    else if ( rc < 0 )
1541
0
        return;
1542
0
1543
0
    /* Fix p2m mapping */
1544
0
    gfn_lock(p2m, gfn, 0);
1545
0
    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
1546
0
    /* Allow only nominated or evicted pages to enter page-in path */
1547
0
    if ( p2mt == p2m_ram_paging_out || p2mt == p2m_ram_paged )
1548
0
    {
1549
0
        /* Evict will fail now, tag this request for pager */
1550
0
        if ( p2mt == p2m_ram_paging_out )
1551
0
            req.u.mem_paging.flags |= MEM_PAGING_EVICT_FAIL;
1552
0
1553
0
        p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_in, a);
1554
0
    }
1555
0
    gfn_unlock(p2m, gfn, 0);
1556
0
1557
0
    /* Pause domain if request came from guest and gfn has paging type */
1558
0
    if ( p2m_is_paging(p2mt) && v->domain == d )
1559
0
    {
1560
0
        vm_event_vcpu_pause(v);
1561
0
        req.flags |= VM_EVENT_FLAG_VCPU_PAUSED;
1562
0
    }
1563
0
    /* No need to inform pager if the gfn is not in the page-out path */
1564
0
    else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
1565
0
    {
1566
0
        /* gfn is already on its way back and vcpu is not paused */
1567
0
        vm_event_cancel_slot(d, d->vm_event_paging);
1568
0
        return;
1569
0
    }
1570
0
1571
0
    /* Send request to pager */
1572
0
    req.u.mem_paging.p2mt = p2mt;
1573
0
    req.vcpu_id = v->vcpu_id;
1574
0
1575
0
    vm_event_put_request(d, d->vm_event_paging, &req);
1576
0
}
1577
1578
/**
1579
 * p2m_mem_paging_prep - Allocate a new page for the guest
1580
 * @d: guest domain
1581
 * @gfn: guest page in paging state
1582
 *
1583
 * p2m_mem_paging_prep() will allocate a new page for the guest if the gfn is
1584
 * not backed by a mfn. It is called by the pager.
1585
 * It is required that the gfn was already populated. The gfn may already have a
1586
 * mfn if populate was called for a gfn which was nominated but not evicted. In
1587
 * this case only the p2mt needs to be forwarded.
1588
 */
1589
int p2m_mem_paging_prep(struct domain *d, unsigned long gfn_l, uint64_t buffer)
1590
0
{
1591
0
    struct page_info *page;
1592
0
    p2m_type_t p2mt;
1593
0
    p2m_access_t a;
1594
0
    gfn_t gfn = _gfn(gfn_l);
1595
0
    mfn_t mfn;
1596
0
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
1597
0
    int ret, page_extant = 1;
1598
0
    const void *user_ptr = (const void *) buffer;
1599
0
1600
0
    if ( user_ptr )
1601
0
        /* Sanity check the buffer and bail out early if trouble */
1602
0
        if ( (buffer & (PAGE_SIZE - 1)) || 
1603
0
             (!access_ok(user_ptr, PAGE_SIZE)) )
1604
0
            return -EINVAL;
1605
0
1606
0
    gfn_lock(p2m, gfn, 0);
1607
0
1608
0
    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
1609
0
1610
0
    ret = -ENOENT;
1611
0
    /* Allow missing pages */
1612
0
    if ( (p2mt != p2m_ram_paging_in) && (p2mt != p2m_ram_paged) )
1613
0
        goto out;
1614
0
1615
0
    /* Allocate a page if the gfn does not have one yet */
1616
0
    if ( !mfn_valid(mfn) )
1617
0
    {
1618
0
        /* If the user did not provide a buffer, we disallow */
1619
0
        ret = -EINVAL;
1620
0
        if ( unlikely(user_ptr == NULL) )
1621
0
            goto out;
1622
0
        /* Get a free page */
1623
0
        ret = -ENOMEM;
1624
0
        page = alloc_domheap_page(p2m->domain, 0);
1625
0
        if ( unlikely(page == NULL) )
1626
0
            goto out;
1627
0
        mfn = page_to_mfn(page);
1628
0
        page_extant = 0;
1629
0
    }
1630
0
1631
0
    /* If we were given a buffer, now is the time to use it */
1632
0
    if ( !page_extant && user_ptr )
1633
0
    {
1634
0
        void *guest_map;
1635
0
        int rc;
1636
0
1637
0
        ASSERT( mfn_valid(mfn) );
1638
0
        guest_map = map_domain_page(mfn);
1639
0
        rc = copy_from_user(guest_map, user_ptr, PAGE_SIZE);
1640
0
        unmap_domain_page(guest_map);
1641
0
        if ( rc )
1642
0
        {
1643
0
            gdprintk(XENLOG_ERR, "Failed to load paging-in gfn %lx domain %u "
1644
0
                                 "bytes left %d\n", gfn_l, d->domain_id, rc);
1645
0
            ret = -EFAULT;
1646
0
            put_page(page); /* Don't leak pages */
1647
0
            goto out;
1648
0
        }
1649
0
    }
1650
0
1651
0
    /* Make the page already guest-accessible. If the pager still has a
1652
0
     * pending resume operation, it will be idempotent p2m entry-wise,
1653
0
     * but will unpause the vcpu */
1654
0
    ret = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
1655
0
                        paging_mode_log_dirty(d) ? p2m_ram_logdirty
1656
0
                                                 : p2m_ram_rw, a);
1657
0
    set_gpfn_from_mfn(mfn_x(mfn), gfn_l);
1658
0
1659
0
    if ( !page_extant )
1660
0
        atomic_dec(&d->paged_pages);
1661
0
1662
0
 out:
1663
0
    gfn_unlock(p2m, gfn, 0);
1664
0
    return ret;
1665
0
}
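
p2m_mem_paging_prep() only trusts the pager's buffer after two checks: the virtual address must be page aligned and the whole PAGE_SIZE range must pass access_ok() before copy_from_user() is attempted into a freshly allocated domheap page. A stand-alone sketch of that validation follows; MODEL_PAGE_SIZE and model_access_ok() are assumptions standing in for the real PAGE_SIZE and access_ok().

/*
 * Stand-alone sketch (not Xen code): the early buffer validation performed by
 * p2m_mem_paging_prep() above.
 */
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define MODEL_PAGE_SIZE 4096u

static bool model_access_ok(const void *p, size_t len)
{
    return p != NULL && len == MODEL_PAGE_SIZE;   /* placeholder range check */
}

static int model_check_pager_buffer(uint64_t buffer)
{
    const void *user_ptr = (const void *)(uintptr_t)buffer;

    if ( !user_ptr )
        return 0;   /* no buffer: only valid if the gfn already has a mfn */

    /* Must be page aligned and fully accessible, as checked above. */
    if ( (buffer & (MODEL_PAGE_SIZE - 1)) ||
         !model_access_ok(user_ptr, MODEL_PAGE_SIZE) )
        return -EINVAL;

    return 0;
}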
1666
1667
/**
1668
 * p2m_mem_paging_resume - Resume guest gfn
1669
 * @d: guest domain
1670
 * @rsp: vm_event response received
1671
 *
1672
 * p2m_mem_paging_resume() will forward the p2mt of a gfn to ram_rw. It is
1673
 * called by the pager.
1674
 *
1675
 * The gfn was previously either evicted and populated, or nominated and
1676
 * populated. If the page was evicted the p2mt will be p2m_ram_paging_in. If
1677
 * the page was just nominated the p2mt will also be p2m_ram_paging_in, set by
1678
 * p2m_mem_paging_populate(), even if the pager skipped p2m_mem_paging_prep().
1679
 *
1680
 * If the gfn was dropped the vcpu needs to be unpaused.
1681
 */
1682
1683
void p2m_mem_paging_resume(struct domain *d, vm_event_response_t *rsp)
1684
0
{
1685
0
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
1686
0
    p2m_type_t p2mt;
1687
0
    p2m_access_t a;
1688
0
    mfn_t mfn;
1689
0
1690
0
    /* Fix p2m entry if the page was not dropped */
1691
0
    if ( !(rsp->u.mem_paging.flags & MEM_PAGING_DROP_PAGE) )
1692
0
    {
1693
0
        gfn_t gfn = _gfn(rsp->u.mem_paging.gfn);
1694
0
1695
0
        gfn_lock(p2m, gfn, 0);
1696
0
        mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
1697
0
        /*
1698
0
         * Allow only pages which were prepared properly, or pages which
1699
0
         * were nominated but not evicted.
1700
0
         */
1701
0
        if ( mfn_valid(mfn) && (p2mt == p2m_ram_paging_in) )
1702
0
        {
1703
0
            p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
1704
0
                          paging_mode_log_dirty(d) ? p2m_ram_logdirty :
1705
0
                          p2m_ram_rw, a);
1706
0
            set_gpfn_from_mfn(mfn_x(mfn), gfn_x(gfn));
1707
0
        }
1708
0
        gfn_unlock(p2m, gfn, 0);
1709
0
    }
1710
0
}
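
The resume path therefore only touches the p2m when three conditions line up: the pager did not set MEM_PAGING_DROP_PAGE, the gfn is backed by a valid mfn, and the type is still p2m_ram_paging_in. A minimal stand-alone sketch of that gate, with booleans standing in for the real flag and checks:

/* Stand-alone sketch (not Xen code): the gate applied by p2m_mem_paging_resume(). */
#include <stdbool.h>

enum model_target { MODEL_LEAVE_ALONE, MODEL_RAM_RW, MODEL_RAM_LOGDIRTY };

static enum model_target resume_target(bool dropped, bool mfn_valid,
                                       bool is_paging_in, bool log_dirty_mode)
{
    /* Only pages which were prepared properly, or nominated but not evicted. */
    if ( dropped || !mfn_valid || !is_paging_in )
        return MODEL_LEAVE_ALONE;

    return log_dirty_mode ? MODEL_RAM_LOGDIRTY : MODEL_RAM_RW;
}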
1711
1712
void p2m_altp2m_check(struct vcpu *v, uint16_t idx)
1713
0
{
1714
0
    if ( altp2m_active(v->domain) )
1715
0
        p2m_switch_vcpu_altp2m_by_id(v, idx);
1716
0
}
1717
1718
static struct p2m_domain *
1719
p2m_getlru_nestedp2m(struct domain *d, struct p2m_domain *p2m)
1720
0
{
1721
0
    struct list_head *lru_list = &p2m_get_hostp2m(d)->np2m_list;
1722
0
    
1723
0
    ASSERT(!list_empty(lru_list));
1724
0
1725
0
    if ( p2m == NULL )
1726
0
        p2m = list_entry(lru_list->prev, struct p2m_domain, np2m_list);
1727
0
1728
0
    list_move(&p2m->np2m_list, lru_list);
1729
0
1730
0
    return p2m;
1731
0
}
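
The nested-p2m LRU is nothing more than the host p2m's np2m_list: the tail (lru_list->prev) is the least recently used table, and list_move() re-inserts whichever table was chosen at the head. The sketch below reproduces that discipline with a minimal doubly linked list; it is a stand-alone illustration, not Xen's <xen/list.h>.

/*
 * Stand-alone sketch (not Xen code): the LRU discipline used above, written
 * against a minimal doubly linked list standing in for struct list_head and
 * list_move().
 */
#include <assert.h>
#include <stddef.h>

struct model_node { struct model_node *prev, *next; };

static void model_list_init(struct model_node *head)
{
    head->prev = head->next = head;
}

static void model_list_del(struct model_node *n)
{
    n->prev->next = n->next;
    n->next->prev = n->prev;
}

static void model_list_add_head(struct model_node *n, struct model_node *head)
{
    n->next = head->next;
    n->prev = head;
    head->next->prev = n;
    head->next = n;
}

/* list_move(): unlink and re-insert at the head (most recently used slot). */
static void model_list_move(struct model_node *n, struct model_node *head)
{
    model_list_del(n);
    model_list_add_head(n, head);
}

/*
 * getlru: when no candidate is given take the tail (lru_list->prev, the least
 * recently used entry), then bump whichever entry was chosen to the front,
 * as p2m_getlru_nestedp2m() does above.
 */
static struct model_node *model_getlru(struct model_node *head,
                                       struct model_node *n)
{
    assert(head->next != head);     /* list must not be empty */

    if ( n == NULL )
        n = head->prev;

    model_list_move(n, head);
    return n;
}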
1732
1733
static void
1734
p2m_flush_table_locked(struct p2m_domain *p2m)
1735
0
{
1736
0
    struct page_info *top, *pg;
1737
0
    struct domain *d = p2m->domain;
1738
0
    mfn_t mfn;
1739
0
1740
0
    ASSERT(p2m_locked_by_me(p2m));
1741
0
1742
0
    /*
1743
0
     * "Host" p2m tables can have shared entries &c that need a bit more care
1744
0
     * when discarding them.
1745
0
     */
1746
0
    ASSERT(!p2m_is_hostp2m(p2m));
1747
0
    /* Nested p2m's do not do pod, hence the asserts (and no pod lock) */
1748
0
    ASSERT(page_list_empty(&p2m->pod.super));
1749
0
    ASSERT(page_list_empty(&p2m->pod.single));
1750
0
1751
0
    /* No need to flush if it's already empty */
1752
0
    if ( p2m_is_nestedp2m(p2m) && p2m->np2m_base == P2M_BASE_EADDR )
1753
0
        return;
1754
0
1755
0
    /* This is no longer a valid nested p2m for any address space */
1756
0
    p2m->np2m_base = P2M_BASE_EADDR;
1757
0
    p2m->np2m_generation++;
1758
0
1759
0
    /* Make sure nobody else is using this p2m table */
1760
0
    nestedhvm_vmcx_flushtlb(p2m);
1761
0
1762
0
    /* Zap the top level of the trie */
1763
0
    mfn = pagetable_get_mfn(p2m_get_pagetable(p2m));
1764
0
    clear_domain_page(mfn);
1765
0
1766
0
    /* Free the rest of the trie pages back to the paging pool */
1767
0
    top = mfn_to_page(mfn);
1768
0
    while ( (pg = page_list_remove_head(&p2m->pages)) )
1769
0
    {
1770
0
        if ( pg != top )
1771
0
            d->arch.paging.free_page(d, pg);
1772
0
    }
1773
0
    page_list_add(top, &p2m->pages);
1774
0
}
1775
1776
/* Reset this p2m table to be empty */
1777
static void
1778
p2m_flush_table(struct p2m_domain *p2m)
1779
0
{
1780
0
    p2m_lock(p2m);
1781
0
    p2m_flush_table_locked(p2m);
1782
0
    p2m_unlock(p2m);
1783
0
}
1784
1785
void
1786
p2m_flush(struct vcpu *v, struct p2m_domain *p2m)
1787
0
{
1788
0
    ASSERT(v->domain == p2m->domain);
1789
0
    vcpu_nestedhvm(v).nv_p2m = NULL;
1790
0
    p2m_flush_table(p2m);
1791
0
    hvm_asid_flush_vcpu(v);
1792
0
}
1793
1794
void
1795
p2m_flush_nestedp2m(struct domain *d)
1796
0
{
1797
0
    int i;
1798
0
    for ( i = 0; i < MAX_NESTEDP2M; i++ )
1799
0
        p2m_flush_table(d->arch.nested_p2m[i]);
1800
0
}
1801
1802
void np2m_flush_base(struct vcpu *v, unsigned long np2m_base)
1803
0
{
1804
0
    struct domain *d = v->domain;
1805
0
    struct p2m_domain *p2m;
1806
0
    unsigned int i;
1807
0
1808
0
    np2m_base &= ~(0xfffull);
1809
0
1810
0
    nestedp2m_lock(d);
1811
0
    for ( i = 0; i < MAX_NESTEDP2M; i++ )
1812
0
    {
1813
0
        p2m = d->arch.nested_p2m[i];
1814
0
        p2m_lock(p2m);
1815
0
        if ( p2m->np2m_base == np2m_base )
1816
0
        {
1817
0
            p2m_flush_table_locked(p2m);
1818
0
            p2m_unlock(p2m);
1819
0
            break;
1820
0
        }
1821
0
        p2m_unlock(p2m);
1822
0
    }
1823
0
    nestedp2m_unlock(d);
1824
0
}
1825
1826
static void assign_np2m(struct vcpu *v, struct p2m_domain *p2m)
1827
0
{
1828
0
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
1829
0
    struct domain *d = v->domain;
1830
0
1831
0
    /* Bring this np2m to the top of the LRU list */
1832
0
    p2m_getlru_nestedp2m(d, p2m);
1833
0
1834
0
    nv->nv_flushp2m = 0;
1835
0
    nv->nv_p2m = p2m;
1836
0
    nv->np2m_generation = p2m->np2m_generation;
1837
0
    cpumask_set_cpu(v->processor, p2m->dirty_cpumask);
1838
0
}
1839
1840
static void nvcpu_flush(struct vcpu *v)
1841
0
{
1842
0
    hvm_asid_flush_vcpu(v);
1843
0
    vcpu_nestedhvm(v).stale_np2m = true;
1844
0
}
1845
1846
struct p2m_domain *
1847
p2m_get_nestedp2m_locked(struct vcpu *v)
1848
0
{
1849
0
    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
1850
0
    struct domain *d = v->domain;
1851
0
    struct p2m_domain *p2m;
1852
0
    uint64_t np2m_base = nhvm_vcpu_p2m_base(v);
1853
0
    unsigned int i;
1854
0
    bool needs_flush = true;
1855
0
1856
0
    /* Mask out low bits; this avoids collisions with P2M_BASE_EADDR */
1857
0
    np2m_base &= ~(0xfffull);
1858
0
1859
0
    if ( nv->nv_flushp2m && nv->nv_p2m ) {
1860
0
        nv->nv_p2m = NULL;
1861
0
    }
1862
0
1863
0
    nestedp2m_lock(d);
1864
0
    p2m = nv->nv_p2m;
1865
0
    if ( p2m )
1866
0
    {
1867
0
        p2m_lock(p2m);
1868
0
        if ( p2m->np2m_base == np2m_base )
1869
0
        {
1870
0
            /* Check if np2m was flushed just before the lock */
1871
0
            if ( nv->np2m_generation == p2m->np2m_generation )
1872
0
                needs_flush = false;
1873
0
            /* np2m is up-to-date */
1874
0
            goto found;
1875
0
        }
1876
0
        else if ( p2m->np2m_base != P2M_BASE_EADDR )
1877
0
        {
1878
0
            /* vCPU is switching from some other valid np2m */
1879
0
            cpumask_clear_cpu(v->processor, p2m->dirty_cpumask);
1880
0
        }
1881
0
        p2m_unlock(p2m);
1882
0
    }
1883
0
1884
0
    /* Share a np2m if possible */
1885
0
    for ( i = 0; i < MAX_NESTEDP2M; i++ )
1886
0
    {
1887
0
        p2m = d->arch.nested_p2m[i];
1888
0
        p2m_lock(p2m);
1889
0
1890
0
        if ( p2m->np2m_base == np2m_base )
1891
0
            goto found;
1892
0
1893
0
        p2m_unlock(p2m);
1894
0
    }
1895
0
1896
0
    /* All p2m's are or were in use. Take the least recently used one,
1897
0
     * flush it and reuse. */
1898
0
    p2m = p2m_getlru_nestedp2m(d, NULL);
1899
0
    p2m_flush_table(p2m);
1900
0
    p2m_lock(p2m);
1901
0
1902
0
 found:
1903
0
    if ( needs_flush )
1904
0
        nvcpu_flush(v);
1905
0
    p2m->np2m_base = np2m_base;
1906
0
    assign_np2m(v, p2m);
1907
0
    nestedp2m_unlock(d);
1908
0
1909
0
    return p2m;
1910
0
}
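
Slot selection above keys every nested p2m on the guest's nCR3/EPTP value with the low 12 control bits masked off; an unused slot keeps np2m_base == P2M_BASE_EADDR, which the masking can never produce (the "avoids collisions" comment assumes the sentinel has low bits set, and the sketch below makes the same assumption). MODEL_NR_SLOTS is arbitrary and merely plays the role of MAX_NESTEDP2M.

/*
 * Stand-alone sketch (not Xen code): matching a nested p2m slot on the masked
 * base.  MODEL_EADDR is assumed to be an all-ones sentinel; a real base always
 * has its low 12 bits clear, so the two can never collide.
 */
#include <stdint.h>
#include <stddef.h>

#define MODEL_NR_SLOTS 8
#define MODEL_EADDR    (~UINT64_C(0))

struct model_slot { uint64_t np2m_base; };

/* Return the index of the slot already serving this nested base, or -1. */
static int model_find_np2m(const struct model_slot *slots, uint64_t ncr3)
{
    uint64_t base = ncr3 & ~UINT64_C(0xfff);   /* mask out control bits */
    size_t i;

    for ( i = 0; i < MODEL_NR_SLOTS; i++ )
        if ( slots[i].np2m_base == base && base != MODEL_EADDR )
            return (int)i;

    return -1;   /* no match: the caller recycles the least recently used slot */
}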
1911
1912
struct p2m_domain *p2m_get_nestedp2m(struct vcpu *v)
1913
0
{
1914
0
    struct p2m_domain *p2m = p2m_get_nestedp2m_locked(v);
1915
0
    p2m_unlock(p2m);
1916
0
1917
0
    return p2m;
1918
0
}
1919
1920
struct p2m_domain *
1921
p2m_get_p2m(struct vcpu *v)
1922
0
{
1923
0
    if ( !nestedhvm_is_n2(v) )
1924
0
        return p2m_get_hostp2m(v->domain);
1925
0
1926
0
    return p2m_get_nestedp2m(v);
1927
0
}
1928
1929
void np2m_schedule(int dir)
1930
327k
{
1931
327k
    struct vcpu *curr = current;
1932
327k
    struct nestedvcpu *nv = &vcpu_nestedhvm(curr);
1933
327k
    struct p2m_domain *p2m;
1934
327k
1935
327k
    ASSERT(dir == NP2M_SCHEDLE_IN || dir == NP2M_SCHEDLE_OUT);
1936
327k
1937
327k
    if ( !nestedhvm_enabled(curr->domain) ||
1938
0
         !nestedhvm_vcpu_in_guestmode(curr) ||
1939
0
         !nestedhvm_paging_mode_hap(curr) )
1940
327k
        return;
1941
327k
1942
61
    p2m = nv->nv_p2m;
1943
61
    if ( p2m )
1944
0
    {
1945
0
        bool np2m_valid;
1946
0
1947
0
        p2m_lock(p2m);
1948
0
        np2m_valid = p2m->np2m_base == nhvm_vcpu_p2m_base(curr) &&
1949
0
                     nv->np2m_generation == p2m->np2m_generation;
1950
0
        if ( dir == NP2M_SCHEDLE_OUT && np2m_valid )
1951
0
        {
1952
0
            /*
1953
0
             * The np2m is up to date but this vCPU will no longer use it,
1954
0
             * which means there are no reasons to send a flush IPI.
1955
0
             */
1956
0
            cpumask_clear_cpu(curr->processor, p2m->dirty_cpumask);
1957
0
        }
1958
0
        else if ( dir == NP2M_SCHEDLE_IN )
1959
0
        {
1960
0
            if ( !np2m_valid )
1961
0
            {
1962
0
                /* This vCPU's np2m was flushed while it was not runnable */
1963
0
                hvm_asid_flush_core();
1964
0
                vcpu_nestedhvm(curr).nv_p2m = NULL;
1965
0
            }
1966
0
            else
1967
0
                cpumask_set_cpu(curr->processor, p2m->dirty_cpumask);
1968
0
        }
1969
0
        p2m_unlock(p2m);
1970
0
    }
1971
61
}
1972
1973
unsigned long paging_gva_to_gfn(struct vcpu *v,
1974
                                unsigned long va,
1975
                                uint32_t *pfec)
1976
364k
{
1977
364k
    struct p2m_domain *hostp2m = p2m_get_hostp2m(v->domain);
1978
364k
    const struct paging_mode *hostmode = paging_get_hostmode(v);
1979
364k
1980
364k
    if ( is_hvm_vcpu(v) && paging_mode_hap(v->domain) && nestedhvm_is_n2(v) )
1981
0
    {
1982
0
        unsigned long l2_gfn, l1_gfn;
1983
0
        struct p2m_domain *p2m;
1984
0
        const struct paging_mode *mode;
1985
0
        uint8_t l1_p2ma;
1986
0
        unsigned int l1_page_order;
1987
0
        int rv;
1988
0
1989
0
        /* translate l2 guest va into l2 guest gfn */
1990
0
        p2m = p2m_get_nestedp2m(v);
1991
0
        mode = paging_get_nestedmode(v);
1992
0
        l2_gfn = mode->gva_to_gfn(v, p2m, va, pfec);
1993
0
1994
0
        if ( l2_gfn == gfn_x(INVALID_GFN) )
1995
0
            return gfn_x(INVALID_GFN);
1996
0
1997
0
        /* translate l2 guest gfn into l1 guest gfn */
1998
0
        rv = nestedhap_walk_L1_p2m(v, l2_gfn, &l1_gfn, &l1_page_order, &l1_p2ma,
1999
0
                                   1,
2000
0
                                   !!(*pfec & PFEC_write_access),
2001
0
                                   !!(*pfec & PFEC_insn_fetch));
2002
0
2003
0
        if ( rv != NESTEDHVM_PAGEFAULT_DONE )
2004
0
            return gfn_x(INVALID_GFN);
2005
0
2006
0
        /*
2007
0
         * Sanity check that l1_gfn can be used properly as a 4K mapping, even
2008
0
         * if it mapped by a nested superpage.
2009
0
         */
2010
0
        ASSERT((l2_gfn & ((1ul << l1_page_order) - 1)) ==
2011
0
               (l1_gfn & ((1ul << l1_page_order) - 1)));
2012
0
2013
0
        return l1_gfn;
2014
0
    }
2015
364k
2016
364k
    return hostmode->gva_to_gfn(v, hostp2m, va, pfec);
2017
364k
}
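
For an L2 guest the translation above is a composition of two walks: the nested paging mode maps the virtual address to an L2 gfn, and nestedhap_walk_L1_p2m() maps that to an L1 gfn. The ASSERT checks that both frame numbers agree in their low l1_page_order bits, because inside a superpage mapping the offset carries over unchanged; the stand-alone sketch below shows the same arithmetic from the superpage base.

/*
 * Stand-alone sketch (not Xen code): the superpage-offset invariant asserted
 * above.  Given the aligned base of the L1 superpage mapping and the L2 gfn
 * falling inside it, the low page_order bits of the L2 gfn select the exact
 * 4K frame, which is why l1_gfn and l2_gfn must agree in those bits.
 */
#include <assert.h>
#include <stdint.h>

static uint64_t model_compose_l1_gfn(uint64_t l1_gfn_base, uint64_t l2_gfn,
                                     unsigned int page_order)
{
    uint64_t mask = (UINT64_C(1) << page_order) - 1;

    assert( (l1_gfn_base & mask) == 0 );    /* base is superpage aligned */
    return l1_gfn_base | (l2_gfn & mask);   /* offset carries over unchanged */
}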
2018
2019
/*
2020
 * If the map is non-NULL, we leave this function having acquired an extra ref
2021
 * on mfn_to_page(*mfn).  In all cases, *pfec contains appropriate
2022
 * synthetic/structure PFEC_* bits.
2023
 */
2024
void *map_domain_gfn(struct p2m_domain *p2m, gfn_t gfn, mfn_t *mfn,
2025
                     p2m_type_t *p2mt, p2m_query_t q, uint32_t *pfec)
2026
867k
{
2027
867k
    struct page_info *page;
2028
867k
2029
867k
    if ( !gfn_valid(p2m->domain, gfn) )
2030
0
    {
2031
0
        *pfec = PFEC_reserved_bit | PFEC_page_present;
2032
0
        return NULL;
2033
0
    }
2034
867k
2035
867k
    /* Translate the gfn, unsharing if shared. */
2036
867k
    page = p2m_get_page_from_gfn(p2m, gfn, p2mt, NULL, q);
2037
867k
    if ( p2m_is_paging(*p2mt) )
2038
0
    {
2039
0
        ASSERT(p2m_is_hostp2m(p2m));
2040
0
        if ( page )
2041
0
            put_page(page);
2042
0
        p2m_mem_paging_populate(p2m->domain, gfn_x(gfn));
2043
0
        *pfec = PFEC_page_paged;
2044
0
        return NULL;
2045
0
    }
2046
867k
    if ( p2m_is_shared(*p2mt) )
2047
0
    {
2048
0
        if ( page )
2049
0
            put_page(page);
2050
0
        *pfec = PFEC_page_shared;
2051
0
        return NULL;
2052
0
    }
2053
867k
    if ( !page )
2054
0
    {
2055
0
        *pfec = 0;
2056
0
        return NULL;
2057
0
    }
2058
867k
2059
867k
    *pfec = PFEC_page_present;
2060
867k
    *mfn = page_to_mfn(page);
2061
867k
    ASSERT(mfn_valid(*mfn));
2062
867k
2063
867k
    return map_domain_page(*mfn);
2064
867k
}
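
A successful map_domain_gfn() leaves the caller holding both a transient mapping and an extra reference on the page, so every user has to pair it with unmap_domain_page() and put_page(). The sketch below shows that pairing; model_read_first_word() is not a function in this file, only an illustration of the caller contract.

/*
 * Hedged caller-side sketch (not a function in this file): the pairing a user
 * of map_domain_gfn() must observe.  The helper name and parameters are
 * illustrative; only the unmap/put pairing mirrors the contract above.
 */
static int model_read_first_word(struct domain *d, gfn_t gfn, uint32_t *out)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    p2m_type_t p2mt;
    uint32_t pfec;
    mfn_t mfn;
    uint32_t *map = map_domain_gfn(p2m, gfn, &mfn, &p2mt, P2M_ALLOC, &pfec);

    if ( !map )
        return -1;                 /* pfec says why (paged/shared/absent) */

    *out = map[0];

    /* Drop both the transient mapping and the extra page reference. */
    unmap_domain_page(map);
    put_page(mfn_to_page(mfn));

    return 0;
}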
2065
2066
static unsigned int mmio_order(const struct domain *d,
2067
                               unsigned long start_fn, unsigned long nr)
2068
507k
{
2069
507k
    /*
2070
507k
     * Note that the !iommu_use_hap_pt() here has three effects:
2071
507k
     * - cover iommu_{,un}map_page() not having an "order" input yet,
2072
507k
     * - exclude shadow mode (which doesn't support large MMIO mappings),
2073
507k
     * - exclude PV guests, should execution reach this code for such.
2074
507k
     * So be careful when altering this.
2075
507k
     */
2076
507k
    if ( !need_iommu(d) || !iommu_use_hap_pt(d) ||
2077
0
         (start_fn & ((1UL << PAGE_ORDER_2M) - 1)) || !(nr >> PAGE_ORDER_2M) )
2078
507k
        return PAGE_ORDER_4K;
2079
507k
2080
0
    if ( 0 /*
2081
0
            * Don't use 1Gb pages, to limit the iteration count in
2082
0
            * set_typed_p2m_entry() when it needs to zap M2P entries
2083
0
            * for a RAM range.
2084
0
            */ &&
2085
0
         !(start_fn & ((1UL << PAGE_ORDER_1G) - 1)) && (nr >> PAGE_ORDER_1G) &&
2086
0
         hap_has_1gb )
2087
0
        return PAGE_ORDER_1G;
2088
0
2089
0
    if ( hap_has_2mb )
2090
0
        return PAGE_ORDER_2M;
2091
0
2092
0
    return PAGE_ORDER_4K;
2093
0
}
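
Stripped of the IOMMU plumbing, the selection above reads: use 4K unless the IOMMU shares the HAP page tables, the start frame is 2M aligned and at least a whole 2M chunk remains, with 1G mappings deliberately compiled out. A stand-alone sketch with the page orders written out as constants (the booleans stand in for the need_iommu()/iommu_use_hap_pt() and hap_has_2mb conditions):

/*
 * Stand-alone sketch (not Xen code) of the order selection above.  The
 * constants mirror PAGE_ORDER_4K/2M/1G on x86; share_hap_pt and has_2mb are
 * illustrative parameters replacing the global predicates.
 */
#include <stdbool.h>

#define MODEL_ORDER_4K  0
#define MODEL_ORDER_2M  9
#define MODEL_ORDER_1G 18

static unsigned int model_mmio_order(bool share_hap_pt, bool has_2mb,
                                     unsigned long start_fn, unsigned long nr)
{
    /* Not 2M aligned, or fewer than 2M worth of frames left: use 4K. */
    if ( !share_hap_pt ||
         (start_fn & ((1UL << MODEL_ORDER_2M) - 1)) || !(nr >> MODEL_ORDER_2M) )
        return MODEL_ORDER_4K;

    /* 1G mappings are deliberately disabled in mmio_order() above. */

    return has_2mb ? MODEL_ORDER_2M : MODEL_ORDER_4K;
}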
2094
2095
515k
#define MAP_MMIO_MAX_ITER 64 /* pretty arbitrary */
2096
2097
int map_mmio_regions(struct domain *d,
2098
                     gfn_t start_gfn,
2099
                     unsigned long nr,
2100
                     mfn_t mfn)
2101
4.64k
{
2102
4.64k
    int ret = 0;
2103
4.64k
    unsigned long i;
2104
4.64k
    unsigned int iter, order;
2105
4.64k
2106
4.64k
    if ( !paging_mode_translate(d) )
2107
0
        return 0;
2108
4.64k
2109
293k
    for ( iter = i = 0; i < nr && iter < MAP_MMIO_MAX_ITER;
2110
289k
          i += 1UL << order, ++iter )
2111
289k
    {
2112
289k
        /* OR'ing gfn and mfn values will return an order suitable to both. */
2113
289k
        for ( order = mmio_order(d, (gfn_x(start_gfn) + i) | (mfn_x(mfn) + i), nr - i); ;
2114
0
              order = ret - 1 )
2115
289k
        {
2116
289k
            ret = set_mmio_p2m_entry(d, gfn_x(start_gfn) + i,
2117
289k
                                     mfn_add(mfn, i), order,
2118
289k
                                     p2m_get_hostp2m(d)->default_access);
2119
289k
            if ( ret <= 0 )
2120
289k
                break;
2121
0
            ASSERT(ret <= order);
2122
0
        }
2123
289k
        if ( ret < 0 )
2124
0
            break;
2125
289k
    }
2126
4.64k
2127
4.47k
    return i == nr ? 0 : i ?: ret;
2128
4.64k
}
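
The `i == nr ? 0 : i ?: ret` tail encodes the calling convention: 0 when the whole range was handled, a positive count of frames completed when the MAP_MMIO_MAX_ITER budget ran out, or a negative errno when the very first chunk failed. A hedged caller-side sketch of consuming that convention follows; the real domctl caller re-issues the operation for the remainder rather than looping like this, and model_map_all() is purely illustrative.

/*
 * Hedged caller-side sketch (not a real Xen caller): consuming the return
 * convention of map_mmio_regions() above.
 */
static int model_map_all(struct domain *d, gfn_t gfn, mfn_t mfn, unsigned long nr)
{
    while ( nr )
    {
        int done = map_mmio_regions(d, gfn, nr, mfn);

        if ( done == 0 )
            return 0;             /* whole remaining range mapped */
        if ( done < 0 )
            return done;          /* error, nothing further mapped */

        /* Partial progress: advance past the 'done' frames and retry. */
        gfn = _gfn(gfn_x(gfn) + done);
        mfn = mfn_add(mfn, done);
        nr -= done;
    }

    return 0;
}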
2129
2130
int unmap_mmio_regions(struct domain *d,
2131
                       gfn_t start_gfn,
2132
                       unsigned long nr,
2133
                       mfn_t mfn)
2134
3.51k
{
2135
3.51k
    int ret = 0;
2136
3.51k
    unsigned long i;
2137
3.51k
    unsigned int iter, order;
2138
3.51k
2139
3.51k
    if ( !paging_mode_translate(d) )
2140
0
        return 0;
2141
3.51k
2142
221k
    for ( iter = i = 0; i < nr && iter < MAP_MMIO_MAX_ITER;
2143
218k
          i += 1UL << order, ++iter )
2144
218k
    {
2145
218k
        /* OR'ing gfn and mfn values will return an order suitable to both. */
2146
218k
        for ( order = mmio_order(d, (gfn_x(start_gfn) + i) | (mfn_x(mfn) + i), nr - i); ;
2147
0
              order = ret - 1 )
2148
218k
        {
2149
218k
            ret = clear_mmio_p2m_entry(d, gfn_x(start_gfn) + i,
2150
218k
                                       mfn_add(mfn, i), order);
2151
218k
            if ( ret <= 0 )
2152
218k
                break;
2153
0
            ASSERT(ret <= order);
2154
0
        }
2155
218k
        if ( ret < 0 )
2156
0
            break;
2157
218k
    }
2158
3.51k
2159
3.36k
    return i == nr ? 0 : i ?: ret;
2160
3.51k
}
2161
2162
bool_t p2m_switch_vcpu_altp2m_by_id(struct vcpu *v, unsigned int idx)
2163
0
{
2164
0
    struct domain *d = v->domain;
2165
0
    bool_t rc = 0;
2166
0
2167
0
    if ( idx >= MAX_ALTP2M )
2168
0
        return rc;
2169
0
2170
0
    altp2m_list_lock(d);
2171
0
2172
0
    if ( d->arch.altp2m_eptp[idx] != mfn_x(INVALID_MFN) )
2173
0
    {
2174
0
        if ( idx != vcpu_altp2m(v).p2midx )
2175
0
        {
2176
0
            atomic_dec(&p2m_get_altp2m(v)->active_vcpus);
2177
0
            vcpu_altp2m(v).p2midx = idx;
2178
0
            atomic_inc(&p2m_get_altp2m(v)->active_vcpus);
2179
0
            altp2m_vcpu_update_p2m(v);
2180
0
        }
2181
0
        rc = 1;
2182
0
    }
2183
0
2184
0
    altp2m_list_unlock(d);
2185
0
    return rc;
2186
0
}
2187
2188
/*
2189
 * If the fault is for a not present entry:
2190
 *     if the entry in the host p2m has a valid mfn, copy it and retry
2191
 *     else indicate that outer handler should handle fault
2192
 *
2193
 * If the fault is for a present entry:
2194
 *     indicate that outer handler should handle fault
2195
 */
2196
2197
bool_t p2m_altp2m_lazy_copy(struct vcpu *v, paddr_t gpa,
2198
                            unsigned long gla, struct npfec npfec,
2199
                            struct p2m_domain **ap2m)
2200
0
{
2201
0
    struct p2m_domain *hp2m = p2m_get_hostp2m(v->domain);
2202
0
    p2m_type_t p2mt;
2203
0
    p2m_access_t p2ma;
2204
0
    unsigned int page_order;
2205
0
    gfn_t gfn = _gfn(paddr_to_pfn(gpa));
2206
0
    unsigned long mask;
2207
0
    mfn_t mfn;
2208
0
    int rv;
2209
0
2210
0
    *ap2m = p2m_get_altp2m(v);
2211
0
2212
0
    mfn = get_gfn_type_access(*ap2m, gfn_x(gfn), &p2mt, &p2ma,
2213
0
                              0, &page_order);
2214
0
    __put_gfn(*ap2m, gfn_x(gfn));
2215
0
2216
0
    if ( !mfn_eq(mfn, INVALID_MFN) )
2217
0
        return 0;
2218
0
2219
0
    mfn = get_gfn_type_access(hp2m, gfn_x(gfn), &p2mt, &p2ma,
2220
0
                              P2M_ALLOC, &page_order);
2221
0
    __put_gfn(hp2m, gfn_x(gfn));
2222
0
2223
0
    if ( mfn_eq(mfn, INVALID_MFN) )
2224
0
        return 0;
2225
0
2226
0
    p2m_lock(*ap2m);
2227
0
2228
0
    /*
2229
0
     * If this is a superpage mapping, round down both frame numbers
2230
0
     * to the start of the superpage.
2231
0
     */
2232
0
    mask = ~((1UL << page_order) - 1);
2233
0
    mfn = _mfn(mfn_x(mfn) & mask);
2234
0
    gfn = _gfn(gfn_x(gfn) & mask);
2235
0
2236
0
    rv = p2m_set_entry(*ap2m, gfn, mfn, page_order, p2mt, p2ma);
2237
0
    p2m_unlock(*ap2m);
2238
0
2239
0
    if ( rv )
2240
0
    {
2241
0
        gdprintk(XENLOG_ERR,
2242
0
      "failed to set entry for %#"PRIx64" -> %#"PRIx64" p2m %#"PRIx64"\n",
2243
0
      gfn_x(gfn), mfn_x(mfn), (unsigned long)*ap2m);
2244
0
        domain_crash(hp2m->domain);
2245
0
    }
2246
0
2247
0
    return 1;
2248
0
}
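
Before the host entry is copied into the altp2m, both frame numbers are rounded down to the start of the superpage so the mapping is reproduced at its original order. A stand-alone sketch of that rounding:

/* Stand-alone sketch (not Xen code): the superpage rounding used above. */
#include <stdint.h>

struct model_mapping { uint64_t gfn, mfn; unsigned int order; };

static struct model_mapping model_round_to_superpage(uint64_t gfn, uint64_t mfn,
                                                     unsigned int page_order)
{
    uint64_t mask = ~((UINT64_C(1) << page_order) - 1);
    struct model_mapping m = { gfn & mask, mfn & mask, page_order };

    return m;
}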
2249
2250
void p2m_flush_altp2m(struct domain *d)
2251
0
{
2252
0
    unsigned int i;
2253
0
2254
0
    altp2m_list_lock(d);
2255
0
2256
0
    for ( i = 0; i < MAX_ALTP2M; i++ )
2257
0
    {
2258
0
        p2m_flush_table(d->arch.altp2m_p2m[i]);
2259
0
        /* Uninit and reinit ept to force TLB shootdown */
2260
0
        ept_p2m_uninit(d->arch.altp2m_p2m[i]);
2261
0
        ept_p2m_init(d->arch.altp2m_p2m[i]);
2262
0
        d->arch.altp2m_eptp[i] = mfn_x(INVALID_MFN);
2263
0
    }
2264
0
2265
0
    altp2m_list_unlock(d);
2266
0
}
2267
2268
int p2m_init_altp2m_by_id(struct domain *d, unsigned int idx)
2269
0
{
2270
0
    int rc = -EINVAL;
2271
0
2272
0
    if ( idx >= MAX_ALTP2M )
2273
0
        return rc;
2274
0
2275
0
    altp2m_list_lock(d);
2276
0
2277
0
    if ( d->arch.altp2m_eptp[idx] == mfn_x(INVALID_MFN) )
2278
0
    {
2279
0
        p2m_init_altp2m_ept(d, idx);
2280
0
        rc = 0;
2281
0
    }
2282
0
2283
0
    altp2m_list_unlock(d);
2284
0
    return rc;
2285
0
}
2286
2287
int p2m_init_next_altp2m(struct domain *d, uint16_t *idx)
2288
0
{
2289
0
    int rc = -EINVAL;
2290
0
    unsigned int i;
2291
0
2292
0
    altp2m_list_lock(d);
2293
0
2294
0
    for ( i = 0; i < MAX_ALTP2M; i++ )
2295
0
    {
2296
0
        if ( d->arch.altp2m_eptp[i] != mfn_x(INVALID_MFN) )
2297
0
            continue;
2298
0
2299
0
        p2m_init_altp2m_ept(d, i);
2300
0
        *idx = i;
2301
0
        rc = 0;
2302
0
2303
0
        break;
2304
0
    }
2305
0
2306
0
    altp2m_list_unlock(d);
2307
0
    return rc;
2308
0
}
2309
2310
int p2m_destroy_altp2m_by_id(struct domain *d, unsigned int idx)
2311
0
{
2312
0
    struct p2m_domain *p2m;
2313
0
    int rc = -EBUSY;
2314
0
2315
0
    if ( !idx || idx >= MAX_ALTP2M )
2316
0
        return rc;
2317
0
2318
0
    domain_pause_except_self(d);
2319
0
2320
0
    altp2m_list_lock(d);
2321
0
2322
0
    if ( d->arch.altp2m_eptp[idx] != mfn_x(INVALID_MFN) )
2323
0
    {
2324
0
        p2m = d->arch.altp2m_p2m[idx];
2325
0
2326
0
        if ( !_atomic_read(p2m->active_vcpus) )
2327
0
        {
2328
0
            p2m_flush_table(d->arch.altp2m_p2m[idx]);
2329
0
            /* Uninit and reinit ept to force TLB shootdown */
2330
0
            ept_p2m_uninit(d->arch.altp2m_p2m[idx]);
2331
0
            ept_p2m_init(d->arch.altp2m_p2m[idx]);
2332
0
            d->arch.altp2m_eptp[idx] = mfn_x(INVALID_MFN);
2333
0
            rc = 0;
2334
0
        }
2335
0
    }
2336
0
2337
0
    altp2m_list_unlock(d);
2338
0
2339
0
    domain_unpause_except_self(d);
2340
0
2341
0
    return rc;
2342
0
}
2343
2344
int p2m_switch_domain_altp2m_by_id(struct domain *d, unsigned int idx)
2345
0
{
2346
0
    struct vcpu *v;
2347
0
    int rc = -EINVAL;
2348
0
2349
0
    if ( idx >= MAX_ALTP2M )
2350
0
        return rc;
2351
0
2352
0
    domain_pause_except_self(d);
2353
0
2354
0
    altp2m_list_lock(d);
2355
0
2356
0
    if ( d->arch.altp2m_eptp[idx] != mfn_x(INVALID_MFN) )
2357
0
    {
2358
0
        for_each_vcpu( d, v )
2359
0
            if ( idx != vcpu_altp2m(v).p2midx )
2360
0
            {
2361
0
                atomic_dec(&p2m_get_altp2m(v)->active_vcpus);
2362
0
                vcpu_altp2m(v).p2midx = idx;
2363
0
                atomic_inc(&p2m_get_altp2m(v)->active_vcpus);
2364
0
                altp2m_vcpu_update_p2m(v);
2365
0
            }
2366
0
2367
0
        rc = 0;
2368
0
    }
2369
0
2370
0
    altp2m_list_unlock(d);
2371
0
2372
0
    domain_unpause_except_self(d);
2373
0
2374
0
    return rc;
2375
0
}
2376
2377
int p2m_change_altp2m_gfn(struct domain *d, unsigned int idx,
2378
                          gfn_t old_gfn, gfn_t new_gfn)
2379
0
{
2380
0
    struct p2m_domain *hp2m, *ap2m;
2381
0
    p2m_access_t a;
2382
0
    p2m_type_t t;
2383
0
    mfn_t mfn;
2384
0
    unsigned int page_order;
2385
0
    int rc = -EINVAL;
2386
0
2387
0
    if ( idx >= MAX_ALTP2M || d->arch.altp2m_eptp[idx] == mfn_x(INVALID_MFN) )
2388
0
        return rc;
2389
0
2390
0
    hp2m = p2m_get_hostp2m(d);
2391
0
    ap2m = d->arch.altp2m_p2m[idx];
2392
0
2393
0
    p2m_lock(hp2m);
2394
0
    p2m_lock(ap2m);
2395
0
2396
0
    mfn = ap2m->get_entry(ap2m, old_gfn, &t, &a, 0, NULL, NULL);
2397
0
2398
0
    if ( gfn_eq(new_gfn, INVALID_GFN) )
2399
0
    {
2400
0
        if ( mfn_valid(mfn) )
2401
0
            p2m_remove_page(ap2m, gfn_x(old_gfn), mfn_x(mfn), PAGE_ORDER_4K);
2402
0
        rc = 0;
2403
0
        goto out;
2404
0
    }
2405
0
2406
0
    /* Check host p2m if no valid entry in alternate */
2407
0
    if ( !mfn_valid(mfn) )
2408
0
    {
2409
0
        mfn = __get_gfn_type_access(hp2m, gfn_x(old_gfn), &t, &a,
2410
0
                                    P2M_ALLOC, &page_order, 0);
2411
0
2412
0
        if ( !mfn_valid(mfn) || t != p2m_ram_rw )
2413
0
            goto out;
2414
0
2415
0
        /* If this is a superpage, copy that first */
2416
0
        if ( page_order != PAGE_ORDER_4K )
2417
0
        {
2418
0
            gfn_t gfn;
2419
0
            unsigned long mask;
2420
0
2421
0
            mask = ~((1UL << page_order) - 1);
2422
0
            gfn = _gfn(gfn_x(old_gfn) & mask);
2423
0
            mfn = _mfn(mfn_x(mfn) & mask);
2424
0
2425
0
            if ( ap2m->set_entry(ap2m, gfn, mfn, page_order, t, a, 1) )
2426
0
                goto out;
2427
0
        }
2428
0
    }
2429
0
2430
0
    mfn = ap2m->get_entry(ap2m, new_gfn, &t, &a, 0, NULL, NULL);
2431
0
2432
0
    if ( !mfn_valid(mfn) )
2433
0
        mfn = hp2m->get_entry(hp2m, new_gfn, &t, &a, 0, NULL, NULL);
2434
0
2435
0
    /* Note: currently it is not safe to remap to a shared entry */
2436
0
    if ( !mfn_valid(mfn) || (t != p2m_ram_rw) )
2437
0
        goto out;
2438
0
2439
0
    if ( !ap2m->set_entry(ap2m, old_gfn, mfn, PAGE_ORDER_4K, t, a,
2440
0
                          (current->domain != d)) )
2441
0
    {
2442
0
        rc = 0;
2443
0
2444
0
        if ( gfn_x(new_gfn) < ap2m->min_remapped_gfn )
2445
0
            ap2m->min_remapped_gfn = gfn_x(new_gfn);
2446
0
        if ( gfn_x(new_gfn) > ap2m->max_remapped_gfn )
2447
0
            ap2m->max_remapped_gfn = gfn_x(new_gfn);
2448
0
    }
2449
0
2450
0
 out:
2451
0
    p2m_unlock(ap2m);
2452
0
    p2m_unlock(hp2m);
2453
0
    return rc;
2454
0
}
2455
2456
static void p2m_reset_altp2m(struct p2m_domain *p2m)
2457
0
{
2458
0
    p2m_flush_table(p2m);
2459
0
    /* Uninit and reinit ept to force TLB shootdown */
2460
0
    ept_p2m_uninit(p2m);
2461
0
    ept_p2m_init(p2m);
2462
0
    p2m->min_remapped_gfn = gfn_x(INVALID_GFN);
2463
0
    p2m->max_remapped_gfn = 0;
2464
0
}
2465
2466
void p2m_altp2m_propagate_change(struct domain *d, gfn_t gfn,
2467
                                 mfn_t mfn, unsigned int page_order,
2468
                                 p2m_type_t p2mt, p2m_access_t p2ma)
2469
1.06M
{
2470
1.06M
    struct p2m_domain *p2m;
2471
1.06M
    p2m_access_t a;
2472
1.06M
    p2m_type_t t;
2473
1.06M
    mfn_t m;
2474
1.06M
    unsigned int i;
2475
1.06M
    unsigned int reset_count = 0;
2476
1.06M
    unsigned int last_reset_idx = ~0;
2477
1.06M
2478
1.06M
    if ( !altp2m_active(d) )
2479
1.06M
        return;
2480
1.06M
2481
0
    altp2m_list_lock(d);
2482
0
2483
0
    for ( i = 0; i < MAX_ALTP2M; i++ )
2484
0
    {
2485
0
        if ( d->arch.altp2m_eptp[i] == mfn_x(INVALID_MFN) )
2486
0
            continue;
2487
0
2488
0
        p2m = d->arch.altp2m_p2m[i];
2489
0
        m = get_gfn_type_access(p2m, gfn_x(gfn), &t, &a, 0, NULL);
2490
0
2491
0
        /* Check for a dropped page that may impact this altp2m */
2492
0
        if ( mfn_eq(mfn, INVALID_MFN) &&
2493
0
             gfn_x(gfn) >= p2m->min_remapped_gfn &&
2494
0
             gfn_x(gfn) <= p2m->max_remapped_gfn )
2495
0
        {
2496
0
            if ( !reset_count++ )
2497
0
            {
2498
0
                p2m_reset_altp2m(p2m);
2499
0
                last_reset_idx = i;
2500
0
            }
2501
0
            else
2502
0
            {
2503
0
                /* At least 2 altp2m's impacted, so reset everything */
2504
0
                __put_gfn(p2m, gfn_x(gfn));
2505
0
2506
0
                for ( i = 0; i < MAX_ALTP2M; i++ )
2507
0
                {
2508
0
                    if ( i == last_reset_idx ||
2509
0
                         d->arch.altp2m_eptp[i] == mfn_x(INVALID_MFN) )
2510
0
                        continue;
2511
0
2512
0
                    p2m = d->arch.altp2m_p2m[i];
2513
0
                    p2m_lock(p2m);
2514
0
                    p2m_reset_altp2m(p2m);
2515
0
                    p2m_unlock(p2m);
2516
0
                }
2517
0
2518
0
                goto out;
2519
0
            }
2520
0
        }
2521
0
        else if ( !mfn_eq(m, INVALID_MFN) )
2522
0
            p2m_set_entry(p2m, gfn, mfn, page_order, p2mt, p2ma);
2523
0
2524
0
        __put_gfn(p2m, gfn_x(gfn));
2525
0
    }
2526
0
2527
0
 out:
2528
0
    altp2m_list_unlock(d);
2529
0
}
2530
2531
/*** Audit ***/
2532
2533
#if P2M_AUDIT
2534
void audit_p2m(struct domain *d,
2535
               uint64_t *orphans,
2536
                uint64_t *m2p_bad,
2537
                uint64_t *p2m_bad)
2538
0
{
2539
0
    struct page_info *page;
2540
0
    struct domain *od;
2541
0
    unsigned long mfn, gfn;
2542
0
    mfn_t p2mfn;
2543
0
    unsigned long orphans_count = 0, mpbad = 0, pmbad = 0;
2544
0
    p2m_access_t p2ma;
2545
0
    p2m_type_t type;
2546
0
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
2547
0
2548
0
    if ( !paging_mode_translate(d) )
2549
0
        goto out_p2m_audit;
2550
0
2551
0
    P2M_PRINTK("p2m audit starts\n");
2552
0
2553
0
    p2m_lock(p2m);
2554
0
    pod_lock(p2m);
2555
0
2556
0
    if ( p2m->audit_p2m )
2557
0
        pmbad = p2m->audit_p2m(p2m);
2558
0
2559
0
    /* Audit part two: walk the domain's page allocation list, checking
2560
0
     * the m2p entries. */
2561
0
    spin_lock(&d->page_alloc_lock);
2562
0
    page_list_for_each ( page, &d->page_list )
2563
0
    {
2564
0
        mfn = mfn_x(page_to_mfn(page));
2565
0
2566
0
        P2M_PRINTK("auditing guest page, mfn=%#lx\n", mfn);
2567
0
2568
0
        od = page_get_owner(page);
2569
0
2570
0
        if ( od != d )
2571
0
        {
2572
0
            P2M_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n",
2573
0
                       mfn, od, (od?od->domain_id:-1), d, d->domain_id);
2574
0
            continue;
2575
0
        }
2576
0
2577
0
        gfn = get_gpfn_from_mfn(mfn);
2578
0
        if ( gfn == INVALID_M2P_ENTRY )
2579
0
        {
2580
0
            orphans_count++;
2581
0
            P2M_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n",
2582
0
                           mfn);
2583
0
            continue;
2584
0
        }
2585
0
2586
0
        if ( gfn == SHARED_M2P_ENTRY )
2587
0
        {
2588
0
            P2M_PRINTK("shared mfn (%lx) on domain page list!\n",
2589
0
                    mfn);
2590
0
            continue;
2591
0
        }
2592
0
2593
0
        p2mfn = get_gfn_type_access(p2m, gfn, &type, &p2ma, 0, NULL);
2594
0
        if ( mfn_x(p2mfn) != mfn )
2595
0
        {
2596
0
            mpbad++;
2597
0
            P2M_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx"
2598
0
                       " (-> gfn %#lx)\n",
2599
0
                       mfn, gfn, mfn_x(p2mfn),
2600
0
                       (mfn_valid(p2mfn)
2601
0
                        ? get_gpfn_from_mfn(mfn_x(p2mfn))
2602
0
                        : -1u));
2603
0
            /* This m2p entry is stale: the domain has another frame in
2604
0
             * this physical slot.  No great disaster, but for neatness,
2605
0
             * blow away the m2p entry. */
2606
0
            set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
2607
0
        }
2608
0
        __put_gfn(p2m, gfn);
2609
0
2610
0
        P2M_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx\n",
2611
0
                       mfn, gfn, mfn_x(p2mfn));
2612
0
    }
2613
0
    spin_unlock(&d->page_alloc_lock);
2614
0
2615
0
    pod_unlock(p2m);
2616
0
    p2m_unlock(p2m);
2617
0
 
2618
0
    P2M_PRINTK("p2m audit complete\n");
2619
0
    if ( orphans_count | mpbad | pmbad )
2620
0
        P2M_PRINTK("p2m audit found %lu orphans\n", orphans_count);
2621
0
    if ( mpbad | pmbad )
2622
0
    {
2623
0
        P2M_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n",
2624
0
                   pmbad, mpbad);
2625
0
        WARN();
2626
0
    }
2627
0
2628
0
out_p2m_audit:
2629
0
    *orphans = (uint64_t) orphans_count;
2630
0
    *m2p_bad = (uint64_t) mpbad;
2631
0
    *p2m_bad = (uint64_t) pmbad;
2632
0
}
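
The audit cross-checks the two directions of the translation: every frame owned by the domain carries a gfn in the m2p, and looking that gfn up in the p2m must return the same frame, otherwise the m2p entry is stale. The stand-alone sketch below expresses the invariant over plain arrays; MODEL_INVALID stands in for INVALID_M2P_ENTRY.

/*
 * Stand-alone sketch (not Xen code): the p2m/m2p cross-check performed above,
 * over plain arrays.
 */
#include <stdint.h>
#include <stddef.h>

#define MODEL_INVALID (~UINT64_C(0))

/* Count frames whose m2p entry does not round-trip through the p2m. */
static unsigned long model_audit(const uint64_t *m2p, const uint64_t *p2m,
                                 size_t nr_mfns, size_t nr_gfns)
{
    unsigned long bad = 0;
    size_t mfn;

    for ( mfn = 0; mfn < nr_mfns; mfn++ )
    {
        uint64_t gfn = m2p[mfn];

        if ( gfn == MODEL_INVALID )     /* orphan: counted separately above */
            continue;
        if ( gfn >= nr_gfns || p2m[gfn] != mfn )
            bad++;                      /* stale m2p entry */
    }

    return bad;
}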
2633
#endif /* P2M_AUDIT */
2634
2635
/*
2636
 * Add frame from foreign domain to target domain's physmap. Similar to
2637
 * XENMAPSPACE_gmfn but the frame is foreign being mapped into current,
2638
 * and is not removed from foreign domain.
2639
 *
2640
 * Usage: - libxl on pvh dom0 creating a guest and doing privcmd_ioctl_mmap.
2641
 *        - xentrace running on dom0 mapping xenheap pages. foreigndom would
2642
 *          be DOMID_XEN in such a case.
2643
 *        etc.
2644
 *
2645
 * Side Effect: the mfn for fgfn will be refcounted in lower level routines
2646
 *              so it is not lost while mapped here. The refcnt is released
2647
 *              via the XENMEM_remove_from_physmap path.
2648
 *
2649
 * Returns: 0 ==> success
2650
 */
2651
int p2m_add_foreign(struct domain *tdom, unsigned long fgfn,
2652
                    unsigned long gpfn, domid_t foreigndom)
2653
0
{
2654
0
    p2m_type_t p2mt, p2mt_prev;
2655
0
    mfn_t prev_mfn, mfn;
2656
0
    struct page_info *page;
2657
0
    int rc;
2658
0
    struct domain *fdom;
2659
0
2660
0
    ASSERT(tdom);
2661
0
    if ( foreigndom == DOMID_SELF )
2662
0
        return -EINVAL;
2663
0
    /*
2664
0
     * hvm fixme: until support is added to p2m teardown code to clean up any
2665
0
     * foreign entries, limit this to hardware domain only.
2666
0
     */
2667
0
    if ( !is_hardware_domain(tdom) )
2668
0
        return -EPERM;
2669
0
2670
0
    if ( foreigndom == DOMID_XEN )
2671
0
        fdom = rcu_lock_domain(dom_xen);
2672
0
    else
2673
0
        fdom = rcu_lock_domain_by_id(foreigndom);
2674
0
    if ( fdom == NULL )
2675
0
        return -ESRCH;
2676
0
2677
0
    rc = -EINVAL;
2678
0
    if ( tdom == fdom )
2679
0
        goto out;
2680
0
2681
0
    rc = xsm_map_gmfn_foreign(XSM_TARGET, tdom, fdom);
2682
0
    if ( rc )
2683
0
        goto out;
2684
0
2685
0
    /*
2686
0
     * Take a refcnt on the mfn. NB: following supported for foreign mapping:
2687
0
     *     ram_rw | ram_logdirty | ram_ro | paging_out.
2688
0
     */
2689
0
    page = get_page_from_gfn(fdom, fgfn, &p2mt, P2M_ALLOC);
2690
0
    if ( !page ||
2691
0
         !p2m_is_ram(p2mt) || p2m_is_shared(p2mt) || p2m_is_hole(p2mt) )
2692
0
    {
2693
0
        if ( page )
2694
0
            put_page(page);
2695
0
        rc = -EINVAL;
2696
0
        goto out;
2697
0
    }
2698
0
    mfn = page_to_mfn(page);
2699
0
2700
0
    /* Remove previously mapped page if it is present. */
2701
0
    prev_mfn = get_gfn(tdom, gpfn, &p2mt_prev);
2702
0
    if ( mfn_valid(prev_mfn) )
2703
0
    {
2704
0
        if ( is_xen_heap_mfn(mfn_x(prev_mfn)) )
2705
0
            /* Xen heap frames are simply unhooked from this phys slot */
2706
0
            rc = guest_physmap_remove_page(tdom, _gfn(gpfn), prev_mfn, 0);
2707
0
        else
2708
0
            /* Normal domain memory is freed, to avoid leaking memory. */
2709
0
            rc = guest_remove_page(tdom, gpfn);
2710
0
        if ( rc )
2711
0
            goto put_both;
2712
0
    }
2713
0
    /*
2714
0
     * Create the new mapping. Can't use guest_physmap_add_page() because it
2715
0
     * will update the m2p table which will result in mfn -> gpfn of dom0
2716
0
     * and not fgfn of domU.
2717
0
     */
2718
0
    rc = set_foreign_p2m_entry(tdom, gpfn, mfn);
2719
0
    if ( rc )
2720
0
        gdprintk(XENLOG_WARNING, "set_foreign_p2m_entry failed. "
2721
0
                 "gpfn:%lx mfn:%lx fgfn:%lx td:%d fd:%d\n",
2722
0
                 gpfn, mfn_x(mfn), fgfn, tdom->domain_id, fdom->domain_id);
2723
0
2724
0
 put_both:
2725
0
    put_page(page);
2726
0
2727
0
    /*
2728
0
     * This put_gfn is for the above get_gfn for prev_mfn.  We must do this
2729
0
     * after set_foreign_p2m_entry so another cpu doesn't populate the gpfn
2730
0
     * before us.
2731
0
     */
2732
0
    put_gfn(tdom, gpfn);
2733
0
2734
0
out:
2735
0
    if ( fdom )
2736
0
        rcu_unlock_domain(fdom);
2737
0
    return rc;
2738
0
}
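
The "Side Effect" note above describes a simple pairing: mapping the foreign gfn takes a reference on its mfn in the lower-level routines, and that reference is only dropped when the mapping is torn down again via XENMEM_remove_from_physmap. A minimal stand-alone counter sketch of that pairing (illustrative only):

/* Stand-alone sketch (not Xen code): the map/unmap reference pairing. */
#include <assert.h>

struct model_foreign_frame { unsigned int refs; };

static void model_map_foreign(struct model_foreign_frame *f)
{
    f->refs++;                  /* frame cannot go away while mapped */
}

static void model_remove_from_physmap(struct model_foreign_frame *f)
{
    assert(f->refs > 0);        /* must have been mapped */
    f->refs--;                  /* reference released on removal */
}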
2739
/*
2740
 * Local variables:
2741
 * mode: C
2742
 * c-file-style: "BSD"
2743
 * c-basic-offset: 4
2744
 * indent-tabs-mode: nil
2745
 * End:
2746
 */