Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/arch/x86/mm/paging.c
Line  Count  Source
   1         /******************************************************************************
   2          * arch/x86/paging.c
   3          *
   4          * x86 specific paging support
   5          * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
   6          * Copyright (c) 2007 XenSource Inc.
   7          *
   8          * This program is free software; you can redistribute it and/or modify
   9          * it under the terms of the GNU General Public License as published by
  10          * the Free Software Foundation; either version 2 of the License, or
  11          * (at your option) any later version.
  12          *
  13          * This program is distributed in the hope that it will be useful,
  14          * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15          * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16          * GNU General Public License for more details.
  17          *
  18          * You should have received a copy of the GNU General Public License
  19          * along with this program; If not, see <http://www.gnu.org/licenses/>.
  20          */
  21
  22         #include <xen/init.h>
  23         #include <xen/guest_access.h>
  24         #include <asm/paging.h>
  25         #include <asm/shadow.h>
  26         #include <asm/p2m.h>
  27         #include <asm/hap.h>
  28         #include <asm/event.h>
  29         #include <asm/hvm/nestedhvm.h>
  30         #include <xen/numa.h>
  31         #include <xsm/xsm.h>
  32         #include <public/sched.h> /* SHUTDOWN_suspend */
  33
  34         #include "mm-locks.h"
  35
  36         /* Printouts */
  37         #define PAGING_PRINTK(_f, _a...)                                     \
  38             debugtrace_printk("pg: %s(): " _f, __func__, ##_a)
  39         #define PAGING_ERROR(_f, _a...)                                      \
  40             printk("pg error: %s(): " _f, __func__, ##_a)
  41         #define PAGING_DEBUG(flag, _f, _a...)                                \
  42      0      do {                                                             \
  43      0          if (PAGING_DEBUG_ ## flag)                                   \
  44      0              debugtrace_printk("pgdebug: %s(): " _f, __func__, ##_a); \
  45      0      } while (0)
  46
  47         /* Per-CPU variable for enforcing the lock ordering */
  48         DEFINE_PER_CPU(int, mm_lock_level);
  49
  50         /* Override macros from asm/page.h to make them work with mfn_t */
  51         #undef mfn_to_page
  52      0  #define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
  53         #undef page_to_mfn
  54      0  #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
  55
  56         /************************************************/
  57         /*              LOG DIRTY SUPPORT               */
  58         /************************************************/
  59
  60         static mfn_t paging_new_log_dirty_page(struct domain *d)
  61      0  {
  62      0      struct page_info *page;
  63      0
  64      0      page = d->arch.paging.alloc_page(d);
  65      0      if ( unlikely(page == NULL) )
  66      0      {
  67      0          d->arch.paging.log_dirty.failed_allocs++;
  68      0          return INVALID_MFN;
  69      0      }
  70      0
  71      0      d->arch.paging.log_dirty.allocs++;
  72      0
  73      0      return page_to_mfn(page);
  74      0  }
  75
  76         /* Alloc and init a new leaf node */
  77         static mfn_t paging_new_log_dirty_leaf(struct domain *d)
  78      0  {
  79      0      mfn_t mfn = paging_new_log_dirty_page(d);
  80      0
  81      0      if ( mfn_valid(mfn) )
  82      0          clear_domain_page(mfn);
  83      0
  84      0      return mfn;
  85      0  }
  86
  87         /* Alloc and init a new non-leaf node */
  88         static mfn_t paging_new_log_dirty_node(struct domain *d)
  89      0  {
  90      0      mfn_t mfn = paging_new_log_dirty_page(d);
  91      0      if ( mfn_valid(mfn) )
  92      0      {
  93      0          int i;
  94      0          mfn_t *node = map_domain_page(mfn);
  95      0          for ( i = 0; i < LOGDIRTY_NODE_ENTRIES; i++ )
  96      0              node[i] = INVALID_MFN;
  97      0          unmap_domain_page(node);
  98      0      }
  99      0      return mfn;
 100      0  }
 101
 102         /* get the top of the log-dirty bitmap trie */
 103         static mfn_t *paging_map_log_dirty_bitmap(struct domain *d)
 104      0  {
 105      0      if ( likely(mfn_valid(d->arch.paging.log_dirty.top)) )
 106      0          return map_domain_page(d->arch.paging.log_dirty.top);
 107      0      return NULL;
 108      0  }
 109
 110         static void paging_free_log_dirty_page(struct domain *d, mfn_t mfn)
 111      0  {
 112      0      d->arch.paging.log_dirty.allocs--;
 113      0      d->arch.paging.free_page(d, mfn_to_page(mfn));
 114      0  }
 115
 116         static int paging_free_log_dirty_bitmap(struct domain *d, int rc)
 117      0  {
 118      0      mfn_t *l4, *l3, *l2;
 119      0      int i4, i3, i2;
 120      0
 121      0      paging_lock(d);
 122      0
 123      0      if ( !mfn_valid(d->arch.paging.log_dirty.top) )
 124      0      {
 125      0          paging_unlock(d);
 126      0          return 0;
 127      0      }
 128      0
 129      0      if ( !d->arch.paging.preempt.dom )
 130      0      {
 131      0          memset(&d->arch.paging.preempt.log_dirty, 0,
 132      0                 sizeof(d->arch.paging.preempt.log_dirty));
 133      0          ASSERT(rc <= 0);
 134      0          d->arch.paging.preempt.log_dirty.done = -rc;
 135      0      }
 136      0      else if ( d->arch.paging.preempt.dom != current->domain ||
 137      0                d->arch.paging.preempt.op != XEN_DOMCTL_SHADOW_OP_OFF )
 138      0      {
 139      0          paging_unlock(d);
 140      0          return -EBUSY;
 141      0      }
 142      0
 143      0      l4 = map_domain_page(d->arch.paging.log_dirty.top);
 144      0      i4 = d->arch.paging.preempt.log_dirty.i4;
 145      0      i3 = d->arch.paging.preempt.log_dirty.i3;
 146      0      rc = 0;
 147      0
 148      0      for ( ; i4 < LOGDIRTY_NODE_ENTRIES; i4++, i3 = 0 )
 149      0      {
 150      0          if ( !mfn_valid(l4[i4]) )
 151      0              continue;
 152      0
 153      0          l3 = map_domain_page(l4[i4]);
 154      0
 155      0          for ( ; i3 < LOGDIRTY_NODE_ENTRIES; i3++ )
 156      0          {
 157      0              if ( !mfn_valid(l3[i3]) )
 158      0                  continue;
 159      0
 160      0              l2 = map_domain_page(l3[i3]);
 161      0
 162      0              for ( i2 = 0; i2 < LOGDIRTY_NODE_ENTRIES; i2++ )
 163      0                  if ( mfn_valid(l2[i2]) )
 164      0                      paging_free_log_dirty_page(d, l2[i2]);
 165      0
 166      0              unmap_domain_page(l2);
 167      0              paging_free_log_dirty_page(d, l3[i3]);
 168      0              l3[i3] = INVALID_MFN;
 169      0
 170      0              if ( i3 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
 171      0              {
 172      0                  d->arch.paging.preempt.log_dirty.i3 = i3 + 1;
 173      0                  d->arch.paging.preempt.log_dirty.i4 = i4;
 174      0                  rc = -ERESTART;
 175      0                  break;
 176      0              }
 177      0          }
 178      0
 179      0          unmap_domain_page(l3);
 180      0          if ( rc )
 181      0              break;
 182      0          paging_free_log_dirty_page(d, l4[i4]);
 183      0          l4[i4] = INVALID_MFN;
 184      0
 185      0          if ( i4 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
 186      0          {
 187      0              d->arch.paging.preempt.log_dirty.i3 = 0;
 188      0              d->arch.paging.preempt.log_dirty.i4 = i4 + 1;
 189      0              rc = -ERESTART;
 190      0              break;
 191      0          }
 192      0      }
 193      0
 194      0      unmap_domain_page(l4);
 195      0
 196      0      if ( !rc )
 197      0      {
 198      0          paging_free_log_dirty_page(d, d->arch.paging.log_dirty.top);
 199      0          d->arch.paging.log_dirty.top = INVALID_MFN;
 200      0
 201      0          ASSERT(d->arch.paging.log_dirty.allocs == 0);
 202      0          d->arch.paging.log_dirty.failed_allocs = 0;
 203      0
 204      0          rc = -d->arch.paging.preempt.log_dirty.done;
 205      0          d->arch.paging.preempt.dom = NULL;
 206      0      }
 207      0      else
 208      0      {
 209      0          d->arch.paging.preempt.dom = current->domain;
 210      0          d->arch.paging.preempt.op = XEN_DOMCTL_SHADOW_OP_OFF;
 211      0      }
 212      0
 213      0      paging_unlock(d);
 214      0
 215      0      return rc;
 216      0  }
 217
 218         int paging_log_dirty_enable(struct domain *d, bool_t log_global)
 219      0  {
 220      0      int ret;
 221      0
 222      0      if ( need_iommu(d) && log_global )
 223      0      {
 224      0          /*
 225      0           * Refuse to turn on global log-dirty mode
 226      0           * if the domain is using the IOMMU.
 227      0           */
 228      0          return -EINVAL;
 229      0      }
 230      0
 231      0      if ( paging_mode_log_dirty(d) )
 232      0          return -EINVAL;
 233      0
 234      0      domain_pause(d);
 235      0      ret = d->arch.paging.log_dirty.ops->enable(d, log_global);
 236      0      domain_unpause(d);
 237      0
 238      0      return ret;
 239      0  }
 240
 241         static int paging_log_dirty_disable(struct domain *d, bool_t resuming)
 242      0  {
 243      0      int ret = 1;
 244      0
 245      0      if ( !resuming )
 246      0      {
 247      0          domain_pause(d);
 248      0          /* Safe because the domain is paused. */
 249      0          if ( paging_mode_log_dirty(d) )
 250      0          {
 251      0              ret = d->arch.paging.log_dirty.ops->disable(d);
 252      0              ASSERT(ret <= 0);
 253      0          }
 254      0      }
 255      0
 256      0      ret = paging_free_log_dirty_bitmap(d, ret);
 257      0      if ( ret == -ERESTART )
 258      0          return ret;
 259      0
 260      0      domain_unpause(d);
 261      0
 262      0      return ret;
 263      0  }
 264
 265         /* Mark a page as dirty, with taking guest pfn as parameter */
 266         void paging_mark_pfn_dirty(struct domain *d, pfn_t pfn)
 267      0  {
 268      0      bool changed;
 269      0      mfn_t mfn, *l4, *l3, *l2;
 270      0      unsigned long *l1;
 271      0      unsigned int i1, i2, i3, i4;
 272      0
 273      0      if ( !paging_mode_log_dirty(d) )
 274      0          return;
 275      0
 276      0      /* Shared MFNs should NEVER be marked dirty */
 277      0      BUG_ON(SHARED_M2P(pfn_x(pfn)));
 278      0
 279      0      /*
 280      0       * Values with the MSB set denote MFNs that aren't really part of the
 281      0       * domain's pseudo-physical memory map (e.g., the shared info frame).
 282      0       * Nothing to do here...
 283      0       */
 284      0      if ( unlikely(!VALID_M2P(pfn_x(pfn))) )
 285      0          return;
 286      0
 287      0      i1 = L1_LOGDIRTY_IDX(pfn);
 288      0      i2 = L2_LOGDIRTY_IDX(pfn);
 289      0      i3 = L3_LOGDIRTY_IDX(pfn);
 290      0      i4 = L4_LOGDIRTY_IDX(pfn);
 291      0
 292      0      /* Recursive: this is called from inside the shadow code */
 293      0      paging_lock_recursive(d);
 294      0
 295      0      if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) )
 296      0      {
 297      0           d->arch.paging.log_dirty.top = paging_new_log_dirty_node(d);
 298      0           if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) )
 299      0               goto out;
 300      0      }
 301      0
 302      0      l4 = paging_map_log_dirty_bitmap(d);
 303      0      mfn = l4[i4];
 304      0      if ( !mfn_valid(mfn) )
 305      0          l4[i4] = mfn = paging_new_log_dirty_node(d);
 306      0      unmap_domain_page(l4);
 307      0      if ( !mfn_valid(mfn) )
 308      0          goto out;
 309      0
 310      0      l3 = map_domain_page(mfn);
 311      0      mfn = l3[i3];
 312      0      if ( !mfn_valid(mfn) )
 313      0          l3[i3] = mfn = paging_new_log_dirty_node(d);
 314      0      unmap_domain_page(l3);
 315      0      if ( !mfn_valid(mfn) )
 316      0          goto out;
 317      0
 318      0      l2 = map_domain_page(mfn);
 319      0      mfn = l2[i2];
 320      0      if ( !mfn_valid(mfn) )
 321      0          l2[i2] = mfn = paging_new_log_dirty_leaf(d);
 322      0      unmap_domain_page(l2);
 323      0      if ( !mfn_valid(mfn) )
 324      0          goto out;
 325      0
 326      0      l1 = map_domain_page(mfn);
 327      0      changed = !__test_and_set_bit(i1, l1);
 328      0      unmap_domain_page(l1);
 329      0      if ( changed )
 330      0      {
 331      0          PAGING_DEBUG(LOGDIRTY,
 332      0                       "d%d: marked mfn %" PRI_mfn " (pfn %" PRI_pfn ")\n",
 333      0                       d->domain_id, mfn_x(mfn), pfn_x(pfn));
 334      0          d->arch.paging.log_dirty.dirty_count++;
 335      0      }
 336      0
 337      0  out:
 338      0      /* We've already recorded any failed allocations */
 339      0      paging_unlock(d);
 340      0      return;
 341      0  }
 342
 343         /* Mark a page as dirty */
 344         void paging_mark_dirty(struct domain *d, mfn_t gmfn)
 345  9.13k  {
 346  9.13k      pfn_t pfn;
 347  9.13k
 348  9.13k      if ( !paging_mode_log_dirty(d) || !mfn_valid(gmfn) ||
 349      0           page_get_owner(mfn_to_page(gmfn)) != d )
 350  9.13k          return;
 351  9.13k
 352  9.13k      /* We /really/ mean PFN here, even for non-translated guests. */
 353      0      pfn = _pfn(get_gpfn_from_mfn(mfn_x(gmfn)));
 354      0
 355      0      paging_mark_pfn_dirty(d, pfn);
 356      0  }
 357
 358
 359         /* Is this guest page dirty? */
 360         int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn)
 361      0  {
 362      0      pfn_t pfn;
 363      0      mfn_t mfn, *l4, *l3, *l2;
 364      0      unsigned long *l1;
 365      0      int rv;
 366      0
 367      0      ASSERT(paging_locked_by_me(d));
 368      0      ASSERT(paging_mode_log_dirty(d));
 369      0
 370      0      /* We /really/ mean PFN here, even for non-translated guests. */
 371      0      pfn = _pfn(get_gpfn_from_mfn(mfn_x(gmfn)));
 372      0      /* Shared pages are always read-only; invalid pages can't be dirty. */
 373      0      if ( unlikely(SHARED_M2P(pfn_x(pfn)) || !VALID_M2P(pfn_x(pfn))) )
 374      0          return 0;
 375      0
 376      0      mfn = d->arch.paging.log_dirty.top;
 377      0      if ( !mfn_valid(mfn) )
 378      0          return 0;
 379      0
 380      0      l4 = map_domain_page(mfn);
 381      0      mfn = l4[L4_LOGDIRTY_IDX(pfn)];
 382      0      unmap_domain_page(l4);
 383      0      if ( !mfn_valid(mfn) )
 384      0          return 0;
 385      0
 386      0      l3 = map_domain_page(mfn);
 387      0      mfn = l3[L3_LOGDIRTY_IDX(pfn)];
 388      0      unmap_domain_page(l3);
 389      0      if ( !mfn_valid(mfn) )
 390      0          return 0;
 391      0
 392      0      l2 = map_domain_page(mfn);
 393      0      mfn = l2[L2_LOGDIRTY_IDX(pfn)];
 394      0      unmap_domain_page(l2);
 395      0      if ( !mfn_valid(mfn) )
 396      0          return 0;
 397      0
 398      0      l1 = map_domain_page(mfn);
 399      0      rv = test_bit(L1_LOGDIRTY_IDX(pfn), l1);
 400      0      unmap_domain_page(l1);
 401      0      return rv;
 402      0  }
404
405
/* Read a domain's log-dirty bitmap and stats.  If the operation is a CLEAN,
406
 * clear the bitmap and stats as well. */
407
static int paging_log_dirty_op(struct domain *d,
408
                               struct xen_domctl_shadow_op *sc,
409
                               bool_t resuming)
410
0
{
411
0
    int rv = 0, clean = 0, peek = 1;
412
0
    unsigned long pages = 0;
413
0
    mfn_t *l4 = NULL, *l3 = NULL, *l2 = NULL;
414
0
    unsigned long *l1 = NULL;
415
0
    int i4, i3, i2;
416
0
417
0
    if ( !resuming )
418
0
    {
419
0
        /*
420
0
         * Mark dirty all currently write-mapped pages on e.g. the
421
0
         * final iteration of a save operation.
422
0
         */
423
0
        if ( is_hvm_domain(d) &&
424
0
             (sc->mode & XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL) )
425
0
            hvm_mapped_guest_frames_mark_dirty(d);
426
0
427
0
        domain_pause(d);
428
0
429
0
        /*
430
0
         * Flush dirty GFNs potentially cached by hardware. Only need to flush
431
0
         * when not resuming, as domain was paused in resuming case therefore
432
0
         * it's not possible to have any new dirty pages.
433
0
         */
434
0
        p2m_flush_hardware_cached_dirty(d);
435
0
    }
436
0
437
0
    paging_lock(d);
438
0
439
0
    if ( !d->arch.paging.preempt.dom )
440
0
        memset(&d->arch.paging.preempt.log_dirty, 0,
441
0
               sizeof(d->arch.paging.preempt.log_dirty));
442
0
    else if ( d->arch.paging.preempt.dom != current->domain ||
443
0
              d->arch.paging.preempt.op != sc->op )
444
0
    {
445
0
        paging_unlock(d);
446
0
        ASSERT(!resuming);
447
0
        domain_unpause(d);
448
0
        return -EBUSY;
449
0
    }
450
0
451
0
    clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
452
0
453
0
    PAGING_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n",
454
0
                 (clean) ? "clean" : "peek",
455
0
                 d->domain_id,
456
0
                 d->arch.paging.log_dirty.fault_count,
457
0
                 d->arch.paging.log_dirty.dirty_count);
458
0
459
0
    sc->stats.fault_count = d->arch.paging.log_dirty.fault_count;
460
0
    sc->stats.dirty_count = d->arch.paging.log_dirty.dirty_count;
461
0
462
0
    if ( guest_handle_is_null(sc->dirty_bitmap) )
463
0
        /* caller may have wanted just to clean the state or access stats. */
464
0
        peek = 0;
465
0
466
0
    if ( unlikely(d->arch.paging.log_dirty.failed_allocs) ) {
467
0
        printk(XENLOG_WARNING
468
0
               "%u failed page allocs while logging dirty pages of d%d\n",
469
0
               d->arch.paging.log_dirty.failed_allocs, d->domain_id);
470
0
        rv = -ENOMEM;
471
0
        goto out;
472
0
    }
473
0
474
0
    l4 = paging_map_log_dirty_bitmap(d);
475
0
    i4 = d->arch.paging.preempt.log_dirty.i4;
476
0
    i3 = d->arch.paging.preempt.log_dirty.i3;
477
0
    pages = d->arch.paging.preempt.log_dirty.done;
478
0
479
0
    for ( ; (pages < sc->pages) && (i4 < LOGDIRTY_NODE_ENTRIES); i4++, i3 = 0 )
480
0
    {
481
0
        l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(l4[i4]) : NULL;
482
0
        for ( ; (pages < sc->pages) && (i3 < LOGDIRTY_NODE_ENTRIES); i3++ )
483
0
        {
484
0
            l2 = ((l3 && mfn_valid(l3[i3])) ?
485
0
                  map_domain_page(l3[i3]) : NULL);
486
0
            for ( i2 = 0;
487
0
                  (pages < sc->pages) && (i2 < LOGDIRTY_NODE_ENTRIES);
488
0
                  i2++ )
489
0
            {
490
0
                unsigned int bytes = PAGE_SIZE;
491
0
                l1 = ((l2 && mfn_valid(l2[i2])) ?
492
0
                      map_domain_page(l2[i2]) : NULL);
493
0
                if ( unlikely(((sc->pages - pages + 7) >> 3) < bytes) )
494
0
                    bytes = (unsigned int)((sc->pages - pages + 7) >> 3);
495
0
                if ( likely(peek) )
496
0
                {
497
0
                    if ( (l1 ? copy_to_guest_offset(sc->dirty_bitmap,
498
0
                                                    pages >> 3, (uint8_t *)l1,
499
0
                                                    bytes)
500
0
                             : clear_guest_offset(sc->dirty_bitmap,
501
0
                                                  pages >> 3, bytes)) != 0 )
502
0
                    {
503
0
                        rv = -EFAULT;
504
0
                        goto out;
505
0
                    }
506
0
                }
507
0
                pages += bytes << 3;
508
0
                if ( l1 )
509
0
                {
510
0
                    if ( clean )
511
0
                        clear_page(l1);
512
0
                    unmap_domain_page(l1);
513
0
                }
514
0
            }
515
0
            if ( l2 )
516
0
                unmap_domain_page(l2);
517
0
518
0
            if ( i3 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
519
0
            {
520
0
                d->arch.paging.preempt.log_dirty.i4 = i4;
521
0
                d->arch.paging.preempt.log_dirty.i3 = i3 + 1;
522
0
                rv = -ERESTART;
523
0
                break;
524
0
            }
525
0
        }
526
0
        if ( l3 )
527
0
            unmap_domain_page(l3);
528
0
529
0
        if ( !rv && i4 < LOGDIRTY_NODE_ENTRIES - 1 &&
530
0
             hypercall_preempt_check() )
531
0
        {
532
0
            d->arch.paging.preempt.log_dirty.i4 = i4 + 1;
533
0
            d->arch.paging.preempt.log_dirty.i3 = 0;
534
0
            rv = -ERESTART;
535
0
        }
536
0
        if ( rv )
537
0
            break;
538
0
    }
539
0
    if ( l4 )
540
0
        unmap_domain_page(l4);
541
0
542
0
    if ( !rv )
543
0
    {
544
0
        d->arch.paging.preempt.dom = NULL;
545
0
        if ( clean )
546
0
        {
547
0
            d->arch.paging.log_dirty.fault_count = 0;
548
0
            d->arch.paging.log_dirty.dirty_count = 0;
549
0
        }
550
0
    }
551
0
    else
552
0
    {
553
0
        d->arch.paging.preempt.dom = current->domain;
554
0
        d->arch.paging.preempt.op = sc->op;
555
0
        d->arch.paging.preempt.log_dirty.done = pages;
556
0
    }
557
0
558
0
    paging_unlock(d);
559
0
560
0
    if ( rv )
561
0
    {
562
0
        /* Never leave the domain paused on real errors. */
563
0
        ASSERT(rv == -ERESTART);
564
0
        return rv;
565
0
    }
566
0
567
0
    if ( pages < sc->pages )
568
0
        sc->pages = pages;
569
0
    if ( clean )
570
0
    {
571
0
        /* We need to further call clean_dirty_bitmap() functions of specific
572
0
         * paging modes (shadow or hap).  Safe because the domain is paused. */
573
0
        d->arch.paging.log_dirty.ops->clean(d);
574
0
    }
575
0
    domain_unpause(d);
576
0
    return rv;
577
0
578
0
 out:
579
0
    d->arch.paging.preempt.dom = NULL;
580
0
    paging_unlock(d);
581
0
    domain_unpause(d);
582
0
583
0
    if ( l1 )
584
0
        unmap_domain_page(l1);
585
0
    if ( l2 )
586
0
        unmap_domain_page(l2);
587
0
    if ( l3 )
588
0
        unmap_domain_page(l3);
589
0
    if ( l4 )
590
0
        unmap_domain_page(l4);
591
0
592
0
    return rv;
593
0
}
 594
 595         void paging_log_dirty_range(struct domain *d,
 596                                    unsigned long begin_pfn,
 597                                    unsigned long nr,
 598                                    uint8_t *dirty_bitmap)
 599      0  {
 600      0      struct p2m_domain *p2m = p2m_get_hostp2m(d);
 601      0      int i;
 602      0      unsigned long pfn;
 603      0
 604      0      /*
 605      0       * Set l1e entries of P2M table to be read-only.
 606      0       *
 607      0       * On first write, it page faults, its entry is changed to read-write,
 608      0       * and on retry the write succeeds.
 609      0       *
 610      0       * We populate dirty_bitmap by looking for entries that have been
 611      0       * switched to read-write.
 612      0       */
 613      0
 614      0      p2m_lock(p2m);
 615      0
 616      0      for ( i = 0, pfn = begin_pfn; pfn < begin_pfn + nr; i++, pfn++ )
 617      0          if ( !p2m_change_type_one(d, pfn, p2m_ram_rw, p2m_ram_logdirty) )
 618      0              dirty_bitmap[i >> 3] |= (1 << (i & 7));
 619      0
 620      0      p2m_unlock(p2m);
 621      0
 622      0      flush_tlb_mask(d->domain_dirty_cpumask);
 623      0  }
 624
 625         /*
 626          * Callers must supply log_dirty_ops for the log dirty code to call. This
 627          * function usually is invoked when paging is enabled. Check shadow_enable()
 628          * and hap_enable() for reference.
 629          *
 630          * These function pointers must not be followed with the log-dirty lock held.
 631          */
 632         void paging_log_dirty_init(struct domain *d, const struct log_dirty_ops *ops)
 633      1  {
 634      1      d->arch.paging.log_dirty.ops = ops;
 635      1  }
 636
 637         /************************************************/
 638         /*           CODE FOR PAGING SUPPORT            */
 639         /************************************************/
 640         /* Domain paging struct initialization. */
 641         int paging_domain_init(struct domain *d, unsigned int domcr_flags)
 642      1  {
 643      1      int rc;
 644      1
 645      1      if ( (rc = p2m_init(d)) != 0 )
 646      0          return rc;
 647      1
 648      1      mm_lock_init(&d->arch.paging.lock);
 649      1
 650      1      /* This must be initialized separately from the rest of the
 651      1       * log-dirty init code as that can be called more than once and we
 652      1       * don't want to leak any active log-dirty bitmaps */
 653      1      d->arch.paging.log_dirty.top = INVALID_MFN;
 654      1
 655      1      /*
 656      1       * Shadow pagetables are the default, but we will use
 657      1       * hardware assistance if it's available and enabled.
 658      1       */
 659      1      if ( hap_enabled(d) )
 660      1          hap_domain_init(d);
 661      1      else
 662      0          rc = shadow_domain_init(d, domcr_flags);
 663      1
 664      1      return rc;
 665      1  }
 666
 667         /* vcpu paging struct initialization goes here */
 668         void paging_vcpu_init(struct vcpu *v)
 669     12  {
 670     12      if ( hap_enabled(v->domain) )
 671     12          hap_vcpu_init(v);
 672     12      else
 673      0          shadow_vcpu_init(v);
 674     12  }
 675
 676
 677         int paging_domctl(struct domain *d, struct xen_domctl_shadow_op *sc,
 678                           XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl,
 679                           bool_t resuming)
 680      0  {
 681      0      int rc;
 682      0
 683      0      if ( unlikely(d == current->domain) )
 684      0      {
 685      0          gdprintk(XENLOG_INFO, "Tried to do a paging op on itself.\n");
 686      0          return -EINVAL;
 687      0      }
 688      0
 689      0      if ( unlikely(d->is_dying) )
 690      0      {
 691      0          gdprintk(XENLOG_INFO, "Ignoring paging op on dying domain %u\n",
 692      0                   d->domain_id);
 693      0          return 0;
 694      0      }
 695      0
 696      0      if ( unlikely(d->vcpu == NULL) || unlikely(d->vcpu[0] == NULL) )
 697      0      {
 698      0          gdprintk(XENLOG_DEBUG, "Paging op on a domain (%u) with no vcpus\n",
 699      0                   d->domain_id);
 700      0          return -EINVAL;
 701      0      }
 702      0
 703      0      if ( resuming
 704      0           ? (d->arch.paging.preempt.dom != current->domain ||
 705      0              d->arch.paging.preempt.op != sc->op)
 706      0           : (d->arch.paging.preempt.dom &&
 707      0              sc->op != XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION) )
 708      0      {
 709      0          printk(XENLOG_G_DEBUG
 710      0                 "%pv: Paging op %#x on Dom%u with unfinished prior op %#x by Dom%u\n",
 711      0                 current, sc->op, d->domain_id, d->arch.paging.preempt.op,
 712      0                 d->arch.paging.preempt.dom
 713      0                 ? d->arch.paging.preempt.dom->domain_id : DOMID_INVALID);
 714      0          return -EBUSY;
 715      0      }
 716      0
 717      0      rc = xsm_shadow_control(XSM_HOOK, d, sc->op);
 718      0      if ( rc )
 719      0          return rc;
 720      0
 721      0      /* Code to handle log-dirty. Note that some log dirty operations
 722      0       * piggy-back on shadow operations. For example, when
 723      0       * XEN_DOMCTL_SHADOW_OP_OFF is called, it first checks whether log dirty
 724      0       * mode is enabled. If does, we disables log dirty and continues with
 725      0       * shadow code. For this reason, we need to further dispatch domctl
 726      0       * to next-level paging code (shadow or hap).
 727      0       */
 728      0      switch ( sc->op )
 729      0      {
 730      0
 731      0      case XEN_DOMCTL_SHADOW_OP_ENABLE:
 732      0          if ( !(sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY) )
 733      0              break;
 734      0          /* Else fall through... */
 735      0      case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
 736      0          return paging_log_dirty_enable(d, 1);
 737      0
 738      0      case XEN_DOMCTL_SHADOW_OP_OFF:
 739      0          if ( (rc = paging_log_dirty_disable(d, resuming)) != 0 )
 740      0              return rc;
 741      0          break;
 742      0
 743      0      case XEN_DOMCTL_SHADOW_OP_CLEAN:
 744      0      case XEN_DOMCTL_SHADOW_OP_PEEK:
 745      0          if ( sc->mode & ~XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL )
 746      0              return -EINVAL;
 747      0          return paging_log_dirty_op(d, sc, resuming);
 748      0      }
 749      0
 750      0      /* Here, dispatch domctl to the appropriate paging code */
 751      0      if ( hap_enabled(d) )
 752      0          return hap_domctl(d, sc, u_domctl);
 753      0      else
 754      0          return shadow_domctl(d, sc, u_domctl);
 755      0  }
 756
 757         long paging_domctl_continuation(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
 758      0  {
 759      0      struct xen_domctl op;
 760      0      struct domain *d;
 761      0      int ret;
 762      0
 763      0      if ( copy_from_guest(&op, u_domctl, 1) )
 764      0          return -EFAULT;
 765      0
 766      0      if ( op.interface_version != XEN_DOMCTL_INTERFACE_VERSION ||
 767      0           op.cmd != XEN_DOMCTL_shadow_op )
 768      0          return -EOPNOTSUPP;
 769      0
 770      0      d = rcu_lock_domain_by_id(op.domain);
 771      0      if ( d == NULL )
 772      0          return -ESRCH;
 773      0
 774      0      ret = xsm_domctl(XSM_OTHER, d, op.cmd);
 775      0      if ( !ret )
 776      0      {
 777      0          if ( domctl_lock_acquire() )
 778      0          {
 779      0              ret = paging_domctl(d, &op.u.shadow_op, u_domctl, 1);
 780      0
 781      0              domctl_lock_release();
 782      0          }
 783      0          else
 784      0              ret = -ERESTART;
 785      0      }
 786      0
 787      0      rcu_unlock_domain(d);
 788      0
 789      0      if ( ret == -ERESTART )
 790      0          ret = hypercall_create_continuation(__HYPERVISOR_arch_1,
 791      0                                              "h", u_domctl);
 792      0      else if ( __copy_field_to_guest(u_domctl, &op, u.shadow_op) )
 793      0          ret = -EFAULT;
 794      0
 795      0      return ret;
 796      0  }
 797
 798         /* Call when destroying a domain */
 799         int paging_teardown(struct domain *d)
 800      0  {
 801      0      int rc;
 802      0      bool preempted = false;
 803      0
 804      0      if ( hap_enabled(d) )
 805      0          hap_teardown(d, &preempted);
 806      0      else
 807      0          shadow_teardown(d, &preempted);
 808      0
 809      0      if ( preempted )
 810      0          return -ERESTART;
 811      0
 812      0      /* clean up log dirty resources. */
 813      0      rc = paging_free_log_dirty_bitmap(d, 0);
 814      0      if ( rc == -ERESTART )
 815      0          return rc;
 816      0
 817      0      /* Move populate-on-demand cache back to domain_list for destruction */
 818      0      rc = p2m_pod_empty_cache(d);
 819      0
 820      0      return rc;
 821      0  }
 822
 823         /* Call once all of the references to the domain have gone away */
 824         void paging_final_teardown(struct domain *d)
 825      0  {
 826      0      if ( hap_enabled(d) )
 827      0          hap_final_teardown(d);
 828      0      else
 829      0          shadow_final_teardown(d);
 830      0
 831      0      p2m_final_teardown(d);
 832      0  }
 833
 834         /* Enable an arbitrary paging-assistance mode.  Call once at domain
 835          * creation. */
 836         int paging_enable(struct domain *d, u32 mode)
 837      1  {
 838      1      /* Unrecognised paging mode? */
 839      1      if ( mode & ~PG_MASK )
 840      0          return -EINVAL;
 841      1
 842      1      /* All of external|translate|refcounts, or none. */
 843      1      switch ( mode & (PG_external | PG_translate | PG_refcounts) )
 844      1      {
 845      1      case 0:
 846      1      case PG_external | PG_translate | PG_refcounts:
 847      1          break;
 848      0      default:
 849      0          return -EINVAL;
 850      1      }
 851      1
 852      1      if ( hap_enabled(d) )
 853      1          return hap_enable(d, mode);
 854      1      else
 855      0          return shadow_enable(d, mode);
 856      1  }
 857
 858         /* Called from the guest to indicate that a process is being torn down
 859          * and therefore its pagetables will soon be discarded */
 860         void pagetable_dying(struct domain *d, paddr_t gpa)
 861      0  {
 862      0  #ifdef CONFIG_SHADOW_PAGING
 863      0      struct vcpu *v;
 864      0
 865      0      ASSERT(paging_mode_shadow(d));
 866      0
 867      0      v = d->vcpu[0];
 868      0      v->arch.paging.mode->shadow.pagetable_dying(v, gpa);
 869      0  #else
 870             BUG();
 871         #endif
 872      0  }
 873
 874         /* Print paging-assistance info to the console */
 875         void paging_dump_domain_info(struct domain *d)
 876      0  {
 877      0      if ( paging_mode_enabled(d) )
 878      0      {
 879      0          printk("    paging assistance: ");
 880      0          if ( paging_mode_shadow(d) )
 881      0              printk("shadow ");
 882      0          if ( paging_mode_hap(d) )
 883      0              printk("hap ");
 884      0          if ( paging_mode_refcounts(d) )
 885      0              printk("refcounts ");
 886      0          if ( paging_mode_log_dirty(d) )
 887      0              printk("log_dirty ");
 888      0          if ( paging_mode_translate(d) )
 889      0              printk("translate ");
 890      0          if ( paging_mode_external(d) )
 891      0              printk("external ");
 892      0          printk("\n");
 893      0      }
 894      0  }
 895
 896         void paging_dump_vcpu_info(struct vcpu *v)
 897      0  {
 898      0      if ( paging_mode_enabled(v->domain) )
 899      0      {
 900      0          printk("    paging assistance: ");
 901      0          if ( paging_mode_shadow(v->domain) )
 902      0          {
 903      0              if ( paging_get_hostmode(v) )
 904      0                  printk("shadowed %u-on-%u\n",
 905      0                         paging_get_hostmode(v)->guest_levels,
 906      0                         paging_get_hostmode(v)->shadow.shadow_levels);
 907      0              else
 908      0                  printk("not shadowed\n");
 909      0          }
 910      0          else if ( paging_mode_hap(v->domain) && paging_get_hostmode(v) )
 911      0              printk("hap, %u levels\n",
 912      0                     paging_get_hostmode(v)->guest_levels);
 913      0          else
 914      0              printk("none\n");
 915      0      }
 916      0  }
 917
 918         const struct paging_mode *paging_get_mode(struct vcpu *v)
 919      0  {
 920      0      if (!nestedhvm_is_n2(v))
 921      0          return paging_get_hostmode(v);
 922      0
 923      0      return paging_get_nestedmode(v);
 924      0  }
 925
 926         void paging_update_nestedmode(struct vcpu *v)
 927      0  {
 928      0      ASSERT(nestedhvm_enabled(v->domain));
 929      0      if (nestedhvm_paging_mode_hap(v))
 930      0          /* nested-on-nested */
 931      0          v->arch.paging.nestedmode = hap_paging_get_mode(v);
 932      0      else
 933      0          /* TODO: shadow-on-shadow */
 934      0          v->arch.paging.nestedmode = NULL;
 935      0      hvm_asid_flush_vcpu(v);
 936      0  }
 937
 938         void paging_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
 939                                     l1_pgentry_t *p, l1_pgentry_t new,
 940                                     unsigned int level)
 941      0  {
 942      0      struct domain *d = p2m->domain;
 943      0      struct vcpu *v = current;
 944      0      if ( v->domain != d )
 945      0          v = d->vcpu ? d->vcpu[0] : NULL;
 946      0      if ( likely(v && paging_mode_enabled(d) && paging_get_hostmode(v) != NULL) )
 947      0          paging_get_hostmode(v)->write_p2m_entry(d, gfn, p, new, level);
 948      0      else
 949      0          safe_write_pte(p, new);
 950      0  }
 951
 952         int paging_set_allocation(struct domain *d, unsigned int pages, bool *preempted)
 953  34.5k  {
 954  34.5k      int rc;
 955  34.5k
 956  34.5k      ASSERT(paging_mode_enabled(d));
 957  34.5k
 958  34.5k      paging_lock(d);
 959  34.5k      if ( hap_enabled(d) )
 960  34.5k          rc = hap_set_allocation(d, pages, preempted);
 961  34.5k      else
 962      0          rc = shadow_set_allocation(d, pages, preempted);
 963  34.5k      paging_unlock(d);
 964  34.5k
 965  34.5k      return rc;
 966  34.5k  }
 967
 968         /*
 969          * Local variables:
 970          * mode: C
 971          * c-file-style: "BSD"
 972          * c-basic-offset: 4
 973          * indent-tabs-mode: nil
 974          * End:
 975          */