Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/arch/x86/mm/hap/hap.c
Line   Count  Source (jump to first uncovered line)
   1          /******************************************************************************
   2           * arch/x86/mm/hap/hap.c
   3           *
   4           * hardware assisted paging
   5           * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
   6           * Parts of this code are Copyright (c) 2007 by XenSource Inc.
   7           *
   8           * This program is free software; you can redistribute it and/or modify
   9           * it under the terms of the GNU General Public License as published by
  10           * the Free Software Foundation; either version 2 of the License, or
  11           * (at your option) any later version.
  12           *
  13           * This program is distributed in the hope that it will be useful,
  14           * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15           * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16           * GNU General Public License for more details.
  17           *
  18           * You should have received a copy of the GNU General Public License
  19           * along with this program; If not, see <http://www.gnu.org/licenses/>.
  20           */
  21
  22          #include <xen/types.h>
  23          #include <xen/mm.h>
  24          #include <xen/trace.h>
  25          #include <xen/sched.h>
  26          #include <xen/perfc.h>
  27          #include <xen/irq.h>
  28          #include <xen/domain_page.h>
  29          #include <xen/guest_access.h>
  30          #include <xen/keyhandler.h>
  31          #include <asm/event.h>
  32          #include <asm/page.h>
  33          #include <asm/current.h>
  34          #include <asm/flushtlb.h>
  35          #include <asm/shared.h>
  36          #include <asm/hap.h>
  37          #include <asm/paging.h>
  38          #include <asm/p2m.h>
  39          #include <asm/domain.h>
  40          #include <xen/numa.h>
  41          #include <asm/hvm/nestedhvm.h>
  42
  43          #include "private.h"
  44
  45          /* Override macros from asm/page.h to make them work with mfn_t */
  46          #undef mfn_to_page
  47       0  #define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
  48          #undef page_to_mfn
  49   1.31k  #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
  50
  51          /************************************************/
  52          /*          HAP VRAM TRACKING SUPPORT           */
  53          /************************************************/
  54
  55          /*
  56           * hap_track_dirty_vram()
  57           * Create the domain's dv_dirty_vram struct on demand.
  58           * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr] is
  59           * first encountered.
  60           * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by
  61           * calling paging_log_dirty_range(), which interrogates each vram
  62           * page's p2m type looking for pages that have been made writable.
  63           */
  64
  65          int hap_track_dirty_vram(struct domain *d,
  66                                   unsigned long begin_pfn,
  67                                   unsigned long nr,
  68                                   XEN_GUEST_HANDLE_PARAM(void) guest_dirty_bitmap)
  69       0  {
  70       0      long rc = 0;
  71       0      struct sh_dirty_vram *dirty_vram;
  72       0      uint8_t *dirty_bitmap = NULL;
  73       0
  74       0      if ( nr )
  75       0      {
  76       0          int size = (nr + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
  77       0
  78       0          if ( !paging_mode_log_dirty(d) )
  79       0          {
  80       0              rc = paging_log_dirty_enable(d, 0);
  81       0              if ( rc )
  82       0                  goto out;
  83       0          }
  84       0
  85       0          rc = -ENOMEM;
  86       0          dirty_bitmap = vzalloc(size);
  87       0          if ( !dirty_bitmap )
  88       0              goto out;
  89       0
  90       0          paging_lock(d);
  91       0
  92       0          dirty_vram = d->arch.hvm_domain.dirty_vram;
  93       0          if ( !dirty_vram )
  94       0          {
  95       0              rc = -ENOMEM;
  96       0              if ( (dirty_vram = xzalloc(struct sh_dirty_vram)) == NULL )
  97       0              {
  98       0                  paging_unlock(d);
  99       0                  goto out;
 100       0              }
 101       0
 102       0              d->arch.hvm_domain.dirty_vram = dirty_vram;
 103       0          }
 104       0
 105       0          if ( begin_pfn != dirty_vram->begin_pfn ||
 106       0               begin_pfn + nr != dirty_vram->end_pfn )
 107       0          {
 108       0              unsigned long ostart = dirty_vram->begin_pfn;
 109       0              unsigned long oend = dirty_vram->end_pfn;
 110       0
 111       0              dirty_vram->begin_pfn = begin_pfn;
 112       0              dirty_vram->end_pfn = begin_pfn + nr;
 113       0
 114       0              paging_unlock(d);
 115       0
 116       0              if ( oend > ostart )
 117       0                  p2m_change_type_range(d, ostart, oend,
 118       0                                        p2m_ram_logdirty, p2m_ram_rw);
 119       0
 120       0              /*
 121       0               * Switch vram to log dirty mode, either by setting l1e entries of
 122       0               * P2M table to be read-only, or via hardware-assisted log-dirty.
 123       0               */
 124       0              p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
 125       0                                    p2m_ram_rw, p2m_ram_logdirty);
 126       0
 127       0              flush_tlb_mask(d->domain_dirty_cpumask);
 128       0
 129       0              memset(dirty_bitmap, 0xff, size); /* consider all pages dirty */
 130       0          }
 131       0          else
 132       0          {
 133       0              paging_unlock(d);
 134       0
 135       0              domain_pause(d);
 136       0
 137       0              /* Flush dirty GFNs potentially cached by hardware. */
 138       0              p2m_flush_hardware_cached_dirty(d);
 139       0
 140       0              /* get the bitmap */
 141       0              paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
 142       0
 143       0              domain_unpause(d);
 144       0          }
 145       0
 146       0          rc = -EFAULT;
 147       0          if ( copy_to_guest(guest_dirty_bitmap, dirty_bitmap, size) == 0 )
 148       0              rc = 0;
 149       0      }
 150       0      else
 151       0      {
 152       0          paging_lock(d);
 153       0
 154       0          dirty_vram = d->arch.hvm_domain.dirty_vram;
 155       0          if ( dirty_vram )
 156       0          {
 157       0              /*
 158       0               * If zero pages specified while tracking dirty vram
 159       0               * then stop tracking
 160       0               */
 161       0              begin_pfn = dirty_vram->begin_pfn;
 162       0              nr = dirty_vram->end_pfn - dirty_vram->begin_pfn;
 163       0              xfree(dirty_vram);
 164       0              d->arch.hvm_domain.dirty_vram = NULL;
 165       0          }
 166       0
 167       0          paging_unlock(d);
 168       0          if ( nr )
 169       0              p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
 170       0                                    p2m_ram_logdirty, p2m_ram_rw);
 171       0      }
 172       0  out:
 173       0      vfree(dirty_bitmap);
 174       0
 175       0      return rc;
 176       0  }
 177
 178          /************************************************/
 179          /*            HAP LOG DIRTY SUPPORT             */
 180          /************************************************/
 181
 182          /*
 183           * hap code to call when log_dirty is enable. return 0 if no problem found.
 184           *
 185           * NB: Domain that having device assigned should not set log_global. Because
 186           * there is no way to track the memory updating from device.
 187           */
 188          static int hap_enable_log_dirty(struct domain *d, bool_t log_global)
 189       0  {
 190       0      struct p2m_domain *p2m = p2m_get_hostp2m(d);
 191       0
 192       0      /*
 193       0       * Refuse to turn on global log-dirty mode if
 194       0       * there are outstanding p2m_ioreq_server pages.
 195       0       */
 196       0      if ( log_global && read_atomic(&p2m->ioreq.entry_count) )
 197       0          return -EBUSY;
 198       0
 199       0      /* turn on PG_log_dirty bit in paging mode */
 200       0      paging_lock(d);
 201       0      d->arch.paging.mode |= PG_log_dirty;
 202       0      paging_unlock(d);
 203       0
 204       0      /* Enable hardware-assisted log-dirty if it is supported. */
 205       0      p2m_enable_hardware_log_dirty(d);
 206       0
 207       0      if ( log_global )
 208       0      {
 209       0          /*
 210       0           * Switch to log dirty mode, either by setting l1e entries of P2M table
 211       0           * to be read-only, or via hardware-assisted log-dirty.
 212       0           */
 213       0          p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
 214       0          flush_tlb_mask(d->domain_dirty_cpumask);
 215       0      }
 216       0      return 0;
 217       0  }
 218
 219          static int hap_disable_log_dirty(struct domain *d)
 220       0  {
 221       0      paging_lock(d);
 222       0      d->arch.paging.mode &= ~PG_log_dirty;
 223       0      paging_unlock(d);
 224       0
 225       0      /* Disable hardware-assisted log-dirty if it is supported. */
 226       0      p2m_disable_hardware_log_dirty(d);
 227       0
 228       0      /*
 229       0       * switch to normal mode, either by setting l1e entries of P2M table to
 230       0       * normal mode, or via hardware-assisted log-dirty.
 231       0       */
 232       0      p2m_change_entry_type_global(d, p2m_ram_logdirty, p2m_ram_rw);
 233       0      return 0;
 234       0  }
 235
 236          static void hap_clean_dirty_bitmap(struct domain *d)
 237       0  {
 238       0      /*
 239       0       * Switch to log-dirty mode, either by setting l1e entries of P2M table to
 240       0       * be read-only, or via hardware-assisted log-dirty.
 241       0       */
 242       0      p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
 243       0      flush_tlb_mask(d->domain_dirty_cpumask);
 244       0  }
 245
 246          /************************************************/
 247          /*             HAP SUPPORT FUNCTIONS            */
 248          /************************************************/
 249          static struct page_info *hap_alloc(struct domain *d)
 250   1.29k  {
 251   1.29k      struct page_info *pg;
 252   1.29k
 253   1.29k      ASSERT(paging_locked_by_me(d));
 254   1.29k
 255   1.29k      pg = page_list_remove_head(&d->arch.paging.hap.freelist);
 256   1.29k      if ( unlikely(!pg) )
 257       0          return NULL;
 258   1.29k
 259   1.29k      d->arch.paging.hap.free_pages--;
 260   1.29k
 261   1.29k      clear_domain_page(page_to_mfn(pg));
 262   1.29k
 263   1.29k      return pg;
 264   1.29k  }
 265
 266          static void hap_free(struct domain *d, mfn_t mfn)
 267       0  {
 268       0      struct page_info *pg = mfn_to_page(mfn);
 269       0
 270       0      ASSERT(paging_locked_by_me(d));
 271       0
 272       0      d->arch.paging.hap.free_pages++;
 273       0      page_list_add_tail(pg, &d->arch.paging.hap.freelist);
 274       0  }
 275
 276          static struct page_info *hap_alloc_p2m_page(struct domain *d)
 277   1.28k  {
 278   1.28k      struct page_info *pg;
 279   1.28k
 280   1.28k      /* This is called both from the p2m code (which never holds the 
 281   1.28k       * paging lock) and the log-dirty code (which always does). */
 282   1.28k      paging_lock_recursive(d);
 283   1.28k      pg = hap_alloc(d);
 284   1.28k
 285   1.28k      if ( likely(pg != NULL) )
 286   1.28k      {
 287   1.28k          d->arch.paging.hap.total_pages--;
 288   1.28k          d->arch.paging.hap.p2m_pages++;
 289   1.28k          page_set_owner(pg, d);
 290   1.28k          pg->count_info |= 1;
 291   1.28k      }
 292       0      else if ( !d->arch.paging.p2m_alloc_failed )
 293       0      {
 294       0          d->arch.paging.p2m_alloc_failed = 1;
 295       0          dprintk(XENLOG_ERR, "d%i failed to allocate from HAP pool\n",
 296       0                  d->domain_id);
 297       0      }
 298   1.28k
 299   1.28k      paging_unlock(d);
 300   1.28k      return pg;
 301   1.28k  }
 302
 303          static void hap_free_p2m_page(struct domain *d, struct page_info *pg)
 304       0  {
 305       0      /* This is called both from the p2m code (which never holds the 
 306       0       * paging lock) and the log-dirty code (which always does). */
 307       0      paging_lock_recursive(d);
 308       0
 309       0      ASSERT(page_get_owner(pg) == d);
 310       0      /* Should have just the one ref we gave it in alloc_p2m_page() */
 311       0      if ( (pg->count_info & PGC_count_mask) != 1 ) {
 312       0          HAP_ERROR("Odd p2m page %p count c=%#lx t=%"PRtype_info"\n",
 313       0                       pg, pg->count_info, pg->u.inuse.type_info);
 314       0          WARN();
 315       0      }
 316       0      pg->count_info &= ~PGC_count_mask;
 317       0      /* Free should not decrement domain's total allocation, since
 318       0       * these pages were allocated without an owner. */
 319       0      page_set_owner(pg, NULL);
 320       0      d->arch.paging.hap.p2m_pages--;
 321       0      d->arch.paging.hap.total_pages++;
 322       0      hap_free(d, page_to_mfn(pg));
 323       0
 324       0      paging_unlock(d);
 325       0  }
 326
 327          /* Return the size of the pool, rounded up to the nearest MB */
 328          static unsigned int
 329          hap_get_allocation(struct domain *d)
 330       0  {
 331       0      unsigned int pg = d->arch.paging.hap.total_pages
 332       0          + d->arch.paging.hap.p2m_pages;
 333       0
 334       0      return ((pg >> (20 - PAGE_SHIFT))
 335       0              + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
 336       0  }
 337
 338          /* Set the pool of pages to the required number of pages.
 339           * Returns 0 for success, non-zero for failure. */
 340          int hap_set_allocation(struct domain *d, unsigned int pages, bool *preempted)
 341   34.5k  {
 342   34.5k      struct page_info *pg;
 343   34.5k
 344   34.5k      ASSERT(paging_locked_by_me(d));
 345   34.5k
 346   34.5k      if ( pages < d->arch.paging.hap.p2m_pages )
 347       0          pages = 0;
 348   34.5k      else
 349   34.5k          pages -= d->arch.paging.hap.p2m_pages;
 350   34.5k
 351   34.5k      for ( ; ; )
 352   34.8k      {
 353   34.8k          if ( d->arch.paging.hap.total_pages < pages )
 354   34.8k          {
 355   34.8k              /* Need to allocate more memory from domheap */
 356   34.8k              pg = alloc_domheap_page(d, MEMF_no_owner);
 357   34.8k              if ( pg == NULL )
 358       0              {
 359       0                  HAP_PRINTK("failed to allocate hap pages.\n");
 360       0                  return -ENOMEM;
 361       0              }
 362   34.8k              d->arch.paging.hap.free_pages++;
 363   34.8k              d->arch.paging.hap.total_pages++;
 364   34.8k              page_list_add_tail(pg, &d->arch.paging.hap.freelist);
 365   34.8k          }
 366       2          else if ( d->arch.paging.hap.total_pages > pages )
 367       0          {
 368       0              /* Need to return memory to domheap */
 369       0              if ( page_list_empty(&d->arch.paging.hap.freelist) )
 370       0              {
 371       0                  HAP_PRINTK("failed to free enough hap pages.\n");
 372       0                  return -ENOMEM;
 373       0              }
 374       0              pg = page_list_remove_head(&d->arch.paging.hap.freelist);
 375       0              ASSERT(pg);
 376       0              d->arch.paging.hap.free_pages--;
 377       0              d->arch.paging.hap.total_pages--;
 378       0              free_domheap_page(pg);
 379       0          }
 380       2          else
 381       2              break;
 382   34.8k
 383   34.8k          /* Check to see if we need to yield and try again */
 384   34.8k          if ( preempted && general_preempt_check() )
 385   34.5k          {
 386   34.5k              *preempted = true;
 387   34.5k              return 0;
 388   34.5k          }
 389   34.8k      }
 390   34.5k
 391       2      return 0;
 392   34.5k  }
 393
 394          static mfn_t hap_make_monitor_table(struct vcpu *v)
 395      12  {
 396      12      struct domain *d = v->domain;
 397      12      struct page_info *pg;
 398      12      l4_pgentry_t *l4e;
 399      12      mfn_t m4mfn;
 400      12
 401      12      ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0);
 402      12
 403      12      if ( (pg = hap_alloc(d)) == NULL )
 404       0          goto oom;
 405      12
 406      12      m4mfn = page_to_mfn(pg);
 407      12      l4e = map_domain_page(m4mfn);
 408      12
 409      12      init_xen_l4_slots(l4e, m4mfn, d, INVALID_MFN, false);
 410      12      unmap_domain_page(l4e);
 411      12
 412      12      return m4mfn;
 413      12
 414       0   oom:
 415       0      HAP_ERROR("out of memory building monitor pagetable\n");
 416       0      domain_crash(d);
 417       0      return INVALID_MFN;
 418      12  }
 419
 420          static void hap_destroy_monitor_table(struct vcpu* v, mfn_t mmfn)
 421       0  {
 422       0      struct domain *d = v->domain;
 423       0
 424       0      /* Put the memory back in the pool */
 425       0      hap_free(d, mmfn);
 426       0  }
 427
 428          /************************************************/
 429          /*          HAP DOMAIN LEVEL FUNCTIONS          */
 430          /************************************************/
 431          void hap_domain_init(struct domain *d)
 432       1  {
 433       1      static const struct log_dirty_ops hap_ops = {
 434       1          .enable  = hap_enable_log_dirty,
 435       1          .disable = hap_disable_log_dirty,
 436       1          .clean   = hap_clean_dirty_bitmap,
 437       1      };
 438       1
 439       1      INIT_PAGE_LIST_HEAD(&d->arch.paging.hap.freelist);
 440       1
 441       1      /* Use HAP logdirty mechanism. */
 442       1      paging_log_dirty_init(d, &hap_ops);
 443       1  }
 444
 445          /* return 0 for success, -errno for failure */
 446          int hap_enable(struct domain *d, u32 mode)
 447       1  {
 448       1      unsigned int old_pages;
 449       1      unsigned int i;
 450       1      int rv = 0;
 451       1
 452       1      domain_pause(d);
 453       1
 454       1      old_pages = d->arch.paging.hap.total_pages;
 455       1      if ( old_pages == 0 )
 456       1      {
 457       1          paging_lock(d);
 458       1          rv = hap_set_allocation(d, 256, NULL);
 459       1          if ( rv != 0 )
 460       0          {
 461       0              hap_set_allocation(d, 0, NULL);
 462       0              paging_unlock(d);
 463       0              goto out;
 464       0          }
 465       1          paging_unlock(d);
 466       1      }
 467       1
 468       1      /* Allow p2m and log-dirty code to borrow our memory */
 469       1      d->arch.paging.alloc_page = hap_alloc_p2m_page;
 470       1      d->arch.paging.free_page = hap_free_p2m_page;
 471       1
 472       1      /* allocate P2m table */
 473       1      if ( mode & PG_translate )
 474       1      {
 475       1          rv = p2m_alloc_table(p2m_get_hostp2m(d));
 476       1          if ( rv != 0 )
 477       0              goto out;
 478       1      }
 479       1
 480      11      for (i = 0; i < MAX_NESTEDP2M; i++) {
 481      10          rv = p2m_alloc_table(d->arch.nested_p2m[i]);
 482      10          if ( rv != 0 )
 483       0             goto out;
 484      10      }
 485       1
 486       1      if ( hvm_altp2m_supported() )
 487       0      {
 488       0          /* Init alternate p2m data */
 489       0          if ( (d->arch.altp2m_eptp = alloc_xenheap_page()) == NULL )
 490       0          {
 491       0              rv = -ENOMEM;
 492       0              goto out;
 493       0          }
 494       0
 495       0          for ( i = 0; i < MAX_EPTP; i++ )
 496       0              d->arch.altp2m_eptp[i] = mfn_x(INVALID_MFN);
 497       0
 498       0          for ( i = 0; i < MAX_ALTP2M; i++ )
 499       0          {
 500       0              rv = p2m_alloc_table(d->arch.altp2m_p2m[i]);
 501       0              if ( rv != 0 )
 502       0                 goto out;
 503       0          }
 504       0
 505       0          d->arch.altp2m_active = 0;
 506       0      }
 507       1
 508       1      /* Now let other users see the new mode */
 509       1      d->arch.paging.mode = mode | PG_HAP_enable;
 510       1
 511       1   out:
 512       1      domain_unpause(d);
 513       1      return rv;
 514       1  }
 515
 516          void hap_final_teardown(struct domain *d)
 517       0  {
 518       0      unsigned int i;
 519       0
 520       0      if ( hvm_altp2m_supported() )
 521       0      {
 522       0          d->arch.altp2m_active = 0;
 523       0
 524       0          if ( d->arch.altp2m_eptp )
 525       0          {
 526       0              free_xenheap_page(d->arch.altp2m_eptp);
 527       0              d->arch.altp2m_eptp = NULL;
 528       0          }
 529       0
 530       0          for ( i = 0; i < MAX_ALTP2M; i++ )
 531       0              p2m_teardown(d->arch.altp2m_p2m[i]);
 532       0      }
 533       0
 534       0      /* Destroy nestedp2m's first */
 535       0      for (i = 0; i < MAX_NESTEDP2M; i++) {
 536       0          p2m_teardown(d->arch.nested_p2m[i]);
 537       0      }
 538       0
 539       0      if ( d->arch.paging.hap.total_pages != 0 )
 540       0          hap_teardown(d, NULL);
 541       0
 542       0      p2m_teardown(p2m_get_hostp2m(d));
 543       0      /* Free any memory that the p2m teardown released */
 544       0      paging_lock(d);
 545       0      hap_set_allocation(d, 0, NULL);
 546       0      ASSERT(d->arch.paging.hap.p2m_pages == 0);
 547       0      paging_unlock(d);
 548       0  }
 549
 550          void hap_teardown(struct domain *d, bool *preempted)
 551       0  {
 552       0      struct vcpu *v;
 553       0      mfn_t mfn;
 554       0
 555       0      ASSERT(d->is_dying);
 556       0      ASSERT(d != current->domain);
 557       0
 558       0      paging_lock(d); /* Keep various asserts happy */
 559       0
 560       0      if ( paging_mode_enabled(d) )
 561       0      {
 562       0          /* release the monitor table held by each vcpu */
 563       0          for_each_vcpu ( d, v )
 564       0          {
 565       0              if ( paging_get_hostmode(v) && paging_mode_external(d) )
 566       0              {
 567       0                  mfn = pagetable_get_mfn(v->arch.monitor_table);
 568       0                  if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) )
 569       0                      hap_destroy_monitor_table(v, mfn);
 570       0                  v->arch.monitor_table = pagetable_null();
 571       0              }
 572       0          }
 573       0      }
 574       0
 575       0      if ( d->arch.paging.hap.total_pages != 0 )
 576       0      {
 577       0          hap_set_allocation(d, 0, preempted);
 578       0
 579       0          if ( preempted && *preempted )
 580       0              goto out;
 581       0
 582       0          ASSERT(d->arch.paging.hap.total_pages == 0);
 583       0      }
 584       0
 585       0      d->arch.paging.mode &= ~PG_log_dirty;
 586       0
 587       0      xfree(d->arch.hvm_domain.dirty_vram);
 588       0      d->arch.hvm_domain.dirty_vram = NULL;
 589       0
 590       0  out:
 591       0      paging_unlock(d);
 592       0  }
 593
 594          int hap_domctl(struct domain *d, struct xen_domctl_shadow_op *sc,
 595                         XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
 596       0  {
 597       0      int rc;
 598       0      bool preempted = false;
 599       0
 600       0      switch ( sc->op )
 601       0      {
 602       0      case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
 603       0          paging_lock(d);
 604       0          rc = hap_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted);
 605       0          paging_unlock(d);
 606       0          if ( preempted )
 607       0              /* Not finished.  Set up to re-run the call. */
 608       0              rc = hypercall_create_continuation(__HYPERVISOR_domctl, "h",
 609       0                                                 u_domctl);
 610       0          else
 611       0              /* Finished.  Return the new allocation */
 612       0              sc->mb = hap_get_allocation(d);
 613       0          return rc;
 614       0      case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
 615       0          sc->mb = hap_get_allocation(d);
 616       0          /* Fall through... */
 617       0      case XEN_DOMCTL_SHADOW_OP_OFF:
 618       0          return 0;
 619       0      default:
 620       0          HAP_PRINTK("Bad hap domctl op %u\n", sc->op);
 621       0          return -EINVAL;
 622       0      }
 623       0  }
 624
 625          static const struct paging_mode hap_paging_real_mode;
 626          static const struct paging_mode hap_paging_protected_mode;
 627          static const struct paging_mode hap_paging_pae_mode;
 628          static const struct paging_mode hap_paging_long_mode;
 629
 630          void hap_vcpu_init(struct vcpu *v)
 631      12  {
 632      12      v->arch.paging.mode = &hap_paging_real_mode;
 633      12      v->arch.paging.nestedmode = &hap_paging_real_mode;
 634      12  }
 635
 636          /************************************************/
 637          /*          HAP PAGING MODE FUNCTIONS           */
 638          /************************************************/
 639          /*
 640           * HAP guests can handle page faults (in the guest page tables) without
 641           * needing any action from Xen, so we should not be intercepting them.
 642           */
 643          static int hap_page_fault(struct vcpu *v, unsigned long va,
 644                                    struct cpu_user_regs *regs)
 645       0  {
 646       0      struct domain *d = v->domain;
 647       0
 648       0      HAP_ERROR("Intercepted a guest #PF (%pv) with HAP enabled\n", v);
 649       0      domain_crash(d);
 650       0      return 0;
 651       0  }
 652
 653          /*
 654           * HAP guests can handle invlpg without needing any action from Xen, so
 655           * should not be intercepting it.  However, we need to correctly handle
 656           * getting here from instruction emulation.
 657           */
 658          static bool_t hap_invlpg(struct vcpu *v, unsigned long va)
 659       0  {
 660       0      /*
 661       0       * Emulate INVLPGA:
 662       0       * Must perform the flush right now or an other vcpu may
 663       0       * use it when we use the next VMRUN emulation, otherwise.
 664       0       */
 665       0      if ( nestedhvm_enabled(v->domain) && vcpu_nestedhvm(v).nv_p2m )
 666       0          p2m_flush(v, vcpu_nestedhvm(v).nv_p2m);
 667       0
 668       0      return 1;
 669       0  }
 670
 671          static void hap_update_cr3(struct vcpu *v, int do_locking)
 672      98  {
 673      98      v->arch.hvm_vcpu.hw_cr[3] = v->arch.hvm_vcpu.guest_cr[3];
 674      98      hvm_update_guest_cr(v, 3);
 675      98  }
 676
 677          const struct paging_mode *
 678          hap_paging_get_mode(struct vcpu *v)
 679      98  {
 680      98      return (!hvm_paging_enabled(v)  ? &hap_paging_real_mode :
 681      62              hvm_long_mode_active(v) ? &hap_paging_long_mode :
 682       0              hvm_pae_enabled(v)      ? &hap_paging_pae_mode  :
 683       0                                        &hap_paging_protected_mode);
 684      98  }
 685
 686          static void hap_update_paging_modes(struct vcpu *v)
 687      97  {
 688      97      struct domain *d = v->domain;
 689      97      unsigned long cr3_gfn = v->arch.hvm_vcpu.guest_cr[3] >> PAGE_SHIFT;
 690      97      p2m_type_t t;
 691      97
 692      97      /* We hold onto the cr3 as it may be modified later, and
 693      97       * we need to respect lock ordering. No need for 
 694      97       * checks here as they are performed by vmx_load_pdptrs
 695      97       * (the potential user of the cr3) */
 696      97      (void)get_gfn(d, cr3_gfn, &t);
 697      97      paging_lock(d);
 698      97
 699      97      v->arch.paging.mode = hap_paging_get_mode(v);
 700      97
 701      97      if ( pagetable_is_null(v->arch.monitor_table) )
 702      12      {
 703      12          mfn_t mmfn = hap_make_monitor_table(v);
 704      12          v->arch.monitor_table = pagetable_from_mfn(mmfn);
 705      12          make_cr3(v, mmfn);
 706      12          hvm_update_host_cr3(v);
 707      12      }
 708      97
 709      97      /* CR3 is effectively updated by a mode change. Flush ASIDs, etc. */
 710      97      hap_update_cr3(v, 0);
 711      97
 712      97      paging_unlock(d);
 713      97      put_gfn(d, cr3_gfn);
 714      97  }
 715
 716          static void
 717          hap_write_p2m_entry(struct domain *d, unsigned long gfn, l1_pgentry_t *p,
 718                              l1_pgentry_t new, unsigned int level)
 719       0  {
 720       0      uint32_t old_flags;
 721       0      bool_t flush_nestedp2m = 0;
 722       0
 723       0      /* We know always use the host p2m here, regardless if the vcpu
 724       0       * is in host or guest mode. The vcpu can be in guest mode by
 725       0       * a hypercall which passes a domain and chooses mostly the first
 726       0       * vcpu. */
 727       0
 728       0      paging_lock(d);
 729       0      old_flags = l1e_get_flags(*p);
 730       0
 731       0      if ( nestedhvm_enabled(d) && (old_flags & _PAGE_PRESENT) 
 732       0           && !p2m_get_hostp2m(d)->defer_nested_flush ) {
 733       0          /* We are replacing a valid entry so we need to flush nested p2ms,
 734       0           * unless the only change is an increase in access rights. */
 735       0          mfn_t omfn = l1e_get_mfn(*p);
 736       0          mfn_t nmfn = l1e_get_mfn(new);
 737       0          flush_nestedp2m = !( mfn_x(omfn) == mfn_x(nmfn)
 738       0              && perms_strictly_increased(old_flags, l1e_get_flags(new)) );
 739       0      }
 740       0
 741       0      safe_write_pte(p, new);
 742       0      if ( old_flags & _PAGE_PRESENT )
 743       0          flush_tlb_mask(d->domain_dirty_cpumask);
 744       0
 745       0      paging_unlock(d);
 746       0
 747       0      if ( flush_nestedp2m )
 748       0          p2m_flush_nestedp2m(d);
 749       0  }
 750
 751          static unsigned long hap_gva_to_gfn_real_mode(
 752              struct vcpu *v, struct p2m_domain *p2m, unsigned long gva, uint32_t *pfec)
 753       0  {
 754       0      return ((paddr_t)gva >> PAGE_SHIFT);
 755       0  }
 756
 757          static unsigned long hap_p2m_ga_to_gfn_real_mode(
 758              struct vcpu *v, struct p2m_domain *p2m, unsigned long cr3,
 759              paddr_t ga, uint32_t *pfec, unsigned int *page_order)
 760       0  {
 761       0      if ( page_order )
 762       0          *page_order = PAGE_ORDER_4K;
 763       0      return (ga >> PAGE_SHIFT);
 764       0  }
 765
 766          /* Entry points into this mode of the hap code. */
 767          static const struct paging_mode hap_paging_real_mode = {
 768              .page_fault             = hap_page_fault,
 769              .invlpg                 = hap_invlpg,
 770              .gva_to_gfn             = hap_gva_to_gfn_real_mode,
 771              .p2m_ga_to_gfn          = hap_p2m_ga_to_gfn_real_mode,
 772              .update_cr3             = hap_update_cr3,
 773              .update_paging_modes    = hap_update_paging_modes,
 774              .write_p2m_entry        = hap_write_p2m_entry,
 775              .guest_levels           = 1
 776          };
 777
 778          static const struct paging_mode hap_paging_protected_mode = {
 779              .page_fault             = hap_page_fault,
 780              .invlpg                 = hap_invlpg,
 781              .gva_to_gfn             = hap_gva_to_gfn_2_levels,
 782              .p2m_ga_to_gfn          = hap_p2m_ga_to_gfn_2_levels,
 783              .update_cr3             = hap_update_cr3,
 784              .update_paging_modes    = hap_update_paging_modes,
 785              .write_p2m_entry        = hap_write_p2m_entry,
 786              .guest_levels           = 2
 787          };
 788
 789          static const struct paging_mode hap_paging_pae_mode = {
 790              .page_fault             = hap_page_fault,
 791              .invlpg                 = hap_invlpg,
 792              .gva_to_gfn             = hap_gva_to_gfn_3_levels,
 793              .p2m_ga_to_gfn          = hap_p2m_ga_to_gfn_3_levels,
 794              .update_cr3             = hap_update_cr3,
 795              .update_paging_modes    = hap_update_paging_modes,
 796              .write_p2m_entry        = hap_write_p2m_entry,
 797              .guest_levels           = 3
 798          };
 799
 800          static const struct paging_mode hap_paging_long_mode = {
 801              .page_fault             = hap_page_fault,
 802              .invlpg                 = hap_invlpg,
 803              .gva_to_gfn             = hap_gva_to_gfn_4_levels,
 804              .p2m_ga_to_gfn          = hap_p2m_ga_to_gfn_4_levels,
 805              .update_cr3             = hap_update_cr3,
 806              .update_paging_modes    = hap_update_paging_modes,
 807              .write_p2m_entry        = hap_write_p2m_entry,
 808              .guest_levels           = 4
 809          };
 810
 811          /*
 812           * Local variables:
 813           * mode: C
 814           * c-file-style: "BSD"
 815           * c-basic-offset: 4
 816           * indent-tabs-mode: nil
 817           * End:
 818           */