Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/arch/x86/pv/ro-page-fault.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 * arch/x86/pv/ro-page-fault.c
3
 *
4
 * Read-only page fault emulation for PV guests
5
 *
6
 * Copyright (c) 2002-2005 K A Fraser
7
 * Copyright (c) 2004 Christian Limpach
8
 *
9
 * This program is free software; you can redistribute it and/or modify
10
 * it under the terms of the GNU General Public License as published by
11
 * the Free Software Foundation; either version 2 of the License, or
12
 * (at your option) any later version.
13
 *
14
 * This program is distributed in the hope that it will be useful,
15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
 * GNU General Public License for more details.
18
 *
19
 * You should have received a copy of the GNU General Public License
20
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
21
 */
22
23
#include <xen/guest_access.h>
24
#include <xen/rangeset.h>
25
#include <xen/sched.h>
26
#include <xen/trace.h>
27
28
#include <asm/domain.h>
29
#include <asm/mm.h>
30
#include <asm/pci.h>
31
#include <asm/pv/mm.h>
32
33
#include "emulate.h"
34
#include "mm.h"
35
36
/*
 * Replace the asm/page.h versions of these macros with variants that work
 * with mfn_t: mfn_to_page() unwraps its argument with mfn_x(), and
 * page_to_mfn() wraps its result with _mfn().
 */
#undef mfn_to_page
#undef page_to_mfn
#define mfn_to_page(mfn) __mfn_to_page(mfn_x(mfn))
#define page_to_mfn(pg)  _mfn(__page_to_mfn(pg))
41
42
/*********************
43
 * Writable Pagetables
44
 */
45
46
struct ptwr_emulate_ctxt {
47
    unsigned long cr2;
48
    l1_pgentry_t  pte;
49
};
50
51
static int ptwr_emulated_read(enum x86_segment seg, unsigned long offset,
52
                              void *p_data, unsigned int bytes,
53
                              struct x86_emulate_ctxt *ctxt)
54
0
{
55
0
    unsigned int rc = bytes;
56
0
    unsigned long addr = offset;
57
0
58
0
    if ( !__addr_ok(addr) ||
59
0
         (rc = __copy_from_user(p_data, (void *)addr, bytes)) )
60
0
    {
61
0
        x86_emul_pagefault(0, addr + bytes - rc, ctxt);  /* Read fault. */
62
0
        return X86EMUL_EXCEPTION;
63
0
    }
64
0
65
0
    return X86EMUL_OKAY;
66
0
}
67
68
/*
 * Common worker for emulated stores and cmpxchg operations targeting the
 * L1 pagetable page recorded in the ptwr context (ctxt->data).
 *
 * @addr:       guest address being written.
 * @old:        expected old value (only meaningful when @do_cmpxchg).
 * @val:        new value to store.
 * @bytes:      access size; the callers restrict it to a power of two no
 *              larger than sizeof(paddr_t).
 * @do_cmpxchg: non-zero for a compare-and-exchange, zero for a plain store.
 *
 * Returns:
 *  X86EMUL_UNHANDLEABLE - access misaligned / outside the faulting page, or
 *                         the new PTE could not be validated.
 *  X86EMUL_EXCEPTION    - reading the containing word faulted (#PF queued).
 *  X86EMUL_RETRY        - the compare-and-exchange did not succeed.
 *  X86EMUL_OKAY         - the entry was updated.
 */
static int ptwr_emulated_update(unsigned long addr, paddr_t old, paddr_t val,
                                unsigned int bytes, unsigned int do_cmpxchg,
                                struct x86_emulate_ctxt *ctxt)
{
    /* Remember the address as issued by the guest, before any widening. */
    const unsigned long guest_addr = addr;
    struct ptwr_emulate_ctxt *ptwr_ctxt = ctxt->data;
    struct vcpu *curr = current;
    struct domain *currd = curr->domain;
    l1_pgentry_t pte, ol1e, nl1e, *pl1e;
    struct page_info *page;
    unsigned long mfn;
    int ret;

    /* Only allow naturally-aligned stores within the original %cr2 page. */
    if ( unlikely(((addr ^ ptwr_ctxt->cr2) & PAGE_MASK) ||
                  (addr & (bytes - 1))) )
    {
        gdprintk(XENLOG_WARNING, "bad access (cr2=%lx, addr=%lx, bytes=%u)\n",
                 ptwr_ctxt->cr2, addr, bytes);
        return X86EMUL_UNHANDLEABLE;
    }

    /* Widen a partial access into one covering the whole entry. */
    if ( bytes != sizeof(paddr_t) )
    {
        /* Mask covering the bytes supplied by the caller (unshifted). */
        const paddr_t mask = ((paddr_t)1 << (bytes * 8)) - 1;
        /* Bit position of the access within the containing word. */
        unsigned int shift = (addr & (sizeof(paddr_t) - 1)) * 8;
        unsigned int left;
        paddr_t word;

        /* Align the address and fetch the current full word. */
        addr &= ~(sizeof(paddr_t) - 1);
        left = copy_from_user(&word, (void *)addr, sizeof(paddr_t));
        if ( left != 0 )
        {
            x86_emul_pagefault(0, /* Read fault. */
                               addr + sizeof(paddr_t) - left,
                               ctxt);
            return X86EMUL_EXCEPTION;
        }

        /* Drop the bytes the caller provides, keeping the rest. */
        word &= ~(mask << shift);

        /* Merge the caller's new value with the untouched bytes. */
        val = ((val & mask) << shift) | word;

        /* Likewise complete the cmpxchg comparison value. */
        old = ((old & mask) << shift) | word;
    }

    pte  = ptwr_ctxt->pte;
    mfn  = l1e_get_pfn(pte);
    page = mfn_to_page(_mfn(mfn));

    /* We are looking only for read-only mappings of p.t. pages. */
    ASSERT((l1e_get_flags(pte) & (_PAGE_RW|_PAGE_PRESENT)) == _PAGE_PRESENT);
    ASSERT(mfn_valid(_mfn(mfn)));
    ASSERT((page->u.inuse.type_info & PGT_type_mask) == PGT_l1_page_table);
    ASSERT((page->u.inuse.type_info & PGT_count_mask) != 0);
    ASSERT(page_get_owner(page) == currd);

    /* Validate the prospective entry. */
    nl1e = l1e_from_intpte(val);
    switch ( ret = get_page_from_l1e(nl1e, currd, currd) )
    {
    case 0:
        break;

    case _PAGE_RW ... _PAGE_RW | PAGE_CACHE_ATTRS:
        /* The returned flag bits are toggled in the new entry. */
        ASSERT(!(ret & ~(_PAGE_RW | PAGE_CACHE_ATTRS)));
        l1e_flip_flags(nl1e, ret);
        break;

    default:
        if ( !(is_pv_32bit_domain(currd) && (bytes == 4) &&
               (guest_addr & 4) && !do_cmpxchg &&
               (l1e_get_flags(nl1e) & _PAGE_PRESENT)) )
        {
            gdprintk(XENLOG_WARNING, "could not get_page_from_l1e()\n");
            return X86EMUL_UNHANDLEABLE;
        }

        /*
         * If this is an upper-half write to a PAE PTE then we assume that
         * the guest has simply got the two writes the wrong way round. We
         * zap the PRESENT bit on the assumption that the bottom half will
         * be written immediately after we return to the guest.
         */
        gdprintk(XENLOG_DEBUG, "ptwr_emulate: fixing up invalid PAE PTE %"
                 PRIpte"\n", l1e_get_intpte(nl1e));
        l1e_remove_flags(nl1e, _PAGE_PRESENT);
        break;
    }

    nl1e = adjust_guest_l1e(nl1e, currd);

    /* Checked successfully: do the update (write or cmpxchg). */
    pl1e = map_domain_page(_mfn(mfn));
    pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK));

    if ( !do_cmpxchg )
    {
        ol1e = *pl1e;
        if ( !UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, mfn, curr, 0) )
            BUG();
    }
    else
    {
        intpte_t seen = old;
        bool swapped;

        ol1e = l1e_from_intpte(old);
        swapped = paging_cmpxchg_guest_entry(curr, &l1e_get_intpte(*pl1e),
                                             &seen, l1e_get_intpte(nl1e),
                                             _mfn(mfn));

        if ( !swapped || seen != old )
        {
            /* Nothing was installed: release the new entry's reference. */
            unmap_domain_page(pl1e);
            put_page_from_l1e(nl1e, currd);
            return X86EMUL_RETRY;
        }
    }

    trace_ptwr_emulation(addr, nl1e);

    unmap_domain_page(pl1e);

    /* Finally, drop the old PTE. */
    put_page_from_l1e(ol1e, currd);

    return X86EMUL_OKAY;
}
198
199
static int ptwr_emulated_write(enum x86_segment seg, unsigned long offset,
200
                               void *p_data, unsigned int bytes,
201
                               struct x86_emulate_ctxt *ctxt)
202
0
{
203
0
    paddr_t val = 0;
204
0
205
0
    if ( (bytes > sizeof(paddr_t)) || (bytes & (bytes - 1)) || !bytes )
206
0
    {
207
0
        gdprintk(XENLOG_WARNING, "bad write size (addr=%lx, bytes=%u)\n",
208
0
                 offset, bytes);
209
0
        return X86EMUL_UNHANDLEABLE;
210
0
    }
211
0
212
0
    memcpy(&val, p_data, bytes);
213
0
214
0
    return ptwr_emulated_update(offset, 0, val, bytes, 0, ctxt);
215
0
}
216
217
static int ptwr_emulated_cmpxchg(enum x86_segment seg, unsigned long offset,
218
                                 void *p_old, void *p_new, unsigned int bytes,
219
                                 struct x86_emulate_ctxt *ctxt)
220
0
{
221
0
    paddr_t old = 0, new = 0;
222
0
223
0
    if ( (bytes > sizeof(paddr_t)) || (bytes & (bytes - 1)) )
224
0
    {
225
0
        gdprintk(XENLOG_WARNING, "bad cmpxchg size (addr=%lx, bytes=%u)\n",
226
0
                 offset, bytes);
227
0
        return X86EMUL_UNHANDLEABLE;
228
0
    }
229
0
230
0
    memcpy(&old, p_old, bytes);
231
0
    memcpy(&new, p_new, bytes);
232
0
233
0
    return ptwr_emulated_update(offset, old, new, bytes, 1, ctxt);
234
0
}
235
236
static const struct x86_emulate_ops ptwr_emulate_ops = {
237
    .read       = ptwr_emulated_read,
238
    .insn_fetch = ptwr_emulated_read,
239
    .write      = ptwr_emulated_write,
240
    .cmpxchg    = ptwr_emulated_cmpxchg,
241
    .validate   = pv_emul_is_mem_write,
242
    .cpuid      = pv_emul_cpuid,
243
};
244
245
/* Write page fault handler: check if guest is trying to modify a PTE. */
246
static int ptwr_do_page_fault(struct x86_emulate_ctxt *ctxt,
247
                              unsigned long addr, l1_pgentry_t pte)
248
0
{
249
0
    struct ptwr_emulate_ctxt ptwr_ctxt = {
250
0
        .cr2 = addr,
251
0
        .pte = pte,
252
0
    };
253
0
    struct page_info *page;
254
0
    int rc;
255
0
256
0
    if ( !get_page_from_mfn(l1e_get_mfn(pte), current->domain) )
257
0
        return X86EMUL_UNHANDLEABLE;
258
0
259
0
    page = l1e_get_page(pte);
260
0
    if ( !page_lock(page) )
261
0
    {
262
0
        put_page(page);
263
0
        return X86EMUL_UNHANDLEABLE;
264
0
    }
265
0
266
0
    if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
267
0
    {
268
0
        page_unlock(page);
269
0
        put_page(page);
270
0
        return X86EMUL_UNHANDLEABLE;
271
0
    }
272
0
273
0
    ctxt->data = &ptwr_ctxt;
274
0
    rc = x86_emulate(ctxt, &ptwr_emulate_ops);
275
0
276
0
    page_unlock(page);
277
0
    put_page(page);
278
0
279
0
    return rc;
280
0
}
281
282
/*****************************************
283
 * fault handling for read-only MMIO pages
284
 */
285
286
static const struct x86_emulate_ops mmio_ro_emulate_ops = {
287
    .read       = x86emul_unhandleable_rw,
288
    .insn_fetch = ptwr_emulated_read,
289
    .write      = mmio_ro_emulated_write,
290
    .validate   = pv_emul_is_mem_write,
291
    .cpuid      = pv_emul_cpuid,
292
};
293
294
static const struct x86_emulate_ops mmcfg_intercept_ops = {
295
    .read       = x86emul_unhandleable_rw,
296
    .insn_fetch = ptwr_emulated_read,
297
    .write      = mmcfg_intercept_write,
298
    .validate   = pv_emul_is_mem_write,
299
    .cpuid      = pv_emul_cpuid,
300
};
301
302
/* Check if guest is trying to modify a r/o MMIO page. */
303
static int mmio_ro_do_page_fault(struct x86_emulate_ctxt *ctxt,
304
                                 unsigned long addr, l1_pgentry_t pte)
305
0
{
306
0
    struct mmio_ro_emulate_ctxt mmio_ro_ctxt = { .cr2 = addr };
307
0
    mfn_t mfn = l1e_get_mfn(pte);
308
0
309
0
    if ( mfn_valid(mfn) )
310
0
    {
311
0
        struct page_info *page = mfn_to_page(mfn);
312
0
        const struct domain *owner = page_get_owner_and_reference(page);
313
0
314
0
        if ( owner )
315
0
            put_page(page);
316
0
        if ( owner != dom_io )
317
0
            return X86EMUL_UNHANDLEABLE;
318
0
    }
319
0
320
0
    ctxt->data = &mmio_ro_ctxt;
321
0
    if ( pci_ro_mmcfg_decode(mfn_x(mfn), &mmio_ro_ctxt.seg, &mmio_ro_ctxt.bdf) )
322
0
        return x86_emulate(ctxt, &mmcfg_intercept_ops);
323
0
    else
324
0
        return x86_emulate(ctxt, &mmio_ro_emulate_ops);
325
0
}
326
327
int pv_ro_page_fault(unsigned long addr, struct cpu_user_regs *regs)
328
0
{
329
0
    l1_pgentry_t pte;
330
0
    const struct domain *currd = current->domain;
331
0
    unsigned int addr_size = is_pv_32bit_domain(currd) ? 32 : BITS_PER_LONG;
332
0
    struct x86_emulate_ctxt ctxt = {
333
0
        .regs      = regs,
334
0
        .vendor    = currd->arch.cpuid->x86_vendor,
335
0
        .addr_size = addr_size,
336
0
        .sp_size   = addr_size,
337
0
        .lma       = addr_size > 32,
338
0
    };
339
0
    int rc;
340
0
    bool mmio_ro;
341
0
342
0
    /* Attempt to read the PTE that maps the VA being accessed. */
343
0
    pte = guest_get_eff_l1e(addr);
344
0
345
0
    /* We are only looking for read-only mappings */
346
0
    if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT | _PAGE_RW)) != _PAGE_PRESENT) )
347
0
        return 0;
348
0
349
0
    mmio_ro = is_hardware_domain(currd) &&
350
0
              rangeset_contains_singleton(mmio_ro_ranges, l1e_get_pfn(pte));
351
0
    if ( mmio_ro )
352
0
        rc = mmio_ro_do_page_fault(&ctxt, addr, pte);
353
0
    else
354
0
        rc = ptwr_do_page_fault(&ctxt, addr, pte);
355
0
356
0
    switch ( rc )
357
0
    {
358
0
    case X86EMUL_EXCEPTION:
359
0
        /*
360
0
         * This emulation covers writes to:
361
0
         *  - L1 pagetables.
362
0
         *  - MMCFG space or read-only MFNs.
363
0
         * We tolerate #PF (from hitting an adjacent page or a successful
364
0
         * concurrent pagetable update).  Anything else is an emulation bug,
365
0
         * or a guest playing with the instruction stream under Xen's feet.
366
0
         */
367
0
        if ( ctxt.event.type == X86_EVENTTYPE_HW_EXCEPTION &&
368
0
             ctxt.event.vector == TRAP_page_fault )
369
0
            pv_inject_event(&ctxt.event);
370
0
        else
371
0
            gdprintk(XENLOG_WARNING,
372
0
                     "Unexpected event (type %u, vector %#x) from emulation\n",
373
0
                     ctxt.event.type, ctxt.event.vector);
374
0
375
0
        /* Fallthrough */
376
0
    case X86EMUL_OKAY:
377
0
        if ( ctxt.retire.singlestep )
378
0
            pv_inject_hw_exception(TRAP_debug, X86_EVENT_NO_EC);
379
0
380
0
        /* Fallthrough */
381
0
    case X86EMUL_RETRY:
382
0
        if ( mmio_ro )
383
0
            perfc_incr(mmio_ro_emulations);
384
0
        else
385
0
            perfc_incr(ptwr_emulations);
386
0
        return EXCRET_fault_fixed;
387
0
    }
388
0
389
0
    return 0;
390
0
}
391
392
/*
393
 * Local variables:
394
 * mode: C
395
 * c-file-style: "BSD"
396
 * c-basic-offset: 4
397
 * tab-width: 4
398
 * indent-tabs-mode: nil
399
 * End:
400
 */