Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/arch/x86/hvm/emulate.c
Line
Count
Source
1
/******************************************************************************
2
 * hvm/emulate.c
3
 * 
4
 * HVM instruction emulation. Used for MMIO and VMX real mode.
5
 * 
6
 * Copyright (c) 2008, Citrix Systems, Inc.
7
 * 
8
 * Authors:
9
 *    Keir Fraser <keir@xen.org>
10
 */
11
12
#include <xen/init.h>
13
#include <xen/lib.h>
14
#include <xen/sched.h>
15
#include <xen/paging.h>
16
#include <xen/trace.h>
17
#include <xen/vm_event.h>
18
#include <asm/event.h>
19
#include <asm/i387.h>
20
#include <asm/xstate.h>
21
#include <asm/hvm/emulate.h>
22
#include <asm/hvm/hvm.h>
23
#include <asm/hvm/ioreq.h>
24
#include <asm/hvm/monitor.h>
25
#include <asm/hvm/trace.h>
26
#include <asm/hvm/support.h>
27
#include <asm/hvm/svm/svm.h>
28
#include <asm/vm_event.h>
29
30
static void hvmtrace_io_assist(const ioreq_t *p)
31
80.2k
{
32
80.2k
    unsigned int size, event;
33
80.2k
    unsigned char buffer[12];
34
80.2k
35
80.2k
    if ( likely(!tb_init_done) )
36
80.2k
        return;
37
80.2k
38
0
    if ( p->type == IOREQ_TYPE_COPY )
39
0
        event = p->dir ? TRC_HVM_IOMEM_READ : TRC_HVM_IOMEM_WRITE;
40
0
    else
41
0
        event = p->dir ? TRC_HVM_IOPORT_READ : TRC_HVM_IOPORT_WRITE;
42
0
43
0
    *(uint64_t *)buffer = p->addr;
44
0
    size = (p->addr != (u32)p->addr) ? 8 : 4;
45
0
    if ( size == 8 )
46
0
        event |= TRC_64_FLAG;
47
0
48
0
    if ( !p->data_is_ptr )
49
0
    {
50
0
        *(uint32_t *)&buffer[size] = p->data;
51
0
        size += 4;
52
0
    }
53
0
54
0
    trace_var(event, 0/*!cycles*/, size, buffer);
55
0
}
56
57
static int null_read(const struct hvm_io_handler *io_handler,
58
                     uint64_t addr,
59
                     uint32_t size,
60
                     uint64_t *data)
61
20.0k
{
62
20.0k
    *data = ~0ul;
63
20.0k
    return X86EMUL_OKAY;
64
20.0k
}
65
66
static int null_write(const struct hvm_io_handler *handler,
67
                      uint64_t addr,
68
                      uint32_t size,
69
                      uint64_t data)
70
26
{
71
26
    return X86EMUL_OKAY;
72
26
}
73
74
static int set_context_data(void *buffer, unsigned int size)
75
0
{
76
0
    struct vcpu *curr = current;
77
0
78
0
    if ( curr->arch.vm_event )
79
0
    {
80
0
        unsigned int safe_size =
81
0
            min(size, curr->arch.vm_event->emul.read.size);
82
0
83
0
        memcpy(buffer, curr->arch.vm_event->emul.read.data, safe_size);
84
0
        memset(buffer + safe_size, 0, size - safe_size);
85
0
        return X86EMUL_OKAY;
86
0
    }
87
0
88
0
    return X86EMUL_UNHANDLEABLE;
89
0
}
90
91
static const struct hvm_io_ops null_ops = {
92
    .read = null_read,
93
    .write = null_write
94
};
95
96
static const struct hvm_io_handler null_handler = {
97
    .ops = &null_ops
98
};
99
100
static int ioreq_server_read(const struct hvm_io_handler *io_handler,
101
                    uint64_t addr,
102
                    uint32_t size,
103
                    uint64_t *data)
104
0
{
105
0
    if ( hvm_copy_from_guest_phys(data, addr, size) != HVMTRANS_okay )
106
0
        return X86EMUL_UNHANDLEABLE;
107
0
108
0
    return X86EMUL_OKAY;
109
0
}
110
111
static const struct hvm_io_ops ioreq_server_ops = {
112
    .read = ioreq_server_read,
113
    .write = null_write
114
};
115
116
static const struct hvm_io_handler ioreq_server_handler = {
117
    .ops = &ioreq_server_ops
118
};
119
120
static int hvmemul_do_io(
121
    bool_t is_mmio, paddr_t addr, unsigned long *reps, unsigned int size,
122
    uint8_t dir, bool_t df, bool_t data_is_addr, uintptr_t data)
123
80.2k
{
124
80.2k
    struct vcpu *curr = current;
125
80.2k
    struct domain *currd = curr->domain;
126
80.2k
    struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
127
80.2k
    ioreq_t p = {
128
60.1k
        .type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO,
129
80.2k
        .addr = addr,
130
80.2k
        .size = size,
131
80.2k
        .count = *reps,
132
80.2k
        .dir = dir,
133
80.2k
        .df = df,
134
80.2k
        .data = data_is_addr ? data : 0,
135
80.2k
        .data_is_ptr = data_is_addr, /* ioreq_t field name is misleading */
136
80.2k
        .state = STATE_IOREQ_READY,
137
80.2k
    };
138
80.2k
    void *p_data = (void *)data;
139
80.2k
    int rc;
140
80.2k
141
80.2k
    /*
142
80.2k
     * Weird-sized accesses have undefined behaviour: we discard writes
143
80.2k
     * and read all-ones.
144
80.2k
     */
145
80.2k
    if ( unlikely((size > sizeof(long)) || (size & (size - 1))) )
146
0
    {
147
0
        gdprintk(XENLOG_WARNING, "bad mmio size %d\n", size);
148
0
        return X86EMUL_UNHANDLEABLE;
149
0
    }
150
80.2k
151
80.2k
    switch ( vio->io_req.state )
152
80.2k
    {
153
80.2k
    case STATE_IOREQ_NONE:
154
80.2k
        break;
155
0
    case STATE_IORESP_READY:
156
0
        vio->io_req.state = STATE_IOREQ_NONE;
157
0
        p = vio->io_req;
158
0
159
0
        /* Verify the emulation request has been correctly re-issued */
160
0
        if ( (p.type != (is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO)) ||
161
0
             (p.addr != addr) ||
162
0
             (p.size != size) ||
163
0
             (p.count > *reps) ||
164
0
             (p.dir != dir) ||
165
0
             (p.df != df) ||
166
0
             (p.data_is_ptr != data_is_addr) )
167
0
            domain_crash(currd);
168
0
169
0
        if ( data_is_addr )
170
0
            return X86EMUL_UNHANDLEABLE;
171
0
172
0
        *reps = p.count;
173
0
        goto finish_access;
174
0
    default:
175
0
        return X86EMUL_UNHANDLEABLE;
176
80.2k
    }
177
80.2k
178
80.2k
    if ( dir == IOREQ_WRITE )
179
4.17k
    {
180
4.17k
        if ( !data_is_addr )
181
4.17k
            memcpy(&p.data, p_data, size);
182
4.17k
183
4.17k
        hvmtrace_io_assist(&p);
184
4.17k
    }
185
80.2k
186
80.2k
    vio->io_req = p;
187
80.2k
188
80.2k
    rc = hvm_io_intercept(&p);
189
80.2k
190
80.2k
    /*
191
80.2k
     * p.count may have got reduced (see hvm_process_io_intercept()) - inform
192
80.2k
     * our callers and mirror this into latched state.
193
80.2k
     */
194
80.2k
    ASSERT(p.count <= *reps);
195
80.2k
    *reps = vio->io_req.count = p.count;
196
80.2k
197
80.2k
    switch ( rc )
198
80.2k
    {
199
60.1k
    case X86EMUL_OKAY:
200
60.1k
        vio->io_req.state = STATE_IOREQ_NONE;
201
60.1k
        break;
202
20.0k
    case X86EMUL_UNHANDLEABLE:
203
20.0k
    {
204
20.0k
        /*
205
20.0k
         * Xen isn't emulating the instruction internally, so see if there's
206
20.0k
         * an ioreq server that can handle it.
207
20.0k
         *
208
20.0k
         * Rules:
209
20.0k
         * A> PIO or MMIO accesses run through hvm_select_ioreq_server() to
210
20.0k
         * choose the ioreq server by range. If no server is found, the access
211
20.0k
         * is ignored.
212
20.0k
         *
213
20.0k
         * B> p2m_ioreq_server accesses are handled by the designated
214
20.0k
         * ioreq server for the domain, but there are some corner cases:
215
20.0k
         *
216
20.0k
         *   - If the domain ioreq server is NULL, it's likely we suffer from
217
20.0k
         *   a race with an unmap operation on the ioreq server, so re-try the
218
20.0k
         *   instruction.
219
20.0k
         *
220
20.0k
         *   - If the access is a read, this could be part of a
221
20.0k
         *   read-modify-write instruction, emulate the read first.
222
20.0k
         *
223
20.0k
         * Note: Even when an ioreq server is found, its value could become
224
20.0k
         * stale later, because it is possible that
225
20.0k
         *
226
20.0k
         *   - the PIO or MMIO address is removed from the rangeset of the
227
20.0k
         *   ioreq server, before the event is delivered to the device model.
228
20.0k
         *
229
20.0k
         *   - the p2m_ioreq_server type is unmapped from the ioreq server,
230
20.0k
         *   before the event is delivered to the device model.
231
20.0k
         *
232
20.0k
         * However, there's no cheap approach to avoid the above situations in Xen,
233
20.0k
         * so the device model side needs to check the incoming ioreq event.
234
20.0k
         */
235
20.0k
        struct hvm_ioreq_server *s = NULL;
236
20.0k
        p2m_type_t p2mt = p2m_invalid;
237
20.0k
238
20.0k
        if ( is_mmio )
239
11
        {
240
11
            unsigned long gmfn = paddr_to_pfn(addr);
241
11
242
11
            get_gfn_query_unlocked(currd, gmfn, &p2mt);
243
11
244
11
            if ( p2mt == p2m_ioreq_server )
245
0
            {
246
0
                unsigned int flags;
247
0
248
0
                s = p2m_get_ioreq_server(currd, &flags);
249
0
250
0
                if ( s == NULL )
251
0
                {
252
0
                    rc = X86EMUL_RETRY;
253
0
                    vio->io_req.state = STATE_IOREQ_NONE;
254
0
                    break;
255
0
                }
256
0
257
0
                /*
258
0
                 * This is part of a read-modify-write instruction.
259
0
                 * Emulate the read part so we have the value available.
260
0
                 */
261
0
                if ( dir == IOREQ_READ )
262
0
                {
263
0
                    rc = hvm_process_io_intercept(&ioreq_server_handler, &p);
264
0
                    vio->io_req.state = STATE_IOREQ_NONE;
265
0
                    break;
266
0
                }
267
0
            }
268
11
        }
269
20.0k
270
20.0k
        if ( !s )
271
20.0k
            s = hvm_select_ioreq_server(currd, &p);
272
20.0k
273
20.0k
        /* If there is no suitable backing DM, just ignore accesses */
274
20.0k
        if ( !s )
275
20.0k
        {
276
20.0k
            rc = hvm_process_io_intercept(&null_handler, &p);
277
20.0k
            vio->io_req.state = STATE_IOREQ_NONE;
278
20.0k
        }
279
20.0k
        else
280
0
        {
281
0
            rc = hvm_send_ioreq(s, &p, 0);
282
0
            if ( rc != X86EMUL_RETRY || currd->is_shutting_down )
283
0
                vio->io_req.state = STATE_IOREQ_NONE;
284
0
            else if ( data_is_addr )
285
0
                rc = X86EMUL_OKAY;
286
0
        }
287
20.0k
        break;
288
20.0k
    }
289
0
    case X86EMUL_UNIMPLEMENTED:
290
0
        ASSERT_UNREACHABLE();
291
0
        /* Fall-through */
292
0
    default:
293
0
        BUG();
294
80.2k
    }
295
80.2k
296
80.2k
    ASSERT(rc != X86EMUL_UNIMPLEMENTED);
297
80.2k
298
80.2k
    if ( rc != X86EMUL_OKAY )
299
0
        return rc;
300
80.2k
301
80.2k
 finish_access:
302
80.2k
    if ( dir == IOREQ_READ )
303
76.0k
    {
304
76.0k
        hvmtrace_io_assist(&p);
305
76.0k
306
76.0k
        if ( !data_is_addr )
307
76.0k
            memcpy(p_data, &p.data, size);
308
76.0k
    }
309
80.2k
310
80.2k
    return X86EMUL_OKAY;
311
80.2k
}
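
Editor's note: the size check near the top of hvmemul_do_io() only lets through accesses of 1, 2, 4 or 8 bytes (a power of two no wider than sizeof(long)); anything else is warned about and rejected. A minimal standalone sketch of that predicate follows; the helper name is hypothetical and not part of this file, and the size >= 1 guard is an addition for completeness that is not present in the original test.

    #include <stdbool.h>
    #include <stddef.h>

    /* Sketch of the "(size > sizeof(long)) || (size & (size - 1))" test in
     * hvmemul_do_io(): accept only power-of-two widths up to sizeof(long). */
    static bool io_size_is_valid(unsigned int size)
    {
        return size >= 1 && size <= sizeof(long) && !(size & (size - 1));
    }
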
312
313
static int hvmemul_do_io_buffer(
314
    bool_t is_mmio, paddr_t addr, unsigned long *reps, unsigned int size,
315
    uint8_t dir, bool_t df, void *buffer)
316
80.2k
{
317
80.2k
    int rc;
318
80.2k
319
80.2k
    BUG_ON(buffer == NULL);
320
80.2k
321
80.2k
    rc = hvmemul_do_io(is_mmio, addr, reps, size, dir, df, 0,
322
80.2k
                       (uintptr_t)buffer);
323
80.2k
324
80.2k
    ASSERT(rc != X86EMUL_UNIMPLEMENTED);
325
80.2k
326
80.2k
    if ( rc == X86EMUL_UNHANDLEABLE && dir == IOREQ_READ )
327
0
        memset(buffer, 0xff, size);
328
80.2k
329
80.2k
    return rc;
330
80.2k
}
331
332
static int hvmemul_acquire_page(unsigned long gmfn, struct page_info **page)
333
0
{
334
0
    struct domain *curr_d = current->domain;
335
0
    p2m_type_t p2mt;
336
0
337
0
    *page = get_page_from_gfn(curr_d, gmfn, &p2mt, P2M_UNSHARE);
338
0
339
0
    if ( *page == NULL )
340
0
        return X86EMUL_UNHANDLEABLE;
341
0
342
0
    if ( p2m_is_paging(p2mt) )
343
0
    {
344
0
        put_page(*page);
345
0
        p2m_mem_paging_populate(curr_d, gmfn);
346
0
        return X86EMUL_RETRY;
347
0
    }
348
0
349
0
    if ( p2m_is_shared(p2mt) )
350
0
    {
351
0
        put_page(*page);
352
0
        return X86EMUL_RETRY;
353
0
    }
354
0
355
0
    /* This code should not be reached if the gmfn is not RAM */
356
0
    if ( p2m_is_mmio(p2mt) )
357
0
    {
358
0
        domain_crash(curr_d);
359
0
360
0
        put_page(*page);
361
0
        return X86EMUL_UNHANDLEABLE;
362
0
    }
363
0
364
0
    return X86EMUL_OKAY;
365
0
}
366
367
static inline void hvmemul_release_page(struct page_info *page)
368
0
{
369
0
    put_page(page);
370
0
}
371
372
static int hvmemul_do_io_addr(
373
    bool_t is_mmio, paddr_t addr, unsigned long *reps,
374
    unsigned int size, uint8_t dir, bool_t df, paddr_t ram_gpa)
375
0
{
376
0
    struct vcpu *v = current;
377
0
    unsigned long ram_gmfn = paddr_to_pfn(ram_gpa);
378
0
    unsigned int page_off = ram_gpa & (PAGE_SIZE - 1);
379
0
    struct page_info *ram_page[2];
380
0
    unsigned int nr_pages = 0;
381
0
    unsigned long count;
382
0
    int rc;
383
0
384
0
    rc = hvmemul_acquire_page(ram_gmfn, &ram_page[nr_pages]);
385
0
    if ( rc != X86EMUL_OKAY )
386
0
        goto out;
387
0
388
0
    nr_pages++;
389
0
390
0
    /* Determine how many reps will fit within this page */
391
0
    count = min_t(unsigned long,
392
0
                  *reps,
393
0
                  df ?
394
0
                  ((page_off + size - 1) & ~PAGE_MASK) / size :
395
0
                  (PAGE_SIZE - page_off) / size);
396
0
397
0
    if ( count == 0 )
398
0
    {
399
0
        /*
400
0
         * This access must span two pages, so grab a reference to
401
0
         * the next page and do a single rep.
402
0
         * It is safe to assume multiple pages are physically
403
0
         * contiguous at this point as hvmemul_linear_to_phys() will
404
0
         * ensure this is the case.
405
0
         */
406
0
        rc = hvmemul_acquire_page(df ? ram_gmfn - 1 : ram_gmfn + 1,
407
0
                                  &ram_page[nr_pages]);
408
0
        if ( rc != X86EMUL_OKAY )
409
0
            goto out;
410
0
411
0
        nr_pages++;
412
0
        count = 1;
413
0
    }
414
0
415
0
    rc = hvmemul_do_io(is_mmio, addr, &count, size, dir, df, 1,
416
0
                       ram_gpa);
417
0
418
0
    ASSERT(rc != X86EMUL_UNIMPLEMENTED);
419
0
420
0
    if ( rc == X86EMUL_OKAY )
421
0
        v->arch.hvm_vcpu.hvm_io.mmio_retry = (count < *reps);
422
0
423
0
    *reps = count;
424
0
425
0
 out:
426
0
    while ( nr_pages )
427
0
        hvmemul_release_page(ram_page[--nr_pages]);
428
0
429
0
    return rc;
430
0
}
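
Editor's note: the "reps within this page" clamp above is easiest to see with concrete numbers. Below is a standalone sketch, assuming 4 KiB pages; EX_PAGE_SIZE and reps_in_first_page are hypothetical names, and only the page-capacity half of the min_t() clamp is mirrored.

    #include <stdio.h>

    #define EX_PAGE_SIZE 0x1000u

    /* How many reps of 'size' bytes fit in the first page, given the
     * starting page offset and the direction flag df, using the same
     * formula as hvmemul_do_io_addr(). */
    static unsigned long reps_in_first_page(unsigned long page_off,
                                            unsigned int size, int df)
    {
        return df ? ((page_off + size - 1) & (EX_PAGE_SIZE - 1)) / size
                  : (EX_PAGE_SIZE - page_off) / size;
    }

    int main(void)
    {
        /* Forward 4-byte op at page offset 0xff8: 2 reps before crossing. */
        printf("%lu\n", reps_in_first_page(0xff8, 4, 0));
        /* Backward (df set) op at the same offset: the clamp yields 1022. */
        printf("%lu\n", reps_in_first_page(0xff8, 4, 1));
        /* Forward 8-byte op at offset 0xffc: 0 reps - the access straddles
         * the boundary, so the real code grabs the next page and does 1. */
        printf("%lu\n", reps_in_first_page(0xffc, 8, 0));
        return 0;
    }
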
431
432
/*
433
 * Perform I/O between <port> and <buffer>. <dir> indicates the
434
 * direction: IOREQ_READ means a read from <port> to <buffer> and
435
 * IOREQ_WRITE means a write from <buffer> to <port>. Each access has
436
 * width <size>.
437
 */
438
int hvmemul_do_pio_buffer(uint16_t port,
439
                          unsigned int size,
440
                          uint8_t dir,
441
                          void *buffer)
442
20.1k
{
443
20.1k
    unsigned long one_rep = 1;
444
20.1k
445
20.1k
    return hvmemul_do_io_buffer(0, port, &one_rep, size, dir, 0, buffer);
446
20.1k
}
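
Editor's note: as a usage illustration of the buffer-based PIO interface described in the comment above, a minimal hypothetical caller might look like the sketch below (assumes the usual Xen HVM emulation headers are in scope; the wrapper name is invented for illustration).

    /* Read one 32-bit value from a PIO port into 'val'.  Any non-OKAY
     * return (e.g. X86EMUL_RETRY) is simply propagated to the caller. */
    static int example_pio_read32(uint16_t port, uint32_t *val)
    {
        *val = 0;
        return hvmemul_do_pio_buffer(port, sizeof(*val), IOREQ_READ, val);
    }
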
447
448
/*
449
 * Perform I/O between <port> and guest RAM starting at <ram_addr>.
450
 * <dir> indicates the direction: IOREQ_READ means a read from <port> to
451
 * RAM and IOREQ_WRITE means a write from RAM to <port>. Each access has
452
 * width <size> and up to *<reps> accesses will be performed. If
453
 * X86EMUL_OKAY is returned then <reps> will be updated with the number
454
 * of accesses actually performed.
455
 * Each access will be done to/from successive RAM addresses, increasing
456
 * if <df> is 0 or decreasing if <df> is 1.
457
 */
458
static int hvmemul_do_pio_addr(uint16_t port,
459
                               unsigned long *reps,
460
                               unsigned int size,
461
                               uint8_t dir,
462
                               bool_t df,
463
                               paddr_t ram_addr)
464
0
{
465
0
    return hvmemul_do_io_addr(0, port, reps, size, dir, df, ram_addr);
466
0
}
467
468
/*
469
 * Perform I/O between MMIO space starting at <mmio_gpa> and <buffer>.
470
 * <dir> indicates the direction: IOREQ_READ means a read from MMIO to
471
 * <buffer> and IOREQ_WRITE means a write from <buffer> to MMIO. Each
472
 * access has width <size> and up to *<reps> accesses will be performed.
473
 * If X86EMUL_OKAY is returned then <reps> will be updated with the number
474
 * of accesses actually performed.
475
 * Each access will be done to/from successive MMIO addresses, increasing
476
 * if <df> is 0 or decreasing if <df> is 1.
477
 *
478
 * NOTE: If *<reps> is greater than 1, each access will use the
479
 *       <buffer> pointer; there is no implicit iteration over a
480
 *       block of memory starting at <buffer>.
481
 */
482
static int hvmemul_do_mmio_buffer(paddr_t mmio_gpa,
483
                                  unsigned long *reps,
484
                                  unsigned int size,
485
                                  uint8_t dir,
486
                                  bool_t df,
487
                                  void *buffer)
488
60.1k
{
489
60.1k
    return hvmemul_do_io_buffer(1, mmio_gpa, reps, size, dir, df, buffer);
490
60.1k
}
491
492
/*
493
 * Perform I/O between MMIO space starting at <mmio_gpa> and guest RAM
494
 * starting at <ram_gpa>. <dir> indicates the direction: IOREQ_READ
495
 * means a read from MMIO to RAM and IOREQ_WRITE means a write from RAM
496
 * to MMIO. Each access has width <size> and up to *<reps> accesses will
497
 * be performed. If X86EMUL_OKAY is returned then <reps> will be updated
498
 * with the number of accesses actually performed.
499
 * Each access will be done to/from successive RAM *and* MMIO addresses,
500
 * increasing if <df> is 0 or decreasing if <df> is 1.
501
 */
502
static int hvmemul_do_mmio_addr(paddr_t mmio_gpa,
503
                                unsigned long *reps,
504
                                unsigned int size,
505
                                uint8_t dir,
506
                                bool_t df,
507
                                paddr_t ram_gpa)
508
0
{
509
0
    return hvmemul_do_io_addr(1, mmio_gpa, reps, size, dir, df, ram_gpa);
510
0
}
511
512
/*
513
 * Map the frame(s) covering an individual linear access, for writeable
514
 * access.  May return NULL for MMIO, or ERR_PTR(~X86EMUL_*) for other errors
515
 * including ERR_PTR(~X86EMUL_OKAY) for write-discard mappings.
516
 *
517
 * In debug builds, map() checks that each slot in hvmemul_ctxt->mfn[] is
518
 * clean before use, and poisons unused slots with INVALID_MFN.
519
 */
520
static void *hvmemul_map_linear_addr(
521
    unsigned long linear, unsigned int bytes, uint32_t pfec,
522
    struct hvm_emulate_ctxt *hvmemul_ctxt)
523
0
{
524
0
    struct vcpu *curr = current;
525
0
    void *err, *mapping;
526
0
    unsigned int nr_frames = ((linear + bytes - !!bytes) >> PAGE_SHIFT) -
527
0
        (linear >> PAGE_SHIFT) + 1;
528
0
    unsigned int i;
529
0
530
0
    /*
531
0
     * mfn points to the next free slot.  All used slots have a page reference
532
0
     * held on them.
533
0
     */
534
0
    mfn_t *mfn = &hvmemul_ctxt->mfn[0];
535
0
536
0
    /*
537
0
     * The caller has no legitimate reason for trying a zero-byte write, but
538
0
     * all other code here is written to work if the check below was dropped.
539
0
     *
540
0
     * The maximum write size depends on the number of adjacent mfns[] which
541
0
     * can be vmap()'d, accounting for possible misalignment within the region.
542
0
     * The higher level emulation callers are responsible for ensuring that
543
0
     * mfns[] is large enough for the requested write size.
544
0
     */
545
0
    if ( bytes == 0 ||
546
0
         nr_frames > ARRAY_SIZE(hvmemul_ctxt->mfn) )
547
0
    {
548
0
        ASSERT_UNREACHABLE();
549
0
        goto unhandleable;
550
0
    }
551
0
552
0
    for ( i = 0; i < nr_frames; i++ )
553
0
    {
554
0
        enum hvm_translation_result res;
555
0
        struct page_info *page;
556
0
        pagefault_info_t pfinfo;
557
0
        p2m_type_t p2mt;
558
0
        unsigned long addr = i ? (linear + (i << PAGE_SHIFT)) & PAGE_MASK : linear;
559
0
560
0
        if ( hvmemul_ctxt->ctxt.addr_size < 64 )
561
0
            addr = (uint32_t)addr;
562
0
563
0
        /* Error checking.  Confirm that the current slot is clean. */
564
0
        ASSERT(mfn_x(*mfn) == 0);
565
0
566
0
        res = hvm_translate_get_page(curr, addr, true, pfec,
567
0
                                     &pfinfo, &page, NULL, &p2mt);
568
0
569
0
        switch ( res )
570
0
        {
571
0
        case HVMTRANS_okay:
572
0
            break;
573
0
574
0
        case HVMTRANS_bad_linear_to_gfn:
575
0
            ASSERT(pfinfo.linear == addr);
576
0
            x86_emul_pagefault(pfinfo.ec, pfinfo.linear, &hvmemul_ctxt->ctxt);
577
0
            err = ERR_PTR(~X86EMUL_EXCEPTION);
578
0
            goto out;
579
0
580
0
        case HVMTRANS_bad_gfn_to_mfn:
581
0
            err = NULL;
582
0
            goto out;
583
0
584
0
        case HVMTRANS_gfn_paged_out:
585
0
        case HVMTRANS_gfn_shared:
586
0
            err = ERR_PTR(~X86EMUL_RETRY);
587
0
            goto out;
588
0
589
0
        default:
590
0
            goto unhandleable;
591
0
        }
592
0
593
0
        *mfn++ = _mfn(page_to_mfn(page));
594
0
595
0
        if ( p2m_is_discard_write(p2mt) )
596
0
        {
597
0
            err = ERR_PTR(~X86EMUL_OKAY);
598
0
            goto out;
599
0
        }
600
0
    }
601
0
602
0
    /* Entire access within a single frame? */
603
0
    if ( nr_frames == 1 )
604
0
        mapping = map_domain_page(hvmemul_ctxt->mfn[0]);
605
0
    /* Multiple frames? Need to vmap(). */
606
0
    else if ( (mapping = vmap(hvmemul_ctxt->mfn,
607
0
                              nr_frames)) == NULL )
608
0
        goto unhandleable;
609
0
610
0
#ifndef NDEBUG /* Poison unused mfn[]s with INVALID_MFN. */
611
0
    while ( mfn < hvmemul_ctxt->mfn + ARRAY_SIZE(hvmemul_ctxt->mfn) )
612
0
    {
613
0
        ASSERT(mfn_x(*mfn) == 0);
614
0
        *mfn++ = INVALID_MFN;
615
0
    }
616
0
#endif
617
0
    return mapping + (linear & ~PAGE_MASK);
618
0
619
0
 unhandleable:
620
0
    err = ERR_PTR(~X86EMUL_UNHANDLEABLE);
621
0
622
0
 out:
623
0
    /* Drop all held references. */
624
0
    while ( mfn-- > hvmemul_ctxt->mfn )
625
0
        put_page(mfn_to_page(mfn_x(*mfn)));
626
0
627
0
    return err;
628
0
}
629
630
static void hvmemul_unmap_linear_addr(
631
    void *mapping, unsigned long linear, unsigned int bytes,
632
    struct hvm_emulate_ctxt *hvmemul_ctxt)
633
0
{
634
0
    struct domain *currd = current->domain;
635
0
    unsigned int nr_frames = ((linear + bytes - !!bytes) >> PAGE_SHIFT) -
636
0
        (linear >> PAGE_SHIFT) + 1;
637
0
    unsigned int i;
638
0
    mfn_t *mfn = &hvmemul_ctxt->mfn[0];
639
0
640
0
    ASSERT(bytes > 0);
641
0
642
0
    if ( nr_frames == 1 )
643
0
        unmap_domain_page(mapping);
644
0
    else
645
0
        vunmap(mapping);
646
0
647
0
    for ( i = 0; i < nr_frames; i++ )
648
0
    {
649
0
        ASSERT(mfn_valid(*mfn));
650
0
        paging_mark_dirty(currd, *mfn);
651
0
        put_page(mfn_to_page(mfn_x(*mfn)));
652
0
653
0
        *mfn++ = _mfn(0); /* Clean slot for map()'s error checking. */
654
0
    }
655
0
656
0
#ifndef NDEBUG /* Check (and clean) all unused mfns. */
657
0
    while ( mfn < hvmemul_ctxt->mfn + ARRAY_SIZE(hvmemul_ctxt->mfn) )
658
0
    {
659
0
        ASSERT(mfn_eq(*mfn, INVALID_MFN));
660
0
        *mfn++ = _mfn(0);
661
0
    }
662
0
#endif
663
0
}
664
665
/*
666
 * Convert addr from linear to physical form, valid over the range
667
 * [addr, addr + *reps * bytes_per_rep]. *reps is adjusted according to
668
 * the valid computed range. It is always >0 when X86EMUL_OKAY is returned.
669
 * @pfec indicates the access checks to be performed during page-table walks.
670
 */
671
static int hvmemul_linear_to_phys(
672
    unsigned long addr,
673
    paddr_t *paddr,
674
    unsigned int bytes_per_rep,
675
    unsigned long *reps,
676
    uint32_t pfec,
677
    struct hvm_emulate_ctxt *hvmemul_ctxt)
678
0
{
679
0
    struct vcpu *curr = current;
680
0
    unsigned long pfn, npfn, done, todo, i, offset = addr & ~PAGE_MASK;
681
0
    int reverse;
682
0
683
0
    /*
684
0
     * Clip repetitions to a sensible maximum. This avoids extensive looping in
685
0
     * this function while still amortising the cost of I/O trap-and-emulate.
686
0
     */
687
0
    *reps = min_t(unsigned long, *reps, 4096);
688
0
689
0
    /* With no paging it's easy: linear == physical. */
690
0
    if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG) )
691
0
    {
692
0
        *paddr = addr;
693
0
        return X86EMUL_OKAY;
694
0
    }
695
0
696
0
    /* Reverse mode if this is a backwards multi-iteration string operation. */
697
0
    reverse = (hvmemul_ctxt->ctxt.regs->eflags & X86_EFLAGS_DF) && (*reps > 1);
698
0
699
0
    if ( reverse && ((PAGE_SIZE - offset) < bytes_per_rep) )
700
0
    {
701
0
        /* Do page-straddling first iteration forwards via recursion. */
702
0
        paddr_t _paddr;
703
0
        unsigned long one_rep = 1;
704
0
        int rc = hvmemul_linear_to_phys(
705
0
            addr, &_paddr, bytes_per_rep, &one_rep, pfec, hvmemul_ctxt);
706
0
        if ( rc != X86EMUL_OKAY )
707
0
            return rc;
708
0
        pfn = _paddr >> PAGE_SHIFT;
709
0
    }
710
0
    else if ( (pfn = paging_gva_to_gfn(curr, addr, &pfec)) == gfn_x(INVALID_GFN) )
711
0
    {
712
0
        if ( pfec & (PFEC_page_paged | PFEC_page_shared) )
713
0
            return X86EMUL_RETRY;
714
0
        *reps = 0;
715
0
        x86_emul_pagefault(pfec, addr, &hvmemul_ctxt->ctxt);
716
0
        return X86EMUL_EXCEPTION;
717
0
    }
718
0
719
0
    done = reverse ? bytes_per_rep + offset : PAGE_SIZE - offset;
720
0
    todo = *reps * bytes_per_rep;
721
0
    for ( i = 1; done < todo; i++ )
722
0
    {
723
0
        /* Get the next PFN in the range. */
724
0
        addr += reverse ? -PAGE_SIZE : PAGE_SIZE;
725
0
        npfn = paging_gva_to_gfn(curr, addr, &pfec);
726
0
727
0
        /* Is it contiguous with the preceding PFNs? If not then we're done. */
728
0
        if ( (npfn == gfn_x(INVALID_GFN)) ||
729
0
             (npfn != (pfn + (reverse ? -i : i))) )
730
0
        {
731
0
            if ( pfec & (PFEC_page_paged | PFEC_page_shared) )
732
0
                return X86EMUL_RETRY;
733
0
            done /= bytes_per_rep;
734
0
            if ( done == 0 )
735
0
            {
736
0
                ASSERT(!reverse);
737
0
                if ( npfn != gfn_x(INVALID_GFN) )
738
0
                    return X86EMUL_UNHANDLEABLE;
739
0
                *reps = 0;
740
0
                x86_emul_pagefault(pfec, addr & PAGE_MASK, &hvmemul_ctxt->ctxt);
741
0
                return X86EMUL_EXCEPTION;
742
0
            }
743
0
            *reps = done;
744
0
            break;
745
0
        }
746
0
747
0
        done += PAGE_SIZE;
748
0
    }
749
0
750
0
    *paddr = ((paddr_t)pfn << PAGE_SHIFT) | offset;
751
0
    return X86EMUL_OKAY;
752
0
}
753
    
754
755
static int hvmemul_virtual_to_linear(
756
    enum x86_segment seg,
757
    unsigned long offset,
758
    unsigned int bytes_per_rep,
759
    unsigned long *reps,
760
    enum hvm_access_type access_type,
761
    struct hvm_emulate_ctxt *hvmemul_ctxt,
762
    unsigned long *linear)
763
60.1k
{
764
60.1k
    struct segment_register *reg;
765
60.1k
    int okay;
766
60.1k
    unsigned long max_reps = 4096;
767
60.1k
768
60.1k
    if ( seg == x86_seg_none )
769
0
    {
770
0
        *linear = offset;
771
0
        return X86EMUL_OKAY;
772
0
    }
773
60.1k
774
60.1k
    /*
775
60.1k
     * If introspection has been enabled for this domain, and we're emulating
776
60.1k
     * because a vm_reply asked us to (i.e. not doing regular IO), reps should
777
60.1k
     * be at most 1, since optimization might otherwise cause a single
778
60.1k
     * vm_event to be triggered for repeated writes to a whole page.
779
60.1k
     */
780
60.1k
    if ( unlikely(current->domain->arch.mem_access_emulate_each_rep) &&
781
0
         current->arch.vm_event->emulate_flags != 0 )
782
0
       max_reps = 1;
783
60.1k
784
60.1k
    /*
785
60.1k
     * Clip repetitions to avoid overflow when multiplying by @bytes_per_rep.
786
60.1k
     * The chosen maximum is very conservative but it's what we use in
787
60.1k
     * hvmemul_linear_to_phys() so there is no point in using a larger value.
788
60.1k
     */
789
60.1k
    *reps = min_t(unsigned long, *reps, max_reps);
790
60.1k
791
60.1k
    reg = hvmemul_get_seg_reg(seg, hvmemul_ctxt);
792
60.1k
    if ( IS_ERR(reg) )
793
0
        return -PTR_ERR(reg);
794
60.1k
795
60.1k
    if ( (hvmemul_ctxt->ctxt.regs->eflags & X86_EFLAGS_DF) && (*reps > 1) )
796
0
    {
797
0
        /*
798
0
         * x86_emulate() clips the repetition count to ensure we don't wrap
799
0
         * the effective-address index register. Hence this assertion holds.
800
0
         */
801
0
        ASSERT(offset >= ((*reps - 1) * bytes_per_rep));
802
0
        okay = hvm_virtual_to_linear_addr(
803
0
            seg, reg, offset - (*reps - 1) * bytes_per_rep,
804
0
            *reps * bytes_per_rep, access_type,
805
0
            hvmemul_get_seg_reg(x86_seg_cs, hvmemul_ctxt), linear);
806
0
        *linear += (*reps - 1) * bytes_per_rep;
807
0
        if ( hvmemul_ctxt->ctxt.addr_size != 64 )
808
0
            *linear = (uint32_t)*linear;
809
0
    }
810
60.1k
    else
811
60.1k
    {
812
60.1k
        okay = hvm_virtual_to_linear_addr(
813
60.1k
            seg, reg, offset, *reps * bytes_per_rep, access_type,
814
60.1k
            hvmemul_get_seg_reg(x86_seg_cs, hvmemul_ctxt), linear);
815
60.1k
    }
816
60.1k
817
60.1k
    if ( okay )
818
60.1k
        return X86EMUL_OKAY;
819
60.1k
820
60.1k
    /* If this is a string operation, emulate each iteration separately. */
821
0
    if ( *reps != 1 )
822
0
        return X86EMUL_UNHANDLEABLE;
823
0
824
0
    /*
825
0
     * Leave exception injection to the caller for non-user segments: We
826
0
     * neither know the exact error code to be used, nor can we easily
827
0
     * determine the kind of exception (#GP or #TS) in that case.
828
0
     */
829
0
    *reps = 0;
830
0
    if ( is_x86_user_segment(seg) )
831
0
        x86_emul_hw_exception((seg == x86_seg_ss)
832
0
                              ? TRAP_stack_error
833
0
                              : TRAP_gp_fault, 0, &hvmemul_ctxt->ctxt);
834
0
835
0
    return X86EMUL_EXCEPTION;
836
0
}
837
838
static int hvmemul_phys_mmio_access(
839
    struct hvm_mmio_cache *cache, paddr_t gpa, unsigned int size, uint8_t dir,
840
    uint8_t *buffer, unsigned int offset)
841
60.1k
{
842
60.1k
    unsigned long one_rep = 1;
843
60.1k
    unsigned int chunk;
844
60.1k
    int rc = X86EMUL_OKAY;
845
60.1k
846
60.1k
    /* Accesses must fall within a page. */
847
60.1k
    BUG_ON((gpa & ~PAGE_MASK) + size > PAGE_SIZE);
848
60.1k
849
60.1k
    /*
850
60.1k
     * hvmemul_do_io() cannot handle non-power-of-2 accesses or
851
60.1k
     * accesses larger than sizeof(long), so choose the highest power
852
60.1k
     * of 2 not exceeding sizeof(long) as the 'chunk' size.
853
60.1k
     */
854
60.1k
    ASSERT(size != 0);
855
60.1k
    chunk = 1u << (fls(size) - 1);
856
60.1k
    if ( chunk > sizeof (long) )
857
0
        chunk = sizeof (long);
858
60.1k
859
60.1k
    for ( ;; )
860
60.1k
    {
861
60.1k
        /* Have we already done this chunk? */
862
60.1k
        if ( offset < cache->size )
863
0
        {
864
0
            ASSERT((offset + chunk) <= cache->size);
865
0
866
0
            if ( dir == IOREQ_READ )
867
0
                memcpy(&buffer[offset], &cache->buffer[offset], chunk);
868
0
            else if ( memcmp(&buffer[offset], &cache->buffer[offset], chunk) != 0 )
869
0
                domain_crash(current->domain);
870
0
        }
871
60.1k
        else
872
60.1k
        {
873
60.1k
            ASSERT(offset == cache->size);
874
60.1k
875
60.1k
            rc = hvmemul_do_mmio_buffer(gpa, &one_rep, chunk, dir, 0,
876
60.1k
                                        &buffer[offset]);
877
60.1k
            if ( rc != X86EMUL_OKAY )
878
0
                break;
879
60.1k
880
60.1k
            /* Note that we have now done this chunk. */
881
60.1k
            memcpy(&cache->buffer[offset], &buffer[offset], chunk);
882
60.1k
            cache->size += chunk;
883
60.1k
        }
884
60.1k
885
60.1k
        /* Advance to the next chunk. */
886
60.1k
        gpa += chunk;
887
60.1k
        offset += chunk;
888
60.1k
        size -= chunk;
889
60.1k
890
60.1k
        if ( size == 0 )
891
60.1k
            break;
892
60.1k
893
60.1k
        /*
894
60.1k
         * If the chunk now exceeds the remaining size, choose the next
895
60.1k
         * lowest power of 2 that will fit.
896
60.1k
         */
897
0
        while ( chunk > size )
898
0
            chunk >>= 1;
899
0
    }
900
60.1k
901
60.1k
    return rc;
902
60.1k
}
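
Editor's note: the chunking loop above splits an arbitrary-sized access into power-of-two pieces no larger than sizeof(long), largest first, e.g. a 6-byte access becomes a 4-byte chunk followed by a 2-byte chunk. Below is a standalone sketch of that strategy; the helper names are hypothetical and a GCC/Clang builtin stands in for Xen's fls().

    #include <stdio.h>

    /* Equivalent of "1u << (fls(size) - 1)" in the Xen code above. */
    static unsigned int highest_pow2_le(unsigned int size)
    {
        return 1u << (31 - __builtin_clz(size));
    }

    static void split_into_chunks(unsigned int size)
    {
        unsigned int chunk = highest_pow2_le(size);

        if ( chunk > sizeof(long) )
            chunk = sizeof(long);

        while ( size )
        {
            printf("chunk of %u bytes\n", chunk);
            size -= chunk;
            if ( !size )
                break;
            /* Next lowest power of 2 that still fits in what remains. */
            while ( chunk > size )
                chunk >>= 1;
        }
    }

    int main(void)
    {
        split_into_chunks(6);   /* prints a 4-byte chunk, then a 2-byte one */
        return 0;
    }
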
903
904
/*
905
 * Multi-cycle MMIO handling is based upon the assumption that emulation
906
 * of the same instruction will not access the same MMIO region more
907
 * than once. Hence we can deal with re-emulation (for secondary or
908
 * subsequent cycles) by looking up the result or previous I/O in a
909
 * cache indexed by linear MMIO address.
910
 */
911
static struct hvm_mmio_cache *hvmemul_find_mmio_cache(
912
    struct hvm_vcpu_io *vio, unsigned long gla, uint8_t dir)
913
60.1k
{
914
60.1k
    unsigned int i;
915
60.1k
    struct hvm_mmio_cache *cache;
916
60.1k
917
60.1k
    for ( i = 0; i < vio->mmio_cache_count; i ++ )
918
0
    {
919
0
        cache = &vio->mmio_cache[i];
920
0
921
0
        if ( gla == cache->gla &&
922
0
             dir == cache->dir )
923
0
            return cache;
924
0
    }
925
60.1k
926
60.1k
    i = vio->mmio_cache_count++;
927
60.1k
    if( i == ARRAY_SIZE(vio->mmio_cache) )
928
0
    {
929
0
        domain_crash(current->domain);
930
0
        return NULL;
931
0
    }
932
60.1k
933
60.1k
    cache = &vio->mmio_cache[i];
934
60.1k
    memset(cache, 0, sizeof (*cache));
935
60.1k
936
60.1k
    cache->gla = gla;
937
60.1k
    cache->dir = dir;
938
60.1k
939
60.1k
    return cache;
940
60.1k
}
941
942
static void latch_linear_to_phys(struct hvm_vcpu_io *vio, unsigned long gla,
943
                                 unsigned long gpa, bool_t write)
944
0
{
945
0
    if ( vio->mmio_access.gla_valid )
946
0
        return;
947
0
948
0
    vio->mmio_gla = gla & PAGE_MASK;
949
0
    vio->mmio_gpfn = PFN_DOWN(gpa);
950
0
    vio->mmio_access = (struct npfec){ .gla_valid = 1,
951
0
                                       .read_access = 1,
952
0
                                       .write_access = write };
953
0
}
954
955
static int hvmemul_linear_mmio_access(
956
    unsigned long gla, unsigned int size, uint8_t dir, void *buffer,
957
    uint32_t pfec, struct hvm_emulate_ctxt *hvmemul_ctxt, bool_t known_gpfn)
958
60.1k
{
959
60.1k
    struct hvm_vcpu_io *vio = &current->arch.hvm_vcpu.hvm_io;
960
60.1k
    unsigned long offset = gla & ~PAGE_MASK;
961
60.1k
    struct hvm_mmio_cache *cache = hvmemul_find_mmio_cache(vio, gla, dir);
962
60.1k
    unsigned int chunk, buffer_offset = 0;
963
60.1k
    paddr_t gpa;
964
60.1k
    unsigned long one_rep = 1;
965
60.1k
    int rc;
966
60.1k
967
60.1k
    if ( cache == NULL )
968
0
        return X86EMUL_UNHANDLEABLE;
969
60.1k
970
60.1k
    chunk = min_t(unsigned int, size, PAGE_SIZE - offset);
971
60.1k
972
60.1k
    if ( known_gpfn )
973
60.1k
        gpa = pfn_to_paddr(vio->mmio_gpfn) | offset;
974
60.1k
    else
975
0
    {
976
0
        rc = hvmemul_linear_to_phys(gla, &gpa, chunk, &one_rep, pfec,
977
0
                                    hvmemul_ctxt);
978
0
        if ( rc != X86EMUL_OKAY )
979
0
            return rc;
980
0
981
0
        latch_linear_to_phys(vio, gla, gpa, dir == IOREQ_WRITE);
982
0
    }
983
60.1k
984
60.1k
    for ( ;; )
985
60.1k
    {
986
60.1k
        rc = hvmemul_phys_mmio_access(cache, gpa, chunk, dir, buffer, buffer_offset);
987
60.1k
        if ( rc != X86EMUL_OKAY )
988
0
            break;
989
60.1k
990
60.1k
        gla += chunk;
991
60.1k
        buffer_offset += chunk;
992
60.1k
        size -= chunk;
993
60.1k
994
60.1k
        if ( size == 0 )
995
60.1k
            break;
996
60.1k
997
0
        chunk = min_t(unsigned int, size, PAGE_SIZE);
998
0
        rc = hvmemul_linear_to_phys(gla, &gpa, chunk, &one_rep, pfec,
999
0
                                    hvmemul_ctxt);
1000
0
        if ( rc != X86EMUL_OKAY )
1001
0
            return rc;
1002
0
    }
1003
60.1k
1004
60.1k
    return rc;
1005
60.1k
}
1006
1007
static inline int hvmemul_linear_mmio_read(
1008
    unsigned long gla, unsigned int size, void *buffer,
1009
    uint32_t pfec, struct hvm_emulate_ctxt *hvmemul_ctxt,
1010
    bool_t translate)
1011
56.0k
{
1012
56.0k
    return hvmemul_linear_mmio_access(gla, size, IOREQ_READ, buffer,
1013
56.0k
                                      pfec, hvmemul_ctxt, translate);
1014
56.0k
}
1015
1016
static inline int hvmemul_linear_mmio_write(
1017
    unsigned long gla, unsigned int size, void *buffer,
1018
    uint32_t pfec, struct hvm_emulate_ctxt *hvmemul_ctxt,
1019
    bool_t translate)
1020
4.11k
{
1021
4.11k
    return hvmemul_linear_mmio_access(gla, size, IOREQ_WRITE, buffer,
1022
4.11k
                                      pfec, hvmemul_ctxt, translate);
1023
4.11k
}
1024
1025
static int __hvmemul_read(
1026
    enum x86_segment seg,
1027
    unsigned long offset,
1028
    void *p_data,
1029
    unsigned int bytes,
1030
    enum hvm_access_type access_type,
1031
    struct hvm_emulate_ctxt *hvmemul_ctxt)
1032
56.0k
{
1033
56.0k
    struct vcpu *curr = current;
1034
56.0k
    pagefault_info_t pfinfo;
1035
56.0k
    unsigned long addr, reps = 1;
1036
56.0k
    uint32_t pfec = PFEC_page_present;
1037
56.0k
    struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
1038
56.0k
    int rc;
1039
56.0k
1040
56.0k
    if ( is_x86_system_segment(seg) )
1041
0
        pfec |= PFEC_implicit;
1042
56.0k
    else if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
1043
0
        pfec |= PFEC_user_mode;
1044
56.0k
1045
56.0k
    rc = hvmemul_virtual_to_linear(
1046
56.0k
        seg, offset, bytes, &reps, access_type, hvmemul_ctxt, &addr);
1047
56.0k
    if ( rc != X86EMUL_OKAY || !bytes )
1048
0
        return rc;
1049
56.0k
    if ( ((access_type != hvm_access_insn_fetch
1050
56.0k
           ? vio->mmio_access.read_access
1051
0
           : vio->mmio_access.insn_fetch)) &&
1052
56.0k
         (vio->mmio_gla == (addr & PAGE_MASK)) )
1053
56.0k
        return hvmemul_linear_mmio_read(addr, bytes, p_data, pfec, hvmemul_ctxt, 1);
1054
56.0k
1055
0
    rc = ((access_type == hvm_access_insn_fetch) ?
1056
0
          hvm_fetch_from_guest_linear(p_data, addr, bytes, pfec, &pfinfo) :
1057
0
          hvm_copy_from_guest_linear(p_data, addr, bytes, pfec, &pfinfo));
1058
0
1059
0
    switch ( rc )
1060
0
    {
1061
0
    case HVMTRANS_okay:
1062
0
        break;
1063
0
    case HVMTRANS_bad_linear_to_gfn:
1064
0
        x86_emul_pagefault(pfinfo.ec, pfinfo.linear, &hvmemul_ctxt->ctxt);
1065
0
        return X86EMUL_EXCEPTION;
1066
0
    case HVMTRANS_bad_gfn_to_mfn:
1067
0
        if ( access_type == hvm_access_insn_fetch )
1068
0
            return X86EMUL_UNHANDLEABLE;
1069
0
1070
0
        return hvmemul_linear_mmio_read(addr, bytes, p_data, pfec, hvmemul_ctxt, 0);
1071
0
    case HVMTRANS_gfn_paged_out:
1072
0
    case HVMTRANS_gfn_shared:
1073
0
        return X86EMUL_RETRY;
1074
0
    default:
1075
0
        return X86EMUL_UNHANDLEABLE;
1076
0
    }
1077
0
1078
0
    return X86EMUL_OKAY;
1079
0
}
1080
1081
static int hvmemul_read(
1082
    enum x86_segment seg,
1083
    unsigned long offset,
1084
    void *p_data,
1085
    unsigned int bytes,
1086
    struct x86_emulate_ctxt *ctxt)
1087
56.0k
{
1088
56.0k
    struct hvm_emulate_ctxt *hvmemul_ctxt =
1089
56.0k
        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1090
56.0k
1091
56.0k
    if ( unlikely(hvmemul_ctxt->set_context) )
1092
0
        return set_context_data(p_data, bytes);
1093
56.0k
1094
56.0k
    return __hvmemul_read(
1095
56.0k
        seg, offset, p_data, bytes, hvm_access_read,
1096
56.0k
        container_of(ctxt, struct hvm_emulate_ctxt, ctxt));
1097
56.0k
}
1098
1099
int hvmemul_insn_fetch(
1100
    enum x86_segment seg,
1101
    unsigned long offset,
1102
    void *p_data,
1103
    unsigned int bytes,
1104
    struct x86_emulate_ctxt *ctxt)
1105
190k
{
1106
190k
    struct hvm_emulate_ctxt *hvmemul_ctxt =
1107
190k
        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1108
190k
    /* Careful, as offset can wrap or truncate WRT insn_buf_eip. */
1109
190k
    uint8_t insn_off = offset - hvmemul_ctxt->insn_buf_eip;
1110
190k
1111
190k
    /*
1112
190k
     * Fall back if requested bytes are not in the prefetch cache.
1113
190k
     * But always perform the (fake) read when bytes == 0.
1114
190k
     */
1115
190k
    if ( !bytes ||
1116
190k
         unlikely((insn_off + bytes) > hvmemul_ctxt->insn_buf_bytes) )
1117
0
    {
1118
0
        int rc = __hvmemul_read(seg, offset, p_data, bytes,
1119
0
                                hvm_access_insn_fetch, hvmemul_ctxt);
1120
0
1121
0
        if ( rc == X86EMUL_OKAY && bytes )
1122
0
        {
1123
0
            /*
1124
0
             * Will we overflow insn_buf[]?  This shouldn't be able to happen,
1125
0
             * which means something went wrong with instruction decoding...
1126
0
             */
1127
0
            if ( insn_off >= sizeof(hvmemul_ctxt->insn_buf) ||
1128
0
                 insn_off + bytes > sizeof(hvmemul_ctxt->insn_buf) )
1129
0
            {
1130
0
                ASSERT_UNREACHABLE();
1131
0
                return X86EMUL_UNHANDLEABLE;
1132
0
            }
1133
0
1134
0
            memcpy(&hvmemul_ctxt->insn_buf[insn_off], p_data, bytes);
1135
0
            hvmemul_ctxt->insn_buf_bytes = insn_off + bytes;
1136
0
        }
1137
0
1138
0
        return rc;
1139
0
    }
1140
190k
1141
190k
    /* Hit the cache. Simple memcpy. */
1142
190k
    memcpy(p_data, &hvmemul_ctxt->insn_buf[insn_off], bytes);
1143
190k
    return X86EMUL_OKAY;
1144
190k
}
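
Editor's note: the "Careful, as offset can wrap" remark above relies on insn_off being a uint8_t: the subtraction happens in unsigned long arithmetic and is then truncated, so a fetch address that has wrapped around relative to insn_buf_eip still yields the small in-buffer distance. A standalone sketch with assumed values (and assuming a 64-bit unsigned long, as in the hypervisor):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned long insn_buf_eip = 0xfffffffffffffffeUL; /* near the top */
        unsigned long offset       = 0x1;                  /* wrapped past 0 */
        uint8_t insn_off = offset - insn_buf_eip;

        printf("%u\n", insn_off);   /* prints 3 */
        return 0;
    }
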
1145
1146
static int hvmemul_write(
1147
    enum x86_segment seg,
1148
    unsigned long offset,
1149
    void *p_data,
1150
    unsigned int bytes,
1151
    struct x86_emulate_ctxt *ctxt)
1152
4.11k
{
1153
4.11k
    struct hvm_emulate_ctxt *hvmemul_ctxt =
1154
4.11k
        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1155
4.11k
    struct vcpu *curr = current;
1156
4.11k
    unsigned long addr, reps = 1;
1157
4.11k
    uint32_t pfec = PFEC_page_present | PFEC_write_access;
1158
4.11k
    struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
1159
4.11k
    int rc;
1160
4.11k
    void *mapping;
1161
4.11k
1162
4.11k
    if ( is_x86_system_segment(seg) )
1163
0
        pfec |= PFEC_implicit;
1164
4.11k
    else if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
1165
0
        pfec |= PFEC_user_mode;
1166
4.11k
1167
4.11k
    rc = hvmemul_virtual_to_linear(
1168
4.11k
        seg, offset, bytes, &reps, hvm_access_write, hvmemul_ctxt, &addr);
1169
4.11k
    if ( rc != X86EMUL_OKAY || !bytes )
1170
0
        return rc;
1171
4.11k
1172
4.11k
    if ( vio->mmio_access.write_access &&
1173
4.11k
         (vio->mmio_gla == (addr & PAGE_MASK)) )
1174
4.11k
        return hvmemul_linear_mmio_write(addr, bytes, p_data, pfec, hvmemul_ctxt, 1);
1175
4.11k
1176
0
    mapping = hvmemul_map_linear_addr(addr, bytes, pfec, hvmemul_ctxt);
1177
0
    if ( IS_ERR(mapping) )
1178
0
        return ~PTR_ERR(mapping);
1179
0
1180
0
    if ( !mapping )
1181
0
        return hvmemul_linear_mmio_write(addr, bytes, p_data, pfec, hvmemul_ctxt, 0);
1182
0
1183
0
    memcpy(mapping, p_data, bytes);
1184
0
1185
0
    hvmemul_unmap_linear_addr(mapping, addr, bytes, hvmemul_ctxt);
1186
0
1187
0
    return X86EMUL_OKAY;
1188
0
}
1189
1190
static int hvmemul_write_discard(
1191
    enum x86_segment seg,
1192
    unsigned long offset,
1193
    void *p_data,
1194
    unsigned int bytes,
1195
    struct x86_emulate_ctxt *ctxt)
1196
0
{
1197
0
    /* Discarding the write. */
1198
0
    return X86EMUL_OKAY;
1199
0
}
1200
1201
static int hvmemul_rep_ins_discard(
1202
    uint16_t src_port,
1203
    enum x86_segment dst_seg,
1204
    unsigned long dst_offset,
1205
    unsigned int bytes_per_rep,
1206
    unsigned long *reps,
1207
    struct x86_emulate_ctxt *ctxt)
1208
0
{
1209
0
    return X86EMUL_OKAY;
1210
0
}
1211
1212
static int hvmemul_rep_movs_discard(
1213
   enum x86_segment src_seg,
1214
   unsigned long src_offset,
1215
   enum x86_segment dst_seg,
1216
   unsigned long dst_offset,
1217
   unsigned int bytes_per_rep,
1218
   unsigned long *reps,
1219
   struct x86_emulate_ctxt *ctxt)
1220
0
{
1221
0
    return X86EMUL_OKAY;
1222
0
}
1223
1224
static int hvmemul_rep_stos_discard(
1225
    void *p_data,
1226
    enum x86_segment seg,
1227
    unsigned long offset,
1228
    unsigned int bytes_per_rep,
1229
    unsigned long *reps,
1230
    struct x86_emulate_ctxt *ctxt)
1231
0
{
1232
0
    return X86EMUL_OKAY;
1233
0
}
1234
1235
static int hvmemul_rep_outs_discard(
1236
    enum x86_segment src_seg,
1237
    unsigned long src_offset,
1238
    uint16_t dst_port,
1239
    unsigned int bytes_per_rep,
1240
    unsigned long *reps,
1241
    struct x86_emulate_ctxt *ctxt)
1242
0
{
1243
0
    return X86EMUL_OKAY;
1244
0
}
1245
1246
static int hvmemul_cmpxchg_discard(
1247
    enum x86_segment seg,
1248
    unsigned long offset,
1249
    void *p_old,
1250
    void *p_new,
1251
    unsigned int bytes,
1252
    struct x86_emulate_ctxt *ctxt)
1253
0
{
1254
0
    return X86EMUL_OKAY;
1255
0
}
1256
1257
static int hvmemul_read_io_discard(
1258
    unsigned int port,
1259
    unsigned int bytes,
1260
    unsigned long *val,
1261
    struct x86_emulate_ctxt *ctxt)
1262
0
{
1263
0
    return X86EMUL_OKAY;
1264
0
}
1265
1266
static int hvmemul_write_io_discard(
1267
    unsigned int port,
1268
    unsigned int bytes,
1269
    unsigned long val,
1270
    struct x86_emulate_ctxt *ctxt)
1271
0
{
1272
0
    return X86EMUL_OKAY;
1273
0
}
1274
1275
static int hvmemul_write_msr_discard(
1276
    unsigned int reg,
1277
    uint64_t val,
1278
    struct x86_emulate_ctxt *ctxt)
1279
0
{
1280
0
    return X86EMUL_OKAY;
1281
0
}
1282
1283
static int hvmemul_wbinvd_discard(
1284
    struct x86_emulate_ctxt *ctxt)
1285
0
{
1286
0
    return X86EMUL_OKAY;
1287
0
}
1288
1289
static int hvmemul_cmpxchg(
1290
    enum x86_segment seg,
1291
    unsigned long offset,
1292
    void *p_old,
1293
    void *p_new,
1294
    unsigned int bytes,
1295
    struct x86_emulate_ctxt *ctxt)
1296
0
{
1297
0
    /* Fix this in case the guest is really relying on r-m-w atomicity. */
1298
0
    return hvmemul_write(seg, offset, p_new, bytes, ctxt);
1299
0
}
1300
1301
static int hvmemul_validate(
1302
    const struct x86_emulate_state *state,
1303
    struct x86_emulate_ctxt *ctxt)
1304
60.1k
{
1305
60.1k
    const struct hvm_emulate_ctxt *hvmemul_ctxt =
1306
60.1k
        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1307
60.1k
1308
60.1k
    return !hvmemul_ctxt->validate || hvmemul_ctxt->validate(state, ctxt)
1309
60.1k
           ? X86EMUL_OKAY : X86EMUL_UNHANDLEABLE;
1310
60.1k
}
1311
1312
static int hvmemul_rep_ins(
1313
    uint16_t src_port,
1314
    enum x86_segment dst_seg,
1315
    unsigned long dst_offset,
1316
    unsigned int bytes_per_rep,
1317
    unsigned long *reps,
1318
    struct x86_emulate_ctxt *ctxt)
1319
0
{
1320
0
    struct hvm_emulate_ctxt *hvmemul_ctxt =
1321
0
        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1322
0
    unsigned long addr;
1323
0
    uint32_t pfec = PFEC_page_present | PFEC_write_access;
1324
0
    paddr_t gpa;
1325
0
    p2m_type_t p2mt;
1326
0
    int rc;
1327
0
1328
0
    rc = hvmemul_virtual_to_linear(
1329
0
        dst_seg, dst_offset, bytes_per_rep, reps, hvm_access_write,
1330
0
        hvmemul_ctxt, &addr);
1331
0
    if ( rc != X86EMUL_OKAY )
1332
0
        return rc;
1333
0
1334
0
    if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
1335
0
        pfec |= PFEC_user_mode;
1336
0
1337
0
    rc = hvmemul_linear_to_phys(
1338
0
        addr, &gpa, bytes_per_rep, reps, pfec, hvmemul_ctxt);
1339
0
    if ( rc != X86EMUL_OKAY )
1340
0
        return rc;
1341
0
1342
0
    (void) get_gfn_query_unlocked(current->domain, gpa >> PAGE_SHIFT, &p2mt);
1343
0
    if ( p2mt == p2m_mmio_direct || p2mt == p2m_mmio_dm )
1344
0
        return X86EMUL_UNHANDLEABLE;
1345
0
1346
0
    return hvmemul_do_pio_addr(src_port, reps, bytes_per_rep, IOREQ_READ,
1347
0
                               !!(ctxt->regs->eflags & X86_EFLAGS_DF), gpa);
1348
0
}
1349
1350
static int hvmemul_rep_outs_set_context(
1351
    enum x86_segment src_seg,
1352
    unsigned long src_offset,
1353
    uint16_t dst_port,
1354
    unsigned int bytes_per_rep,
1355
    unsigned long *reps,
1356
    struct x86_emulate_ctxt *ctxt)
1357
0
{
1358
0
    unsigned int bytes = *reps * bytes_per_rep;
1359
0
    char *buf;
1360
0
    int rc;
1361
0
1362
0
    buf = xmalloc_array(char, bytes);
1363
0
1364
0
    if ( buf == NULL )
1365
0
        return X86EMUL_UNHANDLEABLE;
1366
0
1367
0
    rc = set_context_data(buf, bytes);
1368
0
1369
0
    if ( rc == X86EMUL_OKAY )
1370
0
        rc = hvmemul_do_pio_buffer(dst_port, bytes, IOREQ_WRITE, buf);
1371
0
1372
0
    xfree(buf);
1373
0
1374
0
    return rc;
1375
0
}
1376
1377
static int hvmemul_rep_outs(
1378
    enum x86_segment src_seg,
1379
    unsigned long src_offset,
1380
    uint16_t dst_port,
1381
    unsigned int bytes_per_rep,
1382
    unsigned long *reps,
1383
    struct x86_emulate_ctxt *ctxt)
1384
0
{
1385
0
    struct hvm_emulate_ctxt *hvmemul_ctxt =
1386
0
        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1387
0
    unsigned long addr;
1388
0
    uint32_t pfec = PFEC_page_present;
1389
0
    paddr_t gpa;
1390
0
    p2m_type_t p2mt;
1391
0
    int rc;
1392
0
1393
0
    if ( unlikely(hvmemul_ctxt->set_context) )
1394
0
        return hvmemul_rep_outs_set_context(src_seg, src_offset, dst_port,
1395
0
                                            bytes_per_rep, reps, ctxt);
1396
0
1397
0
    rc = hvmemul_virtual_to_linear(
1398
0
        src_seg, src_offset, bytes_per_rep, reps, hvm_access_read,
1399
0
        hvmemul_ctxt, &addr);
1400
0
    if ( rc != X86EMUL_OKAY )
1401
0
        return rc;
1402
0
1403
0
    if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
1404
0
        pfec |= PFEC_user_mode;
1405
0
1406
0
    rc = hvmemul_linear_to_phys(
1407
0
        addr, &gpa, bytes_per_rep, reps, pfec, hvmemul_ctxt);
1408
0
    if ( rc != X86EMUL_OKAY )
1409
0
        return rc;
1410
0
1411
0
    (void) get_gfn_query_unlocked(current->domain, gpa >> PAGE_SHIFT, &p2mt);
1412
0
    if ( p2mt == p2m_mmio_direct || p2mt == p2m_mmio_dm )
1413
0
        return X86EMUL_UNHANDLEABLE;
1414
0
1415
0
    return hvmemul_do_pio_addr(dst_port, reps, bytes_per_rep, IOREQ_WRITE,
1416
0
                               !!(ctxt->regs->eflags & X86_EFLAGS_DF), gpa);
1417
0
}
1418
1419
static int hvmemul_rep_movs(
1420
   enum x86_segment src_seg,
1421
   unsigned long src_offset,
1422
   enum x86_segment dst_seg,
1423
   unsigned long dst_offset,
1424
   unsigned int bytes_per_rep,
1425
   unsigned long *reps,
1426
   struct x86_emulate_ctxt *ctxt)
1427
0
{
1428
0
    struct hvm_emulate_ctxt *hvmemul_ctxt =
1429
0
        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1430
0
    struct hvm_vcpu_io *vio = &current->arch.hvm_vcpu.hvm_io;
1431
0
    unsigned long saddr, daddr, bytes;
1432
0
    paddr_t sgpa, dgpa;
1433
0
    uint32_t pfec = PFEC_page_present;
1434
0
    p2m_type_t sp2mt, dp2mt;
1435
0
    int rc, df = !!(ctxt->regs->eflags & X86_EFLAGS_DF);
1436
0
    char *buf;
1437
0
1438
0
    rc = hvmemul_virtual_to_linear(
1439
0
        src_seg, src_offset, bytes_per_rep, reps, hvm_access_read,
1440
0
        hvmemul_ctxt, &saddr);
1441
0
    if ( rc != X86EMUL_OKAY )
1442
0
        return rc;
1443
0
1444
0
    rc = hvmemul_virtual_to_linear(
1445
0
        dst_seg, dst_offset, bytes_per_rep, reps, hvm_access_write,
1446
0
        hvmemul_ctxt, &daddr);
1447
0
    if ( rc != X86EMUL_OKAY )
1448
0
        return rc;
1449
0
1450
0
    if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
1451
0
        pfec |= PFEC_user_mode;
1452
0
1453
0
    if ( vio->mmio_access.read_access &&
1454
0
         (vio->mmio_gla == (saddr & PAGE_MASK)) &&
1455
0
         /*
1456
0
          * Upon initial invocation don't truncate large batches just because
1457
0
          * of a hit for the translation: Doing the guest page table walk is
1458
0
          * cheaper than multiple round trips through the device model. Yet
1459
0
          * when processing a response we can always re-use the translation.
1460
0
          */
1461
0
         (vio->io_req.state == STATE_IORESP_READY ||
1462
0
          ((!df || *reps == 1) &&
1463
0
           PAGE_SIZE - (saddr & ~PAGE_MASK) >= *reps * bytes_per_rep)) )
1464
0
        sgpa = pfn_to_paddr(vio->mmio_gpfn) | (saddr & ~PAGE_MASK);
1465
0
    else
1466
0
    {
1467
0
        rc = hvmemul_linear_to_phys(saddr, &sgpa, bytes_per_rep, reps, pfec,
1468
0
                                    hvmemul_ctxt);
1469
0
        if ( rc != X86EMUL_OKAY )
1470
0
            return rc;
1471
0
    }
1472
0
1473
0
    bytes = PAGE_SIZE - (daddr & ~PAGE_MASK);
1474
0
    if ( vio->mmio_access.write_access &&
1475
0
         (vio->mmio_gla == (daddr & PAGE_MASK)) &&
1476
0
         /* See comment above. */
1477
0
         (vio->io_req.state == STATE_IORESP_READY ||
1478
0
          ((!df || *reps == 1) &&
1479
0
           PAGE_SIZE - (daddr & ~PAGE_MASK) >= *reps * bytes_per_rep)) )
1480
0
        dgpa = pfn_to_paddr(vio->mmio_gpfn) | (daddr & ~PAGE_MASK);
1481
0
    else
1482
0
    {
1483
0
        rc = hvmemul_linear_to_phys(daddr, &dgpa, bytes_per_rep, reps,
1484
0
                                    pfec | PFEC_write_access, hvmemul_ctxt);
1485
0
        if ( rc != X86EMUL_OKAY )
1486
0
            return rc;
1487
0
    }
1488
0
1489
0
    /* Check for MMIO ops */
1490
0
    (void) get_gfn_query_unlocked(current->domain, sgpa >> PAGE_SHIFT, &sp2mt);
1491
0
    (void) get_gfn_query_unlocked(current->domain, dgpa >> PAGE_SHIFT, &dp2mt);
1492
0
1493
0
    if ( sp2mt == p2m_mmio_direct || dp2mt == p2m_mmio_direct ||
1494
0
         (sp2mt == p2m_mmio_dm && dp2mt == p2m_mmio_dm) )
1495
0
        return X86EMUL_UNHANDLEABLE;
1496
0
1497
0
    if ( sp2mt == p2m_mmio_dm )
1498
0
    {
1499
0
        latch_linear_to_phys(vio, saddr, sgpa, 0);
1500
0
        return hvmemul_do_mmio_addr(
1501
0
            sgpa, reps, bytes_per_rep, IOREQ_READ, df, dgpa);
1502
0
    }
1503
0
1504
0
    if ( dp2mt == p2m_mmio_dm )
1505
0
    {
1506
0
        latch_linear_to_phys(vio, daddr, dgpa, 1);
1507
0
        return hvmemul_do_mmio_addr(
1508
0
            dgpa, reps, bytes_per_rep, IOREQ_WRITE, df, sgpa);
1509
0
    }
1510
0
1511
0
    /* RAM-to-RAM copy: emulate as equivalent of memmove(dgpa, sgpa, bytes). */
1512
0
    bytes = *reps * bytes_per_rep;
1513
0
1514
0
    /* Adjust source address for reverse copy. */
1515
0
    if ( df )
1516
0
        sgpa -= bytes - bytes_per_rep;
1517
0
1518
0
    /*
1519
0
     * Will first iteration copy fall within source range? If not then entire
1520
0
     * copy does not corrupt itself. If so, then this is more complex than
1521
0
     * can be emulated by a source-to-buffer-to-destination block copy.
1522
0
     */
1523
0
    if ( ((dgpa + bytes_per_rep) > sgpa) && (dgpa < (sgpa + bytes)) )
1524
0
        return X86EMUL_UNHANDLEABLE;
1525
0
1526
0
    /* Adjust destination address for reverse copy. */
1527
0
    if ( df )
1528
0
        dgpa -= bytes - bytes_per_rep;
1529
0
1530
0
    /* Allocate temporary buffer. Fall back to slow emulation if this fails. */
1531
0
    buf = xmalloc_bytes(bytes);
1532
0
    if ( buf == NULL )
1533
0
        return X86EMUL_UNHANDLEABLE;
1534
0
1535
0
    if ( unlikely(hvmemul_ctxt->set_context) )
1536
0
    {
1537
0
        rc = set_context_data(buf, bytes);
1538
0
1539
0
        if ( rc != X86EMUL_OKAY)
1540
0
        {
1541
0
            xfree(buf);
1542
0
            return rc;
1543
0
        }
1544
0
1545
0
        rc = HVMTRANS_okay;
1546
0
    }
1547
0
    else
1548
0
        /*
1549
0
         * We do a modicum of checking here, just for paranoia's sake and to
1550
0
         * definitely avoid copying an uninitialised buffer into guest address
1551
0
         * space.
1552
0
         */
1553
0
        rc = hvm_copy_from_guest_phys(buf, sgpa, bytes);
1554
0
1555
0
    if ( rc == HVMTRANS_okay )
1556
0
        rc = hvm_copy_to_guest_phys(dgpa, buf, bytes, current);
1557
0
1558
0
    xfree(buf);
1559
0
1560
0
    if ( rc == HVMTRANS_gfn_paged_out )
1561
0
        return X86EMUL_RETRY;
1562
0
    if ( rc == HVMTRANS_gfn_shared )
1563
0
        return X86EMUL_RETRY;
1564
0
    if ( rc != HVMTRANS_okay )
1565
0
    {
1566
0
        gdprintk(XENLOG_WARNING, "Failed memory-to-memory REP MOVS: sgpa=%"
1567
0
                 PRIpaddr" dgpa=%"PRIpaddr" reps=%lu bytes_per_rep=%u\n",
1568
0
                 sgpa, dgpa, *reps, bytes_per_rep);
1569
0
        return X86EMUL_UNHANDLEABLE;
1570
0
    }
1571
0
1572
0
    return X86EMUL_OKAY;
1573
0
}
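A note on the self-corruption test above: for a forward copy it reduces to a half-open interval intersection between the first destination write [dgpa, dgpa + bytes_per_rep) and the whole source range [sgpa, sgpa + bytes). A minimal standalone sketch of that predicate, with a hypothetical helper name and made-up addresses (not Xen code):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t paddr_t;

    /*
     * Does the destination of the first iteration, [dgpa, dgpa + bytes_per_rep),
     * intersect the whole source range [sgpa, sgpa + bytes)?  If it does, a
     * simple source-to-buffer-to-destination block copy would read data it has
     * already overwritten.
     */
    static bool first_write_hits_source(paddr_t sgpa, paddr_t dgpa,
                                        unsigned int bytes_per_rep,
                                        unsigned long bytes)
    {
        return (dgpa + bytes_per_rep > sgpa) && (dgpa < sgpa + bytes);
    }

    int main(void)
    {
        /* 16 reps of 4 bytes each. */
        printf("%d\n", first_write_hits_source(0x1000, 0x2000, 4, 64)); /* 0: disjoint ranges */
        printf("%d\n", first_write_hits_source(0x1000, 0x1010, 4, 64)); /* 1: first write lands inside the source */
        printf("%d\n", first_write_hits_source(0x1010, 0x1000, 4, 64)); /* 0: first write sits just below the source */
        return 0;
    }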
1574
1575
static int hvmemul_rep_stos(
1576
    void *p_data,
1577
    enum x86_segment seg,
1578
    unsigned long offset,
1579
    unsigned int bytes_per_rep,
1580
    unsigned long *reps,
1581
    struct x86_emulate_ctxt *ctxt)
1582
0
{
1583
0
    struct hvm_emulate_ctxt *hvmemul_ctxt =
1584
0
        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1585
0
    struct hvm_vcpu_io *vio = &current->arch.hvm_vcpu.hvm_io;
1586
0
    unsigned long addr, bytes;
1587
0
    paddr_t gpa;
1588
0
    p2m_type_t p2mt;
1589
0
    bool_t df = !!(ctxt->regs->eflags & X86_EFLAGS_DF);
1590
0
    int rc = hvmemul_virtual_to_linear(seg, offset, bytes_per_rep, reps,
1591
0
                                       hvm_access_write, hvmemul_ctxt, &addr);
1592
0
1593
0
    if ( rc != X86EMUL_OKAY )
1594
0
        return rc;
1595
0
1596
0
    bytes = PAGE_SIZE - (addr & ~PAGE_MASK);
1597
0
    if ( vio->mmio_access.write_access &&
1598
0
         (vio->mmio_gla == (addr & PAGE_MASK)) &&
1599
0
         /* See respective comment in MOVS processing. */
1600
0
         (vio->io_req.state == STATE_IORESP_READY ||
1601
0
          ((!df || *reps == 1) &&
1602
0
           PAGE_SIZE - (addr & ~PAGE_MASK) >= *reps * bytes_per_rep)) )
1603
0
        gpa = pfn_to_paddr(vio->mmio_gpfn) | (addr & ~PAGE_MASK);
1604
0
    else
1605
0
    {
1606
0
        uint32_t pfec = PFEC_page_present | PFEC_write_access;
1607
0
1608
0
        if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
1609
0
            pfec |= PFEC_user_mode;
1610
0
1611
0
        rc = hvmemul_linear_to_phys(addr, &gpa, bytes_per_rep, reps, pfec,
1612
0
                                    hvmemul_ctxt);
1613
0
        if ( rc != X86EMUL_OKAY )
1614
0
            return rc;
1615
0
    }
1616
0
1617
0
    /* Check for MMIO op */
1618
0
    (void)get_gfn_query_unlocked(current->domain, gpa >> PAGE_SHIFT, &p2mt);
1619
0
1620
0
    switch ( p2mt )
1621
0
    {
1622
0
        unsigned long bytes;
1623
0
        void *buf;
1624
0
1625
0
    default:
1626
0
        /* Allocate temporary buffer. */
1627
0
        for ( ; ; )
1628
0
        {
1629
0
            bytes = *reps * bytes_per_rep;
1630
0
            buf = xmalloc_bytes(bytes);
1631
0
            if ( buf || *reps <= 1 )
1632
0
                break;
1633
0
            *reps >>= 1;
1634
0
        }
1635
0
1636
0
        if ( !buf )
1637
0
            buf = p_data;
1638
0
        else
1639
0
            switch ( bytes_per_rep )
1640
0
            {
1641
0
                unsigned long dummy;
1642
0
1643
0
#define CASE(bits, suffix)                                     \
1644
0
            case (bits) / 8:                                   \
1645
0
                asm ( "rep stos" #suffix                       \
1646
0
                      : "=m" (*(char (*)[bytes])buf),          \
1647
0
                        "=D" (dummy), "=c" (dummy)             \
1648
0
                      : "a" (*(const uint##bits##_t *)p_data), \
1649
0
                         "1" (buf), "2" (*reps) );             \
1650
0
                break
1651
0
            CASE(8, b);
1652
0
            CASE(16, w);
1653
0
            CASE(32, l);
1654
0
            CASE(64, q);
1655
0
#undef CASE
1656
0
1657
0
            default:
1658
0
                ASSERT_UNREACHABLE();
1659
0
                xfree(buf);
1660
0
                return X86EMUL_UNHANDLEABLE;
1661
0
            }
1662
0
1663
0
        /* Adjust address for reverse store. */
1664
0
        if ( df )
1665
0
            gpa -= bytes - bytes_per_rep;
1666
0
1667
0
        rc = hvm_copy_to_guest_phys(gpa, buf, bytes, current);
1668
0
1669
0
        if ( buf != p_data )
1670
0
            xfree(buf);
1671
0
1672
0
        switch ( rc )
1673
0
        {
1674
0
        case HVMTRANS_gfn_paged_out:
1675
0
        case HVMTRANS_gfn_shared:
1676
0
            return X86EMUL_RETRY;
1677
0
        case HVMTRANS_okay:
1678
0
            return X86EMUL_OKAY;
1679
0
        }
1680
0
1681
0
        gdprintk(XENLOG_WARNING,
1682
0
                 "Failed REP STOS: gpa=%"PRIpaddr" reps=%lu bytes_per_rep=%u\n",
1683
0
                 gpa, *reps, bytes_per_rep);
1684
0
        /* fall through */
1685
0
    case p2m_mmio_direct:
1686
0
        return X86EMUL_UNHANDLEABLE;
1687
0
1688
0
    case p2m_mmio_dm:
1689
0
        latch_linear_to_phys(vio, addr, gpa, 1);
1690
0
        return hvmemul_do_mmio_buffer(gpa, reps, bytes_per_rep, IOREQ_WRITE, df,
1691
0
                                      p_data);
1692
0
    }
1693
0
}
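The rep stos CASE() arms above simply replicate a 1/2/4/8-byte pattern *reps times into the temporary buffer before a single hvm_copy_to_guest_phys(). A portable sketch of the same fill, using a plain C loop instead of the rep-prefixed store (hypothetical helper name, not Xen code):

    #include <string.h>

    /*
     * Fill 'buf' with 'reps' back-to-back copies of the 'bytes_per_rep'-sized
     * pattern at 'p_data'.  Returns 0 on success, -1 for element sizes the
     * emulation above would also reject.
     */
    static int fill_pattern(void *buf, const void *p_data,
                            unsigned int bytes_per_rep, unsigned long reps)
    {
        unsigned long i;

        switch ( bytes_per_rep )
        {
        case 1: case 2: case 4: case 8:
            for ( i = 0; i < reps; i++ )
                memcpy((char *)buf + i * bytes_per_rep, p_data, bytes_per_rep);
            return 0;
        default:
            return -1;
        }
    }

With bytes_per_rep == 4 this produces the same byte stream the CASE(32, l) arm writes with one "rep stosl".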
1694
1695
static int hvmemul_read_segment(
1696
    enum x86_segment seg,
1697
    struct segment_register *reg,
1698
    struct x86_emulate_ctxt *ctxt)
1699
0
{
1700
0
    struct hvm_emulate_ctxt *hvmemul_ctxt =
1701
0
        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1702
0
    struct segment_register *sreg = hvmemul_get_seg_reg(seg, hvmemul_ctxt);
1703
0
1704
0
    if ( IS_ERR(sreg) )
1705
0
         return -PTR_ERR(sreg);
1706
0
1707
0
    *reg = *sreg;
1708
0
1709
0
    return X86EMUL_OKAY;
1710
0
}
1711
1712
static int hvmemul_write_segment(
1713
    enum x86_segment seg,
1714
    const struct segment_register *reg,
1715
    struct x86_emulate_ctxt *ctxt)
1716
0
{
1717
0
    struct hvm_emulate_ctxt *hvmemul_ctxt =
1718
0
        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1719
0
    unsigned int idx = seg;
1720
0
1721
0
    if ( idx >= ARRAY_SIZE(hvmemul_ctxt->seg_reg) )
1722
0
        return X86EMUL_UNHANDLEABLE;
1723
0
1724
0
    hvmemul_ctxt->seg_reg[idx] = *reg;
1725
0
    __set_bit(idx, &hvmemul_ctxt->seg_reg_accessed);
1726
0
    __set_bit(idx, &hvmemul_ctxt->seg_reg_dirty);
1727
0
1728
0
    return X86EMUL_OKAY;
1729
0
}
1730
1731
static int hvmemul_read_io(
1732
    unsigned int port,
1733
    unsigned int bytes,
1734
    unsigned long *val,
1735
    struct x86_emulate_ctxt *ctxt)
1736
0
{
1737
0
    struct hvm_emulate_ctxt *hvmemul_ctxt =
1738
0
        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1739
0
1740
0
    *val = 0;
1741
0
1742
0
    if ( unlikely(hvmemul_ctxt->set_context) )
1743
0
        return set_context_data(val, bytes);
1744
0
1745
0
    return hvmemul_do_pio_buffer(port, bytes, IOREQ_READ, val);
1746
0
}
1747
1748
static int hvmemul_write_io(
1749
    unsigned int port,
1750
    unsigned int bytes,
1751
    unsigned long val,
1752
    struct x86_emulate_ctxt *ctxt)
1753
0
{
1754
0
    return hvmemul_do_pio_buffer(port, bytes, IOREQ_WRITE, &val);
1755
0
}
1756
1757
static int hvmemul_read_cr(
1758
    unsigned int reg,
1759
    unsigned long *val,
1760
    struct x86_emulate_ctxt *ctxt)
1761
0
{
1762
0
    switch ( reg )
1763
0
    {
1764
0
    case 0:
1765
0
    case 2:
1766
0
    case 3:
1767
0
    case 4:
1768
0
        *val = current->arch.hvm_vcpu.guest_cr[reg];
1769
0
        HVMTRACE_LONG_2D(CR_READ, reg, TRC_PAR_LONG(*val));
1770
0
        return X86EMUL_OKAY;
1771
0
    default:
1772
0
        break;
1773
0
    }
1774
0
1775
0
    return X86EMUL_UNHANDLEABLE;
1776
0
}
1777
1778
static int hvmemul_write_cr(
1779
    unsigned int reg,
1780
    unsigned long val,
1781
    struct x86_emulate_ctxt *ctxt)
1782
0
{
1783
0
    int rc;
1784
0
1785
0
    HVMTRACE_LONG_2D(CR_WRITE, reg, TRC_PAR_LONG(val));
1786
0
    switch ( reg )
1787
0
    {
1788
0
    case 0:
1789
0
        rc = hvm_set_cr0(val, 1);
1790
0
        break;
1791
0
1792
0
    case 2:
1793
0
        current->arch.hvm_vcpu.guest_cr[2] = val;
1794
0
        rc = X86EMUL_OKAY;
1795
0
        break;
1796
0
1797
0
    case 3:
1798
0
        rc = hvm_set_cr3(val, 1);
1799
0
        break;
1800
0
1801
0
    case 4:
1802
0
        rc = hvm_set_cr4(val, 1);
1803
0
        break;
1804
0
1805
0
    default:
1806
0
        rc = X86EMUL_UNHANDLEABLE;
1807
0
        break;
1808
0
    }
1809
0
1810
0
    if ( rc == X86EMUL_EXCEPTION )
1811
0
        x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
1812
0
1813
0
    return rc;
1814
0
}
1815
1816
static int hvmemul_read_msr(
1817
    unsigned int reg,
1818
    uint64_t *val,
1819
    struct x86_emulate_ctxt *ctxt)
1820
0
{
1821
0
    int rc = hvm_msr_read_intercept(reg, val);
1822
0
1823
0
    if ( rc == X86EMUL_EXCEPTION )
1824
0
        x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
1825
0
1826
0
    return rc;
1827
0
}
1828
1829
static int hvmemul_write_msr(
1830
    unsigned int reg,
1831
    uint64_t val,
1832
    struct x86_emulate_ctxt *ctxt)
1833
0
{
1834
0
    int rc = hvm_msr_write_intercept(reg, val, 1);
1835
0
1836
0
    if ( rc == X86EMUL_EXCEPTION )
1837
0
        x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
1838
0
1839
0
    return rc;
1840
0
}
1841
1842
static int hvmemul_wbinvd(
1843
    struct x86_emulate_ctxt *ctxt)
1844
0
{
1845
0
    hvm_funcs.wbinvd_intercept();
1846
0
    return X86EMUL_OKAY;
1847
0
}
1848
1849
int hvmemul_cpuid(uint32_t leaf, uint32_t subleaf,
1850
                  struct cpuid_leaf *res, struct x86_emulate_ctxt *ctxt)
1851
0
{
1852
0
    guest_cpuid(current, leaf, subleaf, res);
1853
0
    return X86EMUL_OKAY;
1854
0
}
1855
1856
static int hvmemul_get_fpu(
1857
    void (*exception_callback)(void *, struct cpu_user_regs *),
1858
    void *exception_callback_arg,
1859
    enum x86_emulate_fpu_type type,
1860
    struct x86_emulate_ctxt *ctxt)
1861
0
{
1862
0
    struct vcpu *curr = current;
1863
0
1864
0
    switch ( type )
1865
0
    {
1866
0
    case X86EMUL_FPU_fpu:
1867
0
    case X86EMUL_FPU_wait:
1868
0
    case X86EMUL_FPU_mmx:
1869
0
    case X86EMUL_FPU_xmm:
1870
0
        break;
1871
0
    case X86EMUL_FPU_ymm:
1872
0
        if ( !(curr->arch.xcr0 & XSTATE_SSE) ||
1873
0
             !(curr->arch.xcr0 & XSTATE_YMM) )
1874
0
            return X86EMUL_UNHANDLEABLE;
1875
0
        break;
1876
0
    default:
1877
0
        return X86EMUL_UNHANDLEABLE;
1878
0
    }
1879
0
1880
0
    if ( !curr->fpu_dirtied )
1881
0
        hvm_funcs.fpu_dirty_intercept();
1882
0
    else if ( type == X86EMUL_FPU_fpu )
1883
0
    {
1884
0
        const typeof(curr->arch.xsave_area->fpu_sse) *fpu_ctxt =
1885
0
            curr->arch.fpu_ctxt;
1886
0
1887
0
        /*
1888
0
         * Latch current register state so that we can back out changes
1889
0
         * if needed (namely when a memory write fails after register state
1890
0
         * has already been updated).
1891
0
         * NB: We don't really need the "enable" part of the called function
1892
0
         * (->fpu_dirtied set implies CR0.TS clear), but the additional
1893
0
         * overhead should be low enough to not warrant introduction of yet
1894
0
         * another slightly different function. However, we need to undo the
1895
0
         * ->fpu_dirtied clearing the function does as well as the possible
1896
0
         * masking of all exceptions by FNSTENV.
1897
0
         */
1898
0
        save_fpu_enable();
1899
0
        curr->fpu_dirtied = true;
1900
0
        if ( (fpu_ctxt->fcw & 0x3f) != 0x3f )
1901
0
        {
1902
0
            uint16_t fcw;
1903
0
1904
0
            asm ( "fnstcw %0" : "=m" (fcw) );
1905
0
            if ( (fcw & 0x3f) == 0x3f )
1906
0
                asm ( "fldcw %0" :: "m" (fpu_ctxt->fcw) );
1907
0
            else
1908
0
                ASSERT(fcw == fpu_ctxt->fcw);
1909
0
        }
1910
0
    }
1911
0
1912
0
    curr->arch.hvm_vcpu.fpu_exception_callback = exception_callback;
1913
0
    curr->arch.hvm_vcpu.fpu_exception_callback_arg = exception_callback_arg;
1914
0
1915
0
    return X86EMUL_OKAY;
1916
0
}
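The 0x3f tests above inspect the six x87 exception mask bits of the FPU control word (IM, DM, ZM, OM, UM, PM, bits 0-5): if the guest's FCW leaves any exception unmasked but the live FCW now has them all masked (as FNSTENV leaves it), the guest value is reloaded. A tiny sketch of that bit test; the mask value is architectural, the helper name is made up:

    #include <stdbool.h>
    #include <stdint.h>

    /* x87 FCW exception mask bits IM, DM, ZM, OM, UM, PM occupy bits 0-5. */
    #define FCW_ALL_EXC_MASKED 0x3f

    /* True when every x87 exception is masked in 'fcw'. */
    static bool fcw_all_exceptions_masked(uint16_t fcw)
    {
        return (fcw & FCW_ALL_EXC_MASKED) == FCW_ALL_EXC_MASKED;
    }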
1917
1918
static void hvmemul_put_fpu(
1919
    struct x86_emulate_ctxt *ctxt,
1920
    enum x86_emulate_fpu_type backout,
1921
    const struct x86_emul_fpu_aux *aux)
1922
0
{
1923
0
    struct vcpu *curr = current;
1924
0
1925
0
    curr->arch.hvm_vcpu.fpu_exception_callback = NULL;
1926
0
1927
0
    if ( aux )
1928
0
    {
1929
0
        typeof(curr->arch.xsave_area->fpu_sse) *fpu_ctxt = curr->arch.fpu_ctxt;
1930
0
        bool dval = aux->dval;
1931
0
        int mode = hvm_guest_x86_mode(curr);
1932
0
1933
0
        ASSERT(backout == X86EMUL_FPU_none);
1934
0
        /*
1935
0
         * Latch current register state so that we can replace FIP/FDP/FOP
1936
0
         * (which have values resulting from our own invocation of the FPU
1937
0
         * instruction during emulation).
1938
0
         * NB: See also the comment in hvmemul_get_fpu(); we don't need to
1939
0
         * set ->fpu_dirtied here as it is going to be cleared below, and
1940
0
         * we also don't need to reload FCW as we're forcing full state to
1941
0
         * be reloaded anyway.
1942
0
         */
1943
0
        save_fpu_enable();
1944
0
1945
0
        if ( boot_cpu_has(X86_FEATURE_FDP_EXCP_ONLY) &&
1946
0
             !(fpu_ctxt->fsw & ~fpu_ctxt->fcw & 0x003f) )
1947
0
            dval = false;
1948
0
1949
0
        switch ( mode )
1950
0
        {
1951
0
        case 8:
1952
0
            fpu_ctxt->fip.addr = aux->ip;
1953
0
            if ( dval )
1954
0
                fpu_ctxt->fdp.addr = aux->dp;
1955
0
            fpu_ctxt->x[FPU_WORD_SIZE_OFFSET] = 8;
1956
0
            break;
1957
0
1958
0
        case 4: case 2:
1959
0
            fpu_ctxt->fip.offs = aux->ip;
1960
0
            fpu_ctxt->fip.sel  = aux->cs;
1961
0
            if ( dval )
1962
0
            {
1963
0
                fpu_ctxt->fdp.offs = aux->dp;
1964
0
                fpu_ctxt->fdp.sel  = aux->ds;
1965
0
            }
1966
0
            fpu_ctxt->x[FPU_WORD_SIZE_OFFSET] = mode;
1967
0
            break;
1968
0
1969
0
        case 0: case 1:
1970
0
            fpu_ctxt->fip.addr = aux->ip | (aux->cs << 4);
1971
0
            if ( dval )
1972
0
                fpu_ctxt->fdp.addr = aux->dp | (aux->ds << 4);
1973
0
            fpu_ctxt->x[FPU_WORD_SIZE_OFFSET] = 2;
1974
0
            break;
1975
0
1976
0
        default:
1977
0
            ASSERT_UNREACHABLE();
1978
0
            return;
1979
0
        }
1980
0
1981
0
        fpu_ctxt->fop = aux->op;
1982
0
1983
0
        /* Re-use backout code below. */
1984
0
        backout = X86EMUL_FPU_fpu;
1985
0
    }
1986
0
1987
0
    if ( backout == X86EMUL_FPU_fpu )
1988
0
    {
1989
0
        /*
1990
0
         * To back out changes to the register file simply adjust state such
1991
0
         * that upon next FPU insn use by the guest we'll reload the state
1992
0
         * saved (or freshly loaded) by hvmemul_get_fpu().
1993
0
         */
1994
0
        curr->fpu_dirtied = false;
1995
0
        stts();
1996
0
        hvm_funcs.fpu_leave(curr);
1997
0
    }
1998
0
}
1999
2000
static int hvmemul_invlpg(
2001
    enum x86_segment seg,
2002
    unsigned long offset,
2003
    struct x86_emulate_ctxt *ctxt)
2004
0
{
2005
0
    struct hvm_emulate_ctxt *hvmemul_ctxt =
2006
0
        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
2007
0
    unsigned long addr, reps = 1;
2008
0
    int rc;
2009
0
2010
0
    rc = hvmemul_virtual_to_linear(
2011
0
        seg, offset, 1, &reps, hvm_access_none, hvmemul_ctxt, &addr);
2012
0
2013
0
    if ( rc == X86EMUL_EXCEPTION )
2014
0
    {
2015
0
        /*
2016
0
         * `invlpg` takes segment bases into account, but is not subject to
2017
0
         * faults from segment type/limit checks, and is specified as a NOP
2018
0
         * when issued on non-canonical addresses.
2019
0
         *
2020
0
         * hvmemul_virtual_to_linear() raises exceptions for type/limit
2021
0
         * violations, so squash them.
2022
0
         */
2023
0
        x86_emul_reset_event(ctxt);
2024
0
        rc = X86EMUL_OKAY;
2025
0
    }
2026
0
2027
0
    if ( rc == X86EMUL_OKAY )
2028
0
        paging_invlpg(current, addr);
2029
0
2030
0
    return rc;
2031
0
}
2032
2033
static int hvmemul_vmfunc(
2034
    struct x86_emulate_ctxt *ctxt)
2035
0
{
2036
0
    int rc;
2037
0
2038
0
    if ( !hvm_funcs.altp2m_vcpu_emulate_vmfunc )
2039
0
        return X86EMUL_UNHANDLEABLE;
2040
0
    rc = hvm_funcs.altp2m_vcpu_emulate_vmfunc(ctxt->regs);
2041
0
    if ( rc == X86EMUL_EXCEPTION )
2042
0
        x86_emul_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC, ctxt);
2043
0
2044
0
    return rc;
2045
0
}
2046
2047
static const struct x86_emulate_ops hvm_emulate_ops = {
2048
    .read          = hvmemul_read,
2049
    .insn_fetch    = hvmemul_insn_fetch,
2050
    .write         = hvmemul_write,
2051
    .cmpxchg       = hvmemul_cmpxchg,
2052
    .validate      = hvmemul_validate,
2053
    .rep_ins       = hvmemul_rep_ins,
2054
    .rep_outs      = hvmemul_rep_outs,
2055
    .rep_movs      = hvmemul_rep_movs,
2056
    .rep_stos      = hvmemul_rep_stos,
2057
    .read_segment  = hvmemul_read_segment,
2058
    .write_segment = hvmemul_write_segment,
2059
    .read_io       = hvmemul_read_io,
2060
    .write_io      = hvmemul_write_io,
2061
    .read_cr       = hvmemul_read_cr,
2062
    .write_cr      = hvmemul_write_cr,
2063
    .read_msr      = hvmemul_read_msr,
2064
    .write_msr     = hvmemul_write_msr,
2065
    .wbinvd        = hvmemul_wbinvd,
2066
    .cpuid         = hvmemul_cpuid,
2067
    .get_fpu       = hvmemul_get_fpu,
2068
    .put_fpu       = hvmemul_put_fpu,
2069
    .invlpg        = hvmemul_invlpg,
2070
    .vmfunc        = hvmemul_vmfunc,
2071
};
2072
2073
static const struct x86_emulate_ops hvm_emulate_ops_no_write = {
2074
    .read          = hvmemul_read,
2075
    .insn_fetch    = hvmemul_insn_fetch,
2076
    .write         = hvmemul_write_discard,
2077
    .cmpxchg       = hvmemul_cmpxchg_discard,
2078
    .rep_ins       = hvmemul_rep_ins_discard,
2079
    .rep_outs      = hvmemul_rep_outs_discard,
2080
    .rep_movs      = hvmemul_rep_movs_discard,
2081
    .rep_stos      = hvmemul_rep_stos_discard,
2082
    .read_segment  = hvmemul_read_segment,
2083
    .write_segment = hvmemul_write_segment,
2084
    .read_io       = hvmemul_read_io_discard,
2085
    .write_io      = hvmemul_write_io_discard,
2086
    .read_cr       = hvmemul_read_cr,
2087
    .write_cr      = hvmemul_write_cr,
2088
    .read_msr      = hvmemul_read_msr,
2089
    .write_msr     = hvmemul_write_msr_discard,
2090
    .wbinvd        = hvmemul_wbinvd_discard,
2091
    .cpuid         = hvmemul_cpuid,
2092
    .get_fpu       = hvmemul_get_fpu,
2093
    .put_fpu       = hvmemul_put_fpu,
2094
    .invlpg        = hvmemul_invlpg,
2095
    .vmfunc        = hvmemul_vmfunc,
2096
};
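hvm_emulate_ops_no_write swaps every mutating callback for a *_discard variant, so an instruction can be decoded and run to completion without changing guest state. A generic, self-contained sketch of that callback-table pattern; the struct and handler names below are invented for illustration and are not Xen's real x86_emulate_ops:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define EMUL_OKAY 0

    /* A cut-down callback table in the spirit of x86_emulate_ops. */
    struct emul_ops {
        int (*read)(uint64_t addr, void *val, unsigned int bytes);
        int (*write)(uint64_t addr, const void *val, unsigned int bytes);
    };

    static uint8_t fake_ram[64];

    static int ram_read(uint64_t addr, void *val, unsigned int bytes)
    {
        memcpy(val, &fake_ram[addr], bytes);
        return EMUL_OKAY;
    }

    static int ram_write(uint64_t addr, const void *val, unsigned int bytes)
    {
        memcpy(&fake_ram[addr], val, bytes);
        return EMUL_OKAY;
    }

    /* Discard variant: report success but change nothing. */
    static int write_discard(uint64_t addr, const void *val, unsigned int bytes)
    {
        return EMUL_OKAY;
    }

    static const struct emul_ops ops          = { .read = ram_read, .write = ram_write };
    static const struct emul_ops ops_no_write = { .read = ram_read, .write = write_discard };

    int main(void)
    {
        uint32_t v = 0x12345678, out;

        ops_no_write.write(0, &v, sizeof(v)); /* swallowed, fake_ram unchanged */
        ops.write(0, &v, sizeof(v));          /* actually lands in fake_ram    */
        memcpy(&out, fake_ram, sizeof(out));
        printf("%#x\n", (unsigned)out);
        return 0;
    }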
2097
2098
static int _hvm_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt,
2099
    const struct x86_emulate_ops *ops)
2100
60.1k
{
2101
60.1k
    const struct cpu_user_regs *regs = hvmemul_ctxt->ctxt.regs;
2102
60.1k
    struct vcpu *curr = current;
2103
60.1k
    uint32_t new_intr_shadow;
2104
60.1k
    struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
2105
60.1k
    int rc;
2106
60.1k
2107
60.1k
    hvm_emulate_init_per_insn(hvmemul_ctxt, vio->mmio_insn,
2108
60.1k
                              vio->mmio_insn_bytes);
2109
60.1k
2110
60.1k
    vio->mmio_retry = 0;
2111
60.1k
2112
60.1k
    rc = x86_emulate(&hvmemul_ctxt->ctxt, ops);
2113
60.1k
2114
60.1k
    if ( rc == X86EMUL_OKAY && vio->mmio_retry )
2115
0
        rc = X86EMUL_RETRY;
2116
60.1k
    if ( rc != X86EMUL_RETRY )
2117
60.1k
    {
2118
60.1k
        vio->mmio_cache_count = 0;
2119
60.1k
        vio->mmio_insn_bytes = 0;
2120
60.1k
    }
2121
60.1k
    else
2122
0
    {
2123
0
        BUILD_BUG_ON(sizeof(vio->mmio_insn) < sizeof(hvmemul_ctxt->insn_buf));
2124
0
        vio->mmio_insn_bytes = hvmemul_ctxt->insn_buf_bytes;
2125
0
        memcpy(vio->mmio_insn, hvmemul_ctxt->insn_buf, vio->mmio_insn_bytes);
2126
0
    }
2127
60.1k
2128
60.1k
    if ( hvmemul_ctxt->ctxt.retire.singlestep )
2129
0
        hvm_inject_hw_exception(TRAP_debug, X86_EVENT_NO_EC);
2130
60.1k
2131
60.1k
    new_intr_shadow = hvmemul_ctxt->intr_shadow;
2132
60.1k
2133
60.1k
    /* MOV-SS instruction toggles MOV-SS shadow, else we just clear it. */
2134
60.1k
    if ( hvmemul_ctxt->ctxt.retire.mov_ss )
2135
0
        new_intr_shadow ^= HVM_INTR_SHADOW_MOV_SS;
2136
60.1k
    else if ( rc != X86EMUL_RETRY )
2137
60.1k
        new_intr_shadow &= ~HVM_INTR_SHADOW_MOV_SS;
2138
60.1k
2139
60.1k
    /* STI instruction toggles STI shadow, else we just clear it. */
2140
60.1k
    if ( hvmemul_ctxt->ctxt.retire.sti )
2141
0
        new_intr_shadow ^= HVM_INTR_SHADOW_STI;
2142
60.1k
    else if ( rc != X86EMUL_RETRY )
2143
60.1k
        new_intr_shadow &= ~HVM_INTR_SHADOW_STI;
2144
60.1k
2145
60.1k
    /* IRET, if valid in the given context, clears NMI blocking. */
2146
60.1k
    if ( hvmemul_ctxt->ctxt.retire.unblock_nmi )
2147
0
        new_intr_shadow &= ~HVM_INTR_SHADOW_NMI;
2148
60.1k
2149
60.1k
    if ( hvmemul_ctxt->intr_shadow != new_intr_shadow )
2150
0
    {
2151
0
        hvmemul_ctxt->intr_shadow = new_intr_shadow;
2152
0
        hvm_funcs.set_interrupt_shadow(curr, new_intr_shadow);
2153
0
    }
2154
60.1k
2155
60.1k
    if ( hvmemul_ctxt->ctxt.retire.hlt &&
2156
0
         !hvm_local_events_need_delivery(curr) )
2157
0
    {
2158
0
        hvm_hlt(regs->eflags);
2159
0
    }
2160
60.1k
2161
60.1k
    return rc;
2162
60.1k
}
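The interrupt-shadow bookkeeping above relies on two bit idioms: XOR toggles the MOV-SS/STI shadow when the retired instruction was itself a MOV SS or STI (so a second one in a row switches it back off), while AND-NOT clears it for any other successfully retired instruction; on X86EMUL_RETRY the shadow is left untouched. A minimal sketch of the two operations; the flag values here are made up and are not the real HVM_INTR_SHADOW_* definitions:

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative values only. */
    #define SHADOW_STI    (1u << 0)
    #define SHADOW_MOV_SS (1u << 1)

    int main(void)
    {
        uint32_t shadow = 0;

        shadow ^= SHADOW_STI;      /* STI retired: shadow raised         */
        shadow ^= SHADOW_STI;      /* another STI: toggled back off      */
        shadow |= SHADOW_MOV_SS;   /* pretend a MOV SS set it earlier    */
        shadow &= ~SHADOW_MOV_SS;  /* non-MOV-SS retired: shadow cleared */

        printf("%#x\n", shadow);   /* prints 0 */
        return 0;
    }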
2163
2164
int hvm_emulate_one(
2165
    struct hvm_emulate_ctxt *hvmemul_ctxt)
2166
60.1k
{
2167
60.1k
    return _hvm_emulate_one(hvmemul_ctxt, &hvm_emulate_ops);
2168
60.1k
}
2169
2170
int hvm_emulate_one_mmio(unsigned long mfn, unsigned long gla)
2171
0
{
2172
0
    static const struct x86_emulate_ops hvm_intercept_ops_mmcfg = {
2173
0
        .read       = x86emul_unhandleable_rw,
2174
0
        .insn_fetch = hvmemul_insn_fetch,
2175
0
        .write      = mmcfg_intercept_write,
2176
0
        .cpuid      = hvmemul_cpuid,
2177
0
    };
2178
0
    static const struct x86_emulate_ops hvm_ro_emulate_ops_mmio = {
2179
0
        .read       = x86emul_unhandleable_rw,
2180
0
        .insn_fetch = hvmemul_insn_fetch,
2181
0
        .write      = mmio_ro_emulated_write,
2182
0
        .cpuid      = hvmemul_cpuid,
2183
0
    };
2184
0
    struct mmio_ro_emulate_ctxt mmio_ro_ctxt = { .cr2 = gla };
2185
0
    struct hvm_emulate_ctxt ctxt;
2186
0
    const struct x86_emulate_ops *ops;
2187
0
    unsigned int seg, bdf;
2188
0
    int rc;
2189
0
2190
0
    if ( pci_ro_mmcfg_decode(mfn, &seg, &bdf) )
2191
0
    {
2192
0
        mmio_ro_ctxt.seg = seg;
2193
0
        mmio_ro_ctxt.bdf = bdf;
2194
0
        ops = &hvm_intercept_ops_mmcfg;
2195
0
    }
2196
0
    else
2197
0
        ops = &hvm_ro_emulate_ops_mmio;
2198
0
2199
0
    hvm_emulate_init_once(&ctxt, x86_insn_is_mem_write,
2200
0
                          guest_cpu_user_regs());
2201
0
    ctxt.ctxt.data = &mmio_ro_ctxt;
2202
0
    rc = _hvm_emulate_one(&ctxt, ops);
2203
0
    switch ( rc )
2204
0
    {
2205
0
    case X86EMUL_UNHANDLEABLE:
2206
0
    case X86EMUL_UNIMPLEMENTED:
2207
0
        hvm_dump_emulation_state(XENLOG_G_WARNING, "MMCFG", &ctxt, rc);
2208
0
        break;
2209
0
    case X86EMUL_EXCEPTION:
2210
0
        hvm_inject_event(&ctxt.ctxt.event);
2211
0
        /* fallthrough */
2212
0
    default:
2213
0
        hvm_emulate_writeback(&ctxt);
2214
0
    }
2215
0
2216
0
    return rc;
2217
0
}
2218
2219
void hvm_emulate_one_vm_event(enum emul_kind kind, unsigned int trapnr,
2220
    unsigned int errcode)
2221
0
{
2222
0
    struct hvm_emulate_ctxt ctx = {{ 0 }};
2223
0
    int rc;
2224
0
2225
0
    hvm_emulate_init_once(&ctx, NULL, guest_cpu_user_regs());
2226
0
2227
0
    switch ( kind )
2228
0
    {
2229
0
    case EMUL_KIND_NOWRITE:
2230
0
        rc = _hvm_emulate_one(&ctx, &hvm_emulate_ops_no_write);
2231
0
        break;
2232
0
    case EMUL_KIND_SET_CONTEXT_INSN: {
2233
0
        struct vcpu *curr = current;
2234
0
        struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
2235
0
2236
0
        BUILD_BUG_ON(sizeof(vio->mmio_insn) !=
2237
0
                     sizeof(curr->arch.vm_event->emul.insn.data));
2238
0
        ASSERT(!vio->mmio_insn_bytes);
2239
0
2240
0
        /*
2241
0
         * Stash insn buffer into mmio buffer here instead of ctx
2242
0
         * to avoid having to add more logic to hvm_emulate_one.
2243
0
         */
2244
0
        vio->mmio_insn_bytes = sizeof(vio->mmio_insn);
2245
0
        memcpy(vio->mmio_insn, curr->arch.vm_event->emul.insn.data,
2246
0
               vio->mmio_insn_bytes);
2247
0
    }
2248
0
    /* Fall-through */
2249
0
    default:
2250
0
        ctx.set_context = (kind == EMUL_KIND_SET_CONTEXT_DATA);
2251
0
        rc = hvm_emulate_one(&ctx);
2252
0
    }
2253
0
2254
0
    switch ( rc )
2255
0
    {
2256
0
    case X86EMUL_RETRY:
2257
0
        /*
2258
0
         * This function is called when handling an EPT-related vm_event
2259
0
         * reply. As such, nothing else needs to be done here, since simply
2260
0
         * returning makes the current instruction cause a page fault again,
2261
0
         * consistent with X86EMUL_RETRY.
2262
0
         */
2263
0
        return;
2264
0
    case X86EMUL_UNIMPLEMENTED:
2265
0
        if ( hvm_monitor_emul_unimplemented() )
2266
0
            return;
2267
0
        /* fall-through */
2268
0
    case X86EMUL_UNHANDLEABLE:
2269
0
        hvm_dump_emulation_state(XENLOG_G_DEBUG, "Mem event", &ctx, rc);
2270
0
        hvm_inject_hw_exception(trapnr, errcode);
2271
0
        break;
2272
0
    case X86EMUL_EXCEPTION:
2273
0
        hvm_inject_event(&ctx.ctxt.event);
2274
0
        break;
2275
0
    }
2276
0
2277
0
    hvm_emulate_writeback(&ctx);
2278
0
}
2279
2280
void hvm_emulate_init_once(
2281
    struct hvm_emulate_ctxt *hvmemul_ctxt,
2282
    hvm_emulate_validate_t *validate,
2283
    struct cpu_user_regs *regs)
2284
60.1k
{
2285
60.1k
    struct vcpu *curr = current;
2286
60.1k
2287
60.1k
    memset(hvmemul_ctxt, 0, sizeof(*hvmemul_ctxt));
2288
60.1k
2289
60.1k
    hvmemul_ctxt->intr_shadow = hvm_funcs.get_interrupt_shadow(curr);
2290
60.1k
    hvmemul_get_seg_reg(x86_seg_cs, hvmemul_ctxt);
2291
60.1k
    hvmemul_get_seg_reg(x86_seg_ss, hvmemul_ctxt);
2292
60.1k
2293
60.1k
    hvmemul_ctxt->validate = validate;
2294
60.1k
    hvmemul_ctxt->ctxt.regs = regs;
2295
60.1k
    hvmemul_ctxt->ctxt.vendor = curr->domain->arch.cpuid->x86_vendor;
2296
60.1k
    hvmemul_ctxt->ctxt.force_writeback = true;
2297
60.1k
}
2298
2299
void hvm_emulate_init_per_insn(
2300
    struct hvm_emulate_ctxt *hvmemul_ctxt,
2301
    const unsigned char *insn_buf,
2302
    unsigned int insn_bytes)
2303
60.1k
{
2304
60.1k
    struct vcpu *curr = current;
2305
60.1k
    unsigned int pfec = PFEC_page_present;
2306
60.1k
    unsigned long addr;
2307
60.1k
2308
60.1k
    hvmemul_ctxt->ctxt.lma = hvm_long_mode_active(curr);
2309
60.1k
2310
60.1k
    if ( hvmemul_ctxt->ctxt.lma &&
2311
60.1k
         hvmemul_ctxt->seg_reg[x86_seg_cs].l )
2312
60.1k
        hvmemul_ctxt->ctxt.addr_size = hvmemul_ctxt->ctxt.sp_size = 64;
2313
60.1k
    else
2314
0
    {
2315
0
        hvmemul_ctxt->ctxt.addr_size =
2316
0
            hvmemul_ctxt->seg_reg[x86_seg_cs].db ? 32 : 16;
2317
0
        hvmemul_ctxt->ctxt.sp_size =
2318
0
            hvmemul_ctxt->seg_reg[x86_seg_ss].db ? 32 : 16;
2319
0
    }
2320
60.1k
2321
60.1k
    if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
2322
0
        pfec |= PFEC_user_mode;
2323
60.1k
2324
60.1k
    hvmemul_ctxt->insn_buf_eip = hvmemul_ctxt->ctxt.regs->rip;
2325
60.1k
    if ( !insn_bytes )
2326
60.1k
    {
2327
60.1k
        hvmemul_ctxt->insn_buf_bytes =
2328
60.1k
            hvm_get_insn_bytes(curr, hvmemul_ctxt->insn_buf) ?:
2329
60.1k
            (hvm_virtual_to_linear_addr(x86_seg_cs,
2330
60.1k
                                        &hvmemul_ctxt->seg_reg[x86_seg_cs],
2331
60.1k
                                        hvmemul_ctxt->insn_buf_eip,
2332
60.1k
                                        sizeof(hvmemul_ctxt->insn_buf),
2333
60.1k
                                        hvm_access_insn_fetch,
2334
60.1k
                                        &hvmemul_ctxt->seg_reg[x86_seg_cs],
2335
60.1k
                                        &addr) &&
2336
60.1k
             hvm_fetch_from_guest_linear(hvmemul_ctxt->insn_buf, addr,
2337
60.1k
                                         sizeof(hvmemul_ctxt->insn_buf),
2338
60.1k
                                         pfec, NULL) == HVMTRANS_okay) ?
2339
60.1k
            sizeof(hvmemul_ctxt->insn_buf) : 0;
2340
60.1k
    }
2341
60.1k
    else
2342
0
    {
2343
0
        hvmemul_ctxt->insn_buf_bytes = insn_bytes;
2344
0
        memcpy(hvmemul_ctxt->insn_buf, insn_buf, insn_bytes);
2345
0
    }
2346
60.1k
}
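The sizing logic above follows the architectural default: 64-bit addressing when long mode is active and CS.L is set, otherwise CS.D/B selects 32 vs 16 bit (and SS.B does the same for the stack). A compact sketch of that selection with a hypothetical helper name:

    #include <stdbool.h>

    /* Long mode + CS.L -> 64; otherwise the D/B bit picks 32 vs 16. */
    static unsigned int default_addr_size(bool lma, bool cs_l, bool cs_db)
    {
        if ( lma && cs_l )
            return 64;
        return cs_db ? 32 : 16;
    }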
2347
2348
void hvm_emulate_writeback(
2349
    struct hvm_emulate_ctxt *hvmemul_ctxt)
2350
60.1k
{
2351
60.1k
    enum x86_segment seg;
2352
60.1k
2353
60.1k
    seg = find_first_bit(&hvmemul_ctxt->seg_reg_dirty,
2354
60.1k
                         ARRAY_SIZE(hvmemul_ctxt->seg_reg));
2355
60.1k
2356
60.1k
    while ( seg < ARRAY_SIZE(hvmemul_ctxt->seg_reg) )
2357
0
    {
2358
0
        hvm_set_segment_register(current, seg, &hvmemul_ctxt->seg_reg[seg]);
2359
0
        seg = find_next_bit(&hvmemul_ctxt->seg_reg_dirty,
2360
0
                            ARRAY_SIZE(hvmemul_ctxt->seg_reg),
2361
0
                            seg+1);
2362
0
    }
2363
60.1k
}
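The writeback loop above visits only the segment registers whose bits are set in seg_reg_dirty, using find_first_bit()/find_next_bit(). A portable sketch of the same walk over a plain unsigned long mask; the names and the register count are placeholders, and Xen's bitops additionally handle multi-word bitmaps:

    #include <stdio.h>

    #define NSEGS 10U   /* stand-in for ARRAY_SIZE(hvmemul_ctxt->seg_reg) */

    static void write_back_segment(unsigned int seg)
    {
        printf("writing back segment %u\n", seg);
    }

    /* Visit each set bit of 'dirty' in ascending order, as the loop above does. */
    static void writeback_dirty(unsigned long dirty)
    {
        unsigned int seg;

        for ( seg = 0; seg < NSEGS; seg++ )
            if ( dirty & (1ul << seg) )
                write_back_segment(seg);
    }

    int main(void)
    {
        writeback_dirty((1ul << 1) | (1ul << 4)); /* bits 1 and 4 dirty; indices purely illustrative */
        return 0;
    }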
2364
2365
/*
2366
 * Callers which pass a known in-range x86_segment can rely on the return
2367
 * pointer being valid.  Other callers must explicitly check for errors.
2368
 */
2369
struct segment_register *hvmemul_get_seg_reg(
2370
    enum x86_segment seg,
2371
    struct hvm_emulate_ctxt *hvmemul_ctxt)
2372
240k
{
2373
240k
    unsigned int idx = seg;
2374
240k
2375
240k
    if ( idx >= ARRAY_SIZE(hvmemul_ctxt->seg_reg) )
2376
0
        return ERR_PTR(-X86EMUL_UNHANDLEABLE);
2377
240k
2378
240k
    if ( !__test_and_set_bit(idx, &hvmemul_ctxt->seg_reg_accessed) )
2379
180k
        hvm_get_segment_register(current, idx, &hvmemul_ctxt->seg_reg[idx]);
2380
240k
    return &hvmemul_ctxt->seg_reg[idx];
2381
240k
}
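hvmemul_get_seg_reg() reads each segment register from the vCPU state at most once per emulation, using seg_reg_accessed as a memoization mark via __test_and_set_bit(). A small single-threaded sketch of that load-on-first-use pattern, with invented names and a dummy payload:

    #include <stddef.h>
    #include <stdint.h>

    #define NSEGS 10U

    struct seg_cache {
        unsigned long accessed;     /* bit n set once slot n has been filled */
        uint64_t      reg[NSEGS];   /* stand-in for struct segment_register  */
    };

    /* Pretend hardware read; the real code calls hvm_get_segment_register(). */
    static uint64_t hw_read_segment(unsigned int idx)
    {
        return 0x1000 + idx;
    }

    static uint64_t *get_seg_reg(struct seg_cache *c, unsigned int idx)
    {
        if ( idx >= NSEGS )
            return NULL;            /* out of range, like the ERR_PTR() path above */

        if ( !(c->accessed & (1ul << idx)) )
        {
            c->accessed |= 1ul << idx;             /* test-and-set equivalent */
            c->reg[idx] = hw_read_segment(idx);    /* fill the slot once      */
        }
        return &c->reg[idx];
    }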
2382
2383
static const char *guest_x86_mode_to_str(int mode)
2384
0
{
2385
0
    switch ( mode )
2386
0
    {
2387
0
    case 0:  return "Real";
2388
0
    case 1:  return "v86";
2389
0
    case 2:  return "16bit";
2390
0
    case 4:  return "32bit";
2391
0
    case 8:  return "64bit";
2392
0
    default: return "Unknown";
2393
0
    }
2394
0
}
2395
2396
void hvm_dump_emulation_state(const char *loglvl, const char *prefix,
2397
                              struct hvm_emulate_ctxt *hvmemul_ctxt, int rc)
2398
0
{
2399
0
    struct vcpu *curr = current;
2400
0
    const char *mode_str = guest_x86_mode_to_str(hvm_guest_x86_mode(curr));
2401
0
    const struct segment_register *cs =
2402
0
        hvmemul_get_seg_reg(x86_seg_cs, hvmemul_ctxt);
2403
0
2404
0
    printk("%s%s emulation failed (%d): %pv %s @ %04x:%08lx -> %*ph\n",
2405
0
           loglvl, prefix, rc, curr, mode_str, cs->sel,
2406
0
           hvmemul_ctxt->insn_buf_eip, hvmemul_ctxt->insn_buf_bytes,
2407
0
           hvmemul_ctxt->insn_buf);
2408
0
}
2409
2410
/*
2411
 * Local variables:
2412
 * mode: C
2413
 * c-file-style: "BSD"
2414
 * c-basic-offset: 4
2415
 * tab-width: 4
2416
 * indent-tabs-mode: nil
2417
 * End:
2418
 */