Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/arch/x86/pv/emul-priv-op.c
Source listing follows. Every instrumented line in this file has an execution count of 0, i.e. none of this file's code was executed during the measured run.
/******************************************************************************
 * arch/x86/pv/emul-priv-op.c
 *
 * Emulate privileged instructions for PV guests
 *
 * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/errno.h>
#include <xen/event.h>
#include <xen/guest_access.h>
#include <xen/iocap.h>
#include <xen/spinlock.h>
#include <xen/trace.h>

#include <asm/apic.h>
#include <asm/debugreg.h>
#include <asm/hpet.h>
#include <asm/hypercall.h>
#include <asm/mc146818rtc.h>
#include <asm/p2m.h>
#include <asm/pv/traps.h>
#include <asm/shared.h>
#include <asm/traps.h>
#include <asm/x86_emulate.h>

#include <xsm/xsm.h>

#include "../x86_64/mmconfig.h"
#include "emulate.h"
#include "mm.h"

/* Override macros from asm/page.h to make them work with mfn_t */
#undef mfn_to_page
#define mfn_to_page(mfn) __mfn_to_page(mfn_x(mfn))
#undef page_to_mfn
#define page_to_mfn(pg) _mfn(__page_to_mfn(pg))

/***********************
 * I/O emulation support
 */

struct priv_op_ctxt {
    struct x86_emulate_ctxt ctxt;
    struct {
        unsigned long base, limit;
    } cs;
    char *io_emul_stub;
    unsigned int bpmatch;
    unsigned int tsc;
#define TSC_BASE 1
#define TSC_AUX 2
};

/* I/O emulation support. Helper routines for, and type of, the stack stub. */
void host_to_guest_gpr_switch(struct cpu_user_regs *);
unsigned long guest_to_host_gpr_switch(unsigned long);

void (*pv_post_outb_hook)(unsigned int port, u8 value);

typedef void io_emul_stub_t(struct cpu_user_regs *);

static io_emul_stub_t *io_emul_stub_setup(struct priv_op_ctxt *ctxt, u8 opcode,
                                          unsigned int port, unsigned int bytes)
{
    if ( !ctxt->io_emul_stub )
        ctxt->io_emul_stub = map_domain_page(_mfn(this_cpu(stubs.mfn))) +
                                             (this_cpu(stubs.addr) &
                                              ~PAGE_MASK) +
                                             STUB_BUF_SIZE / 2;

    /* movq $host_to_guest_gpr_switch,%rcx */
    ctxt->io_emul_stub[0] = 0x48;
    ctxt->io_emul_stub[1] = 0xb9;
    *(void **)&ctxt->io_emul_stub[2] = (void *)host_to_guest_gpr_switch;
    /* callq *%rcx */
    ctxt->io_emul_stub[10] = 0xff;
    ctxt->io_emul_stub[11] = 0xd1;
    /* data16 or nop */
    ctxt->io_emul_stub[12] = (bytes != 2) ? 0x90 : 0x66;
    /* <io-access opcode> */
    ctxt->io_emul_stub[13] = opcode;
    /* imm8 or nop */
    ctxt->io_emul_stub[14] = !(opcode & 8) ? port : 0x90;
    /* ret (jumps to guest_to_host_gpr_switch) */
    ctxt->io_emul_stub[15] = 0xc3;
    BUILD_BUG_ON(STUB_BUF_SIZE / 2 < 16);

    if ( ioemul_handle_quirk )
        ioemul_handle_quirk(opcode, &ctxt->io_emul_stub[12], ctxt->ctxt.regs);

    /* Handy function-typed pointer to the stub. */
    return (void *)(this_cpu(stubs.addr) + STUB_BUF_SIZE / 2);
}
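
/*
 * For reference, the 16 bytes written by io_emul_stub_setup() above decode
 * roughly as follows (a sketch of the generated stub, not authoritative
 * disassembly):
 *
 *   48 b9 <imm64>   movq $host_to_guest_gpr_switch, %rcx
 *   ff d1           callq *%rcx
 *   66 or 90        data16 prefix for 2-byte accesses, else nop
 *   <opcode>        the in/out opcode byte being emulated
 *   <port> or 90    imm8 port for the immediate-port forms, else nop
 *   c3              ret, which returns via guest_to_host_gpr_switch
 */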


/* Perform IOPL check between the vcpu's shadowed IOPL, and the assumed cpl. */
static bool iopl_ok(const struct vcpu *v, const struct cpu_user_regs *regs)
{
    unsigned int cpl = guest_kernel_mode(v, regs) ?
        (VM_ASSIST(v->domain, architectural_iopl) ? 0 : 1) : 3;

    ASSERT((v->arch.pv_vcpu.iopl & ~X86_EFLAGS_IOPL) == 0);

    return IOPL(cpl) <= v->arch.pv_vcpu.iopl;
}

/* Has the guest requested sufficient permission for this I/O access? */
static bool guest_io_okay(unsigned int port, unsigned int bytes,
                          struct vcpu *v, struct cpu_user_regs *regs)
{
    /* If in user mode, switch to kernel mode just to read I/O bitmap. */
    const bool user_mode = !(v->arch.flags & TF_kernel_mode);

    if ( iopl_ok(v, regs) )
        return true;

    if ( v->arch.pv_vcpu.iobmp_limit > (port + bytes) )
    {
        union { uint8_t bytes[2]; uint16_t mask; } x;

        /*
         * Grab permission bytes from guest space. Inaccessible bytes are
         * read as 0xff (no access allowed).
         */
        if ( user_mode )
            toggle_guest_mode(v);

        switch ( __copy_from_guest_offset(x.bytes, v->arch.pv_vcpu.iobmp,
                                          port>>3, 2) )
        {
        default: x.bytes[0] = ~0;
            /* fallthrough */
        case 1:  x.bytes[1] = ~0;
            /* fallthrough */
        case 0:  break;
        }

        if ( user_mode )
            toggle_guest_mode(v);

        if ( (x.mask & (((1 << bytes) - 1) << (port & 7))) == 0 )
            return true;
    }

    return false;
}
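
/*
 * Worked example for the bitmap test in guest_io_okay() (illustrative
 * values only): a 1-byte access to port 0x71 fetches the two permission
 * bytes at bitmap offset 0x71 >> 3 = 0xe, and is allowed only if bit
 * (0x71 & 7) = 1 of that 16-bit window is clear, i.e. if
 * (x.mask & (1u << 1)) == 0.
 */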

/* Has the administrator granted sufficient permission for this I/O access? */
static bool admin_io_okay(unsigned int port, unsigned int bytes,
                          const struct domain *d)
{
    /*
     * Port 0xcf8 (CONFIG_ADDRESS) is only visible for DWORD accesses.
     * We never permit direct access to that register.
     */
    if ( (port == 0xcf8) && (bytes == 4) )
        return false;

    /* We also never permit direct access to the RTC/CMOS registers. */
    if ( ((port & ~1) == RTC_PORT(0)) )
        return false;

    return ioports_access_permitted(d, port, port + bytes - 1);
}

static bool pci_cfg_ok(struct domain *currd, unsigned int start,
                       unsigned int size, uint32_t *write)
{
    uint32_t machine_bdf;

    if ( !is_hardware_domain(currd) )
        return false;

    if ( !CF8_ENABLED(currd->arch.pci_cf8) )
        return true;

    machine_bdf = CF8_BDF(currd->arch.pci_cf8);
    if ( write )
    {
        const unsigned long *ro_map = pci_get_ro_map(0);

        if ( ro_map && test_bit(machine_bdf, ro_map) )
            return false;
    }
    start |= CF8_ADDR_LO(currd->arch.pci_cf8);
    /* AMD extended configuration space access? */
    if ( CF8_ADDR_HI(currd->arch.pci_cf8) &&
         boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
         boot_cpu_data.x86 >= 0x10 && boot_cpu_data.x86 <= 0x17 )
    {
        uint64_t msr_val;

        if ( rdmsr_safe(MSR_AMD64_NB_CFG, msr_val) )
            return false;
        if ( msr_val & (1ULL << AMD64_NB_CFG_CF8_EXT_ENABLE_BIT) )
            start |= CF8_ADDR_HI(currd->arch.pci_cf8);
    }

    return !write ?
           xsm_pci_config_permission(XSM_HOOK, currd, machine_bdf,
                                     start, start + size - 1, 0) == 0 :
           pci_conf_write_intercept(0, machine_bdf, start, size, write) >= 0;
}
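
/*
 * Context for pci_cfg_ok() and the port handling below: port 0xcf8 is the
 * PCI CONFIG_ADDRESS register and ports 0xcfc-0xcff form the CONFIG_DATA
 * window, so a data-window access is validated against the address the
 * hardware domain last latched in currd->arch.pci_cf8.
 */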

uint32_t guest_io_read(unsigned int port, unsigned int bytes,
                       struct domain *currd)
{
    uint32_t data = 0;
    unsigned int shift = 0;

    if ( admin_io_okay(port, bytes, currd) )
    {
        switch ( bytes )
        {
        case 1: return inb(port);
        case 2: return inw(port);
        case 4: return inl(port);
        }
    }

    while ( bytes != 0 )
    {
        unsigned int size = 1;
        uint32_t sub_data = ~0;

        if ( (port == 0x42) || (port == 0x43) || (port == 0x61) )
        {
            sub_data = pv_pit_handler(port, 0, 0);
        }
        else if ( port == RTC_PORT(0) )
        {
            sub_data = currd->arch.cmos_idx;
        }
        else if ( (port == RTC_PORT(1)) &&
                  ioports_access_permitted(currd, RTC_PORT(0), RTC_PORT(1)) )
        {
            unsigned long flags;

            spin_lock_irqsave(&rtc_lock, flags);
            outb(currd->arch.cmos_idx & 0x7f, RTC_PORT(0));
            sub_data = inb(RTC_PORT(1));
            spin_unlock_irqrestore(&rtc_lock, flags);
        }
        else if ( (port == 0xcf8) && (bytes == 4) )
        {
            size = 4;
            sub_data = currd->arch.pci_cf8;
        }
        else if ( (port & 0xfffc) == 0xcfc )
        {
            size = min(bytes, 4 - (port & 3));
            if ( size == 3 )
                size = 2;
            if ( pci_cfg_ok(currd, port & 3, size, NULL) )
                sub_data = pci_conf_read(currd->arch.pci_cf8, port & 3, size);
        }

        if ( size == 4 )
            return sub_data;

        data |= (sub_data & ((1u << (size * 8)) - 1)) << shift;
        shift += size * 8;
        port += size;
        bytes -= size;
    }

    return data;
}
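
/*
 * Note on guest_io_read(): accesses not handled as a single unit are split
 * into 1-, 2- or 4-byte sub-reads; each sub-result is masked to its width
 * and shifted into place, so ports with no handler read back as all-ones
 * in the affected bytes.
 */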

static unsigned int check_guest_io_breakpoint(struct vcpu *v,
                                              unsigned int port,
                                              unsigned int len)
{
    unsigned int width, i, match = 0;
    unsigned long start;

    if ( !(v->arch.debugreg[5]) ||
         !(v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_DE) )
        return 0;

    for ( i = 0; i < 4; i++ )
    {
        if ( !(v->arch.debugreg[5] &
               (3 << (i * DR_ENABLE_SIZE))) )
            continue;

        start = v->arch.debugreg[i];
        width = 0;

        switch ( (v->arch.debugreg[7] >>
                  (DR_CONTROL_SHIFT + i * DR_CONTROL_SIZE)) & 0xc )
        {
        case DR_LEN_1: width = 1; break;
        case DR_LEN_2: width = 2; break;
        case DR_LEN_4: width = 4; break;
        case DR_LEN_8: width = 8; break;
        }

        if ( (start < (port + len)) && ((start + width) > port) )
            match |= 1u << i;
    }

    return match;
}

static int read_io(unsigned int port, unsigned int bytes,
                   unsigned long *val, struct x86_emulate_ctxt *ctxt)
{
    struct priv_op_ctxt *poc = container_of(ctxt, struct priv_op_ctxt, ctxt);
    struct vcpu *curr = current;
    struct domain *currd = current->domain;

    /* INS must not come here. */
    ASSERT((ctxt->opcode & ~9) == 0xe4);

    if ( !guest_io_okay(port, bytes, curr, ctxt->regs) )
        return X86EMUL_UNHANDLEABLE;

    poc->bpmatch = check_guest_io_breakpoint(curr, port, bytes);

    if ( admin_io_okay(port, bytes, currd) )
    {
        io_emul_stub_t *io_emul =
            io_emul_stub_setup(poc, ctxt->opcode, port, bytes);

        mark_regs_dirty(ctxt->regs);
        io_emul(ctxt->regs);
        return X86EMUL_DONE;
    }

    *val = guest_io_read(port, bytes, currd);

    return X86EMUL_OKAY;
}

void guest_io_write(unsigned int port, unsigned int bytes, uint32_t data,
                    struct domain *currd)
{
    if ( admin_io_okay(port, bytes, currd) )
    {
        switch ( bytes )
        {
        case 1:
            outb((uint8_t)data, port);
            if ( pv_post_outb_hook )
                pv_post_outb_hook(port, (uint8_t)data);
            break;
        case 2:
            outw((uint16_t)data, port);
            break;
        case 4:
            outl(data, port);
            break;
        }
        return;
    }

    while ( bytes != 0 )
    {
        unsigned int size = 1;

        if ( (port == 0x42) || (port == 0x43) || (port == 0x61) )
        {
            pv_pit_handler(port, (uint8_t)data, 1);
        }
        else if ( port == RTC_PORT(0) )
        {
            currd->arch.cmos_idx = data;
        }
        else if ( (port == RTC_PORT(1)) &&
                  ioports_access_permitted(currd, RTC_PORT(0), RTC_PORT(1)) )
        {
            unsigned long flags;

            if ( pv_rtc_handler )
                pv_rtc_handler(currd->arch.cmos_idx & 0x7f, data);
            spin_lock_irqsave(&rtc_lock, flags);
            outb(currd->arch.cmos_idx & 0x7f, RTC_PORT(0));
            outb(data, RTC_PORT(1));
            spin_unlock_irqrestore(&rtc_lock, flags);
        }
        else if ( (port == 0xcf8) && (bytes == 4) )
        {
            size = 4;
            currd->arch.pci_cf8 = data;
        }
        else if ( (port & 0xfffc) == 0xcfc )
        {
            size = min(bytes, 4 - (port & 3));
            if ( size == 3 )
                size = 2;
            if ( pci_cfg_ok(currd, port & 3, size, &data) )
                pci_conf_write(currd->arch.pci_cf8, port & 3, size, data);
        }

        if ( size == 4 )
            return;

        port += size;
        bytes -= size;
        data >>= size * 8;
    }
}

static int write_io(unsigned int port, unsigned int bytes,
                    unsigned long val, struct x86_emulate_ctxt *ctxt)
{
    struct priv_op_ctxt *poc = container_of(ctxt, struct priv_op_ctxt, ctxt);
    struct vcpu *curr = current;
    struct domain *currd = current->domain;

    /* OUTS must not come here. */
    ASSERT((ctxt->opcode & ~9) == 0xe6);

    if ( !guest_io_okay(port, bytes, curr, ctxt->regs) )
        return X86EMUL_UNHANDLEABLE;

    poc->bpmatch = check_guest_io_breakpoint(curr, port, bytes);

    if ( admin_io_okay(port, bytes, currd) )
    {
        io_emul_stub_t *io_emul =
            io_emul_stub_setup(poc, ctxt->opcode, port, bytes);

        mark_regs_dirty(ctxt->regs);
        io_emul(ctxt->regs);
        if ( (bytes == 1) && pv_post_outb_hook )
            pv_post_outb_hook(port, val);
        return X86EMUL_DONE;
    }

    guest_io_write(port, bytes, val, currd);

    return X86EMUL_OKAY;
}
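
/*
 * Note on read_io()/write_io(): on the direct-hardware path the generated
 * stub performs the port access itself and updates the guest register
 * image, so the hooks return X86EMUL_DONE, which appears to tell
 * x86_emulate() not to perform its own write-back; X86EMUL_OKAY is used
 * when the emulator still has to commit the value that was read or written.
 */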

static int read_segment(enum x86_segment seg,
                        struct segment_register *reg,
                        struct x86_emulate_ctxt *ctxt)
{
    /* Check if this is an attempt to access the I/O bitmap. */
    if ( seg == x86_seg_tr )
    {
        switch ( ctxt->opcode )
        {
        case 0x6c ... 0x6f: /* ins / outs */
        case 0xe4 ... 0xe7: /* in / out (immediate port) */
        case 0xec ... 0xef: /* in / out (port in %dx) */
            /* Defer the check to priv_op_{read,write}_io(). */
            return X86EMUL_DONE;
        }
    }

    if ( ctxt->addr_size < 64 )
    {
        unsigned long limit;
        unsigned int sel, ar;

        switch ( seg )
        {
        case x86_seg_cs: sel = ctxt->regs->cs; break;
        case x86_seg_ds: sel = read_sreg(ds);  break;
        case x86_seg_es: sel = read_sreg(es);  break;
        case x86_seg_fs: sel = read_sreg(fs);  break;
        case x86_seg_gs: sel = read_sreg(gs);  break;
        case x86_seg_ss: sel = ctxt->regs->ss; break;
        default: return X86EMUL_UNHANDLEABLE;
        }

        if ( !pv_emul_read_descriptor(sel, current, &reg->base,
                                      &limit, &ar, 0) )
            return X86EMUL_UNHANDLEABLE;

        reg->limit = limit;
        reg->attr = ar >> 8;
    }
    else
    {
        switch ( seg )
        {
        default:
            if ( !is_x86_user_segment(seg) )
                return X86EMUL_UNHANDLEABLE;
            reg->base = 0;
            break;
        case x86_seg_fs:
            reg->base = rdfsbase();
            break;
        case x86_seg_gs:
            reg->base = rdgsbase();
            break;
        }

        reg->limit = ~0U;

        reg->attr = 0;
        reg->type = _SEGMENT_WR >> 8;
        if ( seg == x86_seg_cs )
        {
            reg->type |= _SEGMENT_CODE >> 8;
            reg->l = 1;
        }
        else
            reg->db = 1;
        reg->s   = 1;
        reg->dpl = 3;
        reg->p   = 1;
        reg->g   = 1;
    }

    /*
     * For x86_emulate.c's mode_ring0() to work, fake a DPL of zero.
     * Also do this for consistency for non-conforming code segments.
     */
    if ( (seg == x86_seg_ss ||
          (seg == x86_seg_cs &&
           !(reg->type & (_SEGMENT_EC >> 8)))) &&
         guest_kernel_mode(current, ctxt->regs) )
        reg->dpl = 0;

    return X86EMUL_OKAY;
}
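
/*
 * In 64-bit mode read_segment() synthesizes a flat segment (base 0 except
 * for FS/GS, limit ~0, DPL 3), reflecting that segmentation is largely
 * ignored there; the DPL is then forced to 0 for SS and non-conforming CS
 * when the guest is in kernel mode so that x86_emulate()'s mode_ring0()
 * check behaves as expected.
 */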

static int pv_emul_virt_to_linear(unsigned long base, unsigned long offset,
                                  unsigned int bytes, unsigned long limit,
                                  enum x86_segment seg,
                                  struct x86_emulate_ctxt *ctxt,
                                  unsigned long *addr)
{
    int rc = X86EMUL_OKAY;

    *addr = base + offset;

    if ( ctxt->addr_size < 64 )
    {
        if ( limit < bytes - 1 || offset > limit - bytes + 1 )
            rc = X86EMUL_EXCEPTION;
        *addr = (uint32_t)*addr;
    }
    else if ( !__addr_ok(*addr) )
        rc = X86EMUL_EXCEPTION;

    if ( unlikely(rc == X86EMUL_EXCEPTION) )
        x86_emul_hw_exception(seg != x86_seg_ss ? TRAP_gp_fault
                                                : TRAP_stack_error,
                              0, ctxt);

    return rc;
}

static int rep_ins(uint16_t port,
                   enum x86_segment seg, unsigned long offset,
                   unsigned int bytes_per_rep, unsigned long *reps,
                   struct x86_emulate_ctxt *ctxt)
{
    struct priv_op_ctxt *poc = container_of(ctxt, struct priv_op_ctxt, ctxt);
    struct vcpu *curr = current;
    struct domain *currd = current->domain;
    unsigned long goal = *reps;
    struct segment_register sreg;
    int rc;

    ASSERT(seg == x86_seg_es);

    *reps = 0;

    if ( !guest_io_okay(port, bytes_per_rep, curr, ctxt->regs) )
        return X86EMUL_UNHANDLEABLE;

    rc = read_segment(x86_seg_es, &sreg, ctxt);
    if ( rc != X86EMUL_OKAY )
        return rc;

    if ( !sreg.p )
        return X86EMUL_UNHANDLEABLE;
    if ( !sreg.s ||
         (sreg.type & (_SEGMENT_CODE >> 8)) ||
         !(sreg.type & (_SEGMENT_WR >> 8)) )
    {
        x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
        return X86EMUL_EXCEPTION;
    }

    poc->bpmatch = check_guest_io_breakpoint(curr, port, bytes_per_rep);

    while ( *reps < goal )
    {
        unsigned int data = guest_io_read(port, bytes_per_rep, currd);
        unsigned long addr;

        rc = pv_emul_virt_to_linear(sreg.base, offset, bytes_per_rep,
                                    sreg.limit, x86_seg_es, ctxt, &addr);
        if ( rc != X86EMUL_OKAY )
            return rc;

        if ( (rc = __copy_to_user((void *)addr, &data, bytes_per_rep)) != 0 )
        {
            x86_emul_pagefault(PFEC_write_access,
                               addr + bytes_per_rep - rc, ctxt);
            return X86EMUL_EXCEPTION;
        }

        ++*reps;

        if ( poc->bpmatch || hypercall_preempt_check() )
            break;

        /* x86_emulate() clips the repetition count to ensure we don't wrap. */
        if ( unlikely(ctxt->regs->eflags & X86_EFLAGS_DF) )
            offset -= bytes_per_rep;
        else
            offset += bytes_per_rep;
    }

    return X86EMUL_OKAY;
}

static int rep_outs(enum x86_segment seg, unsigned long offset,
                    uint16_t port,
                    unsigned int bytes_per_rep, unsigned long *reps,
                    struct x86_emulate_ctxt *ctxt)
{
    struct priv_op_ctxt *poc = container_of(ctxt, struct priv_op_ctxt, ctxt);
    struct vcpu *curr = current;
    struct domain *currd = current->domain;
    unsigned long goal = *reps;
    struct segment_register sreg;
    int rc;

    *reps = 0;

    if ( !guest_io_okay(port, bytes_per_rep, curr, ctxt->regs) )
        return X86EMUL_UNHANDLEABLE;

    rc = read_segment(seg, &sreg, ctxt);
    if ( rc != X86EMUL_OKAY )
        return rc;

    if ( !sreg.p )
        return X86EMUL_UNHANDLEABLE;
    if ( !sreg.s ||
         ((sreg.type & (_SEGMENT_CODE >> 8)) &&
          !(sreg.type & (_SEGMENT_WR >> 8))) )
    {
        x86_emul_hw_exception(seg != x86_seg_ss ? TRAP_gp_fault
                                                : TRAP_stack_error,
                              0, ctxt);
        return X86EMUL_EXCEPTION;
    }

    poc->bpmatch = check_guest_io_breakpoint(curr, port, bytes_per_rep);

    while ( *reps < goal )
    {
        unsigned int data = 0;
        unsigned long addr;

        rc = pv_emul_virt_to_linear(sreg.base, offset, bytes_per_rep,
                                    sreg.limit, seg, ctxt, &addr);
        if ( rc != X86EMUL_OKAY )
            return rc;

        if ( (rc = __copy_from_user(&data, (void *)addr, bytes_per_rep)) != 0 )
        {
            x86_emul_pagefault(0, addr + bytes_per_rep - rc, ctxt);
            return X86EMUL_EXCEPTION;
        }

        guest_io_write(port, bytes_per_rep, data, currd);

        ++*reps;

        if ( poc->bpmatch || hypercall_preempt_check() )
            break;

        /* x86_emulate() clips the repetition count to ensure we don't wrap. */
        if ( unlikely(ctxt->regs->eflags & X86_EFLAGS_DF) )
            offset -= bytes_per_rep;
        else
            offset += bytes_per_rep;
    }

    return X86EMUL_OKAY;
}
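
/*
 * Both rep_ins() and rep_outs() may stop short of the requested count:
 * they break out on an I/O breakpoint match or when
 * hypercall_preempt_check() fires, returning the number of repetitions
 * actually performed in *reps so that the remaining iterations can be
 * carried out on a later pass.
 */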

static int read_cr(unsigned int reg, unsigned long *val,
                   struct x86_emulate_ctxt *ctxt)
{
    const struct vcpu *curr = current;

    switch ( reg )
    {
    case 0: /* Read CR0 */
        *val = (read_cr0() & ~X86_CR0_TS) | curr->arch.pv_vcpu.ctrlreg[0];
        return X86EMUL_OKAY;

    case 2: /* Read CR2 */
    case 4: /* Read CR4 */
        *val = curr->arch.pv_vcpu.ctrlreg[reg];
        return X86EMUL_OKAY;

    case 3: /* Read CR3 */
    {
        const struct domain *currd = curr->domain;
        mfn_t mfn;

        if ( !is_pv_32bit_domain(currd) )
        {
            mfn = pagetable_get_mfn(curr->arch.guest_table);
            *val = xen_pfn_to_cr3(mfn_to_gmfn(currd, mfn_x(mfn)));
        }
        else
        {
            l4_pgentry_t *pl4e =
                map_domain_page(pagetable_get_mfn(curr->arch.guest_table));

            mfn = l4e_get_mfn(*pl4e);
            unmap_domain_page(pl4e);
            *val = compat_pfn_to_cr3(mfn_to_gmfn(currd, mfn_x(mfn)));
        }
        /* PTs should not be shared */
        BUG_ON(page_get_owner(mfn_to_page(mfn)) == dom_cow);
        return X86EMUL_OKAY;
    }
    }

    return X86EMUL_UNHANDLEABLE;
}

static int write_cr(unsigned int reg, unsigned long val,
                    struct x86_emulate_ctxt *ctxt)
{
    struct vcpu *curr = current;

    switch ( reg )
    {
    case 0: /* Write CR0 */
        if ( (val ^ read_cr0()) & ~X86_CR0_TS )
        {
            gdprintk(XENLOG_WARNING,
                     "Attempt to change unmodifiable CR0 flags\n");
            break;
        }
        do_fpu_taskswitch(!!(val & X86_CR0_TS));
        return X86EMUL_OKAY;

    case 2: /* Write CR2 */
        curr->arch.pv_vcpu.ctrlreg[2] = val;
        arch_set_cr2(curr, val);
        return X86EMUL_OKAY;

    case 3: /* Write CR3 */
    {
        struct domain *currd = curr->domain;
        unsigned long gfn;
        struct page_info *page;
        int rc;

        gfn = !is_pv_32bit_domain(currd)
              ? xen_cr3_to_pfn(val) : compat_cr3_to_pfn(val);
        page = get_page_from_gfn(currd, gfn, NULL, P2M_ALLOC);
        if ( !page )
            break;
        rc = new_guest_cr3(page_to_mfn(page));
        put_page(page);

        switch ( rc )
        {
        case 0:
            return X86EMUL_OKAY;
        case -ERESTART: /* retry after preemption */
            return X86EMUL_RETRY;
        }
        break;
    }

    case 4: /* Write CR4 */
        curr->arch.pv_vcpu.ctrlreg[4] = pv_guest_cr4_fixup(curr, val);
        write_cr4(pv_guest_cr4_to_real_cr4(curr));
        ctxt_switch_levelling(curr);
        return X86EMUL_OKAY;
    }

    return X86EMUL_UNHANDLEABLE;
}
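
/*
 * Note on the CR3 write above: the guest-provided value is translated to a
 * page via get_page_from_gfn(), handed to new_guest_cr3(), and a -ERESTART
 * result is surfaced as X86EMUL_RETRY so that a preempted page-table
 * switch can be completed later.
 */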

static int read_dr(unsigned int reg, unsigned long *val,
                   struct x86_emulate_ctxt *ctxt)
{
    unsigned long res = do_get_debugreg(reg);

    if ( IS_ERR_VALUE(res) )
        return X86EMUL_UNHANDLEABLE;

    *val = res;

    return X86EMUL_OKAY;
}

static int write_dr(unsigned int reg, unsigned long val,
                    struct x86_emulate_ctxt *ctxt)
{
    return do_set_debugreg(reg, val) == 0
           ? X86EMUL_OKAY : X86EMUL_UNHANDLEABLE;
}

static inline uint64_t guest_misc_enable(uint64_t val)
{
    val &= ~(MSR_IA32_MISC_ENABLE_PERF_AVAIL |
             MSR_IA32_MISC_ENABLE_MONITOR_ENABLE);
    val |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL |
           MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL |
           MSR_IA32_MISC_ENABLE_XTPR_DISABLE;
    return val;
}

static inline bool is_cpufreq_controller(const struct domain *d)
{
    return ((cpufreq_controller == FREQCTL_dom0_kernel) &&
            is_hardware_domain(d));
}

static int read_msr(unsigned int reg, uint64_t *val,
                    struct x86_emulate_ctxt *ctxt)
{
    struct priv_op_ctxt *poc = container_of(ctxt, struct priv_op_ctxt, ctxt);
    const struct vcpu *curr = current;
    const struct domain *currd = curr->domain;
    bool vpmu_msr = false;
    int ret;

    if ( (ret = guest_rdmsr(curr, reg, val)) != X86EMUL_UNHANDLEABLE )
    {
        if ( ret == X86EMUL_EXCEPTION )
            x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);

        return ret;
    }

    switch ( reg )
    {
        int rc;

    case MSR_FS_BASE:
        if ( is_pv_32bit_domain(currd) )
            break;
        *val = cpu_has_fsgsbase ? __rdfsbase() : curr->arch.pv_vcpu.fs_base;
        return X86EMUL_OKAY;

    case MSR_GS_BASE:
        if ( is_pv_32bit_domain(currd) )
            break;
        *val = cpu_has_fsgsbase ? __rdgsbase()
                                : curr->arch.pv_vcpu.gs_base_kernel;
        return X86EMUL_OKAY;

    case MSR_SHADOW_GS_BASE:
        if ( is_pv_32bit_domain(currd) )
            break;
        *val = curr->arch.pv_vcpu.gs_base_user;
        return X86EMUL_OKAY;

    /*
     * In order to fully retain original behavior, defer calling
     * pv_soft_rdtsc() until after emulation. This may want/need to be
     * reconsidered.
     */
    case MSR_IA32_TSC:
        poc->tsc |= TSC_BASE;
        goto normal;

    case MSR_TSC_AUX:
        poc->tsc |= TSC_AUX;
        if ( cpu_has_rdtscp )
            goto normal;
        *val = 0;
        return X86EMUL_OKAY;

    case MSR_EFER:
        *val = read_efer();
        if ( is_pv_32bit_domain(currd) )
            *val &= ~(EFER_LME | EFER_LMA | EFER_LMSLE);
        return X86EMUL_OKAY;

    case MSR_K7_FID_VID_CTL:
    case MSR_K7_FID_VID_STATUS:
    case MSR_K8_PSTATE_LIMIT:
    case MSR_K8_PSTATE_CTRL:
    case MSR_K8_PSTATE_STATUS:
    case MSR_K8_PSTATE0:
    case MSR_K8_PSTATE1:
    case MSR_K8_PSTATE2:
    case MSR_K8_PSTATE3:
    case MSR_K8_PSTATE4:
    case MSR_K8_PSTATE5:
    case MSR_K8_PSTATE6:
    case MSR_K8_PSTATE7:
        if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD )
            break;
        if ( unlikely(is_cpufreq_controller(currd)) )
            goto normal;
        *val = 0;
        return X86EMUL_OKAY;

    case MSR_IA32_UCODE_REV:
        BUILD_BUG_ON(MSR_IA32_UCODE_REV != MSR_AMD_PATCHLEVEL);
        if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
        {
            if ( wrmsr_safe(MSR_IA32_UCODE_REV, 0) )
                break;
            /* As documented in the SDM: Do a CPUID 1 here */
            cpuid_eax(1);
        }
        goto normal;

    case MSR_IA32_MISC_ENABLE:
        if ( rdmsr_safe(reg, *val) )
            break;
        *val = guest_misc_enable(*val);
        return X86EMUL_OKAY;

    case MSR_AMD64_DR0_ADDRESS_MASK:
        if ( !boot_cpu_has(X86_FEATURE_DBEXT) )
            break;
        *val = curr->arch.pv_vcpu.dr_mask[0];
        return X86EMUL_OKAY;

    case MSR_AMD64_DR1_ADDRESS_MASK ... MSR_AMD64_DR3_ADDRESS_MASK:
        if ( !boot_cpu_has(X86_FEATURE_DBEXT) )
            break;
        *val = curr->arch.pv_vcpu.dr_mask[reg - MSR_AMD64_DR1_ADDRESS_MASK + 1];
        return X86EMUL_OKAY;

    case MSR_IA32_PERF_CAPABILITIES:
        /* No extra capabilities are supported. */
        *val = 0;
        return X86EMUL_OKAY;

    case MSR_P6_PERFCTR(0) ... MSR_P6_PERFCTR(7):
    case MSR_P6_EVNTSEL(0) ... MSR_P6_EVNTSEL(3):
    case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTR2:
    case MSR_CORE_PERF_FIXED_CTR_CTRL ... MSR_CORE_PERF_GLOBAL_OVF_CTRL:
        if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
        {
            vpmu_msr = true;
            /* fall through */
    case MSR_AMD_FAM15H_EVNTSEL0 ... MSR_AMD_FAM15H_PERFCTR5:
    case MSR_K7_EVNTSEL0 ... MSR_K7_PERFCTR3:
            if ( vpmu_msr || (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) )
            {
                if ( vpmu_do_rdmsr(reg, val) )
                    break;
                return X86EMUL_OKAY;
            }
        }
        /* fall through */
    default:
        if ( rdmsr_hypervisor_regs(reg, val) )
            return X86EMUL_OKAY;

        rc = vmce_rdmsr(reg, val);
        if ( rc < 0 )
            break;
        if ( rc )
            return X86EMUL_OKAY;
        /* fall through */
    normal:
        /* Everyone can read the MSR space. */
        /* gdprintk(XENLOG_WARNING, "Domain attempted RDMSR %08x\n", reg); */
        if ( rdmsr_safe(reg, *val) )
            break;
        return X86EMUL_OKAY;
    }

    return X86EMUL_UNHANDLEABLE;
}
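
/*
 * The vPMU block at the end of read_msr() (and the matching one in
 * write_msr() below) deliberately places the AMD case labels inside the
 * Intel if() body: both vendors then share the vpmu_do_rdmsr()/
 * vpmu_do_wrmsr() path, with vpmu_msr recording whether one of the Intel
 * ranges was matched.
 */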

static int write_msr(unsigned int reg, uint64_t val,
                     struct x86_emulate_ctxt *ctxt)
{
    struct vcpu *curr = current;
    const struct domain *currd = curr->domain;
    bool vpmu_msr = false;
    int ret;

    if ( (ret = guest_wrmsr(curr, reg, val)) != X86EMUL_UNHANDLEABLE )
    {
        if ( ret == X86EMUL_EXCEPTION )
            x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);

        return ret;
    }

    switch ( reg )
    {
        uint64_t temp;
        int rc;

    case MSR_FS_BASE:
        if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
            break;
        wrfsbase(val);
        curr->arch.pv_vcpu.fs_base = val;
        return X86EMUL_OKAY;

    case MSR_GS_BASE:
        if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
            break;
        wrgsbase(val);
        curr->arch.pv_vcpu.gs_base_kernel = val;
        return X86EMUL_OKAY;

    case MSR_SHADOW_GS_BASE:
        if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
            break;
        wrmsrl(MSR_SHADOW_GS_BASE, val);
        curr->arch.pv_vcpu.gs_base_user = val;
        return X86EMUL_OKAY;

    case MSR_K7_FID_VID_STATUS:
    case MSR_K7_FID_VID_CTL:
    case MSR_K8_PSTATE_LIMIT:
    case MSR_K8_PSTATE_CTRL:
    case MSR_K8_PSTATE_STATUS:
    case MSR_K8_PSTATE0:
    case MSR_K8_PSTATE1:
    case MSR_K8_PSTATE2:
    case MSR_K8_PSTATE3:
    case MSR_K8_PSTATE4:
    case MSR_K8_PSTATE5:
    case MSR_K8_PSTATE6:
    case MSR_K8_PSTATE7:
    case MSR_K8_HWCR:
        if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD )
            break;
        if ( likely(!is_cpufreq_controller(currd)) ||
             wrmsr_safe(reg, val) == 0 )
            return X86EMUL_OKAY;
        break;

    case MSR_AMD64_NB_CFG:
        if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD ||
             boot_cpu_data.x86 < 0x10 || boot_cpu_data.x86 > 0x17 )
            break;
        if ( !is_hardware_domain(currd) || !is_pinned_vcpu(curr) )
            return X86EMUL_OKAY;
        if ( (rdmsr_safe(MSR_AMD64_NB_CFG, temp) != 0) ||
             ((val ^ temp) & ~(1ULL << AMD64_NB_CFG_CF8_EXT_ENABLE_BIT)) )
            goto invalid;
        if ( wrmsr_safe(MSR_AMD64_NB_CFG, val) == 0 )
            return X86EMUL_OKAY;
        break;

    case MSR_FAM10H_MMIO_CONF_BASE:
        if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD ||
             boot_cpu_data.x86 < 0x10 || boot_cpu_data.x86 > 0x17 )
            break;
        if ( !is_hardware_domain(currd) || !is_pinned_vcpu(curr) )
            return X86EMUL_OKAY;
        if ( rdmsr_safe(MSR_FAM10H_MMIO_CONF_BASE, temp) != 0 )
            break;
        if ( (pci_probe & PCI_PROBE_MASK) == PCI_PROBE_MMCONF ?
             temp != val :
             ((temp ^ val) &
              ~(FAM10H_MMIO_CONF_ENABLE |
                (FAM10H_MMIO_CONF_BUSRANGE_MASK <<
                 FAM10H_MMIO_CONF_BUSRANGE_SHIFT) |
                ((u64)FAM10H_MMIO_CONF_BASE_MASK <<
                 FAM10H_MMIO_CONF_BASE_SHIFT))) )
            goto invalid;
        if ( wrmsr_safe(MSR_FAM10H_MMIO_CONF_BASE, val) == 0 )
            return X86EMUL_OKAY;
        break;

    case MSR_IA32_UCODE_REV:
        if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
            break;
        if ( !is_hardware_domain(currd) || !is_pinned_vcpu(curr) )
            return X86EMUL_OKAY;
        if ( rdmsr_safe(reg, temp) )
            break;
        if ( val )
            goto invalid;
        return X86EMUL_OKAY;

    case MSR_IA32_MISC_ENABLE:
        if ( rdmsr_safe(reg, temp) )
            break;
        if ( val != guest_misc_enable(temp) )
            goto invalid;
        return X86EMUL_OKAY;

    case MSR_IA32_MPERF:
    case MSR_IA32_APERF:
        if ( (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) &&
             (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) )
            break;
        if ( likely(!is_cpufreq_controller(currd)) ||
             wrmsr_safe(reg, val) == 0 )
            return X86EMUL_OKAY;
        break;

    case MSR_IA32_PERF_CTL:
        if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
            break;
        if ( likely(!is_cpufreq_controller(currd)) ||
             wrmsr_safe(reg, val) == 0 )
            return X86EMUL_OKAY;
        break;

    case MSR_IA32_THERM_CONTROL:
    case MSR_IA32_ENERGY_PERF_BIAS:
        if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
            break;
        if ( !is_hardware_domain(currd) || !is_pinned_vcpu(curr) ||
             wrmsr_safe(reg, val) == 0 )
            return X86EMUL_OKAY;
        break;

    case MSR_AMD64_DR0_ADDRESS_MASK:
        if ( !boot_cpu_has(X86_FEATURE_DBEXT) || (val >> 32) )
            break;
        curr->arch.pv_vcpu.dr_mask[0] = val;
        if ( curr->arch.debugreg[7] & DR7_ACTIVE_MASK )
            wrmsrl(MSR_AMD64_DR0_ADDRESS_MASK, val);
        return X86EMUL_OKAY;

    case MSR_AMD64_DR1_ADDRESS_MASK ... MSR_AMD64_DR3_ADDRESS_MASK:
        if ( !boot_cpu_has(X86_FEATURE_DBEXT) || (val >> 32) )
            break;
        curr->arch.pv_vcpu.dr_mask[reg - MSR_AMD64_DR1_ADDRESS_MASK + 1] = val;
        if ( curr->arch.debugreg[7] & DR7_ACTIVE_MASK )
            wrmsrl(reg, val);
        return X86EMUL_OKAY;

    case MSR_P6_PERFCTR(0) ... MSR_P6_PERFCTR(7):
    case MSR_P6_EVNTSEL(0) ... MSR_P6_EVNTSEL(3):
    case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTR2:
    case MSR_CORE_PERF_FIXED_CTR_CTRL ... MSR_CORE_PERF_GLOBAL_OVF_CTRL:
        if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
        {
            vpmu_msr = true;
    case MSR_AMD_FAM15H_EVNTSEL0 ... MSR_AMD_FAM15H_PERFCTR5:
    case MSR_K7_EVNTSEL0 ... MSR_K7_PERFCTR3:
            if ( vpmu_msr || (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) )
            {
                if ( (vpmu_mode & XENPMU_MODE_ALL) &&
                     !is_hardware_domain(currd) )
                    return X86EMUL_OKAY;

                if ( vpmu_do_wrmsr(reg, val, 0) )
                    break;
                return X86EMUL_OKAY;
            }
        }
        /* fall through */
    default:
        if ( wrmsr_hypervisor_regs(reg, val) == 1 )
            return X86EMUL_OKAY;

        rc = vmce_wrmsr(reg, val);
        if ( rc < 0 )
            break;
        if ( rc )
            return X86EMUL_OKAY;

        if ( (rdmsr_safe(reg, temp) != 0) || (val != temp) )
    invalid:
            gdprintk(XENLOG_WARNING,
                     "Domain attempted WRMSR %08x from 0x%016"PRIx64" to 0x%016"PRIx64"\n",
                     reg, temp, val);
        return X86EMUL_OKAY;
    }

    return X86EMUL_UNHANDLEABLE;
}

/* Name it differently to avoid clashing with wbinvd() */
static int _wbinvd(struct x86_emulate_ctxt *ctxt)
{
    /* Ignore the instruction if unprivileged. */
    if ( !cache_flush_permitted(current->domain) )
        /*
         * Non-physdev domain attempted WBINVD; ignore for now since
         * newer linux uses this in some start-of-day timing loops.
         */
        ;
    else
        wbinvd();

    return X86EMUL_OKAY;
}

int pv_emul_cpuid(uint32_t leaf, uint32_t subleaf,
                  struct cpuid_leaf *res, struct x86_emulate_ctxt *ctxt)
{
    guest_cpuid(current, leaf, subleaf, res);

    return X86EMUL_OKAY;
}

static int validate(const struct x86_emulate_state *state,
                    struct x86_emulate_ctxt *ctxt)
{
    switch ( ctxt->opcode )
    {
    case 0x6c ... 0x6f: /* ins / outs */
    case 0xe4 ... 0xe7: /* in / out (immediate port) */
    case 0xec ... 0xef: /* in / out (port in %dx) */
    case X86EMUL_OPC(0x0f, 0x06): /* clts */
    case X86EMUL_OPC(0x0f, 0x09): /* wbinvd */
    case X86EMUL_OPC(0x0f, 0x20) ...
         X86EMUL_OPC(0x0f, 0x23): /* mov to/from cr/dr */
    case X86EMUL_OPC(0x0f, 0x30): /* wrmsr */
    case X86EMUL_OPC(0x0f, 0x31): /* rdtsc */
    case X86EMUL_OPC(0x0f, 0x32): /* rdmsr */
    case X86EMUL_OPC(0x0f, 0xa2): /* cpuid */
        return X86EMUL_OKAY;

    case 0xfa: case 0xfb: /* cli / sti */
        if ( !iopl_ok(current, ctxt->regs) )
            break;
        /*
         * This is just too dangerous to allow, in my opinion. Consider if the
         * caller then tries to reenable interrupts using POPF: we can't trap
         * that and we'll end up with hard-to-debug lockups. Fast & loose will
         * do for us. :-)
        vcpu_info(current, evtchn_upcall_mask) = (ctxt->opcode == 0xfa);
         */
        return X86EMUL_DONE;

    case X86EMUL_OPC(0x0f, 0x01):
    {
        unsigned int modrm_rm, modrm_reg;

        if ( x86_insn_modrm(state, &modrm_rm, &modrm_reg) != 3 ||
             (modrm_rm & 7) != 1 )
            break;
        switch ( modrm_reg & 7 )
        {
        case 2: /* xsetbv */
        case 7: /* rdtscp */
            return X86EMUL_OKAY;
        }
        break;
    }
    }

    return X86EMUL_UNHANDLEABLE;
}
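
/*
 * validate() acts as a whitelist: only the privileged opcodes listed above
 * (I/O, clts, wbinvd, cr/dr moves, rdtsc/rdmsr/wrmsr, cpuid, cli/sti, and
 * the xsetbv/rdtscp forms of 0f 01) are allowed to proceed; anything else
 * is rejected as X86EMUL_UNHANDLEABLE before emulation goes any further.
 */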

static int insn_fetch(enum x86_segment seg,
                      unsigned long offset,
                      void *p_data,
                      unsigned int bytes,
                      struct x86_emulate_ctxt *ctxt)
{
    const struct priv_op_ctxt *poc =
        container_of(ctxt, struct priv_op_ctxt, ctxt);
    unsigned int rc;
    unsigned long addr = poc->cs.base + offset;

    ASSERT(seg == x86_seg_cs);

    /* We don't mean to emulate any branches. */
    if ( !bytes )
        return X86EMUL_UNHANDLEABLE;

    rc = pv_emul_virt_to_linear(poc->cs.base, offset, bytes, poc->cs.limit,
                                x86_seg_cs, ctxt, &addr);
    if ( rc != X86EMUL_OKAY )
        return rc;

    if ( (rc = __copy_from_user(p_data, (void *)addr, bytes)) != 0 )
    {
        /*
         * TODO: This should report PFEC_insn_fetch when goc->insn_fetch &&
         * cpu_has_nx, but we'd then need a "fetch" variant of
         * __copy_from_user() respecting NX, SMEP, and protection keys.
         */
        x86_emul_pagefault(0, addr + bytes - rc, ctxt);
        return X86EMUL_EXCEPTION;
    }

    return X86EMUL_OKAY;
}


static const struct x86_emulate_ops priv_op_ops = {
    .insn_fetch          = insn_fetch,
    .read                = x86emul_unhandleable_rw,
    .validate            = validate,
    .read_io             = read_io,
    .write_io            = write_io,
    .rep_ins             = rep_ins,
    .rep_outs            = rep_outs,
    .read_segment        = read_segment,
    .read_cr             = read_cr,
    .write_cr            = write_cr,
    .read_dr             = read_dr,
    .write_dr            = write_dr,
    .read_msr            = read_msr,
    .write_msr           = write_msr,
    .cpuid               = pv_emul_cpuid,
    .wbinvd              = _wbinvd,
};
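
/*
 * In this hook table, .read is wired to x86emul_unhandleable_rw, so plain
 * memory reads are refused; only the hooks listed here back the subset of
 * instructions that validate() admits.
 */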

int pv_emulate_privileged_op(struct cpu_user_regs *regs)
{
    struct vcpu *curr = current;
    struct domain *currd = curr->domain;
    struct priv_op_ctxt ctxt = {
        .ctxt.regs = regs,
        .ctxt.vendor = currd->arch.cpuid->x86_vendor,
        .ctxt.lma = !is_pv_32bit_domain(currd),
    };
    int rc;
    unsigned int eflags, ar;

    if ( !pv_emul_read_descriptor(regs->cs, curr, &ctxt.cs.base,
                                  &ctxt.cs.limit, &ar, 1) ||
         !(ar & _SEGMENT_S) ||
         !(ar & _SEGMENT_P) ||
         !(ar & _SEGMENT_CODE) )
        return 0;

    /* Mirror virtualized state into EFLAGS. */
    ASSERT(regs->eflags & X86_EFLAGS_IF);
    if ( vcpu_info(curr, evtchn_upcall_mask) )
        regs->eflags &= ~X86_EFLAGS_IF;
    else
        regs->eflags |= X86_EFLAGS_IF;
    ASSERT(!(regs->eflags & X86_EFLAGS_IOPL));
    regs->eflags |= curr->arch.pv_vcpu.iopl;
    eflags = regs->eflags;

    ctxt.ctxt.addr_size = ar & _SEGMENT_L ? 64 : ar & _SEGMENT_DB ? 32 : 16;
    /* Leave zero in ctxt.ctxt.sp_size, as it's not needed. */
    rc = x86_emulate(&ctxt.ctxt, &priv_op_ops);

    if ( ctxt.io_emul_stub )
        unmap_domain_page(ctxt.io_emul_stub);

    /*
     * Un-mirror virtualized state from EFLAGS.
     * Nothing we allow to be emulated can change anything other than the
     * arithmetic bits, and the resume flag.
     */
    ASSERT(!((regs->eflags ^ eflags) &
             ~(X86_EFLAGS_RF | X86_EFLAGS_ARITH_MASK)));
    regs->eflags |= X86_EFLAGS_IF;
    regs->eflags &= ~X86_EFLAGS_IOPL;

    switch ( rc )
    {
    case X86EMUL_OKAY:
        if ( ctxt.tsc & TSC_BASE )
        {
            if ( ctxt.tsc & TSC_AUX )
                pv_soft_rdtsc(curr, regs, 1);
            else if ( currd->arch.vtsc )
                pv_soft_rdtsc(curr, regs, 0);
            else
                msr_split(regs, rdtsc());
        }

        if ( ctxt.ctxt.retire.singlestep )
            ctxt.bpmatch |= DR_STEP;
        if ( ctxt.bpmatch )
        {
            curr->arch.debugreg[6] |= ctxt.bpmatch | DR_STATUS_RESERVED_ONE;
            if ( !(curr->arch.pv_vcpu.trap_bounce.flags & TBF_EXCEPTION) )
                pv_inject_hw_exception(TRAP_debug, X86_EVENT_NO_EC);
        }
        /* fall through */
    case X86EMUL_RETRY:
        return EXCRET_fault_fixed;

    case X86EMUL_EXCEPTION:
        pv_inject_event(&ctxt.ctxt.event);
        return EXCRET_fault_fixed;
    }

    return 0;
}
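
/*
 * Overall flow of pv_emulate_privileged_op(), as far as can be read from
 * this file: the guest's virtual interrupt flag and IOPL are mirrored into
 * regs->eflags, x86_emulate() is run with priv_op_ops, the mirroring is
 * undone, and then deferred RDTSC handling, pending debug breakpoints and
 * any exception recorded by the emulator are delivered before returning
 * EXCRET_fault_fixed to the (presumably #GP) fault handler that invoked it.
 */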

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */