Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/arch/x86/x86_emulate/x86_emulate.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 * x86_emulate.c
3
 * 
4
 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
5
 * 
6
 * Copyright (c) 2005-2007 Keir Fraser
7
 * Copyright (c) 2005-2007 XenSource Inc.
8
 * 
9
 * This program is free software; you can redistribute it and/or modify
10
 * it under the terms of the GNU General Public License as published by
11
 * the Free Software Foundation; either version 2 of the License, or
12
 * (at your option) any later version.
13
 * 
14
 * This program is distributed in the hope that it will be useful,
15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
 * GNU General Public License for more details.
18
 * 
19
 * You should have received a copy of the GNU General Public License
20
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
21
 */
22
23
/* Operand sizes: 8-bit operands or specified/overridden size. */
24
117k
#define ByteOp      (1<<0) /* 8-bit operands. */
25
/* Destination operand type. */
26
0
#define DstNone     (0<<1) /* No destination operand. */
27
0
#define DstImplicit (0<<1) /* Destination operand is implicit in the opcode. */
28
0
#define DstBitBase  (1<<1) /* Memory operand, bit string. */
29
56.0k
#define DstReg      (2<<1) /* Register operand. */
30
0
#define DstEax      DstReg /* Register EAX (aka DstReg with no ModRM) */
31
4.11k
#define DstMem      (3<<1) /* Memory operand. */
32
60.1k
#define DstMask     (3<<1)
33
/* Source operand type. */
34
0
#define SrcNone     (0<<3) /* No source operand. */
35
0
#define SrcImplicit (0<<3) /* Source operand is implicit in the opcode. */
36
4.11k
#define SrcReg      (1<<3) /* Register operand. */
37
#define SrcEax      SrcReg /* Register EAX (aka SrcReg with no ModRM) */
38
53.1k
#define SrcMem      (2<<3) /* Memory operand. */
39
2.87k
#define SrcMem16    (3<<3) /* Memory operand (16-bit). */
40
12
#define SrcImm      (4<<3) /* Immediate operand. */
41
0
#define SrcImmByte  (5<<3) /* 8-bit sign-extended immediate operand. */
42
0
#define SrcImm16    (6<<3) /* 16-bit zero-extended immediate operand. */
43
120k
#define SrcMask     (7<<3)
44
/* Generic ModRM decode. */
45
180k
#define ModRM       (1<<6)
46
/* vSIB addressing mode (0f38 extension opcodes only), aliasing ModRM. */
47
58.3k
#define vSIB        (1<<6)
48
/* Destination is only written; never read. */
49
8.23k
#define Mov         (1<<7)
50
/* VEX/EVEX (SIMD only): 2nd source operand unused (must be all ones) */
51
0
#define TwoOp       Mov
52
/* All operands are implicit in the opcode. */
53
#define ImplicitOps (DstImplicit|SrcImplicit)
54
55
typedef uint8_t opcode_desc_t;
56
57
static const opcode_desc_t opcode_table[256] = {
58
    /* 0x00 - 0x07 */
59
    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
60
    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
61
    ByteOp|DstEax|SrcImm, DstEax|SrcImm, ImplicitOps|Mov, ImplicitOps|Mov,
62
    /* 0x08 - 0x0F */
63
    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
64
    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
65
    ByteOp|DstEax|SrcImm, DstEax|SrcImm, ImplicitOps|Mov, 0,
66
    /* 0x10 - 0x17 */
67
    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
68
    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
69
    ByteOp|DstEax|SrcImm, DstEax|SrcImm, ImplicitOps|Mov, ImplicitOps|Mov,
70
    /* 0x18 - 0x1F */
71
    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
72
    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
73
    ByteOp|DstEax|SrcImm, DstEax|SrcImm, ImplicitOps|Mov, ImplicitOps|Mov,
74
    /* 0x20 - 0x27 */
75
    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
76
    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
77
    ByteOp|DstEax|SrcImm, DstEax|SrcImm, 0, ImplicitOps,
78
    /* 0x28 - 0x2F */
79
    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
80
    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
81
    ByteOp|DstEax|SrcImm, DstEax|SrcImm, 0, ImplicitOps,
82
    /* 0x30 - 0x37 */
83
    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
84
    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
85
    ByteOp|DstEax|SrcImm, DstEax|SrcImm, 0, ImplicitOps,
86
    /* 0x38 - 0x3F */
87
    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
88
    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
89
    ByteOp|DstEax|SrcImm, DstEax|SrcImm, 0, ImplicitOps,
90
    /* 0x40 - 0x4F */
91
    ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
92
    ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
93
    ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
94
    ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
95
    /* 0x50 - 0x5F */
96
    ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
97
    ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
98
    ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
99
    ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
100
    /* 0x60 - 0x67 */
101
    ImplicitOps, ImplicitOps, DstReg|SrcMem|ModRM, DstReg|SrcNone|ModRM|Mov,
102
    0, 0, 0, 0,
103
    /* 0x68 - 0x6F */
104
    DstImplicit|SrcImm|Mov, DstReg|SrcImm|ModRM|Mov,
105
    DstImplicit|SrcImmByte|Mov, DstReg|SrcImmByte|ModRM|Mov,
106
    ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
107
    /* 0x70 - 0x77 */
108
    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
109
    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
110
    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
111
    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
112
    /* 0x78 - 0x7F */
113
    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
114
    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
115
    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
116
    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
117
    /* 0x80 - 0x87 */
118
    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM,
119
    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM,
120
    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
121
    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
122
    /* 0x88 - 0x8F */
123
    ByteOp|DstMem|SrcReg|ModRM|Mov, DstMem|SrcReg|ModRM|Mov,
124
    ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
125
    DstMem|SrcReg|ModRM|Mov, DstReg|SrcNone|ModRM,
126
    DstReg|SrcMem16|ModRM|Mov, DstMem|SrcNone|ModRM|Mov,
127
    /* 0x90 - 0x97 */
128
    DstImplicit|SrcEax, DstImplicit|SrcEax,
129
    DstImplicit|SrcEax, DstImplicit|SrcEax,
130
    DstImplicit|SrcEax, DstImplicit|SrcEax,
131
    DstImplicit|SrcEax, DstImplicit|SrcEax,
132
    /* 0x98 - 0x9F */
133
    ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
134
    ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps, ImplicitOps,
135
    /* 0xA0 - 0xA7 */
136
    ByteOp|DstEax|SrcMem|Mov, DstEax|SrcMem|Mov,
137
    ByteOp|DstMem|SrcEax|Mov, DstMem|SrcEax|Mov,
138
    ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
139
    ByteOp|ImplicitOps, ImplicitOps,
140
    /* 0xA8 - 0xAF */
141
    ByteOp|DstEax|SrcImm, DstEax|SrcImm,
142
    ByteOp|DstImplicit|SrcEax|Mov, DstImplicit|SrcEax|Mov,
143
    ByteOp|DstEax|SrcImplicit|Mov, DstEax|SrcImplicit|Mov,
144
    ByteOp|DstImplicit|SrcEax, DstImplicit|SrcEax,
145
    /* 0xB0 - 0xB7 */
146
    ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov,
147
    ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov,
148
    ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov,
149
    ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov,
150
    /* 0xB8 - 0xBF */
151
    DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov,
152
    DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov,
153
    /* 0xC0 - 0xC7 */
154
    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM,
155
    DstImplicit|SrcImm16, ImplicitOps,
156
    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
157
    ByteOp|DstMem|SrcImm|ModRM|Mov, DstMem|SrcImm|ModRM|Mov,
158
    /* 0xC8 - 0xCF */
159
    DstImplicit|SrcImm16, ImplicitOps, DstImplicit|SrcImm16, ImplicitOps,
160
    ImplicitOps, DstImplicit|SrcImmByte, ImplicitOps, ImplicitOps,
161
    /* 0xD0 - 0xD7 */
162
    ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM,
163
    ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM,
164
    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte, ImplicitOps, ImplicitOps,
165
    /* 0xD8 - 0xDF */
166
    ImplicitOps|ModRM, ImplicitOps|ModRM|Mov,
167
    ImplicitOps|ModRM, ImplicitOps|ModRM|Mov,
168
    ImplicitOps|ModRM, ImplicitOps|ModRM|Mov,
169
    DstImplicit|SrcMem16|ModRM, ImplicitOps|ModRM|Mov,
170
    /* 0xE0 - 0xE7 */
171
    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
172
    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
173
    DstEax|SrcImmByte, DstEax|SrcImmByte,
174
    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
175
    /* 0xE8 - 0xEF */
176
    DstImplicit|SrcImm|Mov, DstImplicit|SrcImm,
177
    ImplicitOps, DstImplicit|SrcImmByte,
178
    DstEax|SrcImplicit, DstEax|SrcImplicit, ImplicitOps, ImplicitOps,
179
    /* 0xF0 - 0xF7 */
180
    0, ImplicitOps, 0, 0,
181
    ImplicitOps, ImplicitOps, ByteOp|ModRM, ModRM,
182
    /* 0xF8 - 0xFF */
183
    ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
184
    ImplicitOps, ImplicitOps, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM
185
};
186
187
enum simd_opsize {
188
    simd_none,
189
190
    /*
191
     * Ordinary packed integers:
192
     * - 64 bits without prefix 66 (MMX)
193
     * - 128 bits with prefix 66 (SSEn)
194
     * - 128/256 bits depending on VEX.L (AVX)
195
     */
196
    simd_packed_int,
197
198
    /*
199
     * Ordinary packed/scalar floating point:
200
     * - 128 bits without prefix or with prefix 66 (SSEn)
201
     * - 128/256 bits depending on VEX.L (AVX)
202
     * - 32 bits with prefix F3 (scalar single)
203
     * - 64 bits with prefix F2 (scalar doubgle)
204
     */
205
    simd_any_fp,
206
207
    /*
208
     * Packed floating point:
209
     * - 128 bits without prefix or with prefix 66 (SSEn)
210
     * - 128/256 bits depending on VEX.L (AVX)
211
     */
212
    simd_packed_fp,
213
214
    /*
215
     * Single precision packed/scalar floating point:
216
     * - 128 bits without prefix (SSEn)
217
     * - 128/256 bits depending on VEX.L, no prefix (AVX)
218
     * - 32 bits with prefix F3 (scalar)
219
     */
220
    simd_single_fp,
221
222
    /*
223
     * Scalar floating point:
224
     * - 32 bits with low opcode bit clear (scalar single)
225
     * - 64 bits with low opcode bit set (scalar double)
226
     */
227
    simd_scalar_fp,
228
229
    /*
230
     * 128 bits of integer or floating point data, with no further
231
     * formatting information.
232
     */
233
    simd_128,
234
235
    /* Operand size encoded in non-standard way. */
236
    simd_other
237
};
238
typedef uint8_t simd_opsize_t;
239
240
static const struct {
241
    opcode_desc_t desc;
242
    simd_opsize_t size;
243
} twobyte_table[256] = {
244
    [0x00] = { ModRM },
245
    [0x01] = { ImplicitOps|ModRM },
246
    [0x02] = { DstReg|SrcMem16|ModRM },
247
    [0x03] = { DstReg|SrcMem16|ModRM },
248
    [0x05] = { ImplicitOps },
249
    [0x06] = { ImplicitOps },
250
    [0x07] = { ImplicitOps },
251
    [0x08] = { ImplicitOps },
252
    [0x09] = { ImplicitOps },
253
    [0x0b] = { ImplicitOps },
254
    [0x0d] = { ImplicitOps|ModRM },
255
    [0x0e] = { ImplicitOps },
256
    [0x0f] = { ModRM|SrcImmByte },
257
    [0x10] = { DstImplicit|SrcMem|ModRM|Mov, simd_any_fp },
258
    [0x11] = { DstMem|SrcImplicit|ModRM|Mov, simd_any_fp },
259
    [0x12] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
260
    [0x13] = { DstMem|SrcImplicit|ModRM|Mov, simd_other },
261
    [0x14 ... 0x15] = { DstImplicit|SrcMem|ModRM, simd_packed_fp },
262
    [0x16] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
263
    [0x17] = { DstMem|SrcImplicit|ModRM|Mov, simd_other },
264
    [0x18 ... 0x1f] = { ImplicitOps|ModRM },
265
    [0x20 ... 0x21] = { DstMem|SrcImplicit|ModRM },
266
    [0x22 ... 0x23] = { DstImplicit|SrcMem|ModRM },
267
    [0x28] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp },
268
    [0x29] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_fp },
269
    [0x2a] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
270
    [0x2b] = { DstMem|SrcImplicit|ModRM|Mov, simd_any_fp },
271
    [0x2c ... 0x2d] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
272
    [0x2e ... 0x2f] = { ImplicitOps|ModRM|TwoOp },
273
    [0x30 ... 0x35] = { ImplicitOps },
274
    [0x37] = { ImplicitOps },
275
    [0x38] = { DstReg|SrcMem|ModRM },
276
    [0x3a] = { DstReg|SrcImmByte|ModRM },
277
    [0x40 ... 0x4f] = { DstReg|SrcMem|ModRM|Mov },
278
    [0x50] = { DstReg|SrcImplicit|ModRM|Mov },
279
    [0x51] = { DstImplicit|SrcMem|ModRM|TwoOp, simd_any_fp },
280
    [0x52 ... 0x53] = { DstImplicit|SrcMem|ModRM|TwoOp, simd_single_fp },
281
    [0x54 ... 0x57] = { DstImplicit|SrcMem|ModRM, simd_packed_fp },
282
    [0x58 ... 0x59] = { DstImplicit|SrcMem|ModRM, simd_any_fp },
283
    [0x5a ... 0x5b] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
284
    [0x5c ... 0x5f] = { DstImplicit|SrcMem|ModRM, simd_any_fp },
285
    [0x60 ... 0x62] = { DstImplicit|SrcMem|ModRM, simd_other },
286
    [0x63 ... 0x67] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
287
    [0x68 ... 0x6a] = { DstImplicit|SrcMem|ModRM, simd_other },
288
    [0x6b ... 0x6d] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
289
    [0x6e] = { DstImplicit|SrcMem|ModRM|Mov },
290
    [0x6f] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_int },
291
    [0x70] = { SrcImmByte|ModRM|TwoOp, simd_other },
292
    [0x71 ... 0x73] = { DstImplicit|SrcImmByte|ModRM },
293
    [0x74 ... 0x76] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
294
    [0x77] = { DstImplicit|SrcNone },
295
    [0x78] = { ImplicitOps|ModRM },
296
    [0x79] = { DstReg|SrcMem|ModRM, simd_packed_int },
297
    [0x7c ... 0x7d] = { DstImplicit|SrcMem|ModRM, simd_other },
298
    [0x7e] = { DstMem|SrcImplicit|ModRM|Mov },
299
    [0x7f] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_int },
300
    [0x80 ... 0x8f] = { DstImplicit|SrcImm },
301
    [0x90 ... 0x9f] = { ByteOp|DstMem|SrcNone|ModRM|Mov },
302
    [0xa0 ... 0xa1] = { ImplicitOps|Mov },
303
    [0xa2] = { ImplicitOps },
304
    [0xa3] = { DstBitBase|SrcReg|ModRM },
305
    [0xa4] = { DstMem|SrcImmByte|ModRM },
306
    [0xa5] = { DstMem|SrcReg|ModRM },
307
    [0xa6 ... 0xa7] = { ModRM },
308
    [0xa8 ... 0xa9] = { ImplicitOps|Mov },
309
    [0xaa] = { ImplicitOps },
310
    [0xab] = { DstBitBase|SrcReg|ModRM },
311
    [0xac] = { DstMem|SrcImmByte|ModRM },
312
    [0xad] = { DstMem|SrcReg|ModRM },
313
    [0xae] = { ImplicitOps|ModRM },
314
    [0xaf] = { DstReg|SrcMem|ModRM },
315
    [0xb0] = { ByteOp|DstMem|SrcReg|ModRM },
316
    [0xb1] = { DstMem|SrcReg|ModRM },
317
    [0xb2] = { DstReg|SrcMem|ModRM|Mov },
318
    [0xb3] = { DstBitBase|SrcReg|ModRM },
319
    [0xb4 ... 0xb5] = { DstReg|SrcMem|ModRM|Mov },
320
    [0xb6] = { ByteOp|DstReg|SrcMem|ModRM|Mov },
321
    [0xb7] = { DstReg|SrcMem16|ModRM|Mov },
322
    [0xb8] = { DstReg|SrcMem|ModRM },
323
    [0xb9] = { ModRM },
324
    [0xba] = { DstBitBase|SrcImmByte|ModRM },
325
    [0xbb] = { DstBitBase|SrcReg|ModRM },
326
    [0xbc ... 0xbd] = { DstReg|SrcMem|ModRM },
327
    [0xbe] = { ByteOp|DstReg|SrcMem|ModRM|Mov },
328
    [0xbf] = { DstReg|SrcMem16|ModRM|Mov },
329
    [0xc0] = { ByteOp|DstMem|SrcReg|ModRM },
330
    [0xc1] = { DstMem|SrcReg|ModRM },
331
    [0xc2] = { DstImplicit|SrcImmByte|ModRM, simd_any_fp },
332
    [0xc3] = { DstMem|SrcReg|ModRM|Mov },
333
    [0xc4] = { DstReg|SrcImmByte|ModRM, simd_packed_int },
334
    [0xc5] = { DstReg|SrcImmByte|ModRM|Mov },
335
    [0xc6] = { DstImplicit|SrcImmByte|ModRM, simd_packed_fp },
336
    [0xc7] = { ImplicitOps|ModRM },
337
    [0xc8 ... 0xcf] = { ImplicitOps },
338
    [0xd0] = { DstImplicit|SrcMem|ModRM, simd_other },
339
    [0xd1 ... 0xd3] = { DstImplicit|SrcMem|ModRM, simd_other },
340
    [0xd4 ... 0xd5] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
341
    [0xd6] = { DstMem|SrcImplicit|ModRM|Mov, simd_other },
342
    [0xd7] = { DstReg|SrcImplicit|ModRM|Mov },
343
    [0xd8 ... 0xdf] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
344
    [0xe0] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
345
    [0xe1 ... 0xe2] = { DstImplicit|SrcMem|ModRM, simd_other },
346
    [0xe3 ... 0xe5] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
347
    [0xe6] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
348
    [0xe7] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_int },
349
    [0xe8 ... 0xef] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
350
    [0xf0] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
351
    [0xf1 ... 0xf3] = { DstImplicit|SrcMem|ModRM, simd_other },
352
    [0xf4 ... 0xf6] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
353
    [0xf7] = { DstMem|SrcMem|ModRM|Mov, simd_packed_int },
354
    [0xf8 ... 0xfe] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
355
    [0xff] = { ModRM }
356
};
357
358
/*
359
 * "two_op" and "four_op" below refer to the number of register operands
360
 * (one of which possibly also allowing to be a memory one). The named
361
 * operand counts do not include any immediate operands.
362
 */
363
static const struct {
364
    uint8_t simd_size:5;
365
    uint8_t to_mem:1;
366
    uint8_t two_op:1;
367
    uint8_t vsib:1;
368
} ext0f38_table[256] = {
369
    [0x00 ... 0x0b] = { .simd_size = simd_packed_int },
370
    [0x0c ... 0x0f] = { .simd_size = simd_packed_fp },
371
    [0x10] = { .simd_size = simd_packed_int },
372
    [0x14 ... 0x15] = { .simd_size = simd_packed_fp },
373
    [0x17] = { .simd_size = simd_packed_int, .two_op = 1 },
374
    [0x18 ... 0x19] = { .simd_size = simd_scalar_fp, .two_op = 1 },
375
    [0x1a] = { .simd_size = simd_128, .two_op = 1 },
376
    [0x1c ... 0x1e] = { .simd_size = simd_packed_int, .two_op = 1 },
377
    [0x20 ... 0x25] = { .simd_size = simd_other, .two_op = 1 },
378
    [0x28 ... 0x29] = { .simd_size = simd_packed_int },
379
    [0x2a] = { .simd_size = simd_packed_int, .two_op = 1 },
380
    [0x2b] = { .simd_size = simd_packed_int },
381
    [0x2c ... 0x2d] = { .simd_size = simd_other },
382
    [0x2e ... 0x2f] = { .simd_size = simd_other, .to_mem = 1 },
383
    [0x30 ... 0x35] = { .simd_size = simd_other, .two_op = 1 },
384
    [0x37 ... 0x3f] = { .simd_size = simd_packed_int },
385
    [0x40] = { .simd_size = simd_packed_int },
386
    [0x41] = { .simd_size = simd_packed_int, .two_op = 1 },
387
    [0xc8 ... 0xcd] = { .simd_size = simd_other },
388
    [0xdb] = { .simd_size = simd_packed_int, .two_op = 1 },
389
    [0xdc ... 0xdf] = { .simd_size = simd_packed_int },
390
    [0xf0] = { .two_op = 1 },
391
    [0xf1] = { .to_mem = 1, .two_op = 1 },
392
    [0xf2 ... 0xf3] = {},
393
    [0xf5 ... 0xf7] = {},
394
};
395
396
/* Shift values between src and dst sizes of pmov{s,z}x{b,w,d}{w,d,q}. */
397
static const uint8_t pmov_convert_delta[] = { 1, 2, 3, 1, 2, 1 };
398
399
static const struct {
400
    uint8_t simd_size:5;
401
    uint8_t to_mem:1;
402
    uint8_t two_op:1;
403
    uint8_t four_op:1;
404
} ext0f3a_table[256] = {
405
    [0x04 ... 0x05] = { .simd_size = simd_packed_fp, .two_op = 1 },
406
    [0x06] = { .simd_size = simd_packed_fp },
407
    [0x08 ... 0x09] = { .simd_size = simd_packed_fp, .two_op = 1 },
408
    [0x0a ... 0x0b] = { .simd_size = simd_scalar_fp },
409
    [0x0c ... 0x0d] = { .simd_size = simd_packed_fp },
410
    [0x0e ... 0x0f] = { .simd_size = simd_packed_int },
411
    [0x14 ... 0x17] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1 },
412
    [0x18] = { .simd_size = simd_128 },
413
    [0x19] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1 },
414
    [0x20] = { .simd_size = simd_none },
415
    [0x21] = { .simd_size = simd_other },
416
    [0x22] = { .simd_size = simd_none },
417
    [0x40 ... 0x41] = { .simd_size = simd_packed_fp },
418
    [0x42] = { .simd_size = simd_packed_int },
419
    [0x44] = { .simd_size = simd_packed_int },
420
    [0x4a ... 0x4b] = { .simd_size = simd_packed_fp, .four_op = 1 },
421
    [0x4c] = { .simd_size = simd_packed_int, .four_op = 1 },
422
    [0x60 ... 0x63] = { .simd_size = simd_packed_int, .two_op = 1 },
423
    [0xcc] = { .simd_size = simd_other },
424
    [0xdf] = { .simd_size = simd_packed_int, .two_op = 1 },
425
    [0xf0] = {},
426
};
427
428
static const opcode_desc_t xop_table[] = {
429
    DstReg|SrcImmByte|ModRM,
430
    DstReg|SrcMem|ModRM,
431
    DstReg|SrcImm|ModRM,
432
};
433
434
0
#define REX_PREFIX 0x40
435
0
#define REX_B 0x01
436
0
#define REX_X 0x02
437
0
#define REX_R 0x04
438
60.1k
#define REX_W 0x08
439
440
0
#define vex_none 0
441
442
enum vex_opcx {
443
    vex_0f = vex_none + 1,
444
    vex_0f38,
445
    vex_0f3a,
446
};
447
448
enum vex_pfx {
449
    vex_66 = vex_none + 1,
450
    vex_f3,
451
    vex_f2
452
};
453
454
0
#define VEX_PREFIX_DOUBLE_MASK 0x1
455
0
#define VEX_PREFIX_SCALAR_MASK 0x2
456
457
static const uint8_t sse_prefix[] = { 0x66, 0xf3, 0xf2 };
458
459
union vex {
460
    uint8_t raw[2];
461
    struct {
462
        uint8_t opcx:5;
463
        uint8_t b:1;
464
        uint8_t x:1;
465
        uint8_t r:1;
466
        uint8_t pfx:2;
467
        uint8_t l:1;
468
        uint8_t reg:4;
469
        uint8_t w:1;
470
    };
471
};
472
473
#ifdef __x86_64__
474
0
# define PFX2 REX_PREFIX
475
#else
476
# define PFX2 0x3e
477
#endif
478
0
#define PFX_BYTES 3
479
0
#define init_prefixes(stub) ({ \
480
0
    uint8_t *buf_ = get_stub(stub); \
481
0
    buf_[0] = 0x3e; \
482
0
    buf_[1] = PFX2; \
483
0
    buf_[2] = 0x0f; \
484
0
    buf_ + 3; \
485
0
})
486
487
0
#define copy_VEX(ptr, vex) ({ \
488
0
    if ( !mode_64bit() ) \
489
0
        (vex).reg |= 8; \
490
0
    (ptr)[0 - PFX_BYTES] = 0xc4; \
491
0
    (ptr)[1 - PFX_BYTES] = (vex).raw[0]; \
492
0
    (ptr)[2 - PFX_BYTES] = (vex).raw[1]; \
493
0
    container_of((ptr) + 1 - PFX_BYTES, typeof(vex), raw[0]); \
494
0
})
495
496
0
#define copy_REX_VEX(ptr, rex, vex) do { \
497
0
    if ( (vex).opcx != vex_none ) \
498
0
        copy_VEX(ptr, vex); \
499
0
    else \
500
0
    { \
501
0
        if ( (vex).pfx ) \
502
0
            (ptr)[0 - PFX_BYTES] = sse_prefix[(vex).pfx - 1]; \
503
0
        /* \
504
0
         * "rex" is always zero for other than 64-bit mode, so OR-ing it \
505
0
         * into any prefix (and not just REX_PREFIX) is safe on 32-bit \
506
0
         * (test harness) builds. \
507
0
         */ \
508
0
        (ptr)[1 - PFX_BYTES] |= rex; \
509
0
    } \
510
0
} while (0)
511
512
union evex {
513
    uint8_t raw[3];
514
    struct {
515
        uint8_t opcx:2;
516
        uint8_t mbz:2;
517
        uint8_t R:1;
518
        uint8_t b:1;
519
        uint8_t x:1;
520
        uint8_t r:1;
521
        uint8_t pfx:2;
522
        uint8_t mbs:1;
523
        uint8_t reg:4;
524
        uint8_t w:1;
525
        uint8_t opmsk:3;
526
        uint8_t RX:1;
527
        uint8_t bcst:1;
528
        uint8_t lr:2;
529
        uint8_t z:1;
530
    };
531
};
532
533
0
#define rep_prefix()   (vex.pfx >= vex_f3)
534
0
#define repe_prefix()  (vex.pfx == vex_f3)
535
0
#define repne_prefix() (vex.pfx == vex_f2)
536
537
/* Type, address-of, and value of an instruction's operand. */
538
struct operand {
539
    enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
540
    unsigned int bytes;
541
542
    /* Operand value. */
543
    unsigned long val;
544
545
    /* Original operand value. */
546
    unsigned long orig_val;
547
548
    /* OP_REG: Pointer to register field. */
549
    unsigned long *reg;
550
551
    /* OP_MEM: Segment and offset. */
552
    struct {
553
        enum x86_segment seg;
554
        unsigned long    off;
555
    } mem;
556
};
557
558
struct x86_emulate_state {
559
    unsigned int op_bytes, ad_bytes;
560
561
    enum {
562
        ext_none = vex_none,
563
        ext_0f   = vex_0f,
564
        ext_0f38 = vex_0f38,
565
        ext_0f3a = vex_0f3a,
566
        /*
567
         * For XOP use values such that the respective instruction field
568
         * can be used without adjustment.
569
         */
570
        ext_8f08 = 8,
571
        ext_8f09,
572
        ext_8f0a,
573
    } ext;
574
    uint8_t modrm, modrm_mod, modrm_reg, modrm_rm;
575
    uint8_t rex_prefix;
576
    bool lock_prefix;
577
    bool not_64bit; /* Instruction not available in 64bit. */
578
    bool fpu_ctrl;  /* Instruction is an FPU control one. */
579
    opcode_desc_t desc;
580
    union vex vex;
581
    union evex evex;
582
    enum simd_opsize simd_size;
583
584
    /*
585
     * Data operand effective address (usually computed from ModRM).
586
     * Default is a memory operand relative to segment DS.
587
     */
588
    struct operand ea;
589
590
    /* Immediate operand values, if any. Use otherwise unused fields. */
591
12
#define imm1 ea.val
592
0
#define imm2 ea.orig_val
593
594
    unsigned long ip;
595
    struct cpu_user_regs *regs;
596
597
#ifndef NDEBUG
598
    /*
599
     * Track caller of x86_decode_insn() to spot missing as well as
600
     * premature calls to x86_emulate_free_state().
601
     */
602
    void *caller;
603
#endif
604
};
605
606
#ifdef __x86_64__
607
180k
#define PTR_POISON ((void *)0x8086000000008086UL) /* non-canonical */
608
#else
609
#define PTR_POISON NULL /* 32-bit builds are for user-space, so NULL is OK. */
610
#endif
611
612
typedef union {
613
    uint64_t mmx;
614
    uint64_t __attribute__ ((aligned(16))) xmm[2];
615
    uint64_t __attribute__ ((aligned(32))) ymm[4];
616
} mmval_t;
617
618
/*
619
 * While proper alignment gets specified above, this doesn't get honored by
620
 * the compiler for automatic variables. Use this helper to instantiate a
621
 * suitably aligned variable, producing a pointer to access it.
622
 */
623
#define DECLARE_ALIGNED(type, var)                                        \
624
60.1k
    long __##var[(sizeof(type) + __alignof(type)) / __alignof(long) - 1]; \
625
60.1k
    type *const var##p =                                                  \
626
60.1k
        (void *)(((long)__##var + __alignof(type) - __alignof(__##var))   \
627
60.1k
                 & -__alignof(type))
628
629
#ifdef __GCC_ASM_FLAG_OUTPUTS__
630
# define ASM_FLAG_OUT(yes, no) yes
631
#else
632
# define ASM_FLAG_OUT(yes, no) no
633
#endif
634
635
/* Floating point status word definitions. */
636
0
#define FSW_ES    (1U << 7)
637
638
/* MXCSR bit definitions. */
639
0
#define MXCSR_MM  (1U << 17)
640
641
/* Exception definitions. */
642
#define EXC_DE  0
643
0
#define EXC_DB  1
644
0
#define EXC_BP  3
645
0
#define EXC_OF  4
646
#define EXC_BR  5
647
0
#define EXC_UD  6
648
#define EXC_NM  7
649
0
#define EXC_DF  8
650
0
#define EXC_TS 10
651
0
#define EXC_NP 11
652
0
#define EXC_SS 12
653
0
#define EXC_GP 13
654
0
#define EXC_PF 14
655
#define EXC_MF 16
656
0
#define EXC_AC 17
657
0
#define EXC_XM 19
658
659
#define EXC_HAS_EC                                                      \
660
0
    ((1u << EXC_DF) | (1u << EXC_TS) | (1u << EXC_NP) |                 \
661
0
     (1u << EXC_SS) | (1u << EXC_GP) | (1u << EXC_PF) | (1u << EXC_AC))
662
663
/* Segment selector error code bits. */
664
#define ECODE_EXT (1 << 0)
665
#define ECODE_IDT (1 << 1)
666
#define ECODE_TI  (1 << 2)
667
668
/*
669
 * Instruction emulation:
670
 * Most instructions are emulated directly via a fragment of inline assembly
671
 * code. This allows us to save/restore EFLAGS and thus very easily pick up
672
 * any modified flags.
673
 */
674
675
#if defined(__x86_64__)
676
#define _LO32 "k"          /* force 32-bit operand */
677
#define _STK  "%%rsp"      /* stack pointer */
678
#define _BYTES_PER_LONG "8"
679
#elif defined(__i386__)
680
#define _LO32 ""           /* force 32-bit operand */
681
#define _STK  "%%esp"      /* stack pointer */
682
#define _BYTES_PER_LONG "4"
683
#endif
684
685
/*
686
 * These EFLAGS bits are restored from saved value during emulation, and
687
 * any changes are written back to the saved value after emulation.
688
 */
689
0
#define EFLAGS_MASK (X86_EFLAGS_OF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \
690
0
                     X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF)
691
692
/*
693
 * These EFLAGS bits are modifiable (by POPF and IRET), possibly subject
694
 * to further CPL and IOPL constraints.
695
 */
696
0
#define EFLAGS_MODIFIABLE (X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_RF | \
697
0
                           X86_EFLAGS_NT | X86_EFLAGS_IOPL | X86_EFLAGS_DF | \
698
0
                           X86_EFLAGS_IF | X86_EFLAGS_TF | EFLAGS_MASK)
699
700
/* Before executing instruction: restore necessary bits in EFLAGS. */
701
#define _PRE_EFLAGS(_sav, _msk, _tmp)                           \
702
/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
703
"movl %"_LO32 _sav",%"_LO32 _tmp"; "                            \
704
"push %"_tmp"; "                                                \
705
"push %"_tmp"; "                                                \
706
"movl %"_msk",%"_LO32 _tmp"; "                                  \
707
"andl %"_LO32 _tmp",("_STK"); "                                 \
708
"pushf; "                                                       \
709
"notl %"_LO32 _tmp"; "                                          \
710
"andl %"_LO32 _tmp",("_STK"); "                                 \
711
"andl %"_LO32 _tmp",2*"_BYTES_PER_LONG"("_STK"); "              \
712
"pop  %"_tmp"; "                                                \
713
"orl  %"_LO32 _tmp",("_STK"); "                                 \
714
"popf; "                                                        \
715
"pop  %"_tmp"; "                                                \
716
"movl %"_LO32 _tmp",%"_LO32 _sav"; "
717
718
/* After executing instruction: write-back necessary bits in EFLAGS. */
719
#define _POST_EFLAGS(_sav, _msk, _tmp)          \
720
/* _sav |= EFLAGS & _msk; */                    \
721
"pushf; "                                       \
722
"pop  %"_tmp"; "                                \
723
"andl %"_msk",%"_LO32 _tmp"; "                  \
724
"orl  %"_LO32 _tmp",%"_LO32 _sav"; "
725
726
/* Raw emulation: instruction has two explicit operands. */
727
#define __emulate_2op_nobyte(_op,_src,_dst,_eflags, wsx,wsy,wdx,wdy,       \
728
0
                             lsx,lsy,ldx,ldy, qsx,qsy,qdx,qdy)             \
729
0
do{ unsigned long _tmp;                                                    \
730
0
    switch ( (_dst).bytes )                                                \
731
0
    {                                                                      \
732
0
    case 2:                                                                \
733
0
        asm volatile (                                                     \
734
0
            _PRE_EFLAGS("0","4","2")                                       \
735
0
            _op"w %"wsx"3,%"wdx"1; "                                       \
736
0
            _POST_EFLAGS("0","4","2")                                      \
737
0
            : "+g" (_eflags), "+" wdy ((_dst).val), "=&r" (_tmp)           \
738
0
            : wsy ((_src).val), "i" (EFLAGS_MASK) );                       \
739
0
        break;                                                             \
740
0
    case 4:                                                                \
741
0
        asm volatile (                                                     \
742
0
            _PRE_EFLAGS("0","4","2")                                       \
743
0
            _op"l %"lsx"3,%"ldx"1; "                                       \
744
0
            _POST_EFLAGS("0","4","2")                                      \
745
0
            : "+g" (_eflags), "+" ldy ((_dst).val), "=&r" (_tmp)           \
746
0
            : lsy ((_src).val), "i" (EFLAGS_MASK) );                       \
747
0
        break;                                                             \
748
0
    case 8:                                                                \
749
0
        __emulate_2op_8byte(_op, _src, _dst, _eflags, qsx, qsy, qdx, qdy); \
750
0
        break;                                                             \
751
0
    }                                                                      \
752
0
} while (0)
753
0
#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy)\
754
0
do{ unsigned long _tmp;                                                    \
755
0
    switch ( (_dst).bytes )                                                \
756
0
    {                                                                      \
757
0
    case 1:                                                                \
758
0
        asm volatile (                                                     \
759
0
            _PRE_EFLAGS("0","4","2")                                       \
760
0
            _op"b %"_bx"3,%1; "                                            \
761
0
            _POST_EFLAGS("0","4","2")                                      \
762
0
            : "+g" (_eflags), "+m" ((_dst).val), "=&r" (_tmp)              \
763
0
            : _by ((_src).val), "i" (EFLAGS_MASK) );                       \
764
0
        break;                                                             \
765
0
    default:                                                               \
766
0
        __emulate_2op_nobyte(_op,_src,_dst,_eflags, _wx,_wy,"","m",        \
767
0
                             _lx,_ly,"","m", _qx,_qy,"","m");              \
768
0
        break;                                                             \
769
0
    }                                                                      \
770
0
} while (0)
771
/* Source operand is byte-sized and may be restricted to just %cl. */
772
#define emulate_2op_SrcB(_op, _src, _dst, _eflags)                         \
773
0
    __emulate_2op(_op, _src, _dst, _eflags,                                \
774
0
                  "b", "c", "b", "c", "b", "c", "b", "c")
775
/* Source operand is byte, word, long or quad sized. */
776
#define emulate_2op_SrcV(_op, _src, _dst, _eflags)                         \
777
0
    __emulate_2op(_op, _src, _dst, _eflags,                                \
778
0
                  "b", "q", "w", "r", _LO32, "r", "", "r")
779
/* Source operand is word, long or quad sized. */
780
#define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags)                  \
781
0
    __emulate_2op_nobyte(_op, _src, _dst, _eflags, "w", "r", "", "m",      \
782
0
                         _LO32, "r", "", "m", "", "r", "", "m")
783
/* Operands are word, long or quad sized and source may be in memory. */
784
#define emulate_2op_SrcV_srcmem(_op, _src, _dst, _eflags)                  \
785
0
    __emulate_2op_nobyte(_op, _src, _dst, _eflags, "", "m", "w", "r",      \
786
0
                         "", "m", _LO32, "r", "", "m", "", "r")
787
788
/* Instruction has only one explicit operand (no source operand). */
789
0
#define emulate_1op(_op,_dst,_eflags)                                      \
790
0
do{ unsigned long _tmp;                                                    \
791
0
    switch ( (_dst).bytes )                                                \
792
0
    {                                                                      \
793
0
    case 1:                                                                \
794
0
        asm volatile (                                                     \
795
0
            _PRE_EFLAGS("0","3","2")                                       \
796
0
            _op"b %1; "                                                    \
797
0
            _POST_EFLAGS("0","3","2")                                      \
798
0
            : "+g" (_eflags), "+m" ((_dst).val), "=&r" (_tmp)              \
799
0
            : "i" (EFLAGS_MASK) );                                         \
800
0
        break;                                                             \
801
0
    case 2:                                                                \
802
0
        asm volatile (                                                     \
803
0
            _PRE_EFLAGS("0","3","2")                                       \
804
0
            _op"w %1; "                                                    \
805
0
            _POST_EFLAGS("0","3","2")                                      \
806
0
            : "+g" (_eflags), "+m" ((_dst).val), "=&r" (_tmp)              \
807
0
            : "i" (EFLAGS_MASK) );                                         \
808
0
        break;                                                             \
809
0
    case 4:                                                                \
810
0
        asm volatile (                                                     \
811
0
            _PRE_EFLAGS("0","3","2")                                       \
812
0
            _op"l %1; "                                                    \
813
0
            _POST_EFLAGS("0","3","2")                                      \
814
0
            : "+g" (_eflags), "+m" ((_dst).val), "=&r" (_tmp)              \
815
0
            : "i" (EFLAGS_MASK) );                                         \
816
0
        break;                                                             \
817
0
    case 8:                                                                \
818
0
        __emulate_1op_8byte(_op, _dst, _eflags);                           \
819
0
        break;                                                             \
820
0
    }                                                                      \
821
0
} while (0)
822
823
/* Emulate an instruction with quadword operands (x86/64 only). */
824
#if defined(__x86_64__)
825
0
#define __emulate_2op_8byte(_op, _src, _dst, _eflags, qsx, qsy, qdx, qdy) \
826
0
do{ asm volatile (                                                      \
827
0
        _PRE_EFLAGS("0","4","2")                                        \
828
0
        _op"q %"qsx"3,%"qdx"1; "                                        \
829
0
        _POST_EFLAGS("0","4","2")                                       \
830
0
        : "+g" (_eflags), "+" qdy ((_dst).val), "=&r" (_tmp)            \
831
0
        : qsy ((_src).val), "i" (EFLAGS_MASK) );                        \
832
0
} while (0)
833
0
#define __emulate_1op_8byte(_op, _dst, _eflags)                         \
834
0
do{ asm volatile (                                                      \
835
0
        _PRE_EFLAGS("0","3","2")                                        \
836
0
        _op"q %1; "                                                     \
837
0
        _POST_EFLAGS("0","3","2")                                       \
838
0
        : "+g" (_eflags), "+m" ((_dst).val), "=&r" (_tmp)               \
839
0
        : "i" (EFLAGS_MASK) );                                          \
840
0
} while (0)
841
#elif defined(__i386__)
842
#define __emulate_2op_8byte(_op, _src, _dst, _eflags, qsx, qsy, qdx, qdy)
843
#define __emulate_1op_8byte(_op, _dst, _eflags)
844
#endif /* __i386__ */
845
846
8.23k
#define fail_if(p)                                      \
847
8.23k
do {                                                    \
848
8.23k
    rc = (p) ? X86EMUL_UNHANDLEABLE : X86EMUL_OKAY;     \
849
8.23k
    if ( rc ) goto done;                                \
850
8.23k
} while (0)
851
852
static inline int mkec(uint8_t e, int32_t ec, ...)
853
0
{
854
0
    return (e < 32 && ((1u << e) & EXC_HAS_EC)) ? ec : X86_EVENT_NO_EC;
855
0
}
856
857
312k
#define generate_exception_if(p, e, ec...)                                \
858
312k
({  if ( (p) ) {                                                          \
859
0
        x86_emul_hw_exception(e, mkec(e, ##ec, 0), ctxt);                 \
860
0
        rc = X86EMUL_EXCEPTION;                                           \
861
0
        goto done;                                                        \
862
0
    }                                                                     \
863
312k
})
864
865
0
#define generate_exception(e, ec...) generate_exception_if(true, e, ##ec)
866
867
#ifdef __XEN__
868
0
# define invoke_stub(pre, post, constraints...) do {                    \
869
0
    union stub_exception_token res_ = { .raw = ~0 };                    \
870
0
    asm volatile ( pre "\n\tcall *%[stub]\n\t" post "\n"                \
871
0
                   ".Lret%=:\n\t"                                       \
872
0
                   ".pushsection .fixup,\"ax\"\n"                       \
873
0
                   ".Lfix%=:\n\t"                                       \
874
0
                   "pop %[exn]\n\t"                                     \
875
0
                   "jmp .Lret%=\n\t"                                    \
876
0
                   ".popsection\n\t"                                    \
877
0
                   _ASM_EXTABLE(.Lret%=, .Lfix%=)                       \
878
0
                   : [exn] "+g" (res_), constraints,                    \
879
0
                     [stub] "rm" (stub.func),                           \
880
0
                     "m" (*(uint8_t(*)[MAX_INST_LEN + 1])stub.ptr) );   \
881
0
    if ( unlikely(~res_.raw) )                                          \
882
0
    {                                                                   \
883
0
        gprintk(XENLOG_WARNING,                                         \
884
0
                "exception %u (ec=%04x) in emulation stub (line %u)\n", \
885
0
                res_.fields.trapnr, res_.fields.ec, __LINE__);          \
886
0
        gprintk(XENLOG_INFO, "stub: %"__stringify(MAX_INST_LEN)"ph\n",  \
887
0
                stub.func);                                             \
888
0
        generate_exception_if(res_.fields.trapnr == EXC_UD, EXC_UD);    \
889
0
        domain_crash(current->domain);                                  \
890
0
        rc = X86EMUL_UNHANDLEABLE;                                      \
891
0
        goto done;                                                      \
892
0
    }                                                                   \
893
0
} while (0)
894
#else
895
# define invoke_stub(pre, post, constraints...)                         \
896
    asm volatile ( pre "\n\tcall *%[stub]\n\t" post                     \
897
                   : constraints, [stub] "rm" (stub.func),              \
898
                     "m" (*(typeof(stub.buf) *)stub.addr) )
899
#endif
900
901
0
#define emulate_stub(dst, src...) do {                                  \
902
0
    unsigned long tmp;                                                  \
903
0
    invoke_stub(_PRE_EFLAGS("[efl]", "[msk]", "[tmp]"),                 \
904
0
                _POST_EFLAGS("[efl]", "[msk]", "[tmp]"),                \
905
0
                dst, [tmp] "=&r" (tmp), [efl] "+g" (_regs.eflags)       \
906
0
                : [msk] "i" (EFLAGS_MASK), ## src);                     \
907
0
} while (0)
908
909
/* Fetch next part of the instruction being emulated. */
910
190k
#define insn_fetch_bytes(_size)                                         \
911
190k
({ unsigned long _x = 0, _ip = state->ip;                               \
912
190k
   state->ip += (_size); /* real hardware doesn't truncate */           \
913
190k
   generate_exception_if((uint8_t)(state->ip -                          \
914
190k
                                   ctxt->regs->r(ip)) > MAX_INST_LEN,   \
915
190k
                         EXC_GP, 0);                                    \
916
190k
   rc = ops->insn_fetch(x86_seg_cs, _ip, &_x, (_size), ctxt);           \
917
190k
   if ( rc ) goto done;                                                 \
918
190k
   _x;                                                                  \
919
190k
})
920
190k
#define insn_fetch_type(_type) ((_type)insn_fetch_bytes(sizeof(_type)))
921
922
64.2k
#define truncate_word(ea, byte_width)           \
923
64.2k
({  unsigned long __ea = (ea);                  \
924
64.2k
    unsigned int _width = (byte_width);         \
925
64.2k
    ((_width == sizeof(unsigned long)) ? __ea : \
926
0
     (__ea & ((1UL << (_width << 3)) - 1)));    \
927
64.2k
})
928
64.2k
#define truncate_ea(ea) truncate_word((ea), ad_bytes)
929
930
#ifdef __x86_64__
931
124k
# define mode_64bit() (ctxt->addr_size == 64)
932
#else
933
# define mode_64bit() false
934
#endif
935
936
/*
937
 * Given byte has even parity (even number of 1s)? SDM Vol. 1 Sec. 3.4.3.1,
938
 * "Status Flags": EFLAGS.PF reflects parity of least-sig. byte of result only.
939
 */
940
static bool even_parity(uint8_t v)
941
0
{
942
0
    asm ( "test %1,%1" ASM_FLAG_OUT(, "; setp %0")
943
0
          : ASM_FLAG_OUT("=@ccp", "=qm") (v) : "q" (v) );
944
0
945
0
    return v;
946
0
}
947
948
/* Update address held in a register, based on addressing mode. */
949
0
#define _register_address_increment(reg, inc, byte_width)               \
950
0
do {                                                                    \
951
0
    int _inc = (inc); /* signed type ensures sign extension to long */  \
952
0
    unsigned int _width = (byte_width);                                 \
953
0
    if ( _width == sizeof(unsigned long) )                              \
954
0
        (reg) += _inc;                                                  \
955
0
    else if ( mode_64bit() )                                            \
956
0
        (reg) = ((reg) + _inc) & ((1UL << (_width << 3)) - 1);          \
957
0
    else                                                                \
958
0
        (reg) = ((reg) & ~((1UL << (_width << 3)) - 1)) |               \
959
0
                (((reg) + _inc) & ((1UL << (_width << 3)) - 1));        \
960
0
} while (0)
961
#define register_address_adjust(reg, adj)                               \
962
0
    _register_address_increment(reg,                                    \
963
0
                                _regs.eflags & X86_EFLAGS_DF ?          \
964
0
                                -(adj) : (adj),                         \
965
0
                                ad_bytes)
966
967
0
#define sp_pre_dec(dec) ({                                              \
968
0
    _register_address_increment(_regs.r(sp), -(dec), ctxt->sp_size/8);  \
969
0
    truncate_word(_regs.r(sp), ctxt->sp_size/8);                        \
970
0
})
971
0
#define sp_post_inc(inc) ({                                             \
972
0
    unsigned long sp = truncate_word(_regs.r(sp), ctxt->sp_size/8);     \
973
0
    _register_address_increment(_regs.r(sp), (inc), ctxt->sp_size/8);   \
974
0
    sp;                                                                 \
975
0
})
976
977
0
#define jmp_rel(rel)                                                    \
978
0
do {                                                                    \
979
0
    unsigned long ip = _regs.r(ip) + (int)(rel);                        \
980
0
    if ( op_bytes == 2 )                                                \
981
0
        ip = (uint16_t)ip;                                              \
982
0
    else if ( !mode_64bit() )                                           \
983
0
        ip = (uint32_t)ip;                                              \
984
0
    rc = ops->insn_fetch(x86_seg_cs, ip, NULL, 0, ctxt);                \
985
0
    if ( rc ) goto done;                                                \
986
0
    _regs.r(ip) = ip;                                                   \
987
0
    singlestep = _regs.eflags & X86_EFLAGS_TF;                          \
988
0
} while (0)
989
990
0
#define validate_far_branch(cs, ip) ({                                  \
991
0
    if ( sizeof(ip) <= 4 ) {                                            \
992
0
        ASSERT(!ctxt->lma);                                             \
993
0
        generate_exception_if((ip) > (cs)->limit, EXC_GP, 0);           \
994
0
    } else                                                              \
995
0
        generate_exception_if(ctxt->lma && (cs)->l                      \
996
0
                              ? !is_canonical_address(ip)               \
997
0
                              : (ip) > (cs)->limit, EXC_GP, 0);         \
998
0
})
999
1000
0
#define commit_far_branch(cs, newip) ({                                 \
1001
0
    validate_far_branch(cs, newip);                                     \
1002
0
    _regs.r(ip) = (newip);                                              \
1003
0
    singlestep = _regs.eflags & X86_EFLAGS_TF;                          \
1004
0
    ops->write_segment(x86_seg_cs, cs, ctxt);                           \
1005
0
})
1006
1007
struct fpu_insn_ctxt {
1008
    uint8_t insn_bytes;
1009
    uint8_t type;
1010
    int8_t exn_raised;
1011
};
1012
1013
static void fpu_handle_exception(void *_fic, struct cpu_user_regs *regs)
1014
0
{
1015
0
    struct fpu_insn_ctxt *fic = _fic;
1016
0
    ASSERT(regs->entry_vector < 0x20);
1017
0
    fic->exn_raised = regs->entry_vector;
1018
0
    regs->r(ip) += fic->insn_bytes;
1019
0
}
1020
1021
static int _get_fpu(
1022
    enum x86_emulate_fpu_type type,
1023
    struct fpu_insn_ctxt *fic,
1024
    struct x86_emulate_ctxt *ctxt,
1025
    const struct x86_emulate_ops *ops)
1026
0
{
1027
0
    int rc;
1028
0
1029
0
    fail_if(!ops->get_fpu);
1030
0
    ASSERT(type != X86EMUL_FPU_none);
1031
0
    rc = ops->get_fpu(fpu_handle_exception, fic, type, ctxt);
1032
0
1033
0
    if ( rc == X86EMUL_OKAY )
1034
0
    {
1035
0
        unsigned long cr0;
1036
0
1037
0
        fail_if(type == X86EMUL_FPU_fpu && !ops->put_fpu);
1038
0
        fic->type = type;
1039
0
1040
0
        fail_if(!ops->read_cr);
1041
0
        if ( type >= X86EMUL_FPU_xmm )
1042
0
        {
1043
0
            unsigned long cr4;
1044
0
1045
0
            rc = ops->read_cr(4, &cr4, ctxt);
1046
0
            if ( rc != X86EMUL_OKAY )
1047
0
                return rc;
1048
0
            generate_exception_if(!(cr4 & ((type == X86EMUL_FPU_xmm)
1049
0
                                           ? X86_CR4_OSFXSR : X86_CR4_OSXSAVE)),
1050
0
                                  EXC_UD);
1051
0
        }
1052
0
1053
0
        rc = ops->read_cr(0, &cr0, ctxt);
1054
0
        if ( rc != X86EMUL_OKAY )
1055
0
            return rc;
1056
0
        if ( type >= X86EMUL_FPU_ymm )
1057
0
        {
1058
0
            /* Should be unreachable if VEX decoding is working correctly. */
1059
0
            ASSERT((cr0 & X86_CR0_PE) && !(ctxt->regs->eflags & X86_EFLAGS_VM));
1060
0
        }
1061
0
        if ( cr0 & X86_CR0_EM )
1062
0
        {
1063
0
            generate_exception_if(type == X86EMUL_FPU_fpu, EXC_NM);
1064
0
            generate_exception_if(type == X86EMUL_FPU_mmx, EXC_UD);
1065
0
            generate_exception_if(type == X86EMUL_FPU_xmm, EXC_UD);
1066
0
        }
1067
0
        generate_exception_if((cr0 & X86_CR0_TS) &&
1068
0
                              (type != X86EMUL_FPU_wait || (cr0 & X86_CR0_MP)),
1069
0
                              EXC_NM);
1070
0
    }
1071
0
1072
0
 done:
1073
0
    return rc;
1074
0
}
1075
1076
0
#define get_fpu(_type, _fic)                                    \
1077
0
do {                                                            \
1078
0
    rc = _get_fpu(_type, _fic, ctxt, ops);                      \
1079
0
    if ( rc ) goto done;                                        \
1080
0
} while (0)
1081
1082
0
#define check_fpu_exn(fic)                                      \
1083
0
do {                                                            \
1084
0
    generate_exception_if((fic)->exn_raised >= 0,               \
1085
0
                          (fic)->exn_raised);                   \
1086
0
} while (0)
1087
1088
0
#define check_xmm_exn(fic)                                      \
1089
0
do {                                                            \
1090
0
    if ( (fic)->exn_raised == EXC_XM && ops->read_cr &&         \
1091
0
         ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY &&         \
1092
0
         !(cr4 & X86_CR4_OSXMMEXCPT) )                          \
1093
0
        (fic)->exn_raised = EXC_UD;                             \
1094
0
    check_fpu_exn(fic);                                         \
1095
0
} while (0)
1096
1097
static void put_fpu(
1098
    struct fpu_insn_ctxt *fic,
1099
    bool failed_late,
1100
    const struct x86_emulate_state *state,
1101
    struct x86_emulate_ctxt *ctxt,
1102
    const struct x86_emulate_ops *ops)
1103
120k
{
1104
120k
    if ( unlikely(failed_late) && fic->type == X86EMUL_FPU_fpu )
1105
0
        ops->put_fpu(ctxt, X86EMUL_FPU_fpu, NULL);
1106
120k
    else if ( unlikely(fic->type == X86EMUL_FPU_fpu) && !state->fpu_ctrl )
1107
0
    {
1108
0
        struct x86_emul_fpu_aux aux = {
1109
0
            .ip = ctxt->regs->r(ip),
1110
0
            .cs = ctxt->regs->cs,
1111
0
            .op = ((ctxt->opcode & 7) << 8) | state->modrm,
1112
0
        };
1113
0
        struct segment_register sreg;
1114
0
1115
0
        if ( ops->read_segment &&
1116
0
             ops->read_segment(x86_seg_cs, &sreg, ctxt) == X86EMUL_OKAY )
1117
0
            aux.cs = sreg.sel;
1118
0
        if ( state->ea.type == OP_MEM )
1119
0
        {
1120
0
            aux.dp = state->ea.mem.off;
1121
0
            if ( ops->read_segment &&
1122
0
                 ops->read_segment(state->ea.mem.seg, &sreg,
1123
0
                                   ctxt) == X86EMUL_OKAY )
1124
0
                aux.ds = sreg.sel;
1125
0
            else
1126
0
                switch ( state->ea.mem.seg )
1127
0
                {
1128
0
                case x86_seg_cs: aux.ds = ctxt->regs->cs; break;
1129
0
                case x86_seg_ds: aux.ds = ctxt->regs->ds; break;
1130
0
                case x86_seg_es: aux.ds = ctxt->regs->es; break;
1131
0
                case x86_seg_fs: aux.ds = ctxt->regs->fs; break;
1132
0
                case x86_seg_gs: aux.ds = ctxt->regs->gs; break;
1133
0
                case x86_seg_ss: aux.ds = ctxt->regs->ss; break;
1134
0
                default:         ASSERT_UNREACHABLE();    break;
1135
0
                }
1136
0
            aux.dval = true;
1137
0
        }
1138
0
        ops->put_fpu(ctxt, X86EMUL_FPU_none, &aux);
1139
0
    }
1140
120k
    else if ( fic->type != X86EMUL_FPU_none && ops->put_fpu )
1141
0
        ops->put_fpu(ctxt, X86EMUL_FPU_none, NULL);
1142
120k
    fic->type = X86EMUL_FPU_none;
1143
120k
}
1144
1145
static inline bool fpu_check_write(void)
1146
0
{
1147
0
    uint16_t fsw;
1148
0
1149
0
    asm ( "fnstsw %0" : "=am" (fsw) );
1150
0
1151
0
    return !(fsw & FSW_ES);
1152
0
}
1153
1154
#define emulate_fpu_insn(_op)                           \
1155
    asm volatile (                                      \
1156
        "movb $2f-1f,%0 \n"                             \
1157
        "1: " _op "     \n"                             \
1158
        "2:             \n"                             \
1159
        : "=m" (fic.insn_bytes) : : "memory" )
1160
1161
#define emulate_fpu_insn_memdst(_op, _arg)              \
1162
0
    asm volatile (                                      \
1163
0
        "movb $2f-1f,%0 \n"                             \
1164
0
        "1: " _op " %1  \n"                             \
1165
0
        "2:             \n"                             \
1166
0
        : "=m" (fic.insn_bytes), "=m" (_arg)            \
1167
0
        : : "memory" )
1168
1169
#define emulate_fpu_insn_memsrc(_op, _arg)              \
1170
0
    asm volatile (                                      \
1171
0
        "movb $2f-1f,%0 \n"                             \
1172
0
        "1: " _op " %1  \n"                             \
1173
0
        "2:             \n"                             \
1174
0
        : "=m" (fic.insn_bytes)                         \
1175
0
        : "m" (_arg) : "memory" )
1176
1177
0
#define emulate_fpu_insn_stub(bytes...)                                 \
1178
0
do {                                                                    \
1179
0
    unsigned int nr_ = sizeof((uint8_t[]){ bytes });                    \
1180
0
    fic.insn_bytes = nr_;                                               \
1181
0
    memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1);      \
1182
0
    invoke_stub("", "", "=m" (fic) : "m" (fic));                        \
1183
0
    put_stub(stub);                                                     \
1184
0
} while (0)
1185
1186
0
#define emulate_fpu_insn_stub_eflags(bytes...)                          \
1187
0
do {                                                                    \
1188
0
    unsigned int nr_ = sizeof((uint8_t[]){ bytes });                    \
1189
0
    unsigned long tmp_;                                                 \
1190
0
    fic.insn_bytes = nr_;                                               \
1191
0
    memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1);      \
1192
0
    invoke_stub(_PRE_EFLAGS("[eflags]", "[mask]", "[tmp]"),             \
1193
0
                _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"),            \
1194
0
                [eflags] "+g" (_regs.eflags), [tmp] "=&r" (tmp_),       \
1195
0
                "+m" (fic)                                              \
1196
0
                : [mask] "i" (X86_EFLAGS_ZF|X86_EFLAGS_PF|X86_EFLAGS_CF)); \
1197
0
    put_stub(stub);                                                     \
1198
0
} while (0)
1199
1200
static inline unsigned long get_loop_count(
1201
    const struct cpu_user_regs *regs,
1202
    int ad_bytes)
1203
0
{
1204
0
    return (ad_bytes > 4) ? regs->r(cx)
1205
0
                          : (ad_bytes < 4) ? regs->cx : regs->ecx;
1206
0
}
1207
1208
static inline void put_loop_count(
1209
    struct cpu_user_regs *regs,
1210
    int ad_bytes,
1211
    unsigned long count)
1212
0
{
1213
0
    if ( ad_bytes == 2 )
1214
0
        regs->cx = count;
1215
0
    else
1216
0
        regs->r(cx) = ad_bytes == 4 ? (uint32_t)count : count;
1217
0
}
1218
1219
0
#define get_rep_prefix(using_si, using_di) ({                           \
1220
0
    unsigned long max_reps = 1;                                         \
1221
0
    if ( rep_prefix() )                                                 \
1222
0
        max_reps = get_loop_count(&_regs, ad_bytes);                    \
1223
0
    if ( max_reps == 0 )                                                \
1224
0
    {                                                                   \
1225
0
        /*                                                              \
1226
0
         * Skip the instruction if no repetitions are required, but     \
1227
0
         * zero extend involved registers first when using 32-bit       \
1228
0
         * addressing in 64-bit mode.                                   \
1229
0
         */                                                             \
1230
0
        if ( mode_64bit() && ad_bytes == 4 )                            \
1231
0
        {                                                               \
1232
0
            _regs.r(cx) = 0;                                            \
1233
0
            if ( using_si ) _regs.r(si) = _regs.esi;                    \
1234
0
            if ( using_di ) _regs.r(di) = _regs.edi;                    \
1235
0
        }                                                               \
1236
0
        goto complete_insn;                                             \
1237
0
    }                                                                   \
1238
0
    if ( max_reps > 1 && (_regs.eflags & X86_EFLAGS_TF) &&              \
1239
0
         !is_branch_step(ctxt, ops) )                                   \
1240
0
        max_reps = 1;                                                   \
1241
0
    max_reps;                                                           \
1242
0
})
1243
1244
static void __put_rep_prefix(
1245
    struct cpu_user_regs *int_regs,
1246
    struct cpu_user_regs *ext_regs,
1247
    int ad_bytes,
1248
    unsigned long reps_completed)
1249
0
{
1250
0
    unsigned long ecx = get_loop_count(int_regs, ad_bytes);
1251
0
1252
0
    /* Reduce counter appropriately, and repeat instruction if non-zero. */
1253
0
    ecx -= reps_completed;
1254
0
    if ( ecx != 0 )
1255
0
        int_regs->r(ip) = ext_regs->r(ip);
1256
0
1257
0
    put_loop_count(int_regs, ad_bytes, ecx);
1258
0
}
1259
1260
0
#define put_rep_prefix(reps_completed) ({                               \
1261
0
    if ( rep_prefix() )                                                 \
1262
0
    {                                                                   \
1263
0
        __put_rep_prefix(&_regs, ctxt->regs, ad_bytes, reps_completed); \
1264
0
        if ( unlikely(rc == X86EMUL_EXCEPTION) )                        \
1265
0
            goto complete_insn;                                         \
1266
0
    }                                                                   \
1267
0
})
1268
1269
/* Clip maximum repetitions so that the index register at most just wraps. */
1270
0
#define truncate_ea_and_reps(ea, reps, bytes_per_rep) ({                  \
1271
0
    unsigned long todo__, ea__ = truncate_ea(ea);                         \
1272
0
    if ( !(_regs.eflags & X86_EFLAGS_DF) )                                \
1273
0
        todo__ = truncate_ea(-ea__) / (bytes_per_rep);                    \
1274
0
    else if ( truncate_ea(ea__ + (bytes_per_rep) - 1) < ea__ )            \
1275
0
        todo__ = 1;                                                       \
1276
0
    else                                                                  \
1277
0
        todo__ = ea__ / (bytes_per_rep) + 1;                              \
1278
0
    if ( !todo__ )                                                        \
1279
0
        (reps) = 1;                                                       \
1280
0
    else if ( todo__ < (reps) )                                           \
1281
0
        (reps) = todo__;                                                  \
1282
0
    ea__;                                                                 \
1283
0
})
1284
1285
/* Compatibility function: read guest memory, zero-extend result to a ulong. */
1286
static int read_ulong(
1287
        enum x86_segment seg,
1288
        unsigned long offset,
1289
        unsigned long *val,
1290
        unsigned int bytes,
1291
        struct x86_emulate_ctxt *ctxt,
1292
        const struct x86_emulate_ops *ops)
1293
56.0k
{
1294
56.0k
    *val = 0;
1295
56.0k
    return ops->read(seg, offset, val, bytes, ctxt);
1296
56.0k
}
1297
1298
/*
1299
 * Unsigned multiplication with double-word result.
1300
 * IN:  Multiplicand=m[0], Multiplier=m[1]
1301
 * OUT: Return CF/OF (overflow status); Result=m[1]:m[0]
1302
 */
1303
static bool mul_dbl(unsigned long m[2])
1304
0
{
1305
0
    bool rc;
1306
0
1307
0
    asm ( "mul %1" ASM_FLAG_OUT(, "; seto %2")
1308
0
          : "+a" (m[0]), "+d" (m[1]), ASM_FLAG_OUT("=@cco", "=qm") (rc) );
1309
0
1310
0
    return rc;
1311
0
}
1312
1313
/*
1314
 * Signed multiplication with double-word result.
1315
 * IN:  Multiplicand=m[0], Multiplier=m[1]
1316
 * OUT: Return CF/OF (overflow status); Result=m[1]:m[0]
1317
 */
1318
static bool imul_dbl(unsigned long m[2])
1319
0
{
1320
0
    bool rc;
1321
0
1322
0
    asm ( "imul %1" ASM_FLAG_OUT(, "; seto %2")
1323
0
          : "+a" (m[0]), "+d" (m[1]), ASM_FLAG_OUT("=@cco", "=qm") (rc) );
1324
0
1325
0
    return rc;
1326
0
}
1327
1328
/*
1329
 * Unsigned division of double-word dividend.
1330
 * IN:  Dividend=u[1]:u[0], Divisor=v
1331
 * OUT: Return 1: #DE
1332
 *      Return 0: Quotient=u[0], Remainder=u[1]
1333
 */
1334
static bool div_dbl(unsigned long u[2], unsigned long v)
1335
0
{
1336
0
    if ( (v == 0) || (u[1] >= v) )
1337
0
        return 1;
1338
0
    asm ( "div"__OS" %2" : "+a" (u[0]), "+d" (u[1]) : "rm" (v) );
1339
0
    return 0;
1340
0
}
1341
1342
/*
1343
 * Signed division of double-word dividend.
1344
 * IN:  Dividend=u[1]:u[0], Divisor=v
1345
 * OUT: Return 1: #DE
1346
 *      Return 0: Quotient=u[0], Remainder=u[1]
1347
 * NB. We don't use idiv directly as it's moderately hard to work out
1348
 *     ahead of time whether it will #DE, which we cannot allow to happen.
1349
 */
1350
static bool idiv_dbl(unsigned long u[2], long v)
1351
0
{
1352
0
    bool negu = (long)u[1] < 0, negv = v < 0;
1353
0
1354
0
    /* u = abs(u) */
1355
0
    if ( negu )
1356
0
    {
1357
0
        u[1] = ~u[1];
1358
0
        if ( (u[0] = -u[0]) == 0 )
1359
0
            u[1]++;
1360
0
    }
1361
0
1362
0
    /* abs(u) / abs(v) */
1363
0
    if ( div_dbl(u, negv ? -v : v) )
1364
0
        return 1;
1365
0
1366
0
    /* Remainder has same sign as dividend. It cannot overflow. */
1367
0
    if ( negu )
1368
0
        u[1] = -u[1];
1369
0
1370
0
    /* Quotient is overflowed if sign bit is set. */
1371
0
    if ( negu ^ negv )
1372
0
    {
1373
0
        if ( (long)u[0] >= 0 )
1374
0
            u[0] = -u[0];
1375
0
        else if ( (u[0] << 1) != 0 ) /* == 0x80...0 is okay */
1376
0
            return 1;
1377
0
    }
1378
0
    else if ( (long)u[0] < 0 )
1379
0
        return 1;
1380
0
1381
0
    return 0;
1382
0
}
1383
1384
static bool
1385
test_cc(
1386
    unsigned int condition, unsigned int flags)
1387
0
{
1388
0
    int rc = 0;
1389
0
1390
0
    switch ( (condition & 15) >> 1 )
1391
0
    {
1392
0
    case 0: /* o */
1393
0
        rc |= (flags & X86_EFLAGS_OF);
1394
0
        break;
1395
0
    case 1: /* b/c/nae */
1396
0
        rc |= (flags & X86_EFLAGS_CF);
1397
0
        break;
1398
0
    case 2: /* z/e */
1399
0
        rc |= (flags & X86_EFLAGS_ZF);
1400
0
        break;
1401
0
    case 3: /* be/na */
1402
0
        rc |= (flags & (X86_EFLAGS_CF | X86_EFLAGS_ZF));
1403
0
        break;
1404
0
    case 4: /* s */
1405
0
        rc |= (flags & X86_EFLAGS_SF);
1406
0
        break;
1407
0
    case 5: /* p/pe */
1408
0
        rc |= (flags & X86_EFLAGS_PF);
1409
0
        break;
1410
0
    case 7: /* le/ng */
1411
0
        rc |= (flags & X86_EFLAGS_ZF);
1412
0
        /* fall through */
1413
0
    case 6: /* l/nge */
1414
0
        rc |= (!(flags & X86_EFLAGS_SF) != !(flags & X86_EFLAGS_OF));
1415
0
        break;
1416
0
    }
1417
0
1418
0
    /* Odd condition identifiers (lsb == 1) have inverted sense. */
1419
0
    return (!!rc ^ (condition & 1));
1420
0
}
1421
1422
static int
1423
get_cpl(
1424
    struct x86_emulate_ctxt *ctxt,
1425
    const struct x86_emulate_ops  *ops)
1426
0
{
1427
0
    struct segment_register reg;
1428
0
1429
0
    if ( ctxt->regs->eflags & X86_EFLAGS_VM )
1430
0
        return 3;
1431
0
1432
0
    if ( (ops->read_segment == NULL) ||
1433
0
         ops->read_segment(x86_seg_ss, &reg, ctxt) )
1434
0
        return -1;
1435
0
1436
0
    return reg.dpl;
1437
0
}
1438
1439
static int
1440
_mode_iopl(
1441
    struct x86_emulate_ctxt *ctxt,
1442
    const struct x86_emulate_ops  *ops)
1443
0
{
1444
0
    int cpl = get_cpl(ctxt, ops);
1445
0
    if ( cpl == -1 )
1446
0
        return -1;
1447
0
    return cpl <= MASK_EXTR(ctxt->regs->eflags, X86_EFLAGS_IOPL);
1448
0
}
1449
1450
0
#define mode_ring0() ({                         \
1451
0
    int _cpl = get_cpl(ctxt, ops);              \
1452
0
    fail_if(_cpl < 0);                          \
1453
0
    (_cpl == 0);                                \
1454
0
})
1455
0
#define mode_iopl() ({                          \
1456
0
    int _iopl = _mode_iopl(ctxt, ops);          \
1457
0
    fail_if(_iopl < 0);                         \
1458
0
    _iopl;                                      \
1459
0
})
1460
#define mode_vif() ({                                        \
1461
    cr4 = 0;                                                 \
1462
    if ( ops->read_cr && get_cpl(ctxt, ops) == 3 )           \
1463
    {                                                        \
1464
        rc = ops->read_cr(4, &cr4, ctxt);                    \
1465
        if ( rc != X86EMUL_OKAY ) goto done;                 \
1466
    }                                                        \
1467
    !!(cr4 & (_regs.eflags & X86_EFLAGS_VM ? X86_CR4_VME : X86_CR4_PVI)); \
1468
})
1469
1470
static int ioport_access_check(
1471
    unsigned int first_port,
1472
    unsigned int bytes,
1473
    struct x86_emulate_ctxt *ctxt,
1474
    const struct x86_emulate_ops *ops)
1475
0
{
1476
0
    unsigned long iobmp;
1477
0
    struct segment_register tr;
1478
0
    int rc = X86EMUL_OKAY;
1479
0
1480
0
    if ( !(ctxt->regs->eflags & X86_EFLAGS_VM) && mode_iopl() )
1481
0
        return X86EMUL_OKAY;
1482
0
1483
0
    fail_if(ops->read_segment == NULL);
1484
0
    /*
1485
0
     * X86EMUL_DONE coming back here may be used to defer the port
1486
0
     * permission check to the respective ioport hook.
1487
0
     */
1488
0
    if ( (rc = ops->read_segment(x86_seg_tr, &tr, ctxt)) != 0 )
1489
0
        return rc == X86EMUL_DONE ? X86EMUL_OKAY : rc;
1490
0
1491
0
    /* Ensure the TSS has an io-bitmap-offset field. */
1492
0
    generate_exception_if(tr.type != 0xb, EXC_GP, 0);
1493
0
1494
0
    switch ( rc = read_ulong(x86_seg_tr, 0x66, &iobmp, 2, ctxt, ops) )
1495
0
    {
1496
0
    case X86EMUL_OKAY:
1497
0
        break;
1498
0
1499
0
    case X86EMUL_EXCEPTION:
1500
0
        generate_exception_if(!ctxt->event_pending, EXC_GP, 0);
1501
0
        /* fallthrough */
1502
0
1503
0
    default:
1504
0
        return rc;
1505
0
    }
1506
0
1507
0
    /* Read two bytes including byte containing first port. */
1508
0
    switch ( rc = read_ulong(x86_seg_tr, iobmp + first_port / 8,
1509
0
                             &iobmp, 2, ctxt, ops) )
1510
0
    {
1511
0
    case X86EMUL_OKAY:
1512
0
        break;
1513
0
1514
0
    case X86EMUL_EXCEPTION:
1515
0
        generate_exception_if(!ctxt->event_pending, EXC_GP, 0);
1516
0
        /* fallthrough */
1517
0
1518
0
    default:
1519
0
        return rc;
1520
0
    }
1521
0
1522
0
    generate_exception_if(iobmp & (((1 << bytes) - 1) << (first_port & 7)),
1523
0
                          EXC_GP, 0);
1524
0
1525
0
 done:
1526
0
    return rc;
1527
0
}
1528
1529
static bool
1530
in_realmode(
1531
    struct x86_emulate_ctxt *ctxt,
1532
    const struct x86_emulate_ops  *ops)
1533
0
{
1534
0
    unsigned long cr0;
1535
0
    int rc;
1536
0
1537
0
    if ( ops->read_cr == NULL )
1538
0
        return 0;
1539
0
1540
0
    rc = ops->read_cr(0, &cr0, ctxt);
1541
0
    return (!rc && !(cr0 & X86_CR0_PE));
1542
0
}
1543
1544
static bool
1545
in_protmode(
1546
    struct x86_emulate_ctxt *ctxt,
1547
    const struct x86_emulate_ops  *ops)
1548
0
{
1549
0
    return !(in_realmode(ctxt, ops) || (ctxt->regs->eflags & X86_EFLAGS_VM));
1550
0
}
1551
1552
0
#define EAX 0
1553
0
#define ECX 1
1554
0
#define EDX 2
1555
0
#define EBX 3
1556
1557
static bool vcpu_has(
1558
    unsigned int eax,
1559
    unsigned int reg,
1560
    unsigned int bit,
1561
    struct x86_emulate_ctxt *ctxt,
1562
    const struct x86_emulate_ops *ops)
1563
0
{
1564
0
    struct cpuid_leaf res;
1565
0
    int rc = X86EMUL_OKAY;
1566
0
1567
0
    fail_if(!ops->cpuid);
1568
0
    rc = ops->cpuid(eax, 0, &res, ctxt);
1569
0
    if ( rc == X86EMUL_OKAY )
1570
0
    {
1571
0
        switch ( reg )
1572
0
        {
1573
0
        case EAX: reg = res.a; break;
1574
0
        case EBX: reg = res.b; break;
1575
0
        case ECX: reg = res.c; break;
1576
0
        case EDX: reg = res.d; break;
1577
0
        default: BUG();
1578
0
        }
1579
0
        if ( !(reg & (1U << bit)) )
1580
0
            rc = ~X86EMUL_OKAY;
1581
0
    }
1582
0
1583
0
 done:
1584
0
    return rc == X86EMUL_OKAY;
1585
0
}
1586
1587
#define vcpu_has_fpu()         vcpu_has(         1, EDX,  0, ctxt, ops)
1588
#define vcpu_has_sep()         vcpu_has(         1, EDX, 11, ctxt, ops)
1589
#define vcpu_has_cx8()         vcpu_has(         1, EDX,  8, ctxt, ops)
1590
#define vcpu_has_cmov()        vcpu_has(         1, EDX, 15, ctxt, ops)
1591
0
#define vcpu_has_clflush()     vcpu_has(         1, EDX, 19, ctxt, ops)
1592
#define vcpu_has_mmx()         vcpu_has(         1, EDX, 23, ctxt, ops)
1593
#define vcpu_has_sse()         vcpu_has(         1, EDX, 25, ctxt, ops)
1594
#define vcpu_has_sse2()        vcpu_has(         1, EDX, 26, ctxt, ops)
1595
#define vcpu_has_sse3()        vcpu_has(         1, ECX,  0, ctxt, ops)
1596
#define vcpu_has_pclmulqdq()   vcpu_has(         1, ECX,  1, ctxt, ops)
1597
#define vcpu_has_ssse3()       vcpu_has(         1, ECX,  9, ctxt, ops)
1598
#define vcpu_has_cx16()        vcpu_has(         1, ECX, 13, ctxt, ops)
1599
#define vcpu_has_sse4_1()      vcpu_has(         1, ECX, 19, ctxt, ops)
1600
#define vcpu_has_sse4_2()      vcpu_has(         1, ECX, 20, ctxt, ops)
1601
#define vcpu_has_movbe()       vcpu_has(         1, ECX, 22, ctxt, ops)
1602
#define vcpu_has_popcnt()      vcpu_has(         1, ECX, 23, ctxt, ops)
1603
#define vcpu_has_aesni()       vcpu_has(         1, ECX, 25, ctxt, ops)
1604
#define vcpu_has_avx()         vcpu_has(         1, ECX, 28, ctxt, ops)
1605
#define vcpu_has_rdrand()      vcpu_has(         1, ECX, 30, ctxt, ops)
1606
#define vcpu_has_mmxext()     (vcpu_has(0x80000001, EDX, 22, ctxt, ops) || \
1607
                               vcpu_has_sse())
1608
#define vcpu_has_lahf_lm()     vcpu_has(0x80000001, ECX,  0, ctxt, ops)
1609
0
#define vcpu_has_cr8_legacy()  vcpu_has(0x80000001, ECX,  4, ctxt, ops)
1610
0
#define vcpu_has_lzcnt()       vcpu_has(0x80000001, ECX,  5, ctxt, ops)
1611
#define vcpu_has_sse4a()       vcpu_has(0x80000001, ECX,  6, ctxt, ops)
1612
0
#define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
1613
#define vcpu_has_tbm()         vcpu_has(0x80000001, ECX, 21, ctxt, ops)
1614
0
#define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
1615
#define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
1616
#define vcpu_has_avx2()        vcpu_has(         7, EBX,  5, ctxt, ops)
1617
#define vcpu_has_bmi2()        vcpu_has(         7, EBX,  8, ctxt, ops)
1618
0
#define vcpu_has_rtm()         vcpu_has(         7, EBX, 11, ctxt, ops)
1619
0
#define vcpu_has_mpx()         vcpu_has(         7, EBX, 14, ctxt, ops)
1620
#define vcpu_has_rdseed()      vcpu_has(         7, EBX, 18, ctxt, ops)
1621
#define vcpu_has_adx()         vcpu_has(         7, EBX, 19, ctxt, ops)
1622
#define vcpu_has_smap()        vcpu_has(         7, EBX, 20, ctxt, ops)
1623
#define vcpu_has_clflushopt()  vcpu_has(         7, EBX, 23, ctxt, ops)
1624
#define vcpu_has_clwb()        vcpu_has(         7, EBX, 24, ctxt, ops)
1625
#define vcpu_has_sha()         vcpu_has(         7, EBX, 29, ctxt, ops)
1626
#define vcpu_has_rdpid()       vcpu_has(         7, ECX, 22, ctxt, ops)
1627
#define vcpu_has_clzero()      vcpu_has(0x80000008, EBX,  0, ctxt, ops)
1628
1629
#define vcpu_must_have(feat) \
1630
0
    generate_exception_if(!vcpu_has_##feat(), EXC_UD)
1631
1632
#ifdef __XEN__
1633
/*
1634
 * Note the difference between vcpu_must_have(<feature>) and
1635
 * host_and_vcpu_must_have(<feature>): The latter needs to be used when
1636
 * emulation code is using the same instruction class for carrying out
1637
 * the actual operation.
1638
 */
1639
0
#define host_and_vcpu_must_have(feat) ({ \
1640
0
    generate_exception_if(!cpu_has_##feat, EXC_UD); \
1641
0
    vcpu_must_have(feat); \
1642
0
})
1643
#else
1644
/*
1645
 * For the test harness both are fine to be used interchangeably, i.e.
1646
 * features known to always be available (e.g. SSE/SSE2) to (64-bit) Xen
1647
 * may be checked for by just vcpu_must_have().
1648
 */
1649
#define host_and_vcpu_must_have(feat) vcpu_must_have(feat)
1650
#endif
1651
1652
static int
1653
realmode_load_seg(
1654
    enum x86_segment seg,
1655
    uint16_t sel,
1656
    struct segment_register *sreg,
1657
    struct x86_emulate_ctxt *ctxt,
1658
    const struct x86_emulate_ops *ops)
1659
0
{
1660
0
    int rc;
1661
0
1662
0
    if ( !ops->read_segment )
1663
0
        return X86EMUL_UNHANDLEABLE;
1664
0
1665
0
    if ( (rc = ops->read_segment(seg, sreg, ctxt)) == X86EMUL_OKAY )
1666
0
    {
1667
0
        sreg->sel  = sel;
1668
0
        sreg->base = (uint32_t)sel << 4;
1669
0
    }
1670
0
1671
0
    return rc;
1672
0
}
1673
1674
/*
1675
 * Passing in x86_seg_none means
1676
 * - suppress any exceptions other than #PF,
1677
 * - don't commit any state.
1678
 */
1679
static int
1680
protmode_load_seg(
1681
    enum x86_segment seg,
1682
    uint16_t sel, bool is_ret,
1683
    struct segment_register *sreg,
1684
    struct x86_emulate_ctxt *ctxt,
1685
    const struct x86_emulate_ops *ops)
1686
0
{
1687
0
    enum x86_segment sel_seg = (sel & 4) ? x86_seg_ldtr : x86_seg_gdtr;
1688
0
    struct { uint32_t a, b; } desc, desc_hi = {};
1689
0
    uint8_t dpl, rpl;
1690
0
    int cpl = get_cpl(ctxt, ops);
1691
0
    uint32_t a_flag = 0x100;
1692
0
    int rc, fault_type = EXC_GP;
1693
0
1694
0
    if ( cpl < 0 )
1695
0
        return X86EMUL_UNHANDLEABLE;
1696
0
1697
0
    /* NULL selector? */
1698
0
    if ( (sel & 0xfffc) == 0 )
1699
0
    {
1700
0
        switch ( seg )
1701
0
        {
1702
0
        case x86_seg_ss:
1703
0
            if ( mode_64bit() && (cpl != 3) && (cpl == sel) )
1704
0
        default:
1705
0
                break;
1706
0
            /* fall through */
1707
0
        case x86_seg_cs:
1708
0
        case x86_seg_tr:
1709
0
            goto raise_exn;
1710
0
        }
1711
0
        if ( ctxt->vendor != X86_VENDOR_AMD || !ops->read_segment ||
1712
0
             ops->read_segment(seg, sreg, ctxt) != X86EMUL_OKAY )
1713
0
            memset(sreg, 0, sizeof(*sreg));
1714
0
        else
1715
0
            sreg->attr = 0;
1716
0
        sreg->sel = sel;
1717
0
1718
0
        /* Since CPL == SS.DPL, we need to put back DPL. */
1719
0
        if ( seg == x86_seg_ss )
1720
0
            sreg->dpl = sel;
1721
0
1722
0
        return X86EMUL_OKAY;
1723
0
    }
1724
0
1725
0
    /* System segment descriptors must reside in the GDT. */
1726
0
    if ( is_x86_system_segment(seg) && (sel & 4) )
1727
0
        goto raise_exn;
1728
0
1729
0
    switch ( rc = ops->read(sel_seg, sel & 0xfff8, &desc, sizeof(desc), ctxt) )
1730
0
    {
1731
0
    case X86EMUL_OKAY:
1732
0
        break;
1733
0
1734
0
    case X86EMUL_EXCEPTION:
1735
0
        if ( !ctxt->event_pending )
1736
0
            goto raise_exn;
1737
0
        /* fallthrough */
1738
0
1739
0
    default:
1740
0
        return rc;
1741
0
    }
1742
0
1743
0
    /* System segments must have S flag == 0. */
1744
0
    if ( is_x86_system_segment(seg) && (desc.b & (1u << 12)) )
1745
0
        goto raise_exn;
1746
0
    /* User segments must have S flag == 1. */
1747
0
    if ( is_x86_user_segment(seg) && !(desc.b & (1u << 12)) )
1748
0
        goto raise_exn;
1749
0
1750
0
    dpl = (desc.b >> 13) & 3;
1751
0
    rpl = sel & 3;
1752
0
1753
0
    switch ( seg )
1754
0
    {
1755
0
    case x86_seg_cs:
1756
0
        /* Code segment? */
1757
0
        if ( !(desc.b & (1u<<11)) )
1758
0
            goto raise_exn;
1759
0
        if ( is_ret
1760
0
             ? /*
1761
0
                * Really rpl < cpl, but our sole caller doesn't handle
1762
0
                * privilege level changes.
1763
0
                */
1764
0
               rpl != cpl || (desc.b & (1 << 10) ? dpl > rpl : dpl != rpl)
1765
0
             : desc.b & (1 << 10)
1766
0
               /* Conforming segment: check DPL against CPL. */
1767
0
               ? dpl > cpl
1768
0
               /* Non-conforming segment: check RPL and DPL against CPL. */
1769
0
               : rpl > cpl || dpl != cpl )
1770
0
            goto raise_exn;
1771
0
        /*
1772
0
         * 64-bit code segments (L bit set) must have D bit clear.
1773
0
         * Experimentally in long mode, the L and D bits are checked before
1774
0
         * the Present bit.
1775
0
         */
1776
0
        if ( ctxt->lma && (desc.b & (1 << 21)) && (desc.b & (1 << 22)) )
1777
0
            goto raise_exn;
1778
0
        sel = (sel ^ rpl) | cpl;
1779
0
        break;
1780
0
    case x86_seg_ss:
1781
0
        /* Writable data segment? */
1782
0
        if ( (desc.b & (5u<<9)) != (1u<<9) )
1783
0
            goto raise_exn;
1784
0
        if ( (dpl != cpl) || (dpl != rpl) )
1785
0
            goto raise_exn;
1786
0
        break;
1787
0
    case x86_seg_ldtr:
1788
0
        /* LDT system segment? */
1789
0
        if ( (desc.b & (15u<<8)) != (2u<<8) )
1790
0
            goto raise_exn;
1791
0
        a_flag = 0;
1792
0
        break;
1793
0
    case x86_seg_tr:
1794
0
        /* Available TSS system segment? */
1795
0
        if ( (desc.b & (15u<<8)) != (9u<<8) )
1796
0
            goto raise_exn;
1797
0
        a_flag = 0x200; /* busy flag */
1798
0
        break;
1799
0
    default:
1800
0
        /* Readable code or data segment? */
1801
0
        if ( (desc.b & (5u<<9)) == (4u<<9) )
1802
0
            goto raise_exn;
1803
0
        /* Non-conforming segment: check DPL against RPL and CPL. */
1804
0
        if ( ((desc.b & (6u<<9)) != (6u<<9)) &&
1805
0
             ((dpl < cpl) || (dpl < rpl)) )
1806
0
            goto raise_exn;
1807
0
        break;
1808
0
    case x86_seg_none:
1809
0
        /* Non-conforming segment: check DPL against RPL and CPL. */
1810
0
        if ( ((desc.b & (0x1c << 8)) != (0x1c << 8)) &&
1811
0
             ((dpl < cpl) || (dpl < rpl)) )
1812
0
            return X86EMUL_EXCEPTION;
1813
0
        a_flag = 0;
1814
0
        break;
1815
0
    }
1816
0
1817
0
    /* Segment present in memory? */
1818
0
    if ( !(desc.b & (1 << 15)) && seg != x86_seg_none )
1819
0
    {
1820
0
        fault_type = seg != x86_seg_ss ? EXC_NP : EXC_SS;
1821
0
        goto raise_exn;
1822
0
    }
1823
0
1824
0
    if ( !is_x86_user_segment(seg) )
1825
0
    {
1826
0
        /*
1827
0
         * Whether to use an 8- or 16-byte descriptor in long mode depends
1828
0
         * on sub-mode, descriptor type, and vendor:
1829
0
         * - non-system descriptors are always 8-byte ones,
1830
0
         * - system descriptors are always 16-byte ones in 64-bit mode,
1831
0
         * - (call) gates are always 16-byte ones,
1832
0
         * - other system descriptors in compatibility mode have
1833
0
         *   - only their low 8-byte bytes read on Intel,
1834
0
         *   - all 16 bytes read with the high 8 bytes ignored on AMD.
1835
0
         */
1836
0
        bool wide = desc.b & 0x1000
1837
0
                    ? false : (desc.b & 0xf00) != 0xc00 &&
1838
0
                               ctxt->vendor != X86_VENDOR_AMD
1839
0
                               ? mode_64bit() : ctxt->lma;
1840
0
1841
0
        if ( wide )
1842
0
        {
1843
0
            switch ( rc = ops->read(sel_seg, (sel & 0xfff8) + 8,
1844
0
                                    &desc_hi, sizeof(desc_hi), ctxt) )
1845
0
            {
1846
0
            case X86EMUL_OKAY:
1847
0
                break;
1848
0
1849
0
            case X86EMUL_EXCEPTION:
1850
0
                if ( !ctxt->event_pending )
1851
0
                    goto raise_exn;
1852
0
                /* fall through */
1853
0
            default:
1854
0
                return rc;
1855
0
            }
1856
0
            if ( !mode_64bit() && ctxt->vendor == X86_VENDOR_AMD &&
1857
0
                 (desc.b & 0xf00) != 0xc00 )
1858
0
                desc_hi.b = desc_hi.a = 0;
1859
0
            if ( (desc_hi.b & 0x00001f00) ||
1860
0
                 (seg != x86_seg_none &&
1861
0
                  !is_canonical_address((uint64_t)desc_hi.a << 32)) )
1862
0
                goto raise_exn;
1863
0
        }
1864
0
    }
1865
0
1866
0
    /* Ensure Accessed flag is set. */
1867
0
    if ( a_flag && !(desc.b & a_flag) )
1868
0
    {
1869
0
        uint32_t new_desc_b = desc.b | a_flag;
1870
0
1871
0
        fail_if(!ops->cmpxchg);
1872
0
        switch ( (rc = ops->cmpxchg(sel_seg, (sel & 0xfff8) + 4, &desc.b,
1873
0
                                    &new_desc_b, sizeof(desc.b), ctxt)) )
1874
0
        {
1875
0
        case X86EMUL_OKAY:
1876
0
            break;
1877
0
1878
0
        case X86EMUL_EXCEPTION:
1879
0
            if ( !ctxt->event_pending )
1880
0
                goto raise_exn;
1881
0
            /* fallthrough */
1882
0
1883
0
        default:
1884
0
            return rc;
1885
0
        }
1886
0
1887
0
        /* Force the Accessed flag in our local copy. */
1888
0
        desc.b = new_desc_b;
1889
0
    }
1890
0
1891
0
    sreg->base = (((uint64_t)desc_hi.a << 32) |
1892
0
                  ((desc.b <<  0) & 0xff000000u) |
1893
0
                  ((desc.b << 16) & 0x00ff0000u) |
1894
0
                  ((desc.a >> 16) & 0x0000ffffu));
1895
0
    sreg->attr = (((desc.b >>  8) & 0x00ffu) |
1896
0
                  ((desc.b >> 12) & 0x0f00u));
1897
0
    sreg->limit = (desc.b & 0x000f0000u) | (desc.a & 0x0000ffffu);
1898
0
    if ( sreg->g )
1899
0
        sreg->limit = (sreg->limit << 12) | 0xfffu;
1900
0
    sreg->sel = sel;
1901
0
    return X86EMUL_OKAY;
1902
0
1903
0
 raise_exn:
1904
0
    generate_exception_if(seg != x86_seg_none, fault_type, sel & 0xfffc);
1905
0
    rc = X86EMUL_EXCEPTION;
1906
0
 done:
1907
0
    return rc;
1908
0
}
1909
1910
static int
1911
load_seg(
1912
    enum x86_segment seg,
1913
    uint16_t sel, bool is_ret,
1914
    struct segment_register *sreg,
1915
    struct x86_emulate_ctxt *ctxt,
1916
    const struct x86_emulate_ops *ops)
1917
0
{
1918
0
    struct segment_register reg;
1919
0
    int rc;
1920
0
1921
0
    if ( !ops->write_segment )
1922
0
        return X86EMUL_UNHANDLEABLE;
1923
0
1924
0
    if ( !sreg )
1925
0
        sreg = &reg;
1926
0
1927
0
    if ( in_protmode(ctxt, ops) )
1928
0
        rc = protmode_load_seg(seg, sel, is_ret, sreg, ctxt, ops);
1929
0
    else
1930
0
        rc = realmode_load_seg(seg, sel, sreg, ctxt, ops);
1931
0
1932
0
    if ( !rc && sreg == &reg )
1933
0
        rc = ops->write_segment(seg, sreg, ctxt);
1934
0
1935
0
    return rc;
1936
0
}
1937
1938
void *
1939
decode_register(
1940
    uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs)
1941
184k
{
1942
184k
    void *p;
1943
184k
1944
184k
    switch ( modrm_reg )
1945
184k
    {
1946
64.5k
    case  0: p = &regs->r(ax); break;
1947
58.3k
    case  1: p = &regs->r(cx); break;
1948
55.8k
    case  2: p = &regs->r(dx); break;
1949
392
    case  3: p = &regs->r(bx); break;
1950
0
    case  4: p = (highbyte_regs ? &regs->ah : (void *)&regs->r(sp)); break;
1951
0
    case  5: p = (highbyte_regs ? &regs->ch : (void *)&regs->r(bp)); break;
1952
47
    case  6: p = (highbyte_regs ? &regs->dh : (void *)&regs->r(si)); break;
1953
11
    case  7: p = (highbyte_regs ? &regs->bh : (void *)&regs->r(di)); break;
1954
64.5k
#if defined(__x86_64__)
1955
0
    case  8: p = &regs->r8;  break;
1956
0
    case  9: p = &regs->r9;  break;
1957
1
    case 10: p = &regs->r10; break;
1958
0
    case 11: p = &regs->r11; break;
1959
675
    case 12: mark_regs_dirty(regs); p = &regs->r12; break;
1960
559
    case 13: mark_regs_dirty(regs); p = &regs->r13; break;
1961
751
    case 14: mark_regs_dirty(regs); p = &regs->r14; break;
1962
3.40k
    case 15: mark_regs_dirty(regs); p = &regs->r15; break;
1963
64.5k
#endif
1964
0
    default: BUG(); p = NULL; break;
1965
184k
    }
1966
184k
1967
184k
    return p;
1968
184k
}
1969
1970
static void *decode_vex_gpr(unsigned int vex_reg, struct cpu_user_regs *regs,
1971
                            const struct x86_emulate_ctxt *ctxt)
1972
0
{
1973
0
    return decode_register(~vex_reg & (mode_64bit() ? 0xf : 7), regs, 0);
1974
0
}
1975
1976
static bool is_aligned(enum x86_segment seg, unsigned long offs,
1977
                       unsigned int size, struct x86_emulate_ctxt *ctxt,
1978
                       const struct x86_emulate_ops *ops)
1979
0
{
1980
0
    struct segment_register reg;
1981
0
1982
0
    /* Expecting powers of two only. */
1983
0
    ASSERT(!(size & (size - 1)));
1984
0
1985
0
    if ( mode_64bit() && seg < x86_seg_fs )
1986
0
        memset(&reg, 0, sizeof(reg));
1987
0
    else
1988
0
    {
1989
0
        /* No alignment checking when we have no way to read segment data. */
1990
0
        if ( !ops->read_segment )
1991
0
            return true;
1992
0
1993
0
        if ( ops->read_segment(seg, &reg, ctxt) != X86EMUL_OKAY )
1994
0
            return false;
1995
0
    }
1996
0
1997
0
    return !((reg.base + offs) & (size - 1));
1998
0
}
1999
2000
static bool is_branch_step(struct x86_emulate_ctxt *ctxt,
2001
                           const struct x86_emulate_ops *ops)
2002
0
{
2003
0
    uint64_t debugctl;
2004
0
2005
0
    return ops->read_msr &&
2006
0
           ops->read_msr(MSR_IA32_DEBUGCTLMSR, &debugctl, ctxt) == X86EMUL_OKAY &&
2007
0
           (debugctl & IA32_DEBUGCTLMSR_BTF);
2008
0
}
2009
2010
static bool umip_active(struct x86_emulate_ctxt *ctxt,
2011
                        const struct x86_emulate_ops *ops)
2012
0
{
2013
0
    unsigned long cr4;
2014
0
2015
0
    /* Intentionally not using mode_ring0() here to avoid its fail_if(). */
2016
0
    return get_cpl(ctxt, ops) > 0 &&
2017
0
           ops->read_cr && ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY &&
2018
0
           (cr4 & X86_CR4_UMIP);
2019
0
}
2020
2021
static void adjust_bnd(struct x86_emulate_ctxt *ctxt,
2022
                       const struct x86_emulate_ops *ops, enum vex_pfx pfx)
2023
0
{
2024
0
    uint64_t bndcfg;
2025
0
    int rc;
2026
0
2027
0
    if ( pfx == vex_f2 || !cpu_has_mpx || !vcpu_has_mpx() )
2028
0
        return;
2029
0
2030
0
    if ( !mode_ring0() )
2031
0
        bndcfg = read_bndcfgu();
2032
0
    else if ( !ops->read_msr ||
2033
0
              ops->read_msr(MSR_IA32_BNDCFGS, &bndcfg, ctxt) != X86EMUL_OKAY )
2034
0
        return;
2035
0
    if ( (bndcfg & IA32_BNDCFGS_ENABLE) && !(bndcfg & IA32_BNDCFGS_PRESERVE) )
2036
0
    {
2037
0
        /*
2038
0
         * Using BNDMK or any other MPX instruction here is pointless, as
2039
0
         * we run with MPX disabled ourselves, and hence they're all no-ops.
2040
0
         * Therefore we have two ways to clear BNDn: Enable MPX temporarily
2041
0
         * (in which case executing any suitable non-prefixed branch
2042
0
         * instruction would do), or use XRSTOR.
2043
0
         */
2044
0
        xstate_set_init(XSTATE_BNDREGS);
2045
0
    }
2046
0
 done:;
2047
0
}
2048
2049
int x86emul_unhandleable_rw(
2050
    enum x86_segment seg,
2051
    unsigned long offset,
2052
    void *p_data,
2053
    unsigned int bytes,
2054
    struct x86_emulate_ctxt *ctxt)
2055
0
{
2056
0
    return X86EMUL_UNHANDLEABLE;
2057
0
}
2058
2059
/* Helper definitions. */
2060
412k
#define op_bytes (state->op_bytes)
2061
120k
#define ad_bytes (state->ad_bytes)
2062
186k
#define ext (state->ext)
2063
240k
#define modrm (state->modrm)
2064
238k
#define modrm_mod (state->modrm_mod)
2065
123k
#define modrm_reg (state->modrm_reg)
2066
184k
#define modrm_rm (state->modrm_rm)
2067
247k
#define rex_prefix (state->rex_prefix)
2068
4.11k
#define lock_prefix (state->lock_prefix)
2069
2.19k
#define vex (state->vex)
2070
0
#define evex (state->evex)
2071
719k
#define ea (state->ea)
2072
2073
static int
2074
x86_decode_onebyte(
2075
    struct x86_emulate_state *state,
2076
    struct x86_emulate_ctxt *ctxt,
2077
    const struct x86_emulate_ops *ops)
2078
54.2k
{
2079
54.2k
    int rc = X86EMUL_OKAY;
2080
54.2k
2081
54.2k
    switch ( ctxt->opcode )
2082
54.2k
    {
2083
0
    case 0x06: /* push %%es */
2084
0
    case 0x07: /* pop %%es */
2085
0
    case 0x0e: /* push %%cs */
2086
0
    case 0x16: /* push %%ss */
2087
0
    case 0x17: /* pop %%ss */
2088
0
    case 0x1e: /* push %%ds */
2089
0
    case 0x1f: /* pop %%ds */
2090
0
    case 0x27: /* daa */
2091
0
    case 0x2f: /* das */
2092
0
    case 0x37: /* aaa */
2093
0
    case 0x3f: /* aas */
2094
0
    case 0x60: /* pusha */
2095
0
    case 0x61: /* popa */
2096
0
    case 0x62: /* bound */
2097
0
    case 0x82: /* Grp1 (x86/32 only) */
2098
0
    case 0xc4: /* les */
2099
0
    case 0xc5: /* lds */
2100
0
    case 0xce: /* into */
2101
0
    case 0xd4: /* aam */
2102
0
    case 0xd5: /* aad */
2103
0
    case 0xd6: /* salc */
2104
0
        state->not_64bit = true;
2105
0
        break;
2106
0
2107
0
    case 0x90: /* nop / pause */
2108
0
        if ( repe_prefix() )
2109
0
            ctxt->opcode |= X86EMUL_OPC_F3(0, 0);
2110
0
        break;
2111
0
2112
0
    case 0x9a: /* call (far, absolute) */
2113
0
    case 0xea: /* jmp (far, absolute) */
2114
0
        generate_exception_if(mode_64bit(), EXC_UD);
2115
0
2116
0
        imm1 = insn_fetch_bytes(op_bytes);
2117
0
        imm2 = insn_fetch_type(uint16_t);
2118
0
        break;
2119
0
2120
0
    case 0xa0: case 0xa1: /* mov mem.offs,{%al,%ax,%eax,%rax} */
2121
0
    case 0xa2: case 0xa3: /* mov {%al,%ax,%eax,%rax},mem.offs */
2122
0
        /* Source EA is not encoded via ModRM. */
2123
0
        ea.type = OP_MEM;
2124
0
        ea.mem.off = insn_fetch_bytes(ad_bytes);
2125
0
        break;
2126
0
2127
0
    case 0xb8 ... 0xbf: /* mov imm{16,32,64},r{16,32,64} */
2128
0
        if ( op_bytes == 8 ) /* Fetch more bytes to obtain imm64. */
2129
0
            imm1 = ((uint32_t)imm1 |
2130
0
                    ((uint64_t)insn_fetch_type(uint32_t) << 32));
2131
0
        break;
2132
0
2133
0
    case 0xc8: /* enter imm16,imm8 */
2134
0
        imm2 = insn_fetch_type(uint8_t);
2135
0
        break;
2136
0
2137
0
    case 0xff: /* Grp5 */
2138
0
        switch ( modrm_reg & 7 )
2139
0
        {
2140
0
        case 2: /* call (near) */
2141
0
        case 4: /* jmp (near) */
2142
0
        case 6: /* push */
2143
0
            if ( mode_64bit() && op_bytes == 4 )
2144
0
                op_bytes = 8;
2145
0
            /* fall through */
2146
0
        case 3: /* call (far, absolute indirect) */
2147
0
        case 5: /* jmp (far, absolute indirect) */
2148
0
            state->desc = DstNone | SrcMem | Mov;
2149
0
            break;
2150
0
        }
2151
0
        break;
2152
54.2k
    }
2153
54.2k
2154
54.2k
 done:
2155
54.2k
    return rc;
2156
54.2k
}
2157
2158
static int
2159
x86_decode_twobyte(
2160
    struct x86_emulate_state *state,
2161
    struct x86_emulate_ctxt *ctxt,
2162
    const struct x86_emulate_ops *ops)
2163
5.92k
{
2164
5.92k
    int rc = X86EMUL_OKAY;
2165
5.92k
2166
5.92k
    switch ( ctxt->opcode & X86EMUL_OPC_MASK )
2167
5.92k
    {
2168
0
    case 0x00: /* Grp6 */
2169
0
        switch ( modrm_reg & 6 )
2170
0
        {
2171
0
        case 0:
2172
0
            state->desc |= DstMem | SrcImplicit | Mov;
2173
0
            break;
2174
0
        case 2: case 4:
2175
0
            state->desc |= SrcMem16;
2176
0
            break;
2177
0
        }
2178
0
        break;
2179
0
2180
0
    case 0x78:
2181
0
        switch ( vex.pfx )
2182
0
        {
2183
0
        case vex_66: /* extrq $imm8, $imm8, xmm */
2184
0
        case vex_f2: /* insertq $imm8, $imm8, xmm, xmm */
2185
0
            imm1 = insn_fetch_type(uint8_t);
2186
0
            imm2 = insn_fetch_type(uint8_t);
2187
0
            break;
2188
0
        }
2189
0
        /* fall through */
2190
0
    case 0x10 ... 0x18:
2191
0
    case 0x28 ... 0x2f:
2192
0
    case 0x50 ... 0x77:
2193
0
    case 0x79 ... 0x7d:
2194
0
    case 0x7f:
2195
0
    case 0xc2 ... 0xc3:
2196
0
    case 0xc5 ... 0xc6:
2197
0
    case 0xd0 ... 0xef:
2198
0
    case 0xf1 ... 0xfe:
2199
0
        ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
2200
0
        break;
2201
0
2202
0
    case 0x20: case 0x22: /* mov to/from cr */
2203
0
        if ( lock_prefix && vcpu_has_cr8_legacy() )
2204
0
        {
2205
0
            modrm_reg += 8;
2206
0
            lock_prefix = false;
2207
0
        }
2208
0
        /* fall through */
2209
0
    case 0x21: case 0x23: /* mov to/from dr */
2210
0
        ASSERT(ea.type == OP_REG); /* Early operand adjustment ensures this. */
2211
0
        generate_exception_if(lock_prefix, EXC_UD);
2212
0
        op_bytes = mode_64bit() ? 8 : 4;
2213
0
        break;
2214
0
2215
0
    case 0x7e:
2216
0
        ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
2217
0
        if ( vex.pfx == vex_f3 ) /* movq xmm/m64,xmm */
2218
0
        {
2219
0
    case X86EMUL_OPC_VEX_F3(0, 0x7e): /* vmovq xmm/m64,xmm */
2220
0
            state->desc = DstImplicit | SrcMem | TwoOp;
2221
0
            state->simd_size = simd_other;
2222
0
            /* Avoid the state->desc clobbering of TwoOp below. */
2223
0
            return X86EMUL_OKAY;
2224
0
        }
2225
0
        break;
2226
0
2227
0
    case 0xae:
2228
0
        ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
2229
0
        /* fall through */
2230
0
    case X86EMUL_OPC_VEX(0, 0xae):
2231
0
        switch ( modrm_reg & 7 )
2232
0
        {
2233
0
        case 2: /* {,v}ldmxcsr */
2234
0
            state->desc = DstImplicit | SrcMem | Mov;
2235
0
            op_bytes = 4;
2236
0
            break;
2237
0
2238
0
        case 3: /* {,v}stmxcsr */
2239
0
            state->desc = DstMem | SrcImplicit | Mov;
2240
0
            op_bytes = 4;
2241
0
            break;
2242
0
        }
2243
0
        break;
2244
0
2245
0
    case 0xb8: /* jmpe / popcnt */
2246
0
        if ( rep_prefix() )
2247
0
            ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
2248
0
        break;
2249
0
2250
0
        /* Intentionally not handling here despite being modified by F3:
2251
0
    case 0xbc: bsf / tzcnt
2252
0
    case 0xbd: bsr / lzcnt
2253
0
         * They're being dealt with in the execution phase (if at all).
2254
0
         */
2255
0
2256
0
    case 0xc4: /* pinsrw */
2257
0
        ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
2258
0
        /* fall through */
2259
0
    case X86EMUL_OPC_VEX_66(0, 0xc4): /* vpinsrw */
2260
0
        state->desc = DstReg | SrcMem16;
2261
0
        break;
2262
0
2263
0
    case 0xf0:
2264
0
        ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
2265
0
        if ( vex.pfx == vex_f2 ) /* lddqu mem,xmm */
2266
0
        {
2267
0
        /* fall through */
2268
0
    case X86EMUL_OPC_VEX_F2(0, 0xf0): /* vlddqu mem,{x,y}mm */
2269
0
            state->desc = DstImplicit | SrcMem | TwoOp;
2270
0
            state->simd_size = simd_other;
2271
0
            /* Avoid the state->desc clobbering of TwoOp below. */
2272
0
            return X86EMUL_OKAY;
2273
0
        }
2274
0
        break;
2275
5.92k
    }
2276
5.92k
2277
5.92k
    /*
2278
5.92k
     * Scalar forms of most VEX-encoded TwoOp instructions have
2279
5.92k
     * three operands.  Those which do really have two operands
2280
5.92k
     * should have exited earlier.
2281
5.92k
     */
2282
5.92k
    if ( state->simd_size && vex.opcx &&
2283
0
         (vex.pfx & VEX_PREFIX_SCALAR_MASK) )
2284
0
        state->desc &= ~TwoOp;
2285
5.92k
2286
5.92k
 done:
2287
5.92k
    return rc;
2288
5.92k
}
2289
2290
static int
2291
x86_decode_0f38(
2292
    struct x86_emulate_state *state,
2293
    struct x86_emulate_ctxt *ctxt,
2294
    const struct x86_emulate_ops *ops)
2295
0
{
2296
0
    switch ( ctxt->opcode & X86EMUL_OPC_MASK )
2297
0
    {
2298
0
    case 0x00 ... 0xef:
2299
0
    case 0xf2 ... 0xf5:
2300
0
    case 0xf7 ... 0xff:
2301
0
        op_bytes = 0;
2302
0
        /* fall through */
2303
0
    case 0xf6: /* adcx / adox */
2304
0
        ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
2305
0
        break;
2306
0
2307
0
    case 0xf0: /* movbe / crc32 */
2308
0
        state->desc |= repne_prefix() ? ByteOp : Mov;
2309
0
        if ( rep_prefix() )
2310
0
            ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
2311
0
        break;
2312
0
2313
0
    case 0xf1: /* movbe / crc32 */
2314
0
        if ( repne_prefix() )
2315
0
            state->desc = DstReg | SrcMem;
2316
0
        if ( rep_prefix() )
2317
0
            ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
2318
0
        break;
2319
0
2320
0
    case X86EMUL_OPC_VEX(0, 0xf2):    /* andn */
2321
0
    case X86EMUL_OPC_VEX(0, 0xf3):    /* Grp 17 */
2322
0
    case X86EMUL_OPC_VEX(0, 0xf5):    /* bzhi */
2323
0
    case X86EMUL_OPC_VEX_F3(0, 0xf5): /* pext */
2324
0
    case X86EMUL_OPC_VEX_F2(0, 0xf5): /* pdep */
2325
0
    case X86EMUL_OPC_VEX_F2(0, 0xf6): /* mulx */
2326
0
    case X86EMUL_OPC_VEX(0, 0xf7):    /* bextr */
2327
0
    case X86EMUL_OPC_VEX_66(0, 0xf7): /* shlx */
2328
0
    case X86EMUL_OPC_VEX_F3(0, 0xf7): /* sarx */
2329
0
    case X86EMUL_OPC_VEX_F2(0, 0xf7): /* shrx */
2330
0
        break;
2331
0
2332
0
    default:
2333
0
        op_bytes = 0;
2334
0
        break;
2335
0
    }
2336
0
2337
0
    return X86EMUL_OKAY;
2338
0
}
2339
2340
static int
2341
x86_decode_0f3a(
2342
    struct x86_emulate_state *state,
2343
    struct x86_emulate_ctxt *ctxt,
2344
    const struct x86_emulate_ops *ops)
2345
0
{
2346
0
    if ( !vex.opcx )
2347
0
        ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
2348
0
2349
0
    switch ( ctxt->opcode & X86EMUL_OPC_MASK )
2350
0
    {
2351
0
    case X86EMUL_OPC_66(0, 0x14)
2352
0
     ... X86EMUL_OPC_66(0, 0x17):     /* pextr*, extractps */
2353
0
    case X86EMUL_OPC_VEX_66(0, 0x14)
2354
0
     ... X86EMUL_OPC_VEX_66(0, 0x17): /* vpextr*, vextractps */
2355
0
    case X86EMUL_OPC_VEX_F2(0, 0xf0): /* rorx */
2356
0
        break;
2357
0
2358
0
    case X86EMUL_OPC_66(0, 0x20):     /* pinsrb */
2359
0
    case X86EMUL_OPC_VEX_66(0, 0x20): /* vpinsrb */
2360
0
        state->desc = DstImplicit | SrcMem;
2361
0
        if ( modrm_mod != 3 )
2362
0
            state->desc |= ByteOp;
2363
0
        break;
2364
0
2365
0
    case X86EMUL_OPC_66(0, 0x22):     /* pinsr{d,q} */
2366
0
    case X86EMUL_OPC_VEX_66(0, 0x22): /* vpinsr{d,q} */
2367
0
        state->desc = DstImplicit | SrcMem;
2368
0
        break;
2369
0
2370
0
    default:
2371
0
        op_bytes = 0;
2372
0
        break;
2373
0
    }
2374
0
2375
0
    return X86EMUL_OKAY;
2376
0
}
2377
2378
static int
2379
x86_decode(
2380
    struct x86_emulate_state *state,
2381
    struct x86_emulate_ctxt *ctxt,
2382
    const struct x86_emulate_ops  *ops)
2383
60.1k
{
2384
60.1k
    uint8_t b, d, sib, sib_index, sib_base;
2385
60.1k
    unsigned int def_op_bytes, def_ad_bytes, opcode;
2386
60.1k
    enum x86_segment override_seg = x86_seg_none;
2387
60.1k
    bool pc_rel = false;
2388
60.1k
    int rc = X86EMUL_OKAY;
2389
60.1k
2390
60.1k
    ASSERT(ops->insn_fetch);
2391
60.1k
2392
60.1k
    memset(state, 0, sizeof(*state));
2393
60.1k
    ea.type = OP_NONE;
2394
60.1k
    ea.mem.seg = x86_seg_ds;
2395
60.1k
    ea.reg = PTR_POISON;
2396
60.1k
    state->regs = ctxt->regs;
2397
60.1k
    state->ip = ctxt->regs->r(ip);
2398
60.1k
2399
60.1k
    /* Initialise output state in x86_emulate_ctxt */
2400
60.1k
    ctxt->retire.raw = 0;
2401
60.1k
    x86_emul_reset_event(ctxt);
2402
60.1k
2403
60.1k
    op_bytes = def_op_bytes = ad_bytes = def_ad_bytes = ctxt->addr_size/8;
2404
60.1k
    if ( op_bytes == 8 )
2405
60.1k
    {
2406
60.1k
        op_bytes = def_op_bytes = 4;
2407
60.1k
#ifndef __x86_64__
2408
        return X86EMUL_UNHANDLEABLE;
2409
#endif
2410
60.1k
    }
2411
60.1k
2412
60.1k
    /* Prefix bytes. */
2413
60.1k
    for ( ; ; )
2414
65.3k
    {
2415
130k
        switch ( b = insn_fetch_type(uint8_t) )
2416
130k
        {
2417
1.09k
        case 0x66: /* operand-size override */
2418
1.09k
            op_bytes = def_op_bytes ^ 6;
2419
1.09k
            if ( !vex.pfx )
2420
1.09k
                vex.pfx = vex_66;
2421
1.09k
            break;
2422
0
        case 0x67: /* address-size override */
2423
0
            ad_bytes = def_ad_bytes ^ (mode_64bit() ? 12 : 6);
2424
0
            break;
2425
0
        case 0x2e: /* CS override */
2426
0
            override_seg = x86_seg_cs;
2427
0
            break;
2428
0
        case 0x3e: /* DS override */
2429
0
            override_seg = x86_seg_ds;
2430
0
            break;
2431
0
        case 0x26: /* ES override */
2432
0
            override_seg = x86_seg_es;
2433
0
            break;
2434
0
        case 0x64: /* FS override */
2435
0
            override_seg = x86_seg_fs;
2436
0
            break;
2437
0
        case 0x65: /* GS override */
2438
0
            override_seg = x86_seg_gs;
2439
0
            break;
2440
0
        case 0x36: /* SS override */
2441
0
            override_seg = x86_seg_ss;
2442
0
            break;
2443
0
        case 0xf0: /* LOCK */
2444
0
            lock_prefix = 1;
2445
0
            break;
2446
0
        case 0xf2: /* REPNE/REPNZ */
2447
0
            vex.pfx = vex_f2;
2448
0
            break;
2449
0
        case 0xf3: /* REP/REPE/REPZ */
2450
0
            vex.pfx = vex_f3;
2451
0
            break;
2452
4.08k
        case 0x40 ... 0x4f: /* REX */
2453
4.08k
            if ( !mode_64bit() )
2454
0
                goto done_prefixes;
2455
4.08k
            rex_prefix = b;
2456
4.08k
            continue;
2457
60.1k
        default:
2458
60.1k
            goto done_prefixes;
2459
130k
        }
2460
130k
2461
130k
        /* Any legacy prefix after a REX prefix nullifies its effect. */
2462
1.09k
        rex_prefix = 0;
2463
1.09k
    }
2464
60.1k
 done_prefixes:
2465
60.1k
2466
60.1k
    if ( rex_prefix & REX_W )
2467
0
        op_bytes = 8;
2468
60.1k
2469
60.1k
    /* Opcode byte(s). */
2470
60.1k
    d = opcode_table[b];
2471
60.1k
    if ( d == 0 && b == 0x0f )
2472
5.92k
    {
2473
5.92k
        /* Two-byte opcode. */
2474
11.8k
        b = insn_fetch_type(uint8_t);
2475
11.8k
        d = twobyte_table[b].desc;
2476
11.8k
        switch ( b )
2477
11.8k
        {
2478
5.92k
        default:
2479
5.92k
            opcode = b | MASK_INSR(0x0f, X86EMUL_OPC_EXT_MASK);
2480
5.92k
            ext = ext_0f;
2481
5.92k
            state->simd_size = twobyte_table[b].size;
2482
5.92k
            break;
2483
0
        case 0x38:
2484
0
            b = insn_fetch_type(uint8_t);
2485
0
            opcode = b | MASK_INSR(0x0f38, X86EMUL_OPC_EXT_MASK);
2486
0
            ext = ext_0f38;
2487
0
            break;
2488
0
        case 0x3a:
2489
0
            b = insn_fetch_type(uint8_t);
2490
0
            opcode = b | MASK_INSR(0x0f3a, X86EMUL_OPC_EXT_MASK);
2491
0
            ext = ext_0f3a;
2492
0
            break;
2493
11.8k
        }
2494
11.8k
    }
2495
60.1k
    else
2496
54.2k
        opcode = b;
2497
60.1k
2498
60.1k
    /* ModRM and SIB bytes. */
2499
60.1k
    if ( d & ModRM )
2500
60.1k
    {
2501
120k
        modrm = insn_fetch_type(uint8_t);
2502
60.1k
        modrm_mod = (modrm & 0xc0) >> 6;
2503
120k
2504
60.1k
        if ( !ext && ((b & ~1) == 0xc4 || (b == 0x8f && (modrm & 0x18)) ||
2505
54.2k
                      b == 0x62) )
2506
0
            switch ( def_ad_bytes )
2507
0
            {
2508
0
            default:
2509
0
                BUG(); /* Shouldn't be possible. */
2510
0
            case 2:
2511
0
                if ( state->regs->eflags & X86_EFLAGS_VM )
2512
0
                    break;
2513
0
                /* fall through */
2514
0
            case 4:
2515
0
                if ( modrm_mod != 3 || in_realmode(ctxt, ops) )
2516
0
                    break;
2517
0
                /* fall through */
2518
0
            case 8:
2519
0
                /* VEX / XOP / EVEX */
2520
0
                generate_exception_if(rex_prefix || vex.pfx, EXC_UD);
2521
0
                /*
2522
0
                 * With operand size override disallowed (see above), op_bytes
2523
0
                 * should not have changed from its default.
2524
0
                 */
2525
0
                ASSERT(op_bytes == def_op_bytes);
2526
0
2527
0
                vex.raw[0] = modrm;
2528
0
                if ( b == 0xc5 )
2529
0
                {
2530
0
                    opcode = X86EMUL_OPC_VEX_;
2531
0
                    vex.raw[1] = modrm;
2532
0
                    vex.opcx = vex_0f;
2533
0
                    vex.x = 1;
2534
0
                    vex.b = 1;
2535
0
                    vex.w = 0;
2536
0
                }
2537
0
                else
2538
0
                {
2539
0
                    vex.raw[1] = insn_fetch_type(uint8_t);
2540
0
                    if ( mode_64bit() )
2541
0
                    {
2542
0
                        if ( !vex.b )
2543
0
                            rex_prefix |= REX_B;
2544
0
                        if ( !vex.x )
2545
0
                            rex_prefix |= REX_X;
2546
0
                        if ( vex.w )
2547
0
                        {
2548
0
                            rex_prefix |= REX_W;
2549
0
                            op_bytes = 8;
2550
0
                        }
2551
0
                    }
2552
0
                    else
2553
0
                    {
2554
0
                        /* Operand size fixed at 4 (no override via W bit). */
2555
0
                        op_bytes = 4;
2556
0
                        vex.b = 1;
2557
0
                    }
2558
0
                    switch ( b )
2559
0
                    {
2560
0
                    case 0x62:
2561
0
                        opcode = X86EMUL_OPC_EVEX_;
2562
0
                        evex.raw[0] = vex.raw[0];
2563
0
                        evex.raw[1] = vex.raw[1];
2564
0
                        evex.raw[2] = insn_fetch_type(uint8_t);
2565
0
2566
0
                        generate_exception_if(evex.mbs || !evex.mbz, EXC_UD);
2567
0
2568
0
                        if ( !mode_64bit() )
2569
0
                        {
2570
0
                            generate_exception_if(!evex.RX, EXC_UD);
2571
0
                            evex.R = 1;
2572
0
                        }
2573
0
2574
0
                        vex.opcx = evex.opcx;
2575
0
                        break;
2576
0
                    case 0xc4:
2577
0
                        opcode = X86EMUL_OPC_VEX_;
2578
0
                        break;
2579
0
                    default:
2580
0
                        opcode = 0;
2581
0
                        break;
2582
0
                    }
2583
0
                }
2584
0
                if ( !vex.r )
2585
0
                    rex_prefix |= REX_R;
2586
0
2587
0
                ext = vex.opcx;
2588
0
                if ( b != 0x8f )
2589
0
                {
2590
0
                    b = insn_fetch_type(uint8_t);
2591
0
                    switch ( ext )
2592
0
                    {
2593
0
                    case vex_0f:
2594
0
                        opcode |= MASK_INSR(0x0f, X86EMUL_OPC_EXT_MASK);
2595
0
                        d = twobyte_table[b].desc;
2596
0
                        state->simd_size = twobyte_table[b].size;
2597
0
                        break;
2598
0
                    case vex_0f38:
2599
0
                        opcode |= MASK_INSR(0x0f38, X86EMUL_OPC_EXT_MASK);
2600
0
                        d = twobyte_table[0x38].desc;
2601
0
                        break;
2602
0
                    case vex_0f3a:
2603
0
                        opcode |= MASK_INSR(0x0f3a, X86EMUL_OPC_EXT_MASK);
2604
0
                        d = twobyte_table[0x3a].desc;
2605
0
                        break;
2606
0
                    default:
2607
0
                        rc = X86EMUL_UNRECOGNIZED;
2608
0
                        goto done;
2609
0
                    }
2610
0
                }
2611
0
                else if ( ext < ext_8f08 +
2612
0
                                sizeof(xop_table) / sizeof(*xop_table) )
2613
0
                {
2614
0
                    b = insn_fetch_type(uint8_t);
2615
0
                    opcode |= MASK_INSR(0x8f08 + ext - ext_8f08,
2616
0
                                        X86EMUL_OPC_EXT_MASK);
2617
0
                    d = xop_table[ext - ext_8f08];
2618
0
                }
2619
0
                else
2620
0
                {
2621
0
                    rc = X86EMUL_UNRECOGNIZED;
2622
0
                    goto done;
2623
0
                }
2624
0
2625
0
                opcode |= b | MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
2626
0
2627
0
                if ( !(d & ModRM) )
2628
0
                    break;
2629
0
2630
0
                modrm = insn_fetch_type(uint8_t);
2631
0
                modrm_mod = (modrm & 0xc0) >> 6;
2632
0
2633
0
                break;
2634
0
            }
2635
120k
    }
2636
60.1k
2637
60.1k
    if ( d & ModRM )
2638
60.1k
    {
2639
60.1k
        d &= ~ModRM;
2640
60.1k
#undef ModRM /* Only its aliases are valid to use from here on. */
2641
60.1k
        modrm_reg = ((rex_prefix & 4) << 1) | ((modrm & 0x38) >> 3);
2642
60.1k
        modrm_rm  = modrm & 0x07;
2643
60.1k
2644
60.1k
        /*
2645
60.1k
         * Early operand adjustments. Only ones affecting further processing
2646
60.1k
         * prior to the x86_decode_*() calls really belong here. That would
2647
60.1k
         * normally be only addition/removal of SrcImm/SrcImm16, so their
2648
60.1k
         * fetching can be taken care of by the common code below.
2649
60.1k
         */
2650
60.1k
        switch ( ext )
2651
60.1k
        {
2652
54.2k
        case ext_none:
2653
54.2k
            switch ( b )
2654
54.2k
            {
2655
0
            case 0xf6 ... 0xf7: /* Grp3 */
2656
0
                switch ( modrm_reg & 7 )
2657
0
                {
2658
0
                case 0 ... 1: /* test */
2659
0
                    d |= DstMem | SrcImm;
2660
0
                    break;
2661
0
                case 2: /* not */
2662
0
                case 3: /* neg */
2663
0
                    d |= DstMem;
2664
0
                    break;
2665
0
                case 4: /* mul */
2666
0
                case 5: /* imul */
2667
0
                case 6: /* div */
2668
0
                case 7: /* idiv */
2669
0
                    /*
2670
0
                     * DstEax isn't really precise for all cases; updates to
2671
0
                     * rDX get handled in an open coded manner.
2672
0
                     */
2673
0
                    d |= DstEax | SrcMem;
2674
0
                    break;
2675
0
                }
2676
0
                break;
2677
54.2k
            }
2678
54.2k
            break;
2679
54.2k
2680
5.92k
        case ext_0f:
2681
5.92k
            switch ( b )
2682
5.92k
            {
2683
0
            case 0x20: /* mov cr,reg */
2684
0
            case 0x21: /* mov dr,reg */
2685
0
            case 0x22: /* mov reg,cr */
2686
0
            case 0x23: /* mov reg,dr */
2687
0
                /*
2688
0
                 * Mov to/from cr/dr ignore the encoding of Mod, and behave as
2689
0
                 * if they were encoded as reg/reg instructions.  No futher
2690
0
                 * disp/SIB bytes are fetched.
2691
0
                 */
2692
0
                modrm_mod = 3;
2693
0
                break;
2694
5.92k
            }
2695
5.92k
            break;
2696
5.92k
2697
0
        case vex_0f38:
2698
0
            d = ext0f38_table[b].to_mem ? DstMem | SrcReg
2699
0
                                        : DstReg | SrcMem;
2700
0
            if ( ext0f38_table[b].two_op )
2701
0
                d |= TwoOp;
2702
0
            if ( ext0f38_table[b].vsib )
2703
0
                d |= vSIB;
2704
0
            state->simd_size = ext0f38_table[b].simd_size;
2705
0
            break;
2706
5.92k
2707
0
        case vex_0f3a:
2708
0
            /*
2709
0
             * Cannot update d here yet, as the immediate operand still
2710
0
             * needs fetching.
2711
0
             */
2712
0
        default:
2713
0
            break;
2714
60.1k
        }
2715
60.1k
2716
60.1k
        if ( modrm_mod == 3 )
2717
0
        {
2718
0
            modrm_rm |= (rex_prefix & 1) << 3;
2719
0
            ea.type = OP_REG;
2720
0
        }
2721
60.1k
        else if ( ad_bytes == 2 )
2722
0
        {
2723
0
            /* 16-bit ModR/M decode. */
2724
0
            generate_exception_if(d & vSIB, EXC_UD);
2725
0
            ea.type = OP_MEM;
2726
0
            switch ( modrm_rm )
2727
0
            {
2728
0
            case 0:
2729
0
                ea.mem.off = state->regs->bx + state->regs->si;
2730
0
                break;
2731
0
            case 1:
2732
0
                ea.mem.off = state->regs->bx + state->regs->di;
2733
0
                break;
2734
0
            case 2:
2735
0
                ea.mem.seg = x86_seg_ss;
2736
0
                ea.mem.off = state->regs->bp + state->regs->si;
2737
0
                break;
2738
0
            case 3:
2739
0
                ea.mem.seg = x86_seg_ss;
2740
0
                ea.mem.off = state->regs->bp + state->regs->di;
2741
0
                break;
2742
0
            case 4:
2743
0
                ea.mem.off = state->regs->si;
2744
0
                break;
2745
0
            case 5:
2746
0
                ea.mem.off = state->regs->di;
2747
0
                break;
2748
0
            case 6:
2749
0
                if ( modrm_mod == 0 )
2750
0
                    break;
2751
0
                ea.mem.seg = x86_seg_ss;
2752
0
                ea.mem.off = state->regs->bp;
2753
0
                break;
2754
0
            case 7:
2755
0
                ea.mem.off = state->regs->bx;
2756
0
                break;
2757
0
            }
2758
0
            switch ( modrm_mod )
2759
0
            {
2760
0
            case 0:
2761
0
                if ( modrm_rm == 6 )
2762
0
                    ea.mem.off = insn_fetch_type(int16_t);
2763
0
                break;
2764
0
            case 1:
2765
0
                ea.mem.off += insn_fetch_type(int8_t);
2766
0
                break;
2767
0
            case 2:
2768
0
                ea.mem.off += insn_fetch_type(int16_t);
2769
0
                break;
2770
0
            }
2771
0
        }
2772
60.1k
        else
2773
60.1k
        {
2774
60.1k
            /* 32/64-bit ModR/M decode. */
2775
60.1k
            ea.type = OP_MEM;
2776
60.1k
            if ( modrm_rm == 4 )
2777
58.3k
            {
2778
116k
                sib = insn_fetch_type(uint8_t);
2779
58.3k
                sib_index = ((sib >> 3) & 7) | ((rex_prefix << 2) & 8);
2780
58.3k
                sib_base  = (sib & 7) | ((rex_prefix << 3) & 8);
2781
58.3k
                if ( sib_index != 4 && !(d & vSIB) )
2782
58.3k
                    ea.mem.off = *(long *)decode_register(sib_index,
2783
58.3k
                                                          state->regs, 0);
2784
58.3k
                ea.mem.off <<= (sib >> 6) & 3;
2785
58.3k
                if ( (modrm_mod == 0) && ((sib_base & 7) == 5) )
2786
0
                    ea.mem.off += insn_fetch_type(int32_t);
2787
58.3k
                else if ( sib_base == 4 )
2788
0
                {
2789
0
                    ea.mem.seg  = x86_seg_ss;
2790
0
                    ea.mem.off += state->regs->r(sp);
2791
0
                    if ( !ext && (b == 0x8f) )
2792
0
                        /* POP <rm> computes its EA post increment. */
2793
0
                        ea.mem.off += ((mode_64bit() && (op_bytes == 4))
2794
0
                                       ? 8 : op_bytes);
2795
0
                }
2796
58.3k
                else if ( sib_base == 5 )
2797
0
                {
2798
0
                    ea.mem.seg  = x86_seg_ss;
2799
0
                    ea.mem.off += state->regs->r(bp);
2800
0
                }
2801
58.3k
                else
2802
58.3k
                    ea.mem.off += *(long *)decode_register(sib_base,
2803
58.3k
                                                           state->regs, 0);
2804
116k
            }
2805
60.1k
            else
2806
1.77k
            {
2807
1.77k
                generate_exception_if(d & vSIB, EXC_UD);
2808
1.77k
                modrm_rm |= (rex_prefix & 1) << 3;
2809
1.77k
                ea.mem.off = *(long *)decode_register(modrm_rm,
2810
1.77k
                                                      state->regs, 0);
2811
1.77k
                if ( (modrm_rm == 5) && (modrm_mod != 0) )
2812
0
                    ea.mem.seg = x86_seg_ss;
2813
1.77k
            }
2814
60.1k
            switch ( modrm_mod )
2815
60.1k
            {
2816
59.2k
            case 0:
2817
59.2k
                if ( (modrm_rm & 7) != 5 )
2818
59.2k
                    break;
2819
0
                ea.mem.off = insn_fetch_type(int32_t);
2820
0
                pc_rel = mode_64bit();
2821
0
                break;
2822
896
            case 1:
2823
896
                ea.mem.off += insn_fetch_type(int8_t);
2824
896
                break;
2825
0
            case 2:
2826
0
                ea.mem.off += insn_fetch_type(int32_t);
2827
0
                break;
2828
60.1k
            }
2829
60.1k
        }
2830
60.1k
    }
2831
60.1k
    else
2832
0
    {
2833
0
        modrm_mod = 0xff;
2834
0
        modrm_reg = modrm_rm = modrm = 0;
2835
0
    }
2836
60.1k
2837
60.1k
    if ( override_seg != x86_seg_none )
2838
0
        ea.mem.seg = override_seg;
2839
60.1k
2840
60.1k
    /* Fetch the immediate operand, if present. */
2841
60.1k
    switch ( d & SrcMask )
2842
60.1k
    {
2843
0
        unsigned int bytes;
2844
0
2845
6
    case SrcImm:
2846
6
        if ( !(d & ByteOp) )
2847
6
            bytes = op_bytes != 8 ? op_bytes : 4;
2848
6
        else
2849
0
        {
2850
0
    case SrcImmByte:
2851
0
            bytes = 1;
2852
0
        }
2853
6
        /* NB. Immediates are sign-extended as necessary. */
2854
6
        switch ( bytes )
2855
6
        {
2856
0
        case 1: imm1 = insn_fetch_type(int8_t);  break;
2857
0
        case 2: imm1 = insn_fetch_type(int16_t); break;
2858
12
        case 4: imm1 = insn_fetch_type(int32_t); break;
2859
6
        }
2860
6
        break;
2861
0
    case SrcImm16:
2862
0
        imm1 = insn_fetch_type(uint16_t);
2863
0
        break;
2864
60.1k
    }
2865
60.1k
2866
60.1k
    ctxt->opcode = opcode;
2867
60.1k
    state->desc = d;
2868
60.1k
2869
60.1k
    switch ( ext )
2870
60.1k
    {
2871
54.2k
    case ext_none:
2872
54.2k
        rc = x86_decode_onebyte(state, ctxt, ops);
2873
54.2k
        break;
2874
54.2k
2875
5.92k
    case ext_0f:
2876
5.92k
        rc = x86_decode_twobyte(state, ctxt, ops);
2877
5.92k
        break;
2878
54.2k
2879
0
    case ext_0f38:
2880
0
        rc = x86_decode_0f38(state, ctxt, ops);
2881
0
        break;
2882
54.2k
2883
0
    case ext_0f3a:
2884
0
        d = ext0f3a_table[b].to_mem ? DstMem | SrcReg : DstReg | SrcMem;
2885
0
        if ( ext0f3a_table[b].two_op )
2886
0
            d |= TwoOp;
2887
0
        else if ( ext0f3a_table[b].four_op && !mode_64bit() && vex.opcx )
2888
0
            imm1 &= 0x7f;
2889
0
        state->desc = d;
2890
0
        state->simd_size = ext0f3a_table[b].simd_size;
2891
0
        rc = x86_decode_0f3a(state, ctxt, ops);
2892
0
        break;
2893
54.2k
2894
0
    case ext_8f08:
2895
0
    case ext_8f09:
2896
0
    case ext_8f0a:
2897
0
        break;
2898
0
2899
0
    default:
2900
0
        ASSERT_UNREACHABLE();
2901
0
        return X86EMUL_UNIMPLEMENTED;
2902
60.1k
    }
2903
60.1k
2904
60.1k
    if ( ea.type == OP_MEM )
2905
60.1k
    {
2906
60.1k
        if ( pc_rel )
2907
0
            ea.mem.off += state->ip;
2908
60.1k
2909
60.1k
        ea.mem.off = truncate_ea(ea.mem.off);
2910
60.1k
    }
2911
60.1k
2912
60.1k
    /*
2913
60.1k
     * Simple op_bytes calculations. More complicated cases produce 0
2914
60.1k
     * and are further handled during execute.
2915
60.1k
     */
2916
60.1k
    switch ( state->simd_size )
2917
60.1k
    {
2918
60.1k
    case simd_none:
2919
60.1k
        /*
2920
60.1k
         * When prefix 66 has a meaning different from operand-size override,
2921
60.1k
         * operand size defaults to 4 and can't be overridden to 2.
2922
60.1k
         */
2923
60.1k
        if ( op_bytes == 2 &&
2924
1.09k
             (ctxt->opcode & X86EMUL_OPC_PFX_MASK) == X86EMUL_OPC_66(0, 0) )
2925
0
            op_bytes = 4;
2926
60.1k
        break;
2927
60.1k
2928
0
    case simd_packed_int:
2929
0
        switch ( vex.pfx )
2930
0
        {
2931
0
        case vex_none: op_bytes = 8;           break;
2932
0
        case vex_66:   op_bytes = 16 << vex.l; break;
2933
0
        default:       op_bytes = 0;           break;
2934
0
        }
2935
0
        break;
2936
0
2937
0
    case simd_single_fp:
2938
0
        if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
2939
0
        {
2940
0
            op_bytes = 0;
2941
0
            break;
2942
0
    case simd_packed_fp:
2943
0
            if ( vex.pfx & VEX_PREFIX_SCALAR_MASK )
2944
0
            {
2945
0
                op_bytes = 0;
2946
0
                break;
2947
0
            }
2948
0
        }
2949
0
        /* fall through */
2950
0
    case simd_any_fp:
2951
0
        switch ( vex.pfx )
2952
0
        {
2953
0
        default:     op_bytes = 16 << vex.l; break;
2954
0
        case vex_f3: op_bytes = 4;           break;
2955
0
        case vex_f2: op_bytes = 8;           break;
2956
0
        }
2957
0
        break;
2958
0
2959
0
    case simd_scalar_fp:
2960
0
        op_bytes = 4 << (ctxt->opcode & 1);
2961
0
        break;
2962
0
2963
0
    case simd_128:
2964
0
        op_bytes = 16;
2965
0
        break;
2966
0
2967
0
    default:
2968
0
        op_bytes = 0;
2969
0
        break;
2970
60.1k
    }
2971
60.1k
2972
60.1k
 done:
2973
60.1k
    return rc;
2974
60.1k
}
2975
2976
/* No insn fetching past this point. */
2977
#undef insn_fetch_bytes
2978
#undef insn_fetch_type
2979
2980
/* Undo DEBUG wrapper. */
2981
#undef x86_emulate
2982
2983
int
2984
x86_emulate(
2985
    struct x86_emulate_ctxt *ctxt,
2986
    const struct x86_emulate_ops *ops)
2987
60.1k
{
2988
60.1k
    /* Shadow copy of register state. Committed on successful emulation. */
2989
60.1k
    struct cpu_user_regs _regs = *ctxt->regs;
2990
60.1k
    struct x86_emulate_state state;
2991
60.1k
    int rc;
2992
60.1k
    uint8_t b, d, *opc = NULL;
2993
60.1k
    unsigned int first_byte = 0;
2994
60.1k
    bool singlestep = (_regs.eflags & X86_EFLAGS_TF) &&
2995
0
      !is_branch_step(ctxt, ops);
2996
60.1k
    bool sfence = false;
2997
60.1k
    struct operand src = { .reg = PTR_POISON };
2998
60.1k
    struct operand dst = { .reg = PTR_POISON };
2999
60.1k
    unsigned long cr4;
3000
60.1k
    struct fpu_insn_ctxt fic = { .type = X86EMUL_FPU_none, .exn_raised = -1 };
3001
60.1k
    struct x86_emulate_stub stub = {};
3002
60.1k
    DECLARE_ALIGNED(mmval_t, mmval);
3003
60.1k
3004
60.1k
    ASSERT(ops->read);
3005
60.1k
3006
60.1k
    rc = x86_decode(&state, ctxt, ops);
3007
60.1k
    if ( rc != X86EMUL_OKAY )
3008
0
        return rc;
3009
60.1k
3010
60.1k
    /* Sync rIP to post decode value. */
3011
60.1k
    _regs.r(ip) = state.ip;
3012
60.1k
3013
60.1k
    if ( ops->validate )
3014
60.1k
    {
3015
60.1k
#ifndef NDEBUG
3016
60.1k
        state.caller = __builtin_return_address(0);
3017
60.1k
#endif
3018
60.1k
        rc = ops->validate(&state, ctxt);
3019
60.1k
#ifndef NDEBUG
3020
60.1k
        state.caller = NULL;
3021
60.1k
#endif
3022
60.1k
        if ( rc == X86EMUL_DONE )
3023
0
            goto complete_insn;
3024
60.1k
        if ( rc != X86EMUL_OKAY )
3025
0
            return rc;
3026
60.1k
    }
3027
60.1k
3028
60.1k
    b = ctxt->opcode;
3029
60.1k
    d = state.desc;
3030
606k
#define state (&state)
3031
60.1k
3032
60.1k
    generate_exception_if(state->not_64bit && mode_64bit(), EXC_UD);
3033
60.1k
3034
60.1k
    if ( ea.type == OP_REG )
3035
0
        ea.reg = decode_register(modrm_rm, &_regs,
3036
0
                                 (d & ByteOp) && !rex_prefix);
3037
60.1k
3038
60.1k
    memset(mmvalp, 0xaa /* arbitrary */, sizeof(*mmvalp));
3039
60.1k
3040
60.1k
    /* Decode and fetch the source operand: register, memory or immediate. */
3041
60.1k
    switch ( d & SrcMask )
3042
60.1k
    {
3043
0
    case SrcNone: /* case SrcImplicit: */
3044
0
        src.type = OP_NONE;
3045
0
        break;
3046
4.11k
    case SrcReg:
3047
4.11k
        src.type = OP_REG;
3048
4.11k
        if ( d & ByteOp )
3049
607
        {
3050
607
            src.reg = decode_register(modrm_reg, &_regs, (rex_prefix == 0));
3051
607
            src.val = *(uint8_t *)src.reg;
3052
607
            src.bytes = 1;
3053
607
        }
3054
4.11k
        else
3055
3.50k
        {
3056
3.50k
            src.reg = decode_register(modrm_reg, &_regs, 0);
3057
3.50k
            switch ( (src.bytes = op_bytes) )
3058
3.50k
            {
3059
1.09k
            case 2: src.val = *(uint16_t *)src.reg; break;
3060
2.41k
            case 4: src.val = *(uint32_t *)src.reg; break;
3061
0
            case 8: src.val = *(uint64_t *)src.reg; break;
3062
3.50k
            }
3063
3.50k
        }
3064
4.11k
        break;
3065
2.87k
    case SrcMem16:
3066
2.87k
        ea.bytes = 2;
3067
2.87k
        goto srcmem_common;
3068
53.1k
    case SrcMem:
3069
53.1k
        if ( state->simd_size )
3070
0
            break;
3071
53.1k
        ea.bytes = (d & ByteOp) ? 1 : op_bytes;
3072
56.0k
    srcmem_common:
3073
56.0k
        src = ea;
3074
56.0k
        if ( src.type == OP_REG )
3075
0
        {
3076
0
            switch ( src.bytes )
3077
0
            {
3078
0
            case 1: src.val = *(uint8_t  *)src.reg; break;
3079
0
            case 2: src.val = *(uint16_t *)src.reg; break;
3080
0
            case 4: src.val = *(uint32_t *)src.reg; break;
3081
0
            case 8: src.val = *(uint64_t *)src.reg; break;
3082
0
            }
3083
0
        }
3084
56.0k
        else if ( (rc = read_ulong(src.mem.seg, src.mem.off,
3085
56.0k
                                   &src.val, src.bytes, ctxt, ops)) )
3086
0
            goto done;
3087
56.0k
        break;
3088
6
    case SrcImm:
3089
6
        if ( !(d & ByteOp) )
3090
6
            src.bytes = op_bytes != 8 ? op_bytes : 4;
3091
6
        else
3092
0
        {
3093
0
    case SrcImmByte:
3094
0
            src.bytes = 1;
3095
0
        }
3096
6
        src.type  = OP_IMM;
3097
6
        src.val   = imm1;
3098
6
        break;
3099
0
    case SrcImm16:
3100
0
        src.type  = OP_IMM;
3101
0
        src.bytes = 2;
3102
0
        src.val   = imm1;
3103
0
        break;
3104
60.1k
    }
3105
60.1k
3106
60.1k
    /* Decode and fetch the destination operand: register or memory. */
3107
60.1k
    switch ( d & DstMask )
3108
60.1k
    {
3109
0
    case DstNone: /* case DstImplicit: */
3110
0
        /*
3111
0
         * The only implicit-operands instructions allowed a LOCK prefix are
3112
0
         * CMPXCHG{8,16}B (MOV CRn is being handled elsewhere).
3113
0
         */
3114
0
        generate_exception_if(lock_prefix &&
3115
0
                              (vex.opcx || ext != ext_0f || b != 0xc7 ||
3116
0
                               (modrm_reg & 7) != 1 || ea.type != OP_MEM),
3117
0
                              EXC_UD);
3118
0
        dst.type = OP_NONE;
3119
0
        break;
3120
0
3121
56.0k
    case DstReg:
3122
56.0k
        generate_exception_if(lock_prefix, EXC_UD);
3123
56.0k
        dst.type = OP_REG;
3124
56.0k
        if ( d & ByteOp )
3125
3.05k
        {
3126
3.05k
            dst.reg = decode_register(modrm_reg, &_regs, (rex_prefix == 0));
3127
3.05k
            dst.val = *(uint8_t *)dst.reg;
3128
3.05k
            dst.bytes = 1;
3129
3.05k
        }
3130
56.0k
        else
3131
52.9k
        {
3132
52.9k
            dst.reg = decode_register(modrm_reg, &_regs, 0);
3133
52.9k
            switch ( (dst.bytes = op_bytes) )
3134
52.9k
            {
3135
0
            case 2: dst.val = *(uint16_t *)dst.reg; break;
3136
52.9k
            case 4: dst.val = *(uint32_t *)dst.reg; break;
3137
0
            case 8: dst.val = *(uint64_t *)dst.reg; break;
3138
52.9k
            }
3139
52.9k
        }
3140
56.0k
        break;
3141
0
    case DstBitBase:
3142
0
        if ( ea.type == OP_MEM )
3143
0
        {
3144
0
            /*
3145
0
             * Instructions such as bt can reference an arbitrary offset from
3146
0
             * their memory operand, but the instruction doing the actual
3147
0
             * emulation needs the appropriate op_bytes read from memory.
3148
0
             * Adjust both the source register and memory operand to make an
3149
0
             * equivalent instruction.
3150
0
             *
3151
0
             * EA       += BitOffset DIV op_bytes*8
3152
0
             * BitOffset = BitOffset MOD op_bytes*8
3153
0
             * DIV truncates towards negative infinity.
3154
0
             * MOD always produces a positive result.
3155
0
             */
3156
0
            if ( op_bytes == 2 )
3157
0
                src.val = (int16_t)src.val;
3158
0
            else if ( op_bytes == 4 )
3159
0
                src.val = (int32_t)src.val;
3160
0
            if ( (long)src.val < 0 )
3161
0
                ea.mem.off -=
3162
0
                    op_bytes + (((-src.val - 1) >> 3) & ~(op_bytes - 1L));
3163
0
            else
3164
0
                ea.mem.off += (src.val >> 3) & ~(op_bytes - 1L);
3165
0
            ea.mem.off = truncate_ea(ea.mem.off);
3166
0
        }
3167
0
3168
0
        /* Bit index always truncated to within range. */
3169
0
        src.val &= (op_bytes << 3) - 1;
3170
0
3171
0
        d = (d & ~DstMask) | DstMem;
3172
0
        /* Becomes a normal DstMem operation from here on. */
3173
4.11k
    case DstMem:
3174
4.11k
        if ( state->simd_size )
3175
0
        {
3176
0
            generate_exception_if(lock_prefix, EXC_UD);
3177
0
            break;
3178
0
        }
3179
4.11k
        ea.bytes = (d & ByteOp) ? 1 : op_bytes;
3180
4.11k
        dst = ea;
3181
4.11k
        if ( dst.type == OP_REG )
3182
0
        {
3183
0
            generate_exception_if(lock_prefix, EXC_UD);
3184
0
            switch ( dst.bytes )
3185
0
            {
3186
0
            case 1: dst.val = *(uint8_t  *)dst.reg; break;
3187
0
            case 2: dst.val = *(uint16_t *)dst.reg; break;
3188
0
            case 4: dst.val = *(uint32_t *)dst.reg; break;
3189
0
            case 8: dst.val = *(uint64_t *)dst.reg; break;
3190
0
            }
3191
0
        }
3192
4.11k
        else if ( !(d & Mov) ) /* optimisation - avoid slow emulated read */
3193
0
        {
3194
0
            fail_if(lock_prefix ? !ops->cmpxchg : !ops->write);
3195
0
            if ( (rc = read_ulong(dst.mem.seg, dst.mem.off,
3196
0
                                  &dst.val, dst.bytes, ctxt, ops)) )
3197
0
                goto done;
3198
0
            dst.orig_val = dst.val;
3199
0
        }
3200
4.11k
        else
3201
4.11k
        {
3202
4.11k
            /* Lock prefix is allowed only on RMW instructions. */
3203
4.11k
            generate_exception_if(lock_prefix, EXC_UD);
3204
4.11k
            fail_if(!ops->write);
3205
4.11k
        }
3206
4.11k
        break;
3207
60.1k
    }
3208
60.1k
3209
60.1k
    switch ( ctxt->opcode )
3210
60.1k
    {
3211
0
        enum x86_segment seg;
3212
0
        struct segment_register cs, sreg;
3213
0
        struct cpuid_leaf cpuid_leaf;
3214
0
        uint64_t msr_val;
3215
0
        unsigned int i, n;
3216
0
        unsigned long dummy;
3217
0
3218
0
    case 0x00 ... 0x05: add: /* add */
3219
0
        emulate_2op_SrcV("add", src, dst, _regs.eflags);
3220
0
        break;
3221
0
3222
0
    case 0x08 ... 0x0d: or:  /* or */
3223
0
        emulate_2op_SrcV("or", src, dst, _regs.eflags);
3224
0
        break;
3225
0
3226
0
    case 0x10 ... 0x15: adc: /* adc */
3227
0
        emulate_2op_SrcV("adc", src, dst, _regs.eflags);
3228
0
        break;
3229
0
3230
0
    case 0x18 ... 0x1d: sbb: /* sbb */
3231
0
        emulate_2op_SrcV("sbb", src, dst, _regs.eflags);
3232
0
        break;
3233
0
3234
0
    case 0x20 ... 0x25: and: /* and */
3235
0
        emulate_2op_SrcV("and", src, dst, _regs.eflags);
3236
0
        break;
3237
0
3238
0
    case 0x28 ... 0x2d: sub: /* sub */
3239
0
        emulate_2op_SrcV("sub", src, dst, _regs.eflags);
3240
0
        break;
3241
0
3242
0
    case 0x30 ... 0x35: xor: /* xor */
3243
0
        emulate_2op_SrcV("xor", src, dst, _regs.eflags);
3244
0
        break;
3245
0
3246
0
    case 0x38 ... 0x3d: cmp: /* cmp */
3247
0
        generate_exception_if(lock_prefix, EXC_UD);
3248
0
        emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
3249
0
        dst.type = OP_NONE;
3250
0
        break;
3251
0
3252
0
    case 0x06: /* push %%es */
3253
0
    case 0x0e: /* push %%cs */
3254
0
    case 0x16: /* push %%ss */
3255
0
    case 0x1e: /* push %%ds */
3256
0
    case X86EMUL_OPC(0x0f, 0xa0): /* push %%fs */
3257
0
    case X86EMUL_OPC(0x0f, 0xa8): /* push %%gs */
3258
0
        fail_if(ops->read_segment == NULL);
3259
0
        if ( (rc = ops->read_segment((b >> 3) & 7, &sreg,
3260
0
                                     ctxt)) != X86EMUL_OKAY )
3261
0
            goto done;
3262
0
        src.val = sreg.sel;
3263
0
        goto push;
3264
0
3265
0
    case 0x07: /* pop %%es */
3266
0
    case 0x17: /* pop %%ss */
3267
0
    case 0x1f: /* pop %%ds */
3268
0
    case X86EMUL_OPC(0x0f, 0xa1): /* pop %%fs */
3269
0
    case X86EMUL_OPC(0x0f, 0xa9): /* pop %%gs */
3270
0
        fail_if(ops->write_segment == NULL);
3271
0
        /* 64-bit mode: POP defaults to a 64-bit operand. */
3272
0
        if ( mode_64bit() && (op_bytes == 4) )
3273
0
            op_bytes = 8;
3274
0
        seg = (b >> 3) & 7;
3275
0
        if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes), &dst.val,
3276
0
                              op_bytes, ctxt, ops)) != X86EMUL_OKAY ||
3277
0
             (rc = load_seg(seg, dst.val, 0, NULL, ctxt, ops)) != X86EMUL_OKAY )
3278
0
            goto done;
3279
0
        if ( seg == x86_seg_ss )
3280
0
            ctxt->retire.mov_ss = true;
3281
0
        break;
3282
0
3283
0
    case 0x27: /* daa */
3284
0
    case 0x2f: /* das */ {
3285
0
        uint8_t al = _regs.al;
3286
0
        unsigned int eflags = _regs.eflags;
3287
0
3288
0
        _regs.eflags &= ~(X86_EFLAGS_CF | X86_EFLAGS_AF | X86_EFLAGS_SF |
3289
0
                          X86_EFLAGS_ZF | X86_EFLAGS_PF);
3290
0
        if ( ((al & 0x0f) > 9) || (eflags & X86_EFLAGS_AF) )
3291
0
        {
3292
0
            _regs.eflags |= X86_EFLAGS_AF;
3293
0
            if ( b == 0x2f && (al < 6 || (eflags & X86_EFLAGS_CF)) )
3294
0
                _regs.eflags |= X86_EFLAGS_CF;
3295
0
            _regs.al += (b == 0x27) ? 6 : -6;
3296
0
        }
3297
0
        if ( (al > 0x99) || (eflags & X86_EFLAGS_CF) )
3298
0
        {
3299
0
            _regs.al += (b == 0x27) ? 0x60 : -0x60;
3300
0
            _regs.eflags |= X86_EFLAGS_CF;
3301
0
        }
3302
0
        _regs.eflags |= !_regs.al ? X86_EFLAGS_ZF : 0;
3303
0
        _regs.eflags |= ((int8_t)_regs.al < 0) ? X86_EFLAGS_SF : 0;
3304
0
        _regs.eflags |= even_parity(_regs.al) ? X86_EFLAGS_PF : 0;
3305
0
        break;
3306
0
    }
3307
0
3308
0
    case 0x37: /* aaa */
3309
0
    case 0x3f: /* aas */
3310
0
        _regs.eflags &= ~X86_EFLAGS_CF;
3311
0
        if ( (_regs.al > 9) || (_regs.eflags & X86_EFLAGS_AF) )
3312
0
        {
3313
0
            _regs.al += (b == 0x37) ? 6 : -6;
3314
0
            _regs.ah += (b == 0x37) ? 1 : -1;
3315
0
            _regs.eflags |= X86_EFLAGS_CF | X86_EFLAGS_AF;
3316
0
        }
3317
0
        _regs.al &= 0x0f;
3318
0
        break;
3319
0
3320
0
    case 0x40 ... 0x4f: /* inc/dec reg */
3321
0
        dst.type  = OP_REG;
3322
0
        dst.reg   = decode_register(b & 7, &_regs, 0);
3323
0
        dst.bytes = op_bytes;
3324
0
        dst.val   = *dst.reg;
3325
0
        if ( b & 8 )
3326
0
            emulate_1op("dec", dst, _regs.eflags);
3327
0
        else
3328
0
            emulate_1op("inc", dst, _regs.eflags);
3329
0
        break;
3330
0
3331
0
    case 0x50 ... 0x57: /* push reg */
3332
0
        src.val = *(unsigned long *)decode_register(
3333
0
            (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0);
3334
0
        goto push;
3335
0
3336
0
    case 0x58 ... 0x5f: /* pop reg */
3337
0
        dst.type  = OP_REG;
3338
0
        dst.reg   = decode_register(
3339
0
            (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0);
3340
0
        dst.bytes = op_bytes;
3341
0
        if ( mode_64bit() && (dst.bytes == 4) )
3342
0
            dst.bytes = 8;
3343
0
        if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(dst.bytes),
3344
0
                              &dst.val, dst.bytes, ctxt, ops)) != 0 )
3345
0
            goto done;
3346
0
        break;
3347
0
3348
0
    case 0x60: /* pusha */
3349
0
        fail_if(!ops->write);
3350
0
        ea.val = _regs.esp;
3351
0
        for ( i = 0; i < 8; i++ )
3352
0
        {
3353
0
            void *reg = decode_register(i, &_regs, 0);
3354
0
3355
0
            if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
3356
0
                                  reg != &_regs.esp ? reg : &ea.val,
3357
0
                                  op_bytes, ctxt)) != 0 )
3358
0
                goto done;
3359
0
        }
3360
0
        break;
3361
0
3362
0
    case 0x61: /* popa */
3363
0
        for ( i = 0; i < 8; i++ )
3364
0
        {
3365
0
            void *reg = decode_register(7 - i, &_regs, 0);
3366
0
3367
0
            if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes),
3368
0
                                  &dst.val, op_bytes, ctxt, ops)) != 0 )
3369
0
                goto done;
3370
0
            if ( reg == &_regs.r(sp) )
3371
0
                continue;
3372
0
            if ( op_bytes == 2 )
3373
0
                *(uint16_t *)reg = dst.val;
3374
0
            else
3375
0
                *(unsigned long *)reg = dst.val;
3376
0
        }
3377
0
        break;
3378
0
3379
0
    case 0x62: /* bound */ {
3380
0
        int lb, ub, idx;
3381
0
3382
0
        generate_exception_if(src.type != OP_MEM, EXC_UD);
3383
0
        if ( (rc = read_ulong(src.mem.seg, truncate_ea(src.mem.off + op_bytes),
3384
0
                              &ea.val, op_bytes, ctxt, ops)) )
3385
0
            goto done;
3386
0
        ub  = (op_bytes == 2) ? (int16_t)ea.val   : (int32_t)ea.val;
3387
0
        lb  = (op_bytes == 2) ? (int16_t)src.val  : (int32_t)src.val;
3388
0
        idx = (op_bytes == 2) ? (int16_t)dst.val  : (int32_t)dst.val;
3389
0
        generate_exception_if((idx < lb) || (idx > ub), EXC_BR);
3390
0
        dst.type = OP_NONE;
3391
0
        break;
3392
0
    }
3393
0
3394
0
    case 0x63: /* movsxd (x86/64) / arpl (x86/32) */
3395
0
        if ( mode_64bit() )
3396
0
        {
3397
0
            /* movsxd */
3398
0
            if ( ea.type == OP_REG )
3399
0
                src.val = *ea.reg;
3400
0
            else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off,
3401
0
                                       &src.val, 4, ctxt, ops)) )
3402
0
                goto done;
3403
0
            dst.val = (int32_t)src.val;
3404
0
        }
3405
0
        else
3406
0
        {
3407
0
            /* arpl */
3408
0
            unsigned int src_rpl = dst.val & 3;
3409
0
3410
0
            dst = ea;
3411
0
            dst.bytes = 2;
3412
0
            if ( dst.type == OP_REG )
3413
0
                dst.val = *dst.reg;
3414
0
            else if ( (rc = read_ulong(dst.mem.seg, dst.mem.off,
3415
0
                                       &dst.val, 2, ctxt, ops)) )
3416
0
                goto done;
3417
0
            if ( src_rpl > (dst.val & 3) )
3418
0
            {
3419
0
                _regs.eflags |= X86_EFLAGS_ZF;
3420
0
                dst.val = (dst.val & ~3) | src_rpl;
3421
0
            }
3422
0
            else
3423
0
            {
3424
0
                _regs.eflags &= ~X86_EFLAGS_ZF;
3425
0
                dst.type = OP_NONE;
3426
0
            }
3427
0
            generate_exception_if(!in_protmode(ctxt, ops), EXC_UD);
3428
0
        }
3429
0
        break;
3430
0
3431
0
    case 0x68: /* push imm{16,32,64} */
3432
0
    case 0x6a: /* push imm8 */
3433
0
    push:
3434
0
        ASSERT(d & Mov); /* writeback needed */
3435
0
        dst.type  = OP_MEM;
3436
0
        dst.bytes = mode_64bit() && (op_bytes == 4) ? 8 : op_bytes;
3437
0
        dst.val = src.val;
3438
0
        dst.mem.seg = x86_seg_ss;
3439
0
        dst.mem.off = sp_pre_dec(dst.bytes);
3440
0
        break;
3441
0
3442
0
    case 0x69: /* imul imm16/32 */
3443
0
    case 0x6b: /* imul imm8 */
3444
0
        if ( ea.type == OP_REG )
3445
0
            dst.val = *ea.reg;
3446
0
        else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off,
3447
0
                                   &dst.val, op_bytes, ctxt, ops)) )
3448
0
            goto done;
3449
0
        goto imul;
3450
0
3451
0
    case 0x6c ... 0x6d: /* ins %dx,%es:%edi */ {
3452
0
        unsigned long nr_reps = get_rep_prefix(false, true);
3453
0
        unsigned int port = _regs.dx;
3454
0
3455
0
        dst.bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes;
3456
0
        dst.mem.seg = x86_seg_es;
3457
0
        dst.mem.off = truncate_ea_and_reps(_regs.r(di), nr_reps, dst.bytes);
3458
0
        if ( (rc = ioport_access_check(port, dst.bytes, ctxt, ops)) != 0 )
3459
0
            goto done;
3460
0
        /* Try the presumably most efficient approach first. */
3461
0
        if ( !ops->rep_ins )
3462
0
            nr_reps = 1;
3463
0
        rc = X86EMUL_UNHANDLEABLE;
3464
0
        if ( nr_reps == 1 && ops->read_io && ops->write )
3465
0
        {
3466
0
            rc = ops->read_io(port, dst.bytes, &dst.val, ctxt);
3467
0
            if ( rc != X86EMUL_UNHANDLEABLE )
3468
0
                nr_reps = 0;
3469
0
        }
3470
0
        if ( (nr_reps > 1 || rc == X86EMUL_UNHANDLEABLE) && ops->rep_ins )
3471
0
            rc = ops->rep_ins(port, dst.mem.seg, dst.mem.off, dst.bytes,
3472
0
                              &nr_reps, ctxt);
3473
0
        if ( nr_reps >= 1 && rc == X86EMUL_UNHANDLEABLE )
3474
0
        {
3475
0
            fail_if(!ops->read_io || !ops->write);
3476
0
            if ( (rc = ops->read_io(port, dst.bytes, &dst.val, ctxt)) != 0 )
3477
0
                goto done;
3478
0
            nr_reps = 0;
3479
0
        }
3480
0
        if ( !nr_reps && rc == X86EMUL_OKAY )
3481
0
        {
3482
0
            dst.type = OP_MEM;
3483
0
            nr_reps = 1;
3484
0
        }
3485
0
        register_address_adjust(_regs.r(di), nr_reps * dst.bytes);
3486
0
        put_rep_prefix(nr_reps);
3487
0
        if ( rc != X86EMUL_OKAY )
3488
0
            goto done;
3489
0
        break;
3490
0
    }
3491
0
3492
0
    case 0x6e ... 0x6f: /* outs %esi,%dx */ {
3493
0
        unsigned long nr_reps = get_rep_prefix(true, false);
3494
0
        unsigned int port = _regs.dx;
3495
0
3496
0
        dst.bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes;
3497
0
        ea.mem.off = truncate_ea_and_reps(_regs.r(si), nr_reps, dst.bytes);
3498
0
        if ( (rc = ioport_access_check(port, dst.bytes, ctxt, ops)) != 0 )
3499
0
            goto done;
3500
0
        /* Try the presumably most efficient approach first. */
3501
0
        if ( !ops->rep_outs )
3502
0
            nr_reps = 1;
3503
0
        rc = X86EMUL_UNHANDLEABLE;
3504
0
        if ( nr_reps == 1 && ops->write_io )
3505
0
        {
3506
0
            rc = read_ulong(ea.mem.seg, ea.mem.off, &dst.val, dst.bytes,
3507
0
                            ctxt, ops);
3508
0
            if ( rc != X86EMUL_UNHANDLEABLE )
3509
0
                nr_reps = 0;
3510
0
        }
3511
0
        if ( (nr_reps > 1 || rc == X86EMUL_UNHANDLEABLE) && ops->rep_outs )
3512
0
            rc = ops->rep_outs(ea.mem.seg, ea.mem.off, port, dst.bytes,
3513
0
                               &nr_reps, ctxt);
3514
0
        if ( nr_reps >= 1 && rc == X86EMUL_UNHANDLEABLE )
3515
0
        {
3516
0
            if ( (rc = read_ulong(ea.mem.seg, ea.mem.off, &dst.val,
3517
0
                                  dst.bytes, ctxt, ops)) != X86EMUL_OKAY )
3518
0
                goto done;
3519
0
            fail_if(ops->write_io == NULL);
3520
0
            nr_reps = 0;
3521
0
        }
3522
0
        if ( !nr_reps && rc == X86EMUL_OKAY )
3523
0
        {
3524
0
            if ( (rc = ops->write_io(port, dst.bytes, dst.val, ctxt)) != 0 )
3525
0
                goto done;
3526
0
            nr_reps = 1;
3527
0
        }
3528
0
        register_address_adjust(_regs.r(si), nr_reps * dst.bytes);
3529
0
        put_rep_prefix(nr_reps);
3530
0
        if ( rc != X86EMUL_OKAY )
3531
0
            goto done;
3532
0
        break;
3533
0
    }
3534
0
3535
0
    case 0x70 ... 0x7f: /* jcc (short) */
3536
0
        if ( test_cc(b, _regs.eflags) )
3537
0
            jmp_rel((int32_t)src.val);
3538
0
        adjust_bnd(ctxt, ops, vex.pfx);
3539
0
        break;
3540
0
3541
0
    case 0x80: case 0x81: case 0x82: case 0x83: /* Grp1 */
3542
0
        switch ( modrm_reg & 7 )
3543
0
        {
3544
0
        case 0: goto add;
3545
0
        case 1: goto or;
3546
0
        case 2: goto adc;
3547
0
        case 3: goto sbb;
3548
0
        case 4: goto and;
3549
0
        case 5: goto sub;
3550
0
        case 6: goto xor;
3551
0
        case 7: goto cmp;
3552
0
        }
3553
0
        break;
3554
0
3555
0
    case 0xa8 ... 0xa9: /* test imm,%%eax */
3556
0
    case 0x84 ... 0x85: test: /* test */
3557
0
        emulate_2op_SrcV("test", src, dst, _regs.eflags);
3558
0
        dst.type = OP_NONE;
3559
0
        break;
3560
0
3561
0
    case 0x86 ... 0x87: xchg: /* xchg */
3562
0
        /* Write back the register source. */
3563
0
        switch ( dst.bytes )
3564
0
        {
3565
0
        case 1: *(uint8_t  *)src.reg = (uint8_t)dst.val; break;
3566
0
        case 2: *(uint16_t *)src.reg = (uint16_t)dst.val; break;
3567
0
        case 4: *src.reg = (uint32_t)dst.val; break; /* 64b reg: zero-extend */
3568
0
        case 8: *src.reg = dst.val; break;
3569
0
        }
3570
0
        /* Write back the memory destination with implicit LOCK prefix. */
3571
0
        dst.val = src.val;
3572
0
        lock_prefix = 1;
3573
0
        break;
3574
0
3575
6
    case 0xc6: /* Grp11: mov / xabort */
3576
6
    case 0xc7: /* Grp11: mov / xbegin */
3577
6
        if ( modrm == 0xf8 && vcpu_has_rtm() )
3578
0
        {
3579
0
            /*
3580
0
             * xbegin unconditionally aborts, xabort is unconditionally
3581
0
             * a nop.
3582
0
             */
3583
0
            if ( b & 1 )
3584
0
            {
3585
0
                jmp_rel((int32_t)src.val);
3586
0
                _regs.r(ax) = 0;
3587
0
            }
3588
0
            dst.type = OP_NONE;
3589
0
            break;
3590
0
        }
3591
6
        generate_exception_if((modrm_reg & 7) != 0, EXC_UD);
3592
54.2k
    case 0x88 ... 0x8b: /* mov */
3593
54.2k
    case 0xa0 ... 0xa1: /* mov mem.offs,{%al,%ax,%eax,%rax} */
3594
54.2k
    case 0xa2 ... 0xa3: /* mov {%al,%ax,%eax,%rax},mem.offs */
3595
54.2k
        dst.val = src.val;
3596
54.2k
        break;
3597
54.2k
3598
0
    case 0x8c: /* mov Sreg,r/m */
3599
0
        seg = modrm_reg & 7; /* REX.R is ignored. */
3600
0
        generate_exception_if(!is_x86_user_segment(seg), EXC_UD);
3601
0
    store_selector:
3602
0
        fail_if(ops->read_segment == NULL);
3603
0
        if ( (rc = ops->read_segment(seg, &sreg, ctxt)) != 0 )
3604
0
            goto done;
3605
0
        dst.val = sreg.sel;
3606
0
        if ( dst.type == OP_MEM )
3607
0
            dst.bytes = 2;
3608
0
        break;
3609
0
3610
0
    case 0x8d: /* lea */
3611
0
        generate_exception_if(ea.type != OP_MEM, EXC_UD);
3612
0
        dst.val = ea.mem.off;
3613
0
        break;
3614
0
3615
0
    case 0x8e: /* mov r/m,Sreg */
3616
0
        seg = modrm_reg & 7; /* REX.R is ignored. */
3617
0
        generate_exception_if(!is_x86_user_segment(seg) ||
3618
0
                              seg == x86_seg_cs, EXC_UD);
3619
0
        if ( (rc = load_seg(seg, src.val, 0, NULL, ctxt, ops)) != 0 )
3620
0
            goto done;
3621
0
        if ( seg == x86_seg_ss )
3622
0
            ctxt->retire.mov_ss = true;
3623
0
        dst.type = OP_NONE;
3624
0
        break;
3625
0
3626
0
    case 0x8f: /* pop (sole member of Grp1a) */
3627
0
        generate_exception_if((modrm_reg & 7) != 0, EXC_UD);
3628
0
        /* 64-bit mode: POP defaults to a 64-bit operand. */
3629
0
        if ( mode_64bit() && (dst.bytes == 4) )
3630
0
            dst.bytes = 8;
3631
0
        if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(dst.bytes),
3632
0
                              &dst.val, dst.bytes, ctxt, ops)) != 0 )
3633
0
            goto done;
3634
0
        break;
3635
0
3636
0
    case 0x90: /* nop / xchg %%r8,%%rax */
3637
0
    case X86EMUL_OPC_F3(0, 0x90): /* pause / xchg %%r8,%%rax */
3638
0
        if ( !(rex_prefix & REX_B) )
3639
0
            break; /* nop / pause */
3640
0
        /* fall through */
3641
0
3642
0
    case 0x91 ... 0x97: /* xchg reg,%%rax */
3643
0
        dst.type = OP_REG;
3644
0
        dst.bytes = op_bytes;
3645
0
        dst.reg  = decode_register(
3646
0
            (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0);
3647
0
        dst.val  = *dst.reg;
3648
0
        goto xchg;
3649
0
3650
0
    case 0x98: /* cbw/cwde/cdqe */
3651
0
        switch ( op_bytes )
3652
0
        {
3653
0
        case 2: _regs.ax = (int8_t)_regs.al; break; /* cbw */
3654
0
        case 4: _regs.r(ax) = (uint32_t)(int16_t)_regs.ax; break; /* cwde */
3655
0
        case 8: _regs.r(ax) = (int32_t)_regs.eax; break; /* cdqe */
3656
0
        }
3657
0
        break;
3658
0
3659
0
    case 0x99: /* cwd/cdq/cqo */
3660
0
        switch ( op_bytes )
3661
0
        {
3662
0
        case 2: _regs.dx = -((int16_t)_regs.ax < 0); break;
3663
0
        case 4: _regs.r(dx) = (uint32_t)-((int32_t)_regs.eax < 0); break;
3664
0
#ifdef __x86_64__
3665
0
        case 8: _regs.rdx = -((int64_t)_regs.rax < 0); break;
3666
0
#endif
3667
0
        }
3668
0
        break;
3669
0
3670
0
    case 0x9a: /* call (far, absolute) */
3671
0
        ASSERT(!mode_64bit());
3672
0
    far_call:
3673
0
        fail_if(!ops->read_segment || !ops->write);
3674
0
3675
0
        if ( (rc = ops->read_segment(x86_seg_cs, &sreg, ctxt)) ||
3676
0
             (rc = load_seg(x86_seg_cs, imm2, 0, &cs, ctxt, ops)) ||
3677
0
             (validate_far_branch(&cs, imm1),
3678
0
              src.val = sreg.sel,
3679
0
              rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
3680
0
                              &src.val, op_bytes, ctxt)) ||
3681
0
             (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
3682
0
                              &_regs.r(ip), op_bytes, ctxt)) ||
3683
0
             (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) )
3684
0
            goto done;
3685
0
3686
0
        _regs.r(ip) = imm1;
3687
0
        singlestep = _regs.eflags & X86_EFLAGS_TF;
3688
0
        break;
3689
0
3690
0
    case 0x9b:  /* wait/fwait */
3691
0
        host_and_vcpu_must_have(fpu);
3692
0
        get_fpu(X86EMUL_FPU_wait, &fic);
3693
0
        fic.insn_bytes = 1;
3694
0
        asm volatile ( "fwait" ::: "memory" );
3695
0
        check_fpu_exn(&fic);
3696
0
        break;
3697
0
3698
0
    case 0x9c: /* pushf */
3699
0
        if ( (_regs.eflags & X86_EFLAGS_VM) &&
3700
0
             MASK_EXTR(_regs.eflags, X86_EFLAGS_IOPL) != 3 )
3701
0
        {
3702
0
            cr4 = 0;
3703
0
            if ( op_bytes == 2 && ops->read_cr )
3704
0
            {
3705
0
                rc = ops->read_cr(4, &cr4, ctxt);
3706
0
                if ( rc != X86EMUL_OKAY )
3707
0
                    goto done;
3708
0
            }
3709
0
            generate_exception_if(!(cr4 & X86_CR4_VME), EXC_GP, 0);
3710
0
            src.val = (_regs.flags & ~X86_EFLAGS_IF) | X86_EFLAGS_IOPL;
3711
0
            if ( _regs.eflags & X86_EFLAGS_VIF )
3712
0
                src.val |= X86_EFLAGS_IF;
3713
0
        }
3714
0
        else
3715
0
            src.val = _regs.r(flags) & ~(X86_EFLAGS_VM | X86_EFLAGS_RF);
3716
0
        goto push;
3717
0
3718
0
    case 0x9d: /* popf */ {
3719
0
        uint32_t mask = X86_EFLAGS_VIP | X86_EFLAGS_VIF | X86_EFLAGS_VM;
3720
0
3721
0
        cr4 = 0;
3722
0
        if ( !mode_ring0() )
3723
0
        {
3724
0
            if ( _regs.eflags & X86_EFLAGS_VM )
3725
0
            {
3726
0
                if ( op_bytes == 2 && ops->read_cr )
3727
0
                {
3728
0
                    rc = ops->read_cr(4, &cr4, ctxt);
3729
0
                    if ( rc != X86EMUL_OKAY )
3730
0
                        goto done;
3731
0
                }
3732
0
                generate_exception_if(!(cr4 & X86_CR4_VME) &&
3733
0
                                      MASK_EXTR(_regs.eflags, X86_EFLAGS_IOPL) != 3,
3734
0
                                      EXC_GP, 0);
3735
0
            }
3736
0
            mask |= X86_EFLAGS_IOPL;
3737
0
            if ( !mode_iopl() )
3738
0
                mask |= X86_EFLAGS_IF;
3739
0
        }
3740
0
        /* 64-bit mode: POP defaults to a 64-bit operand. */
3741
0
        if ( mode_64bit() && (op_bytes == 4) )
3742
0
            op_bytes = 8;
3743
0
        if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes),
3744
0
                              &dst.val, op_bytes, ctxt, ops)) != 0 )
3745
0
            goto done;
3746
0
        if ( op_bytes == 2 )
3747
0
        {
3748
0
            dst.val = (uint16_t)dst.val | (_regs.eflags & 0xffff0000u);
3749
0
            if ( cr4 & X86_CR4_VME )
3750
0
            {
3751
0
                if ( dst.val & X86_EFLAGS_IF )
3752
0
                {
3753
0
                    generate_exception_if(_regs.eflags & X86_EFLAGS_VIP,
3754
0
                                          EXC_GP, 0);
3755
0
                    dst.val |= X86_EFLAGS_VIF;
3756
0
                }
3757
0
                else
3758
0
                    dst.val &= ~X86_EFLAGS_VIF;
3759
0
                mask &= ~X86_EFLAGS_VIF;
3760
0
            }
3761
0
        }
3762
0
        dst.val &= EFLAGS_MODIFIABLE;
3763
0
        _regs.eflags &= mask;
3764
0
        _regs.eflags |= (dst.val & ~mask) | X86_EFLAGS_MBS;
3765
0
        break;
3766
0
    }
3767
0
3768
0
    case 0x9e: /* sahf */
3769
0
        if ( mode_64bit() )
3770
0
            vcpu_must_have(lahf_lm);
3771
0
        *(uint8_t *)&_regs.eflags = (_regs.ah & EFLAGS_MASK) | X86_EFLAGS_MBS;
3772
0
        break;
3773
0
3774
0
    case 0x9f: /* lahf */
3775
0
        if ( mode_64bit() )
3776
0
            vcpu_must_have(lahf_lm);
3777
0
        _regs.ah = (_regs.eflags & EFLAGS_MASK) | X86_EFLAGS_MBS;
3778
0
        break;
3779
0
3780
0
    case 0xa4 ... 0xa5: /* movs */ {
3781
0
        unsigned long nr_reps = get_rep_prefix(true, true);
3782
0
3783
0
        dst.bytes = (d & ByteOp) ? 1 : op_bytes;
3784
0
        dst.mem.seg = x86_seg_es;
3785
0
        dst.mem.off = truncate_ea_and_reps(_regs.r(di), nr_reps, dst.bytes);
3786
0
        src.mem.off = truncate_ea_and_reps(_regs.r(si), nr_reps, dst.bytes);
3787
0
        if ( (nr_reps == 1) || !ops->rep_movs ||
3788
0
             ((rc = ops->rep_movs(ea.mem.seg, src.mem.off,
3789
0
                                  dst.mem.seg, dst.mem.off, dst.bytes,
3790
0
                                  &nr_reps, ctxt)) == X86EMUL_UNHANDLEABLE) )
3791
0
        {
3792
0
            if ( (rc = read_ulong(ea.mem.seg, src.mem.off,
3793
0
                                  &dst.val, dst.bytes, ctxt, ops)) != 0 )
3794
0
                goto done;
3795
0
            dst.type = OP_MEM;
3796
0
            nr_reps = 1;
3797
0
        }
3798
0
        register_address_adjust(_regs.r(si), nr_reps * dst.bytes);
3799
0
        register_address_adjust(_regs.r(di), nr_reps * dst.bytes);
3800
0
        put_rep_prefix(nr_reps);
3801
0
        if ( rc != X86EMUL_OKAY )
3802
0
            goto done;
3803
0
        break;
3804
0
    }
3805
0
3806
0
    case 0xa6 ... 0xa7: /* cmps */ {
3807
0
        unsigned long next_eip = _regs.r(ip);
3808
0
3809
0
        get_rep_prefix(true, true);
3810
0
        src.bytes = dst.bytes = (d & ByteOp) ? 1 : op_bytes;
3811
0
        if ( (rc = read_ulong(ea.mem.seg, truncate_ea(_regs.r(si)),
3812
0
                              &dst.val, dst.bytes, ctxt, ops)) ||
3813
0
             (rc = read_ulong(x86_seg_es, truncate_ea(_regs.r(di)),
3814
0
                              &src.val, src.bytes, ctxt, ops)) )
3815
0
            goto done;
3816
0
        register_address_adjust(_regs.r(si), dst.bytes);
3817
0
        register_address_adjust(_regs.r(di), src.bytes);
3818
0
        put_rep_prefix(1);
3819
0
        /* cmp: dst - src ==> src=*%%edi,dst=*%%esi ==> *%%esi - *%%edi */
3820
0
        emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
3821
0
        if ( (repe_prefix() && !(_regs.eflags & X86_EFLAGS_ZF)) ||
3822
0
             (repne_prefix() && (_regs.eflags & X86_EFLAGS_ZF)) )
3823
0
            _regs.r(ip) = next_eip;
3824
0
        break;
3825
0
    }
3826
0
3827
0
    case 0xaa ... 0xab: /* stos */ {
3828
0
        unsigned long nr_reps = get_rep_prefix(false, true);
3829
0
3830
0
        dst.bytes = src.bytes;
3831
0
        dst.mem.seg = x86_seg_es;
3832
0
        dst.mem.off = truncate_ea(_regs.r(di));
3833
0
        if ( (nr_reps == 1) || !ops->rep_stos ||
3834
0
             ((rc = ops->rep_stos(&src.val,
3835
0
                                  dst.mem.seg, dst.mem.off, dst.bytes,
3836
0
                                  &nr_reps, ctxt)) == X86EMUL_UNHANDLEABLE) )
3837
0
        {
3838
0
            dst.val = src.val;
3839
0
            dst.type = OP_MEM;
3840
0
            nr_reps = 1;
3841
0
            rc = X86EMUL_OKAY;
3842
0
        }
3843
0
        register_address_adjust(_regs.r(di), nr_reps * dst.bytes);
3844
0
        put_rep_prefix(nr_reps);
3845
0
        if ( rc != X86EMUL_OKAY )
3846
0
            goto done;
3847
0
        break;
3848
0
    }
3849
0
3850
0
    case 0xac ... 0xad: /* lods */
3851
0
        get_rep_prefix(true, false);
3852
0
        if ( (rc = read_ulong(ea.mem.seg, truncate_ea(_regs.r(si)),
3853
0
                              &dst.val, dst.bytes, ctxt, ops)) != 0 )
3854
0
            goto done;
3855
0
        register_address_adjust(_regs.r(si), dst.bytes);
3856
0
        put_rep_prefix(1);
3857
0
        break;
3858
0
3859
0
    case 0xae ... 0xaf: /* scas */ {
3860
0
        unsigned long next_eip = _regs.r(ip);
3861
0
3862
0
        get_rep_prefix(false, true);
3863
0
        if ( (rc = read_ulong(x86_seg_es, truncate_ea(_regs.r(di)),
3864
0
                              &dst.val, src.bytes, ctxt, ops)) != 0 )
3865
0
            goto done;
3866
0
        register_address_adjust(_regs.r(di), src.bytes);
3867
0
        put_rep_prefix(1);
3868
0
        /* cmp: %%eax - *%%edi ==> src=%%eax,dst=*%%edi ==> src - dst */
3869
0
        dst.bytes = src.bytes;
3870
0
        emulate_2op_SrcV("cmp", dst, src, _regs.eflags);
3871
0
        if ( (repe_prefix() && !(_regs.eflags & X86_EFLAGS_ZF)) ||
3872
0
             (repne_prefix() && (_regs.eflags & X86_EFLAGS_ZF)) )
3873
0
            _regs.r(ip) = next_eip;
3874
0
        break;
3875
0
    }
3876
0
3877
0
    case 0xb0 ... 0xb7: /* mov imm8,r8 */
3878
0
        dst.reg = decode_register(
3879
0
            (b & 7) | ((rex_prefix & 1) << 3), &_regs, (rex_prefix == 0));
3880
0
        dst.val = src.val;
3881
0
        break;
3882
0
3883
0
    case 0xb8 ... 0xbf: /* mov imm{16,32,64},r{16,32,64} */
3884
0
        dst.reg = decode_register(
3885
0
            (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0);
3886
0
        dst.val = src.val;
3887
0
        break;
3888
0
3889
0
    case 0xc0 ... 0xc1: grp2: /* Grp2 */
3890
0
        generate_exception_if(lock_prefix, EXC_UD);
3891
0
        switch ( modrm_reg & 7 )
3892
0
        {
3893
0
        case 0: /* rol */
3894
0
            emulate_2op_SrcB("rol", src, dst, _regs.eflags);
3895
0
            break;
3896
0
        case 1: /* ror */
3897
0
            emulate_2op_SrcB("ror", src, dst, _regs.eflags);
3898
0
            break;
3899
0
        case 2: /* rcl */
3900
0
            emulate_2op_SrcB("rcl", src, dst, _regs.eflags);
3901
0
            break;
3902
0
        case 3: /* rcr */
3903
0
            emulate_2op_SrcB("rcr", src, dst, _regs.eflags);
3904
0
            break;
3905
0
        case 4: /* sal/shl */
3906
0
        case 6: /* sal/shl */
3907
0
            emulate_2op_SrcB("sal", src, dst, _regs.eflags);
3908
0
            break;
3909
0
        case 5: /* shr */
3910
0
            emulate_2op_SrcB("shr", src, dst, _regs.eflags);
3911
0
            break;
3912
0
        case 7: /* sar */
3913
0
            emulate_2op_SrcB("sar", src, dst, _regs.eflags);
3914
0
            break;
3915
0
        }
3916
0
        break;
3917
0
3918
0
    case 0xc2: /* ret imm16 (near) */
3919
0
    case 0xc3: /* ret (near) */
3920
0
        op_bytes = ((op_bytes == 4) && mode_64bit()) ? 8 : op_bytes;
3921
0
        if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes + src.val),
3922
0
                              &dst.val, op_bytes, ctxt, ops)) != 0 ||
3923
0
             (rc = ops->insn_fetch(x86_seg_cs, dst.val, NULL, 0, ctxt)) )
3924
0
            goto done;
3925
0
        _regs.r(ip) = dst.val;
3926
0
        adjust_bnd(ctxt, ops, vex.pfx);
3927
0
        break;
3928
0
3929
0
    case 0xc4: /* les */
3930
0
    case 0xc5: /* lds */
3931
0
        seg = (b & 1) * 3; /* es = 0, ds = 3 */
3932
0
    les:
3933
0
        generate_exception_if(src.type != OP_MEM, EXC_UD);
3934
0
        if ( (rc = read_ulong(src.mem.seg, truncate_ea(src.mem.off + src.bytes),
3935
0
                              &dst.val, 2, ctxt, ops)) != X86EMUL_OKAY )
3936
0
            goto done;
3937
0
        ASSERT(is_x86_user_segment(seg));
3938
0
        if ( (rc = load_seg(seg, dst.val, 0, NULL, ctxt, ops)) != X86EMUL_OKAY )
3939
0
            goto done;
3940
0
        dst.val = src.val;
3941
0
        break;
3942
0
3943
0
    case 0xc8: /* enter imm16,imm8 */
3944
0
        dst.type = OP_REG;
3945
0
        dst.bytes = (mode_64bit() && (op_bytes == 4)) ? 8 : op_bytes;
3946
0
        dst.reg = (unsigned long *)&_regs.r(bp);
3947
0
        fail_if(!ops->write);
3948
0
        if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes),
3949
0
                              &_regs.r(bp), dst.bytes, ctxt)) )
3950
0
            goto done;
3951
0
        dst.val = _regs.r(sp);
3952
0
3953
0
        n = imm2 & 31;
3954
0
        if ( n )
3955
0
        {
3956
0
            for ( i = 1; i < n; i++ )
3957
0
            {
3958
0
                unsigned long ebp, temp_data;
3959
0
                ebp = truncate_word(_regs.r(bp) - i*dst.bytes, ctxt->sp_size/8);
3960
0
                if ( (rc = read_ulong(x86_seg_ss, ebp,
3961
0
                                      &temp_data, dst.bytes, ctxt, ops)) ||
3962
0
                     (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes),
3963
0
                                      &temp_data, dst.bytes, ctxt)) )
3964
0
                    goto done;
3965
0
            }
3966
0
            if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes),
3967
0
                                  &dst.val, dst.bytes, ctxt)) )
3968
0
                goto done;
3969
0
        }
3970
0
3971
0
        sp_pre_dec(src.val);
3972
0
        break;
3973
0
3974
0
    case 0xc9: /* leave */
3975
0
        /* First writeback, to %%esp. */
3976
0
        dst.bytes = (mode_64bit() && (op_bytes == 4)) ? 8 : op_bytes;
3977
0
        if ( dst.bytes == 2 )
3978
0
            _regs.sp = _regs.bp;
3979
0
        else
3980
0
            _regs.r(sp) = dst.bytes == 4 ? _regs.ebp : _regs.r(bp);
3981
0
3982
0
        /* Second writeback, to %%ebp. */
3983
0
        dst.type = OP_REG;
3984
0
        dst.reg = (unsigned long *)&_regs.r(bp);
3985
0
        if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(dst.bytes),
3986
0
                              &dst.val, dst.bytes, ctxt, ops)) )
3987
0
            goto done;
3988
0
        break;
3989
0
3990
0
    case 0xca: /* ret imm16 (far) */
3991
0
    case 0xcb: /* ret (far) */
3992
0
        if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes),
3993
0
                              &dst.val, op_bytes, ctxt, ops)) ||
3994
0
             (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes + src.val),
3995
0
                              &src.val, op_bytes, ctxt, ops)) ||
3996
0
             (rc = load_seg(x86_seg_cs, src.val, 1, &cs, ctxt, ops)) ||
3997
0
             (rc = commit_far_branch(&cs, dst.val)) )
3998
0
            goto done;
3999
0
        break;
4000
0
4001
0
    case 0xce: /* into */
4002
0
        if ( !(_regs.eflags & X86_EFLAGS_OF) )
4003
0
            break;
4004
0
        /* Fallthrough */
4005
0
    case 0xcc: /* int3 */
4006
0
    case 0xcd: /* int imm8 */
4007
0
    case 0xf1: /* int1 (icebp) */
4008
0
        ASSERT(!ctxt->event_pending);
4009
0
        switch ( ctxt->opcode )
4010
0
        {
4011
0
        case 0xcc: /* int3 */
4012
0
            ctxt->event.vector = EXC_BP;
4013
0
            ctxt->event.type = X86_EVENTTYPE_SW_EXCEPTION;
4014
0
            break;
4015
0
        case 0xcd: /* int imm8 */
4016
0
            ctxt->event.vector = imm1;
4017
0
            ctxt->event.type = X86_EVENTTYPE_SW_INTERRUPT;
4018
0
            break;
4019
0
        case 0xce: /* into */
4020
0
            ctxt->event.vector = EXC_OF;
4021
0
            ctxt->event.type = X86_EVENTTYPE_SW_EXCEPTION;
4022
0
            break;
4023
0
        case 0xf1: /* icebp */
4024
0
            ctxt->event.vector = EXC_DB;
4025
0
            ctxt->event.type = X86_EVENTTYPE_PRI_SW_EXCEPTION;
4026
0
            break;
4027
0
        }
4028
0
        ctxt->event.error_code = X86_EVENT_NO_EC;
4029
0
        ctxt->event.insn_len = _regs.r(ip) - ctxt->regs->r(ip);
4030
0
        ctxt->event_pending = true;
4031
0
        rc = X86EMUL_EXCEPTION;
4032
0
        goto done;
4033
0
4034
0
    case 0xcf: /* iret */ {
4035
0
        unsigned long sel, eip, eflags;
4036
0
        uint32_t mask = X86_EFLAGS_VIP | X86_EFLAGS_VIF | X86_EFLAGS_VM;
4037
0
4038
0
        fail_if(!in_realmode(ctxt, ops));
4039
0
        ctxt->retire.unblock_nmi = true;
4040
0
        if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes),
4041
0
                              &eip, op_bytes, ctxt, ops)) ||
4042
0
             (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes),
4043
0
                              &sel, op_bytes, ctxt, ops)) ||
4044
0
             (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes),
4045
0
                              &eflags, op_bytes, ctxt, ops)) )
4046
0
            goto done;
4047
0
        if ( op_bytes == 2 )
4048
0
            eflags = (uint16_t)eflags | (_regs.eflags & 0xffff0000u);
4049
0
        eflags &= EFLAGS_MODIFIABLE;
4050
0
        _regs.eflags &= mask;
4051
0
        _regs.eflags |= (eflags & ~mask) | X86_EFLAGS_MBS;
4052
0
        if ( (rc = load_seg(x86_seg_cs, sel, 1, &cs, ctxt, ops)) ||
4053
0
             (rc = commit_far_branch(&cs, (uint32_t)eip)) )
4054
0
            goto done;
4055
0
        break;
4056
0
    }
4057
0
4058
0
    case 0xd0 ... 0xd1: /* Grp2 */
4059
0
        src.val = 1;
4060
0
        goto grp2;
4061
0
4062
0
    case 0xd2 ... 0xd3: /* Grp2 */
4063
0
        src.val = _regs.cl;
4064
0
        goto grp2;
4065
0
4066
0
    case 0xd4: /* aam */
4067
0
    case 0xd5: /* aad */
4068
0
        n = (uint8_t)src.val;
4069
0
        if ( b & 0x01 )
4070
0
            _regs.ax = (uint8_t)(_regs.al + (_regs.ah * n));
4071
0
        else
4072
0
        {
4073
0
            generate_exception_if(!n, EXC_DE);
4074
0
            _regs.al = _regs.al % n;
4075
0
            _regs.ah = _regs.al / n;
4076
0
        }
4077
0
        _regs.eflags &= ~(X86_EFLAGS_SF | X86_EFLAGS_ZF | X86_EFLAGS_PF);
4078
0
        _regs.eflags |= !_regs.al ? X86_EFLAGS_ZF : 0;
4079
0
        _regs.eflags |= ((int8_t)_regs.al < 0) ? X86_EFLAGS_SF : 0;
4080
0
        _regs.eflags |= even_parity(_regs.al) ? X86_EFLAGS_PF : 0;
4081
0
        break;
4082
0
4083
0
    case 0xd6: /* salc */
4084
0
        _regs.al = (_regs.eflags & X86_EFLAGS_CF) ? 0xff : 0x00;
4085
0
        break;
4086
0
4087
0
    case 0xd7: /* xlat */ {
4088
0
        unsigned long al;
4089
0
4090
0
        if ( (rc = read_ulong(ea.mem.seg, truncate_ea(_regs.r(bx) + _regs.al),
4091
0
                              &al, 1, ctxt, ops)) != 0 )
4092
0
            goto done;
4093
0
        _regs.al = al;
4094
0
        break;
4095
0
    }
4096
0
4097
0
    case 0xd8: /* FPU 0xd8 */
4098
0
        host_and_vcpu_must_have(fpu);
4099
0
        get_fpu(X86EMUL_FPU_fpu, &fic);
4100
0
        switch ( modrm )
4101
0
        {
4102
0
        case 0xc0 ... 0xc7: /* fadd %stN,%st */
4103
0
        case 0xc8 ... 0xcf: /* fmul %stN,%st */
4104
0
        case 0xd0 ... 0xd7: /* fcom %stN,%st */
4105
0
        case 0xd8 ... 0xdf: /* fcomp %stN,%st */
4106
0
        case 0xe0 ... 0xe7: /* fsub %stN,%st */
4107
0
        case 0xe8 ... 0xef: /* fsubr %stN,%st */
4108
0
        case 0xf0 ... 0xf7: /* fdiv %stN,%st */
4109
0
        case 0xf8 ... 0xff: /* fdivr %stN,%st */
4110
0
            emulate_fpu_insn_stub(0xd8, modrm);
4111
0
            break;
4112
0
        default:
4113
0
            ASSERT(ea.type == OP_MEM);
4114
0
            if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
4115
0
                                 4, ctxt)) != X86EMUL_OKAY )
4116
0
                goto done;
4117
0
            switch ( modrm_reg & 7 )
4118
0
            {
4119
0
            case 0: /* fadd */
4120
0
                emulate_fpu_insn_memsrc("fadds", src.val);
4121
0
                break;
4122
0
            case 1: /* fmul */
4123
0
                emulate_fpu_insn_memsrc("fmuls", src.val);
4124
0
                break;
4125
0
            case 2: /* fcom */
4126
0
                emulate_fpu_insn_memsrc("fcoms", src.val);
4127
0
                break;
4128
0
            case 3: /* fcomp */
4129
0
                emulate_fpu_insn_memsrc("fcomps", src.val);
4130
0
                break;
4131
0
            case 4: /* fsub */
4132
0
                emulate_fpu_insn_memsrc("fsubs", src.val);
4133
0
                break;
4134
0
            case 5: /* fsubr */
4135
0
                emulate_fpu_insn_memsrc("fsubrs", src.val);
4136
0
                break;
4137
0
            case 6: /* fdiv */
4138
0
                emulate_fpu_insn_memsrc("fdivs", src.val);
4139
0
                break;
4140
0
            case 7: /* fdivr */
4141
0
                emulate_fpu_insn_memsrc("fdivrs", src.val);
4142
0
                break;
4143
0
            }
4144
0
        }
4145
0
        check_fpu_exn(&fic);
4146
0
        break;
4147
0
4148
0
    case 0xd9: /* FPU 0xd9 */
4149
0
        host_and_vcpu_must_have(fpu);
4150
0
        get_fpu(X86EMUL_FPU_fpu, &fic);
4151
0
        switch ( modrm )
4152
0
        {
4153
0
        case 0xfb: /* fsincos */
4154
0
            fail_if(cpu_has_amd_erratum(573));
4155
0
            /* fall through */
4156
0
        case 0xc0 ... 0xc7: /* fld %stN */
4157
0
        case 0xc8 ... 0xcf: /* fxch %stN */
4158
0
        case 0xd0: /* fnop */
4159
0
        case 0xd8 ... 0xdf: /* fstp %stN (alternative encoding) */
4160
0
        case 0xe0: /* fchs */
4161
0
        case 0xe1: /* fabs */
4162
0
        case 0xe4: /* ftst */
4163
0
        case 0xe5: /* fxam */
4164
0
        case 0xe8: /* fld1 */
4165
0
        case 0xe9: /* fldl2t */
4166
0
        case 0xea: /* fldl2e */
4167
0
        case 0xeb: /* fldpi */
4168
0
        case 0xec: /* fldlg2 */
4169
0
        case 0xed: /* fldln2 */
4170
0
        case 0xee: /* fldz */
4171
0
        case 0xf0: /* f2xm1 */
4172
0
        case 0xf1: /* fyl2x */
4173
0
        case 0xf2: /* fptan */
4174
0
        case 0xf3: /* fpatan */
4175
0
        case 0xf4: /* fxtract */
4176
0
        case 0xf5: /* fprem1 */
4177
0
        case 0xf6: /* fdecstp */
4178
0
        case 0xf7: /* fincstp */
4179
0
        case 0xf8: /* fprem */
4180
0
        case 0xf9: /* fyl2xp1 */
4181
0
        case 0xfa: /* fsqrt */
4182
0
        case 0xfc: /* frndint */
4183
0
        case 0xfd: /* fscale */
4184
0
        case 0xfe: /* fsin */
4185
0
        case 0xff: /* fcos */
4186
0
            emulate_fpu_insn_stub(0xd9, modrm);
4187
0
            break;
4188
0
        default:
4189
0
            generate_exception_if(ea.type != OP_MEM, EXC_UD);
4190
0
            dst = ea;
4191
0
            switch ( modrm_reg & 7 )
4192
0
            {
4193
0
            case 0: /* fld m32fp */
4194
0
                if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
4195
0
                                     4, ctxt)) != X86EMUL_OKAY )
4196
0
                    goto done;
4197
0
                emulate_fpu_insn_memsrc("flds", src.val);
4198
0
                dst.type = OP_NONE;
4199
0
                break;
4200
0
            case 2: /* fst m32fp */
4201
0
                emulate_fpu_insn_memdst("fsts", dst.val);
4202
0
                dst.bytes = 4;
4203
0
                break;
4204
0
            case 3: /* fstp m32fp */
4205
0
                emulate_fpu_insn_memdst("fstps", dst.val);
4206
0
                dst.bytes = 4;
4207
0
                break;
4208
0
            case 4: /* fldenv - TODO */
4209
0
                state->fpu_ctrl = true;
4210
0
                goto unimplemented_insn;
4211
0
            case 5: /* fldcw m2byte */
4212
0
                state->fpu_ctrl = true;
4213
0
                if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
4214
0
                                     2, ctxt)) != X86EMUL_OKAY )
4215
0
                    goto done;
4216
0
                emulate_fpu_insn_memsrc("fldcw", src.val);
4217
0
                dst.type = OP_NONE;
4218
0
                break;
4219
0
            case 6: /* fnstenv - TODO */
4220
0
                state->fpu_ctrl = true;
4221
0
                goto unimplemented_insn;
4222
0
            case 7: /* fnstcw m2byte */
4223
0
                state->fpu_ctrl = true;
4224
0
                emulate_fpu_insn_memdst("fnstcw", dst.val);
4225
0
                dst.bytes = 2;
4226
0
                break;
4227
0
            default:
4228
0
                generate_exception(EXC_UD);
4229
0
            }
4230
0
            /*
4231
0
             * Control instructions can't raise FPU exceptions, so we need
4232
0
             * to consider suppressing writes only for non-control ones. All
4233
0
             * of them in this group have data width 4.
4234
0
             */
4235
0
            if ( dst.type == OP_MEM && dst.bytes == 4 && !fpu_check_write() )
4236
0
                dst.type = OP_NONE;
4237
0
        }
4238
0
        check_fpu_exn(&fic);
4239
0
        break;
4240
0
4241
0
    case 0xda: /* FPU 0xda */
4242
0
        host_and_vcpu_must_have(fpu);
4243
0
        get_fpu(X86EMUL_FPU_fpu, &fic);
4244
0
        switch ( modrm )
4245
0
        {
4246
0
        case 0xc0 ... 0xc7: /* fcmovb %stN */
4247
0
        case 0xc8 ... 0xcf: /* fcmove %stN */
4248
0
        case 0xd0 ... 0xd7: /* fcmovbe %stN */
4249
0
        case 0xd8 ... 0xdf: /* fcmovu %stN */
4250
0
            vcpu_must_have(cmov);
4251
0
            emulate_fpu_insn_stub_eflags(0xda, modrm);
4252
0
            break;
4253
0
        case 0xe9:          /* fucompp */
4254
0
            emulate_fpu_insn_stub(0xda, modrm);
4255
0
            break;
4256
0
        default:
4257
0
            generate_exception_if(ea.type != OP_MEM, EXC_UD);
4258
0
            if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
4259
0
                                 4, ctxt)) != X86EMUL_OKAY )
4260
0
                goto done;
4261
0
            switch ( modrm_reg & 7 )
4262
0
            {
4263
0
            case 0: /* fiadd m32i */
4264
0
                emulate_fpu_insn_memsrc("fiaddl", src.val);
4265
0
                break;
4266
0
            case 1: /* fimul m32i */
4267
0
                emulate_fpu_insn_memsrc("fimull", src.val);
4268
0
                break;
4269
0
            case 2: /* ficom m32i */
4270
0
                emulate_fpu_insn_memsrc("ficoml", src.val);
4271
0
                break;
4272
0
            case 3: /* ficomp m32i */
4273
0
                emulate_fpu_insn_memsrc("ficompl", src.val);
4274
0
                break;
4275
0
            case 4: /* fisub m32i */
4276
0
                emulate_fpu_insn_memsrc("fisubl", src.val);
4277
0
                break;
4278
0
            case 5: /* fisubr m32i */
4279
0
                emulate_fpu_insn_memsrc("fisubrl", src.val);
4280
0
                break;
4281
0
            case 6: /* fidiv m32i */
4282
0
                emulate_fpu_insn_memsrc("fidivl", src.val);
4283
0
                break;
4284
0
            case 7: /* fidivr m32i */
4285
0
                emulate_fpu_insn_memsrc("fidivrl", src.val);
4286
0
                break;
4287
0
            }
4288
0
        }
4289
0
        check_fpu_exn(&fic);
4290
0
        break;
4291
0
4292
0
    case 0xdb: /* FPU 0xdb */
4293
0
        host_and_vcpu_must_have(fpu);
4294
0
        get_fpu(X86EMUL_FPU_fpu, &fic);
4295
0
        switch ( modrm )
4296
0
        {
4297
0
        case 0xc0 ... 0xc7: /* fcmovnb %stN */
4298
0
        case 0xc8 ... 0xcf: /* fcmovne %stN */
4299
0
        case 0xd0 ... 0xd7: /* fcmovnbe %stN */
4300
0
        case 0xd8 ... 0xdf: /* fcmovnu %stN */
4301
0
        case 0xe8 ... 0xef: /* fucomi %stN */
4302
0
        case 0xf0 ... 0xf7: /* fcomi %stN */
4303
0
            vcpu_must_have(cmov);
4304
0
            emulate_fpu_insn_stub_eflags(0xdb, modrm);
4305
0
            break;
4306
0
        case 0xe0: /* fneni - 8087 only, ignored by 287 */
4307
0
        case 0xe1: /* fndisi - 8087 only, ignored by 287 */
4308
0
        case 0xe2: /* fnclex */
4309
0
        case 0xe3: /* fninit */
4310
0
        case 0xe4: /* fnsetpm - 287 only, ignored by 387 */
4311
0
        /* case 0xe5: frstpm - 287 only, #UD on 387 */
4312
0
            state->fpu_ctrl = true;
4313
0
            emulate_fpu_insn_stub(0xdb, modrm);
4314
0
            break;
4315
0
        default:
4316
0
            generate_exception_if(ea.type != OP_MEM, EXC_UD);
4317
0
            dst = ea;
4318
0
            switch ( modrm_reg & 7 )
4319
0
            {
4320
0
            case 0: /* fild m32i */
4321
0
                if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
4322
0
                                     4, ctxt)) != X86EMUL_OKAY )
4323
0
                    goto done;
4324
0
                emulate_fpu_insn_memsrc("fildl", src.val);
4325
0
                dst.type = OP_NONE;
4326
0
                break;
4327
0
            case 1: /* fisttp m32i */
4328
0
                host_and_vcpu_must_have(sse3);
4329
0
                emulate_fpu_insn_memdst("fisttpl", dst.val);
4330
0
                dst.bytes = 4;
4331
0
                break;
4332
0
            case 2: /* fist m32i */
4333
0
                emulate_fpu_insn_memdst("fistl", dst.val);
4334
0
                dst.bytes = 4;
4335
0
                break;
4336
0
            case 3: /* fistp m32i */
4337
0
                emulate_fpu_insn_memdst("fistpl", dst.val);
4338
0
                dst.bytes = 4;
4339
0
                break;
4340
0
            case 5: /* fld m80fp */
4341
0
                if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp,
4342
0
                                     10, ctxt)) != X86EMUL_OKAY )
4343
0
                    goto done;
4344
0
                emulate_fpu_insn_memsrc("fldt", *mmvalp);
4345
0
                dst.type = OP_NONE;
4346
0
                break;
4347
0
            case 7: /* fstp m80fp */
4348
0
                fail_if(!ops->write);
4349
0
                emulate_fpu_insn_memdst("fstpt", *mmvalp);
4350
0
                if ( fpu_check_write() &&
4351
0
                     (rc = ops->write(ea.mem.seg, ea.mem.off, mmvalp,
4352
0
                                      10, ctxt)) != X86EMUL_OKAY )
4353
0
                    goto done;
4354
0
                dst.type = OP_NONE;
4355
0
                break;
4356
0
            default:
4357
0
                generate_exception(EXC_UD);
4358
0
            }
4359
0
            if ( dst.type == OP_MEM && !fpu_check_write() )
4360
0
                dst.type = OP_NONE;
4361
0
        }
4362
0
        check_fpu_exn(&fic);
4363
0
        break;
4364
0
4365
0
    case 0xdc: /* FPU 0xdc */
4366
0
        host_and_vcpu_must_have(fpu);
4367
0
        get_fpu(X86EMUL_FPU_fpu, &fic);
4368
0
        switch ( modrm )
4369
0
        {
4370
0
        case 0xc0 ... 0xc7: /* fadd %st,%stN */
4371
0
        case 0xc8 ... 0xcf: /* fmul %st,%stN */
4372
0
        case 0xd0 ... 0xd7: /* fcom %stN,%st (alternative encoding) */
4373
0
        case 0xd8 ... 0xdf: /* fcomp %stN,%st (alternative encoding) */
4374
0
        case 0xe0 ... 0xe7: /* fsubr %st,%stN */
4375
0
        case 0xe8 ... 0xef: /* fsub %st,%stN */
4376
0
        case 0xf0 ... 0xf7: /* fdivr %st,%stN */
4377
0
        case 0xf8 ... 0xff: /* fdiv %st,%stN */
4378
0
            emulate_fpu_insn_stub(0xdc, modrm);
4379
0
            break;
4380
0
        default:
4381
0
            ASSERT(ea.type == OP_MEM);
4382
0
            if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
4383
0
                                 8, ctxt)) != X86EMUL_OKAY )
4384
0
                goto done;
4385
0
            switch ( modrm_reg & 7 )
4386
0
            {
4387
0
            case 0: /* fadd m64fp */
4388
0
                emulate_fpu_insn_memsrc("faddl", src.val);
4389
0
                break;
4390
0
            case 1: /* fmul m64fp */
4391
0
                emulate_fpu_insn_memsrc("fmull", src.val);
4392
0
                break;
4393
0
            case 2: /* fcom m64fp */
4394
0
                emulate_fpu_insn_memsrc("fcoml", src.val);
4395
0
                break;
4396
0
            case 3: /* fcomp m64fp */
4397
0
                emulate_fpu_insn_memsrc("fcompl", src.val);
4398
0
                break;
4399
0
            case 4: /* fsub m64fp */
4400
0
                emulate_fpu_insn_memsrc("fsubl", src.val);
4401
0
                break;
4402
0
            case 5: /* fsubr m64fp */
4403
0
                emulate_fpu_insn_memsrc("fsubrl", src.val);
4404
0
                break;
4405
0
            case 6: /* fdiv m64fp */
4406
0
                emulate_fpu_insn_memsrc("fdivl", src.val);
4407
0
                break;
4408
0
            case 7: /* fdivr m64fp */
4409
0
                emulate_fpu_insn_memsrc("fdivrl", src.val);
4410
0
                break;
4411
0
            }
4412
0
        }
4413
0
        check_fpu_exn(&fic);
4414
0
        break;
4415
0
4416
0
    case 0xdd: /* FPU 0xdd */
4417
0
        host_and_vcpu_must_have(fpu);
4418
0
        get_fpu(X86EMUL_FPU_fpu, &fic);
4419
0
        switch ( modrm )
4420
0
        {
4421
0
        case 0xc0 ... 0xc7: /* ffree %stN */
4422
0
        case 0xc8 ... 0xcf: /* fxch %stN (alternative encoding) */
4423
0
        case 0xd0 ... 0xd7: /* fst %stN */
4424
0
        case 0xd8 ... 0xdf: /* fstp %stN */
4425
0
        case 0xe0 ... 0xe7: /* fucom %stN */
4426
0
        case 0xe8 ... 0xef: /* fucomp %stN */
4427
0
            emulate_fpu_insn_stub(0xdd, modrm);
4428
0
            break;
4429
0
        default:
4430
0
            generate_exception_if(ea.type != OP_MEM, EXC_UD);
4431
0
            dst = ea;
4432
0
            switch ( modrm_reg & 7 )
4433
0
            {
4434
0
            case 0: /* fld m64fp */;
4435
0
                if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
4436
0
                                     8, ctxt)) != X86EMUL_OKAY )
4437
0
                    goto done;
4438
0
                emulate_fpu_insn_memsrc("fldl", src.val);
4439
0
                dst.type = OP_NONE;
4440
0
                break;
4441
0
            case 1: /* fisttp m64i */
4442
0
                host_and_vcpu_must_have(sse3);
4443
0
                emulate_fpu_insn_memdst("fisttpll", dst.val);
4444
0
                dst.bytes = 8;
4445
0
                break;
4446
0
            case 2: /* fst m64fp */
4447
0
                emulate_fpu_insn_memdst("fstl", dst.val);
4448
0
                dst.bytes = 8;
4449
0
                break;
4450
0
            case 3: /* fstp m64fp */
4451
0
                emulate_fpu_insn_memdst("fstpl", dst.val);
4452
0
                dst.bytes = 8;
4453
0
                break;
4454
0
            case 4: /* frstor - TODO */
4455
0
            case 6: /* fnsave - TODO */
4456
0
                state->fpu_ctrl = true;
4457
0
                goto unimplemented_insn;
4458
0
            case 7: /* fnstsw m2byte */
4459
0
                state->fpu_ctrl = true;
4460
0
                emulate_fpu_insn_memdst("fnstsw", dst.val);
4461
0
                dst.bytes = 2;
4462
0
                break;
4463
0
            default:
4464
0
                generate_exception(EXC_UD);
4465
0
            }
4466
0
            /*
4467
0
             * Control instructions can't raise FPU exceptions, so we need
4468
0
             * to consider suppressing writes only for non-control ones. All
4469
0
             * of them in this group have data width 8.
4470
0
             */
4471
0
            if ( dst.type == OP_MEM && dst.bytes == 8 && !fpu_check_write() )
4472
0
                dst.type = OP_NONE;
4473
0
        }
4474
0
        check_fpu_exn(&fic);
4475
0
        break;
4476
0
4477
0
    case 0xde: /* FPU 0xde */
4478
0
        host_and_vcpu_must_have(fpu);
4479
0
        get_fpu(X86EMUL_FPU_fpu, &fic);
4480
0
        switch ( modrm )
4481
0
        {
4482
0
        case 0xc0 ... 0xc7: /* faddp %stN */
4483
0
        case 0xc8 ... 0xcf: /* fmulp %stN */
4484
0
        case 0xd0 ... 0xd7: /* fcomp %stN (alternative encoding) */
4485
0
        case 0xd9: /* fcompp */
4486
0
        case 0xe0 ... 0xe7: /* fsubrp %stN */
4487
0
        case 0xe8 ... 0xef: /* fsubp %stN */
4488
0
        case 0xf0 ... 0xf7: /* fdivrp %stN */
4489
0
        case 0xf8 ... 0xff: /* fdivp %stN */
4490
0
            emulate_fpu_insn_stub(0xde, modrm);
4491
0
            break;
4492
0
        default:
4493
0
            generate_exception_if(ea.type != OP_MEM, EXC_UD);
4494
0
            switch ( modrm_reg & 7 )
4495
0
            {
4496
0
            case 0: /* fiadd m16i */
4497
0
                emulate_fpu_insn_memsrc("fiadds", src.val);
4498
0
                break;
4499
0
            case 1: /* fimul m16i */
4500
0
                emulate_fpu_insn_memsrc("fimuls", src.val);
4501
0
                break;
4502
0
            case 2: /* ficom m16i */
4503
0
                emulate_fpu_insn_memsrc("ficoms", src.val);
4504
0
                break;
4505
0
            case 3: /* ficomp m16i */
4506
0
                emulate_fpu_insn_memsrc("ficomps", src.val);
4507
0
                break;
4508
0
            case 4: /* fisub m16i */
4509
0
                emulate_fpu_insn_memsrc("fisubs", src.val);
4510
0
                break;
4511
0
            case 5: /* fisubr m16i */
4512
0
                emulate_fpu_insn_memsrc("fisubrs", src.val);
4513
0
                break;
4514
0
            case 6: /* fidiv m16i */
4515
0
                emulate_fpu_insn_memsrc("fidivs", src.val);
4516
0
                break;
4517
0
            case 7: /* fidivr m16i */
4518
0
                emulate_fpu_insn_memsrc("fidivrs", src.val);
4519
0
                break;
4520
0
            }
4521
0
        }
4522
0
        check_fpu_exn(&fic);
4523
0
        break;
4524
0
4525
0
    case 0xdf: /* FPU 0xdf */
4526
0
        host_and_vcpu_must_have(fpu);
4527
0
        get_fpu(X86EMUL_FPU_fpu, &fic);
4528
0
        switch ( modrm )
4529
0
        {
4530
0
        case 0xe0:
4531
0
            /* fnstsw %ax */
4532
0
            state->fpu_ctrl = true;
4533
0
            dst.bytes = 2;
4534
0
            dst.type = OP_REG;
4535
0
            dst.reg = (void *)&_regs.ax;
4536
0
            emulate_fpu_insn_memdst("fnstsw", dst.val);
4537
0
            break;
4538
0
        case 0xe8 ... 0xef: /* fucomip %stN */
4539
0
        case 0xf0 ... 0xf7: /* fcomip %stN */
4540
0
            vcpu_must_have(cmov);
4541
0
            emulate_fpu_insn_stub_eflags(0xdf, modrm);
4542
0
            break;
4543
0
        case 0xc0 ... 0xc7: /* ffreep %stN */
4544
0
        case 0xc8 ... 0xcf: /* fxch %stN (alternative encoding) */
4545
0
        case 0xd0 ... 0xd7: /* fstp %stN (alternative encoding) */
4546
0
        case 0xd8 ... 0xdf: /* fstp %stN (alternative encoding) */
4547
0
            emulate_fpu_insn_stub(0xdf, modrm);
4548
0
            break;
4549
0
        default:
4550
0
            generate_exception_if(ea.type != OP_MEM, EXC_UD);
4551
0
            dst = ea;
4552
0
            switch ( modrm_reg & 7 )
4553
0
            {
4554
0
            case 0: /* fild m16i */
4555
0
                if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
4556
0
                                     2, ctxt)) != X86EMUL_OKAY )
4557
0
                    goto done;
4558
0
                emulate_fpu_insn_memsrc("filds", src.val);
4559
0
                dst.type = OP_NONE;
4560
0
                break;
4561
0
            case 1: /* fisttp m16i */
4562
0
                host_and_vcpu_must_have(sse3);
4563
0
                emulate_fpu_insn_memdst("fisttps", dst.val);
4564
0
                dst.bytes = 2;
4565
0
                break;
4566
0
            case 2: /* fist m16i */
4567
0
                emulate_fpu_insn_memdst("fists", dst.val);
4568
0
                dst.bytes = 2;
4569
0
                break;
4570
0
            case 3: /* fistp m16i */
4571
0
                emulate_fpu_insn_memdst("fistps", dst.val);
4572
0
                dst.bytes = 2;
4573
0
                break;
4574
0
            case 4: /* fbld m80dec */
4575
0
                if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp,
4576
0
                                     10, ctxt)) != X86EMUL_OKAY )
4577
0
                    goto done;
4578
0
                emulate_fpu_insn_memsrc("fbld", *mmvalp);
4579
0
                dst.type = OP_NONE;
4580
0
                break;
4581
0
            case 5: /* fild m64i */
4582
0
                if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
4583
0
                                     8, ctxt)) != X86EMUL_OKAY )
4584
0
                    goto done;
4585
0
                emulate_fpu_insn_memsrc("fildll", src.val);
4586
0
                dst.type = OP_NONE;
4587
0
                break;
4588
0
            case 6: /* fbstp packed bcd */
4589
0
                fail_if(!ops->write);
4590
0
                emulate_fpu_insn_memdst("fbstp", *mmvalp);
4591
0
                if ( fpu_check_write() &&
4592
0
                     (rc = ops->write(ea.mem.seg, ea.mem.off, mmvalp,
4593
0
                                      10, ctxt)) != X86EMUL_OKAY )
4594
0
                    goto done;
4595
0
                dst.type = OP_NONE;
4596
0
                break;
4597
0
            case 7: /* fistp m64i */
4598
0
                emulate_fpu_insn_memdst("fistpll", dst.val);
4599
0
                dst.bytes = 8;
4600
0
                break;
4601
0
            }
4602
0
            if ( dst.type == OP_MEM && !fpu_check_write() )
4603
0
                dst.type = OP_NONE;
4604
0
        }
4605
0
        check_fpu_exn(&fic);
4606
0
        break;
4607
0
4608
0
    case 0xe0 ... 0xe2: /* loop{,z,nz} */ {
4609
0
        unsigned long count = get_loop_count(&_regs, ad_bytes);
4610
0
        int do_jmp = !(_regs.eflags & X86_EFLAGS_ZF); /* loopnz */
4611
0
4612
0
        if ( b == 0xe1 )
4613
0
            do_jmp = !do_jmp; /* loopz */
4614
0
        else if ( b == 0xe2 )
4615
0
            do_jmp = 1; /* loop */
4616
0
        if ( count != 1 && do_jmp )
4617
0
            jmp_rel((int32_t)src.val);
4618
0
        put_loop_count(&_regs, ad_bytes, count - 1);
4619
0
        break;
4620
0
    }
4621
0
4622
0
    case 0xe3: /* jcxz/jecxz (short) */
4623
0
        if ( !get_loop_count(&_regs, ad_bytes) )
4624
0
            jmp_rel((int32_t)src.val);
4625
0
        break;
4626
0
4627
0
    case 0xe4: /* in imm8,%al */
4628
0
    case 0xe5: /* in imm8,%eax */
4629
0
    case 0xe6: /* out %al,imm8 */
4630
0
    case 0xe7: /* out %eax,imm8 */
4631
0
    case 0xec: /* in %dx,%al */
4632
0
    case 0xed: /* in %dx,%eax */
4633
0
    case 0xee: /* out %al,%dx */
4634
0
    case 0xef: /* out %eax,%dx */ {
4635
0
        unsigned int port = ((b < 0xe8) ? (uint8_t)src.val : _regs.dx);
4636
0
4637
0
        op_bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes;
4638
0
        if ( (rc = ioport_access_check(port, op_bytes, ctxt, ops)) != 0 )
4639
0
            goto done;
4640
0
        if ( b & 2 )
4641
0
        {
4642
0
            /* out */
4643
0
            fail_if(ops->write_io == NULL);
4644
0
            rc = ops->write_io(port, op_bytes, _regs.eax, ctxt);
4645
0
        }
4646
0
        else
4647
0
        {
4648
0
            /* in */
4649
0
            dst.bytes = op_bytes;
4650
0
            fail_if(ops->read_io == NULL);
4651
0
            rc = ops->read_io(port, dst.bytes, &dst.val, ctxt);
4652
0
        }
4653
0
        if ( rc != 0 )
4654
0
        {
4655
0
            if ( rc == X86EMUL_DONE )
4656
0
                goto complete_insn;
4657
0
            goto done;
4658
0
        }
4659
0
        break;
4660
0
    }
4661
0
4662
0
    case 0xe8: /* call (near) */ {
4663
0
        int32_t rel = src.val;
4664
0
4665
0
        op_bytes = ((op_bytes == 4) && mode_64bit()) ? 8 : op_bytes;
4666
0
        src.val = _regs.r(ip);
4667
0
        jmp_rel(rel);
4668
0
        adjust_bnd(ctxt, ops, vex.pfx);
4669
0
        goto push;
4670
0
    }
4671
0
4672
0
    case 0xe9: /* jmp (near) */
4673
0
    case 0xeb: /* jmp (short) */
4674
0
        jmp_rel((int32_t)src.val);
4675
0
        if ( !(b & 2) )
4676
0
            adjust_bnd(ctxt, ops, vex.pfx);
4677
0
        break;
4678
0
4679
0
    case 0xea: /* jmp (far, absolute) */
4680
0
        ASSERT(!mode_64bit());
4681
0
    far_jmp:
4682
0
        if ( (rc = load_seg(x86_seg_cs, imm2, 0, &cs, ctxt, ops)) ||
4683
0
             (rc = commit_far_branch(&cs, imm1)) )
4684
0
            goto done;
4685
0
        break;
4686
0
4687
0
    case 0xf4: /* hlt */
4688
0
        generate_exception_if(!mode_ring0(), EXC_GP, 0);
4689
0
        ctxt->retire.hlt = true;
4690
0
        break;
4691
0
4692
0
    case 0xf5: /* cmc */
4693
0
        _regs.eflags ^= X86_EFLAGS_CF;
4694
0
        break;
4695
0
4696
0
    case 0xf6 ... 0xf7: /* Grp3 */
4697
0
        if ( (d & DstMask) == DstEax )
4698
0
            dst.reg = (unsigned long *)&_regs.r(ax);
4699
0
        switch ( modrm_reg & 7 )
4700
0
        {
4701
0
            unsigned long u[2], v;
4702
0
4703
0
        case 0 ... 1: /* test */
4704
0
            generate_exception_if(lock_prefix, EXC_UD);
4705
0
            goto test;
4706
0
        case 2: /* not */
4707
0
            dst.val = ~dst.val;
4708
0
            break;
4709
0
        case 3: /* neg */
4710
0
            emulate_1op("neg", dst, _regs.eflags);
4711
0
            break;
4712
0
        case 4: /* mul */
4713
0
            _regs.eflags &= ~(X86_EFLAGS_OF | X86_EFLAGS_CF);
4714
0
            switch ( dst.bytes )
4715
0
            {
4716
0
            case 1:
4717
0
                dst.val = _regs.al;
4718
0
                dst.val *= src.val;
4719
0
                if ( (uint8_t)dst.val != (uint16_t)dst.val )
4720
0
                    _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF;
4721
0
                dst.bytes = 2;
4722
0
                break;
4723
0
            case 2:
4724
0
                dst.val = _regs.ax;
4725
0
                dst.val *= src.val;
4726
0
                if ( (uint16_t)dst.val != (uint32_t)dst.val )
4727
0
                    _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF;
4728
0
                _regs.dx = dst.val >> 16;
4729
0
                break;
4730
0
#ifdef __x86_64__
4731
0
            case 4:
4732
0
                dst.val = _regs.eax;
4733
0
                dst.val *= src.val;
4734
0
                if ( (uint32_t)dst.val != dst.val )
4735
0
                    _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF;
4736
0
                _regs.rdx = dst.val >> 32;
4737
0
                break;
4738
0
#endif
4739
0
            default:
4740
0
                u[0] = src.val;
4741
0
                u[1] = _regs.r(ax);
4742
0
                if ( mul_dbl(u) )
4743
0
                    _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF;
4744
0
                _regs.r(dx) = u[1];
4745
0
                dst.val = u[0];
4746
0
                break;
4747
0
            }
4748
0
            break;
4749
0
        case 5: /* imul */
4750
0
        imul:
4751
0
            _regs.eflags &= ~(X86_EFLAGS_OF | X86_EFLAGS_CF);
4752
0
            switch ( dst.bytes )
4753
0
            {
4754
0
            case 1:
4755
0
                dst.val = (int8_t)src.val * (int8_t)_regs.al;
4756
0
                if ( (int8_t)dst.val != (int16_t)dst.val )
4757
0
                    _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF;
4758
0
                ASSERT(b > 0x6b);
4759
0
                dst.bytes = 2;
4760
0
                break;
4761
0
            case 2:
4762
0
                dst.val = ((uint32_t)(int16_t)src.val *
4763
0
                           (uint32_t)(int16_t)_regs.ax);
4764
0
                if ( (int16_t)dst.val != (int32_t)dst.val )
4765
0
                    _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF;
4766
0
                if ( b > 0x6b )
4767
0
                    _regs.dx = dst.val >> 16;
4768
0
                break;
4769
0
#ifdef __x86_64__
4770
0
            case 4:
4771
0
                dst.val = ((uint64_t)(int32_t)src.val *
4772
0
                           (uint64_t)(int32_t)_regs.eax);
4773
0
                if ( (int32_t)dst.val != dst.val )
4774
0
                    _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF;
4775
0
                if ( b > 0x6b )
4776
0
                    _regs.rdx = dst.val >> 32;
4777
0
                break;
4778
0
#endif
4779
0
            default:
4780
0
                u[0] = src.val;
4781
0
                u[1] = _regs.r(ax);
4782
0
                if ( imul_dbl(u) )
4783
0
                    _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF;
4784
0
                if ( b > 0x6b )
4785
0
                    _regs.r(dx) = u[1];
4786
0
                dst.val = u[0];
4787
0
                break;
4788
0
            }
4789
0
            break;
4790
0
        case 6: /* div */
4791
0
            switch ( src.bytes )
4792
0
            {
4793
0
            case 1:
4794
0
                u[0] = _regs.ax;
4795
0
                u[1] = 0;
4796
0
                v    = (uint8_t)src.val;
4797
0
                generate_exception_if(
4798
0
                    div_dbl(u, v) || ((uint8_t)u[0] != (uint16_t)u[0]),
4799
0
                    EXC_DE);
4800
0
                dst.val = (uint8_t)u[0];
4801
0
                _regs.ah = u[1];
4802
0
                break;
4803
0
            case 2:
4804
0
                u[0] = (_regs.edx << 16) | _regs.ax;
4805
0
                u[1] = 0;
4806
0
                v    = (uint16_t)src.val;
4807
0
                generate_exception_if(
4808
0
                    div_dbl(u, v) || ((uint16_t)u[0] != (uint32_t)u[0]),
4809
0
                    EXC_DE);
4810
0
                dst.val = (uint16_t)u[0];
4811
0
                _regs.dx = u[1];
4812
0
                break;
4813
0
#ifdef __x86_64__
4814
0
            case 4:
4815
0
                u[0] = (_regs.rdx << 32) | _regs.eax;
4816
0
                u[1] = 0;
4817
0
                v    = (uint32_t)src.val;
4818
0
                generate_exception_if(
4819
0
                    div_dbl(u, v) || ((uint32_t)u[0] != u[0]),
4820
0
                    EXC_DE);
4821
0
                dst.val   = (uint32_t)u[0];
4822
0
                _regs.rdx = (uint32_t)u[1];
4823
0
                break;
4824
0
#endif
4825
0
            default:
4826
0
                u[0] = _regs.r(ax);
4827
0
                u[1] = _regs.r(dx);
4828
0
                v    = src.val;
4829
0
                generate_exception_if(div_dbl(u, v), EXC_DE);
4830
0
                dst.val     = u[0];
4831
0
                _regs.r(dx) = u[1];
4832
0
                break;
4833
0
            }
4834
0
            break;
4835
0
        case 7: /* idiv */
4836
0
            switch ( src.bytes )
4837
0
            {
4838
0
            case 1:
4839
0
                u[0] = (int16_t)_regs.ax;
4840
0
                u[1] = ((long)u[0] < 0) ? ~0UL : 0UL;
4841
0
                v    = (int8_t)src.val;
4842
0
                generate_exception_if(
4843
0
                    idiv_dbl(u, v) || ((int8_t)u[0] != (int16_t)u[0]),
4844
0
                    EXC_DE);
4845
0
                dst.val = (int8_t)u[0];
4846
0
                _regs.ah = u[1];
4847
0
                break;
4848
0
            case 2:
4849
0
                u[0] = (int32_t)((_regs.edx << 16) | _regs.ax);
4850
0
                u[1] = ((long)u[0] < 0) ? ~0UL : 0UL;
4851
0
                v    = (int16_t)src.val;
4852
0
                generate_exception_if(
4853
0
                    idiv_dbl(u, v) || ((int16_t)u[0] != (int32_t)u[0]),
4854
0
                    EXC_DE);
4855
0
                dst.val = (int16_t)u[0];
4856
0
                _regs.dx = u[1];
4857
0
                break;
4858
0
#ifdef __x86_64__
4859
0
            case 4:
4860
0
                u[0] = (_regs.rdx << 32) | _regs.eax;
4861
0
                u[1] = ((long)u[0] < 0) ? ~0UL : 0UL;
4862
0
                v    = (int32_t)src.val;
4863
0
                generate_exception_if(
4864
0
                    idiv_dbl(u, v) || ((int32_t)u[0] != u[0]),
4865
0
                    EXC_DE);
4866
0
                dst.val   = (int32_t)u[0];
4867
0
                _regs.rdx = (uint32_t)u[1];
4868
0
                break;
4869
0
#endif
4870
0
            default:
4871
0
                u[0] = _regs.r(ax);
4872
0
                u[1] = _regs.r(dx);
4873
0
                v    = src.val;
4874
0
                generate_exception_if(idiv_dbl(u, v), EXC_DE);
4875
0
                dst.val     = u[0];
4876
0
                _regs.r(dx) = u[1];
4877
0
                break;
4878
0
            }
4879
0
            break;
4880
0
        }
4881
0
        break;
4882
0
4883
0
    case 0xf8: /* clc */
4884
0
        _regs.eflags &= ~X86_EFLAGS_CF;
4885
0
        break;
4886
0
4887
0
    case 0xf9: /* stc */
4888
0
        _regs.eflags |= X86_EFLAGS_CF;
4889
0
        break;
4890
0
4891
0
    case 0xfa: /* cli */
4892
0
        if ( mode_iopl() )
4893
0
            _regs.eflags &= ~X86_EFLAGS_IF;
4894
0
        else
4895
0
        {
4896
0
            generate_exception_if(!mode_vif(), EXC_GP, 0);
4897
0
            _regs.eflags &= ~X86_EFLAGS_VIF;
4898
0
        }
4899
0
        break;
4900
0
4901
0
    case 0xfb: /* sti */
4902
0
        if ( mode_iopl() )
4903
0
        {
4904
0
            if ( !(_regs.eflags & X86_EFLAGS_IF) )
4905
0
                ctxt->retire.sti = true;
4906
0
            _regs.eflags |= X86_EFLAGS_IF;
4907
0
        }
4908
0
        else
4909
0
        {
4910
0
            generate_exception_if((_regs.eflags & X86_EFLAGS_VIP) ||
4911
0
          !mode_vif(),
4912
0
                                  EXC_GP, 0);
4913
0
            if ( !(_regs.eflags & X86_EFLAGS_VIF) )
4914
0
                ctxt->retire.sti = true;
4915
0
            _regs.eflags |= X86_EFLAGS_VIF;
4916
0
        }
4917
0
        break;
4918
0
4919
0
    case 0xfc: /* cld */
4920
0
        _regs.eflags &= ~X86_EFLAGS_DF;
4921
0
        break;
4922
0
4923
0
    case 0xfd: /* std */
4924
0
        _regs.eflags |= X86_EFLAGS_DF;
4925
0
        break;
4926
0
4927
0
    case 0xfe: /* Grp4 */
4928
0
        generate_exception_if((modrm_reg & 7) >= 2, EXC_UD);
4929
0
        /* Fallthrough. */
4930
0
    case 0xff: /* Grp5 */
4931
0
        switch ( modrm_reg & 7 )
4932
0
        {
4933
0
        case 0: /* inc */
4934
0
            emulate_1op("inc", dst, _regs.eflags);
4935
0
            break;
4936
0
        case 1: /* dec */
4937
0
            emulate_1op("dec", dst, _regs.eflags);
4938
0
            break;
4939
0
        case 2: /* call (near) */
4940
0
            dst.val = _regs.r(ip);
4941
0
            if ( (rc = ops->insn_fetch(x86_seg_cs, src.val, NULL, 0, ctxt)) )
4942
0
                goto done;
4943
0
            _regs.r(ip) = src.val;
4944
0
            src.val = dst.val;
4945
0
            adjust_bnd(ctxt, ops, vex.pfx);
4946
0
            goto push;
4947
0
        case 4: /* jmp (near) */
4948
0
            if ( (rc = ops->insn_fetch(x86_seg_cs, src.val, NULL, 0, ctxt)) )
4949
0
                goto done;
4950
0
            _regs.r(ip) = src.val;
4951
0
            dst.type = OP_NONE;
4952
0
            adjust_bnd(ctxt, ops, vex.pfx);
4953
0
            break;
4954
0
        case 3: /* call (far, absolute indirect) */
4955
0
        case 5: /* jmp (far, absolute indirect) */
4956
0
            generate_exception_if(src.type != OP_MEM, EXC_UD);
4957
0
4958
0
            if ( (rc = read_ulong(src.mem.seg,
4959
0
                                  truncate_ea(src.mem.off + op_bytes),
4960
0
                                  &imm2, 2, ctxt, ops)) )
4961
0
                goto done;
4962
0
            imm1 = src.val;
4963
0
            if ( !(modrm_reg & 4) )
4964
0
                goto far_call;
4965
0
            goto far_jmp;
4966
0
        case 6: /* push */
4967
0
            goto push;
4968
0
        case 7:
4969
0
            generate_exception(EXC_UD);
4970
0
        }
4971
0
        break;
4972
0
4973
0
    case X86EMUL_OPC(0x0f, 0x00): /* Grp6 */
4974
0
        seg = (modrm_reg & 1) ? x86_seg_tr : x86_seg_ldtr;
4975
0
        generate_exception_if(!in_protmode(ctxt, ops), EXC_UD);
4976
0
        switch ( modrm_reg & 6 )
4977
0
        {
4978
0
        case 0: /* sldt / str */
4979
0
            generate_exception_if(umip_active(ctxt, ops), EXC_GP, 0);
4980
0
            goto store_selector;
4981
0
        case 2: /* lldt / ltr */
4982
0
            generate_exception_if(!mode_ring0(), EXC_GP, 0);
4983
0
            if ( (rc = load_seg(seg, src.val, 0, NULL, ctxt, ops)) != 0 )
4984
0
                goto done;
4985
0
            break;
4986
0
        case 4: /* verr / verw */
4987
0
            _regs.eflags &= ~X86_EFLAGS_ZF;
4988
0
            switch ( rc = protmode_load_seg(x86_seg_none, src.val, false,
4989
0
                                            &sreg, ctxt, ops) )
4990
0
            {
4991
0
            case X86EMUL_OKAY:
4992
0
                if ( sreg.s &&
4993
0
                     ((modrm_reg & 1) ? ((sreg.type & 0xa) == 0x2)
4994
0
                                      : ((sreg.type & 0xa) != 0x8)) )
4995
0
                    _regs.eflags |= X86_EFLAGS_ZF;
4996
0
                break;
4997
0
            case X86EMUL_EXCEPTION:
4998
0
                if ( ctxt->event_pending )
4999
0
                {
5000
0
                    ASSERT(ctxt->event.vector == EXC_PF);
5001
0
            default:
5002
0
                    goto done;
5003
0
                }
5004
0
                /* Instead of the exception, ZF remains cleared. */
5005
0
                rc = X86EMUL_OKAY;
5006
0
                break;
5007
0
            }
5008
0
            break;
5009
0
        default:
5010
0
            generate_exception_if(true, EXC_UD);
5011
0
            break;
5012
0
        }
5013
0
        break;
5014
0
5015
0
    case X86EMUL_OPC(0x0f, 0x01): /* Grp7 */
5016
0
    {
5017
0
        unsigned long base, limit, cr0, cr0w;
5018
0
5019
0
        seg = (modrm_reg & 1) ? x86_seg_idtr : x86_seg_gdtr;
5020
0
5021
0
        switch( modrm )
5022
0
        {
5023
0
        case 0xca: /* clac */
5024
0
        case 0xcb: /* stac */
5025
0
            vcpu_must_have(smap);
5026
0
            generate_exception_if(vex.pfx || !mode_ring0(), EXC_UD);
5027
0
5028
0
            _regs.eflags &= ~X86_EFLAGS_AC;
5029
0
            if ( modrm == 0xcb )
5030
0
                _regs.eflags |= X86_EFLAGS_AC;
5031
0
            break;
5032
0
5033
0
#ifdef __XEN__
5034
0
        case 0xd1: /* xsetbv */
5035
0
            generate_exception_if(vex.pfx, EXC_UD);
5036
0
            if ( !ops->read_cr || ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
5037
0
                cr4 = 0;
5038
0
            generate_exception_if(!(cr4 & X86_CR4_OSXSAVE), EXC_UD);
5039
0
            generate_exception_if(!mode_ring0() ||
5040
0
                                  handle_xsetbv(_regs.ecx,
5041
0
                                                _regs.eax | (_regs.rdx << 32)),
5042
0
                                  EXC_GP, 0);
5043
0
            break;
5044
0
#endif
5045
0
5046
0
        case 0xd4: /* vmfunc */
5047
0
            generate_exception_if(vex.pfx, EXC_UD);
5048
0
            fail_if(!ops->vmfunc);
5049
0
            if ( (rc = ops->vmfunc(ctxt)) != X86EMUL_OKAY )
5050
0
                goto done;
5051
0
            break;
5052
0
5053
0
        case 0xd5: /* xend */
5054
0
            generate_exception_if(vex.pfx, EXC_UD);
5055
0
            generate_exception_if(!vcpu_has_rtm(), EXC_UD);
5056
0
            generate_exception_if(vcpu_has_rtm(), EXC_GP, 0);
5057
0
            break;
5058
0
5059
0
        case 0xd6: /* xtest */
5060
0
            generate_exception_if(vex.pfx, EXC_UD);
5061
0
            generate_exception_if(!vcpu_has_rtm() && !vcpu_has_hle(),
5062
0
                                  EXC_UD);
5063
0
            /* Neither HLE nor RTM can be active when we get here. */
5064
0
            _regs.eflags |= X86_EFLAGS_ZF;
5065
0
            break;
5066
0
5067
0
        case 0xdf: /* invlpga */
5068
0
            generate_exception_if(!in_protmode(ctxt, ops), EXC_UD);
5069
0
            generate_exception_if(!mode_ring0(), EXC_GP, 0);
5070
0
            fail_if(ops->invlpg == NULL);
5071
0
            if ( (rc = ops->invlpg(x86_seg_none, truncate_ea(_regs.r(ax)),
5072
0
                                   ctxt)) )
5073
0
                goto done;
5074
0
            break;
5075
0
5076
0
        case 0xf9: /* rdtscp */
5077
0
            fail_if(ops->read_msr == NULL);
5078
0
            if ( (rc = ops->read_msr(MSR_TSC_AUX,
5079
0
                                     &msr_val, ctxt)) != X86EMUL_OKAY )
5080
0
                goto done;
5081
0
            _regs.r(cx) = (uint32_t)msr_val;
5082
0
            goto rdtsc;
5083
0
5084
0
        case 0xfc: /* clzero */
5085
0
        {
5086
0
            unsigned long zero = 0;
5087
0
5088
0
            vcpu_must_have(clzero);
5089
0
5090
0
            base = ad_bytes == 8 ? _regs.r(ax) :
5091
0
                   ad_bytes == 4 ? _regs.eax : _regs.ax;
5092
0
            limit = 0;
5093
0
            if ( vcpu_has_clflush() &&
5094
0
                 ops->cpuid(1, 0, &cpuid_leaf, ctxt) == X86EMUL_OKAY )
5095
0
                limit = ((cpuid_leaf.b >> 8) & 0xff) * 8;
5096
0
            generate_exception_if(limit < sizeof(long) ||
5097
0
                                  (limit & (limit - 1)), EXC_UD);
5098
0
            base &= ~(limit - 1);
5099
0
            if ( ops->rep_stos )
5100
0
            {
5101
0
                unsigned long nr_reps = limit / sizeof(zero);
5102
0
5103
0
                rc = ops->rep_stos(&zero, ea.mem.seg, base, sizeof(zero),
5104
0
                                   &nr_reps, ctxt);
5105
0
                if ( rc == X86EMUL_OKAY )
5106
0
                {
5107
0
                    base += nr_reps * sizeof(zero);
5108
0
                    limit -= nr_reps * sizeof(zero);
5109
0
                }
5110
0
                else if ( rc != X86EMUL_UNHANDLEABLE )
5111
0
                    goto done;
5112
0
            }
5113
0
            fail_if(limit && !ops->write);
5114
0
            while ( limit )
5115
0
            {
5116
0
                rc = ops->write(ea.mem.seg, base, &zero, sizeof(zero), ctxt);
5117
0
                if ( rc != X86EMUL_OKAY )
5118
0
                    goto done;
5119
0
                base += sizeof(zero);
5120
0
                limit -= sizeof(zero);
5121
0
            }
5122
0
            break;
5123
0
        }
5124
0
5125
0
#define _GRP7(mod, reg) \
5126
0
            (((mod) << 6) | ((reg) << 3)) ... (((mod) << 6) | ((reg) << 3) | 7)
5127
0
#define GRP7_MEM(reg) _GRP7(0, reg): case _GRP7(1, reg): case _GRP7(2, reg)
5128
0
#define GRP7_ALL(reg) GRP7_MEM(reg): case _GRP7(3, reg)
5129
0
5130
0
        case GRP7_MEM(0): /* sgdt */
5131
0
        case GRP7_MEM(1): /* sidt */
5132
0
            ASSERT(ea.type == OP_MEM);
5133
0
            generate_exception_if(umip_active(ctxt, ops), EXC_GP, 0);
5134
0
            fail_if(!ops->read_segment || !ops->write);
5135
0
            if ( (rc = ops->read_segment(seg, &sreg, ctxt)) )
5136
0
                goto done;
5137
0
            if ( mode_64bit() )
5138
0
                op_bytes = 8;
5139
0
            else if ( op_bytes == 2 )
5140
0
            {
5141
0
                sreg.base &= 0xffffff;
5142
0
                op_bytes = 4;
5143
0
            }
5144
0
            if ( (rc = ops->write(ea.mem.seg, ea.mem.off, &sreg.limit,
5145
0
                                  2, ctxt)) != X86EMUL_OKAY ||
5146
0
                 (rc = ops->write(ea.mem.seg, truncate_ea(ea.mem.off + 2),
5147
0
                                  &sreg.base, op_bytes, ctxt)) != X86EMUL_OKAY )
5148
0
                goto done;
5149
0
            break;
5150
0
5151
0
        case GRP7_MEM(2): /* lgdt */
5152
0
        case GRP7_MEM(3): /* lidt */
5153
0
            ASSERT(ea.type == OP_MEM);
5154
0
            generate_exception_if(!mode_ring0(), EXC_GP, 0);
5155
0
            fail_if(ops->write_segment == NULL);
5156
0
            memset(&sreg, 0, sizeof(sreg));
5157
0
            if ( (rc = read_ulong(ea.mem.seg, ea.mem.off,
5158
0
                                  &limit, 2, ctxt, ops)) ||
5159
0
                 (rc = read_ulong(ea.mem.seg, truncate_ea(ea.mem.off + 2),
5160
0
                                  &base, mode_64bit() ? 8 : 4, ctxt, ops)) )
5161
0
                goto done;
5162
0
            generate_exception_if(!is_canonical_address(base), EXC_GP, 0);
5163
0
            sreg.base = base;
5164
0
            sreg.limit = limit;
5165
0
            if ( !mode_64bit() && op_bytes == 2 )
5166
0
                sreg.base &= 0xffffff;
5167
0
            if ( (rc = ops->write_segment(seg, &sreg, ctxt)) )
5168
0
                goto done;
5169
0
            break;
5170
0
5171
0
        case GRP7_ALL(4): /* smsw */
5172
0
            generate_exception_if(umip_active(ctxt, ops), EXC_GP, 0);
5173
0
            if ( ea.type == OP_MEM )
5174
0
            {
5175
0
                fail_if(!ops->write);
5176
0
                d |= Mov; /* force writeback */
5177
0
                ea.bytes = 2;
5178
0
            }
5179
0
            else
5180
0
                ea.bytes = op_bytes;
5181
0
            dst = ea;
5182
0
            fail_if(ops->read_cr == NULL);
5183
0
            if ( (rc = ops->read_cr(0, &dst.val, ctxt)) )
5184
0
                goto done;
5185
0
            break;
5186
0
5187
0
        case GRP7_ALL(6): /* lmsw */
5188
0
            fail_if(ops->read_cr == NULL);
5189
0
            fail_if(ops->write_cr == NULL);
5190
0
            generate_exception_if(!mode_ring0(), EXC_GP, 0);
5191
0
            if ( (rc = ops->read_cr(0, &cr0, ctxt)) )
5192
0
                goto done;
5193
0
            if ( ea.type == OP_REG )
5194
0
                cr0w = *ea.reg;
5195
0
            else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off,
5196
0
                                       &cr0w, 2, ctxt, ops)) )
5197
0
                goto done;
5198
0
            /* LMSW can: (1) set bits 0-3; (2) clear bits 1-3. */
5199
0
            cr0 = (cr0 & ~0xe) | (cr0w & 0xf);
5200
0
            if ( (rc = ops->write_cr(0, cr0, ctxt)) )
5201
0
                goto done;
5202
0
            break;
5203
0
5204
0
        case GRP7_MEM(7): /* invlpg */
5205
0
            ASSERT(ea.type == OP_MEM);
5206
0
            generate_exception_if(!mode_ring0(), EXC_GP, 0);
5207
0
            fail_if(ops->invlpg == NULL);
5208
0
            if ( (rc = ops->invlpg(ea.mem.seg, ea.mem.off, ctxt)) )
5209
0
                goto done;
5210
0
            break;
5211
0
5212
0
#undef GRP7_ALL
5213
0
#undef GRP7_MEM
5214
0
#undef _GRP7
5215
0
5216
0
        default:
5217
0
            goto unimplemented_insn;
5218
0
        }
5219
0
        break;
5220
0
    }
5221
0
5222
0
    case X86EMUL_OPC(0x0f, 0x02): /* lar */
5223
0
        generate_exception_if(!in_protmode(ctxt, ops), EXC_UD);
5224
0
        _regs.eflags &= ~X86_EFLAGS_ZF;
5225
0
        switch ( rc = protmode_load_seg(x86_seg_none, src.val, false, &sreg,
5226
0
                                        ctxt, ops) )
5227
0
        {
5228
0
        case X86EMUL_OKAY:
5229
0
            if ( !sreg.s )
5230
0
            {
5231
0
                switch ( sreg.type )
5232
0
                {
5233
0
                case 0x01: /* available 16-bit TSS */
5234
0
                case 0x03: /* busy 16-bit TSS */
5235
0
                case 0x04: /* 16-bit call gate */
5236
0
                case 0x05: /* 16/32-bit task gate */
5237
0
                    if ( ctxt->lma )
5238
0
                        break;
5239
0
                    /* fall through */
5240
0
                case 0x02: /* LDT */
5241
0
                case 0x09: /* available 32/64-bit TSS */
5242
0
                case 0x0b: /* busy 32/64-bit TSS */
5243
0
                case 0x0c: /* 32/64-bit call gate */
5244
0
                    _regs.eflags |= X86_EFLAGS_ZF;
5245
0
                    break;
5246
0
                }
5247
0
            }
5248
0
            else
5249
0
                _regs.eflags |= X86_EFLAGS_ZF;
5250
0
            break;
5251
0
        case X86EMUL_EXCEPTION:
5252
0
            if ( ctxt->event_pending )
5253
0
            {
5254
0
                ASSERT(ctxt->event.vector == EXC_PF);
5255
0
        default:
5256
0
                goto done;
5257
0
            }
5258
0
            /* Instead of the exception, ZF remains cleared. */
5259
0
            rc = X86EMUL_OKAY;
5260
0
            break;
5261
0
        }
5262
0
        if ( _regs.eflags & X86_EFLAGS_ZF )
5263
0
            dst.val = ((sreg.attr & 0xff) << 8) |
5264
0
                      ((sreg.limit >> (sreg.g ? 12 : 0)) & 0xf0000) |
5265
0
                      ((sreg.attr & 0xf00) << 12);
5266
0
        else
5267
0
            dst.type = OP_NONE;
5268
0
        break;
5269
0
5270
0
    case X86EMUL_OPC(0x0f, 0x03): /* lsl */
5271
0
        generate_exception_if(!in_protmode(ctxt, ops), EXC_UD);
5272
0
        _regs.eflags &= ~X86_EFLAGS_ZF;
5273
0
        switch ( rc = protmode_load_seg(x86_seg_none, src.val, false, &sreg,
5274
0
                                        ctxt, ops) )
5275
0
        {
5276
0
        case X86EMUL_OKAY:
5277
0
            if ( !sreg.s )
5278
0
            {
5279
0
                switch ( sreg.type )
5280
0
                {
5281
0
                case 0x01: /* available 16-bit TSS */
5282
0
                case 0x03: /* busy 16-bit TSS */
5283
0
                    if ( ctxt->lma )
5284
0
                        break;
5285
0
                    /* fall through */
5286
0
                case 0x02: /* LDT */
5287
0
                case 0x09: /* available 32/64-bit TSS */
5288
0
                case 0x0b: /* busy 32/64-bit TSS */
5289
0
                    _regs.eflags |= X86_EFLAGS_ZF;
5290
0
                    break;
5291
0
                }
5292
0
            }
5293
0
            else
5294
0
                _regs.eflags |= X86_EFLAGS_ZF;
5295
0
            break;
5296
0
        case X86EMUL_EXCEPTION:
5297
0
            if ( ctxt->event_pending )
5298
0
            {
5299
0
                ASSERT(ctxt->event.vector == EXC_PF);
5300
0
        default:
5301
0
                goto done;
5302
0
            }
5303
0
            /* Instead of the exception, ZF remains cleared. */
5304
0
            rc = X86EMUL_OKAY;
5305
0
            break;
5306
0
        }
5307
0
        if ( _regs.eflags & X86_EFLAGS_ZF )
5308
0
            dst.val = sreg.limit;
5309
0
        else
5310
0
            dst.type = OP_NONE;
5311
0
        break;
5312
0
5313
0
    case X86EMUL_OPC(0x0f, 0x05): /* syscall */
5314
0
        generate_exception_if(!in_protmode(ctxt, ops), EXC_UD);
5315
0
5316
0
        /* Inject #UD if syscall/sysret are disabled. */
5317
0
        fail_if(ops->read_msr == NULL);
5318
0
        if ( (rc = ops->read_msr(MSR_EFER, &msr_val, ctxt)) != X86EMUL_OKAY )
5319
0
            goto done;
5320
0
        generate_exception_if((msr_val & EFER_SCE) == 0, EXC_UD);
5321
0
5322
0
        if ( (rc = ops->read_msr(MSR_STAR, &msr_val, ctxt)) != X86EMUL_OKAY )
5323
0
            goto done;
5324
0
5325
0
        cs.sel = (msr_val >> 32) & ~3; /* SELECTOR_RPL_MASK */
5326
0
        sreg.sel = cs.sel + 8;
5327
0
5328
0
        cs.base = sreg.base = 0; /* flat segment */
5329
0
        cs.limit = sreg.limit = ~0u;  /* 4GB limit */
5330
0
        sreg.attr = 0xc93; /* G+DB+P+S+Data */
5331
0
5332
0
#ifdef __x86_64__
5333
0
        if ( ctxt->lma )
5334
0
        {
5335
0
            cs.attr = 0xa9b; /* L+DB+P+S+Code */
5336
0
5337
0
            _regs.rcx = _regs.rip;
5338
0
            _regs.r11 = _regs.eflags & ~X86_EFLAGS_RF;
5339
0
5340
0
            if ( (rc = ops->read_msr(mode_64bit() ? MSR_LSTAR : MSR_CSTAR,
5341
0
                                     &msr_val, ctxt)) != X86EMUL_OKAY )
5342
0
                goto done;
5343
0
            _regs.rip = msr_val;
5344
0
5345
0
            if ( (rc = ops->read_msr(MSR_SYSCALL_MASK,
5346
0
                                     &msr_val, ctxt)) != X86EMUL_OKAY )
5347
0
                goto done;
5348
0
            _regs.eflags &= ~(msr_val | X86_EFLAGS_RF);
5349
0
        }
5350
0
        else
5351
0
#endif
5352
0
        {
5353
0
            cs.attr = 0xc9b; /* G+DB+P+S+Code */
5354
0
5355
0
            _regs.r(cx) = _regs.eip;
5356
0
            _regs.eip = msr_val;
5357
0
            _regs.eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF | X86_EFLAGS_RF);
5358
0
        }
5359
0
5360
0
        fail_if(ops->write_segment == NULL);
5361
0
        if ( (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) ||
5362
0
             (rc = ops->write_segment(x86_seg_ss, &sreg, ctxt)) )
5363
0
            goto done;
5364
0
5365
0
        /*
5366
0
         * SYSCALL (unlike most instructions) evaluates its singlestep action
5367
0
         * based on the resulting EFLAGS.TF, not the starting EFLAGS.TF.
5368
0
         *
5369
0
         * As the #DB is raised after the CPL change and before the OS can
5370
0
         * switch stack, it is a large risk for privilege escalation.
5371
0
         *
5372
0
         * 64bit kernels should mask EFLAGS.TF in MSR_SYSCALL_MASK to avoid any
5373
0
         * vulnerability.  Running the #DB handler on an IST stack is also a
5374
0
         * mitigation.
5375
0
         *
5376
0
         * 32bit kernels have no ability to mask EFLAGS.TF at all.
5377
0
         * Their only mitigation is to use a task gate for handling
5378
0
         * #DB (or to not use enable EFER.SCE to start with).
5379
0
         */
5380
0
        singlestep = _regs.eflags & X86_EFLAGS_TF;
5381
0
        break;
5382
0
5383
0
    case X86EMUL_OPC(0x0f, 0x06): /* clts */
5384
0
        generate_exception_if(!mode_ring0(), EXC_GP, 0);
5385
0
        fail_if((ops->read_cr == NULL) || (ops->write_cr == NULL));
5386
0
        if ( (rc = ops->read_cr(0, &dst.val, ctxt)) != X86EMUL_OKAY ||
5387
0
             (rc = ops->write_cr(0, dst.val & ~X86_CR0_TS, ctxt)) != X86EMUL_OKAY )
5388
0
            goto done;
5389
0
        break;
5390
0
5391
0
    case X86EMUL_OPC(0x0f, 0x08): /* invd */
5392
0
    case X86EMUL_OPC(0x0f, 0x09): /* wbinvd */
5393
0
        generate_exception_if(!mode_ring0(), EXC_GP, 0);
5394
0
        fail_if(ops->wbinvd == NULL);
5395
0
        if ( (rc = ops->wbinvd(ctxt)) != 0 )
5396
0
            goto done;
5397
0
        break;
5398
0
5399
0
    case X86EMUL_OPC(0x0f, 0x0b): /* ud2 */
5400
0
    case X86EMUL_OPC(0x0f, 0xb9): /* ud1 */
5401
0
    case X86EMUL_OPC(0x0f, 0xff): /* ud0 */
5402
0
        generate_exception(EXC_UD);
5403
0
5404
0
    case X86EMUL_OPC(0x0f, 0x0d): /* GrpP (prefetch) */
5405
0
    case X86EMUL_OPC(0x0f, 0x18): /* Grp16 (prefetch/nop) */
5406
0
    case X86EMUL_OPC(0x0f, 0x19) ... X86EMUL_OPC(0x0f, 0x1f): /* nop */
5407
0
        break;
5408
0
5409
0
#define CASE_SIMD_PACKED_INT(pfx, opc)       \
5410
0
    case X86EMUL_OPC(pfx, opc):              \
5411
0
    case X86EMUL_OPC_66(pfx, opc)
5412
0
#define CASE_SIMD_SINGLE_FP(kind, pfx, opc)  \
5413
0
    case X86EMUL_OPC##kind(pfx, opc):        \
5414
0
    case X86EMUL_OPC##kind##_F3(pfx, opc)
5415
0
#define CASE_SIMD_DOUBLE_FP(kind, pfx, opc)  \
5416
0
    case X86EMUL_OPC##kind##_66(pfx, opc):   \
5417
0
    case X86EMUL_OPC##kind##_F2(pfx, opc)
5418
0
#define CASE_SIMD_ALL_FP(kind, pfx, opc)     \
5419
0
    CASE_SIMD_SINGLE_FP(kind, pfx, opc):     \
5420
0
    CASE_SIMD_DOUBLE_FP(kind, pfx, opc)
5421
0
#define CASE_SIMD_PACKED_FP(kind, pfx, opc)  \
5422
0
    case X86EMUL_OPC##kind(pfx, opc):        \
5423
0
    case X86EMUL_OPC##kind##_66(pfx, opc)
5424
0
#define CASE_SIMD_SCALAR_FP(kind, pfx, opc)  \
5425
0
    case X86EMUL_OPC##kind##_F3(pfx, opc):   \
5426
0
    case X86EMUL_OPC##kind##_F2(pfx, opc)
5427
0
5428
0
    CASE_SIMD_SCALAR_FP(, 0x0f, 0x2b):     /* movnts{s,d} xmm,mem */
5429
0
        host_and_vcpu_must_have(sse4a);
5430
0
        /* fall through */
5431
0
    CASE_SIMD_PACKED_FP(, 0x0f, 0x2b):     /* movntp{s,d} xmm,m128 */
5432
0
    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x2b): /* vmovntp{s,d} {x,y}mm,mem */
5433
0
        generate_exception_if(ea.type != OP_MEM, EXC_UD);
5434
0
        sfence = true;
5435
0
        /* fall through */
5436
0
    CASE_SIMD_ALL_FP(, 0x0f, 0x10):        /* mov{up,s}{s,d} xmm/mem,xmm */
5437
0
    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x10): /* vmovup{s,d} {x,y}mm/mem,{x,y}mm */
5438
0
    CASE_SIMD_SCALAR_FP(_VEX, 0x0f, 0x10): /* vmovs{s,d} mem,xmm */
5439
0
                                           /* vmovs{s,d} xmm,xmm,xmm */
5440
0
    CASE_SIMD_ALL_FP(, 0x0f, 0x11):        /* mov{up,s}{s,d} xmm,xmm/mem */
5441
0
    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x11): /* vmovup{s,d} {x,y}mm,{x,y}mm/mem */
5442
0
    CASE_SIMD_SCALAR_FP(_VEX, 0x0f, 0x11): /* vmovs{s,d} xmm,mem */
5443
0
                                           /* vmovs{s,d} xmm,xmm,xmm */
5444
0
    CASE_SIMD_PACKED_FP(, 0x0f, 0x14):     /* unpcklp{s,d} xmm/m128,xmm */
5445
0
    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x14): /* vunpcklp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
5446
0
    CASE_SIMD_PACKED_FP(, 0x0f, 0x15):     /* unpckhp{s,d} xmm/m128,xmm */
5447
0
    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x15): /* vunpckhp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
5448
0
    CASE_SIMD_PACKED_FP(, 0x0f, 0x28):     /* movap{s,d} xmm/m128,xmm */
5449
0
    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x28): /* vmovap{s,d} {x,y}mm/mem,{x,y}mm */
5450
0
    CASE_SIMD_PACKED_FP(, 0x0f, 0x29):     /* movap{s,d} xmm,xmm/m128 */
5451
0
    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x29): /* vmovap{s,d} {x,y}mm,{x,y}mm/mem */
5452
0
    CASE_SIMD_ALL_FP(, 0x0f, 0x51):        /* sqrt{p,s}{s,d} xmm/mem,xmm */
5453
0
    CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x51):    /* vsqrtp{s,d} {x,y}mm/mem,{x,y}mm */
5454
0
                                           /* vsqrts{s,d} xmm/m32,xmm,xmm */
5455
0
    CASE_SIMD_SINGLE_FP(, 0x0f, 0x52):     /* rsqrt{p,s}s xmm/mem,xmm */
5456
0
    CASE_SIMD_SINGLE_FP(_VEX, 0x0f, 0x52): /* vrsqrtps {x,y}mm/mem,{x,y}mm */
5457
0
                                           /* vrsqrtss xmm/m32,xmm,xmm */
5458
0
    CASE_SIMD_SINGLE_FP(, 0x0f, 0x53):     /* rcp{p,s}s xmm/mem,xmm */
5459
0
    CASE_SIMD_SINGLE_FP(_VEX, 0x0f, 0x53): /* vrcpps {x,y}mm/mem,{x,y}mm */
5460
0
                                           /* vrcpss xmm/m32,xmm,xmm */
5461
0
    CASE_SIMD_PACKED_FP(, 0x0f, 0x54):     /* andp{s,d} xmm/m128,xmm */
5462
0
    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x54): /* vandp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
5463
0
    CASE_SIMD_PACKED_FP(, 0x0f, 0x55):     /* andnp{s,d} xmm/m128,xmm */
5464
0
    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x55): /* vandnp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
5465
0
    CASE_SIMD_PACKED_FP(, 0x0f, 0x56):     /* orp{s,d} xmm/m128,xmm */
5466
0
    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x56): /* vorp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
5467
0
    CASE_SIMD_PACKED_FP(, 0x0f, 0x57):     /* xorp{s,d} xmm/m128,xmm */
5468
0
    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x57): /* vxorp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
5469
0
    CASE_SIMD_ALL_FP(, 0x0f, 0x58):        /* add{p,s}{s,d} xmm/mem,xmm */
5470
0
    CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x58):    /* vadd{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
5471
0
    CASE_SIMD_ALL_FP(, 0x0f, 0x59):        /* mul{p,s}{s,d} xmm/mem,xmm */
5472
0
    CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x59):    /* vmul{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
5473
0
    CASE_SIMD_ALL_FP(, 0x0f, 0x5c):        /* sub{p,s}{s,d} xmm/mem,xmm */
5474
0
    CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x5c):    /* vsub{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
5475
0
    CASE_SIMD_ALL_FP(, 0x0f, 0x5d):        /* min{p,s}{s,d} xmm/mem,xmm */
5476
0
    CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x5d):    /* vmin{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
5477
0
    CASE_SIMD_ALL_FP(, 0x0f, 0x5e):        /* div{p,s}{s,d} xmm/mem,xmm */
5478
0
    CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x5e):    /* vdiv{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
5479
0
    CASE_SIMD_ALL_FP(, 0x0f, 0x5f):        /* max{p,s}{s,d} xmm/mem,xmm */
5480
0
    CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x5f):    /* vmax{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
5481
0
    simd_0f_fp:
5482
0
        if ( vex.opcx == vex_none )
5483
0
        {
5484
0
            if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
5485
0
            {
5486
0
    simd_0f_sse2:
5487
0
                vcpu_must_have(sse2);
5488
0
            }
5489
0
            else
5490
0
                vcpu_must_have(sse);
5491
0
    simd_0f_xmm:
5492
0
            get_fpu(X86EMUL_FPU_xmm, &fic);
5493
0
        }
5494
0
        else
5495
0
        {
5496
0
            /* vmovs{s,d} to/from memory have only two operands. */
5497
0
            if ( (b & ~1) == 0x10 && ea.type == OP_MEM )
5498
0
                d |= TwoOp;
5499
0
    simd_0f_avx:
5500
0
            host_and_vcpu_must_have(avx);
5501
0
    simd_0f_ymm:
5502
0
            get_fpu(X86EMUL_FPU_ymm, &fic);
5503
0
        }
5504
0
    simd_0f_common:
5505
0
        opc = init_prefixes(stub);
5506
0
        opc[0] = b;
5507
0
        opc[1] = modrm;
5508
0
        if ( ea.type == OP_MEM )
5509
0
        {
5510
0
            /* convert memory operand to (%rAX) */
5511
0
            rex_prefix &= ~REX_B;
5512
0
            vex.b = 1;
5513
0
            opc[1] &= 0x38;
5514
0
        }
5515
0
        fic.insn_bytes = PFX_BYTES + 2;
5516
0
        break;
5517
0
5518
0
    case X86EMUL_OPC_66(0x0f, 0x12):       /* movlpd m64,xmm */
5519
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x12):   /* vmovlpd m64,xmm,xmm */
5520
0
    CASE_SIMD_PACKED_FP(, 0x0f, 0x13):     /* movlp{s,d} xmm,m64 */
5521
0
    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x13): /* vmovlp{s,d} xmm,m64 */
5522
0
    case X86EMUL_OPC_66(0x0f, 0x16):       /* movhpd m64,xmm */
5523
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x16):   /* vmovhpd m64,xmm,xmm */
5524
0
    CASE_SIMD_PACKED_FP(, 0x0f, 0x17):     /* movhp{s,d} xmm,m64 */
5525
0
    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x17): /* vmovhp{s,d} xmm,m64 */
5526
0
        generate_exception_if(ea.type != OP_MEM, EXC_UD);
5527
0
        /* fall through */
5528
0
    case X86EMUL_OPC(0x0f, 0x12):          /* movlps m64,xmm */
5529
0
                                           /* movhlps xmm,xmm */
5530
0
    case X86EMUL_OPC_VEX(0x0f, 0x12):      /* vmovlps m64,xmm,xmm */
5531
0
                                           /* vmovhlps xmm,xmm,xmm */
5532
0
    case X86EMUL_OPC(0x0f, 0x16):          /* movhps m64,xmm */
5533
0
                                           /* movlhps xmm,xmm */
5534
0
    case X86EMUL_OPC_VEX(0x0f, 0x16):      /* vmovhps m64,xmm,xmm */
5535
0
                                           /* vmovlhps xmm,xmm,xmm */
5536
0
        generate_exception_if(vex.l, EXC_UD);
5537
0
        if ( (d & DstMask) != DstMem )
5538
0
            d &= ~TwoOp;
5539
0
        op_bytes = 8;
5540
0
        goto simd_0f_fp;
5541
0
5542
0
    case X86EMUL_OPC_F3(0x0f, 0x12):       /* movsldup xmm/m128,xmm */
5543
0
    case X86EMUL_OPC_VEX_F3(0x0f, 0x12):   /* vmovsldup {x,y}mm/mem,{x,y}mm */
5544
0
    case X86EMUL_OPC_F2(0x0f, 0x12):       /* movddup xmm/m64,xmm */
5545
0
    case X86EMUL_OPC_VEX_F2(0x0f, 0x12):   /* vmovddup {x,y}mm/mem,{x,y}mm */
5546
0
    case X86EMUL_OPC_F3(0x0f, 0x16):       /* movshdup xmm/m128,xmm */
5547
0
    case X86EMUL_OPC_VEX_F3(0x0f, 0x16):   /* vmovshdup {x,y}mm/mem,{x,y}mm */
5548
0
        d |= TwoOp;
5549
0
        op_bytes = !(vex.pfx & VEX_PREFIX_DOUBLE_MASK) || vex.l
5550
0
                   ? 16 << vex.l : 8;
5551
0
    simd_0f_sse3_avx:
5552
0
        if ( vex.opcx != vex_none )
5553
0
            goto simd_0f_avx;
5554
0
        host_and_vcpu_must_have(sse3);
5555
0
        goto simd_0f_xmm;
5556
0
5557
0
    case X86EMUL_OPC(0x0f, 0x20): /* mov cr,reg */
5558
0
    case X86EMUL_OPC(0x0f, 0x21): /* mov dr,reg */
5559
0
    case X86EMUL_OPC(0x0f, 0x22): /* mov reg,cr */
5560
0
    case X86EMUL_OPC(0x0f, 0x23): /* mov reg,dr */
5561
0
        generate_exception_if(!mode_ring0(), EXC_GP, 0);
5562
0
        if ( b & 2 )
5563
0
        {
5564
0
            /* Write to CR/DR. */
5565
0
            typeof(ops->write_cr) write = (b & 1) ? ops->write_dr
5566
0
                                                  : ops->write_cr;
5567
0
5568
0
            fail_if(!write);
5569
0
            rc = write(modrm_reg, src.val, ctxt);
5570
0
        }
5571
0
        else
5572
0
        {
5573
0
            /* Read from CR/DR. */
5574
0
            typeof(ops->read_cr) read = (b & 1) ? ops->read_dr : ops->read_cr;
5575
0
5576
0
            fail_if(!read);
5577
0
            rc = read(modrm_reg, &dst.val, ctxt);
5578
0
        }
5579
0
        if ( rc != X86EMUL_OKAY )
5580
0
            goto done;
5581
0
        break;
5582
0
5583
0
    case X86EMUL_OPC_66(0x0f, 0x2a):       /* cvtpi2pd mm/m64,xmm */
5584
0
        if ( ea.type == OP_REG )
5585
0
        {
5586
0
    case X86EMUL_OPC(0x0f, 0x2a):          /* cvtpi2ps mm/m64,xmm */
5587
0
    CASE_SIMD_PACKED_FP(, 0x0f, 0x2c):     /* cvttp{s,d}2pi xmm/mem,mm */
5588
0
    CASE_SIMD_PACKED_FP(, 0x0f, 0x2d):     /* cvtp{s,d}2pi xmm/mem,mm */
5589
0
            host_and_vcpu_must_have(mmx);
5590
0
        }
5591
0
        op_bytes = (b & 4) && (vex.pfx & VEX_PREFIX_DOUBLE_MASK) ? 16 : 8;
5592
0
        goto simd_0f_fp;
5593
0
5594
0
    CASE_SIMD_SCALAR_FP(, 0x0f, 0x2a):     /* cvtsi2s{s,d} r/m,xmm */
5595
0
    CASE_SIMD_SCALAR_FP(_VEX, 0x0f, 0x2a): /* vcvtsi2s{s,d} r/m,xmm,xmm */
5596
0
        if ( vex.opcx == vex_none )
5597
0
        {
5598
0
            if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
5599
0
                vcpu_must_have(sse2);
5600
0
            else
5601
0
                vcpu_must_have(sse);
5602
0
            get_fpu(X86EMUL_FPU_xmm, &fic);
5603
0
        }
5604
0
        else
5605
0
        {
5606
0
            host_and_vcpu_must_have(avx);
5607
0
            get_fpu(X86EMUL_FPU_ymm, &fic);
5608
0
        }
5609
0
5610
0
        if ( ea.type == OP_MEM )
5611
0
        {
5612
0
            rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val,
5613
0
                            rex_prefix & REX_W ? 8 : 4, ctxt, ops);
5614
0
            if ( rc != X86EMUL_OKAY )
5615
0
                goto done;
5616
0
        }
5617
0
        else
5618
0
            src.val = rex_prefix & REX_W ? *ea.reg : (uint32_t)*ea.reg;
5619
0
5620
0
        state->simd_size = simd_none;
5621
0
        goto simd_0f_rm;
5622
0
5623
0
    CASE_SIMD_SCALAR_FP(, 0x0f, 0x2c):     /* cvtts{s,d}2si xmm/mem,reg */
5624
0
    CASE_SIMD_SCALAR_FP(_VEX, 0x0f, 0x2c): /* vcvtts{s,d}2si xmm/mem,reg */
5625
0
    CASE_SIMD_SCALAR_FP(, 0x0f, 0x2d):     /* cvts{s,d}2si xmm/mem,reg */
5626
0
    CASE_SIMD_SCALAR_FP(_VEX, 0x0f, 0x2d): /* vcvts{s,d}2si xmm/mem,reg */
5627
0
        if ( vex.opcx == vex_none )
5628
0
        {
5629
0
            if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
5630
0
                vcpu_must_have(sse2);
5631
0
            else
5632
0
                vcpu_must_have(sse);
5633
0
            get_fpu(X86EMUL_FPU_xmm, &fic);
5634
0
        }
5635
0
        else
5636
0
        {
5637
0
            generate_exception_if(vex.reg != 0xf, EXC_UD);
5638
0
            vex.l = 0;
5639
0
            host_and_vcpu_must_have(avx);
5640
0
            get_fpu(X86EMUL_FPU_ymm, &fic);
5641
0
        }
5642
0
5643
0
        opc = init_prefixes(stub);
5644
0
        opc[0] = b;
5645
0
        /* Convert GPR destination to %rAX and memory operand to (%rCX). */
5646
0
        rex_prefix &= ~REX_R;
5647
0
        vex.r = 1;
5648
0
        if ( ea.type == OP_MEM )
5649
0
        {
5650
0
            rex_prefix &= ~REX_B;
5651
0
            vex.b = 1;
5652
0
            opc[1] = 0x01;
5653
0
5654
0
            rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp,
5655
0
                           vex.pfx & VEX_PREFIX_DOUBLE_MASK ? 8 : 4, ctxt);
5656
0
            if ( rc != X86EMUL_OKAY )
5657
0
                goto done;
5658
0
        }
5659
0
        else
5660
0
            opc[1] = modrm & 0xc7;
5661
0
        if ( !mode_64bit() )
5662
0
            vex.w = 0;
5663
0
        fic.insn_bytes = PFX_BYTES + 2;
5664
0
        opc[2] = 0xc3;
5665
0
5666
0
        copy_REX_VEX(opc, rex_prefix, vex);
5667
0
        ea.reg = decode_register(modrm_reg, &_regs, 0);
5668
0
        invoke_stub("", "", "=a" (*ea.reg), "+m" (fic.exn_raised)
5669
0
                            : "c" (mmvalp), "m" (*mmvalp));
5670
0
5671
0
        put_stub(stub);
5672
0
        check_xmm_exn(&fic);
5673
0
5674
0
        state->simd_size = simd_none;
5675
0
        break;
5676
0
5677
0
    CASE_SIMD_PACKED_FP(, 0x0f, 0x2e):     /* ucomis{s,d} xmm/mem,xmm */
5678
0
    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x2e): /* vucomis{s,d} xmm/mem,xmm */
5679
0
    CASE_SIMD_PACKED_FP(, 0x0f, 0x2f):     /* comis{s,d} xmm/mem,xmm */
5680
0
    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x2f): /* vcomis{s,d} xmm/mem,xmm */
5681
0
        if ( vex.opcx == vex_none )
5682
0
        {
5683
0
            if ( vex.pfx )
5684
0
                vcpu_must_have(sse2);
5685
0
            else
5686
0
                vcpu_must_have(sse);
5687
0
            get_fpu(X86EMUL_FPU_xmm, &fic);
5688
0
        }
5689
0
        else
5690
0
        {
5691
0
            generate_exception_if(vex.reg != 0xf, EXC_UD);
5692
0
            host_and_vcpu_must_have(avx);
5693
0
            get_fpu(X86EMUL_FPU_ymm, &fic);
5694
0
        }
5695
0
5696
0
        opc = init_prefixes(stub);
5697
0
        opc[0] = b;
5698
0
        opc[1] = modrm;
5699
0
        if ( ea.type == OP_MEM )
5700
0
        {
5701
0
            rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, vex.pfx ? 8 : 4,
5702
0
                           ctxt);
5703
0
            if ( rc != X86EMUL_OKAY )
5704
0
                goto done;
5705
0
5706
0
            /* Convert memory operand to (%rAX). */
5707
0
            rex_prefix &= ~REX_B;
5708
0
            vex.b = 1;
5709
0
            opc[1] &= 0x38;
5710
0
        }
5711
0
        fic.insn_bytes = PFX_BYTES + 2;
5712
0
        opc[2] = 0xc3;
5713
0
5714
0
        copy_REX_VEX(opc, rex_prefix, vex);
5715
0
        invoke_stub(_PRE_EFLAGS("[eflags]", "[mask]", "[tmp]"),
5716
0
                    _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"),
5717
0
                    [eflags] "+g" (_regs.eflags),
5718
0
                    [tmp] "=&r" (dummy), "+m" (*mmvalp),
5719
0
                    "+m" (fic.exn_raised)
5720
0
                    : "a" (mmvalp), [mask] "i" (EFLAGS_MASK));
5721
0
5722
0
        put_stub(stub);
5723
0
        check_xmm_exn(&fic);
5724
0
5725
0
        ASSERT(!state->simd_size);
5726
0
        break;
5727
0
5728
0
    case X86EMUL_OPC(0x0f, 0x30): /* wrmsr */
5729
0
        generate_exception_if(!mode_ring0(), EXC_GP, 0);
5730
0
        fail_if(ops->write_msr == NULL);
5731
0
        if ( (rc = ops->write_msr(_regs.ecx,
5732
0
                                  ((uint64_t)_regs.r(dx) << 32) | _regs.eax,
5733
0
                                  ctxt)) != 0 )
5734
0
            goto done;
5735
0
        break;
5736
0
5737
0
    case X86EMUL_OPC(0x0f, 0x31): rdtsc: /* rdtsc */
5738
0
        if ( !mode_ring0() )
5739
0
        {
5740
0
            fail_if(ops->read_cr == NULL);
5741
0
            if ( (rc = ops->read_cr(4, &cr4, ctxt)) )
5742
0
                goto done;
5743
0
            generate_exception_if(cr4 & X86_CR4_TSD, EXC_GP, 0);
5744
0
        }
5745
0
        fail_if(ops->read_msr == NULL);
5746
0
        if ( (rc = ops->read_msr(MSR_IA32_TSC,
5747
0
                                 &msr_val, ctxt)) != X86EMUL_OKAY )
5748
0
            goto done;
5749
0
        _regs.r(dx) = msr_val >> 32;
5750
0
        _regs.r(ax) = (uint32_t)msr_val;
5751
0
        break;
5752
0
5753
0
    case X86EMUL_OPC(0x0f, 0x32): /* rdmsr */
5754
0
        generate_exception_if(!mode_ring0(), EXC_GP, 0);
5755
0
        fail_if(ops->read_msr == NULL);
5756
0
        if ( (rc = ops->read_msr(_regs.ecx, &msr_val, ctxt)) != X86EMUL_OKAY )
5757
0
            goto done;
5758
0
        _regs.r(dx) = msr_val >> 32;
5759
0
        _regs.r(ax) = (uint32_t)msr_val;
5760
0
        break;
5761
0
5762
0
    case X86EMUL_OPC(0x0f, 0x34): /* sysenter */
5763
0
        vcpu_must_have(sep);
5764
0
        generate_exception_if(mode_ring0(), EXC_GP, 0);
5765
0
        generate_exception_if(!in_protmode(ctxt, ops), EXC_GP, 0);
5766
0
5767
0
        fail_if(ops->read_msr == NULL);
5768
0
        if ( (rc = ops->read_msr(MSR_IA32_SYSENTER_CS,
5769
0
                                 &msr_val, ctxt)) != X86EMUL_OKAY )
5770
0
            goto done;
5771
0
5772
0
        generate_exception_if(!(msr_val & 0xfffc), EXC_GP, 0);
5773
0
5774
0
        _regs.eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF | X86_EFLAGS_RF);
5775
0
5776
0
        cs.sel = msr_val & ~3; /* SELECTOR_RPL_MASK */
5777
0
        cs.base = 0;   /* flat segment */
5778
0
        cs.limit = ~0u;  /* 4GB limit */
5779
0
        cs.attr = ctxt->lma ? 0xa9b  /* G+L+P+S+Code */
5780
0
                            : 0xc9b; /* G+DB+P+S+Code */
5781
0
5782
0
        sreg.sel = cs.sel + 8;
5783
0
        sreg.base = 0;   /* flat segment */
5784
0
        sreg.limit = ~0u;  /* 4GB limit */
5785
0
        sreg.attr = 0xc93; /* G+DB+P+S+Data */
5786
0
5787
0
        fail_if(ops->write_segment == NULL);
5788
0
        if ( (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) != 0 ||
5789
0
             (rc = ops->write_segment(x86_seg_ss, &sreg, ctxt)) != 0 )
5790
0
            goto done;
5791
0
5792
0
        if ( (rc = ops->read_msr(MSR_IA32_SYSENTER_EIP,
5793
0
                                 &msr_val, ctxt)) != X86EMUL_OKAY )
5794
0
            goto done;
5795
0
        _regs.r(ip) = ctxt->lma ? msr_val : (uint32_t)msr_val;
5796
0
5797
0
        if ( (rc = ops->read_msr(MSR_IA32_SYSENTER_ESP,
5798
0
                                 &msr_val, ctxt)) != X86EMUL_OKAY )
5799
0
            goto done;
5800
0
        _regs.r(sp) = ctxt->lma ? msr_val : (uint32_t)msr_val;
5801
0
5802
0
        singlestep = _regs.eflags & X86_EFLAGS_TF;
5803
0
        break;
5804
0
5805
0
    case X86EMUL_OPC(0x0f, 0x35): /* sysexit */
5806
0
        vcpu_must_have(sep);
5807
0
        generate_exception_if(!mode_ring0(), EXC_GP, 0);
5808
0
        generate_exception_if(!in_protmode(ctxt, ops), EXC_GP, 0);
5809
0
5810
0
        fail_if(ops->read_msr == NULL);
5811
0
        if ( (rc = ops->read_msr(MSR_IA32_SYSENTER_CS,
5812
0
                                 &msr_val, ctxt)) != X86EMUL_OKAY )
5813
0
            goto done;
5814
0
5815
0
        generate_exception_if(!(msr_val & 0xfffc), EXC_GP, 0);
5816
0
        generate_exception_if(op_bytes == 8 &&
5817
0
                              (!is_canonical_address(_regs.r(dx)) ||
5818
0
                               !is_canonical_address(_regs.r(cx))),
5819
0
                              EXC_GP, 0);
5820
0
5821
0
        cs.sel = (msr_val | 3) + /* SELECTOR_RPL_MASK */
5822
0
                 (op_bytes == 8 ? 32 : 16);
5823
0
        cs.base = 0;   /* flat segment */
5824
0
        cs.limit = ~0u;  /* 4GB limit */
5825
0
        cs.attr = op_bytes == 8 ? 0xafb  /* L+DB+P+DPL3+S+Code */
5826
0
                                : 0xcfb; /* G+DB+P+DPL3+S+Code */
5827
0
5828
0
        sreg.sel = cs.sel + 8;
5829
0
        sreg.base = 0;   /* flat segment */
5830
0
        sreg.limit = ~0u;  /* 4GB limit */
5831
0
        sreg.attr = 0xcf3; /* G+DB+P+DPL3+S+Data */
5832
0
5833
0
        fail_if(ops->write_segment == NULL);
5834
0
        if ( (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) != 0 ||
5835
0
             (rc = ops->write_segment(x86_seg_ss, &sreg, ctxt)) != 0 )
5836
0
            goto done;
5837
0
5838
0
        _regs.r(ip) = op_bytes == 8 ? _regs.r(dx) : _regs.edx;
5839
0
        _regs.r(sp) = op_bytes == 8 ? _regs.r(cx) : _regs.ecx;
5840
0
5841
0
        singlestep = _regs.eflags & X86_EFLAGS_TF;
5842
0
        break;
5843
0
5844
0
    case X86EMUL_OPC(0x0f, 0x40) ... X86EMUL_OPC(0x0f, 0x4f): /* cmovcc */
5845
0
        vcpu_must_have(cmov);
5846
0
        if ( test_cc(b, _regs.eflags) )
5847
0
            dst.val = src.val;
5848
0
        break;
5849
0
5850
0
    CASE_SIMD_PACKED_FP(, 0x0f, 0x50):     /* movmskp{s,d} xmm,reg */
5851
0
    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x50): /* vmovmskp{s,d} {x,y}mm,reg */
5852
0
    CASE_SIMD_PACKED_INT(0x0f, 0xd7):      /* pmovmskb {,x}mm,reg */
5853
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xd7):   /* vpmovmskb {x,y}mm,reg */
5854
0
        opc = init_prefixes(stub);
5855
0
        opc[0] = b;
5856
0
        /* Convert GPR destination to %rAX. */
5857
0
        rex_prefix &= ~REX_R;
5858
0
        vex.r = 1;
5859
0
        if ( !mode_64bit() )
5860
0
            vex.w = 0;
5861
0
        opc[1] = modrm & 0xc7;
5862
0
        fic.insn_bytes = PFX_BYTES + 2;
5863
0
    simd_0f_to_gpr:
5864
0
        opc[fic.insn_bytes - PFX_BYTES] = 0xc3;
5865
0
5866
0
        generate_exception_if(ea.type != OP_REG, EXC_UD);
5867
0
5868
0
        if ( vex.opcx == vex_none )
5869
0
        {
5870
0
            if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
5871
0
                vcpu_must_have(sse2);
5872
0
            else
5873
0
            {
5874
0
                if ( b != 0x50 )
5875
0
                {
5876
0
                    host_and_vcpu_must_have(mmx);
5877
0
                    vcpu_must_have(mmxext);
5878
0
                }
5879
0
                else
5880
0
                    vcpu_must_have(sse);
5881
0
            }
5882
0
            if ( b == 0x50 || (vex.pfx & VEX_PREFIX_DOUBLE_MASK) )
5883
0
                get_fpu(X86EMUL_FPU_xmm, &fic);
5884
0
            else
5885
0
                get_fpu(X86EMUL_FPU_mmx, &fic);
5886
0
        }
5887
0
        else
5888
0
        {
5889
0
            generate_exception_if(vex.reg != 0xf, EXC_UD);
5890
0
            if ( b == 0x50 || !vex.l )
5891
0
                host_and_vcpu_must_have(avx);
5892
0
            else
5893
0
                host_and_vcpu_must_have(avx2);
5894
0
            get_fpu(X86EMUL_FPU_ymm, &fic);
5895
0
        }
5896
0
5897
0
        copy_REX_VEX(opc, rex_prefix, vex);
5898
0
        invoke_stub("", "", "=a" (dst.val) : [dummy] "i" (0));
5899
0
5900
0
        put_stub(stub);
5901
0
        check_xmm_exn(&fic);
5902
0
5903
0
        ASSERT(!state->simd_size);
5904
0
        dst.bytes = 4;
5905
0
        break;
5906
0
5907
0
    CASE_SIMD_ALL_FP(, 0x0f, 0x5a):        /* cvt{p,s}{s,d}2{p,s}{s,d} xmm/mem,xmm */
5908
0
    CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x5a):    /* vcvtp{s,d}2p{s,d} xmm/mem,xmm */
5909
0
                                           /* vcvts{s,d}2s{s,d} xmm/mem,xmm,xmm */
5910
0
        op_bytes = 4 << (((vex.pfx & VEX_PREFIX_SCALAR_MASK) ? 0 : 1 + vex.l) +
5911
0
                         !!(vex.pfx & VEX_PREFIX_DOUBLE_MASK));
5912
0
    simd_0f_cvt:
5913
0
        if ( vex.opcx == vex_none )
5914
0
            goto simd_0f_sse2;
5915
0
        goto simd_0f_avx;
5916
0
5917
0
    CASE_SIMD_PACKED_FP(, 0x0f, 0x5b):     /* cvt{ps,dq}2{dq,ps} xmm/mem,xmm */
5918
0
    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x5b): /* vcvt{ps,dq}2{dq,ps} {x,y}mm/mem,{x,y}mm */
5919
0
    case X86EMUL_OPC_F3(0x0f, 0x5b):       /* cvttps2dq xmm/mem,xmm */
5920
0
    case X86EMUL_OPC_VEX_F3(0x0f, 0x5b):   /* vcvttps2dq {x,y}mm/mem,{x,y}mm */
5921
0
        d |= TwoOp;
5922
0
        op_bytes = 16 << vex.l;
5923
0
        goto simd_0f_cvt;
5924
0
5925
0
    CASE_SIMD_PACKED_INT(0x0f, 0x60):    /* punpcklbw {,x}mm/mem,{,x}mm */
5926
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x60): /* vpunpcklbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
5927
0
    CASE_SIMD_PACKED_INT(0x0f, 0x61):    /* punpcklwd {,x}mm/mem,{,x}mm */
5928
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x61): /* vpunpcklwd {x,y}mm/mem,{x,y}mm,{x,y}mm */
5929
0
    CASE_SIMD_PACKED_INT(0x0f, 0x62):    /* punpckldq {,x}mm/mem,{,x}mm */
5930
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x62): /* vpunpckldq {x,y}mm/mem,{x,y}mm,{x,y}mm */
5931
0
    CASE_SIMD_PACKED_INT(0x0f, 0x68):    /* punpckhbw {,x}mm/mem,{,x}mm */
5932
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x68): /* vpunpckhbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
5933
0
    CASE_SIMD_PACKED_INT(0x0f, 0x69):    /* punpckhwd {,x}mm/mem,{,x}mm */
5934
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x69): /* vpunpckhwd {x,y}mm/mem,{x,y}mm,{x,y}mm */
5935
0
    CASE_SIMD_PACKED_INT(0x0f, 0x6a):    /* punpckhdq {,x}mm/mem,{,x}mm */
5936
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x6a): /* vpunpckhdq {x,y}mm/mem,{x,y}mm,{x,y}mm */
5937
0
        op_bytes = vex.pfx ? 16 << vex.l : b & 8 ? 8 : 4;
5938
0
        /* fall through */
5939
0
    CASE_SIMD_PACKED_INT(0x0f, 0x63):    /* packssbw {,x}mm/mem,{,x}mm */
5940
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x63): /* vpackssbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
5941
0
    CASE_SIMD_PACKED_INT(0x0f, 0x64):    /* pcmpgtb {,x}mm/mem,{,x}mm */
5942
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x64): /* vpcmpgtb {x,y}mm/mem,{x,y}mm,{x,y}mm */
5943
0
    CASE_SIMD_PACKED_INT(0x0f, 0x65):    /* pcmpgtw {,x}mm/mem,{,x}mm */
5944
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x65): /* vpcmpgtw {x,y}mm/mem,{x,y}mm,{x,y}mm */
5945
0
    CASE_SIMD_PACKED_INT(0x0f, 0x66):    /* pcmpgtd {,x}mm/mem,{,x}mm */
5946
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x66): /* vpcmpgtd {x,y}mm/mem,{x,y}mm,{x,y}mm */
5947
0
    CASE_SIMD_PACKED_INT(0x0f, 0x67):    /* packusbw {,x}mm/mem,{,x}mm */
5948
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x67): /* vpackusbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
5949
0
    CASE_SIMD_PACKED_INT(0x0f, 0x6b):    /* packsswd {,x}mm/mem,{,x}mm */
5950
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x6b): /* vpacksswd {x,y}mm/mem,{x,y}mm,{x,y}mm */
5951
0
    case X86EMUL_OPC_66(0x0f, 0x6c):     /* punpcklqdq xmm/m128,xmm */
5952
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x6c): /* vpunpcklqdq {x,y}mm/mem,{x,y}mm,{x,y}mm */
5953
0
    case X86EMUL_OPC_66(0x0f, 0x6d):     /* punpckhqdq xmm/m128,xmm */
5954
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x6d): /* vpunpckhqdq {x,y}mm/mem,{x,y}mm,{x,y}mm */
5955
0
    CASE_SIMD_PACKED_INT(0x0f, 0x74):    /* pcmpeqb {,x}mm/mem,{,x}mm */
5956
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x74): /* vpcmpeqb {x,y}mm/mem,{x,y}mm,{x,y}mm */
5957
0
    CASE_SIMD_PACKED_INT(0x0f, 0x75):    /* pcmpeqw {,x}mm/mem,{,x}mm */
5958
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x75): /* vpcmpeqw {x,y}mm/mem,{x,y}mm,{x,y}mm */
5959
0
    CASE_SIMD_PACKED_INT(0x0f, 0x76):    /* pcmpeqd {,x}mm/mem,{,x}mm */
5960
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x76): /* vpcmpeqd {x,y}mm/mem,{x,y}mm,{x,y}mm */
5961
0
    case X86EMUL_OPC_66(0x0f, 0xd4):     /* paddq xmm/m128,xmm */
5962
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xd4): /* vpaddq {x,y}mm/mem,{x,y}mm,{x,y}mm */
5963
0
    CASE_SIMD_PACKED_INT(0x0f, 0xd5):    /* pmullw {,x}mm/mem,{,x}mm */
5964
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xd5): /* vpmullw {x,y}mm/mem,{x,y}mm,{x,y}mm */
5965
0
    CASE_SIMD_PACKED_INT(0x0f, 0xd8):    /* psubusb {,x}mm/mem,{,x}mm */
5966
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xd8): /* vpsubusb {x,y}mm/mem,{x,y}mm,{x,y}mm */
5967
0
    CASE_SIMD_PACKED_INT(0x0f, 0xd9):    /* psubusw {,x}mm/mem,{,x}mm */
5968
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xd9): /* vpsubusw {x,y}mm/mem,{x,y}mm,{x,y}mm */
5969
0
    case X86EMUL_OPC_66(0x0f, 0xda):     /* pminub xmm/m128,xmm */
5970
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xda): /* vpminub {x,y}mm/mem,{x,y}mm,{x,y}mm */
5971
0
    CASE_SIMD_PACKED_INT(0x0f, 0xdb):    /* pand {,x}mm/mem,{,x}mm */
5972
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xdb): /* vpand {x,y}mm/mem,{x,y}mm,{x,y}mm */
5973
0
    CASE_SIMD_PACKED_INT(0x0f, 0xdc):    /* paddusb {,x}mm/mem,{,x}mm */
5974
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xdc): /* vpaddusb {x,y}mm/mem,{x,y}mm,{x,y}mm */
5975
0
    CASE_SIMD_PACKED_INT(0x0f, 0xdd):    /* paddusw {,x}mm/mem,{,x}mm */
5976
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xdd): /* vpaddusw {x,y}mm/mem,{x,y}mm,{x,y}mm */
5977
0
    case X86EMUL_OPC_66(0x0f, 0xde):     /* pmaxub xmm/m128,xmm */
5978
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xde): /* vpmaxub {x,y}mm/mem,{x,y}mm,{x,y}mm */
5979
0
    CASE_SIMD_PACKED_INT(0x0f, 0xdf):    /* pandn {,x}mm/mem,{,x}mm */
5980
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xdf): /* vpandn {x,y}mm/mem,{x,y}mm,{x,y}mm */
5981
0
    case X86EMUL_OPC_66(0x0f, 0xe0):     /* pavgb xmm/m128,xmm */
5982
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xe0): /* vpavgb {x,y}mm/mem,{x,y}mm,{x,y}mm */
5983
0
    case X86EMUL_OPC_66(0x0f, 0xe3):     /* pavgw xmm/m128,xmm */
5984
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xe3): /* vpavgw {x,y}mm/mem,{x,y}mm,{x,y}mm */
5985
0
    case X86EMUL_OPC_66(0x0f, 0xe4):     /* pmulhuw xmm/m128,xmm */
5986
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xe4): /* vpmulhuw {x,y}mm/mem,{x,y}mm,{x,y}mm */
5987
0
    CASE_SIMD_PACKED_INT(0x0f, 0xe5):    /* pmulhw {,x}mm/mem,{,x}mm */
5988
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xe5): /* vpmulhw {x,y}mm/mem,{x,y}mm,{x,y}mm */
5989
0
    CASE_SIMD_PACKED_INT(0x0f, 0xe8):    /* psubsb {,x}mm/mem,{,x}mm */
5990
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xe8): /* vpsubsb {x,y}mm/mem,{x,y}mm,{x,y}mm */
5991
0
    CASE_SIMD_PACKED_INT(0x0f, 0xe9):    /* psubsw {,x}mm/mem,{,x}mm */
5992
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xe9): /* vpsubsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
5993
0
    case X86EMUL_OPC_66(0x0f, 0xea):     /* pminsw xmm/m128,xmm */
5994
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xea): /* vpminsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
5995
0
    CASE_SIMD_PACKED_INT(0x0f, 0xeb):    /* por {,x}mm/mem,{,x}mm */
5996
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xeb): /* vpor {x,y}mm/mem,{x,y}mm,{x,y}mm */
5997
0
    CASE_SIMD_PACKED_INT(0x0f, 0xec):    /* paddsb {,x}mm/mem,{,x}mm */
5998
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xec): /* vpaddsb {x,y}mm/mem,{x,y}mm,{x,y}mm */
5999
0
    CASE_SIMD_PACKED_INT(0x0f, 0xed):    /* paddsw {,x}mm/mem,{,x}mm */
6000
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xed): /* vpaddsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
6001
0
    case X86EMUL_OPC_66(0x0f, 0xee):     /* pmaxsw xmm/m128,xmm */
6002
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xee): /* vpmaxsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
6003
0
    CASE_SIMD_PACKED_INT(0x0f, 0xef):    /* pxor {,x}mm/mem,{,x}mm */
6004
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xef): /* vpxor {x,y}mm/mem,{x,y}mm,{x,y}mm */
6005
0
    case X86EMUL_OPC_66(0x0f, 0xf4):     /* pmuludq xmm/m128,xmm */
6006
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xf4): /* vpmuludq {x,y}mm/mem,{x,y}mm,{x,y}mm */
6007
0
    case X86EMUL_OPC_66(0x0f, 0xf6):     /* psadbw xmm/m128,xmm */
6008
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xf6): /* vpsadbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
6009
0
    CASE_SIMD_PACKED_INT(0x0f, 0xf8):    /* psubb {,x}mm/mem,{,x}mm */
6010
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xf8): /* vpsubb {x,y}mm/mem,{x,y}mm,{x,y}mm */
6011
0
    CASE_SIMD_PACKED_INT(0x0f, 0xf9):    /* psubw {,x}mm/mem,{,x}mm */
6012
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xf9): /* vpsubw {x,y}mm/mem,{x,y}mm,{x,y}mm */
6013
0
    CASE_SIMD_PACKED_INT(0x0f, 0xfa):    /* psubd {,x}mm/mem,{,x}mm */
6014
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xfa): /* vpsubd {x,y}mm/mem,{x,y}mm,{x,y}mm */
6015
0
    case X86EMUL_OPC_66(0x0f, 0xfb):     /* psubq xmm/m128,xmm */
6016
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xfb): /* vpsubq {x,y}mm/mem,{x,y}mm,{x,y}mm */
6017
0
    CASE_SIMD_PACKED_INT(0x0f, 0xfc):    /* paddb {,x}mm/mem,{,x}mm */
6018
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xfc): /* vpaddb {x,y}mm/mem,{x,y}mm,{x,y}mm */
6019
0
    CASE_SIMD_PACKED_INT(0x0f, 0xfd):    /* paddw {,x}mm/mem,{,x}mm */
6020
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xfd): /* vpaddw {x,y}mm/mem,{x,y}mm,{x,y}mm */
6021
0
    CASE_SIMD_PACKED_INT(0x0f, 0xfe):    /* paddd {,x}mm/mem,{,x}mm */
6022
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xfe): /* vpaddd {x,y}mm/mem,{x,y}mm,{x,y}mm */
6023
0
    simd_0f_int:
6024
0
        if ( vex.opcx != vex_none )
6025
0
        {
6026
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x00): /* vpshufb {x,y}mm/mem,{x,y}mm,{x,y}mm */
6027
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x01): /* vphaddw {x,y}mm/mem,{x,y}mm,{x,y}mm */
6028
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x02): /* vphaddd {x,y}mm/mem,{x,y}mm,{x,y}mm */
6029
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x03): /* vphaddsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
6030
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x04): /* vpmaddubsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
6031
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x05): /* vphsubw {x,y}mm/mem,{x,y}mm,{x,y}mm */
6032
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x06): /* vphsubd {x,y}mm/mem,{x,y}mm,{x,y}mm */
6033
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x07): /* vphsubsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
6034
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x08): /* vpsignb {x,y}mm/mem,{x,y}mm,{x,y}mm */
6035
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x09): /* vpsignw {x,y}mm/mem,{x,y}mm,{x,y}mm */
6036
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x0a): /* vpsignd {x,y}mm/mem,{x,y}mm,{x,y}mm */
6037
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x0b): /* vpmulhrsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
6038
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x1c): /* vpabsb {x,y}mm/mem,{x,y}mm */
6039
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x1d): /* vpabsw {x,y}mm/mem,{x,y}mm */
6040
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x1e): /* vpabsd {x,y}mm/mem,{x,y}mm */
6041
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x28): /* vpmuldq {x,y}mm/mem,{x,y}mm,{x,y}mm */
6042
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x29): /* vpcmpeqq {x,y}mm/mem,{x,y}mm,{x,y}mm */
6043
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x2b): /* vpackusdw {x,y}mm/mem,{x,y}mm,{x,y}mm */
6044
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x37): /* vpcmpgtq {x,y}mm/mem,{x,y}mm,{x,y}mm */
6045
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x38): /* vpminsb {x,y}mm/mem,{x,y}mm,{x,y}mm */
6046
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x39): /* vpminsd {x,y}mm/mem,{x,y}mm,{x,y}mm */
6047
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x3a): /* vpminub {x,y}mm/mem,{x,y}mm,{x,y}mm */
6048
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x3b): /* vpminud {x,y}mm/mem,{x,y}mm,{x,y}mm */
6049
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x3c): /* vpmaxsb {x,y}mm/mem,{x,y}mm,{x,y}mm */
6050
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x3d): /* vpmaxsd {x,y}mm/mem,{x,y}mm,{x,y}mm */
6051
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x3e): /* vpmaxub {x,y}mm/mem,{x,y}mm,{x,y}mm */
6052
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x3f): /* vpmaxud {x,y}mm/mem,{x,y}mm,{x,y}mm */
6053
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x40): /* vpmulld {x,y}mm/mem,{x,y}mm,{x,y}mm */
6054
0
            if ( !vex.l )
6055
0
                goto simd_0f_avx;
6056
0
            host_and_vcpu_must_have(avx2);
6057
0
            goto simd_0f_ymm;
6058
0
        }
6059
0
        if ( vex.pfx )
6060
0
            goto simd_0f_sse2;
6061
0
    simd_0f_mmx:
6062
0
        host_and_vcpu_must_have(mmx);
6063
0
        get_fpu(X86EMUL_FPU_mmx, &fic);
6064
0
        goto simd_0f_common;
6065
0
6066
0
    CASE_SIMD_PACKED_INT(0x0f, 0x6e):    /* mov{d,q} r/m,{,x}mm */
6067
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x6e): /* vmov{d,q} r/m,xmm */
6068
0
    CASE_SIMD_PACKED_INT(0x0f, 0x7e):    /* mov{d,q} {,x}mm,r/m */
6069
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x7e): /* vmov{d,q} xmm,r/m */
6070
0
        if ( vex.opcx != vex_none )
6071
0
        {
6072
0
            generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
6073
0
            host_and_vcpu_must_have(avx);
6074
0
            get_fpu(X86EMUL_FPU_ymm, &fic);
6075
0
        }
6076
0
        else if ( vex.pfx )
6077
0
        {
6078
0
            vcpu_must_have(sse2);
6079
0
            get_fpu(X86EMUL_FPU_xmm, &fic);
6080
0
        }
6081
0
        else
6082
0
        {
6083
0
            host_and_vcpu_must_have(mmx);
6084
0
            get_fpu(X86EMUL_FPU_mmx, &fic);
6085
0
        }
6086
0
6087
0
    simd_0f_rm:
6088
0
        opc = init_prefixes(stub);
6089
0
        opc[0] = b;
6090
0
        /* Convert memory/GPR operand to (%rAX). */
6091
0
        rex_prefix &= ~REX_B;
6092
0
        vex.b = 1;
6093
0
        if ( !mode_64bit() )
6094
0
            vex.w = 0;
6095
0
        opc[1] = modrm & 0x38;
6096
0
        fic.insn_bytes = PFX_BYTES + 2;
6097
0
        opc[2] = 0xc3;
6098
0
6099
0
        copy_REX_VEX(opc, rex_prefix, vex);
6100
0
        invoke_stub("", "", "+m" (src.val), "+m" (fic.exn_raised)
6101
0
                            : "a" (&src.val));
6102
0
        dst.val = src.val;
6103
0
6104
0
        put_stub(stub);
6105
0
        check_xmm_exn(&fic);
6106
0
6107
0
        ASSERT(!state->simd_size);
6108
0
        break;
6109
0
6110
0
    case X86EMUL_OPC_66(0x0f, 0xe7):     /* movntdq xmm,m128 */
6111
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xe7): /* vmovntdq {x,y}mm,mem */
6112
0
        generate_exception_if(ea.type != OP_MEM, EXC_UD);
6113
0
        sfence = true;
6114
0
        /* fall through */
6115
0
    case X86EMUL_OPC_66(0x0f, 0x6f):     /* movdqa xmm/m128,xmm */
6116
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x6f): /* vmovdqa {x,y}mm/mem,{x,y}mm */
6117
0
    case X86EMUL_OPC_F3(0x0f, 0x6f):     /* movdqu xmm/m128,xmm */
6118
0
    case X86EMUL_OPC_VEX_F3(0x0f, 0x6f): /* vmovdqu {x,y}mm/mem,{x,y}mm */
6119
0
    case X86EMUL_OPC_66(0x0f, 0x7f):     /* movdqa xmm,xmm/m128 */
6120
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x7f): /* vmovdqa {x,y}mm,{x,y}mm/mem */
6121
0
    case X86EMUL_OPC_F3(0x0f, 0x7f):     /* movdqu xmm,xmm/m128 */
6122
0
    case X86EMUL_OPC_VEX_F3(0x0f, 0x7f): /* vmovdqu {x,y}mm,{x,y}mm/mem */
6123
0
    movdqa:
6124
0
        d |= TwoOp;
6125
0
        op_bytes = 16 << vex.l;
6126
0
        if ( vex.opcx != vex_none )
6127
0
            goto simd_0f_avx;
6128
0
        goto simd_0f_sse2;
6129
0
6130
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xd6): /* vmovq xmm,xmm/m64 */
6131
0
        generate_exception_if(vex.l, EXC_UD);
6132
0
        d |= TwoOp;
6133
0
        /* fall through */
6134
0
    case X86EMUL_OPC_66(0x0f, 0xd6):     /* movq xmm,xmm/m64 */
6135
0
    case X86EMUL_OPC(0x0f, 0x6f):        /* movq mm/m64,mm */
6136
0
    case X86EMUL_OPC(0x0f, 0x7f):        /* movq mm,mm/m64 */
6137
0
        op_bytes = 8;
6138
0
        goto simd_0f_int;
6139
0
6140
0
    CASE_SIMD_PACKED_INT(0x0f, 0x70):    /* pshuf{w,d} $imm8,{,x}mm/mem,{,x}mm */
6141
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x70): /* vpshufd $imm8,{x,y}mm/mem,{x,y}mm */
6142
0
    case X86EMUL_OPC_F3(0x0f, 0x70):     /* pshufhw $imm8,xmm/m128,xmm */
6143
0
    case X86EMUL_OPC_VEX_F3(0x0f, 0x70): /* vpshufhw $imm8,{x,y}mm/mem,{x,y}mm */
6144
0
    case X86EMUL_OPC_F2(0x0f, 0x70):     /* pshuflw $imm8,xmm/m128,xmm */
6145
0
    case X86EMUL_OPC_VEX_F2(0x0f, 0x70): /* vpshuflw $imm8,{x,y}mm/mem,{x,y}mm */
6146
0
        d = (d & ~SrcMask) | SrcMem | TwoOp;
6147
0
        op_bytes = vex.pfx ? 16 << vex.l : 8;
6148
0
    simd_0f_int_imm8:
6149
0
        if ( vex.opcx != vex_none )
6150
0
        {
6151
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x0e): /* vpblendw $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
6152
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x0f): /* vpalignr $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
6153
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x42): /* vmpsadbw $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
6154
0
            if ( vex.l )
6155
0
                host_and_vcpu_must_have(avx2);
6156
0
            else
6157
0
            {
6158
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x08): /* vroundps $imm8,{x,y}mm/mem,{x,y}mm */
6159
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x09): /* vroundpd $imm8,{x,y}mm/mem,{x,y}mm */
6160
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x0a): /* vroundss $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
6161
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x0b): /* vroundsd $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
6162
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x0c): /* vblendps $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
6163
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x0d): /* vblendpd $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
6164
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x40): /* vdpps $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
6165
0
    simd_0f_imm8_avx:
6166
0
                host_and_vcpu_must_have(avx);
6167
0
            }
6168
0
            get_fpu(X86EMUL_FPU_ymm, &fic);
6169
0
        }
6170
0
        else if ( vex.pfx )
6171
0
        {
6172
0
    simd_0f_imm8_sse2:
6173
0
            vcpu_must_have(sse2);
6174
0
            get_fpu(X86EMUL_FPU_xmm, &fic);
6175
0
        }
6176
0
        else
6177
0
        {
6178
0
            host_and_vcpu_must_have(mmx);
6179
0
            vcpu_must_have(mmxext);
6180
0
            get_fpu(X86EMUL_FPU_mmx, &fic);
6181
0
        }
6182
0
    simd_0f_imm8:
6183
0
        opc = init_prefixes(stub);
6184
0
        opc[0] = b;
6185
0
        opc[1] = modrm;
6186
0
        if ( ea.type == OP_MEM )
6187
0
        {
6188
0
            /* Convert memory operand to (%rAX). */
6189
0
            rex_prefix &= ~REX_B;
6190
0
            vex.b = 1;
6191
0
            opc[1] &= 0x38;
6192
0
        }
6193
0
        opc[2] = imm1;
6194
0
        fic.insn_bytes = PFX_BYTES + 3;
6195
0
        break;
6196
0
6197
0
    CASE_SIMD_PACKED_INT(0x0f, 0x71):    /* Grp12 */
6198
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x71):
6199
0
    CASE_SIMD_PACKED_INT(0x0f, 0x72):    /* Grp13 */
6200
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x72):
6201
0
        switch ( modrm_reg & 7 )
6202
0
        {
6203
0
        case 2: /* psrl{w,d} $imm8,{,x}mm */
6204
0
                /* vpsrl{w,d} $imm8,{x,y}mm,{x,y}mm */
6205
0
        case 4: /* psra{w,d} $imm8,{,x}mm */
6206
0
                /* vpsra{w,d} $imm8,{x,y}mm,{x,y}mm */
6207
0
        case 6: /* psll{w,d} $imm8,{,x}mm */
6208
0
                /* vpsll{w,d} $imm8,{x,y}mm,{x,y}mm */
6209
0
            break;
6210
0
        default:
6211
0
            goto unrecognized_insn;
6212
0
        }
6213
0
    simd_0f_shift_imm:
6214
0
        generate_exception_if(ea.type != OP_REG, EXC_UD);
6215
0
6216
0
        if ( vex.opcx != vex_none )
6217
0
        {
6218
0
            if ( vex.l )
6219
0
                host_and_vcpu_must_have(avx2);
6220
0
            else
6221
0
                host_and_vcpu_must_have(avx);
6222
0
            get_fpu(X86EMUL_FPU_ymm, &fic);
6223
0
        }
6224
0
        else if ( vex.pfx )
6225
0
        {
6226
0
            vcpu_must_have(sse2);
6227
0
            get_fpu(X86EMUL_FPU_xmm, &fic);
6228
0
        }
6229
0
        else
6230
0
        {
6231
0
            host_and_vcpu_must_have(mmx);
6232
0
            get_fpu(X86EMUL_FPU_mmx, &fic);
6233
0
        }
6234
0
6235
0
        opc = init_prefixes(stub);
6236
0
        opc[0] = b;
6237
0
        opc[1] = modrm;
6238
0
        opc[2] = imm1;
6239
0
        fic.insn_bytes = PFX_BYTES + 3;
6240
0
    simd_0f_reg_only:
6241
0
        opc[fic.insn_bytes - PFX_BYTES] = 0xc3;
6242
0
6243
0
        copy_REX_VEX(opc, rex_prefix, vex);
6244
0
        invoke_stub("", "", [dummy_out] "=g" (dummy) : [dummy_in] "i" (0) );
6245
0
6246
0
        put_stub(stub);
6247
0
        check_xmm_exn(&fic);
6248
0
6249
0
        ASSERT(!state->simd_size);
6250
0
        break;
6251
0
6252
0
    case X86EMUL_OPC(0x0f, 0x73):        /* Grp14 */
6253
0
        switch ( modrm_reg & 7 )
6254
0
        {
6255
0
        case 2: /* psrlq $imm8,mm */
6256
0
        case 6: /* psllq $imm8,mm */
6257
0
            goto simd_0f_shift_imm;
6258
0
        }
6259
0
        goto unrecognized_insn;
6260
0
6261
0
    case X86EMUL_OPC_66(0x0f, 0x73):
6262
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x73):
6263
0
        switch ( modrm_reg & 7 )
6264
0
        {
6265
0
        case 2: /* psrlq $imm8,xmm */
6266
0
                /* vpsrlq $imm8,{x,y}mm,{x,y}mm */
6267
0
        case 3: /* psrldq $imm8,xmm */
6268
0
                /* vpsrldq $imm8,{x,y}mm,{x,y}mm */
6269
0
        case 6: /* psllq $imm8,xmm */
6270
0
                /* vpsllq $imm8,{x,y}mm,{x,y}mm */
6271
0
        case 7: /* pslldq $imm8,xmm */
6272
0
                /* vpslldq $imm8,{x,y}mm,{x,y}mm */
6273
0
            goto simd_0f_shift_imm;
6274
0
        }
6275
0
        goto unrecognized_insn;
6276
0
6277
0
    case X86EMUL_OPC(0x0f, 0x77):        /* emms */
6278
0
    case X86EMUL_OPC_VEX(0x0f, 0x77):    /* vzero{all,upper} */
6279
0
        if ( vex.opcx != vex_none )
6280
0
        {
6281
0
            generate_exception_if(vex.reg != 0xf, EXC_UD);
6282
0
            host_and_vcpu_must_have(avx);
6283
0
            get_fpu(X86EMUL_FPU_ymm, &fic);
6284
0
6285
0
#ifdef __x86_64__
6286
0
            if ( !mode_64bit() )
6287
0
            {
6288
0
                /*
6289
0
                 * Can't use the actual instructions here, as we must not
6290
0
                 * touch YMM8...YMM15.
6291
0
                 */
6292
0
                if ( vex.l )
6293
0
                {
6294
0
                    /* vpxor %xmmN, %xmmN, %xmmN */
6295
0
                    asm volatile ( ".byte 0xc5,0xf9,0xef,0xc0" );
6296
0
                    asm volatile ( ".byte 0xc5,0xf1,0xef,0xc9" );
6297
0
                    asm volatile ( ".byte 0xc5,0xe9,0xef,0xd2" );
6298
0
                    asm volatile ( ".byte 0xc5,0xe1,0xef,0xdb" );
6299
0
                    asm volatile ( ".byte 0xc5,0xd9,0xef,0xe4" );
6300
0
                    asm volatile ( ".byte 0xc5,0xd1,0xef,0xed" );
6301
0
                    asm volatile ( ".byte 0xc5,0xc9,0xef,0xf6" );
6302
0
                    asm volatile ( ".byte 0xc5,0xc1,0xef,0xff" );
6303
0
                }
6304
0
                else
6305
0
                {
6306
0
                    /* vpor %xmmN, %xmmN, %xmmN */
6307
0
                    asm volatile ( ".byte 0xc5,0xf9,0xeb,0xc0" );
6308
0
                    asm volatile ( ".byte 0xc5,0xf1,0xeb,0xc9" );
6309
0
                    asm volatile ( ".byte 0xc5,0xe9,0xeb,0xd2" );
6310
0
                    asm volatile ( ".byte 0xc5,0xe1,0xeb,0xdb" );
6311
0
                    asm volatile ( ".byte 0xc5,0xd9,0xeb,0xe4" );
6312
0
                    asm volatile ( ".byte 0xc5,0xd1,0xeb,0xed" );
6313
0
                    asm volatile ( ".byte 0xc5,0xc9,0xeb,0xf6" );
6314
0
                    asm volatile ( ".byte 0xc5,0xc1,0xeb,0xff" );
6315
0
                }
6316
0
6317
0
                ASSERT(!state->simd_size);
6318
0
                break;
6319
0
            }
6320
0
#endif
6321
0
        }
6322
0
        else
6323
0
        {
6324
0
            host_and_vcpu_must_have(mmx);
6325
0
            get_fpu(X86EMUL_FPU_mmx, &fic);
6326
0
        }
6327
0
6328
0
        opc = init_prefixes(stub);
6329
0
        opc[0] = b;
6330
0
        fic.insn_bytes = PFX_BYTES + 1;
6331
0
        goto simd_0f_reg_only;
6332
0
6333
0
    case X86EMUL_OPC_66(0x0f, 0x78):     /* Grp17 */
6334
0
        switch ( modrm_reg & 7 )
6335
0
        {
6336
0
        case 0: /* extrq $imm8,$imm8,xmm */
6337
0
            break;
6338
0
        default:
6339
0
            goto unrecognized_insn;
6340
0
        }
6341
0
        /* fall through */
6342
0
    case X86EMUL_OPC_F2(0x0f, 0x78):     /* insertq $imm8,$imm8,xmm,xmm */
6343
0
        generate_exception_if(ea.type != OP_REG, EXC_UD);
6344
0
6345
0
        host_and_vcpu_must_have(sse4a);
6346
0
        get_fpu(X86EMUL_FPU_xmm, &fic);
6347
0
6348
0
        opc = init_prefixes(stub);
6349
0
        opc[0] = b;
6350
0
        opc[1] = modrm;
6351
0
        opc[2] = imm1;
6352
0
        opc[3] = imm2;
6353
0
        fic.insn_bytes = PFX_BYTES + 4;
6354
0
        goto simd_0f_reg_only;
6355
0
6356
0
    case X86EMUL_OPC_66(0x0f, 0x79):     /* extrq xmm,xmm */
6357
0
    case X86EMUL_OPC_F2(0x0f, 0x79):     /* insertq xmm,xmm */
6358
0
        generate_exception_if(ea.type != OP_REG, EXC_UD);
6359
0
        host_and_vcpu_must_have(sse4a);
6360
0
        op_bytes = 8;
6361
0
        goto simd_0f_xmm;
6362
0
6363
0
    case X86EMUL_OPC_F2(0x0f, 0xf0):     /* lddqu m128,xmm */
6364
0
    case X86EMUL_OPC_VEX_F2(0x0f, 0xf0): /* vlddqu mem,{x,y}mm */
6365
0
        generate_exception_if(ea.type != OP_MEM, EXC_UD);
6366
0
        /* fall through */
6367
0
    case X86EMUL_OPC_66(0x0f, 0x7c):     /* haddpd xmm/m128,xmm */
6368
0
    case X86EMUL_OPC_F2(0x0f, 0x7c):     /* haddps xmm/m128,xmm */
6369
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x7c): /* vhaddpd {x,y}mm/mem,{x,y}mm,{x,y}mm */
6370
0
    case X86EMUL_OPC_VEX_F2(0x0f, 0x7c): /* vhaddps {x,y}mm/mem,{x,y}mm,{x,y}mm */
6371
0
    case X86EMUL_OPC_66(0x0f, 0x7d):     /* hsubpd xmm/m128,xmm */
6372
0
    case X86EMUL_OPC_F2(0x0f, 0x7d):     /* hsubps xmm/m128,xmm */
6373
0
    case X86EMUL_OPC_VEX_66(0x0f, 0x7d): /* vhsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm */
6374
0
    case X86EMUL_OPC_VEX_F2(0x0f, 0x7d): /* vhsubps {x,y}mm/mem,{x,y}mm,{x,y}mm */
6375
0
    case X86EMUL_OPC_66(0x0f, 0xd0):     /* addsubpd xmm/m128,xmm */
6376
0
    case X86EMUL_OPC_F2(0x0f, 0xd0):     /* addsubps xmm/m128,xmm */
6377
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xd0): /* vaddsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm */
6378
0
    case X86EMUL_OPC_VEX_F2(0x0f, 0xd0): /* vaddsubps {x,y}mm/mem,{x,y}mm,{x,y}mm */
6379
0
        op_bytes = 16 << vex.l;
6380
0
        goto simd_0f_sse3_avx;
6381
0
6382
0
    case X86EMUL_OPC_F3(0x0f, 0x7e):     /* movq xmm/m64,xmm */
6383
0
    case X86EMUL_OPC_VEX_F3(0x0f, 0x7e): /* vmovq xmm/m64,xmm */
6384
0
        generate_exception_if(vex.l, EXC_UD);
6385
0
        op_bytes = 8;
6386
0
        goto simd_0f_int;
6387
0
6388
0
    case X86EMUL_OPC(0x0f, 0x80) ... X86EMUL_OPC(0x0f, 0x8f): /* jcc (near) */
6389
0
        if ( test_cc(b, _regs.eflags) )
6390
0
            jmp_rel((int32_t)src.val);
6391
0
        adjust_bnd(ctxt, ops, vex.pfx);
6392
0
        break;
6393
0
6394
0
    case X86EMUL_OPC(0x0f, 0x90) ... X86EMUL_OPC(0x0f, 0x9f): /* setcc */
6395
0
        dst.val = test_cc(b, _regs.eflags);
6396
0
        break;
6397
0
6398
0
    case X86EMUL_OPC(0x0f, 0xa2): /* cpuid */
6399
0
        msr_val = 0;
6400
0
        fail_if(ops->cpuid == NULL);
6401
0
6402
0
        /* Speculatively read MSR_INTEL_MISC_FEATURES_ENABLES. */
6403
0
        if ( ops->read_msr && !mode_ring0() &&
6404
0
             (rc = ops->read_msr(MSR_INTEL_MISC_FEATURES_ENABLES,
6405
0
                                 &msr_val, ctxt)) == X86EMUL_EXCEPTION )
6406
0
        {
6407
0
            /* Not implemented.  Squash the exception and proceed normally. */
6408
0
            x86_emul_reset_event(ctxt);
6409
0
            rc = X86EMUL_OKAY;
6410
0
        }
6411
0
        if ( rc != X86EMUL_OKAY )
6412
0
            goto done;
6413
0
6414
0
        generate_exception_if((msr_val & MSR_MISC_FEATURES_CPUID_FAULTING),
6415
0
                              EXC_GP, 0); /* Faulting active? (Inc. CPL test) */
6416
0
6417
0
        rc = ops->cpuid(_regs.eax, _regs.ecx, &cpuid_leaf, ctxt);
6418
0
        if ( rc != X86EMUL_OKAY )
6419
0
            goto done;
6420
0
        _regs.r(ax) = cpuid_leaf.a;
6421
0
        _regs.r(bx) = cpuid_leaf.b;
6422
0
        _regs.r(cx) = cpuid_leaf.c;
6423
0
        _regs.r(dx) = cpuid_leaf.d;
6424
0
        break;
6425
0
6426
0
    case X86EMUL_OPC(0x0f, 0xa3): bt: /* bt */
6427
0
        generate_exception_if(lock_prefix, EXC_UD);
6428
0
        emulate_2op_SrcV_nobyte("bt", src, dst, _regs.eflags);
6429
0
        dst.type = OP_NONE;
6430
0
        break;
6431
0
6432
0
    case X86EMUL_OPC(0x0f, 0xa4): /* shld imm8,r,r/m */
6433
0
    case X86EMUL_OPC(0x0f, 0xa5): /* shld %%cl,r,r/m */
6434
0
    case X86EMUL_OPC(0x0f, 0xac): /* shrd imm8,r,r/m */
6435
0
    case X86EMUL_OPC(0x0f, 0xad): /* shrd %%cl,r,r/m */ {
6436
0
        uint8_t shift, width = dst.bytes << 3;
6437
0
6438
0
        generate_exception_if(lock_prefix, EXC_UD);
6439
0
        if ( b & 1 )
6440
0
            shift = _regs.cl;
6441
0
        else
6442
0
        {
6443
0
            shift = src.val;
6444
0
            src.reg = decode_register(modrm_reg, &_regs, 0);
6445
0
            src.val = truncate_word(*src.reg, dst.bytes);
6446
0
        }
6447
0
        if ( (shift &= width - 1) == 0 )
6448
0
            break;
6449
0
        dst.orig_val = dst.val;
6450
0
        dst.val = (b & 8) ?
6451
0
                  /* shrd */
6452
0
                  ((dst.orig_val >> shift) |
6453
0
                   truncate_word(src.val << (width - shift), dst.bytes)) :
6454
0
                  /* shld */
6455
0
                  (truncate_word(dst.orig_val << shift, dst.bytes) |
6456
0
                   (src.val >> (width - shift)));
6457
0
        _regs.eflags &= ~(X86_EFLAGS_OF | X86_EFLAGS_SF | X86_EFLAGS_ZF |
6458
0
                          X86_EFLAGS_PF | X86_EFLAGS_CF);
6459
0
        if ( (dst.orig_val >> ((b & 8) ? (shift - 1) : (width - shift))) & 1 )
6460
0
            _regs.eflags |= X86_EFLAGS_CF;
6461
0
        if ( ((dst.val ^ dst.orig_val) >> (width - 1)) & 1 )
6462
0
            _regs.eflags |= X86_EFLAGS_OF;
6463
0
        _regs.eflags |= ((dst.val >> (width - 1)) & 1) ? X86_EFLAGS_SF : 0;
6464
0
        _regs.eflags |= (dst.val == 0) ? X86_EFLAGS_ZF : 0;
6465
0
        _regs.eflags |= even_parity(dst.val) ? X86_EFLAGS_PF : 0;
6466
0
        break;
6467
0
    }
6468
0
6469
0
    case X86EMUL_OPC(0x0f, 0xab): bts: /* bts */
6470
0
        emulate_2op_SrcV_nobyte("bts", src, dst, _regs.eflags);
6471
0
        break;
6472
0
6473
0
    case X86EMUL_OPC(0x0f, 0xae): case X86EMUL_OPC_66(0x0f, 0xae): /* Grp15 */
6474
0
        switch ( modrm_reg & 7 )
6475
0
        {
6476
0
        case 2: /* ldmxcsr */
6477
0
            generate_exception_if(vex.pfx, EXC_UD);
6478
0
            vcpu_must_have(sse);
6479
0
        ldmxcsr:
6480
0
            generate_exception_if(src.type != OP_MEM, EXC_UD);
6481
0
            get_fpu(vex.opcx ? X86EMUL_FPU_ymm : X86EMUL_FPU_xmm, &fic);
6482
0
            generate_exception_if(src.val & ~mxcsr_mask, EXC_GP, 0);
6483
0
            asm volatile ( "ldmxcsr %0" :: "m" (src.val) );
6484
0
            break;
6485
0
6486
0
        case 3: /* stmxcsr */
6487
0
            generate_exception_if(vex.pfx, EXC_UD);
6488
0
            vcpu_must_have(sse);
6489
0
        stmxcsr:
6490
0
            generate_exception_if(dst.type != OP_MEM, EXC_UD);
6491
0
            get_fpu(vex.opcx ? X86EMUL_FPU_ymm : X86EMUL_FPU_xmm, &fic);
6492
0
            asm volatile ( "stmxcsr %0" : "=m" (dst.val) );
6493
0
            break;
6494
0
6495
0
        case 5: /* lfence */
6496
0
            fail_if(modrm_mod != 3);
6497
0
            generate_exception_if(vex.pfx, EXC_UD);
6498
0
            vcpu_must_have(sse2);
6499
0
            asm volatile ( "lfence" ::: "memory" );
6500
0
            break;
6501
0
        case 6:
6502
0
            if ( modrm_mod == 3 ) /* mfence */
6503
0
            {
6504
0
                generate_exception_if(vex.pfx, EXC_UD);
6505
0
                vcpu_must_have(sse2);
6506
0
                asm volatile ( "mfence" ::: "memory" );
6507
0
                break;
6508
0
            }
6509
0
            /* else clwb */
6510
0
            fail_if(!vex.pfx);
6511
0
            vcpu_must_have(clwb);
6512
0
            fail_if(!ops->wbinvd);
6513
0
            if ( (rc = ops->wbinvd(ctxt)) != X86EMUL_OKAY )
6514
0
                goto done;
6515
0
            break;
6516
0
        case 7:
6517
0
            if ( modrm_mod == 3 ) /* sfence */
6518
0
            {
6519
0
                generate_exception_if(vex.pfx, EXC_UD);
6520
0
                vcpu_must_have(mmxext);
6521
0
                asm volatile ( "sfence" ::: "memory" );
6522
0
                break;
6523
0
            }
6524
0
            /* else clflush{,opt} */
6525
0
            if ( !vex.pfx )
6526
0
                vcpu_must_have(clflush);
6527
0
            else
6528
0
                vcpu_must_have(clflushopt);
6529
0
            fail_if(ops->wbinvd == NULL);
6530
0
            if ( (rc = ops->wbinvd(ctxt)) != 0 )
6531
0
                goto done;
6532
0
            break;
6533
0
        default:
6534
0
            goto unimplemented_insn;
6535
0
        }
6536
0
        break;
6537
0
6538
0
    case X86EMUL_OPC_VEX(0x0f, 0xae): /* Grp15 */
6539
0
        switch ( modrm_reg & 7 )
6540
0
        {
6541
0
        case 2: /* vldmxcsr */
6542
0
            generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
6543
0
            vcpu_must_have(avx);
6544
0
            goto ldmxcsr;
6545
0
        case 3: /* vstmxcsr */
6546
0
            generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
6547
0
            vcpu_must_have(avx);
6548
0
            goto stmxcsr;
6549
0
        }
6550
0
        goto unrecognized_insn;
6551
0
6552
0
    case X86EMUL_OPC_F3(0x0f, 0xae): /* Grp15 */
6553
0
        fail_if(modrm_mod != 3);
6554
0
        generate_exception_if((modrm_reg & 4) || !mode_64bit(), EXC_UD);
6555
0
        fail_if(!ops->read_cr);
6556
0
        if ( (rc = ops->read_cr(4, &cr4, ctxt)) != X86EMUL_OKAY )
6557
0
            goto done;
6558
0
        generate_exception_if(!(cr4 & X86_CR4_FSGSBASE), EXC_UD);
6559
0
        seg = modrm_reg & 1 ? x86_seg_gs : x86_seg_fs;
6560
0
        fail_if(!ops->read_segment);
6561
0
        if ( (rc = ops->read_segment(seg, &sreg, ctxt)) != X86EMUL_OKAY )
6562
0
            goto done;
6563
0
        dst.reg = decode_register(modrm_rm, &_regs, 0);
6564
0
        if ( !(modrm_reg & 2) )
6565
0
        {
6566
0
            /* rd{f,g}sbase */
6567
0
            dst.type = OP_REG;
6568
0
            dst.bytes = (op_bytes == 8) ? 8 : 4;
6569
0
            dst.val = sreg.base;
6570
0
        }
6571
0
        else
6572
0
        {
6573
0
            /* wr{f,g}sbase */
6574
0
            if ( op_bytes == 8 )
6575
0
            {
6576
0
                sreg.base = *dst.reg;
6577
0
                generate_exception_if(!is_canonical_address(sreg.base),
6578
0
                                      EXC_GP, 0);
6579
0
            }
6580
0
            else
6581
0
                sreg.base = (uint32_t)*dst.reg;
6582
0
            fail_if(!ops->write_segment);
6583
0
            if ( (rc = ops->write_segment(seg, &sreg, ctxt)) != X86EMUL_OKAY )
6584
0
                goto done;
6585
0
        }
6586
0
        break;
6587
0
6588
0
    case X86EMUL_OPC(0x0f, 0xaf): /* imul */
6589
0
        emulate_2op_SrcV_srcmem("imul", src, dst, _regs.eflags);
6590
0
        break;
6591
0
6592
0
    case X86EMUL_OPC(0x0f, 0xb0): case X86EMUL_OPC(0x0f, 0xb1): /* cmpxchg */
6593
0
        /* Save real source value, then compare EAX against destination. */
6594
0
        src.orig_val = src.val;
6595
0
        src.val = _regs.r(ax);
6596
0
        /* cmp: %%eax - dst ==> dst and src swapped for macro invocation */
6597
0
        emulate_2op_SrcV("cmp", dst, src, _regs.eflags);
6598
0
        if ( _regs.eflags & X86_EFLAGS_ZF )
6599
0
        {
6600
0
            /* Success: write back to memory. */
6601
0
            dst.val = src.orig_val;
6602
0
        }
6603
0
        else
6604
0
        {
6605
0
            /* Failure: write the value we saw to EAX. */
6606
0
            dst.type = OP_REG;
6607
0
            dst.reg  = (unsigned long *)&_regs.r(ax);
6608
0
        }
6609
0
        break;
6610
0
6611
0
    case X86EMUL_OPC(0x0f, 0xb2): /* lss */
6612
0
    case X86EMUL_OPC(0x0f, 0xb4): /* lfs */
6613
0
    case X86EMUL_OPC(0x0f, 0xb5): /* lgs */
6614
0
        seg = b & 7;
6615
0
        goto les;
6616
0
6617
0
    case X86EMUL_OPC(0x0f, 0xb3): btr: /* btr */
6618
0
        emulate_2op_SrcV_nobyte("btr", src, dst, _regs.eflags);
6619
0
        break;
6620
0
6621
3.05k
    case X86EMUL_OPC(0x0f, 0xb6): /* movzx rm8,r{16,32,64} */
6622
3.05k
        /* Recompute DstReg as we may have decoded AH/BH/CH/DH. */
6623
3.05k
        dst.reg   = decode_register(modrm_reg, &_regs, 0);
6624
3.05k
        dst.bytes = op_bytes;
6625
3.05k
        dst.val   = (uint8_t)src.val;
6626
3.05k
        break;
6627
0
6628
2.87k
    case X86EMUL_OPC(0x0f, 0xb7): /* movzx rm16,r{16,32,64} */
6629
2.87k
        dst.val = (uint16_t)src.val;
6630
2.87k
        break;
6631
0
6632
0
    case X86EMUL_OPC_F3(0x0f, 0xb8): /* popcnt r/m,r */
6633
0
        host_and_vcpu_must_have(popcnt);
6634
0
        asm ( "popcnt %1,%0" : "=r" (dst.val) : "rm" (src.val) );
6635
0
        _regs.eflags &= ~EFLAGS_MASK;
6636
0
        if ( !dst.val )
6637
0
            _regs.eflags |= X86_EFLAGS_ZF;
6638
0
        break;
6639
0
6640
0
    case X86EMUL_OPC(0x0f, 0xba): /* Grp8 */
6641
0
        switch ( modrm_reg & 7 )
6642
0
        {
6643
0
        case 4: goto bt;
6644
0
        case 5: goto bts;
6645
0
        case 6: goto btr;
6646
0
        case 7: goto btc;
6647
0
        default: generate_exception(EXC_UD);
6648
0
        }
6649
0
        break;
6650
0
6651
0
    case X86EMUL_OPC(0x0f, 0xbb): btc: /* btc */
6652
0
        emulate_2op_SrcV_nobyte("btc", src, dst, _regs.eflags);
6653
0
        break;
6654
0
6655
0
    case X86EMUL_OPC(0x0f, 0xbc): /* bsf or tzcnt */
6656
0
    {
6657
0
        bool zf;
6658
0
6659
0
        asm ( "bsf %2,%0" ASM_FLAG_OUT(, "; setz %1")
6660
0
              : "=r" (dst.val), ASM_FLAG_OUT("=@ccz", "=qm") (zf)
6661
0
              : "rm" (src.val) );
6662
0
        _regs.eflags &= ~X86_EFLAGS_ZF;
6663
0
        if ( (vex.pfx == vex_f3) && vcpu_has_bmi1() )
6664
0
        {
6665
0
            _regs.eflags &= ~X86_EFLAGS_CF;
6666
0
            if ( zf )
6667
0
            {
6668
0
                _regs.eflags |= X86_EFLAGS_CF;
6669
0
                dst.val = op_bytes * 8;
6670
0
            }
6671
0
            else if ( !dst.val )
6672
0
                _regs.eflags |= X86_EFLAGS_ZF;
6673
0
        }
6674
0
        else if ( zf )
6675
0
        {
6676
0
            _regs.eflags |= X86_EFLAGS_ZF;
6677
0
            dst.type = OP_NONE;
6678
0
        }
6679
0
        break;
6680
0
    }
6681
0
6682
0
    case X86EMUL_OPC(0x0f, 0xbd): /* bsr or lzcnt */
6683
0
    {
6684
0
        bool zf;
6685
0
6686
0
        asm ( "bsr %2,%0" ASM_FLAG_OUT(, "; setz %1")
6687
0
              : "=r" (dst.val), ASM_FLAG_OUT("=@ccz", "=qm") (zf)
6688
0
              : "rm" (src.val) );
6689
0
        _regs.eflags &= ~X86_EFLAGS_ZF;
6690
0
        if ( (vex.pfx == vex_f3) && vcpu_has_lzcnt() )
6691
0
        {
6692
0
            _regs.eflags &= ~X86_EFLAGS_CF;
6693
0
            if ( zf )
6694
0
            {
6695
0
                _regs.eflags |= X86_EFLAGS_CF;
6696
0
                dst.val = op_bytes * 8;
6697
0
            }
6698
0
            else
6699
0
            {
6700
0
                dst.val = op_bytes * 8 - 1 - dst.val;
6701
0
                if ( !dst.val )
6702
0
                    _regs.eflags |= X86_EFLAGS_ZF;
6703
0
            }
6704
0
        }
6705
0
        else if ( zf )
6706
0
        {
6707
0
            _regs.eflags |= X86_EFLAGS_ZF;
6708
0
            dst.type = OP_NONE;
6709
0
        }
6710
0
        break;
6711
0
    }
6712
0
6713
0
    case X86EMUL_OPC(0x0f, 0xbe): /* movsx rm8,r{16,32,64} */
6714
0
        /* Recompute DstReg as we may have decoded AH/BH/CH/DH. */
6715
0
        dst.reg   = decode_register(modrm_reg, &_regs, 0);
6716
0
        dst.bytes = op_bytes;
6717
0
        dst.val   = (int8_t)src.val;
6718
0
        break;
6719
0
6720
0
    case X86EMUL_OPC(0x0f, 0xbf): /* movsx rm16,r{16,32,64} */
6721
0
        dst.val = (int16_t)src.val;
6722
0
        break;
6723
0
6724
0
    case X86EMUL_OPC(0x0f, 0xc0): case X86EMUL_OPC(0x0f, 0xc1): /* xadd */
6725
0
        /* Write back the register source. */
6726
0
        switch ( dst.bytes )
6727
0
        {
6728
0
        case 1: *(uint8_t  *)src.reg = (uint8_t)dst.val; break;
6729
0
        case 2: *(uint16_t *)src.reg = (uint16_t)dst.val; break;
6730
0
        case 4: *src.reg = (uint32_t)dst.val; break; /* 64b reg: zero-extend */
6731
0
        case 8: *src.reg = dst.val; break;
6732
0
        }
6733
0
        goto add;
6734
0
6735
0
    CASE_SIMD_ALL_FP(, 0x0f, 0xc2):        /* cmp{p,s}{s,d} $imm8,xmm/mem,xmm */
6736
0
    CASE_SIMD_ALL_FP(_VEX, 0x0f, 0xc2):    /* vcmp{p,s}{s,d} $imm8,{x,y}mm/mem,{x,y}mm */
6737
0
    CASE_SIMD_PACKED_FP(, 0x0f, 0xc6):     /* shufp{s,d} $imm8,xmm/mem,xmm */
6738
0
    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0xc6): /* vshufp{s,d} $imm8,{x,y}mm/mem,{x,y}mm */
6739
0
        d = (d & ~SrcMask) | SrcMem;
6740
0
        if ( vex.opcx == vex_none )
6741
0
        {
6742
0
            if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
6743
0
                goto simd_0f_imm8_sse2;
6744
0
            vcpu_must_have(sse);
6745
0
            get_fpu(X86EMUL_FPU_xmm, &fic);
6746
0
            goto simd_0f_imm8;
6747
0
        }
6748
0
        goto simd_0f_imm8_avx;
6749
0
6750
0
    case X86EMUL_OPC(0x0f, 0xc3): /* movnti */
6751
0
        /* Ignore the non-temporal hint for now. */
6752
0
        vcpu_must_have(sse2);
6753
0
        dst.val = src.val;
6754
0
        sfence = true;
6755
0
        break;
6756
0
6757
0
    CASE_SIMD_PACKED_INT(0x0f, 0xc4):      /* pinsrw $imm8,r32/m16,{,x}mm */
6758
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xc4):   /* vpinsrw $imm8,r32/m16,xmm,xmm */
6759
0
        generate_exception_if(vex.l, EXC_UD);
6760
0
        memcpy(mmvalp, &src.val, 2);
6761
0
        ea.type = OP_MEM;
6762
0
        goto simd_0f_int_imm8;
6763
0
6764
0
    CASE_SIMD_PACKED_INT(0x0f, 0xc5):      /* pextrw $imm8,{,x}mm,reg */
6765
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xc5):   /* vpextrw $imm8,xmm,reg */
6766
0
        generate_exception_if(vex.l, EXC_UD);
6767
0
        opc = init_prefixes(stub);
6768
0
        opc[0] = b;
6769
0
        /* Convert GPR destination to %rAX. */
6770
0
        rex_prefix &= ~REX_R;
6771
0
        vex.r = 1;
6772
0
        if ( !mode_64bit() )
6773
0
            vex.w = 0;
6774
0
        opc[1] = modrm & 0xc7;
6775
0
        opc[2] = imm1;
6776
0
        fic.insn_bytes = PFX_BYTES + 3;
6777
0
        goto simd_0f_to_gpr;
6778
0
6779
0
    case X86EMUL_OPC(0x0f, 0xc7): /* Grp9 */
6780
0
    {
6781
0
        union {
6782
0
            uint32_t u32[2];
6783
0
            uint64_t u64[2];
6784
0
        } *old, *aux;
6785
0
6786
0
        if ( ea.type == OP_REG )
6787
0
        {
6788
0
            bool __maybe_unused carry;
6789
0
6790
0
            switch ( modrm_reg & 7 )
6791
0
            {
6792
0
            default:
6793
0
                goto unrecognized_insn;
6794
0
6795
0
            case 6: /* rdrand */
6796
0
#ifdef HAVE_GAS_RDRAND
6797
0
                generate_exception_if(rep_prefix(), EXC_UD);
6798
0
                host_and_vcpu_must_have(rdrand);
6799
0
                dst = ea;
6800
0
                switch ( op_bytes )
6801
0
                {
6802
0
                case 2:
6803
0
                    asm ( "rdrand %w0" ASM_FLAG_OUT(, "; setc %1")
6804
0
                          : "=r" (dst.val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
6805
0
                    break;
6806
0
                default:
6807
0
# ifdef __x86_64__
6808
0
                    asm ( "rdrand %k0" ASM_FLAG_OUT(, "; setc %1")
6809
0
                          : "=r" (dst.val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
6810
0
                    break;
6811
0
                case 8:
6812
0
# endif
6813
0
                    asm ( "rdrand %0" ASM_FLAG_OUT(, "; setc %1")
6814
0
                          : "=r" (dst.val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
6815
0
                    break;
6816
0
                }
6817
0
                _regs.eflags &= ~EFLAGS_MASK;
6818
0
                if ( carry )
6819
0
                    _regs.eflags |= X86_EFLAGS_CF;
6820
0
                break;
6821
0
#else
6822
                goto unimplemented_insn;
6823
#endif
6824
0
6825
0
            case 7: /* rdseed / rdpid */
6826
0
                if ( repe_prefix() ) /* rdpid */
6827
0
                {
6828
0
                    generate_exception_if(ea.type != OP_REG, EXC_UD);
6829
0
                    vcpu_must_have(rdpid);
6830
0
                    fail_if(!ops->read_msr);
6831
0
                    if ( (rc = ops->read_msr(MSR_TSC_AUX, &msr_val,
6832
0
                                             ctxt)) != X86EMUL_OKAY )
6833
0
                        goto done;
6834
0
                    dst = ea;
6835
0
                    dst.val = msr_val;
6836
0
                    dst.bytes = 4;
6837
0
                    break;
6838
0
                }
6839
0
#ifdef HAVE_GAS_RDSEED
6840
0
                generate_exception_if(rep_prefix(), EXC_UD);
6841
0
                host_and_vcpu_must_have(rdseed);
6842
0
                dst = ea;
6843
0
                switch ( op_bytes )
6844
0
                {
6845
0
                case 2:
6846
0
                    asm ( "rdseed %w0" ASM_FLAG_OUT(, "; setc %1")
6847
0
                          : "=r" (dst.val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
6848
0
                    break;
6849
0
                default:
6850
0
# ifdef __x86_64__
6851
0
                    asm ( "rdseed %k0" ASM_FLAG_OUT(, "; setc %1")
6852
0
                          : "=r" (dst.val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
6853
0
                    break;
6854
0
                case 8:
6855
0
# endif
6856
0
                    asm ( "rdseed %0" ASM_FLAG_OUT(, "; setc %1")
6857
0
                          : "=r" (dst.val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
6858
0
                    break;
6859
0
                }
6860
0
                _regs.eflags &= ~EFLAGS_MASK;
6861
0
                if ( carry )
6862
0
                    _regs.eflags |= X86_EFLAGS_CF;
6863
0
                break;
6864
0
#endif
6865
0
            }
6866
0
            break;
6867
0
        }
6868
0
6869
0
        /* cmpxchg8b/cmpxchg16b */
6870
0
        generate_exception_if((modrm_reg & 7) != 1, EXC_UD);
6871
0
        fail_if(!ops->cmpxchg);
6872
0
        if ( rex_prefix & REX_W )
6873
0
        {
6874
0
            host_and_vcpu_must_have(cx16);
6875
0
            generate_exception_if(!is_aligned(ea.mem.seg, ea.mem.off, 16,
6876
0
                                              ctxt, ops),
6877
0
                                  EXC_GP, 0);
6878
0
            op_bytes = 16;
6879
0
        }
6880
0
        else
6881
0
        {
6882
0
            vcpu_must_have(cx8);
6883
0
            op_bytes = 8;
6884
0
        }
6885
0
6886
0
        old = container_of(&mmvalp->ymm[0], typeof(*old), u64[0]);
6887
0
        aux = container_of(&mmvalp->ymm[2], typeof(*aux), u64[0]);
6888
0
6889
0
        /* Get actual old value. */
6890
0
        if ( (rc = ops->read(ea.mem.seg, ea.mem.off, old, op_bytes,
6891
0
                             ctxt)) != X86EMUL_OKAY )
6892
0
            goto done;
6893
0
6894
0
        /* Get expected value. */
6895
0
        if ( !(rex_prefix & REX_W) )
6896
0
        {
6897
0
            aux->u32[0] = _regs.eax;
6898
0
            aux->u32[1] = _regs.edx;
6899
0
        }
6900
0
        else
6901
0
        {
6902
0
            aux->u64[0] = _regs.r(ax);
6903
0
            aux->u64[1] = _regs.r(dx);
6904
0
        }
6905
0
6906
0
        if ( memcmp(old, aux, op_bytes) )
6907
0
        {
6908
0
            /* Expected != actual: store actual to rDX:rAX and clear ZF. */
6909
0
            _regs.r(ax) = !(rex_prefix & REX_W) ? old->u32[0] : old->u64[0];
6910
0
            _regs.r(dx) = !(rex_prefix & REX_W) ? old->u32[1] : old->u64[1];
6911
0
            _regs.eflags &= ~X86_EFLAGS_ZF;
6912
0
        }
6913
0
        else
6914
0
        {
6915
0
            /*
6916
0
             * Expected == actual: Get proposed value, attempt atomic cmpxchg
6917
0
             * and set ZF.
6918
0
             */
6919
0
            if ( !(rex_prefix & REX_W) )
6920
0
            {
6921
0
                aux->u32[0] = _regs.ebx;
6922
0
                aux->u32[1] = _regs.ecx;
6923
0
            }
6924
0
            else
6925
0
            {
6926
0
                aux->u64[0] = _regs.r(bx);
6927
0
                aux->u64[1] = _regs.r(cx);
6928
0
            }
6929
0
6930
0
            if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old, aux,
6931
0
                                    op_bytes, ctxt)) != X86EMUL_OKAY )
6932
0
                goto done;
6933
0
            _regs.eflags |= X86_EFLAGS_ZF;
6934
0
        }
6935
0
        break;
6936
0
    }
6937
0
6938
0
    case X86EMUL_OPC(0x0f, 0xc8) ... X86EMUL_OPC(0x0f, 0xcf): /* bswap */
6939
0
        dst.type = OP_REG;
6940
0
        dst.reg  = decode_register(
6941
0
            (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0);
6942
0
        switch ( dst.bytes = op_bytes )
6943
0
        {
6944
0
        default: /* case 2: */
6945
0
            /* Undefined behaviour. Writes zero on all tested CPUs. */
6946
0
            dst.val = 0;
6947
0
            break;
6948
0
        case 4:
6949
0
#ifdef __x86_64__
6950
0
            asm ( "bswap %k0" : "=r" (dst.val) : "0" (*(uint32_t *)dst.reg) );
6951
0
            break;
6952
0
        case 8:
6953
0
#endif
6954
0
            asm ( "bswap %0" : "=r" (dst.val) : "0" (*dst.reg) );
6955
0
            break;
6956
0
        }
6957
0
        break;
6958
0
6959
0
    CASE_SIMD_PACKED_INT(0x0f, 0xd1):    /* psrlw {,x}mm/mem,{,x}mm */
6960
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xd1): /* vpsrlw xmm/m128,{x,y}mm,{x,y}mm */
6961
0
    CASE_SIMD_PACKED_INT(0x0f, 0xd2):    /* psrld {,x}mm/mem,{,x}mm */
6962
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xd2): /* vpsrld xmm/m128,{x,y}mm,{x,y}mm */
6963
0
    CASE_SIMD_PACKED_INT(0x0f, 0xd3):    /* psrlq {,x}mm/mem,{,x}mm */
6964
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xd3): /* vpsrlq xmm/m128,{x,y}mm,{x,y}mm */
6965
0
    CASE_SIMD_PACKED_INT(0x0f, 0xe1):    /* psraw {,x}mm/mem,{,x}mm */
6966
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xe1): /* vpsraw xmm/m128,{x,y}mm,{x,y}mm */
6967
0
    CASE_SIMD_PACKED_INT(0x0f, 0xe2):    /* psrad {,x}mm/mem,{,x}mm */
6968
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xe2): /* vpsrad xmm/m128,{x,y}mm,{x,y}mm */
6969
0
    CASE_SIMD_PACKED_INT(0x0f, 0xf1):    /* psllw {,x}mm/mem,{,x}mm */
6970
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xf1): /* vpsllw xmm/m128,{x,y}mm,{x,y}mm */
6971
0
    CASE_SIMD_PACKED_INT(0x0f, 0xf2):    /* pslld {,x}mm/mem,{,x}mm */
6972
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xf2): /* vpslld xmm/m128,{x,y}mm,{x,y}mm */
6973
0
    CASE_SIMD_PACKED_INT(0x0f, 0xf3):    /* psllq {,x}mm/mem,{,x}mm */
6974
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xf3): /* vpsllq xmm/m128,{x,y}mm,{x,y}mm */
6975
0
        op_bytes = vex.pfx ? 16 : 8;
6976
0
        goto simd_0f_int;
6977
0
6978
0
    case X86EMUL_OPC(0x0f, 0xd4):        /* paddq mm/m64,mm */
6979
0
    case X86EMUL_OPC(0x0f, 0xf4):        /* pmuludq mm/m64,mm */
6980
0
    case X86EMUL_OPC(0x0f, 0xfb):        /* psubq mm/m64,mm */
6981
0
        vcpu_must_have(sse2);
6982
0
        goto simd_0f_mmx;
6983
0
6984
0
    case X86EMUL_OPC_F3(0x0f, 0xd6):     /* movq2dq mm,xmm */
6985
0
    case X86EMUL_OPC_F2(0x0f, 0xd6):     /* movdq2q xmm,mm */
6986
0
        generate_exception_if(ea.type != OP_REG, EXC_UD);
6987
0
        op_bytes = 8;
6988
0
        host_and_vcpu_must_have(mmx);
6989
0
        goto simd_0f_int;
6990
0
6991
0
    case X86EMUL_OPC(0x0f, 0xe7):        /* movntq mm,m64 */
6992
0
        generate_exception_if(ea.type != OP_MEM, EXC_UD);
6993
0
        sfence = true;
6994
0
        /* fall through */
6995
0
    case X86EMUL_OPC(0x0f, 0xda):        /* pminub mm/m64,mm */
6996
0
    case X86EMUL_OPC(0x0f, 0xde):        /* pmaxub mm/m64,mm */
6997
0
    case X86EMUL_OPC(0x0f, 0xea):        /* pminsw mm/m64,mm */
6998
0
    case X86EMUL_OPC(0x0f, 0xee):        /* pmaxsw mm/m64,mm */
6999
0
    case X86EMUL_OPC(0x0f, 0xe0):        /* pavgb mm/m64,mm */
7000
0
    case X86EMUL_OPC(0x0f, 0xe3):        /* pavgw mm/m64,mm */
7001
0
    case X86EMUL_OPC(0x0f, 0xe4):        /* pmulhuw mm/m64,mm */
7002
0
    case X86EMUL_OPC(0x0f, 0xf6):        /* psadbw mm/m64,mm */
7003
0
        vcpu_must_have(mmxext);
7004
0
        goto simd_0f_mmx;
7005
0
7006
0
    case X86EMUL_OPC_66(0x0f, 0xe6):       /* cvttpd2dq xmm/mem,xmm */
7007
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xe6):   /* vcvttpd2dq {x,y}mm/mem,xmm */
7008
0
    case X86EMUL_OPC_F3(0x0f, 0xe6):       /* cvtdq2pd xmm/mem,xmm */
7009
0
    case X86EMUL_OPC_VEX_F3(0x0f, 0xe6):   /* vcvtdq2pd xmm/mem,{x,y}mm */
7010
0
    case X86EMUL_OPC_F2(0x0f, 0xe6):       /* cvtpd2dq xmm/mem,xmm */
7011
0
    case X86EMUL_OPC_VEX_F2(0x0f, 0xe6):   /* vcvtpd2dq {x,y}mm/mem,xmm */
7012
0
        d |= TwoOp;
7013
0
        op_bytes = 8 << (!!(vex.pfx & VEX_PREFIX_DOUBLE_MASK) + vex.l);
7014
0
        goto simd_0f_cvt;
7015
0
7016
0
    CASE_SIMD_PACKED_INT(0x0f, 0xf7):    /* maskmov{q,dqu} {,x}mm,{,x}mm */
7017
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xf7): /* vmaskmovdqu xmm,xmm */
7018
0
        generate_exception_if(ea.type != OP_REG, EXC_UD);
7019
0
        if ( vex.opcx != vex_none )
7020
0
        {
7021
0
            generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
7022
0
            d |= TwoOp;
7023
0
            host_and_vcpu_must_have(avx);
7024
0
            get_fpu(X86EMUL_FPU_ymm, &fic);
7025
0
        }
7026
0
        else if ( vex.pfx )
7027
0
        {
7028
0
            vcpu_must_have(sse2);
7029
0
            get_fpu(X86EMUL_FPU_xmm, &fic);
7030
0
        }
7031
0
        else
7032
0
        {
7033
0
            host_and_vcpu_must_have(mmx);
7034
0
            vcpu_must_have(mmxext);
7035
0
            get_fpu(X86EMUL_FPU_mmx, &fic);
7036
0
        }
7037
0
7038
0
        /*
7039
0
         * While we can't reasonably provide fully correct behavior here
7040
0
         * (in particular avoiding the memory read in anticipation of all
7041
0
         * bytes in the range eventually being written), we can (and should)
7042
0
         * still suppress the memory access if all mask bits are clear. Read
7043
0
         * the mask bits via {,v}pmovmskb for that purpose.
7044
0
         */
7045
0
        opc = init_prefixes(stub);
7046
0
        opc[0] = 0xd7; /* {,v}pmovmskb */
7047
0
        /* (Ab)use "sfence" for latching the original REX.R / VEX.R. */
7048
0
        sfence = rex_prefix & REX_R;
7049
0
        /* Convert GPR destination to %rAX. */
7050
0
        rex_prefix &= ~REX_R;
7051
0
        vex.r = 1;
7052
0
        if ( !mode_64bit() )
7053
0
            vex.w = 0;
7054
0
        opc[1] = modrm & 0xc7;
7055
0
        fic.insn_bytes = PFX_BYTES + 2;
7056
0
        opc[2] = 0xc3;
7057
0
7058
0
        copy_REX_VEX(opc, rex_prefix, vex);
7059
0
        invoke_stub("", "", "=a" (ea.val) : [dummy] "i" (0));
7060
0
7061
0
        put_stub(stub);
7062
0
        if ( !ea.val )
7063
0
            goto complete_insn;
7064
0
7065
0
        opc = init_prefixes(stub);
7066
0
        opc[0] = b;
7067
0
        opc[1] = modrm;
7068
0
        /* Restore high bit of XMM destination. */
7069
0
        if ( sfence )
7070
0
        {
7071
0
            rex_prefix |= REX_R;
7072
0
            vex.r = 0;
7073
0
        }
7074
0
7075
0
        ea.type = OP_MEM;
7076
0
        ea.mem.off = truncate_ea(_regs.r(di));
7077
0
        sfence = true;
7078
0
        break;
7079
0
7080
0
    case X86EMUL_OPC(0x0f38, 0x00):    /* pshufb mm/m64,mm */
7081
0
    case X86EMUL_OPC_66(0x0f38, 0x00): /* pshufb xmm/m128,xmm */
7082
0
    case X86EMUL_OPC(0x0f38, 0x01):    /* phaddw mm/m64,mm */
7083
0
    case X86EMUL_OPC_66(0x0f38, 0x01): /* phaddw xmm/m128,xmm */
7084
0
    case X86EMUL_OPC(0x0f38, 0x02):    /* phaddd mm/m64,mm */
7085
0
    case X86EMUL_OPC_66(0x0f38, 0x02): /* phaddd xmm/m128,xmm */
7086
0
    case X86EMUL_OPC(0x0f38, 0x03):    /* phaddsw mm/m64,mm */
7087
0
    case X86EMUL_OPC_66(0x0f38, 0x03): /* phaddsw xmm/m128,xmm */
7088
0
    case X86EMUL_OPC(0x0f38, 0x04):    /* pmaddubsw mm/m64,mm */
7089
0
    case X86EMUL_OPC_66(0x0f38, 0x04): /* pmaddubsw xmm/m128,xmm */
7090
0
    case X86EMUL_OPC(0x0f38, 0x05):    /* phsubw mm/m64,mm */
7091
0
    case X86EMUL_OPC_66(0x0f38, 0x05): /* phsubw xmm/m128,xmm */
7092
0
    case X86EMUL_OPC(0x0f38, 0x06):    /* phsubd mm/m64,mm */
7093
0
    case X86EMUL_OPC_66(0x0f38, 0x06): /* phsubd xmm/m128,xmm */
7094
0
    case X86EMUL_OPC(0x0f38, 0x07):    /* phsubsw mm/m64,mm */
7095
0
    case X86EMUL_OPC_66(0x0f38, 0x07): /* phsubsw xmm/m128,xmm */
7096
0
    case X86EMUL_OPC(0x0f38, 0x08):    /* psignb mm/m64,mm */
7097
0
    case X86EMUL_OPC_66(0x0f38, 0x08): /* psignb xmm/m128,xmm */
7098
0
    case X86EMUL_OPC(0x0f38, 0x09):    /* psignw mm/m64,mm */
7099
0
    case X86EMUL_OPC_66(0x0f38, 0x09): /* psignw xmm/m128,xmm */
7100
0
    case X86EMUL_OPC(0x0f38, 0x0a):    /* psignd mm/m64,mm */
7101
0
    case X86EMUL_OPC_66(0x0f38, 0x0a): /* psignd xmm/m128,xmm */
7102
0
    case X86EMUL_OPC(0x0f38, 0x0b):    /* pmulhrsw mm/m64,mm */
7103
0
    case X86EMUL_OPC_66(0x0f38, 0x0b): /* pmulhrsw xmm/m128,xmm */
7104
0
    case X86EMUL_OPC(0x0f38, 0x1c):    /* pabsb mm/m64,mm */
7105
0
    case X86EMUL_OPC_66(0x0f38, 0x1c): /* pabsb xmm/m128,xmm */
7106
0
    case X86EMUL_OPC(0x0f38, 0x1d):    /* pabsw mm/m64,mm */
7107
0
    case X86EMUL_OPC_66(0x0f38, 0x1d): /* pabsw xmm/m128,xmm */
7108
0
    case X86EMUL_OPC(0x0f38, 0x1e):    /* pabsd mm/m64,mm */
7109
0
    case X86EMUL_OPC_66(0x0f38, 0x1e): /* pabsd xmm/m128,xmm */
7110
0
        host_and_vcpu_must_have(ssse3);
7111
0
        if ( vex.pfx )
7112
0
        {
7113
0
    simd_0f38_common:
7114
0
            get_fpu(X86EMUL_FPU_xmm, &fic);
7115
0
        }
7116
0
        else
7117
0
        {
7118
0
            host_and_vcpu_must_have(mmx);
7119
0
            get_fpu(X86EMUL_FPU_mmx, &fic);
7120
0
        }
7121
0
        opc = init_prefixes(stub);
7122
0
        opc[0] = 0x38;
7123
0
        opc[1] = b;
7124
0
        opc[2] = modrm;
7125
0
        if ( ea.type == OP_MEM )
7126
0
        {
7127
0
            /* Convert memory operand to (%rAX). */
7128
0
            rex_prefix &= ~REX_B;
7129
0
            vex.b = 1;
7130
0
            opc[2] &= 0x38;
7131
0
        }
7132
0
        fic.insn_bytes = PFX_BYTES + 3;
7133
0
        break;
7134
0
7135
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x19): /* vbroadcastsd m64,ymm */
7136
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x1a): /* vbroadcastf128 m128,ymm */
7137
0
        generate_exception_if(!vex.l, EXC_UD);
7138
0
        /* fall through */
7139
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x18): /* vbroadcastss m32,{x,y}mm */
7140
0
        generate_exception_if(ea.type != OP_MEM, EXC_UD);
7141
0
        /* fall through */
7142
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x0c): /* vpermilps {x,y}mm/mem,{x,y}mm,{x,y}mm */
7143
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x0d): /* vpermilpd {x,y}mm/mem,{x,y}mm,{x,y}mm */
7144
0
        generate_exception_if(vex.w, EXC_UD);
7145
0
        goto simd_0f_avx;
7146
0
7147
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x0e): /* vtestps {x,y}mm/mem,{x,y}mm */
7148
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x0f): /* vtestpd {x,y}mm/mem,{x,y}mm */
7149
0
        generate_exception_if(vex.w, EXC_UD);
7150
0
        /* fall through */
7151
0
    case X86EMUL_OPC_66(0x0f38, 0x17):     /* ptest xmm/m128,xmm */
7152
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x17): /* vptest {x,y}mm/mem,{x,y}mm */
7153
0
        if ( vex.opcx == vex_none )
7154
0
        {
7155
0
            host_and_vcpu_must_have(sse4_1);
7156
0
            get_fpu(X86EMUL_FPU_xmm, &fic);
7157
0
        }
7158
0
        else
7159
0
        {
7160
0
            generate_exception_if(vex.reg != 0xf, EXC_UD);
7161
0
            host_and_vcpu_must_have(avx);
7162
0
            get_fpu(X86EMUL_FPU_ymm, &fic);
7163
0
        }
7164
0
7165
0
        opc = init_prefixes(stub);
7166
0
        if ( vex.opcx == vex_none )
7167
0
            opc++[0] = 0x38;
7168
0
        opc[0] = b;
7169
0
        opc[1] = modrm;
7170
0
        if ( ea.type == OP_MEM )
7171
0
        {
7172
0
            rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 16 << vex.l, ctxt);
7173
0
            if ( rc != X86EMUL_OKAY )
7174
0
                goto done;
7175
0
7176
0
            /* Convert memory operand to (%rAX). */
7177
0
            rex_prefix &= ~REX_B;
7178
0
            vex.b = 1;
7179
0
            opc[1] &= 0x38;
7180
0
        }
7181
0
        fic.insn_bytes = PFX_BYTES + 2;
7182
0
        opc[2] = 0xc3;
7183
0
        if ( vex.opcx == vex_none )
7184
0
        {
7185
0
            /* Cover for extra prefix byte. */
7186
0
            --opc;
7187
0
            ++fic.insn_bytes;
7188
0
        }
7189
0
7190
0
        copy_REX_VEX(opc, rex_prefix, vex);
7191
0
        emulate_stub("+m" (*mmvalp), "a" (mmvalp));
7192
0
7193
0
        put_stub(stub);
7194
0
        check_xmm_exn(&fic);
7195
0
7196
0
        state->simd_size = simd_none;
7197
0
        dst.type = OP_NONE;
7198
0
        break;
7199
0
7200
0
    case X86EMUL_OPC_66(0x0f38, 0x20): /* pmovsxbw xmm/m64,xmm */
7201
0
    case X86EMUL_OPC_66(0x0f38, 0x21): /* pmovsxbd xmm/m32,xmm */
7202
0
    case X86EMUL_OPC_66(0x0f38, 0x22): /* pmovsxbq xmm/m16,xmm */
7203
0
    case X86EMUL_OPC_66(0x0f38, 0x23): /* pmovsxwd xmm/m64,xmm */
7204
0
    case X86EMUL_OPC_66(0x0f38, 0x24): /* pmovsxwq xmm/m32,xmm */
7205
0
    case X86EMUL_OPC_66(0x0f38, 0x25): /* pmovsxdq xmm/m64,xmm */
7206
0
    case X86EMUL_OPC_66(0x0f38, 0x30): /* pmovzxbw xmm/m64,xmm */
7207
0
    case X86EMUL_OPC_66(0x0f38, 0x31): /* pmovzxbd xmm/m32,xmm */
7208
0
    case X86EMUL_OPC_66(0x0f38, 0x32): /* pmovzxbq xmm/m16,xmm */
7209
0
    case X86EMUL_OPC_66(0x0f38, 0x33): /* pmovzxwd xmm/m64,xmm */
7210
0
    case X86EMUL_OPC_66(0x0f38, 0x34): /* pmovzxwq xmm/m32,xmm */
7211
0
    case X86EMUL_OPC_66(0x0f38, 0x35): /* pmovzxdq xmm/m64,xmm */
7212
0
        op_bytes = 16 >> pmov_convert_delta[b & 7];
7213
0
        /* fall through */
7214
0
    case X86EMUL_OPC_66(0x0f38, 0x10): /* pblendvb XMM0,xmm/m128,xmm */
7215
0
    case X86EMUL_OPC_66(0x0f38, 0x14): /* blendvps XMM0,xmm/m128,xmm */
7216
0
    case X86EMUL_OPC_66(0x0f38, 0x15): /* blendvpd XMM0,xmm/m128,xmm */
7217
0
    case X86EMUL_OPC_66(0x0f38, 0x28): /* pmuldq xmm/m128,xmm */
7218
0
    case X86EMUL_OPC_66(0x0f38, 0x29): /* pcmpeqq xmm/m128,xmm */
7219
0
    case X86EMUL_OPC_66(0x0f38, 0x2b): /* packusdw xmm/m128,xmm */
7220
0
    case X86EMUL_OPC_66(0x0f38, 0x38): /* pminsb xmm/m128,xmm */
7221
0
    case X86EMUL_OPC_66(0x0f38, 0x39): /* pminsd xmm/m128,xmm */
7222
0
    case X86EMUL_OPC_66(0x0f38, 0x3a): /* pminub xmm/m128,xmm */
7223
0
    case X86EMUL_OPC_66(0x0f38, 0x3b): /* pminud xmm/m128,xmm */
7224
0
    case X86EMUL_OPC_66(0x0f38, 0x3c): /* pmaxsb xmm/m128,xmm */
7225
0
    case X86EMUL_OPC_66(0x0f38, 0x3d): /* pmaxsd xmm/m128,xmm */
7226
0
    case X86EMUL_OPC_66(0x0f38, 0x3e): /* pmaxub xmm/m128,xmm */
7227
0
    case X86EMUL_OPC_66(0x0f38, 0x3f): /* pmaxud xmm/m128,xmm */
7228
0
    case X86EMUL_OPC_66(0x0f38, 0x40): /* pmulld xmm/m128,xmm */
7229
0
    case X86EMUL_OPC_66(0x0f38, 0x41): /* phminposuw xmm/m128,xmm */
7230
0
        host_and_vcpu_must_have(sse4_1);
7231
0
        goto simd_0f38_common;
7232
0
7233
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x20): /* vpmovsxbw xmm/mem,{x,y}mm */
7234
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x21): /* vpmovsxbd xmm/mem,{x,y}mm */
7235
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x22): /* vpmovsxbq xmm/mem,{x,y}mm */
7236
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x23): /* vpmovsxwd xmm/mem,{x,y}mm */
7237
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x24): /* vpmovsxwq xmm/mem,{x,y}mm */
7238
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x25): /* vpmovsxdq xmm/mem,{x,y}mm */
7239
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x30): /* vpmovzxbw xmm/mem,{x,y}mm */
7240
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x31): /* vpmovzxbd xmm/mem,{x,y}mm */
7241
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x32): /* vpmovzxbq xmm/mem,{x,y}mm */
7242
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x33): /* vpmovzxwd xmm/mem,{x,y}mm */
7243
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x34): /* vpmovzxwq xmm/mem,{x,y}mm */
7244
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x35): /* vpmovzxdq xmm/mem,{x,y}mm */
7245
0
        op_bytes = 16 >> (pmov_convert_delta[b & 7] - vex.l);
7246
0
        goto simd_0f_int;
7247
0
7248
0
    case X86EMUL_OPC_66(0x0f38, 0x2a):     /* movntdqa m128,xmm */
7249
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x2a): /* vmovntdqa mem,{x,y}mm */
7250
0
        generate_exception_if(ea.type != OP_MEM, EXC_UD);
7251
0
        /* Ignore the non-temporal hint for now, using movdqa instead. */
7252
0
        asm volatile ( "mfence" ::: "memory" );
7253
0
        b = 0x6f;
7254
0
        if ( vex.opcx == vex_none )
7255
0
            vcpu_must_have(sse4_1);
7256
0
        else
7257
0
        {
7258
0
            vex.opcx = vex_0f;
7259
0
            if ( vex.l )
7260
0
                vcpu_must_have(avx2);
7261
0
        }
7262
0
        goto movdqa;
7263
0
7264
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x2c): /* vmaskmovps mem,{x,y}mm,{x,y}mm */
7265
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x2d): /* vmaskmovpd mem,{x,y}mm,{x,y}mm */
7266
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x2e): /* vmaskmovps {x,y}mm,{x,y}mm,mem */
7267
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x2f): /* vmaskmovpd {x,y}mm,{x,y}mm,mem */
7268
0
    {
7269
0
        typeof(vex) *pvex;
7270
0
7271
0
        generate_exception_if(ea.type != OP_MEM || vex.w, EXC_UD);
7272
0
        host_and_vcpu_must_have(avx);
7273
0
        get_fpu(X86EMUL_FPU_ymm, &fic);
7274
0
7275
0
        /*
7276
0
         * While we can't reasonably provide fully correct behavior here
7277
0
         * (in particular, for writes, avoiding the memory read in anticipation
7278
0
         * of all elements in the range eventually being written), we can (and
7279
0
         * should) still limit the memory access to the smallest possible range
7280
0
         * (suppressing it altogether if all mask bits are clear), to provide
7281
0
         * correct faulting behavior. Read the mask bits via vmovmskp{s,d}
7282
0
         * for that purpose.
7283
0
         */
7284
0
        opc = init_prefixes(stub);
7285
0
        pvex = copy_VEX(opc, vex);
7286
0
        pvex->opcx = vex_0f;
7287
0
        if ( !(b & 1) )
7288
0
            pvex->pfx = vex_none;
7289
0
        opc[0] = 0x50; /* vmovmskp{s,d} */
7290
0
        /* Use %rax as GPR destination and VEX.vvvv as source. */
7291
0
        pvex->r = 1;
7292
0
        pvex->b = !mode_64bit() || (vex.reg >> 3);
7293
0
        opc[1] = 0xc0 | (~vex.reg & 7);
7294
0
        pvex->reg = 0xf;
7295
0
        opc[2] = 0xc3;
7296
0
7297
0
        invoke_stub("", "", "=a" (ea.val) : [dummy] "i" (0));
7298
0
        put_stub(stub);
7299
0
7300
0
        if ( !ea.val )
7301
0
            goto complete_insn;
7302
0
7303
0
        op_bytes = 4 << (b & 1);
7304
0
        first_byte = __builtin_ctz(ea.val);
7305
0
        ea.val >>= first_byte;
7306
0
        first_byte *= op_bytes;
7307
0
        op_bytes *= 32 - __builtin_clz(ea.val);
7308
0
7309
0
        /*
7310
0
         * Even for the memory write variant a memory read is needed, unless
7311
0
         * all set mask bits are contiguous.
7312
0
         */
7313
0
        if ( ea.val & (ea.val + 1) )
7314
0
            d = (d & ~SrcMask) | SrcMem;
7315
0
7316
0
        opc = init_prefixes(stub);
7317
0
        opc[0] = b;
7318
0
        /* Convert memory operand to (%rAX). */
7319
0
        rex_prefix &= ~REX_B;
7320
0
        vex.b = 1;
7321
0
        opc[1] = modrm & 0x38;
7322
0
        fic.insn_bytes = PFX_BYTES + 2;
7323
0
7324
0
        break;
7325
0
    }
7326
0
7327
0
    case X86EMUL_OPC_66(0x0f38, 0x37): /* pcmpgtq xmm/m128,xmm */
7328
0
        host_and_vcpu_must_have(sse4_2);
7329
0
        goto simd_0f38_common;
7330
0
7331
0
    case X86EMUL_OPC_66(0x0f38, 0xdb):     /* aesimc xmm/m128,xmm */
7332
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0xdb): /* vaesimc xmm/m128,xmm */
7333
0
    case X86EMUL_OPC_66(0x0f38, 0xdc):     /* aesenc xmm/m128,xmm,xmm */
7334
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0xdc): /* vaesenc xmm/m128,xmm,xmm */
7335
0
    case X86EMUL_OPC_66(0x0f38, 0xdd):     /* aesenclast xmm/m128,xmm,xmm */
7336
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0xdd): /* vaesenclast xmm/m128,xmm,xmm */
7337
0
    case X86EMUL_OPC_66(0x0f38, 0xde):     /* aesdec xmm/m128,xmm,xmm */
7338
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0xde): /* vaesdec xmm/m128,xmm,xmm */
7339
0
    case X86EMUL_OPC_66(0x0f38, 0xdf):     /* aesdeclast xmm/m128,xmm,xmm */
7340
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0xdf): /* vaesdeclast xmm/m128,xmm,xmm */
7341
0
        host_and_vcpu_must_have(aesni);
7342
0
        if ( vex.opcx == vex_none )
7343
0
            goto simd_0f38_common;
7344
0
        /* fall through */
7345
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0x41): /* vphminposuw xmm/m128,xmm,xmm */
7346
0
        generate_exception_if(vex.l, EXC_UD);
7347
0
        goto simd_0f_avx;
7348
0
7349
0
    case X86EMUL_OPC(0x0f38, 0xc8):     /* sha1nexte xmm/m128,xmm */
7350
0
    case X86EMUL_OPC(0x0f38, 0xc9):     /* sha1msg1 xmm/m128,xmm */
7351
0
    case X86EMUL_OPC(0x0f38, 0xca):     /* sha1msg2 xmm/m128,xmm */
7352
0
    case X86EMUL_OPC(0x0f38, 0xcb):     /* sha256rnds2 XMM0,xmm/m128,xmm */
7353
0
    case X86EMUL_OPC(0x0f38, 0xcc):     /* sha256msg1 xmm/m128,xmm */
7354
0
    case X86EMUL_OPC(0x0f38, 0xcd):     /* sha256msg2 xmm/m128,xmm */
7355
0
        host_and_vcpu_must_have(sha);
7356
0
        op_bytes = 16;
7357
0
        goto simd_0f38_common;
7358
0
7359
0
    case X86EMUL_OPC(0x0f38, 0xf0): /* movbe m,r */
7360
0
    case X86EMUL_OPC(0x0f38, 0xf1): /* movbe r,m */
7361
0
        vcpu_must_have(movbe);
7362
0
        switch ( op_bytes )
7363
0
        {
7364
0
        case 2:
7365
0
            asm ( "xchg %h0,%b0" : "=Q" (dst.val)
7366
0
                                 : "0" (*(uint32_t *)&src.val) );
7367
0
            break;
7368
0
        case 4:
7369
0
#ifdef __x86_64__
7370
0
            asm ( "bswap %k0" : "=r" (dst.val)
7371
0
                              : "0" (*(uint32_t *)&src.val) );
7372
0
            break;
7373
0
        case 8:
7374
0
#endif
7375
0
            asm ( "bswap %0" : "=r" (dst.val) : "0" (src.val) );
7376
0
            break;
7377
0
        default:
7378
0
            ASSERT_UNREACHABLE();
7379
0
        }
7380
0
        break;
7381
0
#ifdef HAVE_GAS_SSE4_2
7382
0
    case X86EMUL_OPC_F2(0x0f38, 0xf0): /* crc32 r/m8, r{32,64} */
7383
0
    case X86EMUL_OPC_F2(0x0f38, 0xf1): /* crc32 r/m{16,32,64}, r{32,64} */
7384
0
        host_and_vcpu_must_have(sse4_2);
7385
0
        dst.bytes = rex_prefix & REX_W ? 8 : 4;
7386
0
        switch ( op_bytes )
7387
0
        {
7388
0
        case 1:
7389
0
            asm ( "crc32b %1,%k0" : "+r" (dst.val)
7390
0
                                  : "qm" (*(uint8_t *)&src.val) );
7391
0
            break;
7392
0
        case 2:
7393
0
            asm ( "crc32w %1,%k0" : "+r" (dst.val)
7394
0
                                  : "rm" (*(uint16_t *)&src.val) );
7395
0
            break;
7396
0
        case 4:
7397
0
            asm ( "crc32l %1,%k0" : "+r" (dst.val)
7398
0
                                  : "rm" (*(uint32_t *)&src.val) );
7399
0
            break;
7400
0
# ifdef __x86_64__
7401
0
        case 8:
7402
0
            asm ( "crc32q %1,%0" : "+r" (dst.val) : "rm" (src.val) );
7403
0
            break;
7404
0
# endif
7405
0
        default:
7406
0
            ASSERT_UNREACHABLE();
7407
0
        }
7408
0
        break;
7409
0
#endif
7410
0
7411
0
    case X86EMUL_OPC_VEX(0x0f38, 0xf2):    /* andn r/m,r,r */
7412
0
    case X86EMUL_OPC_VEX(0x0f38, 0xf5):    /* bzhi r,r/m,r */
7413
0
    case X86EMUL_OPC_VEX_F3(0x0f38, 0xf5): /* pext r/m,r,r */
7414
0
    case X86EMUL_OPC_VEX_F2(0x0f38, 0xf5): /* pdep r/m,r,r */
7415
0
    case X86EMUL_OPC_VEX(0x0f38, 0xf7):    /* bextr r,r/m,r */
7416
0
    case X86EMUL_OPC_VEX_66(0x0f38, 0xf7): /* shlx r,r/m,r */
7417
0
    case X86EMUL_OPC_VEX_F3(0x0f38, 0xf7): /* sarx r,r/m,r */
7418
0
    case X86EMUL_OPC_VEX_F2(0x0f38, 0xf7): /* shrx r,r/m,r */
7419
0
    {
7420
0
        uint8_t *buf = get_stub(stub);
7421
0
        typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]);
7422
0
7423
0
        if ( b == 0xf5 || vex.pfx )
7424
0
            host_and_vcpu_must_have(bmi2);
7425
0
        else
7426
0
            host_and_vcpu_must_have(bmi1);
7427
0
        generate_exception_if(vex.l, EXC_UD);
7428
0
7429
0
        buf[0] = 0xc4;
7430
0
        *pvex = vex;
7431
0
        pvex->b = 1;
7432
0
        pvex->r = 1;
7433
0
        pvex->reg = 0xf; /* rAX */
7434
0
        buf[3] = b;
7435
0
        buf[4] = 0x09; /* reg=rCX r/m=(%rCX) */
7436
0
        buf[5] = 0xc3;
7437
0
7438
0
        src.reg = decode_vex_gpr(vex.reg, &_regs, ctxt);
7439
0
        emulate_stub([dst] "=&c" (dst.val), "[dst]" (&src.val), "a" (*src.reg));
7440
0
7441
0
        put_stub(stub);
7442
0
        break;
7443
0
    }
7444
0
7445
0
    case X86EMUL_OPC_VEX(0x0f38, 0xf3): /* Grp 17 */
7446
0
    {
7447
0
        uint8_t *buf = get_stub(stub);
7448
0
        typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]);
7449
0
7450
0
        switch ( modrm_reg & 7 )
7451
0
        {
7452
0
        case 1: /* blsr r,r/m */
7453
0
        case 2: /* blsmsk r,r/m */
7454
0
        case 3: /* blsi r,r/m */
7455
0
            host_and_vcpu_must_have(bmi1);
7456
0
            break;
7457
0
        default:
7458
0
            goto unrecognized_insn;
7459
0
        }
7460
0
7461
0
        generate_exception_if(vex.l, EXC_UD);
7462
0
7463
0
        buf[0] = 0xc4;
7464
0
        *pvex = vex;
7465
0
        pvex->b = 1;
7466
0
        pvex->r = 1;
7467
0
        pvex->reg = 0xf; /* rAX */
7468
0
        buf[3] = b;
7469
0
        buf[4] = (modrm & 0x38) | 0x01; /* r/m=(%rCX) */
7470
0
        buf[5] = 0xc3;
7471
0
7472
0
        dst.reg = decode_vex_gpr(vex.reg, &_regs, ctxt);
7473
0
        emulate_stub("=&a" (dst.val), "c" (&src.val));
7474
0
7475
0
        put_stub(stub);
7476
0
        break;
7477
0
    }
7478
0
7479
0
    case X86EMUL_OPC_66(0x0f38, 0xf6): /* adcx r/m,r */
7480
0
    case X86EMUL_OPC_F3(0x0f38, 0xf6): /* adox r/m,r */
7481
0
    {
7482
0
        unsigned int mask = rep_prefix() ? X86_EFLAGS_OF : X86_EFLAGS_CF;
7483
0
        unsigned int aux = _regs.eflags & mask ? ~0 : 0;
7484
0
        bool carry;
7485
0
7486
0
        vcpu_must_have(adx);
7487
0
#ifdef __x86_64__
7488
0
        if ( op_bytes == 8 )
7489
0
            asm ( "add %[aux],%[aux]\n\t"
7490
0
                  "adc %[src],%[dst]\n\t"
7491
0
                  ASM_FLAG_OUT(, "setc %[carry]")
7492
0
                  : [dst] "+r" (dst.val),
7493
0
                    [carry] ASM_FLAG_OUT("=@ccc", "=qm") (carry),
7494
0
                    [aux] "+r" (aux)
7495
0
                  : [src] "rm" (src.val) );
7496
0
        else
7497
0
#endif
7498
0
            asm ( "add %[aux],%[aux]\n\t"
7499
0
                  "adc %k[src],%k[dst]\n\t"
7500
0
                  ASM_FLAG_OUT(, "setc %[carry]")
7501
0
                  : [dst] "+r" (dst.val),
7502
0
                    [carry] ASM_FLAG_OUT("=@ccc", "=qm") (carry),
7503
0
                    [aux] "+r" (aux)
7504
0
                  : [src] "rm" (src.val) );
7505
0
        if ( carry )
7506
0
            _regs.eflags |= mask;
7507
0
        else
7508
0
            _regs.eflags &= ~mask;
7509
0
        break;
7510
0
    }
7511
0
7512
0
    case X86EMUL_OPC_VEX_F2(0x0f38, 0xf6): /* mulx r/m,r,r */
7513
0
        vcpu_must_have(bmi2);
7514
0
        generate_exception_if(vex.l, EXC_UD);
7515
0
        ea.reg = decode_vex_gpr(vex.reg, &_regs, ctxt);
7516
0
        if ( mode_64bit() && vex.w )
7517
0
            asm ( "mulq %3" : "=a" (*ea.reg), "=d" (dst.val)
7518
0
                            : "0" (src.val), "rm" (_regs.r(dx)) );
7519
0
        else
7520
0
            asm ( "mull %3" : "=a" (*ea.reg), "=d" (dst.val)
7521
0
                            : "0" ((uint32_t)src.val), "rm" (_regs.edx) );
7522
0
        break;
7523
0
7524
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x06): /* vperm2f128 $imm8,ymm/m256,ymm,ymm */
7525
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x18): /* vinsertf128 $imm8,xmm/m128,ymm,ymm */
7526
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x19): /* vextractf128 $imm8,ymm,xmm/m128 */
7527
0
        generate_exception_if(!vex.l, EXC_UD);
7528
0
        /* fall through */
7529
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x04): /* vpermilps $imm8,{x,y}mm/mem,{x,y}mm */
7530
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x05): /* vpermilpd $imm8,{x,y}mm/mem,{x,y}mm */
7531
0
        generate_exception_if(vex.w, EXC_UD);
7532
0
        goto simd_0f_imm8_avx;
7533
0
7534
0
    case X86EMUL_OPC_66(0x0f3a, 0x08): /* roundps $imm8,xmm/m128,xmm */
7535
0
    case X86EMUL_OPC_66(0x0f3a, 0x09): /* roundpd $imm8,xmm/m128,xmm */
7536
0
    case X86EMUL_OPC_66(0x0f3a, 0x0a): /* roundss $imm8,xmm/m128,xmm */
7537
0
    case X86EMUL_OPC_66(0x0f3a, 0x0b): /* roundsd $imm8,xmm/m128,xmm */
7538
0
    case X86EMUL_OPC_66(0x0f3a, 0x0c): /* blendps $imm8,xmm/m128,xmm */
7539
0
    case X86EMUL_OPC_66(0x0f3a, 0x0d): /* blendpd $imm8,xmm/m128,xmm */
7540
0
    case X86EMUL_OPC_66(0x0f3a, 0x0e): /* pblendw $imm8,xmm/m128,xmm */
7541
0
    case X86EMUL_OPC_66(0x0f3a, 0x40): /* dpps $imm8,xmm/m128,xmm */
7542
0
    case X86EMUL_OPC_66(0x0f3a, 0x41): /* dppd $imm8,xmm/m128,xmm */
7543
0
    case X86EMUL_OPC_66(0x0f3a, 0x42): /* mpsadbw $imm8,xmm/m128,xmm */
7544
0
        host_and_vcpu_must_have(sse4_1);
7545
0
        goto simd_0f3a_common;
7546
0
7547
0
    case X86EMUL_OPC(0x0f3a, 0x0f):    /* palignr $imm8,mm/m64,mm */
7548
0
    case X86EMUL_OPC_66(0x0f3a, 0x0f): /* palignr $imm8,xmm/m128,xmm */
7549
0
        host_and_vcpu_must_have(ssse3);
7550
0
        if ( vex.pfx )
7551
0
        {
7552
0
    simd_0f3a_common:
7553
0
            get_fpu(X86EMUL_FPU_xmm, &fic);
7554
0
        }
7555
0
        else
7556
0
        {
7557
0
            host_and_vcpu_must_have(mmx);
7558
0
            get_fpu(X86EMUL_FPU_mmx, &fic);
7559
0
        }
7560
0
        opc = init_prefixes(stub);
7561
0
        opc[0] = 0x3a;
7562
0
        opc[1] = b;
7563
0
        opc[2] = modrm;
7564
0
        if ( ea.type == OP_MEM )
7565
0
        {
7566
0
            /* Convert memory operand to (%rAX). */
7567
0
            rex_prefix &= ~REX_B;
7568
0
            vex.b = 1;
7569
0
            opc[2] &= 0x38;
7570
0
        }
7571
0
        opc[3] = imm1;
7572
0
        fic.insn_bytes = PFX_BYTES + 4;
7573
0
        break;
7574
0
7575
0
    case X86EMUL_OPC_66(0x0f3a, 0x14): /* pextrb $imm8,xmm,r/m */
7576
0
    case X86EMUL_OPC_66(0x0f3a, 0x15): /* pextrw $imm8,xmm,r/m */
7577
0
    case X86EMUL_OPC_66(0x0f3a, 0x16): /* pextr{d,q} $imm8,xmm,r/m */
7578
0
    case X86EMUL_OPC_66(0x0f3a, 0x17): /* extractps $imm8,xmm,r/m */
7579
0
        host_and_vcpu_must_have(sse4_1);
7580
0
        get_fpu(X86EMUL_FPU_xmm, &fic);
7581
0
7582
0
        opc = init_prefixes(stub);
7583
0
        opc++[0] = 0x3a;
7584
0
    pextr:
7585
0
        opc[0] = b;
7586
0
        /* Convert memory/GPR operand to (%rAX). */
7587
0
        rex_prefix &= ~REX_B;
7588
0
        vex.b = 1;
7589
0
        if ( !mode_64bit() )
7590
0
            vex.w = 0;
7591
0
        opc[1] = modrm & 0x38;
7592
0
        opc[2] = imm1;
7593
0
        fic.insn_bytes = PFX_BYTES + 3;
7594
0
        opc[3] = 0xc3;
7595
0
        if ( vex.opcx == vex_none )
7596
0
        {
7597
0
            /* Cover for extra prefix byte. */
7598
0
            --opc;
7599
0
            ++fic.insn_bytes;
7600
0
        }
7601
0
7602
0
        copy_REX_VEX(opc, rex_prefix, vex);
7603
0
        invoke_stub("", "", "=m" (dst.val) : "a" (&dst.val));
7604
0
7605
0
        put_stub(stub);
7606
0
        check_xmm_exn(&fic);
7607
0
7608
0
        ASSERT(!state->simd_size);
7609
0
        dst.bytes = dst.type == OP_REG || b == 0x17 ? 4 : 1 << (b & 3);
7610
0
        if ( b == 0x16 && (rex_prefix & REX_W) )
7611
0
            dst.bytes = 8;
7612
0
        break;
7613
0
7614
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x14): /* vpextrb $imm8,xmm,r/m */
7615
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x15): /* vpextrw $imm8,xmm,r/m */
7616
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x16): /* vpextr{d,q} $imm8,xmm,r/m */
7617
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x17): /* vextractps $imm8,xmm,r/m */
7618
0
        generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
7619
0
        host_and_vcpu_must_have(avx);
7620
0
        get_fpu(X86EMUL_FPU_ymm, &fic);
7621
0
        opc = init_prefixes(stub);
7622
0
        goto pextr;
7623
0
7624
0
    case X86EMUL_OPC_66(0x0f3a, 0x20): /* pinsrb $imm8,r32/m8,xmm */
7625
0
    case X86EMUL_OPC_66(0x0f3a, 0x22): /* pinsr{d,q} $imm8,r/m,xmm */
7626
0
        host_and_vcpu_must_have(sse4_1);
7627
0
        get_fpu(X86EMUL_FPU_xmm, &fic);
7628
0
        memcpy(mmvalp, &src.val, op_bytes);
7629
0
        ea.type = OP_MEM;
7630
0
        op_bytes = src.bytes;
7631
0
        d = SrcMem16; /* Fake for the common SIMD code below. */
7632
0
        state->simd_size = simd_other;
7633
0
        goto simd_0f3a_common;
7634
0
7635
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x20): /* vpinsrb $imm8,r32/m8,xmm,xmm */
7636
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x22): /* vpinsr{d,q} $imm8,r/m,xmm,xmm */
7637
0
        generate_exception_if(vex.l, EXC_UD);
7638
0
        if ( !mode_64bit() )
7639
0
            vex.w = 0;
7640
0
        memcpy(mmvalp, &src.val, op_bytes);
7641
0
        ea.type = OP_MEM;
7642
0
        op_bytes = src.bytes;
7643
0
        d = SrcMem16; /* Fake for the common SIMD code below. */
7644
0
        state->simd_size = simd_other;
7645
0
        goto simd_0f_int_imm8;
7646
0
7647
0
    case X86EMUL_OPC_66(0x0f3a, 0x21): /* insertps $imm8,xmm/m32,xmm */
7648
0
        host_and_vcpu_must_have(sse4_1);
7649
0
        op_bytes = 4;
7650
0
        goto simd_0f3a_common;
7651
0
7652
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x21): /* vinsertps $imm8,xmm/m128,xmm,xmm */
7653
0
        op_bytes = 4;
7654
0
        /* fall through */
7655
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x41): /* vdppd $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7656
0
        generate_exception_if(vex.l, EXC_UD);
7657
0
        goto simd_0f_imm8_avx;
7658
0
7659
0
    case X86EMUL_OPC_66(0x0f3a, 0x44):     /* pclmulqdq $imm8,xmm/m128,xmm */
7660
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x44): /* vpclmulqdq $imm8,xmm/m128,xmm,xmm */
7661
0
        host_and_vcpu_must_have(pclmulqdq);
7662
0
        if ( vex.opcx == vex_none )
7663
0
            goto simd_0f3a_common;
7664
0
        generate_exception_if(vex.l, EXC_UD);
7665
0
        goto simd_0f_imm8_avx;
7666
0
7667
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x4a): /* vblendvps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7668
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x4b): /* vblendvpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7669
0
        generate_exception_if(vex.w, EXC_UD);
7670
0
        goto simd_0f_imm8_avx;
7671
0
7672
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x4c): /* vpblendvb {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
7673
0
        generate_exception_if(vex.w, EXC_UD);
7674
0
        goto simd_0f_int_imm8;
7675
0
7676
0
    case X86EMUL_OPC_66(0x0f3a, 0x60):     /* pcmpestrm $imm8,xmm/m128,xmm */
7677
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x60): /* vpcmpestrm $imm8,xmm/m128,xmm */
7678
0
    case X86EMUL_OPC_66(0x0f3a, 0x61):     /* pcmpestri $imm8,xmm/m128,xmm */
7679
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x61): /* vpcmpestri $imm8,xmm/m128,xmm */
7680
0
    case X86EMUL_OPC_66(0x0f3a, 0x62):     /* pcmpistrm $imm8,xmm/m128,xmm */
7681
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x62): /* vpcmpistrm $imm8,xmm/m128,xmm */
7682
0
    case X86EMUL_OPC_66(0x0f3a, 0x63):     /* pcmpistri $imm8,xmm/m128,xmm */
7683
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0x63): /* vpcmpistri $imm8,xmm/m128,xmm */
7684
0
        if ( vex.opcx == vex_none )
7685
0
        {
7686
0
            host_and_vcpu_must_have(sse4_2);
7687
0
            get_fpu(X86EMUL_FPU_xmm, &fic);
7688
0
        }
7689
0
        else
7690
0
        {
7691
0
            generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
7692
0
            host_and_vcpu_must_have(avx);
7693
0
            get_fpu(X86EMUL_FPU_ymm, &fic);
7694
0
        }
7695
0
7696
0
        opc = init_prefixes(stub);
7697
0
        if ( vex.opcx == vex_none )
7698
0
            opc++[0] = 0x3a;
7699
0
        opc[0] = b;
7700
0
        opc[1] = modrm;
7701
0
        if ( ea.type == OP_MEM )
7702
0
        {
7703
0
            /* Convert memory operand to (%rDI). */
7704
0
            rex_prefix &= ~REX_B;
7705
0
            vex.b = 1;
7706
0
            opc[1] &= 0x3f;
7707
0
            opc[1] |= 0x07;
7708
0
7709
0
            rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 16, ctxt);
7710
0
            if ( rc != X86EMUL_OKAY )
7711
0
                goto done;
7712
0
        }
7713
0
        opc[2] = imm1;
7714
0
        fic.insn_bytes = PFX_BYTES + 3;
7715
0
        opc[3] = 0xc3;
7716
0
        if ( vex.opcx == vex_none )
7717
0
        {
7718
0
            /* Cover for extra prefix byte. */
7719
0
            --opc;
7720
0
            ++fic.insn_bytes;
7721
0
        }
7722
0
7723
0
        copy_REX_VEX(opc, rex_prefix, vex);
7724
0
#ifdef __x86_64__
7725
0
        if ( rex_prefix & REX_W )
7726
0
            emulate_stub("=c" (dst.val), "m" (*mmvalp), "D" (mmvalp),
7727
0
                         "a" (_regs.rax), "d" (_regs.rdx));
7728
0
        else
7729
0
#endif
7730
0
            emulate_stub("=c" (dst.val), "m" (*mmvalp), "D" (mmvalp),
7731
0
                         "a" (_regs.eax), "d" (_regs.edx));
7732
0
7733
0
        state->simd_size = simd_none;
7734
0
        if ( b & 1 )
7735
0
            _regs.r(cx) = (uint32_t)dst.val;
7736
0
        dst.type = OP_NONE;
7737
0
        break;
7738
0
7739
0
    case X86EMUL_OPC(0x0f3a, 0xcc):     /* sha1rnds4 $imm8,xmm/m128,xmm */
7740
0
        host_and_vcpu_must_have(sha);
7741
0
        op_bytes = 16;
7742
0
        goto simd_0f3a_common;
7743
0
7744
0
    case X86EMUL_OPC_66(0x0f3a, 0xdf):     /* aeskeygenassist $imm8,xmm/m128,xmm */
7745
0
    case X86EMUL_OPC_VEX_66(0x0f3a, 0xdf): /* vaeskeygenassist $imm8,xmm/m128,xmm */
7746
0
        host_and_vcpu_must_have(aesni);
7747
0
        if ( vex.opcx == vex_none )
7748
0
            goto simd_0f3a_common;
7749
0
        generate_exception_if(vex.l, EXC_UD);
7750
0
        goto simd_0f_imm8_avx;
7751
0
7752
0
    case X86EMUL_OPC_VEX_F2(0x0f3a, 0xf0): /* rorx imm,r/m,r */
7753
0
        vcpu_must_have(bmi2);
7754
0
        generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
7755
0
        if ( ea.type == OP_REG )
7756
0
            src.val = *ea.reg;
7757
0
        else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val, op_bytes,
7758
0
                                   ctxt, ops)) != X86EMUL_OKAY )
7759
0
            goto done;
7760
0
        if ( mode_64bit() && vex.w )
7761
0
            asm ( "rorq %b1,%0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
7762
0
        else
7763
0
            asm ( "rorl %b1,%k0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
7764
0
        break;
7765
0
7766
0
    case X86EMUL_OPC_XOP(09, 0x01): /* XOP Grp1 */
7767
0
        switch ( modrm_reg & 7 )
7768
0
        {
7769
0
        case 1: /* blcfill r/m,r */
7770
0
        case 2: /* blsfill r/m,r */
7771
0
        case 3: /* blcs r/m,r */
7772
0
        case 4: /* tzmsk r/m,r */
7773
0
        case 5: /* blcic r/m,r */
7774
0
        case 6: /* blsic r/m,r */
7775
0
        case 7: /* t1mskc r/m,r */
7776
0
            host_and_vcpu_must_have(tbm);
7777
0
            break;
7778
0
        default:
7779
0
            goto unrecognized_insn;
7780
0
        }
7781
0
7782
0
    xop_09_rm_rv:
7783
0
    {
7784
0
        uint8_t *buf = get_stub(stub);
7785
0
        typeof(vex) *pxop = container_of(buf + 1, typeof(vex), raw[0]);
7786
0
7787
0
        generate_exception_if(vex.l, EXC_UD);
7788
0
7789
0
        buf[0] = 0x8f;
7790
0
        *pxop = vex;
7791
0
        pxop->b = 1;
7792
0
        pxop->r = 1;
7793
0
        pxop->reg = 0xf; /* rAX */
7794
0
        buf[3] = b;
7795
0
        buf[4] = (modrm & 0x38) | 0x01; /* r/m=(%rCX) */
7796
0
        buf[5] = 0xc3;
7797
0
7798
0
        dst.reg = decode_vex_gpr(vex.reg, &_regs, ctxt);
7799
0
        emulate_stub([dst] "=&a" (dst.val), "c" (&src.val));
7800
0
7801
0
        put_stub(stub);
7802
0
        break;
7803
0
    }
7804
0
7805
0
    case X86EMUL_OPC_XOP(09, 0x02): /* XOP Grp2 */
7806
0
        switch ( modrm_reg & 7 )
7807
0
        {
7808
0
        case 1: /* blcmsk r/m,r */
7809
0
        case 6: /* blci r/m,r */
7810
0
            host_and_vcpu_must_have(tbm);
7811
0
            goto xop_09_rm_rv;
7812
0
        }
7813
0
        goto unrecognized_insn;
7814
0
7815
0
    case X86EMUL_OPC_XOP(0a, 0x10): /* bextr imm,r/m,r */
7816
0
    {
7817
0
        uint8_t *buf = get_stub(stub);
7818
0
        typeof(vex) *pxop = container_of(buf + 1, typeof(vex), raw[0]);
7819
0
7820
0
        host_and_vcpu_must_have(tbm);
7821
0
        generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
7822
0
7823
0
        if ( ea.type == OP_REG )
7824
0
            src.val = *ea.reg;
7825
0
        else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val, op_bytes,
7826
0
                                   ctxt, ops)) != X86EMUL_OKAY )
7827
0
            goto done;
7828
0
7829
0
        buf[0] = 0x8f;
7830
0
        *pxop = vex;
7831
0
        pxop->b = 1;
7832
0
        pxop->r = 1;
7833
0
        buf[3] = b;
7834
0
        buf[4] = 0x09; /* reg=rCX r/m=(%rCX) */
7835
0
        *(uint32_t *)(buf + 5) = imm1;
7836
0
        buf[9] = 0xc3;
7837
0
7838
0
        emulate_stub([dst] "=&c" (dst.val), "[dst]" (&src.val));
7839
0
7840
0
        put_stub(stub);
7841
0
        break;
7842
0
    }
7843
0
7844
0
    default:
7845
0
    unimplemented_insn:
7846
0
        rc = X86EMUL_UNIMPLEMENTED;
7847
0
        goto done;
7848
0
    unrecognized_insn:
7849
0
        rc = X86EMUL_UNRECOGNIZED;
7850
0
        goto done;
7851
60.1k
    }
7852
60.1k
7853
60.1k
    if ( state->simd_size )
7854
0
    {
7855
0
        generate_exception_if(!op_bytes, EXC_UD);
7856
0
        generate_exception_if(vex.opcx && (d & TwoOp) && vex.reg != 0xf,
7857
0
                              EXC_UD);
7858
0
7859
0
        if ( !opc )
7860
0
            BUG();
7861
0
        opc[fic.insn_bytes - PFX_BYTES] = 0xc3;
7862
0
        copy_REX_VEX(opc, rex_prefix, vex);
7863
0
7864
0
        if ( ea.type == OP_MEM )
7865
0
        {
7866
0
            uint32_t mxcsr = 0;
7867
0
7868
0
            if ( op_bytes < 16 ||
7869
0
                 (vex.opcx
7870
0
                  ? /* vmov{{a,nt}p{s,d},dqa,ntdq} are exceptions. */
7871
0
                    ext != ext_0f ||
7872
0
                    ((b | 1) != 0x29 && b != 0x2b &&
7873
0
                     ((b | 0x10) != 0x7f || vex.pfx != vex_66) &&
7874
0
                     b != 0xe7)
7875
0
                  : /* movup{s,d}, {,mask}movdqu, and lddqu are exceptions. */
7876
0
                    ext == ext_0f &&
7877
0
                    ((b | 1) == 0x11 ||
7878
0
                     ((b | 0x10) == 0x7f && vex.pfx == vex_f3) ||
7879
0
                     b == 0xf7 || b == 0xf0)) )
7880
0
                mxcsr = MXCSR_MM;
7881
0
            else if ( vcpu_has_misalignsse() )
7882
0
                asm ( "stmxcsr %0" : "=m" (mxcsr) );
7883
0
            generate_exception_if(!(mxcsr & MXCSR_MM) &&
7884
0
                                  !is_aligned(ea.mem.seg, ea.mem.off, op_bytes,
7885
0
                                              ctxt, ops),
7886
0
                                  EXC_GP, 0);
7887
0
            switch ( d & SrcMask )
7888
0
            {
7889
0
            case SrcMem:
7890
0
                rc = ops->read(ea.mem.seg, truncate_ea(ea.mem.off + first_byte),
7891
0
                               (void *)mmvalp + first_byte, op_bytes,
7892
0
                               ctxt);
7893
0
                if ( rc != X86EMUL_OKAY )
7894
0
                    goto done;
7895
0
                /* fall through */
7896
0
            case SrcMem16:
7897
0
                dst.type = OP_NONE;
7898
0
                break;
7899
0
            default:
7900
0
                if ( (d & DstMask) != DstMem )
7901
0
                {
7902
0
                    ASSERT_UNREACHABLE();
7903
0
                    rc = X86EMUL_UNHANDLEABLE;
7904
0
                    goto done;
7905
0
                }
7906
0
                break;
7907
0
            }
7908
0
            if ( (d & DstMask) == DstMem )
7909
0
            {
7910
0
                fail_if(!ops->write); /* Check before running the stub. */
7911
0
                if ( (d & SrcMask) == SrcMem )
7912
0
                    d |= Mov; /* Force memory write to occur below. */
7913
0
7914
0
                switch ( ctxt->opcode )
7915
0
                {
7916
0
                case X86EMUL_OPC_VEX_66(0x0f38, 0x2e): /* vmaskmovps */
7917
0
                case X86EMUL_OPC_VEX_66(0x0f38, 0x2f): /* vmaskmovpd */
7918
0
                    /* These have merge semantics; force write to occur. */
7919
0
                    d |= Mov;
7920
0
                    break;
7921
0
                default:
7922
0
                    ASSERT(d & Mov);
7923
0
                    break;
7924
0
                }
7925
0
7926
0
                dst.type = OP_MEM;
7927
0
                dst.bytes = op_bytes;
7928
0
                dst.mem = ea.mem;
7929
0
            }
7930
0
        }
7931
0
        else
7932
0
            dst.type = OP_NONE;
7933
0
7934
0
        /* {,v}maskmov{q,dqu}, as an exception, uses rDI. */
7935
0
        if ( likely((ctxt->opcode & ~(X86EMUL_OPC_PFX_MASK |
7936
0
                                      X86EMUL_OPC_ENCODING_MASK)) !=
7937
0
                    X86EMUL_OPC(0x0f, 0xf7)) )
7938
0
            invoke_stub("", "", "+m" (*mmvalp), "+m" (fic.exn_raised)
7939
0
                                : "a" (mmvalp));
7940
0
        else
7941
0
            invoke_stub("", "", "+m" (*mmvalp) : "D" (mmvalp));
7942
0
7943
0
        put_stub(stub);
7944
0
        check_xmm_exn(&fic);
7945
0
    }
7946
60.1k
7947
60.1k
    switch ( dst.type )
7948
60.1k
    {
7949
56.0k
    case OP_REG:
7950
56.0k
        /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
7951
56.0k
        switch ( dst.bytes )
7952
56.0k
        {
7953
0
        case 1: *(uint8_t  *)dst.reg = (uint8_t)dst.val; break;
7954
0
        case 2: *(uint16_t *)dst.reg = (uint16_t)dst.val; break;
7955
56.0k
        case 4: *dst.reg = (uint32_t)dst.val; break; /* 64b: zero-ext */
7956
0
        case 8: *dst.reg = dst.val; break;
7957
56.0k
        }
7958
56.0k
        break;
7959
4.11k
    case OP_MEM:
7960
4.11k
        if ( !(d & Mov) && (dst.orig_val == dst.val) &&
7961
0
             !ctxt->force_writeback )
7962
0
            /* nothing to do */;
7963
4.11k
        else if ( lock_prefix )
7964
0
        {
7965
0
            fail_if(!ops->cmpxchg);
7966
0
            rc = ops->cmpxchg(
7967
0
                dst.mem.seg, dst.mem.off, &dst.orig_val,
7968
0
                &dst.val, dst.bytes, ctxt);
7969
0
        }
7970
4.11k
        else
7971
4.11k
        {
7972
4.11k
            fail_if(!ops->write);
7973
4.11k
            rc = ops->write(dst.mem.seg, truncate_ea(dst.mem.off + first_byte),
7974
4.11k
                            !state->simd_size ? &dst.val
7975
0
                                              : (void *)mmvalp + first_byte,
7976
4.11k
                            dst.bytes, ctxt);
7977
4.11k
            if ( sfence )
7978
0
                asm volatile ( "sfence" ::: "memory" );
7979
4.11k
        }
7980
4.11k
        if ( rc != 0 )
7981
0
            goto done;
7982
4.11k
    default:
7983
4.11k
        break;
7984
60.1k
    }
7985
60.1k
7986
60.1k
 complete_insn: /* Commit shadow register state. */
7987
60.1k
    put_fpu(&fic, false, state, ctxt, ops);
7988
60.1k
7989
60.1k
    /* Zero the upper 32 bits of %rip if not in 64-bit mode. */
7990
60.1k
    if ( !mode_64bit() )
7991
0
        _regs.r(ip) = _regs.eip;
7992
60.1k
7993
60.1k
    /* Should a singlestep #DB be raised? */
7994
60.1k
    if ( rc == X86EMUL_OKAY && singlestep && !ctxt->retire.mov_ss )
7995
0
    {
7996
0
        ctxt->retire.singlestep = true;
7997
0
        ctxt->retire.sti = false;
7998
0
    }
7999
60.1k
8000
60.1k
    if ( rc != X86EMUL_DONE )
8001
60.1k
        *ctxt->regs = _regs;
8002
60.1k
    else
8003
0
    {
8004
0
        ctxt->regs->r(ip) = _regs.r(ip);
8005
0
        rc = X86EMUL_OKAY;
8006
0
    }
8007
60.1k
8008
60.1k
    ctxt->regs->eflags &= ~X86_EFLAGS_RF;
8009
60.1k
8010
60.1k
 done:
8011
60.1k
    put_fpu(&fic, fic.insn_bytes > 0 && dst.type == OP_MEM, state, ctxt, ops);
8012
60.1k
    put_stub(stub);
8013
60.1k
    return rc;
8014
60.1k
#undef state
8015
60.1k
}
8016
8017
#undef op_bytes
8018
#undef ad_bytes
8019
#undef ext
8020
#undef modrm
8021
#undef modrm_mod
8022
#undef modrm_reg
8023
#undef modrm_rm
8024
#undef rex_prefix
8025
#undef lock_prefix
8026
#undef vex
8027
#undef ea
8028
8029
static void __init __maybe_unused build_assertions(void)
8030
0
{
8031
0
    /* Check the values against SReg3 encoding in opcode/ModRM bytes. */
8032
0
    BUILD_BUG_ON(x86_seg_es != 0);
8033
0
    BUILD_BUG_ON(x86_seg_cs != 1);
8034
0
    BUILD_BUG_ON(x86_seg_ss != 2);
8035
0
    BUILD_BUG_ON(x86_seg_ds != 3);
8036
0
    BUILD_BUG_ON(x86_seg_fs != 4);
8037
0
    BUILD_BUG_ON(x86_seg_gs != 5);
8038
0
8039
0
    /*
8040
0
     * Check X86_EVENTTYPE_* against VMCB EVENTINJ and VMCS INTR_INFO type
8041
0
     * fields.
8042
0
     */
8043
0
    BUILD_BUG_ON(X86_EVENTTYPE_EXT_INTR != 0);
8044
0
    BUILD_BUG_ON(X86_EVENTTYPE_NMI != 2);
8045
0
    BUILD_BUG_ON(X86_EVENTTYPE_HW_EXCEPTION != 3);
8046
0
    BUILD_BUG_ON(X86_EVENTTYPE_SW_INTERRUPT != 4);
8047
0
    BUILD_BUG_ON(X86_EVENTTYPE_PRI_SW_EXCEPTION != 5);
8048
0
    BUILD_BUG_ON(X86_EVENTTYPE_SW_EXCEPTION != 6);
8049
0
}
8050
8051
#ifndef NDEBUG
8052
/*
8053
 * In debug builds, wrap x86_emulate() with some assertions about its expected
8054
 * behaviour.
8055
 */
8056
int x86_emulate_wrapper(
8057
    struct x86_emulate_ctxt *ctxt,
8058
    const struct x86_emulate_ops *ops)
8059
60.1k
{
8060
60.1k
    unsigned long orig_ip = ctxt->regs->r(ip);
8061
60.1k
    int rc;
8062
60.1k
8063
60.1k
    if ( mode_64bit() )
8064
60.1k
        ASSERT(ctxt->lma);
8065
60.1k
8066
60.1k
    rc = x86_emulate(ctxt, ops);
8067
60.1k
8068
60.1k
    /*
8069
60.1k
     * Most retire flags should only be set for successful instruction
8070
60.1k
     * emulation.
8071
60.1k
     */
8072
60.1k
    if ( rc != X86EMUL_OKAY )
8073
0
    {
8074
0
        typeof(ctxt->retire) retire = ctxt->retire;
8075
0
8076
0
        retire.unblock_nmi = false;
8077
0
        ASSERT(!retire.raw);
8078
0
    }
8079
60.1k
8080
60.1k
    /* All cases returning X86EMUL_EXCEPTION should have fault semantics. */
8081
60.1k
    if ( rc == X86EMUL_EXCEPTION )
8082
0
        ASSERT(ctxt->regs->r(ip) == orig_ip);
8083
60.1k
8084
60.1k
    /*
8085
60.1k
     * An event being pending should exactly match returning
8086
60.1k
     * X86EMUL_EXCEPTION.  (If this trips, the chances are a codepath has
8087
60.1k
     * called hvm_inject_hw_exception() rather than using
8088
60.1k
     * x86_emul_hw_exception().)
8089
60.1k
     */
8090
60.1k
    ASSERT(ctxt->event_pending == (rc == X86EMUL_EXCEPTION));
8091
60.1k
8092
60.1k
    return rc;
8093
60.1k
}
8094
#endif
8095
8096
#ifdef __XEN__
8097
8098
#include <xen/err.h>
8099
8100
struct x86_emulate_state *
8101
x86_decode_insn(
8102
    struct x86_emulate_ctxt *ctxt,
8103
    int (*insn_fetch)(
8104
        enum x86_segment seg, unsigned long offset,
8105
        void *p_data, unsigned int bytes,
8106
        struct x86_emulate_ctxt *ctxt))
8107
0
{
8108
0
    static DEFINE_PER_CPU(struct x86_emulate_state, state);
8109
0
    struct x86_emulate_state *state = &this_cpu(state);
8110
0
    const struct x86_emulate_ops ops = {
8111
0
        .insn_fetch = insn_fetch,
8112
0
        .read       = x86emul_unhandleable_rw,
8113
0
    };
8114
0
    int rc = x86_decode(state, ctxt, &ops);
8115
0
8116
0
    if ( unlikely(rc != X86EMUL_OKAY) )
8117
0
        return ERR_PTR(-rc);
8118
0
8119
0
#ifndef NDEBUG
8120
0
    /*
8121
0
     * While we avoid memory allocation (by use of per-CPU data) above,
8122
0
     * nevertheless make sure callers properly release the state structure
8123
0
     * for forward compatibility.
8124
0
     */
8125
0
    if ( state->caller )
8126
0
    {
8127
0
        printk(XENLOG_ERR "Unreleased emulation state acquired by %ps\n",
8128
0
               state->caller);
8129
0
        dump_execution_state();
8130
0
    }
8131
0
    state->caller = __builtin_return_address(0);
8132
0
#endif
8133
0
8134
0
    return state;
8135
0
}
8136
8137
static inline void check_state(const struct x86_emulate_state *state)
8138
0
{
8139
0
#ifndef NDEBUG
8140
0
    ASSERT(state->caller);
8141
0
#endif
8142
0
}
8143
8144
#ifndef NDEBUG
8145
void x86_emulate_free_state(struct x86_emulate_state *state)
8146
0
{
8147
0
    check_state(state);
8148
0
    state->caller = NULL;
8149
0
}
8150
#endif
8151
8152
unsigned int
8153
x86_insn_opsize(const struct x86_emulate_state *state)
8154
0
{
8155
0
    check_state(state);
8156
0
8157
0
    return state->op_bytes << 3;
8158
0
}
8159
8160
int
8161
x86_insn_modrm(const struct x86_emulate_state *state,
8162
               unsigned int *rm, unsigned int *reg)
8163
0
{
8164
0
    check_state(state);
8165
0
8166
0
    if ( unlikely(state->modrm_mod > 3) )
8167
0
    {
8168
0
        if ( rm )
8169
0
            *rm = ~0U;
8170
0
        if ( reg )
8171
0
            *reg = ~0U;
8172
0
        return -EINVAL;
8173
0
    }
8174
0
8175
0
    if ( rm )
8176
0
        *rm = state->modrm_rm;
8177
0
    if ( reg )
8178
0
        *reg = state->modrm_reg;
8179
0
8180
0
    return state->modrm_mod;
8181
0
}
8182
8183
unsigned long
8184
x86_insn_operand_ea(const struct x86_emulate_state *state,
8185
                    enum x86_segment *seg)
8186
0
{
8187
0
    *seg = state->ea.type == OP_MEM ? state->ea.mem.seg : x86_seg_none;
8188
0
8189
0
    check_state(state);
8190
0
8191
0
    return state->ea.mem.off;
8192
0
}
8193
8194
bool
8195
x86_insn_is_mem_access(const struct x86_emulate_state *state,
8196
                       const struct x86_emulate_ctxt *ctxt)
8197
60.1k
{
8198
60.1k
    if ( state->ea.type == OP_MEM )
8199
60.1k
        return ctxt->opcode != 0x8d /* LEA */ &&
8200
60.1k
               (ctxt->opcode != X86EMUL_OPC(0x0f, 0x01) ||
8201
0
                (state->modrm_reg & 7) != 7) /* INVLPG */;
8202
60.1k
8203
0
    switch ( ctxt->opcode )
8204
0
    {
8205
0
    case 0x6c ... 0x6f: /* INS / OUTS */
8206
0
    case 0xa4 ... 0xa7: /* MOVS / CMPS */
8207
0
    case 0xaa ... 0xaf: /* STOS / LODS / SCAS */
8208
0
    case 0xd7:          /* XLAT */
8209
0
    CASE_SIMD_PACKED_INT(0x0f, 0xf7):    /* MASKMOV{Q,DQU} */
8210
0
    case X86EMUL_OPC_VEX_66(0x0f, 0xf7): /* VMASKMOVDQU */
8211
0
        return true;
8212
0
8213
0
    case X86EMUL_OPC(0x0f, 0x01):
8214
0
        /* Cover CLZERO. */
8215
0
        return (state->modrm_rm & 7) == 4 && (state->modrm_reg & 7) == 7;
8216
0
    }
8217
0
8218
0
    return false;
8219
0
}
8220
8221
bool
8222
x86_insn_is_mem_write(const struct x86_emulate_state *state,
8223
                      const struct x86_emulate_ctxt *ctxt)
8224
0
{
8225
0
    switch ( state->desc & DstMask )
8226
0
    {
8227
0
    case DstMem:
8228
0
        /* The SrcMem check is to cover {,V}MASKMOV{Q,DQU}. */
8229
0
        return state->modrm_mod != 3 || (state->desc & SrcMask) == SrcMem;
8230
0
8231
0
    case DstBitBase:
8232
0
    case DstImplicit:
8233
0
        break;
8234
0
8235
0
    default:
8236
0
        return false;
8237
0
    }
8238
0
8239
0
    if ( state->modrm_mod == 3 )
8240
0
        /* CLZERO is the odd one. */
8241
0
        return ctxt->opcode == X86EMUL_OPC(0x0f, 0x01) &&
8242
0
               (state->modrm_rm & 7) == 4 && (state->modrm_reg & 7) == 7;
8243
0
8244
0
    switch ( ctxt->opcode )
8245
0
    {
8246
0
    case 0x6c: case 0x6d:                /* INS */
8247
0
    case 0xa4: case 0xa5:                /* MOVS */
8248
0
    case 0xaa: case 0xab:                /* STOS */
8249
0
    case X86EMUL_OPC(0x0f, 0xab):        /* BTS */
8250
0
    case X86EMUL_OPC(0x0f, 0xb3):        /* BTR */
8251
0
    case X86EMUL_OPC(0x0f, 0xbb):        /* BTC */
8252
0
        return true;
8253
0
8254
0
    case 0xd9:
8255
0
        switch ( state->modrm_reg & 7 )
8256
0
        {
8257
0
        case 2: /* FST m32fp */
8258
0
        case 3: /* FSTP m32fp */
8259
0
        case 6: /* FNSTENV */
8260
0
        case 7: /* FNSTCW */
8261
0
            return true;
8262
0
        }
8263
0
        break;
8264
0
8265
0
    case 0xdb:
8266
0
        switch ( state->modrm_reg & 7 )
8267
0
        {
8268
0
        case 1: /* FISTTP m32i */
8269
0
        case 2: /* FIST m32i */
8270
0
        case 3: /* FISTP m32i */
8271
0
        case 7: /* FSTP m80fp */
8272
0
            return true;
8273
0
        }
8274
0
        break;
8275
0
8276
0
    case 0xdd:
8277
0
        switch ( state->modrm_reg & 7 )
8278
0
        {
8279
0
        case 1: /* FISTTP m64i */
8280
0
        case 2: /* FST m64fp */
8281
0
        case 3: /* FSTP m64fp */
8282
0
        case 6: /* FNSAVE */
8283
0
        case 7: /* FNSTSW */
8284
0
            return true;
8285
0
        }
8286
0
        break;
8287
0
8288
0
    case 0xdf:
8289
0
        switch ( state->modrm_reg & 7 )
8290
0
        {
8291
0
        case 1: /* FISTTP m16i */
8292
0
        case 2: /* FIST m16i */
8293
0
        case 3: /* FISTP m16i */
8294
0
        case 6: /* FBSTP */
8295
0
        case 7: /* FISTP m64i */
8296
0
            return true;
8297
0
        }
8298
0
        break;
8299
0
8300
0
    case X86EMUL_OPC(0x0f, 0x01):
8301
0
        return !(state->modrm_reg & 6); /* SGDT / SIDT */
8302
0
8303
0
    case X86EMUL_OPC(0x0f, 0xba):
8304
0
        return (state->modrm_reg & 7) > 4; /* BTS / BTR / BTC */
8305
0
8306
0
    case X86EMUL_OPC(0x0f, 0xc7):
8307
0
        return (state->modrm_reg & 7) == 1; /* CMPXCHG{8,16}B */
8308
0
    }
8309
0
8310
0
    return false;
8311
0
}
8312
8313
bool
8314
x86_insn_is_portio(const struct x86_emulate_state *state,
8315
                   const struct x86_emulate_ctxt *ctxt)
8316
0
{
8317
0
    switch ( ctxt->opcode )
8318
0
    {
8319
0
    case 0x6c ... 0x6f: /* INS / OUTS */
8320
0
    case 0xe4 ... 0xe7: /* IN / OUT imm8 */
8321
0
    case 0xec ... 0xef: /* IN / OUT %dx */
8322
0
        return true;
8323
0
    }
8324
0
8325
0
    return false;
8326
0
}
8327
8328
bool
8329
x86_insn_is_cr_access(const struct x86_emulate_state *state,
8330
                      const struct x86_emulate_ctxt *ctxt)
8331
0
{
8332
0
    switch ( ctxt->opcode )
8333
0
    {
8334
0
        unsigned int ext;
8335
0
8336
0
    case X86EMUL_OPC(0x0f, 0x01):
8337
0
        if ( x86_insn_modrm(state, NULL, &ext) >= 0
8338
0
             && (ext & 5) == 4 ) /* SMSW / LMSW */
8339
0
            return true;
8340
0
        break;
8341
0
8342
0
    case X86EMUL_OPC(0x0f, 0x06): /* CLTS */
8343
0
    case X86EMUL_OPC(0x0f, 0x20): /* MOV from CRn */
8344
0
    case X86EMUL_OPC(0x0f, 0x22): /* MOV to CRn */
8345
0
        return true;
8346
0
    }
8347
0
8348
0
    return false;
8349
0
}
8350
8351
unsigned long
8352
x86_insn_immediate(const struct x86_emulate_state *state, unsigned int nr)
8353
0
{
8354
0
    check_state(state);
8355
0
8356
0
    switch ( nr )
8357
0
    {
8358
0
    case 0:
8359
0
        return state->imm1;
8360
0
    case 1:
8361
0
        return state->imm2;
8362
0
    }
8363
0
8364
0
    return 0;
8365
0
}
8366
8367
unsigned int
8368
x86_insn_length(const struct x86_emulate_state *state,
8369
                const struct x86_emulate_ctxt *ctxt)
8370
0
{
8371
0
    check_state(state);
8372
0
8373
0
    return state->ip - ctxt->regs->r(ip);
8374
0
}
8375
8376
#endif