/root/src/xen/xen/arch/x86/cpuid.c
Line | Count | Source |
1 | | #include <xen/init.h> |
2 | | #include <xen/lib.h> |
3 | | #include <xen/sched.h> |
4 | | #include <asm/cpuid.h> |
5 | | #include <asm/hvm/hvm.h> |
6 | | #include <asm/hvm/nestedhvm.h> |
7 | | #include <asm/hvm/svm/svm.h> |
8 | | #include <asm/hvm/vmx/vmcs.h> |
9 | | #include <asm/paging.h> |
10 | | #include <asm/processor.h> |
11 | | #include <asm/xstate.h> |
12 | | |
13 | | const uint32_t known_features[] = INIT_KNOWN_FEATURES; |
14 | | const uint32_t special_features[] = INIT_SPECIAL_FEATURES; |
15 | | |
16 | | static const uint32_t pv_featuremask[] = INIT_PV_FEATURES; |
17 | | static const uint32_t hvm_shadow_featuremask[] = INIT_HVM_SHADOW_FEATURES; |
18 | | static const uint32_t hvm_hap_featuremask[] = INIT_HVM_HAP_FEATURES; |
19 | | static const uint32_t deep_features[] = INIT_DEEP_FEATURES; |
20 | | |
21 | 2.68k | #define EMPTY_LEAF ((struct cpuid_leaf){}) |
22 | | static void zero_leaves(struct cpuid_leaf *l, |
23 | | unsigned int first, unsigned int last) |
24 | 1 | { |
25 | 1 | memset(&l[first], 0, sizeof(*l) * (last - first + 1)); |
26 | 1 | } |
27 | | |
28 | | struct cpuid_policy __read_mostly raw_cpuid_policy, |
29 | | __read_mostly host_cpuid_policy, |
30 | | __read_mostly pv_max_cpuid_policy, |
31 | | __read_mostly hvm_max_cpuid_policy; |
32 | | |
33 | | static void cpuid_leaf(uint32_t leaf, struct cpuid_leaf *data) |
34 | 20 | { |
35 | 20 | cpuid(leaf, &data->a, &data->b, &data->c, &data->d); |
36 | 20 | } |
37 | | |
38 | | static void sanitise_featureset(uint32_t *fs) |
39 | 3 | { |
40 | 3 | /* for_each_set_bit() uses unsigned longs. Extend with zeroes. */ |
41 | 3 | uint32_t disabled_features[ |
42 | 3 | ROUNDUP(FSCAPINTS, sizeof(unsigned long)/sizeof(uint32_t))] = {}; |
43 | 3 | unsigned int i; |
44 | 3 | |
45 | 33 | for ( i = 0; i < FSCAPINTS; ++i ) |
46 | 30 | { |
47 | 30 | /* Clamp to known mask. */ |
48 | 30 | fs[i] &= known_features[i]; |
49 | 30 | |
50 | 30 | /* |
51 | 30 | * Identify which features with deep dependencies have been |
52 | 30 | * disabled. |
53 | 30 | */ |
54 | 30 | disabled_features[i] = ~fs[i] & deep_features[i]; |
55 | 30 | } |
56 | 3 | |
57 | 3 | for_each_set_bit(i, (void *)disabled_features, |
58 | 3 | sizeof(disabled_features) * 8) |
59 | 8 | { |
60 | 8 | const uint32_t *dfs = lookup_deep_deps(i); |
61 | 8 | unsigned int j; |
62 | 8 | |
63 | 8 | ASSERT(dfs); /* deep_features[] should guarantee this. */ |
64 | 8 | |
65 | 88 | for ( j = 0; j < FSCAPINTS; ++j ) |
66 | 80 | { |
67 | 80 | fs[j] &= ~dfs[j]; |
68 | 80 | disabled_features[j] &= ~dfs[j]; |
69 | 80 | } |
70 | 8 | } |
71 | 3 | } |
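
The loop above walks the just-disabled deep features and clears each one's dependents, using the generated deep-deps tables. A minimal standalone sketch of the same pruning idea, assuming hypothetical two-word bitmaps and a single hand-written dependency (bit 5 implies bit 9) in place of Xen's generated INIT_* tables:

#include <stdint.h>
#include <stdio.h>

#define NWORDS 2                                /* stand-in for FSCAPINTS */

static const uint32_t deep_features[NWORDS] = { 1u << 5, 0 };
static const uint32_t deps_of_bit5[NWORDS]  = { 1u << 9, 0 };

static const uint32_t *lookup_deps(unsigned int bit)
{
    return bit == 5 ? deps_of_bit5 : NULL;      /* hypothetical table */
}

int main(void)
{
    uint32_t fs[NWORDS] = { 1u << 9, 0 };       /* bit 5 already clear */
    uint32_t disabled[NWORDS];
    unsigned int i, j;

    for ( i = 0; i < NWORDS; ++i )
        disabled[i] = ~fs[i] & deep_features[i];

    for ( i = 0; i < NWORDS * 32; ++i )
    {
        const uint32_t *dfs;

        if ( !(disabled[i / 32] & (1u << (i % 32))) ||
             (dfs = lookup_deps(i)) == NULL )
            continue;

        for ( j = 0; j < NWORDS; ++j )          /* clear all dependents */
        {
            fs[j] &= ~dfs[j];
            disabled[j] &= ~dfs[j];
        }
    }

    printf("fs[0] = %#x\n", fs[0]);             /* prints fs[0] = 0 */
    return 0;
}
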
72 | | |
73 | | static void recalculate_xstate(struct cpuid_policy *p) |
74 | 4 | { |
75 | 4 | uint64_t xstates = XSTATE_FP_SSE; |
76 | 4 | uint32_t xstate_size = XSTATE_AREA_MIN_SIZE; |
77 | 4 | unsigned int i, Da1 = p->xstate.Da1; |
78 | 4 | |
79 | 4 | /* |
80 | 4 | * The Da1 leaf is the only piece of information preserved in the common |
81 | 4 | * case. Everything else is derived from other feature state. |
82 | 4 | */ |
83 | 4 | memset(&p->xstate, 0, sizeof(p->xstate)); |
84 | 4 | |
85 | 4 | if ( !p->basic.xsave ) |
86 | 0 | return; |
87 | 4 | |
88 | 4 | if ( p->basic.avx ) |
89 | 4 | { |
90 | 4 | xstates |= XSTATE_YMM; |
91 | 4 | xstate_size = max(xstate_size, |
92 | 4 | xstate_offsets[_XSTATE_YMM] + |
93 | 4 | xstate_sizes[_XSTATE_YMM]); |
94 | 4 | } |
95 | 4 | |
96 | 4 | if ( p->feat.mpx ) |
97 | 0 | { |
98 | 0 | xstates |= XSTATE_BNDREGS | XSTATE_BNDCSR; |
99 | 0 | xstate_size = max(xstate_size, |
100 | 0 | xstate_offsets[_XSTATE_BNDCSR] + |
101 | 0 | xstate_sizes[_XSTATE_BNDCSR]); |
102 | 0 | } |
103 | 4 | |
104 | 4 | if ( p->feat.avx512f ) |
105 | 0 | { |
106 | 0 | xstates |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM; |
107 | 0 | xstate_size = max(xstate_size, |
108 | 0 | xstate_offsets[_XSTATE_HI_ZMM] + |
109 | 0 | xstate_sizes[_XSTATE_HI_ZMM]); |
110 | 0 | } |
111 | 4 | |
112 | 4 | if ( p->feat.pku ) |
113 | 0 | { |
114 | 0 | xstates |= XSTATE_PKRU; |
115 | 0 | xstate_size = max(xstate_size, |
116 | 0 | xstate_offsets[_XSTATE_PKRU] + |
117 | 0 | xstate_sizes[_XSTATE_PKRU]); |
118 | 0 | } |
119 | 4 | |
120 | 4 | if ( p->extd.lwp ) |
121 | 0 | { |
122 | 0 | xstates |= XSTATE_LWP; |
123 | 0 | xstate_size = max(xstate_size, |
124 | 0 | xstate_offsets[_XSTATE_LWP] + |
125 | 0 | xstate_sizes[_XSTATE_LWP]); |
126 | 0 | } |
127 | 4 | |
128 | 4 | p->xstate.max_size = xstate_size; |
129 | 4 | p->xstate.xcr0_low = xstates & ~XSTATE_XSAVES_ONLY; |
130 | 4 | p->xstate.xcr0_high = (xstates & ~XSTATE_XSAVES_ONLY) >> 32; |
131 | 4 | |
132 | 4 | p->xstate.Da1 = Da1; |
133 | 4 | if ( p->xstate.xsaves ) |
134 | 0 | { |
135 | 0 | p->xstate.xss_low = xstates & XSTATE_XSAVES_ONLY; |
136 | 0 | p->xstate.xss_high = (xstates & XSTATE_XSAVES_ONLY) >> 32; |
137 | 0 | } |
138 | 4 | else |
139 | 4 | xstates &= ~XSTATE_XSAVES_ONLY; |
140 | 4 | |
141 | 248 | for ( i = 2; i < min(63ul, ARRAY_SIZE(p->xstate.comp)); ++i ) |
142 | 244 | { |
143 | 244 | uint64_t curr_xstate = 1ul << i; |
144 | 244 | |
145 | 244 | if ( !(xstates & curr_xstate) ) |
146 | 240 | continue; |
147 | 244 | |
148 | 4 | p->xstate.comp[i].size = xstate_sizes[i]; |
149 | 4 | p->xstate.comp[i].offset = xstate_offsets[i]; |
150 | 4 | p->xstate.comp[i].xss = curr_xstate & XSTATE_XSAVES_ONLY; |
151 | 4 | p->xstate.comp[i].align = curr_xstate & xstate_align; |
152 | 4 | } |
153 | 4 | } |
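
The xstate_size calculation above takes, over all enabled components, the maximum of offset + size, i.e. the uncompacted XSAVE layout's high-water mark. A self-contained sketch of that arithmetic, with illustrative per-component values (real values come from CPUID leaf 0xD subleaves; index 2 is YMM):

#include <stdint.h>
#include <stdio.h>

static const uint32_t xstate_offsets[] = { 0, 0, 576, 960, 1024 };
static const uint32_t xstate_sizes[]   = { 160, 256, 256, 64, 64 };

#define XSTATE_AREA_MIN_SIZE (512 + 64)  /* legacy region + XSAVE header */

static uint32_t xsave_area_size(uint64_t xstates)
{
    uint32_t size = XSTATE_AREA_MIN_SIZE;
    unsigned int i;

    for ( i = 2; i < sizeof(xstate_sizes) / sizeof(xstate_sizes[0]); ++i )
    {
        uint32_t end = xstate_offsets[i] + xstate_sizes[i];

        if ( (xstates & (1ull << i)) && end > size )
            size = end;
    }

    return size;
}

int main(void)
{
    /* x87 | SSE | YMM: the YMM component ends at 576 + 256 = 832. */
    printf("%u\n", xsave_area_size(0x7));
    return 0;
}
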
154 | | |
155 | | /* |
156 | | * Misc adjustments to the policy. Mostly clobbering reserved fields and |
157 | | * duplicating shared fields. Intentionally hidden fields are annotated. |
158 | | */ |
159 | | static void recalculate_misc(struct cpuid_policy *p) |
160 | 2 | { |
161 | 2 | p->basic.raw_fms &= 0x0fff0fff; /* Clobber Processor Type on Intel. */ |
162 | 2 | p->basic.apic_id = 0; /* Dynamic. */ |
163 | 2 | |
164 | 2 | p->basic.raw[0x5] = EMPTY_LEAF; /* MONITOR not exposed to guests. */ |
165 | 2 | p->basic.raw[0x6] = EMPTY_LEAF; /* Therm/Power not exposed to guests. */ |
166 | 2 | |
167 | 2 | p->basic.raw[0x8] = EMPTY_LEAF; |
168 | 2 | p->basic.raw[0xb] = EMPTY_LEAF; /* TODO: Rework topology logic. */ |
169 | 2 | p->basic.raw[0xc] = EMPTY_LEAF; |
170 | 2 | |
171 | 2 | p->extd.e1d &= ~CPUID_COMMON_1D_FEATURES; |
172 | 2 | |
173 | 2 | /* Most of Power/RAS hidden from guests. */ |
174 | 2 | p->extd.raw[0x7].a = p->extd.raw[0x7].b = p->extd.raw[0x7].c = 0; |
175 | 2 | |
176 | 2 | p->extd.raw[0x8].d = 0; |
177 | 2 | |
178 | 2 | switch ( p->x86_vendor ) |
179 | 2 | { |
180 | 2 | case X86_VENDOR_INTEL: |
181 | 2 | p->basic.l2_nr_queries = 1; /* Fixed to 1 query. */ |
182 | 2 | p->basic.raw[0x3] = EMPTY_LEAF; /* PSN - always hidden. */ |
183 | 2 | p->basic.raw[0x9] = EMPTY_LEAF; /* DCA - always hidden. */ |
184 | 2 | |
185 | 2 | p->extd.vendor_ebx = 0; |
186 | 2 | p->extd.vendor_ecx = 0; |
187 | 2 | p->extd.vendor_edx = 0; |
188 | 2 | |
189 | 2 | p->extd.raw[0x1].a = p->extd.raw[0x1].b = 0; |
190 | 2 | |
191 | 2 | p->extd.raw[0x5] = EMPTY_LEAF; |
192 | 2 | p->extd.raw[0x6].a = p->extd.raw[0x6].b = p->extd.raw[0x6].d = 0; |
193 | 2 | |
194 | 2 | p->extd.raw[0x8].a &= 0x0000ffff; |
195 | 2 | p->extd.raw[0x8].c = 0; |
196 | 2 | break; |
197 | 2 | |
198 | 0 | case X86_VENDOR_AMD: |
199 | 0 | zero_leaves(p->basic.raw, 0x2, 0x3); |
200 | 0 | memset(p->cache.raw, 0, sizeof(p->cache.raw)); |
201 | 0 | zero_leaves(p->basic.raw, 0x9, 0xa); |
202 | 0 | |
203 | 0 | p->extd.vendor_ebx = p->basic.vendor_ebx; |
204 | 0 | p->extd.vendor_ecx = p->basic.vendor_ecx; |
205 | 0 | p->extd.vendor_edx = p->basic.vendor_edx; |
206 | 0 | |
207 | 0 | p->extd.raw_fms = p->basic.raw_fms; |
208 | 0 | p->extd.raw[0x1].b &= 0xff00ffff; |
209 | 0 | p->extd.e1d |= p->basic._1d & CPUID_COMMON_1D_FEATURES; |
210 | 0 | |
211 | 0 | p->extd.raw[0x8].a &= 0x0000ffff; /* GuestMaxPhysAddr hidden. */ |
212 | 0 | p->extd.raw[0x8].c &= 0x0003f0ff; |
213 | 0 | |
214 | 0 | p->extd.raw[0x9] = EMPTY_LEAF; |
215 | 0 | |
216 | 0 | zero_leaves(p->extd.raw, 0xb, 0x18); |
217 | 0 | |
218 | 0 | p->extd.raw[0x1b] = EMPTY_LEAF; /* IBS - not supported. */ |
219 | 0 | |
220 | 0 | p->extd.raw[0x1c].a = 0; /* LWP.a entirely dynamic. */ |
221 | 0 | break; |
222 | 2 | } |
223 | 2 | } |
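
recalculate_misc() masks leaf 1 EAX with 0x0fff0fff, keeping family/model/stepping but clearing the Intel "Processor Type" field (bits 12-13). As a reminder of the encoding being preserved, a sketch of the standard FMS decode (illustrative code, not Xen's):

#include <stdint.h>
#include <stdio.h>

static void decode_fms(uint32_t eax, unsigned int *fam,
                       unsigned int *model, unsigned int *step)
{
    *step  = eax & 0xf;
    *model = (eax >> 4) & 0xf;
    *fam   = (eax >> 8) & 0xf;

    if ( *fam == 0xf )
        *fam += (eax >> 20) & 0xff;             /* extended family */
    if ( *fam == 0x6 || *fam >= 0xf )
        *model |= ((eax >> 16) & 0xf) << 4;     /* extended model */
}

int main(void)
{
    unsigned int f, m, s;

    decode_fms(0x000306c3, &f, &m, &s);         /* a Haswell-era value */
    printf("family %#x model %#x stepping %u\n", f, m, s);
    return 0;
}
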
224 | | |
225 | | static void __init calculate_raw_policy(void) |
226 | 1 | { |
227 | 1 | struct cpuid_policy *p = &raw_cpuid_policy; |
228 | 1 | unsigned int i; |
229 | 1 | |
230 | 1 | cpuid_leaf(0, &p->basic.raw[0]); |
231 | 14 | for ( i = 1; i < min(ARRAY_SIZE(p->basic.raw), |
232 | 13 | p->basic.max_leaf + 1ul); ++i ) |
233 | 13 | { |
234 | 13 | switch ( i ) |
235 | 13 | { |
236 | 3 | case 0x4: case 0x7: case 0xd: |
237 | 3 | /* Multi-invocation leaves. Deferred. */ |
238 | 3 | continue; |
239 | 13 | } |
240 | 13 | |
241 | 10 | cpuid_leaf(i, &p->basic.raw[i]); |
242 | 10 | } |
243 | 1 | |
244 | 1 | if ( p->basic.max_leaf >= 4 ) |
245 | 1 | { |
246 | 5 | for ( i = 0; i < ARRAY_SIZE(p->cache.raw); ++i ) |
247 | 5 | { |
248 | 5 | union { |
249 | 5 | struct cpuid_leaf l; |
250 | 5 | struct cpuid_cache_leaf c; |
251 | 5 | } u; |
252 | 5 | |
253 | 5 | cpuid_count_leaf(4, i, &u.l); |
254 | 5 | |
255 | 5 | if ( u.c.type == 0 ) |
256 | 1 | break; |
257 | 5 | |
258 | 4 | p->cache.subleaf[i] = u.c; |
259 | 4 | } |
260 | 1 | |
261 | 1 | /* |
262 | 1 | * The choice of CPUID_GUEST_NR_CACHE is arbitrary. It is expected |
263 | 1 | * that it will eventually need increasing for future hardware. |
264 | 1 | */ |
265 | 1 | if ( i == ARRAY_SIZE(p->cache.raw) ) |
266 | 0 | printk(XENLOG_WARNING |
267 | 0 | "CPUID: Insufficient Leaf 4 space for this hardware\n"); |
268 | 1 | } |
269 | 1 | |
270 | 1 | if ( p->basic.max_leaf >= 7 ) |
271 | 1 | { |
272 | 1 | cpuid_count_leaf(7, 0, &p->feat.raw[0]); |
273 | 1 | |
274 | 1 | for ( i = 1; i < min(ARRAY_SIZE(p->feat.raw), |
275 | 0 | p->feat.max_subleaf + 1ul); ++i ) |
276 | 0 | cpuid_count_leaf(7, i, &p->feat.raw[i]); |
277 | 1 | } |
278 | 1 | |
279 | 1 | if ( p->basic.max_leaf >= XSTATE_CPUID ) |
280 | 1 | { |
281 | 1 | uint64_t xstates; |
282 | 1 | |
283 | 1 | cpuid_count_leaf(XSTATE_CPUID, 0, &p->xstate.raw[0]); |
284 | 1 | cpuid_count_leaf(XSTATE_CPUID, 1, &p->xstate.raw[1]); |
285 | 1 | |
286 | 1 | xstates = ((uint64_t)(p->xstate.xcr0_high | p->xstate.xss_high) << 32) | |
287 | 1 | (p->xstate.xcr0_low | p->xstate.xss_low); |
288 | 1 | |
289 | 62 | for ( i = 2; i < min(63ul, ARRAY_SIZE(p->xstate.raw)); ++i ) |
290 | 61 | { |
291 | 61 | if ( xstates & (1ul << i) ) |
292 | 1 | cpuid_count_leaf(XSTATE_CPUID, i, &p->xstate.raw[i]); |
293 | 61 | } |
294 | 1 | } |
295 | 1 | |
296 | 1 | /* Extended leaves. */ |
297 | 1 | cpuid_leaf(0x80000000, &p->extd.raw[0]); |
298 | 9 | for ( i = 1; i < min(ARRAY_SIZE(p->extd.raw), |
299 | 8 | p->extd.max_leaf + 1 - 0x80000000ul); ++i ) |
300 | 8 | cpuid_leaf(0x80000000 + i, &p->extd.raw[i]); |
301 | 1 | |
302 | 1 | p->x86_vendor = boot_cpu_data.x86_vendor; |
303 | 1 | } |
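
calculate_raw_policy() is a straight snapshot of hardware CPUID output into the raw policy. For reference, a wrapper in the shape of the cpuid_count_leaf() helper used here might look like the following (GCC/Clang inline-asm sketch; the actual Xen helper is declared elsewhere and may differ):

#include <stdint.h>
#include <stdio.h>

struct cpuid_leaf {
    uint32_t a, b, c, d;
};

/* Execute CPUID with the subleaf in ECX, as leaves 4, 7 and 0xd need. */
static void cpuid_count_leaf(uint32_t leaf, uint32_t subleaf,
                             struct cpuid_leaf *l)
{
    asm volatile ( "cpuid"
                   : "=a" (l->a), "=b" (l->b), "=c" (l->c), "=d" (l->d)
                   : "a" (leaf), "c" (subleaf) );
}

int main(void)
{
    struct cpuid_leaf l;

    cpuid_count_leaf(0, 0, &l);
    printf("max basic leaf: %u\n", l.a);
    return 0;
}
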
304 | | |
305 | | static void __init calculate_host_policy(void) |
306 | 1 | { |
307 | 1 | struct cpuid_policy *p = &host_cpuid_policy; |
308 | 1 | |
309 | 1 | *p = raw_cpuid_policy; |
310 | 1 | |
311 | 1 | p->basic.max_leaf = |
312 | 1 | min_t(uint32_t, p->basic.max_leaf, ARRAY_SIZE(p->basic.raw) - 1); |
313 | 1 | p->feat.max_subleaf = |
314 | 1 | min_t(uint32_t, p->feat.max_subleaf, ARRAY_SIZE(p->feat.raw) - 1); |
315 | 1 | p->extd.max_leaf = 0x80000000 | min_t(uint32_t, p->extd.max_leaf & 0xffff, |
316 | 1 | ARRAY_SIZE(p->extd.raw) - 1); |
317 | 1 | |
318 | 1 | cpuid_featureset_to_policy(boot_cpu_data.x86_capability, p); |
319 | 1 | recalculate_xstate(p); |
320 | 1 | recalculate_misc(p); |
321 | 1 | |
322 | 1 | if ( p->extd.svm ) |
323 | 0 | { |
324 | 0 | /* Clamp to implemented features which require hardware support. */ |
325 | 0 | p->extd.raw[0xa].d &= ((1u << SVM_FEATURE_NPT) | |
326 | 0 | (1u << SVM_FEATURE_LBRV) | |
327 | 0 | (1u << SVM_FEATURE_NRIPS) | |
328 | 0 | (1u << SVM_FEATURE_PAUSEFILTER) | |
329 | 0 | (1u << SVM_FEATURE_DECODEASSISTS)); |
330 | 0 | /* Enable features which are always emulated. */ |
331 | 0 | p->extd.raw[0xa].d |= ((1u << SVM_FEATURE_VMCBCLEAN) | |
332 | 0 | (1u << SVM_FEATURE_TSCRATEMSR)); |
333 | 0 | } |
334 | 1 | } |
335 | | |
336 | | static void __init calculate_pv_max_policy(void) |
337 | 1 | { |
338 | 1 | struct cpuid_policy *p = &pv_max_cpuid_policy; |
339 | 1 | uint32_t pv_featureset[FSCAPINTS]; |
340 | 1 | unsigned int i; |
341 | 1 | |
342 | 1 | *p = host_cpuid_policy; |
343 | 1 | cpuid_policy_to_featureset(p, pv_featureset); |
344 | 1 | |
345 | 11 | for ( i = 0; i < ARRAY_SIZE(pv_featureset); ++i ) |
346 | 10 | pv_featureset[i] &= pv_featuremask[i]; |
347 | 1 | |
348 | 1 | /* Unconditionally claim to be able to set the hypervisor bit. */ |
349 | 1 | __set_bit(X86_FEATURE_HYPERVISOR, pv_featureset); |
350 | 1 | |
351 | 1 | sanitise_featureset(pv_featureset); |
352 | 1 | cpuid_featureset_to_policy(pv_featureset, p); |
353 | 1 | recalculate_xstate(p); |
354 | 1 | |
355 | 1 | p->extd.raw[0xa] = EMPTY_LEAF; /* No SVM for PV guests. */ |
356 | 1 | } |
357 | | |
358 | | static void __init calculate_hvm_max_policy(void) |
359 | 1 | { |
360 | 1 | struct cpuid_policy *p = &hvm_max_cpuid_policy; |
361 | 1 | uint32_t hvm_featureset[FSCAPINTS]; |
362 | 1 | unsigned int i; |
363 | 1 | const uint32_t *hvm_featuremask; |
364 | 1 | |
365 | 1 | if ( !hvm_enabled ) |
366 | 0 | return; |
367 | 1 | |
368 | 1 | *p = host_cpuid_policy; |
369 | 1 | cpuid_policy_to_featureset(p, hvm_featureset); |
370 | 1 | |
371 | 1 | hvm_featuremask = hvm_funcs.hap_supported ? |
372 | 1 | hvm_hap_featuremask : hvm_shadow_featuremask; |
373 | 1 | |
374 | 11 | for ( i = 0; i < ARRAY_SIZE(hvm_featureset); ++i ) |
375 | 10 | hvm_featureset[i] &= hvm_featuremask[i]; |
376 | 1 | |
377 | 1 | /* Unconditionally claim to be able to set the hypervisor bit. */ |
378 | 1 | __set_bit(X86_FEATURE_HYPERVISOR, hvm_featureset); |
379 | 1 | |
380 | 1 | /* |
381 | 1 | * Xen can provide an APIC emulation to HVM guests even if the host's APIC |
382 | 1 | * isn't enabled. |
383 | 1 | */ |
384 | 1 | __set_bit(X86_FEATURE_APIC, hvm_featureset); |
385 | 1 | |
386 | 1 | /* |
387 | 1 | * On AMD, PV guests are entirely unable to use SYSENTER as Xen runs in |
388 | 1 | * long mode (and init_amd() has cleared it out of host capabilities), but |
389 | 1 | * HVM guests can use it when running in protected mode. |
390 | 1 | */ |
391 | 1 | if ( (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && |
392 | 0 | raw_cpuid_policy.basic.sep ) |
393 | 0 | __set_bit(X86_FEATURE_SEP, hvm_featureset); |
394 | 1 | |
395 | 1 | /* |
396 | 1 | * With VT-x, some features are only supported by Xen if dedicated |
397 | 1 | * hardware support is also available. |
398 | 1 | */ |
399 | 1 | if ( cpu_has_vmx ) |
400 | 1 | { |
401 | 1 | if ( !cpu_has_vmx_mpx ) |
402 | 1 | __clear_bit(X86_FEATURE_MPX, hvm_featureset); |
403 | 1 | |
404 | 1 | if ( !cpu_has_vmx_xsaves ) |
405 | 1 | __clear_bit(X86_FEATURE_XSAVES, hvm_featureset); |
406 | 1 | } |
407 | 1 | |
408 | 1 | sanitise_featureset(hvm_featureset); |
409 | 1 | cpuid_featureset_to_policy(hvm_featureset, p); |
410 | 1 | recalculate_xstate(p); |
411 | 1 | } |
412 | | |
413 | | void __init init_guest_cpuid(void) |
414 | 1 | { |
415 | 1 | calculate_raw_policy(); |
416 | 1 | calculate_host_policy(); |
417 | 1 | calculate_pv_max_policy(); |
418 | 1 | calculate_hvm_max_policy(); |
419 | 1 | } |
420 | | |
421 | | const uint32_t *lookup_deep_deps(uint32_t feature) |
422 | 9 | { |
423 | 9 | static const struct { |
424 | 9 | uint32_t feature; |
425 | 9 | uint32_t fs[FSCAPINTS]; |
426 | 9 | } deep_deps[] = INIT_DEEP_DEPS; |
427 | 9 | unsigned int start = 0, end = ARRAY_SIZE(deep_deps); |
428 | 9 | |
429 | 9 | BUILD_BUG_ON(ARRAY_SIZE(deep_deps) != NR_DEEP_DEPS); |
430 | 9 | |
431 | 9 | /* Fast early exit. */ |
432 | 9 | if ( !test_bit(feature, deep_features) ) |
433 | 1 | return NULL; |
434 | 9 | |
435 | 9 | /* deep_deps[] is sorted. Perform a binary search. */ |
436 | 29 | while ( start < end ) |
437 | 29 | { |
438 | 29 | unsigned int mid = start + ((end - start) / 2); |
439 | 29 | |
440 | 29 | if ( deep_deps[mid].feature > feature ) |
441 | 8 | end = mid; |
442 | 21 | else if ( deep_deps[mid].feature < feature ) |
443 | 13 | start = mid + 1; |
444 | 21 | else |
445 | 8 | return deep_deps[mid].fs; |
446 | 29 | } |
447 | 8 | |
448 | 0 | return NULL; |
449 | 8 | } |
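
lookup_deep_deps() is a textbook binary search over a table sorted by .feature, with the midpoint computed as start + (end - start) / 2 to avoid the overflow risk of the naive (start + end) / 2. A standalone sketch of the same pattern over a hypothetical three-entry table:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct dep {
    uint32_t feature;
    uint32_t deps;
};

/* Hypothetical table, sorted ascending by .feature. */
static const struct dep table[] = {
    { 3, 0x10 }, { 7, 0x22 }, { 12, 0x80 },
};

static const uint32_t *lookup(uint32_t feature)
{
    size_t start = 0, end = sizeof(table) / sizeof(table[0]);

    while ( start < end )
    {
        size_t mid = start + (end - start) / 2;   /* no overflow */

        if ( table[mid].feature > feature )
            end = mid;
        else if ( table[mid].feature < feature )
            start = mid + 1;
        else
            return &table[mid].deps;
    }

    return NULL;
}

int main(void)
{
    const uint32_t *d = lookup(7);

    printf("%s\n", d ? "found" : "missing");      /* found */
    return 0;
}
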
450 | | |
451 | | void recalculate_cpuid_policy(struct domain *d) |
452 | 1 | { |
453 | 1 | struct cpuid_policy *p = d->arch.cpuid; |
454 | 1 | const struct cpuid_policy *max = |
455 | 1 | is_pv_domain(d) ? &pv_max_cpuid_policy : &hvm_max_cpuid_policy; |
456 | 1 | uint32_t fs[FSCAPINTS], max_fs[FSCAPINTS]; |
457 | 1 | unsigned int i; |
458 | 1 | |
459 | 1 | p->x86_vendor = get_cpu_vendor(p->basic.vendor_ebx, p->basic.vendor_ecx, |
460 | 1 | p->basic.vendor_edx, gcv_guest); |
461 | 1 | |
462 | 1 | p->basic.max_leaf = min(p->basic.max_leaf, max->basic.max_leaf); |
463 | 1 | p->feat.max_subleaf = min(p->feat.max_subleaf, max->feat.max_subleaf); |
464 | 1 | p->extd.max_leaf = 0x80000000 | min(p->extd.max_leaf & 0xffff, |
465 | 1 | (p->x86_vendor == X86_VENDOR_AMD |
466 | 1 | ? CPUID_GUEST_NR_EXTD_AMD |
467 | 1 | : CPUID_GUEST_NR_EXTD_INTEL) - 1); |
468 | 1 | |
469 | 1 | cpuid_policy_to_featureset(p, fs); |
470 | 1 | cpuid_policy_to_featureset(max, max_fs); |
471 | 1 | |
472 | 1 | if ( is_hvm_domain(d) ) |
473 | 1 | { |
474 | 1 | /* |
475 | 1 | * HVM domains using Shadow paging have further restrictions on their |
476 | 1 | * available paging features. |
477 | 1 | */ |
478 | 1 | if ( !hap_enabled(d) ) |
479 | 0 | { |
480 | 0 | for ( i = 0; i < ARRAY_SIZE(max_fs); i++ ) |
481 | 0 | max_fs[i] &= hvm_shadow_featuremask[i]; |
482 | 0 | } |
483 | 1 | |
484 | 1 | /* Hide nested-virt if it hasn't been explicitly configured. */ |
485 | 1 | if ( !nestedhvm_enabled(d) ) |
486 | 1 | { |
487 | 1 | __clear_bit(X86_FEATURE_VMX, max_fs); |
488 | 1 | __clear_bit(X86_FEATURE_SVM, max_fs); |
489 | 1 | } |
490 | 1 | } |
491 | 1 | |
492 | 1 | /* |
493 | 1 | * Allow the toolstack to set HTT, X2APIC and CMP_LEGACY. These bits |
494 | 1 | * affect how to interpret topology information in other cpuid leaves. |
495 | 1 | */ |
496 | 1 | __set_bit(X86_FEATURE_HTT, max_fs); |
497 | 1 | __set_bit(X86_FEATURE_X2APIC, max_fs); |
498 | 1 | __set_bit(X86_FEATURE_CMP_LEGACY, max_fs); |
499 | 1 | |
500 | 1 | /* |
501 | 1 | * 32bit PV domains can't use any Long Mode features, and cannot use |
502 | 1 | * SYSCALL on non-AMD hardware. |
503 | 1 | */ |
504 | 1 | if ( is_pv_32bit_domain(d) ) |
505 | 0 | { |
506 | 0 | __clear_bit(X86_FEATURE_LM, max_fs); |
507 | 0 | if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD ) |
508 | 0 | __clear_bit(X86_FEATURE_SYSCALL, max_fs); |
509 | 0 | } |
510 | 1 | |
511 | 1 | /* |
512 | 1 | * ITSC is masked by default (so domains are safe to migrate), but a |
513 | 1 | * toolstack which has configured disable_migrate or vTSC for a domain may |
514 | 1 | * safely select it, and needs a way of doing so. |
515 | 1 | */ |
516 | 1 | if ( cpu_has_itsc && (d->disable_migrate || d->arch.vtsc) ) |
517 | 1 | __set_bit(X86_FEATURE_ITSC, max_fs); |
518 | 1 | |
519 | 1 | /* Clamp the toolstack's choices to reality. */ |
520 | 11 | for ( i = 0; i < ARRAY_SIZE(fs); i++ ) |
521 | 10 | fs[i] &= max_fs[i]; |
522 | 1 | |
523 | 1 | if ( p->basic.max_leaf < XSTATE_CPUID ) |
524 | 0 | __clear_bit(X86_FEATURE_XSAVE, fs); |
525 | 1 | |
526 | 1 | sanitise_featureset(fs); |
527 | 1 | |
528 | 1 | /* Fold host's FDP_EXCP_ONLY and NO_FPU_SEL into guest's view. */ |
529 | 1 | fs[FEATURESET_7b0] &= ~special_features[FEATURESET_7b0]; |
530 | 1 | fs[FEATURESET_7b0] |= (host_cpuid_policy.feat._7b0 & |
531 | 1 | special_features[FEATURESET_7b0]); |
532 | 1 | |
533 | 1 | cpuid_featureset_to_policy(fs, p); |
534 | 1 | |
535 | 1 | /* Pass host cacheline size through to guests. */ |
536 | 1 | p->basic.clflush_size = max->basic.clflush_size; |
537 | 1 | |
538 | 1 | p->extd.maxphysaddr = min(p->extd.maxphysaddr, max->extd.maxphysaddr); |
539 | 1 | p->extd.maxphysaddr = min_t(uint8_t, p->extd.maxphysaddr, |
540 | 1 | paging_max_paddr_bits(d)); |
541 | 1 | p->extd.maxphysaddr = max_t(uint8_t, p->extd.maxphysaddr, |
542 | 1 | (p->basic.pae || p->basic.pse36) ? 36 : 32); |
543 | 1 | |
544 | 1 | p->extd.maxlinaddr = p->extd.lm ? 48 : 32; |
545 | 1 | |
546 | 1 | recalculate_xstate(p); |
547 | 1 | recalculate_misc(p); |
548 | 1 | |
549 | 5 | for ( i = 0; i < ARRAY_SIZE(p->cache.raw); ++i ) |
550 | 5 | { |
551 | 5 | if ( p->cache.subleaf[i].type >= 1 && |
552 | 4 | p->cache.subleaf[i].type <= 3 ) |
553 | 4 | { |
554 | 4 | /* Subleaf has a valid cache type. Zero reserved fields. */ |
555 | 4 | p->cache.raw[i].a &= 0xffffc3ffu; |
556 | 4 | p->cache.raw[i].d &= 0x00000007u; |
557 | 4 | } |
558 | 5 | else |
559 | 1 | { |
560 | 1 | /* Subleaf is not valid. Zero the rest of the union. */ |
561 | 1 | zero_leaves(p->cache.raw, i, ARRAY_SIZE(p->cache.raw) - 1); |
562 | 1 | break; |
563 | 1 | } |
564 | 5 | } |
565 | 1 | |
566 | 1 | if ( !p->extd.svm ) |
567 | 1 | p->extd.raw[0xa] = EMPTY_LEAF; |
568 | 1 | |
569 | 1 | if ( !p->extd.page1gb ) |
570 | 0 | p->extd.raw[0x19] = EMPTY_LEAF; |
571 | 1 | |
572 | 1 | if ( p->extd.lwp ) |
573 | 0 | p->extd.raw[0x1c].d &= max->extd.raw[0x1c].d; |
574 | 1 | else |
575 | 1 | p->extd.raw[0x1c] = EMPTY_LEAF; |
576 | 1 | } |
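
The maxphysaddr logic above is a three-way clamp: no wider than the max policy allows, no wider than the paging implementation supports, and never below the architectural floor (36 bits when PAE or PSE36 is visible, else 32). A sketch of just that arithmetic (helper name hypothetical):

#include <stdint.h>
#include <stdio.h>

static uint8_t clamp_maxphysaddr(uint8_t guest, uint8_t host_max,
                                 uint8_t paging_max, int pae_or_pse36)
{
    uint8_t floor = pae_or_pse36 ? 36 : 32;
    uint8_t v = guest < host_max ? guest : host_max;

    if ( v > paging_max )
        v = paging_max;
    if ( v < floor )
        v = floor;

    return v;
}

int main(void)
{
    /* Toolstack asked for 52 bits; host policy allows 46, the paging
     * implementation only 40; PAE visible, so the floor is 36. */
    printf("%u\n", clamp_maxphysaddr(52, 46, 40, 1));   /* 40 */
    return 0;
}
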
577 | | |
578 | | int init_domain_cpuid_policy(struct domain *d) |
579 | 1 | { |
580 | 1 | d->arch.cpuid = xmalloc(struct cpuid_policy); |
581 | 1 | |
582 | 1 | if ( !d->arch.cpuid ) |
583 | 0 | return -ENOMEM; |
584 | 1 | |
585 | 1 | *d->arch.cpuid = is_pv_domain(d) |
586 | 1 | ? pv_max_cpuid_policy : hvm_max_cpuid_policy; |
587 | 1 | |
588 | 1 | if ( d->disable_migrate ) |
589 | 1 | d->arch.cpuid->extd.itsc = cpu_has_itsc; |
590 | 1 | |
591 | 1 | recalculate_cpuid_policy(d); |
592 | 1 | |
593 | 1 | return 0; |
594 | 1 | } |
595 | | |
596 | | void guest_cpuid(const struct vcpu *v, uint32_t leaf, |
597 | | uint32_t subleaf, struct cpuid_leaf *res) |
598 | 2.66k | { |
599 | 2.66k | const struct domain *d = v->domain; |
600 | 2.66k | const struct cpuid_policy *p = d->arch.cpuid; |
601 | 2.66k | |
602 | 2.66k | *res = EMPTY_LEAF; |
603 | 2.66k | |
604 | 2.66k | /* |
605 | 2.66k | * First pass: |
606 | 2.66k | * - Perform max_leaf/subleaf calculations. Out-of-range leaves return |
607 | 2.66k | * all zeros, following the AMD model. |
608 | 2.66k | * - Fill in *res for leaves no longer handled on the legacy path. |
609 | 2.66k | * - Dispatch the virtualised leaves to their respective handlers. |
610 | 2.66k | */ |
611 | 2.66k | switch ( leaf ) |
612 | 2.66k | { |
613 | 2.65k | case 0 ... CPUID_GUEST_NR_BASIC - 1: |
614 | 2.65k | ASSERT(p->basic.max_leaf < ARRAY_SIZE(p->basic.raw)); |
615 | 2.65k | if ( leaf > min_t(uint32_t, p->basic.max_leaf, |
616 | 2.65k | ARRAY_SIZE(p->basic.raw) - 1) ) |
617 | 0 | return; |
618 | 2.65k | |
619 | 2.65k | switch ( leaf ) |
620 | 2.65k | { |
621 | 9 | case 0x4: |
622 | 9 | if ( subleaf >= ARRAY_SIZE(p->cache.raw) ) |
623 | 0 | return; |
624 | 9 | |
625 | 9 | *res = p->cache.raw[subleaf]; |
626 | 9 | break; |
627 | 9 | |
628 | 1.04k | case 0x7: |
629 | 1.04k | ASSERT(p->feat.max_subleaf < ARRAY_SIZE(p->feat.raw)); |
630 | 1.04k | if ( subleaf > min_t(uint32_t, p->feat.max_subleaf, |
631 | 1.04k | ARRAY_SIZE(p->feat.raw) - 1) ) |
632 | 0 | return; |
633 | 1.04k | |
634 | 1.04k | *res = p->feat.raw[subleaf]; |
635 | 1.04k | break; |
636 | 1.04k | |
637 | 20 | case XSTATE_CPUID: |
638 | 20 | if ( !p->basic.xsave || subleaf >= ARRAY_SIZE(p->xstate.raw) ) |
639 | 0 | return; |
640 | 20 | |
641 | 20 | *res = p->xstate.raw[subleaf]; |
642 | 20 | break; |
643 | 20 | |
644 | 1.57k | default: |
645 | 1.57k | *res = p->basic.raw[leaf]; |
646 | 1.57k | break; |
647 | 2.65k | } |
648 | 2.65k | break; |
649 | 2.65k | |
650 | 9 | case 0x40000000 ... 0x400000ff: |
651 | 9 | if ( is_viridian_domain(d) ) |
652 | 0 | return cpuid_viridian_leaves(v, leaf, subleaf, res); |
653 | 9 | |
654 | 9 | /* |
655 | 9 | * Fallthrough. |
656 | 9 | * |
657 | 9 | * Intel reserves up to 0x4fffffff for hypervisor use. AMD reserves |
658 | 9 | * only up to 0x400000ff, but we already use double that. |
659 | 9 | */ |
660 | 9 | case 0x40000100 ... 0x400001ff: |
661 | 9 | return cpuid_hypervisor_leaves(v, leaf, subleaf, res); |
662 | 9 | |
663 | 8 | case 0x80000000 ... 0x80000000 + CPUID_GUEST_NR_EXTD - 1: |
664 | 8 | ASSERT((p->extd.max_leaf & 0xffff) < ARRAY_SIZE(p->extd.raw)); |
665 | 8 | if ( (leaf & 0xffff) > min_t(uint32_t, p->extd.max_leaf & 0xffff, |
666 | 8 | ARRAY_SIZE(p->extd.raw) - 1) ) |
667 | 0 | return; |
668 | 8 | |
669 | 8 | *res = p->extd.raw[leaf & 0xffff]; |
670 | 8 | break; |
671 | 8 | |
672 | 0 | default: |
673 | 0 | return; |
674 | 2.66k | } |
675 | 2.66k | |
676 | 2.66k | /* |
677 | 2.66k | * Skip dynamic adjustments if we are in the wrong context. |
678 | 2.66k | * |
679 | 2.66k | * All dynamic adjustments depend on current register state, which will |
680 | 2.66k | * be stale if the vcpu is running elsewhere. It is simpler, quicker, and |
681 | 2.66k | * more reliable for the caller to do nothing (consistently) than to hand |
682 | 2.66k | * back stale data which it can't use safely. |
683 | 2.66k | */ |
684 | 2.65k | if ( v != current ) |
685 | 0 | return; |
686 | 2.65k | |
687 | 2.65k | /* |
688 | 2.65k | * Second pass: |
689 | 2.65k | * - Dynamic adjustments |
690 | 2.65k | */ |
691 | 2.65k | switch ( leaf ) |
692 | 2.65k | { |
693 | 0 | const struct cpu_user_regs *regs; |
694 | 0 | |
695 | 531 | case 0x1: |
696 | 531 | /* TODO: Rework topology logic. */ |
697 | 531 | res->b &= 0x00ffffffu; |
698 | 531 | if ( is_hvm_domain(d) ) |
699 | 531 | res->b |= (v->vcpu_id * 2) << 24; |
700 | 531 | |
701 | 531 | /* TODO: Rework vPMU control in terms of toolstack choices. */ |
702 | 531 | if ( vpmu_available(v) && |
703 | 0 | vpmu_is_set(vcpu_vpmu(v), VPMU_CPU_HAS_DS) ) |
704 | 0 | { |
705 | 0 | res->d |= cpufeat_mask(X86_FEATURE_DS); |
706 | 0 | if ( cpu_has(&current_cpu_data, X86_FEATURE_DTES64) ) |
707 | 0 | res->c |= cpufeat_mask(X86_FEATURE_DTES64); |
708 | 0 | if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) ) |
709 | 0 | res->c |= cpufeat_mask(X86_FEATURE_DSCPL); |
710 | 0 | } |
711 | 531 | |
712 | 531 | if ( is_hvm_domain(d) ) |
713 | 531 | { |
714 | 531 | /* OSXSAVE clear in policy. Fast-forward CR4 back in. */ |
715 | 531 | if ( v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_OSXSAVE ) |
716 | 530 | res->c |= cpufeat_mask(X86_FEATURE_OSXSAVE); |
717 | 531 | } |
718 | 531 | else /* PV domain */ |
719 | 0 | { |
720 | 0 | regs = guest_cpu_user_regs(); |
721 | 0 | |
722 | 0 | /* |
723 | 0 | * !!! OSXSAVE handling for PV guests is non-architectural !!! |
724 | 0 | * |
725 | 0 | * Architecturally, the correct code here is simply: |
726 | 0 | * |
727 | 0 | * if ( v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE ) |
728 | 0 | * c |= cpufeat_mask(X86_FEATURE_OSXSAVE); |
729 | 0 | * |
730 | 0 | * However because of bugs in Xen (before c/s bd19080b, Nov 2010, |
731 | 0 | * the XSAVE cpuid flag leaked into guests despite the feature not |
732 | 0 | * being available for use), buggy workarounds where introduced to |
733 | 0 | * Linux (c/s 947ccf9c, also Nov 2010) which relied on the fact |
734 | 0 | * that Xen also incorrectly leaked OSXSAVE into the guest. |
735 | 0 | * |
736 | 0 | * Furthermore, providing architectural OSXSAVE behaviour to a |
737 | 0 | * many Linux PV guests triggered a further kernel bug when the |
738 | 0 | * fpu code observes that XSAVEOPT is available, assumes that |
739 | 0 | * xsave state had been set up for the task, and follows a wild |
740 | 0 | * pointer. |
741 | 0 | * |
742 | 0 | * Older Linux PVOPS kernels however do require architectural |
743 | 0 | * behaviour. They observe Xen's leaked OSXSAVE and assume they |
744 | 0 | * can already use XSETBV, dying with a #UD because the shadowed |
745 | 0 | * CR4.OSXSAVE is clear. This behaviour has been adjusted in all |
746 | 0 | * observed cases via stable backports of the above changeset. |
747 | 0 | * |
748 | 0 | * Therefore, the leaking of Xen's OSXSAVE setting has become a |
749 | 0 | * defacto part of the PV ABI and can't reasonably be corrected. |
750 | 0 | * It can however be restricted to only the enlightened CPUID |
751 | 0 | * view, as seen by the guest kernel. |
752 | 0 | * |
753 | 0 | * The following situations and logic now applies: |
754 | 0 | * |
755 | 0 | * - Hardware without CPUID faulting support and native CPUID: |
756 | 0 | * There is nothing Xen can do here. The host's XSAVE flag will |
757 | 0 | * leak through and Xen's OSXSAVE choice will leak through. |
758 | 0 | * |
759 | 0 | * In the case that the guest kernel has not set up OSXSAVE, only |
760 | 0 | * SSE will be set in xcr0, and guest userspace can't do too much |
761 | 0 | * damage itself. |
762 | 0 | * |
763 | 0 | * - Enlightened CPUID or CPUID faulting available: |
764 | 0 | * Xen can fully control what is seen here. Guest kernels need |
765 | 0 | * to see the leaked OSXSAVE via the enlightened path, but |
766 | 0 | * guest userspace and the native CPUID view are given architectural |
767 | 0 | * behaviour. |
768 | 0 | * |
769 | 0 | * Emulated vs Faulted CPUID is distinguished based on whether a |
770 | 0 | * #UD or #GP is currently being serviced. |
771 | 0 | */ |
772 | 0 | /* OSXSAVE clear in policy. Fast-forward CR4 back in. */ |
773 | 0 | if ( (v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE) || |
774 | 0 | (regs->entry_vector == TRAP_invalid_op && |
775 | 0 | guest_kernel_mode(v, regs) && |
776 | 0 | (read_cr4() & X86_CR4_OSXSAVE)) ) |
777 | 0 | res->c |= cpufeat_mask(X86_FEATURE_OSXSAVE); |
778 | 0 | |
779 | 0 | /* |
780 | 0 | * At the time of writing, a PV domain is the only viable option |
781 | 0 | * for Dom0. Several interactions between dom0 and Xen for real |
782 | 0 | * hardware setup have unfortunately been implemented based on |
783 | 0 | * state which incorrectly leaked into dom0. |
784 | 0 | * |
785 | 0 | * These leaks are retained for backwards compatibility, but |
786 | 0 | * restricted to the hardware domains kernel only. |
787 | 0 | */ |
788 | 0 | if ( is_hardware_domain(d) && guest_kernel_mode(v, regs) ) |
789 | 0 | { |
790 | 0 | /* |
791 | 0 | * MONITOR never leaked into PV guests, as PV guests cannot |
792 | 0 | * use the MONITOR/MWAIT instructions. As such, they rely on |
793 | 0 | * the feature not being present in emulated CPUID. |
794 | 0 | * |
795 | 0 | * Modern PVOPS Linux try to be cunning and use native CPUID |
796 | 0 | * to see if the hardware actually supports MONITOR, and by |
797 | 0 | * extension, deep C states. |
798 | 0 | * |
799 | 0 | * If the feature is seen, deep-C state information is |
800 | 0 | * obtained from the DSDT and handed back to Xen via the |
801 | 0 | * XENPF_set_processor_pminfo hypercall. |
802 | 0 | * |
803 | 0 | * This mechanism is incompatible with an HVM-based hardware |
804 | 0 | * domain, and also with CPUID Faulting. |
805 | 0 | * |
806 | 0 | * Luckily, Xen can be just as 'cunning', and distinguish an |
807 | 0 | * emulated CPUID from a faulted CPUID by whether a #UD or #GP |
808 | 0 | * fault is currently being serviced. Yuck... |
809 | 0 | */ |
810 | 0 | if ( cpu_has_monitor && regs->entry_vector == TRAP_gp_fault ) |
811 | 0 | res->c |= cpufeat_mask(X86_FEATURE_MONITOR); |
812 | 0 | |
813 | 0 | /* |
814 | 0 | * While MONITOR never leaked into PV guests, EIST always used |
815 | 0 | * to. |
816 | 0 | * |
817 | 0 | * Modern PVOPS Linux will only parse P state information from |
818 | 0 | * the DSDT and return it to Xen if EIST is seen in the |
819 | 0 | * emulated CPUID information. |
820 | 0 | */ |
821 | 0 | if ( cpu_has_eist ) |
822 | 0 | res->c |= cpufeat_mask(X86_FEATURE_EIST); |
823 | 0 | } |
824 | 0 | } |
825 | 531 | goto common_leaf1_adjustments; |
826 | 0 | |
827 | 0 | case 0x5: |
828 | 0 | /* |
829 | 0 | * Leak the hardware MONITOR leaf under the same conditions that the |
830 | 0 | * MONITOR feature flag is leaked. See above for details. |
831 | 0 | */ |
832 | 0 | regs = guest_cpu_user_regs(); |
833 | 0 | if ( is_pv_domain(d) && is_hardware_domain(d) && |
834 | 0 | guest_kernel_mode(v, regs) && cpu_has_monitor && |
835 | 0 | regs->entry_vector == TRAP_gp_fault ) |
836 | 0 | *res = raw_cpuid_policy.basic.raw[leaf]; |
837 | 0 | break; |
838 | 0 | |
839 | 1.04k | case 0x7: |
840 | 1.04k | switch ( subleaf ) |
841 | 1.04k | { |
842 | 1.04k | case 0: |
843 | 1.04k | /* OSPKE clear in policy. Fast-forward CR4 back in. */ |
844 | 1.04k | if ( (is_pv_domain(d) |
845 | 0 | ? v->arch.pv_vcpu.ctrlreg[4] |
846 | 1.04k | : v->arch.hvm_vcpu.guest_cr[4]) & X86_CR4_PKE ) |
847 | 0 | res->c |= cpufeat_mask(X86_FEATURE_OSPKE); |
848 | 1.04k | break; |
849 | 1.04k | } |
850 | 1.04k | break; |
851 | 1.04k | |
852 | 0 | case 0xa: |
853 | 0 | /* TODO: Rework vPMU control in terms of toolstack choices. */ |
854 | 0 | if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || |
855 | 0 | !vpmu_available(v) ) |
856 | 0 | *res = EMPTY_LEAF; |
857 | 0 | else |
858 | 0 | { |
859 | 0 | /* Report at most v3 since that's all we currently emulate. */ |
860 | 0 | if ( (res->a & 0xff) > 3 ) |
861 | 0 | res->a = (res->a & ~0xff) | 3; |
862 | 0 | } |
863 | 0 | break; |
864 | 1.04k | |
865 | 1 | case 0xb: |
866 | 1 | /* |
867 | 1 | * In principle, this leaf is Intel-only. In practice, it is tightly |
868 | 1 | * coupled with x2apic, and we offer an x2apic-capable APIC emulation |
869 | 1 | * to guests on AMD hardware as well. |
870 | 1 | * |
871 | 1 | * TODO: Rework topology logic. |
872 | 1 | */ |
873 | 1 | if ( p->basic.x2apic ) |
874 | 1 | { |
875 | 1 | *(uint8_t *)&res->c = subleaf; |
876 | 1 | |
877 | 1 | /* Fix the x2APIC identifier. */ |
878 | 1 | res->d = v->vcpu_id * 2; |
879 | 1 | } |
880 | 1 | break; |
881 | 1.04k | |
882 | 20 | case XSTATE_CPUID: |
883 | 20 | switch ( subleaf ) |
884 | 20 | { |
885 | 2 | case 1: |
886 | 2 | if ( p->xstate.xsaves ) |
887 | 0 | { |
888 | 0 | /* |
889 | 0 | * TODO: Figure out what to do for XSS state. VT-x manages |
890 | 0 | * host vs guest MSR_XSS automatically, so as soon as we start |
891 | 0 | * supporting any XSS states, the wrong XSS will be in |
892 | 0 | * context. |
893 | 0 | */ |
894 | 0 | BUILD_BUG_ON(XSTATE_XSAVES_ONLY != 0); |
895 | 0 | |
896 | 0 | /* |
897 | 0 | * Read CPUID[0xD,0/1].EBX from hardware. They vary with |
898 | 0 | * enabled XSTATE, and appropriate XCR0|XSS are in context. |
899 | 0 | */ |
900 | 17 | case 0: |
901 | 17 | res->b = cpuid_count_ebx(leaf, subleaf); |
902 | 17 | } |
903 | 19 | break; |
904 | 20 | } |
905 | 20 | break; |
906 | 20 | |
907 | 1 | case 0x80000001: |
908 | 1 | /* SYSCALL is hidden outside of long mode on Intel. */ |
909 | 1 | if ( p->x86_vendor == X86_VENDOR_INTEL && |
910 | 1 | is_hvm_domain(d) && !hvm_long_mode_active(v) ) |
911 | 0 | res->d &= ~cpufeat_mask(X86_FEATURE_SYSCALL); |
912 | 1 | |
913 | 532 | common_leaf1_adjustments: |
914 | 532 | if ( is_hvm_domain(d) ) |
915 | 532 | { |
916 | 532 | /* Fast-forward MSR_APIC_BASE.EN. */ |
917 | 532 | if ( vlapic_hw_disabled(vcpu_vlapic(v)) ) |
918 | 0 | res->d &= ~cpufeat_bit(X86_FEATURE_APIC); |
919 | 532 | |
920 | 532 | /* |
921 | 532 | * PSE36 is not supported in shadow mode. This bit should be |
922 | 532 | * clear in hvm_shadow_featuremask[]. |
923 | 532 | * |
924 | 532 | * However, an unspecified version of Hyper-V from 2011 refuses to |
925 | 532 | * start as the "cpu does not provide required hw features" if it |
926 | 532 | * can't see PSE36. |
927 | 532 | * |
928 | 532 | * As a workaround, leak the toolstack-provided PSE36 value into a |
929 | 532 | * shadow guest if the guest is already using PAE paging (and |
930 | 532 | * won't care about reverting back to PSE paging). Otherwise, |
931 | 532 | * nobble it, so a 32bit guest doesn't get the impression that it |
932 | 532 | * could try to use PSE36 paging. |
933 | 532 | */ |
934 | 532 | if ( !hap_enabled(d) && !hvm_pae_enabled(v) ) |
935 | 0 | res->d &= ~cpufeat_mask(X86_FEATURE_PSE36); |
936 | 532 | } |
937 | 532 | else /* PV domain */ |
938 | 0 | { |
939 | 0 | /* |
940 | 0 | * MTRR used to unconditionally leak into PV guests. They cannot |
941 | 0 | * MTRR infrastructure at all, and shouldn't be able to see the |
942 | 0 | * feature. |
943 | 0 | * |
944 | 0 | * Modern PVOPS Linux self-clobbers the MTRR feature, to avoid |
945 | 0 | * trying to use the associated MSRs. Xenolinux-based PV dom0's |
946 | 0 | * trying to use the associated MSRs. Xenolinux-based PV dom0s, |
947 | 0 | * however, use the MTRR feature as an indication of the presence |
948 | 0 | */ |
949 | 0 | if ( is_hardware_domain(d) && cpu_has_mtrr && |
950 | 0 | guest_kernel_mode(v, guest_cpu_user_regs()) ) |
951 | 0 | res->d |= cpufeat_mask(X86_FEATURE_MTRR); |
952 | 0 | } |
953 | 532 | break; |
954 | 1 | |
955 | 0 | case 0x8000001c: |
956 | 0 | if ( (v->arch.xcr0 & XSTATE_LWP) && cpu_has_svm ) |
957 | 0 | /* Turn on available bit and other features specified in lwp_cfg. */ |
958 | 0 | res->a = (res->d & v->arch.hvm_svm.guest_lwp_cfg) | 1; |
959 | 0 | break; |
960 | 2.65k | } |
961 | 2.65k | } |
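
Most of the dynamic adjustments above are "fast-forwards": the feature bit is kept clear in the static policy and OR-ed back in at query time from live register state. A minimal sketch of the CR4.OSXSAVE case (bit positions are architectural; names are local to the sketch):

#include <stdint.h>
#include <stdio.h>

#define X86_CR4_OSXSAVE    (1u << 18)   /* architectural CR4 bit */
#define CPUID1_ECX_OSXSAVE (1u << 27)   /* architectural leaf 1 ECX bit */

/* Policy ECX has OSXSAVE permanently clear; reflect live CR4 instead. */
static uint32_t leaf1_ecx(uint32_t policy_ecx, uint32_t guest_cr4)
{
    uint32_t ecx = policy_ecx;

    if ( guest_cr4 & X86_CR4_OSXSAVE )
        ecx |= CPUID1_ECX_OSXSAVE;

    return ecx;
}

int main(void)
{
    printf("%#x\n", leaf1_ecx(0, X86_CR4_OSXSAVE));  /* 0x8000000 */
    return 0;
}
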
962 | | |
963 | | static void __init __maybe_unused build_assertions(void) |
964 | 0 | { |
965 | 0 | BUILD_BUG_ON(ARRAY_SIZE(known_features) != FSCAPINTS); |
966 | 0 | BUILD_BUG_ON(ARRAY_SIZE(special_features) != FSCAPINTS); |
967 | 0 | BUILD_BUG_ON(ARRAY_SIZE(pv_featuremask) != FSCAPINTS); |
968 | 0 | BUILD_BUG_ON(ARRAY_SIZE(hvm_shadow_featuremask) != FSCAPINTS); |
969 | 0 | BUILD_BUG_ON(ARRAY_SIZE(hvm_hap_featuremask) != FSCAPINTS); |
970 | 0 | BUILD_BUG_ON(ARRAY_SIZE(deep_features) != FSCAPINTS); |
971 | 0 | |
972 | 0 | /* Find some more clever allocation scheme if this trips. */ |
973 | 0 | BUILD_BUG_ON(sizeof(struct cpuid_policy) > PAGE_SIZE); |
974 | 0 |
|
975 | 0 | BUILD_BUG_ON(sizeof(raw_cpuid_policy.basic) != |
976 | 0 | sizeof(raw_cpuid_policy.basic.raw)); |
977 | 0 | BUILD_BUG_ON(sizeof(raw_cpuid_policy.feat) != |
978 | 0 | sizeof(raw_cpuid_policy.feat.raw)); |
979 | 0 | BUILD_BUG_ON(sizeof(raw_cpuid_policy.xstate) != |
980 | 0 | sizeof(raw_cpuid_policy.xstate.raw)); |
981 | 0 | BUILD_BUG_ON(sizeof(raw_cpuid_policy.extd) != |
982 | 0 | sizeof(raw_cpuid_policy.extd.raw)); |
983 | 0 | } |
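
BUILD_BUG_ON() turns a violated invariant into a compile failure rather than a runtime check. One common implementation technique (Xen's actual definition may differ) is a negative-width bitfield, shown here under a hypothetical macro name:

#include <stdio.h>

/* A negative bitfield width is a constraint violation, so this fails
 * to compile iff cond is non-zero (GCC/Clang). */
#define MY_BUILD_BUG_ON(cond) ((void)sizeof(struct { int : -!!(cond); }))

int main(void)
{
    MY_BUILD_BUG_ON(sizeof(int) < 2);     /* false: compiles fine */
    /* MY_BUILD_BUG_ON(sizeof(int) < 8);     true: build error */
    puts("ok");
    return 0;
}
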
984 | | |
985 | | /* |
986 | | * Local variables: |
987 | | * mode: C |
988 | | * c-file-style: "BSD" |
989 | | * c-basic-offset: 4 |
990 | | * tab-width: 4 |
991 | | * indent-tabs-mode: nil |
992 | | * End: |
993 | | */ |