/root/src/xen/xen/arch/x86/cpu/intel.c
Line | Count | Source |
1 | | #include <xen/init.h> |
2 | | #include <xen/kernel.h> |
3 | | #include <xen/string.h> |
4 | | #include <xen/bitops.h> |
5 | | #include <xen/smp.h> |
6 | | #include <asm/processor.h> |
7 | | #include <asm/msr.h> |
8 | | #include <asm/uaccess.h> |
9 | | #include <asm/mpspec.h> |
10 | | #include <asm/apic.h> |
11 | | #include <asm/i387.h> |
12 | | #include <mach_apic.h> |
13 | | #include <asm/hvm/support.h> |
14 | | #include <asm/setup.h> |
15 | | |
16 | | #include "cpu.h" |
17 | | |
18 | 12 | #define select_idle_routine(x) ((void)0) |
19 | | |
20 | | static bool __init probe_intel_cpuid_faulting(void) |
21 | 1 | { |
22 | 1 | uint64_t x; |
23 | 1 | |
24 | 1 | if (rdmsr_safe(MSR_INTEL_PLATFORM_INFO, x) || |
25 | 1 | !(x & MSR_PLATFORM_INFO_CPUID_FAULTING)) |
26 | 0 | return 0; |
27 | 1 | |
28 | 1 | expected_levelling_cap |= LCAP_faulting; |
29 | 1 | levelling_caps |= LCAP_faulting; |
30 | 1 | setup_force_cpu_cap(X86_FEATURE_CPUID_FAULTING); |
31 | 1 | return 1; |
32 | 1 | } |
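Note on the probe above: MSR_INTEL_PLATFORM_INFO (index 0xce per the Intel SDM) advertises CPUID-faulting support in bit 31, which is what MSR_PLATFORM_INFO_CPUID_FAULTING tests. A minimal userspace sketch of the same check, assuming a Linux host with the msr module loaded and root privileges; the device path, MSR index and bit position are assumptions taken from the SDM/Linux, not from this file:

    /* Sketch: read MSR_PLATFORM_INFO (0xce) via /dev/cpu/0/msr and test
     * bit 31, the "CPUID faulting supported" bit.  Assumes the Linux
     * 'msr' module is loaded; run as root. */
    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        uint64_t val;
        int fd = open("/dev/cpu/0/msr", O_RDONLY);

        if (fd < 0 || pread(fd, &val, sizeof(val), 0xce) != sizeof(val)) {
            perror("MSR_PLATFORM_INFO");
            return 1;
        }
        printf("CPUID faulting %ssupported\n", (val & (1ULL << 31)) ? "" : "not ");
        close(fd);
        return 0;
    }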
33 | | |
34 | | DEFINE_PER_CPU(bool, cpuid_faulting_enabled); |
35 | | |
36 | | static void set_cpuid_faulting(bool enable) |
37 | 37.0k | { |
38 | 37.0k | bool *this_enabled = &this_cpu(cpuid_faulting_enabled); |
39 | 37.0k | uint32_t hi, lo; |
40 | 37.0k | |
41 | 37.0k | ASSERT(cpu_has_cpuid_faulting); |
42 | 37.0k | |
43 | 37.0k | if (*this_enabled == enable) |
44 | 37.0k | return; |
45 | 37.0k | |
46 | 18.4E | rdmsr(MSR_INTEL_MISC_FEATURES_ENABLES, lo, hi); |
47 | 18.4E | lo &= ~MSR_MISC_FEATURES_CPUID_FAULTING; |
48 | 18.4E | if (enable) |
49 | 0 | lo |= MSR_MISC_FEATURES_CPUID_FAULTING; |
50 | 18.4E | wrmsr(MSR_INTEL_MISC_FEATURES_ENABLES, lo, hi); |
51 | 18.4E | |
52 | 18.4E | *this_enabled = enable; |
53 | 18.4E | } |
54 | | |
55 | | /* |
56 | | * Set caps in expected_levelling_cap, probe a specific masking MSR, and set |
57 | | * caps in levelling_caps if it is found, or clobber the MSR index if missing. |
58 | | * If present, reads and returns the MSR's default value. |
59 | | */ |
60 | | static uint64_t __init _probe_mask_msr(unsigned int *msr, uint64_t caps) |
61 | 0 | { |
62 | 0 | uint64_t val = 0; |
63 | 0 | |
64 | 0 | expected_levelling_cap |= caps; |
65 | 0 | |
66 | 0 | if (rdmsr_safe(*msr, val) || wrmsr_safe(*msr, val)) |
67 | 0 | *msr = 0; |
68 | 0 | else |
69 | 0 | levelling_caps |= caps; |
70 | 0 | |
71 | 0 | return val; |
72 | 0 | } |
73 | | |
74 | | /* Indices of the masking MSRs, or 0 if unavailable. */ |
75 | | static unsigned int __read_mostly msr_basic, __read_mostly msr_ext, |
76 | | __read_mostly msr_xsave; |
77 | | |
78 | | /* |
79 | | * Probe for the existence of the expected masking MSRs. They might easily |
80 | | * not be available if Xen is running virtualised. |
81 | | */ |
82 | | static void __init probe_masking_msrs(void) |
83 | 0 | { |
84 | 0 | const struct cpuinfo_x86 *c = &boot_cpu_data; |
85 | 0 | unsigned int exp_msr_basic, exp_msr_ext, exp_msr_xsave; |
86 | 0 | |
87 | 0 | /* Only family 6 supports this feature. */ |
88 | 0 | if (c->x86 != 6) |
89 | 0 | return; |
90 | 0 | |
91 | 0 | switch (c->x86_model) { |
92 | 0 | case 0x17: /* Yorkfield, Wolfdale, Penryn, Harpertown(DP) */ |
93 | 0 | case 0x1d: /* Dunnington(MP) */ |
94 | 0 | msr_basic = MSR_INTEL_MASK_V1_CPUID1; |
95 | 0 | break; |
96 | 0 | |
97 | 0 | case 0x1a: /* Bloomfield, Nehalem-EP(Gainestown) */ |
98 | 0 | case 0x1e: /* Clarksfield, Lynnfield, Jasper Forest */ |
99 | 0 | case 0x1f: /* Something Nehalem-based - perhaps Auburndale/Havendale? */ |
100 | 0 | case 0x25: /* Arrandale, Clarksdale */ |
101 | 0 | case 0x2c: /* Gulftown, Westmere-EP */ |
102 | 0 | case 0x2e: /* Nehalem-EX(Beckton) */ |
103 | 0 | case 0x2f: /* Westmere-EX */ |
104 | 0 | msr_basic = MSR_INTEL_MASK_V2_CPUID1; |
105 | 0 | msr_ext = MSR_INTEL_MASK_V2_CPUID80000001; |
106 | 0 | break; |
107 | 0 | |
108 | 0 | case 0x2a: /* SandyBridge */ |
109 | 0 | case 0x2d: /* SandyBridge-E, SandyBridge-EN, SandyBridge-EP */ |
110 | 0 | msr_basic = MSR_INTEL_MASK_V3_CPUID1; |
111 | 0 | msr_ext = MSR_INTEL_MASK_V3_CPUID80000001; |
112 | 0 | msr_xsave = MSR_INTEL_MASK_V3_CPUIDD_01; |
113 | 0 | break; |
114 | 0 | } |
115 | 0 | |
116 | 0 | exp_msr_basic = msr_basic; |
117 | 0 | exp_msr_ext = msr_ext; |
118 | 0 | exp_msr_xsave = msr_xsave; |
119 | 0 | |
120 | 0 | if (msr_basic) |
121 | 0 | cpuidmask_defaults._1cd = _probe_mask_msr(&msr_basic, LCAP_1cd); |
122 | 0 | |
123 | 0 | if (msr_ext) |
124 | 0 | cpuidmask_defaults.e1cd = _probe_mask_msr(&msr_ext, LCAP_e1cd); |
125 | 0 | |
126 | 0 | if (msr_xsave) |
127 | 0 | cpuidmask_defaults.Da1 = _probe_mask_msr(&msr_xsave, LCAP_Da1); |
128 | 0 | |
129 | 0 | /* |
130 | 0 | * Don't bother warning about a mismatch if virtualised. These MSRs |
131 | 0 | * are not architectural and almost never virtualised. |
132 | 0 | */ |
133 | 0 | if ((expected_levelling_cap == levelling_caps) || |
134 | 0 | cpu_has_hypervisor) |
135 | 0 | return; |
136 | 0 | |
137 | 0 | printk(XENLOG_WARNING "Mismatch between expected (%#x) " |
138 | 0 | "and real (%#x) levelling caps: missing %#x\n", |
139 | 0 | expected_levelling_cap, levelling_caps, |
140 | 0 | (expected_levelling_cap ^ levelling_caps) & levelling_caps); |
141 | 0 | printk(XENLOG_WARNING "Fam %#x, model %#x expected (%#x/%#x/%#x), " |
142 | 0 | "got (%#x/%#x/%#x)\n", c->x86, c->x86_model, |
143 | 0 | exp_msr_basic, exp_msr_ext, exp_msr_xsave, |
144 | 0 | msr_basic, msr_ext, msr_xsave); |
145 | 0 | printk(XENLOG_WARNING |
146 | 0 | "If not running virtualised, please report a bug\n"); |
147 | 0 | } |
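The early bail-out above keys off cpu_has_hypervisor, i.e. the "hypervisor present" bit that hypervisors set in CPUID.1:ECX[31]; the masking MSRs are model-specific and almost never emulated, so the mismatch warning would just be noise when running virtualised. A stand-alone decode of that bit, assuming a GCC/Clang toolchain that provides <cpuid.h>:

    /* Sketch: test CPUID.1:ECX[31], the bit behind cpu_has_hypervisor. */
    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
            return 1;
        printf("running under a hypervisor: %s\n",
               (ecx & (1u << 31)) ? "yes" : "no");
        return 0;
    }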
148 | | |
149 | | /* |
150 | | * Context switch levelling state to the next domain. A parameter of NULL is |
151 | | * used to context switch to the default host state (by the cpu bringup-code, |
152 | | * crash path, etc). |
153 | | */ |
154 | | static void intel_ctxt_switch_levelling(const struct vcpu *next) |
155 | 37.0k | { |
156 | 37.0k | struct cpuidmasks *these_masks = &this_cpu(cpuidmasks); |
157 | 37.0k | const struct domain *nextd = next ? next->domain : NULL; |
158 | 37.0k | const struct cpuidmasks *masks; |
159 | 37.0k | |
160 | 37.0k | if (cpu_has_cpuid_faulting) { |
161 | 37.0k | /* |
162 | 37.0k | * We *should* be enabling faulting for the control domain. |
163 | 37.0k | * |
164 | 37.0k | * Unfortunately, the domain builder (having only ever been a |
165 | 37.0k | * PV guest) expects to be able to see host cpuid state in a |
166 | 37.0k | * native CPUID instruction, to correctly build a CPUID policy |
167 | 37.0k | * for HVM guests (notably the xstate leaves). |
168 | 37.0k | * |
169 | 37.0k | * This logic is fundamentally broken for HVM toolstack |
170 | 37.0k | * domains, and faulting causes PV guests to behave like HVM |
171 | 37.0k | * guests from their point of view. |
172 | 37.0k | * |
173 | 37.0k | * Future development plans will move responsibility for |
174 | 37.0k | * generating the maximum full cpuid policy into Xen, at which |
175 | 37.0k | * point this problem will disappear. |
176 | 37.0k | */ |
177 | 37.0k | set_cpuid_faulting(nextd && !is_control_domain(nextd) && |
178 | 0 | (is_pv_domain(nextd) || |
179 | 0 | next->arch.msr->misc_features_enables.cpuid_faulting)); |
180 | 37.0k | return; |
181 | 37.0k | } |
182 | 37.0k | |
183 | 18.4E | masks = (nextd && is_pv_domain(nextd) && nextd->arch.pv_domain.cpuidmasks) |
184 | 18.4E | ? nextd->arch.pv_domain.cpuidmasks : &cpuidmask_defaults; |
185 | 18.4E | |
186 | 18.4E | if (msr_basic) { |
187 | 0 | uint64_t val = masks->_1cd; |
188 | 0 | |
189 | 0 | /* |
190 | 0 | * OSXSAVE defaults to 1, which causes fast-forwarding of |
191 | 0 | * Xen's real setting. Clobber it if disabled by the guest |
192 | 0 | * kernel. |
193 | 0 | */ |
194 | 0 | if (next && is_pv_vcpu(next) && !is_idle_vcpu(next) && |
195 | 0 | !(next->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE)) |
196 | 0 | val &= ~(uint64_t)cpufeat_mask(X86_FEATURE_OSXSAVE); |
197 | 0 | |
198 | 0 | if (unlikely(these_masks->_1cd != val)) { |
199 | 0 | wrmsrl(msr_basic, val); |
200 | 0 | these_masks->_1cd = val; |
201 | 0 | } |
202 | 0 | } |
203 | 18.4E | |
204 | 18.4E | #define LAZY(msr, field) \ |
205 | 18.4E | ({ \ |
206 | 18.4E | if (unlikely(these_masks->field != masks->field) && \ |
207 | 0 | (msr)) \ |
208 | 0 | { \ |
209 | 0 | wrmsrl((msr), masks->field); \ |
210 | 0 | these_masks->field = masks->field; \ |
211 | 0 | } \ |
212 | 18.4E | }) |
213 | 18.4E | |
214 | 18.4E | LAZY(msr_ext, e1cd); |
215 | 18.4E | LAZY(msr_xsave, Da1); |
216 | 18.4E | |
217 | 18.4E | #undef LAZY |
218 | 18.4E | } |
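The LAZY() helper above is a GNU C statement expression: it only writes a mask MSR when that MSR was actually discovered and the cached per-cpu copy differs from the target value, so repeated context switches with identical masks cost no MSR accesses. Hand-expanding LAZY(msr_ext, e1cd) (preprocessor output, reformatted purely for illustration) gives:

    /* LAZY(msr_ext, e1cd), expanded by hand: */
    ({
        if (unlikely(these_masks->e1cd != masks->e1cd) && (msr_ext)) {
            wrmsrl((msr_ext), masks->e1cd);
            these_masks->e1cd = masks->e1cd;
        }
    });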
219 | | |
220 | | /* |
221 | | * opt_cpuid_mask_ecx/edx: cpuid.1[ecx, edx] feature mask. |
222 | | * For example, E8400[Intel Core 2 Duo Processor series] ecx = 0x0008E3FD, |
223 | | * edx = 0xBFEBFBFF when executing CPUID.EAX = 1 normally. If you want to |
224 | | * 'rev down' to E8400, you can set these values via the corresponding Xen boot parameters. |
225 | | */ |
226 | | static void __init noinline intel_init_levelling(void) |
227 | 1 | { |
228 | 1 | if (!probe_intel_cpuid_faulting()) |
229 | 0 | probe_masking_msrs(); |
230 | 1 | |
231 | 1 | if (msr_basic) { |
232 | 0 | uint32_t ecx, edx, tmp; |
233 | 0 | |
234 | 0 | cpuid(0x00000001, &tmp, &tmp, &ecx, &edx); |
235 | 0 | |
236 | 0 | ecx &= opt_cpuid_mask_ecx; |
237 | 0 | edx &= opt_cpuid_mask_edx; |
238 | 0 | |
239 | 0 | /* Fast-forward bits - Must be set. */ |
240 | 0 | if (ecx & cpufeat_mask(X86_FEATURE_XSAVE)) |
241 | 0 | ecx |= cpufeat_mask(X86_FEATURE_OSXSAVE); |
242 | 0 | edx |= cpufeat_mask(X86_FEATURE_APIC); |
243 | 0 | |
244 | 0 | cpuidmask_defaults._1cd &= ((u64)edx << 32) | ecx; |
245 | 0 | } |
246 | 1 | |
247 | 1 | if (msr_ext) { |
248 | 0 | uint32_t ecx, edx, tmp; |
249 | 0 | |
250 | 0 | cpuid(0x80000001, &tmp, &tmp, &ecx, &edx); |
251 | 0 | |
252 | 0 | ecx &= opt_cpuid_mask_ext_ecx; |
253 | 0 | edx &= opt_cpuid_mask_ext_edx; |
254 | 0 | |
255 | 0 | cpuidmask_defaults.e1cd &= ((u64)edx << 32) | ecx; |
256 | 0 | } |
257 | 1 | |
258 | 1 | if (msr_xsave) { |
259 | 0 | uint32_t eax, tmp; |
260 | 0 | |
261 | 0 | cpuid_count(0x0000000d, 1, &eax, &tmp, &tmp, &tmp); |
262 | 0 | |
263 | 0 | eax &= opt_cpuid_mask_xsave_eax; |
264 | 0 | |
265 | 0 | cpuidmask_defaults.Da1 &= (~0ULL << 32) | eax; |
266 | 0 | } |
267 | 1 | |
268 | 1 | if (opt_cpu_info) { |
269 | 0 | printk(XENLOG_INFO "Levelling caps: %#x\n", levelling_caps); |
270 | 0 | |
271 | 0 | if (!cpu_has_cpuid_faulting) |
272 | 0 | printk(XENLOG_INFO |
273 | 0 | "MSR defaults: 1d 0x%08x, 1c 0x%08x, e1d 0x%08x, " |
274 | 0 | "e1c 0x%08x, Da1 0x%08x\n", |
275 | 0 | (uint32_t)(cpuidmask_defaults._1cd >> 32), |
276 | 0 | (uint32_t)cpuidmask_defaults._1cd, |
277 | 0 | (uint32_t)(cpuidmask_defaults.e1cd >> 32), |
278 | 0 | (uint32_t)cpuidmask_defaults.e1cd, |
279 | 0 | (uint32_t)cpuidmask_defaults.Da1); |
280 | 0 | } |
281 | 1 | |
282 | 1 | if (levelling_caps) |
283 | 1 | ctxt_switch_levelling = intel_ctxt_switch_levelling; |
284 | 1 | } |
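The _1cd and e1cd defaults pack the CPUID %edx value into the high 32 bits and %ecx into the low 32 bits, which is what the ((u64)edx << 32) | ecx expressions above build (Da1 only masks %eax, hence the ~0ULL upper half). A stand-alone sketch of the packing, using the example E8400 values quoted in the comment before intel_init_levelling(); on a real system those values would typically arrive via the cpuid_mask_ecx=/cpuid_mask_edx= command-line options (option names per the Xen command-line documentation, not this file):

    /* Sketch: pack CPUID.1 %edx:%ecx the way cpuidmask_defaults._1cd is
     * masked above, using the E8400 values from the comment. */
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t ecx = 0x0008e3fd, edx = 0xbfebfbff;
        uint64_t _1cd = ((uint64_t)edx << 32) | ecx;

        printf("_1cd mask = %#" PRIx64 "\n", _1cd); /* 0xbfebfbff0008e3fd */
        return 0;
    }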
285 | | |
286 | | static void early_init_intel(struct cpuinfo_x86 *c) |
287 | 12 | { |
288 | 12 | /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ |
289 | 12 | if (c->x86 == 15 && c->x86_cache_alignment == 64) |
290 | 0 | c->x86_cache_alignment = 128; |
291 | 12 | |
292 | 12 | /* Unmask CPUID levels and NX if masked: */ |
293 | 12 | if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { |
294 | 12 | u64 misc_enable, disable; |
295 | 12 | |
296 | 12 | rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); |
297 | 12 | |
298 | 12 | disable = misc_enable & (MSR_IA32_MISC_ENABLE_LIMIT_CPUID | |
299 | 12 | MSR_IA32_MISC_ENABLE_XD_DISABLE); |
300 | 12 | if (disable) { |
301 | 0 | wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable & ~disable); |
302 | 0 | bootsym(trampoline_misc_enable_off) |= disable; |
303 | 0 | } |
304 | 12 | |
305 | 12 | if (disable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) |
306 | 0 | printk(KERN_INFO "revised cpuid level: %d\n", |
307 | 0 | cpuid_eax(0)); |
308 | 12 | if (disable & MSR_IA32_MISC_ENABLE_XD_DISABLE) { |
309 | 0 | write_efer(read_efer() | EFER_NX); |
310 | 0 | printk(KERN_INFO |
311 | 0 | "re-enabled NX (Execute Disable) protection\n"); |
312 | 0 | } |
313 | 12 | } |
314 | 12 | |
315 | 12 | /* CPUID workaround for Intel 0F33/0F34 CPU */ |
316 | 12 | if (boot_cpu_data.x86 == 0xF && boot_cpu_data.x86_model == 3 && |
317 | 0 | (boot_cpu_data.x86_mask == 3 || boot_cpu_data.x86_mask == 4)) |
318 | 0 | paddr_bits = 36; |
319 | 12 | |
320 | 12 | if (c == &boot_cpu_data) |
321 | 1 | intel_init_levelling(); |
322 | 12 | |
323 | 12 | intel_ctxt_switch_levelling(NULL); |
324 | 12 | } |
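early_init_intel() and the routines below consume c->x86, c->x86_model and c->x86_mask (family, model, stepping), e.g. for the family-0xF model-3 stepping-3/4 workaround above. Those fields follow the conventional CPUID.1:%eax decode with Intel's extended family/model fields folded in; a stand-alone sketch of that decode, assuming a toolchain providing <cpuid.h>:

    /* Sketch: the usual family/model/stepping decode from CPUID.1 %eax. */
    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx, fam, model, stepping;

        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
            return 1;

        fam = (eax >> 8) & 0xf;
        model = (eax >> 4) & 0xf;
        stepping = eax & 0xf;

        if (fam == 0xf)
            fam += (eax >> 20) & 0xff;          /* extended family */
        if (fam >= 0x6)
            model += ((eax >> 16) & 0xf) << 4;  /* extended model */

        printf("family %#x, model %#x, stepping %#x\n", fam, model, stepping);
        return 0;
    }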
325 | | |
326 | | /* |
327 | | * P4 Xeon errata 037 workaround. |
328 | | * Hardware prefetcher may cause stale data to be loaded into the cache. |
329 | | * |
330 | | * Xeon 7400 erratum AAI65 (and further newer Xeons) |
331 | | * MONITOR/MWAIT may have excessive false wakeups |
332 | | */ |
333 | | static void Intel_errata_workarounds(struct cpuinfo_x86 *c) |
334 | 12 | { |
335 | 12 | unsigned long lo, hi; |
336 | 12 | |
337 | 12 | if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { |
338 | 0 | rdmsr (MSR_IA32_MISC_ENABLE, lo, hi); |
339 | 0 | if ((lo & (1<<9)) == 0) { |
340 | 0 | printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); |
341 | 0 | printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); |
342 | 0 | lo |= (1<<9); /* Disable hw prefetching */ |
343 | 0 | wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); |
344 | 0 | } |
345 | 0 | } |
346 | 12 | |
347 | 12 | if (c->x86 == 6 && cpu_has_clflush && |
348 | 12 | (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47)) |
349 | 0 | __set_bit(X86_FEATURE_CLFLUSH_MONITOR, c->x86_capability); |
350 | 12 | } |
351 | | |
352 | | |
353 | | /* |
354 | | * find out the number of processor cores on the die |
355 | | */ |
356 | | static int num_cpu_cores(struct cpuinfo_x86 *c) |
357 | 0 | { |
358 | 0 | unsigned int eax, ebx, ecx, edx; |
359 | 0 | |
360 | 0 | if (c->cpuid_level < 4) |
361 | 0 | return 1; |
362 | 0 | |
363 | 0 | /* Intel has a non-standard dependency on %ecx for this CPUID level. */ |
364 | 0 | cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); |
365 | 0 | if (eax & 0x1f) |
366 | 0 | return ((eax >> 26) + 1); |
367 | 0 | else |
368 | 0 | return 1; |
369 | 0 | } |
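num_cpu_cores() relies on CPUID leaf 4 (deterministic cache parameters): a non-zero value in the low 5 bits of %eax means the leaf is populated, and bits 31:26 hold "maximum addressable core IDs per package, minus one", hence (eax >> 26) + 1. The same computation host-side, assuming <cpuid.h>:

    /* Sketch: cores-per-package from CPUID.4, mirroring num_cpu_cores(). */
    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid_count(4, 0, &eax, &ebx, &ecx, &edx) || !(eax & 0x1f)) {
            puts("CPUID leaf 4 not populated; assuming 1 core");
            return 0;
        }
        printf("cores per package: %u\n", (eax >> 26) + 1);
        return 0;
    }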
370 | | |
371 | | static void init_intel(struct cpuinfo_x86 *c) |
372 | 12 | { |
373 | 12 | unsigned int l2 = 0; |
374 | 12 | |
375 | 12 | /* Detect the extended topology information if available */ |
376 | 12 | detect_extended_topology(c); |
377 | 12 | |
378 | 12 | select_idle_routine(c); |
379 | 12 | l2 = init_intel_cacheinfo(c); |
380 | 12 | if (c->cpuid_level > 9) { |
381 | 12 | unsigned eax = cpuid_eax(10); |
382 | 12 | /* Check for version and the number of counters */ |
383 | 12 | if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) |
384 | 12 | __set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability); |
385 | 12 | } |
386 | 12 | |
387 | 12 | if ( !cpu_has(c, X86_FEATURE_XTOPOLOGY) ) |
388 | 0 | { |
389 | 0 | c->x86_max_cores = num_cpu_cores(c); |
390 | 0 | detect_ht(c); |
391 | 0 | } |
392 | 12 | |
393 | 12 | /* Work around errata */ |
394 | 12 | Intel_errata_workarounds(c); |
395 | 12 | |
396 | 12 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || |
397 | 12 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) |
398 | 12 | __set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); |
399 | 12 | if (cpu_has(c, X86_FEATURE_ITSC)) { |
400 | 12 | __set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); |
401 | 12 | __set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability); |
402 | 12 | __set_bit(X86_FEATURE_TSC_RELIABLE, c->x86_capability); |
403 | 12 | } |
404 | 12 | if ( opt_arat && |
405 | 12 | ( c->cpuid_level >= 0x00000006 ) && |
406 | 12 | ( cpuid_eax(0x00000006) & (1u<<2) ) ) |
407 | 12 | __set_bit(X86_FEATURE_ARAT, c->x86_capability); |
408 | 12 | } |
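The ARCH_PERFMON test in init_intel() decodes CPUID leaf 0xa: the low byte of %eax is the architectural performance-monitoring version and bits 15:8 are the number of general-purpose counters per logical CPU; the feature bit is only set when the version is non-zero and more than one counter is reported. A stand-alone decode, again assuming <cpuid.h>:

    /* Sketch: decode CPUID.0xa the way the ARCH_PERFMON check does. */
    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(0xa, &eax, &ebx, &ecx, &edx))
            return 1;
        printf("perfmon version %u, %u general-purpose counters\n",
               eax & 0xff, (eax >> 8) & 0xff);
        return 0;
    }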
409 | | |
410 | | static const struct cpu_dev intel_cpu_dev = { |
411 | | .c_vendor = "Intel", |
412 | | .c_ident = { "GenuineIntel" }, |
413 | | .c_early_init = early_init_intel, |
414 | | .c_init = init_intel, |
415 | | }; |
416 | | |
417 | | int __init intel_cpu_init(void) |
418 | 1 | { |
419 | 1 | cpu_devs[X86_VENDOR_INTEL] = &intel_cpu_dev; |
420 | 1 | return 0; |
421 | 1 | } |
422 | | |
423 | | // arch_initcall(intel_cpu_init); |
424 | | |