debuggers.hg

view xen/arch/x86/nmi.c @ 19826:2f9e1348aa98

x86_64: allow more vCPU-s per guest

Since the shared info layout is fixed, guests are required to use
VCPUOP_register_vcpu_info prior to booting any vCPU beyond the
traditional limit of 32.

MAX_VIRT_CPUS, being an implementation detail of the hypervisor, is no
longer being exposed in the public headers.

The tools changes are clearly incomplete (and done only so things
would
build again), and the current state of the tools (using scalar
variables all over the place to represent vCPU bitmaps) very likely
doesn't permit booting DomU-s with more than the traditional number of
vCPU-s. Testing of the extended functionality was done with Dom0 (96
vCPU-s, as well as 128 vCPU-s out of which the kernel elected - by way
of a simple kernel side patch - to use only some, resulting in a
sparse
bitmap).

ia64 changes only to make things build, and build-tested only (and the
tools part only as far as the build would go without encountering
unrelated problems in the blktap code).

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Jun 18 10:14:16 2009 +0100 (2009-06-18)
parents a37267e43365
children 91407452cdb6
line source
1 /*
2 * linux/arch/i386/nmi.c
3 *
4 * NMI watchdog support on APIC systems
5 *
6 * Started by Ingo Molnar <mingo@redhat.com>
7 *
8 * Fixes:
9 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
10 * Mikael Pettersson : Power Management for local APIC NMI watchdog.
11 * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
12 * Pavel Machek and
13 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
14 */
16 #include <xen/config.h>
17 #include <xen/init.h>
18 #include <xen/lib.h>
19 #include <xen/mm.h>
20 #include <xen/irq.h>
21 #include <xen/delay.h>
22 #include <xen/time.h>
23 #include <xen/sched.h>
24 #include <xen/console.h>
25 #include <xen/smp.h>
26 #include <xen/keyhandler.h>
27 #include <asm/current.h>
28 #include <asm/mc146818rtc.h>
29 #include <asm/msr.h>
30 #include <asm/mpspec.h>
31 #include <asm/debugger.h>
32 #include <asm/div64.h>
33 #include <asm/apic.h>
35 unsigned int nmi_watchdog = NMI_NONE;
36 static unsigned int nmi_hz = HZ;
37 static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
38 static unsigned int nmi_p4_cccr_val;
39 static DEFINE_PER_CPU(struct timer, nmi_timer);
40 static DEFINE_PER_CPU(unsigned int, nmi_timer_ticks);
/*
 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
 * - it may be reserved by some other driver, or not
 * - when not reserved by some other driver, it may be used for
 *   the NMI watchdog, or not
 *
 * This is maintained separately from nmi_active because the NMI
 * watchdog may also be driven from the I/O APIC timer.
 */
static DEFINE_SPINLOCK(lapic_nmi_owner_lock);
static unsigned int lapic_nmi_owner;
#define LAPIC_NMI_WATCHDOG (1<<0)  /* owned by the NMI watchdog */
#define LAPIC_NMI_RESERVED (1<<1)  /* reserved by another driver */
/* nmi_active:
 * +1: the lapic NMI watchdog is active, but can be disabled
 *  0: the lapic NMI watchdog has not been set up, and cannot
 *     be enabled
 * -1: the lapic NMI watchdog is disabled, but can be enabled
 */
int nmi_active;
/* AMD K7 performance-event-select bits and the event used as watchdog clock. */
#define K7_EVNTSEL_ENABLE  (1 << 22)
#define K7_EVNTSEL_INT     (1 << 20)
#define K7_EVNTSEL_OS      (1 << 17)
#define K7_EVNTSEL_USR     (1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
#define K7_NMI_EVENT       K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING

/* Intel P6-family event-select bits; Core uses a different clock event code. */
#define P6_EVNTSEL0_ENABLE (1 << 22)
#define P6_EVNTSEL_INT     (1 << 20)
#define P6_EVNTSEL_OS      (1 << 17)
#define P6_EVNTSEL_USR     (1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED   0x79
#define CORE_EVENT_CPU_CLOCKS_NOT_HALTED 0x3c

/* Pentium 4 ESCR/CCCR field encodings. */
#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
#define P4_CCCR_OVF_PMI0     (1<<26)
#define P4_CCCR_OVF_PMI1     (1<<27)
#define P4_CCCR_THRESHOLD(N) ((N)<<20)
#define P4_CCCR_COMPLEMENT   (1<<19)
#define P4_CCCR_COMPARE      (1<<18)
#define P4_CCCR_REQUIRED     (3<<16)
#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
#define P4_CCCR_ENABLE       (1<<12)
/*
 * Set up IQ_PERFCTR0 to behave like a clock, by having IQ_CCCR0 filter
 * CRU_ESCR0 (with any non-null event selector) through a complemented
 * max threshold. [IA32-Vol3, Section 14.9.9]
 */
#define P4_NMI_CRU_ESCR0 P4_ESCR_EVENT_SELECT(0x3F)
#define P4_NMI_IQ_CCCR0 \
    (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
     P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
97 int __init check_nmi_watchdog (void)
98 {
99 static unsigned int __initdata prev_nmi_count[NR_CPUS];
100 int cpu;
102 if ( !nmi_watchdog )
103 return 0;
105 printk("Testing NMI watchdog --- ");
107 for ( cpu = 0; cpu < NR_CPUS; cpu++ )
108 prev_nmi_count[cpu] = nmi_count(cpu);
109 local_irq_enable();
110 mdelay((10*1000)/nmi_hz); /* wait 10 ticks */
112 for ( cpu = 0; cpu < NR_CPUS; cpu++ )
113 {
114 if ( !cpu_isset(cpu, cpu_callin_map) &&
115 !cpu_isset(cpu, cpu_online_map) )
116 continue;
117 if ( nmi_count(cpu) - prev_nmi_count[cpu] <= 5 )
118 printk("CPU#%d stuck. ", cpu);
119 else
120 printk("CPU#%d okay. ", cpu);
121 }
123 printk("\n");
125 /*
126 * Now that we know it works we can reduce NMI frequency to
127 * something more reasonable; makes a difference in some configs.
128 * There's a limit to how slow we can go because writing the perfctr
129 * MSRs only sets the low 32 bits, with the top 8 bits sign-extended
130 * from those, so it's not possible to set up a delay larger than
131 * 2^31 cycles and smaller than (2^40 - 2^31) cycles.
132 * (Intel SDM, section 18.22.2)
133 */
134 if ( nmi_watchdog == NMI_LOCAL_APIC )
135 nmi_hz = max(1ul, cpu_khz >> 20);
137 return 0;
138 }
140 static void nmi_timer_fn(void *unused)
141 {
142 this_cpu(nmi_timer_ticks)++;
143 set_timer(&this_cpu(nmi_timer), NOW() + MILLISECS(1000));
144 }
146 static void disable_lapic_nmi_watchdog(void)
147 {
148 if (nmi_active <= 0)
149 return;
150 switch (boot_cpu_data.x86_vendor) {
151 case X86_VENDOR_AMD:
152 wrmsr(MSR_K7_EVNTSEL0, 0, 0);
153 break;
154 case X86_VENDOR_INTEL:
155 switch (boot_cpu_data.x86) {
156 case 6:
157 if (boot_cpu_data.x86_model > 0xd)
158 break;
160 wrmsr(MSR_P6_EVNTSEL0, 0, 0);
161 break;
162 case 15:
163 if (boot_cpu_data.x86_model > 0x4)
164 break;
166 wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
167 wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
168 break;
169 }
170 break;
171 }
172 nmi_active = -1;
173 /* tell do_nmi() and others that we're not active any more */
174 nmi_watchdog = 0;
175 }
177 static void enable_lapic_nmi_watchdog(void)
178 {
179 if (nmi_active < 0) {
180 nmi_watchdog = NMI_LOCAL_APIC;
181 setup_apic_nmi_watchdog();
182 }
183 }
185 int reserve_lapic_nmi(void)
186 {
187 unsigned int old_owner;
189 spin_lock(&lapic_nmi_owner_lock);
190 old_owner = lapic_nmi_owner;
191 lapic_nmi_owner |= LAPIC_NMI_RESERVED;
192 spin_unlock(&lapic_nmi_owner_lock);
193 if (old_owner & LAPIC_NMI_RESERVED)
194 return -EBUSY;
195 if (old_owner & LAPIC_NMI_WATCHDOG)
196 disable_lapic_nmi_watchdog();
197 return 0;
198 }
200 void release_lapic_nmi(void)
201 {
202 unsigned int new_owner;
204 spin_lock(&lapic_nmi_owner_lock);
205 new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED;
206 lapic_nmi_owner = new_owner;
207 spin_unlock(&lapic_nmi_owner_lock);
208 if (new_owner & LAPIC_NMI_WATCHDOG)
209 enable_lapic_nmi_watchdog();
210 }
212 #define __pminit __devinit
214 /*
215 * Activate the NMI watchdog via the local APIC.
216 * Original code written by Keith Owens.
217 */
219 static void __pminit clear_msr_range(unsigned int base, unsigned int n)
220 {
221 unsigned int i;
223 for (i = 0; i < n; i++)
224 wrmsr(base+i, 0, 0);
225 }
227 static inline void write_watchdog_counter(const char *descr)
228 {
229 u64 count = (u64)cpu_khz * 1000;
231 do_div(count, nmi_hz);
232 if(descr)
233 Dprintk("setting %s to -0x%08Lx\n", descr, count);
234 wrmsrl(nmi_perfctr_msr, 0 - count);
235 }
237 static void __pminit setup_k7_watchdog(void)
238 {
239 unsigned int evntsel;
241 nmi_perfctr_msr = MSR_K7_PERFCTR0;
243 clear_msr_range(MSR_K7_EVNTSEL0, 4);
244 clear_msr_range(MSR_K7_PERFCTR0, 4);
246 evntsel = K7_EVNTSEL_INT
247 | K7_EVNTSEL_OS
248 | K7_EVNTSEL_USR
249 | K7_NMI_EVENT;
251 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
252 write_watchdog_counter("K7_PERFCTR0");
253 apic_write(APIC_LVTPC, APIC_DM_NMI);
254 evntsel |= K7_EVNTSEL_ENABLE;
255 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
256 }
258 static void __pminit setup_p6_watchdog(unsigned counter)
259 {
260 unsigned int evntsel;
262 nmi_perfctr_msr = MSR_P6_PERFCTR0;
264 clear_msr_range(MSR_P6_EVNTSEL0, 2);
265 clear_msr_range(MSR_P6_PERFCTR0, 2);
267 evntsel = P6_EVNTSEL_INT
268 | P6_EVNTSEL_OS
269 | P6_EVNTSEL_USR
270 | counter;
272 wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
273 write_watchdog_counter("P6_PERFCTR0");
274 apic_write(APIC_LVTPC, APIC_DM_NMI);
275 evntsel |= P6_EVNTSEL0_ENABLE;
276 wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
277 }
279 static int __pminit setup_p4_watchdog(void)
280 {
281 unsigned int misc_enable, dummy;
283 rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
284 if (!(misc_enable & MSR_IA32_MISC_ENABLE_PERF_AVAIL))
285 return 0;
287 nmi_perfctr_msr = MSR_P4_IQ_PERFCTR0;
288 nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
289 if ( boot_cpu_data.x86_num_siblings == 2 )
290 nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
292 if (!(misc_enable & MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL))
293 clear_msr_range(0x3F1, 2);
294 /* MSR 0x3F0 seems to have a default value of 0xFC00, but current
295 docs doesn't fully define it, so leave it alone for now. */
296 if (boot_cpu_data.x86_model >= 0x3) {
297 /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */
298 clear_msr_range(0x3A0, 26);
299 clear_msr_range(0x3BC, 3);
300 } else {
301 clear_msr_range(0x3A0, 31);
302 }
303 clear_msr_range(0x3C0, 6);
304 clear_msr_range(0x3C8, 6);
305 clear_msr_range(0x3E0, 2);
306 clear_msr_range(MSR_P4_BPU_CCCR0, 18);
307 clear_msr_range(MSR_P4_BPU_PERFCTR0, 18);
309 wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
310 wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
311 write_watchdog_counter("P4_IQ_COUNTER0");
312 apic_write(APIC_LVTPC, APIC_DM_NMI);
313 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
314 return 1;
315 }
317 void __pminit setup_apic_nmi_watchdog(void)
318 {
319 if (!nmi_watchdog)
320 return;
322 switch (boot_cpu_data.x86_vendor) {
323 case X86_VENDOR_AMD:
324 switch (boot_cpu_data.x86) {
325 case 6:
326 case 15 ... 17:
327 setup_k7_watchdog();
328 break;
329 default:
330 return;
331 }
332 break;
333 case X86_VENDOR_INTEL:
334 switch (boot_cpu_data.x86) {
335 case 6:
336 setup_p6_watchdog((boot_cpu_data.x86_model < 14)
337 ? P6_EVENT_CPU_CLOCKS_NOT_HALTED
338 : CORE_EVENT_CPU_CLOCKS_NOT_HALTED);
339 break;
340 case 15:
341 if (!setup_p4_watchdog())
342 return;
343 break;
344 default:
345 return;
346 }
347 break;
348 default:
349 return;
350 }
352 lapic_nmi_owner = LAPIC_NMI_WATCHDOG;
353 nmi_active = 1;
354 }
356 static DEFINE_PER_CPU(unsigned int, last_irq_sums);
357 static DEFINE_PER_CPU(unsigned int, alert_counter);
359 static atomic_t watchdog_disable_count = ATOMIC_INIT(1);
361 void watchdog_disable(void)
362 {
363 atomic_inc(&watchdog_disable_count);
364 }
366 void watchdog_enable(void)
367 {
368 static unsigned long heartbeat_initialised;
369 unsigned int cpu;
371 if ( !atomic_dec_and_test(&watchdog_disable_count) ||
372 test_and_set_bit(0, &heartbeat_initialised) )
373 return;
375 /*
376 * Activate periodic heartbeats. We cannot do this earlier during
377 * setup because the timer infrastructure is not available.
378 */
379 for_each_online_cpu ( cpu )
380 {
381 init_timer(&per_cpu(nmi_timer, cpu), nmi_timer_fn, NULL, cpu);
382 set_timer(&per_cpu(nmi_timer, cpu), NOW());
383 }
384 }
386 void nmi_watchdog_tick(struct cpu_user_regs * regs)
387 {
388 unsigned int sum = this_cpu(nmi_timer_ticks);
390 if ( (this_cpu(last_irq_sums) == sum) &&
391 !atomic_read(&watchdog_disable_count) )
392 {
393 /*
394 * Ayiee, looks like this CPU is stuck ... wait a few IRQs (5 seconds)
395 * before doing the oops ...
396 */
397 this_cpu(alert_counter)++;
398 if ( this_cpu(alert_counter) == 5*nmi_hz )
399 {
400 console_force_unlock();
401 printk("Watchdog timer detects that CPU%d is stuck!\n",
402 smp_processor_id());
403 fatal_trap(TRAP_nmi, regs);
404 }
405 }
406 else
407 {
408 this_cpu(last_irq_sums) = sum;
409 this_cpu(alert_counter) = 0;
410 }
412 if ( nmi_perfctr_msr )
413 {
414 if ( nmi_perfctr_msr == MSR_P4_IQ_PERFCTR0 )
415 {
416 /*
417 * P4 quirks:
418 * - An overflown perfctr will assert its interrupt
419 * until the OVF flag in its CCCR is cleared.
420 * - LVTPC is masked on interrupt and must be
421 * unmasked by the LVTPC handler.
422 */
423 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
424 apic_write(APIC_LVTPC, APIC_DM_NMI);
425 }
426 else if ( nmi_perfctr_msr == MSR_P6_PERFCTR0 )
427 {
428 /*
429 * Only P6 based Pentium M need to re-unmask the apic vector but
430 * it doesn't hurt other P6 variants.
431 */
432 apic_write(APIC_LVTPC, APIC_DM_NMI);
433 }
434 write_watchdog_counter(NULL);
435 }
436 }
438 /*
439 * For some reason the destination shorthand for self is not valid
440 * when used with the NMI delivery mode. This is documented in Tables
441 * 8-3 and 8-4 in IA32 Reference Manual Volume 3. We send the IPI to
442 * our own APIC ID explicitly which is valid.
443 */
444 static void do_nmi_trigger(unsigned char key)
445 {
446 u32 id = get_apic_id();
448 printk("Triggering NMI on APIC ID %x\n", id);
450 local_irq_disable();
451 apic_wait_icr_idle();
452 apic_icr_write(APIC_DM_NMI | APIC_DEST_PHYSICAL, id);
453 local_irq_enable();
454 }
456 static void do_nmi_stats(unsigned char key)
457 {
458 int i;
459 struct domain *d;
460 struct vcpu *v;
462 printk("CPU\tNMI\n");
463 for_each_cpu ( i )
464 printk("%3d\t%3d\n", i, nmi_count(i));
466 if ( ((d = dom0) == NULL) || (d->vcpu == NULL) ||
467 ((v = d->vcpu[0]) == NULL) )
468 return;
470 if ( v->nmi_pending || (v->trap_priority >= VCPU_TRAP_NMI) )
471 printk("dom0 vpu0: NMI %s%s\n",
472 v->nmi_pending ? "pending " : "",
473 (v->trap_priority >= VCPU_TRAP_NMI) ? "masked " : "");
474 else
475 printk("dom0 vcpu0: NMI neither pending nor masked\n");
476 }
478 static __init int register_nmi_trigger(void)
479 {
480 register_keyhandler('n', do_nmi_trigger, "trigger an NMI");
481 register_keyhandler('N', do_nmi_stats, "NMI statistics");
482 return 0;
483 }
484 __initcall(register_nmi_trigger);