debuggers.hg

view xen/arch/x86/nmi.c @ 3674:fb875591fd72

bitkeeper revision 1.1159.223.63 (42028527-fv-d9BM0_LRp8UKGP19gQ)

Fix NMI deferral.
Signed-off-by: keir.fraser@cl.cam.ac.uk
author kaf24@scramble.cl.cam.ac.uk
date Thu Feb 03 20:10:15 2005 +0000 (2005-02-03)
parents 7f2bf9fecd7e
children d1e0d9a8fde0
line source
1 /*
2 * linux/arch/i386/nmi.c
3 *
4 * NMI watchdog support on APIC systems
5 *
6 * Started by Ingo Molnar <mingo@redhat.com>
7 *
8 * Fixes:
9 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
10 * Mikael Pettersson : Power Management for local APIC NMI watchdog.
11 * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
12 * Keir Fraser : Pentium 4 Hyperthreading support
13 */
15 #include <xen/config.h>
16 #include <xen/init.h>
17 #include <xen/lib.h>
18 #include <xen/mm.h>
19 #include <xen/irq.h>
20 #include <xen/delay.h>
21 #include <xen/time.h>
22 #include <xen/sched.h>
23 #include <xen/console.h>
24 #include <asm/mc146818rtc.h>
25 #include <asm/smp.h>
26 #include <asm/msr.h>
27 #include <asm/mpspec.h>
28 #include <asm/debugger.h>
/* Selected NMI watchdog mode; NMI_NONE disables the watchdog entirely. */
30 unsigned int nmi_watchdog = NMI_NONE;
/* Runtime on/off switch checked by nmi_watchdog_tick(). */
31 unsigned int watchdog_on = 0;
/* Watchdog NMI rate; lowered to 1Hz once check_nmi_watchdog() passes. */
32 static unsigned int nmi_hz = HZ;
33 unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
/* Per-CPU hyperthread id (0 or 1); defined elsewhere in the arch code. */
35 extern int logical_proc_id[];
/* AMD K7/K8 EVNTSEL bits and the cycle-counting event used as the tick. */
37 #define K7_EVNTSEL_ENABLE (1 << 22)
38 #define K7_EVNTSEL_INT (1 << 20)
39 #define K7_EVNTSEL_OS (1 << 17)
40 #define K7_EVNTSEL_USR (1 << 16)
41 #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
42 #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
/* Intel P6-family EVNTSEL bits and the unhalted-clock event. */
44 #define P6_EVNTSEL0_ENABLE (1 << 22)
45 #define P6_EVNTSEL_INT (1 << 20)
46 #define P6_EVNTSEL_OS (1 << 17)
47 #define P6_EVNTSEL_USR (1 << 16)
48 #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
49 #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
/* Pentium 4 performance-monitoring MSRs and ESCR/CCCR field encodings. */
51 #define MSR_P4_MISC_ENABLE 0x1A0
52 #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
53 #define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12)
54 #define MSR_P4_PERFCTR0 0x300
55 #define MSR_P4_CCCR0 0x360
56 #define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
57 #define P4_ESCR_OS0 (1<<3)
58 #define P4_ESCR_USR0 (1<<2)
59 #define P4_ESCR_OS1 (1<<1)
60 #define P4_ESCR_USR1 (1<<0)
61 #define P4_CCCR_OVF_PMI0 (1<<26)
62 #define P4_CCCR_OVF_PMI1 (1<<27)
63 #define P4_CCCR_THRESHOLD(N) ((N)<<20)
64 #define P4_CCCR_COMPLEMENT (1<<19)
65 #define P4_CCCR_COMPARE (1<<18)
66 #define P4_CCCR_REQUIRED (3<<16)
67 #define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
68 #define P4_CCCR_ENABLE (1<<12)
69 /*
70 * Set up IQ_COUNTER{0,1} to behave like a clock, by having IQ_CCCR{0,1} filter
71 * CRU_ESCR0 (with any non-null event selector) through a complemented
72 * max threshold. [IA32-Vol3, Section 14.9.9]
73 */
74 #define MSR_P4_IQ_COUNTER0 0x30C
75 #define MSR_P4_IQ_COUNTER1 0x30D
76 #define MSR_P4_IQ_CCCR0 0x36C
77 #define MSR_P4_IQ_CCCR1 0x36D
78 #define MSR_P4_CRU_ESCR0 0x3B8 /* ESCR no. 4 */
/* ESCR: any non-null event, counted in both OS and USR mode, both threads. */
79 #define P4_NMI_CRU_ESCR0 \
80 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS0|P4_ESCR_USR0| \
81 P4_ESCR_OS1|P4_ESCR_USR1)
/* CCCR0/1: complemented max-threshold compare => count every cycle; PMI on overflow. */
82 #define P4_NMI_IQ_CCCR0 \
83 (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
84 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
85 #define P4_NMI_IQ_CCCR1 \
86 (P4_CCCR_OVF_PMI1|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
87 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
89 int __init check_nmi_watchdog (void)
90 {
91 unsigned int prev_nmi_count[NR_CPUS];
92 int j, cpu;
94 if ( !nmi_watchdog )
95 return 0;
97 printk("Testing NMI watchdog --- ");
99 for ( j = 0; j < smp_num_cpus; j++ )
100 {
101 cpu = cpu_logical_map(j);
102 prev_nmi_count[cpu] = nmi_count(cpu);
103 }
104 __sti();
105 mdelay((10*1000)/nmi_hz); /* wait 10 ticks */
107 for ( j = 0; j < smp_num_cpus; j++ )
108 {
109 cpu = cpu_logical_map(j);
110 if ( nmi_count(cpu) - prev_nmi_count[cpu] <= 5 )
111 printk("CPU#%d stuck. ", cpu);
112 else
113 printk("CPU#%d okay. ", cpu);
114 }
116 printk("\n");
118 /* now that we know it works we can reduce NMI frequency to
119 something more reasonable; makes a difference in some configs */
120 if ( nmi_watchdog == NMI_LOCAL_APIC )
121 nmi_hz = 1;
123 return 0;
124 }
/* No power-management hooks in this port: suspend/resume re-init is a no-op. */
126 static inline void nmi_pm_init(void) { }
/* Without a PM resume path the setup code is init-only, hence plain __init. */
127 #define __pminit __init
129 /*
130 * Activate the NMI watchdog via the local APIC.
131 * Original code written by Keith Owens.
132 */
134 static void __pminit clear_msr_range(unsigned int base, unsigned int n)
135 {
136 unsigned int i;
137 for ( i = 0; i < n; i++ )
138 wrmsr(base+i, 0, 0);
139 }
141 static void __pminit setup_k7_watchdog(void)
142 {
143 unsigned int evntsel;
145 nmi_perfctr_msr = MSR_K7_PERFCTR0;
147 clear_msr_range(MSR_K7_EVNTSEL0, 4);
148 clear_msr_range(MSR_K7_PERFCTR0, 4);
150 evntsel = K7_EVNTSEL_INT
151 | K7_EVNTSEL_OS
152 | K7_EVNTSEL_USR
153 | K7_NMI_EVENT;
155 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
156 Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
157 wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
158 apic_write(APIC_LVTPC, APIC_DM_NMI);
159 evntsel |= K7_EVNTSEL_ENABLE;
160 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
161 }
163 static void __pminit setup_p6_watchdog(void)
164 {
165 unsigned int evntsel;
167 nmi_perfctr_msr = MSR_P6_PERFCTR0;
169 clear_msr_range(MSR_P6_EVNTSEL0, 2);
170 clear_msr_range(MSR_P6_PERFCTR0, 2);
172 evntsel = P6_EVNTSEL_INT
173 | P6_EVNTSEL_OS
174 | P6_EVNTSEL_USR
175 | P6_NMI_EVENT;
177 wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
178 Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
179 wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0);
180 apic_write(APIC_LVTPC, APIC_DM_NMI);
181 evntsel |= P6_EVNTSEL0_ENABLE;
182 wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
183 }
185 static int __pminit setup_p4_watchdog(void)
186 {
187 unsigned int misc_enable, dummy;
189 rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
190 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
191 return 0;
193 nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
195 if ( logical_proc_id[smp_processor_id()] == 0 )
196 {
197 if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
198 clear_msr_range(0x3F1, 2);
199 /* MSR 0x3F0 seems to have a default value of 0xFC00, but current
200 docs doesn't fully define it, so leave it alone for now. */
201 clear_msr_range(0x3A0, 31);
202 clear_msr_range(0x3C0, 6);
203 clear_msr_range(0x3C8, 6);
204 clear_msr_range(0x3E0, 2);
205 clear_msr_range(MSR_P4_CCCR0, 18);
206 clear_msr_range(MSR_P4_PERFCTR0, 18);
208 wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
209 wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
210 Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
211 wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
212 apic_write(APIC_LVTPC, APIC_DM_NMI);
213 wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
214 }
215 else if ( logical_proc_id[smp_processor_id()] == 1 )
216 {
217 wrmsr(MSR_P4_IQ_CCCR1, P4_NMI_IQ_CCCR1 & ~P4_CCCR_ENABLE, 0);
218 Dprintk("setting P4_IQ_COUNTER2 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
219 wrmsr(MSR_P4_IQ_COUNTER1, -(cpu_khz/nmi_hz*1000), -1);
220 apic_write(APIC_LVTPC, APIC_DM_NMI);
221 wrmsr(MSR_P4_IQ_CCCR1, P4_NMI_IQ_CCCR1, 0);
222 }
223 else
224 {
225 return 0;
226 }
228 return 1;
229 }
231 void __pminit setup_apic_nmi_watchdog(void)
232 {
233 if (!nmi_watchdog)
234 return;
236 switch (boot_cpu_data.x86_vendor) {
237 case X86_VENDOR_AMD:
238 if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
239 return;
240 setup_k7_watchdog();
241 break;
242 case X86_VENDOR_INTEL:
243 switch (boot_cpu_data.x86) {
244 case 6:
245 setup_p6_watchdog();
246 break;
247 case 15:
248 if (!setup_p4_watchdog())
249 return;
250 break;
251 default:
252 return;
253 }
254 break;
255 default:
256 return;
257 }
258 nmi_pm_init();
259 }
/* Per-CPU watchdog bookkeeping, updated from NMI context. */
262 static unsigned int
263 last_irq_sums [NR_CPUS], /* last observed apic_timer_irqs[] value per CPU */
264 alert_counter [NR_CPUS]; /* consecutive watchdog ticks with no progress */
266 void touch_nmi_watchdog (void)
267 {
268 int i;
269 for (i = 0; i < smp_num_cpus; i++)
270 alert_counter[i] = 0;
271 }
/*
 * Per-CPU watchdog NMI handler.  Called on every watchdog NMI: checks that
 * the local APIC timer interrupt count is still advancing (i.e. the CPU is
 * making progress with interrupts enabled), and re-arms the performance
 * counter that generated this NMI.  Runs in NMI context.
 */
273 void nmi_watchdog_tick (struct xen_regs * regs)
274 {
275 int sum, cpu = smp_processor_id();
/* Progress indicator: the local APIC timer interrupt count for this CPU. */
277 sum = apic_timer_irqs[cpu];
279 if ( (last_irq_sums[cpu] == sum) && watchdog_on )
280 {
281 /*
282 * Ayiee, looks like this CPU is stuck ... wait a few IRQs (5 seconds)
283 * before doing the oops ...
284 */
285 alert_counter[cpu]++;
/* 5*nmi_hz consecutive stalled ticks == ~5 seconds with no timer IRQs. */
286 if ( alert_counter[cpu] == 5*nmi_hz )
287 {
/* Force the console lock so the message gets out even if a CPU died holding it. */
288 console_force_unlock();
289 printk("Watchdog timer detects that CPU%d is stuck!\n", cpu);
290 fatal_trap(TRAP_nmi, regs);
291 }
292 }
293 else
294 {
/* Progress observed: remember the new count and clear the stall counter. */
295 last_irq_sums[cpu] = sum;
296 alert_counter[cpu] = 0;
297 }
/*
 * Re-arm the performance counter for the next tick.  The order matters:
 * clear the CCCR overflow state first, rewrite LVTPC (NOTE(review):
 * presumably because the P4 masks the LVTPC entry when the PMI is
 * delivered — confirm against the Intel SDM), then reload the counter.
 */
299 if ( nmi_perfctr_msr )
300 {
301 if ( nmi_perfctr_msr == MSR_P4_IQ_COUNTER0 )
302 {
/* Each hyperthread re-arms only its own IQ counter/CCCR pair. */
303 if ( logical_proc_id[cpu] == 0 )
304 {
305 wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
306 apic_write(APIC_LVTPC, APIC_DM_NMI);
307 wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
308 }
309 else
310 {
311 wrmsr(MSR_P4_IQ_CCCR1, P4_NMI_IQ_CCCR1, 0);
312 apic_write(APIC_LVTPC, APIC_DM_NMI);
313 wrmsr(MSR_P4_IQ_COUNTER1, -(cpu_khz/nmi_hz*1000), -1);
314 }
315 }
316 else
317 {
/* K7/P6: a simple counter reload is sufficient to re-arm. */
318 wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
319 }
320 }
321 }