debuggers.hg

view xen/arch/x86/nmi.c @ 3545:6b76ae4b9ea7

bitkeeper revision 1.1159.212.30 (41f5268cMEdHLMEMs4o0SWqVEHZvuw)

Minor cleanup. Removed some duplicate MSRs, fixed some MSR names,
added a few new MSRs and MSR bit fields.
Signed-off-by: michael.fetterman@cl.cam.ac.uk
author mafetter@fleming.research
date Mon Jan 24 16:47:08 2005 +0000 (2005-01-24)
parents d1e0d9a8fde0
children d8ba911dce48
line source
1 /*
2 * linux/arch/i386/nmi.c
3 *
4 * NMI watchdog support on APIC systems
5 *
6 * Started by Ingo Molnar <mingo@redhat.com>
7 *
8 * Fixes:
9 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
10 * Mikael Pettersson : Power Management for local APIC NMI watchdog.
11 * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
12 * Keir Fraser : Pentium 4 Hyperthreading support
13 */
15 #include <xen/config.h>
16 #include <xen/init.h>
17 #include <xen/lib.h>
18 #include <xen/mm.h>
19 #include <xen/irq.h>
20 #include <xen/delay.h>
21 #include <xen/time.h>
22 #include <xen/sched.h>
23 #include <xen/console.h>
24 #include <asm/mc146818rtc.h>
25 #include <asm/smp.h>
26 #include <asm/msr.h>
27 #include <asm/mpspec.h>
28 #include <asm/debugger.h>
30 unsigned int nmi_watchdog = NMI_NONE;
31 unsigned int watchdog_on = 0;
32 static unsigned int nmi_hz = HZ;
33 unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
35 extern int logical_proc_id[];
37 #define K7_EVNTSEL_ENABLE (1 << 22)
38 #define K7_EVNTSEL_INT (1 << 20)
39 #define K7_EVNTSEL_OS (1 << 17)
40 #define K7_EVNTSEL_USR (1 << 16)
41 #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
42 #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
44 #define P6_EVNTSEL0_ENABLE (1 << 22)
45 #define P6_EVNTSEL_INT (1 << 20)
46 #define P6_EVNTSEL_OS (1 << 17)
47 #define P6_EVNTSEL_USR (1 << 16)
48 #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
49 #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
51 #define MSR_P4_PERFCTR0 0x300
52 #define MSR_P4_CCCR0 0x360
53 #define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
54 #define P4_ESCR_OS0 (1<<3)
55 #define P4_ESCR_USR0 (1<<2)
56 #define P4_ESCR_OS1 (1<<1)
57 #define P4_ESCR_USR1 (1<<0)
58 #define P4_CCCR_OVF_PMI0 (1<<26)
59 #define P4_CCCR_OVF_PMI1 (1<<27)
60 #define P4_CCCR_THRESHOLD(N) ((N)<<20)
61 #define P4_CCCR_COMPLEMENT (1<<19)
62 #define P4_CCCR_COMPARE (1<<18)
63 #define P4_CCCR_REQUIRED (3<<16)
64 #define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
65 #define P4_CCCR_ENABLE (1<<12)
66 /*
67 * Set up IQ_COUNTER{0,1} to behave like a clock, by having IQ_CCCR{0,1} filter
68 * CRU_ESCR0 (with any non-null event selector) through a complemented
69 * max threshold. [IA32-Vol3, Section 14.9.9]
70 */
71 #define MSR_P4_IQ_COUNTER0 0x30C
72 #define MSR_P4_IQ_COUNTER1 0x30D
73 #define MSR_P4_IQ_CCCR0 0x36C
74 #define MSR_P4_IQ_CCCR1 0x36D
75 #define MSR_P4_CRU_ESCR0 0x3B8 /* ESCR no. 4 */
76 #define P4_NMI_CRU_ESCR0 \
77 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS0|P4_ESCR_USR0| \
78 P4_ESCR_OS1|P4_ESCR_USR1)
79 #define P4_NMI_IQ_CCCR0 \
80 (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
81 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
82 #define P4_NMI_IQ_CCCR1 \
83 (P4_CCCR_OVF_PMI1|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
84 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
86 int __init check_nmi_watchdog (void)
87 {
88 unsigned int prev_nmi_count[NR_CPUS];
89 int j, cpu;
91 if ( !nmi_watchdog )
92 return 0;
94 printk("Testing NMI watchdog --- ");
96 for ( j = 0; j < smp_num_cpus; j++ )
97 {
98 cpu = cpu_logical_map(j);
99 prev_nmi_count[cpu] = nmi_count(cpu);
100 }
101 __sti();
102 mdelay((10*1000)/nmi_hz); /* wait 10 ticks */
104 for ( j = 0; j < smp_num_cpus; j++ )
105 {
106 cpu = cpu_logical_map(j);
107 if ( nmi_count(cpu) - prev_nmi_count[cpu] <= 5 )
108 printk("CPU#%d stuck. ", cpu);
109 else
110 printk("CPU#%d okay. ", cpu);
111 }
113 printk("\n");
115 /* now that we know it works we can reduce NMI frequency to
116 something more reasonable; makes a difference in some configs */
117 if ( nmi_watchdog == NMI_LOCAL_APIC )
118 nmi_hz = 1;
120 return 0;
121 }
/* Watchdog setup routines carry the same section attribute as __init code. */
#define __pminit __init

/* Power-management hook: an empty stub in this file. */
static inline void nmi_pm_init(void)
{
}
126 /*
127 * Activate the NMI watchdog via the local APIC.
128 * Original code written by Keith Owens.
129 */
131 static void __pminit clear_msr_range(unsigned int base, unsigned int n)
132 {
133 unsigned int i;
134 for ( i = 0; i < n; i++ )
135 wrmsr(base+i, 0, 0);
136 }
138 static void __pminit setup_k7_watchdog(void)
139 {
140 unsigned int evntsel;
142 nmi_perfctr_msr = MSR_K7_PERFCTR0;
144 clear_msr_range(MSR_K7_EVNTSEL0, 4);
145 clear_msr_range(MSR_K7_PERFCTR0, 4);
147 evntsel = K7_EVNTSEL_INT
148 | K7_EVNTSEL_OS
149 | K7_EVNTSEL_USR
150 | K7_NMI_EVENT;
152 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
153 Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
154 wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
155 apic_write(APIC_LVTPC, APIC_DM_NMI);
156 evntsel |= K7_EVNTSEL_ENABLE;
157 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
158 }
160 static void __pminit setup_p6_watchdog(void)
161 {
162 unsigned int evntsel;
164 nmi_perfctr_msr = MSR_P6_PERFCTR0;
166 clear_msr_range(MSR_P6_EVNTSEL0, 2);
167 clear_msr_range(MSR_P6_PERFCTR0, 2);
169 evntsel = P6_EVNTSEL_INT
170 | P6_EVNTSEL_OS
171 | P6_EVNTSEL_USR
172 | P6_NMI_EVENT;
174 wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
175 Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
176 wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0);
177 apic_write(APIC_LVTPC, APIC_DM_NMI);
178 evntsel |= P6_EVNTSEL0_ENABLE;
179 wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
180 }
182 static int __pminit setup_p4_watchdog(void)
183 {
184 unsigned int misc_enable, dummy;
186 rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
187 if (!(misc_enable & MSR_IA32_MISC_ENABLE_PERF_AVAIL))
188 return 0;
190 nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
192 if ( logical_proc_id[smp_processor_id()] == 0 )
193 {
194 if (!(misc_enable & MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL))
195 clear_msr_range(0x3F1, 2);
196 /* MSR 0x3F0 seems to have a default value of 0xFC00, but current
197 docs doesn't fully define it, so leave it alone for now. */
198 clear_msr_range(0x3A0, 31);
199 clear_msr_range(0x3C0, 6);
200 clear_msr_range(0x3C8, 6);
201 clear_msr_range(0x3E0, 2);
202 clear_msr_range(MSR_P4_CCCR0, 18);
203 clear_msr_range(MSR_P4_PERFCTR0, 18);
205 wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
206 wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
207 Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
208 wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
209 apic_write(APIC_LVTPC, APIC_DM_NMI);
210 wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
211 }
212 else if ( logical_proc_id[smp_processor_id()] == 1 )
213 {
214 wrmsr(MSR_P4_IQ_CCCR1, P4_NMI_IQ_CCCR1 & ~P4_CCCR_ENABLE, 0);
215 Dprintk("setting P4_IQ_COUNTER2 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
216 wrmsr(MSR_P4_IQ_COUNTER1, -(cpu_khz/nmi_hz*1000), -1);
217 apic_write(APIC_LVTPC, APIC_DM_NMI);
218 wrmsr(MSR_P4_IQ_CCCR1, P4_NMI_IQ_CCCR1, 0);
219 }
220 else
221 {
222 return 0;
223 }
225 return 1;
226 }
228 void __pminit setup_apic_nmi_watchdog(void)
229 {
230 if (!nmi_watchdog)
231 return;
233 switch (boot_cpu_data.x86_vendor) {
234 case X86_VENDOR_AMD:
235 if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
236 return;
237 setup_k7_watchdog();
238 break;
239 case X86_VENDOR_INTEL:
240 switch (boot_cpu_data.x86) {
241 case 6:
242 setup_p6_watchdog();
243 break;
244 case 15:
245 if (!setup_p4_watchdog())
246 return;
247 break;
248 default:
249 return;
250 }
251 break;
252 default:
253 return;
254 }
255 nmi_pm_init();
256 }
259 static unsigned int
260 last_irq_sums [NR_CPUS],
261 alert_counter [NR_CPUS];
263 void touch_nmi_watchdog (void)
264 {
265 int i;
266 for (i = 0; i < smp_num_cpus; i++)
267 alert_counter[i] = 0;
268 }
270 void nmi_watchdog_tick (struct xen_regs * regs)
271 {
272 int sum, cpu = smp_processor_id();
274 sum = apic_timer_irqs[cpu];
276 if ( (last_irq_sums[cpu] == sum) && watchdog_on )
277 {
278 /*
279 * Ayiee, looks like this CPU is stuck ... wait a few IRQs (5 seconds)
280 * before doing the oops ...
281 */
282 alert_counter[cpu]++;
283 if ( alert_counter[cpu] == 5*nmi_hz )
284 {
285 console_force_unlock();
286 printk("Watchdog timer detects that CPU%d is stuck!\n", cpu);
287 fatal_trap(TRAP_nmi, regs);
288 }
289 }
290 else
291 {
292 last_irq_sums[cpu] = sum;
293 alert_counter[cpu] = 0;
294 }
296 if ( nmi_perfctr_msr )
297 {
298 if ( nmi_perfctr_msr == MSR_P4_IQ_COUNTER0 )
299 {
300 if ( logical_proc_id[cpu] == 0 )
301 {
302 wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
303 apic_write(APIC_LVTPC, APIC_DM_NMI);
304 wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
305 }
306 else
307 {
308 wrmsr(MSR_P4_IQ_CCCR1, P4_NMI_IQ_CCCR1, 0);
309 apic_write(APIC_LVTPC, APIC_DM_NMI);
310 wrmsr(MSR_P4_IQ_COUNTER1, -(cpu_khz/nmi_hz*1000), -1);
311 }
312 }
313 else
314 {
315 wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
316 }
317 }
318 }