/root/src/xen/xen/arch/x86/oprofile/op_model_p4.c
Line | Count | Source (jump to first uncovered line) |
1 | | /** |
2 | | * @file op_model_p4.c |
3 | | * P4 model-specific MSR operations |
4 | | * |
5 | | * @remark Copyright 2002 OProfile authors |
6 | | * @remark Read the file COPYING |
7 | | * |
8 | | * @author Graydon Hoare |
9 | | */ |
10 | | |
11 | | #include <xen/types.h> |
12 | | #include <asm/msr.h> |
13 | | #include <asm/io.h> |
14 | | #include <asm/apic.h> |
15 | | #include <asm/processor.h> |
16 | | #include <xen/xenoprof.h> |
17 | | #include <asm/regs.h> |
18 | | #include <asm/current.h> |
19 | | |
20 | | #include "op_x86_model.h" |
21 | | #include "op_counter.h" |
22 | | |
/* number of entries in the p4_events[] binding table below */
#define NUM_EVENTS 39

/* non-hyper-threaded P4: each logical CPU owns all 8 usable counters */
#define NUM_COUNTERS_NON_HT 8
#define NUM_ESCRS_NON_HT 45
#define NUM_CCCRS_NON_HT 18
#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)

/* hyper-threaded P4: hardware resources are split between the two
 * sibling threads, so each thread gets half of everything */
#define NUM_COUNTERS_HT2 4
#define NUM_ESCRS_HT2 23
#define NUM_CCCRS_HT2 9
#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)

/* defaults to non-HT; reduced by setup_num_counters() on HT chips */
static unsigned int num_counters = NUM_COUNTERS_NON_HT;
36 | | |
37 | | |
38 | | /* this has to be checked dynamically since the |
39 | | hyper-threadedness of a chip is discovered at |
40 | | kernel boot-time. */ |
41 | | static inline void setup_num_counters(void) |
42 | 0 | { |
43 | 0 | if (boot_cpu_data.x86_num_siblings == 2) /* XXX */ |
44 | 0 | num_counters = NUM_COUNTERS_HT2; |
45 | 0 | } |
46 | | |
47 | | static int inline addr_increment(void) |
48 | 0 | { |
49 | 0 | return boot_cpu_data.x86_num_siblings == 2 ? 2 : 1; |
50 | 0 | } |
51 | | |
52 | | |
/* tables to simulate simplified hardware view of p4 registers */
struct p4_counter_binding {
	int virt_counter;    /* CTR_* bit naming this virtual counter */
	int counter_address; /* MSR of the actual performance counter */
	int cccr_address;    /* MSR of the CCCR paired with that counter */
};

struct p4_event_binding {
	int escr_select;  /* value to put in CCCR */
	int event_select; /* value to put in ESCR */
	struct {
		int virt_counter; /* for this counter... */
		int escr_address; /* use this ESCR */
	} bindings[2]; /* one binding per stagger (even/odd thread) */
};
68 | | |
/* nb: these CTR_* defines are a duplicate of defines in
   event/i386.p4*events. */

/* bit masks identifying the virtual counters; each bit selects one
 * entry of p4_counters[] below */
#define CTR_BPU_0      (1 << 0)
#define CTR_MS_0       (1 << 1)
#define CTR_FLAME_0    (1 << 2)
#define CTR_IQ_4       (1 << 3)
#define CTR_BPU_2      (1 << 4)
#define CTR_MS_2       (1 << 5)
#define CTR_FLAME_2    (1 << 6)
#define CTR_IQ_5       (1 << 7)
81 | | |
/* maps each virtual counter bit to its perfctr MSR and paired CCCR MSR;
 * indexed (via VIRT_CTR) by counter number plus stagger offset */
static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
	{ CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
	{ CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
	{ CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
	{ CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
	{ CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
	{ CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
	{ CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
	{ CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
};
92 | | |
93 | 0 | #define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT |
94 | | |
95 | | /* All cccr we don't use. */ |
96 | | static int p4_unused_cccr[NUM_UNUSED_CCCRS] = { |
97 | | MSR_P4_BPU_CCCR1, MSR_P4_BPU_CCCR3, |
98 | | MSR_P4_MS_CCCR1, MSR_P4_MS_CCCR3, |
99 | | MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3, |
100 | | MSR_P4_IQ_CCCR0, MSR_P4_IQ_CCCR1, |
101 | | MSR_P4_IQ_CCCR2, MSR_P4_IQ_CCCR3 |
102 | | }; |
103 | | |
/* p4 event codes in libop/op_event.h are indices into this table. */

/* For each event: the ESCR-select value programmed into the CCCR, the
 * event-select value programmed into the ESCR, and up to two
 * (virtual counter, ESCR MSR) bindings — one per stagger.  A {0, 0}
 * second binding means the event is only available on one stagger. */
static const struct p4_event_binding p4_events[NUM_EVENTS] = {

	{ /* BRANCH_RETIRED */
		0x05, 0x06,
		{ {CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* MISPRED_BRANCH_RETIRED */
		0x04, 0x03,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
	},

	{ /* TC_DELIVER_MODE */
		0x01, 0x01,
		{ { CTR_MS_0, MSR_P4_TC_ESCR0},
		  { CTR_MS_2, MSR_P4_TC_ESCR1} }
	},

	{ /* BPU_FETCH_REQUEST */
		0x00, 0x03,
		{ { CTR_BPU_0, MSR_P4_BPU_ESCR0},
		  { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
	},

	{ /* ITLB_REFERENCE */
		0x03, 0x18,
		{ { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
		  { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
	},

	{ /* MEMORY_CANCEL */
		0x05, 0x02,
		{ { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
		  { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
	},

	{ /* MEMORY_COMPLETE */
		0x02, 0x08,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
	},

	{ /* LOAD_PORT_REPLAY */
		0x02, 0x04,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
	},

	{ /* STORE_PORT_REPLAY */
		0x02, 0x05,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
	},

	{ /* MOB_LOAD_REPLAY */
		0x02, 0x03,
		{ { CTR_BPU_0, MSR_P4_MOB_ESCR0},
		  { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
	},

	{ /* PAGE_WALK_TYPE */
		0x04, 0x01,
		{ { CTR_BPU_0, MSR_P4_PMH_ESCR0},
		  { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
	},

	{ /* BSQ_CACHE_REFERENCE */
		0x07, 0x0c,
		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
		  { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
	},

	{ /* IOQ_ALLOCATION */
		0x06, 0x03,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
		  { 0, 0 } }
	},

	{ /* IOQ_ACTIVE_ENTRIES */
		0x06, 0x1a,
		{ { CTR_BPU_2, MSR_P4_FSB_ESCR1},
		  { 0, 0 } }
	},

	{ /* FSB_DATA_ACTIVITY */
		0x06, 0x17,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
	},

	{ /* BSQ_ALLOCATION */
		0x07, 0x05,
		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
		  { 0, 0 } }
	},

	{ /* BSQ_ACTIVE_ENTRIES */
		0x07, 0x06,
		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
		  { 0, 0 } }
	},

	{ /* X87_ASSIST */
		0x05, 0x03,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* SSE_INPUT_ASSIST */
		0x01, 0x34,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* PACKED_SP_UOP */
		0x01, 0x08,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* PACKED_DP_UOP */
		0x01, 0x0c,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* SCALAR_SP_UOP */
		0x01, 0x0a,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* SCALAR_DP_UOP */
		0x01, 0x0e,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* 64BIT_MMX_UOP */
		0x01, 0x02,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* 128BIT_MMX_UOP */
		0x01, 0x1a,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* X87_FP_UOP */
		0x01, 0x04,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* X87_SIMD_MOVES_UOP */
		0x01, 0x2e,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* MACHINE_CLEAR */
		0x05, 0x02,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* GLOBAL_POWER_EVENTS */
		0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
	},

	{ /* TC_MS_XFER */
		0x00, 0x05,
		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
	},

	{ /* UOP_QUEUE_WRITES */
		0x00, 0x09,
		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
	},

	{ /* FRONT_END_EVENT */
		0x05, 0x08,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* EXECUTION_EVENT */
		0x05, 0x0c,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* REPLAY_EVENT */
		0x05, 0x09,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* INSTR_RETIRED */
		0x04, 0x02,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
	},

	{ /* UOPS_RETIRED */
		0x04, 0x01,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
	},

	{ /* UOP_TYPE */
		0x02, 0x02,
		{ { CTR_IQ_4, MSR_P4_RAT_ESCR0},
		  { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
	},

	{ /* RETIRED_MISPRED_BRANCH_TYPE */
		0x02, 0x05,
		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
	},

	{ /* RETIRED_BRANCH_TYPE */
		0x02, 0x04,
		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
	}
};
342 | | |
343 | | |
/* IA32_MISC_ENABLE bit 7: performance monitoring available.
 * Expansion fully parenthesized (was "(x) & 1ULL << 7", which is
 * correct only because << binds tighter than & — now explicit). */
#define MISC_PMC_ENABLED_P(x) ((x) & (1ULL << 7))

/* ESCR field helpers: CLEAR preserves only the reserved bits, the
 * SET_* macros OR in user/kernel enables (per stagger), the event
 * select and the event mask. */
#define ESCR_RESERVED_BITS 0x80000003ULL
#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1ULL) << 2))
#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1ULL) << 3))
#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1ULL)))
#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1ULL) << 1))
#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3fULL) << 25))
#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffffULL) << 9))
#define ESCR_READ(escr,ev,i) do {rdmsrl(ev->bindings[(i)].escr_address, (escr));} while (0)
#define ESCR_WRITE(escr,ev,i) do {wrmsrl(ev->bindings[(i)].escr_address, (escr));} while (0)

/* CCCR field helpers: bit 12 = enable, bits 26/27 = PMI routing to
 * thread 0/1, bit 31 = overflow flag. */
#define CCCR_RESERVED_BITS 0x38030FFFULL
#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000ULL)
#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07ULL) << 13))
#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1ULL<<26))
#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1ULL<<27))
#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1ULL<<12))
#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1ULL<<12))
#define CCCR_READ(msr_content, i) do {rdmsrl(p4_counters[(i)].cccr_address, (msr_content));} while (0)
#define CCCR_WRITE(msr_content, i) do {wrmsrl(p4_counters[(i)].cccr_address, (msr_content));} while (0)
#define CCCR_OVF_P(cccr) ((cccr) & (1ULL<<31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1ULL<<31)))

/* counters count up to overflow, so the reset value is written negated */
#define CTR_READ(msr_content,i) do {rdmsrl(p4_counters[(i)].counter_address, (msr_content));} while (0)
#define CTR_WRITE(msr_content,i) do {wrmsrl(p4_counters[(i)].counter_address, -(msr_content));} while (0)
#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000ULL))
373 | | |
374 | | |
/* this assigns a "stagger" to the current CPU, which is used throughout
   the code in this module as an extra array offset, to select the "even"
   or "odd" part of all the divided resources. */
static unsigned int get_stagger(void)
{
	int cpu = smp_processor_id();
	/* 0 if we are the first CPU in our sibling mask, 1 otherwise */
	return (cpu != cpumask_first(per_cpu(cpu_sibling_mask, cpu)));
}
383 | | |
384 | | |
/* finally, mediate access to a real hardware counter
   by passing a "virtual" counter number to this macro,
   along with your stagger setting. */
#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))

/* per-counter reload value; 0 marks a disabled counter */
static unsigned long reset_value[NUM_COUNTERS_NON_HT];
391 | | |
392 | | |
/* Record into msrs the counter and control MSR addresses this CPU will
 * use, honouring the even/odd stagger split on hyper-threaded chips. */
static void p4_fill_in_addresses(struct op_msrs * const msrs)
{
	unsigned int i;
	unsigned int addr, stag;

	setup_num_counters();
	stag = get_stagger();

	/* the counter registers we pay attention to */
	for (i = 0; i < num_counters; ++i) {
		msrs->counters[i].addr =
			p4_counters[VIRT_CTR(stag, i)].counter_address;
	}

	/* FIXME: bad feeling, we don't save the 10 counters we don't use. */

	/* 18 CCCR registers */
	for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
	     addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
		msrs->controls[i].addr = addr;
	}

	/* 43 ESCR registers in three or four discontiguous groups */
	for (addr = MSR_P4_BSU_ESCR0 + stag;
	     addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
		msrs->controls[i].addr = addr;
	}

	/* no IQ_ESCR0/1 on some models; we save BSU_ESCR0/1 a second time
	 * to avoid a special case in nmi_{save|restore}_registers() */
	if (boot_cpu_data.x86_model >= 0x3) {
		for (addr = MSR_P4_BSU_ESCR0 + stag;
		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
			msrs->controls[i].addr = addr;
		}
	} else {
		for (addr = MSR_P4_IQ_ESCR0 + stag;
		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
			msrs->controls[i].addr = addr;
		}
	}

	for (addr = MSR_P4_RAT_ESCR0 + stag;
	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
		msrs->controls[i].addr = addr;
	}

	for (addr = MSR_P4_MS_ESCR0 + stag;
	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
		msrs->controls[i].addr = addr;
	}

	for (addr = MSR_P4_IX_ESCR0 + stag;
	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
		msrs->controls[i].addr = addr;
	}

	/* there are 2 remaining non-contiguously located ESCRs */

	if (num_counters == NUM_COUNTERS_NON_HT) {
		/* standard non-HT CPUs handle both remaining ESCRs */
		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

	} else if (stag == 0) {
		/* HT CPUs give the first remainder to the even thread, as
		   the last control register */
		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

	} else {
		/* and two copies of the second to the odd thread, for the
		   two remaining control register slots */
		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
	}
}
469 | | |
470 | | |
/* Program the ESCR and CCCR backing virtual counter ctr according to
 * counter_config[ctr] (event, unit mask, user/kernel enables).  Logs an
 * error and leaves the hardware untouched if the event code is out of
 * range or has no binding for this counter/stagger. */
static void pmc_setup_one_p4_counter(unsigned int ctr)
{
	int i;
	int const maxbind = 2;
	uint64_t cccr = 0;
	uint64_t escr = 0;
	unsigned int counter_bit;
	const struct p4_event_binding *ev = NULL;
	unsigned int stag;

	stag = get_stagger();

	/* convert from counter *number* to counter *bit* */
	counter_bit = 1 << VIRT_CTR(stag, ctr);

	/* find our event binding structure. */
	if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
		printk(KERN_ERR "oprofile: P4 event code %#lx out of range\n",
		       counter_config[ctr].event);
		return;
	}

	/* event codes are 1-based indices into p4_events */
	ev = &(p4_events[counter_config[ctr].event - 1]);

	for (i = 0; i < maxbind; i++) {
		if (ev->bindings[i].virt_counter & counter_bit) {

			/* modify ESCR: clear, then set the user/kernel
			 * enables for our stagger plus event select/mask */
			ESCR_READ(escr, ev, i);
			ESCR_CLEAR(escr);
			if (stag == 0) {
				ESCR_SET_USR_0(escr, counter_config[ctr].user);
				ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
			} else {
				ESCR_SET_USR_1(escr, counter_config[ctr].user);
				ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
			}
			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
			ESCR_WRITE(escr, ev, i);

			/* modify CCCR: route the PMI to our thread */
			CCCR_READ(cccr, VIRT_CTR(stag, ctr));
			CCCR_CLEAR(cccr);
			CCCR_SET_REQUIRED_BITS(cccr);
			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
			if (stag == 0) {
				CCCR_SET_PMI_OVF_0(cccr);
			} else {
				CCCR_SET_PMI_OVF_1(cccr);
			}
			CCCR_WRITE(cccr, VIRT_CTR(stag, ctr));
			return;
		}
	}

	printk(KERN_ERR
	       "oprofile: P4 event code %#lx no binding, stag %d ctr %d\n",
	       counter_config[ctr].event, stag, ctr);
}
531 | | |
532 | | |
533 | | static void p4_setup_ctrs(struct op_msrs const * const msrs) |
534 | 0 | { |
535 | 0 | unsigned int i; |
536 | 0 | uint64_t msr_content; |
537 | 0 | unsigned int addr; |
538 | 0 | unsigned int stag; |
539 | 0 |
|
540 | 0 | stag = get_stagger(); |
541 | 0 |
|
542 | 0 | rdmsrl(MSR_IA32_MISC_ENABLE, msr_content); |
543 | 0 | if (! MISC_PMC_ENABLED_P(msr_content)) { |
544 | 0 | printk(KERN_ERR "oprofile: P4 PMC not available\n"); |
545 | 0 | return; |
546 | 0 | } |
547 | 0 |
|
548 | 0 | /* clear the cccrs we will use */ |
549 | 0 | for (i = 0 ; i < num_counters ; i++) { |
550 | 0 | rdmsrl(p4_counters[VIRT_CTR(stag, i)].cccr_address, msr_content); |
551 | 0 | CCCR_CLEAR(msr_content); |
552 | 0 | CCCR_SET_REQUIRED_BITS(msr_content); |
553 | 0 | wrmsrl(p4_counters[VIRT_CTR(stag, i)].cccr_address, msr_content); |
554 | 0 | } |
555 | 0 |
|
556 | 0 | /* clear cccrs outside our concern */ |
557 | 0 | for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) { |
558 | 0 | rdmsrl(p4_unused_cccr[i], msr_content); |
559 | 0 | CCCR_CLEAR(msr_content); |
560 | 0 | CCCR_SET_REQUIRED_BITS(msr_content); |
561 | 0 | wrmsrl(p4_unused_cccr[i], msr_content); |
562 | 0 | } |
563 | 0 |
|
564 | 0 | /* clear all escrs (including those outside our concern) */ |
565 | 0 | for (addr = MSR_P4_BSU_ESCR0 + stag; |
566 | 0 | addr < MSR_P4_IQ_ESCR0; addr += addr_increment()) { |
567 | 0 | wrmsrl(addr, 0x0ULL); |
568 | 0 | } |
569 | 0 |
|
570 | 0 | /* On older models clear also MSR_P4_IQ_ESCR0/1 */ |
571 | 0 | if (boot_cpu_data.x86_model < 0x3) { |
572 | 0 | wrmsrl(MSR_P4_IQ_ESCR0, 0x0ULL); |
573 | 0 | wrmsrl(MSR_P4_IQ_ESCR1, 0x0ULL); |
574 | 0 | } |
575 | 0 |
|
576 | 0 | for (addr = MSR_P4_RAT_ESCR0 + stag; |
577 | 0 | addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { |
578 | 0 | wrmsrl(addr, 0x0ULL); |
579 | 0 | } |
580 | 0 | |
581 | 0 | for (addr = MSR_P4_MS_ESCR0 + stag; |
582 | 0 | addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){ |
583 | 0 | wrmsrl(addr, 0x0ULL); |
584 | 0 | } |
585 | 0 | |
586 | 0 | for (addr = MSR_P4_IX_ESCR0 + stag; |
587 | 0 | addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){ |
588 | 0 | wrmsrl(addr, 0x0ULL); |
589 | 0 | } |
590 | 0 |
|
591 | 0 | if (num_counters == NUM_COUNTERS_NON_HT) { |
592 | 0 | wrmsrl(MSR_P4_CRU_ESCR4, 0x0ULL); |
593 | 0 | wrmsrl(MSR_P4_CRU_ESCR5, 0x0ULL); |
594 | 0 | } else if (stag == 0) { |
595 | 0 | wrmsrl(MSR_P4_CRU_ESCR4, 0x0ULL); |
596 | 0 | } else { |
597 | 0 | wrmsrl(MSR_P4_CRU_ESCR5, 0x0ULL); |
598 | 0 | } |
599 | 0 | |
600 | 0 | /* setup all counters */ |
601 | 0 | for (i = 0 ; i < num_counters ; ++i) { |
602 | 0 | if (counter_config[i].enabled) { |
603 | 0 | reset_value[i] = counter_config[i].count; |
604 | 0 | pmc_setup_one_p4_counter(i); |
605 | 0 | CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); |
606 | 0 | } else { |
607 | 0 | reset_value[i] = 0; |
608 | 0 | } |
609 | 0 | } |
610 | 0 | } |
611 | | |
/* NMI handler body: log a sample for every armed counter that has
 * overflowed, re-arm it, and re-unmask the APIC PMC vector.  Returns
 * nonzero if at least one overflow was handled. */
static int p4_check_ctrs(unsigned int const cpu,
                         struct op_msrs const * const msrs,
                         struct cpu_user_regs const * const regs)
{
	unsigned long ctr, stag, real;
	uint64_t msr_content;
	int i;
	int ovf = 0;
	unsigned long eip = regs->rip;
	int mode = xenoprofile_get_mode(current, regs);

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {

		/* reset_value[i] == 0 means the counter is disabled */
		if (!reset_value[i])
			continue;

		/*
		 * there is some eccentricity in the hardware which
		 * requires that we perform 2 extra corrections:
		 *
		 * - check both the CCCR:OVF flag for overflow and the
		 *   counter high bit for un-flagged overflows.
		 *
		 * - write the counter back twice to ensure it gets
		 *   updated properly.
		 *
		 * the former seems to be related to extra NMIs happening
		 * during the current NMI; the latter is reported as errata
		 * N15 in intel doc 249199-029, pentium 4 specification
		 * update, though their suggested work-around does not
		 * appear to solve the problem.
		 */

		real = VIRT_CTR(stag, i);

		CCCR_READ(msr_content, real);
		CTR_READ(ctr, real);
		if (CCCR_OVF_P(msr_content) || CTR_OVERFLOW_P(ctr)) {
			xenoprof_log_event(current, regs, eip, mode, i);
			/* double CTR_WRITE is deliberate — see errata note above */
			CTR_WRITE(reset_value[i], real);
			CCCR_CLEAR_OVF(msr_content);
			CCCR_WRITE(msr_content, real);
			CTR_WRITE(reset_value[i], real);
			ovf = 1;
		}
	}

	/* P4 quirk: you have to re-unmask the apic vector */
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);

	return ovf;
}
666 | | |
667 | | |
668 | | static void p4_start(struct op_msrs const * const msrs) |
669 | 0 | { |
670 | 0 | unsigned int stag; |
671 | 0 | uint64_t msr_content; |
672 | 0 | int i; |
673 | 0 |
|
674 | 0 | stag = get_stagger(); |
675 | 0 |
|
676 | 0 | for (i = 0; i < num_counters; ++i) { |
677 | 0 | if (!reset_value[i]) |
678 | 0 | continue; |
679 | 0 | CCCR_READ(msr_content, VIRT_CTR(stag, i)); |
680 | 0 | CCCR_SET_ENABLE(msr_content); |
681 | 0 | CCCR_WRITE(msr_content, VIRT_CTR(stag, i)); |
682 | 0 | } |
683 | 0 | } |
684 | | |
685 | | |
686 | | static void p4_stop(struct op_msrs const * const msrs) |
687 | 0 | { |
688 | 0 | unsigned int stag; |
689 | 0 | uint64_t msr_content; |
690 | 0 | int i; |
691 | 0 |
|
692 | 0 | stag = get_stagger(); |
693 | 0 |
|
694 | 0 | for (i = 0; i < num_counters; ++i) { |
695 | 0 | CCCR_READ(msr_content, VIRT_CTR(stag, i)); |
696 | 0 | CCCR_SET_DISABLE(msr_content); |
697 | 0 | CCCR_WRITE(msr_content, VIRT_CTR(stag, i)); |
698 | 0 | } |
699 | 0 | } |
700 | | |
701 | | |
/* model spec for hyper-threaded P4s: resources split between siblings */
struct op_x86_model_spec const op_p4_ht2_spec = {
	.num_counters = NUM_COUNTERS_HT2,
	.num_controls = NUM_CONTROLS_HT2,
	.fill_in_addresses = &p4_fill_in_addresses,
	.setup_ctrs = &p4_setup_ctrs,
	.check_ctrs = &p4_check_ctrs,
	.start = &p4_start,
	.stop = &p4_stop
};
711 | | |
712 | | |
/* model spec for non-hyper-threaded P4s: one thread owns everything */
struct op_x86_model_spec const op_p4_spec = {
	.num_counters = NUM_COUNTERS_NON_HT,
	.num_controls = NUM_CONTROLS_NON_HT,
	.fill_in_addresses = &p4_fill_in_addresses,
	.setup_ctrs = &p4_setup_ctrs,
	.check_ctrs = &p4_check_ctrs,
	.start = &p4_start,
	.stop = &p4_stop
};