debuggers.hg

view xen/arch/x86/oprofile/op_model_p4.c @ 19965:2dbabefe62dc

Move cpu_{sibling,core}_map into per-CPU space

These CPU maps are read from all CPUs, so besides addressing the
square(nr_cpus) growth of these objects, they are also moved into the
previously introduced read-mostly sub-section of the per-CPU section.
This avoids wasting a full cacheline just to align (and properly pad)
each of them, which would be undue overhead on systems with low
NR_CPUS.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Mon Jul 13 11:45:31 2009 +0100
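
As a minimal sketch of the pattern the message describes (an editorial
addition, not part of the changeset: it assumes Xen's
DEFINE_PER_CPU_READ_MOSTLY macro from the read-mostly per-CPU
sub-section mentioned above, and the old per_cpu()/cpus_weight()
cpumask API; cpu_has_ht_sibling() is an illustrative helper):

/* One map per CPU in the per-CPU area, instead of an NR_CPUS-sized
 * array of NR_CPUS-bit masks; the read-mostly sub-section avoids
 * per-object cacheline alignment/padding. */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_t, cpu_sibling_map);
DEFINE_PER_CPU_READ_MOSTLY(cpumask_t, cpu_core_map);

/* Readers index another CPU's copy through per_cpu(), e.g.: */
static inline int cpu_has_ht_sibling(unsigned int cpu)
{
	return cpus_weight(per_cpu(cpu_sibling_map, cpu)) > 1;
}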
/**
 * @file op_model_p4.c
 * P4 model-specific MSR operations
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author Graydon Hoare
 */

#include <xen/types.h>
#include <asm/msr.h>
#include <asm/io.h>
#include <asm/apic.h>
#include <asm/processor.h>
#include <xen/sched.h>
#include <asm/regs.h>
#include <asm/current.h>

#include "op_x86_model.h"
#include "op_counter.h"

#define NUM_EVENTS 39

#define NUM_COUNTERS_NON_HT 8
#define NUM_ESCRS_NON_HT 45
#define NUM_CCCRS_NON_HT 18
#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)

#define NUM_COUNTERS_HT2 4
#define NUM_ESCRS_HT2 23
#define NUM_CCCRS_HT2 9
#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)

static unsigned int num_counters = NUM_COUNTERS_NON_HT;

/* this has to be checked dynamically since the
   hyper-threadedness of a chip is discovered at
   kernel boot-time. */
static inline void setup_num_counters(void)
{
#ifdef CONFIG_SMP
	if (boot_cpu_data.x86_num_siblings == 2)	/* XXX */
		num_counters = NUM_COUNTERS_HT2;
#endif
}

static inline int addr_increment(void)
{
#ifdef CONFIG_SMP
	return boot_cpu_data.x86_num_siblings == 2 ? 2 : 1;
#else
	return 1;
#endif
}
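
/*
 * Editorial illustration (not in the original source): stag and
 * addr_increment() together split each contiguous MSR block between
 * hyper-threads.  A loop of the shape
 *
 *	for (addr = base + stag; addr <= last; addr += addr_increment())
 *
 * visits every register on a non-HT CPU (stag == 0, increment 1),
 * while on an HT2 CPU the even sibling visits base, base + 2, ... and
 * the odd sibling visits base + 1, base + 3, ...
 */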

/* tables to simulate simplified hardware view of p4 registers */
struct p4_counter_binding {
	int virt_counter;
	int counter_address;
	int cccr_address;
};

struct p4_event_binding {
	int escr_select;  /* value to put in CCCR */
	int event_select; /* value to put in ESCR */
	struct {
		int virt_counter; /* for this counter... */
		int escr_address; /* use this ESCR       */
	} bindings[2];
};

/* nb: these CTR_* defines are a duplicate of defines in
   event/i386.p4*events. */

#define CTR_BPU_0      (1 << 0)
#define CTR_MS_0       (1 << 1)
#define CTR_FLAME_0    (1 << 2)
#define CTR_IQ_4       (1 << 3)
#define CTR_BPU_2      (1 << 4)
#define CTR_MS_2       (1 << 5)
#define CTR_FLAME_2    (1 << 6)
#define CTR_IQ_5       (1 << 7)

static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
	{ CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
	{ CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
	{ CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
	{ CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
	{ CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
	{ CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
	{ CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
	{ CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
};

#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)

/* All CCCRs we don't use. */
static int p4_unused_cccr[NUM_UNUSED_CCCRS] = {
	MSR_P4_BPU_CCCR1,   MSR_P4_BPU_CCCR3,
	MSR_P4_MS_CCCR1,    MSR_P4_MS_CCCR3,
	MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3,
	MSR_P4_IQ_CCCR0,    MSR_P4_IQ_CCCR1,
	MSR_P4_IQ_CCCR2,    MSR_P4_IQ_CCCR3
};

/* p4 event codes in libop/op_event.h are indices into this table. */

static struct p4_event_binding p4_events[NUM_EVENTS] = {

	{ /* BRANCH_RETIRED */
		0x05, 0x06,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* MISPRED_BRANCH_RETIRED */
		0x04, 0x03,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
	},

	{ /* TC_DELIVER_MODE */
		0x01, 0x01,
		{ { CTR_MS_0, MSR_P4_TC_ESCR0},
		  { CTR_MS_2, MSR_P4_TC_ESCR1} }
	},

	{ /* BPU_FETCH_REQUEST */
		0x00, 0x03,
		{ { CTR_BPU_0, MSR_P4_BPU_ESCR0},
		  { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
	},

	{ /* ITLB_REFERENCE */
		0x03, 0x18,
		{ { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
		  { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
	},

	{ /* MEMORY_CANCEL */
		0x05, 0x02,
		{ { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
		  { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
	},

	{ /* MEMORY_COMPLETE */
		0x02, 0x08,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
	},

	{ /* LOAD_PORT_REPLAY */
		0x02, 0x04,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
	},

	{ /* STORE_PORT_REPLAY */
		0x02, 0x05,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
	},

	{ /* MOB_LOAD_REPLAY */
		0x02, 0x03,
		{ { CTR_BPU_0, MSR_P4_MOB_ESCR0},
		  { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
	},

	{ /* PAGE_WALK_TYPE */
		0x04, 0x01,
		{ { CTR_BPU_0, MSR_P4_PMH_ESCR0},
		  { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
	},

	{ /* BSQ_CACHE_REFERENCE */
		0x07, 0x0c,
		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
		  { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
	},

	{ /* IOQ_ALLOCATION */
		0x06, 0x03,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
		  { 0, 0 } }
	},

	{ /* IOQ_ACTIVE_ENTRIES */
		0x06, 0x1a,
		{ { CTR_BPU_2, MSR_P4_FSB_ESCR1},
		  { 0, 0 } }
	},

	{ /* FSB_DATA_ACTIVITY */
		0x06, 0x17,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
	},

	{ /* BSQ_ALLOCATION */
		0x07, 0x05,
		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
		  { 0, 0 } }
	},

	{ /* BSQ_ACTIVE_ENTRIES */
		0x07, 0x06,
		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
		  { 0, 0 } }
	},

	{ /* X87_ASSIST */
		0x05, 0x03,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* SSE_INPUT_ASSIST */
		0x01, 0x34,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* PACKED_SP_UOP */
		0x01, 0x08,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* PACKED_DP_UOP */
		0x01, 0x0c,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* SCALAR_SP_UOP */
		0x01, 0x0a,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* SCALAR_DP_UOP */
		0x01, 0x0e,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* 64BIT_MMX_UOP */
		0x01, 0x02,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* 128BIT_MMX_UOP */
		0x01, 0x1a,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* X87_FP_UOP */
		0x01, 0x04,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* X87_SIMD_MOVES_UOP */
		0x01, 0x2e,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* MACHINE_CLEAR */
		0x05, 0x02,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* GLOBAL_POWER_EVENTS */
		0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
	},

	{ /* TC_MS_XFER */
		0x00, 0x05,
		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
	},

	{ /* UOP_QUEUE_WRITES */
		0x00, 0x09,
		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
	},

	{ /* FRONT_END_EVENT */
		0x05, 0x08,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* EXECUTION_EVENT */
		0x05, 0x0c,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* REPLAY_EVENT */
		0x05, 0x09,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* INSTR_RETIRED */
		0x04, 0x02,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
	},

	{ /* UOPS_RETIRED */
		0x04, 0x01,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
	},

	{ /* UOP_TYPE */
		0x02, 0x02,
		{ { CTR_IQ_4, MSR_P4_RAT_ESCR0},
		  { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
	},

	{ /* RETIRED_MISPRED_BRANCH_TYPE */
		0x02, 0x05,
		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
	},

	{ /* RETIRED_BRANCH_TYPE */
		0x02, 0x04,
		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
	}
};

#define MISC_PMC_ENABLED_P(x) ((x) & (1 << 7))

#define ESCR_RESERVED_BITS 0x80000003
#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
#define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
#define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
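
/*
 * Worked example (editorial, not in the original source): composing an
 * ESCR for BRANCH_RETIRED (event_select 0x06 in the table above) on
 * the even sibling, counting both user and kernel with unit mask 0xf:
 *
 *	unsigned int escr = 0;
 *	ESCR_SET_USR_0(escr, 1);           // bit 2
 *	ESCR_SET_OS_0(escr, 1);            // bit 3
 *	ESCR_SET_EVENT_SELECT(escr, 0x06); // bits 30:25
 *	ESCR_SET_EVENT_MASK(escr, 0xf);    // bits 24:9
 *
 * yields escr == 0x0c001e0c.
 */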

#define CCCR_RESERVED_BITS 0x38030FFF
#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
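
/*
 * Worked example (editorial, not in the original source): the matching
 * CCCR for the even sibling, using BRANCH_RETIRED's escr_select 0x05:
 *
 *	unsigned int cccr = 0;
 *	CCCR_SET_REQUIRED_BITS(cccr);     // 0x00030000
 *	CCCR_SET_ESCR_SELECT(cccr, 0x05); // bits 15:13, 0x0000a000
 *	CCCR_SET_PMI_OVF_0(cccr);         // bit 26,     0x04000000
 *
 * yields cccr == 0x0403a000; CCCR_SET_ENABLE() later sets bit 12 to
 * actually start the counter.
 */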

#define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
#define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
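
/*
 * Editorial note (not in the original source): CTR_WRITE arms the
 * counter with -(u32)(l), sign-extended via the -1 high word, so a
 * counter loaded with reset value N counts up and overflows after N
 * events.  CTR_OVERFLOW_P() accordingly treats a clear bit 31 as
 * "overflowed", since an armed counter still holds a negative
 * (high-bit-set) value.  For example:
 *
 *	CTR_WRITE(100000, i);	// writes 0xfffe7960 / 0xffffffff
 */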

/* this assigns a "stagger" to the current CPU, which is used throughout
   the code in this module as an extra array offset, to select the "even"
   or "odd" part of all the divided resources. */
static unsigned int get_stagger(void)
{
#ifdef CONFIG_SMP
	int cpu = smp_processor_id();
	return (cpu != first_cpu(per_cpu(cpu_sibling_map, cpu)));
#endif
	return 0;
}

/* finally, mediate access to a real hardware counter
   by passing a "virtual" counter number to this macro,
   along with your stagger setting. */
#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
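
/*
 * Editorial illustration (not in the original source): with HT enabled,
 * num_counters == NUM_COUNTERS_HT2 == 4, so the two siblings address
 * disjoint halves of p4_counters[]:
 *
 *	VIRT_CTR(0, 0) == 0	// even sibling -> CTR_BPU_0
 *	VIRT_CTR(1, 0) == 4	// odd sibling  -> CTR_BPU_2
 *
 * On a non-HT CPU the stagger is always 0 and the mapping is the
 * identity.
 */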

static unsigned long reset_value[NUM_COUNTERS_NON_HT];

static void p4_fill_in_addresses(struct op_msrs * const msrs)
{
	unsigned int i;
	unsigned int addr, stag;

	setup_num_counters();
	stag = get_stagger();

	/* the counter registers we pay attention to */
	for (i = 0; i < num_counters; ++i) {
		msrs->counters[i].addr =
			p4_counters[VIRT_CTR(stag, i)].counter_address;
	}

	/* FIXME: bad feeling, we don't save the 10 counters we don't use. */

	/* 18 CCCR registers */
	for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
	     addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
		msrs->controls[i].addr = addr;
	}

	/* 43 ESCR registers in three or four discontiguous groups */
	for (addr = MSR_P4_BSU_ESCR0 + stag;
	     addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
		msrs->controls[i].addr = addr;
	}

	/* no IQ_ESCR0/1 on some models; save BSU_ESCR0/1 a second time
	 * instead, to avoid a special case in nmi_{save|restore}_registers() */
	if (boot_cpu_data.x86_model >= 0x3) {
		for (addr = MSR_P4_BSU_ESCR0 + stag;
		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
			msrs->controls[i].addr = addr;
		}
	} else {
		for (addr = MSR_P4_IQ_ESCR0 + stag;
		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
			msrs->controls[i].addr = addr;
		}
	}

	for (addr = MSR_P4_RAT_ESCR0 + stag;
	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
		msrs->controls[i].addr = addr;
	}

	for (addr = MSR_P4_MS_ESCR0 + stag;
	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
		msrs->controls[i].addr = addr;
	}

	for (addr = MSR_P4_IX_ESCR0 + stag;
	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
		msrs->controls[i].addr = addr;
	}

	/* there are 2 remaining non-contiguously located ESCRs */

	if (num_counters == NUM_COUNTERS_NON_HT) {
		/* standard non-HT CPUs handle both remaining ESCRs */
		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

	} else if (stag == 0) {
		/* HT CPUs give the first remainder to the even thread, as
		   the 32nd control register */
		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

	} else {
		/* and two copies of the second to the odd thread,
		   for the 22nd and 23rd control registers */
		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
	}
}

static void pmc_setup_one_p4_counter(unsigned int ctr)
{
	int i;
	int const maxbind = 2;
	unsigned int cccr = 0;
	unsigned int escr = 0;
	unsigned int high = 0;
	unsigned int counter_bit;
	struct p4_event_binding *ev = NULL;
	unsigned int stag;

	stag = get_stagger();

	/* convert from counter *number* to counter *bit* */
	counter_bit = 1 << VIRT_CTR(stag, ctr);

	/* find our event binding structure. */
	if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
		printk(KERN_ERR
		       "oprofile: P4 event code 0x%lx out of range\n",
		       counter_config[ctr].event);
		return;
	}

	ev = &(p4_events[counter_config[ctr].event - 1]);

	for (i = 0; i < maxbind; i++) {
		if (ev->bindings[i].virt_counter & counter_bit) {

			/* modify ESCR */
			ESCR_READ(escr, high, ev, i);
			ESCR_CLEAR(escr);
			if (stag == 0) {
				ESCR_SET_USR_0(escr, counter_config[ctr].user);
				ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
			} else {
				ESCR_SET_USR_1(escr, counter_config[ctr].user);
				ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
			}
			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
			ESCR_WRITE(escr, high, ev, i);

			/* modify CCCR */
			CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
			CCCR_CLEAR(cccr);
			CCCR_SET_REQUIRED_BITS(cccr);
			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
			if (stag == 0) {
				CCCR_SET_PMI_OVF_0(cccr);
			} else {
				CCCR_SET_PMI_OVF_1(cccr);
			}
			CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
			return;
		}
	}

	printk(KERN_ERR
	       "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
	       counter_config[ctr].event, stag, ctr);
}

static void p4_setup_ctrs(struct op_msrs const * const msrs)
{
	unsigned int i;
	unsigned int low, high;
	unsigned int addr;
	unsigned int stag;

	stag = get_stagger();

	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
	if (!MISC_PMC_ENABLED_P(low)) {
		printk(KERN_ERR "oprofile: P4 PMC not available\n");
		return;
	}

	/* clear the cccrs we will use */
	for (i = 0; i < num_counters; i++) {
		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
		CCCR_CLEAR(low);
		CCCR_SET_REQUIRED_BITS(low);
		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
	}

	/* clear cccrs outside our concern */
	for (i = stag; i < NUM_UNUSED_CCCRS; i += addr_increment()) {
		rdmsr(p4_unused_cccr[i], low, high);
		CCCR_CLEAR(low);
		CCCR_SET_REQUIRED_BITS(low);
		wrmsr(p4_unused_cccr[i], low, high);
	}

	/* clear all escrs (including those outside our concern) */
	for (addr = MSR_P4_BSU_ESCR0 + stag;
	     addr < MSR_P4_IQ_ESCR0; addr += addr_increment()) {
		wrmsr(addr, 0, 0);
	}

	/* On older models clear also MSR_P4_IQ_ESCR0/1 */
	if (boot_cpu_data.x86_model < 0x3) {
		wrmsr(MSR_P4_IQ_ESCR0, 0, 0);
		wrmsr(MSR_P4_IQ_ESCR1, 0, 0);
	}

	for (addr = MSR_P4_RAT_ESCR0 + stag;
	     addr <= MSR_P4_SSU_ESCR0; addr += addr_increment()) {
		wrmsr(addr, 0, 0);
	}

	for (addr = MSR_P4_MS_ESCR0 + stag;
	     addr <= MSR_P4_TC_ESCR1; addr += addr_increment()) {
		wrmsr(addr, 0, 0);
	}

	for (addr = MSR_P4_IX_ESCR0 + stag;
	     addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()) {
		wrmsr(addr, 0, 0);
	}

	if (num_counters == NUM_COUNTERS_NON_HT) {
		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
	} else if (stag == 0) {
		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
	} else {
		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
	}

	/* setup all counters */
	for (i = 0; i < num_counters; ++i) {
		if (counter_config[i].enabled) {
			reset_value[i] = counter_config[i].count;
			pmc_setup_one_p4_counter(i);
			CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
		} else {
			reset_value[i] = 0;
		}
	}
}

extern void xenoprof_log_event(struct vcpu *v, struct cpu_user_regs * regs,
			       unsigned long eip, int mode, int event);
extern int xenoprofile_get_mode(struct vcpu *v,
				struct cpu_user_regs * const regs);

static int p4_check_ctrs(unsigned int const cpu,
			 struct op_msrs const * const msrs,
			 struct cpu_user_regs * const regs)
{
	unsigned long ctr, low, high, stag, real;
	int i;
	int ovf = 0;
	unsigned long eip = regs->eip;
	int mode = xenoprofile_get_mode(current, regs);

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {

		if (!reset_value[i])
			continue;

		/*
		 * there is some eccentricity in the hardware which
		 * requires that we perform 2 extra corrections:
		 *
		 * - check both the CCCR:OVF flag for overflow and the
		 *   counter high bit for un-flagged overflows.
		 *
		 * - write the counter back twice to ensure it gets
		 *   updated properly.
		 *
		 * the former seems to be related to extra NMIs happening
		 * during the current NMI; the latter is reported as errata
		 * N15 in intel doc 249199-029, pentium 4 specification
		 * update, though their suggested work-around does not
		 * appear to solve the problem.
		 */

		real = VIRT_CTR(stag, i);

		CCCR_READ(low, high, real);
		CTR_READ(ctr, high, real);
		if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
			xenoprof_log_event(current, regs, eip, mode, i);
			CTR_WRITE(reset_value[i], real);
			CCCR_CLEAR_OVF(low);
			CCCR_WRITE(low, high, real);
			CTR_WRITE(reset_value[i], real);
			ovf = 1;
		}
	}

	/* P4 quirk: you have to re-unmask the apic vector */
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);

	return ovf;
}

static void p4_start(struct op_msrs const * const msrs)
{
	unsigned int low, high, stag;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[i])
			continue;
		CCCR_READ(low, high, VIRT_CTR(stag, i));
		CCCR_SET_ENABLE(low);
		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
	}
}

static void p4_stop(struct op_msrs const * const msrs)
{
	unsigned int low, high, stag;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {
		CCCR_READ(low, high, VIRT_CTR(stag, i));
		CCCR_SET_DISABLE(low);
		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
	}
}

#ifdef CONFIG_SMP
struct op_x86_model_spec const op_p4_ht2_spec = {
	.num_counters = NUM_COUNTERS_HT2,
	.num_controls = NUM_CONTROLS_HT2,
	.fill_in_addresses = &p4_fill_in_addresses,
	.setup_ctrs = &p4_setup_ctrs,
	.check_ctrs = &p4_check_ctrs,
	.start = &p4_start,
	.stop = &p4_stop
};
#endif

struct op_x86_model_spec const op_p4_spec = {
	.num_counters = NUM_COUNTERS_NON_HT,
	.num_controls = NUM_CONTROLS_NON_HT,
	.fill_in_addresses = &p4_fill_in_addresses,
	.setup_ctrs = &p4_setup_ctrs,
	.check_ctrs = &p4_check_ctrs,
	.start = &p4_start,
	.stop = &p4_stop
};