Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/arch/x86/oprofile/op_model_p4.c
Line
Count
Source (jump to first uncovered line)
1
/**
2
 * @file op_model_p4.c
3
 * P4 model-specific MSR operations
4
 *
5
 * @remark Copyright 2002 OProfile authors
6
 * @remark Read the file COPYING
7
 *
8
 * @author Graydon Hoare
9
 */
10
11
#include <xen/types.h>
12
#include <asm/msr.h>
13
#include <asm/io.h>
14
#include <asm/apic.h>
15
#include <asm/processor.h>
16
#include <xen/xenoprof.h>
17
#include <asm/regs.h>
18
#include <asm/current.h>
19
20
#include "op_x86_model.h"
21
#include "op_counter.h"
22
23
/* Number of entries in the p4_events[] table below. */
#define NUM_EVENTS 39

/* Register budget on a non-HyperThreaded P4: one thread owns everything. */
#define NUM_COUNTERS_NON_HT 8
#define NUM_ESCRS_NON_HT 45
#define NUM_CCCRS_NON_HT 18
#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)

/* With HyperThreading the register set is split between the two siblings. */
#define NUM_COUNTERS_HT2 4
#define NUM_ESCRS_HT2 23
#define NUM_CCCRS_HT2 9
#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)

/* Effective counter count; trimmed at boot by setup_num_counters(). */
static unsigned int num_counters = NUM_COUNTERS_NON_HT;
36
37
38
/* this has to be checked dynamically since the
39
   hyper-threadedness of a chip is discovered at
40
   kernel boot-time. */
41
static inline void setup_num_counters(void)
42
0
{
43
0
  if (boot_cpu_data.x86_num_siblings == 2)   /* XXX */
44
0
    num_counters = NUM_COUNTERS_HT2;
45
0
}
46
47
static int inline addr_increment(void)
48
0
{
49
0
  return boot_cpu_data.x86_num_siblings == 2 ? 2 : 1;
50
0
}
51
52
53
/* tables to simulate simplified hardware view of p4 registers */
struct p4_counter_binding {
  int virt_counter;    /* CTR_* bit naming this virtual counter */
  int counter_address; /* MSR of the hardware counter itself */
  int cccr_address;    /* MSR of the CCCR controlling it */
};

struct p4_event_binding {
  int escr_select;  /* value to put in CCCR */
  int event_select; /* value to put in ESCR */
  struct {
    int virt_counter; /* for this counter... */
    int escr_address; /* use this ESCR       */
  } bindings[2];      /* up to two usable counter/ESCR pairs */
};
68
69
/* nb: these CTR_* defines are a duplicate of defines in
   event/i386.p4*events.  One bit per virtual counter. */
#define CTR_BPU_0      (1 << 0)
#define CTR_MS_0       (1 << 1)
#define CTR_FLAME_0    (1 << 2)
#define CTR_IQ_4       (1 << 3)
#define CTR_BPU_2      (1 << 4)
#define CTR_MS_2       (1 << 5)
#define CTR_FLAME_2    (1 << 6)
#define CTR_IQ_5       (1 << 7)
81
82
static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
83
  { CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
84
  { CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
85
  { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
86
  { CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
87
  { CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
88
  { CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
89
  { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
90
  { CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
91
};
92
93
0
#define NUM_UNUSED_CCCRS  NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT
94
95
/* All cccr we don't use. */
96
static int p4_unused_cccr[NUM_UNUSED_CCCRS] = {
97
  MSR_P4_BPU_CCCR1, MSR_P4_BPU_CCCR3,
98
  MSR_P4_MS_CCCR1,  MSR_P4_MS_CCCR3,
99
  MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3,
100
  MSR_P4_IQ_CCCR0,  MSR_P4_IQ_CCCR1,
101
  MSR_P4_IQ_CCCR2,  MSR_P4_IQ_CCCR3
102
};
103
104
/* p4 event codes in libop/op_event.h are indices into this table. */
105
106
static const struct p4_event_binding p4_events[NUM_EVENTS] = {
107
  
108
  { /* BRANCH_RETIRED */
109
    0x05, 0x06, 
110
    { {CTR_IQ_4, MSR_P4_CRU_ESCR2},
111
      {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
112
  },
113
  
114
  { /* MISPRED_BRANCH_RETIRED */
115
    0x04, 0x03, 
116
    { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
117
      { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
118
  },
119
  
120
  { /* TC_DELIVER_MODE */
121
    0x01, 0x01,
122
    { { CTR_MS_0, MSR_P4_TC_ESCR0},  
123
      { CTR_MS_2, MSR_P4_TC_ESCR1} }
124
  },
125
  
126
  { /* BPU_FETCH_REQUEST */
127
    0x00, 0x03, 
128
    { { CTR_BPU_0, MSR_P4_BPU_ESCR0},
129
      { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
130
  },
131
132
  { /* ITLB_REFERENCE */
133
    0x03, 0x18,
134
    { { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
135
      { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
136
  },
137
138
  { /* MEMORY_CANCEL */
139
    0x05, 0x02,
140
    { { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
141
      { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
142
  },
143
144
  { /* MEMORY_COMPLETE */
145
    0x02, 0x08,
146
    { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
147
      { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
148
  },
149
150
  { /* LOAD_PORT_REPLAY */
151
    0x02, 0x04, 
152
    { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
153
      { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
154
  },
155
156
  { /* STORE_PORT_REPLAY */
157
    0x02, 0x05,
158
    { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
159
      { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
160
  },
161
162
  { /* MOB_LOAD_REPLAY */
163
    0x02, 0x03,
164
    { { CTR_BPU_0, MSR_P4_MOB_ESCR0},
165
      { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
166
  },
167
168
  { /* PAGE_WALK_TYPE */
169
    0x04, 0x01,
170
    { { CTR_BPU_0, MSR_P4_PMH_ESCR0},
171
      { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
172
  },
173
174
  { /* BSQ_CACHE_REFERENCE */
175
    0x07, 0x0c, 
176
    { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
177
      { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
178
  },
179
180
  { /* IOQ_ALLOCATION */
181
    0x06, 0x03, 
182
    { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
183
      { 0, 0 } }
184
  },
185
186
  { /* IOQ_ACTIVE_ENTRIES */
187
    0x06, 0x1a, 
188
    { { CTR_BPU_2, MSR_P4_FSB_ESCR1},
189
      { 0, 0 } }
190
  },
191
192
  { /* FSB_DATA_ACTIVITY */
193
    0x06, 0x17, 
194
    { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
195
      { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
196
  },
197
198
  { /* BSQ_ALLOCATION */
199
    0x07, 0x05, 
200
    { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
201
      { 0, 0 } }
202
  },
203
204
  { /* BSQ_ACTIVE_ENTRIES */
205
    0x07, 0x06,
206
    { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},  
207
      { 0, 0 } }
208
  },
209
210
  { /* X87_ASSIST */
211
    0x05, 0x03, 
212
    { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
213
      { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
214
  },
215
216
  { /* SSE_INPUT_ASSIST */
217
    0x01, 0x34,
218
    { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
219
      { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
220
  },
221
  
222
  { /* PACKED_SP_UOP */
223
    0x01, 0x08, 
224
    { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
225
      { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
226
  },
227
  
228
  { /* PACKED_DP_UOP */
229
    0x01, 0x0c, 
230
    { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
231
      { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
232
  },
233
234
  { /* SCALAR_SP_UOP */
235
    0x01, 0x0a, 
236
    { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
237
      { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
238
  },
239
240
  { /* SCALAR_DP_UOP */
241
    0x01, 0x0e,
242
    { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
243
      { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
244
  },
245
246
  { /* 64BIT_MMX_UOP */
247
    0x01, 0x02, 
248
    { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
249
      { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
250
  },
251
  
252
  { /* 128BIT_MMX_UOP */
253
    0x01, 0x1a, 
254
    { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
255
      { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
256
  },
257
258
  { /* X87_FP_UOP */
259
    0x01, 0x04, 
260
    { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
261
      { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
262
  },
263
  
264
  { /* X87_SIMD_MOVES_UOP */
265
    0x01, 0x2e, 
266
    { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
267
      { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
268
  },
269
  
270
  { /* MACHINE_CLEAR */
271
    0x05, 0x02, 
272
    { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
273
      { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
274
  },
275
276
  { /* GLOBAL_POWER_EVENTS */
277
    0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
278
    { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
279
      { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
280
  },
281
  
282
  { /* TC_MS_XFER */
283
    0x00, 0x05, 
284
    { { CTR_MS_0, MSR_P4_MS_ESCR0},
285
      { CTR_MS_2, MSR_P4_MS_ESCR1} }
286
  },
287
288
  { /* UOP_QUEUE_WRITES */
289
    0x00, 0x09,
290
    { { CTR_MS_0, MSR_P4_MS_ESCR0},
291
      { CTR_MS_2, MSR_P4_MS_ESCR1} }
292
  },
293
294
  { /* FRONT_END_EVENT */
295
    0x05, 0x08,
296
    { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
297
      { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
298
  },
299
300
  { /* EXECUTION_EVENT */
301
    0x05, 0x0c,
302
    { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
303
      { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
304
  },
305
306
  { /* REPLAY_EVENT */
307
    0x05, 0x09,
308
    { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
309
      { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
310
  },
311
312
  { /* INSTR_RETIRED */
313
    0x04, 0x02, 
314
    { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
315
      { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
316
  },
317
318
  { /* UOPS_RETIRED */
319
    0x04, 0x01,
320
    { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
321
      { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
322
  },
323
324
  { /* UOP_TYPE */    
325
    0x02, 0x02, 
326
    { { CTR_IQ_4, MSR_P4_RAT_ESCR0},
327
      { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
328
  },
329
330
  { /* RETIRED_MISPRED_BRANCH_TYPE */
331
    0x02, 0x05, 
332
    { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
333
      { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
334
  },
335
336
  { /* RETIRED_BRANCH_TYPE */
337
    0x02, 0x04,
338
    { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
339
      { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
340
  }
341
};
342
343
344
/* Bit 7 of IA32_MISC_ENABLE: performance monitoring available.
 * Explicitly parenthesized; the old form relied on << binding
 * tighter than &. */
#define MISC_PMC_ENABLED_P(x) ((x) & (1ULL << 7))

/* ESCR field helpers.  ESCR_CLEAR keeps only the reserved bits. */
#define ESCR_RESERVED_BITS 0x80000003ULL
#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1ULL) << 2))
#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1ULL) << 3))
#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1ULL)))
#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1ULL) << 1))
#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3fULL) << 25))
#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffffULL) << 9))
/* "ev" is now parenthesized so any pointer expression may be passed. */
#define ESCR_READ(escr, ev, i) \
    do { rdmsrl((ev)->bindings[(i)].escr_address, (escr)); } while (0)
#define ESCR_WRITE(escr, ev, i) \
    do { wrmsrl((ev)->bindings[(i)].escr_address, (escr)); } while (0)

/* CCCR field helpers. */
#define CCCR_RESERVED_BITS 0x38030FFFULL
#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000ULL)
#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07ULL) << 13))
#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1ULL << 26))
#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1ULL << 27))
#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1ULL << 12))
#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1ULL << 12))
#define CCCR_READ(msr_content, i) \
    do { rdmsrl(p4_counters[(i)].cccr_address, (msr_content)); } while (0)
#define CCCR_WRITE(msr_content, i) \
    do { wrmsrl(p4_counters[(i)].cccr_address, (msr_content)); } while (0)
#define CCCR_OVF_P(cccr) ((cccr) & (1ULL << 31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1ULL << 31)))

/* Counters are armed with the negated period (they count up and
 * interrupt on overflow); a clear high bit means "overflowed". */
#define CTR_READ(msr_content, i) \
    do { rdmsrl(p4_counters[(i)].counter_address, (msr_content)); } while (0)
#define CTR_WRITE(msr_content, i) \
    do { wrmsrl(p4_counters[(i)].counter_address, -(msr_content)); } while (0)
#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000ULL))
373
374
375
/* this assigns a "stagger" to the current CPU, which is used throughout
376
   the code in this module as an extra array offset, to select the "even"
377
   or "odd" part of all the divided resources. */
378
static unsigned int get_stagger(void)
379
0
{
380
0
  int cpu = smp_processor_id();
381
0
  return (cpu != cpumask_first(per_cpu(cpu_sibling_mask, cpu)));
382
0
}
383
384
385
/* finally, mediate access to a real hardware counter
386
   by passing a "virtual" counter numer to this macro,
387
   along with your stagger setting. */
388
0
#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
389
390
static unsigned long reset_value[NUM_COUNTERS_NON_HT];
391
392
393
static void p4_fill_in_addresses(struct op_msrs * const msrs)
394
0
{
395
0
  unsigned int i;
396
0
  unsigned int addr, stag;
397
0
398
0
  setup_num_counters();
399
0
  stag = get_stagger();
400
0
401
0
  /* the counter registers we pay attention to */
402
0
  for (i = 0; i < num_counters; ++i) {
403
0
    msrs->counters[i].addr = 
404
0
      p4_counters[VIRT_CTR(stag, i)].counter_address;
405
0
  }
406
0
407
0
  /* FIXME: bad feeling, we don't save the 10 counters we don't use. */
408
0
409
0
  /* 18 CCCR registers */
410
0
  for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
411
0
       addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
412
0
    msrs->controls[i].addr = addr;
413
0
  }
414
0
  
415
0
  /* 43 ESCR registers in three or four discontiguous group */
416
0
  for (addr = MSR_P4_BSU_ESCR0 + stag;
417
0
       addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
418
0
    msrs->controls[i].addr = addr;
419
0
  }
420
0
421
0
  /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1
422
0
   * to avoid special case in nmi_{save|restore}_registers() */
423
0
  if (boot_cpu_data.x86_model >= 0x3) {
424
0
    for (addr = MSR_P4_BSU_ESCR0 + stag;
425
0
         addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
426
0
      msrs->controls[i].addr = addr;
427
0
    }
428
0
  } else {
429
0
    for (addr = MSR_P4_IQ_ESCR0 + stag;
430
0
         addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
431
0
      msrs->controls[i].addr = addr;
432
0
    }
433
0
  }
434
0
435
0
  for (addr = MSR_P4_RAT_ESCR0 + stag;
436
0
       addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
437
0
    msrs->controls[i].addr = addr;
438
0
  }
439
0
  
440
0
  for (addr = MSR_P4_MS_ESCR0 + stag;
441
0
       addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { 
442
0
    msrs->controls[i].addr = addr;
443
0
  }
444
0
  
445
0
  for (addr = MSR_P4_IX_ESCR0 + stag;
446
0
       addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { 
447
0
    msrs->controls[i].addr = addr;
448
0
  }
449
0
450
0
  /* there are 2 remaining non-contiguously located ESCRs */
451
0
452
0
  if (num_counters == NUM_COUNTERS_NON_HT) {   
453
0
    /* standard non-HT CPUs handle both remaining ESCRs*/
454
0
    msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
455
0
    msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
456
0
457
0
  } else if (stag == 0) {
458
0
    /* HT CPUs give the first remainder to the even thread, as
459
0
       the 32nd control register */
460
0
    msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
461
0
462
0
  } else {
463
0
    /* and two copies of the second to the odd thread,
464
0
       for the 22st and 23nd control registers */
465
0
    msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
466
0
    msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
467
0
  }
468
0
}
469
470
471
static void pmc_setup_one_p4_counter(unsigned int ctr)
472
0
{
473
0
  int i;
474
0
  int const maxbind = 2;
475
0
  uint64_t cccr = 0;
476
0
  uint64_t escr = 0;
477
0
  unsigned int counter_bit;
478
0
  const struct p4_event_binding *ev = NULL;
479
0
  unsigned int stag;
480
0
481
0
  stag = get_stagger();
482
0
  
483
0
  /* convert from counter *number* to counter *bit* */
484
0
  counter_bit = 1 << VIRT_CTR(stag, ctr);
485
0
  
486
0
  /* find our event binding structure. */
487
0
  if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
488
0
    printk(KERN_ERR "oprofile: P4 event code %#lx out of range\n",
489
0
           counter_config[ctr].event);
490
0
    return;
491
0
  }
492
0
  
493
0
  ev = &(p4_events[counter_config[ctr].event - 1]);
494
0
  
495
0
  for (i = 0; i < maxbind; i++) {
496
0
    if (ev->bindings[i].virt_counter & counter_bit) {
497
0
498
0
      /* modify ESCR */
499
0
      ESCR_READ(escr, ev, i);
500
0
      ESCR_CLEAR(escr);
501
0
      if (stag == 0) {
502
0
        ESCR_SET_USR_0(escr, counter_config[ctr].user);
503
0
        ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
504
0
      } else {
505
0
        ESCR_SET_USR_1(escr, counter_config[ctr].user);
506
0
        ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
507
0
      }
508
0
      ESCR_SET_EVENT_SELECT(escr, ev->event_select);
509
0
      ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);      
510
0
      ESCR_WRITE(escr, ev, i);
511
0
           
512
0
      /* modify CCCR */
513
0
      CCCR_READ(cccr, VIRT_CTR(stag, ctr));
514
0
      CCCR_CLEAR(cccr);
515
0
      CCCR_SET_REQUIRED_BITS(cccr);
516
0
      CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
517
0
      if (stag == 0) {
518
0
        CCCR_SET_PMI_OVF_0(cccr);
519
0
      } else {
520
0
        CCCR_SET_PMI_OVF_1(cccr);
521
0
      }
522
0
      CCCR_WRITE(cccr, VIRT_CTR(stag, ctr));
523
0
      return;
524
0
    }
525
0
  }
526
0
527
0
  printk(KERN_ERR 
528
0
         "oprofile: P4 event code %#lx no binding, stag %d ctr %d\n",
529
0
         counter_config[ctr].event, stag, ctr);
530
0
}
531
532
533
static void p4_setup_ctrs(struct op_msrs const * const msrs)
534
0
{
535
0
  unsigned int i;
536
0
  uint64_t msr_content;
537
0
  unsigned int addr;
538
0
  unsigned int stag;
539
0
540
0
  stag = get_stagger();
541
0
542
0
  rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
543
0
  if (! MISC_PMC_ENABLED_P(msr_content)) {
544
0
    printk(KERN_ERR "oprofile: P4 PMC not available\n");
545
0
    return;
546
0
  }
547
0
548
0
  /* clear the cccrs we will use */
549
0
  for (i = 0 ; i < num_counters ; i++) {
550
0
    rdmsrl(p4_counters[VIRT_CTR(stag, i)].cccr_address, msr_content);
551
0
    CCCR_CLEAR(msr_content);
552
0
    CCCR_SET_REQUIRED_BITS(msr_content);
553
0
    wrmsrl(p4_counters[VIRT_CTR(stag, i)].cccr_address, msr_content);
554
0
  }
555
0
556
0
  /* clear cccrs outside our concern */
557
0
  for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) {
558
0
    rdmsrl(p4_unused_cccr[i], msr_content);
559
0
    CCCR_CLEAR(msr_content);
560
0
    CCCR_SET_REQUIRED_BITS(msr_content);
561
0
    wrmsrl(p4_unused_cccr[i], msr_content);
562
0
  }
563
0
564
0
  /* clear all escrs (including those outside our concern) */
565
0
  for (addr = MSR_P4_BSU_ESCR0 + stag;
566
0
       addr <  MSR_P4_IQ_ESCR0; addr += addr_increment()) {
567
0
    wrmsrl(addr, 0x0ULL);
568
0
  }
569
0
570
0
  /* On older models clear also MSR_P4_IQ_ESCR0/1 */
571
0
  if (boot_cpu_data.x86_model < 0x3) {
572
0
    wrmsrl(MSR_P4_IQ_ESCR0, 0x0ULL);
573
0
    wrmsrl(MSR_P4_IQ_ESCR1, 0x0ULL);
574
0
  }
575
0
576
0
  for (addr = MSR_P4_RAT_ESCR0 + stag;
577
0
       addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
578
0
    wrmsrl(addr, 0x0ULL);
579
0
  }
580
0
  
581
0
  for (addr = MSR_P4_MS_ESCR0 + stag;
582
0
       addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){ 
583
0
    wrmsrl(addr, 0x0ULL);
584
0
  }
585
0
  
586
0
  for (addr = MSR_P4_IX_ESCR0 + stag;
587
0
       addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){ 
588
0
    wrmsrl(addr, 0x0ULL);
589
0
  }
590
0
591
0
  if (num_counters == NUM_COUNTERS_NON_HT) {   
592
0
    wrmsrl(MSR_P4_CRU_ESCR4, 0x0ULL);
593
0
    wrmsrl(MSR_P4_CRU_ESCR5, 0x0ULL);
594
0
  } else if (stag == 0) {
595
0
    wrmsrl(MSR_P4_CRU_ESCR4, 0x0ULL);
596
0
  } else {
597
0
    wrmsrl(MSR_P4_CRU_ESCR5, 0x0ULL);
598
0
  }    
599
0
  
600
0
  /* setup all counters */
601
0
  for (i = 0 ; i < num_counters ; ++i) {
602
0
    if (counter_config[i].enabled) {
603
0
      reset_value[i] = counter_config[i].count;
604
0
      pmc_setup_one_p4_counter(i);
605
0
      CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
606
0
    } else {
607
0
      reset_value[i] = 0;
608
0
    }
609
0
  }
610
0
}
611
612
static int p4_check_ctrs(unsigned int const cpu,
613
                         struct op_msrs const * const msrs,
614
                         struct cpu_user_regs const * const regs)
615
0
{
616
0
  unsigned long ctr, stag, real;
617
0
  uint64_t msr_content;
618
0
  int i;
619
0
  int ovf = 0;
620
0
  unsigned long eip = regs->rip;
621
0
  int mode = xenoprofile_get_mode(current, regs);
622
0
623
0
  stag = get_stagger();
624
0
625
0
  for (i = 0; i < num_counters; ++i) {
626
0
    
627
0
    if (!reset_value[i]) 
628
0
      continue;
629
0
630
0
    /* 
631
0
     * there is some eccentricity in the hardware which
632
0
     * requires that we perform 2 extra corrections:
633
0
     *
634
0
     * - check both the CCCR:OVF flag for overflow and the
635
0
     *   counter high bit for un-flagged overflows.
636
0
     *
637
0
     * - write the counter back twice to ensure it gets
638
0
     *   updated properly.
639
0
     * 
640
0
     * the former seems to be related to extra NMIs happening
641
0
     * during the current NMI; the latter is reported as errata
642
0
     * N15 in intel doc 249199-029, pentium 4 specification
643
0
     * update, though their suggested work-around does not
644
0
     * appear to solve the problem.
645
0
     */
646
0
    
647
0
    real = VIRT_CTR(stag, i);
648
0
649
0
    CCCR_READ(msr_content, real);
650
0
    CTR_READ(ctr, real);
651
0
    if (CCCR_OVF_P(msr_content) || CTR_OVERFLOW_P(ctr)) {
652
0
      xenoprof_log_event(current, regs, eip, mode, i);
653
0
      CTR_WRITE(reset_value[i], real);
654
0
      CCCR_CLEAR_OVF(msr_content);
655
0
      CCCR_WRITE(msr_content, real);
656
0
      CTR_WRITE(reset_value[i], real);
657
0
      ovf = 1;
658
0
    }
659
0
  }
660
0
661
0
  /* P4 quirk: you have to re-unmask the apic vector */
662
0
  apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
663
0
664
0
  return ovf;
665
0
}
666
667
668
static void p4_start(struct op_msrs const * const msrs)
669
0
{
670
0
  unsigned int stag;
671
0
  uint64_t msr_content;
672
0
  int i;
673
0
674
0
  stag = get_stagger();
675
0
676
0
  for (i = 0; i < num_counters; ++i) {
677
0
    if (!reset_value[i])
678
0
      continue;
679
0
    CCCR_READ(msr_content, VIRT_CTR(stag, i));
680
0
    CCCR_SET_ENABLE(msr_content);
681
0
    CCCR_WRITE(msr_content, VIRT_CTR(stag, i));
682
0
  }
683
0
}
684
685
686
static void p4_stop(struct op_msrs const * const msrs)
687
0
{
688
0
  unsigned int stag;
689
0
  uint64_t msr_content;
690
0
  int i;
691
0
692
0
  stag = get_stagger();
693
0
694
0
  for (i = 0; i < num_counters; ++i) {
695
0
    CCCR_READ(msr_content, VIRT_CTR(stag, i));
696
0
    CCCR_SET_DISABLE(msr_content);
697
0
    CCCR_WRITE(msr_content, VIRT_CTR(stag, i));
698
0
  }
699
0
}
700
701
702
struct op_x86_model_spec const op_p4_ht2_spec = {
703
  .num_counters = NUM_COUNTERS_HT2,
704
  .num_controls = NUM_CONTROLS_HT2,
705
  .fill_in_addresses = &p4_fill_in_addresses,
706
  .setup_ctrs = &p4_setup_ctrs,
707
  .check_ctrs = &p4_check_ctrs,
708
  .start = &p4_start,
709
  .stop = &p4_stop
710
};
711
712
713
struct op_x86_model_spec const op_p4_spec = {
714
  .num_counters = NUM_COUNTERS_NON_HT,
715
  .num_controls = NUM_CONTROLS_NON_HT,
716
  .fill_in_addresses = &p4_fill_in_addresses,
717
  .setup_ctrs = &p4_setup_ctrs,
718
  .check_ctrs = &p4_check_ctrs,
719
  .start = &p4_start,
720
  .stop = &p4_stop
721
};