Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/arch/x86/cpu/mwait-idle.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * mwait_idle.c - native hardware idle loop for modern processors
3
 *
4
 * Copyright (c) 2013, Intel Corporation.
5
 * Len Brown <len.brown@intel.com>
6
 *
7
 * This program is free software; you can redistribute it and/or modify it
8
 * under the terms and conditions of the GNU General Public License,
9
 * version 2, as published by the Free Software Foundation.
10
 *
11
 * This program is distributed in the hope it will be useful, but WITHOUT
12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
14
 * more details.
15
 *
16
 * You should have received a copy of the GNU General Public License along with
17
 * this program; If not, see <http://www.gnu.org/licenses/>.
18
 */
19
20
/*
21
 * mwait_idle is a cpuidle driver that loads on specific processors
22
 * in lieu of the legacy ACPI processor_idle driver.  The intent is to
23
 * make Linux more efficient on these processors, as mwait_idle knows
24
 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
25
 */
26
27
/*
28
 * Design Assumptions
29
 *
30
 * All CPUs have same idle states as boot CPU
31
 *
32
 * Chipset BM_STS (bus master status) bit is a NOP
33
 *  for preventing entry into deep C-states
34
 */
35
36
/*
37
 * Known limitations
38
 *
39
 * The driver currently initializes for_each_online_cpu() upon load.
40
 * It is unaware of subsequent processors hot-added to the system.
41
 * This means that if you boot with maxcpus=n and later online
42
 * processors above n, those processors will use C1 only.
43
 *
44
 * ACPI has a .suspend hack to turn off deep C-states during suspend
45
 * to avoid complications with the lapic timer workaround.
46
 * Have not seen issues with suspend, but may need same workaround here.
47
 */
48
49
/* DEBUG is defined below, so pr_debug() statements are enabled; remove the define to compile them out. */
50
#define DEBUG
51
52
#include <xen/lib.h>
53
#include <xen/cpu.h>
54
#include <xen/init.h>
55
#include <xen/softirq.h>
56
#include <xen/trace.h>
57
#include <asm/cpuidle.h>
58
#include <asm/hpet.h>
59
#include <asm/mwait.h>
60
#include <asm/msr.h>
61
#include <acpi/cpufreq/cpufreq.h>
62
63
/* Driver version string, printed by mwait_idle_probe(). */
#define MWAIT_IDLE_VERSION "0.4.1"

/* Replace any earlier PREFIX definition with this driver's log tag. */
#undef PREFIX
#define PREFIX "mwait-idle: "

/*
 * pr_debug() forwards to printk() at KERN_DEBUG level when DEBUG is
 * defined, and expands to nothing otherwise.
 */
#ifndef DEBUG
# define pr_debug(fmt...)
#else
# define pr_debug(fmt...) printk(KERN_DEBUG fmt)
#endif
72
73
static __initdata bool_t opt_mwait_idle = 1;
74
boolean_param("mwait-idle", opt_mwait_idle);
75
76
static unsigned int mwait_substates;
77
78
1
#define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF
79
/* Reliable LAPIC Timer States, bit 1 for C1 etc. Default to only C1. */
80
static unsigned int lapic_timer_reliable_states = (1 << 1);
81
82
struct idle_cpu {
83
  const struct cpuidle_state *state_table;
84
85
  /*
86
   * Hardware C-state auto-demotion may not always be optimal.
87
   * Indicate which enable bits to clear here.
88
   */
89
  unsigned long auto_demotion_disable_flags;
90
  bool_t byt_auto_demotion_disable_flag;
91
  bool_t disable_promotion_to_c1e;
92
};
93
94
static const struct idle_cpu *icpu;
95
96
static const struct cpuidle_state {
97
  char    name[16];
98
  unsigned int  flags;
99
  unsigned int  exit_latency; /* in US */
100
  unsigned int  target_residency; /* in US */
101
} *cpuidle_state_table;
102
103
48
/* State is skipped when mwait_idle_cpu_init() builds a CPU's C-state list. */
#define CPUIDLE_FLAG_DISABLED   0x1
/*
 * Set this flag for states where the HW flushes the TLB for us
 * and so we don't need cross-calls to keep it consistent.
 * If this flag is set, SW flushes the TLB, so even if the
 * HW doesn't do the flushing, this flag is safe to use.
 */
#define CPUIDLE_FLAG_TLB_FLUSHED  0x10000

/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state.
 */
/* Extract the 8-bit MWAIT hint from bits 31:24 of a flags word. */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
/*
 * Place an 8-bit MWAIT hint into bits 31:24 of a flags word.  The
 * argument is parenthesized so expressions such as "a | b" are masked
 * as a whole, and the unsigned mask keeps the shift out of signed
 * overflow for hints with bit 7 set.
 */
#define MWAIT2flg(eax) (((eax) & 0xFFu) << 24)
/* C-state field of a hint (the bits above the sub-state field). */
#define MWAIT_HINT2CSTATE(hint) (((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
/* Sub-state field of a hint (the low bits). */
#define MWAIT_HINT2SUBSTATE(hint) ((hint) & MWAIT_SUBSTATE_MASK)
123
124
/*
125
 * States are indexed by the cstate number,
126
 * which is also the index into the MWAIT hint array.
127
 * Thus C0 is a dummy.
128
 */
129
static const struct cpuidle_state nehalem_cstates[] = {
130
  {
131
    .name = "C1-NHM",
132
    .flags = MWAIT2flg(0x00),
133
    .exit_latency = 3,
134
    .target_residency = 6,
135
  },
136
  {
137
    .name = "C1E-NHM",
138
    .flags = MWAIT2flg(0x01),
139
    .exit_latency = 10,
140
    .target_residency = 20,
141
  },
142
  {
143
    .name = "C3-NHM",
144
    .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
145
    .exit_latency = 20,
146
    .target_residency = 80,
147
  },
148
  {
149
    .name = "C6-NHM",
150
    .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
151
    .exit_latency = 200,
152
    .target_residency = 800,
153
  },
154
  {}
155
};
156
157
static const struct cpuidle_state snb_cstates[] = {
158
  {
159
    .name = "C1-SNB",
160
    .flags = MWAIT2flg(0x00),
161
    .exit_latency = 2,
162
    .target_residency = 2,
163
  },
164
  {
165
    .name = "C1E-SNB",
166
    .flags = MWAIT2flg(0x01),
167
    .exit_latency = 10,
168
    .target_residency = 20,
169
  },
170
  {
171
    .name = "C3-SNB",
172
    .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
173
    .exit_latency = 80,
174
    .target_residency = 211,
175
  },
176
  {
177
    .name = "C6-SNB",
178
    .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
179
    .exit_latency = 104,
180
    .target_residency = 345,
181
  },
182
  {
183
    .name = "C7-SNB",
184
    .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
185
    .exit_latency = 109,
186
    .target_residency = 345,
187
  },
188
  {}
189
};
190
191
static const struct cpuidle_state byt_cstates[] = {
192
  {
193
    .name = "C1-BYT",
194
    .flags = MWAIT2flg(0x00),
195
    .exit_latency = 1,
196
    .target_residency = 1,
197
  },
198
  {
199
    .name = "C6N-BYT",
200
    .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
201
    .exit_latency = 300,
202
    .target_residency = 275,
203
  },
204
  {
205
    .name = "C6S-BYT",
206
    .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
207
    .exit_latency = 500,
208
    .target_residency = 560,
209
  },
210
  {
211
    .name = "C7-BYT",
212
    .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
213
    .exit_latency = 1200,
214
    .target_residency = 4000,
215
  },
216
  {
217
    .name = "C7S-BYT",
218
    .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
219
    .exit_latency = 10000,
220
    .target_residency = 20000,
221
  },
222
  {}
223
};
224
225
static const struct cpuidle_state cht_cstates[] = {
226
  {
227
    .name = "C1-CHT",
228
    .flags = MWAIT2flg(0x00),
229
    .exit_latency = 1,
230
    .target_residency = 1,
231
  },
232
  {
233
    .name = "C6N-CHT",
234
    .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
235
    .exit_latency = 80,
236
    .target_residency = 275,
237
  },
238
  {
239
    .name = "C6S-CHT",
240
    .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
241
    .exit_latency = 200,
242
    .target_residency = 560,
243
  },
244
  {
245
    .name = "C7-CHT",
246
    .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
247
    .exit_latency = 1200,
248
    .target_residency = 4000,
249
  },
250
  {
251
    .name = "C7S-CHT",
252
    .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
253
    .exit_latency = 10000,
254
    .target_residency = 20000,
255
  },
256
  {}
257
};
258
259
static const struct cpuidle_state ivb_cstates[] = {
260
  {
261
    .name = "C1-IVB",
262
    .flags = MWAIT2flg(0x00),
263
    .exit_latency = 1,
264
    .target_residency = 1,
265
  },
266
  {
267
    .name = "C1E-IVB",
268
    .flags = MWAIT2flg(0x01),
269
    .exit_latency = 10,
270
    .target_residency = 20,
271
  },
272
  {
273
    .name = "C3-IVB",
274
    .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
275
    .exit_latency = 59,
276
    .target_residency = 156,
277
  },
278
  {
279
    .name = "C6-IVB",
280
    .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
281
    .exit_latency = 80,
282
    .target_residency = 300,
283
  },
284
  {
285
    .name = "C7-IVB",
286
    .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
287
    .exit_latency = 87,
288
    .target_residency = 300,
289
  },
290
  {}
291
};
292
293
static const struct cpuidle_state ivt_cstates[] = {
294
  {
295
    .name = "C1-IVT",
296
    .flags = MWAIT2flg(0x00),
297
    .exit_latency = 1,
298
    .target_residency = 1,
299
  },
300
  {
301
    .name = "C1E-IVT",
302
    .flags = MWAIT2flg(0x01),
303
    .exit_latency = 10,
304
    .target_residency = 80,
305
  },
306
  {
307
    .name = "C3-IVT",
308
    .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
309
    .exit_latency = 59,
310
    .target_residency = 156,
311
  },
312
  {
313
    .name = "C6-IVT",
314
    .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
315
    .exit_latency = 82,
316
    .target_residency = 300,
317
  },
318
  {}
319
};
320
321
static const struct cpuidle_state ivt_cstates_4s[] = {
322
  {
323
    .name = "C1-IVT-4S",
324
    .flags = MWAIT2flg(0x00),
325
    .exit_latency = 1,
326
    .target_residency = 1,
327
  },
328
  {
329
    .name = "C1E-IVT-4S",
330
    .flags = MWAIT2flg(0x01),
331
    .exit_latency = 10,
332
    .target_residency = 250,
333
  },
334
  {
335
    .name = "C3-IVT-4S",
336
    .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
337
    .exit_latency = 59,
338
    .target_residency = 300,
339
  },
340
  {
341
    .name = "C6-IVT-4S",
342
    .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
343
    .exit_latency = 84,
344
    .target_residency = 400,
345
  },
346
  {}
347
};
348
349
static const struct cpuidle_state ivt_cstates_8s[] = {
350
  {
351
    .name = "C1-IVT-8S",
352
    .flags = MWAIT2flg(0x00),
353
    .exit_latency = 1,
354
    .target_residency = 1,
355
  },
356
  {
357
    .name = "C1E-IVT-8S",
358
    .flags = MWAIT2flg(0x01),
359
    .exit_latency = 10,
360
    .target_residency = 500,
361
  },
362
  {
363
    .name = "C3-IVT-8S",
364
    .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
365
    .exit_latency = 59,
366
    .target_residency = 600,
367
  },
368
  {
369
    .name = "C6-IVT-8S",
370
    .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
371
    .exit_latency = 88,
372
    .target_residency = 700,
373
  },
374
  {}
375
};
376
377
static const struct cpuidle_state hsw_cstates[] = {
378
  {
379
    .name = "C1-HSW",
380
    .flags = MWAIT2flg(0x00),
381
    .exit_latency = 2,
382
    .target_residency = 2,
383
  },
384
  {
385
    .name = "C1E-HSW",
386
    .flags = MWAIT2flg(0x01),
387
    .exit_latency = 10,
388
    .target_residency = 20,
389
  },
390
  {
391
    .name = "C3-HSW",
392
    .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
393
    .exit_latency = 33,
394
    .target_residency = 100,
395
  },
396
  {
397
    .name = "C6-HSW",
398
    .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
399
    .exit_latency = 133,
400
    .target_residency = 400,
401
  },
402
  {
403
    .name = "C7s-HSW",
404
    .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
405
    .exit_latency = 166,
406
    .target_residency = 500,
407
  },
408
  {
409
    .name = "C8-HSW",
410
    .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
411
    .exit_latency = 300,
412
    .target_residency = 900,
413
  },
414
  {
415
    .name = "C9-HSW",
416
    .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
417
    .exit_latency = 600,
418
    .target_residency = 1800,
419
  },
420
  {
421
    .name = "C10-HSW",
422
    .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
423
    .exit_latency = 2600,
424
    .target_residency = 7700,
425
  },
426
  {}
427
};
428
429
static const struct cpuidle_state bdw_cstates[] = {
430
  {
431
    .name = "C1-BDW",
432
    .flags = MWAIT2flg(0x00),
433
    .exit_latency = 2,
434
    .target_residency = 2,
435
  },
436
  {
437
    .name = "C1E-BDW",
438
    .flags = MWAIT2flg(0x01),
439
    .exit_latency = 10,
440
    .target_residency = 20,
441
  },
442
  {
443
    .name = "C3-BDW",
444
    .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
445
    .exit_latency = 40,
446
    .target_residency = 100,
447
  },
448
  {
449
    .name = "C6-BDW",
450
    .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
451
    .exit_latency = 133,
452
    .target_residency = 400,
453
  },
454
  {
455
    .name = "C7s-BDW",
456
    .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
457
    .exit_latency = 166,
458
    .target_residency = 500,
459
  },
460
  {
461
    .name = "C8-BDW",
462
    .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
463
    .exit_latency = 300,
464
    .target_residency = 900,
465
  },
466
  {
467
    .name = "C9-BDW",
468
    .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
469
    .exit_latency = 600,
470
    .target_residency = 1800,
471
  },
472
  {
473
    .name = "C10-BDW",
474
    .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
475
    .exit_latency = 2600,
476
    .target_residency = 7700,
477
  },
478
  {}
479
};
480
481
static struct cpuidle_state skl_cstates[] = {
482
  {
483
    .name = "C1-SKL",
484
    .flags = MWAIT2flg(0x00),
485
    .exit_latency = 2,
486
    .target_residency = 2,
487
  },
488
  {
489
    .name = "C1E-SKL",
490
    .flags = MWAIT2flg(0x01),
491
    .exit_latency = 10,
492
    .target_residency = 20,
493
  },
494
  {
495
    .name = "C3-SKL",
496
    .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
497
    .exit_latency = 70,
498
    .target_residency = 100,
499
  },
500
  {
501
    .name = "C6-SKL",
502
    .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
503
    .exit_latency = 85,
504
    .target_residency = 200,
505
  },
506
  {
507
    .name = "C7s-SKL",
508
    .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
509
    .exit_latency = 124,
510
    .target_residency = 800,
511
  },
512
  {
513
    .name = "C8-SKL",
514
    .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
515
    .exit_latency = 200,
516
    .target_residency = 800,
517
  },
518
  {
519
    .name = "C9-SKL",
520
    .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
521
    .exit_latency = 480,
522
    .target_residency = 5000,
523
  },
524
  {
525
    .name = "C10-SKL",
526
    .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
527
    .exit_latency = 890,
528
    .target_residency = 5000,
529
  },
530
  {}
531
};
532
533
static const struct cpuidle_state skx_cstates[] = {
534
  {
535
    .name = "C1-SKX",
536
    .flags = MWAIT2flg(0x00),
537
    .exit_latency = 2,
538
    .target_residency = 2,
539
  },
540
  {
541
    .name = "C1E-SKX",
542
    .flags = MWAIT2flg(0x01),
543
    .exit_latency = 10,
544
    .target_residency = 20,
545
  },
546
  {
547
    .name = "C6-SKX",
548
    .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
549
    .exit_latency = 133,
550
    .target_residency = 600,
551
  },
552
  {}
553
};
554
555
static const struct cpuidle_state atom_cstates[] = {
556
  {
557
    .name = "C1E-ATM",
558
    .flags = MWAIT2flg(0x00),
559
    .exit_latency = 10,
560
    .target_residency = 20,
561
  },
562
  {
563
    .name = "C2-ATM",
564
    .flags = MWAIT2flg(0x10),
565
    .exit_latency = 20,
566
    .target_residency = 80,
567
  },
568
  {
569
    .name = "C4-ATM",
570
    .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
571
    .exit_latency = 100,
572
    .target_residency = 400,
573
  },
574
  {
575
    .name = "C6-ATM",
576
    .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
577
    .exit_latency = 140,
578
    .target_residency = 560,
579
  },
580
  {}
581
};
582
583
static const struct cpuidle_state tangier_cstates[] = {
584
  {
585
    .name = "C1-TNG",
586
    .flags = MWAIT2flg(0x00),
587
    .exit_latency = 1,
588
    .target_residency = 4,
589
  },
590
  {
591
    .name = "C4-TNG",
592
    .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
593
    .exit_latency = 100,
594
    .target_residency = 400,
595
  },
596
  {
597
    .name = "C6-TNG",
598
    .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
599
    .exit_latency = 140,
600
    .target_residency = 560,
601
  },
602
  {
603
    .name = "C7-TNG",
604
    .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
605
    .exit_latency = 1200,
606
    .target_residency = 4000,
607
  },
608
  {
609
    .name = "C9-TNG",
610
    .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
611
    .exit_latency = 10000,
612
    .target_residency = 20000,
613
  },
614
  {}
615
};
616
617
static const struct cpuidle_state avn_cstates[] = {
618
  {
619
    .name = "C1-AVN",
620
    .flags = MWAIT2flg(0x00),
621
    .exit_latency = 2,
622
    .target_residency = 2,
623
  },
624
  {
625
    .name = "C6-AVN",
626
    .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
627
    .exit_latency = 15,
628
    .target_residency = 45,
629
  },
630
  {}
631
};
632
633
static const struct cpuidle_state knl_cstates[] = {
634
  {
635
    .name = "C1-KNL",
636
    .flags = MWAIT2flg(0x00),
637
    .exit_latency = 1,
638
    .target_residency = 2,
639
  },
640
  {
641
    .name = "C6-KNL",
642
    .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
643
    .exit_latency = 120,
644
    .target_residency = 500,
645
  },
646
  {}
647
};
648
649
static struct cpuidle_state bxt_cstates[] = {
650
  {
651
    .name = "C1-BXT",
652
    .flags = MWAIT2flg(0x00),
653
    .exit_latency = 2,
654
    .target_residency = 2,
655
  },
656
  {
657
    .name = "C1E-BXT",
658
    .flags = MWAIT2flg(0x01),
659
    .exit_latency = 10,
660
    .target_residency = 20,
661
  },
662
  {
663
    .name = "C6-BXT",
664
    .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
665
    .exit_latency = 133,
666
    .target_residency = 133,
667
  },
668
  {
669
    .name = "C7s-BXT",
670
    .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
671
    .exit_latency = 155,
672
    .target_residency = 155,
673
  },
674
  {
675
    .name = "C8-BXT",
676
    .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
677
    .exit_latency = 1000,
678
    .target_residency = 1000,
679
  },
680
  {
681
    .name = "C9-BXT",
682
    .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
683
    .exit_latency = 2000,
684
    .target_residency = 2000,
685
  },
686
  {
687
    .name = "C10-BXT",
688
    .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
689
    .exit_latency = 10000,
690
    .target_residency = 10000,
691
  },
692
  {}
693
};
694
695
static const struct cpuidle_state dnv_cstates[] = {
696
  {
697
    .name = "C1-DNV",
698
    .flags = MWAIT2flg(0x00),
699
    .exit_latency = 2,
700
    .target_residency = 2,
701
  },
702
  {
703
    .name = "C1E-DNV",
704
    .flags = MWAIT2flg(0x01),
705
    .exit_latency = 10,
706
    .target_residency = 20,
707
  },
708
  {
709
    .name = "C6-DNV",
710
    .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
711
    .exit_latency = 50,
712
    .target_residency = 500,
713
  },
714
  {}
715
};
716
717
static void mwait_idle(void)
718
2.10M
{
719
2.10M
  unsigned int cpu = smp_processor_id();
720
2.10M
  struct acpi_processor_power *power = processor_powers[cpu];
721
2.10M
  struct acpi_processor_cx *cx = NULL;
722
2.10M
  unsigned int eax, next_state, cstate;
723
2.10M
  u64 before, after;
724
2.10M
  u32 exp = 0, pred = 0, irq_traced[4] = { 0 };
725
2.10M
726
2.10M
  if (max_cstate > 0 && power && !sched_has_urgent_vcpu() &&
727
2.07M
      (next_state = cpuidle_current_governor->select(power)) > 0) {
728
1.82M
    do {
729
1.82M
      cx = &power->states[next_state];
730
1.82M
    } while (cx->type > max_cstate && --next_state);
731
1.82M
    if (!next_state)
732
0
      cx = NULL;
733
1.82M
    menu_get_trace_data(&exp, &pred);
734
1.82M
  }
735
2.10M
  if (!cx) {
736
0
    if (pm_idle_save)
737
0
      pm_idle_save();
738
0
    else
739
0
      safe_halt();
740
0
    return;
741
0
  }
742
2.10M
743
2.10M
  cpufreq_dbs_timer_suspend();
744
2.10M
745
2.10M
  sched_tick_suspend();
746
2.10M
  /* sched_tick_suspend() can raise TIMER_SOFTIRQ. Process it now. */
747
2.10M
  process_pending_softirqs();
748
2.10M
749
2.10M
  /* Interrupts must be disabled for C2 and higher transitions. */
750
2.10M
  local_irq_disable();
751
2.10M
752
2.10M
  if (!cpu_is_haltable(cpu)) {
753
498
    local_irq_enable();
754
498
    sched_tick_resume();
755
498
    cpufreq_dbs_timer_resume();
756
498
    return;
757
498
  }
758
2.10M
759
2.10M
  eax = cx->address;
760
2.10M
  cstate = ((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1;
761
2.10M
762
2.10M
#if 0 /* XXX Can we/do we need to do something similar on Xen? */
763
  /*
764
   * leave_mm() to avoid costly and often unnecessary wakeups
765
   * for flushing the user TLB's associated with the active mm.
766
   */
767
  if (cpuidle_state_table[].flags & CPUIDLE_FLAG_TLB_FLUSHED)
768
    leave_mm(cpu);
769
#endif
770
2.10M
771
2.10M
  if (!(lapic_timer_reliable_states & (1 << cstate)))
772
0
    lapic_timer_off();
773
2.10M
774
2.10M
  before = cpuidle_get_tick();
775
2.10M
  TRACE_4D(TRC_PM_IDLE_ENTRY, cx->type, before, exp, pred);
776
2.10M
777
2.10M
  update_last_cx_stat(power, cx, before);
778
2.10M
779
2.10M
  if (cpu_is_haltable(cpu))
780
1.95M
    mwait_idle_with_hints(eax, MWAIT_ECX_INTERRUPT_BREAK);
781
2.10M
782
2.10M
  after = cpuidle_get_tick();
783
2.10M
784
2.10M
  cstate_restore_tsc();
785
2.10M
  trace_exit_reason(irq_traced);
786
2.10M
  TRACE_6D(TRC_PM_IDLE_EXIT, cx->type, after,
787
2.10M
    irq_traced[0], irq_traced[1], irq_traced[2], irq_traced[3]);
788
2.10M
789
2.10M
  /* Now back in C0. */
790
2.10M
  update_idle_stats(power, cx, before, after);
791
2.10M
  local_irq_enable();
792
2.10M
793
2.10M
  if (!(lapic_timer_reliable_states & (1 << cstate)))
794
0
    lapic_timer_on();
795
2.10M
796
2.10M
  sched_tick_resume();
797
2.10M
  cpufreq_dbs_timer_resume();
798
2.10M
799
2.10M
  if ( cpuidle_current_governor->reflect )
800
1.83M
    cpuidle_current_governor->reflect(power);
801
2.10M
}
802
803
static void auto_demotion_disable(void *dummy)
804
0
{
805
0
  u64 msr_bits;
806
0
807
0
  rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
808
0
  msr_bits &= ~(icpu->auto_demotion_disable_flags);
809
0
  wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
810
0
}
811
812
static void byt_auto_demotion_disable(void *dummy)
813
0
{
814
0
  wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
815
0
  wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
816
0
}
817
818
static void c1e_promotion_disable(void *dummy)
819
12
{
820
12
  u64 msr_bits;
821
12
822
12
  rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
823
12
  msr_bits &= ~0x2;
824
12
  wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
825
12
}
826
827
static const struct idle_cpu idle_cpu_nehalem = {
828
  .state_table = nehalem_cstates,
829
  .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
830
  .disable_promotion_to_c1e = 1,
831
};
832
833
static const struct idle_cpu idle_cpu_atom = {
834
  .state_table = atom_cstates,
835
};
836
837
static const struct idle_cpu idle_cpu_tangier = {
838
  .state_table = tangier_cstates,
839
};
840
841
static const struct idle_cpu idle_cpu_lincroft = {
842
  .state_table = atom_cstates,
843
  .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
844
};
845
846
static const struct idle_cpu idle_cpu_snb = {
847
  .state_table = snb_cstates,
848
  .disable_promotion_to_c1e = 1,
849
};
850
851
static const struct idle_cpu idle_cpu_byt = {
852
  .state_table = byt_cstates,
853
  .disable_promotion_to_c1e = 1,
854
  .byt_auto_demotion_disable_flag = 1,
855
};
856
857
static const struct idle_cpu idle_cpu_cht = {
858
  .state_table = cht_cstates,
859
  .disable_promotion_to_c1e = 1,
860
  .byt_auto_demotion_disable_flag = 1,
861
};
862
863
static const struct idle_cpu idle_cpu_ivb = {
864
  .state_table = ivb_cstates,
865
  .disable_promotion_to_c1e = 1,
866
};
867
868
static const struct idle_cpu idle_cpu_ivt = {
869
  .state_table = ivt_cstates,
870
  .disable_promotion_to_c1e = 1,
871
};
872
873
static const struct idle_cpu idle_cpu_hsw = {
874
  .state_table = hsw_cstates,
875
  .disable_promotion_to_c1e = 1,
876
};
877
878
static const struct idle_cpu idle_cpu_bdw = {
879
  .state_table = bdw_cstates,
880
  .disable_promotion_to_c1e = 1,
881
};
882
883
static const struct idle_cpu idle_cpu_skl = {
884
  .state_table = skl_cstates,
885
  .disable_promotion_to_c1e = 1,
886
};
887
888
static const struct idle_cpu idle_cpu_skx = {
889
  .state_table = skx_cstates,
890
  .disable_promotion_to_c1e = 1,
891
};
892
893
static const struct idle_cpu idle_cpu_avn = {
894
  .state_table = avn_cstates,
895
  .disable_promotion_to_c1e = 1,
896
};
897
898
static const struct idle_cpu idle_cpu_knl = {
899
  .state_table = knl_cstates,
900
};
901
902
static const struct idle_cpu idle_cpu_bxt = {
903
  .state_table = bxt_cstates,
904
  .disable_promotion_to_c1e = 1,
905
};
906
907
static const struct idle_cpu idle_cpu_dnv = {
908
  .state_table = dnv_cstates,
909
  .disable_promotion_to_c1e = 1,
910
};
911
912
#define ICPU(model, cpu) \
913
    { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MONITOR, \
914
        &idle_cpu_##cpu}
915
916
static const struct x86_cpu_id intel_idle_ids[] __initconstrel = {
917
  ICPU(0x1a, nehalem),
918
  ICPU(0x1e, nehalem),
919
  ICPU(0x1f, nehalem),
920
  ICPU(0x25, nehalem),
921
  ICPU(0x2c, nehalem),
922
  ICPU(0x2e, nehalem),
923
  ICPU(0x2f, nehalem),
924
  ICPU(0x1c, atom),
925
  ICPU(0x26, lincroft),
926
  ICPU(0x2a, snb),
927
  ICPU(0x2d, snb),
928
  ICPU(0x36, atom),
929
  ICPU(0x37, byt),
930
  ICPU(0x4a, tangier),
931
  ICPU(0x4c, cht),
932
  ICPU(0x3a, ivb),
933
  ICPU(0x3e, ivt),
934
  ICPU(0x3c, hsw),
935
  ICPU(0x3f, hsw),
936
  ICPU(0x45, hsw),
937
  ICPU(0x46, hsw),
938
  ICPU(0x4d, avn),
939
  ICPU(0x3d, bdw),
940
  ICPU(0x47, bdw),
941
  ICPU(0x4f, bdw),
942
  ICPU(0x56, bdw),
943
  ICPU(0x4e, skl),
944
  ICPU(0x5e, skl),
945
  ICPU(0x8e, skl),
946
  ICPU(0x9e, skl),
947
  ICPU(0x55, skx),
948
  ICPU(0x57, knl),
949
  ICPU(0x85, knl),
950
  ICPU(0x5c, bxt),
951
  ICPU(0x5f, dnv),
952
  {}
953
};
954
955
/*
956
 * ivt_idle_state_table_update(void)
957
 *
958
 * Tune IVT multi-socket targets
959
 * Assumption: num_sockets == (max_package_num + 1)
960
 */
961
/*
 * Pick the IVT C-state table from the socket number of the highest
 * APIC ID among present CPUs (starting from the boot CPU's APIC ID).
 */
static void __init ivt_idle_state_table_update(void)
{
  unsigned int cpu;
  unsigned int highest_apicid = boot_cpu_physical_apicid;

  for_each_present_cpu(cpu) {
    if (x86_cpu_to_apicid[cpu] > highest_apicid)
      highest_apicid = x86_cpu_to_apicid[cpu];
  }

  /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
  switch (apicid_to_socket(highest_apicid)) {
  case 2:
  case 3:
    cpuidle_state_table = ivt_cstates_4s;
    break;

  case 0:
  case 1:
    /* 1 and 2 socket systems use default ivt_cstates */
    break;

  default:
    cpuidle_state_table = ivt_cstates_8s;
    break;
  }
}
981
982
/*
983
 * Translate IRTL (Interrupt Response Time Limit) MSR to usec
984
 */
985
986
static const unsigned int __initconst irtl_ns_units[] = {
987
  1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
988
989
static unsigned long long __init irtl_2_usec(unsigned long long irtl)
990
0
{
991
0
  unsigned long long ns;
992
0
993
0
  if (!irtl)
994
0
    return 0;
995
0
996
0
  ns = irtl_ns_units[(irtl >> 10) & 0x7];
997
0
998
0
  return (irtl & 0x3FF) * ns / 1000;
999
0
}
1000
/*
1001
 * bxt_idle_state_table_update(void)
1002
 *
1003
 * On BXT, we trust the IRTL to show the definitive maximum latency
1004
 * We use the same value for target_residency.
1005
 */
1006
static void __init bxt_idle_state_table_update(void)
1007
0
{
1008
0
  unsigned long long msr;
1009
0
  unsigned int usec;
1010
0
1011
0
  rdmsrl(MSR_PKGC6_IRTL, msr);
1012
0
  usec = irtl_2_usec(msr);
1013
0
  if (usec) {
1014
0
    bxt_cstates[2].exit_latency = usec;
1015
0
    bxt_cstates[2].target_residency = usec;
1016
0
  }
1017
0
1018
0
  rdmsrl(MSR_PKGC7_IRTL, msr);
1019
0
  usec = irtl_2_usec(msr);
1020
0
  if (usec) {
1021
0
    bxt_cstates[3].exit_latency = usec;
1022
0
    bxt_cstates[3].target_residency = usec;
1023
0
  }
1024
0
1025
0
  rdmsrl(MSR_PKGC8_IRTL, msr);
1026
0
  usec = irtl_2_usec(msr);
1027
0
  if (usec) {
1028
0
    bxt_cstates[4].exit_latency = usec;
1029
0
    bxt_cstates[4].target_residency = usec;
1030
0
  }
1031
0
1032
0
  rdmsrl(MSR_PKGC9_IRTL, msr);
1033
0
  usec = irtl_2_usec(msr);
1034
0
  if (usec) {
1035
0
    bxt_cstates[5].exit_latency = usec;
1036
0
    bxt_cstates[5].target_residency = usec;
1037
0
  }
1038
0
1039
0
  rdmsrl(MSR_PKGC10_IRTL, msr);
1040
0
  usec = irtl_2_usec(msr);
1041
0
  if (usec) {
1042
0
    bxt_cstates[6].exit_latency = usec;
1043
0
    bxt_cstates[6].target_residency = usec;
1044
0
  }
1045
0
}
1046
1047
/*
1048
 * sklh_idle_state_table_update(void)
1049
 *
1050
 * On SKL-H (model 0x5e) disable C8 and C9 if:
1051
 * C10 is enabled and SGX disabled
1052
 */
1053
/*
 * Mark the C8 and C9 entries of skl_cstates[] as disabled unless one of
 * the early-out conditions below applies.
 */
static void __init sklh_idle_state_table_update(void)
{
  u64 pkg_cfg, feature_ctl;

  /* if PC10 disabled via cmdline max_cstate=7 or shallower */
  if (max_cstate <= 7)
    return;

  /* if PC10 not present in CPUID.MWAIT.EDX */
  if (!(mwait_substates & (MWAIT_CSTATE_MASK << 28)))
    return;

  rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, pkg_cfg);

  /* PC10 is not enabled in PKG C-state limit */
  if ((pkg_cfg & 0xF) != 8)
    return;

  /* if SGX is present, only proceed while it is not enabled */
  if (boot_cpu_has(X86_FEATURE_SGX)) {
    rdmsrl(MSR_IA32_FEATURE_CONTROL, feature_ctl);

    if (feature_ctl & IA32_FEATURE_CONTROL_SGX_ENABLE)
      return;
  }

  skl_cstates[5].flags |= CPUIDLE_FLAG_DISABLED; /* C8-SKL */
  skl_cstates[6].flags |= CPUIDLE_FLAG_DISABLED; /* C9-SKL */
}
1083
1084
/*
1085
 * mwait_idle_state_table_update()
1086
 *
1087
 * Update the default state_table for this CPU-id
1088
 */
1089
/*
 * Run the model-specific table adjustment for the boot CPU, if there is
 * one; all other models keep their default table.
 */
static void __init mwait_idle_state_table_update(void)
{
  const unsigned int model = boot_cpu_data.x86_model;

  if (model == 0x3e)        /* IVT */
    ivt_idle_state_table_update();
  else if (model == 0x5c)   /* BXT */
    bxt_idle_state_table_update();
  else if (model == 0x5e)   /* SKL-H */
    sklh_idle_state_table_update();
}
1103
1104
/*
 * Decide whether the driver can be used on the boot CPU and, if so,
 * select its C-state table.
 *
 * Returns 0 on success, -ENODEV when the CPU model or its MWAIT
 * capabilities are unsuitable, and -EPERM when disabled via max_cstate
 * or the "mwait-idle" option.
 */
static int __init mwait_idle_probe(void)
{
  const unsigned int required_ecx =
    CPUID5_ECX_EXTENSIONS_SUPPORTED | CPUID5_ECX_INTERRUPT_BREAK;
  const struct x86_cpu_id *match = x86_match_cpu(intel_idle_ids);
  unsigned int eax, ebx, ecx;

  if (match == NULL) {
    pr_debug(PREFIX "does not run on family %d model %d\n",
             boot_cpu_data.x86, boot_cpu_data.x86_model);
    return -ENODEV;
  }

  if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
    return -ENODEV;

  cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);

  /* Both ECX capability bits and at least one sub-state are needed. */
  if ((ecx & required_ecx) != required_ecx || mwait_substates == 0)
    return -ENODEV;

  if (max_cstate == 0 || !opt_mwait_idle) {
    pr_debug(PREFIX "disabled\n");
    return -EPERM;
  }

  pr_debug(PREFIX "MWAIT substates: %#x\n", mwait_substates);

  icpu = match->driver_data;
  cpuidle_state_table = icpu->state_table;

  /* With ARAT, every C-state is treated as LAPIC-timer reliable. */
  if (boot_cpu_has(X86_FEATURE_ARAT))
    lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;

  pr_debug(PREFIX "v" MWAIT_IDLE_VERSION " model %#x\n",
           boot_cpu_data.x86_model);

  pr_debug(PREFIX "lapic_timer_reliable_states %#x\n",
           lapic_timer_reliable_states);

  mwait_idle_state_table_update();

  return 0;
}
1148
1149
static int mwait_idle_cpu_init(struct notifier_block *nfb,
1150
             unsigned long action, void *hcpu)
1151
35
{
1152
35
  unsigned int cpu = (unsigned long)hcpu, cstate;
1153
35
  struct acpi_processor_power *dev = processor_powers[cpu];
1154
35
1155
35
  switch (action) {
1156
11
  default:
1157
11
    return NOTIFY_DONE;
1158
11
1159
12
  case CPU_UP_PREPARE:
1160
12
    cpuidle_init_cpu(cpu);
1161
12
    return NOTIFY_DONE;
1162
11
1163
12
  case CPU_ONLINE:
1164
12
    if (!dev)
1165
0
      return NOTIFY_DONE;
1166
12
    break;
1167
35
  }
1168
35
1169
12
  dev->count = 1;
1170
12
1171
60
  for (cstate = 0; cpuidle_state_table[cstate].target_residency; ++cstate) {
1172
48
    unsigned int num_substates, hint, state;
1173
48
    struct acpi_processor_cx *cx;
1174
48
1175
48
    hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1176
48
    state = MWAIT_HINT2CSTATE(hint) + 1;
1177
48
1178
48
    if (state > max_cstate) {
1179
0
      printk(PREFIX "max C-state %u reached\n", max_cstate);
1180
0
      break;
1181
0
    }
1182
48
1183
48
    /* Number of sub-states for this state in CPUID.MWAIT. */
1184
48
    num_substates = (mwait_substates >> (state * 4))
1185
48
                    & MWAIT_SUBSTATE_MASK;
1186
48
    /* If NO sub-states for this state in CPUID, skip it. */
1187
48
    if (num_substates == 0)
1188
0
      continue;
1189
48
1190
48
    /* if state marked as disabled, skip it */
1191
48
    if (cpuidle_state_table[cstate].flags &
1192
48
        CPUIDLE_FLAG_DISABLED) {
1193
0
      printk(XENLOG_DEBUG PREFIX "state %s is disabled",
1194
0
             cpuidle_state_table[cstate].name);
1195
0
      continue;
1196
0
    }
1197
48
1198
48
    if (dev->count >= ACPI_PROCESSOR_MAX_POWER) {
1199
0
      printk(PREFIX "max C-state count of %u reached\n",
1200
0
             ACPI_PROCESSOR_MAX_POWER);
1201
0
      break;
1202
0
    }
1203
48
1204
48
    if (state > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) &&
1205
0
        !pm_idle_save)
1206
0
      setup_clear_cpu_cap(X86_FEATURE_TSC_RELIABLE);
1207
48
1208
48
    cx = dev->states + dev->count;
1209
48
    cx->type = state;
1210
48
    cx->address = hint;
1211
48
    cx->entry_method = ACPI_CSTATE_EM_FFH;
1212
48
    cx->latency = cpuidle_state_table[cstate].exit_latency;
1213
48
    cx->target_residency =
1214
48
      cpuidle_state_table[cstate].target_residency;
1215
48
1216
48
    dev->count++;
1217
48
  }
1218
12
1219
12
  if (icpu->auto_demotion_disable_flags)
1220
0
    on_selected_cpus(cpumask_of(cpu), auto_demotion_disable, NULL, 1);
1221
12
1222
12
  if (icpu->byt_auto_demotion_disable_flag)
1223
0
    on_selected_cpus(cpumask_of(cpu), byt_auto_demotion_disable, NULL, 1);
1224
12
1225
12
  if (icpu->disable_promotion_to_c1e)
1226
12
    on_selected_cpus(cpumask_of(cpu), c1e_promotion_disable, NULL, 1);
1227
12
1228
12
  return NOTIFY_DONE;
1229
35
}
1230
1231
/*
 * Probe for the driver and, on success, install mwait_idle() as the
 * idle routine and mwait_idle_cpu_init() as nfb's notifier callback.
 *
 * Returns 0 on success or a negative errno value: -ENODEV if an idle
 * routine has already been saved in pm_idle_save, the probe's error, or
 * -ENODEV / -EINVAL when the non-ARAT timer setup fails.
 */
int __init mwait_idle_init(struct notifier_block *nfb)
{
  int err;

  if (pm_idle_save)
    return -ENODEV;

  err = mwait_idle_probe();
  if (err)
    return err;

  /* Without ARAT, HPET broadcast and LAPIC timer setup are attempted. */
  if (!boot_cpu_has(X86_FEATURE_ARAT)) {
    hpet_broadcast_init();
    if (xen_cpuidle < 0 && !hpet_broadcast_is_available())
      err = -ENODEV;
    else if (!lapic_timer_init())
      err = -EINVAL;

    if (err) {
      pr_debug(PREFIX "not used (%d)\n", err);
      return err;
    }
  }

  nfb->notifier_call = mwait_idle_cpu_init;
  /* hcpu == NULL means CPU 0 in mwait_idle_cpu_init(). */
  mwait_idle_cpu_init(nfb, CPU_UP_PREPARE, NULL);

  /* Remember the previous idle routine as a fallback for mwait_idle(). */
  pm_idle_save = pm_idle;
  pm_idle = mwait_idle;
  dead_idle = acpi_dead_idle;

  return 0;
}