/root/src/xen/xen/arch/x86/acpi/cpu_idle.c
Line | Count | Source |
1 | | /* |
2 | | * cpu_idle - xen idle state module derived from Linux |
3 | | * drivers/acpi/processor_idle.c & |
4 | | * arch/x86/kernel/acpi/cstate.c |
5 | | * |
6 | | * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com> |
7 | | * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> |
8 | | * Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de> |
9 | | * Copyright (C) 2004 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> |
10 | | * - Added processor hotplug support |
11 | | * Copyright (C) 2005 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> |
12 | | * - Added support for C3 on SMP |
13 | | * Copyright (C) 2007, 2008 Intel Corporation |
14 | | * |
15 | | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
16 | | * |
17 | | * This program is free software; you can redistribute it and/or modify |
18 | | * it under the terms of the GNU General Public License as published by |
19 | | * the Free Software Foundation; either version 2 of the License, or (at |
20 | | * your option) any later version. |
21 | | * |
22 | | * This program is distributed in the hope that it will be useful, but |
23 | | * WITHOUT ANY WARRANTY; without even the implied warranty of |
24 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
25 | | * General Public License for more details. |
26 | | * |
27 | | * You should have received a copy of the GNU General Public License along |
28 | | * with this program; If not, see <http://www.gnu.org/licenses/>. |
29 | | * |
30 | | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
31 | | */ |
32 | | |
33 | | #include <xen/errno.h> |
34 | | #include <xen/lib.h> |
35 | | #include <xen/types.h> |
36 | | #include <xen/acpi.h> |
37 | | #include <xen/smp.h> |
38 | | #include <xen/guest_access.h> |
39 | | #include <xen/keyhandler.h> |
40 | | #include <xen/trace.h> |
41 | | #include <xen/sched-if.h> |
42 | | #include <xen/irq.h> |
43 | | #include <asm/cache.h> |
44 | | #include <asm/io.h> |
45 | | #include <asm/iocap.h> |
46 | | #include <asm/hpet.h> |
47 | | #include <asm/processor.h> |
48 | | #include <xen/pmstat.h> |
49 | | #include <xen/softirq.h> |
50 | | #include <public/platform.h> |
51 | | #include <public/sysctl.h> |
52 | | #include <acpi/cpufreq/cpufreq.h> |
53 | | #include <asm/apic.h> |
54 | | #include <asm/cpuidle.h> |
55 | | #include <asm/mwait.h> |
56 | | #include <xen/notifier.h> |
57 | | #include <xen/cpu.h> |
58 | | |
59 | | /*#define DEBUG_PM_CX*/ |
60 | | |
61 | | #define GET_HW_RES_IN_NS(msr, val) \ |
62 | 0 | do { rdmsrl(msr, val); val = tsc_ticks2ns(val); } while( 0 ) |
63 | 0 | #define GET_MC6_RES(val) GET_HW_RES_IN_NS(0x664, val) |
64 | 0 | #define GET_PC2_RES(val) GET_HW_RES_IN_NS(0x60D, val) /* SNB onwards */ |
65 | 0 | #define GET_PC3_RES(val) GET_HW_RES_IN_NS(0x3F8, val) |
66 | 0 | #define GET_PC6_RES(val) GET_HW_RES_IN_NS(0x3F9, val) |
67 | 0 | #define GET_PC7_RES(val) GET_HW_RES_IN_NS(0x3FA, val) |
68 | 0 | #define GET_PC8_RES(val) GET_HW_RES_IN_NS(0x630, val) /* some Haswells only */ |
69 | 0 | #define GET_PC9_RES(val) GET_HW_RES_IN_NS(0x631, val) /* some Haswells only */ |
70 | 0 | #define GET_PC10_RES(val) GET_HW_RES_IN_NS(0x632, val) /* some Haswells only */ |
71 | 0 | #define GET_CC1_RES(val) GET_HW_RES_IN_NS(0x660, val) /* Silvermont only */ |
72 | 0 | #define GET_CC3_RES(val) GET_HW_RES_IN_NS(0x3FC, val) |
73 | 0 | #define GET_CC6_RES(val) GET_HW_RES_IN_NS(0x3FD, val) |
74 | 0 | #define GET_CC7_RES(val) GET_HW_RES_IN_NS(0x3FE, val) /* SNB onwards */ |
75 | 0 | #define PHI_CC6_RES(val) GET_HW_RES_IN_NS(0x3FF, val) /* Xeon Phi only */ |
76 | | |
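As a minimal sketch, GET_CC6_RES(val) above expands to an rdmsrl() of the
core C6 residency counter followed by a tick-to-nanosecond conversion
(helper names as used by the macro; 0x3FD is the MSR named above):

    uint64_t cc6;
    rdmsrl(0x3FD, cc6);       /* raw residency count read from the MSR */
    cc6 = tsc_ticks2ns(cc6);  /* scale TSC-rate ticks to nanoseconds */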
77 | 0 | static void lapic_timer_nop(void) { } |
78 | | void (*__read_mostly lapic_timer_off)(void); |
79 | | void (*__read_mostly lapic_timer_on)(void); |
80 | | |
81 | | bool lapic_timer_init(void) |
82 | 0 | { |
83 | 0 | if ( boot_cpu_has(X86_FEATURE_ARAT) ) |
84 | 0 | { |
85 | 0 | lapic_timer_off = lapic_timer_nop; |
86 | 0 | lapic_timer_on = lapic_timer_nop; |
87 | 0 | } |
88 | 0 | else if ( hpet_broadcast_is_available() ) |
89 | 0 | { |
90 | 0 | lapic_timer_off = hpet_broadcast_enter; |
91 | 0 | lapic_timer_on = hpet_broadcast_exit; |
92 | 0 | } |
93 | 0 | else if ( pit_broadcast_is_available() ) |
94 | 0 | { |
95 | 0 | lapic_timer_off = pit_broadcast_enter; |
96 | 0 | lapic_timer_on = pit_broadcast_exit; |
97 | 0 | } |
98 | 0 | else |
99 | 0 | return false; |
100 | 0 | |
101 | 0 | return true; |
102 | 0 | } |
103 | | |
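The two hooks installed here bracket deep C-state entry in
acpi_processor_idle() below; with ARAT the APIC timer keeps counting in
deep C-states, so both hooks are no-ops. A minimal usage sketch, mirroring
the C3 path later in this file:

    lapic_timer_off();       /* migrate the wakeup to the broadcast timer */
    acpi_idle_do_entry(cx);  /* enter deep C-state; APIC timer may stop */
    lapic_timer_on();        /* reclaim the wakeup after resume */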
104 | | static uint64_t (*__read_mostly tick_to_ns)(uint64_t) = acpi_pm_tick_to_ns; |
105 | | |
106 | | void (*__read_mostly pm_idle_save)(void); |
107 | | unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER - 1; |
108 | | integer_param("max_cstate", max_cstate); |
109 | | static bool __read_mostly local_apic_timer_c2_ok; |
110 | | boolean_param("lapic_timer_c2_ok", local_apic_timer_c2_ok); |
111 | | |
112 | | struct acpi_processor_power *__read_mostly processor_powers[NR_CPUS]; |
113 | | |
114 | | struct hw_residencies |
115 | | { |
116 | | uint64_t mc0; |
117 | | uint64_t mc6; |
118 | | uint64_t pc2; |
119 | | uint64_t pc3; |
120 | | uint64_t pc4; |
121 | | uint64_t pc6; |
122 | | uint64_t pc7; |
123 | | uint64_t pc8; |
124 | | uint64_t pc9; |
125 | | uint64_t pc10; |
126 | | uint64_t cc1; |
127 | | uint64_t cc3; |
128 | | uint64_t cc6; |
129 | | uint64_t cc7; |
130 | | }; |
131 | | |
132 | | static void do_get_hw_residencies(void *arg) |
133 | 0 | { |
134 | 0 | struct cpuinfo_x86 *c = &current_cpu_data; |
135 | 0 | struct hw_residencies *hw_res = arg; |
136 | 0 | |
137 | 0 | if ( c->x86_vendor != X86_VENDOR_INTEL || c->x86 != 6 ) |
138 | 0 | return; |
139 | 0 | |
140 | 0 | switch ( c->x86_model ) |
141 | 0 | { |
142 | 0 | /* 4th generation Intel Core (Haswell) */ |
143 | 0 | case 0x45: |
144 | 0 | GET_PC8_RES(hw_res->pc8); |
145 | 0 | GET_PC9_RES(hw_res->pc9); |
146 | 0 | GET_PC10_RES(hw_res->pc10); |
147 | 0 | /* fall through */ |
148 | 0 | /* Sandy Bridge */ |
149 | 0 | case 0x2A: |
150 | 0 | case 0x2D: |
151 | 0 | /* Ivy Bridge */ |
152 | 0 | case 0x3A: |
153 | 0 | case 0x3E: |
154 | 0 | /* Haswell */ |
155 | 0 | case 0x3C: |
156 | 0 | case 0x3F: |
157 | 0 | case 0x46: |
158 | 0 | /* Broadwell */ |
159 | 0 | case 0x3D: |
160 | 0 | case 0x47: |
161 | 0 | case 0x4F: |
162 | 0 | case 0x56: |
163 | 0 | /* Skylake */ |
164 | 0 | case 0x4E: |
165 | 0 | case 0x55: |
166 | 0 | case 0x5E: |
167 | 0 | /* Cannon Lake */ |
168 | 0 | case 0x66: |
169 | 0 | /* Kaby Lake */ |
170 | 0 | case 0x8E: |
171 | 0 | case 0x9E: |
172 | 0 | GET_PC2_RES(hw_res->pc2); |
173 | 0 | GET_CC7_RES(hw_res->cc7); |
174 | 0 | /* fall through */ |
175 | 0 | /* Nehalem */ |
176 | 0 | case 0x1A: |
177 | 0 | case 0x1E: |
178 | 0 | case 0x1F: |
179 | 0 | case 0x2E: |
180 | 0 | /* Westmere */ |
181 | 0 | case 0x25: |
182 | 0 | case 0x2C: |
183 | 0 | case 0x2F: |
184 | 0 | GET_PC3_RES(hw_res->pc3); |
185 | 0 | GET_PC6_RES(hw_res->pc6); |
186 | 0 | GET_PC7_RES(hw_res->pc7); |
187 | 0 | GET_CC3_RES(hw_res->cc3); |
188 | 0 | GET_CC6_RES(hw_res->cc6); |
189 | 0 | break; |
190 | 0 | /* Xeon Phi Knights Landing */ |
191 | 0 | case 0x57: |
192 | 0 | /* Xeon Phi Knights Mill */ |
193 | 0 | case 0x85: |
194 | 0 | GET_CC3_RES(hw_res->mc0); /* abusing GET_CC3_RES */ |
195 | 0 | GET_CC6_RES(hw_res->mc6); /* abusing GET_CC6_RES */ |
196 | 0 | GET_PC2_RES(hw_res->pc2); |
197 | 0 | GET_PC3_RES(hw_res->pc3); |
198 | 0 | GET_PC6_RES(hw_res->pc6); |
199 | 0 | GET_PC7_RES(hw_res->pc7); |
200 | 0 | PHI_CC6_RES(hw_res->cc6); |
201 | 0 | break; |
202 | 0 | /* various Atoms */ |
203 | 0 | case 0x27: |
204 | 0 | GET_PC3_RES(hw_res->pc2); /* abusing GET_PC3_RES */ |
205 | 0 | GET_PC6_RES(hw_res->pc4); /* abusing GET_PC6_RES */ |
206 | 0 | GET_PC7_RES(hw_res->pc6); /* abusing GET_PC7_RES */ |
207 | 0 | break; |
208 | 0 | /* Silvermont */ |
209 | 0 | case 0x37: |
210 | 0 | case 0x4A: |
211 | 0 | case 0x4D: |
212 | 0 | case 0x5A: |
213 | 0 | case 0x5D: |
214 | 0 | /* Airmont */ |
215 | 0 | case 0x4C: |
216 | 0 | GET_MC6_RES(hw_res->mc6); |
217 | 0 | GET_PC7_RES(hw_res->pc6); /* abusing GET_PC7_RES */ |
218 | 0 | GET_CC1_RES(hw_res->cc1); |
219 | 0 | GET_CC6_RES(hw_res->cc6); |
220 | 0 | break; |
221 | 0 | /* Goldmont */ |
222 | 0 | case 0x5C: |
223 | 0 | case 0x5F: |
224 | 0 | /* Goldmont Plus */ |
225 | 0 | case 0x7A: |
226 | 0 | GET_PC2_RES(hw_res->pc2); |
227 | 0 | GET_PC3_RES(hw_res->pc3); |
228 | 0 | GET_PC6_RES(hw_res->pc6); |
229 | 0 | GET_PC10_RES(hw_res->pc10); |
230 | 0 | GET_CC1_RES(hw_res->cc1); |
231 | 0 | GET_CC3_RES(hw_res->cc3); |
232 | 0 | GET_CC6_RES(hw_res->cc6); |
233 | 0 | break; |
234 | 0 | } |
235 | 0 | } |
236 | | |
237 | | static void get_hw_residencies(uint32_t cpu, struct hw_residencies *hw_res) |
238 | 0 | { |
239 | 0 | memset(hw_res, 0, sizeof(*hw_res)); |
240 | 0 | |
241 | 0 | if ( smp_processor_id() == cpu ) |
242 | 0 | do_get_hw_residencies(hw_res); |
243 | 0 | else |
244 | 0 | on_selected_cpus(cpumask_of(cpu), do_get_hw_residencies, hw_res, 1); |
245 | 0 | } |
246 | | |
247 | | static void print_hw_residencies(uint32_t cpu) |
248 | 0 | { |
249 | 0 | struct hw_residencies hw_res; |
250 | 0 | |
251 | 0 | get_hw_residencies(cpu, &hw_res); |
252 | 0 | |
253 | 0 | if ( hw_res.mc0 | hw_res.mc6 ) |
254 | 0 | printk("MC0[%"PRIu64"] MC6[%"PRIu64"]\n", |
255 | 0 | hw_res.mc0, hw_res.mc6); |
256 | 0 | printk("PC2[%"PRIu64"] PC%d[%"PRIu64"] PC6[%"PRIu64"] PC7[%"PRIu64"]\n", |
257 | 0 | hw_res.pc2, |
258 | 0 | hw_res.pc4 ? 4 : 3, hw_res.pc4 ?: hw_res.pc3, |
259 | 0 | hw_res.pc6, hw_res.pc7); |
260 | 0 | if ( hw_res.pc8 | hw_res.pc9 | hw_res.pc10 ) |
261 | 0 | printk("PC8[%"PRIu64"] PC9[%"PRIu64"] PC10[%"PRIu64"]\n", |
262 | 0 | hw_res.pc8, hw_res.pc9, hw_res.pc10); |
263 | 0 | printk("CC%d[%"PRIu64"] CC6[%"PRIu64"] CC7[%"PRIu64"]\n", |
264 | 0 | hw_res.cc1 ? 1 : 3, hw_res.cc1 ?: hw_res.cc3, |
265 | 0 | hw_res.cc6, hw_res.cc7); |
266 | 0 | } |
267 | | |
268 | | static char* acpi_cstate_method_name[] = |
269 | | { |
270 | | "NONE", |
271 | | "SYSIO", |
272 | | "FFH", |
273 | | "HALT" |
274 | | }; |
275 | | |
276 | 3.43M | static uint64_t get_stime_tick(void) { return (uint64_t)NOW(); } |
277 | 1.56M | static uint64_t stime_ticks_elapsed(uint64_t t1, uint64_t t2) { return t2 - t1; } |
278 | 1.75M | static uint64_t stime_tick_to_ns(uint64_t ticks) { return ticks; } |
279 | | |
280 | 0 | static uint64_t get_acpi_pm_tick(void) { return (uint64_t)inl(pmtmr_ioport); } |
281 | | static uint64_t acpi_pm_ticks_elapsed(uint64_t t1, uint64_t t2) |
282 | 0 | { |
283 | 0 | if ( t2 >= t1 ) |
284 | 0 | return (t2 - t1); |
285 | 0 | else if ( !(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER) ) |
286 | 0 | return (((0x00FFFFFF - t1) + t2 + 1) & 0x00FFFFFF); |
287 | 0 | else |
288 | 0 | return ((0xFFFFFFFF - t1) + t2 + 1); |
289 | 0 | } |
290 | | |
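A worked example of the wraparound arithmetic: with a 24-bit PM timer,
t1 = 0xFFFFF0 and t2 = 0x10 give ((0x00FFFFFF - t1) + t2 + 1) & 0x00FFFFFF
= 0x20, i.e. 32 ticks: 0x10 ticks up to the wrap plus 0x10 ticks after it.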
291 | | uint64_t (*__read_mostly cpuidle_get_tick)(void) = get_acpi_pm_tick; |
292 | | static uint64_t (*__read_mostly ticks_elapsed)(uint64_t, uint64_t) |
293 | | = acpi_pm_ticks_elapsed; |
294 | | |
295 | | static void print_acpi_power(uint32_t cpu, struct acpi_processor_power *power) |
296 | 0 | { |
297 | 0 | uint64_t idle_res = 0, idle_usage = 0; |
298 | 0 | uint64_t last_state_update_tick, current_tick, current_stime; |
299 | 0 | uint64_t usage[ACPI_PROCESSOR_MAX_POWER] = { 0 }; |
300 | 0 | uint64_t res_tick[ACPI_PROCESSOR_MAX_POWER] = { 0 }; |
301 | 0 | unsigned int i; |
302 | 0 | signed int last_state_idx; |
303 | 0 | |
304 | 0 | printk("==cpu%d==\n", cpu); |
305 | 0 | last_state_idx = power->last_state ? power->last_state->idx : -1; |
306 | 0 | printk("active state:\t\tC%d\n", last_state_idx); |
307 | 0 | printk("max_cstate:\t\tC%d\n", max_cstate); |
308 | 0 | printk("states:\n"); |
309 | 0 | |
310 | 0 | spin_lock_irq(&power->stat_lock); |
311 | 0 | current_tick = cpuidle_get_tick(); |
312 | 0 | current_stime = NOW(); |
313 | 0 | for ( i = 1; i < power->count; i++ ) |
314 | 0 | { |
315 | 0 | res_tick[i] = power->states[i].time; |
316 | 0 | usage[i] = power->states[i].usage; |
317 | 0 | } |
318 | 0 | last_state_update_tick = power->last_state_update_tick; |
319 | 0 | spin_unlock_irq(&power->stat_lock); |
320 | 0 | |
321 | 0 | if ( last_state_idx >= 0 ) |
322 | 0 | { |
323 | 0 | res_tick[last_state_idx] += ticks_elapsed(last_state_update_tick, |
324 | 0 | current_tick); |
325 | 0 | usage[last_state_idx]++; |
326 | 0 | } |
327 | 0 | |
328 | 0 | for ( i = 1; i < power->count; i++ ) |
329 | 0 | { |
330 | 0 | idle_usage += usage[i]; |
331 | 0 | idle_res += tick_to_ns(res_tick[i]); |
332 | 0 | |
333 | 0 | printk((last_state_idx == i) ? " *" : " "); |
334 | 0 | printk("C%d:\t", i); |
335 | 0 | printk("type[C%d] ", power->states[i].type); |
336 | 0 | printk("latency[%03d] ", power->states[i].latency); |
337 | 0 | printk("usage[%08"PRIu64"] ", usage[i]); |
338 | 0 | printk("method[%5s] ", acpi_cstate_method_name[power->states[i].entry_method]); |
339 | 0 | printk("duration[%"PRIu64"]\n", tick_to_ns(res_tick[i])); |
340 | 0 | } |
341 | 0 | printk((last_state_idx == 0) ? " *" : " "); |
342 | 0 | printk("C0:\tusage[%08"PRIu64"] duration[%"PRIu64"]\n", |
343 | 0 | usage[0] + idle_usage, current_stime - idle_res); |
344 | 0 | |
345 | 0 | print_hw_residencies(cpu); |
346 | 0 | } |
347 | | |
348 | | static void dump_cx(unsigned char key) |
349 | 0 | { |
350 | 0 | unsigned int cpu; |
351 | 0 | |
352 | 0 | printk("'%c' pressed -> printing ACPI Cx structures\n", key); |
353 | 0 | for_each_online_cpu ( cpu ) |
354 | 0 | if (processor_powers[cpu]) |
355 | 0 | print_acpi_power(cpu, processor_powers[cpu]); |
356 | 0 | } |
357 | | |
358 | | static int __init cpu_idle_key_init(void) |
359 | 1 | { |
360 | 1 | register_keyhandler('c', dump_cx, "dump ACPI Cx structures", 1); |
361 | 1 | return 0; |
362 | 1 | } |
363 | | __initcall(cpu_idle_key_init); |
364 | | |
365 | | /* |
366 | | * A CPU's bit is set iff it uses monitor/mwait to enter a C state; |
367 | | * with this flag set, the CPU can be woken from the C state by a |
368 | | * write to a specific memory address, instead of by an IPI. |
369 | | */ |
370 | | static cpumask_t cpuidle_mwait_flags; |
371 | | |
372 | | void cpuidle_wakeup_mwait(cpumask_t *mask) |
373 | 0 | { |
374 | 0 | cpumask_t target; |
375 | 0 | unsigned int cpu; |
376 | 0 | |
377 | 0 | cpumask_and(&target, mask, &cpuidle_mwait_flags); |
378 | 0 | |
379 | 0 | /* CPU is MWAITing on the cpuidle_mwait_wakeup flag. */ |
380 | 0 | for_each_cpu(cpu, &target) |
381 | 0 | mwait_wakeup(cpu) = 0; |
382 | 0 | |
383 | 0 | cpumask_andnot(mask, mask, &target); |
384 | 0 | } |
385 | | |
386 | | bool arch_skip_send_event_check(unsigned int cpu) |
387 | 98.6k | { |
388 | 98.6k | /* |
389 | 98.6k | * This relies on softirq_pending() and mwait_wakeup() to access data |
390 | 98.6k | * on the same cache line. |
391 | 98.6k | */ |
392 | 98.6k | smp_mb(); |
393 | 98.6k | return !!cpumask_test_cpu(cpu, &cpuidle_mwait_flags); |
394 | 98.6k | } |
395 | | |
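A sketch of the sender side these two functions serve, with illustrative
names: the event-check path can try the store-based MWAIT wakeup first and
IPI only the CPUs that were not MWAITing (assuming a send_IPI_mask()-style
primitive):

    /* Illustrative sender: a store wakes MWAITing CPUs, the rest get an IPI. */
    static void event_check_sketch(cpumask_t *mask)
    {
        cpuidle_wakeup_mwait(mask);   /* clears the woken CPUs from mask */
        if ( !cpumask_empty(mask) )
            send_IPI_mask(mask, EVENT_CHECK_VECTOR);
    }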
396 | | void mwait_idle_with_hints(unsigned int eax, unsigned int ecx) |
397 | 1.94M | { |
398 | 1.94M | unsigned int cpu = smp_processor_id(); |
399 | 1.94M | s_time_t expires = per_cpu(timer_deadline, cpu); |
400 | 1.94M | |
401 | 1.94M | if ( boot_cpu_has(X86_FEATURE_CLFLUSH_MONITOR) ) |
402 | 0 | { |
403 | 0 | mb(); |
404 | 0 | clflush((void *)&mwait_wakeup(cpu)); |
405 | 0 | mb(); |
406 | 0 | } |
407 | 1.94M | |
408 | 1.94M | __monitor((void *)&mwait_wakeup(cpu), 0, 0); |
409 | 1.94M | smp_mb(); |
410 | 1.94M | |
411 | 1.94M | /* |
412 | 1.94M | * Timer deadline passing is the event on which we will be woken via |
413 | 1.94M | * cpuidle_mwait_wakeup. So check it now that the location is armed. |
414 | 1.94M | */ |
415 | 1.94M | if ( (expires > NOW() || expires == 0) && !softirq_pending(cpu) ) |
416 | 1.87M | { |
417 | 1.87M | cpumask_set_cpu(cpu, &cpuidle_mwait_flags); |
418 | 1.87M | __mwait(eax, ecx); |
419 | 1.87M | cpumask_clear_cpu(cpu, &cpuidle_mwait_flags); |
420 | 1.87M | } |
421 | 1.94M | |
422 | 1.94M | if ( expires <= NOW() && expires > 0 ) |
423 | 8.46k | raise_softirq(TIMER_SOFTIRQ); |
424 | 1.94M | } |
425 | | |
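The sequence above is the classic race-free MONITOR/MWAIT idle pattern:
arm the monitor, order the arm against a re-check of every wakeup
condition, and only then wait, so a wakeup racing with the arm is never
slept through. In outline (wakeup_pending() stands in for the deadline and
softirq checks above):

    __monitor(flag, 0, 0);    /* arm the monitor on the flag's cache line */
    smp_mb();                 /* the arm happens strictly before the re-check */
    if ( !wakeup_pending() )  /* re-check: the event may already have fired */
        __mwait(eax, ecx);    /* woken by a store to flag or an interrupt */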
426 | | static void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) |
427 | 0 | { |
428 | 0 | mwait_idle_with_hints(cx->address, MWAIT_ECX_INTERRUPT_BREAK); |
429 | 0 | } |
430 | | |
431 | | static void acpi_idle_do_entry(struct acpi_processor_cx *cx) |
432 | 0 | { |
433 | 0 | switch ( cx->entry_method ) |
434 | 0 | { |
435 | 0 | case ACPI_CSTATE_EM_FFH: |
436 | 0 | /* Call into architectural FFH based C-state */ |
437 | 0 | acpi_processor_ffh_cstate_enter(cx); |
438 | 0 | return; |
439 | 0 | case ACPI_CSTATE_EM_SYSIO: |
440 | 0 | /* IO port based C-state */ |
441 | 0 | inb(cx->address); |
442 | 0 | /* Dummy wait op - must do something useless after P_LVL2 read |
443 | 0 | because chipsets cannot guarantee that STPCLK# signal |
444 | 0 | gets asserted in time to freeze execution properly. */ |
445 | 0 | inl(pmtmr_ioport); |
446 | 0 | return; |
447 | 0 | case ACPI_CSTATE_EM_HALT: |
448 | 0 | safe_halt(); |
449 | 0 | local_irq_disable(); |
450 | 0 | return; |
451 | 0 | } |
452 | 0 | } |
453 | | |
454 | | static int acpi_idle_bm_check(void) |
455 | 0 | { |
456 | 0 | u32 bm_status = 0; |
457 | 0 |
|
458 | 0 | acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status); |
459 | 0 | if ( bm_status ) |
460 | 0 | acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1); |
461 | 0 | /* |
462 | 0 | * TBD: PIIX4 Erratum #18: Note that BM_STS doesn't always reflect |
463 | 0 | * the true state of bus mastering activity; forcing us to |
464 | 0 | * manually check the BMIDEA bit of each IDE channel. |
465 | 0 | */ |
466 | 0 | return bm_status; |
467 | 0 | } |
468 | | |
469 | | static struct { |
470 | | spinlock_t lock; |
471 | | unsigned int count; |
472 | | } c3_cpu_status = { .lock = SPIN_LOCK_UNLOCKED }; |
473 | | |
474 | | void trace_exit_reason(u32 *irq_traced) |
475 | 1.51M | { |
476 | 1.51M | if ( unlikely(tb_init_done) ) |
477 | 0 | { |
478 | 0 | int i, curbit; |
479 | 0 | u32 irr_status[8] = { 0 }; |
480 | 0 | |
481 | 0 | /* Get local apic IRR register */ |
482 | 0 | for ( i = 0; i < 8; i++ ) |
483 | 0 | irr_status[i] = apic_read(APIC_IRR + (i << 4)); |
484 | 0 | i = 0; |
485 | 0 | curbit = find_first_bit((const unsigned long *)irr_status, 256); |
486 | 0 | while ( i < 4 && curbit < 256 ) |
487 | 0 | { |
488 | 0 | irq_traced[i++] = curbit; |
489 | 0 | curbit = find_next_bit((const unsigned long *)irr_status, 256, curbit + 1); |
490 | 0 | } |
491 | 0 | } |
492 | 1.51M | } |
493 | | |
494 | | /* |
495 | | * "AAJ72. EOI Transaction May Not be Sent if Software Enters Core C6 During |
496 | | * an Interrupt Service Routine" |
497 | | * |
498 | | * There was an errata with some Core i7 processors that an EOI transaction |
499 | | * may not be sent if software enters core C6 during an interrupt service |
500 | | * routine. So we don't enter deep Cx state if there is an EOI pending. |
501 | | */ |
502 | | static bool errata_c6_eoi_workaround(void) |
503 | 0 | { |
504 | 0 | static int8_t fix_needed = -1; |
505 | 0 | |
506 | 0 | if ( unlikely(fix_needed == -1) ) |
507 | 0 | { |
508 | 0 | int model = boot_cpu_data.x86_model; |
509 | 0 | fix_needed = (cpu_has_apic && !directed_eoi_enabled && |
510 | 0 | (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && |
511 | 0 | (boot_cpu_data.x86 == 6) && |
512 | 0 | ((model == 0x1a) || (model == 0x1e) || (model == 0x1f) || |
513 | 0 | (model == 0x25) || (model == 0x2c) || (model == 0x2f))); |
514 | 0 | } |
515 | 0 | |
516 | 0 | return (fix_needed && cpu_has_pending_apic_eoi()); |
517 | 0 | } |
518 | | |
519 | | void update_last_cx_stat(struct acpi_processor_power *power, |
520 | | struct acpi_processor_cx *cx, uint64_t ticks) |
521 | 1.89M | { |
522 | 1.89M | ASSERT(!local_irq_is_enabled()); |
523 | 1.89M | |
524 | 1.89M | spin_lock(&power->stat_lock); |
525 | 1.89M | power->last_state = cx; |
526 | 1.89M | power->last_state_update_tick = ticks; |
527 | 1.89M | spin_unlock(&power->stat_lock); |
528 | 1.89M | } |
529 | | |
530 | | void update_idle_stats(struct acpi_processor_power *power, |
531 | | struct acpi_processor_cx *cx, |
532 | | uint64_t before, uint64_t after) |
533 | 1.57M | { |
534 | 1.57M | int64_t sleep_ticks = ticks_elapsed(before, after); |
535 | 1.57M | /* Interrupts are disabled */ |
536 | 1.57M | |
537 | 1.57M | spin_lock(&power->stat_lock); |
538 | 1.57M | |
539 | 1.57M | cx->usage++; |
540 | 1.57M | if ( sleep_ticks > 0 ) |
541 | 1.78M | { |
542 | 1.78M | power->last_residency = tick_to_ns(sleep_ticks) / 1000UL; |
543 | 1.78M | cx->time += sleep_ticks; |
544 | 1.78M | } |
545 | 1.57M | power->last_state = &power->states[0]; |
546 | 1.57M | power->last_state_update_tick = after; |
547 | 1.57M | |
548 | 1.57M | spin_unlock(&power->stat_lock); |
549 | 1.57M | } |
550 | | |
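For scale: with the ACPI PM timer as the tick source (3.579545 MHz, about
279.4 ns per tick), a sleep of 10,000 ticks records roughly 2,794,000 ns,
so last_residency comes out near 2794 us after the /1000UL conversion from
ns to microseconds; on the NONSTOP_TSC path the ticks are already
nanoseconds and tick_to_ns() is the identity.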
551 | | static void acpi_processor_idle(void) |
552 | 0 | { |
553 | 0 | struct acpi_processor_power *power = processor_powers[smp_processor_id()]; |
554 | 0 | struct acpi_processor_cx *cx = NULL; |
555 | 0 | int next_state; |
556 | 0 | uint64_t t1, t2 = 0; |
557 | 0 | u32 exp = 0, pred = 0; |
558 | 0 | u32 irq_traced[4] = { 0 }; |
559 | 0 |
|
560 | 0 | if ( max_cstate > 0 && power && !sched_has_urgent_vcpu() && |
561 | 0 | (next_state = cpuidle_current_governor->select(power)) > 0 ) |
562 | 0 | { |
563 | 0 | cx = &power->states[next_state]; |
564 | 0 | if ( cx->type == ACPI_STATE_C3 && power->flags.bm_check && |
565 | 0 | acpi_idle_bm_check() ) |
566 | 0 | cx = power->safe_state; |
567 | 0 | if ( cx->idx > max_cstate ) |
568 | 0 | cx = &power->states[max_cstate]; |
569 | 0 | menu_get_trace_data(&exp, &pred); |
570 | 0 | } |
571 | 0 | if ( !cx ) |
572 | 0 | { |
573 | 0 | if ( pm_idle_save ) |
574 | 0 | pm_idle_save(); |
575 | 0 | else |
576 | 0 | safe_halt(); |
577 | 0 | return; |
578 | 0 | } |
579 | 0 | |
580 | 0 | cpufreq_dbs_timer_suspend(); |
581 | 0 | |
582 | 0 | sched_tick_suspend(); |
583 | 0 | /* sched_tick_suspend() can raise TIMER_SOFTIRQ. Process it now. */ |
584 | 0 | process_pending_softirqs(); |
585 | 0 | |
586 | 0 | /* |
587 | 0 | * Interrupts must be disabled during bus mastering calculations and |
588 | 0 | * for C2/C3 transitions. |
589 | 0 | */ |
590 | 0 | local_irq_disable(); |
591 | 0 | |
592 | 0 | if ( !cpu_is_haltable(smp_processor_id()) ) |
593 | 0 | { |
594 | 0 | local_irq_enable(); |
595 | 0 | sched_tick_resume(); |
596 | 0 | cpufreq_dbs_timer_resume(); |
597 | 0 | return; |
598 | 0 | } |
599 | 0 | |
600 | 0 | if ( (cx->type == ACPI_STATE_C3) && errata_c6_eoi_workaround() ) |
601 | 0 | cx = power->safe_state; |
602 | 0 | |
603 | 0 | |
604 | 0 | /* |
605 | 0 | * Sleep: |
606 | 0 | * ------ |
607 | 0 | * Invoke the current Cx state to put the processor to sleep. |
608 | 0 | */ |
609 | 0 | switch ( cx->type ) |
610 | 0 | { |
611 | 0 | case ACPI_STATE_C1: |
612 | 0 | case ACPI_STATE_C2: |
613 | 0 | if ( cx->type == ACPI_STATE_C1 || local_apic_timer_c2_ok ) |
614 | 0 | { |
615 | 0 | /* Get start time (ticks) */ |
616 | 0 | t1 = cpuidle_get_tick(); |
617 | 0 | /* Trace cpu idle entry */ |
618 | 0 | TRACE_4D(TRC_PM_IDLE_ENTRY, cx->idx, t1, exp, pred); |
619 | 0 | |
620 | 0 | update_last_cx_stat(power, cx, t1); |
621 | 0 | |
622 | 0 | /* Invoke C2 */ |
623 | 0 | acpi_idle_do_entry(cx); |
624 | 0 | /* Get end time (ticks) */ |
625 | 0 | t2 = cpuidle_get_tick(); |
626 | 0 | trace_exit_reason(irq_traced); |
627 | 0 | /* Trace cpu idle exit */ |
628 | 0 | TRACE_6D(TRC_PM_IDLE_EXIT, cx->idx, t2, |
629 | 0 | irq_traced[0], irq_traced[1], irq_traced[2], irq_traced[3]); |
630 | 0 | /* Update statistics */ |
631 | 0 | update_idle_stats(power, cx, t1, t2); |
632 | 0 | /* Re-enable interrupts */ |
633 | 0 | local_irq_enable(); |
634 | 0 | break; |
635 | 0 | } |
636 | 0 | |
637 | 0 | case ACPI_STATE_C3: |
638 | 0 | /* |
639 | 0 | * Before invoking C3, be aware that TSC/APIC timer may be |
640 | 0 | * stopped by H/W. Without carefully handling of TSC/APIC stop issues, |
641 | 0 | * deep C state can't work correctly. |
642 | 0 | */ |
643 | 0 | /* preparing APIC stop */ |
644 | 0 | lapic_timer_off(); |
645 | 0 | |
646 | 0 | /* Get start time (ticks) */ |
647 | 0 | t1 = cpuidle_get_tick(); |
648 | 0 | /* Trace cpu idle entry */ |
649 | 0 | TRACE_4D(TRC_PM_IDLE_ENTRY, cx->idx, t1, exp, pred); |
650 | 0 | |
651 | 0 | update_last_cx_stat(power, cx, t1); |
652 | 0 | |
653 | 0 | /* |
654 | 0 | * disable bus master |
655 | 0 | * bm_check implies we need ARB_DIS |
656 | 0 | * !bm_check implies we need cache flush |
657 | 0 | * bm_control implies whether we can do ARB_DIS |
658 | 0 | * |
659 | 0 | * That leaves a case where bm_check is set and bm_control is |
660 | 0 | * not set. In that case we cannot do much, we enter C3 |
661 | 0 | * without doing anything. |
662 | 0 | */ |
663 | 0 | if ( cx->type != ACPI_STATE_C3 ) |
664 | 0 | /* nothing to be done here */; |
665 | 0 | else if ( power->flags.bm_check && power->flags.bm_control ) |
666 | 0 | { |
667 | 0 | spin_lock(&c3_cpu_status.lock); |
668 | 0 | if ( ++c3_cpu_status.count == num_online_cpus() ) |
669 | 0 | { |
670 | 0 | /* |
671 | 0 | * All CPUs are trying to go to C3 |
672 | 0 | * Disable bus master arbitration |
673 | 0 | */ |
674 | 0 | acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1); |
675 | 0 | } |
676 | 0 | spin_unlock(&c3_cpu_status.lock); |
677 | 0 | } |
678 | 0 | else if ( !power->flags.bm_check ) |
679 | 0 | { |
680 | 0 | /* SMP with no shared cache... Invalidate cache */ |
681 | 0 | ACPI_FLUSH_CPU_CACHE(); |
682 | 0 | } |
683 | 0 | |
684 | 0 | /* Invoke C3 */ |
685 | 0 | acpi_idle_do_entry(cx); |
686 | 0 | |
687 | 0 | if ( (cx->type == ACPI_STATE_C3) && |
688 | 0 | power->flags.bm_check && power->flags.bm_control ) |
689 | 0 | { |
690 | 0 | /* Enable bus master arbitration */ |
691 | 0 | spin_lock(&c3_cpu_status.lock); |
692 | 0 | if ( c3_cpu_status.count-- == num_online_cpus() ) |
693 | 0 | acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0); |
694 | 0 | spin_unlock(&c3_cpu_status.lock); |
695 | 0 | } |
696 | 0 | |
697 | 0 | /* Get end time (ticks) */ |
698 | 0 | t2 = cpuidle_get_tick(); |
699 | 0 | |
700 | 0 | /* recovering TSC */ |
701 | 0 | cstate_restore_tsc(); |
702 | 0 | trace_exit_reason(irq_traced); |
703 | 0 | /* Trace cpu idle exit */ |
704 | 0 | TRACE_6D(TRC_PM_IDLE_EXIT, cx->idx, t2, |
705 | 0 | irq_traced[0], irq_traced[1], irq_traced[2], irq_traced[3]); |
706 | 0 | |
707 | 0 | /* Update statistics */ |
708 | 0 | update_idle_stats(power, cx, t1, t2); |
709 | 0 | /* Re-enable interrupts */ |
710 | 0 | local_irq_enable(); |
711 | 0 | /* recovering APIC */ |
712 | 0 | lapic_timer_on(); |
713 | 0 | |
714 | 0 | break; |
715 | 0 | |
716 | 0 | default: |
717 | 0 | /* Now in C0 */ |
718 | 0 | power->last_state = &power->states[0]; |
719 | 0 | local_irq_enable(); |
720 | 0 | sched_tick_resume(); |
721 | 0 | cpufreq_dbs_timer_resume(); |
722 | 0 | return; |
723 | 0 | } |
724 | 0 | |
725 | 0 | /* Now in C0 */ |
726 | 0 | power->last_state = &power->states[0]; |
727 | 0 | |
728 | 0 | sched_tick_resume(); |
729 | 0 | cpufreq_dbs_timer_resume(); |
730 | 0 | |
731 | 0 | if ( cpuidle_current_governor->reflect ) |
732 | 0 | cpuidle_current_governor->reflect(power); |
733 | 0 | } |
734 | | |
735 | | void acpi_dead_idle(void) |
736 | 0 | { |
737 | 0 | struct acpi_processor_power *power; |
738 | 0 | struct acpi_processor_cx *cx; |
739 | 0 | |
740 | 0 | if ( (power = processor_powers[smp_processor_id()]) == NULL ) |
741 | 0 | goto default_halt; |
742 | 0 | |
743 | 0 | if ( (cx = &power->states[power->count-1]) == NULL ) |
744 | 0 | goto default_halt; |
745 | 0 | |
746 | 0 | if ( cx->entry_method == ACPI_CSTATE_EM_FFH ) |
747 | 0 | { |
748 | 0 | void *mwait_ptr = &mwait_wakeup(smp_processor_id()); |
749 | 0 | |
750 | 0 | /* |
751 | 0 | * Cache must be flushed as the last operation before sleeping. |
752 | 0 | * Otherwise, CPU may still hold dirty data, breaking cache coherency, |
753 | 0 | * leading to strange errors. |
754 | 0 | */ |
755 | 0 | wbinvd(); |
756 | 0 | |
757 | 0 | while ( 1 ) |
758 | 0 | { |
759 | 0 | /* |
760 | 0 | * 1. The CLFLUSH is a workaround for erratum AAI65 for |
761 | 0 | * the Xeon 7400 series. |
762 | 0 | * 2. The WBINVD is insufficient due to the spurious-wakeup |
763 | 0 | * case where we return around the loop. |
764 | 0 | * 3. Unlike wbinvd, clflush is a lightweight but non-serializing |
765 | 0 | * instruction, hence a memory fence is necessary to make all |
766 | 0 | * loads/stores visible before flushing the cache line. |
767 | 0 | */ |
768 | 0 | mb(); |
769 | 0 | clflush(mwait_ptr); |
770 | 0 | __monitor(mwait_ptr, 0, 0); |
771 | 0 | mb(); |
772 | 0 | __mwait(cx->address, 0); |
773 | 0 | } |
774 | 0 | } |
775 | 0 | else if ( current_cpu_data.x86_vendor == X86_VENDOR_AMD && |
776 | 0 | cx->entry_method == ACPI_CSTATE_EM_SYSIO ) |
777 | 0 | { |
778 | 0 | /* Intel prefers not to use SYSIO */ |
779 | 0 | |
780 | 0 | /* Avoid references to shared data after the cache flush */ |
781 | 0 | u32 address = cx->address; |
782 | 0 | u32 pmtmr_ioport_local = pmtmr_ioport; |
783 | 0 | |
784 | 0 | wbinvd(); |
785 | 0 | |
786 | 0 | while ( 1 ) |
787 | 0 | { |
788 | 0 | inb(address); |
789 | 0 | inl(pmtmr_ioport_local); |
790 | 0 | } |
791 | 0 | } |
792 | 0 | |
793 | 0 | default_halt: |
794 | 0 | default_dead_idle(); |
795 | 0 | } |
796 | | |
797 | | int cpuidle_init_cpu(unsigned int cpu) |
798 | 12 | { |
799 | 12 | struct acpi_processor_power *acpi_power; |
800 | 12 | |
801 | 12 | acpi_power = processor_powers[cpu]; |
802 | 12 | if ( !acpi_power ) |
803 | 12 | { |
804 | 12 | unsigned int i; |
805 | 12 | |
806 | 12 | if ( cpu == 0 && boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ) |
807 | 1 | { |
808 | 1 | cpuidle_get_tick = get_stime_tick; |
809 | 1 | ticks_elapsed = stime_ticks_elapsed; |
810 | 1 | tick_to_ns = stime_tick_to_ns; |
811 | 1 | } |
812 | 12 | |
813 | 12 | acpi_power = xzalloc(struct acpi_processor_power); |
814 | 12 | if ( !acpi_power ) |
815 | 0 | return -ENOMEM; |
816 | 12 | |
817 | 108 | for ( i = 0; i < ACPI_PROCESSOR_MAX_POWER; i++ ) |
818 | 96 | acpi_power->states[i].idx = i; |
819 | 12 | |
820 | 12 | acpi_power->cpu = cpu; |
821 | 12 | processor_powers[cpu] = acpi_power; |
822 | 12 | } |
823 | 12 | |
824 | 12 | acpi_power->count = 2; |
825 | 12 | acpi_power->states[1].type = ACPI_STATE_C1; |
826 | 12 | acpi_power->states[1].entry_method = ACPI_CSTATE_EM_HALT; |
827 | 12 | acpi_power->safe_state = &acpi_power->states[1]; |
828 | 12 | spin_lock_init(&acpi_power->stat_lock); |
829 | 12 | |
830 | 12 | return 0; |
831 | 12 | } |
832 | | |
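After this default initialization every CPU has a two-entry table, C0 plus
a C1 state entered via HALT, with safe_state pointing at the C1 entry, so
idling works before (or without) any _CST upload; set_cx_pminfo() below
extends the same table in place once full ACPI data arrives.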
833 | | static int acpi_processor_ffh_cstate_probe(xen_processor_cx_t *cx) |
834 | 0 | { |
835 | 0 | struct cpuinfo_x86 *c = &current_cpu_data; |
836 | 0 | unsigned int eax, ebx, ecx, edx; |
837 | 0 | unsigned int edx_part; |
838 | 0 | unsigned int cstate_type; /* C-state type and not ACPI C-state type */ |
839 | 0 | unsigned int num_cstate_subtype; |
840 | 0 | int ret = 0; |
841 | 0 | static unsigned long printed; |
842 | 0 | |
843 | 0 | if ( c->cpuid_level < CPUID_MWAIT_LEAF ) |
844 | 0 | { |
845 | 0 | printk(XENLOG_INFO "MWAIT leaf not supported by cpuid\n"); |
846 | 0 | return -EFAULT; |
847 | 0 | } |
848 | 0 | |
849 | 0 | cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); |
850 | 0 | if ( opt_cpu_info ) |
851 | 0 | printk(XENLOG_DEBUG "cpuid.MWAIT[eax=%x ebx=%x ecx=%x edx=%x]\n", |
852 | 0 | eax, ebx, ecx, edx); |
853 | 0 | |
854 | 0 | /* Check whether this particular cx_type (in CST) is supported or not */ |
855 | 0 | cstate_type = (cx->reg.address >> MWAIT_SUBSTATE_SIZE) + 1; |
856 | 0 | edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE); |
857 | 0 | num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK; |
858 | 0 | |
859 | 0 | if ( num_cstate_subtype < (cx->reg.address & MWAIT_SUBSTATE_MASK) ) |
860 | 0 | ret = -ERANGE; |
861 | 0 | /* mwait ecx extensions INTERRUPT_BREAK should be supported for C2/C3 */ |
862 | 0 | else if ( !(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || |
863 | 0 | !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ) |
864 | 0 | ret = -ENODEV; |
865 | 0 | else if ( opt_cpu_info || cx->type >= BITS_PER_LONG || |
866 | 0 | !test_and_set_bit(cx->type, &printed) ) |
867 | 0 | printk(XENLOG_INFO "Monitor-Mwait will be used to enter C%d state\n", |
868 | 0 | cx->type); |
869 | 0 | return ret; |
870 | 0 | } |
871 | | |
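A worked decode, assuming Xen's MWAIT_SUBSTATE_SIZE of 4 and
MWAIT_SUBSTATE_MASK of 0xf: an FFH _CST entry with reg.address 0x20 gives
cstate_type = (0x20 >> 4) + 1 = 3 and sub-state 0x20 & 0xf = 0, so the
probe inspects (edx >> (3 * 4)) & 0xf, the CPUID.5 EDX count of C3-type
sub-states, and returns -ERANGE if that count is below the requested
sub-state.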
872 | | /* |
873 | | * Initialize bm_flags based on the CPU cache properties |
874 | | * On SMP it depends on cache configuration |
875 | | * - When cache is not shared among all CPUs, we flush cache |
876 | | * before entering C3. |
877 | | * - When cache is shared among all CPUs, we use bm_check |
878 | | * mechanism as in UP case |
879 | | * |
880 | | * This routine is called only after all the CPUs are online |
881 | | */ |
882 | | static void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags) |
883 | 0 | { |
884 | 0 | struct cpuinfo_x86 *c = &current_cpu_data; |
885 | 0 | |
886 | 0 | flags->bm_check = 0; |
887 | 0 | if ( num_online_cpus() == 1 ) |
888 | 0 | flags->bm_check = 1; |
889 | 0 | else if ( (c->x86_vendor == X86_VENDOR_INTEL) || |
890 | 0 | ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 0x15)) ) |
891 | 0 | { |
892 | 0 | /* |
893 | 0 | * Today all MP CPUs that support C3 share cache. |
894 | 0 | * And caches should not be flushed by software while |
895 | 0 | * entering C3 type state. |
896 | 0 | */ |
897 | 0 | flags->bm_check = 1; |
898 | 0 | } |
899 | 0 | |
900 | 0 | /* |
901 | 0 | * On all recent platforms, ARB_DISABLE is a nop. |
902 | 0 | * So, set bm_control to zero to indicate that ARB_DISABLE |
903 | 0 | * is not required while entering C3 type state on |
904 | 0 | * P4, Core and beyond CPUs |
905 | 0 | */ |
906 | 0 | if ( c->x86_vendor == X86_VENDOR_INTEL && |
907 | 0 | (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 14)) ) |
908 | 0 | flags->bm_control = 0; |
909 | 0 | } |
910 | | |
911 | 0 | #define VENDOR_INTEL (1) |
912 | 0 | #define NATIVE_CSTATE_BEYOND_HALT (2) |
913 | | |
914 | | static int check_cx(struct acpi_processor_power *power, xen_processor_cx_t *cx) |
915 | 0 | { |
916 | 0 | static int bm_check_flag = -1; |
917 | 0 | static int bm_control_flag = -1; |
918 | 0 | |
919 | 0 | switch ( cx->reg.space_id ) |
920 | 0 | { |
921 | 0 | case ACPI_ADR_SPACE_SYSTEM_IO: |
922 | 0 | if ( cx->reg.address == 0 ) |
923 | 0 | return -EINVAL; |
924 | 0 | break; |
925 | 0 | |
926 | 0 | case ACPI_ADR_SPACE_FIXED_HARDWARE: |
927 | 0 | if ( cx->reg.bit_width != VENDOR_INTEL || |
928 | 0 | cx->reg.bit_offset != NATIVE_CSTATE_BEYOND_HALT ) |
929 | 0 | return -EINVAL; |
930 | 0 | |
931 | 0 | /* assume all logical CPUs have the same support for mwait */ |
932 | 0 | if ( acpi_processor_ffh_cstate_probe(cx) ) |
933 | 0 | return -EINVAL; |
934 | 0 | break; |
935 | 0 | |
936 | 0 | default: |
937 | 0 | return -ENODEV; |
938 | 0 | } |
939 | 0 | |
940 | 0 | switch ( cx->type ) |
941 | 0 | { |
942 | 0 | case ACPI_STATE_C2: |
943 | 0 | if ( local_apic_timer_c2_ok ) |
944 | 0 | break; |
945 | 0 | case ACPI_STATE_C3: |
946 | 0 | if ( !lapic_timer_init() ) |
947 | 0 | return -EINVAL; |
948 | 0 | |
949 | 0 | /* All the logic here assumes flags.bm_check is same across all CPUs */ |
950 | 0 | if ( bm_check_flag < 0 ) |
951 | 0 | { |
952 | 0 | /* Determine whether bm_check is needed based on CPU */ |
953 | 0 | acpi_processor_power_init_bm_check(&(power->flags)); |
954 | 0 | } |
955 | 0 | else |
956 | 0 | { |
957 | 0 | power->flags.bm_check = bm_check_flag; |
958 | 0 | power->flags.bm_control = bm_control_flag; |
959 | 0 | } |
960 | 0 | |
961 | 0 | if ( power->flags.bm_check ) |
962 | 0 | { |
963 | 0 | if ( !power->flags.bm_control ) |
964 | 0 | { |
965 | 0 | if ( power->flags.has_cst != 1 ) |
966 | 0 | { |
967 | 0 | /* bus mastering control is necessary */ |
968 | 0 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
969 | 0 | "C3 support requires BM control\n")); |
970 | 0 | return -EINVAL; |
971 | 0 | } |
972 | 0 | else |
973 | 0 | { |
974 | 0 | /* Here we enter C3 without bus mastering */ |
975 | 0 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
976 | 0 | "C3 support without BM control\n")); |
977 | 0 | } |
978 | 0 | } |
979 | 0 | /* |
980 | 0 | * On older chipsets, BM_RLD needs to be set in order for Bus |
981 | 0 | * Master activity to wake the system from C3, hence |
982 | 0 | * acpi_set_register() is always being called once below. Newer |
983 | 0 | * chipsets handle DMA during C3 automatically and BM_RLD is a |
984 | 0 | * NOP. In either case, the proper way to handle BM_RLD is to |
985 | 0 | * set it and leave it set. |
986 | 0 | */ |
987 | 0 | } |
988 | 0 | else |
989 | 0 | { |
990 | 0 | /* |
991 | 0 | * WBINVD should be set in the FADT for C3 state to be |
992 | 0 | * supported when bm_check is not required. |
993 | 0 | */ |
994 | 0 | if ( !(acpi_gbl_FADT.flags & ACPI_FADT_WBINVD) ) |
995 | 0 | { |
996 | 0 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
997 | 0 | "Cache invalidation should work properly" |
998 | 0 | " for C3 to be enabled on SMP systems\n")); |
999 | 0 | return -EINVAL; |
1000 | 0 | } |
1001 | 0 | } |
1002 | 0 | |
1003 | 0 | if ( bm_check_flag < 0 ) |
1004 | 0 | { |
1005 | 0 | bm_check_flag = power->flags.bm_check; |
1006 | 0 | bm_control_flag = power->flags.bm_control; |
1007 | 0 | acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, bm_check_flag); |
1008 | 0 | } |
1009 | 0 | |
1010 | 0 | break; |
1011 | 0 | } |
1012 | 0 | |
1013 | 0 | return 0; |
1014 | 0 | } |
1015 | | |
1016 | | static unsigned int latency_factor = 2; |
1017 | | integer_param("idle_latency_factor", latency_factor); |
1018 | | |
1019 | | static void set_cx( |
1020 | | struct acpi_processor_power *acpi_power, |
1021 | | xen_processor_cx_t *xen_cx) |
1022 | 0 | { |
1023 | 0 | struct acpi_processor_cx *cx; |
1024 | 0 | |
1025 | 0 | if ( check_cx(acpi_power, xen_cx) != 0 ) |
1026 | 0 | return; |
1027 | 0 | |
1028 | 0 | switch ( xen_cx->type ) |
1029 | 0 | { |
1030 | 0 | case ACPI_STATE_C1: |
1031 | 0 | cx = &acpi_power->states[1]; |
1032 | 0 | break; |
1033 | 0 | default: |
1034 | 0 | if ( acpi_power->count >= ACPI_PROCESSOR_MAX_POWER ) |
1035 | 0 | { |
1036 | 0 | case ACPI_STATE_C0: |
1037 | 0 | printk(XENLOG_WARNING "CPU%u: C%d data ignored\n", |
1038 | 0 | acpi_power->cpu, xen_cx->type); |
1039 | 0 | return; |
1040 | 0 | } |
1041 | 0 | cx = &acpi_power->states[acpi_power->count]; |
1042 | 0 | cx->type = xen_cx->type; |
1043 | 0 | break; |
1044 | 0 | } |
1045 | 0 | |
1046 | 0 | cx->address = xen_cx->reg.address; |
1047 | 0 | |
1048 | 0 | switch ( xen_cx->reg.space_id ) |
1049 | 0 | { |
1050 | 0 | case ACPI_ADR_SPACE_FIXED_HARDWARE: |
1051 | 0 | if ( xen_cx->reg.bit_width == VENDOR_INTEL && |
1052 | 0 | xen_cx->reg.bit_offset == NATIVE_CSTATE_BEYOND_HALT && |
1053 | 0 | boot_cpu_has(X86_FEATURE_MONITOR) ) |
1054 | 0 | cx->entry_method = ACPI_CSTATE_EM_FFH; |
1055 | 0 | else |
1056 | 0 | cx->entry_method = ACPI_CSTATE_EM_HALT; |
1057 | 0 | break; |
1058 | 0 | case ACPI_ADR_SPACE_SYSTEM_IO: |
1059 | 0 | if ( ioports_deny_access(hardware_domain, cx->address, cx->address) ) |
1060 | 0 | printk(XENLOG_WARNING "Could not deny access to port %04x\n", |
1061 | 0 | cx->address); |
1062 | 0 | cx->entry_method = ACPI_CSTATE_EM_SYSIO; |
1063 | 0 | break; |
1064 | 0 | default: |
1065 | 0 | cx->entry_method = ACPI_CSTATE_EM_NONE; |
1066 | 0 | break; |
1067 | 0 | } |
1068 | 0 | |
1069 | 0 | cx->latency = xen_cx->latency; |
1070 | 0 | cx->target_residency = cx->latency * latency_factor; |
1071 | 0 | |
1072 | 0 | smp_wmb(); |
1073 | 0 | acpi_power->count += (cx->type != ACPI_STATE_C1); |
1074 | 0 | if ( cx->type == ACPI_STATE_C1 || cx->type == ACPI_STATE_C2 ) |
1075 | 0 | acpi_power->safe_state = cx; |
1076 | 0 | } |
1077 | | |
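With the default latency_factor of 2, a state whose ACPI-reported
worst-case exit latency is 100 us gets target_residency = 200 us; the idle
governor is expected to pick the state only when the predicted idle
interval is at least its target residency, so the factor trades wakeup
latency against time spent in the deeper state.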
1078 | | int get_cpu_id(u32 acpi_id) |
1079 | 0 | { |
1080 | 0 | int i; |
1081 | 0 | u32 apic_id; |
1082 | 0 | |
1083 | 0 | if ( acpi_id >= MAX_MADT_ENTRIES ) |
1084 | 0 | return -1; |
1085 | 0 | |
1086 | 0 | apic_id = x86_acpiid_to_apicid[acpi_id]; |
1087 | 0 | if ( apic_id == BAD_APICID ) |
1088 | 0 | return -1; |
1089 | 0 | |
1090 | 0 | for ( i = 0; i < nr_cpu_ids; i++ ) |
1091 | 0 | { |
1092 | 0 | if ( apic_id == x86_cpu_to_apicid[i] ) |
1093 | 0 | return i; |
1094 | 0 | } |
1095 | 0 | |
1096 | 0 | return -1; |
1097 | 0 | } |
1098 | | |
1099 | | #ifdef DEBUG_PM_CX |
1100 | | static void print_cx_pminfo(uint32_t cpu, struct xen_processor_power *power) |
1101 | | { |
1102 | | XEN_GUEST_HANDLE(xen_processor_cx_t) states; |
1103 | | xen_processor_cx_t state; |
1104 | | XEN_GUEST_HANDLE(xen_processor_csd_t) csd; |
1105 | | xen_processor_csd_t dp; |
1106 | | uint32_t i; |
1107 | | |
1108 | | printk("cpu%d cx acpi info:\n", cpu); |
1109 | | printk("\tcount = %d\n", power->count); |
1110 | | printk("\tflags: bm_cntl[%d], bm_chk[%d], has_cst[%d],\n" |
1111 | | "\t pwr_setup_done[%d], bm_rld_set[%d]\n", |
1112 | | power->flags.bm_control, power->flags.bm_check, power->flags.has_cst, |
1113 | | power->flags.power_setup_done, power->flags.bm_rld_set); |
1114 | | |
1115 | | states = power->states; |
1116 | | |
1117 | | for ( i = 0; i < power->count; i++ ) |
1118 | | { |
1119 | | if ( unlikely(copy_from_guest_offset(&state, states, i, 1)) ) |
1120 | | return; |
1121 | | |
1122 | | printk("\tstates[%d]:\n", i); |
1123 | | printk("\t\treg.space_id = %#x\n", state.reg.space_id); |
1124 | | printk("\t\treg.bit_width = %#x\n", state.reg.bit_width); |
1125 | | printk("\t\treg.bit_offset = %#x\n", state.reg.bit_offset); |
1126 | | printk("\t\treg.access_size = %#x\n", state.reg.access_size); |
1127 | | printk("\t\treg.address = %#"PRIx64"\n", state.reg.address); |
1128 | | printk("\t\ttype = %d\n", state.type); |
1129 | | printk("\t\tlatency = %d\n", state.latency); |
1130 | | printk("\t\tpower = %d\n", state.power); |
1131 | | |
1132 | | csd = state.dp; |
1133 | | printk("\t\tdp(@0x%p)\n", csd.p); |
1134 | | |
1135 | | if ( csd.p != NULL ) |
1136 | | { |
1137 | | if ( unlikely(copy_from_guest(&dp, csd, 1)) ) |
1138 | | return; |
1139 | | printk("\t\t\tdomain = %d\n", dp.domain); |
1140 | | printk("\t\t\tcoord_type = %d\n", dp.coord_type); |
1141 | | printk("\t\t\tnum = %d\n", dp.num); |
1142 | | } |
1143 | | } |
1144 | | } |
1145 | | #else |
1146 | | #define print_cx_pminfo(c, p) |
1147 | | #endif |
1148 | | |
1149 | | long set_cx_pminfo(uint32_t cpu, struct xen_processor_power *power) |
1150 | 0 | { |
1151 | 0 | XEN_GUEST_HANDLE(xen_processor_cx_t) states; |
1152 | 0 | xen_processor_cx_t xen_cx; |
1153 | 0 | struct acpi_processor_power *acpi_power; |
1154 | 0 | int cpu_id, i, ret; |
1155 | 0 | |
1156 | 0 | if ( unlikely(!guest_handle_okay(power->states, power->count)) ) |
1157 | 0 | return -EFAULT; |
1158 | 0 | |
1159 | 0 | if ( pm_idle_save && pm_idle != acpi_processor_idle ) |
1160 | 0 | return 0; |
1161 | 0 | |
1162 | 0 | print_cx_pminfo(cpu, power); |
1163 | 0 | |
1164 | 0 | /* map from acpi_id to cpu_id */ |
1165 | 0 | cpu_id = get_cpu_id(cpu); |
1166 | 0 | if ( cpu_id == -1 ) |
1167 | 0 | { |
1168 | 0 | static bool warn_once = true; |
1169 | 0 |
|
1170 | 0 | if ( warn_once || opt_cpu_info ) |
1171 | 0 | printk(XENLOG_WARNING "No CPU ID for APIC ID %#x\n", cpu); |
1172 | 0 | warn_once = false; |
1173 | 0 | return -EINVAL; |
1174 | 0 | } |
1175 | 0 | |
1176 | 0 | ret = cpuidle_init_cpu(cpu_id); |
1177 | 0 | if ( ret < 0 ) |
1178 | 0 | return ret; |
1179 | 0 | |
1180 | 0 | acpi_power = processor_powers[cpu_id]; |
1181 | 0 | acpi_power->flags.bm_check = power->flags.bm_check; |
1182 | 0 | acpi_power->flags.bm_control = power->flags.bm_control; |
1183 | 0 | acpi_power->flags.has_cst = power->flags.has_cst; |
1184 | 0 | |
1185 | 0 | states = power->states; |
1186 | 0 | for ( i = 0; i < power->count; i++ ) |
1187 | 0 | { |
1188 | 0 | if ( unlikely(copy_from_guest_offset(&xen_cx, states, i, 1)) ) |
1189 | 0 | return -EFAULT; |
1190 | 0 | |
1191 | 0 | set_cx(acpi_power, &xen_cx); |
1192 | 0 | } |
1193 | 0 | |
1194 | 0 | if ( cpuidle_current_governor->enable && |
1195 | 0 | cpuidle_current_governor->enable(acpi_power) ) |
1196 | 0 | return -EFAULT; |
1197 | 0 | |
1198 | 0 | /* FIXME: C-state dependency is not supported so far */ |
1199 | 0 | |
1200 | 0 | if ( cpu_id == 0 ) |
1201 | 0 | { |
1202 | 0 | if ( pm_idle_save == NULL ) |
1203 | 0 | { |
1204 | 0 | pm_idle_save = pm_idle; |
1205 | 0 | pm_idle = acpi_processor_idle; |
1206 | 0 | } |
1207 | 0 | |
1208 | 0 | dead_idle = acpi_dead_idle; |
1209 | 0 | } |
1210 | 0 | |
1211 | 0 | return 0; |
1212 | 0 | } |
1213 | | |
1214 | | uint32_t pmstat_get_cx_nr(uint32_t cpuid) |
1215 | 0 | { |
1216 | 0 | return processor_powers[cpuid] ? processor_powers[cpuid]->count : 0; |
1217 | 0 | } |
1218 | | |
1219 | | int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat) |
1220 | 0 | { |
1221 | 0 | struct acpi_processor_power *power = processor_powers[cpuid]; |
1222 | 0 | uint64_t idle_usage = 0, idle_res = 0; |
1223 | 0 | uint64_t last_state_update_tick, current_stime, current_tick; |
1224 | 0 | uint64_t usage[ACPI_PROCESSOR_MAX_POWER] = { 0 }; |
1225 | 0 | uint64_t res[ACPI_PROCESSOR_MAX_POWER] = { 0 }; |
1226 | 0 | unsigned int i, nr, nr_pc = 0, nr_cc = 0; |
1227 | 0 |
|
1228 | 0 | if ( power == NULL ) |
1229 | 0 | { |
1230 | 0 | stat->last = 0; |
1231 | 0 | stat->nr = 0; |
1232 | 0 | stat->idle_time = 0; |
1233 | 0 | stat->nr_pc = 0; |
1234 | 0 | stat->nr_cc = 0; |
1235 | 0 | return 0; |
1236 | 0 | } |
1237 | 0 | |
1238 | 0 | stat->idle_time = get_cpu_idle_time(cpuid); |
1239 | 0 | nr = min(stat->nr, power->count); |
1240 | 0 | |
1241 | 0 | /* mimic the stat when detail info hasn't been registered by dom0 */ |
1242 | 0 | if ( pm_idle_save == NULL ) |
1243 | 0 | { |
1244 | 0 | stat->nr = 2; |
1245 | 0 | stat->last = power->last_state ? power->last_state->idx : 0; |
1246 | 0 | |
1247 | 0 | usage[1] = idle_usage = 1; |
1248 | 0 | res[1] = idle_res = stat->idle_time; |
1249 | 0 | |
1250 | 0 | current_stime = NOW(); |
1251 | 0 | } |
1252 | 0 | else |
1253 | 0 | { |
1254 | 0 | struct hw_residencies hw_res; |
1255 | 0 | signed int last_state_idx; |
1256 | 0 | |
1257 | 0 | stat->nr = power->count; |
1258 | 0 | |
1259 | 0 | spin_lock_irq(&power->stat_lock); |
1260 | 0 | current_tick = cpuidle_get_tick(); |
1261 | 0 | current_stime = NOW(); |
1262 | 0 | for ( i = 1; i < nr; i++ ) |
1263 | 0 | { |
1264 | 0 | usage[i] = power->states[i].usage; |
1265 | 0 | res[i] = power->states[i].time; |
1266 | 0 | } |
1267 | 0 | last_state_update_tick = power->last_state_update_tick; |
1268 | 0 | last_state_idx = power->last_state ? power->last_state->idx : -1; |
1269 | 0 | spin_unlock_irq(&power->stat_lock); |
1270 | 0 | |
1271 | 0 | if ( last_state_idx >= 0 ) |
1272 | 0 | { |
1273 | 0 | usage[last_state_idx]++; |
1274 | 0 | res[last_state_idx] += ticks_elapsed(last_state_update_tick, |
1275 | 0 | current_tick); |
1276 | 0 | stat->last = last_state_idx; |
1277 | 0 | } |
1278 | 0 | else |
1279 | 0 | stat->last = 0; |
1280 | 0 | |
1281 | 0 | for ( i = 1; i < nr; i++ ) |
1282 | 0 | { |
1283 | 0 | res[i] = tick_to_ns(res[i]); |
1284 | 0 | idle_usage += usage[i]; |
1285 | 0 | idle_res += res[i]; |
1286 | 0 | } |
1287 | 0 | |
1288 | 0 | get_hw_residencies(cpuid, &hw_res); |
1289 | 0 | |
1290 | 0 | #define PUT_xC(what, n) do { \ |
1291 | 0 | if ( stat->nr_##what >= n && \ |
1292 | 0 | copy_to_guest_offset(stat->what, n - 1, &hw_res.what##n, 1) ) \ |
1293 | 0 | return -EFAULT; \ |
1294 | 0 | if ( hw_res.what##n ) \ |
1295 | 0 | nr_##what = n; \ |
1296 | 0 | } while ( 0 ) |
1297 | 0 | #define PUT_PC(n) PUT_xC(pc, n) |
1298 | 0 | PUT_PC(2); |
1299 | 0 | PUT_PC(3); |
1300 | 0 | PUT_PC(4); |
1301 | 0 | PUT_PC(6); |
1302 | 0 | PUT_PC(7); |
1303 | 0 | PUT_PC(8); |
1304 | 0 | PUT_PC(9); |
1305 | 0 | PUT_PC(10); |
1306 | 0 | #undef PUT_PC |
1307 | 0 | #define PUT_CC(n) PUT_xC(cc, n) |
1308 | 0 | PUT_CC(1); |
1309 | 0 | PUT_CC(3); |
1310 | 0 | PUT_CC(6); |
1311 | 0 | PUT_CC(7); |
1312 | 0 | #undef PUT_CC |
1313 | 0 | #undef PUT_xC |
1314 | 0 | } |
1315 | 0 | |
1316 | 0 | usage[0] += idle_usage; |
1317 | 0 | res[0] = current_stime - idle_res; |
1318 | 0 | |
1319 | 0 | if ( copy_to_guest(stat->triggers, usage, nr) || |
1320 | 0 | copy_to_guest(stat->residencies, res, nr) ) |
1321 | 0 | return -EFAULT; |
1322 | 0 | |
1323 | 0 | stat->nr_pc = nr_pc; |
1324 | 0 | stat->nr_cc = nr_cc; |
1325 | 0 | |
1326 | 0 | return 0; |
1327 | 0 | } |
1328 | | |
1329 | | int pmstat_reset_cx_stat(uint32_t cpuid) |
1330 | 0 | { |
1331 | 0 | return 0; |
1332 | 0 | } |
1333 | | |
1334 | | void cpuidle_disable_deep_cstate(void) |
1335 | 0 | { |
1336 | 0 | if ( max_cstate > 1 ) |
1337 | 0 | { |
1338 | 0 | if ( local_apic_timer_c2_ok ) |
1339 | 0 | max_cstate = 2; |
1340 | 0 | else |
1341 | 0 | max_cstate = 1; |
1342 | 0 | } |
1343 | 0 | |
1344 | 0 | mb(); |
1345 | 0 | |
1346 | 0 | hpet_disable_legacy_broadcast(); |
1347 | 0 | } |
1348 | | |
1349 | | bool cpuidle_using_deep_cstate(void) |
1350 | 1 | { |
1351 | 1 | return xen_cpuidle && max_cstate > (local_apic_timer_c2_ok ? 2 : 1); |
1352 | 1 | } |
1353 | | |
1354 | | static int cpu_callback( |
1355 | | struct notifier_block *nfb, unsigned long action, void *hcpu) |
1356 | 0 | { |
1357 | 0 | unsigned int cpu = (unsigned long)hcpu; |
1358 | 0 | |
1359 | 0 | /* Only hook on CPU_ONLINE, because a dead CPU may utilize the info |
1360 | 0 | * to enter a deep C-state */ |
1361 | 0 | switch ( action ) |
1362 | 0 | { |
1363 | 0 | case CPU_ONLINE: |
1364 | 0 | (void)cpuidle_init_cpu(cpu); |
1365 | 0 | break; |
1366 | 0 | default: |
1367 | 0 | break; |
1368 | 0 | } |
1369 | 0 | |
1370 | 0 | return NOTIFY_DONE; |
1371 | 0 | } |
1372 | | |
1373 | | static struct notifier_block cpu_nfb = { |
1374 | | .notifier_call = cpu_callback |
1375 | | }; |
1376 | | |
1377 | | static int __init cpuidle_presmp_init(void) |
1378 | 1 | { |
1379 | 1 | void *cpu = (void *)(long)smp_processor_id(); |
1380 | 1 | |
1381 | 1 | if ( !xen_cpuidle ) |
1382 | 0 | return 0; |
1383 | 1 | |
1384 | 1 | mwait_idle_init(&cpu_nfb); |
1385 | 1 | cpu_nfb.notifier_call(&cpu_nfb, CPU_ONLINE, cpu); |
1386 | 1 | register_cpu_notifier(&cpu_nfb); |
1387 | 1 | return 0; |
1388 | 1 | } |
1389 | | presmp_initcall(cpuidle_presmp_init); |
1390 | | |