Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/arch/x86/acpi/cpu_idle.c
Line    Count    Source
1
/*
2
 * cpu_idle - xen idle state module derived from Linux 
3
 *            drivers/acpi/processor_idle.c & 
4
 *            arch/x86/kernel/acpi/cstate.c
5
 *
6
 *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
7
 *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
8
 *  Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de>
9
 *  Copyright (C) 2004  Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
10
 *                      - Added processor hotplug support
11
 *  Copyright (C) 2005  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
12
 *                      - Added support for C3 on SMP
13
 *  Copyright (C) 2007, 2008 Intel Corporation
14
 *
15
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
16
 *
17
 *  This program is free software; you can redistribute it and/or modify
18
 *  it under the terms of the GNU General Public License as published by
19
 *  the Free Software Foundation; either version 2 of the License, or (at
20
 *  your option) any later version.
21
 *
22
 *  This program is distributed in the hope that it will be useful, but
23
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
24
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
25
 *  General Public License for more details.
26
 *
27
 *  You should have received a copy of the GNU General Public License along
28
 *  with this program; If not, see <http://www.gnu.org/licenses/>.
29
 *
30
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
31
 */
32
33
#include <xen/errno.h>
34
#include <xen/lib.h>
35
#include <xen/types.h>
36
#include <xen/acpi.h>
37
#include <xen/smp.h>
38
#include <xen/guest_access.h>
39
#include <xen/keyhandler.h>
40
#include <xen/trace.h>
41
#include <xen/sched-if.h>
42
#include <xen/irq.h>
43
#include <asm/cache.h>
44
#include <asm/io.h>
45
#include <asm/iocap.h>
46
#include <asm/hpet.h>
47
#include <asm/processor.h>
48
#include <xen/pmstat.h>
49
#include <xen/softirq.h>
50
#include <public/platform.h>
51
#include <public/sysctl.h>
52
#include <acpi/cpufreq/cpufreq.h>
53
#include <asm/apic.h>
54
#include <asm/cpuidle.h>
55
#include <asm/mwait.h>
56
#include <xen/notifier.h>
57
#include <xen/cpu.h>
58
59
/*#define DEBUG_PM_CX*/
60
61
#define GET_HW_RES_IN_NS(msr, val) \
62
0
    do { rdmsrl(msr, val); val = tsc_ticks2ns(val); } while( 0 )
63
0
#define GET_MC6_RES(val)  GET_HW_RES_IN_NS(0x664, val)
64
0
#define GET_PC2_RES(val)  GET_HW_RES_IN_NS(0x60D, val) /* SNB onwards */
65
0
#define GET_PC3_RES(val)  GET_HW_RES_IN_NS(0x3F8, val)
66
0
#define GET_PC6_RES(val)  GET_HW_RES_IN_NS(0x3F9, val)
67
0
#define GET_PC7_RES(val)  GET_HW_RES_IN_NS(0x3FA, val)
68
0
#define GET_PC8_RES(val)  GET_HW_RES_IN_NS(0x630, val) /* some Haswells only */
69
0
#define GET_PC9_RES(val)  GET_HW_RES_IN_NS(0x631, val) /* some Haswells only */
70
0
#define GET_PC10_RES(val) GET_HW_RES_IN_NS(0x632, val) /* some Haswells only */
71
0
#define GET_CC1_RES(val)  GET_HW_RES_IN_NS(0x660, val) /* Silvermont only */
72
0
#define GET_CC3_RES(val)  GET_HW_RES_IN_NS(0x3FC, val)
73
0
#define GET_CC6_RES(val)  GET_HW_RES_IN_NS(0x3FD, val)
74
0
#define GET_CC7_RES(val)  GET_HW_RES_IN_NS(0x3FE, val) /* SNB onwards */
75
0
#define PHI_CC6_RES(val)  GET_HW_RES_IN_NS(0x3FF, val) /* Xeon Phi only */
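These helpers all follow the same pattern: GET_HW_RES_IN_NS() reads the named MSR with rdmsrl() and converts the raw value from TSC ticks to nanoseconds, and each wrapper merely fixes the MSR number of one core/package C-state residency counter. A minimal usage sketch (editor's illustration only, with a hypothetical function name; it is not part of the covered file, so no coverage columns apply):

static void example_read_pc2_residency(void)
{
    uint64_t pc2_ns;

    /* Expands to: rdmsrl(0x60D, pc2_ns); pc2_ns = tsc_ticks2ns(pc2_ns); */
    GET_PC2_RES(pc2_ns);
}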
76
77
0
static void lapic_timer_nop(void) { }
78
void (*__read_mostly lapic_timer_off)(void);
79
void (*__read_mostly lapic_timer_on)(void);
80
81
bool lapic_timer_init(void)
82
0
{
83
0
    if ( boot_cpu_has(X86_FEATURE_ARAT) )
84
0
    {
85
0
        lapic_timer_off = lapic_timer_nop;
86
0
        lapic_timer_on = lapic_timer_nop;
87
0
    }
88
0
    else if ( hpet_broadcast_is_available() )
89
0
    {
90
0
        lapic_timer_off = hpet_broadcast_enter;
91
0
        lapic_timer_on = hpet_broadcast_exit;
92
0
    }
93
0
    else if ( pit_broadcast_is_available() )
94
0
    {
95
0
        lapic_timer_off = pit_broadcast_enter;
96
0
        lapic_timer_on = pit_broadcast_exit;
97
0
    }
98
0
    else
99
0
        return false;
100
0
101
0
    return true;
102
0
}
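lapic_timer_init() picks, in order of preference, no-op hooks (ARAT means the LAPIC timer keeps ticking in deep C-states), HPET broadcast, or PIT broadcast. The hooks are then used to bracket deep C-state entry, as acpi_processor_idle() does further down; a condensed sketch of that pattern (editor's illustration with a hypothetical function name, not part of the covered file):

static void example_deep_cstate_bracket(void)
{
    /* A no-op with ARAT; otherwise hands timer duty to HPET/PIT broadcast. */
    lapic_timer_off();

    /* ... enter the deep C-state, as acpi_processor_idle() does below ... */

    /* Restore LAPIC timer handling on the way back to C0. */
    lapic_timer_on();
}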
103
104
static uint64_t (*__read_mostly tick_to_ns)(uint64_t) = acpi_pm_tick_to_ns;
105
106
void (*__read_mostly pm_idle_save)(void);
107
unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER - 1;
108
integer_param("max_cstate", max_cstate);
109
static bool __read_mostly local_apic_timer_c2_ok;
110
boolean_param("lapic_timer_c2_ok", local_apic_timer_c2_ok);
111
112
struct acpi_processor_power *__read_mostly processor_powers[NR_CPUS];
113
114
struct hw_residencies
115
{
116
    uint64_t mc0;
117
    uint64_t mc6;
118
    uint64_t pc2;
119
    uint64_t pc3;
120
    uint64_t pc4;
121
    uint64_t pc6;
122
    uint64_t pc7;
123
    uint64_t pc8;
124
    uint64_t pc9;
125
    uint64_t pc10;
126
    uint64_t cc1;
127
    uint64_t cc3;
128
    uint64_t cc6;
129
    uint64_t cc7;
130
};
131
132
static void do_get_hw_residencies(void *arg)
133
0
{
134
0
    struct cpuinfo_x86 *c = &current_cpu_data;
135
0
    struct hw_residencies *hw_res = arg;
136
0
137
0
    if ( c->x86_vendor != X86_VENDOR_INTEL || c->x86 != 6 )
138
0
        return;
139
0
140
0
    switch ( c->x86_model )
141
0
    {
142
0
    /* 4th generation Intel Core (Haswell) */
143
0
    case 0x45:
144
0
        GET_PC8_RES(hw_res->pc8);
145
0
        GET_PC9_RES(hw_res->pc9);
146
0
        GET_PC10_RES(hw_res->pc10);
147
0
        /* fall through */
148
0
    /* Sandy bridge */
149
0
    case 0x2A:
150
0
    case 0x2D:
151
0
    /* Ivy bridge */
152
0
    case 0x3A:
153
0
    case 0x3E:
154
0
    /* Haswell */
155
0
    case 0x3C:
156
0
    case 0x3F:
157
0
    case 0x46:
158
0
    /* Broadwell */
159
0
    case 0x3D:
160
0
    case 0x47:
161
0
    case 0x4F:
162
0
    case 0x56:
163
0
    /* Skylake */
164
0
    case 0x4E:
165
0
    case 0x55:
166
0
    case 0x5E:
167
0
    /* Cannon Lake */
168
0
    case 0x66:
169
0
    /* Kaby Lake */
170
0
    case 0x8E:
171
0
    case 0x9E:
172
0
        GET_PC2_RES(hw_res->pc2);
173
0
        GET_CC7_RES(hw_res->cc7);
174
0
        /* fall through */
175
0
    /* Nehalem */
176
0
    case 0x1A:
177
0
    case 0x1E:
178
0
    case 0x1F:
179
0
    case 0x2E:
180
0
    /* Westmere */
181
0
    case 0x25:
182
0
    case 0x2C:
183
0
    case 0x2F:
184
0
        GET_PC3_RES(hw_res->pc3);
185
0
        GET_PC6_RES(hw_res->pc6);
186
0
        GET_PC7_RES(hw_res->pc7);
187
0
        GET_CC3_RES(hw_res->cc3);
188
0
        GET_CC6_RES(hw_res->cc6);
189
0
        break;
190
0
    /* Xeon Phi Knights Landing */
191
0
    case 0x57:
192
0
    /* Xeon Phi Knights Mill */
193
0
    case 0x85:
194
0
        GET_CC3_RES(hw_res->mc0); /* abusing GET_CC3_RES */
195
0
        GET_CC6_RES(hw_res->mc6); /* abusing GET_CC6_RES */
196
0
        GET_PC2_RES(hw_res->pc2);
197
0
        GET_PC3_RES(hw_res->pc3);
198
0
        GET_PC6_RES(hw_res->pc6);
199
0
        GET_PC7_RES(hw_res->pc7);
200
0
        PHI_CC6_RES(hw_res->cc6);
201
0
        break;
202
0
    /* various Atoms */
203
0
    case 0x27:
204
0
        GET_PC3_RES(hw_res->pc2); /* abusing GET_PC3_RES */
205
0
        GET_PC6_RES(hw_res->pc4); /* abusing GET_PC6_RES */
206
0
        GET_PC7_RES(hw_res->pc6); /* abusing GET_PC7_RES */
207
0
        break;
208
0
    /* Silvermont */
209
0
    case 0x37:
210
0
    case 0x4A:
211
0
    case 0x4D:
212
0
    case 0x5A:
213
0
    case 0x5D:
214
0
    /* Airmont */
215
0
    case 0x4C:
216
0
        GET_MC6_RES(hw_res->mc6);
217
0
        GET_PC7_RES(hw_res->pc6); /* abusing GET_PC7_RES */
218
0
        GET_CC1_RES(hw_res->cc1);
219
0
        GET_CC6_RES(hw_res->cc6);
220
0
        break;
221
0
    /* Goldmont */
222
0
    case 0x5C:
223
0
    case 0x5F:
224
0
    /* Goldmont Plus */
225
0
    case 0x7A:
226
0
        GET_PC2_RES(hw_res->pc2);
227
0
        GET_PC3_RES(hw_res->pc3);
228
0
        GET_PC6_RES(hw_res->pc6);
229
0
        GET_PC10_RES(hw_res->pc10);
230
0
        GET_CC1_RES(hw_res->cc1);
231
0
        GET_CC3_RES(hw_res->cc3);
232
0
        GET_CC6_RES(hw_res->cc6);
233
0
        break;
234
0
    }
235
0
}
236
237
static void get_hw_residencies(uint32_t cpu, struct hw_residencies *hw_res)
238
0
{
239
0
    memset(hw_res, 0, sizeof(*hw_res));
240
0
241
0
    if ( smp_processor_id() == cpu )
242
0
        do_get_hw_residencies(hw_res);
243
0
    else
244
0
        on_selected_cpus(cpumask_of(cpu), do_get_hw_residencies, hw_res, 1);
245
0
}
246
247
static void print_hw_residencies(uint32_t cpu)
248
0
{
249
0
    struct hw_residencies hw_res;
250
0
251
0
    get_hw_residencies(cpu, &hw_res);
252
0
253
0
    if ( hw_res.mc0 | hw_res.mc6 )
254
0
        printk("MC0[%"PRIu64"] MC6[%"PRIu64"]\n",
255
0
               hw_res.mc0, hw_res.mc6);
256
0
    printk("PC2[%"PRIu64"] PC%d[%"PRIu64"] PC6[%"PRIu64"] PC7[%"PRIu64"]\n",
257
0
           hw_res.pc2,
258
0
           hw_res.pc4 ? 4 : 3, hw_res.pc4 ?: hw_res.pc3,
259
0
           hw_res.pc6, hw_res.pc7);
260
0
    if ( hw_res.pc8 | hw_res.pc9 | hw_res.pc10 )
261
0
        printk("PC8[%"PRIu64"] PC9[%"PRIu64"] PC10[%"PRIu64"]\n",
262
0
               hw_res.pc8, hw_res.pc9, hw_res.pc10);
263
0
    printk("CC%d[%"PRIu64"] CC6[%"PRIu64"] CC7[%"PRIu64"]\n",
264
0
           hw_res.cc1 ? 1 : 3, hw_res.cc1 ?: hw_res.cc3,
265
0
           hw_res.cc6, hw_res.cc7);
266
0
}
267
268
static char* acpi_cstate_method_name[] =
269
{
270
    "NONE",
271
    "SYSIO",
272
    "FFH",
273
    "HALT"
274
};
275
276
3.43M
static uint64_t get_stime_tick(void) { return (uint64_t)NOW(); }
277
1.56M
static uint64_t stime_ticks_elapsed(uint64_t t1, uint64_t t2) { return t2 - t1; }
278
1.75M
static uint64_t stime_tick_to_ns(uint64_t ticks) { return ticks; }
279
280
0
static uint64_t get_acpi_pm_tick(void) { return (uint64_t)inl(pmtmr_ioport); }
281
static uint64_t acpi_pm_ticks_elapsed(uint64_t t1, uint64_t t2)
282
0
{
283
0
    if ( t2 >= t1 )
284
0
        return (t2 - t1);
285
0
    else if ( !(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER) )
286
0
        return (((0x00FFFFFF - t1) + t2 + 1) & 0x00FFFFFF);
287
0
    else
288
0
        return ((0xFFFFFFFF - t1) + t2 + 1);
289
0
}
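A worked example of the wrap-around arithmetic above, with invented sample values: on a 24-bit ACPI PM timer, t1 = 0x00FFFFF0 followed by t2 = 0x00000010 gives ((0x00FFFFFF - 0x00FFFFF0) + 0x10 + 1) & 0x00FFFFFF = 0x20, i.e. 32 ticks elapsed across the wrap rather than a negative difference. The 32-bit branch works the same way, without the final mask.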
290
291
uint64_t (*__read_mostly cpuidle_get_tick)(void) = get_acpi_pm_tick;
292
static uint64_t (*__read_mostly ticks_elapsed)(uint64_t, uint64_t)
293
    = acpi_pm_ticks_elapsed;
294
295
static void print_acpi_power(uint32_t cpu, struct acpi_processor_power *power)
296
0
{
297
0
    uint64_t idle_res = 0, idle_usage = 0;
298
0
    uint64_t last_state_update_tick, current_tick, current_stime;
299
0
    uint64_t usage[ACPI_PROCESSOR_MAX_POWER] = { 0 };
300
0
    uint64_t res_tick[ACPI_PROCESSOR_MAX_POWER] = { 0 };
301
0
    unsigned int i;
302
0
    signed int last_state_idx;
303
0
304
0
    printk("==cpu%d==\n", cpu);
305
0
    last_state_idx = power->last_state ? power->last_state->idx : -1;
306
0
    printk("active state:\t\tC%d\n", last_state_idx);
307
0
    printk("max_cstate:\t\tC%d\n", max_cstate);
308
0
    printk("states:\n");
309
0
310
0
    spin_lock_irq(&power->stat_lock);
311
0
    current_tick = cpuidle_get_tick();
312
0
    current_stime = NOW();
313
0
    for ( i = 1; i < power->count; i++ )
314
0
    {
315
0
        res_tick[i] = power->states[i].time;
316
0
        usage[i] = power->states[i].usage;
317
0
    }
318
0
    last_state_update_tick = power->last_state_update_tick;
319
0
    spin_unlock_irq(&power->stat_lock);
320
0
321
0
    if ( last_state_idx >= 0 )
322
0
    {
323
0
        res_tick[last_state_idx] += ticks_elapsed(last_state_update_tick,
324
0
                                                  current_tick);
325
0
        usage[last_state_idx]++;
326
0
    }
327
0
328
0
    for ( i = 1; i < power->count; i++ )
329
0
    {
330
0
        idle_usage += usage[i];
331
0
        idle_res += tick_to_ns(res_tick[i]);
332
0
333
0
        printk((last_state_idx == i) ? "   *" : "    ");
334
0
        printk("C%d:\t", i);
335
0
        printk("type[C%d] ", power->states[i].type);
336
0
        printk("latency[%03d] ", power->states[i].latency);
337
0
        printk("usage[%08"PRIu64"] ", usage[i]);
338
0
        printk("method[%5s] ", acpi_cstate_method_name[power->states[i].entry_method]);
339
0
        printk("duration[%"PRIu64"]\n", tick_to_ns(res_tick[i]));
340
0
    }
341
0
    printk((last_state_idx == 0) ? "   *" : "    ");
342
0
    printk("C0:\tusage[%08"PRIu64"] duration[%"PRIu64"]\n",
343
0
           usage[0] + idle_usage, current_stime - idle_res);
344
0
345
0
    print_hw_residencies(cpu);
346
0
}
347
348
static void dump_cx(unsigned char key)
349
0
{
350
0
    unsigned int cpu;
351
0
352
0
    printk("'%c' pressed -> printing ACPI Cx structures\n", key);
353
0
    for_each_online_cpu ( cpu )
354
0
        if (processor_powers[cpu])
355
0
            print_acpi_power(cpu, processor_powers[cpu]);
356
0
}
357
358
static int __init cpu_idle_key_init(void)
359
1
{
360
1
    register_keyhandler('c', dump_cx, "dump ACPI Cx structures", 1);
361
1
    return 0;
362
1
}
363
__initcall(cpu_idle_key_init);
364
365
/*
366
 * The bit is set iff cpu use monitor/mwait to enter C state
367
 * with this flag set, CPU can be waken up from C state
368
 * by writing to specific memory address, instead of sending an IPI.
369
 */
370
static cpumask_t cpuidle_mwait_flags;
371
372
void cpuidle_wakeup_mwait(cpumask_t *mask)
373
0
{
374
0
    cpumask_t target;
375
0
    unsigned int cpu;
376
0
377
0
    cpumask_and(&target, mask, &cpuidle_mwait_flags);
378
0
379
0
    /* CPU is MWAITing on the cpuidle_mwait_wakeup flag. */
380
0
    for_each_cpu(cpu, &target)
381
0
        mwait_wakeup(cpu) = 0;
382
0
383
0
    cpumask_andnot(mask, mask, &target);
384
0
}
385
386
bool arch_skip_send_event_check(unsigned int cpu)
387
98.6k
{
388
98.6k
    /*
389
98.6k
     * This relies on softirq_pending() and mwait_wakeup() to access data
390
98.6k
     * on the same cache line.
391
98.6k
     */
392
98.6k
    smp_mb();
393
98.6k
    return !!cpumask_test_cpu(cpu, &cpuidle_mwait_flags);
394
98.6k
}
395
396
void mwait_idle_with_hints(unsigned int eax, unsigned int ecx)
397
1.94M
{
398
1.94M
    unsigned int cpu = smp_processor_id();
399
1.94M
    s_time_t expires = per_cpu(timer_deadline, cpu);
400
1.94M
401
1.94M
    if ( boot_cpu_has(X86_FEATURE_CLFLUSH_MONITOR) )
402
0
    {
403
0
        mb();
404
0
        clflush((void *)&mwait_wakeup(cpu));
405
0
        mb();
406
0
    }
407
1.94M
408
1.94M
    __monitor((void *)&mwait_wakeup(cpu), 0, 0);
409
1.94M
    smp_mb();
410
1.94M
411
1.94M
    /*
412
1.94M
     * Timer deadline passing is the event on which we will be woken via
413
1.94M
     * cpuidle_mwait_wakeup. So check it now that the location is armed.
414
1.94M
     */
415
1.94M
    if ( (expires > NOW() || expires == 0) && !softirq_pending(cpu) )
416
1.87M
    {
417
1.87M
        cpumask_set_cpu(cpu, &cpuidle_mwait_flags);
418
1.87M
        __mwait(eax, ecx);
419
1.87M
        cpumask_clear_cpu(cpu, &cpuidle_mwait_flags);
420
1.87M
    }
421
1.94M
422
1.94M
    if ( expires <= NOW() && expires > 0 )
423
8.46k
        raise_softirq(TIMER_SOFTIRQ);
424
1.94M
}
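Together with cpuidle_wakeup_mwait() above, this implements an IPI-free wakeup handshake: the idle CPU arms MONITOR on its mwait_wakeup() flag, advertises itself in cpuidle_mwait_flags, and MWAITs; a waker simply stores to that flag to break the MWAIT. A hedged sketch of the waker side (hypothetical caller and CPU numbers, not part of the covered file):

static void example_wake_idle_cpus(void)
{
    cpumask_t mask;

    cpumask_clear(&mask);
    cpumask_set_cpu(2, &mask);
    cpumask_set_cpu(3, &mask);

    /* Clears mwait_wakeup() for every CPU in 'mask' that is MWAITing ... */
    cpuidle_wakeup_mwait(&mask);

    /* ... and leaves in 'mask' only the CPUs that still need a real IPI. */
}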
425
426
static void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx)
427
0
{
428
0
    mwait_idle_with_hints(cx->address, MWAIT_ECX_INTERRUPT_BREAK);
429
0
}
430
431
static void acpi_idle_do_entry(struct acpi_processor_cx *cx)
432
0
{
433
0
    switch ( cx->entry_method )
434
0
    {
435
0
    case ACPI_CSTATE_EM_FFH:
436
0
        /* Call into architectural FFH based C-state */
437
0
        acpi_processor_ffh_cstate_enter(cx);
438
0
        return;
439
0
    case ACPI_CSTATE_EM_SYSIO:
440
0
        /* IO port based C-state */
441
0
        inb(cx->address);
442
0
        /* Dummy wait op - must do something useless after P_LVL2 read
443
0
           because chipsets cannot guarantee that STPCLK# signal
444
0
           gets asserted in time to freeze execution properly. */
445
0
        inl(pmtmr_ioport);
446
0
        return;
447
0
    case ACPI_CSTATE_EM_HALT:
448
0
        safe_halt();
449
0
        local_irq_disable();
450
0
        return;
451
0
    }
452
0
}
453
454
static int acpi_idle_bm_check(void)
455
0
{
456
0
    u32 bm_status = 0;
457
0
458
0
    acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
459
0
    if ( bm_status )
460
0
        acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
461
0
    /*
462
0
     * TBD: PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
463
0
     * the true state of bus mastering activity; forcing us to
464
0
     * manually check the BMIDEA bit of each IDE channel.
465
0
     */
466
0
    return bm_status;
467
0
}
468
469
static struct {
470
    spinlock_t lock;
471
    unsigned int count;
472
} c3_cpu_status = { .lock = SPIN_LOCK_UNLOCKED };
473
474
void trace_exit_reason(u32 *irq_traced)
475
1.51M
{
476
1.51M
    if ( unlikely(tb_init_done) )
477
0
    {
478
0
        int i, curbit;
479
0
        u32 irr_status[8] = { 0 };
480
0
481
0
        /* Get local apic IRR register */
482
0
        for ( i = 0; i < 8; i++ )
483
0
            irr_status[i] = apic_read(APIC_IRR + (i << 4));
484
0
        i = 0;
485
0
        curbit = find_first_bit((const unsigned long *)irr_status, 256);
486
0
        while ( i < 4 && curbit < 256 )
487
0
        {
488
0
            irq_traced[i++] = curbit;
489
0
            curbit = find_next_bit((const unsigned long *)irr_status, 256, curbit + 1);
490
0
        }
491
0
    }
492
1.51M
}
493
494
/*
495
 * "AAJ72. EOI Transaction May Not be Sent if Software Enters Core C6 During 
496
 * an Interrupt Service Routine"
497
 * 
498
 * There was an erratum on some Core i7 processors whereby an EOI transaction
499
 * may not be sent if software enters core C6 during an interrupt service 
500
 * routine. So we don't enter deep Cx state if there is an EOI pending.
501
 */
502
static bool errata_c6_eoi_workaround(void)
503
0
{
504
0
    static int8_t fix_needed = -1;
505
0
506
0
    if ( unlikely(fix_needed == -1) )
507
0
    {
508
0
        int model = boot_cpu_data.x86_model;
509
0
        fix_needed = (cpu_has_apic && !directed_eoi_enabled &&
510
0
                      (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
511
0
                      (boot_cpu_data.x86 == 6) &&
512
0
                      ((model == 0x1a) || (model == 0x1e) || (model == 0x1f) ||
513
0
                       (model == 0x25) || (model == 0x2c) || (model == 0x2f)));
514
0
    }
515
0
516
0
    return (fix_needed && cpu_has_pending_apic_eoi());
517
0
}
518
519
void update_last_cx_stat(struct acpi_processor_power *power,
520
                         struct acpi_processor_cx *cx, uint64_t ticks)
521
1.89M
{
522
1.89M
    ASSERT(!local_irq_is_enabled());
523
1.89M
524
1.89M
    spin_lock(&power->stat_lock);
525
1.89M
    power->last_state = cx;
526
1.89M
    power->last_state_update_tick = ticks;
527
1.89M
    spin_unlock(&power->stat_lock);
528
1.89M
}
529
530
void update_idle_stats(struct acpi_processor_power *power,
531
                       struct acpi_processor_cx *cx,
532
                       uint64_t before, uint64_t after)
533
1.57M
{
534
1.57M
    int64_t sleep_ticks = ticks_elapsed(before, after);
535
1.57M
    /* Interrupts are disabled */
536
1.57M
537
1.57M
    spin_lock(&power->stat_lock);
538
1.57M
539
1.57M
    cx->usage++;
540
1.57M
    if ( sleep_ticks > 0 )
541
1.78M
    {
542
1.78M
        power->last_residency = tick_to_ns(sleep_ticks) / 1000UL;
543
1.78M
        cx->time += sleep_ticks;
544
1.78M
    }
545
1.57M
    power->last_state = &power->states[0];
546
1.57M
    power->last_state_update_tick = after;
547
1.57M
548
1.57M
    spin_unlock(&power->stat_lock);
549
1.57M
}
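A numeric illustration of the bookkeeping above (values invented): with the stime backend one tick is one nanosecond, so a sleep of 2,500,000 ticks adds 2,500,000 to cx->time and sets power->last_residency to 2,500,000 / 1000 = 2500, i.e. the last residency is kept in microseconds.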
550
551
static void acpi_processor_idle(void)
552
0
{
553
0
    struct acpi_processor_power *power = processor_powers[smp_processor_id()];
554
0
    struct acpi_processor_cx *cx = NULL;
555
0
    int next_state;
556
0
    uint64_t t1, t2 = 0;
557
0
    u32 exp = 0, pred = 0;
558
0
    u32 irq_traced[4] = { 0 };
559
0
560
0
    if ( max_cstate > 0 && power && !sched_has_urgent_vcpu() &&
561
0
         (next_state = cpuidle_current_governor->select(power)) > 0 )
562
0
    {
563
0
        cx = &power->states[next_state];
564
0
        if ( cx->type == ACPI_STATE_C3 && power->flags.bm_check &&
565
0
             acpi_idle_bm_check() )
566
0
            cx = power->safe_state;
567
0
        if ( cx->idx > max_cstate )
568
0
            cx = &power->states[max_cstate];
569
0
        menu_get_trace_data(&exp, &pred);
570
0
    }
571
0
    if ( !cx )
572
0
    {
573
0
        if ( pm_idle_save )
574
0
            pm_idle_save();
575
0
        else
576
0
            safe_halt();
577
0
        return;
578
0
    }
579
0
580
0
    cpufreq_dbs_timer_suspend();
581
0
582
0
    sched_tick_suspend();
583
0
    /* sched_tick_suspend() can raise TIMER_SOFTIRQ. Process it now. */
584
0
    process_pending_softirqs();
585
0
586
0
    /*
587
0
     * Interrupts must be disabled during bus mastering calculations and
588
0
     * for C2/C3 transitions.
589
0
     */
590
0
    local_irq_disable();
591
0
592
0
    if ( !cpu_is_haltable(smp_processor_id()) )
593
0
    {
594
0
        local_irq_enable();
595
0
        sched_tick_resume();
596
0
        cpufreq_dbs_timer_resume();
597
0
        return;
598
0
    }
599
0
600
0
    if ( (cx->type == ACPI_STATE_C3) && errata_c6_eoi_workaround() )
601
0
        cx = power->safe_state;
602
0
603
0
604
0
    /*
605
0
     * Sleep:
606
0
     * ------
607
0
     * Invoke the current Cx state to put the processor to sleep.
608
0
     */
609
0
    switch ( cx->type )
610
0
    {
611
0
    case ACPI_STATE_C1:
612
0
    case ACPI_STATE_C2:
613
0
        if ( cx->type == ACPI_STATE_C1 || local_apic_timer_c2_ok )
614
0
        {
615
0
            /* Get start time (ticks) */
616
0
            t1 = cpuidle_get_tick();
617
0
            /* Trace cpu idle entry */
618
0
            TRACE_4D(TRC_PM_IDLE_ENTRY, cx->idx, t1, exp, pred);
619
0
620
0
            update_last_cx_stat(power, cx, t1);
621
0
622
0
            /* Invoke C2 */
623
0
            acpi_idle_do_entry(cx);
624
0
            /* Get end time (ticks) */
625
0
            t2 = cpuidle_get_tick();
626
0
            trace_exit_reason(irq_traced);
627
0
            /* Trace cpu idle exit */
628
0
            TRACE_6D(TRC_PM_IDLE_EXIT, cx->idx, t2,
629
0
                     irq_traced[0], irq_traced[1], irq_traced[2], irq_traced[3]);
630
0
            /* Update statistics */
631
0
            update_idle_stats(power, cx, t1, t2);
632
0
            /* Re-enable interrupts */
633
0
            local_irq_enable();
634
0
            break;
635
0
        }
636
0
637
0
    case ACPI_STATE_C3:
638
0
        /*
639
0
         * Before invoking C3, be aware that TSC/APIC timer may be 
640
0
         * stopped by H/W. Without carefully handling of TSC/APIC stop issues,
641
0
         * deep C state can't work correctly.
642
0
         */
643
0
        /* preparing APIC stop */
644
0
        lapic_timer_off();
645
0
646
0
        /* Get start time (ticks) */
647
0
        t1 = cpuidle_get_tick();
648
0
        /* Trace cpu idle entry */
649
0
        TRACE_4D(TRC_PM_IDLE_ENTRY, cx->idx, t1, exp, pred);
650
0
651
0
        update_last_cx_stat(power, cx, t1);
652
0
653
0
        /*
654
0
         * disable bus master
655
0
         * bm_check implies we need ARB_DIS
656
0
         * !bm_check implies we need cache flush
657
0
         * bm_control implies whether we can do ARB_DIS
658
0
         *
659
0
         * That leaves a case where bm_check is set and bm_control is
660
0
         * not set. In that case we cannot do much, we enter C3
661
0
         * without doing anything.
662
0
         */
663
0
        if ( cx->type != ACPI_STATE_C3 )
664
0
            /* nothing to be done here */;
665
0
        else if ( power->flags.bm_check && power->flags.bm_control )
666
0
        {
667
0
            spin_lock(&c3_cpu_status.lock);
668
0
            if ( ++c3_cpu_status.count == num_online_cpus() )
669
0
            {
670
0
                /*
671
0
                 * All CPUs are trying to go to C3
672
0
                 * Disable bus master arbitration
673
0
                 */
674
0
                acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
675
0
            }
676
0
            spin_unlock(&c3_cpu_status.lock);
677
0
        }
678
0
        else if ( !power->flags.bm_check )
679
0
        {
680
0
            /* SMP with no shared cache... Invalidate cache  */
681
0
            ACPI_FLUSH_CPU_CACHE();
682
0
        }
683
0
684
0
        /* Invoke C3 */
685
0
        acpi_idle_do_entry(cx);
686
0
687
0
        if ( (cx->type == ACPI_STATE_C3) &&
688
0
             power->flags.bm_check && power->flags.bm_control )
689
0
        {
690
0
            /* Enable bus master arbitration */
691
0
            spin_lock(&c3_cpu_status.lock);
692
0
            if ( c3_cpu_status.count-- == num_online_cpus() )
693
0
                acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
694
0
            spin_unlock(&c3_cpu_status.lock);
695
0
        }
696
0
697
0
        /* Get end time (ticks) */
698
0
        t2 = cpuidle_get_tick();
699
0
700
0
        /* recovering TSC */
701
0
        cstate_restore_tsc();
702
0
        trace_exit_reason(irq_traced);
703
0
        /* Trace cpu idle exit */
704
0
        TRACE_6D(TRC_PM_IDLE_EXIT, cx->idx, t2,
705
0
                 irq_traced[0], irq_traced[1], irq_traced[2], irq_traced[3]);
706
0
707
0
        /* Update statistics */
708
0
        update_idle_stats(power, cx, t1, t2);
709
0
        /* Re-enable interrupts */
710
0
        local_irq_enable();
711
0
        /* recovering APIC */
712
0
        lapic_timer_on();
713
0
714
0
        break;
715
0
716
0
    default:
717
0
        /* Now in C0 */
718
0
        power->last_state = &power->states[0];
719
0
        local_irq_enable();
720
0
        sched_tick_resume();
721
0
        cpufreq_dbs_timer_resume();
722
0
        return;
723
0
    }
724
0
725
0
    /* Now in C0 */
726
0
    power->last_state = &power->states[0];
727
0
728
0
    sched_tick_resume();
729
0
    cpufreq_dbs_timer_resume();
730
0
731
0
    if ( cpuidle_current_governor->reflect )
732
0
        cpuidle_current_governor->reflect(power);
733
0
}
734
735
void acpi_dead_idle(void)
736
0
{
737
0
    struct acpi_processor_power *power;
738
0
    struct acpi_processor_cx *cx;
739
0
740
0
    if ( (power = processor_powers[smp_processor_id()]) == NULL )
741
0
        goto default_halt;
742
0
743
0
    if ( (cx = &power->states[power->count-1]) == NULL )
744
0
        goto default_halt;
745
0
746
0
    if ( cx->entry_method == ACPI_CSTATE_EM_FFH )
747
0
    {
748
0
        void *mwait_ptr = &mwait_wakeup(smp_processor_id());
749
0
750
0
        /*
751
0
         * Cache must be flushed as the last operation before sleeping.
752
0
         * Otherwise, the CPU may still hold dirty data, breaking cache coherency,
753
0
         * leading to strange errors.
754
0
         */
755
0
        wbinvd();
756
0
757
0
        while ( 1 )
758
0
        {
759
0
            /*
760
0
             * 1. The CLFLUSH is a workaround for erratum AAI65 for
761
0
             * the Xeon 7400 series.  
762
0
             * 2. The WBINVD is insufficient due to the spurious-wakeup
763
0
             * case where we return around the loop.
764
0
             * 3. Unlike wbinvd, clflush is a lightweight but non-serializing
765
0
             * instruction, hence a memory fence is necessary to make sure all
766
0
             * loads/stores are visible before flushing the cache line.
767
0
             */
768
0
            mb();
769
0
            clflush(mwait_ptr);
770
0
            __monitor(mwait_ptr, 0, 0);
771
0
            mb();
772
0
            __mwait(cx->address, 0);
773
0
        }
774
0
    }
775
0
    else if ( current_cpu_data.x86_vendor == X86_VENDOR_AMD &&
776
0
              cx->entry_method == ACPI_CSTATE_EM_SYSIO )
777
0
    {
778
0
        /* Intel prefers not to use SYSIO */
779
0
780
0
        /* Avoid references to shared data after the cache flush */
781
0
        u32 address = cx->address;
782
0
        u32 pmtmr_ioport_local = pmtmr_ioport;
783
0
784
0
        wbinvd();
785
0
786
0
        while ( 1 )
787
0
        {
788
0
            inb(address);
789
0
            inl(pmtmr_ioport_local);
790
0
        }
791
0
    }
792
0
793
0
default_halt:
794
0
    default_dead_idle();
795
0
}
796
797
int cpuidle_init_cpu(unsigned int cpu)
798
12
{
799
12
    struct acpi_processor_power *acpi_power;
800
12
801
12
    acpi_power = processor_powers[cpu];
802
12
    if ( !acpi_power )
803
12
    {
804
12
        unsigned int i;
805
12
806
12
        if ( cpu == 0 && boot_cpu_has(X86_FEATURE_NONSTOP_TSC) )
807
1
        {
808
1
            cpuidle_get_tick = get_stime_tick;
809
1
            ticks_elapsed = stime_ticks_elapsed;
810
1
            tick_to_ns = stime_tick_to_ns;
811
1
        }
812
12
813
12
        acpi_power = xzalloc(struct acpi_processor_power);
814
12
        if ( !acpi_power )
815
0
            return -ENOMEM;
816
12
817
108
        for ( i = 0; i < ACPI_PROCESSOR_MAX_POWER; i++ )
818
96
            acpi_power->states[i].idx = i;
819
12
820
12
        acpi_power->cpu = cpu;
821
12
        processor_powers[cpu] = acpi_power;
822
12
    }
823
12
824
12
    acpi_power->count = 2;
825
12
    acpi_power->states[1].type = ACPI_STATE_C1;
826
12
    acpi_power->states[1].entry_method = ACPI_CSTATE_EM_HALT;
827
12
    acpi_power->safe_state = &acpi_power->states[1];
828
12
    spin_lock_init(&acpi_power->stat_lock);
829
12
830
12
    return 0;
831
12
}
832
833
static int acpi_processor_ffh_cstate_probe(xen_processor_cx_t *cx)
834
0
{
835
0
    struct cpuinfo_x86 *c = &current_cpu_data;
836
0
    unsigned int eax, ebx, ecx, edx;
837
0
    unsigned int edx_part;
838
0
    unsigned int cstate_type; /* C-state type and not ACPI C-state type */
839
0
    unsigned int num_cstate_subtype;
840
0
    int ret = 0;
841
0
    static unsigned long printed;
842
0
843
0
    if ( c->cpuid_level < CPUID_MWAIT_LEAF )
844
0
    {
845
0
        printk(XENLOG_INFO "MWAIT leaf not supported by cpuid\n");
846
0
        return -EFAULT;
847
0
    }
848
0
849
0
    cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
850
0
    if ( opt_cpu_info )
851
0
        printk(XENLOG_DEBUG "cpuid.MWAIT[eax=%x ebx=%x ecx=%x edx=%x]\n",
852
0
               eax, ebx, ecx, edx);
853
0
854
0
    /* Check whether this particular cx_type (in CST) is supported or not */
855
0
    cstate_type = (cx->reg.address >> MWAIT_SUBSTATE_SIZE) + 1;
856
0
    edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE);
857
0
    num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK;
858
0
859
0
    if ( num_cstate_subtype < (cx->reg.address & MWAIT_SUBSTATE_MASK) )
860
0
        ret = -ERANGE;
861
0
    /* mwait ecx extensions INTERRUPT_BREAK should be supported for C2/C3 */
862
0
    else if ( !(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
863
0
              !(ecx & CPUID5_ECX_INTERRUPT_BREAK) )
864
0
        ret = -ENODEV;
865
0
    else if ( opt_cpu_info || cx->type >= BITS_PER_LONG ||
866
0
              !test_and_set_bit(cx->type, &printed) )
867
0
        printk(XENLOG_INFO "Monitor-Mwait will be used to enter C%d state\n",
868
0
               cx->type);
869
0
    return ret;
870
0
}
871
872
/*
873
 * Initialize bm_flags based on the CPU cache properties
874
 * On SMP it depends on the cache configuration:
875
 * - When the cache is not shared among all CPUs, we flush the cache
876
 *   before entering C3.
877
 * - When the cache is shared among all CPUs, we use the bm_check
878
 *   mechanism as in the UP case.
879
 *
880
 * This routine is called only after all the CPUs are online
881
 */
882
static void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags)
883
0
{
884
0
    struct cpuinfo_x86 *c = &current_cpu_data;
885
0
886
0
    flags->bm_check = 0;
887
0
    if ( num_online_cpus() == 1 )
888
0
        flags->bm_check = 1;
889
0
    else if ( (c->x86_vendor == X86_VENDOR_INTEL) ||
890
0
              ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 0x15)) )
891
0
    {
892
0
        /*
893
0
         * Today all MP CPUs that support C3 share cache.
894
0
         * And caches should not be flushed by software while
895
0
         * entering C3 type state.
896
0
         */
897
0
        flags->bm_check = 1;
898
0
    }
899
0
900
0
    /*
901
0
     * On all recent platforms, ARB_DISABLE is a nop.
902
0
     * So, set bm_control to zero to indicate that ARB_DISABLE
903
0
     * is not required while entering C3 type state on
904
0
     * P4, Core and beyond CPUs
905
0
     */
906
0
    if ( c->x86_vendor == X86_VENDOR_INTEL &&
907
0
        (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 14)) )
908
0
            flags->bm_control = 0;
909
0
}
910
911
0
#define VENDOR_INTEL                   (1)
912
0
#define NATIVE_CSTATE_BEYOND_HALT      (2)
913
914
static int check_cx(struct acpi_processor_power *power, xen_processor_cx_t *cx)
915
0
{
916
0
    static int bm_check_flag = -1;
917
0
    static int bm_control_flag = -1;
918
0
919
0
    switch ( cx->reg.space_id )
920
0
    {
921
0
    case ACPI_ADR_SPACE_SYSTEM_IO:
922
0
        if ( cx->reg.address == 0 )
923
0
            return -EINVAL;
924
0
        break;
925
0
926
0
    case ACPI_ADR_SPACE_FIXED_HARDWARE:
927
0
        if ( cx->reg.bit_width != VENDOR_INTEL || 
928
0
             cx->reg.bit_offset != NATIVE_CSTATE_BEYOND_HALT )
929
0
            return -EINVAL;
930
0
931
0
        /* assume all logical cpu has the same support for mwait */
932
0
        if ( acpi_processor_ffh_cstate_probe(cx) )
933
0
            return -EINVAL;
934
0
        break;
935
0
936
0
    default:
937
0
        return -ENODEV;
938
0
    }
939
0
940
0
    switch ( cx->type )
941
0
    {
942
0
    case ACPI_STATE_C2:
943
0
        if ( local_apic_timer_c2_ok )
944
0
            break;
945
0
    case ACPI_STATE_C3:
946
0
        if ( !lapic_timer_init() )
947
0
            return -EINVAL;
948
0
949
0
        /* All the logic here assumes flags.bm_check is the same across all CPUs */
950
0
        if ( bm_check_flag < 0 )
951
0
        {
952
0
            /* Determine whether bm_check is needed based on CPU  */
953
0
            acpi_processor_power_init_bm_check(&(power->flags));
954
0
        }
955
0
        else
956
0
        {
957
0
            power->flags.bm_check = bm_check_flag;
958
0
            power->flags.bm_control = bm_control_flag;
959
0
        }
960
0
961
0
        if ( power->flags.bm_check )
962
0
        {
963
0
            if ( !power->flags.bm_control )
964
0
            {
965
0
                if ( power->flags.has_cst != 1 )
966
0
                {
967
0
                    /* bus mastering control is necessary */
968
0
                    ACPI_DEBUG_PRINT((ACPI_DB_INFO,
969
0
                        "C3 support requires BM control\n"));
970
0
                    return -EINVAL;
971
0
                }
972
0
                else
973
0
                {
974
0
                    /* Here we enter C3 without bus mastering */
975
0
                    ACPI_DEBUG_PRINT((ACPI_DB_INFO,
976
0
                        "C3 support without BM control\n"));
977
0
                }
978
0
            }
979
0
            /*
980
0
             * On older chipsets, BM_RLD needs to be set in order for Bus
981
0
             * Master activity to wake the system from C3, hence
982
0
             * acpi_set_register() is always being called once below.  Newer
983
0
             * chipsets handle DMA during C3 automatically and BM_RLD is a
984
0
             * NOP.  In either case, the proper way to handle BM_RLD is to
985
0
             * set it and leave it set.
986
0
             */
987
0
        }
988
0
        else
989
0
        {
990
0
            /*
991
0
             * WBINVD should be set in fadt, for C3 state to be
992
0
             * supported on when bm_check is not required.
993
0
             */
994
0
            if ( !(acpi_gbl_FADT.flags & ACPI_FADT_WBINVD) )
995
0
            {
996
0
                ACPI_DEBUG_PRINT((ACPI_DB_INFO,
997
0
                          "Cache invalidation should work properly"
998
0
                          " for C3 to be enabled on SMP systems\n"));
999
0
                return -EINVAL;
1000
0
            }
1001
0
        }
1002
0
1003
0
        if ( bm_check_flag < 0 )
1004
0
        {
1005
0
            bm_check_flag = power->flags.bm_check;
1006
0
            bm_control_flag = power->flags.bm_control;
1007
0
            acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, bm_check_flag);
1008
0
        }
1009
0
1010
0
        break;
1011
0
    }
1012
0
1013
0
    return 0;
1014
0
}
1015
1016
static unsigned int latency_factor = 2;
1017
integer_param("idle_latency_factor", latency_factor);
1018
1019
static void set_cx(
1020
    struct acpi_processor_power *acpi_power,
1021
    xen_processor_cx_t *xen_cx)
1022
0
{
1023
0
    struct acpi_processor_cx *cx;
1024
0
1025
0
    if ( check_cx(acpi_power, xen_cx) != 0 )
1026
0
        return;
1027
0
1028
0
    switch ( xen_cx->type )
1029
0
    {
1030
0
    case ACPI_STATE_C1:
1031
0
        cx = &acpi_power->states[1];
1032
0
        break;
1033
0
    default:
1034
0
        if ( acpi_power->count >= ACPI_PROCESSOR_MAX_POWER )
1035
0
        {
1036
0
    case ACPI_STATE_C0:
1037
0
            printk(XENLOG_WARNING "CPU%u: C%d data ignored\n",
1038
0
                   acpi_power->cpu, xen_cx->type);
1039
0
            return;
1040
0
        }
1041
0
        cx = &acpi_power->states[acpi_power->count];
1042
0
        cx->type = xen_cx->type;
1043
0
        break;
1044
0
    }
1045
0
1046
0
    cx->address = xen_cx->reg.address;
1047
0
1048
0
    switch ( xen_cx->reg.space_id )
1049
0
    {
1050
0
    case ACPI_ADR_SPACE_FIXED_HARDWARE:
1051
0
        if ( xen_cx->reg.bit_width == VENDOR_INTEL &&
1052
0
             xen_cx->reg.bit_offset == NATIVE_CSTATE_BEYOND_HALT &&
1053
0
             boot_cpu_has(X86_FEATURE_MONITOR) )
1054
0
            cx->entry_method = ACPI_CSTATE_EM_FFH;
1055
0
        else
1056
0
            cx->entry_method = ACPI_CSTATE_EM_HALT;
1057
0
        break;
1058
0
    case ACPI_ADR_SPACE_SYSTEM_IO:
1059
0
        if ( ioports_deny_access(hardware_domain, cx->address, cx->address) )
1060
0
            printk(XENLOG_WARNING "Could not deny access to port %04x\n",
1061
0
                   cx->address);
1062
0
        cx->entry_method = ACPI_CSTATE_EM_SYSIO;
1063
0
        break;
1064
0
    default:
1065
0
        cx->entry_method = ACPI_CSTATE_EM_NONE;
1066
0
        break;
1067
0
    }
1068
0
1069
0
    cx->latency = xen_cx->latency;
1070
0
    cx->target_residency = cx->latency * latency_factor;
1071
0
1072
0
    smp_wmb();
1073
0
    acpi_power->count += (cx->type != ACPI_STATE_C1);
1074
0
    if ( cx->type == ACPI_STATE_C1 || cx->type == ACPI_STATE_C2 )
1075
0
        acpi_power->safe_state = cx;
1076
0
}
1077
1078
int get_cpu_id(u32 acpi_id)
1079
0
{
1080
0
    int i;
1081
0
    u32 apic_id;
1082
0
1083
0
    if ( acpi_id >= MAX_MADT_ENTRIES )
1084
0
        return -1;
1085
0
1086
0
    apic_id = x86_acpiid_to_apicid[acpi_id];
1087
0
    if ( apic_id == BAD_APICID )
1088
0
        return -1;
1089
0
1090
0
    for ( i = 0; i < nr_cpu_ids; i++ )
1091
0
    {
1092
0
        if ( apic_id == x86_cpu_to_apicid[i] )
1093
0
            return i;
1094
0
    }
1095
0
1096
0
    return -1;
1097
0
}
1098
1099
#ifdef DEBUG_PM_CX
1100
static void print_cx_pminfo(uint32_t cpu, struct xen_processor_power *power)
1101
{
1102
    XEN_GUEST_HANDLE(xen_processor_cx_t) states;
1103
    xen_processor_cx_t  state;
1104
    XEN_GUEST_HANDLE(xen_processor_csd_t) csd;
1105
    xen_processor_csd_t dp;
1106
    uint32_t i;
1107
1108
    printk("cpu%d cx acpi info:\n", cpu);
1109
    printk("\tcount = %d\n", power->count);
1110
    printk("\tflags: bm_cntl[%d], bm_chk[%d], has_cst[%d],\n"
1111
           "\t       pwr_setup_done[%d], bm_rld_set[%d]\n",
1112
           power->flags.bm_control, power->flags.bm_check, power->flags.has_cst,
1113
           power->flags.power_setup_done, power->flags.bm_rld_set);
1114
    
1115
    states = power->states;
1116
    
1117
    for ( i = 0; i < power->count; i++ )
1118
    {
1119
        if ( unlikely(copy_from_guest_offset(&state, states, i, 1)) )
1120
            return;
1121
        
1122
        printk("\tstates[%d]:\n", i);
1123
        printk("\t\treg.space_id = %#x\n", state.reg.space_id);
1124
        printk("\t\treg.bit_width = %#x\n", state.reg.bit_width);
1125
        printk("\t\treg.bit_offset = %#x\n", state.reg.bit_offset);
1126
        printk("\t\treg.access_size = %#x\n", state.reg.access_size);
1127
        printk("\t\treg.address = %#"PRIx64"\n", state.reg.address);
1128
        printk("\t\ttype    = %d\n", state.type);
1129
        printk("\t\tlatency = %d\n", state.latency);
1130
        printk("\t\tpower   = %d\n", state.power);
1131
1132
        csd = state.dp;
1133
        printk("\t\tdp(@0x%p)\n", csd.p);
1134
        
1135
        if ( csd.p != NULL )
1136
        {
1137
            if ( unlikely(copy_from_guest(&dp, csd, 1)) )
1138
                return;
1139
            printk("\t\t\tdomain = %d\n", dp.domain);
1140
            printk("\t\t\tcoord_type   = %d\n", dp.coord_type);
1141
            printk("\t\t\tnum = %d\n", dp.num);
1142
        }
1143
    }
1144
}
1145
#else
1146
#define print_cx_pminfo(c, p)
1147
#endif
1148
1149
long set_cx_pminfo(uint32_t cpu, struct xen_processor_power *power)
1150
0
{
1151
0
    XEN_GUEST_HANDLE(xen_processor_cx_t) states;
1152
0
    xen_processor_cx_t xen_cx;
1153
0
    struct acpi_processor_power *acpi_power;
1154
0
    int cpu_id, i, ret;
1155
0
1156
0
    if ( unlikely(!guest_handle_okay(power->states, power->count)) )
1157
0
        return -EFAULT;
1158
0
1159
0
    if ( pm_idle_save && pm_idle != acpi_processor_idle )
1160
0
        return 0;
1161
0
1162
0
    print_cx_pminfo(cpu, power);
1163
0
1164
0
    /* map from acpi_id to cpu_id */
1165
0
    cpu_id = get_cpu_id(cpu);
1166
0
    if ( cpu_id == -1 )
1167
0
    {
1168
0
        static bool warn_once = true;
1169
0
1170
0
        if ( warn_once || opt_cpu_info )
1171
0
            printk(XENLOG_WARNING "No CPU ID for APIC ID %#x\n", cpu);
1172
0
        warn_once = false;
1173
0
        return -EINVAL;
1174
0
    }
1175
0
1176
0
    ret = cpuidle_init_cpu(cpu_id);
1177
0
    if ( ret < 0 )
1178
0
        return ret;
1179
0
1180
0
    acpi_power = processor_powers[cpu_id];
1181
0
    acpi_power->flags.bm_check = power->flags.bm_check;
1182
0
    acpi_power->flags.bm_control = power->flags.bm_control;
1183
0
    acpi_power->flags.has_cst = power->flags.has_cst;
1184
0
1185
0
    states = power->states;
1186
0
    for ( i = 0; i < power->count; i++ )
1187
0
    {
1188
0
        if ( unlikely(copy_from_guest_offset(&xen_cx, states, i, 1)) )
1189
0
            return -EFAULT;
1190
0
1191
0
        set_cx(acpi_power, &xen_cx);
1192
0
    }
1193
0
1194
0
    if ( cpuidle_current_governor->enable &&
1195
0
         cpuidle_current_governor->enable(acpi_power) )
1196
0
        return -EFAULT;
1197
0
1198
0
    /* FIXME: C-state dependency is not supported so far. */
1199
0
1200
0
    if ( cpu_id == 0 )
1201
0
    {
1202
0
        if ( pm_idle_save == NULL )
1203
0
        {
1204
0
            pm_idle_save = pm_idle;
1205
0
            pm_idle = acpi_processor_idle;
1206
0
        }
1207
0
1208
0
        dead_idle = acpi_dead_idle;
1209
0
    }
1210
0
 
1211
0
    return 0;
1212
0
}
1213
1214
uint32_t pmstat_get_cx_nr(uint32_t cpuid)
1215
0
{
1216
0
    return processor_powers[cpuid] ? processor_powers[cpuid]->count : 0;
1217
0
}
1218
1219
int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
1220
0
{
1221
0
    struct acpi_processor_power *power = processor_powers[cpuid];
1222
0
    uint64_t idle_usage = 0, idle_res = 0;
1223
0
    uint64_t last_state_update_tick, current_stime, current_tick;
1224
0
    uint64_t usage[ACPI_PROCESSOR_MAX_POWER] = { 0 };
1225
0
    uint64_t res[ACPI_PROCESSOR_MAX_POWER] = { 0 };
1226
0
    unsigned int i, nr, nr_pc = 0, nr_cc = 0;
1227
0
1228
0
    if ( power == NULL )
1229
0
    {
1230
0
        stat->last = 0;
1231
0
        stat->nr = 0;
1232
0
        stat->idle_time = 0;
1233
0
        stat->nr_pc = 0;
1234
0
        stat->nr_cc = 0;
1235
0
        return 0;
1236
0
    }
1237
0
1238
0
    stat->idle_time = get_cpu_idle_time(cpuid);
1239
0
    nr = min(stat->nr, power->count);
1240
0
1241
0
    /* Mimic the stats when detailed info hasn't been registered by dom0. */
1242
0
    if ( pm_idle_save == NULL )
1243
0
    {
1244
0
        stat->nr = 2;
1245
0
        stat->last = power->last_state ? power->last_state->idx : 0;
1246
0
1247
0
        usage[1] = idle_usage = 1;
1248
0
        res[1] = idle_res = stat->idle_time;
1249
0
1250
0
        current_stime = NOW();
1251
0
    }
1252
0
    else
1253
0
    {
1254
0
        struct hw_residencies hw_res;
1255
0
        signed int last_state_idx;
1256
0
1257
0
        stat->nr = power->count;
1258
0
1259
0
        spin_lock_irq(&power->stat_lock);
1260
0
        current_tick = cpuidle_get_tick();
1261
0
        current_stime = NOW();
1262
0
        for ( i = 1; i < nr; i++ )
1263
0
        {
1264
0
            usage[i] = power->states[i].usage;
1265
0
            res[i] = power->states[i].time;
1266
0
        }
1267
0
        last_state_update_tick = power->last_state_update_tick;
1268
0
        last_state_idx = power->last_state ? power->last_state->idx : -1;
1269
0
        spin_unlock_irq(&power->stat_lock);
1270
0
1271
0
        if ( last_state_idx >= 0 )
1272
0
        {
1273
0
            usage[last_state_idx]++;
1274
0
            res[last_state_idx] += ticks_elapsed(last_state_update_tick,
1275
0
                                                 current_tick);
1276
0
            stat->last = last_state_idx;
1277
0
        }
1278
0
        else
1279
0
            stat->last = 0;
1280
0
1281
0
        for ( i = 1; i < nr; i++ )
1282
0
        {
1283
0
            res[i] = tick_to_ns(res[i]);
1284
0
            idle_usage += usage[i];
1285
0
            idle_res += res[i];
1286
0
        }
1287
0
1288
0
        get_hw_residencies(cpuid, &hw_res);
1289
0
1290
0
#define PUT_xC(what, n) do { \
1291
0
        if ( stat->nr_##what >= n && \
1292
0
             copy_to_guest_offset(stat->what, n - 1, &hw_res.what##n, 1) ) \
1293
0
            return -EFAULT; \
1294
0
        if ( hw_res.what##n ) \
1295
0
            nr_##what = n; \
1296
0
    } while ( 0 )
1297
0
#define PUT_PC(n) PUT_xC(pc, n)
1298
0
        PUT_PC(2);
1299
0
        PUT_PC(3);
1300
0
        PUT_PC(4);
1301
0
        PUT_PC(6);
1302
0
        PUT_PC(7);
1303
0
        PUT_PC(8);
1304
0
        PUT_PC(9);
1305
0
        PUT_PC(10);
1306
0
#undef PUT_PC
1307
0
#define PUT_CC(n) PUT_xC(cc, n)
1308
0
        PUT_CC(1);
1309
0
        PUT_CC(3);
1310
0
        PUT_CC(6);
1311
0
        PUT_CC(7);
1312
0
#undef PUT_CC
1313
0
#undef PUT_xC
1314
0
    }
1315
0
1316
0
    usage[0] += idle_usage;
1317
0
    res[0] = current_stime - idle_res;
1318
0
1319
0
    if ( copy_to_guest(stat->triggers, usage, nr) ||
1320
0
         copy_to_guest(stat->residencies, res, nr) )
1321
0
        return -EFAULT;
1322
0
1323
0
    stat->nr_pc = nr_pc;
1324
0
    stat->nr_cc = nr_cc;
1325
0
1326
0
    return 0;
1327
0
}
1328
1329
int pmstat_reset_cx_stat(uint32_t cpuid)
1330
0
{
1331
0
    return 0;
1332
0
}
1333
1334
void cpuidle_disable_deep_cstate(void)
1335
0
{
1336
0
    if ( max_cstate > 1 )
1337
0
    {
1338
0
        if ( local_apic_timer_c2_ok )
1339
0
            max_cstate = 2;
1340
0
        else
1341
0
            max_cstate = 1;
1342
0
    }
1343
0
1344
0
    mb();
1345
0
1346
0
    hpet_disable_legacy_broadcast();
1347
0
}
1348
1349
bool cpuidle_using_deep_cstate(void)
1350
1
{
1351
1
    return xen_cpuidle && max_cstate > (local_apic_timer_c2_ok ? 2 : 1);
1352
1
}
1353
1354
static int cpu_callback(
1355
    struct notifier_block *nfb, unsigned long action, void *hcpu)
1356
0
{
1357
0
    unsigned int cpu = (unsigned long)hcpu;
1358
0
1359
0
    /* Only hook on CPU_ONLINE, because a dead cpu may utilize the info
1360
0
     * to enter a deep C-state. */
1361
0
    switch ( action )
1362
0
    {
1363
0
    case CPU_ONLINE:
1364
0
        (void)cpuidle_init_cpu(cpu);
1365
0
        break;
1366
0
    default:
1367
0
        break;
1368
0
    }
1369
0
1370
0
    return NOTIFY_DONE;
1371
0
}
1372
1373
static struct notifier_block cpu_nfb = {
1374
    .notifier_call = cpu_callback
1375
};
1376
1377
static int __init cpuidle_presmp_init(void)
1378
1
{
1379
1
    void *cpu = (void *)(long)smp_processor_id();
1380
1
1381
1
    if ( !xen_cpuidle )
1382
0
        return 0;
1383
1
1384
1
    mwait_idle_init(&cpu_nfb);
1385
1
    cpu_nfb.notifier_call(&cpu_nfb, CPU_ONLINE, cpu);
1386
1
    register_cpu_notifier(&cpu_nfb);
1387
1
    return 0;
1388
1
}
1389
presmp_initcall(cpuidle_presmp_init);
1390