debuggers.hg
changeset 17576:5bb9093eb0e9
Add basic acpi C-states based cpu idle power mgmt in xen for x86.
It includes:
1. hypercall definition for passing ACPI info.
2. C1/C2 support.
3. Mwait support, as well as legacy ioport.
4. Ladder policy from Linux kernel.
A lot of code & ideas came from Linux.
Signed-off-by: Wei Gang <gang.wei@intel.com>
It includes:
1. hypercall definition for passing ACPI info.
2. C1/C2 support.
3. Mwait support, as well as legacy ioport.
4. Ladder policy from Linux kernel.
A lot of code & ideas came from Linux.
Signed-off-by: Wei Gang <gang.wei@intel.com>
author | Keir Fraser <keir.fraser@citrix.com> |
---|---|
date | Thu May 01 10:40:01 2008 +0100 (2008-05-01) |
parents | ad55c06c9bbc |
children | 74cae5c11f0a |
files | xen/arch/x86/acpi/Makefile xen/arch/x86/acpi/cpu_idle.c xen/arch/x86/domain.c xen/arch/x86/platform_hypercall.c xen/arch/x86/x86_64/Makefile xen/arch/x86/x86_64/cpu_idle.c xen/arch/x86/x86_64/platform_hypercall.c xen/include/public/platform.h xen/include/xlat.lst |
line diff
1.1 --- a/xen/arch/x86/acpi/Makefile Thu May 01 10:33:03 2008 +0100 1.2 +++ b/xen/arch/x86/acpi/Makefile Thu May 01 10:40:01 2008 +0100 1.3 @@ -1,2 +1,2 @@ 1.4 obj-y += boot.o 1.5 -obj-y += power.o suspend.o wakeup_prot.o 1.6 +obj-y += power.o suspend.o wakeup_prot.o cpu_idle.o
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 2.2 +++ b/xen/arch/x86/acpi/cpu_idle.c Thu May 01 10:40:01 2008 +0100 2.3 @@ -0,0 +1,690 @@ 2.4 +/* 2.5 + * cpu_idle - xen idle state module derived from Linux 2.6 + * drivers/acpi/processor_idle.c & 2.7 + * arch/x86/kernel/acpi/cstate.c 2.8 + * 2.9 + * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com> 2.10 + * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> 2.11 + * Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de> 2.12 + * Copyright (C) 2004 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> 2.13 + * - Added processor hotplug support 2.14 + * Copyright (C) 2005 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> 2.15 + * - Added support for C3 on SMP 2.16 + * Copyright (C) 2007, 2008 Intel Corporation 2.17 + * 2.18 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2.19 + * 2.20 + * This program is free software; you can redistribute it and/or modify 2.21 + * it under the terms of the GNU General Public License as published by 2.22 + * the Free Software Foundation; either version 2 of the License, or (at 2.23 + * your option) any later version. 2.24 + * 2.25 + * This program is distributed in the hope that it will be useful, but 2.26 + * WITHOUT ANY WARRANTY; without even the implied warranty of 2.27 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 2.28 + * General Public License for more details. 2.29 + * 2.30 + * You should have received a copy of the GNU General Public License along 2.31 + * with this program; if not, write to the Free Software Foundation, Inc., 2.32 + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
2.33 + * 2.34 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2.35 + */ 2.36 + 2.37 +#include <xen/config.h> 2.38 +#include <xen/errno.h> 2.39 +#include <xen/lib.h> 2.40 +#include <xen/types.h> 2.41 +#include <xen/acpi.h> 2.42 +#include <xen/smp.h> 2.43 +#include <asm/cache.h> 2.44 +#include <asm/io.h> 2.45 +#include <xen/guest_access.h> 2.46 +#include <public/platform.h> 2.47 +#include <asm/processor.h> 2.48 +#include <xen/keyhandler.h> 2.49 + 2.50 +#define DEBUG_PM_CX 2.51 + 2.52 +#define US_TO_PM_TIMER_TICKS(t) ((t * (PM_TIMER_FREQUENCY/1000)) / 1000) 2.53 +#define C2_OVERHEAD 4 /* 1us (3.579 ticks per us) */ 2.54 +#define C3_OVERHEAD 4 /* 1us (3.579 ticks per us) */ 2.55 + 2.56 +#define ACPI_PROCESSOR_MAX_POWER 8 2.57 +#define ACPI_PROCESSOR_MAX_C2_LATENCY 100 2.58 +#define ACPI_PROCESSOR_MAX_C3_LATENCY 1000 2.59 + 2.60 +extern u32 pmtmr_ioport; 2.61 +extern void (*pm_idle) (void); 2.62 + 2.63 +static void (*pm_idle_save) (void) __read_mostly; 2.64 +unsigned int max_cstate __read_mostly = 2; 2.65 +integer_param("max_cstate", max_cstate); 2.66 + 2.67 +struct acpi_processor_cx; 2.68 + 2.69 +struct acpi_processor_cx_policy 2.70 +{ 2.71 + u32 count; 2.72 + struct acpi_processor_cx *state; 2.73 + struct 2.74 + { 2.75 + u32 time; 2.76 + u32 ticks; 2.77 + u32 count; 2.78 + u32 bm; 2.79 + } threshold; 2.80 +}; 2.81 + 2.82 +struct acpi_processor_cx 2.83 +{ 2.84 + u8 valid; 2.85 + u8 type; 2.86 + u32 address; 2.87 + u8 space_id; 2.88 + u32 latency; 2.89 + u32 latency_ticks; 2.90 + u32 power; 2.91 + u32 usage; 2.92 + u64 time; 2.93 + struct acpi_processor_cx_policy promotion; 2.94 + struct acpi_processor_cx_policy demotion; 2.95 +}; 2.96 + 2.97 +struct acpi_processor_power 2.98 +{ 2.99 + struct acpi_processor_cx *state; 2.100 + u64 bm_check_timestamp; 2.101 + u32 default_state; 2.102 + u32 bm_activity; 2.103 + u32 count; 2.104 + struct acpi_processor_cx states[ACPI_PROCESSOR_MAX_POWER]; 2.105 +}; 2.106 + 2.107 +static struct 
acpi_processor_power processor_powers[NR_CPUS]; 2.108 + 2.109 +static void print_acpi_power(uint32_t cpu, struct acpi_processor_power *power) 2.110 +{ 2.111 + uint32_t i; 2.112 + 2.113 + printk("saved cpu%d cx acpi info:\n", cpu); 2.114 + printk("\tcurrent state is C%d\n", (power->state)?power->state->type:-1); 2.115 + printk("\tbm_check_timestamp = %"PRId64"\n", power->bm_check_timestamp); 2.116 + printk("\tdefault_state = %d\n", power->default_state); 2.117 + printk("\tbm_activity = 0x%08x\n", power->bm_activity); 2.118 + printk("\tcount = %d\n", power->count); 2.119 + 2.120 + for ( i = 0; i < power->count; i++ ) 2.121 + { 2.122 + printk("\tstates[%d]:\n", i); 2.123 + printk("\t\tvalid = %d\n", power->states[i].valid); 2.124 + printk("\t\ttype = %d\n", power->states[i].type); 2.125 + printk("\t\taddress = 0x%x\n", power->states[i].address); 2.126 + printk("\t\tspace_id = 0x%x\n", power->states[i].space_id); 2.127 + printk("\t\tlatency = %d\n", power->states[i].latency); 2.128 + printk("\t\tpower = %d\n", power->states[i].power); 2.129 + printk("\t\tlatency_ticks = %d\n", power->states[i].latency_ticks); 2.130 + printk("\t\tusage = %d\n", power->states[i].usage); 2.131 + printk("\t\ttime = %"PRId64"\n", power->states[i].time); 2.132 + 2.133 + printk("\t\tpromotion policy:\n"); 2.134 + printk("\t\t\tcount = %d\n", power->states[i].promotion.count); 2.135 + printk("\t\t\tstate = C%d\n", 2.136 + (power->states[i].promotion.state) ? 
2.137 + power->states[i].promotion.state->type : -1); 2.138 + printk("\t\t\tthreshold.time = %d\n", power->states[i].promotion.threshold.time); 2.139 + printk("\t\t\tthreshold.ticks = %d\n", power->states[i].promotion.threshold.ticks); 2.140 + printk("\t\t\tthreshold.count = %d\n", power->states[i].promotion.threshold.count); 2.141 + printk("\t\t\tthreshold.bm = %d\n", power->states[i].promotion.threshold.bm); 2.142 + 2.143 + printk("\t\tdemotion policy:\n"); 2.144 + printk("\t\t\tcount = %d\n", power->states[i].demotion.count); 2.145 + printk("\t\t\tstate = C%d\n", 2.146 + (power->states[i].demotion.state) ? 2.147 + power->states[i].demotion.state->type : -1); 2.148 + printk("\t\t\tthreshold.time = %d\n", power->states[i].demotion.threshold.time); 2.149 + printk("\t\t\tthreshold.ticks = %d\n", power->states[i].demotion.threshold.ticks); 2.150 + printk("\t\t\tthreshold.count = %d\n", power->states[i].demotion.threshold.count); 2.151 + printk("\t\t\tthreshold.bm = %d\n", power->states[i].demotion.threshold.bm); 2.152 + } 2.153 +} 2.154 + 2.155 +static void dump_cx(unsigned char key) 2.156 +{ 2.157 + for( int i = 0; i < num_online_cpus(); i++ ) 2.158 + print_acpi_power(i, &processor_powers[i]); 2.159 +} 2.160 + 2.161 +static int __init cpu_idle_key_init(void) 2.162 +{ 2.163 + register_keyhandler( 2.164 + 'c', dump_cx, "dump cx structures"); 2.165 + return 0; 2.166 +} 2.167 +__initcall(cpu_idle_key_init); 2.168 + 2.169 +static inline u32 ticks_elapsed(u32 t1, u32 t2) 2.170 +{ 2.171 + if ( t2 >= t1 ) 2.172 + return (t2 - t1); 2.173 + else 2.174 + return ((0xFFFFFFFF - t1) + t2); 2.175 +} 2.176 + 2.177 +static void acpi_processor_power_activate(struct acpi_processor_power *power, 2.178 + struct acpi_processor_cx *new) 2.179 +{ 2.180 + struct acpi_processor_cx *old; 2.181 + 2.182 + if ( !power || !new ) 2.183 + return; 2.184 + 2.185 + old = power->state; 2.186 + 2.187 + if ( old ) 2.188 + old->promotion.count = 0; 2.189 + new->demotion.count = 0; 2.190 + 2.191 + 
power->state = new; 2.192 + 2.193 + return; 2.194 +} 2.195 + 2.196 +static void acpi_safe_halt(void) 2.197 +{ 2.198 + smp_mb__after_clear_bit(); 2.199 + safe_halt(); 2.200 +} 2.201 + 2.202 +#define MWAIT_ECX_INTERRUPT_BREAK (0x1) 2.203 + 2.204 +static void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) 2.205 +{ 2.206 + __monitor((void *)current, 0, 0); 2.207 + smp_mb(); 2.208 + __mwait(eax, ecx); 2.209 +} 2.210 + 2.211 +static void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) 2.212 +{ 2.213 + mwait_idle_with_hints(cx->address, MWAIT_ECX_INTERRUPT_BREAK); 2.214 +} 2.215 + 2.216 +static void acpi_idle_do_entry(struct acpi_processor_cx *cx) 2.217 +{ 2.218 + if ( cx->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE ) 2.219 + { 2.220 + /* Call into architectural FFH based C-state */ 2.221 + acpi_processor_ffh_cstate_enter(cx); 2.222 + } 2.223 + else 2.224 + { 2.225 + int unused; 2.226 + /* IO port based C-state */ 2.227 + inb(cx->address); 2.228 + /* Dummy wait op - must do something useless after P_LVL2 read 2.229 + because chipsets cannot guarantee that STPCLK# signal 2.230 + gets asserted in time to freeze execution properly. */ 2.231 + unused = inl(pmtmr_ioport); 2.232 + } 2.233 +} 2.234 + 2.235 +static void acpi_processor_idle(void) 2.236 +{ 2.237 + struct acpi_processor_power *power = NULL; 2.238 + struct acpi_processor_cx *cx = NULL; 2.239 + struct acpi_processor_cx *next_state = NULL; 2.240 + int sleep_ticks = 0; 2.241 + u32 t1, t2 = 0; 2.242 + 2.243 + power = &processor_powers[smp_processor_id()]; 2.244 + 2.245 + /* 2.246 + * Interrupts must be disabled during bus mastering calculations and 2.247 + * for C2/C3 transitions. 
2.248 + */ 2.249 + local_irq_disable(); 2.250 + cx = power->state; 2.251 + if ( !cx ) 2.252 + { 2.253 + if ( pm_idle_save ) 2.254 + { 2.255 + printk(XENLOG_DEBUG "call pm_idle_save()\n"); 2.256 + pm_idle_save(); 2.257 + } 2.258 + else 2.259 + { 2.260 + printk(XENLOG_DEBUG "call acpi_safe_halt()\n"); 2.261 + acpi_safe_halt(); 2.262 + } 2.263 + return; 2.264 + } 2.265 + 2.266 + /* 2.267 + * Sleep: 2.268 + * ------ 2.269 + * Invoke the current Cx state to put the processor to sleep. 2.270 + */ 2.271 + if ( cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3 ) 2.272 + smp_mb__after_clear_bit(); 2.273 + 2.274 + switch ( cx->type ) 2.275 + { 2.276 + case ACPI_STATE_C1: 2.277 + /* 2.278 + * Invoke C1. 2.279 + * Use the appropriate idle routine, the one that would 2.280 + * be used without acpi C-states. 2.281 + */ 2.282 + if ( pm_idle_save ) 2.283 + pm_idle_save(); 2.284 + else 2.285 + acpi_safe_halt(); 2.286 + 2.287 + /* 2.288 + * TBD: Can't get time duration while in C1, as resumes 2.289 + * go to an ISR rather than here. Need to instrument 2.290 + * base interrupt handler. 2.291 + */ 2.292 + sleep_ticks = 0xFFFFFFFF; 2.293 + break; 2.294 + 2.295 + case ACPI_STATE_C2: 2.296 + /* Get start time (ticks) */ 2.297 + t1 = inl(pmtmr_ioport); 2.298 + /* Invoke C2 */ 2.299 + acpi_idle_do_entry(cx); 2.300 + /* Get end time (ticks) */ 2.301 + t2 = inl(pmtmr_ioport); 2.302 + 2.303 + /* Re-enable interrupts */ 2.304 + local_irq_enable(); 2.305 + /* Compute time (ticks) that we were actually asleep */ 2.306 + sleep_ticks = 2.307 + ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD; 2.308 + break; 2.309 + default: 2.310 + local_irq_enable(); 2.311 + return; 2.312 + } 2.313 + 2.314 + cx->usage++; 2.315 + if ( (cx->type != ACPI_STATE_C1) && (sleep_ticks > 0) ) 2.316 + cx->time += sleep_ticks; 2.317 + 2.318 + next_state = power->state; 2.319 + 2.320 + /* 2.321 + * Promotion? 
2.322 + * ---------- 2.323 + * Track the number of longs (time asleep is greater than threshold) 2.324 + * and promote when the count threshold is reached. Note that bus 2.325 + * mastering activity may prevent promotions. 2.326 + * Do not promote above max_cstate. 2.327 + */ 2.328 + if ( cx->promotion.state && 2.329 + ((cx->promotion.state - power->states) <= max_cstate) ) 2.330 + { 2.331 + if ( sleep_ticks > cx->promotion.threshold.ticks ) 2.332 + { 2.333 + cx->promotion.count++; 2.334 + cx->demotion.count = 0; 2.335 + if ( cx->promotion.count >= cx->promotion.threshold.count ) 2.336 + { 2.337 + next_state = cx->promotion.state; 2.338 + goto end; 2.339 + } 2.340 + } 2.341 + } 2.342 + 2.343 + /* 2.344 + * Demotion? 2.345 + * --------- 2.346 + * Track the number of shorts (time asleep is less than time threshold) 2.347 + * and demote when the usage threshold is reached. 2.348 + */ 2.349 + if ( cx->demotion.state ) 2.350 + { 2.351 + if ( sleep_ticks < cx->demotion.threshold.ticks ) 2.352 + { 2.353 + cx->demotion.count++; 2.354 + cx->promotion.count = 0; 2.355 + if ( cx->demotion.count >= cx->demotion.threshold.count ) 2.356 + { 2.357 + next_state = cx->demotion.state; 2.358 + goto end; 2.359 + } 2.360 + } 2.361 + } 2.362 + 2.363 +end: 2.364 + /* 2.365 + * Demote if current state exceeds max_cstate 2.366 + */ 2.367 + if ( (power->state - power->states) > max_cstate ) 2.368 + { 2.369 + if ( cx->demotion.state ) 2.370 + next_state = cx->demotion.state; 2.371 + } 2.372 + 2.373 + /* 2.374 + * New Cx State? 2.375 + * ------------- 2.376 + * If we're going to start using a new Cx state we must clean up 2.377 + * from the previous and prepare to use the new. 
2.378 + */ 2.379 + if ( next_state != power->state ) 2.380 + acpi_processor_power_activate(power, next_state); 2.381 +} 2.382 + 2.383 +static int acpi_processor_set_power_policy(struct acpi_processor_power *power) 2.384 +{ 2.385 + unsigned int i; 2.386 + unsigned int state_is_set = 0; 2.387 + struct acpi_processor_cx *lower = NULL; 2.388 + struct acpi_processor_cx *higher = NULL; 2.389 + struct acpi_processor_cx *cx; 2.390 + 2.391 + if ( !power ) 2.392 + return -EINVAL; 2.393 + 2.394 + /* 2.395 + * This function sets the default Cx state policy (OS idle handler). 2.396 + * Our scheme is to promote quickly to C2 but more conservatively 2.397 + * to C3. We're favoring C2 for its characteristics of low latency 2.398 + * (quick response), good power savings, and ability to allow bus 2.399 + * mastering activity. Note that the Cx state policy is completely 2.400 + * customizable and can be altered dynamically. 2.401 + */ 2.402 + 2.403 + /* startup state */ 2.404 + for ( i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++ ) 2.405 + { 2.406 + cx = &power->states[i]; 2.407 + if ( !cx->valid ) 2.408 + continue; 2.409 + 2.410 + if ( !state_is_set ) 2.411 + power->state = cx; 2.412 + state_is_set++; 2.413 + break; 2.414 + } 2.415 + 2.416 + if ( !state_is_set ) 2.417 + return -ENODEV; 2.418 + 2.419 + /* demotion */ 2.420 + for ( i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++ ) 2.421 + { 2.422 + cx = &power->states[i]; 2.423 + if ( !cx->valid ) 2.424 + continue; 2.425 + 2.426 + if ( lower ) 2.427 + { 2.428 + cx->demotion.state = lower; 2.429 + cx->demotion.threshold.ticks = cx->latency_ticks; 2.430 + cx->demotion.threshold.count = 1; 2.431 + } 2.432 + 2.433 + lower = cx; 2.434 + } 2.435 + 2.436 + /* promotion */ 2.437 + for ( i = (ACPI_PROCESSOR_MAX_POWER - 1); i > 0; i-- ) 2.438 + { 2.439 + cx = &power->states[i]; 2.440 + if ( !cx->valid ) 2.441 + continue; 2.442 + 2.443 + if ( higher ) 2.444 + { 2.445 + cx->promotion.state = higher; 2.446 + cx->promotion.threshold.ticks = cx->latency_ticks; 
2.447 + if ( cx->type >= ACPI_STATE_C2 ) 2.448 + cx->promotion.threshold.count = 4; 2.449 + else 2.450 + cx->promotion.threshold.count = 10; 2.451 + } 2.452 + 2.453 + higher = cx; 2.454 + } 2.455 + 2.456 + return 0; 2.457 +} 2.458 + 2.459 +static int init_cx_pminfo(struct acpi_processor_power *acpi_power) 2.460 +{ 2.461 + memset(acpi_power, 0, sizeof(*acpi_power)); 2.462 + 2.463 + acpi_power->states[ACPI_STATE_C1].type = ACPI_STATE_C1; 2.464 + 2.465 + acpi_power->states[ACPI_STATE_C0].valid = 1; 2.466 + acpi_power->states[ACPI_STATE_C1].valid = 1; 2.467 + 2.468 + acpi_power->count = 2; 2.469 + 2.470 + return 0; 2.471 +} 2.472 + 2.473 +#define CPUID_MWAIT_LEAF (5) 2.474 +#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1) 2.475 +#define CPUID5_ECX_INTERRUPT_BREAK (0x2) 2.476 + 2.477 +#define MWAIT_ECX_INTERRUPT_BREAK (0x1) 2.478 + 2.479 +#define MWAIT_SUBSTATE_MASK (0xf) 2.480 +#define MWAIT_SUBSTATE_SIZE (4) 2.481 + 2.482 +static int acpi_processor_ffh_cstate_probe(xen_processor_cx_t *cx) 2.483 +{ 2.484 + struct cpuinfo_x86 *c = &current_cpu_data; 2.485 + unsigned int eax, ebx, ecx, edx; 2.486 + unsigned int edx_part; 2.487 + unsigned int cstate_type; /* C-state type and not ACPI C-state type */ 2.488 + unsigned int num_cstate_subtype; 2.489 + 2.490 + if ( c->cpuid_level < CPUID_MWAIT_LEAF ) 2.491 + { 2.492 + printk(XENLOG_INFO "MWAIT leaf not supported by cpuid\n"); 2.493 + return -EFAULT; 2.494 + } 2.495 + 2.496 + cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); 2.497 + printk(XENLOG_DEBUG "cpuid.MWAIT[.eax=%x, .ebx=%x, .ecx=%x, .edx=%x]\n", 2.498 + eax, ebx, ecx, edx); 2.499 + 2.500 + /* Check whether this particular cx_type (in CST) is supported or not */ 2.501 + cstate_type = (cx->reg.address >> MWAIT_SUBSTATE_SIZE) + 1; 2.502 + edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE); 2.503 + num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK; 2.504 + 2.505 + if ( num_cstate_subtype < (cx->reg.address & MWAIT_SUBSTATE_MASK) ) 2.506 + return -EFAULT; 2.507 + 2.508 + /* 
mwait ecx extensions INTERRUPT_BREAK should be supported for C2/C3 */ 2.509 + if ( !(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || 2.510 + !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ) 2.511 + return -EFAULT; 2.512 + 2.513 + printk(XENLOG_INFO "Monitor-Mwait will be used to enter C-%d state\n", cx->type); 2.514 + return 0; 2.515 +} 2.516 + 2.517 +#define VENDOR_INTEL (1) 2.518 +#define NATIVE_CSTATE_BEYOND_HALT (2) 2.519 + 2.520 +static int check_cx(xen_processor_cx_t *cx) 2.521 +{ 2.522 + if ( cx == NULL ) 2.523 + return -EINVAL; 2.524 + 2.525 + switch ( cx->reg.space_id ) 2.526 + { 2.527 + case ACPI_ADR_SPACE_SYSTEM_IO: 2.528 + if ( cx->reg.address == 0 ) 2.529 + return -EINVAL; 2.530 + break; 2.531 + 2.532 + case ACPI_ADR_SPACE_FIXED_HARDWARE: 2.533 + if ( cx->type > ACPI_STATE_C1 ) 2.534 + { 2.535 + if ( cx->reg.bit_width != VENDOR_INTEL || 2.536 + cx->reg.bit_offset != NATIVE_CSTATE_BEYOND_HALT ) 2.537 + return -EINVAL; 2.538 + 2.539 + /* assume all logical cpu has the same support for mwait */ 2.540 + if ( acpi_processor_ffh_cstate_probe(cx) ) 2.541 + return -EFAULT; 2.542 + } 2.543 + break; 2.544 + 2.545 + default: 2.546 + return -ENODEV; 2.547 + } 2.548 + 2.549 + return 0; 2.550 +} 2.551 + 2.552 +static int set_cx(struct acpi_processor_power *acpi_power, 2.553 + xen_processor_cx_t *xen_cx) 2.554 +{ 2.555 + struct acpi_processor_cx *cx; 2.556 + 2.557 + /* skip unsupported acpi cstate */ 2.558 + if ( check_cx(xen_cx) ) 2.559 + return -EFAULT; 2.560 + 2.561 + cx = &acpi_power->states[xen_cx->type]; 2.562 + if ( !cx->valid ) 2.563 + acpi_power->count++; 2.564 + 2.565 + cx->valid = 1; 2.566 + cx->type = xen_cx->type; 2.567 + cx->address = xen_cx->reg.address; 2.568 + cx->space_id = xen_cx->reg.space_id; 2.569 + cx->latency = xen_cx->latency; 2.570 + cx->power = xen_cx->power; 2.571 + 2.572 + cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency); 2.573 + 2.574 + return 0; 2.575 +} 2.576 + 2.577 +static int get_cpu_id(u8 acpi_id) 2.578 +{ 2.579 + int i; 2.580 + u8 apic_id; 
2.581 + 2.582 + apic_id = x86_acpiid_to_apicid[acpi_id]; 2.583 + if ( apic_id == 0xff ) 2.584 + return -1; 2.585 + 2.586 + for ( i = 0; i < NR_CPUS; i++ ) 2.587 + { 2.588 + if ( apic_id == x86_cpu_to_apicid[i] ) 2.589 + return i; 2.590 + } 2.591 + 2.592 + return -1; 2.593 +} 2.594 + 2.595 +#ifdef DEBUG_PM_CX 2.596 +static void print_cx_pminfo(uint32_t cpu, struct xen_processor_power *power) 2.597 +{ 2.598 + XEN_GUEST_HANDLE(xen_processor_cx_t) states; 2.599 + xen_processor_cx_t state; 2.600 + XEN_GUEST_HANDLE(xen_processor_csd_t) csd; 2.601 + xen_processor_csd_t dp; 2.602 + uint32_t i; 2.603 + 2.604 + printk("cpu%d cx acpi info:\n", cpu); 2.605 + printk("\tcount = %d\n", power->count); 2.606 + printk("\tflags: bm_cntl[%d], bm_chk[%d], has_cst[%d],\n" 2.607 + "\t pwr_setup_done[%d], bm_rld_set[%d]\n", 2.608 + power->flags.bm_control, power->flags.bm_check, power->flags.has_cst, 2.609 + power->flags.power_setup_done, power->flags.bm_rld_set); 2.610 + 2.611 + states = power->states; 2.612 + 2.613 + for ( i = 0; i < power->count; i++ ) 2.614 + { 2.615 + if ( unlikely(copy_from_guest_offset(&state, states, i, 1)) ) 2.616 + return; 2.617 + 2.618 + printk("\tstates[%d]:\n", i); 2.619 + printk("\t\treg.space_id = 0x%x\n", state.reg.space_id); 2.620 + printk("\t\treg.bit_width = 0x%x\n", state.reg.bit_width); 2.621 + printk("\t\treg.bit_offset = 0x%x\n", state.reg.bit_offset); 2.622 + printk("\t\treg.access_size = 0x%x\n", state.reg.access_size); 2.623 + printk("\t\treg.address = 0x%"PRIx64"\n", state.reg.address); 2.624 + printk("\t\ttype = %d\n", state.type); 2.625 + printk("\t\tlatency = %d\n", state.latency); 2.626 + printk("\t\tpower = %d\n", state.power); 2.627 + 2.628 + csd = state.dp; 2.629 + printk("\t\tdp(@0x%p)\n", csd.p); 2.630 + 2.631 + if ( csd.p != NULL ) 2.632 + { 2.633 + if ( unlikely(copy_from_guest(&dp, csd, 1)) ) 2.634 + return; 2.635 + printk("\t\t\tdomain = %d\n", dp.domain); 2.636 + printk("\t\t\tcoord_type = %d\n", dp.coord_type); 2.637 + 
printk("\t\t\tnum = %d\n", dp.num); 2.638 + } 2.639 + } 2.640 +} 2.641 +#else 2.642 +#define print_cx_pminfo(c, p) 2.643 +#endif 2.644 + 2.645 +long set_cx_pminfo(uint32_t cpu, struct xen_processor_power *power) 2.646 +{ 2.647 + XEN_GUEST_HANDLE(xen_processor_cx_t) states; 2.648 + xen_processor_cx_t xen_cx; 2.649 + struct acpi_processor_power *acpi_power; 2.650 + int cpu_id, i; 2.651 + 2.652 + if ( unlikely(!guest_handle_okay(power->states, power->count)) ) 2.653 + return -EFAULT; 2.654 + 2.655 + print_cx_pminfo(cpu, power); 2.656 + 2.657 + /* map from acpi_id to cpu_id */ 2.658 + cpu_id = get_cpu_id((u8)cpu); 2.659 + if ( cpu_id == -1 ) 2.660 + { 2.661 + printk(XENLOG_ERR "no cpu_id for acpi_id %d\n", cpu); 2.662 + return -EFAULT; 2.663 + } 2.664 + 2.665 + acpi_power = &processor_powers[cpu_id]; 2.666 + 2.667 + init_cx_pminfo(acpi_power); 2.668 + 2.669 + states = power->states; 2.670 + 2.671 + for ( i = 0; i < power->count; i++ ) 2.672 + { 2.673 + if ( unlikely(copy_from_guest_offset(&xen_cx, states, i, 1)) ) 2.674 + return -EFAULT; 2.675 + 2.676 + set_cx(acpi_power, &xen_cx); 2.677 + } 2.678 + 2.679 + /* FIXME: C-state dependency is not supported by far */ 2.680 + 2.681 + /* initialize default policy */ 2.682 + acpi_processor_set_power_policy(acpi_power); 2.683 + 2.684 + print_acpi_power(cpu_id, acpi_power); 2.685 + 2.686 + if ( cpu_id == 0 && pm_idle_save == NULL ) 2.687 + { 2.688 + pm_idle_save = pm_idle; 2.689 + pm_idle = acpi_processor_idle; 2.690 + } 2.691 + 2.692 + return 0; 2.693 +}
3.1 --- a/xen/arch/x86/domain.c Thu May 01 10:33:03 2008 +0100 3.2 +++ b/xen/arch/x86/domain.c Thu May 01 10:40:01 2008 +0100 3.3 @@ -56,6 +56,9 @@ DEFINE_PER_CPU(struct vcpu *, curr_vcpu) 3.4 DEFINE_PER_CPU(u64, efer); 3.5 DEFINE_PER_CPU(unsigned long, cr4); 3.6 3.7 +static void default_idle(void); 3.8 +void (*pm_idle) (void) = default_idle; 3.9 + 3.10 static void unmap_vcpu_info(struct vcpu *v); 3.11 3.12 static void paravirt_ctxt_switch_from(struct vcpu *v); 3.13 @@ -105,7 +108,7 @@ void idle_loop(void) 3.14 if ( cpu_is_offline(smp_processor_id()) ) 3.15 play_dead(); 3.16 page_scrub_schedule_work(); 3.17 - default_idle(); 3.18 + (*pm_idle)(); 3.19 do_softirq(); 3.20 } 3.21 }
4.1 --- a/xen/arch/x86/platform_hypercall.c Thu May 01 10:33:03 2008 +0100 4.2 +++ b/xen/arch/x86/platform_hypercall.c Thu May 01 10:40:01 2008 +0100 4.3 @@ -44,6 +44,8 @@ extern spinlock_t xenpf_lock; 4.4 4.5 static DEFINE_PER_CPU(uint64_t, freq); 4.6 4.7 +extern long set_cx_pminfo(uint32_t cpu, struct xen_processor_power *power); 4.8 + 4.9 static long cpu_frequency_change_helper(void *data) 4.10 { 4.11 return cpu_frequency_change(this_cpu(freq)); 4.12 @@ -340,6 +342,27 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe 4.13 } 4.14 break; 4.15 4.16 + case XENPF_set_processor_pminfo: 4.17 + switch ( op->u.set_pminfo.type ) 4.18 + { 4.19 + case XEN_PM_PX: 4.20 + ret = -EINVAL; 4.21 + break; 4.22 + 4.23 + case XEN_PM_CX: 4.24 + ret = set_cx_pminfo(op->u.set_pminfo.id, &op->u.set_pminfo.power); 4.25 + break; 4.26 + 4.27 + case XEN_PM_TX: 4.28 + ret = -EINVAL; 4.29 + break; 4.30 + 4.31 + default: 4.32 + ret = -EINVAL; 4.33 + break; 4.34 + } 4.35 + break; 4.36 + 4.37 default: 4.38 ret = -ENOSYS; 4.39 break;
5.1 --- a/xen/arch/x86/x86_64/Makefile Thu May 01 10:33:03 2008 +0100 5.2 +++ b/xen/arch/x86/x86_64/Makefile Thu May 01 10:40:01 2008 +0100 5.3 @@ -12,6 +12,7 @@ obj-$(CONFIG_COMPAT) += compat_kexec.o 5.4 obj-$(CONFIG_COMPAT) += domain.o 5.5 obj-$(CONFIG_COMPAT) += physdev.o 5.6 obj-$(CONFIG_COMPAT) += platform_hypercall.o 5.7 +obj-$(CONFIG_COMPAT) += cpu_idle.o 5.8 5.9 ifeq ($(CONFIG_COMPAT),y) 5.10 # extra dependencies 5.11 @@ -22,4 +23,5 @@ physdev.o: ../physdev.c 5.12 platform_hypercall.o: ../platform_hypercall.c 5.13 sysctl.o: ../sysctl.c 5.14 traps.o: compat/traps.c 5.15 +cpu_idle.o: ../acpi/cpu_idle.c 5.16 endif
6.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 6.2 +++ b/xen/arch/x86/x86_64/cpu_idle.c Thu May 01 10:40:01 2008 +0100 6.3 @@ -0,0 +1,128 @@ 6.4 +/****************************************************************************** 6.5 + * cpu_idle.c -- adapt x86/acpi/cpu_idle.c to compat guest. 6.6 + * 6.7 + * Copyright (C) 2007, 2008 Intel Corporation 6.8 + * 6.9 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 6.10 + * 6.11 + * This program is free software; you can redistribute it and/or modify 6.12 + * it under the terms of the GNU General Public License as published by 6.13 + * the Free Software Foundation; either version 2 of the License, or (at 6.14 + * your option) any later version. 6.15 + * 6.16 + * This program is distributed in the hope that it will be useful, but 6.17 + * WITHOUT ANY WARRANTY; without even the implied warranty of 6.18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 6.19 + * General Public License for more details. 6.20 + * 6.21 + * You should have received a copy of the GNU General Public License along 6.22 + * with this program; if not, write to the Free Software Foundation, Inc., 6.23 + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
6.24 + * 6.25 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 6.26 + */ 6.27 + 6.28 +#define __XEN_TOOLS__ /* for using get_xen_guest_handle macro */ 6.29 + 6.30 +#include <xen/config.h> 6.31 +#include <xen/types.h> 6.32 +#include <xen/xmalloc.h> 6.33 +#include <xen/guest_access.h> 6.34 +#include <compat/platform.h> 6.35 + 6.36 +CHECK_processor_csd; 6.37 + 6.38 +DEFINE_XEN_GUEST_HANDLE(compat_processor_csd_t); 6.39 +DEFINE_XEN_GUEST_HANDLE(compat_processor_cx_t); 6.40 + 6.41 +#define xlat_page_start COMPAT_ARG_XLAT_VIRT_START(current->vcpu_id) 6.42 +#define xlat_page_size COMPAT_ARG_XLAT_SIZE 6.43 +#define xlat_page_left_size(xlat_page_current) \ 6.44 + (xlat_page_start + xlat_page_size - xlat_page_current) 6.45 + 6.46 +#define xlat_malloc_init(xlat_page_current) do { \ 6.47 + xlat_page_current = xlat_page_start; \ 6.48 +} while (0) 6.49 + 6.50 +static void *xlat_malloc(unsigned long *xlat_page_current, size_t size) 6.51 +{ 6.52 + void *ret; 6.53 + 6.54 + /* normalize size to be 64 * n */ 6.55 + size = (size + 0x3fUL) & ~0x3fUL; 6.56 + 6.57 + if ( unlikely(size > xlat_page_left_size(*xlat_page_current)) ) 6.58 + return NULL; 6.59 + 6.60 + ret = (void *) *xlat_page_current; 6.61 + *xlat_page_current += size; 6.62 + 6.63 + return ret; 6.64 +} 6.65 + 6.66 +#define xlat_malloc_array(_p, _t, _c) ((_t *) xlat_malloc(&_p, sizeof(_t) * _c)) 6.67 + 6.68 +static int copy_from_compat_state(xen_processor_cx_t *xen_state, 6.69 + compat_processor_cx_t *state) 6.70 +{ 6.71 +#define XLAT_processor_cx_HNDL_dp(_d_, _s_) do { \ 6.72 + XEN_GUEST_HANDLE(compat_processor_csd_t) dps; \ 6.73 + if ( unlikely(!compat_handle_okay((_s_)->dp, (_s_)->dpcnt)) ) \ 6.74 + return -EFAULT; \ 6.75 + guest_from_compat_handle(dps, (_s_)->dp); \ 6.76 + (_d_)->dp = guest_handle_cast(dps, xen_processor_csd_t); \ 6.77 +} while (0) 6.78 + XLAT_processor_cx(xen_state, state); 6.79 +#undef XLAT_processor_cx_HNDL_dp 6.80 + 6.81 + return 0; 6.82 +} 6.83 + 6.84 +extern long 
set_cx_pminfo(uint32_t cpu, struct xen_processor_power *power); 6.85 + 6.86 +long compat_set_cx_pminfo(uint32_t cpu, struct compat_processor_power *power) 6.87 +{ 6.88 + struct xen_processor_power *xen_power; 6.89 + unsigned long xlat_page_current; 6.90 + 6.91 + xlat_malloc_init(xlat_page_current); 6.92 + 6.93 + xen_power = xlat_malloc_array(xlat_page_current, 6.94 + struct xen_processor_power, 1); 6.95 + if ( unlikely(xen_power == NULL) ) 6.96 + return -EFAULT; 6.97 + 6.98 +#define XLAT_processor_power_HNDL_states(_d_, _s_) do { \ 6.99 + xen_processor_cx_t *xen_states = NULL; \ 6.100 +\ 6.101 + if ( likely((_s_)->count > 0) ) \ 6.102 + { \ 6.103 + XEN_GUEST_HANDLE(compat_processor_cx_t) states; \ 6.104 + compat_processor_cx_t state; \ 6.105 + int i; \ 6.106 +\ 6.107 + xen_states = xlat_malloc_array(xlat_page_current, \ 6.108 + xen_processor_cx_t, (_s_)->count); \ 6.109 + if ( unlikely(xen_states == NULL) ) \ 6.110 + return -EFAULT; \ 6.111 +\ 6.112 + if ( unlikely(!compat_handle_okay((_s_)->states, (_s_)->count)) ) \ 6.113 + return -EFAULT; \ 6.114 + guest_from_compat_handle(states, (_s_)->states); \ 6.115 +\ 6.116 + for ( i = 0; i < _s_->count; i++ ) \ 6.117 + { \ 6.118 + if ( unlikely(copy_from_guest_offset(&state, states, i, 1)) ) \ 6.119 + return -EFAULT; \ 6.120 + if ( unlikely(copy_from_compat_state(&xen_states[i], &state)) ) \ 6.121 + return -EFAULT; \ 6.122 + } \ 6.123 + } \ 6.124 +\ 6.125 + set_xen_guest_handle((_d_)->states, xen_states); \ 6.126 +} while (0) 6.127 + XLAT_processor_power(xen_power, power); 6.128 +#undef XLAT_processor_power_HNDL_states 6.129 + 6.130 + return set_cx_pminfo(cpu, xen_power); 6.131 +}
7.1 --- a/xen/arch/x86/x86_64/platform_hypercall.c Thu May 01 10:33:03 2008 +0100 7.2 +++ b/xen/arch/x86/x86_64/platform_hypercall.c Thu May 01 10:40:01 2008 +0100 7.3 @@ -11,6 +11,10 @@ DEFINE_XEN_GUEST_HANDLE(compat_platform_ 7.4 #define xen_platform_op_t compat_platform_op_t 7.5 #define do_platform_op(x) compat_platform_op(_##x) 7.6 7.7 +#define xen_processor_power compat_processor_power 7.8 +#define xen_processor_power_t compat_processor_power_t 7.9 +#define set_cx_pminfo compat_set_cx_pminfo 7.10 + 7.11 #define xenpf_enter_acpi_sleep compat_pf_enter_acpi_sleep 7.12 7.13 #define COMPAT
8.1 --- a/xen/include/public/platform.h Thu May 01 10:33:03 2008 +0100 8.2 +++ b/xen/include/public/platform.h Thu May 01 10:40:01 2008 +0100 8.3 @@ -199,6 +199,70 @@ struct xenpf_getidletime { 8.4 typedef struct xenpf_getidletime xenpf_getidletime_t; 8.5 DEFINE_XEN_GUEST_HANDLE(xenpf_getidletime_t); 8.6 8.7 +#define XENPF_set_processor_pminfo 54 8.8 + 8.9 +/* ability bits */ 8.10 +#define XEN_PROCESSOR_PM_CX 1 8.11 +#define XEN_PROCESSOR_PM_PX 2 8.12 +#define XEN_PROCESSOR_PM_TX 4 8.13 + 8.14 +/* cmd type */ 8.15 +#define XEN_PM_CX 0 8.16 +#define XEN_PM_PX 1 8.17 +#define XEN_PM_TX 2 8.18 + 8.19 +struct xen_power_register { 8.20 + uint32_t space_id; 8.21 + uint32_t bit_width; 8.22 + uint32_t bit_offset; 8.23 + uint32_t access_size; 8.24 + uint64_t address; 8.25 +}; 8.26 + 8.27 +struct xen_processor_csd { 8.28 + uint32_t domain; /* domain number of one dependent group */ 8.29 + uint32_t coord_type; /* coordination type */ 8.30 + uint32_t num; /* number of processors in same domain */ 8.31 +}; 8.32 +typedef struct xen_processor_csd xen_processor_csd_t; 8.33 +DEFINE_XEN_GUEST_HANDLE(xen_processor_csd_t); 8.34 + 8.35 +struct xen_processor_cx { 8.36 + struct xen_power_register reg; /* GAS for Cx trigger register */ 8.37 + uint8_t type; /* cstate value, c0: 0, c1: 1, ... 
*/ 8.38 + uint32_t latency; /* worst latency (ms) to enter/exit this cstate */ 8.39 + uint32_t power; /* average power consumption(mW) */ 8.40 + uint32_t dpcnt; /* number of dependency entries */ 8.41 + XEN_GUEST_HANDLE(xen_processor_csd_t) dp; /* NULL if no dependency */ 8.42 +}; 8.43 +typedef struct xen_processor_cx xen_processor_cx_t; 8.44 +DEFINE_XEN_GUEST_HANDLE(xen_processor_cx_t); 8.45 + 8.46 +struct xen_processor_flags { 8.47 + uint32_t bm_control:1; 8.48 + uint32_t bm_check:1; 8.49 + uint32_t has_cst:1; 8.50 + uint32_t power_setup_done:1; 8.51 + uint32_t bm_rld_set:1; 8.52 +}; 8.53 + 8.54 +struct xen_processor_power { 8.55 + uint32_t count; /* number of C state entries in array below */ 8.56 + struct xen_processor_flags flags; /* global flags of this processor */ 8.57 + XEN_GUEST_HANDLE(xen_processor_cx_t) states; /* supported c states */ 8.58 +}; 8.59 + 8.60 +struct xenpf_set_processor_pminfo { 8.61 + /* IN variables */ 8.62 + uint32_t id; /* ACPI CPU ID */ 8.63 + uint32_t type; /* {XEN_PM_CX, ...} */ 8.64 + union { 8.65 + struct xen_processor_power power;/* Cx: _CST/_CSD */ 8.66 + }; 8.67 +}; 8.68 +typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t; 8.69 +DEFINE_XEN_GUEST_HANDLE(xenpf_set_processor_pminfo_t); 8.70 + 8.71 struct xen_platform_op { 8.72 uint32_t cmd; 8.73 uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ 8.74 @@ -213,6 +277,7 @@ struct xen_platform_op { 8.75 struct xenpf_enter_acpi_sleep enter_acpi_sleep; 8.76 struct xenpf_change_freq change_freq; 8.77 struct xenpf_getidletime getidletime; 8.78 + struct xenpf_set_processor_pminfo set_pminfo; 8.79 uint8_t pad[128]; 8.80 } u; 8.81 };
9.1 --- a/xen/include/xlat.lst Thu May 01 10:33:03 2008 +0100 9.2 +++ b/xen/include/xlat.lst Thu May 01 10:40:01 2008 +0100 9.3 @@ -44,3 +44,8 @@ 9.4 ! vcpu_runstate_info vcpu.h 9.5 ? xenoprof_init xenoprof.h 9.6 ? xenoprof_passive xenoprof.h 9.7 +! power_register platform.h 9.8 +? processor_csd platform.h 9.9 +! processor_cx platform.h 9.10 +! processor_flags platform.h 9.11 +! processor_power platform.h