changeset 20992:ae2b7f1c89c8
cpuidle: do not enter deep C state if there is urgent VCPU
When a VCPU is polling on an event channel, it usually has an urgent task
running, e.g. spinning on a spin_lock; in this case it is better for the
cpuidle driver not to enter a deep C state.

This patch fixes an issue where a SLES 11 SP1 domain0 hangs on boxes with a
large number of CPUs (>= 64 CPUs).
Signed-off-by: Yu Ke <ke.yu@intel.com>
Signed-off-by: Tian Kevin <kevin.tian@intel.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
| author | Keir Fraser <keir.fraser@citrix.com> |
| --- | --- |
| date | Tue Feb 16 09:27:45 2010 +0000 (2010-02-16) |
| parents | 3a0bd7ca6b11 |
| children | 4a4caf3c052d |
| files | xen/arch/x86/acpi/cpu_idle.c xen/common/sched_credit.c xen/common/schedule.c xen/include/xen/sched-if.h xen/include/xen/sched.h |
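
For concreteness, "polling on an event channel" means a vcpu blocked in a SCHEDOP_poll hypercall; this is what a paravirtualized guest's spinlock slow path issues while waiting for the lock holder, which is why a polling vcpu almost always has an urgent task behind it. A guest-side sketch of such a call, assuming the usual Linux PV guest headers (xen/interface/sched.h and friends); error handling omitted:

```c
#include <xen/interface/sched.h>   /* struct sched_poll, SCHEDOP_poll */
#include <asm/xen/hypercall.h>     /* HYPERVISOR_sched_op() */

/* Block this vcpu until an event is pending on @port. While blocked
 * here, the hypervisor keeps this vcpu's bit set in the domain's
 * poll_mask, which the scheduler change below turns into a nonzero
 * per-CPU urgent_count, keeping the physical CPU out of deep C states. */
static void poll_for_event(evtchn_port_t port)
{
    struct sched_poll poll = {
        .nr_ports = 1,
        .timeout  = 0,             /* 0 = no timeout; wait for an event */
    };

    set_xen_guest_handle(poll.ports, &port);
    (void)HYPERVISOR_sched_op(SCHEDOP_poll, &poll);
}
```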
line diff
```diff
--- a/xen/arch/x86/acpi/cpu_idle.c	Mon Feb 15 17:54:04 2010 +0000
+++ b/xen/arch/x86/acpi/cpu_idle.c	Tue Feb 16 09:27:45 2010 +0000
@@ -41,6 +41,7 @@
 #include <xen/keyhandler.h>
 #include <xen/cpuidle.h>
 #include <xen/trace.h>
+#include <xen/sched-if.h>
 #include <asm/cache.h>
 #include <asm/io.h>
 #include <asm/hpet.h>
@@ -216,6 +217,15 @@ static inline void trace_exit_reason(u32
     }
 }
 
+/* vcpu is urgent if vcpu is polling event channel
+ *
+ * if urgent vcpu exists, CPU should not enter deep C state
+ */
+static int sched_has_urgent_vcpu(void)
+{
+    return atomic_read(&this_cpu(schedule_data).urgent_count);
+}
+
 static void acpi_processor_idle(void)
 {
     struct acpi_processor_power *power = processor_powers[smp_processor_id()];
@@ -226,6 +236,26 @@ static void acpi_processor_idle(void)
     u32 exp = 0, pred = 0;
     u32 irq_traced[4] = { 0 };
 
+    if ( max_cstate > 0 && power && !sched_has_urgent_vcpu() &&
+         (next_state = cpuidle_current_governor->select(power)) > 0 )
+    {
+        cx = &power->states[next_state];
+        if ( power->flags.bm_check && acpi_idle_bm_check()
+             && cx->type == ACPI_STATE_C3 )
+            cx = power->safe_state;
+        if ( cx->idx > max_cstate )
+            cx = &power->states[max_cstate];
+        menu_get_trace_data(&exp, &pred);
+    }
+    if ( !cx )
+    {
+        if ( pm_idle_save )
+            pm_idle_save();
+        else
+            acpi_safe_halt();
+        return;
+    }
+
     cpufreq_dbs_timer_suspend();
 
     sched_tick_suspend();
@@ -246,28 +276,6 @@ static void acpi_processor_idle(void)
         return;
     }
 
-    if ( max_cstate > 0 && power &&
-         (next_state = cpuidle_current_governor->select(power)) > 0 )
-    {
-        cx = &power->states[next_state];
-        if ( power->flags.bm_check && acpi_idle_bm_check()
-             && cx->type == ACPI_STATE_C3 )
-            cx = power->safe_state;
-        if ( cx->idx > max_cstate )
-            cx = &power->states[max_cstate];
-        menu_get_trace_data(&exp, &pred);
-    }
-    if ( !cx )
-    {
-        if ( pm_idle_save )
-            pm_idle_save();
-        else
-            acpi_safe_halt();
-        sched_tick_resume();
-        cpufreq_dbs_timer_resume();
-        return;
-    }
-
     power->last_state = cx;
 
     /*
```
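Worth noting in this hunk: C-state selection, together with the new sched_has_urgent_vcpu() check, is hoisted to the top of acpi_processor_idle(), ahead of cpufreq_dbs_timer_suspend() and sched_tick_suspend(). Bailing out on the shallow path is then a plain return, which is why the relocated `if ( !cx )` block drops the sched_tick_resume()/cpufreq_dbs_timer_resume() unwind calls that the deleted copy needed. A control-flow sketch (not the actual Xen source; the helpers below are hypothetical stand-ins for the calls in the diff):

```c
/* Sketch of the reordered idle path. Hypothetical helper declarations
 * stand in for the real calls shown in the hunk above. */
struct cstate;
struct cstate *select_deep_cstate(void);   /* governor pick, clamped to max_cstate */
int  sched_has_urgent_vcpu(void);          /* the new check from this patch */
void safe_halt(void);                      /* shallow idle (C1) */
void suspend_idle_bookkeeping(void);       /* cpufreq dbs timer + sched tick */
void enter_cstate(struct cstate *cx);
void resume_idle_bookkeeping(void);

static void idle_sketch(void)
{
    struct cstate *cx = 0;

    /* Decide first, while nothing has been suspended yet. */
    if ( !sched_has_urgent_vcpu() )
        cx = select_deep_cstate();

    if ( !cx )
    {
        safe_halt();                       /* early exit: nothing to unwind */
        return;
    }

    suspend_idle_bookkeeping();            /* only reached on the deep path */
    enter_cstate(cx);
    resume_idle_bookkeeping();
}
```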
```diff
--- a/xen/common/sched_credit.c	Mon Feb 15 17:54:04 2010 +0000
+++ b/xen/common/sched_credit.c	Tue Feb 16 09:27:45 2010 +0000
@@ -1060,6 +1060,7 @@ csched_runq_steal(int peer_cpu, int cpu,
         /* We got a candidate. Grab it! */
         CSCHED_VCPU_STAT_CRANK(speer, migrate_q);
         CSCHED_STAT_CRANK(migrate_queued);
+        BUG_ON(vc->is_urgent);
         __runq_remove(speer);
         vc->processor = cpu;
         return speer;
```
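The new BUG_ON adds no logic; it asserts an invariant: an urgent vcpu is one blocked polling an event channel, so it should (presumably) never sit on a peer CPU's runqueue where csched_runq_steal() could grab it. The assertion matters because this path reassigns vc->processor directly, without the urgent_count handover that vcpu_migrate() performs below; if an urgent vcpu could be stolen here, the per-CPU counters would silently go stale.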
```diff
--- a/xen/common/schedule.c	Mon Feb 15 17:54:04 2010 +0000
+++ b/xen/common/schedule.c	Tue Feb 16 09:27:45 2010 +0000
@@ -100,6 +100,29 @@ static inline void trace_continue_runnin
                 (unsigned char *)&d);
 }
 
+static inline void vcpu_urgent_count_update(struct vcpu *v)
+{
+    if ( is_idle_vcpu(v) )
+        return;
+
+    if ( unlikely(v->is_urgent) )
+    {
+        if ( !test_bit(v->vcpu_id, v->domain->poll_mask) )
+        {
+            v->is_urgent = 0;
+            atomic_dec(&per_cpu(schedule_data,v->processor).urgent_count);
+        }
+    }
+    else
+    {
+        if ( unlikely(test_bit(v->vcpu_id, v->domain->poll_mask)) )
+        {
+            v->is_urgent = 1;
+            atomic_inc(&per_cpu(schedule_data,v->processor).urgent_count);
+        }
+    }
+}
+
 static inline void vcpu_runstate_change(
     struct vcpu *v, int new_state, s_time_t new_entry_time)
 {
@@ -108,6 +131,8 @@ static inline void vcpu_runstate_change(
     ASSERT(v->runstate.state != new_state);
     ASSERT(spin_is_locked(&per_cpu(schedule_data,v->processor).schedule_lock));
 
+    vcpu_urgent_count_update(v);
+
     trace_runstate_change(v, new_state);
 
     delta = new_entry_time - v->runstate.state_entry_time;
@@ -188,6 +213,8 @@ void sched_destroy_vcpu(struct vcpu *v)
     kill_timer(&v->periodic_timer);
     kill_timer(&v->singleshot_timer);
     kill_timer(&v->poll_timer);
+    if ( test_and_clear_bool(v->is_urgent) )
+        atomic_dec(&per_cpu(schedule_data, v->processor).urgent_count);
     SCHED_OP(destroy_vcpu, v);
 }
 
@@ -277,7 +304,7 @@ void vcpu_unblock(struct vcpu *v)
 static void vcpu_migrate(struct vcpu *v)
 {
     unsigned long flags;
-    int old_cpu;
+    int old_cpu, new_cpu;
 
     vcpu_schedule_lock_irqsave(v, flags);
 
@@ -293,9 +320,23 @@ static void vcpu_migrate(struct vcpu *v)
         return;
     }
 
-    /* Switch to new CPU, then unlock old CPU. */
+    /* Select new CPU. */
     old_cpu = v->processor;
-    v->processor = SCHED_OP(pick_cpu, v);
+    new_cpu = SCHED_OP(pick_cpu, v);
+
+    /*
+     * Transfer urgency status to new CPU before switching CPUs, as once
+     * the switch occurs, v->is_urgent is no longer protected by the per-CPU
+     * scheduler lock we are holding.
+     */
+    if ( unlikely(v->is_urgent) && (old_cpu != new_cpu) )
+    {
+        atomic_inc(&per_cpu(schedule_data, new_cpu).urgent_count);
+        atomic_dec(&per_cpu(schedule_data, old_cpu).urgent_count);
+    }
+
+    /* Switch to new CPU, then unlock old CPU. */
+    v->processor = new_cpu;
     spin_unlock_irqrestore(
         &per_cpu(schedule_data, old_cpu).schedule_lock, flags);
```
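The schedule.c hunks all maintain one invariant: each CPU's urgent_count equals the number of vcpus currently assigned to it whose poll_mask bit is set, with the result cached per vcpu in is_urgent. The cache is resynchronized on every runstate change, handed over explicitly when a vcpu migrates, and dropped when a vcpu is destroyed. A self-contained user-space model of that bookkeeping (plain C11 with <stdatomic.h>; the types are simplified stand-ins, not Xen's):

```c
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

#define NR_CPUS 4

static atomic_int urgent_count[NR_CPUS];  /* per-CPU, as in schedule_data */

struct vcpu {
    int  processor;     /* current CPU */
    bool polling;       /* stands in for the poll_mask bit */
    bool is_urgent;     /* cached urgency, as in the patch */
};

/* Mirrors vcpu_urgent_count_update(): resync the cache with poll state. */
static void urgent_count_update(struct vcpu *v)
{
    if ( v->is_urgent && !v->polling )
    {
        v->is_urgent = false;
        atomic_fetch_sub(&urgent_count[v->processor], 1);
    }
    else if ( !v->is_urgent && v->polling )
    {
        v->is_urgent = true;
        atomic_fetch_add(&urgent_count[v->processor], 1);
    }
}

/* Mirrors the vcpu_migrate() hunk: move the count along with the vcpu. */
static void migrate(struct vcpu *v, int new_cpu)
{
    if ( v->is_urgent && v->processor != new_cpu )
    {
        atomic_fetch_add(&urgent_count[new_cpu], 1);
        atomic_fetch_sub(&urgent_count[v->processor], 1);
    }
    v->processor = new_cpu;
}

int main(void)
{
    struct vcpu v = { .processor = 0 };

    v.polling = true;  urgent_count_update(&v);   /* poll starts on cpu 0 */
    migrate(&v, 2);                               /* count follows the vcpu */
    assert(atomic_load(&urgent_count[2]) == 1);
    v.polling = false; urgent_count_update(&v);   /* poll ends */
    assert(atomic_load(&urgent_count[0]) == 0);
    assert(atomic_load(&urgent_count[2]) == 0);
    return 0;
}
```

The model's migrate() also shows why the real transfer must happen while the old CPU's scheduler lock is still held: once v->processor changes, runstate updates run under the new CPU's lock and would otherwise decrement the wrong counter.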
```diff
--- a/xen/include/xen/sched-if.h	Mon Feb 15 17:54:04 2010 +0000
+++ b/xen/include/xen/sched-if.h	Tue Feb 16 09:27:45 2010 +0000
@@ -16,6 +16,7 @@ struct schedule_data {
     struct vcpu        *idle;           /* idle task for this cpu */
     void               *sched_priv;
     struct timer        s_timer;        /* scheduling timer */
+    atomic_t            urgent_count;   /* how many urgent vcpus */
 } __cacheline_aligned;
 
 DECLARE_PER_CPU(struct schedule_data, schedule_data);
```
```diff
--- a/xen/include/xen/sched.h	Mon Feb 15 17:54:04 2010 +0000
+++ b/xen/include/xen/sched.h	Tue Feb 16 09:27:45 2010 +0000
@@ -115,6 +115,8 @@ struct vcpu
     bool_t           is_initialised;
     /* Currently running on a CPU? */
     bool_t           is_running;
+    /* VCPU should wake fast (do not deep sleep the CPU). */
+    bool_t           is_urgent;
 
 #ifdef VCPU_TRAP_LAST
 #define VCPU_TRAP_NONE    0
```