debuggers.hg

changeset 20992:ae2b7f1c89c8

cpuidle: do not enter deep C state if there is an urgent VCPU

When a VCPU is polling on an event channel, it usually has an urgent
task running, e.g. a spin_lock wait. In this case, it is better for the
cpuidle driver not to enter a deep C state.

This patch fixes an issue where an SLES 11 SP1 domain0 hangs on boxes
with a large number of CPUs (>= 64 CPUs).
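
The mechanism is: a VCPU that polls an event channel is marked urgent, the
scheduler keeps a per-CPU count of urgent VCPUs (urgent_count), and the idle
path consults that count before selecting a deep C state, falling back to a
shallow halt while any urgent VCPU is present. The following is a minimal,
self-contained C sketch of that decision; the helper names (vcpu_poll_start,
has_urgent_vcpu, pick_cstate) and the plain-int counter are illustrative
stand-ins, not the actual Xen code, which appears in the diff below.

/* Minimal sketch of the patch's idea (illustrative, not Xen code):
 * a per-CPU count of "urgent" (event-channel-polling) VCPUs gates
 * how deep an idle state the CPU is allowed to enter. */
#include <stdio.h>

#define MAX_CSTATE 3              /* deepest C state the governor may pick */

static int urgent_count;          /* stand-in for atomic_t urgent_count */

static void vcpu_poll_start(void) { urgent_count++; }  /* VCPU begins polling */
static void vcpu_poll_stop(void)  { urgent_count--; }  /* poll completes */

static int has_urgent_vcpu(void)  /* stand-in for sched_has_urgent_vcpu() */
{
    return urgent_count > 0;
}

/* Stand-in for the C-state choice in acpi_processor_idle(): with an
 * urgent VCPU present, stay in C1 (plain halt) so wakeup latency is
 * low; otherwise honour the governor's choice up to MAX_CSTATE. */
static int pick_cstate(int governor_choice)
{
    if (has_urgent_vcpu())
        return 1;
    return governor_choice > MAX_CSTATE ? MAX_CSTATE : governor_choice;
}

int main(void)
{
    printf("no urgent vcpu -> C%d\n", pick_cstate(3));
    vcpu_poll_start();
    printf("urgent vcpu    -> C%d\n", pick_cstate(3));
    vcpu_poll_stop();
    printf("no urgent vcpu -> C%d\n", pick_cstate(3));
    return 0;
}

In the actual patch the same effect is achieved by having acpi_processor_idle()
skip governor selection entirely (and fall back to pm_idle_save() or
acpi_safe_halt()) whenever sched_has_urgent_vcpu() reports a non-zero per-CPU
urgent_count.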

Signed-off-by: Yu Ke <ke.yu@intel.com>
Signed-off-by: Tian Kevin <kevin.tian@intel.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Feb 16 09:27:45 2010 +0000 (2010-02-16)
parents 3a0bd7ca6b11
children 4a4caf3c052d
files xen/arch/x86/acpi/cpu_idle.c xen/common/sched_credit.c xen/common/schedule.c xen/include/xen/sched-if.h xen/include/xen/sched.h
line diff
     1.1 --- a/xen/arch/x86/acpi/cpu_idle.c	Mon Feb 15 17:54:04 2010 +0000
     1.2 +++ b/xen/arch/x86/acpi/cpu_idle.c	Tue Feb 16 09:27:45 2010 +0000
     1.3 @@ -41,6 +41,7 @@
     1.4  #include <xen/keyhandler.h>
     1.5  #include <xen/cpuidle.h>
     1.6  #include <xen/trace.h>
     1.7 +#include <xen/sched-if.h>
     1.8  #include <asm/cache.h>
     1.9  #include <asm/io.h>
    1.10  #include <asm/hpet.h>
    1.11 @@ -216,6 +217,15 @@ static inline void trace_exit_reason(u32
    1.12      }
    1.13  }
    1.14  
    1.15 +/* vcpu is urgent if vcpu is polling event channel
    1.16 + *
    1.17 + * if urgent vcpu exists, CPU should not enter deep C state
    1.18 + */
    1.19 +static int sched_has_urgent_vcpu(void)
    1.20 +{
    1.21 +    return atomic_read(&this_cpu(schedule_data).urgent_count);
    1.22 +}
    1.23 +
    1.24  static void acpi_processor_idle(void)
    1.25  {
    1.26      struct acpi_processor_power *power = processor_powers[smp_processor_id()];
    1.27 @@ -226,6 +236,26 @@ static void acpi_processor_idle(void)
    1.28      u32 exp = 0, pred = 0;
    1.29      u32 irq_traced[4] = { 0 };
    1.30  
    1.31 +    if ( max_cstate > 0 && power && !sched_has_urgent_vcpu() &&
    1.32 +         (next_state = cpuidle_current_governor->select(power)) > 0 )
    1.33 +    {
    1.34 +        cx = &power->states[next_state];
    1.35 +        if ( power->flags.bm_check && acpi_idle_bm_check()
    1.36 +             && cx->type == ACPI_STATE_C3 )
    1.37 +            cx = power->safe_state;
    1.38 +        if ( cx->idx > max_cstate )
    1.39 +            cx = &power->states[max_cstate];
    1.40 +        menu_get_trace_data(&exp, &pred);
    1.41 +    }
    1.42 +    if ( !cx )
    1.43 +    {
    1.44 +        if ( pm_idle_save )
    1.45 +            pm_idle_save();
    1.46 +        else
    1.47 +            acpi_safe_halt();
    1.48 +        return;
    1.49 +    }
    1.50 +
    1.51      cpufreq_dbs_timer_suspend();
    1.52  
    1.53      sched_tick_suspend();
    1.54 @@ -246,28 +276,6 @@ static void acpi_processor_idle(void)
    1.55          return;
    1.56      }
    1.57  
    1.58 -    if ( max_cstate > 0 && power && 
    1.59 -         (next_state = cpuidle_current_governor->select(power)) > 0 )
    1.60 -    {
    1.61 -        cx = &power->states[next_state];
    1.62 -        if ( power->flags.bm_check && acpi_idle_bm_check()
    1.63 -             && cx->type == ACPI_STATE_C3 )
    1.64 -            cx = power->safe_state;
    1.65 -        if ( cx->idx > max_cstate )
    1.66 -            cx = &power->states[max_cstate];
    1.67 -        menu_get_trace_data(&exp, &pred);
    1.68 -    }
    1.69 -    if ( !cx )
    1.70 -    {
    1.71 -        if ( pm_idle_save )
    1.72 -            pm_idle_save();
    1.73 -        else
    1.74 -            acpi_safe_halt();
    1.75 -        sched_tick_resume();
    1.76 -        cpufreq_dbs_timer_resume();
    1.77 -        return;
    1.78 -    }
    1.79 -
    1.80      power->last_state = cx;
    1.81  
    1.82      /*
     2.1 --- a/xen/common/sched_credit.c	Mon Feb 15 17:54:04 2010 +0000
     2.2 +++ b/xen/common/sched_credit.c	Tue Feb 16 09:27:45 2010 +0000
     2.3 @@ -1060,6 +1060,7 @@ csched_runq_steal(int peer_cpu, int cpu,
     2.4                  /* We got a candidate. Grab it! */
     2.5                  CSCHED_VCPU_STAT_CRANK(speer, migrate_q);
     2.6                  CSCHED_STAT_CRANK(migrate_queued);
     2.7 +                BUG_ON(vc->is_urgent);
     2.8                  __runq_remove(speer);
     2.9                  vc->processor = cpu;
    2.10                  return speer;
     3.1 --- a/xen/common/schedule.c	Mon Feb 15 17:54:04 2010 +0000
     3.2 +++ b/xen/common/schedule.c	Tue Feb 16 09:27:45 2010 +0000
     3.3 @@ -100,6 +100,29 @@ static inline void trace_continue_runnin
     3.4                  (unsigned char *)&d);
     3.5  }
     3.6  
     3.7 +static inline void vcpu_urgent_count_update(struct vcpu *v)
     3.8 +{
     3.9 +    if ( is_idle_vcpu(v) )
    3.10 +        return;
    3.11 +
    3.12 +    if ( unlikely(v->is_urgent) )
    3.13 +    {
    3.14 +        if ( !test_bit(v->vcpu_id, v->domain->poll_mask) )
    3.15 +        {
    3.16 +            v->is_urgent = 0;
    3.17 +            atomic_dec(&per_cpu(schedule_data,v->processor).urgent_count);
    3.18 +        }
    3.19 +    }
    3.20 +    else
    3.21 +    {
    3.22 +        if ( unlikely(test_bit(v->vcpu_id, v->domain->poll_mask)) )
    3.23 +        {
    3.24 +            v->is_urgent = 1;
    3.25 +            atomic_inc(&per_cpu(schedule_data,v->processor).urgent_count);
    3.26 +        }
    3.27 +    }
    3.28 +}
    3.29 +
    3.30  static inline void vcpu_runstate_change(
    3.31      struct vcpu *v, int new_state, s_time_t new_entry_time)
    3.32  {
    3.33 @@ -108,6 +131,8 @@ static inline void vcpu_runstate_change(
    3.34      ASSERT(v->runstate.state != new_state);
    3.35      ASSERT(spin_is_locked(&per_cpu(schedule_data,v->processor).schedule_lock));
    3.36  
    3.37 +    vcpu_urgent_count_update(v);
    3.38 +
    3.39      trace_runstate_change(v, new_state);
    3.40  
    3.41      delta = new_entry_time - v->runstate.state_entry_time;
    3.42 @@ -188,6 +213,8 @@ void sched_destroy_vcpu(struct vcpu *v)
    3.43      kill_timer(&v->periodic_timer);
    3.44      kill_timer(&v->singleshot_timer);
    3.45      kill_timer(&v->poll_timer);
    3.46 +    if ( test_and_clear_bool(v->is_urgent) )
    3.47 +        atomic_dec(&per_cpu(schedule_data, v->processor).urgent_count);
    3.48      SCHED_OP(destroy_vcpu, v);
    3.49  }
    3.50  
    3.51 @@ -277,7 +304,7 @@ void vcpu_unblock(struct vcpu *v)
    3.52  static void vcpu_migrate(struct vcpu *v)
    3.53  {
    3.54      unsigned long flags;
    3.55 -    int old_cpu;
    3.56 +    int old_cpu, new_cpu;
    3.57  
    3.58      vcpu_schedule_lock_irqsave(v, flags);
    3.59  
    3.60 @@ -293,9 +320,23 @@ static void vcpu_migrate(struct vcpu *v)
    3.61          return;
    3.62      }
    3.63  
    3.64 -    /* Switch to new CPU, then unlock old CPU. */
    3.65 +    /* Select new CPU. */
    3.66      old_cpu = v->processor;
    3.67 -    v->processor = SCHED_OP(pick_cpu, v);
    3.68 +    new_cpu = SCHED_OP(pick_cpu, v);
    3.69 +
    3.70 +    /*
    3.71 +     * Transfer urgency status to new CPU before switching CPUs, as once
    3.72 +     * the switch occurs, v->is_urgent is no longer protected by the per-CPU
    3.73 +     * scheduler lock we are holding.
    3.74 +     */
    3.75 +    if ( unlikely(v->is_urgent) && (old_cpu != new_cpu) )
    3.76 +    {
    3.77 +        atomic_inc(&per_cpu(schedule_data, new_cpu).urgent_count);
    3.78 +        atomic_dec(&per_cpu(schedule_data, old_cpu).urgent_count);
    3.79 +    }
    3.80 +
    3.81 +    /* Switch to new CPU, then unlock old CPU. */
    3.82 +    v->processor = new_cpu;
    3.83      spin_unlock_irqrestore(
    3.84          &per_cpu(schedule_data, old_cpu).schedule_lock, flags);
    3.85  
     4.1 --- a/xen/include/xen/sched-if.h	Mon Feb 15 17:54:04 2010 +0000
     4.2 +++ b/xen/include/xen/sched-if.h	Tue Feb 16 09:27:45 2010 +0000
     4.3 @@ -16,6 +16,7 @@ struct schedule_data {
     4.4      struct vcpu        *idle;           /* idle task for this cpu          */
     4.5      void               *sched_priv;
     4.6      struct timer        s_timer;        /* scheduling timer                */
     4.7 +    atomic_t            urgent_count;   /* how many urgent vcpus           */
     4.8  } __cacheline_aligned;
     4.9  
    4.10  DECLARE_PER_CPU(struct schedule_data, schedule_data);
     5.1 --- a/xen/include/xen/sched.h	Mon Feb 15 17:54:04 2010 +0000
     5.2 +++ b/xen/include/xen/sched.h	Tue Feb 16 09:27:45 2010 +0000
     5.3 @@ -115,6 +115,8 @@ struct vcpu
     5.4      bool_t           is_initialised;
     5.5      /* Currently running on a CPU? */
     5.6      bool_t           is_running;
     5.7 +    /* VCPU should wake fast (do not deep sleep the CPU). */
     5.8 +    bool_t           is_urgent;
     5.9  
    5.10  #ifdef VCPU_TRAP_LAST
    5.11  #define VCPU_TRAP_NONE    0