
annotate xen/common/sched_credit.c @ 22848:6341fe0f4e5a

Added tag 4.1.0-rc2 for changeset 9dca60d88c63
author Keir Fraser <keir@xen.org>
date Tue Jan 25 14:06:55 2011 +0000 (2011-01-25)
parents e8acb9753ff1
children 700ac6445812
rev   line source
ack@10206 1 /****************************************************************************
ack@10206 2 * (C) 2005-2006 - Emmanuel Ackaouy - XenSource Inc.
ack@10206 3 ****************************************************************************
ack@10206 4 *
ack@10206 5 * File: common/sched_credit.c
ack@10206 6 * Author: Emmanuel Ackaouy
ack@10206 7 *
ack@10206 8 * Description: Credit-based SMP CPU scheduler
ack@10206 9 */
ack@10206 10
ack@10206 11 #include <xen/config.h>
ack@10206 12 #include <xen/init.h>
ack@10206 13 #include <xen/lib.h>
ack@10206 14 #include <xen/sched.h>
ack@10206 15 #include <xen/domain.h>
ack@10206 16 #include <xen/delay.h>
ack@10206 17 #include <xen/event.h>
ack@10206 18 #include <xen/time.h>
ack@10206 19 #include <xen/perfc.h>
ack@10206 20 #include <xen/sched-if.h>
ack@10206 21 #include <xen/softirq.h>
ack@10206 22 #include <asm/atomic.h>
kaf24@11236 23 #include <xen/errno.h>
keir@20975 24 #include <xen/keyhandler.h>
ack@10206 25
ack@10206 26 /*
ack@10206 27 * CSCHED_STATS
ack@10206 28 *
keir@19335 29 * Manage very basic per-vCPU counters and stats.
ack@10206 30 *
ack@10206 31 * Useful for debugging live systems. The stats are displayed
ack@10206 32 * with runq dumps ('r' on the Xen console).
ack@10206 33 */
keir@19335 34 #ifdef PERF_COUNTERS
ack@10206 35 #define CSCHED_STATS
keir@19335 36 #endif
ack@10206 37
ack@10206 38
ack@10206 39 /*
ack@10206 40 * Basic constants
ack@10206 41 */
ack@12071 42 #define CSCHED_DEFAULT_WEIGHT 256
ack@12071 43 #define CSCHED_TICKS_PER_TSLICE 3
ack@12071 44 #define CSCHED_TICKS_PER_ACCT 3
ack@12071 45 #define CSCHED_MSECS_PER_TICK 10
ack@12071 46 #define CSCHED_MSECS_PER_TSLICE \
ack@12071 47 (CSCHED_MSECS_PER_TICK * CSCHED_TICKS_PER_TSLICE)
keir@20308 48 #define CSCHED_CREDITS_PER_MSEC 10
ack@12071 49 #define CSCHED_CREDITS_PER_TSLICE \
keir@20308 50 (CSCHED_CREDITS_PER_MSEC * CSCHED_MSECS_PER_TSLICE)
ack@12071 51 #define CSCHED_CREDITS_PER_ACCT \
keir@20308 52 (CSCHED_CREDITS_PER_MSEC * CSCHED_MSECS_PER_TICK * CSCHED_TICKS_PER_ACCT)
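/*
 * Worked arithmetic for the defaults above (illustrative, derived from the
 * macros): a tick is 10ms, a time slice is 3 ticks = 30ms, and accounting
 * also runs every 3 ticks = 30ms.  At 10 credits per millisecond this gives
 * CSCHED_CREDITS_PER_TSLICE = 10 * 30 = 300 and
 * CSCHED_CREDITS_PER_ACCT = 10 * 10 * 3 = 300 credits per period.
 */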
ack@10206 53
ack@10206 54
ack@10206 55 /*
ack@10206 56 * Priorities
ack@10206 57 */
ack@12048 58 #define CSCHED_PRI_TS_BOOST 0 /* time-share waking up */
ack@10206 59 #define CSCHED_PRI_TS_UNDER -1 /* time-share w/ credits */
ack@10206 60 #define CSCHED_PRI_TS_OVER -2 /* time-share w/o credits */
ack@10206 61 #define CSCHED_PRI_IDLE -64 /* idle */
ack@13046 62
ack@13046 63
ack@13046 64 /*
ack@13046 65 * Flags
ack@13046 66 */
keir@21982 67 #define CSCHED_FLAG_VCPU_PARKED 0x0001 /* VCPU over capped credits */
keir@21982 68 #define CSCHED_FLAG_VCPU_YIELD 0x0002 /* VCPU yielding */
ack@10206 69
ack@10206 70
ack@10206 71 /*
ack@10206 72 * Useful macros
ack@10206 73 */
keir@21258 74 #define CSCHED_PRIV(_ops) \
keir@21258 75 ((struct csched_private *)((_ops)->sched_data))
kaf24@11017 76 #define CSCHED_PCPU(_c) \
kaf24@11017 77 ((struct csched_pcpu *)per_cpu(schedule_data, _c).sched_priv)
ack@10206 78 #define CSCHED_VCPU(_vcpu) ((struct csched_vcpu *) (_vcpu)->sched_priv)
ack@10206 79 #define CSCHED_DOM(_dom) ((struct csched_dom *) (_dom)->sched_priv)
ack@10206 80 #define RUNQ(_cpu) (&(CSCHED_PCPU(_cpu)->runq))
keir@21258 81 #define CSCHED_CPUONLINE(_pool) \
keir@21258 82 (((_pool) == NULL) ? &cpupool_free_cpus : &(_pool)->cpu_valid)
ack@10206 83
ack@10206 84
ack@10206 85 /*
ack@10206 86 * Stats
ack@10206 87 */
keir@19335 88 #define CSCHED_STAT_CRANK(_X) (perfc_incr(_X))
ack@10206 89
keir@19335 90 #ifdef CSCHED_STATS
ack@12941 91
ack@12941 92 #define CSCHED_VCPU_STATS_RESET(_V) \
ack@12941 93 do \
ack@12941 94 { \
ack@12941 95 memset(&(_V)->stats, 0, sizeof((_V)->stats)); \
ack@12941 96 } while ( 0 )
ack@12941 97
ack@12941 98 #define CSCHED_VCPU_STAT_CRANK(_V, _X) (((_V)->stats._X)++)
ack@12941 99
ack@12941 100 #define CSCHED_VCPU_STAT_SET(_V, _X, _Y) (((_V)->stats._X) = (_Y))
ack@10206 101
ack@10206 102 #else /* CSCHED_STATS */
ack@10206 103
ack@12941 104 #define CSCHED_VCPU_STATS_RESET(_V) do {} while ( 0 )
ack@12941 105 #define CSCHED_VCPU_STAT_CRANK(_V, _X) do {} while ( 0 )
ack@12941 106 #define CSCHED_VCPU_STAT_SET(_V, _X, _Y) do {} while ( 0 )
ack@10206 107
ack@10206 108 #endif /* CSCHED_STATS */
ack@10206 109
ack@10206 110
ack@10206 111 /*
keir@21982 112 * Boot parameters
keir@21982 113 */
keir@22676 114 static bool_t __read_mostly sched_credit_default_yield;
keir@21982 115 boolean_param("sched_credit_default_yield", sched_credit_default_yield);
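/*
 * Usage sketch (assuming Xen's usual boolean parameter syntax): booting
 * with "sched_credit_default_yield=1" on the Xen command line turns a
 * guest's yield into a no-op for this scheduler, i.e. csched_vcpu_yield()
 * below will not set CSCHED_FLAG_VCPU_YIELD.
 */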
keir@21982 116
keir@21982 117 /*
ack@10206 118 * Physical CPU
ack@10206 119 */
ack@10206 120 struct csched_pcpu {
ack@10206 121 struct list_head runq;
ack@10206 122 uint32_t runq_sort_last;
kfraser@14358 123 struct timer ticker;
kfraser@14358 124 unsigned int tick;
keir@20423 125 unsigned int idle_bias;
ack@10206 126 };
ack@10206 127
ack@10206 128 /*
ack@10206 129 * Virtual CPU
ack@10206 130 */
ack@10206 131 struct csched_vcpu {
ack@10206 132 struct list_head runq_elem;
ack@10206 133 struct list_head active_vcpu_elem;
ack@10206 134 struct csched_dom *sdom;
ack@10206 135 struct vcpu *vcpu;
ack@10206 136 atomic_t credit;
keir@20160 137 s_time_t start_time; /* When we were scheduled (used for credit) */
ack@13046 138 uint16_t flags;
ack@10206 139 int16_t pri;
ack@12941 140 #ifdef CSCHED_STATS
ack@12073 141 struct {
ack@12073 142 int credit_last;
ack@12073 143 uint32_t credit_incr;
ack@12073 144 uint32_t state_active;
ack@12073 145 uint32_t state_idle;
ack@12941 146 uint32_t migrate_q;
ack@12941 147 uint32_t migrate_r;
ack@12073 148 } stats;
ack@12941 149 #endif
ack@10206 150 };
ack@10206 151
ack@10206 152 /*
ack@10206 153 * Domain
ack@10206 154 */
ack@10206 155 struct csched_dom {
ack@10206 156 struct list_head active_vcpu;
ack@10206 157 struct list_head active_sdom_elem;
ack@10206 158 struct domain *dom;
ack@10206 159 uint16_t active_vcpu_count;
ack@10206 160 uint16_t weight;
ack@10206 161 uint16_t cap;
ack@10206 162 };
ack@10206 163
ack@10206 164 /*
ack@10206 165 * System-wide private data
ack@10206 166 */
ack@10206 167 struct csched_private {
ack@10206 168 spinlock_t lock;
ack@10206 169 struct list_head active_sdom;
ack@10206 170 uint32_t ncpus;
keir@19498 171 struct timer master_ticker;
ack@10206 172 unsigned int master;
ack@10206 173 cpumask_t idlers;
keir@21258 174 cpumask_t cpus;
ack@10206 175 uint32_t weight;
ack@10206 176 uint32_t credit;
ack@10206 177 int credit_balance;
ack@10206 178 uint32_t runq_sort;
ack@10206 179 };
ack@10206 180
kfraser@14358 181 static void csched_tick(void *_cpu);
keir@21258 182 static void csched_acct(void *dummy);
ack@10206 183
ack@10206 184 static inline int
ack@10206 185 __vcpu_on_runq(struct csched_vcpu *svc)
ack@10206 186 {
ack@10206 187 return !list_empty(&svc->runq_elem);
ack@10206 188 }
ack@10206 189
ack@10206 190 static inline struct csched_vcpu *
ack@10206 191 __runq_elem(struct list_head *elem)
ack@10206 192 {
ack@10206 193 return list_entry(elem, struct csched_vcpu, runq_elem);
ack@10206 194 }
ack@10206 195
ack@10206 196 static inline void
ack@10206 197 __runq_insert(unsigned int cpu, struct csched_vcpu *svc)
ack@10206 198 {
ack@10206 199 const struct list_head * const runq = RUNQ(cpu);
ack@10206 200 struct list_head *iter;
ack@10206 201
ack@10206 202 BUG_ON( __vcpu_on_runq(svc) );
ack@10206 203 BUG_ON( cpu != svc->vcpu->processor );
ack@10206 204
ack@10206 205 list_for_each( iter, runq )
ack@10206 206 {
ack@10206 207 const struct csched_vcpu * const iter_svc = __runq_elem(iter);
ack@10206 208 if ( svc->pri > iter_svc->pri )
ack@10206 209 break;
ack@10206 210 }
ack@10206 211
keir@21982 212 /* If the vcpu yielded, try to put it behind one lower-priority
keir@21982 213 * runnable vcpu if we can. The next runq_sort will bring it forward
keir@21982 214 * within 30ms if the queue is too long. */
keir@21982 215 if ( svc->flags & CSCHED_FLAG_VCPU_YIELD
keir@21982 216 && __runq_elem(iter)->pri > CSCHED_PRI_IDLE )
keir@21982 217 {
keir@21982 218 iter = iter->next;
keir@21982 219
keir@21982 220 /* Some sanity checks */
keir@21982 221 BUG_ON(iter == runq);
keir@21982 222 }
keir@21982 223
ack@10206 224 list_add_tail(&svc->runq_elem, iter);
ack@10206 225 }
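/*
 * Illustrative ordering: with priorities BOOST(0) > UNDER(-1) > OVER(-2)
 * > IDLE(-64), inserting an UNDER vcpu into a runq [BOOST, UNDER, OVER]
 * places it after the existing UNDER and before the OVER.  If it had
 * yielded, it would be placed one entry further back, behind that OVER.
 */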
ack@10206 226
ack@10206 227 static inline void
ack@10206 228 __runq_remove(struct csched_vcpu *svc)
ack@10206 229 {
ack@10206 230 BUG_ON( !__vcpu_on_runq(svc) );
ack@10206 231 list_del_init(&svc->runq_elem);
ack@10206 232 }
ack@10206 233
keir@20300 234 static void burn_credits(struct csched_vcpu *svc, s_time_t now)
keir@20160 235 {
keir@20160 236 s_time_t delta;
keir@20308 237 unsigned int credits;
keir@20160 238
keir@20160 239 /* Assert svc is current */
keir@20160 240 ASSERT(svc==CSCHED_VCPU(per_cpu(schedule_data, svc->vcpu->processor).curr));
keir@20160 241
keir@20308 242 if ( (delta = now - svc->start_time) <= 0 )
keir@20160 243 return;
keir@20160 244
keir@20308 245 credits = (delta*CSCHED_CREDITS_PER_MSEC + MILLISECS(1)/2) / MILLISECS(1);
keir@20308 246 atomic_sub(credits, &svc->credit);
keir@20308 247 svc->start_time += (credits * MILLISECS(1)) / CSCHED_CREDITS_PER_MSEC;
keir@20160 248 }
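/*
 * Worked example of the rounding above (illustrative): with
 * CSCHED_CREDITS_PER_MSEC == 10, a delta of 2.34ms burns
 * (2.34ms * 10 + 0.5ms) / 1ms = 23 credits, i.e. credit is charged in
 * tenth-of-a-millisecond units rounded to the nearest unit, and
 * start_time is advanced by the 2.3ms actually accounted for.
 */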
keir@20160 249
keir@22676 250 static bool_t __read_mostly opt_tickle_one_idle = 1;
keir@21145 251 boolean_param("tickle_one_idle_cpu", opt_tickle_one_idle);
keir@21145 252
keir@21462 253 DEFINE_PER_CPU(unsigned int, last_tickle_cpu);
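/*
 * Behavioural sketch: with the default opt_tickle_one_idle == 1,
 * __runq_tickle() below pokes just one idle CPU from the waking vcpu's
 * affinity, cycling through idlers via last_tickle_cpu; booting with
 * "tickle_one_idle_cpu=0" (or equivalent boolean syntax) tickles every
 * eligible idler instead.
 */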
keir@21145 254
ack@10206 255 static inline void
ack@10206 256 __runq_tickle(unsigned int cpu, struct csched_vcpu *new)
ack@10206 257 {
kaf24@11017 258 struct csched_vcpu * const cur =
kaf24@11017 259 CSCHED_VCPU(per_cpu(schedule_data, cpu).curr);
keir@21258 260 struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));
ack@10206 261 cpumask_t mask;
ack@10206 262
ack@10206 263 ASSERT(cur);
ack@10206 264 cpus_clear(mask);
ack@10206 265
ack@10206 266 /* If strictly higher priority than current VCPU, signal the CPU */
ack@10206 267 if ( new->pri > cur->pri )
ack@10206 268 {
ack@10206 269 if ( cur->pri == CSCHED_PRI_IDLE )
ack@10206 270 CSCHED_STAT_CRANK(tickle_local_idler);
ack@10206 271 else if ( cur->pri == CSCHED_PRI_TS_OVER )
ack@10206 272 CSCHED_STAT_CRANK(tickle_local_over);
ack@10206 273 else if ( cur->pri == CSCHED_PRI_TS_UNDER )
ack@10206 274 CSCHED_STAT_CRANK(tickle_local_under);
ack@10206 275 else
ack@10206 276 CSCHED_STAT_CRANK(tickle_local_other);
ack@10206 277
ack@10206 278 cpu_set(cpu, mask);
ack@10206 279 }
ack@10206 280
ack@10206 281 /*
ack@10206 282 * If this CPU has at least two runnable VCPUs, we tickle any idlers to
ack@10206 283 * let them know there is runnable work in the system...
ack@10206 284 */
ack@10206 285 if ( cur->pri > CSCHED_PRI_IDLE )
ack@10206 286 {
keir@21258 287 if ( cpus_empty(prv->idlers) )
ack@10206 288 {
ack@10206 289 CSCHED_STAT_CRANK(tickle_idlers_none);
ack@10206 290 }
ack@10206 291 else
ack@10206 292 {
keir@21145 293 cpumask_t idle_mask;
keir@21145 294
keir@21258 295 cpus_and(idle_mask, prv->idlers, new->vcpu->cpu_affinity);
keir@21145 296 if ( !cpus_empty(idle_mask) )
keir@21145 297 {
keir@21145 298 CSCHED_STAT_CRANK(tickle_idlers_some);
keir@21145 299 if ( opt_tickle_one_idle )
keir@21145 300 {
keir@21145 301 this_cpu(last_tickle_cpu) =
keir@21145 302 cycle_cpu(this_cpu(last_tickle_cpu), idle_mask);
keir@21145 303 cpu_set(this_cpu(last_tickle_cpu), mask);
keir@21145 304 }
keir@21145 305 else
keir@21145 306 cpus_or(mask, mask, idle_mask);
keir@21145 307 }
kaf24@11519 308 cpus_and(mask, mask, new->vcpu->cpu_affinity);
ack@10206 309 }
ack@10206 310 }
ack@10206 311
ack@10206 312 /* Send scheduler interrupts to designated CPUs */
ack@10206 313 if ( !cpus_empty(mask) )
ack@10206 314 cpumask_raise_softirq(mask, SCHEDULE_SOFTIRQ);
ack@10206 315 }
ack@10206 316
keir@21258 317 static void
keir@21327 318 csched_free_pdata(const struct scheduler *ops, void *pcpu, int cpu)
keir@21258 319 {
keir@21258 320 struct csched_private *prv = CSCHED_PRIV(ops);
keir@21258 321 struct csched_pcpu *spc = pcpu;
keir@21258 322 unsigned long flags;
keir@21258 323
keir@21258 324 if ( spc == NULL )
keir@21258 325 return;
keir@21258 326
keir@21258 327 spin_lock_irqsave(&prv->lock, flags);
keir@21258 328
keir@21258 329 prv->credit -= CSCHED_CREDITS_PER_ACCT;
keir@21258 330 prv->ncpus--;
keir@21258 331 cpu_clear(cpu, prv->idlers);
keir@21258 332 cpu_clear(cpu, prv->cpus);
keir@21258 333 if ( (prv->master == cpu) && (prv->ncpus > 0) )
keir@21258 334 {
keir@21258 335 prv->master = first_cpu(prv->cpus);
keir@21258 336 migrate_timer(&prv->master_ticker, prv->master);
keir@21258 337 }
keir@21258 338 kill_timer(&spc->ticker);
keir@21258 339 if ( prv->ncpus == 0 )
keir@21258 340 kill_timer(&prv->master_ticker);
keir@21258 341
keir@21258 342 spin_unlock_irqrestore(&prv->lock, flags);
keir@21258 343
keir@21258 344 xfree(spc);
keir@21258 345 }
keir@21258 346
keir@21258 347 static void *
keir@21327 348 csched_alloc_pdata(const struct scheduler *ops, int cpu)
ack@10206 349 {
ack@10206 350 struct csched_pcpu *spc;
keir@21258 351 struct csched_private *prv = CSCHED_PRIV(ops);
ack@10206 352 unsigned long flags;
ack@10206 353
kfraser@10930 354 /* Allocate per-PCPU info */
kfraser@10930 355 spc = xmalloc(struct csched_pcpu);
kfraser@10930 356 if ( spc == NULL )
keir@21258 357 return NULL;
keir@20308 358 memset(spc, 0, sizeof(*spc));
kfraser@10930 359
keir@21258 360 spin_lock_irqsave(&prv->lock, flags);
ack@10206 361
ack@10206 362 /* Initialize/update system-wide config */
keir@21258 363 prv->credit += CSCHED_CREDITS_PER_ACCT;
keir@21258 364 prv->ncpus++;
keir@21258 365 cpu_set(cpu, prv->cpus);
keir@21453 366 if ( prv->ncpus == 1 )
keir@21258 367 {
keir@21258 368 prv->master = cpu;
keir@21453 369 init_timer(&prv->master_ticker, csched_acct, prv, cpu);
keir@21453 370 set_timer(&prv->master_ticker, NOW() +
keir@21453 371 MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT);
keir@21258 372 }
ack@10206 373
kfraser@14358 374 init_timer(&spc->ticker, csched_tick, (void *)(unsigned long)cpu, cpu);
keir@21453 375 set_timer(&spc->ticker, NOW() + MILLISECS(CSCHED_MSECS_PER_TICK));
keir@21258 376
ack@10206 377 INIT_LIST_HEAD(&spc->runq);
keir@21258 378 spc->runq_sort_last = prv->runq_sort;
keir@20423 379 spc->idle_bias = NR_CPUS - 1;
keir@21258 380 if ( per_cpu(schedule_data, cpu).sched_priv == NULL )
keir@21258 381 per_cpu(schedule_data, cpu).sched_priv = spc;
ack@10206 382
ack@10206 383 /* Start off idling... */
kfraser@14358 384 BUG_ON(!is_idle_vcpu(per_cpu(schedule_data, cpu).curr));
keir@21258 385 cpu_set(cpu, prv->idlers);
ack@10206 386
keir@21258 387 spin_unlock_irqrestore(&prv->lock, flags);
kfraser@10930 388
keir@21258 389 return spc;
ack@10206 390 }
ack@10206 391
ack@10206 392 #ifndef NDEBUG
ack@10206 393 static inline void
ack@10206 394 __csched_vcpu_check(struct vcpu *vc)
ack@10206 395 {
ack@10206 396 struct csched_vcpu * const svc = CSCHED_VCPU(vc);
ack@10206 397 struct csched_dom * const sdom = svc->sdom;
ack@10206 398
ack@10206 399 BUG_ON( svc->vcpu != vc );
ack@10206 400 BUG_ON( sdom != CSCHED_DOM(vc->domain) );
ack@10206 401 if ( sdom )
ack@10206 402 {
ack@10206 403 BUG_ON( is_idle_vcpu(vc) );
ack@10206 404 BUG_ON( sdom->dom != vc->domain );
ack@10206 405 }
ack@10206 406 else
ack@10206 407 {
ack@10206 408 BUG_ON( !is_idle_vcpu(vc) );
ack@10206 409 }
ack@10206 410
ack@10206 411 CSCHED_STAT_CRANK(vcpu_check);
ack@10206 412 }
ack@10206 413 #define CSCHED_VCPU_CHECK(_vc) (__csched_vcpu_check(_vc))
ack@10206 414 #else
ack@10206 415 #define CSCHED_VCPU_CHECK(_vc)
ack@10206 416 #endif
ack@10206 417
keir@19331 418 /*
keir@19331 419 * Delay, in microseconds, between migrations of a VCPU between PCPUs.
keir@19331 420 * This prevents rapid fluttering of a VCPU between CPUs, and reduces the
keir@19331 421 * implicit overheads such as cache-warming. 1ms (1000) has been measured
keir@19331 422 * as a good value.
keir@19331 423 */
keir@19331 424 static unsigned int vcpu_migration_delay;
keir@19331 425 integer_param("vcpu_migration_delay", vcpu_migration_delay);
keir@19331 426
keir@19540 427 void set_vcpu_migration_delay(unsigned int delay)
keir@19540 428 {
keir@19540 429 vcpu_migration_delay = delay;
keir@19540 430 }
keir@19540 431
keir@19540 432 unsigned int get_vcpu_migration_delay(void)
keir@19540 433 {
keir@19540 434 return vcpu_migration_delay;
keir@19540 435 }
keir@19540 436
keir@19331 437 static inline int
keir@19331 438 __csched_vcpu_is_cache_hot(struct vcpu *v)
keir@19331 439 {
keir@19346 440 int hot = ((NOW() - v->last_run_time) <
keir@19331 441 ((uint64_t)vcpu_migration_delay * 1000u));
keir@19331 442
keir@19331 443 if ( hot )
keir@19331 444 CSCHED_STAT_CRANK(vcpu_hot);
keir@19331 445
keir@19331 446 return hot;
keir@19331 447 }
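/*
 * Worked example (illustrative): vcpu_migration_delay defaults to 0, so
 * nothing is ever considered cache-hot; booting with
 * "vcpu_migration_delay=1000" means a vcpu that last ran within the past
 * 1ms is treated as cache-hot and skipped by
 * __csched_vcpu_is_migrateable() below.
 */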
keir@19331 448
ack@10206 449 static inline int
ack@12941 450 __csched_vcpu_is_migrateable(struct vcpu *vc, int dest_cpu)
ack@10206 451 {
ack@10206 452 /*
keir@19331 453 * Don't pick up work that's in the peer's scheduling tail or hot on
keir@19331 454 * peer PCPU. Only pick up work that's allowed to run on our CPU.
ack@10206 455 */
keir@19331 456 return !vc->is_running &&
keir@19331 457 !__csched_vcpu_is_cache_hot(vc) &&
keir@19331 458 cpu_isset(dest_cpu, vc->cpu_affinity);
ack@10206 459 }
ack@10206 460
ack@12941 461 static int
keir@21327 462 _csched_cpu_pick(const struct scheduler *ops, struct vcpu *vc, bool_t commit)
ack@12291 463 {
ack@12941 464 cpumask_t cpus;
ack@12941 465 cpumask_t idlers;
keir@21258 466 cpumask_t *online;
ack@12941 467 int cpu;
ack@12941 468
ack@12941 469 /*
ack@12941 470 * Pick from online CPUs in VCPU's affinity mask, giving a
ack@12941 471 * preference to its current processor if it's in there.
ack@12941 472 */
keir@21258 473 online = CSCHED_CPUONLINE(vc->domain->cpupool);
keir@21258 474 cpus_and(cpus, *online, vc->cpu_affinity);
ack@12941 475 cpu = cpu_isset(vc->processor, cpus)
ack@12941 476 ? vc->processor
keir@19314 477 : cycle_cpu(vc->processor, cpus);
ack@12941 478 ASSERT( !cpus_empty(cpus) && cpu_isset(cpu, cpus) );
ack@12291 479
ack@12941 480 /*
ack@12941 481 * Try to find an idle processor within the above constraints.
ack@12941 482 *
ack@12941 483 * In multi-core and multi-threaded CPUs, not all idle execution
ack@12941 484 * vehicles are equal!
ack@12941 485 *
ack@12941 486 * We give preference to the idle execution vehicle with the most
ack@12941 487 * idling neighbours in its grouping. This distributes work across
ack@12941 488 * distinct cores first and guarantees we don't do something stupid
ack@12941 489 * like run two VCPUs on co-hyperthreads while there are idle cores
ack@12941 490 * or sockets.
ack@12941 491 */
keir@21258 492 cpus_and(idlers, cpu_online_map, CSCHED_PRIV(ops)->idlers);
ack@12941 493 cpu_set(cpu, idlers);
ack@12941 494 cpus_and(cpus, cpus, idlers);
ack@12941 495 cpu_clear(cpu, cpus);
ack@12941 496
ack@12941 497 while ( !cpus_empty(cpus) )
ack@12291 498 {
ack@12941 499 cpumask_t cpu_idlers;
ack@12941 500 cpumask_t nxt_idlers;
keir@19450 501 int nxt, weight_cpu, weight_nxt;
keir@22226 502 int migrate_factor;
ack@12941 503
keir@19314 504 nxt = cycle_cpu(cpu, cpus);
ack@12941 505
keir@19965 506 if ( cpu_isset(cpu, per_cpu(cpu_core_map, nxt)) )
ack@12941 507 {
keir@22226 508 /* We're on the same socket, so check the busyness of threads.
keir@22226 509 * Migrate if the other thread group has any more idlers at all. */
keir@19965 510 ASSERT( cpu_isset(nxt, per_cpu(cpu_core_map, cpu)) );
keir@22226 511 migrate_factor = 1;
keir@19965 512 cpus_and(cpu_idlers, idlers, per_cpu(cpu_sibling_map, cpu));
keir@19965 513 cpus_and(nxt_idlers, idlers, per_cpu(cpu_sibling_map, nxt));
ack@12941 514 }
ack@12941 515 else
ack@12941 516 {
keir@22226 517 /* We're on different sockets, so check the busyness of cores.
keir@22226 518 * Migrate only if the other core is more than twice as idle. */
keir@19965 519 ASSERT( !cpu_isset(nxt, per_cpu(cpu_core_map, cpu)) );
keir@22226 520 migrate_factor = 2;
keir@19965 521 cpus_and(cpu_idlers, idlers, per_cpu(cpu_core_map, cpu));
keir@19965 522 cpus_and(nxt_idlers, idlers, per_cpu(cpu_core_map, nxt));
ack@12941 523 }
ack@12941 524
keir@19450 525 weight_cpu = cpus_weight(cpu_idlers);
keir@19450 526 weight_nxt = cpus_weight(nxt_idlers);
keir@22226 527 /* smt_power_savings: consolidate work rather than spreading it */
keir@22226 528 if ( ( sched_smt_power_savings
keir@22226 529 && (weight_cpu > weight_nxt) )
keir@22226 530 || ( !sched_smt_power_savings
keir@22226 531 && (weight_cpu * migrate_factor < weight_nxt) ) )
ack@12941 532 {
keir@20423 533 cpu = cycle_cpu(CSCHED_PCPU(nxt)->idle_bias, nxt_idlers);
keir@20423 534 if ( commit )
keir@20423 535 CSCHED_PCPU(nxt)->idle_bias = cpu;
keir@20423 536 cpus_andnot(cpus, cpus, per_cpu(cpu_sibling_map, cpu));
ack@12941 537 }
ack@12941 538 else
ack@12941 539 {
ack@12941 540 cpus_andnot(cpus, cpus, nxt_idlers);
ack@12941 541 }
ack@12291 542 }
ack@12291 543
ack@12941 544 return cpu;
ack@12291 545 }
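/*
 * Illustrative pick (sketch, assuming one socket with two 2-thread cores,
 * cpus 0-3): a vcpu on cpu0 whose sibling cpu1 is busy, while core
 * {cpu2,cpu3} is fully idle, sees weight_cpu = 1 and weight_nxt = 2, so
 * 1 * 1 < 2 holds and the vcpu is steered onto the idle core rather than
 * left sharing a core with a busy thread.
 */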
ack@12291 546
keir@20423 547 static int
keir@21327 548 csched_cpu_pick(const struct scheduler *ops, struct vcpu *vc)
keir@20423 549 {
keir@21258 550 return _csched_cpu_pick(ops, vc, 1);
keir@20423 551 }
keir@20423 552
ack@12941 553 static inline void
keir@21258 554 __csched_vcpu_acct_start(struct csched_private *prv, struct csched_vcpu *svc)
ack@10206 555 {
ack@10206 556 struct csched_dom * const sdom = svc->sdom;
ack@10206 557 unsigned long flags;
ack@10206 558
keir@21258 559 spin_lock_irqsave(&prv->lock, flags);
ack@10206 560
ack@10206 561 if ( list_empty(&svc->active_vcpu_elem) )
ack@10206 562 {
ack@12941 563 CSCHED_VCPU_STAT_CRANK(svc, state_active);
ack@12941 564 CSCHED_STAT_CRANK(acct_vcpu_active);
ack@10206 565
ack@12941 566 sdom->active_vcpu_count++;
ack@12941 567 list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
keir@22026 568 /* Make weight per-vcpu */
keir@22026 569 prv->weight += sdom->weight;
ack@12941 570 if ( list_empty(&sdom->active_sdom_elem) )
ack@10206 571 {
keir@21258 572 list_add(&sdom->active_sdom_elem, &prv->active_sdom);
ack@12941 573 }
ack@12941 574 }
ack@12941 575
keir@21258 576 spin_unlock_irqrestore(&prv->lock, flags);
ack@12941 577 }
ack@12941 578
ack@12941 579 static inline void
keir@21258 580 __csched_vcpu_acct_stop_locked(struct csched_private *prv,
keir@21258 581 struct csched_vcpu *svc)
ack@12941 582 {
ack@12941 583 struct csched_dom * const sdom = svc->sdom;
ack@10206 584
ack@12941 585 BUG_ON( list_empty(&svc->active_vcpu_elem) );
ack@12941 586
ack@12941 587 CSCHED_VCPU_STAT_CRANK(svc, state_idle);
ack@12941 588 CSCHED_STAT_CRANK(acct_vcpu_idle);
ack@10206 589
keir@22026 590 BUG_ON( prv->weight < sdom->weight );
ack@12941 591 sdom->active_vcpu_count--;
ack@12941 592 list_del_init(&svc->active_vcpu_elem);
keir@22026 593 prv->weight -= sdom->weight;
ack@12941 594 if ( list_empty(&sdom->active_vcpu) )
ack@12941 595 {
ack@12941 596 list_del_init(&sdom->active_sdom_elem);
ack@10206 597 }
ack@12941 598 }
ack@12941 599
ack@12941 600 static void
keir@21258 601 csched_vcpu_acct(struct csched_private *prv, unsigned int cpu)
ack@12941 602 {
ack@12941 603 struct csched_vcpu * const svc = CSCHED_VCPU(current);
keir@21327 604 const struct scheduler *ops = per_cpu(scheduler, cpu);
ack@12941 605
ack@12941 606 ASSERT( current->processor == cpu );
ack@12941 607 ASSERT( svc->sdom != NULL );
ack@12048 608
ack@12048 609 /*
ack@12048 610 * If this VCPU's priority was boosted when it last awoke, reset it.
ack@12048 611 * If the VCPU is found here, then it's consuming a non-negligible
ack@12048 612 * amount of CPU resources and should no longer be boosted.
ack@12048 613 */
ack@12048 614 if ( svc->pri == CSCHED_PRI_TS_BOOST )
ack@12048 615 svc->pri = CSCHED_PRI_TS_UNDER;
ack@10206 616
ack@12941 617 /*
ack@12941 618 * Update credits
ack@12941 619 */
keir@20308 620 if ( !is_idle_vcpu(svc->vcpu) )
keir@20308 621 burn_credits(svc, NOW());
ack@10206 622
ack@12941 623 /*
ack@12941 624 * Put this VCPU and domain back on the active list if it was
ack@12941 625 * idling.
ack@12941 626 *
ack@12941 627 * If it's been active a while, check if we'd be better off
ack@12941 628 * migrating it to run elsewhere (see multi-core and multi-thread
ack@12941 629 * support in csched_cpu_pick()).
ack@12941 630 */
ack@12941 631 if ( list_empty(&svc->active_vcpu_elem) )
ack@10206 632 {
keir@21258 633 __csched_vcpu_acct_start(prv, svc);
ack@12941 634 }
keir@21258 635 else if ( _csched_cpu_pick(ops, current, 0) != cpu )
ack@12941 636 {
ack@12941 637 CSCHED_VCPU_STAT_CRANK(svc, migrate_r);
ack@12941 638 CSCHED_STAT_CRANK(migrate_running);
kfraser@14698 639 set_bit(_VPF_migrating, &current->pause_flags);
ack@12941 640 cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
ack@10206 641 }
ack@10206 642 }
ack@10206 643
keir@21258 644 static void *
keir@21327 645 csched_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
ack@10206 646 {
ack@10206 647 struct csched_vcpu *svc;
ack@10206 648
ack@10206 649 /* Allocate per-VCPU info */
ack@10206 650 svc = xmalloc(struct csched_vcpu);
kfraser@12284 651 if ( svc == NULL )
keir@21258 652 return NULL;
keir@20308 653 memset(svc, 0, sizeof(*svc));
ack@10206 654
ack@10206 655 INIT_LIST_HEAD(&svc->runq_elem);
ack@10206 656 INIT_LIST_HEAD(&svc->active_vcpu_elem);
keir@21258 657 svc->sdom = dd;
ack@10206 658 svc->vcpu = vc;
ack@10206 659 atomic_set(&svc->credit, 0);
ack@13046 660 svc->flags = 0U;
keir@21258 661 svc->pri = is_idle_domain(vc->domain) ?
keir@21258 662 CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER;
ack@12941 663 CSCHED_VCPU_STATS_RESET(svc);
keir@21258 664 CSCHED_STAT_CRANK(vcpu_init);
keir@21258 665 return svc;
keir@21258 666 }
ack@10206 667
keir@21258 668 static void
keir@21327 669 csched_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
keir@21258 670 {
keir@21258 671 struct csched_vcpu *svc = vc->sched_priv;
ack@10206 672
keir@21258 673 if ( !__vcpu_on_runq(svc) && vcpu_runnable(vc) && !vc->is_running )
keir@21258 674 __runq_insert(vc->processor, svc);
ack@10206 675 }
ack@10206 676
ack@10206 677 static void
keir@21327 678 csched_free_vdata(const struct scheduler *ops, void *priv)
keir@21258 679 {
keir@22324 680 struct csched_vcpu *svc = priv;
keir@22324 681
keir@22324 682 BUG_ON( !list_empty(&svc->runq_elem) );
keir@22324 683
keir@22324 684 xfree(svc);
keir@22324 685 }
keir@22324 686
keir@22324 687 static void
keir@22324 688 csched_vcpu_remove(const struct scheduler *ops, struct vcpu *vc)
keir@22324 689 {
keir@21258 690 struct csched_private *prv = CSCHED_PRIV(ops);
keir@22324 691 struct csched_vcpu * const svc = CSCHED_VCPU(vc);
keir@22324 692 struct csched_dom * const sdom = svc->sdom;
keir@21258 693 unsigned long flags;
keir@21258 694
keir@22324 695 CSCHED_STAT_CRANK(vcpu_destroy);
keir@22324 696
keir@21258 697 if ( __vcpu_on_runq(svc) )
keir@21258 698 __runq_remove(svc);
keir@21258 699
keir@21258 700 spin_lock_irqsave(&(prv->lock), flags);
keir@21258 701
keir@21258 702 if ( !list_empty(&svc->active_vcpu_elem) )
keir@21258 703 __csched_vcpu_acct_stop_locked(prv, svc);
keir@21258 704
keir@21258 705 spin_unlock_irqrestore(&(prv->lock), flags);
keir@21258 706
ack@10206 707 BUG_ON( sdom == NULL );
ack@10206 708 BUG_ON( !list_empty(&svc->runq_elem) );
ack@10206 709 }
ack@10206 710
ack@10206 711 static void
keir@21327 712 csched_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
ack@10206 713 {
ack@10206 714 struct csched_vcpu * const svc = CSCHED_VCPU(vc);
ack@10206 715
ack@10206 716 CSCHED_STAT_CRANK(vcpu_sleep);
ack@10206 717
ack@10206 718 BUG_ON( is_idle_vcpu(vc) );
ack@10206 719
kaf24@11017 720 if ( per_cpu(schedule_data, vc->processor).curr == vc )
ack@10206 721 cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ);
ack@10206 722 else if ( __vcpu_on_runq(svc) )
ack@10206 723 __runq_remove(svc);
ack@10206 724 }
ack@10206 725
ack@10206 726 static void
keir@21327 727 csched_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
ack@10206 728 {
ack@10206 729 struct csched_vcpu * const svc = CSCHED_VCPU(vc);
ack@10206 730 const unsigned int cpu = vc->processor;
ack@10206 731
ack@10206 732 BUG_ON( is_idle_vcpu(vc) );
ack@10206 733
kaf24@11017 734 if ( unlikely(per_cpu(schedule_data, cpu).curr == vc) )
ack@10206 735 {
ack@10206 736 CSCHED_STAT_CRANK(vcpu_wake_running);
ack@10206 737 return;
ack@10206 738 }
ack@10206 739 if ( unlikely(__vcpu_on_runq(svc)) )
ack@10206 740 {
ack@10206 741 CSCHED_STAT_CRANK(vcpu_wake_onrunq);
ack@10206 742 return;
ack@10206 743 }
ack@10206 744
ack@10206 745 if ( likely(vcpu_runnable(vc)) )
ack@10206 746 CSCHED_STAT_CRANK(vcpu_wake_runnable);
ack@10206 747 else
ack@10206 748 CSCHED_STAT_CRANK(vcpu_wake_not_runnable);
ack@10206 749
ack@12048 750 /*
ack@12048 751 * We temporarily boost the priority of waking VCPUs!
ack@12048 752 *
ack@12048 753 * If this VCPU consumes a non-negligible amount of CPU, it
ack@12048 754 * will eventually find itself in the credit accounting code
ack@12048 755 * path where its priority will be reset to normal.
ack@12048 756 *
ack@12048 757 * If on the other hand the VCPU consumes little CPU and is
ack@12048 758 * blocking and awoken a lot (doing I/O for example), its
ack@12048 759 * priority will remain boosted, optimizing its wake-to-run
ack@12048 760 * latencies.
ack@12048 761 *
ack@12048 762 * This allows wake-to-run latency sensitive VCPUs to preempt
ack@12048 763 * more CPU resource intensive VCPUs without impacting overall
ack@12048 764 * system fairness.
ack@13046 765 *
ack@13046 766 * The one exception is for VCPUs of capped domains unpausing
ack@13046 767 * after earning credits they had overspent. We don't boost
ack@13046 768 * those.
ack@12048 769 */
ack@13046 770 if ( svc->pri == CSCHED_PRI_TS_UNDER &&
ack@13046 771 !(svc->flags & CSCHED_FLAG_VCPU_PARKED) )
ack@13046 772 {
ack@12048 773 svc->pri = CSCHED_PRI_TS_BOOST;
ack@13046 774 }
ack@12048 775
ack@10206 776 /* Put the VCPU on the runq and tickle CPUs */
ack@10206 777 __runq_insert(cpu, svc);
ack@10206 778 __runq_tickle(cpu, svc);
ack@10206 779 }
ack@10206 780
keir@21982 781 static void
keir@21982 782 csched_vcpu_yield(const struct scheduler *ops, struct vcpu *vc)
keir@21982 783 {
keir@21982 784 struct csched_vcpu * const sv = CSCHED_VCPU(vc);
keir@21982 785
keir@21982 786 if ( !sched_credit_default_yield )
keir@21982 787 {
keir@21982 788 /* Let the scheduler know that this vcpu is trying to yield */
keir@21982 789 sv->flags |= CSCHED_FLAG_VCPU_YIELD;
keir@21982 790 }
keir@21982 791 }
keir@21982 792
ack@10206 793 static int
ack@10206 794 csched_dom_cntl(
keir@21327 795 const struct scheduler *ops,
ack@10206 796 struct domain *d,
kfraser@11295 797 struct xen_domctl_scheduler_op *op)
ack@10206 798 {
ack@10206 799 struct csched_dom * const sdom = CSCHED_DOM(d);
keir@21258 800 struct csched_private *prv = CSCHED_PRIV(ops);
ack@10206 801 unsigned long flags;
ack@10206 802
kaf24@11296 803 if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
ack@10206 804 {
kfraser@11295 805 op->u.credit.weight = sdom->weight;
kfraser@11295 806 op->u.credit.cap = sdom->cap;
ack@10206 807 }
ack@10206 808 else
ack@10206 809 {
kaf24@11296 810 ASSERT(op->cmd == XEN_DOMCTL_SCHEDOP_putinfo);
ack@10206 811
keir@21258 812 spin_lock_irqsave(&prv->lock, flags);
ack@10206 813
kfraser@11295 814 if ( op->u.credit.weight != 0 )
ack@10206 815 {
ack@10609 816 if ( !list_empty(&sdom->active_sdom_elem) )
ack@10609 817 {
keir@22026 818 prv->weight -= sdom->weight * sdom->active_vcpu_count;
keir@22026 819 prv->weight += op->u.credit.weight * sdom->active_vcpu_count;
ack@10609 820 }
kfraser@11295 821 sdom->weight = op->u.credit.weight;
ack@10206 822 }
ack@10206 823
kfraser@11295 824 if ( op->u.credit.cap != (uint16_t)~0U )
kfraser@11295 825 sdom->cap = op->u.credit.cap;
ack@10206 826
keir@21258 827 spin_unlock_irqrestore(&prv->lock, flags);
ack@10206 828 }
ack@10206 829
ack@10206 830 return 0;
ack@10206 831 }
ack@10206 832
keir@21258 833 static void *
keir@21327 834 csched_alloc_domdata(const struct scheduler *ops, struct domain *dom)
kfraser@12284 835 {
kfraser@12284 836 struct csched_dom *sdom;
kfraser@12284 837
kfraser@12284 838 sdom = xmalloc(struct csched_dom);
kfraser@12284 839 if ( sdom == NULL )
keir@21258 840 return NULL;
keir@20308 841 memset(sdom, 0, sizeof(*sdom));
kfraser@12284 842
kfraser@12284 843 /* Initialize credit and weight */
kfraser@12284 844 INIT_LIST_HEAD(&sdom->active_vcpu);
kfraser@12284 845 sdom->active_vcpu_count = 0;
kfraser@12284 846 INIT_LIST_HEAD(&sdom->active_sdom_elem);
kfraser@12284 847 sdom->dom = dom;
kfraser@12284 848 sdom->weight = CSCHED_DEFAULT_WEIGHT;
kfraser@12284 849 sdom->cap = 0U;
keir@21258 850
keir@21258 851 return (void *)sdom;
keir@21258 852 }
keir@21258 853
keir@21258 854 static int
keir@21327 855 csched_dom_init(const struct scheduler *ops, struct domain *dom)
keir@21258 856 {
keir@21258 857 struct csched_dom *sdom;
keir@21258 858
keir@21258 859 CSCHED_STAT_CRANK(dom_init);
keir@21258 860
keir@21258 861 if ( is_idle_domain(dom) )
keir@21258 862 return 0;
keir@21258 863
keir@21258 864 sdom = csched_alloc_domdata(ops, dom);
keir@21258 865 if ( sdom == NULL )
keir@21258 866 return -ENOMEM;
keir@21258 867
kfraser@12284 868 dom->sched_priv = sdom;
kfraser@12284 869
kfraser@12284 870 return 0;
kfraser@12284 871 }
kfraser@12284 872
ack@10206 873 static void
keir@21327 874 csched_free_domdata(const struct scheduler *ops, void *data)
keir@21258 875 {
keir@21258 876 xfree(data);
keir@21258 877 }
keir@21258 878
keir@21258 879 static void
keir@21327 880 csched_dom_destroy(const struct scheduler *ops, struct domain *dom)
ack@10206 881 {
kaf24@10281 882 CSCHED_STAT_CRANK(dom_destroy);
keir@21258 883 csched_free_domdata(ops, CSCHED_DOM(dom));
ack@10206 884 }
ack@10206 885
ack@10206 886 /*
ack@10206 887 * This is an O(n) optimized sort of the runq.
ack@10206 888 *
ack@10206 889 * Time-share VCPUs can only be one of two priorities, UNDER or OVER. We walk
ack@10206 890 * through the runq and move up any UNDERs that are preceded by OVERs. We
ack@10206 891 * remember the last UNDER to make the move up operation O(1).
ack@10206 892 */
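/*
 * Illustrative example: a runq of [ UNDER, OVER, UNDER, OVER, UNDER ]
 * is rewritten in a single pass to [ UNDER, UNDER, UNDER, OVER, OVER ],
 * each displaced UNDER being spliced in right after the last UNDER seen
 * so far; relative order within each priority class is preserved.
 */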
ack@10206 893 static void
keir@21258 894 csched_runq_sort(struct csched_private *prv, unsigned int cpu)
ack@10206 895 {
ack@10206 896 struct csched_pcpu * const spc = CSCHED_PCPU(cpu);
ack@10206 897 struct list_head *runq, *elem, *next, *last_under;
ack@10206 898 struct csched_vcpu *svc_elem;
ack@10206 899 unsigned long flags;
ack@10206 900 int sort_epoch;
ack@10206 901
keir@21258 902 sort_epoch = prv->runq_sort;
ack@10206 903 if ( sort_epoch == spc->runq_sort_last )
ack@10206 904 return;
ack@10206 905
ack@10206 906 spc->runq_sort_last = sort_epoch;
ack@10206 907
keir@22655 908 pcpu_schedule_lock_irqsave(cpu, flags);
ack@10206 909
ack@10206 910 runq = &spc->runq;
ack@10206 911 elem = runq->next;
ack@10206 912 last_under = runq;
ack@10206 913
ack@10206 914 while ( elem != runq )
ack@10206 915 {
ack@10206 916 next = elem->next;
ack@10206 917 svc_elem = __runq_elem(elem);
ack@10206 918
ack@12048 919 if ( svc_elem->pri >= CSCHED_PRI_TS_UNDER )
ack@10206 920 {
ack@10206 921 /* does elem need to move up the runq? */
ack@10206 922 if ( elem->prev != last_under )
ack@10206 923 {
ack@10206 924 list_del(elem);
ack@10206 925 list_add(elem, last_under);
ack@10206 926 }
ack@10206 927 last_under = elem;
ack@10206 928 }
ack@10206 929
ack@10206 930 elem = next;
ack@10206 931 }
ack@10206 932
keir@22655 933 pcpu_schedule_unlock_irqrestore(cpu, flags);
ack@10206 934 }
ack@10206 935
ack@10206 936 static void
keir@19498 937 csched_acct(void* dummy)
ack@10206 938 {
keir@21258 939 struct csched_private *prv = dummy;
ack@10206 940 unsigned long flags;
ack@10206 941 struct list_head *iter_vcpu, *next_vcpu;
ack@10206 942 struct list_head *iter_sdom, *next_sdom;
ack@10206 943 struct csched_vcpu *svc;
ack@10206 944 struct csched_dom *sdom;
ack@10206 945 uint32_t credit_total;
ack@10206 946 uint32_t weight_total;
ack@10206 947 uint32_t weight_left;
ack@10206 948 uint32_t credit_fair;
ack@10206 949 uint32_t credit_peak;
ack@12240 950 uint32_t credit_cap;
ack@10206 951 int credit_balance;
ack@10206 952 int credit_xtra;
ack@10206 953 int credit;
ack@10206 954
ack@10206 955
keir@21258 956 spin_lock_irqsave(&prv->lock, flags);
ack@10206 957
keir@21258 958 weight_total = prv->weight;
keir@21258 959 credit_total = prv->credit;
ack@10206 960
ack@10206 961 /* Converge balance towards 0 when it drops negative */
keir@21258 962 if ( prv->credit_balance < 0 )
ack@10206 963 {
keir@21258 964 credit_total -= prv->credit_balance;
ack@10206 965 CSCHED_STAT_CRANK(acct_balance);
ack@10206 966 }
ack@10206 967
ack@10206 968 if ( unlikely(weight_total == 0) )
ack@10206 969 {
keir@21258 970 prv->credit_balance = 0;
keir@21258 971 spin_unlock_irqrestore(&prv->lock, flags);
ack@10206 972 CSCHED_STAT_CRANK(acct_no_work);
keir@19498 973 goto out;
ack@10206 974 }
ack@10206 975
ack@10206 976 CSCHED_STAT_CRANK(acct_run);
ack@10206 977
ack@10206 978 weight_left = weight_total;
ack@10206 979 credit_balance = 0;
ack@10206 980 credit_xtra = 0;
ack@12240 981 credit_cap = 0U;
ack@10206 982
keir@21258 983 list_for_each_safe( iter_sdom, next_sdom, &prv->active_sdom )
ack@10206 984 {
ack@10206 985 sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
ack@10206 986
ack@10206 987 BUG_ON( is_idle_domain(sdom->dom) );
ack@10206 988 BUG_ON( sdom->active_vcpu_count == 0 );
ack@10206 989 BUG_ON( sdom->weight == 0 );
keir@22026 990 BUG_ON( (sdom->weight * sdom->active_vcpu_count) > weight_left );
ack@10206 991
keir@22026 992 weight_left -= ( sdom->weight * sdom->active_vcpu_count );
ack@10206 993
ack@10206 994 /*
ack@10206 995 * A domain's fair share is computed using its weight in competition
ack@10206 996 * with that of all other active domains.
ack@10206 997 *
ack@10206 998 * At most, a domain can use credits to run all its active VCPUs
ack@10206 999 * for one full accounting period. We allow a domain to earn more
ack@10206 1000 * only when the system-wide credit balance is negative.
ack@10206 1001 */
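/*
 * Worked example (illustrative): on a 2-PCPU host credit_total starts at
 * 2 * CSCHED_CREDITS_PER_ACCT = 600.  With domain A (weight 256, one
 * active vcpu) and domain B (weight 512, one active vcpu),
 * weight_total = 768, so the fair share computed below is
 * 600 * 256 / 768 = 200 credits for A and 400 for B; B's share is then
 * clipped to its 300-credit peak and the excess offered to domains
 * processed after it in this loop.
 */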
ack@12071 1002 credit_peak = sdom->active_vcpu_count * CSCHED_CREDITS_PER_ACCT;
keir@21258 1003 if ( prv->credit_balance < 0 )
ack@10206 1004 {
keir@22026 1005 credit_peak += ( ( -prv->credit_balance
keir@22026 1006 * sdom->weight
keir@22026 1007 * sdom->active_vcpu_count) +
ack@10206 1008 (weight_total - 1)
ack@10206 1009 ) / weight_total;
ack@10206 1010 }
ack@12240 1011
ack@10206 1012 if ( sdom->cap != 0U )
ack@10206 1013 {
ack@12071 1014 credit_cap = ((sdom->cap * CSCHED_CREDITS_PER_ACCT) + 99) / 100;
ack@10206 1015 if ( credit_cap < credit_peak )
ack@10206 1016 credit_peak = credit_cap;
ack@12240 1017
keir@22026 1018 /* FIXME -- set cap per-vcpu as well...? */
ack@12240 1019 credit_cap = ( credit_cap + ( sdom->active_vcpu_count - 1 )
ack@12240 1020 ) / sdom->active_vcpu_count;
ack@10206 1021 }
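/*
 * Worked cap arithmetic (illustrative): a domain capped at 50 (50% of
 * one PCPU) gets credit_cap = (50 * 300 + 99) / 100 = 150 credits per
 * accounting period; with two active vcpus that is split into
 * (150 + 1) / 2 = 75 per vcpu, the debt level beyond which a running
 * vcpu of the capped domain is parked further below.
 */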
ack@10206 1022
keir@22026 1023 credit_fair = ( ( credit_total
keir@22026 1024 * sdom->weight
keir@22026 1025 * sdom->active_vcpu_count )
keir@22026 1026 + (weight_total - 1)
ack@10206 1027 ) / weight_total;
ack@10206 1028
ack@10206 1029 if ( credit_fair < credit_peak )
ack@10206 1030 {
ack@10206 1031 credit_xtra = 1;
ack@10206 1032 }
ack@10206 1033 else
ack@10206 1034 {
ack@10206 1035 if ( weight_left != 0U )
ack@10206 1036 {
ack@10206 1037 /* Give other domains a chance at unused credits */
ack@10206 1038 credit_total += ( ( ( credit_fair - credit_peak
ack@10206 1039 ) * weight_total
ack@10206 1040 ) + ( weight_left - 1 )
ack@10206 1041 ) / weight_left;
ack@10206 1042 }
ack@10206 1043
ack@10206 1044 if ( credit_xtra )
ack@10206 1045 {
ack@10206 1046 /*
ack@10206 1047 * Lazily keep domains with extra credits at the head of
ack@10206 1048 * the queue to give others a chance at them in future
ack@10206 1049 * accounting periods.
ack@10206 1050 */
ack@10206 1051 CSCHED_STAT_CRANK(acct_reorder);
ack@10206 1052 list_del(&sdom->active_sdom_elem);
keir@21258 1053 list_add(&sdom->active_sdom_elem, &prv->active_sdom);
ack@10206 1054 }
ack@10206 1055
ack@10206 1056 credit_fair = credit_peak;
ack@10206 1057 }
ack@10206 1058
ack@10206 1059 /* Compute fair share per VCPU */
ack@10206 1060 credit_fair = ( credit_fair + ( sdom->active_vcpu_count - 1 )
ack@10206 1061 ) / sdom->active_vcpu_count;
ack@10206 1062
ack@10206 1063
ack@10206 1064 list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu )
ack@10206 1065 {
ack@10206 1066 svc = list_entry(iter_vcpu, struct csched_vcpu, active_vcpu_elem);
ack@10206 1067 BUG_ON( sdom != svc->sdom );
ack@10206 1068
ack@10206 1069 /* Increment credit */
ack@10206 1070 atomic_add(credit_fair, &svc->credit);
ack@10206 1071 credit = atomic_read(&svc->credit);
ack@10206 1072
ack@10206 1073 /*
ack@10206 1074 * Recompute priority or, if VCPU is idling, remove it from
ack@10206 1075 * the active list.
ack@10206 1076 */
ack@10206 1077 if ( credit < 0 )
ack@10206 1078 {
ack@13046 1079 svc->pri = CSCHED_PRI_TS_OVER;
ack@10206 1080
ack@13046 1081 /* Park running VCPUs of capped-out domains */
ack@13046 1082 if ( sdom->cap != 0U &&
ack@13046 1083 credit < -credit_cap &&
ack@13046 1084 !(svc->flags & CSCHED_FLAG_VCPU_PARKED) )
ack@13046 1085 {
ack@13046 1086 CSCHED_STAT_CRANK(vcpu_park);
ack@13046 1087 vcpu_pause_nosync(svc->vcpu);
ack@13046 1088 svc->flags |= CSCHED_FLAG_VCPU_PARKED;
ack@13046 1089 }
ack@13046 1090
ack@13046 1091 /* Lower bound on credits */
ack@12071 1092 if ( credit < -CSCHED_CREDITS_PER_TSLICE )
ack@10206 1093 {
ack@10206 1094 CSCHED_STAT_CRANK(acct_min_credit);
ack@12071 1095 credit = -CSCHED_CREDITS_PER_TSLICE;
ack@10206 1096 atomic_set(&svc->credit, credit);
ack@10206 1097 }
ack@10206 1098 }
ack@10206 1099 else
ack@10206 1100 {
ack@10206 1101 svc->pri = CSCHED_PRI_TS_UNDER;
ack@10206 1102
ack@13046 1103 /* Unpark any capped domains whose credits go positive */
ack@13046 1104 if ( svc->flags & CSCHED_FLAG_VCPU_PARKED)
ack@13046 1105 {
ack@13046 1106 /*
ack@13046 1107 * It's important to unset the flag AFTER the unpause()
ack@13046 1108 * call to make sure the VCPU's priority is not boosted
ack@13046 1109 * if it is woken up here.
ack@13046 1110 */
ack@13046 1111 CSCHED_STAT_CRANK(vcpu_unpark);
ack@13046 1112 vcpu_unpause(svc->vcpu);
ack@13046 1113 svc->flags &= ~CSCHED_FLAG_VCPU_PARKED;
ack@13046 1114 }
ack@13046 1115
ack@13046 1116 /* Upper bound on credits means VCPU stops earning */
ack@12071 1117 if ( credit > CSCHED_CREDITS_PER_TSLICE )
ack@12071 1118 {
keir@21258 1119 __csched_vcpu_acct_stop_locked(prv, svc);
keir@21983 1120 /* Divide credits in half, so that when it starts
keir@21983 1121 * accounting again, it starts a little bit "ahead" */
keir@21983 1122 credit /= 2;
ack@12071 1123 atomic_set(&svc->credit, credit);
ack@12071 1124 }
ack@10206 1125 }
ack@10206 1126
ack@12941 1127 CSCHED_VCPU_STAT_SET(svc, credit_last, credit);
ack@12941 1128 CSCHED_VCPU_STAT_SET(svc, credit_incr, credit_fair);
ack@10206 1129 credit_balance += credit;
ack@10206 1130 }
ack@10206 1131 }
ack@10206 1132
keir@21258 1133 prv->credit_balance = credit_balance;
ack@10206 1134
keir@21258 1135 spin_unlock_irqrestore(&prv->lock, flags);
ack@10206 1136
ack@10206 1137 /* Inform each CPU that its runq needs to be sorted */
keir@21258 1138 prv->runq_sort++;
keir@19498 1139
keir@19498 1140 out:
keir@21258 1141 set_timer( &prv->master_ticker, NOW() +
keir@19498 1142 MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT );
ack@10206 1143 }
ack@10206 1144
ack@10206 1145 static void
kfraser@14358 1146 csched_tick(void *_cpu)
ack@10206 1147 {
kfraser@14358 1148 unsigned int cpu = (unsigned long)_cpu;
kfraser@14358 1149 struct csched_pcpu *spc = CSCHED_PCPU(cpu);
keir@21258 1150 struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));
kfraser@14358 1151
kfraser@14358 1152 spc->tick++;
kfraser@14358 1153
ack@10206 1154 /*
ack@10206 1155 * Accounting for running VCPU
ack@10206 1156 */
ack@12941 1157 if ( !is_idle_vcpu(current) )
keir@21258 1158 csched_vcpu_acct(prv, cpu);
ack@10206 1159
ack@10206 1160 /*
ack@10206 1161 * Check if runq needs to be sorted
ack@10206 1162 *
ack@10206 1163 * Every physical CPU resorts the runq after the accounting master has
ack@10206 1164 * modified priorities. This is a special O(n) sort and runs at most
ack@10206 1165 * once per accounting period (currently 30 milliseconds).
ack@10206 1166 */
keir@21258 1167 csched_runq_sort(prv, cpu);
kfraser@14358 1168
kfraser@14358 1169 set_timer(&spc->ticker, NOW() + MILLISECS(CSCHED_MSECS_PER_TICK));
ack@10206 1170 }
ack@10206 1171
ack@10206 1172 static struct csched_vcpu *
ack@12941 1173 csched_runq_steal(int peer_cpu, int cpu, int pri)
ack@10206 1174 {
ack@12941 1175 const struct csched_pcpu * const peer_pcpu = CSCHED_PCPU(peer_cpu);
ack@12941 1176 const struct vcpu * const peer_vcpu = per_cpu(schedule_data, peer_cpu).curr;
ack@12941 1177 struct csched_vcpu *speer;
ack@10206 1178 struct list_head *iter;
ack@10206 1179 struct vcpu *vc;
ack@10206 1180
ack@12941 1181 /*
ack@12941 1182 * Don't steal from an idle CPU's runq because it's about to
ack@12941 1183 * pick up that work itself.
ack@12941 1184 */
ack@12941 1185 if ( peer_pcpu != NULL && !is_idle_vcpu(peer_vcpu) )
ack@10206 1186 {
ack@12941 1187 list_for_each( iter, &peer_pcpu->runq )
ack@10206 1188 {
ack@12941 1189 speer = __runq_elem(iter);
ack@10206 1190
ack@12941 1191 /*
ack@13046 1192 * If next available VCPU here is not of strictly higher
ack@13046 1193 * priority than ours, this PCPU is useless to us.
ack@12941 1194 */
ack@13046 1195 if ( speer->pri <= pri )
ack@12941 1196 break;
ack@10206 1197
ack@12941 1198 /* Is this VCPU runnable on our PCPU? */
ack@12941 1199 vc = speer->vcpu;
ack@12941 1200 BUG_ON( is_idle_vcpu(vc) );
ack@10206 1201
ack@12941 1202 if (__csched_vcpu_is_migrateable(vc, cpu))
ack@12941 1203 {
ack@12941 1204 /* We got a candidate. Grab it! */
ack@12941 1205 CSCHED_VCPU_STAT_CRANK(speer, migrate_q);
ack@12941 1206 CSCHED_STAT_CRANK(migrate_queued);
keir@21019 1207 WARN_ON(vc->is_urgent);
ack@12941 1208 __runq_remove(speer);
ack@12941 1209 vc->processor = cpu;
ack@12941 1210 return speer;
ack@12941 1211 }
ack@10206 1212 }
ack@10206 1213 }
ack@10206 1214
ack@12941 1215 CSCHED_STAT_CRANK(steal_peer_idle);
ack@10206 1216 return NULL;
ack@10206 1217 }
ack@10206 1218
ack@10206 1219 static struct csched_vcpu *
keir@21258 1220 csched_load_balance(struct csched_private *prv, int cpu,
keir@21671 1221 struct csched_vcpu *snext, bool_t *stolen)
ack@10206 1222 {
ack@12290 1223 struct csched_vcpu *speer;
ack@12290 1224 cpumask_t workers;
keir@21258 1225 cpumask_t *online;
ack@10206 1226 int peer_cpu;
ack@10206 1227
ack@12941 1228 BUG_ON( cpu != snext->vcpu->processor );
keir@21258 1229 online = CSCHED_CPUONLINE(per_cpu(cpupool, cpu));
ack@12941 1230
keir@18502 1231 /* If this CPU is going offline we shouldn't steal work. */
keir@21258 1232 if ( unlikely(!cpu_isset(cpu, *online)) )
keir@18502 1233 goto out;
keir@18502 1234
ack@10206 1235 if ( snext->pri == CSCHED_PRI_IDLE )
ack@10206 1236 CSCHED_STAT_CRANK(load_balance_idle);
ack@10206 1237 else if ( snext->pri == CSCHED_PRI_TS_OVER )
ack@10206 1238 CSCHED_STAT_CRANK(load_balance_over);
ack@10206 1239 else
ack@10206 1240 CSCHED_STAT_CRANK(load_balance_other);
ack@10206 1241
ack@12290 1242 /*
ack@12941 1243 * Peek at non-idling CPUs in the system, starting with our
ack@12941 1244 * immediate neighbour.
ack@12290 1245 */
keir@21258 1246 cpus_andnot(workers, *online, prv->idlers);
ack@12290 1247 cpu_clear(cpu, workers);
ack@10206 1248 peer_cpu = cpu;
ack@10206 1249
ack@12290 1250 while ( !cpus_empty(workers) )
ack@10206 1251 {
keir@19314 1252 peer_cpu = cycle_cpu(peer_cpu, workers);
ack@12290 1253 cpu_clear(peer_cpu, workers);
ack@10206 1254
ack@10206 1255 /*
ack@10206 1256 * Get ahold of the scheduler lock for this peer CPU.
ack@10206 1257 *
ack@10206 1258 * Note: We don't spin on this lock but simply try it. Spinning could
ack@10206 1259 * cause a deadlock if the peer CPU is also load balancing and trying
ack@10206 1260 * to lock this CPU.
ack@10206 1261 */
keir@22655 1262 if ( !pcpu_schedule_trylock(peer_cpu) )
ack@10206 1263 {
kaf24@11514 1264 CSCHED_STAT_CRANK(steal_trylock_failed);
kaf24@11514 1265 continue;
kaf24@11514 1266 }
ack@10206 1267
ack@12941 1268 /*
ack@12941 1269 * Any work over there to steal?
ack@12941 1270 */
ack@12941 1271 speer = csched_runq_steal(peer_cpu, cpu, snext->pri);
keir@22655 1272 pcpu_schedule_unlock(peer_cpu);
ack@12941 1273 if ( speer != NULL )
keir@21671 1274 {
keir@21671 1275 *stolen = 1;
ack@12941 1276 return speer;
keir@21671 1277 }
ack@10206 1278 }
ack@10206 1279
keir@18502 1280 out:
ack@12291 1281 /* Failed to find more important work elsewhere... */
ack@10206 1282 __runq_remove(snext);
ack@10206 1283 return snext;
ack@10206 1284 }
ack@10206 1285
ack@10206 1286 /*
ack@10206 1287 * This function is in the critical path. It is designed to be simple and
ack@10206 1288 * fast for the common case.
ack@10206 1289 */
ack@10206 1290 static struct task_slice
keir@21390 1291 csched_schedule(
keir@21390 1292 const struct scheduler *ops, s_time_t now, bool_t tasklet_work_scheduled)
ack@10206 1293 {
ack@10206 1294 const int cpu = smp_processor_id();
ack@10206 1295 struct list_head * const runq = RUNQ(cpu);
ack@10206 1296 struct csched_vcpu * const scurr = CSCHED_VCPU(current);
keir@21258 1297 struct csched_private *prv = CSCHED_PRIV(ops);
ack@10206 1298 struct csched_vcpu *snext;
ack@10206 1299 struct task_slice ret;
ack@10206 1300
ack@10206 1301 CSCHED_STAT_CRANK(schedule);
ack@10206 1302 CSCHED_VCPU_CHECK(current);
ack@10206 1303
keir@20308 1304 if ( !is_idle_vcpu(scurr->vcpu) )
keir@20308 1305 {
keir@21243 1306 /* Update credits of a non-idle VCPU. */
keir@20308 1307 burn_credits(scurr, now);
keir@20308 1308 scurr->start_time -= now;
keir@20308 1309 }
keir@21243 1310 else
keir@21243 1311 {
keir@21243 1312 /* Re-instate a boosted idle VCPU as normal-idle. */
keir@21243 1313 scurr->pri = CSCHED_PRI_IDLE;
keir@21243 1314 }
keir@20160 1315
ack@10206 1316 /*
ack@10206 1317 * Select next runnable local VCPU (ie top of local runq)
ack@10206 1318 */
ack@10206 1319 if ( vcpu_runnable(current) )
ack@10206 1320 __runq_insert(cpu, scurr);
ack@10206 1321 else
ack@10206 1322 BUG_ON( is_idle_vcpu(current) || list_empty(runq) );
ack@10206 1323
ack@10206 1324 snext = __runq_elem(runq->next);
keir@21671 1325 ret.migrated = 0;
ack@10206 1326
keir@21243 1327 /* Tasklet work (which runs in idle VCPU context) overrides all else. */
keir@21390 1328 if ( tasklet_work_scheduled )
keir@21243 1329 {
keir@21243 1330 snext = CSCHED_VCPU(idle_vcpu[cpu]);
keir@21243 1331 snext->pri = CSCHED_PRI_TS_BOOST;
keir@21243 1332 }
keir@21243 1333
ack@10206 1334 /*
keir@21982 1335 * Clear YIELD flag before scheduling out
keir@21982 1336 */
keir@21982 1337 if ( scurr->flags & CSCHED_FLAG_VCPU_YIELD )
keir@21982 1338 scurr->flags &= ~(CSCHED_FLAG_VCPU_YIELD);
keir@21982 1339
keir@21982 1340 /*
ack@10206 1341 * SMP Load balance:
ack@10206 1342 *
ack@10206 1343 * If the next highest priority local runnable VCPU has already eaten
ack@10206 1344 * through its credits, look on other PCPUs to see if we have more
ack@10206 1345 * urgent work... If not, csched_load_balance() will return snext, but
ack@10206 1346 * already removed from the runq.
ack@10206 1347 */
ack@10206 1348 if ( snext->pri > CSCHED_PRI_TS_OVER )
ack@10206 1349 __runq_remove(snext);
ack@10206 1350 else
keir@21671 1351 snext = csched_load_balance(prv, cpu, snext, &ret.migrated);
ack@10206 1352
ack@10206 1353 /*
ack@10206 1354 * Update idlers mask if necessary. When we're idling, other CPUs
ack@10206 1355 * will tickle us when they get extra work.
ack@10206 1356 */
ack@10206 1357 if ( snext->pri == CSCHED_PRI_IDLE )
ack@10206 1358 {
keir@21258 1359 if ( !cpu_isset(cpu, prv->idlers) )
keir@21258 1360 cpu_set(cpu, prv->idlers);
ack@10206 1361 }
keir@21258 1362 else if ( cpu_isset(cpu, prv->idlers) )
ack@10206 1363 {
keir@21258 1364 cpu_clear(cpu, prv->idlers);
ack@10206 1365 }
ack@10206 1366
keir@20160 1367 if ( !is_idle_vcpu(snext->vcpu) )
keir@20308 1368 snext->start_time += now;
keir@20160 1369
ack@10206 1370 /*
ack@10206 1371 * Return task to run next...
ack@10206 1372 */
keir@19538 1373 ret.time = (is_idle_vcpu(snext->vcpu) ?
keir@19538 1374 -1 : MILLISECS(CSCHED_MSECS_PER_TSLICE));
ack@10206 1375 ret.task = snext->vcpu;
ack@10206 1376
ack@10206 1377 CSCHED_VCPU_CHECK(ret.task);
ack@10206 1378 return ret;
ack@10206 1379 }
ack@10206 1380
ack@10206 1381 static void
ack@10206 1382 csched_dump_vcpu(struct csched_vcpu *svc)
ack@10206 1383 {
ack@10206 1384 struct csched_dom * const sdom = svc->sdom;
ack@10206 1385
ack@13046 1386 printk("[%i.%i] pri=%i flags=%x cpu=%i",
ack@10206 1387 svc->vcpu->domain->domain_id,
ack@10206 1388 svc->vcpu->vcpu_id,
ack@10206 1389 svc->pri,
ack@13046 1390 svc->flags,
ack@10206 1391 svc->vcpu->processor);
ack@10206 1392
ack@10206 1393 if ( sdom )
ack@10206 1394 {
ack@12941 1395 printk(" credit=%i [w=%u]", atomic_read(&svc->credit), sdom->weight);
ack@12941 1396 #ifdef CSCHED_STATS
ack@12941 1397 printk(" (%d+%u) {a/i=%u/%u m=%u+%u}",
ack@12941 1398 svc->stats.credit_last,
ack@12941 1399 svc->stats.credit_incr,
ack@12941 1400 svc->stats.state_active,
ack@12941 1401 svc->stats.state_idle,
ack@12941 1402 svc->stats.migrate_q,
ack@12941 1403 svc->stats.migrate_r);
ack@12941 1404 #endif
ack@10206 1405 }
ack@10206 1406
ack@10206 1407 printk("\n");
ack@10206 1408 }
ack@10206 1409
ack@10206 1410 static void
keir@21327 1411 csched_dump_pcpu(const struct scheduler *ops, int cpu)
ack@10206 1412 {
ack@10206 1413 struct list_head *runq, *iter;
ack@10206 1414 struct csched_pcpu *spc;
ack@10206 1415 struct csched_vcpu *svc;
ack@10206 1416 int loop;
keir@20975 1417 #define cpustr keyhandler_scratch
ack@10206 1418
ack@10206 1419 spc = CSCHED_PCPU(cpu);
ack@10206 1420 runq = &spc->runq;
ack@10206 1421
keir@19965 1422 cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_sibling_map, cpu));
keir@18561 1423 printk(" sort=%d, sibling=%s, ", spc->runq_sort_last, cpustr);
keir@19965 1424 cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_core_map, cpu));
keir@18561 1425 printk("core=%s\n", cpustr);
ack@10206 1426
ack@10206 1427 /* current VCPU */
kaf24@11017 1428 svc = CSCHED_VCPU(per_cpu(schedule_data, cpu).curr);
ack@10206 1429 if ( svc )
ack@10206 1430 {
ack@10206 1431 printk("\trun: ");
ack@10206 1432 csched_dump_vcpu(svc);
ack@10206 1433 }
ack@10206 1434
ack@10206 1435 loop = 0;
ack@10206 1436 list_for_each( iter, runq )
ack@10206 1437 {
ack@10206 1438 svc = __runq_elem(iter);
ack@10206 1439 if ( svc )
ack@10206 1440 {
ack@10206 1441 printk("\t%3d: ", ++loop);
ack@10206 1442 csched_dump_vcpu(svc);
ack@10206 1443 }
ack@10206 1444 }
keir@20975 1445 #undef cpustr
ack@10206 1446 }
ack@10206 1447
ack@10206 1448 static void
keir@21327 1449 csched_dump(const struct scheduler *ops)
ack@10206 1450 {
ack@10206 1451 struct list_head *iter_sdom, *iter_svc;
keir@21258 1452 struct csched_private *prv = CSCHED_PRIV(ops);
ack@10206 1453 int loop;
keir@20975 1454 #define idlers_buf keyhandler_scratch
ack@10206 1455
ack@10206 1456 printk("info:\n"
ack@10206 1457 "\tncpus = %u\n"
ack@10206 1458 "\tmaster = %u\n"
ack@10206 1459 "\tcredit = %u\n"
ack@10206 1460 "\tcredit balance = %d\n"
ack@10206 1461 "\tweight = %u\n"
ack@10206 1462 "\trunq_sort = %u\n"
ack@12071 1463 "\tdefault-weight = %d\n"
ack@12071 1464 "\tmsecs per tick = %dms\n"
keir@20160 1465 "\tcredits per msec = %d\n"
ack@12071 1466 "\tticks per tslice = %d\n"
keir@19331 1467 "\tticks per acct = %d\n"
keir@19331 1468 "\tmigration delay = %uus\n",
keir@21258 1469 prv->ncpus,
keir@21258 1470 prv->master,
keir@21258 1471 prv->credit,
keir@21258 1472 prv->credit_balance,
keir@21258 1473 prv->weight,
keir@21258 1474 prv->runq_sort,
ack@12071 1475 CSCHED_DEFAULT_WEIGHT,
ack@12071 1476 CSCHED_MSECS_PER_TICK,
keir@20160 1477 CSCHED_CREDITS_PER_MSEC,
ack@12071 1478 CSCHED_TICKS_PER_TSLICE,
keir@19331 1479 CSCHED_TICKS_PER_ACCT,
keir@19331 1480 vcpu_migration_delay);
ack@10206 1481
keir@21258 1482 cpumask_scnprintf(idlers_buf, sizeof(idlers_buf), prv->idlers);
keir@18561 1483 printk("idlers: %s\n", idlers_buf);
ack@10206 1484
ack@10206 1485 printk("active vcpus:\n");
ack@10206 1486 loop = 0;
keir@21258 1487 list_for_each( iter_sdom, &prv->active_sdom )
ack@10206 1488 {
ack@10206 1489 struct csched_dom *sdom;
ack@10206 1490 sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
ack@10206 1491
ack@10206 1492 list_for_each( iter_svc, &sdom->active_vcpu )
ack@10206 1493 {
ack@10206 1494 struct csched_vcpu *svc;
ack@10206 1495 svc = list_entry(iter_svc, struct csched_vcpu, active_vcpu_elem);
ack@10206 1496
ack@10206 1497 printk("\t%3d: ", ++loop);
ack@10206 1498 csched_dump_vcpu(svc);
ack@10206 1499 }
ack@10206 1500 }
keir@20975 1501 #undef idlers_buf
ack@10206 1502 }
ack@10206 1503
keir@21258 1504 static int
keir@21453 1505 csched_init(struct scheduler *ops)
ack@10206 1506 {
keir@21258 1507 struct csched_private *prv;
keir@21258 1508
keir@21258 1509 prv = xmalloc(struct csched_private);
keir@21258 1510 if ( prv == NULL )
keir@21453 1511 return -ENOMEM;
keir@21453 1512
keir@21258 1513 memset(prv, 0, sizeof(*prv));
keir@21258 1514 ops->sched_data = prv;
keir@21258 1515 spin_lock_init(&prv->lock);
keir@21258 1516 INIT_LIST_HEAD(&prv->active_sdom);
keir@21258 1517 prv->master = UINT_MAX;
keir@21258 1518
keir@21258 1519 return 0;
ack@10206 1520 }
ack@10206 1521
keir@21258 1522 static void
keir@21327 1523 csched_deinit(const struct scheduler *ops)
keir@21258 1524 {
keir@21258 1525 struct csched_private *prv;
keir@21258 1526
keir@21258 1527 prv = CSCHED_PRIV(ops);
keir@21258 1528 if ( prv != NULL )
keir@21258 1529 xfree(prv);
keir@21258 1530 }
keir@21258 1531
keir@21327 1532 static void csched_tick_suspend(const struct scheduler *ops, unsigned int cpu)
keir@19498 1533 {
keir@19498 1534 struct csched_pcpu *spc;
keir@19498 1535
keir@21258 1536 spc = CSCHED_PCPU(cpu);
keir@19498 1537
keir@19498 1538 stop_timer(&spc->ticker);
keir@19498 1539 }
keir@19498 1540
keir@21327 1541 static void csched_tick_resume(const struct scheduler *ops, unsigned int cpu)
keir@19498 1542 {
keir@19498 1543 struct csched_pcpu *spc;
keir@19498 1544 uint64_t now = NOW();
keir@21258 1545
keir@21258 1546 spc = CSCHED_PCPU(cpu);
keir@19498 1547
keir@19498 1548 set_timer(&spc->ticker, now + MILLISECS(CSCHED_MSECS_PER_TICK)
keir@19498 1549 - now % MILLISECS(CSCHED_MSECS_PER_TICK) );
keir@19498 1550 }
ack@10206 1551
keir@21258 1552 static struct csched_private _csched_priv;
keir@21258 1553
keir@21327 1554 const struct scheduler sched_credit_def = {
ack@10206 1555 .name = "SMP Credit Scheduler",
ack@10206 1556 .opt_name = "credit",
kfraser@11295 1557 .sched_id = XEN_SCHEDULER_CREDIT,
keir@21258 1558 .sched_data = &_csched_priv,
ack@10206 1559
kfraser@12284 1560 .init_domain = csched_dom_init,
kfraser@12284 1561 .destroy_domain = csched_dom_destroy,
kfraser@12284 1562
keir@21258 1563 .insert_vcpu = csched_vcpu_insert,
keir@22324 1564 .remove_vcpu = csched_vcpu_remove,
kaf24@10281 1565
ack@10206 1566 .sleep = csched_vcpu_sleep,
ack@10206 1567 .wake = csched_vcpu_wake,
keir@21982 1568 .yield = csched_vcpu_yield,
kaf24@10281 1569
kfraser@11295 1570 .adjust = csched_dom_cntl,
ack@10206 1571
ack@12291 1572 .pick_cpu = csched_cpu_pick,
ack@10206 1573 .do_schedule = csched_schedule,
ack@10206 1574
ack@10206 1575 .dump_cpu_state = csched_dump_pcpu,
ack@10206 1576 .dump_settings = csched_dump,
ack@10206 1577 .init = csched_init,
keir@21258 1578 .deinit = csched_deinit,
keir@21258 1579 .alloc_vdata = csched_alloc_vdata,
keir@21258 1580 .free_vdata = csched_free_vdata,
keir@21258 1581 .alloc_pdata = csched_alloc_pdata,
keir@21258 1582 .free_pdata = csched_free_pdata,
keir@21258 1583 .alloc_domdata = csched_alloc_domdata,
keir@21258 1584 .free_domdata = csched_free_domdata,
keir@19498 1585
keir@19498 1586 .tick_suspend = csched_tick_suspend,
keir@19498 1587 .tick_resume = csched_tick_resume,
ack@10206 1588 };