debuggers.hg

annotate xen/common/sched_credit.c @ 22855:1d1eec7e1fb4

xl: Perform minimal validation of virtual disk file while parsing config file

This patch performs some very basic validation on the virtual disk
file passed through the config file. This validation ensures that we
don't go too far with the initialization like spawn qemu and more
while there could be some potentially fundamental issues.

[ Patch fixed up to work with PHYSTYPE_EMPTY 22808:6ec61438713a -iwj ]

Signed-off-by: Kamala Narasimhan <kamala.narasimhan@citrix.com>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
Committed-by: Ian Jackson <ian.jackson@eu.citrix.com>
author Kamala Narasimhan <kamala.narasimhan@gmail.com>
date Tue Jan 25 18:09:49 2011 +0000 (2011-01-25)
parents e8acb9753ff1
children 700ac6445812
rev   line source
ack@10206 1 /****************************************************************************
ack@10206 2 * (C) 2005-2006 - Emmanuel Ackaouy - XenSource Inc.
ack@10206 3 ****************************************************************************
ack@10206 4 *
ack@10206 5 * File: common/sched_credit.c
ack@10206 6 * Author: Emmanuel Ackaouy
ack@10206 7 *
ack@10206 8 * Description: Credit-based SMP CPU scheduler
ack@10206 9 */
ack@10206 10
ack@10206 11 #include <xen/config.h>
ack@10206 12 #include <xen/init.h>
ack@10206 13 #include <xen/lib.h>
ack@10206 14 #include <xen/sched.h>
ack@10206 15 #include <xen/domain.h>
ack@10206 16 #include <xen/delay.h>
ack@10206 17 #include <xen/event.h>
ack@10206 18 #include <xen/time.h>
ack@10206 19 #include <xen/perfc.h>
ack@10206 20 #include <xen/sched-if.h>
ack@10206 21 #include <xen/softirq.h>
ack@10206 22 #include <asm/atomic.h>
kaf24@11236 23 #include <xen/errno.h>
keir@20975 24 #include <xen/keyhandler.h>
ack@10206 25
ack@10206 26 /*
ack@10206 27 * CSCHED_STATS
ack@10206 28 *
keir@19335 29 * Manage very basic per-vCPU counters and stats.
ack@10206 30 *
ack@10206 31 * Useful for debugging live systems. The stats are displayed
ack@10206 32 * with runq dumps ('r' on the Xen console).
ack@10206 33 */
keir@19335 34 #ifdef PERF_COUNTERS
ack@10206 35 #define CSCHED_STATS
keir@19335 36 #endif
ack@10206 37
ack@10206 38
ack@10206 39 /*
ack@10206 40 * Basic constants
ack@10206 41 */
ack@12071 42 #define CSCHED_DEFAULT_WEIGHT 256
ack@12071 43 #define CSCHED_TICKS_PER_TSLICE 3
ack@12071 44 #define CSCHED_TICKS_PER_ACCT 3
ack@12071 45 #define CSCHED_MSECS_PER_TICK 10
ack@12071 46 #define CSCHED_MSECS_PER_TSLICE \
ack@12071 47 (CSCHED_MSECS_PER_TICK * CSCHED_TICKS_PER_TSLICE)
keir@20308 48 #define CSCHED_CREDITS_PER_MSEC 10
ack@12071 49 #define CSCHED_CREDITS_PER_TSLICE \
keir@20308 50 (CSCHED_CREDITS_PER_MSEC * CSCHED_MSECS_PER_TSLICE)
ack@12071 51 #define CSCHED_CREDITS_PER_ACCT \
keir@20308 52 (CSCHED_CREDITS_PER_MSEC * CSCHED_MSECS_PER_TICK * CSCHED_TICKS_PER_ACCT)
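/*
 * With the values above, a time slice is 30ms (3 ticks of 10ms), running
 * VCPUs burn 10 credits per millisecond (300 per full slice), and the
 * system-wide pool distributed by csched_acct() holds 300 credits per
 * online physical CPU per accounting period (figures follow directly
 * from the constants above).
 */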
ack@10206 53
ack@10206 54
ack@10206 55 /*
ack@10206 56 * Priorities
ack@10206 57 */
ack@12048 58 #define CSCHED_PRI_TS_BOOST 0 /* time-share waking up */
ack@10206 59 #define CSCHED_PRI_TS_UNDER -1 /* time-share w/ credits */
ack@10206 60 #define CSCHED_PRI_TS_OVER -2 /* time-share w/o credits */
ack@10206 61 #define CSCHED_PRI_IDLE -64 /* idle */
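/*
 * Higher values mean higher priority (BOOST > UNDER > OVER > IDLE);
 * __runq_insert() keeps each runq sorted in descending order of this
 * field, so boosted VCPUs run first and the idle VCPU always comes last.
 */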
ack@13046 62
ack@13046 63
ack@13046 64 /*
ack@13046 65 * Flags
ack@13046 66 */
keir@21982 67 #define CSCHED_FLAG_VCPU_PARKED 0x0001 /* VCPU over capped credits */
keir@21982 68 #define CSCHED_FLAG_VCPU_YIELD 0x0002 /* VCPU yielding */
ack@10206 69
ack@10206 70
ack@10206 71 /*
ack@10206 72 * Useful macros
ack@10206 73 */
keir@21258 74 #define CSCHED_PRIV(_ops) \
keir@21258 75 ((struct csched_private *)((_ops)->sched_data))
kaf24@11017 76 #define CSCHED_PCPU(_c) \
kaf24@11017 77 ((struct csched_pcpu *)per_cpu(schedule_data, _c).sched_priv)
ack@10206 78 #define CSCHED_VCPU(_vcpu) ((struct csched_vcpu *) (_vcpu)->sched_priv)
ack@10206 79 #define CSCHED_DOM(_dom) ((struct csched_dom *) (_dom)->sched_priv)
ack@10206 80 #define RUNQ(_cpu) (&(CSCHED_PCPU(_cpu)->runq))
keir@21258 81 #define CSCHED_CPUONLINE(_pool) \
keir@21258 82 (((_pool) == NULL) ? &cpupool_free_cpus : &(_pool)->cpu_valid)
ack@10206 83
ack@10206 84
ack@10206 85 /*
ack@10206 86 * Stats
ack@10206 87 */
keir@19335 88 #define CSCHED_STAT_CRANK(_X) (perfc_incr(_X))
ack@10206 89
keir@19335 90 #ifdef CSCHED_STATS
ack@12941 91
ack@12941 92 #define CSCHED_VCPU_STATS_RESET(_V) \
ack@12941 93 do \
ack@12941 94 { \
ack@12941 95 memset(&(_V)->stats, 0, sizeof((_V)->stats)); \
ack@12941 96 } while ( 0 )
ack@12941 97
ack@12941 98 #define CSCHED_VCPU_STAT_CRANK(_V, _X) (((_V)->stats._X)++)
ack@12941 99
ack@12941 100 #define CSCHED_VCPU_STAT_SET(_V, _X, _Y) (((_V)->stats._X) = (_Y))
ack@10206 101
ack@10206 102 #else /* CSCHED_STATS */
ack@10206 103
ack@12941 104 #define CSCHED_VCPU_STATS_RESET(_V) do {} while ( 0 )
ack@12941 105 #define CSCHED_VCPU_STAT_CRANK(_V, _X) do {} while ( 0 )
ack@12941 106 #define CSCHED_VCPU_STAT_SET(_V, _X, _Y) do {} while ( 0 )
ack@10206 107
ack@10206 108 #endif /* CSCHED_STATS */
ack@10206 109
ack@10206 110
ack@10206 111 /*
keir@21982 112 * Boot parameters
keir@21982 113 */
keir@22676 114 static bool_t __read_mostly sched_credit_default_yield;
keir@21982 115 boolean_param("sched_credit_default_yield", sched_credit_default_yield);
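/*
 * Registered as a hypervisor boot option; enabling it (e.g. passing
 * "sched_credit_default_yield" on the Xen command line, shown here only
 * as an illustration) makes csched_vcpu_yield() a no-op instead of
 * deferring the yielding VCPU behind one lower-priority runnable VCPU.
 */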
keir@21982 116
keir@21982 117 /*
ack@10206 118 * Physical CPU
ack@10206 119 */
ack@10206 120 struct csched_pcpu {
ack@10206 121 struct list_head runq;
ack@10206 122 uint32_t runq_sort_last;
kfraser@14358 123 struct timer ticker;
kfraser@14358 124 unsigned int tick;
keir@20423 125 unsigned int idle_bias;
ack@10206 126 };
ack@10206 127
ack@10206 128 /*
ack@10206 129 * Virtual CPU
ack@10206 130 */
ack@10206 131 struct csched_vcpu {
ack@10206 132 struct list_head runq_elem;
ack@10206 133 struct list_head active_vcpu_elem;
ack@10206 134 struct csched_dom *sdom;
ack@10206 135 struct vcpu *vcpu;
ack@10206 136 atomic_t credit;
keir@20160 137 s_time_t start_time; /* When we were scheduled (used for credit) */
ack@13046 138 uint16_t flags;
ack@10206 139 int16_t pri;
ack@12941 140 #ifdef CSCHED_STATS
ack@12073 141 struct {
ack@12073 142 int credit_last;
ack@12073 143 uint32_t credit_incr;
ack@12073 144 uint32_t state_active;
ack@12073 145 uint32_t state_idle;
ack@12941 146 uint32_t migrate_q;
ack@12941 147 uint32_t migrate_r;
ack@12073 148 } stats;
ack@12941 149 #endif
ack@10206 150 };
ack@10206 151
ack@10206 152 /*
ack@10206 153 * Domain
ack@10206 154 */
ack@10206 155 struct csched_dom {
ack@10206 156 struct list_head active_vcpu;
ack@10206 157 struct list_head active_sdom_elem;
ack@10206 158 struct domain *dom;
ack@10206 159 uint16_t active_vcpu_count;
ack@10206 160 uint16_t weight;
ack@10206 161 uint16_t cap;
ack@10206 162 };
ack@10206 163
ack@10206 164 /*
ack@10206 165 * System-wide private data
ack@10206 166 */
ack@10206 167 struct csched_private {
ack@10206 168 spinlock_t lock;
ack@10206 169 struct list_head active_sdom;
ack@10206 170 uint32_t ncpus;
keir@19498 171 struct timer master_ticker;
ack@10206 172 unsigned int master;
ack@10206 173 cpumask_t idlers;
keir@21258 174 cpumask_t cpus;
ack@10206 175 uint32_t weight;
ack@10206 176 uint32_t credit;
ack@10206 177 int credit_balance;
ack@10206 178 uint32_t runq_sort;
ack@10206 179 };
ack@10206 180
kfraser@14358 181 static void csched_tick(void *_cpu);
keir@21258 182 static void csched_acct(void *dummy);
ack@10206 183
ack@10206 184 static inline int
ack@10206 185 __vcpu_on_runq(struct csched_vcpu *svc)
ack@10206 186 {
ack@10206 187 return !list_empty(&svc->runq_elem);
ack@10206 188 }
ack@10206 189
ack@10206 190 static inline struct csched_vcpu *
ack@10206 191 __runq_elem(struct list_head *elem)
ack@10206 192 {
ack@10206 193 return list_entry(elem, struct csched_vcpu, runq_elem);
ack@10206 194 }
ack@10206 195
ack@10206 196 static inline void
ack@10206 197 __runq_insert(unsigned int cpu, struct csched_vcpu *svc)
ack@10206 198 {
ack@10206 199 const struct list_head * const runq = RUNQ(cpu);
ack@10206 200 struct list_head *iter;
ack@10206 201
ack@10206 202 BUG_ON( __vcpu_on_runq(svc) );
ack@10206 203 BUG_ON( cpu != svc->vcpu->processor );
ack@10206 204
ack@10206 205 list_for_each( iter, runq )
ack@10206 206 {
ack@10206 207 const struct csched_vcpu * const iter_svc = __runq_elem(iter);
ack@10206 208 if ( svc->pri > iter_svc->pri )
ack@10206 209 break;
ack@10206 210 }
ack@10206 211
keir@21982 212 /* If the vcpu yielded, try to put it behind one lower-priority
keir@21982 213 * runnable vcpu if we can. The next runq_sort will bring it forward
keir@21982 214 * within 30ms if the queue is too long. */
keir@21982 215 if ( svc->flags & CSCHED_FLAG_VCPU_YIELD
keir@21982 216 && __runq_elem(iter)->pri > CSCHED_PRI_IDLE )
keir@21982 217 {
keir@21982 218 iter = iter->next;
keir@21982 219
keir@21982 220 /* Some sanity checks */
keir@21982 221 BUG_ON(iter == runq);
keir@21982 222 }
keir@21982 223
ack@10206 224 list_add_tail(&svc->runq_elem, iter);
ack@10206 225 }
ack@10206 226
ack@10206 227 static inline void
ack@10206 228 __runq_remove(struct csched_vcpu *svc)
ack@10206 229 {
ack@10206 230 BUG_ON( !__vcpu_on_runq(svc) );
ack@10206 231 list_del_init(&svc->runq_elem);
ack@10206 232 }
ack@10206 233
keir@20300 234 static void burn_credits(struct csched_vcpu *svc, s_time_t now)
keir@20160 235 {
keir@20160 236 s_time_t delta;
keir@20308 237 unsigned int credits;
keir@20160 238
keir@20160 239 /* Assert svc is current */
keir@20160 240 ASSERT(svc==CSCHED_VCPU(per_cpu(schedule_data, svc->vcpu->processor).curr));
keir@20160 241
keir@20308 242 if ( (delta = now - svc->start_time) <= 0 )
keir@20160 243 return;
keir@20160 244
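    /*
     * Convert the elapsed nanoseconds into credits, rounding to the
     * nearest credit (one credit corresponds to 1/CSCHED_CREDITS_PER_MSEC
     * of a millisecond, so a full 30ms slice burns roughly 300 credits).
     * start_time is advanced only by the amount actually charged, so any
     * sub-credit remainder carries over to the next call.
     */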
keir@20308 245 credits = (delta*CSCHED_CREDITS_PER_MSEC + MILLISECS(1)/2) / MILLISECS(1);
keir@20308 246 atomic_sub(credits, &svc->credit);
keir@20308 247 svc->start_time += (credits * MILLISECS(1)) / CSCHED_CREDITS_PER_MSEC;
keir@20160 248 }
keir@20160 249
keir@22676 250 static bool_t __read_mostly opt_tickle_one_idle = 1;
keir@21145 251 boolean_param("tickle_one_idle_cpu", opt_tickle_one_idle);
keir@21145 252
keir@21462 253 DEFINE_PER_CPU(unsigned int, last_tickle_cpu);
keir@21145 254
ack@10206 255 static inline void
ack@10206 256 __runq_tickle(unsigned int cpu, struct csched_vcpu *new)
ack@10206 257 {
kaf24@11017 258 struct csched_vcpu * const cur =
kaf24@11017 259 CSCHED_VCPU(per_cpu(schedule_data, cpu).curr);
keir@21258 260 struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));
ack@10206 261 cpumask_t mask;
ack@10206 262
ack@10206 263 ASSERT(cur);
ack@10206 264 cpus_clear(mask);
ack@10206 265
ack@10206 266 /* If strictly higher priority than current VCPU, signal the CPU */
ack@10206 267 if ( new->pri > cur->pri )
ack@10206 268 {
ack@10206 269 if ( cur->pri == CSCHED_PRI_IDLE )
ack@10206 270 CSCHED_STAT_CRANK(tickle_local_idler);
ack@10206 271 else if ( cur->pri == CSCHED_PRI_TS_OVER )
ack@10206 272 CSCHED_STAT_CRANK(tickle_local_over);
ack@10206 273 else if ( cur->pri == CSCHED_PRI_TS_UNDER )
ack@10206 274 CSCHED_STAT_CRANK(tickle_local_under);
ack@10206 275 else
ack@10206 276 CSCHED_STAT_CRANK(tickle_local_other);
ack@10206 277
ack@10206 278 cpu_set(cpu, mask);
ack@10206 279 }
ack@10206 280
ack@10206 281 /*
ack@10206 282 * If this CPU has at least two runnable VCPUs, we tickle any idlers to
ack@10206 283 * let them know there is runnable work in the system...
ack@10206 284 */
ack@10206 285 if ( cur->pri > CSCHED_PRI_IDLE )
ack@10206 286 {
keir@21258 287 if ( cpus_empty(prv->idlers) )
ack@10206 288 {
ack@10206 289 CSCHED_STAT_CRANK(tickle_idlers_none);
ack@10206 290 }
ack@10206 291 else
ack@10206 292 {
keir@21145 293 cpumask_t idle_mask;
keir@21145 294
keir@21258 295 cpus_and(idle_mask, prv->idlers, new->vcpu->cpu_affinity);
keir@21145 296 if ( !cpus_empty(idle_mask) )
keir@21145 297 {
keir@21145 298 CSCHED_STAT_CRANK(tickle_idlers_some);
keir@21145 299 if ( opt_tickle_one_idle )
keir@21145 300 {
keir@21145 301 this_cpu(last_tickle_cpu) =
keir@21145 302 cycle_cpu(this_cpu(last_tickle_cpu), idle_mask);
keir@21145 303 cpu_set(this_cpu(last_tickle_cpu), mask);
keir@21145 304 }
keir@21145 305 else
keir@21145 306 cpus_or(mask, mask, idle_mask);
keir@21145 307 }
kaf24@11519 308 cpus_and(mask, mask, new->vcpu->cpu_affinity);
ack@10206 309 }
ack@10206 310 }
ack@10206 311
ack@10206 312 /* Send scheduler interrupts to designated CPUs */
ack@10206 313 if ( !cpus_empty(mask) )
ack@10206 314 cpumask_raise_softirq(mask, SCHEDULE_SOFTIRQ);
ack@10206 315 }
ack@10206 316
keir@21258 317 static void
keir@21327 318 csched_free_pdata(const struct scheduler *ops, void *pcpu, int cpu)
keir@21258 319 {
keir@21258 320 struct csched_private *prv = CSCHED_PRIV(ops);
keir@21258 321 struct csched_pcpu *spc = pcpu;
keir@21258 322 unsigned long flags;
keir@21258 323
keir@21258 324 if ( spc == NULL )
keir@21258 325 return;
keir@21258 326
keir@21258 327 spin_lock_irqsave(&prv->lock, flags);
keir@21258 328
keir@21258 329 prv->credit -= CSCHED_CREDITS_PER_ACCT;
keir@21258 330 prv->ncpus--;
keir@21258 331 cpu_clear(cpu, prv->idlers);
keir@21258 332 cpu_clear(cpu, prv->cpus);
keir@21258 333 if ( (prv->master == cpu) && (prv->ncpus > 0) )
keir@21258 334 {
keir@21258 335 prv->master = first_cpu(prv->cpus);
keir@21258 336 migrate_timer(&prv->master_ticker, prv->master);
keir@21258 337 }
keir@21258 338 kill_timer(&spc->ticker);
keir@21258 339 if ( prv->ncpus == 0 )
keir@21258 340 kill_timer(&prv->master_ticker);
keir@21258 341
keir@21258 342 spin_unlock_irqrestore(&prv->lock, flags);
keir@21258 343
keir@21258 344 xfree(spc);
keir@21258 345 }
keir@21258 346
keir@21258 347 static void *
keir@21327 348 csched_alloc_pdata(const struct scheduler *ops, int cpu)
ack@10206 349 {
ack@10206 350 struct csched_pcpu *spc;
keir@21258 351 struct csched_private *prv = CSCHED_PRIV(ops);
ack@10206 352 unsigned long flags;
ack@10206 353
kfraser@10930 354 /* Allocate per-PCPU info */
kfraser@10930 355 spc = xmalloc(struct csched_pcpu);
kfraser@10930 356 if ( spc == NULL )
keir@21258 357 return NULL;
keir@20308 358 memset(spc, 0, sizeof(*spc));
kfraser@10930 359
keir@21258 360 spin_lock_irqsave(&prv->lock, flags);
ack@10206 361
ack@10206 362 /* Initialize/update system-wide config */
keir@21258 363 prv->credit += CSCHED_CREDITS_PER_ACCT;
keir@21258 364 prv->ncpus++;
keir@21258 365 cpu_set(cpu, prv->cpus);
keir@21453 366 if ( prv->ncpus == 1 )
keir@21258 367 {
keir@21258 368 prv->master = cpu;
keir@21453 369 init_timer(&prv->master_ticker, csched_acct, prv, cpu);
keir@21453 370 set_timer(&prv->master_ticker, NOW() +
keir@21453 371 MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT);
keir@21258 372 }
ack@10206 373
kfraser@14358 374 init_timer(&spc->ticker, csched_tick, (void *)(unsigned long)cpu, cpu);
keir@21453 375 set_timer(&spc->ticker, NOW() + MILLISECS(CSCHED_MSECS_PER_TICK));
keir@21258 376
ack@10206 377 INIT_LIST_HEAD(&spc->runq);
keir@21258 378 spc->runq_sort_last = prv->runq_sort;
keir@20423 379 spc->idle_bias = NR_CPUS - 1;
keir@21258 380 if ( per_cpu(schedule_data, cpu).sched_priv == NULL )
keir@21258 381 per_cpu(schedule_data, cpu).sched_priv = spc;
ack@10206 382
ack@10206 383 /* Start off idling... */
kfraser@14358 384 BUG_ON(!is_idle_vcpu(per_cpu(schedule_data, cpu).curr));
keir@21258 385 cpu_set(cpu, prv->idlers);
ack@10206 386
keir@21258 387 spin_unlock_irqrestore(&prv->lock, flags);
kfraser@10930 388
keir@21258 389 return spc;
ack@10206 390 }
ack@10206 391
ack@10206 392 #ifndef NDEBUG
ack@10206 393 static inline void
ack@10206 394 __csched_vcpu_check(struct vcpu *vc)
ack@10206 395 {
ack@10206 396 struct csched_vcpu * const svc = CSCHED_VCPU(vc);
ack@10206 397 struct csched_dom * const sdom = svc->sdom;
ack@10206 398
ack@10206 399 BUG_ON( svc->vcpu != vc );
ack@10206 400 BUG_ON( sdom != CSCHED_DOM(vc->domain) );
ack@10206 401 if ( sdom )
ack@10206 402 {
ack@10206 403 BUG_ON( is_idle_vcpu(vc) );
ack@10206 404 BUG_ON( sdom->dom != vc->domain );
ack@10206 405 }
ack@10206 406 else
ack@10206 407 {
ack@10206 408 BUG_ON( !is_idle_vcpu(vc) );
ack@10206 409 }
ack@10206 410
ack@10206 411 CSCHED_STAT_CRANK(vcpu_check);
ack@10206 412 }
ack@10206 413 #define CSCHED_VCPU_CHECK(_vc) (__csched_vcpu_check(_vc))
ack@10206 414 #else
ack@10206 415 #define CSCHED_VCPU_CHECK(_vc)
ack@10206 416 #endif
ack@10206 417
keir@19331 418 /*
keir@19331 419 * Delay, in microseconds, between migrations of a VCPU between PCPUs.
keir@19331 420 * This prevents rapid fluttering of a VCPU between CPUs, and reduces the
keir@19331 421 * implicit overheads such as cache-warming. 1ms (1000) has been measured
keir@19331 422 * as a good value.
keir@19331 423 */
keir@19331 424 static unsigned int vcpu_migration_delay;
keir@19331 425 integer_param("vcpu_migration_delay", vcpu_migration_delay);
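/*
 * Tunable via the hypervisor command line, e.g. "vcpu_migration_delay=1000"
 * (an illustrative setting of the 1ms value mentioned above), or at run
 * time through set_vcpu_migration_delay() below.
 */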
keir@19331 426
keir@19540 427 void set_vcpu_migration_delay(unsigned int delay)
keir@19540 428 {
keir@19540 429 vcpu_migration_delay = delay;
keir@19540 430 }
keir@19540 431
keir@19540 432 unsigned int get_vcpu_migration_delay(void)
keir@19540 433 {
keir@19540 434 return vcpu_migration_delay;
keir@19540 435 }
keir@19540 436
keir@19331 437 static inline int
keir@19331 438 __csched_vcpu_is_cache_hot(struct vcpu *v)
keir@19331 439 {
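    /*
     * last_run_time and NOW() are in nanoseconds while the delay is
     * configured in microseconds, hence the factor of 1000 below.
     */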
keir@19346 440 int hot = ((NOW() - v->last_run_time) <
keir@19331 441 ((uint64_t)vcpu_migration_delay * 1000u));
keir@19331 442
keir@19331 443 if ( hot )
keir@19331 444 CSCHED_STAT_CRANK(vcpu_hot);
keir@19331 445
keir@19331 446 return hot;
keir@19331 447 }
keir@19331 448
ack@10206 449 static inline int
ack@12941 450 __csched_vcpu_is_migrateable(struct vcpu *vc, int dest_cpu)
ack@10206 451 {
ack@10206 452 /*
keir@19331 453 * Don't pick up work that's in the peer's scheduling tail or hot on
keir@19331 454 * peer PCPU. Only pick up work that's allowed to run on our CPU.
ack@10206 455 */
keir@19331 456 return !vc->is_running &&
keir@19331 457 !__csched_vcpu_is_cache_hot(vc) &&
keir@19331 458 cpu_isset(dest_cpu, vc->cpu_affinity);
ack@10206 459 }
ack@10206 460
ack@12941 461 static int
keir@21327 462 _csched_cpu_pick(const struct scheduler *ops, struct vcpu *vc, bool_t commit)
ack@12291 463 {
ack@12941 464 cpumask_t cpus;
ack@12941 465 cpumask_t idlers;
keir@21258 466 cpumask_t *online;
ack@12941 467 int cpu;
ack@12941 468
ack@12941 469 /*
ack@12941 470 * Pick from online CPUs in VCPU's affinity mask, giving a
ack@12941 471 * preference to its current processor if it's in there.
ack@12941 472 */
keir@21258 473 online = CSCHED_CPUONLINE(vc->domain->cpupool);
keir@21258 474 cpus_and(cpus, *online, vc->cpu_affinity);
ack@12941 475 cpu = cpu_isset(vc->processor, cpus)
ack@12941 476 ? vc->processor
keir@19314 477 : cycle_cpu(vc->processor, cpus);
ack@12941 478 ASSERT( !cpus_empty(cpus) && cpu_isset(cpu, cpus) );
ack@12291 479
ack@12941 480 /*
ack@12941 481 * Try to find an idle processor within the above constraints.
ack@12941 482 *
ack@12941 483 * In multi-core and multi-threaded CPUs, not all idle execution
ack@12941 484 * vehicles are equal!
ack@12941 485 *
ack@12941 486 * We give preference to the idle execution vehicle with the most
ack@12941 487 * idling neighbours in its grouping. This distributes work across
ack@12941 488 * distinct cores first and guarantees we don't do something stupid
ack@12941 489 * like run two VCPUs on co-hyperthreads while there are idle cores
ack@12941 490 * or sockets.
ack@12941 491 */
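    /*
     * Worked example (single dual-core socket with hyperthreading): if
     * core A has one busy and one idle thread while both threads of core
     * B are idle, the loop below counts one idler in A's thread group
     * against two in B's and moves the pick to core B, keeping the new
     * VCPU off the busy VCPU's co-hyperthread.
     */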
keir@21258 492 cpus_and(idlers, cpu_online_map, CSCHED_PRIV(ops)->idlers);
ack@12941 493 cpu_set(cpu, idlers);
ack@12941 494 cpus_and(cpus, cpus, idlers);
ack@12941 495 cpu_clear(cpu, cpus);
ack@12941 496
ack@12941 497 while ( !cpus_empty(cpus) )
ack@12291 498 {
ack@12941 499 cpumask_t cpu_idlers;
ack@12941 500 cpumask_t nxt_idlers;
keir@19450 501 int nxt, weight_cpu, weight_nxt;
keir@22226 502 int migrate_factor;
ack@12941 503
keir@19314 504 nxt = cycle_cpu(cpu, cpus);
ack@12941 505
keir@19965 506 if ( cpu_isset(cpu, per_cpu(cpu_core_map, nxt)) )
ack@12941 507 {
keir@22226 508 /* We're on the same socket, so compare the busyness of sibling threads.
keir@22226 509 * Migrate if the candidate's thread group has even one more idler. */
keir@19965 510 ASSERT( cpu_isset(nxt, per_cpu(cpu_core_map, cpu)) );
keir@22226 511 migrate_factor = 1;
keir@19965 512 cpus_and(cpu_idlers, idlers, per_cpu(cpu_sibling_map, cpu));
keir@19965 513 cpus_and(nxt_idlers, idlers, per_cpu(cpu_sibling_map, nxt));
ack@12941 514 }
ack@12941 515 else
ack@12941 516 {
keir@22226 517 /* We're on different sockets, so compare the busyness of whole cores.
keir@22226 518 * Migrate only if the other group has more than twice as many idlers. */
keir@19965 519 ASSERT( !cpu_isset(nxt, per_cpu(cpu_core_map, cpu)) );
keir@22226 520 migrate_factor = 2;
keir@19965 521 cpus_and(cpu_idlers, idlers, per_cpu(cpu_core_map, cpu));
keir@19965 522 cpus_and(nxt_idlers, idlers, per_cpu(cpu_core_map, nxt));
ack@12941 523 }
ack@12941 524
keir@19450 525 weight_cpu = cpus_weight(cpu_idlers);
keir@19450 526 weight_nxt = cpus_weight(nxt_idlers);
keir@22226 527 /* smt_power_savings: consolidate work rather than spreading it */
keir@22226 528 if ( ( sched_smt_power_savings
keir@22226 529 && (weight_cpu > weight_nxt) )
keir@22226 530 || ( !sched_smt_power_savings
keir@22226 531 && (weight_cpu * migrate_factor < weight_nxt) ) )
ack@12941 532 {
keir@20423 533 cpu = cycle_cpu(CSCHED_PCPU(nxt)->idle_bias, nxt_idlers);
keir@20423 534 if ( commit )
keir@20423 535 CSCHED_PCPU(nxt)->idle_bias = cpu;
keir@20423 536 cpus_andnot(cpus, cpus, per_cpu(cpu_sibling_map, cpu));
ack@12941 537 }
ack@12941 538 else
ack@12941 539 {
ack@12941 540 cpus_andnot(cpus, cpus, nxt_idlers);
ack@12941 541 }
ack@12291 542 }
ack@12291 543
ack@12941 544 return cpu;
ack@12291 545 }
ack@12291 546
keir@20423 547 static int
keir@21327 548 csched_cpu_pick(const struct scheduler *ops, struct vcpu *vc)
keir@20423 549 {
keir@21258 550 return _csched_cpu_pick(ops, vc, 1);
keir@20423 551 }
keir@20423 552
ack@12941 553 static inline void
keir@21258 554 __csched_vcpu_acct_start(struct csched_private *prv, struct csched_vcpu *svc)
ack@10206 555 {
ack@10206 556 struct csched_dom * const sdom = svc->sdom;
ack@10206 557 unsigned long flags;
ack@10206 558
keir@21258 559 spin_lock_irqsave(&prv->lock, flags);
ack@10206 560
ack@10206 561 if ( list_empty(&svc->active_vcpu_elem) )
ack@10206 562 {
ack@12941 563 CSCHED_VCPU_STAT_CRANK(svc, state_active);
ack@12941 564 CSCHED_STAT_CRANK(acct_vcpu_active);
ack@10206 565
ack@12941 566 sdom->active_vcpu_count++;
ack@12941 567 list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
keir@22026 568 /* Make weight per-vcpu */
keir@22026 569 prv->weight += sdom->weight;
ack@12941 570 if ( list_empty(&sdom->active_sdom_elem) )
ack@10206 571 {
keir@21258 572 list_add(&sdom->active_sdom_elem, &prv->active_sdom);
ack@12941 573 }
ack@12941 574 }
ack@12941 575
keir@21258 576 spin_unlock_irqrestore(&prv->lock, flags);
ack@12941 577 }
ack@12941 578
ack@12941 579 static inline void
keir@21258 580 __csched_vcpu_acct_stop_locked(struct csched_private *prv,
keir@21258 581 struct csched_vcpu *svc)
ack@12941 582 {
ack@12941 583 struct csched_dom * const sdom = svc->sdom;
ack@10206 584
ack@12941 585 BUG_ON( list_empty(&svc->active_vcpu_elem) );
ack@12941 586
ack@12941 587 CSCHED_VCPU_STAT_CRANK(svc, state_idle);
ack@12941 588 CSCHED_STAT_CRANK(acct_vcpu_idle);
ack@10206 589
keir@22026 590 BUG_ON( prv->weight < sdom->weight );
ack@12941 591 sdom->active_vcpu_count--;
ack@12941 592 list_del_init(&svc->active_vcpu_elem);
keir@22026 593 prv->weight -= sdom->weight;
ack@12941 594 if ( list_empty(&sdom->active_vcpu) )
ack@12941 595 {
ack@12941 596 list_del_init(&sdom->active_sdom_elem);
ack@10206 597 }
ack@12941 598 }
ack@12941 599
ack@12941 600 static void
keir@21258 601 csched_vcpu_acct(struct csched_private *prv, unsigned int cpu)
ack@12941 602 {
ack@12941 603 struct csched_vcpu * const svc = CSCHED_VCPU(current);
keir@21327 604 const struct scheduler *ops = per_cpu(scheduler, cpu);
ack@12941 605
ack@12941 606 ASSERT( current->processor == cpu );
ack@12941 607 ASSERT( svc->sdom != NULL );
ack@12048 608
ack@12048 609 /*
ack@12048 610 * If this VCPU's priority was boosted when it last awoke, reset it.
ack@12048 611 * If the VCPU is found here, then it's consuming a non-negligible
ack@12048 612 * amount of CPU resources and should no longer be boosted.
ack@12048 613 */
ack@12048 614 if ( svc->pri == CSCHED_PRI_TS_BOOST )
ack@12048 615 svc->pri = CSCHED_PRI_TS_UNDER;
ack@10206 616
ack@12941 617 /*
ack@12941 618 * Update credits
ack@12941 619 */
keir@20308 620 if ( !is_idle_vcpu(svc->vcpu) )
keir@20308 621 burn_credits(svc, NOW());
ack@10206 622
ack@12941 623 /*
ack@12941 624 * Put this VCPU and domain back on the active list if it was
ack@12941 625 * idling.
ack@12941 626 *
ack@12941 627 * If it's been active a while, check if we'd be better off
ack@12941 628 * migrating it to run elsewhere (see multi-core and multi-thread
ack@12941 629 * support in csched_cpu_pick()).
ack@12941 630 */
ack@12941 631 if ( list_empty(&svc->active_vcpu_elem) )
ack@10206 632 {
keir@21258 633 __csched_vcpu_acct_start(prv, svc);
ack@12941 634 }
keir@21258 635 else if ( _csched_cpu_pick(ops, current, 0) != cpu )
ack@12941 636 {
ack@12941 637 CSCHED_VCPU_STAT_CRANK(svc, migrate_r);
ack@12941 638 CSCHED_STAT_CRANK(migrate_running);
kfraser@14698 639 set_bit(_VPF_migrating, &current->pause_flags);
ack@12941 640 cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
ack@10206 641 }
ack@10206 642 }
ack@10206 643
keir@21258 644 static void *
keir@21327 645 csched_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
ack@10206 646 {
ack@10206 647 struct csched_vcpu *svc;
ack@10206 648
ack@10206 649 /* Allocate per-VCPU info */
ack@10206 650 svc = xmalloc(struct csched_vcpu);
kfraser@12284 651 if ( svc == NULL )
keir@21258 652 return NULL;
keir@20308 653 memset(svc, 0, sizeof(*svc));
ack@10206 654
ack@10206 655 INIT_LIST_HEAD(&svc->runq_elem);
ack@10206 656 INIT_LIST_HEAD(&svc->active_vcpu_elem);
keir@21258 657 svc->sdom = dd;
ack@10206 658 svc->vcpu = vc;
ack@10206 659 atomic_set(&svc->credit, 0);
ack@13046 660 svc->flags = 0U;
keir@21258 661 svc->pri = is_idle_domain(vc->domain) ?
keir@21258 662 CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER;
ack@12941 663 CSCHED_VCPU_STATS_RESET(svc);
keir@21258 664 CSCHED_STAT_CRANK(vcpu_init);
keir@21258 665 return svc;
keir@21258 666 }
ack@10206 667
keir@21258 668 static void
keir@21327 669 csched_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
keir@21258 670 {
keir@21258 671 struct csched_vcpu *svc = vc->sched_priv;
ack@10206 672
keir@21258 673 if ( !__vcpu_on_runq(svc) && vcpu_runnable(vc) && !vc->is_running )
keir@21258 674 __runq_insert(vc->processor, svc);
ack@10206 675 }
ack@10206 676
ack@10206 677 static void
keir@21327 678 csched_free_vdata(const struct scheduler *ops, void *priv)
keir@21258 679 {
keir@22324 680 struct csched_vcpu *svc = priv;
keir@22324 681
keir@22324 682 BUG_ON( !list_empty(&svc->runq_elem) );
keir@22324 683
keir@22324 684 xfree(svc);
keir@22324 685 }
keir@22324 686
keir@22324 687 static void
keir@22324 688 csched_vcpu_remove(const struct scheduler *ops, struct vcpu *vc)
keir@22324 689 {
keir@21258 690 struct csched_private *prv = CSCHED_PRIV(ops);
keir@22324 691 struct csched_vcpu * const svc = CSCHED_VCPU(vc);
keir@22324 692 struct csched_dom * const sdom = svc->sdom;
keir@21258 693 unsigned long flags;
keir@21258 694
keir@22324 695 CSCHED_STAT_CRANK(vcpu_destroy);
keir@22324 696
keir@21258 697 if ( __vcpu_on_runq(svc) )
keir@21258 698 __runq_remove(svc);
keir@21258 699
keir@21258 700 spin_lock_irqsave(&(prv->lock), flags);
keir@21258 701
keir@21258 702 if ( !list_empty(&svc->active_vcpu_elem) )
keir@21258 703 __csched_vcpu_acct_stop_locked(prv, svc);
keir@21258 704
keir@21258 705 spin_unlock_irqrestore(&(prv->lock), flags);
keir@21258 706
ack@10206 707 BUG_ON( sdom == NULL );
ack@10206 708 BUG_ON( !list_empty(&svc->runq_elem) );
ack@10206 709 }
ack@10206 710
ack@10206 711 static void
keir@21327 712 csched_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
ack@10206 713 {
ack@10206 714 struct csched_vcpu * const svc = CSCHED_VCPU(vc);
ack@10206 715
ack@10206 716 CSCHED_STAT_CRANK(vcpu_sleep);
ack@10206 717
ack@10206 718 BUG_ON( is_idle_vcpu(vc) );
ack@10206 719
kaf24@11017 720 if ( per_cpu(schedule_data, vc->processor).curr == vc )
ack@10206 721 cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ);
ack@10206 722 else if ( __vcpu_on_runq(svc) )
ack@10206 723 __runq_remove(svc);
ack@10206 724 }
ack@10206 725
ack@10206 726 static void
keir@21327 727 csched_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
ack@10206 728 {
ack@10206 729 struct csched_vcpu * const svc = CSCHED_VCPU(vc);
ack@10206 730 const unsigned int cpu = vc->processor;
ack@10206 731
ack@10206 732 BUG_ON( is_idle_vcpu(vc) );
ack@10206 733
kaf24@11017 734 if ( unlikely(per_cpu(schedule_data, cpu).curr == vc) )
ack@10206 735 {
ack@10206 736 CSCHED_STAT_CRANK(vcpu_wake_running);
ack@10206 737 return;
ack@10206 738 }
ack@10206 739 if ( unlikely(__vcpu_on_runq(svc)) )
ack@10206 740 {
ack@10206 741 CSCHED_STAT_CRANK(vcpu_wake_onrunq);
ack@10206 742 return;
ack@10206 743 }
ack@10206 744
ack@10206 745 if ( likely(vcpu_runnable(vc)) )
ack@10206 746 CSCHED_STAT_CRANK(vcpu_wake_runnable);
ack@10206 747 else
ack@10206 748 CSCHED_STAT_CRANK(vcpu_wake_not_runnable);
ack@10206 749
ack@12048 750 /*
ack@12048 751 * We temporarily boost the priority of waking VCPUs!
ack@12048 752 *
ack@12048 753 * If this VCPU consumes a non-negligible amount of CPU, it
ack@12048 754 * will eventually find itself in the credit accounting code
ack@12048 755 * path where its priority will be reset to normal.
ack@12048 756 *
ack@12048 757 * If on the other hand the VCPU consumes little CPU and is
ack@12048 758 * blocking and awoken a lot (doing I/O for example), its
ack@12048 759 * priority will remain boosted, optimizing its wake-to-run
ack@12048 760 * latencies.
ack@12048 761 *
ack@12048 762 * This allows wake-to-run latency sensitive VCPUs to preempt
ack@12048 763 * more CPU resource intensive VCPUs without impacting overall
ack@12048 764 * system fairness.
ack@13046 765 *
ack@13046 766 * The one exception is for VCPUs of capped domains unpausing
ack@13046 767 * after earning credits they had overspent. We don't boost
ack@13046 768 * those.
ack@12048 769 */
ack@13046 770 if ( svc->pri == CSCHED_PRI_TS_UNDER &&
ack@13046 771 !(svc->flags & CSCHED_FLAG_VCPU_PARKED) )
ack@13046 772 {
ack@12048 773 svc->pri = CSCHED_PRI_TS_BOOST;
ack@13046 774 }
ack@12048 775
ack@10206 776 /* Put the VCPU on the runq and tickle CPUs */
ack@10206 777 __runq_insert(cpu, svc);
ack@10206 778 __runq_tickle(cpu, svc);
ack@10206 779 }
ack@10206 780
keir@21982 781 static void
keir@21982 782 csched_vcpu_yield(const struct scheduler *ops, struct vcpu *vc)
keir@21982 783 {
keir@21982 784 struct csched_vcpu * const sv = CSCHED_VCPU(vc);
keir@21982 785
keir@21982 786 if ( !sched_credit_default_yield )
keir@21982 787 {
keir@21982 788 /* Let the scheduler know that this vcpu is trying to yield */
keir@21982 789 sv->flags |= CSCHED_FLAG_VCPU_YIELD;
keir@21982 790 }
keir@21982 791 }
keir@21982 792
ack@10206 793 static int
ack@10206 794 csched_dom_cntl(
keir@21327 795 const struct scheduler *ops,
ack@10206 796 struct domain *d,
kfraser@11295 797 struct xen_domctl_scheduler_op *op)
ack@10206 798 {
ack@10206 799 struct csched_dom * const sdom = CSCHED_DOM(d);
keir@21258 800 struct csched_private *prv = CSCHED_PRIV(ops);
ack@10206 801 unsigned long flags;
ack@10206 802
kaf24@11296 803 if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
ack@10206 804 {
kfraser@11295 805 op->u.credit.weight = sdom->weight;
kfraser@11295 806 op->u.credit.cap = sdom->cap;
ack@10206 807 }
ack@10206 808 else
ack@10206 809 {
kaf24@11296 810 ASSERT(op->cmd == XEN_DOMCTL_SCHEDOP_putinfo);
ack@10206 811
keir@21258 812 spin_lock_irqsave(&prv->lock, flags);
ack@10206 813
kfraser@11295 814 if ( op->u.credit.weight != 0 )
ack@10206 815 {
ack@10609 816 if ( !list_empty(&sdom->active_sdom_elem) )
ack@10609 817 {
keir@22026 818 prv->weight -= sdom->weight * sdom->active_vcpu_count;
keir@22026 819 prv->weight += op->u.credit.weight * sdom->active_vcpu_count;
ack@10609 820 }
kfraser@11295 821 sdom->weight = op->u.credit.weight;
ack@10206 822 }
ack@10206 823
kfraser@11295 824 if ( op->u.credit.cap != (uint16_t)~0U )
kfraser@11295 825 sdom->cap = op->u.credit.cap;
ack@10206 826
keir@21258 827 spin_unlock_irqrestore(&prv->lock, flags);
ack@10206 828 }
ack@10206 829
ack@10206 830 return 0;
ack@10206 831 }
ack@10206 832
keir@21258 833 static void *
keir@21327 834 csched_alloc_domdata(const struct scheduler *ops, struct domain *dom)
kfraser@12284 835 {
kfraser@12284 836 struct csched_dom *sdom;
kfraser@12284 837
kfraser@12284 838 sdom = xmalloc(struct csched_dom);
kfraser@12284 839 if ( sdom == NULL )
keir@21258 840 return NULL;
keir@20308 841 memset(sdom, 0, sizeof(*sdom));
kfraser@12284 842
kfraser@12284 843 /* Initialize credit and weight */
kfraser@12284 844 INIT_LIST_HEAD(&sdom->active_vcpu);
kfraser@12284 845 sdom->active_vcpu_count = 0;
kfraser@12284 846 INIT_LIST_HEAD(&sdom->active_sdom_elem);
kfraser@12284 847 sdom->dom = dom;
kfraser@12284 848 sdom->weight = CSCHED_DEFAULT_WEIGHT;
kfraser@12284 849 sdom->cap = 0U;
keir@21258 850
keir@21258 851 return (void *)sdom;
keir@21258 852 }
keir@21258 853
keir@21258 854 static int
keir@21327 855 csched_dom_init(const struct scheduler *ops, struct domain *dom)
keir@21258 856 {
keir@21258 857 struct csched_dom *sdom;
keir@21258 858
keir@21258 859 CSCHED_STAT_CRANK(dom_init);
keir@21258 860
keir@21258 861 if ( is_idle_domain(dom) )
keir@21258 862 return 0;
keir@21258 863
keir@21258 864 sdom = csched_alloc_domdata(ops, dom);
keir@21258 865 if ( sdom == NULL )
keir@21258 866 return -ENOMEM;
keir@21258 867
kfraser@12284 868 dom->sched_priv = sdom;
kfraser@12284 869
kfraser@12284 870 return 0;
kfraser@12284 871 }
kfraser@12284 872
ack@10206 873 static void
keir@21327 874 csched_free_domdata(const struct scheduler *ops, void *data)
keir@21258 875 {
keir@21258 876 xfree(data);
keir@21258 877 }
keir@21258 878
keir@21258 879 static void
keir@21327 880 csched_dom_destroy(const struct scheduler *ops, struct domain *dom)
ack@10206 881 {
kaf24@10281 882 CSCHED_STAT_CRANK(dom_destroy);
keir@21258 883 csched_free_domdata(ops, CSCHED_DOM(dom));
ack@10206 884 }
ack@10206 885
ack@10206 886 /*
ack@10206 887 * This is an O(n) optimized sort of the runq.
ack@10206 888 *
ack@10206 889 * Time-share VCPUs can only be one of two priorities, UNDER or OVER. We walk
ack@10206 890 * through the runq and move up any UNDERs that are preceded by OVERs. We
ack@10206 891 * remember the last UNDER to make the move-up operation O(1).
ack@10206 892 */
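/*
 * For example, a runq ordered [OVER, UNDER, OVER, UNDER] is rewritten in
 * a single pass to [UNDER, UNDER, OVER, OVER], preserving relative order
 * within each priority class.
 */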
ack@10206 893 static void
keir@21258 894 csched_runq_sort(struct csched_private *prv, unsigned int cpu)
ack@10206 895 {
ack@10206 896 struct csched_pcpu * const spc = CSCHED_PCPU(cpu);
ack@10206 897 struct list_head *runq, *elem, *next, *last_under;
ack@10206 898 struct csched_vcpu *svc_elem;
ack@10206 899 unsigned long flags;
ack@10206 900 int sort_epoch;
ack@10206 901
keir@21258 902 sort_epoch = prv->runq_sort;
ack@10206 903 if ( sort_epoch == spc->runq_sort_last )
ack@10206 904 return;
ack@10206 905
ack@10206 906 spc->runq_sort_last = sort_epoch;
ack@10206 907
keir@22655 908 pcpu_schedule_lock_irqsave(cpu, flags);
ack@10206 909
ack@10206 910 runq = &spc->runq;
ack@10206 911 elem = runq->next;
ack@10206 912 last_under = runq;
ack@10206 913
ack@10206 914 while ( elem != runq )
ack@10206 915 {
ack@10206 916 next = elem->next;
ack@10206 917 svc_elem = __runq_elem(elem);
ack@10206 918
ack@12048 919 if ( svc_elem->pri >= CSCHED_PRI_TS_UNDER )
ack@10206 920 {
ack@10206 921 /* does elem need to move up the runq? */
ack@10206 922 if ( elem->prev != last_under )
ack@10206 923 {
ack@10206 924 list_del(elem);
ack@10206 925 list_add(elem, last_under);
ack@10206 926 }
ack@10206 927 last_under = elem;
ack@10206 928 }
ack@10206 929
ack@10206 930 elem = next;
ack@10206 931 }
ack@10206 932
keir@22655 933 pcpu_schedule_unlock_irqrestore(cpu, flags);
ack@10206 934 }
ack@10206 935
ack@10206 936 static void
keir@19498 937 csched_acct(void* dummy)
ack@10206 938 {
keir@21258 939 struct csched_private *prv = dummy;
ack@10206 940 unsigned long flags;
ack@10206 941 struct list_head *iter_vcpu, *next_vcpu;
ack@10206 942 struct list_head *iter_sdom, *next_sdom;
ack@10206 943 struct csched_vcpu *svc;
ack@10206 944 struct csched_dom *sdom;
ack@10206 945 uint32_t credit_total;
ack@10206 946 uint32_t weight_total;
ack@10206 947 uint32_t weight_left;
ack@10206 948 uint32_t credit_fair;
ack@10206 949 uint32_t credit_peak;
ack@12240 950 uint32_t credit_cap;
ack@10206 951 int credit_balance;
ack@10206 952 int credit_xtra;
ack@10206 953 int credit;
ack@10206 954
ack@10206 955
keir@21258 956 spin_lock_irqsave(&prv->lock, flags);
ack@10206 957
keir@21258 958 weight_total = prv->weight;
keir@21258 959 credit_total = prv->credit;
ack@10206 960
ack@10206 961 /* Converge balance towards 0 when it drops negative */
keir@21258 962 if ( prv->credit_balance < 0 )
ack@10206 963 {
keir@21258 964 credit_total -= prv->credit_balance;
ack@10206 965 CSCHED_STAT_CRANK(acct_balance);
ack@10206 966 }
ack@10206 967
ack@10206 968 if ( unlikely(weight_total == 0) )
ack@10206 969 {
keir@21258 970 prv->credit_balance = 0;
keir@21258 971 spin_unlock_irqrestore(&prv->lock, flags);
ack@10206 972 CSCHED_STAT_CRANK(acct_no_work);
keir@19498 973 goto out;
ack@10206 974 }
ack@10206 975
ack@10206 976 CSCHED_STAT_CRANK(acct_run);
ack@10206 977
ack@10206 978 weight_left = weight_total;
ack@10206 979 credit_balance = 0;
ack@10206 980 credit_xtra = 0;
ack@12240 981 credit_cap = 0U;
ack@10206 982
keir@21258 983 list_for_each_safe( iter_sdom, next_sdom, &prv->active_sdom )
ack@10206 984 {
ack@10206 985 sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
ack@10206 986
ack@10206 987 BUG_ON( is_idle_domain(sdom->dom) );
ack@10206 988 BUG_ON( sdom->active_vcpu_count == 0 );
ack@10206 989 BUG_ON( sdom->weight == 0 );
keir@22026 990 BUG_ON( (sdom->weight * sdom->active_vcpu_count) > weight_left );
ack@10206 991
keir@22026 992 weight_left -= ( sdom->weight * sdom->active_vcpu_count );
ack@10206 993
ack@10206 994 /*
ack@10206 995 * A domain's fair share is computed using its weight in competition
ack@10206 996 * with that of all other active domains.
ack@10206 997 *
ack@10206 998 * At most, a domain can use credits to run all its active VCPUs
ack@10206 999 * for one full accounting period. We allow a domain to earn more
ack@10206 1000 * only when the system-wide credit balance is negative.
ack@10206 1001 */
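        /*
         * Rough example with the default constants: on a single PCPU
         * (credit_total == 300), two domains of weight 256 and 512 with
         * one active VCPU each receive shares of about 100 and 200
         * credits per accounting period respectively.
         */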
ack@12071 1002 credit_peak = sdom->active_vcpu_count * CSCHED_CREDITS_PER_ACCT;
keir@21258 1003 if ( prv->credit_balance < 0 )
ack@10206 1004 {
keir@22026 1005 credit_peak += ( ( -prv->credit_balance
keir@22026 1006 * sdom->weight
keir@22026 1007 * sdom->active_vcpu_count) +
ack@10206 1008 (weight_total - 1)
ack@10206 1009 ) / weight_total;
ack@10206 1010 }
ack@12240 1011
ack@10206 1012 if ( sdom->cap != 0U )
ack@10206 1013 {
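            /*
             * The cap is a percentage of a single PCPU, so with the
             * default constants a cap of 50 limits the domain to roughly
             * 150 of the 300 credits one PCPU contributes per accounting
             * period (illustrative figures based on the defaults above).
             */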
ack@12071 1014 credit_cap = ((sdom->cap * CSCHED_CREDITS_PER_ACCT) + 99) / 100;
ack@10206 1015 if ( credit_cap < credit_peak )
ack@10206 1016 credit_peak = credit_cap;
ack@12240 1017
keir@22026 1018 /* FIXME -- set cap per-vcpu as well...? */
ack@12240 1019 credit_cap = ( credit_cap + ( sdom->active_vcpu_count - 1 )
ack@12240 1020 ) / sdom->active_vcpu_count;
ack@10206 1021 }
ack@10206 1022
keir@22026 1023 credit_fair = ( ( credit_total
keir@22026 1024 * sdom->weight
keir@22026 1025 * sdom->active_vcpu_count )
keir@22026 1026 + (weight_total - 1)
ack@10206 1027 ) / weight_total;
ack@10206 1028
ack@10206 1029 if ( credit_fair < credit_peak )
ack@10206 1030 {
ack@10206 1031 credit_xtra = 1;
ack@10206 1032 }
ack@10206 1033 else
ack@10206 1034 {
ack@10206 1035 if ( weight_left != 0U )
ack@10206 1036 {
ack@10206 1037 /* Give other domains a chance at unused credits */
ack@10206 1038 credit_total += ( ( ( credit_fair - credit_peak
ack@10206 1039 ) * weight_total
ack@10206 1040 ) + ( weight_left - 1 )
ack@10206 1041 ) / weight_left;
ack@10206 1042 }
ack@10206 1043
ack@10206 1044 if ( credit_xtra )
ack@10206 1045 {
ack@10206 1046 /*
ack@10206 1047 * Lazily keep domains with extra credits at the head of
ack@10206 1048 * the queue to give others a chance at them in future
ack@10206 1049 * accounting periods.
ack@10206 1050 */
ack@10206 1051 CSCHED_STAT_CRANK(acct_reorder);
ack@10206 1052 list_del(&sdom->active_sdom_elem);
keir@21258 1053 list_add(&sdom->active_sdom_elem, &prv->active_sdom);
ack@10206 1054 }
ack@10206 1055
ack@10206 1056 credit_fair = credit_peak;
ack@10206 1057 }
ack@10206 1058
ack@10206 1059 /* Compute fair share per VCPU */
ack@10206 1060 credit_fair = ( credit_fair + ( sdom->active_vcpu_count - 1 )
ack@10206 1061 ) / sdom->active_vcpu_count;
ack@10206 1062
ack@10206 1063
ack@10206 1064 list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu )
ack@10206 1065 {
ack@10206 1066 svc = list_entry(iter_vcpu, struct csched_vcpu, active_vcpu_elem);
ack@10206 1067 BUG_ON( sdom != svc->sdom );
ack@10206 1068
ack@10206 1069 /* Increment credit */
ack@10206 1070 atomic_add(credit_fair, &svc->credit);
ack@10206 1071 credit = atomic_read(&svc->credit);
ack@10206 1072
ack@10206 1073 /*
ack@10206 1074 * Recompute priority or, if VCPU is idling, remove it from
ack@10206 1075 * the active list.
ack@10206 1076 */
ack@10206 1077 if ( credit < 0 )
ack@10206 1078 {
ack@13046 1079 svc->pri = CSCHED_PRI_TS_OVER;
ack@10206 1080
ack@13046 1081 /* Park running VCPUs of capped-out domains */
ack@13046 1082 if ( sdom->cap != 0U &&
ack@13046 1083 credit < -credit_cap &&
ack@13046 1084 !(svc->flags & CSCHED_FLAG_VCPU_PARKED) )
ack@13046 1085 {
ack@13046 1086 CSCHED_STAT_CRANK(vcpu_park);
ack@13046 1087 vcpu_pause_nosync(svc->vcpu);
ack@13046 1088 svc->flags |= CSCHED_FLAG_VCPU_PARKED;
ack@13046 1089 }
ack@13046 1090
ack@13046 1091 /* Lower bound on credits */
ack@12071 1092 if ( credit < -CSCHED_CREDITS_PER_TSLICE )
ack@10206 1093 {
ack@10206 1094 CSCHED_STAT_CRANK(acct_min_credit);
ack@12071 1095 credit = -CSCHED_CREDITS_PER_TSLICE;
ack@10206 1096 atomic_set(&svc->credit, credit);
ack@10206 1097 }
ack@10206 1098 }
ack@10206 1099 else
ack@10206 1100 {
ack@10206 1101 svc->pri = CSCHED_PRI_TS_UNDER;
ack@10206 1102
ack@13046 1103 /* Unpark any capped domains whose credits go positive */
ack@13046 1104 if ( svc->flags & CSCHED_FLAG_VCPU_PARKED)
ack@13046 1105 {
ack@13046 1106 /*
ack@13046 1107 * It's important to unset the flag AFTER the unpause()
ack@13046 1108 * call to make sure the VCPU's priority is not boosted
ack@13046 1109 * if it is woken up here.
ack@13046 1110 */
ack@13046 1111 CSCHED_STAT_CRANK(vcpu_unpark);
ack@13046 1112 vcpu_unpause(svc->vcpu);
ack@13046 1113 svc->flags &= ~CSCHED_FLAG_VCPU_PARKED;
ack@13046 1114 }
ack@13046 1115
ack@13046 1116 /* Upper bound on credits means VCPU stops earning */
ack@12071 1117 if ( credit > CSCHED_CREDITS_PER_TSLICE )
ack@12071 1118 {
keir@21258 1119 __csched_vcpu_acct_stop_locked(prv, svc);
keir@21983 1120 /* Divide credits in half, so that when it starts
keir@21983 1121 * accounting again, it starts a little bit "ahead" */
keir@21983 1122 credit /= 2;
ack@12071 1123 atomic_set(&svc->credit, credit);
ack@12071 1124 }
ack@10206 1125 }
ack@10206 1126
ack@12941 1127 CSCHED_VCPU_STAT_SET(svc, credit_last, credit);
ack@12941 1128 CSCHED_VCPU_STAT_SET(svc, credit_incr, credit_fair);
ack@10206 1129 credit_balance += credit;
ack@10206 1130 }
ack@10206 1131 }
ack@10206 1132
keir@21258 1133 prv->credit_balance = credit_balance;
ack@10206 1134
keir@21258 1135 spin_unlock_irqrestore(&prv->lock, flags);
ack@10206 1136
ack@10206 1137 /* Inform each CPU that its runq needs to be sorted */
keir@21258 1138 prv->runq_sort++;
keir@19498 1139
keir@19498 1140 out:
keir@21258 1141 set_timer( &prv->master_ticker, NOW() +
keir@19498 1142 MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT );
ack@10206 1143 }
ack@10206 1144
ack@10206 1145 static void
kfraser@14358 1146 csched_tick(void *_cpu)
ack@10206 1147 {
kfraser@14358 1148 unsigned int cpu = (unsigned long)_cpu;
kfraser@14358 1149 struct csched_pcpu *spc = CSCHED_PCPU(cpu);
keir@21258 1150 struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));
kfraser@14358 1151
kfraser@14358 1152 spc->tick++;
kfraser@14358 1153
ack@10206 1154 /*
ack@10206 1155 * Accounting for running VCPU
ack@10206 1156 */
ack@12941 1157 if ( !is_idle_vcpu(current) )
keir@21258 1158 csched_vcpu_acct(prv, cpu);
ack@10206 1159
ack@10206 1160 /*
ack@10206 1161 * Check if runq needs to be sorted
ack@10206 1162 *
ack@10206 1163 * Every physical CPU resorts the runq after the accounting master has
ack@10206 1164 * modified priorities. This is a special O(n) sort and runs at most
ack@10206 1165 * once per accounting period (currently 30 milliseconds).
ack@10206 1166 */
keir@21258 1167 csched_runq_sort(prv, cpu);
kfraser@14358 1168
kfraser@14358 1169 set_timer(&spc->ticker, NOW() + MILLISECS(CSCHED_MSECS_PER_TICK));
ack@10206 1170 }
ack@10206 1171
ack@10206 1172 static struct csched_vcpu *
ack@12941 1173 csched_runq_steal(int peer_cpu, int cpu, int pri)
ack@10206 1174 {
ack@12941 1175 const struct csched_pcpu * const peer_pcpu = CSCHED_PCPU(peer_cpu);
ack@12941 1176 const struct vcpu * const peer_vcpu = per_cpu(schedule_data, peer_cpu).curr;
ack@12941 1177 struct csched_vcpu *speer;
ack@10206 1178 struct list_head *iter;
ack@10206 1179 struct vcpu *vc;
ack@10206 1180
ack@12941 1181 /*
ack@12941 1182 * Don't steal from an idle CPU's runq because it is about to
ack@12941 1183 * pick up that work itself.
ack@12941 1184 */
ack@12941 1185 if ( peer_pcpu != NULL && !is_idle_vcpu(peer_vcpu) )
ack@10206 1186 {
ack@12941 1187 list_for_each( iter, &peer_pcpu->runq )
ack@10206 1188 {
ack@12941 1189 speer = __runq_elem(iter);
ack@10206 1190
ack@12941 1191 /*
ack@13046 1192 * If the next available VCPU here is not of strictly higher
ack@13046 1193 * priority than ours, this PCPU is useless to us.
ack@12941 1194 */
ack@13046 1195 if ( speer->pri <= pri )
ack@12941 1196 break;
ack@10206 1197
ack@12941 1198 /* Is this VCPU runnable on our PCPU? */
ack@12941 1199 vc = speer->vcpu;
ack@12941 1200 BUG_ON( is_idle_vcpu(vc) );
ack@10206 1201
ack@12941 1202 if (__csched_vcpu_is_migrateable(vc, cpu))
ack@12941 1203 {
ack@12941 1204 /* We got a candidate. Grab it! */
ack@12941 1205 CSCHED_VCPU_STAT_CRANK(speer, migrate_q);
ack@12941 1206 CSCHED_STAT_CRANK(migrate_queued);
keir@21019 1207 WARN_ON(vc->is_urgent);
ack@12941 1208 __runq_remove(speer);
ack@12941 1209 vc->processor = cpu;
ack@12941 1210 return speer;
ack@12941 1211 }
ack@10206 1212 }
ack@10206 1213 }
ack@10206 1214
ack@12941 1215 CSCHED_STAT_CRANK(steal_peer_idle);
ack@10206 1216 return NULL;
ack@10206 1217 }
ack@10206 1218
ack@10206 1219 static struct csched_vcpu *
keir@21258 1220 csched_load_balance(struct csched_private *prv, int cpu,
keir@21671 1221 struct csched_vcpu *snext, bool_t *stolen)
ack@10206 1222 {
ack@12290 1223 struct csched_vcpu *speer;
ack@12290 1224 cpumask_t workers;
keir@21258 1225 cpumask_t *online;
ack@10206 1226 int peer_cpu;
ack@10206 1227
ack@12941 1228 BUG_ON( cpu != snext->vcpu->processor );
keir@21258 1229 online = CSCHED_CPUONLINE(per_cpu(cpupool, cpu));
ack@12941 1230
keir@18502 1231 /* If this CPU is going offline we shouldn't steal work. */
keir@21258 1232 if ( unlikely(!cpu_isset(cpu, *online)) )
keir@18502 1233 goto out;
keir@18502 1234
ack@10206 1235 if ( snext->pri == CSCHED_PRI_IDLE )
ack@10206 1236 CSCHED_STAT_CRANK(load_balance_idle);
ack@10206 1237 else if ( snext->pri == CSCHED_PRI_TS_OVER )
ack@10206 1238 CSCHED_STAT_CRANK(load_balance_over);
ack@10206 1239 else
ack@10206 1240 CSCHED_STAT_CRANK(load_balance_other);
ack@10206 1241
ack@12290 1242 /*
ack@12941 1243 * Peek at non-idling CPUs in the system, starting with our
ack@12941 1244 * immediate neighbour.
ack@12290 1245 */
keir@21258 1246 cpus_andnot(workers, *online, prv->idlers);
ack@12290 1247 cpu_clear(cpu, workers);
ack@10206 1248 peer_cpu = cpu;
ack@10206 1249
ack@12290 1250 while ( !cpus_empty(workers) )
ack@10206 1251 {
keir@19314 1252 peer_cpu = cycle_cpu(peer_cpu, workers);
ack@12290 1253 cpu_clear(peer_cpu, workers);
ack@10206 1254
ack@10206 1255 /*
ack@10206 1256 * Get ahold of the scheduler lock for this peer CPU.
ack@10206 1257 *
ack@10206 1258 * Note: We don't spin on this lock but simply try it. Spinning could
ack@10206 1259 * cause a deadlock if the peer CPU is also load balancing and trying
ack@10206 1260 * to lock this CPU.
ack@10206 1261 */
keir@22655 1262 if ( !pcpu_schedule_trylock(peer_cpu) )
ack@10206 1263 {
kaf24@11514 1264 CSCHED_STAT_CRANK(steal_trylock_failed);
kaf24@11514 1265 continue;
kaf24@11514 1266 }
ack@10206 1267
ack@12941 1268 /*
ack@12941 1269 * Any work over there to steal?
ack@12941 1270 */
ack@12941 1271 speer = csched_runq_steal(peer_cpu, cpu, snext->pri);
keir@22655 1272 pcpu_schedule_unlock(peer_cpu);
ack@12941 1273 if ( speer != NULL )
keir@21671 1274 {
keir@21671 1275 *stolen = 1;
ack@12941 1276 return speer;
keir@21671 1277 }
ack@10206 1278 }
ack@10206 1279
keir@18502 1280 out:
ack@12291 1281 /* Failed to find more important work elsewhere... */
ack@10206 1282 __runq_remove(snext);
ack@10206 1283 return snext;
ack@10206 1284 }
ack@10206 1285
ack@10206 1286 /*
ack@10206 1287 * This function is in the critical path. It is designed to be simple and
ack@10206 1288 * fast for the common case.
ack@10206 1289 */
ack@10206 1290 static struct task_slice
keir@21390 1291 csched_schedule(
keir@21390 1292 const struct scheduler *ops, s_time_t now, bool_t tasklet_work_scheduled)
ack@10206 1293 {
ack@10206 1294 const int cpu = smp_processor_id();
ack@10206 1295 struct list_head * const runq = RUNQ(cpu);
ack@10206 1296 struct csched_vcpu * const scurr = CSCHED_VCPU(current);
keir@21258 1297 struct csched_private *prv = CSCHED_PRIV(ops);
ack@10206 1298 struct csched_vcpu *snext;
ack@10206 1299 struct task_slice ret;
ack@10206 1300
ack@10206 1301 CSCHED_STAT_CRANK(schedule);
ack@10206 1302 CSCHED_VCPU_CHECK(current);
ack@10206 1303
keir@20308 1304 if ( !is_idle_vcpu(scurr->vcpu) )
keir@20308 1305 {
keir@21243 1306 /* Update credits of a non-idle VCPU. */
keir@20308 1307 burn_credits(scurr, now);
keir@20308 1308 scurr->start_time -= now;
keir@20308 1309 }
keir@21243 1310 else
keir@21243 1311 {
keir@21243 1312 /* Re-instate a boosted idle VCPU as normal-idle. */
keir@21243 1313 scurr->pri = CSCHED_PRI_IDLE;
keir@21243 1314 }
keir@20160 1315
ack@10206 1316 /*
ack@10206 1317 * Select next runnable local VCPU (ie top of local runq)
ack@10206 1318 */
ack@10206 1319 if ( vcpu_runnable(current) )
ack@10206 1320 __runq_insert(cpu, scurr);
ack@10206 1321 else
ack@10206 1322 BUG_ON( is_idle_vcpu(current) || list_empty(runq) );
ack@10206 1323
ack@10206 1324 snext = __runq_elem(runq->next);
keir@21671 1325 ret.migrated = 0;
ack@10206 1326
keir@21243 1327 /* Tasklet work (which runs in idle VCPU context) overrides all else. */
keir@21390 1328 if ( tasklet_work_scheduled )
keir@21243 1329 {
keir@21243 1330 snext = CSCHED_VCPU(idle_vcpu[cpu]);
keir@21243 1331 snext->pri = CSCHED_PRI_TS_BOOST;
keir@21243 1332 }
keir@21243 1333
ack@10206 1334 /*
keir@21982 1335 * Clear YIELD flag before scheduling out
keir@21982 1336 */
keir@21982 1337 if ( scurr->flags & CSCHED_FLAG_VCPU_YIELD )
keir@21982 1338 scurr->flags &= ~(CSCHED_FLAG_VCPU_YIELD);
keir@21982 1339
keir@21982 1340 /*
ack@10206 1341 * SMP Load balance:
ack@10206 1342 *
ack@10206 1343 * If the next highest priority local runnable VCPU has already eaten
ack@10206 1344 * through its credits, look on other PCPUs to see if we have more
ack@10206 1345 * urgent work... If not, csched_load_balance() will return snext, but
ack@10206 1346 * already removed from the runq.
ack@10206 1347 */
ack@10206 1348 if ( snext->pri > CSCHED_PRI_TS_OVER )
ack@10206 1349 __runq_remove(snext);
ack@10206 1350 else
keir@21671 1351 snext = csched_load_balance(prv, cpu, snext, &ret.migrated);
ack@10206 1352
ack@10206 1353 /*
ack@10206 1354 * Update idlers mask if necessary. When we're idling, other CPUs
ack@10206 1355 * will tickle us when they get extra work.
ack@10206 1356 */
ack@10206 1357 if ( snext->pri == CSCHED_PRI_IDLE )
ack@10206 1358 {
keir@21258 1359 if ( !cpu_isset(cpu, prv->idlers) )
keir@21258 1360 cpu_set(cpu, prv->idlers);
ack@10206 1361 }
keir@21258 1362 else if ( cpu_isset(cpu, prv->idlers) )
ack@10206 1363 {
keir@21258 1364 cpu_clear(cpu, prv->idlers);
ack@10206 1365 }
ack@10206 1366
keir@20160 1367 if ( !is_idle_vcpu(snext->vcpu) )
keir@20308 1368 snext->start_time += now;
keir@20160 1369
ack@10206 1370 /*
ack@10206 1371 * Return task to run next...
ack@10206 1372 */
keir@19538 1373 ret.time = (is_idle_vcpu(snext->vcpu) ?
keir@19538 1374 -1 : MILLISECS(CSCHED_MSECS_PER_TSLICE));
ack@10206 1375 ret.task = snext->vcpu;
ack@10206 1376
ack@10206 1377 CSCHED_VCPU_CHECK(ret.task);
ack@10206 1378 return ret;
ack@10206 1379 }
ack@10206 1380
ack@10206 1381 static void
ack@10206 1382 csched_dump_vcpu(struct csched_vcpu *svc)
ack@10206 1383 {
ack@10206 1384 struct csched_dom * const sdom = svc->sdom;
ack@10206 1385
ack@13046 1386 printk("[%i.%i] pri=%i flags=%x cpu=%i",
ack@10206 1387 svc->vcpu->domain->domain_id,
ack@10206 1388 svc->vcpu->vcpu_id,
ack@10206 1389 svc->pri,
ack@13046 1390 svc->flags,
ack@10206 1391 svc->vcpu->processor);
ack@10206 1392
ack@10206 1393 if ( sdom )
ack@10206 1394 {
ack@12941 1395 printk(" credit=%i [w=%u]", atomic_read(&svc->credit), sdom->weight);
ack@12941 1396 #ifdef CSCHED_STATS
ack@12941 1397 printk(" (%d+%u) {a/i=%u/%u m=%u+%u}",
ack@12941 1398 svc->stats.credit_last,
ack@12941 1399 svc->stats.credit_incr,
ack@12941 1400 svc->stats.state_active,
ack@12941 1401 svc->stats.state_idle,
ack@12941 1402 svc->stats.migrate_q,
ack@12941 1403 svc->stats.migrate_r);
ack@12941 1404 #endif
ack@10206 1405 }
ack@10206 1406
ack@10206 1407 printk("\n");
ack@10206 1408 }
ack@10206 1409
ack@10206 1410 static void
keir@21327 1411 csched_dump_pcpu(const struct scheduler *ops, int cpu)
ack@10206 1412 {
ack@10206 1413 struct list_head *runq, *iter;
ack@10206 1414 struct csched_pcpu *spc;
ack@10206 1415 struct csched_vcpu *svc;
ack@10206 1416 int loop;
keir@20975 1417 #define cpustr keyhandler_scratch
ack@10206 1418
ack@10206 1419 spc = CSCHED_PCPU(cpu);
ack@10206 1420 runq = &spc->runq;
ack@10206 1421
keir@19965 1422 cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_sibling_map, cpu));
keir@18561 1423 printk(" sort=%d, sibling=%s, ", spc->runq_sort_last, cpustr);
keir@19965 1424 cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_core_map, cpu));
keir@18561 1425 printk("core=%s\n", cpustr);
ack@10206 1426
ack@10206 1427 /* current VCPU */
kaf24@11017 1428 svc = CSCHED_VCPU(per_cpu(schedule_data, cpu).curr);
ack@10206 1429 if ( svc )
ack@10206 1430 {
ack@10206 1431 printk("\trun: ");
ack@10206 1432 csched_dump_vcpu(svc);
ack@10206 1433 }
ack@10206 1434
ack@10206 1435 loop = 0;
ack@10206 1436 list_for_each( iter, runq )
ack@10206 1437 {
ack@10206 1438 svc = __runq_elem(iter);
ack@10206 1439 if ( svc )
ack@10206 1440 {
ack@10206 1441 printk("\t%3d: ", ++loop);
ack@10206 1442 csched_dump_vcpu(svc);
ack@10206 1443 }
ack@10206 1444 }
keir@20975 1445 #undef cpustr
ack@10206 1446 }
ack@10206 1447
ack@10206 1448 static void
keir@21327 1449 csched_dump(const struct scheduler *ops)
ack@10206 1450 {
ack@10206 1451 struct list_head *iter_sdom, *iter_svc;
keir@21258 1452 struct csched_private *prv = CSCHED_PRIV(ops);
ack@10206 1453 int loop;
keir@20975 1454 #define idlers_buf keyhandler_scratch
ack@10206 1455
ack@10206 1456 printk("info:\n"
ack@10206 1457 "\tncpus = %u\n"
ack@10206 1458 "\tmaster = %u\n"
ack@10206 1459 "\tcredit = %u\n"
ack@10206 1460 "\tcredit balance = %d\n"
ack@10206 1461 "\tweight = %u\n"
ack@10206 1462 "\trunq_sort = %u\n"
ack@12071 1463 "\tdefault-weight = %d\n"
ack@12071 1464 "\tmsecs per tick = %dms\n"
keir@20160 1465 "\tcredits per msec = %d\n"
ack@12071 1466 "\tticks per tslice = %d\n"
keir@19331 1467 "\tticks per acct = %d\n"
keir@19331 1468 "\tmigration delay = %uus\n",
keir@21258 1469 prv->ncpus,
keir@21258 1470 prv->master,
keir@21258 1471 prv->credit,
keir@21258 1472 prv->credit_balance,
keir@21258 1473 prv->weight,
keir@21258 1474 prv->runq_sort,
ack@12071 1475 CSCHED_DEFAULT_WEIGHT,
ack@12071 1476 CSCHED_MSECS_PER_TICK,
keir@20160 1477 CSCHED_CREDITS_PER_MSEC,
ack@12071 1478 CSCHED_TICKS_PER_TSLICE,
keir@19331 1479 CSCHED_TICKS_PER_ACCT,
keir@19331 1480 vcpu_migration_delay);
ack@10206 1481
keir@21258 1482 cpumask_scnprintf(idlers_buf, sizeof(idlers_buf), prv->idlers);
keir@18561 1483 printk("idlers: %s\n", idlers_buf);
ack@10206 1484
ack@10206 1485 printk("active vcpus:\n");
ack@10206 1486 loop = 0;
keir@21258 1487 list_for_each( iter_sdom, &prv->active_sdom )
ack@10206 1488 {
ack@10206 1489 struct csched_dom *sdom;
ack@10206 1490 sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
ack@10206 1491
ack@10206 1492 list_for_each( iter_svc, &sdom->active_vcpu )
ack@10206 1493 {
ack@10206 1494 struct csched_vcpu *svc;
ack@10206 1495 svc = list_entry(iter_svc, struct csched_vcpu, active_vcpu_elem);
ack@10206 1496
ack@10206 1497 printk("\t%3d: ", ++loop);
ack@10206 1498 csched_dump_vcpu(svc);
ack@10206 1499 }
ack@10206 1500 }
keir@20975 1501 #undef idlers_buf
ack@10206 1502 }
ack@10206 1503
keir@21258 1504 static int
keir@21453 1505 csched_init(struct scheduler *ops)
ack@10206 1506 {
keir@21258 1507 struct csched_private *prv;
keir@21258 1508
keir@21258 1509 prv = xmalloc(struct csched_private);
keir@21258 1510 if ( prv == NULL )
keir@21453 1511 return -ENOMEM;
keir@21453 1512
keir@21258 1513 memset(prv, 0, sizeof(*prv));
keir@21258 1514 ops->sched_data = prv;
keir@21258 1515 spin_lock_init(&prv->lock);
keir@21258 1516 INIT_LIST_HEAD(&prv->active_sdom);
keir@21258 1517 prv->master = UINT_MAX;
keir@21258 1518
keir@21258 1519 return 0;
ack@10206 1520 }
ack@10206 1521
keir@21258 1522 static void
keir@21327 1523 csched_deinit(const struct scheduler *ops)
keir@21258 1524 {
keir@21258 1525 struct csched_private *prv;
keir@21258 1526
keir@21258 1527 prv = CSCHED_PRIV(ops);
keir@21258 1528 if ( prv != NULL )
keir@21258 1529 xfree(prv);
keir@21258 1530 }
keir@21258 1531
keir@21327 1532 static void csched_tick_suspend(const struct scheduler *ops, unsigned int cpu)
keir@19498 1533 {
keir@19498 1534 struct csched_pcpu *spc;
keir@19498 1535
keir@21258 1536 spc = CSCHED_PCPU(cpu);
keir@19498 1537
keir@19498 1538 stop_timer(&spc->ticker);
keir@19498 1539 }
keir@19498 1540
keir@21327 1541 static void csched_tick_resume(const struct scheduler *ops, unsigned int cpu)
keir@19498 1542 {
keir@19498 1543 struct csched_pcpu *spc;
keir@19498 1544 uint64_t now = NOW();
keir@21258 1545
keir@21258 1546 spc = CSCHED_PCPU(cpu);
keir@19498 1547
keir@19498 1548 set_timer(&spc->ticker, now + MILLISECS(CSCHED_MSECS_PER_TICK)
keir@19498 1549 - now % MILLISECS(CSCHED_MSECS_PER_TICK) );
keir@19498 1550 }
ack@10206 1551
keir@21258 1552 static struct csched_private _csched_priv;
keir@21258 1553
keir@21327 1554 const struct scheduler sched_credit_def = {
ack@10206 1555 .name = "SMP Credit Scheduler",
ack@10206 1556 .opt_name = "credit",
kfraser@11295 1557 .sched_id = XEN_SCHEDULER_CREDIT,
keir@21258 1558 .sched_data = &_csched_priv,
ack@10206 1559
kfraser@12284 1560 .init_domain = csched_dom_init,
kfraser@12284 1561 .destroy_domain = csched_dom_destroy,
kfraser@12284 1562
keir@21258 1563 .insert_vcpu = csched_vcpu_insert,
keir@22324 1564 .remove_vcpu = csched_vcpu_remove,
kaf24@10281 1565
ack@10206 1566 .sleep = csched_vcpu_sleep,
ack@10206 1567 .wake = csched_vcpu_wake,
keir@21982 1568 .yield = csched_vcpu_yield,
kaf24@10281 1569
kfraser@11295 1570 .adjust = csched_dom_cntl,
ack@10206 1571
ack@12291 1572 .pick_cpu = csched_cpu_pick,
ack@10206 1573 .do_schedule = csched_schedule,
ack@10206 1574
ack@10206 1575 .dump_cpu_state = csched_dump_pcpu,
ack@10206 1576 .dump_settings = csched_dump,
ack@10206 1577 .init = csched_init,
keir@21258 1578 .deinit = csched_deinit,
keir@21258 1579 .alloc_vdata = csched_alloc_vdata,
keir@21258 1580 .free_vdata = csched_free_vdata,
keir@21258 1581 .alloc_pdata = csched_alloc_pdata,
keir@21258 1582 .free_pdata = csched_free_pdata,
keir@21258 1583 .alloc_domdata = csched_alloc_domdata,
keir@21258 1584 .free_domdata = csched_free_domdata,
keir@19498 1585
keir@19498 1586 .tick_suspend = csched_tick_suspend,
keir@19498 1587 .tick_resume = csched_tick_resume,
ack@10206 1588 };