
view xen/common/sched_credit.c @ 21982:fc29e13f669d

scheduler: Implement yield for credit1

This patch implements 'yield' for credit1. It does this by attempting
to put the yielding vcpu behind a single lower-priority vcpu on the
runqueue. If no lower-priority vcpus are in the queue, it will go at
the back (which, if the queue is empty, will also be the front).

Runqueues are sorted every 30ms, so that is the longest this priority
inversion can last.

For workloads with heavy concurrency hazard, and guests that implement
yield-on-spinlock, this patch significantly increases performance and
total system throughput.
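
As a minimal sketch of the insertion rule described above, the toy program below models it with a plain array (it is not the Xen list code; insert_pos, the priority values and the example runqueue are made up for illustration):

#include <stdio.h>

/* Toy model of the rule above: a normal wakeup is inserted in front of the
 * first strictly lower-priority entry; a yielding vcpu additionally skips
 * exactly one such entry if one exists, otherwise it goes to the back.
 * Priorities mirror credit1: 0 = BOOST, -1 = UNDER, -2 = OVER. */
static int insert_pos(const int *pri, int n, int new_pri, int yielding)
{
    int i = 0;

    while ( i < n && pri[i] >= new_pri )   /* first lower-priority entry */
        i++;
    if ( yielding && i < n )               /* yield: step past one of them */
        i++;
    return i;
}

int main(void)
{
    int runq[] = { 0, -1, -1, -2 };        /* BOOST, UNDER, UNDER, OVER */

    printf("normal wake inserts at %d, yield inserts at %d\n",
           insert_pos(runq, 4, -1, 0), insert_pos(runq, 4, -1, 1));
    return 0;
}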

Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Mon Aug 09 16:36:07 2010 +0100 (2010-08-09)
parents 0695a5cdcb42
children 2129cabe5f33
1 /****************************************************************************
2 * (C) 2005-2006 - Emmanuel Ackaouy - XenSource Inc.
3 ****************************************************************************
4 *
5 * File: common/csched_credit.c
6 * Author: Emmanuel Ackaouy
7 *
8 * Description: Credit-based SMP CPU scheduler
9 */
11 #include <xen/config.h>
12 #include <xen/init.h>
13 #include <xen/lib.h>
14 #include <xen/sched.h>
15 #include <xen/domain.h>
16 #include <xen/delay.h>
17 #include <xen/event.h>
18 #include <xen/time.h>
19 #include <xen/perfc.h>
20 #include <xen/sched-if.h>
21 #include <xen/softirq.h>
22 #include <asm/atomic.h>
23 #include <xen/errno.h>
24 #include <xen/keyhandler.h>
26 /*
27 * CSCHED_STATS
28 *
29 * Manage very basic per-vCPU counters and stats.
30 *
31 * Useful for debugging live systems. The stats are displayed
32 * with runq dumps ('r' on the Xen console).
33 */
34 #ifdef PERF_COUNTERS
35 #define CSCHED_STATS
36 #endif
39 /*
40 * Basic constants
41 */
42 #define CSCHED_DEFAULT_WEIGHT 256
43 #define CSCHED_TICKS_PER_TSLICE 3
44 #define CSCHED_TICKS_PER_ACCT 3
45 #define CSCHED_MSECS_PER_TICK 10
46 #define CSCHED_MSECS_PER_TSLICE \
47 (CSCHED_MSECS_PER_TICK * CSCHED_TICKS_PER_TSLICE)
48 #define CSCHED_CREDITS_PER_MSEC 10
49 #define CSCHED_CREDITS_PER_TSLICE \
50 (CSCHED_CREDITS_PER_MSEC * CSCHED_MSECS_PER_TSLICE)
51 #define CSCHED_CREDITS_PER_ACCT \
52 (CSCHED_CREDITS_PER_MSEC * CSCHED_MSECS_PER_TICK * CSCHED_TICKS_PER_ACCT)
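/*
 * (Derived values, spelled out for reference; they follow directly from the
 * definitions above: a time slice is 3 ticks x 10ms = 30ms, credits accrue
 * at 10 per millisecond, so a full slice is worth 300 credits and each
 * accounting period adds 300 credits per physical CPU to the system-wide
 * pool.)
 */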
55 /*
56 * Priorities
57 */
58 #define CSCHED_PRI_TS_BOOST 0 /* time-share waking up */
59 #define CSCHED_PRI_TS_UNDER -1 /* time-share w/ credits */
60 #define CSCHED_PRI_TS_OVER -2 /* time-share w/o credits */
61 #define CSCHED_PRI_IDLE -64 /* idle */
64 /*
65 * Flags
66 */
67 #define CSCHED_FLAG_VCPU_PARKED 0x0001 /* VCPU over capped credits */
68 #define CSCHED_FLAG_VCPU_YIELD 0x0002 /* VCPU yielding */
71 /*
72 * Useful macros
73 */
74 #define CSCHED_PRIV(_ops) \
75 ((struct csched_private *)((_ops)->sched_data))
76 #define CSCHED_PCPU(_c) \
77 ((struct csched_pcpu *)per_cpu(schedule_data, _c).sched_priv)
78 #define CSCHED_VCPU(_vcpu) ((struct csched_vcpu *) (_vcpu)->sched_priv)
79 #define CSCHED_DOM(_dom) ((struct csched_dom *) (_dom)->sched_priv)
80 #define RUNQ(_cpu) (&(CSCHED_PCPU(_cpu)->runq))
81 #define CSCHED_CPUONLINE(_pool) \
82 (((_pool) == NULL) ? &cpupool_free_cpus : &(_pool)->cpu_valid)
85 /*
86 * Stats
87 */
88 #define CSCHED_STAT_CRANK(_X) (perfc_incr(_X))
90 #ifdef CSCHED_STATS
92 #define CSCHED_VCPU_STATS_RESET(_V) \
93 do \
94 { \
95 memset(&(_V)->stats, 0, sizeof((_V)->stats)); \
96 } while ( 0 )
98 #define CSCHED_VCPU_STAT_CRANK(_V, _X) (((_V)->stats._X)++)
100 #define CSCHED_VCPU_STAT_SET(_V, _X, _Y) (((_V)->stats._X) = (_Y))
102 #else /* CSCHED_STATS */
104 #define CSCHED_VCPU_STATS_RESET(_V) do {} while ( 0 )
105 #define CSCHED_VCPU_STAT_CRANK(_V, _X) do {} while ( 0 )
106 #define CSCHED_VCPU_STAT_SET(_V, _X, _Y) do {} while ( 0 )
108 #endif /* CSCHED_STATS */
111 /*
112 * Boot parameters
113 */
114 int sched_credit_default_yield = 0;
115 boolean_param("sched_credit_default_yield", sched_credit_default_yield);
117 /*
118 * Physical CPU
119 */
120 struct csched_pcpu {
121 struct list_head runq;
122 uint32_t runq_sort_last;
123 struct timer ticker;
124 unsigned int tick;
125 unsigned int idle_bias;
126 };
128 /*
129 * Virtual CPU
130 */
131 struct csched_vcpu {
132 struct list_head runq_elem;
133 struct list_head active_vcpu_elem;
134 struct csched_dom *sdom;
135 struct vcpu *vcpu;
136 atomic_t credit;
137 s_time_t start_time; /* When we were scheduled (used for credit) */
138 uint16_t flags;
139 int16_t pri;
140 #ifdef CSCHED_STATS
141 struct {
142 int credit_last;
143 uint32_t credit_incr;
144 uint32_t state_active;
145 uint32_t state_idle;
146 uint32_t migrate_q;
147 uint32_t migrate_r;
148 } stats;
149 #endif
150 };
152 /*
153 * Domain
154 */
155 struct csched_dom {
156 struct list_head active_vcpu;
157 struct list_head active_sdom_elem;
158 struct domain *dom;
159 uint16_t active_vcpu_count;
160 uint16_t weight;
161 uint16_t cap;
162 };
164 /*
165 * System-wide private data
166 */
167 struct csched_private {
168 spinlock_t lock;
169 struct list_head active_sdom;
170 uint32_t ncpus;
171 struct timer master_ticker;
172 unsigned int master;
173 cpumask_t idlers;
174 cpumask_t cpus;
175 uint32_t weight;
176 uint32_t credit;
177 int credit_balance;
178 uint32_t runq_sort;
179 };
181 static void csched_tick(void *_cpu);
182 static void csched_acct(void *dummy);
184 static inline int
185 __vcpu_on_runq(struct csched_vcpu *svc)
186 {
187 return !list_empty(&svc->runq_elem);
188 }
190 static inline struct csched_vcpu *
191 __runq_elem(struct list_head *elem)
192 {
193 return list_entry(elem, struct csched_vcpu, runq_elem);
194 }
196 static inline void
197 __runq_insert(unsigned int cpu, struct csched_vcpu *svc)
198 {
199 const struct list_head * const runq = RUNQ(cpu);
200 struct list_head *iter;
202 BUG_ON( __vcpu_on_runq(svc) );
203 BUG_ON( cpu != svc->vcpu->processor );
205 list_for_each( iter, runq )
206 {
207 const struct csched_vcpu * const iter_svc = __runq_elem(iter);
208 if ( svc->pri > iter_svc->pri )
209 break;
210 }
212 /* If the vcpu yielded, try to put it behind one lower-priority
213 * runnable vcpu if we can. The next runq_sort will bring it forward
214 * within 30ms if the queue is too long. */
215 if ( svc->flags & CSCHED_FLAG_VCPU_YIELD
216 && __runq_elem(iter)->pri > CSCHED_PRI_IDLE )
217 {
218 iter = iter->next;
220 /* Some sanity checks */
221 BUG_ON(iter == runq);
222 }
224 list_add_tail(&svc->runq_elem, iter);
225 }
227 static inline void
228 __runq_remove(struct csched_vcpu *svc)
229 {
230 BUG_ON( !__vcpu_on_runq(svc) );
231 list_del_init(&svc->runq_elem);
232 }
234 static void burn_credits(struct csched_vcpu *svc, s_time_t now)
235 {
236 s_time_t delta;
237 unsigned int credits;
239 /* Assert svc is current */
240 ASSERT(svc==CSCHED_VCPU(per_cpu(schedule_data, svc->vcpu->processor).curr));
242 if ( (delta = now - svc->start_time) <= 0 )
243 return;
245 credits = (delta*CSCHED_CREDITS_PER_MSEC + MILLISECS(1)/2) / MILLISECS(1);
246 atomic_sub(credits, &svc->credit);
247 svc->start_time += (credits * MILLISECS(1)) / CSCHED_CREDITS_PER_MSEC;
248 }
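/*
 * (Worked example with made-up numbers, s_time_t being in nanoseconds:
 * delta = 2,350,000ns gives credits = (23,500,000 + 500,000) / 1,000,000
 * = 24, i.e. 2.35ms of run time is rounded to 24 credits; start_time then
 * advances by 24/10 = 2.4ms, so the half-credit charged in excess is given
 * back on the next call.)
 */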
250 static int opt_tickle_one_idle __read_mostly = 1;
251 boolean_param("tickle_one_idle_cpu", opt_tickle_one_idle);
253 DEFINE_PER_CPU(unsigned int, last_tickle_cpu);
255 static inline void
256 __runq_tickle(unsigned int cpu, struct csched_vcpu *new)
257 {
258 struct csched_vcpu * const cur =
259 CSCHED_VCPU(per_cpu(schedule_data, cpu).curr);
260 struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));
261 cpumask_t mask;
263 ASSERT(cur);
264 cpus_clear(mask);
266 /* If strictly higher priority than current VCPU, signal the CPU */
267 if ( new->pri > cur->pri )
268 {
269 if ( cur->pri == CSCHED_PRI_IDLE )
270 CSCHED_STAT_CRANK(tickle_local_idler);
271 else if ( cur->pri == CSCHED_PRI_TS_OVER )
272 CSCHED_STAT_CRANK(tickle_local_over);
273 else if ( cur->pri == CSCHED_PRI_TS_UNDER )
274 CSCHED_STAT_CRANK(tickle_local_under);
275 else
276 CSCHED_STAT_CRANK(tickle_local_other);
278 cpu_set(cpu, mask);
279 }
281 /*
282 * If this CPU has at least two runnable VCPUs, we tickle any idlers to
283 * let them know there is runnable work in the system...
284 */
285 if ( cur->pri > CSCHED_PRI_IDLE )
286 {
287 if ( cpus_empty(prv->idlers) )
288 {
289 CSCHED_STAT_CRANK(tickle_idlers_none);
290 }
291 else
292 {
293 cpumask_t idle_mask;
295 cpus_and(idle_mask, prv->idlers, new->vcpu->cpu_affinity);
296 if ( !cpus_empty(idle_mask) )
297 {
298 CSCHED_STAT_CRANK(tickle_idlers_some);
299 if ( opt_tickle_one_idle )
300 {
301 this_cpu(last_tickle_cpu) =
302 cycle_cpu(this_cpu(last_tickle_cpu), idle_mask);
303 cpu_set(this_cpu(last_tickle_cpu), mask);
304 }
305 else
306 cpus_or(mask, mask, idle_mask);
307 }
308 cpus_and(mask, mask, new->vcpu->cpu_affinity);
309 }
310 }
312 /* Send scheduler interrupts to designated CPUs */
313 if ( !cpus_empty(mask) )
314 cpumask_raise_softirq(mask, SCHEDULE_SOFTIRQ);
315 }
317 static void
318 csched_free_pdata(const struct scheduler *ops, void *pcpu, int cpu)
319 {
320 struct csched_private *prv = CSCHED_PRIV(ops);
321 struct csched_pcpu *spc = pcpu;
322 unsigned long flags;
324 if ( spc == NULL )
325 return;
327 spin_lock_irqsave(&prv->lock, flags);
329 prv->credit -= CSCHED_CREDITS_PER_ACCT;
330 prv->ncpus--;
331 cpu_clear(cpu, prv->idlers);
332 cpu_clear(cpu, prv->cpus);
333 if ( (prv->master == cpu) && (prv->ncpus > 0) )
334 {
335 prv->master = first_cpu(prv->cpus);
336 migrate_timer(&prv->master_ticker, prv->master);
337 }
338 kill_timer(&spc->ticker);
339 if ( prv->ncpus == 0 )
340 kill_timer(&prv->master_ticker);
342 spin_unlock_irqrestore(&prv->lock, flags);
344 xfree(spc);
345 }
347 static void *
348 csched_alloc_pdata(const struct scheduler *ops, int cpu)
349 {
350 struct csched_pcpu *spc;
351 struct csched_private *prv = CSCHED_PRIV(ops);
352 unsigned long flags;
354 /* Allocate per-PCPU info */
355 spc = xmalloc(struct csched_pcpu);
356 if ( spc == NULL )
357 return NULL;
358 memset(spc, 0, sizeof(*spc));
360 spin_lock_irqsave(&prv->lock, flags);
362 /* Initialize/update system-wide config */
363 prv->credit += CSCHED_CREDITS_PER_ACCT;
364 prv->ncpus++;
365 cpu_set(cpu, prv->cpus);
366 if ( prv->ncpus == 1 )
367 {
368 prv->master = cpu;
369 init_timer(&prv->master_ticker, csched_acct, prv, cpu);
370 set_timer(&prv->master_ticker, NOW() +
371 MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT);
372 }
374 init_timer(&spc->ticker, csched_tick, (void *)(unsigned long)cpu, cpu);
375 set_timer(&spc->ticker, NOW() + MILLISECS(CSCHED_MSECS_PER_TICK));
377 INIT_LIST_HEAD(&spc->runq);
378 spc->runq_sort_last = prv->runq_sort;
379 spc->idle_bias = NR_CPUS - 1;
380 if ( per_cpu(schedule_data, cpu).sched_priv == NULL )
381 per_cpu(schedule_data, cpu).sched_priv = spc;
383 /* Start off idling... */
384 BUG_ON(!is_idle_vcpu(per_cpu(schedule_data, cpu).curr));
385 cpu_set(cpu, prv->idlers);
387 spin_unlock_irqrestore(&prv->lock, flags);
389 return spc;
390 }
392 #ifndef NDEBUG
393 static inline void
394 __csched_vcpu_check(struct vcpu *vc)
395 {
396 struct csched_vcpu * const svc = CSCHED_VCPU(vc);
397 struct csched_dom * const sdom = svc->sdom;
399 BUG_ON( svc->vcpu != vc );
400 BUG_ON( sdom != CSCHED_DOM(vc->domain) );
401 if ( sdom )
402 {
403 BUG_ON( is_idle_vcpu(vc) );
404 BUG_ON( sdom->dom != vc->domain );
405 }
406 else
407 {
408 BUG_ON( !is_idle_vcpu(vc) );
409 }
411 CSCHED_STAT_CRANK(vcpu_check);
412 }
413 #define CSCHED_VCPU_CHECK(_vc) (__csched_vcpu_check(_vc))
414 #else
415 #define CSCHED_VCPU_CHECK(_vc)
416 #endif
418 /*
419 * Delay, in microseconds, between migrations of a VCPU between PCPUs.
420 * This prevents rapid fluttering of a VCPU between CPUs, and reduces the
421 * implicit overheads such as cache-warming. 1ms (1000) has been measured
422 * as a good value.
423 */
424 static unsigned int vcpu_migration_delay;
425 integer_param("vcpu_migration_delay", vcpu_migration_delay);
427 void set_vcpu_migration_delay(unsigned int delay)
428 {
429 vcpu_migration_delay = delay;
430 }
432 unsigned int get_vcpu_migration_delay(void)
433 {
434 return vcpu_migration_delay;
435 }
437 static inline int
438 __csched_vcpu_is_cache_hot(struct vcpu *v)
439 {
440 int hot = ((NOW() - v->last_run_time) <
441 ((uint64_t)vcpu_migration_delay * 1000u));
443 if ( hot )
444 CSCHED_STAT_CRANK(vcpu_hot);
446 return hot;
447 }
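/*
 * (Example: with vcpu_migration_delay set to 1000 — the value the comment
 * above recommends; the built-in default is 0, which disables the check —
 * a vcpu that last ran 0.6ms ago is still cache-hot (600,000ns < 1,000,000ns)
 * and will not be picked up by the load balancer below.)
 */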
449 static inline int
450 __csched_vcpu_is_migrateable(struct vcpu *vc, int dest_cpu)
451 {
452 /*
453 * Don't pick up work that's in the peer's scheduling tail or hot on
454 * peer PCPU. Only pick up work that's allowed to run on our CPU.
455 */
456 return !vc->is_running &&
457 !__csched_vcpu_is_cache_hot(vc) &&
458 cpu_isset(dest_cpu, vc->cpu_affinity);
459 }
461 static int
462 _csched_cpu_pick(const struct scheduler *ops, struct vcpu *vc, bool_t commit)
463 {
464 cpumask_t cpus;
465 cpumask_t idlers;
466 cpumask_t *online;
467 int cpu;
469 /*
470 * Pick from online CPUs in VCPU's affinity mask, giving a
471 * preference to its current processor if it's in there.
472 */
473 online = CSCHED_CPUONLINE(vc->domain->cpupool);
474 cpus_and(cpus, *online, vc->cpu_affinity);
475 cpu = cpu_isset(vc->processor, cpus)
476 ? vc->processor
477 : cycle_cpu(vc->processor, cpus);
478 ASSERT( !cpus_empty(cpus) && cpu_isset(cpu, cpus) );
480 /*
481 * Try to find an idle processor within the above constraints.
482 *
483 * In multi-core and multi-threaded CPUs, not all idle execution
484 * vehicles are equal!
485 *
486 * We give preference to the idle execution vehicle with the most
487 * idling neighbours in its grouping. This distributes work across
488 * distinct cores first and guarantees we don't do something stupid
489 * like run two VCPUs on co-hyperthreads while there are idle cores
490 * or sockets.
491 */
492 cpus_and(idlers, cpu_online_map, CSCHED_PRIV(ops)->idlers);
493 cpu_set(cpu, idlers);
494 cpus_and(cpus, cpus, idlers);
495 cpu_clear(cpu, cpus);
497 while ( !cpus_empty(cpus) )
498 {
499 cpumask_t cpu_idlers;
500 cpumask_t nxt_idlers;
501 int nxt, weight_cpu, weight_nxt;
503 nxt = cycle_cpu(cpu, cpus);
505 if ( cpu_isset(cpu, per_cpu(cpu_core_map, nxt)) )
506 {
507 ASSERT( cpu_isset(nxt, per_cpu(cpu_core_map, cpu)) );
508 cpus_and(cpu_idlers, idlers, per_cpu(cpu_sibling_map, cpu));
509 cpus_and(nxt_idlers, idlers, per_cpu(cpu_sibling_map, nxt));
510 }
511 else
512 {
513 ASSERT( !cpu_isset(nxt, per_cpu(cpu_core_map, cpu)) );
514 cpus_and(cpu_idlers, idlers, per_cpu(cpu_core_map, cpu));
515 cpus_and(nxt_idlers, idlers, per_cpu(cpu_core_map, nxt));
516 }
518 weight_cpu = cpus_weight(cpu_idlers);
519 weight_nxt = cpus_weight(nxt_idlers);
520 if ( ( (weight_cpu < weight_nxt) ^ sched_smt_power_savings )
521 && (weight_cpu != weight_nxt) )
522 {
523 cpu = cycle_cpu(CSCHED_PCPU(nxt)->idle_bias, nxt_idlers);
524 if ( commit )
525 CSCHED_PCPU(nxt)->idle_bias = cpu;
526 cpus_andnot(cpus, cpus, per_cpu(cpu_sibling_map, cpu));
527 }
528 else
529 {
530 cpus_andnot(cpus, cpus, nxt_idlers);
531 }
532 }
534 return cpu;
535 }
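/*
 * (Illustration of the preference above, with sched_smt_power_savings off:
 * on a two-core, two-thread box where both threads of core 0 are idle but
 * only one thread of core 1 is, the loop favours a thread of core 0, so the
 * vcpu does not share core 1 with already-running work while a whole core
 * sits idle.)
 */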
537 static int
538 csched_cpu_pick(const struct scheduler *ops, struct vcpu *vc)
539 {
540 return _csched_cpu_pick(ops, vc, 1);
541 }
543 static inline void
544 __csched_vcpu_acct_start(struct csched_private *prv, struct csched_vcpu *svc)
545 {
546 struct csched_dom * const sdom = svc->sdom;
547 unsigned long flags;
549 spin_lock_irqsave(&prv->lock, flags);
551 if ( list_empty(&svc->active_vcpu_elem) )
552 {
553 CSCHED_VCPU_STAT_CRANK(svc, state_active);
554 CSCHED_STAT_CRANK(acct_vcpu_active);
556 sdom->active_vcpu_count++;
557 list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
558 if ( list_empty(&sdom->active_sdom_elem) )
559 {
560 list_add(&sdom->active_sdom_elem, &prv->active_sdom);
561 prv->weight += sdom->weight;
562 }
563 }
565 spin_unlock_irqrestore(&prv->lock, flags);
566 }
568 static inline void
569 __csched_vcpu_acct_stop_locked(struct csched_private *prv,
570 struct csched_vcpu *svc)
571 {
572 struct csched_dom * const sdom = svc->sdom;
574 BUG_ON( list_empty(&svc->active_vcpu_elem) );
576 CSCHED_VCPU_STAT_CRANK(svc, state_idle);
577 CSCHED_STAT_CRANK(acct_vcpu_idle);
579 sdom->active_vcpu_count--;
580 list_del_init(&svc->active_vcpu_elem);
581 if ( list_empty(&sdom->active_vcpu) )
582 {
583 BUG_ON( prv->weight < sdom->weight );
584 list_del_init(&sdom->active_sdom_elem);
585 prv->weight -= sdom->weight;
586 }
587 }
589 static void
590 csched_vcpu_acct(struct csched_private *prv, unsigned int cpu)
591 {
592 struct csched_vcpu * const svc = CSCHED_VCPU(current);
593 const struct scheduler *ops = per_cpu(scheduler, cpu);
595 ASSERT( current->processor == cpu );
596 ASSERT( svc->sdom != NULL );
598 /*
599 * If this VCPU's priority was boosted when it last awoke, reset it.
600 * If the VCPU is found here, then it's consuming a non-negligible
601 * amount of CPU resources and should no longer be boosted.
602 */
603 if ( svc->pri == CSCHED_PRI_TS_BOOST )
604 svc->pri = CSCHED_PRI_TS_UNDER;
606 /*
607 * Update credits
608 */
609 if ( !is_idle_vcpu(svc->vcpu) )
610 burn_credits(svc, NOW());
612 /*
613 * Put this VCPU and domain back on the active list if it was
614 * idling.
615 *
616 * If it's been active a while, check if we'd be better off
617 * migrating it to run elsewhere (see multi-core and multi-thread
618 * support in csched_cpu_pick()).
619 */
620 if ( list_empty(&svc->active_vcpu_elem) )
621 {
622 __csched_vcpu_acct_start(prv, svc);
623 }
624 else if ( _csched_cpu_pick(ops, current, 0) != cpu )
625 {
626 CSCHED_VCPU_STAT_CRANK(svc, migrate_r);
627 CSCHED_STAT_CRANK(migrate_running);
628 set_bit(_VPF_migrating, &current->pause_flags);
629 cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
630 }
631 }
633 static void *
634 csched_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
635 {
636 struct csched_vcpu *svc;
638 /* Allocate per-VCPU info */
639 svc = xmalloc(struct csched_vcpu);
640 if ( svc == NULL )
641 return NULL;
642 memset(svc, 0, sizeof(*svc));
644 INIT_LIST_HEAD(&svc->runq_elem);
645 INIT_LIST_HEAD(&svc->active_vcpu_elem);
646 svc->sdom = dd;
647 svc->vcpu = vc;
648 atomic_set(&svc->credit, 0);
649 svc->flags = 0U;
650 svc->pri = is_idle_domain(vc->domain) ?
651 CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER;
652 CSCHED_VCPU_STATS_RESET(svc);
653 CSCHED_STAT_CRANK(vcpu_init);
654 return svc;
655 }
657 static void
658 csched_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
659 {
660 struct csched_vcpu *svc = vc->sched_priv;
662 if ( !__vcpu_on_runq(svc) && vcpu_runnable(vc) && !vc->is_running )
663 __runq_insert(vc->processor, svc);
664 }
666 static void
667 csched_free_vdata(const struct scheduler *ops, void *priv)
668 {
669 struct csched_private *prv = CSCHED_PRIV(ops);
670 struct csched_vcpu *svc = priv;
671 unsigned long flags;
673 if ( __vcpu_on_runq(svc) )
674 __runq_remove(svc);
676 spin_lock_irqsave(&(prv->lock), flags);
678 if ( !list_empty(&svc->active_vcpu_elem) )
679 __csched_vcpu_acct_stop_locked(prv, svc);
681 spin_unlock_irqrestore(&(prv->lock), flags);
683 xfree(svc);
684 }
686 static void
687 csched_vcpu_destroy(const struct scheduler *ops, struct vcpu *vc)
688 {
689 struct csched_vcpu * const svc = CSCHED_VCPU(vc);
690 struct csched_dom * const sdom = svc->sdom;
692 CSCHED_STAT_CRANK(vcpu_destroy);
694 BUG_ON( sdom == NULL );
695 BUG_ON( !list_empty(&svc->runq_elem) );
697 csched_free_vdata(ops, svc);
698 }
700 static void
701 csched_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
702 {
703 struct csched_vcpu * const svc = CSCHED_VCPU(vc);
705 CSCHED_STAT_CRANK(vcpu_sleep);
707 BUG_ON( is_idle_vcpu(vc) );
709 if ( per_cpu(schedule_data, vc->processor).curr == vc )
710 cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ);
711 else if ( __vcpu_on_runq(svc) )
712 __runq_remove(svc);
713 }
715 static void
716 csched_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
717 {
718 struct csched_vcpu * const svc = CSCHED_VCPU(vc);
719 const unsigned int cpu = vc->processor;
721 BUG_ON( is_idle_vcpu(vc) );
723 if ( unlikely(per_cpu(schedule_data, cpu).curr == vc) )
724 {
725 CSCHED_STAT_CRANK(vcpu_wake_running);
726 return;
727 }
728 if ( unlikely(__vcpu_on_runq(svc)) )
729 {
730 CSCHED_STAT_CRANK(vcpu_wake_onrunq);
731 return;
732 }
734 if ( likely(vcpu_runnable(vc)) )
735 CSCHED_STAT_CRANK(vcpu_wake_runnable);
736 else
737 CSCHED_STAT_CRANK(vcpu_wake_not_runnable);
739 /*
740 * We temporarily boost the priority of waking VCPUs!
741 *
742 * If this VCPU consumes a non-negligible amount of CPU, it
743 * will eventually find itself in the credit accounting code
744 * path where its priority will be reset to normal.
745 *
746 * If on the other hand the VCPU consumes little CPU and is
747 * blocking and awoken a lot (doing I/O for example), its
748 * priority will remain boosted, optimizing its wake-to-run
749 * latencies.
750 *
751 * This allows wake-to-run latency sensitive VCPUs to preempt
752 * more CPU resource intensive VCPUs without impacting overall
753 * system fairness.
754 *
755 * The one exception is for VCPUs of capped domains unpausing
756 * after earning credits they had overspent. We don't boost
757 * those.
758 */
759 if ( svc->pri == CSCHED_PRI_TS_UNDER &&
760 !(svc->flags & CSCHED_FLAG_VCPU_PARKED) )
761 {
762 svc->pri = CSCHED_PRI_TS_BOOST;
763 }
765 /* Put the VCPU on the runq and tickle CPUs */
766 __runq_insert(cpu, svc);
767 __runq_tickle(cpu, svc);
768 }
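/*
 * (Net effect of the boost above: an I/O-bound vcpu that blocks and wakes
 * frequently keeps CSCHED_PRI_TS_BOOST and preempts CPU-bound UNDER/OVER
 * vcpus each time it wakes, until a 10ms tick catches it running and
 * csched_vcpu_acct() demotes it back to UNDER.)
 */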
770 static void
771 csched_vcpu_yield(const struct scheduler *ops, struct vcpu *vc)
772 {
773 struct csched_vcpu * const sv = CSCHED_VCPU(vc);
775 if ( !sched_credit_default_yield )
776 {
777 /* Let the scheduler know that this vcpu is trying to yield */
778 sv->flags |= CSCHED_FLAG_VCPU_YIELD;
779 }
780 }
782 static int
783 csched_dom_cntl(
784 const struct scheduler *ops,
785 struct domain *d,
786 struct xen_domctl_scheduler_op *op)
787 {
788 struct csched_dom * const sdom = CSCHED_DOM(d);
789 struct csched_private *prv = CSCHED_PRIV(ops);
790 unsigned long flags;
792 if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
793 {
794 op->u.credit.weight = sdom->weight;
795 op->u.credit.cap = sdom->cap;
796 }
797 else
798 {
799 ASSERT(op->cmd == XEN_DOMCTL_SCHEDOP_putinfo);
801 spin_lock_irqsave(&prv->lock, flags);
803 if ( op->u.credit.weight != 0 )
804 {
805 if ( !list_empty(&sdom->active_sdom_elem) )
806 {
807 prv->weight -= sdom->weight;
808 prv->weight += op->u.credit.weight;
809 }
810 sdom->weight = op->u.credit.weight;
811 }
813 if ( op->u.credit.cap != (uint16_t)~0U )
814 sdom->cap = op->u.credit.cap;
816 spin_unlock_irqrestore(&prv->lock, flags);
817 }
819 return 0;
820 }
822 static void *
823 csched_alloc_domdata(const struct scheduler *ops, struct domain *dom)
824 {
825 struct csched_dom *sdom;
827 sdom = xmalloc(struct csched_dom);
828 if ( sdom == NULL )
829 return NULL;
830 memset(sdom, 0, sizeof(*sdom));
832 /* Initialize credit and weight */
833 INIT_LIST_HEAD(&sdom->active_vcpu);
834 sdom->active_vcpu_count = 0;
835 INIT_LIST_HEAD(&sdom->active_sdom_elem);
836 sdom->dom = dom;
837 sdom->weight = CSCHED_DEFAULT_WEIGHT;
838 sdom->cap = 0U;
840 return (void *)sdom;
841 }
843 static int
844 csched_dom_init(const struct scheduler *ops, struct domain *dom)
845 {
846 struct csched_dom *sdom;
848 CSCHED_STAT_CRANK(dom_init);
850 if ( is_idle_domain(dom) )
851 return 0;
853 sdom = csched_alloc_domdata(ops, dom);
854 if ( sdom == NULL )
855 return -ENOMEM;
857 dom->sched_priv = sdom;
859 return 0;
860 }
862 static void
863 csched_free_domdata(const struct scheduler *ops, void *data)
864 {
865 xfree(data);
866 }
868 static void
869 csched_dom_destroy(const struct scheduler *ops, struct domain *dom)
870 {
871 CSCHED_STAT_CRANK(dom_destroy);
872 csched_free_domdata(ops, CSCHED_DOM(dom));
873 }
875 /*
876 * This is an O(n) optimized sort of the runq.
877 *
878 * Time-share VCPUs can only be one of two priorities, UNDER or OVER. We walk
879 through the runq and move up any UNDERs that are preceded by OVERs. We
880 * remember the last UNDER to make the move up operation O(1).
881 */
882 static void
883 csched_runq_sort(struct csched_private *prv, unsigned int cpu)
884 {
885 struct csched_pcpu * const spc = CSCHED_PCPU(cpu);
886 struct list_head *runq, *elem, *next, *last_under;
887 struct csched_vcpu *svc_elem;
888 unsigned long flags;
889 int sort_epoch;
891 sort_epoch = prv->runq_sort;
892 if ( sort_epoch == spc->runq_sort_last )
893 return;
895 spc->runq_sort_last = sort_epoch;
897 spin_lock_irqsave(per_cpu(schedule_data, cpu).schedule_lock, flags);
899 runq = &spc->runq;
900 elem = runq->next;
901 last_under = runq;
903 while ( elem != runq )
904 {
905 next = elem->next;
906 svc_elem = __runq_elem(elem);
908 if ( svc_elem->pri >= CSCHED_PRI_TS_UNDER )
909 {
910 /* does elem need to move up the runq? */
911 if ( elem->prev != last_under )
912 {
913 list_del(elem);
914 list_add(elem, last_under);
915 }
916 last_under = elem;
917 }
919 elem = next;
920 }
922 spin_unlock_irqrestore(per_cpu(schedule_data, cpu).schedule_lock, flags);
923 }
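/*
 * (Example: a runq ordered OVER, UNDER, OVER, UNDER comes out as UNDER,
 * UNDER, OVER, OVER, with the relative order inside each class preserved;
 * BOOST vcpus count as "under" here since their priority is also
 * >= CSCHED_PRI_TS_UNDER.)
 */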
925 static void
926 csched_acct(void* dummy)
927 {
928 struct csched_private *prv = dummy;
929 unsigned long flags;
930 struct list_head *iter_vcpu, *next_vcpu;
931 struct list_head *iter_sdom, *next_sdom;
932 struct csched_vcpu *svc;
933 struct csched_dom *sdom;
934 uint32_t credit_total;
935 uint32_t weight_total;
936 uint32_t weight_left;
937 uint32_t credit_fair;
938 uint32_t credit_peak;
939 uint32_t credit_cap;
940 int credit_balance;
941 int credit_xtra;
942 int credit;
945 spin_lock_irqsave(&prv->lock, flags);
947 weight_total = prv->weight;
948 credit_total = prv->credit;
950 /* Converge balance towards 0 when it drops negative */
951 if ( prv->credit_balance < 0 )
952 {
953 credit_total -= prv->credit_balance;
954 CSCHED_STAT_CRANK(acct_balance);
955 }
957 if ( unlikely(weight_total == 0) )
958 {
959 prv->credit_balance = 0;
960 spin_unlock_irqrestore(&prv->lock, flags);
961 CSCHED_STAT_CRANK(acct_no_work);
962 goto out;
963 }
965 CSCHED_STAT_CRANK(acct_run);
967 weight_left = weight_total;
968 credit_balance = 0;
969 credit_xtra = 0;
970 credit_cap = 0U;
972 list_for_each_safe( iter_sdom, next_sdom, &prv->active_sdom )
973 {
974 sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
976 BUG_ON( is_idle_domain(sdom->dom) );
977 BUG_ON( sdom->active_vcpu_count == 0 );
978 BUG_ON( sdom->weight == 0 );
979 BUG_ON( sdom->weight > weight_left );
981 weight_left -= sdom->weight;
983 /*
984 * A domain's fair share is computed using its weight in competition
985 * with that of all other active domains.
986 *
987 * At most, a domain can use credits to run all its active VCPUs
988 * for one full accounting period. We allow a domain to earn more
989 * only when the system-wide credit balance is negative.
990 */
991 credit_peak = sdom->active_vcpu_count * CSCHED_CREDITS_PER_ACCT;
992 if ( prv->credit_balance < 0 )
993 {
994 credit_peak += ( ( -prv->credit_balance * sdom->weight) +
995 (weight_total - 1)
996 ) / weight_total;
997 }
999 if ( sdom->cap != 0U )
1001 credit_cap = ((sdom->cap * CSCHED_CREDITS_PER_ACCT) + 99) / 100;
1002 if ( credit_cap < credit_peak )
1003 credit_peak = credit_cap;
1005 credit_cap = ( credit_cap + ( sdom->active_vcpu_count - 1 )
1006 ) / sdom->active_vcpu_count;
1009 credit_fair = ( ( credit_total * sdom->weight) + (weight_total - 1)
1010 ) / weight_total;
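/*
 * (Worked example with illustrative numbers: on a one-CPU system
 * credit_total is 300, so two active domains with weights 256 and 512
 * receive credit_fair of ceil(300*256/768) = 100 and ceil(300*512/768)
 * = 200 respectively, before being capped at credit_peak just below.)
 */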
1012 if ( credit_fair < credit_peak )
1014 credit_xtra = 1;
1016 else
1018 if ( weight_left != 0U )
1020 /* Give other domains a chance at unused credits */
1021 credit_total += ( ( ( credit_fair - credit_peak
1022 ) * weight_total
1023 ) + ( weight_left - 1 )
1024 ) / weight_left;
1027 if ( credit_xtra )
1029 /*
1030 * Lazily keep domains with extra credits at the head of
1031 * the queue to give others a chance at them in future
1032 * accounting periods.
1033 */
1034 CSCHED_STAT_CRANK(acct_reorder);
1035 list_del(&sdom->active_sdom_elem);
1036 list_add(&sdom->active_sdom_elem, &prv->active_sdom);
1039 credit_fair = credit_peak;
1042 /* Compute fair share per VCPU */
1043 credit_fair = ( credit_fair + ( sdom->active_vcpu_count - 1 )
1044 ) / sdom->active_vcpu_count;
1047 list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu )
1049 svc = list_entry(iter_vcpu, struct csched_vcpu, active_vcpu_elem);
1050 BUG_ON( sdom != svc->sdom );
1052 /* Increment credit */
1053 atomic_add(credit_fair, &svc->credit);
1054 credit = atomic_read(&svc->credit);
1056 /*
1057 * Recompute priority or, if VCPU is idling, remove it from
1058 * the active list.
1059 */
1060 if ( credit < 0 )
1062 svc->pri = CSCHED_PRI_TS_OVER;
1064 /* Park running VCPUs of capped-out domains */
1065 if ( sdom->cap != 0U &&
1066 credit < -credit_cap &&
1067 !(svc->flags & CSCHED_FLAG_VCPU_PARKED) )
1069 CSCHED_STAT_CRANK(vcpu_park);
1070 vcpu_pause_nosync(svc->vcpu);
1071 svc->flags |= CSCHED_FLAG_VCPU_PARKED;
1074 /* Lower bound on credits */
1075 if ( credit < -CSCHED_CREDITS_PER_TSLICE )
1077 CSCHED_STAT_CRANK(acct_min_credit);
1078 credit = -CSCHED_CREDITS_PER_TSLICE;
1079 atomic_set(&svc->credit, credit);
1082 else
1084 svc->pri = CSCHED_PRI_TS_UNDER;
1086 /* Unpark any capped domains whose credits go positive */
1087 if ( svc->flags & CSCHED_FLAG_VCPU_PARKED)
1089 /*
1090 * It's important to unset the flag AFTER the unpause()
1091 * call to make sure the VCPU's priority is not boosted
1092 * if it is woken up here.
1093 */
1094 CSCHED_STAT_CRANK(vcpu_unpark);
1095 vcpu_unpause(svc->vcpu);
1096 svc->flags &= ~CSCHED_FLAG_VCPU_PARKED;
1099 /* Upper bound on credits means VCPU stops earning */
1100 if ( credit > CSCHED_CREDITS_PER_TSLICE )
1102 __csched_vcpu_acct_stop_locked(prv, svc);
1103 credit = 0;
1104 atomic_set(&svc->credit, credit);
1108 CSCHED_VCPU_STAT_SET(svc, credit_last, credit);
1109 CSCHED_VCPU_STAT_SET(svc, credit_incr, credit_fair);
1110 credit_balance += credit;
1114 prv->credit_balance = credit_balance;
1116 spin_unlock_irqrestore(&prv->lock, flags);
1118 /* Inform each CPU that its runq needs to be sorted */
1119 prv->runq_sort++;
1121 out:
1122 set_timer( &prv->master_ticker, NOW() +
1123 MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT );
1126 static void
1127 csched_tick(void *_cpu)
1129 unsigned int cpu = (unsigned long)_cpu;
1130 struct csched_pcpu *spc = CSCHED_PCPU(cpu);
1131 struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));
1133 spc->tick++;
1135 /*
1136 * Accounting for running VCPU
1137 */
1138 if ( !is_idle_vcpu(current) )
1139 csched_vcpu_acct(prv, cpu);
1141 /*
1142 * Check if runq needs to be sorted
1144 * Every physical CPU resorts the runq after the accounting master has
1145 * modified priorities. This is a special O(n) sort and runs at most
1146 * once per accounting period (currently 30 milliseconds).
1147 */
1148 csched_runq_sort(prv, cpu);
1150 set_timer(&spc->ticker, NOW() + MILLISECS(CSCHED_MSECS_PER_TICK));
1153 static struct csched_vcpu *
1154 csched_runq_steal(int peer_cpu, int cpu, int pri)
1156 const struct csched_pcpu * const peer_pcpu = CSCHED_PCPU(peer_cpu);
1157 const struct vcpu * const peer_vcpu = per_cpu(schedule_data, peer_cpu).curr;
1158 struct csched_vcpu *speer;
1159 struct list_head *iter;
1160 struct vcpu *vc;
1162 /*
1163 * Don't steal from an idle CPU's runq because it is about to
1164 * pick that work up itself.
1165 */
1166 if ( peer_pcpu != NULL && !is_idle_vcpu(peer_vcpu) )
1168 list_for_each( iter, &peer_pcpu->runq )
1170 speer = __runq_elem(iter);
1172 /*
1173 * If next available VCPU here is not of strictly higher
1174 * priority than ours, this PCPU is useless to us.
1175 */
1176 if ( speer->pri <= pri )
1177 break;
1179 /* Is this VCPU runnable on our PCPU? */
1180 vc = speer->vcpu;
1181 BUG_ON( is_idle_vcpu(vc) );
1183 if (__csched_vcpu_is_migrateable(vc, cpu))
1185 /* We got a candidate. Grab it! */
1186 CSCHED_VCPU_STAT_CRANK(speer, migrate_q);
1187 CSCHED_STAT_CRANK(migrate_queued);
1188 WARN_ON(vc->is_urgent);
1189 __runq_remove(speer);
1190 vc->processor = cpu;
1191 return speer;
1196 CSCHED_STAT_CRANK(steal_peer_idle);
1197 return NULL;
1200 static struct csched_vcpu *
1201 csched_load_balance(struct csched_private *prv, int cpu,
1202 struct csched_vcpu *snext, bool_t *stolen)
1204 struct csched_vcpu *speer;
1205 cpumask_t workers;
1206 cpumask_t *online;
1207 int peer_cpu;
1209 BUG_ON( cpu != snext->vcpu->processor );
1210 online = CSCHED_CPUONLINE(per_cpu(cpupool, cpu));
1212 /* If this CPU is going offline we shouldn't steal work. */
1213 if ( unlikely(!cpu_isset(cpu, *online)) )
1214 goto out;
1216 if ( snext->pri == CSCHED_PRI_IDLE )
1217 CSCHED_STAT_CRANK(load_balance_idle);
1218 else if ( snext->pri == CSCHED_PRI_TS_OVER )
1219 CSCHED_STAT_CRANK(load_balance_over);
1220 else
1221 CSCHED_STAT_CRANK(load_balance_other);
1223 /*
1224 * Peek at non-idling CPUs in the system, starting with our
1225 * immediate neighbour.
1226 */
1227 cpus_andnot(workers, *online, prv->idlers);
1228 cpu_clear(cpu, workers);
1229 peer_cpu = cpu;
1231 while ( !cpus_empty(workers) )
1233 peer_cpu = cycle_cpu(peer_cpu, workers);
1234 cpu_clear(peer_cpu, workers);
1236 /*
1237 * Get ahold of the scheduler lock for this peer CPU.
1239 * Note: We don't spin on this lock but simply try it. Spinning could
1240 * cause a deadlock if the peer CPU is also load balancing and trying
1241 * to lock this CPU.
1242 */
1243 if ( !spin_trylock(per_cpu(schedule_data, peer_cpu).schedule_lock) )
1245 CSCHED_STAT_CRANK(steal_trylock_failed);
1246 continue;
1249 /*
1250 * Any work over there to steal?
1251 */
1252 speer = csched_runq_steal(peer_cpu, cpu, snext->pri);
1253 spin_unlock(per_cpu(schedule_data, peer_cpu).schedule_lock);
1254 if ( speer != NULL )
1256 *stolen = 1;
1257 return speer;
1261 out:
1262 /* Failed to find more important work elsewhere... */
1263 __runq_remove(snext);
1264 return snext;
1267 /*
1268 * This function is in the critical path. It is designed to be simple and
1269 * fast for the common case.
1270 */
1271 static struct task_slice
1272 csched_schedule(
1273 const struct scheduler *ops, s_time_t now, bool_t tasklet_work_scheduled)
1275 const int cpu = smp_processor_id();
1276 struct list_head * const runq = RUNQ(cpu);
1277 struct csched_vcpu * const scurr = CSCHED_VCPU(current);
1278 struct csched_private *prv = CSCHED_PRIV(ops);
1279 struct csched_vcpu *snext;
1280 struct task_slice ret;
1282 CSCHED_STAT_CRANK(schedule);
1283 CSCHED_VCPU_CHECK(current);
1285 if ( !is_idle_vcpu(scurr->vcpu) )
1287 /* Update credits of a non-idle VCPU. */
1288 burn_credits(scurr, now);
1289 scurr->start_time -= now;
1291 else
1293 /* Re-instate a boosted idle VCPU as normal-idle. */
1294 scurr->pri = CSCHED_PRI_IDLE;
1297 /*
1298 * Select next runnable local VCPU (ie top of local runq)
1299 */
1300 if ( vcpu_runnable(current) )
1301 __runq_insert(cpu, scurr);
1302 else
1303 BUG_ON( is_idle_vcpu(current) || list_empty(runq) );
1305 snext = __runq_elem(runq->next);
1306 ret.migrated = 0;
1308 /* Tasklet work (which runs in idle VCPU context) overrides all else. */
1309 if ( tasklet_work_scheduled )
1311 snext = CSCHED_VCPU(idle_vcpu[cpu]);
1312 snext->pri = CSCHED_PRI_TS_BOOST;
1315 /*
1316 * Clear YIELD flag before scheduling out
1317 */
1318 if ( scurr->flags & CSCHED_FLAG_VCPU_YIELD )
1319 scurr->flags &= ~(CSCHED_FLAG_VCPU_YIELD);
1321 /*
1322 * SMP Load balance:
1324 * If the next highest priority local runnable VCPU has already eaten
1325 * through its credits, look on other PCPUs to see if we have more
1326 * urgent work... If not, csched_load_balance() will return snext, but
1327 * already removed from the runq.
1328 */
1329 if ( snext->pri > CSCHED_PRI_TS_OVER )
1330 __runq_remove(snext);
1331 else
1332 snext = csched_load_balance(prv, cpu, snext, &ret.migrated);
1334 /*
1335 * Update idlers mask if necessary. When we're idling, other CPUs
1336 * will tickle us when they get extra work.
1337 */
1338 if ( snext->pri == CSCHED_PRI_IDLE )
1340 if ( !cpu_isset(cpu, prv->idlers) )
1341 cpu_set(cpu, prv->idlers);
1343 else if ( cpu_isset(cpu, prv->idlers) )
1345 cpu_clear(cpu, prv->idlers);
1348 if ( !is_idle_vcpu(snext->vcpu) )
1349 snext->start_time += now;
1351 /*
1352 * Return task to run next...
1353 */
1354 ret.time = (is_idle_vcpu(snext->vcpu) ?
1355 -1 : MILLISECS(CSCHED_MSECS_PER_TSLICE));
1356 ret.task = snext->vcpu;
1358 CSCHED_VCPU_CHECK(ret.task);
1359 return ret;
1362 static void
1363 csched_dump_vcpu(struct csched_vcpu *svc)
1365 struct csched_dom * const sdom = svc->sdom;
1367 printk("[%i.%i] pri=%i flags=%x cpu=%i",
1368 svc->vcpu->domain->domain_id,
1369 svc->vcpu->vcpu_id,
1370 svc->pri,
1371 svc->flags,
1372 svc->vcpu->processor);
1374 if ( sdom )
1376 printk(" credit=%i [w=%u]", atomic_read(&svc->credit), sdom->weight);
1377 #ifdef CSCHED_STATS
1378 printk(" (%d+%u) {a/i=%u/%u m=%u+%u}",
1379 svc->stats.credit_last,
1380 svc->stats.credit_incr,
1381 svc->stats.state_active,
1382 svc->stats.state_idle,
1383 svc->stats.migrate_q,
1384 svc->stats.migrate_r);
1385 #endif
1388 printk("\n");
1391 static void
1392 csched_dump_pcpu(const struct scheduler *ops, int cpu)
1394 struct list_head *runq, *iter;
1395 struct csched_pcpu *spc;
1396 struct csched_vcpu *svc;
1397 int loop;
1398 #define cpustr keyhandler_scratch
1400 spc = CSCHED_PCPU(cpu);
1401 runq = &spc->runq;
1403 cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_sibling_map, cpu));
1404 printk(" sort=%d, sibling=%s, ", spc->runq_sort_last, cpustr);
1405 cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_core_map, cpu));
1406 printk("core=%s\n", cpustr);
1408 /* current VCPU */
1409 svc = CSCHED_VCPU(per_cpu(schedule_data, cpu).curr);
1410 if ( svc )
1412 printk("\trun: ");
1413 csched_dump_vcpu(svc);
1416 loop = 0;
1417 list_for_each( iter, runq )
1419 svc = __runq_elem(iter);
1420 if ( svc )
1422 printk("\t%3d: ", ++loop);
1423 csched_dump_vcpu(svc);
1426 #undef cpustr
1429 static void
1430 csched_dump(const struct scheduler *ops)
1432 struct list_head *iter_sdom, *iter_svc;
1433 struct csched_private *prv = CSCHED_PRIV(ops);
1434 int loop;
1435 #define idlers_buf keyhandler_scratch
1437 printk("info:\n"
1438 "\tncpus = %u\n"
1439 "\tmaster = %u\n"
1440 "\tcredit = %u\n"
1441 "\tcredit balance = %d\n"
1442 "\tweight = %u\n"
1443 "\trunq_sort = %u\n"
1444 "\tdefault-weight = %d\n"
1445 "\tmsecs per tick = %dms\n"
1446 "\tcredits per msec = %d\n"
1447 "\tticks per tslice = %d\n"
1448 "\tticks per acct = %d\n"
1449 "\tmigration delay = %uus\n",
1450 prv->ncpus,
1451 prv->master,
1452 prv->credit,
1453 prv->credit_balance,
1454 prv->weight,
1455 prv->runq_sort,
1456 CSCHED_DEFAULT_WEIGHT,
1457 CSCHED_MSECS_PER_TICK,
1458 CSCHED_CREDITS_PER_MSEC,
1459 CSCHED_TICKS_PER_TSLICE,
1460 CSCHED_TICKS_PER_ACCT,
1461 vcpu_migration_delay);
1463 cpumask_scnprintf(idlers_buf, sizeof(idlers_buf), prv->idlers);
1464 printk("idlers: %s\n", idlers_buf);
1466 printk("active vcpus:\n");
1467 loop = 0;
1468 list_for_each( iter_sdom, &prv->active_sdom )
1470 struct csched_dom *sdom;
1471 sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
1473 list_for_each( iter_svc, &sdom->active_vcpu )
1475 struct csched_vcpu *svc;
1476 svc = list_entry(iter_svc, struct csched_vcpu, active_vcpu_elem);
1478 printk("\t%3d: ", ++loop);
1479 csched_dump_vcpu(svc);
1482 #undef idlers_buf
1485 static int
1486 csched_init(struct scheduler *ops)
1488 struct csched_private *prv;
1490 prv = xmalloc(struct csched_private);
1491 if ( prv == NULL )
1492 return -ENOMEM;
1494 memset(prv, 0, sizeof(*prv));
1495 ops->sched_data = prv;
1496 spin_lock_init(&prv->lock);
1497 INIT_LIST_HEAD(&prv->active_sdom);
1498 prv->master = UINT_MAX;
1500 return 0;
1503 static void
1504 csched_deinit(const struct scheduler *ops)
1506 struct csched_private *prv;
1508 prv = CSCHED_PRIV(ops);
1509 if ( prv != NULL )
1510 xfree(prv);
1513 static void csched_tick_suspend(const struct scheduler *ops, unsigned int cpu)
1515 struct csched_pcpu *spc;
1517 spc = CSCHED_PCPU(cpu);
1519 stop_timer(&spc->ticker);
1522 static void csched_tick_resume(const struct scheduler *ops, unsigned int cpu)
1524 struct csched_pcpu *spc;
1525 uint64_t now = NOW();
1527 spc = CSCHED_PCPU(cpu);
1529 set_timer(&spc->ticker, now + MILLISECS(CSCHED_MSECS_PER_TICK)
1530 - now % MILLISECS(CSCHED_MSECS_PER_TICK) );
1533 static struct csched_private _csched_priv;
1535 const struct scheduler sched_credit_def = {
1536 .name = "SMP Credit Scheduler",
1537 .opt_name = "credit",
1538 .sched_id = XEN_SCHEDULER_CREDIT,
1539 .sched_data = &_csched_priv,
1541 .init_domain = csched_dom_init,
1542 .destroy_domain = csched_dom_destroy,
1544 .insert_vcpu = csched_vcpu_insert,
1545 .destroy_vcpu = csched_vcpu_destroy,
1547 .sleep = csched_vcpu_sleep,
1548 .wake = csched_vcpu_wake,
1549 .yield = csched_vcpu_yield,
1551 .adjust = csched_dom_cntl,
1553 .pick_cpu = csched_cpu_pick,
1554 .do_schedule = csched_schedule,
1556 .dump_cpu_state = csched_dump_pcpu,
1557 .dump_settings = csched_dump,
1558 .init = csched_init,
1559 .deinit = csched_deinit,
1560 .alloc_vdata = csched_alloc_vdata,
1561 .free_vdata = csched_free_vdata,
1562 .alloc_pdata = csched_alloc_pdata,
1563 .free_pdata = csched_free_pdata,
1564 .alloc_domdata = csched_alloc_domdata,
1565 .free_domdata = csched_free_domdata,
1567 .tick_suspend = csched_tick_suspend,
1568 .tick_resume = csched_tick_resume,
1569 };