debuggers.hg

view xen/common/sched_credit.c @ 22676:e8acb9753ff1

Use bool_t for various boolean variables

... decreasing cache footprint. As a prerequisite this requires making
cmdline_parse() a little more flexible.

Also remove a few variables altogether, and adjust sections
annotations for several others.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Keir Fraser <keir@xen.org>
author Keir Fraser <keir@xen.org>
date Fri Dec 24 10:10:45 2010 +0000 (2010-12-24)
parents 05377a796952
children 700ac6445812
line source
1 /****************************************************************************
2 * (C) 2005-2006 - Emmanuel Ackaouy - XenSource Inc.
3 ****************************************************************************
4 *
5 * File: common/csched_credit.c
6 * Author: Emmanuel Ackaouy
7 *
8 * Description: Credit-based SMP CPU scheduler
9 */
11 #include <xen/config.h>
12 #include <xen/init.h>
13 #include <xen/lib.h>
14 #include <xen/sched.h>
15 #include <xen/domain.h>
16 #include <xen/delay.h>
17 #include <xen/event.h>
18 #include <xen/time.h>
19 #include <xen/perfc.h>
20 #include <xen/sched-if.h>
21 #include <xen/softirq.h>
22 #include <asm/atomic.h>
23 #include <xen/errno.h>
24 #include <xen/keyhandler.h>
26 /*
27 * CSCHED_STATS
28 *
29 * Manage very basic per-vCPU counters and stats.
30 *
31 * Useful for debugging live systems. The stats are displayed
32 * with runq dumps ('r' on the Xen console).
33 */
34 #ifdef PERF_COUNTERS
35 #define CSCHED_STATS
36 #endif
39 /*
40 * Basic constants
41 */
42 #define CSCHED_DEFAULT_WEIGHT 256
43 #define CSCHED_TICKS_PER_TSLICE 3
44 #define CSCHED_TICKS_PER_ACCT 3
45 #define CSCHED_MSECS_PER_TICK 10
46 #define CSCHED_MSECS_PER_TSLICE \
47 (CSCHED_MSECS_PER_TICK * CSCHED_TICKS_PER_TSLICE)
48 #define CSCHED_CREDITS_PER_MSEC 10
49 #define CSCHED_CREDITS_PER_TSLICE \
50 (CSCHED_CREDITS_PER_MSEC * CSCHED_MSECS_PER_TSLICE)
51 #define CSCHED_CREDITS_PER_ACCT \
52 (CSCHED_CREDITS_PER_MSEC * CSCHED_MSECS_PER_TICK * CSCHED_TICKS_PER_ACCT)
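/*
 * With the defaults above: a time slice is 3 ticks x 10ms = 30ms, a full
 * slice is worth 10 credits/ms x 30ms = 300 credits, and each 30ms
 * accounting pass distributes CSCHED_CREDITS_PER_ACCT = 300 credits per
 * online physical CPU (see csched_alloc_pdata() and csched_acct() below).
 */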
55 /*
56 * Priorities
57 */
58 #define CSCHED_PRI_TS_BOOST 0 /* time-share waking up */
59 #define CSCHED_PRI_TS_UNDER -1 /* time-share w/ credits */
60 #define CSCHED_PRI_TS_OVER -2 /* time-share w/o credits */
61 #define CSCHED_PRI_IDLE -64 /* idle */
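/*
 * Higher values are more urgent: BOOST (0) sorts ahead of UNDER (-1),
 * which sorts ahead of OVER (-2), and IDLE (-64) always loses.
 * __runq_insert() keeps each runqueue ordered by this value.
 */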
64 /*
65 * Flags
66 */
67 #define CSCHED_FLAG_VCPU_PARKED 0x0001 /* VCPU over capped credits */
68 #define CSCHED_FLAG_VCPU_YIELD 0x0002 /* VCPU yielding */
71 /*
72 * Useful macros
73 */
74 #define CSCHED_PRIV(_ops) \
75 ((struct csched_private *)((_ops)->sched_data))
76 #define CSCHED_PCPU(_c) \
77 ((struct csched_pcpu *)per_cpu(schedule_data, _c).sched_priv)
78 #define CSCHED_VCPU(_vcpu) ((struct csched_vcpu *) (_vcpu)->sched_priv)
79 #define CSCHED_DOM(_dom) ((struct csched_dom *) (_dom)->sched_priv)
80 #define RUNQ(_cpu) (&(CSCHED_PCPU(_cpu)->runq))
81 #define CSCHED_CPUONLINE(_pool) \
82 (((_pool) == NULL) ? &cpupool_free_cpus : &(_pool)->cpu_valid)
85 /*
86 * Stats
87 */
88 #define CSCHED_STAT_CRANK(_X) (perfc_incr(_X))
90 #ifdef CSCHED_STATS
92 #define CSCHED_VCPU_STATS_RESET(_V) \
93 do \
94 { \
95 memset(&(_V)->stats, 0, sizeof((_V)->stats)); \
96 } while ( 0 )
98 #define CSCHED_VCPU_STAT_CRANK(_V, _X) (((_V)->stats._X)++)
100 #define CSCHED_VCPU_STAT_SET(_V, _X, _Y) (((_V)->stats._X) = (_Y))
102 #else /* CSCHED_STATS */
104 #define CSCHED_VCPU_STATS_RESET(_V) do {} while ( 0 )
105 #define CSCHED_VCPU_STAT_CRANK(_V, _X) do {} while ( 0 )
106 #define CSCHED_VCPU_STAT_SET(_V, _X, _Y) do {} while ( 0 )
108 #endif /* CSCHED_STATS */
111 /*
112 * Boot parameters
113 */
114 static bool_t __read_mostly sched_credit_default_yield;
115 boolean_param("sched_credit_default_yield", sched_credit_default_yield);
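/*
 * Booting with "sched_credit_default_yield" restores the legacy behaviour
 * in which yields are ignored: csched_vcpu_yield() then never sets
 * CSCHED_FLAG_VCPU_YIELD, so a yielding VCPU is queued as usual instead of
 * being placed behind one lower-priority runnable VCPU in __runq_insert().
 */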
117 /*
118 * Physical CPU
119 */
120 struct csched_pcpu {
121 struct list_head runq;
122 uint32_t runq_sort_last;
123 struct timer ticker;
124 unsigned int tick;
125 unsigned int idle_bias;
126 };
128 /*
129 * Virtual CPU
130 */
131 struct csched_vcpu {
132 struct list_head runq_elem;
133 struct list_head active_vcpu_elem;
134 struct csched_dom *sdom;
135 struct vcpu *vcpu;
136 atomic_t credit;
137 s_time_t start_time; /* When we were scheduled (used for credit) */
138 uint16_t flags;
139 int16_t pri;
140 #ifdef CSCHED_STATS
141 struct {
142 int credit_last;
143 uint32_t credit_incr;
144 uint32_t state_active;
145 uint32_t state_idle;
146 uint32_t migrate_q;
147 uint32_t migrate_r;
148 } stats;
149 #endif
150 };
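/*
 * Note that credit is an atomic_t: it is drained by burn_credits() on the
 * CPU where the VCPU runs and replenished by csched_acct() on the master
 * CPU, and those two paths hold different locks.
 */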
152 /*
153 * Domain
154 */
155 struct csched_dom {
156 struct list_head active_vcpu;
157 struct list_head active_sdom_elem;
158 struct domain *dom;
159 uint16_t active_vcpu_count;
160 uint16_t weight;
161 uint16_t cap;
162 };
164 /*
165 * System-wide private data
166 */
167 struct csched_private {
168 spinlock_t lock;
169 struct list_head active_sdom;
170 uint32_t ncpus;
171 struct timer master_ticker;
172 unsigned int master;
173 cpumask_t idlers;
174 cpumask_t cpus;
175 uint32_t weight;
176 uint32_t credit;
177 int credit_balance;
178 uint32_t runq_sort;
179 };
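/*
 * Locking: the lock above protects the active_sdom list and the global
 * weight/credit bookkeeping; the per-CPU runqueues are only manipulated
 * under the per-CPU schedule lock (see csched_runq_sort(), which takes it
 * explicitly via pcpu_schedule_lock_irqsave()).
 */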
181 static void csched_tick(void *_cpu);
182 static void csched_acct(void *dummy);
184 static inline int
185 __vcpu_on_runq(struct csched_vcpu *svc)
186 {
187 return !list_empty(&svc->runq_elem);
188 }
190 static inline struct csched_vcpu *
191 __runq_elem(struct list_head *elem)
192 {
193 return list_entry(elem, struct csched_vcpu, runq_elem);
194 }
196 static inline void
197 __runq_insert(unsigned int cpu, struct csched_vcpu *svc)
198 {
199 const struct list_head * const runq = RUNQ(cpu);
200 struct list_head *iter;
202 BUG_ON( __vcpu_on_runq(svc) );
203 BUG_ON( cpu != svc->vcpu->processor );
205 list_for_each( iter, runq )
206 {
207 const struct csched_vcpu * const iter_svc = __runq_elem(iter);
208 if ( svc->pri > iter_svc->pri )
209 break;
210 }
212 /* If the vcpu yielded, try to put it behind one lower-priority
213 * runnable vcpu if we can. The next runq_sort will bring it forward
214 * within 30ms if the queue is too long. */
215 if ( svc->flags & CSCHED_FLAG_VCPU_YIELD
216 && __runq_elem(iter)->pri > CSCHED_PRI_IDLE )
217 {
218 iter=iter->next;
220 /* Some sanity checks */
221 BUG_ON(iter == runq);
222 }
224 list_add_tail(&svc->runq_elem, iter);
225 }
227 static inline void
228 __runq_remove(struct csched_vcpu *svc)
229 {
230 BUG_ON( !__vcpu_on_runq(svc) );
231 list_del_init(&svc->runq_elem);
232 }
234 static void burn_credits(struct csched_vcpu *svc, s_time_t now)
235 {
236 s_time_t delta;
237 unsigned int credits;
239 /* Assert svc is current */
240 ASSERT(svc==CSCHED_VCPU(per_cpu(schedule_data, svc->vcpu->processor).curr));
242 if ( (delta = now - svc->start_time) <= 0 )
243 return;
245 credits = (delta*CSCHED_CREDITS_PER_MSEC + MILLISECS(1)/2) / MILLISECS(1);
246 atomic_sub(credits, &svc->credit);
247 svc->start_time += (credits * MILLISECS(1)) / CSCHED_CREDITS_PER_MSEC;
248 }
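/*
 * Example: a VCPU that has run for 7.65ms since start_time is charged
 * round(7.65 * CSCHED_CREDITS_PER_MSEC) = 77 credits and start_time
 * advances by 77/10 ms, so the fraction over-charged here is returned
 * on the next call rather than lost.
 */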
250 static bool_t __read_mostly opt_tickle_one_idle = 1;
251 boolean_param("tickle_one_idle_cpu", opt_tickle_one_idle);
253 DEFINE_PER_CPU(unsigned int, last_tickle_cpu);
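/*
 * By default only one idle CPU is woken per wakeup, rotating through the
 * idlers via last_tickle_cpu to avoid a thundering herd; booting with
 * "tickle_one_idle_cpu=0" tickles every idler in the affinity mask instead.
 */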
255 static inline void
256 __runq_tickle(unsigned int cpu, struct csched_vcpu *new)
257 {
258 struct csched_vcpu * const cur =
259 CSCHED_VCPU(per_cpu(schedule_data, cpu).curr);
260 struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));
261 cpumask_t mask;
263 ASSERT(cur);
264 cpus_clear(mask);
266 /* If strictly higher priority than current VCPU, signal the CPU */
267 if ( new->pri > cur->pri )
268 {
269 if ( cur->pri == CSCHED_PRI_IDLE )
270 CSCHED_STAT_CRANK(tickle_local_idler);
271 else if ( cur->pri == CSCHED_PRI_TS_OVER )
272 CSCHED_STAT_CRANK(tickle_local_over);
273 else if ( cur->pri == CSCHED_PRI_TS_UNDER )
274 CSCHED_STAT_CRANK(tickle_local_under);
275 else
276 CSCHED_STAT_CRANK(tickle_local_other);
278 cpu_set(cpu, mask);
279 }
281 /*
282 * If this CPU has at least two runnable VCPUs, we tickle any idlers to
283 * let them know there is runnable work in the system...
284 */
285 if ( cur->pri > CSCHED_PRI_IDLE )
286 {
287 if ( cpus_empty(prv->idlers) )
288 {
289 CSCHED_STAT_CRANK(tickle_idlers_none);
290 }
291 else
292 {
293 cpumask_t idle_mask;
295 cpus_and(idle_mask, prv->idlers, new->vcpu->cpu_affinity);
296 if ( !cpus_empty(idle_mask) )
297 {
298 CSCHED_STAT_CRANK(tickle_idlers_some);
299 if ( opt_tickle_one_idle )
300 {
301 this_cpu(last_tickle_cpu) =
302 cycle_cpu(this_cpu(last_tickle_cpu), idle_mask);
303 cpu_set(this_cpu(last_tickle_cpu), mask);
304 }
305 else
306 cpus_or(mask, mask, idle_mask);
307 }
308 cpus_and(mask, mask, new->vcpu->cpu_affinity);
309 }
310 }
312 /* Send scheduler interrupts to designated CPUs */
313 if ( !cpus_empty(mask) )
314 cpumask_raise_softirq(mask, SCHEDULE_SOFTIRQ);
315 }
317 static void
318 csched_free_pdata(const struct scheduler *ops, void *pcpu, int cpu)
319 {
320 struct csched_private *prv = CSCHED_PRIV(ops);
321 struct csched_pcpu *spc = pcpu;
322 unsigned long flags;
324 if ( spc == NULL )
325 return;
327 spin_lock_irqsave(&prv->lock, flags);
329 prv->credit -= CSCHED_CREDITS_PER_ACCT;
330 prv->ncpus--;
331 cpu_clear(cpu, prv->idlers);
332 cpu_clear(cpu, prv->cpus);
333 if ( (prv->master == cpu) && (prv->ncpus > 0) )
334 {
335 prv->master = first_cpu(prv->cpus);
336 migrate_timer(&prv->master_ticker, prv->master);
337 }
338 kill_timer(&spc->ticker);
339 if ( prv->ncpus == 0 )
340 kill_timer(&prv->master_ticker);
342 spin_unlock_irqrestore(&prv->lock, flags);
344 xfree(spc);
345 }
347 static void *
348 csched_alloc_pdata(const struct scheduler *ops, int cpu)
349 {
350 struct csched_pcpu *spc;
351 struct csched_private *prv = CSCHED_PRIV(ops);
352 unsigned long flags;
354 /* Allocate per-PCPU info */
355 spc = xmalloc(struct csched_pcpu);
356 if ( spc == NULL )
357 return NULL;
358 memset(spc, 0, sizeof(*spc));
360 spin_lock_irqsave(&prv->lock, flags);
362 /* Initialize/update system-wide config */
363 prv->credit += CSCHED_CREDITS_PER_ACCT;
364 prv->ncpus++;
365 cpu_set(cpu, prv->cpus);
366 if ( prv->ncpus == 1 )
367 {
368 prv->master = cpu;
369 init_timer(&prv->master_ticker, csched_acct, prv, cpu);
370 set_timer(&prv->master_ticker, NOW() +
371 MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT);
372 }
374 init_timer(&spc->ticker, csched_tick, (void *)(unsigned long)cpu, cpu);
375 set_timer(&spc->ticker, NOW() + MILLISECS(CSCHED_MSECS_PER_TICK));
377 INIT_LIST_HEAD(&spc->runq);
378 spc->runq_sort_last = prv->runq_sort;
379 spc->idle_bias = NR_CPUS - 1;
380 if ( per_cpu(schedule_data, cpu).sched_priv == NULL )
381 per_cpu(schedule_data, cpu).sched_priv = spc;
383 /* Start off idling... */
384 BUG_ON(!is_idle_vcpu(per_cpu(schedule_data, cpu).curr));
385 cpu_set(cpu, prv->idlers);
387 spin_unlock_irqrestore(&prv->lock, flags);
389 return spc;
390 }
392 #ifndef NDEBUG
393 static inline void
394 __csched_vcpu_check(struct vcpu *vc)
395 {
396 struct csched_vcpu * const svc = CSCHED_VCPU(vc);
397 struct csched_dom * const sdom = svc->sdom;
399 BUG_ON( svc->vcpu != vc );
400 BUG_ON( sdom != CSCHED_DOM(vc->domain) );
401 if ( sdom )
402 {
403 BUG_ON( is_idle_vcpu(vc) );
404 BUG_ON( sdom->dom != vc->domain );
405 }
406 else
407 {
408 BUG_ON( !is_idle_vcpu(vc) );
409 }
411 CSCHED_STAT_CRANK(vcpu_check);
412 }
413 #define CSCHED_VCPU_CHECK(_vc) (__csched_vcpu_check(_vc))
414 #else
415 #define CSCHED_VCPU_CHECK(_vc)
416 #endif
418 /*
419 * Delay, in microseconds, between migrations of a VCPU between PCPUs.
420 * This prevents rapid fluttering of a VCPU between CPUs, and reduces the
421 * implicit overheads such as cache-warming. 1ms (1000) has been measured
422 * as a good value.
423 */
424 static unsigned int vcpu_migration_delay;
425 integer_param("vcpu_migration_delay", vcpu_migration_delay);
427 void set_vcpu_migration_delay(unsigned int delay)
428 {
429 vcpu_migration_delay = delay;
430 }
432 unsigned int get_vcpu_migration_delay(void)
433 {
434 return vcpu_migration_delay;
435 }
437 static inline int
438 __csched_vcpu_is_cache_hot(struct vcpu *v)
439 {
440 int hot = ((NOW() - v->last_run_time) <
441 ((uint64_t)vcpu_migration_delay * 1000u));
443 if ( hot )
444 CSCHED_STAT_CRANK(vcpu_hot);
446 return hot;
447 }
449 static inline int
450 __csched_vcpu_is_migrateable(struct vcpu *vc, int dest_cpu)
451 {
452 /*
453 * Don't pick up work that's in the peer's scheduling tail or hot on
454 * peer PCPU. Only pick up work that's allowed to run on our CPU.
455 */
456 return !vc->is_running &&
457 !__csched_vcpu_is_cache_hot(vc) &&
458 cpu_isset(dest_cpu, vc->cpu_affinity);
459 }
461 static int
462 _csched_cpu_pick(const struct scheduler *ops, struct vcpu *vc, bool_t commit)
463 {
464 cpumask_t cpus;
465 cpumask_t idlers;
466 cpumask_t *online;
467 int cpu;
469 /*
470 * Pick from online CPUs in VCPU's affinity mask, giving a
471 * preference to its current processor if it's in there.
472 */
473 online = CSCHED_CPUONLINE(vc->domain->cpupool);
474 cpus_and(cpus, *online, vc->cpu_affinity);
475 cpu = cpu_isset(vc->processor, cpus)
476 ? vc->processor
477 : cycle_cpu(vc->processor, cpus);
478 ASSERT( !cpus_empty(cpus) && cpu_isset(cpu, cpus) );
480 /*
481 * Try to find an idle processor within the above constraints.
482 *
483 * In multi-core and multi-threaded CPUs, not all idle execution
484 * vehicles are equal!
485 *
486 * We give preference to the idle execution vehicle with the most
487 * idling neighbours in its grouping. This distributes work across
488 * distinct cores first and guarantees we don't do something stupid
489 * like run two VCPUs on co-hyperthreads while there are idle cores
490 * or sockets.
491 */
492 cpus_and(idlers, cpu_online_map, CSCHED_PRIV(ops)->idlers);
493 cpu_set(cpu, idlers);
494 cpus_and(cpus, cpus, idlers);
495 cpu_clear(cpu, cpus);
497 while ( !cpus_empty(cpus) )
498 {
499 cpumask_t cpu_idlers;
500 cpumask_t nxt_idlers;
501 int nxt, weight_cpu, weight_nxt;
502 int migrate_factor;
504 nxt = cycle_cpu(cpu, cpus);
506 if ( cpu_isset(cpu, per_cpu(cpu_core_map, nxt)) )
507 {
508 /* We're on the same socket, so check the busy-ness of threads.
509 * Migrate if our thread group has even one fewer idler */
510 ASSERT( cpu_isset(nxt, per_cpu(cpu_core_map, cpu)) );
511 migrate_factor = 1;
512 cpus_and(cpu_idlers, idlers, per_cpu(cpu_sibling_map, cpu));
513 cpus_and(nxt_idlers, idlers, per_cpu(cpu_sibling_map, nxt));
514 }
515 else
516 {
517 /* We're on different sockets, so check the busy-ness of cores.
518 * Migrate only if the other core is twice as idle */
519 ASSERT( !cpu_isset(nxt, per_cpu(cpu_core_map, cpu)) );
520 migrate_factor = 2;
521 cpus_and(cpu_idlers, idlers, per_cpu(cpu_core_map, cpu));
522 cpus_and(nxt_idlers, idlers, per_cpu(cpu_core_map, nxt));
523 }
525 weight_cpu = cpus_weight(cpu_idlers);
526 weight_nxt = cpus_weight(nxt_idlers);
527 /* smt_power_savings: consolidate work rather than spreading it */
528 if ( ( sched_smt_power_savings
529 && (weight_cpu > weight_nxt) )
530 || ( !sched_smt_power_savings
531 && (weight_cpu * migrate_factor < weight_nxt) ) )
532 {
533 cpu = cycle_cpu(CSCHED_PCPU(nxt)->idle_bias, nxt_idlers);
534 if ( commit )
535 CSCHED_PCPU(nxt)->idle_bias = cpu;
536 cpus_andnot(cpus, cpus, per_cpu(cpu_sibling_map, cpu));
537 }
538 else
539 {
540 cpus_andnot(cpus, cpus, nxt_idlers);
541 }
542 }
544 return cpu;
545 }
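/*
 * In the comparison above, the same-socket case (migrate_factor 1) moves
 * on any strict difference in idle-thread count, while the cross-socket
 * case (migrate_factor 2) only moves when the other group has more than
 * twice as many idlers, making cross-socket migration deliberately harder.
 */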
547 static int
548 csched_cpu_pick(const struct scheduler *ops, struct vcpu *vc)
549 {
550 return _csched_cpu_pick(ops, vc, 1);
551 }
553 static inline void
554 __csched_vcpu_acct_start(struct csched_private *prv, struct csched_vcpu *svc)
555 {
556 struct csched_dom * const sdom = svc->sdom;
557 unsigned long flags;
559 spin_lock_irqsave(&prv->lock, flags);
561 if ( list_empty(&svc->active_vcpu_elem) )
562 {
563 CSCHED_VCPU_STAT_CRANK(svc, state_active);
564 CSCHED_STAT_CRANK(acct_vcpu_active);
566 sdom->active_vcpu_count++;
567 list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
568 /* Make weight per-vcpu */
569 prv->weight += sdom->weight;
570 if ( list_empty(&sdom->active_sdom_elem) )
571 {
572 list_add(&sdom->active_sdom_elem, &prv->active_sdom);
573 }
574 }
576 spin_unlock_irqrestore(&prv->lock, flags);
577 }
579 static inline void
580 __csched_vcpu_acct_stop_locked(struct csched_private *prv,
581 struct csched_vcpu *svc)
582 {
583 struct csched_dom * const sdom = svc->sdom;
585 BUG_ON( list_empty(&svc->active_vcpu_elem) );
587 CSCHED_VCPU_STAT_CRANK(svc, state_idle);
588 CSCHED_STAT_CRANK(acct_vcpu_idle);
590 BUG_ON( prv->weight < sdom->weight );
591 sdom->active_vcpu_count--;
592 list_del_init(&svc->active_vcpu_elem);
593 prv->weight -= sdom->weight;
594 if ( list_empty(&sdom->active_vcpu) )
595 {
596 list_del_init(&sdom->active_sdom_elem);
597 }
598 }
600 static void
601 csched_vcpu_acct(struct csched_private *prv, unsigned int cpu)
602 {
603 struct csched_vcpu * const svc = CSCHED_VCPU(current);
604 const struct scheduler *ops = per_cpu(scheduler, cpu);
606 ASSERT( current->processor == cpu );
607 ASSERT( svc->sdom != NULL );
609 /*
610 * If this VCPU's priority was boosted when it last awoke, reset it.
611 * If the VCPU is found here, then it's consuming a non-negligible
612 * amount of CPU resources and should no longer be boosted.
613 */
614 if ( svc->pri == CSCHED_PRI_TS_BOOST )
615 svc->pri = CSCHED_PRI_TS_UNDER;
617 /*
618 * Update credits
619 */
620 if ( !is_idle_vcpu(svc->vcpu) )
621 burn_credits(svc, NOW());
623 /*
624 * Put this VCPU and domain back on the active list if it was
625 * idling.
626 *
627 * If it's been active a while, check if we'd be better off
628 * migrating it to run elsewhere (see multi-core and multi-thread
629 * support in csched_cpu_pick()).
630 */
631 if ( list_empty(&svc->active_vcpu_elem) )
632 {
633 __csched_vcpu_acct_start(prv, svc);
634 }
635 else if ( _csched_cpu_pick(ops, current, 0) != cpu )
636 {
637 CSCHED_VCPU_STAT_CRANK(svc, migrate_r);
638 CSCHED_STAT_CRANK(migrate_running);
639 set_bit(_VPF_migrating, &current->pause_flags);
640 cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
641 }
642 }
644 static void *
645 csched_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
646 {
647 struct csched_vcpu *svc;
649 /* Allocate per-VCPU info */
650 svc = xmalloc(struct csched_vcpu);
651 if ( svc == NULL )
652 return NULL;
653 memset(svc, 0, sizeof(*svc));
655 INIT_LIST_HEAD(&svc->runq_elem);
656 INIT_LIST_HEAD(&svc->active_vcpu_elem);
657 svc->sdom = dd;
658 svc->vcpu = vc;
659 atomic_set(&svc->credit, 0);
660 svc->flags = 0U;
661 svc->pri = is_idle_domain(vc->domain) ?
662 CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER;
663 CSCHED_VCPU_STATS_RESET(svc);
664 CSCHED_STAT_CRANK(vcpu_init);
665 return svc;
666 }
668 static void
669 csched_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
670 {
671 struct csched_vcpu *svc = vc->sched_priv;
673 if ( !__vcpu_on_runq(svc) && vcpu_runnable(vc) && !vc->is_running )
674 __runq_insert(vc->processor, svc);
675 }
677 static void
678 csched_free_vdata(const struct scheduler *ops, void *priv)
679 {
680 struct csched_vcpu *svc = priv;
682 BUG_ON( !list_empty(&svc->runq_elem) );
684 xfree(svc);
685 }
687 static void
688 csched_vcpu_remove(const struct scheduler *ops, struct vcpu *vc)
689 {
690 struct csched_private *prv = CSCHED_PRIV(ops);
691 struct csched_vcpu * const svc = CSCHED_VCPU(vc);
692 struct csched_dom * const sdom = svc->sdom;
693 unsigned long flags;
695 CSCHED_STAT_CRANK(vcpu_destroy);
697 if ( __vcpu_on_runq(svc) )
698 __runq_remove(svc);
700 spin_lock_irqsave(&(prv->lock), flags);
702 if ( !list_empty(&svc->active_vcpu_elem) )
703 __csched_vcpu_acct_stop_locked(prv, svc);
705 spin_unlock_irqrestore(&(prv->lock), flags);
707 BUG_ON( sdom == NULL );
708 BUG_ON( !list_empty(&svc->runq_elem) );
709 }
711 static void
712 csched_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
713 {
714 struct csched_vcpu * const svc = CSCHED_VCPU(vc);
716 CSCHED_STAT_CRANK(vcpu_sleep);
718 BUG_ON( is_idle_vcpu(vc) );
720 if ( per_cpu(schedule_data, vc->processor).curr == vc )
721 cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ);
722 else if ( __vcpu_on_runq(svc) )
723 __runq_remove(svc);
724 }
726 static void
727 csched_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
728 {
729 struct csched_vcpu * const svc = CSCHED_VCPU(vc);
730 const unsigned int cpu = vc->processor;
732 BUG_ON( is_idle_vcpu(vc) );
734 if ( unlikely(per_cpu(schedule_data, cpu).curr == vc) )
735 {
736 CSCHED_STAT_CRANK(vcpu_wake_running);
737 return;
738 }
739 if ( unlikely(__vcpu_on_runq(svc)) )
740 {
741 CSCHED_STAT_CRANK(vcpu_wake_onrunq);
742 return;
743 }
745 if ( likely(vcpu_runnable(vc)) )
746 CSCHED_STAT_CRANK(vcpu_wake_runnable);
747 else
748 CSCHED_STAT_CRANK(vcpu_wake_not_runnable);
750 /*
751 * We temporarily boost the priority of waking VCPUs!
752 *
753 * If this VCPU consumes a non-negligible amount of CPU, it
754 * will eventually find itself in the credit accounting code
755 * path where its priority will be reset to normal.
756 *
757 * If on the other hand the VCPU consumes little CPU and is
758 * blocking and awoken a lot (doing I/O for example), its
759 * priority will remain boosted, optimizing its wake-to-run
760 * latencies.
761 *
762 * This allows wake-to-run latency sensitive VCPUs to preempt
763 * more CPU resource intensive VCPUs without impacting overall
764 * system fairness.
765 *
766 * The one exception is for VCPUs of capped domains unpausing
767 * after earning credits they had overspent. We don't boost
768 * those.
769 */
770 if ( svc->pri == CSCHED_PRI_TS_UNDER &&
771 !(svc->flags & CSCHED_FLAG_VCPU_PARKED) )
772 {
773 svc->pri = CSCHED_PRI_TS_BOOST;
774 }
776 /* Put the VCPU on the runq and tickle CPUs */
777 __runq_insert(cpu, svc);
778 __runq_tickle(cpu, svc);
779 }
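/*
 * The BOOST granted above only lasts until the 10ms tick next catches the
 * VCPU running, at which point csched_vcpu_acct() demotes it to UNDER. A
 * VCPU that sleeps between short bursts of work therefore keeps preempting
 * CPU-bound VCPUs, while one that burns whole ticks does not.
 */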
781 static void
782 csched_vcpu_yield(const struct scheduler *ops, struct vcpu *vc)
783 {
784 struct csched_vcpu * const sv = CSCHED_VCPU(vc);
786 if ( !sched_credit_default_yield )
787 {
788 /* Let the scheduler know that this vcpu is trying to yield */
789 sv->flags |= CSCHED_FLAG_VCPU_YIELD;
790 }
791 }
793 static int
794 csched_dom_cntl(
795 const struct scheduler *ops,
796 struct domain *d,
797 struct xen_domctl_scheduler_op *op)
798 {
799 struct csched_dom * const sdom = CSCHED_DOM(d);
800 struct csched_private *prv = CSCHED_PRIV(ops);
801 unsigned long flags;
803 if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
804 {
805 op->u.credit.weight = sdom->weight;
806 op->u.credit.cap = sdom->cap;
807 }
808 else
809 {
810 ASSERT(op->cmd == XEN_DOMCTL_SCHEDOP_putinfo);
812 spin_lock_irqsave(&prv->lock, flags);
814 if ( op->u.credit.weight != 0 )
815 {
816 if ( !list_empty(&sdom->active_sdom_elem) )
817 {
818 prv->weight -= sdom->weight * sdom->active_vcpu_count;
819 prv->weight += op->u.credit.weight * sdom->active_vcpu_count;
820 }
821 sdom->weight = op->u.credit.weight;
822 }
824 if ( op->u.credit.cap != (uint16_t)~0U )
825 sdom->cap = op->u.credit.cap;
827 spin_unlock_irqrestore(&prv->lock, flags);
828 }
830 return 0;
831 }
833 static void *
834 csched_alloc_domdata(const struct scheduler *ops, struct domain *dom)
835 {
836 struct csched_dom *sdom;
838 sdom = xmalloc(struct csched_dom);
839 if ( sdom == NULL )
840 return NULL;
841 memset(sdom, 0, sizeof(*sdom));
843 /* Initialize credit and weight */
844 INIT_LIST_HEAD(&sdom->active_vcpu);
845 sdom->active_vcpu_count = 0;
846 INIT_LIST_HEAD(&sdom->active_sdom_elem);
847 sdom->dom = dom;
848 sdom->weight = CSCHED_DEFAULT_WEIGHT;
849 sdom->cap = 0U;
851 return (void *)sdom;
852 }
854 static int
855 csched_dom_init(const struct scheduler *ops, struct domain *dom)
856 {
857 struct csched_dom *sdom;
859 CSCHED_STAT_CRANK(dom_init);
861 if ( is_idle_domain(dom) )
862 return 0;
864 sdom = csched_alloc_domdata(ops, dom);
865 if ( sdom == NULL )
866 return -ENOMEM;
868 dom->sched_priv = sdom;
870 return 0;
871 }
873 static void
874 csched_free_domdata(const struct scheduler *ops, void *data)
875 {
876 xfree(data);
877 }
879 static void
880 csched_dom_destroy(const struct scheduler *ops, struct domain *dom)
881 {
882 CSCHED_STAT_CRANK(dom_destroy);
883 csched_free_domdata(ops, CSCHED_DOM(dom));
884 }
886 /*
887 * This is an O(n) optimized sort of the runq.
888 *
889 * Time-share VCPUs can only be one of two priorities, UNDER or OVER. We walk
890 * through the runq and move up any UNDERs that are preceded by OVERS. We
891 * remember the last UNDER to make the move up operation O(1).
892 */
893 static void
894 csched_runq_sort(struct csched_private *prv, unsigned int cpu)
895 {
896 struct csched_pcpu * const spc = CSCHED_PCPU(cpu);
897 struct list_head *runq, *elem, *next, *last_under;
898 struct csched_vcpu *svc_elem;
899 unsigned long flags;
900 int sort_epoch;
902 sort_epoch = prv->runq_sort;
903 if ( sort_epoch == spc->runq_sort_last )
904 return;
906 spc->runq_sort_last = sort_epoch;
908 pcpu_schedule_lock_irqsave(cpu, flags);
910 runq = &spc->runq;
911 elem = runq->next;
912 last_under = runq;
914 while ( elem != runq )
915 {
916 next = elem->next;
917 svc_elem = __runq_elem(elem);
919 if ( svc_elem->pri >= CSCHED_PRI_TS_UNDER )
920 {
921 /* does elem need to move up the runq? */
922 if ( elem->prev != last_under )
923 {
924 list_del(elem);
925 list_add(elem, last_under);
926 }
927 last_under = elem;
928 }
930 elem = next;
931 }
933 pcpu_schedule_unlock_irqrestore(cpu, flags);
934 }
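/*
 * Example: after accounting, a runqueue holding [OVER, UNDER, OVER, UNDER]
 * is rewritten in one pass to [UNDER, UNDER, OVER, OVER]; each UNDER found
 * after an OVER is spliced to just behind last_under, preserving the
 * relative order within each priority class.
 */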
936 static void
937 csched_acct(void* dummy)
938 {
939 struct csched_private *prv = dummy;
940 unsigned long flags;
941 struct list_head *iter_vcpu, *next_vcpu;
942 struct list_head *iter_sdom, *next_sdom;
943 struct csched_vcpu *svc;
944 struct csched_dom *sdom;
945 uint32_t credit_total;
946 uint32_t weight_total;
947 uint32_t weight_left;
948 uint32_t credit_fair;
949 uint32_t credit_peak;
950 uint32_t credit_cap;
951 int credit_balance;
952 int credit_xtra;
953 int credit;
956 spin_lock_irqsave(&prv->lock, flags);
958 weight_total = prv->weight;
959 credit_total = prv->credit;
961 /* Converge balance towards 0 when it drops negative */
962 if ( prv->credit_balance < 0 )
963 {
964 credit_total -= prv->credit_balance;
965 CSCHED_STAT_CRANK(acct_balance);
966 }
968 if ( unlikely(weight_total == 0) )
969 {
970 prv->credit_balance = 0;
971 spin_unlock_irqrestore(&prv->lock, flags);
972 CSCHED_STAT_CRANK(acct_no_work);
973 goto out;
974 }
976 CSCHED_STAT_CRANK(acct_run);
978 weight_left = weight_total;
979 credit_balance = 0;
980 credit_xtra = 0;
981 credit_cap = 0U;
983 list_for_each_safe( iter_sdom, next_sdom, &prv->active_sdom )
984 {
985 sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
987 BUG_ON( is_idle_domain(sdom->dom) );
988 BUG_ON( sdom->active_vcpu_count == 0 );
989 BUG_ON( sdom->weight == 0 );
990 BUG_ON( (sdom->weight * sdom->active_vcpu_count) > weight_left );
992 weight_left -= ( sdom->weight * sdom->active_vcpu_count );
994 /*
995 * A domain's fair share is computed using its weight in competition
996 * with that of all other active domains.
997 *
998 * At most, a domain can use credits to run all its active VCPUs
999 * for one full accounting period. We allow a domain to earn more
1000 * only when the system-wide credit balance is negative.
1001 */
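/*
 * For instance, with two physical CPUs (credit_total = 600) and two active
 * domains of equal weight, one running two active VCPUs and the other one,
 * weight_total is 3 * weight: the first domain's fair share works out to
 * 400 credits (200 per VCPU) and the second's to 200, together matching
 * the 600 credits on offer for this accounting period.
 */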
1002 credit_peak = sdom->active_vcpu_count * CSCHED_CREDITS_PER_ACCT;
1003 if ( prv->credit_balance < 0 )
1004 {
1005 credit_peak += ( ( -prv->credit_balance
1006 * sdom->weight
1007 * sdom->active_vcpu_count) +
1008 (weight_total - 1)
1009 ) / weight_total;
1010 }
1012 if ( sdom->cap != 0U )
1013 {
1014 credit_cap = ((sdom->cap * CSCHED_CREDITS_PER_ACCT) + 99) / 100;
1015 if ( credit_cap < credit_peak )
1016 credit_peak = credit_cap;
1018 /* FIXME -- set cap per-vcpu as well...? */
1019 credit_cap = ( credit_cap + ( sdom->active_vcpu_count - 1 )
1020 ) / sdom->active_vcpu_count;
1021 }
1023 credit_fair = ( ( credit_total
1024 * sdom->weight
1025 * sdom->active_vcpu_count )
1026 + (weight_total - 1)
1027 ) / weight_total;
1029 if ( credit_fair < credit_peak )
1030 {
1031 credit_xtra = 1;
1032 }
1033 else
1034 {
1035 if ( weight_left != 0U )
1036 {
1037 /* Give other domains a chance at unused credits */
1038 credit_total += ( ( ( credit_fair - credit_peak
1039 ) * weight_total
1040 ) + ( weight_left - 1 )
1041 ) / weight_left;
1042 }
1044 if ( credit_xtra )
1045 {
1046 /*
1047 * Lazily keep domains with extra credits at the head of
1048 * the queue to give others a chance at them in future
1049 * accounting periods.
1050 */
1051 CSCHED_STAT_CRANK(acct_reorder);
1052 list_del(&sdom->active_sdom_elem);
1053 list_add(&sdom->active_sdom_elem, &prv->active_sdom);
1054 }
1056 credit_fair = credit_peak;
1057 }
1059 /* Compute fair share per VCPU */
1060 credit_fair = ( credit_fair + ( sdom->active_vcpu_count - 1 )
1061 ) / sdom->active_vcpu_count;
1064 list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu )
1065 {
1066 svc = list_entry(iter_vcpu, struct csched_vcpu, active_vcpu_elem);
1067 BUG_ON( sdom != svc->sdom );
1069 /* Increment credit */
1070 atomic_add(credit_fair, &svc->credit);
1071 credit = atomic_read(&svc->credit);
1073 /*
1074 * Recompute priority or, if VCPU is idling, remove it from
1075 * the active list.
1076 */
1077 if ( credit < 0 )
1078 {
1079 svc->pri = CSCHED_PRI_TS_OVER;
1081 /* Park running VCPUs of capped-out domains */
1082 if ( sdom->cap != 0U &&
1083 credit < -credit_cap &&
1084 !(svc->flags & CSCHED_FLAG_VCPU_PARKED) )
1085 {
1086 CSCHED_STAT_CRANK(vcpu_park);
1087 vcpu_pause_nosync(svc->vcpu);
1088 svc->flags |= CSCHED_FLAG_VCPU_PARKED;
1089 }
1091 /* Lower bound on credits */
1092 if ( credit < -CSCHED_CREDITS_PER_TSLICE )
1093 {
1094 CSCHED_STAT_CRANK(acct_min_credit);
1095 credit = -CSCHED_CREDITS_PER_TSLICE;
1096 atomic_set(&svc->credit, credit);
1097 }
1098 }
1099 else
1100 {
1101 svc->pri = CSCHED_PRI_TS_UNDER;
1103 /* Unpark any capped domains whose credits go positive */
1104 if ( svc->flags & CSCHED_FLAG_VCPU_PARKED)
1105 {
1106 /*
1107 * It's important to unset the flag AFTER the unpause()
1108 * call to make sure the VCPU's priority is not boosted
1109 * if it is woken up here.
1110 */
1111 CSCHED_STAT_CRANK(vcpu_unpark);
1112 vcpu_unpause(svc->vcpu);
1113 svc->flags &= ~CSCHED_FLAG_VCPU_PARKED;
1114 }
1116 /* Upper bound on credits means VCPU stops earning */
1117 if ( credit > CSCHED_CREDITS_PER_TSLICE )
1118 {
1119 __csched_vcpu_acct_stop_locked(prv, svc);
1120 /* Divide credits in half, so that when it starts
1121 * accounting again, it starts a little bit "ahead" */
1122 credit /= 2;
1123 atomic_set(&svc->credit, credit);
1124 }
1125 }
1127 CSCHED_VCPU_STAT_SET(svc, credit_last, credit);
1128 CSCHED_VCPU_STAT_SET(svc, credit_incr, credit_fair);
1129 credit_balance += credit;
1130 }
1131 }
1133 prv->credit_balance = credit_balance;
1135 spin_unlock_irqrestore(&prv->lock, flags);
1137 /* Inform each CPU that its runq needs to be sorted */
1138 prv->runq_sort++;
1140 out:
1141 set_timer( &prv->master_ticker, NOW() +
1142 MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT );
1143 }
1145 static void
1146 csched_tick(void *_cpu)
1147 {
1148 unsigned int cpu = (unsigned long)_cpu;
1149 struct csched_pcpu *spc = CSCHED_PCPU(cpu);
1150 struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));
1152 spc->tick++;
1154 /*
1155 * Accounting for running VCPU
1156 */
1157 if ( !is_idle_vcpu(current) )
1158 csched_vcpu_acct(prv, cpu);
1160 /*
1161 * Check if runq needs to be sorted
1162 *
1163 * Every physical CPU resorts the runq after the accounting master has
1164 * modified priorities. This is a special O(n) sort and runs at most
1165 * once per accounting period (currently 30 milliseconds).
1166 */
1167 csched_runq_sort(prv, cpu);
1169 set_timer(&spc->ticker, NOW() + MILLISECS(CSCHED_MSECS_PER_TICK));
1170 }
1172 static struct csched_vcpu *
1173 csched_runq_steal(int peer_cpu, int cpu, int pri)
1174 {
1175 const struct csched_pcpu * const peer_pcpu = CSCHED_PCPU(peer_cpu);
1176 const struct vcpu * const peer_vcpu = per_cpu(schedule_data, peer_cpu).curr;
1177 struct csched_vcpu *speer;
1178 struct list_head *iter;
1179 struct vcpu *vc;
1181 /*
1182 * Don't steal from an idle CPU's runq because it's about to
1183 * pick up work from it itself.
1184 */
1185 if ( peer_pcpu != NULL && !is_idle_vcpu(peer_vcpu) )
1186 {
1187 list_for_each( iter, &peer_pcpu->runq )
1188 {
1189 speer = __runq_elem(iter);
1191 /*
1192 * If next available VCPU here is not of strictly higher
1193 * priority than ours, this PCPU is useless to us.
1194 */
1195 if ( speer->pri <= pri )
1196 break;
1198 /* Is this VCPU runnable on our PCPU? */
1199 vc = speer->vcpu;
1200 BUG_ON( is_idle_vcpu(vc) );
1202 if (__csched_vcpu_is_migrateable(vc, cpu))
1203 {
1204 /* We got a candidate. Grab it! */
1205 CSCHED_VCPU_STAT_CRANK(speer, migrate_q);
1206 CSCHED_STAT_CRANK(migrate_queued);
1207 WARN_ON(vc->is_urgent);
1208 __runq_remove(speer);
1209 vc->processor = cpu;
1210 return speer;
1211 }
1212 }
1213 }
1215 CSCHED_STAT_CRANK(steal_peer_idle);
1216 return NULL;
1217 }
1219 static struct csched_vcpu *
1220 csched_load_balance(struct csched_private *prv, int cpu,
1221 struct csched_vcpu *snext, bool_t *stolen)
1222 {
1223 struct csched_vcpu *speer;
1224 cpumask_t workers;
1225 cpumask_t *online;
1226 int peer_cpu;
1228 BUG_ON( cpu != snext->vcpu->processor );
1229 online = CSCHED_CPUONLINE(per_cpu(cpupool, cpu));
1231 /* If this CPU is going offline we shouldn't steal work. */
1232 if ( unlikely(!cpu_isset(cpu, *online)) )
1233 goto out;
1235 if ( snext->pri == CSCHED_PRI_IDLE )
1236 CSCHED_STAT_CRANK(load_balance_idle);
1237 else if ( snext->pri == CSCHED_PRI_TS_OVER )
1238 CSCHED_STAT_CRANK(load_balance_over);
1239 else
1240 CSCHED_STAT_CRANK(load_balance_other);
1242 /*
1243 * Peek at non-idling CPUs in the system, starting with our
1244 * immediate neighbour.
1245 */
1246 cpus_andnot(workers, *online, prv->idlers);
1247 cpu_clear(cpu, workers);
1248 peer_cpu = cpu;
1250 while ( !cpus_empty(workers) )
1251 {
1252 peer_cpu = cycle_cpu(peer_cpu, workers);
1253 cpu_clear(peer_cpu, workers);
1255 /*
1256 * Get ahold of the scheduler lock for this peer CPU.
1257 *
1258 * Note: We don't spin on this lock but simply try it. Spinning could
1259 * cause a deadlock if the peer CPU is also load balancing and trying
1260 * to lock this CPU.
1261 */
1262 if ( !pcpu_schedule_trylock(peer_cpu) )
1263 {
1264 CSCHED_STAT_CRANK(steal_trylock_failed);
1265 continue;
1266 }
1268 /*
1269 * Any work over there to steal?
1270 */
1271 speer = csched_runq_steal(peer_cpu, cpu, snext->pri);
1272 pcpu_schedule_unlock(peer_cpu);
1273 if ( speer != NULL )
1274 {
1275 *stolen = 1;
1276 return speer;
1277 }
1278 }
1280 out:
1281 /* Failed to find more important work elsewhere... */
1282 __runq_remove(snext);
1283 return snext;
1284 }
1286 /*
1287 * This function is in the critical path. It is designed to be simple and
1288 * fast for the common case.
1289 */
1290 static struct task_slice
1291 csched_schedule(
1292 const struct scheduler *ops, s_time_t now, bool_t tasklet_work_scheduled)
1293 {
1294 const int cpu = smp_processor_id();
1295 struct list_head * const runq = RUNQ(cpu);
1296 struct csched_vcpu * const scurr = CSCHED_VCPU(current);
1297 struct csched_private *prv = CSCHED_PRIV(ops);
1298 struct csched_vcpu *snext;
1299 struct task_slice ret;
1301 CSCHED_STAT_CRANK(schedule);
1302 CSCHED_VCPU_CHECK(current);
1304 if ( !is_idle_vcpu(scurr->vcpu) )
1305 {
1306 /* Update credits of a non-idle VCPU. */
1307 burn_credits(scurr, now);
1308 scurr->start_time -= now;
1309 }
1310 else
1311 {
1312 /* Re-instate a boosted idle VCPU as normal-idle. */
1313 scurr->pri = CSCHED_PRI_IDLE;
1314 }
1316 /*
1317 * Select next runnable local VCPU (ie top of local runq)
1318 */
1319 if ( vcpu_runnable(current) )
1320 __runq_insert(cpu, scurr);
1321 else
1322 BUG_ON( is_idle_vcpu(current) || list_empty(runq) );
1324 snext = __runq_elem(runq->next);
1325 ret.migrated = 0;
1327 /* Tasklet work (which runs in idle VCPU context) overrides all else. */
1328 if ( tasklet_work_scheduled )
1329 {
1330 snext = CSCHED_VCPU(idle_vcpu[cpu]);
1331 snext->pri = CSCHED_PRI_TS_BOOST;
1332 }
1334 /*
1335 * Clear YIELD flag before scheduling out
1336 */
1337 if ( scurr->flags & CSCHED_FLAG_VCPU_YIELD )
1338 scurr->flags &= ~(CSCHED_FLAG_VCPU_YIELD);
1340 /*
1341 * SMP Load balance:
1342 *
1343 * If the next highest priority local runnable VCPU has already eaten
1344 * through its credits, look on other PCPUs to see if we have more
1345 * urgent work... If not, csched_load_balance() will return snext, but
1346 * already removed from the runq.
1347 */
1348 if ( snext->pri > CSCHED_PRI_TS_OVER )
1349 __runq_remove(snext);
1350 else
1351 snext = csched_load_balance(prv, cpu, snext, &ret.migrated);
1353 /*
1354 * Update idlers mask if necessary. When we're idling, other CPUs
1355 * will tickle us when they get extra work.
1356 */
1357 if ( snext->pri == CSCHED_PRI_IDLE )
1358 {
1359 if ( !cpu_isset(cpu, prv->idlers) )
1360 cpu_set(cpu, prv->idlers);
1361 }
1362 else if ( cpu_isset(cpu, prv->idlers) )
1363 {
1364 cpu_clear(cpu, prv->idlers);
1365 }
1367 if ( !is_idle_vcpu(snext->vcpu) )
1368 snext->start_time += now;
1370 /*
1371 * Return task to run next...
1372 */
1373 ret.time = (is_idle_vcpu(snext->vcpu) ?
1374 -1 : MILLISECS(CSCHED_MSECS_PER_TSLICE));
1375 ret.task = snext->vcpu;
1377 CSCHED_VCPU_CHECK(ret.task);
1378 return ret;
1379 }
1381 static void
1382 csched_dump_vcpu(struct csched_vcpu *svc)
1383 {
1384 struct csched_dom * const sdom = svc->sdom;
1386 printk("[%i.%i] pri=%i flags=%x cpu=%i",
1387 svc->vcpu->domain->domain_id,
1388 svc->vcpu->vcpu_id,
1389 svc->pri,
1390 svc->flags,
1391 svc->vcpu->processor);
1393 if ( sdom )
1394 {
1395 printk(" credit=%i [w=%u]", atomic_read(&svc->credit), sdom->weight);
1396 #ifdef CSCHED_STATS
1397 printk(" (%d+%u) {a/i=%u/%u m=%u+%u}",
1398 svc->stats.credit_last,
1399 svc->stats.credit_incr,
1400 svc->stats.state_active,
1401 svc->stats.state_idle,
1402 svc->stats.migrate_q,
1403 svc->stats.migrate_r);
1404 #endif
1405 }
1407 printk("\n");
1408 }
1410 static void
1411 csched_dump_pcpu(const struct scheduler *ops, int cpu)
1412 {
1413 struct list_head *runq, *iter;
1414 struct csched_pcpu *spc;
1415 struct csched_vcpu *svc;
1416 int loop;
1417 #define cpustr keyhandler_scratch
1419 spc = CSCHED_PCPU(cpu);
1420 runq = &spc->runq;
1422 cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_sibling_map, cpu));
1423 printk(" sort=%d, sibling=%s, ", spc->runq_sort_last, cpustr);
1424 cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_core_map, cpu));
1425 printk("core=%s\n", cpustr);
1427 /* current VCPU */
1428 svc = CSCHED_VCPU(per_cpu(schedule_data, cpu).curr);
1429 if ( svc )
1430 {
1431 printk("\trun: ");
1432 csched_dump_vcpu(svc);
1433 }
1435 loop = 0;
1436 list_for_each( iter, runq )
1437 {
1438 svc = __runq_elem(iter);
1439 if ( svc )
1440 {
1441 printk("\t%3d: ", ++loop);
1442 csched_dump_vcpu(svc);
1443 }
1444 }
1445 #undef cpustr
1446 }
1448 static void
1449 csched_dump(const struct scheduler *ops)
1450 {
1451 struct list_head *iter_sdom, *iter_svc;
1452 struct csched_private *prv = CSCHED_PRIV(ops);
1453 int loop;
1454 #define idlers_buf keyhandler_scratch
1456 printk("info:\n"
1457 "\tncpus = %u\n"
1458 "\tmaster = %u\n"
1459 "\tcredit = %u\n"
1460 "\tcredit balance = %d\n"
1461 "\tweight = %u\n"
1462 "\trunq_sort = %u\n"
1463 "\tdefault-weight = %d\n"
1464 "\tmsecs per tick = %dms\n"
1465 "\tcredits per msec = %d\n"
1466 "\tticks per tslice = %d\n"
1467 "\tticks per acct = %d\n"
1468 "\tmigration delay = %uus\n",
1469 prv->ncpus,
1470 prv->master,
1471 prv->credit,
1472 prv->credit_balance,
1473 prv->weight,
1474 prv->runq_sort,
1475 CSCHED_DEFAULT_WEIGHT,
1476 CSCHED_MSECS_PER_TICK,
1477 CSCHED_CREDITS_PER_MSEC,
1478 CSCHED_TICKS_PER_TSLICE,
1479 CSCHED_TICKS_PER_ACCT,
1480 vcpu_migration_delay);
1482 cpumask_scnprintf(idlers_buf, sizeof(idlers_buf), prv->idlers);
1483 printk("idlers: %s\n", idlers_buf);
1485 printk("active vcpus:\n");
1486 loop = 0;
1487 list_for_each( iter_sdom, &prv->active_sdom )
1488 {
1489 struct csched_dom *sdom;
1490 sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
1492 list_for_each( iter_svc, &sdom->active_vcpu )
1493 {
1494 struct csched_vcpu *svc;
1495 svc = list_entry(iter_svc, struct csched_vcpu, active_vcpu_elem);
1497 printk("\t%3d: ", ++loop);
1498 csched_dump_vcpu(svc);
1499 }
1500 }
1501 #undef idlers_buf
1502 }
1504 static int
1505 csched_init(struct scheduler *ops)
1506 {
1507 struct csched_private *prv;
1509 prv = xmalloc(struct csched_private);
1510 if ( prv == NULL )
1511 return -ENOMEM;
1513 memset(prv, 0, sizeof(*prv));
1514 ops->sched_data = prv;
1515 spin_lock_init(&prv->lock);
1516 INIT_LIST_HEAD(&prv->active_sdom);
1517 prv->master = UINT_MAX;
1519 return 0;
1520 }
1522 static void
1523 csched_deinit(const struct scheduler *ops)
1524 {
1525 struct csched_private *prv;
1527 prv = CSCHED_PRIV(ops);
1528 if ( prv != NULL )
1529 xfree(prv);
1530 }
1532 static void csched_tick_suspend(const struct scheduler *ops, unsigned int cpu)
1533 {
1534 struct csched_pcpu *spc;
1536 spc = CSCHED_PCPU(cpu);
1538 stop_timer(&spc->ticker);
1539 }
1541 static void csched_tick_resume(const struct scheduler *ops, unsigned int cpu)
1542 {
1543 struct csched_pcpu *spc;
1544 uint64_t now = NOW();
1546 spc = CSCHED_PCPU(cpu);
1548 set_timer(&spc->ticker, now + MILLISECS(CSCHED_MSECS_PER_TICK)
1549 - now % MILLISECS(CSCHED_MSECS_PER_TICK) );
1550 }
1552 static struct csched_private _csched_priv;
1554 const struct scheduler sched_credit_def = {
1555 .name = "SMP Credit Scheduler",
1556 .opt_name = "credit",
1557 .sched_id = XEN_SCHEDULER_CREDIT,
1558 .sched_data = &_csched_priv,
1560 .init_domain = csched_dom_init,
1561 .destroy_domain = csched_dom_destroy,
1563 .insert_vcpu = csched_vcpu_insert,
1564 .remove_vcpu = csched_vcpu_remove,
1566 .sleep = csched_vcpu_sleep,
1567 .wake = csched_vcpu_wake,
1568 .yield = csched_vcpu_yield,
1570 .adjust = csched_dom_cntl,
1572 .pick_cpu = csched_cpu_pick,
1573 .do_schedule = csched_schedule,
1575 .dump_cpu_state = csched_dump_pcpu,
1576 .dump_settings = csched_dump,
1577 .init = csched_init,
1578 .deinit = csched_deinit,
1579 .alloc_vdata = csched_alloc_vdata,
1580 .free_vdata = csched_free_vdata,
1581 .alloc_pdata = csched_alloc_pdata,
1582 .free_pdata = csched_free_pdata,
1583 .alloc_domdata = csched_alloc_domdata,
1584 .free_domdata = csched_free_domdata,
1586 .tick_suspend = csched_tick_suspend,
1587 .tick_resume = csched_tick_resume,
1588 };