/****************************************************************************
 * (C) 2005-2006 - Emmanuel Ackaouy - XenSource Inc.
 ****************************************************************************
 *
 *        File: common/csched_credit.c
 *      Author: Emmanuel Ackaouy
 *
 * Description: Credit-based SMP CPU scheduler
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/domain.h>
#include <xen/delay.h>
#include <xen/event.h>
#include <xen/time.h>
#include <xen/perfc.h>
#include <xen/sched-if.h>
#include <xen/softirq.h>
#include <asm/atomic.h>
#include <xen/errno.h>
#include <xen/keyhandler.h>

/*
 * CSCHED_STATS
 *
 * Manage very basic per-vCPU counters and stats.
 *
 * Useful for debugging live systems. The stats are displayed
 * with runq dumps ('r' on the Xen console).
 */
#ifdef PERF_COUNTERS
#define CSCHED_STATS
#endif

/*
 * Basic constants
 */
#define CSCHED_DEFAULT_WEIGHT       256
#define CSCHED_TICKS_PER_TSLICE     3
#define CSCHED_TICKS_PER_ACCT       3
#define CSCHED_MSECS_PER_TICK       10
#define CSCHED_MSECS_PER_TSLICE     \
    (CSCHED_MSECS_PER_TICK * CSCHED_TICKS_PER_TSLICE)
#define CSCHED_CREDITS_PER_MSEC     10
#define CSCHED_CREDITS_PER_TSLICE   \
    (CSCHED_CREDITS_PER_MSEC * CSCHED_MSECS_PER_TSLICE)
#define CSCHED_CREDITS_PER_ACCT     \
    (CSCHED_CREDITS_PER_MSEC * CSCHED_MSECS_PER_TICK * CSCHED_TICKS_PER_ACCT)
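
/*
 * Worked example with the default values above (illustrative only):
 *   CSCHED_MSECS_PER_TSLICE   = 10ms * 3    = 30ms per timeslice
 *   CSCHED_CREDITS_PER_TSLICE = 10 * 30     = 300 credits per timeslice
 *   CSCHED_CREDITS_PER_ACCT   = 10 * 10 * 3 = 300 credits per accounting period
 * That is, each 30ms accounting period hands out one timeslice worth of
 * credits, and a VCPU running flat out burns 10 credits per millisecond.
 */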

/*
 * Priorities
 */
#define CSCHED_PRI_TS_BOOST      0      /* time-share waking up */
#define CSCHED_PRI_TS_UNDER     -1      /* time-share w/ credits */
#define CSCHED_PRI_TS_OVER      -2      /* time-share w/o credits */
#define CSCHED_PRI_IDLE        -64      /* idle */

/*
 * Flags
 */
#define CSCHED_FLAG_VCPU_PARKED 0x0001  /* VCPU over capped credits */
#define CSCHED_FLAG_VCPU_YIELD  0x0002  /* VCPU yielding */

/*
 * Useful macros
 */
#define CSCHED_PRIV(_ops)   \
    ((struct csched_private *)((_ops)->sched_data))
#define CSCHED_PCPU(_c)     \
    ((struct csched_pcpu *)per_cpu(schedule_data, _c).sched_priv)
#define CSCHED_VCPU(_vcpu)  ((struct csched_vcpu *) (_vcpu)->sched_priv)
#define CSCHED_DOM(_dom)    ((struct csched_dom *) (_dom)->sched_priv)
#define RUNQ(_cpu)          (&(CSCHED_PCPU(_cpu)->runq))
#define CSCHED_CPUONLINE(_pool)    \
    (((_pool) == NULL) ? &cpupool_free_cpus : &(_pool)->cpu_valid)

/*
 * Stats
 */
#define CSCHED_STAT_CRANK(_X)               (perfc_incr(_X))

#ifdef CSCHED_STATS

#define CSCHED_VCPU_STATS_RESET(_V)                     \
    do                                                  \
    {                                                   \
        memset(&(_V)->stats, 0, sizeof((_V)->stats));   \
    } while ( 0 )

#define CSCHED_VCPU_STAT_CRANK(_V, _X)      (((_V)->stats._X)++)

#define CSCHED_VCPU_STAT_SET(_V, _X, _Y)    (((_V)->stats._X) = (_Y))

#else /* CSCHED_STATS */

#define CSCHED_VCPU_STATS_RESET(_V)         do {} while ( 0 )
#define CSCHED_VCPU_STAT_CRANK(_V, _X)      do {} while ( 0 )
#define CSCHED_VCPU_STAT_SET(_V, _X, _Y)    do {} while ( 0 )

#endif /* CSCHED_STATS */

/*
 * Boot parameters
 */
static bool_t __read_mostly sched_credit_default_yield;
boolean_param("sched_credit_default_yield", sched_credit_default_yield);
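
/*
 * Illustrative usage (assuming the standard boolean_param() command-line
 * handling): adding "sched_credit_default_yield" to the Xen boot command
 * line reverts yield to the legacy behaviour, i.e. the yielding VCPU keeps
 * its normal priority-ordered position in the runq instead of being placed
 * behind one lower-priority runnable VCPU (see csched_vcpu_yield() and
 * __runq_insert() below).
 */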

/*
 * Physical CPU
 */
struct csched_pcpu {
    struct list_head runq;
    uint32_t runq_sort_last;
    struct timer ticker;
    unsigned int tick;
    unsigned int idle_bias;
};

/*
 * Virtual CPU
 */
struct csched_vcpu {
    struct list_head runq_elem;
    struct list_head active_vcpu_elem;
    struct csched_dom *sdom;
    struct vcpu *vcpu;
    atomic_t credit;
    s_time_t start_time;   /* When we were scheduled (used for credit) */
    uint16_t flags;
    int16_t pri;
#ifdef CSCHED_STATS
    struct {
        int credit_last;
        uint32_t credit_incr;
        uint32_t state_active;
        uint32_t state_idle;
        uint32_t migrate_q;
        uint32_t migrate_r;
    } stats;
#endif
};

/*
 * Domain
 */
struct csched_dom {
    struct list_head active_vcpu;
    struct list_head active_sdom_elem;
    struct domain *dom;
    uint16_t active_vcpu_count;
    uint16_t weight;
    uint16_t cap;
};

/*
 * System-wide private data
 */
struct csched_private {
    spinlock_t lock;
    struct list_head active_sdom;
    uint32_t ncpus;
    struct timer master_ticker;
    unsigned int master;
    cpumask_t idlers;
    cpumask_t cpus;
    uint32_t weight;
    uint32_t credit;
    int credit_balance;
    uint32_t runq_sort;
};

static void csched_tick(void *_cpu);
static void csched_acct(void *dummy);

static inline int
__vcpu_on_runq(struct csched_vcpu *svc)
{
    return !list_empty(&svc->runq_elem);
}

static inline struct csched_vcpu *
__runq_elem(struct list_head *elem)
{
    return list_entry(elem, struct csched_vcpu, runq_elem);
}

static inline void
__runq_insert(unsigned int cpu, struct csched_vcpu *svc)
{
    const struct list_head * const runq = RUNQ(cpu);
    struct list_head *iter;

    BUG_ON( __vcpu_on_runq(svc) );
    BUG_ON( cpu != svc->vcpu->processor );

    list_for_each( iter, runq )
    {
        const struct csched_vcpu * const iter_svc = __runq_elem(iter);
        if ( svc->pri > iter_svc->pri )
            break;
    }

    /* If the vcpu yielded, try to put it behind one lower-priority
     * runnable vcpu if we can. The next runq_sort will bring it forward
     * within 30ms if the queue is too long. */
    if ( svc->flags & CSCHED_FLAG_VCPU_YIELD
         && __runq_elem(iter)->pri > CSCHED_PRI_IDLE )
    {
        iter = iter->next;

        /* Some sanity checks */
        BUG_ON(iter == runq);
    }

    list_add_tail(&svc->runq_elem, iter);
}

static inline void
__runq_remove(struct csched_vcpu *svc)
{
    BUG_ON( !__vcpu_on_runq(svc) );
    list_del_init(&svc->runq_elem);
}

static void burn_credits(struct csched_vcpu *svc, s_time_t now)
{
    s_time_t delta;
    unsigned int credits;

    /* Assert svc is current */
    ASSERT(svc==CSCHED_VCPU(per_cpu(schedule_data, svc->vcpu->processor).curr));

    if ( (delta = now - svc->start_time) <= 0 )
        return;

    credits = (delta*CSCHED_CREDITS_PER_MSEC + MILLISECS(1)/2) / MILLISECS(1);
    atomic_sub(credits, &svc->credit);
    svc->start_time += (credits * MILLISECS(1)) / CSCHED_CREDITS_PER_MSEC;
}
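
/*
 * Worked example (illustrative): with CSCHED_CREDITS_PER_MSEC == 10 and
 * MILLISECS(1) == 1000000ns, a VCPU that has run for delta = 2.34ms is
 * charged (23400000 + 500000) / 1000000 = 23 credits (rounded to nearest),
 * and start_time advances by 23 * 100000ns = 2.3ms, so the remaining 0.04ms
 * is carried over and charged on the next call.
 */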

static bool_t __read_mostly opt_tickle_one_idle = 1;
boolean_param("tickle_one_idle_cpu", opt_tickle_one_idle);

DEFINE_PER_CPU(unsigned int, last_tickle_cpu);

static inline void
__runq_tickle(unsigned int cpu, struct csched_vcpu *new)
{
    struct csched_vcpu * const cur =
        CSCHED_VCPU(per_cpu(schedule_data, cpu).curr);
    struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));
    cpumask_t mask;

    ASSERT(cur);
    cpus_clear(mask);

    /* If strictly higher priority than current VCPU, signal the CPU */
    if ( new->pri > cur->pri )
    {
        if ( cur->pri == CSCHED_PRI_IDLE )
            CSCHED_STAT_CRANK(tickle_local_idler);
        else if ( cur->pri == CSCHED_PRI_TS_OVER )
            CSCHED_STAT_CRANK(tickle_local_over);
        else if ( cur->pri == CSCHED_PRI_TS_UNDER )
            CSCHED_STAT_CRANK(tickle_local_under);
        else
            CSCHED_STAT_CRANK(tickle_local_other);

        cpu_set(cpu, mask);
    }

    /*
     * If this CPU has at least two runnable VCPUs, we tickle any idlers to
     * let them know there is runnable work in the system...
     */
    if ( cur->pri > CSCHED_PRI_IDLE )
    {
        if ( cpus_empty(prv->idlers) )
        {
            CSCHED_STAT_CRANK(tickle_idlers_none);
        }
        else
        {
            cpumask_t idle_mask;

            cpus_and(idle_mask, prv->idlers, new->vcpu->cpu_affinity);
            if ( !cpus_empty(idle_mask) )
            {
                CSCHED_STAT_CRANK(tickle_idlers_some);
                if ( opt_tickle_one_idle )
                {
                    this_cpu(last_tickle_cpu) =
                        cycle_cpu(this_cpu(last_tickle_cpu), idle_mask);
                    cpu_set(this_cpu(last_tickle_cpu), mask);
                }
                else
                    cpus_or(mask, mask, idle_mask);
            }
            cpus_and(mask, mask, new->vcpu->cpu_affinity);
        }
    }

    /* Send scheduler interrupts to designated CPUs */
    if ( !cpus_empty(mask) )
        cpumask_raise_softirq(mask, SCHEDULE_SOFTIRQ);
}

static void
csched_free_pdata(const struct scheduler *ops, void *pcpu, int cpu)
{
    struct csched_private *prv = CSCHED_PRIV(ops);
    struct csched_pcpu *spc = pcpu;
    unsigned long flags;

    if ( spc == NULL )
        return;

    spin_lock_irqsave(&prv->lock, flags);

    prv->credit -= CSCHED_CREDITS_PER_ACCT;
    prv->ncpus--;
    cpu_clear(cpu, prv->idlers);
    cpu_clear(cpu, prv->cpus);
    if ( (prv->master == cpu) && (prv->ncpus > 0) )
    {
        prv->master = first_cpu(prv->cpus);
        migrate_timer(&prv->master_ticker, prv->master);
    }
    kill_timer(&spc->ticker);
    if ( prv->ncpus == 0 )
        kill_timer(&prv->master_ticker);

    spin_unlock_irqrestore(&prv->lock, flags);

    xfree(spc);
}

static void *
csched_alloc_pdata(const struct scheduler *ops, int cpu)
{
    struct csched_pcpu *spc;
    struct csched_private *prv = CSCHED_PRIV(ops);
    unsigned long flags;

    /* Allocate per-PCPU info */
    spc = xmalloc(struct csched_pcpu);
    if ( spc == NULL )
        return NULL;
    memset(spc, 0, sizeof(*spc));

    spin_lock_irqsave(&prv->lock, flags);

    /* Initialize/update system-wide config */
    prv->credit += CSCHED_CREDITS_PER_ACCT;
    prv->ncpus++;
    cpu_set(cpu, prv->cpus);
    if ( prv->ncpus == 1 )
    {
        prv->master = cpu;
        init_timer(&prv->master_ticker, csched_acct, prv, cpu);
        set_timer(&prv->master_ticker, NOW() +
                  MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT);
    }

    init_timer(&spc->ticker, csched_tick, (void *)(unsigned long)cpu, cpu);
    set_timer(&spc->ticker, NOW() + MILLISECS(CSCHED_MSECS_PER_TICK));

    INIT_LIST_HEAD(&spc->runq);
    spc->runq_sort_last = prv->runq_sort;
    spc->idle_bias = NR_CPUS - 1;
    if ( per_cpu(schedule_data, cpu).sched_priv == NULL )
        per_cpu(schedule_data, cpu).sched_priv = spc;

    /* Start off idling... */
    BUG_ON(!is_idle_vcpu(per_cpu(schedule_data, cpu).curr));
    cpu_set(cpu, prv->idlers);

    spin_unlock_irqrestore(&prv->lock, flags);

    return spc;
}

#ifndef NDEBUG
static inline void
__csched_vcpu_check(struct vcpu *vc)
{
    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
    struct csched_dom * const sdom = svc->sdom;

    BUG_ON( svc->vcpu != vc );
    BUG_ON( sdom != CSCHED_DOM(vc->domain) );
    if ( sdom )
    {
        BUG_ON( is_idle_vcpu(vc) );
        BUG_ON( sdom->dom != vc->domain );
    }
    else
    {
        BUG_ON( !is_idle_vcpu(vc) );
    }

    CSCHED_STAT_CRANK(vcpu_check);
}
#define CSCHED_VCPU_CHECK(_vc)  (__csched_vcpu_check(_vc))
#else
#define CSCHED_VCPU_CHECK(_vc)
#endif

/*
 * Delay, in microseconds, between migrations of a VCPU between PCPUs.
 * This prevents rapid fluttering of a VCPU between CPUs, and reduces the
 * implicit overheads such as cache-warming. 1ms (1000) has been measured
 * as a good value.
 */
static unsigned int vcpu_migration_delay;
integer_param("vcpu_migration_delay", vcpu_migration_delay);

void set_vcpu_migration_delay(unsigned int delay)
{
    vcpu_migration_delay = delay;
}

unsigned int get_vcpu_migration_delay(void)
{
    return vcpu_migration_delay;
}

static inline int
__csched_vcpu_is_cache_hot(struct vcpu *v)
{
    int hot = ((NOW() - v->last_run_time) <
               ((uint64_t)vcpu_migration_delay * 1000u));

    if ( hot )
        CSCHED_STAT_CRANK(vcpu_hot);

    return hot;
}
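
/*
 * Illustrative example (assuming the standard integer_param() command-line
 * handling): booting Xen with "vcpu_migration_delay=1000" makes a VCPU count
 * as cache-hot while NOW() - last_run_time < 1000 * 1000ns, so it will not
 * be stolen by another PCPU within 1ms of last running.  With the default of
 * 0 the comparison never fires and VCPUs are always considered cold.
 */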

static inline int
__csched_vcpu_is_migrateable(struct vcpu *vc, int dest_cpu)
{
    /*
     * Don't pick up work that's in the peer's scheduling tail or hot on
     * peer PCPU. Only pick up work that's allowed to run on our CPU.
     */
    return !vc->is_running &&
           !__csched_vcpu_is_cache_hot(vc) &&
           cpu_isset(dest_cpu, vc->cpu_affinity);
}

static int
_csched_cpu_pick(const struct scheduler *ops, struct vcpu *vc, bool_t commit)
{
    cpumask_t cpus;
    cpumask_t idlers;
    cpumask_t *online;
    int cpu;

    /*
     * Pick from online CPUs in VCPU's affinity mask, giving a
     * preference to its current processor if it's in there.
     */
    online = CSCHED_CPUONLINE(vc->domain->cpupool);
    cpus_and(cpus, *online, vc->cpu_affinity);
    cpu = cpu_isset(vc->processor, cpus)
            ? vc->processor
            : cycle_cpu(vc->processor, cpus);
    ASSERT( !cpus_empty(cpus) && cpu_isset(cpu, cpus) );

    /*
     * Try to find an idle processor within the above constraints.
     *
     * In multi-core and multi-threaded CPUs, not all idle execution
     * vehicles are equal!
     *
     * We give preference to the idle execution vehicle with the most
     * idling neighbours in its grouping. This distributes work across
     * distinct cores first and guarantees we don't do something stupid
     * like run two VCPUs on co-hyperthreads while there are idle cores
     * or sockets.
     */
    cpus_and(idlers, cpu_online_map, CSCHED_PRIV(ops)->idlers);
    cpu_set(cpu, idlers);
    cpus_and(cpus, cpus, idlers);
    cpu_clear(cpu, cpus);

    while ( !cpus_empty(cpus) )
    {
        cpumask_t cpu_idlers;
        cpumask_t nxt_idlers;
        int nxt, weight_cpu, weight_nxt;
        int migrate_factor;

        nxt = cycle_cpu(cpu, cpus);

        if ( cpu_isset(cpu, per_cpu(cpu_core_map, nxt)) )
        {
            /* We're on the same socket, so check the busy-ness of threads.
             * Migrate if the other core has even one more idle thread. */
            ASSERT( cpu_isset(nxt, per_cpu(cpu_core_map, cpu)) );
            migrate_factor = 1;
            cpus_and(cpu_idlers, idlers, per_cpu(cpu_sibling_map, cpu));
            cpus_and(nxt_idlers, idlers, per_cpu(cpu_sibling_map, nxt));
        }
        else
        {
            /* We're on different sockets, so check the busy-ness of cores.
             * Migrate only if the other core is twice as idle. */
            ASSERT( !cpu_isset(nxt, per_cpu(cpu_core_map, cpu)) );
            migrate_factor = 2;
            cpus_and(cpu_idlers, idlers, per_cpu(cpu_core_map, cpu));
            cpus_and(nxt_idlers, idlers, per_cpu(cpu_core_map, nxt));
        }

        weight_cpu = cpus_weight(cpu_idlers);
        weight_nxt = cpus_weight(nxt_idlers);
        /* smt_power_savings: consolidate work rather than spreading it */
        if ( ( sched_smt_power_savings
               && (weight_cpu > weight_nxt) )
             || ( !sched_smt_power_savings
                  && (weight_cpu * migrate_factor < weight_nxt) ) )
        {
            cpu = cycle_cpu(CSCHED_PCPU(nxt)->idle_bias, nxt_idlers);
            if ( commit )
                CSCHED_PCPU(nxt)->idle_bias = cpu;
            cpus_andnot(cpus, cpus, per_cpu(cpu_sibling_map, cpu));
        }
        else
        {
            cpus_andnot(cpus, cpus, nxt_idlers);
        }
    }

    return cpu;
}
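
/*
 * Illustrative scenario for the loop above: if the current grouping has
 * weight_cpu = 1 idler, a candidate on the same socket wins as soon as its
 * grouping has 2 idlers (1 * 1 < 2), whereas a candidate on a different
 * socket must offer more than twice as many idlers (1 * 2 < weight_nxt,
 * i.e. at least 3) before the costlier cross-socket move is taken.  With
 * sched_smt_power_savings the comparison is reversed, consolidating work
 * onto busier groupings instead of spreading it.
 */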

static int
csched_cpu_pick(const struct scheduler *ops, struct vcpu *vc)
{
    return _csched_cpu_pick(ops, vc, 1);
}

static inline void
__csched_vcpu_acct_start(struct csched_private *prv, struct csched_vcpu *svc)
{
    struct csched_dom * const sdom = svc->sdom;
    unsigned long flags;

    spin_lock_irqsave(&prv->lock, flags);

    if ( list_empty(&svc->active_vcpu_elem) )
    {
        CSCHED_VCPU_STAT_CRANK(svc, state_active);
        CSCHED_STAT_CRANK(acct_vcpu_active);

        sdom->active_vcpu_count++;
        list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
        /* Make weight per-vcpu */
        prv->weight += sdom->weight;
        if ( list_empty(&sdom->active_sdom_elem) )
        {
            list_add(&sdom->active_sdom_elem, &prv->active_sdom);
        }
    }

    spin_unlock_irqrestore(&prv->lock, flags);
}

static inline void
__csched_vcpu_acct_stop_locked(struct csched_private *prv,
    struct csched_vcpu *svc)
{
    struct csched_dom * const sdom = svc->sdom;

    BUG_ON( list_empty(&svc->active_vcpu_elem) );

    CSCHED_VCPU_STAT_CRANK(svc, state_idle);
    CSCHED_STAT_CRANK(acct_vcpu_idle);

    BUG_ON( prv->weight < sdom->weight );
    sdom->active_vcpu_count--;
    list_del_init(&svc->active_vcpu_elem);
    prv->weight -= sdom->weight;
    if ( list_empty(&sdom->active_vcpu) )
    {
        list_del_init(&sdom->active_sdom_elem);
    }
}

static void
csched_vcpu_acct(struct csched_private *prv, unsigned int cpu)
{
    struct csched_vcpu * const svc = CSCHED_VCPU(current);
    const struct scheduler *ops = per_cpu(scheduler, cpu);

    ASSERT( current->processor == cpu );
    ASSERT( svc->sdom != NULL );

    /*
     * If this VCPU's priority was boosted when it last awoke, reset it.
     * If the VCPU is found here, then it's consuming a non-negligible
     * amount of CPU resources and should no longer be boosted.
     */
    if ( svc->pri == CSCHED_PRI_TS_BOOST )
        svc->pri = CSCHED_PRI_TS_UNDER;

    /*
     * Update credits
     */
    if ( !is_idle_vcpu(svc->vcpu) )
        burn_credits(svc, NOW());

    /*
     * Put this VCPU and domain back on the active list if it was
     * idling.
     *
     * If it's been active a while, check if we'd be better off
     * migrating it to run elsewhere (see multi-core and multi-thread
     * support in csched_cpu_pick()).
     */
    if ( list_empty(&svc->active_vcpu_elem) )
    {
        __csched_vcpu_acct_start(prv, svc);
    }
    else if ( _csched_cpu_pick(ops, current, 0) != cpu )
    {
        CSCHED_VCPU_STAT_CRANK(svc, migrate_r);
        CSCHED_STAT_CRANK(migrate_running);
        set_bit(_VPF_migrating, &current->pause_flags);
        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
    }
}

static void *
csched_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
{
    struct csched_vcpu *svc;

    /* Allocate per-VCPU info */
    svc = xmalloc(struct csched_vcpu);
    if ( svc == NULL )
        return NULL;
    memset(svc, 0, sizeof(*svc));

    INIT_LIST_HEAD(&svc->runq_elem);
    INIT_LIST_HEAD(&svc->active_vcpu_elem);
    svc->sdom = dd;
    svc->vcpu = vc;
    atomic_set(&svc->credit, 0);
    svc->flags = 0U;
    svc->pri = is_idle_domain(vc->domain) ?
        CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER;
    CSCHED_VCPU_STATS_RESET(svc);
    CSCHED_STAT_CRANK(vcpu_init);
    return svc;
}

static void
csched_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
{
    struct csched_vcpu *svc = vc->sched_priv;

    if ( !__vcpu_on_runq(svc) && vcpu_runnable(vc) && !vc->is_running )
        __runq_insert(vc->processor, svc);
}

static void
csched_free_vdata(const struct scheduler *ops, void *priv)
{
    struct csched_vcpu *svc = priv;

    BUG_ON( !list_empty(&svc->runq_elem) );

    xfree(svc);
}

static void
csched_vcpu_remove(const struct scheduler *ops, struct vcpu *vc)
{
    struct csched_private *prv = CSCHED_PRIV(ops);
    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
    struct csched_dom * const sdom = svc->sdom;
    unsigned long flags;

    CSCHED_STAT_CRANK(vcpu_destroy);

    if ( __vcpu_on_runq(svc) )
        __runq_remove(svc);

    spin_lock_irqsave(&(prv->lock), flags);

    if ( !list_empty(&svc->active_vcpu_elem) )
        __csched_vcpu_acct_stop_locked(prv, svc);

    spin_unlock_irqrestore(&(prv->lock), flags);

    BUG_ON( sdom == NULL );
    BUG_ON( !list_empty(&svc->runq_elem) );
}

static void
csched_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
{
    struct csched_vcpu * const svc = CSCHED_VCPU(vc);

    CSCHED_STAT_CRANK(vcpu_sleep);

    BUG_ON( is_idle_vcpu(vc) );

    if ( per_cpu(schedule_data, vc->processor).curr == vc )
        cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ);
    else if ( __vcpu_on_runq(svc) )
        __runq_remove(svc);
}

static void
csched_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
{
    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
    const unsigned int cpu = vc->processor;

    BUG_ON( is_idle_vcpu(vc) );

    if ( unlikely(per_cpu(schedule_data, cpu).curr == vc) )
    {
        CSCHED_STAT_CRANK(vcpu_wake_running);
        return;
    }
    if ( unlikely(__vcpu_on_runq(svc)) )
    {
        CSCHED_STAT_CRANK(vcpu_wake_onrunq);
        return;
    }

    if ( likely(vcpu_runnable(vc)) )
        CSCHED_STAT_CRANK(vcpu_wake_runnable);
    else
        CSCHED_STAT_CRANK(vcpu_wake_not_runnable);

    /*
     * We temporarily boost the priority of awaking VCPUs!
     *
     * If this VCPU consumes a non-negligible amount of CPU, it
     * will eventually find itself in the credit accounting code
     * path where its priority will be reset to normal.
     *
     * If on the other hand the VCPU consumes little CPU and is
     * blocking and awoken a lot (doing I/O for example), its
     * priority will remain boosted, optimizing its wake-to-run
     * latencies.
     *
     * This allows wake-to-run latency sensitive VCPUs to preempt
     * more CPU resource intensive VCPUs without impacting overall
     * system fairness.
     *
     * The one exception is for VCPUs of capped domains unpausing
     * after earning credits they had overspent. We don't boost
     * those.
     */
    if ( svc->pri == CSCHED_PRI_TS_UNDER &&
         !(svc->flags & CSCHED_FLAG_VCPU_PARKED) )
    {
        svc->pri = CSCHED_PRI_TS_BOOST;
    }

    /* Put the VCPU on the runq and tickle CPUs */
    __runq_insert(cpu, svc);
    __runq_tickle(cpu, svc);
}

static void
csched_vcpu_yield(const struct scheduler *ops, struct vcpu *vc)
{
    struct csched_vcpu * const sv = CSCHED_VCPU(vc);

    if ( !sched_credit_default_yield )
    {
        /* Let the scheduler know that this vcpu is trying to yield */
        sv->flags |= CSCHED_FLAG_VCPU_YIELD;
    }
}

static int
csched_dom_cntl(
    const struct scheduler *ops,
    struct domain *d,
    struct xen_domctl_scheduler_op *op)
{
    struct csched_dom * const sdom = CSCHED_DOM(d);
    struct csched_private *prv = CSCHED_PRIV(ops);
    unsigned long flags;

    if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
    {
        op->u.credit.weight = sdom->weight;
        op->u.credit.cap = sdom->cap;
    }
    else
    {
        ASSERT(op->cmd == XEN_DOMCTL_SCHEDOP_putinfo);

        spin_lock_irqsave(&prv->lock, flags);

        if ( op->u.credit.weight != 0 )
        {
            if ( !list_empty(&sdom->active_sdom_elem) )
            {
                prv->weight -= sdom->weight * sdom->active_vcpu_count;
                prv->weight += op->u.credit.weight * sdom->active_vcpu_count;
            }
            sdom->weight = op->u.credit.weight;
        }

        if ( op->u.credit.cap != (uint16_t)~0U )
            sdom->cap = op->u.credit.cap;

        spin_unlock_irqrestore(&prv->lock, flags);
    }

    return 0;
}
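
/*
 * Usage sketch (assumption: the usual toolstack path): weight and cap reach
 * this handler via the scheduler_op domctl, e.g. from
 * "xl sched-credit -d <domain> -w 512 -c 50".  A weight of 512 gives the
 * domain twice the CPU share of a default-weight (256) domain under
 * contention, while a cap of 50 limits it to 50% of one physical CPU even
 * when the system is otherwise idle; cap == 0 means "no cap".
 */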

static void *
csched_alloc_domdata(const struct scheduler *ops, struct domain *dom)
{
    struct csched_dom *sdom;

    sdom = xmalloc(struct csched_dom);
    if ( sdom == NULL )
        return NULL;
    memset(sdom, 0, sizeof(*sdom));

    /* Initialize credit and weight */
    INIT_LIST_HEAD(&sdom->active_vcpu);
    sdom->active_vcpu_count = 0;
    INIT_LIST_HEAD(&sdom->active_sdom_elem);
    sdom->dom = dom;
    sdom->weight = CSCHED_DEFAULT_WEIGHT;
    sdom->cap = 0U;

    return (void *)sdom;
}

static int
csched_dom_init(const struct scheduler *ops, struct domain *dom)
{
    struct csched_dom *sdom;

    CSCHED_STAT_CRANK(dom_init);

    if ( is_idle_domain(dom) )
        return 0;

    sdom = csched_alloc_domdata(ops, dom);
    if ( sdom == NULL )
        return -ENOMEM;

    dom->sched_priv = sdom;

    return 0;
}

static void
csched_free_domdata(const struct scheduler *ops, void *data)
{
    xfree(data);
}

static void
csched_dom_destroy(const struct scheduler *ops, struct domain *dom)
{
    CSCHED_STAT_CRANK(dom_destroy);
    csched_free_domdata(ops, CSCHED_DOM(dom));
}

/*
 * This is an O(n) optimized sort of the runq.
 *
 * Time-share VCPUs can only be one of two priorities, UNDER or OVER. We walk
 * through the runq and move up any UNDERs that are preceded by OVERS. We
 * remember the last UNDER to make the move up operation O(1).
 */
static void
csched_runq_sort(struct csched_private *prv, unsigned int cpu)
{
    struct csched_pcpu * const spc = CSCHED_PCPU(cpu);
    struct list_head *runq, *elem, *next, *last_under;
    struct csched_vcpu *svc_elem;
    unsigned long flags;
    int sort_epoch;

    sort_epoch = prv->runq_sort;
    if ( sort_epoch == spc->runq_sort_last )
        return;

    spc->runq_sort_last = sort_epoch;

    pcpu_schedule_lock_irqsave(cpu, flags);

    runq = &spc->runq;
    elem = runq->next;
    last_under = runq;

    while ( elem != runq )
    {
        next = elem->next;
        svc_elem = __runq_elem(elem);

        if ( svc_elem->pri >= CSCHED_PRI_TS_UNDER )
        {
            /* does elem need to move up the runq? */
            if ( elem->prev != last_under )
            {
                list_del(elem);
                list_add(elem, last_under);
            }
            last_under = elem;
        }

        elem = next;
    }

    pcpu_schedule_unlock_irqrestore(cpu, flags);
}
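
/*
 * Illustrative example: a runq ordered [U1, O1, U2, O2] after an accounting
 * pass (U = UNDER, O = OVER) is rewritten in a single walk to
 * [U1, U2, O1, O2]: U1 becomes last_under, O1 is skipped, U2 is spliced in
 * right after last_under, and O2 stays put.  Relative order within each
 * priority class is preserved.
 */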

static void
csched_acct(void* dummy)
{
    struct csched_private *prv = dummy;
    unsigned long flags;
    struct list_head *iter_vcpu, *next_vcpu;
    struct list_head *iter_sdom, *next_sdom;
    struct csched_vcpu *svc;
    struct csched_dom *sdom;
    uint32_t credit_total;
    uint32_t weight_total;
    uint32_t weight_left;
    uint32_t credit_fair;
    uint32_t credit_peak;
    uint32_t credit_cap;
    int credit_balance;
    int credit_xtra;
    int credit;

    spin_lock_irqsave(&prv->lock, flags);

    weight_total = prv->weight;
    credit_total = prv->credit;

    /* Converge balance towards 0 when it drops negative */
    if ( prv->credit_balance < 0 )
    {
        credit_total -= prv->credit_balance;
        CSCHED_STAT_CRANK(acct_balance);
    }

    if ( unlikely(weight_total == 0) )
    {
        prv->credit_balance = 0;
        spin_unlock_irqrestore(&prv->lock, flags);
        CSCHED_STAT_CRANK(acct_no_work);
        goto out;
    }

    CSCHED_STAT_CRANK(acct_run);

    weight_left = weight_total;
    credit_balance = 0;
    credit_xtra = 0;
    credit_cap = 0U;

    list_for_each_safe( iter_sdom, next_sdom, &prv->active_sdom )
    {
        sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);

        BUG_ON( is_idle_domain(sdom->dom) );
        BUG_ON( sdom->active_vcpu_count == 0 );
        BUG_ON( sdom->weight == 0 );
        BUG_ON( (sdom->weight * sdom->active_vcpu_count) > weight_left );

        weight_left -= ( sdom->weight * sdom->active_vcpu_count );

        /*
         * A domain's fair share is computed using its weight in competition
         * with that of all other active domains.
         *
         * At most, a domain can use credits to run all its active VCPUs
         * for one full accounting period. We allow a domain to earn more
         * only when the system-wide credit balance is negative.
         */
        credit_peak = sdom->active_vcpu_count * CSCHED_CREDITS_PER_ACCT;
        if ( prv->credit_balance < 0 )
        {
            credit_peak += ( ( -prv->credit_balance
                               * sdom->weight
                               * sdom->active_vcpu_count) +
                             (weight_total - 1)
                           ) / weight_total;
        }

        if ( sdom->cap != 0U )
        {
            credit_cap = ((sdom->cap * CSCHED_CREDITS_PER_ACCT) + 99) / 100;
            if ( credit_cap < credit_peak )
                credit_peak = credit_cap;

            /* FIXME -- set cap per-vcpu as well...? */
            credit_cap = ( credit_cap + ( sdom->active_vcpu_count - 1 )
                         ) / sdom->active_vcpu_count;
        }

        credit_fair = ( ( credit_total
                          * sdom->weight
                          * sdom->active_vcpu_count )
                        + (weight_total - 1)
                      ) / weight_total;

        if ( credit_fair < credit_peak )
        {
            credit_xtra = 1;
        }
        else
        {
            if ( weight_left != 0U )
            {
                /* Give other domains a chance at unused credits */
                credit_total += ( ( ( credit_fair - credit_peak
                                    ) * weight_total
                                  ) + ( weight_left - 1 )
                                ) / weight_left;
            }

            if ( credit_xtra )
            {
                /*
                 * Lazily keep domains with extra credits at the head of
                 * the queue to give others a chance at them in future
                 * accounting periods.
                 */
                CSCHED_STAT_CRANK(acct_reorder);
                list_del(&sdom->active_sdom_elem);
                list_add(&sdom->active_sdom_elem, &prv->active_sdom);
            }

            credit_fair = credit_peak;
        }

        /* Compute fair share per VCPU */
        credit_fair = ( credit_fair + ( sdom->active_vcpu_count - 1 )
                      ) / sdom->active_vcpu_count;

        list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu )
        {
            svc = list_entry(iter_vcpu, struct csched_vcpu, active_vcpu_elem);
            BUG_ON( sdom != svc->sdom );

            /* Increment credit */
            atomic_add(credit_fair, &svc->credit);
            credit = atomic_read(&svc->credit);

            /*
             * Recompute priority or, if VCPU is idling, remove it from
             * the active list.
             */
            if ( credit < 0 )
            {
                svc->pri = CSCHED_PRI_TS_OVER;

                /* Park running VCPUs of capped-out domains */
                if ( sdom->cap != 0U &&
                     credit < -credit_cap &&
                     !(svc->flags & CSCHED_FLAG_VCPU_PARKED) )
                {
                    CSCHED_STAT_CRANK(vcpu_park);
                    vcpu_pause_nosync(svc->vcpu);
                    svc->flags |= CSCHED_FLAG_VCPU_PARKED;
                }

                /* Lower bound on credits */
                if ( credit < -CSCHED_CREDITS_PER_TSLICE )
                {
                    CSCHED_STAT_CRANK(acct_min_credit);
                    credit = -CSCHED_CREDITS_PER_TSLICE;
                    atomic_set(&svc->credit, credit);
                }
            }
            else
            {
                svc->pri = CSCHED_PRI_TS_UNDER;

                /* Unpark any capped domains whose credits go positive */
                if ( svc->flags & CSCHED_FLAG_VCPU_PARKED)
                {
                    /*
                     * It's important to unset the flag AFTER the unpause()
                     * call to make sure the VCPU's priority is not boosted
                     * if it is woken up here.
                     */
                    CSCHED_STAT_CRANK(vcpu_unpark);
                    vcpu_unpause(svc->vcpu);
                    svc->flags &= ~CSCHED_FLAG_VCPU_PARKED;
                }

                /* Upper bound on credits means VCPU stops earning */
                if ( credit > CSCHED_CREDITS_PER_TSLICE )
                {
                    __csched_vcpu_acct_stop_locked(prv, svc);
                    /* Divide credits in half, so that when it starts
                     * accounting again, it starts a little bit "ahead" */
                    credit /= 2;
                    atomic_set(&svc->credit, credit);
                }
            }

            CSCHED_VCPU_STAT_SET(svc, credit_last, credit);
            CSCHED_VCPU_STAT_SET(svc, credit_incr, credit_fair);
            credit_balance += credit;
        }
    }

    prv->credit_balance = credit_balance;

    spin_unlock_irqrestore(&prv->lock, flags);

    /* Inform each CPU that its runq needs to be sorted */
    prv->runq_sort++;

 out:
    set_timer( &prv->master_ticker, NOW() +
               MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT );
}
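
/*
 * Worked example (illustrative): on a 1-PCPU host, credit_total is 300 per
 * 30ms accounting period.  With two active single-VCPU domains of weights
 * 256 and 512 (weight_total = 768), their fair shares come to
 * (300*256 + 767)/768 = 100 and (300*512 + 767)/768 = 200 credits
 * respectively.  If the weight-512 domain also carries a cap of 50, its
 * credit_peak is clamped to (50*300 + 99)/100 = 150, so it earns at most
 * 150 credits per period and any excess is offered to domains processed
 * later in the list via credit_total.
 */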

static void
csched_tick(void *_cpu)
{
    unsigned int cpu = (unsigned long)_cpu;
    struct csched_pcpu *spc = CSCHED_PCPU(cpu);
    struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));

    spc->tick++;

    /*
     * Accounting for running VCPU
     */
    if ( !is_idle_vcpu(current) )
        csched_vcpu_acct(prv, cpu);

    /*
     * Check if runq needs to be sorted
     *
     * Every physical CPU resorts the runq after the accounting master has
     * modified priorities. This is a special O(n) sort and runs at most
     * once per accounting period (currently 30 milliseconds).
     */
    csched_runq_sort(prv, cpu);

    set_timer(&spc->ticker, NOW() + MILLISECS(CSCHED_MSECS_PER_TICK));
}

static struct csched_vcpu *
csched_runq_steal(int peer_cpu, int cpu, int pri)
{
    const struct csched_pcpu * const peer_pcpu = CSCHED_PCPU(peer_cpu);
    const struct vcpu * const peer_vcpu = per_cpu(schedule_data, peer_cpu).curr;
    struct csched_vcpu *speer;
    struct list_head *iter;
    struct vcpu *vc;

    /*
     * Don't steal from an idle CPU's runq because it's about to
     * pick up work from it itself.
     */
    if ( peer_pcpu != NULL && !is_idle_vcpu(peer_vcpu) )
    {
        list_for_each( iter, &peer_pcpu->runq )
        {
            speer = __runq_elem(iter);

            /*
             * If next available VCPU here is not of strictly higher
             * priority than ours, this PCPU is useless to us.
             */
            if ( speer->pri <= pri )
                break;

            /* Is this VCPU runnable on our PCPU? */
            vc = speer->vcpu;
            BUG_ON( is_idle_vcpu(vc) );

            if (__csched_vcpu_is_migrateable(vc, cpu))
            {
                /* We got a candidate. Grab it! */
                CSCHED_VCPU_STAT_CRANK(speer, migrate_q);
                CSCHED_STAT_CRANK(migrate_queued);
                WARN_ON(vc->is_urgent);
                __runq_remove(speer);
                vc->processor = cpu;
                return speer;
            }
        }
    }

    CSCHED_STAT_CRANK(steal_peer_idle);
    return NULL;
}

static struct csched_vcpu *
csched_load_balance(struct csched_private *prv, int cpu,
    struct csched_vcpu *snext, bool_t *stolen)
{
    struct csched_vcpu *speer;
    cpumask_t workers;
    cpumask_t *online;
    int peer_cpu;

    BUG_ON( cpu != snext->vcpu->processor );
    online = CSCHED_CPUONLINE(per_cpu(cpupool, cpu));

    /* If this CPU is going offline we shouldn't steal work. */
    if ( unlikely(!cpu_isset(cpu, *online)) )
        goto out;

    if ( snext->pri == CSCHED_PRI_IDLE )
        CSCHED_STAT_CRANK(load_balance_idle);
    else if ( snext->pri == CSCHED_PRI_TS_OVER )
        CSCHED_STAT_CRANK(load_balance_over);
    else
        CSCHED_STAT_CRANK(load_balance_other);

    /*
     * Peek at non-idling CPUs in the system, starting with our
     * immediate neighbour.
     */
    cpus_andnot(workers, *online, prv->idlers);
    cpu_clear(cpu, workers);
    peer_cpu = cpu;

    while ( !cpus_empty(workers) )
    {
        peer_cpu = cycle_cpu(peer_cpu, workers);
        cpu_clear(peer_cpu, workers);

        /*
         * Get ahold of the scheduler lock for this peer CPU.
         *
         * Note: We don't spin on this lock but simply try it. Spinning could
         * cause a deadlock if the peer CPU is also load balancing and trying
         * to lock this CPU.
         */
        if ( !pcpu_schedule_trylock(peer_cpu) )
        {
            CSCHED_STAT_CRANK(steal_trylock_failed);
            continue;
        }

        /*
         * Any work over there to steal?
         */
        speer = csched_runq_steal(peer_cpu, cpu, snext->pri);
        pcpu_schedule_unlock(peer_cpu);
        if ( speer != NULL )
        {
            *stolen = 1;
            return speer;
        }
    }

 out:
    /* Failed to find more important work elsewhere... */
    __runq_remove(snext);
    return snext;
}

/*
 * This function is in the critical path. It is designed to be simple and
 * fast for the common case.
 */
static struct task_slice
csched_schedule(
    const struct scheduler *ops, s_time_t now, bool_t tasklet_work_scheduled)
{
    const int cpu = smp_processor_id();
    struct list_head * const runq = RUNQ(cpu);
    struct csched_vcpu * const scurr = CSCHED_VCPU(current);
    struct csched_private *prv = CSCHED_PRIV(ops);
    struct csched_vcpu *snext;
    struct task_slice ret;

    CSCHED_STAT_CRANK(schedule);
    CSCHED_VCPU_CHECK(current);

    if ( !is_idle_vcpu(scurr->vcpu) )
    {
        /* Update credits of a non-idle VCPU. */
        burn_credits(scurr, now);
        scurr->start_time -= now;
    }
    else
    {
        /* Re-instate a boosted idle VCPU as normal-idle. */
        scurr->pri = CSCHED_PRI_IDLE;
    }

    /*
     * Select next runnable local VCPU (ie top of local runq)
     */
    if ( vcpu_runnable(current) )
        __runq_insert(cpu, scurr);
    else
        BUG_ON( is_idle_vcpu(current) || list_empty(runq) );

    snext = __runq_elem(runq->next);
    ret.migrated = 0;

    /* Tasklet work (which runs in idle VCPU context) overrides all else. */
    if ( tasklet_work_scheduled )
    {
        snext = CSCHED_VCPU(idle_vcpu[cpu]);
        snext->pri = CSCHED_PRI_TS_BOOST;
    }

    /*
     * Clear YIELD flag before scheduling out
     */
    if ( scurr->flags & CSCHED_FLAG_VCPU_YIELD )
        scurr->flags &= ~(CSCHED_FLAG_VCPU_YIELD);

    /*
     * SMP Load balance:
     *
     * If the next highest priority local runnable VCPU has already eaten
     * through its credits, look on other PCPUs to see if we have more
     * urgent work... If not, csched_load_balance() will return snext, but
     * already removed from the runq.
     */
    if ( snext->pri > CSCHED_PRI_TS_OVER )
        __runq_remove(snext);
    else
        snext = csched_load_balance(prv, cpu, snext, &ret.migrated);

    /*
     * Update idlers mask if necessary. When we're idling, other CPUs
     * will tickle us when they get extra work.
     */
    if ( snext->pri == CSCHED_PRI_IDLE )
    {
        if ( !cpu_isset(cpu, prv->idlers) )
            cpu_set(cpu, prv->idlers);
    }
    else if ( cpu_isset(cpu, prv->idlers) )
    {
        cpu_clear(cpu, prv->idlers);
    }

    if ( !is_idle_vcpu(snext->vcpu) )
        snext->start_time += now;

    /*
     * Return task to run next...
     */
    ret.time = (is_idle_vcpu(snext->vcpu) ?
                -1 : MILLISECS(CSCHED_MSECS_PER_TSLICE));
    ret.task = snext->vcpu;

    CSCHED_VCPU_CHECK(ret.task);
    return ret;
}

static void
csched_dump_vcpu(struct csched_vcpu *svc)
{
    struct csched_dom * const sdom = svc->sdom;

    printk("[%i.%i] pri=%i flags=%x cpu=%i",
           svc->vcpu->domain->domain_id,
           svc->vcpu->vcpu_id,
           svc->pri,
           svc->flags,
           svc->vcpu->processor);

    if ( sdom )
    {
        printk(" credit=%i [w=%u]", atomic_read(&svc->credit), sdom->weight);
#ifdef CSCHED_STATS
        printk(" (%d+%u) {a/i=%u/%u m=%u+%u}",
               svc->stats.credit_last,
               svc->stats.credit_incr,
               svc->stats.state_active,
               svc->stats.state_idle,
               svc->stats.migrate_q,
               svc->stats.migrate_r);
#endif
    }

    printk("\n");
}

static void
csched_dump_pcpu(const struct scheduler *ops, int cpu)
{
    struct list_head *runq, *iter;
    struct csched_pcpu *spc;
    struct csched_vcpu *svc;
    int loop;
#define cpustr keyhandler_scratch

    spc = CSCHED_PCPU(cpu);
    runq = &spc->runq;

    cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_sibling_map, cpu));
    printk(" sort=%d, sibling=%s, ", spc->runq_sort_last, cpustr);
    cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_core_map, cpu));
    printk("core=%s\n", cpustr);

    /* current VCPU */
    svc = CSCHED_VCPU(per_cpu(schedule_data, cpu).curr);
    if ( svc )
    {
        printk("\trun: ");
        csched_dump_vcpu(svc);
    }

    loop = 0;
    list_for_each( iter, runq )
    {
        svc = __runq_elem(iter);
        if ( svc )
        {
            printk("\t%3d: ", ++loop);
            csched_dump_vcpu(svc);
        }
    }
#undef cpustr
}

static void
csched_dump(const struct scheduler *ops)
{
    struct list_head *iter_sdom, *iter_svc;
    struct csched_private *prv = CSCHED_PRIV(ops);
    int loop;
#define idlers_buf keyhandler_scratch

    printk("info:\n"
           "\tncpus = %u\n"
           "\tmaster = %u\n"
           "\tcredit = %u\n"
           "\tcredit balance = %d\n"
           "\tweight = %u\n"
           "\trunq_sort = %u\n"
           "\tdefault-weight = %d\n"
           "\tmsecs per tick = %dms\n"
           "\tcredits per msec = %d\n"
           "\tticks per tslice = %d\n"
           "\tticks per acct = %d\n"
           "\tmigration delay = %uus\n",
           prv->ncpus,
           prv->master,
           prv->credit,
           prv->credit_balance,
           prv->weight,
           prv->runq_sort,
           CSCHED_DEFAULT_WEIGHT,
           CSCHED_MSECS_PER_TICK,
           CSCHED_CREDITS_PER_MSEC,
           CSCHED_TICKS_PER_TSLICE,
           CSCHED_TICKS_PER_ACCT,
           vcpu_migration_delay);

    cpumask_scnprintf(idlers_buf, sizeof(idlers_buf), prv->idlers);
    printk("idlers: %s\n", idlers_buf);

    printk("active vcpus:\n");
    loop = 0;
    list_for_each( iter_sdom, &prv->active_sdom )
    {
        struct csched_dom *sdom;
        sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);

        list_for_each( iter_svc, &sdom->active_vcpu )
        {
            struct csched_vcpu *svc;
            svc = list_entry(iter_svc, struct csched_vcpu, active_vcpu_elem);

            printk("\t%3d: ", ++loop);
            csched_dump_vcpu(svc);
        }
    }
#undef idlers_buf
}

static int
csched_init(struct scheduler *ops)
{
    struct csched_private *prv;

    prv = xmalloc(struct csched_private);
    if ( prv == NULL )
        return -ENOMEM;

    memset(prv, 0, sizeof(*prv));
    ops->sched_data = prv;
    spin_lock_init(&prv->lock);
    INIT_LIST_HEAD(&prv->active_sdom);
    prv->master = UINT_MAX;

    return 0;
}

static void
csched_deinit(const struct scheduler *ops)
{
    struct csched_private *prv;

    prv = CSCHED_PRIV(ops);
    if ( prv != NULL )
        xfree(prv);
}

static void csched_tick_suspend(const struct scheduler *ops, unsigned int cpu)
{
    struct csched_pcpu *spc;

    spc = CSCHED_PCPU(cpu);

    stop_timer(&spc->ticker);
}

static void csched_tick_resume(const struct scheduler *ops, unsigned int cpu)
{
    struct csched_pcpu *spc;
    uint64_t now = NOW();

    spc = CSCHED_PCPU(cpu);

    set_timer(&spc->ticker, now + MILLISECS(CSCHED_MSECS_PER_TICK)
              - now % MILLISECS(CSCHED_MSECS_PER_TICK) );
}

static struct csched_private _csched_priv;

const struct scheduler sched_credit_def = {
    .name           = "SMP Credit Scheduler",
    .opt_name       = "credit",
    .sched_id       = XEN_SCHEDULER_CREDIT,
    .sched_data     = &_csched_priv,

    .init_domain    = csched_dom_init,
    .destroy_domain = csched_dom_destroy,

    .insert_vcpu    = csched_vcpu_insert,
    .remove_vcpu    = csched_vcpu_remove,

    .sleep          = csched_vcpu_sleep,
    .wake           = csched_vcpu_wake,
    .yield          = csched_vcpu_yield,

    .adjust         = csched_dom_cntl,

    .pick_cpu       = csched_cpu_pick,
    .do_schedule    = csched_schedule,

    .dump_cpu_state = csched_dump_pcpu,
    .dump_settings  = csched_dump,
    .init           = csched_init,
    .deinit         = csched_deinit,
    .alloc_vdata    = csched_alloc_vdata,
    .free_vdata     = csched_free_vdata,
    .alloc_pdata    = csched_alloc_pdata,
    .free_pdata     = csched_free_pdata,
    .alloc_domdata  = csched_alloc_domdata,
    .free_domdata   = csched_free_domdata,

    .tick_suspend   = csched_tick_suspend,
    .tick_resume    = csched_tick_resume,
};
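
/*
 * Usage note (assumption: the standard boot-time scheduler selection): the
 * .opt_name field above means this scheduler is selected by booting the
 * hypervisor with "sched=credit", while XEN_SCHEDULER_CREDIT identifies it
 * in the sysctl/domctl scheduler interfaces used by the toolstack.
 */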