xen/common/sched_sedf.c @ 22848:6341fe0f4e5a (tag 4.1.0-rc2)
author: Keir Fraser <keir@xen.org>
date:   Tue Jan 25 14:06:55 2011 +0000 (2011-01-25)

/******************************************************************************
 * Simple EDF scheduler for xen
 *
 * by Stephan Diestelhorst (C) 2004 Cambridge University
 * based on code by Mark Williamson (C) 2004 Intel Research Cambridge
 */

#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/sched-if.h>
#include <xen/timer.h>
#include <xen/softirq.h>
#include <xen/time.h>
#include <xen/errno.h>

/* Verbosity settings */
#define SEDFLEVEL 0
#define PRINT(_f, _a...)             \
    do {                             \
        if ( (_f) <= SEDFLEVEL )     \
            printk(_a);              \
    } while ( 0 )

#define SEDF_CPUONLINE(_pool)                                             \
    (((_pool) == NULL) ? &cpupool_free_cpus : &(_pool)->cpu_valid)

#ifndef NDEBUG
#define SEDF_STATS
#define CHECK(_p)                                           \
    do {                                                    \
        if ( !(_p) )                                        \
            printk("Check '%s' failed, line %d, file %s\n", \
                   #_p , __LINE__, __FILE__);               \
    } while ( 0 )
#else
#define CHECK(_p) ((void)0)
#endif

#define EXTRA_NONE       (0)
#define EXTRA_AWARE      (1)
#define EXTRA_RUN_PEN    (2)
#define EXTRA_RUN_UTIL   (4)
#define EXTRA_WANT_PEN_Q (8)
#define EXTRA_PEN_Q      (0)
#define EXTRA_UTIL_Q     (1)
#define SEDF_ASLEEP      (16)

#define EXTRA_QUANTUM    (MICROSECS(500))
#define WEIGHT_PERIOD    (MILLISECS(100))
#define WEIGHT_SAFETY    (MILLISECS(5))

#define PERIOD_MAX       MILLISECS(10000) /* 10s  */
#define PERIOD_MIN       (MICROSECS(10))  /* 10us */
#define SLICE_MIN        (MICROSECS(5))   /*  5us */

#define IMPLY(a, b) (!(a) || (b))
#define EQ(a, b)    ((!!(a)) == (!!(b)))
struct sedf_dom_info {
    struct domain *domain;
};

struct sedf_vcpu_info {
    struct vcpu *vcpu;
    struct list_head list;
    struct list_head extralist[2];

    /* Parameters for EDF */
    s_time_t  period;  /* = relative deadline */
    s_time_t  slice;   /* = worst case execution time */

    /* Advanced parameters */
    /* Latency scaling */
    s_time_t  period_orig;
    s_time_t  slice_orig;
    s_time_t  latency;

    /* Status of domain */
    int       status;
    /* Weights for "Scheduling for beginners/ lazy/ etc." ;) */
    short     weight;
    short     extraweight;
    /* Bookkeeping */
    s_time_t  deadl_abs;
    s_time_t  sched_start_abs;
    s_time_t  cputime;
    /* Times the domain un-/blocked */
    s_time_t  block_abs;
    s_time_t  unblock_abs;

    /* Scores for {util, block penalty}-weighted extratime distribution */
    int       score[2];
    s_time_t  short_block_lost_tot;

    /* Statistics */
    s_time_t  extra_time_tot;

#ifdef SEDF_STATS
    s_time_t  block_time_tot;
    s_time_t  penalty_time_tot;
    int       block_tot;
    int       short_block_tot;
    int       long_block_tot;
    int       short_cont;
    int       pen_extra_blocks;
    int       pen_extra_slices;
#endif
};

struct sedf_cpu_info {
    struct list_head runnableq;
    struct list_head waitq;
    struct list_head extraq[2];
    s_time_t         current_slice_expires;
};
#define EDOM_INFO(d)   ((struct sedf_vcpu_info *)((d)->sched_priv))
#define CPU_INFO(cpu)  \
    ((struct sedf_cpu_info *)per_cpu(schedule_data, cpu).sched_priv)
#define LIST(d)        (&EDOM_INFO(d)->list)
#define EXTRALIST(d,i) (&(EDOM_INFO(d)->extralist[i]))
#define RUNQ(cpu)      (&CPU_INFO(cpu)->runnableq)
#define WAITQ(cpu)     (&CPU_INFO(cpu)->waitq)
#define EXTRAQ(cpu,i)  (&(CPU_INFO(cpu)->extraq[i]))
#define IDLETASK(cpu)  (idle_vcpu[cpu])

#define PERIOD_BEGIN(inf) ((inf)->deadl_abs - (inf)->period)

#define DIV_UP(x,y) (((x) + (y) - 1) / (y))

#define extra_runs(inf)      ((inf)->status & 6)
#define extra_get_cur_q(inf) ((((inf)->status & 6) >> 1) - 1)
#define sedf_runnable(edom)  (!(EDOM_INFO(edom)->status & SEDF_ASLEEP))
static void sedf_dump_cpu_state(const struct scheduler *ops, int i);

static inline int extraq_on(struct vcpu *d, int i)
{
    return ((EXTRALIST(d,i)->next != NULL) &&
            (EXTRALIST(d,i)->next != EXTRALIST(d,i)));
}

static inline void extraq_add_head(struct vcpu *d, int i)
{
    list_add(EXTRALIST(d,i), EXTRAQ(d->processor,i));
    ASSERT(extraq_on(d, i));
}

static inline void extraq_add_tail(struct vcpu *d, int i)
{
    list_add_tail(EXTRALIST(d,i), EXTRAQ(d->processor,i));
    ASSERT(extraq_on(d, i));
}

static inline void extraq_del(struct vcpu *d, int i)
{
    struct list_head *list = EXTRALIST(d,i);
    ASSERT(extraq_on(d,i));
    PRINT(3, "Removing domain %i.%i from L%i extraq\n",
          d->domain->domain_id, d->vcpu_id, i);
    list_del(list);
    list->next = NULL;
    ASSERT(!extraq_on(d, i));
}
/*
 * Adds a domain to the queue of processes which are aware of extra time.
 * The list is sorted by score, where a lower score means higher priority
 * for an extra slice.  It also updates the score, by simply subtracting
 * a fixed value from each entry, in order to avoid overflow.  The
 * algorithm works by charging each domain that received extratime with
 * an inverse of its weight.
 */
static inline void extraq_add_sort_update(struct vcpu *d, int i, int sub)
{
    struct list_head      *cur;
    struct sedf_vcpu_info *curinf;

    ASSERT(!extraq_on(d,i));

    PRINT(3, "Adding domain %i.%i (score= %i, short_pen= %"PRIi64")"
          " to L%i extraq\n",
          d->domain->domain_id, d->vcpu_id, EDOM_INFO(d)->score[i],
          EDOM_INFO(d)->short_block_lost_tot, i);

    /*
     * Iterate through all elements to find our "hole" and on our way
     * update all the other scores.
     */
    list_for_each ( cur, EXTRAQ(d->processor, i) )
    {
        curinf = list_entry(cur, struct sedf_vcpu_info, extralist[i]);
        curinf->score[i] -= sub;
        if ( EDOM_INFO(d)->score[i] < curinf->score[i] )
            break;
        PRINT(4, "\tbehind domain %i.%i (score= %i)\n",
              curinf->vcpu->domain->domain_id,
              curinf->vcpu->vcpu_id, curinf->score[i]);
    }

    /* cur now contains the element, before which we'll enqueue. */
    PRINT(3, "\tlist_add to %p\n", cur->prev);
    list_add(EXTRALIST(d,i), cur->prev);

    /* Continue updating the extraq. */
    if ( (cur != EXTRAQ(d->processor,i)) && sub )
    {
        for ( cur = cur->next; cur != EXTRAQ(d->processor,i); cur = cur->next )
        {
            curinf = list_entry(cur, struct sedf_vcpu_info, extralist[i]);
            curinf->score[i] -= sub;
            PRINT(4, "\tupdating domain %i.%i (score= %u)\n",
                  curinf->vcpu->domain->domain_id,
                  curinf->vcpu->vcpu_id, curinf->score[i]);
        }
    }

    ASSERT(extraq_on(d,i));
}
static inline void extraq_check(struct vcpu *d)
{
    if ( extraq_on(d, EXTRA_UTIL_Q) )
    {
        PRINT(2, "Dom %i.%i is on L1 extraQ\n",
              d->domain->domain_id, d->vcpu_id);

        if ( !(EDOM_INFO(d)->status & EXTRA_AWARE) &&
             !extra_runs(EDOM_INFO(d)) )
        {
            extraq_del(d, EXTRA_UTIL_Q);
            PRINT(2, "Removed dom %i.%i from L1 extraQ\n",
                  d->domain->domain_id, d->vcpu_id);
        }
    }
    else
    {
        PRINT(2, "Dom %i.%i is NOT on L1 extraQ\n",
              d->domain->domain_id,
              d->vcpu_id);

        if ( (EDOM_INFO(d)->status & EXTRA_AWARE) && sedf_runnable(d) )
        {
            extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
            PRINT(2, "Added dom %i.%i to L1 extraQ\n",
                  d->domain->domain_id, d->vcpu_id);
        }
    }
}

static inline void extraq_check_add_unblocked(struct vcpu *d, int priority)
{
    struct sedf_vcpu_info *inf = EDOM_INFO(d);

    if ( inf->status & EXTRA_AWARE )
        /* Put on the weighted extraq without updating any scores. */
        extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
}

static inline int __task_on_queue(struct vcpu *d)
{
    return (((LIST(d))->next != NULL) && (LIST(d)->next != LIST(d)));
}
static inline void __del_from_queue(struct vcpu *d)
{
    struct list_head *list = LIST(d);
    ASSERT(__task_on_queue(d));
    PRINT(3, "Removing domain %i.%i (bop= %"PRIu64") from runq/waitq\n",
          d->domain->domain_id, d->vcpu_id, PERIOD_BEGIN(EDOM_INFO(d)));
    list_del(list);
    list->next = NULL;
    ASSERT(!__task_on_queue(d));
}

typedef int(*list_comparer)(struct list_head* el1, struct list_head* el2);

static inline void list_insert_sort(
    struct list_head *list, struct list_head *element, list_comparer comp)
{
    struct list_head *cur;

    /* Iterate through all elements to find our "hole". */
    list_for_each( cur, list )
        if ( comp(element, cur) < 0 )
            break;

    /* cur now contains the element, before which we'll enqueue. */
    PRINT(3, "\tlist_add to %p\n", cur->prev);
    list_add(element, cur->prev);
}

#define DOMAIN_COMPARER(name, field, comp1, comp2)                      \
static int name##_comp(struct list_head* el1, struct list_head* el2)    \
{                                                                       \
    struct sedf_vcpu_info *d1, *d2;                                     \
    d1 = list_entry(el1, struct sedf_vcpu_info, field);                 \
    d2 = list_entry(el2, struct sedf_vcpu_info, field);                 \
    if ( (comp1) == (comp2) )                                           \
        return 0;                                                       \
    if ( (comp1) < (comp2) )                                            \
        return -1;                                                      \
    else                                                                \
        return 1;                                                       \
}
/*
 * Adds a domain to the queue of processes which wait for the beginning of
 * the next period; this list is therefore sorted by this time, which is
 * simply absolute deadline - period.
 */
DOMAIN_COMPARER(waitq, list, PERIOD_BEGIN(d1), PERIOD_BEGIN(d2));
static inline void __add_to_waitqueue_sort(struct vcpu *v)
{
    ASSERT(!__task_on_queue(v));
    PRINT(3, "Adding domain %i.%i (bop= %"PRIu64") to waitq\n",
          v->domain->domain_id, v->vcpu_id, PERIOD_BEGIN(EDOM_INFO(v)));
    list_insert_sort(WAITQ(v->processor), LIST(v), waitq_comp);
    ASSERT(__task_on_queue(v));
}

/*
 * Adds a domain to the queue of processes which have started their current
 * period and are runnable (i.e. not blocked, dying, ...).  The first element
 * on this list is running on the processor; if the list is empty the idle
 * task will run.  As we are implementing EDF, this list is sorted by
 * deadlines.
 */
DOMAIN_COMPARER(runq, list, d1->deadl_abs, d2->deadl_abs);
static inline void __add_to_runqueue_sort(struct vcpu *v)
{
    PRINT(3, "Adding domain %i.%i (deadl= %"PRIu64") to runq\n",
          v->domain->domain_id, v->vcpu_id, EDOM_INFO(v)->deadl_abs);
    list_insert_sort(RUNQ(v->processor), LIST(v), runq_comp);
}
static void *sedf_alloc_vdata(const struct scheduler *ops, struct vcpu *v, void *dd)
{
    struct sedf_vcpu_info *inf;

    inf = xmalloc(struct sedf_vcpu_info);
    if ( inf == NULL )
        return NULL;

    memset(inf, 0, sizeof(struct sedf_vcpu_info));
    inf->vcpu = v;

    /* Every VCPU gets an equal share of extratime by default. */
    inf->deadl_abs   = 0;
    inf->latency     = 0;
    inf->status      = EXTRA_AWARE | SEDF_ASLEEP;
    inf->extraweight = 1;

    if ( v->domain->domain_id == 0 )
    {
        /* Domain0 gets 75% guaranteed (15ms every 20ms). */
        inf->period = MILLISECS(20);
        inf->slice  = MILLISECS(15);
    }
    else
    {
        /* Best-effort extratime only. */
        inf->period = WEIGHT_PERIOD;
        inf->slice  = 0;
    }

    inf->period_orig = inf->period;
    inf->slice_orig  = inf->slice;
    INIT_LIST_HEAD(&(inf->list));
    INIT_LIST_HEAD(&(inf->extralist[EXTRA_PEN_Q]));
    INIT_LIST_HEAD(&(inf->extralist[EXTRA_UTIL_Q]));

    if ( !is_idle_vcpu(v) )
    {
        extraq_check(v);
    }
    else
    {
        inf->deadl_abs = 0;
        inf->status &= ~SEDF_ASLEEP;
    }

    return inf;
}
static void *
sedf_alloc_pdata(const struct scheduler *ops, int cpu)
{
    struct sedf_cpu_info *spc;

    spc = xmalloc(struct sedf_cpu_info);
    BUG_ON(spc == NULL);
    memset(spc, 0, sizeof(*spc));
    INIT_LIST_HEAD(&spc->waitq);
    INIT_LIST_HEAD(&spc->runnableq);
    INIT_LIST_HEAD(&spc->extraq[EXTRA_PEN_Q]);
    INIT_LIST_HEAD(&spc->extraq[EXTRA_UTIL_Q]);

    return (void *)spc;
}

static void
sedf_free_pdata(const struct scheduler *ops, void *spc, int cpu)
{
    if ( spc == NULL )
        return;

    xfree(spc);
}

static void sedf_free_vdata(const struct scheduler *ops, void *priv)
{
    xfree(priv);
}

static void *
sedf_alloc_domdata(const struct scheduler *ops, struct domain *d)
{
    void *mem;

    mem = xmalloc(struct sedf_dom_info);
    if ( mem == NULL )
        return NULL;

    memset(mem, 0, sizeof(struct sedf_dom_info));

    return mem;
}

static int sedf_init_domain(const struct scheduler *ops, struct domain *d)
{
    d->sched_priv = sedf_alloc_domdata(ops, d);
    if ( d->sched_priv == NULL )
        return -ENOMEM;

    return 0;
}

static void sedf_free_domdata(const struct scheduler *ops, void *data)
{
    xfree(data);
}

static void sedf_destroy_domain(const struct scheduler *ops, struct domain *d)
{
    sedf_free_domdata(ops, d->sched_priv);
}

static int sedf_pick_cpu(const struct scheduler *ops, struct vcpu *v)
{
    cpumask_t online_affinity;
    cpumask_t *online;

    online = SEDF_CPUONLINE(v->domain->cpupool);
    cpus_and(online_affinity, v->cpu_affinity, *online);
    return first_cpu(online_affinity);
}
/*
 * Handles the rescheduling & bookkeeping of domains running in their
 * guaranteed timeslice.
 */
static void desched_edf_dom(s_time_t now, struct vcpu* d)
{
    struct sedf_vcpu_info* inf = EDOM_INFO(d);

    /* Current domain is running in real time mode. */
    ASSERT(__task_on_queue(d));

    /* Update the domain's cputime. */
    inf->cputime += now - inf->sched_start_abs;

    /*
     * Scheduling decisions which don't remove the running domain from the
     * runq.
     */
    if ( (inf->cputime < inf->slice) && sedf_runnable(d) )
        return;

    __del_from_queue(d);

    /*
     * Manage bookkeeping (i.e. calculate next deadline, memorise
     * overrun-time of slice) of finished domains.
     */
    if ( inf->cputime >= inf->slice )
    {
        inf->cputime -= inf->slice;

        if ( inf->period < inf->period_orig )
        {
            /* This domain runs in latency scaling or burst mode. */
            inf->period *= 2;
            inf->slice  *= 2;
            if ( (inf->period > inf->period_orig) ||
                 (inf->slice > inf->slice_orig) )
            {
                /* Reset slice and period. */
                inf->period = inf->period_orig;
                inf->slice  = inf->slice_orig;
            }
        }

        /* Set next deadline. */
        inf->deadl_abs += inf->period;
    }

    /* Add a runnable domain to the waitqueue. */
    if ( sedf_runnable(d) )
    {
        __add_to_waitqueue_sort(d);
    }
    else
    {
        /* We have a blocked realtime task -> remove it from exqs too. */
        if ( extraq_on(d, EXTRA_PEN_Q) )
            extraq_del(d, EXTRA_PEN_Q);
        if ( extraq_on(d, EXTRA_UTIL_Q) )
            extraq_del(d, EXTRA_UTIL_Q);
    }

    ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
    ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q),
                 sedf_runnable(d)));
}
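
/*
 * Worked example of the latency-scaling path above (illustrative, not in
 * the original source): assume period_orig = 16ms, slice_orig = 4ms, and
 * a latency hint that shrank the vcpu to period = 2ms, slice = 0.5ms on
 * unblock.  Each completed slice doubles both values:
 *     (2ms, 0.5ms) -> (4ms, 1ms) -> (8ms, 2ms) -> (16ms, 4ms)
 * so the vcpu converges back to its configured reservation within a few
 * periods while keeping slice/period, i.e. its CPU share, constant at 25%.
 */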
/* Update all elements on the queues. */
static void update_queues(
    s_time_t now, struct list_head *runq, struct list_head *waitq)
{
    struct list_head      *cur, *tmp;
    struct sedf_vcpu_info *curinf;

    PRINT(3, "Updating waitq..\n");

    /*
     * Check for the first elements of the waitqueue, whether their
     * next period has already started.
     */
    list_for_each_safe ( cur, tmp, waitq )
    {
        curinf = list_entry(cur, struct sedf_vcpu_info, list);
        PRINT(4, "\tLooking @ dom %i.%i\n",
              curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id);
        if ( PERIOD_BEGIN(curinf) > now )
            break;
        __del_from_queue(curinf->vcpu);
        __add_to_runqueue_sort(curinf->vcpu);
    }

    PRINT(3, "Updating runq..\n");

    /* Process the runq: find domains that are on the runq but shouldn't be. */
    list_for_each_safe ( cur, tmp, runq )
    {
        curinf = list_entry(cur, struct sedf_vcpu_info, list);
        PRINT(4, "\tLooking @ dom %i.%i\n",
              curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id);

        if ( unlikely(curinf->slice == 0) )
        {
            /* Ignore domains with empty slice. */
            PRINT(4, "\tUpdating zero-slice domain %i.%i\n",
                  curinf->vcpu->domain->domain_id,
                  curinf->vcpu->vcpu_id);
            __del_from_queue(curinf->vcpu);

            /* Move them to their next period. */
            curinf->deadl_abs += curinf->period;

            /* Ensure that the start of the next period is in the future. */
            if ( unlikely(PERIOD_BEGIN(curinf) < now) )
                curinf->deadl_abs +=
                    (DIV_UP(now - PERIOD_BEGIN(curinf),
                            curinf->period)) * curinf->period;

            /* Put them back into the queue. */
            __add_to_waitqueue_sort(curinf->vcpu);
        }
        else if ( unlikely((curinf->deadl_abs < now) ||
                           (curinf->cputime > curinf->slice)) )
        {
            /*
             * We missed the deadline or the slice was already finished.
             * Might happen because of dom_adj.
             */
            PRINT(4, "\tDomain %i.%i exceeded its deadline/"
                  "slice (%"PRIu64" / %"PRIu64") now: %"PRIu64
                  " cputime: %"PRIu64"\n",
                  curinf->vcpu->domain->domain_id,
                  curinf->vcpu->vcpu_id,
                  curinf->deadl_abs, curinf->slice, now,
                  curinf->cputime);
            __del_from_queue(curinf->vcpu);

            /* Common case: we miss one period. */
            curinf->deadl_abs += curinf->period;

            /*
             * If we are still behind: modulo arithmetic, force deadline
             * to be in future and aligned to period borders.
             */
            if ( unlikely(curinf->deadl_abs < now) )
                curinf->deadl_abs +=
                    DIV_UP(now - curinf->deadl_abs,
                           curinf->period) * curinf->period;
            ASSERT(curinf->deadl_abs >= now);

            /* Give a fresh slice. */
            curinf->cputime = 0;
            if ( PERIOD_BEGIN(curinf) > now )
                __add_to_waitqueue_sort(curinf->vcpu);
            else
                __add_to_runqueue_sort(curinf->vcpu);
        }
        else
            break;
    }

    PRINT(3, "done updating the queues\n");
}
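
/*
 * Worked example of the deadline catch-up arithmetic above (illustrative,
 * not in the original source): take period = 10ms, deadl_abs = 30ms and
 * now = 57ms.  The common-case step moves the deadline to 40ms, which is
 * still in the past, so the modulo step adds
 *     DIV_UP(57 - 40, 10) * 10 = 2 * 10 = 20ms
 * giving deadl_abs = 60ms: the first deadline after `now' that stays
 * aligned to the original 10ms period grid.
 */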
/*
 * Removes a domain from the head of the according extraQ and requeues
 * it at a specified position:
 *     round-robin extratime: end of extraQ
 *     weighted ext.:         insert in sorted list by score
 * If the domain is blocked / has regained its short-block-loss time
 * it is not put on any queue.
 */
static void desched_extra_dom(s_time_t now, struct vcpu *d)
{
    struct sedf_vcpu_info *inf = EDOM_INFO(d);
    int i = extra_get_cur_q(inf);
    unsigned long oldscore;

    ASSERT(extraq_on(d, i));

    /* Unset all running flags. */
    inf->status &= ~(EXTRA_RUN_PEN | EXTRA_RUN_UTIL);
    /* Fresh slice for the next run. */
    inf->cputime = 0;
    /* Accumulate total extratime. */
    inf->extra_time_tot += now - inf->sched_start_abs;
    /* Remove extradomain from head of the queue. */
    extraq_del(d, i);

    /* Update the score. */
    oldscore = inf->score[i];
    if ( i == EXTRA_PEN_Q )
    {
        /* Domain was running in L0 extraq. */
        /* Reduce block lost; probably more sophistication needed here! */
        /*inf->short_block_lost_tot -= EXTRA_QUANTUM;*/
        inf->short_block_lost_tot -= now - inf->sched_start_abs;
        PRINT(3, "Domain %i.%i: Short_block_loss: %"PRIi64"\n",
              inf->vcpu->domain->domain_id, inf->vcpu->vcpu_id,
              inf->short_block_lost_tot);
#if 0
        /*
         * KAF: If we don't exit short-blocking state at this point
         * domain0 can steal all CPU for up to 10 seconds before
         * scheduling settles down (when competing against another
         * CPU-bound domain). Doing this seems to make things behave
         * nicely. No one gets starved by default.
         */
        if ( inf->short_block_lost_tot <= 0 )
#endif
        {
            PRINT(4, "Domain %i.%i compensated short block loss!\n",
                  inf->vcpu->domain->domain_id, inf->vcpu->vcpu_id);
            /* We have (over-)compensated our block penalty. */
            inf->short_block_lost_tot = 0;
            /* We don't want a place on the penalty queue anymore! */
            inf->status &= ~EXTRA_WANT_PEN_Q;
            goto check_extra_queues;
        }

        /*
         * We have to go again for another try in the block-extraq;
         * the score is not used incrementally here, as this is
         * already done by recalculating the block_lost.
         */
        inf->score[EXTRA_PEN_Q] = (inf->period << 10) /
            inf->short_block_lost_tot;
        oldscore = 0;
    }
    else
    {
        /*
         * Domain was running in L1 extraq => score is inverse of
         * utilization and is used somewhat incrementally!
         */
        if ( !inf->extraweight )
            /* NB: use fixed point arithmetic with 10 bits. */
            inf->score[EXTRA_UTIL_Q] = (inf->period << 10) /
                inf->slice;
        else
            /*
             * Conversion between realtime utilisation and extraweight:
             * full (i.e. 100%) utilization is equivalent to 128 extraweight.
             */
            inf->score[EXTRA_UTIL_Q] = (1<<17) / inf->extraweight;
    }

 check_extra_queues:
    /* Adding a runnable domain to the right queue and removing blocked ones. */
    if ( sedf_runnable(d) )
    {
        /* Add according to score: weighted round robin. */
        if ( ((inf->status & EXTRA_AWARE) && (i == EXTRA_UTIL_Q)) ||
             ((inf->status & EXTRA_WANT_PEN_Q) && (i == EXTRA_PEN_Q)) )
            extraq_add_sort_update(d, i, oldscore);
    }
    else
    {
        /* Remove this blocked domain from the waitq! */
        __del_from_queue(d);
        /* Make sure that we remove a blocked domain from the other extraq too. */
        if ( i == EXTRA_PEN_Q )
        {
            if ( extraq_on(d, EXTRA_UTIL_Q) )
                extraq_del(d, EXTRA_UTIL_Q);
        }
        else
        {
            if ( extraq_on(d, EXTRA_PEN_Q) )
                extraq_del(d, EXTRA_PEN_Q);
        }
    }

    ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
    ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q),
                 sedf_runnable(d)));
}
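
/*
 * Worked example of the 10-bit fixed-point scores above (illustrative,
 * not in the original source): the L1 score is the inverse utilization in
 * fixed point, (period << 10) / slice, so slice == period (100%
 * utilization) yields 1 << 10 == 1024.  The extraweight branch computes
 * (1 << 17) / extraweight, and (1 << 17) / 128 == 1024 as well, which is
 * exactly the claimed equivalence of 128 extraweight and 100% utilization.
 * A vcpu with period = 20ms and slice = 15ms would score
 * (20 << 10) / 15 ~= 1365; lower scores sort earlier on the extraq and
 * thus win extra time more often.
 */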
static struct task_slice sedf_do_extra_schedule(
    s_time_t now, s_time_t end_xt, struct list_head *extraq[], int cpu)
{
    struct task_slice      ret;
    struct sedf_vcpu_info *runinf;
    ASSERT(end_xt > now);

    /* Enough time left to use for extratime? */
    if ( end_xt - now < EXTRA_QUANTUM )
        goto return_idle;

    if ( !list_empty(extraq[EXTRA_PEN_Q]) )
    {
        /*
         * We still have elements on the level 0 extraq
         * => let those run first!
         */
        runinf = list_entry(extraq[EXTRA_PEN_Q]->next,
                            struct sedf_vcpu_info, extralist[EXTRA_PEN_Q]);
        runinf->status |= EXTRA_RUN_PEN;
        ret.task = runinf->vcpu;
        ret.time = EXTRA_QUANTUM;
#ifdef SEDF_STATS
        runinf->pen_extra_slices++;
#endif
    }
    else
    {
        if ( !list_empty(extraq[EXTRA_UTIL_Q]) )
        {
            /* Use elements from the normal extraqueue. */
            runinf = list_entry(extraq[EXTRA_UTIL_Q]->next,
                                struct sedf_vcpu_info,
                                extralist[EXTRA_UTIL_Q]);
            runinf->status |= EXTRA_RUN_UTIL;
            ret.task = runinf->vcpu;
            ret.time = EXTRA_QUANTUM;
        }
        else
            goto return_idle;
    }

    ASSERT(ret.time > 0);
    ASSERT(sedf_runnable(ret.task));
    return ret;

 return_idle:
    ret.task = IDLETASK(cpu);
    ret.time = end_xt - now;
    ASSERT(ret.time > 0);
    ASSERT(sedf_runnable(ret.task));
    return ret;
}
/*
 * Main scheduling function.
 * Reasons for calling this function are:
 *     - timeslice for the current period used up
 *     - domain on waitqueue has started its period
 *     - and various others ;) in general: determine which domain to run next
 */
static struct task_slice sedf_do_schedule(
    const struct scheduler *ops, s_time_t now, bool_t tasklet_work_scheduled)
{
    int                    cpu   = smp_processor_id();
    struct list_head      *runq  = RUNQ(cpu);
    struct list_head      *waitq = WAITQ(cpu);
    struct sedf_vcpu_info *inf   = EDOM_INFO(current);
    struct list_head      *extraq[] = {
        EXTRAQ(cpu, EXTRA_PEN_Q), EXTRAQ(cpu, EXTRA_UTIL_Q)};
    struct sedf_vcpu_info *runinf, *waitinf;
    struct task_slice      ret;

    /* Idle tasks don't need any of the following stuff. */
    if ( is_idle_vcpu(current) )
        goto check_waitq;

    /*
     * Create local state of the status of the domain, in order to avoid
     * inconsistent state during scheduling decisions, because data for
     * vcpu_runnable is not protected by the scheduling lock!
     */
    if ( !vcpu_runnable(current) )
        inf->status |= SEDF_ASLEEP;

    if ( inf->status & SEDF_ASLEEP )
        inf->block_abs = now;

    if ( unlikely(extra_runs(inf)) )
    {
        /* Special treatment of domains running in extra time. */
        desched_extra_dom(now, current);
    }
    else
    {
        desched_edf_dom(now, current);
    }
 check_waitq:
    update_queues(now, runq, waitq);

    /*
     * Now simply pick the first domain from the runqueue, which has the
     * earliest deadline, because the list is sorted.
     */

    /* Tasklet work (which runs in idle VCPU context) overrides all else. */
    if ( tasklet_work_scheduled ||
         (list_empty(runq) && list_empty(waitq)) ||
         unlikely(!cpu_isset(cpu, *SEDF_CPUONLINE(per_cpu(cpupool, cpu)))) )
    {
        ret.task = IDLETASK(cpu);
        ret.time = SECONDS(1);
    }
    else if ( !list_empty(runq) )
    {
        runinf   = list_entry(runq->next, struct sedf_vcpu_info, list);
        ret.task = runinf->vcpu;
        if ( !list_empty(waitq) )
        {
            waitinf  = list_entry(waitq->next,
                                  struct sedf_vcpu_info, list);
            /*
             * Rerun scheduler, when scheduled domain reaches its
             * end of slice or the first domain from the waitqueue
             * gets ready.
             */
            ret.time = MIN(now + runinf->slice - runinf->cputime,
                           PERIOD_BEGIN(waitinf)) - now;
        }
        else
        {
            ret.time = runinf->slice - runinf->cputime;
        }
    }
    else
    {
        waitinf  = list_entry(waitq->next, struct sedf_vcpu_info, list);
        /*
         * We could not find any suitable domain
         * => look for domains that are aware of extratime.
         */
        ret = sedf_do_extra_schedule(now, PERIOD_BEGIN(waitinf),
                                     extraq, cpu);
    }

    /*
     * TODO: Do something USEFUL when this happens and find out, why it
     * still can happen!!!
     */
    if ( ret.time < 0)
    {
        printk("Ouch! We are seriously BEHIND schedule! %"PRIi64"\n",
               ret.time);
        ret.time = EXTRA_QUANTUM;
    }

    ret.migrated = 0;

    EDOM_INFO(ret.task)->sched_start_abs = now;
    CHECK(ret.time > 0);
    ASSERT(sedf_runnable(ret.task));
    CPU_INFO(cpu)->current_slice_expires = now + ret.time;
    return ret;
}
static void sedf_sleep(const struct scheduler *ops, struct vcpu *d)
{
    PRINT(2, "sedf_sleep was called, domain-id %i.%i\n",
          d->domain->domain_id, d->vcpu_id);

    if ( is_idle_vcpu(d) )
        return;

    EDOM_INFO(d)->status |= SEDF_ASLEEP;

    if ( per_cpu(schedule_data, d->processor).curr == d )
    {
        cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
    }
    else
    {
        if ( __task_on_queue(d) )
            __del_from_queue(d);
        if ( extraq_on(d, EXTRA_UTIL_Q) )
            extraq_del(d, EXTRA_UTIL_Q);
        if ( extraq_on(d, EXTRA_PEN_Q) )
            extraq_del(d, EXTRA_PEN_Q);
    }
}
/* This function wakes up a domain, i.e. moves it onto the waitqueue.
 * Things to mention are: admission control is taking place nowhere at
 * the moment, so we can't be sure whether it is safe to wake the domain
 * up at all. Anyway, even if it is safe (total cpu usage <= 100%) there are
 * some considerations on when to allow the domain to wake up and have its
 * first deadline...
 * I detected 3 cases, which could describe the possible behaviour of the
 * scheduler, and I'll try to make them more clear:
 *
 * 1. Very conservative
 *     -when a blocked domain unblocks, it is allowed to start execution at
 *      the beginning of the next complete period
 *      (D..deadline, R..running, B..blocking/sleeping, U..unblocking/waking up
 *
 *      DRRB_____D__U_____DRRRRR___D________ ...
 *
 *     -this causes the domain to miss a period (and a deadline)
 *     -doesn't disturb the schedule at all
 *     -deadlines keep occurring isochronously
 *
 * 2. Conservative Part 1: Short Unblocking
 *     -when a domain unblocks in the same period as it was blocked it
 *      unblocks and may consume the rest of its original time-slice minus
 *      the time it was blocked
 *      (assume period=9, slice=5)
 *
 *      DRB_UR___DRRRRR___D...
 *
 *     -this also doesn't disturb scheduling, but might lead to the fact that
 *      the domain can't finish its workload in the period
 *     -in addition to that the domain can be treated prioritised when
 *      extratime is available
 *     -addition: experiments have shown that this may have a HUGE impact on
 *      performance of other domains, because it can lead to excessive context
 *      switches
 *
 *    Part 2: Long Unblocking
 *    Part 2a
 *     -it is obvious that such accounting of block time, applied when
 *      unblocking is happening in later periods, works fine as well
 *     -the domain is treated as if it would have been running since the start
 *      of its new period
 *
 *      DRB______D___UR___D...
 *
 *    Part 2b
 *     -if one needs the full slice in the next period, it is necessary to
 *      treat the unblocking time as the start of the new period, i.e. move
 *      the deadline further back (later)
 *     -this doesn't disturb scheduling as well, because for EDF periods can
 *      be treated as minimal inter-release times and scheduling stays
 *      correct, when deadlines are kept relative to the time the process
 *      unblocks
 *
 *      DRB______D___URRRR___D...
 *                       (D) <- old deadline was here
 *     -problem: deadlines don't occur isochronously anymore
 *    Part 2c (Improved Atropos design)
 *     -when a domain unblocks it is given a very short period (=latency hint)
 *      and slice length scaled accordingly
 *     -both rise again to the original value (e.g. get doubled every period)
 *
 * 3. Unconservative (i.e. incorrect)
 *     -to boost the performance of I/O dependent domains it would be possible
 *      to put the domain into the runnable queue immediately, and let it run
 *      for the remainder of the slice of the current period
 *      (or even worse: allocate a new full slice for the domain)
 *     -either behaviour can lead to missed deadlines in other domains as
 *      opposed to approaches 1, 2a, 2b
 */
static void unblock_short_extra_support(
    struct sedf_vcpu_info* inf, s_time_t now)
{
    /*
     * This unblocking scheme tries to support the domain, by assigning it
     * a priority in extratime distribution according to the loss of time
     * in this slice due to blocking.
     */
    s_time_t pen;

    /* No more realtime execution in this period! */
    inf->deadl_abs += inf->period;
    if ( likely(inf->block_abs) )
    {
        /* Treat blocked time as consumed by the domain. */
        /*inf->cputime += now - inf->block_abs;*/
        /*
         * Penalty is the time the domain would have
         * had if it continued to run.
         */
        pen = (inf->slice - inf->cputime);
        if ( pen < 0 )
            pen = 0;
        /* Accumulate all penalties over the periods. */
        /*inf->short_block_lost_tot += pen;*/
        /* Set penalty to the current value. */
        inf->short_block_lost_tot = pen;
        /* Not sure which one is better.. but seems to work well... */

        if ( inf->short_block_lost_tot )
            inf->score[0] = (inf->period << 10) /
                inf->short_block_lost_tot;
#ifdef SEDF_STATS
        inf->pen_extra_blocks++;
#endif
        if ( extraq_on(inf->vcpu, EXTRA_PEN_Q) )
            /* Remove domain for possible resorting! */
            extraq_del(inf->vcpu, EXTRA_PEN_Q);
        else
            /*
             * Remember that we want to be on the penalty q
             * so that we can continue when we (un-)block
             * in penalty-extratime.
             */
            inf->status |= EXTRA_WANT_PEN_Q;

        /* (Re-)add domain to the penalty extraq. */
        extraq_add_sort_update(inf->vcpu, EXTRA_PEN_Q, 0);
    }

    /* Give it a fresh slice in the next period! */
    inf->cputime = 0;
}

static void unblock_long_cons_b(struct sedf_vcpu_info* inf, s_time_t now)
{
    /* Conservative 2b. */
    /* Treat the unblocking time as the start of a new period. */
    inf->deadl_abs = now + inf->period;
    inf->cputime = 0;
}
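
/*
 * Worked example of the short-unblock penalty above (illustrative, not in
 * the original source): a vcpu with slice = 5ms that blocked after
 * cputime = 2ms gets pen = 5 - 2 = 3ms of lost service recorded in
 * short_block_lost_tot.  Its L0 score becomes (period << 10) / 3ms, so
 * the more service the vcpu lost, the lower (better) the score.  Each
 * EXTRA_QUANTUM it later receives on the penalty queue shrinks
 * short_block_lost_tot in desched_extra_dom() (though in this revision
 * the short-blocking state is in fact left after the first penalty slice;
 * see the KAF note there).
 */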
#define DOMAIN_EDF        1
#define DOMAIN_EXTRA_PEN  2
#define DOMAIN_EXTRA_UTIL 3
#define DOMAIN_IDLE       4
static inline int get_run_type(struct vcpu* d)
{
    struct sedf_vcpu_info* inf = EDOM_INFO(d);
    if ( is_idle_vcpu(d) )
        return DOMAIN_IDLE;
    if ( inf->status & EXTRA_RUN_PEN )
        return DOMAIN_EXTRA_PEN;
    if ( inf->status & EXTRA_RUN_UTIL )
        return DOMAIN_EXTRA_UTIL;
    return DOMAIN_EDF;
}
/*
 * Compares two domains in the relation of whether the one is allowed to
 * interrupt the other's execution.
 * It returns true (!=0) if a switch to the other domain is good.
 * The current priority scheme is as follows:
 *     EDF > L0 (penalty based) extra-time >
 *     L1 (utilization) extra-time > idle-domain
 * In the same class priorities are assigned as follows:
 *     EDF: early deadline > late deadline
 *     L0 extra-time: lower score > higher score
 */
static inline int should_switch(struct vcpu *cur,
                                struct vcpu *other,
                                s_time_t now)
{
    struct sedf_vcpu_info *cur_inf, *other_inf;
    cur_inf   = EDOM_INFO(cur);
    other_inf = EDOM_INFO(other);

    /* Check whether we need to make an earlier scheduling decision. */
    if ( PERIOD_BEGIN(other_inf) <
         CPU_INFO(other->processor)->current_slice_expires )
        return 1;

    /* No timing-based switches need to be taken into account here. */
    switch ( get_run_type(cur) )
    {
    case DOMAIN_EDF:
        /* Do not interrupt a running EDF domain. */
        return 0;
    case DOMAIN_EXTRA_PEN:
        /* Check whether we also want the L0 ex-q with lower score. */
        return ((other_inf->status & EXTRA_WANT_PEN_Q) &&
                (other_inf->score[EXTRA_PEN_Q] <
                 cur_inf->score[EXTRA_PEN_Q]));
    case DOMAIN_EXTRA_UTIL:
        /*
         * Check whether we want the L0 extraq. Don't
         * switch if both domains want L1 extraq.
         */
        return !!(other_inf->status & EXTRA_WANT_PEN_Q);
    case DOMAIN_IDLE:
        return 1;
    }

    return 1;
}
static void sedf_wake(const struct scheduler *ops, struct vcpu *d)
{
    s_time_t now = NOW();
    struct sedf_vcpu_info* inf = EDOM_INFO(d);

    PRINT(3, "sedf_wake was called, domain-id %i.%i\n", d->domain->domain_id,
          d->vcpu_id);

    if ( unlikely(is_idle_vcpu(d)) )
        return;

    if ( unlikely(__task_on_queue(d)) )
    {
        PRINT(3, "\tdomain %i.%i is already in some queue\n",
              d->domain->domain_id, d->vcpu_id);
        return;
    }

    ASSERT(!sedf_runnable(d));
    inf->status &= ~SEDF_ASLEEP;
    ASSERT(!extraq_on(d, EXTRA_UTIL_Q));
    ASSERT(!extraq_on(d, EXTRA_PEN_Q));

    if ( unlikely(inf->deadl_abs == 0) )
    {
        /* Initial setup of the deadline. */
        inf->deadl_abs = now + inf->slice;
    }

    PRINT(3, "waking up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64
          " now= %"PRIu64")\n",
          d->domain->domain_id, d->vcpu_id, inf->deadl_abs, inf->period, now);

#ifdef SEDF_STATS
    inf->block_tot++;
#endif

    if ( unlikely(now < PERIOD_BEGIN(inf)) )
    {
        PRINT(4, "extratime unblock\n");
        /* Unblocking in extra-time! */
        if ( inf->status & EXTRA_WANT_PEN_Q )
        {
            /*
             * We have a domain that wants compensation
             * for block penalty and did just block in
             * its compensation time. Give it another
             * chance!
             */
            extraq_add_sort_update(d, EXTRA_PEN_Q, 0);
        }
        extraq_check_add_unblocked(d, 0);
    }
    else
    {
        if ( now < inf->deadl_abs )
        {
            PRINT(4, "short unblocking\n");
            /* Short blocking. */
#ifdef SEDF_STATS
            inf->short_block_tot++;
#endif
            unblock_short_extra_support(inf, now);

            extraq_check_add_unblocked(d, 1);
        }
        else
        {
            PRINT(4, "long unblocking\n");
            /* Long unblocking. */
#ifdef SEDF_STATS
            inf->long_block_tot++;
#endif
            unblock_long_cons_b(inf, now);

            extraq_check_add_unblocked(d, 1);
        }
    }

    PRINT(3, "woke up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64
          " now= %"PRIu64")\n",
          d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
          inf->period, now);

    if ( PERIOD_BEGIN(inf) > now )
    {
        __add_to_waitqueue_sort(d);
        PRINT(3, "added to waitq\n");
    }
    else
    {
        __add_to_runqueue_sort(d);
        PRINT(3, "added to runq\n");
    }

#ifdef SEDF_STATS
    /* Do some statistics here... */
    if ( inf->block_abs != 0 )
    {
        inf->block_time_tot += now - inf->block_abs;
        inf->penalty_time_tot +=
            PERIOD_BEGIN(inf) + inf->cputime - inf->block_abs;
    }
#endif

    /* Sanity check: make sure each extra-aware domain IS on the util-q! */
    ASSERT(IMPLY(inf->status & EXTRA_AWARE, extraq_on(d, EXTRA_UTIL_Q)));
    ASSERT(__task_on_queue(d));
    /*
     * Check whether the awakened task needs to invoke the do_schedule
     * routine. Try to avoid unnecessary runs but:
     * Safe approximation: Always switch to scheduler!
     */
    ASSERT(d->processor >= 0);
    ASSERT(d->processor < NR_CPUS);
    ASSERT(per_cpu(schedule_data, d->processor).curr);

    if ( should_switch(per_cpu(schedule_data, d->processor).curr, d, now) )
        cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
}
/* Print a lot of useful information about a domain in the system. */
static void sedf_dump_domain(struct vcpu *d)
{
    printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id,
           d->is_running ? 'T':'F');
    printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu"
           " sc=%i xtr(%s)=%"PRIu64" ew=%hu",
           EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs,
           EDOM_INFO(d)->weight,
           EDOM_INFO(d)->score[EXTRA_UTIL_Q],
           (EDOM_INFO(d)->status & EXTRA_AWARE) ? "yes" : "no",
           EDOM_INFO(d)->extra_time_tot, EDOM_INFO(d)->extraweight);

#ifdef SEDF_STATS
    if ( EDOM_INFO(d)->block_time_tot != 0 )
        printk(" pen=%"PRIu64"%%", (EDOM_INFO(d)->penalty_time_tot * 100) /
               EDOM_INFO(d)->block_time_tot);
    if ( EDOM_INFO(d)->block_tot != 0 )
        printk("\n   blks=%u sh=%u (%u%%) (shc=%u (%u%%) shex=%i "
               "shexsl=%i) l=%u (%u%%) avg: b=%"PRIu64" p=%"PRIu64"",
               EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_block_tot,
               (EDOM_INFO(d)->short_block_tot * 100)
               / EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_cont,
               (EDOM_INFO(d)->short_cont * 100) / EDOM_INFO(d)->block_tot,
               EDOM_INFO(d)->pen_extra_blocks,
               EDOM_INFO(d)->pen_extra_slices,
               EDOM_INFO(d)->long_block_tot,
               (EDOM_INFO(d)->long_block_tot * 100) / EDOM_INFO(d)->block_tot,
               (EDOM_INFO(d)->block_time_tot) / EDOM_INFO(d)->block_tot,
               (EDOM_INFO(d)->penalty_time_tot) / EDOM_INFO(d)->block_tot);
#endif
    printk("\n");
}
/* Dumps all domains on the specified cpu. */
static void sedf_dump_cpu_state(const struct scheduler *ops, int i)
{
    struct list_head      *list, *queue, *tmp;
    struct sedf_vcpu_info *d_inf;
    struct domain         *d;
    struct vcpu           *ed;
    int loop = 0;

    printk("now=%"PRIu64"\n", NOW());
    queue = RUNQ(i);
    printk("RUNQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
           (unsigned long) queue->next, (unsigned long) queue->prev);
    list_for_each_safe ( list, tmp, queue )
    {
        printk("%3d: ", loop++);
        d_inf = list_entry(list, struct sedf_vcpu_info, list);
        sedf_dump_domain(d_inf->vcpu);
    }

    queue = WAITQ(i); loop = 0;
    printk("\nWAITQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
           (unsigned long) queue->next, (unsigned long) queue->prev);
    list_for_each_safe ( list, tmp, queue )
    {
        printk("%3d: ", loop++);
        d_inf = list_entry(list, struct sedf_vcpu_info, list);
        sedf_dump_domain(d_inf->vcpu);
    }

    queue = EXTRAQ(i, EXTRA_PEN_Q); loop = 0;
    printk("\nEXTRAQ (penalty) rq %lx n: %lx, p: %lx\n",
           (unsigned long)queue, (unsigned long) queue->next,
           (unsigned long) queue->prev);
    list_for_each_safe ( list, tmp, queue )
    {
        d_inf = list_entry(list, struct sedf_vcpu_info,
                           extralist[EXTRA_PEN_Q]);
        printk("%3d: ", loop++);
        sedf_dump_domain(d_inf->vcpu);
    }

    queue = EXTRAQ(i, EXTRA_UTIL_Q); loop = 0;
    printk("\nEXTRAQ (utilization) rq %lx n: %lx, p: %lx\n",
           (unsigned long)queue, (unsigned long) queue->next,
           (unsigned long) queue->prev);
    list_for_each_safe ( list, tmp, queue )
    {
        d_inf = list_entry(list, struct sedf_vcpu_info,
                           extralist[EXTRA_UTIL_Q]);
        printk("%3d: ", loop++);
        sedf_dump_domain(d_inf->vcpu);
    }

    loop = 0;
    printk("\nnot on Q\n");

    rcu_read_lock(&domlist_read_lock);
    for_each_domain ( d )
    {
        for_each_vcpu(d, ed)
        {
            if ( !__task_on_queue(ed) && (ed->processor == i) )
            {
                printk("%3d: ", loop++);
                sedf_dump_domain(ed);
            }
        }
    }
    rcu_read_unlock(&domlist_read_lock);
}
/* Adjusts periods and slices of the domains according to their weights. */
static int sedf_adjust_weights(struct cpupool *c, struct xen_domctl_scheduler_op *cmd)
{
    struct vcpu  *p;
    struct domain *d;
    unsigned int  cpu, nr_cpus = last_cpu(cpu_online_map) + 1;
    int          *sumw = xmalloc_array(int, nr_cpus);
    s_time_t     *sumt = xmalloc_array(s_time_t, nr_cpus);

    if ( !sumw || !sumt )
    {
        xfree(sumt);
        xfree(sumw);
        return -ENOMEM;
    }
    memset(sumw, 0, nr_cpus * sizeof(*sumw));
    memset(sumt, 0, nr_cpus * sizeof(*sumt));

    /* Sum across all weights. */
    rcu_read_lock(&domlist_read_lock);
    for_each_domain( d )
    {
        if ( c != d->cpupool )
            continue;
        for_each_vcpu( d, p )
        {
            if ( (cpu = p->processor) >= nr_cpus )
                continue;

            if ( EDOM_INFO(p)->weight )
            {
                sumw[cpu] += EDOM_INFO(p)->weight;
            }
            else
            {
                /*
                 * Don't modify domains which don't have a weight, but sum
                 * up the time they need, projected to a WEIGHT_PERIOD,
                 * so that this time is not given to the weight-driven
                 * domains.
                 */

                /* Check for overflows. */
                ASSERT((WEIGHT_PERIOD < ULONG_MAX)
                       && (EDOM_INFO(p)->slice_orig < ULONG_MAX));
                sumt[cpu] +=
                    (WEIGHT_PERIOD * EDOM_INFO(p)->slice_orig) /
                    EDOM_INFO(p)->period_orig;
            }
        }
    }
    rcu_read_unlock(&domlist_read_lock);

    /* Adjust all slices (and periods) to the new weight. */
    rcu_read_lock(&domlist_read_lock);
    for_each_domain( d )
    {
        for_each_vcpu ( d, p )
        {
            if ( (cpu = p->processor) >= nr_cpus )
                continue;
            if ( EDOM_INFO(p)->weight )
            {
                EDOM_INFO(p)->period_orig =
                    EDOM_INFO(p)->period  = WEIGHT_PERIOD;
                EDOM_INFO(p)->slice_orig  =
                    EDOM_INFO(p)->slice   =
                    (EDOM_INFO(p)->weight *
                     (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[cpu])) / sumw[cpu];
            }
        }
    }
    rcu_read_unlock(&domlist_read_lock);

    xfree(sumt);
    xfree(sumw);

    return 0;
}
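
/*
 * Worked example of the weight-to-slice conversion above (illustrative,
 * not in the original source): with WEIGHT_PERIOD = 100ms and
 * WEIGHT_SAFETY = 5ms, suppose one CPU carries two weight-driven vcpus
 * with weights 2 and 1 (sumw = 3) and one time-driven vcpu reserving
 * 25ms/100ms (sumt = 25ms).  The weighted vcpus then receive
 *     slice = 2 * (100 - 5 - 25)ms / 3 ~= 46.6ms   and
 *     slice = 1 * (100 - 5 - 25)ms / 3 ~= 23.3ms
 * per 100ms period, i.e. they split the non-reserved CPU time 2:1 with a
 * 5% safety margin left over.
 */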
/* Set or fetch domain scheduling parameters. */
static int sedf_adjust(const struct scheduler *ops, struct domain *p, struct xen_domctl_scheduler_op *op)
{
    struct vcpu *v;
    int rc;

    PRINT(2, "sedf_adjust was called, domain-id %i new period %"PRIu64" "
          "new slice %"PRIu64"\nlatency %"PRIu64" extra:%s\n",
          p->domain_id, op->u.sedf.period, op->u.sedf.slice,
          op->u.sedf.latency, (op->u.sedf.extratime)?"yes":"no");

    if ( op->cmd == XEN_DOMCTL_SCHEDOP_putinfo )
    {
        /* Check for sane parameters. */
        if ( !op->u.sedf.period && !op->u.sedf.weight )
            return -EINVAL;

        if ( op->u.sedf.weight )
        {
            if ( (op->u.sedf.extratime & EXTRA_AWARE) &&
                 (!op->u.sedf.period) )
            {
                /* Weight-driven domains with extratime only. */
                for_each_vcpu ( p, v )
                {
                    EDOM_INFO(v)->extraweight = op->u.sedf.weight;
                    EDOM_INFO(v)->weight = 0;
                    EDOM_INFO(v)->slice = 0;
                    EDOM_INFO(v)->period = WEIGHT_PERIOD;
                }
            }
            else
            {
                /* Weight-driven domains with real-time execution. */
                for_each_vcpu ( p, v )
                    EDOM_INFO(v)->weight = op->u.sedf.weight;
            }
        }
        else
        {
            /* Time-driven domains. */
            for_each_vcpu ( p, v )
            {
                /*
                 * Sanity checking: note that disabling extra weight requires
                 * that we set a non-zero slice.
                 */
                if ( (op->u.sedf.period > PERIOD_MAX) ||
                     (op->u.sedf.period < PERIOD_MIN) ||
                     (op->u.sedf.slice  > op->u.sedf.period) ||
                     (op->u.sedf.slice  < SLICE_MIN) )
                    return -EINVAL;
                EDOM_INFO(v)->weight = 0;
                EDOM_INFO(v)->extraweight = 0;
                EDOM_INFO(v)->period_orig =
                    EDOM_INFO(v)->period  = op->u.sedf.period;
                EDOM_INFO(v)->slice_orig  =
                    EDOM_INFO(v)->slice   = op->u.sedf.slice;
            }
        }

        rc = sedf_adjust_weights(p->cpupool, op);
        if ( rc )
            return rc;

        for_each_vcpu ( p, v )
        {
            EDOM_INFO(v)->status  =
                (EDOM_INFO(v)->status &
                 ~EXTRA_AWARE) | (op->u.sedf.extratime & EXTRA_AWARE);
            EDOM_INFO(v)->latency = op->u.sedf.latency;
            extraq_check(v);
        }
    }
    else if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
    {
        if ( p->vcpu[0] == NULL )
            return -EINVAL;
        op->u.sedf.period    = EDOM_INFO(p->vcpu[0])->period;
        op->u.sedf.slice     = EDOM_INFO(p->vcpu[0])->slice;
        op->u.sedf.extratime = EDOM_INFO(p->vcpu[0])->status & EXTRA_AWARE;
        op->u.sedf.latency   = EDOM_INFO(p->vcpu[0])->latency;
        op->u.sedf.weight    = EDOM_INFO(p->vcpu[0])->weight;
    }

    PRINT(2, "sedf_adjust_finished\n");
    return 0;
}
const struct scheduler sched_sedf_def = {
    .name           = "Simple EDF Scheduler",
    .opt_name       = "sedf",
    .sched_id       = XEN_SCHEDULER_SEDF,

    .init_domain    = sedf_init_domain,
    .destroy_domain = sedf_destroy_domain,

    .alloc_vdata    = sedf_alloc_vdata,
    .free_vdata     = sedf_free_vdata,
    .alloc_pdata    = sedf_alloc_pdata,
    .free_pdata     = sedf_free_pdata,
    .alloc_domdata  = sedf_alloc_domdata,
    .free_domdata   = sedf_free_domdata,

    .do_schedule    = sedf_do_schedule,
    .pick_cpu       = sedf_pick_cpu,
    .dump_cpu_state = sedf_dump_cpu_state,
    .sleep          = sedf_sleep,
    .wake           = sedf_wake,
    .adjust         = sedf_adjust,
};
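
/*
 * Usage note (illustrative, not in the original source): the scheduler is
 * selected via its .opt_name with the hypervisor boot parameter
 * "sched=sedf"; per-domain period/slice/latency/extratime/weight
 * parameters are then set from the toolstack through the
 * XEN_DOMCTL_SCHEDOP_putinfo path handled by sedf_adjust() above (e.g.
 * via the classic "xm sched-sedf" command of this era).
 */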
/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */