debuggers.hg: view xen/common/sched_atropos.c @ 3570:51052c8b6456

bitkeeper revision 1.1159.212.38 (41f6537aX7dfqsdH6-jWzX24faBDtQ)

description: manual merge.
author:      kaf24@scramble.cl.cam.ac.uk
date:        Tue Jan 25 14:11:06 2005 +0000 (2005-01-25)
parents:     3f929065a1d1 dee91b44a753
children:    d8ba911dce48 beb0887c54bc 0ef6e8e6e85d
line source
/*
 * atropos.c
 * ---------
 *
 * Copyright (c) 1994 University of Cambridge Computer Laboratory.
 * This is part of Nemesis; consult your contract for terms and conditions.
 *
 * ID : $Id: atropos.c 1.1 Tue, 13 Apr 1999 13:30:49 +0100 dr10009 $
 *
 * This is the "atropos" CPU scheduler.
 */

/* Ported to Xen's generic scheduler interface by Mark Williamson
 * these modifications are (C) 2004 Intel Research Cambridge
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/time.h>
#include <xen/sched.h>
#include <xen/sched-if.h>
#include <public/sched_ctl.h>
#include <xen/trace.h>

#define ATROPOS_TASK_UNBLOCKED 16
#define ATROPOS_TASK_WAIT      32
#define ATROPOS_TASK_BLOCKED   48

/* Atropos-specific per-domain data */
struct at_dom_info
{
    /* MAW Xen additions */
    struct domain *owner;      /* the domain this data belongs to   */
    struct list_head run_list; /* runqueue                          */
    struct list_head waitq;    /* wait queue                        */

    /* (what remains of) the original fields */

    s_time_t deadline;         /* Next deadline                     */
    s_time_t prevddln;         /* Previous deadline                 */

    s_time_t remain;           /* Time remaining this period        */
    s_time_t period;           /* Current period of time allocation */
    s_time_t nat_period;       /* Natural period                    */
    s_time_t slice;            /* Current length of allocation      */
    s_time_t nat_slice;        /* Natural length of allocation      */
    s_time_t latency;          /* Unblocking latency                */

    int xtratime;              /* Prepared to accept extra time?    */
    int state;                 /* Keeps Atropos domain state        */
};

/* Atropos-specific per-CPU data */
struct at_cpu_info
{
    struct list_head runq;
    struct list_head waitq;
};


#define DOM_INFO(_p)  ((struct at_dom_info *)((_p)->sched_priv))
#define CPU_INFO(_c)  ((struct at_cpu_info *)((schedule_data[_c]).sched_priv))
#define WAITQ(cpu)    (&CPU_INFO(cpu)->waitq)
#define RUNQ(cpu)     (&CPU_INFO(cpu)->runq)
#define RUNLIST(_d)   (&DOM_INFO(_d)->run_list)

#define BESTEFFORT_QUANTUM MILLISECS(5)

static void at_dump_cpu_state(int cpu);

static xmem_cache_t *dom_info_cache;

static inline void __add_to_runqueue_head(struct domain *d)
{
    list_add(RUNLIST(d), RUNQ(d->processor));
}

static inline void __add_to_runqueue_tail(struct domain *d)
{
    list_add_tail(RUNLIST(d), RUNQ(d->processor));
}

static inline void __del_from_runqueue(struct domain *d)
{
    struct list_head *runlist = RUNLIST(d);
    list_del(runlist);
    runlist->next = NULL;
}

static inline int __task_on_runqueue(struct domain *d)
{
    return (RUNLIST(d))->next != NULL;
}
/** calculate the length of a linked list */
static int q_len(struct list_head *q)
{
    int i = 0;
    struct at_dom_info *tmp;
    list_for_each_entry ( tmp, q, waitq )
        i++;
    return i;
}


/** waitq_el - get the domain that owns a wait queue list element */
static inline struct domain *waitq_el(struct list_head *l)
{
    struct at_dom_info *inf;
    inf = list_entry(l, struct at_dom_info, waitq);
    return inf->owner;
}
/*
 * requeue
 *
 * Places the specified domain on the appropriate queue.
 * The wait queue is ordered by the time at which the domain
 * will receive more CPU time.  If a domain has no guaranteed time
 * left then the domain will be placed on the WAIT queue until
 * its next period.
 *
 * Note that domains can be on the wait queue with remain > 0
 * as a result of being blocked for a short time.
 * These are scheduled in preference to domains with remain < 0
 * in an attempt to improve interactive performance.
 */
static void requeue(struct domain *sdom)
{
    struct at_dom_info *i, *inf = DOM_INFO(sdom);

    if ( !domain_runnable(sdom) )
        return;

    if ( (inf->state == ATROPOS_TASK_WAIT) ||
         (inf->state == ATROPOS_TASK_UNBLOCKED) )
    {
        list_for_each_entry ( i, WAITQ(sdom->processor), waitq )
        {
            if ( i->deadline > inf->deadline )
            {
                __list_add(&inf->waitq, i->waitq.prev, &i->waitq);
                break;
            }
        }

        if ( &i->waitq == WAITQ(sdom->processor) )
            list_add_tail(&inf->waitq, WAITQ(sdom->processor));
    }
    else if ( domain_runnable(sdom) )
    {
        list_for_each_entry ( i, RUNQ(sdom->processor), run_list )
        {
            if ( (i->deadline > inf->deadline) || is_idle_task(i->owner) )
            {
                __list_add(&inf->run_list, i->run_list.prev, &i->run_list);
                break;
            }
        }

        if ( &i->run_list == RUNQ(sdom->processor) )
            list_add_tail(&inf->run_list, RUNQ(sdom->processor));
    }
    /* silently ignore tasks in other states like BLOCKED, DYING, STOPPED, etc
     * - they shouldn't be on any queue */
}
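
/*
 * Worked example (illustrative only, not part of the original source):
 * suppose the wait queue already holds domains A (deadline 30ms) and
 * B (deadline 50ms).  requeue()ing a domain C with deadline 40ms walks the
 * queue until it finds B, the first entry with a later deadline, and inserts
 * C before it, giving the order A, C, B.  If C's deadline were 60ms the loop
 * would fall off the end and C would be appended with list_add_tail().  The
 * run queue is kept in the same deadline order, except that the idle task
 * always sorts last.
 */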
/** at_alloc_task - allocate private info for a task */
static int at_alloc_task(struct domain *p)
{
    ASSERT(p != NULL);

    p->sched_priv = xmem_cache_alloc(dom_info_cache);
    if ( p->sched_priv == NULL )
        return -1;

    return 0;
}


/* prepare a task to be added to scheduling */
static void at_add_task(struct domain *p)
{
    s_time_t now = NOW();

    ASSERT( p->sched_priv != NULL );

    DOM_INFO(p)->owner = p;
    p->lastschd = now;

    /* DOM 0's parameters must be set here for it to boot the system! */
    if ( p->id == 0 )
    {
        DOM_INFO(p)->remain = MILLISECS(15);
        DOM_INFO(p)->nat_period =
            DOM_INFO(p)->period = MILLISECS(20);
        DOM_INFO(p)->nat_slice =
            DOM_INFO(p)->slice = MILLISECS(15);
        DOM_INFO(p)->latency = MILLISECS(5);
        DOM_INFO(p)->xtratime = 1;
        DOM_INFO(p)->deadline = now;
        DOM_INFO(p)->prevddln = now;
    }
    else /* other domains run basically best effort unless otherwise set */
    {
        DOM_INFO(p)->remain = 0;
        DOM_INFO(p)->nat_period =
            DOM_INFO(p)->period = SECONDS(10);
        DOM_INFO(p)->nat_slice =
            DOM_INFO(p)->slice = MILLISECS(10);
        DOM_INFO(p)->latency = SECONDS(10);
        DOM_INFO(p)->xtratime = 1;
        DOM_INFO(p)->deadline = now;
        /* DOM_INFO(p)->deadline = now + SECONDS(10); */
        DOM_INFO(p)->prevddln = 0;
    }

    INIT_LIST_HEAD(&(DOM_INFO(p)->run_list));
    INIT_LIST_HEAD(&(DOM_INFO(p)->waitq));
}
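
/*
 * Illustrative arithmetic (added for clarity): domain 0's defaults above
 * guarantee a 15ms slice in every 20ms period, i.e. 75% of one CPU, with a
 * 5ms unblocking latency.  Other domains default to 10ms in every 10s
 * (0.1% guaranteed), so in practice they run almost entirely on extra
 * ("slack") time because xtratime is set, until the control tools install
 * real parameters via at_adjdom().
 */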
/**
 * dequeue - remove a domain from any queues it is on.
 * @sdom: the task to remove
 */
static void dequeue(struct domain *sdom)
{
    struct at_dom_info *inf = DOM_INFO(sdom);

    ASSERT(sdom->id != IDLE_DOMAIN_ID);

    /* just delete it from all the queues! */
    list_del(&inf->waitq);
    INIT_LIST_HEAD(&inf->waitq);

    if ( __task_on_runqueue(sdom) )
        __del_from_runqueue(sdom);
}


/*
 * unblock
 *
 * This function deals with updating the sdom for a domain
 * which has just been unblocked.
 *
 * Xen's Atropos treats unblocking slightly differently to Nemesis:
 *
 * - "Short blocking" domains (i.e. that unblock before their deadline has
 *   expired) are treated the same as in Nemesis (put on the wait queue and
 *   given preferential treatment in selecting domains for extra time).
 *
 * - "Long blocking" domains do not simply have their period truncated to
 *   their unblocking latency as before but also have their slice recomputed
 *   to be the same fraction of their new period.  Each time the domain is
 *   scheduled, the period and slice are doubled until they reach their
 *   original ("natural") values, as set by the user (and stored in
 *   nat_period and nat_slice).  The idea is to give better response times
 *   to unblocking whilst preserving QoS guarantees to other domains.
 */
static void unblock(struct domain *sdom)
{
    s_time_t time = NOW();
    struct at_dom_info *inf = DOM_INFO(sdom);

    dequeue(sdom);

    /* We distinguish two cases... short and long blocks */
    if ( inf->deadline < time )
    {
        /* Long blocking case */

        /* The sdom has passed its deadline since it was blocked.
           Give it its new deadline based on the latency value. */
        inf->prevddln = time;

        /* Scale the scheduling parameters as requested by the latency hint. */
        inf->deadline = time + inf->latency;
        inf->slice = inf->nat_slice / ( inf->nat_period / inf->latency );
        inf->period = inf->latency;
        inf->remain = inf->slice;
    }
    else
    {
        /* Short blocking case */

        /* We leave REMAIN intact, but put this domain on the WAIT
           queue marked as recently unblocked.  It will be given
           priority over other domains on the wait queue while
           REMAIN > 0, in a generous attempt to help it make up for its
           own foolishness. */
        if ( inf->remain > 0 )
            inf->state = ATROPOS_TASK_UNBLOCKED;
        else
            inf->state = ATROPOS_TASK_WAIT;
    }

    requeue(sdom);
}
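
/*
 * Worked example (illustrative only): a domain with nat_period = 100ms,
 * nat_slice = 20ms and latency = 10ms that unblocks long after its deadline
 * is given period = 10ms and slice = 20ms / (100ms / 10ms) = 2ms, i.e. the
 * same 20% share of the CPU but over a much shorter period, so it is
 * scheduled again quickly.  The main scheduler below then doubles both
 * values at each period boundary (10ms/2ms -> 20ms/4ms -> 40ms/8ms ->
 * 80ms/16ms) until they would exceed the natural values, at which point they
 * snap back to 100ms/20ms.
 */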
static int at_init_idle_task(struct domain *p)
{
    if ( at_alloc_task(p) < 0 )
        return -1;

    at_add_task(p);

    dequeue(p);
    requeue(p);

    return 0;
}


static void block(struct domain *sdom)
{
    DOM_INFO(sdom)->state = ATROPOS_TASK_BLOCKED;
    dequeue(sdom);
    requeue(sdom);
}
/**
 * ATROPOS - main scheduler function
 */
task_slice_t ksched_scheduler(s_time_t time)
{
    struct domain *cur_sdom = current;  /* Current sdom             */
    s_time_t newtime;
    s_time_t ranfor;                    /* How long the domain ran  */
    struct domain *sdom;                /* tmp. scheduling domain   */
    int cpu = cur_sdom->processor;      /* current CPU              */
    struct at_dom_info *cur_info;
    static unsigned long waitq_rrobin = 0;
    int i;
    task_slice_t ret;

    cur_info = DOM_INFO(cur_sdom);

    ASSERT( cur_sdom != NULL );

    /* If we were spinning in the idle loop, there is no current
     * domain to deschedule. */
    if ( is_idle_task(cur_sdom) )
        goto deschedule_done;

    /*****************************
     *
     * Deschedule the current scheduling domain
     *
     ****************************/

    /* Record the time the domain was preempted and for how long it
       ran.  Work out if the domain is going to be blocked to save
       some pointless queue shuffling. */
    cur_sdom->lastdeschd = time;

    ranfor = (time - cur_sdom->lastschd);

    dequeue(cur_sdom);

    if ( domain_runnable(cur_sdom) ||
         (cur_info->state == ATROPOS_TASK_UNBLOCKED) )
    {
        /* In this block, we are doing accounting for an sdom which has
           been running in contracted time.  Note that this could now happen
           even if the domain is on the wait queue (i.e. if it blocked). */

        /* Deduct guaranteed time from the domain */
        cur_info->remain -= ranfor;

        /* If guaranteed time has run out... */
        if ( cur_info->remain <= 0 )
        {
            /* Move domain to correct position in WAIT queue */
            /* XXX sdom_unblocked doesn't need this since it is
               already in the correct place. */
            cur_info->state = ATROPOS_TASK_WAIT;
        }
    }

    requeue(cur_sdom);

 deschedule_done:
    /*****************************
     *
     * We have now successfully descheduled the current sdom.
     * The next task is to allocate CPU time to any sdom that is due it.
     *
     ****************************/
    cur_sdom = NULL;

    /*****************************
     *
     * Allocate CPU time to any waiting domains who have passed their
     * period deadline.  If necessary, move them to the run queue.
     *
     ****************************/

    while ( !list_empty(WAITQ(cpu)) &&
            DOM_INFO(sdom = waitq_el(WAITQ(cpu)->next))->deadline <= time )
    {
        struct at_dom_info *inf = DOM_INFO(sdom);
        dequeue(sdom);

        if ( inf->period != inf->nat_period )
        {
            /* This domain has had its parameters adjusted as a result of
             * unblocking and they need to be adjusted before requeuing it. */
            inf->slice  *= 2;
            inf->period *= 2;

            if ( inf->period > inf->nat_period )
            {
                inf->period = inf->nat_period;
                inf->slice  = inf->nat_slice;
            }
        }

        /* Domain begins a new period and receives a slice of CPU.
         * If this domain has been blocking then throw away the
         * rest of its remain - it can't be trusted. */
        if ( inf->remain > 0 )
            inf->remain = inf->slice;
        else
            inf->remain += inf->slice;

        inf->prevddln = inf->deadline;
        inf->deadline += inf->period;

        if ( inf->remain <= 0 )
            inf->state = ATROPOS_TASK_WAIT;

        /* Place on the appropriate queue */
        requeue(sdom);
    }

    /*****************************
     *
     * Next we need to pick an sdom to run.
     * If anything is actually 'runnable', we run that.
     * If nothing is, we pick a waiting sdom to run optimistically.
     * If there aren't even any of those, we have to spin waiting for an
     * event or a suitable time condition to happen.
     *
     ****************************/

    /* we guarantee there's always something on the runqueue */
    cur_info = list_entry(RUNQ(cpu)->next,
                          struct at_dom_info, run_list);

    cur_sdom = cur_info->owner;
    newtime = time + cur_info->remain;

    /* MAW - the idle domain is always on the run queue.  We run from the
     * runqueue if it's NOT the idle domain or if there's nothing on the wait
     * queue */
    if ( (cur_sdom->id == IDLE_DOMAIN_ID) && !list_empty(WAITQ(cpu)) )
    {
        struct at_dom_info *inf;

        /* Try running a domain on the WAIT queue - this part of the
           scheduler isn't particularly efficient but then again, we
           don't have any guaranteed domains to worry about. */

        /* See if there are any unblocked domains on the WAIT
           queue to which we can give preferential treatment. */

        list_for_each_entry ( inf, WAITQ(cpu), waitq )
        {
            sdom = inf->owner;

            if ( inf->state == ATROPOS_TASK_UNBLOCKED )
            {
                cur_sdom = sdom;
                cur_info = inf;
                newtime  = time + inf->remain;
                goto found;
            }
        }

        /* init values needed to approximate round-robin for slack time */
        i = 0;
        if ( waitq_rrobin >= q_len(WAITQ(cpu)) )
            waitq_rrobin = 0;


        /* Last chance: pick a domain on the wait queue with the XTRA
           flag set.  The NEXT_OPTM field is used to cheaply achieve
           an approximation of round-robin order. */
        list_for_each_entry ( inf, WAITQ(cpu), waitq )
        {
            sdom = inf->owner;

            if ( inf->xtratime && (i >= waitq_rrobin) )
            {
                cur_sdom = sdom;
                cur_info = inf;
                newtime = time + BESTEFFORT_QUANTUM;
                waitq_rrobin = i + 1; /* set this value ready for next */
                goto found;
            }

            i++;
        }
    }

 found:
    /**********************
     *
     * We now have to work out the time when we next need to
     * make a scheduling decision.  We set the alarm timer
     * to cause an interrupt at that time.
     *
     **********************/

#define MIN(x,y) ( ( (x) < (y) ) ? (x) : (y) )
#define MAX(x,y) ( ( (x) > (y) ) ? (x) : (y) )

    /* If we might be able to run a waiting domain before this one has */
    /* exhausted its time, cut short the time allocation */
    if ( !list_empty(WAITQ(cpu)) )
    {
        newtime = MIN(newtime,
                      DOM_INFO(waitq_el(WAITQ(cpu)->next))->deadline);
    }

    /* don't allow pointlessly small time slices */
    newtime = MAX(newtime, time + BESTEFFORT_QUANTUM);

    ret.task = cur_sdom;
    ret.time = newtime - time;

    TRACE_1D(0, cur_sdom->id);

    return ret;
}
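
/*
 * Worked example (illustrative only): if the chosen domain has 8ms of
 * guaranteed time remaining, ksched_scheduler() would normally programme the
 * next decision point for time + 8ms.  If the head of the wait queue reaches
 * its deadline in 6ms, the MIN() above cuts the allocation to 6ms so that
 * domain can start its new period promptly; and if that deadline were only
 * 2ms away, the MAX() with BESTEFFORT_QUANTUM (5ms) would still keep the
 * slice at 5ms rather than something pointlessly short.
 */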
/* set up some private data structures */
static int at_init_scheduler(void)
{
    int i;

    for ( i = 0; i < NR_CPUS; i++ )
    {
        schedule_data[i].sched_priv = xmalloc(sizeof(struct at_cpu_info));
        if ( schedule_data[i].sched_priv == NULL )
            return -1;
        INIT_LIST_HEAD(WAITQ(i));
        INIT_LIST_HEAD(RUNQ(i));
    }

    dom_info_cache = xmem_cache_create("Atropos dom info",
                                       sizeof(struct at_dom_info),
                                       0, 0, NULL, NULL);

    return 0;
}


/* print relevant per-domain info for a run queue dump */
static void at_dump_runq_el(struct domain *p)
{
    printk("lastschd = %llu, xtratime = %d ",
           p->lastschd, DOM_INFO(p)->xtratime);
}


/* dump relevant per-cpu state for a run queue dump */
static void at_dump_cpu_state(int cpu)
{
    struct list_head *queue;
    int loop = 0;
    struct at_dom_info *d_inf;
    struct domain *d;

    queue = RUNQ(cpu);
    printk("\nRUNQUEUE rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
           (unsigned long) queue->next, (unsigned long) queue->prev);

    list_for_each_entry ( d_inf, queue, run_list )
    {
        d = d_inf->owner;
        printk("%3d: %d has=%c ", loop++, d->id,
               test_bit(DF_RUNNING, &d->flags) ? 'T':'F');
        at_dump_runq_el(d);
        printk("c=0x%X%08X\n", (u32)(d->cpu_time>>32), (u32)d->cpu_time);
        printk(" l: %lx n: %lx p: %lx\n",
               (unsigned long)&d_inf->run_list,
               (unsigned long)d_inf->run_list.next,
               (unsigned long)d_inf->run_list.prev);
    }


    queue = WAITQ(cpu);
    printk("\nWAITQUEUE rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
           (unsigned long) queue->next, (unsigned long) queue->prev);

    list_for_each_entry ( d_inf, queue, waitq )
    {
        d = d_inf->owner;
        printk("%3d: %d has=%c ", loop++, d->id,
               test_bit(DF_RUNNING, &d->flags) ? 'T':'F');
        at_dump_runq_el(d);
        printk("c=0x%X%08X\n", (u32)(d->cpu_time>>32), (u32)d->cpu_time);
        printk(" l: %lx n: %lx p: %lx\n",
               (unsigned long)&d_inf->waitq,
               (unsigned long)d_inf->waitq.next,
               (unsigned long)d_inf->waitq.prev);
    }
}
/* set or fetch domain scheduling parameters */
static int at_adjdom(struct domain *p, struct sched_adjdom_cmd *cmd)
{
    if ( cmd->direction == SCHED_INFO_PUT )
    {
        /* sanity checking! */
        if ( (cmd->u.atropos.latency > cmd->u.atropos.nat_period)
             || (cmd->u.atropos.latency == 0)
             || (cmd->u.atropos.nat_slice > cmd->u.atropos.nat_period) )
            return -EINVAL;

        DOM_INFO(p)->nat_period = cmd->u.atropos.nat_period;
        DOM_INFO(p)->nat_slice  = cmd->u.atropos.nat_slice;
        DOM_INFO(p)->latency    = cmd->u.atropos.latency;
        DOM_INFO(p)->xtratime   = !!cmd->u.atropos.xtratime;
    }
    else if ( cmd->direction == SCHED_INFO_GET )
    {
        cmd->u.atropos.nat_period = DOM_INFO(p)->nat_period;
        cmd->u.atropos.nat_slice  = DOM_INFO(p)->nat_slice;
        cmd->u.atropos.latency    = DOM_INFO(p)->latency;
        cmd->u.atropos.xtratime   = DOM_INFO(p)->xtratime;
    }

    return 0;
}
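
/*
 * Illustrative example (not part of the original source): parameters that
 * at_adjdom() above would accept or reject.  nat_period = 20ms,
 * nat_slice = 15ms, latency = 5ms passes the sanity checks (latency is
 * non-zero and no larger than nat_period, nat_slice fits in nat_period);
 * latency = 0, or nat_slice = 25ms with nat_period = 20ms, is rejected
 * with -EINVAL.
 */
#if 0 /* sketch only - assumes a caller has filled in a sched_adjdom_cmd 'cmd' */
    cmd.direction            = SCHED_INFO_PUT;
    cmd.u.atropos.nat_period = MILLISECS(20);
    cmd.u.atropos.nat_slice  = MILLISECS(15);
    cmd.u.atropos.latency    = MILLISECS(5);
    cmd.u.atropos.xtratime   = 1;
    /* at_adjdom(d, &cmd) would then install these values for domain d */
#endif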
/* free memory associated with a task */
static void at_free_task(struct domain *p)
{
    xmem_cache_free( dom_info_cache, DOM_INFO(p) );
}


/* print decoded domain private state value (if known) */
static int at_prn_state(int state)
{
    int ret = 0;

    switch ( state )
    {
    case ATROPOS_TASK_UNBLOCKED:
        printk("Unblocked");
        break;
    case ATROPOS_TASK_WAIT:
        printk("Wait");
        break;
    default:
        ret = -1;
    }

    return ret;
}

struct scheduler sched_atropos_def = {
    .name           = "Atropos Soft Real Time Scheduler",
    .opt_name       = "atropos",
    .sched_id       = SCHED_ATROPOS,
    .init_scheduler = at_init_scheduler,
    .init_idle_task = at_init_idle_task,
    .alloc_task     = at_alloc_task,
    .add_task       = at_add_task,
    .free_task      = at_free_task,
    .wake           = unblock,
    .sleep          = block,
    .do_schedule    = ksched_scheduler,
    .adjdom         = at_adjdom,
    .dump_cpu_state = at_dump_cpu_state,
    .prn_state      = at_prn_state,
};
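
/*
 * Note (added for clarity): the generic scheduler dispatches through this
 * table - .wake fires when a domain becomes runnable again (handled here by
 * unblock()), .sleep when it blocks (block()), .do_schedule on every
 * scheduling decision (ksched_scheduler()), and .adjdom when the control
 * tools get or set a domain's Atropos parameters (at_adjdom()).
 */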