debuggers.hg

view xen/common/sched_atropos.c @ 3685:bbe8541361dd

bitkeeper revision 1.1159.1.542 (42038a42_52IAalMZRKdTn0UbVN5fw)

Merge tempest.cl.cam.ac.uk:/auto/groups/xeno-xenod/BK/xeno.bk
into tempest.cl.cam.ac.uk:/local/scratch/smh22/xen-unstable.bk
author smh22@tempest.cl.cam.ac.uk
date Fri Feb 04 14:44:18 2005 +0000 (2005-02-04)
parents d8ba911dce48 0ef6e8e6e85d
children 88957a238191
line source
1 /*
2 * atropos.c
3 * ---------
4 *
5 * Copyright (c) 1994 University of Cambridge Computer Laboratory.
6 * This is part of Nemesis; consult your contract for terms and conditions.
7 *
8 * ID : $Id: atropos.c 1.1 Tue, 13 Apr 1999 13:30:49 +0100 dr10009 $
9 *
10 * This is the "atropos" CPU scheduler.
11 */
13 /* Ported to Xen's generic scheduler interface by Mark Williamson
14 * these modifications are (C) 2004 Intel Research Cambridge
15 */
17 #include <xen/config.h>
18 #include <xen/init.h>
19 #include <xen/lib.h>
20 #include <xen/time.h>
21 #include <xen/sched.h>
22 #include <xen/sched-if.h>
23 #include <public/sched_ctl.h>
24 #include <xen/trace.h>
/* Per-domain state values kept in at_dom_info.state. */
#define ATROPOS_TASK_UNBLOCKED 16 /* recently woken; favoured on the wait queue */
#define ATROPOS_TASK_WAIT 32      /* waiting for its next period's allocation */
#define ATROPOS_TASK_BLOCKED 48   /* asleep; should be on no queue */
/* Atropos-specific per-domain data */
struct at_dom_info
{
    /* MAW Xen additions */
    struct domain *owner;      /* the domain this data belongs to */
    struct list_head run_list; /* runqueue linkage */
    struct list_head waitq;    /* wait queue linkage */

    /* (what remains of) the original fields */

    s_time_t deadline;         /* Next deadline */
    s_time_t prevddln;         /* Previous deadline */

    s_time_t remain;           /* Time remaining this period */
    s_time_t period;           /* Current period of time allocation */
    s_time_t nat_period;       /* Natural period */
    s_time_t slice;            /* Current length of allocation */
    s_time_t nat_slice;        /* Natural length of allocation */
    s_time_t latency;          /* Unblocking latency */

    int xtratime;              /* Prepared to accept extra time? */
    int state;                 /* Keeps Atropos domain state (ATROPOS_TASK_*) */
};
/* Atropos-specific per-CPU data */
struct at_cpu_info
{
    struct list_head runq;  /* runnable domains, kept deadline-ordered by requeue() */
    struct list_head waitq; /* domains awaiting a new period, deadline-ordered */
};
/* Accessor for a domain's scheduler-private data. */
#define DOM_INFO(_p) ((struct at_dom_info *)((_p)->sched_priv))
/* Accessor for a CPU's scheduler-private data. */
#define CPU_INFO(_c) ((struct at_cpu_info *)((schedule_data[_c]).sched_priv))
/* Heads of the per-CPU wait and run queues. */
#define WAITQ(cpu) (&CPU_INFO(cpu)->waitq)
#define RUNQ(cpu) (&CPU_INFO(cpu)->runq)
/* A domain's run-queue list element. */
#define RUNLIST(_d) (&DOM_INFO(_d)->run_list)

/* Slice length handed to best-effort (extra-time) domains. */
#define BESTEFFORT_QUANTUM MILLISECS(5)

static void at_dump_cpu_state(int cpu);
72 static inline void __add_to_runqueue_head(struct domain *d)
73 {
74 list_add(RUNLIST(d), RUNQ(d->processor));
75 }
77 static inline void __add_to_runqueue_tail(struct domain *d)
78 {
79 list_add_tail(RUNLIST(d), RUNQ(d->processor));
80 }
82 static inline void __del_from_runqueue(struct domain *d)
83 {
84 struct list_head *runlist = RUNLIST(d);
85 list_del(runlist);
86 runlist->next = NULL;
87 }
89 static inline int __task_on_runqueue(struct domain *d)
90 {
91 return (RUNLIST(d))->next != NULL;
92 }
95 /** calculate the length of a linked list */
96 static int q_len(struct list_head *q)
97 {
98 int i = 0;
99 struct at_dom_info *tmp;
100 list_for_each_entry ( tmp, q, waitq )
101 i++;
102 return i;
103 }
106 /** waitq_el - get the domain that owns a wait queue list element */
107 static inline struct domain *waitq_el(struct list_head *l)
108 {
109 struct at_dom_info *inf;
110 inf = list_entry(l, struct at_dom_info, waitq);
111 return inf->owner;
112 }
115 /*
116 * requeue
117 *
118 * Places the specified domain on the appropriate queue.
119 * The wait queue is ordered by the time at which the domain
120 * will receive more CPU time. If a domain has no guaranteed time
121 * left then the domain will be placed on the WAIT queue until
122 * its next period.
123 *
124 * Note that domains can be on the wait queue with remain > 0
125 * as a result of being blocked for a short time.
126 * These are scheduled in preference to domains with remain < 0
127 * in an attempt to improve interactive performance.
128 */
129 static void requeue(struct domain *sdom)
130 {
131 struct at_dom_info *i, *inf = DOM_INFO(sdom);
133 if ( !domain_runnable(sdom) )
134 return;
136 if ( (inf->state == ATROPOS_TASK_WAIT) ||
137 (inf->state == ATROPOS_TASK_UNBLOCKED) )
138 {
139 list_for_each_entry ( i, WAITQ(sdom->processor), waitq )
140 {
141 if ( i->deadline > inf->deadline )
142 {
143 __list_add(&inf->waitq, i->waitq.prev, &i->waitq);
144 break;
145 }
146 }
148 if ( &i->waitq == WAITQ(sdom->processor) )
149 list_add_tail(&inf->waitq, WAITQ(sdom->processor));
150 }
151 else if ( domain_runnable(sdom) )
152 {
153 list_for_each_entry ( i, RUNQ(sdom->processor), run_list )
154 {
155 if ( (i->deadline > inf->deadline) || is_idle_task(i->owner) )
156 {
157 __list_add(&inf->run_list, i->run_list.prev, &i->run_list);
158 break;
159 }
160 }
162 if ( &i->waitq == RUNQ(sdom->processor) )
163 list_add_tail(&inf->run_list, RUNQ(sdom->processor));
164 }
165 /* silently ignore tasks in other states like BLOCKED, DYING, STOPPED, etc
166 * - they shouldn't be on any queue */
167 }
169 /** at_alloc_task - allocate private info for a task */
170 static int at_alloc_task(struct domain *p)
171 {
172 ASSERT(p != NULL);
174 p->sched_priv = xmalloc(struct at_dom_info);
175 if ( p->sched_priv == NULL )
176 return -1;
178 return 0;
179 }
182 /* prepare a task to be added to scheduling */
183 static void at_add_task(struct domain *p)
184 {
185 s_time_t now = NOW();
187 ASSERT( p->sched_priv != NULL );
189 DOM_INFO(p)->owner = p;
190 p->lastschd = now;
192 /* DOM 0's parameters must be set here for it to boot the system! */
193 if(p->id == 0)
194 {
195 DOM_INFO(p)->remain = MILLISECS(15);
196 DOM_INFO(p)->nat_period =
197 DOM_INFO(p)->period = MILLISECS(20);
198 DOM_INFO(p)->nat_slice =
199 DOM_INFO(p)->slice = MILLISECS(15);
200 DOM_INFO(p)->latency = MILLISECS(5);
201 DOM_INFO(p)->xtratime = 1;
202 DOM_INFO(p)->deadline = now;
203 DOM_INFO(p)->prevddln = now;
204 }
205 else /* other domains run basically best effort unless otherwise set */
206 {
207 DOM_INFO(p)->remain = 0;
208 DOM_INFO(p)->nat_period =
209 DOM_INFO(p)->period = SECONDS(10);
210 DOM_INFO(p)->nat_slice =
211 DOM_INFO(p)->slice = MILLISECS(10);
212 DOM_INFO(p)->latency = SECONDS(10);
213 DOM_INFO(p)->xtratime = 1;
214 DOM_INFO(p)->deadline = now;
215 // DOM_INFO(p)->deadline = now + SECONDS(10);
216 DOM_INFO(p)->prevddln = 0;
217 }
219 INIT_LIST_HEAD(&(DOM_INFO(p)->run_list));
220 INIT_LIST_HEAD(&(DOM_INFO(p)->waitq));
221 }
223 /**
224 * dequeue - remove a domain from any queues it is on.
225 * @sdom: the task to remove
226 */
227 static void dequeue(struct domain *sdom)
228 {
229 struct at_dom_info *inf = DOM_INFO(sdom);
231 ASSERT(sdom->id != IDLE_DOMAIN_ID);
233 /* just delete it from all the queues! */
234 list_del(&inf->waitq);
235 INIT_LIST_HEAD(&inf->waitq);
238 if(__task_on_runqueue(sdom))
239 __del_from_runqueue(sdom);
240 }
/*
 * unblock
 *
 * This function deals with updating the sdom for a domain
 * which has just been unblocked.
 *
 * Xen's Atropos treats unblocking slightly differently to Nemesis:
 *
 * - "Short blocking" domains (i.e. that unblock before their deadline has
 *   expired) are treated the same as in nemesis (put on the wait queue and
 *   given preferential treatment in selecting domains for extra time).
 *
 * - "Long blocking" domains do not simply have their period truncated to their
 *   unblocking latency as before but also have their slice recomputed to be the
 *   same fraction of their new period. Each time the domain is scheduled, the
 *   period and slice are doubled until they reach their original ("natural")
 *   values, as set by the user (and stored in nat_period and nat_slice). The
 *   idea is to give better response times to unblocking whilst preserving QoS
 *   guarantees to other domains.
 */
static void unblock(struct domain *sdom)
{
    s_time_t time = NOW();
    struct at_dom_info *inf = DOM_INFO(sdom);

    /* Take the domain off whatever queue it currently occupies. */
    dequeue(sdom);

    /* We distinguish two cases... short and long blocks */
    if ( inf->deadline < time )
    {
        /* Long blocking case */

        /* The sdom has passed its deadline since it was blocked.
           Give it its new deadline based on the latency value. */
        inf->prevddln = time;

        /* Scale the scheduling parameters as requested by the latency hint. */
        /* NOTE(review): the divide assumes latency != 0 and
         * latency <= nat_period (so the quotient is >= 1); at_adjdom()
         * enforces both, and at_add_task()'s defaults satisfy them —
         * confirm no other path can set latency to 0. */
        inf->deadline = time + inf->latency;
        inf->slice = inf->nat_slice / ( inf->nat_period / inf->latency );
        inf->period = inf->latency;
        inf->remain = inf->slice;
    }
    else
    {
        /* Short blocking case */

        /* We leave REMAIN intact, but put this domain on the WAIT
           queue marked as recently unblocked. It will be given
           priority over other domains on the wait queue until while
           REMAIN>0 in a generous attempt to help it make up for its
           own foolishness. */
        if(inf->remain > 0)
            inf->state = ATROPOS_TASK_UNBLOCKED;
        else
            inf->state = ATROPOS_TASK_WAIT;
    }

    /* Put the domain back on the queue appropriate to its new state. */
    requeue(sdom);
}
/* Allocate and initialise scheduling state for the idle task;
 * returns 0 on success, -1 if allocation fails. */
static int at_init_idle_task(struct domain *p)
{
    if ( at_alloc_task(p) < 0 )
        return -1;

    at_add_task(p);
    dequeue(p);
    requeue(p);

    return 0;
}
317 static void block(struct domain* sdom)
318 {
319 DOM_INFO(sdom)->state = ATROPOS_TASK_BLOCKED;
320 dequeue(sdom);
321 requeue(sdom);
322 }
325 /**
326 * ATROPOS - main scheduler function
327 */
328 task_slice_t ksched_scheduler(s_time_t time)
329 {
330 struct domain *cur_sdom = current; /* Current sdom */
331 s_time_t newtime;
332 s_time_t ranfor; /* How long the domain ran */
333 struct domain *sdom; /* tmp. scheduling domain */
334 int cpu = cur_sdom->processor; /* current CPU */
335 struct at_dom_info *cur_info;
336 static unsigned long waitq_rrobin = 0;
337 int i;
338 task_slice_t ret;
341 cur_info = DOM_INFO(cur_sdom);
343 ASSERT( cur_sdom != NULL);
345 /* If we were spinning in the idle loop, there is no current
346 * domain to deschedule. */
347 if (is_idle_task(cur_sdom))
348 goto deschedule_done;
350 /*****************************
351 *
352 * Deschedule the current scheduling domain
353 *
354 ****************************/
356 /* Record the time the domain was preempted and for how long it
357 ran. Work out if the domain is going to be blocked to save
358 some pointless queue shuffling */
359 cur_sdom->lastdeschd = time;
361 ranfor = (time - cur_sdom->lastschd);
363 dequeue(cur_sdom);
365 if ( domain_runnable(cur_sdom) ||
366 (cur_info->state == ATROPOS_TASK_UNBLOCKED) )
367 {
369 /* In this block, we are doing accounting for an sdom which has
370 been running in contracted time. Note that this could now happen
371 even if the domain is on the wait queue (i.e. if it blocked) */
373 /* Deduct guaranteed time from the domain */
374 cur_info->remain -= ranfor;
376 /* If guaranteed time has run out... */
377 if ( cur_info->remain <= 0 )
378 {
379 /* Move domain to correct position in WAIT queue */
380 /* XXX sdom_unblocked doesn't need this since it is
381 already in the correct place. */
382 cur_info->state = ATROPOS_TASK_WAIT;
383 }
384 }
386 requeue(cur_sdom);
388 deschedule_done:
389 /*****************************
390 *
391 * We have now successfully descheduled the current sdom.
392 * The next task is the allocate CPU time to any sdom it is due to.
393 *
394 ****************************/
395 cur_sdom = NULL;
397 /*****************************
398 *
399 * Allocate CPU time to any waiting domains who have passed their
400 * period deadline. If necessary, move them to run queue.
401 *
402 ****************************/
404 while(!list_empty(WAITQ(cpu)) &&
405 DOM_INFO(sdom = waitq_el(WAITQ(cpu)->next))->deadline <= time )
406 {
408 struct at_dom_info *inf = DOM_INFO(sdom);
409 dequeue(sdom);
411 if ( inf->period != inf->nat_period )
412 {
413 /* This domain has had its parameters adjusted as a result of
414 * unblocking and they need to be adjusted before requeuing it */
415 inf->slice *= 2;
416 inf->period *= 2;
418 if ( inf->period > inf->nat_period )
419 {
420 inf->period = inf->nat_period;
421 inf->slice = inf->nat_slice;
422 }
423 }
425 /* Domain begins a new period and receives a slice of CPU
426 * If this domain has been blocking then throw away the
427 * rest of it's remain - it can't be trusted */
428 if (inf->remain > 0)
429 inf->remain = inf->slice;
430 else
431 inf->remain += inf->slice;
433 inf->prevddln = inf->deadline;
434 inf->deadline += inf->period;
436 if ( inf->remain <= 0 )
437 inf->state = ATROPOS_TASK_WAIT;
439 /* Place on the appropriate queue */
440 requeue(sdom);
441 }
443 /*****************************
444 *
445 * Next we need to pick an sdom to run.
446 * If anything is actually 'runnable', we run that.
447 * If nothing is, we pick a waiting sdom to run optimistically.
448 * If there aren't even any of those, we have to spin waiting for an
449 * event or a suitable time condition to happen.
450 *
451 ****************************/
453 /* we guarantee there's always something on the runqueue */
454 cur_info = list_entry(RUNQ(cpu)->next,
455 struct at_dom_info, run_list);
457 cur_sdom = cur_info->owner;
458 newtime = time + cur_info->remain;
460 /* MAW - the idle domain is always on the run queue. We run from the
461 * runqueue if it's NOT the idle domain or if there's nothing on the wait
462 * queue */
463 if (cur_sdom->id == IDLE_DOMAIN_ID && !list_empty(WAITQ(cpu)))
464 {
465 struct at_dom_info *inf;
467 /* Try running a domain on the WAIT queue - this part of the
468 scheduler isn't particularly efficient but then again, we
469 don't have any guaranteed domains to worry about. */
471 /* See if there are any unblocked domains on the WAIT
472 queue who we can give preferential treatment to. */
474 list_for_each_entry ( inf, WAITQ(cpu), waitq )
475 {
476 sdom = inf->owner;
478 if (inf->state == ATROPOS_TASK_UNBLOCKED)
479 {
480 cur_sdom = sdom;
481 cur_info = inf;
482 newtime = time + inf->remain;
483 goto found;
484 }
485 }
487 /* init values needed to approximate round-robin for slack time */
488 i = 0;
489 if ( waitq_rrobin >= q_len(WAITQ(cpu)))
490 waitq_rrobin = 0;
493 /* Last chance: pick a domain on the wait queue with the XTRA
494 flag set. The NEXT_OPTM field is used to cheaply achieve
495 an approximation of round-robin order */
496 list_for_each_entry ( inf, WAITQ(cpu), waitq )
497 {
498 sdom = inf->owner;
500 if (inf->xtratime && i >= waitq_rrobin)
501 {
502 cur_sdom = sdom;
503 cur_info = inf;
504 newtime = time + BESTEFFORT_QUANTUM;
505 waitq_rrobin = i + 1; /* set this value ready for next */
506 goto found;
507 }
509 i++;
510 }
511 }
513 found:
514 /**********************
515 *
516 * We now have to work out the time when we next need to
517 * make a scheduling decision. We set the alarm timer
518 * to cause an interrupt at that time.
519 *
520 **********************/
522 #define MIN(x,y) ( ( x < y ) ? x : y )
523 #define MAX(x,y) ( ( x > y ) ? x : y )
525 /* If we might be able to run a waiting domain before this one has */
526 /* exhausted its time, cut short the time allocation */
527 if (!list_empty(WAITQ(cpu)))
528 {
529 newtime = MIN(newtime,
530 DOM_INFO(waitq_el(WAITQ(cpu)->next))->deadline);
531 }
533 /* don't allow pointlessly small time slices */
534 newtime = MAX(newtime, time + BESTEFFORT_QUANTUM);
536 ret.task = cur_sdom;
537 ret.time = newtime - time;
539 TRACE_1D(0, cur_sdom->id);
541 return ret;
542 }
545 /* set up some private data structures */
546 static int at_init_scheduler()
547 {
548 int i;
550 for ( i = 0; i < NR_CPUS; i++ )
551 {
552 schedule_data[i].sched_priv = xmalloc(sizeof(struct at_cpu_info));
553 if ( schedule_data[i].sched_priv == NULL )
554 return -1;
555 INIT_LIST_HEAD(WAITQ(i));
556 INIT_LIST_HEAD(RUNQ(i));
557 }
559 return 0;
560 }
563 /* print relevant per-domain info for a run queue dump */
564 static void at_dump_runq_el(struct domain *p)
565 {
566 printk("lastschd = %llu, xtratime = %d ",
567 p->lastschd, DOM_INFO(p)->xtratime);
568 }
/* dump relevant per-cpu state for a run queue dump */
static void at_dump_cpu_state(int cpu)
{
    struct list_head *queue;
    int loop = 0;
    struct at_dom_info *d_inf;
    struct domain *d;

    /* First walk the run queue, printing each domain and its list links. */
    queue = RUNQ(cpu);
    printk("\nRUNQUEUE rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
           (unsigned long) queue->next, (unsigned long) queue->prev);

    list_for_each_entry ( d_inf, queue, run_list )
    {
        d = d_inf->owner;
        printk("%3d: %d has=%c ", loop++, d->id,
               test_bit(DF_RUNNING, &d->flags) ? 'T':'F');
        at_dump_runq_el(d);
        printk("c=0x%X%08X\n", (u32)(d->cpu_time>>32), (u32)d->cpu_time);
        printk(" l: %lx n: %lx p: %lx\n",
               (unsigned long)&d_inf->run_list,
               (unsigned long)d_inf->run_list.next,
               (unsigned long)d_inf->run_list.prev);
    }

    /* Then the wait queue, same format but following the waitq linkage.
     * Note 'loop' keeps counting across both queues. */
    queue = WAITQ(cpu);
    printk("\nWAITQUEUE rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
           (unsigned long) queue->next, (unsigned long) queue->prev);

    list_for_each_entry ( d_inf, queue, waitq )
    {
        d = d_inf->owner;
        printk("%3d: %d has=%c ", loop++, d->id,
               test_bit(DF_RUNNING, &d->flags) ? 'T':'F');
        at_dump_runq_el(d);
        printk("c=0x%X%08X\n", (u32)(d->cpu_time>>32), (u32)d->cpu_time);
        printk(" l: %lx n: %lx p: %lx\n",
               (unsigned long)&d_inf->waitq,
               (unsigned long)d_inf->waitq.next,
               (unsigned long)d_inf->waitq.prev);
    }
}
616 /* set or fetch domain scheduling parameters */
617 static int at_adjdom(struct domain *p, struct sched_adjdom_cmd *cmd)
618 {
619 if ( cmd->direction == SCHED_INFO_PUT )
620 {
621 /* sanity checking! */
622 if( cmd->u.atropos.latency > cmd->u.atropos.nat_period
623 || cmd->u.atropos.latency == 0
624 || cmd->u.atropos.nat_slice > cmd->u.atropos.nat_period )
625 return -EINVAL;
627 DOM_INFO(p)->nat_period = cmd->u.atropos.nat_period;
628 DOM_INFO(p)->nat_slice = cmd->u.atropos.nat_slice;
629 DOM_INFO(p)->latency = cmd->u.atropos.latency;
630 DOM_INFO(p)->xtratime = !!cmd->u.atropos.xtratime;
631 }
632 else if ( cmd->direction == SCHED_INFO_GET )
633 {
634 cmd->u.atropos.nat_period = DOM_INFO(p)->nat_period;
635 cmd->u.atropos.nat_slice = DOM_INFO(p)->nat_slice;
636 cmd->u.atropos.latency = DOM_INFO(p)->latency;
637 cmd->u.atropos.xtratime = DOM_INFO(p)->xtratime;
638 }
640 return 0;
641 }
643 /* free memory associated with a task */
644 static void at_free_task(struct domain *p)
645 {
646 xfree( DOM_INFO(p) );
647 }
650 /* print decoded domain private state value (if known) */
651 static int at_prn_state(int state)
652 {
653 int ret = 0;
655 switch(state)
656 {
657 case ATROPOS_TASK_UNBLOCKED:
658 printk("Unblocked");
659 break;
660 case ATROPOS_TASK_WAIT:
661 printk("Wait");
662 break;
663 default:
664 ret = -1;
665 }
667 return ret;
668 }
/* Hook table registering Atropos with Xen's generic scheduler layer. */
struct scheduler sched_atropos_def = {
    .name = "Atropos Soft Real Time Scheduler",
    .opt_name = "atropos",
    .sched_id = SCHED_ATROPOS,
    .init_scheduler = at_init_scheduler,
    .init_idle_task = at_init_idle_task,
    .alloc_task = at_alloc_task,
    .add_task = at_add_task,
    .free_task = at_free_task,
    .wake = unblock,                  /* domain woken up */
    .sleep = block,                   /* domain blocks */
    .do_schedule = ksched_scheduler,  /* main scheduling decision */
    .adjdom = at_adjdom,
    .dump_cpu_state = at_dump_cpu_state,
    .prn_state = at_prn_state,
};