debuggers.hg

view xen/common/sched_bvt.c @ 3652:10a0f6b0a996

bitkeeper revision 1.1159.238.3 (4200cd90cCW2XIYxAgdkWL28Tzf-8g)

Introduce _xmalloc for when you really want just bytes.

Signed-off-by: ian.pratt@cl.cam.ac.uk
author iap10@labyrinth.cl.cam.ac.uk
date Wed Feb 02 12:54:40 2005 +0000 (2005-02-02)
parents f98fa170a9f4
children 0ef6e8e6e85d
line source
1 /* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
2 ****************************************************************************
3 * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
4 * (C) 2002-2003 University of Cambridge
5 * (C) 2004 - Mark Williamson - Intel Research Cambridge
6 ****************************************************************************
7 *
8 * File: common/sched_bvt.c
9 * Author: Rolf Neugebauer & Keir Fraser
10 * Updated for generic API by Mark Williamson
11 *
12 * Description: CPU scheduling
13 * implements A Borrowed Virtual Time scheduler.
14 * (see Duda & Cheriton SOSP'99)
15 */
17 #include <xen/config.h>
18 #include <xen/init.h>
19 #include <xen/lib.h>
20 #include <xen/sched.h>
21 #include <xen/delay.h>
22 #include <xen/event.h>
23 #include <xen/time.h>
24 #include <xen/ac_timer.h>
25 #include <xen/perfc.h>
26 #include <xen/sched-if.h>
27 #include <xen/slab.h>
28 #include <xen/softirq.h>
/*
 * Per-exec-domain (per-VCPU) BVT scheduling state.  One of these lives
 * inside the owning domain's bvt_dom_info (see ed_inf[] below).
 */
struct bvt_edom_info
{
    struct list_head    run_list;     /* runqueue list pointers */
    u32                 avt;          /* actual virtual time */
    u32                 evt;          /* effective virtual time (avt adjusted for warp) */
    struct exec_domain  *exec_domain; /* back-pointer to the VCPU this info describes */
    struct bvt_dom_info *inf;         /* back-pointer to the shared per-domain info */
};
/*
 * Per-domain BVT scheduling state, shared by all of the domain's VCPUs.
 * Holds the weight (mcu_advance) and the warp configuration plus the two
 * timers that enforce the warp limit (warpl) and unwarp requirement (warpu).
 */
struct bvt_dom_info
{
    struct domain       *domain;      /* domain this info belongs to */
    u32                 mcu_advance;  /* inverse of weight */
    int                 warpback;     /* warp? */
    int                 warp;         /* warp set and within the warp
                                         limits */
    s32                 warp_value;   /* virtual time warp */
    s_time_t            warpl;        /* warp limit */
    struct ac_timer     warp_timer;   /* deals with warpl */
    s_time_t            warpu;        /* unwarp time requirement */
    struct ac_timer     unwarp_timer; /* deals with warpu */

    /* Per-VCPU state, indexed by exec-domain id (ed->eid). */
    struct bvt_edom_info ed_inf[MAX_VIRT_CPUS];
};
/* Per-physical-CPU BVT state: the runqueue and the system virtual time. */
struct bvt_cpu_info
{
    struct list_head runqueue;  /* VCPUs runnable on this CPU */
    unsigned long    svt;       /* system virtual time: min avt on this CPU */
};
/* Accessors for the scheduler-private data hung off domains, VCPUs and CPUs. */
#define BVT_INFO(p)   ((struct bvt_dom_info *)(p)->sched_priv)
#define EBVT_INFO(p)  ((struct bvt_edom_info *)(p)->ed_sched_priv)
#define CPU_INFO(cpu) ((struct bvt_cpu_info *)(schedule_data[cpu]).sched_priv)
#define RUNLIST(p)    ((struct list_head *)&(EBVT_INFO(p)->run_list))
#define RUNQUEUE(cpu) ((struct list_head *)&(CPU_INFO(cpu)->runqueue))
#define CPU_SVT(cpu)  (CPU_INFO(cpu)->svt)

#define MCU            (s32)MICROSECS(100)  /* Minimum unit */
#define MCU_ADVANCE    10                   /* default weight */
#define TIME_SLOP      (s32)MICROSECS(50)   /* allow time to slip a bit */
/* Context switch allowance: runtime-tunable via bvt_ctl(). */
static s32 ctx_allow = (s32)MILLISECS(5);
74 static inline void __add_to_runqueue_head(struct exec_domain *d)
75 {
76 list_add(RUNLIST(d), RUNQUEUE(d->processor));
77 }
79 static inline void __add_to_runqueue_tail(struct exec_domain *d)
80 {
81 list_add_tail(RUNLIST(d), RUNQUEUE(d->processor));
82 }
84 static inline void __del_from_runqueue(struct exec_domain *d)
85 {
86 struct list_head *runlist = RUNLIST(d);
87 list_del(runlist);
88 runlist->next = NULL;
89 }
91 static inline int __task_on_runqueue(struct exec_domain *d)
92 {
93 return (RUNLIST(d))->next != NULL;
94 }
97 /* Warp/unwarp timer functions */
98 static void warp_timer_fn(unsigned long pointer)
99 {
100 struct bvt_dom_info *inf = (struct bvt_dom_info *)pointer;
101 unsigned int cpu = inf->domain->exec_domain[0]->processor;
103 spin_lock_irq(&schedule_data[cpu].schedule_lock);
105 inf->warp = 0;
107 /* unwarp equal to zero => stop warping */
108 if ( inf->warpu == 0 )
109 {
110 inf->warpback = 0;
111 cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
112 }
114 /* set unwarp timer */
115 inf->unwarp_timer.expires = NOW() + inf->warpu;
116 add_ac_timer(&inf->unwarp_timer);
118 spin_unlock_irq(&schedule_data[cpu].schedule_lock);
119 }
121 static void unwarp_timer_fn(unsigned long pointer)
122 {
123 struct bvt_dom_info *inf = (struct bvt_dom_info *)pointer;
124 unsigned int cpu = inf->domain->exec_domain[0]->processor;
126 spin_lock_irq(&schedule_data[cpu].schedule_lock);
128 if ( inf->warpback )
129 {
130 inf->warp = 1;
131 cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
132 }
134 spin_unlock_irq(&schedule_data[cpu].schedule_lock);
135 }
137 static inline u32 calc_avt(struct exec_domain *d, s_time_t now)
138 {
139 u32 ranfor, mcus;
140 struct bvt_dom_info *inf = BVT_INFO(d->domain);
141 struct bvt_edom_info *einf = EBVT_INFO(d);
143 ranfor = (u32)(now - d->lastschd);
144 mcus = (ranfor + MCU - 1)/MCU;
146 return einf->avt + mcus * inf->mcu_advance;
147 }
149 /*
150 * Calculate the effective virtual time for a domain. Take into account
151 * warping limits
152 */
153 static inline u32 calc_evt(struct exec_domain *d, u32 avt)
154 {
155 struct bvt_dom_info *inf = BVT_INFO(d->domain);
156 /* TODO The warp routines need to be rewritten GM */
158 if ( inf->warp )
159 return avt - inf->warp_value;
160 else
161 return avt;
162 }
164 /**
165 * bvt_alloc_task - allocate BVT private structures for a task
166 * @p: task to allocate private structures for
167 *
168 * Returns non-zero on failure.
169 */
170 int bvt_alloc_task(struct exec_domain *ed)
171 {
172 struct domain *d = ed->domain;
173 if ( (d->sched_priv == NULL) ) {
174 if ( (d->sched_priv = xmalloc(struct bvt_dom_info)) == NULL )
175 return -1;
176 memset(d->sched_priv, 0, sizeof(struct bvt_dom_info));
177 }
178 ed->ed_sched_priv = &BVT_INFO(d)->ed_inf[ed->eid];
179 BVT_INFO(d)->ed_inf[ed->eid].inf = BVT_INFO(d);
180 BVT_INFO(d)->ed_inf[ed->eid].exec_domain = ed;
181 return 0;
182 }
184 /*
185 * Add and remove a domain
186 */
187 void bvt_add_task(struct exec_domain *d)
188 {
189 struct bvt_dom_info *inf = BVT_INFO(d->domain);
190 struct bvt_edom_info *einf = EBVT_INFO(d);
191 ASSERT(inf != NULL);
192 ASSERT(d != NULL);
194 if (d->eid == 0) {
195 inf->mcu_advance = MCU_ADVANCE;
196 inf->domain = d->domain;
197 inf->warpback = 0;
198 /* Set some default values here. */
199 inf->warp = 0;
200 inf->warp_value = 0;
201 inf->warpl = MILLISECS(2000);
202 inf->warpu = MILLISECS(1000);
203 /* initialise the timers */
204 init_ac_timer(&inf->warp_timer);
205 inf->warp_timer.cpu = d->processor;
206 inf->warp_timer.data = (unsigned long)inf;
207 inf->warp_timer.function = &warp_timer_fn;
208 init_ac_timer(&inf->unwarp_timer);
209 inf->unwarp_timer.cpu = d->processor;
210 inf->unwarp_timer.data = (unsigned long)inf;
211 inf->unwarp_timer.function = &unwarp_timer_fn;
212 }
214 einf->exec_domain = d;
216 if ( d->domain->id == IDLE_DOMAIN_ID )
217 {
218 einf->avt = einf->evt = ~0U;
219 }
220 else
221 {
222 /* Set avt and evt to system virtual time. */
223 einf->avt = CPU_SVT(d->processor);
224 einf->evt = CPU_SVT(d->processor);
225 }
226 }
228 int bvt_init_idle_task(struct exec_domain *p)
229 {
230 if ( bvt_alloc_task(p) < 0 )
231 return -1;
233 bvt_add_task(p);
235 set_bit(EDF_RUNNING, &p->ed_flags);
236 if ( !__task_on_runqueue(p) )
237 __add_to_runqueue_head(p);
239 return 0;
240 }
/*
 * Wake a VCPU: put it on the runqueue, bring its virtual times up to date,
 * and either preempt the current task (if the woken one has a lower or
 * equal EVT, or the CPU is idle) or shorten the scheduler timer so the
 * current task is re-examined once its advantage runs out.
 */
void bvt_wake(struct exec_domain *d)
{
    struct bvt_edom_info *einf = EBVT_INFO(d);
    struct exec_domain *curr;
    s_time_t now, r_time;
    int cpu = d->processor;
    u32 curr_evt;

    /* Already queued: nothing to do. */
    if ( unlikely(__task_on_runqueue(d)) )
        return;

    __add_to_runqueue_head(d);

    now = NOW();

    /* Set the BVT parameters. AVT should always be updated
       if CPU migration occurred. */
    if ( einf->avt < CPU_SVT(cpu) ||
         unlikely(test_bit(EDF_MIGRATED, &d->ed_flags)) )
        einf->avt = CPU_SVT(cpu);

    /* Deal with warping here. */
    einf->evt = calc_evt(d, einf->avt);

    curr = schedule_data[cpu].curr;
    curr_evt = calc_evt(curr, calc_avt(curr, now));
    /* Calculate the time the current domain would run assuming
       the second smallest evt is of the newly woken domain */
    r_time = curr->lastschd +
        ((einf->evt - curr_evt) / BVT_INFO(curr->domain)->mcu_advance) +
        ctx_allow;

    /* Preempt immediately, or just re-arm the scheduler timer earlier. */
    if ( is_idle_task(curr->domain) || (einf->evt <= curr_evt) )
        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
    else if ( schedule_data[cpu].s_timer.expires > r_time )
        mod_ac_timer(&schedule_data[cpu].s_timer, r_time);
}
281 static void bvt_sleep(struct exec_domain *d)
282 {
283 if ( test_bit(EDF_RUNNING, &d->ed_flags) )
284 cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
285 else if ( __task_on_runqueue(d) )
286 __del_from_runqueue(d);
287 }
289 /**
290 * bvt_free_task - free BVT private structures for a task
291 * @d: task
292 */
293 void bvt_free_task(struct domain *d)
294 {
295 ASSERT(d->sched_priv != NULL);
296 xfree(d->sched_priv);
297 }
299 /* Control the scheduler. */
300 int bvt_ctl(struct sched_ctl_cmd *cmd)
301 {
302 struct bvt_ctl *params = &cmd->u.bvt;
304 if ( cmd->direction == SCHED_INFO_PUT )
305 ctx_allow = params->ctx_allow;
306 else
307 params->ctx_allow = ctx_allow;
309 return 0;
310 }
312 /* Adjust scheduling parameter for a given domain. */
313 int bvt_adjdom(
314 struct domain *d, struct sched_adjdom_cmd *cmd)
315 {
316 struct bvt_adjdom *params = &cmd->u.bvt;
318 if ( cmd->direction == SCHED_INFO_PUT )
319 {
320 u32 mcu_adv = params->mcu_adv;
321 u32 warpback = params->warpback;
322 s32 warpvalue = params->warpvalue;
323 s_time_t warpl = params->warpl;
324 s_time_t warpu = params->warpu;
326 struct bvt_dom_info *inf = BVT_INFO(d);
328 /* Sanity -- this can avoid divide-by-zero. */
329 if ( (mcu_adv == 0) || (warpl < 0) || (warpu < 0) )
330 return -EINVAL;
332 inf->mcu_advance = mcu_adv;
333 inf->warpback = warpback;
334 /* The warp should be the same as warpback */
335 inf->warp = warpback;
336 inf->warp_value = warpvalue;
337 inf->warpl = MILLISECS(warpl);
338 inf->warpu = MILLISECS(warpu);
340 /* If the unwarp timer set up it needs to be removed */
341 rem_ac_timer(&inf->unwarp_timer);
342 /* If we stop warping the warp timer needs to be removed */
343 if ( !warpback )
344 rem_ac_timer(&inf->warp_timer);
345 }
346 else if ( cmd->direction == SCHED_INFO_GET )
347 {
348 struct bvt_dom_info *inf = BVT_INFO(d);
349 params->mcu_adv = inf->mcu_advance;
350 params->warpvalue = inf->warp_value;
351 params->warpback = inf->warpback;
352 params->warpl = inf->warpl;
353 params->warpu = inf->warpu;
354 }
356 return 0;
357 }
/*
 * The main function
 * - deschedule the current domain.
 * - pick a new domain.
 *   i.e., the domain with lowest EVT.
 * The runqueue should be ordered by EVT so that is easy.
 */
static task_slice_t bvt_do_schedule(s_time_t now)
{
    struct domain *d;
    struct exec_domain *prev = current, *next = NULL, *next_prime, *ed;
    int cpu = prev->processor;
    s32 r_time;     /* time for new dom to run */
    u32 next_evt, next_prime_evt, min_avt;
    struct bvt_dom_info *prev_inf        = BVT_INFO(prev->domain);
    struct bvt_edom_info *prev_einf      = EBVT_INFO(prev);
    struct bvt_edom_info *p_einf         = NULL;
    struct bvt_edom_info *next_einf      = NULL;
    struct bvt_edom_info *next_prime_einf = NULL;
    task_slice_t ret;

    ASSERT(prev->ed_sched_priv != NULL);
    ASSERT(prev_einf != NULL);
    ASSERT(__task_on_runqueue(prev));

    if ( likely(!is_idle_task(prev->domain)) )
    {
        /* Charge the outgoing task for the time it just consumed. */
        prev_einf->avt = calc_avt(prev, now);
        prev_einf->evt = calc_evt(prev, prev_einf->avt);

        /* It is being descheduled, so its warp-limit timer is moot. */
        if(prev_inf->warpback && prev_inf->warpl > 0)
            rem_ac_timer(&prev_inf->warp_timer);

        __del_from_runqueue(prev);

        /* Requeue at the tail if it is still runnable. */
        if ( domain_runnable(prev) )
            __add_to_runqueue_tail(prev);
    }

    /* We should at least have the idle task */
    ASSERT(!list_empty(RUNQUEUE(cpu)));

    /*
     * scan through the run queue and pick the task with the lowest evt
     * *and* the task the second lowest evt.
     * this code is O(n) but we expect n to be small.
     */
    next_einf = EBVT_INFO(schedule_data[cpu].idle);
    next_prime_einf = NULL;

    next_evt       = ~0U;
    next_prime_evt = ~0U;
    min_avt        = ~0U;

    list_for_each_entry ( p_einf, RUNQUEUE(cpu), run_list )
    {
        if ( p_einf->evt < next_evt )
        {
            /* New minimum: previous winner becomes the runner-up. */
            next_prime_einf = next_einf;
            next_prime_evt  = next_evt;
            next_einf       = p_einf;
            next_evt        = p_einf->evt;
        }
        else if ( next_prime_evt == ~0U )
        {
            /* First candidate for the runner-up slot. */
            next_prime_evt  = p_einf->evt;
            next_prime_einf = p_einf;
        }
        else if ( p_einf->evt < next_prime_evt )
        {
            next_prime_evt  = p_einf->evt;
            next_prime_einf = p_einf;
        }

        /* Determine system virtual time. */
        if ( p_einf->avt < min_avt )
            min_avt = p_einf->avt;
    }

    /* The chosen task starts warped: bound the warp with warp_timer. */
    if(next_einf->inf->warp && next_einf->inf->warpl > 0)
    {
        /* Set the timer up */
        next_einf->inf->warp_timer.expires = now + next_einf->inf->warpl;
        /* Add it to the heap */
        add_ac_timer(&next_einf->inf->warp_timer);
    }

    /* Extract the domain pointers from the dom infos */
    next       = next_einf->exec_domain;
    next_prime = next_prime_einf->exec_domain;

    /* Update system virtual time. */
    if ( min_avt != ~0U )
        CPU_SVT(cpu) = min_avt;

    /* check for virtual time overrun on this cpu */
    if ( CPU_SVT(cpu) >= 0xf0000000 )
    {
        ASSERT(!local_irq_is_enabled());

        write_lock(&domlist_lock);

        /* Rebase avt/evt of every VCPU on this CPU to avoid u32 wrap. */
        for_each_domain ( d )
        {
            for_each_exec_domain (d, ed) {
                if ( ed->processor == cpu )
                {
                    p_einf = EBVT_INFO(ed);
                    p_einf->evt -= 0xe0000000;
                    p_einf->avt -= 0xe0000000;
                }
            }
        }

        write_unlock(&domlist_lock);

        CPU_SVT(cpu) -= 0xe0000000;
    }

    /* work out time for next run through scheduler */
    if ( is_idle_task(next->domain) )
    {
        r_time = ctx_allow;
        goto sched_done;
    }

    if ( (next_prime == NULL) || is_idle_task(next_prime->domain) )
    {
        /* We have only one runnable task besides the idle task. */
        r_time = 10 * ctx_allow; /* RN: random constant */
        goto sched_done;
    }

    /*
     * If we are here then we have two runnable tasks.
     * Work out how long 'next' can run till its evt is greater than
     * 'next_prime's evt. Take context switch allowance into account.
     */
    ASSERT(next_prime_einf->evt >= next_einf->evt);

    r_time = ((next_prime_einf->evt - next_einf->evt)/next_einf->inf->mcu_advance)
        + ctx_allow;

    ASSERT(r_time >= ctx_allow);

 sched_done:
    ret.task = next;
    ret.time = r_time;
    return ret;
}
513 static void bvt_dump_runq_el(struct exec_domain *p)
514 {
515 struct bvt_edom_info *inf = EBVT_INFO(p);
517 printk("mcua=%d ev=0x%08X av=0x%08X ",
518 inf->inf->mcu_advance, inf->evt, inf->avt);
519 }
521 static void bvt_dump_settings(void)
522 {
523 printk("BVT: mcu=0x%08Xns ctx_allow=0x%08Xns ", (u32)MCU, (s32)ctx_allow );
524 }
526 static void bvt_dump_cpu_state(int i)
527 {
528 struct list_head *queue;
529 int loop = 0;
530 struct bvt_edom_info *d_inf;
531 struct exec_domain *d;
533 printk("svt=0x%08lX ", CPU_SVT(i));
535 queue = RUNQUEUE(i);
536 printk("QUEUE rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
537 (unsigned long) queue->next, (unsigned long) queue->prev);
539 list_for_each_entry ( d_inf, queue, run_list )
540 {
541 d = d_inf->exec_domain;
542 printk("%3d: %u has=%c ", loop++, d->domain->id,
543 test_bit(EDF_RUNNING, &d->ed_flags) ? 'T':'F');
544 bvt_dump_runq_el(d);
545 printk("c=0x%X%08X\n", (u32)(d->cpu_time>>32), (u32)d->cpu_time);
546 printk(" l: %p n: %p p: %p\n",
547 &d_inf->run_list, d_inf->run_list.next, d_inf->run_list.prev);
548 }
549 }
551 /* Initialise the data structures. */
552 int bvt_init_scheduler()
553 {
554 int i;
556 for ( i = 0; i < NR_CPUS; i++ )
557 {
558 schedule_data[i].sched_priv = xmalloc(struct bvt_cpu_info);
560 if ( schedule_data[i].sched_priv == NULL )
561 {
562 printk("Failed to allocate BVT scheduler per-CPU memory!\n");
563 return -1;
564 }
566 INIT_LIST_HEAD(RUNQUEUE(i));
568 CPU_SVT(i) = 0; /* XXX do I really need to do this? */
569 }
571 return 0;
572 }
/* BVT scheduler operations table, registered with the generic scheduler. */
struct scheduler sched_bvt_def = {
    .name     = "Borrowed Virtual Time",
    .opt_name = "bvt",
    .sched_id = SCHED_BVT,

    .init_scheduler = bvt_init_scheduler,
    .init_idle_task = bvt_init_idle_task,
    .alloc_task     = bvt_alloc_task,
    .add_task       = bvt_add_task,
    .free_task      = bvt_free_task,
    .do_schedule    = bvt_do_schedule,
    .control        = bvt_ctl,
    .adjdom         = bvt_adjdom,
    .dump_settings  = bvt_dump_settings,
    .dump_cpu_state = bvt_dump_cpu_state,
    .sleep          = bvt_sleep,
    .wake           = bvt_wake,
};