debuggers.hg: view xen/common/schedule.c @ 4647:9c88ba91d330

bitkeeper revision 1.1346.1.1 (42670505dNhgnJm5dQD81pCalXMZgw)

author    iap10@freefall.cl.cam.ac.uk
date      Thu Apr 21 01:42:29 2005 +0000 (2005-04-21)
summary   manual merge
parents   a1f760a94785 18d709f72233
children  93a7ffae49b3

/****************************************************************************
 * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
 * (C) 2002-2003 University of Cambridge
 * (C) 2004      - Mark Williamson - Intel Research Cambridge
 ****************************************************************************
 *
 *        File: common/schedule.c
 *      Author: Rolf Neugebauer & Keir Fraser
 *              Updated for generic API by Mark Williamson
 *
 * Description: Generic CPU scheduling code
 *              implements support functionality for the Xen scheduler API.
 *
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/delay.h>
#include <xen/event.h>
#include <xen/time.h>
#include <xen/ac_timer.h>
#include <xen/perfc.h>
#include <xen/sched-if.h>
#include <xen/softirq.h>
#include <xen/trace.h>
#include <public/sched_ctl.h>

/* opt_sched: scheduler - default to Borrowed Virtual Time */
static char opt_sched[10] = "bvt";
string_param("sched", opt_sched);
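
/*
 * Note: string_param() registers "sched=" as a Xen boot command-line option
 * bound to opt_sched, so the scheduler is chosen at boot time, e.g.
 * "sched=bvt" for Borrowed Virtual Time or (assuming that is its opt_name)
 * "sched=sedf" for the sEDF scheduler. The name must match the opt_name of
 * one of the entries in schedulers[] below; see scheduler_init().
 */
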
/*#define WAKE_HISTO*/
/*#define BLOCKTIME_HISTO*/
/*#define ADV_SCHED_HISTO*/
//#include <xen/adv_sched_hist.h>

#if defined(WAKE_HISTO)
#define BUCKETS 31
#elif defined(BLOCKTIME_HISTO)
#define BUCKETS 200
#endif

#define TIME_SLOP (s32)MICROSECS(50)     /* allow time to slip a bit */

/* Various timer handlers. */
static void s_timer_fn(unsigned long unused);
static void t_timer_fn(unsigned long unused);
static void dom_timer_fn(unsigned long data);

/* This is global for now so that private implementations can reach it */
struct schedule_data schedule_data[NR_CPUS];

extern struct scheduler sched_bvt_def;
extern struct scheduler sched_sedf_def;
static struct scheduler *schedulers[] = {
    &sched_bvt_def,
    &sched_sedf_def,
    NULL
};

static void __enter_scheduler(void);

static struct scheduler ops;

#define SCHED_OP(fn, ...)                               \
        (( ops.fn != NULL ) ? ops.fn( __VA_ARGS__ )     \
         : (typeof(ops.fn(__VA_ARGS__)))0 )
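
/*
 * For illustration, SCHED_OP(sleep, ed) expands to roughly
 *
 *     ( ops.sleep != NULL ) ? ops.sleep( ed ) : (typeof(ops.sleep(ed)))0
 *
 * so a hook that the active scheduler does not implement is silently skipped
 * and the call evaluates to zero of the hook's return type.
 */
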
/* Per-CPU periodic timer sends an event to the currently-executing domain. */
static struct ac_timer t_timer[NR_CPUS];

void free_domain_struct(struct domain *d)
{
    int i;

    SCHED_OP(free_task, d);
    for (i = 0; i < MAX_VIRT_CPUS; i++)
        if ( d->exec_domain[i] )
            arch_free_exec_domain_struct(d->exec_domain[i]);

    xfree(d);
}

struct exec_domain *alloc_exec_domain_struct(struct domain *d,
                                             unsigned long vcpu)
{
    struct exec_domain *ed, *edc;

    ASSERT( d->exec_domain[vcpu] == NULL );

    if ( (ed = arch_alloc_exec_domain_struct()) == NULL )
        return NULL;

    memset(ed, 0, sizeof(*ed));

    d->exec_domain[vcpu] = ed;
    ed->domain = d;
    ed->eid = vcpu;

    if ( SCHED_OP(alloc_task, ed) < 0 )
        goto out;

    if (vcpu != 0) {
        ed->vcpu_info = &d->shared_info->vcpu_data[ed->eid];

        for_each_exec_domain(d, edc) {
            if (edc->ed_next_list == NULL || edc->ed_next_list->eid > vcpu)
                break;
        }
        ed->ed_next_list = edc->ed_next_list;
        edc->ed_next_list = ed;

        if (test_bit(EDF_CPUPINNED, &edc->ed_flags)) {
            ed->processor = (edc->processor + 1) % smp_num_cpus;
            set_bit(EDF_CPUPINNED, &ed->ed_flags);
        } else {
            ed->processor = (edc->processor + 1) % smp_num_cpus; /* XXX */
        }
    }

    return ed;

 out:
    d->exec_domain[vcpu] = NULL;
    arch_free_exec_domain_struct(ed);

    return NULL;
}
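
/*
 * Note on the secondary-VCPU path above: the new exec_domain is linked into
 * the domain's ed_next_list in ascending eid order, its processor is chosen
 * round-robin as the CPU after the one used by the exec_domain it follows,
 * and the CPUPINNED flag is inherited from that neighbour.
 */
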
struct domain *alloc_domain_struct(void)
{
    struct domain *d;

    if ( (d = xmalloc(struct domain)) == NULL )
        return NULL;

    memset(d, 0, sizeof(*d));

    if ( alloc_exec_domain_struct(d, 0) == NULL )
        goto out;

    return d;

 out:
    xfree(d);
    return NULL;
}

/*
 * Add and remove a domain
 */
void sched_add_domain(struct exec_domain *ed)
{
    struct domain *d = ed->domain;

    /* Must be unpaused by control software to start execution. */
    set_bit(EDF_CTRLPAUSE, &ed->ed_flags);

    if ( d->id != IDLE_DOMAIN_ID )
    {
        /* Initialise the per-domain timer. */
        init_ac_timer(&ed->timer);
        ed->timer.cpu      = ed->processor;
        ed->timer.data     = (unsigned long)ed;
        ed->timer.function = &dom_timer_fn;
    }
    else
    {
        schedule_data[ed->processor].idle = ed;
    }

    SCHED_OP(add_task, ed);
    TRACE_2D(TRC_SCHED_DOM_ADD, d->id, ed->eid);
}

void sched_rem_domain(struct exec_domain *ed)
{
    rem_ac_timer(&ed->timer);
    SCHED_OP(rem_task, ed);
    TRACE_2D(TRC_SCHED_DOM_REM, ed->domain->id, ed->eid);
}

void init_idle_task(void)
{
    if ( SCHED_OP(init_idle_task, current) < 0 )
        BUG();
}

void domain_sleep(struct exec_domain *ed)
{
    unsigned long flags;

    spin_lock_irqsave(&schedule_data[ed->processor].schedule_lock, flags);
    if ( likely(!domain_runnable(ed)) )
        SCHED_OP(sleep, ed);
    spin_unlock_irqrestore(&schedule_data[ed->processor].schedule_lock, flags);

    TRACE_2D(TRC_SCHED_SLEEP, ed->domain->id, ed->eid);

    /* Synchronous. */
    while ( test_bit(EDF_RUNNING, &ed->ed_flags) && !domain_runnable(ed) )
        cpu_relax();
}

void domain_wake(struct exec_domain *ed)
{
    unsigned long flags;

    spin_lock_irqsave(&schedule_data[ed->processor].schedule_lock, flags);
    if ( likely(domain_runnable(ed)) )
    {
        SCHED_OP(wake, ed);
#ifdef WAKE_HISTO
        ed->wokenup = NOW();
#endif
    }
    clear_bit(EDF_MIGRATED, &ed->ed_flags);
    spin_unlock_irqrestore(&schedule_data[ed->processor].schedule_lock, flags);

    TRACE_2D(TRC_SCHED_WAKE, ed->domain->id, ed->eid);
}

/* Block the currently-executing domain until a pertinent event occurs. */
long do_block(void)
{
    struct exec_domain *ed = current;

#ifdef ADV_SCHED_HISTO
    adv_sched_hist_start(current->processor);
#endif

    ed->vcpu_info->evtchn_upcall_mask = 0;
    set_bit(EDF_BLOCKED, &ed->ed_flags);

    /* Check for events /after/ blocking: avoids wakeup waiting race. */
    if ( event_pending(ed) )
        clear_bit(EDF_BLOCKED, &ed->ed_flags);
    else
    {
        TRACE_2D(TRC_SCHED_BLOCK, ed->domain->id, ed->eid);
        __enter_scheduler();
    }

    return 0;
}
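
/*
 * Note: do_block() clears the guest's event-delivery mask before setting
 * EDF_BLOCKED, and only tests event_pending() afterwards; an event that
 * arrives before the test cancels the block here, while one that arrives
 * later finds EDF_BLOCKED set and wakes the domain through the normal
 * event-delivery path, so a wakeup cannot slip through the gap.
 */
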
/* Voluntarily yield the processor for this allocation. */
static long do_yield(void)
{
#ifdef ADV_SCHED_HISTO
    adv_sched_hist_start(current->processor);
#endif

    TRACE_2D(TRC_SCHED_YIELD, current->domain->id, current->eid);
    __enter_scheduler();
    return 0;
}

/*
 * Demultiplex scheduler-related hypercalls.
 */
long do_sched_op(unsigned long op)
{
    long ret = 0;

    switch ( op & SCHEDOP_cmdmask )
    {
    case SCHEDOP_yield:
    {
        ret = do_yield();
        break;
    }

    case SCHEDOP_block:
    {
        ret = do_block();
        break;
    }

    case SCHEDOP_shutdown:
    {
        TRACE_3D(TRC_SCHED_SHUTDOWN, current->domain->id, current->eid,
                 (op >> SCHEDOP_reasonshift));
        domain_shutdown((u8)(op >> SCHEDOP_reasonshift));
        break;
    }

    default:
        ret = -ENOSYS;
    }

    return ret;
}
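
/*
 * The single hypercall argument packs the command into the bits selected by
 * SCHEDOP_cmdmask and, for SCHEDOP_shutdown, the reason code into the bits
 * above SCHEDOP_reasonshift; a guest requesting a shutdown with reason R
 * would therefore pass roughly
 *
 *     op = SCHEDOP_shutdown | (R << SCHEDOP_reasonshift);
 *
 * (the exact constants live in the public sched headers).
 */
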
/* Per-domain one-shot-timer hypercall. */
long do_set_timer_op(s_time_t timeout)
{
    struct exec_domain *ed = current;

    rem_ac_timer(&ed->timer);

    if ( (ed->timer.expires = timeout) != 0 )
        add_ac_timer(&ed->timer);

    return 0;
}
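
/*
 * Note: the timeout is an absolute s_time_t system time (nanoseconds since
 * boot in Xen's convention), not a relative delay; passing 0 cancels any
 * pending one-shot timer, since the timer is always removed above and only
 * re-added for a non-zero value. Expiry is handled by dom_timer_fn() below.
 */
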
/** sched_id - fetch ID of current scheduler */
int sched_id()
{
    return ops.sched_id;
}

long sched_ctl(struct sched_ctl_cmd *cmd)
{
    if ( cmd->sched_id != ops.sched_id )
        return -EINVAL;

    SCHED_OP(control, cmd);
    TRACE_0D(TRC_SCHED_CTL);
    return 0;
}

/* Adjust scheduling parameter for a given domain. */
long sched_adjdom(struct sched_adjdom_cmd *cmd)
{
    struct domain *d;

    if ( cmd->sched_id != ops.sched_id )
        return -EINVAL;

    if ( cmd->direction != SCHED_INFO_PUT && cmd->direction != SCHED_INFO_GET )
        return -EINVAL;

    d = find_domain_by_id(cmd->domain);
    if ( d == NULL )
        return -ESRCH;

    spin_lock_irq(&schedule_data[d->exec_domain[0]->processor].schedule_lock);
    SCHED_OP(adjdom, d, cmd);
    spin_unlock_irq(&schedule_data[d->exec_domain[0]->processor].schedule_lock);

    TRACE_1D(TRC_SCHED_ADJDOM, d->id);
    put_domain(d);
    return 0;
}

/*
 * The main function
 * - deschedule the current domain (scheduler independent).
 * - pick a new domain (scheduler dependent).
 */
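/*
 * In outline: take this CPU's schedule_lock, account the outgoing domain's
 * cpu_time, ask the active scheduler's do_schedule() hook for the next task
 * and its time slice, re-arm s_timer for the end of that slice, drop the
 * lock, and context_switch() unless the same task was chosen again.
 */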
static void __enter_scheduler(void)
{
    struct exec_domain *prev = current, *next = NULL;
    int                 cpu = prev->processor;
    s_time_t            now;
    struct task_slice   next_slice;
    s32                 r_time;     /* time for new dom to run */

    perfc_incrc(sched_run);

    spin_lock_irq(&schedule_data[cpu].schedule_lock);

#ifdef ADV_SCHED_HISTO
    adv_sched_hist_from_stop(cpu);
#endif
    now = NOW();
#ifdef ADV_SCHED_HISTO
    adv_sched_hist_start(cpu);
#endif

    rem_ac_timer(&schedule_data[cpu].s_timer);

    ASSERT(!in_irq());

    prev->cpu_time += now - prev->lastschd;

    /* get policy-specific decision on scheduling... */
    next_slice = ops.do_schedule(now);

    r_time = next_slice.time;
    next = next_slice.task;

    schedule_data[cpu].curr = next;

    next->lastschd = now;

    /* reprogram the timer */
    schedule_data[cpu].s_timer.expires = now + r_time;
    add_ac_timer(&schedule_data[cpu].s_timer);

    /* Must be protected by the schedule_lock! */
    set_bit(EDF_RUNNING, &next->ed_flags);

    spin_unlock_irq(&schedule_data[cpu].schedule_lock);

    if ( unlikely(prev == next) ) {
#ifdef ADV_SCHED_HISTO
        adv_sched_hist_to_stop(cpu);
#endif
        return;
    }
    perfc_incrc(sched_ctx);

#if defined(WAKE_HISTO)
    if ( !is_idle_task(next->domain) && next->wokenup ) {
        ulong diff = (ulong)(now - next->wokenup);
        diff /= (ulong)MILLISECS(1);
        if (diff <= BUCKETS-2)  schedule_data[cpu].hist[diff]++;
        else                    schedule_data[cpu].hist[BUCKETS-1]++;
    }
    next->wokenup = (s_time_t)0;
#elif defined(BLOCKTIME_HISTO)
    prev->lastdeschd = now;
    if ( !is_idle_task(next->domain) )
    {
        ulong diff = (ulong)((now - next->lastdeschd) / MILLISECS(10));
        if (diff <= BUCKETS-2)  schedule_data[cpu].hist[diff]++;
        else                    schedule_data[cpu].hist[BUCKETS-1]++;
    }
#endif

    prev->sleep_tick = schedule_data[cpu].tick;

    /* Ensure that the domain has an up-to-date time base. */
    if ( !is_idle_task(next->domain) )
    {
        update_dom_time(next);
        if ( next->sleep_tick != schedule_data[cpu].tick )
            send_guest_virq(next, VIRQ_TIMER);
    }

    TRACE_4D(TRC_SCHED_SWITCH,
             prev->domain->id, prev->eid,
             next->domain->id, next->eid);

#ifdef ADV_SCHED_HISTO
    adv_sched_hist_to_stop(cpu);
#endif

    context_switch(prev, next);
}

/* No locking needed -- pointer comparison is safe :-) */
int idle_cpu(int cpu)
{
    struct exec_domain *p = schedule_data[cpu].curr;
    return p == idle_task[cpu];
}

/****************************************************************************
 * Timers: the scheduler utilises a number of timers
 * - s_timer: per CPU timer for preemption and scheduling decisions
 * - t_timer: per CPU periodic timer to send timer interrupt to current dom
 * - dom_timer: per domain timer to specify timeout values
 ****************************************************************************/

/* The scheduler timer: force a run through the scheduler */
static void s_timer_fn(unsigned long unused)
{
#ifdef ADV_SCHED_HISTO
    adv_sched_hist_start(current->processor);
#endif

    raise_softirq(SCHEDULE_SOFTIRQ);
    perfc_incrc(sched_irq);
}

/* Periodic tick timer: send timer event to current domain */
static void t_timer_fn(unsigned long unused)
{
    struct exec_domain *ed  = current;
    unsigned int        cpu = ed->processor;

    schedule_data[cpu].tick++;

    if ( !is_idle_task(ed->domain) )
    {
        update_dom_time(ed);
        send_guest_virq(ed, VIRQ_TIMER);
    }

    page_scrub_schedule_work();

    t_timer[cpu].expires = NOW() + MILLISECS(10);
    add_ac_timer(&t_timer[cpu]);
}
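
/*
 * Note: t_timer_fn() re-arms itself every 10ms (MILLISECS(10) above), so
 * each CPU delivers VIRQ_TIMER to whichever non-idle domain it is running
 * at that rate. This periodic tick is independent of the per-domain
 * one-shot timer programmed via do_set_timer_op()/dom_timer_fn().
 */
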
/* Domain timer function, sends a virtual timer interrupt to domain */
static void dom_timer_fn(unsigned long data)
{
    struct exec_domain *ed = (struct exec_domain *)data;

    update_dom_time(ed);
    send_guest_virq(ed, VIRQ_TIMER);
}

/* Initialise the data structures. */
void __init scheduler_init(void)
{
    int i;

    open_softirq(SCHEDULE_SOFTIRQ, __enter_scheduler);

    for ( i = 0; i < NR_CPUS; i++ )
    {
        spin_lock_init(&schedule_data[i].schedule_lock);
        schedule_data[i].curr = &idle0_exec_domain;

        init_ac_timer(&schedule_data[i].s_timer);
        schedule_data[i].s_timer.cpu      = i;
        schedule_data[i].s_timer.data     = 2;
        schedule_data[i].s_timer.function = &s_timer_fn;

        init_ac_timer(&t_timer[i]);
        t_timer[i].cpu      = i;
        t_timer[i].data     = 3;
        t_timer[i].function = &t_timer_fn;
    }

    schedule_data[0].idle = &idle0_exec_domain;

    for ( i = 0; schedulers[i] != NULL; i++ )
    {
        ops = *schedulers[i];
        if ( strcmp(ops.opt_name, opt_sched) == 0 )
            break;
    }
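
    /*
     * Note: if no entry matches the "sched=" option, the loop above falls
     * off the end with ops still holding a copy of the last scheduler in
     * schedulers[], which is then the one reported and used below.
     */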

    if ( schedulers[i] == NULL )
        printk("Could not find scheduler: %s\n", opt_sched);

    printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);

    if ( SCHED_OP(init_scheduler) < 0 )
        panic("Initialising scheduler failed!");
}

/*
 * Start a scheduler for each CPU
 * This has to be done *after* the timers, e.g., APICs, have been initialised
 */
void schedulers_start(void)
{
    s_timer_fn(0);
    smp_call_function((void *)s_timer_fn, NULL, 1, 1);

    t_timer_fn(0);
    smp_call_function((void *)t_timer_fn, NULL, 1, 1);
}
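
/*
 * Note: each handler is run once on the boot CPU and then invoked on the
 * other CPUs via smp_call_function() (which, in the Linux-derived SMP API
 * used here, runs the function on every CPU except the caller). That kicks
 * the scheduler softirq everywhere and starts each CPU's periodic tick,
 * which keeps re-arming itself in t_timer_fn().
 */
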
void dump_runq(unsigned char key)
{
    s_time_t      now = NOW();
    int           i;
    unsigned long flags;

    local_irq_save(flags);

    printk("Scheduler: %s (%s)\n", ops.name, ops.opt_name);
    SCHED_OP(dump_settings);
    printk("NOW=0x%08X%08X\n", (u32)(now>>32), (u32)now);

    for ( i = 0; i < smp_num_cpus; i++ )
    {
        spin_lock(&schedule_data[i].schedule_lock);
        printk("CPU[%02d] ", i);
        SCHED_OP(dump_cpu_state, i);
        spin_unlock(&schedule_data[i].schedule_lock);
    }

    local_irq_restore(flags);
}

#if defined(WAKE_HISTO) || defined(BLOCKTIME_HISTO)
void print_sched_histo(unsigned char key)
{
    int i, j, k;
    for ( k = 0; k < smp_num_cpus; k++ )
    {
        j = 0;
        printf ("CPU[%02d]: scheduler latency histogram (ms:[count])\n", k);
        for ( i = 0; i < BUCKETS; i++ )
        {
            if ( schedule_data[k].hist[i] != 0 )
            {
                if ( i < BUCKETS-1 )
                    printk("%2d:[%7u] ", i, schedule_data[k].hist[i]);
                else
                    printk(" >:[%7u] ", schedule_data[k].hist[i]);
                if ( !(++j % 5) )
                    printk("\n");
            }
        }
        printk("\n");
    }

}

void reset_sched_histo(unsigned char key)
{
    int i, j;
    for ( j = 0; j < smp_num_cpus; j++ )
        for ( i=0; i < BUCKETS; i++ )
            schedule_data[j].hist[i] = 0;
}
#else
#if defined(ADV_SCHED_HISTO)
void print_sched_histo(unsigned char key)
{
    int i, j, k, t;
    printf("Hello!\n");
    for ( k = 0; k < smp_num_cpus; k++ )
    {
        j = 0;
        t = 0;
        printf ("CPU[%02d]: scheduler latency histogram FROM (ms:[count])\n", k);
        for ( i = 0; i < BUCKETS; i++ )
        {
            //if ( schedule_data[k].hist[i] != 0 )
            {
                t += schedule_data[k].from_hist[i];
                if ( i < BUCKETS-1 )
                    printk("%3d:[%7u] ", i, schedule_data[k].from_hist[i]);
                else
                    printk(" >:[%7u] ", schedule_data[k].from_hist[i]);
                //if ( !(++j % 5) )
                    printk("\n");
            }
        }
        printk("\nTotal: %i\n", t);
    }
    for ( k = 0; k < smp_num_cpus; k++ )
    {
        j = 0; t = 0;
        printf ("CPU[%02d]: scheduler latency histogram TO (ms:[count])\n", k);
        for ( i = 0; i < BUCKETS; i++ )
        {
            //if ( schedule_data[k].hist[i] != 0 )
            {
                t += schedule_data[k].to_hist[i];
                if ( i < BUCKETS-1 )
                    printk("%3d:[%7u] ", i, schedule_data[k].to_hist[i]);
                else
                    printk(" >:[%7u] ", schedule_data[k].to_hist[i]);
                //if ( !(++j % 5) )
                    printk("\n");
            }
        }
        printk("\nTotal: %i\n", t);
    }

}

void reset_sched_histo(unsigned char key)
{
    int i, j;
    for ( j = 0; j < smp_num_cpus; j++ ) {
        for ( i=0; i < BUCKETS; i++ )
            schedule_data[j].to_hist[i] = schedule_data[j].from_hist[i] = 0;
        schedule_data[j].save_tsc = 0;
    }
}
#else
void print_sched_histo(unsigned char key) { }
void reset_sched_histo(unsigned char key) { }
#endif
#endif

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */