debuggers.hg: view xen/common/schedule.c @ 3705:4294cfa9fad3

bitkeeper revision 1.1159.212.95 (4204aa0ee0re5Xx1zWrJ9ejxzgRs3w)

Various cleanups. Remove PDB pending simpler GDB stub and/or NetBSD debugger.
Force emacs mode to appropriate tabbing in various files.
Signed-off-by: keir.fraser@cl.cam.ac.uk

author:   kaf24@scramble.cl.cam.ac.uk
date:     Sat Feb 05 11:12:14 2005 +0000
parents:  610068179f96
children: 4f291c00af8c ed6875102ea3
/* -*-  Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*-
 ****************************************************************************
 * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
 * (C) 2002-2003 University of Cambridge
 * (C) 2004      - Mark Williamson - Intel Research Cambridge
 ****************************************************************************
 *
 *        File: common/schedule.c
 *      Author: Rolf Neugebauer & Keir Fraser
 *              Updated for generic API by Mark Williamson
 *
 * Description: Generic CPU scheduling code
 *              implements support functionality for the Xen scheduler API.
 *
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/delay.h>
#include <xen/event.h>
#include <xen/time.h>
#include <xen/ac_timer.h>
#include <xen/perfc.h>
#include <xen/sched-if.h>
#include <xen/softirq.h>
#include <xen/trace.h>
#include <public/sched_ctl.h>

/* opt_sched: scheduler - default to Borrowed Virtual Time */
static char opt_sched[10] = "bvt";
string_param("sched", opt_sched);

/*#define WAKE_HISTO*/
/*#define BLOCKTIME_HISTO*/

#if defined(WAKE_HISTO)
#define BUCKETS 31
#elif defined(BLOCKTIME_HISTO)
#define BUCKETS 200
#endif

#define TIME_SLOP (s32)MICROSECS(50)   /* allow time to slip a bit */

/*
 * TODO MAW pull trace-related #defines out of here and into an auto-generated
 * header file later on!
 */
#define TRC_SCHED_DOM_ADD       0x00010000
#define TRC_SCHED_DOM_REM       0x00010001
#define TRC_SCHED_WAKE          0x00010002
#define TRC_SCHED_BLOCK         0x00010003
#define TRC_SCHED_YIELD         0x00010004
#define TRC_SCHED_SET_TIMER     0x00010005
#define TRC_SCHED_CTL           0x00010006
#define TRC_SCHED_ADJDOM        0x00010007
#define TRC_SCHED_RESCHED       0x00010008
#define TRC_SCHED_SWITCH        0x00010009
#define TRC_SCHED_S_TIMER_FN    0x0001000A
#define TRC_SCHED_T_TIMER_FN    0x0001000B
#define TRC_SCHED_DOM_TIMER_FN  0x0001000C

/* Various timer handlers. */
static void s_timer_fn(unsigned long unused);
static void t_timer_fn(unsigned long unused);
static void dom_timer_fn(unsigned long data);

/* This is global for now so that private implementations can reach it */
schedule_data_t schedule_data[NR_CPUS];

extern struct scheduler sched_bvt_def;
// extern struct scheduler sched_rrobin_def;
// extern struct scheduler sched_atropos_def;
static struct scheduler *schedulers[] = {
    &sched_bvt_def,
    // &sched_rrobin_def,
    // &sched_atropos_def,
    NULL
};

/* Operations for the current scheduler. */
static struct scheduler ops;
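
/*
 * Invoke a hook from the current scheduler's ops table if the scheduler
 * implements it; otherwise evaluate to a zero of the hook's return type.
 */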
#define SCHED_OP(fn, ...)                                 \
        (( ops.fn != NULL ) ? ops.fn( __VA_ARGS__ )       \
         : (typeof(ops.fn(__VA_ARGS__)))0 )

/* Per-CPU periodic timer sends an event to the currently-executing domain. */
static struct ac_timer t_timer[NR_CPUS];
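
/*
 * Release a dying domain's scheduler-private data and its per-VCPU and
 * per-domain structures.
 */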
void free_domain_struct(struct domain *d)
{
    struct exec_domain *ed;

    SCHED_OP(free_task, d);
    for_each_exec_domain(d, ed)
        arch_free_exec_domain_struct(ed);
    arch_free_domain_struct(d);
}
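
/*
 * Allocate and initialise the structure for execution context 'vcpu' of
 * domain 'd'. For secondary VCPUs this also links the new structure into
 * the domain's list (kept in eid order) and assigns it a processor.
 */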
struct exec_domain *alloc_exec_domain_struct(struct domain *d,
                                             unsigned long vcpu)
{
    struct exec_domain *ed, *edc;

    ASSERT( d->exec_domain[vcpu] == NULL );

    if ( (ed = arch_alloc_exec_domain_struct()) == NULL )
        return NULL;

    memset(ed, 0, sizeof(*ed));

    d->exec_domain[vcpu] = ed;
    ed->domain = d;
    ed->eid    = vcpu;

    if ( SCHED_OP(alloc_task, ed) < 0 )
        goto out;

    if (vcpu != 0) {
        ed->vcpu_info = &d->shared_info->vcpu_data[ed->eid];

        for_each_exec_domain(d, edc) {
            if (edc->ed_next_list == NULL || edc->ed_next_list->eid > vcpu)
                break;
        }
        ed->ed_next_list  = edc->ed_next_list;
        edc->ed_next_list = ed;

        if (test_bit(EDF_CPUPINNED, &edc->ed_flags)) {
            ed->processor = (edc->processor + 1) % smp_num_cpus;
            set_bit(EDF_CPUPINNED, &ed->ed_flags);
        } else {
            ed->processor = (edc->processor + 1) % smp_num_cpus;  /* XXX */
        }
    }

    return ed;

 out:
    d->exec_domain[vcpu] = NULL;
    arch_free_exec_domain_struct(ed);

    return NULL;
}

struct domain *alloc_domain_struct(void)
{
    struct domain *d;

    if ( (d = arch_alloc_domain_struct()) == NULL )
        return NULL;

    memset(d, 0, sizeof(*d));

    if ( alloc_exec_domain_struct(d, 0) == NULL )
        goto out;

    return d;

 out:
    arch_free_domain_struct(d);
    return NULL;
}

/*
 * Add and remove a domain
 */
void sched_add_domain(struct exec_domain *ed)
{
    struct domain *d = ed->domain;

    /* Must be unpaused by control software to start execution. */
    set_bit(EDF_CTRLPAUSE, &ed->ed_flags);

    if ( d->id != IDLE_DOMAIN_ID )
    {
        /* Initialise the per-domain timer. */
        init_ac_timer(&ed->timer);
        ed->timer.cpu      = ed->processor;
        ed->timer.data     = (unsigned long)ed;
        ed->timer.function = &dom_timer_fn;
    }
    else
    {
        schedule_data[ed->processor].idle = ed;
    }

    SCHED_OP(add_task, ed);

    TRACE_2D(TRC_SCHED_DOM_ADD, d->id, ed);
}

void sched_rem_domain(struct exec_domain *ed)
{
    rem_ac_timer(&ed->timer);
    SCHED_OP(rem_task, ed);
    TRACE_3D(TRC_SCHED_DOM_REM, ed->domain->id, ed->eid, ed);
}

void init_idle_task(void)
{
    if ( SCHED_OP(init_idle_task, current) < 0 )
        BUG();
}
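
/*
 * Take a non-runnable execution domain off the run queue and spin until it
 * has actually been descheduled on its CPU.
 */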
void domain_sleep(struct exec_domain *d)
{
    unsigned long flags;

    spin_lock_irqsave(&schedule_data[d->processor].schedule_lock, flags);

    if ( likely(!domain_runnable(d)) )
        SCHED_OP(sleep, d);

    spin_unlock_irqrestore(&schedule_data[d->processor].schedule_lock, flags);

    /* Synchronous. */
    while ( test_bit(EDF_RUNNING, &d->ed_flags) && !domain_runnable(d) )
    {
        smp_mb();
        cpu_relax();
    }
}
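
/*
 * Put a woken execution domain back on its run queue if it is now runnable,
 * and clear its migration flag.
 */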
void domain_wake(struct exec_domain *ed)
{
    unsigned long flags;

    spin_lock_irqsave(&schedule_data[ed->processor].schedule_lock, flags);

    if ( likely(domain_runnable(ed)) )
    {
        TRACE_2D(TRC_SCHED_WAKE, ed->domain->id, ed);
        SCHED_OP(wake, ed);
#ifdef WAKE_HISTO
        ed->wokenup = NOW();
#endif
    }

    clear_bit(EDF_MIGRATED, &ed->ed_flags);

    spin_unlock_irqrestore(&schedule_data[ed->processor].schedule_lock, flags);
}

/* Block the currently-executing domain until a pertinent event occurs. */
long do_block(void)
{
    ASSERT(current->domain->id != IDLE_DOMAIN_ID);
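    /* Re-enable event delivery (clear the upcall mask) so a pending event can wake us. */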
    current->vcpu_info->evtchn_upcall_mask = 0;
    set_bit(EDF_BLOCKED, &current->ed_flags);
    TRACE_2D(TRC_SCHED_BLOCK, current->domain->id, current);
    __enter_scheduler();
    return 0;
}

/* Voluntarily yield the processor for this allocation. */
static long do_yield(void)
{
    TRACE_2D(TRC_SCHED_YIELD, current->domain->id, current);
    __enter_scheduler();
    return 0;
}

/*
 * Demultiplex scheduler-related hypercalls.
 */
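/*
 * The low bits of 'op' select the sub-command; for SCHEDOP_shutdown the
 * shutdown reason code is carried in the bits above SCHEDOP_reasonshift.
 */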
long do_sched_op(unsigned long op)
{
    long ret = 0;

    switch ( op & SCHEDOP_cmdmask )
    {

    case SCHEDOP_yield:
    {
        ret = do_yield();
        break;
    }

    case SCHEDOP_block:
    {
        ret = do_block();
        break;
    }

    case SCHEDOP_shutdown:
    {
        domain_shutdown((u8)(op >> SCHEDOP_reasonshift));
        break;
    }

    default:
        ret = -ENOSYS;
    }

    return ret;
}

/* Per-domain one-shot-timer hypercall. */
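/*
 * The two 32-bit halves are combined into a 64-bit system-time expiry;
 * passing a timeout of zero simply cancels any pending timer.
 */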
long do_set_timer_op(unsigned long timeout_hi, unsigned long timeout_lo)
{
    struct exec_domain *p = current;

    rem_ac_timer(&p->timer);

    if ( (timeout_hi != 0) || (timeout_lo != 0) )
    {
        p->timer.expires = ((s_time_t)timeout_hi<<32) | ((s_time_t)timeout_lo);
        add_ac_timer(&p->timer);
    }

    TRACE_5D(TRC_SCHED_SET_TIMER, p->domain->id, p->eid, p, timeout_hi,
             timeout_lo);

    return 0;
}

/** sched_id - fetch ID of current scheduler */
int sched_id()
{
    return ops.sched_id;
}

long sched_ctl(struct sched_ctl_cmd *cmd)
{
    TRACE_0D(TRC_SCHED_CTL);

    if ( cmd->sched_id != ops.sched_id )
        return -EINVAL;

    return SCHED_OP(control, cmd);
}


/* Adjust scheduling parameter for a given domain. */
long sched_adjdom(struct sched_adjdom_cmd *cmd)
{
    struct domain *d;

    if ( cmd->sched_id != ops.sched_id )
        return -EINVAL;

    if ( cmd->direction != SCHED_INFO_PUT && cmd->direction != SCHED_INFO_GET )
        return -EINVAL;

    d = find_domain_by_id(cmd->domain);
    if ( d == NULL )
        return -ESRCH;

    TRACE_1D(TRC_SCHED_ADJDOM, d->id);

    spin_lock_irq(&schedule_data[d->exec_domain[0]->processor].schedule_lock);
    SCHED_OP(adjdom, d, cmd);
    spin_unlock_irq(&schedule_data[d->exec_domain[0]->processor].schedule_lock);

    put_domain(d);
    return 0;
}

/*
 * The main function
 * - deschedule the current domain (scheduler independent).
 * - pick a new domain (scheduler dependent).
 */
void __enter_scheduler(void)
{
    struct exec_domain *prev = current, *next = NULL;
    int                 cpu = prev->processor;
    s_time_t            now;
    task_slice_t        next_slice;
    s32                 r_time;   /* time for new dom to run */

    if ( !is_idle_task(current->domain) )
    {
        LOCK_BIGLOCK(current->domain);
        cleanup_writable_pagetable(prev->domain);
        UNLOCK_BIGLOCK(current->domain);
    }

    perfc_incrc(sched_run);

    spin_lock_irq(&schedule_data[cpu].schedule_lock);

    now = NOW();

    rem_ac_timer(&schedule_data[cpu].s_timer);

    ASSERT(!in_irq());

    if ( test_bit(EDF_BLOCKED, &prev->ed_flags) )
    {
        /*
         * This check is needed to avoid a race condition: an event may have
         * arrived after do_block() marked us blocked, in which case we must
         * not go to sleep.
         */
        if ( event_pending(prev) )
            clear_bit(EDF_BLOCKED, &prev->ed_flags);
        else
            SCHED_OP(do_block, prev);
    }

    prev->cpu_time += now - prev->lastschd;

    /* get policy-specific decision on scheduling... */
    next_slice = ops.do_schedule(now);

    r_time = next_slice.time;
    next   = next_slice.task;

    schedule_data[cpu].curr = next;

    next->lastschd = now;

    /* Reprogram the scheduler timer. */
    schedule_data[cpu].s_timer.expires = now + r_time;
    add_ac_timer(&schedule_data[cpu].s_timer);

    /* Must be protected by the schedule_lock! */
    set_bit(EDF_RUNNING, &next->ed_flags);

    spin_unlock_irq(&schedule_data[cpu].schedule_lock);

    /* Ensure that the domain has an up-to-date time base. */
    if ( !is_idle_task(next->domain) )
        update_dom_time(next->domain);

    if ( unlikely(prev == next) )
        return;

    perfc_incrc(sched_ctx);

#if defined(WAKE_HISTO)
    if ( !is_idle_task(next) && next->wokenup ) {
        ulong diff = (ulong)(now - next->wokenup);
        diff /= (ulong)MILLISECS(1);
        if (diff <= BUCKETS-2)  schedule_data[cpu].hist[diff]++;
        else                    schedule_data[cpu].hist[BUCKETS-1]++;
    }
    next->wokenup = (s_time_t)0;
#elif defined(BLOCKTIME_HISTO)
    prev->lastdeschd = now;
    if ( !is_idle_task(next) )
    {
        ulong diff = (ulong)((now - next->lastdeschd) / MILLISECS(10));
        if (diff <= BUCKETS-2)  schedule_data[cpu].hist[diff]++;
        else                    schedule_data[cpu].hist[BUCKETS-1]++;
    }
#endif

    TRACE_2D(TRC_SCHED_SWITCH, next->domain->id, next);

    switch_to(prev, next);

    /*
     * We do this late on because it doesn't need to be protected by the
     * schedule_lock, and because we want this to be the very last use of
     * 'prev' (after this point, a dying domain's info structure may be freed
     * without warning).
     */
    clear_bit(EDF_RUNNING, &prev->ed_flags);

    /* Mark a timer event for the newly-scheduled domain. */
    if ( !is_idle_task(next->domain) )
        send_guest_virq(next, VIRQ_TIMER);

    schedule_tail(next);

    BUG();
}

/* No locking needed -- pointer comparison is safe :-) */
int idle_cpu(int cpu)
{
    struct exec_domain *p = schedule_data[cpu].curr;
    return p == idle_task[cpu];
}

/****************************************************************************
 * Timers: the scheduler utilises a number of timers
 *  - s_timer: per CPU timer for preemption and scheduling decisions
 *  - t_timer: per CPU periodic timer to send timer interrupt to current dom
 *  - dom_timer: per domain timer to specify timeout values
 ****************************************************************************/

/* The scheduler timer: force a run through the scheduler. */
static void s_timer_fn(unsigned long unused)
{
    TRACE_0D(TRC_SCHED_S_TIMER_FN);
    raise_softirq(SCHEDULE_SOFTIRQ);
    perfc_incrc(sched_irq);
}

/*
 * Periodic tick timer: send a timer event to the current domain and re-arm
 * every 10ms.
 */
static void t_timer_fn(unsigned long unused)
{
    struct exec_domain *ed = current;

    TRACE_0D(TRC_SCHED_T_TIMER_FN);

    if ( !is_idle_task(ed->domain) )
    {
        update_dom_time(ed->domain);
        send_guest_virq(ed, VIRQ_TIMER);
    }

    t_timer[ed->processor].expires = NOW() + MILLISECS(10);
    add_ac_timer(&t_timer[ed->processor]);
}

/* Domain timer function: sends a virtual timer interrupt to the domain. */
static void dom_timer_fn(unsigned long data)
{
    struct exec_domain *ed = (struct exec_domain *)data;

    TRACE_0D(TRC_SCHED_DOM_TIMER_FN);
    update_dom_time(ed->domain);
    send_guest_virq(ed, VIRQ_TIMER);
}

/* Initialise the data structures. */
void __init scheduler_init(void)
{
    int i;

    open_softirq(SCHEDULE_SOFTIRQ, __enter_scheduler);

    for ( i = 0; i < NR_CPUS; i++ )
    {
        spin_lock_init(&schedule_data[i].schedule_lock);
        schedule_data[i].curr = &idle0_exec_domain;

        init_ac_timer(&schedule_data[i].s_timer);
        schedule_data[i].s_timer.cpu      = i;
        schedule_data[i].s_timer.data     = 2;
        schedule_data[i].s_timer.function = &s_timer_fn;

        init_ac_timer(&t_timer[i]);
        t_timer[i].cpu      = i;
        t_timer[i].data     = 3;
        t_timer[i].function = &t_timer_fn;
    }

    schedule_data[0].idle = &idle0_exec_domain;

    for ( i = 0; schedulers[i] != NULL; i++ )
    {
        ops = *schedulers[i];
        if ( strcmp(ops.opt_name, opt_sched) == 0 )
            break;
    }

    if ( schedulers[i] == NULL )
        printk("Could not find scheduler: %s\n", opt_sched);

    printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);

    if ( SCHED_OP(init_scheduler) < 0 )
        panic("Initialising scheduler failed!");
}

/*
 * Start a scheduler for each CPU
 * This has to be done *after* the timers, e.g., APICs, have been initialised
 */
void schedulers_start(void)
{
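    /* Run the timer handlers once on this CPU, then trigger them on the other CPUs. */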
    s_timer_fn(0);
    smp_call_function((void *)s_timer_fn, NULL, 1, 1);

    t_timer_fn(0);
    smp_call_function((void *)t_timer_fn, NULL, 1, 1);
}
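
/* Print the current scheduler's settings and the per-CPU scheduler state. */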
void dump_runq(unsigned char key)
{
    s_time_t      now = NOW();
    int           i;
    unsigned long flags;

    local_irq_save(flags);

    printk("Scheduler: %s (%s)\n", ops.name, ops.opt_name);
    SCHED_OP(dump_settings);
    printk("NOW=0x%08X%08X\n", (u32)(now>>32), (u32)now);

    for ( i = 0; i < smp_num_cpus; i++ )
    {
        spin_lock(&schedule_data[i].schedule_lock);
        printk("CPU[%02d] ", i);
        SCHED_OP(dump_cpu_state, i);
        spin_unlock(&schedule_data[i].schedule_lock);
    }

    local_irq_restore(flags);
}

#if defined(WAKE_HISTO) || defined(BLOCKTIME_HISTO)
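/*
 * Print the per-CPU scheduler latency histograms collected when one of the
 * histogram options above is enabled.
 */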
void print_sched_histo(unsigned char key)
{
    int i, j, k;
    for ( k = 0; k < smp_num_cpus; k++ )
    {
        j = 0;
        printk("CPU[%02d]: scheduler latency histogram (ms:[count])\n", k);
        for ( i = 0; i < BUCKETS; i++ )
        {
            if ( schedule_data[k].hist[i] != 0 )
            {
                if ( i < BUCKETS-1 )
                    printk("%2d:[%7u] ", i, schedule_data[k].hist[i]);
                else
                    printk(" >:[%7u] ", schedule_data[k].hist[i]);
                if ( !(++j % 5) )
                    printk("\n");
            }
        }
        printk("\n");
    }
}

void reset_sched_histo(unsigned char key)
{
    int i, j;
    for ( j = 0; j < smp_num_cpus; j++ )
        for ( i = 0; i < BUCKETS; i++ )
            schedule_data[j].hist[i] = 0;
}
#else
void print_sched_histo(unsigned char key) { }
void reset_sched_histo(unsigned char key) { }
#endif