xen/common/schedule.c @ 3515:d331c6994d28 (repository: debuggers.hg)

bitkeeper revision 1.1159.223.12 (41f14d3cE4GADmEAEr6XE9nXX4dyGw)

Common-code cleanups. Moved arch-specific code out into arch/x86 and asm-x86.

author    kaf24@scramble.cl.cam.ac.uk
date      Fri Jan 21 18:43:08 2005 +0000 (2005-01-21)
parents   60e5912b6b28
children  46c14b1a4351 c4a7f635b2cd

/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
 ****************************************************************************
 * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
 * (C) 2002-2003 University of Cambridge
 * (C) 2004      - Mark Williamson - Intel Research Cambridge
 ****************************************************************************
 *
 *        File: common/schedule.c
 *      Author: Rolf Neugebauer & Keir Fraser
 *              Updated for generic API by Mark Williamson
 *
 * Description: Generic CPU scheduling code
 *              implements support functionality for the Xen scheduler API.
 *
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/delay.h>
#include <xen/event.h>
#include <xen/time.h>
#include <xen/ac_timer.h>
#include <xen/perfc.h>
#include <xen/sched-if.h>
#include <xen/softirq.h>
#include <xen/trace.h>
#include <public/sched_ctl.h>

/* opt_sched: scheduler - default to Borrowed Virtual Time */
static char opt_sched[10] = "bvt";
string_param("sched", opt_sched);
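
/*
 * Usage sketch: the "sched=" boot parameter selects a scheduler by its
 * opt_name (see the schedulers[] table below).  For example, booting Xen
 * with something like "sched=rrobin" on the command line would pick the
 * round-robin scheduler instead of the default "bvt".
 */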

/*#define WAKE_HISTO*/
/*#define BLOCKTIME_HISTO*/

#if defined(WAKE_HISTO)
#define BUCKETS 31
#elif defined(BLOCKTIME_HISTO)
#define BUCKETS 200
#endif

#define TIME_SLOP      (s32)MICROSECS(50)     /* allow time to slip a bit */

/*
 * TODO MAW pull trace-related #defines out of here and into an auto-generated
 * header file later on!
 */
#define TRC_SCHED_DOM_ADD             0x00010000
#define TRC_SCHED_DOM_REM             0x00010001
#define TRC_SCHED_WAKE                0x00010002
#define TRC_SCHED_BLOCK               0x00010003
#define TRC_SCHED_YIELD               0x00010004
#define TRC_SCHED_SET_TIMER           0x00010005
#define TRC_SCHED_CTL                 0x00010006
#define TRC_SCHED_ADJDOM              0x00010007
#define TRC_SCHED_RESCHED             0x00010008
#define TRC_SCHED_SWITCH              0x00010009
#define TRC_SCHED_S_TIMER_FN          0x0001000A
#define TRC_SCHED_T_TIMER_FN          0x0001000B
#define TRC_SCHED_DOM_TIMER_FN        0x0001000C

/* Various timer handlers. */
static void s_timer_fn(unsigned long unused);
static void t_timer_fn(unsigned long unused);
static void dom_timer_fn(unsigned long data);

/* This is global for now so that private implementations can reach it */
schedule_data_t schedule_data[NR_CPUS];

extern struct scheduler sched_bvt_def;
extern struct scheduler sched_rrobin_def;
extern struct scheduler sched_atropos_def;
static struct scheduler *schedulers[] = {
    &sched_bvt_def,
    &sched_rrobin_def,
    &sched_atropos_def,
    NULL
};
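
/*
 * A new scheduler implementation would be made visible here by declaring its
 * 'struct scheduler' (as for sched_bvt_def above) and adding a pointer to it
 * before the NULL terminator; scheduler_init() then selects an entry by
 * matching its opt_name against the "sched=" option.
 */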

/* Operations for the current scheduler. */
static struct scheduler ops;

#define SCHED_OP(fn, ...)                                 \
         (( ops.fn != NULL ) ? ops.fn( __VA_ARGS__ )      \
          : (typeof(ops.fn(__VA_ARGS__)))0 )
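
/*
 * For instance, SCHED_OP(wake, d) expands (roughly) to
 *     ( ops.wake != NULL ) ? ops.wake(d) : (typeof(ops.wake(d)))0
 * so hooks that a scheduler chooses not to implement are silently skipped
 * and yield a zero of the appropriate type.
 */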

/* Per-CPU periodic timer sends an event to the currently-executing domain. */
static struct ac_timer t_timer[NR_CPUS];

void free_domain_struct(struct domain *d)
{
    SCHED_OP(free_task, d);
    arch_free_domain_struct(d);
}

struct domain *alloc_domain_struct(void)
{
    struct domain *d;

    if ( (d = arch_alloc_domain_struct()) == NULL )
        return NULL;

    memset(d, 0, sizeof(*d));

    if ( SCHED_OP(alloc_task, d) < 0 )
    {
        arch_free_domain_struct(d);
        return NULL;
    }

    return d;
}

/*
 * Add and remove a domain
 */
void sched_add_domain(struct domain *d)
{
    /* Must be unpaused by control software to start execution. */
    set_bit(DF_CTRLPAUSE, &d->flags);

    if ( d->id != IDLE_DOMAIN_ID )
    {
        /* Initialise the per-domain timer. */
        init_ac_timer(&d->timer);
        d->timer.cpu      = d->processor;
        d->timer.data     = (unsigned long)d;
        d->timer.function = &dom_timer_fn;
    }
    else
    {
        schedule_data[d->processor].idle = d;
    }

    SCHED_OP(add_task, d);

    TRACE_2D(TRC_SCHED_DOM_ADD, d->id, d);
}

void sched_rem_domain(struct domain *d)
{
    rem_ac_timer(&d->timer);
    SCHED_OP(rem_task, d);
    TRACE_2D(TRC_SCHED_DOM_REM, d->id, d);
}

void init_idle_task(void)
{
    if ( SCHED_OP(init_idle_task, current) < 0 )
        BUG();
}

void domain_sleep(struct domain *d)
{
    unsigned long flags;

    spin_lock_irqsave(&schedule_data[d->processor].schedule_lock, flags);

    if ( likely(!domain_runnable(d)) )
        SCHED_OP(sleep, d);

    spin_unlock_irqrestore(&schedule_data[d->processor].schedule_lock, flags);

    /* Synchronous. */
    while ( test_bit(DF_RUNNING, &d->flags) && !domain_runnable(d) )
    {
        smp_mb();
        cpu_relax();
    }
}

void domain_wake(struct domain *d)
{
    unsigned long flags;

    spin_lock_irqsave(&schedule_data[d->processor].schedule_lock, flags);

    if ( likely(domain_runnable(d)) )
    {
        TRACE_2D(TRC_SCHED_WAKE, d->id, d);
        SCHED_OP(wake, d);
#ifdef WAKE_HISTO
        d->wokenup = NOW();
#endif
    }

    clear_bit(DF_MIGRATED, &d->flags);

    spin_unlock_irqrestore(&schedule_data[d->processor].schedule_lock, flags);
}

/* Block the currently-executing domain until a pertinent event occurs. */
long do_block(void)
{
    ASSERT(current->id != IDLE_DOMAIN_ID);
    current->shared_info->vcpu_data[0].evtchn_upcall_mask = 0;
    set_bit(DF_BLOCKED, &current->flags);
    TRACE_2D(TRC_SCHED_BLOCK, current->id, current);
    __enter_scheduler();
    return 0;
}

/* Voluntarily yield the processor for this allocation. */
static long do_yield(void)
{
    TRACE_2D(TRC_SCHED_YIELD, current->id, current);
    __enter_scheduler();
    return 0;
}

/*
 * Demultiplex scheduler-related hypercalls.
 */
long do_sched_op(unsigned long op)
{
    long ret = 0;

    switch ( op & SCHEDOP_cmdmask )
    {
    case SCHEDOP_yield:
    {
        ret = do_yield();
        break;
    }

    case SCHEDOP_block:
    {
        ret = do_block();
        break;
    }

    case SCHEDOP_shutdown:
    {
        domain_shutdown((u8)(op >> SCHEDOP_reasonshift));
        break;
    }

    default:
        ret = -ENOSYS;
    }

    return ret;
}
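
/*
 * The hypercall argument packs the command into the low bits (selected by
 * SCHEDOP_cmdmask) and, for SCHEDOP_shutdown, the shutdown reason into the
 * bits at SCHEDOP_reasonshift.  A guest requesting a reboot would therefore
 * pass something like
 *     SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift)
 * to its sched_op hypercall wrapper (constant names as in the public
 * interface headers of this era).
 */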

/* Per-domain one-shot-timer hypercall. */
long do_set_timer_op(unsigned long timeout_hi, unsigned long timeout_lo)
{
    struct domain *p = current;

    rem_ac_timer(&p->timer);

    if ( (timeout_hi != 0) || (timeout_lo != 0) )
    {
        p->timer.expires = ((s_time_t)timeout_hi<<32) | ((s_time_t)timeout_lo);
        add_ac_timer(&p->timer);
    }

    TRACE_4D(TRC_SCHED_SET_TIMER, p->id, p, timeout_hi, timeout_lo);

    return 0;
}
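
/*
 * The timeout is a 64-bit system-time value split across two unsigned longs
 * so that 32-bit guests can pass it easily.  Sketch of the guest side, with
 * 't' an illustrative 64-bit target time in the hypervisor's system time:
 *     timeout_hi = (unsigned long)(t >> 32);
 *     timeout_lo = (unsigned long)t;
 * Passing (0, 0) simply cancels any pending domain timer, as above.
 */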

/** sched_id - fetch ID of current scheduler */
int sched_id()
{
    return ops.sched_id;
}

long sched_ctl(struct sched_ctl_cmd *cmd)
{
    TRACE_0D(TRC_SCHED_CTL);

    if ( cmd->sched_id != ops.sched_id )
        return -EINVAL;

    return SCHED_OP(control, cmd);
}

/* Adjust scheduling parameter for a given domain. */
long sched_adjdom(struct sched_adjdom_cmd *cmd)
{
    struct domain *d;

    if ( cmd->sched_id != ops.sched_id )
        return -EINVAL;

    if ( cmd->direction != SCHED_INFO_PUT && cmd->direction != SCHED_INFO_GET )
        return -EINVAL;

    d = find_domain_by_id(cmd->domain);
    if ( d == NULL )
        return -ESRCH;

    TRACE_1D(TRC_SCHED_ADJDOM, d->id);

    spin_lock_irq(&schedule_data[d->processor].schedule_lock);
    SCHED_OP(adjdom, d, cmd);
    spin_unlock_irq(&schedule_data[d->processor].schedule_lock);

    put_domain(d);
    return 0;
}

/*
 * The main function
 * - deschedule the current domain (scheduler independent).
 * - pick a new domain (scheduler dependent).
 */
void __enter_scheduler(void)
{
    struct domain *prev = current, *next = NULL;
    int           cpu = prev->processor;
    s_time_t      now;
    task_slice_t  next_slice;
    s32           r_time;     /* time for new dom to run */

    cleanup_writable_pagetable(prev);

    perfc_incrc(sched_run);

    spin_lock_irq(&schedule_data[cpu].schedule_lock);

    now = NOW();

    rem_ac_timer(&schedule_data[cpu].s_timer);

    ASSERT(!in_irq());

    if ( test_bit(DF_BLOCKED, &prev->flags) )
    {
        /* This check is needed to avoid a race condition. */
        if ( event_pending(prev) )
            clear_bit(DF_BLOCKED, &prev->flags);
        else
            SCHED_OP(do_block, prev);
    }

    prev->cpu_time += now - prev->lastschd;

    /* get policy-specific decision on scheduling... */
    next_slice = ops.do_schedule(now);

    r_time = next_slice.time;
    next   = next_slice.task;

    schedule_data[cpu].curr = next;

    next->lastschd = now;

    /* reprogram the timer */
    schedule_data[cpu].s_timer.expires = now + r_time;
    add_ac_timer(&schedule_data[cpu].s_timer);

    /* Must be protected by the schedule_lock! */
    set_bit(DF_RUNNING, &next->flags);

    spin_unlock_irq(&schedule_data[cpu].schedule_lock);

    /* Ensure that the domain has an up-to-date time base. */
    if ( !is_idle_task(next) )
        update_dom_time(next->shared_info);

    if ( unlikely(prev == next) )
        return;

    perfc_incrc(sched_ctx);

#if defined(WAKE_HISTO)
    if ( !is_idle_task(next) && next->wokenup )
    {
        ulong diff = (ulong)(now - next->wokenup);
        diff /= (ulong)MILLISECS(1);
        if ( diff <= BUCKETS - 2 )
            schedule_data[cpu].hist[diff]++;
        else
            schedule_data[cpu].hist[BUCKETS - 1]++;
    }
    next->wokenup = (s_time_t)0;
#elif defined(BLOCKTIME_HISTO)
    prev->lastdeschd = now;
    if ( !is_idle_task(next) )
    {
        ulong diff = (ulong)((now - next->lastdeschd) / MILLISECS(10));
        if ( diff <= BUCKETS - 2 )
            schedule_data[cpu].hist[diff]++;
        else
            schedule_data[cpu].hist[BUCKETS - 1]++;
    }
#endif

    TRACE_2D(TRC_SCHED_SWITCH, next->id, next);

    switch_to(prev, next);

    /*
     * We do this late on because it doesn't need to be protected by the
     * schedule_lock, and because we want this to be the very last use of
     * 'prev' (after this point, a dying domain's info structure may be freed
     * without warning).
     */
    clear_bit(DF_RUNNING, &prev->flags);

    /* Mark a timer event for the newly-scheduled domain. */
    if ( !is_idle_task(next) )
        send_guest_virq(next, VIRQ_TIMER);

    schedule_tail(next);

    BUG();
}

/* No locking needed -- pointer comparison is safe :-) */
int idle_cpu(int cpu)
{
    struct domain *p = schedule_data[cpu].curr;
    return p == idle_task[cpu];
}


/****************************************************************************
 * Timers: the scheduler utilises a number of timers
 *  - s_timer: per CPU timer for preemption and scheduling decisions
 *  - t_timer: per CPU periodic timer to send timer interrupt to current dom
 *  - dom_timer: per domain timer to specify timeout values
 ****************************************************************************/

/* The scheduler timer: force a run through the scheduler */
static void s_timer_fn(unsigned long unused)
{
    TRACE_0D(TRC_SCHED_S_TIMER_FN);
    raise_softirq(SCHEDULE_SOFTIRQ);
    perfc_incrc(sched_irq);
}

/* Periodic tick timer: send timer event to current domain */
static void t_timer_fn(unsigned long unused)
{
    struct domain *d = current;

    TRACE_0D(TRC_SCHED_T_TIMER_FN);

    if ( !is_idle_task(d) )
    {
        update_dom_time(d->shared_info);
        send_guest_virq(d, VIRQ_TIMER);
    }

    t_timer[d->processor].expires = NOW() + MILLISECS(10);
    add_ac_timer(&t_timer[d->processor]);
}

/* Domain timer function, sends a virtual timer interrupt to domain */
static void dom_timer_fn(unsigned long data)
{
    struct domain *d = (struct domain *)data;
    TRACE_0D(TRC_SCHED_DOM_TIMER_FN);
    update_dom_time(d->shared_info);
    send_guest_virq(d, VIRQ_TIMER);
}

/* Initialise the data structures. */
void __init scheduler_init(void)
{
    int i;

    open_softirq(SCHEDULE_SOFTIRQ, __enter_scheduler);

    for ( i = 0; i < NR_CPUS; i++ )
    {
        spin_lock_init(&schedule_data[i].schedule_lock);
        schedule_data[i].curr = &idle0_task;

        init_ac_timer(&schedule_data[i].s_timer);
        schedule_data[i].s_timer.cpu      = i;
        schedule_data[i].s_timer.data     = 2;
        schedule_data[i].s_timer.function = &s_timer_fn;

        init_ac_timer(&t_timer[i]);
        t_timer[i].cpu      = i;
        t_timer[i].data     = 3;
        t_timer[i].function = &t_timer_fn;
    }

    schedule_data[0].idle = &idle0_task;

    for ( i = 0; schedulers[i] != NULL; i++ )
    {
        ops = *schedulers[i];
        if ( strcmp(ops.opt_name, opt_sched) == 0 )
            break;
    }

    if ( schedulers[i] == NULL )
        printk("Could not find scheduler: %s\n", opt_sched);

    printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);

    if ( SCHED_OP(init_scheduler) < 0 )
        panic("Initialising scheduler failed!");
}

/*
 * Start a scheduler for each CPU
 * This has to be done *after* the timers, e.g., APICs, have been initialised
 */
void schedulers_start(void)
{
    s_timer_fn(0);
    smp_call_function((void *)s_timer_fn, NULL, 1, 1);

    t_timer_fn(0);
    smp_call_function((void *)t_timer_fn, NULL, 1, 1);
}
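
/*
 * Note: each direct call above arms the timer on the calling (boot) CPU,
 * while smp_call_function() runs the same handler once on each of the other
 * online CPUs, so every per-CPU s_timer/t_timer ends up armed.
 */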

void dump_runq(unsigned char key)
{
    s_time_t      now = NOW();
    int           i;
    unsigned long flags;

    local_irq_save(flags);

    printk("Scheduler: %s (%s)\n", ops.name, ops.opt_name);
    SCHED_OP(dump_settings);
    printk("NOW=0x%08X%08X\n", (u32)(now>>32), (u32)now);

    for ( i = 0; i < smp_num_cpus; i++ )
    {
        spin_lock(&schedule_data[i].schedule_lock);
        printk("CPU[%02d] ", i);
        SCHED_OP(dump_cpu_state, i);
        spin_unlock(&schedule_data[i].schedule_lock);
    }

    local_irq_restore(flags);
}

#if defined(WAKE_HISTO) || defined(BLOCKTIME_HISTO)

void print_sched_histo(unsigned char key)
{
    int i, j, k;
    for ( k = 0; k < smp_num_cpus; k++ )
    {
        j = 0;
        printk("CPU[%02d]: scheduler latency histogram (ms:[count])\n", k);
        for ( i = 0; i < BUCKETS; i++ )
        {
            if ( schedule_data[k].hist[i] != 0 )
            {
                if ( i < BUCKETS-1 )
                    printk("%2d:[%7u] ", i, schedule_data[k].hist[i]);
                else
                    printk(" >:[%7u] ", schedule_data[k].hist[i]);
                if ( !(++j % 5) )
                    printk("\n");
            }
        }
        printk("\n");
    }
}

void reset_sched_histo(unsigned char key)
{
    int i, j;
    for ( j = 0; j < smp_num_cpus; j++ )
        for ( i = 0; i < BUCKETS; i++ )
            schedule_data[j].hist[i] = 0;
}

#else

void print_sched_histo(unsigned char key) { }
void reset_sched_histo(unsigned char key) { }

#endif