debuggers.hg

view xen/common/schedule.c @ 20967:f5fba6214a20

Remove hardcoded instances of TIMER_SLOP.

They aren't needed at all, since slop now only delays a timer firing,
rather than allowing it to happen early.

Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Feb 10 13:27:55 2010 +0000 (2010-02-10)
parents 5a224e101cb3
children cbb147631e8c
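
The description above hinges on the new slop semantics: slop is now applied only on the late side of a deadline, so callers no longer need their own TIMER_SLOP padding. A minimal sketch of that invariant follows; the helper name and signature are assumptions for illustration and are not part of this changeset.

    /*
     * Sketch only: a timer programmed for 'expires' may be serviced anywhere
     * in [expires, expires + slop], but never before 'expires'.
     */
    static inline bool_t deadline_reached(s_time_t expires, s_time_t now)
    {
        return now >= expires;   /* slop can only defer servicing past this */
    }
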
line source
1 /****************************************************************************
2 * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
3 * (C) 2002-2003 University of Cambridge
4 * (C) 2004 - Mark Williamson - Intel Research Cambridge
5 ****************************************************************************
6 *
7 * File: common/schedule.c
8 * Author: Rolf Neugebauer & Keir Fraser
9 * Updated for generic API by Mark Williamson
10 *
11 * Description: Generic CPU scheduling code
12 * implements support functionality for the Xen scheduler API.
13 *
14 */
16 #ifndef COMPAT
17 #include <xen/config.h>
18 #include <xen/init.h>
19 #include <xen/lib.h>
20 #include <xen/sched.h>
21 #include <xen/domain.h>
22 #include <xen/delay.h>
23 #include <xen/event.h>
24 #include <xen/time.h>
25 #include <xen/timer.h>
26 #include <xen/perfc.h>
27 #include <xen/sched-if.h>
28 #include <xen/softirq.h>
29 #include <xen/trace.h>
30 #include <xen/mm.h>
31 #include <xen/errno.h>
32 #include <xen/guest_access.h>
33 #include <xen/multicall.h>
34 #include <public/sched.h>
35 #include <xsm/xsm.h>
37 /* opt_sched: scheduler - default to credit */
38 static char __initdata opt_sched[10] = "credit";
39 string_param("sched", opt_sched);
41 /* If sched_smt_power_savings is set, the
42 * scheduler will give preference to a partially idle package over a
43 * fully idle package when picking a pCPU on which to schedule a vCPU.
44 */
45 int sched_smt_power_savings = 0;
46 boolean_param("sched_smt_power_savings", sched_smt_power_savings);
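
Both knobs above are read from the hypervisor command line: "sched=" takes the opt_name of any compiled-in scheduler (credit or sedf in this build, per the schedulers[] array below), and "sched_smt_power_savings=" is a boolean. A hypothetical GRUB2 entry selecting them might read (paths and other options are placeholders):

    multiboot /boot/xen.gz sched=credit sched_smt_power_savings=1
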
48 /* Various timer handlers. */
49 static void s_timer_fn(void *unused);
50 static void vcpu_periodic_timer_fn(void *data);
51 static void vcpu_singleshot_timer_fn(void *data);
52 static void poll_timer_fn(void *data);
54 /* This is global for now so that private implementations can reach it */
55 DEFINE_PER_CPU(struct schedule_data, schedule_data);
57 extern const struct scheduler sched_sedf_def;
58 extern const struct scheduler sched_credit_def;
59 static const struct scheduler *__initdata schedulers[] = {
60 &sched_sedf_def,
61 &sched_credit_def,
62 NULL
63 };
65 static struct scheduler __read_mostly ops;
67 #define SCHED_OP(fn, ...) \
68 (( ops.fn != NULL ) ? ops.fn( __VA_ARGS__ ) \
69 : (typeof(ops.fn(__VA_ARGS__)))0 )
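
SCHED_OP(fn, ...) calls the active scheduler's hook if it is provided and otherwise evaluates to a zero of the hook's return type. For instance, the SCHED_OP(init_vcpu, v) call in sched_init_vcpu() further down expands (modulo whitespace) to:

    (( ops.init_vcpu != NULL ) ? ops.init_vcpu(v)
                               : (typeof(ops.init_vcpu(v)))0 );  /* i.e. (int)0 */
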
71 static inline void trace_runstate_change(struct vcpu *v, int new_state)
72 {
73 struct { uint32_t vcpu:16, domain:16; } d;
74 uint32_t event;
76 if ( likely(!tb_init_done) )
77 return;
79 d.vcpu = v->vcpu_id;
80 d.domain = v->domain->domain_id;
82 event = TRC_SCHED_RUNSTATE_CHANGE;
83 event |= ( v->runstate.state & 0x3 ) << 8;
84 event |= ( new_state & 0x3 ) << 4;
86 __trace_var(event, 1/*tsc*/, sizeof(d), (unsigned char *)&d);
87 }
89 static inline void trace_continue_running(struct vcpu *v)
90 {
91 struct { uint32_t vcpu:16, domain:16; } d;
93 if ( likely(!tb_init_done) )
94 return;
96 d.vcpu = v->vcpu_id;
97 d.domain = v->domain->domain_id;
99 __trace_var(TRC_SCHED_CONTINUE_RUNNING, 1/*tsc*/, sizeof(d),
100 (unsigned char *)&d);
101 }
103 static inline void vcpu_runstate_change(
104 struct vcpu *v, int new_state, s_time_t new_entry_time)
105 {
106 s_time_t delta;
108 ASSERT(v->runstate.state != new_state);
109 ASSERT(spin_is_locked(&per_cpu(schedule_data,v->processor).schedule_lock));
111 trace_runstate_change(v, new_state);
113 delta = new_entry_time - v->runstate.state_entry_time;
114 if ( delta > 0 )
115 {
116 v->runstate.time[v->runstate.state] += delta;
117 v->runstate.state_entry_time = new_entry_time;
118 }
120 v->runstate.state = new_state;
121 }
123 void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate)
124 {
125 s_time_t delta;
127 if ( unlikely(v != current) )
128 vcpu_schedule_lock_irq(v);
130 memcpy(runstate, &v->runstate, sizeof(*runstate));
131 delta = NOW() - runstate->state_entry_time;
132 if ( delta > 0 )
133 runstate->time[runstate->state] += delta;
135 if ( unlikely(v != current) )
136 vcpu_schedule_unlock_irq(v);
137 }
139 uint64_t get_cpu_idle_time(unsigned int cpu)
140 {
141 struct vcpu_runstate_info state;
142 struct vcpu *v;
144 if ( (v = idle_vcpu[cpu]) == NULL )
145 return 0;
147 vcpu_runstate_get(v, &state);
148 return state.time[RUNSTATE_running];
149 }
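
get_cpu_idle_time() reports the cumulative nanoseconds the CPU's idle vCPU has spent in RUNSTATE_running. A hedged sketch of how a caller might turn two samples into a utilisation figure; the helper name and bookkeeping are assumptions, not code from this file.

    static unsigned int cpu_busy_percent(unsigned int cpu,
                                         uint64_t *prev_idle, s_time_t *prev_now)
    {
        uint64_t idle = get_cpu_idle_time(cpu);
        s_time_t now  = NOW();
        uint64_t idle_delta = idle - *prev_idle;
        uint64_t wall_delta = now - *prev_now;

        *prev_idle = idle;
        *prev_now  = now;

        if ( idle_delta > wall_delta )
            idle_delta = wall_delta;       /* clamp against sampling skew */

        /* Fraction of the sampling interval not spent in the idle vCPU. */
        return wall_delta ?
            100 - (unsigned int)((idle_delta * 100) / wall_delta) : 0;
    }
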
151 int sched_init_vcpu(struct vcpu *v, unsigned int processor)
152 {
153 struct domain *d = v->domain;
155 /*
156 * Initialize processor and affinity settings. The idler, and potentially
157 * domain-0 VCPUs, are pinned onto their respective physical CPUs.
158 */
159 v->processor = processor;
160 if ( is_idle_domain(d) || d->is_pinned )
161 v->cpu_affinity = cpumask_of_cpu(processor);
162 else
163 cpus_setall(v->cpu_affinity);
165 /* Initialise the per-vcpu timers. */
166 init_timer(&v->periodic_timer, vcpu_periodic_timer_fn,
167 v, v->processor);
168 init_timer(&v->singleshot_timer, vcpu_singleshot_timer_fn,
169 v, v->processor);
170 init_timer(&v->poll_timer, poll_timer_fn,
171 v, v->processor);
173 /* Idle VCPUs are scheduled immediately. */
174 if ( is_idle_domain(d) )
175 {
176 per_cpu(schedule_data, v->processor).curr = v;
177 per_cpu(schedule_data, v->processor).idle = v;
178 v->is_running = 1;
179 }
181 TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id);
183 return SCHED_OP(init_vcpu, v);
184 }
186 void sched_destroy_vcpu(struct vcpu *v)
187 {
188 kill_timer(&v->periodic_timer);
189 kill_timer(&v->singleshot_timer);
190 kill_timer(&v->poll_timer);
191 SCHED_OP(destroy_vcpu, v);
192 }
194 int sched_init_domain(struct domain *d)
195 {
196 return SCHED_OP(init_domain, d);
197 }
199 void sched_destroy_domain(struct domain *d)
200 {
201 SCHED_OP(destroy_domain, d);
202 }
204 void vcpu_sleep_nosync(struct vcpu *v)
205 {
206 unsigned long flags;
208 vcpu_schedule_lock_irqsave(v, flags);
210 if ( likely(!vcpu_runnable(v)) )
211 {
212 if ( v->runstate.state == RUNSTATE_runnable )
213 vcpu_runstate_change(v, RUNSTATE_offline, NOW());
215 SCHED_OP(sleep, v);
216 }
218 vcpu_schedule_unlock_irqrestore(v, flags);
220 TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id);
221 }
223 void vcpu_sleep_sync(struct vcpu *v)
224 {
225 vcpu_sleep_nosync(v);
227 while ( !vcpu_runnable(v) && v->is_running )
228 cpu_relax();
230 sync_vcpu_execstate(v);
231 }
233 void vcpu_wake(struct vcpu *v)
234 {
235 unsigned long flags;
237 vcpu_schedule_lock_irqsave(v, flags);
239 if ( likely(vcpu_runnable(v)) )
240 {
241 if ( v->runstate.state >= RUNSTATE_blocked )
242 vcpu_runstate_change(v, RUNSTATE_runnable, NOW());
243 SCHED_OP(wake, v);
244 }
245 else if ( !test_bit(_VPF_blocked, &v->pause_flags) )
246 {
247 if ( v->runstate.state == RUNSTATE_blocked )
248 vcpu_runstate_change(v, RUNSTATE_offline, NOW());
249 }
251 vcpu_schedule_unlock_irqrestore(v, flags);
253 TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
254 }
256 void vcpu_unblock(struct vcpu *v)
257 {
258 if ( !test_and_clear_bit(_VPF_blocked, &v->pause_flags) )
259 return;
261 /* Polling period ends when a VCPU is unblocked. */
262 if ( unlikely(v->poll_evtchn != 0) )
263 {
264 v->poll_evtchn = 0;
265 /*
266 * We *must* re-clear _VPF_blocked to avoid racing other wakeups of
267 * this VCPU (and it then going back to sleep on poll_mask).
268 * Test-and-clear is idiomatic and ensures clear_bit not reordered.
269 */
270 if ( test_and_clear_bit(v->vcpu_id, v->domain->poll_mask) )
271 clear_bit(_VPF_blocked, &v->pause_flags);
272 }
274 vcpu_wake(v);
275 }
277 static void vcpu_migrate(struct vcpu *v)
278 {
279 unsigned long flags;
280 int old_cpu;
282 vcpu_schedule_lock_irqsave(v, flags);
284 /*
285 * NB. Check of v->is_running happens /after/ setting migration flag
286 * because they both happen in (different) spinlock regions, and those
287 * regions are strictly serialised.
288 */
289 if ( v->is_running ||
290 !test_and_clear_bit(_VPF_migrating, &v->pause_flags) )
291 {
292 vcpu_schedule_unlock_irqrestore(v, flags);
293 return;
294 }
296 /* Switch to new CPU, then unlock old CPU. */
297 old_cpu = v->processor;
298 v->processor = SCHED_OP(pick_cpu, v);
299 spin_unlock_irqrestore(
300 &per_cpu(schedule_data, old_cpu).schedule_lock, flags);
302 /* Wake on new CPU. */
303 vcpu_wake(v);
304 }
306 /*
307 * Force a VCPU through a deschedule/reschedule path.
308 * For example, using this when setting the periodic timer period means that
309 * most periodic-timer state need only be touched from within the scheduler
310 * which can thus be done without need for synchronisation.
311 */
312 void vcpu_force_reschedule(struct vcpu *v)
313 {
314 vcpu_schedule_lock_irq(v);
315 if ( v->is_running )
316 set_bit(_VPF_migrating, &v->pause_flags);
317 vcpu_schedule_unlock_irq(v);
319 if ( test_bit(_VPF_migrating, &v->pause_flags) )
320 {
321 vcpu_sleep_nosync(v);
322 vcpu_migrate(v);
323 }
324 }
326 /*
327 * This function is used by cpu_hotplug code from stop_machine context.
328 * Hence we can avoid needing to take the domlist lock.
329 */
330 void cpu_disable_scheduler(void)
331 {
332 struct domain *d;
333 struct vcpu *v;
334 unsigned int cpu = smp_processor_id();
336 for_each_domain ( d )
337 {
338 for_each_vcpu ( d, v )
339 {
340 if ( is_idle_vcpu(v) )
341 continue;
343 if ( (cpus_weight(v->cpu_affinity) == 1) &&
344 cpu_isset(cpu, v->cpu_affinity) )
345 {
346 printk("Breaking vcpu affinity for domain %d vcpu %d\n",
347 v->domain->domain_id, v->vcpu_id);
348 cpus_setall(v->cpu_affinity);
349 }
351 /*
352 * Migrate single-shot timers to CPU0. A new cpu will automatically
353 * be chosen when the timer is next re-set.
354 */
355 if ( v->singleshot_timer.cpu == cpu )
356 migrate_timer(&v->singleshot_timer, 0);
358 if ( v->processor == cpu )
359 {
360 set_bit(_VPF_migrating, &v->pause_flags);
361 vcpu_sleep_nosync(v);
362 vcpu_migrate(v);
363 }
364 }
365 }
366 }
368 static int __vcpu_set_affinity(
369 struct vcpu *v, cpumask_t *affinity,
370 bool_t old_lock_status, bool_t new_lock_status)
371 {
372 cpumask_t online_affinity, old_affinity;
374 cpus_and(online_affinity, *affinity, cpu_online_map);
375 if ( cpus_empty(online_affinity) )
376 return -EINVAL;
378 vcpu_schedule_lock_irq(v);
380 if ( v->affinity_locked != old_lock_status )
381 {
382 BUG_ON(!v->affinity_locked);
383 vcpu_schedule_unlock_irq(v);
384 return -EBUSY;
385 }
387 v->affinity_locked = new_lock_status;
389 old_affinity = v->cpu_affinity;
390 v->cpu_affinity = *affinity;
391 *affinity = old_affinity;
392 if ( !cpu_isset(v->processor, v->cpu_affinity) )
393 set_bit(_VPF_migrating, &v->pause_flags);
395 vcpu_schedule_unlock_irq(v);
397 if ( test_bit(_VPF_migrating, &v->pause_flags) )
398 {
399 vcpu_sleep_nosync(v);
400 vcpu_migrate(v);
401 }
403 return 0;
404 }
406 int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
407 {
408 if ( v->domain->is_pinned )
409 return -EINVAL;
410 return __vcpu_set_affinity(v, affinity, 0, 0);
411 }
413 int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity)
414 {
415 return __vcpu_set_affinity(v, affinity, 0, 1);
416 }
418 int vcpu_locked_change_affinity(struct vcpu *v, cpumask_t *affinity)
419 {
420 return __vcpu_set_affinity(v, affinity, 1, 1);
421 }
423 void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity)
424 {
425 cpumask_t online_affinity;
427 /* Do not fail if no CPU in old affinity mask is online. */
428 cpus_and(online_affinity, *affinity, cpu_online_map);
429 if ( cpus_empty(online_affinity) )
430 *affinity = cpu_online_map;
432 if ( __vcpu_set_affinity(v, affinity, 1, 0) != 0 )
433 BUG();
434 }
436 /* Block the currently-executing domain until a pertinent event occurs. */
437 static long do_block(void)
438 {
439 struct vcpu *v = current;
441 local_event_delivery_enable();
442 set_bit(_VPF_blocked, &v->pause_flags);
444 /* Check for events /after/ blocking: avoids wakeup waiting race. */
445 if ( local_events_need_delivery() )
446 {
447 clear_bit(_VPF_blocked, &v->pause_flags);
448 }
449 else
450 {
451 TRACE_2D(TRC_SCHED_BLOCK, v->domain->domain_id, v->vcpu_id);
452 raise_softirq(SCHEDULE_SOFTIRQ);
453 }
455 return 0;
456 }
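
do_block() checks for pending events only after setting _VPF_blocked, closing the lost-wakeup window against vcpu_wake(). The guest relies on the matching ordering: SCHEDOP_block re-enables event delivery itself (local_event_delivery_enable() above), so the guest can decide to sleep while events are masked. A hedged guest-side sketch; guest_has_pending_work() and the irq helpers are illustrative names, and HYPERVISOR_sched_op is the usual guest wrapper.

    static void guest_safe_halt(void)
    {
        local_irq_disable();                          /* mask event delivery */
        if ( !guest_has_pending_work() )              /* decide while masked */
            HYPERVISOR_sched_op(SCHEDOP_block, NULL); /* unmasks, then blocks */
        else
            local_irq_enable();
    }
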
458 static long do_poll(struct sched_poll *sched_poll)
459 {
460 struct vcpu *v = current;
461 struct domain *d = v->domain;
462 evtchn_port_t port;
463 long rc;
464 unsigned int i;
466 /* Fairly arbitrary limit. */
467 if ( sched_poll->nr_ports > 128 )
468 return -EINVAL;
470 if ( !guest_handle_okay(sched_poll->ports, sched_poll->nr_ports) )
471 return -EFAULT;
473 set_bit(_VPF_blocked, &v->pause_flags);
474 v->poll_evtchn = -1;
475 set_bit(v->vcpu_id, d->poll_mask);
477 #ifndef CONFIG_X86 /* set_bit() implies mb() on x86 */
478 /* Check for events /after/ setting flags: avoids wakeup waiting race. */
479 smp_mb();
481 /*
482 * Someone may have seen we are blocked but not that we are polling, or
483 * vice versa. We are certainly being woken, so clean up and bail. Beyond
484 * this point others can be guaranteed to clean up for us if they wake us.
485 */
486 rc = 0;
487 if ( (v->poll_evtchn == 0) ||
488 !test_bit(_VPF_blocked, &v->pause_flags) ||
489 !test_bit(v->vcpu_id, d->poll_mask) )
490 goto out;
491 #endif
493 rc = 0;
494 if ( local_events_need_delivery() )
495 goto out;
497 for ( i = 0; i < sched_poll->nr_ports; i++ )
498 {
499 rc = -EFAULT;
500 if ( __copy_from_guest_offset(&port, sched_poll->ports, i, 1) )
501 goto out;
503 rc = -EINVAL;
504 if ( port >= MAX_EVTCHNS(d) )
505 goto out;
507 rc = 0;
508 if ( test_bit(port, &shared_info(d, evtchn_pending)) )
509 goto out;
510 }
512 if ( sched_poll->nr_ports == 1 )
513 v->poll_evtchn = port;
515 if ( sched_poll->timeout != 0 )
516 set_timer(&v->poll_timer, sched_poll->timeout);
518 TRACE_2D(TRC_SCHED_BLOCK, d->domain_id, v->vcpu_id);
519 raise_softirq(SCHEDULE_SOFTIRQ);
521 return 0;
523 out:
524 v->poll_evtchn = 0;
525 clear_bit(v->vcpu_id, d->poll_mask);
526 clear_bit(_VPF_blocked, &v->pause_flags);
527 return rc;
528 }
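
From the guest side, SCHEDOP_poll takes the struct sched_poll consumed above: an array of event-channel ports, nr_ports, and a timeout that is an absolute Xen system time (it is handed straight to set_timer(), with 0 meaning no timeout). A hedged single-port usage sketch; the guest-side wrapper names are assumptions.

    static void guest_poll_one_port(evtchn_port_t port, uint64_t timeout_abs_ns)
    {
        struct sched_poll poll = {
            .nr_ports = 1,
            .timeout  = timeout_abs_ns,   /* absolute NOW()-style time, 0 = none */
        };

        set_xen_guest_handle(poll.ports, &port);
        HYPERVISOR_sched_op(SCHEDOP_poll, &poll);
    }
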
530 /* Voluntarily yield the processor for this allocation. */
531 static long do_yield(void)
532 {
533 TRACE_2D(TRC_SCHED_YIELD, current->domain->domain_id, current->vcpu_id);
534 raise_softirq(SCHEDULE_SOFTIRQ);
535 return 0;
536 }
538 long do_sched_op_compat(int cmd, unsigned long arg)
539 {
540 long ret = 0;
542 switch ( cmd )
543 {
544 case SCHEDOP_yield:
545 {
546 ret = do_yield();
547 break;
548 }
550 case SCHEDOP_block:
551 {
552 ret = do_block();
553 break;
554 }
556 case SCHEDOP_shutdown:
557 {
558 TRACE_3D(TRC_SCHED_SHUTDOWN,
559 current->domain->domain_id, current->vcpu_id, arg);
560 domain_shutdown(current->domain, (u8)arg);
561 break;
562 }
564 default:
565 ret = -ENOSYS;
566 }
568 return ret;
569 }
571 typedef long ret_t;
573 #endif /* !COMPAT */
575 ret_t do_sched_op(int cmd, XEN_GUEST_HANDLE(void) arg)
576 {
577 ret_t ret = 0;
579 switch ( cmd )
580 {
581 case SCHEDOP_yield:
582 {
583 ret = do_yield();
584 break;
585 }
587 case SCHEDOP_block:
588 {
589 ret = do_block();
590 break;
591 }
593 case SCHEDOP_shutdown:
594 {
595 struct sched_shutdown sched_shutdown;
597 ret = -EFAULT;
598 if ( copy_from_guest(&sched_shutdown, arg, 1) )
599 break;
601 ret = 0;
602 TRACE_3D(TRC_SCHED_SHUTDOWN,
603 current->domain->domain_id, current->vcpu_id,
604 sched_shutdown.reason);
605 domain_shutdown(current->domain, (u8)sched_shutdown.reason);
607 break;
608 }
610 case SCHEDOP_poll:
611 {
612 struct sched_poll sched_poll;
614 ret = -EFAULT;
615 if ( copy_from_guest(&sched_poll, arg, 1) )
616 break;
618 ret = do_poll(&sched_poll);
620 break;
621 }
623 case SCHEDOP_remote_shutdown:
624 {
625 struct domain *d;
626 struct sched_remote_shutdown sched_remote_shutdown;
628 ret = -EFAULT;
629 if ( copy_from_guest(&sched_remote_shutdown, arg, 1) )
630 break;
632 ret = -ESRCH;
633 d = rcu_lock_domain_by_id(sched_remote_shutdown.domain_id);
634 if ( d == NULL )
635 break;
637 if ( !IS_PRIV_FOR(current->domain, d) )
638 {
639 rcu_unlock_domain(d);
640 return -EPERM;
641 }
643 ret = xsm_schedop_shutdown(current->domain, d);
644 if ( ret )
645 {
646 rcu_unlock_domain(d);
647 return ret;
648 }
650 domain_shutdown(d, (u8)sched_remote_shutdown.reason);
652 rcu_unlock_domain(d);
653 ret = 0;
655 break;
656 }
658 default:
659 ret = -ENOSYS;
660 }
662 return ret;
663 }
665 #ifndef COMPAT
667 /* Per-vcpu oneshot-timer hypercall. */
668 long do_set_timer_op(s_time_t timeout)
669 {
670 struct vcpu *v = current;
671 s_time_t offset = timeout - NOW();
673 if ( timeout == 0 )
674 {
675 stop_timer(&v->singleshot_timer);
676 }
677 else if ( unlikely(timeout < 0) || /* overflow into 64th bit? */
678 unlikely((offset > 0) && ((uint32_t)(offset >> 50) != 0)) )
679 {
680 /*
681 * Linux workaround: occasionally we will see timeouts a long way in
682 * the future due to wrapping in Linux's jiffy time handling. We check
683 * for timeouts wrapped negative, and for positive timeouts more than
684 * about 13 days in the future (2^50ns). The correct fix is to trigger
685 * an interrupt immediately (since Linux in fact has pending work to
686 * do in this situation). However, older guests also set a long timeout
687 * when they have *no* pending timers at all: setting an immediate
688 * timeout in this case can burn a lot of CPU. We therefore go for a
689 * reasonable middleground of triggering a timer event in 100ms.
690 */
691 gdprintk(XENLOG_INFO,
692 "Warning: huge timeout set by vcpu %d: %"PRIx64"\n",
693 v->vcpu_id, (uint64_t)timeout);
694 set_timer(&v->singleshot_timer, NOW() + MILLISECS(100));
695 }
696 else
697 {
698 if ( v->singleshot_timer.cpu != smp_processor_id() )
699 {
700 stop_timer(&v->singleshot_timer);
701 v->singleshot_timer.cpu = smp_processor_id();
702 }
704 set_timer(&v->singleshot_timer, timeout);
705 }
707 return 0;
708 }
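
For reference, the cutoff used in the Linux-workaround branch above works out as follows (a quick check, not code from the file):

    /*
     * 2^50 ns = 1,125,899,906,842,624 ns ~= 1,125,900 s ~= 13.03 days, hence
     * "about 13 days" in the comment.  For a positive 64-bit offset the test
     * (uint32_t)(offset >> 50) != 0 is equivalent to offset >= 2^50, since
     * offset >> 50 always fits in 32 bits.
     */
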
710 /* sched_id - fetch ID of current scheduler */
711 int sched_id(void)
712 {
713 return ops.sched_id;
714 }
716 /* Adjust scheduling parameter for a given domain. */
717 long sched_adjust(struct domain *d, struct xen_domctl_scheduler_op *op)
718 {
719 struct vcpu *v;
720 long ret;
722 if ( (op->sched_id != ops.sched_id) ||
723 ((op->cmd != XEN_DOMCTL_SCHEDOP_putinfo) &&
724 (op->cmd != XEN_DOMCTL_SCHEDOP_getinfo)) )
725 return -EINVAL;
727 /*
728 * Most VCPUs we can simply pause. If we are adjusting this VCPU then
729 * we acquire the local schedule_lock to guard against concurrent updates.
730 *
731 * We only acquire the local schedule lock after we have paused all other
732 * VCPUs in this domain. There are two reasons for this:
733 * 1- We don't want to hold up interrupts as pausing a VCPU can
734 * trigger a tlb shootdown.
735 * 2- Pausing other VCPUs involves briefly locking the schedule
736 * lock of the CPU they are running on. This CPU could be the
737 * same as ours.
738 */
740 for_each_vcpu ( d, v )
741 {
742 if ( v != current )
743 vcpu_pause(v);
744 }
746 if ( d == current->domain )
747 vcpu_schedule_lock_irq(current);
749 if ( (ret = SCHED_OP(adjust, d, op)) == 0 )
750 TRACE_1D(TRC_SCHED_ADJDOM, d->domain_id);
752 if ( d == current->domain )
753 vcpu_schedule_unlock_irq(current);
755 for_each_vcpu ( d, v )
756 {
757 if ( v != current )
758 vcpu_unpause(v);
759 }
761 return ret;
762 }
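
sched_adjust() is reached via the XEN_DOMCTL_scheduler_op hypercall. Below is a hedged sketch of the toolstack-visible payload for a credit-scheduler update; field spelling follows public/domctl.h of this era as I understand it, the values are examples, and the libxc wrappers that would actually issue the domctl are omitted.

    static struct xen_domctl make_credit_putinfo(uint32_t domid)
    {
        struct xen_domctl domctl = {
            .cmd    = XEN_DOMCTL_scheduler_op,
            .domain = domid,
            .u.scheduler_op = {
                .sched_id = XEN_SCHEDULER_CREDIT,       /* must match ops.sched_id */
                .cmd      = XEN_DOMCTL_SCHEDOP_putinfo,
                .u.credit = { .weight = 512, .cap = 0 },  /* example values */
            },
        };
        /* .interface_version etc. omitted in this sketch. */
        return domctl;
    }
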
764 static void vcpu_periodic_timer_work(struct vcpu *v)
765 {
766 s_time_t now = NOW();
767 uint64_t periodic_next_event;
769 ASSERT(!active_timer(&v->periodic_timer));
771 if ( v->periodic_period == 0 )
772 return;
774 periodic_next_event = v->periodic_last_event + v->periodic_period;
776 if ( now >= periodic_next_event )
777 {
778 send_timer_event(v);
779 v->periodic_last_event = now;
780 periodic_next_event = now + v->periodic_period;
781 }
783 v->periodic_timer.cpu = smp_processor_id();
784 set_timer(&v->periodic_timer, periodic_next_event);
785 }
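
The period consumed here (v->periodic_period) is configured by the guest through VCPUOP_set_periodic_timer (public/vcpu.h). A hedged guest-side sketch requesting a 10ms virtual timer tick for one vCPU; HYPERVISOR_vcpu_op is the usual guest wrapper and the helper name is illustrative.

    static void guest_request_10ms_tick(unsigned int vcpu_id)
    {
        struct vcpu_set_periodic_timer t = {
            .period_ns = 10 * 1000 * 1000,   /* 10ms between virtual timer irqs */
        };

        HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, vcpu_id, &t);
    }
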
787 /*
788 * The main function
789 * - deschedule the current domain (scheduler independent).
790 * - pick a new domain (scheduler dependent).
791 */
792 static void schedule(void)
793 {
794 struct vcpu *prev = current, *next = NULL;
795 s_time_t now = NOW();
796 struct schedule_data *sd;
797 struct task_slice next_slice;
799 ASSERT(!in_irq());
800 ASSERT(this_cpu(mc_state).flags == 0);
802 perfc_incr(sched_run);
804 sd = &this_cpu(schedule_data);
806 spin_lock_irq(&sd->schedule_lock);
808 stop_timer(&sd->s_timer);
810 /* get policy-specific decision on scheduling... */
811 next_slice = ops.do_schedule(now);
813 next = next_slice.task;
815 sd->curr = next;
817 if ( next_slice.time >= 0 ) /* -ve means no limit */
818 set_timer(&sd->s_timer, now + next_slice.time);
820 if ( unlikely(prev == next) )
821 {
822 spin_unlock_irq(&sd->schedule_lock);
823 trace_continue_running(next);
824 return continue_running(prev);
825 }
827 TRACE_2D(TRC_SCHED_SWITCH_INFPREV,
828 prev->domain->domain_id,
829 now - prev->runstate.state_entry_time);
830 TRACE_3D(TRC_SCHED_SWITCH_INFNEXT,
831 next->domain->domain_id,
832 (next->runstate.state == RUNSTATE_runnable) ?
833 (now - next->runstate.state_entry_time) : 0,
834 next_slice.time);
836 ASSERT(prev->runstate.state == RUNSTATE_running);
837 vcpu_runstate_change(
838 prev,
839 (test_bit(_VPF_blocked, &prev->pause_flags) ? RUNSTATE_blocked :
840 (vcpu_runnable(prev) ? RUNSTATE_runnable : RUNSTATE_offline)),
841 now);
842 prev->last_run_time = now;
844 ASSERT(next->runstate.state != RUNSTATE_running);
845 vcpu_runstate_change(next, RUNSTATE_running, now);
847 ASSERT(!next->is_running);
848 next->is_running = 1;
850 spin_unlock_irq(&sd->schedule_lock);
852 perfc_incr(sched_ctx);
854 stop_timer(&prev->periodic_timer);
856 /* Ensure that the domain has an up-to-date time base. */
857 update_vcpu_system_time(next);
858 vcpu_periodic_timer_work(next);
860 TRACE_4D(TRC_SCHED_SWITCH,
861 prev->domain->domain_id, prev->vcpu_id,
862 next->domain->domain_id, next->vcpu_id);
864 context_switch(prev, next);
865 }
867 void context_saved(struct vcpu *prev)
868 {
869 /* Clear running flag /after/ writing context to memory. */
870 smp_wmb();
872 prev->is_running = 0;
874 /* Check for migration request /after/ clearing running flag. */
875 smp_mb();
877 if ( unlikely(test_bit(_VPF_migrating, &prev->pause_flags)) )
878 vcpu_migrate(prev);
879 }
881 /* The scheduler timer: force a run through the scheduler */
882 static void s_timer_fn(void *unused)
883 {
884 raise_softirq(SCHEDULE_SOFTIRQ);
885 perfc_incr(sched_irq);
886 }
888 /* Per-VCPU periodic timer function: sends a virtual timer interrupt. */
889 static void vcpu_periodic_timer_fn(void *data)
890 {
891 struct vcpu *v = data;
892 vcpu_periodic_timer_work(v);
893 }
895 /* Per-VCPU single-shot timer function: sends a virtual timer interrupt. */
896 static void vcpu_singleshot_timer_fn(void *data)
897 {
898 struct vcpu *v = data;
899 send_timer_event(v);
900 }
902 /* SCHEDOP_poll timeout callback. */
903 static void poll_timer_fn(void *data)
904 {
905 struct vcpu *v = data;
907 if ( test_and_clear_bit(v->vcpu_id, v->domain->poll_mask) )
908 vcpu_unblock(v);
909 }
911 /* Initialise the data structures. */
912 void __init scheduler_init(void)
913 {
914 int i;
916 open_softirq(SCHEDULE_SOFTIRQ, schedule);
918 for_each_possible_cpu ( i )
919 {
920 spin_lock_init(&per_cpu(schedule_data, i).schedule_lock);
921 init_timer(&per_cpu(schedule_data, i).s_timer, s_timer_fn, NULL, i);
922 }
924 for ( i = 0; schedulers[i] != NULL; i++ )
925 {
926 ops = *schedulers[i];
927 if ( strcmp(ops.opt_name, opt_sched) == 0 )
928 break;
929 }
931 if ( schedulers[i] == NULL )
932 printk("Could not find scheduler: %s\n", opt_sched);
934 printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
935 SCHED_OP(init);
936 }
938 void dump_runq(unsigned char key)
939 {
940 s_time_t now = NOW();
941 int i;
942 unsigned long flags;
944 local_irq_save(flags);
946 printk("Scheduler: %s (%s)\n", ops.name, ops.opt_name);
947 SCHED_OP(dump_settings);
948 printk("sched_smt_power_savings: %s\n",
949 sched_smt_power_savings? "enabled":"disabled");
950 printk("NOW=0x%08X%08X\n", (u32)(now>>32), (u32)now);
952 for_each_online_cpu ( i )
953 {
954 spin_lock(&per_cpu(schedule_data, i).schedule_lock);
955 printk("CPU[%02d] ", i);
956 SCHED_OP(dump_cpu_state, i);
957 spin_unlock(&per_cpu(schedule_data, i).schedule_lock);
958 }
960 local_irq_restore(flags);
961 }
963 void sched_tick_suspend(void)
964 {
965 SCHED_OP(tick_suspend);
966 }
968 void sched_tick_resume(void)
969 {
970 SCHED_OP(tick_resume);
971 }
973 #ifdef CONFIG_COMPAT
974 #include "compat/schedule.c"
975 #endif
977 #endif /* !COMPAT */
979 /*
980 * Local variables:
981 * mode: C
982 * c-set-style: "BSD"
983 * c-basic-offset: 4
984 * tab-width: 4
985 * indent-tabs-mode: nil
986 * End:
987 */