debuggers.hg

view xen/common/domain.c @ 21959:581ebaa7e2da

numa: Attempt more efficient NUMA allocation in hypervisor by default.

1. Try to allocate from nodes containing CPUs which a guest can be
scheduled on.
2. Remember which node we allocated from last, and round-robin
allocations among above-mentioned nodes.

Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Aug 04 15:35:28 2010 +0100 (2010-08-04)
parents 497bda800505
children 49254cab8465
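
The sketch below is an editorial illustration of the allocation policy the changeset description refers to, not code from this changeset: the allocator-side changes live outside domain.c, while this file adds the per-domain node-affinity bookkeeping they rely on (see domain_update_node_affinity() in the listing). All names here (next_alloc_node, allowed, last_node, MAX_NUMNODES as 8) are hypothetical, chosen only to model "allocate from nodes the guest's CPUs can run on, round-robin, remembering the last node used".

/*
 * Illustrative model only, not the Xen allocator.  "allowed" is a bitmask
 * of NUMA nodes hosting CPUs the guest may be scheduled on; "last_node"
 * remembers where the previous allocation came from so successive
 * allocations rotate across the allowed nodes.
 */
#include <stdio.h>

#define MAX_NUMNODES 8

/* Pick the next allowed node after *last_node, wrapping around. */
static int next_alloc_node(unsigned int allowed, int *last_node)
{
    int i, node;

    for ( i = 1; i <= MAX_NUMNODES; i++ )
    {
        node = (*last_node + i) % MAX_NUMNODES;
        if ( allowed & (1u << node) )
        {
            *last_node = node;   /* remember for the next allocation */
            return node;
        }
    }
    return -1;                   /* no allowed node: caller falls back */
}

int main(void)
{
    unsigned int allowed = (1u << 1) | (1u << 3); /* guest fits on nodes 1 and 3 */
    int last = -1, i;

    for ( i = 0; i < 4; i++ )
        printf("allocation %d -> node %d\n", i, next_alloc_node(allowed, &last));
    return 0;
}

With nodes 1 and 3 allowed, the allocations alternate 1, 3, 1, 3, which is the round-robin behaviour point 2 of the description asks for.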
line source
1 /******************************************************************************
2 * domain.c
3 *
4 * Generic domain-handling functions.
5 */
7 #include <xen/config.h>
8 #include <xen/compat.h>
9 #include <xen/init.h>
10 #include <xen/lib.h>
11 #include <xen/ctype.h>
12 #include <xen/errno.h>
13 #include <xen/sched.h>
14 #include <xen/domain.h>
15 #include <xen/mm.h>
16 #include <xen/event.h>
17 #include <xen/time.h>
18 #include <xen/console.h>
19 #include <xen/softirq.h>
20 #include <xen/tasklet.h>
21 #include <xen/domain_page.h>
22 #include <xen/rangeset.h>
23 #include <xen/guest_access.h>
24 #include <xen/hypercall.h>
25 #include <xen/delay.h>
26 #include <xen/shutdown.h>
27 #include <xen/percpu.h>
28 #include <xen/multicall.h>
29 #include <xen/rcupdate.h>
30 #include <acpi/cpufreq/cpufreq.h>
31 #include <asm/debugger.h>
32 #include <public/sched.h>
33 #include <public/sysctl.h>
34 #include <public/vcpu.h>
35 #include <xsm/xsm.h>
36 #include <xen/trace.h>
37 #include <xen/tmem.h>
39 /* Linux config option: propagated to domain0 */
40 /* xen_processor_pmbits: xen control Cx, Px, ... */
41 unsigned int xen_processor_pmbits = XEN_PROCESSOR_PM_PX;
43 /* opt_dom0_vcpus_pin: If true, dom0 VCPUs are pinned. */
44 static unsigned int opt_dom0_vcpus_pin;
45 boolean_param("dom0_vcpus_pin", opt_dom0_vcpus_pin);
47 /* Set xen as the default cpufreq controller. */
48 enum cpufreq_controller cpufreq_controller = FREQCTL_xen;
50 static void __init setup_cpufreq_option(char *str)
51 {
52 char *arg;
54 if ( !strcmp(str, "dom0-kernel") )
55 {
56 xen_processor_pmbits &= ~XEN_PROCESSOR_PM_PX;
57 cpufreq_controller = FREQCTL_dom0_kernel;
58 opt_dom0_vcpus_pin = 1;
59 return;
60 }
62 if ( !strcmp(str, "none") )
63 {
64 xen_processor_pmbits &= ~XEN_PROCESSOR_PM_PX;
65 cpufreq_controller = FREQCTL_none;
66 return;
67 }
69 if ( (arg = strpbrk(str, ",:")) != NULL )
70 *arg++ = '\0';
72 if ( !strcmp(str, "xen") )
73 if ( arg && *arg )
74 cpufreq_cmdline_parse(arg);
75 }
76 custom_param("cpufreq", setup_cpufreq_option);
78 /* Protect updates/reads (resp.) of domain_list and domain_hash. */
79 DEFINE_SPINLOCK(domlist_update_lock);
80 DEFINE_RCU_READ_LOCK(domlist_read_lock);
82 #define DOMAIN_HASH_SIZE 256
83 #define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1))
84 static struct domain *domain_hash[DOMAIN_HASH_SIZE];
85 struct domain *domain_list;
87 struct domain *dom0;
89 struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;
91 vcpu_info_t dummy_vcpu_info;
93 int current_domain_id(void)
94 {
95 return current->domain->domain_id;
96 }
98 static void __domain_finalise_shutdown(struct domain *d)
99 {
100 struct vcpu *v;
102 BUG_ON(!spin_is_locked(&d->shutdown_lock));
104 if ( d->is_shut_down )
105 return;
107 for_each_vcpu ( d, v )
108 if ( !v->paused_for_shutdown )
109 return;
111 d->is_shut_down = 1;
112 if ( (d->shutdown_code == SHUTDOWN_suspend) && d->suspend_evtchn )
113 evtchn_send(d, d->suspend_evtchn);
114 else
115 send_guest_global_virq(dom0, VIRQ_DOM_EXC);
116 }
118 static void vcpu_check_shutdown(struct vcpu *v)
119 {
120 struct domain *d = v->domain;
122 spin_lock(&d->shutdown_lock);
124 if ( d->is_shutting_down )
125 {
126 if ( !v->paused_for_shutdown )
127 vcpu_pause_nosync(v);
128 v->paused_for_shutdown = 1;
129 v->defer_shutdown = 0;
130 __domain_finalise_shutdown(d);
131 }
133 spin_unlock(&d->shutdown_lock);
134 }
136 struct vcpu *alloc_vcpu(
137 struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
138 {
139 struct vcpu *v;
141 BUG_ON((!is_idle_domain(d) || vcpu_id) && d->vcpu[vcpu_id]);
143 if ( (v = alloc_vcpu_struct()) == NULL )
144 return NULL;
146 v->domain = d;
147 v->vcpu_id = vcpu_id;
149 spin_lock_init(&v->virq_lock);
151 tasklet_init(&v->continue_hypercall_tasklet, NULL, 0);
153 if ( is_idle_domain(d) )
154 {
155 v->runstate.state = RUNSTATE_running;
156 }
157 else
158 {
159 v->runstate.state = RUNSTATE_offline;
160 v->runstate.state_entry_time = NOW();
161 set_bit(_VPF_down, &v->pause_flags);
162 v->vcpu_info = ((vcpu_id < XEN_LEGACY_MAX_VCPUS)
163 ? (vcpu_info_t *)&shared_info(d, vcpu_info[vcpu_id])
164 : &dummy_vcpu_info);
165 }
167 if ( sched_init_vcpu(v, cpu_id) != 0 )
168 {
169 free_vcpu_struct(v);
170 return NULL;
171 }
173 if ( vcpu_initialise(v) != 0 )
174 {
175 sched_destroy_vcpu(v);
176 free_vcpu_struct(v);
177 return NULL;
178 }
180 d->vcpu[vcpu_id] = v;
181 if ( vcpu_id != 0 )
182 {
183 int prev_id = v->vcpu_id - 1;
184 while ( (prev_id >= 0) && (d->vcpu[prev_id] == NULL) )
185 prev_id--;
186 BUG_ON(prev_id < 0);
187 v->next_in_list = d->vcpu[prev_id]->next_in_list;
188 d->vcpu[prev_id]->next_in_list = v;
189 }
191 /* Must be called after making new vcpu visible to for_each_vcpu(). */
192 vcpu_check_shutdown(v);
194 domain_update_node_affinity(d);
196 return v;
197 }
199 static unsigned int __read_mostly extra_dom0_irqs = 256;
200 static unsigned int __read_mostly extra_domU_irqs = 32;
201 static void __init parse_extra_guest_irqs(const char *s)
202 {
203 if ( isdigit(*s) )
204 extra_domU_irqs = simple_strtoul(s, &s, 0);
205 if ( *s == ',' && isdigit(*++s) )
206 extra_dom0_irqs = simple_strtoul(s, &s, 0);
207 }
208 custom_param("extra_guest_irqs", parse_extra_guest_irqs);
210 struct domain *domain_create(
211 domid_t domid, unsigned int domcr_flags, ssidref_t ssidref)
212 {
213 struct domain *d, **pd;
214 enum { INIT_xsm = 1u<<0, INIT_watchdog = 1u<<1, INIT_rangeset = 1u<<2,
215 INIT_evtchn = 1u<<3, INIT_gnttab = 1u<<4, INIT_arch = 1u<<5 };
216 int init_status = 0;
217 int poolid = CPUPOOLID_NONE;
219 if ( (d = alloc_domain_struct()) == NULL )
220 return NULL;
222 d->domain_id = domid;
224 lock_profile_register_struct(LOCKPROF_TYPE_PERDOM, d, domid, "Domain");
226 if ( xsm_alloc_security_domain(d) != 0 )
227 goto fail;
228 init_status |= INIT_xsm;
230 watchdog_domain_init(d);
231 init_status |= INIT_watchdog;
233 atomic_set(&d->refcnt, 1);
234 spin_lock_init_prof(d, domain_lock);
235 spin_lock_init_prof(d, page_alloc_lock);
236 spin_lock_init(&d->hypercall_deadlock_mutex);
237 INIT_PAGE_LIST_HEAD(&d->page_list);
238 INIT_PAGE_LIST_HEAD(&d->xenpage_list);
240 spin_lock_init(&d->node_affinity_lock);
242 spin_lock_init(&d->shutdown_lock);
243 d->shutdown_code = -1;
245 if ( domcr_flags & DOMCRF_hvm )
246 d->is_hvm = 1;
248 if ( domid == 0 )
249 {
250 d->is_pinned = opt_dom0_vcpus_pin;
251 d->disable_migrate = 1;
252 }
254 rangeset_domain_initialise(d);
255 init_status |= INIT_rangeset;
257 d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex);
258 d->irq_caps = rangeset_new(d, "Interrupts", 0);
259 if ( (d->iomem_caps == NULL) || (d->irq_caps == NULL) )
260 goto fail;
262 if ( domcr_flags & DOMCRF_dummy )
263 return d;
265 if ( !is_idle_domain(d) )
266 {
267 if ( xsm_domain_create(d, ssidref) != 0 )
268 goto fail;
270 d->is_paused_by_controller = 1;
271 atomic_inc(&d->pause_count);
273 if ( domid )
274 d->nr_pirqs = nr_irqs_gsi + extra_domU_irqs;
275 else
276 d->nr_pirqs = nr_irqs_gsi + extra_dom0_irqs;
278 d->pirq_to_evtchn = xmalloc_array(u16, d->nr_pirqs);
279 d->pirq_mask = xmalloc_array(
280 unsigned long, BITS_TO_LONGS(d->nr_pirqs));
281 if ( (d->pirq_to_evtchn == NULL) || (d->pirq_mask == NULL) )
282 goto fail;
283 memset(d->pirq_to_evtchn, 0, d->nr_pirqs * sizeof(*d->pirq_to_evtchn));
284 bitmap_zero(d->pirq_mask, d->nr_pirqs);
286 if ( evtchn_init(d) != 0 )
287 goto fail;
288 init_status |= INIT_evtchn;
290 if ( grant_table_create(d) != 0 )
291 goto fail;
292 init_status |= INIT_gnttab;
294 poolid = 0;
295 }
297 if ( arch_domain_create(d, domcr_flags) != 0 )
298 goto fail;
299 init_status |= INIT_arch;
301 if ( cpupool_add_domain(d, poolid) != 0 )
302 goto fail;
304 if ( sched_init_domain(d) != 0 )
305 goto fail;
307 if ( !is_idle_domain(d) )
308 {
309 spin_lock(&domlist_update_lock);
310 pd = &domain_list; /* NB. domain_list maintained in order of domid. */
311 for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list )
312 if ( (*pd)->domain_id > d->domain_id )
313 break;
314 d->next_in_list = *pd;
315 d->next_in_hashbucket = domain_hash[DOMAIN_HASH(domid)];
316 rcu_assign_pointer(*pd, d);
317 rcu_assign_pointer(domain_hash[DOMAIN_HASH(domid)], d);
318 spin_unlock(&domlist_update_lock);
319 }
321 return d;
323 fail:
324 d->is_dying = DOMDYING_dead;
325 atomic_set(&d->refcnt, DOMAIN_DESTROYED);
326 if ( init_status & INIT_arch )
327 arch_domain_destroy(d);
328 if ( init_status & INIT_gnttab )
329 grant_table_destroy(d);
330 if ( init_status & INIT_evtchn )
331 {
332 evtchn_destroy(d);
333 evtchn_destroy_final(d);
334 }
335 if ( init_status & INIT_rangeset )
336 rangeset_domain_destroy(d);
337 if ( init_status & INIT_watchdog )
338 watchdog_domain_destroy(d);
339 if ( init_status & INIT_xsm )
340 xsm_free_security_domain(d);
341 xfree(d->pirq_mask);
342 xfree(d->pirq_to_evtchn);
343 free_domain_struct(d);
344 return NULL;
345 }
348 void domain_update_node_affinity(struct domain *d)
349 {
350 cpumask_t cpumask = CPU_MASK_NONE;
351 nodemask_t nodemask = NODE_MASK_NONE;
352 struct vcpu *v;
353 unsigned int node;
355 spin_lock(&d->node_affinity_lock);
357 for_each_vcpu ( d, v )
358 cpus_or(cpumask, cpumask, v->cpu_affinity);
360 for_each_online_node ( node )
361 {
362 if ( cpus_intersects(node_to_cpumask(node), cpumask) )
363 node_set(node, nodemask);
364 else
365 node_clear(node, nodemask);
366 }
368 d->node_affinity = nodemask;
369 spin_unlock(&d->node_affinity_lock);
370 }
373 struct domain *get_domain_by_id(domid_t dom)
374 {
375 struct domain *d;
377 rcu_read_lock(&domlist_read_lock);
379 for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
380 d != NULL;
381 d = rcu_dereference(d->next_in_hashbucket) )
382 {
383 if ( d->domain_id == dom )
384 {
385 if ( unlikely(!get_domain(d)) )
386 d = NULL;
387 break;
388 }
389 }
391 rcu_read_unlock(&domlist_read_lock);
393 return d;
394 }
397 struct domain *rcu_lock_domain_by_id(domid_t dom)
398 {
399 struct domain *d;
401 rcu_read_lock(&domlist_read_lock);
403 for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
404 d != NULL;
405 d = rcu_dereference(d->next_in_hashbucket) )
406 {
407 if ( d->domain_id == dom )
408 return d;
409 }
411 rcu_read_unlock(&domlist_read_lock);
413 return NULL;
414 }
416 int rcu_lock_target_domain_by_id(domid_t dom, struct domain **d)
417 {
418 if ( dom == DOMID_SELF )
419 {
420 *d = rcu_lock_current_domain();
421 return 0;
422 }
424 if ( (*d = rcu_lock_domain_by_id(dom)) == NULL )
425 return -ESRCH;
427 if ( !IS_PRIV_FOR(current->domain, *d) )
428 {
429 rcu_unlock_domain(*d);
430 return -EPERM;
431 }
433 return 0;
434 }
436 int domain_kill(struct domain *d)
437 {
438 int rc = 0;
440 if ( d == current->domain )
441 return -EINVAL;
443 /* Protected by domctl_lock. */
444 switch ( d->is_dying )
445 {
446 case DOMDYING_alive:
447 domain_pause(d);
448 d->is_dying = DOMDYING_dying;
449 spin_barrier(&d->domain_lock);
450 evtchn_destroy(d);
451 gnttab_release_mappings(d);
452 tmem_destroy(d->tmem);
453 d->tmem = NULL;
454 /* fallthrough */
455 case DOMDYING_dying:
456 rc = domain_relinquish_resources(d);
457 if ( rc != 0 )
458 {
459 BUG_ON(rc != -EAGAIN);
460 break;
461 }
462 d->is_dying = DOMDYING_dead;
463 put_domain(d);
464 send_guest_global_virq(dom0, VIRQ_DOM_EXC);
465 /* fallthrough */
466 case DOMDYING_dead:
467 break;
468 }
470 return rc;
471 }
474 void __domain_crash(struct domain *d)
475 {
476 if ( d->is_shutting_down )
477 {
478 /* Print nothing: the domain is already shutting down. */
479 }
480 else if ( d == current->domain )
481 {
482 printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n",
483 d->domain_id, current->vcpu_id, smp_processor_id());
484 show_execution_state(guest_cpu_user_regs());
485 }
486 else
487 {
488 printk("Domain %d reported crashed by domain %d on cpu#%d:\n",
489 d->domain_id, current->domain->domain_id, smp_processor_id());
490 }
492 domain_shutdown(d, SHUTDOWN_crash);
493 }
496 void __domain_crash_synchronous(void)
497 {
498 __domain_crash(current->domain);
500 /*
501 * Flush multicall state before dying if a multicall is in progress.
502 * This shouldn't be necessary, but some architectures are calling
503 * domain_crash_synchronous() when they really shouldn't (i.e., from
504 * within hypercall context).
505 */
506 if ( this_cpu(mc_state).flags != 0 )
507 {
508 dprintk(XENLOG_ERR,
509 "FIXME: synchronous domain crash during a multicall!\n");
510 this_cpu(mc_state).flags = 0;
511 }
513 vcpu_end_shutdown_deferral(current);
515 for ( ; ; )
516 do_softirq();
517 }
520 void domain_shutdown(struct domain *d, u8 reason)
521 {
522 struct vcpu *v;
524 spin_lock(&d->shutdown_lock);
526 if ( d->shutdown_code == -1 )
527 d->shutdown_code = reason;
528 reason = d->shutdown_code;
530 if ( d->domain_id == 0 )
531 dom0_shutdown(reason);
533 if ( d->is_shutting_down )
534 {
535 spin_unlock(&d->shutdown_lock);
536 return;
537 }
539 d->is_shutting_down = 1;
541 smp_mb(); /* set shutdown status /then/ check for per-cpu deferrals */
543 for_each_vcpu ( d, v )
544 {
545 if ( reason == SHUTDOWN_crash )
546 v->defer_shutdown = 0;
547 else if ( v->defer_shutdown )
548 continue;
549 vcpu_pause_nosync(v);
550 v->paused_for_shutdown = 1;
551 }
553 __domain_finalise_shutdown(d);
555 spin_unlock(&d->shutdown_lock);
556 }
558 void domain_resume(struct domain *d)
559 {
560 struct vcpu *v;
562 /*
563 * Some code paths assume that shutdown status does not get reset under
564 * their feet (e.g., some assertions make this assumption).
565 */
566 domain_pause(d);
568 spin_lock(&d->shutdown_lock);
570 d->is_shutting_down = d->is_shut_down = 0;
571 d->shutdown_code = -1;
573 for_each_vcpu ( d, v )
574 {
575 if ( v->paused_for_shutdown )
576 vcpu_unpause(v);
577 v->paused_for_shutdown = 0;
578 }
580 spin_unlock(&d->shutdown_lock);
582 domain_unpause(d);
583 }
585 int vcpu_start_shutdown_deferral(struct vcpu *v)
586 {
587 if ( v->defer_shutdown )
588 return 1;
590 v->defer_shutdown = 1;
591 smp_mb(); /* set deferral status /then/ check for shutdown */
592 if ( unlikely(v->domain->is_shutting_down) )
593 vcpu_check_shutdown(v);
595 return v->defer_shutdown;
596 }
598 void vcpu_end_shutdown_deferral(struct vcpu *v)
599 {
600 v->defer_shutdown = 0;
601 smp_mb(); /* clear deferral status /then/ check for shutdown */
602 if ( unlikely(v->domain->is_shutting_down) )
603 vcpu_check_shutdown(v);
604 }
606 void domain_pause_for_debugger(void)
607 {
608 struct domain *d = current->domain;
609 struct vcpu *v;
611 atomic_inc(&d->pause_count);
612 if ( test_and_set_bool(d->is_paused_by_controller) )
613 domain_unpause(d); /* race-free atomic_dec(&d->pause_count) */
615 for_each_vcpu ( d, v )
616 vcpu_sleep_nosync(v);
618 send_guest_global_virq(dom0, VIRQ_DEBUGGER);
619 }
621 /* Complete domain destroy after RCU readers are not holding old references. */
622 static void complete_domain_destroy(struct rcu_head *head)
623 {
624 struct domain *d = container_of(head, struct domain, rcu);
625 struct vcpu *v;
626 int i;
628 for ( i = d->max_vcpus - 1; i >= 0; i-- )
629 {
630 if ( (v = d->vcpu[i]) == NULL )
631 continue;
632 tasklet_kill(&v->continue_hypercall_tasklet);
633 vcpu_destroy(v);
634 sched_destroy_vcpu(v);
635 }
637 grant_table_destroy(d);
639 arch_domain_destroy(d);
641 watchdog_domain_destroy(d);
643 rangeset_domain_destroy(d);
645 cpupool_rm_domain(d);
647 sched_destroy_domain(d);
649 /* Free page used by xen oprofile buffer. */
650 free_xenoprof_pages(d);
652 for ( i = d->max_vcpus - 1; i >= 0; i-- )
653 if ( (v = d->vcpu[i]) != NULL )
654 free_vcpu_struct(v);
656 if ( d->target != NULL )
657 put_domain(d->target);
659 evtchn_destroy_final(d);
661 xfree(d->pirq_mask);
662 xfree(d->pirq_to_evtchn);
664 xsm_free_security_domain(d);
665 free_domain_struct(d);
667 send_guest_global_virq(dom0, VIRQ_DOM_EXC);
668 }
670 /* Release resources belonging to domain @d. */
671 void domain_destroy(struct domain *d)
672 {
673 struct domain **pd;
674 atomic_t old, new;
676 BUG_ON(!d->is_dying);
678 /* May be already destroyed, or get_domain() can race us. */
679 _atomic_set(old, 0);
680 _atomic_set(new, DOMAIN_DESTROYED);
681 old = atomic_compareandswap(old, new, &d->refcnt);
682 if ( _atomic_read(old) != 0 )
683 return;
685 /* Delete from task list and task hashtable. */
686 TRACE_1D(TRC_SCHED_DOM_REM, d->domain_id);
687 spin_lock(&domlist_update_lock);
688 pd = &domain_list;
689 while ( *pd != d )
690 pd = &(*pd)->next_in_list;
691 rcu_assign_pointer(*pd, d->next_in_list);
692 pd = &domain_hash[DOMAIN_HASH(d->domain_id)];
693 while ( *pd != d )
694 pd = &(*pd)->next_in_hashbucket;
695 rcu_assign_pointer(*pd, d->next_in_hashbucket);
696 spin_unlock(&domlist_update_lock);
698 /* Schedule RCU asynchronous completion of domain destroy. */
699 call_rcu(&d->rcu, complete_domain_destroy);
700 }
702 void vcpu_pause(struct vcpu *v)
703 {
704 ASSERT(v != current);
705 atomic_inc(&v->pause_count);
706 vcpu_sleep_sync(v);
707 }
709 void vcpu_pause_nosync(struct vcpu *v)
710 {
711 atomic_inc(&v->pause_count);
712 vcpu_sleep_nosync(v);
713 }
715 void vcpu_unpause(struct vcpu *v)
716 {
717 if ( atomic_dec_and_test(&v->pause_count) )
718 vcpu_wake(v);
719 }
721 void domain_pause(struct domain *d)
722 {
723 struct vcpu *v;
725 ASSERT(d != current->domain);
727 atomic_inc(&d->pause_count);
729 for_each_vcpu( d, v )
730 vcpu_sleep_sync(v);
731 }
733 void domain_unpause(struct domain *d)
734 {
735 struct vcpu *v;
737 if ( atomic_dec_and_test(&d->pause_count) )
738 for_each_vcpu( d, v )
739 vcpu_wake(v);
740 }
742 void domain_pause_by_systemcontroller(struct domain *d)
743 {
744 domain_pause(d);
745 if ( test_and_set_bool(d->is_paused_by_controller) )
746 domain_unpause(d);
747 }
749 void domain_unpause_by_systemcontroller(struct domain *d)
750 {
751 if ( test_and_clear_bool(d->is_paused_by_controller) )
752 domain_unpause(d);
753 }
755 int boot_vcpu(struct domain *d, int vcpuid, vcpu_guest_context_u ctxt)
756 {
757 struct vcpu *v = d->vcpu[vcpuid];
759 BUG_ON(v->is_initialised);
761 return arch_set_info_guest(v, ctxt);
762 }
764 void vcpu_reset(struct vcpu *v)
765 {
766 struct domain *d = v->domain;
768 vcpu_pause(v);
769 domain_lock(d);
771 arch_vcpu_reset(v);
773 set_bit(_VPF_down, &v->pause_flags);
775 clear_bit(v->vcpu_id, d->poll_mask);
776 v->poll_evtchn = 0;
778 v->fpu_initialised = 0;
779 v->fpu_dirtied = 0;
780 v->is_initialised = 0;
781 #ifdef VCPU_TRAP_LAST
782 v->async_exception_mask = 0;
783 memset(v->async_exception_state, 0, sizeof(v->async_exception_state));
784 #endif
785 cpus_clear(v->cpu_affinity_tmp);
786 clear_bit(_VPF_blocked, &v->pause_flags);
788 domain_unlock(v->domain);
789 vcpu_unpause(v);
790 }
793 long do_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg)
794 {
795 struct domain *d = current->domain;
796 struct vcpu *v;
797 struct vcpu_guest_context *ctxt;
798 long rc = 0;
800 if ( (vcpuid < 0) || (vcpuid >= MAX_VIRT_CPUS) )
801 return -EINVAL;
803 if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
804 return -ENOENT;
806 switch ( cmd )
807 {
808 case VCPUOP_initialise:
809 if ( v->vcpu_info == &dummy_vcpu_info )
810 return -EINVAL;
812 if ( (ctxt = xmalloc(struct vcpu_guest_context)) == NULL )
813 return -ENOMEM;
815 if ( copy_from_guest(ctxt, arg, 1) )
816 {
817 xfree(ctxt);
818 return -EFAULT;
819 }
821 domain_lock(d);
822 rc = -EEXIST;
823 if ( !v->is_initialised )
824 rc = boot_vcpu(d, vcpuid, ctxt);
825 domain_unlock(d);
827 xfree(ctxt);
828 break;
830 case VCPUOP_up:
831 if ( !v->is_initialised )
832 return -EINVAL;
834 if ( test_and_clear_bit(_VPF_down, &v->pause_flags) )
835 vcpu_wake(v);
837 break;
839 case VCPUOP_down:
840 if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
841 vcpu_sleep_nosync(v);
842 break;
844 case VCPUOP_is_up:
845 rc = !test_bit(_VPF_down, &v->pause_flags);
846 break;
848 case VCPUOP_get_runstate_info:
849 {
850 struct vcpu_runstate_info runstate;
851 vcpu_runstate_get(v, &runstate);
852 if ( copy_to_guest(arg, &runstate, 1) )
853 rc = -EFAULT;
854 break;
855 }
857 case VCPUOP_set_periodic_timer:
858 {
859 struct vcpu_set_periodic_timer set;
861 if ( copy_from_guest(&set, arg, 1) )
862 return -EFAULT;
864 if ( set.period_ns < MILLISECS(1) )
865 return -EINVAL;
867 v->periodic_period = set.period_ns;
868 vcpu_force_reschedule(v);
870 break;
871 }
873 case VCPUOP_stop_periodic_timer:
874 v->periodic_period = 0;
875 vcpu_force_reschedule(v);
876 break;
878 case VCPUOP_set_singleshot_timer:
879 {
880 struct vcpu_set_singleshot_timer set;
882 if ( v != current )
883 return -EINVAL;
885 if ( copy_from_guest(&set, arg, 1) )
886 return -EFAULT;
888 if ( (set.flags & VCPU_SSHOTTMR_future) &&
889 (set.timeout_abs_ns < NOW()) )
890 return -ETIME;
892 migrate_timer(&v->singleshot_timer, smp_processor_id());
893 set_timer(&v->singleshot_timer, set.timeout_abs_ns);
895 break;
896 }
898 case VCPUOP_stop_singleshot_timer:
899 if ( v != current )
900 return -EINVAL;
902 stop_timer(&v->singleshot_timer);
904 break;
906 #ifdef VCPU_TRAP_NMI
907 case VCPUOP_send_nmi:
908 if ( !guest_handle_is_null(arg) )
909 return -EINVAL;
911 if ( !test_and_set_bool(v->nmi_pending) )
912 vcpu_kick(v);
914 break;
915 #endif
917 default:
918 rc = arch_do_vcpu_op(cmd, v, arg);
919 break;
920 }
922 return rc;
923 }
925 long vm_assist(struct domain *p, unsigned int cmd, unsigned int type)
926 {
927 if ( type > MAX_VMASST_TYPE )
928 return -EINVAL;
930 switch ( cmd )
931 {
932 case VMASST_CMD_enable:
933 set_bit(type, &p->vm_assist);
934 return 0;
935 case VMASST_CMD_disable:
936 clear_bit(type, &p->vm_assist);
937 return 0;
938 }
940 return -ENOSYS;
941 }
943 struct migrate_info {
944 long (*func)(void *data);
945 void *data;
946 struct vcpu *vcpu;
947 unsigned int cpu;
948 unsigned int nest;
949 };
951 static DEFINE_PER_CPU(struct migrate_info *, continue_info);
953 static void continue_hypercall_tasklet_handler(unsigned long _info)
954 {
955 struct migrate_info *info = (struct migrate_info *)_info;
956 struct vcpu *v = info->vcpu;
958 /* Wait for vcpu to sleep so that we can access its register state. */
959 vcpu_sleep_sync(v);
961 this_cpu(continue_info) = info;
962 return_reg(v) = (info->cpu == smp_processor_id())
963 ? info->func(info->data) : -EINVAL;
964 this_cpu(continue_info) = NULL;
966 if ( info->nest-- == 0 )
967 {
968 xfree(info);
969 vcpu_unpause(v);
970 put_domain(v->domain);
971 }
972 }
974 int continue_hypercall_on_cpu(
975 unsigned int cpu, long (*func)(void *data), void *data)
976 {
977 struct migrate_info *info;
979 if ( (cpu >= NR_CPUS) || !cpu_online(cpu) )
980 return -EINVAL;
982 info = this_cpu(continue_info);
983 if ( info == NULL )
984 {
985 struct vcpu *curr = current;
987 info = xmalloc(struct migrate_info);
988 if ( info == NULL )
989 return -ENOMEM;
991 info->vcpu = curr;
992 info->nest = 0;
994 tasklet_kill(
995 &curr->continue_hypercall_tasklet);
996 tasklet_init(
997 &curr->continue_hypercall_tasklet,
998 continue_hypercall_tasklet_handler,
999 (unsigned long)info);
1001 get_knownalive_domain(curr->domain);
1002 vcpu_pause_nosync(curr);
1003 }
1004 else
1005 {
1006 BUG_ON(info->nest != 0);
1007 info->nest++;
1008 }
1010 info->func = func;
1011 info->data = data;
1012 info->cpu = cpu;
1014 tasklet_schedule_on_cpu(&info->vcpu->continue_hypercall_tasklet, cpu);
1016 /* Dummy return value will be overwritten by tasklet. */
1017 return 0;
1018 }
1020 /*
1021 * Local variables:
1022 * mode: C
1023 * c-set-style: "BSD"
1024 * c-basic-offset: 4
1025 * tab-width: 4
1026 * indent-tabs-mode: nil
1027 * End:
1028 */