/root/src/xen/xen/common/domain.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * domain.c |
3 | | * |
4 | | * Generic domain-handling functions. |
5 | | */ |
6 | | |
7 | | #include <xen/compat.h> |
8 | | #include <xen/init.h> |
9 | | #include <xen/lib.h> |
10 | | #include <xen/ctype.h> |
11 | | #include <xen/err.h> |
12 | | #include <xen/sched.h> |
13 | | #include <xen/sched-if.h> |
14 | | #include <xen/domain.h> |
15 | | #include <xen/mm.h> |
16 | | #include <xen/event.h> |
17 | | #include <xen/vm_event.h> |
18 | | #include <xen/time.h> |
19 | | #include <xen/console.h> |
20 | | #include <xen/softirq.h> |
21 | | #include <xen/tasklet.h> |
22 | | #include <xen/domain_page.h> |
23 | | #include <xen/rangeset.h> |
24 | | #include <xen/guest_access.h> |
25 | | #include <xen/hypercall.h> |
26 | | #include <xen/delay.h> |
27 | | #include <xen/shutdown.h> |
28 | | #include <xen/percpu.h> |
29 | | #include <xen/multicall.h> |
30 | | #include <xen/rcupdate.h> |
31 | | #include <xen/wait.h> |
32 | | #include <xen/grant_table.h> |
33 | | #include <xen/xenoprof.h> |
34 | | #include <xen/irq.h> |
35 | | #include <asm/debugger.h> |
36 | | #include <asm/p2m.h> |
37 | | #include <asm/processor.h> |
38 | | #include <public/sched.h> |
39 | | #include <public/sysctl.h> |
40 | | #include <public/vcpu.h> |
41 | | #include <xsm/xsm.h> |
42 | | #include <xen/trace.h> |
43 | | #include <xen/tmem.h> |
44 | | #include <asm/setup.h> |
45 | | |
46 | | /* Linux config option: propageted to domain0 */ |
47 | | /* xen_processor_pmbits: xen control Cx, Px, ... */ |
48 | | unsigned int xen_processor_pmbits = XEN_PROCESSOR_PM_PX; |
49 | | |
50 | | /* opt_dom0_vcpus_pin: If true, dom0 VCPUs are pinned. */ |
51 | | bool_t opt_dom0_vcpus_pin; |
52 | | boolean_param("dom0_vcpus_pin", opt_dom0_vcpus_pin); |
53 | | |
54 | | /* Protect updates/reads (resp.) of domain_list and domain_hash. */ |
55 | | DEFINE_SPINLOCK(domlist_update_lock); |
56 | | DEFINE_RCU_READ_LOCK(domlist_read_lock); |
57 | | |
58 | 1 | #define DOMAIN_HASH_SIZE 256 |
59 | 1 | #define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1)) |
60 | | static struct domain *domain_hash[DOMAIN_HASH_SIZE]; |
61 | | struct domain *domain_list; |
62 | | |
63 | | struct domain *hardware_domain __read_mostly; |
64 | | |
65 | | #ifdef CONFIG_LATE_HWDOM |
66 | | domid_t hardware_domid __read_mostly; |
67 | | integer_param("hardware_dom", hardware_domid); |
68 | | #endif |
69 | | |
70 | | struct vcpu *idle_vcpu[NR_CPUS] __read_mostly; |
71 | | |
72 | | vcpu_info_t dummy_vcpu_info; |
73 | | |
74 | | static void __domain_finalise_shutdown(struct domain *d) |
75 | 0 | { |
76 | 0 | struct vcpu *v; |
77 | 0 |
|
78 | 0 | BUG_ON(!spin_is_locked(&d->shutdown_lock)); |
79 | 0 |
|
80 | 0 | if ( d->is_shut_down ) |
81 | 0 | return; |
82 | 0 |
|
83 | 0 | for_each_vcpu ( d, v ) |
84 | 0 | if ( !v->paused_for_shutdown ) |
85 | 0 | return; |
86 | 0 |
|
87 | 0 | d->is_shut_down = 1; |
88 | 0 | if ( (d->shutdown_code == SHUTDOWN_suspend) && d->suspend_evtchn ) |
89 | 0 | evtchn_send(d, d->suspend_evtchn); |
90 | 0 | else |
91 | 0 | send_global_virq(VIRQ_DOM_EXC); |
92 | 0 | } |
93 | | |
94 | | static void vcpu_check_shutdown(struct vcpu *v) |
95 | 24 | { |
96 | 24 | struct domain *d = v->domain; |
97 | 24 | |
98 | 24 | spin_lock(&d->shutdown_lock); |
99 | 24 | |
100 | 24 | if ( d->is_shutting_down ) |
101 | 0 | { |
102 | 0 | if ( !v->paused_for_shutdown ) |
103 | 0 | vcpu_pause_nosync(v); |
104 | 0 | v->paused_for_shutdown = 1; |
105 | 0 | v->defer_shutdown = 0; |
106 | 0 | __domain_finalise_shutdown(d); |
107 | 0 | } |
108 | 24 | |
109 | 24 | spin_unlock(&d->shutdown_lock); |
110 | 24 | } |
111 | | |
112 | | static void vcpu_info_reset(struct vcpu *v) |
113 | 12 | { |
114 | 12 | struct domain *d = v->domain; |
115 | 12 | |
116 | 12 | v->vcpu_info = ((v->vcpu_id < XEN_LEGACY_MAX_VCPUS) |
117 | 12 | ? (vcpu_info_t *)&shared_info(d, vcpu_info[v->vcpu_id]) |
118 | 0 | : &dummy_vcpu_info); |
119 | 12 | v->vcpu_info_mfn = INVALID_MFN; |
120 | 12 | } |
121 | | |
122 | | struct vcpu *alloc_vcpu( |
123 | | struct domain *d, unsigned int vcpu_id, unsigned int cpu_id) |
124 | 24 | { |
125 | 24 | struct vcpu *v; |
126 | 24 | |
127 | 24 | BUG_ON((!is_idle_domain(d) || vcpu_id) && d->vcpu[vcpu_id]); |
128 | 24 | |
129 | 24 | if ( (v = alloc_vcpu_struct()) == NULL ) |
130 | 0 | return NULL; |
131 | 24 | |
132 | 24 | v->domain = d; |
133 | 24 | v->vcpu_id = vcpu_id; |
134 | 24 | |
135 | 24 | spin_lock_init(&v->virq_lock); |
136 | 24 | |
137 | 24 | tasklet_init(&v->continue_hypercall_tasklet, NULL, 0); |
138 | 24 | |
139 | 24 | grant_table_init_vcpu(v); |
140 | 24 | |
141 | 24 | if ( !zalloc_cpumask_var(&v->cpu_hard_affinity) || |
142 | 24 | !zalloc_cpumask_var(&v->cpu_hard_affinity_tmp) || |
143 | 24 | !zalloc_cpumask_var(&v->cpu_hard_affinity_saved) || |
144 | 24 | !zalloc_cpumask_var(&v->cpu_soft_affinity) || |
145 | 24 | !zalloc_cpumask_var(&v->vcpu_dirty_cpumask) ) |
146 | 0 | goto fail_free; |
147 | 24 | |
148 | 24 | if ( is_idle_domain(d) ) |
149 | 12 | { |
150 | 12 | v->runstate.state = RUNSTATE_running; |
151 | 12 | } |
152 | 24 | else |
153 | 12 | { |
154 | 12 | v->runstate.state = RUNSTATE_offline; |
155 | 12 | v->runstate.state_entry_time = NOW(); |
156 | 12 | set_bit(_VPF_down, &v->pause_flags); |
157 | 12 | vcpu_info_reset(v); |
158 | 12 | init_waitqueue_vcpu(v); |
159 | 12 | } |
160 | 24 | |
161 | 24 | if ( sched_init_vcpu(v, cpu_id) != 0 ) |
162 | 0 | goto fail_wq; |
163 | 24 | |
164 | 24 | if ( vcpu_initialise(v) != 0 ) |
165 | 0 | { |
166 | 0 | sched_destroy_vcpu(v); |
167 | 0 | fail_wq: |
168 | 0 | destroy_waitqueue_vcpu(v); |
169 | 0 | fail_free: |
170 | 0 | free_cpumask_var(v->cpu_hard_affinity); |
171 | 0 | free_cpumask_var(v->cpu_hard_affinity_tmp); |
172 | 0 | free_cpumask_var(v->cpu_hard_affinity_saved); |
173 | 0 | free_cpumask_var(v->cpu_soft_affinity); |
174 | 0 | free_cpumask_var(v->vcpu_dirty_cpumask); |
175 | 0 | free_vcpu_struct(v); |
176 | 0 | return NULL; |
177 | 0 | } |
178 | 24 | |
179 | 24 | d->vcpu[vcpu_id] = v; |
180 | 24 | if ( vcpu_id != 0 ) |
181 | 22 | { |
182 | 22 | int prev_id = v->vcpu_id - 1; |
183 | 22 | while ( (prev_id >= 0) && (d->vcpu[prev_id] == NULL) ) |
184 | 0 | prev_id--; |
185 | 22 | BUG_ON(prev_id < 0); |
186 | 22 | v->next_in_list = d->vcpu[prev_id]->next_in_list; |
187 | 22 | d->vcpu[prev_id]->next_in_list = v; |
188 | 22 | } |
189 | 24 | |
190 | 24 | /* Must be called after making new vcpu visible to for_each_vcpu(). */ |
191 | 24 | vcpu_check_shutdown(v); |
192 | 24 | |
193 | 24 | if ( !is_idle_domain(d) ) |
194 | 12 | domain_update_node_affinity(d); |
195 | 24 | |
196 | 24 | return v; |
197 | 24 | } |
198 | | |
199 | | static int late_hwdom_init(struct domain *d) |
200 | 2 | { |
201 | 2 | #ifdef CONFIG_LATE_HWDOM |
202 | | struct domain *dom0; |
203 | | int rv; |
204 | | |
205 | | if ( d != hardware_domain || d->domain_id == 0 ) |
206 | | return 0; |
207 | | |
208 | | rv = xsm_init_hardware_domain(XSM_HOOK, d); |
209 | | if ( rv ) |
210 | | return rv; |
211 | | |
212 | | printk("Initialising hardware domain %d\n", hardware_domid); |
213 | | |
214 | | dom0 = rcu_lock_domain_by_id(0); |
215 | | ASSERT(dom0 != NULL); |
216 | | /* |
217 | | * Hardware resource ranges for domain 0 have been set up from |
218 | | * various sources intended to restrict the hardware domain's |
219 | | * access. Apply these ranges to the actual hardware domain. |
220 | | * |
221 | | * Because the lists are being swapped, a side effect of this |
222 | | * operation is that Domain 0's rangesets are cleared. Since |
223 | | * domain 0 should not be accessing the hardware when it constructs |
224 | | * a hardware domain, this should not be a problem. Both lists |
225 | | * may be modified after this hypercall returns if a more complex |
226 | | * device model is desired. |
227 | | */ |
228 | | rangeset_swap(d->irq_caps, dom0->irq_caps); |
229 | | rangeset_swap(d->iomem_caps, dom0->iomem_caps); |
230 | | #ifdef CONFIG_X86 |
231 | | rangeset_swap(d->arch.ioport_caps, dom0->arch.ioport_caps); |
232 | | setup_io_bitmap(d); |
233 | | setup_io_bitmap(dom0); |
234 | | #endif |
235 | | |
236 | | rcu_unlock_domain(dom0); |
237 | | |
238 | | iommu_hwdom_init(d); |
239 | | |
240 | | return rv; |
241 | | #else |
242 | 2 | return 0; |
243 | 2 | #endif |
244 | 2 | } |
245 | | |
246 | | static unsigned int __read_mostly extra_hwdom_irqs; |
247 | | static unsigned int __read_mostly extra_domU_irqs = 32; |
248 | | |
249 | | static int __init parse_extra_guest_irqs(const char *s) |
250 | 0 | { |
251 | 0 | if ( isdigit(*s) ) |
252 | 0 | extra_domU_irqs = simple_strtoul(s, &s, 0); |
253 | 0 | if ( *s == ',' && isdigit(*++s) ) |
254 | 0 | extra_hwdom_irqs = simple_strtoul(s, &s, 0); |
255 | 0 |
|
256 | 0 | return *s ? -EINVAL : 0; |
257 | 0 | } |
258 | | custom_param("extra_guest_irqs", parse_extra_guest_irqs); |
259 | | |
260 | | struct domain *domain_create(domid_t domid, unsigned int domcr_flags, |
261 | | uint32_t ssidref, |
262 | | struct xen_arch_domainconfig *config) |
263 | 5 | { |
264 | 5 | struct domain *d, **pd, *old_hwdom = NULL; |
265 | 5 | enum { INIT_xsm = 1u<<0, INIT_watchdog = 1u<<1, INIT_rangeset = 1u<<2, |
266 | 5 | INIT_evtchn = 1u<<3, INIT_gnttab = 1u<<4, INIT_arch = 1u<<5 }; |
267 | 5 | int err, init_status = 0; |
268 | 5 | int poolid = CPUPOOLID_NONE; |
269 | 5 | |
270 | 5 | if ( (d = alloc_domain_struct()) == NULL ) |
271 | 0 | return ERR_PTR(-ENOMEM); |
272 | 5 | |
273 | 5 | d->domain_id = domid; |
274 | 5 | |
275 | 5 | TRACE_1D(TRC_DOM0_DOM_ADD, d->domain_id); |
276 | 5 | |
277 | 5 | lock_profile_register_struct(LOCKPROF_TYPE_PERDOM, d, domid, "Domain"); |
278 | 5 | |
279 | 5 | if ( (err = xsm_alloc_security_domain(d)) != 0 ) |
280 | 0 | goto fail; |
281 | 5 | init_status |= INIT_xsm; |
282 | 5 | |
283 | 5 | watchdog_domain_init(d); |
284 | 5 | init_status |= INIT_watchdog; |
285 | 5 | |
286 | 5 | atomic_set(&d->refcnt, 1); |
287 | 5 | spin_lock_init_prof(d, domain_lock); |
288 | 5 | spin_lock_init_prof(d, page_alloc_lock); |
289 | 5 | spin_lock_init(&d->hypercall_deadlock_mutex); |
290 | 5 | INIT_PAGE_LIST_HEAD(&d->page_list); |
291 | 5 | INIT_PAGE_LIST_HEAD(&d->xenpage_list); |
292 | 5 | |
293 | 5 | spin_lock_init(&d->node_affinity_lock); |
294 | 5 | d->node_affinity = NODE_MASK_ALL; |
295 | 5 | d->auto_node_affinity = 1; |
296 | 5 | |
297 | 5 | spin_lock_init(&d->shutdown_lock); |
298 | 5 | d->shutdown_code = SHUTDOWN_CODE_INVALID; |
299 | 5 | |
300 | 5 | spin_lock_init(&d->pbuf_lock); |
301 | 5 | |
302 | 5 | rwlock_init(&d->vnuma_rwlock); |
303 | 5 | |
304 | 5 | err = -ENOMEM; |
305 | 5 | if ( !zalloc_cpumask_var(&d->domain_dirty_cpumask) ) |
306 | 0 | goto fail; |
307 | 5 | |
308 | 5 | if ( domcr_flags & DOMCRF_hvm ) |
309 | 1 | d->guest_type = guest_type_hvm; |
310 | 5 | else |
311 | 4 | d->guest_type = guest_type_pv; |
312 | 5 | |
313 | 5 | if ( domid == 0 || domid == hardware_domid ) |
314 | 1 | { |
315 | 1 | if ( hardware_domid < 0 || hardware_domid >= DOMID_FIRST_RESERVED ) |
316 | 0 | panic("The value of hardware_dom must be a valid domain ID"); |
317 | 1 | d->is_pinned = opt_dom0_vcpus_pin; |
318 | 1 | d->disable_migrate = 1; |
319 | 1 | old_hwdom = hardware_domain; |
320 | 1 | hardware_domain = d; |
321 | 1 | } |
322 | 5 | |
323 | 5 | if ( domcr_flags & DOMCRF_xs_domain ) |
324 | 0 | { |
325 | 0 | d->is_xenstore = 1; |
326 | 0 | d->disable_migrate = 1; |
327 | 0 | } |
328 | 5 | |
329 | 5 | rangeset_domain_initialise(d); |
330 | 5 | init_status |= INIT_rangeset; |
331 | 5 | |
332 | 5 | d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex); |
333 | 5 | d->irq_caps = rangeset_new(d, "Interrupts", 0); |
334 | 5 | if ( (d->iomem_caps == NULL) || (d->irq_caps == NULL) ) |
335 | 0 | goto fail; |
336 | 5 | |
337 | 5 | if ( domcr_flags & DOMCRF_dummy ) |
338 | 3 | return d; |
339 | 5 | |
340 | 2 | if ( !is_idle_domain(d) ) |
341 | 1 | { |
342 | 1 | if ( (err = xsm_domain_create(XSM_HOOK, d, ssidref)) != 0 ) |
343 | 0 | goto fail; |
344 | 1 | |
345 | 1 | d->controller_pause_count = 1; |
346 | 1 | atomic_inc(&d->pause_count); |
347 | 1 | |
348 | 1 | if ( !is_hardware_domain(d) ) |
349 | 0 | d->nr_pirqs = nr_static_irqs + extra_domU_irqs; |
350 | 1 | else |
351 | 1 | d->nr_pirqs = extra_hwdom_irqs ? nr_static_irqs + extra_hwdom_irqs |
352 | 1 | : arch_hwdom_irqs(domid); |
353 | 1 | if ( d->nr_pirqs > nr_irqs ) |
354 | 0 | d->nr_pirqs = nr_irqs; |
355 | 1 | |
356 | 1 | radix_tree_init(&d->pirq_tree); |
357 | 1 | |
358 | 1 | if ( (err = evtchn_init(d)) != 0 ) |
359 | 0 | goto fail; |
360 | 1 | init_status |= INIT_evtchn; |
361 | 1 | |
362 | 1 | if ( (err = grant_table_create(d)) != 0 ) |
363 | 0 | goto fail; |
364 | 1 | init_status |= INIT_gnttab; |
365 | 1 | |
366 | 1 | poolid = 0; |
367 | 1 | |
368 | 1 | err = -ENOMEM; |
369 | 1 | |
370 | 1 | d->pbuf = xzalloc_array(char, DOMAIN_PBUF_SIZE); |
371 | 1 | if ( !d->pbuf ) |
372 | 0 | goto fail; |
373 | 1 | } |
374 | 2 | |
375 | 2 | if ( (err = arch_domain_create(d, domcr_flags, config)) != 0 ) |
376 | 0 | goto fail; |
377 | 2 | init_status |= INIT_arch; |
378 | 2 | |
379 | 2 | if ( (err = sched_init_domain(d, poolid)) != 0 ) |
380 | 0 | goto fail; |
381 | 2 | |
382 | 2 | if ( (err = late_hwdom_init(d)) != 0 ) |
383 | 0 | goto fail; |
384 | 2 | |
385 | 2 | if ( !is_idle_domain(d) ) |
386 | 1 | { |
387 | 1 | spin_lock(&domlist_update_lock); |
388 | 1 | pd = &domain_list; /* NB. domain_list maintained in order of domid. */ |
389 | 1 | for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list ) |
390 | 0 | if ( (*pd)->domain_id > d->domain_id ) |
391 | 0 | break; |
392 | 1 | d->next_in_list = *pd; |
393 | 1 | d->next_in_hashbucket = domain_hash[DOMAIN_HASH(domid)]; |
394 | 1 | rcu_assign_pointer(*pd, d); |
395 | 1 | rcu_assign_pointer(domain_hash[DOMAIN_HASH(domid)], d); |
396 | 1 | spin_unlock(&domlist_update_lock); |
397 | 1 | } |
398 | 2 | |
399 | 2 | return d; |
400 | 2 | |
401 | 0 | fail: |
402 | 0 | d->is_dying = DOMDYING_dead; |
403 | 0 | if ( hardware_domain == d ) |
404 | 0 | hardware_domain = old_hwdom; |
405 | 0 | atomic_set(&d->refcnt, DOMAIN_DESTROYED); |
406 | 0 | xfree(d->pbuf); |
407 | 0 | if ( init_status & INIT_arch ) |
408 | 0 | arch_domain_destroy(d); |
409 | 0 | if ( init_status & INIT_gnttab ) |
410 | 0 | grant_table_destroy(d); |
411 | 0 | if ( init_status & INIT_evtchn ) |
412 | 0 | { |
413 | 0 | evtchn_destroy(d); |
414 | 0 | evtchn_destroy_final(d); |
415 | 0 | radix_tree_destroy(&d->pirq_tree, free_pirq_struct); |
416 | 0 | } |
417 | 0 | if ( init_status & INIT_rangeset ) |
418 | 0 | rangeset_domain_destroy(d); |
419 | 0 | if ( init_status & INIT_watchdog ) |
420 | 0 | watchdog_domain_destroy(d); |
421 | 0 | if ( init_status & INIT_xsm ) |
422 | 0 | xsm_free_security_domain(d); |
423 | 0 | free_cpumask_var(d->domain_dirty_cpumask); |
424 | 0 | free_domain_struct(d); |
425 | 0 | return ERR_PTR(err); |
426 | 2 | } |
427 | | |
428 | | |
429 | | void domain_update_node_affinity(struct domain *d) |
430 | 12 | { |
431 | 12 | cpumask_var_t dom_cpumask, dom_cpumask_soft; |
432 | 12 | cpumask_t *dom_affinity; |
433 | 12 | const cpumask_t *online; |
434 | 12 | struct vcpu *v; |
435 | 12 | unsigned int cpu; |
436 | 12 | |
437 | 12 | /* Do we have vcpus already? If not, no need to update node-affinity. */ |
438 | 12 | if ( !d->vcpu || !d->vcpu[0] ) |
439 | 0 | return; |
440 | 12 | |
441 | 12 | if ( !zalloc_cpumask_var(&dom_cpumask) ) |
442 | 0 | return; |
443 | 12 | if ( !zalloc_cpumask_var(&dom_cpumask_soft) ) |
444 | 0 | { |
445 | 0 | free_cpumask_var(dom_cpumask); |
446 | 0 | return; |
447 | 0 | } |
448 | 12 | |
449 | 12 | online = cpupool_domain_cpumask(d); |
450 | 12 | |
451 | 12 | spin_lock(&d->node_affinity_lock); |
452 | 12 | |
453 | 12 | /* |
454 | 12 | * If d->auto_node_affinity is true, let's compute the domain's |
455 | 12 | * node-affinity and update d->node_affinity accordingly. if false, |
456 | 12 | * just leave d->auto_node_affinity alone. |
457 | 12 | */ |
458 | 12 | if ( d->auto_node_affinity ) |
459 | 12 | { |
460 | 12 | /* |
461 | 12 | * We want the narrowest possible set of pcpus (to get the narowest |
462 | 12 | * possible set of nodes). What we need is the cpumask of where the |
463 | 12 | * domain can run (the union of the hard affinity of all its vcpus), |
464 | 12 | * and the full mask of where it would prefer to run (the union of |
465 | 12 | * the soft affinity of all its various vcpus). Let's build them. |
466 | 12 | */ |
467 | 12 | for_each_vcpu ( d, v ) |
468 | 78 | { |
469 | 78 | cpumask_or(dom_cpumask, dom_cpumask, v->cpu_hard_affinity); |
470 | 78 | cpumask_or(dom_cpumask_soft, dom_cpumask_soft, |
471 | 78 | v->cpu_soft_affinity); |
472 | 78 | } |
473 | 12 | /* Filter out non-online cpus */ |
474 | 12 | cpumask_and(dom_cpumask, dom_cpumask, online); |
475 | 12 | ASSERT(!cpumask_empty(dom_cpumask)); |
476 | 12 | /* And compute the intersection between hard, online and soft */ |
477 | 12 | cpumask_and(dom_cpumask_soft, dom_cpumask_soft, dom_cpumask); |
478 | 12 | |
479 | 12 | /* |
480 | 12 | * If not empty, the intersection of hard, soft and online is the |
481 | 12 | * narrowest set we want. If empty, we fall back to hard&online. |
482 | 12 | */ |
483 | 12 | dom_affinity = cpumask_empty(dom_cpumask_soft) ? |
484 | 12 | dom_cpumask : dom_cpumask_soft; |
485 | 12 | |
486 | 12 | nodes_clear(d->node_affinity); |
487 | 12 | for_each_cpu ( cpu, dom_affinity ) |
488 | 144 | node_set(cpu_to_node(cpu), d->node_affinity); |
489 | 12 | } |
490 | 12 | |
491 | 12 | spin_unlock(&d->node_affinity_lock); |
492 | 12 | |
493 | 12 | free_cpumask_var(dom_cpumask_soft); |
494 | 12 | free_cpumask_var(dom_cpumask); |
495 | 12 | } |
496 | | |
497 | | |
498 | | int domain_set_node_affinity(struct domain *d, const nodemask_t *affinity) |
499 | 0 | { |
500 | 0 | /* Being affine with no nodes is just wrong */ |
501 | 0 | if ( nodes_empty(*affinity) ) |
502 | 0 | return -EINVAL; |
503 | 0 |
|
504 | 0 | spin_lock(&d->node_affinity_lock); |
505 | 0 |
|
506 | 0 | /* |
507 | 0 | * Being/becoming explicitly affine to all nodes is not particularly |
508 | 0 | * useful. Let's take it as the `reset node affinity` command. |
509 | 0 | */ |
510 | 0 | if ( nodes_full(*affinity) ) |
511 | 0 | { |
512 | 0 | d->auto_node_affinity = 1; |
513 | 0 | goto out; |
514 | 0 | } |
515 | 0 |
|
516 | 0 | d->auto_node_affinity = 0; |
517 | 0 | d->node_affinity = *affinity; |
518 | 0 |
|
519 | 0 | out: |
520 | 0 | spin_unlock(&d->node_affinity_lock); |
521 | 0 |
|
522 | 0 | domain_update_node_affinity(d); |
523 | 0 |
|
524 | 0 | return 0; |
525 | 0 | } |
526 | | |
527 | | |
528 | | struct domain *get_domain_by_id(domid_t dom) |
529 | 0 | { |
530 | 0 | struct domain *d; |
531 | 0 |
|
532 | 0 | rcu_read_lock(&domlist_read_lock); |
533 | 0 |
|
534 | 0 | for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]); |
535 | 0 | d != NULL; |
536 | 0 | d = rcu_dereference(d->next_in_hashbucket) ) |
537 | 0 | { |
538 | 0 | if ( d->domain_id == dom ) |
539 | 0 | { |
540 | 0 | if ( unlikely(!get_domain(d)) ) |
541 | 0 | d = NULL; |
542 | 0 | break; |
543 | 0 | } |
544 | 0 | } |
545 | 0 |
|
546 | 0 | rcu_read_unlock(&domlist_read_lock); |
547 | 0 |
|
548 | 0 | return d; |
549 | 0 | } |
550 | | |
551 | | |
552 | | struct domain *rcu_lock_domain_by_id(domid_t dom) |
553 | 4 | { |
554 | 4 | struct domain *d = NULL; |
555 | 4 | |
556 | 4 | rcu_read_lock(&domlist_read_lock); |
557 | 4 | |
558 | 4 | for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]); |
559 | 4 | d != NULL; |
560 | 0 | d = rcu_dereference(d->next_in_hashbucket) ) |
561 | 1 | { |
562 | 1 | if ( d->domain_id == dom ) |
563 | 1 | { |
564 | 1 | rcu_lock_domain(d); |
565 | 1 | break; |
566 | 1 | } |
567 | 1 | } |
568 | 4 | |
569 | 4 | rcu_read_unlock(&domlist_read_lock); |
570 | 4 | |
571 | 4 | return d; |
572 | 4 | } |
573 | | |
574 | | struct domain *rcu_lock_domain_by_any_id(domid_t dom) |
575 | 11 | { |
576 | 11 | if ( dom == DOMID_SELF ) |
577 | 11 | return rcu_lock_current_domain(); |
578 | 0 | return rcu_lock_domain_by_id(dom); |
579 | 11 | } |
580 | | |
581 | | int rcu_lock_remote_domain_by_id(domid_t dom, struct domain **d) |
582 | 0 | { |
583 | 0 | if ( (*d = rcu_lock_domain_by_id(dom)) == NULL ) |
584 | 0 | return -ESRCH; |
585 | 0 |
|
586 | 0 | if ( *d == current->domain ) |
587 | 0 | { |
588 | 0 | rcu_unlock_domain(*d); |
589 | 0 | return -EPERM; |
590 | 0 | } |
591 | 0 |
|
592 | 0 | return 0; |
593 | 0 | } |
594 | | |
595 | | int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d) |
596 | 0 | { |
597 | 0 | int rv; |
598 | 0 | rv = rcu_lock_remote_domain_by_id(dom, d); |
599 | 0 | if ( rv ) |
600 | 0 | return rv; |
601 | 0 | if ( (*d)->is_dying ) |
602 | 0 | { |
603 | 0 | rcu_unlock_domain(*d); |
604 | 0 | return -EINVAL; |
605 | 0 | } |
606 | 0 |
|
607 | 0 | return 0; |
608 | 0 | } |
609 | | |
610 | | int domain_kill(struct domain *d) |
611 | 0 | { |
612 | 0 | int rc = 0; |
613 | 0 | struct vcpu *v; |
614 | 0 |
|
615 | 0 | if ( d == current->domain ) |
616 | 0 | return -EINVAL; |
617 | 0 |
|
618 | 0 | /* Protected by domctl_lock. */ |
619 | 0 | switch ( d->is_dying ) |
620 | 0 | { |
621 | 0 | case DOMDYING_alive: |
622 | 0 | domain_pause(d); |
623 | 0 | d->is_dying = DOMDYING_dying; |
624 | 0 | spin_barrier(&d->domain_lock); |
625 | 0 | evtchn_destroy(d); |
626 | 0 | gnttab_release_mappings(d); |
627 | 0 | tmem_destroy(d->tmem_client); |
628 | 0 | vnuma_destroy(d->vnuma); |
629 | 0 | domain_set_outstanding_pages(d, 0); |
630 | 0 | d->tmem_client = NULL; |
631 | 0 | /* fallthrough */ |
632 | 0 | case DOMDYING_dying: |
633 | 0 | rc = domain_relinquish_resources(d); |
634 | 0 | if ( rc != 0 ) |
635 | 0 | break; |
636 | 0 | if ( cpupool_move_domain(d, cpupool0) ) |
637 | 0 | return -ERESTART; |
638 | 0 | for_each_vcpu ( d, v ) |
639 | 0 | unmap_vcpu_info(v); |
640 | 0 | d->is_dying = DOMDYING_dead; |
641 | 0 | /* Mem event cleanup has to go here because the rings |
642 | 0 | * have to be put before we call put_domain. */ |
643 | 0 | vm_event_cleanup(d); |
644 | 0 | put_domain(d); |
645 | 0 | send_global_virq(VIRQ_DOM_EXC); |
646 | 0 | /* fallthrough */ |
647 | 0 | case DOMDYING_dead: |
648 | 0 | break; |
649 | 0 | } |
650 | 0 |
|
651 | 0 | return rc; |
652 | 0 | } |
653 | | |
654 | | |
655 | | void __domain_crash(struct domain *d) |
656 | 0 | { |
657 | 0 | if ( d->is_shutting_down ) |
658 | 0 | { |
659 | 0 | /* Print nothing: the domain is already shutting down. */ |
660 | 0 | } |
661 | 0 | else if ( d == current->domain ) |
662 | 0 | { |
663 | 0 | printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n", |
664 | 0 | d->domain_id, current->vcpu_id, smp_processor_id()); |
665 | 0 | show_execution_state(guest_cpu_user_regs()); |
666 | 0 | } |
667 | 0 | else |
668 | 0 | { |
669 | 0 | printk("Domain %d reported crashed by domain %d on cpu#%d:\n", |
670 | 0 | d->domain_id, current->domain->domain_id, smp_processor_id()); |
671 | 0 | } |
672 | 0 |
|
673 | 0 | domain_shutdown(d, SHUTDOWN_crash); |
674 | 0 | } |
675 | | |
676 | | |
677 | | void __domain_crash_synchronous(void) |
678 | 0 | { |
679 | 0 | __domain_crash(current->domain); |
680 | 0 |
|
681 | 0 | vcpu_end_shutdown_deferral(current); |
682 | 0 |
|
683 | 0 | for ( ; ; ) |
684 | 0 | do_softirq(); |
685 | 0 | } |
686 | | |
687 | | |
688 | | void domain_shutdown(struct domain *d, u8 reason) |
689 | 0 | { |
690 | 0 | struct vcpu *v; |
691 | 0 |
|
692 | 0 | spin_lock(&d->shutdown_lock); |
693 | 0 |
|
694 | 0 | if ( d->shutdown_code == SHUTDOWN_CODE_INVALID ) |
695 | 0 | d->shutdown_code = reason; |
696 | 0 | reason = d->shutdown_code; |
697 | 0 |
|
698 | 0 | if ( is_hardware_domain(d) ) |
699 | 0 | hwdom_shutdown(reason); |
700 | 0 |
|
701 | 0 | if ( d->is_shutting_down ) |
702 | 0 | { |
703 | 0 | spin_unlock(&d->shutdown_lock); |
704 | 0 | return; |
705 | 0 | } |
706 | 0 |
|
707 | 0 | d->is_shutting_down = 1; |
708 | 0 |
|
709 | 0 | smp_mb(); /* set shutdown status /then/ check for per-cpu deferrals */ |
710 | 0 |
|
711 | 0 | for_each_vcpu ( d, v ) |
712 | 0 | { |
713 | 0 | if ( reason == SHUTDOWN_crash ) |
714 | 0 | v->defer_shutdown = 0; |
715 | 0 | else if ( v->defer_shutdown ) |
716 | 0 | continue; |
717 | 0 | vcpu_pause_nosync(v); |
718 | 0 | v->paused_for_shutdown = 1; |
719 | 0 | } |
720 | 0 |
|
721 | 0 | arch_domain_shutdown(d); |
722 | 0 |
|
723 | 0 | __domain_finalise_shutdown(d); |
724 | 0 |
|
725 | 0 | spin_unlock(&d->shutdown_lock); |
726 | 0 | } |
727 | | |
728 | | void domain_resume(struct domain *d) |
729 | 0 | { |
730 | 0 | struct vcpu *v; |
731 | 0 |
|
732 | 0 | /* |
733 | 0 | * Some code paths assume that shutdown status does not get reset under |
734 | 0 | * their feet (e.g., some assertions make this assumption). |
735 | 0 | */ |
736 | 0 | domain_pause(d); |
737 | 0 |
|
738 | 0 | spin_lock(&d->shutdown_lock); |
739 | 0 |
|
740 | 0 | d->is_shutting_down = d->is_shut_down = 0; |
741 | 0 | d->shutdown_code = SHUTDOWN_CODE_INVALID; |
742 | 0 |
|
743 | 0 | for_each_vcpu ( d, v ) |
744 | 0 | { |
745 | 0 | if ( v->paused_for_shutdown ) |
746 | 0 | vcpu_unpause(v); |
747 | 0 | v->paused_for_shutdown = 0; |
748 | 0 | } |
749 | 0 |
|
750 | 0 | spin_unlock(&d->shutdown_lock); |
751 | 0 |
|
752 | 0 | domain_unpause(d); |
753 | 0 | } |
754 | | |
755 | | int vcpu_start_shutdown_deferral(struct vcpu *v) |
756 | 0 | { |
757 | 0 | if ( v->defer_shutdown ) |
758 | 0 | return 1; |
759 | 0 |
|
760 | 0 | v->defer_shutdown = 1; |
761 | 0 | smp_mb(); /* set deferral status /then/ check for shutdown */ |
762 | 0 | if ( unlikely(v->domain->is_shutting_down) ) |
763 | 0 | vcpu_check_shutdown(v); |
764 | 0 |
|
765 | 0 | return v->defer_shutdown; |
766 | 0 | } |
767 | | |
768 | | void vcpu_end_shutdown_deferral(struct vcpu *v) |
769 | 0 | { |
770 | 0 | v->defer_shutdown = 0; |
771 | 0 | smp_mb(); /* clear deferral status /then/ check for shutdown */ |
772 | 0 | if ( unlikely(v->domain->is_shutting_down) ) |
773 | 0 | vcpu_check_shutdown(v); |
774 | 0 | } |
775 | | |
776 | | #ifdef CONFIG_HAS_GDBSX |
777 | | void domain_pause_for_debugger(void) |
778 | 0 | { |
779 | 0 | struct vcpu *curr = current; |
780 | 0 | struct domain *d = curr->domain; |
781 | 0 |
|
782 | 0 | domain_pause_by_systemcontroller_nosync(d); |
783 | 0 |
|
784 | 0 | /* if gdbsx active, we just need to pause the domain */ |
785 | 0 | if ( curr->arch.gdbsx_vcpu_event == 0 ) |
786 | 0 | send_global_virq(VIRQ_DEBUGGER); |
787 | 0 | } |
788 | | #endif |
789 | | |
790 | | /* Complete domain destroy after RCU readers are not holding old references. */ |
791 | | static void complete_domain_destroy(struct rcu_head *head) |
792 | 0 | { |
793 | 0 | struct domain *d = container_of(head, struct domain, rcu); |
794 | 0 | struct vcpu *v; |
795 | 0 | int i; |
796 | 0 |
|
797 | 0 | for ( i = d->max_vcpus - 1; i >= 0; i-- ) |
798 | 0 | { |
799 | 0 | if ( (v = d->vcpu[i]) == NULL ) |
800 | 0 | continue; |
801 | 0 | tasklet_kill(&v->continue_hypercall_tasklet); |
802 | 0 | vcpu_destroy(v); |
803 | 0 | sched_destroy_vcpu(v); |
804 | 0 | destroy_waitqueue_vcpu(v); |
805 | 0 | } |
806 | 0 |
|
807 | 0 | grant_table_destroy(d); |
808 | 0 |
|
809 | 0 | arch_domain_destroy(d); |
810 | 0 |
|
811 | 0 | watchdog_domain_destroy(d); |
812 | 0 |
|
813 | 0 | rangeset_domain_destroy(d); |
814 | 0 |
|
815 | 0 | sched_destroy_domain(d); |
816 | 0 |
|
817 | 0 | /* Free page used by xen oprofile buffer. */ |
818 | 0 | #ifdef CONFIG_XENOPROF |
819 | 0 | free_xenoprof_pages(d); |
820 | 0 | #endif |
821 | 0 |
|
822 | 0 | #ifdef CONFIG_HAS_MEM_PAGING |
823 | 0 | xfree(d->vm_event_paging); |
824 | 0 | #endif |
825 | 0 | xfree(d->vm_event_monitor); |
826 | 0 | #ifdef CONFIG_HAS_MEM_SHARING |
827 | 0 | xfree(d->vm_event_share); |
828 | 0 | #endif |
829 | 0 |
|
830 | 0 | xfree(d->pbuf); |
831 | 0 |
|
832 | 0 | for ( i = d->max_vcpus - 1; i >= 0; i-- ) |
833 | 0 | if ( (v = d->vcpu[i]) != NULL ) |
834 | 0 | { |
835 | 0 | free_cpumask_var(v->cpu_hard_affinity); |
836 | 0 | free_cpumask_var(v->cpu_hard_affinity_tmp); |
837 | 0 | free_cpumask_var(v->cpu_hard_affinity_saved); |
838 | 0 | free_cpumask_var(v->cpu_soft_affinity); |
839 | 0 | free_cpumask_var(v->vcpu_dirty_cpumask); |
840 | 0 | free_vcpu_struct(v); |
841 | 0 | } |
842 | 0 |
|
843 | 0 | if ( d->target != NULL ) |
844 | 0 | put_domain(d->target); |
845 | 0 |
|
846 | 0 | evtchn_destroy_final(d); |
847 | 0 |
|
848 | 0 | radix_tree_destroy(&d->pirq_tree, free_pirq_struct); |
849 | 0 |
|
850 | 0 | xsm_free_security_domain(d); |
851 | 0 | free_cpumask_var(d->domain_dirty_cpumask); |
852 | 0 | xfree(d->vcpu); |
853 | 0 | free_domain_struct(d); |
854 | 0 |
|
855 | 0 | send_global_virq(VIRQ_DOM_EXC); |
856 | 0 | } |
857 | | |
858 | | /* Release resources belonging to task @p. */ |
859 | | void domain_destroy(struct domain *d) |
860 | 0 | { |
861 | 0 | struct domain **pd; |
862 | 0 |
|
863 | 0 | BUG_ON(!d->is_dying); |
864 | 0 |
|
865 | 0 | /* May be already destroyed, or get_domain() can race us. */ |
866 | 0 | if ( atomic_cmpxchg(&d->refcnt, 0, DOMAIN_DESTROYED) != 0 ) |
867 | 0 | return; |
868 | 0 |
|
869 | 0 | TRACE_1D(TRC_DOM0_DOM_REM, d->domain_id); |
870 | 0 |
|
871 | 0 | /* Delete from task list and task hashtable. */ |
872 | 0 | spin_lock(&domlist_update_lock); |
873 | 0 | pd = &domain_list; |
874 | 0 | while ( *pd != d ) |
875 | 0 | pd = &(*pd)->next_in_list; |
876 | 0 | rcu_assign_pointer(*pd, d->next_in_list); |
877 | 0 | pd = &domain_hash[DOMAIN_HASH(d->domain_id)]; |
878 | 0 | while ( *pd != d ) |
879 | 0 | pd = &(*pd)->next_in_hashbucket; |
880 | 0 | rcu_assign_pointer(*pd, d->next_in_hashbucket); |
881 | 0 | spin_unlock(&domlist_update_lock); |
882 | 0 |
|
883 | 0 | /* Schedule RCU asynchronous completion of domain destroy. */ |
884 | 0 | call_rcu(&d->rcu, complete_domain_destroy); |
885 | 0 | } |
886 | | |
887 | | void vcpu_pause(struct vcpu *v) |
888 | 315 | { |
889 | 315 | ASSERT(v != current); |
890 | 315 | atomic_inc(&v->pause_count); |
891 | 315 | vcpu_sleep_sync(v); |
892 | 315 | } |
893 | | |
894 | | void vcpu_pause_nosync(struct vcpu *v) |
895 | 44 | { |
896 | 44 | atomic_inc(&v->pause_count); |
897 | 44 | vcpu_sleep_nosync(v); |
898 | 44 | } |
899 | | |
900 | | void vcpu_unpause(struct vcpu *v) |
901 | 359 | { |
902 | 359 | if ( atomic_dec_and_test(&v->pause_count) ) |
903 | 172 | vcpu_wake(v); |
904 | 359 | } |
905 | | |
906 | | int vcpu_pause_by_systemcontroller(struct vcpu *v) |
907 | 0 | { |
908 | 0 | int old, new, prev = v->controller_pause_count; |
909 | 0 |
|
910 | 0 | do |
911 | 0 | { |
912 | 0 | old = prev; |
913 | 0 | new = old + 1; |
914 | 0 |
|
915 | 0 | if ( new > 255 ) |
916 | 0 | return -EOVERFLOW; |
917 | 0 |
|
918 | 0 | prev = cmpxchg(&v->controller_pause_count, old, new); |
919 | 0 | } while ( prev != old ); |
920 | 0 |
|
921 | 0 | vcpu_pause(v); |
922 | 0 |
|
923 | 0 | return 0; |
924 | 0 | } |
925 | | |
926 | | int vcpu_unpause_by_systemcontroller(struct vcpu *v) |
927 | 0 | { |
928 | 0 | int old, new, prev = v->controller_pause_count; |
929 | 0 |
|
930 | 0 | do |
931 | 0 | { |
932 | 0 | old = prev; |
933 | 0 | new = old - 1; |
934 | 0 |
|
935 | 0 | if ( new < 0 ) |
936 | 0 | return -EINVAL; |
937 | 0 |
|
938 | 0 | prev = cmpxchg(&v->controller_pause_count, old, new); |
939 | 0 | } while ( prev != old ); |
940 | 0 |
|
941 | 0 | vcpu_unpause(v); |
942 | 0 |
|
943 | 0 | return 0; |
944 | 0 | } |
945 | | |
946 | | static void do_domain_pause(struct domain *d, |
947 | | void (*sleep_fn)(struct vcpu *v)) |
948 | 1 | { |
949 | 1 | struct vcpu *v; |
950 | 1 | |
951 | 1 | atomic_inc(&d->pause_count); |
952 | 1 | |
953 | 1 | for_each_vcpu( d, v ) |
954 | 0 | sleep_fn(v); |
955 | 1 | |
956 | 1 | arch_domain_pause(d); |
957 | 1 | } |
958 | | |
959 | | void domain_pause(struct domain *d) |
960 | 1 | { |
961 | 1 | ASSERT(d != current->domain); |
962 | 1 | do_domain_pause(d, vcpu_sleep_sync); |
963 | 1 | } |
964 | | |
965 | | void domain_pause_nosync(struct domain *d) |
966 | 0 | { |
967 | 0 | do_domain_pause(d, vcpu_sleep_nosync); |
968 | 0 | } |
969 | | |
970 | | void domain_unpause(struct domain *d) |
971 | 2 | { |
972 | 2 | struct vcpu *v; |
973 | 2 | |
974 | 2 | arch_domain_unpause(d); |
975 | 2 | |
976 | 2 | if ( atomic_dec_and_test(&d->pause_count) ) |
977 | 1 | for_each_vcpu( d, v ) |
978 | 12 | vcpu_wake(v); |
979 | 2 | } |
980 | | |
981 | | int __domain_pause_by_systemcontroller(struct domain *d, |
982 | | void (*pause_fn)(struct domain *d)) |
983 | 0 | { |
984 | 0 | int old, new, prev = d->controller_pause_count; |
985 | 0 |
|
986 | 0 | do |
987 | 0 | { |
988 | 0 | old = prev; |
989 | 0 | new = old + 1; |
990 | 0 |
|
991 | 0 | /* |
992 | 0 | * Limit the toolstack pause count to an arbitrary 255 to prevent the |
993 | 0 | * toolstack overflowing d->pause_count with many repeated hypercalls. |
994 | 0 | */ |
995 | 0 | if ( new > 255 ) |
996 | 0 | return -EOVERFLOW; |
997 | 0 |
|
998 | 0 | prev = cmpxchg(&d->controller_pause_count, old, new); |
999 | 0 | } while ( prev != old ); |
1000 | 0 |
|
1001 | 0 | pause_fn(d); |
1002 | 0 |
|
1003 | 0 | return 0; |
1004 | 0 | } |
1005 | | |
1006 | | int domain_unpause_by_systemcontroller(struct domain *d) |
1007 | 1 | { |
1008 | 1 | int old, new, prev = d->controller_pause_count; |
1009 | 1 | |
1010 | 1 | do |
1011 | 1 | { |
1012 | 1 | old = prev; |
1013 | 1 | new = old - 1; |
1014 | 1 | |
1015 | 1 | if ( new < 0 ) |
1016 | 0 | return -EINVAL; |
1017 | 1 | |
1018 | 1 | prev = cmpxchg(&d->controller_pause_count, old, new); |
1019 | 1 | } while ( prev != old ); |
1020 | 1 | |
1021 | 1 | /* |
1022 | 1 | * d->controller_pause_count is initialised to 1, and the toolstack is |
1023 | 1 | * responsible for making one unpause hypercall when it wishes the guest |
1024 | 1 | * to start running. |
1025 | 1 | * |
1026 | 1 | * All other toolstack operations should make a pair of pause/unpause |
1027 | 1 | * calls and rely on the reference counting here. |
1028 | 1 | * |
1029 | 1 | * Creation is considered finished when the controller reference count |
1030 | 1 | * first drops to 0. |
1031 | 1 | */ |
1032 | 1 | if ( new == 0 ) |
1033 | 1 | d->creation_finished = true; |
1034 | 1 | |
1035 | 1 | domain_unpause(d); |
1036 | 1 | |
1037 | 1 | return 0; |
1038 | 1 | } |
1039 | | |
1040 | | void domain_pause_except_self(struct domain *d) |
1041 | 0 | { |
1042 | 0 | struct vcpu *v, *curr = current; |
1043 | 0 |
|
1044 | 0 | if ( curr->domain == d ) |
1045 | 0 | { |
1046 | 0 | for_each_vcpu( d, v ) |
1047 | 0 | if ( likely(v != curr) ) |
1048 | 0 | vcpu_pause(v); |
1049 | 0 | } |
1050 | 0 | else |
1051 | 0 | domain_pause(d); |
1052 | 0 | } |
1053 | | |
1054 | | void domain_unpause_except_self(struct domain *d) |
1055 | 0 | { |
1056 | 0 | struct vcpu *v, *curr = current; |
1057 | 0 |
|
1058 | 0 | if ( curr->domain == d ) |
1059 | 0 | { |
1060 | 0 | for_each_vcpu( d, v ) |
1061 | 0 | if ( likely(v != curr) ) |
1062 | 0 | vcpu_unpause(v); |
1063 | 0 | } |
1064 | 0 | else |
1065 | 0 | domain_unpause(d); |
1066 | 0 | } |
1067 | | |
1068 | | int domain_soft_reset(struct domain *d) |
1069 | 0 | { |
1070 | 0 | struct vcpu *v; |
1071 | 0 | int rc; |
1072 | 0 |
|
1073 | 0 | spin_lock(&d->shutdown_lock); |
1074 | 0 | for_each_vcpu ( d, v ) |
1075 | 0 | if ( !v->paused_for_shutdown ) |
1076 | 0 | { |
1077 | 0 | spin_unlock(&d->shutdown_lock); |
1078 | 0 | return -EINVAL; |
1079 | 0 | } |
1080 | 0 | spin_unlock(&d->shutdown_lock); |
1081 | 0 |
|
1082 | 0 | rc = evtchn_reset(d); |
1083 | 0 | if ( rc ) |
1084 | 0 | return rc; |
1085 | 0 |
|
1086 | 0 | grant_table_warn_active_grants(d); |
1087 | 0 |
|
1088 | 0 | for_each_vcpu ( d, v ) |
1089 | 0 | { |
1090 | 0 | set_xen_guest_handle(runstate_guest(v), NULL); |
1091 | 0 | unmap_vcpu_info(v); |
1092 | 0 | } |
1093 | 0 |
|
1094 | 0 | rc = arch_domain_soft_reset(d); |
1095 | 0 | if ( !rc ) |
1096 | 0 | domain_resume(d); |
1097 | 0 | else |
1098 | 0 | domain_crash(d); |
1099 | 0 |
|
1100 | 0 | return rc; |
1101 | 0 | } |
1102 | | |
1103 | | int vcpu_reset(struct vcpu *v) |
1104 | 0 | { |
1105 | 0 | struct domain *d = v->domain; |
1106 | 0 | int rc; |
1107 | 0 |
|
1108 | 0 | vcpu_pause(v); |
1109 | 0 | domain_lock(d); |
1110 | 0 |
|
1111 | 0 | set_bit(_VPF_in_reset, &v->pause_flags); |
1112 | 0 | rc = arch_vcpu_reset(v); |
1113 | 0 | if ( rc ) |
1114 | 0 | goto out_unlock; |
1115 | 0 |
|
1116 | 0 | set_bit(_VPF_down, &v->pause_flags); |
1117 | 0 |
|
1118 | 0 | clear_bit(v->vcpu_id, d->poll_mask); |
1119 | 0 | v->poll_evtchn = 0; |
1120 | 0 |
|
1121 | 0 | v->fpu_initialised = 0; |
1122 | 0 | v->fpu_dirtied = 0; |
1123 | 0 | v->is_initialised = 0; |
1124 | 0 | #ifdef VCPU_TRAP_LAST |
1125 | 0 | v->async_exception_mask = 0; |
1126 | 0 | memset(v->async_exception_state, 0, sizeof(v->async_exception_state)); |
1127 | 0 | #endif |
1128 | 0 | cpumask_clear(v->cpu_hard_affinity_tmp); |
1129 | 0 | clear_bit(_VPF_blocked, &v->pause_flags); |
1130 | 0 | clear_bit(_VPF_in_reset, &v->pause_flags); |
1131 | 0 |
|
1132 | 0 | out_unlock: |
1133 | 0 | domain_unlock(v->domain); |
1134 | 0 | vcpu_unpause(v); |
1135 | 0 |
|
1136 | 0 | return rc; |
1137 | 0 | } |
1138 | | |
1139 | | /* |
1140 | | * Map a guest page in and point the vcpu_info pointer at it. This |
1141 | | * makes sure that the vcpu_info is always pointing at a valid piece |
1142 | | * of memory, and it sets a pending event to make sure that a pending |
1143 | | * event doesn't get missed. |
1144 | | */ |
1145 | | int map_vcpu_info(struct vcpu *v, unsigned long gfn, unsigned offset) |
1146 | 12 | { |
1147 | 12 | struct domain *d = v->domain; |
1148 | 12 | void *mapping; |
1149 | 12 | vcpu_info_t *new_info; |
1150 | 12 | struct page_info *page; |
1151 | 12 | int i; |
1152 | 12 | |
1153 | 12 | if ( offset > (PAGE_SIZE - sizeof(vcpu_info_t)) ) |
1154 | 0 | return -EINVAL; |
1155 | 12 | |
1156 | 12 | if ( !mfn_eq(v->vcpu_info_mfn, INVALID_MFN) ) |
1157 | 0 | return -EINVAL; |
1158 | 12 | |
1159 | 12 | /* Run this command on yourself or on other offline VCPUS. */ |
1160 | 12 | if ( (v != current) && !(v->pause_flags & VPF_down) ) |
1161 | 0 | return -EINVAL; |
1162 | 12 | |
1163 | 12 | page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC); |
1164 | 12 | if ( !page ) |
1165 | 0 | return -EINVAL; |
1166 | 12 | |
1167 | 12 | if ( !get_page_type(page, PGT_writable_page) ) |
1168 | 0 | { |
1169 | 0 | put_page(page); |
1170 | 0 | return -EINVAL; |
1171 | 0 | } |
1172 | 12 | |
1173 | 12 | mapping = __map_domain_page_global(page); |
1174 | 12 | if ( mapping == NULL ) |
1175 | 0 | { |
1176 | 0 | put_page_and_type(page); |
1177 | 0 | return -ENOMEM; |
1178 | 0 | } |
1179 | 12 | |
1180 | 12 | new_info = (vcpu_info_t *)(mapping + offset); |
1181 | 12 | |
1182 | 12 | if ( v->vcpu_info == &dummy_vcpu_info ) |
1183 | 0 | { |
1184 | 0 | memset(new_info, 0, sizeof(*new_info)); |
1185 | 0 | #ifdef XEN_HAVE_PV_UPCALL_MASK |
1186 | 0 | __vcpu_info(v, new_info, evtchn_upcall_mask) = 1; |
1187 | 0 | #endif |
1188 | 0 | } |
1189 | 12 | else |
1190 | 12 | { |
1191 | 12 | memcpy(new_info, v->vcpu_info, sizeof(*new_info)); |
1192 | 12 | } |
1193 | 12 | |
1194 | 12 | v->vcpu_info = new_info; |
1195 | 12 | v->vcpu_info_mfn = _mfn(page_to_mfn(page)); |
1196 | 12 | |
1197 | 12 | /* Set new vcpu_info pointer /before/ setting pending flags. */ |
1198 | 12 | smp_wmb(); |
1199 | 12 | |
1200 | 12 | /* |
1201 | 12 | * Mark everything as being pending just to make sure nothing gets |
1202 | 12 | * lost. The domain will get a spurious event, but it can cope. |
1203 | 12 | */ |
1204 | 12 | vcpu_info(v, evtchn_upcall_pending) = 1; |
1205 | 780 | for ( i = 0; i < BITS_PER_EVTCHN_WORD(d); i++ ) |
1206 | 768 | set_bit(i, &vcpu_info(v, evtchn_pending_sel)); |
1207 | 12 | arch_evtchn_inject(v); |
1208 | 12 | |
1209 | 12 | return 0; |
1210 | 12 | } |
1211 | | |
1212 | | /* |
1213 | | * Unmap the vcpu info page if the guest decided to place it somewhere |
1214 | | * else. This is used from domain_kill() and domain_soft_reset(). |
1215 | | */ |
1216 | | void unmap_vcpu_info(struct vcpu *v) |
1217 | 0 | { |
1218 | 0 | mfn_t mfn = v->vcpu_info_mfn; |
1219 | 0 |
|
1220 | 0 | if ( mfn_eq(mfn, INVALID_MFN) ) |
1221 | 0 | return; |
1222 | 0 |
|
1223 | 0 | unmap_domain_page_global((void *) |
1224 | 0 | ((unsigned long)v->vcpu_info & PAGE_MASK)); |
1225 | 0 |
|
1226 | 0 | vcpu_info_reset(v); /* NB: Clobbers v->vcpu_info_mfn */ |
1227 | 0 |
|
1228 | 0 | put_page_and_type(mfn_to_page(mfn_x(mfn))); |
1229 | 0 | } |
1230 | | |
1231 | | int default_initialise_vcpu(struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg) |
1232 | 0 | { |
1233 | 0 | struct vcpu_guest_context *ctxt; |
1234 | 0 | struct domain *d = v->domain; |
1235 | 0 | int rc; |
1236 | 0 |
|
1237 | 0 | if ( (ctxt = alloc_vcpu_guest_context()) == NULL ) |
1238 | 0 | return -ENOMEM; |
1239 | 0 |
|
1240 | 0 | if ( copy_from_guest(ctxt, arg, 1) ) |
1241 | 0 | { |
1242 | 0 | free_vcpu_guest_context(ctxt); |
1243 | 0 | return -EFAULT; |
1244 | 0 | } |
1245 | 0 |
|
1246 | 0 | domain_lock(d); |
1247 | 0 | rc = v->is_initialised ? -EEXIST : arch_set_info_guest(v, ctxt); |
1248 | 0 | domain_unlock(d); |
1249 | 0 |
|
1250 | 0 | free_vcpu_guest_context(ctxt); |
1251 | 0 |
|
1252 | 0 | return rc; |
1253 | 0 | } |
1254 | | |
1255 | | long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg) |
1256 | 91.2k | { |
1257 | 91.2k | struct domain *d = current->domain; |
1258 | 91.2k | struct vcpu *v; |
1259 | 91.2k | long rc = 0; |
1260 | 91.2k | |
1261 | 91.2k | if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL ) |
1262 | 0 | return -ENOENT; |
1263 | 91.2k | |
1264 | 91.2k | switch ( cmd ) |
1265 | 91.2k | { |
1266 | 0 | case VCPUOP_initialise: |
1267 | 0 | if ( v->vcpu_info == &dummy_vcpu_info ) |
1268 | 0 | return -EINVAL; |
1269 | 0 |
|
1270 | 0 | rc = arch_initialise_vcpu(v, arg); |
1271 | 0 | if ( rc == -ERESTART ) |
1272 | 0 | rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iuh", |
1273 | 0 | cmd, vcpuid, arg); |
1274 | 0 |
|
1275 | 0 | break; |
1276 | 0 |
|
1277 | 0 | case VCPUOP_up: { |
1278 | 0 | bool_t wake = 0; |
1279 | 0 | domain_lock(d); |
1280 | 0 | if ( !v->is_initialised ) |
1281 | 0 | rc = -EINVAL; |
1282 | 0 | else |
1283 | 0 | wake = test_and_clear_bit(_VPF_down, &v->pause_flags); |
1284 | 0 | domain_unlock(d); |
1285 | 0 | if ( wake ) |
1286 | 0 | vcpu_wake(v); |
1287 | 0 | break; |
1288 | 0 | } |
1289 | 0 |
|
1290 | 0 | case VCPUOP_down: |
1291 | 0 | if ( !test_and_set_bit(_VPF_down, &v->pause_flags) ) |
1292 | 0 | vcpu_sleep_nosync(v); |
1293 | 0 | break; |
1294 | 0 |
|
1295 | 0 | case VCPUOP_is_up: |
1296 | 0 | rc = !(v->pause_flags & VPF_down); |
1297 | 0 | break; |
1298 | 0 |
|
1299 | 0 | case VCPUOP_get_runstate_info: |
1300 | 0 | { |
1301 | 0 | struct vcpu_runstate_info runstate; |
1302 | 0 | vcpu_runstate_get(v, &runstate); |
1303 | 0 | if ( copy_to_guest(arg, &runstate, 1) ) |
1304 | 0 | rc = -EFAULT; |
1305 | 0 | break; |
1306 | 0 | } |
1307 | 0 |
|
1308 | 0 | case VCPUOP_set_periodic_timer: |
1309 | 0 | { |
1310 | 0 | struct vcpu_set_periodic_timer set; |
1311 | 0 |
|
1312 | 0 | if ( copy_from_guest(&set, arg, 1) ) |
1313 | 0 | return -EFAULT; |
1314 | 0 |
|
1315 | 0 | if ( set.period_ns < MILLISECS(1) ) |
1316 | 0 | return -EINVAL; |
1317 | 0 |
|
1318 | 0 | if ( set.period_ns > STIME_DELTA_MAX ) |
1319 | 0 | return -EINVAL; |
1320 | 0 |
|
1321 | 0 | v->periodic_period = set.period_ns; |
1322 | 0 | vcpu_force_reschedule(v); |
1323 | 0 |
|
1324 | 0 | break; |
1325 | 0 | } |
1326 | 0 |
|
1327 | 14 | case VCPUOP_stop_periodic_timer: |
1328 | 14 | v->periodic_period = 0; |
1329 | 14 | vcpu_force_reschedule(v); |
1330 | 14 | break; |
1331 | 0 |
|
1332 | 90.9k | case VCPUOP_set_singleshot_timer: |
1333 | 90.9k | { |
1334 | 90.9k | struct vcpu_set_singleshot_timer set; |
1335 | 90.9k | |
1336 | 90.9k | if ( v != current ) |
1337 | 0 | return -EINVAL; |
1338 | 90.9k | |
1339 | 90.9k | if ( copy_from_guest(&set, arg, 1) ) |
1340 | 0 | return -EFAULT; |
1341 | 90.9k | |
1342 | 90.9k | if ( (set.flags & VCPU_SSHOTTMR_future) && |
1343 | 0 | (set.timeout_abs_ns < NOW()) ) |
1344 | 0 | return -ETIME; |
1345 | 90.9k | |
1346 | 90.9k | migrate_timer(&v->singleshot_timer, smp_processor_id()); |
1347 | 90.9k | set_timer(&v->singleshot_timer, set.timeout_abs_ns); |
1348 | 90.9k | |
1349 | 90.9k | break; |
1350 | 90.9k | } |
1351 | 90.9k | |
1352 | 0 | case VCPUOP_stop_singleshot_timer: |
1353 | 0 | if ( v != current ) |
1354 | 0 | return -EINVAL; |
1355 | 0 |
|
1356 | 0 | stop_timer(&v->singleshot_timer); |
1357 | 0 |
|
1358 | 0 | break; |
1359 | 0 |
|
1360 | 10 | case VCPUOP_register_vcpu_info: |
1361 | 10 | { |
1362 | 10 | struct vcpu_register_vcpu_info info; |
1363 | 10 | |
1364 | 10 | rc = -EFAULT; |
1365 | 10 | if ( copy_from_guest(&info, arg, 1) ) |
1366 | 0 | break; |
1367 | 10 | |
1368 | 10 | domain_lock(d); |
1369 | 10 | rc = map_vcpu_info(v, info.mfn, info.offset); |
1370 | 10 | domain_unlock(d); |
1371 | 10 | |
1372 | 10 | break; |
1373 | 10 | } |
1374 | 10 | |
1375 | 0 | case VCPUOP_register_runstate_memory_area: |
1376 | 0 | { |
1377 | 0 | struct vcpu_register_runstate_memory_area area; |
1378 | 0 | struct vcpu_runstate_info runstate; |
1379 | 0 |
|
1380 | 0 | rc = -EFAULT; |
1381 | 0 | if ( copy_from_guest(&area, arg, 1) ) |
1382 | 0 | break; |
1383 | 0 |
|
1384 | 0 | if ( !guest_handle_okay(area.addr.h, 1) ) |
1385 | 0 | break; |
1386 | 0 |
|
1387 | 0 | rc = 0; |
1388 | 0 | runstate_guest(v) = area.addr.h; |
1389 | 0 |
|
1390 | 0 | if ( v == current ) |
1391 | 0 | { |
1392 | 0 | __copy_to_guest(runstate_guest(v), &v->runstate, 1); |
1393 | 0 | } |
1394 | 0 | else |
1395 | 0 | { |
1396 | 0 | vcpu_runstate_get(v, &runstate); |
1397 | 0 | __copy_to_guest(runstate_guest(v), &runstate, 1); |
1398 | 0 | } |
1399 | 0 |
|
1400 | 0 | break; |
1401 | 0 | } |
1402 | 0 |
|
1403 | 0 | #ifdef VCPU_TRAP_NMI |
1404 | 0 | case VCPUOP_send_nmi: |
1405 | 0 | if ( !guest_handle_is_null(arg) ) |
1406 | 0 | return -EINVAL; |
1407 | 0 |
|
1408 | 0 | if ( !test_and_set_bool(v->nmi_pending) ) |
1409 | 0 | vcpu_kick(v); |
1410 | 0 |
|
1411 | 0 | break; |
1412 | 0 | #endif |
1413 | 0 |
|
1414 | 0 | default: |
1415 | 0 | rc = arch_do_vcpu_op(cmd, v, arg); |
1416 | 0 | break; |
1417 | 91.2k | } |
1418 | 91.2k | |
1419 | 91.1k | return rc; |
1420 | 91.2k | } |
1421 | | |
1422 | | #ifdef VM_ASSIST_VALID |
1423 | | long vm_assist(struct domain *p, unsigned int cmd, unsigned int type, |
1424 | | unsigned long valid) |
1425 | 0 | { |
1426 | 0 | if ( type >= BITS_PER_LONG || !test_bit(type, &valid) ) |
1427 | 0 | return -EINVAL; |
1428 | 0 |
|
1429 | 0 | switch ( cmd ) |
1430 | 0 | { |
1431 | 0 | case VMASST_CMD_enable: |
1432 | 0 | set_bit(type, &p->vm_assist); |
1433 | 0 | return 0; |
1434 | 0 | case VMASST_CMD_disable: |
1435 | 0 | clear_bit(type, &p->vm_assist); |
1436 | 0 | return 0; |
1437 | 0 | } |
1438 | 0 |
|
1439 | 0 | return -ENOSYS; |
1440 | 0 | } |
1441 | | #endif |
1442 | | |
1443 | | struct pirq *pirq_get_info(struct domain *d, int pirq) |
1444 | 138 | { |
1445 | 138 | struct pirq *info = pirq_info(d, pirq); |
1446 | 138 | |
1447 | 138 | if ( !info && (info = alloc_pirq_struct(d)) != NULL ) |
1448 | 48 | { |
1449 | 48 | info->pirq = pirq; |
1450 | 48 | if ( radix_tree_insert(&d->pirq_tree, pirq, info) ) |
1451 | 0 | { |
1452 | 0 | free_pirq_struct(info); |
1453 | 0 | info = NULL; |
1454 | 0 | } |
1455 | 48 | } |
1456 | 138 | |
1457 | 138 | return info; |
1458 | 138 | } |
1459 | | |
1460 | | static void _free_pirq_struct(struct rcu_head *head) |
1461 | 0 | { |
1462 | 0 | xfree(container_of(head, struct pirq, rcu_head)); |
1463 | 0 | } |
1464 | | |
1465 | | void free_pirq_struct(void *ptr) |
1466 | 0 | { |
1467 | 0 | struct pirq *pirq = ptr; |
1468 | 0 |
|
1469 | 0 | call_rcu(&pirq->rcu_head, _free_pirq_struct); |
1470 | 0 | } |
1471 | | |
1472 | | struct migrate_info { |
1473 | | long (*func)(void *data); |
1474 | | void *data; |
1475 | | struct vcpu *vcpu; |
1476 | | unsigned int cpu; |
1477 | | unsigned int nest; |
1478 | | }; |
1479 | | |
1480 | | static DEFINE_PER_CPU(struct migrate_info *, continue_info); |
1481 | | |
1482 | | static void continue_hypercall_tasklet_handler(unsigned long _info) |
1483 | 0 | { |
1484 | 0 | struct migrate_info *info = (struct migrate_info *)_info; |
1485 | 0 | struct vcpu *v = info->vcpu; |
1486 | 0 |
|
1487 | 0 | /* Wait for vcpu to sleep so that we can access its register state. */ |
1488 | 0 | vcpu_sleep_sync(v); |
1489 | 0 |
|
1490 | 0 | this_cpu(continue_info) = info; |
1491 | 0 | return_reg(v) = (info->cpu == smp_processor_id()) |
1492 | 0 | ? info->func(info->data) : -EINVAL; |
1493 | 0 | this_cpu(continue_info) = NULL; |
1494 | 0 |
|
1495 | 0 | if ( info->nest-- == 0 ) |
1496 | 0 | { |
1497 | 0 | xfree(info); |
1498 | 0 | vcpu_unpause(v); |
1499 | 0 | put_domain(v->domain); |
1500 | 0 | } |
1501 | 0 | } |
1502 | | |
1503 | | int continue_hypercall_on_cpu( |
1504 | | unsigned int cpu, long (*func)(void *data), void *data) |
1505 | 0 | { |
1506 | 0 | struct migrate_info *info; |
1507 | 0 |
|
1508 | 0 | if ( (cpu >= nr_cpu_ids) || !cpu_online(cpu) ) |
1509 | 0 | return -EINVAL; |
1510 | 0 |
|
1511 | 0 | info = this_cpu(continue_info); |
1512 | 0 | if ( info == NULL ) |
1513 | 0 | { |
1514 | 0 | struct vcpu *curr = current; |
1515 | 0 |
|
1516 | 0 | info = xmalloc(struct migrate_info); |
1517 | 0 | if ( info == NULL ) |
1518 | 0 | return -ENOMEM; |
1519 | 0 |
|
1520 | 0 | info->vcpu = curr; |
1521 | 0 | info->nest = 0; |
1522 | 0 |
|
1523 | 0 | tasklet_kill( |
1524 | 0 | &curr->continue_hypercall_tasklet); |
1525 | 0 | tasklet_init( |
1526 | 0 | &curr->continue_hypercall_tasklet, |
1527 | 0 | continue_hypercall_tasklet_handler, |
1528 | 0 | (unsigned long)info); |
1529 | 0 |
|
1530 | 0 | get_knownalive_domain(curr->domain); |
1531 | 0 | vcpu_pause_nosync(curr); |
1532 | 0 | } |
1533 | 0 | else |
1534 | 0 | { |
1535 | 0 | BUG_ON(info->nest != 0); |
1536 | 0 | info->nest++; |
1537 | 0 | } |
1538 | 0 |
|
1539 | 0 | info->func = func; |
1540 | 0 | info->data = data; |
1541 | 0 | info->cpu = cpu; |
1542 | 0 |
|
1543 | 0 | tasklet_schedule_on_cpu(&info->vcpu->continue_hypercall_tasklet, cpu); |
1544 | 0 |
|
1545 | 0 | /* Dummy return value will be overwritten by tasklet. */ |
1546 | 0 | return 0; |
1547 | 0 | } |
1548 | | |
1549 | | /* |
1550 | | * Local variables: |
1551 | | * mode: C |
1552 | | * c-file-style: "BSD" |
1553 | | * c-basic-offset: 4 |
1554 | | * tab-width: 4 |
1555 | | * indent-tabs-mode: nil |
1556 | | * End: |
1557 | | */ |