Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/common/domain.c
Line | Count | Source
1
/******************************************************************************
2
 * domain.c
3
 * 
4
 * Generic domain-handling functions.
5
 */
6
7
#include <xen/compat.h>
8
#include <xen/init.h>
9
#include <xen/lib.h>
10
#include <xen/ctype.h>
11
#include <xen/err.h>
12
#include <xen/sched.h>
13
#include <xen/sched-if.h>
14
#include <xen/domain.h>
15
#include <xen/mm.h>
16
#include <xen/event.h>
17
#include <xen/vm_event.h>
18
#include <xen/time.h>
19
#include <xen/console.h>
20
#include <xen/softirq.h>
21
#include <xen/tasklet.h>
22
#include <xen/domain_page.h>
23
#include <xen/rangeset.h>
24
#include <xen/guest_access.h>
25
#include <xen/hypercall.h>
26
#include <xen/delay.h>
27
#include <xen/shutdown.h>
28
#include <xen/percpu.h>
29
#include <xen/multicall.h>
30
#include <xen/rcupdate.h>
31
#include <xen/wait.h>
32
#include <xen/grant_table.h>
33
#include <xen/xenoprof.h>
34
#include <xen/irq.h>
35
#include <asm/debugger.h>
36
#include <asm/p2m.h>
37
#include <asm/processor.h>
38
#include <public/sched.h>
39
#include <public/sysctl.h>
40
#include <public/vcpu.h>
41
#include <xsm/xsm.h>
42
#include <xen/trace.h>
43
#include <xen/tmem.h>
44
#include <asm/setup.h>
45
46
/* Linux config option: propagated to domain0 */
47
/* xen_processor_pmbits: Xen controls Cx, Px, ... */
48
unsigned int xen_processor_pmbits = XEN_PROCESSOR_PM_PX;
49
50
/* opt_dom0_vcpus_pin: If true, dom0 VCPUs are pinned. */
51
bool_t opt_dom0_vcpus_pin;
52
boolean_param("dom0_vcpus_pin", opt_dom0_vcpus_pin);
53
54
/* Protect updates/reads (resp.) of domain_list and domain_hash. */
55
DEFINE_SPINLOCK(domlist_update_lock);
56
DEFINE_RCU_READ_LOCK(domlist_read_lock);
57
58
1
#define DOMAIN_HASH_SIZE 256
59
1
#define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1))
60
static struct domain *domain_hash[DOMAIN_HASH_SIZE];
61
struct domain *domain_list;
62
63
struct domain *hardware_domain __read_mostly;
64
65
#ifdef CONFIG_LATE_HWDOM
66
domid_t hardware_domid __read_mostly;
67
integer_param("hardware_dom", hardware_domid);
68
#endif
69
70
struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;
71
72
vcpu_info_t dummy_vcpu_info;
73
74
static void __domain_finalise_shutdown(struct domain *d)
75
0
{
76
0
    struct vcpu *v;
77
0
78
0
    BUG_ON(!spin_is_locked(&d->shutdown_lock));
79
0
80
0
    if ( d->is_shut_down )
81
0
        return;
82
0
83
0
    for_each_vcpu ( d, v )
84
0
        if ( !v->paused_for_shutdown )
85
0
            return;
86
0
87
0
    d->is_shut_down = 1;
88
0
    if ( (d->shutdown_code == SHUTDOWN_suspend) && d->suspend_evtchn )
89
0
        evtchn_send(d, d->suspend_evtchn);
90
0
    else
91
0
        send_global_virq(VIRQ_DOM_EXC);
92
0
}
93
94
static void vcpu_check_shutdown(struct vcpu *v)
95
24
{
96
24
    struct domain *d = v->domain;
97
24
98
24
    spin_lock(&d->shutdown_lock);
99
24
100
24
    if ( d->is_shutting_down )
101
0
    {
102
0
        if ( !v->paused_for_shutdown )
103
0
            vcpu_pause_nosync(v);
104
0
        v->paused_for_shutdown = 1;
105
0
        v->defer_shutdown = 0;
106
0
        __domain_finalise_shutdown(d);
107
0
    }
108
24
109
24
    spin_unlock(&d->shutdown_lock);
110
24
}
111
112
static void vcpu_info_reset(struct vcpu *v)
113
12
{
114
12
    struct domain *d = v->domain;
115
12
116
12
    v->vcpu_info = ((v->vcpu_id < XEN_LEGACY_MAX_VCPUS)
117
12
                    ? (vcpu_info_t *)&shared_info(d, vcpu_info[v->vcpu_id])
118
0
                    : &dummy_vcpu_info);
119
12
    v->vcpu_info_mfn = INVALID_MFN;
120
12
}
121
122
struct vcpu *alloc_vcpu(
123
    struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
124
24
{
125
24
    struct vcpu *v;
126
24
127
24
    BUG_ON((!is_idle_domain(d) || vcpu_id) && d->vcpu[vcpu_id]);
128
24
129
24
    if ( (v = alloc_vcpu_struct()) == NULL )
130
0
        return NULL;
131
24
132
24
    v->domain = d;
133
24
    v->vcpu_id = vcpu_id;
134
24
135
24
    spin_lock_init(&v->virq_lock);
136
24
137
24
    tasklet_init(&v->continue_hypercall_tasklet, NULL, 0);
138
24
139
24
    grant_table_init_vcpu(v);
140
24
141
24
    if ( !zalloc_cpumask_var(&v->cpu_hard_affinity) ||
142
24
         !zalloc_cpumask_var(&v->cpu_hard_affinity_tmp) ||
143
24
         !zalloc_cpumask_var(&v->cpu_hard_affinity_saved) ||
144
24
         !zalloc_cpumask_var(&v->cpu_soft_affinity) ||
145
24
         !zalloc_cpumask_var(&v->vcpu_dirty_cpumask) )
146
0
        goto fail_free;
147
24
148
24
    if ( is_idle_domain(d) )
149
12
    {
150
12
        v->runstate.state = RUNSTATE_running;
151
12
    }
152
24
    else
153
12
    {
154
12
        v->runstate.state = RUNSTATE_offline;        
155
12
        v->runstate.state_entry_time = NOW();
156
12
        set_bit(_VPF_down, &v->pause_flags);
157
12
        vcpu_info_reset(v);
158
12
        init_waitqueue_vcpu(v);
159
12
    }
160
24
161
24
    if ( sched_init_vcpu(v, cpu_id) != 0 )
162
0
        goto fail_wq;
163
24
164
24
    if ( vcpu_initialise(v) != 0 )
165
0
    {
166
0
        sched_destroy_vcpu(v);
167
0
 fail_wq:
168
0
        destroy_waitqueue_vcpu(v);
169
0
 fail_free:
170
0
        free_cpumask_var(v->cpu_hard_affinity);
171
0
        free_cpumask_var(v->cpu_hard_affinity_tmp);
172
0
        free_cpumask_var(v->cpu_hard_affinity_saved);
173
0
        free_cpumask_var(v->cpu_soft_affinity);
174
0
        free_cpumask_var(v->vcpu_dirty_cpumask);
175
0
        free_vcpu_struct(v);
176
0
        return NULL;
177
0
    }
178
24
179
24
    d->vcpu[vcpu_id] = v;
180
24
    if ( vcpu_id != 0 )
181
22
    {
182
22
        int prev_id = v->vcpu_id - 1;
183
22
        while ( (prev_id >= 0) && (d->vcpu[prev_id] == NULL) )
184
0
            prev_id--;
185
22
        BUG_ON(prev_id < 0);
186
22
        v->next_in_list = d->vcpu[prev_id]->next_in_list;
187
22
        d->vcpu[prev_id]->next_in_list = v;
188
22
    }
189
24
190
24
    /* Must be called after making new vcpu visible to for_each_vcpu(). */
191
24
    vcpu_check_shutdown(v);
192
24
193
24
    if ( !is_idle_domain(d) )
194
12
        domain_update_node_affinity(d);
195
24
196
24
    return v;
197
24
}
198
199
static int late_hwdom_init(struct domain *d)
200
2
{
201
2
#ifdef CONFIG_LATE_HWDOM
202
    struct domain *dom0;
203
    int rv;
204
205
    if ( d != hardware_domain || d->domain_id == 0 )
206
        return 0;
207
208
    rv = xsm_init_hardware_domain(XSM_HOOK, d);
209
    if ( rv )
210
        return rv;
211
212
    printk("Initialising hardware domain %d\n", hardware_domid);
213
214
    dom0 = rcu_lock_domain_by_id(0);
215
    ASSERT(dom0 != NULL);
216
    /*
217
     * Hardware resource ranges for domain 0 have been set up from
218
     * various sources intended to restrict the hardware domain's
219
     * access.  Apply these ranges to the actual hardware domain.
220
     *
221
     * Because the lists are being swapped, a side effect of this
222
     * operation is that Domain 0's rangesets are cleared.  Since
223
     * domain 0 should not be accessing the hardware when it constructs
224
     * a hardware domain, this should not be a problem.  Both lists
225
     * may be modified after this hypercall returns if a more complex
226
     * device model is desired.
227
     */
228
    rangeset_swap(d->irq_caps, dom0->irq_caps);
229
    rangeset_swap(d->iomem_caps, dom0->iomem_caps);
230
#ifdef CONFIG_X86
231
    rangeset_swap(d->arch.ioport_caps, dom0->arch.ioport_caps);
232
    setup_io_bitmap(d);
233
    setup_io_bitmap(dom0);
234
#endif
235
236
    rcu_unlock_domain(dom0);
237
238
    iommu_hwdom_init(d);
239
240
    return rv;
241
#else
242
2
    return 0;
243
2
#endif
244
2
}
245
246
static unsigned int __read_mostly extra_hwdom_irqs;
247
static unsigned int __read_mostly extra_domU_irqs = 32;
248
249
static int __init parse_extra_guest_irqs(const char *s)
250
0
{
251
0
    if ( isdigit(*s) )
252
0
        extra_domU_irqs = simple_strtoul(s, &s, 0);
253
0
    if ( *s == ',' && isdigit(*++s) )
254
0
        extra_hwdom_irqs = simple_strtoul(s, &s, 0);
255
0
256
0
    return *s ? -EINVAL : 0;
257
0
}
258
custom_param("extra_guest_irqs", parse_extra_guest_irqs);
259
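For illustration (not part of the source file): given the parsing above, a hypothetical boot option of the form extra_guest_irqs=64,1024 would set extra_domU_irqs to 64 and extra_hwdom_irqs to 1024; leaving out the value after the comma keeps extra_hwdom_irqs at 0, in which case domain_create() below falls back to arch_hwdom_irqs() for the hardware domain.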
260
struct domain *domain_create(domid_t domid, unsigned int domcr_flags,
261
                             uint32_t ssidref,
262
                             struct xen_arch_domainconfig *config)
263
5
{
264
5
    struct domain *d, **pd, *old_hwdom = NULL;
265
5
    enum { INIT_xsm = 1u<<0, INIT_watchdog = 1u<<1, INIT_rangeset = 1u<<2,
266
5
           INIT_evtchn = 1u<<3, INIT_gnttab = 1u<<4, INIT_arch = 1u<<5 };
267
5
    int err, init_status = 0;
268
5
    int poolid = CPUPOOLID_NONE;
269
5
270
5
    if ( (d = alloc_domain_struct()) == NULL )
271
0
        return ERR_PTR(-ENOMEM);
272
5
273
5
    d->domain_id = domid;
274
5
275
5
    TRACE_1D(TRC_DOM0_DOM_ADD, d->domain_id);
276
5
277
5
    lock_profile_register_struct(LOCKPROF_TYPE_PERDOM, d, domid, "Domain");
278
5
279
5
    if ( (err = xsm_alloc_security_domain(d)) != 0 )
280
0
        goto fail;
281
5
    init_status |= INIT_xsm;
282
5
283
5
    watchdog_domain_init(d);
284
5
    init_status |= INIT_watchdog;
285
5
286
5
    atomic_set(&d->refcnt, 1);
287
5
    spin_lock_init_prof(d, domain_lock);
288
5
    spin_lock_init_prof(d, page_alloc_lock);
289
5
    spin_lock_init(&d->hypercall_deadlock_mutex);
290
5
    INIT_PAGE_LIST_HEAD(&d->page_list);
291
5
    INIT_PAGE_LIST_HEAD(&d->xenpage_list);
292
5
293
5
    spin_lock_init(&d->node_affinity_lock);
294
5
    d->node_affinity = NODE_MASK_ALL;
295
5
    d->auto_node_affinity = 1;
296
5
297
5
    spin_lock_init(&d->shutdown_lock);
298
5
    d->shutdown_code = SHUTDOWN_CODE_INVALID;
299
5
300
5
    spin_lock_init(&d->pbuf_lock);
301
5
302
5
    rwlock_init(&d->vnuma_rwlock);
303
5
304
5
    err = -ENOMEM;
305
5
    if ( !zalloc_cpumask_var(&d->domain_dirty_cpumask) )
306
0
        goto fail;
307
5
308
5
    if ( domcr_flags & DOMCRF_hvm )
309
1
        d->guest_type = guest_type_hvm;
310
5
    else
311
4
        d->guest_type = guest_type_pv;
312
5
313
5
    if ( domid == 0 || domid == hardware_domid )
314
1
    {
315
1
        if ( hardware_domid < 0 || hardware_domid >= DOMID_FIRST_RESERVED )
316
0
            panic("The value of hardware_dom must be a valid domain ID");
317
1
        d->is_pinned = opt_dom0_vcpus_pin;
318
1
        d->disable_migrate = 1;
319
1
        old_hwdom = hardware_domain;
320
1
        hardware_domain = d;
321
1
    }
322
5
323
5
    if ( domcr_flags & DOMCRF_xs_domain )
324
0
    {
325
0
        d->is_xenstore = 1;
326
0
        d->disable_migrate = 1;
327
0
    }
328
5
329
5
    rangeset_domain_initialise(d);
330
5
    init_status |= INIT_rangeset;
331
5
332
5
    d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex);
333
5
    d->irq_caps   = rangeset_new(d, "Interrupts", 0);
334
5
    if ( (d->iomem_caps == NULL) || (d->irq_caps == NULL) )
335
0
        goto fail;
336
5
337
5
    if ( domcr_flags & DOMCRF_dummy )
338
3
        return d;
339
5
340
2
    if ( !is_idle_domain(d) )
341
1
    {
342
1
        if ( (err = xsm_domain_create(XSM_HOOK, d, ssidref)) != 0 )
343
0
            goto fail;
344
1
345
1
        d->controller_pause_count = 1;
346
1
        atomic_inc(&d->pause_count);
347
1
348
1
        if ( !is_hardware_domain(d) )
349
0
            d->nr_pirqs = nr_static_irqs + extra_domU_irqs;
350
1
        else
351
1
            d->nr_pirqs = extra_hwdom_irqs ? nr_static_irqs + extra_hwdom_irqs
352
1
                                           : arch_hwdom_irqs(domid);
353
1
        if ( d->nr_pirqs > nr_irqs )
354
0
            d->nr_pirqs = nr_irqs;
355
1
356
1
        radix_tree_init(&d->pirq_tree);
357
1
358
1
        if ( (err = evtchn_init(d)) != 0 )
359
0
            goto fail;
360
1
        init_status |= INIT_evtchn;
361
1
362
1
        if ( (err = grant_table_create(d)) != 0 )
363
0
            goto fail;
364
1
        init_status |= INIT_gnttab;
365
1
366
1
        poolid = 0;
367
1
368
1
        err = -ENOMEM;
369
1
370
1
        d->pbuf = xzalloc_array(char, DOMAIN_PBUF_SIZE);
371
1
        if ( !d->pbuf )
372
0
            goto fail;
373
1
    }
374
2
375
2
    if ( (err = arch_domain_create(d, domcr_flags, config)) != 0 )
376
0
        goto fail;
377
2
    init_status |= INIT_arch;
378
2
379
2
    if ( (err = sched_init_domain(d, poolid)) != 0 )
380
0
        goto fail;
381
2
382
2
    if ( (err = late_hwdom_init(d)) != 0 )
383
0
        goto fail;
384
2
385
2
    if ( !is_idle_domain(d) )
386
1
    {
387
1
        spin_lock(&domlist_update_lock);
388
1
        pd = &domain_list; /* NB. domain_list maintained in order of domid. */
389
1
        for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list )
390
0
            if ( (*pd)->domain_id > d->domain_id )
391
0
                break;
392
1
        d->next_in_list = *pd;
393
1
        d->next_in_hashbucket = domain_hash[DOMAIN_HASH(domid)];
394
1
        rcu_assign_pointer(*pd, d);
395
1
        rcu_assign_pointer(domain_hash[DOMAIN_HASH(domid)], d);
396
1
        spin_unlock(&domlist_update_lock);
397
1
    }
398
2
399
2
    return d;
400
2
401
0
 fail:
402
0
    d->is_dying = DOMDYING_dead;
403
0
    if ( hardware_domain == d )
404
0
        hardware_domain = old_hwdom;
405
0
    atomic_set(&d->refcnt, DOMAIN_DESTROYED);
406
0
    xfree(d->pbuf);
407
0
    if ( init_status & INIT_arch )
408
0
        arch_domain_destroy(d);
409
0
    if ( init_status & INIT_gnttab )
410
0
        grant_table_destroy(d);
411
0
    if ( init_status & INIT_evtchn )
412
0
    {
413
0
        evtchn_destroy(d);
414
0
        evtchn_destroy_final(d);
415
0
        radix_tree_destroy(&d->pirq_tree, free_pirq_struct);
416
0
    }
417
0
    if ( init_status & INIT_rangeset )
418
0
        rangeset_domain_destroy(d);
419
0
    if ( init_status & INIT_watchdog )
420
0
        watchdog_domain_destroy(d);
421
0
    if ( init_status & INIT_xsm )
422
0
        xsm_free_security_domain(d);
423
0
    free_cpumask_var(d->domain_dirty_cpumask);
424
0
    free_domain_struct(d);
425
0
    return ERR_PTR(err);
426
2
}
427
428
429
void domain_update_node_affinity(struct domain *d)
430
12
{
431
12
    cpumask_var_t dom_cpumask, dom_cpumask_soft;
432
12
    cpumask_t *dom_affinity;
433
12
    const cpumask_t *online;
434
12
    struct vcpu *v;
435
12
    unsigned int cpu;
436
12
437
12
    /* Do we have vcpus already? If not, no need to update node-affinity. */
438
12
    if ( !d->vcpu || !d->vcpu[0] )
439
0
        return;
440
12
441
12
    if ( !zalloc_cpumask_var(&dom_cpumask) )
442
0
        return;
443
12
    if ( !zalloc_cpumask_var(&dom_cpumask_soft) )
444
0
    {
445
0
        free_cpumask_var(dom_cpumask);
446
0
        return;
447
0
    }
448
12
449
12
    online = cpupool_domain_cpumask(d);
450
12
451
12
    spin_lock(&d->node_affinity_lock);
452
12
453
12
    /*
454
12
     * If d->auto_node_affinity is true, let's compute the domain's
455
12
     * node-affinity and update d->node_affinity accordingly. If false,
456
12
     * just leave d->node_affinity alone.
457
12
     */
458
12
    if ( d->auto_node_affinity )
459
12
    {
460
12
        /*
461
12
         * We want the narrowest possible set of pcpus (to get the narrowest
462
12
         * possible set of nodes). What we need is the cpumask of where the
463
12
         * domain can run (the union of the hard affinity of all its vcpus),
464
12
         * and the full mask of where it would prefer to run (the union of
465
12
         * the soft affinity of all its various vcpus). Let's build them.
466
12
         */
467
12
        for_each_vcpu ( d, v )
468
78
        {
469
78
            cpumask_or(dom_cpumask, dom_cpumask, v->cpu_hard_affinity);
470
78
            cpumask_or(dom_cpumask_soft, dom_cpumask_soft,
471
78
                       v->cpu_soft_affinity);
472
78
        }
473
12
        /* Filter out non-online cpus */
474
12
        cpumask_and(dom_cpumask, dom_cpumask, online);
475
12
        ASSERT(!cpumask_empty(dom_cpumask));
476
12
        /* And compute the intersection between hard, online and soft */
477
12
        cpumask_and(dom_cpumask_soft, dom_cpumask_soft, dom_cpumask);
478
12
479
12
        /*
480
12
         * If not empty, the intersection of hard, soft and online is the
481
12
         * narrowest set we want. If empty, we fall back to hard&online.
482
12
         */
483
12
        dom_affinity = cpumask_empty(dom_cpumask_soft) ?
484
12
                           dom_cpumask : dom_cpumask_soft;
485
12
486
12
        nodes_clear(d->node_affinity);
487
12
        for_each_cpu ( cpu, dom_affinity )
488
144
            node_set(cpu_to_node(cpu), d->node_affinity);
489
12
    }
490
12
491
12
    spin_unlock(&d->node_affinity_lock);
492
12
493
12
    free_cpumask_var(dom_cpumask_soft);
494
12
    free_cpumask_var(dom_cpumask);
495
12
}
496
497
498
int domain_set_node_affinity(struct domain *d, const nodemask_t *affinity)
499
0
{
500
0
    /* Being affine with no nodes is just wrong */
501
0
    if ( nodes_empty(*affinity) )
502
0
        return -EINVAL;
503
0
504
0
    spin_lock(&d->node_affinity_lock);
505
0
506
0
    /*
507
0
     * Being/becoming explicitly affine to all nodes is not particularly
508
0
     * useful. Let's take it as the `reset node affinity` command.
509
0
     */
510
0
    if ( nodes_full(*affinity) )
511
0
    {
512
0
        d->auto_node_affinity = 1;
513
0
        goto out;
514
0
    }
515
0
516
0
    d->auto_node_affinity = 0;
517
0
    d->node_affinity = *affinity;
518
0
519
0
out:
520
0
    spin_unlock(&d->node_affinity_lock);
521
0
522
0
    domain_update_node_affinity(d);
523
0
524
0
    return 0;
525
0
}
526
527
528
struct domain *get_domain_by_id(domid_t dom)
529
0
{
530
0
    struct domain *d;
531
0
532
0
    rcu_read_lock(&domlist_read_lock);
533
0
534
0
    for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
535
0
          d != NULL;
536
0
          d = rcu_dereference(d->next_in_hashbucket) )
537
0
    {
538
0
        if ( d->domain_id == dom )
539
0
        {
540
0
            if ( unlikely(!get_domain(d)) )
541
0
                d = NULL;
542
0
            break;
543
0
        }
544
0
    }
545
0
546
0
    rcu_read_unlock(&domlist_read_lock);
547
0
548
0
    return d;
549
0
}
550
551
552
struct domain *rcu_lock_domain_by_id(domid_t dom)
553
4
{
554
4
    struct domain *d = NULL;
555
4
556
4
    rcu_read_lock(&domlist_read_lock);
557
4
558
4
    for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
559
4
          d != NULL;
560
0
          d = rcu_dereference(d->next_in_hashbucket) )
561
1
    {
562
1
        if ( d->domain_id == dom )
563
1
        {
564
1
            rcu_lock_domain(d);
565
1
            break;
566
1
        }
567
1
    }
568
4
569
4
    rcu_read_unlock(&domlist_read_lock);
570
4
571
4
    return d;
572
4
}
573
574
struct domain *rcu_lock_domain_by_any_id(domid_t dom)
575
11
{
576
11
    if ( dom == DOMID_SELF )
577
11
        return rcu_lock_current_domain();
578
0
    return rcu_lock_domain_by_id(dom);
579
11
}
580
581
int rcu_lock_remote_domain_by_id(domid_t dom, struct domain **d)
582
0
{
583
0
    if ( (*d = rcu_lock_domain_by_id(dom)) == NULL )
584
0
        return -ESRCH;
585
0
586
0
    if ( *d == current->domain )
587
0
    {
588
0
        rcu_unlock_domain(*d);
589
0
        return -EPERM;
590
0
    }
591
0
592
0
    return 0;
593
0
}
594
595
int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
596
0
{
597
0
    int rv;
598
0
    rv = rcu_lock_remote_domain_by_id(dom, d);
599
0
    if ( rv )
600
0
        return rv;
601
0
    if ( (*d)->is_dying )
602
0
    {
603
0
        rcu_unlock_domain(*d);
604
0
        return -EINVAL;
605
0
    }
606
0
607
0
    return 0;
608
0
}
609
610
int domain_kill(struct domain *d)
611
0
{
612
0
    int rc = 0;
613
0
    struct vcpu *v;
614
0
615
0
    if ( d == current->domain )
616
0
        return -EINVAL;
617
0
618
0
    /* Protected by domctl_lock. */
619
0
    switch ( d->is_dying )
620
0
    {
621
0
    case DOMDYING_alive:
622
0
        domain_pause(d);
623
0
        d->is_dying = DOMDYING_dying;
624
0
        spin_barrier(&d->domain_lock);
625
0
        evtchn_destroy(d);
626
0
        gnttab_release_mappings(d);
627
0
        tmem_destroy(d->tmem_client);
628
0
        vnuma_destroy(d->vnuma);
629
0
        domain_set_outstanding_pages(d, 0);
630
0
        d->tmem_client = NULL;
631
0
        /* fallthrough */
632
0
    case DOMDYING_dying:
633
0
        rc = domain_relinquish_resources(d);
634
0
        if ( rc != 0 )
635
0
            break;
636
0
        if ( cpupool_move_domain(d, cpupool0) )
637
0
            return -ERESTART;
638
0
        for_each_vcpu ( d, v )
639
0
            unmap_vcpu_info(v);
640
0
        d->is_dying = DOMDYING_dead;
641
0
        /* Mem event cleanup has to go here because the rings 
642
0
         * have to be put before we call put_domain. */
643
0
        vm_event_cleanup(d);
644
0
        put_domain(d);
645
0
        send_global_virq(VIRQ_DOM_EXC);
646
0
        /* fallthrough */
647
0
    case DOMDYING_dead:
648
0
        break;
649
0
    }
650
0
651
0
    return rc;
652
0
}
653
654
655
void __domain_crash(struct domain *d)
656
0
{
657
0
    if ( d->is_shutting_down )
658
0
    {
659
0
        /* Print nothing: the domain is already shutting down. */
660
0
    }
661
0
    else if ( d == current->domain )
662
0
    {
663
0
        printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n",
664
0
               d->domain_id, current->vcpu_id, smp_processor_id());
665
0
        show_execution_state(guest_cpu_user_regs());
666
0
    }
667
0
    else
668
0
    {
669
0
        printk("Domain %d reported crashed by domain %d on cpu#%d:\n",
670
0
               d->domain_id, current->domain->domain_id, smp_processor_id());
671
0
    }
672
0
673
0
    domain_shutdown(d, SHUTDOWN_crash);
674
0
}
675
676
677
void __domain_crash_synchronous(void)
678
0
{
679
0
    __domain_crash(current->domain);
680
0
681
0
    vcpu_end_shutdown_deferral(current);
682
0
683
0
    for ( ; ; )
684
0
        do_softirq();
685
0
}
686
687
688
void domain_shutdown(struct domain *d, u8 reason)
689
0
{
690
0
    struct vcpu *v;
691
0
692
0
    spin_lock(&d->shutdown_lock);
693
0
694
0
    if ( d->shutdown_code == SHUTDOWN_CODE_INVALID )
695
0
        d->shutdown_code = reason;
696
0
    reason = d->shutdown_code;
697
0
698
0
    if ( is_hardware_domain(d) )
699
0
        hwdom_shutdown(reason);
700
0
701
0
    if ( d->is_shutting_down )
702
0
    {
703
0
        spin_unlock(&d->shutdown_lock);
704
0
        return;
705
0
    }
706
0
707
0
    d->is_shutting_down = 1;
708
0
709
0
    smp_mb(); /* set shutdown status /then/ check for per-cpu deferrals */
710
0
711
0
    for_each_vcpu ( d, v )
712
0
    {
713
0
        if ( reason == SHUTDOWN_crash )
714
0
            v->defer_shutdown = 0;
715
0
        else if ( v->defer_shutdown )
716
0
            continue;
717
0
        vcpu_pause_nosync(v);
718
0
        v->paused_for_shutdown = 1;
719
0
    }
720
0
721
0
    arch_domain_shutdown(d);
722
0
723
0
    __domain_finalise_shutdown(d);
724
0
725
0
    spin_unlock(&d->shutdown_lock);
726
0
}
727
728
void domain_resume(struct domain *d)
729
0
{
730
0
    struct vcpu *v;
731
0
732
0
    /*
733
0
     * Some code paths assume that shutdown status does not get reset under
734
0
     * their feet (e.g., some assertions make this assumption).
735
0
     */
736
0
    domain_pause(d);
737
0
738
0
    spin_lock(&d->shutdown_lock);
739
0
740
0
    d->is_shutting_down = d->is_shut_down = 0;
741
0
    d->shutdown_code = SHUTDOWN_CODE_INVALID;
742
0
743
0
    for_each_vcpu ( d, v )
744
0
    {
745
0
        if ( v->paused_for_shutdown )
746
0
            vcpu_unpause(v);
747
0
        v->paused_for_shutdown = 0;
748
0
    }
749
0
750
0
    spin_unlock(&d->shutdown_lock);
751
0
752
0
    domain_unpause(d);
753
0
}
754
755
int vcpu_start_shutdown_deferral(struct vcpu *v)
756
0
{
757
0
    if ( v->defer_shutdown )
758
0
        return 1;
759
0
760
0
    v->defer_shutdown = 1;
761
0
    smp_mb(); /* set deferral status /then/ check for shutdown */
762
0
    if ( unlikely(v->domain->is_shutting_down) )
763
0
        vcpu_check_shutdown(v);
764
0
765
0
    return v->defer_shutdown;
766
0
}
767
768
void vcpu_end_shutdown_deferral(struct vcpu *v)
769
0
{
770
0
    v->defer_shutdown = 0;
771
0
    smp_mb(); /* clear deferral status /then/ check for shutdown */
772
0
    if ( unlikely(v->domain->is_shutting_down) )
773
0
        vcpu_check_shutdown(v);
774
0
}
775
776
#ifdef CONFIG_HAS_GDBSX
777
void domain_pause_for_debugger(void)
778
0
{
779
0
    struct vcpu *curr = current;
780
0
    struct domain *d = curr->domain;
781
0
782
0
    domain_pause_by_systemcontroller_nosync(d);
783
0
784
0
    /* if gdbsx active, we just need to pause the domain */
785
0
    if ( curr->arch.gdbsx_vcpu_event == 0 )
786
0
        send_global_virq(VIRQ_DEBUGGER);
787
0
}
788
#endif
789
790
/* Complete domain destroy after RCU readers are not holding old references. */
791
static void complete_domain_destroy(struct rcu_head *head)
792
0
{
793
0
    struct domain *d = container_of(head, struct domain, rcu);
794
0
    struct vcpu *v;
795
0
    int i;
796
0
797
0
    for ( i = d->max_vcpus - 1; i >= 0; i-- )
798
0
    {
799
0
        if ( (v = d->vcpu[i]) == NULL )
800
0
            continue;
801
0
        tasklet_kill(&v->continue_hypercall_tasklet);
802
0
        vcpu_destroy(v);
803
0
        sched_destroy_vcpu(v);
804
0
        destroy_waitqueue_vcpu(v);
805
0
    }
806
0
807
0
    grant_table_destroy(d);
808
0
809
0
    arch_domain_destroy(d);
810
0
811
0
    watchdog_domain_destroy(d);
812
0
813
0
    rangeset_domain_destroy(d);
814
0
815
0
    sched_destroy_domain(d);
816
0
817
0
    /* Free page used by xen oprofile buffer. */
818
0
#ifdef CONFIG_XENOPROF
819
0
    free_xenoprof_pages(d);
820
0
#endif
821
0
822
0
#ifdef CONFIG_HAS_MEM_PAGING
823
0
    xfree(d->vm_event_paging);
824
0
#endif
825
0
    xfree(d->vm_event_monitor);
826
0
#ifdef CONFIG_HAS_MEM_SHARING
827
0
    xfree(d->vm_event_share);
828
0
#endif
829
0
830
0
    xfree(d->pbuf);
831
0
832
0
    for ( i = d->max_vcpus - 1; i >= 0; i-- )
833
0
        if ( (v = d->vcpu[i]) != NULL )
834
0
        {
835
0
            free_cpumask_var(v->cpu_hard_affinity);
836
0
            free_cpumask_var(v->cpu_hard_affinity_tmp);
837
0
            free_cpumask_var(v->cpu_hard_affinity_saved);
838
0
            free_cpumask_var(v->cpu_soft_affinity);
839
0
            free_cpumask_var(v->vcpu_dirty_cpumask);
840
0
            free_vcpu_struct(v);
841
0
        }
842
0
843
0
    if ( d->target != NULL )
844
0
        put_domain(d->target);
845
0
846
0
    evtchn_destroy_final(d);
847
0
848
0
    radix_tree_destroy(&d->pirq_tree, free_pirq_struct);
849
0
850
0
    xsm_free_security_domain(d);
851
0
    free_cpumask_var(d->domain_dirty_cpumask);
852
0
    xfree(d->vcpu);
853
0
    free_domain_struct(d);
854
0
855
0
    send_global_virq(VIRQ_DOM_EXC);
856
0
}
857
858
/* Release resources belonging to task @p. */
859
void domain_destroy(struct domain *d)
860
0
{
861
0
    struct domain **pd;
862
0
863
0
    BUG_ON(!d->is_dying);
864
0
865
0
    /* May be already destroyed, or get_domain() can race us. */
866
0
    if ( atomic_cmpxchg(&d->refcnt, 0, DOMAIN_DESTROYED) != 0 )
867
0
        return;
868
0
869
0
    TRACE_1D(TRC_DOM0_DOM_REM, d->domain_id);
870
0
871
0
    /* Delete from task list and task hashtable. */
872
0
    spin_lock(&domlist_update_lock);
873
0
    pd = &domain_list;
874
0
    while ( *pd != d ) 
875
0
        pd = &(*pd)->next_in_list;
876
0
    rcu_assign_pointer(*pd, d->next_in_list);
877
0
    pd = &domain_hash[DOMAIN_HASH(d->domain_id)];
878
0
    while ( *pd != d ) 
879
0
        pd = &(*pd)->next_in_hashbucket;
880
0
    rcu_assign_pointer(*pd, d->next_in_hashbucket);
881
0
    spin_unlock(&domlist_update_lock);
882
0
883
0
    /* Schedule RCU asynchronous completion of domain destroy. */
884
0
    call_rcu(&d->rcu, complete_domain_destroy);
885
0
}
886
887
void vcpu_pause(struct vcpu *v)
888
315
{
889
315
    ASSERT(v != current);
890
315
    atomic_inc(&v->pause_count);
891
315
    vcpu_sleep_sync(v);
892
315
}
893
894
void vcpu_pause_nosync(struct vcpu *v)
895
44
{
896
44
    atomic_inc(&v->pause_count);
897
44
    vcpu_sleep_nosync(v);
898
44
}
899
900
void vcpu_unpause(struct vcpu *v)
901
359
{
902
359
    if ( atomic_dec_and_test(&v->pause_count) )
903
172
        vcpu_wake(v);
904
359
}
905
906
int vcpu_pause_by_systemcontroller(struct vcpu *v)
907
0
{
908
0
    int old, new, prev = v->controller_pause_count;
909
0
910
0
    do
911
0
    {
912
0
        old = prev;
913
0
        new = old + 1;
914
0
915
0
        if ( new > 255 )
916
0
            return -EOVERFLOW;
917
0
918
0
        prev = cmpxchg(&v->controller_pause_count, old, new);
919
0
    } while ( prev != old );
920
0
921
0
    vcpu_pause(v);
922
0
923
0
    return 0;
924
0
}
925
926
int vcpu_unpause_by_systemcontroller(struct vcpu *v)
927
0
{
928
0
    int old, new, prev = v->controller_pause_count;
929
0
930
0
    do
931
0
    {
932
0
        old = prev;
933
0
        new = old - 1;
934
0
935
0
        if ( new < 0 )
936
0
            return -EINVAL;
937
0
938
0
        prev = cmpxchg(&v->controller_pause_count, old, new);
939
0
    } while ( prev != old );
940
0
941
0
    vcpu_unpause(v);
942
0
943
0
    return 0;
944
0
}
945
946
static void do_domain_pause(struct domain *d,
947
                            void (*sleep_fn)(struct vcpu *v))
948
1
{
949
1
    struct vcpu *v;
950
1
951
1
    atomic_inc(&d->pause_count);
952
1
953
1
    for_each_vcpu( d, v )
954
0
        sleep_fn(v);
955
1
956
1
    arch_domain_pause(d);
957
1
}
958
959
void domain_pause(struct domain *d)
960
1
{
961
1
    ASSERT(d != current->domain);
962
1
    do_domain_pause(d, vcpu_sleep_sync);
963
1
}
964
965
void domain_pause_nosync(struct domain *d)
966
0
{
967
0
    do_domain_pause(d, vcpu_sleep_nosync);
968
0
}
969
970
void domain_unpause(struct domain *d)
971
2
{
972
2
    struct vcpu *v;
973
2
974
2
    arch_domain_unpause(d);
975
2
976
2
    if ( atomic_dec_and_test(&d->pause_count) )
977
1
        for_each_vcpu( d, v )
978
12
            vcpu_wake(v);
979
2
}
980
981
int __domain_pause_by_systemcontroller(struct domain *d,
982
                                       void (*pause_fn)(struct domain *d))
983
0
{
984
0
    int old, new, prev = d->controller_pause_count;
985
0
986
0
    do
987
0
    {
988
0
        old = prev;
989
0
        new = old + 1;
990
0
991
0
        /*
992
0
         * Limit the toolstack pause count to an arbitrary 255 to prevent the
993
0
         * toolstack overflowing d->pause_count with many repeated hypercalls.
994
0
         */
995
0
        if ( new > 255 )
996
0
            return -EOVERFLOW;
997
0
998
0
        prev = cmpxchg(&d->controller_pause_count, old, new);
999
0
    } while ( prev != old );
1000
0
1001
0
    pause_fn(d);
1002
0
1003
0
    return 0;
1004
0
}
1005
1006
int domain_unpause_by_systemcontroller(struct domain *d)
1007
1
{
1008
1
    int old, new, prev = d->controller_pause_count;
1009
1
1010
1
    do
1011
1
    {
1012
1
        old = prev;
1013
1
        new = old - 1;
1014
1
1015
1
        if ( new < 0 )
1016
0
            return -EINVAL;
1017
1
1018
1
        prev = cmpxchg(&d->controller_pause_count, old, new);
1019
1
    } while ( prev != old );
1020
1
1021
1
    /*
1022
1
     * d->controller_pause_count is initialised to 1, and the toolstack is
1023
1
     * responsible for making one unpause hypercall when it wishes the guest
1024
1
     * to start running.
1025
1
     *
1026
1
     * All other toolstack operations should make a pair of pause/unpause
1027
1
     * calls and rely on the reference counting here.
1028
1
     *
1029
1
     * Creation is considered finished when the controller reference count
1030
1
     * first drops to 0.
1031
1
     */
1032
1
    if ( new == 0 )
1033
1
        d->creation_finished = true;
1034
1
1035
1
    domain_unpause(d);
1036
1
1037
1
    return 0;
1038
1
}
1039
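The comment above spells out the pause reference-counting contract with the toolstack. As a rough caller-side sketch (a hypothetical function, not code from domain.c), any toolstack-triggered operation other than the final creation unpause would bracket its work with the pair of helpers defined in this file:

    /* Hypothetical domctl-style handler: pause, operate, unpause. */
    static int some_toolstack_op(struct domain *d)
    {
        int ret = __domain_pause_by_systemcontroller(d, domain_pause);

        if ( ret )                  /* -EOVERFLOW once the count hits 255 */
            return ret;

        /* ... act on the paused domain ... */

        return domain_unpause_by_systemcontroller(d);
    }

The special case handled above is the very first unpause: when controller_pause_count drops to 0 for the first time, creation_finished is set and the guest is allowed to start running.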
1040
void domain_pause_except_self(struct domain *d)
1041
0
{
1042
0
    struct vcpu *v, *curr = current;
1043
0
1044
0
    if ( curr->domain == d )
1045
0
    {
1046
0
        for_each_vcpu( d, v )
1047
0
            if ( likely(v != curr) )
1048
0
                vcpu_pause(v);
1049
0
    }
1050
0
    else
1051
0
        domain_pause(d);
1052
0
}
1053
1054
void domain_unpause_except_self(struct domain *d)
1055
0
{
1056
0
    struct vcpu *v, *curr = current;
1057
0
1058
0
    if ( curr->domain == d )
1059
0
    {
1060
0
        for_each_vcpu( d, v )
1061
0
            if ( likely(v != curr) )
1062
0
                vcpu_unpause(v);
1063
0
    }
1064
0
    else
1065
0
        domain_unpause(d);
1066
0
}
1067
1068
int domain_soft_reset(struct domain *d)
1069
0
{
1070
0
    struct vcpu *v;
1071
0
    int rc;
1072
0
1073
0
    spin_lock(&d->shutdown_lock);
1074
0
    for_each_vcpu ( d, v )
1075
0
        if ( !v->paused_for_shutdown )
1076
0
        {
1077
0
            spin_unlock(&d->shutdown_lock);
1078
0
            return -EINVAL;
1079
0
        }
1080
0
    spin_unlock(&d->shutdown_lock);
1081
0
1082
0
    rc = evtchn_reset(d);
1083
0
    if ( rc )
1084
0
        return rc;
1085
0
1086
0
    grant_table_warn_active_grants(d);
1087
0
1088
0
    for_each_vcpu ( d, v )
1089
0
    {
1090
0
        set_xen_guest_handle(runstate_guest(v), NULL);
1091
0
        unmap_vcpu_info(v);
1092
0
    }
1093
0
1094
0
    rc = arch_domain_soft_reset(d);
1095
0
    if ( !rc )
1096
0
        domain_resume(d);
1097
0
    else
1098
0
        domain_crash(d);
1099
0
1100
0
    return rc;
1101
0
}
1102
1103
int vcpu_reset(struct vcpu *v)
1104
0
{
1105
0
    struct domain *d = v->domain;
1106
0
    int rc;
1107
0
1108
0
    vcpu_pause(v);
1109
0
    domain_lock(d);
1110
0
1111
0
    set_bit(_VPF_in_reset, &v->pause_flags);
1112
0
    rc = arch_vcpu_reset(v);
1113
0
    if ( rc )
1114
0
        goto out_unlock;
1115
0
1116
0
    set_bit(_VPF_down, &v->pause_flags);
1117
0
1118
0
    clear_bit(v->vcpu_id, d->poll_mask);
1119
0
    v->poll_evtchn = 0;
1120
0
1121
0
    v->fpu_initialised = 0;
1122
0
    v->fpu_dirtied     = 0;
1123
0
    v->is_initialised  = 0;
1124
0
#ifdef VCPU_TRAP_LAST
1125
0
    v->async_exception_mask = 0;
1126
0
    memset(v->async_exception_state, 0, sizeof(v->async_exception_state));
1127
0
#endif
1128
0
    cpumask_clear(v->cpu_hard_affinity_tmp);
1129
0
    clear_bit(_VPF_blocked, &v->pause_flags);
1130
0
    clear_bit(_VPF_in_reset, &v->pause_flags);
1131
0
1132
0
 out_unlock:
1133
0
    domain_unlock(v->domain);
1134
0
    vcpu_unpause(v);
1135
0
1136
0
    return rc;
1137
0
}
1138
1139
/*
1140
 * Map a guest page in and point the vcpu_info pointer at it.  This
1141
 * makes sure that the vcpu_info is always pointing at a valid piece
1142
 * of memory, and it sets a pending event to make sure that a pending
1143
 * event doesn't get missed.
1144
 */
1145
int map_vcpu_info(struct vcpu *v, unsigned long gfn, unsigned offset)
1146
12
{
1147
12
    struct domain *d = v->domain;
1148
12
    void *mapping;
1149
12
    vcpu_info_t *new_info;
1150
12
    struct page_info *page;
1151
12
    int i;
1152
12
1153
12
    if ( offset > (PAGE_SIZE - sizeof(vcpu_info_t)) )
1154
0
        return -EINVAL;
1155
12
1156
12
    if ( !mfn_eq(v->vcpu_info_mfn, INVALID_MFN) )
1157
0
        return -EINVAL;
1158
12
1159
12
    /* Run this command on yourself or on other offline VCPUS. */
1160
12
    if ( (v != current) && !(v->pause_flags & VPF_down) )
1161
0
        return -EINVAL;
1162
12
1163
12
    page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
1164
12
    if ( !page )
1165
0
        return -EINVAL;
1166
12
1167
12
    if ( !get_page_type(page, PGT_writable_page) )
1168
0
    {
1169
0
        put_page(page);
1170
0
        return -EINVAL;
1171
0
    }
1172
12
1173
12
    mapping = __map_domain_page_global(page);
1174
12
    if ( mapping == NULL )
1175
0
    {
1176
0
        put_page_and_type(page);
1177
0
        return -ENOMEM;
1178
0
    }
1179
12
1180
12
    new_info = (vcpu_info_t *)(mapping + offset);
1181
12
1182
12
    if ( v->vcpu_info == &dummy_vcpu_info )
1183
0
    {
1184
0
        memset(new_info, 0, sizeof(*new_info));
1185
0
#ifdef XEN_HAVE_PV_UPCALL_MASK
1186
0
        __vcpu_info(v, new_info, evtchn_upcall_mask) = 1;
1187
0
#endif
1188
0
    }
1189
12
    else
1190
12
    {
1191
12
        memcpy(new_info, v->vcpu_info, sizeof(*new_info));
1192
12
    }
1193
12
1194
12
    v->vcpu_info = new_info;
1195
12
    v->vcpu_info_mfn = _mfn(page_to_mfn(page));
1196
12
1197
12
    /* Set new vcpu_info pointer /before/ setting pending flags. */
1198
12
    smp_wmb();
1199
12
1200
12
    /*
1201
12
     * Mark everything as being pending just to make sure nothing gets
1202
12
     * lost.  The domain will get a spurious event, but it can cope.
1203
12
     */
1204
12
    vcpu_info(v, evtchn_upcall_pending) = 1;
1205
780
    for ( i = 0; i < BITS_PER_EVTCHN_WORD(d); i++ )
1206
768
        set_bit(i, &vcpu_info(v, evtchn_pending_sel));
1207
12
    arch_evtchn_inject(v);
1208
12
1209
12
    return 0;
1210
12
}
1211
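map_vcpu_info() is reached from the VCPUOP_register_vcpu_info case of do_vcpu_op() further down. A guest-side sketch of the registration, assuming the usual HYPERVISOR_vcpu_op wrapper; virt_to_mfn(), offset_in_page(), my_vcpu_info and vcpu_id are placeholders, not names from this file:

    /* Guest side (sketch): move this vCPU's vcpu_info into guest memory. */
    struct vcpu_register_vcpu_info info = {
        .mfn    = virt_to_mfn(my_vcpu_info),     /* guest frame holding the area */
        .offset = offset_in_page(my_vcpu_info),  /* must leave room for a vcpu_info_t */
    };
    int rc = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, vcpu_id, &info);

On success the hypervisor copies the current vcpu_info contents into the new mapping and, as the comment in map_vcpu_info() notes, marks all event-channel words pending, so the guest should expect one spurious upcall after registering.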
1212
/*
1213
 * Unmap the vcpu info page if the guest decided to place it somewhere
1214
 * else. This is used from domain_kill() and domain_soft_reset().
1215
 */
1216
void unmap_vcpu_info(struct vcpu *v)
1217
0
{
1218
0
    mfn_t mfn = v->vcpu_info_mfn;
1219
0
1220
0
    if ( mfn_eq(mfn, INVALID_MFN) )
1221
0
        return;
1222
0
1223
0
    unmap_domain_page_global((void *)
1224
0
                             ((unsigned long)v->vcpu_info & PAGE_MASK));
1225
0
1226
0
    vcpu_info_reset(v); /* NB: Clobbers v->vcpu_info_mfn */
1227
0
1228
0
    put_page_and_type(mfn_to_page(mfn_x(mfn)));
1229
0
}
1230
1231
int default_initialise_vcpu(struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg)
1232
0
{
1233
0
    struct vcpu_guest_context *ctxt;
1234
0
    struct domain *d = v->domain;
1235
0
    int rc;
1236
0
1237
0
    if ( (ctxt = alloc_vcpu_guest_context()) == NULL )
1238
0
        return -ENOMEM;
1239
0
1240
0
    if ( copy_from_guest(ctxt, arg, 1) )
1241
0
    {
1242
0
        free_vcpu_guest_context(ctxt);
1243
0
        return -EFAULT;
1244
0
    }
1245
0
1246
0
    domain_lock(d);
1247
0
    rc = v->is_initialised ? -EEXIST : arch_set_info_guest(v, ctxt);
1248
0
    domain_unlock(d);
1249
0
1250
0
    free_vcpu_guest_context(ctxt);
1251
0
1252
0
    return rc;
1253
0
}
1254
1255
long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
1256
91.2k
{
1257
91.2k
    struct domain *d = current->domain;
1258
91.2k
    struct vcpu *v;
1259
91.2k
    long rc = 0;
1260
91.2k
1261
91.2k
    if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
1262
0
        return -ENOENT;
1263
91.2k
1264
91.2k
    switch ( cmd )
1265
91.2k
    {
1266
0
    case VCPUOP_initialise:
1267
0
        if ( v->vcpu_info == &dummy_vcpu_info )
1268
0
            return -EINVAL;
1269
0
1270
0
        rc = arch_initialise_vcpu(v, arg);
1271
0
        if ( rc == -ERESTART )
1272
0
            rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iuh",
1273
0
                                               cmd, vcpuid, arg);
1274
0
1275
0
        break;
1276
0
1277
0
    case VCPUOP_up: {
1278
0
        bool_t wake = 0;
1279
0
        domain_lock(d);
1280
0
        if ( !v->is_initialised )
1281
0
            rc = -EINVAL;
1282
0
        else
1283
0
            wake = test_and_clear_bit(_VPF_down, &v->pause_flags);
1284
0
        domain_unlock(d);
1285
0
        if ( wake )
1286
0
            vcpu_wake(v);
1287
0
        break;
1288
0
    }
1289
0
1290
0
    case VCPUOP_down:
1291
0
        if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
1292
0
            vcpu_sleep_nosync(v);
1293
0
        break;
1294
0
1295
0
    case VCPUOP_is_up:
1296
0
        rc = !(v->pause_flags & VPF_down);
1297
0
        break;
1298
0
1299
0
    case VCPUOP_get_runstate_info:
1300
0
    {
1301
0
        struct vcpu_runstate_info runstate;
1302
0
        vcpu_runstate_get(v, &runstate);
1303
0
        if ( copy_to_guest(arg, &runstate, 1) )
1304
0
            rc = -EFAULT;
1305
0
        break;
1306
0
    }
1307
0
1308
0
    case VCPUOP_set_periodic_timer:
1309
0
    {
1310
0
        struct vcpu_set_periodic_timer set;
1311
0
1312
0
        if ( copy_from_guest(&set, arg, 1) )
1313
0
            return -EFAULT;
1314
0
1315
0
        if ( set.period_ns < MILLISECS(1) )
1316
0
            return -EINVAL;
1317
0
1318
0
        if ( set.period_ns > STIME_DELTA_MAX )
1319
0
            return -EINVAL;
1320
0
1321
0
        v->periodic_period = set.period_ns;
1322
0
        vcpu_force_reschedule(v);
1323
0
1324
0
        break;
1325
0
    }
1326
0
1327
14
    case VCPUOP_stop_periodic_timer:
1328
14
        v->periodic_period = 0;
1329
14
        vcpu_force_reschedule(v);
1330
14
        break;
1331
0
1332
90.9k
    case VCPUOP_set_singleshot_timer:
1333
90.9k
    {
1334
90.9k
        struct vcpu_set_singleshot_timer set;
1335
90.9k
1336
90.9k
        if ( v != current )
1337
0
            return -EINVAL;
1338
90.9k
1339
90.9k
        if ( copy_from_guest(&set, arg, 1) )
1340
0
            return -EFAULT;
1341
90.9k
1342
90.9k
        if ( (set.flags & VCPU_SSHOTTMR_future) &&
1343
0
             (set.timeout_abs_ns < NOW()) )
1344
0
            return -ETIME;
1345
90.9k
1346
90.9k
        migrate_timer(&v->singleshot_timer, smp_processor_id());
1347
90.9k
        set_timer(&v->singleshot_timer, set.timeout_abs_ns);
1348
90.9k
1349
90.9k
        break;
1350
90.9k
    }
1351
90.9k
1352
0
    case VCPUOP_stop_singleshot_timer:
1353
0
        if ( v != current )
1354
0
            return -EINVAL;
1355
0
1356
0
        stop_timer(&v->singleshot_timer);
1357
0
1358
0
        break;
1359
0
1360
10
    case VCPUOP_register_vcpu_info:
1361
10
    {
1362
10
        struct vcpu_register_vcpu_info info;
1363
10
1364
10
        rc = -EFAULT;
1365
10
        if ( copy_from_guest(&info, arg, 1) )
1366
0
            break;
1367
10
1368
10
        domain_lock(d);
1369
10
        rc = map_vcpu_info(v, info.mfn, info.offset);
1370
10
        domain_unlock(d);
1371
10
1372
10
        break;
1373
10
    }
1374
10
1375
0
    case VCPUOP_register_runstate_memory_area:
1376
0
    {
1377
0
        struct vcpu_register_runstate_memory_area area;
1378
0
        struct vcpu_runstate_info runstate;
1379
0
1380
0
        rc = -EFAULT;
1381
0
        if ( copy_from_guest(&area, arg, 1) )
1382
0
            break;
1383
0
1384
0
        if ( !guest_handle_okay(area.addr.h, 1) )
1385
0
            break;
1386
0
1387
0
        rc = 0;
1388
0
        runstate_guest(v) = area.addr.h;
1389
0
1390
0
        if ( v == current )
1391
0
        {
1392
0
            __copy_to_guest(runstate_guest(v), &v->runstate, 1);
1393
0
        }
1394
0
        else
1395
0
        {
1396
0
            vcpu_runstate_get(v, &runstate);
1397
0
            __copy_to_guest(runstate_guest(v), &runstate, 1);
1398
0
        }
1399
0
1400
0
        break;
1401
0
    }
1402
0
1403
0
#ifdef VCPU_TRAP_NMI
1404
0
    case VCPUOP_send_nmi:
1405
0
        if ( !guest_handle_is_null(arg) )
1406
0
            return -EINVAL;
1407
0
1408
0
        if ( !test_and_set_bool(v->nmi_pending) )
1409
0
            vcpu_kick(v);
1410
0
1411
0
        break;
1412
0
#endif
1413
0
1414
0
    default:
1415
0
        rc = arch_do_vcpu_op(cmd, v, arg);
1416
0
        break;
1417
91.2k
    }
1418
91.2k
1419
91.1k
    return rc;
1420
91.2k
}
1421
1422
#ifdef VM_ASSIST_VALID
1423
long vm_assist(struct domain *p, unsigned int cmd, unsigned int type,
1424
               unsigned long valid)
1425
0
{
1426
0
    if ( type >= BITS_PER_LONG || !test_bit(type, &valid) )
1427
0
        return -EINVAL;
1428
0
1429
0
    switch ( cmd )
1430
0
    {
1431
0
    case VMASST_CMD_enable:
1432
0
        set_bit(type, &p->vm_assist);
1433
0
        return 0;
1434
0
    case VMASST_CMD_disable:
1435
0
        clear_bit(type, &p->vm_assist);
1436
0
        return 0;
1437
0
    }
1438
0
1439
0
    return -ENOSYS;
1440
0
}
1441
#endif
1442
1443
struct pirq *pirq_get_info(struct domain *d, int pirq)
1444
138
{
1445
138
    struct pirq *info = pirq_info(d, pirq);
1446
138
1447
138
    if ( !info && (info = alloc_pirq_struct(d)) != NULL )
1448
48
    {
1449
48
        info->pirq = pirq;
1450
48
        if ( radix_tree_insert(&d->pirq_tree, pirq, info) )
1451
0
        {
1452
0
            free_pirq_struct(info);
1453
0
            info = NULL;
1454
0
        }
1455
48
    }
1456
138
1457
138
    return info;
1458
138
}
1459
1460
static void _free_pirq_struct(struct rcu_head *head)
1461
0
{
1462
0
    xfree(container_of(head, struct pirq, rcu_head));
1463
0
}
1464
1465
void free_pirq_struct(void *ptr)
1466
0
{
1467
0
    struct pirq *pirq = ptr;
1468
0
1469
0
    call_rcu(&pirq->rcu_head, _free_pirq_struct);
1470
0
}
1471
1472
struct migrate_info {
1473
    long (*func)(void *data);
1474
    void *data;
1475
    struct vcpu *vcpu;
1476
    unsigned int cpu;
1477
    unsigned int nest;
1478
};
1479
1480
static DEFINE_PER_CPU(struct migrate_info *, continue_info);
1481
1482
static void continue_hypercall_tasklet_handler(unsigned long _info)
1483
0
{
1484
0
    struct migrate_info *info = (struct migrate_info *)_info;
1485
0
    struct vcpu *v = info->vcpu;
1486
0
1487
0
    /* Wait for vcpu to sleep so that we can access its register state. */
1488
0
    vcpu_sleep_sync(v);
1489
0
1490
0
    this_cpu(continue_info) = info;
1491
0
    return_reg(v) = (info->cpu == smp_processor_id())
1492
0
        ? info->func(info->data) : -EINVAL;
1493
0
    this_cpu(continue_info) = NULL;
1494
0
1495
0
    if ( info->nest-- == 0 )
1496
0
    {
1497
0
        xfree(info);
1498
0
        vcpu_unpause(v);
1499
0
        put_domain(v->domain);
1500
0
    }
1501
0
}
1502
1503
int continue_hypercall_on_cpu(
1504
    unsigned int cpu, long (*func)(void *data), void *data)
1505
0
{
1506
0
    struct migrate_info *info;
1507
0
1508
0
    if ( (cpu >= nr_cpu_ids) || !cpu_online(cpu) )
1509
0
        return -EINVAL;
1510
0
1511
0
    info = this_cpu(continue_info);
1512
0
    if ( info == NULL )
1513
0
    {
1514
0
        struct vcpu *curr = current;
1515
0
1516
0
        info = xmalloc(struct migrate_info);
1517
0
        if ( info == NULL )
1518
0
            return -ENOMEM;
1519
0
1520
0
        info->vcpu = curr;
1521
0
        info->nest = 0;
1522
0
1523
0
        tasklet_kill(
1524
0
            &curr->continue_hypercall_tasklet);
1525
0
        tasklet_init(
1526
0
            &curr->continue_hypercall_tasklet,
1527
0
            continue_hypercall_tasklet_handler,
1528
0
            (unsigned long)info);
1529
0
1530
0
        get_knownalive_domain(curr->domain);
1531
0
        vcpu_pause_nosync(curr);
1532
0
    }
1533
0
    else
1534
0
    {
1535
0
        BUG_ON(info->nest != 0);
1536
0
        info->nest++;
1537
0
    }
1538
0
1539
0
    info->func = func;
1540
0
    info->data = data;
1541
0
    info->cpu  = cpu;
1542
0
1543
0
    tasklet_schedule_on_cpu(&info->vcpu->continue_hypercall_tasklet, cpu);
1544
0
1545
0
    /* Dummy return value will be overwritten by tasklet. */
1546
0
    return 0;
1547
0
}
1548
1549
/*
1550
 * Local variables:
1551
 * mode: C
1552
 * c-file-style: "BSD"
1553
 * c-basic-offset: 4
1554
 * tab-width: 4
1555
 * indent-tabs-mode: nil
1556
 * End:
1557
 */