Coverage Report

Created: 2017-10-25 09:10
File:    /root/src/xen/xen/common/domctl.c

Each line below is shown as "line  count : source"; a count of "-" marks a
line that is not executable.
   1   - : /******************************************************************************
   2   - :  * domctl.c
   3   - :  *
   4   - :  * Domain management operations. For use by node control stack.
   5   - :  *
   6   - :  * Copyright (c) 2002-2006, K A Fraser
   7   - :  */
   8   - :
   9   - : #include <xen/types.h>
  10   - : #include <xen/lib.h>
  11   - : #include <xen/err.h>
  12   - : #include <xen/mm.h>
  13   - : #include <xen/sched.h>
  14   - : #include <xen/sched-if.h>
  15   - : #include <xen/domain.h>
  16   - : #include <xen/event.h>
  17   - : #include <xen/grant_table.h>
  18   - : #include <xen/domain_page.h>
  19   - : #include <xen/trace.h>
  20   - : #include <xen/console.h>
  21   - : #include <xen/iocap.h>
  22   - : #include <xen/rcupdate.h>
  23   - : #include <xen/guest_access.h>
  24   - : #include <xen/bitmap.h>
  25   - : #include <xen/paging.h>
  26   - : #include <xen/hypercall.h>
  27   - : #include <xen/vm_event.h>
  28   - : #include <xen/monitor.h>
  29   - : #include <asm/current.h>
  30   - : #include <asm/irq.h>
  31   - : #include <asm/page.h>
  32   - : #include <asm/p2m.h>
  33   - : #include <public/domctl.h>
  34   - : #include <xsm/xsm.h>
  35   - :
  36   - : static DEFINE_SPINLOCK(domctl_lock);
  37   - : DEFINE_SPINLOCK(vcpu_alloc_lock);
  38   - :
  39   - : static int bitmap_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_bitmap,
  40   - :                                    const unsigned long *bitmap,
  41   - :                                    unsigned int nbits)
  42   0 : {
  43   0 :     unsigned int guest_bytes, copy_bytes, i;
  44   0 :     uint8_t zero = 0;
  45   0 :     int err = 0;
  46   0 :     uint8_t *bytemap = xmalloc_array(uint8_t, (nbits + 7) / 8);
  47   0 :
  48   0 :     if ( !bytemap )
  49   0 :         return -ENOMEM;
  50   0 :
  51   0 :     guest_bytes = (xenctl_bitmap->nr_bits + 7) / 8;
  52   0 :     copy_bytes  = min_t(unsigned int, guest_bytes, (nbits + 7) / 8);
  53   0 :
  54   0 :     bitmap_long_to_byte(bytemap, bitmap, nbits);
  55   0 :
  56   0 :     if ( copy_bytes != 0 )
  57   0 :         if ( copy_to_guest(xenctl_bitmap->bitmap, bytemap, copy_bytes) )
  58   0 :             err = -EFAULT;
  59   0 :
  60   0 :     for ( i = copy_bytes; !err && i < guest_bytes; i++ )
  61   0 :         if ( copy_to_guest_offset(xenctl_bitmap->bitmap, i, &zero, 1) )
  62   0 :             err = -EFAULT;
  63   0 :
  64   0 :     xfree(bytemap);
  65   0 :
  66   0 :     return err;
  67   0 : }
  68   - :
  69   - : static int xenctl_bitmap_to_bitmap(unsigned long *bitmap,
  70   - :                                    const struct xenctl_bitmap *xenctl_bitmap,
  71   - :                                    unsigned int nbits)
  72   0 : {
  73   0 :     unsigned int guest_bytes, copy_bytes;
  74   0 :     int err = 0;
  75   0 :     uint8_t *bytemap = xzalloc_array(uint8_t, (nbits + 7) / 8);
  76   0 :
  77   0 :     if ( !bytemap )
  78   0 :         return -ENOMEM;
  79   0 :
  80   0 :     guest_bytes = (xenctl_bitmap->nr_bits + 7) / 8;
  81   0 :     copy_bytes  = min_t(unsigned int, guest_bytes, (nbits + 7) / 8);
  82   0 :
  83   0 :     if ( copy_bytes != 0 )
  84   0 :     {
  85   0 :         if ( copy_from_guest(bytemap, xenctl_bitmap->bitmap, copy_bytes) )
  86   0 :             err = -EFAULT;
  87   0 :         if ( (xenctl_bitmap->nr_bits & 7) && (guest_bytes == copy_bytes) )
  88   0 :             bytemap[guest_bytes-1] &= ~(0xff << (xenctl_bitmap->nr_bits & 7));
  89   0 :     }
  90   0 :
  91   0 :     if ( !err )
  92   0 :         bitmap_byte_to_long(bitmap, bytemap, nbits);
  93   0 :
  94   0 :     xfree(bytemap);
  95   0 :
  96   0 :     return err;
  97   0 : }
  98   - :
  99   - : int cpumask_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_cpumap,
 100   - :                              const cpumask_t *cpumask)
 101   0 : {
 102   0 :     return bitmap_to_xenctl_bitmap(xenctl_cpumap, cpumask_bits(cpumask),
 103   0 :                                    nr_cpu_ids);
 104   0 : }
 105   - :
 106   - : int xenctl_bitmap_to_cpumask(cpumask_var_t *cpumask,
 107   - :                              const struct xenctl_bitmap *xenctl_cpumap)
 108   0 : {
 109   0 :     int err = 0;
 110   0 :
 111   0 :     if ( alloc_cpumask_var(cpumask) ) {
 112   0 :         err = xenctl_bitmap_to_bitmap(cpumask_bits(*cpumask), xenctl_cpumap,
 113   0 :                                       nr_cpu_ids);
 114   0 :         /* In case of error, cleanup is up to us, as the caller won't care! */
 115   0 :         if ( err )
 116   0 :             free_cpumask_var(*cpumask);
 117   0 :     }
 118   0 :     else
 119   0 :         err = -ENOMEM;
 120   0 :
 121   0 :     return err;
 122   0 : }
 123   - :
 124   - : static int nodemask_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_nodemap,
 125   - :                                      const nodemask_t *nodemask)
 126   0 : {
 127   0 :     return bitmap_to_xenctl_bitmap(xenctl_nodemap, nodes_addr(*nodemask),
 128   0 :                                    MAX_NUMNODES);
 129   0 : }
 130   - :
 131   - : static int xenctl_bitmap_to_nodemask(nodemask_t *nodemask,
 132   - :                                      const struct xenctl_bitmap *xenctl_nodemap)
 133   0 : {
 134   0 :     return xenctl_bitmap_to_bitmap(nodes_addr(*nodemask), xenctl_nodemap,
 135   0 :                                    MAX_NUMNODES);
 136   0 : }
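
The four wrappers above funnel every cpumask/nodemask crossing of the guest
boundary through the same truncate-or-zero-pad rule: whichever side
advertises fewer bits wins, and any guest bytes beyond Xen's bitmap width
are explicitly zeroed. A standalone sketch of that rule (illustrative only,
not part of domctl.c; all names here are made up):

    #include <string.h>

    static unsigned int bytes_for(unsigned int bits) { return (bits + 7) / 8; }

    /* Copy a Xen-side bitmap of xen_nr_bits into a guest buffer that
     * advertises guest_nr_bits, truncating or zero-padding as needed. */
    void copy_bitmap(unsigned char *guest, unsigned int guest_nr_bits,
                     const unsigned char *xen, unsigned int xen_nr_bits)
    {
        unsigned int guest_bytes = bytes_for(guest_nr_bits);
        unsigned int xen_bytes   = bytes_for(xen_nr_bits);
        unsigned int copy_bytes  = xen_bytes < guest_bytes ? xen_bytes
                                                           : guest_bytes;

        memcpy(guest, xen, copy_bytes);                           /* truncate */
        memset(guest + copy_bytes, 0, guest_bytes - copy_bytes);  /* zero-pad */
    }
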
 137   - :
 138   - : static inline int is_free_domid(domid_t dom)
 139   0 : {
 140   0 :     struct domain *d;
 141   0 :
 142   0 :     if ( dom >= DOMID_FIRST_RESERVED )
 143   0 :         return 0;
 144   0 :
 145   0 :     if ( (d = rcu_lock_domain_by_id(dom)) == NULL )
 146   0 :         return 1;
 147   0 :
 148   0 :     rcu_unlock_domain(d);
 149   0 :     return 0;
 150   0 : }
 151   - :
 152   - : void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info)
 153   4 : {
 154   4 :     struct vcpu *v;
 155   4 :     u64 cpu_time = 0;
 156   4 :     int flags = XEN_DOMINF_blocked;
 157   4 :     struct vcpu_runstate_info runstate;
 158   4 :
 159   4 :     info->domain = d->domain_id;
 160   4 :     info->max_vcpu_id = XEN_INVALID_MAX_VCPU_ID;
 161   4 :     info->nr_online_vcpus = 0;
 162   4 :     info->ssidref = 0;
 163   4 :
 164   4 :     /*
 165   4 :      * - domain is marked as blocked only if all its vcpus are blocked
 166   4 :      * - domain is marked as running if any of its vcpus is running
 167   4 :      */
 168   4 :     for_each_vcpu ( d, v )
 169  48 :     {
 170  48 :         vcpu_runstate_get(v, &runstate);
 171  48 :         cpu_time += runstate.time[RUNSTATE_running];
 172  48 :         info->max_vcpu_id = v->vcpu_id;
 173  48 :         if ( !(v->pause_flags & VPF_down) )
 174  48 :         {
 175  48 :             if ( !(v->pause_flags & VPF_blocked) )
 176   4 :                 flags &= ~XEN_DOMINF_blocked;
 177  48 :             if ( v->is_running )
 178   4 :                 flags |= XEN_DOMINF_running;
 179  48 :             info->nr_online_vcpus++;
 180  48 :         }
 181  48 :     }
 182   4 :
 183   4 :     info->cpu_time = cpu_time;
 184   4 :
 185   4 :     info->flags = (info->nr_online_vcpus ? flags : 0) |
 186   4 :         ((d->is_dying == DOMDYING_dead) ? XEN_DOMINF_dying     : 0) |
 187   4 :         (d->is_shut_down                ? XEN_DOMINF_shutdown  : 0) |
 188   4 :         (d->controller_pause_count > 0  ? XEN_DOMINF_paused    : 0) |
 189   4 :         (d->debugger_attached           ? XEN_DOMINF_debugged  : 0) |
 190   4 :         (d->is_xenstore                 ? XEN_DOMINF_xs_domain : 0) |
 191   4 :         d->shutdown_code << XEN_DOMINF_shutdownshift;
 192   4 :
 193   4 :     switch ( d->guest_type )
 194   4 :     {
 195   4 :     case guest_type_hvm:
 196   4 :         info->flags |= XEN_DOMINF_hvm_guest;
 197   4 :         break;
 198   0 :     default:
 199   0 :         break;
 200   4 :     }
 201   4 :
 202   4 :     xsm_security_domaininfo(d, info);
 203   4 :
 204   4 :     info->tot_pages         = d->tot_pages;
 205   4 :     info->max_pages         = d->max_pages;
 206   4 :     info->outstanding_pages = d->outstanding_pages;
 207   4 :     info->shr_pages         = atomic_read(&d->shr_pages);
 208   4 :     info->paged_pages       = atomic_read(&d->paged_pages);
 209   4 :     info->shared_info_frame = mfn_to_gmfn(d, virt_to_mfn(d->shared_info));
 210   4 :     BUG_ON(SHARED_M2P(info->shared_info_frame));
 211   4 :
 212   4 :     info->cpupool = d->cpupool ? d->cpupool->cpupool_id : CPUPOOLID_NONE;
 213   4 :
 214   4 :     memcpy(info->handle, d->handle, sizeof(xen_domain_handle_t));
 215   4 :
 216   4 :     arch_get_domain_info(d, info);
 217   4 : }
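
getdomaininfo() folds per-vCPU state into a single flags word: the domain
reads as blocked only if every online vCPU is blocked, as running if any
vCPU is running, and the guest-supplied shutdown code is stored in the high
bits. A sketch of how a consumer could pick that word apart, using the
XEN_DOMINF_* constants from public/domctl.h (illustrative only; the helper
names are made up):

    #include <stdbool.h>
    #include <stdint.h>
    #include <public/domctl.h>

    static bool dominfo_running(uint32_t flags)
    {
        return flags & XEN_DOMINF_running;
    }

    /* The guest-supplied shutdown reason lives above the flag bits. */
    static uint8_t dominfo_shutdown_code(uint32_t flags)
    {
        return (flags >> XEN_DOMINF_shutdownshift) & XEN_DOMINF_shutdownmask;
    }
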
 218   - :
 219   - : bool_t domctl_lock_acquire(void)
 220   3 : {
 221   3 :     /*
 222   3 :      * Caller may try to pause its own VCPUs. We must prevent deadlock
 223   3 :      * against other non-domctl routines which try to do the same.
 224   3 :      */
 225   3 :     if ( !spin_trylock(&current->domain->hypercall_deadlock_mutex) )
 226   0 :         return 0;
 227   3 :
 228   3 :     /*
 229   3 :      * Trylock here is paranoia if we have multiple privileged domains. Then
 230   3 :      * we could have one domain trying to pause another which is spinning
 231   3 :      * on domctl_lock -- results in deadlock.
 232   3 :      */
 233   3 :     if ( spin_trylock(&domctl_lock) )
 234   3 :         return 1;
 235   3 :
 236   0 :     spin_unlock(&current->domain->hypercall_deadlock_mutex);
 237   0 :     return 0;
 238   3 : }
 239   - :
 240   - : void domctl_lock_release(void)
 241   3 : {
 242   3 :     spin_unlock(&domctl_lock);
 243   3 :     spin_unlock(&current->domain->hypercall_deadlock_mutex);
 244   3 : }
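
Both locks are taken with trylock, never with a blocking acquire, so a
contended domctl backs out cleanly instead of deadlocking two privileged
domains that are pausing each other. The caller is expected to turn that
failure into a hypercall continuation and retry, which is exactly what
do_domctl() does further down:

    if ( !domctl_lock_acquire() )
    {
        if ( d )
            rcu_unlock_domain(d);
        /* Re-issue the whole hypercall later rather than spin here. */
        return hypercall_create_continuation(
            __HYPERVISOR_domctl, "h", u_domctl);
    }
    /* ... perform the operation ... */
    domctl_lock_release();
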
 245   - :
 246   - : static inline
 247   - : int vcpuaffinity_params_invalid(const struct xen_domctl_vcpuaffinity *vcpuaff)
 248   0 : {
 249   0 :     return vcpuaff->flags == 0 ||
 250   0 :            ((vcpuaff->flags & XEN_VCPUAFFINITY_HARD) &&
 251   0 :             guest_handle_is_null(vcpuaff->cpumap_hard.bitmap)) ||
 252   0 :            ((vcpuaff->flags & XEN_VCPUAFFINITY_SOFT) &&
 253   0 :             guest_handle_is_null(vcpuaff->cpumap_soft.bitmap));
 254   0 : }
 255   - :
 256   - : void vnuma_destroy(struct vnuma_info *vnuma)
 257   0 : {
 258   0 :     if ( vnuma )
 259   0 :     {
 260   0 :         xfree(vnuma->vmemrange);
 261   0 :         xfree(vnuma->vcpu_to_vnode);
 262   0 :         xfree(vnuma->vdistance);
 263   0 :         xfree(vnuma->vnode_to_pnode);
 264   0 :         xfree(vnuma);
 265   0 :     }
 266   0 : }
 267   - :
 268   - : /*
 269   - :  * Allocates memory for vNUMA. The caller has to make sure
 270   - :  * that the domain has max_pages
 271   - :  * and its number of vcpus set.
 272   - :  * Verifies that a single allocation does not exceed
 273   - :  * PAGE_SIZE.
 274   - :  */
 275   - : static struct vnuma_info *vnuma_alloc(unsigned int nr_vnodes,
 276   - :                                       unsigned int nr_ranges,
 277   - :                                       unsigned int nr_vcpus)
 278   0 : {
 279   0 :
 280   0 :     struct vnuma_info *vnuma;
 281   0 :
 282   0 :     /*
 283   0 :      * Check if any of the allocations are bigger than PAGE_SIZE.
 284   0 :      * See XSA-77.
 285   0 :      */
 286   0 :     if ( nr_vnodes * nr_vnodes > (PAGE_SIZE / sizeof(*vnuma->vdistance)) ||
 287   0 :          nr_ranges > (PAGE_SIZE / sizeof(*vnuma->vmemrange)) )
 288   0 :         return ERR_PTR(-EINVAL);
 289   0 :
 290   0 :     /*
 291   0 :      * If allocations become larger than PAGE_SIZE, these allocations
 292   0 :      * should be split into PAGE_SIZE allocations due to XSA-77.
 293   0 :      */
 294   0 :     vnuma = xmalloc(struct vnuma_info);
 295   0 :     if ( !vnuma )
 296   0 :         return ERR_PTR(-ENOMEM);
 297   0 :
 298   0 :     vnuma->vdistance = xmalloc_array(unsigned int, nr_vnodes * nr_vnodes);
 299   0 :     vnuma->vcpu_to_vnode = xmalloc_array(unsigned int, nr_vcpus);
 300   0 :     vnuma->vnode_to_pnode = xmalloc_array(nodeid_t, nr_vnodes);
 301   0 :     vnuma->vmemrange = xmalloc_array(xen_vmemrange_t, nr_ranges);
 302   0 :
 303   0 :     if ( vnuma->vdistance == NULL || vnuma->vmemrange == NULL ||
 304   0 :          vnuma->vcpu_to_vnode == NULL || vnuma->vnode_to_pnode == NULL )
 305   0 :     {
 306   0 :         vnuma_destroy(vnuma);
 307   0 :         return ERR_PTR(-ENOMEM);
 308   0 :     }
 309   0 :
 310   0 :     return vnuma;
 311   0 : }
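
The XSA-77 guard bounds every array by one page before anything is
allocated. A worked instance of that bound, assuming 4 KiB pages and a
4-byte unsigned int (both typical, but configuration dependent):

    /*
     * vdistance check: nr_vnodes * nr_vnodes <= PAGE_SIZE / sizeof(unsigned int)
     *                                        =  4096 / 4 = 1024
     * so at most 32 virtual nodes (32 * 32 == 1024) pass the check, and a
     * single vdistance allocation can never exceed one page.
     */
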
 312   - :
 313   - : /*
 314   - :  * Construct vNUMA topology from uinfo.
 315   - :  */
 316   - : static struct vnuma_info *vnuma_init(const struct xen_domctl_vnuma *uinfo,
 317   - :                                      const struct domain *d)
 318   0 : {
 319   0 :     unsigned int i, nr_vnodes;
 320   0 :     int ret = -EINVAL;
 321   0 :     struct vnuma_info *info;
 322   0 :
 323   0 :     nr_vnodes = uinfo->nr_vnodes;
 324   0 :
 325   0 :     if ( nr_vnodes == 0 || uinfo->nr_vcpus != d->max_vcpus || uinfo->pad != 0 )
 326   0 :         return ERR_PTR(ret);
 327   0 :
 328   0 :     info = vnuma_alloc(nr_vnodes, uinfo->nr_vmemranges, d->max_vcpus);
 329   0 :     if ( IS_ERR(info) )
 330   0 :         return info;
 331   0 :
 332   0 :     ret = -EFAULT;
 333   0 :
 334   0 :     if ( copy_from_guest(info->vdistance, uinfo->vdistance,
 335   0 :                          nr_vnodes * nr_vnodes) )
 336   0 :         goto vnuma_fail;
 337   0 :
 338   0 :     if ( copy_from_guest(info->vmemrange, uinfo->vmemrange,
 339   0 :                          uinfo->nr_vmemranges) )
 340   0 :         goto vnuma_fail;
 341   0 :
 342   0 :     if ( copy_from_guest(info->vcpu_to_vnode, uinfo->vcpu_to_vnode,
 343   0 :                          d->max_vcpus) )
 344   0 :         goto vnuma_fail;
 345   0 :
 346   0 :     ret = -E2BIG;
 347   0 :     for ( i = 0; i < d->max_vcpus; ++i )
 348   0 :         if ( info->vcpu_to_vnode[i] >= nr_vnodes )
 349   0 :             goto vnuma_fail;
 350   0 :
 351   0 :     for ( i = 0; i < nr_vnodes; ++i )
 352   0 :     {
 353   0 :         unsigned int pnode;
 354   0 :
 355   0 :         ret = -EFAULT;
 356   0 :         if ( copy_from_guest_offset(&pnode, uinfo->vnode_to_pnode, i, 1) )
 357   0 :             goto vnuma_fail;
 358   0 :         ret = -E2BIG;
 359   0 :         if ( pnode >= MAX_NUMNODES )
 360   0 :             goto vnuma_fail;
 361   0 :         info->vnode_to_pnode[i] = pnode;
 362   0 :     }
 363   0 :
 364   0 :     info->nr_vnodes = nr_vnodes;
 365   0 :     info->nr_vmemranges = uinfo->nr_vmemranges;
 366   0 :
 367   0 :     /* Check that vmemranges flags are zero. */
 368   0 :     ret = -EINVAL;
 369   0 :     for ( i = 0; i < info->nr_vmemranges; i++ )
 370   0 :         if ( info->vmemrange[i].flags != 0 )
 371   0 :             goto vnuma_fail;
 372   0 :
 373   0 :     return info;
 374   0 :
 375   0 :  vnuma_fail:
 376   0 :     vnuma_destroy(info);
 377   0 :     return ERR_PTR(ret);
 378   0 : }
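
vnuma_init() copies the guest's arrays in first and validates afterwards:
every vcpu must map to an existing vnode, every vnode to a physical node
below MAX_NUMNODES, and all vmemrange flags must be zero. A hedged sketch
of a payload that would pass those checks, for a hypothetical two-node,
four-vcpu domain (toolstack side; the guest-handle plumbing with
set_xen_guest_handle() is omitted):

    /* Symmetric 2x2 distance table, node-local distance 10. */
    unsigned int vdistance[4]      = { 10, 20,
                                       20, 10 };
    unsigned int vcpu_to_vnode[4]  = { 0, 0, 1, 1 }; /* must match max_vcpus */
    unsigned int vnode_to_pnode[2] = { 0, 1 };       /* each < MAX_NUMNODES  */
    /* One xen_vmemrange_t per vnode; .flags must be 0 or -EINVAL results. */
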
 379   - :
 380   - : long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
 381   3 : {
 382   3 :     long ret = 0;
 383   3 :     bool_t copyback = 0;
 384   3 :     struct xen_domctl curop, *op = &curop;
 385   3 :     struct domain *d;
 386   3 :
 387   3 :     if ( copy_from_guest(op, u_domctl, 1) )
 388   0 :         return -EFAULT;
 389   3 :
 390   3 :     if ( op->interface_version != XEN_DOMCTL_INTERFACE_VERSION )
 391   0 :         return -EACCES;
 392   3 :
 393   3 :     switch ( op->cmd )
 394   3 :     {
 395   0 :     case XEN_DOMCTL_test_assign_device:
 396   0 :         if ( op->domain == DOMID_INVALID )
 397   0 :         {
 398   0 :     case XEN_DOMCTL_createdomain:
 399   0 :     case XEN_DOMCTL_gdbsx_guestmemio:
 400   0 :             d = NULL;
 401   0 :             break;
 402   0 :         }
 403   0 :         /* fall through */
 404   3 :     default:
 405   3 :         d = rcu_lock_domain_by_id(op->domain);
 406   3 :         if ( !d && op->cmd != XEN_DOMCTL_getdomaininfo )
 407   0 :             return -ESRCH;
 408   3 :     }
 409   3 :
 410   3 :     ret = xsm_domctl(XSM_OTHER, d, op->cmd);
 411   3 :     if ( ret )
 412   0 :         goto domctl_out_unlock_domonly;
 413   3 :
 414   3 :     if ( !domctl_lock_acquire() )
 415   0 :     {
 416   0 :         if ( d )
 417   0 :             rcu_unlock_domain(d);
 418   0 :         return hypercall_create_continuation(
 419   0 :             __HYPERVISOR_domctl, "h", u_domctl);
 420   0 :     }
 421   3 :
 422   3 :     switch ( op->cmd )
 423   3 :     {
 424   3 :
 425   0 :     case XEN_DOMCTL_setvcpucontext:
 426   0 :     {
 427   0 :         vcpu_guest_context_u c = { .nat = NULL };
 428   0 :         unsigned int vcpu = op->u.vcpucontext.vcpu;
 429   0 :         struct vcpu *v;
 430   0 :
 431   0 :         ret = -EINVAL;
 432   0 :         if ( (d == current->domain) || /* no domain_pause() */
 433   0 :              (vcpu >= d->max_vcpus) || ((v = d->vcpu[vcpu]) == NULL) )
 434   0 :             break;
 435   0 :
 436   0 :         if ( guest_handle_is_null(op->u.vcpucontext.ctxt) )
 437   0 :         {
 438   0 :             ret = vcpu_reset(v);
 439   0 :             if ( ret == -ERESTART )
 440   0 :                 ret = hypercall_create_continuation(
 441   0 :                           __HYPERVISOR_domctl, "h", u_domctl);
 442   0 :             break;
 443   0 :         }
 444   0 :
 445   0 : #ifdef CONFIG_COMPAT
 446   0 :         BUILD_BUG_ON(sizeof(struct vcpu_guest_context)
 447   0 :                      < sizeof(struct compat_vcpu_guest_context));
 448   0 : #endif
 449   0 :         ret = -ENOMEM;
 450   0 :         if ( (c.nat = alloc_vcpu_guest_context()) == NULL )
 451   0 :             break;
 452   0 :
 453   0 : #ifdef CONFIG_COMPAT
 454   0 :         if ( !is_pv_32bit_domain(d) )
 455   0 :             ret = copy_from_guest(c.nat, op->u.vcpucontext.ctxt, 1);
 456   0 :         else
 457   0 :             ret = copy_from_guest(c.cmp,
 458   0 :                                   guest_handle_cast(op->u.vcpucontext.ctxt,
 459   0 :                                                     void), 1);
 460   0 : #else
 461   - :         ret = copy_from_guest(c.nat, op->u.vcpucontext.ctxt, 1);
 462   - : #endif
 463   0 :         ret = ret ? -EFAULT : 0;
 464   0 :
 465   0 :         if ( ret == 0 )
 466   0 :         {
 467   0 :             domain_pause(d);
 468   0 :             ret = arch_set_info_guest(v, c);
 469   0 :             domain_unpause(d);
 470   0 :
 471   0 :             if ( ret == -ERESTART )
 472   0 :                 ret = hypercall_create_continuation(
 473   0 :                           __HYPERVISOR_domctl, "h", u_domctl);
 474   0 :         }
 475   0 :
 476   0 :         free_vcpu_guest_context(c.nat);
 477   0 :         break;
 478   0 :     }
 479   0 :
 480   0 :     case XEN_DOMCTL_pausedomain:
 481   0 :         ret = -EINVAL;
 482   0 :         if ( d != current->domain )
 483   0 :             ret = domain_pause_by_systemcontroller(d);
 484   0 :         break;
 485   0 :
 486   0 :     case XEN_DOMCTL_unpausedomain:
 487   0 :         ret = domain_unpause_by_systemcontroller(d);
 488   0 :         break;
 489   0 :
 490   0 :     case XEN_DOMCTL_resumedomain:
 491   0 :         if ( d == current->domain ) /* no domain_pause() */
 492   0 :             ret = -EINVAL;
 493   0 :         else
 494   0 :             domain_resume(d);
 495   0 :         break;
 496   0 :
 497   0 :     case XEN_DOMCTL_createdomain:
 498   0 :     {
 499   0 :         domid_t        dom;
 500   0 :         static domid_t rover = 0;
 501   0 :         unsigned int domcr_flags;
 502   0 :
 503   0 :         ret = -EINVAL;
 504   0 :         if ( (op->u.createdomain.flags &
 505   0 :              ~(XEN_DOMCTL_CDF_hvm_guest
 506   0 :                | XEN_DOMCTL_CDF_hap
 507   0 :                | XEN_DOMCTL_CDF_s3_integrity
 508   0 :                | XEN_DOMCTL_CDF_oos_off
 509   0 :                | XEN_DOMCTL_CDF_xs_domain)) )
 510   0 :             break;
 511   0 :
 512   0 :         dom = op->domain;
 513   0 :         if ( (dom > 0) && (dom < DOMID_FIRST_RESERVED) )
 514   0 :         {
 515   0 :             ret = -EINVAL;
 516   0 :             if ( !is_free_domid(dom) )
 517   0 :                 break;
 518   0 :         }
 519   0 :         else
 520   0 :         {
 521   0 :             for ( dom = rover + 1; dom != rover; dom++ )
 522   0 :             {
 523   0 :                 if ( dom == DOMID_FIRST_RESERVED )
 524   0 :                     dom = 1;
 525   0 :                 if ( is_free_domid(dom) )
 526   0 :                     break;
 527   0 :             }
 528   0 :
 529   0 :             ret = -ENOMEM;
 530   0 :             if ( dom == rover )
 531   0 :                 break;
 532   0 :
 533   0 :             rover = dom;
 534   0 :         }
 535   0 :
 536   0 :         domcr_flags = 0;
 537   0 :         if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_hvm_guest )
 538   0 :             domcr_flags |= DOMCRF_hvm;
 539   0 :         if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_hap )
 540   0 :             domcr_flags |= DOMCRF_hap;
 541   0 :         if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_s3_integrity )
 542   0 :             domcr_flags |= DOMCRF_s3_integrity;
 543   0 :         if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_oos_off )
 544   0 :             domcr_flags |= DOMCRF_oos_off;
 545   0 :         if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_xs_domain )
 546   0 :             domcr_flags |= DOMCRF_xs_domain;
 547   0 :
 548   0 :         d = domain_create(dom, domcr_flags, op->u.createdomain.ssidref,
 549   0 :                           &op->u.createdomain.config);
 550   0 :         if ( IS_ERR(d) )
 551   0 :         {
 552   0 :             ret = PTR_ERR(d);
 553   0 :             d = NULL;
 554   0 :             break;
 555   0 :         }
 556   0 :
 557   0 :         ret = 0;
 558   0 :
 559   0 :         memcpy(d->handle, op->u.createdomain.handle,
 560   0 :                sizeof(xen_domain_handle_t));
 561   0 :
 562   0 :         op->domain = d->domain_id;
 563   0 :         copyback = 1;
 564   0 :         d = NULL;
 565   0 :         break;
 566   0 :     }
 567   0 :
 568   0 :     case XEN_DOMCTL_max_vcpus:
 569   0 :     {
 570   0 :         unsigned int i, max = op->u.max_vcpus.max, cpu;
 571   0 :         cpumask_t *online;
 572   0 :
 573   0 :         ret = -EINVAL;
 574   0 :         if ( (d == current->domain) || /* no domain_pause() */
 575   0 :              (max > domain_max_vcpus(d)) )
 576   0 :             break;
 577   0 :
 578   0 :         /* Until Xenoprof can dynamically grow its vcpu-s array... */
 579   0 :         if ( d->xenoprof )
 580   0 :         {
 581   0 :             ret = -EAGAIN;
 582   0 :             break;
 583   0 :         }
 584   0 :
 585   0 :         /* Needed, for example, to ensure writable p.t. state is synced. */
 586   0 :         domain_pause(d);
 587   0 :
 588   0 :         /*
 589   0 :          * Certain operations (e.g. CPU microcode updates) modify data which is
 590   0 :          * used during VCPU allocation/initialization.
 591   0 :          */
 592   0 :         while ( !spin_trylock(&vcpu_alloc_lock) )
 593   0 :         {
 594   0 :             if ( hypercall_preempt_check() )
 595   0 :             {
 596   0 :                 ret = hypercall_create_continuation(
 597   0 :                     __HYPERVISOR_domctl, "h", u_domctl);
 598   0 :                 goto maxvcpu_out_novcpulock;
 599   0 :             }
 600   0 :         }
 601   0 :
 602   0 :         /* We cannot reduce maximum VCPUs. */
 603   0 :         ret = -EINVAL;
 604   0 :         if ( (max < d->max_vcpus) && (d->vcpu[max] != NULL) )
 605   0 :             goto maxvcpu_out;
 606   0 :
 607   0 :         /*
 608   0 :          * For now don't allow increasing the vcpu count from a non-zero
 609   0 :          * value: This code and all readers of d->vcpu would otherwise need
 610   0 :          * to be converted to use RCU, but at present there's no tools side
 611   0 :          * code path that would issue such a request.
 612   0 :          */
 613   0 :         ret = -EBUSY;
 614   0 :         if ( (d->max_vcpus > 0) && (max > d->max_vcpus) )
 615   0 :             goto maxvcpu_out;
 616   0 :
 617   0 :         ret = -ENOMEM;
 618   0 :         online = cpupool_domain_cpumask(d);
 619   0 :         if ( max > d->max_vcpus )
 620   0 :         {
 621   0 :             struct vcpu **vcpus;
 622   0 :
 623   0 :             BUG_ON(d->vcpu != NULL);
 624   0 :             BUG_ON(d->max_vcpus != 0);
 625   0 :
 626   0 :             if ( (vcpus = xzalloc_array(struct vcpu *, max)) == NULL )
 627   0 :                 goto maxvcpu_out;
 628   0 :
 629   0 :             /* Install vcpu array /then/ update max_vcpus. */
 630   0 :             d->vcpu = vcpus;
 631   0 :             smp_wmb();
 632   0 :             d->max_vcpus = max;
 633   0 :         }
 634   0 :
 635   0 :         for ( i = 0; i < max; i++ )
 636   0 :         {
 637   0 :             if ( d->vcpu[i] != NULL )
 638   0 :                 continue;
 639   0 :
 640   0 :             cpu = (i == 0) ?
 641   0 :                 cpumask_any(online) :
 642   0 :                 cpumask_cycle(d->vcpu[i-1]->processor, online);
 643   0 :
 644   0 :             if ( alloc_vcpu(d, i, cpu) == NULL )
 645   0 :                 goto maxvcpu_out;
 646   0 :         }
 647   0 :
 648   0 :         ret = 0;
 649   0 :
 650   0 :     maxvcpu_out:
 651   0 :         spin_unlock(&vcpu_alloc_lock);
 652   0 :
 653   0 :     maxvcpu_out_novcpulock:
 654   0 :         domain_unpause(d);
 655   0 :         break;
 656   0 :     }
 657   0 :
 658   0 :     case XEN_DOMCTL_soft_reset:
 659   0 :         if ( d == current->domain ) /* no domain_pause() */
 660   0 :         {
 661   0 :             ret = -EINVAL;
 662   0 :             break;
 663   0 :         }
 664   0 :         ret = domain_soft_reset(d);
 665   0 :         break;
 666   0 :
 667   0 :     case XEN_DOMCTL_destroydomain:
 668   0 :         ret = domain_kill(d);
 669   0 :         if ( ret == -ERESTART )
 670   0 :             ret = hypercall_create_continuation(
 671   0 :                 __HYPERVISOR_domctl, "h", u_domctl);
 672   0 :         break;
 673   0 :
 674   0 :     case XEN_DOMCTL_setnodeaffinity:
 675   0 :     {
 676   0 :         nodemask_t new_affinity;
 677   0 :
 678   0 :         ret = xenctl_bitmap_to_nodemask(&new_affinity,
 679   0 :                                         &op->u.nodeaffinity.nodemap);
 680   0 :         if ( !ret )
 681   0 :             ret = domain_set_node_affinity(d, &new_affinity);
 682   0 :         break;
 683   0 :     }
 684   0 :
 685   0 :     case XEN_DOMCTL_getnodeaffinity:
 686   0 :         ret = nodemask_to_xenctl_bitmap(&op->u.nodeaffinity.nodemap,
 687   0 :                                         &d->node_affinity);
 688   0 :         break;
 689   0 :
 690   0 :     case XEN_DOMCTL_setvcpuaffinity:
 691   0 :     case XEN_DOMCTL_getvcpuaffinity:
 692   0 :     {
 693   0 :         struct vcpu *v;
 694   0 :         struct xen_domctl_vcpuaffinity *vcpuaff = &op->u.vcpuaffinity;
 695   0 :
 696   0 :         ret = -EINVAL;
 697   0 :         if ( vcpuaff->vcpu >= d->max_vcpus )
 698   0 :             break;
 699   0 :
 700   0 :         ret = -ESRCH;
 701   0 :         if ( (v = d->vcpu[vcpuaff->vcpu]) == NULL )
 702   0 :             break;
 703   0 :
 704   0 :         ret = -EINVAL;
 705   0 :         if ( vcpuaffinity_params_invalid(vcpuaff) )
 706   0 :             break;
 707   0 :
 708   0 :         if ( op->cmd == XEN_DOMCTL_setvcpuaffinity )
 709   0 :         {
 710   0 :             cpumask_var_t new_affinity, old_affinity;
 711   0 :             cpumask_t *online = cpupool_domain_cpumask(v->domain);
 712   0 :
 713   0 :             /*
 714   0 :              * We want to be able to restore hard affinity if we are trying
 715   0 :              * to set both and changing soft affinity (which happens later,
 716   0 :              * when hard affinity has been successfully changed already) fails.
 717   0 :              */
 718   0 :             if ( !alloc_cpumask_var(&old_affinity) )
 719   0 :             {
 720   0 :                 ret = -ENOMEM;
 721   0 :                 break;
 722   0 :             }
 723   0 :             cpumask_copy(old_affinity, v->cpu_hard_affinity);
 724   0 :
 725   0 :             if ( !alloc_cpumask_var(&new_affinity) )
 726   0 :             {
 727   0 :                 free_cpumask_var(old_affinity);
 728   0 :                 ret = -ENOMEM;
 729   0 :                 break;
 730   0 :             }
 731   0 :
 732   0 :             /* Undo a stuck SCHED_pin_override? */
 733   0 :             if ( vcpuaff->flags & XEN_VCPUAFFINITY_FORCE )
 734   0 :                 vcpu_pin_override(v, -1);
 735   0 :
 736   0 :             ret = 0;
 737   0 :
 738   0 :             /*
 739   0 :              * We both set a new affinity and report back to the caller what
 740   0 :              * the scheduler will be effectively using.
 741   0 :              */
 742   0 :             if ( vcpuaff->flags & XEN_VCPUAFFINITY_HARD )
 743   0 :             {
 744   0 :                 ret = xenctl_bitmap_to_bitmap(cpumask_bits(new_affinity),
 745   0 :                                               &vcpuaff->cpumap_hard,
 746   0 :                                               nr_cpu_ids);
 747   0 :                 if ( !ret )
 748   0 :                     ret = vcpu_set_hard_affinity(v, new_affinity);
 749   0 :                 if ( ret )
 750   0 :                     goto setvcpuaffinity_out;
 751   0 :
 752   0 :                 /*
 753   0 :                  * For hard affinity, what we return is the intersection of
 754   0 :                  * cpupool's online mask and the new hard affinity.
 755   0 :                  */
 756   0 :                 cpumask_and(new_affinity, online, v->cpu_hard_affinity);
 757   0 :                 ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_hard,
 758   0 :                                                new_affinity);
 759   0 :             }
 760   0 :             if ( vcpuaff->flags & XEN_VCPUAFFINITY_SOFT )
 761   0 :             {
 762   0 :                 ret = xenctl_bitmap_to_bitmap(cpumask_bits(new_affinity),
 763   0 :                                               &vcpuaff->cpumap_soft,
 764   0 :                                               nr_cpu_ids);
 765   0 :                 if ( !ret )
 766   0 :                     ret = vcpu_set_soft_affinity(v, new_affinity);
 767   0 :                 if ( ret )
 768   0 :                 {
 769   0 :                     /*
 770   0 :                      * Since we're returning an error, the caller expects that
 771   0 :                      * nothing happened, so we roll back the changes to hard
 772   0 :                      * affinity (if any).
 773   0 :                      */
 774   0 :                     if ( vcpuaff->flags & XEN_VCPUAFFINITY_HARD )
 775   0 :                         vcpu_set_hard_affinity(v, old_affinity);
 776   0 :                     goto setvcpuaffinity_out;
 777   0 :                 }
 778   0 :
 779   0 :                 /*
 780   0 :                  * For soft affinity, we return the intersection between the
 781   0 :                  * new soft affinity, the cpupool's online map and the (new)
 782   0 :                  * hard affinity.
 783   0 :                  */
 784   0 :                 cpumask_and(new_affinity, new_affinity, online);
 785   0 :                 cpumask_and(new_affinity, new_affinity, v->cpu_hard_affinity);
 786   0 :                 ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_soft,
 787   0 :                                                new_affinity);
 788   0 :             }
 789   0 :
 790   0 :  setvcpuaffinity_out:
 791   0 :             free_cpumask_var(new_affinity);
 792   0 :             free_cpumask_var(old_affinity);
 793   0 :         }
 794   0 :         else
 795   0 :         {
 796   0 :             if ( vcpuaff->flags & XEN_VCPUAFFINITY_HARD )
 797   0 :                 ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_hard,
 798   0 :                                                v->cpu_hard_affinity);
 799   0 :             if ( vcpuaff->flags & XEN_VCPUAFFINITY_SOFT )
 800   0 :                 ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_soft,
 801   0 :                                                v->cpu_soft_affinity);
 802   0 :         }
 803   0 :         break;
 804   0 :     }
 805   0 :
 806   0 :     case XEN_DOMCTL_scheduler_op:
 807   0 :         ret = sched_adjust(d, &op->u.scheduler_op);
 808   0 :         copyback = 1;
 809   0 :         break;
 810   0 :
 811   3 :     case XEN_DOMCTL_getdomaininfo:
 812   3 :     {
 813   3 :         domid_t dom = DOMID_INVALID;
 814   3 :
 815   3 :         if ( !d )
 816   3 :         {
 817   3 :             ret = -EINVAL;
 818   3 :             if ( op->domain >= DOMID_FIRST_RESERVED )
 819   0 :                 break;
 820   3 :
 821   3 :             rcu_read_lock(&domlist_read_lock);
 822   3 :
 823   3 :             dom = op->domain;
 824   3 :             for_each_domain ( d )
 825   3 :                 if ( d->domain_id >= dom )
 826   0 :                     break;
 827   3 :         }
 828   3 :
 829   3 :         ret = -ESRCH;
 830   3 :         if ( d == NULL )
 831   3 :             goto getdomaininfo_out;
 832   3 :
 833   0 :         ret = xsm_getdomaininfo(XSM_HOOK, d);
 834   0 :         if ( ret )
 835   0 :             goto getdomaininfo_out;
 836   0 :
 837   0 :         getdomaininfo(d, &op->u.getdomaininfo);
 838   0 :
 839   0 :         op->domain = op->u.getdomaininfo.domain;
 840   0 :         copyback = 1;
 841   0 :
 842   3 :     getdomaininfo_out:
 843   3 :         /* When d was non-NULL upon entry, no cleanup is needed. */
 844   3 :         if ( dom == DOMID_INVALID )
 845   0 :             break;
 846   3 :
 847   3 :         rcu_read_unlock(&domlist_read_lock);
 848   3 :         d = NULL;
 849   3 :         break;
 850   3 :     }
 851   3 :
 852   0 :     case XEN_DOMCTL_getvcpucontext:
 853   0 :     {
 854   0 :         vcpu_guest_context_u c = { .nat = NULL };
 855   0 :         struct vcpu         *v;
 856   0 :
 857   0 :         ret = -EINVAL;
 858   0 :         if ( op->u.vcpucontext.vcpu >= d->max_vcpus ||
 859   0 :              (v = d->vcpu[op->u.vcpucontext.vcpu]) == NULL ||
 860   0 :              v == current ) /* no vcpu_pause() */
 861   0 :             goto getvcpucontext_out;
 862   0 :
 863   0 :         ret = -ENODATA;
 864   0 :         if ( !v->is_initialised )
 865   0 :             goto getvcpucontext_out;
 866   0 :
 867   0 : #ifdef CONFIG_COMPAT
 868   0 :         BUILD_BUG_ON(sizeof(struct vcpu_guest_context)
 869   0 :                      < sizeof(struct compat_vcpu_guest_context));
 870   0 : #endif
 871   0 :         ret = -ENOMEM;
 872   0 :         if ( (c.nat = xzalloc(struct vcpu_guest_context)) == NULL )
 873   0 :             goto getvcpucontext_out;
 874   0 :
 875   0 :         vcpu_pause(v);
 876   0 :
 877   0 :         arch_get_info_guest(v, c);
 878   0 :         ret = 0;
 879   0 :
 880   0 :         vcpu_unpause(v);
 881   0 :
 882   0 : #ifdef CONFIG_COMPAT
 883   0 :         if ( !is_pv_32bit_domain(d) )
 884   0 :             ret = copy_to_guest(op->u.vcpucontext.ctxt, c.nat, 1);
 885   0 :         else
 886   0 :             ret = copy_to_guest(guest_handle_cast(op->u.vcpucontext.ctxt,
 887   0 :                                                   void), c.cmp, 1);
 888   0 : #else
 889   - :         ret = copy_to_guest(op->u.vcpucontext.ctxt, c.nat, 1);
 890   - : #endif
 891   0 :
 892   0 :         if ( ret )
 893   0 :             ret = -EFAULT;
 894   0 :         copyback = 1;
 895   0 :
 896   0 :     getvcpucontext_out:
 897   0 :         xfree(c.nat);
 898   0 :         break;
 899   0 :     }
 900   0 :
 901   0 :     case XEN_DOMCTL_getvcpuinfo:
 902   0 :     {
 903   0 :         struct vcpu   *v;
 904   0 :         struct vcpu_runstate_info runstate;
 905   0 :
 906   0 :         ret = -EINVAL;
 907   0 :         if ( op->u.getvcpuinfo.vcpu >= d->max_vcpus )
 908   0 :             break;
 909   0 :
 910   0 :         ret = -ESRCH;
 911   0 :         if ( (v = d->vcpu[op->u.getvcpuinfo.vcpu]) == NULL )
 912   0 :             break;
 913   0 :
 914   0 :         vcpu_runstate_get(v, &runstate);
 915   0 :
 916   0 :         op->u.getvcpuinfo.online   = !(v->pause_flags & VPF_down);
 917   0 :         op->u.getvcpuinfo.blocked  = !!(v->pause_flags & VPF_blocked);
 918   0 :         op->u.getvcpuinfo.running  = v->is_running;
 919   0 :         op->u.getvcpuinfo.cpu_time = runstate.time[RUNSTATE_running];
 920   0 :         op->u.getvcpuinfo.cpu      = v->processor;
 921   0 :         ret = 0;
 922   0 :         copyback = 1;
 923   0 :         break;
 924   0 :     }
 925   0 :
 926   0 :     case XEN_DOMCTL_max_mem:
 927   0 :     {
 928   0 :         uint64_t new_max = op->u.max_mem.max_memkb >> (PAGE_SHIFT - 10);
 929   0 :
 930   0 :         spin_lock(&d->page_alloc_lock);
 931   0 :         /*
 932   0 :          * NB. We removed a check that new_max >= current tot_pages; this means
 933   0 :          * that the domain will now be allowed to "ratchet" down to new_max. In
 934   0 :          * the meantime, while tot > max, all new allocations are disallowed.
 935   0 :          */
 936   0 :         d->max_pages = min(new_max, (uint64_t)(typeof(d->max_pages))-1);
 937   0 :         spin_unlock(&d->page_alloc_lock);
 938   0 :         break;
 939   0 :     }
 940   0 :
 941   0 :     case XEN_DOMCTL_setdomainhandle:
 942   0 :         memcpy(d->handle, op->u.setdomainhandle.handle,
 943   0 :                sizeof(xen_domain_handle_t));
 944   0 :         break;
 945   0 :
 946   0 :     case XEN_DOMCTL_setdebugging:
 947   0 :         if ( unlikely(d == current->domain) ) /* no domain_pause() */
 948   0 :             ret = -EINVAL;
 949   0 :         else
 950   0 :         {
 951   0 :             domain_pause(d);
 952   0 :             d->debugger_attached = !!op->u.setdebugging.enable;
 953   0 :             domain_unpause(d); /* causes guest to latch new status */
 954   0 :         }
 955   0 :         break;
 956   0 :
 957   0 :     case XEN_DOMCTL_irq_permission:
 958   0 :     {
 959   0 :         unsigned int pirq = op->u.irq_permission.pirq, irq;
 960   0 :         int allow = op->u.irq_permission.allow_access;
 961   0 :
 962   0 :         if ( pirq >= current->domain->nr_pirqs )
 963   0 :         {
 964   0 :             ret = -EINVAL;
 965   0 :             break;
 966   0 :         }
 967   0 :         irq = pirq_access_permitted(current->domain, pirq);
 968   0 :         if ( !irq || xsm_irq_permission(XSM_HOOK, d, irq, allow) )
 969   0 :             ret = -EPERM;
 970   0 :         else if ( allow )
 971   0 :             ret = irq_permit_access(d, irq);
 972   0 :         else
 973   0 :             ret = irq_deny_access(d, irq);
 974   0 :         break;
 975   0 :     }
 976   0 :
 977   0 :     case XEN_DOMCTL_iomem_permission:
 978   0 :     {
 979   0 :         unsigned long mfn = op->u.iomem_permission.first_mfn;
 980   0 :         unsigned long nr_mfns = op->u.iomem_permission.nr_mfns;
 981   0 :         int allow = op->u.iomem_permission.allow_access;
 982   0 :
 983   0 :         ret = -EINVAL;
 984   0 :         if ( (mfn + nr_mfns - 1) < mfn ) /* wrap? */
 985   0 :             break;
 986   0 :
 987   0 :         if ( !iomem_access_permitted(current->domain,
 988   0 :                                      mfn, mfn + nr_mfns - 1) ||
 989   0 :              xsm_iomem_permission(XSM_HOOK, d, mfn, mfn + nr_mfns - 1, allow) )
 990   0 :             ret = -EPERM;
 991   0 :         else if ( allow )
 992   0 :             ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1);
 993   0 :         else
 994   0 :             ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1);
 995   0 :         if ( !ret )
 996   0 :             memory_type_changed(d);
 997   0 :         break;
 998   0 :     }
 999   0 :
1000   0 :     case XEN_DOMCTL_memory_mapping:
1001   0 :     {
1002   0 :         unsigned long gfn = op->u.memory_mapping.first_gfn;
1003   0 :         unsigned long mfn = op->u.memory_mapping.first_mfn;
1004   0 :         unsigned long nr_mfns = op->u.memory_mapping.nr_mfns;
1005   0 :         unsigned long mfn_end = mfn + nr_mfns - 1;
1006   0 :         int add = op->u.memory_mapping.add_mapping;
1007   0 :
1008   0 :         ret = -EINVAL;
1009   0 :         if ( mfn_end < mfn || /* wrap? */
1010   0 :              ((mfn | mfn_end) >> (paddr_bits - PAGE_SHIFT)) ||
1011   0 :              (gfn + nr_mfns - 1) < gfn ) /* wrap? */
1012   0 :             break;
1013   0 :
1014   0 : #ifndef CONFIG_X86 /* XXX ARM!? */
1015   - :         ret = -E2BIG;
1016   - :         /* Must break hypercall up as this could take a while. */
1017   - :         if ( nr_mfns > 64 )
1018   - :             break;
1019   - : #endif
1020   0 :
1021   0 :         ret = -EPERM;
1022   0 :         if ( !iomem_access_permitted(current->domain, mfn, mfn_end) ||
1023   0 :              !iomem_access_permitted(d, mfn, mfn_end) )
1024   0 :             break;
1025   0 :
1026   0 :         ret = xsm_iomem_mapping(XSM_HOOK, d, mfn, mfn_end, add);
1027   0 :         if ( ret )
1028   0 :             break;
1029   0 :
1030   0 :         if ( add )
1031   0 :         {
1032   0 :             printk(XENLOG_G_DEBUG
1033   0 :                    "memory_map:add: dom%d gfn=%lx mfn=%lx nr=%lx\n",
1034   0 :                    d->domain_id, gfn, mfn, nr_mfns);
1035   0 :
1036   0 :             ret = map_mmio_regions(d, _gfn(gfn), nr_mfns, _mfn(mfn));
1037   0 :             if ( ret < 0 )
1038   0 :                 printk(XENLOG_G_WARNING
1039   0 :                        "memory_map:fail: dom%d gfn=%lx mfn=%lx nr=%lx ret:%ld\n",
1040   0 :                        d->domain_id, gfn, mfn, nr_mfns, ret);
1041   0 :         }
1042   0 :         else
1043   0 :         {
1044   0 :             printk(XENLOG_G_DEBUG
1045   0 :                    "memory_map:remove: dom%d gfn=%lx mfn=%lx nr=%lx\n",
1046   0 :                    d->domain_id, gfn, mfn, nr_mfns);
1047   0 :
1048   0 :             ret = unmap_mmio_regions(d, _gfn(gfn), nr_mfns, _mfn(mfn));
1049   0 :             if ( ret < 0 && is_hardware_domain(current->domain) )
1050   0 :                 printk(XENLOG_ERR
1051   0 :                        "memory_map: error %ld removing dom%d access to [%lx,%lx]\n",
1052   0 :                        ret, d->domain_id, mfn, mfn_end);
1053   0 :         }
1054   0 :         /* Do this unconditionally to cover errors on above failure paths. */
1055   0 :         memory_type_changed(d);
1056   0 :         break;
1057   0 :     }
1058   0 :
1059   0 :     case XEN_DOMCTL_settimeoffset:
1060   0 :         domain_set_time_offset(d, op->u.settimeoffset.time_offset_seconds);
1061   0 :         break;
1062   0 :
1063   0 :     case XEN_DOMCTL_set_target:
1064   0 :     {
1065   0 :         struct domain *e;
1066   0 :
1067   0 :         ret = -ESRCH;
1068   0 :         e = get_domain_by_id(op->u.set_target.target);
1069   0 :         if ( e == NULL )
1070   0 :             break;
1071   0 :
1072   0 :         ret = -EINVAL;
1073   0 :         if ( (d == e) || (d->target != NULL) )
1074   0 :         {
1075   0 :             put_domain(e);
1076   0 :             break;
1077   0 :         }
1078   0 :
1079   0 :         ret = -EOPNOTSUPP;
1080   0 :         if ( is_hvm_domain(e) )
1081   0 :             ret = xsm_set_target(XSM_HOOK, d, e);
1082   0 :         if ( ret )
1083   0 :         {
1084   0 :             put_domain(e);
1085   0 :             break;
1086   0 :         }
1087   0 :
1088   0 :         /* Hold reference on @e until we destroy @d. */
1089   0 :         d->target = e;
1090   0 :         break;
1091   0 :     }
1092   0 :
1093   0 :     case XEN_DOMCTL_subscribe:
1094   0 :         d->suspend_evtchn = op->u.subscribe.port;
1095   0 :         break;
1096   0 :
1097   0 :     case XEN_DOMCTL_vm_event_op:
1098   0 :         ret = vm_event_domctl(d, &op->u.vm_event_op,
1099   0 :                               guest_handle_cast(u_domctl, void));
1100   0 :         copyback = 1;
1101   0 :         break;
1102   0 :
1103   0 : #ifdef CONFIG_HAS_MEM_ACCESS
1104   0 :     case XEN_DOMCTL_set_access_required:
1105   0 :         if ( unlikely(current->domain == d) ) /* no domain_pause() */
1106   0 :             ret = -EPERM;
1107   0 :         else
1108   0 :         {
1109   0 :             domain_pause(d);
1110   0 :             p2m_get_hostp2m(d)->access_required =
1111   0 :                 op->u.access_required.access_required;
1112   0 :             domain_unpause(d);
1113   0 :         }
1114   0 :         break;
1115   0 : #endif
1116   0 :
1117   0 :     case XEN_DOMCTL_set_virq_handler:
1118   0 :         ret = set_global_virq_handler(d, op->u.set_virq_handler.virq);
1119   0 :         break;
1120   0 :
1121   0 :     case XEN_DOMCTL_set_max_evtchn:
1122   0 :         d->max_evtchn_port = min_t(unsigned int,
1123   0 :                                    op->u.set_max_evtchn.max_port,
1124   0 :                                    INT_MAX);
1125   0 :         break;
1126   0 :
1127   0 :     case XEN_DOMCTL_setvnumainfo:
1128   0 :     {
1129   0 :         struct vnuma_info *vnuma;
1130   0 :
1131   0 :         vnuma = vnuma_init(&op->u.vnuma, d);
1132   0 :         if ( IS_ERR(vnuma) )
1133   0 :         {
1134   0 :             ret = PTR_ERR(vnuma);
1135   0 :             break;
1136   0 :         }
1137   0 :
1138   0 :         /* Overwrite the vnuma topology for the domain. */
1139   0 :         write_lock(&d->vnuma_rwlock);
1140   0 :         vnuma_destroy(d->vnuma);
1141   0 :         d->vnuma = vnuma;
1142   0 :         write_unlock(&d->vnuma_rwlock);
1143   0 :
1144   0 :         break;
1145   0 :     }
1146   0 :
1147   0 :     case XEN_DOMCTL_monitor_op:
1148   0 :         ret = monitor_domctl(d, &op->u.monitor_op);
1149   0 :         if ( !ret )
1150   0 :             copyback = 1;
1151   0 :         break;
1152   0 :
1153   0 :     case XEN_DOMCTL_set_gnttab_limits:
1154   0 :         ret = grant_table_set_limits(d, op->u.set_gnttab_limits.grant_frames,
1155   0 :                                      op->u.set_gnttab_limits.maptrack_frames);
1156   0 :         break;
1157   0 :
1158   0 :     default:
1159   0 :         ret = arch_do_domctl(op, d, u_domctl);
1160   0 :         break;
1161   3 :     }
1162   3 :
1163   3 :     domctl_lock_release();
1164   3 :
1165   3 :  domctl_out_unlock_domonly:
1166   3 :     if ( d )
1167   0 :         rcu_unlock_domain(d);
1168   3 :
1169   3 :     if ( copyback && __copy_to_guest(u_domctl, op, 1) )
1170   0 :         ret = -EFAULT;
1171   3 :
1172   3 :     return ret;
1173   3 : }
1174   - :
1175   - : /*
1176   - :  * Local variables:
1177   - :  * mode: C
1178   - :  * c-file-style: "BSD"
1179   - :  * c-basic-offset: 4
1180   - :  * tab-width: 4
1181   - :  * indent-tabs-mode: nil
1182   - :  * End:
1183   - :  */
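
Taken as a whole, do_domctl() is a single funnel: copy the request in,
reject mismatched interface versions, apply the XSM policy check, take the
global domctl lock (or continue the hypercall), dispatch on op->cmd, and
copy the possibly updated request back out when the handler asked for it.
A hedged sketch of the caller's side, e.g. for the getdomaininfo operation
exercised in this report (hypothetical plumbing; real toolstacks go through
libxc/privcmd):

    struct xen_domctl op = {
        .cmd = XEN_DOMCTL_getdomaininfo,
        /* Any other value earns -EACCES from the version check above. */
        .interface_version = XEN_DOMCTL_INTERFACE_VERSION,
        .domain = 0,   /* first domain with domid >= 0 is reported */
    };

    /* Issue __HYPERVISOR_domctl with a guest handle to 'op'; on success,
     * op.domain and op.u.getdomaininfo describe the domain found. */
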