Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/common/sched_null.c

Every instrumented line in this file was reported with an execution count of 0, i.e. nothing in it was executed during the measured run. The file's source follows.
/*
 * xen/common/sched_null.c
 *
 *  Copyright (c) 2017, Dario Faggioli, Citrix Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * The 'null' scheduler always chooses to run, on each pCPU, either nothing
 * (i.e., the pCPU stays idle) or always the same vCPU.
 *
 * It is aimed at supporting static scenarios, where there are always
 * fewer vCPUs than pCPUs (and the vCPUs don't need to move among pCPUs
 * for any reason), with the least possible overhead.
 *
 * Typical use cases are embedded applications, but also HPC, especially
 * if the scheduler is used inside a cpupool.
 */
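/*
 * Usage note (an illustrative addition, not part of the original file):
 * the scheduler is selected by its opt_name, "null". It can be chosen
 * host-wide with the "sched=null" Xen boot parameter, or confined to a
 * subset of pCPUs by putting it in charge of a cpupool, e.g. with an
 * xl cpupool configuration file along the lines of
 *
 *     name  = "pool-null"
 *     sched = "null"
 *     cpus  = ["4", "5", "6", "7"]
 *
 * created via `xl cpupool-create`, after which domains can be moved into
 * the pool with `xl cpupool-migrate`.
 */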

#include <xen/sched.h>
#include <xen/sched-if.h>
#include <xen/softirq.h>
#include <xen/keyhandler.h>
#include <xen/trace.h>

/*
 * null tracing events. Check include/public/trace.h for more details.
 */
#define TRC_SNULL_PICKED_CPU    TRC_SCHED_CLASS_EVT(SNULL, 1)
#define TRC_SNULL_VCPU_ASSIGN   TRC_SCHED_CLASS_EVT(SNULL, 2)
#define TRC_SNULL_VCPU_DEASSIGN TRC_SCHED_CLASS_EVT(SNULL, 3)
#define TRC_SNULL_MIGRATE       TRC_SCHED_CLASS_EVT(SNULL, 4)
#define TRC_SNULL_SCHEDULE      TRC_SCHED_CLASS_EVT(SNULL, 5)
#define TRC_SNULL_TASKLET       TRC_SCHED_CLASS_EVT(SNULL, 6)

/*
 * Locking:
 * - Scheduler-lock (a.k.a. runqueue lock):
 *  + is per-pCPU;
 *  + serializes assignment and deassignment of vCPUs to a pCPU.
 * - Private data lock (a.k.a. private scheduler lock):
 *  + is scheduler-wide;
 *  + serializes accesses to the list of domains in this scheduler.
 * - Waitqueue lock:
 *  + is scheduler-wide;
 *  + serializes accesses to the list of vCPUs waiting to be assigned
 *    to pCPUs.
 *
 * Ordering is: private lock, runqueue lock, waitqueue lock. In other words,
 * the waitqueue lock nests inside the runqueue lock, which nests inside the
 * private lock. More specifically:
 *  + if we need both runqueue and private locks, we must acquire the
 *    private lock first;
 *  + if we need both runqueue and waitqueue locks, we must acquire
 *    the runqueue lock first;
 *  + if we need both private and waitqueue locks, we must acquire
 *    the private lock first;
 *  + if we already own a runqueue lock, we must never acquire
 *    the private lock;
 *  + if we already own the waitqueue lock, we must never acquire
 *    the runqueue lock or the private lock.
 */
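/*
 * Illustrative sketch, not part of the original file: a hypothetical
 * operation that needed all three locks would have to take them in the
 * order documented above (private -> runqueue -> waitqueue) and release
 * them in reverse order. It only uses lock helpers that this file already
 * relies on; struct null_private is defined just below.
 */
#if 0 /* example only */
static void null_lock_order_example(struct null_private *prv, unsigned int cpu)
{
    spinlock_t *lock;
    unsigned long flags, rq_flags;

    spin_lock_irqsave(&prv->lock, flags);               /* 1: private lock   */
    lock = pcpu_schedule_lock_irqsave(cpu, &rq_flags);  /* 2: runqueue lock  */
    spin_lock(&prv->waitq_lock);                        /* 3: waitqueue lock */

    /* ... work on prv->ndom, per_cpu(npc, cpu) and prv->waitq here ... */

    spin_unlock(&prv->waitq_lock);
    pcpu_schedule_unlock_irqrestore(lock, rq_flags, cpu);
    spin_unlock_irqrestore(&prv->lock, flags);
}
#endif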

/*
 * System-wide private data
 */
struct null_private {
    spinlock_t lock;        /* scheduler lock; nests inside cpupool_lock */
    struct list_head ndom;  /* Domains of this scheduler                 */
    struct list_head waitq; /* vCPUs not assigned to any pCPU            */
    spinlock_t waitq_lock;  /* serializes waitq; nests inside runq locks */
    cpumask_t cpus_free;    /* CPUs without a vCPU associated to them    */
};

/*
 * Physical CPU
 */
struct null_pcpu {
    struct vcpu *vcpu;
};
DEFINE_PER_CPU(struct null_pcpu, npc);

/*
 * Virtual CPU
 */
struct null_vcpu {
    struct list_head waitq_elem;
    struct vcpu *vcpu;
};

/*
 * Domain
 */
struct null_dom {
    struct list_head ndom_elem;
    struct domain *dom;
};

/*
 * Accessor helper functions
 */
static inline struct null_private *null_priv(const struct scheduler *ops)
{
    return ops->sched_data;
}

static inline struct null_vcpu *null_vcpu(const struct vcpu *v)
{
    return v->sched_priv;
}

static inline struct null_dom *null_dom(const struct domain *d)
{
    return d->sched_priv;
}

static inline bool vcpu_check_affinity(struct vcpu *v, unsigned int cpu,
                                       unsigned int balance_step)
{
    affinity_balance_cpumask(v, balance_step, cpumask_scratch_cpu(cpu));
    cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
                cpupool_domain_cpumask(v->domain));

    return cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu));
}

static int null_init(struct scheduler *ops)
{
    struct null_private *prv;

    printk("Initializing null scheduler\n"
           "WARNING: This is experimental software in development.\n"
           "Use at your own risk.\n");

    prv = xzalloc(struct null_private);
    if ( prv == NULL )
        return -ENOMEM;

    spin_lock_init(&prv->lock);
    spin_lock_init(&prv->waitq_lock);
    INIT_LIST_HEAD(&prv->ndom);
    INIT_LIST_HEAD(&prv->waitq);

    ops->sched_data = prv;

    return 0;
}

static void null_deinit(struct scheduler *ops)
{
    xfree(ops->sched_data);
    ops->sched_data = NULL;
}

static void init_pdata(struct null_private *prv, unsigned int cpu)
{
    /* Mark the pCPU as free, and with no vCPU assigned */
    cpumask_set_cpu(cpu, &prv->cpus_free);
    per_cpu(npc, cpu).vcpu = NULL;
}

static void null_init_pdata(const struct scheduler *ops, void *pdata, int cpu)
{
    struct null_private *prv = null_priv(ops);
    struct schedule_data *sd = &per_cpu(schedule_data, cpu);

    /* alloc_pdata is not implemented, so we want this to be NULL. */
    ASSERT(!pdata);

    /*
     * The scheduler lock already points to the default per-cpu spinlock,
     * so there is no remapping to be done.
     */
    ASSERT(sd->schedule_lock == &sd->_lock && !spin_is_locked(&sd->_lock));

    init_pdata(prv, cpu);
}

static void null_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
{
    struct null_private *prv = null_priv(ops);

    /* alloc_pdata not implemented, so this must have stayed NULL */
    ASSERT(!pcpu);

    cpumask_clear_cpu(cpu, &prv->cpus_free);
    per_cpu(npc, cpu).vcpu = NULL;
}

static void *null_alloc_vdata(const struct scheduler *ops,
                              struct vcpu *v, void *dd)
{
    struct null_vcpu *nvc;

    nvc = xzalloc(struct null_vcpu);
    if ( nvc == NULL )
        return NULL;

    INIT_LIST_HEAD(&nvc->waitq_elem);
    nvc->vcpu = v;

    SCHED_STAT_CRANK(vcpu_alloc);

    return nvc;
}

static void null_free_vdata(const struct scheduler *ops, void *priv)
{
    struct null_vcpu *nvc = priv;

    xfree(nvc);
}

static void * null_alloc_domdata(const struct scheduler *ops,
                                 struct domain *d)
{
    struct null_private *prv = null_priv(ops);
    struct null_dom *ndom;
    unsigned long flags;

    ndom = xzalloc(struct null_dom);
    if ( ndom == NULL )
        return NULL;

    ndom->dom = d;

    spin_lock_irqsave(&prv->lock, flags);
    list_add_tail(&ndom->ndom_elem, &null_priv(ops)->ndom);
    spin_unlock_irqrestore(&prv->lock, flags);

    return (void*)ndom;
}

static void null_free_domdata(const struct scheduler *ops, void *data)
{
    unsigned long flags;
    struct null_dom *ndom = data;
    struct null_private *prv = null_priv(ops);

    spin_lock_irqsave(&prv->lock, flags);
    list_del_init(&ndom->ndom_elem);
    spin_unlock_irqrestore(&prv->lock, flags);

    xfree(data);
}

static int null_dom_init(const struct scheduler *ops, struct domain *d)
{
    struct null_dom *ndom;

    if ( is_idle_domain(d) )
        return 0;

    ndom = null_alloc_domdata(ops, d);
    if ( ndom == NULL )
        return -ENOMEM;

    d->sched_priv = ndom;

    return 0;
}
static void null_dom_destroy(const struct scheduler *ops, struct domain *d)
{
    null_free_domdata(ops, null_dom(d));
}

/*
 * vCPU to pCPU assignment and placement. This _only_ happens:
 *  - on insert,
 *  - on migrate.
 *
 * Insert occurs when a vCPU joins this scheduler for the first time
 * (e.g., when the domain it's part of is moved to the scheduler's
 * cpupool).
 *
 * Migration may be necessary if a pCPU (with a vCPU assigned to it)
 * is removed from the scheduler's cpupool.
 *
 * So this is not part of any hot path.
 */
static unsigned int pick_cpu(struct null_private *prv, struct vcpu *v)
{
    unsigned int bs;
    unsigned int cpu = v->processor, new_cpu;
    cpumask_t *cpus = cpupool_domain_cpumask(v->domain);

    ASSERT(spin_is_locked(per_cpu(schedule_data, cpu).schedule_lock));

    for_each_affinity_balance_step( bs )
    {
        if ( bs == BALANCE_SOFT_AFFINITY &&
             !has_soft_affinity(v, v->cpu_hard_affinity) )
            continue;

        affinity_balance_cpumask(v, bs, cpumask_scratch_cpu(cpu));
        cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu), cpus);

        /*
         * If our processor is free, or we are assigned to it, and it is also
         * still valid and part of our affinity, just go for it.
         * (Note that we may call vcpu_check_affinity(), but we deliberately
         * don't, so we get to keep in the scratch cpumask what we have just
         * put in it.)
         */
        if ( likely((per_cpu(npc, cpu).vcpu == NULL || per_cpu(npc, cpu).vcpu == v)
                    && cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu))) )
        {
            new_cpu = cpu;
            goto out;
        }

        /* If not, just go for a free pCPU, within our affinity, if any */
        cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
                    &prv->cpus_free);
        new_cpu = cpumask_first(cpumask_scratch_cpu(cpu));

        if ( likely(new_cpu != nr_cpu_ids) )
            goto out;
    }

    /*
     * If we didn't find any free pCPU, just pick any valid pcpu, even if
     * it has another vCPU assigned. This will happen during shutdown and
     * suspend/resume, but it may also happen during "normal operation", if
     * all the pCPUs are busy.
     *
     * In fact, there must always be something sane in v->processor, or
     * vcpu_schedule_lock() and friends won't work. This is not a problem,
     * as we will actually assign the vCPU to the pCPU we return from here,
     * only if the pCPU is free.
     */
    cpumask_and(cpumask_scratch_cpu(cpu), cpus, v->cpu_hard_affinity);
    new_cpu = cpumask_any(cpumask_scratch_cpu(cpu));

 out:
    if ( unlikely(tb_init_done) )
    {
        struct {
            uint16_t vcpu, dom;
            uint32_t new_cpu;
        } d;
        d.dom = v->domain->domain_id;
        d.vcpu = v->vcpu_id;
        d.new_cpu = new_cpu;
        __trace_var(TRC_SNULL_PICKED_CPU, 1, sizeof(d), &d);
    }

    return new_cpu;
}

static void vcpu_assign(struct null_private *prv, struct vcpu *v,
                        unsigned int cpu)
{
    per_cpu(npc, cpu).vcpu = v;
    v->processor = cpu;
    cpumask_clear_cpu(cpu, &prv->cpus_free);

    dprintk(XENLOG_G_INFO, "%d <-- d%dv%d\n", cpu, v->domain->domain_id, v->vcpu_id);

    if ( unlikely(tb_init_done) )
    {
        struct {
            uint16_t vcpu, dom;
            uint32_t cpu;
        } d;
        d.dom = v->domain->domain_id;
        d.vcpu = v->vcpu_id;
        d.cpu = cpu;
        __trace_var(TRC_SNULL_VCPU_ASSIGN, 1, sizeof(d), &d);
    }
}

static void vcpu_deassign(struct null_private *prv, struct vcpu *v,
                          unsigned int cpu)
{
    per_cpu(npc, cpu).vcpu = NULL;
    cpumask_set_cpu(cpu, &prv->cpus_free);

    dprintk(XENLOG_G_INFO, "%d <-- NULL (d%dv%d)\n", cpu, v->domain->domain_id, v->vcpu_id);

    if ( unlikely(tb_init_done) )
    {
        struct {
            uint16_t vcpu, dom;
            uint32_t cpu;
        } d;
        d.dom = v->domain->domain_id;
        d.vcpu = v->vcpu_id;
        d.cpu = cpu;
        __trace_var(TRC_SNULL_VCPU_DEASSIGN, 1, sizeof(d), &d);
    }
}

/* Change the scheduler of cpu to us (null). */
static void null_switch_sched(struct scheduler *new_ops, unsigned int cpu,
                              void *pdata, void *vdata)
{
    struct schedule_data *sd = &per_cpu(schedule_data, cpu);
    struct null_private *prv = null_priv(new_ops);
    struct null_vcpu *nvc = vdata;

    ASSERT(nvc && is_idle_vcpu(nvc->vcpu));

    idle_vcpu[cpu]->sched_priv = vdata;

    /*
     * We are holding the runqueue lock already (it's been taken in
     * schedule_cpu_switch()). It actually may or may not be the 'right'
     * one for this cpu, but that is ok for preventing races.
     */
    ASSERT(!local_irq_is_enabled());

    init_pdata(prv, cpu);

    per_cpu(scheduler, cpu) = new_ops;
    per_cpu(schedule_data, cpu).sched_priv = pdata;

    /*
     * (Re?)route the lock to the per pCPU lock as /last/ thing. In fact,
     * if it is free (and it can be) we want anyone who manages to take it
     * to find all the initializations we've done above in place.
     */
    smp_mb();
    sd->schedule_lock = &sd->_lock;
}

static void null_vcpu_insert(const struct scheduler *ops, struct vcpu *v)
{
    struct null_private *prv = null_priv(ops);
    struct null_vcpu *nvc = null_vcpu(v);
    unsigned int cpu;
    spinlock_t *lock;

    ASSERT(!is_idle_vcpu(v));

    lock = vcpu_schedule_lock_irq(v);
 retry:

    cpu = v->processor = pick_cpu(prv, v);

    spin_unlock(lock);

    lock = vcpu_schedule_lock(v);

    cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity,
                cpupool_domain_cpumask(v->domain));

    /* If the pCPU is free, we assign v to it */
    if ( likely(per_cpu(npc, cpu).vcpu == NULL) )
    {
        /*
         * Insert is followed by vcpu_wake(), so there's no need to poke
         * the pcpu with the SCHEDULE_SOFTIRQ, as wake will do that.
         */
        vcpu_assign(prv, v, cpu);
    }
    else if ( cpumask_intersects(&prv->cpus_free, cpumask_scratch_cpu(cpu)) )
    {
        /*
         * If the pCPU is not free (e.g., because we raced with another
         * insert or a migrate), but there are other free pCPUs, we can
         * try to pick again.
         */
         goto retry;
    }
    else
    {
        /*
         * If the pCPU is not free, and there aren't any (valid) others,
         * we have no alternative but to go into the waitqueue.
         */
        spin_lock(&prv->waitq_lock);
        list_add_tail(&nvc->waitq_elem, &prv->waitq);
        dprintk(XENLOG_G_WARNING, "WARNING: d%dv%d not assigned to any CPU!\n",
                v->domain->domain_id, v->vcpu_id);
        spin_unlock(&prv->waitq_lock);
    }
    spin_unlock_irq(lock);

    SCHED_STAT_CRANK(vcpu_insert);
}

static void _vcpu_remove(struct null_private *prv, struct vcpu *v)
{
    unsigned int bs;
    unsigned int cpu = v->processor;
    struct null_vcpu *wvc;

    ASSERT(list_empty(&null_vcpu(v)->waitq_elem));

    vcpu_deassign(prv, v, cpu);

    spin_lock(&prv->waitq_lock);

    /*
     * If v is assigned to a pCPU, let's see if there is someone waiting,
     * suitable to be assigned to it (prioritizing vcpus that have
     * soft-affinity with cpu).
     */
    for_each_affinity_balance_step( bs )
    {
        list_for_each_entry( wvc, &prv->waitq, waitq_elem )
        {
            if ( bs == BALANCE_SOFT_AFFINITY &&
                 !has_soft_affinity(wvc->vcpu, wvc->vcpu->cpu_hard_affinity) )
                continue;

            if ( vcpu_check_affinity(wvc->vcpu, cpu, bs) )
            {
                list_del_init(&wvc->waitq_elem);
                vcpu_assign(prv, wvc->vcpu, cpu);
                cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
                spin_unlock(&prv->waitq_lock);
                return;
            }
        }
    }
    spin_unlock(&prv->waitq_lock);
}

static void null_vcpu_remove(const struct scheduler *ops, struct vcpu *v)
{
    struct null_private *prv = null_priv(ops);
    struct null_vcpu *nvc = null_vcpu(v);
    spinlock_t *lock;

    ASSERT(!is_idle_vcpu(v));

    lock = vcpu_schedule_lock_irq(v);

    /* If v is in waitqueue, just get it out of there and bail */
    if ( unlikely(!list_empty(&nvc->waitq_elem)) )
    {
        spin_lock(&prv->waitq_lock);
        list_del_init(&nvc->waitq_elem);
        spin_unlock(&prv->waitq_lock);

        goto out;
    }

    ASSERT(per_cpu(npc, v->processor).vcpu == v);
    ASSERT(!cpumask_test_cpu(v->processor, &prv->cpus_free));

    _vcpu_remove(prv, v);

 out:
    vcpu_schedule_unlock_irq(lock, v);

    SCHED_STAT_CRANK(vcpu_remove);
}

static void null_vcpu_wake(const struct scheduler *ops, struct vcpu *v)
{
    ASSERT(!is_idle_vcpu(v));

    if ( unlikely(curr_on_cpu(v->processor) == v) )
    {
        SCHED_STAT_CRANK(vcpu_wake_running);
        return;
    }

    if ( unlikely(!list_empty(&null_vcpu(v)->waitq_elem)) )
    {
        /* Not exactly "on runq", but close enough for reusing the counter */
        SCHED_STAT_CRANK(vcpu_wake_onrunq);
        return;
    }

    if ( likely(vcpu_runnable(v)) )
        SCHED_STAT_CRANK(vcpu_wake_runnable);
    else
        SCHED_STAT_CRANK(vcpu_wake_not_runnable);

    /* Note that we get here only for vCPUs assigned to a pCPU */
    cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);
}

static void null_vcpu_sleep(const struct scheduler *ops, struct vcpu *v)
{
    ASSERT(!is_idle_vcpu(v));

    /* If v is not assigned to a pCPU, or is not running, no need to bother */
    if ( curr_on_cpu(v->processor) == v )
        cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);

    SCHED_STAT_CRANK(vcpu_sleep);
}

static int null_cpu_pick(const struct scheduler *ops, struct vcpu *v)
{
    ASSERT(!is_idle_vcpu(v));
    return pick_cpu(null_priv(ops), v);
}

static void null_vcpu_migrate(const struct scheduler *ops, struct vcpu *v,
                              unsigned int new_cpu)
{
    struct null_private *prv = null_priv(ops);
    struct null_vcpu *nvc = null_vcpu(v);

    ASSERT(!is_idle_vcpu(v));

    if ( v->processor == new_cpu )
        return;

    if ( unlikely(tb_init_done) )
    {
        struct {
            uint16_t vcpu, dom;
            uint16_t cpu, new_cpu;
        } d;
        d.dom = v->domain->domain_id;
        d.vcpu = v->vcpu_id;
        d.cpu = v->processor;
        d.new_cpu = new_cpu;
        __trace_var(TRC_SNULL_MIGRATE, 1, sizeof(d), &d);
    }

    /*
     * v is either assigned to a pCPU, or in the waitqueue.
     *
     * In the former case, the pCPU to which it was assigned would
     * become free, and we, therefore, should check whether there is
     * anyone in the waitqueue that can be assigned to it.
     *
     * In the latter, there is just nothing to do.
     */
    if ( likely(list_empty(&nvc->waitq_elem)) )
    {
        _vcpu_remove(prv, v);
        SCHED_STAT_CRANK(migrate_running);
    }
    else
        SCHED_STAT_CRANK(migrate_on_runq);

    SCHED_STAT_CRANK(migrated);

    /*
     * Let's now consider new_cpu, which is where v is being sent. It can be
     * either free, or have a vCPU already assigned to it.
     *
     * In the former case, we should assign v to it, and try to get it to run,
     * if possible, according to affinity.
     *
     * In the latter, all we can do is to park v in the waitqueue.
     */
    if ( per_cpu(npc, new_cpu).vcpu == NULL &&
         vcpu_check_affinity(v, new_cpu, BALANCE_HARD_AFFINITY) )
    {
        /* v might have been in the waitqueue, so remove it */
        spin_lock(&prv->waitq_lock);
        list_del_init(&nvc->waitq_elem);
        spin_unlock(&prv->waitq_lock);

        vcpu_assign(prv, v, new_cpu);
    }
    else
    {
        /* Put v in the waitqueue, if it wasn't there already */
        spin_lock(&prv->waitq_lock);
        if ( list_empty(&nvc->waitq_elem) )
        {
            list_add_tail(&nvc->waitq_elem, &prv->waitq);
            dprintk(XENLOG_G_WARNING, "WARNING: d%dv%d not assigned to any CPU!\n",
                    v->domain->domain_id, v->vcpu_id);
        }
        spin_unlock(&prv->waitq_lock);
    }

    /*
     * Whatever happened above, we always at least override v->processor.
     * This is especially important for shutdown or suspend/resume paths,
     * when it is important to let our caller (cpu_disable_scheduler())
     * know that the migration did happen, to the best of our ability,
     * at least. In case of suspend, any temporary inconsistency caused
     * by this will be fixed up during resume.
     */
    v->processor = new_cpu;
}

#ifndef NDEBUG
static inline void null_vcpu_check(struct vcpu *v)
{
    struct null_vcpu * const nvc = null_vcpu(v);
    struct null_dom * const ndom = null_dom(v->domain);

    BUG_ON(nvc->vcpu != v);

    if ( ndom )
        BUG_ON(is_idle_vcpu(v));
    else
        BUG_ON(!is_idle_vcpu(v));

    SCHED_STAT_CRANK(vcpu_check);
}
#define NULL_VCPU_CHECK(v)  (null_vcpu_check(v))
#else
#define NULL_VCPU_CHECK(v)
#endif


/*
 * The simplest scheduling function of all time! We either return:
 *  - the vCPU assigned to the pCPU, if there's one and it can run;
 *  - the idle vCPU, otherwise.
 */
static struct task_slice null_schedule(const struct scheduler *ops,
                                       s_time_t now,
                                       bool_t tasklet_work_scheduled)
{
    unsigned int bs;
    const unsigned int cpu = smp_processor_id();
    struct null_private *prv = null_priv(ops);
    struct null_vcpu *wvc;
    struct task_slice ret;

    SCHED_STAT_CRANK(schedule);
    NULL_VCPU_CHECK(current);

    if ( unlikely(tb_init_done) )
    {
        struct {
            uint16_t tasklet, cpu;
            int16_t vcpu, dom;
        } d;
        d.cpu = cpu;
        d.tasklet = tasklet_work_scheduled;
        if ( per_cpu(npc, cpu).vcpu == NULL )
        {
            d.vcpu = d.dom = -1;
        }
        else
        {
            d.vcpu = per_cpu(npc, cpu).vcpu->vcpu_id;
            d.dom = per_cpu(npc, cpu).vcpu->domain->domain_id;
        }
        __trace_var(TRC_SNULL_SCHEDULE, 1, sizeof(d), &d);
    }

    if ( tasklet_work_scheduled )
    {
        trace_var(TRC_SNULL_TASKLET, 1, 0, NULL);
        ret.task = idle_vcpu[cpu];
    }
    else
        ret.task = per_cpu(npc, cpu).vcpu;
    ret.migrated = 0;
    ret.time = -1;

    /*
     * We may be new in the cpupool, or just coming back online, in which
     * case there may be vCPUs in the waitqueue that we can assign to us
     * and run.
     */
    if ( unlikely(ret.task == NULL) )
    {
        spin_lock(&prv->waitq_lock);

        if ( list_empty(&prv->waitq) )
            goto unlock;

        /*
         * We scan the waitqueue twice, for prioritizing vcpus that have
         * soft-affinity with cpu. This may look like something expensive to
         * do here in null_schedule(), but it's actually fine, because we do
         * it only in cases where a pcpu has no vcpu associated (e.g., as
         * said above, the cpu has just joined a cpupool).
         */
        for_each_affinity_balance_step( bs )
        {
            list_for_each_entry( wvc, &prv->waitq, waitq_elem )
            {
                if ( bs == BALANCE_SOFT_AFFINITY &&
                     !has_soft_affinity(wvc->vcpu, wvc->vcpu->cpu_hard_affinity) )
                    continue;

                if ( vcpu_check_affinity(wvc->vcpu, cpu, bs) )
                {
                    vcpu_assign(prv, wvc->vcpu, cpu);
                    list_del_init(&wvc->waitq_elem);
                    ret.task = wvc->vcpu;
                    goto unlock;
                }
            }
        }
 unlock:
        spin_unlock(&prv->waitq_lock);
    }

    if ( unlikely(ret.task == NULL || !vcpu_runnable(ret.task)) )
        ret.task = idle_vcpu[cpu];

    NULL_VCPU_CHECK(ret.task);
    return ret;
}

static inline void dump_vcpu(struct null_private *prv, struct null_vcpu *nvc)
{
    printk("[%i.%i] pcpu=%d", nvc->vcpu->domain->domain_id,
            nvc->vcpu->vcpu_id, list_empty(&nvc->waitq_elem) ?
                                nvc->vcpu->processor : -1);
}

static void null_dump_pcpu(const struct scheduler *ops, int cpu)
{
    struct null_private *prv = null_priv(ops);
    struct null_vcpu *nvc;
    spinlock_t *lock;
    unsigned long flags;
#define cpustr keyhandler_scratch

    lock = pcpu_schedule_lock_irqsave(cpu, &flags);

    cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_sibling_mask, cpu));
    printk("CPU[%02d] sibling=%s, ", cpu, cpustr);
    cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_core_mask, cpu));
    printk("core=%s", cpustr);
    if ( per_cpu(npc, cpu).vcpu != NULL )
        printk(", vcpu=d%dv%d", per_cpu(npc, cpu).vcpu->domain->domain_id,
               per_cpu(npc, cpu).vcpu->vcpu_id);
    printk("\n");

    /* current VCPU (nothing to say if that's the idle vcpu) */
    nvc = null_vcpu(curr_on_cpu(cpu));
    if ( nvc && !is_idle_vcpu(nvc->vcpu) )
    {
        printk("\trun: ");
        dump_vcpu(prv, nvc);
        printk("\n");
    }

    pcpu_schedule_unlock_irqrestore(lock, flags, cpu);
#undef cpustr
}

static void null_dump(const struct scheduler *ops)
{
    struct null_private *prv = null_priv(ops);
    struct list_head *iter;
    unsigned long flags;
    unsigned int loop;
#define cpustr keyhandler_scratch

    spin_lock_irqsave(&prv->lock, flags);

    cpulist_scnprintf(cpustr, sizeof(cpustr), &prv->cpus_free);
    printk("\tcpus_free = %s\n", cpustr);

    printk("Domain info:\n");
    loop = 0;
    list_for_each( iter, &prv->ndom )
    {
        struct null_dom *ndom;
        struct vcpu *v;

        ndom = list_entry(iter, struct null_dom, ndom_elem);

        printk("\tDomain: %d\n", ndom->dom->domain_id);
        for_each_vcpu( ndom->dom, v )
        {
            struct null_vcpu * const nvc = null_vcpu(v);
            spinlock_t *lock;

            lock = vcpu_schedule_lock(nvc->vcpu);

            printk("\t%3d: ", ++loop);
            dump_vcpu(prv, nvc);
            printk("\n");

            vcpu_schedule_unlock(lock, nvc->vcpu);
        }
    }

    printk("Waitqueue: ");
    loop = 0;
    spin_lock(&prv->waitq_lock);
    list_for_each( iter, &prv->waitq )
    {
        struct null_vcpu *nvc = list_entry(iter, struct null_vcpu, waitq_elem);

        if ( loop++ != 0 )
            printk(", ");
        if ( loop % 24 == 0 )
            printk("\n\t");
        printk("d%dv%d", nvc->vcpu->domain->domain_id, nvc->vcpu->vcpu_id);
    }
    printk("\n");
    spin_unlock(&prv->waitq_lock);

    spin_unlock_irqrestore(&prv->lock, flags);
#undef cpustr
}

const struct scheduler sched_null_def = {
    .name           = "null Scheduler",
    .opt_name       = "null",
    .sched_id       = XEN_SCHEDULER_NULL,
    .sched_data     = NULL,

    .init           = null_init,
    .deinit         = null_deinit,
    .init_pdata     = null_init_pdata,
    .switch_sched   = null_switch_sched,
    .deinit_pdata   = null_deinit_pdata,

    .alloc_vdata    = null_alloc_vdata,
    .free_vdata     = null_free_vdata,
    .alloc_domdata  = null_alloc_domdata,
    .free_domdata   = null_free_domdata,

    .init_domain    = null_dom_init,
    .destroy_domain = null_dom_destroy,

    .insert_vcpu    = null_vcpu_insert,
    .remove_vcpu    = null_vcpu_remove,

    .wake           = null_vcpu_wake,
    .sleep          = null_vcpu_sleep,
    .pick_cpu       = null_cpu_pick,
    .migrate        = null_vcpu_migrate,
    .do_schedule    = null_schedule,

    .dump_cpu_state = null_dump_pcpu,
    .dump_settings  = null_dump,
};

REGISTER_SCHEDULER(sched_null_def);