/root/src/xen/xen/common/sched_rt.c
Line | Count | Source |
1 | | /***************************************************************************** |
2 | | * Preemptive Global Earliest Deadline First (EDF) scheduler for Xen |
3 | | * EDF scheduling is a real-time scheduling algorithm used in the embedded field. |
4 | | * |
5 | | * by Sisu Xi, 2013, Washington University in Saint Louis |
6 | | * Meng Xu, 2014-2016, University of Pennsylvania |
7 | | * |
8 | | * Conversion toward event driven model by Tianyang Chen |
9 | | * and Dagaen Golomb, 2016, University of Pennsylvania |
10 | | * |
11 | | * based on the code of the Credit scheduler |
12 | | */ |
13 | | |
14 | | #include <xen/init.h> |
15 | | #include <xen/lib.h> |
16 | | #include <xen/sched.h> |
17 | | #include <xen/domain.h> |
18 | | #include <xen/delay.h> |
19 | | #include <xen/event.h> |
20 | | #include <xen/time.h> |
21 | | #include <xen/timer.h> |
22 | | #include <xen/perfc.h> |
23 | | #include <xen/sched-if.h> |
24 | | #include <xen/softirq.h> |
25 | | #include <asm/atomic.h> |
26 | | #include <xen/errno.h> |
27 | | #include <xen/trace.h> |
28 | | #include <xen/cpu.h> |
29 | | #include <xen/keyhandler.h> |
30 | | #include <xen/trace.h> |
31 | | #include <xen/err.h> |
32 | | #include <xen/guest_access.h> |
33 | | |
34 | | /* |
35 | | * TODO: |
36 | | * |
37 | | * Migration compensation and resistance, like credit2, to make better use of caches; |
38 | | * Lock holder problem: address it using yield? |
39 | | * Self-switch problem: VCPUs of the same domain may preempt each other; |
40 | | */ |
41 | | |
42 | | /* |
43 | | * Design: |
44 | | * |
45 | | * This scheduler follows the Preemptive Global Earliest Deadline First (EDF) |
46 | | * theory from the real-time field. |
47 | | * At any scheduling point, the VCPU with earlier deadline has higher priority. |
48 | | * The scheduler always picks the highest-priority VCPU to run on a feasible PCPU. |
49 | | * A PCPU is feasible for a VCPU if the VCPU can run on it, and the PCPU is either |
50 | | * idle or running a lower-priority VCPU. |
51 | | * |
52 | | * Each VCPU has a dedicated period, a budget and an extratime flag. |
53 | | * The deadline of a VCPU is at the end of each period; |
54 | | * A VCPU has its budget replenished at the beginning of each period; |
55 | | * While scheduled, a VCPU burns its budget. |
56 | | * The VCPU needs to finish its budget before its deadline in each period; |
57 | | * The VCPU discards its unused budget at the end of each period. |
58 | | * When a VCPU runs out of budget in a period, if its extratime flag is set, |
59 | | * the VCPU increases its priority_level by 1 and refills its budget; otherwise, |
60 | | * it has to wait until the next period. |
61 | | * |
62 | | * Each VCPU is implemented as a deferrable server. |
63 | | * When a VCPU has a task running on it, its budget is continuously burned; |
64 | | * When a VCPU has no task but still has budget left, its budget is preserved. |
65 | | * |
66 | | * Queue scheme: |
67 | | * A global runqueue and a global depletedqueue for each CPU pool. |
68 | | * The runqueue holds all runnable VCPUs with budget, |
69 | | * sorted by priority_level and deadline; |
70 | | * The depletedqueue holds all VCPUs without budget, unsorted; |
71 | | * |
72 | | * Note: cpumasks and cpupools are supported. |
73 | | */ |
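/*
 * Worked example (illustrative numbers, matching the defaults defined
 * below): a VCPU with period = 10ms and budget = 4ms holds a 40%
 * reservation. If it exhausts its 4ms of budget at, say, 6ms into a
 * period, it sits on the depletedqueue until the replenishment at the
 * start of the next period; if its extratime flag is set, it instead
 * gets a fresh 4ms immediately, but at priority_level 1, so it only
 * runs when no VCPU still within its reservation (priority_level 0)
 * wants the PCPU.
 */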
74 | | |
75 | | /* |
76 | | * Locking: |
77 | | * A global system lock is used to protect the RunQ and DepletedQ. |
78 | | * The global lock is referenced by schedule_data.schedule_lock |
79 | | * from all physical cpus. |
80 | | * |
81 | | * The lock is already grabbed when the wake/sleep/schedule functions |
82 | | * in schedule.c are called. |
83 | | * |
84 | | * The functions that involve the RunQ and need to grab the lock are: |
85 | | * vcpu_insert, vcpu_remove, context_saved, runq_insert |
86 | | */ |
87 | | |
88 | | |
89 | | /* |
90 | | * Default parameters: |
91 | | * The default period and budget are 10 ms and 4 ms, respectively. |
92 | | */ |
93 | 0 | #define RTDS_DEFAULT_PERIOD (MICROSECS(10000)) |
94 | 0 | #define RTDS_DEFAULT_BUDGET (MICROSECS(4000)) |
95 | | |
96 | | /* |
97 | | * Max period: the maximum delta of the time type; because the period is added |
98 | | * to the time at which a vcpu activates, this sum must not overflow. |
99 | | * Min period: 10 us, considering the scheduling overhead (when period is |
100 | | * too low, scheduling is invoked too frequently, causing high overhead). |
101 | | */ |
102 | 0 | #define RTDS_MAX_PERIOD (STIME_DELTA_MAX) |
103 | 0 | #define RTDS_MIN_PERIOD (MICROSECS(10)) |
104 | | |
105 | | /* |
106 | | * Min budget: 10 us, considering the scheduling overhead (when budget is |
107 | | * consumed too fast, scheduling is invoked too frequently, causing |
108 | | * high overhead). |
109 | | */ |
110 | 0 | #define RTDS_MIN_BUDGET (MICROSECS(10)) |
111 | | |
112 | | /* |
113 | | * UPDATE_LIMIT_SHIFT: a constant used in rt_update_deadline(). When finding |
114 | | * the next deadline, performing addition could be faster if the difference |
115 | | * between cur_deadline and now is small. If the difference is bigger than |
116 | | * 1024 * period, use multiplication. |
117 | | */ |
118 | 0 | #define UPDATE_LIMIT_SHIFT 10 |
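/*
 * For example, with the default 10ms period the threshold is
 * 1024 * 10ms ~= 10.2s: if a vcpu's deadline lies less than ~10.2s in
 * the past, rt_update_deadline() just keeps adding the period until the
 * deadline moves past 'now' (typically a single addition); if the vcpu
 * has been inactive for longer than that, a single division computes how
 * many whole periods have elapsed and the deadline is advanced in one step.
 */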
119 | | |
120 | | /* |
121 | | * Flags |
122 | | */ |
123 | | /* |
124 | | * RTDS_scheduled: Is this vcpu either running on, or context-switching off, |
125 | | * a physical cpu? |
126 | | * + Accessed only with global lock held. |
127 | | * + Set when chosen as next in rt_schedule(). |
128 | | * + Cleared after context switch has been saved in rt_context_saved() |
129 | | * + Checked in vcpu_wake to see if we can add to the Runqueue, or if we should |
130 | | * set RTDS_delayed_runq_add |
131 | | * + Checked to be false in runq_insert. |
132 | | */ |
133 | | #define __RTDS_scheduled 1 |
134 | | #define RTDS_scheduled (1<<__RTDS_scheduled) |
135 | | /* |
136 | | * RTDS_delayed_runq_add: Do we need to add this to the RunQ/DepletedQ |
137 | | * once it's done being context switched out? |
138 | | * + Set when scheduling out in rt_schedule() if prev is runnable |
139 | | * + Set in rt_vcpu_wake if it finds RTDS_scheduled set |
140 | | * + Read in rt_context_saved(). If set, it adds prev to the Runqueue/DepletedQ |
141 | | * and clears the bit. |
142 | | */ |
143 | 0 | #define __RTDS_delayed_runq_add 2 |
144 | 0 | #define RTDS_delayed_runq_add (1<<__RTDS_delayed_runq_add) |
145 | | |
146 | | /* |
147 | | * RTDS_depleted: Has this vcpu run out of budget? |
148 | | * This flag is |
149 | | * + set in burn_budget() if a vcpu has zero budget left; |
150 | | * + cleared and checked in the replenishment handler, |
151 | | * for the vcpus that are being replenished. |
152 | | */ |
153 | | #define __RTDS_depleted 3 |
154 | | #define RTDS_depleted (1<<__RTDS_depleted) |
155 | | |
156 | | /* |
157 | | * RTDS_extratime: Can the vcpu run in the time that is |
158 | | * not part of any real-time reservation, and would therefore |
159 | | * be otherwise left idle? |
160 | | */ |
161 | 0 | #define __RTDS_extratime 4 |
162 | 0 | #define RTDS_extratime (1<<__RTDS_extratime) |
163 | | |
164 | | /* |
165 | | * rt tracing events ("only" 512 available!). Check |
166 | | * include/public/trace.h for more details. |
167 | | */ |
168 | 0 | #define TRC_RTDS_TICKLE TRC_SCHED_CLASS_EVT(RTDS, 1) |
169 | 0 | #define TRC_RTDS_RUNQ_PICK TRC_SCHED_CLASS_EVT(RTDS, 2) |
170 | 0 | #define TRC_RTDS_BUDGET_BURN TRC_SCHED_CLASS_EVT(RTDS, 3) |
171 | 0 | #define TRC_RTDS_BUDGET_REPLENISH TRC_SCHED_CLASS_EVT(RTDS, 4) |
172 | 0 | #define TRC_RTDS_SCHED_TASKLET TRC_SCHED_CLASS_EVT(RTDS, 5) |
173 | 0 | #define TRC_RTDS_SCHEDULE TRC_SCHED_CLASS_EVT(RTDS, 6) |
174 | | |
175 | | static void repl_timer_handler(void *data); |
176 | | |
177 | | /* |
178 | | * System-wide private data, including the global RunQueue/DepletedQ. |
179 | | * Global lock is referenced by schedule_data.schedule_lock from all |
180 | | * physical cpus. It can be grabbed via vcpu_schedule_lock_irq() |
181 | | */ |
182 | | struct rt_private { |
183 | | spinlock_t lock; /* the global coarse-grained lock */ |
184 | | struct list_head sdom; /* list of available domains, used for dump */ |
185 | | |
186 | | struct list_head runq; /* ordered list of runnable vcpus */ |
187 | | struct list_head depletedq; /* unordered list of depleted vcpus */ |
188 | | |
189 | | struct timer *repl_timer; /* replenishment timer */ |
190 | | struct list_head replq; /* ordered list of vcpus that need replenishment */ |
191 | | |
192 | | cpumask_t tickled; /* cpus that have been tickled */ |
193 | | }; |
194 | | |
195 | | /* |
196 | | * Virtual CPU |
197 | | */ |
198 | | struct rt_vcpu { |
199 | | struct list_head q_elem; /* on the runq/depletedq list */ |
200 | | struct list_head replq_elem; /* on the replenishment events list */ |
201 | | |
202 | | /* VCPU parameters, in nanoseconds */ |
203 | | s_time_t period; |
204 | | s_time_t budget; |
205 | | |
206 | | /* Current VCPU information, in nanoseconds */ |
207 | | s_time_t cur_budget; /* current budget */ |
208 | | s_time_t last_start; /* last start time */ |
209 | | s_time_t cur_deadline; /* current deadline for EDF */ |
210 | | |
211 | | /* Up-pointers */ |
212 | | struct rt_dom *sdom; |
213 | | struct vcpu *vcpu; |
214 | | |
215 | | unsigned priority_level; |
216 | | |
217 | | unsigned flags; /* mark __RTDS_scheduled, etc.. */ |
218 | | }; |
219 | | |
220 | | /* |
221 | | * Domain |
222 | | */ |
223 | | struct rt_dom { |
224 | | struct list_head sdom_elem; /* element on the rt_priv->sdom list */ |
225 | | struct domain *dom; /* pointer to upper domain */ |
226 | | }; |
227 | | |
228 | | /* |
229 | | * Useful inline functions |
230 | | */ |
231 | | static inline struct rt_private *rt_priv(const struct scheduler *ops) |
232 | 0 | { |
233 | 0 | return ops->sched_data; |
234 | 0 | } |
235 | | |
236 | | static inline struct rt_vcpu *rt_vcpu(const struct vcpu *vcpu) |
237 | 0 | { |
238 | 0 | return vcpu->sched_priv; |
239 | 0 | } |
240 | | |
241 | | static inline struct rt_dom *rt_dom(const struct domain *dom) |
242 | 0 | { |
243 | 0 | return dom->sched_priv; |
244 | 0 | } |
245 | | |
246 | | static inline struct list_head *rt_runq(const struct scheduler *ops) |
247 | 0 | { |
248 | 0 | return &rt_priv(ops)->runq; |
249 | 0 | } |
250 | | |
251 | | static inline struct list_head *rt_depletedq(const struct scheduler *ops) |
252 | 0 | { |
253 | 0 | return &rt_priv(ops)->depletedq; |
254 | 0 | } |
255 | | |
256 | | static inline struct list_head *rt_replq(const struct scheduler *ops) |
257 | 0 | { |
258 | 0 | return &rt_priv(ops)->replq; |
259 | 0 | } |
260 | | |
261 | | static inline bool has_extratime(const struct rt_vcpu *svc) |
262 | 0 | { |
263 | 0 | return svc->flags & RTDS_extratime; |
264 | 0 | } |
265 | | |
266 | | /* |
267 | | * Helper functions for manipulating the runqueue, the depleted queue, |
268 | | * and the replenishment events queue. |
269 | | */ |
270 | | static int |
271 | | vcpu_on_q(const struct rt_vcpu *svc) |
272 | 0 | { |
273 | 0 | return !list_empty(&svc->q_elem); |
274 | 0 | } |
275 | | |
276 | | static struct rt_vcpu * |
277 | | q_elem(struct list_head *elem) |
278 | 0 | { |
279 | 0 | return list_entry(elem, struct rt_vcpu, q_elem); |
280 | 0 | } |
281 | | |
282 | | static struct rt_vcpu * |
283 | | replq_elem(struct list_head *elem) |
284 | 0 | { |
285 | 0 | return list_entry(elem, struct rt_vcpu, replq_elem); |
286 | 0 | } |
287 | | |
288 | | static int |
289 | | vcpu_on_replq(const struct rt_vcpu *svc) |
290 | 0 | { |
291 | 0 | return !list_empty(&svc->replq_elem); |
292 | 0 | } |
293 | | |
294 | | /* |
295 | | * Returns a positive value if v1's priority is higher than v2's, zero if |
296 | | * they are equal, and a negative value otherwise. |
297 | | */ |
298 | | static s_time_t |
299 | | compare_vcpu_priority(const struct rt_vcpu *v1, const struct rt_vcpu *v2) |
300 | 0 | { |
301 | 0 | int prio = v2->priority_level - v1->priority_level; |
302 | 0 |
|
303 | 0 | if ( prio == 0 ) |
304 | 0 | return v2->cur_deadline - v1->cur_deadline; |
305 | 0 |
|
306 | 0 | return prio; |
307 | 0 | } |
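/*
 * Example with made-up values: v1 and v2 both at priority_level 0, with
 * v1->cur_deadline = 20ms and v2->cur_deadline = 35ms. The return value
 * is 35ms - 20ms > 0, i.e. v1 (the earlier deadline) is the higher
 * priority vcpu, as EDF requires. Had v1 been running on extratime at
 * priority_level 1, the level difference alone would make it lower
 * priority than any level-0 vcpu, regardless of the deadlines.
 */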
308 | | |
309 | | /* |
310 | | * Debug related code, dump vcpu/cpu information |
311 | | */ |
312 | | static void |
313 | | rt_dump_vcpu(const struct scheduler *ops, const struct rt_vcpu *svc) |
314 | 0 | { |
315 | 0 | cpumask_t *cpupool_mask, *mask; |
316 | 0 |
|
317 | 0 | ASSERT(svc != NULL); |
318 | 0 | /* idle vcpu */ |
319 | 0 | if( svc->sdom == NULL ) |
320 | 0 | { |
321 | 0 | printk("\n"); |
322 | 0 | return; |
323 | 0 | } |
324 | 0 |
|
325 | 0 | /* |
326 | 0 | * We can't just use 'cpumask_scratch' because the dumping can |
327 | 0 | * happen from a pCPU outside of this scheduler's cpupool, and |
328 | 0 | * hence it's not right to use its pCPU's scratch mask. |
329 | 0 | * On the other hand, it is safe to use svc->vcpu->processor's |
330 | 0 | * own scratch space, since we hold the runqueue lock. |
331 | 0 | */ |
332 | 0 | mask = cpumask_scratch_cpu(svc->vcpu->processor); |
333 | 0 |
|
334 | 0 | cpupool_mask = cpupool_domain_cpumask(svc->vcpu->domain); |
335 | 0 | cpumask_and(mask, cpupool_mask, svc->vcpu->cpu_hard_affinity); |
336 | 0 | cpulist_scnprintf(keyhandler_scratch, sizeof(keyhandler_scratch), mask); |
337 | 0 | printk("[%5d.%-2u] cpu %u, (%"PRI_stime", %"PRI_stime")," |
338 | 0 | " cur_b=%"PRI_stime" cur_d=%"PRI_stime" last_start=%"PRI_stime"\n" |
339 | 0 | " \t\t priority_level=%d has_extratime=%d\n" |
340 | 0 | " \t\t onQ=%d runnable=%d flags=%x effective hard_affinity=%s\n", |
341 | 0 | svc->vcpu->domain->domain_id, |
342 | 0 | svc->vcpu->vcpu_id, |
343 | 0 | svc->vcpu->processor, |
344 | 0 | svc->period, |
345 | 0 | svc->budget, |
346 | 0 | svc->cur_budget, |
347 | 0 | svc->cur_deadline, |
348 | 0 | svc->last_start, |
349 | 0 | svc->priority_level, |
350 | 0 | has_extratime(svc), |
351 | 0 | vcpu_on_q(svc), |
352 | 0 | vcpu_runnable(svc->vcpu), |
353 | 0 | svc->flags, |
354 | 0 | keyhandler_scratch); |
355 | 0 | } |
356 | | |
357 | | static void |
358 | | rt_dump_pcpu(const struct scheduler *ops, int cpu) |
359 | 0 | { |
360 | 0 | struct rt_private *prv = rt_priv(ops); |
361 | 0 | struct rt_vcpu *svc; |
362 | 0 | unsigned long flags; |
363 | 0 |
|
364 | 0 | spin_lock_irqsave(&prv->lock, flags); |
365 | 0 | printk("CPU[%02d]\n", cpu); |
366 | 0 | /* current VCPU (nothing to say if that's the idle vcpu). */ |
367 | 0 | svc = rt_vcpu(curr_on_cpu(cpu)); |
368 | 0 | if ( svc && !is_idle_vcpu(svc->vcpu) ) |
369 | 0 | { |
370 | 0 | rt_dump_vcpu(ops, svc); |
371 | 0 | } |
372 | 0 | spin_unlock_irqrestore(&prv->lock, flags); |
373 | 0 | } |
374 | | |
375 | | static void |
376 | | rt_dump(const struct scheduler *ops) |
377 | 0 | { |
378 | 0 | struct list_head *runq, *depletedq, *replq, *iter; |
379 | 0 | struct rt_private *prv = rt_priv(ops); |
380 | 0 | struct rt_vcpu *svc; |
381 | 0 | struct rt_dom *sdom; |
382 | 0 | unsigned long flags; |
383 | 0 |
|
384 | 0 | spin_lock_irqsave(&prv->lock, flags); |
385 | 0 |
|
386 | 0 | if ( list_empty(&prv->sdom) ) |
387 | 0 | goto out; |
388 | 0 |
|
389 | 0 | runq = rt_runq(ops); |
390 | 0 | depletedq = rt_depletedq(ops); |
391 | 0 | replq = rt_replq(ops); |
392 | 0 |
|
393 | 0 | printk("Global RunQueue info:\n"); |
394 | 0 | list_for_each ( iter, runq ) |
395 | 0 | { |
396 | 0 | svc = q_elem(iter); |
397 | 0 | rt_dump_vcpu(ops, svc); |
398 | 0 | } |
399 | 0 |
|
400 | 0 | printk("Global DepletedQueue info:\n"); |
401 | 0 | list_for_each ( iter, depletedq ) |
402 | 0 | { |
403 | 0 | svc = q_elem(iter); |
404 | 0 | rt_dump_vcpu(ops, svc); |
405 | 0 | } |
406 | 0 |
|
407 | 0 | printk("Global Replenishment Events info:\n"); |
408 | 0 | list_for_each ( iter, replq ) |
409 | 0 | { |
410 | 0 | svc = replq_elem(iter); |
411 | 0 | rt_dump_vcpu(ops, svc); |
412 | 0 | } |
413 | 0 |
|
414 | 0 | printk("Domain info:\n"); |
415 | 0 | list_for_each ( iter, &prv->sdom ) |
416 | 0 | { |
417 | 0 | struct vcpu *v; |
418 | 0 |
|
419 | 0 | sdom = list_entry(iter, struct rt_dom, sdom_elem); |
420 | 0 | printk("\tdomain: %d\n", sdom->dom->domain_id); |
421 | 0 |
|
422 | 0 | for_each_vcpu ( sdom->dom, v ) |
423 | 0 | { |
424 | 0 | svc = rt_vcpu(v); |
425 | 0 | rt_dump_vcpu(ops, svc); |
426 | 0 | } |
427 | 0 | } |
428 | 0 |
|
429 | 0 | out: |
430 | 0 | spin_unlock_irqrestore(&prv->lock, flags); |
431 | 0 | } |
432 | | |
433 | | /* |
434 | | * Update the deadline and budget when now >= cur_deadline; |
435 | | * the deadline needs to be advanced to that of the current period. |
436 | | */ |
437 | | static void |
438 | | rt_update_deadline(s_time_t now, struct rt_vcpu *svc) |
439 | 0 | { |
440 | 0 | ASSERT(now >= svc->cur_deadline); |
441 | 0 | ASSERT(svc->period != 0); |
442 | 0 |
|
443 | 0 | if ( svc->cur_deadline + (svc->period << UPDATE_LIMIT_SHIFT) > now ) |
444 | 0 | { |
445 | 0 | do |
446 | 0 | svc->cur_deadline += svc->period; |
447 | 0 | while ( svc->cur_deadline <= now ); |
448 | 0 | } |
449 | 0 | else |
450 | 0 | { |
451 | 0 | long count = ((now - svc->cur_deadline) / svc->period) + 1; |
452 | 0 | svc->cur_deadline += count * svc->period; |
453 | 0 | } |
454 | 0 |
|
455 | 0 | /* |
456 | 0 | * svc may be scheduled to run immediately after it misses deadline |
457 | 0 | * Then rt_update_deadline is called before rt_schedule, which |
458 | 0 | * should only deduct the time spent in current period from the budget |
459 | 0 | */ |
460 | 0 | svc->last_start = now; |
461 | 0 | svc->cur_budget = svc->budget; |
462 | 0 | svc->priority_level = 0; |
463 | 0 |
|
464 | 0 | /* TRACE */ |
465 | 0 | { |
466 | 0 | struct __packed { |
467 | 0 | unsigned vcpu:16, dom:16; |
468 | 0 | unsigned priority_level; |
469 | 0 | uint64_t cur_deadline, cur_budget; |
470 | 0 | } d; |
471 | 0 | d.dom = svc->vcpu->domain->domain_id; |
472 | 0 | d.vcpu = svc->vcpu->vcpu_id; |
473 | 0 | d.priority_level = svc->priority_level; |
474 | 0 | d.cur_deadline = (uint64_t) svc->cur_deadline; |
475 | 0 | d.cur_budget = (uint64_t) svc->cur_budget; |
476 | 0 | trace_var(TRC_RTDS_BUDGET_REPLENISH, 1, |
477 | 0 | sizeof(d), |
478 | 0 | (unsigned char *) &d); |
479 | 0 | } |
480 | 0 |
|
481 | 0 | return; |
482 | 0 | } |
483 | | |
484 | | /* |
485 | | * Helpers for removing and inserting a vcpu in a queue |
486 | | * that is being kept ordered by the vcpus' deadlines (as EDF |
487 | | * mandates). |
488 | | * |
489 | | * For callers' convenience, the vcpu removing helper returns |
490 | | * true if the vcpu removed was the one at the front of the |
491 | | * queue; similarly, the inserting helper returns true if the |
492 | | * inserted one ended up at the front of the queue (i.e., in both |
493 | | * cases, if the vcpu with the earliest deadline is what we |
494 | | * are dealing with). |
495 | | */ |
496 | | static inline bool |
497 | | deadline_queue_remove(struct list_head *queue, struct list_head *elem) |
498 | 0 | { |
499 | 0 | int pos = 0; |
500 | 0 |
|
501 | 0 | if ( queue->next != elem ) |
502 | 0 | pos = 1; |
503 | 0 |
|
504 | 0 | list_del_init(elem); |
505 | 0 | return !pos; |
506 | 0 | } |
507 | | |
508 | | static inline bool |
509 | | deadline_queue_insert(struct rt_vcpu * (*qelem)(struct list_head *), |
510 | | struct rt_vcpu *svc, struct list_head *elem, |
511 | | struct list_head *queue) |
512 | 0 | { |
513 | 0 | struct list_head *iter; |
514 | 0 | int pos = 0; |
515 | 0 |
|
516 | 0 | list_for_each ( iter, queue ) |
517 | 0 | { |
518 | 0 | struct rt_vcpu * iter_svc = (*qelem)(iter); |
519 | 0 | if ( compare_vcpu_priority(svc, iter_svc) > 0 ) |
520 | 0 | break; |
521 | 0 | pos++; |
522 | 0 | } |
523 | 0 | list_add_tail(elem, iter); |
524 | 0 | return !pos; |
525 | 0 | } |
526 | | #define deadline_runq_insert(...) \ |
527 | 0 | deadline_queue_insert(&q_elem, ##__VA_ARGS__) |
528 | | #define deadline_replq_insert(...) \ |
529 | 0 | deadline_queue_insert(&replq_elem, ##__VA_ARGS__) |
530 | | |
531 | | static inline void |
532 | | q_remove(struct rt_vcpu *svc) |
533 | 0 | { |
534 | 0 | ASSERT( vcpu_on_q(svc) ); |
535 | 0 | list_del_init(&svc->q_elem); |
536 | 0 | } |
537 | | |
538 | | static inline void |
539 | | replq_remove(const struct scheduler *ops, struct rt_vcpu *svc) |
540 | 0 | { |
541 | 0 | struct rt_private *prv = rt_priv(ops); |
542 | 0 | struct list_head *replq = rt_replq(ops); |
543 | 0 |
|
544 | 0 | ASSERT( vcpu_on_replq(svc) ); |
545 | 0 |
|
546 | 0 | if ( deadline_queue_remove(replq, &svc->replq_elem) ) |
547 | 0 | { |
548 | 0 | /* |
549 | 0 | * The replenishment timer needs to be set to fire when a |
550 | 0 | * replenishment for the vcpu at the front of the replenishment |
551 | 0 | * queue is due. If it is such vcpu that we just removed, we may |
552 | 0 | * need to reprogram the timer. |
553 | 0 | */ |
554 | 0 | if ( !list_empty(replq) ) |
555 | 0 | { |
556 | 0 | struct rt_vcpu *svc_next = replq_elem(replq->next); |
557 | 0 | set_timer(prv->repl_timer, svc_next->cur_deadline); |
558 | 0 | } |
559 | 0 | else |
560 | 0 | stop_timer(prv->repl_timer); |
561 | 0 | } |
562 | 0 | } |
563 | | |
564 | | /* |
565 | | * Insert an svc with budget into the RunQ, according to EDF: |
566 | | * vcpus with earlier deadlines go first. |
567 | | * Insert an svc without budget into the DepletedQ, unsorted. |
568 | | */ |
569 | | static void |
570 | | runq_insert(const struct scheduler *ops, struct rt_vcpu *svc) |
571 | 0 | { |
572 | 0 | struct rt_private *prv = rt_priv(ops); |
573 | 0 | struct list_head *runq = rt_runq(ops); |
574 | 0 |
|
575 | 0 | ASSERT( spin_is_locked(&prv->lock) ); |
576 | 0 | ASSERT( !vcpu_on_q(svc) ); |
577 | 0 | ASSERT( vcpu_on_replq(svc) ); |
578 | 0 |
|
579 | 0 | /* add svc to runq if svc still has budget or its extratime is set */ |
580 | 0 | if ( svc->cur_budget > 0 || |
581 | 0 | has_extratime(svc) ) |
582 | 0 | deadline_runq_insert(svc, &svc->q_elem, runq); |
583 | 0 | else |
584 | 0 | list_add(&svc->q_elem, &prv->depletedq); |
585 | 0 | } |
586 | | |
587 | | static void |
588 | | replq_insert(const struct scheduler *ops, struct rt_vcpu *svc) |
589 | 0 | { |
590 | 0 | struct list_head *replq = rt_replq(ops); |
591 | 0 | struct rt_private *prv = rt_priv(ops); |
592 | 0 |
|
593 | 0 | ASSERT( !vcpu_on_replq(svc) ); |
594 | 0 |
|
595 | 0 | /* |
596 | 0 | * The timer may be re-programmed if svc is inserted |
597 | 0 | * at the front of the event list. |
598 | 0 | */ |
599 | 0 | if ( deadline_replq_insert(svc, &svc->replq_elem, replq) ) |
600 | 0 | set_timer(prv->repl_timer, svc->cur_deadline); |
601 | 0 | } |
602 | | |
603 | | /* |
604 | | * Removes and re-inserts an event to the replenishment queue. |
605 | | * The aim is to update its position inside the queue, as its |
606 | | * deadline (and hence its replenishment time) could have |
607 | | * changed. |
608 | | */ |
609 | | static void |
610 | | replq_reinsert(const struct scheduler *ops, struct rt_vcpu *svc) |
611 | 0 | { |
612 | 0 | struct list_head *replq = rt_replq(ops); |
613 | 0 | struct rt_vcpu *rearm_svc = svc; |
614 | 0 | bool_t rearm = 0; |
615 | 0 |
|
616 | 0 | ASSERT( vcpu_on_replq(svc) ); |
617 | 0 |
|
618 | 0 | /* |
619 | 0 | * If svc was at the front of the replenishment queue, we certainly |
620 | 0 | * need to re-program the timer, and we want to use the deadline of |
621 | 0 | * the vcpu which is now at the front of the queue (which may still |
622 | 0 | * be svc or not). |
623 | 0 | * |
624 | 0 | * We may also need to re-program, if svc has been put at the front |
625 | 0 | * of the replenishment queue when being re-inserted. |
626 | 0 | */ |
627 | 0 | if ( deadline_queue_remove(replq, &svc->replq_elem) ) |
628 | 0 | { |
629 | 0 | deadline_replq_insert(svc, &svc->replq_elem, replq); |
630 | 0 | rearm_svc = replq_elem(replq->next); |
631 | 0 | rearm = 1; |
632 | 0 | } |
633 | 0 | else |
634 | 0 | rearm = deadline_replq_insert(svc, &svc->replq_elem, replq); |
635 | 0 |
|
636 | 0 | if ( rearm ) |
637 | 0 | set_timer(rt_priv(ops)->repl_timer, rearm_svc->cur_deadline); |
638 | 0 | } |
639 | | |
640 | | /* |
641 | | * Pick a valid CPU for the vcpu vc |
642 | | * A valid CPU of a vcpu is the intersection of the vcpu's affinity |
643 | | * and the available cpus. |
644 | | */ |
645 | | static int |
646 | | rt_cpu_pick(const struct scheduler *ops, struct vcpu *vc) |
647 | 0 | { |
648 | 0 | cpumask_t cpus; |
649 | 0 | cpumask_t *online; |
650 | 0 | int cpu; |
651 | 0 |
|
652 | 0 | online = cpupool_domain_cpumask(vc->domain); |
653 | 0 | cpumask_and(&cpus, online, vc->cpu_hard_affinity); |
654 | 0 |
|
655 | 0 | cpu = cpumask_test_cpu(vc->processor, &cpus) |
656 | 0 | ? vc->processor |
657 | 0 | : cpumask_cycle(vc->processor, &cpus); |
658 | 0 | ASSERT( !cpumask_empty(&cpus) && cpumask_test_cpu(cpu, &cpus) ); |
659 | 0 |
|
660 | 0 | return cpu; |
661 | 0 | } |
662 | | |
663 | | /* |
664 | | * Init/Free related code |
665 | | */ |
666 | | static int |
667 | | rt_init(struct scheduler *ops) |
668 | 0 | { |
669 | 0 | int rc = -ENOMEM; |
670 | 0 | struct rt_private *prv = xzalloc(struct rt_private); |
671 | 0 |
|
672 | 0 | printk("Initializing RTDS scheduler\n" |
673 | 0 | "WARNING: This is experimental software in development.\n" |
674 | 0 | "Use at your own risk.\n"); |
675 | 0 |
|
676 | 0 | if ( prv == NULL ) |
677 | 0 | goto err; |
678 | 0 |
|
679 | 0 | prv->repl_timer = xzalloc(struct timer); |
680 | 0 | if ( prv->repl_timer == NULL ) |
681 | 0 | goto err; |
682 | 0 |
|
683 | 0 | spin_lock_init(&prv->lock); |
684 | 0 | INIT_LIST_HEAD(&prv->sdom); |
685 | 0 | INIT_LIST_HEAD(&prv->runq); |
686 | 0 | INIT_LIST_HEAD(&prv->depletedq); |
687 | 0 | INIT_LIST_HEAD(&prv->replq); |
688 | 0 |
|
689 | 0 | cpumask_clear(&prv->tickled); |
690 | 0 |
|
691 | 0 | ops->sched_data = prv; |
692 | 0 | rc = 0; |
693 | 0 |
|
694 | 0 | err: |
695 | 0 | if ( rc && prv ) |
696 | 0 | { |
697 | 0 | xfree(prv->repl_timer); |
698 | 0 | xfree(prv); |
699 | 0 | } |
700 | 0 |
|
701 | 0 | return rc; |
702 | 0 | } |
703 | | |
704 | | static void |
705 | | rt_deinit(struct scheduler *ops) |
706 | 0 | { |
707 | 0 | struct rt_private *prv = rt_priv(ops); |
708 | 0 |
|
709 | 0 | ASSERT(prv->repl_timer->status == TIMER_STATUS_invalid || |
710 | 0 | prv->repl_timer->status == TIMER_STATUS_killed); |
711 | 0 | xfree(prv->repl_timer); |
712 | 0 |
|
713 | 0 | ops->sched_data = NULL; |
714 | 0 | xfree(prv); |
715 | 0 | } |
716 | | |
717 | | /* |
718 | | * Point per_cpu spinlock to the global system lock; |
719 | | * All cpus share the same global system lock. |
720 | | */ |
721 | | static void |
722 | | rt_init_pdata(const struct scheduler *ops, void *pdata, int cpu) |
723 | 0 | { |
724 | 0 | struct rt_private *prv = rt_priv(ops); |
725 | 0 | spinlock_t *old_lock; |
726 | 0 | unsigned long flags; |
727 | 0 |
|
728 | 0 | old_lock = pcpu_schedule_lock_irqsave(cpu, &flags); |
729 | 0 |
|
730 | 0 | /* |
731 | 0 | * TIMER_STATUS_invalid means we are the first cpu that sees the timer |
732 | 0 | * allocated but not initialized, and so it's up to us to initialize it. |
733 | 0 | */ |
734 | 0 | if ( prv->repl_timer->status == TIMER_STATUS_invalid ) |
735 | 0 | { |
736 | 0 | init_timer(prv->repl_timer, repl_timer_handler, (void*) ops, cpu); |
737 | 0 | dprintk(XENLOG_DEBUG, "RTDS: timer initialized on cpu %u\n", cpu); |
738 | 0 | } |
739 | 0 |
|
740 | 0 | /* Move the scheduler lock to our global runqueue lock. */ |
741 | 0 | per_cpu(schedule_data, cpu).schedule_lock = &prv->lock; |
742 | 0 |
|
743 | 0 | /* _Not_ pcpu_schedule_unlock(): per_cpu().schedule_lock changed! */ |
744 | 0 | spin_unlock_irqrestore(old_lock, flags); |
745 | 0 | } |
746 | | |
747 | | /* Change the scheduler of cpu to us (RTDS). */ |
748 | | static void |
749 | | rt_switch_sched(struct scheduler *new_ops, unsigned int cpu, |
750 | | void *pdata, void *vdata) |
751 | 0 | { |
752 | 0 | struct rt_private *prv = rt_priv(new_ops); |
753 | 0 | struct rt_vcpu *svc = vdata; |
754 | 0 |
|
755 | 0 | ASSERT(!pdata && svc && is_idle_vcpu(svc->vcpu)); |
756 | 0 |
|
757 | 0 | /* |
758 | 0 | * We are holding the runqueue lock already (it's been taken in |
759 | 0 | * schedule_cpu_switch()). It's actually the runqueue lock of |
760 | 0 | * another scheduler, but that is how things need to be, for |
761 | 0 | * preventing races. |
762 | 0 | */ |
763 | 0 | ASSERT(per_cpu(schedule_data, cpu).schedule_lock != &prv->lock); |
764 | 0 |
|
765 | 0 | /* |
766 | 0 | * If we are the absolute first cpu being switched toward this |
767 | 0 | * scheduler (in which case we'll see TIMER_STATUS_invalid), or the |
768 | 0 | * first one that is added back to the cpupool that had all its cpus |
769 | 0 | * removed (in which case we'll see TIMER_STATUS_killed), it's our |
770 | 0 | * job to (re)initialize the timer. |
771 | 0 | */ |
772 | 0 | if ( prv->repl_timer->status == TIMER_STATUS_invalid || |
773 | 0 | prv->repl_timer->status == TIMER_STATUS_killed ) |
774 | 0 | { |
775 | 0 | init_timer(prv->repl_timer, repl_timer_handler, (void*) new_ops, cpu); |
776 | 0 | dprintk(XENLOG_DEBUG, "RTDS: timer initialized on cpu %u\n", cpu); |
777 | 0 | } |
778 | 0 |
|
779 | 0 | idle_vcpu[cpu]->sched_priv = vdata; |
780 | 0 | per_cpu(scheduler, cpu) = new_ops; |
781 | 0 | per_cpu(schedule_data, cpu).sched_priv = NULL; /* no pdata */ |
782 | 0 |
|
783 | 0 | /* |
784 | 0 | * (Re?)route the lock to the global runqueue lock as /last/ thing. In fact, |
785 | 0 | * if it is free (and it can be) we want that anyone that manages to |
786 | 0 | * take it finds all the initializations we've done above in place. |
787 | 0 | */ |
788 | 0 | smp_mb(); |
789 | 0 | per_cpu(schedule_data, cpu).schedule_lock = &prv->lock; |
790 | 0 | } |
791 | | |
792 | | static void |
793 | | rt_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu) |
794 | 0 | { |
795 | 0 | unsigned long flags; |
796 | 0 | struct rt_private *prv = rt_priv(ops); |
797 | 0 |
|
798 | 0 | spin_lock_irqsave(&prv->lock, flags); |
799 | 0 |
|
800 | 0 | if ( prv->repl_timer->cpu == cpu ) |
801 | 0 | { |
802 | 0 | struct cpupool *c = per_cpu(cpupool, cpu); |
803 | 0 | unsigned int new_cpu = cpumask_cycle(cpu, cpupool_online_cpumask(c)); |
804 | 0 |
|
805 | 0 | /* |
806 | 0 | * Make sure the timer runs on one of the cpus that are still available |
807 | 0 | * to this scheduler. If there aren't any left, it is time |
808 | 0 | * to just kill it. |
809 | 0 | */ |
810 | 0 | if ( new_cpu >= nr_cpu_ids ) |
811 | 0 | { |
812 | 0 | kill_timer(prv->repl_timer); |
813 | 0 | dprintk(XENLOG_DEBUG, "RTDS: timer killed on cpu %d\n", cpu); |
814 | 0 | } |
815 | 0 | else |
816 | 0 | { |
817 | 0 | migrate_timer(prv->repl_timer, new_cpu); |
818 | 0 | } |
819 | 0 | } |
820 | 0 |
|
821 | 0 | spin_unlock_irqrestore(&prv->lock, flags); |
822 | 0 | } |
823 | | |
824 | | static void * |
825 | | rt_alloc_domdata(const struct scheduler *ops, struct domain *dom) |
826 | 0 | { |
827 | 0 | unsigned long flags; |
828 | 0 | struct rt_dom *sdom; |
829 | 0 | struct rt_private * prv = rt_priv(ops); |
830 | 0 |
|
831 | 0 | sdom = xzalloc(struct rt_dom); |
832 | 0 | if ( sdom == NULL ) |
833 | 0 | return NULL; |
834 | 0 |
|
835 | 0 | INIT_LIST_HEAD(&sdom->sdom_elem); |
836 | 0 | sdom->dom = dom; |
837 | 0 |
|
838 | 0 | /* spinlock here to insert the dom */ |
839 | 0 | spin_lock_irqsave(&prv->lock, flags); |
840 | 0 | list_add_tail(&sdom->sdom_elem, &(prv->sdom)); |
841 | 0 | spin_unlock_irqrestore(&prv->lock, flags); |
842 | 0 |
|
843 | 0 | return sdom; |
844 | 0 | } |
845 | | |
846 | | static void |
847 | | rt_free_domdata(const struct scheduler *ops, void *data) |
848 | 0 | { |
849 | 0 | unsigned long flags; |
850 | 0 | struct rt_dom *sdom = data; |
851 | 0 | struct rt_private *prv = rt_priv(ops); |
852 | 0 |
|
853 | 0 | spin_lock_irqsave(&prv->lock, flags); |
854 | 0 | list_del_init(&sdom->sdom_elem); |
855 | 0 | spin_unlock_irqrestore(&prv->lock, flags); |
856 | 0 | xfree(data); |
857 | 0 | } |
858 | | |
859 | | static int |
860 | | rt_dom_init(const struct scheduler *ops, struct domain *dom) |
861 | 0 | { |
862 | 0 | struct rt_dom *sdom; |
863 | 0 |
|
864 | 0 | /* IDLE Domain does not link on rt_private */ |
865 | 0 | if ( is_idle_domain(dom) ) |
866 | 0 | return 0; |
867 | 0 |
|
868 | 0 | sdom = rt_alloc_domdata(ops, dom); |
869 | 0 | if ( sdom == NULL ) |
870 | 0 | return -ENOMEM; |
871 | 0 |
|
872 | 0 | dom->sched_priv = sdom; |
873 | 0 |
|
874 | 0 | return 0; |
875 | 0 | } |
876 | | |
877 | | static void |
878 | | rt_dom_destroy(const struct scheduler *ops, struct domain *dom) |
879 | 0 | { |
880 | 0 | rt_free_domdata(ops, rt_dom(dom)); |
881 | 0 | } |
882 | | |
883 | | static void * |
884 | | rt_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd) |
885 | 0 | { |
886 | 0 | struct rt_vcpu *svc; |
887 | 0 |
|
888 | 0 | /* Allocate per-VCPU info */ |
889 | 0 | svc = xzalloc(struct rt_vcpu); |
890 | 0 | if ( svc == NULL ) |
891 | 0 | return NULL; |
892 | 0 |
|
893 | 0 | INIT_LIST_HEAD(&svc->q_elem); |
894 | 0 | INIT_LIST_HEAD(&svc->replq_elem); |
895 | 0 | svc->flags = 0U; |
896 | 0 | svc->sdom = dd; |
897 | 0 | svc->vcpu = vc; |
898 | 0 | svc->last_start = 0; |
899 | 0 |
|
900 | 0 | __set_bit(__RTDS_extratime, &svc->flags); |
901 | 0 | svc->priority_level = 0; |
902 | 0 | svc->period = RTDS_DEFAULT_PERIOD; |
903 | 0 | if ( !is_idle_vcpu(vc) ) |
904 | 0 | svc->budget = RTDS_DEFAULT_BUDGET; |
905 | 0 |
|
906 | 0 | SCHED_STAT_CRANK(vcpu_alloc); |
907 | 0 |
|
908 | 0 | return svc; |
909 | 0 | } |
910 | | |
911 | | static void |
912 | | rt_free_vdata(const struct scheduler *ops, void *priv) |
913 | 0 | { |
914 | 0 | struct rt_vcpu *svc = priv; |
915 | 0 |
|
916 | 0 | xfree(svc); |
917 | 0 | } |
918 | | |
919 | | /* |
920 | | * This is called by sched_move_domain() and sched_init_vcpu() |
921 | | * in schedule.c, e.g. when moving a domain to |
922 | | * a new cpupool. |
923 | | * It inserts the vcpus of the moving domain into the scheduler's RunQ in |
924 | | * the destination cpupool. |
925 | | */ |
926 | | static void |
927 | | rt_vcpu_insert(const struct scheduler *ops, struct vcpu *vc) |
928 | 0 | { |
929 | 0 | struct rt_vcpu *svc = rt_vcpu(vc); |
930 | 0 | s_time_t now; |
931 | 0 | spinlock_t *lock; |
932 | 0 |
|
933 | 0 | BUG_ON( is_idle_vcpu(vc) ); |
934 | 0 |
|
935 | 0 | /* This is safe because vc isn't yet being scheduled */ |
936 | 0 | vc->processor = rt_cpu_pick(ops, vc); |
937 | 0 |
|
938 | 0 | lock = vcpu_schedule_lock_irq(vc); |
939 | 0 |
|
940 | 0 | now = NOW(); |
941 | 0 | if ( now >= svc->cur_deadline ) |
942 | 0 | rt_update_deadline(now, svc); |
943 | 0 |
|
944 | 0 | if ( !vcpu_on_q(svc) && vcpu_runnable(vc) ) |
945 | 0 | { |
946 | 0 | replq_insert(ops, svc); |
947 | 0 |
|
948 | 0 | if ( !vc->is_running ) |
949 | 0 | runq_insert(ops, svc); |
950 | 0 | } |
951 | 0 | vcpu_schedule_unlock_irq(lock, vc); |
952 | 0 |
|
953 | 0 | SCHED_STAT_CRANK(vcpu_insert); |
954 | 0 | } |
955 | | |
956 | | /* |
957 | | * Remove rt_vcpu svc from the old scheduler in source cpupool. |
958 | | */ |
959 | | static void |
960 | | rt_vcpu_remove(const struct scheduler *ops, struct vcpu *vc) |
961 | 0 | { |
962 | 0 | struct rt_vcpu * const svc = rt_vcpu(vc); |
963 | 0 | struct rt_dom * const sdom = svc->sdom; |
964 | 0 | spinlock_t *lock; |
965 | 0 |
|
966 | 0 | SCHED_STAT_CRANK(vcpu_remove); |
967 | 0 |
|
968 | 0 | BUG_ON( sdom == NULL ); |
969 | 0 |
|
970 | 0 | lock = vcpu_schedule_lock_irq(vc); |
971 | 0 | if ( vcpu_on_q(svc) ) |
972 | 0 | q_remove(svc); |
973 | 0 |
|
974 | 0 | if ( vcpu_on_replq(svc) ) |
975 | 0 | replq_remove(ops,svc); |
976 | 0 |
|
977 | 0 | vcpu_schedule_unlock_irq(lock, vc); |
978 | 0 | } |
979 | | |
980 | | /* |
981 | | * Burn budget in nanosecond granularity |
982 | | */ |
983 | | static void |
984 | | burn_budget(const struct scheduler *ops, struct rt_vcpu *svc, s_time_t now) |
985 | 0 | { |
986 | 0 | s_time_t delta; |
987 | 0 |
|
988 | 0 | /* don't burn budget for idle VCPU */ |
989 | 0 | if ( is_idle_vcpu(svc->vcpu) ) |
990 | 0 | return; |
991 | 0 |
|
992 | 0 | /* burn at nanoseconds level */ |
993 | 0 | delta = now - svc->last_start; |
994 | 0 | /* |
995 | 0 | * delta < 0 only happens in nested virtualization; |
996 | 0 | * TODO: how should we handle delta < 0 in a better way? |
997 | 0 | */ |
998 | 0 | if ( delta < 0 ) |
999 | 0 | { |
1000 | 0 | printk("%s, ATTENTION: now is behind last_start! delta=%"PRI_stime"\n", |
1001 | 0 | __func__, delta); |
1002 | 0 | svc->last_start = now; |
1003 | 0 | return; |
1004 | 0 | } |
1005 | 0 |
|
1006 | 0 | svc->cur_budget -= delta; |
1007 | 0 | svc->last_start = now; |
1008 | 0 |
|
1009 | 0 | if ( svc->cur_budget <= 0 ) |
1010 | 0 | { |
1011 | 0 | if ( has_extratime(svc) ) |
1012 | 0 | { |
1013 | 0 | svc->priority_level++; |
1014 | 0 | svc->cur_budget = svc->budget; |
1015 | 0 | } |
1016 | 0 | else |
1017 | 0 | { |
1018 | 0 | svc->cur_budget = 0; |
1019 | 0 | __set_bit(__RTDS_depleted, &svc->flags); |
1020 | 0 | } |
1021 | 0 | } |
1022 | 0 |
|
1023 | 0 | /* TRACE */ |
1024 | 0 | { |
1025 | 0 | struct __packed { |
1026 | 0 | unsigned vcpu:16, dom:16; |
1027 | 0 | uint64_t cur_budget; |
1028 | 0 | int delta; |
1029 | 0 | unsigned priority_level; |
1030 | 0 | bool has_extratime; |
1031 | 0 | } d; |
1032 | 0 | d.dom = svc->vcpu->domain->domain_id; |
1033 | 0 | d.vcpu = svc->vcpu->vcpu_id; |
1034 | 0 | d.cur_budget = (uint64_t) svc->cur_budget; |
1035 | 0 | d.delta = delta; |
1036 | 0 | d.priority_level = svc->priority_level; |
1037 | 0 | d.has_extratime = svc->flags & RTDS_extratime; |
1038 | 0 | trace_var(TRC_RTDS_BUDGET_BURN, 1, |
1039 | 0 | sizeof(d), |
1040 | 0 | (unsigned char *) &d); |
1041 | 0 | } |
1042 | 0 | } |
1043 | | |
1044 | | /* |
1045 | | * The RunQ is sorted. Pick the first vcpu within the cpumask. If there is none, return NULL. |
1046 | | * The lock is grabbed before calling this function. |
1047 | | */ |
1048 | | static struct rt_vcpu * |
1049 | | runq_pick(const struct scheduler *ops, const cpumask_t *mask) |
1050 | 0 | { |
1051 | 0 | struct list_head *runq = rt_runq(ops); |
1052 | 0 | struct list_head *iter; |
1053 | 0 | struct rt_vcpu *svc = NULL; |
1054 | 0 | struct rt_vcpu *iter_svc = NULL; |
1055 | 0 | cpumask_t cpu_common; |
1056 | 0 | cpumask_t *online; |
1057 | 0 |
|
1058 | 0 | list_for_each ( iter, runq ) |
1059 | 0 | { |
1060 | 0 | iter_svc = q_elem(iter); |
1061 | 0 |
|
1062 | 0 | /* mask cpu_hard_affinity & cpupool & mask */ |
1063 | 0 | online = cpupool_domain_cpumask(iter_svc->vcpu->domain); |
1064 | 0 | cpumask_and(&cpu_common, online, iter_svc->vcpu->cpu_hard_affinity); |
1065 | 0 | cpumask_and(&cpu_common, mask, &cpu_common); |
1066 | 0 | if ( cpumask_empty(&cpu_common) ) |
1067 | 0 | continue; |
1068 | 0 |
|
1069 | 0 | ASSERT( iter_svc->cur_budget > 0 ); |
1070 | 0 |
|
1071 | 0 | svc = iter_svc; |
1072 | 0 | break; |
1073 | 0 | } |
1074 | 0 |
|
1075 | 0 | /* TRACE */ |
1076 | 0 | { |
1077 | 0 | if( svc != NULL ) |
1078 | 0 | { |
1079 | 0 | struct __packed { |
1080 | 0 | unsigned vcpu:16, dom:16; |
1081 | 0 | uint64_t cur_deadline, cur_budget; |
1082 | 0 | } d; |
1083 | 0 | d.dom = svc->vcpu->domain->domain_id; |
1084 | 0 | d.vcpu = svc->vcpu->vcpu_id; |
1085 | 0 | d.cur_deadline = (uint64_t) svc->cur_deadline; |
1086 | 0 | d.cur_budget = (uint64_t) svc->cur_budget; |
1087 | 0 | trace_var(TRC_RTDS_RUNQ_PICK, 1, |
1088 | 0 | sizeof(d), |
1089 | 0 | (unsigned char *) &d); |
1090 | 0 | } |
1091 | 0 | } |
1092 | 0 |
|
1093 | 0 | return svc; |
1094 | 0 | } |
1095 | | |
1096 | | /* |
1097 | | * schedule function for rt scheduler. |
1098 | | * The lock is already grabbed in schedule.c, no need to lock here |
1099 | | */ |
1100 | | static struct task_slice |
1101 | | rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_scheduled) |
1102 | 0 | { |
1103 | 0 | const int cpu = smp_processor_id(); |
1104 | 0 | struct rt_private *prv = rt_priv(ops); |
1105 | 0 | struct rt_vcpu *const scurr = rt_vcpu(current); |
1106 | 0 | struct rt_vcpu *snext = NULL; |
1107 | 0 | struct task_slice ret = { .migrated = 0 }; |
1108 | 0 |
|
1109 | 0 | /* TRACE */ |
1110 | 0 | { |
1111 | 0 | struct __packed { |
1112 | 0 | unsigned cpu:16, tasklet:8, tickled:4, idle:4; |
1113 | 0 | } d; |
1114 | 0 | d.cpu = cpu; |
1115 | 0 | d.tasklet = tasklet_work_scheduled; |
1116 | 0 | d.tickled = cpumask_test_cpu(cpu, &prv->tickled); |
1117 | 0 | d.idle = is_idle_vcpu(current); |
1118 | 0 | trace_var(TRC_RTDS_SCHEDULE, 1, |
1119 | 0 | sizeof(d), |
1120 | 0 | (unsigned char *)&d); |
1121 | 0 | } |
1122 | 0 |
|
1123 | 0 | /* clear the tickled bit now that we've been scheduled */ |
1124 | 0 | cpumask_clear_cpu(cpu, &prv->tickled); |
1125 | 0 |
|
1126 | 0 | /* burn_budget() returns immediately for the idle VCPU */ |
1127 | 0 | burn_budget(ops, scurr, now); |
1128 | 0 |
|
1129 | 0 | if ( tasklet_work_scheduled ) |
1130 | 0 | { |
1131 | 0 | trace_var(TRC_RTDS_SCHED_TASKLET, 1, 0, NULL); |
1132 | 0 | snext = rt_vcpu(idle_vcpu[cpu]); |
1133 | 0 | } |
1134 | 0 | else |
1135 | 0 | { |
1136 | 0 | snext = runq_pick(ops, cpumask_of(cpu)); |
1137 | 0 | if ( snext == NULL ) |
1138 | 0 | snext = rt_vcpu(idle_vcpu[cpu]); |
1139 | 0 |
|
1140 | 0 | /* if scurr has higher priority and budget, still pick scurr */ |
1141 | 0 | if ( !is_idle_vcpu(current) && |
1142 | 0 | vcpu_runnable(current) && |
1143 | 0 | scurr->cur_budget > 0 && |
1144 | 0 | ( is_idle_vcpu(snext->vcpu) || |
1145 | 0 | compare_vcpu_priority(scurr, snext) > 0 ) ) |
1146 | 0 | snext = scurr; |
1147 | 0 | } |
1148 | 0 |
|
1149 | 0 | if ( snext != scurr && |
1150 | 0 | !is_idle_vcpu(current) && |
1151 | 0 | vcpu_runnable(current) ) |
1152 | 0 | __set_bit(__RTDS_delayed_runq_add, &scurr->flags); |
1153 | 0 |
|
1154 | 0 | snext->last_start = now; |
1155 | 0 | ret.time = -1; /* if an idle vcpu is picked */ |
1156 | 0 | if ( !is_idle_vcpu(snext->vcpu) ) |
1157 | 0 | { |
1158 | 0 | if ( snext != scurr ) |
1159 | 0 | { |
1160 | 0 | q_remove(snext); |
1161 | 0 | __set_bit(__RTDS_scheduled, &snext->flags); |
1162 | 0 | } |
1163 | 0 | if ( snext->vcpu->processor != cpu ) |
1164 | 0 | { |
1165 | 0 | snext->vcpu->processor = cpu; |
1166 | 0 | ret.migrated = 1; |
1167 | 0 | } |
1168 | 0 | ret.time = snext->cur_budget; /* invoke the scheduler next time */ |
1169 | 0 | } |
1170 | 0 | ret.task = snext->vcpu; |
1171 | 0 |
|
1172 | 0 | return ret; |
1173 | 0 | } |
1174 | | |
1175 | | /* |
1176 | | * Remove VCPU from RunQ |
1177 | | * The lock is already grabbed in schedule.c, no need to lock here |
1178 | | */ |
1179 | | static void |
1180 | | rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc) |
1181 | 0 | { |
1182 | 0 | struct rt_vcpu * const svc = rt_vcpu(vc); |
1183 | 0 |
|
1184 | 0 | BUG_ON( is_idle_vcpu(vc) ); |
1185 | 0 | SCHED_STAT_CRANK(vcpu_sleep); |
1186 | 0 |
|
1187 | 0 | if ( curr_on_cpu(vc->processor) == vc ) |
1188 | 0 | cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ); |
1189 | 0 | else if ( vcpu_on_q(svc) ) |
1190 | 0 | { |
1191 | 0 | q_remove(svc); |
1192 | 0 | replq_remove(ops, svc); |
1193 | 0 | } |
1194 | 0 | else if ( svc->flags & RTDS_delayed_runq_add ) |
1195 | 0 | __clear_bit(__RTDS_delayed_runq_add, &svc->flags); |
1196 | 0 | } |
1197 | | |
1198 | | /* |
1199 | | * Pick a cpu where to run a vcpu, |
1200 | | * possibly kicking out the vcpu running there |
1201 | | * Called by wake() and context_saved() |
1202 | | * We have a running candidate here, the kick logic is: |
1203 | | * Among all the cpus that are within the cpu affinity |
1204 | | * 1) if there are any idle CPUs, kick one. |
1205 | | * For cache benefit, we check new->cpu first. |
1206 | | * 2) now all pcpus are busy; |
1207 | | * among all the running vcpus, pick the lowest-priority one; |
1208 | | * if snext has higher priority, kick it. |
1209 | | * |
1210 | | * TODO: |
1211 | | * 1) what if these two vcpus belong to the same domain? |
1212 | | * replacing a vcpu belonging to the same domain introduces more overhead |
1213 | | * |
1214 | | * lock is grabbed before calling this function |
1215 | | */ |
1216 | | static void |
1217 | | runq_tickle(const struct scheduler *ops, struct rt_vcpu *new) |
1218 | 0 | { |
1219 | 0 | struct rt_private *prv = rt_priv(ops); |
1220 | 0 | struct rt_vcpu *latest_deadline_vcpu = NULL; /* lowest priority */ |
1221 | 0 | struct rt_vcpu *iter_svc; |
1222 | 0 | struct vcpu *iter_vc; |
1223 | 0 | int cpu = 0, cpu_to_tickle = 0; |
1224 | 0 | cpumask_t not_tickled; |
1225 | 0 | cpumask_t *online; |
1226 | 0 |
|
1227 | 0 | if ( new == NULL || is_idle_vcpu(new->vcpu) ) |
1228 | 0 | return; |
1229 | 0 |
|
1230 | 0 | online = cpupool_domain_cpumask(new->vcpu->domain); |
1231 | 0 | cpumask_and(¬_tickled, online, new->vcpu->cpu_hard_affinity); |
1232 | 0 | cpumask_andnot(¬_tickled, ¬_tickled, &prv->tickled); |
1233 | 0 |
|
1234 | 0 | /* |
1235 | 0 | * 1) If there are any idle CPUs, kick one. |
1236 | 0 | * For cache benefit, we first check new->cpu. |
1237 | 0 | * The same loop also finds the one with the lowest priority. |
1238 | 0 | */ |
1239 | 0 | cpu = cpumask_test_or_cycle(new->vcpu->processor, ¬_tickled); |
1240 | 0 | while ( cpu!= nr_cpu_ids ) |
1241 | 0 | { |
1242 | 0 | iter_vc = curr_on_cpu(cpu); |
1243 | 0 | if ( is_idle_vcpu(iter_vc) ) |
1244 | 0 | { |
1245 | 0 | SCHED_STAT_CRANK(tickled_idle_cpu); |
1246 | 0 | cpu_to_tickle = cpu; |
1247 | 0 | goto out; |
1248 | 0 | } |
1249 | 0 | iter_svc = rt_vcpu(iter_vc); |
1250 | 0 | if ( latest_deadline_vcpu == NULL || |
1251 | 0 | compare_vcpu_priority(iter_svc, latest_deadline_vcpu) < 0 ) |
1252 | 0 | latest_deadline_vcpu = iter_svc; |
1253 | 0 |
|
1254 | 0 | cpumask_clear_cpu(cpu, ¬_tickled); |
1255 | 0 | cpu = cpumask_cycle(cpu, ¬_tickled); |
1256 | 0 | } |
1257 | 0 |
|
1258 | 0 | /* 2) candidate has higher priority, kick out the lowest-priority vcpu */ |
1259 | 0 | if ( latest_deadline_vcpu != NULL && |
1260 | 0 | compare_vcpu_priority(latest_deadline_vcpu, new) < 0 ) |
1261 | 0 | { |
1262 | 0 | SCHED_STAT_CRANK(tickled_busy_cpu); |
1263 | 0 | cpu_to_tickle = latest_deadline_vcpu->vcpu->processor; |
1264 | 0 | goto out; |
1265 | 0 | } |
1266 | 0 |
|
1267 | 0 | /* didn't tickle any cpu */ |
1268 | 0 | SCHED_STAT_CRANK(tickled_no_cpu); |
1269 | 0 | return; |
1270 | 0 | out: |
1271 | 0 | /* TRACE */ |
1272 | 0 | { |
1273 | 0 | struct { |
1274 | 0 | unsigned cpu:16, pad:16; |
1275 | 0 | } d; |
1276 | 0 | d.cpu = cpu_to_tickle; |
1277 | 0 | d.pad = 0; |
1278 | 0 | trace_var(TRC_RTDS_TICKLE, 1, |
1279 | 0 | sizeof(d), |
1280 | 0 | (unsigned char *)&d); |
1281 | 0 | } |
1282 | 0 |
|
1283 | 0 | cpumask_set_cpu(cpu_to_tickle, &prv->tickled); |
1284 | 0 | cpu_raise_softirq(cpu_to_tickle, SCHEDULE_SOFTIRQ); |
1285 | 0 | return; |
1286 | 0 | } |
1287 | | |
1288 | | /* |
1289 | | * Should always wake up a runnable vcpu and put it back on the RunQ. |
1290 | | * Check the priority to decide whether to raise an interrupt. |
1291 | | * The lock is already grabbed in schedule.c, no need to lock here |
1292 | | * TODO: what if these two vcpus belong to the same domain? |
1293 | | */ |
1294 | | static void |
1295 | | rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) |
1296 | 0 | { |
1297 | 0 | struct rt_vcpu * const svc = rt_vcpu(vc); |
1298 | 0 | s_time_t now; |
1299 | 0 | bool_t missed; |
1300 | 0 |
|
1301 | 0 | BUG_ON( is_idle_vcpu(vc) ); |
1302 | 0 |
|
1303 | 0 | if ( unlikely(curr_on_cpu(vc->processor) == vc) ) |
1304 | 0 | { |
1305 | 0 | SCHED_STAT_CRANK(vcpu_wake_running); |
1306 | 0 | return; |
1307 | 0 | } |
1308 | 0 |
|
1309 | 0 | /* already on the RunQ/DepletedQ: just updating the info is enough */ |
1310 | 0 | if ( unlikely(vcpu_on_q(svc)) ) |
1311 | 0 | { |
1312 | 0 | SCHED_STAT_CRANK(vcpu_wake_onrunq); |
1313 | 0 | return; |
1314 | 0 | } |
1315 | 0 |
|
1316 | 0 | if ( likely(vcpu_runnable(vc)) ) |
1317 | 0 | SCHED_STAT_CRANK(vcpu_wake_runnable); |
1318 | 0 | else |
1319 | 0 | SCHED_STAT_CRANK(vcpu_wake_not_runnable); |
1320 | 0 |
|
1321 | 0 | /* |
1322 | 0 | * If a deadline passed while svc was asleep/blocked, we need new |
1323 | 0 | * scheduling parameters (a new deadline and full budget). |
1324 | 0 | */ |
1325 | 0 | now = NOW(); |
1326 | 0 |
|
1327 | 0 | missed = ( now >= svc->cur_deadline ); |
1328 | 0 | if ( missed ) |
1329 | 0 | rt_update_deadline(now, svc); |
1330 | 0 |
|
1331 | 0 | /* |
1332 | 0 | * If context hasn't been saved for this vcpu yet, we can't put it on |
1333 | 0 | * the run-queue/depleted-queue. Instead, we set the appropriate flag, |
1334 | 0 | * and the vcpu will be put back on a queue after the context has been saved |
1335 | 0 | * (in rt_context_saved()). |
1336 | 0 | */ |
1337 | 0 | if ( unlikely(svc->flags & RTDS_scheduled) ) |
1338 | 0 | { |
1339 | 0 | __set_bit(__RTDS_delayed_runq_add, &svc->flags); |
1340 | 0 | /* |
1341 | 0 | * The vcpu is waking up already, and we didn't even have the time to |
1342 | 0 | * remove its next replenishment event from the replenishment queue |
1343 | 0 | * when it blocked! No big deal. If we did not miss the deadline in |
1344 | 0 | * the meantime, let's just leave it there. If we did, let's remove it |
1345 | 0 | * and queue a new one (to occur at our new deadline). |
1346 | 0 | */ |
1347 | 0 | if ( missed ) |
1348 | 0 | replq_reinsert(ops, svc); |
1349 | 0 | return; |
1350 | 0 | } |
1351 | 0 |
|
1352 | 0 | /* Replenishment event got cancelled when we blocked. Add it back. */ |
1353 | 0 | replq_insert(ops, svc); |
1354 | 0 | /* insert svc to runq/depletedq because svc is not in queue now */ |
1355 | 0 | runq_insert(ops, svc); |
1356 | 0 |
|
1357 | 0 | runq_tickle(ops, svc); |
1358 | 0 | } |
1359 | | |
1360 | | /* |
1361 | | * scurr has finished its context switch; insert it back into the RunQ, |
1362 | | * and then pick the highest-priority vcpu from the runq to run |
1363 | | */ |
1364 | | static void |
1365 | | rt_context_saved(const struct scheduler *ops, struct vcpu *vc) |
1366 | 0 | { |
1367 | 0 | struct rt_vcpu *svc = rt_vcpu(vc); |
1368 | 0 | spinlock_t *lock = vcpu_schedule_lock_irq(vc); |
1369 | 0 |
|
1370 | 0 | __clear_bit(__RTDS_scheduled, &svc->flags); |
1371 | 0 | /* do not insert the idle vcpu into the runq */ |
1372 | 0 | if ( is_idle_vcpu(vc) ) |
1373 | 0 | goto out; |
1374 | 0 |
|
1375 | 0 | if ( __test_and_clear_bit(__RTDS_delayed_runq_add, &svc->flags) && |
1376 | 0 | likely(vcpu_runnable(vc)) ) |
1377 | 0 | { |
1378 | 0 | runq_insert(ops, svc); |
1379 | 0 | runq_tickle(ops, svc); |
1380 | 0 | } |
1381 | 0 | else |
1382 | 0 | replq_remove(ops, svc); |
1383 | 0 |
|
1384 | 0 | out: |
1385 | 0 | vcpu_schedule_unlock_irq(lock, vc); |
1386 | 0 | } |
1387 | | |
1388 | | /* |
1389 | | * Set/get the scheduling parameters of each vcpu of a domain |
1390 | | */ |
1391 | | static int |
1392 | | rt_dom_cntl( |
1393 | | const struct scheduler *ops, |
1394 | | struct domain *d, |
1395 | | struct xen_domctl_scheduler_op *op) |
1396 | 0 | { |
1397 | 0 | struct rt_private *prv = rt_priv(ops); |
1398 | 0 | struct rt_vcpu *svc; |
1399 | 0 | struct vcpu *v; |
1400 | 0 | unsigned long flags; |
1401 | 0 | int rc = 0; |
1402 | 0 | struct xen_domctl_schedparam_vcpu local_sched; |
1403 | 0 | s_time_t period, budget; |
1404 | 0 | uint32_t index = 0; |
1405 | 0 |
|
1406 | 0 | switch ( op->cmd ) |
1407 | 0 | { |
1408 | 0 | case XEN_DOMCTL_SCHEDOP_getinfo: |
1409 | 0 | /* Return the default parameters. */ |
1410 | 0 | op->u.rtds.period = RTDS_DEFAULT_PERIOD / MICROSECS(1); |
1411 | 0 | op->u.rtds.budget = RTDS_DEFAULT_BUDGET / MICROSECS(1); |
1412 | 0 | break; |
1413 | 0 | case XEN_DOMCTL_SCHEDOP_putinfo: |
1414 | 0 | if ( op->u.rtds.period == 0 || op->u.rtds.budget == 0 ) |
1415 | 0 | { |
1416 | 0 | rc = -EINVAL; |
1417 | 0 | break; |
1418 | 0 | } |
1419 | 0 | spin_lock_irqsave(&prv->lock, flags); |
1420 | 0 | for_each_vcpu ( d, v ) |
1421 | 0 | { |
1422 | 0 | svc = rt_vcpu(v); |
1423 | 0 | svc->period = MICROSECS(op->u.rtds.period); /* convert to nanoseconds */ |
1424 | 0 | svc->budget = MICROSECS(op->u.rtds.budget); |
1425 | 0 | } |
1426 | 0 | spin_unlock_irqrestore(&prv->lock, flags); |
1427 | 0 | break; |
1428 | 0 | case XEN_DOMCTL_SCHEDOP_getvcpuinfo: |
1429 | 0 | case XEN_DOMCTL_SCHEDOP_putvcpuinfo: |
1430 | 0 | while ( index < op->u.v.nr_vcpus ) |
1431 | 0 | { |
1432 | 0 | if ( copy_from_guest_offset(&local_sched, |
1433 | 0 | op->u.v.vcpus, index, 1) ) |
1434 | 0 | { |
1435 | 0 | rc = -EFAULT; |
1436 | 0 | break; |
1437 | 0 | } |
1438 | 0 | if ( local_sched.vcpuid >= d->max_vcpus || |
1439 | 0 | d->vcpu[local_sched.vcpuid] == NULL ) |
1440 | 0 | { |
1441 | 0 | rc = -EINVAL; |
1442 | 0 | break; |
1443 | 0 | } |
1444 | 0 |
|
1445 | 0 | if ( op->cmd == XEN_DOMCTL_SCHEDOP_getvcpuinfo ) |
1446 | 0 | { |
1447 | 0 | spin_lock_irqsave(&prv->lock, flags); |
1448 | 0 | svc = rt_vcpu(d->vcpu[local_sched.vcpuid]); |
1449 | 0 | local_sched.u.rtds.budget = svc->budget / MICROSECS(1); |
1450 | 0 | local_sched.u.rtds.period = svc->period / MICROSECS(1); |
1451 | 0 | if ( has_extratime(svc) ) |
1452 | 0 | local_sched.u.rtds.flags |= XEN_DOMCTL_SCHEDRT_extra; |
1453 | 0 | else |
1454 | 0 | local_sched.u.rtds.flags &= ~XEN_DOMCTL_SCHEDRT_extra; |
1455 | 0 | spin_unlock_irqrestore(&prv->lock, flags); |
1456 | 0 |
|
1457 | 0 | if ( copy_to_guest_offset(op->u.v.vcpus, index, |
1458 | 0 | &local_sched, 1) ) |
1459 | 0 | { |
1460 | 0 | rc = -EFAULT; |
1461 | 0 | break; |
1462 | 0 | } |
1463 | 0 | } |
1464 | 0 | else |
1465 | 0 | { |
1466 | 0 | period = MICROSECS(local_sched.u.rtds.period); |
1467 | 0 | budget = MICROSECS(local_sched.u.rtds.budget); |
1468 | 0 | if ( period > RTDS_MAX_PERIOD || budget < RTDS_MIN_BUDGET || |
1469 | 0 | budget > period || period < RTDS_MIN_PERIOD ) |
1470 | 0 | { |
1471 | 0 | rc = -EINVAL; |
1472 | 0 | break; |
1473 | 0 | } |
1474 | 0 |
|
1475 | 0 | spin_lock_irqsave(&prv->lock, flags); |
1476 | 0 | svc = rt_vcpu(d->vcpu[local_sched.vcpuid]); |
1477 | 0 | svc->period = period; |
1478 | 0 | svc->budget = budget; |
1479 | 0 | if ( local_sched.u.rtds.flags & XEN_DOMCTL_SCHEDRT_extra ) |
1480 | 0 | __set_bit(__RTDS_extratime, &svc->flags); |
1481 | 0 | else |
1482 | 0 | __clear_bit(__RTDS_extratime, &svc->flags); |
1483 | 0 | spin_unlock_irqrestore(&prv->lock, flags); |
1484 | 0 | } |
1485 | 0 | /* Process at most 64 vCPUs without checking for preemption. */ |
1486 | 0 | if ( (++index > 63) && hypercall_preempt_check() ) |
1487 | 0 | break; |
1488 | 0 | } |
1489 | 0 | if ( !rc ) |
1490 | 0 | /* notify the caller how many vcpus have been processed. */ |
1491 | 0 | op->u.v.nr_vcpus = index; |
1492 | 0 | break; |
1493 | 0 | } |
1494 | 0 |
|
1495 | 0 | return rc; |
1496 | 0 | } |
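/*
 * For reference, these parameters are normally driven from the toolstack,
 * e.g. with something like the following (values in microseconds; the
 * exact option syntax may differ between Xen/xl versions):
 *
 *   xl sched-rtds -d <domain> -v <vcpuid> -p 10000 -b 4000
 *
 * which reaches this function as a XEN_DOMCTL_SCHEDOP_putvcpuinfo
 * operation carrying period = 10000us and budget = 4000us for that vcpu;
 * the extratime setting arrives in local_sched.u.rtds.flags as
 * XEN_DOMCTL_SCHEDRT_extra.
 */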
1497 | | |
1498 | | /* |
1499 | | * The replenishment timer handler picks vcpus |
1500 | | * from the replq and does the actual replenishment. |
1501 | | */ |
1502 | 0 | static void repl_timer_handler(void *data){ |
1503 | 0 | s_time_t now; |
1504 | 0 | struct scheduler *ops = data; |
1505 | 0 | struct rt_private *prv = rt_priv(ops); |
1506 | 0 | struct list_head *replq = rt_replq(ops); |
1507 | 0 | struct list_head *runq = rt_runq(ops); |
1508 | 0 | struct timer *repl_timer = prv->repl_timer; |
1509 | 0 | struct list_head *iter, *tmp; |
1510 | 0 | struct rt_vcpu *svc; |
1511 | 0 | LIST_HEAD(tmp_replq); |
1512 | 0 |
|
1513 | 0 | spin_lock_irq(&prv->lock); |
1514 | 0 |
|
1515 | 0 | now = NOW(); |
1516 | 0 |
|
1517 | 0 | /* |
1518 | 0 | * Do the replenishment and move replenished vcpus |
1519 | 0 | * to the temporary list to tickle. |
1520 | 0 | * If svc is on run queue, we need to put it at |
1521 | 0 | * the correct place since its deadline changes. |
1522 | 0 | */ |
1523 | 0 | list_for_each_safe ( iter, tmp, replq ) |
1524 | 0 | { |
1525 | 0 | svc = replq_elem(iter); |
1526 | 0 |
|
1527 | 0 | if ( now < svc->cur_deadline ) |
1528 | 0 | break; |
1529 | 0 |
|
1530 | 0 | list_del(&svc->replq_elem); |
1531 | 0 | rt_update_deadline(now, svc); |
1532 | 0 | list_add(&svc->replq_elem, &tmp_replq); |
1533 | 0 |
|
1534 | 0 | if ( vcpu_on_q(svc) ) |
1535 | 0 | { |
1536 | 0 | q_remove(svc); |
1537 | 0 | runq_insert(ops, svc); |
1538 | 0 | } |
1539 | 0 | } |
1540 | 0 |
|
1541 | 0 | /* |
1542 | 0 | * Iterate through the list of updated vcpus. |
1543 | 0 | * If an updated vcpu is running, tickle the head of the |
1544 | 0 | * runqueue if it has a higher priority. |
1545 | 0 | * If an updated vcpu was depleted and on the runqueue, tickle it. |
1546 | 0 | * Finally, reinsert the vcpus into the replenishment events list. |
1547 | 0 | */ |
1548 | 0 | list_for_each_safe ( iter, tmp, &tmp_replq ) |
1549 | 0 | { |
1550 | 0 | svc = replq_elem(iter); |
1551 | 0 |
|
1552 | 0 | if ( curr_on_cpu(svc->vcpu->processor) == svc->vcpu && |
1553 | 0 | !list_empty(runq) ) |
1554 | 0 | { |
1555 | 0 | struct rt_vcpu *next_on_runq = q_elem(runq->next); |
1556 | 0 |
|
1557 | 0 | if ( compare_vcpu_priority(svc, next_on_runq) < 0 ) |
1558 | 0 | runq_tickle(ops, next_on_runq); |
1559 | 0 | } |
1560 | 0 | else if ( __test_and_clear_bit(__RTDS_depleted, &svc->flags) && |
1561 | 0 | vcpu_on_q(svc) ) |
1562 | 0 | runq_tickle(ops, svc); |
1563 | 0 |
|
1564 | 0 | list_del(&svc->replq_elem); |
1565 | 0 | deadline_replq_insert(svc, &svc->replq_elem, replq); |
1566 | 0 | } |
1567 | 0 |
|
1568 | 0 | /* |
1569 | 0 | * If there are vcpus left in the replenishment event list, |
1570 | 0 | * set the next replenishment to happen at the deadline of |
1571 | 0 | * the one in the front. |
1572 | 0 | */ |
1573 | 0 | if ( !list_empty(replq) ) |
1574 | 0 | set_timer(repl_timer, replq_elem(replq->next)->cur_deadline); |
1575 | 0 |
|
1576 | 0 | spin_unlock_irq(&prv->lock); |
1577 | 0 | } |
1578 | | |
1579 | | static const struct scheduler sched_rtds_def = { |
1580 | | .name = "SMP RTDS Scheduler", |
1581 | | .opt_name = "rtds", |
1582 | | .sched_id = XEN_SCHEDULER_RTDS, |
1583 | | .sched_data = NULL, |
1584 | | |
1585 | | .dump_cpu_state = rt_dump_pcpu, |
1586 | | .dump_settings = rt_dump, |
1587 | | .init = rt_init, |
1588 | | .deinit = rt_deinit, |
1589 | | .init_pdata = rt_init_pdata, |
1590 | | .switch_sched = rt_switch_sched, |
1591 | | .deinit_pdata = rt_deinit_pdata, |
1592 | | .alloc_domdata = rt_alloc_domdata, |
1593 | | .free_domdata = rt_free_domdata, |
1594 | | .init_domain = rt_dom_init, |
1595 | | .destroy_domain = rt_dom_destroy, |
1596 | | .alloc_vdata = rt_alloc_vdata, |
1597 | | .free_vdata = rt_free_vdata, |
1598 | | .insert_vcpu = rt_vcpu_insert, |
1599 | | .remove_vcpu = rt_vcpu_remove, |
1600 | | |
1601 | | .adjust = rt_dom_cntl, |
1602 | | |
1603 | | .pick_cpu = rt_cpu_pick, |
1604 | | .do_schedule = rt_schedule, |
1605 | | .sleep = rt_vcpu_sleep, |
1606 | | .wake = rt_vcpu_wake, |
1607 | | .context_saved = rt_context_saved, |
1608 | | }; |
1609 | | |
1610 | | REGISTER_SCHEDULER(sched_rtds_def); |