
annotate xen/common/sched_credit2.c @ 22848:6341fe0f4e5a

Added tag 4.1.0-rc2 for changeset 9dca60d88c63
author Keir Fraser <keir@xen.org>
date Tue Jan 25 14:06:55 2011 +0000 (2011-01-25)
parents 0133cf2a72f5
children
rev   line source
keir@21217 1
keir@21217 2 /****************************************************************************
keir@21217 3 * (C) 2009 - George Dunlap - Citrix Systems R&D UK, Ltd
keir@21217 4 ****************************************************************************
keir@21217 5 *
keir@21217 6 * File: common/sched_credit2.c
keir@21217 7 * Author: George Dunlap
keir@21217 8 *
keir@21217 9 * Description: Credit-based SMP CPU scheduler
keir@21217 10 * Based on an earlier version by Emmanuel Ackaouy.
keir@21217 11 */
keir@21217 12
keir@21217 13 #include <xen/config.h>
keir@21217 14 #include <xen/init.h>
keir@21217 15 #include <xen/lib.h>
keir@21217 16 #include <xen/sched.h>
keir@21217 17 #include <xen/domain.h>
keir@21217 18 #include <xen/delay.h>
keir@21217 19 #include <xen/event.h>
keir@21217 20 #include <xen/time.h>
keir@21217 21 #include <xen/perfc.h>
keir@21217 22 #include <xen/sched-if.h>
keir@21217 23 #include <xen/softirq.h>
keir@21217 24 #include <asm/atomic.h>
keir@21217 25 #include <xen/errno.h>
keir@21217 26 #include <xen/trace.h>
keir@22660 27 #include <xen/cpu.h>
keir@21217 28
keir@21217 29 #if __i386__
keir@21217 30 #define PRI_stime "lld"
keir@21217 31 #else
keir@21217 32 #define PRI_stime "ld"
keir@21217 33 #endif
keir@21217 34
keir@21217 35 #define d2printk(x...)
keir@21217 36 //#define d2printk printk
keir@21217 37
keir@21217 38 #define TRC_CSCHED2_TICK TRC_SCHED_CLASS + 1
keir@21217 39 #define TRC_CSCHED2_RUNQ_POS TRC_SCHED_CLASS + 2
keir@21217 40 #define TRC_CSCHED2_CREDIT_BURN TRC_SCHED_CLASS + 3
keir@21217 41 #define TRC_CSCHED2_CREDIT_ADD TRC_SCHED_CLASS + 4
keir@21217 42 #define TRC_CSCHED2_TICKLE_CHECK TRC_SCHED_CLASS + 5
keir@22376 43 #define TRC_CSCHED2_TICKLE TRC_SCHED_CLASS + 6
keir@22377 44 #define TRC_CSCHED2_CREDIT_RESET TRC_SCHED_CLASS + 7
keir@22377 45 #define TRC_CSCHED2_SCHED_TASKLET TRC_SCHED_CLASS + 8
keir@22658 46 #define TRC_CSCHED2_UPDATE_LOAD TRC_SCHED_CLASS + 9
keir@22657 47 #define TRC_CSCHED2_RUNQ_ASSIGN TRC_SCHED_CLASS + 10
keir@22662 48 #define TRC_CSCHED2_UPDATE_VCPU_LOAD TRC_SCHED_CLASS + 11
keir@22662 49 #define TRC_CSCHED2_UPDATE_RUNQ_LOAD TRC_SCHED_CLASS + 12
keir@21217 50
keir@21217 51 /*
keir@21217 52 * WARNING: This is still in an experimental phase. Status and work can be found at the
keir@21217 53 * credit2 wiki page:
keir@21217 54 * http://wiki.xensource.com/xenwiki/Credit2_Scheduler_Development
keir@21217 55 * TODO:
keir@21217 56 * + Immediate bug-fixes
keir@21217 57 * - Do per-runqueue, grab proper lock for dump debugkey
keir@21217 58 * + Multiple sockets
keir@21217 59 * - Detect cpu layout and make runqueue map, one per L2 (make_runq_map())
keir@21217 60 * - Simple load balancer / runqueue assignment
keir@21217 61 * - Runqueue load measurement
keir@21217 62 * - Load-based load balancer
keir@21217 63 * + Hyperthreading
keir@21217 64 * - Look for non-busy core if possible
keir@21217 65 * - "Discount" time run on a thread with busy siblings
keir@21217 66 * + Algorithm:
keir@21217 67 * - "Mixed work" problem: if a VM is playing audio (5%) but also burning cpu (e.g.,
keir@21217 68 * a flash animation in the background), can we schedule it with low enough latency
keir@21217 69 * so that audio doesn't skip?
keir@21217 70 * - Cap and reservation: How to implement with the current system?
keir@21217 71 * + Optimizing
keir@21217 72 * - Profiling, making new algorithms, making math more efficient (no long division)
keir@21217 73 */
keir@21217 74
keir@21217 75 /*
keir@21217 76 * Design:
keir@21217 77 *
keir@21217 78 * VMs "burn" credits based on their weight; higher weight means
keir@21217 79 * credits burn more slowly. The highest weight vcpu burns credits at
keir@21217 80 * a rate of 1 credit per nanosecond. Others burn proportionally
keir@21217 81 * more.
keir@21217 82 *
keir@21217 83 * vcpus are inserted into the runqueue by credit order.
keir@21217 84 *
keir@21217 85 * Credits are "reset" when the next vcpu in the runqueue is less than
keir@21217 86 * or equal to zero. At that point, everyone's credits are "clipped"
keir@21217 87 * to a small value, and a fixed credit is added to everyone.
keir@21217 88 *
keir@21217 89 * The plan is for all cores that share an L2 to share the same
keir@21217 90 * runqueue. At the moment, there is one global runqueue for all
keir@21217 91 * cores.
keir@21217 92 */
keir@21217 93
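
To make the arithmetic above concrete, here is a minimal, self-contained sketch (illustration only; the helper name "burn" is invented, but the scaling matches t2c() below and the clip-then-add step matches reset_credit(), using the default constants):

#include <stdio.h>
#include <stdint.h>

/* Same scaling as t2c(): time spent running, charged against credit,
 * scaled up for vcpus below the maximum weight on the runqueue. */
static int64_t burn(int64_t run_ns, int max_weight, int weight)
{
    return run_ns * max_weight / weight;
}

int main(void)
{
    /* Two vcpus each run for 1ms; the runqueue's max weight is 256. */
    printf("weight 256 burns %lld credits\n", (long long)burn(1000000, 256, 256));
    printf("weight 128 burns %lld credits\n", (long long)burn(1000000, 256, 128));

    /* Reset event: clip leftover credit to CARRYOVER_MAX, then add INIT. */
    int64_t credit = 1200000;              /* leftover credit, in ns-credits */
    if ( credit > 500000 )                 /* CSCHED_CARRYOVER_MAX = 500us   */
        credit = 500000;
    credit += 10000000;                    /* CSCHED_CREDIT_INIT = 10ms      */
    printf("credit after a reset: %lld\n", (long long)credit);
    return 0;
}
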
keir@21217 94 /*
keir@21217 95 * Locking:
keir@21217 96 * - Schedule-lock is per-runqueue
keir@21217 97 * + Protects runqueue data, runqueue insertion, &c
keir@21217 98 * + Also protects updates to private sched vcpu structure
keir@21217 99 * + Must be grabbed using vcpu_schedule_lock_irq() to make sure vcpu->processor
keir@21217 100 * doesn't change under our feet.
keir@21217 101 * - Private data lock
keir@21217 102 * + Protects access to global domain list
keir@21217 103 * + All other private data is written at init and only read afterwards.
keir@21217 104 * Ordering:
keir@21217 105 * - We grab the private lock before a schedule lock when updating domain weight; so we
keir@21217 106 * must never grab the private lock if a schedule lock is held.
keir@21217 107 */
keir@21217 108
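
The ordering rule boils down to the pattern csched_dom_cntl() uses further down: take the private lock first, then the per-vcpu schedule (runqueue) lock, and release in reverse. A condensed sketch of that shape (the function name is illustrative; the locking calls are the ones used later in this file):

static void weight_update_sketch(struct csched_private *prv,
                                 struct csched_vcpu *svc, int new_weight)
{
    unsigned long flags;

    spin_lock_irqsave(&prv->lock, flags);   /* 1: private lock              */
    vcpu_schedule_lock_irq(svc->vcpu);      /* 2: runqueue (schedule) lock  */

    svc->weight = new_weight;               /* per-vcpu data is protected   */

    vcpu_schedule_unlock_irq(svc->vcpu);    /* release in reverse order     */
    spin_unlock_irqrestore(&prv->lock, flags);
}
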
keir@21217 109 /*
keir@21217 110 * Basic constants
keir@21217 111 */
keir@21217 112 /* Default weight: How much a new domain starts with */
keir@21217 113 #define CSCHED_DEFAULT_WEIGHT 256
keir@21217 114 /* Min timer: Minimum length for which a timer will be set,
keir@21217 115 * to achieve efficiency */
keir@21217 116 #define CSCHED_MIN_TIMER MICROSECS(500)
keir@21217 117 /* Amount of credit VMs begin with, and are reset to.
keir@21217 118 * ATM, set so that highest-weight VMs can only run for 10ms
keir@21217 119 * before a reset event. */
keir@21217 120 #define CSCHED_CREDIT_INIT MILLISECS(10)
keir@21217 121 /* Carryover: How much "extra" credit may be carried over after
keir@21217 122 * a reset. */
keir@21217 123 #define CSCHED_CARRYOVER_MAX CSCHED_MIN_TIMER
keir@22526 124 /* Stickiness: Cross-L2 migration resistance. Should be less than
keir@22526 125 * MIN_TIMER. */
keir@22526 126 #define CSCHED_MIGRATE_RESIST ((opt_migrate_resist)*MICROSECS(1))
keir@22526 127 /* How much to "compensate" a vcpu for L2 migration */
keir@22526 128 #define CSCHED_MIGRATE_COMPENSATION MICROSECS(50)
keir@21217 129 /* Reset: Value below which credit will be reset. */
keir@21217 130 #define CSCHED_CREDIT_RESET 0
keir@21217 131 /* Max timer: Maximum time a guest can be run for. */
keir@21217 132 #define CSCHED_MAX_TIMER MILLISECS(2)
keir@21217 133
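
As a worked number for CSCHED_MIGRATE_RESIST (a sketch of the check runq_tickle() makes further down, assuming the default sched_credit2_migrate_resist=500; the variable names are illustrative):

#include <stdio.h>

int main(void)
{
    long long lowest = 1000000;    /* lowest credit among remote cpus (ns) */
    long long resist =  500000;    /* CSCHED_MIGRATE_RESIST: 500us         */
    long long new_credit;

    /* A waking vcpu only preempts a remote cpu if its credit beats that
     * cpu's credit by more than the migration resistance. */
    for ( new_credit = 1400000; new_credit <= 1600000; new_credit += 200000 )
        printf("credit %lld: %s\n", new_credit,
               (lowest + resist > new_credit) ? "stay put"
                                              : "tickle the remote cpu");
    return 0;
}
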
keir@21217 134
keir@21217 135 #define CSCHED_IDLE_CREDIT (-(1<<30))
keir@21217 136
keir@21217 137 /*
keir@21217 138 * Flags
keir@21217 139 */
keir@21217 140 /* CSFLAG_scheduled: Is this vcpu either running on, or context-switching off,
keir@21217 141 * a physical cpu?
keir@21217 142 * + Accessed only with runqueue lock held
keir@21217 143 * + Set when chosen as next in csched_schedule().
keir@21217 144 * + Cleared after context switch has been saved in csched_context_saved()
keir@21217 145 * + Checked in vcpu_wake to see if we can add to the runqueue, or if we should
keir@21217 146 * set CSFLAG_delayed_runq_add
keir@21217 147 * + Checked to be false in runq_insert.
keir@21217 148 */
keir@21217 149 #define __CSFLAG_scheduled 1
keir@21217 150 #define CSFLAG_scheduled (1<<__CSFLAG_scheduled)
keir@21217 151 /* CSFLAG_delayed_runq_add: Do we need to add this to the runqueue once it's done
keir@21217 152 * being context switched out?
keir@21217 153 * + Set when scheduling out in csched_schedule() if prev is runnable
keir@21217 154 * + Set in csched_vcpu_wake if it finds CSFLAG_scheduled set
keir@21217 155 * + Read in csched_context_saved(). If set, it adds prev to the runqueue and
keir@21217 156 * clears the bit.
keir@21217 157 */
keir@21217 158 #define __CSFLAG_delayed_runq_add 2
keir@21217 159 #define CSFLAG_delayed_runq_add (1<<__CSFLAG_delayed_runq_add)
keir@22664 160 /* CSFLAG_runq_migrate_request: This vcpu is being migrated as a result of a
keir@22664 161 * credit2-initiated runq migrate request; migrate it to the runqueue indicated
keir@22664 162 * in the svc struct.
keir@22664 163 */
keir@22664 164 #define __CSFLAG_runq_migrate_request 3
keir@22664 165 #define CSFLAG_runq_migrate_request (1<<__CSFLAG_runq_migrate_request)
keir@21217 166
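
The two-level naming above follows the usual convention: the __CSFLAG_* values are bit numbers, handed to set_bit(), test_bit() and test_and_clear_bit() on svc->flags later in this file, while the CSFLAG_* values are the corresponding masks. A standalone sketch of the same pattern (the names here are invented):

#include <stdio.h>

#define __EXFLAG_scheduled 1
#define EXFLAG_scheduled   (1 << __EXFLAG_scheduled)

int main(void)
{
    unsigned flags = 0;

    flags |= 1u << __EXFLAG_scheduled;           /* what set_bit() does     */
    printf("scheduled? %d\n", !!(flags & EXFLAG_scheduled));   /* mask test */
    flags &= ~(1u << __EXFLAG_scheduled);        /* what clear_bit() does   */
    printf("scheduled? %d\n", !!(flags & EXFLAG_scheduled));
    return 0;
}
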
keir@21217 167
keir@22526 168 int opt_migrate_resist=500;
keir@22526 169 integer_param("sched_credit2_migrate_resist", opt_migrate_resist);
keir@22526 170
keir@21217 171 /*
keir@21217 172 * Useful macros
keir@21217 173 */
keir@21258 174 #define CSCHED_PRIV(_ops) \
keir@21258 175 ((struct csched_private *)((_ops)->sched_data))
keir@21217 176 #define CSCHED_VCPU(_vcpu) ((struct csched_vcpu *) (_vcpu)->sched_priv)
keir@21217 177 #define CSCHED_DOM(_dom) ((struct csched_dom *) (_dom)->sched_priv)
keir@21258 178 #define CSCHED_CPUONLINE(_pool) \
keir@21258 179 (((_pool) == NULL) ? &cpupool_free_cpus : &(_pool)->cpu_valid)
keir@21217 180 /* CPU to runq_id macro */
keir@21258 181 #define c2r(_ops, _cpu) (CSCHED_PRIV(_ops)->runq_map[(_cpu)])
keir@21217 182 /* CPU to runqueue struct macro */
keir@21258 183 #define RQD(_ops, _cpu) (&CSCHED_PRIV(_ops)->rqd[c2r(_ops, _cpu)])
keir@21217 184
keir@21217 185 /*
keir@22661 186 * Shifts for load average.
keir@22661 187 * - granularity: Reduce granularity of time by a factor of 1024 (roughly 1000), so we can use 32-bit maths
keir@22661 188 * - window shift: Given granularity shift, make the window about 1 second
keir@22661 189 * - scale shift: Shift up load by this amount rather than using fractions; 128 corresponds
keir@22661 190 * to a load of 1.
keir@22661 191 */
keir@22661 192 #define LOADAVG_GRANULARITY_SHIFT (10)
keir@22661 193 int opt_load_window_shift=18;
keir@22661 194 #define LOADAVG_WINDOW_SHIFT_MIN 4
keir@22661 195 integer_param("credit2_load_window_shift", opt_load_window_shift);
keir@22667 196 int opt_underload_balance_tolerance=0;
keir@22667 197 integer_param("credit2_balance_under", opt_underload_balance_tolerance);
keir@22667 198 int opt_overload_balance_tolerance=-3;
keir@22667 199 integer_param("credit2_balance_over", opt_overload_balance_tolerance);
keir@22661 200
keir@22661 201 /*
keir@21217 202 * Per-runqueue data
keir@21217 203 */
keir@21217 204 struct csched_runqueue_data {
keir@21217 205 int id;
keir@22656 206
keir@22656 207 spinlock_t lock; /* Lock for this runqueue. */
keir@22656 208 cpumask_t active; /* CPUs enabled for this runqueue */
keir@22656 209
keir@21217 210 struct list_head runq; /* Ordered list of runnable vms */
keir@21217 211 struct list_head svc; /* List of all vcpus assigned to this runqueue */
keir@21217 212 int max_weight;
keir@22656 213
keir@22376 214 cpumask_t idle, /* Currently idle */
keir@22376 215 tickled; /* Cpus already tickled to reschedule, but which have not yet done so */
keir@22658 216 int load; /* Instantaneous load: Length of queue + num non-idle threads */
keir@22661 217 s_time_t load_last_update; /* Last time average was updated */
keir@22661 218 s_time_t avgload; /* Decaying queue load */
keir@22663 219 s_time_t b_avgload; /* Decaying queue load modified by balancing */
keir@21217 220 };
keir@21217 221
keir@21217 222 /*
keir@21217 223 * System-wide private data
keir@21217 224 */
keir@21217 225 struct csched_private {
keir@21217 226 spinlock_t lock;
keir@22656 227 cpumask_t initialized; /* CPU is initialized for this pool */
keir@22656 228
keir@21217 229 struct list_head sdom; /* Used mostly for dump keyhandler. */
keir@21217 230
keir@21217 231 int runq_map[NR_CPUS];
keir@22656 232 cpumask_t active_queues; /* Queues which may have active cpus */
keir@21217 233 struct csched_runqueue_data rqd[NR_CPUS];
keir@22661 234
keir@22661 235 int load_window_shift;
keir@21217 236 };
keir@21217 237
keir@21217 238 /*
keir@21217 239 * Virtual CPU
keir@21217 240 */
keir@21217 241 struct csched_vcpu {
keir@21217 242 struct list_head rqd_elem; /* On the runqueue data list */
keir@21217 243 struct list_head sdom_elem; /* On the domain vcpu list */
keir@21217 244 struct list_head runq_elem; /* On the runqueue */
keir@22657 245 struct csched_runqueue_data *rqd; /* Up-pointer to the runqueue */
keir@21217 246
keir@21217 247 /* Up-pointers */
keir@21217 248 struct csched_dom *sdom;
keir@21217 249 struct vcpu *vcpu;
keir@21217 250
keir@21217 251 int weight;
keir@21217 252
keir@21217 253 int credit;
keir@21217 254 s_time_t start_time; /* When we were scheduled (used for credit) */
keir@21217 255 unsigned flags; /* 16 bits doesn't seem to play well with clear_bit() */
keir@21217 256
keir@22662 257 /* Individual contribution to load */
keir@22662 258 s_time_t load_last_update; /* Last time average was updated */
keir@22662 259 s_time_t avgload; /* Decaying queue load */
keir@22664 260
keir@22664 261 struct csched_runqueue_data *migrate_rqd; /* Pre-determined rqd to which to migrate */
keir@21217 262 };
keir@21217 263
keir@21217 264 /*
keir@21217 265 * Domain
keir@21217 266 */
keir@21217 267 struct csched_dom {
keir@21217 268 struct list_head vcpu;
keir@21217 269 struct list_head sdom_elem;
keir@21217 270 struct domain *dom;
keir@21217 271 uint16_t weight;
keir@21217 272 uint16_t nr_vcpus;
keir@21217 273 };
keir@21217 274
keir@21217 275
keir@21217 276 /*
keir@21217 277 * Time-to-credit, credit-to-time.
keir@21217 278 * FIXME: Do pre-calculated division?
keir@21217 279 */
keir@21217 280 static s_time_t t2c(struct csched_runqueue_data *rqd, s_time_t time, struct csched_vcpu *svc)
keir@21217 281 {
keir@21217 282 return time * rqd->max_weight / svc->weight;
keir@21217 283 }
keir@21217 284
keir@21217 285 static s_time_t c2t(struct csched_runqueue_data *rqd, s_time_t credit, struct csched_vcpu *svc)
keir@21217 286 {
keir@21217 287 return credit * svc->weight / rqd->max_weight;
keir@21217 288 }
keir@21217 289
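
One way the FIXME above could go (a sketch only, nothing the scheduler currently does; the names make_ratio/t2c_fast are invented, and overflow handling is ignored): precompute a fixed-point reciprocal whenever max_weight or the vcpu's weight changes, so the hot path multiplies and shifts instead of dividing.

#include <stdio.h>
#include <stdint.h>

#define RATIO_SHIFT 20

/* Recomputed only when weights change. */
static uint64_t make_ratio(unsigned max_weight, unsigned weight)
{
    return ((uint64_t)max_weight << RATIO_SHIFT) / weight;
}

/* Hot path: same result as time * max_weight / weight, up to rounding. */
static int64_t t2c_fast(int64_t time, uint64_t ratio)
{
    return (time * ratio) >> RATIO_SHIFT;
}

int main(void)
{
    uint64_t ratio = make_ratio(256, 128);
    printf("1ms at weight 128 burns ~%lld credits\n",
           (long long)t2c_fast(1000000, ratio));    /* prints ~2000000 */
    return 0;
}
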
keir@21217 290 /*
keir@21217 291 * Runqueue related code
keir@21217 292 */
keir@21217 293
keir@21217 294 static /*inline*/ int
keir@21217 295 __vcpu_on_runq(struct csched_vcpu *svc)
keir@21217 296 {
keir@21217 297 return !list_empty(&svc->runq_elem);
keir@21217 298 }
keir@21217 299
keir@21217 300 static /*inline*/ struct csched_vcpu *
keir@21217 301 __runq_elem(struct list_head *elem)
keir@21217 302 {
keir@21217 303 return list_entry(elem, struct csched_vcpu, runq_elem);
keir@21217 304 }
keir@21217 305
keir@22658 306 static void
keir@22662 307 __update_runq_load(const struct scheduler *ops,
keir@22662 308 struct csched_runqueue_data *rqd, int change, s_time_t now)
keir@22658 309 {
keir@22661 310 struct csched_private *prv = CSCHED_PRIV(ops);
keir@22661 311 s_time_t delta=-1;
keir@22661 312
keir@22661 313 now >>= LOADAVG_GRANULARITY_SHIFT;
keir@22661 314
keir@22661 315 if ( rqd->load_last_update + (1ULL<<prv->load_window_shift) < now )
keir@22661 316 {
keir@22662 317 rqd->avgload = (unsigned long long)rqd->load << prv->load_window_shift;
keir@22663 318 rqd->b_avgload = (unsigned long long)rqd->load << prv->load_window_shift;
keir@22661 319 }
keir@22661 320 else
keir@22661 321 {
keir@22661 322 delta = now - rqd->load_last_update;
keir@22661 323
keir@22661 324 rqd->avgload =
keir@22661 325 ( ( delta * ( (unsigned long long)rqd->load << prv->load_window_shift ) )
keir@22661 326 + ( ((1ULL<<prv->load_window_shift) - delta) * rqd->avgload ) ) >> prv->load_window_shift;
keir@22663 327
keir@22663 328 rqd->b_avgload =
keir@22663 329 ( ( delta * ( (unsigned long long)rqd->load << prv->load_window_shift ) )
keir@22663 330 + ( ((1ULL<<prv->load_window_shift) - delta) * rqd->b_avgload ) ) >> prv->load_window_shift;
keir@22661 331 }
keir@22658 332 rqd->load += change;
keir@22661 333 rqd->load_last_update = now;
keir@22662 334
keir@22658 335 {
keir@22658 336 struct {
keir@22662 337 unsigned rq_load:4, rq_avgload:28;
keir@22663 338 unsigned rq_id:4, b_avgload:28;
keir@22658 339 } d;
keir@22662 340 d.rq_id=rqd->id;
keir@22662 341 d.rq_load = rqd->load;
keir@22662 342 d.rq_avgload = rqd->avgload;
keir@22663 343 d.b_avgload = rqd->b_avgload;
keir@22662 344 trace_var(TRC_CSCHED2_UPDATE_RUNQ_LOAD, 1,
keir@22658 345 sizeof(d),
keir@22658 346 (unsigned char *)&d);
keir@22658 347 }
keir@22658 348 }
keir@22658 349
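
For concreteness, a self-contained sketch of the decay step above (the helper name decay_avg is invented): with window shift W, the updated average is (delta * (load << W) + ((1 << W) - delta) * old) >> W, i.e. a linear blend of the instantaneous load and the old average, weighted by how much of the window has elapsed.

#include <stdio.h>
#include <stdint.h>

/* Mirrors the blend above: 'load' is the instantaneous queue load, 'avg'
 * the running average scaled by 2^shift, 'delta' the (granularity-reduced)
 * time since the last update, assumed smaller than the window. */
static uint64_t decay_avg(uint64_t avg, unsigned load,
                          uint64_t delta, unsigned shift)
{
    return ( delta * ((uint64_t)load << shift)
             + ((1ULL << shift) - delta) * avg ) >> shift;
}

int main(void)
{
    unsigned shift = 18;            /* default credit2_load_window_shift   */
    uint64_t avg = 2ULL << shift;   /* running average: 2 runnable vcpus   */

    /* A quarter of the window passes at an instantaneous load of 4. */
    avg = decay_avg(avg, 4, 1ULL << (shift - 2), shift);
    printf("new average: %llu (~%.2f vcpus)\n",
           (unsigned long long)avg, (double)avg / (1ULL << shift));
    return 0;
}
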
keir@22662 350 static void
keir@22662 351 __update_svc_load(const struct scheduler *ops,
keir@22662 352 struct csched_vcpu *svc, int change, s_time_t now)
keir@22662 353 {
keir@22662 354 struct csched_private *prv = CSCHED_PRIV(ops);
keir@22662 355 s_time_t delta=-1;
keir@22662 356 int vcpu_load;
keir@22662 357
keir@22662 358 if ( change == -1 )
keir@22662 359 vcpu_load = 1;
keir@22662 360 else if ( change == 1 )
keir@22662 361 vcpu_load = 0;
keir@22662 362 else
keir@22662 363 vcpu_load = vcpu_runnable(svc->vcpu);
keir@22662 364
keir@22662 365 now >>= LOADAVG_GRANULARITY_SHIFT;
keir@22662 366
keir@22662 367 if ( svc->load_last_update + (1ULL<<prv->load_window_shift) < now )
keir@22662 368 {
keir@22662 369 svc->avgload = (unsigned long long)vcpu_load << prv->load_window_shift;
keir@22662 370 }
keir@22662 371 else
keir@22662 372 {
keir@22662 373 delta = now - svc->load_last_update;
keir@22662 374
keir@22662 375 svc->avgload =
keir@22662 376 ( ( delta * ( (unsigned long long)vcpu_load << prv->load_window_shift ) )
keir@22662 377 + ( ((1ULL<<prv->load_window_shift) - delta) * svc->avgload ) ) >> prv->load_window_shift;
keir@22662 378 }
keir@22662 379 svc->load_last_update = now;
keir@22662 380
keir@22662 381 {
keir@22662 382 struct {
keir@22662 383 unsigned dom:16,vcpu:16;
keir@22662 384 unsigned v_avgload:32;
keir@22662 385 } d;
keir@22662 386 d.dom = svc->vcpu->domain->domain_id;
keir@22662 387 d.vcpu = svc->vcpu->vcpu_id;
keir@22662 388 d.v_avgload = svc->avgload;
keir@22662 389 trace_var(TRC_CSCHED2_UPDATE_VCPU_LOAD, 1,
keir@22662 390 sizeof(d),
keir@22662 391 (unsigned char *)&d);
keir@22662 392 }
keir@22662 393 }
keir@22662 394
keir@22662 395 static void
keir@22662 396 update_load(const struct scheduler *ops,
keir@22662 397 struct csched_runqueue_data *rqd,
keir@22662 398 struct csched_vcpu *svc, int change, s_time_t now)
keir@22662 399 {
keir@22662 400 __update_runq_load(ops, rqd, change, now);
keir@22662 401 if ( svc )
keir@22662 402 __update_svc_load(ops, svc, change, now);
keir@22662 403 }
keir@22662 404
keir@21217 405 static int
keir@21217 406 __runq_insert(struct list_head *runq, struct csched_vcpu *svc)
keir@21217 407 {
keir@21217 408 struct list_head *iter;
keir@21217 409 int pos = 0;
keir@21217 410
keir@21217 411 d2printk("rqi d%dv%d\n",
keir@21217 412 svc->vcpu->domain->domain_id,
keir@21217 413 svc->vcpu->vcpu_id);
keir@21217 414
keir@22657 415 BUG_ON(&svc->rqd->runq != runq);
keir@21217 416 /* Idle vcpus not allowed on the runqueue anymore */
keir@21217 417 BUG_ON(is_idle_vcpu(svc->vcpu));
keir@21217 418 BUG_ON(svc->vcpu->is_running);
keir@21217 419 BUG_ON(test_bit(__CSFLAG_scheduled, &svc->flags));
keir@21217 420
keir@21217 421 list_for_each( iter, runq )
keir@21217 422 {
keir@21217 423 struct csched_vcpu * iter_svc = __runq_elem(iter);
keir@21217 424
keir@21217 425 if ( svc->credit > iter_svc->credit )
keir@21217 426 {
keir@21217 427 d2printk(" p%d d%dv%d\n",
keir@21217 428 pos,
keir@21217 429 iter_svc->vcpu->domain->domain_id,
keir@21217 430 iter_svc->vcpu->vcpu_id);
keir@21217 431 break;
keir@21217 432 }
keir@21217 433 pos++;
keir@21217 434 }
keir@21217 435
keir@21217 436 list_add_tail(&svc->runq_elem, iter);
keir@21217 437
keir@21217 438 return pos;
keir@21217 439 }
keir@21217 440
keir@21217 441 static void
keir@21327 442 runq_insert(const struct scheduler *ops, unsigned int cpu, struct csched_vcpu *svc)
keir@21217 443 {
keir@21258 444 struct list_head * runq = &RQD(ops, cpu)->runq;
keir@21217 445 int pos = 0;
keir@21217 446
keir@21217 447 ASSERT( spin_is_locked(per_cpu(schedule_data, cpu).schedule_lock) );
keir@21217 448
keir@21217 449 BUG_ON( __vcpu_on_runq(svc) );
keir@21258 450 BUG_ON( c2r(ops, cpu) != c2r(ops, svc->vcpu->processor) );
keir@21217 451
keir@21217 452 pos = __runq_insert(runq, svc);
keir@21217 453
keir@21217 454 {
keir@21217 455 struct {
keir@21217 456 unsigned dom:16,vcpu:16;
keir@21217 457 unsigned pos;
keir@21217 458 } d;
keir@21217 459 d.dom = svc->vcpu->domain->domain_id;
keir@21217 460 d.vcpu = svc->vcpu->vcpu_id;
keir@21217 461 d.pos = pos;
keir@22377 462 trace_var(TRC_CSCHED2_RUNQ_POS, 0,
keir@21217 463 sizeof(d),
keir@21217 464 (unsigned char *)&d);
keir@21217 465 }
keir@21217 466
keir@21217 467 return;
keir@21217 468 }
keir@21217 469
keir@21217 470 static inline void
keir@21217 471 __runq_remove(struct csched_vcpu *svc)
keir@21217 472 {
keir@21217 473 BUG_ON( !__vcpu_on_runq(svc) );
keir@21217 474 list_del_init(&svc->runq_elem);
keir@21217 475 }
keir@21217 476
keir@21217 477 void burn_credits(struct csched_runqueue_data *rqd, struct csched_vcpu *, s_time_t);
keir@21217 478
keir@21217 479 /* Check to see if the item on the runqueue is higher priority than what's
keir@21217 480 * currently running; if so, wake up the processor */
keir@21217 481 static /*inline*/ void
keir@21327 482 runq_tickle(const struct scheduler *ops, unsigned int cpu, struct csched_vcpu *new, s_time_t now)
keir@21217 483 {
keir@21217 484 int i, ipid=-1;
keir@21217 485 s_time_t lowest=(1<<30);
keir@21258 486 struct csched_runqueue_data *rqd = RQD(ops, cpu);
keir@22656 487 cpumask_t mask;
keir@22525 488 struct csched_vcpu * cur;
keir@21217 489
keir@21217 490 d2printk("rqt d%dv%d cd%dv%d\n",
keir@21217 491 new->vcpu->domain->domain_id,
keir@21217 492 new->vcpu->vcpu_id,
keir@21217 493 current->domain->domain_id,
keir@21217 494 current->vcpu_id);
keir@21217 495
keir@22525 496 BUG_ON(new->vcpu->processor != cpu);
keir@22657 497 BUG_ON(new->rqd != rqd);
keir@22525 498
keir@22525 499 /* Look at the cpu it's running on first */
keir@22525 500 cur = CSCHED_VCPU(per_cpu(schedule_data, cpu).curr);
keir@22525 501 burn_credits(rqd, cur, now);
keir@22376 502
keir@22525 503 if ( cur->credit < new->credit )
keir@22525 504 {
keir@22525 505 ipid = cpu;
keir@22525 506 goto tickle;
keir@22525 507 }
keir@22525 508
keir@22376 509 /* Get a mask of idle, but not tickled */
keir@22376 510 cpus_andnot(mask, rqd->idle, rqd->tickled);
keir@22376 511
keir@22376 512 /* If it's not empty, choose one */
keir@22376 513 if ( !cpus_empty(mask) )
keir@22376 514 {
keir@22376 515 ipid=first_cpu(mask);
keir@22376 516 goto tickle;
keir@22376 517 }
keir@22376 518
keir@22376 519 /* Otherwise, look for the non-idle cpu with the lowest credit,
keir@22376 520 * skipping cpus which have been tickled but not scheduled yet */
keir@22656 521 cpus_andnot(mask, rqd->active, rqd->idle);
keir@22376 522 cpus_andnot(mask, mask, rqd->tickled);
keir@22376 523
keir@22376 524 for_each_cpu_mask(i, mask)
keir@21217 525 {
keir@21217 526 struct csched_vcpu * cur;
keir@21217 527
keir@22525 528 /* Already looked at this one above */
keir@22525 529 if ( i == cpu )
keir@22525 530 continue;
keir@22525 531
keir@21217 532 cur = CSCHED_VCPU(per_cpu(schedule_data, i).curr);
keir@21217 533
keir@22376 534 BUG_ON(is_idle_vcpu(cur->vcpu));
keir@22376 535
keir@22376 536 /* Update credits for current to see if we want to preempt */
keir@22376 537 burn_credits(rqd, cur, now);
keir@22376 538
keir@22376 539 if ( cur->credit < lowest )
keir@21217 540 {
keir@21217 541 ipid = i;
keir@22376 542 lowest = cur->credit;
keir@21217 543 }
keir@21217 544
keir@22376 545 /* TRACE */ {
keir@22376 546 struct {
keir@22376 547 unsigned dom:16,vcpu:16;
keir@22376 548 unsigned credit;
keir@22376 549 } d;
keir@22376 550 d.dom = cur->vcpu->domain->domain_id;
keir@22376 551 d.vcpu = cur->vcpu->vcpu_id;
keir@22376 552 d.credit = cur->credit;
keir@22524 553 trace_var(TRC_CSCHED2_TICKLE_CHECK, 1,
keir@22376 554 sizeof(d),
keir@22376 555 (unsigned char *)&d);
keir@21217 556 }
keir@21217 557 }
keir@21217 558
keir@22526 559 /* Only switch to another processor if the credit difference is greater
keir@22526 560 * than the migrate resistance */
keir@22526 561 if ( ipid == -1 || lowest + CSCHED_MIGRATE_RESIST > new->credit )
keir@22376 562 goto no_tickle;
keir@22376 563
keir@22376 564 tickle:
keir@22376 565 BUG_ON(ipid == -1);
keir@21217 566
keir@22376 567 /* TRACE */ {
keir@22376 568 struct {
keir@22376 569 unsigned cpu:8;
keir@22376 570 } d;
keir@22376 571 d.cpu = ipid;
keir@22376 572 trace_var(TRC_CSCHED2_TICKLE, 0,
keir@22376 573 sizeof(d),
keir@22376 574 (unsigned char *)&d);
keir@21217 575 }
keir@22376 576 cpu_set(ipid, rqd->tickled);
keir@22376 577 cpu_raise_softirq(ipid, SCHEDULE_SOFTIRQ);
keir@22376 578
keir@22376 579 no_tickle:
keir@22376 580 return;
keir@21217 581 }
keir@21217 582
keir@21217 583 /*
keir@21217 584 * Credit-related code
keir@21217 585 */
keir@21327 586 static void reset_credit(const struct scheduler *ops, int cpu, s_time_t now)
keir@21217 587 {
keir@22657 588 struct csched_runqueue_data *rqd = RQD(ops, cpu);
keir@21217 589 struct list_head *iter;
keir@21217 590
keir@22657 591 list_for_each( iter, &rqd->svc )
keir@21217 592 {
keir@21217 593 struct csched_vcpu * svc = list_entry(iter, struct csched_vcpu, rqd_elem);
keir@21217 594
keir@22377 595 int start_credit;
keir@22377 596
keir@21217 597 BUG_ON( is_idle_vcpu(svc->vcpu) );
keir@22657 598 BUG_ON( svc->rqd != rqd );
keir@21217 599
keir@22377 600 start_credit = svc->credit;
keir@22377 601
keir@21217 602 /* "Clip" credits to max carryover */
keir@21217 603 if ( svc->credit > CSCHED_CARRYOVER_MAX )
keir@21217 604 svc->credit = CSCHED_CARRYOVER_MAX;
keir@21217 605 /* And add INIT */
keir@21217 606 svc->credit += CSCHED_CREDIT_INIT;
keir@21217 607 svc->start_time = now;
keir@21217 608
keir@22377 609 /* TRACE */ {
keir@22377 610 struct {
keir@22377 611 unsigned dom:16,vcpu:16;
keir@22377 612 unsigned credit_start, credit_end;
keir@22377 613 } d;
keir@22377 614 d.dom = svc->vcpu->domain->domain_id;
keir@22377 615 d.vcpu = svc->vcpu->vcpu_id;
keir@22377 616 d.credit_start = start_credit;
keir@22377 617 d.credit_end = svc->credit;
keir@22524 618 trace_var(TRC_CSCHED2_CREDIT_RESET, 1,
keir@22377 619 sizeof(d),
keir@22377 620 (unsigned char *)&d);
keir@22377 621 }
keir@21217 622 }
keir@21217 623
keir@21217 624 /* No need to resort runqueue, as everyone's order should be the same. */
keir@21217 625 }
keir@21217 626
keir@21217 627 void burn_credits(struct csched_runqueue_data *rqd, struct csched_vcpu *svc, s_time_t now)
keir@21217 628 {
keir@21217 629 s_time_t delta;
keir@21217 630
keir@21217 631 /* Assert svc is current */
keir@21217 632 ASSERT(svc==CSCHED_VCPU(per_cpu(schedule_data, svc->vcpu->processor).curr));
keir@21217 633
keir@21217 634 if ( is_idle_vcpu(svc->vcpu) )
keir@21217 635 {
keir@21217 636 BUG_ON(svc->credit != CSCHED_IDLE_CREDIT);
keir@21217 637 return;
keir@21217 638 }
keir@21217 639
keir@21217 640 delta = now - svc->start_time;
keir@21217 641
keir@21217 642 if ( delta > 0 ) {
keir@21217 643 /* This will round down; should we consider rounding up...? */
keir@21217 644 svc->credit -= t2c(rqd, delta, svc);
keir@21217 645 svc->start_time = now;
keir@21217 646
keir@21217 647 d2printk("b d%dv%d c%d\n",
keir@21217 648 svc->vcpu->domain->domain_id,
keir@21217 649 svc->vcpu->vcpu_id,
keir@21217 650 svc->credit);
keir@21217 651 } else {
keir@21217 652 d2printk("%s: Time went backwards? now %"PRI_stime" start %"PRI_stime"\n",
keir@21217 653 __func__, now, svc->start_time);
keir@21217 654 }
keir@21217 655
keir@21217 656 /* TRACE */
keir@21217 657 {
keir@21217 658 struct {
keir@21217 659 unsigned dom:16,vcpu:16;
keir@21217 660 unsigned credit;
keir@21217 661 int delta;
keir@21217 662 } d;
keir@21217 663 d.dom = svc->vcpu->domain->domain_id;
keir@21217 664 d.vcpu = svc->vcpu->vcpu_id;
keir@21217 665 d.credit = svc->credit;
keir@21217 666 d.delta = delta;
keir@22524 667 trace_var(TRC_CSCHED2_CREDIT_BURN, 1,
keir@21217 668 sizeof(d),
keir@21217 669 (unsigned char *)&d);
keir@21217 670 }
keir@21217 671 }
keir@21217 672
keir@21217 673 /* Find the domain with the highest weight. */
keir@21217 674 void update_max_weight(struct csched_runqueue_data *rqd, int new_weight, int old_weight)
keir@21217 675 {
keir@21217 676 /* Try to avoid brute-force search:
keir@21217 677 * - If new_weight is larger, max_weight <- new_weight
keir@21217 678 * - If old_weight != max_weight, someone else is still max_weight
keir@21217 679 * (No action required)
keir@21217 680 * - If old_weight == max_weight, brute-force search for max weight
keir@21217 681 */
keir@21217 682 if ( new_weight > rqd->max_weight )
keir@21217 683 {
keir@21217 684 rqd->max_weight = new_weight;
keir@22653 685 d2printk("%s: Runqueue id %d max weight %d\n", __func__, rqd->id, rqd->max_weight);
keir@21217 686 }
keir@21217 687 else if ( old_weight == rqd->max_weight )
keir@21217 688 {
keir@21217 689 struct list_head *iter;
keir@21217 690 int max_weight = 1;
keir@21217 691
keir@21217 692 list_for_each( iter, &rqd->svc )
keir@21217 693 {
keir@21217 694 struct csched_vcpu * svc = list_entry(iter, struct csched_vcpu, rqd_elem);
keir@21217 695
keir@21217 696 if ( svc->weight > max_weight )
keir@21217 697 max_weight = svc->weight;
keir@21217 698 }
keir@21217 699
keir@21217 700 rqd->max_weight = max_weight;
keir@22653 701 d2printk("%s: Runqueue %d max weight %d\n", __func__, rqd->id, rqd->max_weight);
keir@21217 702 }
keir@21217 703 }
keir@21217 704
keir@21217 705 #ifndef NDEBUG
keir@21217 706 static /*inline*/ void
keir@21217 707 __csched_vcpu_check(struct vcpu *vc)
keir@21217 708 {
keir@21217 709 struct csched_vcpu * const svc = CSCHED_VCPU(vc);
keir@21217 710 struct csched_dom * const sdom = svc->sdom;
keir@21217 711
keir@21217 712 BUG_ON( svc->vcpu != vc );
keir@21217 713 BUG_ON( sdom != CSCHED_DOM(vc->domain) );
keir@21217 714 if ( sdom )
keir@21217 715 {
keir@21217 716 BUG_ON( is_idle_vcpu(vc) );
keir@21217 717 BUG_ON( sdom->dom != vc->domain );
keir@21217 718 }
keir@21217 719 else
keir@21217 720 {
keir@21217 721 BUG_ON( !is_idle_vcpu(vc) );
keir@21217 722 }
keir@21217 723 }
keir@21217 724 #define CSCHED_VCPU_CHECK(_vc) (__csched_vcpu_check(_vc))
keir@21217 725 #else
keir@21217 726 #define CSCHED_VCPU_CHECK(_vc)
keir@21217 727 #endif
keir@21217 728
keir@21258 729 static void *
keir@21327 730 csched_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
keir@21217 731 {
keir@21217 732 struct csched_vcpu *svc;
keir@21217 733
keir@21217 734 /* Allocate per-VCPU info */
keir@21217 735 svc = xmalloc(struct csched_vcpu);
keir@21217 736 if ( svc == NULL )
keir@21258 737 return NULL;
keir@21258 738 memset(svc, 0, sizeof(*svc));
keir@21217 739
keir@21217 740 INIT_LIST_HEAD(&svc->rqd_elem);
keir@21217 741 INIT_LIST_HEAD(&svc->sdom_elem);
keir@21217 742 INIT_LIST_HEAD(&svc->runq_elem);
keir@21217 743
keir@21258 744 svc->sdom = dd;
keir@21217 745 svc->vcpu = vc;
keir@21217 746 svc->flags = 0U;
keir@21217 747
keir@21217 748 if ( ! is_idle_vcpu(vc) )
keir@21217 749 {
keir@21258 750 BUG_ON( svc->sdom == NULL );
keir@21217 751
keir@21217 752 svc->credit = CSCHED_CREDIT_INIT;
keir@21258 753 svc->weight = svc->sdom->weight;
keir@22662 754 /* Starting load of 50% */
keir@22662 755 svc->avgload = 1ULL << (CSCHED_PRIV(ops)->load_window_shift - 1);
keir@22662 756 svc->load_last_update = NOW();
keir@21258 757 }
keir@21258 758 else
keir@21258 759 {
keir@21258 760 BUG_ON( svc->sdom != NULL );
keir@21258 761 svc->credit = CSCHED_IDLE_CREDIT;
keir@21258 762 svc->weight = 0;
keir@21258 763 }
keir@21258 764
keir@21258 765 return svc;
keir@21258 766 }
keir@21217 767
keir@22657 768 /* Add and remove from runqueue assignment (not active run queue) */
keir@22657 769 static void
keir@22657 770 __runq_assign(struct csched_vcpu *svc, struct csched_runqueue_data *rqd)
keir@22657 771 {
keir@22657 772
keir@22657 773 svc->rqd = rqd;
keir@22657 774 list_add_tail(&svc->rqd_elem, &svc->rqd->svc);
keir@22657 775
keir@22657 776 update_max_weight(svc->rqd, svc->weight, 0);
keir@22657 777
keir@22663 778 /* Expected new load based on adding this vcpu */
keir@22663 779 rqd->b_avgload += svc->avgload;
keir@22663 780
keir@22657 781 /* TRACE */
keir@22657 782 {
keir@22657 783 struct {
keir@22657 784 unsigned dom:16,vcpu:16;
keir@22657 785 unsigned rqi:16;
keir@22657 786 } d;
keir@22657 787 d.dom = svc->vcpu->domain->domain_id;
keir@22657 788 d.vcpu = svc->vcpu->vcpu_id;
keir@22657 789 d.rqi=rqd->id;
keir@22657 790 trace_var(TRC_CSCHED2_RUNQ_ASSIGN, 1,
keir@22657 791 sizeof(d),
keir@22657 792 (unsigned char *)&d);
keir@22657 793 }
keir@22657 794
keir@22657 795 }
keir@22657 796
keir@22657 797 static void
keir@22657 798 runq_assign(const struct scheduler *ops, struct vcpu *vc)
keir@22657 799 {
keir@22657 800 struct csched_vcpu *svc = vc->sched_priv;
keir@22657 801
keir@22657 802 BUG_ON(svc->rqd != NULL);
keir@22657 803
keir@22657 804 __runq_assign(svc, RQD(ops, vc->processor));
keir@22657 805 }
keir@22657 806
keir@22657 807 static void
keir@22657 808 __runq_deassign(struct csched_vcpu *svc)
keir@22657 809 {
keir@22657 810 BUG_ON(__vcpu_on_runq(svc));
keir@22657 811
keir@22657 812 list_del_init(&svc->rqd_elem);
keir@22657 813 update_max_weight(svc->rqd, 0, svc->weight);
keir@22657 814
keir@22663 815 /* Expected new load based on removing this vcpu */
keir@22663 816 svc->rqd->b_avgload -= svc->avgload;
keir@22663 817
keir@22657 818 svc->rqd = NULL;
keir@22657 819 }
keir@22657 820
keir@22657 821 static void
keir@22657 822 runq_deassign(const struct scheduler *ops, struct vcpu *vc)
keir@22657 823 {
keir@22657 824 struct csched_vcpu *svc = vc->sched_priv;
keir@22657 825
keir@22657 826 BUG_ON(svc->rqd != RQD(ops, vc->processor));
keir@22657 827
keir@22657 828 __runq_deassign(svc);
keir@22657 829 }
keir@22657 830
keir@21258 831 static void
keir@21327 832 csched_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
keir@21258 833 {
keir@21258 834 struct csched_vcpu *svc = vc->sched_priv;
keir@21258 835 struct domain * const dom = vc->domain;
keir@22657 836 struct csched_dom * const sdom = svc->sdom;
keir@21258 837
keir@21258 838 printk("%s: Inserting d%dv%d\n",
keir@21258 839 __func__, dom->domain_id, vc->vcpu_id);
keir@21258 840
keir@22660 841 /* NB: On boot, idle vcpus are inserted before alloc_pdata() has
keir@22660 842 * been called for that cpu.
keir@22660 843 */
keir@21258 844 if ( ! is_idle_vcpu(vc) )
keir@21258 845 {
keir@21217 846 /* FIXME: Do we need the private lock here? */
keir@21258 847 list_add_tail(&svc->sdom_elem, &svc->sdom->vcpu);
keir@21217 848
keir@21217 849 /* Add vcpu to runqueue of initial processor */
keir@21217 850 vcpu_schedule_lock_irq(vc);
keir@21217 851
keir@22657 852 runq_assign(ops, vc);
keir@21217 853
keir@21217 854 vcpu_schedule_unlock_irq(vc);
keir@21217 855
keir@21217 856 sdom->nr_vcpus++;
keir@21217 857 }
keir@21217 858
keir@21217 859 CSCHED_VCPU_CHECK(vc);
keir@21217 860 }
keir@21217 861
keir@21217 862 static void
keir@21327 863 csched_free_vdata(const struct scheduler *ops, void *priv)
keir@21258 864 {
keir@21258 865 struct csched_vcpu *svc = priv;
keir@22324 866
keir@22324 867 xfree(svc);
keir@22324 868 }
keir@22324 869
keir@22324 870 static void
keir@22324 871 csched_vcpu_remove(const struct scheduler *ops, struct vcpu *vc)
keir@22324 872 {
keir@22324 873 struct csched_vcpu * const svc = CSCHED_VCPU(vc);
keir@22324 874 struct csched_dom * const sdom = svc->sdom;
keir@22324 875
keir@22324 876 BUG_ON( sdom == NULL );
keir@22324 877 BUG_ON( !list_empty(&svc->runq_elem) );
keir@21258 878
keir@21258 879 if ( ! is_idle_vcpu(vc) )
keir@21258 880 {
keir@21258 881 /* Remove from runqueue */
keir@21258 882 vcpu_schedule_lock_irq(vc);
keir@21258 883
keir@22657 884 runq_deassign(ops, vc);
keir@21258 885
keir@21258 886 vcpu_schedule_unlock_irq(vc);
keir@21258 887
keir@21258 888 /* Remove from sdom list. Don't need a lock for this, as it's called
keir@21258 889 * synchronously when nothing else can happen. */
keir@21258 890 list_del_init(&svc->sdom_elem);
keir@21258 891
keir@21258 892 svc->sdom->nr_vcpus--;
keir@21258 893 }
keir@21217 894 }
keir@21217 895
keir@21217 896 static void
keir@21327 897 csched_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
keir@21217 898 {
keir@21217 899 struct csched_vcpu * const svc = CSCHED_VCPU(vc);
keir@21217 900
keir@21217 901 BUG_ON( is_idle_vcpu(vc) );
keir@21217 902
keir@21217 903 if ( per_cpu(schedule_data, vc->processor).curr == vc )
keir@21217 904 cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ);
keir@21217 905 else if ( __vcpu_on_runq(svc) )
keir@22658 906 {
keir@22658 907 BUG_ON(svc->rqd != RQD(ops, vc->processor));
keir@22662 908 update_load(ops, svc->rqd, svc, -1, NOW());
keir@21217 909 __runq_remove(svc);
keir@22658 910 }
keir@22522 911 else if ( test_bit(__CSFLAG_delayed_runq_add, &svc->flags) )
keir@22522 912 clear_bit(__CSFLAG_delayed_runq_add, &svc->flags);
keir@21217 913 }
keir@21217 914
keir@21217 915 static void
keir@21327 916 csched_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
keir@21217 917 {
keir@21217 918 struct csched_vcpu * const svc = CSCHED_VCPU(vc);
keir@21217 919 s_time_t now = 0;
keir@21217 920
keir@21217 921 /* Schedule lock should be held at this point. */
keir@21217 922
keir@21217 923 d2printk("w d%dv%d\n", vc->domain->domain_id, vc->vcpu_id);
keir@21217 924
keir@21217 925 BUG_ON( is_idle_vcpu(vc) );
keir@21217 926
keir@21217 927 /* Make sure svc priority mod happens before runq check */
keir@22523 928 if ( unlikely(per_cpu(schedule_data, vc->processor).curr == vc) )
keir@21217 929 {
keir@21217 930 goto out;
keir@21217 931 }
keir@21217 932
keir@21217 933 if ( unlikely(__vcpu_on_runq(svc)) )
keir@21217 934 {
keir@21217 935 /* If we've boosted someone that's already on a runqueue, prioritize
keir@21217 936 * it and inform the cpu in question. */
keir@21217 937 goto out;
keir@21217 938 }
keir@21217 939
keir@21217 940 /* If the context hasn't been saved for this vcpu yet, we can't put it on
keir@21217 941 * another runqueue. Instead, we set a flag so that it will be put on the runqueue
keir@21217 942 * after the context has been saved. */
keir@21217 943 if ( unlikely (test_bit(__CSFLAG_scheduled, &svc->flags) ) )
keir@21217 944 {
keir@21217 945 set_bit(__CSFLAG_delayed_runq_add, &svc->flags);
keir@21217 946 goto out;
keir@21217 947 }
keir@21217 948
keir@22657 949 /* Add into the new runqueue if necessary */
keir@22657 950 if ( svc->rqd == NULL )
keir@22657 951 runq_assign(ops, vc);
keir@22657 952 else
keir@22657 953 BUG_ON(RQD(ops, vc->processor) != svc->rqd );
keir@22657 954
keir@21217 955 now = NOW();
keir@21217 956
keir@22662 957 update_load(ops, svc->rqd, svc, 1, now);
keir@22658 958
keir@21217 959 /* Put the VCPU on the runq */
keir@22523 960 runq_insert(ops, vc->processor, svc);
keir@22523 961 runq_tickle(ops, vc->processor, svc, now);
keir@21217 962
keir@21217 963 out:
keir@21217 964 d2printk("w-\n");
keir@21217 965 return;
keir@21217 966 }
keir@21217 967
keir@21217 968 static void
keir@21327 969 csched_context_saved(const struct scheduler *ops, struct vcpu *vc)
keir@21217 970 {
keir@21217 971 struct csched_vcpu * const svc = CSCHED_VCPU(vc);
keir@22523 972 s_time_t now = NOW();
keir@21217 973
keir@21217 974 vcpu_schedule_lock_irq(vc);
keir@21217 975
keir@22657 976 BUG_ON( !is_idle_vcpu(vc) && svc->rqd != RQD(ops, vc->processor));
keir@22657 977
keir@21217 978 /* This vcpu is now eligible to be put on the runqueue again */
keir@21217 979 clear_bit(__CSFLAG_scheduled, &svc->flags);
keir@21217 980
keir@21217 981 /* If someone wants it on the runqueue, put it there. */
keir@21217 982 /*
keir@21217 983 * NB: We can get rid of CSFLAG_scheduled by checking for
keir@21217 984 * vc->is_running and __vcpu_on_runq(svc) here. However,
keir@21217 985 * since we're accessing the flags cacheline anyway,
keir@21217 986 * it seems a bit pointless; especially as we have plenty of
keir@21217 987 * bits free.
keir@21217 988 */
keir@22664 989 if ( test_and_clear_bit(__CSFLAG_delayed_runq_add, &svc->flags)
keir@22664 990 && likely(vcpu_runnable(vc)) )
keir@21217 991 {
keir@22523 992 BUG_ON(__vcpu_on_runq(svc));
keir@21217 993
keir@22523 994 runq_insert(ops, vc->processor, svc);
keir@22523 995 runq_tickle(ops, vc->processor, svc, now);
keir@21217 996 }
keir@22658 997 else if ( !is_idle_vcpu(vc) )
keir@22662 998 update_load(ops, svc->rqd, svc, -1, now);
keir@21217 999
keir@21217 1000 vcpu_schedule_unlock_irq(vc);
keir@21217 1001 }
keir@21217 1002
keir@22665 1003 #define MAX_LOAD (1ULL<<60)
keir@21217 1004 static int
keir@22657 1005 choose_cpu(const struct scheduler *ops, struct vcpu *vc)
keir@21217 1006 {
keir@22659 1007 struct csched_private *prv = CSCHED_PRIV(ops);
keir@22665 1008 int i, min_rqi = -1, new_cpu;
keir@22659 1009 struct csched_vcpu *svc = CSCHED_VCPU(vc);
keir@22665 1010 s_time_t min_avgload;
keir@22659 1011
keir@22659 1012 BUG_ON(cpus_empty(prv->active_queues));
keir@22659 1013
keir@22659 1014 /* Locking:
keir@22659 1015 * - vc->processor is already locked
keir@22659 1016 * - Need to grab prv lock to make sure active runqueues don't
keir@22659 1017 * change
keir@22659 1018 * - Need to grab locks for other runqueues while checking
keir@22659 1019 * avgload
keir@22659 1020 * Locking constraint is:
keir@22659 1021 * - Lock prv before runqueue locks
keir@22659 1022 * - Trylock between runqueue locks (no ordering)
keir@22659 1023 *
keir@22659 1024 * Since one of the runqueue locks is already held, we can't
keir@22659 1025 * just grab the prv lock. Instead, we'll have to trylock, and
keir@22659 1026 * do something else reasonable if we fail.
keir@22659 1027 */
keir@22659 1028
keir@22659 1029 if ( !spin_trylock(&prv->lock) )
keir@22659 1030 {
keir@22664 1031 if ( test_and_clear_bit(__CSFLAG_runq_migrate_request, &svc->flags) )
keir@22664 1032 {
keir@22664 1033 d2printk("d%dv%d -\n", svc->vcpu->domain->domain_id, svc->vcpu->vcpu_id);
keir@22664 1034 clear_bit(__CSFLAG_runq_migrate_request, &svc->flags);
keir@22664 1035 }
keir@22659 1036 /* Leave it where it is for now. When we actually pay attention
keir@22659 1037 * to affinity we'll have to figure something out... */
keir@22659 1038 return vc->processor;
keir@22659 1039 }
keir@22659 1040
keir@22664 1041 /* First check to see if we're here because someone else suggested a place
keir@22664 1042 * for us to move. */
keir@22664 1043 if ( test_and_clear_bit(__CSFLAG_runq_migrate_request, &svc->flags) )
keir@22664 1044 {
keir@22664 1045 if ( unlikely(svc->migrate_rqd->id < 0) )
keir@22664 1046 {
keir@22664 1047 printk("%s: Runqueue migrate aborted because target runqueue disappeared!\n",
keir@22664 1048 __func__);
keir@22664 1049 /* Fall-through to normal cpu pick */
keir@22664 1050 }
keir@22664 1051 else
keir@22664 1052 {
keir@22664 1053 d2printk("d%dv%d +\n", svc->vcpu->domain->domain_id, svc->vcpu->vcpu_id);
keir@22664 1054 new_cpu = first_cpu(svc->migrate_rqd->active);
keir@22664 1055 goto out_up;
keir@22664 1056 }
keir@22664 1057 }
keir@22664 1058
keir@22659 1059 /* FIXME: Pay attention to cpu affinity */
keir@22659 1060
keir@22665 1061 min_avgload = MAX_LOAD;
keir@22659 1062
keir@22659 1063 /* Find the runqueue with the lowest instantaneous load */
keir@22659 1064 for_each_cpu_mask(i, prv->active_queues)
keir@22659 1065 {
keir@22659 1066 struct csched_runqueue_data *rqd;
keir@22665 1067 s_time_t rqd_avgload;
keir@22659 1068
keir@22659 1069 rqd = prv->rqd + i;
keir@22659 1070
keir@22659 1071 /* If checking a different runqueue, grab the lock,
keir@22665 1072 * read the avg, and then release the lock.
keir@22665 1073 *
keir@22665 1074 * If on our own runqueue, don't grab or release the lock;
keir@22665 1075 * but subtract our own load from the runqueue load to simulate
keir@22665 1076 * impartiality */
keir@22665 1077 if ( rqd == svc->rqd )
keir@22665 1078 {
keir@22665 1079 rqd_avgload = rqd->b_avgload - svc->avgload;
keir@22665 1080 }
keir@22665 1081 else if ( spin_trylock(&rqd->lock) )
keir@22665 1082 {
keir@22665 1083 rqd_avgload = rqd->b_avgload;
keir@22665 1084 spin_unlock(&rqd->lock);
keir@22665 1085 }
keir@22665 1086 else
keir@22659 1087 continue;
keir@22665 1088
keir@22665 1089 if ( rqd_avgload < min_avgload )
keir@22659 1090 {
keir@22665 1091 min_avgload = rqd_avgload;
keir@22659 1092 min_rqi=i;
keir@22659 1093 }
keir@22659 1094 }
keir@22659 1095
keir@22659 1096 /* We didn't find anyone (most likely because of spinlock contention); leave it where it is */
keir@22659 1097 if ( min_rqi == -1 )
keir@22659 1098 new_cpu = vc->processor;
keir@22659 1099 else
keir@22659 1100 {
keir@22659 1101 BUG_ON(cpus_empty(prv->rqd[min_rqi].active));
keir@22659 1102 new_cpu = first_cpu(prv->rqd[min_rqi].active);
keir@22659 1103 }
keir@22664 1104
keir@22664 1105 out_up:
keir@22659 1106 spin_unlock(&prv->lock);
keir@22659 1107
keir@22659 1108 return new_cpu;
keir@21217 1109 }
keir@21217 1110
keir@22666 1111 static void balance_load(const struct scheduler *ops, int cpu, s_time_t now)
keir@22666 1112 {
keir@22666 1113 struct csched_private *prv = CSCHED_PRIV(ops);
keir@22666 1114 int i, max_delta_rqi = -1;
keir@22666 1115 struct list_head *push_iter, *pull_iter;
keir@22666 1116
keir@22666 1117 /* NB: Modified by consider() */
keir@22666 1118 s_time_t load_delta;
keir@22666 1119 struct csched_vcpu * best_push_svc=NULL, *best_pull_svc=NULL;
keir@22666 1120 /* NB: Read by consider() */
keir@22666 1121 struct csched_runqueue_data *lrqd;
keir@22666 1122 struct csched_runqueue_data *orqd;
keir@22666 1123
keir@22666 1124 void consider(struct csched_vcpu *push_svc,
keir@22666 1125 struct csched_vcpu *pull_svc)
keir@22666 1126 {
keir@22666 1127 s_time_t l_load, o_load, delta;
keir@22666 1128
keir@22666 1129 l_load = lrqd->b_avgload;
keir@22666 1130 o_load = orqd->b_avgload;
keir@22666 1131 if ( push_svc )
keir@22666 1132 {
keir@22666 1133 /* What happens to the load on both if we push? */
keir@22666 1134 l_load -= push_svc->avgload;
keir@22666 1135 o_load += push_svc->avgload;
keir@22666 1136 }
keir@22666 1137 if ( pull_svc )
keir@22666 1138 {
keir@22666 1139 /* What happens to the load on both if we pull? */
keir@22666 1140 l_load += pull_svc->avgload;
keir@22666 1141 o_load -= pull_svc->avgload;
keir@22666 1142 }
keir@22666 1143
keir@22666 1144 delta = l_load - o_load;
keir@22666 1145 if ( delta < 0 )
keir@22666 1146 delta = -delta;
keir@22666 1147
keir@22666 1148 if ( delta < load_delta )
keir@22666 1149 {
keir@22666 1150 load_delta = delta;
keir@22666 1151 best_push_svc=push_svc;
keir@22666 1152 best_pull_svc=pull_svc;
keir@22666 1153 }
keir@22666 1154 }
keir@22666 1155
keir@22666 1156 void migrate(struct csched_vcpu *svc, struct csched_runqueue_data *trqd)
keir@22666 1157 {
keir@22666 1158 if ( test_bit(__CSFLAG_scheduled, &svc->flags) )
keir@22666 1159 {
keir@22666 1160 d2printk("d%dv%d %d-%d a\n", svc->vcpu->domain->domain_id, svc->vcpu->vcpu_id,
keir@22666 1161 svc->rqd->id, trqd->id);
keir@22666 1162 /* It's running; mark it to migrate. */
keir@22666 1163 svc->migrate_rqd = trqd;
keir@22666 1164 set_bit(_VPF_migrating, &svc->vcpu->pause_flags);
keir@22666 1165 set_bit(__CSFLAG_runq_migrate_request, &svc->flags);
keir@22666 1166 }
keir@22666 1167 else
keir@22666 1168 {
keir@22666 1169 int on_runq=0;
keir@22666 1170 /* It's not running; just move it */
keir@22666 1171 d2printk("d%dv%d %d-%d i\n", svc->vcpu->domain->domain_id, svc->vcpu->vcpu_id,
keir@22666 1172 svc->rqd->id, trqd->id);
keir@22666 1173 if ( __vcpu_on_runq(svc) )
keir@22666 1174 {
keir@22666 1175 __runq_remove(svc);
keir@22666 1176 update_load(ops, svc->rqd, svc, -1, now);
keir@22666 1177 on_runq=1;
keir@22666 1178 }
keir@22666 1179 __runq_deassign(svc);
keir@22666 1180 svc->vcpu->processor = first_cpu(trqd->active);
keir@22666 1181 __runq_assign(svc, trqd);
keir@22666 1182 if ( on_runq )
keir@22666 1183 {
keir@22666 1184 update_load(ops, svc->rqd, svc, 1, now);
keir@22666 1185 runq_insert(ops, svc->vcpu->processor, svc);
keir@22666 1186 runq_tickle(ops, svc->vcpu->processor, svc, now);
keir@22666 1187 }
keir@22666 1188 }
keir@22666 1189 }
keir@22666 1190
keir@22666 1191
keir@22666 1192 /*
keir@22666 1193 * Basic algorithm: Push, pull, or swap.
keir@22666 1194 * - Find the runqueue with the largest load difference from this one
keir@22666 1195 * - Find a pair that makes the difference the least (where one
keir@22666 1196 * on either side may be empty).
keir@22666 1197 */
keir@22666 1198
keir@22666 1199 /* Locking:
keir@22666 1200 * - pcpu schedule lock should be already locked
keir@22666 1201 */
keir@22666 1202 lrqd = RQD(ops, cpu);
keir@22666 1203
keir@22666 1204 __update_runq_load(ops, lrqd, 0, now);
keir@22666 1205
keir@22666 1206 retry:
keir@22666 1207 if ( !spin_trylock(&prv->lock) )
keir@22666 1208 return;
keir@22666 1209
keir@22666 1210 load_delta = 0;
keir@22666 1211
keir@22666 1212 for_each_cpu_mask(i, prv->active_queues)
keir@22666 1213 {
keir@22666 1214 s_time_t delta;
keir@22666 1215
keir@22666 1216 orqd = prv->rqd + i;
keir@22666 1217
keir@22666 1218 if ( orqd == lrqd
keir@22666 1219 || !spin_trylock(&orqd->lock) )
keir@22666 1220 continue;
keir@22666 1221
keir@22666 1222 __update_runq_load(ops, orqd, 0, now);
keir@22666 1223
keir@22666 1224 delta = lrqd->b_avgload - orqd->b_avgload;
keir@22666 1225 if ( delta < 0 )
keir@22666 1226 delta = -delta;
keir@22666 1227
keir@22666 1228 if ( delta > load_delta )
keir@22666 1229 {
keir@22666 1230 load_delta = delta;
keir@22666 1231 max_delta_rqi = i;
keir@22666 1232 }
keir@22666 1233
keir@22666 1234 spin_unlock(&orqd->lock);
keir@22666 1235 }
keir@22666 1236
keir@22666 1237 /* Minimize holding the big lock */
keir@22666 1238 spin_unlock(&prv->lock);
keir@22666 1239 if ( max_delta_rqi == -1 )
keir@22666 1240 goto out;
keir@22666 1241
keir@22667 1242 {
keir@22667 1243 s_time_t load_max;
keir@22667 1244 int cpus_max;
keir@22667 1245
keir@22667 1246
keir@22667 1247 load_max = lrqd->b_avgload;
keir@22667 1248 if ( orqd->b_avgload > load_max )
keir@22667 1249 load_max = orqd->b_avgload;
keir@22667 1250
keir@22667 1251 cpus_max=cpus_weight(lrqd->active);
keir@22667 1252 if ( cpus_weight(orqd->active) > cpus_max )
keir@22667 1253 cpus_max = cpus_weight(orqd->active);
keir@22666 1254
keir@22667 1255 /* If we're under 100% capacity, only shift if the load difference
keir@22667 1256 * is greater than 1. Otherwise, shift if it is greater than 12.5%. */
keir@22667 1257 if ( load_max < (1ULL<<(prv->load_window_shift))*cpus_max )
keir@22667 1258 {
keir@22667 1259 if ( load_delta < (1ULL<<(prv->load_window_shift+opt_underload_balance_tolerance) ) )
keir@22667 1260 goto out;
keir@22667 1261 }
keir@22667 1262 else
keir@22667 1263 if ( load_delta < (1ULL<<(prv->load_window_shift+opt_overload_balance_tolerance)) )
keir@22667 1264 goto out;
keir@22667 1265 }
keir@22667 1266
keir@22666 1267 /* Try to grab the other runqueue lock; if it's been taken in the
keir@22666 1268 * meantime, try the process over again. This can't deadlock
keir@22666 1269 * because if it doesn't get any other rqd locks, it will simply
keir@22666 1270 * give up and return. */
keir@22666 1271 orqd = prv->rqd + max_delta_rqi;
keir@22666 1272 if ( !spin_trylock(&orqd->lock) )
keir@22666 1273 goto retry;
keir@22666 1274
keir@22666 1275 /* Make sure the runqueue hasn't been deactivated since we released prv->lock */
keir@22666 1276 if ( unlikely(orqd->id < 0) )
keir@22666 1277 goto out_up;
keir@22666 1278
keir@22666 1279 /* Look for "swap" which gives the best load average
keir@22666 1280 * FIXME: O(n^2)! */
keir@22666 1281
keir@22666 1282 /* Reuse load delta (as we're trying to minimize it) */
keir@22666 1283 list_for_each( push_iter, &lrqd->svc )
keir@22666 1284 {
keir@22666 1285 int inner_load_updated = 0;
keir@22666 1286 struct csched_vcpu * push_svc = list_entry(push_iter, struct csched_vcpu, rqd_elem);
keir@22666 1287
keir@22666 1288 __update_svc_load(ops, push_svc, 0, now);
keir@22666 1289
keir@22666 1290 /* Skip this one if it's already been flagged to migrate */
keir@22666 1291 if ( test_bit(__CSFLAG_runq_migrate_request, &push_svc->flags) )
keir@22666 1292 continue;
keir@22666 1293
keir@22666 1294 list_for_each( pull_iter, &orqd->svc )
keir@22666 1295 {
keir@22666 1296 struct csched_vcpu * pull_svc = list_entry(pull_iter, struct csched_vcpu, rqd_elem);
keir@22666 1297
keir@22666 1298 if ( ! inner_load_updated )
keir@22666 1299 {
keir@22666 1300 __update_svc_load(ops, pull_svc, 0, now);
keir@22666 1301 }
keir@22666 1302
keir@22666 1303 /* Skip this one if it's already been flagged to migrate */
keir@22666 1304 if ( test_bit(__CSFLAG_runq_migrate_request, &pull_svc->flags) )
keir@22666 1305 continue;
keir@22666 1306
keir@22666 1307 consider(push_svc, pull_svc);
keir@22666 1308 }
keir@22666 1309
keir@22666 1310 inner_load_updated = 1;
keir@22666 1311
keir@22666 1312 /* Consider push only */
keir@22666 1313 consider(push_svc, NULL);
keir@22666 1314 }
keir@22666 1315
keir@22666 1316 list_for_each( pull_iter, &orqd->svc )
keir@22666 1317 {
keir@22666 1318 struct csched_vcpu * pull_svc = list_entry(pull_iter, struct csched_vcpu, rqd_elem);
keir@22666 1319
keir@22666 1320 /* Skip this one if it's already been flagged to migrate */
keir@22666 1321 if ( test_bit(__CSFLAG_runq_migrate_request, &pull_svc->flags) )
keir@22666 1322 continue;
keir@22666 1323
keir@22666 1324 /* Consider pull only */
keir@22666 1325 consider(NULL, pull_svc);
keir@22666 1326 }
keir@22666 1327
keir@22666 1328 /* OK, now we have some candidates; do the moving */
keir@22666 1329 if ( best_push_svc )
keir@22666 1330 migrate(best_push_svc, orqd);
keir@22666 1331 if ( best_pull_svc )
keir@22666 1332 migrate(best_pull_svc, lrqd);
keir@22666 1333
keir@22666 1334 out_up:
keir@22666 1335 spin_unlock(&orqd->lock);
keir@22666 1336
keir@22666 1337 out:
keir@22666 1338 return;
keir@22666 1339 }
keir@22666 1340
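
To put numbers on the balancing tolerances used above (a sketch assuming the defaults credit2_load_window_shift=18, credit2_balance_under=0 and credit2_balance_over=-3; in this fixed-point scheme 1<<shift corresponds to a load of 1.0):

#include <stdio.h>

int main(void)
{
    unsigned shift = 18;                     /* load_window_shift           */
    double one = (double)(1ULL << shift);    /* fixed-point load of 1.0     */

    /* Under-loaded runqueues: only balance once the load delta reaches
     * 1<<(shift + credit2_balance_under). */
    printf("under-load threshold: %.3f cpus of load\n",
           (double)(1ULL << (shift + 0)) / one);
    /* Over-loaded runqueues: a much smaller delta already justifies it,
     * 1<<(shift + credit2_balance_over). */
    printf("over-load threshold:  %.3f cpus of load\n",
           (double)(1ULL << (shift - 3)) / one);
    return 0;
}
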
keir@21217 1341 static int
keir@22657 1342 csched_cpu_pick(const struct scheduler *ops, struct vcpu *vc)
keir@22657 1343 {
keir@22657 1344 struct csched_vcpu * const svc = CSCHED_VCPU(vc);
keir@22657 1345 int new_cpu;
keir@22657 1346
keir@22657 1347 /* The scheduler interface doesn't have an explicit mechanism to
keir@22657 1348 * involve the choosable scheduler in the migrate process, so we
keir@22657 1349 * infer that a change may happen by the call to cpu_pick, and
keir@22657 1350 * remove it from the old runqueue while the lock for the old
keir@22657 1351 * runqueue is held. It can't be actively waiting to run. It
keir@22657 1352 * will be added to the new runqueue when it next wakes.
keir@22657 1353 *
keir@22659 1354 * If we want to be able to call pick() separately, we need to add
keir@22659 1355 * a mechanism to remove a vcpu from an old processor / runqueue
keir@22659 1356 * before releasing the lock. */
keir@22657 1357 BUG_ON(__vcpu_on_runq(svc));
keir@22657 1358
keir@22657 1359 new_cpu = choose_cpu(ops, vc);
keir@22657 1360 /* If we're suggesting moving to a different runqueue, remove it
keir@22657 1361 * from the old runqueue while we have the lock. It will be added
keir@22657 1362 * to the new one when it wakes. */
keir@22657 1363 if ( svc->rqd != NULL
keir@22657 1364 && RQD(ops, new_cpu) != svc->rqd )
keir@22657 1365 runq_deassign(ops, vc);
keir@22657 1366
keir@22657 1367 return new_cpu;
keir@22657 1368 }
keir@22657 1369
keir@22657 1370 static int
keir@21217 1371 csched_dom_cntl(
keir@21327 1372 const struct scheduler *ops,
keir@21217 1373 struct domain *d,
keir@21217 1374 struct xen_domctl_scheduler_op *op)
keir@21217 1375 {
keir@21217 1376 struct csched_dom * const sdom = CSCHED_DOM(d);
keir@21258 1377 struct csched_private *prv = CSCHED_PRIV(ops);
keir@21217 1378 unsigned long flags;
keir@21217 1379
keir@21217 1380 if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
keir@21217 1381 {
keir@21217 1382 op->u.credit2.weight = sdom->weight;
keir@21217 1383 }
keir@21217 1384 else
keir@21217 1385 {
keir@21217 1386 ASSERT(op->cmd == XEN_DOMCTL_SCHEDOP_putinfo);
keir@21217 1387
keir@21217 1388 if ( op->u.credit2.weight != 0 )
keir@21217 1389 {
keir@21217 1390 struct list_head *iter;
keir@21217 1391 int old_weight;
keir@21217 1392
keir@21217 1393 /* Must hold csched_priv lock to update sdom, runq lock to
keir@21217 1394 * update csvcs. */
keir@21258 1395 spin_lock_irqsave(&prv->lock, flags);
keir@21217 1396
keir@21217 1397 old_weight = sdom->weight;
keir@21217 1398
keir@21217 1399 sdom->weight = op->u.credit2.weight;
keir@21217 1400
keir@21217 1401 /* Update weights for vcpus, and max_weight for runqueues on which they reside */
keir@21217 1402 list_for_each ( iter, &sdom->vcpu )
keir@21217 1403 {
keir@21217 1404 struct csched_vcpu *svc = list_entry(iter, struct csched_vcpu, sdom_elem);
keir@21217 1405
keir@21217 1406 /* NB: Locking order is important here. Because we grab this lock here, we
keir@21217 1407 * must never lock csched_priv.lock if we're holding a runqueue
keir@21217 1408 * lock. */
keir@21217 1409 vcpu_schedule_lock_irq(svc->vcpu);
keir@21217 1410
keir@22657 1411 BUG_ON(svc->rqd != RQD(ops, svc->vcpu->processor));
keir@22657 1412
keir@21217 1413 svc->weight = sdom->weight;
keir@22657 1414 update_max_weight(svc->rqd, svc->weight, old_weight);
keir@21217 1415
keir@21217 1416 vcpu_schedule_unlock_irq(svc->vcpu);
keir@21217 1417 }
keir@21217 1418
keir@21258 1419 spin_unlock_irqrestore(&prv->lock, flags);
keir@21217 1420 }
keir@21217 1421 }
keir@21217 1422
keir@21217 1423 return 0;
keir@21217 1424 }
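
/* Editor's note (illustrative sketch, not part of the original source):
 * csched_dom_cntl() relies on the locking order "csched_private lock
 * first, then the per-runqueue (vcpu schedule) lock", and never the
 * reverse.  A minimal sketch of the safe pattern, assuming prv and svc
 * shaped as above: */
#if 0
static void example_weight_update_locking(struct csched_private *prv,
                                          struct csched_vcpu *svc)
{
    unsigned long flags;

    spin_lock_irqsave(&prv->lock, flags);  /* outer: private lock */
    vcpu_schedule_lock_irq(svc->vcpu);     /* inner: runqueue lock */

    /* ... update svc->weight and the runqueue's max_weight here ... */

    vcpu_schedule_unlock_irq(svc->vcpu);
    spin_unlock_irqrestore(&prv->lock, flags);
}
#endif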
keir@21217 1425
keir@21258 1426 static void *
keir@21327 1427 csched_alloc_domdata(const struct scheduler *ops, struct domain *dom)
keir@21217 1428 {
keir@21217 1429 struct csched_dom *sdom;
keir@21217 1430 unsigned long flags;
keir@21217 1431
keir@21217 1432 sdom = xmalloc(struct csched_dom);
keir@21217 1433 if ( sdom == NULL )
keir@21258 1434 return NULL;
keir@21258 1435 memset(sdom, 0, sizeof(*sdom));
keir@21217 1436
keir@21217 1437 /* Initialize credit and weight */
keir@21217 1438 INIT_LIST_HEAD(&sdom->vcpu);
keir@21217 1439 INIT_LIST_HEAD(&sdom->sdom_elem);
keir@21217 1440 sdom->dom = dom;
keir@21217 1441 sdom->weight = CSCHED_DEFAULT_WEIGHT;
keir@21217 1442 sdom->nr_vcpus = 0;
keir@21217 1443
keir@21258 1444 spin_lock_irqsave(&CSCHED_PRIV(ops)->lock, flags);
keir@21258 1445
keir@21258 1446 list_add_tail(&sdom->sdom_elem, &CSCHED_PRIV(ops)->sdom);
keir@21217 1447
keir@21258 1448 spin_unlock_irqrestore(&CSCHED_PRIV(ops)->lock, flags);
keir@21258 1449
keir@21258 1450 return (void *)sdom;
keir@21258 1451 }
keir@21217 1452
keir@21258 1453 static int
keir@21327 1454 csched_dom_init(const struct scheduler *ops, struct domain *dom)
keir@21258 1455 {
keir@21258 1456 struct csched_dom *sdom;
keir@21258 1457
keir@21258 1458 printk("%s: Initializing domain %d\n", __func__, dom->domain_id);
keir@21217 1459
keir@21258 1460 if ( is_idle_domain(dom) )
keir@21258 1461 return 0;
keir@21258 1462
keir@21258 1463 sdom = csched_alloc_domdata(ops, dom);
keir@21258 1464 if ( sdom == NULL )
keir@21258 1465 return -ENOMEM;
keir@21258 1466
keir@21258 1467 dom->sched_priv = sdom;
keir@21217 1468
keir@21217 1469 return 0;
keir@21217 1470 }
keir@21217 1471
keir@21217 1472 static void
keir@21327 1473 csched_free_domdata(const struct scheduler *ops, void *data)
keir@21258 1474 {
keir@21258 1475 unsigned long flags;
keir@21258 1476 struct csched_dom *sdom = data;
keir@21258 1477
keir@21258 1478 spin_lock_irqsave(&CSCHED_PRIV(ops)->lock, flags);
keir@21258 1479
keir@21258 1480 list_del_init(&sdom->sdom_elem);
keir@21258 1481
keir@21258 1482 spin_unlock_irqrestore(&CSCHED_PRIV(ops)->lock, flags);
keir@21258 1483
keir@21258 1484 xfree(data);
keir@21258 1485 }
keir@21258 1486
keir@21258 1487 static void
keir@21327 1488 csched_dom_destroy(const struct scheduler *ops, struct domain *dom)
keir@21217 1489 {
keir@21217 1490 struct csched_dom *sdom = CSCHED_DOM(dom);
keir@21217 1491
keir@21217 1492 BUG_ON(!list_empty(&sdom->vcpu));
keir@21217 1493
keir@21258 1494 csched_free_domdata(ops, CSCHED_DOM(dom));
keir@21217 1495 }
keir@21217 1496
keir@21217 1497 /* How long should we let this vcpu run for? */
keir@21217 1498 static s_time_t
keir@21327 1499 csched_runtime(const struct scheduler *ops, int cpu, struct csched_vcpu *snext)
keir@21217 1500 {
keir@21217 1501 s_time_t time = CSCHED_MAX_TIMER;
keir@21258 1502 struct csched_runqueue_data *rqd = RQD(ops, cpu);
keir@21217 1503 struct list_head *runq = &rqd->runq;
keir@21217 1504
keir@21217 1505 if ( is_idle_vcpu(snext->vcpu) )
keir@21217 1506 return CSCHED_MAX_TIMER;
keir@21217 1507
keir@21217 1508 /* Basic time */
keir@21217 1509 time = c2t(rqd, snext->credit, snext);
keir@21217 1510
keir@21217 1511 /* Next guy on runqueue */
keir@21217 1512 if ( ! list_empty(runq) )
keir@21217 1513 {
keir@21217 1514 struct csched_vcpu *svc = __runq_elem(runq->next);
keir@21217 1515 s_time_t ntime;
keir@21217 1516
keir@21217 1517 if ( ! is_idle_vcpu(svc->vcpu) )
keir@21217 1518 {
keir@21217 1519 ntime = c2t(rqd, snext->credit - svc->credit, snext);
keir@21217 1520
keir@21217 1521 if ( time > ntime )
keir@21217 1522 time = ntime;
keir@21217 1523 }
keir@21217 1524 }
keir@21217 1525
keir@21217 1526 /* Check limits */
keir@21217 1527 if ( time < CSCHED_MIN_TIMER )
keir@21217 1528 time = CSCHED_MIN_TIMER;
keir@21217 1529 else if ( time > CSCHED_MAX_TIMER )
keir@21217 1530 time = CSCHED_MAX_TIMER;
keir@21217 1531
keir@21217 1532 return time;
keir@21217 1533 }
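
/* Editor's note (illustrative sketch, not part of the original source):
 * the slice chosen by csched_runtime() is "time for snext to burn down
 * to the credit of the next vcpu on the runqueue (if any)", clamped to
 * [CSCHED_MIN_TIMER, CSCHED_MAX_TIMER].  A self-contained sketch of the
 * selection, with the two candidate times already converted from
 * credit by c2t(): */
#if 0
static s_time_t example_runtime(s_time_t own_credit_time,
                                s_time_t lead_over_next_time)
{
    /* Take the smaller of "all of my credit" and "my lead over the
     * next runnable vcpu", then clamp to the allowed timer range. */
    s_time_t time = own_credit_time;

    if ( lead_over_next_time < time )
        time = lead_over_next_time;

    if ( time < CSCHED_MIN_TIMER )
        time = CSCHED_MIN_TIMER;
    else if ( time > CSCHED_MAX_TIMER )
        time = CSCHED_MAX_TIMER;

    return time;
}
#endif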
keir@21217 1534
keir@21217 1535 void __dump_execstate(void *unused);
keir@21217 1536
keir@21217 1537 /*
keir@22526 1538 * Find a candidate.
keir@22526 1539 */
keir@22526 1540 static struct csched_vcpu *
keir@22526 1541 runq_candidate(struct csched_runqueue_data *rqd,
keir@22526 1542 struct csched_vcpu *scurr,
keir@22526 1543 int cpu, s_time_t now)
keir@22526 1544 {
keir@22526 1545 struct list_head *iter;
keir@22526 1546 struct csched_vcpu *snext = NULL;
keir@22526 1547
keir@22526 1548 /* Default to current if runnable, idle otherwise */
keir@22526 1549 if ( vcpu_runnable(scurr->vcpu) )
keir@22526 1550 snext = scurr;
keir@22526 1551 else
keir@22526 1552 snext = CSCHED_VCPU(idle_vcpu[cpu]);
keir@22526 1553
keir@22526 1554 list_for_each( iter, &rqd->runq )
keir@22526 1555 {
keir@22526 1556 struct csched_vcpu * svc = list_entry(iter, struct csched_vcpu, runq_elem);
keir@22526 1557
keir@22526 1558 /* If this is on a different processor, don't pull it unless
keir@22526 1559 * its credit is at least CSCHED_MIGRATE_RESIST higher. */
keir@22526 1560 if ( svc->vcpu->processor != cpu
keir@22526 1561 && snext->credit + CSCHED_MIGRATE_RESIST > svc->credit )
keir@22526 1562 continue;
keir@22526 1563
keir@22526 1564 /* If the next one on the list has more credit than current
keir@22526 1565 * (or idle, if current is not runnable), choose it. */
keir@22526 1566 if ( svc->credit > snext->credit )
keir@22526 1567 snext = svc;
keir@22526 1568
keir@22526 1569 /* In any case, if we got this far, break. */
keir@22526 1570 break;
keir@22526 1571
keir@22526 1572 }
keir@22526 1573
keir@22526 1574 return snext;
keir@22526 1575 }
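
/* Editor's note (illustrative sketch, not part of the original source):
 * the "migration resistance" check above means a vcpu queued on another
 * processor is only pulled here if its credit is at least
 * CSCHED_MIGRATE_RESIST higher than the best local candidate's.  As a
 * stand-alone predicate: */
#if 0
static inline int example_worth_pulling(int remote_credit, int local_credit)
{
    /* Pull only if the remote vcpu's credit advantage meets the
     * migration resistance threshold. */
    return remote_credit >= local_credit + CSCHED_MIGRATE_RESIST;
}
#endif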
keir@22526 1576
keir@22526 1577 /*
keir@21217 1578 * This function is in the critical path. It is designed to be simple and
keir@21217 1579 * fast for the common case.
keir@21217 1580 */
keir@21217 1581 static struct task_slice
keir@21390 1582 csched_schedule(
keir@21390 1583 const struct scheduler *ops, s_time_t now, bool_t tasklet_work_scheduled)
keir@21217 1584 {
keir@21217 1585 const int cpu = smp_processor_id();
keir@22656 1586 struct csched_runqueue_data *rqd;
keir@21217 1587 struct csched_vcpu * const scurr = CSCHED_VCPU(current);
keir@21217 1588 struct csched_vcpu *snext = NULL;
keir@21217 1589 struct task_slice ret;
keir@21217 1590
keir@21217 1591 CSCHED_VCPU_CHECK(current);
keir@21217 1592
keir@21217 1593 d2printk("sc p%d c d%dv%d now %"PRI_stime"\n",
keir@21217 1594 cpu,
keir@21217 1595 scurr->vcpu->domain->domain_id,
keir@21217 1596 scurr->vcpu->vcpu_id,
keir@21217 1597 now);
keir@21217 1598
keir@22656 1599 BUG_ON(!cpu_isset(cpu, CSCHED_PRIV(ops)->initialized));
keir@22656 1600
keir@22656 1601 rqd = RQD(ops, cpu);
keir@22656 1602 BUG_ON(!cpu_isset(cpu, rqd->active));
keir@21217 1603
keir@22657 1604 /* Protected by runqueue lock */
keir@22657 1605
keir@22657 1606 BUG_ON(!is_idle_vcpu(scurr->vcpu) && scurr->rqd != rqd);
keir@21217 1607
keir@22376 1608 /* Clear "tickled" bit now that we've been scheduled */
keir@22376 1609 if ( cpu_isset(cpu, rqd->tickled) )
keir@22376 1610 cpu_clear(cpu, rqd->tickled);
keir@22376 1611
keir@21217 1612 /* Update credits */
keir@21217 1613 burn_credits(rqd, scurr, now);
keir@21217 1614
keir@21217 1615 /*
keir@21217 1616 * Select next runnable local VCPU (ie top of local runq).
keir@21217 1617 *
keir@21217 1618 * If the current vcpu is runnable, and has higher credit than
keir@21398 1619 * the next guy on the queue (or there is no one else), we want to
keir@21398 1620 * run it again.
keir@21398 1621 *
keir@21398 1622 * If there's tasklet work to do, we want to choose the idle vcpu
keir@21398 1623 * for this processor, and mark the current for delayed runqueue
keir@21398 1624 * add.
keir@21217 1625 *
keir@22526 1626 * If the current vcpu is runnable, and there's another runnable
keir@22526 1627 * candidate, we want to mark current for delayed runqueue add,
keir@22526 1628 * and remove the next guy from the queue.
keir@21217 1629 *
keir@21217 1630 * If the current vcpu is not runnable, we want to choose the idle
keir@21217 1631 * vcpu for this processor.
keir@21217 1632 */
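/*
 * Editor's summary of the cases above (added for clarity, derived from
 * the code below):
 *   - tasklet work pending: run the idle vcpu; a runnable current is
 *     marked for delayed runqueue add.
 *   - current runnable and not out-credited: keep running current.
 *   - current runnable but out-credited: delayed-add current, run the
 *     candidate pulled off the runqueue.
 *   - current not runnable: run the idle vcpu, or the head of the
 *     runqueue if it has more credit.
 */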
keir@22377 1633 if ( tasklet_work_scheduled )
keir@21217 1634 {
keir@22526 1635 trace_var(TRC_CSCHED2_SCHED_TASKLET, 0, 0, NULL);
keir@22526 1636 snext = CSCHED_VCPU(idle_vcpu[cpu]);
keir@21217 1637 }
keir@22526 1638 else
keir@22526 1639 snext = runq_candidate(rqd, scurr, cpu, now);
keir@21217 1640
keir@22526 1641 /* If switching from a non-idle runnable vcpu, put it
keir@22526 1642 * back on the runqueue. */
keir@22526 1643 if ( snext != scurr
keir@22526 1644 && !is_idle_vcpu(scurr->vcpu)
keir@22526 1645 && vcpu_runnable(current) )
keir@22526 1646 set_bit(__CSFLAG_delayed_runq_add, &scurr->flags);
keir@21217 1647
keir@21671 1648 ret.migrated = 0;
keir@21671 1649
keir@22526 1650 /* Accounting for non-idle tasks */
keir@21217 1651 if ( !is_idle_vcpu(snext->vcpu) )
keir@21217 1652 {
keir@22526 1653 /* If switching, remove this from the runqueue and mark it scheduled */
keir@22526 1654 if ( snext != scurr )
keir@22526 1655 {
keir@22657 1656 BUG_ON(snext->rqd != rqd);
keir@22657 1657
keir@22526 1658 __runq_remove(snext);
keir@22526 1659 if ( snext->vcpu->is_running )
keir@22526 1660 {
keir@22526 1661 printk("p%d: snext d%dv%d running on p%d! scurr d%dv%d\n",
keir@22526 1662 cpu,
keir@22526 1663 snext->vcpu->domain->domain_id, snext->vcpu->vcpu_id,
keir@22526 1664 snext->vcpu->processor,
keir@22526 1665 scurr->vcpu->domain->domain_id,
keir@22526 1666 scurr->vcpu->vcpu_id);
keir@22526 1667 BUG();
keir@22526 1668 }
keir@22526 1669 set_bit(__CSFLAG_scheduled, &snext->flags);
keir@22526 1670 }
keir@22526 1671
keir@22526 1672 /* Check for the reset condition */
keir@22526 1673 if ( snext->credit <= CSCHED_CREDIT_RESET )
keir@22666 1674 {
keir@22526 1675 reset_credit(ops, cpu, now);
keir@22666 1676 balance_load(ops, cpu, now);
keir@22666 1677 }
keir@22526 1678
keir@22526 1679 /* Clear the idle mask if necessary */
keir@22526 1680 if ( cpu_isset(cpu, rqd->idle) )
keir@22526 1681 cpu_clear(cpu, rqd->idle);
keir@22526 1682
keir@21217 1683 snext->start_time = now;
keir@22526 1684
keir@21671 1685 /* Safe because lock for old processor is held */
keir@21671 1686 if ( snext->vcpu->processor != cpu )
keir@21671 1687 {
keir@22526 1688 snext->credit += CSCHED_MIGRATE_COMPENSATION;
keir@21671 1689 snext->vcpu->processor = cpu;
keir@21671 1690 ret.migrated = 1;
keir@21671 1691 }
keir@21217 1692 }
keir@22526 1693 else
keir@22526 1694 {
keir@22526 1695 /* Update the idle mask if necessary */
keir@22526 1696 if ( !cpu_isset(cpu, rqd->idle) )
keir@22526 1697 cpu_set(cpu, rqd->idle);
keir@22658 1698 /* Make sure avgload gets updated periodically even
keir@22658 1699 * if there's no activity */
keir@22662 1700 update_load(ops, rqd, NULL, 0, now);
keir@22526 1701 }
keir@21243 1702
keir@21217 1703 /*
keir@21217 1704 * Return task to run next...
keir@21217 1705 */
keir@21258 1706 ret.time = csched_runtime(ops, cpu, snext);
keir@21217 1707 ret.task = snext->vcpu;
keir@21217 1708
keir@21217 1709 CSCHED_VCPU_CHECK(ret.task);
keir@21217 1710 return ret;
keir@21217 1711 }
keir@21217 1712
keir@21217 1713 static void
keir@21217 1714 csched_dump_vcpu(struct csched_vcpu *svc)
keir@21217 1715 {
keir@21217 1716 printk("[%i.%i] flags=%x cpu=%i",
keir@21217 1717 svc->vcpu->domain->domain_id,
keir@21217 1718 svc->vcpu->vcpu_id,
keir@21217 1719 svc->flags,
keir@21217 1720 svc->vcpu->processor);
keir@21217 1721
keir@21217 1722 printk(" credit=%" PRIi32" [w=%u]", svc->credit, svc->weight);
keir@21217 1723
keir@21217 1724 printk("\n");
keir@21217 1725 }
keir@21217 1726
keir@21217 1727 static void
keir@21327 1728 csched_dump_pcpu(const struct scheduler *ops, int cpu)
keir@21217 1729 {
keir@21217 1730 struct list_head *runq, *iter;
keir@21217 1731 struct csched_vcpu *svc;
keir@21217 1732 int loop;
keir@21217 1733 char cpustr[100];
keir@21217 1734
keir@21217 1735 /* FIXME: Do locking properly for access to runqueue structures */
keir@21217 1736
keir@21258 1737 runq = &RQD(ops, cpu)->runq;
keir@21217 1738
keir@21217 1739 cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_sibling_map,cpu));
keir@21217 1740 printk(" sibling=%s, ", cpustr);
keir@21217 1741 cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_core_map,cpu));
keir@21217 1742 printk("core=%s\n", cpustr);
keir@21217 1743
keir@21217 1744 /* current VCPU */
keir@21217 1745 svc = CSCHED_VCPU(per_cpu(schedule_data, cpu).curr);
keir@21217 1746 if ( svc )
keir@21217 1747 {
keir@21217 1748 printk("\trun: ");
keir@21217 1749 csched_dump_vcpu(svc);
keir@21217 1750 }
keir@21217 1751
keir@21217 1752 loop = 0;
keir@21217 1753 list_for_each( iter, runq )
keir@21217 1754 {
keir@21217 1755 svc = __runq_elem(iter);
keir@21217 1756 if ( svc )
keir@21217 1757 {
keir@21217 1758 printk("\t%3d: ", ++loop);
keir@21217 1759 csched_dump_vcpu(svc);
keir@21217 1760 }
keir@21217 1761 }
keir@21217 1762 }
keir@21217 1763
keir@21217 1764 static void
keir@21327 1765 csched_dump(const struct scheduler *ops)
keir@21217 1766 {
keir@21217 1767 struct list_head *iter_sdom, *iter_svc;
keir@21258 1768 struct csched_private *prv = CSCHED_PRIV(ops);
keir@22656 1769 int i, loop;
keir@21217 1770
keir@22656 1771 printk("Active queues: %d\n"
keir@21217 1772 "\tdefault-weight = %d\n",
keir@22656 1773 cpus_weight(prv->active_queues),
keir@21217 1774 CSCHED_DEFAULT_WEIGHT);
keir@22656 1775 for_each_cpu_mask(i, prv->active_queues)
keir@22656 1776 {
keir@22668 1777 s_time_t fraction;
keir@22668 1778
keir@22668 1779 fraction = prv->rqd[i].avgload * 100 / (1ULL<<prv->load_window_shift);
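/* Editor's note: avgload is a fixed-point value scaled by
 * 2^load_window_shift, so this yields an approximate load percentage.
 * E.g. (hypothetical numbers) with load_window_shift = 18 and
 * avgload = 1 << 17, the fraction printed below is 50. */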
keir@22668 1780
keir@22656 1781 printk("Runqueue %d:\n"
keir@22656 1782 "\tncpus = %u\n"
keir@22658 1783 "\tmax_weight = %d\n"
keir@22668 1784 "\tinstload = %d\n"
keir@22685 1785 "\tavgload = %3"PRI_stime"\n",
keir@22656 1786 i,
keir@22656 1787 cpus_weight(prv->rqd[i].active),
keir@22658 1788 prv->rqd[i].max_weight,
keir@22668 1789 prv->rqd[i].load,
keir@22668 1790 fraction);
keir@21217 1791
keir@22656 1792 }
keir@21217 1793 /* FIXME: Locking! */
keir@21217 1794
keir@22377 1795 printk("Domain info:\n");
keir@21217 1796 loop = 0;
keir@21258 1797 list_for_each( iter_sdom, &prv->sdom )
keir@21217 1798 {
keir@21217 1799 struct csched_dom *sdom;
keir@21217 1800 sdom = list_entry(iter_sdom, struct csched_dom, sdom_elem);
keir@21217 1801
keir@22377 1802 printk("\tDomain: %d w %d v %d\n\t",
keir@22377 1803 sdom->dom->domain_id,
keir@22377 1804 sdom->weight,
keir@22377 1805 sdom->nr_vcpus);
keir@22377 1806
keir@21217 1807 list_for_each( iter_svc, &sdom->vcpu )
keir@21217 1808 {
keir@21217 1809 struct csched_vcpu *svc;
keir@21217 1810 svc = list_entry(iter_svc, struct csched_vcpu, sdom_elem);
keir@21217 1811
keir@21217 1812 printk("\t%3d: ", ++loop);
keir@21217 1813 csched_dump_vcpu(svc);
keir@21217 1814 }
keir@21217 1815 }
keir@21217 1816 }
keir@21217 1817
keir@22656 1818 static void activate_runqueue(struct csched_private *prv, int rqi)
keir@21367 1819 {
keir@22656 1820 struct csched_runqueue_data *rqd;
keir@22656 1821
keir@22656 1822 rqd = prv->rqd + rqi;
keir@22656 1823
keir@22656 1824 BUG_ON(!cpus_empty(rqd->active));
keir@22656 1825
keir@22656 1826 rqd->max_weight = 1;
keir@22656 1827 rqd->id = rqi;
keir@22656 1828 INIT_LIST_HEAD(&rqd->svc);
keir@22656 1829 INIT_LIST_HEAD(&rqd->runq);
keir@22656 1830 spin_lock_init(&rqd->lock);
keir@22656 1831
keir@22656 1832 cpu_set(rqi, prv->active_queues);
keir@22656 1833 }
keir@22656 1834
keir@22656 1835 static void deactivate_runqueue(struct csched_private *prv, int rqi)
keir@22656 1836 {
keir@22656 1837 struct csched_runqueue_data *rqd;
keir@22656 1838
keir@22656 1839 rqd = prv->rqd + rqi;
keir@22656 1840
keir@22656 1841 BUG_ON(!cpus_empty(rqd->active));
keir@22656 1842
keir@22656 1843 rqd->id = -1;
keir@22656 1844
keir@22656 1845 cpu_clear(rqi, prv->active_queues);
keir@22656 1846 }
keir@22656 1847
keir@22656 1848 static void init_pcpu(const struct scheduler *ops, int cpu)
keir@22656 1849 {
keir@22656 1850 int rqi, old_rqi, flags;
keir@21367 1851 struct csched_private *prv = CSCHED_PRIV(ops);
keir@22656 1852 struct csched_runqueue_data *rqd;
keir@22656 1853 spinlock_t *old_lock;
keir@21367 1854
keir@21367 1855 spin_lock_irqsave(&prv->lock, flags);
keir@22656 1856
keir@22656 1857 if ( cpu_isset(cpu, prv->initialized) )
keir@22656 1858 {
keir@22656 1859 printk("%s: Strange, cpu %d already initialized!\n", __func__, cpu);
keir@22656 1860 spin_unlock_irqrestore(&prv->lock, flags);
keir@22656 1861 return;
keir@22656 1862 }
keir@22656 1863
keir@22656 1864 old_rqi = prv->runq_map[cpu];
keir@22656 1865
keir@22660 1869 /* Figure out which runqueue to put it in */
keir@22660 1870 /* NB: cpu 0 doesn't get a STARTING callback, so we hard-code it to runqueue 0. */
keir@22660 1871 if ( cpu == 0 )
keir@22660 1872 rqi = 0;
keir@22660 1873 else
keir@22660 1874 rqi = cpu_to_socket(cpu);
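/* Editor's note: i.e. one runqueue per socket.  On a hypothetical
 * two-socket box with four cpus per socket, cpus 0-3 would share
 * runqueue 0 and cpus 4-7 runqueue 1. */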
keir@22660 1875
keir@22660 1876 if ( rqi < 0 )
keir@22660 1877 {
keir@22660 1878 printk("%s: cpu_to_socket(%d) returned %d!\n",
keir@22660 1879 __func__, cpu, rqi);
keir@22660 1880 BUG();
keir@22660 1881 }
keir@22660 1882
keir@22656 1883 rqd = prv->rqd + rqi;
keir@22656 1884
keir@22656 1885 printk("Adding cpu %d to runqueue %d\n", cpu, rqi);
keir@22656 1886 if ( ! cpu_isset(rqi, prv->active_queues) )
keir@22656 1887 {
keir@22656 1888 printk(" First cpu on runqueue, activating\n");
keir@22656 1889 activate_runqueue(prv, rqi);
keir@22656 1890 }
keir@22656 1891
keir@22656 1892 /* IRQs already disabled */
keir@22656 1893 old_lock = pcpu_schedule_lock(cpu);
keir@22656 1894
keir@22656 1895 /* Move spinlock to new runq lock. */
keir@22656 1896 per_cpu(schedule_data, cpu).schedule_lock = &rqd->lock;
keir@22656 1897
keir@22656 1898 /* Set the runqueue map */
keir@22656 1899 prv->runq_map[cpu] = rqi;
keir@22656 1900
keir@22656 1901 cpu_set(cpu, rqd->idle);
keir@22656 1902 cpu_set(cpu, rqd->active);
keir@22656 1903
keir@22656 1904 spin_unlock(old_lock);
keir@22656 1905
keir@22656 1906 cpu_set(cpu, prv->initialized);
keir@22656 1907
keir@21367 1908 spin_unlock_irqrestore(&prv->lock, flags);
keir@21367 1909
keir@21367 1910 return;
keir@21367 1911 }
keir@21367 1912
keir@21367 1913 static void *
keir@21367 1914 csched_alloc_pdata(const struct scheduler *ops, int cpu)
keir@21367 1915 {
keir@22660 1916 /* Check to see if the cpu is online yet */
keir@22660 1917 /* Note: cpu 0 doesn't get a STARTING callback */
keir@22660 1918 if ( cpu == 0 || cpu_to_socket(cpu) >= 0 )
keir@22660 1919 init_pcpu(ops, cpu);
keir@22660 1920 else
keir@22660 1921 printk("%s: cpu %d not online yet, deferring initialization\n",
keir@22660 1922 __func__, cpu);
keir@21367 1923
keir@21367 1924 return (void *)1;
keir@21367 1925 }
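
/* Editor's note: cpus whose socket information is not yet available at
 * alloc_pdata time are picked up later by the CPU_STARTING notifier
 * registered in csched_init() (cpu_credit2_callback below), which calls
 * init_pcpu() once the topology is known. */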
keir@21367 1926
keir@21367 1927 static void
keir@22656 1928 csched_free_pdata(const struct scheduler *ops, void *pcpu, int cpu)
keir@21217 1929 {
keir@22656 1930 unsigned long flags;
keir@22656 1931 struct csched_private *prv = CSCHED_PRIV(ops);
keir@22656 1932 struct csched_runqueue_data *rqd;
keir@22656 1933 int rqi;
keir@22656 1934
keir@22656 1935 spin_lock_irqsave(&prv->lock, flags);
keir@22656 1936
keir@22656 1937 BUG_ON( !cpu_isset(cpu, prv->initialized));
keir@22656 1938
keir@22656 1939 /* Find the old runqueue and remove this cpu from it */
keir@22656 1940 rqi = prv->runq_map[cpu];
keir@22656 1941
keir@22656 1942 rqd = prv->rqd + rqi;
keir@22656 1943
keir@22656 1944 /* No need to save IRQs here, they're already disabled */
keir@22656 1945 spin_lock(&rqd->lock);
keir@22656 1946
keir@22656 1947 BUG_ON(!cpu_isset(cpu, rqd->idle));
keir@22656 1948
keir@22656 1949 printk("Removing cpu %d from runqueue %d\n", cpu, rqi);
keir@22656 1950
keir@22656 1951 cpu_clear(cpu, rqd->idle);
keir@22656 1952 cpu_clear(cpu, rqd->active);
keir@22656 1953
keir@22656 1954 if ( cpus_empty(rqd->active) )
keir@22656 1955 {
keir@22656 1956 printk(" No cpus left on runqueue, disabling\n");
keir@22656 1957 deactivate_runqueue(prv, rqi);
keir@22656 1958 }
keir@22656 1959
keir@22656 1960 spin_unlock(&rqd->lock);
keir@22656 1961
keir@22656 1962 cpu_clear(cpu, prv->initialized);
keir@22656 1963
keir@22656 1964 spin_unlock_irqrestore(&prv->lock, flags);
keir@22656 1965
keir@22656 1966 return;
keir@21217 1967 }
keir@21217 1968
keir@21258 1969 static int
keir@22660 1970 csched_cpu_starting(int cpu)
keir@22660 1971 {
keir@22660 1972 struct scheduler *ops;
keir@22660 1973
keir@22660 1974 /* Hope this is safe from cpupools switching things around. :-) */
keir@22660 1975 ops = per_cpu(scheduler, cpu);
keir@22660 1976
keir@22660 1977 init_pcpu(ops, cpu);
keir@22660 1978
keir@22660 1979 return NOTIFY_DONE;
keir@22660 1980 }
keir@22660 1981
keir@22660 1982 static int cpu_credit2_callback(
keir@22660 1983 struct notifier_block *nfb, unsigned long action, void *hcpu)
keir@22660 1984 {
keir@22660 1985 unsigned int cpu = (unsigned long)hcpu;
keir@22660 1986 int rc = 0;
keir@22660 1987
keir@22660 1988 switch ( action )
keir@22660 1989 {
keir@22660 1990 case CPU_STARTING:
keir@22660 1991 csched_cpu_starting(cpu);
keir@22660 1992 break;
keir@22660 1993 default:
keir@22660 1994 break;
keir@22660 1995 }
keir@22660 1996
keir@22660 1997 return !rc ? NOTIFY_DONE : notifier_from_errno(rc);
keir@22660 1998 }
keir@22660 1999
keir@22660 2000 static struct notifier_block cpu_credit2_nfb = {
keir@22660 2001 .notifier_call = cpu_credit2_callback
keir@22660 2002 };
keir@22660 2003
keir@22660 2004 static int
keir@21453 2005 csched_init(struct scheduler *ops)
keir@21217 2006 {
keir@21217 2007 int i;
keir@21258 2008 struct csched_private *prv;
keir@21217 2009
keir@21217 2010 printk("Initializing Credit2 scheduler\n" \
keir@21217 2011 " WARNING: This is experimental software in development.\n" \
keir@21217 2012 " Use at your own risk.\n");
keir@21217 2013
keir@22661 2014 printk(" load_window_shift: %d\n", opt_load_window_shift);
keir@22667 2015 printk(" underload_balance_tolerance: %d\n", opt_underload_balance_tolerance);
keir@22667 2016 printk(" overload_balance_tolerance: %d\n", opt_overload_balance_tolerance);
keir@22661 2017
keir@22661 2018 if ( opt_load_window_shift < LOADAVG_WINDOW_SHIFT_MIN )
keir@22661 2019 {
keir@22661 2020 printk("%s: opt_load_window_shift %d below min %d, resetting\n",
keir@22661 2021 __func__, opt_load_window_shift, LOADAVG_WINDOW_SHIFT_MIN);
keir@22661 2022 opt_load_window_shift = LOADAVG_WINDOW_SHIFT_MIN;
keir@22661 2023 }
keir@22661 2024
keir@22660 2025 /* Basically no CPU information is available at this point; just
keir@22660 2026 * set up basic structures, and a callback when the CPU info is
keir@22660 2027 * available. */
keir@22660 2028
keir@21258 2029 prv = xmalloc(struct csched_private);
keir@21258 2030 if ( prv == NULL )
keir@21453 2031 return -ENOMEM;
keir@21258 2032 memset(prv, 0, sizeof(*prv));
keir@21367 2033 ops->sched_data = prv;
keir@21258 2034 spin_lock_init(&prv->lock);
keir@21258 2035 INIT_LIST_HEAD(&prv->sdom);
keir@21217 2036
keir@22660 2037 register_cpu_notifier(&cpu_credit2_nfb);
keir@22660 2038
keir@22656 2039 /* But un-initialize all runqueues */
keir@22656 2040 for ( i = 0; i < NR_CPUS; i++ )
keir@21217 2041 {
keir@22656 2042 prv->runq_map[i] = -1;
keir@22656 2043 prv->rqd[i].id = -1;
keir@21217 2044 }
keir@21217 2045
keir@22661 2046 prv->load_window_shift = opt_load_window_shift;
keir@22661 2047
keir@21258 2048 return 0;
keir@21217 2049 }
keir@21217 2050
keir@21258 2051 static void
keir@21327 2052 csched_deinit(const struct scheduler *ops)
keir@21258 2053 {
keir@21258 2054 struct csched_private *prv;
keir@21258 2055
keir@21258 2056 prv = CSCHED_PRIV(ops);
keir@21258 2057 if ( prv != NULL )
keir@21258 2058 xfree(prv);
keir@21258 2059 }
keir@21258 2060
keir@21258 2061
keir@21258 2062 static struct csched_private _csched_priv;
keir@21258 2063
keir@21327 2064 const struct scheduler sched_credit2_def = {
keir@21217 2065 .name = "SMP Credit Scheduler rev2",
keir@21217 2066 .opt_name = "credit2",
keir@21217 2067 .sched_id = XEN_SCHEDULER_CREDIT2,
keir@21258 2068 .sched_data = &_csched_priv,
keir@21217 2069
keir@21217 2070 .init_domain = csched_dom_init,
keir@21217 2071 .destroy_domain = csched_dom_destroy,
keir@21217 2072
keir@21258 2073 .insert_vcpu = csched_vcpu_insert,
keir@22324 2074 .remove_vcpu = csched_vcpu_remove,
keir@21217 2075
keir@21217 2076 .sleep = csched_vcpu_sleep,
keir@21217 2077 .wake = csched_vcpu_wake,
keir@21217 2078
keir@21217 2079 .adjust = csched_dom_cntl,
keir@21217 2080
keir@21217 2081 .pick_cpu = csched_cpu_pick,
keir@21217 2082 .do_schedule = csched_schedule,
keir@21217 2083 .context_saved = csched_context_saved,
keir@21217 2084
keir@21217 2085 .dump_cpu_state = csched_dump_pcpu,
keir@21217 2086 .dump_settings = csched_dump,
keir@21217 2087 .init = csched_init,
keir@21258 2088 .deinit = csched_deinit,
keir@21258 2089 .alloc_vdata = csched_alloc_vdata,
keir@21258 2090 .free_vdata = csched_free_vdata,
keir@21367 2091 .alloc_pdata = csched_alloc_pdata,
keir@21367 2092 .free_pdata = csched_free_pdata,
keir@21258 2093 .alloc_domdata = csched_alloc_domdata,
keir@21258 2094 .free_domdata = csched_free_domdata,
keir@21217 2095 };