xenbits.xen.org Git - xenclient/xen.git/commitdiff
patch bcredit
author Vincent Hanquez <vincent@snarc.org>
Wed, 17 Dec 2008 17:32:02 +0000 (17:32 +0000)
committer Vincent Hanquez <vincent@snarc.org>
Wed, 17 Dec 2008 17:32:02 +0000 (17:32 +0000)
13 files changed:
tools/libxc/xc_csched.c
tools/libxc/xenctrl.h
tools/python/xen/lowlevel/xc/xc.c
tools/python/xen/xend/XendAPI.py
tools/python/xen/xend/XendConfig.py
tools/python/xen/xend/XendDomain.py
tools/python/xen/xend/XendDomainInfo.py
tools/python/xen/xend/XendNode.py
tools/python/xen/xm/main.py
xen/common/sched_credit.c
xen/common/schedule.c
xen/drivers/passthrough/iommu.c
xen/include/public/domctl.h

diff --git a/tools/libxc/xc_csched.c b/tools/libxc/xc_csched.c
index 4ea986fae2d726d4dec8bb0bd31148277bc7312b..e8f590f92a9906072759bd8f8b9a376f6046b7df 100644
@@ -48,3 +48,41 @@ xc_sched_credit_domain_get(
 
     return err;
 }
+
+int
+xc_sched_bcredit_domain_set(
+    int xc_handle,
+    uint32_t domid,
+    struct xen_domctl_sched_bcredit *sdom)
+{
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_scheduler_op;
+    domctl.domain = (domid_t) domid;
+    domctl.u.scheduler_op.sched_id = XEN_SCHEDULER_BCREDIT;
+    domctl.u.scheduler_op.cmd = XEN_DOMCTL_SCHEDOP_putinfo;
+    domctl.u.scheduler_op.u.bcredit = *sdom;
+
+    return do_domctl(xc_handle, &domctl);
+}
+
+int
+xc_sched_bcredit_domain_get(
+    int xc_handle,
+    uint32_t domid,
+    struct xen_domctl_sched_bcredit *sdom)
+{
+    DECLARE_DOMCTL;
+    int err;
+
+    domctl.cmd = XEN_DOMCTL_scheduler_op;
+    domctl.domain = (domid_t) domid;
+    domctl.u.scheduler_op.sched_id = XEN_SCHEDULER_BCREDIT;
+    domctl.u.scheduler_op.cmd = XEN_DOMCTL_SCHEDOP_getinfo;
+
+    err = do_domctl(xc_handle, &domctl);
+    if ( err == 0 )
+        *sdom = domctl.u.scheduler_op.u.bcredit;
+
+    return err;
+}
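
For context, a minimal caller sketch for this new pair of entry points (hypothetical code, not part of the patch; it assumes a libxc handle already opened with xc_interface_open(), an existing domain ID, and the stock 30 ms credit tslice):

    #include <stdio.h>
    #include <xenctrl.h>

    /* Set a domain's bcredit parameters, then read them back. */
    int bcredit_example(int xc_handle, uint32_t domid)
    {
        struct xen_domctl_sched_bcredit sdom = {
            .weight           = 0,   /* a nonzero boost_ratio forces weight 0 anyway */
            .cap              = 0,   /* 0 = uncapped; ~0 would mean "leave unchanged" */
            .max_boost_period = 30,  /* ms; must be >= one tslice (30 ms here) or 0 */
            .boost_ratio      = 20,  /* percent of one physical CPU */
        };

        if ( xc_sched_bcredit_domain_set(xc_handle, domid, &sdom) != 0 )
            return -1;

        if ( xc_sched_bcredit_domain_get(xc_handle, domid, &sdom) != 0 )
            return -1;

        printf("weight=%u cap=%u max_boost_period=%u boost_ratio=%u\n",
               sdom.weight, sdom.cap, sdom.max_boost_period, sdom.boost_ratio);
        return 0;
    }
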
diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index ef68689683b21166f163f7a70fec08bcae36e3f7..f648de82bd40ff2faaa8d5dcb9f363f80ebc6f2a 100644
@@ -452,6 +452,14 @@ int xc_sched_credit_domain_get(int xc_handle,
                                uint32_t domid,
                                struct xen_domctl_sched_credit *sdom);
 
+int xc_sched_bcredit_domain_set(int xc_handle,
+                                uint32_t domid,
+                                struct xen_domctl_sched_bcredit *sdom);
+
+int xc_sched_bcredit_domain_get(int xc_handle,
+                                uint32_t domid,
+                                struct xen_domctl_sched_bcredit *sdom);
+
 /**
  * This function sends a trigger to a domain.
  *
diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c
index ee51a7e3662884218e9cabd6406bd6601b2f5509..77b5f21ffc4d32312d8150660848dd381054ca35 100644
@@ -1317,6 +1317,59 @@ static PyObject *pyxc_sched_credit_domain_get(XcObject *self, PyObject *args)
                          "cap",     sdom.cap);
 }
 
+static PyObject *pyxc_sched_bcredit_domain_set(XcObject *self,
+                                               PyObject *args,
+                                               PyObject *kwds)
+{
+    uint32_t domid;
+    uint16_t weight;
+    uint16_t cap;
+    uint16_t max_boost_period;
+    uint16_t boost_ratio;
+    static char *kwd_list[] = { "domid", "bc_weight", "bc_cap",
+                                "bc_max_boost_period", "bc_ratio", NULL };
+    static char kwd_type[] = "I|HHhh";
+    struct xen_domctl_sched_bcredit sdom;
+    
+    weight = 0;
+    cap = (uint16_t)~0U;
+    max_boost_period = (uint16_t)~0U;
+    boost_ratio = (uint16_t)~0U;
+    if( !PyArg_ParseTupleAndKeywords(args, kwds, kwd_type, kwd_list, 
+                                     &domid, &weight, &cap,
+                                     &max_boost_period, &boost_ratio) )
+        return NULL;
+
+    sdom.weight = weight;
+    sdom.cap = cap;
+    sdom.max_boost_period = max_boost_period;
+    sdom.boost_ratio = boost_ratio;
+
+    if ( xc_sched_bcredit_domain_set(self->xc_handle, domid, &sdom) != 0 )
+        return pyxc_error_to_exception();
+
+    Py_INCREF(zero);
+    return zero;
+}
+
+static PyObject *pyxc_sched_bcredit_domain_get(XcObject *self, PyObject *args)
+{
+    uint32_t domid;
+    struct xen_domctl_sched_bcredit sdom;
+    
+    if( !PyArg_ParseTuple(args, "I", &domid) )
+        return NULL;
+    
+    if ( xc_sched_bcredit_domain_get(self->xc_handle, domid, &sdom) != 0 )
+        return pyxc_error_to_exception();
+
+    return Py_BuildValue("{s:H,s:H,s:i,s:i}",
+                         "bc_weight",           sdom.weight,
+                         "bc_cap",              sdom.cap,
+                         "bc_max_boost_period", sdom.max_boost_period,
+                         "bc_ratio",            sdom.boost_ratio);
+}
+
 static PyObject *pyxc_domain_setmaxmem(XcObject *self, PyObject *args)
 {
     uint32_t dom;
@@ -1733,6 +1786,30 @@ static PyMethodDef pyxc_methods[] = {
       "Returns:   [dict]\n"
       " weight    [short]: domain's scheduling weight\n"},
 
+    { "sched_bcredit_domain_set",
+      (PyCFunction)pyxc_sched_bcredit_domain_set,
+      METH_KEYWORDS, "\n"
+      "Set the scheduling parameters for a domain when running with the\n"
+      "SMP credit scheduler for client.\n"
+      " domid               [int]:   domain id to set\n"
+      " bc_weight           [short]: domain's scheduling weight\n"
+      " bc_cap              [short]: cap\n"
+      " bc_max_boost_period [short]: upper limit on time spent at BOOST priority (ms)\n"
+      " bc_ratio            [short]: domain's boost ratio per CPU\n"
+      "Returns: [int] 0 on success; -1 on error.\n" },
+
+    { "sched_bcredit_domain_get",
+      (PyCFunction)pyxc_sched_bcredit_domain_get,
+      METH_VARARGS, "\n"
+      "Get the scheduling parameters for a domain when running with the\n"
+      "SMP credit scheduler for client.\n"
+      " domid     [int]:   domain id to get\n"
+      "Returns:   [dict]\n"
+      " bc_weight           [short]: domain's scheduling weight\n"
+      " bc_cap              [short]: cap\n"
+      " bc_max_boost_period [short]: upper limit on time spent at BOOST priority (ms)\n"
+      " bc_ratio            [short]: domain's boost ratio per CPU\n"},
+
     { "evtchn_alloc_unbound", 
       (PyCFunction)pyxc_evtchn_alloc_unbound,
       METH_VARARGS | METH_KEYWORDS, "\n"
@@ -2048,6 +2125,7 @@ PyMODINIT_FUNC initxc(void)
     /* Expose some libxc constants to Python */
     PyModule_AddIntConstant(m, "XEN_SCHEDULER_SEDF", XEN_SCHEDULER_SEDF);
     PyModule_AddIntConstant(m, "XEN_SCHEDULER_CREDIT", XEN_SCHEDULER_CREDIT);
+    PyModule_AddIntConstant(m, "XEN_SCHEDULER_BCREDIT", XEN_SCHEDULER_BCREDIT);
 
 }
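
A note on the binding's calling convention: the "I|HHhh" format string makes every argument after domid optional, and the defaults chosen above (0 for bc_weight, 0xFFFF for bc_cap, bc_max_boost_period and bc_ratio) are exactly the sentinel values that the hypervisor's putinfo handler (bcsched_dom_cntl, further down) treats as "leave this field unchanged".
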
 
diff --git a/tools/python/xen/xend/XendAPI.py b/tools/python/xen/xend/XendAPI.py
index 42e131be37cbef16fc276f2b88be71ae9fec0315..9124a1768b11f55f0c7ad2dd93b44312c8afbadd 100644
@@ -1510,6 +1510,16 @@ class XendAPI(object):
             cap = xeninfo.info['vcpus_params']['cap']
             xendom.domain_sched_credit_set(xeninfo.getDomid(), weight, cap)
 
+        if 'bc_weight' in xeninfo.info['vcpus_params'] \
+           and 'bc_cap' in xeninfo.info['vcpus_params'] \
+           and 'bc_max_boost_period' in xeninfo.info['vcpus_params'] \
+           and 'bc_ratio' in xeninfo.info['vcpus_params']:
+            bc_weight = xeninfo.info['vcpus_params']['bc_weight']
+            bc_cap = xeninfo.info['vcpus_params']['bc_cap']
+            bc_max_boost_period = xeninfo.info['vcpus_params']['bc_max_boost_period']
+            bc_ratio = xeninfo.info['vcpus_params']['bc_ratio']
+            xendom.domain_sched_bcredit_set(xeninfo.getDomid(), bc_weight, bc_cap, bc_max_boost_period, bc_ratio)
+
     def VM_set_VCPUs_number_live(self, _, vm_ref, num):
         dom = XendDomain.instance().get_vm_by_uuid(vm_ref)
         dom.setVCpuCount(int(num))
diff --git a/tools/python/xen/xend/XendConfig.py b/tools/python/xen/xend/XendConfig.py
index 86b15d5573d230b9003b8706284b79f1fb723af5..cc92b9ac9e80f0388204c9daee9576f7f347a5d4 100644
@@ -589,6 +589,15 @@ class XendConfig(dict):
             int(sxp.child_value(sxp_cfg, "cpu_weight", 256))
         cfg["vcpus_params"]["cap"] = \
             int(sxp.child_value(sxp_cfg, "cpu_cap", 0))
+        # For boost credit scheduler
+        cfg["vcpus_params"]["bc_weight"] = \
+            int(sxp.child_value(sxp_cfg, "cpu_bc_weight", 256))
+        cfg["vcpus_params"]["bc_cap"] = \
+            int(sxp.child_value(sxp_cfg, "cpu_bc_cap", 0))
+        cfg["vcpus_params"]["bc_max_boost_period"] = \
+            int(sxp.child_value(sxp_cfg, "cpu_bc_max_boost_period", 0))
+        cfg["vcpus_params"]["bc_ratio"] = \
+            int(sxp.child_value(sxp_cfg, "cpu_bc_ratio", 0))
 
         # Only extract options we know about.
         extract_keys = LEGACY_UNSUPPORTED_BY_XENAPI_CFG + \
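
The defaults here matter: a guest with no explicit bcredit settings comes up with bc_weight 256 and zero cap, boost period and ratio, so under bcredit it initially schedules like an ordinary weighted credit domain until a boost ratio is assigned.
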
diff --git a/tools/python/xen/xend/XendDomain.py b/tools/python/xen/xend/XendDomain.py
index 9faebe95aaa0ff0663f481de92d8e3cdb1f9b058..aa6506c1172859bf3f92aabbd821f15b971b46db 100644
@@ -1591,6 +1591,99 @@ class XendDomain:
             log.exception(ex)
             raise XendError(str(ex))
 
+    def domain_sched_bcredit_get(self, domid):
+        """Get boost credit scheduler parameters for a domain.
+
+        @param domid: Domain ID or Name
+        @type domid: int or string.
+        @rtype: dict with keys 'bc_weight', 'bc_cap', 'bc_max_boost_period' and 'bc_ratio'
+        @return: boost credit scheduler parameters
+        """
+        dominfo = self.domain_lookup_nr(domid)
+        if not dominfo:
+            raise XendInvalidDomain(str(domid))
+        
+        if dominfo._stateGet() in (DOM_STATE_RUNNING, DOM_STATE_PAUSED):
+            try:
+                return xc.sched_bcredit_domain_get(dominfo.getDomid())
+            except Exception, ex:
+                raise XendError(str(ex))
+        else:
+            return {'bc_weight'           : dominfo.getBCWeight(),
+                    'bc_cap'              : dominfo.getBCCap(),
+                    'bc_max_boost_period' : dominfo.getBCMaxBoostPeriod(),
+                    'bc_ratio'            : dominfo.getBCRatio()} 
+    
+    def domain_sched_bcredit_set(self, domid, bc_weight = None, bc_cap = None, bc_max_boost_period = None, bc_ratio = None):
+        """Set boost credit scheduler parameters for a domain.
+
+        @param domid: Domain ID or Name
+        @type domid: int or string.
+        @type bc_weight: int
+        @type bc_cap: int
+        @type bc_max_boost_period: int
+        @type bc_ratio: int
+        @rtype: 0
+        """
+        set_weight = False
+        set_cap = False
+        set_max_boost_period = False
+        set_ratio = False
+        dominfo = self.domain_lookup_nr(domid)
+        if not dominfo:
+            raise XendInvalidDomain(str(domid))
+        try:
+            if bc_weight is None:
+                bc_weight = int(0)
+            elif bc_weight < 1 or bc_weight > 65535:
+                raise XendError("bc_weight is out of range")
+            else:
+                set_weight = True
+
+            if bc_cap is None:
+                bc_cap = int(~0)
+            elif bc_cap < 0 or bc_cap > dominfo.getVCpuCount() * 100:
+                raise XendError("bc_cap is out of range")
+            else:
+                set_cap = True
+
+            if bc_max_boost_period is None:
+                bc_max_boost_period = int(~0)
+            elif bc_max_boost_period < 0:
+                raise XendError("bc_max_boost_period is out of range")
+            else:
+                set_max_boost_period = True
+
+            if bc_ratio is None:
+                bc_ratio = int(~0)
+            elif bc_ratio < 0:
+                raise XendError("bc_ratio is out of range")
+            else:
+                set_ratio = True
+
+            assert type(bc_weight) == int
+            assert type(bc_cap) == int
+            assert type(bc_max_boost_period) == int
+            assert type(bc_ratio) == int
+
+            rc = 0
+            if dominfo._stateGet() in (DOM_STATE_RUNNING, DOM_STATE_PAUSED):
+                rc = xc.sched_bcredit_domain_set(dominfo.getDomid(), bc_weight, bc_cap, bc_max_boost_period, bc_ratio)
+            if rc == 0:
+                if set_weight:
+                    dominfo.setBCWeight(bc_weight)
+                if set_cap:
+                    dominfo.setBCCap(bc_cap)
+                if set_max_boost_period:
+                    dominfo.setBCMaxBoostPeriod(bc_max_boost_period)
+                if set_ratio:
+                    dominfo.setBCRatio(bc_ratio)
+                self.managed_config_save(dominfo)
+            return rc
+        except Exception, ex:
+            log.exception(ex)
+            raise XendError(str(ex))
+
     def domain_maxmem_set(self, domid, mem):
         """Set the memory limit for a domain.
 
diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py
index 9784f47c9d5f4c971d99c283e727f7258b917d24..a51edd1009215490efbb21a49f5c1c6ae41f5444 100644
@@ -465,6 +465,14 @@ class XendDomainInfo:
                     xendomains.domain_sched_credit_set(self.getDomid(),
                                                        self.getWeight(),
                                                        self.getCap())
+
+                if xennode.xenschedinfo() == 'bcredit':
+                    xendomains.domain_sched_bcredit_set(self.getDomid(),
+                                                        self.getBCWeight(),
+                                                        self.getBCCap(),
+                                                        self.getBCMaxBoostPeriod(),
+                                                        self.getBCRatio())
+
             except:
                 log.exception('VM start failed')
                 self.destroy()
@@ -1569,6 +1577,30 @@ class XendDomainInfo:
     def setWeight(self, cpu_weight):
         self.info['vcpus_params']['weight'] = cpu_weight
 
+    def getBCCap(self):
+        return self.info['vcpus_params']['bc_cap']
+
+    def setBCCap(self, cpu_bc_cap):
+        self.info['vcpus_params']['bc_cap'] = cpu_bc_cap
+
+    def getBCWeight(self):
+        return self.info['vcpus_params']['bc_weight']
+
+    def setBCWeight(self, cpu_bc_weight):
+        self.info['vcpus_params']['bc_weight'] = cpu_bc_weight
+
+    def getBCMaxBoostPeriod(self):
+        return self.info['vcpus_params']['bc_max_boost_period']
+
+    def setBCMaxBoostPeriod(self, cpu_bc_max_boost_period):
+        self.info['vcpus_params']['bc_max_boost_period'] = cpu_bc_max_boost_period
+
+    def getBCRatio(self):
+        return self.info['vcpus_params']['bc_ratio']
+
+    def setBCRatio(self, cpu_bc_ratio):
+        self.info['vcpus_params']['bc_ratio'] = cpu_bc_ratio
+
     def getRestartCount(self):
         return self._readVm('xend/restart_count')
 
diff --git a/tools/python/xen/xend/XendNode.py b/tools/python/xen/xend/XendNode.py
index 72db2974a96c2a81f1778b36dfce13010e8ce67b..3ef422180a108745b5eea49f44a756a8d16a93a1 100644
@@ -555,6 +555,8 @@ class XendNode:
             return 'sedf'
         elif sched_id == xen.lowlevel.xc.XEN_SCHEDULER_CREDIT:
             return 'credit'
+        elif sched_id == xen.lowlevel.xc.XEN_SCHEDULER_BCREDIT:
+            return 'bcredit'
         else:
             return 'unknown'
 
@@ -714,6 +716,8 @@ class XendNode:
             return 'sedf'
         elif sched_id == xen.lowlevel.xc.XEN_SCHEDULER_CREDIT:
             return 'credit'
+        elif sched_id == xen.lowlevel.xc.XEN_SCHEDULER_BCREDIT:
+            return 'bcredit'
         else:
             return 'unknown'
 
diff --git a/tools/python/xen/xm/main.py b/tools/python/xen/xm/main.py
index 8ed8b1b17d7bd2f0860d0800e795bddf901be17d..90d93e5e2529dc057994487af4e3fc9756d5d8e3 100644
@@ -151,6 +151,8 @@ SUBCOMMAND_HELP = {
     'sched-sedf'  : ('<Domain> [options]', 'Get/set EDF parameters.'),
     'sched-credit': ('[-d <Domain> [-w[=WEIGHT]|-c[=CAP]]]',
                      'Get/set credit scheduler parameters.'),
+    'sched-bcredit': ('[-d <Domain> [-w[=WEIGHT]|-c[=CAP]|-m[=MAXBOOSTPERIOD]|-r[=RATIO]]]',
+                      'Get/set boost credit scheduler parameters.'),
     'sysrq'       : ('<Domain> <letter>', 'Send a sysrq to a domain.'),
     'debug-keys'  : ('<Keys>', 'Send debug keys to Xen.'),
     'trigger'     : ('<Domain> <nmi|reset|init|s3resume> [<VCPU>]',
@@ -240,6 +242,13 @@ SUBCOMMAND_OPTIONS = {
        ('-w WEIGHT', '--weight=WEIGHT', 'Weight (int)'),
        ('-c CAP',    '--cap=CAP',       'Cap (int)'),
     ),
+    'sched-bcredit': (
+       ('-d DOMAIN', '--domain=DOMAIN', 'Domain to modify'),
+       ('-w WEIGHT', '--weight=WEIGHT', 'Weight (int)'),
+       ('-c CAP',    '--cap=CAP',       'Cap (int)'),
+       ('-m PERIOD', '--maxboostperiod=PERIOD', 'Upper limit of boost period (ms)'),
+       ('-r RATIO',  '--ratio=RATIO',   'Boost ratio per CPU (int)'),
+    ),
     'list': (
        ('-l', '--long',         'Output all VM details in SXP'),
        ('', '--label',          'Include security labels'),
@@ -1657,6 +1666,116 @@ def xm_sched_credit(args):
             if result != 0:
                 err(str(result))
 
+def xm_sched_bcredit(args):
+    """Get/Set options for Boost Credit Scheduler."""
+    
+    check_sched_type('bcredit')
+
+    try:
+        opts, params = getopt.getopt(args, "d:w:c:m:r:",
+            ["domain=", "weight=", "cap=", "maxboostperiod=", "ratio="])
+    except getopt.GetoptError, opterr:
+        err(opterr)
+        usage('sched-bcredit')
+
+    domid = None
+    weight = None
+    cap = None
+    max_boost_period = None
+    boost_ratio = None
+
+    for o, a in opts:
+        if o in ["-d", "--domain"]:
+            domid = a
+        elif o in ["-w", "--weight"]:
+            weight = int(a)
+        elif o in ["-c", "--cap"]:
+            cap = int(a)
+        elif o in ["-m", "--maxboostperiod"]:
+            max_boost_period = int(a)
+        elif o in ["-r", "--ratio"]:
+            boost_ratio = int(a)
+
+    doms = filter(lambda x : domid_match(domid, x),
+                  [parse_doms_info(dom)
+                  for dom in getDomains(None, 'all')])
+
+    if weight is None and cap is None and max_boost_period is None and boost_ratio is None:
+        if domid is not None and doms == []: 
+            err("Domain '%s' does not exist." % domid)
+            usage('sched-bcredit')
+        # print header if we aren't setting any parameters
+        print '%-33s %4s %6s %4s %8s %5s' % ('Name','ID','Weight','Cap','Max(ms)','Ratio')
+        
+        for d in doms:
+            try:
+                if serverType == SERVER_XEN_API:
+                    info = server.xenapi.VM_metrics.get_VCPUs_params(
+                        server.xenapi.VM.get_metrics(
+                            get_single_vm(d['name'])))
+                else:
+                    info = server.xend.domain.sched_bcredit_get(d['name'])
+            except xmlrpclib.Fault:
+                pass
+
+            if 'bc_weight' not in info or 'bc_cap' not in info or 'bc_max_boost_period' not in info or 'bc_ratio' not in info:
+                # domain does not support sched-bcredit?
+                info = {'bc_weight': -1, 'bc_cap': -1, 'bc_max_boost_period': -1, 'bc_ratio': -1}
+
+            info['bc_weight'] = int(info['bc_weight'])
+            info['bc_cap']    = int(info['bc_cap'])
+            info['bc_max_boost_period'] = int(info['bc_max_boost_period'])
+            info['bc_ratio']  = int(info['bc_ratio'])
+            
+            info['name']  = d['name']
+            info['domid'] = str(d['domid'])
+            print( ("%(name)-32s %(domid)5s %(bc_weight)6d %(bc_cap)4d %(bc_max_boost_period)8d %(bc_ratio)5d") % info)
+    else:
+        if domid is None:
+            # place holder for system-wide scheduler parameters
+            err("No domain given.")
+            usage('sched-bcredit')
+
+        if serverType == SERVER_XEN_API:
+            if doms[0]['domid']:
+                server.xenapi.VM.add_to_VCPUs_params_live(
+                    get_single_vm(domid),
+                    "bc_weight",
+                    weight)
+                server.xenapi.VM.add_to_VCPUs_params_live(
+                    get_single_vm(domid),
+                    "bc_cap",
+                    cap)
+                server.xenapi.VM.add_to_VCPUs_params_live(
+                    get_single_vm(domid),
+                    "bc_max_boost_period",
+                     max_boost_period)
+                server.xenapi.VM.add_to_VCPUs_params_live(
+                    get_single_vm(domid),
+                    "bc_ratio",
+                    boost_ratio)
+            else:
+                server.xenapi.VM.add_to_VCPUs_params(
+                    get_single_vm(domid),
+                    "bc_weight",
+                    weight)
+                server.xenapi.VM.add_to_VCPUs_params(
+                    get_single_vm(domid),
+                    "bc_cap",
+                    cap)
+                server.xenapi.VM.add_to_VCPUs_params(
+                    get_single_vm(domid),
+                    "bc_max_boost_period",
+                    max_boost_period)
+                server.xenapi.VM.add_to_VCPUs_params(
+                    get_single_vm(domid),
+                    "bc_ratio",
+                    boost_ratio)
+        else:
+            result = server.xend.domain.sched_bcredit_set(domid, weight, cap, max_boost_period, boost_ratio)
+            if result != 0:
+                err(str(result))
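
Typical usage, going by the options table above: "xm sched-bcredit" alone prints the Name/ID/Weight/Cap/Max(ms)/Ratio table for all domains, while something like "xm sched-bcredit -d MyGuest -m 30 -r 20" (hypothetical domain name) grants MyGuest up to 30 ms of banked BOOST time, fed at 20% of one CPU.
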
+
 def xm_info(args):
     arg_check(args, "info", 0, 1)
     
@@ -2824,6 +2943,7 @@ commands = {
     # scheduler
     "sched-sedf": xm_sched_sedf,
     "sched-credit": xm_sched_credit,
+    "sched-bcredit": xm_sched_bcredit,
     # block
     "block-attach": xm_block_attach,
     "block-detach": xm_block_detach,
diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c
index 3ba7d3e8cd0cdc8b01288d69bd2dbc23b187a66a..a4530a8da62b9d010b60f8c86779aef734473f6d 100644
@@ -1401,3 +1401,1003 @@ struct scheduler sched_credit_def = {
     .dump_settings  = csched_dump,
     .init           = csched_init,
 };
+
+
+/*
+ * Boost Credit Scheduler (bcredit)
+ *   An alternative credit scheduler optimized for client hypervisors
+ */
+
+/*
+ * Basic constants
+ */
+#define BCSCHED_DEFAULT_WEIGHT     CSCHED_DEFAULT_WEIGHT
+#define BCSCHED_TICKS_PER_TSLICE   CSCHED_TICKS_PER_TSLICE
+#define BCSCHED_TICKS_PER_ACCT     CSCHED_TICKS_PER_ACCT
+#define BCSCHED_MSECS_PER_TICK     CSCHED_MSECS_PER_TICK
+#define BCSCHED_MSECS_PER_TSLICE   \
+    (BCSCHED_MSECS_PER_TICK * BCSCHED_TICKS_PER_TSLICE)
+#define BCSCHED_CREDITS_PER_TICK   10000
+#define BCSCHED_CREDITS_PER_TSLICE \
+    (BCSCHED_CREDITS_PER_TICK * BCSCHED_TICKS_PER_TSLICE)
+#define BCSCHED_CREDITS_PER_ACCT   \
+    (BCSCHED_CREDITS_PER_TICK * BCSCHED_TICKS_PER_ACCT)
+#define BCSCHED_MSECS_BOOSTTSLICE_PER_CPU 2
+#define BCSCHED_NSECS_MIN_BOOST_TSLICE 500000
+
+/*
+ * Macros
+ */
+#define svc_sbvc(_v) (container_of((_v), struct bcsched_vcpu, svc))
+#define sdom_sbdom(_d) (container_of((_d), struct bcsched_dom, sdom))
+
+/*
+ * Virtual CPU
+ */
+struct bcsched_vcpu {
+    struct csched_vcpu svc;
+    struct list_head inactive_vcpu_elem;
+    s_time_t start_time;
+    atomic_t boost_credit;
+};
+
+/*
+ * Domain
+ */
+struct bcsched_dom {
+    struct csched_dom sdom;
+    uint16_t boost_ratio;
+    uint16_t max_boost_period;
+};
+
+/*
+ * System-wide private data
+ */
+struct bcsched_private {
+    struct list_head inactive_vcpu;
+    uint32_t nvcpus;
+    s_time_t boost_tslice;
+    uint32_t boost_credit;
+    uint16_t total_boost_ratio;
+};
+
+/*
+ * Global variables
+ */
+static struct bcsched_private bcsched_priv;
+
+/* opt_bcsched_tslice: time slice for BOOST priority */
+static unsigned int opt_bcsched_tslice = BCSCHED_MSECS_BOOSTTSLICE_PER_CPU;
+integer_param("bcsched_tslice", opt_bcsched_tslice);
+
+static void bcsched_tick(void *_cpu);
+
+static int
+bcsched_pcpu_init(int cpu)
+{
+    struct csched_pcpu *spc;
+    unsigned long flags;
+
+    /* Allocate per-PCPU info */
+    spc = xmalloc(struct csched_pcpu);
+    if ( spc == NULL )
+        return -1;
+
+    spin_lock_irqsave(&csched_priv.lock, flags);
+
+    /* Initialize/update system-wide config */
+    csched_priv.credit += BCSCHED_CREDITS_PER_ACCT;
+    if ( csched_priv.ncpus <= cpu )
+        csched_priv.ncpus = cpu + 1;
+    if ( csched_priv.master >= csched_priv.ncpus )
+        csched_priv.master = cpu;
+
+    init_timer(&spc->ticker, bcsched_tick, (void *)(unsigned long)cpu, cpu);
+    INIT_LIST_HEAD(&spc->runq);
+    spc->runq_sort_last = csched_priv.runq_sort;
+    per_cpu(schedule_data, cpu).sched_priv = spc;
+
+    /* Start off idling... */
+    BUG_ON(!is_idle_vcpu(per_cpu(schedule_data, cpu).curr));
+    cpu_set(cpu, csched_priv.idlers);
+
+    spin_unlock_irqrestore(&csched_priv.lock, flags);
+
+    return 0;
+}
+
+static inline void
+__bcsched_vcpu_acct_start_locked(struct csched_vcpu *svc)
+{
+    struct csched_dom * const sdom = svc->sdom;
+    struct bcsched_vcpu * const sbvc = svc_sbvc(svc);
+    struct bcsched_dom * const sbdom = sdom_sbdom(sdom);
+
+    CSCHED_VCPU_STAT_CRANK(svc, state_active);
+    CSCHED_STAT_CRANK(acct_vcpu_active);
+
+    sdom->active_vcpu_count++;
+    list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
+    list_del_init(&sbvc->inactive_vcpu_elem);
+    if ( list_empty(&sdom->active_sdom_elem) )
+    {
+        list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
+        csched_priv.weight += sdom->weight;
+        bcsched_priv.boost_credit += (sbdom->boost_ratio *
+                                      BCSCHED_CREDITS_PER_TSLICE) / 100;
+    }
+}
+
+static inline void
+__bcsched_vcpu_acct_stop_locked(struct csched_vcpu *svc)
+{
+    struct csched_dom * const sdom = svc->sdom;
+    struct bcsched_vcpu * const sbvc = svc_sbvc(svc);
+    struct bcsched_dom * const sbdom = sdom_sbdom(sdom);
+
+    BUG_ON( list_empty(&svc->active_vcpu_elem) );
+
+    CSCHED_VCPU_STAT_CRANK(svc, state_idle);
+    CSCHED_STAT_CRANK(acct_vcpu_idle);
+
+    sdom->active_vcpu_count--;
+    list_del_init(&svc->active_vcpu_elem);
+    list_add(&sbvc->inactive_vcpu_elem, &bcsched_priv.inactive_vcpu);
+    if ( list_empty(&sdom->active_vcpu) )
+    {
+        BUG_ON( csched_priv.weight < sdom->weight );
+        list_del_init(&sdom->active_sdom_elem);
+        csched_priv.weight -= sdom->weight;
+        bcsched_priv.boost_credit -= (sbdom->boost_ratio *
+                                      BCSCHED_CREDITS_PER_TSLICE) / 100;
+    }
+}
+
+static void
+bcsched_vcpu_acct(unsigned int cpu)
+{
+    ASSERT( current->processor == cpu );
+    ASSERT( CSCHED_VCPU(current)->sdom != NULL );
+
+    /*
+     * If it's been active a while, check if we'd be better off
+     * migrating it to run elsewhere (see multi-core and multi-thread
+     * support in csched_cpu_pick()).
+     */
+    if ( csched_cpu_pick(current) != cpu )
+    {
+        CSCHED_VCPU_STAT_CRANK(CSCHED_VCPU(current), migrate_r);
+        CSCHED_STAT_CRANK(migrate_running);
+        set_bit(_VPF_migrating, &current->pause_flags);
+        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
+    }
+}
+
+static int
+bcsched_vcpu_init(struct vcpu *vc)
+{
+    struct domain * const dom = vc->domain;
+    struct csched_dom *sdom = CSCHED_DOM(dom);
+    struct bcsched_vcpu *sbvc;
+    struct csched_vcpu *svc;
+    unsigned long flags;
+
+    CSCHED_STAT_CRANK(vcpu_init);
+
+    /* Allocate per-VCPU info */
+    sbvc = xmalloc(struct bcsched_vcpu);
+    if ( sbvc == NULL )
+        return -1;
+    svc = &(sbvc->svc);
+
+    INIT_LIST_HEAD(&svc->runq_elem);
+    INIT_LIST_HEAD(&svc->active_vcpu_elem);
+    INIT_LIST_HEAD(&sbvc->inactive_vcpu_elem);
+    svc->sdom = sdom;
+    svc->vcpu = vc;
+    atomic_set(&svc->credit, 0);
+    svc->flags = 0U;
+    svc->pri = is_idle_domain(dom) ? CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER;
+    CSCHED_VCPU_STATS_RESET(svc);
+    vc->sched_priv = svc;
+    atomic_set(&sbvc->boost_credit, 0);
+
+    /* Allocate per-PCPU info */
+    if ( unlikely(!CSCHED_PCPU(vc->processor)) )
+    {
+        if ( bcsched_pcpu_init(vc->processor) != 0 )
+            return -1;
+    }
+
+    /* Add to the inactive queue so that accounting can start */
+    if ( !is_idle_vcpu(vc) )
+    {
+        uint32_t vcpus_per_cpu;
+
+        spin_lock_irqsave(&csched_priv.lock, flags);
+
+        list_add(&sbvc->inactive_vcpu_elem, &bcsched_priv.inactive_vcpu);
+
+        bcsched_priv.nvcpus++;
+        vcpus_per_cpu = ( (bcsched_priv.nvcpus + (csched_priv.ncpus-1)) /
+                          csched_priv.ncpus
+                        ) - 1;
+        if ( vcpus_per_cpu == 0 )
+            bcsched_priv.boost_tslice = MILLISECS(BCSCHED_MSECS_PER_TSLICE);
+        else
+        {
+            bcsched_priv.boost_tslice =  MILLISECS(opt_bcsched_tslice) /
+                                         vcpus_per_cpu;
+            if ( bcsched_priv.boost_tslice < BCSCHED_NSECS_MIN_BOOST_TSLICE )
+                bcsched_priv.boost_tslice = BCSCHED_NSECS_MIN_BOOST_TSLICE; 
+        }
+
+        spin_unlock_irqrestore(&csched_priv.lock, flags);
+    }
+
+    CSCHED_VCPU_CHECK(vc);
+    return 0;
+}
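
The boost time-slice arithmetic above deserves a worked example. A standalone sketch (hypothetical helper; it assumes the stock credit constants of 10 ms ticks and 3 ticks per tslice, so BCSCHED_MSECS_PER_TSLICE is 30 ms; the 0.5 ms floor is BCSCHED_NSECS_MIN_BOOST_TSLICE from the patch itself):

    #include <stdint.h>

    #define NSECS_PER_MS 1000000LL           /* stand-in for MILLISECS() */

    static int64_t boost_tslice_for(uint32_t nvcpus, uint32_t ncpus,
                                    unsigned int opt_tslice_ms /* default 2 */)
    {
        /* vCPUs competing for each pCPU, beyond the first one */
        uint32_t vcpus_per_cpu = (nvcpus + (ncpus - 1)) / ncpus - 1;
        int64_t tslice;

        if ( vcpus_per_cpu == 0 )            /* no contention: full tslice */
            return 30 * NSECS_PER_MS;

        tslice = opt_tslice_ms * NSECS_PER_MS / vcpus_per_cpu;
        return tslice < 500000 ? 500000 : tslice;   /* 0.5 ms floor */
    }

    /* e.g. 12 vCPUs on 4 pCPUs: vcpus_per_cpu = ceil(12/4) - 1 = 2, so a
     * BOOSTed vCPU runs for 2 ms / 2 = 1 ms before being rescheduled. */
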
+
+static void
+bcsched_vcpu_destroy(struct vcpu *vc)
+{
+    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+    struct bcsched_vcpu * const sbvc = svc_sbvc(svc);
+    struct csched_dom * const sdom = svc->sdom;
+    unsigned long flags;
+
+    CSCHED_STAT_CRANK(vcpu_destroy);
+
+    BUG_ON( sdom == NULL );
+    BUG_ON( !list_empty(&svc->runq_elem) );
+
+    spin_lock_irqsave(&csched_priv.lock, flags);
+
+    if ( !list_empty(&svc->active_vcpu_elem) )
+        __bcsched_vcpu_acct_stop_locked(svc);
+
+    if ( !list_empty(&sbvc->inactive_vcpu_elem) )
+        list_del_init(&sbvc->inactive_vcpu_elem);
+
+    if ( !is_idle_vcpu(vc) )
+    {
+        uint32_t vcpus_per_cpu;
+
+        bcsched_priv.nvcpus--;
+        vcpus_per_cpu = ( (bcsched_priv.nvcpus + (csched_priv.ncpus-1)) /
+                          csched_priv.ncpus
+                        ) - 1;
+        if ( vcpus_per_cpu == 0 )
+            bcsched_priv.boost_tslice = MILLISECS(BCSCHED_MSECS_PER_TSLICE);
+        else
+        {
+            bcsched_priv.boost_tslice =  MILLISECS(opt_bcsched_tslice) /
+                                         vcpus_per_cpu;
+            if ( bcsched_priv.boost_tslice < BCSCHED_NSECS_MIN_BOOST_TSLICE )
+                bcsched_priv.boost_tslice = BCSCHED_NSECS_MIN_BOOST_TSLICE; 
+        }
+    }
+
+    spin_unlock_irqrestore(&csched_priv.lock, flags);
+
+    xfree(sbvc);
+}
+
+static int
+bcsched_dom_cntl(
+    struct domain *d,
+    struct xen_domctl_scheduler_op *op)
+{
+    struct csched_dom * const sdom = CSCHED_DOM(d);
+    struct bcsched_dom * const sbdom = sdom_sbdom(sdom);
+    unsigned long flags;
+
+    if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
+    {
+        op->u.bcredit.weight = sdom->weight;
+        op->u.bcredit.cap = sdom->cap;
+        op->u.bcredit.max_boost_period = sbdom->max_boost_period;
+        op->u.bcredit.boost_ratio = sbdom->boost_ratio;
+    }
+    else
+    {
+        uint16_t weight = (uint16_t)~0U;
+
+        ASSERT(op->cmd == XEN_DOMCTL_SCHEDOP_putinfo);
+
+        spin_lock_irqsave(&csched_priv.lock, flags);
+
+        if ( (op->u.bcredit.weight != 0) &&
+             (sbdom->boost_ratio == 0 || op->u.bcredit.boost_ratio == 0) )
+        {
+            weight = op->u.bcredit.weight;
+        }
+
+        if ( op->u.bcredit.cap != (uint16_t)~0U )
+            sdom->cap = op->u.bcredit.cap;
+
+        if ( (op->u.bcredit.max_boost_period != (uint16_t)~0U) &&
+             (op->u.bcredit.max_boost_period >= BCSCHED_MSECS_PER_TSLICE ||
+              op->u.bcredit.max_boost_period == 0) )
+        {
+                sbdom->max_boost_period = op->u.bcredit.max_boost_period;
+        }
+
+        if ( (op->u.bcredit.boost_ratio != (uint16_t)~0U) &&
+             ((bcsched_priv.total_boost_ratio - sbdom->boost_ratio +
+               op->u.bcredit.boost_ratio) <= 100 * csched_priv.ncpus) &&
+             (sbdom->max_boost_period || op->u.bcredit.boost_ratio == 0) )
+        {
+            uint16_t new_bc, old_bc;
+
+            new_bc = ( op->u.bcredit.boost_ratio *
+                       BCSCHED_CREDITS_PER_TSLICE ) / 100;
+            old_bc = ( sbdom->boost_ratio *
+                       BCSCHED_CREDITS_PER_TSLICE ) / 100;
+
+            bcsched_priv.total_boost_ratio -= sbdom->boost_ratio;
+            bcsched_priv.total_boost_ratio += op->u.bcredit.boost_ratio;
+
+            sbdom->boost_ratio = op->u.bcredit.boost_ratio;
+
+            if ( !list_empty(&sdom->active_sdom_elem) )
+            {
+                bcsched_priv.boost_credit -= old_bc;
+                bcsched_priv.boost_credit += new_bc;
+            }
+            if ( new_bc == 0 )
+            {
+                if ( sdom->weight == 0 )
+                    weight = BCSCHED_DEFAULT_WEIGHT;
+            }
+            else
+                weight = 0;
+        }
+
+        if ( weight != (uint16_t)~0U )
+        {
+            if ( !list_empty(&sdom->active_sdom_elem) )
+            {
+                csched_priv.weight -= sdom->weight;
+                csched_priv.weight += weight;
+            }
+            sdom->weight = weight;
+        }
+
+        spin_unlock_irqrestore(&csched_priv.lock, flags);
+    }
+
+    return 0;
+}
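
The putinfo branch above treats (uint16_t)~0U as "no change" for cap, max_boost_period and boost_ratio, and weight 0 as "no change" for weight; weight and boost ratio are also mutually exclusive (a nonzero ratio forces the weight to 0, and clearing the ratio restores the default weight). A caller wanting to touch only the ratio would therefore pass, hypothetically, and assuming a boost period was set earlier:

    struct xen_domctl_sched_bcredit sdom = {
        .weight           = 0,              /* 0: don't touch the weight   */
        .cap              = (uint16_t)~0U,  /* ~0: don't touch the cap     */
        .max_boost_period = (uint16_t)~0U,  /* ~0: keep the current period */
        .boost_ratio      = 25,             /* set ratio to 25% of a CPU   */
    };
    /* then: xc_sched_bcredit_domain_set(xc_handle, domid, &sdom); */
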
+
+static int
+bcsched_dom_init(struct domain *dom)
+{
+    struct csched_dom *sdom;
+    struct bcsched_dom *sbdom;
+
+    CSCHED_STAT_CRANK(dom_init);
+
+    if ( is_idle_domain(dom) )
+        return 0;
+
+    sbdom = xmalloc(struct bcsched_dom);
+    if ( sbdom == NULL )
+        return -ENOMEM;
+    sdom = &(sbdom->sdom);
+
+    /* Initialize credit and weight */
+    INIT_LIST_HEAD(&sdom->active_vcpu);
+    sdom->active_vcpu_count = 0;
+    INIT_LIST_HEAD(&sdom->active_sdom_elem);
+    sdom->dom = dom;
+    sdom->weight = BCSCHED_DEFAULT_WEIGHT;
+    sdom->cap = 0U;
+    sbdom->boost_ratio = 0U;
+    sbdom->max_boost_period = 0;
+    dom->sched_priv = sdom;
+
+    return 0;
+}
+
+static void
+bcsched_dom_destroy(struct domain *dom)
+{
+    CSCHED_STAT_CRANK(dom_destroy);
+    xfree(sdom_sbdom(CSCHED_DOM(dom)));
+}
+
+/*
+ * This is an O(n) optimized sort of the runq.
+ *
+ * Time-share VCPUs can only be one of three priorities, BOOST, UNDER or OVER.
+ * We walk through the runq and move up any BOOSTs that are preceded by UNDERs
+ * or OVERs, and any UNDERs that are preceded by OVERs. We remember the last
+ * BOOST and UNDER to make the move up operation O(1).
+ */
+static void
+bcsched_runq_sort(unsigned int cpu)
+{
+    struct csched_pcpu * const spc = CSCHED_PCPU(cpu);
+    struct list_head *runq, *elem, *next, *last_boost, *last_under;
+    struct csched_vcpu *svc_elem;
+    unsigned long flags;
+    int sort_epoch;
+
+    sort_epoch = csched_priv.runq_sort;
+    if ( sort_epoch == spc->runq_sort_last )
+        return;
+
+    spc->runq_sort_last = sort_epoch;
+
+    spin_lock_irqsave(&per_cpu(schedule_data, cpu).schedule_lock, flags);
+
+    runq = &spc->runq;
+    elem = runq->next;
+    last_boost = last_under = runq;
+    while ( elem != runq )
+    {
+        next = elem->next;
+        svc_elem = __runq_elem(elem);
+
+        if ( svc_elem->pri == CSCHED_PRI_TS_BOOST )
+        {
+            /* does elem need to move up the runq? */
+            if ( elem->prev != last_boost )
+            {
+                list_del(elem);
+                list_add(elem, last_boost);
+            }
+            if ( last_boost == last_under )
+                last_under = elem;
+            last_boost = elem;
+        }
+        else if ( svc_elem->pri == CSCHED_PRI_TS_UNDER )
+        {
+            /* does elem need to move up the runq? */
+            if ( elem->prev != last_under )
+            {
+                list_del(elem);
+                list_add(elem, last_under);
+            }
+            last_under = elem;
+        }
+
+        elem = next;
+    }
+
+    spin_unlock_irqrestore(&per_cpu(schedule_data, cpu).schedule_lock, flags);
+}
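
Concretely: a run queue that arrives as UNDER, BOOST, OVER, UNDER, BOOST leaves as BOOST, BOOST, UNDER, UNDER, OVER. Each BOOST is spliced in right behind the last BOOST seen so far, each UNDER behind the last UNDER, and the OVERs sink to the tail, all in a single pass.
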
+
+static void
+bcsched_acct(void)
+{
+    unsigned long flags;
+    struct list_head *iter_vcpu, *next_vcpu;
+    struct list_head *iter_sdom, *next_sdom;
+    struct bcsched_vcpu *sbvc;
+    struct bcsched_dom *sbdom;
+    struct csched_vcpu *svc;
+    struct csched_dom *sdom;
+    uint32_t credit_total;
+    uint32_t weight_total;
+    uint32_t bc_total;
+    uint32_t weight_left;
+    uint32_t credit_fair;
+    uint32_t credit_peak;
+    uint32_t credit_cap;
+    uint32_t bc_fair;
+    int credit_balance;
+    int credit_xtra;
+    int credit;
+    int boost_credit;
+    int max_boost_credit;
+    int64_t c_sum, bc_sum;
+    int c_average, bc_average;
+
+
+    spin_lock_irqsave(&csched_priv.lock, flags);
+
+    /* Add vcpu to active list once a tick's worth of credits has been consumed */
+    list_for_each_safe( iter_vcpu, next_vcpu, &bcsched_priv.inactive_vcpu )
+    {
+        sbvc = list_entry(iter_vcpu, struct bcsched_vcpu, inactive_vcpu_elem);
+        svc = &(sbvc->svc);
+        sbdom = sdom_sbdom(svc->sdom);
+
+        max_boost_credit = sbdom->max_boost_period *
+                           (BCSCHED_CREDITS_PER_TSLICE/BCSCHED_MSECS_PER_TSLICE);
+        if ( (atomic_read(&sbvc->boost_credit)
+              <= (max_boost_credit-BCSCHED_CREDITS_PER_TICK)) ||
+             (atomic_read(&svc->credit)
+              <= BCSCHED_CREDITS_PER_TICK*(BCSCHED_TICKS_PER_ACCT-1)) )
+        {
+            __bcsched_vcpu_acct_start_locked(svc);
+        }
+    }
+
+    weight_total = csched_priv.weight;
+    credit_total = csched_priv.credit;
+    bc_total = bcsched_priv.boost_credit;
+
+    /* Converge balance towards 0 when it drops negative */
+    if ( csched_priv.credit_balance < 0 )
+    {
+        credit_total -= csched_priv.credit_balance;
+        CSCHED_STAT_CRANK(acct_balance);
+    }
+
+    if ( unlikely(weight_total == 0 && bc_total == 0) )
+    {
+        csched_priv.credit_balance = 0;
+        spin_unlock_irqrestore(&csched_priv.lock, flags);
+        CSCHED_STAT_CRANK(acct_no_work);
+        return;
+    }
+
+    CSCHED_STAT_CRANK(acct_run);
+
+    weight_left = weight_total;
+    credit_balance = 0;
+    credit_xtra = 0;
+    credit_cap = 0U;
+
+    /* First, subtract boost credits from credit_total. */
+    if ( bc_total != 0 )
+    {
+        credit_total -= bc_total;
+        credit_balance += bc_total;
+    }
+
+    /* Avoid division by zero */
+    if ( weight_total == 0 )
+        weight_total = 1;
+
+    list_for_each_safe( iter_sdom, next_sdom, &csched_priv.active_sdom )
+    {
+        sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
+        sbdom = sdom_sbdom(sdom);
+
+        BUG_ON( is_idle_domain(sdom->dom) );
+        BUG_ON( sdom->active_vcpu_count == 0 );
+        BUG_ON( sdom->weight > weight_left );
+
+        max_boost_credit = sbdom->max_boost_period *
+                           (BCSCHED_CREDITS_PER_TSLICE/BCSCHED_MSECS_PER_TSLICE);
+        c_sum = bc_sum = 0;
+        list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu )
+        {
+            svc = list_entry(iter_vcpu, struct csched_vcpu, active_vcpu_elem);
+            sbvc = svc_sbvc(svc);
+
+            BUG_ON( sdom != svc->sdom );
+
+            c_sum += atomic_read(&svc->credit);
+            bc_sum += atomic_read(&sbvc->boost_credit);
+        }
+        c_average = ( c_sum + ( sdom->active_vcpu_count - 1 )
+                    ) / sdom->active_vcpu_count;
+        bc_average = ( bc_sum + ( sdom->active_vcpu_count - 1 )
+                     ) / sdom->active_vcpu_count;
+
+        weight_left -= sdom->weight;
+
+        /*
+         * A domain's fair share is computed using its weight in competition
+         * with that of all other active domains.
+         *
+         * At most, a domain can use credits to run all its active VCPUs
+         * for one full accounting period. We allow a domain to earn more
+         * only when the system-wide credit balance is negative.
+         */
+        credit_peak = sdom->active_vcpu_count * BCSCHED_CREDITS_PER_ACCT;
+        if ( csched_priv.credit_balance < 0 )
+        {
+            credit_peak += ( ( -csched_priv.credit_balance * sdom->weight) +
+                             (weight_total - 1)
+                           ) / weight_total;
+        }
+
+        if ( sdom->cap != 0U )
+        {
+            credit_cap = ((sdom->cap * BCSCHED_CREDITS_PER_ACCT) + 99) / 100;
+            if ( credit_cap < credit_peak )
+                credit_peak = credit_cap;
+
+            credit_cap = ( credit_cap + ( sdom->active_vcpu_count - 1 )
+                         ) / sdom->active_vcpu_count;
+        }
+
+        credit_fair = ( ( credit_total * sdom->weight) + (weight_total - 1)
+                      ) / weight_total;
+
+        if ( credit_fair < credit_peak )
+        {
+            /* credit_fair is 0 if weight is 0. */
+            if ( sdom->weight != 0 )
+                credit_xtra = 1;
+        }
+        else
+        {
+            if ( weight_left != 0U )
+            {
+                /* Give other domains a chance at unused credits */
+                credit_total += ( ( ( credit_fair - credit_peak
+                                    ) * weight_total
+                                  ) + ( weight_left - 1 )
+                                ) / weight_left;
+            }
+
+            if ( credit_xtra )
+            {
+                /*
+                 * Lazily keep domains with extra credits at the head of
+                 * the queue to give others a chance at them in future
+                 * accounting periods.
+                 */
+                CSCHED_STAT_CRANK(acct_reorder);
+                list_del(&sdom->active_sdom_elem);
+                list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
+            }
+
+            credit_fair = credit_peak;
+        }
+
+        /* Compute fair share per VCPU */
+        credit_fair = ( credit_fair + ( sdom->active_vcpu_count - 1 )
+                      ) / sdom->active_vcpu_count;
+
+        /* Compute fair share of boost_credit per VCPU */
+        bc_fair = ( ((sbdom->boost_ratio * BCSCHED_CREDITS_PER_ACCT)/100) +
+                    (sdom->active_vcpu_count - 1)
+                  ) / sdom->active_vcpu_count;
+
+        list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu )
+        {
+            svc = list_entry(iter_vcpu, struct csched_vcpu, active_vcpu_elem);
+            sbvc = svc_sbvc(svc);
+
+            BUG_ON( sdom != svc->sdom );
+
+            /* Balance two credits */
+            credit = atomic_read(&svc->credit);
+            atomic_add(c_average - credit, &svc->credit);
+            boost_credit = atomic_read(&sbvc->boost_credit);
+            atomic_add(bc_average - boost_credit, &sbvc->boost_credit);
+            boost_credit = atomic_read(&sbvc->boost_credit);
+            if ( sbdom->boost_ratio != 0 )
+            {
+                /* Increment boost credit */
+                atomic_add(bc_fair, &sbvc->boost_credit);
+                boost_credit = atomic_read(&sbvc->boost_credit);
+
+                /*
+                 * Upper bound on boost credits.
+                 * Add excess to credit.
+                 */
+                if ( boost_credit > max_boost_credit )
+                {
+                    atomic_add(boost_credit - max_boost_credit, &svc->credit);
+                    atomic_set(&sbvc->boost_credit, max_boost_credit);
+                    boost_credit = atomic_read(&sbvc->boost_credit);
+                }
+                /*
+                 * If credit is negative,
+                 * boost credits compensate credit.
+                 */
+                credit = atomic_read(&svc->credit);
+                if ( credit < 0 && boost_credit > 0 )
+                {
+                    if ( boost_credit > -credit )
+                    {
+                        atomic_sub(-credit, &sbvc->boost_credit);
+                        atomic_add(-credit, &svc->credit);
+                    }
+                    else
+                    {
+                        atomic_sub(boost_credit, &sbvc->boost_credit);
+                        atomic_add(boost_credit, &svc->credit);
+                    }
+                    boost_credit = atomic_read(&sbvc->boost_credit);
+                }
+            }
+
+            /* Increment credit */
+            atomic_add(credit_fair, &svc->credit);
+            credit = atomic_read(&svc->credit);
+
+            /*
+             * Recompute priority or, if VCPU is idling, remove it from
+             * the active list.
+             */
+            if ( credit < 0 )
+            {
+                svc->pri = CSCHED_PRI_TS_OVER;
+
+                /* Park running VCPUs of capped-out domains */
+                if ( sdom->cap != 0U &&
+                     credit < -credit_cap &&
+                     !(svc->flags & CSCHED_FLAG_VCPU_PARKED) )
+                {
+                    CSCHED_STAT_CRANK(vcpu_park);
+                    vcpu_pause_nosync(svc->vcpu);
+                    svc->flags |= CSCHED_FLAG_VCPU_PARKED;
+                }
+
+                /* Lower bound on credits */
+                if ( credit < -BCSCHED_CREDITS_PER_TSLICE )
+                {
+                    CSCHED_STAT_CRANK(acct_min_credit);
+                    credit = -BCSCHED_CREDITS_PER_TSLICE;
+                    atomic_set(&svc->credit, credit);
+                }
+            }
+            else
+            {
+                if ( boost_credit <= 0 )
+                    svc->pri = CSCHED_PRI_TS_UNDER;
+                else
+                    svc->pri = CSCHED_PRI_TS_BOOST;
+
+                /* Unpark any capped domains whose credits go positive */
+                if ( svc->flags & CSCHED_FLAG_VCPU_PARKED)
+                {
+                    /*
+                     * It's important to unset the flag AFTER the unpause()
+                     * call to make sure the VCPU's priority is not boosted
+                     * if it is woken up here.
+                     */
+                    CSCHED_STAT_CRANK(vcpu_unpark);
+                    vcpu_unpause(svc->vcpu);
+                    svc->flags &= ~CSCHED_FLAG_VCPU_PARKED;
+                }
+
+                if ( credit > BCSCHED_CREDITS_PER_TSLICE )
+                {
+                    atomic_add(credit - BCSCHED_CREDITS_PER_TSLICE,
+                               &sbvc->boost_credit);
+                    boost_credit = atomic_read(&sbvc->boost_credit);
+                    credit = BCSCHED_CREDITS_PER_TSLICE;
+                    atomic_set(&svc->credit, credit);
+
+                    if ( boost_credit > max_boost_credit )
+                    {
+                        atomic_set(&sbvc->boost_credit, max_boost_credit);
+                        __bcsched_vcpu_acct_stop_locked(svc);
+                    }
+                }
+            }
+
+            if ( sbdom->boost_ratio == 0 )
+            {
+                CSCHED_VCPU_STAT_SET(svc, credit_last, credit);
+                CSCHED_VCPU_STAT_SET(svc, credit_incr, credit_fair);
+                credit_balance += credit;
+            }
+            else
+            {
+                CSCHED_VCPU_STAT_SET(svc, credit_last, boost_credit);
+                CSCHED_VCPU_STAT_SET(svc, credit_incr, bc_fair);
+            }
+        }
+    }
+
+    csched_priv.credit_balance = credit_balance;
+
+    spin_unlock_irqrestore(&csched_priv.lock, flags);
+
+    /* Inform each CPU that its runq needs to be sorted */
+    csched_priv.runq_sort++;
+}
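
To make the credit arithmetic concrete, a worked example under the stock constants (10,000 credits per 10 ms tick and three ticks per accounting period, i.e. BCSCHED_CREDITS_PER_ACCT = 30,000, worth 30 ms of CPU):

    /* Hypothetical numbers: a domain with boost_ratio = 20 (%),
     * max_boost_period = 60 (ms) and 2 active vCPUs.
     *
     * Boost credits granted per accounting period, per vCPU:
     *   bc_fair = ceil((20 * 30000 / 100) / 2) = 3000    (3 ms of CPU)
     *
     * Upper bound on banked boost credits, per vCPU:
     *   max_boost_credit = 60 * (30000 / 30) = 60000     (60 ms of CPU)
     *
     * Each vCPU thus accrues up to 3 ms of BOOST-priority time every
     * 30 ms (20% of one pCPU split across two vCPUs) and can bank at
     * most 60 ms of it; any excess spills into ordinary credits. */
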
+
+static void
+bcsched_tick(void *_cpu)
+{
+    unsigned int cpu = (unsigned long)_cpu;
+    struct csched_pcpu *spc = CSCHED_PCPU(cpu);
+
+    spc->tick++;
+
+    /*
+     * Accounting for running VCPU
+     */
+    if ( !is_idle_vcpu(current) )
+        bcsched_vcpu_acct(cpu);
+
+    /*
+     * Host-wide accounting duty
+     *
+     * Note: Currently, this is always done by the master boot CPU. Eventually,
+     * we could distribute or at the very least cycle the duty.
+     */
+    if ( (csched_priv.master == cpu) &&
+         (spc->tick % BCSCHED_TICKS_PER_ACCT) == 0 )
+    {
+        bcsched_acct();
+    }
+
+    /*
+     * Check if runq needs to be sorted
+     *
+     * Every physical CPU resorts the runq after the accounting master has
+     * modified priorities. This is a special O(n) sort and runs at most
+     * once per accounting period (currently 30 milliseconds).
+     */
+    bcsched_runq_sort(cpu);
+
+    set_timer(&spc->ticker, NOW() + MILLISECS(BCSCHED_MSECS_PER_TICK));
+}
+
+static struct task_slice
+bcsched_schedule(s_time_t now)
+{
+    struct csched_vcpu *svc = CSCHED_VCPU(current);
+    struct bcsched_vcpu *sbvc = svc_sbvc(svc);
+    s_time_t passed = now - sbvc->start_time;
+    int consumed;
+    int boost_credit;
+    struct task_slice ret;
+
+    /*
+     * Update credit
+     */
+    consumed = ( passed +
+                 (MILLISECS(BCSCHED_MSECS_PER_TSLICE) /
+                  BCSCHED_CREDITS_PER_TSLICE - 1)
+               ) / (MILLISECS(BCSCHED_MSECS_PER_TSLICE) /
+                    BCSCHED_CREDITS_PER_TSLICE);
+    if ( svc->pri == CSCHED_PRI_TS_BOOST )
+    {
+        boost_credit = atomic_read(&sbvc->boost_credit);
+        if ( boost_credit > consumed )
+        {
+            atomic_sub(consumed, &sbvc->boost_credit);
+            consumed = 0;
+        }
+        else
+        {
+            atomic_sub(boost_credit, &sbvc->boost_credit);
+            consumed -= boost_credit;
+            svc->pri = CSCHED_PRI_TS_UNDER;
+        }
+    }
+    if ( consumed > 0 && !is_idle_vcpu(current) )
+        atomic_sub(consumed, &svc->credit);
+
+    ret = csched_schedule(now);
+
+    svc = CSCHED_VCPU(ret.task);
+    if ( svc->pri == CSCHED_PRI_TS_BOOST )
+        ret.time = bcsched_priv.boost_tslice;
+
+    sbvc = svc_sbvc(svc);
+    sbvc->start_time = now;
+
+    return ret;
+}
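
Note the unit conversion in the consumed computation: with the stock constants, MILLISECS(30) / 30000 credits works out to 1000 ns per credit, i.e. one credit per microsecond of CPU time, rounded up. A BOOSTed vCPU therefore burns its banked boost credits first, microsecond for microsecond, and only starts drawing down ordinary credits (dropping back to UNDER) once the boost bank is empty.
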
+
+static void
+bcsched_dump_vcpu(struct csched_vcpu *svc)
+{
+    struct bcsched_vcpu * const sbvc = svc_sbvc(svc);
+
+    csched_dump_vcpu(svc);
+
+    if ( svc->sdom )
+    {
+        struct bcsched_dom * const sbdom = sdom_sbdom(svc->sdom);
+
+        printk("\t     bc=%i [bc=%i]\n",
+               atomic_read(&sbvc->boost_credit),
+               sbdom->boost_ratio * BCSCHED_CREDITS_PER_TSLICE / 100);
+    }
+}
+
+static void
+bcsched_dump(void)
+{
+    struct list_head *iter_sdom, *iter_svc;
+    int loop;
+    char idlers_buf[100];
+
+    printk("info:\n"
+           "\tncpus              = %u\n"
+           "\tmaster             = %u\n"
+           "\tcredit             = %u\n"
+           "\tcredit balance     = %d\n"
+           "\tweight             = %u\n"
+           "\trunq_sort          = %u\n"
+           "\tboost_tslice       = %"PRId64"\n"
+           "\tboost_credit       = %u\n"
+           "\ttotal_boost_ratio  = %u\n"
+           "\tdefault-weight     = %d\n"
+           "\tmsecs per tick     = %dms\n"
+           "\tcredits per tick   = %d\n"
+           "\tticks per tslice   = %d\n"
+           "\tticks per acct     = %d\n",
+           csched_priv.ncpus,
+           csched_priv.master,
+           csched_priv.credit,
+           csched_priv.credit_balance,
+           csched_priv.weight,
+           csched_priv.runq_sort,
+           bcsched_priv.boost_tslice,
+           bcsched_priv.boost_credit,
+           bcsched_priv.total_boost_ratio,
+           CSCHED_DEFAULT_WEIGHT,
+           BCSCHED_MSECS_PER_TICK,
+           BCSCHED_CREDITS_PER_TICK,
+           BCSCHED_TICKS_PER_TSLICE,
+           BCSCHED_TICKS_PER_ACCT);
+
+    cpumask_scnprintf(idlers_buf, sizeof(idlers_buf), csched_priv.idlers);
+    printk("idlers: %s\n", idlers_buf);
+
+    CSCHED_STATS_PRINTK();
+
+    printk("active vcpus:\n");
+    loop = 0;
+    list_for_each( iter_sdom, &csched_priv.active_sdom )
+    {
+        struct csched_dom *sdom;
+        sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
+
+        list_for_each( iter_svc, &sdom->active_vcpu )
+        {
+            struct csched_vcpu *svc;
+            svc = list_entry(iter_svc, struct csched_vcpu, active_vcpu_elem);
+
+            printk("\t%3d: ", ++loop);
+            bcsched_dump_vcpu(svc);
+        }
+    }
+
+    printk("inactive vcpus:\n");
+    loop = 0;
+    list_for_each( iter_svc, &bcsched_priv.inactive_vcpu )
+    {
+        struct bcsched_vcpu *sbvc;
+        sbvc = list_entry(iter_svc, struct bcsched_vcpu, inactive_vcpu_elem);
+
+        printk("\t%3d: ", ++loop);
+        bcsched_dump_vcpu(&sbvc->svc);
+    }
+}
+
+static void
+bcsched_init(void)
+{
+    csched_init();
+
+    INIT_LIST_HEAD(&bcsched_priv.inactive_vcpu);
+    bcsched_priv.boost_tslice = MILLISECS(BCSCHED_MSECS_PER_TSLICE);
+    bcsched_priv.boost_credit = 0;
+    bcsched_priv.total_boost_ratio = 0;
+}
+
+
+struct scheduler sched_bcredit_def = {
+    .name           = "SMP Credit Scheduler for client side",
+    .opt_name       = "bcredit",
+    .sched_id       = XEN_SCHEDULER_BCREDIT,
+
+    .init_domain    = bcsched_dom_init,
+    .destroy_domain = bcsched_dom_destroy,
+
+    .init_vcpu      = bcsched_vcpu_init,
+    .destroy_vcpu   = bcsched_vcpu_destroy,
+
+    .sleep          = csched_vcpu_sleep,
+    .wake           = csched_vcpu_wake,
+
+    .adjust         = bcsched_dom_cntl,
+
+    .pick_cpu       = csched_cpu_pick,
+    .do_schedule    = bcsched_schedule,
+
+    .dump_cpu_state = csched_dump_pcpu,
+    .dump_settings  = bcsched_dump,
+    .init           = bcsched_init,
+};
+
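Note how thin the wrapper is: sleep, wake, pick_cpu and the per-CPU dump are taken verbatim from the credit scheduler, and only domain/vcpu init and destroy, adjust, the tick and do_schedule are overridden. This works because bcsched_vcpu and bcsched_dom embed the csched structures and recover the outer object via container_of() (the svc_sbvc/sdom_sbdom macros above).
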
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 04b09e2168841150f2072543aa06dc37ff6ed792..ffcb432b39d9e88719c80290f6b14b5acf60ba79 100644
@@ -51,9 +51,11 @@ DEFINE_PER_CPU(struct schedule_data, schedule_data);
 
 extern struct scheduler sched_sedf_def;
 extern struct scheduler sched_credit_def;
+extern struct scheduler sched_bcredit_def;
 static struct scheduler *schedulers[] = { 
     &sched_sedf_def,
     &sched_credit_def,
+    &sched_bcredit_def,
     NULL
 };
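
With this registration the new scheduler should be selectable at boot in the usual way, via the hypervisor's sched= command-line option (sched=bcredit, matching .opt_name above), alongside sched=credit and sched=sedf.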
 
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index cc0ec145927f0fcb8de1f1a881d122641d96fffb..084e86b6d9da00f1d6cbd8dfa548678a2f009183 100644
@@ -243,6 +243,7 @@ static int iommu_setup(void)
     if ( iommu_enabled )
         printk("I/O virtualisation for PV guests %sabled\n",
                iommu_pv_enabled ? "en" : "dis");
+
     return rc;
 }
 __initcall(iommu_setup);
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index b7075ac447e54372b745729aaf344b7a388b4048..84651984cf984a1d911cd34d9f77c1c77fd0dbed 100644
@@ -294,6 +294,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_vcpus_t);
 /* Scheduler types. */
 #define XEN_SCHEDULER_SEDF     4
 #define XEN_SCHEDULER_CREDIT   5
+#define XEN_SCHEDULER_BCREDIT  6
 /* Set or get info? */
 #define XEN_DOMCTL_SCHEDOP_putinfo 0
 #define XEN_DOMCTL_SCHEDOP_getinfo 1
@@ -312,6 +313,12 @@ struct xen_domctl_scheduler_op {
             uint16_t weight;
             uint16_t cap;
         } credit;
+        struct xen_domctl_sched_bcredit {
+            uint16_t weight;
+            uint16_t cap;
+            uint16_t max_boost_period;
+            uint16_t boost_ratio;
+        } bcredit;
     } u;
 };
 typedef struct xen_domctl_scheduler_op xen_domctl_scheduler_op_t;