debuggers.hg

annotate xen/common/rcupdate.c @ 22848:6341fe0f4e5a

Added tag 4.1.0-rc2 for changeset 9dca60d88c63
author Keir Fraser <keir@xen.org>
date Tue Jan 25 14:06:55 2011 +0000 (2011-01-25)
parents 75b6287626ee
children
rev   line source
kaf24@13686 1 /*
kaf24@13686 2 * Read-Copy Update mechanism for mutual exclusion
kaf24@13686 3 *
kaf24@13686 4 * This program is free software; you can redistribute it and/or modify
kaf24@13686 5 * it under the terms of the GNU General Public License as published by
kaf24@13686 6 * the Free Software Foundation; either version 2 of the License, or
kaf24@13686 7 * (at your option) any later version.
kaf24@13686 8 *
kaf24@13686 9 * This program is distributed in the hope that it will be useful,
kaf24@13686 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
kaf24@13686 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
kaf24@13686 12 * GNU General Public License for more details.
kaf24@13686 13 *
kaf24@13686 14 * You should have received a copy of the GNU General Public License
kaf24@13686 15 * along with this program; if not, write to the Free Software
kaf24@13686 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
kaf24@13686 17 *
kaf24@13686 18 * Copyright (C) IBM Corporation, 2001
kaf24@13686 19 *
kaf24@13686 20 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
kaf24@13686 21 * Manfred Spraul <manfred@colorfullife.com>
kaf24@13686 22 *
kaf24@13686 23 * Modifications for Xen: Jose Renato Santos
kaf24@13686 24 * Copyright (C) Hewlett-Packard, 2006
kaf24@13686 25 *
kaf24@13686 26 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
kaf24@13686 27 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
kaf24@13686 28 * Papers:
kaf24@13686 29 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
kaf24@13686 30 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
kaf24@13686 31 *
kaf24@13686 32 * For a detailed explanation of the Read-Copy Update mechanism, see:
kaf24@13686 33 * http://lse.sourceforge.net/locking/rcupdate.html
kaf24@13686 34 */
kaf24@13686 35 #include <xen/types.h>
kaf24@13686 36 #include <xen/kernel.h>
kaf24@13686 37 #include <xen/init.h>
kaf24@13686 38 #include <xen/spinlock.h>
kaf24@13686 39 #include <xen/smp.h>
kaf24@13686 40 #include <xen/rcupdate.h>
kaf24@13686 41 #include <xen/sched.h>
kaf24@13686 42 #include <asm/atomic.h>
kaf24@13686 43 #include <xen/bitops.h>
kaf24@13686 44 #include <xen/percpu.h>
kaf24@13686 45 #include <xen/softirq.h>
keir@21429 46 #include <xen/cpu.h>
keir@22802 47 #include <xen/stop_machine.h>
kaf24@13686 48
kaf24@13686 49 /* Definition for rcupdate control block. */
kaf24@13686 50 struct rcu_ctrlblk rcu_ctrlblk = {
kaf24@13686 51 .cur = -300,
kaf24@13686 52 .completed = -300,
kaf24@13686 53 .lock = SPIN_LOCK_UNLOCKED,
kaf24@13686 54 .cpumask = CPU_MASK_NONE,
kaf24@13686 55 };
kaf24@13686 56
keir@21462 57 DEFINE_PER_CPU(struct rcu_data, rcu_data);
kaf24@13686 58
kaf24@13686 59 static int blimit = 10;
kaf24@13686 60 static int qhimark = 10000;
kaf24@13686 61 static int qlowmark = 100;
kaf24@13686 62 static int rsinterval = 1000;
kaf24@13686 63
keir@22810 64 struct rcu_barrier_data {
keir@22810 65 struct rcu_head head;
keir@22810 66 atomic_t *cpu_count;
keir@22810 67 };
keir@22810 68
keir@22810 69 static void rcu_barrier_callback(struct rcu_head *head)
keir@22802 70 {
keir@22810 71 struct rcu_barrier_data *data = container_of(
keir@22810 72 head, struct rcu_barrier_data, head);
keir@22810 73 atomic_inc(data->cpu_count);
keir@22810 74 }
keir@22810 75
keir@22810 76 static int rcu_barrier_action(void *_cpu_count)
keir@22810 77 {
keir@22810 78 struct rcu_barrier_data data = { .cpu_count = _cpu_count };
keir@22802 79
keir@22802 80 ASSERT(!local_irq_is_enabled());
keir@22802 81 local_irq_enable();
keir@22802 82
keir@22810 83 /*
keir@22810 84 * When the callback is executed, all previously-queued RCU work on this CPU
keir@22810 85 * is completed. When all CPUs have executed their callback, data.cpu_count
keir@22810 86 * will have been incremented to include every online CPU.
keir@22810 87 */
keir@22810 88 call_rcu(&data.head, rcu_barrier_callback);
keir@22810 89
keir@22810 90 while ( atomic_read(data.cpu_count) != cpus_weight(cpu_online_map) )
keir@22802 91 {
keir@22802 92 process_pending_softirqs();
keir@22802 93 cpu_relax();
keir@22802 94 }
keir@22802 95
keir@22802 96 local_irq_disable();
keir@22802 97
keir@22802 98 return 0;
keir@22802 99 }
keir@22802 100
keir@22802 101 int rcu_barrier(void)
keir@22802 102 {
keir@22810 103 atomic_t cpu_count = ATOMIC_INIT(0);
keir@22810 104 return stop_machine_run(rcu_barrier_action, &cpu_count, NR_CPUS);
keir@22802 105 }
keir@22802 106
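A hedged usage sketch of the barrier above; struct victim, its rcu member and free_victim() are hypothetical names, not part of this file. It illustrates the guarantee rcu_barrier() provides: it returns only after every callback already queued with call_rcu() has actually run, which is stronger than merely waiting out a grace period.

    /* Illustrative only: victim/free_victim() are hypothetical. */
    static void example_teardown(struct victim *v)
    {
        call_rcu(&v->rcu, free_victim);   /* queue a deferred free */
        rcu_barrier();                    /* wait until free_victim() has run */
        /* State shared with the callback can now be torn down safely. */
    }
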
kaf24@13686 107 static void force_quiescent_state(struct rcu_data *rdp,
kaf24@13686 108 struct rcu_ctrlblk *rcp)
kaf24@13686 109 {
kaf24@13686 110 cpumask_t cpumask;
kaf24@13686 111 raise_softirq(SCHEDULE_SOFTIRQ);
kaf24@13686 112 if (unlikely(rdp->qlen - rdp->last_rs_qlen > rsinterval)) {
kaf24@13686 113 rdp->last_rs_qlen = rdp->qlen;
kaf24@13686 114 /*
kaf24@13686 115 * Don't send IPI to itself. With irqs disabled,
kaf24@13686 116 * rdp->cpu is the current cpu.
kaf24@13686 117 */
kaf24@13686 118 cpumask = rcp->cpumask;
kaf24@13686 119 cpu_clear(rdp->cpu, cpumask);
kaf24@13686 120 cpumask_raise_softirq(cpumask, SCHEDULE_SOFTIRQ);
kaf24@13686 121 }
kaf24@13686 122 }
kaf24@13686 123
kaf24@13686 124 /**
kaf24@13686 125 * call_rcu - Queue an RCU callback for invocation after a grace period.
kaf24@13686 126 * @head: structure to be used for queueing the RCU updates.
kaf24@13686 127 * @func: actual update function to be invoked after the grace period
kaf24@13686 128 *
kaf24@13686 129 * The update function will be invoked some time after a full grace
kaf24@13686 130 * period elapses, in other words after all currently executing RCU
kaf24@13686 131 * read-side critical sections have completed. RCU read-side critical
kaf24@13686 132 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
kaf24@13686 133 * and may be nested.
kaf24@13686 134 */
kaf24@13686 135 void fastcall call_rcu(struct rcu_head *head,
kaf24@13686 136 void (*func)(struct rcu_head *rcu))
kaf24@13686 137 {
kaf24@13686 138 unsigned long flags;
kaf24@13686 139 struct rcu_data *rdp;
kaf24@13686 140
kaf24@13686 141 head->func = func;
kaf24@13686 142 head->next = NULL;
kaf24@13686 143 local_irq_save(flags);
kaf24@13686 144 rdp = &__get_cpu_var(rcu_data);
kaf24@13686 145 *rdp->nxttail = head;
kaf24@13686 146 rdp->nxttail = &head->next;
kaf24@13686 147 if (unlikely(++rdp->qlen > qhimark)) {
kaf24@13686 148 rdp->blimit = INT_MAX;
kaf24@13686 149 force_quiescent_state(rdp, &rcu_ctrlblk);
kaf24@13686 150 }
kaf24@13686 151 local_irq_restore(flags);
kaf24@13686 152 }
kaf24@13686 153
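A minimal sketch of the pattern the comment above describes, assuming a hypothetical struct my_entry that is not part of this file: the rcu_head is embedded in the object, and the callback recovers the object with container_of() and frees it once the grace period has elapsed.

    struct my_entry {                 /* hypothetical example structure */
        int key;
        struct rcu_head rcu;
    };

    static void my_entry_free(struct rcu_head *head)
    {
        /* Runs only after all pre-existing read-side critical sections end. */
        xfree(container_of(head, struct my_entry, rcu));
    }

    /* Updater side (illustrative): after unlinking @e from any shared list. */
    static void my_entry_retire(struct my_entry *e)
    {
        call_rcu(&e->rcu, my_entry_free);
    }
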
kaf24@13686 154 /*
kaf24@13686 155 * Invoke the completed RCU callbacks. They are expected to be in
kaf24@13686 156 * a per-cpu list.
kaf24@13686 157 */
kaf24@13686 158 static void rcu_do_batch(struct rcu_data *rdp)
kaf24@13686 159 {
kaf24@13686 160 struct rcu_head *next, *list;
kaf24@13686 161 int count = 0;
kaf24@13686 162
kaf24@13686 163 list = rdp->donelist;
kaf24@13686 164 while (list) {
kaf24@13686 165 next = rdp->donelist = list->next;
kaf24@13686 166 list->func(list);
kaf24@13686 167 list = next;
kaf24@13686 168 rdp->qlen--;
kaf24@13686 169 if (++count >= rdp->blimit)
kaf24@13686 170 break;
kaf24@13686 171 }
kaf24@13686 172 if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
kaf24@13686 173 rdp->blimit = blimit;
kaf24@13686 174 if (!rdp->donelist)
kaf24@13686 175 rdp->donetail = &rdp->donelist;
kaf24@13686 176 else
kaf24@13686 177 raise_softirq(RCU_SOFTIRQ);
kaf24@13686 178 }
kaf24@13686 179
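A worked illustration of the batching limits, using the default values defined near the top of this file:

    /*
     * Illustrative numbers (defaults from this file): rcu_do_batch() normally
     * invokes at most blimit (10) callbacks per RCU_SOFTIRQ pass and re-raises
     * the softirq if more remain.  If a cpu's queue ever exceeds qhimark
     * (10000) entries, call_rcu() lifts the limit to INT_MAX and
     * force_quiescent_state() prods the cpus through the scheduler; once the
     * queue has drained to qlowmark (100) entries or fewer, the limit returns
     * to blimit (10).
     */
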
kaf24@13686 180 /*
kaf24@13686 181 * Grace period handling:
kaf24@13686 182 * The grace period handling consists of two steps:
kaf24@13686 183 * - A new grace period is started.
kaf24@13686 184 * This is done by rcu_start_batch. The start is not broadcast to
kaf24@13686 185 * all cpus; they must pick it up by comparing rcp->cur with
kaf24@13686 186 * rdp->quiescbatch. All cpus are recorded in the
kaf24@13686 187 * rcu_ctrlblk.cpumask bitmap.
kaf24@13686 188 * - All cpus must go through a quiescent state.
kaf24@13686 189 * Since the start of the grace period is not broadcast, at least two
kaf24@13686 190 * calls to rcu_check_quiescent_state are required:
kaf24@13686 191 * The first call just notices that a new grace period is running. The
kaf24@13686 192 * following calls check whether there was a quiescent state since the
kaf24@13686 193 * beginning of the grace period. If so, the cpu clears its bit in
kaf24@13686 194 * rcu_ctrlblk.cpumask. If the bitmap becomes empty, the grace period is
kaf24@13686 195 * completed. rcu_check_quiescent_state calls rcu_start_batch() to start
kaf24@13686 196 * the next grace period (if necessary).
kaf24@13686 197 */
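To make the two-step protocol above concrete, here is a hedged walkthrough using illustrative counter values (the numbers are examples only, not taken from a real trace):

    /*
     * Illustrative sequence (example values only):
     *   Initially rcp->cur == rcp->completed == 5: no grace period running.
     *   rcu_start_batch(): rcp->cur becomes 6, rcp->cpumask = cpu_online_map.
     *   On each cpu, the first rcu_check_quiescent_state() call sees
     *   rdp->quiescbatch (5) != rcp->cur (6), so it sets qs_pending and
     *   copies rcp->cur into rdp->quiescbatch.
     *   A later call on that cpu finds qs_pending set, treats the cpu as
     *   having passed a quiescent state, and clears it from rcp->cpumask
     *   via cpu_quiet().
     *   When the mask becomes empty, rcp->completed is set to 6 and the
     *   next pending batch (if any) is started.
     */
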
kaf24@13686 198 /*
kaf24@13686 199 * Register a new batch of callbacks, and start it up if there is currently no
kaf24@13686 200 * active batch and the batch to be registered has not already occurred.
kaf24@13686 201 * Caller must hold rcu_ctrlblk.lock.
kaf24@13686 202 */
kaf24@13686 203 static void rcu_start_batch(struct rcu_ctrlblk *rcp)
kaf24@13686 204 {
kaf24@13686 205 if (rcp->next_pending &&
kaf24@13686 206 rcp->completed == rcp->cur) {
kaf24@13686 207 rcp->next_pending = 0;
kaf24@13686 208 /*
kaf24@13686 209 * next_pending == 0 must be visible in
kaf24@13686 210 * __rcu_process_callbacks() before it can see the new value of cur.
kaf24@13686 211 */
kaf24@13686 212 smp_wmb();
kaf24@13686 213 rcp->cur++;
kaf24@13686 214
kaf24@13686 215 rcp->cpumask = cpu_online_map;
kaf24@13686 216 }
kaf24@13686 217 }
kaf24@13686 218
kaf24@13686 219 /*
kaf24@13686 220 * cpu went through a quiescent state since the beginning of the grace period.
kaf24@13686 221 * Clear it from the cpu mask and complete the grace period if it was the last
kaf24@13686 222 * cpu. Start another grace period if someone has further entries pending.
kaf24@13686 223 */
kaf24@13686 224 static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
kaf24@13686 225 {
kaf24@13686 226 cpu_clear(cpu, rcp->cpumask);
kaf24@13686 227 if (cpus_empty(rcp->cpumask)) {
kaf24@13686 228 /* batch completed ! */
kaf24@13686 229 rcp->completed = rcp->cur;
kaf24@13686 230 rcu_start_batch(rcp);
kaf24@13686 231 }
kaf24@13686 232 }
kaf24@13686 233
kaf24@13686 234 /*
kaf24@13686 235 * Check if the cpu has gone through a quiescent state (say context
kaf24@13686 236 * switch). If so, and if it hasn't already done so in this RCU
kaf24@13686 237 * quiescent cycle, indicate that it has done so.
kaf24@13686 238 */
kaf24@13686 239 static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
kaf24@13686 240 struct rcu_data *rdp)
kaf24@13686 241 {
kaf24@13686 242 if (rdp->quiescbatch != rcp->cur) {
kaf24@13686 243 /* start new grace period: */
kaf24@13686 244 rdp->qs_pending = 1;
kaf24@13686 245 rdp->quiescbatch = rcp->cur;
kaf24@13686 246 return;
kaf24@13686 247 }
kaf24@13686 248
kaf24@13686 249 /* Grace period already completed for this cpu?
kaf24@13686 250 * qs_pending is checked instead of the actual bitmap to avoid
kaf24@13686 251 * cacheline thrashing.
kaf24@13686 252 */
kaf24@13686 253 if (!rdp->qs_pending)
kaf24@13686 254 return;
kaf24@13686 255
kaf24@13686 256 rdp->qs_pending = 0;
kaf24@13686 257
kaf24@13686 258 spin_lock(&rcp->lock);
kaf24@13686 259 /*
kaf24@13686 260 * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
kaf24@13686 261 * during cpu startup. Ignore the quiescent state.
kaf24@13686 262 */
kaf24@13686 263 if (likely(rdp->quiescbatch == rcp->cur))
kaf24@13686 264 cpu_quiet(rdp->cpu, rcp);
kaf24@13686 265
kaf24@13686 266 spin_unlock(&rcp->lock);
kaf24@13686 267 }
kaf24@13686 268
kaf24@13686 269
kaf24@13686 270 /*
kaf24@13686 271 * This does the RCU processing work from softirq context.
kaf24@13686 272 */
kaf24@13686 273 static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
kaf24@13686 274 struct rcu_data *rdp)
kaf24@13686 275 {
kaf24@13686 276 if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
kaf24@13686 277 *rdp->donetail = rdp->curlist;
kaf24@13686 278 rdp->donetail = rdp->curtail;
kaf24@13686 279 rdp->curlist = NULL;
kaf24@13686 280 rdp->curtail = &rdp->curlist;
kaf24@13686 281 }
kaf24@13686 282
kaf24@13686 283 local_irq_disable();
kaf24@13686 284 if (rdp->nxtlist && !rdp->curlist) {
kaf24@13686 285 rdp->curlist = rdp->nxtlist;
kaf24@13686 286 rdp->curtail = rdp->nxttail;
kaf24@13686 287 rdp->nxtlist = NULL;
kaf24@13686 288 rdp->nxttail = &rdp->nxtlist;
kaf24@13686 289 local_irq_enable();
kaf24@13686 290
kaf24@13686 291 /*
kaf24@13686 292 * start the next batch of callbacks
kaf24@13686 293 */
kaf24@13686 294
kaf24@13686 295 /* determine batch number */
kaf24@13686 296 rdp->batch = rcp->cur + 1;
kaf24@13686 297 /* see the comment and corresponding wmb() in
kaf24@13686 298 * rcu_start_batch()
kaf24@13686 299 */
kaf24@13686 300 smp_rmb();
kaf24@13686 301
kaf24@13686 302 if (!rcp->next_pending) {
kaf24@13686 303 /* and start it/schedule start if it's a new batch */
kaf24@13686 304 spin_lock(&rcp->lock);
kaf24@13686 305 rcp->next_pending = 1;
kaf24@13686 306 rcu_start_batch(rcp);
kaf24@13686 307 spin_unlock(&rcp->lock);
kaf24@13686 308 }
kaf24@13686 309 } else {
kaf24@13686 310 local_irq_enable();
kaf24@13686 311 }
kaf24@13686 312 rcu_check_quiescent_state(rcp, rdp);
kaf24@13686 313 if (rdp->donelist)
kaf24@13686 314 rcu_do_batch(rdp);
kaf24@13686 315 }
kaf24@13686 316
kaf24@13686 317 static void rcu_process_callbacks(void)
kaf24@13686 318 {
kaf24@13686 319 __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
kaf24@13686 320 }
kaf24@13686 321
kaf24@13686 322 static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
kaf24@13686 323 {
kaf24@13686 324 /* This cpu has pending rcu entries and the grace period
kaf24@13686 325 * for them has completed.
kaf24@13686 326 */
kaf24@13686 327 if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
kaf24@13686 328 return 1;
kaf24@13686 329
kaf24@13686 330 /* This cpu has no pending entries, but there are new entries */
kaf24@13686 331 if (!rdp->curlist && rdp->nxtlist)
kaf24@13686 332 return 1;
kaf24@13686 333
kaf24@13686 334 /* This cpu has finished callbacks to invoke */
kaf24@13686 335 if (rdp->donelist)
kaf24@13686 336 return 1;
kaf24@13686 337
kaf24@13686 338 /* The rcu core waits for a quiescent state from the cpu */
kaf24@13686 339 if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
kaf24@13686 340 return 1;
kaf24@13686 341
kaf24@13686 342 /* nothing to do */
kaf24@13686 343 return 0;
kaf24@13686 344 }
kaf24@13686 345
kaf24@13686 346 int rcu_pending(int cpu)
kaf24@13686 347 {
kaf24@13686 348 return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu));
kaf24@13686 349 }
kaf24@13686 350
kaf24@13686 351 /*
kaf24@13686 352 * Check to see if any future RCU-related work will need to be done
kaf24@13686 353 * by the current CPU, even if none need be done immediately, returning
kaf24@13686 354 * 1 if so. This function is part of the RCU implementation; it is -not-
kaf24@13686 355 * an exported member of the RCU API.
kaf24@13686 356 */
kaf24@13686 357 int rcu_needs_cpu(int cpu)
kaf24@13686 358 {
kaf24@13686 359 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
kaf24@13686 360
kaf24@13686 361 return (!!rdp->curlist || rcu_pending(cpu));
kaf24@13686 362 }
kaf24@13686 363
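A hedged sketch of how this is meant to be consumed (the helper below is hypothetical, not part of this file): a power-management or idle path can ask whether RCU still needs the cpu before letting it go quiet.

    /* Illustrative only: decide whether @cpu may enter a deep idle state. */
    static int cpu_may_go_idle(int cpu)
    {
        /* Keep the cpu responsive while it has callbacks or owes a quiescent state. */
        return !rcu_needs_cpu(cpu);
    }
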
kaf24@13686 364 void rcu_check_callbacks(int cpu)
kaf24@13686 365 {
kaf24@13686 366 raise_softirq(RCU_SOFTIRQ);
kaf24@13686 367 }
kaf24@13686 368
keir@21474 369 static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
keir@21474 370 struct rcu_head **tail)
keir@21474 371 {
keir@21474 372 local_irq_disable();
keir@21474 373 *this_rdp->nxttail = list;
keir@21474 374 if (list)
keir@21474 375 this_rdp->nxttail = tail;
keir@21474 376 local_irq_enable();
keir@21474 377 }
keir@21474 378
keir@21474 379 static void rcu_offline_cpu(struct rcu_data *this_rdp,
keir@21474 380 struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
keir@21474 381 {
keir@21474 382 /* If the cpu going offline owns the grace period, we can block
keir@21474 383 * indefinitely waiting for it, so flush it here.
keir@21474 384 */
keir@21474 385 spin_lock(&rcp->lock);
keir@21474 386 if (rcp->cur != rcp->completed)
keir@21474 387 cpu_quiet(rdp->cpu, rcp);
keir@21474 388 spin_unlock(&rcp->lock);
keir@21474 389
keir@21474 390 rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
keir@21474 391 rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
keir@21474 392 rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
keir@21474 393
keir@21474 394 local_irq_disable();
keir@21474 395 this_rdp->qlen += rdp->qlen;
keir@21474 396 local_irq_enable();
keir@21474 397 }
keir@21474 398
kaf24@13686 399 static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
kaf24@13686 400 struct rcu_data *rdp)
kaf24@13686 401 {
kaf24@13686 402 memset(rdp, 0, sizeof(*rdp));
kaf24@13686 403 rdp->curtail = &rdp->curlist;
kaf24@13686 404 rdp->nxttail = &rdp->nxtlist;
kaf24@13686 405 rdp->donetail = &rdp->donelist;
kaf24@13686 406 rdp->quiescbatch = rcp->completed;
kaf24@13686 407 rdp->qs_pending = 0;
kaf24@13686 408 rdp->cpu = cpu;
kaf24@13686 409 rdp->blimit = blimit;
kaf24@13686 410 }
kaf24@13686 411
keir@21429 412 static int cpu_callback(
keir@21429 413 struct notifier_block *nfb, unsigned long action, void *hcpu)
kaf24@13686 414 {
keir@21429 415 unsigned int cpu = (unsigned long)hcpu;
keir@21474 416 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
kaf24@13686 417
keir@21429 418 switch ( action )
keir@21429 419 {
keir@21474 420 case CPU_UP_PREPARE:
keir@21429 421 rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp);
keir@21429 422 break;
keir@21474 423 case CPU_UP_CANCELED:
keir@21474 424 case CPU_DEAD:
keir@21474 425 rcu_offline_cpu(&this_cpu(rcu_data), &rcu_ctrlblk, rdp);
keir@21474 426 break;
keir@21429 427 default:
keir@21429 428 break;
keir@21429 429 }
keir@21429 430
keir@21429 431 return NOTIFY_DONE;
kaf24@13686 432 }
kaf24@13686 433
keir@21429 434 static struct notifier_block cpu_nfb = {
keir@21429 435 .notifier_call = cpu_callback
keir@21429 436 };
keir@21429 437
keir@15081 438 void __init rcu_init(void)
kaf24@13686 439 {
keir@21429 440 void *cpu = (void *)(long)smp_processor_id();
keir@21436 441 cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
keir@21429 442 register_cpu_notifier(&cpu_nfb);
kaf24@13686 443 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
kaf24@13686 444 }
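
Finally, a hedged end-to-end sketch of how an updater would use this machinery (all names below are hypothetical; only call_rcu(), smp_wmb(), container_of() and the rcu_head embedding come from this file and xen/rcupdate.h). Readers simply dereference the shared pointer from non-blocking, non-preemptible code, which is exactly the kind of read-side critical section the grace-period tracking above waits out.

    struct cfg {                       /* hypothetical RCU-protected object */
        struct rcu_head rcu;
        int value;
    };

    static struct cfg *current_cfg;    /* hypothetical shared pointer */

    static void cfg_free(struct rcu_head *head)
    {
        xfree(container_of(head, struct cfg, rcu));
    }

    /* Publish a fully-initialised replacement, then defer freeing the old one. */
    static void cfg_replace(struct cfg *newc)
    {
        struct cfg *old = current_cfg;

        smp_wmb();                     /* make newc's contents visible before the pointer */
        current_cfg = newc;
        if ( old != NULL )
            call_rcu(&old->rcu, cfg_free);
    }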