/root/src/xen/xen/common/rcupdate.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Read-Copy Update mechanism for mutual exclusion |
3 | | * |
4 | | * This program is free software; you can redistribute it and/or modify |
5 | | * it under the terms of the GNU General Public License as published by |
6 | | * the Free Software Foundation; either version 2 of the License, or |
7 | | * (at your option) any later version. |
8 | | * |
9 | | * This program is distributed in the hope that it will be useful, |
10 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | | * GNU General Public License for more details. |
13 | | * |
14 | | * You should have received a copy of the GNU General Public License |
15 | | * along with this program; If not, see <http://www.gnu.org/licenses/>. |
16 | | * |
17 | | * Copyright (C) IBM Corporation, 2001 |
18 | | * |
19 | | * Authors: Dipankar Sarma <dipankar@in.ibm.com> |
20 | | * Manfred Spraul <manfred@colorfullife.com> |
21 | | * |
22 | | * Modifications for Xen: Jose Renato Santos |
23 | | * Copyright (C) Hewlett-Packard, 2006 |
24 | | * |
25 | | * Based on the original work by Paul McKenney <paulmck@us.ibm.com> |
26 | | * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. |
27 | | * Papers: |
28 | | * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf |
29 | | * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) |
30 | | * |
31 | | * For detailed explanation of Read-Copy Update mechanism see - |
32 | | * http://lse.sourceforge.net/locking/rcupdate.html |
33 | | */ |
34 | | #include <xen/types.h> |
35 | | #include <xen/kernel.h> |
36 | | #include <xen/init.h> |
37 | | #include <xen/spinlock.h> |
38 | | #include <xen/smp.h> |
39 | | #include <xen/rcupdate.h> |
40 | | #include <xen/sched.h> |
41 | | #include <asm/atomic.h> |
42 | | #include <xen/bitops.h> |
43 | | #include <xen/percpu.h> |
44 | | #include <xen/softirq.h> |
45 | | #include <xen/cpu.h> |
46 | | #include <xen/stop_machine.h> |
47 | | |
48 | | /* Global control variables for rcupdate callback mechanism. */ |
49 | | static struct rcu_ctrlblk { |
50 | | long cur; /* Current batch number. */ |
51 | | long completed; /* Number of the last completed batch */ |
52 | | int next_pending; /* Is the next batch already waiting? */ |
53 | | |
54 | | spinlock_t lock __cacheline_aligned; |
55 | | cpumask_t cpumask; /* CPUs that need to switch in order ... */ |
56 | | cpumask_t idle_cpumask; /* ... unless they are already idle */ |
57 | | /* for current batch to proceed. */ |
58 | | } __cacheline_aligned rcu_ctrlblk = { |
59 | | .cur = -300, |
60 | | .completed = -300, |
61 | | .lock = SPIN_LOCK_UNLOCKED, |
62 | | }; |
63 | | |
64 | | /* |
65 | | * Per-CPU data for Read-Copy Update. |
66 | | * nxtlist - new callbacks are added here |
67 | | * curlist - current batch for which quiescent cycle started if any |
68 | | */ |
69 | | struct rcu_data { |
70 | | /* 1) quiescent state handling : */ |
71 | | long quiescbatch; /* Batch # for grace period */ |
72 | | int qs_pending; /* core waits for quiesc state */ |
73 | | |
74 | | /* 2) batch handling */ |
75 | | long batch; /* Batch # for current RCU batch */ |
76 | | struct rcu_head *nxtlist; |
77 | | struct rcu_head **nxttail; |
78 | | long qlen; /* # of queued callbacks */ |
79 | | struct rcu_head *curlist; |
80 | | struct rcu_head **curtail; |
81 | | struct rcu_head *donelist; |
82 | | struct rcu_head **donetail; |
83 | | long blimit; /* Upper limit on a processed batch */ |
84 | | int cpu; |
85 | | struct rcu_head barrier; |
86 | | long last_rs_qlen; /* qlen during the last resched */ |
87 | | |
88 | | /* 3) idle CPUs handling */ |
89 | | struct timer idle_timer; |
90 | | bool idle_timer_active; |
91 | | }; |
92 | | |
93 | | /* |
 94 | | * If a CPU with RCU callbacks queued goes idle while the grace period |
 95 | | * is not finished yet, how can we make sure that the callbacks will |
 96 | | * eventually be executed? In Linux (2.6.21, the first "tickless idle" |
 97 | | * Linux kernel), the periodic timer tick would not be stopped for such |
 98 | | * a CPU. Here in Xen, we may not even have a periodic timer tick, so we |
 99 | | * need to use a special purpose timer. |
100 | | * |
101 | | * Such timer: |
102 | | * 1) is armed only when a CPU with an RCU callback(s) queued goes idle |
103 | | * before the end of the current grace period (_not_ for any CPUs that |
104 | | * go idle!); |
105 | | * 2) when it fires, it is only re-armed if the grace period is still |
106 | | * running; |
107 | | * 3) it is stopped immediately, if the CPU wakes up from idle and |
108 | | * resumes 'normal' execution. |
109 | | * |
 110 | | * How far in the future the timer should be programmed each time is |
 111 | | * hard to tell (it is, essentially, a guess). Since this mimics Linux's |
 112 | | * periodic timer tick, take the values used there as an indication. In |
 113 | | * Linux 2.6.21, the tick period can be 10ms, 4ms, 3.33ms or 1ms. |
114 | | * |
115 | | * By default, we use 10ms, to enable at least some power saving on the |
116 | | * CPU that is going idle. The user can change this, via a boot time |
117 | | * parameter, but only up to 100ms. |
118 | | */ |
119 | 1 | #define IDLE_TIMER_PERIOD_MAX MILLISECS(100) |
120 | 1 | #define IDLE_TIMER_PERIOD_DEFAULT MILLISECS(10) |
121 | | #define IDLE_TIMER_PERIOD_MIN MICROSECS(100) |
122 | | |
123 | | static s_time_t __read_mostly idle_timer_period; |
124 | | |
125 | | /* |
126 | | * Increment and decrement values for the idle timer handler. The algorithm |
127 | | * works as follows: |
128 | | * - if the timer actually fires, and it finds out that the grace period isn't |
129 | | * over yet, we add IDLE_TIMER_PERIOD_INCR to the timer's period; |
130 | | * - if the timer actually fires and it finds the grace period over, we |
131 | | * subtract IDLE_TIMER_PERIOD_DECR from the timer's period. |
132 | | */ |
133 | | #define IDLE_TIMER_PERIOD_INCR MILLISECS(10) |
134 | | #define IDLE_TIMER_PERIOD_DECR MICROSECS(100) |
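As a rough illustration of the adaptation rule above, here is a standalone sketch (not hypervisor code): the MILLISECS()/MICROSECS() macros and the grace_period_running flag below are stand-ins for the real time helpers and for the !cpumask_empty(&rcu_ctrlblk.cpumask) check performed in rcu_idle_timer_handler() further down.

    /* Illustration only: how the idle timer period adapts. Values in ns. */
    #include <stdbool.h>
    #include <stdio.h>

    #define MICROSECS(x) ((long long)(x) * 1000LL)
    #define MILLISECS(x) ((long long)(x) * 1000000LL)

    static long long adapt_period(long long period, bool grace_period_running)
    {
        if (grace_period_running) {
            period += MILLISECS(10);          /* IDLE_TIMER_PERIOD_INCR */
            if (period > MILLISECS(100))      /* IDLE_TIMER_PERIOD_MAX  */
                period = MILLISECS(100);
        } else {
            period -= MICROSECS(100);         /* IDLE_TIMER_PERIOD_DECR */
            if (period < MICROSECS(100))      /* IDLE_TIMER_PERIOD_MIN  */
                period = MICROSECS(100);
        }
        return period;
    }

    int main(void)
    {
        long long p = MILLISECS(10);          /* IDLE_TIMER_PERIOD_DEFAULT */

        p = adapt_period(p, true);   /* fired, grace period unfinished: 20ms   */
        p = adapt_period(p, false);  /* fired, grace period finished: 19.9ms   */
        printf("%lld ns\n", p);
        return 0;
    }

Starting from the 10ms default, a timer that keeps firing with the grace period still running walks the period up in 10ms steps until the 100ms cap, while firings after the grace period has ended walk it back down slowly, never below 100us.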
135 | | |
136 | | static DEFINE_PER_CPU(struct rcu_data, rcu_data); |
137 | | |
138 | | static int blimit = 10; |
139 | | static int qhimark = 10000; |
140 | | static int qlowmark = 100; |
141 | | static int rsinterval = 1000; |
142 | | |
143 | | struct rcu_barrier_data { |
144 | | struct rcu_head head; |
145 | | atomic_t *cpu_count; |
146 | | }; |
147 | | |
148 | | static void rcu_barrier_callback(struct rcu_head *head) |
149 | 0 | { |
150 | 0 | struct rcu_barrier_data *data = container_of( |
151 | 0 | head, struct rcu_barrier_data, head); |
152 | 0 | atomic_inc(data->cpu_count); |
153 | 0 | } |
154 | | |
155 | | static int rcu_barrier_action(void *_cpu_count) |
156 | 0 | { |
157 | 0 | struct rcu_barrier_data data = { .cpu_count = _cpu_count }; |
 158 | 0 | |
159 | 0 | ASSERT(!local_irq_is_enabled()); |
160 | 0 | local_irq_enable(); |
 161 | 0 | |
162 | 0 | /* |
163 | 0 | * When callback is executed, all previously-queued RCU work on this CPU |
164 | 0 | * is completed. When all CPUs have executed their callback, data.cpu_count |
165 | 0 | * will have been incremented to include every online CPU. |
166 | 0 | */ |
167 | 0 | call_rcu(&data.head, rcu_barrier_callback); |
 168 | 0 | |
169 | 0 | while ( atomic_read(data.cpu_count) != num_online_cpus() ) |
170 | 0 | { |
171 | 0 | process_pending_softirqs(); |
172 | 0 | cpu_relax(); |
173 | 0 | } |
 174 | 0 | |
175 | 0 | local_irq_disable(); |
 176 | 0 | |
177 | 0 | return 0; |
178 | 0 | } |
179 | | |
180 | | int rcu_barrier(void) |
181 | 0 | { |
182 | 0 | atomic_t cpu_count = ATOMIC_INIT(0); |
183 | 0 | return stop_machine_run(rcu_barrier_action, &cpu_count, NR_CPUS); |
184 | 0 | } |
185 | | |
186 | | /* Is batch a before batch b ? */ |
187 | | static inline int rcu_batch_before(long a, long b) |
188 | 0 | { |
189 | 0 | return (a - b) < 0; |
190 | 0 | } |
191 | | |
192 | | static void force_quiescent_state(struct rcu_data *rdp, |
193 | | struct rcu_ctrlblk *rcp) |
194 | 0 | { |
195 | 0 | cpumask_t cpumask; |
196 | 0 | raise_softirq(SCHEDULE_SOFTIRQ); |
197 | 0 | if (unlikely(rdp->qlen - rdp->last_rs_qlen > rsinterval)) { |
198 | 0 | rdp->last_rs_qlen = rdp->qlen; |
199 | 0 | /* |
200 | 0 | * Don't send IPI to itself. With irqs disabled, |
201 | 0 | * rdp->cpu is the current cpu. |
202 | 0 | */ |
203 | 0 | cpumask_andnot(&cpumask, &rcp->cpumask, cpumask_of(rdp->cpu)); |
204 | 0 | cpumask_raise_softirq(&cpumask, SCHEDULE_SOFTIRQ); |
205 | 0 | } |
206 | 0 | } |
207 | | |
208 | | /** |
209 | | * call_rcu - Queue an RCU callback for invocation after a grace period. |
210 | | * @head: structure to be used for queueing the RCU updates. |
211 | | * @func: actual update function to be invoked after the grace period |
212 | | * |
213 | | * The update function will be invoked some time after a full grace |
214 | | * period elapses, in other words after all currently executing RCU |
215 | | * read-side critical sections have completed. RCU read-side critical |
216 | | * sections are delimited by rcu_read_lock() and rcu_read_unlock(), |
217 | | * and may be nested. |
218 | | */ |
219 | | void call_rcu(struct rcu_head *head, |
220 | | void (*func)(struct rcu_head *rcu)) |
221 | 0 | { |
222 | 0 | unsigned long flags; |
223 | 0 | struct rcu_data *rdp; |
224 | 0 |
|
225 | 0 | head->func = func; |
226 | 0 | head->next = NULL; |
227 | 0 | local_irq_save(flags); |
228 | 0 | rdp = &__get_cpu_var(rcu_data); |
229 | 0 | *rdp->nxttail = head; |
230 | 0 | rdp->nxttail = &head->next; |
231 | 0 | if (unlikely(++rdp->qlen > qhimark)) { |
232 | 0 | rdp->blimit = INT_MAX; |
233 | 0 | force_quiescent_state(rdp, &rcu_ctrlblk); |
234 | 0 | } |
235 | 0 | local_irq_restore(flags); |
236 | 0 | } |
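A hypothetical usage sketch (not taken from this file): an object is unlinked from all reader-visible structures and only freed after a grace period. struct foo, retire_foo() and free_foo_rcu() are made-up names; xfree() is assumed to be the usual Xen heap free routine, and container_of() is used the same way as in rcu_barrier_callback() above.

    /* Assumed headers: <xen/rcupdate.h> for call_rcu()/struct rcu_head,
     * <xen/xmalloc.h> for xfree(). */
    struct foo {
        int data;
        struct rcu_head rcu;
    };

    /* Runs after a full grace period; safe to free the object now. */
    static void free_foo_rcu(struct rcu_head *head)
    {
        struct foo *f = container_of(head, struct foo, rcu);

        xfree(f);
    }

    /* Publisher side: unlink 'f' from all reader-visible structures first,
     * then hand it to RCU for deferred freeing. */
    static void retire_foo(struct foo *f)
    {
        call_rcu(&f->rcu, free_foo_rcu);
    }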
237 | | |
238 | | /* |
239 | | * Invoke the completed RCU callbacks. They are expected to be in |
240 | | * a per-cpu list. |
241 | | */ |
242 | | static void rcu_do_batch(struct rcu_data *rdp) |
243 | 0 | { |
244 | 0 | struct rcu_head *next, *list; |
245 | 0 | int count = 0; |
246 | 0 |
|
247 | 0 | list = rdp->donelist; |
248 | 0 | while (list) { |
249 | 0 | next = rdp->donelist = list->next; |
250 | 0 | list->func(list); |
251 | 0 | list = next; |
252 | 0 | rdp->qlen--; |
253 | 0 | if (++count >= rdp->blimit) |
254 | 0 | break; |
255 | 0 | } |
256 | 0 | if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark) |
257 | 0 | rdp->blimit = blimit; |
258 | 0 | if (!rdp->donelist) |
259 | 0 | rdp->donetail = &rdp->donelist; |
260 | 0 | else |
261 | 0 | raise_softirq(RCU_SOFTIRQ); |
262 | 0 | } |
263 | | |
264 | | /* |
265 | | * Grace period handling: |
 266 | | * The grace period handling consists of two steps: |
 267 | | * - A new grace period is started. |
 268 | | * This is done by rcu_start_batch. The start is not broadcast to |
 269 | | * all cpus; they must pick it up by comparing rcp->cur with |
 270 | | * rdp->quiescbatch. All cpus are recorded in the |
 271 | | * rcu_ctrlblk.cpumask bitmap. |
 272 | | * - All cpus must go through a quiescent state. |
 273 | | * Since the start of the grace period is not broadcast, at least two |
 274 | | * calls to rcu_check_quiescent_state are required: |
 275 | | * The first call just notices that a new grace period is running. The |
 276 | | * following calls check whether there was a quiescent state since the |
 277 | | * beginning of the grace period; if so, the cpu's bit is cleared from |
 278 | | * rcu_ctrlblk.cpumask. Once the bitmap is empty, the grace period is |
 279 | | * completed, and cpu_quiet() calls rcu_start_batch() to start the next |
 280 | | * grace period (if necessary). |
281 | | */ |
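To make the two-step flow above concrete, here is a toy, single-threaded model (standard C, illustration only; locking, memory barriers, next_pending and the idle cpumask are deliberately left out) of two CPUs driving one grace period through the cur/completed/quiescbatch counters:

    #include <assert.h>
    #include <stdio.h>

    struct ctrl { long cur, completed; unsigned int cpumask; };
    struct data { long quiescbatch; int qs_pending; };

    static void start_batch(struct ctrl *c, unsigned int online)
    {
        if (c->completed == c->cur) {
            c->cur++;
            c->cpumask = online;         /* every online CPU must quiesce */
        }
    }

    static void check_quiescent(struct ctrl *c, struct data *d, unsigned int cpu)
    {
        if (d->quiescbatch != c->cur) {  /* first call: notice new period */
            d->qs_pending = 1;
            d->quiescbatch = c->cur;
            return;
        }
        if (!d->qs_pending)              /* later calls: report quiescence */
            return;
        d->qs_pending = 0;
        c->cpumask &= ~(1u << cpu);
        if (c->cpumask == 0)
            c->completed = c->cur;       /* last CPU completes the batch */
    }

    int main(void)
    {
        struct ctrl c = { .cur = -300, .completed = -300 };
        struct data d0 = { .quiescbatch = -300 }, d1 = { .quiescbatch = -300 };

        start_batch(&c, 0x3);            /* CPUs 0 and 1 online */
        check_quiescent(&c, &d0, 0);     /* CPU 0 notices the new period */
        check_quiescent(&c, &d0, 0);     /* ... then reports quiescence  */
        check_quiescent(&c, &d1, 1);
        check_quiescent(&c, &d1, 1);
        assert(c.completed == c.cur);    /* grace period is over */
        printf("batch %ld completed\n", c.completed);
        return 0;
    }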
282 | | /* |
283 | | * Register a new batch of callbacks, and start it up if there is currently no |
284 | | * active batch and the batch to be registered has not already occurred. |
285 | | * Caller must hold rcu_ctrlblk.lock. |
286 | | */ |
287 | | static void rcu_start_batch(struct rcu_ctrlblk *rcp) |
288 | 0 | { |
289 | 0 | if (rcp->next_pending && |
290 | 0 | rcp->completed == rcp->cur) { |
291 | 0 | rcp->next_pending = 0; |
292 | 0 | /* |
293 | 0 | * next_pending == 0 must be visible in |
 294 | 0 | * __rcu_process_callbacks() before it can see the new value of cur. |
295 | 0 | */ |
296 | 0 | smp_wmb(); |
297 | 0 | rcp->cur++; |
 298 | 0 | |
299 | 0 | /* |
 300 | 0 | * Make sure the increment of rcp->cur is visible so that, even if a |
 301 | 0 | * CPU that is about to go idle is captured inside rcp->cpumask, |
 302 | 0 | * rcu_pending() will return true, which then means cpu_quiet() |
 303 | 0 | * will be invoked before the CPU actually enters idle. |
304 | 0 | * |
305 | 0 | * This barrier is paired with the one in rcu_idle_enter(). |
306 | 0 | */ |
307 | 0 | smp_mb(); |
308 | 0 | cpumask_andnot(&rcp->cpumask, &cpu_online_map, &rcp->idle_cpumask); |
309 | 0 | } |
310 | 0 | } |
311 | | |
312 | | /* |
 313 | | * The cpu went through a quiescent state since the beginning of the grace |
 314 | | * period. Clear it from the cpu mask and complete the grace period if it |
 315 | | * was the last cpu. Start another grace period if further entries are pending. |
316 | | */ |
317 | | static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp) |
318 | 0 | { |
319 | 0 | cpumask_clear_cpu(cpu, &rcp->cpumask); |
320 | 0 | if (cpumask_empty(&rcp->cpumask)) { |
321 | 0 | /* batch completed ! */ |
322 | 0 | rcp->completed = rcp->cur; |
323 | 0 | rcu_start_batch(rcp); |
324 | 0 | } |
325 | 0 | } |
326 | | |
327 | | /* |
328 | | * Check if the cpu has gone through a quiescent state (say context |
 329 | | * switch). If so, and if it hasn't already done so in this RCU |
 330 | | * quiescent cycle, then indicate that it has done so. |
331 | | */ |
332 | | static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp, |
333 | | struct rcu_data *rdp) |
334 | 0 | { |
335 | 0 | if (rdp->quiescbatch != rcp->cur) { |
336 | 0 | /* start new grace period: */ |
337 | 0 | rdp->qs_pending = 1; |
338 | 0 | rdp->quiescbatch = rcp->cur; |
339 | 0 | return; |
340 | 0 | } |
 341 | 0 | |
342 | 0 | /* Grace period already completed for this cpu? |
343 | 0 | * qs_pending is checked instead of the actual bitmap to avoid |
 344 | 0 | * cacheline thrashing. |
345 | 0 | */ |
346 | 0 | if (!rdp->qs_pending) |
347 | 0 | return; |
 348 | 0 | |
349 | 0 | rdp->qs_pending = 0; |
 350 | 0 | |
351 | 0 | spin_lock(&rcp->lock); |
352 | 0 | /* |
353 | 0 | * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync |
354 | 0 | * during cpu startup. Ignore the quiescent state. |
355 | 0 | */ |
356 | 0 | if (likely(rdp->quiescbatch == rcp->cur)) |
357 | 0 | cpu_quiet(rdp->cpu, rcp); |
 358 | 0 | |
359 | 0 | spin_unlock(&rcp->lock); |
360 | 0 | } |
361 | | |
362 | | |
363 | | /* |
364 | | * This does the RCU processing work from softirq context. |
365 | | */ |
366 | | static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp, |
367 | | struct rcu_data *rdp) |
368 | 0 | { |
369 | 0 | if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) { |
370 | 0 | *rdp->donetail = rdp->curlist; |
371 | 0 | rdp->donetail = rdp->curtail; |
372 | 0 | rdp->curlist = NULL; |
373 | 0 | rdp->curtail = &rdp->curlist; |
374 | 0 | } |
 375 | 0 | |
376 | 0 | local_irq_disable(); |
377 | 0 | if (rdp->nxtlist && !rdp->curlist) { |
378 | 0 | rdp->curlist = rdp->nxtlist; |
379 | 0 | rdp->curtail = rdp->nxttail; |
380 | 0 | rdp->nxtlist = NULL; |
381 | 0 | rdp->nxttail = &rdp->nxtlist; |
382 | 0 | local_irq_enable(); |
 383 | 0 | |
384 | 0 | /* |
385 | 0 | * start the next batch of callbacks |
386 | 0 | */ |
 387 | 0 | |
388 | 0 | /* determine batch number */ |
389 | 0 | rdp->batch = rcp->cur + 1; |
390 | 0 | /* see the comment and corresponding wmb() in |
391 | 0 | * the rcu_start_batch() |
392 | 0 | */ |
393 | 0 | smp_rmb(); |
 394 | 0 | |
395 | 0 | if (!rcp->next_pending) { |
396 | 0 | /* and start it/schedule start if it's a new batch */ |
397 | 0 | spin_lock(&rcp->lock); |
398 | 0 | rcp->next_pending = 1; |
399 | 0 | rcu_start_batch(rcp); |
400 | 0 | spin_unlock(&rcp->lock); |
401 | 0 | } |
402 | 0 | } else { |
403 | 0 | local_irq_enable(); |
404 | 0 | } |
405 | 0 | rcu_check_quiescent_state(rcp, rdp); |
406 | 0 | if (rdp->donelist) |
407 | 0 | rcu_do_batch(rdp); |
408 | 0 | } |
409 | | |
410 | | static void rcu_process_callbacks(void) |
411 | 0 | { |
412 | 0 | __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data)); |
413 | 0 | } |
414 | | |
415 | | static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) |
416 | 14.8M | { |
417 | 14.8M | /* This cpu has pending rcu entries and the grace period |
418 | 14.8M | * for them has completed. |
419 | 14.8M | */ |
420 | 14.8M | if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) |
421 | 0 | return 1; |
422 | 14.8M | |
423 | 14.8M | /* This cpu has no pending entries, but there are new entries */ |
424 | 15.2M | if (!rdp->curlist && rdp->nxtlist) |
425 | 0 | return 1; |
426 | 14.8M | |
427 | 14.8M | /* This cpu has finished callbacks to invoke */ |
428 | 14.8M | if (rdp->donelist) |
429 | 0 | return 1; |
430 | 14.8M | |
431 | 14.8M | /* The rcu core waits for a quiescent state from the cpu */ |
432 | 15.2M | if (rdp->quiescbatch != rcp->cur || rdp->qs_pending) |
433 | 0 | return 1; |
434 | 14.8M | |
435 | 14.8M | /* nothing to do */ |
436 | 14.8M | return 0; |
437 | 14.8M | } |
438 | | |
439 | | int rcu_pending(int cpu) |
440 | 14.7M | { |
441 | 14.7M | return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)); |
442 | 14.7M | } |
443 | | |
444 | | /* |
445 | | * Check to see if any future RCU-related work will need to be done |
446 | | * by the current CPU, even if none need be done immediately, returning |
447 | | * 1 if so. This function is part of the RCU implementation; it is -not- |
448 | | * an exported member of the RCU API. |
449 | | */ |
450 | | int rcu_needs_cpu(int cpu) |
451 | 3.47M | { |
452 | 3.47M | struct rcu_data *rdp = &per_cpu(rcu_data, cpu); |
453 | 3.47M | |
454 | 3.52M | return (rdp->curlist && !rdp->idle_timer_active) || rcu_pending(cpu); |
455 | 3.47M | } |
456 | | |
457 | | /* |
 458 | | * Timer for making sure that a CPU with a callback queued periodically |
 459 | | * pokes rcu_pending(), so that the callback is invoked not too long |
 460 | | * after the end of the grace period. |
461 | | */ |
462 | | void rcu_idle_timer_start() |
463 | 1.89M | { |
464 | 1.89M | struct rcu_data *rdp = &this_cpu(rcu_data); |
465 | 1.89M | |
466 | 1.89M | /* |
467 | 1.89M | * Note that we don't check rcu_pending() here. In fact, we don't want |
468 | 1.89M | * the timer armed on CPUs that are in the process of quiescing while |
469 | 1.89M | * going idle, unless they really are the ones with a queued callback. |
470 | 1.89M | */ |
471 | 1.89M | if (likely(!rdp->curlist)) |
472 | 1.89M | return; |
473 | 1.89M | |
474 | 6.83k | set_timer(&rdp->idle_timer, NOW() + idle_timer_period); |
475 | 6.83k | rdp->idle_timer_active = true; |
476 | 6.83k | } |
477 | | |
478 | | void rcu_idle_timer_stop() |
479 | 1.88M | { |
480 | 1.88M | struct rcu_data *rdp = &this_cpu(rcu_data); |
481 | 1.88M | |
482 | 1.88M | if (likely(!rdp->idle_timer_active)) |
483 | 1.87M | return; |
484 | 1.88M | |
485 | 6.02k | rdp->idle_timer_active = false; |
486 | 6.02k | |
487 | 6.02k | /* |
488 | 6.02k | * In general, as the CPU is becoming active again, we don't need the |
489 | 6.02k | * idle timer, and so we want to stop it. |
490 | 6.02k | * |
491 | 6.02k | * However, in case we are here because idle_timer has (just) fired and |
492 | 6.02k | * has woken up the CPU, we skip stop_timer() now. In fact, when a CPU |
493 | 6.02k | * wakes up from idle, this code always runs before do_softirq() has the |
494 | 6.02k | * chance to check and deal with TIMER_SOFTIRQ. And if we stop the timer |
495 | 6.02k | * now, the TIMER_SOFTIRQ handler will see it as inactive, and will not |
496 | 6.02k | * call rcu_idle_timer_handler(). |
497 | 6.02k | * |
498 | 6.02k | * Therefore, if we see that the timer is expired already, we leave it |
499 | 6.02k | * alone. The TIMER_SOFTIRQ handler will then run the timer routine, and |
500 | 6.02k | * deactivate it. |
501 | 6.02k | */ |
502 | 6.02k | if ( !timer_is_expired(&rdp->idle_timer) ) |
503 | 0 | stop_timer(&rdp->idle_timer); |
504 | 6.02k | } |
505 | | |
506 | | static void rcu_idle_timer_handler(void* data) |
507 | 0 | { |
508 | 0 | perfc_incr(rcu_idle_timer); |
 509 | 0 | |
510 | 0 | if ( !cpumask_empty(&rcu_ctrlblk.cpumask) ) |
511 | 0 | idle_timer_period = min(idle_timer_period + IDLE_TIMER_PERIOD_INCR, |
512 | 0 | IDLE_TIMER_PERIOD_MAX); |
513 | 0 | else |
514 | 0 | idle_timer_period = max(idle_timer_period - IDLE_TIMER_PERIOD_DECR, |
515 | 0 | IDLE_TIMER_PERIOD_MIN); |
516 | 0 | } |
517 | | |
518 | | void rcu_check_callbacks(int cpu) |
519 | 0 | { |
520 | 0 | raise_softirq(RCU_SOFTIRQ); |
521 | 0 | } |
522 | | |
523 | | static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list, |
524 | | struct rcu_head **tail) |
525 | 0 | { |
526 | 0 | local_irq_disable(); |
527 | 0 | *this_rdp->nxttail = list; |
528 | 0 | if (list) |
529 | 0 | this_rdp->nxttail = tail; |
530 | 0 | local_irq_enable(); |
531 | 0 | } |
532 | | |
533 | | static void rcu_offline_cpu(struct rcu_data *this_rdp, |
534 | | struct rcu_ctrlblk *rcp, struct rcu_data *rdp) |
535 | 0 | { |
536 | 0 | kill_timer(&rdp->idle_timer); |
 537 | 0 | |
538 | 0 | /* If the cpu going offline owns the grace period we can block |
539 | 0 | * indefinitely waiting for it, so flush it here. |
540 | 0 | */ |
541 | 0 | spin_lock(&rcp->lock); |
542 | 0 | if (rcp->cur != rcp->completed) |
543 | 0 | cpu_quiet(rdp->cpu, rcp); |
544 | 0 | spin_unlock(&rcp->lock); |
 545 | 0 | |
546 | 0 | rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail); |
547 | 0 | rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail); |
548 | 0 | rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail); |
 549 | 0 | |
550 | 0 | local_irq_disable(); |
551 | 0 | this_rdp->qlen += rdp->qlen; |
552 | 0 | local_irq_enable(); |
553 | 0 | } |
554 | | |
555 | | static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp, |
556 | | struct rcu_data *rdp) |
557 | 12 | { |
558 | 12 | memset(rdp, 0, sizeof(*rdp)); |
559 | 12 | rdp->curtail = &rdp->curlist; |
560 | 12 | rdp->nxttail = &rdp->nxtlist; |
561 | 12 | rdp->donetail = &rdp->donelist; |
562 | 12 | rdp->quiescbatch = rcp->completed; |
563 | 12 | rdp->qs_pending = 0; |
564 | 12 | rdp->cpu = cpu; |
565 | 12 | rdp->blimit = blimit; |
566 | 12 | init_timer(&rdp->idle_timer, rcu_idle_timer_handler, rdp, cpu); |
567 | 12 | } |
568 | | |
569 | | static int cpu_callback( |
570 | | struct notifier_block *nfb, unsigned long action, void *hcpu) |
571 | 34 | { |
572 | 34 | unsigned int cpu = (unsigned long)hcpu; |
573 | 34 | struct rcu_data *rdp = &per_cpu(rcu_data, cpu); |
574 | 34 | |
575 | 34 | switch ( action ) |
576 | 34 | { |
577 | 12 | case CPU_UP_PREPARE: |
578 | 12 | rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp); |
579 | 12 | break; |
580 | 0 | case CPU_UP_CANCELED: |
581 | 0 | case CPU_DEAD: |
582 | 0 | rcu_offline_cpu(&this_cpu(rcu_data), &rcu_ctrlblk, rdp); |
583 | 0 | break; |
584 | 22 | default: |
585 | 22 | break; |
586 | 34 | } |
587 | 34 | |
588 | 34 | return NOTIFY_DONE; |
589 | 34 | } |
590 | | |
591 | | static struct notifier_block cpu_nfb = { |
592 | | .notifier_call = cpu_callback |
593 | | }; |
594 | | |
595 | | void __init rcu_init(void) |
596 | 1 | { |
597 | 1 | void *cpu = (void *)(long)smp_processor_id(); |
598 | 1 | static unsigned int __initdata idle_timer_period_ms = |
599 | 1 | IDLE_TIMER_PERIOD_DEFAULT / MILLISECS(1); |
600 | 1 | integer_param("rcu-idle-timer-period-ms", idle_timer_period_ms); |
601 | 1 | |
602 | 1 | /* We don't allow 0, or anything higher than IDLE_TIMER_PERIOD_MAX */ |
603 | 1 | if ( idle_timer_period_ms == 0 || |
604 | 1 | idle_timer_period_ms > IDLE_TIMER_PERIOD_MAX / MILLISECS(1) ) |
605 | 0 | { |
606 | 0 | idle_timer_period_ms = IDLE_TIMER_PERIOD_DEFAULT / MILLISECS(1); |
607 | 0 | printk("WARNING: rcu-idle-timer-period-ms outside of " |
608 | 0 | "(0,%"PRI_stime"]. Resetting it to %u.\n", |
609 | 0 | IDLE_TIMER_PERIOD_MAX / MILLISECS(1), idle_timer_period_ms); |
610 | 0 | } |
611 | 1 | idle_timer_period = MILLISECS(idle_timer_period_ms); |
612 | 1 | |
613 | 1 | cpumask_clear(&rcu_ctrlblk.idle_cpumask); |
614 | 1 | cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); |
615 | 1 | register_cpu_notifier(&cpu_nfb); |
616 | 1 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); |
617 | 1 | } |
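For reference, based purely on the parsing above: booting with, e.g., rcu-idle-timer-period-ms=5 on the Xen command line sets the initial idle timer period to 5ms; a value of 0, or one larger than 100 (IDLE_TIMER_PERIOD_MAX in ms), is rejected and replaced by the 10ms default, with the warning above being printed.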
618 | | |
619 | | /* |
620 | | * The CPU is becoming idle, so no more read side critical |
621 | | * sections, and one more step toward grace period. |
622 | | */ |
623 | | void rcu_idle_enter(unsigned int cpu) |
624 | 1.81M | { |
625 | 1.81M | ASSERT(!cpumask_test_cpu(cpu, &rcu_ctrlblk.idle_cpumask)); |
626 | 1.81M | cpumask_set_cpu(cpu, &rcu_ctrlblk.idle_cpumask); |
627 | 1.81M | /* |
628 | 1.81M | * If some other CPU is starting a new grace period, we'll notice that |
 629 | 1.81M | * by seeing a new value in rcp->cur (different from our quiescbatch). |
 630 | 1.81M | * That will force us all the way to cpu_quiet(), clearing our bit in |
 631 | 1.81M | * rcp->cpumask, even in case we ended up in there. |
 632 | 1.81M | * |
 633 | 1.81M | * See the comment before cpumask_andnot() in rcu_start_batch(). |
634 | 1.81M | */ |
635 | 1.81M | smp_mb(); |
636 | 1.81M | } |
637 | | |
638 | | void rcu_idle_exit(unsigned int cpu) |
639 | 1.86M | { |
640 | 1.86M | ASSERT(cpumask_test_cpu(cpu, &rcu_ctrlblk.idle_cpumask)); |
641 | 1.86M | cpumask_clear_cpu(cpu, &rcu_ctrlblk.idle_cpumask); |
642 | 1.86M | } |