/root/src/xen/xen/common/spinlock.c
Line | Count | Source |
1 | | #include <xen/lib.h> |
2 | | #include <xen/irq.h> |
3 | | #include <xen/smp.h> |
4 | | #include <xen/time.h> |
5 | | #include <xen/spinlock.h> |
6 | | #include <xen/guest_access.h> |
7 | | #include <xen/preempt.h> |
8 | | #include <public/sysctl.h> |
9 | | #include <asm/processor.h> |
10 | | #include <asm/atomic.h> |
11 | | |
12 | | #ifndef NDEBUG |
13 | | |
14 | | static atomic_t spin_debug __read_mostly = ATOMIC_INIT(0); |
15 | | |
16 | | static void check_lock(struct lock_debug *debug) |
17 | 54.6M | { |
18 | 54.6M | int irq_safe = !local_irq_is_enabled(); |
19 | 54.6M | |
20 | 54.6M | if ( unlikely(atomic_read(&spin_debug) <= 0) ) |
21 | 4.17M | return; |
22 | 54.6M | |
23 | 54.6M | /* A few places take liberties with this. */ |
24 | 54.6M | /* BUG_ON(in_irq() && !irq_safe); */ |
25 | 54.6M | |
26 | 54.6M | /* |
27 | 54.6M | * We partition locks into IRQ-safe (always held with IRQs disabled) and |
28 | 54.6M | * IRQ-unsafe (always held with IRQs enabled) types. The convention for |
29 | 54.6M | * every lock must be consistently observed else we can deadlock in |
30 | 54.6M | * IRQ-context rendezvous functions (a rendezvous which gets every CPU |
31 | 54.6M | * into IRQ context before any CPU is released from the rendezvous). |
32 | 54.6M | * |
33 | 54.6M | * If we can mix IRQ-disabled and IRQ-enabled callers, the following can |
34 | 54.6M | * happen: |
35 | 54.6M | * * Lock is held by CPU A, with IRQs enabled |
36 | 54.6M | * * CPU B is spinning on same lock, with IRQs disabled |
37 | 54.6M | * * Rendezvous starts -- CPU A takes interrupt and enters rendezvous spin |
38 | 54.6M | * * DEADLOCK -- CPU B will never enter rendezvous, CPU A will never exit |
39 | 54.6M | * the rendezvous, and will hence never release the lock. |
40 | 54.6M | * |
41 | 54.6M | * To guard against this subtle bug we latch the IRQ safety of every |
42 | 54.6M | * spinlock in the system, on first use. |
43 | 54.6M | */ |
44 | 50.4M | if ( unlikely(debug->irq_safe != irq_safe) ) |
45 | 400 | { |
46 | 400 | int seen = cmpxchg(&debug->irq_safe, -1, irq_safe); |
47 | 400 | BUG_ON(seen == !irq_safe); |
48 | 400 | } |
49 | 50.4M | } |
50 | | |
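A minimal usage sketch of the inconsistency check_lock() guards against (demo_lock and both callers below are hypothetical, not part of spinlock.c): once a lock has been taken with IRQs enabled, a later attempt to take it with IRQs disabled makes the BUG_ON() above fire.

    static DEFINE_SPINLOCK(demo_lock);

    void demo_irqs_on(void)                      /* runs with IRQs enabled */
    {
        spin_lock(&demo_lock);                   /* first use: irq_safe latches to 0 */
        /* ... */
        spin_unlock(&demo_lock);
    }

    void demo_irqs_off(void)
    {
        unsigned long flags;

        spin_lock_irqsave(&demo_lock, flags);    /* lock now held with IRQs off: check_lock() BUGs */
        /* ... */
        spin_unlock_irqrestore(&demo_lock, flags);
    }
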
51 | | static void check_barrier(struct lock_debug *debug) |
52 | 0 | { |
53 | 0 | if ( unlikely(atomic_read(&spin_debug) <= 0) ) |
54 | 0 | return; |
55 | 0 | |
56 | 0 | /* |
57 | 0 | * For a barrier, we have a relaxed IRQ-safety-consistency check. |
58 | 0 | * |
59 | 0 | * It is always safe to spin at the barrier with IRQs enabled -- that does |
60 | 0 | * not prevent us from entering an IRQ-context rendezvous, and nor are |
61 | 0 | * we preventing anyone else from doing so (since we do not actually |
62 | 0 | * acquire the lock during a barrier operation). |
63 | 0 | * |
64 | 0 | * However, if we spin on an IRQ-unsafe lock with IRQs disabled then that |
65 | 0 | * is clearly wrong, for the same reason outlined in check_lock() above. |
66 | 0 | */ |
67 | 0 | BUG_ON(!local_irq_is_enabled() && (debug->irq_safe == 0)); |
68 | 0 | } |
69 | | |
70 | | void spin_debug_enable(void) |
71 | 12 | { |
72 | 12 | atomic_inc(&spin_debug); |
73 | 12 | } |
74 | | |
75 | | void spin_debug_disable(void) |
76 | 11 | { |
77 | 11 | atomic_dec(&spin_debug); |
78 | 11 | } |
79 | | |
80 | | #else /* defined(NDEBUG) */ |
81 | | |
82 | | #define check_lock(l) ((void)0) |
83 | | #define check_barrier(l) ((void)0) |
84 | | |
85 | | #endif |
86 | | |
87 | | #ifdef CONFIG_LOCK_PROFILE |
88 | | |
89 | | #define LOCK_PROFILE_REL \ |
90 | | if (lock->profile) \ |
91 | | { \ |
92 | | lock->profile->time_hold += NOW() - lock->profile->time_locked; \ |
93 | | lock->profile->lock_cnt++; \ |
94 | | } |
95 | | #define LOCK_PROFILE_VAR s_time_t block = 0 |
96 | | #define LOCK_PROFILE_BLOCK block = block ? : NOW(); |
97 | | #define LOCK_PROFILE_GOT \ |
98 | | if (lock->profile) \ |
99 | | { \ |
100 | | lock->profile->time_locked = NOW(); \ |
101 | | if (block) \ |
102 | | { \ |
103 | | lock->profile->time_block += lock->profile->time_locked - block; \ |
104 | | lock->profile->block_cnt++; \ |
105 | | } \ |
106 | | } |
107 | | |
108 | | #else |
109 | | |
110 | | #define LOCK_PROFILE_REL |
111 | | #define LOCK_PROFILE_VAR |
112 | | #define LOCK_PROFILE_BLOCK |
113 | | #define LOCK_PROFILE_GOT |
114 | | |
115 | | #endif |
116 | | |
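To make the profiling arithmetic above concrete (illustrative numbers only): a caller that starts spinning at t = 90, is granted the lock at t = 100 and releases it at t = 130 (all in NOW() units, i.e. nanoseconds) contributes time_block += 10 and block_cnt += 1 via LOCK_PROFILE_BLOCK/LOCK_PROFILE_GOT, and time_hold += 30 and lock_cnt += 1 via LOCK_PROFILE_REL.
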
117 | | static always_inline spinlock_tickets_t observe_lock(spinlock_tickets_t *t) |
118 | 8.79k | { |
119 | 8.79k | spinlock_tickets_t v; |
120 | 8.79k | |
121 | 8.79k | smp_rmb(); |
122 | 8.79k | v.head_tail = read_atomic(&t->head_tail); |
123 | 8.79k | return v; |
124 | 8.79k | } |
125 | | |
126 | | static always_inline u16 observe_head(spinlock_tickets_t *t) |
127 | 58.0M | { |
128 | 58.0M | smp_rmb(); |
129 | 58.0M | return read_atomic(&t->head); |
130 | 58.0M | } |
131 | | |
132 | | void inline _spin_lock_cb(spinlock_t *lock, void (*cb)(void *), void *data) |
133 | 48.7M | { |
134 | 48.7M | spinlock_tickets_t tickets = SPINLOCK_TICKET_INC; |
135 | 48.7M | LOCK_PROFILE_VAR; |
136 | 48.7M | |
137 | 48.7M | check_lock(&lock->debug); |
138 | 48.7M | tickets.head_tail = arch_fetch_and_add(&lock->tickets.head_tail, |
139 | 48.7M | tickets.head_tail); |
140 | 49.7M | while ( tickets.tail != observe_head(&lock->tickets) ) |
141 | 941k | { |
142 | 941k | LOCK_PROFILE_BLOCK; |
143 | 941k | if ( unlikely(cb) ) |
144 | 29 | cb(data); |
145 | 941k | arch_lock_relax(); |
146 | 941k | } |
147 | 48.7M | LOCK_PROFILE_GOT; |
148 | 48.7M | preempt_disable(); |
149 | 48.7M | arch_lock_acquire_barrier(); |
150 | 48.7M | } |
151 | | |
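A caller sketch for the cb/data pair (hypothetical names): the callback is invoked on every spin iteration while waiting, just before arch_lock_relax(), so it must be cheap and must not take locks itself.

    static void count_spin(void *data)
    {
        (*(unsigned long *)data)++;      /* tally contended iterations */
    }

    void demo_lock_cb(spinlock_t *lock)
    {
        unsigned long spins = 0;

        _spin_lock_cb(lock, count_spin, &spins);
        /* ... critical section; 'spins' says how long we waited ... */
        spin_unlock(lock);
    }
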
152 | | void _spin_lock(spinlock_t *lock) |
153 | 48.8M | { |
154 | 48.8M | _spin_lock_cb(lock, NULL, NULL); |
155 | 48.8M | } |
156 | | |
157 | | void _spin_lock_irq(spinlock_t *lock) |
158 | 13.0M | { |
159 | 13.0M | ASSERT(local_irq_is_enabled()); |
160 | 13.0M | local_irq_disable(); |
161 | 13.0M | _spin_lock(lock); |
162 | 13.0M | } |
163 | | |
164 | | unsigned long _spin_lock_irqsave(spinlock_t *lock) |
165 | 734k | { |
166 | 734k | unsigned long flags; |
167 | 734k | |
168 | 734k | local_irq_save(flags); |
169 | 734k | _spin_lock(lock); |
170 | 734k | return flags; |
171 | 734k | } |
172 | | |
173 | | void _spin_unlock(spinlock_t *lock) |
174 | 48.9M | { |
175 | 48.9M | arch_lock_release_barrier(); |
176 | 48.9M | preempt_enable(); |
177 | 48.9M | LOCK_PROFILE_REL; |
178 | 48.9M | add_sized(&lock->tickets.head, 1); |
179 | 48.4M | arch_lock_signal(); |
180 | 48.4M | } |
181 | | |
182 | | void _spin_unlock_irq(spinlock_t *lock) |
183 | 13.2M | { |
184 | 13.2M | _spin_unlock(lock); |
185 | 13.2M | local_irq_enable(); |
186 | 13.2M | } |
187 | | |
188 | | void _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) |
189 | 734k | { |
190 | 734k | _spin_unlock(lock); |
191 | 734k | local_irq_restore(flags); |
192 | 734k | } |
193 | | |
194 | | int _spin_is_locked(spinlock_t *lock) |
195 | 5.60M | { |
196 | 5.60M | check_lock(&lock->debug); |
197 | 5.60M | |
198 | 5.60M | /* |
199 | 5.60M | * Recursive locks may be locked by another CPU, yet we return |
200 | 5.60M | * "false" here, making this function suitable only for use in |
201 | 5.60M | * ASSERT()s and alike. |
202 | 5.60M | */ |
203 | 5.60M | return lock->recurse_cpu == SPINLOCK_NO_CPU |
204 | 5.53M | ? lock->tickets.head != lock->tickets.tail |
205 | 67.2k | : lock->recurse_cpu == smp_processor_id(); |
206 | 5.60M | } |
207 | | |
208 | | int _spin_trylock(spinlock_t *lock) |
209 | 8.79k | { |
210 | 8.79k | spinlock_tickets_t old, new; |
211 | 8.79k | |
212 | 8.79k | check_lock(&lock->debug); |
213 | 8.79k | old = observe_lock(&lock->tickets); |
214 | 8.79k | if ( old.head != old.tail ) |
215 | 982 | return 0; |
216 | 7.81k | new = old; |
217 | 7.81k | new.tail++; |
218 | 7.81k | if ( cmpxchg(&lock->tickets.head_tail, |
219 | 7.81k | old.head_tail, new.head_tail) != old.head_tail ) |
220 | 66 | return 0; |
221 | 7.81k | #ifdef CONFIG_LOCK_PROFILE |
222 | | if (lock->profile) |
223 | | lock->profile->time_locked = NOW(); |
224 | | #endif |
225 | 7.75k | preempt_disable(); |
226 | 7.75k | /* |
227 | 7.75k | * cmpxchg() is a full barrier so no need for an |
228 | 7.75k | * arch_lock_acquire_barrier(). |
229 | 7.75k | */ |
230 | 7.75k | return 1; |
231 | 7.81k | } |
232 | | |
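A sketch of the usual trylock pattern (hypothetical caller): take a second lock outside the normal ordering and back off instead of spinning, so deadlock is avoided.

    int demo_lock_pair(spinlock_t *a, spinlock_t *b)
    {
        spin_lock(a);
        if ( !spin_trylock(b) )          /* spinning here could deadlock against a b-then-a caller */
        {
            spin_unlock(a);
            return -EAGAIN;              /* caller retries, e.g. in the opposite order */
        }
        /* ... both locks held ... */
        spin_unlock(b);
        spin_unlock(a);
        return 0;
    }
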
233 | | void _spin_barrier(spinlock_t *lock) |
234 | 0 | { |
235 | 0 | spinlock_tickets_t sample; |
236 | 0 | #ifdef CONFIG_LOCK_PROFILE |
237 | | s_time_t block = NOW(); |
238 | | #endif |
239 | 0 | |
240 | 0 | check_barrier(&lock->debug); |
241 | 0 | smp_mb(); |
242 | 0 | sample = observe_lock(&lock->tickets); |
243 | 0 | if ( sample.head != sample.tail ) |
244 | 0 | { |
245 | 0 | while ( observe_head(&lock->tickets) == sample.head ) |
246 | 0 | arch_lock_relax(); |
247 | 0 | #ifdef CONFIG_LOCK_PROFILE |
248 | | if ( lock->profile ) |
249 | | { |
250 | | lock->profile->time_block += NOW() - block; |
251 | | lock->profile->block_cnt++; |
252 | | } |
253 | | #endif |
254 | 0 | } |
255 | 0 | smp_mb(); |
256 | 0 | } |
257 | | |
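An illustrative use of the barrier (demo_obj and demo_retire are hypothetical): wait for any CPU currently inside the object's locked section to leave it before freeing, without acquiring the lock ourselves. This only works once the object can no longer be found by new lockers.

    struct demo_obj {
        spinlock_t lock;
        /* ... payload ... */
    };

    void demo_retire(struct demo_obj *obj)
    {
        /* obj has already been unlinked, so no new locker can reach it. */
        spin_barrier(&obj->lock);        /* wait out any in-flight holder */
        xfree(obj);
    }
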
258 | | int _spin_trylock_recursive(spinlock_t *lock) |
259 | 11 | { |
260 | 11 | unsigned int cpu = smp_processor_id(); |
261 | 11 | |
262 | 11 | /* Don't allow overflow of recurse_cpu field. */ |
263 | 11 | BUILD_BUG_ON(NR_CPUS > SPINLOCK_NO_CPU); |
264 | 11 | |
265 | 11 | check_lock(&lock->debug); |
266 | 11 | |
267 | 11 | if ( likely(lock->recurse_cpu != cpu) ) |
268 | 11 | { |
269 | 11 | if ( !spin_trylock(lock) ) |
270 | 0 | return 0; |
271 | 11 | lock->recurse_cpu = cpu; |
272 | 11 | } |
273 | 11 | |
274 | 11 | /* We support only fairly shallow recursion, else the counter overflows. */ |
275 | 11 | ASSERT(lock->recurse_cnt < SPINLOCK_MAX_RECURSE); |
276 | 11 | lock->recurse_cnt++; |
277 | 11 | |
278 | 11 | return 1; |
279 | 11 | } |
280 | | |
281 | | void _spin_lock_recursive(spinlock_t *lock) |
282 | 36.7k | { |
283 | 36.7k | unsigned int cpu = smp_processor_id(); |
284 | 36.7k | |
285 | 36.7k | if ( likely(lock->recurse_cpu != cpu) ) |
286 | 36.7k | { |
287 | 36.7k | _spin_lock(lock); |
288 | 36.7k | lock->recurse_cpu = cpu; |
289 | 36.7k | } |
290 | 36.7k | |
291 | 36.7k | /* We support only fairly shallow recursion, else the counter overflows. */ |
292 | 36.7k | ASSERT(lock->recurse_cnt < SPINLOCK_MAX_RECURSE); |
293 | 36.7k | lock->recurse_cnt++; |
294 | 36.7k | } |
295 | | |
296 | | void _spin_unlock_recursive(spinlock_t *lock) |
297 | 36.7k | { |
298 | 36.7k | if ( likely(--lock->recurse_cnt == 0) ) |
299 | 36.7k | { |
300 | 36.7k | lock->recurse_cpu = SPINLOCK_NO_CPU; |
301 | 36.7k | spin_unlock(lock); |
302 | 36.7k | } |
303 | 36.7k | } |
304 | | |
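A sketch of the re-entry the _recursive variants permit (hypothetical helpers): the same CPU may take the lock again through a nested path, which would self-deadlock with the plain variants; only recurse_cnt is bumped on the inner acquisition.

    static DEFINE_SPINLOCK(demo_rlock);

    static void demo_inner(void)
    {
        spin_lock_recursive(&demo_rlock);    /* already ours: recurse_cnt++ only */
        /* ... */
        spin_unlock_recursive(&demo_rlock);
    }

    void demo_outer(void)
    {
        spin_lock_recursive(&demo_rlock);
        demo_inner();                        /* no self-deadlock */
        spin_unlock_recursive(&demo_rlock);
    }
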
305 | | #ifdef CONFIG_LOCK_PROFILE |
306 | | |
307 | | struct lock_profile_anc { |
308 | | struct lock_profile_qhead *head_q; /* first head of this type */ |
309 | | char *name; /* descriptive string for print */ |
310 | | }; |
311 | | |
312 | | typedef void lock_profile_subfunc( |
313 | | struct lock_profile *, int32_t, int32_t, void *); |
314 | | |
315 | | extern struct lock_profile *__lock_profile_start; |
316 | | extern struct lock_profile *__lock_profile_end; |
317 | | |
318 | | static s_time_t lock_profile_start; |
319 | | static struct lock_profile_anc lock_profile_ancs[LOCKPROF_TYPE_N]; |
320 | | static struct lock_profile_qhead lock_profile_glb_q; |
321 | | static spinlock_t lock_profile_lock = SPIN_LOCK_UNLOCKED; |
322 | | |
323 | | static void spinlock_profile_iterate(lock_profile_subfunc *sub, void *par) |
324 | | { |
325 | | int i; |
326 | | struct lock_profile_qhead *hq; |
327 | | struct lock_profile *eq; |
328 | | |
329 | | spin_lock(&lock_profile_lock); |
330 | | for ( i = 0; i < LOCKPROF_TYPE_N; i++ ) |
331 | | for ( hq = lock_profile_ancs[i].head_q; hq; hq = hq->head_q ) |
332 | | for ( eq = hq->elem_q; eq; eq = eq->next ) |
333 | | sub(eq, i, hq->idx, par); |
334 | | spin_unlock(&lock_profile_lock); |
335 | | } |
336 | | |
337 | | static void spinlock_profile_print_elem(struct lock_profile *data, |
338 | | int32_t type, int32_t idx, void *par) |
339 | | { |
340 | | if ( type == LOCKPROF_TYPE_GLOBAL ) |
341 | | printk("%s %s:\n", lock_profile_ancs[type].name, data->name); |
342 | | else |
343 | | printk("%s %d %s:\n", lock_profile_ancs[type].name, idx, data->name); |
344 | | printk(" lock:%12"PRId64"(%08X:%08X), block:%12"PRId64"(%08X:%08X)\n", |
345 | | data->lock_cnt, (u32)(data->time_hold >> 32), (u32)data->time_hold, |
346 | | data->block_cnt, (u32)(data->time_block >> 32), |
347 | | (u32)data->time_block); |
348 | | } |
349 | | |
350 | | void spinlock_profile_printall(unsigned char key) |
351 | | { |
352 | | s_time_t now = NOW(); |
353 | | s_time_t diff; |
354 | | |
355 | | diff = now - lock_profile_start; |
356 | | printk("Xen lock profile info SHOW (now = %08X:%08X, " |
357 | | "total = %08X:%08X)\n", (u32)(now>>32), (u32)now, |
358 | | (u32)(diff>>32), (u32)diff); |
359 | | spinlock_profile_iterate(spinlock_profile_print_elem, NULL); |
360 | | } |
361 | | |
362 | | static void spinlock_profile_reset_elem(struct lock_profile *data, |
363 | | int32_t type, int32_t idx, void *par) |
364 | | { |
365 | | data->lock_cnt = 0; |
366 | | data->block_cnt = 0; |
367 | | data->time_hold = 0; |
368 | | data->time_block = 0; |
369 | | } |
370 | | |
371 | | void spinlock_profile_reset(unsigned char key) |
372 | | { |
373 | | s_time_t now = NOW(); |
374 | | |
375 | | if ( key != '\0' ) |
376 | | printk("Xen lock profile info RESET (now = %08X:%08X)\n", |
377 | | (u32)(now>>32), (u32)now); |
378 | | lock_profile_start = now; |
379 | | spinlock_profile_iterate(spinlock_profile_reset_elem, NULL); |
380 | | } |
381 | | |
382 | | typedef struct { |
383 | | struct xen_sysctl_lockprof_op *pc; |
384 | | int rc; |
385 | | } spinlock_profile_ucopy_t; |
386 | | |
387 | | static void spinlock_profile_ucopy_elem(struct lock_profile *data, |
388 | | int32_t type, int32_t idx, void *par) |
389 | | { |
390 | | spinlock_profile_ucopy_t *p = par; |
391 | | struct xen_sysctl_lockprof_data elem; |
392 | | |
393 | | if ( p->rc ) |
394 | | return; |
395 | | |
396 | | if ( p->pc->nr_elem < p->pc->max_elem ) |
397 | | { |
398 | | safe_strcpy(elem.name, data->name); |
399 | | elem.type = type; |
400 | | elem.idx = idx; |
401 | | elem.lock_cnt = data->lock_cnt; |
402 | | elem.block_cnt = data->block_cnt; |
403 | | elem.lock_time = data->time_hold; |
404 | | elem.block_time = data->time_block; |
405 | | if ( copy_to_guest_offset(p->pc->data, p->pc->nr_elem, &elem, 1) ) |
406 | | p->rc = -EFAULT; |
407 | | } |
408 | | |
409 | | if ( !p->rc ) |
410 | | p->pc->nr_elem++; |
411 | | } |
412 | | |
413 | | /* Dom0 control of lock profiling */ |
414 | | int spinlock_profile_control(struct xen_sysctl_lockprof_op *pc) |
415 | | { |
416 | | int rc = 0; |
417 | | spinlock_profile_ucopy_t par; |
418 | | |
419 | | switch ( pc->cmd ) |
420 | | { |
421 | | case XEN_SYSCTL_LOCKPROF_reset: |
422 | | spinlock_profile_reset('\0'); |
423 | | break; |
424 | | case XEN_SYSCTL_LOCKPROF_query: |
425 | | pc->nr_elem = 0; |
426 | | par.rc = 0; |
427 | | par.pc = pc; |
428 | | spinlock_profile_iterate(spinlock_profile_ucopy_elem, &par); |
429 | | pc->time = NOW() - lock_profile_start; |
430 | | rc = par.rc; |
431 | | break; |
432 | | default: |
433 | | rc = -EINVAL; |
434 | | break; |
435 | | } |
436 | | |
437 | | return rc; |
438 | | } |
439 | | |
440 | | void _lock_profile_register_struct( |
441 | | int32_t type, struct lock_profile_qhead *qhead, int32_t idx, char *name) |
442 | | { |
443 | | qhead->idx = idx; |
444 | | spin_lock(&lock_profile_lock); |
445 | | qhead->head_q = lock_profile_ancs[type].head_q; |
446 | | lock_profile_ancs[type].head_q = qhead; |
447 | | lock_profile_ancs[type].name = name; |
448 | | spin_unlock(&lock_profile_lock); |
449 | | } |
450 | | |
451 | | void _lock_profile_deregister_struct( |
452 | | int32_t type, struct lock_profile_qhead *qhead) |
453 | | { |
454 | | struct lock_profile_qhead **q; |
455 | | |
456 | | spin_lock(&lock_profile_lock); |
457 | | for ( q = &lock_profile_ancs[type].head_q; *q; q = &(*q)->head_q ) |
458 | | { |
459 | | if ( *q == qhead ) |
460 | | { |
461 | | *q = qhead->head_q; |
462 | | break; |
463 | | } |
464 | | } |
465 | | spin_unlock(&lock_profile_lock); |
466 | | } |
467 | | |
468 | | static int __init lock_prof_init(void) |
469 | | { |
470 | | struct lock_profile **q; |
471 | | |
472 | | for ( q = &__lock_profile_start; q < &__lock_profile_end; q++ ) |
473 | | { |
474 | | (*q)->next = lock_profile_glb_q.elem_q; |
475 | | lock_profile_glb_q.elem_q = *q; |
476 | | (*q)->lock->profile = *q; |
477 | | } |
478 | | |
479 | | _lock_profile_register_struct( |
480 | | LOCKPROF_TYPE_GLOBAL, &lock_profile_glb_q, |
481 | | 0, "Global lock"); |
482 | | |
483 | | return 0; |
484 | | } |
485 | | __initcall(lock_prof_init); |
486 | | |
487 | | #endif /* LOCK_PROFILE */ |