
view xen/common/trace.c @ 20925:2a07df55c08a

xentrace: Clear lost records when disabling tracing

This patch clears the "lost records" flag on each cpu when tracing is
disabled.

Without this patch, the next time tracing starts, cpus with lost
records will generate lost record traces, even though buffers are
empty and no tracing has recently happened.

Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Feb 03 09:35:56 2010 +0000 (2010-02-03)
parents fad80160c001
children 94cae4dfa25b
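The substance of the patch is the per-CPU clearing loop added to the XEN_SYSCTL_TBUFOP_disable case of tb_control(). Below is a condensed view of that path as it reads after the change, rewritten as a standalone helper purely for readability; it is taken from the listing that follows, not new functionality.

    /* Condensed sketch of what XEN_SYSCTL_TBUFOP_disable now does; see
     * tb_control() in the listing below for the real context. */
    static void sketch_disable_tracing(void)
    {
        int i;

        tb_init_done = 0;   /* stop producing new records...               */
        wmb();              /* ...before resetting the per-cpu bookkeeping */

        /* New in this patch: clear each CPU's lost-record count under its
         * buffer lock, so that re-enabling tracing later does not emit
         * phantom TRC_LOST_RECORDS records for activity that never happened. */
        for_each_online_cpu(i)
        {
            unsigned long flags;

            spin_lock_irqsave(&per_cpu(t_lock, i), flags);
            per_cpu(lost_records, i) = 0;
            spin_unlock_irqrestore(&per_cpu(t_lock, i), flags);
        }
    }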
line source
1 /******************************************************************************
2 * common/trace.c
3 *
4 * Xen Trace Buffer
5 *
6 * Copyright (C) 2004 by Intel Research Cambridge
7 *
8 * Authors: Mark Williamson, mark.a.williamson@intel.com
9 *          Rob Gardner, rob.gardner@hp.com
10 * Date: October 2005
11 *
12 * Copyright (C) 2005 Bin Ren
13 *
14 * The trace buffer code is designed to allow debugging traces of Xen to be
15 * generated on UP / SMP machines. Each trace entry is timestamped so that
16 * it's possible to reconstruct a chronological record of trace events.
17 */
19 #include <xen/config.h>
20 #include <asm/types.h>
21 #include <asm/io.h>
22 #include <xen/lib.h>
23 #include <xen/sched.h>
24 #include <xen/smp.h>
25 #include <xen/trace.h>
26 #include <xen/errno.h>
27 #include <xen/event.h>
28 #include <xen/softirq.h>
29 #include <xen/init.h>
30 #include <xen/mm.h>
31 #include <xen/percpu.h>
32 #include <asm/atomic.h>
33 #include <public/sysctl.h>
35 #ifdef CONFIG_COMPAT
36 #include <compat/trace.h>
37 #define xen_t_buf t_buf
38 CHECK_t_buf;
39 #undef xen_t_buf
40 #else
41 #define compat_t_rec t_rec
42 #endif
44 /* opt_tbuf_size: trace buffer size (in pages) */
45 static unsigned int opt_tbuf_size = 0;
46 integer_param("tbuf_size", opt_tbuf_size);
48 /* Pointers to the meta-data objects for all system trace buffers */
49 static struct t_info *t_info;
50 #define T_INFO_PAGES 2 /* Size fixed at 2 pages for now. */
51 static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
52 static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
53 static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
54 static int data_size;
56 /* High-water mark for trace buffers: a virtual interrupt is sent to the
57 * consumer when the buffer fill level reaches this point. */
58 static int t_buf_highwater;
60 /* Number of records lost due to per-CPU trace buffer being full. */
61 static DEFINE_PER_CPU(unsigned long, lost_records);
62 static DEFINE_PER_CPU(unsigned long, lost_records_first_tsc);
64 /* A flag recording whether initialization has been done, or more
65 * precisely, whether the tbuf subsystem is currently enabled. */
66 int tb_init_done __read_mostly;
68 /* which CPUs tracing is enabled on */
69 static cpumask_t tb_cpu_mask = CPU_MASK_ALL;
71 /* which tracing events are enabled */
72 static u32 tb_event_mask = TRC_ALL;
74 /**
75 * alloc_trace_bufs - performs initialization of the per-cpu trace buffers.
76 *
77 * This function is called at start of day in order to initialize the per-cpu
78 * trace buffers. The trace buffers are then available for debugging use, via
79 * the %TRACE_xD macros exported in <xen/trace.h>.
80 *
81 * This function may also be called later when enabling trace buffers
82 * via the SET_SIZE hypercall.
83 */
84 static int alloc_trace_bufs(void)
85 {
86 int i, cpu, order;
87 unsigned long nr_pages;
88 /* The MFN list starts after the fixed-size array of NR_CPUS per-cpu offsets */
89 uint32_t *t_info_mfn_list = (uint32_t *)t_info;
90 int offset = (NR_CPUS * 2 + 1 + 1) / 4;
92 if ( opt_tbuf_size == 0 )
93 return -EINVAL;
95 if ( !t_info )
96 {
97 printk("%s: t_info not allocated, cannot allocate trace buffers!\n",
98 __func__);
99 return -EINVAL;
100 }
102 t_info->tbuf_size = opt_tbuf_size;
103 printk("tbuf_size %d\n", t_info->tbuf_size);
105 nr_pages = opt_tbuf_size;
106 order = get_order_from_pages(nr_pages);
108 /*
109 * First, allocate buffers for all of the cpus. If any
110 * fails, deallocate what you have so far and exit.
111 */
112 for_each_online_cpu(cpu)
113 {
114 unsigned long flags;
115 char *rawbuf;
116 struct t_buf *buf;
118 if ( (rawbuf = alloc_xenheap_pages(order, 0)) == NULL )
119 {
120 printk("Xen trace buffers: memory allocation failed\n");
121 opt_tbuf_size = 0;
122 goto out_dealloc;
123 }
125 spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
127 buf = per_cpu(t_bufs, cpu) = (struct t_buf *)rawbuf;
128 buf->cons = buf->prod = 0;
129 per_cpu(t_data, cpu) = (unsigned char *)(buf + 1);
131 spin_unlock_irqrestore(&per_cpu(t_lock, cpu), flags);
133 }
135 /*
136 * Now share the pages so that xentrace can map them, and record them in
137 * the global t_info structure.
138 */
139 for_each_online_cpu(cpu)
140 {
141 /* Share pages so that xentrace can map them. */
142 char *rawbuf;
144 if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) )
145 {
146 struct page_info *p = virt_to_page(rawbuf);
147 uint32_t mfn = virt_to_mfn(rawbuf);
149 for ( i = 0; i < nr_pages; i++ )
150 {
151 share_xen_page_with_privileged_guests(
152 p + i, XENSHARE_writable);
154 t_info_mfn_list[offset + i] = mfn + i;
155 }
156 /* Write list first, then write per-cpu offset. */
157 wmb();
158 t_info->mfn_offset[cpu] = offset;
159 printk("p%d mfn %"PRIx32" offset %d\n",
160 cpu, mfn, offset);
161 offset += i;
162 }
163 }
165 data_size = (opt_tbuf_size * PAGE_SIZE - sizeof(struct t_buf));
166 t_buf_highwater = data_size >> 1; /* 50% high water */
168 return 0;
169 out_dealloc:
170 for_each_online_cpu(cpu)
171 {
172 unsigned long flags;
173 char *rawbuf;
175 spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
176 if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) )
177 {
178 ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated));
179 free_xenheap_pages(rawbuf, order);
180 }
181 spin_unlock_irqrestore(&per_cpu(t_lock, cpu), flags);
182 }
183 return -EINVAL;
184 }
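alloc_trace_bufs() publishes two things through the shared t_info area: a per-CPU offset (t_info->mfn_offset[cpu], counted in 32-bit words from the start of the area) and, at that offset, a flat list of the MFNs backing the CPU's buffer pages. The sketch below shows how a consumer in the privileged domain could walk that layout back; the three helper functions are hypothetical placeholders, not real APIs.

    #include <stdint.h>

    /* Hypothetical consumer-side walk of the shared t_info area; these helpers
     * are placeholders standing in for whatever mapping primitives the tool
     * actually uses. */
    uint32_t read_t_info_u32(unsigned int word_index);  /* 32-bit word of t_info area */
    uint16_t read_mfn_offset(unsigned int cpu);         /* t_info->mfn_offset[cpu]    */
    void    *map_trace_mfn(uint32_t mfn);               /* map one shared page        */

    static void map_cpu_buffer(unsigned int cpu, unsigned int pages_per_buffer)
    {
        unsigned int i, offset = read_mfn_offset(cpu);

        /* The first page starts with struct t_buf (prod/cons); data follows. */
        for ( i = 0; i < pages_per_buffer; i++ )
        {
            uint32_t mfn = read_t_info_u32(offset + i);
            void *page = map_trace_mfn(mfn);

            /* ...stitch the pages together, mirroring the contiguous xenheap
             *    allocation that alloc_trace_bufs() shared page by page. */
            (void)page;
        }
    }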
187 /**
188 * tb_set_size - handle the logic involved with dynamically
189 * allocating and deallocating tbufs
190 *
191 * This function is called when the SET_SIZE sysctl operation is issued.
192 */
193 static int tb_set_size(int size)
194 {
195 /*
196 * Setting size is a one-shot operation. It can be done either at
197 * boot time or via control tools, but not by both. Once buffers
198 * are created they cannot be destroyed.
199 */
200 if ( (opt_tbuf_size != 0) || (size <= 0) )
201 {
202 gdprintk(XENLOG_INFO, "tb_set_size from %d to %d not implemented\n",
203 opt_tbuf_size, size);
204 return -EINVAL;
205 }
207 opt_tbuf_size = size;
208 if ( alloc_trace_bufs() != 0 )
209 return -EINVAL;
211 printk("Xen trace buffers: initialized\n");
212 return 0;
213 }
215 int trace_will_trace_event(u32 event)
216 {
217 if ( !tb_init_done )
218 return 0;
220 /*
221 * Copied from __trace_var()
222 */
223 if ( (tb_event_mask & event) == 0 )
224 return 0;
226 /* match class */
227 if ( ((tb_event_mask >> TRC_CLS_SHIFT) & (event >> TRC_CLS_SHIFT)) == 0 )
228 return 0;
230 /* then match subclass */
231 if ( (((tb_event_mask >> TRC_SUBCLS_SHIFT) & 0xf )
232 & ((event >> TRC_SUBCLS_SHIFT) & 0xf )) == 0 )
233 return 0;
235 if ( !cpu_isset(smp_processor_id(), tb_cpu_mask) )
236 return 0;
238 return 1;
239 }
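The filtering above works on the class and subclass bit-fields of the 32-bit event word. A small self-contained illustration of the same three checks follows; the shift values and the event code are made-up placeholders, since the real constants come from <public/trace.h>.

    #include <stdint.h>
    #include <stdio.h>

    /* Placeholder field positions for illustration only; the real
     * TRC_CLS_SHIFT / TRC_SUBCLS_SHIFT values live in <public/trace.h>. */
    #define DEMO_CLS_SHIFT    28
    #define DEMO_SUBCLS_SHIFT 24

    /* Same three checks as trace_will_trace_event() / __trace_var(). */
    static int demo_event_enabled(uint32_t mask, uint32_t event)
    {
        if ( (mask & event) == 0 )
            return 0;                                   /* no common bits at all */
        if ( ((mask >> DEMO_CLS_SHIFT) & (event >> DEMO_CLS_SHIFT)) == 0 )
            return 0;                                   /* class not enabled     */
        if ( (((mask >> DEMO_SUBCLS_SHIFT) & 0xf) &
              ((event >> DEMO_SUBCLS_SHIFT) & 0xf)) == 0 )
            return 0;                                   /* subclass not enabled  */
        return 1;
    }

    int main(void)
    {
        uint32_t event = 0x21000003;  /* made-up: class 2, subclass 1, id 3 */

        printf("all events on: %d\n", demo_event_enabled(0xffffffff, event)); /* 1 */
        printf("class 1 only : %d\n", demo_event_enabled(0x10ffffff, event)); /* 0 */
        return 0;
    }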
241 /**
242 * init_trace_bufs - performs initialization of the per-cpu trace buffers.
243 *
244 * This function is called at start of day in order to initialize the per-cpu
245 * trace buffers. The trace buffers are then available for debugging use, via
246 * the %TRACE_xD macros exported in <xen/trace.h>.
247 */
248 void __init init_trace_bufs(void)
249 {
250 int i;
251 /* t_info size fixed at 2 pages for now. That should be big enough / small enough
252 * until it's worth making it dynamic. */
253 t_info = alloc_xenheap_pages(1, 0);
255 if ( t_info == NULL )
256 {
257 printk("Xen trace buffers: t_info allocation failed! Tracing disabled.\n");
258 return;
259 }
261 for ( i = 0; i < NR_CPUS; i++ )
262 spin_lock_init(&per_cpu(t_lock, i));
264 for ( i = 0; i < T_INFO_PAGES; i++ )
265 share_xen_page_with_privileged_guests(
266 virt_to_page(t_info) + i, XENSHARE_writable);
270 if ( opt_tbuf_size == 0 )
271 {
272 printk("Xen trace buffers: disabled\n");
273 return;
274 }
276 if ( alloc_trace_bufs() == 0 )
277 {
278 printk("Xen trace buffers: initialised\n");
279 wmb(); /* above must be visible before tb_init_done flag set */
280 tb_init_done = 1;
281 }
282 }
284 /**
285 * tb_control - sysctl operations on trace buffers.
286 * @tbc: a pointer to a xen_sysctl_tbuf_op_t to be filled out
287 */
288 int tb_control(xen_sysctl_tbuf_op_t *tbc)
289 {
290 static DEFINE_SPINLOCK(lock);
291 int rc = 0;
293 spin_lock(&lock);
295 switch ( tbc->cmd )
296 {
297 case XEN_SYSCTL_TBUFOP_get_info:
298 tbc->evt_mask = tb_event_mask;
299 tbc->buffer_mfn = t_info ? virt_to_mfn(t_info) : 0;
300 tbc->size = T_INFO_PAGES;
301 break;
302 case XEN_SYSCTL_TBUFOP_set_cpu_mask:
303 xenctl_cpumap_to_cpumask(&tb_cpu_mask, &tbc->cpu_mask);
304 break;
305 case XEN_SYSCTL_TBUFOP_set_evt_mask:
306 tb_event_mask = tbc->evt_mask;
307 break;
308 case XEN_SYSCTL_TBUFOP_set_size:
309 rc = tb_set_size(tbc->size);
310 break;
311 case XEN_SYSCTL_TBUFOP_enable:
312 /* Enable trace buffers. Check that buffers have already been allocated. */
313 if ( opt_tbuf_size == 0 )
314 rc = -EINVAL;
315 else
316 tb_init_done = 1;
317 break;
318 case XEN_SYSCTL_TBUFOP_disable:
319 {
320 /*
321 * Disable trace buffers. Just stops new records from being written,
322 * does not deallocate any memory.
323 */
324 int i;
326 tb_init_done = 0;
327 wmb();
328 /* Clear any lost-record info so we don't get phantom lost records next time we
329 * start tracing. Grab the lock to make sure we're not racing anyone. After this
330 * hypercall returns, no more records should be placed into the buffers. */
331 for_each_online_cpu(i)
332 {
333 unsigned long flags;
334 spin_lock_irqsave(&per_cpu(t_lock, i), flags);
335 per_cpu(lost_records, i) = 0;
336 spin_unlock_irqrestore(&per_cpu(t_lock, i), flags);
337 }
338 }
339 break;
340 default:
341 rc = -EINVAL;
342 break;
343 }
345 spin_unlock(&lock);
347 return rc;
348 }
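For orientation, a consumer would drive these operations in roughly the order sketched below. do_tbuf_op() is a made-up stand-in for whatever mechanism issues the XEN_SYSCTL_tbuf_op sysctl (for example a libxc wrapper), and the XEN_SYSCTL_TBUFOP_* constants come from Xen's public sysctl header; treat the whole sequence as a sketch of the protocol, not real tool code.

    #include <stdint.h>

    /* Hypothetical sysctl wrapper; not a real API. */
    int do_tbuf_op(int cmd, unsigned int size, uint32_t evt_mask);

    static int start_tracing(unsigned int pages, uint32_t evt_mask)
    {
        int rc;

        /* One-shot allocation: fails if buffers were already sized at boot. */
        if ( (rc = do_tbuf_op(XEN_SYSCTL_TBUFOP_set_size, pages, 0)) != 0 )
            return rc;

        /* Narrow the event filter, then let records be produced. */
        if ( (rc = do_tbuf_op(XEN_SYSCTL_TBUFOP_set_evt_mask, 0, evt_mask)) != 0 )
            return rc;

        return do_tbuf_op(XEN_SYSCTL_TBUFOP_enable, 0, 0);
    }

    static void stop_tracing(void)
    {
        /* Stops new records; with this changeset it also clears the per-cpu
         * lost-record counts.  Buffer memory remains allocated. */
        do_tbuf_op(XEN_SYSCTL_TBUFOP_disable, 0, 0);
    }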
350 static inline int calc_rec_size(int cycles, int extra)
351 {
352 int rec_size;
353 rec_size = 4;
354 if ( cycles )
355 rec_size += 8;
356 rec_size += extra;
357 return rec_size;
358 }
360 static inline int calc_unconsumed_bytes(struct t_buf *buf)
361 {
362 int x = buf->prod - buf->cons;
363 if ( x < 0 )
364 x += 2*data_size;
366 ASSERT(x >= 0);
367 ASSERT(x <= data_size);
369 return x;
370 }
372 static inline int calc_bytes_to_wrap(struct t_buf *buf)
373 {
374 int x = data_size - buf->prod;
375 if ( x <= 0 )
376 x += data_size;
378 ASSERT(x > 0);
379 ASSERT(x <= data_size);
381 return x;
382 }
384 static inline int calc_bytes_avail(struct t_buf *buf)
385 {
386 return data_size - calc_unconsumed_bytes(buf);
387 }
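prod and cons are not plain offsets into the data area: they run over [0, 2*data_size) so that a completely full buffer (distance of data_size) can be told apart from an empty one (distance of zero), and next_record() folds the index back into [0, data_size) before use. A toy standalone demonstration of the same arithmetic:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Toy model of the trace buffer's "virtual index" scheme: prod/cons wrap at
     * 2*DATA_SIZE, payload offsets wrap at DATA_SIZE (as in next_record()). */
    enum { DATA_SIZE = 16 };

    static int unconsumed(uint32_t prod, uint32_t cons)
    {
        int x = (int)prod - (int)cons;
        if ( x < 0 )
            x += 2 * DATA_SIZE;
        return x;                 /* 0 == empty, DATA_SIZE == full */
    }

    static uint32_t advance(uint32_t idx, int bytes)
    {
        idx += bytes;
        if ( idx >= 2 * DATA_SIZE )
            idx -= 2 * DATA_SIZE;
        return idx;
    }

    int main(void)
    {
        uint32_t prod = 0, cons = 0;

        prod = advance(prod, DATA_SIZE);              /* producer fills the buffer    */
        assert(unconsumed(prod, cons) == DATA_SIZE);  /* full, not mistaken for empty */

        cons = advance(cons, 4);                      /* consumer releases 4 bytes    */
        assert(unconsumed(prod, cons) == DATA_SIZE - 4);

        printf("payload offset of prod: %u\n",
               prod >= DATA_SIZE ? prod - DATA_SIZE : prod);
        return 0;
    }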
389 static inline struct t_rec *
390 next_record(struct t_buf *buf)
391 {
392 int x = buf->prod;
393 if ( x >= data_size )
394 x -= data_size;
396 ASSERT(x >= 0);
397 ASSERT(x < data_size);
399 return (struct t_rec *)&this_cpu(t_data)[x];
400 }
402 static inline int __insert_record(struct t_buf *buf,
403 unsigned long event,
404 int extra,
405 int cycles,
406 int rec_size,
407 unsigned char *extra_data)
408 {
409 struct t_rec *rec;
410 unsigned char *dst;
411 unsigned long extra_word = extra/sizeof(u32);
412 int local_rec_size = calc_rec_size(cycles, extra);
413 uint32_t next;
415 BUG_ON(local_rec_size != rec_size);
416 BUG_ON(extra & 3);
418 /* Double-check once more that we have enough space.
419 * Don't bugcheck here, in case the userland tool is doing
420 * something stupid. */
421 if ( calc_bytes_avail(buf) < rec_size )
422 {
423 printk("%s: %u bytes left (%u - ((%u - %u) %% %u) recsize %u.\n",
424 __func__,
425 calc_bytes_avail(buf),
426 data_size, buf->prod, buf->cons, data_size, rec_size);
427 return 0;
428 }
429 rmb();
431 rec = next_record(buf);
432 rec->event = event;
433 rec->extra_u32 = extra_word;
434 dst = (unsigned char *)rec->u.nocycles.extra_u32;
435 if ( (rec->cycles_included = cycles) != 0 )
436 {
437 u64 tsc = (u64)get_cycles();
438 rec->u.cycles.cycles_lo = (uint32_t)tsc;
439 rec->u.cycles.cycles_hi = (uint32_t)(tsc >> 32);
440 dst = (unsigned char *)rec->u.cycles.extra_u32;
441 }
443 if ( extra_data && extra )
444 memcpy(dst, extra_data, extra);
446 wmb();
448 next = buf->prod + rec_size;
449 if ( next >= 2*data_size )
450 next -= 2*data_size;
451 ASSERT(next >= 0);
452 ASSERT(next < 2*data_size);
453 buf->prod = next;
455 return rec_size;
456 }
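calc_rec_size() implies the on-buffer record format: a 4-byte header word, an optional 8-byte TSC, then the extra data words. struct t_rec itself is defined in the public trace header; the sketch below shows a layout consistent with the fields used here (event, extra_u32, cycles_included, and the cycles/nocycles union) and should be read as an assumption, not the authoritative definition.

    #include <stdint.h>

    /* Assumed shape of a trace record, reconstructed from how __insert_record()
     * and calc_rec_size() use it; the authoritative definition lives in Xen's
     * public trace header. */
    struct t_rec_sketch {
        uint32_t event:28;            /* event id                          */
        uint32_t extra_u32:3;         /* number of extra 32-bit data words */
        uint32_t cycles_included:1;   /* is a TSC present?                 */
        union {
            struct {
                uint32_t cycles_lo, cycles_hi;  /* TSC split into halves         */
                uint32_t extra_u32[7];          /* up to TRACE_EXTRA_MAX, assumed 7 */
            } cycles;
            struct {
                uint32_t extra_u32[7];
            } nocycles;
        } u;
    };

    /* Mirrors calc_rec_size(): 4-byte header, optional 8-byte TSC, extra bytes. */
    static inline int sketch_rec_size(int cycles, int extra_bytes)
    {
        return 4 + (cycles ? 8 : 0) + extra_bytes;
    }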
458 static inline int insert_wrap_record(struct t_buf *buf, int size)
459 {
460 int space_left = calc_bytes_to_wrap(buf);
461 unsigned long extra_space = space_left - sizeof(u32);
462 int cycles = 0;
464 BUG_ON(space_left > size);
466 /* We may need to add cycles to take up enough space... */
467 if ( (extra_space/sizeof(u32)) > TRACE_EXTRA_MAX )
468 {
469 cycles = 1;
470 extra_space -= sizeof(u64);
471 ASSERT((extra_space/sizeof(u32)) <= TRACE_EXTRA_MAX);
472 }
474 return __insert_record(buf,
475 TRC_TRACE_WRAP_BUFFER,
476 extra_space,
477 cycles,
478 space_left,
479 NULL);
480 }
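A quick worked check of the wrap-record sizing, written as a comment block; TRACE_EXTRA_MAX == 7 is an assumption consistent with the 7-word extra arrays in the record layout.

    /* Worked example of the sizing above, assuming TRACE_EXTRA_MAX == 7:
     *
     *   space_left = 40: extra_space = 40 - 4 = 36 bytes = 9 words, which
     *   exceeds TRACE_EXTRA_MAX, so a TSC is included: extra_space = 36 - 8
     *   = 28 bytes (7 words).  Record: 4 (header) + 8 (TSC) + 28 = 40.
     *
     *   space_left = 12: extra_space = 8 bytes = 2 words, no TSC needed.
     *   Record: 4 (header) + 8 (padding words) = 12.
     *
     * Either way the wrap record exactly consumes the bytes left before the
     * wrap point, which the BUG_ON() in __insert_record() verifies.
     */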
482 #define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */
484 static inline int insert_lost_records(struct t_buf *buf)
485 {
486 struct {
487 u32 lost_records;
488 u32 did:16, vid:16;
489 u64 first_tsc;
490 } __attribute__((packed)) ed;
492 ed.vid = current->vcpu_id;
493 ed.did = current->domain->domain_id;
494 ed.lost_records = this_cpu(lost_records);
495 ed.first_tsc = this_cpu(lost_records_first_tsc);
497 this_cpu(lost_records) = 0;
499 return __insert_record(buf,
500 TRC_LOST_RECORDS,
501 sizeof(ed),
502 1 /* cycles */,
503 LOST_REC_SIZE,
504 (unsigned char *)&ed);
505 }
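LOST_REC_SIZE has to stay in step with the packed payload above: 4 bytes of record header, 8 bytes of TSC (cycles is always 1 here), and sizeof(ed) = 4 + 4 + 8 = 16 bytes. A compile-time guard along the following lines would catch a mismatch if the payload ever grows; this is a sketch, the original code relies on the comment instead.

    #include <stdint.h>

    /* Sketch of a compile-time check for the "4 + 8 + 16" arithmetic above. */
    struct lost_rec_payload_sketch {
        uint32_t lost_records;
        uint32_t did:16, vid:16;
        uint64_t first_tsc;
    } __attribute__((packed));

    /* Fails to compile if the packed payload is not exactly 16 bytes. */
    typedef char lost_rec_size_check[
        (sizeof(struct lost_rec_payload_sketch) == 16) ? 1 : -1];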
507 /*
508 * Notification is performed in a tasklet to avoid deadlocks with contexts
509 * which __trace_var() may be called from (e.g., scheduler critical regions).
510 */
511 static void trace_notify_dom0(unsigned long unused)
512 {
513 send_guest_global_virq(dom0, VIRQ_TBUF);
514 }
515 static DECLARE_TASKLET(trace_notify_dom0_tasklet, trace_notify_dom0, 0);
517 /**
518 * __trace_var - Enters a trace record into the trace buffer for the current CPU.
519 * @event: the event type being logged
520 * @cycles: nonzero to include a TSC timestamp in the record
521 * @extra: size in bytes of the additional data in @extra_data
522 * @extra_data: pointer to the additional event data, or NULL
523 *
524 * Logs a record into the current CPU's buffer; returns silently if tracing is disabled or the event is filtered out.
525 */
526 void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data)
527 {
528 struct t_buf *buf;
529 unsigned long flags, bytes_to_tail, bytes_to_wrap;
530 int rec_size, total_size;
531 int extra_word;
532 int started_below_highwater = 0;
534 if ( !tb_init_done )
535 return;
537 /* Convert byte count into word count, rounding up */
538 extra_word = (extra / sizeof(u32));
539 if ( (extra % sizeof(u32)) != 0 )
540 extra_word++;
542 ASSERT(extra_word <= TRACE_EXTRA_MAX);
543 extra_word = min_t(int, extra_word, TRACE_EXTRA_MAX);
545 /* Round size up to nearest word */
546 extra = extra_word * sizeof(u32);
548 if ( (tb_event_mask & event) == 0 )
549 return;
551 /* match class */
552 if ( ((tb_event_mask >> TRC_CLS_SHIFT) & (event >> TRC_CLS_SHIFT)) == 0 )
553 return;
555 /* then match subclass */
556 if ( (((tb_event_mask >> TRC_SUBCLS_SHIFT) & 0xf )
557 & ((event >> TRC_SUBCLS_SHIFT) & 0xf )) == 0 )
558 return;
560 if ( !cpu_isset(smp_processor_id(), tb_cpu_mask) )
561 return;
563 /* Read tb_init_done /before/ t_bufs. */
564 rmb();
566 spin_lock_irqsave(&this_cpu(t_lock), flags);
568 buf = this_cpu(t_bufs);
570 if ( unlikely(!buf) )
571 goto unlock;
573 started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater);
575 /* Calculate the record size */
576 rec_size = calc_rec_size(cycles, extra);
578 /* How many bytes are available in the buffer? */
579 bytes_to_tail = calc_bytes_avail(buf);
581 /* How many bytes until the next wrap-around? */
582 bytes_to_wrap = calc_bytes_to_wrap(buf);
584 /*
585 * Calculate expected total size to commit this record by
586 * doing a dry-run.
587 */
588 total_size = 0;
590 /* First, check to see if we need to include a lost_record.
591 */
592 if ( this_cpu(lost_records) )
593 {
594 if ( LOST_REC_SIZE > bytes_to_wrap )
595 {
596 total_size += bytes_to_wrap;
597 bytes_to_wrap = data_size;
598 }
599 total_size += LOST_REC_SIZE;
600 bytes_to_wrap -= LOST_REC_SIZE;
602 /* LOST_REC might line up perfectly with the buffer wrap */
603 if ( bytes_to_wrap == 0 )
604 bytes_to_wrap = data_size;
605 }
607 if ( rec_size > bytes_to_wrap )
608 {
609 total_size += bytes_to_wrap;
610 }
611 total_size += rec_size;
613 /* Do we have enough space for everything? */
614 if ( total_size > bytes_to_tail )
615 {
616 if ( ++this_cpu(lost_records) == 1 )
617 this_cpu(lost_records_first_tsc) = (u64)get_cycles();
618 started_below_highwater = 0;
619 goto unlock;
620 }
622 /*
623 * Now, actually write information
624 */
625 bytes_to_wrap = calc_bytes_to_wrap(buf);
627 if ( this_cpu(lost_records) )
628 {
629 if ( LOST_REC_SIZE > bytes_to_wrap )
630 {
631 insert_wrap_record(buf, LOST_REC_SIZE);
632 bytes_to_wrap = data_size;
633 }
634 insert_lost_records(buf);
635 bytes_to_wrap -= LOST_REC_SIZE;
637 /* LOST_REC might line up perfectly with the buffer wrap */
638 if ( bytes_to_wrap == 0 )
639 bytes_to_wrap = data_size;
640 }
642 if ( rec_size > bytes_to_wrap )
643 insert_wrap_record(buf, rec_size);
645 /* Write the original record */
646 __insert_record(buf, event, extra, cycles, rec_size, extra_data);
648 unlock:
649 spin_unlock_irqrestore(&this_cpu(t_lock), flags);
651 /* Notify trace buffer consumer that we've crossed the high water mark. */
652 if ( started_below_highwater &&
653 (calc_unconsumed_bytes(buf) >= t_buf_highwater) )
654 tasklet_schedule(&trace_notify_dom0_tasklet);
655 }
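Callers normally reach __trace_var() through the trace_var()/TRACE_xD() wrappers in <xen/trace.h>, which pack their arguments into 32-bit words. A minimal sketch of a direct caller matching the signature above; the event number is a made-up placeholder.

    /* Sketch of a caller: pack two 32-bit data words and hand them to
     * __trace_var().  0x00021234 is a made-up event number for illustration. */
    static void demo_trace_two_words(uint32_t d1, uint32_t d2)
    {
        uint32_t extra[2] = { d1, d2 };

        /* cycles=1 requests a TSC in the record; extra is a byte count. */
        __trace_var(0x00021234, 1, sizeof(extra), (unsigned char *)extra);
    }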
657 /*
658 * Local variables:
659 * mode: C
660 * c-set-style: "BSD"
661 * c-basic-offset: 4
662 * tab-width: 4
663 * indent-tabs-mode: nil
664 * End:
665 */