debuggers.hg

view xen/common/trace.c @ 19964:3952eaeb70b0

Introduce and use a per-CPU read-mostly sub-section

Mixing data that gets set up once and then (perhaps frequently) read by
remote CPUs with data that the local CPU may modify (again, perhaps
frequently) still causes undesirable cache-protocol-related bus traffic,
so separate the former class of objects from the latter.

The objects converted here were picked simply for their write-once (or
write-very-rarely) behaviour; further adjustments may be desirable later.
The primary users of the new sub-section will be introduced by the next
patch.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Mon Jul 13 11:32:41 2009 +0100 (2009-07-13)
parents 71af89e70fee
children 809b20f066fb fad80160c001
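
To illustrate the idea in the description above: a read-mostly per-CPU object is simply placed in its own linker sub-section, so it never shares a cache line with per-CPU data that the owning CPU writes frequently. The sketch below is illustrative only and assumes GCC-style section attributes; the real DEFINE_PER_CPU_READ_MOSTLY macro introduced by this changeset lives in the per-CPU headers (xen/include/xen/percpu.h and the architecture's percpu.h), not in trace.c, and its exact spelling differs.

/* Illustrative sketch only -- not the real Xen macros. */
#define EXAMPLE_DEFINE_PER_CPU(type, name) \
    __attribute__((__section__(".data.percpu"))) \
    __typeof__(type) per_cpu__##name

/* Read-mostly objects go into a separate sub-section, keeping them off the
 * cache lines occupied by frequently written per-CPU data. */
#define EXAMPLE_DEFINE_PER_CPU_READ_MOSTLY(type, name) \
    __attribute__((__section__(".data.percpu.read_mostly"))) \
    __typeof__(type) per_cpu__##name

With macros of this shape, the t_bufs and t_data declarations below end up grouped with other write-once per-CPU objects, while lost_records, which the local CPU updates on every dropped record, stays in the ordinary per-CPU section.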
line source
/******************************************************************************
 * common/trace.c
 *
 * Xen Trace Buffer
 *
 * Copyright (C) 2004 by Intel Research Cambridge
 *
 * Authors: Mark Williamson, mark.a.williamson@intel.com
 *          Rob Gardner, rob.gardner@hp.com
 * Date:    October 2005
 *
 * Copyright (C) 2005 Bin Ren
 *
 * The trace buffer code is designed to allow debugging traces of Xen to be
 * generated on UP / SMP machines. Each trace entry is timestamped so that
 * it's possible to reconstruct a chronological record of trace events.
 */

#include <xen/config.h>
#include <asm/types.h>
#include <asm/io.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/smp.h>
#include <xen/trace.h>
#include <xen/errno.h>
#include <xen/event.h>
#include <xen/softirq.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/percpu.h>
#include <asm/atomic.h>
#include <public/sysctl.h>

#ifdef CONFIG_COMPAT
#include <compat/trace.h>
#define xen_t_buf t_buf
CHECK_t_buf;
#undef xen_t_buf
#else
#define compat_t_rec t_rec
#endif

/* opt_tbuf_size: trace buffer size (in pages) */
static unsigned int opt_tbuf_size = 0;
integer_param("tbuf_size", opt_tbuf_size);

/* Pointers to the meta-data objects for all system trace buffers */
static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
static int data_size;

/* High water mark for trace buffers; */
/* Send virtual interrupt when buffer level reaches this point */
static int t_buf_highwater;

/* Number of records lost due to per-CPU trace buffer being full. */
static DEFINE_PER_CPU(unsigned long, lost_records);
static DEFINE_PER_CPU(unsigned long, lost_records_first_tsc);

/* a flag recording whether initialization has been done */
/* or more properly, if the tbuf subsystem is enabled right now */
int tb_init_done __read_mostly;

/* which CPUs tracing is enabled on */
static cpumask_t tb_cpu_mask = CPU_MASK_ALL;

/* which tracing events are enabled */
static u32 tb_event_mask = TRC_ALL;

/**
 * alloc_trace_bufs - performs initialization of the per-cpu trace buffers.
 *
 * This function is called at start of day in order to initialize the per-cpu
 * trace buffers. The trace buffers are then available for debugging use, via
 * the %TRACE_xD macros exported in <xen/trace.h>.
 *
 * This function may also be called later when enabling trace buffers
 * via the SET_SIZE hypercall.
 */
static int alloc_trace_bufs(void)
{
    int i, order;
    unsigned long nr_pages;
    char *rawbuf;
    struct t_buf *buf;

    if ( opt_tbuf_size == 0 )
        return -EINVAL;

    nr_pages = num_online_cpus() * opt_tbuf_size;
    order = get_order_from_pages(nr_pages);
    data_size = (opt_tbuf_size * PAGE_SIZE - sizeof(struct t_buf));

    if ( (rawbuf = alloc_xenheap_pages(order, 0)) == NULL )
    {
        printk("Xen trace buffers: memory allocation failed\n");
        opt_tbuf_size = 0;
        return -EINVAL;
    }

    /* Share pages so that xentrace can map them. */
    for ( i = 0; i < nr_pages; i++ )
        share_xen_page_with_privileged_guests(
            virt_to_page(rawbuf) + i, XENSHARE_writable);

    for_each_online_cpu ( i )
    {
        buf = per_cpu(t_bufs, i) = (struct t_buf *)
            &rawbuf[i*opt_tbuf_size*PAGE_SIZE];
        buf->cons = buf->prod = 0;
        per_cpu(t_data, i) = (unsigned char *)(buf + 1);
    }

    t_buf_highwater = data_size >> 1; /* 50% high water */

    return 0;
}

/**
 * tb_set_size - handle the logic involved with dynamically
 * allocating and deallocating tbufs
 *
 * This function is called when the SET_SIZE hypercall is done.
 */
static int tb_set_size(int size)
{
    /*
     * Setting size is a one-shot operation. It can be done either at
     * boot time or via control tools, but not by both. Once buffers
     * are created they cannot be destroyed.
     */
    if ( (opt_tbuf_size != 0) || (size <= 0) )
    {
        gdprintk(XENLOG_INFO, "tb_set_size from %d to %d not implemented\n",
                 opt_tbuf_size, size);
        return -EINVAL;
    }

    opt_tbuf_size = size;
    if ( alloc_trace_bufs() != 0 )
        return -EINVAL;

    printk("Xen trace buffers: initialized\n");
    return 0;
}

int trace_will_trace_event(u32 event)
{
    if ( !tb_init_done )
        return 0;

    /*
     * Copied from __trace_var()
     */
    if ( (tb_event_mask & event) == 0 )
        return 0;

    /* match class */
    if ( ((tb_event_mask >> TRC_CLS_SHIFT) & (event >> TRC_CLS_SHIFT)) == 0 )
        return 0;

    /* then match subclass */
    if ( (((tb_event_mask >> TRC_SUBCLS_SHIFT) & 0xf )
          & ((event >> TRC_SUBCLS_SHIFT) & 0xf )) == 0 )
        return 0;

    if ( !cpu_isset(smp_processor_id(), tb_cpu_mask) )
        return 0;

    return 1;
}

/**
 * init_trace_bufs - performs initialization of the per-cpu trace buffers.
 *
 * This function is called at start of day in order to initialize the per-cpu
 * trace buffers. The trace buffers are then available for debugging use, via
 * the %TRACE_xD macros exported in <xen/trace.h>.
 */
void __init init_trace_bufs(void)
{
    if ( opt_tbuf_size == 0 )
    {
        printk("Xen trace buffers: disabled\n");
        return;
    }

    if ( alloc_trace_bufs() == 0 )
    {
        printk("Xen trace buffers: initialised\n");
        wmb(); /* above must be visible before tb_init_done flag set */
        tb_init_done = 1;
    }
}

/**
 * tb_control - sysctl operations on trace buffers.
 * @tbc: a pointer to a xen_sysctl_tbuf_op_t to be filled out
 */
int tb_control(xen_sysctl_tbuf_op_t *tbc)
{
    static DEFINE_SPINLOCK(lock);
    int rc = 0;

    spin_lock(&lock);

    switch ( tbc->cmd )
    {
    case XEN_SYSCTL_TBUFOP_get_info:
        tbc->evt_mask = tb_event_mask;
        tbc->buffer_mfn = opt_tbuf_size ? virt_to_mfn(per_cpu(t_bufs, 0)) : 0;
        tbc->size = opt_tbuf_size * PAGE_SIZE;
        break;
    case XEN_SYSCTL_TBUFOP_set_cpu_mask:
        xenctl_cpumap_to_cpumask(&tb_cpu_mask, &tbc->cpu_mask);
        break;
    case XEN_SYSCTL_TBUFOP_set_evt_mask:
        tb_event_mask = tbc->evt_mask;
        break;
    case XEN_SYSCTL_TBUFOP_set_size:
        rc = !tb_init_done ? tb_set_size(tbc->size) : -EINVAL;
        break;
    case XEN_SYSCTL_TBUFOP_enable:
        /* Enable trace buffers. Check buffers are already allocated. */
        if ( opt_tbuf_size == 0 )
            rc = -EINVAL;
        else
            tb_init_done = 1;
        break;
    case XEN_SYSCTL_TBUFOP_disable:
        /*
         * Disable trace buffers. Just stops new records from being written,
         * does not deallocate any memory.
         */
        tb_init_done = 0;
        break;
    default:
        rc = -EINVAL;
        break;
    }

    spin_unlock(&lock);

    return rc;
}

static inline int calc_rec_size(int cycles, int extra)
{
    int rec_size;
    rec_size = 4;
    if ( cycles )
        rec_size += 8;
    rec_size += extra;
    return rec_size;
}

/*
 * buf->prod and buf->cons are byte offsets kept in the range
 * [0, 2*data_size), so a completely full buffer (data_size unconsumed
 * bytes) can be distinguished from an empty one (0 unconsumed bytes).
 */
static inline int calc_unconsumed_bytes(struct t_buf *buf)
{
    int x = buf->prod - buf->cons;
    if ( x < 0 )
        x += 2*data_size;

    ASSERT(x >= 0);
    ASSERT(x <= data_size);

    return x;
}

static inline int calc_bytes_to_wrap(struct t_buf *buf)
{
    int x = data_size - buf->prod;
    if ( x <= 0 )
        x += data_size;

    ASSERT(x > 0);
    ASSERT(x <= data_size);

    return x;
}

static inline int calc_bytes_avail(struct t_buf *buf)
{
    return data_size - calc_unconsumed_bytes(buf);
}

static inline struct t_rec *
next_record(struct t_buf *buf)
{
    int x = buf->prod;
    if ( x >= data_size )
        x -= data_size;

    ASSERT(x >= 0);
    ASSERT(x < data_size);

    return (struct t_rec *)&this_cpu(t_data)[x];
}

static inline int __insert_record(struct t_buf *buf,
                                  unsigned long event,
                                  int extra,
                                  int cycles,
                                  int rec_size,
                                  unsigned char *extra_data)
{
    struct t_rec *rec;
    unsigned char *dst;
    unsigned long extra_word = extra/sizeof(u32);
    int local_rec_size = calc_rec_size(cycles, extra);
    uint32_t next;

    BUG_ON(local_rec_size != rec_size);
    BUG_ON(extra & 3);

    /* Double-check once more that we have enough space.
     * Don't bugcheck here, in case the userland tool is doing
     * something stupid. */
    if ( calc_bytes_avail(buf) < rec_size )
    {
        printk("%s: %u bytes left (%u - ((%u - %u) %% %u) recsize %u.\n",
               __func__,
               calc_bytes_avail(buf),
               data_size, buf->prod, buf->cons, data_size, rec_size);
        return 0;
    }
    rmb();

    rec = next_record(buf);
    rec->event = event;
    rec->extra_u32 = extra_word;
    dst = (unsigned char *)rec->u.nocycles.extra_u32;
    if ( (rec->cycles_included = cycles) != 0 )
    {
        u64 tsc = (u64)get_cycles();
        rec->u.cycles.cycles_lo = (uint32_t)tsc;
        rec->u.cycles.cycles_hi = (uint32_t)(tsc >> 32);
        dst = (unsigned char *)rec->u.cycles.extra_u32;
    }

    if ( extra_data && extra )
        memcpy(dst, extra_data, extra);

    wmb();

    next = buf->prod + rec_size;
    if ( next >= 2*data_size )
        next -= 2*data_size;
    ASSERT(next >= 0);
    ASSERT(next < 2*data_size);
    buf->prod = next;

    return rec_size;
}

static inline int insert_wrap_record(struct t_buf *buf, int size)
{
    int space_left = calc_bytes_to_wrap(buf);
    unsigned long extra_space = space_left - sizeof(u32);
    int cycles = 0;

    BUG_ON(space_left > size);

    /* We may need to add cycles to take up enough space... */
    if ( (extra_space/sizeof(u32)) > TRACE_EXTRA_MAX )
    {
        cycles = 1;
        extra_space -= sizeof(u64);
        ASSERT((extra_space/sizeof(u32)) <= TRACE_EXTRA_MAX);
    }

    return __insert_record(buf,
                           TRC_TRACE_WRAP_BUFFER,
                           extra_space,
                           cycles,
                           space_left,
                           NULL);
}

#define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */

static inline int insert_lost_records(struct t_buf *buf)
{
    struct {
        u32 lost_records;
        u32 did:16, vid:16;
        u64 first_tsc;
    } __attribute__((packed)) ed;

    ed.vid = current->vcpu_id;
    ed.did = current->domain->domain_id;
    ed.lost_records = this_cpu(lost_records);
    ed.first_tsc = this_cpu(lost_records_first_tsc);

    this_cpu(lost_records) = 0;

    return __insert_record(buf,
                           TRC_LOST_RECORDS,
                           sizeof(ed),
                           1 /* cycles */,
                           LOST_REC_SIZE,
                           (unsigned char *)&ed);
}

/*
 * Notification is performed in qtasklet to avoid deadlocks with contexts
 * which __trace_var() may be called from (e.g., scheduler critical regions).
 */
static void trace_notify_dom0(unsigned long unused)
{
    send_guest_global_virq(dom0, VIRQ_TBUF);
}
static DECLARE_TASKLET(trace_notify_dom0_tasklet, trace_notify_dom0, 0);

/**
 * __trace_var - Enters a trace tuple into the trace buffer for the current CPU.
 * @event: the event type being logged
 * @cycles: include a TSC timestamp in the record?
 * @extra: size, in bytes, of the extra data attached to the record
 * @extra_data: pointer to the extra data
 *
 * Logs a trace record into the current CPU's buffer. Does nothing if the
 * trace buffers are not initialised or the event is masked out; if the
 * buffer is full, the record is dropped and counted in lost_records.
 */
void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data)
{
    struct t_buf *buf;
    unsigned long flags, bytes_to_tail, bytes_to_wrap;
    int rec_size, total_size;
    int extra_word;
    int started_below_highwater;

    if ( !tb_init_done )
        return;

    /* Convert byte count into word count, rounding up */
    extra_word = (extra / sizeof(u32));
    if ( (extra % sizeof(u32)) != 0 )
        extra_word++;

    ASSERT(extra_word <= TRACE_EXTRA_MAX);
    extra_word = min_t(int, extra_word, TRACE_EXTRA_MAX);

    /* Round size up to nearest word */
    extra = extra_word * sizeof(u32);

    if ( (tb_event_mask & event) == 0 )
        return;

    /* match class */
    if ( ((tb_event_mask >> TRC_CLS_SHIFT) & (event >> TRC_CLS_SHIFT)) == 0 )
        return;

    /* then match subclass */
    if ( (((tb_event_mask >> TRC_SUBCLS_SHIFT) & 0xf )
          & ((event >> TRC_SUBCLS_SHIFT) & 0xf )) == 0 )
        return;

    if ( !cpu_isset(smp_processor_id(), tb_cpu_mask) )
        return;

    /* Read tb_init_done /before/ t_bufs. */
    rmb();

    buf = this_cpu(t_bufs);

    local_irq_save(flags);

    started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater);

    /* Calculate the record size */
    rec_size = calc_rec_size(cycles, extra);

    /* How many bytes are available in the buffer? */
    bytes_to_tail = calc_bytes_avail(buf);

    /* How many bytes until the next wrap-around? */
    bytes_to_wrap = calc_bytes_to_wrap(buf);

    /*
     * Calculate expected total size to commit this record by
     * doing a dry-run.
     */
    total_size = 0;

    /* First, check to see if we need to include a lost_record. */
    if ( this_cpu(lost_records) )
    {
        if ( LOST_REC_SIZE > bytes_to_wrap )
        {
            total_size += bytes_to_wrap;
            bytes_to_wrap = data_size;
        }
        total_size += LOST_REC_SIZE;
        bytes_to_wrap -= LOST_REC_SIZE;

        /* LOST_REC might line up perfectly with the buffer wrap */
        if ( bytes_to_wrap == 0 )
            bytes_to_wrap = data_size;
    }

    if ( rec_size > bytes_to_wrap )
    {
        total_size += bytes_to_wrap;
    }
    total_size += rec_size;

    /* Do we have enough space for everything? */
    if ( total_size > bytes_to_tail )
    {
        if ( ++this_cpu(lost_records) == 1 )
            this_cpu(lost_records_first_tsc) = (u64)get_cycles();
        local_irq_restore(flags);
        return;
    }

    /*
     * Now, actually write information
     */
    bytes_to_wrap = calc_bytes_to_wrap(buf);

    if ( this_cpu(lost_records) )
    {
        if ( LOST_REC_SIZE > bytes_to_wrap )
        {
            insert_wrap_record(buf, LOST_REC_SIZE);
            bytes_to_wrap = data_size;
        }
        insert_lost_records(buf);
        bytes_to_wrap -= LOST_REC_SIZE;

        /* LOST_REC might line up perfectly with the buffer wrap */
        if ( bytes_to_wrap == 0 )
            bytes_to_wrap = data_size;
    }

    if ( rec_size > bytes_to_wrap )
        insert_wrap_record(buf, rec_size);

    /* Write the original record */
    __insert_record(buf, event, extra, cycles, rec_size, extra_data);

    local_irq_restore(flags);

    /* Notify trace buffer consumer that we've crossed the high water mark. */
    if ( started_below_highwater &&
         (calc_unconsumed_bytes(buf) >= t_buf_highwater) )
        tasklet_schedule(&trace_notify_dom0_tasklet);
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */
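
For context on how the cons/prod protocol above is meant to be used from the other side, here is a consumer-side sketch. It is not part of trace.c: the real consumer is the xentrace tool under tools/xentrace, which maps the shared pages via the buffer_mfn returned by XEN_SYSCTL_TBUFOP_get_info. The helper name, its emit callback, and the userspace barrier macros below are illustrative assumptions; only the t_buf/t_rec field accesses mirror what the producer code above writes.

/* Illustrative consumer sketch (userspace side), assuming the buffer and its
 * data area have already been mapped, public/trace.h is included for
 * struct t_buf / struct t_rec, and rmb()/mb() are suitable userspace
 * barriers. data_size is the same value the hypervisor computed:
 * opt_tbuf_size * PAGE_SIZE - sizeof(struct t_buf). */
static void drain_buffer(struct t_buf *buf, unsigned char *data,
                         uint32_t data_size,
                         void (*emit)(const struct t_rec *, uint32_t))
{
    uint32_t cons = buf->cons;   /* the consumer owns cons */
    uint32_t prod = buf->prod;   /* snapshot the producer's pointer */

    rmb();                       /* read prod before reading record data */

    while ( cons != prod )
    {
        /* cons/prod are kept in [0, 2*data_size); map to a buffer offset. */
        uint32_t off = (cons >= data_size) ? cons - data_size : cons;
        struct t_rec *rec = (struct t_rec *)&data[off];
        uint32_t bytes = 4 + (rec->cycles_included ? 8 : 0)
                           + rec->extra_u32 * sizeof(uint32_t);

        /* Wrap filler (TRC_TRACE_WRAP_BUFFER) arrives as an ordinary
         * record; a real consumer would discard it here. */
        emit(rec, bytes);

        cons += bytes;
        if ( cons >= 2 * data_size )
            cons -= 2 * data_size;
    }

    mb();                        /* finish reading before releasing space */
    buf->cons = cons;            /* hand the space back to the producer */
}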