/root/src/xen/xen/common/trace.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * common/trace.c |
3 | | * |
4 | | * Xen Trace Buffer |
5 | | * |
6 | | * Copyright (C) 2004 by Intel Research Cambridge |
7 | | * |
8 | | * Authors: Mark Williamson, mark.a.williamson@intel.com |
9 | | * Rob Gardner, rob.gardner@hp.com |
10 | | * Date: October 2005 |
11 | | * |
12 | | * Copyright (C) 2005 Bin Ren |
13 | | * |
14 | | * The trace buffer code is designed to allow debugging traces of Xen to be |
15 | | * generated on UP / SMP machines. Each trace entry is timestamped so that |
16 | | * it's possible to reconstruct a chronological record of trace events. |
17 | | */ |
18 | | |
19 | | #include <asm/types.h> |
20 | | #include <asm/io.h> |
21 | | #include <xen/lib.h> |
22 | | #include <xen/sched.h> |
23 | | #include <xen/smp.h> |
24 | | #include <xen/trace.h> |
25 | | #include <xen/errno.h> |
26 | | #include <xen/event.h> |
27 | | #include <xen/tasklet.h> |
28 | | #include <xen/init.h> |
29 | | #include <xen/mm.h> |
30 | | #include <xen/percpu.h> |
31 | | #include <xen/pfn.h> |
32 | | #include <xen/cpu.h> |
33 | | #include <asm/atomic.h> |
34 | | #include <public/sysctl.h> |
35 | | |
36 | | #ifdef CONFIG_COMPAT |
37 | | #include <compat/trace.h> |
38 | | #define xen_t_buf t_buf |
39 | | CHECK_t_buf; |
40 | | #undef xen_t_buf |
41 | | #else |
42 | | #define compat_t_rec t_rec |
43 | | #endif |
44 | | |
45 | | /* opt_tbuf_size: trace buffer size (in pages) for each cpu */ |
46 | | static unsigned int opt_tbuf_size; |
47 | | static unsigned int opt_tevt_mask; |
48 | | integer_param("tbuf_size", opt_tbuf_size); |
49 | | integer_param("tevt_mask", opt_tevt_mask); |
50 | | |
51 | | /* Pointers to the meta-data objects for all system trace buffers */ |
52 | | static struct t_info *t_info; |
53 | | static unsigned int t_info_pages; |
54 | | |
55 | | static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs); |
56 | | static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock); |
57 | | static u32 data_size __read_mostly; |
58 | | |
59 | | /* High-water mark for trace buffers: */ |
60 | | /* send a virtual interrupt when the buffer fill level reaches this point */ |
61 | | static u32 t_buf_highwater; |
62 | | |
63 | | /* Number of records lost due to per-CPU trace buffer being full. */ |
64 | | static DEFINE_PER_CPU(unsigned long, lost_records); |
65 | | static DEFINE_PER_CPU(unsigned long, lost_records_first_tsc); |
66 | | |
67 | | /* a flag recording whether initialization has been done */ |
68 | | /* or more properly, if the tbuf subsystem is enabled right now */ |
69 | | int tb_init_done __read_mostly; |
70 | | |
71 | | /* which CPUs tracing is enabled on */ |
72 | | static cpumask_t tb_cpu_mask; |
73 | | |
74 | | /* which tracing events are enabled */ |
75 | | static u32 tb_event_mask = TRC_ALL; |
76 | | |
77 | | /* Return the number of elements of type _type necessary to store at least _x bytes |
78 | | * of data, i.e., sizeof(_type) * ans >= _x. */ |
79 | 0 | #define fit_to_type(_type, _x) (((_x)+sizeof(_type)-1) / sizeof(_type)) |
80 | | |
81 | | static int cpu_callback( |
82 | | struct notifier_block *nfb, unsigned long action, void *hcpu) |
83 | 0 | { |
84 | 0 | unsigned int cpu = (unsigned long)hcpu; |
85 | 0 |
|
86 | 0 | if ( action == CPU_UP_PREPARE ) |
87 | 0 | spin_lock_init(&per_cpu(t_lock, cpu)); |
88 | 0 |
|
89 | 0 | return NOTIFY_DONE; |
90 | 0 | } |
91 | | |
92 | | static struct notifier_block cpu_nfb = { |
93 | | .notifier_call = cpu_callback |
94 | | }; |
95 | | |
96 | | static uint32_t calc_tinfo_first_offset(void) |
97 | 0 | { |
98 | 0 | int offset_in_bytes = offsetof(struct t_info, mfn_offset[NR_CPUS]); |
99 | 0 | return fit_to_type(uint32_t, offset_in_bytes); |
100 | 0 | } |
101 | | |
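The helper above sizes the variable-length struct t_info header in whole uint32_t words. Below is a minimal standalone sketch of the same arithmetic; the struct only mirrors the shape used here, and EXAMPLE_NR_CPUS is an assumed value for illustration, not the real NR_CPUS.

    #include <stdio.h>
    #include <stddef.h>
    #include <stdint.h>

    #define EXAMPLE_NR_CPUS 8                  /* assumed value, illustration only */
    #define fit_to_type(_type, _x) (((_x) + sizeof(_type) - 1) / sizeof(_type))

    struct t_info_like {                       /* mirrors the shape of struct t_info */
        uint16_t tbuf_size;                    /* pages per cpu */
        uint16_t mfn_offset[];                 /* per-cpu offsets, in uint32_t words */
    };

    int main(void)
    {
        size_t bytes = offsetof(struct t_info_like, mfn_offset[EXAMPLE_NR_CPUS]);

        /* 2 + 8*2 = 18 bytes of header -> the first mfn list starts at word 5 */
        printf("header %zu bytes -> first mfn list at uint32_t word %zu\n",
               bytes, fit_to_type(uint32_t, bytes));
        return 0;
    }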
102 | | /** |
103 | | * calculate_tbuf_size - check that the proposed size fits in the currently |
104 | | * sized struct t_info and lets prod and cons reach double that value |
105 | | * without overflowing. |
106 | | * The t_info layout is fixed and can't be changed without breaking xentrace. |
107 | | * Initializes t_info_pages based on the number of trace pages. |
108 | | */ |
109 | | static int calculate_tbuf_size(unsigned int pages, uint16_t t_info_first_offset) |
110 | 0 | { |
111 | 0 | struct t_buf dummy_size; |
112 | 0 | typeof(dummy_size.prod) max_size; |
113 | 0 | struct t_info dummy_pages; |
114 | 0 | typeof(dummy_pages.tbuf_size) max_pages; |
115 | 0 | typeof(dummy_pages.mfn_offset[0]) max_mfn_offset; |
116 | 0 | unsigned int max_cpus = num_online_cpus(); |
117 | 0 | unsigned int t_info_words; |
118 | 0 |
|
119 | 0 | /* force maximum value for an unsigned type */ |
120 | 0 | max_size = -1; |
121 | 0 | max_pages = -1; |
122 | 0 | max_mfn_offset = -1; |
123 | 0 |
|
124 | 0 | /* max size holds up to n pages */ |
125 | 0 | max_size /= PAGE_SIZE; |
126 | 0 |
|
127 | 0 | if ( max_size < max_pages ) |
128 | 0 | max_pages = max_size; |
129 | 0 |
|
130 | 0 | /* |
131 | 0 | * max mfn_offset bounds the number of pages per cpu: the mfn list for |
132 | 0 | * the highest-numbered cpu starts at t_info_first_offset + |
133 | 0 | * (num_cpus - 1) * pages, and that start must still fit in mfn_offset. |
134 | 0 | */ |
135 | 0 | max_mfn_offset -= t_info_first_offset; |
136 | 0 | max_cpus--; |
137 | 0 | if ( max_cpus ) |
138 | 0 | max_mfn_offset /= max_cpus; |
139 | 0 | if ( max_mfn_offset < max_pages ) |
140 | 0 | max_pages = max_mfn_offset; |
141 | 0 |
|
142 | 0 | if ( pages > max_pages ) |
143 | 0 | { |
144 | 0 | printk(XENLOG_INFO "xentrace: requested number of %u pages " |
145 | 0 | "reduced to %u\n", |
146 | 0 | pages, max_pages); |
147 | 0 | pages = max_pages; |
148 | 0 | } |
149 | 0 |
|
150 | 0 | /* |
151 | 0 | * NB this calculation is correct, because t_info_first_offset is |
152 | 0 | * in words, not bytes |
153 | 0 | */ |
154 | 0 | t_info_words = num_online_cpus() * pages + t_info_first_offset; |
155 | 0 | t_info_pages = PFN_UP(t_info_words * sizeof(uint32_t)); |
156 | 0 | printk(XENLOG_INFO "xentrace: requesting %u t_info pages " |
157 | 0 | "for %u trace pages on %u cpus\n", |
158 | 0 | t_info_pages, pages, num_online_cpus()); |
159 | 0 | return pages; |
160 | 0 | } |
161 | | |
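As a worked example of the sizing just computed (all inputs are illustrative: 4 online cpus, 32 trace pages per cpu, a 3-word first offset, 4 KiB pages), the metadata footprint comes out at one page:

    #include <stdio.h>
    #include <stdint.h>

    #define EXAMPLE_PAGE_SIZE 4096u
    #define EXAMPLE_PFN_UP(x) (((x) + EXAMPLE_PAGE_SIZE - 1) / EXAMPLE_PAGE_SIZE)

    int main(void)
    {
        unsigned int cpus = 4, pages = 32, t_info_first_offset = 3; /* assumed inputs */
        unsigned int t_info_words = cpus * pages + t_info_first_offset;
        unsigned long t_info_pages = EXAMPLE_PFN_UP(t_info_words * sizeof(uint32_t));

        printf("%u uint32_t words of metadata -> %lu t_info page(s)\n",
               t_info_words, t_info_pages);     /* 131 words -> 1 page */
        return 0;
    }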
162 | | /** |
163 | | * alloc_trace_bufs - performs initialization of the per-cpu trace buffers. |
164 | | * |
165 | | * This function is called at start of day in order to initialize the per-cpu |
166 | | * trace buffers. The trace buffers are then available for debugging use, via |
167 | | * the %TRACE_xD macros exported in <xen/trace.h>. |
168 | | * |
169 | | * This function may also be called later when enabling trace buffers |
170 | | * via the SET_SIZE hypercall. |
171 | | */ |
172 | | static int alloc_trace_bufs(unsigned int pages) |
173 | 0 | { |
174 | 0 | int i, cpu; |
175 | 0 | /* Start after a fixed-size array of NR_CPUS */ |
176 | 0 | uint32_t *t_info_mfn_list; |
177 | 0 | uint16_t t_info_first_offset; |
178 | 0 | uint16_t offset; |
179 | 0 |
|
180 | 0 | if ( t_info ) |
181 | 0 | return -EBUSY; |
182 | 0 |
|
183 | 0 | if ( pages == 0 ) |
184 | 0 | return -EINVAL; |
185 | 0 |
|
186 | 0 | /* Calculate the offset of the first mfn, in units of u32 */ |
187 | 0 | t_info_first_offset = calc_tinfo_first_offset(); |
188 | 0 |
|
189 | 0 | pages = calculate_tbuf_size(pages, t_info_first_offset); |
190 | 0 |
|
191 | 0 | t_info = alloc_xenheap_pages(get_order_from_pages(t_info_pages), 0); |
192 | 0 | if ( t_info == NULL ) |
193 | 0 | goto out_fail; |
194 | 0 |
|
195 | 0 | memset(t_info, 0, t_info_pages*PAGE_SIZE); |
196 | 0 |
|
197 | 0 | t_info_mfn_list = (uint32_t *)t_info; |
198 | 0 |
|
199 | 0 | t_info->tbuf_size = pages; |
200 | 0 |
|
201 | 0 | /* |
202 | 0 | * Allocate buffers for all of the cpus. |
203 | 0 | * If any fails, deallocate what you have so far and exit. |
204 | 0 | */ |
205 | 0 | for_each_online_cpu(cpu) |
206 | 0 | { |
207 | 0 | offset = t_info_first_offset + (cpu * pages); |
208 | 0 | t_info->mfn_offset[cpu] = offset; |
209 | 0 |
|
210 | 0 | for ( i = 0; i < pages; i++ ) |
211 | 0 | { |
212 | 0 | void *p = alloc_xenheap_pages(0, MEMF_bits(32 + PAGE_SHIFT)); |
213 | 0 | if ( !p ) |
214 | 0 | { |
215 | 0 | printk(XENLOG_INFO "xentrace: memory allocation failed " |
216 | 0 | "on cpu %d after %d pages\n", cpu, i); |
217 | 0 | t_info_mfn_list[offset + i] = 0; |
218 | 0 | goto out_dealloc; |
219 | 0 | } |
220 | 0 | t_info_mfn_list[offset + i] = virt_to_mfn(p); |
221 | 0 | } |
222 | 0 | } |
223 | 0 |
|
224 | 0 | /* |
225 | 0 | * Initialize buffers for all of the cpus. |
226 | 0 | */ |
227 | 0 | for_each_online_cpu(cpu) |
228 | 0 | { |
229 | 0 | struct t_buf *buf; |
230 | 0 | struct page_info *pg; |
231 | 0 |
|
232 | 0 | spin_lock_init(&per_cpu(t_lock, cpu)); |
233 | 0 |
|
234 | 0 | offset = t_info->mfn_offset[cpu]; |
235 | 0 |
|
236 | 0 | /* Initialize the buffer metadata */ |
237 | 0 | per_cpu(t_bufs, cpu) = buf = mfn_to_virt(t_info_mfn_list[offset]); |
238 | 0 | buf->cons = buf->prod = 0; |
239 | 0 |
|
240 | 0 | printk(XENLOG_INFO "xentrace: p%d mfn %x offset %u\n", |
241 | 0 | cpu, t_info_mfn_list[offset], offset); |
242 | 0 |
|
243 | 0 | /* Now share the trace pages */ |
244 | 0 | for ( i = 0; i < pages; i++ ) |
245 | 0 | { |
246 | 0 | pg = mfn_to_page(t_info_mfn_list[offset + i]); |
247 | 0 | share_xen_page_with_privileged_guests(pg, XENSHARE_writable); |
248 | 0 | } |
249 | 0 | } |
250 | 0 |
|
251 | 0 | /* Finally, share the t_info page */ |
252 | 0 | for(i = 0; i < t_info_pages; i++) |
253 | 0 | share_xen_page_with_privileged_guests( |
254 | 0 | virt_to_page(t_info) + i, XENSHARE_readonly); |
255 | 0 |
|
256 | 0 | data_size = (pages * PAGE_SIZE - sizeof(struct t_buf)); |
257 | 0 | t_buf_highwater = data_size >> 1; /* 50% high water */ |
258 | 0 | opt_tbuf_size = pages; |
259 | 0 |
|
260 | 0 | printk("xentrace: initialised\n"); |
261 | 0 | smp_wmb(); /* above must be visible before tb_init_done flag set */ |
262 | 0 | tb_init_done = 1; |
263 | 0 |
|
264 | 0 | return 0; |
265 | 0 |
|
266 | 0 | out_dealloc: |
267 | 0 | for_each_online_cpu(cpu) |
268 | 0 | { |
269 | 0 | offset = t_info->mfn_offset[cpu]; |
270 | 0 | if ( !offset ) |
271 | 0 | continue; |
272 | 0 | for ( i = 0; i < pages; i++ ) |
273 | 0 | { |
274 | 0 | uint32_t mfn = t_info_mfn_list[offset + i]; |
275 | 0 | if ( !mfn ) |
276 | 0 | break; |
277 | 0 | ASSERT(!(mfn_to_page(mfn)->count_info & PGC_allocated)); |
278 | 0 | free_xenheap_pages(mfn_to_virt(mfn), 0); |
279 | 0 | } |
280 | 0 | } |
281 | 0 | free_xenheap_pages(t_info, get_order_from_pages(t_info_pages)); |
282 | 0 | t_info = NULL; |
283 | 0 | out_fail: |
284 | 0 | printk(XENLOG_WARNING "xentrace: allocation failed! Tracing disabled.\n"); |
285 | 0 | return -ENOMEM; |
286 | 0 | } |
287 | | |
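To make the offset bookkeeping above concrete: t_info is treated as a flat array of uint32_t, with a small header followed by one mfn list per online cpu at mfn_offset[cpu] = t_info_first_offset + cpu * pages. A sketch with example values only:

    #include <stdio.h>

    int main(void)
    {
        unsigned int t_info_first_offset = 3, pages = 4, cpus = 2;  /* example values */

        for ( unsigned int cpu = 0; cpu < cpus; cpu++ )
        {
            unsigned int offset = t_info_first_offset + cpu * pages;
            printf("cpu%u: mfn list occupies uint32_t words [%u, %u)\n",
                   cpu, offset, offset + pages);
        }
        return 0;
    }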
288 | | |
289 | | /** |
290 | | * tb_set_size - handle the logic involved with dynamically allocating tbufs |
291 | | * |
292 | | * This function is called when the SET_SIZE hypercall is done. |
293 | | */ |
294 | | static int tb_set_size(unsigned int pages) |
295 | 0 | { |
296 | 0 | /* |
297 | 0 | * Setting size is a one-shot operation. It can be done either at |
298 | 0 | * boot time or via control tools, but not by both. Once buffers |
299 | 0 | * are created they cannot be destroyed. |
300 | 0 | */ |
301 | 0 | if ( opt_tbuf_size && pages != opt_tbuf_size ) |
302 | 0 | { |
303 | 0 | printk(XENLOG_INFO "xentrace: tb_set_size from %d to %d " |
304 | 0 | "not implemented\n", |
305 | 0 | opt_tbuf_size, pages); |
306 | 0 | return -EINVAL; |
307 | 0 | } |
308 | 0 |
|
309 | 0 | return alloc_trace_bufs(pages); |
310 | 0 | } |
311 | | |
312 | | int trace_will_trace_event(u32 event) |
313 | 0 | { |
314 | 0 | if ( !tb_init_done ) |
315 | 0 | return 0; |
316 | 0 |
|
317 | 0 | /* |
318 | 0 | * Copied from __trace_var() |
319 | 0 | */ |
320 | 0 | if ( (tb_event_mask & event) == 0 ) |
321 | 0 | return 0; |
322 | 0 |
|
323 | 0 | /* match class */ |
324 | 0 | if ( ((tb_event_mask >> TRC_CLS_SHIFT) & (event >> TRC_CLS_SHIFT)) == 0 ) |
325 | 0 | return 0; |
326 | 0 |
|
327 | 0 | /* then match subclass */ |
328 | 0 | if ( (((tb_event_mask >> TRC_SUBCLS_SHIFT) & 0xf ) |
329 | 0 | & ((event >> TRC_SUBCLS_SHIFT) & 0xf )) == 0 ) |
330 | 0 | return 0; |
331 | 0 |
|
332 | 0 | if ( !cpumask_test_cpu(smp_processor_id(), &tb_cpu_mask) ) |
333 | 0 | return 0; |
334 | 0 |
|
335 | 0 | return 1; |
336 | 0 | } |
337 | | |
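The three-stage filter above (raw event mask, then class bits, then a 4-bit subclass field) can be exercised on its own. The shift values and the example mask below are parameters invented for this sketch, not the real TRC_CLS_SHIFT / TRC_SUBCLS_SHIFT definitions from <public/trace.h>:

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdint.h>

    #define EX_CLS_SHIFT    16    /* assumed: class bits live at and above bit 16 */
    #define EX_SUBCLS_SHIFT 12    /* assumed: 4 subclass bits just below the class */

    static bool mask_matches(uint32_t mask, uint32_t event)
    {
        if ( (mask & event) == 0 )
            return false;                                   /* no overlap at all */
        if ( ((mask >> EX_CLS_SHIFT) & (event >> EX_CLS_SHIFT)) == 0 )
            return false;                                   /* class mismatch */
        return (((mask >> EX_SUBCLS_SHIFT) & 0xf) &
                ((event >> EX_SUBCLS_SHIFT) & 0xf)) != 0;   /* subclass check */
    }

    int main(void)
    {
        uint32_t mask = 0x0002f000;     /* example: one class, all its subclasses */

        printf("%d\n", mask_matches(mask, 0x00021002));     /* same class  -> 1 */
        printf("%d\n", mask_matches(mask, 0x00081002));     /* other class -> 0 */
        return 0;
    }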
338 | | /** |
339 | | * init_trace_bufs - performs initialization of the per-cpu trace buffers. |
340 | | * |
341 | | * This function is called at start of day in order to initialize the per-cpu |
342 | | * trace buffers. The trace buffers are then available for debugging use, via |
343 | | * the %TRACE_xD macros exported in <xen/trace.h>. |
344 | | */ |
345 | | void __init init_trace_bufs(void) |
346 | 1 | { |
347 | 1 | cpumask_setall(&tb_cpu_mask); |
348 | 1 | register_cpu_notifier(&cpu_nfb); |
349 | 1 | |
350 | 1 | if ( opt_tbuf_size ) |
351 | 0 | { |
352 | 0 | if ( alloc_trace_bufs(opt_tbuf_size) ) |
353 | 0 | { |
354 | 0 | printk("xentrace: allocation size %d failed, disabling\n", |
355 | 0 | opt_tbuf_size); |
356 | 0 | opt_tbuf_size = 0; |
357 | 0 | } |
358 | 0 | else if ( opt_tevt_mask ) |
359 | 0 | { |
360 | 0 | printk("xentrace: Starting tracing, enabling mask %x\n", |
361 | 0 | opt_tevt_mask); |
362 | 0 | tb_event_mask = opt_tevt_mask; |
363 | 0 | tb_init_done=1; |
364 | 0 | } |
365 | 0 | } |
366 | 1 | } |
367 | | |
368 | | /** |
369 | | * tb_control - sysctl operations on trace buffers. |
370 | | * @tbc: a pointer to a struct xen_sysctl_tbuf_op to be filled out |
371 | | */ |
372 | | int tb_control(struct xen_sysctl_tbuf_op *tbc) |
373 | 0 | { |
374 | 0 | static DEFINE_SPINLOCK(lock); |
375 | 0 | int rc = 0; |
376 | 0 |
|
377 | 0 | spin_lock(&lock); |
378 | 0 |
|
379 | 0 | switch ( tbc->cmd ) |
380 | 0 | { |
381 | 0 | case XEN_SYSCTL_TBUFOP_get_info: |
382 | 0 | tbc->evt_mask = tb_event_mask; |
383 | 0 | tbc->buffer_mfn = t_info ? virt_to_mfn(t_info) : 0; |
384 | 0 | tbc->size = t_info_pages * PAGE_SIZE; |
385 | 0 | break; |
386 | 0 | case XEN_SYSCTL_TBUFOP_set_cpu_mask: |
387 | 0 | { |
388 | 0 | cpumask_var_t mask; |
389 | 0 |
|
390 | 0 | rc = xenctl_bitmap_to_cpumask(&mask, &tbc->cpu_mask); |
391 | 0 | if ( !rc ) |
392 | 0 | { |
393 | 0 | cpumask_copy(&tb_cpu_mask, mask); |
394 | 0 | free_cpumask_var(mask); |
395 | 0 | } |
396 | 0 | } |
397 | 0 | break; |
398 | 0 | case XEN_SYSCTL_TBUFOP_set_evt_mask: |
399 | 0 | tb_event_mask = tbc->evt_mask; |
400 | 0 | break; |
401 | 0 | case XEN_SYSCTL_TBUFOP_set_size: |
402 | 0 | rc = tb_set_size(tbc->size); |
403 | 0 | break; |
404 | 0 | case XEN_SYSCTL_TBUFOP_enable: |
405 | 0 | /* Enable trace buffers. Check buffers are already allocated. */ |
406 | 0 | if ( opt_tbuf_size == 0 ) |
407 | 0 | rc = -EINVAL; |
408 | 0 | else |
409 | 0 | tb_init_done = 1; |
410 | 0 | break; |
411 | 0 | case XEN_SYSCTL_TBUFOP_disable: |
412 | 0 | { |
413 | 0 | /* |
414 | 0 | * Disable trace buffers. Just stops new records from being written, |
415 | 0 | * does not deallocate any memory. |
416 | 0 | */ |
417 | 0 | int i; |
418 | 0 |
|
419 | 0 | tb_init_done = 0; |
420 | 0 | smp_wmb(); |
421 | 0 | /* Clear any lost-record info so we don't get phantom lost records next time we |
422 | 0 | * start tracing. Grab the lock to make sure we're not racing anyone. After this |
423 | 0 | * hypercall returns, no more records should be placed into the buffers. */ |
424 | 0 | for_each_online_cpu(i) |
425 | 0 | { |
426 | 0 | unsigned long flags; |
427 | 0 | spin_lock_irqsave(&per_cpu(t_lock, i), flags); |
428 | 0 | per_cpu(lost_records, i)=0; |
429 | 0 | spin_unlock_irqrestore(&per_cpu(t_lock, i), flags); |
430 | 0 | } |
431 | 0 | } |
432 | 0 | break; |
433 | 0 | default: |
434 | 0 | rc = -EINVAL; |
435 | 0 | break; |
436 | 0 | } |
437 | 0 |
|
438 | 0 | spin_unlock(&lock); |
439 | 0 |
|
440 | 0 | return rc; |
441 | 0 | } |
442 | | |
443 | | static inline unsigned int calc_rec_size(bool_t cycles, unsigned int extra) |
444 | 0 | { |
445 | 0 | unsigned int rec_size = 4; |
446 | 0 |
|
447 | 0 | if ( cycles ) |
448 | 0 | rec_size += 8; |
449 | 0 | rec_size += extra; |
450 | 0 | return rec_size; |
451 | 0 | } |
452 | | |
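A quick standalone check of the sizing rule above: a record is a 4-byte header word, an optional 8-byte timestamp, plus the caller-supplied extra payload (already rounded up to 32-bit words). The 28-byte case matches LOST_REC_SIZE defined further down in this file.

    #include <assert.h>
    #include <stdbool.h>

    static unsigned int rec_size(bool cycles, unsigned int extra)
    {
        return 4 + (cycles ? 8 : 0) + extra;   /* header + optional TSC + payload */
    }

    int main(void)
    {
        assert(rec_size(false, 0)  == 4);      /* bare event, no timestamp */
        assert(rec_size(true,  0)  == 12);     /* event plus TSC */
        assert(rec_size(true,  16) == 28);     /* the lost-records record below */
        return 0;
    }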
453 | | static inline bool_t bogus(u32 prod, u32 cons) |
454 | 0 | { |
455 | 0 | if ( unlikely(prod & 3) || unlikely(prod >= 2 * data_size) || |
456 | 0 | unlikely(cons & 3) || unlikely(cons >= 2 * data_size) ) |
457 | 0 | { |
458 | 0 | tb_init_done = 0; |
459 | 0 | printk(XENLOG_WARNING "trc#%u: bogus prod (%08x) and/or cons (%08x)\n", |
460 | 0 | smp_processor_id(), prod, cons); |
461 | 0 | return 1; |
462 | 0 | } |
463 | 0 | return 0; |
464 | 0 | } |
465 | | |
466 | | static inline u32 calc_unconsumed_bytes(const struct t_buf *buf) |
467 | 0 | { |
468 | 0 | u32 prod = buf->prod, cons = buf->cons; |
469 | 0 | s32 x; |
470 | 0 |
|
471 | 0 | barrier(); /* must read buf->prod and buf->cons only once */ |
472 | 0 | if ( bogus(prod, cons) ) |
473 | 0 | return data_size; |
474 | 0 |
|
475 | 0 | x = prod - cons; |
476 | 0 | if ( x < 0 ) |
477 | 0 | x += 2*data_size; |
478 | 0 |
|
479 | 0 | ASSERT(x >= 0); |
480 | 0 | ASSERT(x <= data_size); |
481 | 0 |
|
482 | 0 | return x; |
483 | 0 | } |
484 | | |
485 | | static inline u32 calc_bytes_to_wrap(const struct t_buf *buf) |
486 | 0 | { |
487 | 0 | u32 prod = buf->prod, cons = buf->cons; |
488 | 0 | s32 x; |
489 | 0 |
|
490 | 0 | barrier(); /* must read buf->prod and buf->cons only once */ |
491 | 0 | if ( bogus(prod, cons) ) |
492 | 0 | return 0; |
493 | 0 |
|
494 | 0 | x = data_size - prod; |
495 | 0 | if ( x <= 0 ) |
496 | 0 | x += data_size; |
497 | 0 |
|
498 | 0 | ASSERT(x > 0); |
499 | 0 | ASSERT(x <= data_size); |
500 | 0 |
|
501 | 0 | return x; |
502 | 0 | } |
503 | | |
504 | | static inline u32 calc_bytes_avail(const struct t_buf *buf) |
505 | 0 | { |
506 | 0 | return data_size - calc_unconsumed_bytes(buf); |
507 | 0 | } |
508 | | |
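These helpers depend on prod and cons running over [0, 2*data_size) rather than [0, data_size): the extra lap is what lets a completely full buffer (prod - cons == data_size) be told apart from an empty one. A standalone sketch of the same arithmetic, with data_size chosen arbitrarily for the example:

    #include <stdio.h>
    #include <stdint.h>

    static const uint32_t data_size = 4064;    /* example: one page minus header */

    static uint32_t unconsumed(uint32_t prod, uint32_t cons)
    {
        int32_t x = prod - cons;
        if ( x < 0 )
            x += 2 * data_size;                /* prod is one lap ahead of cons */
        return x;
    }

    static uint32_t bytes_to_wrap(uint32_t prod)
    {
        int32_t x = data_size - prod;          /* distance to the physical end */
        if ( x <= 0 )
            x += data_size;                    /* prod is on its second lap */
        return x;
    }

    int main(void)
    {
        printf("unconsumed(100, 8000) = %u\n", unconsumed(100, 8000)); /* 228  */
        printf("bytes_to_wrap(5000)   = %u\n", bytes_to_wrap(5000));   /* 3128 */
        return 0;
    }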
509 | | static unsigned char *next_record(const struct t_buf *buf, uint32_t *next, |
510 | | unsigned char **next_page, |
511 | | uint32_t *offset_in_page) |
512 | 0 | { |
513 | 0 | u32 x = buf->prod, cons = buf->cons; |
514 | 0 | uint16_t per_cpu_mfn_offset; |
515 | 0 | uint32_t per_cpu_mfn_nr; |
516 | 0 | uint32_t *mfn_list; |
517 | 0 | uint32_t mfn; |
518 | 0 | unsigned char *this_page; |
519 | 0 |
|
520 | 0 | barrier(); /* must read buf->prod and buf->cons only once */ |
521 | 0 | *next = x; |
522 | 0 | if ( !tb_init_done || bogus(x, cons) ) |
523 | 0 | return NULL; |
524 | 0 |
|
525 | 0 | if ( x >= data_size ) |
526 | 0 | x -= data_size; |
527 | 0 |
|
528 | 0 | ASSERT(x < data_size); |
529 | 0 |
|
530 | 0 | /* add leading header to get total offset of next record */ |
531 | 0 | x += sizeof(struct t_buf); |
532 | 0 | *offset_in_page = x & ~PAGE_MASK; |
533 | 0 |
|
534 | 0 | /* offset into array of mfns */ |
535 | 0 | per_cpu_mfn_nr = x >> PAGE_SHIFT; |
536 | 0 | per_cpu_mfn_offset = t_info->mfn_offset[smp_processor_id()]; |
537 | 0 | mfn_list = (uint32_t *)t_info; |
538 | 0 | mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr]; |
539 | 0 | this_page = mfn_to_virt(mfn); |
540 | 0 | if (per_cpu_mfn_nr + 1 >= opt_tbuf_size) |
541 | 0 | { |
542 | 0 | /* reached end of buffer? */ |
543 | 0 | *next_page = NULL; |
544 | 0 | } |
545 | 0 | else |
546 | 0 | { |
547 | 0 | mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr + 1]; |
548 | 0 | *next_page = mfn_to_virt(mfn); |
549 | 0 | } |
550 | 0 | return this_page; |
551 | 0 | } |
552 | | |
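The page/offset split in next_record() is plain fixed-size paging arithmetic on the virtual ring position. A tiny sketch with assumed values (4 KiB pages, a 32-byte stand-in for sizeof(struct t_buf)):

    #include <stdio.h>
    #include <stdint.h>

    #define EXAMPLE_PAGE_SHIFT 12
    #define EXAMPLE_PAGE_SIZE  (1u << EXAMPLE_PAGE_SHIFT)

    int main(void)
    {
        uint32_t header = 32;                  /* stand-in for sizeof(struct t_buf) */
        uint32_t x = 9000 + header;            /* record position plus leading header */

        printf("page index %u, offset in page %u\n",
               x >> EXAMPLE_PAGE_SHIFT, x & (EXAMPLE_PAGE_SIZE - 1));  /* 2, 840 */
        return 0;
    }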
553 | | static inline void __insert_record(struct t_buf *buf, |
554 | | unsigned long event, |
555 | | unsigned int extra, |
556 | | bool_t cycles, |
557 | | unsigned int rec_size, |
558 | | const void *extra_data) |
559 | 0 | { |
560 | 0 | struct t_rec split_rec, *rec; |
561 | 0 | uint32_t *dst; |
562 | 0 | unsigned char *this_page, *next_page; |
563 | 0 | unsigned int extra_word = extra / sizeof(u32); |
564 | 0 | unsigned int local_rec_size = calc_rec_size(cycles, extra); |
565 | 0 | uint32_t next; |
566 | 0 | uint32_t offset; |
567 | 0 | uint32_t remaining; |
568 | 0 |
|
569 | 0 | BUG_ON(local_rec_size != rec_size); |
570 | 0 | BUG_ON(extra & 3); |
571 | 0 |
|
572 | 0 | this_page = next_record(buf, &next, &next_page, &offset); |
573 | 0 | if ( !this_page ) |
574 | 0 | return; |
575 | 0 |
|
576 | 0 | remaining = PAGE_SIZE - offset; |
577 | 0 |
|
578 | 0 | if ( unlikely(rec_size > remaining) ) |
579 | 0 | { |
580 | 0 | if ( next_page == NULL ) |
581 | 0 | { |
582 | 0 | /* access beyond end of buffer */ |
583 | 0 | printk(XENLOG_WARNING |
584 | 0 | "%s: size=%08x prod=%08x cons=%08x rec=%u remaining=%u\n", |
585 | 0 | __func__, data_size, next, buf->cons, rec_size, remaining); |
586 | 0 | return; |
587 | 0 | } |
588 | 0 | rec = &split_rec; |
589 | 0 | } else { |
590 | 0 | rec = (struct t_rec*)(this_page + offset); |
591 | 0 | } |
592 | 0 |
|
593 | 0 | rec->event = event; |
594 | 0 | rec->extra_u32 = extra_word; |
595 | 0 | dst = rec->u.nocycles.extra_u32; |
596 | 0 | if ( (rec->cycles_included = cycles) != 0 ) |
597 | 0 | { |
598 | 0 | u64 tsc = (u64)get_cycles(); |
599 | 0 | rec->u.cycles.cycles_lo = (uint32_t)tsc; |
600 | 0 | rec->u.cycles.cycles_hi = (uint32_t)(tsc >> 32); |
601 | 0 | dst = rec->u.cycles.extra_u32; |
602 | 0 | } |
603 | 0 |
|
604 | 0 | if ( extra_data && extra ) |
605 | 0 | memcpy(dst, extra_data, extra); |
606 | 0 |
|
607 | 0 | if ( unlikely(rec_size > remaining) ) |
608 | 0 | { |
609 | 0 | memcpy(this_page + offset, rec, remaining); |
610 | 0 | memcpy(next_page, (char *)rec + remaining, rec_size - remaining); |
611 | 0 | } |
612 | 0 |
|
613 | 0 | smp_wmb(); |
614 | 0 |
|
615 | 0 | next += rec_size; |
616 | 0 | if ( next >= 2*data_size ) |
617 | 0 | next -= 2*data_size; |
618 | 0 | ASSERT(next < 2*data_size); |
619 | 0 | buf->prod = next; |
620 | 0 | } |
621 | | |
622 | | static inline void insert_wrap_record(struct t_buf *buf, |
623 | | unsigned int size) |
624 | 0 | { |
625 | 0 | u32 space_left = calc_bytes_to_wrap(buf); |
626 | 0 | unsigned int extra_space = space_left - sizeof(u32); |
627 | 0 | bool_t cycles = 0; |
628 | 0 |
|
629 | 0 | BUG_ON(space_left > size); |
630 | 0 |
|
631 | 0 | /* We may need to add cycles to take up enough space... */ |
632 | 0 | if ( (extra_space/sizeof(u32)) > TRACE_EXTRA_MAX ) |
633 | 0 | { |
634 | 0 | cycles = 1; |
635 | 0 | extra_space -= sizeof(u64); |
636 | 0 | ASSERT((extra_space/sizeof(u32)) <= TRACE_EXTRA_MAX); |
637 | 0 | } |
638 | 0 |
|
639 | 0 | __insert_record(buf, TRC_TRACE_WRAP_BUFFER, extra_space, cycles, |
640 | 0 | space_left, NULL); |
641 | 0 | } |
642 | | |
643 | 0 | #define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */ |
644 | | |
645 | | static inline void insert_lost_records(struct t_buf *buf) |
646 | 0 | { |
647 | 0 | struct __packed { |
648 | 0 | u32 lost_records; |
649 | 0 | u16 did, vid; |
650 | 0 | u64 first_tsc; |
651 | 0 | } ed; |
652 | 0 |
|
653 | 0 | ed.vid = current->vcpu_id; |
654 | 0 | ed.did = current->domain->domain_id; |
655 | 0 | ed.lost_records = this_cpu(lost_records); |
656 | 0 | ed.first_tsc = this_cpu(lost_records_first_tsc); |
657 | 0 |
|
658 | 0 | this_cpu(lost_records) = 0; |
659 | 0 |
|
660 | 0 | __insert_record(buf, TRC_LOST_RECORDS, sizeof(ed), 1 /* cycles */, |
661 | 0 | LOST_REC_SIZE, &ed); |
662 | 0 | } |
663 | | |
664 | | /* |
665 | | * Notification is performed from a softirq tasklet to avoid deadlocks with contexts |
666 | | * which __trace_var() may be called from (e.g., scheduler critical regions). |
667 | | */ |
668 | | static void trace_notify_dom0(unsigned long unused) |
669 | 0 | { |
670 | 0 | send_global_virq(VIRQ_TBUF); |
671 | 0 | } |
672 | | static DECLARE_SOFTIRQ_TASKLET(trace_notify_dom0_tasklet, |
673 | | trace_notify_dom0, 0); |
674 | | |
675 | | /** |
676 | | * __trace_var - Enters a trace tuple into the trace buffer for the current CPU. |
677 | | * @event: the event type being logged |
678 | | * @cycles: include tsc timestamp into trace record |
679 | | * @extra: size of additional trace data in bytes |
680 | | * @extra_data: pointer to additional trace data |
681 | | * |
682 | | * Logs a trace record into the appropriate buffer. |
683 | | */ |
684 | | void __trace_var(u32 event, bool_t cycles, unsigned int extra, |
685 | | const void *extra_data) |
686 | 0 | { |
687 | 0 | struct t_buf *buf; |
688 | 0 | unsigned long flags; |
689 | 0 | u32 bytes_to_tail, bytes_to_wrap; |
690 | 0 | unsigned int rec_size, total_size; |
691 | 0 | unsigned int extra_word; |
692 | 0 | bool_t started_below_highwater; |
693 | 0 |
|
694 | 0 | if( !tb_init_done ) |
695 | 0 | return; |
696 | 0 |
|
697 | 0 | /* Convert byte count into word count, rounding up */ |
698 | 0 | extra_word = (extra / sizeof(u32)); |
699 | 0 | if ( (extra % sizeof(u32)) != 0 ) |
700 | 0 | extra_word++; |
701 | 0 | |
702 | 0 | ASSERT(extra_word <= TRACE_EXTRA_MAX); |
703 | 0 | extra_word = min_t(int, extra_word, TRACE_EXTRA_MAX); |
704 | 0 |
|
705 | 0 | /* Round size up to nearest word */ |
706 | 0 | extra = extra_word * sizeof(u32); |
707 | 0 |
|
708 | 0 | if ( (tb_event_mask & event) == 0 ) |
709 | 0 | return; |
710 | 0 |
|
711 | 0 | /* match class */ |
712 | 0 | if ( ((tb_event_mask >> TRC_CLS_SHIFT) & (event >> TRC_CLS_SHIFT)) == 0 ) |
713 | 0 | return; |
714 | 0 |
|
715 | 0 | /* then match subclass */ |
716 | 0 | if ( (((tb_event_mask >> TRC_SUBCLS_SHIFT) & 0xf ) |
717 | 0 | & ((event >> TRC_SUBCLS_SHIFT) & 0xf )) == 0 ) |
718 | 0 | return; |
719 | 0 |
|
720 | 0 | if ( !cpumask_test_cpu(smp_processor_id(), &tb_cpu_mask) ) |
721 | 0 | return; |
722 | 0 |
|
723 | 0 | /* Read tb_init_done /before/ t_bufs. */ |
724 | 0 | smp_rmb(); |
725 | 0 |
|
726 | 0 | spin_lock_irqsave(&this_cpu(t_lock), flags); |
727 | 0 |
|
728 | 0 | buf = this_cpu(t_bufs); |
729 | 0 |
|
730 | 0 | if ( unlikely(!buf) ) |
731 | 0 | { |
732 | 0 | /* Make gcc happy */ |
733 | 0 | started_below_highwater = 0; |
734 | 0 | goto unlock; |
735 | 0 | } |
736 | 0 |
|
737 | 0 | started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater); |
738 | 0 |
|
739 | 0 | /* Calculate the record size */ |
740 | 0 | rec_size = calc_rec_size(cycles, extra); |
741 | 0 | |
742 | 0 | /* How many bytes are available in the buffer? */ |
743 | 0 | bytes_to_tail = calc_bytes_avail(buf); |
744 | 0 | |
745 | 0 | /* How many bytes until the next wrap-around? */ |
746 | 0 | bytes_to_wrap = calc_bytes_to_wrap(buf); |
747 | 0 | |
748 | 0 | /* |
749 | 0 | * Calculate expected total size to commit this record by |
750 | 0 | * doing a dry-run. |
751 | 0 | */ |
752 | 0 | total_size = 0; |
753 | 0 |
|
754 | 0 | /* First, check to see if we need to include a lost_record. |
755 | 0 | */ |
756 | 0 | if ( this_cpu(lost_records) ) |
757 | 0 | { |
758 | 0 | if ( LOST_REC_SIZE > bytes_to_wrap ) |
759 | 0 | { |
760 | 0 | total_size += bytes_to_wrap; |
761 | 0 | bytes_to_wrap = data_size; |
762 | 0 | } |
763 | 0 | total_size += LOST_REC_SIZE; |
764 | 0 | bytes_to_wrap -= LOST_REC_SIZE; |
765 | 0 |
|
766 | 0 | /* LOST_REC might line up perfectly with the buffer wrap */ |
767 | 0 | if ( bytes_to_wrap == 0 ) |
768 | 0 | bytes_to_wrap = data_size; |
769 | 0 | } |
770 | 0 |
|
771 | 0 | if ( rec_size > bytes_to_wrap ) |
772 | 0 | { |
773 | 0 | total_size += bytes_to_wrap; |
774 | 0 | } |
775 | 0 | total_size += rec_size; |
776 | 0 |
|
777 | 0 | /* Do we have enough space for everything? */ |
778 | 0 | if ( total_size > bytes_to_tail ) |
779 | 0 | { |
780 | 0 | if ( ++this_cpu(lost_records) == 1 ) |
781 | 0 | this_cpu(lost_records_first_tsc)=(u64)get_cycles(); |
782 | 0 | started_below_highwater = 0; |
783 | 0 | goto unlock; |
784 | 0 | } |
785 | 0 |
|
786 | 0 | /* |
787 | 0 | * Now, actually write information |
788 | 0 | */ |
789 | 0 | bytes_to_wrap = calc_bytes_to_wrap(buf); |
790 | 0 |
|
791 | 0 | if ( this_cpu(lost_records) ) |
792 | 0 | { |
793 | 0 | if ( LOST_REC_SIZE > bytes_to_wrap ) |
794 | 0 | { |
795 | 0 | insert_wrap_record(buf, LOST_REC_SIZE); |
796 | 0 | bytes_to_wrap = data_size; |
797 | 0 | } |
798 | 0 | insert_lost_records(buf); |
799 | 0 | bytes_to_wrap -= LOST_REC_SIZE; |
800 | 0 |
|
801 | 0 | /* LOST_REC might line up perfectly with the buffer wrap */ |
802 | 0 | if ( bytes_to_wrap == 0 ) |
803 | 0 | bytes_to_wrap = data_size; |
804 | 0 | } |
805 | 0 |
|
806 | 0 | if ( rec_size > bytes_to_wrap ) |
807 | 0 | insert_wrap_record(buf, rec_size); |
808 | 0 |
|
809 | 0 | /* Write the original record */ |
810 | 0 | __insert_record(buf, event, extra, cycles, rec_size, extra_data); |
811 | 0 |
|
812 | 0 | unlock: |
813 | 0 | spin_unlock_irqrestore(&this_cpu(t_lock), flags); |
814 | 0 |
|
815 | 0 | /* Notify trace buffer consumer that we've crossed the high water mark. */ |
816 | 0 | if ( likely(buf!=NULL) |
817 | 0 | && started_below_highwater |
818 | 0 | && (calc_unconsumed_bytes(buf) >= t_buf_highwater) ) |
819 | 0 | tasklet_schedule(&trace_notify_dom0_tasklet); |
820 | 0 | } |
821 | | |
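In-tree callers normally reach __trace_var() through the wrappers exported in <xen/trace.h> rather than calling it directly. A hypothetical caller, sketched here only to show the calling convention (the event id and the helper name are made up; tb_init_done, unlikely() and __trace_var() are the real symbols from this file and its headers):

    /* Hypothetical tracepoint: log two 32-bit values with a timestamp. */
    #define EXAMPLE_TRC_EVENT 0x00021001u      /* placeholder, not a real event id */

    static void example_trace_point(uint32_t a, uint32_t b)
    {
        uint32_t d[2] = { a, b };

        if ( unlikely(tb_init_done) )          /* cheap check before taking the lock */
            __trace_var(EXAMPLE_TRC_EVENT, 1 /* include cycles */, sizeof(d), d);
    }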
822 | | void __trace_hypercall(uint32_t event, unsigned long op, |
823 | | const xen_ulong_t *args) |
824 | 0 | { |
825 | 0 | struct __packed { |
826 | 0 | uint32_t op; |
827 | 0 | uint32_t args[6]; |
828 | 0 | } d; |
829 | 0 | uint32_t *a = d.args; |
830 | 0 |
|
831 | 0 | #define APPEND_ARG32(i) \ |
832 | 0 | do { \ |
833 | 0 | unsigned i_ = (i); \ |
834 | 0 | *a++ = args[(i_)]; \ |
835 | 0 | d.op |= TRC_PV_HYPERCALL_V2_ARG_32(i_); \ |
836 | 0 | } while( 0 ) |
837 | 0 |
|
838 | 0 | /* |
839 | 0 | * This shouldn't happen as @op should be small enough but just in |
840 | 0 | * case, warn if the argument bits in the trace record would |
841 | 0 | * clobber the hypercall op. |
842 | 0 | */ |
843 | 0 | WARN_ON(op & TRC_PV_HYPERCALL_V2_ARG_MASK); |
844 | 0 |
|
845 | 0 | d.op = op; |
846 | 0 |
|
847 | 0 | switch ( op ) |
848 | 0 | { |
849 | 0 | case __HYPERVISOR_mmu_update: |
850 | 0 | APPEND_ARG32(1); /* count */ |
851 | 0 | break; |
852 | 0 | case __HYPERVISOR_multicall: |
853 | 0 | APPEND_ARG32(1); /* count */ |
854 | 0 | break; |
855 | 0 | case __HYPERVISOR_grant_table_op: |
856 | 0 | APPEND_ARG32(0); /* cmd */ |
857 | 0 | APPEND_ARG32(2); /* count */ |
858 | 0 | break; |
859 | 0 | case __HYPERVISOR_vcpu_op: |
860 | 0 | APPEND_ARG32(0); /* cmd */ |
861 | 0 | APPEND_ARG32(1); /* vcpuid */ |
862 | 0 | break; |
863 | 0 | case __HYPERVISOR_mmuext_op: |
864 | 0 | APPEND_ARG32(1); /* count */ |
865 | 0 | break; |
866 | 0 | case __HYPERVISOR_sched_op: |
867 | 0 | APPEND_ARG32(0); /* cmd */ |
868 | 0 | break; |
869 | 0 | } |
870 | 0 |
|
871 | 0 | __trace_var(event, 1, sizeof(uint32_t) * (1 + (a - d.args)), &d); |
872 | 0 | } |
873 | | |
874 | | /* |
875 | | * Local variables: |
876 | | * mode: C |
877 | | * c-file-style: "BSD" |
878 | | * c-basic-offset: 4 |
879 | | * tab-width: 4 |
880 | | * indent-tabs-mode: nil |
881 | | * End: |
882 | | */ |