debuggers.hg

view tools/xenmon/xenbaked.c @ 22906:700ac6445812

Now add KDB to the non-kdb tree
author Mukesh Rathor
date Thu Feb 03 15:42:41 2011 -0800 (2011-02-03)
parents 60782cefa154
children
line source
/******************************************************************************
 * tools/xenbaked.c
 *
 * Tool for collecting raw trace buffer data from Xen and
 * performing some accumulation operations and other processing
 * on it.
 *
 * Copyright (C) 2004 by Intel Research Cambridge
 * Copyright (C) 2005 by Hewlett Packard, Palo Alto and Fort Collins
 * Copyright (C) 2006 by Hewlett Packard Fort Collins
 *
 * Authors: Diwaker Gupta, diwaker.gupta@hp.com
 *          Rob Gardner, rob.gardner@hp.com
 *          Lucy Cherkasova, lucy.cherkasova@hp.com
 * Much code based on xentrace, authored by Mark Williamson,
 * mark.a.williamson@intel.com
 * Date:   November, 2005
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; under version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <time.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <signal.h>
#include <xenctrl.h>
#include <xen/xen.h>
#include <string.h>
#include <sys/select.h>
#include <getopt.h>
#define PERROR(_m, _a...)                                       \
    do {                                                        \
        int __saved_errno = errno;                              \
        fprintf(stderr, "ERROR: " _m " (%d = %s)\n" , ## _a ,   \
                __saved_errno, strerror(__saved_errno));        \
        errno = __saved_errno;                                  \
    } while (0)
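
/*
 * Hypothetical call site, just to illustrate the macro: errno is saved and
 * restored around the fprintf() so the caller can still inspect it, e.g.
 *
 *     if ( write(fd, buf, len) != len )
 *         PERROR("short write of %d bytes", len);
 */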
typedef struct { int counter; } atomic_t;
#define _atomic_read(v)  ((v).counter)

#include <xen/trace.h>
#include "xenbaked.h"
/***** Compile time configuration of defaults ********************************/

/* when we've got more records than this waiting, we log it to the output */
#define NEW_DATA_THRESH 1

/* sleep for this long (milliseconds) between checking the trace buffers */
#define POLL_SLEEP_MILLIS 100

/* Size of time period represented by each sample */
#define MS_PER_SAMPLE 100

/* CPU Frequency */
#define MHZ
#define CPU_FREQ 2660 MHZ
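
/* Note: MHZ deliberately expands to nothing, so CPU_FREQ is just the integer
 * 2660, read as MHz.  It is only a compile-time fallback; get_num_cpus()
 * below overwrites opts.cpu_freq with the real value from xc_physinfo(). */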
/***** The code **************************************************************/

typedef struct settings_st {
    struct timespec poll_sleep;
    unsigned long new_data_thresh;
    unsigned long ms_per_sample;
    double cpu_freq;
} settings_t;

struct t_struct {
    const struct t_info *t_info; /* Structure with information about individual buffers */
    struct t_buf **meta;         /* Pointers to trace buffer metadata */
    unsigned char **data;        /* Pointers to trace buffer data areas */
};

settings_t opts;
int interrupted = 0; /* gets set if we get a SIGHUP, SIGTERM, or SIGINT */
int rec_count = 0;
int wakeups = 0;
time_t start_time;
int dom0_flips = 0;

_new_qos_data *new_qos;
_new_qos_data **cpu_qos_data;

int global_cpu;
uint64_t global_now;

// array of currently running domains, indexed by cpu
int *running = NULL;

// number of cpus on this platform
int NCPU = 0;
static void advance_next_datapoint(uint64_t);
static void alloc_qos_data(int ncpu);
static int process_record(int, struct t_rec *);
static void qos_kill_thread(int domid);
static void init_current(int ncpu)
{
    running = calloc(ncpu, sizeof(int));
    NCPU = ncpu;
    printf("Initialized with %d %s\n", ncpu, (ncpu == 1) ? "cpu" : "cpus");
}
static int is_current(int domain, int cpu)
{
    //  int i;

    //  for (i=0; i<NCPU; i++)
    if (running[cpu] == domain)
        return 1;
    return 0;
}

#if 0 /* unused */
// return the domain that's currently running on the given cpu
static int current(int cpu)
{
    return running[cpu];
}
#endif

static void set_current(int cpu, int domain)
{
    running[cpu] = domain;
}
static void close_handler(int signal)
{
    interrupted = 1;
}

#if 0
void dump_record(int cpu, struct t_rec *x)
{
    printf("record: cpu=%x, tsc=%lx, event=%x, d1=%lx\n",
           cpu, x->cycles, x->event, x->data[0]);
}
#endif
/**
 * millis_to_timespec - convert a time in milliseconds to a struct timespec
 * @millis: time interval in milliseconds
 */
static struct timespec millis_to_timespec(unsigned long millis)
{
    struct timespec spec;

    spec.tv_sec = millis / 1000;
    spec.tv_nsec = (millis % 1000) * 1000 * 1000; /* tv_nsec is in nanoseconds */

    return spec;
}
typedef struct
{
    int event_count;
    int event_id;
    char *text;
} stat_map_t;

stat_map_t stat_map[] = {
    { 0, 0,                           "Other" },
    { 0, TRC_SCHED_DOM_ADD,           "Add Domain" },
    { 0, TRC_SCHED_DOM_REM,           "Remove Domain" },
    { 0, TRC_SCHED_SLEEP,             "Sleep" },
    { 0, TRC_SCHED_WAKE,              "Wake" },
    { 0, TRC_SCHED_BLOCK,             "Block" },
    { 0, TRC_SCHED_SWITCH,            "Switch" },
    { 0, TRC_SCHED_S_TIMER_FN,        "Timer Func"},
    { 0, TRC_SCHED_SWITCH_INFPREV,    "Switch Prev" },
    { 0, TRC_SCHED_SWITCH_INFNEXT,    "Switch Next" },
    { 0, TRC_MEM_PAGE_GRANT_MAP,      "Page Map" },
    { 0, TRC_MEM_PAGE_GRANT_UNMAP,    "Page Unmap" },
    { 0, TRC_MEM_PAGE_GRANT_TRANSFER, "Page Transfer" },
    { 0, 0, 0 }
};
static void check_gotten_sum(void)
{
#if 0
    uint64_t sum, ns;
    extern uint64_t total_ns_gotten(uint64_t*);
    double percent;
    int i;

    for (i=0; i<NCPU; i++) {
        new_qos = cpu_qos_data[i];
        ns = billion;
        sum = total_ns_gotten(&ns);

        printf("[cpu%d] ns_gotten over all domains = %lldns, over %lldns\n",
               i, sum, ns);
        percent = (double) sum;
        percent = (100.0*percent) / (double)ns;
        printf(" ==> ns_gotten = %7.3f%%\n", percent);
    }
#endif
}
static void dump_stats(void)
{
    stat_map_t *smt = stat_map;
    time_t end_time, run_time;

    time(&end_time);

    run_time = end_time - start_time;
    if (run_time == 0)    /* avoid dividing by zero on sub-second runs */
        run_time = 1;

    printf("Event counts:\n");
    while (smt->text != NULL) {
        printf("%08d\t%s\n", smt->event_count, smt->text);
        smt++;
    }

    printf("processed %d total records in %d seconds (%ld per second)\n",
           rec_count, (int)run_time, (long)(rec_count/run_time));

    printf("woke up %d times in %d seconds (%ld per second)\n", wakeups,
           (int) run_time, (long)(wakeups/run_time));

    check_gotten_sum();
}
static void log_event(int event_id)
{
    stat_map_t *smt = stat_map;

    //  printf("event_id = 0x%x\n", event_id);

    while (smt->text != NULL) {
        if (smt->event_id == event_id) {
            smt->event_count++;
            return;
        }
        smt++;
    }
    stat_map[0].event_count++;    // not in the table: count it as "Other"
}
int virq_port;
xc_evtchn *xce_handle = NULL;

/* Returns the event channel handle. */
/* Stolen from xenstore code */
static int eventchn_init(void)
{
    int rc;

    // to revert to old way:
    if (0)
        return -1;

    xce_handle = xc_evtchn_open(NULL, 0);

    if (xce_handle == NULL) {
        perror("Failed to open evtchn device");
        return -1;    /* don't try to bind on a NULL handle; fall back to polling */
    }

    if ((rc = xc_evtchn_bind_virq(xce_handle, VIRQ_TBUF)) == -1)
        perror("Failed to bind to domain exception virq port");
    virq_port = rc;

    return 0;
}
static void wait_for_event(void)
{
    int ret;
    fd_set inset;
    evtchn_port_t port;
    struct timeval tv;
    int evtchn_fd;

    if (xce_handle == NULL) {
        nanosleep(&opts.poll_sleep, NULL);
        return;
    }

    evtchn_fd = xc_evtchn_fd(xce_handle);

    FD_ZERO(&inset);
    FD_SET(evtchn_fd, &inset);
    tv.tv_sec = 1;
    tv.tv_usec = 0;
    // tv = millis_to_timespec(&opts.poll_sleep);
    ret = select(evtchn_fd+1, &inset, NULL, NULL, &tv);

    if ( (ret == 1) && FD_ISSET(evtchn_fd, &inset)) {
        if ((port = xc_evtchn_pending(xce_handle)) == -1)
            perror("Failed to read from event fd");

        //    if (port == virq_port)
        //      printf("got the event I was looking for\r\n");

        if (xc_evtchn_unmask(xce_handle, port) == -1)
            perror("Failed to write to event fd");
    }
}
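
/*
 * Wakeup strategy, in short: when the evtchn device opens successfully we
 * are bound to VIRQ_TBUF, which Xen raises when trace-buffer data becomes
 * available, and the select() above blocks on its fd with a one-second
 * timeout.  Otherwise eventchn_init() returns -1 and we degrade to plain
 * nanosleep() polling at opts.poll_sleep intervals.
 */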
static void get_tbufs(unsigned long *mfn, unsigned long *size)
{
    xc_interface *xc_handle = xc_interface_open(0,0,0);
    int ret;

    if ( !xc_handle )
    {
        PERROR("Failed to open xc interface");
        exit(EXIT_FAILURE);
    }

    ret = xc_tbuf_enable(xc_handle, DEFAULT_TBUF_SIZE, mfn, size);

    if ( ret != 0 )
    {
        perror("Couldn't enable trace buffers");
        exit(1);
    }

    xc_interface_close(xc_handle);
}

static void disable_tracing(void)
{
    xc_interface *xc_handle = xc_interface_open(0,0,0);
    xc_tbuf_disable(xc_handle);
    xc_interface_close(xc_handle);
}
/**
 * map_tbufs - memory map Xen trace buffers into user space
 * @tbufs_mfn:  mfn of the trace buffers
 * @num:        number of trace buffers to map
 * @tinfo_size: size of the t_info metadata area
 *
 * Maps the Xen trace buffers into process address space.
 */
static struct t_struct *map_tbufs(unsigned long tbufs_mfn, unsigned int num,
                                  unsigned long tinfo_size)
{
    xc_interface *xc_handle;
    static struct t_struct tbufs = { 0 };
    int i;

    xc_handle = xc_interface_open(0,0,0);
    if ( !xc_handle )
    {
        exit(EXIT_FAILURE);
    }

    /* Map t_info metadata structure */
    tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, tinfo_size,
                                        PROT_READ, tbufs_mfn);

    if ( tbufs.t_info == 0 )
    {
        PERROR("Failed to mmap trace buffers");
        exit(EXIT_FAILURE);
    }

    if ( tbufs.t_info->tbuf_size == 0 )
    {
        fprintf(stderr, "%s: tbuf_size 0!\n", __func__);
        exit(EXIT_FAILURE);
    }

    /* Map per-cpu buffers */
    tbufs.meta = (struct t_buf **)calloc(num, sizeof(struct t_buf *));
    tbufs.data = (unsigned char **)calloc(num, sizeof(unsigned char *));
    if ( tbufs.meta == NULL || tbufs.data == NULL )
    {
        PERROR( "Failed to allocate memory for buffer pointers\n");
        exit(EXIT_FAILURE);
    }

    for(i=0; i<num; i++)
    {
        const uint32_t *mfn_list = (const uint32_t *)tbufs.t_info
                                   + tbufs.t_info->mfn_offset[i];
        int j;
        xen_pfn_t pfn_list[tbufs.t_info->tbuf_size];

        for ( j=0; j<tbufs.t_info->tbuf_size; j++)
            pfn_list[j] = (xen_pfn_t)mfn_list[j];

        tbufs.meta[i] = xc_map_foreign_batch(xc_handle, DOMID_XEN,
                                             PROT_READ | PROT_WRITE,
                                             pfn_list,
                                             tbufs.t_info->tbuf_size);
        if ( tbufs.meta[i] == NULL )
        {
            PERROR("Failed to map cpu buffer!");
            exit(EXIT_FAILURE);
        }
        tbufs.data[i] = (unsigned char *)(tbufs.meta[i]+1);
    }

    xc_interface_close(xc_handle);

    return &tbufs;
}
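
/*
 * A sketch of the metadata layout assumed above: t_info starts with per-cpu
 * mfn_offset[] entries, each an offset (counted in uint32 units) from the
 * start of the t_info area to that cpu's list of trace-buffer MFNs.  The
 * 32-bit MFNs are widened into a xen_pfn_t array because that is what
 * xc_map_foreign_batch() takes; each mapped region begins with a struct
 * t_buf header, and the record data area follows immediately after it.
 */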
/**
 * get_num_cpus - get the number of logical CPUs
 */
static unsigned int get_num_cpus(void)
{
    xc_physinfo_t physinfo = { 0 };
    xc_interface *xc_handle = xc_interface_open(0,0,0);
    int ret;

    ret = xc_physinfo(xc_handle, &physinfo);

    if ( ret != 0 )
    {
        PERROR("Failure to get logical CPU count from Xen");
        exit(EXIT_FAILURE);
    }

    xc_interface_close(xc_handle);
    opts.cpu_freq = (double)physinfo.cpu_khz/1000.0;

    return physinfo.nr_cpus;
}
/**
 * monitor_tbufs - monitor the contents of tbufs
 */
static int monitor_tbufs(void)
{
    int i;

    struct t_struct *tbufs;      /* Pointer to hypervisor maps */
    struct t_buf **meta;         /* pointers to the trace buffer metadata */
    unsigned char **data;        /* pointers to the trace buffer data areas
                                  * where they are mapped into user space. */
    unsigned long tbufs_mfn;     /* mfn of the tbufs */
    unsigned int  num;           /* number of trace buffers / logical CPUS */
    unsigned long tinfo_size;    /* size of t_info metadata map */
    unsigned long size;          /* size of a single trace buffer */

    unsigned long data_size, rec_size;

    /* get number of logical CPUs (and therefore number of trace buffers) */
    num = get_num_cpus();

    init_current(num);
    alloc_qos_data(num);

    printf("CPU Frequency = %7.2f\n", opts.cpu_freq);

    /* setup access to trace buffers */
    get_tbufs(&tbufs_mfn, &tinfo_size);
    tbufs = map_tbufs(tbufs_mfn, num, tinfo_size);

    size = tbufs->t_info->tbuf_size * XC_PAGE_SIZE;

    data_size = size - sizeof(struct t_buf);

    meta = tbufs->meta;
    data = tbufs->data;

    if ( eventchn_init() < 0 )
        fprintf(stderr, "Failed to initialize event channel; "
                "Using POLL method\r\n");

    /* now, scan buffers for events */
    while ( !interrupted )
    {
        for ( i = 0; (i < num) && !interrupted; i++ )
        {
            unsigned long start_offset, end_offset, cons, prod;

            cons = meta[i]->cons;
            prod = meta[i]->prod;
            xen_rmb(); /* read prod, then read item. */

            if ( cons == prod )
                continue;

            start_offset = cons % data_size;
            end_offset   = prod % data_size;
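            /*
             * The data area is a ring: if the consumer offset has reached
             * or passed the producer offset, the unread records wrap, so
             * drain from cons up to the physical end of the buffer first,
             * then resume from offset 0 up to prod.
             */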
            if ( start_offset >= end_offset )
            {
                while ( start_offset != data_size )
                {
                    rec_size = process_record(
                        i, (struct t_rec *)(data[i] + start_offset));
                    start_offset += rec_size;
                }
                start_offset = 0;
            }
            while ( start_offset != end_offset )
            {
                rec_size = process_record(
                    i, (struct t_rec *)(data[i] + start_offset));
                start_offset += rec_size;
            }
            xen_mb(); /* read item, then update cons. */
            meta[i]->cons = prod;
        }

        wait_for_event();
        wakeups++;
    }

    /* cleanup */
    free(meta);
    free(data);
    /* don't need to munmap - cleanup is automatic */

    return 0;
}
/******************************************************************************
 * Command line handling
 *****************************************************************************/

const char *program_version     = "xenbaked v1.4";
const char *program_bug_address = "<rob.gardner@hp.com>";

#define xstr(x) str(x)
#define str(x) #x

static void usage(void)
{
#define USAGE_STR \
"Usage: xenbaked [OPTION...]\n" \
"Tool to capture and partially process Xen trace buffer data\n" \
"\n" \
"  -m, --ms_per_sample=MS     Specify the number of milliseconds per sample\n" \
"                             (default " xstr(MS_PER_SAMPLE) ").\n" \
"  -s, --poll-sleep=p         Set sleep time, p, in milliseconds between\n" \
"                             polling the trace buffer for new data\n" \
"                             (default " xstr(POLL_SLEEP_MILLIS) ").\n" \
"  -t, --log-thresh=l         Set number, l, of new records required to\n" \
"                             trigger a write to output (default " \
                              xstr(NEW_DATA_THRESH) ").\n" \
"  -?, --help                 Show this message\n" \
"  -V, --version              Print program version\n" \
"\n" \
"This tool is used to capture trace buffer data from Xen. The data is\n" \
"saved in a shared memory structure to be further processed by xenmon.\n"

    printf(USAGE_STR);
    printf("\nReport bugs to %s\n", program_bug_address);

    exit(EXIT_FAILURE);
}
/* convert the argument string pointed to by arg to a long int representation */
static long argtol(const char *restrict arg, int base)
{
    char *endp;
    long val;

    errno = 0;
    val = strtol(arg, &endp, base);

    if (errno != 0) {
        fprintf(stderr, "Invalid option argument: %s\n", arg);
        fprintf(stderr, "Error: %s\n\n", strerror(errno));
        usage();
    } else if (endp == arg || *endp != '\0') {
        fprintf(stderr, "Invalid option argument: %s\n\n", arg);
        usage();
    }

    return val;
}
/* parse command line arguments */
static void parse_args(int argc, char **argv)
{
    int option;
    static struct option long_options[] = {
        { "log-thresh",    required_argument, 0, 't' },
        { "poll-sleep",    required_argument, 0, 's' },
        { "ms_per_sample", required_argument, 0, 'm' },
        { "help",          no_argument,       0, '?' },
        { "version",       no_argument,       0, 'V' },
        { 0, 0, 0, 0 }
    };

    while ( (option = getopt_long(argc, argv, "m:s:t:?V",
                                  long_options, NULL)) != -1)
    {
        switch ( option )
        {
        case 't': /* set new records threshold for logging */
            opts.new_data_thresh = argtol(optarg, 0);
            break;

        case 's': /* set sleep time (given in milliseconds) */
            opts.poll_sleep = millis_to_timespec(argtol(optarg, 0));
            break;

        case 'm': /* set ms_per_sample */
            opts.ms_per_sample = argtol(optarg, 0);
            break;

        case 'V': /* print program version */
            printf("%s\n", program_version);
            exit(EXIT_SUCCESS);
            break;

        default:
            usage();
        }
    }

    /* all arguments should have been processed */
    if (optind != argc) {
        usage();
    }
}
#define SHARED_MEM_FILE "/var/run/xenq-shm"
static void alloc_qos_data(int ncpu)
{
    int i, n, pgsize, off=0;
    char *dummy;
    int qos_fd;

    cpu_qos_data = (_new_qos_data **) calloc(ncpu, sizeof(_new_qos_data *));

    qos_fd = open(SHARED_MEM_FILE, O_RDWR|O_CREAT|O_TRUNC, 0777);
    if (qos_fd < 0) {
        PERROR(SHARED_MEM_FILE);
        exit(2);
    }
    pgsize = getpagesize();
    dummy = malloc(pgsize);

    for (n=0; n<ncpu; n++) {

        for (i=0; i<sizeof(_new_qos_data); i=i+pgsize)
            if ((write(qos_fd, dummy, pgsize)) != pgsize) {
                PERROR(SHARED_MEM_FILE);
                exit(2);
            }

        new_qos = (_new_qos_data *) mmap(0, sizeof(_new_qos_data), PROT_READ|PROT_WRITE,
                                         MAP_SHARED, qos_fd, off);
        off += i;
        if (new_qos == MAP_FAILED) {  /* mmap signals failure with MAP_FAILED, not NULL */
            PERROR("mmap");
            exit(3);
        }
        //  printf("new_qos = %p\n", new_qos);
        memset(new_qos, 0, sizeof(_new_qos_data));
        new_qos->next_datapoint = 0;
        advance_next_datapoint(0);
        new_qos->structlen = i;
        new_qos->ncpu = ncpu;
        //  printf("structlen = 0x%x\n", i);
        cpu_qos_data[n] = new_qos;
    }
    free(dummy);
    new_qos = NULL;
}
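
/*
 * Layout of the shared-memory file built above, as a sketch: the file holds
 * ncpu page-rounded copies of _new_qos_data back to back, one per physical
 * cpu, each mapped MAP_SHARED here and consumed by the xenmon front end.
 * 'off' advances by 'i', the page-rounded size just written for the previous
 * copy, so every mmap() starts on a page boundary as mmap() requires.
 */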
int main(int argc, char **argv)
{
    int i, ret;
    struct sigaction act;

    time(&start_time);
    opts.poll_sleep = millis_to_timespec(POLL_SLEEP_MILLIS);
    opts.new_data_thresh = NEW_DATA_THRESH;
    opts.ms_per_sample = MS_PER_SAMPLE;
    opts.cpu_freq = CPU_FREQ;

    parse_args(argc, argv);
    fprintf(stderr, "ms_per_sample = %ld\n", opts.ms_per_sample);

    /* ensure that if we get a signal, we'll do cleanup, then exit */
    act.sa_handler = close_handler;
    act.sa_flags = 0;
    sigemptyset(&act.sa_mask);
    sigaction(SIGHUP,  &act, NULL);
    sigaction(SIGTERM, &act, NULL);
    sigaction(SIGINT,  &act, NULL);

    ret = monitor_tbufs();

    dump_stats();
    /* new_qos is NULL outside process_record(); sync every per-cpu region */
    for (i = 0; i < NCPU; i++)
        msync(cpu_qos_data[i], sizeof(_new_qos_data), MS_SYNC);
    disable_tracing();

    return ret;
}
static void qos_init_domain(int domid, int idx)
{
    int i;

    memset(&new_qos->domain_info[idx], 0, sizeof(_domain_info));
    new_qos->domain_info[idx].last_update_time = global_now;
    //  runnable_start_time[idx] = 0;
    new_qos->domain_info[idx].runnable_start_time = 0; // invalidate
    new_qos->domain_info[idx].in_use = 1;
    new_qos->domain_info[idx].blocked_start_time = 0;
    new_qos->domain_info[idx].id = domid;
    if (domid == IDLE_DOMAIN_ID)
        snprintf(new_qos->domain_info[idx].name,
                 sizeof(new_qos->domain_info[idx].name),
                 "Idle Task%d", global_cpu);
    else
        snprintf(new_qos->domain_info[idx].name,
                 sizeof(new_qos->domain_info[idx].name),
                 "Domain#%d", domid);

    for (i=0; i<NSAMPLES; i++) {
        new_qos->qdata[i].ns_gotten[idx] = 0;
        new_qos->qdata[i].ns_allocated[idx] = 0;
        new_qos->qdata[i].ns_waiting[idx] = 0;
        new_qos->qdata[i].ns_blocked[idx] = 0;
        new_qos->qdata[i].switchin_count[idx] = 0;
        new_qos->qdata[i].io_count[idx] = 0;
    }
}
static void global_init_domain(int domid, int idx)
{
    int cpu;
    _new_qos_data *saved_qos;

    saved_qos = new_qos;

    for (cpu=0; cpu<NCPU; cpu++) {
        new_qos = cpu_qos_data[cpu];
        qos_init_domain(domid, idx);
    }
    new_qos = saved_qos;
}
// give index of this domain in the qos data array
static int indexof(int domid)
{
    int idx;
    xc_dominfo_t dominfo[NDOMAINS];
    xc_interface *xc_handle;
    int ndomains;

    if (domid < 0) {    // shouldn't happen
        printf("bad domain id: %d\r\n", domid);
        return 0;
    }

    for (idx=0; idx<NDOMAINS; idx++)
        if ( (new_qos->domain_info[idx].id == domid) && new_qos->domain_info[idx].in_use)
            return idx;

    // not found, make a new entry
    for (idx=0; idx<NDOMAINS; idx++)
        if (new_qos->domain_info[idx].in_use == 0) {
            global_init_domain(domid, idx);
            return idx;
        }

    // call domaininfo hypercall to try and garbage collect unused entries
    xc_handle = xc_interface_open(0,0,0);
    ndomains = xc_domain_getinfo(xc_handle, 0, NDOMAINS, dominfo);
    xc_interface_close(xc_handle);

    // for each domain in our data, look for it in the system dominfo structure
    // and purge the domain's data from our state if it does not exist in the
    // dominfo structure
    for (idx=0; idx<NDOMAINS; idx++) {
        int domid = new_qos->domain_info[idx].id;
        int jdx;

        for (jdx=0; jdx<ndomains; jdx++) {
            if (dominfo[jdx].domid == domid)
                break;
        }
        if (jdx == ndomains)              // we didn't find domid in the dominfo struct
            if (domid != IDLE_DOMAIN_ID)  // exception for idle domain, which is
                                          // not contained in dominfo
                qos_kill_thread(domid);   // purge our stale data
    }

    // look again for a free slot
    for (idx=0; idx<NDOMAINS; idx++)
        if (new_qos->domain_info[idx].in_use == 0) {
            global_init_domain(domid, idx);
            return idx;
        }

    // still no space found, so bail
    fprintf(stderr, "out of space in domain table, increase NDOMAINS\r\n");
    exit(2);
}
static int domain_runnable(int domid)
{
    return new_qos->domain_info[indexof(domid)].runnable;
}

static void update_blocked_time(int domid, uint64_t now)
{
    uint64_t t_blocked;
    int id = indexof(domid);

    if (new_qos->domain_info[id].blocked_start_time != 0) {
        if (now >= new_qos->domain_info[id].blocked_start_time)
            t_blocked = now - new_qos->domain_info[id].blocked_start_time;
        else
            t_blocked = now + (~0ULL - new_qos->domain_info[id].blocked_start_time);
        new_qos->qdata[new_qos->next_datapoint].ns_blocked[id] += t_blocked;
    }

    if (domain_runnable(domid))
        new_qos->domain_info[id].blocked_start_time = 0;
    else
        new_qos->domain_info[id].blocked_start_time = now;
}
// advance to next datapoint for all domains
static void advance_next_datapoint(uint64_t now)
{
    int new, old, didx;

    old = new_qos->next_datapoint;
    new = QOS_INCR(old);
    new_qos->next_datapoint = new;
    //  memset(&new_qos->qdata[new], 0, sizeof(uint64_t)*(2+5*NDOMAINS));
    for (didx = 0; didx < NDOMAINS; didx++) {
        new_qos->qdata[new].ns_gotten[didx] = 0;
        new_qos->qdata[new].ns_allocated[didx] = 0;
        new_qos->qdata[new].ns_waiting[didx] = 0;
        new_qos->qdata[new].ns_blocked[didx] = 0;
        new_qos->qdata[new].switchin_count[didx] = 0;
        new_qos->qdata[new].io_count[didx] = 0;
    }
    new_qos->qdata[new].ns_passed = 0;
    new_qos->qdata[new].lost_records = 0;
    new_qos->qdata[new].flip_free_periods = 0;

    new_qos->qdata[new].timestamp = now;
}
static void qos_update_thread(int cpu, int domid, uint64_t now)
{
    int n, id;
    uint64_t last_update_time, start;
    int64_t time_since_update, run_time = 0;

    id = indexof(domid);

    n = new_qos->next_datapoint;
    last_update_time = new_qos->domain_info[id].last_update_time;

    time_since_update = now - last_update_time;

    if (time_since_update < 0) {
        // what happened here? either a timestamp wraparound, or more likely,
        // a slight inconsistency among timestamps from various cpu's
        if (-time_since_update < billion) {
            // fairly small difference, let's just adjust 'now' to be a little
            // beyond last_update_time
            time_since_update = -time_since_update;
        }
        else if ( ((~0ULL - last_update_time) < billion) && (now < billion) ) {
            // difference is huge, must be a wraparound
            // last_update time should be "near" ~0ULL,
            // and now should be "near" 0
            time_since_update = now + (~0ULL - last_update_time);
            printf("time wraparound\n");
        }
        else {
            // none of the above, may be an out of order record
            // no good solution, just ignore and update again later
            return;
        }
    }

    new_qos->domain_info[id].last_update_time = now;

    if (new_qos->domain_info[id].runnable_at_last_update && is_current(domid, cpu)) {
        start = new_qos->domain_info[id].start_time;
        if (start > now) {    // wrapped around
            run_time = now + (~0ULL - start);
            // this could happen if there is nothing going on within a cpu;
            // in this case the idle domain would run forever
            //    printf("warning: start > now\n");
        }
        else
            run_time = now - start;
        //    if (run_time < 0)   // should not happen
        //      printf("warning: run_time < 0; start = %lld now= %lld\n", start, now);
        new_qos->domain_info[id].ns_oncpu_since_boot += run_time;
        new_qos->domain_info[id].start_time = now;
        new_qos->domain_info[id].ns_since_boot += time_since_update;

        new_qos->qdata[n].ns_gotten[id] += run_time;
        //    if (domid == 0 && cpu == 1)
        //      printf("adding run time for dom0 on cpu1\r\n");
    }

    new_qos->domain_info[id].runnable_at_last_update = domain_runnable(domid);

    update_blocked_time(domid, now);

    // how much time passed since this datapoint was updated?
    if (now >= new_qos->qdata[n].timestamp) {
        // all is right with the world, time is increasing
        new_qos->qdata[n].ns_passed += (now - new_qos->qdata[n].timestamp);
    }
    else {
        // time wrapped around
        //new_qos->qdata[n].ns_passed += (now + (~0LL - new_qos->qdata[n].timestamp));
        //    printf("why timewrap?\r\n");
    }
    new_qos->qdata[n].timestamp = now;
}
// called by dump routines to update all structures
static void qos_update_all(uint64_t now, int cpu)
{
    int i;

    for (i=0; i<NDOMAINS; i++)
        if (new_qos->domain_info[i].in_use)
            qos_update_thread(cpu, new_qos->domain_info[i].id, now);
}

static void qos_update_thread_stats(int cpu, int domid, uint64_t now)
{
    if (new_qos->qdata[new_qos->next_datapoint].ns_passed > (million*opts.ms_per_sample)) {
        qos_update_all(now, cpu);
        advance_next_datapoint(now);
        return;
    }
    qos_update_thread(cpu, domid, now);
}
// called when a new thread gets the cpu
static void qos_switch_in(int cpu, int domid, uint64_t now,
                          unsigned long ns_alloc, unsigned long ns_waited)
{
    int idx = indexof(domid);

    new_qos->domain_info[idx].runnable = 1;
    update_blocked_time(domid, now);
    new_qos->domain_info[idx].blocked_start_time = 0;  // invalidate
    new_qos->domain_info[idx].runnable_start_time = 0; // invalidate
    //runnable_start_time[idx] = 0;

    new_qos->domain_info[idx].start_time = now;
    new_qos->qdata[new_qos->next_datapoint].switchin_count[idx]++;
    new_qos->qdata[new_qos->next_datapoint].ns_allocated[idx] += ns_alloc;
    new_qos->qdata[new_qos->next_datapoint].ns_waiting[idx] += ns_waited;
    qos_update_thread_stats(cpu, domid, now);
    set_current(cpu, domid);

    // count up page flips for dom0 execution
    if (domid == 0)
        dom0_flips = 0;
}
// called when the current thread is taken off the cpu
static void qos_switch_out(int cpu, int domid, uint64_t now,
                           unsigned long gotten)
{
    int idx = indexof(domid);
    int n;

    if (!is_current(domid, cpu)) {
        //    printf("switching out domain %d but it is not current. gotten=%ld\r\n", id, gotten);
    }

    if (gotten == 0) {
        printf("gotten==0 in qos_switchout(domid=%d)\n", domid);
    }

    if (gotten < 100) {
        printf("gotten<100ns in qos_switchout(domid=%d)\n", domid);
    }

    n = new_qos->next_datapoint;
#if 0
    new_qos->qdata[n].ns_gotten[idx] += gotten;
    if (gotten > new_qos->qdata[n].ns_passed)
        printf("inconsistency #257, diff = %lld\n",
               gotten - new_qos->qdata[n].ns_passed );
#endif
    new_qos->domain_info[idx].ns_oncpu_since_boot += gotten;
    new_qos->domain_info[idx].runnable_start_time = now;
    //  runnable_start_time[id] = now;
    qos_update_thread_stats(cpu, domid, now);

    // process dom0 page flips
    if (domid == 0)
        if (dom0_flips == 0)
            new_qos->qdata[n].flip_free_periods++;
}
// called when domain is put to sleep, may also be called
// when thread is already asleep
static void qos_state_sleeping(int cpu, int domid, uint64_t now)
{
    int idx;

    if (!domain_runnable(domid))    // double call?
        return;

    idx = indexof(domid);
    new_qos->domain_info[idx].runnable = 0;
    new_qos->domain_info[idx].blocked_start_time = now;
    new_qos->domain_info[idx].runnable_start_time = 0;  // invalidate
    //  runnable_start_time[idx] = 0; // invalidate
    qos_update_thread_stats(cpu, domid, now);
}
// domain died, presume it's dead on all cpu's, not just mostly dead
static void qos_kill_thread(int domid)
{
    int cpu;

    for (cpu=0; cpu<NCPU; cpu++) {
        cpu_qos_data[cpu]->domain_info[indexof(domid)].in_use = 0;
    }
}
// called when thread becomes runnable, may also be called
// when thread is already runnable
static void qos_state_runnable(int cpu, int domid, uint64_t now)
{
    int idx;

    qos_update_thread_stats(cpu, domid, now);

    if (domain_runnable(domid))    // double call?
        return;

    idx = indexof(domid);
    new_qos->domain_info[idx].runnable = 1;
    update_blocked_time(domid, now);

    new_qos->domain_info[idx].blocked_start_time = 0; /* invalidate */
    new_qos->domain_info[idx].runnable_start_time = now;
    //  runnable_start_time[id] = now;
}
static void qos_count_packets(domid_t domid, uint64_t now)
{
    int i, idx = indexof(domid);
    _new_qos_data *cpu_data;

    for (i=0; i<NCPU; i++) {
        cpu_data = cpu_qos_data[i];
        if (cpu_data->domain_info[idx].in_use) {
            cpu_data->qdata[cpu_data->next_datapoint].io_count[idx]++;
        }
    }

    new_qos->qdata[new_qos->next_datapoint].io_count[0]++;
    dom0_flips++;
}
static int process_record(int cpu, struct t_rec *r)
{
    uint64_t now = 0;
    uint32_t *extra_u32 = r->u.nocycles.extra_u32;
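
    /*
     * Record layout, per the struct t_rec declaration in xen/trace.h: one
     * 32-bit header word holding event:28, extra_u32:3 (count of payload
     * words) and cycles_included:1, optionally followed by a 64-bit cycle
     * count, then by up to seven 32-bit payload words.  The record size
     * returned at the bottom of this function mirrors that layout.
     */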
    new_qos = cpu_qos_data[cpu];

    rec_count++;

    if ( r->cycles_included )
    {
        now = ((uint64_t)r->u.cycles.cycles_hi << 32) | r->u.cycles.cycles_lo;
        now = ((double)now) / (opts.cpu_freq / 1000.0); // cycles to ns (cpu_freq is in MHz)
        extra_u32 = r->u.cycles.extra_u32;
    }

    global_now = now;
    global_cpu = cpu;

    log_event(r->event);

    switch (r->event) {

    case TRC_SCHED_SWITCH_INFPREV:
        // domain data[0] just switched out and received data[1] ns of cpu time
        qos_switch_out(cpu, extra_u32[0], now, extra_u32[1]);
        //    printf("ns_gotten %ld\n", extra_u32[1]);
        break;

    case TRC_SCHED_SWITCH_INFNEXT:
        // domain data[0] just switched in and
        // waited data[1] ns, and was allocated data[2] ns of cpu time
        qos_switch_in(cpu, extra_u32[0], now, extra_u32[2], extra_u32[1]);
        break;

    case TRC_SCHED_DOM_ADD:
        (void) indexof(extra_u32[0]);
        break;

    case TRC_SCHED_DOM_REM:
        qos_kill_thread(extra_u32[0]);
        break;

    case TRC_SCHED_SLEEP:
        qos_state_sleeping(cpu, extra_u32[0], now);
        break;

    case TRC_SCHED_WAKE:
        qos_state_runnable(cpu, extra_u32[0], now);
        break;

    case TRC_SCHED_BLOCK:
        qos_state_sleeping(cpu, extra_u32[0], now);
        break;

    case TRC_MEM_PAGE_GRANT_TRANSFER:
        qos_count_packets(extra_u32[0], now);
        break;

    default:
        break;
    }

    new_qos = NULL;

    return 4 + (r->cycles_included ? 8 : 0) + (r->extra_u32 * 4);
}