debuggers.hg

annotate tools/xenmon/xenbaked.c @ 22848:6341fe0f4e5a

Added tag 4.1.0-rc2 for changeset 9dca60d88c63
author Keir Fraser <keir@xen.org>
date Tue Jan 25 14:06:55 2011 +0000 (2011-01-25)
parents 60782cefa154
children
rev   line source
kaf24@7840 1 /******************************************************************************
kaf24@7840 2 * tools/xenmon/xenbaked.c
kaf24@7840 3 *
kaf24@7840 4 * Tool for collecting raw trace buffer data from Xen and
kaf24@7840 5 * performing some accumulation operations and other processing
kaf24@7840 6 * on it.
kaf24@7840 7 *
kaf24@7840 8 * Copyright (C) 2004 by Intel Research Cambridge
kaf24@7840 9 * Copyright (C) 2005 by Hewlett Packard, Palo Alto and Fort Collins
kaf24@9685 10 * Copyright (C) 2006 by Hewlett Packard Fort Collins
kaf24@7840 11 *
kaf24@7840 12 * Authors: Diwaker Gupta, diwaker.gupta@hp.com
kaf24@7840 13 * Rob Gardner, rob.gardner@hp.com
kaf24@7840 14 * Lucy Cherkasova, lucy.cherkasova@hp.com
keir@15979 15 * Much code based on xentrace, authored by Mark Williamson,
keir@15979 16 * mark.a.williamson@intel.com
kaf24@7840 17 * Date: November, 2005
kaf24@7840 18 *
kaf24@7840 19 * This program is free software; you can redistribute it and/or modify
kaf24@7840 20 * it under the terms of the GNU General Public License as published by
kaf24@7840 21 * the Free Software Foundation; under version 2 of the License.
kaf24@7840 22 *
kaf24@7840 23 * This program is distributed in the hope that it will be useful,
kaf24@7840 24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
kaf24@7840 25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
kaf24@7840 26 * GNU General Public License for more details.
kaf24@7840 27 *
kaf24@7840 28 * You should have received a copy of the GNU General Public License
kaf24@7840 29 * along with this program; if not, write to the Free Software
kaf24@7840 30 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
kaf24@7840 31 */
kaf24@7840 32
kaf24@7840 33 #include <time.h>
kaf24@7840 34 #include <stdlib.h>
kaf24@7840 35 #include <stdio.h>
kaf24@7840 36 #include <sys/mman.h>
kaf24@7840 37 #include <fcntl.h>
kaf24@7840 38 #include <unistd.h>
kaf24@7840 39 #include <errno.h>
kaf24@7840 40 #include <signal.h>
kaf24@7840 41 #include <xenctrl.h>
kaf24@7840 42 #include <xen/xen.h>
kaf24@7840 43 #include <string.h>
kaf24@9685 44 #include <sys/select.h>
keir@16428 45 #include <getopt.h>
kaf24@7840 46
/*
 * PERROR - print a formatted error message to stderr, followed by the
 * current errno value and its strerror() text.
 * @_m: printf-style format; it is pasted between two string literals, so it
 *      must itself be a string literal
 * @_a: optional arguments consumed by @_m
 *
 * errno is captured before fprintf() runs and restored afterwards, so the
 * caller still observes the errno of the call that failed.  The temporary
 * deliberately avoids a double-underscore name, which is reserved for the
 * implementation.
 */
#define PERROR(_m, _a...)                                       \
do {                                                            \
    int saved_errno_ = errno;                                   \
    fprintf(stderr, "ERROR: " _m " (%d = %s)\n" , ## _a ,       \
            saved_errno_, strerror(saved_errno_));              \
    errno = saved_errno_;                                       \
} while (0)
kaf24@10034 54
/*
 * Minimal user-space atomic_t (a struct wrapping one int) and a read
 * accessor for it, defined ahead of the <xen/trace.h> include that follows.
 * NOTE(review): presumably supplied for that header's benefit -- confirm.
 */
typedef struct { int counter; } atomic_t;
#define _atomic_read(v) ((v).counter)
kaf24@7840 57
kaf24@7840 58 #include <xen/trace.h>
kaf24@7840 59 #include "xenbaked.h"
kaf24@7840 60
kaf24@7840 61
/***** Compile time configuration of defaults ********************************/

/* when we've got more records than this waiting, we log it to the output */
#define NEW_DATA_THRESH 1

/* sleep for this long (milliseconds) between checking the trace buffers */
#define POLL_SLEEP_MILLIS 100

/* Size of the time period represented by each sample, in milliseconds */
#define MS_PER_SAMPLE 100

/*
 * Default CPU frequency.  MHZ expands to nothing and only labels the unit,
 * so CPU_FREQ evaluates to the plain number 2660.  main() stores it in
 * opts.cpu_freq; get_num_cpus() later overwrites that with the value
 * reported by Xen.
 */
#define MHZ
#define CPU_FREQ 2660 MHZ
kaf24@7840 76
kaf24@7840 77 /***** The code **************************************************************/
kaf24@7840 78
/* Run-time settings: defaults are set in main(), overrides in parse_args(). */
typedef struct settings_st {
    struct timespec poll_sleep;     /* sleep used by wait_for_event() when no
                                     * event channel is available (-s) */
    unsigned long new_data_thresh;  /* new-record threshold (-t) */
    unsigned long ms_per_sample;    /* milliseconds per sample (-m) */
    double cpu_freq;                /* CPU frequency in MHz; get_num_cpus()
                                     * sets it to cpu_khz / 1000 */
} settings_t;
kaf24@7840 85
/*
 * Bundle of the trace-buffer mappings created by map_tbufs().  meta[] and
 * data[] hold one entry per trace buffer.
 */
struct t_struct {
    const struct t_info *t_info; /* Structure with information about individual buffers */
    struct t_buf **meta;         /* Pointers to trace buffer metadata */
    unsigned char **data;        /* Pointers to trace buffer data areas */
};
keir@20969 91
kaf24@7840 92 settings_t opts;
kaf24@7840 93
kaf24@7840 94 int interrupted = 0; /* gets set if we get a SIGHUP */
kaf24@7840 95 int rec_count = 0;
kaf24@9685 96 int wakeups = 0;
kaf24@7840 97 time_t start_time;
kaf24@7840 98 int dom0_flips = 0;
kaf24@7840 99
kaf24@7840 100 _new_qos_data *new_qos;
kaf24@7840 101 _new_qos_data **cpu_qos_data;
kaf24@7840 102
kfraser@10697 103 int global_cpu;
kfraser@10697 104 uint64_t global_now;
kaf24@7840 105
kaf24@7840 106 // array of currently running domains, indexed by cpu
kaf24@7840 107 int *running = NULL;
kaf24@7840 108
kaf24@7840 109 // number of cpu's on this platform
kaf24@7840 110 int NCPU = 0;
kaf24@7840 111
kaf24@7840 112
/* Forward declarations for helpers that are used before they are defined. */
static void advance_next_datapoint(uint64_t);
static void alloc_qos_data(int ncpu);
static int process_record(int, struct t_rec *);
static void qos_kill_thread(int domid);
keir@18091 117
keir@18091 118
keir@18091 119 static void init_current(int ncpu)
kaf24@7840 120 {
keir@15979 121 running = calloc(ncpu, sizeof(int));
keir@15979 122 NCPU = ncpu;
keir@15979 123 printf("Initialized with %d %s\n", ncpu, (ncpu == 1) ? "cpu" : "cpu's");
kaf24@7840 124 }
kaf24@7840 125
keir@18091 126 static int is_current(int domain, int cpu)
kaf24@7840 127 {
keir@15979 128 // int i;
kaf24@7840 129
keir@15979 130 // for (i=0; i<NCPU; i++)
kaf24@7840 131 if (running[cpu] == domain)
keir@15979 132 return 1;
keir@15979 133 return 0;
kaf24@7840 134 }
kaf24@7840 135
kaf24@7840 136
#if 0 /* unused */
// return the domain that's currently running on the given cpu
// (compiled out; kept only as the read-side counterpart of set_current())
static int current(int cpu)
{
    return running[cpu];
}
#endif
kaf24@7840 144
keir@18091 145 static void set_current(int cpu, int domain)
kaf24@7840 146 {
keir@15979 147 running[cpu] = domain;
kaf24@7840 148 }
kaf24@7840 149
kaf24@7840 150
kaf24@7840 151
keir@18091 152 static void close_handler(int signal)
kaf24@7840 153 {
kaf24@7840 154 interrupted = 1;
kaf24@7840 155 }
kaf24@7840 156
#if 0
/* Compiled-out debugging aid: print the main fields of one trace record. */
void dump_record(int cpu, struct t_rec *x)
{
    printf("record: cpu=%x, tsc=%lx, event=%x, d1=%lx\n",
           cpu, x->cycles, x->event, x->data[0]);
}
#endif
kaf24@7840 164
/**
 * millis_to_timespec - convert a time in milliseconds to a struct timespec
 * @millis: time interval in milliseconds
 *
 * Returns the interval split into whole seconds (tv_sec) and the remaining
 * fraction expressed in nanoseconds (tv_nsec).
 */
static struct timespec millis_to_timespec(unsigned long millis)
{
    struct timespec spec;

    spec.tv_sec = millis / 1000;
    /* 1 ms = 1,000,000 ns; the remainder is below 1000, so this cannot overflow */
    spec.tv_nsec = (long)(millis % 1000) * 1000000L;

    return spec;
}
kaf24@7840 178
kaf24@7840 179
/*
 * One row of the event statistics table: how often a given trace event id
 * has been seen, plus the label printed for it by dump_stats().
 */
typedef struct
{
    int event_count;    /* number of times the event has been logged */
    int event_id;       /* trace event id this row counts */
    const char *text;   /* label; points at a string literal, hence const.
                         * NULL marks the end of the table. */
} stat_map_t;
kaf24@7840 186
/*
 * Per-event counters.  Entry 0 ("Other") collects every event id that has
 * no row of its own (see log_event()); the final entry, whose text is a
 * null pointer, terminates the table for the loops that walk it.
 */
stat_map_t stat_map[] = {
    { 0, 0, "Other" },
    { 0, TRC_SCHED_DOM_ADD, "Add Domain" },
    { 0, TRC_SCHED_DOM_REM, "Remove Domain" },
    { 0, TRC_SCHED_SLEEP, "Sleep" },
    { 0, TRC_SCHED_WAKE, "Wake" },
    { 0, TRC_SCHED_BLOCK, "Block" },
    { 0, TRC_SCHED_SWITCH, "Switch" },
    { 0, TRC_SCHED_S_TIMER_FN, "Timer Func"},
    { 0, TRC_SCHED_SWITCH_INFPREV, "Switch Prev" },
    { 0, TRC_SCHED_SWITCH_INFNEXT, "Switch Next" },
    { 0, TRC_MEM_PAGE_GRANT_MAP, "Page Map" },
    { 0, TRC_MEM_PAGE_GRANT_UNMAP, "Page Unmap" },
    { 0, TRC_MEM_PAGE_GRANT_TRANSFER, "Page Transfer" },
    { 0, 0, 0 }
};
kaf24@7840 203
kaf24@7840 204
/*
 * check_gotten_sum - consistency report, currently a no-op.
 *
 * The whole body is compiled out with "#if 0".  When enabled it would print,
 * for every cpu, the total of ns_gotten over all domains and that total as a
 * percentage of the measured period.
 */
static void check_gotten_sum(void)
{
#if 0
    uint64_t sum, ns;
    extern uint64_t total_ns_gotten(uint64_t*);
    double percent;
    int i;

    for (i=0; i<NCPU; i++) {
        new_qos = cpu_qos_data[i];
        ns = billion;
        sum = total_ns_gotten(&ns);

        printf("[cpu%d] ns_gotten over all domains = %lldns, over %lldns\n",
               i, sum, ns);
        percent = (double) sum;
        percent = (100.0*percent) / (double)ns;
        printf(" ==> ns_gotten = %7.3f%%\n", percent);
    }
#endif
}
kaf24@7840 226
kaf24@7840 227
kaf24@7840 228
keir@18091 229 static void dump_stats(void)
kaf24@7840 230 {
kaf24@7840 231 stat_map_t *smt = stat_map;
kaf24@7840 232 time_t end_time, run_time;
kaf24@7840 233
kaf24@7840 234 time(&end_time);
kaf24@7840 235
kaf24@7840 236 run_time = end_time - start_time;
kaf24@7840 237
kaf24@7840 238 printf("Event counts:\n");
kaf24@7840 239 while (smt->text != NULL) {
kaf24@7840 240 printf("%08d\t%s\n", smt->event_count, smt->text);
kaf24@7840 241 smt++;
kaf24@7840 242 }
kaf24@7840 243
kaf24@7840 244 printf("processed %d total records in %d seconds (%ld per second)\n",
keir@16428 245 rec_count, (int)run_time, (long)(rec_count/run_time));
kaf24@7840 246
kaf24@9685 247 printf("woke up %d times in %d seconds (%ld per second)\n", wakeups,
keir@16428 248 (int) run_time, (long)(wakeups/run_time));
kaf24@9685 249
kaf24@7840 250 check_gotten_sum();
kaf24@7840 251 }
kaf24@7840 252
keir@18091 253 static void log_event(int event_id)
kaf24@7840 254 {
kaf24@7840 255 stat_map_t *smt = stat_map;
kaf24@7840 256
kaf24@7840 257 // printf("event_id = 0x%x\n", event_id);
kaf24@7840 258
kaf24@7840 259 while (smt->text != NULL) {
kaf24@7840 260 if (smt->event_id == event_id) {
kaf24@7840 261 smt->event_count++;
kaf24@7840 262 return;
kaf24@7840 263 }
kaf24@7840 264 smt++;
kaf24@7840 265 }
kaf24@7840 266 if (smt->text == NULL)
kaf24@7840 267 stat_map[0].event_count++; // other
kaf24@7840 268 }
kaf24@7840 269
/* port returned by xc_evtchn_bind_virq() in eventchn_init() */
int virq_port;
/* event channel handle; NULL makes wait_for_event() fall back to sleeping */
xc_evtchn *xce_handle = NULL;
kaf24@9685 272
kaf24@9685 273 /* Returns the event channel handle. */
kaf24@9685 274 /* Stolen from xenstore code */
keir@18091 275 static int eventchn_init(void)
kaf24@9685 276 {
keir@15979 277 int rc;
kaf24@9685 278
keir@15979 279 // to revert to old way:
keir@15979 280 if (0)
keir@15979 281 return -1;
kaf24@9685 282
ian@22624 283 xce_handle = xc_evtchn_open(NULL, 0);
kaf24@10393 284
ian@22624 285 if (xce_handle == NULL)
keir@15979 286 perror("Failed to open evtchn device");
kaf24@9685 287
keir@15979 288 if ((rc = xc_evtchn_bind_virq(xce_handle, VIRQ_TBUF)) == -1)
keir@15979 289 perror("Failed to bind to domain exception virq port");
keir@15979 290 virq_port = rc;
kaf24@9685 291
ian@22624 292 return xce_handle == NULL ? -1 : 0;
kaf24@9685 293 }
kaf24@9685 294
keir@18091 295 static void wait_for_event(void)
kaf24@9685 296 {
keir@15979 297 int ret;
keir@15979 298 fd_set inset;
keir@15979 299 evtchn_port_t port;
keir@15979 300 struct timeval tv;
keir@15979 301 int evtchn_fd;
kaf24@9685 302
ian@22624 303 if (xce_handle == NULL) {
keir@15979 304 nanosleep(&opts.poll_sleep, NULL);
keir@15979 305 return;
keir@15979 306 }
kaf24@9685 307
keir@15979 308 evtchn_fd = xc_evtchn_fd(xce_handle);
kaf24@10393 309
keir@15979 310 FD_ZERO(&inset);
keir@15979 311 FD_SET(evtchn_fd, &inset);
keir@15979 312 tv.tv_sec = 1;
keir@15979 313 tv.tv_usec = 0;
keir@15979 314 // tv = millis_to_timespec(&opts.poll_sleep);
keir@15979 315 ret = select(evtchn_fd+1, &inset, NULL, NULL, &tv);
kaf24@9685 316
keir@15979 317 if ( (ret == 1) && FD_ISSET(evtchn_fd, &inset)) {
keir@15979 318 if ((port = xc_evtchn_pending(xce_handle)) == -1)
keir@15979 319 perror("Failed to read from event fd");
kaf24@9685 320
keir@15979 321 // if (port == virq_port)
keir@15979 322 // printf("got the event I was looking for\r\n");
kaf24@10393 323
keir@15979 324 if (xc_evtchn_unmask(xce_handle, port) == -1)
keir@15979 325 perror("Failed to write to event fd");
keir@15979 326 }
kaf24@9685 327 }
kaf24@9685 328
kaf24@10034 329 static void get_tbufs(unsigned long *mfn, unsigned long *size)
kaf24@9685 330 {
keir@21529 331 xc_interface *xc_handle = xc_interface_open(0,0,0);
kaf24@10034 332 int ret;
kaf24@10034 333
keir@21529 334 if ( !xc_handle )
kaf24@10034 335 {
kaf24@10034 336 exit(EXIT_FAILURE);
kaf24@9685 337 }
kaf24@10034 338
kaf24@10034 339 ret = xc_tbuf_enable(xc_handle, DEFAULT_TBUF_SIZE, mfn, size);
kaf24@10034 340
kaf24@10034 341 if ( ret != 0 )
kaf24@10034 342 {
kaf24@10034 343 perror("Couldn't enable trace buffers");
kaf24@10034 344 exit(1);
kaf24@9685 345 }
kaf24@10034 346
kaf24@10034 347 xc_interface_close(xc_handle);
kaf24@9685 348 }
kaf24@9685 349
keir@18091 350 static void disable_tracing(void)
kaf24@9685 351 {
keir@21529 352 xc_interface *xc_handle = xc_interface_open(0,0,0);
keir@15979 353 xc_tbuf_disable(xc_handle);
keir@15979 354 xc_interface_close(xc_handle);
kaf24@9685 355 }
kaf24@7840 356
kaf24@7840 357 /**
kaf24@7840 358 * map_tbufs - memory map Xen trace buffers into user space
kaf24@7840 359 * @tbufs_mfn: mfn of the trace buffers
kaf24@7840 360 * @num: number of trace buffers to map
kaf24@7840 361 * @size: size of each trace buffer
kaf24@7840 362 *
kaf24@7840 363 * Maps the Xen trace buffers them into process address space.
kaf24@7840 364 */
keir@20969 365 static struct t_struct *map_tbufs(unsigned long tbufs_mfn, unsigned int num,
keir@20969 366 unsigned long tinfo_size)
kaf24@7840 367 {
keir@21529 368 xc_interface *xc_handle;
keir@20969 369 static struct t_struct tbufs = { 0 };
keir@20969 370 int i;
kaf24@7840 371
keir@21529 372 xc_handle = xc_interface_open(0,0,0);
keir@21529 373 if ( !xc_handle )
kaf24@7840 374 {
kaf24@7840 375 exit(EXIT_FAILURE);
kaf24@7840 376 }
kaf24@7840 377
keir@20969 378 /* Map t_info metadata structure */
keir@21741 379 tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, tinfo_size,
keir@21741 380 PROT_READ, tbufs_mfn);
kaf24@7840 381
keir@20969 382 if ( tbufs.t_info == 0 )
kaf24@7840 383 {
kaf24@7840 384 PERROR("Failed to mmap trace buffers");
kaf24@7840 385 exit(EXIT_FAILURE);
kaf24@7840 386 }
kaf24@7840 387
keir@20969 388 if ( tbufs.t_info->tbuf_size == 0 )
keir@20969 389 {
keir@20969 390 fprintf(stderr, "%s: tbuf_size 0!\n", __func__);
keir@20969 391 exit(EXIT_FAILURE);
keir@20969 392 }
kaf24@7840 393
keir@20969 394 /* Map per-cpu buffers */
keir@20969 395 tbufs.meta = (struct t_buf **)calloc(num, sizeof(struct t_buf *));
keir@20969 396 tbufs.data = (unsigned char **)calloc(num, sizeof(unsigned char *));
keir@20969 397 if ( tbufs.meta == NULL || tbufs.data == NULL )
kaf24@7840 398 {
kaf24@7840 399 PERROR( "Failed to allocate memory for buffer pointers\n");
kaf24@7840 400 exit(EXIT_FAILURE);
kaf24@7840 401 }
kaf24@7840 402
keir@20969 403 for(i=0; i<num; i++)
keir@20969 404 {
keir@20969 405
keir@21741 406 const uint32_t *mfn_list = (const uint32_t *)tbufs.t_info
keir@21741 407 + tbufs.t_info->mfn_offset[i];
keir@20969 408 int j;
keir@20969 409 xen_pfn_t pfn_list[tbufs.t_info->tbuf_size];
kaf24@7840 410
keir@20969 411 for ( j=0; j<tbufs.t_info->tbuf_size; j++)
keir@20969 412 pfn_list[j] = (xen_pfn_t)mfn_list[j];
kaf24@7840 413
keir@20969 414 tbufs.meta[i] = xc_map_foreign_batch(xc_handle, DOMID_XEN,
keir@20969 415 PROT_READ | PROT_WRITE,
keir@20969 416 pfn_list,
keir@20969 417 tbufs.t_info->tbuf_size);
keir@20969 418 if ( tbufs.meta[i] == NULL )
keir@20969 419 {
keir@20969 420 PERROR("Failed to map cpu buffer!");
keir@20969 421 exit(EXIT_FAILURE);
keir@20969 422 }
keir@20969 423 tbufs.data[i] = (unsigned char *)(tbufs.meta[i]+1);
kaf24@7840 424 }
kaf24@7840 425
keir@20969 426 xc_interface_close(xc_handle);
kaf24@7840 427
keir@20969 428 return &tbufs;
kaf24@7840 429 }
kaf24@7840 430
kaf24@7840 431 /**
kaf24@7840 432 * get_num_cpus - get the number of logical CPUs
kaf24@7840 433 */
keir@18091 434 static unsigned int get_num_cpus(void)
kaf24@7840 435 {
kfraser@15520 436 xc_physinfo_t physinfo = { 0 };
keir@21529 437 xc_interface *xc_handle = xc_interface_open(0,0,0);
kaf24@7840 438 int ret;
kaf24@7840 439
kfraser@11148 440 ret = xc_physinfo(xc_handle, &physinfo);
kaf24@7840 441
kaf24@7840 442 if ( ret != 0 )
kaf24@7840 443 {
kaf24@7840 444 PERROR("Failure to get logical CPU count from Xen");
kaf24@7840 445 exit(EXIT_FAILURE);
kaf24@7840 446 }
kaf24@7840 447
kaf24@7840 448 xc_interface_close(xc_handle);
kfraser@11148 449 opts.cpu_freq = (double)physinfo.cpu_khz/1000.0;
kaf24@7840 450
keir@16183 451 return physinfo.nr_cpus;
kaf24@7840 452 }
kaf24@7840 453
/**
 * monitor_tbufs - monitor the contents of tbufs
 *
 * Sets up the per-cpu state, enables and maps the trace buffers, then loops
 * until 'interrupted' is set: every buffer is drained by handing each record
 * to process_record(), after which the loop waits in wait_for_event().
 *
 * Returns 0 once the loop has been interrupted.
 */
static int monitor_tbufs(void)
{
    int i;

    struct t_struct *tbufs;      /* Pointer to hypervisor maps */
    struct t_buf **meta;         /* pointers to the trace buffer metadata */
    unsigned char **data;        /* pointers to the trace buffer data areas
                                  * where they are mapped into user space. */
    unsigned long tbufs_mfn;     /* mfn of the tbufs */
    unsigned int num;            /* number of trace buffers / logical CPUS */
    unsigned long tinfo_size;    /* size of t_info metadata map */
    unsigned long size;          /* size of a single trace buffer */

    unsigned long data_size, rec_size;

    /* get number of logical CPUs (and therefore number of trace buffers) */
    num = get_num_cpus();

    init_current(num);
    alloc_qos_data(num);

    printf("CPU Frequency = %7.2f\n", opts.cpu_freq);

    /* setup access to trace buffers */
    get_tbufs(&tbufs_mfn, &tinfo_size);
    tbufs = map_tbufs(tbufs_mfn, num, tinfo_size);

    size = tbufs->t_info->tbuf_size * XC_PAGE_SIZE;

    /* the record area is the buffer minus the struct t_buf header */
    data_size = size - sizeof(struct t_buf);

    meta = tbufs->meta;
    data = tbufs->data;

    if ( eventchn_init() < 0 )
        fprintf(stderr, "Failed to initialize event channel; "
                "Using POLL method\r\n");

    /* now, scan buffers for events */
    while ( !interrupted )
    {
        for ( i = 0; (i < num) && !interrupted; i++ )
        {
            unsigned long start_offset, end_offset, cons, prod;

            cons = meta[i]->cons;
            prod = meta[i]->prod;
            xen_rmb(); /* read prod, then read item. */

            /* nothing new in this buffer */
            if ( cons == prod )
                continue;

            /* cons/prod are reduced modulo data_size to get byte offsets */
            start_offset = cons % data_size;
            end_offset = prod % data_size;

            /*
             * The pending records wrap around the end of the data area:
             * consume up to the end first, then restart from offset 0.
             * NOTE(review): this loop and the next one end only when the
             * offset lands exactly on the target, and a return value of 0
             * from process_record() would never advance -- confirm that
             * process_record() guarantees progress.
             */
            if ( start_offset >= end_offset )
            {
                while ( start_offset != data_size )
                {
                    /* the return value is used as the record's size in bytes */
                    rec_size = process_record(
                        i, (struct t_rec *)(data[i] + start_offset));
                    start_offset += rec_size;
                }
                start_offset = 0;
            }
            while ( start_offset != end_offset )
            {
                rec_size = process_record(
                    i, (struct t_rec *)(data[i] + start_offset));
                start_offset += rec_size;
            }
            xen_mb(); /* read item, then update cons. */
            /* publish that everything up to the sampled prod was consumed */
            meta[i]->cons = prod;
        }

        wait_for_event();
        wakeups++;
    }

    /* cleanup */
    free(meta);
    free(data);
    /* don't need to munmap - cleanup is automatic */

    return 0;
}
kaf24@7840 543
kaf24@7840 544
kaf24@7840 545 /******************************************************************************
keir@16428 546 * Command line handling
kaf24@7840 547 *****************************************************************************/
kaf24@7840 548
/* printed by the -V/--version option */
const char *program_version = "xenbaked v1.4";
/* printed at the end of the usage text */
const char *program_bug_address = "<rob.gardner@hp.com>";

/*
 * Two-level stringification: xstr(X) expands the macro X first and then
 * turns the result into a string literal, whereas str(X) stringifies the
 * argument as written.
 */
#define xstr(x) str(x)
#define str(x) #x
kaf24@7840 554
keir@18091 555 static void usage(void)
keir@16428 556 {
keir@16428 557 #define USAGE_STR \
keir@16428 558 "Usage: xenbaked [OPTION...]\n" \
keir@16428 559 "Tool to capture and partially process Xen trace buffer data\n" \
keir@16428 560 "\n" \
keir@16428 561 " -m, --ms_per_sample=MS Specify the number of milliseconds per sample\n" \
keir@16428 562 " (default " xstr(MS_PER_SAMPLE) ").\n" \
keir@16428 563 " -s, --poll-sleep=p Set sleep time, p, in milliseconds between\n" \
keir@16428 564 " polling the trace buffer for new data\n" \
keir@16428 565 " (default " xstr(POLL_SLEEP_MILLIS) ").\n" \
keir@16428 566 " -t, --log-thresh=l Set number, l, of new records required to\n" \
keir@16428 567 " trigger a write to output (default " \
keir@16428 568 xstr(NEW_DATA_THRESH) ").\n" \
keir@16428 569 " -?, --help Show this message\n" \
keir@16428 570 " -V, --version Print program version\n" \
keir@16428 571 "\n" \
keir@16428 572 "This tool is used to capture trace buffer data from Xen. The data is\n" \
keir@16428 573 "saved in a shared memory structure to be further processed by xenmon.\n"
kaf24@7840 574
keir@16428 575 printf(USAGE_STR);
keir@16428 576 printf("\nReport bugs to %s\n", program_bug_address);
keir@16428 577
keir@16428 578 exit(EXIT_FAILURE);
keir@16428 579 }
kaf24@7840 580
/*
 * argtol - convert an option argument to a long
 * @arg:  the argument text
 * @base: numeric base handed to strtol()
 *
 * Returns the converted value.  If strtol() reports an error through errno,
 * or the text is empty or has trailing characters, a diagnostic is printed
 * and usage() terminates the program.
 */
static long argtol(const char *restrict arg, int base)
{
    char *tail;
    long result;

    errno = 0;
    result = strtol(arg, &tail, base);

    if (errno != 0) {
        fprintf(stderr, "Invalid option argument: %s\n", arg);
        fprintf(stderr, "Error: %s\n\n", strerror(errno));
        usage();    /* does not return */
    }

    if (tail == arg || *tail != '\0') {
        fprintf(stderr, "Invalid option argument: %s\n\n", arg);
        usage();    /* does not return */
    }

    return result;
}
keir@16428 601
keir@16428 602 /* parse command line arguments */
keir@18091 603 static void parse_args(int argc, char **argv)
keir@16428 604 {
keir@16428 605 int option;
keir@16428 606 static struct option long_options[] = {
keir@16428 607 { "log-thresh", required_argument, 0, 't' },
keir@16428 608 { "poll-sleep", required_argument, 0, 's' },
keir@16428 609 { "ms_per_sample", required_argument, 0, 'm' },
keir@16428 610 { "help", no_argument, 0, '?' },
keir@16428 611 { "version", no_argument, 0, 'V' },
keir@16428 612 { 0, 0, 0, 0 }
keir@16428 613 };
keir@16428 614
keir@16428 615 while ( (option = getopt_long(argc, argv, "m:s:t:?V",
keir@16428 616 long_options, NULL)) != -1)
keir@16428 617 {
keir@16428 618 switch ( option )
keir@16428 619 {
keir@16428 620 case 't': /* set new records threshold for logging */
keir@16428 621 opts.new_data_thresh = argtol(optarg, 0);
keir@16428 622 break;
keir@16428 623
keir@16428 624 case 's': /* set sleep time (given in milliseconds) */
keir@16428 625 opts.poll_sleep = millis_to_timespec(argtol(optarg, 0));
keir@16428 626 break;
keir@16428 627
keir@16428 628 case 'm': /* set ms_per_sample */
keir@16428 629 opts.ms_per_sample = argtol(optarg, 0);
keir@16428 630 break;
keir@16428 631
keir@16428 632 case 'V': /* print program version */
keir@16428 633 printf("%s\n", program_version);
keir@16428 634 exit(EXIT_SUCCESS);
keir@16428 635 break;
keir@16428 636
keir@16428 637 default:
keir@16428 638 usage();
keir@16428 639 }
keir@16428 640 }
keir@16428 641
keir@16428 642 /* all arguments should have been processed */
keir@16428 643 if (optind != argc) {
keir@16428 644 usage();
keir@16428 645 }
kaf24@7840 646 }
kaf24@7840 647
keir@16195 648 #define SHARED_MEM_FILE "/var/run/xenq-shm"
keir@18091 649 static void alloc_qos_data(int ncpu)
kaf24@7840 650 {
kaf24@7840 651 int i, n, pgsize, off=0;
kaf24@7840 652 char *dummy;
kaf24@7840 653 int qos_fd;
kaf24@7840 654
kaf24@7840 655 cpu_qos_data = (_new_qos_data **) calloc(ncpu, sizeof(_new_qos_data *));
kaf24@7840 656
kaf24@7840 657
kaf24@7840 658 qos_fd = open(SHARED_MEM_FILE, O_RDWR|O_CREAT|O_TRUNC, 0777);
kaf24@7840 659 if (qos_fd < 0) {
kaf24@7840 660 PERROR(SHARED_MEM_FILE);
kaf24@7840 661 exit(2);
kaf24@7840 662 }
kaf24@7840 663 pgsize = getpagesize();
kaf24@7840 664 dummy = malloc(pgsize);
kaf24@7840 665
kaf24@7840 666 for (n=0; n<ncpu; n++) {
kaf24@7840 667
keir@15979 668 for (i=0; i<sizeof(_new_qos_data); i=i+pgsize)
keir@15979 669 if ((write(qos_fd, dummy, pgsize)) != pgsize) {
keir@15979 670 PERROR(SHARED_MEM_FILE);
keir@15979 671 exit(2);
keir@15979 672 }
kaf24@7840 673
keir@15979 674 new_qos = (_new_qos_data *) mmap(0, sizeof(_new_qos_data), PROT_READ|PROT_WRITE,
keir@15979 675 MAP_SHARED, qos_fd, off);
keir@15979 676 off += i;
keir@15979 677 if (new_qos == NULL) {
keir@15979 678 PERROR("mmap");
keir@15979 679 exit(3);
keir@15979 680 }
keir@15979 681 // printf("new_qos = %p\n", new_qos);
keir@15979 682 memset(new_qos, 0, sizeof(_new_qos_data));
keir@15979 683 new_qos->next_datapoint = 0;
keir@15979 684 advance_next_datapoint(0);
keir@15979 685 new_qos->structlen = i;
keir@15979 686 new_qos->ncpu = ncpu;
keir@15979 687 // printf("structlen = 0x%x\n", i);
keir@15979 688 cpu_qos_data[n] = new_qos;
kaf24@7840 689 }
kaf24@7840 690 free(dummy);
kaf24@7840 691 new_qos = NULL;
kaf24@7840 692 }
kaf24@7840 693
kaf24@7840 694
kaf24@7840 695 int main(int argc, char **argv)
kaf24@7840 696 {
kaf24@7840 697 int ret;
kaf24@7840 698 struct sigaction act;
kaf24@7840 699
kaf24@7840 700 time(&start_time);
kaf24@7840 701 opts.poll_sleep = millis_to_timespec(POLL_SLEEP_MILLIS);
kaf24@7840 702 opts.new_data_thresh = NEW_DATA_THRESH;
kaf24@7840 703 opts.ms_per_sample = MS_PER_SAMPLE;
kaf24@7840 704 opts.cpu_freq = CPU_FREQ;
kaf24@7840 705
keir@16428 706 parse_args(argc, argv);
kaf24@7840 707 fprintf(stderr, "ms_per_sample = %ld\n", opts.ms_per_sample);
kaf24@7840 708
kaf24@7840 709
kaf24@7840 710 /* ensure that if we get a signal, we'll do cleanup, then exit */
kaf24@7840 711 act.sa_handler = close_handler;
kaf24@7840 712 act.sa_flags = 0;
kaf24@7840 713 sigemptyset(&act.sa_mask);
kaf24@7840 714 sigaction(SIGHUP, &act, NULL);
kaf24@7840 715 sigaction(SIGTERM, &act, NULL);
kaf24@7840 716 sigaction(SIGINT, &act, NULL);
kaf24@7840 717
kaf24@7840 718 ret = monitor_tbufs();
kaf24@7840 719
kaf24@7840 720 dump_stats();
kaf24@7840 721 msync(new_qos, sizeof(_new_qos_data), MS_SYNC);
kaf24@9685 722 disable_tracing();
kaf24@7840 723
kaf24@7840 724 return ret;
kaf24@7840 725 }
kaf24@7840 726
keir@18091 727 static void qos_init_domain(int domid, int idx)
kfraser@10697 728 {
keir@15979 729 int i;
kfraser@10697 730
keir@15979 731 memset(&new_qos->domain_info[idx], 0, sizeof(_domain_info));
keir@15979 732 new_qos->domain_info[idx].last_update_time = global_now;
keir@15979 733 // runnable_start_time[idx] = 0;
keir@15979 734 new_qos->domain_info[idx].runnable_start_time = 0; // invalidate
keir@15979 735 new_qos->domain_info[idx].in_use = 1;
keir@15979 736 new_qos->domain_info[idx].blocked_start_time = 0;
keir@15979 737 new_qos->domain_info[idx].id = domid;
keir@15979 738 if (domid == IDLE_DOMAIN_ID)
keir@17870 739 snprintf(new_qos->domain_info[idx].name,
keir@17870 740 sizeof(new_qos->domain_info[idx].name),
keir@17870 741 "Idle Task%d", global_cpu);
keir@15979 742 else
keir@17870 743 snprintf(new_qos->domain_info[idx].name,
keir@17870 744 sizeof(new_qos->domain_info[idx].name),
keir@17870 745 "Domain#%d", domid);
kfraser@10697 746
keir@15979 747 for (i=0; i<NSAMPLES; i++) {
keir@15979 748 new_qos->qdata[i].ns_gotten[idx] = 0;
keir@15979 749 new_qos->qdata[i].ns_allocated[idx] = 0;
keir@15979 750 new_qos->qdata[i].ns_waiting[idx] = 0;
keir@15979 751 new_qos->qdata[i].ns_blocked[idx] = 0;
keir@15979 752 new_qos->qdata[i].switchin_count[idx] = 0;
keir@15979 753 new_qos->qdata[i].io_count[idx] = 0;
keir@15979 754 }
kfraser@10697 755 }
kfraser@10697 756
keir@18091 757 static void global_init_domain(int domid, int idx)
kfraser@10697 758 {
keir@15979 759 int cpu;
keir@15979 760 _new_qos_data *saved_qos;
kfraser@10697 761
keir@15979 762 saved_qos = new_qos;
kfraser@10697 763
keir@15979 764 for (cpu=0; cpu<NCPU; cpu++) {
keir@15979 765 new_qos = cpu_qos_data[cpu];
keir@15979 766 qos_init_domain(domid, idx);
keir@15979 767 }
keir@15979 768 new_qos = saved_qos;
kfraser@10697 769 }
kfraser@10697 770
kfraser@10697 771 // give index of this domain in the qos data array
keir@18091 772 static int indexof(int domid)
kfraser@10697 773 {
keir@15979 774 int idx;
keir@15979 775 xc_dominfo_t dominfo[NDOMAINS];
keir@21529 776 xc_interface *xc_handle;
keir@21529 777 int ndomains;
kfraser@10697 778
keir@15979 779 if (domid < 0) { // shouldn't happen
keir@15979 780 printf("bad domain id: %d\r\n", domid);
keir@15979 781 return 0;
kfraser@10697 782 }
kfraser@10697 783
keir@15979 784 for (idx=0; idx<NDOMAINS; idx++)
keir@15979 785 if ( (new_qos->domain_info[idx].id == domid) && new_qos->domain_info[idx].in_use)
keir@15979 786 return idx;
keir@15979 787
keir@15979 788 // not found, make a new entry
keir@15979 789 for (idx=0; idx<NDOMAINS; idx++)
keir@15979 790 if (new_qos->domain_info[idx].in_use == 0) {
keir@15979 791 global_init_domain(domid, idx);
keir@15979 792 return idx;
keir@15979 793 }
keir@15979 794
keir@15979 795 // call domaininfo hypercall to try and garbage collect unused entries
keir@21529 796 xc_handle = xc_interface_open(0,0,0);
keir@15979 797 ndomains = xc_domain_getinfo(xc_handle, 0, NDOMAINS, dominfo);
keir@15979 798 xc_interface_close(xc_handle);
kfraser@10697 799
keir@15979 800 // for each domain in our data, look for it in the system dominfo structure
keir@15979 801 // and purge the domain's data from our state if it does not exist in the
keir@15979 802 // dominfo structure
keir@15979 803 for (idx=0; idx<NDOMAINS; idx++) {
keir@15979 804 int domid = new_qos->domain_info[idx].id;
keir@15979 805 int jdx;
kfraser@10697 806
keir@15979 807 for (jdx=0; jdx<ndomains; jdx++) {
keir@15979 808 if (dominfo[jdx].domid == domid)
keir@15979 809 break;
keir@15979 810 }
keir@15979 811 if (jdx == ndomains) // we didn't find domid in the dominfo struct
keir@15979 812 if (domid != IDLE_DOMAIN_ID) // exception for idle domain, which is not
keir@15979 813 // contained in dominfo
keir@15979 814 qos_kill_thread(domid); // purge our stale data
kfraser@10697 815 }
kfraser@10697 816
keir@15979 817 // look again for a free slot
keir@15979 818 for (idx=0; idx<NDOMAINS; idx++)
keir@15979 819 if (new_qos->domain_info[idx].in_use == 0) {
keir@15979 820 global_init_domain(domid, idx);
keir@15979 821 return idx;
keir@15979 822 }
kfraser@10697 823
keir@15979 824 // still no space found, so bail
keir@15979 825 fprintf(stderr, "out of space in domain table, increase NDOMAINS\r\n");
keir@15979 826 exit(2);
kfraser@10697 827 }
kfraser@10697 828
keir@18091 829 static int domain_runnable(int domid)
kaf24@7840 830 {
kfraser@10697 831 return new_qos->domain_info[indexof(domid)].runnable;
kaf24@7840 832 }
kaf24@7840 833
kaf24@7840 834
keir@18091 835 static void update_blocked_time(int domid, uint64_t now)
kaf24@7840 836 {
kaf24@7840 837 uint64_t t_blocked;
kfraser@10697 838 int id = indexof(domid);
kaf24@7840 839
kaf24@7840 840 if (new_qos->domain_info[id].blocked_start_time != 0) {
kaf24@7840 841 if (now >= new_qos->domain_info[id].blocked_start_time)
kaf24@7840 842 t_blocked = now - new_qos->domain_info[id].blocked_start_time;
kaf24@7840 843 else
kaf24@7840 844 t_blocked = now + (~0ULL - new_qos->domain_info[id].blocked_start_time);
kaf24@7840 845 new_qos->qdata[new_qos->next_datapoint].ns_blocked[id] += t_blocked;
kaf24@7840 846 }
kaf24@7840 847
kfraser@10697 848 if (domain_runnable(domid))
kaf24@7840 849 new_qos->domain_info[id].blocked_start_time = 0;
kaf24@7840 850 else
kaf24@7840 851 new_qos->domain_info[id].blocked_start_time = now;
kaf24@7840 852 }
kaf24@7840 853
kaf24@7840 854
kaf24@7840 855 // advance to next datapoint for all domains
keir@18091 856 static void advance_next_datapoint(uint64_t now)
kaf24@7840 857 {
kaf24@7840 858 int new, old, didx;
kaf24@7840 859
kaf24@7840 860 old = new_qos->next_datapoint;
kaf24@7840 861 new = QOS_INCR(old);
kaf24@7840 862 new_qos->next_datapoint = new;
kaf24@7840 863 // memset(&new_qos->qdata[new], 0, sizeof(uint64_t)*(2+5*NDOMAINS));
kaf24@7840 864 for (didx = 0; didx < NDOMAINS; didx++) {
kaf24@7840 865 new_qos->qdata[new].ns_gotten[didx] = 0;
kaf24@7840 866 new_qos->qdata[new].ns_allocated[didx] = 0;
kaf24@7840 867 new_qos->qdata[new].ns_waiting[didx] = 0;
kaf24@7840 868 new_qos->qdata[new].ns_blocked[didx] = 0;
kaf24@7840 869 new_qos->qdata[new].switchin_count[didx] = 0;
kaf24@7840 870 new_qos->qdata[new].io_count[didx] = 0;
kaf24@7840 871 }
kaf24@7840 872 new_qos->qdata[new].ns_passed = 0;
kaf24@7840 873 new_qos->qdata[new].lost_records = 0;
kaf24@7840 874 new_qos->qdata[new].flip_free_periods = 0;
kaf24@7840 875
kaf24@7840 876 new_qos->qdata[new].timestamp = now;
kaf24@7840 877 }
kaf24@7840 878
kaf24@7840 879
kaf24@7840 880
keir@18091 881 static void qos_update_thread(int cpu, int domid, uint64_t now)
kaf24@7840 882 {
kaf24@7840 883 int n, id;
kaf24@7840 884 uint64_t last_update_time, start;
kaf24@7840 885 int64_t time_since_update, run_time = 0;
kaf24@7840 886
kfraser@10697 887 id = indexof(domid);
kaf24@7840 888
kaf24@7840 889 n = new_qos->next_datapoint;
kaf24@7840 890 last_update_time = new_qos->domain_info[id].last_update_time;
kaf24@7840 891
kaf24@7840 892 time_since_update = now - last_update_time;
kaf24@7840 893
kaf24@7840 894 if (time_since_update < 0) {
keir@15979 895 // what happened here? either a timestamp wraparound, or more likely,
keir@15979 896 // a slight inconsistency among timestamps from various cpu's
keir@15979 897 if (-time_since_update < billion) {
keir@15979 898 // fairly small difference, let's just adjust 'now' to be a little
keir@15979 899 // beyond last_update_time
keir@15979 900 time_since_update = -time_since_update;
keir@15979 901 }
keir@15979 902 else if ( ((~0ULL - last_update_time) < billion) && (now < billion) ) {
keir@15979 903 // difference is huge, must be a wraparound
keir@15979 904 // last_update time should be "near" ~0ULL,
keir@15979 905 // and now should be "near" 0
keir@15979 906 time_since_update = now + (~0ULL - last_update_time);
keir@15979 907 printf("time wraparound\n");
keir@15979 908 }
keir@15979 909 else {
keir@15979 910 // none of the above, may be an out of order record
keir@15979 911 // no good solution, just ignore and update again later
keir@15979 912 return;
keir@15979 913 }
kaf24@7840 914 }
kaf24@7840 915
kaf24@7840 916 new_qos->domain_info[id].last_update_time = now;
kaf24@7840 917
kaf24@7840 918 if (new_qos->domain_info[id].runnable_at_last_update && is_current(domid, cpu)) {
kaf24@7840 919 start = new_qos->domain_info[id].start_time;
kaf24@7840 920 if (start > now) { // wrapped around
kaf24@7840 921 run_time = now + (~0ULL - start);
kaf24@9685 922 // this could happen if there is nothing going on within a cpu;
kaf24@9685 923 // in this case the idle domain would run forever
kaf24@9685 924 // printf("warning: start > now\n");
kaf24@7840 925 }
kaf24@7840 926 else
kaf24@7840 927 run_time = now - start;
kaf24@7840 928 // if (run_time < 0) // should not happen
kaf24@7840 929 // printf("warning: run_time < 0; start = %lld now= %lld\n", start, now);
kaf24@7840 930 new_qos->domain_info[id].ns_oncpu_since_boot += run_time;
kaf24@7840 931 new_qos->domain_info[id].start_time = now;
kaf24@7840 932 new_qos->domain_info[id].ns_since_boot += time_since_update;
kaf24@9685 933
kaf24@7840 934 new_qos->qdata[n].ns_gotten[id] += run_time;
kaf24@9685 935 // if (domid == 0 && cpu == 1)
kaf24@9685 936 // printf("adding run time for dom0 on cpu1\r\n");
kaf24@9685 937
kaf24@7840 938 }
kaf24@7840 939
kaf24@7840 940 new_qos->domain_info[id].runnable_at_last_update = domain_runnable(domid);
kaf24@7840 941
kaf24@7840 942 update_blocked_time(domid, now);
kaf24@7840 943
kaf24@7840 944 // how much time passed since this datapoint was updated?
kaf24@7840 945 if (now >= new_qos->qdata[n].timestamp) {
kaf24@7840 946 // all is right with the world, time is increasing
kaf24@7840 947 new_qos->qdata[n].ns_passed += (now - new_qos->qdata[n].timestamp);
kaf24@7840 948 }
kaf24@7840 949 else {
kaf24@7840 950 // time wrapped around
kaf24@7840 951 //new_qos->qdata[n].ns_passed += (now + (~0LL - new_qos->qdata[n].timestamp));
kaf24@7840 952 // printf("why timewrap?\r\n");
kaf24@7840 953 }
kaf24@7840 954 new_qos->qdata[n].timestamp = now;
kaf24@7840 955 }
kaf24@7840 956
kaf24@7840 957
kaf24@7840 958 // called by dump routines to update all structures
keir@18091 959 static void qos_update_all(uint64_t now, int cpu)
kaf24@7840 960 {
kaf24@7840 961 int i;
kaf24@7840 962
kaf24@7840 963 for (i=0; i<NDOMAINS; i++)
kaf24@7840 964 if (new_qos->domain_info[i].in_use)
kfraser@10697 965 qos_update_thread(cpu, new_qos->domain_info[i].id, now);
kaf24@7840 966 }
kaf24@7840 967
kaf24@7840 968
keir@18091 969 static void qos_update_thread_stats(int cpu, int domid, uint64_t now)
kaf24@7840 970 {
kaf24@7840 971 if (new_qos->qdata[new_qos->next_datapoint].ns_passed > (million*opts.ms_per_sample)) {
kaf24@7840 972 qos_update_all(now, cpu);
kaf24@7840 973 advance_next_datapoint(now);
kaf24@7840 974 return;
kaf24@7840 975 }
kaf24@7840 976 qos_update_thread(cpu, domid, now);
kaf24@7840 977 }
kaf24@7840 978
kaf24@7840 979
kaf24@7840 980
kaf24@7840 981 // called when a new thread gets the cpu
keir@18091 982 static void qos_switch_in(int cpu, int domid, uint64_t now, unsigned long ns_alloc, unsigned long ns_waited)
kaf24@7840 983 {
kfraser@10697 984 int idx = indexof(domid);
kaf24@7840 985
kfraser@10697 986 new_qos->domain_info[idx].runnable = 1;
kaf24@7840 987 update_blocked_time(domid, now);
kfraser@10697 988 new_qos->domain_info[idx].blocked_start_time = 0; // invalidate
kfraser@10697 989 new_qos->domain_info[idx].runnable_start_time = 0; // invalidate
kfraser@10697 990 //runnable_start_time[idx] = 0;
kaf24@7840 991
kfraser@10697 992 new_qos->domain_info[idx].start_time = now;
kfraser@10697 993 new_qos->qdata[new_qos->next_datapoint].switchin_count[idx]++;
kfraser@10697 994 new_qos->qdata[new_qos->next_datapoint].ns_allocated[idx] += ns_alloc;
kfraser@10697 995 new_qos->qdata[new_qos->next_datapoint].ns_waiting[idx] += ns_waited;
kaf24@7840 996 qos_update_thread_stats(cpu, domid, now);
kfraser@10697 997 set_current(cpu, domid);
kaf24@7840 998
kaf24@7840 999 // count up page flips for dom0 execution
kfraser@10697 1000 if (domid == 0)
keir@15979 1001 dom0_flips = 0;
kaf24@7840 1002 }
kaf24@7840 1003
kaf24@7840 1004 // called when the current thread is taken off the cpu
keir@18091 1005 static void qos_switch_out(int cpu, int domid, uint64_t now, unsigned long gotten)
kaf24@7840 1006 {
kfraser@10697 1007 int idx = indexof(domid);
kaf24@7840 1008 int n;
kaf24@7840 1009
kfraser@10697 1010 if (!is_current(domid, cpu)) {
kaf24@7840 1011 // printf("switching out domain %d but it is not current. gotten=%ld\r\n", id, gotten);
kaf24@7840 1012 }
kaf24@7840 1013
kaf24@7840 1014 if (gotten == 0) {
kaf24@7840 1015 printf("gotten==0 in qos_switchout(domid=%d)\n", domid);
kaf24@7840 1016 }
kaf24@7840 1017
kaf24@7840 1018 if (gotten < 100) {
kaf24@7840 1019 printf("gotten<100ns in qos_switchout(domid=%d)\n", domid);
kaf24@7840 1020 }
kaf24@7840 1021
kaf24@7840 1022
kaf24@7840 1023 n = new_qos->next_datapoint;
kaf24@7840 1024 #if 0
kfraser@10697 1025 new_qos->qdata[n].ns_gotten[idx] += gotten;
kaf24@7840 1026 if (gotten > new_qos->qdata[n].ns_passed)
keir@15979 1027 printf("inconsistency #257, diff = %lld\n",
keir@15979 1028 gotten - new_qos->qdata[n].ns_passed );
kaf24@7840 1029 #endif
kfraser@10697 1030 new_qos->domain_info[idx].ns_oncpu_since_boot += gotten;
kfraser@10697 1031 new_qos->domain_info[idx].runnable_start_time = now;
kaf24@7840 1032 // runnable_start_time[id] = now;
kfraser@10697 1033 qos_update_thread_stats(cpu, domid, now);
kaf24@7840 1034
kaf24@7840 1035 // process dom0 page flips
kfraser@10697 1036 if (domid == 0)
keir@15979 1037 if (dom0_flips == 0)
keir@15979 1038 new_qos->qdata[n].flip_free_periods++;
kaf24@7840 1039 }
kaf24@7840 1040
kaf24@7840 1041 // called when domain is put to sleep, may also be called
kaf24@7840 1042 // when thread is already asleep
keir@18091 1043 static void qos_state_sleeping(int cpu, int domid, uint64_t now)
kaf24@7840 1044 {
kfraser@10697 1045 int idx;
kaf24@7840 1046
kfraser@10697 1047 if (!domain_runnable(domid)) // double call?
kaf24@7840 1048 return;
kaf24@7840 1049
kfraser@10697 1050 idx = indexof(domid);
kfraser@10697 1051 new_qos->domain_info[idx].runnable = 0;
kfraser@10697 1052 new_qos->domain_info[idx].blocked_start_time = now;
kfraser@10697 1053 new_qos->domain_info[idx].runnable_start_time = 0; // invalidate
kfraser@10697 1054 // runnable_start_time[idx] = 0; // invalidate
kaf24@7840 1055 qos_update_thread_stats(cpu, domid, now);
kaf24@7840 1056 }
kaf24@7840 1057
kaf24@7840 1058
kaf24@7840 1059
kfraser@10697 1060 // domain died, presume it's dead on all cpu's, not just mostly dead
keir@18091 1061 static void qos_kill_thread(int domid)
kaf24@7840 1062 {
keir@15979 1063 int cpu;
kfraser@10697 1064
keir@15979 1065 for (cpu=0; cpu<NCPU; cpu++) {
keir@15979 1066 cpu_qos_data[cpu]->domain_info[indexof(domid)].in_use = 0;
keir@15979 1067 }
kfraser@10697 1068
kaf24@7840 1069 }
kaf24@7840 1070
kaf24@7840 1071
kaf24@7840 1072 // called when thread becomes runnable, may also be called
kaf24@7840 1073 // when thread is already runnable
keir@18091 1074 static void qos_state_runnable(int cpu, int domid, uint64_t now)
kaf24@7840 1075 {
keir@15979 1076 int idx;
kfraser@10697 1077
kaf24@7840 1078
kaf24@9685 1079 qos_update_thread_stats(cpu, domid, now);
kaf24@9685 1080
kfraser@10697 1081 if (domain_runnable(domid)) // double call?
kaf24@7840 1082 return;
kfraser@10697 1083
kfraser@10697 1084 idx = indexof(domid);
kfraser@10697 1085 new_qos->domain_info[idx].runnable = 1;
kaf24@7840 1086 update_blocked_time(domid, now);
kaf24@7840 1087
kfraser@10697 1088 new_qos->domain_info[idx].blocked_start_time = 0; /* invalidate */
kfraser@10697 1089 new_qos->domain_info[idx].runnable_start_time = now;
kaf24@7840 1090 // runnable_start_time[id] = now;
kaf24@7840 1091 }
kaf24@7840 1092
kaf24@7840 1093
keir@18091 1094 static void qos_count_packets(domid_t domid, uint64_t now)
kaf24@7840 1095 {
keir@15979 1096 int i, idx = indexof(domid);
keir@15979 1097 _new_qos_data *cpu_data;
kaf24@7840 1098
keir@15979 1099 for (i=0; i<NCPU; i++) {
keir@15979 1100 cpu_data = cpu_qos_data[i];
keir@15979 1101 if (cpu_data->domain_info[idx].in_use) {
keir@15979 1102 cpu_data->qdata[cpu_data->next_datapoint].io_count[idx]++;
keir@15979 1103 }
kaf24@7840 1104 }
kaf24@7840 1105
keir@15979 1106 new_qos->qdata[new_qos->next_datapoint].io_count[0]++;
keir@15979 1107 dom0_flips++;
kaf24@7840 1108 }
kaf24@7840 1109
kaf24@7840 1110
keir@18091 1111 static int process_record(int cpu, struct t_rec *r)
kaf24@7840 1112 {
keir@15979 1113 uint64_t now = 0;
keir@15979 1114 uint32_t *extra_u32 = r->u.nocycles.extra_u32;
kaf24@7840 1115
keir@15979 1116 new_qos = cpu_qos_data[cpu];
kaf24@7840 1117
keir@15979 1118 rec_count++;
kfraser@10697 1119
keir@15979 1120 if ( r->cycles_included )
keir@15979 1121 {
keir@15979 1122 now = ((uint64_t)r->u.cycles.cycles_hi << 32) | r->u.cycles.cycles_lo;
keir@15979 1123 now = ((double)now) / (opts.cpu_freq / 1000.0);
keir@15979 1124 extra_u32 = r->u.cycles.extra_u32;
keir@15979 1125 }
kaf24@7840 1126
keir@15979 1127 global_now = now;
keir@15979 1128 global_cpu = cpu;
kaf24@7840 1129
keir@15979 1130 log_event(r->event);
keir@15979 1131
keir@15979 1132 switch (r->event) {
keir@15979 1133
keir@15979 1134 case TRC_SCHED_SWITCH_INFPREV:
keir@15979 1135 // domain data[0] just switched out and received data[1] ns of cpu time
keir@15979 1136 qos_switch_out(cpu, extra_u32[0], now, extra_u32[1]);
keir@15979 1137 // printf("ns_gotten %ld\n", extra_u32[1]);
keir@15979 1138 break;
kaf24@7840 1139
keir@15979 1140 case TRC_SCHED_SWITCH_INFNEXT:
keir@15979 1141 // domain data[0] just switched in and
keir@15979 1142 // waited data[1] ns, and was allocated data[2] ns of cpu time
keir@15979 1143 qos_switch_in(cpu, extra_u32[0], now, extra_u32[2], extra_u32[1]);
keir@15979 1144 break;
kaf24@7840 1145
keir@15979 1146 case TRC_SCHED_DOM_ADD:
keir@15979 1147 (void) indexof(extra_u32[0]);
keir@15979 1148 break;
kaf24@7840 1149
keir@15979 1150 case TRC_SCHED_DOM_REM:
keir@15979 1151 qos_kill_thread(extra_u32[0]);
keir@15979 1152 break;
keir@15979 1153
keir@15979 1154 case TRC_SCHED_SLEEP:
keir@15979 1155 qos_state_sleeping(cpu, extra_u32[0], now);
keir@15979 1156 break;
kaf24@7840 1157
keir@15979 1158 case TRC_SCHED_WAKE:
keir@15979 1159 qos_state_runnable(cpu, extra_u32[0], now);
keir@15979 1160 break;
kaf24@7840 1161
keir@15979 1162 case TRC_SCHED_BLOCK:
keir@15979 1163 qos_state_sleeping(cpu, extra_u32[0], now);
keir@15979 1164 break;
kaf24@7840 1165
keir@15979 1166 case TRC_MEM_PAGE_GRANT_TRANSFER:
keir@15979 1167 qos_count_packets(extra_u32[0], now);
keir@15979 1168 break;
keir@15979 1169
keir@15979 1170 default:
keir@15979 1171 break;
keir@15979 1172 }
keir@15979 1173
keir@15979 1174 new_qos = NULL;
keir@15979 1175
keir@15979 1176 return 4 + (r->cycles_included ? 8 : 0) + (r->extra_u32 * 4);
kaf24@7840 1177 }