debuggers.hg

changeset 20873:fad80160c001

xentrace: Per-cpu xentrace buffers

In the current xentrace configuration, xentrace buffers are all
allocated in a single contiguous chunk, and then divided among logical
cpus, one buffer per cpu. The size of an allocatable chunk is fairly
limited, in my experience about 128 pages (512KiB). As the number of
logical cores increases, this means a much smaller maximum trace
buffer per cpu; on my dual-socket quad-core Nehalem box with
hyperthreading (16 logical cpus), that comes to 8 pages per logical
cpu.

This patch addresses this issue by allocating per-cpu buffers
separately.

Signed-off-by: George Dunlap <dunlapg@umich.edu>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Jan 20 20:33:35 2010 +0000 (2010-01-20)
parents b0b41e735575
children 0edb75cd8126
files tools/xentrace/xentrace.c xen/common/trace.c xen/include/public/sysctl.h xen/include/public/trace.h
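
For orientation before the diff: instead of one contiguous allocation shared
by all cpus, Xen now allocates each cpu's buffer separately and exports a
small shared metadata area, struct t_info, that records the per-cpu buffer
size and, for each cpu, the offset (in 32-bit words) of that cpu's MFN list
within the same mapping. The sketch below is illustrative only; the struct is
taken from the trace.h hunk further down, but the helper function is an
assumption of this note, not part of the patch.

    #include <stdint.h>

    /* Layout added to xen/include/public/trace.h by this patch. */
    struct t_info {
        uint16_t tbuf_size;     /* size of each per-cpu buffer, in pages     */
        uint16_t mfn_offset[];  /* per-cpu offset, in uint32_t units, of that
                                 * cpu's MFN list within this structure      */
        /* MFN lists follow immediately after the header */
    };

    /*
     * Illustrative helper (not in the patch): locate a cpu's list of
     * t_info->tbuf_size MFNs.  This mirrors the pointer arithmetic in
     * the new map_tbufs() in tools/xentrace/xentrace.c.
     */
    static uint32_t *cpu_mfn_list(struct t_info *ti, unsigned int cpu)
    {
        return (uint32_t *)ti + ti->mfn_offset[cpu];
    }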
line diff
     1.1 --- a/tools/xentrace/xentrace.c	Wed Jan 20 09:51:38 2010 +0000
     1.2 +++ b/tools/xentrace/xentrace.c	Wed Jan 20 20:33:35 2010 +0000
     1.3 @@ -61,6 +61,12 @@ typedef struct settings_st {
     1.4          disable_tracing:1;
     1.5  } settings_t;
     1.6  
     1.7 +struct t_struct {
     1.8 +    struct t_info *t_info;  /* Structure with information about individual buffers */
     1.9 +    struct t_buf **meta;    /* Pointers to trace buffer metadata */
    1.10 +    unsigned char **data;   /* Pointers to trace buffer data areas */
    1.11 +};
    1.12 +
    1.13  settings_t opts;
    1.14  
    1.15  int interrupted = 0; /* gets set if we get a SIGHUP */
    1.16 @@ -446,22 +452,61 @@ static void get_tbufs(unsigned long *mfn
    1.17   *
    1.18   * Maps the Xen trace buffers them into process address space.
    1.19   */
    1.20 -static struct t_buf *map_tbufs(unsigned long tbufs_mfn, unsigned int num,
    1.21 -                        unsigned long size)
    1.22 +static struct t_struct *map_tbufs(unsigned long tbufs_mfn, unsigned int num,
    1.23 +                                  unsigned long tinfo_size)
    1.24  {
    1.25 -    struct t_buf *tbufs_mapped;
    1.26 +    static struct t_struct tbufs = { 0 };
    1.27 +    int i;
    1.28  
    1.29 -    tbufs_mapped = xc_map_foreign_range(xc_handle, DOMID_XEN,
    1.30 -                                        size * num, PROT_READ | PROT_WRITE,
    1.31 +    /* Map t_info metadata structure */
    1.32 +    tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN,
    1.33 +                                        tinfo_size, PROT_READ | PROT_WRITE,
    1.34                                          tbufs_mfn);
    1.35  
    1.36 -    if ( tbufs_mapped == 0 ) 
    1.37 +    if ( tbufs.t_info == 0 ) 
    1.38      {
    1.39          PERROR("Failed to mmap trace buffers");
    1.40          exit(EXIT_FAILURE);
    1.41      }
    1.42  
    1.43 -    return tbufs_mapped;
    1.44 +    if ( tbufs.t_info->tbuf_size == 0 )
    1.45 +    {
    1.46 +        fprintf(stderr, "%s: tbuf_size 0!\n", __func__);
    1.47 +        exit(EXIT_FAILURE);
    1.48 +    }
    1.49 +
    1.50 +    /* Map per-cpu buffers */
    1.51 +    tbufs.meta = (struct t_buf **)calloc(num, sizeof(struct t_buf *));
    1.52 +    tbufs.data = (unsigned char **)calloc(num, sizeof(unsigned char *));
    1.53 +    if ( tbufs.meta == NULL || tbufs.data == NULL )
    1.54 +    {
    1.55 +        PERROR( "Failed to allocate memory for buffer pointers\n");
    1.56 +        exit(EXIT_FAILURE);
    1.57 +    }
    1.58 +
    1.59 +    for(i=0; i<num; i++)
    1.60 +    {
    1.61 +        
    1.62 +        uint32_t *mfn_list = ((uint32_t *)tbufs.t_info) + tbufs.t_info->mfn_offset[i];
    1.63 +        int j;
    1.64 +        xen_pfn_t pfn_list[tbufs.t_info->tbuf_size];
    1.65 +
    1.66 +        for ( j=0; j<tbufs.t_info->tbuf_size; j++)
    1.67 +            pfn_list[j] = (xen_pfn_t)mfn_list[j];
    1.68 +
    1.69 +        tbufs.meta[i] = xc_map_foreign_batch(xc_handle, DOMID_XEN,
    1.70 +                                             PROT_READ | PROT_WRITE,
    1.71 +                                             pfn_list,
    1.72 +                                             tbufs.t_info->tbuf_size);
    1.73 +        if ( tbufs.meta[i] == NULL )
    1.74 +        {
    1.75 +            PERROR("Failed to map cpu buffer!");
    1.76 +            exit(EXIT_FAILURE);
    1.77 +        }
    1.78 +        tbufs.data[i] = (unsigned char *)(tbufs.meta[i]+1);
    1.79 +    }
    1.80 +
    1.81 +    return &tbufs;
    1.82  }
    1.83  
    1.84  /**
    1.85 @@ -490,66 +535,6 @@ static void set_mask(uint32_t mask, int 
    1.86  }
    1.87  
    1.88  /**
    1.89 - * init_bufs_ptrs - initialises an array of pointers to the trace buffers
    1.90 - * @bufs_mapped:    the userspace address where the trace buffers are mapped
    1.91 - * @num:            number of trace buffers
    1.92 - * @size:           trace buffer size
    1.93 - *
    1.94 - * Initialises an array of pointers to individual trace buffers within the
    1.95 - * mapped region containing all trace buffers.
    1.96 - */
    1.97 -static struct t_buf **init_bufs_ptrs(void *bufs_mapped, unsigned int num,
    1.98 -                              unsigned long size)
    1.99 -{
   1.100 -    int i;
   1.101 -    struct t_buf **user_ptrs;
   1.102 -
   1.103 -    user_ptrs = (struct t_buf **)calloc(num, sizeof(struct t_buf *));
   1.104 -    if ( user_ptrs == NULL )
   1.105 -    {
   1.106 -        PERROR( "Failed to allocate memory for buffer pointers\n");
   1.107 -        exit(EXIT_FAILURE);
   1.108 -    }
   1.109 -    
   1.110 -    /* initialise pointers to the trace buffers - given the size of a trace
   1.111 -     * buffer and the value of bufs_maped, we can easily calculate these */
   1.112 -    for ( i = 0; i<num; i++ )
   1.113 -        user_ptrs[i] = (struct t_buf *)((unsigned long)bufs_mapped + size * i);
   1.114 -
   1.115 -    return user_ptrs;
   1.116 -}
   1.117 -
   1.118 -
   1.119 -/**
   1.120 - * init_rec_ptrs - initialises data area pointers to locations in user space
   1.121 - * @tbufs_mfn:     base mfn of the trace buffer area
   1.122 - * @tbufs_mapped:  user virtual address of base of trace buffer area
   1.123 - * @meta:          array of user-space pointers to struct t_buf's of metadata
   1.124 - * @num:           number of trace buffers
   1.125 - *
   1.126 - * Initialises data area pointers to the locations that data areas have been
   1.127 - * mapped in user space.  Note that the trace buffer metadata contains machine
   1.128 - * pointers - the array returned allows more convenient access to them.
   1.129 - */
   1.130 -static unsigned char **init_rec_ptrs(struct t_buf **meta, unsigned int num)
   1.131 -{
   1.132 -    int i;
   1.133 -    unsigned char **data;
   1.134 -    
   1.135 -    data = calloc(num, sizeof(unsigned char *));
   1.136 -    if ( data == NULL )
   1.137 -    {
   1.138 -        PERROR("Failed to allocate memory for data pointers\n");
   1.139 -        exit(EXIT_FAILURE);
   1.140 -    }
   1.141 -
   1.142 -    for ( i = 0; i < num; i++ )
   1.143 -        data[i] = (unsigned char *)(meta[i] + 1);
   1.144 -
   1.145 -    return data;
   1.146 -}
   1.147 -
   1.148 -/**
   1.149   * get_num_cpus - get the number of logical CPUs
   1.150   */
   1.151  static unsigned int get_num_cpus(void)
   1.152 @@ -638,12 +623,13 @@ static int monitor_tbufs(void)
   1.153  {
   1.154      int i;
   1.155  
   1.156 -    void *tbufs_mapped;          /* pointer to where the tbufs are mapped    */
   1.157 +    struct t_struct *tbufs;      /* Pointer to hypervisor maps */
   1.158      struct t_buf **meta;         /* pointers to the trace buffer metadata    */
   1.159      unsigned char **data;        /* pointers to the trace buffer data areas
   1.160                                    * where they are mapped into user space.   */
   1.161      unsigned long tbufs_mfn;     /* mfn of the tbufs                         */
   1.162      unsigned int  num;           /* number of trace buffers / logical CPUS   */
   1.163 +    unsigned long tinfo_size;    /* size of t_info metadata map */
   1.164      unsigned long size;          /* size of a single trace buffer            */
   1.165  
   1.166      unsigned long data_size;
   1.167 @@ -655,14 +641,15 @@ static int monitor_tbufs(void)
   1.168      num = get_num_cpus();
   1.169  
   1.170      /* setup access to trace buffers */
   1.171 -    get_tbufs(&tbufs_mfn, &size);
   1.172 -    tbufs_mapped = map_tbufs(tbufs_mfn, num, size);
   1.173 +    get_tbufs(&tbufs_mfn, &tinfo_size);
   1.174 +    tbufs = map_tbufs(tbufs_mfn, num, tinfo_size);
   1.175 +
   1.176 +    size = tbufs->t_info->tbuf_size * PAGE_SIZE;
   1.177  
   1.178      data_size = size - sizeof(struct t_buf);
   1.179  
   1.180 -    /* build arrays of convenience ptrs */
   1.181 -    meta  = init_bufs_ptrs(tbufs_mapped, num, size);
   1.182 -    data  = init_rec_ptrs(meta, num);
   1.183 +    meta = tbufs->meta;
   1.184 +    data = tbufs->data;
   1.185  
   1.186      if ( opts.discard )
   1.187          for ( i = 0; i < num; i++ )
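
One detail worth noting in the new map_tbufs() above: t_info stores MFNs as
uint32_t, while xc_map_foreign_batch() takes an array of xen_pfn_t, so each
per-cpu list is widened into a temporary array before mapping. A minimal
consumer-side sketch, using the same libxc call as the patch and the
hypothetical cpu_mfn_list() helper from the earlier note (error handling
elided; assumes the headers already included by xentrace.c):

    /*
     * Sketch only: map a single cpu's trace buffer the way the new
     * map_tbufs() does.  xc_handle and DOMID_XEN are as used above;
     * this is not code from the patch.
     */
    static struct t_buf *map_cpu_buffer(int xc_handle, struct t_info *ti,
                                        unsigned int cpu)
    {
        uint32_t *mfn_list = cpu_mfn_list(ti, cpu);
        xen_pfn_t pfn_list[ti->tbuf_size];   /* VLA, as in the patch */
        int j;

        /* Widen the 32-bit MFNs to xen_pfn_t before handing them to libxc. */
        for ( j = 0; j < ti->tbuf_size; j++ )
            pfn_list[j] = (xen_pfn_t)mfn_list[j];

        /* Map the metadata page plus data pages as one contiguous region. */
        return xc_map_foreign_batch(xc_handle, DOMID_XEN,
                                    PROT_READ | PROT_WRITE,
                                    pfn_list, ti->tbuf_size);
    }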
     2.1 --- a/xen/common/trace.c	Wed Jan 20 09:51:38 2010 +0000
     2.2 +++ b/xen/common/trace.c	Wed Jan 20 20:33:35 2010 +0000
     2.3 @@ -46,8 +46,11 @@ static unsigned int opt_tbuf_size = 0;
     2.4  integer_param("tbuf_size", opt_tbuf_size);
     2.5  
     2.6  /* Pointers to the meta-data objects for all system trace buffers */
     2.7 +static struct t_info *t_info;
     2.8 +#define T_INFO_PAGES 2  /* Size fixed at 2 pages for now. */
     2.9  static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
    2.10  static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
    2.11 +static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
    2.12  static int data_size;
    2.13  
    2.14  /* High water mark for trace buffers; */
    2.15 @@ -80,41 +83,104 @@ static u32 tb_event_mask = TRC_ALL;
    2.16   */
    2.17  static int alloc_trace_bufs(void)
    2.18  {
    2.19 -    int           i, order;
    2.20 +    int           i, cpu, order;
    2.21      unsigned long nr_pages;
    2.22 -    char         *rawbuf;
    2.23 -    struct t_buf *buf;
    2.24 +    /* Start after a fixed-size array of NR_CPUS */
    2.25 +    uint32_t *t_info_mfn_list = (uint32_t *)t_info;
    2.26 +    int offset = (NR_CPUS * 2 + 1 + 1) / 4;
    2.27  
    2.28      if ( opt_tbuf_size == 0 )
    2.29          return -EINVAL;
    2.30  
    2.31 -    nr_pages = num_online_cpus() * opt_tbuf_size;
    2.32 -    order    = get_order_from_pages(nr_pages);
    2.33 -    data_size  = (opt_tbuf_size * PAGE_SIZE - sizeof(struct t_buf));
    2.34 -    
    2.35 -    if ( (rawbuf = alloc_xenheap_pages(order, 0)) == NULL )
    2.36 +    if ( !t_info )
    2.37      {
    2.38 -        printk("Xen trace buffers: memory allocation failed\n");
    2.39 -        opt_tbuf_size = 0;
    2.40 +        printk("%s: t_info not allocated, cannot allocate trace buffers!\n",
    2.41 +               __func__);
    2.42          return -EINVAL;
    2.43      }
    2.44  
    2.45 -    /* Share pages so that xentrace can map them. */
    2.46 -    for ( i = 0; i < nr_pages; i++ )
    2.47 -        share_xen_page_with_privileged_guests(
    2.48 -            virt_to_page(rawbuf) + i, XENSHARE_writable);
    2.49 +    t_info->tbuf_size = opt_tbuf_size;
    2.50 +    printk("tbuf_size %d\n", t_info->tbuf_size);
    2.51 +
    2.52 +    nr_pages = opt_tbuf_size;
    2.53 +    order = get_order_from_pages(nr_pages);
    2.54 +
    2.55 +    /*
    2.56 +     * First, allocate buffers for all of the cpus.  If any
    2.57 +     * fails, deallocate what you have so far and exit. 
    2.58 +     */
    2.59 +    for_each_online_cpu(cpu)
    2.60 +    {
    2.61 +        int flags;
    2.62 +        char         *rawbuf;
    2.63 +        struct t_buf *buf;
    2.64  
    2.65 -    for_each_online_cpu ( i )
    2.66 -    {
    2.67 -        buf = per_cpu(t_bufs, i) = (struct t_buf *)
    2.68 -            &rawbuf[i*opt_tbuf_size*PAGE_SIZE];
    2.69 +        if ( (rawbuf = alloc_xenheap_pages(order, 0)) == NULL )
    2.70 +        {
    2.71 +            printk("Xen trace buffers: memory allocation failed\n");
    2.72 +            opt_tbuf_size = 0;
    2.73 +            goto out_dealloc;
    2.74 +        }
    2.75 +
    2.76 +        spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
    2.77 +
    2.78 +        buf = per_cpu(t_bufs, cpu) = (struct t_buf *)rawbuf;
    2.79          buf->cons = buf->prod = 0;
    2.80 -        per_cpu(t_data, i) = (unsigned char *)(buf + 1);
    2.81 +        per_cpu(t_data, cpu) = (unsigned char *)(buf + 1);
    2.82 +
    2.83 +        spin_unlock_irqrestore(&per_cpu(t_lock, cpu), flags);
    2.84 +
    2.85      }
    2.86  
    2.87 +    /*
     2.88 +     * Now share the pages so that xentrace can map them, and write them in
    2.89 +     * the global t_info structure.
    2.90 +     */
    2.91 +    for_each_online_cpu(cpu)
    2.92 +    {
    2.93 +        /* Share pages so that xentrace can map them. */
    2.94 +        char         *rawbuf;
    2.95 +
    2.96 +        if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) )
    2.97 +        {
    2.98 +            struct page_info *p = virt_to_page(rawbuf);
    2.99 +            uint32_t mfn = virt_to_mfn(rawbuf);
   2.100 +
   2.101 +            for ( i = 0; i < nr_pages; i++ )
   2.102 +            {
   2.103 +                share_xen_page_with_privileged_guests(
   2.104 +                    p + i, XENSHARE_writable);
   2.105 +            
   2.106 +                t_info_mfn_list[offset + i]=mfn + i;
   2.107 +            }
   2.108 +            /* Write list first, then write per-cpu offset. */
   2.109 +            wmb();
   2.110 +            t_info->mfn_offset[cpu]=offset;
   2.111 +            printk("p%d mfn %"PRIx32" offset %d\n",
   2.112 +                   cpu, mfn, offset);
   2.113 +            offset+=i;
   2.114 +        }
   2.115 +    }
   2.116 +
   2.117 +    data_size  = (opt_tbuf_size * PAGE_SIZE - sizeof(struct t_buf));
   2.118      t_buf_highwater = data_size >> 1; /* 50% high water */
   2.119  
   2.120      return 0;
   2.121 +out_dealloc:
   2.122 +    for_each_online_cpu(cpu)
   2.123 +    {
   2.124 +        int flags;
   2.125 +        char * rawbuf;
   2.126 +
   2.127 +        spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
   2.128 +        if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) )
   2.129 +        {
   2.130 +            ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated));
   2.131 +            free_xenheap_pages(rawbuf, order);
   2.132 +        }
   2.133 +        spin_unlock_irqrestore(&per_cpu(t_lock, cpu), flags);
   2.134 +    }
   2.135 +    return -EINVAL;
   2.136  }
   2.137  
   2.138  
   2.139 @@ -181,6 +247,26 @@ int trace_will_trace_event(u32 event)
   2.140   */
   2.141  void __init init_trace_bufs(void)
   2.142  {
   2.143 +    int i;
   2.144 +    /* t_info size fixed at 2 pages for now.  That should be big enough / small enough
   2.145 +     * until it's worth making it dynamic. */
   2.146 +    t_info = alloc_xenheap_pages(1, 0);
   2.147 +
   2.148 +    if ( t_info == NULL )
   2.149 +    {
   2.150 +        printk("Xen trace buffers: t_info allocation failed!  Tracing disabled.\n");
   2.151 +        return;
   2.152 +    }
   2.153 +
   2.154 +    for(i = 0; i < NR_CPUS; i++)
   2.155 +        spin_lock_init(&per_cpu(t_lock, i));
   2.156 +
   2.157 +    for(i=0; i<T_INFO_PAGES; i++)
   2.158 +        share_xen_page_with_privileged_guests(
   2.159 +            virt_to_page(t_info) + i, XENSHARE_writable);
   2.160 +
   2.161 +
   2.162 +
   2.163      if ( opt_tbuf_size == 0 )
   2.164      {
   2.165          printk("Xen trace buffers: disabled\n");
   2.166 @@ -210,8 +296,8 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc
   2.167      {
   2.168      case XEN_SYSCTL_TBUFOP_get_info:
   2.169          tbc->evt_mask   = tb_event_mask;
   2.170 -        tbc->buffer_mfn = opt_tbuf_size ? virt_to_mfn(per_cpu(t_bufs, 0)) : 0;
   2.171 -        tbc->size       = opt_tbuf_size * PAGE_SIZE;
   2.172 +        tbc->buffer_mfn = t_info ? virt_to_mfn(t_info) : 0;
   2.173 +        tbc->size = T_INFO_PAGES;
   2.174          break;
   2.175      case XEN_SYSCTL_TBUFOP_set_cpu_mask:
   2.176          xenctl_cpumap_to_cpumask(&tb_cpu_mask, &tbc->cpu_mask);
   2.177 @@ -220,7 +306,7 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc
   2.178          tb_event_mask = tbc->evt_mask;
   2.179          break;
   2.180      case XEN_SYSCTL_TBUFOP_set_size:
   2.181 -        rc = !tb_init_done ? tb_set_size(tbc->size) : -EINVAL;
   2.182 +        rc = tb_set_size(tbc->size);
   2.183          break;
   2.184      case XEN_SYSCTL_TBUFOP_enable:
   2.185          /* Enable trace buffers. Check buffers are already allocated. */
   2.186 @@ -428,7 +514,7 @@ void __trace_var(u32 event, int cycles, 
   2.187      unsigned long flags, bytes_to_tail, bytes_to_wrap;
   2.188      int rec_size, total_size;
   2.189      int extra_word;
   2.190 -    int started_below_highwater;
   2.191 +    int started_below_highwater = 0;
   2.192  
   2.193      if( !tb_init_done )
   2.194          return;
   2.195 @@ -462,9 +548,12 @@ void __trace_var(u32 event, int cycles, 
   2.196      /* Read tb_init_done /before/ t_bufs. */
   2.197      rmb();
   2.198  
   2.199 +    spin_lock_irqsave(&this_cpu(t_lock), flags);
   2.200 +
   2.201      buf = this_cpu(t_bufs);
   2.202  
   2.203 -    local_irq_save(flags);
   2.204 +    if ( unlikely(!buf) )
   2.205 +        goto unlock;
   2.206  
   2.207      started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater);
   2.208  
   2.209 @@ -511,8 +600,8 @@ void __trace_var(u32 event, int cycles, 
   2.210      {
   2.211          if ( ++this_cpu(lost_records) == 1 )
   2.212              this_cpu(lost_records_first_tsc)=(u64)get_cycles();
   2.213 -        local_irq_restore(flags);
   2.214 -        return;
   2.215 +        started_below_highwater = 0;
   2.216 +        goto unlock;
   2.217      }
   2.218  
   2.219      /*
   2.220 @@ -541,7 +630,8 @@ void __trace_var(u32 event, int cycles, 
   2.221      /* Write the original record */
   2.222      __insert_record(buf, event, extra, cycles, rec_size, extra_data);
   2.223  
   2.224 -    local_irq_restore(flags);
   2.225 +unlock:
   2.226 +    spin_unlock_irqrestore(&this_cpu(t_lock), flags);
   2.227  
   2.228      /* Notify trace buffer consumer that we've crossed the high water mark. */
   2.229      if ( started_below_highwater &&
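
On the hypervisor side, the ordering in alloc_trace_bufs() is the point to
keep in mind: each cpu's MFN list is written and flushed with wmb() before
the per-cpu offset that points at it is stored, so a consumer mapping t_info
never follows an offset to a list that is still being filled in. The lines
below are a condensed restatement of the hunk above, with explanatory
comments added, not new code:

    /* Condensed from alloc_trace_bufs(): publish one cpu's buffer. */
    for ( i = 0; i < nr_pages; i++ )
        t_info_mfn_list[offset + i] = mfn + i;  /* fill the MFN list...        */
    wmb();                                      /* ...make it visible first... */
    t_info->mfn_offset[cpu] = offset;           /* ...then advertise it        */
    offset += nr_pages;                         /* next cpu's list follows     */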
     3.1 --- a/xen/include/public/sysctl.h	Wed Jan 20 09:51:38 2010 +0000
     3.2 +++ b/xen/include/public/sysctl.h	Wed Jan 20 20:33:35 2010 +0000
     3.3 @@ -75,7 +75,7 @@ struct xen_sysctl_tbuf_op {
     3.4      uint32_t             evt_mask;
     3.5      /* OUT variables */
     3.6      uint64_aligned_t buffer_mfn;
     3.7 -    uint32_t size;
     3.8 +    uint32_t size;  /* Also an IN variable! */
     3.9  };
    3.10  typedef struct xen_sysctl_tbuf_op xen_sysctl_tbuf_op_t;
    3.11  DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tbuf_op_t);
     4.1 --- a/xen/include/public/trace.h	Wed Jan 20 09:51:38 2010 +0000
     4.2 +++ b/xen/include/public/trace.h	Wed Jan 20 20:33:35 2010 +0000
     4.3 @@ -195,6 +195,16 @@ struct t_buf {
     4.4      /*  Records follow immediately after the meta-data header.    */
     4.5  };
     4.6  
      4.7 +/* Structure used to pass the MFNs of the trace buffers back to trace consumers.
      4.8 + * Offset is an offset into the mapped structure where the mfn list will be held.
      4.9 + * MFNs will be at ((uint32_t *)(t_info))+(t_info->mfn_offset[cpu]).
    4.10 + */
    4.11 +struct t_info {
    4.12 +    uint16_t tbuf_size; /* Size in pages of each trace buffer */
    4.13 +    uint16_t mfn_offset[];  /* Offset within t_info structure of the page list per cpu */
    4.14 +    /* MFN lists immediately after the header */
    4.15 +};
    4.16 +
    4.17  #endif /* __XEN_PUBLIC_TRACE_H__ */
    4.18  
    4.19  /*