debuggers.hg

changeset 19964:3952eaeb70b0

Introduce and use a per-CPU read-mostly sub-section

Since mixing data that only gets set up once and then (perhaps
frequently) gets read by remote CPUs with data that the local CPU may
modify (again, perhaps frequently) still causes undesirable
cache-protocol-related bus traffic, separate the former class of
objects from the latter.

The objects converted here were picked solely for their write-once
(or write-very-rarely) properties; some further adjustments may be
desirable subsequently. The primary users of the new sub-section
will be introduced by the next patch.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Mon Jul 13 11:32:41 2009 +0100 (2009-07-13)
parents ed76e4bbea83
children 2dbabefe62dc
files xen/arch/ia64/xen/vhpt.c xen/arch/ia64/xen/xen.lds.S xen/arch/x86/hvm/vmx/vmcs.c xen/arch/x86/setup.c xen/arch/x86/traps.c xen/arch/x86/x86_32/traps.c xen/arch/x86/x86_32/xen.lds.S xen/arch/x86/x86_64/mm.c xen/arch/x86/x86_64/xen.lds.S xen/common/kexec.c xen/common/tmem_xen.c xen/common/trace.c xen/include/asm-ia64/linux-xen/asm/percpu.h xen/include/asm-x86/percpu.h xen/include/xen/percpu.h
line diff
     1.1 --- a/xen/arch/ia64/xen/vhpt.c	Mon Jul 13 11:31:34 2009 +0100
     1.2 +++ b/xen/arch/ia64/xen/vhpt.c	Mon Jul 13 11:32:41 2009 +0100
     1.3 @@ -21,8 +21,8 @@
     1.4  #include <asm/vcpumask.h>
     1.5  #include <asm/vmmu.h>
     1.6  
     1.7 -DEFINE_PER_CPU (unsigned long, vhpt_paddr);
     1.8 -DEFINE_PER_CPU (unsigned long, vhpt_pend);
     1.9 +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, vhpt_paddr);
    1.10 +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, vhpt_pend);
    1.11  #ifdef CONFIG_XEN_IA64_TLBFLUSH_CLOCK
    1.12  DEFINE_PER_CPU(volatile u32, vhpt_tlbflush_timestamp);
    1.13  #endif
     2.1 --- a/xen/arch/ia64/xen/xen.lds.S	Mon Jul 13 11:31:34 2009 +0100
     2.2 +++ b/xen/arch/ia64/xen/xen.lds.S	Mon Jul 13 11:32:41 2009 +0100
     2.3 @@ -187,6 +187,8 @@ SECTIONS
     2.4  	{
     2.5  		__per_cpu_start = .;
     2.6  		*(.data.percpu)
     2.7 +		. = ALIGN(SMP_CACHE_BYTES);
     2.8 +		*(.data.percpu.read_mostly)
     2.9  		__per_cpu_end = .;
    2.10  	}
    2.11    . = __phys_per_cpu_start + PERCPU_PAGE_SIZE;	/* ensure percpu data fits
     3.1 --- a/xen/arch/x86/hvm/vmx/vmcs.c	Mon Jul 13 11:31:34 2009 +0100
     3.2 +++ b/xen/arch/x86/hvm/vmx/vmcs.c	Mon Jul 13 11:32:41 2009 +0100
     3.3 @@ -66,7 +66,7 @@ u32 vmx_vmexit_control __read_mostly;
     3.4  u32 vmx_vmentry_control __read_mostly;
     3.5  bool_t cpu_has_vmx_ins_outs_instr_info __read_mostly;
     3.6  
     3.7 -static DEFINE_PER_CPU(struct vmcs_struct *, host_vmcs);
     3.8 +static DEFINE_PER_CPU_READ_MOSTLY(struct vmcs_struct *, host_vmcs);
     3.9  static DEFINE_PER_CPU(struct vmcs_struct *, current_vmcs);
    3.10  static DEFINE_PER_CPU(struct list_head, active_vmcs_list);
    3.11  
     4.1 --- a/xen/arch/x86/setup.c	Mon Jul 13 11:31:34 2009 +0100
     4.2 +++ b/xen/arch/x86/setup.c	Mon Jul 13 11:32:41 2009 +0100
     4.3 @@ -111,9 +111,9 @@ extern void vesa_init(void);
     4.4  extern void vesa_mtrr_init(void);
     4.5  extern void init_tmem(void);
     4.6  
     4.7 -DEFINE_PER_CPU(struct desc_struct *, gdt_table) = boot_cpu_gdt_table;
     4.8 +DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, gdt_table) = boot_cpu_gdt_table;
     4.9  #ifdef CONFIG_COMPAT
    4.10 -DEFINE_PER_CPU(struct desc_struct *, compat_gdt_table)
    4.11 +DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, compat_gdt_table)
    4.12      = boot_cpu_compat_gdt_table;
    4.13  #endif
    4.14  
     5.1 --- a/xen/arch/x86/traps.c	Mon Jul 13 11:31:34 2009 +0100
     5.2 +++ b/xen/arch/x86/traps.c	Mon Jul 13 11:32:41 2009 +0100
     5.3 @@ -78,7 +78,7 @@ char opt_nmi[10] = "fatal";
     5.4  #endif
     5.5  string_param("nmi", opt_nmi);
     5.6  
     5.7 -DEFINE_PER_CPU(u32, ler_msr);
     5.8 +DEFINE_PER_CPU_READ_MOSTLY(u32, ler_msr);
     5.9  
    5.10  /* Master table, used by CPU0. */
    5.11  idt_entry_t idt_table[IDT_ENTRIES];
     6.1 --- a/xen/arch/x86/x86_32/traps.c	Mon Jul 13 11:31:34 2009 +0100
     6.2 +++ b/xen/arch/x86/x86_32/traps.c	Mon Jul 13 11:32:41 2009 +0100
     6.3 @@ -188,7 +188,7 @@ void show_page_walk(unsigned long addr)
     6.4      unmap_domain_page(l1t);
     6.5  }
     6.6  
     6.7 -DEFINE_PER_CPU(struct tss_struct *, doublefault_tss);
     6.8 +DEFINE_PER_CPU_READ_MOSTLY(struct tss_struct *, doublefault_tss);
     6.9  static unsigned char __attribute__ ((__section__ (".bss.page_aligned")))
    6.10      boot_cpu_doublefault_space[PAGE_SIZE];
    6.11  
     7.1 --- a/xen/arch/x86/x86_32/xen.lds.S	Mon Jul 13 11:31:34 2009 +0100
     7.2 +++ b/xen/arch/x86/x86_32/xen.lds.S	Mon Jul 13 11:32:41 2009 +0100
     7.3 @@ -4,6 +4,7 @@
     7.4   */
     7.5  
     7.6  #include <xen/config.h>
     7.7 +#include <xen/cache.h>
     7.8  #include <asm/page.h>
     7.9  #include <asm/percpu.h>
    7.10  #undef ENTRY
    7.11 @@ -69,9 +70,13 @@ SECTIONS
    7.12    __init_end = .;
    7.13  
    7.14    __per_cpu_shift = PERCPU_SHIFT; /* kdump assist */
    7.15 -  __per_cpu_start = .;
    7.16 -  .data.percpu : { *(.data.percpu) } :text
    7.17 -  __per_cpu_data_end = .;
    7.18 +  .data.percpu : {
    7.19 +	__per_cpu_start = .;
    7.20 +	*(.data.percpu)
    7.21 +	. = ALIGN(SMP_CACHE_BYTES);
    7.22 +	*(.data.percpu.read_mostly)
    7.23 +	__per_cpu_data_end = .;
    7.24 +  } :text
    7.25    . = __per_cpu_start + (NR_CPUS << PERCPU_SHIFT);
    7.26    . = ALIGN(PAGE_SIZE);
    7.27    __per_cpu_end = .;
     8.1 --- a/xen/arch/x86/x86_64/mm.c	Mon Jul 13 11:31:34 2009 +0100
     8.2 +++ b/xen/arch/x86/x86_64/mm.c	Mon Jul 13 11:32:41 2009 +0100
     8.3 @@ -37,7 +37,7 @@
     8.4  unsigned int m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START;
     8.5  #endif
     8.6  
     8.7 -DEFINE_PER_CPU(void *, compat_arg_xlat);
     8.8 +DEFINE_PER_CPU_READ_MOSTLY(void *, compat_arg_xlat);
     8.9  
    8.10  /* Top-level master (and idle-domain) page directory. */
    8.11  l4_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
     9.1 --- a/xen/arch/x86/x86_64/xen.lds.S	Mon Jul 13 11:31:34 2009 +0100
     9.2 +++ b/xen/arch/x86/x86_64/xen.lds.S	Mon Jul 13 11:32:41 2009 +0100
     9.3 @@ -2,6 +2,7 @@
     9.4  /* Modified for x86-64 Xen by Keir Fraser */
     9.5  
     9.6  #include <xen/config.h>
     9.7 +#include <xen/cache.h>
     9.8  #include <asm/page.h>
     9.9  #include <asm/percpu.h>
    9.10  #undef ENTRY
    9.11 @@ -67,9 +68,13 @@ SECTIONS
    9.12    __init_end = .;
    9.13  
    9.14    __per_cpu_shift = PERCPU_SHIFT; /* kdump assist */
    9.15 -  __per_cpu_start = .;
    9.16 -  .data.percpu : { *(.data.percpu) } :text
    9.17 -  __per_cpu_data_end = .;
    9.18 +  .data.percpu : {
    9.19 +	__per_cpu_start = .;
    9.20 +	*(.data.percpu)
    9.21 +	. = ALIGN(SMP_CACHE_BYTES);
    9.22 +	*(.data.percpu.read_mostly)
    9.23 +	__per_cpu_data_end = .;
    9.24 +  } :text
    9.25    . = __per_cpu_start + (NR_CPUS << PERCPU_SHIFT);
    9.26    . = ALIGN(PAGE_SIZE);
    9.27    __per_cpu_end = .;
    10.1 --- a/xen/common/kexec.c	Mon Jul 13 11:31:34 2009 +0100
    10.2 +++ b/xen/common/kexec.c	Mon Jul 13 11:32:41 2009 +0100
    10.3 @@ -27,7 +27,7 @@
    10.4  #include <compat/kexec.h>
    10.5  #endif
    10.6  
    10.7 -static DEFINE_PER_CPU(void *, crash_notes);
    10.8 +static DEFINE_PER_CPU_READ_MOSTLY(void *, crash_notes);
    10.9  
   10.10  static Elf_Note *xen_crash_note;
   10.11  
    11.1 --- a/xen/common/tmem_xen.c	Mon Jul 13 11:31:34 2009 +0100
    11.2 +++ b/xen/common/tmem_xen.c	Mon Jul 13 11:32:41 2009 +0100
    11.3 @@ -36,8 +36,8 @@ DECL_CYC_COUNTER(pg_copy);
    11.4   * allocated iff opt_tmem_compress */
    11.5  #define LZO_WORKMEM_BYTES LZO1X_1_MEM_COMPRESS
    11.6  #define LZO_DSTMEM_PAGES 2
    11.7 -static DEFINE_PER_CPU(unsigned char *, workmem);
    11.8 -static DEFINE_PER_CPU(unsigned char *, dstmem);
    11.9 +static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, workmem);
   11.10 +static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, dstmem);
   11.11  
   11.12  #ifdef COMPARE_COPY_PAGE_SSE2
   11.13  #include <asm/flushtlb.h>  /* REMOVE ME AFTER TEST */
    12.1 --- a/xen/common/trace.c	Mon Jul 13 11:31:34 2009 +0100
    12.2 +++ b/xen/common/trace.c	Mon Jul 13 11:32:41 2009 +0100
    12.3 @@ -46,8 +46,8 @@ static unsigned int opt_tbuf_size = 0;
    12.4  integer_param("tbuf_size", opt_tbuf_size);
    12.5  
    12.6  /* Pointers to the meta-data objects for all system trace buffers */
    12.7 -static DEFINE_PER_CPU(struct t_buf *, t_bufs);
    12.8 -static DEFINE_PER_CPU(unsigned char *, t_data);
    12.9 +static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
   12.10 +static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
   12.11  static int data_size;
   12.12  
   12.13  /* High water mark for trace buffers; */
    13.1 --- a/xen/include/asm-ia64/linux-xen/asm/percpu.h	Mon Jul 13 11:31:34 2009 +0100
    13.2 +++ b/xen/include/asm-ia64/linux-xen/asm/percpu.h	Mon Jul 13 11:32:41 2009 +0100
    13.3 @@ -26,9 +26,9 @@
    13.4  	extern __SMALL_ADDR_AREA __typeof__(type) per_cpu__##name
    13.5  
    13.6  /* Separate out the type, so (int[3], foo) works. */
    13.7 -#define DEFINE_PER_CPU(type, name)				\
    13.8 -	__attribute__((__section__(".data.percpu")))		\
    13.9 -	__SMALL_ADDR_AREA __typeof__(type) per_cpu__##name
   13.10 +#define __DEFINE_PER_CPU(type, name, suffix)				\
   13.11 +	__attribute__((__section__(".data.percpu" #suffix)))		\
   13.12 +	__SMALL_ADDR_AREA __typeof__(type) per_cpu_##name
   13.13  
   13.14  /*
   13.15   * Pretty much a literal copy of asm-generic/percpu.h, except that percpu_modcopy() is an
    14.1 --- a/xen/include/asm-x86/percpu.h	Mon Jul 13 11:31:34 2009 +0100
    14.2 +++ b/xen/include/asm-x86/percpu.h	Mon Jul 13 11:32:41 2009 +0100
    14.3 @@ -5,9 +5,9 @@
    14.4  #define PERCPU_SIZE  (1UL << PERCPU_SHIFT)
    14.5  
    14.6  /* Separate out the type, so (int[3], foo) works. */
    14.7 -#define DEFINE_PER_CPU(type, name)                      \
    14.8 -    __attribute__((__section__(".data.percpu")))        \
    14.9 -    __typeof__(type) per_cpu__##name
   14.10 +#define __DEFINE_PER_CPU(type, name, suffix)                    \
   14.11 +    __attribute__((__section__(".data.percpu" #suffix)))        \
   14.12 +    __typeof__(type) per_cpu_##name
   14.13  
   14.14  /* var is in discarded region: offset to particular copy we want */
   14.15  #define per_cpu(var, cpu)  \
    15.1 --- a/xen/include/xen/percpu.h	Mon Jul 13 11:31:34 2009 +0100
    15.2 +++ b/xen/include/xen/percpu.h	Mon Jul 13 11:32:41 2009 +0100
    15.3 @@ -4,6 +4,16 @@
    15.4  #include <xen/config.h>
    15.5  #include <asm/percpu.h>
    15.6  
    15.7 +/*
    15.8 + * Separate out the type, so (int[3], foo) works.
    15.9 + *
   15.10 + * The _##name concatenation is being used here to prevent 'name' from getting
   15.11 + * macro expanded, while still allowing a per-architecture symbol name prefix.
   15.12 + */
   15.13 +#define DEFINE_PER_CPU(type, name) __DEFINE_PER_CPU(type, _##name, )
   15.14 +#define DEFINE_PER_CPU_READ_MOSTLY(type, name) \
   15.15 +	__DEFINE_PER_CPU(type, _##name, .read_mostly)
   15.16 +
   15.17  /* Preferred on Xen. Also see arch-defined per_cpu(). */
   15.18  #define this_cpu(var)    __get_cpu_var(var)
   15.19