debuggers.hg

changeset 19965:2dbabefe62dc

Move cpu_{sibling,core}_map into per-CPU space

These CPU maps get read from all CPUs, so besides addressing the
quadratic (nr_cpus^2) growth of these objects, they are also moved into
the previously introduced read-mostly sub-section of the per-CPU
section. That way a full cacheline need not be wasted to align (and
properly pad) each of them, which would be undue overhead on systems
with a low NR_CPUS.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Mon Jul 13 11:45:31 2009 +0100 (2009-07-13)
parents 3952eaeb70b0
children 3f12d48f2880
files xen/arch/ia64/linux-xen/setup.c xen/arch/ia64/linux-xen/smpboot.c xen/arch/ia64/xen/dom0_ops.c xen/arch/x86/oprofile/op_model_p4.c xen/arch/x86/smpboot.c xen/arch/x86/sysctl.c xen/common/domctl.c xen/common/sched_credit.c xen/include/asm-ia64/linux-xen/asm/smp.h xen/include/asm-x86/smp.h
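
For reference, the core of the conversion is the declaration and accessor
change repeated across the hunks below. A condensed sketch (the
DECLARE_PER_CPU, DEFINE_PER_CPU_READ_MOSTLY and per_cpu() macros are
Xen's own, as used in the tree this patch targets; the fragments here
are illustrative, not a compilable unit):

	/* Before: one statically sized array per map, indexed by CPU.
	 * Storage grows with NR_CPUS * sizeof(cpumask_t), and keeping the
	 * entries cacheline aligned/padded costs a full line per CPU. */
	cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
	cpumask_t cpu_core_map[NR_CPUS] __read_mostly;

	cpu_set(cpu, cpu_sibling_map[cpu]);          /* old accessor style */

	/* After: one cpumask_t in each CPU's per-CPU area, placed in the
	 * read-mostly sub-section so no per-object padding is needed. */
	DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); /* in the header */
	DEFINE_PER_CPU_READ_MOSTLY(cpumask_t, cpu_sibling_map);
	DEFINE_PER_CPU_READ_MOSTLY(cpumask_t, cpu_core_map);

	cpu_set(cpu, per_cpu(cpu_sibling_map, cpu)); /* new accessor style */

Every array reference of the form cpu_sibling_map[n] or cpu_core_map[n]
in the files listed above becomes the corresponding per_cpu(..., n)
access; the diff below makes no other functional change.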
line diff
     1.1 --- a/xen/arch/ia64/linux-xen/setup.c	Mon Jul 13 11:32:41 2009 +0100
     1.2 +++ b/xen/arch/ia64/linux-xen/setup.c	Mon Jul 13 11:45:31 2009 +0100
     1.3 @@ -577,8 +577,8 @@ late_setup_arch (char **cmdline_p)
     1.4  
     1.5  	cpu_physical_id(0) = hard_smp_processor_id();
     1.6  
     1.7 -	cpu_set(0, cpu_sibling_map[0]);
     1.8 -	cpu_set(0, cpu_core_map[0]);
     1.9 +	cpu_set(0, per_cpu(cpu_sibling_map, 0));
    1.10 +	cpu_set(0, per_cpu(cpu_core_map, 0));
    1.11  
    1.12  	check_for_logical_procs();
    1.13  	if (smp_num_cpucores > 1)
     2.1 --- a/xen/arch/ia64/linux-xen/smpboot.c	Mon Jul 13 11:32:41 2009 +0100
     2.2 +++ b/xen/arch/ia64/linux-xen/smpboot.c	Mon Jul 13 11:45:31 2009 +0100
     2.3 @@ -144,8 +144,8 @@ EXPORT_SYMBOL(cpu_online_map);
     2.4  cpumask_t cpu_possible_map;
     2.5  EXPORT_SYMBOL(cpu_possible_map);
     2.6  
     2.7 -cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
     2.8 -cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
     2.9 +DEFINE_PER_CPU_READ_MOSTLY(cpumask_t, cpu_core_map);
    2.10 +DEFINE_PER_CPU_READ_MOSTLY(cpumask_t, cpu_sibling_map);
    2.11  int smp_num_siblings = 1;
    2.12  int smp_num_cpucores = 1;
    2.13  
    2.14 @@ -686,13 +686,13 @@ clear_cpu_sibling_map(int cpu)
    2.15  {
    2.16  	int i;
    2.17  
    2.18 -	for_each_cpu_mask(i, cpu_sibling_map[cpu])
    2.19 -		cpu_clear(cpu, cpu_sibling_map[i]);
    2.20 -	for_each_cpu_mask(i, cpu_core_map[cpu])
    2.21 -		cpu_clear(cpu, cpu_core_map[i]);
    2.22 +	for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu))
    2.23 +		cpu_clear(cpu, per_cpu(cpu_sibling_map, i));
    2.24 +	for_each_cpu_mask(i, per_cpu(cpu_core_map, cpu))
    2.25 +		cpu_clear(cpu, per_cpu(cpu_core_map, i));
    2.26  
    2.27 -	cpus_clear(cpu_sibling_map[cpu]);
    2.28 -	cpus_clear(cpu_core_map[cpu]);
    2.29 +	cpus_clear(per_cpu(cpu_sibling_map, cpu));
    2.30 +	cpus_clear(per_cpu(cpu_core_map, cpu));
    2.31  }
    2.32  
    2.33  static void
    2.34 @@ -702,12 +702,12 @@ remove_siblinginfo(int cpu)
    2.35  
    2.36  	if (cpu_data(cpu)->threads_per_core == 1 &&
    2.37  	    cpu_data(cpu)->cores_per_socket == 1) {
    2.38 -		cpu_clear(cpu, cpu_core_map[cpu]);
    2.39 -		cpu_clear(cpu, cpu_sibling_map[cpu]);
    2.40 +		cpu_clear(cpu, per_cpu(cpu_core_map, cpu));
    2.41 +		cpu_clear(cpu, per_cpu(cpu_sibling_map, cpu));
    2.42  		return;
    2.43  	}
    2.44  
    2.45 -	last = (cpus_weight(cpu_core_map[cpu]) == 1 ? 1 : 0);
    2.46 +	last = (cpus_weight(per_cpu(cpu_core_map, cpu)) == 1);
    2.47  
    2.48  	/* remove it from all sibling map's */
    2.49  	clear_cpu_sibling_map(cpu);
    2.50 @@ -800,11 +800,11 @@ set_cpu_sibling_map(int cpu)
    2.51  
    2.52  	for_each_online_cpu(i) {
    2.53  		if ((cpu_data(cpu)->socket_id == cpu_data(i)->socket_id)) {
    2.54 -			cpu_set(i, cpu_core_map[cpu]);
    2.55 -			cpu_set(cpu, cpu_core_map[i]);
    2.56 +			cpu_set(i, per_cpu(cpu_core_map, cpu));
    2.57 +			cpu_set(cpu, per_cpu(cpu_core_map, i));
    2.58  			if (cpu_data(cpu)->core_id == cpu_data(i)->core_id) {
    2.59 -				cpu_set(i, cpu_sibling_map[cpu]);
    2.60 -				cpu_set(cpu, cpu_sibling_map[i]);
    2.61 +				cpu_set(i, per_cpu(cpu_sibling_map, cpu));
    2.62 +				cpu_set(cpu, per_cpu(cpu_sibling_map, i));
    2.63  			}
    2.64  		}
    2.65  	}
    2.66 @@ -835,8 +835,8 @@ int __devinit
    2.67  
    2.68  	if (cpu_data(cpu)->threads_per_core == 1 &&
    2.69  	    cpu_data(cpu)->cores_per_socket == 1) {
    2.70 -		cpu_set(cpu, cpu_sibling_map[cpu]);
    2.71 -		cpu_set(cpu, cpu_core_map[cpu]);
    2.72 +		cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
    2.73 +		cpu_set(cpu, per_cpu(cpu_core_map, cpu));
    2.74  		return 0;
    2.75  	}
    2.76  
     3.1 --- a/xen/arch/ia64/xen/dom0_ops.c	Mon Jul 13 11:32:41 2009 +0100
     3.2 +++ b/xen/arch/ia64/xen/dom0_ops.c	Mon Jul 13 11:45:31 2009 +0100
     3.3 @@ -711,9 +711,9 @@ long arch_do_sysctl(xen_sysctl_t *op, XE
     3.4  
     3.5          memset(pi, 0, sizeof(*pi));
     3.6          pi->cpu_to_node = cpu_to_node_arr;
     3.7 -        pi->threads_per_core = cpus_weight(cpu_sibling_map[0]);
     3.8 +        pi->threads_per_core = cpus_weight(per_cpu(cpu_sibling_map, 0));
     3.9          pi->cores_per_socket =
    3.10 -            cpus_weight(cpu_core_map[0]) / pi->threads_per_core;
    3.11 +            cpus_weight(per_cpu(cpu_core_map, 0)) / pi->threads_per_core;
    3.12          pi->nr_cpus          = (u32)num_online_cpus();
    3.13          pi->nr_nodes         = num_online_nodes();
    3.14          pi->total_pages      = total_pages; 
     4.1 --- a/xen/arch/x86/oprofile/op_model_p4.c	Mon Jul 13 11:32:41 2009 +0100
     4.2 +++ b/xen/arch/x86/oprofile/op_model_p4.c	Mon Jul 13 11:45:31 2009 +0100
     4.3 @@ -385,7 +385,7 @@ static unsigned int get_stagger(void)
     4.4  {
     4.5  #ifdef CONFIG_SMP
     4.6  	int cpu = smp_processor_id();
     4.7 -	return (cpu != first_cpu(cpu_sibling_map[cpu]));
     4.8 +	return (cpu != first_cpu(per_cpu(cpu_sibling_map, cpu)));
     4.9  #endif	
    4.10  	return 0;
    4.11  }
     5.1 --- a/xen/arch/x86/smpboot.c	Mon Jul 13 11:32:41 2009 +0100
     5.2 +++ b/xen/arch/x86/smpboot.c	Mon Jul 13 11:45:31 2009 +0100
     5.3 @@ -70,12 +70,9 @@ int phys_proc_id[NR_CPUS] __read_mostly 
     5.4  int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
     5.5  
     5.6  /* representing HT siblings of each logical CPU */
     5.7 -cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
     5.8 -EXPORT_SYMBOL(cpu_sibling_map);
     5.9 -
    5.10 +DEFINE_PER_CPU_READ_MOSTLY(cpumask_t, cpu_sibling_map);
    5.11  /* representing HT and core siblings of each logical CPU */
    5.12 -cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
    5.13 -EXPORT_SYMBOL(cpu_core_map);
    5.14 +DEFINE_PER_CPU_READ_MOSTLY(cpumask_t, cpu_core_map);
    5.15  
    5.16  /* bitmap of online cpus */
    5.17  cpumask_t cpu_online_map __read_mostly;
    5.18 @@ -419,35 +416,35 @@ set_cpu_sibling_map(int cpu)
    5.19  		for_each_cpu_mask(i, cpu_sibling_setup_map) {
    5.20  			if (phys_proc_id[cpu] == phys_proc_id[i] &&
    5.21  			    cpu_core_id[cpu] == cpu_core_id[i]) {
    5.22 -				cpu_set(i, cpu_sibling_map[cpu]);
    5.23 -				cpu_set(cpu, cpu_sibling_map[i]);
    5.24 -				cpu_set(i, cpu_core_map[cpu]);
    5.25 -				cpu_set(cpu, cpu_core_map[i]);
    5.26 +				cpu_set(i, per_cpu(cpu_sibling_map, cpu));
    5.27 +				cpu_set(cpu, per_cpu(cpu_sibling_map, i));
    5.28 +				cpu_set(i, per_cpu(cpu_core_map, cpu));
    5.29 +				cpu_set(cpu, per_cpu(cpu_core_map, i));
    5.30  			}
    5.31  		}
    5.32  	} else {
    5.33 -		cpu_set(cpu, cpu_sibling_map[cpu]);
    5.34 +		cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
    5.35  	}
    5.36  
    5.37  	if (c[cpu].x86_max_cores == 1) {
    5.38 -		cpu_core_map[cpu] = cpu_sibling_map[cpu];
    5.39 +		per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu);
    5.40  		c[cpu].booted_cores = 1;
    5.41  		return;
    5.42  	}
    5.43  
    5.44  	for_each_cpu_mask(i, cpu_sibling_setup_map) {
    5.45  		if (phys_proc_id[cpu] == phys_proc_id[i]) {
    5.46 -			cpu_set(i, cpu_core_map[cpu]);
    5.47 -			cpu_set(cpu, cpu_core_map[i]);
    5.48 +			cpu_set(i, per_cpu(cpu_core_map, cpu));
    5.49 +			cpu_set(cpu, per_cpu(cpu_core_map, i));
    5.50  			/*
    5.51  			 *  Does this new cpu bringup a new core?
    5.52  			 */
    5.53 -			if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
    5.54 +			if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) {
    5.55  				/*
    5.56  				 * for each core in package, increment
    5.57  				 * the booted_cores for this new cpu
    5.58  				 */
    5.59 -				if (first_cpu(cpu_sibling_map[i]) == i)
    5.60 +				if (first_cpu(per_cpu(cpu_sibling_map, i)) == i)
    5.61  					c[cpu].booted_cores++;
    5.62  				/*
    5.63  				 * increment the core count for all
    5.64 @@ -1052,8 +1049,8 @@ static void __init smp_boot_cpus(unsigne
    5.65  			printk(KERN_NOTICE "Local APIC not detected."
    5.66  					   " Using dummy APIC emulation.\n");
    5.67  		map_cpu_to_logical_apicid();
    5.68 -		cpu_set(0, cpu_sibling_map[0]);
    5.69 -		cpu_set(0, cpu_core_map[0]);
    5.70 +		cpu_set(0, per_cpu(cpu_sibling_map, 0));
    5.71 +		cpu_set(0, per_cpu(cpu_core_map, 0));
    5.72  		return;
    5.73  	}
    5.74  
    5.75 @@ -1163,16 +1160,16 @@ static void __init smp_boot_cpus(unsigne
    5.76  	Dprintk("Boot done.\n");
    5.77  
    5.78  	/*
    5.79 -	 * construct cpu_sibling_map[], so that we can tell sibling CPUs
    5.80 +	 * construct cpu_sibling_map, so that we can tell sibling CPUs
    5.81  	 * efficiently.
    5.82  	 */
    5.83  	for (cpu = 0; cpu < NR_CPUS; cpu++) {
    5.84 -		cpus_clear(cpu_sibling_map[cpu]);
    5.85 -		cpus_clear(cpu_core_map[cpu]);
    5.86 +		cpus_clear(per_cpu(cpu_sibling_map, cpu));
    5.87 +		cpus_clear(per_cpu(cpu_core_map, cpu));
    5.88  	}
    5.89  
    5.90 -	cpu_set(0, cpu_sibling_map[0]);
    5.91 -	cpu_set(0, cpu_core_map[0]);
    5.92 +	cpu_set(0, per_cpu(cpu_sibling_map, 0));
    5.93 +	cpu_set(0, per_cpu(cpu_core_map, 0));
    5.94  
    5.95  	if (nmi_watchdog == NMI_LOCAL_APIC)
    5.96  		check_nmi_watchdog();
    5.97 @@ -1215,19 +1212,19 @@ remove_siblinginfo(int cpu)
    5.98  	int sibling;
    5.99  	struct cpuinfo_x86 *c = cpu_data;
   5.100  
   5.101 -	for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
   5.102 -		cpu_clear(cpu, cpu_core_map[sibling]);
   5.103 +	for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) {
   5.104 +		cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
   5.105  		/*
   5.106  		 * last thread sibling in this cpu core going down
   5.107  		 */
   5.108 -		if (cpus_weight(cpu_sibling_map[cpu]) == 1)
   5.109 +		if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
   5.110  			c[sibling].booted_cores--;
   5.111  	}
   5.112  			
   5.113 -	for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
   5.114 -		cpu_clear(cpu, cpu_sibling_map[sibling]);
   5.115 -	cpus_clear(cpu_sibling_map[cpu]);
   5.116 -	cpus_clear(cpu_core_map[cpu]);
   5.117 +	for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu))
   5.118 +		cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
   5.119 +	cpus_clear(per_cpu(cpu_sibling_map, cpu));
   5.120 +	cpus_clear(per_cpu(cpu_core_map, cpu));
   5.121  	phys_proc_id[cpu] = BAD_APICID;
   5.122  	cpu_core_id[cpu] = BAD_APICID;
   5.123  	cpu_clear(cpu, cpu_sibling_setup_map);
     6.1 --- a/xen/arch/x86/sysctl.c	Mon Jul 13 11:32:41 2009 +0100
     6.2 +++ b/xen/arch/x86/sysctl.c	Mon Jul 13 11:45:31 2009 +0100
     6.3 @@ -60,9 +60,9 @@ long arch_do_sysctl(
     6.4          memset(pi, 0, sizeof(*pi));
     6.5          pi->cpu_to_node = cpu_to_node_arr;
     6.6          pi->threads_per_core =
     6.7 -            cpus_weight(cpu_sibling_map[0]);
     6.8 +            cpus_weight(per_cpu(cpu_sibling_map, 0));
     6.9          pi->cores_per_socket =
    6.10 -            cpus_weight(cpu_core_map[0]) / pi->threads_per_core;
    6.11 +            cpus_weight(per_cpu(cpu_core_map, 0)) / pi->threads_per_core;
    6.12          pi->nr_cpus = (u32)num_online_cpus();
    6.13          pi->nr_nodes = num_online_nodes();
    6.14          pi->total_pages = total_pages;
     7.1 --- a/xen/common/domctl.c	Mon Jul 13 11:32:41 2009 +0100
     7.2 +++ b/xen/common/domctl.c	Mon Jul 13 11:45:31 2009 +0100
     7.3 @@ -167,18 +167,18 @@ static unsigned int default_vcpu0_locati
     7.4       * If we're on a HT system, we only auto-allocate to a non-primary HT. We 
     7.5       * favour high numbered CPUs in the event of a tie.
     7.6       */
     7.7 -    cpu = first_cpu(cpu_sibling_map[0]);
     7.8 -    if ( cpus_weight(cpu_sibling_map[0]) > 1 )
     7.9 -        cpu = next_cpu(cpu, cpu_sibling_map[0]);
    7.10 -    cpu_exclude_map = cpu_sibling_map[0];
    7.11 +    cpu = first_cpu(per_cpu(cpu_sibling_map, 0));
    7.12 +    if ( cpus_weight(per_cpu(cpu_sibling_map, 0)) > 1 )
    7.13 +        cpu = next_cpu(cpu, per_cpu(cpu_sibling_map, 0));
    7.14 +    cpu_exclude_map = per_cpu(cpu_sibling_map, 0);
    7.15      for_each_online_cpu ( i )
    7.16      {
    7.17          if ( cpu_isset(i, cpu_exclude_map) )
    7.18              continue;
    7.19 -        if ( (i == first_cpu(cpu_sibling_map[i])) &&
    7.20 -             (cpus_weight(cpu_sibling_map[i]) > 1) )
    7.21 +        if ( (i == first_cpu(per_cpu(cpu_sibling_map, i))) &&
    7.22 +             (cpus_weight(per_cpu(cpu_sibling_map, i)) > 1) )
    7.23              continue;
    7.24 -        cpus_or(cpu_exclude_map, cpu_exclude_map, cpu_sibling_map[i]);
    7.25 +        cpus_or(cpu_exclude_map, cpu_exclude_map, per_cpu(cpu_sibling_map, i));
    7.26          if ( !cnt || cnt[i] <= cnt[cpu] )
    7.27              cpu = i;
    7.28      }
     8.1 --- a/xen/common/sched_credit.c	Mon Jul 13 11:32:41 2009 +0100
     8.2 +++ b/xen/common/sched_credit.c	Mon Jul 13 11:45:31 2009 +0100
     8.3 @@ -402,17 +402,17 @@ csched_cpu_pick(struct vcpu *vc)
     8.4  
     8.5          nxt = cycle_cpu(cpu, cpus);
     8.6  
     8.7 -        if ( cpu_isset(cpu, cpu_core_map[nxt]) )
     8.8 +        if ( cpu_isset(cpu, per_cpu(cpu_core_map, nxt)) )
     8.9          {
    8.10 -            ASSERT( cpu_isset(nxt, cpu_core_map[cpu]) );
    8.11 -            cpus_and(cpu_idlers, idlers, cpu_sibling_map[cpu]);
    8.12 -            cpus_and(nxt_idlers, idlers, cpu_sibling_map[nxt]);
    8.13 +            ASSERT( cpu_isset(nxt, per_cpu(cpu_core_map, cpu)) );
    8.14 +            cpus_and(cpu_idlers, idlers, per_cpu(cpu_sibling_map, cpu));
    8.15 +            cpus_and(nxt_idlers, idlers, per_cpu(cpu_sibling_map, nxt));
    8.16          }
    8.17          else
    8.18          {
    8.19 -            ASSERT( !cpu_isset(nxt, cpu_core_map[cpu]) );
    8.20 -            cpus_and(cpu_idlers, idlers, cpu_core_map[cpu]);
    8.21 -            cpus_and(nxt_idlers, idlers, cpu_core_map[nxt]);
    8.22 +            ASSERT( !cpu_isset(nxt, per_cpu(cpu_core_map, cpu)) );
    8.23 +            cpus_and(cpu_idlers, idlers, per_cpu(cpu_core_map, cpu));
    8.24 +            cpus_and(nxt_idlers, idlers, per_cpu(cpu_core_map, nxt));
    8.25          }
    8.26  
    8.27          weight_cpu = cpus_weight(cpu_idlers);
    8.28 @@ -1205,9 +1205,9 @@ csched_dump_pcpu(int cpu)
    8.29      spc = CSCHED_PCPU(cpu);
    8.30      runq = &spc->runq;
    8.31  
    8.32 -    cpumask_scnprintf(cpustr, sizeof(cpustr), cpu_sibling_map[cpu]);
    8.33 +    cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_sibling_map, cpu));
    8.34      printk(" sort=%d, sibling=%s, ", spc->runq_sort_last, cpustr);
    8.35 -    cpumask_scnprintf(cpustr, sizeof(cpustr), cpu_core_map[cpu]);
    8.36 +    cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_core_map, cpu));
    8.37      printk("core=%s\n", cpustr);
    8.38  
    8.39      /* current VCPU */
     9.1 --- a/xen/include/asm-ia64/linux-xen/asm/smp.h	Mon Jul 13 11:32:41 2009 +0100
     9.2 +++ b/xen/include/asm-ia64/linux-xen/asm/smp.h	Mon Jul 13 11:45:31 2009 +0100
     9.3 @@ -60,8 +60,8 @@ extern struct smp_boot_data {
     9.4  extern char no_int_routing __devinitdata;
     9.5  
     9.6  extern cpumask_t cpu_online_map;
     9.7 -extern cpumask_t cpu_core_map[NR_CPUS];
     9.8 -extern cpumask_t cpu_sibling_map[NR_CPUS];
     9.9 +DECLARE_PER_CPU(cpumask_t, cpu_core_map);
    9.10 +DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
    9.11  extern int smp_num_siblings;
    9.12  extern int smp_num_cpucores;
    9.13  extern void __iomem *ipi_base_addr;
    10.1 --- a/xen/include/asm-x86/smp.h	Mon Jul 13 11:32:41 2009 +0100
    10.2 +++ b/xen/include/asm-x86/smp.h	Mon Jul 13 11:45:31 2009 +0100
    10.3 @@ -32,8 +32,8 @@
    10.4   
    10.5  extern void smp_alloc_memory(void);
    10.6  extern int pic_mode;
    10.7 -extern cpumask_t cpu_sibling_map[];
    10.8 -extern cpumask_t cpu_core_map[];
    10.9 +DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
   10.10 +DECLARE_PER_CPU(cpumask_t, cpu_core_map);
   10.11  
   10.12  void smp_send_nmi_allbutself(void);
   10.13