debuggers.hg

view xen/arch/x86/srat.c @ 21030:bb7164fc680a

x86_32: Fix build after 20983:94535cc63835

Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Feb 26 08:13:02 2010 +0000 (2010-02-26)
parents 94535cc63835
children d01ea51fc929
line source
1 /*
2 * ACPI 3.0 based NUMA setup
3 * Copyright 2004 Andi Kleen, SuSE Labs.
4 *
5 * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
6 *
7 * Called from acpi_numa_init while reading the SRAT and SLIT tables.
8 * Assumes all memory regions belonging to a single proximity domain
9 * are in one chunk. Holes between them will be included in the node.
10 *
11 * Adapted for Xen: Ryan Harper <ryanh@us.ibm.com>
12 */
14 #include <xen/init.h>
15 #include <xen/mm.h>
16 #include <xen/inttypes.h>
17 #include <xen/nodemask.h>
18 #include <xen/acpi.h>
19 #include <xen/numa.h>
20 #include <asm/e820.h>
21 #include <asm/page.h>
/* Cached copy of the firmware SLIT (distance table); NULL if absent/invalid. */
static struct acpi_table_slit *__read_mostly acpi_slit;

static nodemask_t nodes_parsed __initdata;
static nodemask_t nodes_found __initdata;
static struct node nodes[MAX_NUMNODES] __initdata;
/* Proximity domain (0-255) -> node id; 0xff marks "no node assigned yet". */
static u8 __read_mostly pxm2node[256] = { [0 ... 255] = 0xff };

/* Raw SRAT memory blocks, kept for hash-shift computation and range checks. */
static int num_node_memblks;
static struct node node_memblk_range[NR_NODE_MEMBLKS];
static int memblk_nodeid[NR_NODE_MEMBLKS];

/* Too small nodes confuse the VM badly. Usually they result
   from BIOS bugs. */
#define NODE_MIN_SIZE (4*1024*1024)

static int node_to_pxm(int n);
41 int pxm_to_node(int pxm)
42 {
43 if ((unsigned)pxm >= 256)
44 return -1;
45 /* Extend 0xff to (int)-1 */
46 return (signed char)pxm2node[pxm];
47 }
49 __devinit int setup_node(int pxm)
50 {
51 unsigned node = pxm2node[pxm];
52 if (node == 0xff) {
53 if (nodes_weight(nodes_found) >= MAX_NUMNODES)
54 return -1;
55 node = first_unset_node(nodes_found);
56 node_set(node, nodes_found);
57 pxm2node[pxm] = node;
58 }
59 return pxm2node[pxm];
60 }
62 int valid_numa_range(u64 start, u64 end, int node)
63 {
64 int i;
66 for (i = 0; i < num_node_memblks; i++) {
67 struct node *nd = &node_memblk_range[i];
69 if (nd->start <= start && nd->end > end &&
70 memblk_nodeid[i] == node )
71 return 1;
72 }
74 return 0;
75 }
77 static __init int conflicting_memblks(u64 start, u64 end)
78 {
79 int i;
81 for (i = 0; i < num_node_memblks; i++) {
82 struct node *nd = &node_memblk_range[i];
83 if (nd->start == nd->end)
84 continue;
85 if (nd->end > start && nd->start < end)
86 return memblk_nodeid[i];
87 if (nd->end == end && nd->start == start)
88 return memblk_nodeid[i];
89 }
90 return -1;
91 }
93 static __init void cutoff_node(int i, u64 start, u64 end)
94 {
95 struct node *nd = &nodes[i];
96 if (nd->start < start) {
97 nd->start = start;
98 if (nd->end < nd->start)
99 nd->start = nd->end;
100 }
101 if (nd->end > end) {
102 nd->end = end;
103 if (nd->start > nd->end)
104 nd->start = nd->end;
105 }
106 }
108 static __init void bad_srat(void)
109 {
110 int i;
111 printk(KERN_ERR "SRAT: SRAT not used.\n");
112 acpi_numa = -1;
113 for (i = 0; i < MAX_LOCAL_APIC; i++)
114 apicid_to_node[i] = NUMA_NO_NODE;
115 }
117 #ifdef CONFIG_X86_64
118 /*
119 * A lot of BIOS fill in 10 (= no distance) everywhere. This messes
120 * up the NUMA heuristics which wants the local node to have a smaller
121 * distance than the others.
122 * Do some quick checks here and only use the SLIT if it passes.
123 */
124 static __init int slit_valid(struct acpi_table_slit *slit)
125 {
126 int i, j;
127 int d = slit->locality_count;
128 for (i = 0; i < d; i++) {
129 for (j = 0; j < d; j++) {
130 u8 val = slit->entry[d*i + j];
131 if (i == j) {
132 if (val != 10)
133 return 0;
134 } else if (val <= 10)
135 return 0;
136 }
137 }
138 return 1;
139 }
/* Callback for SLIT parsing */
/*
 * Validate the firmware SLIT and, if it looks sane, copy it into a
 * freshly allocated boot-time buffer so it outlives the ACPI tables.
 * On validation or allocation failure the SLIT is simply not used
 * (acpi_slit stays NULL; __node_distance() then uses fixed defaults).
 */
void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
{
	unsigned long mfn;
	if (!slit_valid(slit)) {
		printk(KERN_INFO "ACPI: SLIT table looks invalid. "
		       "Not used.\n");
		return;
	}
	/* Enough whole pages to hold the entire table. */
	mfn = alloc_boot_pages(PFN_UP(slit->header.length), 1);
	if (!mfn) {
		printk(KERN_ERR "ACPI: Unable to allocate memory for "
		       "saving ACPI SLIT numa information.\n");
		return;
	}
	acpi_slit = mfn_to_virt(mfn);
	memcpy(acpi_slit, slit, slit->header.length);
}
159 #else
/* x86_32 stub: the SLIT is not retained; __node_distance() uses defaults. */
void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
{
}
163 #endif
/* Callback for Proximity Domain -> LAPIC mapping */
/*
 * Record the node for one SRAT CPU affinity entry in apicid_to_node[].
 * A malformed entry length invalidates the whole SRAT via bad_srat().
 */
void __init
acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
{
	int pxm, node;
	if (srat_disabled())
		return;
	if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
		bad_srat();
		return;
	}
	/* Disabled entries are legitimately present; just skip them. */
	if (!(pa->flags & ACPI_SRAT_CPU_ENABLED))
		return;
	pxm = pa->proximity_domain_lo;
	/* SRAT revision 2+ widens the proximity domain to 32 bits. */
	if (srat_rev >= 2) {
		pxm |= pa->proximity_domain_hi[0] << 8;
		pxm |= pa->proximity_domain_hi[1] << 16;
		pxm |= pa->proximity_domain_hi[2] << 24;
	}
	node = setup_node(pxm);
	if (node < 0) {
		printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
		bad_srat();
		return;
	}
	apicid_to_node[pa->apic_id] = node;
	acpi_numa = 1;
	printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
	       pxm, pa->apic_id, node);
}
/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
/*
 * Record one SRAT memory affinity entry: assign it a node, grow that
 * node's [start, end) span, and append the raw block to
 * node_memblk_range[] for later hash-shift computation.
 */
void __init
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
{
	struct node *nd;
	u64 start, end;
	int node, pxm;
	int i;

	if (srat_disabled())
		return;
	/* A malformed entry invalidates the whole SRAT. */
	if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
		bad_srat();
		return;
	}
	/* Disabled entries are legitimately present; just skip them. */
	if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
		return;

	if (num_node_memblks >= NR_NODE_MEMBLKS)
	{
		dprintk(XENLOG_WARNING,
		        "Too many numa entry, try bigger NR_NODE_MEMBLKS \n");
		bad_srat();
		return;
	}

	start = ma->base_address;
	end = start + ma->length;	/* exclusive end */
	pxm = ma->proximity_domain;
	/* Before SRAT rev 2 only the low 8 bits of the PXM are valid. */
	if (srat_rev < 2)
		pxm &= 0xff;
	node = setup_node(pxm);
	if (node < 0) {
		printk(KERN_ERR "SRAT: Too many proximity domains.\n");
		bad_srat();
		return;
	}
	/* It is fine to add this area to the nodes data it will be used later*/
	if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)
	{
		printk(KERN_INFO "SRAT: hot plug zone found %"PRIx64" - %"PRIx64" \n",
		       start, end);
#ifdef CONFIG_X86_64
		mem_hotplug = 1;
#endif
	}

	i = conflicting_memblks(start, end);
	if (i == node) {
		/* Overlap with this node's own blocks: warn but continue. */
		printk(KERN_WARNING
		       "SRAT: Warning: PXM %d (%"PRIx64"-%"PRIx64") overlaps with itself (%"
		       PRIx64"-%"PRIx64")\n", pxm, start, end, nodes[i].start, nodes[i].end);
	} else if (i >= 0) {
		/* Overlap with a different node's block: SRAT is unusable. */
		printk(KERN_ERR
		       "SRAT: PXM %d (%"PRIx64"-%"PRIx64") overlaps with PXM %d (%"
		       PRIx64"-%"PRIx64")\n", pxm, start, end, node_to_pxm(i),
		       nodes[i].start, nodes[i].end);
		bad_srat();
		return;
	}
	nd = &nodes[node];
	if (!node_test_and_set(node, nodes_parsed)) {
		/* First block for this node: it defines the node's span. */
		nd->start = start;
		nd->end = end;
	} else {
		/* Grow the node's span to cover the new block. */
		if (start < nd->start)
			nd->start = start;
		if (nd->end < end)
			nd->end = end;
	}
	printk(KERN_INFO "SRAT: Node %u PXM %u %"PRIx64"-%"PRIx64"\n", node, pxm,
	       start, end);

	/* Keep the raw block too; spans alone lose the per-block layout. */
	node_memblk_range[num_node_memblks].start = start;
	node_memblk_range[num_node_memblks].end = end;
	memblk_nodeid[num_node_memblks] = node;
	num_node_memblks++;
}
/* Sanity check to catch more bad SRATs (they are amazingly common).
   Make sure the PXMs cover all memory. */
static int nodes_cover_memory(void)
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		int j, found;
		unsigned long long start, end;

		/* Only RAM ranges need NUMA coverage. */
		if (e820.map[i].type != E820_RAM) {
			continue;
		}

		start = e820.map[i].addr;
		/* NOTE(review): inclusive end here, while nodes[] use an
		   exclusive end - looks inconsistent but matches the
		   comparisons below; confirm before changing. */
		end = e820.map[i].addr + e820.map[i].size - 1;

		/* Repeatedly trim the range by any node covering one of
		   its ends, until nothing more can be trimmed. */
		do {
			found = 0;
			for_each_node_mask(j, nodes_parsed)
				if (start < nodes[j].end
				    && end > nodes[j].start) {
					if (start >= nodes[j].start) {
						start = nodes[j].end;
						found = 1;
					}
					if (end <= nodes[j].end) {
						end = nodes[j].start;
						found = 1;
					}
				}
		} while (found && start < end);

		/* Anything left over is RAM no PXM claimed. */
		if (start < end) {
			printk(KERN_ERR "SRAT: No PXM for e820 range: "
			       "%016Lx - %016Lx\n", start, end);
			return 0;
		}
	}
	return 1;
}
/* No arch-specific SRAT fixups needed on x86. */
void __init acpi_numa_arch_fixup(void) {}
319 #ifdef __x86_64__
321 static u64 __initdata srat_region_mask;
323 static u64 __init fill_mask(u64 mask)
324 {
325 while (mask & (mask + 1))
326 mask |= mask + 1;
327 return mask;
328 }
/*
 * acpi_table_parse_srat() callback: fold each enabled, volatile SRAT
 * memory region into srat_region_mask, covering the address bits the
 * region occupies.  Used later to find unused address bits.
 */
static int __init srat_parse_region(struct acpi_subtable_header *header,
                                    const unsigned long end)
{
	struct acpi_srat_mem_affinity *ma;

	if (!header)
		return -EINVAL;

	ma = container_of(header, struct acpi_srat_mem_affinity, header);

	/* Skip empty, disabled, or non-volatile regions. */
	if (!ma->length ||
	    !(ma->flags & ACPI_SRAT_MEM_ENABLED) ||
	    (ma->flags & ACPI_SRAT_MEM_NON_VOLATILE))
		return 0;

	/* With NUMA off, this is where the regions still get logged. */
	if (numa_off)
		printk(KERN_INFO "SRAT: %013"PRIx64"-%013"PRIx64"\n",
		       ma->base_address, ma->base_address + ma->length - 1);

	/* Base bits, plus a solid mask spanning base..last XOR spread. */
	srat_region_mask |= ma->base_address |
			    fill_mask(ma->base_address ^
				      (ma->base_address + ma->length - 1));

	return 0;
}
/*
 * Compute a mask of the address bits actually used by SRAT memory
 * regions (seeded so everything below 'addr' counts as used), check it
 * against the e820 RAM map, and hand the result to pfn_pdx_hole_setup()
 * so unused address bits can be compressed out of the PDX space.
 */
void __init srat_parse_regions(u64 addr)
{
	u64 mask;
	unsigned int i;

	if (acpi_disabled || acpi_numa < 0 ||
	    acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat))
		return;

	srat_region_mask = fill_mask(addr - 1);
	acpi_table_parse_srat(ACPI_SRAT_MEMORY_AFFINITY, srat_parse_region, 0);

	/* Any e820 RAM range using bits outside the accumulated mask
	   disables the compression entirely (mask = 0). */
	for (mask = srat_region_mask, i = 0; mask && i < e820.nr_map; i++) {
		if (e820.map[i].type != E820_RAM)
			continue;

		if (~mask &
		    fill_mask(e820.map[i].addr ^
			      (e820.map[i].addr + e820.map[i].size - 1)))
			mask = 0;
	}

	pfn_pdx_hole_setup(mask >> PAGE_SHIFT);
}
381 #endif /* __x86_64__ */
/* Use the information discovered above to actually set up the nodes. */
/*
 * Returns 0 on success, -1 when NUMA setup is impossible (no/bad SRAT,
 * incomplete memory coverage, or no usable hash shift); callers fall
 * back to a flat configuration in that case.
 */
int __init acpi_scan_nodes(u64 start, u64 end)
{
	int i;

	/* First clean up the node list */
	for (i = 0; i < MAX_NUMNODES; i++)
		cutoff_node(i, start, end);

	if (acpi_numa <= 0)
		return -1;

	if (!nodes_cover_memory()) {
		bad_srat();
		return -1;
	}

	memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
					   memblk_nodeid);

	if (memnode_shift < 0) {
		printk(KERN_ERR
		       "SRAT: No NUMA node hash function found. Contact maintainer\n");
		bad_srat();
		return -1;
	}

	/* Finally register nodes */
	for_each_node_mask(i, nodes_parsed)
	{
		/* Skip implausibly small nodes (usually BIOS bugs). */
		if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE)
			continue;
		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
	}
	/* Drop CPU->node mappings that point at unparsed nodes. */
	for (i = 0; i < NR_CPUS; i++) {
		if (cpu_to_node[i] == NUMA_NO_NODE)
			continue;
		if (!node_isset(cpu_to_node[i], nodes_parsed))
			numa_set_node(i, NUMA_NO_NODE);
	}
	numa_init_array();
	return 0;
}
427 static int node_to_pxm(int n)
428 {
429 int i;
430 if (pxm2node[n] == n)
431 return n;
432 for (i = 0; i < 256; i++)
433 if (pxm2node[i] == n)
434 return i;
435 return 0;
436 }
438 int __node_distance(int a, int b)
439 {
440 int index;
442 if (!acpi_slit)
443 return a == b ? 10 : 20;
444 index = acpi_slit->locality_count * node_to_pxm(a);
445 return acpi_slit->entry[index + node_to_pxm(b)];
446 }
448 EXPORT_SYMBOL(__node_distance);