Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/arch/x86/srat.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * ACPI 3.0 based NUMA setup
3
 * Copyright 2004 Andi Kleen, SuSE Labs.
4
 *
5
 * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
6
 *
7
 * Called from acpi_numa_init while reading the SRAT and SLIT tables.
8
 * Assumes all memory regions belonging to a single proximity domain
9
 * are in one chunk. Holes between them will be included in the node.
10
 * 
11
 * Adapted for Xen: Ryan Harper <ryanh@us.ibm.com>
12
 */
13
14
#include <xen/init.h>
15
#include <xen/mm.h>
16
#include <xen/inttypes.h>
17
#include <xen/nodemask.h>
18
#include <xen/acpi.h>
19
#include <xen/numa.h>
20
#include <xen/pfn.h>
21
#include <asm/e820.h>
22
#include <asm/page.h>
23
24
static struct acpi_table_slit *__read_mostly acpi_slit;
25
26
static nodemask_t memory_nodes_parsed __initdata;
27
static nodemask_t processor_nodes_parsed __initdata;
28
static struct node nodes[MAX_NUMNODES] __initdata;
29
30
struct pxm2node {
31
  unsigned pxm;
32
  nodeid_t node;
33
};
34
static struct pxm2node __read_mostly pxm2node[MAX_NUMNODES] =
35
  { [0 ... MAX_NUMNODES - 1] = {.node = NUMA_NO_NODE} };
36
37
static unsigned node_to_pxm(nodeid_t n);
38
39
static int num_node_memblks;
40
static struct node node_memblk_range[NR_NODE_MEMBLKS];
41
static nodeid_t memblk_nodeid[NR_NODE_MEMBLKS];
42
static __initdata DECLARE_BITMAP(memblk_hotplug, NR_NODE_MEMBLKS);
43
44
static inline bool node_found(unsigned idx, unsigned pxm)
45
526k
{
46
526k
  return ((pxm2node[idx].pxm == pxm) &&
47
526k
    (pxm2node[idx].node != NUMA_NO_NODE));
48
526k
}
49
50
nodeid_t pxm_to_node(unsigned pxm)
51
8.09k
{
52
8.09k
  unsigned i;
53
8.09k
54
8.09k
  if ((pxm < ARRAY_SIZE(pxm2node)) && node_found(pxm, pxm))
55
0
    return pxm2node[pxm].node;
56
8.09k
57
526k
  for (i = 0; i < ARRAY_SIZE(pxm2node); i++)
58
517k
    if (node_found(i, pxm))
59
0
      return pxm2node[i].node;
60
8.09k
61
8.09k
  return NUMA_NO_NODE;
62
8.09k
}
63
64
nodeid_t setup_node(unsigned pxm)
65
0
{
66
0
  nodeid_t node;
67
0
  unsigned idx;
68
0
  static bool warned;
69
0
  static unsigned nodes_found;
70
0
71
0
  BUILD_BUG_ON(MAX_NUMNODES >= NUMA_NO_NODE);
72
0
73
0
  if (pxm < ARRAY_SIZE(pxm2node)) {
74
0
    if (node_found(pxm, pxm))
75
0
      return pxm2node[pxm].node;
76
0
77
0
    /* Try to maintain indexing of pxm2node by pxm */
78
0
    if (pxm2node[pxm].node == NUMA_NO_NODE) {
79
0
      idx = pxm;
80
0
      goto finish;
81
0
    }
82
0
  }
83
0
84
0
  for (idx = 0; idx < ARRAY_SIZE(pxm2node); idx++)
85
0
    if (pxm2node[idx].node == NUMA_NO_NODE)
86
0
      goto finish;
87
0
88
0
  if (!warned) {
89
0
    printk(KERN_WARNING "SRAT: Too many proximity domains (%#x)\n",
90
0
           pxm);
91
0
    warned = true;
92
0
  }
93
0
94
0
  return NUMA_NO_NODE;
95
0
96
0
 finish:
97
0
  node = nodes_found++;
98
0
  if (node >= MAX_NUMNODES)
99
0
    return NUMA_NO_NODE;
100
0
  pxm2node[idx].pxm = pxm;
101
0
  pxm2node[idx].node = node;
102
0
103
0
  return node;
104
0
}
105
106
/*
 * Check whether [@start, @end) lies entirely inside one recorded memory
 * block that belongs to @node.
 *
 * Block ends are stored as start + length, i.e. exclusive, so a range that
 * finishes exactly at the end of a block is still inside it.
 *
 * Returns 1 when such a block exists, 0 otherwise.
 */
int valid_numa_range(u64 start, u64 end, nodeid_t node)
{
	int i;

	for (i = 0; i < num_node_memblks; i++) {
		const struct node *nd = &node_memblk_range[i];

		if (memblk_nodeid[i] != node)
			continue;

		/* Both ends are exclusive, hence ">=" rather than ">". */
		if (nd->start <= start && nd->end >= end)
			return 1;
	}

	return 0;
}
120
121
/*
 * Find a recorded, non-empty memory block that overlaps [@start, @end) or
 * has exactly the same bounds.
 *
 * Returns the index of the first such block, or -1 if there is none.
 */
static __init int conflicting_memblks(u64 start, u64 end)
{
	int blk;

	for (blk = 0; blk < num_node_memblks; blk++) {
		const struct node *cur = &node_memblk_range[blk];
		bool overlaps, identical;

		/* Empty blocks cannot conflict with anything. */
		if (cur->start == cur->end)
			continue;

		overlaps = cur->end > start && cur->start < end;
		identical = cur->end == end && cur->start == start;
		if (overlaps || identical)
			return blk;
	}

	return -1;
}
136
137
/*
 * Clamp the span of node @i to [@start, @end].  A span lying completely
 * outside that window collapses to an empty one (start == end).
 */
static __init void cutoff_node(int i, u64 start, u64 end)
{
	struct node *span = &nodes[i];

	/* Raise the lower bound, but never past the node's own end. */
	if (span->start < start)
		span->start = (span->end < start) ? span->end : start;

	/* Lower the upper bound and drag the start along if needed. */
	if (span->end > end) {
		span->end = end;
		if (span->start > end)
			span->start = end;
	}
}
151
152
static __init void bad_srat(void)
153
0
{
154
0
  int i;
155
0
  printk(KERN_ERR "SRAT: SRAT not used.\n");
156
0
  acpi_numa = -1;
157
0
  for (i = 0; i < MAX_LOCAL_APIC; i++)
158
0
    apicid_to_node[i] = NUMA_NO_NODE;
159
0
  for (i = 0; i < ARRAY_SIZE(pxm2node); i++)
160
0
    pxm2node[i].node = NUMA_NO_NODE;
161
0
  mem_hotplug = 0;
162
0
}
163
164
/*
165
 * A lot of BIOS fill in 10 (= no distance) everywhere. This messes
166
 * up the NUMA heuristics which wants the local node to have a smaller
167
 * distance than the others.
168
 * Do some quick checks here and only use the SLIT if it passes.
169
 */
170
static __init int slit_valid(struct acpi_table_slit *slit)
171
0
{
172
0
  int i, j;
173
0
  int d = slit->locality_count;
174
0
  for (i = 0; i < d; i++) {
175
0
    for (j = 0; j < d; j++)  {
176
0
      u8 val = slit->entry[d*i + j];
177
0
      if (i == j) {
178
0
        if (val != 10)
179
0
          return 0;
180
0
      } else if (val <= 10)
181
0
        return 0;
182
0
    }
183
0
  }
184
0
  return 1;
185
0
}
186
187
/* Callback for SLIT parsing */
188
void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
189
0
{
190
0
  mfn_t mfn;
191
0
192
0
  if (!slit_valid(slit)) {
193
0
    printk(KERN_INFO "ACPI: SLIT table looks invalid. "
194
0
           "Not used.\n");
195
0
    return;
196
0
  }
197
0
  mfn = alloc_boot_pages(PFN_UP(slit->header.length), 1);
198
0
  acpi_slit = mfn_to_virt(mfn_x(mfn));
199
0
  memcpy(acpi_slit, slit, slit->header.length);
200
0
}
201
202
/* Callback for Proximity Domain -> x2APIC mapping */
203
void __init
204
acpi_numa_x2apic_affinity_init(const struct acpi_srat_x2apic_cpu_affinity *pa)
205
0
{
206
0
  unsigned pxm;
207
0
  nodeid_t node;
208
0
209
0
  if (srat_disabled())
210
0
    return;
211
0
  if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) {
212
0
    bad_srat();
213
0
    return;
214
0
  }
215
0
  if (!(pa->flags & ACPI_SRAT_CPU_ENABLED))
216
0
    return;
217
0
  if (pa->apic_id >= MAX_LOCAL_APIC) {
218
0
    printk(KERN_INFO "SRAT: APIC %08x ignored\n", pa->apic_id);
219
0
    return;
220
0
  }
221
0
222
0
  pxm = pa->proximity_domain;
223
0
  node = setup_node(pxm);
224
0
  if (node == NUMA_NO_NODE) {
225
0
    bad_srat();
226
0
    return;
227
0
  }
228
0
229
0
  apicid_to_node[pa->apic_id] = node;
230
0
  node_set(node, processor_nodes_parsed);
231
0
  acpi_numa = 1;
232
0
  printk(KERN_INFO "SRAT: PXM %u -> APIC %08x -> Node %u\n",
233
0
         pxm, pa->apic_id, node);
234
0
}
235
236
/* Callback for Proximity Domain -> LAPIC mapping */
237
void __init
238
acpi_numa_processor_affinity_init(const struct acpi_srat_cpu_affinity *pa)
239
0
{
240
0
  unsigned pxm;
241
0
  nodeid_t node;
242
0
243
0
  if (srat_disabled())
244
0
    return;
245
0
  if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
246
0
    bad_srat();
247
0
    return;
248
0
  }
249
0
  if (!(pa->flags & ACPI_SRAT_CPU_ENABLED))
250
0
    return;
251
0
  pxm = pa->proximity_domain_lo;
252
0
  if (srat_rev >= 2) {
253
0
    pxm |= pa->proximity_domain_hi[0] << 8;
254
0
    pxm |= pa->proximity_domain_hi[1] << 16;
255
0
    pxm |= pa->proximity_domain_hi[2] << 24;
256
0
  }
257
0
  node = setup_node(pxm);
258
0
  if (node == NUMA_NO_NODE) {
259
0
    bad_srat();
260
0
    return;
261
0
  }
262
0
  apicid_to_node[pa->apic_id] = node;
263
0
  node_set(node, processor_nodes_parsed);
264
0
  acpi_numa = 1;
265
0
  printk(KERN_INFO "SRAT: PXM %u -> APIC %02x -> Node %u\n",
266
0
         pxm, pa->apic_id, node);
267
0
}
268
269
/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
270
void __init
271
acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma)
272
0
{
273
0
  u64 start, end;
274
0
  unsigned pxm;
275
0
  nodeid_t node;
276
0
  int i;
277
0
278
0
  if (srat_disabled())
279
0
    return;
280
0
  if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
281
0
    bad_srat();
282
0
    return;
283
0
  }
284
0
  if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
285
0
    return;
286
0
287
0
  if (num_node_memblks >= NR_NODE_MEMBLKS)
288
0
  {
289
0
    dprintk(XENLOG_WARNING,
290
0
                "Too many numa entry, try bigger NR_NODE_MEMBLKS \n");
291
0
    bad_srat();
292
0
    return;
293
0
  }
294
0
295
0
  start = ma->base_address;
296
0
  end = start + ma->length;
297
0
  pxm = ma->proximity_domain;
298
0
  if (srat_rev < 2)
299
0
    pxm &= 0xff;
300
0
  node = setup_node(pxm);
301
0
  if (node == NUMA_NO_NODE) {
302
0
    bad_srat();
303
0
    return;
304
0
  }
305
0
  /* It is fine to add this area to the nodes data it will be used later*/
306
0
  i = conflicting_memblks(start, end);
307
0
  if (i < 0)
308
0
    /* everything fine */;
309
0
  else if (memblk_nodeid[i] == node) {
310
0
    bool mismatch = !(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) !=
311
0
                    !test_bit(i, memblk_hotplug);
312
0
313
0
    printk("%sSRAT: PXM %u (%"PRIx64"-%"PRIx64") overlaps with itself (%"PRIx64"-%"PRIx64")\n",
314
0
           mismatch ? KERN_ERR : KERN_WARNING, pxm, start, end,
315
0
           node_memblk_range[i].start, node_memblk_range[i].end);
316
0
    if (mismatch) {
317
0
      bad_srat();
318
0
      return;
319
0
    }
320
0
  } else {
321
0
    printk(KERN_ERR
322
0
           "SRAT: PXM %u (%"PRIx64"-%"PRIx64") overlaps with PXM %u (%"PRIx64"-%"PRIx64")\n",
323
0
           pxm, start, end, node_to_pxm(memblk_nodeid[i]),
324
0
           node_memblk_range[i].start, node_memblk_range[i].end);
325
0
    bad_srat();
326
0
    return;
327
0
  }
328
0
  if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) {
329
0
    struct node *nd = &nodes[node];
330
0
331
0
    if (!node_test_and_set(node, memory_nodes_parsed)) {
332
0
      nd->start = start;
333
0
      nd->end = end;
334
0
    } else {
335
0
      if (start < nd->start)
336
0
        nd->start = start;
337
0
      if (nd->end < end)
338
0
        nd->end = end;
339
0
    }
340
0
  }
341
0
  printk(KERN_INFO "SRAT: Node %u PXM %u %"PRIx64"-%"PRIx64"%s\n",
342
0
         node, pxm, start, end,
343
0
         ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE ? " (hotplug)" : "");
344
0
345
0
  node_memblk_range[num_node_memblks].start = start;
346
0
  node_memblk_range[num_node_memblks].end = end;
347
0
  memblk_nodeid[num_node_memblks] = node;
348
0
  if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
349
0
    __set_bit(num_node_memblks, memblk_hotplug);
350
0
    if (end > mem_hotplug)
351
0
      mem_hotplug = end;
352
0
  }
353
0
  num_node_memblks++;
354
0
}
355
356
/* Sanity check to catch more bad SRATs (they are amazingly common).
357
   Make sure the PXMs cover all memory. */
358
static int __init nodes_cover_memory(void)
359
0
{
360
0
  int i;
361
0
362
0
  for (i = 0; i < e820.nr_map; i++) {
363
0
    int j, found;
364
0
    unsigned long long start, end;
365
0
366
0
    if (e820.map[i].type != E820_RAM) {
367
0
      continue;
368
0
    }
369
0
370
0
    start = e820.map[i].addr;
371
0
    end = e820.map[i].addr + e820.map[i].size - 1;
372
0
373
0
    do {
374
0
      found = 0;
375
0
      for_each_node_mask(j, memory_nodes_parsed)
376
0
        if (start < nodes[j].end
377
0
            && end > nodes[j].start) {
378
0
          if (start >= nodes[j].start) {
379
0
            start = nodes[j].end;
380
0
            found = 1;
381
0
          }
382
0
          if (end <= nodes[j].end) {
383
0
            end = nodes[j].start;
384
0
            found = 1;
385
0
          }
386
0
        }
387
0
    } while (found && start < end);
388
0
389
0
    if (start < end) {
390
0
      printk(KERN_ERR "SRAT: No PXM for e820 range: "
391
0
        "%016Lx - %016Lx\n", start, end);
392
0
      return 0;
393
0
    }
394
0
  }
395
0
  return 1;
396
0
}
397
398
1
/* Intentionally empty: this file has no fix-up work to do. */
void __init acpi_numa_arch_fixup(void)
{
}
399
400
static u64 __initdata srat_region_mask;
401
402
static int __init srat_parse_region(struct acpi_subtable_header *header,
403
            const unsigned long end)
404
0
{
405
0
  struct acpi_srat_mem_affinity *ma;
406
0
407
0
  if (!header)
408
0
    return -EINVAL;
409
0
410
0
  ma = container_of(header, struct acpi_srat_mem_affinity, header);
411
0
412
0
  if (!ma->length ||
413
0
      !(ma->flags & ACPI_SRAT_MEM_ENABLED) ||
414
0
      (ma->flags & ACPI_SRAT_MEM_NON_VOLATILE))
415
0
    return 0;
416
0
417
0
  if (numa_off)
418
0
    printk(KERN_INFO "SRAT: %013"PRIx64"-%013"PRIx64"\n",
419
0
           ma->base_address, ma->base_address + ma->length - 1);
420
0
421
0
  srat_region_mask |= ma->base_address |
422
0
          pdx_region_mask(ma->base_address, ma->length);
423
0
424
0
  return 0;
425
0
}
426
427
/*
 * Build the address-bit mask from the SRAT memory ranges, seeded with
 * pdx_init_mask(@addr), and hand it to pfn_pdx_hole_setup().
 *
 * Does nothing when ACPI is disabled, acpi_numa is negative, or parsing the
 * SRAT returns non-zero.  The mask is dropped to 0 as soon as an E820 RAM
 * range has address bits the mask does not contain.
 */
void __init srat_parse_regions(u64 addr)
{
	unsigned int i;
	u64 mask;

	if (acpi_disabled || acpi_numa < 0 ||
	    acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat))
		return;

	srat_region_mask = pdx_init_mask(addr);
	acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
			      srat_parse_region, 0);

	mask = srat_region_mask;
	for (i = 0; mask && i < e820.nr_map; i++) {
		if (e820.map[i].type == E820_RAM &&
		    (~mask & pdx_region_mask(e820.map[i].addr,
					     e820.map[i].size)))
			mask = 0;
	}

	pfn_pdx_hole_setup(mask >> PAGE_SHIFT);
}
450
451
/* Use the information discovered above to actually set up the nodes. */
452
int __init acpi_scan_nodes(u64 start, u64 end)
453
1
{
454
1
  int i;
455
1
  nodemask_t all_nodes_parsed;
456
1
457
1
  /* First clean up the node list */
458
65
  for (i = 0; i < MAX_NUMNODES; i++)
459
64
    cutoff_node(i, start, end);
460
1
461
1
  if (acpi_numa <= 0)
462
1
    return -1;
463
1
464
0
  if (!nodes_cover_memory()) {
465
0
    bad_srat();
466
0
    return -1;
467
0
  }
468
0
469
0
  memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
470
0
        memblk_nodeid);
471
0
472
0
  if (memnode_shift < 0) {
473
0
    printk(KERN_ERR
474
0
         "SRAT: No NUMA node hash function found. Contact maintainer\n");
475
0
    bad_srat();
476
0
    return -1;
477
0
  }
478
0
479
0
  nodes_or(all_nodes_parsed, memory_nodes_parsed, processor_nodes_parsed);
480
0
481
0
  /* Finally register nodes */
482
0
  for_each_node_mask(i, all_nodes_parsed)
483
0
  {
484
0
    u64 size = nodes[i].end - nodes[i].start;
485
0
    if ( size == 0 )
486
0
      printk(KERN_WARNING "SRAT: Node %u has no memory. "
487
0
             "BIOS Bug or mis-configured hardware?\n", i);
488
0
489
0
    setup_node_bootmem(i, nodes[i].start, nodes[i].end);
490
0
  }
491
0
  for (i = 0; i < nr_cpu_ids; i++) {
492
0
    if (cpu_to_node[i] == NUMA_NO_NODE)
493
0
      continue;
494
0
    if (!node_isset(cpu_to_node[i], processor_nodes_parsed))
495
0
      numa_set_node(i, NUMA_NO_NODE);
496
0
  }
497
0
  numa_init_array();
498
0
  return 0;
499
0
}
500
501
/*
 * Reverse lookup: the proximity domain recorded for node @n.
 *
 * Returns 0 when no slot of pxm2node[] refers to @n.
 */
static unsigned node_to_pxm(nodeid_t n)
{
	unsigned int slot;

	/* Try the slot whose index equals the node number first. */
	if (n < ARRAY_SIZE(pxm2node) && pxm2node[n].node == n)
		return pxm2node[n].pxm;

	for (slot = 0; slot < ARRAY_SIZE(pxm2node); slot++) {
		if (pxm2node[slot].node != n)
			continue;
		return pxm2node[slot].pxm;
	}

	return 0;
}
512
513
/*
 * Distance between nodes @a and @b.
 *
 * Without a usable SLIT copy, or when either node's proximity domain has no
 * row/column in it, fall back to 10 for a node to itself and 20 otherwise.
 * SLIT values of 0xff or below 10 are reported as NUMA_NO_DISTANCE.
 */
u8 __node_distance(nodeid_t a, nodeid_t b)
{
	unsigned pxm_a, pxm_b;
	u8 slit_val;

	if (!acpi_slit)
		return a == b ? 10 : 20;

	pxm_a = node_to_pxm(a);
	pxm_b = node_to_pxm(b);

	/*
	 * The matrix has locality_count rows and columns; a proximity domain
	 * beyond that would make the lookup below read past the table.
	 */
	if (pxm_a >= acpi_slit->locality_count ||
	    pxm_b >= acpi_slit->locality_count)
		return a == b ? 10 : 20;

	slit_val = acpi_slit->entry[acpi_slit->locality_count * pxm_a + pxm_b];

	/* ACPI defines 0xff as an unreachable node and 0-9 are undefined */
	if ((slit_val == 0xff) || (slit_val <= 9))
		return NUMA_NO_DISTANCE;

	return slit_val;
}

EXPORT_SYMBOL(__node_distance);