debuggers.hg

view xen/arch/x86/e820.c @ 16586:cd5e1e76d0bc

32-on-64: Fix domain address-size clamping, implement
copy-on-grant-transfer, and eliminate 166GB memory limit for x86/64
Xen.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Dec 06 13:39:19 2007 +0000 (2007-12-06)
parents 9c567284436e
children 9ab9dadf4876
line source
1 #include <xen/config.h>
2 #include <xen/init.h>
3 #include <xen/lib.h>
4 #include <xen/compat.h>
5 #include <xen/dmi.h>
6 #include <asm/e820.h>
7 #include <asm/page.h>
9 /* opt_mem: Limit of physical RAM. Any RAM beyond this point is ignored. */
10 unsigned long long opt_mem;
11 static void parse_mem(char *s) { opt_mem = parse_size_and_unit(s, NULL); }
12 custom_param("mem", parse_mem);
14 struct e820map e820;
16 static void __init add_memory_region(unsigned long long start,
17 unsigned long long size, int type)
18 {
19 int x;
21 /*if (!efi_enabled)*/ {
22 x = e820.nr_map;
24 if (x == E820MAX) {
25 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
26 return;
27 }
29 e820.map[x].addr = start;
30 e820.map[x].size = size;
31 e820.map[x].type = type;
32 e820.nr_map++;
33 }
34 } /* add_memory_region */
36 static void __init print_e820_memory_map(struct e820entry *map, int entries)
37 {
38 int i;
40 for (i = 0; i < entries; i++) {
41 printk(" %016Lx - %016Lx ",
42 (unsigned long long)(map[i].addr),
43 (unsigned long long)(map[i].addr + map[i].size));
44 switch (map[i].type) {
45 case E820_RAM:
46 printk("(usable)\n");
47 break;
48 case E820_RESERVED:
49 printk("(reserved)\n");
50 break;
51 case E820_ACPI:
52 printk("(ACPI data)\n");
53 break;
54 case E820_NVS:
55 printk("(ACPI NVS)\n");
56 break;
57 case E820_UNUSABLE:
58 printk("(unusable)\n");
59 break;
60 default:
61 printk("type %u\n", map[i].type);
62 break;
63 }
64 }
65 }
67 /*
68 * Sanitize the BIOS e820 map.
69 *
70 * Some e820 responses include overlapping entries. The following
71 * replaces the original e820 map with a new one, removing overlaps.
72 *
73 */
74 struct change_member {
75 struct e820entry *pbios; /* pointer to original bios entry */
76 unsigned long long addr; /* address for this change point */
77 };
78 static struct change_member change_point_list[2*E820MAX] __initdata;
79 static struct change_member *change_point[2*E820MAX] __initdata;
80 static struct e820entry *overlap_list[E820MAX] __initdata;
81 static struct e820entry new_bios[E820MAX] __initdata;
83 static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
84 {
85 struct change_member *change_tmp;
86 unsigned long current_type, last_type;
87 unsigned long long last_addr;
88 int chgidx, still_changing;
89 int overlap_entries;
90 int new_bios_entry;
91 int old_nr, new_nr, chg_nr;
92 int i;
94 /*
95 Visually we're performing the following (1,2,3,4 = memory types)...
97 Sample memory map (w/overlaps):
98 ____22__________________
99 ______________________4_
100 ____1111________________
101 _44_____________________
102 11111111________________
103 ____________________33__
104 ___________44___________
105 __________33333_________
106 ______________22________
107 ___________________2222_
108 _________111111111______
109 _____________________11_
110 _________________4______
112 Sanitized equivalent (no overlap):
113 1_______________________
114 _44_____________________
115 ___1____________________
116 ____22__________________
117 ______11________________
118 _________1______________
119 __________3_____________
120 ___________44___________
121 _____________33_________
122 _______________2________
123 ________________1_______
124 _________________4______
125 ___________________2____
126 ____________________33__
127 ______________________4_
128 */
130 /* if there's only one memory region, don't bother */
131 if (*pnr_map < 2)
132 return -1;
134 old_nr = *pnr_map;
136 /* bail out if we find any unreasonable addresses in bios map */
137 for (i=0; i<old_nr; i++)
138 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
139 return -1;
141 /* create pointers for initial change-point information (for sorting) */
142 for (i=0; i < 2*old_nr; i++)
143 change_point[i] = &change_point_list[i];
145 /* record all known change-points (starting and ending addresses),
146 omitting those that are for empty memory regions */
147 chgidx = 0;
148 for (i=0; i < old_nr; i++) {
149 if (biosmap[i].size != 0) {
150 change_point[chgidx]->addr = biosmap[i].addr;
151 change_point[chgidx++]->pbios = &biosmap[i];
152 change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
153 change_point[chgidx++]->pbios = &biosmap[i];
154 }
155 }
156 chg_nr = chgidx; /* true number of change-points */
158 /* sort change-point list by memory addresses (low -> high) */
159 still_changing = 1;
160 while (still_changing) {
161 still_changing = 0;
162 for (i=1; i < chg_nr; i++) {
163 /* if <current_addr> > <last_addr>, swap */
164 /* or, if current=<start_addr> & last=<end_addr>, swap */
165 if ((change_point[i]->addr < change_point[i-1]->addr) ||
166 ((change_point[i]->addr == change_point[i-1]->addr) &&
167 (change_point[i]->addr == change_point[i]->pbios->addr) &&
168 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
169 )
170 {
171 change_tmp = change_point[i];
172 change_point[i] = change_point[i-1];
173 change_point[i-1] = change_tmp;
174 still_changing=1;
175 }
176 }
177 }
179 /* create a new bios memory map, removing overlaps */
180 overlap_entries=0; /* number of entries in the overlap table */
181 new_bios_entry=0; /* index for creating new bios map entries */
182 last_type = 0; /* start with undefined memory type */
183 last_addr = 0; /* start with 0 as last starting address */
184 /* loop through change-points, determining affect on the new bios map */
185 for (chgidx=0; chgidx < chg_nr; chgidx++)
186 {
187 /* keep track of all overlapping bios entries */
188 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
189 {
190 /* add map entry to overlap list (> 1 entry implies an overlap) */
191 overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
192 }
193 else
194 {
195 /* remove entry from list (order independent, so swap with last) */
196 for (i=0; i<overlap_entries; i++)
197 {
198 if (overlap_list[i] == change_point[chgidx]->pbios)
199 overlap_list[i] = overlap_list[overlap_entries-1];
200 }
201 overlap_entries--;
202 }
203 /* if there are overlapping entries, decide which "type" to use */
204 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
205 current_type = 0;
206 for (i=0; i<overlap_entries; i++)
207 if (overlap_list[i]->type > current_type)
208 current_type = overlap_list[i]->type;
209 /* continue building up new bios map based on this information */
210 if (current_type != last_type) {
211 if (last_type != 0) {
212 new_bios[new_bios_entry].size =
213 change_point[chgidx]->addr - last_addr;
214 /* move forward only if the new size was non-zero */
215 if (new_bios[new_bios_entry].size != 0)
216 if (++new_bios_entry >= E820MAX)
217 break; /* no more space left for new bios entries */
218 }
219 if (current_type != 0) {
220 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
221 new_bios[new_bios_entry].type = current_type;
222 last_addr=change_point[chgidx]->addr;
223 }
224 last_type = current_type;
225 }
226 }
227 new_nr = new_bios_entry; /* retain count for new bios entries */
229 /* copy new bios mapping into original location */
230 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
231 *pnr_map = new_nr;
233 return 0;
234 }
236 /*
237 * Copy the BIOS e820 map into a safe place.
238 *
239 * Sanity-check it while we're at it..
240 *
241 * If we're lucky and live on a modern system, the setup code
242 * will have given us a memory map that we can use to properly
243 * set up memory. If we aren't, we'll fake a memory map.
244 *
245 * We check to see that the memory map contains at least 2 elements
246 * before we'll use it, because the detection code in setup.S may
247 * not be perfect and most every PC known to man has two memory
248 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
249 * thinkpad 560x, for example, does not cooperate with the memory
250 * detection code.)
251 */
252 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
253 {
254 /* Only one memory region (or negative)? Ignore it */
255 if (nr_map < 2)
256 return -1;
258 do {
259 unsigned long long start = biosmap->addr;
260 unsigned long long size = biosmap->size;
261 unsigned long long end = start + size;
262 unsigned long type = biosmap->type;
264 /* Overflow in 64 bits? Ignore the memory map. */
265 if (start > end)
266 return -1;
268 /*
269 * Some BIOSes claim RAM in the 640k - 1M region.
270 * Not right. Fix it up.
271 */
272 if (type == E820_RAM) {
273 if (start < 0x100000ULL && end > 0xA0000ULL) {
274 if (start < 0xA0000ULL)
275 add_memory_region(start, 0xA0000ULL-start, type);
276 if (end <= 0x100000ULL)
277 continue;
278 start = 0x100000ULL;
279 size = end - start;
280 }
281 }
282 add_memory_region(start, size, type);
283 } while (biosmap++,--nr_map);
284 return 0;
285 }
288 /*
289 * Find the highest page frame number we have available
290 */
291 static unsigned long __init find_max_pfn(void)
292 {
293 int i;
294 unsigned long max_pfn = 0;
296 #if 0
297 if (efi_enabled) {
298 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
299 return;
300 }
301 #endif
303 for (i = 0; i < e820.nr_map; i++) {
304 unsigned long start, end;
305 /* RAM? */
306 if (e820.map[i].type != E820_RAM)
307 continue;
308 start = PFN_UP(e820.map[i].addr);
309 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
310 if (start >= end)
311 continue;
312 if (end > max_pfn)
313 max_pfn = end;
314 }
316 return max_pfn;
317 }
319 static void __init clip_to_limit(uint64_t limit, char *warnmsg)
320 {
321 int i;
322 char _warnmsg[160];
324 for ( i = 0; i < e820.nr_map; i++ )
325 {
326 if ( (e820.map[i].addr + e820.map[i].size) <= limit )
327 continue;
328 if ( warnmsg )
329 {
330 snprintf(_warnmsg, sizeof(_warnmsg), warnmsg, (int)(limit>>30));
331 printk("WARNING: %s\n", _warnmsg);
332 }
333 printk("Truncating memory map to %lukB\n",
334 (unsigned long)(limit >> 10));
335 if ( e820.map[i].addr >= limit )
336 {
337 e820.nr_map = i;
338 }
339 else
340 {
341 e820.map[i].size = limit - e820.map[i].addr;
342 e820.nr_map = i + 1;
343 }
344 }
345 }
347 static void __init reserve_dmi_region(void)
348 {
349 u32 base, len;
350 if ( (dmi_get_table(&base, &len) == 0) && ((base + len) > base) &&
351 reserve_e820_ram(&e820, base, base + len) )
352 printk("WARNING: DMI table located in E820 RAM %08x-%08x. Fixed.\n",
353 base, base+len);
354 }
356 static void __init machine_specific_memory_setup(
357 struct e820entry *raw, int *raw_nr)
358 {
359 char nr = (char)*raw_nr;
360 sanitize_e820_map(raw, &nr);
361 *raw_nr = nr;
362 (void)copy_e820_map(raw, nr);
364 if ( opt_mem )
365 clip_to_limit(opt_mem, NULL);
367 #ifdef __i386__
368 clip_to_limit((1ULL << 30) * MACHPHYS_MBYTES,
369 "Only the first %u GB of the physical memory map "
370 "can be accessed by Xen in 32-bit mode.");
371 #endif
373 reserve_dmi_region();
374 }
376 /* Reserve RAM area (@s,@e) in the specified e820 map. */
377 int __init reserve_e820_ram(struct e820map *e820, uint64_t s, uint64_t e)
378 {
379 uint64_t rs = 0, re = 0;
380 int i;
382 for ( i = 0; i < e820->nr_map; i++ )
383 {
384 /* Have we found the e820 region that includes the specified range? */
385 rs = e820->map[i].addr;
386 re = rs + e820->map[i].size;
387 if ( (s >= rs) && (e <= re) )
388 break;
389 }
391 if ( (i == e820->nr_map) || (e820->map[i].type != E820_RAM) )
392 return 0;
394 if ( (s == rs) && (e == re) )
395 {
396 /* Complete excision. */
397 memmove(&e820->map[i], &e820->map[i+1],
398 (e820->nr_map-i-1) * sizeof(e820->map[0]));
399 e820->nr_map--;
400 }
401 else if ( s == rs )
402 {
403 /* Truncate start. */
404 e820->map[i].addr += e - s;
405 e820->map[i].size -= e - s;
406 }
407 else if ( e == re )
408 {
409 /* Truncate end. */
410 e820->map[i].size -= e - s;
411 }
412 else if ( e820->nr_map < ARRAY_SIZE(e820->map) )
413 {
414 /* Split in two. */
415 memmove(&e820->map[i+1], &e820->map[i],
416 (e820->nr_map-i) * sizeof(e820->map[0]));
417 e820->nr_map++;
418 e820->map[i].size = s - rs;
419 i++;
420 e820->map[i].addr = e;
421 e820->map[i].size = re - e;
422 }
423 else
424 {
425 /* e820map is at maximum size. We have to leak some space. */
426 if ( (s - rs) > (re - e) )
427 {
428 printk("e820 overflow: leaking RAM %"PRIx64"-%"PRIx64"\n", e, re);
429 e820->map[i].size = s - rs;
430 }
431 else
432 {
433 printk("e820 overflow: leaking RAM %"PRIx64"-%"PRIx64"\n", rs, s);
434 e820->map[i].addr = e;
435 e820->map[i].size = re - e;
436 }
437 }
439 return 1;
440 }
442 unsigned long __init init_e820(
443 const char *str, struct e820entry *raw, int *raw_nr)
444 {
445 machine_specific_memory_setup(raw, raw_nr);
446 printk("%s RAM map:\n", str);
447 print_e820_memory_map(e820.map, e820.nr_map);
448 return find_max_pfn();
449 }