debuggers.hg

view linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c @ 6680:d0a4f770a5f4

phys_to_mach and mach_to_phys tables contain long entries, not
32-bit entries.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Tue Sep 06 18:01:24 2005 +0000 (2005-09-06)
parents d105692072a4
children 8db9c5873b9b
line source
1 /*
2 * linux/arch/i386/kernel/setup.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 *
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
7 *
8 * Memory region support
9 * David Parsons <orc@pell.chi.il.us>, July-August 1999
10 *
11 * Added E820 sanitization routine (removes overlapping memory regions);
12 * Brian Moyle <bmoyle@mvista.com>, February 2001
13 *
14 * Moved CPU detection code to cpu/${cpu}.c
15 * Patrick Mochel <mochel@osdl.org>, March 2002
16 *
17 * Provisions for empty E820 memory regions (reported by certain BIOSes).
18 * Alex Achenbach <xela@slit.de>, December 2002.
19 *
20 */
22 /*
23 * This file handles the architecture-dependent parts of initialization
24 */
26 #include <linux/sched.h>
27 #include <linux/mm.h>
28 #include <linux/tty.h>
29 #include <linux/ioport.h>
30 #include <linux/acpi.h>
31 #include <linux/apm_bios.h>
32 #include <linux/initrd.h>
33 #include <linux/bootmem.h>
34 #include <linux/seq_file.h>
35 #include <linux/console.h>
36 #include <linux/mca.h>
37 #include <linux/root_dev.h>
38 #include <linux/highmem.h>
39 #include <linux/module.h>
40 #include <linux/efi.h>
41 #include <linux/init.h>
42 #include <linux/edd.h>
43 #include <linux/nodemask.h>
44 #include <linux/kernel.h>
45 #include <linux/percpu.h>
46 #include <linux/notifier.h>
47 #include <video/edid.h>
48 #include <asm/e820.h>
49 #include <asm/mpspec.h>
50 #include <asm/setup.h>
51 #include <asm/arch_hooks.h>
52 #include <asm/sections.h>
53 #include <asm/io_apic.h>
54 #include <asm/ist.h>
55 #include <asm/io.h>
56 #include <asm-xen/hypervisor.h>
57 #include <asm-xen/xen-public/physdev.h>
58 #include <asm-xen/xen-public/memory.h>
59 #include "setup_arch_pre.h"
60 #include <bios_ebda.h>
62 /* Allows setting of maximum possible memory size */
63 static unsigned long xen_override_max_pfn;
65 static int xen_panic_event(struct notifier_block *, unsigned long, void *);
66 static struct notifier_block xen_panic_block = {
67 xen_panic_event, NULL, 0 /* try to go last */
68 };
70 int disable_pse __initdata = 0;
72 /*
73 * Machine setup..
74 */
76 #ifdef CONFIG_EFI
77 int efi_enabled = 0;
78 EXPORT_SYMBOL(efi_enabled);
79 #endif
81 /* cpu data as detected by the assembly code in head.S */
82 struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 0, 1, 0, -1 };
83 /* common cpu data for all cpus */
84 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 0, 1, 0, -1 };
86 unsigned long mmu_cr4_features;
88 #ifdef CONFIG_ACPI_INTERPRETER
89 int acpi_disabled = 0;
90 #else
91 int acpi_disabled = 1;
92 #endif
93 EXPORT_SYMBOL(acpi_disabled);
95 #ifdef CONFIG_ACPI_BOOT
96 int __initdata acpi_force = 0;
97 extern acpi_interrupt_flags acpi_sci_flags;
98 #endif
100 /* for MCA, but anyone else can use it if they want */
101 unsigned int machine_id;
102 unsigned int machine_submodel_id;
103 unsigned int BIOS_revision;
104 unsigned int mca_pentium_flag;
106 /* For PCI or other memory-mapped resources */
107 unsigned long pci_mem_start = 0x10000000;
109 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
110 int bootloader_type;
112 /* user-defined highmem size */
113 static unsigned int highmem_pages = -1;
115 /*
116 * Setup options
117 */
118 struct drive_info_struct { char dummy[32]; } drive_info;
119 struct screen_info screen_info;
120 struct apm_info apm_info;
121 struct sys_desc_table_struct {
122 unsigned short length;
123 unsigned char table[0];
124 };
125 struct edid_info edid_info;
126 struct ist_info ist_info;
127 struct e820map e820;
129 extern void early_cpu_init(void);
130 extern void dmi_scan_machine(void);
131 extern void generic_apic_probe(char *);
132 extern int root_mountflags;
134 unsigned long saved_videomode;
136 #define RAMDISK_IMAGE_START_MASK 0x07FF
137 #define RAMDISK_PROMPT_FLAG 0x8000
138 #define RAMDISK_LOAD_FLAG 0x4000
140 static char command_line[COMMAND_LINE_SIZE];
142 unsigned char __initdata boot_params[PARAM_SIZE];
144 static struct resource data_resource = {
145 .name = "Kernel data",
146 .start = 0,
147 .end = 0,
148 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
149 };
151 static struct resource code_resource = {
152 .name = "Kernel code",
153 .start = 0,
154 .end = 0,
155 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
156 };
158 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
159 static struct resource system_rom_resource = {
160 .name = "System ROM",
161 .start = 0xf0000,
162 .end = 0xfffff,
163 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
164 };
166 static struct resource extension_rom_resource = {
167 .name = "Extension ROM",
168 .start = 0xe0000,
169 .end = 0xeffff,
170 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
171 };
173 static struct resource adapter_rom_resources[] = { {
174 .name = "Adapter ROM",
175 .start = 0xc8000,
176 .end = 0,
177 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
178 }, {
179 .name = "Adapter ROM",
180 .start = 0,
181 .end = 0,
182 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
183 }, {
184 .name = "Adapter ROM",
185 .start = 0,
186 .end = 0,
187 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
188 }, {
189 .name = "Adapter ROM",
190 .start = 0,
191 .end = 0,
192 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
193 }, {
194 .name = "Adapter ROM",
195 .start = 0,
196 .end = 0,
197 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
198 }, {
199 .name = "Adapter ROM",
200 .start = 0,
201 .end = 0,
202 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
203 } };
205 #define ADAPTER_ROM_RESOURCES \
206 (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
208 static struct resource video_rom_resource = {
209 .name = "Video ROM",
210 .start = 0xc0000,
211 .end = 0xc7fff,
212 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
213 };
214 #endif
216 static struct resource video_ram_resource = {
217 .name = "Video RAM area",
218 .start = 0xa0000,
219 .end = 0xbffff,
220 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
221 };
223 static struct resource standard_io_resources[] = { {
224 .name = "dma1",
225 .start = 0x0000,
226 .end = 0x001f,
227 .flags = IORESOURCE_BUSY | IORESOURCE_IO
228 }, {
229 .name = "pic1",
230 .start = 0x0020,
231 .end = 0x0021,
232 .flags = IORESOURCE_BUSY | IORESOURCE_IO
233 }, {
234 .name = "timer0",
235 .start = 0x0040,
236 .end = 0x0043,
237 .flags = IORESOURCE_BUSY | IORESOURCE_IO
238 }, {
239 .name = "timer1",
240 .start = 0x0050,
241 .end = 0x0053,
242 .flags = IORESOURCE_BUSY | IORESOURCE_IO
243 }, {
244 .name = "keyboard",
245 .start = 0x0060,
246 .end = 0x006f,
247 .flags = IORESOURCE_BUSY | IORESOURCE_IO
248 }, {
249 .name = "dma page reg",
250 .start = 0x0080,
251 .end = 0x008f,
252 .flags = IORESOURCE_BUSY | IORESOURCE_IO
253 }, {
254 .name = "pic2",
255 .start = 0x00a0,
256 .end = 0x00a1,
257 .flags = IORESOURCE_BUSY | IORESOURCE_IO
258 }, {
259 .name = "dma2",
260 .start = 0x00c0,
261 .end = 0x00df,
262 .flags = IORESOURCE_BUSY | IORESOURCE_IO
263 }, {
264 .name = "fpu",
265 .start = 0x00f0,
266 .end = 0x00ff,
267 .flags = IORESOURCE_BUSY | IORESOURCE_IO
268 } };
270 #define STANDARD_IO_RESOURCES \
271 (sizeof standard_io_resources / sizeof standard_io_resources[0])
273 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
274 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
276 static int __init romchecksum(unsigned char *rom, unsigned long length)
277 {
278 unsigned char *p, sum = 0;
280 for (p = rom; p < rom + length; p++)
281 sum += *p;
282 return sum == 0;
283 }
/*
 * probe_roms() - scan the legacy ISA hole (0xC0000-0xFFFFF) for BIOS,
 * video, extension and adapter option ROMs and claim them in
 * iomem_resource.  Only meaningful in dom0; other Xen domains see no
 * real ROM images, hence the SIF_INITDOMAIN early-out below.
 *
 * NOTE(review): the leading integers on each line are embedded
 * artifacts of the hgweb dump this chunk came from, not C code.
 */
285 static void __init probe_roms(void)
286 {
287 unsigned long start, length, upper;
288 unsigned char *rom;
289 int i;
291 /* Nothing to do if not running in dom0. */
292 if (!(xen_start_info->flags & SIF_INITDOMAIN))
293 return;
/* ROM images start on 2KB boundaries and begin with signature 0xAA55. */
295 /* video rom */
296 upper = adapter_rom_resources[0].start;
297 for (start = video_rom_resource.start; start < upper; start += 2048) {
298 rom = isa_bus_to_virt(start);
299 if (!romsignature(rom))
300 continue;
302 video_rom_resource.start = start;
/* byte 2 of the ROM header is its size in 512-byte units */
304 /* 0 < length <= 0x7f * 512, historically */
305 length = rom[2] * 512;
307 /* if checksum okay, trust length byte */
308 if (length && romchecksum(rom, length))
309 video_rom_resource.end = start + length - 1;
311 request_resource(&iomem_resource, &video_rom_resource);
312 break;
313 }
/* resume the scan at the next 2KB boundary past the video ROM */
315 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
316 if (start < upper)
317 start = upper;
319 /* system rom */
320 request_resource(&iomem_resource, &system_rom_resource);
321 upper = system_rom_resource.start;
323 /* check for extension rom (ignore length byte!) */
324 rom = isa_bus_to_virt(extension_rom_resource.start);
325 if (romsignature(rom)) {
326 length = extension_rom_resource.end - extension_rom_resource.start + 1;
327 if (romchecksum(rom, length)) {
328 request_resource(&iomem_resource, &extension_rom_resource);
329 upper = extension_rom_resource.start;
330 }
331 }
333 /* check for adapter roms on 2k boundaries */
334 for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
335 rom = isa_bus_to_virt(start);
336 if (!romsignature(rom))
337 continue;
339 /* 0 < length <= 0x7f * 512, historically */
340 length = rom[2] * 512;
342 /* but accept any length that fits if checksum okay */
343 if (!length || start + length > upper || !romchecksum(rom, length))
344 continue;
346 adapter_rom_resources[i].start = start;
347 adapter_rom_resources[i].end = start + length - 1;
348 request_resource(&iomem_resource, &adapter_rom_resources[i]);
/* i only advances on a hit; next probe starts after this ROM */
350 start = adapter_rom_resources[i++].end & ~2047UL;
351 }
352 }
353 #endif
355 /*
356 * Point at the empty zero page to start with. We map the real shared_info
357 * page as soon as fixmap is up and running.
358 */
359 shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
360 EXPORT_SYMBOL(HYPERVISOR_shared_info);
362 unsigned long *phys_to_machine_mapping, *pfn_to_mfn_frame_list;
363 EXPORT_SYMBOL(phys_to_machine_mapping);
365 /* Raw start-of-day parameters from the hypervisor. */
366 start_info_t *xen_start_info;
/*
 * limit_regions() - truncate the firmware memory map (EFI memmap when
 * EFI is enabled, e820 otherwise) so that no usable RAM extends past
 * @size bytes.  The first region crossing the limit is shrunk and the
 * map's entry count is cut at that region; later entries are dropped.
 *
 * NOTE(review): leading integers are hgweb dump artifacts, not code.
 */
368 static void __init limit_regions(unsigned long long size)
369 {
370 unsigned long long current_addr = 0;
371 int i;
373 if (efi_enabled) {
374 for (i = 0; i < memmap.nr_map; i++) {
/* num_pages is in 4KB units, hence the << 12 */
375 current_addr = memmap.map[i].phys_addr +
376 (memmap.map[i].num_pages << 12);
377 if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
378 if (current_addr >= size) {
/* shave off the excess, rounded up to whole pages */
379 memmap.map[i].num_pages -=
380 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
381 memmap.nr_map = i + 1;
382 return;
383 }
384 }
385 }
386 }
387 for (i = 0; i < e820.nr_map; i++) {
388 if (e820.map[i].type == E820_RAM) {
389 current_addr = e820.map[i].addr + e820.map[i].size;
390 if (current_addr >= size) {
391 e820.map[i].size -= current_addr-size;
392 e820.nr_map = i + 1;
393 return;
394 }
395 }
396 }
397 }
399 static void __init add_memory_region(unsigned long long start,
400 unsigned long long size, int type)
401 {
402 int x;
404 if (!efi_enabled) {
405 x = e820.nr_map;
407 if (x == E820MAX) {
408 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
409 return;
410 }
412 e820.map[x].addr = start;
413 e820.map[x].size = size;
414 e820.map[x].type = type;
415 e820.nr_map++;
416 }
417 } /* add_memory_region */
419 #define E820_DEBUG 1
421 static void __init print_memory_map(char *who)
422 {
423 int i;
425 for (i = 0; i < e820.nr_map; i++) {
426 printk(" %s: %016Lx - %016Lx ", who,
427 e820.map[i].addr,
428 e820.map[i].addr + e820.map[i].size);
429 switch (e820.map[i].type) {
430 case E820_RAM: printk("(usable)\n");
431 break;
432 case E820_RESERVED:
433 printk("(reserved)\n");
434 break;
435 case E820_ACPI:
436 printk("(ACPI data)\n");
437 break;
438 case E820_NVS:
439 printk("(ACPI NVS)\n");
440 break;
441 default: printk("type %lu\n", e820.map[i].type);
442 break;
443 }
444 }
445 }
/*
 * NOTE(review): everything down to the matching #endif is compiled out
 * (#if 0) in this Xen port — the hypervisor supplies the memory map,
 * so the native BIOS e820 sanitizer/copier below is dead code kept for
 * reference.  Leading integers are hgweb dump artifacts, not code.
 */
447 #if 0
448 /*
449 * Sanitize the BIOS e820 map.
450 *
451 * Some e820 responses include overlapping entries. The following
452 * replaces the original e820 map with a new one, removing overlaps.
453 *
454 */
455 struct change_member {
456 struct e820entry *pbios; /* pointer to original bios entry */
457 unsigned long long addr; /* address for this change point */
458 };
459 static struct change_member change_point_list[2*E820MAX] __initdata;
460 static struct change_member *change_point[2*E820MAX] __initdata;
461 static struct e820entry *overlap_list[E820MAX] __initdata;
462 static struct e820entry new_bios[E820MAX] __initdata;
464 static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
465 {
466 struct change_member *change_tmp;
467 unsigned long current_type, last_type;
468 unsigned long long last_addr;
469 int chgidx, still_changing;
470 int overlap_entries;
471 int new_bios_entry;
472 int old_nr, new_nr, chg_nr;
473 int i;
475 /*
476 Visually we're performing the following (1,2,3,4 = memory types)...
478 Sample memory map (w/overlaps):
479 ____22__________________
480 ______________________4_
481 ____1111________________
482 _44_____________________
483 11111111________________
484 ____________________33__
485 ___________44___________
486 __________33333_________
487 ______________22________
488 ___________________2222_
489 _________111111111______
490 _____________________11_
491 _________________4______
493 Sanitized equivalent (no overlap):
494 1_______________________
495 _44_____________________
496 ___1____________________
497 ____22__________________
498 ______11________________
499 _________1______________
500 __________3_____________
501 ___________44___________
502 _____________33_________
503 _______________2________
504 ________________1_______
505 _________________4______
506 ___________________2____
507 ____________________33__
508 ______________________4_
509 */
511 /* if there's only one memory region, don't bother */
512 if (*pnr_map < 2)
513 return -1;
515 old_nr = *pnr_map;
517 /* bail out if we find any unreasonable addresses in bios map */
518 for (i=0; i<old_nr; i++)
519 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
520 return -1;
522 /* create pointers for initial change-point information (for sorting) */
523 for (i=0; i < 2*old_nr; i++)
524 change_point[i] = &change_point_list[i];
526 /* record all known change-points (starting and ending addresses),
527 omitting those that are for empty memory regions */
528 chgidx = 0;
529 for (i=0; i < old_nr; i++) {
530 if (biosmap[i].size != 0) {
531 change_point[chgidx]->addr = biosmap[i].addr;
532 change_point[chgidx++]->pbios = &biosmap[i];
533 change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
534 change_point[chgidx++]->pbios = &biosmap[i];
535 }
536 }
537 chg_nr = chgidx; /* true number of change-points */
/* simple bubble sort — chg_nr is at most 2*E820MAX, so fine at boot */
539 /* sort change-point list by memory addresses (low -> high) */
540 still_changing = 1;
541 while (still_changing) {
542 still_changing = 0;
543 for (i=1; i < chg_nr; i++) {
544 /* if <current_addr> > <last_addr>, swap */
545 /* or, if current=<start_addr> & last=<end_addr>, swap */
546 if ((change_point[i]->addr < change_point[i-1]->addr) ||
547 ((change_point[i]->addr == change_point[i-1]->addr) &&
548 (change_point[i]->addr == change_point[i]->pbios->addr) &&
549 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
550 )
551 {
552 change_tmp = change_point[i];
553 change_point[i] = change_point[i-1];
554 change_point[i-1] = change_tmp;
555 still_changing=1;
556 }
557 }
558 }
560 /* create a new bios memory map, removing overlaps */
561 overlap_entries=0; /* number of entries in the overlap table */
562 new_bios_entry=0; /* index for creating new bios map entries */
563 last_type = 0; /* start with undefined memory type */
564 last_addr = 0; /* start with 0 as last starting address */
565 /* loop through change-points, determining affect on the new bios map */
566 for (chgidx=0; chgidx < chg_nr; chgidx++)
567 {
568 /* keep track of all overlapping bios entries */
569 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
570 {
571 /* add map entry to overlap list (> 1 entry implies an overlap) */
572 overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
573 }
574 else
575 {
576 /* remove entry from list (order independent, so swap with last) */
577 for (i=0; i<overlap_entries; i++)
578 {
579 if (overlap_list[i] == change_point[chgidx]->pbios)
580 overlap_list[i] = overlap_list[overlap_entries-1];
581 }
582 overlap_entries--;
583 }
584 /* if there are overlapping entries, decide which "type" to use */
585 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
586 current_type = 0;
587 for (i=0; i<overlap_entries; i++)
588 if (overlap_list[i]->type > current_type)
589 current_type = overlap_list[i]->type;
590 /* continue building up new bios map based on this information */
591 if (current_type != last_type) {
592 if (last_type != 0) {
593 new_bios[new_bios_entry].size =
594 change_point[chgidx]->addr - last_addr;
595 /* move forward only if the new size was non-zero */
596 if (new_bios[new_bios_entry].size != 0)
597 if (++new_bios_entry >= E820MAX)
598 break; /* no more space left for new bios entries */
599 }
600 if (current_type != 0) {
601 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
602 new_bios[new_bios_entry].type = current_type;
603 last_addr=change_point[chgidx]->addr;
604 }
605 last_type = current_type;
606 }
607 }
608 new_nr = new_bios_entry; /* retain count for new bios entries */
610 /* copy new bios mapping into original location */
611 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
612 *pnr_map = new_nr;
614 return 0;
615 }
617 /*
618 * Copy the BIOS e820 map into a safe place.
619 *
620 * Sanity-check it while we're at it..
621 *
622 * If we're lucky and live on a modern system, the setup code
623 * will have given us a memory map that we can use to properly
624 * set up memory. If we aren't, we'll fake a memory map.
625 *
626 * We check to see that the memory map contains at least 2 elements
627 * before we'll use it, because the detection code in setup.S may
628 * not be perfect and most every PC known to man has two memory
629 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
630 * thinkpad 560x, for example, does not cooperate with the memory
631 * detection code.)
632 */
633 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
634 {
635 /* Only one memory region (or negative)? Ignore it */
636 if (nr_map < 2)
637 return -1;
639 do {
640 unsigned long long start = biosmap->addr;
641 unsigned long long size = biosmap->size;
642 unsigned long long end = start + size;
643 unsigned long type = biosmap->type;
645 /* Overflow in 64 bits? Ignore the memory map. */
646 if (start > end)
647 return -1;
649 /*
650 * Some BIOSes claim RAM in the 640k - 1M region.
651 * Not right. Fix it up.
652 */
653 if (type == E820_RAM) {
654 if (start < 0x100000ULL && end > 0xA0000ULL) {
655 if (start < 0xA0000ULL)
656 add_memory_region(start, 0xA0000ULL-start, type);
657 if (end <= 0x100000ULL)
658 continue;
659 start = 0x100000ULL;
660 size = end - start;
661 }
662 }
663 add_memory_region(start, size, type);
664 } while (biosmap++,--nr_map);
665 return 0;
666 }
667 #endif
/*
 * copy_edd() - snapshot the BIOS Enhanced Disk Drive (EDD) data from
 * boot-time buffers into the global 'edd' struct before those buffers
 * are reclaimed.  Compiles to a no-op when EDD support is disabled.
 * NOTE(review): leading integers are hgweb dump artifacts, not code.
 */
669 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
670 struct edd edd;
671 #ifdef CONFIG_EDD_MODULE
672 EXPORT_SYMBOL(edd);
673 #endif
674 /**
675 * copy_edd() - Copy the BIOS EDD information
676 * from boot_params into a safe place.
677 *
678 */
679 static inline void copy_edd(void)
680 {
681 memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
682 memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
683 edd.mbr_signature_nr = EDD_MBR_SIG_NR;
684 edd.edd_info_nr = EDD_NR;
685 }
686 #else
/* Stub keeps callers unconditional when EDD is configured out. */
687 static inline void copy_edd(void)
688 {
689 }
690 #endif
692 /*
693 * Do NOT EVER look at the BIOS memory size location.
694 * It does not work on many machines.
695 */
696 #define LOWMEMSIZE() (0x9f000)
/*
 * parse_cmdline_early() - early scan of the kernel command line.
 *
 * Copies the Xen-provided command line into saved_command_line,
 * consumes early options (mem=, memmap=, noexec=, acpi*, highmem=,
 * vmalloc=, ...) and writes the remaining text to command_line,
 * returning it via *cmdline_p for the normal option parser.
 *
 * 'from' walks the input; 'to' builds the filtered output; consumed
 * options rewind 'to' by one so the preceding space is dropped too.
 *
 * NOTE(review): leading integers are hgweb dump artifacts, not code.
 */
698 static void __init parse_cmdline_early (char ** cmdline_p)
699 {
700 char c = ' ', *to = command_line, *from = saved_command_line;
701 int len = 0, max_cmdline;
702 int userdef = 0;
704 if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
705 max_cmdline = COMMAND_LINE_SIZE;
706 memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
707 /* Save unparsed command line copy for /proc/cmdline */
708 saved_command_line[max_cmdline-1] = '\0';
710 for (;;) {
/* options are only recognized at the start of a word */
711 if (c != ' ')
712 goto next_char;
713 /*
714 * "mem=nopentium" disables the 4MB page tables.
715 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
716 * to <mem>, overriding the bios size.
717 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
718 * <start> to <start>+<mem>, overriding the bios size.
719 *
720 * HPA tells me bootloaders need to parse mem=, so no new
721 * option should be mem= [also see Documentation/i386/boot.txt]
722 */
723 if (!memcmp(from, "mem=", 4)) {
724 if (to != command_line)
725 to--;
726 if (!memcmp(from+4, "nopentium", 9)) {
727 from += 9+4;
728 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
729 disable_pse = 1;
730 } else {
731 /* If the user specifies memory size, we
732 * limit the BIOS-provided memory map to
733 * that size. exactmap can be used to specify
734 * the exact map. mem=number can be used to
735 * trim the existing memory map.
736 */
737 unsigned long long mem_size;
739 mem_size = memparse(from+4, &from);
/* XEN: e820 trimming is bypassed; record a pfn cap instead */
740 #if 0
741 limit_regions(mem_size);
742 userdef=1;
743 #else
744 xen_override_max_pfn =
745 (unsigned long)(mem_size>>PAGE_SHIFT);
746 #endif
747 }
748 }
750 else if (!memcmp(from, "memmap=", 7)) {
751 if (to != command_line)
752 to--;
753 if (!memcmp(from+7, "exactmap", 8)) {
754 from += 8+7;
755 e820.nr_map = 0;
756 userdef = 1;
757 } else {
758 /* If the user specifies memory size, we
759 * limit the BIOS-provided memory map to
760 * that size. exactmap can be used to specify
761 * the exact map. mem=number can be used to
762 * trim the existing memory map.
763 */
764 unsigned long long start_at, mem_size;
766 mem_size = memparse(from+7, &from);
767 if (*from == '@') {
768 start_at = memparse(from+1, &from);
769 add_memory_region(start_at, mem_size, E820_RAM);
770 } else if (*from == '#') {
771 start_at = memparse(from+1, &from);
772 add_memory_region(start_at, mem_size, E820_ACPI);
773 } else if (*from == '$') {
774 start_at = memparse(from+1, &from);
775 add_memory_region(start_at, mem_size, E820_RESERVED);
776 } else {
777 limit_regions(mem_size);
778 userdef=1;
779 }
780 }
781 }
783 else if (!memcmp(from, "noexec=", 7))
784 noexec_setup(from + 7);
787 #ifdef CONFIG_X86_MPPARSE
788 /*
789 * If the BIOS enumerates physical processors before logical,
790 * maxcpus=N at enumeration-time can be used to disable HT.
791 */
792 else if (!memcmp(from, "maxcpus=", 8)) {
793 extern unsigned int maxcpus;
795 maxcpus = simple_strtoul(from + 8, NULL, 0);
796 }
797 #endif
799 #ifdef CONFIG_ACPI_BOOT
800 /* "acpi=off" disables both ACPI table parsing and interpreter */
801 else if (!memcmp(from, "acpi=off", 8)) {
802 disable_acpi();
803 }
805 /* acpi=force to over-ride black-list */
806 else if (!memcmp(from, "acpi=force", 10)) {
807 acpi_force = 1;
808 acpi_ht = 1;
809 acpi_disabled = 0;
810 }
812 /* acpi=strict disables out-of-spec workarounds */
813 else if (!memcmp(from, "acpi=strict", 11)) {
814 acpi_strict = 1;
815 }
817 /* Limit ACPI just to boot-time to enable HT */
818 else if (!memcmp(from, "acpi=ht", 7)) {
819 if (!acpi_force)
820 disable_acpi();
821 acpi_ht = 1;
822 }
824 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
825 else if (!memcmp(from, "pci=noacpi", 10)) {
826 acpi_disable_pci();
827 }
828 /* "acpi=noirq" disables ACPI interrupt routing */
829 else if (!memcmp(from, "acpi=noirq", 10)) {
830 acpi_noirq_set();
831 }
833 else if (!memcmp(from, "acpi_sci=edge", 13))
834 acpi_sci_flags.trigger = 1;
836 else if (!memcmp(from, "acpi_sci=level", 14))
837 acpi_sci_flags.trigger = 3;
839 else if (!memcmp(from, "acpi_sci=high", 13))
840 acpi_sci_flags.polarity = 1;
842 else if (!memcmp(from, "acpi_sci=low", 12))
843 acpi_sci_flags.polarity = 3;
845 #ifdef CONFIG_X86_IO_APIC
846 else if (!memcmp(from, "acpi_skip_timer_override", 24))
847 acpi_skip_timer_override = 1;
848 #endif
850 #ifdef CONFIG_X86_LOCAL_APIC
851 /* disable IO-APIC */
852 else if (!memcmp(from, "noapic", 6))
853 disable_ioapic_setup();
854 #endif /* CONFIG_X86_LOCAL_APIC */
855 #endif /* CONFIG_ACPI_BOOT */
857 /*
858 * highmem=size forces highmem to be exactly 'size' bytes.
859 * This works even on boxes that have no highmem otherwise.
860 * This also works to reduce highmem size on bigger boxes.
861 */
862 else if (!memcmp(from, "highmem=", 8))
863 highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
865 /*
866 * vmalloc=size forces the vmalloc area to be exactly 'size'
867 * bytes. This can be used to increase (or decrease) the
868 * vmalloc area - the default is 128m.
869 */
870 else if (!memcmp(from, "vmalloc=", 8))
871 __VMALLOC_RESERVE = memparse(from+8, &from);
873 next_char:
874 c = *(from++);
875 if (!c)
876 break;
877 if (COMMAND_LINE_SIZE <= ++len)
878 break;
879 *(to++) = c;
880 }
881 *to = '\0';
882 *cmdline_p = command_line;
883 if (userdef) {
884 printk(KERN_INFO "user-defined physical RAM map:\n");
885 print_memory_map("user");
886 }
887 }
/*
 * find_max_pfn() - establish the highest available page frame number.
 * The native (e820/EFI) implementation is compiled out under Xen; the
 * live variant below the #else trusts the hypervisor's nr_pages,
 * raised to any user "mem=" override captured in xen_override_max_pfn.
 * NOTE(review): leading integers are hgweb dump artifacts, not code.
 */
889 #if 0 /* !XEN */
890 /*
891 * Callback for efi_memory_walk.
892 */
893 static int __init
894 efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
895 {
896 unsigned long *max_pfn = arg, pfn;
898 if (start < end) {
899 pfn = PFN_UP(end -1);
900 if (pfn > *max_pfn)
901 *max_pfn = pfn;
902 }
903 return 0;
904 }
907 /*
908 * Find the highest page frame number we have available
909 */
910 void __init find_max_pfn(void)
911 {
912 int i;
914 max_pfn = 0;
915 if (efi_enabled) {
916 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
917 return;
918 }
920 for (i = 0; i < e820.nr_map; i++) {
921 unsigned long start, end;
922 /* RAM? */
923 if (e820.map[i].type != E820_RAM)
924 continue;
925 start = PFN_UP(e820.map[i].addr);
926 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
927 if (start >= end)
928 continue;
929 if (end > max_pfn)
930 max_pfn = end;
931 }
932 }
933 #else
934 /* We don't use the fake e820 because we need to respond to user override. */
/* NOTE(review): the override can only raise max_pfn above nr_pages,
   never lower it — presumably intentional; confirm against callers. */
935 void __init find_max_pfn(void)
936 {
937 if ( xen_override_max_pfn < xen_start_info->nr_pages )
938 xen_override_max_pfn = xen_start_info->nr_pages;
939 max_pfn = xen_override_max_pfn;
940 }
941 #endif /* XEN */
943 /*
944 * Determine low and high memory ranges:
945 */
/*
 * Returns the highest directly-mapped (lowmem) pfn, reconciling
 * max_pfn with MAXMEM_PFN and the user's "highmem=" request
 * (highmem_pages; -1 means "not specified").  May clamp max_pfn on
 * non-PAE / non-HIGHMEM configurations.
 * NOTE(review): leading integers are hgweb dump artifacts; the dump
 * also dropped the function's closing brace (original line 1000).
 */
946 unsigned long __init find_max_low_pfn(void)
947 {
948 unsigned long max_low_pfn;
950 max_low_pfn = max_pfn;
951 if (max_low_pfn > MAXMEM_PFN) {
952 if (highmem_pages == -1)
953 highmem_pages = max_pfn - MAXMEM_PFN;
954 if (highmem_pages + MAXMEM_PFN < max_pfn)
955 max_pfn = MAXMEM_PFN + highmem_pages;
956 if (highmem_pages + MAXMEM_PFN > max_pfn) {
957 printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
958 highmem_pages = 0;
959 }
960 max_low_pfn = MAXMEM_PFN;
961 #ifndef CONFIG_HIGHMEM
962 /* Maximum memory usable is what is directly addressable */
963 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
964 MAXMEM>>20);
965 if (max_pfn > MAX_NONPAE_PFN)
966 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
967 else
968 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
969 max_pfn = MAXMEM_PFN;
970 #else /* !CONFIG_HIGHMEM */
971 #ifndef CONFIG_X86_PAE
972 if (max_pfn > MAX_NONPAE_PFN) {
973 max_pfn = MAX_NONPAE_PFN;
974 printk(KERN_WARNING "Warning only 4GB will be used.\n");
975 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
976 }
977 #endif /* !CONFIG_X86_PAE */
978 #endif /* !CONFIG_HIGHMEM */
979 } else {
980 if (highmem_pages == -1)
981 highmem_pages = 0;
982 #ifdef CONFIG_HIGHMEM
983 if (highmem_pages >= max_pfn) {
984 printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
985 highmem_pages = 0;
986 }
987 if (highmem_pages) {
988 if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
989 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
990 highmem_pages = 0;
991 }
992 max_low_pfn -= highmem_pages;
993 }
994 #else
995 if (highmem_pages)
996 printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
997 #endif
998 }
999 return max_low_pfn;
1002 /*
1003 * Free all available memory for boot time allocation. Used
1004 * as a callback function by efi_memory_walk()
1005 */
/*
 * Releases [start, end) to the bootmem allocator, clipped to lowmem
 * (pfn <= max_low_pfn).  Addresses are byte addresses.
 * NOTE(review): the hgweb dump dropped the brace-only lines (original
 * 1009 "{" and 1019 "}") — restore them when reusing this text.
 */
1007 static int __init
1008 free_available_memory(unsigned long start, unsigned long end, void *arg)
1010 /* check max_low_pfn */
1011 if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
1012 return 0;
1013 if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
1014 end = (max_low_pfn + 1) << PAGE_SHIFT;
1015 if (start < end)
1016 free_bootmem(start, end - start);
1018 return 0;
1020 /*
1021 * Register fully available low RAM pages with the bootmem allocator.
1022 */
/*
 * Walks the EFI memmap (via free_available_memory) or the e820 map and
 * hands every fully-usable lowmem RAM page to the bootmem allocator.
 * NOTE(review): the hgweb dump dropped several brace-only lines in
 * this function (original 1024, 1030, 1061-1063) — restore on reuse.
 */
1023 static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
1025 int i;
1027 if (efi_enabled) {
1028 efi_memmap_walk(free_available_memory, NULL);
1029 return;
1031 for (i = 0; i < e820.nr_map; i++) {
1032 unsigned long curr_pfn, last_pfn, size;
1033 /*
1034 * Reserve usable low memory
1035 */
1036 if (e820.map[i].type != E820_RAM)
1037 continue;
1038 /*
1039 * We are rounding up the start address of usable memory:
1040 */
1041 curr_pfn = PFN_UP(e820.map[i].addr);
1042 if (curr_pfn >= max_low_pfn)
1043 continue;
1044 /*
1045 * ... and at the end of the usable range downwards:
1046 */
1047 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1049 if (last_pfn > max_low_pfn)
1050 last_pfn = max_low_pfn;
1052 /*
1053 * .. finally, did all the rounding and playing
1054 * around just make the area go away?
1055 */
1056 if (last_pfn <= curr_pfn)
1057 continue;
1059 size = last_pfn - curr_pfn;
1060 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1064 #ifndef CONFIG_XEN
1065 /*
1066 * workaround for Dell systems that neglect to reserve EBDA
1067 */
/*
 * Reserves the Extended BIOS Data Area page (address from the BDA) so
 * bootmem never hands it out.  Native-only; compiled out under Xen.
 * NOTE(review): brace-only lines (original 1069/1074) were dropped by
 * the hgweb dump — restore them when reusing this text.
 */
1068 static void __init reserve_ebda_region(void)
1070 unsigned int addr;
1071 addr = get_bios_ebda();
1072 if (addr)
1073 reserve_bootmem(addr, PAGE_SIZE);
1075 #endif
1077 #ifndef CONFIG_DISCONTIGMEM
1078 void __init setup_bootmem_allocator(void);
/*
 * setup_memory() - flat-memory initialization: computes min_low_pfn
 * (first pfn past the Xen-provided page tables), max_pfn and
 * max_low_pfn, then starts the bootmem allocator.  Returns max_low_pfn.
 * NOTE(review): brace-only lines were dropped by the hgweb dump —
 * restore them when reusing this text.
 */
1079 static unsigned long __init setup_memory(void)
1081 /*
1082 * partially used pages are not usable - thus
1083 * we are rounding upwards:
1084 */
1085 min_low_pfn = PFN_UP(__pa(xen_start_info->pt_base)) +
1086 xen_start_info->nr_pt_frames;
1088 find_max_pfn();
1090 max_low_pfn = find_max_low_pfn();
1092 #ifdef CONFIG_HIGHMEM
1093 highstart_pfn = highend_pfn = max_pfn;
1094 if (max_pfn > max_low_pfn) {
1095 highstart_pfn = max_low_pfn;
1097 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1098 pages_to_mb(highend_pfn - highstart_pfn));
1099 #endif
1100 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1101 pages_to_mb(max_low_pfn));
1103 setup_bootmem_allocator();
1105 return max_low_pfn;
/*
 * zone_sizes_init() - size the page-allocator zones.  Under Xen all
 * lowmem is placed in ZONE_DMA (see the comment below); ZONE_HIGHMEM
 * is sized from highend_pfn when configured.
 * NOTE(review): brace-only lines were dropped by the hgweb dump —
 * restore them when reusing this text.
 */
1108 void __init zone_sizes_init(void)
1110 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
1111 unsigned int max_dma, low;
1113 /*
1114 * XEN: Our notion of "DMA memory" is fake when running over Xen.
1115 * We simply put all RAM in the DMA zone so that those drivers which
1116 * needlessly specify GFP_DMA do not get starved of RAM unnecessarily.
1117 * Those drivers that *do* require lowmem are screwed anyway when
1118 * running over Xen!
1119 */
1120 max_dma = max_low_pfn;
1121 low = max_low_pfn;
1123 if (low < max_dma)
1124 zones_size[ZONE_DMA] = low;
1125 else {
1126 zones_size[ZONE_DMA] = max_dma;
1127 zones_size[ZONE_NORMAL] = low - max_dma;
1128 #ifdef CONFIG_HIGHMEM
1129 zones_size[ZONE_HIGHMEM] = highend_pfn - low;
1130 #endif
1132 free_area_init(zones_size);
1134 #else
1135 extern unsigned long setup_memory(void);
1136 extern void zone_sizes_init(void);
1137 #endif /* !CONFIG_DISCONTIGMEM */
1139 void __init setup_bootmem_allocator(void)
1141 unsigned long bootmap_size;
1142 /*
1143 * Initialize the boot-time allocator (with low memory only):
1144 */
1145 bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
1147 register_bootmem_low_pages(max_low_pfn);
1149 /*
1150 * Reserve the bootmem bitmap itself as well. We do this in two
1151 * steps (first step was init_bootmem()) because this catches
1152 * the (very unlikely) case of us accidentally initializing the
1153 * bootmem allocator with an invalid RAM area.
1154 */
1155 reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(min_low_pfn) +
1156 bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
1158 #ifndef CONFIG_XEN
1159 /*
1160 * reserve physical page 0 - it's a special BIOS page on many boxes,
1161 * enabling clean reboots, SMP operation, laptop functions.
1162 */
1163 reserve_bootmem(0, PAGE_SIZE);
1165 /* reserve EBDA region, it's a 4K region */
1166 reserve_ebda_region();
1168 /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
1169 PCI prefetch into it (errata #56). Usually the page is reserved anyways,
1170 unless you have no PS/2 mouse plugged in. */
1171 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
1172 boot_cpu_data.x86 == 6)
1173 reserve_bootmem(0xa0000 - 4096, 4096);
1175 #ifdef CONFIG_SMP
1176 /*
1177 * But first pinch a few for the stack/trampoline stuff
1178 * FIXME: Don't need the extra page at 4K, but need to fix
1179 * trampoline before removing it. (see the GDT stuff)
1180 */
1181 reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
1182 #endif
1183 #ifdef CONFIG_ACPI_SLEEP
1184 /*
1185 * Reserve low memory region for sleep support.
1186 */
1187 acpi_reserve_bootmem();
1188 #endif
1189 #endif /* !CONFIG_XEN */
1191 #ifdef CONFIG_BLK_DEV_INITRD
1192 if (xen_start_info->mod_start) {
1193 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
1194 /*reserve_bootmem(INITRD_START, INITRD_SIZE);*/
1195 initrd_start = INITRD_START + PAGE_OFFSET;
1196 initrd_end = initrd_start+INITRD_SIZE;
1197 initrd_below_start_ok = 1;
1199 else {
1200 printk(KERN_ERR "initrd extends beyond end of memory "
1201 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1202 INITRD_START + INITRD_SIZE,
1203 max_low_pfn << PAGE_SHIFT);
1204 initrd_start = 0;
1207 #endif
1209 phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list;
1212 /*
1213 * The node 0 pgdat is initialized before all of these because
1214 * it's needed for bootmem. node>0 pgdats have their virtual
1215 * space allocated before the pagetables are in place to access
1216 * them, so they can't be cleared then.
1218 * This should all compile down to nothing when NUMA is off.
1219 */
1220 void __init remapped_pgdat_init(void)
1222 int nid;
1224 for_each_online_node(nid) {
1225 if (nid != 0)
1226 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
1230 /*
1231 * Request address space for all standard RAM and ROM resources
1232 * and also for regions reported as reserved by the e820.
1233 */
1234 static void __init
1235 legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
1237 int i;
1238 #ifdef CONFIG_XEN
1239 dom0_op_t op;
1240 struct dom0_memory_map_entry *map;
1241 unsigned long gapstart, gapsize;
1242 unsigned long long last;
1243 #endif
1245 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
1246 probe_roms();
1247 #endif
1249 #ifdef CONFIG_XEN
1250 map = alloc_bootmem_low_pages(PAGE_SIZE);
1251 op.cmd = DOM0_PHYSICAL_MEMORY_MAP;
1252 op.u.physical_memory_map.memory_map = map;
1253 op.u.physical_memory_map.max_map_entries =
1254 PAGE_SIZE / sizeof(struct dom0_memory_map_entry);
1255 BUG_ON(HYPERVISOR_dom0_op(&op));
1257 last = 0x100000000ULL;
1258 gapstart = 0x10000000;
1259 gapsize = 0x400000;
1261 for (i = op.u.physical_memory_map.nr_map_entries - 1; i >= 0; i--) {
1262 struct resource *res;
1264 if ((last > map[i].end) && ((last - map[i].end) > gapsize)) {
1265 gapsize = last - map[i].end;
1266 gapstart = map[i].end;
1268 if (map[i].start < last)
1269 last = map[i].start;
1271 if (map[i].end > 0x100000000ULL)
1272 continue;
1273 res = alloc_bootmem_low(sizeof(struct resource));
1274 res->name = map[i].is_ram ? "System RAM" : "reserved";
1275 res->start = map[i].start;
1276 res->end = map[i].end - 1;
1277 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1278 request_resource(&iomem_resource, res);
1281 free_bootmem(__pa(map), PAGE_SIZE);
1283 /*
1284 * Start allocating dynamic PCI memory a bit into the gap,
1285 * aligned up to the nearest megabyte.
1287 * Question: should we try to pad it up a bit (do something
1288 * like " + (gapsize >> 3)" in there too?). We now have the
1289 * technology.
1290 */
1291 pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
1293 printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1294 pci_mem_start, gapstart, gapsize);
1295 #else
1296 for (i = 0; i < e820.nr_map; i++) {
1297 struct resource *res;
1298 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
1299 continue;
1300 res = alloc_bootmem_low(sizeof(struct resource));
1301 switch (e820.map[i].type) {
1302 case E820_RAM: res->name = "System RAM"; break;
1303 case E820_ACPI: res->name = "ACPI Tables"; break;
1304 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
1305 default: res->name = "reserved";
1307 res->start = e820.map[i].addr;
1308 res->end = res->start + e820.map[i].size - 1;
1309 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1310 request_resource(&iomem_resource, res);
1311 if (e820.map[i].type == E820_RAM) {
1312 /*
1313 * We don't know which RAM region contains kernel data,
1314 * so we try it repeatedly and let the resource manager
1315 * test it.
1316 */
1317 request_resource(res, code_resource);
1318 request_resource(res, data_resource);
1321 #endif
1324 /*
1325 * Request address space for all standard resources
1326 */
1327 static void __init register_memory(void)
1329 #ifndef CONFIG_XEN
1330 unsigned long gapstart, gapsize;
1331 unsigned long long last;
1332 #endif
1333 int i;
1335 /* Nothing to do if not running in dom0. */
1336 if (!(xen_start_info->flags & SIF_INITDOMAIN))
1337 return;
1339 if (efi_enabled)
1340 efi_initialize_iomem_resources(&code_resource, &data_resource);
1341 else
1342 legacy_init_iomem_resources(&code_resource, &data_resource);
1344 /* EFI systems may still have VGA */
1345 request_resource(&iomem_resource, &video_ram_resource);
1347 /* request I/O space for devices used on all i[345]86 PCs */
1348 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1349 request_resource(&ioport_resource, &standard_io_resources[i]);
1351 #ifndef CONFIG_XEN
1352 /*
1353 * Search for the bigest gap in the low 32 bits of the e820
1354 * memory space.
1355 */
1356 last = 0x100000000ull;
1357 gapstart = 0x10000000;
1358 gapsize = 0x400000;
1359 i = e820.nr_map;
1360 while (--i >= 0) {
1361 unsigned long long start = e820.map[i].addr;
1362 unsigned long long end = start + e820.map[i].size;
1364 /*
1365 * Since "last" is at most 4GB, we know we'll
1366 * fit in 32 bits if this condition is true
1367 */
1368 if (last > end) {
1369 unsigned long gap = last - end;
1371 if (gap > gapsize) {
1372 gapsize = gap;
1373 gapstart = end;
1376 if (start < last)
1377 last = start;
1380 /*
1381 * Start allocating dynamic PCI memory a bit into the gap,
1382 * aligned up to the nearest megabyte.
1384 * Question: should we try to pad it up a bit (do something
1385 * like " + (gapsize >> 3)" in there too?). We now have the
1386 * technology.
1387 */
1388 pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
1390 printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1391 pci_mem_start, gapstart, gapsize);
1392 #endif
1395 /* Use inline assembly to define this because the nops are defined
1396 as inline assembly strings in the include files and we cannot
1397 get them easily into strings. */
1398 asm("\t.data\nintelnops: "
1399 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
1400 GENERIC_NOP7 GENERIC_NOP8);
1401 asm("\t.data\nk8nops: "
1402 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
1403 K8_NOP7 K8_NOP8);
1404 asm("\t.data\nk7nops: "
1405 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
1406 K7_NOP7 K7_NOP8);
1408 extern unsigned char intelnops[], k8nops[], k7nops[];
1409 static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
1410 NULL,
1411 intelnops,
1412 intelnops + 1,
1413 intelnops + 1 + 2,
1414 intelnops + 1 + 2 + 3,
1415 intelnops + 1 + 2 + 3 + 4,
1416 intelnops + 1 + 2 + 3 + 4 + 5,
1417 intelnops + 1 + 2 + 3 + 4 + 5 + 6,
1418 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1419 };
1420 static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
1421 NULL,
1422 k8nops,
1423 k8nops + 1,
1424 k8nops + 1 + 2,
1425 k8nops + 1 + 2 + 3,
1426 k8nops + 1 + 2 + 3 + 4,
1427 k8nops + 1 + 2 + 3 + 4 + 5,
1428 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
1429 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1430 };
1431 static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
1432 NULL,
1433 k7nops,
1434 k7nops + 1,
1435 k7nops + 1 + 2,
1436 k7nops + 1 + 2 + 3,
1437 k7nops + 1 + 2 + 3 + 4,
1438 k7nops + 1 + 2 + 3 + 4 + 5,
1439 k7nops + 1 + 2 + 3 + 4 + 5 + 6,
1440 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1441 };
1442 static struct nop {
1443 int cpuid;
1444 unsigned char **noptable;
1445 } noptypes[] = {
1446 { X86_FEATURE_K8, k8_nops },
1447 { X86_FEATURE_K7, k7_nops },
1448 { -1, NULL }
1449 };
1451 /* Replace instructions with better alternatives for this CPU type.
1453 This runs before SMP is initialized to avoid SMP problems with
1454 self modifying code. This implies that assymetric systems where
1455 APs have less capabilities than the boot processor are not handled.
1456 In this case boot with "noreplacement". */
1457 void apply_alternatives(void *start, void *end)
1459 struct alt_instr *a;
1460 int diff, i, k;
1461 unsigned char **noptable = intel_nops;
1462 for (i = 0; noptypes[i].cpuid >= 0; i++) {
1463 if (boot_cpu_has(noptypes[i].cpuid)) {
1464 noptable = noptypes[i].noptable;
1465 break;
1468 for (a = start; (void *)a < end; a++) {
1469 if (!boot_cpu_has(a->cpuid))
1470 continue;
1471 BUG_ON(a->replacementlen > a->instrlen);
1472 memcpy(a->instr, a->replacement, a->replacementlen);
1473 diff = a->instrlen - a->replacementlen;
1474 /* Pad the rest with nops */
1475 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
1476 k = diff;
1477 if (k > ASM_NOP_MAX)
1478 k = ASM_NOP_MAX;
1479 memcpy(a->instr + i, noptable[k], k);
1484 static int no_replacement __initdata = 0;
1486 void __init alternative_instructions(void)
1488 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
1489 if (no_replacement)
1490 return;
1491 apply_alternatives(__alt_instructions, __alt_instructions_end);
1494 static int __init noreplacement_setup(char *s)
1496 no_replacement = 1;
1497 return 0;
1500 __setup("noreplacement", noreplacement_setup);
1502 static char * __init machine_specific_memory_setup(void);
#ifdef CONFIG_MCA
/* Record whether a MicroChannel bus was reported by the BIOS. */
static void set_mca_bus(int x)
{
	MCA_bus = x;
}
#else
/* No MCA support configured - discard the value. */
static void set_mca_bus(int x) { }
#endif
1513 /*
1514 * Determine if we were loaded by an EFI loader. If so, then we have also been
1515 * passed the efi memmap, systab, etc., so we should use these data structures
1516 * for initialization. Note, the efi init code path is determined by the
1517 * global efi_enabled. This allows the same kernel image to be used on existing
1518 * systems (with a traditional BIOS) as well as on EFI systems.
1519 */
1520 void __init setup_arch(char **cmdline_p)
1522 int i, j;
1523 physdev_op_t op;
1524 unsigned long max_low_pfn;
1526 /* Force a quick death if the kernel panics. */
1527 extern int panic_timeout;
1528 if (panic_timeout == 0)
1529 panic_timeout = 1;
1531 /* Register a call for panic conditions. */
1532 notifier_chain_register(&panic_notifier_list, &xen_panic_block);
1534 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
1535 HYPERVISOR_vm_assist(VMASST_CMD_enable,
1536 VMASST_TYPE_writable_pagetables);
1538 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1539 early_cpu_init();
1541 /*
1542 * FIXME: This isn't an official loader_type right
1543 * now but does currently work with elilo.
1544 * If we were configured as an EFI kernel, check to make
1545 * sure that we were loaded correctly from elilo and that
1546 * the system table is valid. If not, then initialize normally.
1547 */
1548 #ifdef CONFIG_EFI
1549 if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1550 efi_enabled = 1;
1551 #endif
1553 /* This must be initialized to UNNAMED_MAJOR for ipconfig to work
1554 properly. Setting ROOT_DEV to default to /dev/ram0 breaks initrd.
1555 */
1556 ROOT_DEV = MKDEV(UNNAMED_MAJOR,0);
1557 drive_info = DRIVE_INFO;
1558 screen_info = SCREEN_INFO;
1559 edid_info = EDID_INFO;
1560 apm_info.bios = APM_BIOS_INFO;
1561 ist_info = IST_INFO;
1562 saved_videomode = VIDEO_MODE;
1563 if( SYS_DESC_TABLE.length != 0 ) {
1564 set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
1565 machine_id = SYS_DESC_TABLE.table[0];
1566 machine_submodel_id = SYS_DESC_TABLE.table[1];
1567 BIOS_revision = SYS_DESC_TABLE.table[2];
1569 bootloader_type = LOADER_TYPE;
1571 #ifdef CONFIG_XEN_PHYSDEV_ACCESS
1572 /* This is drawn from a dump from vgacon:startup in standard Linux. */
1573 screen_info.orig_video_mode = 3;
1574 screen_info.orig_video_isVGA = 1;
1575 screen_info.orig_video_lines = 25;
1576 screen_info.orig_video_cols = 80;
1577 screen_info.orig_video_ega_bx = 3;
1578 screen_info.orig_video_points = 16;
1579 #endif
1581 #ifdef CONFIG_BLK_DEV_RAM
1582 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1583 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1584 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1585 #endif
1586 ARCH_SETUP
1587 if (efi_enabled)
1588 efi_init();
1589 else {
1590 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1591 print_memory_map(machine_specific_memory_setup());
1594 copy_edd();
1596 if (!MOUNT_ROOT_RDONLY)
1597 root_mountflags &= ~MS_RDONLY;
1598 init_mm.start_code = (unsigned long) _text;
1599 init_mm.end_code = (unsigned long) _etext;
1600 init_mm.end_data = (unsigned long) _edata;
1601 init_mm.brk = (PFN_UP(__pa(xen_start_info->pt_base)) +
1602 xen_start_info->nr_pt_frames) << PAGE_SHIFT;
1604 /* XEN: This is nonsense: kernel may not even be contiguous in RAM. */
1605 /*code_resource.start = virt_to_phys(_text);*/
1606 /*code_resource.end = virt_to_phys(_etext)-1;*/
1607 /*data_resource.start = virt_to_phys(_etext);*/
1608 /*data_resource.end = virt_to_phys(_edata)-1;*/
1610 parse_cmdline_early(cmdline_p);
1612 max_low_pfn = setup_memory();
1614 /*
1615 * NOTE: before this point _nobody_ is allowed to allocate
1616 * any memory using the bootmem allocator. Although the
1617 * alloctor is now initialised only the first 8Mb of the kernel
1618 * virtual address space has been mapped. All allocations before
1619 * paging_init() has completed must use the alloc_bootmem_low_pages()
1620 * variant (which allocates DMA'able memory) and care must be taken
1621 * not to exceed the 8Mb limit.
1622 */
1624 #ifdef CONFIG_SMP
1625 smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1626 #endif
1627 paging_init();
1628 remapped_pgdat_init();
1629 zone_sizes_init();
1631 #ifdef CONFIG_X86_FIND_SMP_CONFIG
1632 /*
1633 * Find and reserve possible boot-time SMP configuration:
1634 */
1635 find_smp_config();
1636 #endif
1638 /* Make sure we have a correctly sized P->M table. */
1639 if (max_pfn != xen_start_info->nr_pages) {
1640 phys_to_machine_mapping = alloc_bootmem_low_pages(
1641 max_pfn * sizeof(unsigned long));
1643 if (max_pfn > xen_start_info->nr_pages) {
1644 /* set to INVALID_P2M_ENTRY */
1645 memset(phys_to_machine_mapping, ~0,
1646 max_pfn * sizeof(unsigned long));
1647 memcpy(phys_to_machine_mapping,
1648 (unsigned long *)xen_start_info->mfn_list,
1649 xen_start_info->nr_pages * sizeof(unsigned long));
1650 } else {
1651 struct xen_memory_reservation reservation = {
1652 .extent_start = (unsigned long *)xen_start_info->mfn_list + max_pfn,
1653 .nr_extents = xen_start_info->nr_pages - max_pfn,
1654 .extent_order = 0,
1655 .domid = DOMID_SELF
1656 };
1658 memcpy(phys_to_machine_mapping,
1659 (unsigned long *)xen_start_info->mfn_list,
1660 max_pfn * sizeof(unsigned long));
1661 BUG_ON(HYPERVISOR_memory_op(
1662 XENMEM_decrease_reservation,
1663 &reservation) !=
1664 (xen_start_info->nr_pages - max_pfn));
1666 free_bootmem(
1667 __pa(xen_start_info->mfn_list),
1668 PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
1669 sizeof(unsigned long))));
1672 pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE);
1673 for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
1675 pfn_to_mfn_frame_list[j] =
1676 virt_to_mfn(&phys_to_machine_mapping[i]);
1678 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list =
1679 virt_to_mfn(pfn_to_mfn_frame_list);
1681 /*
1682 * NOTE: at this point the bootmem allocator is fully available.
1683 */
1685 #ifdef CONFIG_EARLY_PRINTK
1687 char *s = strstr(*cmdline_p, "earlyprintk=");
1688 if (s) {
1689 extern void setup_early_printk(char *);
1691 setup_early_printk(s);
1692 printk("early console enabled\n");
1695 #endif
1697 if (xen_start_info->flags & SIF_INITDOMAIN)
1698 dmi_scan_machine();
1700 #ifdef CONFIG_X86_GENERICARCH
1701 generic_apic_probe(*cmdline_p);
1702 #endif
1703 if (efi_enabled)
1704 efi_map_memmap();
1706 op.cmd = PHYSDEVOP_SET_IOPL;
1707 op.u.set_iopl.iopl = 1;
1708 HYPERVISOR_physdev_op(&op);
1710 #ifdef CONFIG_ACPI_BOOT
1711 if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
1712 printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
1713 acpi_disabled = 1;
1714 acpi_ht = 0;
1716 #endif
1718 #ifdef CONFIG_ACPI_BOOT
1719 /*
1720 * Parse the ACPI tables for possible boot-time SMP configuration.
1721 */
1722 acpi_boot_table_init();
1723 acpi_boot_init();
1724 #endif
1726 #ifdef CONFIG_X86_LOCAL_APIC
1727 if (smp_found_config)
1728 get_smp_config();
1729 #endif
1731 /* XXX Disable irqdebug until we have a way to avoid interrupt
1732 * conflicts. */
1733 noirqdebug_setup("");
1735 register_memory();
1737 if (xen_start_info->flags & SIF_INITDOMAIN) {
1738 if (!(xen_start_info->flags & SIF_PRIVILEGED))
1739 panic("Xen granted us console access "
1740 "but not privileged status");
1742 #ifdef CONFIG_VT
1743 #if defined(CONFIG_VGA_CONSOLE)
1744 if (!efi_enabled ||
1745 (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1746 conswitchp = &vga_con;
1747 #elif defined(CONFIG_DUMMY_CONSOLE)
1748 conswitchp = &dummy_con;
1749 #endif
1750 #endif
1751 } else {
1752 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
1753 extern const struct consw xennull_con;
1754 extern int console_use_vt;
1755 #if defined(CONFIG_VGA_CONSOLE)
1756 /* disable VGA driver */
1757 ORIG_VIDEO_ISVGA = VIDEO_TYPE_VLFB;
1758 #endif
1759 conswitchp = &xennull_con;
1760 console_use_vt = 0;
1761 #endif
1765 static int
1766 xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
1768 HYPERVISOR_crash();
1769 /* we're never actually going to get here... */
1770 return NOTIFY_DONE;
1773 #include "setup_arch_post.h"
1774 /*
1775 * Local Variables:
1776 * mode:c
1777 * c-file-style:"k&r"
1778 * c-basic-offset:8
1779 * End:
1780 */