debuggers.hg

view xen/arch/x86/setup.c @ 19966:3f12d48f2880

x86: merge final linking scripts

While unrelated to the previous four patches, I realized that the two
scripts are nearly identical when coding those earlier patches, and
this patch depends on them in order to apply cleanly.

As an extra measure, it also adjusts the (unused) space freed at the
end of the per-CPU area to include all alignment space needed before
the first actual constituent of the .bss section (up to 7 pages on
x86-64).

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Mon Jul 13 11:51:07 2009 +0100 (2009-07-13)
parents 3952eaeb70b0
children 722c7e94e764
line source
1 #include <xen/config.h>
2 #include <xen/init.h>
3 #include <xen/lib.h>
4 #include <xen/sched.h>
5 #include <xen/domain.h>
6 #include <xen/serial.h>
7 #include <xen/softirq.h>
8 #include <xen/acpi.h>
9 #include <xen/console.h>
10 #include <xen/serial.h>
11 #include <xen/trace.h>
12 #include <xen/multiboot.h>
13 #include <xen/domain_page.h>
14 #include <xen/version.h>
15 #include <xen/gdbstub.h>
16 #include <xen/percpu.h>
17 #include <xen/hypercall.h>
18 #include <xen/keyhandler.h>
19 #include <xen/numa.h>
20 #include <xen/rcupdate.h>
21 #include <xen/vga.h>
22 #include <xen/dmi.h>
23 #include <public/version.h>
24 #ifdef CONFIG_COMPAT
25 #include <compat/platform.h>
26 #include <compat/xen.h>
27 #endif
28 #include <asm/bitops.h>
29 #include <asm/smp.h>
30 #include <asm/processor.h>
31 #include <asm/mpspec.h>
32 #include <asm/apic.h>
33 #include <asm/desc.h>
34 #include <asm/paging.h>
35 #include <asm/e820.h>
36 #include <xsm/acm/acm_hooks.h>
37 #include <xen/kexec.h>
38 #include <asm/edd.h>
39 #include <xsm/xsm.h>
40 #include <asm/tboot.h>
42 int __init bzimage_headroom(char *image_start, unsigned long image_length);
44 #if defined(CONFIG_X86_64)
45 #define BOOTSTRAP_DIRECTMAP_END (1UL << 32) /* 4GB */
46 #define maddr_to_bootstrap_virt(m) maddr_to_virt(m)
47 #else
48 #define BOOTSTRAP_DIRECTMAP_END (1UL << 30) /* 1GB */
49 #define maddr_to_bootstrap_virt(m) ((void *)(long)(m))
50 #endif
52 extern void generic_apic_probe(void);
53 extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
55 extern u16 boot_edid_caps;
56 extern u8 boot_edid_info[128];
57 extern struct boot_video_info boot_vid_info;
59 /* opt_nosmp: If true, secondary processors are ignored. */
60 static int opt_nosmp = 0;
61 boolean_param("nosmp", opt_nosmp);
63 /* maxcpus: maximum number of CPUs to activate. */
64 static unsigned int max_cpus = NR_CPUS;
65 integer_param("maxcpus", max_cpus);
67 /* opt_watchdog: If true, run a watchdog NMI on each processor. */
68 static int opt_watchdog = 0;
69 boolean_param("watchdog", opt_watchdog);
71 /* **** Linux config option: propagated to domain0. */
72 /* "acpi=off": Disables both ACPI table parsing and interpreter. */
73 /* "acpi=force": Override the disable blacklist. */
74 /* "acpi=strict": Disables out-of-spec workarounds. */
75 /* "acpi=ht": Limit ACPI just to boot-time to enable HT. */
76 /* "acpi=noirq": Disables ACPI interrupt routing. */
77 static void parse_acpi_param(char *s);
78 custom_param("acpi", parse_acpi_param);
80 /* **** Linux config option: propagated to domain0. */
81 /* acpi_skip_timer_override: Skip IRQ0 overrides. */
82 extern int acpi_skip_timer_override;
83 boolean_param("acpi_skip_timer_override", acpi_skip_timer_override);
85 /* **** Linux config option: propagated to domain0. */
86 /* noapic: Disable IOAPIC setup. */
87 extern int skip_ioapic_setup;
88 boolean_param("noapic", skip_ioapic_setup);
90 /* **** Linux config option: propagated to domain0. */
91 /* xen_cpuidle: xen control cstate. */
92 /*static*/ int xen_cpuidle = -1;
93 boolean_param("cpuidle", xen_cpuidle);
95 int early_boot = 1;
97 cpumask_t cpu_present_map;
99 unsigned long xen_phys_start;
101 #ifdef CONFIG_X86_32
102 /* Limits of Xen heap, used to initialise the allocator. */
103 unsigned long xenheap_initial_phys_start, xenheap_phys_end;
104 #endif
106 extern void arch_init_memory(void);
107 extern void init_IRQ(void);
108 extern void early_time_init(void);
109 extern void early_cpu_init(void);
110 extern void vesa_init(void);
111 extern void vesa_mtrr_init(void);
112 extern void init_tmem(void);
114 DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, gdt_table) = boot_cpu_gdt_table;
115 #ifdef CONFIG_COMPAT
116 DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, compat_gdt_table)
117 = boot_cpu_compat_gdt_table;
118 #endif
120 DEFINE_PER_CPU(struct tss_struct, init_tss);
122 char __attribute__ ((__section__(".bss.stack_aligned"))) cpu0_stack[STACK_SIZE];
124 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1 };
126 unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE;
127 EXPORT_SYMBOL(mmu_cr4_features);
129 int acpi_disabled;
131 int acpi_force;
132 char acpi_param[10] = "";
133 static void __init parse_acpi_param(char *s)
134 {
135 /* Save the parameter so it can be propagated to domain0. */
136 safe_strcpy(acpi_param, s);
138 /* Interpret the parameter for use within Xen. */
139 if ( !strcmp(s, "off") )
140 {
141 disable_acpi();
142 }
143 else if ( !strcmp(s, "force") )
144 {
145 acpi_force = 1;
146 acpi_ht = 1;
147 acpi_disabled = 0;
148 }
149 else if ( !strcmp(s, "strict") )
150 {
151 acpi_strict = 1;
152 }
153 else if ( !strcmp(s, "ht") )
154 {
155 if ( !acpi_force )
156 disable_acpi();
157 acpi_ht = 1;
158 }
159 else if ( !strcmp(s, "noirq") )
160 {
161 acpi_noirq_set();
162 }
163 }
165 static void __init do_initcalls(void)
166 {
167 initcall_t *call;
168 for ( call = &__initcall_start; call < &__initcall_end; call++ )
169 (*call)();
170 }
/*
 * Print a fatal boot-time error message and halt this CPU forever.
 * Used before panic()/exception infrastructure is fully available.
 */
#define EARLY_FAIL(f, a...) do {                \
    printk( f , ## a );                         \
    for ( ; ; ) halt();                         \
} while (0)
177 static unsigned long __initdata initial_images_base;
178 static unsigned long __initdata initial_images_start;
179 static unsigned long __initdata initial_images_end;
/*
 * Number of whole pages occupied by the relocated boot modules,
 * including the dom0 kernel headroom.  Both bounds are expected to be
 * page-aligned by the relocation code in __start_xen().
 */
unsigned long __init initial_images_nrpages(void)
{
    ASSERT(!(initial_images_base & ~PAGE_MASK));
    ASSERT(!(initial_images_end & ~PAGE_MASK));
    return ((initial_images_end >> PAGE_SHIFT) -
            (initial_images_base >> PAGE_SHIFT));
}
/* Hand the memory that held the boot modules back to the domain heap. */
void __init discard_initial_images(void)
{
    init_domheap_pages(initial_images_base, initial_images_end);
}
194 extern char __init_begin[], __bss_start[];
195 extern char __per_cpu_start[], __per_cpu_data_end[];
/*
 * Replicate CPU0's static per-CPU data area into the slots of all
 * possible secondary CPUs, then release or guard the unused slots
 * beyond the last possible CPU.
 */
static void __init percpu_init_areas(void)
{
    unsigned int i, data_size = __per_cpu_data_end - __per_cpu_start;
    unsigned int first_unused;

    BUG_ON(data_size > PERCPU_SIZE);

    /* Initialise per-cpu data area for all possible secondary CPUs. */
    for ( i = 1; (i < NR_CPUS) && cpu_possible(i); i++ )
        memcpy(__per_cpu_start + (i << PERCPU_SHIFT),
               __per_cpu_start,
               data_size);
    first_unused = i;

    /* Check that there are no holes in cpu_possible_map. */
    for ( ; i < NR_CPUS; i++ )
        BUG_ON(cpu_possible(i));

#ifndef MEMORY_GUARD
    /* Not guarding: give the unused tail back to the Xen heap instead. */
    init_xenheap_pages(__pa(__per_cpu_start) + (first_unused << PERCPU_SHIFT),
                       __pa(__bss_start));
#endif
    /* Guard the unused slots so stray per-CPU accesses fault. */
    memguard_guard_range(&__per_cpu_start[first_unused << PERCPU_SHIFT],
                         __bss_start - &__per_cpu_start[first_unused <<
                                                        PERCPU_SHIFT]);
#if defined(CONFIG_X86_64)
    /* Also zap the mapping in the 1:1 area. */
    memguard_guard_range(__va(__pa(__per_cpu_start)) +
                         (first_unused << PERCPU_SHIFT),
                         (NR_CPUS - first_unused) << PERCPU_SHIFT);
#endif
}
/* Create the idle domain and its first vcpu, and make that vcpu current. */
static void __init init_idle_domain(void)
{
    struct domain *idle_domain;

    /* Domain creation requires that scheduler structures are initialised. */
    scheduler_init();

    idle_domain = domain_create(IDLE_DOMAIN_ID, 0, 0);
    if ( idle_domain == NULL )
        BUG();
    idle_domain->vcpu = idle_vcpu;
    idle_domain->max_vcpus = NR_CPUS;
    if ( alloc_vcpu(idle_domain, 0, 0) == NULL )
        BUG();

    set_current(idle_vcpu[0]);
    this_cpu(curr_vcpu) = current;

    setup_idle_pagetable();
}
251 static void __init srat_detect_node(int cpu)
252 {
253 unsigned node;
254 u32 apicid = x86_cpu_to_apicid[cpu];
256 node = apicid_to_node[apicid];
257 if ( node == NUMA_NO_NODE )
258 node = 0;
259 numa_set_node(cpu, node);
261 if ( acpi_numa > 0 )
262 printk(KERN_INFO "CPU %d APIC %d -> Node %d\n", cpu, apicid, node);
263 }
/*
 * Ensure a given physical memory range is present in the bootstrap mappings.
 * Use superpage mappings to ensure that pagetable memory needn't be allocated.
 */
static void __init bootstrap_map(unsigned long start, unsigned long end)
{
    unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
    /* Round outwards to superpage boundaries; never map below 16MB. */
    start = max_t(unsigned long, start & ~mask, 16UL << 20);
    end = (end + mask) & ~mask;
    if ( start >= end )
        return;
    if ( end > BOOTSTRAP_DIRECTMAP_END )
        panic("Cannot access memory beyond end of "
              "bootstrap direct-map area\n");
    map_pages_to_xen(
        (unsigned long)maddr_to_bootstrap_virt(start),
        start >> PAGE_SHIFT, (end-start) >> PAGE_SHIFT, PAGE_HYPERVISOR);
}
/*
 * Copy [src_start,src_end) to physical address dst, mapping both ranges
 * into the bootstrap direct map first.  memmove() is used because the
 * source and destination ranges may overlap.
 */
static void __init move_memory(
    unsigned long dst, unsigned long src_start, unsigned long src_end)
{
    bootstrap_map(src_start, src_end);
    bootstrap_map(dst, dst + src_end - src_start);
    memmove(maddr_to_bootstrap_virt(dst),
            maddr_to_bootstrap_virt(src_start),
            src_end - src_start);
}
294 /* A temporary copy of the e820 map that we can mess with during bootstrap. */
295 static struct e820map __initdata boot_e820;
/*
 * Video-mode information filled in by the real-mode boot trampoline.
 * The hex comments are the byte offsets of each field within the
 * structure, matching the assembler code that populates it.
 */
struct boot_video_info {
    u8  orig_x;             /* 0x00 */
    u8  orig_y;             /* 0x01 */
    u8  orig_video_mode;    /* 0x02 */
    u8  orig_video_cols;    /* 0x03 */
    u8  orig_video_lines;   /* 0x04 */
    u8  orig_video_isVGA;   /* 0x05 */
    u16 orig_video_points;  /* 0x06 */

    /* VESA graphic mode -- linear frame buffer */
    u32 capabilities;       /* 0x08 */
    u16 lfb_linelength;     /* 0x0c */
    u16 lfb_width;          /* 0x0e */
    u16 lfb_height;         /* 0x10 */
    u16 lfb_depth;          /* 0x12 */
    u32 lfb_base;           /* 0x14 */
    u32 lfb_size;           /* 0x18 */
    u8  red_size;           /* 0x1c */
    u8  red_pos;            /* 0x1d */
    u8  green_size;         /* 0x1e */
    u8  green_pos;          /* 0x1f */
    u8  blue_size;          /* 0x20 */
    u8  blue_pos;           /* 0x21 */
    u8  rsvd_size;          /* 0x22 */
    u8  rsvd_pos;           /* 0x23 */
    u16 vesapm_seg;         /* 0x24 */
    u16 vesapm_off;         /* 0x26 */
    u16 vesa_attrib;        /* 0x28 */
};
/*
 * Translate the trampoline-provided boot_video_info into the exported
 * vga_console_info.  Mode 3 text consoles and VESA linear-framebuffer
 * consoles (isVGA == 0x23) are recognised; anything else leaves
 * vga_console_info untouched ("no VGA detected").
 */
static void __init parse_video_info(void)
{
    struct boot_video_info *bvi = &bootsym(boot_vid_info);

    if ( (bvi->orig_video_isVGA == 1) && (bvi->orig_video_mode == 3) )
    {
        vga_console_info.video_type = XEN_VGATYPE_TEXT_MODE_3;
        vga_console_info.u.text_mode_3.font_height = bvi->orig_video_points;
        vga_console_info.u.text_mode_3.cursor_x = bvi->orig_x;
        vga_console_info.u.text_mode_3.cursor_y = bvi->orig_y;
        vga_console_info.u.text_mode_3.rows = bvi->orig_video_lines;
        vga_console_info.u.text_mode_3.columns = bvi->orig_video_cols;
    }
    else if ( bvi->orig_video_isVGA == 0x23 )
    {
        vga_console_info.video_type = XEN_VGATYPE_VESA_LFB;
        vga_console_info.u.vesa_lfb.width = bvi->lfb_width;
        vga_console_info.u.vesa_lfb.height = bvi->lfb_height;
        vga_console_info.u.vesa_lfb.bytes_per_line = bvi->lfb_linelength;
        vga_console_info.u.vesa_lfb.bits_per_pixel = bvi->lfb_depth;
        vga_console_info.u.vesa_lfb.lfb_base = bvi->lfb_base;
        vga_console_info.u.vesa_lfb.lfb_size = bvi->lfb_size;
        vga_console_info.u.vesa_lfb.red_pos = bvi->red_pos;
        vga_console_info.u.vesa_lfb.red_size = bvi->red_size;
        vga_console_info.u.vesa_lfb.green_pos = bvi->green_pos;
        vga_console_info.u.vesa_lfb.green_size = bvi->green_size;
        vga_console_info.u.vesa_lfb.blue_pos = bvi->blue_pos;
        vga_console_info.u.vesa_lfb.blue_size = bvi->blue_size;
        vga_console_info.u.vesa_lfb.rsvd_pos = bvi->rsvd_pos;
        vga_console_info.u.vesa_lfb.rsvd_size = bvi->rsvd_size;
        vga_console_info.u.vesa_lfb.gbl_caps = bvi->capabilities;
        vga_console_info.u.vesa_lfb.mode_attrs = bvi->vesa_attrib;
    }
}
/*
 * Reserve the configured kexec crash area in the given e820 map.
 * Safe to call more than once: only the first call with a non-empty
 * area does anything.  On reservation failure the crash area is
 * disabled (zeroed) and kdump stays off.
 */
void __init kexec_reserve_area(struct e820map *e820)
{
    unsigned long kdump_start = kexec_crash_area.start;
    unsigned long kdump_size = kexec_crash_area.size;
    static int is_reserved = 0;

    /* Round the size up to a whole number of pages. */
    kdump_size = (kdump_size + PAGE_SIZE - 1) & PAGE_MASK;

    if ( (kdump_start == 0) || (kdump_size == 0) || is_reserved )
        return;

    is_reserved = 1;

    if ( !reserve_e820_ram(e820, kdump_start, kdump_start + kdump_size) )
    {
        printk("Kdump: DISABLED (failed to reserve %luMB (%lukB) at 0x%lx)"
               "\n", kdump_size >> 20, kdump_size >> 10, kdump_start);
        kexec_crash_area.start = kexec_crash_area.size = 0;
    }
    else
    {
        printk("Kdump: %luMB (%lukB) at 0x%lx\n",
               kdump_size >> 20, kdump_size >> 10, kdump_start);
    }
}
/*
 * Final boot step, entered on a freshly reset stack: release (or
 * page-protect) the .init sections and drop into the idle loop.
 * Never returns.
 */
void init_done(void)
{
    extern char __init_begin[], __init_end[];

    /* Free (or page-protect) the init areas. */
    memset(__init_begin, 0xcc, __init_end - __init_begin); /* int3 poison */
#ifndef MEMORY_GUARD
    init_xenheap_pages(__pa(__init_begin), __pa(__init_end));
#endif
    memguard_guard_range(__init_begin, __init_end - __init_begin);
#if defined(CONFIG_X86_64)
    /* Also zap the mapping in the 1:1 area. */
    memguard_guard_range(__va(__pa(__init_begin)), __init_end - __init_begin);
#endif
    printk("Freed %ldkB init memory.\n", (long)(__init_end-__init_begin)>>10);

    startup_cpu_idle_loop();
}
407 static char * __init cmdline_cook(char *p)
408 {
409 p = p ? : "";
410 while ( *p == ' ' )
411 p++;
412 while ( (*p != ' ') && (*p != '\0') )
413 p++;
414 while ( *p == ' ' )
415 p++;
416 return p;
417 }
/*
 * C entry point of the hypervisor, called from the assembler boot path
 * with the physical address of the Multiboot information structure.
 * Parses the command line and memory map, relocates Xen (x86-64) and
 * the boot modules, initialises every subsystem in dependency order,
 * constructs domain 0, and finishes by jumping to init_done() on a
 * reset stack.  Never returns.
 */
void __init __start_xen(unsigned long mbi_p)
{
    char *memmap_type = NULL;
    char *cmdline, *kextra;
    unsigned long _initrd_start = 0, _initrd_len = 0;
    unsigned int initrdidx = 1;
    multiboot_info_t *mbi = __va(mbi_p);
    module_t *mod = (module_t *)__va(mbi->mods_addr);
    unsigned long nr_pages, modules_length, modules_headroom;
    int i, j, e820_warn = 0, bytes = 0;
    struct ns16550_defaults ns16550 = {
        .data_bits = 8,
        .parity    = 'n',
        .stop_bits = 1
    };

    /* Catch early faults before the real trap tables are installed. */
    extern void early_page_fault(void);
    set_intr_gate(TRAP_page_fault, &early_page_fault);

    /* Parse the command-line options. */
    cmdline = cmdline_cook((mbi->flags & MBI_CMDLINE) ?
                           __va(mbi->cmdline) : NULL);
    if ( (kextra = strstr(cmdline, " -- ")) != NULL )
    {
        /*
         * Options after ' -- ' separator belong to dom0.
         *  1. Orphan dom0's options from Xen's command line.
         *  2. Skip all but final leading space from dom0's options.
         */
        *kextra = '\0';
        kextra += 3;
        while ( kextra[1] == ' ' ) kextra++;
    }
    cmdline_parse(cmdline);

    parse_video_info();

    set_current((struct vcpu *)0xfffff000); /* debug sanity */
    idle_vcpu[0] = current;
    set_processor_id(0); /* needed early, for smp_processor_id() */
    if ( cpu_has_efer )
        rdmsrl(MSR_EFER, this_cpu(efer));
    asm volatile ( "mov %%cr4,%0" : "=r" (this_cpu(cr4)) );

    smp_prepare_boot_cpu();

    /* We initialise the serial devices very early so we can get debugging. */
    ns16550.io_base = 0x3f8;
    ns16550.irq     = 4;
    ns16550_init(0, &ns16550);
    ns16550.io_base = 0x2f8;
    ns16550.irq     = 3;
    ns16550_init(1, &ns16550);
    console_init_preirq();

    printk("Command line: %s\n", cmdline);

    printk("Video information:\n");

    /* Print VGA display mode information. */
    switch ( vga_console_info.video_type )
    {
    case XEN_VGATYPE_TEXT_MODE_3:
        printk(" VGA is text mode %dx%d, font 8x%d\n",
               vga_console_info.u.text_mode_3.columns,
               vga_console_info.u.text_mode_3.rows,
               vga_console_info.u.text_mode_3.font_height);
        break;
    case XEN_VGATYPE_VESA_LFB:
        printk(" VGA is graphics mode %dx%d, %d bpp\n",
               vga_console_info.u.vesa_lfb.width,
               vga_console_info.u.vesa_lfb.height,
               vga_console_info.u.vesa_lfb.bits_per_pixel);
        break;
    default:
        printk(" No VGA detected\n");
        break;
    }

    /* Print VBE/DDC EDID information. */
    if ( bootsym(boot_edid_caps) != 0x1313 )
    {
        u16 caps = bootsym(boot_edid_caps);
        printk(" VBE/DDC methods:%s%s%s; ",
               (caps & 1) ? " V1" : "",
               (caps & 2) ? " V2" : "",
               !(caps & 3) ? " none" : "");
        printk("EDID transfer time: %d seconds\n", caps >> 8);
        if ( *(u32 *)bootsym(boot_edid_info) == 0x13131313 )
        {
            printk(" EDID info not retrieved because ");
            if ( !(caps & 3) )
                printk("no DDC retrieval method detected\n");
            else if ( (caps >> 8) > 5 )
                printk("takes longer than 5 seconds\n");
            else
                printk("of reasons unknown\n");
        }
    }

    printk("Disc information:\n");
    printk(" Found %d MBR signatures\n",
           bootsym(boot_mbr_signature_nr));
    printk(" Found %d EDD information structures\n",
           bootsym(boot_edd_info_nr));

    /* Check that we have at least one Multiboot module. */
    if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) )
        EARLY_FAIL("dom0 kernel not specified. "
                   "Check bootloader configuration.\n");

    if ( ((unsigned long)cpu0_stack & (STACK_SIZE-1)) != 0 )
        EARLY_FAIL("Misaligned CPU0 stack.\n");

    /*
     * Build the raw e820 table from whichever source is available, in
     * preference order: Xen's own trampoline-provided e820, the
     * trampoline e801 values, the Multiboot memory map, and finally the
     * bare Multiboot mem_lower/mem_upper limits.
     */
    if ( e820_raw_nr != 0 )
    {
        memmap_type = "Xen-e820";
    }
    else if ( bootsym(lowmem_kb) )
    {
        memmap_type = "Xen-e801";
        e820_raw[0].addr = 0;
        e820_raw[0].size = bootsym(lowmem_kb) << 10;
        e820_raw[0].type = E820_RAM;
        e820_raw[1].addr = 0x100000;
        e820_raw[1].size = bootsym(highmem_kb) << 10;
        e820_raw[1].type = E820_RAM;
        e820_raw_nr = 2;
    }
    else if ( mbi->flags & MBI_MEMMAP )
    {
        memmap_type = "Multiboot-e820";
        while ( (bytes < mbi->mmap_length) && (e820_raw_nr < E820MAX) )
        {
            memory_map_t *map = __va(mbi->mmap_addr + bytes);

            /*
             * This is a gross workaround for a BIOS bug. Some bootloaders do
             * not write e820 map entries into pre-zeroed memory. This is
             * okay if the BIOS fills in all fields of the map entry, but
             * some broken BIOSes do not bother to write the high word of
             * the length field if the length is smaller than 4GB. We
             * detect and fix this by flagging sections below 4GB that
             * appear to be larger than 4GB in size.
             */
            if ( (map->base_addr_high == 0) && (map->length_high != 0) )
            {
                if ( !e820_warn )
                {
                    printk("WARNING: Buggy e820 map detected and fixed "
                           "(truncated length fields).\n");
                    e820_warn = 1;
                }
                map->length_high = 0;
            }

            e820_raw[e820_raw_nr].addr =
                ((u64)map->base_addr_high << 32) | (u64)map->base_addr_low;
            e820_raw[e820_raw_nr].size =
                ((u64)map->length_high << 32) | (u64)map->length_low;
            e820_raw[e820_raw_nr].type = map->type;
            e820_raw_nr++;

            /* The 'size' field itself is not counted in map->size. */
            bytes += map->size + 4;
        }
    }
    else if ( mbi->flags & MBI_MEMLIMITS )
    {
        memmap_type = "Multiboot-e801";
        e820_raw[0].addr = 0;
        e820_raw[0].size = mbi->mem_lower << 10;
        e820_raw[0].type = E820_RAM;
        e820_raw[1].addr = 0x100000;
        e820_raw[1].size = mbi->mem_upper << 10;
        e820_raw[1].type = E820_RAM;
        e820_raw_nr = 2;
    }
    else
    {
        EARLY_FAIL("Bootloader provided no memory information.\n");
    }

    /* Sanitise the raw E820 map to produce a final clean version. */
    max_page = init_e820(memmap_type, e820_raw, &e820_raw_nr);

    /* Create a temporary copy of the E820 map. */
    memcpy(&boot_e820, &e820, sizeof(e820));

    /* Early kexec reservation (explicit static start address). */
    kexec_reserve_area(&boot_e820);

    /*
     * Iterate backwards over all superpage-aligned RAM regions.
     *
     * We require superpage alignment because the boot allocator is not yet
     * initialised. Hence we can only map superpages in the address range
     * 0 to BOOTSTRAP_DIRECTMAP_END, as this is guaranteed not to require
     * dynamic allocation of pagetables.
     *
     * As well as mapping superpages in that range, in preparation for
     * initialising the boot allocator, we also look for a region to which
     * we can relocate the dom0 kernel and other multiboot modules. Also, on
     * x86/64, we relocate Xen to higher memory.
     */
    modules_length = 0;
    for ( i = 0; i < mbi->mods_count; i++ )
        modules_length += mod[i].mod_end - mod[i].mod_start;

    /* ensure mod[0] is mapped before parsing */
    bootstrap_map(mod[0].mod_start, mod[0].mod_end);
    modules_headroom = bzimage_headroom(
        (char *)(unsigned long)mod[0].mod_start,
        (unsigned long)(mod[0].mod_end - mod[0].mod_start));

    for ( i = boot_e820.nr_map-1; i >= 0; i-- )
    {
        uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1;

        /* Superpage-aligned chunks from 16MB to BOOTSTRAP_DIRECTMAP_END. */
        s = (boot_e820.map[i].addr + mask) & ~mask;
        e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
        s = max_t(uint64_t, s, 16 << 20);
        e = min_t(uint64_t, e, BOOTSTRAP_DIRECTMAP_END);
        if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
            continue;

        /* Map the chunk. No memory will need to be allocated to do this. */
        map_pages_to_xen(
            (unsigned long)maddr_to_bootstrap_virt(s),
            s >> PAGE_SHIFT, (e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR);

#if defined(CONFIG_X86_64)
#define reloc_size ((__pa(&_end) + mask) & ~mask)
        /* Is the region suitable for relocating Xen? */
        if ( !xen_phys_start && ((e-s) >= reloc_size) )
        {
            extern l2_pgentry_t l2_xenmap[];
            l4_pgentry_t *pl4e;
            l3_pgentry_t *pl3e;
            l2_pgentry_t *pl2e;
            int i, j, k;

            /* Select relocation address. */
            e -= reloc_size;
            xen_phys_start = e;
            bootsym(trampoline_xen_phys_start) = e;

            /*
             * Perform relocation to new physical address.
             * Before doing so we must sync static/global data with main memory
             * with a barrier(). After this we must *not* modify static/global
             * data until after we have switched to the relocated pagetables!
             */
            barrier();
            move_memory(e, 0, __pa(&_end) - xen_phys_start);

            /* Poison low 1MB to detect stray pointers to physical 0-1MB. */
            memset(maddr_to_bootstrap_virt(e), 0x55, 1U<<20);

            /* Walk initial pagetables, relocating page directory entries. */
            pl4e = __va(__pa(idle_pg_table));
            for ( i = 0 ; i < L4_PAGETABLE_ENTRIES; i++, pl4e++ )
            {
                if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) )
                    continue;
                *pl4e = l4e_from_intpte(l4e_get_intpte(*pl4e) +
                                        xen_phys_start);
                pl3e = l4e_to_l3e(*pl4e);
                for ( j = 0; j < L3_PAGETABLE_ENTRIES; j++, pl3e++ )
                {
                    /* Not present, 1GB mapping, or already relocated? */
                    if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ||
                         (l3e_get_flags(*pl3e) & _PAGE_PSE) ||
                         (l3e_get_pfn(*pl3e) > 0x1000) )
                        continue;
                    *pl3e = l3e_from_intpte(l3e_get_intpte(*pl3e) +
                                            xen_phys_start);
                    pl2e = l3e_to_l2e(*pl3e);
                    for ( k = 0; k < L2_PAGETABLE_ENTRIES; k++, pl2e++ )
                    {
                        /* Not present, PSE, or already relocated? */
                        if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ||
                             (l2e_get_flags(*pl2e) & _PAGE_PSE) ||
                             (l2e_get_pfn(*pl2e) > 0x1000) )
                            continue;
                        *pl2e = l2e_from_intpte(l2e_get_intpte(*pl2e) +
                                                xen_phys_start);
                    }
                }
            }

            /* The only data mappings to be relocated are in the Xen area. */
            pl2e = __va(__pa(l2_xenmap));
            *pl2e++ = l2e_from_pfn(xen_phys_start >> PAGE_SHIFT,
                                   PAGE_HYPERVISOR | _PAGE_PSE);
            for ( i = 1; i < L2_PAGETABLE_ENTRIES; i++, pl2e++ )
            {
                if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
                    continue;
                *pl2e = l2e_from_intpte(l2e_get_intpte(*pl2e) +
                                        xen_phys_start);
            }

            /* Re-sync the stack and then switch to relocated pagetables. */
            asm volatile (
                "rep movsb ; " /* re-sync the stack */
                "movq %%cr4,%%rsi ; "
                "andb $0x7f,%%sil ; "
                "movq %%rsi,%%cr4 ; " /* CR4.PGE == 0 */
                "movq %0,%%cr3 ; " /* CR3 == new pagetables */
                "orb $0x80,%%sil ; "
                "movq %%rsi,%%cr4 " /* CR4.PGE == 1 */
                : : "r" (__pa(idle_pg_table)), "S" (cpu0_stack),
                "D" (__va(__pa(cpu0_stack))), "c" (STACK_SIZE) : "memory" );
        }
#endif

        /* Is the region suitable for relocating the multiboot modules? */
        if ( !initial_images_start && (s < e) &&
             ((e-s) >= (modules_length+modules_headroom)) )
        {
            initial_images_end = e;
            e = (e - modules_length) & PAGE_MASK;
            initial_images_start = e;
            e -= modules_headroom;
            initial_images_base = e;
            e += modules_length + modules_headroom;
            /* Move modules highest-first so overlapping copies are safe. */
            for ( j = mbi->mods_count-1; j >= 0; j-- )
            {
                e -= mod[j].mod_end - mod[j].mod_start;
                move_memory(e, mod[j].mod_start, mod[j].mod_end);
                mod[j].mod_end += e - mod[j].mod_start;
                mod[j].mod_start = e;
            }
        }

        if ( !kexec_crash_area.start && (s < e) &&
             ((e-s) >= kexec_crash_area.size) )
        {
            e = (e - kexec_crash_area.size) & PAGE_MASK;
            kexec_crash_area.start = e;
        }
    }

    if ( !initial_images_start )
        EARLY_FAIL("Not enough memory to relocate the dom0 kernel image.\n");
    reserve_e820_ram(&boot_e820, initial_images_base, initial_images_end);

#if defined(CONFIG_X86_32)
    xenheap_initial_phys_start = (PFN_UP(__pa(&_end)) + 1) << PAGE_SHIFT;
    /* Must pass a single mapped page for populating bootmem_region_list. */
    init_boot_pages(__pa(&_end), xenheap_initial_phys_start);
    xenheap_phys_end = DIRECTMAP_MBYTES << 20;
#else
    if ( !xen_phys_start )
        EARLY_FAIL("Not enough memory to relocate Xen.\n");
    reserve_e820_ram(&boot_e820, __pa(&_start), __pa(&_end));
#endif

    /* Late kexec reservation (dynamic start address). */
    kexec_reserve_area(&boot_e820);

    /*
     * Walk every RAM region and map it in its entirety (on x86/64, at least)
     * and notify it to the boot allocator.
     */
    for ( i = 0; i < boot_e820.nr_map; i++ )
    {
        uint64_t s, e, map_s, map_e, mask = PAGE_SIZE - 1;

        /* Only page alignment required now. */
        s = (boot_e820.map[i].addr + mask) & ~mask;
        e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
#if defined(CONFIG_X86_32)
        s = max_t(uint64_t, s, xenheap_phys_end);
#else
        s = max_t(uint64_t, s, 1<<20);
#endif
        if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
            continue;

        /* Need to create mappings above 16MB. */
        map_s = max_t(uint64_t, s, 16<<20);
        map_e = e;
#if defined(CONFIG_X86_32) /* mappings are truncated on x86_32 */
        map_e = min_t(uint64_t, map_e, BOOTSTRAP_DIRECTMAP_END);
#endif

        /* Pass mapped memory to allocator /before/ creating new mappings. */
        init_boot_pages(s, min_t(uint64_t, map_s, e));

        /* Create new mappings /before/ passing memory to the allocator. */
        if ( map_s < map_e )
            map_pages_to_xen(
                (unsigned long)maddr_to_bootstrap_virt(map_s),
                map_s >> PAGE_SHIFT, (map_e-map_s) >> PAGE_SHIFT,
                PAGE_HYPERVISOR);

        /* Pass remainder of this memory chunk to the allocator. */
        init_boot_pages(map_s, e);
    }

    memguard_init();

    nr_pages = 0;
    for ( i = 0; i < e820.nr_map; i++ )
        if ( e820.map[i].type == E820_RAM )
            nr_pages += e820.map[i].size >> PAGE_SHIFT;
    printk("System RAM: %luMB (%lukB)\n",
           nr_pages >> (20 - PAGE_SHIFT),
           nr_pages << (PAGE_SHIFT - 10));
    total_pages = nr_pages;

    /* Sanity check for unwanted bloat of certain hypercall structures. */
    BUILD_BUG_ON(sizeof(((struct xen_platform_op *)0)->u) !=
                 sizeof(((struct xen_platform_op *)0)->u.pad));
    BUILD_BUG_ON(sizeof(((struct xen_domctl *)0)->u) !=
                 sizeof(((struct xen_domctl *)0)->u.pad));
    BUILD_BUG_ON(sizeof(((struct xen_sysctl *)0)->u) !=
                 sizeof(((struct xen_sysctl *)0)->u.pad));

    BUILD_BUG_ON(sizeof(start_info_t) > PAGE_SIZE);
    BUILD_BUG_ON(sizeof(shared_info_t) > PAGE_SIZE);
    BUILD_BUG_ON(sizeof(struct vcpu_info) != 64);

#ifdef CONFIG_COMPAT
    BUILD_BUG_ON(sizeof(((struct compat_platform_op *)0)->u) !=
                 sizeof(((struct compat_platform_op *)0)->u.pad));
    BUILD_BUG_ON(sizeof(start_info_compat_t) > PAGE_SIZE);
    BUILD_BUG_ON(sizeof(struct compat_vcpu_info) != 64);
#endif

    /* Check definitions in public headers match internal defs. */
    BUILD_BUG_ON(__HYPERVISOR_VIRT_START != HYPERVISOR_VIRT_START);
#ifdef HYPERVISOR_VIRT_END
    BUILD_BUG_ON(__HYPERVISOR_VIRT_END != HYPERVISOR_VIRT_END);
#endif
    BUILD_BUG_ON(MACH2PHYS_VIRT_START != RO_MPT_VIRT_START);
    BUILD_BUG_ON(MACH2PHYS_VIRT_END != RO_MPT_VIRT_END);

    init_frametable();

    acpi_boot_table_init();

    acpi_numa_init();

    numa_initmem_init(0, max_page);

#if defined(CONFIG_X86_32)
    /* Initialise the Xen heap. */
    init_xenheap_pages(xenheap_initial_phys_start, xenheap_phys_end);
    nr_pages = (xenheap_phys_end - xenheap_initial_phys_start) >> PAGE_SHIFT;
    printk("Xen heap: %luMB (%lukB)\n",
           nr_pages >> (20 - PAGE_SHIFT),
           nr_pages << (PAGE_SHIFT - 10));
#endif

    end_boot_allocator();
    early_boot = 0;

#if defined(CONFIG_X86_64)
    vesa_init();
#endif

    softirq_init();

    early_cpu_init();

    paging_init();

    tboot_probe();

    /* Unmap the first page of CPU0's stack. */
    memguard_guard_stack(cpu0_stack);

    open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period);

    if ( opt_watchdog )
        nmi_watchdog = NMI_LOCAL_APIC;

    sort_exception_tables();

    find_smp_config();

    dmi_scan_machine();

    generic_apic_probe();

    if ( x2apic_is_available() )
        enable_x2apic();

    acpi_boot_init();

    init_cpu_to_node();

    if ( smp_found_config )
        get_smp_config();

#ifdef CONFIG_X86_64
    /* Low mappings were only needed for some BIOS table parsing. */
    zap_low_mappings();
#endif

    init_apic_mappings();

    init_IRQ();

    percpu_init_areas();

    /* NOTE: xsm_init() is passed &initrdidx and may adjust it. */
    xsm_init(&initrdidx, mbi, initial_images_start);

    init_idle_domain();

    trap_init();

    rcu_init();

    timer_init();

    early_time_init();

    arch_init_memory();

    identify_cpu(&boot_cpu_data);
    if ( cpu_has_fxsr )
        set_in_cr4(X86_CR4_OSFXSR);
    if ( cpu_has_xmm )
        set_in_cr4(X86_CR4_OSXMMEXCPT);

    local_irq_enable();

#ifdef CONFIG_X86_64
    vesa_mtrr_init();
#endif

    if ( opt_nosmp )
        max_cpus = 0;

    smp_prepare_cpus(max_cpus);

    spin_debug_enable();

    /*
     * Initialise higher-level timer functions. We do this fairly late
     * (post-SMP) because the time bases and scale factors need to be updated
     * regularly, and SMP initialisation can cause a long delay with
     * interrupts not yet enabled.
     */
    init_xen_time();

    initialize_keytable();

    console_init_postirq();

    /* Bring up secondary CPUs (bounded by "maxcpus="). */
    for_each_present_cpu ( i )
    {
        if ( num_online_cpus() >= max_cpus )
            break;
        if ( !cpu_online(i) )
        {
            rcu_online_cpu(i);
            __cpu_up(i);
        }

        /* Set up cpu_to_node[]. */
        srat_detect_node(i);
        /* Set up node_to_cpumask based on cpu_to_node[]. */
        numa_add_cpu(i);
    }

    printk("Brought up %ld CPUs\n", (long)num_online_cpus());
    smp_cpus_done(max_cpus);

    initialise_gdb(); /* could be moved earlier */

    do_initcalls();

    if ( opt_watchdog )
        watchdog_enable();

    if ( !tboot_protect_mem_regions() )
        panic("Could not protect TXT memory regions\n");

    /* Create initial domain 0. */
    dom0 = domain_create(0, DOMCRF_s3_integrity, DOM0_SSIDREF);
    if ( (dom0 == NULL) || (alloc_dom0_vcpu0() == NULL) )
        panic("Error creating domain 0\n");

    dom0->is_privileged = 1;
    dom0->target = NULL;

    /* Grab the DOM0 command line. */
    cmdline = (char *)(mod[0].string ? __va(mod[0].string) : NULL);
    if ( (cmdline != NULL) || (kextra != NULL) )
    {
        static char dom0_cmdline[MAX_GUEST_CMDLINE];

        cmdline = cmdline_cook(cmdline);
        safe_strcpy(dom0_cmdline, cmdline);

        if ( kextra != NULL )
            /* kextra always includes exactly one leading space. */
            safe_strcat(dom0_cmdline, kextra);

        /* Append any extra parameters. */
        if ( skip_ioapic_setup && !strstr(dom0_cmdline, "noapic") )
            safe_strcat(dom0_cmdline, " noapic");
        if ( acpi_skip_timer_override &&
             !strstr(dom0_cmdline, "acpi_skip_timer_override") )
            safe_strcat(dom0_cmdline, " acpi_skip_timer_override");
        if ( (strlen(acpi_param) == 0) && acpi_disabled )
        {
            printk("ACPI is disabled, notifying Domain 0 (acpi=off)\n");
            safe_strcpy(acpi_param, "off");
        }
        if ( (strlen(acpi_param) != 0) && !strstr(dom0_cmdline, "acpi=") )
        {
            safe_strcat(dom0_cmdline, " acpi=");
            safe_strcat(dom0_cmdline, acpi_param);
        }

        cmdline = dom0_cmdline;
    }

    if ( (initrdidx > 0) && (initrdidx < mbi->mods_count) )
    {
        _initrd_start = mod[initrdidx].mod_start;
        _initrd_len   = mod[initrdidx].mod_end - mod[initrdidx].mod_start;
    }

    if ( xen_cpuidle )
        xen_processor_pmbits |= XEN_PROCESSOR_PM_CX;

    /*
     * We're going to setup domain0 using the module(s) that we stashed safely
     * above our heap. The second module, if present, is an initrd ramdisk.
     */
    if ( construct_dom0(dom0,
                        initial_images_base,
                        initial_images_start,
                        mod[0].mod_end-mod[0].mod_start,
                        _initrd_start,
                        _initrd_len,
                        cmdline) != 0)
        panic("Could not set up DOM0 guest OS\n");

    /* Scrub RAM that is still free and so may go to an unprivileged domain. */
    scrub_heap_pages();

    init_trace_bufs();

    init_tmem();

    console_endboot();

    /* Hide UART from DOM0 if we're using it */
    serial_endboot();

    domain_unpause_by_systemcontroller(dom0);

    reset_stack_and_jump(init_done);
}
/*
 * Fill *info with the space-separated list of interface capability
 * strings (e.g. "xen-3.0-x86_64 hvm-3.0-x86_32 ...") this build of the
 * hypervisor supports.
 */
void arch_get_xen_caps(xen_capabilities_info_t *info)
{
    /* Interface name is always xen-3.0-* for Xen-3.x. */
    int major = 3, minor = 0;
    char s[32];

    (*info)[0] = '\0';

#if defined(CONFIG_X86_32)

    snprintf(s, sizeof(s), "xen-%d.%d-x86_32p ", major, minor);
    safe_strcat(*info, s);
    if ( hvm_enabled )
    {
        snprintf(s, sizeof(s), "hvm-%d.%d-x86_32 ", major, minor);
        safe_strcat(*info, s);
        snprintf(s, sizeof(s), "hvm-%d.%d-x86_32p ", major, minor);
        safe_strcat(*info, s);
    }

#elif defined(CONFIG_X86_64)

    snprintf(s, sizeof(s), "xen-%d.%d-x86_64 ", major, minor);
    safe_strcat(*info, s);
#ifdef CONFIG_COMPAT
    snprintf(s, sizeof(s), "xen-%d.%d-x86_32p ", major, minor);
    safe_strcat(*info, s);
#endif
    if ( hvm_enabled )
    {
        snprintf(s, sizeof(s), "hvm-%d.%d-x86_32 ", major, minor);
        safe_strcat(*info, s);
        snprintf(s, sizeof(s), "hvm-%d.%d-x86_32p ", major, minor);
        safe_strcat(*info, s);
        snprintf(s, sizeof(s), "hvm-%d.%d-x86_64 ", major, minor);
        safe_strcat(*info, s);
    }

#endif
}
/*
 * Return 1 if the physical range [start,end) overlaps any memory
 * occupied by the hypervisor itself (trampoline, text+data, per-CPU
 * areas, bss), 0 otherwise.  The region table is computed lazily on
 * first call and cached in a static array.
 */
int xen_in_range(paddr_t start, paddr_t end)
{
    int i;
    static struct {
        paddr_t s, e;
    } xen_regions[4];

    /* initialize first time */
    if ( !xen_regions[0].s )
    {
        /* S3 resume code (and other real mode trampoline code) */
        xen_regions[0].s = bootsym_phys(trampoline_start);
        xen_regions[0].e = bootsym_phys(trampoline_end);
        /* hypervisor code + data */
        xen_regions[1].s = __pa(&_stext);
        xen_regions[1].e = __pa(&__init_begin);
        /* per-cpu data */
        xen_regions[2].s = __pa(&__per_cpu_start);
        xen_regions[2].e = xen_regions[2].s +
            (((paddr_t)last_cpu(cpu_possible_map) + 1) << PERCPU_SHIFT);
        /* bss */
        xen_regions[3].s = __pa(&__bss_start);
        xen_regions[3].e = __pa(&_end);
    }

    /* Standard half-open interval overlap test against each region. */
    for ( i = 0; i < ARRAY_SIZE(xen_regions); i++ )
    {
        if ( (start < xen_regions[i].e) && (end > xen_regions[i].s) )
            return 1;
    }

    return 0;
}
1157 /*
1158 * Local variables:
1159 * mode: C
1160 * c-set-style: "BSD"
1161 * c-basic-offset: 4
1162 * tab-width: 4
1163 * indent-tabs-mode: nil
1164 * End:
1165 */