debuggers.hg

view xen/arch/x86/setup.c @ 3674:fb875591fd72

bitkeeper revision 1.1159.223.63 (42028527-fv-d9BM0_LRp8UKGP19gQ)

Fix NMI deferral.
Signed-off-by: keir.fraser@cl.cam.ac.uk
author kaf24@scramble.cl.cam.ac.uk
date Thu Feb 03 20:10:15 2005 +0000 (2005-02-03)
parents 7f2bf9fecd7e
children d1e0d9a8fde0 f620c41a1fef 0dc3b8b8c298
line source
2 #include <xen/config.h>
3 #include <xen/init.h>
4 #include <xen/lib.h>
5 #include <xen/sched.h>
6 #include <xen/pci.h>
7 #include <xen/serial.h>
8 #include <xen/softirq.h>
9 #include <xen/acpi.h>
10 #include <xen/console.h>
11 #include <xen/trace.h>
12 #include <xen/multiboot.h>
13 #include <asm/bitops.h>
14 #include <asm/smp.h>
15 #include <asm/processor.h>
16 #include <asm/mpspec.h>
17 #include <asm/apic.h>
18 #include <asm/desc.h>
19 #include <asm/domain_page.h>
20 #include <asm/pdb.h>
21 #include <asm/shadow.h>
22 #include <asm/e820.h>
24 /* opt_dom0_mem: Kilobytes of memory allocated to domain 0. */
25 static unsigned int opt_dom0_mem = 16000;
26 integer_param("dom0_mem", opt_dom0_mem);
28 /*
29 * opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the
30 * pfn_info table and allocation bitmap.
31 */
32 static unsigned int opt_xenheap_megabytes = XENHEAP_DEFAULT_MB;
33 #if defined(__x86_64__)
34 integer_param("xenheap_megabytes", opt_xenheap_megabytes);
35 #endif
37 /* opt_noht: If true, Hyperthreading is ignored. */
38 int opt_noht = 0;
39 boolean_param("noht", opt_noht);
41 /* opt_noacpi: If true, ACPI tables are not parsed. */
42 static int opt_noacpi = 0;
43 boolean_param("noacpi", opt_noacpi);
45 /* opt_nosmp: If true, secondary processors are ignored. */
46 static int opt_nosmp = 0;
47 boolean_param("nosmp", opt_nosmp);
49 /* opt_ignorebiostables: If true, ACPI and MP tables are ignored. */
50 /* NB. This flag implies 'nosmp' and 'noacpi'. */
51 static int opt_ignorebiostables = 0;
52 boolean_param("ignorebiostables", opt_ignorebiostables);
54 /* opt_watchdog: If true, run a watchdog NMI on each processor. */
55 static int opt_watchdog = 0;
56 boolean_param("watchdog", opt_watchdog);
58 unsigned long xenheap_phys_end;
60 extern void arch_init_memory(void);
61 extern void init_IRQ(void);
62 extern void trap_init(void);
63 extern void time_init(void);
64 extern void ac_timer_init(void);
65 extern void initialize_keytable();
66 extern int do_timer_lists_from_pit;
68 char ignore_irq13; /* set if exception 16 works */
69 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1 };
71 #if defined(__x86_64__)
72 unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE;
73 #else
74 unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE;
75 #endif
76 EXPORT_SYMBOL(mmu_cr4_features);
78 unsigned long wait_init_idle;
80 struct domain *idle_task[NR_CPUS] = { &idle0_task };
82 #ifdef CONFIG_ACPI_INTERPRETER
83 int acpi_disabled = 0;
84 #else
85 int acpi_disabled = 1;
86 #endif
87 EXPORT_SYMBOL(acpi_disabled);
89 int phys_proc_id[NR_CPUS];
90 int logical_proc_id[NR_CPUS];
92 #if defined(__i386__)
94 /* Standard macro to see if a specific flag is changeable */
95 static inline int flag_is_changeable_p(u32 flag)
96 {
97 u32 f1, f2;
99 asm("pushfl\n\t"
100 "pushfl\n\t"
101 "popl %0\n\t"
102 "movl %0,%1\n\t"
103 "xorl %2,%0\n\t"
104 "pushl %0\n\t"
105 "popfl\n\t"
106 "pushfl\n\t"
107 "popl %0\n\t"
108 "popfl\n\t"
109 : "=&r" (f1), "=&r" (f2)
110 : "ir" (flag));
112 return ((f1^f2) & flag) != 0;
113 }
115 /* Probe for the CPUID instruction */
116 static int __init have_cpuid_p(void)
117 {
118 return flag_is_changeable_p(X86_EFLAGS_ID);
119 }
121 #elif defined(__x86_64__)
123 #define have_cpuid_p() (1)
125 #endif
127 void __init get_cpu_vendor(struct cpuinfo_x86 *c)
128 {
129 char *v = c->x86_vendor_id;
131 if (!strcmp(v, "GenuineIntel"))
132 c->x86_vendor = X86_VENDOR_INTEL;
133 else if (!strcmp(v, "AuthenticAMD"))
134 c->x86_vendor = X86_VENDOR_AMD;
135 else if (!strcmp(v, "CyrixInstead"))
136 c->x86_vendor = X86_VENDOR_CYRIX;
137 else if (!strcmp(v, "UMC UMC UMC "))
138 c->x86_vendor = X86_VENDOR_UMC;
139 else if (!strcmp(v, "CentaurHauls"))
140 c->x86_vendor = X86_VENDOR_CENTAUR;
141 else if (!strcmp(v, "NexGenDriven"))
142 c->x86_vendor = X86_VENDOR_NEXGEN;
143 else if (!strcmp(v, "RiseRiseRise"))
144 c->x86_vendor = X86_VENDOR_RISE;
145 else if (!strcmp(v, "GenuineTMx86") ||
146 !strcmp(v, "TransmetaCPU"))
147 c->x86_vendor = X86_VENDOR_TRANSMETA;
148 else
149 c->x86_vendor = X86_VENDOR_UNKNOWN;
150 }
152 static void __init init_intel(struct cpuinfo_x86 *c)
153 {
154 /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it */
155 if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 )
156 clear_bit(X86_FEATURE_SEP, &c->x86_capability);
158 #ifdef CONFIG_SMP
159 if ( test_bit(X86_FEATURE_HT, &c->x86_capability) )
160 {
161 u32 eax, ebx, ecx, edx;
162 int initial_apic_id, siblings, cpu = smp_processor_id();
164 cpuid(1, &eax, &ebx, &ecx, &edx);
165 ht_per_core = siblings = (ebx & 0xff0000) >> 16;
167 if ( opt_noht )
168 clear_bit(X86_FEATURE_HT, &c->x86_capability[0]);
170 if ( siblings <= 1 )
171 {
172 printk(KERN_INFO "CPU#%d: Hyper-Threading is disabled\n", cpu);
173 }
174 else if ( siblings > 2 )
175 {
176 panic("We don't support more than two logical CPUs per package!");
177 }
178 else
179 {
180 initial_apic_id = ebx >> 24 & 0xff;
181 phys_proc_id[cpu] = initial_apic_id >> 1;
182 logical_proc_id[cpu] = initial_apic_id & 1;
183 printk(KERN_INFO "CPU#%d: Physical ID: %d, Logical ID: %d\n",
184 cpu, phys_proc_id[cpu], logical_proc_id[cpu]);
185 }
186 }
187 #endif
188 }
190 static void __init init_amd(struct cpuinfo_x86 *c)
191 {
192 /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
193 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
194 clear_bit(0*32+31, &c->x86_capability);
196 switch(c->x86)
197 {
198 case 5:
199 panic("AMD K6 is not supported.\n");
200 case 6: /* An Athlon/Duron. We can trust the BIOS probably */
201 break;
202 }
203 }
205 /*
206 * This does the hard work of actually picking apart the CPU stuff...
207 */
208 void __init identify_cpu(struct cpuinfo_x86 *c)
209 {
210 int junk, i, cpu = smp_processor_id();
211 u32 xlvl, tfms;
213 phys_proc_id[cpu] = cpu;
214 logical_proc_id[cpu] = 0;
216 c->x86_vendor = X86_VENDOR_UNKNOWN;
217 c->cpuid_level = -1; /* CPUID not detected */
218 c->x86_model = c->x86_mask = 0; /* So far unknown... */
219 c->x86_vendor_id[0] = '\0'; /* Unset */
220 memset(&c->x86_capability, 0, sizeof c->x86_capability);
222 if ( !have_cpuid_p() )
223 panic("Ancient processors not supported\n");
225 /* Get vendor name */
226 cpuid(0x00000000, &c->cpuid_level,
227 (int *)&c->x86_vendor_id[0],
228 (int *)&c->x86_vendor_id[8],
229 (int *)&c->x86_vendor_id[4]);
231 get_cpu_vendor(c);
233 if ( c->cpuid_level == 0 )
234 panic("Decrepit CPUID not supported\n");
236 cpuid(0x00000001, &tfms, &junk, &junk,
237 &c->x86_capability[0]);
238 c->x86 = (tfms >> 8) & 15;
239 c->x86_model = (tfms >> 4) & 15;
240 c->x86_mask = tfms & 15;
242 /* AMD-defined flags: level 0x80000001 */
243 xlvl = cpuid_eax(0x80000000);
244 if ( (xlvl & 0xffff0000) == 0x80000000 ) {
245 if ( xlvl >= 0x80000001 )
246 c->x86_capability[1] = cpuid_edx(0x80000001);
247 }
249 /* Transmeta-defined flags: level 0x80860001 */
250 xlvl = cpuid_eax(0x80860000);
251 if ( (xlvl & 0xffff0000) == 0x80860000 ) {
252 if ( xlvl >= 0x80860001 )
253 c->x86_capability[2] = cpuid_edx(0x80860001);
254 }
256 printk("CPU%d: Before vendor init, caps: %08x %08x %08x, vendor = %d\n",
257 smp_processor_id(),
258 c->x86_capability[0],
259 c->x86_capability[1],
260 c->x86_capability[2],
261 c->x86_vendor);
263 switch ( c->x86_vendor ) {
264 case X86_VENDOR_INTEL:
265 init_intel(c);
266 break;
267 case X86_VENDOR_AMD:
268 init_amd(c);
269 break;
270 case X86_VENDOR_UNKNOWN: /* Connectix Virtual PC reports this */
271 break;
272 case X86_VENDOR_CENTAUR:
273 break;
274 default:
275 printk("Unknown CPU identifier (%d): continuing anyway, "
276 "but might fail.\n", c->x86_vendor);
277 }
279 printk("CPU caps: %08x %08x %08x %08x\n",
280 c->x86_capability[0],
281 c->x86_capability[1],
282 c->x86_capability[2],
283 c->x86_capability[3]);
285 /*
286 * On SMP, boot_cpu_data holds the common feature set between
287 * all CPUs; so make sure that we indicate which features are
288 * common between the CPUs. The first time this routine gets
289 * executed, c == &boot_cpu_data.
290 */
291 if ( c != &boot_cpu_data ) {
292 /* AND the already accumulated flags with these */
293 for ( i = 0 ; i < NCAPINTS ; i++ )
294 boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
295 }
296 }
299 unsigned long cpu_initialized;
300 void __init cpu_init(void)
301 {
302 #if defined(__i386__) /* XXX */
303 int nr = smp_processor_id();
304 struct tss_struct * t = &init_tss[nr];
306 if ( test_and_set_bit(nr, &cpu_initialized) )
307 panic("CPU#%d already initialized!!!\n", nr);
308 printk("Initializing CPU#%d\n", nr);
310 t->bitmap = IOBMP_INVALID_OFFSET;
311 memset(t->io_bitmap, ~0, sizeof(t->io_bitmap));
313 /* Set up GDT and IDT. */
314 SET_GDT_ENTRIES(current, DEFAULT_GDT_ENTRIES);
315 SET_GDT_ADDRESS(current, DEFAULT_GDT_ADDRESS);
316 __asm__ __volatile__("lgdt %0": "=m" (*current->mm.gdt));
317 __asm__ __volatile__("lidt %0": "=m" (idt_descr));
319 /* No nested task. */
320 __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl");
322 /* Ensure FPU gets initialised for each domain. */
323 stts();
325 /* Set up and load the per-CPU TSS and LDT. */
326 t->ss0 = __HYPERVISOR_DS;
327 t->esp0 = get_stack_top();
328 set_tss_desc(nr,t);
329 load_TR(nr);
330 __asm__ __volatile__("lldt %%ax"::"a" (0));
332 /* Clear all 6 debug registers. */
333 #define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) );
334 CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
335 #undef CD
337 /* Install correct page table. */
338 write_ptbase(&current->mm);
340 init_idle_task();
341 #endif
342 }
344 static void __init do_initcalls(void)
345 {
346 initcall_t *call;
347 for ( call = &__initcall_start; call < &__initcall_end; call++ )
348 (*call)();
349 }
351 unsigned long pci_mem_start = 0x10000000;
353 static void __init start_of_day(void)
354 {
355 unsigned long low_mem_size;
357 #ifdef MEMORY_GUARD
358 /* Unmap the first page of CPU0's stack. */
359 extern unsigned long cpu0_stack[];
360 memguard_guard_range(cpu0_stack, PAGE_SIZE);
361 #endif
363 open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period);
365 if ( opt_watchdog )
366 nmi_watchdog = NMI_LOCAL_APIC;
368 sort_exception_tables();
370 arch_do_createdomain(current);
372 /* Tell the PCI layer not to allocate too close to the RAM area.. */
373 low_mem_size = ((max_page << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
374 if ( low_mem_size > pci_mem_start ) pci_mem_start = low_mem_size;
376 identify_cpu(&boot_cpu_data); /* get CPU type info */
377 if ( cpu_has_fxsr ) set_in_cr4(X86_CR4_OSFXSR);
378 if ( cpu_has_xmm ) set_in_cr4(X86_CR4_OSXMMEXCPT);
379 #ifdef CONFIG_SMP
380 if ( opt_ignorebiostables )
381 {
382 opt_nosmp = 1; /* No SMP without configuration */
383 opt_noacpi = 1; /* ACPI will just confuse matters also */
384 }
385 else
386 {
387 find_smp_config();
388 smp_alloc_memory(); /* trampoline which other CPUs jump at */
389 }
390 #endif
391 paging_init(); /* not much here now, but sets up fixmap */
392 if ( !opt_noacpi )
393 acpi_boot_init();
394 #ifdef CONFIG_SMP
395 if ( smp_found_config )
396 get_smp_config();
397 #endif
398 scheduler_init();
399 init_IRQ(); /* installs simple interrupt wrappers. Starts HZ clock. */
400 trap_init();
401 time_init(); /* installs software handler for HZ clock. */
402 init_apic_mappings(); /* make APICs addressable in our pagetables. */
404 arch_init_memory();
406 #ifndef CONFIG_SMP
407 APIC_init_uniprocessor();
408 #else
409 if ( opt_nosmp )
410 APIC_init_uniprocessor();
411 else
412 smp_boot_cpus();
413 /*
414 * Does loads of stuff, including kicking the local
415 * APIC, and the IO APIC after other CPUs are booted.
416 * Each IRQ is preferably handled by IO-APIC, but
417 * fall thru to 8259A if we have to (but slower).
418 */
419 #endif
421 __sti();
423 initialize_keytable(); /* call back handling for key codes */
425 serial_init_stage2();
427 #ifdef XEN_DEBUGGER
428 initialize_pdb(); /* pervasive debugger */
429 #endif
431 if ( !cpu_has_apic )
432 {
433 do_timer_lists_from_pit = 1;
434 if ( smp_num_cpus != 1 )
435 panic("We need local APICs on SMP machines!");
436 }
438 ac_timer_init(); /* init accurate timers */
439 init_xen_time(); /* initialise the time */
440 schedulers_start(); /* start scheduler for each CPU */
442 check_nmi_watchdog();
444 #ifdef CONFIG_PCI
445 pci_init();
446 #endif
447 do_initcalls();
449 #ifdef CONFIG_SMP
450 wait_init_idle = cpu_online_map;
451 clear_bit(smp_processor_id(), &wait_init_idle);
452 smp_threads_ready = 1;
453 smp_commence(); /* Tell other CPUs that state of the world is stable. */
454 while ( wait_init_idle != 0 )
455 {
456 cpu_relax();
457 barrier();
458 }
459 #endif
461 watchdog_on = 1;
462 }
464 void __init __start_xen(multiboot_info_t *mbi)
465 {
466 unsigned char *cmdline;
467 module_t *mod = (module_t *)__va(mbi->mods_addr);
468 void *heap_start;
469 unsigned long firsthole_start, nr_pages;
470 unsigned long dom0_memory_start, dom0_memory_end;
471 unsigned long initial_images_start, initial_images_end;
472 struct e820entry e820_raw[E820MAX];
473 int i, e820_raw_nr = 0, bytes = 0;
475 /* Parse the command-line options. */
476 if ( (mbi->flags & MBI_CMDLINE) && (mbi->cmdline != 0) )
477 cmdline_parse(__va(mbi->cmdline));
479 /* Must do this early -- e.g., spinlocks rely on get_current(). */
480 set_current(&idle0_task);
482 /* We initialise the serial devices very early so we can get debugging. */
483 serial_init_stage1();
485 init_console();
487 /* Check that we have at least one Multiboot module. */
488 if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) )
489 {
490 printk("FATAL ERROR: Require at least one Multiboot module.\n");
491 for ( ; ; ) ;
492 }
494 xenheap_phys_end = opt_xenheap_megabytes << 20;
496 if ( mbi->flags & MBI_MEMMAP )
497 {
498 while ( bytes < mbi->mmap_length )
499 {
500 memory_map_t *map = __va(mbi->mmap_addr + bytes);
501 e820_raw[e820_raw_nr].addr =
502 ((u64)map->base_addr_high << 32) | (u64)map->base_addr_low;
503 e820_raw[e820_raw_nr].size =
504 ((u64)map->length_high << 32) | (u64)map->length_low;
505 e820_raw[e820_raw_nr].type =
506 (map->type > E820_NVS) ? E820_RESERVED : map->type;
507 e820_raw_nr++;
508 bytes += map->size + 4;
509 }
510 }
511 else if ( mbi->flags & MBI_MEMLIMITS )
512 {
513 e820_raw[0].addr = 0;
514 e820_raw[0].size = mbi->mem_lower << 10;
515 e820_raw[0].type = E820_RAM;
516 e820_raw[1].addr = 0x100000;
517 e820_raw[1].size = mbi->mem_upper << 10;
518 e820_raw[1].type = E820_RAM;
519 e820_raw_nr = 2;
520 }
521 else
522 {
523 printk("FATAL ERROR: Bootloader provided no memory information.\n");
524 for ( ; ; ) ;
525 }
527 max_page = init_e820(e820_raw, e820_raw_nr);
529 /* Find the first high-memory RAM hole. */
530 for ( i = 0; i < e820.nr_map; i++ )
531 if ( (e820.map[i].type == E820_RAM) &&
532 (e820.map[i].addr >= 0x100000) )
533 break;
534 firsthole_start = e820.map[i].addr + e820.map[i].size;
536 /* Relocate the Multiboot modules. */
537 initial_images_start = xenheap_phys_end;
538 initial_images_end = initial_images_start +
539 (mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
540 if ( initial_images_end > firsthole_start )
541 {
542 printk("Not enough memory to stash the DOM0 kernel image.\n");
543 for ( ; ; ) ;
544 }
545 #if defined(__i386__)
546 memmove((void *)initial_images_start, /* use low mapping */
547 (void *)mod[0].mod_start, /* use low mapping */
548 mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
549 #elif defined(__x86_64__)
550 memmove(__va(initial_images_start),
551 __va(mod[0].mod_start),
552 mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
553 #endif
555 /* Initialise boot-time allocator with all RAM situated after modules. */
556 heap_start = memguard_init(&_end);
557 heap_start = __va(init_boot_allocator(__pa(heap_start)));
558 nr_pages = 0;
559 for ( i = 0; i < e820.nr_map; i++ )
560 {
561 if ( e820.map[i].type != E820_RAM )
562 continue;
563 nr_pages += e820.map[i].size >> PAGE_SHIFT;
564 if ( (e820.map[i].addr + e820.map[i].size) >= initial_images_end )
565 init_boot_pages((e820.map[i].addr < initial_images_end) ?
566 initial_images_end : e820.map[i].addr,
567 e820.map[i].addr + e820.map[i].size);
568 }
570 printk("System RAM: %luMB (%lukB)\n",
571 nr_pages >> (20 - PAGE_SHIFT),
572 nr_pages << (PAGE_SHIFT - 10));
574 /* Allocate an aligned chunk of RAM for DOM0. */
575 dom0_memory_start = alloc_boot_pages(opt_dom0_mem << 10, 4UL << 20);
576 dom0_memory_end = dom0_memory_start + (opt_dom0_mem << 10);
577 if ( dom0_memory_start == 0 )
578 {
579 printk("Not enough memory for DOM0 memory reservation.\n");
580 for ( ; ; ) ;
581 }
583 init_frametable();
585 end_boot_allocator();
587 init_xenheap_pages(__pa(heap_start), xenheap_phys_end);
588 printk("Xen heap: %luMB (%lukB)\n",
589 (xenheap_phys_end-__pa(heap_start)) >> 20,
590 (xenheap_phys_end-__pa(heap_start)) >> 10);
592 /* Initialise the slab allocator. */
593 xmem_cache_init();
594 xmem_cache_sizes_init(max_page);
596 domain_startofday();
598 start_of_day();
600 grant_table_init();
602 shadow_mode_init();
604 /* Create initial domain 0. */
605 dom0 = do_createdomain(0, 0);
606 if ( dom0 == NULL )
607 panic("Error creating domain 0\n");
609 set_bit(DF_PRIVILEGED, &dom0->flags);
611 /* Grab the DOM0 command line. Skip past the image name. */
612 cmdline = (unsigned char *)(mod[0].string ? __va(mod[0].string) : NULL);
613 if ( cmdline != NULL )
614 {
615 while ( *cmdline == ' ' ) cmdline++;
616 if ( (cmdline = strchr(cmdline, ' ')) != NULL )
617 while ( *cmdline == ' ' ) cmdline++;
618 }
620 /*
621 * We're going to setup domain0 using the module(s) that we stashed safely
622 * above our heap. The second module, if present, is an initrd ramdisk.
623 */
624 if ( construct_dom0(dom0, dom0_memory_start, dom0_memory_end,
625 (char *)initial_images_start,
626 mod[0].mod_end-mod[0].mod_start,
627 (mbi->mods_count == 1) ? 0 :
628 (char *)initial_images_start +
629 (mod[1].mod_start-mod[0].mod_start),
630 (mbi->mods_count == 1) ? 0 :
631 mod[mbi->mods_count-1].mod_end - mod[1].mod_start,
632 cmdline) != 0)
633 panic("Could not set up DOM0 guest OS\n");
635 /* The stash space for the initial kernel image can now be freed up. */
636 init_domheap_pages(initial_images_start, initial_images_end);
638 scrub_heap_pages();
640 init_trace_bufs();
642 /* Give up the VGA console if DOM0 is configured to grab it. */
643 console_endboot(cmdline && strstr(cmdline, "tty0"));
645 domain_unpause_by_systemcontroller(current);
646 domain_unpause_by_systemcontroller(dom0);
647 startup_cpu_idle_loop();
648 }