debuggers.hg

view xen/arch/x86/smpboot.c @ 20931:39424ff0c91c

tboot: fix S3 issue for Intel Trusted Execution Technology.

Those unmapped pages cause page faults when MACing them and ultimately
cause the S3 failure.

Signed-off-by: Shane Wang <shane.wang@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Feb 03 09:44:12 2010 +0000 (2010-02-03)
line source
1 /*
2 * x86 SMP booting functions
3 *
4 * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
5 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
6 *
7 * Much of the core SMP work is based on previous work by Thomas Radke, to
8 * whom a great many thanks are extended.
9 *
10 * Thanks to Intel for making available several different Pentium,
11 * Pentium Pro and Pentium-II/Xeon MP machines.
12 * Original development of Linux SMP code supported by Caldera.
13 *
14 * This code is released under the GNU General Public License version 2 or
15 * later.
16 *
17 * Fixes
18 * Felix Koop : NR_CPUS used properly
19 * Jose Renau : Handle single CPU case.
20 * Alan Cox : By repeated request 8) - Total BogoMIPS report.
21 * Greg Wright : Fix for kernel stacks panic.
22 * Erich Boleyn : MP v1.4 and additional changes.
23 * Matthias Sattler : Changes for 2.1 kernel map.
24 * Michel Lespinasse : Changes for 2.1 kernel map.
25 * Michael Chastain : Change trampoline.S to gnu as.
26 * Alan Cox : Dumb bug: 'B' step PPro's are fine
27 * Ingo Molnar : Added APIC timers, based on code
28 * from Jose Renau
29 * Ingo Molnar : various cleanups and rewrites
30 * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
31 * Maciej W. Rozycki : Bits for genuine 82489DX APICs
32 * Martin J. Bligh : Added support for multi-quad systems
33 * Dave Jones : Report invalid combinations of Athlon CPUs.
34 * Rusty Russell : Hacked into shape for new "hotplug" boot process. */
36 #include <xen/config.h>
37 #include <xen/init.h>
38 #include <xen/kernel.h>
39 #include <xen/mm.h>
40 #include <xen/domain.h>
41 #include <xen/sched.h>
42 #include <xen/irq.h>
43 #include <xen/delay.h>
44 #include <xen/softirq.h>
45 #include <xen/serial.h>
46 #include <xen/numa.h>
47 #include <xen/event.h>
48 #include <asm/current.h>
49 #include <asm/mc146818rtc.h>
50 #include <asm/desc.h>
51 #include <asm/div64.h>
52 #include <asm/flushtlb.h>
53 #include <asm/msr.h>
54 #include <asm/mtrr.h>
55 #include <mach_apic.h>
56 #include <mach_wakecpu.h>
57 #include <smpboot_hooks.h>
58 #include <xen/stop_machine.h>
59 #include <acpi/cpufreq/processor_perf.h>
61 #define setup_trampoline() (bootsym_phys(trampoline_realmode_entry))
63 /* Set if we find a B stepping CPU */
64 static int __devinitdata smp_b_stepping;
66 /* Package ID of each logical CPU */
67 int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
69 /* Core ID of each logical CPU */
70 int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
72 /* representing HT siblings of each logical CPU */
73 DEFINE_PER_CPU_READ_MOSTLY(cpumask_t, cpu_sibling_map);
74 /* representing HT and core siblings of each logical CPU */
75 DEFINE_PER_CPU_READ_MOSTLY(cpumask_t, cpu_core_map);
77 /* bitmap of online cpus */
78 cpumask_t cpu_online_map __read_mostly;
79 EXPORT_SYMBOL(cpu_online_map);
81 cpumask_t cpu_callin_map;
82 cpumask_t cpu_callout_map;
83 EXPORT_SYMBOL(cpu_callout_map);
84 cpumask_t cpu_possible_map = CPU_MASK_ALL;
85 EXPORT_SYMBOL(cpu_possible_map);
86 static cpumask_t smp_commenced_mask;
88 /* The TSC's upper 32 bits can't be written on earlier CPUs (before Prescott),
89 * so there is no way to resync an AP against the BP. TBD: for Prescott and
90 * above, we should use IA64's algorithm.
91 */
92 static int __devinitdata tsc_sync_disabled;
94 /* Per CPU bogomips and other parameters */
95 struct cpuinfo_x86 cpu_data[NR_CPUS];
96 EXPORT_SYMBOL(cpu_data);
98 u32 x86_cpu_to_apicid[NR_CPUS] __read_mostly =
99 { [0 ... NR_CPUS-1] = -1U };
100 EXPORT_SYMBOL(x86_cpu_to_apicid);
102 static void map_cpu_to_logical_apicid(void);
103 /* State of each CPU. */
104 DEFINE_PER_CPU(int, cpu_state) = { 0 };
106 void *stack_base[NR_CPUS];
107 DEFINE_SPINLOCK(cpu_add_remove_lock);
109 /*
110 * The bootstrap kernel entry code has set these up. Save them for
111 * a given CPU
112 */
114 static void __devinit smp_store_cpu_info(int id)
115 {
116 struct cpuinfo_x86 *c = cpu_data + id;
118 *c = boot_cpu_data;
119 if (id!=0)
120 identify_cpu(c);
121 /*
122 * Mask B, Pentium, but not Pentium MMX
123 */
124 if (c->x86_vendor == X86_VENDOR_INTEL &&
125 c->x86 == 5 &&
126 c->x86_mask >= 1 && c->x86_mask <= 4 &&
127 c->x86_model <= 3)
128 /*
129 * Remember we have B step Pentia with bugs
130 */
131 smp_b_stepping = 1;
133 /*
134 * Certain Athlons might work (for various values of 'work') in SMP
135 * but they are not certified as MP capable.
136 */
137 if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
139 /* Athlon 660/661 is valid. */
140 if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
141 goto valid_k7;
143 /* Duron 670 is valid */
144 if ((c->x86_model==7) && (c->x86_mask==0))
145 goto valid_k7;
147 /*
148 * Athlon 662, Duron 671, and Athlon >model 7 have capability bit.
149 * It's worth noting that the A5 stepping (662) of some Athlon XPs
150 * has the MP bit set.
151 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
152 */
153 if (((c->x86_model==6) && (c->x86_mask>=2)) ||
154 ((c->x86_model==7) && (c->x86_mask>=1)) ||
155 (c->x86_model> 7))
156 if (cpu_has_mp)
157 goto valid_k7;
159 /* If we get here, it's not a certified SMP capable AMD system. */
160 add_taint(TAINT_UNSAFE_SMP);
161 }
163 valid_k7:
164 ;
165 }
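The K7 checks above gate SMP on specific model/stepping pairs before falling back to the MP capability bit. The following is a minimal standalone sketch of that decision, with the values taken from the comments in smp_store_cpu_info(); the function and parameter names are invented for illustration and are not part of this file.

/*
 * The K7 gate above, expressed as a predicate. Model/stepping pairs come
 * from the comments in smp_store_cpu_info(); names are invented.
 */
#include <stdbool.h>
#include <stdio.h>

static bool amd_k7_smp_certified(int model, int stepping, bool has_mp_bit)
{
    if (model == 6 && (stepping == 0 || stepping == 1))   /* Athlon 660/661 */
        return true;
    if (model == 7 && stepping == 0)                      /* Duron 670 */
        return true;
    if ((model == 6 && stepping >= 2) ||
        (model == 7 && stepping >= 1) ||
        model > 7)
        return has_mp_bit;                                /* trust the MP capability bit */
    return false;                                         /* otherwise: TAINT_UNSAFE_SMP */
}

int main(void)
{
    printf("Athlon 662 with MP bit: %d\n", amd_k7_smp_certified(6, 2, true));
    printf("early Athlon model 6:   %d\n", amd_k7_smp_certified(6, 0, false));
    return 0;
}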
167 /*
168 * TSC synchronization.
169 *
170 * We first check whether all CPUs have their TSC's synchronized,
171 * then we print a warning if not, and always resync.
172 */
174 static atomic_t tsc_start_flag = ATOMIC_INIT(0);
175 static atomic_t tsc_count_start = ATOMIC_INIT(0);
176 static atomic_t tsc_count_stop = ATOMIC_INIT(0);
177 static unsigned long long tsc_values[NR_CPUS];
179 #define NR_LOOPS 5
181 static void __init synchronize_tsc_bp (void)
182 {
183 int i;
184 unsigned long long t0;
185 unsigned long long sum, avg;
186 long long delta;
187 unsigned int one_usec;
188 int buggy = 0;
190 if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
191 printk("TSC is reliable, synchronization unnecessary\n");
192 return;
193 }
195 printk("checking TSC synchronization across %u CPUs: ", num_booting_cpus());
197 /* convert from kcyc/sec to cyc/usec */
198 one_usec = cpu_khz / 1000;
200 atomic_set(&tsc_start_flag, 1);
201 wmb();
203 /*
204 * We loop a few times to get a primed instruction cache,
205 * then the last pass is more or less synchronized and
206 * the BP and APs set their cycle counters to zero all at
207 * once. This reduces the chance of having random offsets
208 * between the processors, and guarantees that the maximum
209 * delay between the cycle counters is never bigger than
210 * the latency of information-passing (cachelines) between
211 * two CPUs.
212 */
213 for (i = 0; i < NR_LOOPS; i++) {
214 /*
215 * all APs synchronize but they loop on '== num_cpus'
216 */
217 while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
218 mb();
219 atomic_set(&tsc_count_stop, 0);
220 wmb();
221 /*
222 * this lets the APs save their current TSC:
223 */
224 atomic_inc(&tsc_count_start);
226 rdtscll(tsc_values[smp_processor_id()]);
227 /*
228 * We clear the TSC in the last loop:
229 */
230 if (i == NR_LOOPS-1)
231 write_tsc(0L);
233 /*
234 * Wait for all APs to leave the synchronization point:
235 */
236 while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
237 mb();
238 atomic_set(&tsc_count_start, 0);
239 wmb();
240 atomic_inc(&tsc_count_stop);
241 }
243 sum = 0;
244 for (i = 0; i < NR_CPUS; i++) {
245 if (cpu_isset(i, cpu_callout_map)) {
246 t0 = tsc_values[i];
247 sum += t0;
248 }
249 }
250 avg = sum;
251 do_div(avg, num_booting_cpus());
253 sum = 0;
254 for (i = 0; i < NR_CPUS; i++) {
255 if (!cpu_isset(i, cpu_callout_map))
256 continue;
257 delta = tsc_values[i] - avg;
258 if (delta < 0)
259 delta = -delta;
260 /*
261 * We report bigger than 2 microseconds clock differences.
262 */
263 if (delta > 2*one_usec) {
264 long realdelta;
265 if (!buggy) {
266 buggy = 1;
267 printk("\n");
268 }
269 realdelta = delta;
270 do_div(realdelta, one_usec);
271 if (tsc_values[i] < avg)
272 realdelta = -realdelta;
274 printk("CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
275 }
277 sum += delta;
278 }
279 if (!buggy)
280 printk("passed.\n");
281 }
283 static void __init synchronize_tsc_ap (void)
284 {
285 int i;
287 if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
288 return;
290 /*
291 * Not every cpu is online at the time
292 * this gets called, so we first wait for the BP to
293 * finish SMP initialization:
294 */
295 while (!atomic_read(&tsc_start_flag)) mb();
297 for (i = 0; i < NR_LOOPS; i++) {
298 atomic_inc(&tsc_count_start);
299 while (atomic_read(&tsc_count_start) != num_booting_cpus())
300 mb();
302 rdtscll(tsc_values[smp_processor_id()]);
303 if (i == NR_LOOPS-1)
304 write_tsc(0L);
306 atomic_inc(&tsc_count_stop);
307 while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
308 }
309 }
310 #undef NR_LOOPS
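The two functions above implement a counting-barrier rendezvous: tsc_count_start gates entry to each sampling round and tsc_count_stop gates exit, with the BP acting as the num_booting_cpus()-th participant. Below is a minimal userspace sketch of the same start/stop pattern, assuming POSIX threads and C11 atomics (compile with -pthread); NR_APS, ap() and the thread bookkeeping are invented for the demo.

/*
 * Standalone sketch of the tsc_count_start/tsc_count_stop rendezvous,
 * rebuilt with C11 atomics and pthreads purely for illustration.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NR_LOOPS 5
#define NR_APS   3

static atomic_int count_start, count_stop;

static void *ap(void *arg)
{
    (void)arg;
    for (int i = 0; i < NR_LOOPS; i++) {
        atomic_fetch_add(&count_start, 1);
        while (atomic_load(&count_start) != NR_APS + 1)
            ;                            /* wait for the BP to release the round */
        /* ...an AP samples its counter here, as rdtscll() does above... */
        atomic_fetch_add(&count_stop, 1);
        while (atomic_load(&count_stop) != NR_APS + 1)
            ;                            /* wait for the BP to close the round */
    }
    return NULL;
}

int main(void)
{
    pthread_t t[NR_APS];

    for (int i = 0; i < NR_APS; i++)
        pthread_create(&t[i], NULL, ap, NULL);

    for (int i = 0; i < NR_LOOPS; i++) {
        while (atomic_load(&count_start) != NR_APS)
            ;                            /* all APs arrived, as in synchronize_tsc_bp() */
        atomic_store(&count_stop, 0);
        atomic_fetch_add(&count_start, 1);   /* release the APs */
        /* ...the BP samples its counter here... */
        while (atomic_load(&count_stop) != NR_APS)
            ;                            /* all APs have sampled */
        atomic_store(&count_start, 0);
        atomic_fetch_add(&count_stop, 1);    /* let the APs leave the round */
    }

    for (int i = 0; i < NR_APS; i++)
        pthread_join(t[i], NULL);
    puts("rendezvous completed");
    return 0;
}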
312 extern void calibrate_delay(void);
314 static atomic_t init_deasserted;
316 void __devinit smp_callin(void)
317 {
318 int cpuid, phys_id, i;
320 /*
321 * If woken up by an INIT in an 82489DX configuration
322 * we may get here before an INIT-deassert IPI reaches
323 * our local APIC. We have to wait for the IPI or we'll
324 * lock up on an APIC access.
325 */
326 wait_for_init_deassert(&init_deasserted);
328 if ( x2apic_enabled )
329 enable_x2apic();
331 /*
332 * (This works even if the APIC is not enabled.)
333 */
334 phys_id = get_apic_id();
335 cpuid = smp_processor_id();
336 if (cpu_isset(cpuid, cpu_callin_map)) {
337 printk("huh, phys CPU#%d, CPU#%d already present??\n",
338 phys_id, cpuid);
339 BUG();
340 }
341 Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
343 /*
344 * STARTUP IPIs are fragile beasts as they might sometimes
345 * trigger some glue motherboard logic. Keep the APIC bus completely
346 * silent for 1 second; this overestimates, by a factor of two, the
347 * time the boot CPU spends sending the (up to 2) STARTUP IPIs.
348 * This should be enough.
349 */
351 /*
352 * Waiting 2s total for startup
353 */
354 for (i = 0; i < 200; i++) {
355 /*
356 * Has the boot CPU finished its STARTUP sequence?
357 */
358 if (cpu_isset(cpuid, cpu_callout_map))
359 break;
360 rep_nop();
361 mdelay(10);
362 }
364 if (!cpu_isset(cpuid, cpu_callout_map)) {
365 printk("BUG: CPU%d started up but did not get a callout!\n",
366 cpuid);
367 BUG();
368 }
370 /*
371 * the boot CPU has finished the init stage and is spinning
372 * on callin_map until we finish. We are free to set up this
373 * CPU, first the APIC. (this is probably redundant on most
374 * boards)
375 */
377 Dprintk("CALLIN, before setup_local_APIC().\n");
378 smp_callin_clear_local_apic();
379 setup_local_APIC();
380 map_cpu_to_logical_apicid();
382 #if 0
383 /*
384 * Get our bogomips.
385 */
386 calibrate_delay();
387 Dprintk("Stack at about %p\n",&cpuid);
388 #endif
390 /*
391 * Save our processor parameters
392 */
393 smp_store_cpu_info(cpuid);
395 /*
396 * Allow the master to continue.
397 */
398 cpu_set(cpuid, cpu_callin_map);
400 /*
401 * Synchronize the TSC with the BP
402 */
403 if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled) {
404 synchronize_tsc_ap();
405 /* No sync for same reason as above */
406 calibrate_tsc_ap();
407 }
408 }
410 static int cpucount, booting_cpu;
412 /* representing cpus for which sibling maps can be computed */
413 static cpumask_t cpu_sibling_setup_map;
415 static inline void
416 set_cpu_sibling_map(int cpu)
417 {
418 int i;
419 struct cpuinfo_x86 *c = cpu_data;
421 cpu_set(cpu, cpu_sibling_setup_map);
423 if (c[cpu].x86_num_siblings > 1) {
424 for_each_cpu_mask(i, cpu_sibling_setup_map) {
425 if (phys_proc_id[cpu] == phys_proc_id[i] &&
426 cpu_core_id[cpu] == cpu_core_id[i]) {
427 cpu_set(i, per_cpu(cpu_sibling_map, cpu));
428 cpu_set(cpu, per_cpu(cpu_sibling_map, i));
429 cpu_set(i, per_cpu(cpu_core_map, cpu));
430 cpu_set(cpu, per_cpu(cpu_core_map, i));
431 }
432 }
433 } else {
434 cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
435 }
437 if (c[cpu].x86_max_cores == 1) {
438 per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu);
439 c[cpu].booted_cores = 1;
440 return;
441 }
443 for_each_cpu_mask(i, cpu_sibling_setup_map) {
444 if (phys_proc_id[cpu] == phys_proc_id[i]) {
445 cpu_set(i, per_cpu(cpu_core_map, cpu));
446 cpu_set(cpu, per_cpu(cpu_core_map, i));
447 /*
448 * Does this new cpu bring up a new core?
449 */
450 if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) {
451 /*
452 * for each core in package, increment
453 * the booted_cores for this new cpu
454 */
455 if (first_cpu(per_cpu(cpu_sibling_map, i)) == i)
456 c[cpu].booted_cores++;
457 /*
458 * increment the core count for all
459 * the other cpus in this package
460 */
461 if (i != cpu)
462 c[i].booted_cores++;
463 } else if (i != cpu && !c[cpu].booted_cores)
464 c[cpu].booted_cores = c[i].booted_cores;
465 }
466 }
467 }
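set_cpu_sibling_map() pairs CPUs that share a (package, core) tuple into cpu_sibling_map and CPUs that share a package into cpu_core_map. A toy, self-contained version of that pairing using flat 64-bit masks follows; the topology arrays and every name in it are invented purely for illustration.

/*
 * Toy version of the pairing above: CPUs sharing (package, core) are HT
 * siblings; CPUs sharing a package are core-map peers.
 */
#include <stdint.h>
#include <stdio.h>

#define NCPU 8

static const int pkg[NCPU]  = { 0, 0, 0, 0, 1, 1, 1, 1 };
static const int core[NCPU] = { 0, 0, 1, 1, 0, 0, 1, 1 };

static uint64_t sibling_map[NCPU], core_map[NCPU];

static void set_sibling_map(int cpu)
{
    for (int i = 0; i <= cpu; i++) {         /* only CPUs brought up so far */
        if (pkg[i] != pkg[cpu])
            continue;
        core_map[cpu] |= 1ull << i;          /* same package: core-map peers */
        core_map[i]   |= 1ull << cpu;
        if (core[i] == core[cpu]) {          /* same core too: HT siblings */
            sibling_map[cpu] |= 1ull << i;
            sibling_map[i]   |= 1ull << cpu;
        }
    }
}

int main(void)
{
    for (int cpu = 0; cpu < NCPU; cpu++)
        set_sibling_map(cpu);
    for (int cpu = 0; cpu < NCPU; cpu++)
        printf("cpu%d: siblings %#llx, core peers %#llx\n", cpu,
               (unsigned long long)sibling_map[cpu],
               (unsigned long long)core_map[cpu]);
    return 0;
}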
469 static void construct_percpu_idt(unsigned int cpu)
470 {
471 unsigned char idt_load[10];
473 *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*sizeof(idt_entry_t))-1;
474 *(unsigned long *)(&idt_load[2]) = (unsigned long)idt_tables[cpu];
475 __asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) );
476 }
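The 10-byte idt_load[] built by hand above is the IDTR pseudo-descriptor: a 16-bit limit immediately followed by the IDT's linear base (64-bit on x86-64; on i386 the base is 32 bits and lidt consumes only 6 of the bytes). A standalone sketch of the same layout as a packed struct follows; the struct name is invented for the sketch.

/* What idt_load[] encodes, checked with offsetof/sizeof. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct __attribute__((packed)) idt_ptr {
    uint16_t limit;          /* IDT_ENTRIES * sizeof(idt_entry_t) - 1 */
    uint64_t base;           /* address of idt_tables[cpu] */
};

int main(void)
{
    _Static_assert(sizeof(struct idt_ptr) == 10, "IDTR image must be 10 bytes");
    printf("limit at offset %zu, base at offset %zu\n",
           offsetof(struct idt_ptr, limit), offsetof(struct idt_ptr, base));
    return 0;
}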
478 /*
479 * Activate a secondary processor.
480 */
481 void __devinit start_secondary(void *unused)
482 {
483 /*
484 * Don't put anything before smp_callin(); SMP
485 * booting is so fragile that we want to limit the
486 * things done here to the bare minimum.
487 */
488 unsigned int cpu = booting_cpu;
490 set_processor_id(cpu);
491 set_current(idle_vcpu[cpu]);
492 this_cpu(curr_vcpu) = idle_vcpu[cpu];
493 if ( cpu_has_efer )
494 rdmsrl(MSR_EFER, this_cpu(efer));
495 asm volatile ( "mov %%cr4,%0" : "=r" (this_cpu(cr4)) );
497 /*
498 * Just as during early bootstrap, it is convenient here to disable
499 * spinlock checking while we have IRQs disabled. This allows us to
500 * acquire IRQ-unsafe locks when it would otherwise be disallowed.
501 *
502 * It is safe because the race we are usually trying to avoid involves
503 * a group of CPUs rendezvousing in an IPI handler, where one cannot
504 * join because it is spinning with IRQs disabled waiting to acquire a
505 * lock held by another in the rendezvous group (the lock must be an
506 * IRQ-unsafe lock since the CPU took the IPI after acquiring it, and
507 * hence had IRQs enabled). This is a deadlock scenario.
508 *
509 * However, no CPU can be involved in rendezvous until it is online,
510 * hence no such group can be waiting for this CPU until it is
511 * visible in cpu_online_map. Hence such a deadlock is not possible.
512 */
513 spin_debug_disable();
515 percpu_traps_init();
517 cpu_init();
518 /*preempt_disable();*/
519 smp_callin();
520 while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
521 rep_nop();
523 /*
524 * At this point, boot CPU has fully initialised the IDT. It is
525 * now safe to make ourselves a private copy.
526 */
527 construct_percpu_idt(cpu);
529 setup_secondary_APIC_clock();
530 enable_APIC_timer();
531 /*
532 * low-memory mappings have been cleared, flush them from
533 * the local TLBs too.
534 */
535 flush_tlb_local();
537 /* This must be done before setting cpu_online_map */
538 spin_debug_enable();
539 set_cpu_sibling_map(raw_smp_processor_id());
540 wmb();
542 /*
543 * We need to hold vector_lock so that the set of online cpus
544 * does not change while we are assigning vectors to cpus. Holding
545 * this lock ensures we don't half-assign or half-remove an irq on a cpu.
546 */
547 lock_vector_lock();
548 __setup_vector_irq(smp_processor_id());
549 cpu_set(smp_processor_id(), cpu_online_map);
550 unlock_vector_lock();
552 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
554 init_percpu_time();
556 /* We can take interrupts now: we're officially "up". */
557 local_irq_enable();
558 mtrr_ap_init();
560 microcode_resume_cpu(cpu);
562 wmb();
563 startup_cpu_idle_loop();
564 }
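start_secondary() finishes all per-CPU setup, issues wmb(), and only then sets its bit in cpu_online_map, so any CPU that observes the bit also observes the completed setup. The sketch below reduces that publish ordering to a C11 release/acquire pair; it is a standalone pthread demo and every name in it is invented.

/* Publish-after-setup ordering, reduced to release/acquire. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static int percpu_state;                 /* stands in for all per-CPU setup above */
static atomic_int online;                /* stands in for this CPU's cpu_online_map bit */

static void *secondary(void *arg)
{
    (void)arg;
    percpu_state = 42;                   /* cpu_init(), IDT copy, APIC timer, ... */
    atomic_store_explicit(&online, 1, memory_order_release);  /* wmb(); cpu_set(...) */
    return NULL;
}

int main(void)
{
    pthread_t t;
    pthread_create(&t, NULL, secondary, NULL);
    while (!atomic_load_explicit(&online, memory_order_acquire))
        ;                                /* __cpu_up() polls cpu_online_map */
    printf("per-CPU state observed as %d\n", percpu_state);   /* guaranteed 42 */
    pthread_join(t, NULL);
    return 0;
}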
566 extern struct {
567 void * esp;
568 unsigned short ss;
569 } stack_start;
571 u32 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
573 static void map_cpu_to_logical_apicid(void)
574 {
575 int cpu = smp_processor_id();
576 int apicid = logical_smp_processor_id();
578 cpu_2_logical_apicid[cpu] = apicid;
579 }
581 static void unmap_cpu_to_logical_apicid(int cpu)
582 {
583 cpu_2_logical_apicid[cpu] = BAD_APICID;
584 }
586 #if APIC_DEBUG
587 static inline void __inquire_remote_apic(int apicid)
588 {
589 int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
590 char *names[] = { "ID", "VERSION", "SPIV" };
591 int timeout, status;
593 printk("Inquiring remote APIC #%d...\n", apicid);
595 for (i = 0; i < ARRAY_SIZE(regs); i++) {
596 printk("... APIC #%d %s: ", apicid, names[i]);
598 /*
599 * Wait for idle.
600 */
601 apic_wait_icr_idle();
603 apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
605 timeout = 0;
606 do {
607 udelay(100);
608 status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
609 } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
611 switch (status) {
612 case APIC_ICR_RR_VALID:
613 status = apic_read(APIC_RRR);
614 printk("%08x\n", status);
615 break;
616 default:
617 printk("failed\n");
618 }
619 }
620 }
621 #endif
623 #ifdef WAKE_SECONDARY_VIA_NMI
625 static int logical_apicid_to_cpu(int logical_apicid)
626 {
627 int i;
629 for ( i = 0; i < ARRAY_SIZE(cpu_2_logical_apicid); i++ )
630 if ( cpu_2_logical_apicid[i] == logical_apicid )
631 break;
633 if ( i == ARRAY_SIZE(cpu_2_logical_apicid) )
634 i = -1; /* not found */
636 return i;
637 }
639 /*
640 * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
641 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
642 * won't ... remember to clear down the APIC, etc later.
643 */
644 static int __devinit
645 wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
646 {
647 unsigned long send_status = 0, accept_status = 0;
648 int timeout, maxlvt;
649 int dest_cpu;
650 u32 dest;
652 dest_cpu = logical_apicid_to_cpu(logical_apicid);
653 BUG_ON(dest_cpu == -1);
655 dest = cpu_physical_id(dest_cpu);
657 /* Boot on the stack */
658 apic_icr_write(APIC_DM_NMI | APIC_DEST_PHYSICAL, dest_cpu);
660 Dprintk("Waiting for send to finish...\n");
661 timeout = 0;
662 do {
663 Dprintk("+");
664 udelay(100);
665 if ( !x2apic_enabled )
666 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
667 else
668 send_status = 0; /* We go out of the loop directly. */
669 } while (send_status && (timeout++ < 1000));
671 /*
672 * Give the other CPU some time to accept the IPI.
673 */
674 udelay(200);
675 /*
676 * Due to the Pentium erratum 3AP.
677 */
678 maxlvt = get_maxlvt();
679 if (maxlvt > 3) {
680 apic_read_around(APIC_SPIV);
681 apic_write(APIC_ESR, 0);
682 }
683 accept_status = (apic_read(APIC_ESR) & 0xEF);
684 Dprintk("NMI sent.\n");
686 if (send_status)
687 printk("APIC never delivered???\n");
688 if (accept_status)
689 printk("APIC delivery error (%lx).\n", accept_status);
691 return (send_status | accept_status);
692 }
693 #endif /* WAKE_SECONDARY_VIA_NMI */
695 #ifdef WAKE_SECONDARY_VIA_INIT
696 static int __devinit
697 wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
698 {
699 unsigned long send_status = 0, accept_status = 0;
700 int maxlvt, timeout, num_starts, j;
702 /*
703 * Be paranoid about clearing APIC errors.
704 */
705 if (APIC_INTEGRATED(apic_version[phys_apicid])) {
706 apic_read_around(APIC_SPIV);
707 apic_write(APIC_ESR, 0);
708 apic_read(APIC_ESR);
709 }
711 Dprintk("Asserting INIT.\n");
713 /*
714 * Turn INIT on target chip via IPI
715 */
716 apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
717 phys_apicid);
719 Dprintk("Waiting for send to finish...\n");
720 timeout = 0;
721 do {
722 Dprintk("+");
723 udelay(100);
724 if ( !x2apic_enabled )
725 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
726 else
727 send_status = 0; /* We go out of the loop directly. */
728 } while (send_status && (timeout++ < 1000));
730 mdelay(10);
732 Dprintk("Deasserting INIT.\n");
734 apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
736 Dprintk("Waiting for send to finish...\n");
737 timeout = 0;
738 do {
739 Dprintk("+");
740 udelay(100);
741 if ( !x2apic_enabled )
742 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
743 else
744 send_status = 0; /* We go out of the loop directly. */
745 } while (send_status && (timeout++ < 1000));
747 atomic_set(&init_deasserted, 1);
749 /*
750 * Should we send STARTUP IPIs ?
751 *
752 * Determine this based on the APIC version.
753 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
754 */
755 if (APIC_INTEGRATED(apic_version[phys_apicid]))
756 num_starts = 2;
757 else
758 num_starts = 0;
760 /*
761 * Run STARTUP IPI loop.
762 */
763 Dprintk("#startup loops: %d.\n", num_starts);
765 maxlvt = get_maxlvt();
767 for (j = 1; j <= num_starts; j++) {
768 Dprintk("Sending STARTUP #%d.\n",j);
769 apic_read_around(APIC_SPIV);
770 apic_write(APIC_ESR, 0);
771 apic_read(APIC_ESR);
772 Dprintk("After apic_write.\n");
774 /*
775 * STARTUP IPI
776 * Boot on the stack
777 */
778 apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12), phys_apicid);
780 /*
781 * Give the other CPU some time to accept the IPI.
782 */
783 udelay(300);
785 Dprintk("Startup point 1.\n");
787 Dprintk("Waiting for send to finish...\n");
788 timeout = 0;
789 do {
790 Dprintk("+");
791 udelay(100);
792 send_status = (x2apic_enabled ? 0 :
793 apic_read(APIC_ICR) & APIC_ICR_BUSY);
794 } while (send_status && (timeout++ < 1000));
796 /*
797 * Give the other CPU some time to accept the IPI.
798 */
799 udelay(200);
800 /*
801 * Due to the Pentium erratum 3AP.
802 */
803 if (maxlvt > 3) {
804 apic_read_around(APIC_SPIV);
805 apic_write(APIC_ESR, 0);
806 }
807 accept_status = (apic_read(APIC_ESR) & 0xEF);
808 if (send_status || accept_status)
809 break;
810 }
811 Dprintk("After Startup.\n");
813 if (send_status)
814 printk("APIC never delivered???\n");
815 if (accept_status)
816 printk("APIC delivery error (%lx).\n", accept_status);
818 return (send_status | accept_status);
819 }
820 #endif /* WAKE_SECONDARY_VIA_INIT */
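In the STARTUP IPIs above, the vector byte is simply bits 12-19 of the page-aligned, below-1MiB real-mode entry point; the woken AP begins executing at vector << 12. The following small standalone check of that encoding uses a made-up trampoline address purely for illustration.

/* Standalone check of the SIPI vector encoding. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t start_eip = 0x9e000;            /* hypothetical trampoline address */
    assert((start_eip & 0xfff) == 0 && start_eip < 0x100000);

    uint8_t vector = start_eip >> 12;        /* what APIC_DM_STARTUP | (start_eip >> 12) sends */
    printf("SIPI vector %#x -> AP starts at %#x\n",
           (unsigned)vector, (unsigned)vector << 12);
    return 0;
}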
822 extern cpumask_t cpu_initialized;
823 /*
824 * Caller should hold cpu_add_remove_lock if not called when booting
825 */
826 int alloc_cpu_id(void)
827 {
828 cpumask_t tmp_map;
829 int cpu;
830 cpus_complement(tmp_map, cpu_present_map);
831 cpu = first_cpu(tmp_map);
832 if (cpu >= NR_CPUS)
833 return -ENODEV;
834 return cpu;
835 }
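alloc_cpu_id() just picks the first CPU index not present in cpu_present_map. The same idea with a flat 64-bit mask is sketched below; it assumes the GCC/Clang __builtin_ctzll intrinsic, and the names are invented for this sketch.

/* First-free-index allocation over a bitmask. */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

static int pick_cpu_id(uint64_t present_map, int nr_cpus)
{
    uint64_t valid = (nr_cpus >= 64) ? ~0ull : ((1ull << nr_cpus) - 1);
    uint64_t free  = ~present_map & valid;

    if (!free)
        return -ENODEV;              /* every slot 0 .. nr_cpus-1 is taken */
    return __builtin_ctzll(free);    /* index of the first clear bit */
}

int main(void)
{
    /* CPUs 0, 1 and 3 already present -> first free id is 2 */
    printf("%d\n", pick_cpu_id(0x0b, 8));
    return 0;
}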
837 static void *prepare_idle_stack(unsigned int cpu)
838 {
839 if (!stack_base[cpu])
840 stack_base[cpu] = alloc_xenheap_pages(STACK_ORDER, 0);
842 return stack_base[cpu];
843 }
845 static int __devinit do_boot_cpu(int apicid, int cpu)
846 /*
847 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
848 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
849 * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
850 */
851 {
852 unsigned long boot_error;
853 unsigned int order;
854 int timeout;
855 unsigned long start_eip;
856 unsigned short nmi_high = 0, nmi_low = 0;
857 struct vcpu *v;
858 struct desc_struct *gdt;
859 #ifdef __x86_64__
860 struct page_info *page;
861 #endif
863 /*
864 * Save current MTRR state in case it was changed since early boot
865 * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
866 */
867 mtrr_save_state();
869 ++cpucount;
871 booting_cpu = cpu;
873 v = alloc_idle_vcpu(cpu);
874 BUG_ON(v == NULL);
876 /* start_eip had better be page-aligned! */
877 start_eip = setup_trampoline();
879 /* So we see what's up */
880 printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
882 stack_start.esp = prepare_idle_stack(cpu);
884 /* Debug build: detect stack overflow by setting up a guard page. */
885 memguard_guard_stack(stack_start.esp);
887 gdt = per_cpu(gdt_table, cpu);
888 if (gdt == boot_cpu_gdt_table) {
889 order = get_order_from_pages(NR_RESERVED_GDT_PAGES);
890 #ifdef __x86_64__
891 page = alloc_domheap_pages(NULL, order,
892 MEMF_node(cpu_to_node(cpu)));
893 per_cpu(compat_gdt_table, cpu) = gdt = page_to_virt(page);
894 memcpy(gdt, boot_cpu_compat_gdt_table,
895 NR_RESERVED_GDT_PAGES * PAGE_SIZE);
896 gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;
897 page = alloc_domheap_pages(NULL, order,
898 MEMF_node(cpu_to_node(cpu)));
899 per_cpu(gdt_table, cpu) = gdt = page_to_virt(page);
900 #else
901 per_cpu(gdt_table, cpu) = gdt = alloc_xenheap_pages(order, 0);
902 #endif
903 memcpy(gdt, boot_cpu_gdt_table,
904 NR_RESERVED_GDT_PAGES * PAGE_SIZE);
905 BUILD_BUG_ON(NR_CPUS > 0x10000);
906 gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;
907 }
909 #ifdef __i386__
910 if (!per_cpu(doublefault_tss, cpu)) {
911 per_cpu(doublefault_tss, cpu) = alloc_xenheap_page();
912 memset(per_cpu(doublefault_tss, cpu), 0, PAGE_SIZE);
913 }
914 #else
915 if (!per_cpu(compat_arg_xlat, cpu))
916 setup_compat_arg_xlat(cpu, cpu_to_node[cpu]);
917 #endif
919 if (!idt_tables[cpu]) {
920 idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES);
921 memcpy(idt_tables[cpu], idt_table,
922 IDT_ENTRIES*sizeof(idt_entry_t));
923 }
925 /*
926 * This grunge runs the startup process for
927 * the targeted processor.
928 */
930 atomic_set(&init_deasserted, 0);
932 Dprintk("Setting warm reset code and vector.\n");
934 store_NMI_vector(&nmi_high, &nmi_low);
936 smpboot_setup_warm_reset_vector(start_eip);
938 /*
939 * Starting actual IPI sequence...
940 */
941 boot_error = wakeup_secondary_cpu(apicid, start_eip);
943 if (!boot_error) {
944 /*
945 * allow APs to start initializing.
946 */
947 Dprintk("Before Callout %d.\n", cpu);
948 cpu_set(cpu, cpu_callout_map);
949 Dprintk("After Callout %d.\n", cpu);
951 /*
952 * Wait 5s total for a response
953 */
954 for (timeout = 0; timeout < 50000; timeout++) {
955 if (cpu_isset(cpu, cpu_callin_map))
956 break; /* It has booted */
957 udelay(100);
958 }
960 if (cpu_isset(cpu, cpu_callin_map)) {
961 /* number CPUs logically, starting from 1 (BSP is 0) */
962 Dprintk("OK.\n");
963 printk("CPU%d: ", cpu);
964 print_cpu_info(&cpu_data[cpu]);
965 Dprintk("CPU has booted.\n");
966 } else {
967 boot_error = 1;
968 mb();
969 if (bootsym(trampoline_cpu_started) == 0xA5)
970 /* trampoline started but...? */
971 printk("Stuck ??\n");
972 else
973 /* trampoline code not run */
974 printk("Not responding.\n");
975 inquire_remote_apic(apicid);
976 }
977 }
979 if (boot_error) {
980 /* Try to put things back the way they were before ... */
981 unmap_cpu_to_logical_apicid(cpu);
982 cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
983 cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
984 cpucount--;
986 /* Mark the CPU as non-present */
987 spin_lock(&cpu_add_remove_lock);
988 x86_cpu_to_apicid[cpu] = BAD_APICID;
989 cpu_clear(cpu, cpu_present_map);
990 spin_unlock(&cpu_add_remove_lock);
991 } else {
992 }
994 /* mark "stuck" area as not stuck */
995 bootsym(trampoline_cpu_started) = 0;
996 mb();
998 return boot_error;
999 }
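do_boot_cpu() and smp_callin() hand-shake through two bitmaps: the BP sets the AP's bit in cpu_callout_map and polls cpu_callin_map for up to 5 seconds, while the AP polls cpu_callout_map for up to 2 seconds and then sets its cpu_callin_map bit. The scaled-down userspace sketch below mirrors that callout/callin protocol with pthreads; the timeouts are shortened and every name is invented.

/* Callout/callin handshake reduced to two flags and bounded polling. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_int callout, callin;

static void *ap_thread(void *arg)
{
    (void)arg;
    /* smp_callin(): wait (bounded) for the BP's callout... */
    for (int i = 0; i < 200 && !atomic_load(&callout); i++)
        usleep(1000);
    if (!atomic_load(&callout))
        return (void *)1;            /* "started up but did not get a callout" */
    /* ...per-CPU setup would happen here, then let the master continue */
    atomic_store(&callin, 1);
    return NULL;
}

int main(void)
{
    pthread_t ap;
    pthread_create(&ap, NULL, ap_thread, NULL);

    atomic_store(&callout, 1);                       /* "allow APs to start initializing" */
    for (int i = 0; i < 500 && !atomic_load(&callin); i++)
        usleep(1000);                                /* BP polls for the call-in */

    printf(atomic_load(&callin) ? "CPU has booted\n" : "Not responding.\n");
    pthread_join(ap, NULL);
    return 0;
}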
1001 static void idle_task_exit(void)
1002 {
1003 /* Give up lazy state borrowed by this idle vcpu */
1004 __sync_lazy_execstate();
1005 }
1007 void cpu_exit_clear(void)
1008 {
1009 int cpu = raw_smp_processor_id();
1011 idle_task_exit();
1013 cpucount--;
1014 cpu_uninit();
1016 cpu_clear(cpu, cpu_callout_map);
1017 cpu_clear(cpu, cpu_callin_map);
1019 cpu_clear(cpu, smp_commenced_mask);
1020 unmap_cpu_to_logical_apicid(cpu);
1021 }
1023 static int __cpuinit __smp_prepare_cpu(int cpu)
1024 {
1025 int apicid, ret;
1027 apicid = x86_cpu_to_apicid[cpu];
1028 if (apicid == BAD_APICID) {
1029 ret = -ENODEV;
1030 goto exit;
1031 }
1033 tsc_sync_disabled = 1;
1035 do_boot_cpu(apicid, cpu);
1037 tsc_sync_disabled = 0;
1039 ret = 0;
1040 exit:
1041 return ret;
1042 }
1044 /*
1045 * Cycle through the processors sending APIC IPIs to boot each.
1046 */
1048 /* Where the IO area was mapped on multiquad, always 0 otherwise */
1049 void *xquad_portio;
1050 #ifdef CONFIG_X86_NUMAQ
1051 EXPORT_SYMBOL(xquad_portio);
1052 #endif
1054 static void __init smp_boot_cpus(unsigned int max_cpus)
1056 int apicid, cpu, kicked;
1057 #ifdef BOGOMIPS
1058 unsigned long bogosum = 0;
1059 #endif
1061 /*
1062 * Setup boot CPU information
1063 */
1064 smp_store_cpu_info(0); /* Final full version of the data */
1065 printk("CPU%d: ", 0);
1066 print_cpu_info(&cpu_data[0]);
1068 boot_cpu_physical_apicid = get_apic_id();
1069 x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
1071 stack_base[0] = stack_start.esp;
1073 /*current_thread_info()->cpu = 0;*/
1074 /*smp_tune_scheduling();*/
1076 set_cpu_sibling_map(0);
1078 /*
1079 * If we couldn't find an SMP configuration at boot time,
1080 * get out of here now!
1081 */
1082 if (!smp_found_config && !acpi_lapic) {
1083 printk(KERN_NOTICE "SMP motherboard not detected.\n");
1084 init_uniprocessor:
1085 phys_cpu_present_map = physid_mask_of_physid(0);
1086 if (APIC_init_uniprocessor())
1087 printk(KERN_NOTICE "Local APIC not detected."
1088 " Using dummy APIC emulation.\n");
1089 map_cpu_to_logical_apicid();
1090 cpu_set(0, per_cpu(cpu_sibling_map, 0));
1091 cpu_set(0, per_cpu(cpu_core_map, 0));
1092 return;
1095 /*
1096 * Should not be necessary because the MP table should list the boot
1097 * CPU too, but we do it for the sake of robustness anyway.
1098 * Makes no sense to do this check in clustered apic mode, so skip it
1099 */
1100 if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
1101 printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
1102 boot_cpu_physical_apicid);
1103 physid_set(hard_smp_processor_id(), phys_cpu_present_map);
1106 /*
1107 * If we couldn't find a local APIC, then get out of here now!
1108 */
1109 if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
1110 printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
1111 boot_cpu_physical_apicid);
1112 goto init_uniprocessor;
1115 verify_local_APIC();
1117 /*
1118 * If SMP should be disabled, then really disable it!
1119 */
1120 if (!max_cpus)
1121 goto init_uniprocessor;
1123 connect_bsp_APIC();
1124 setup_local_APIC();
1125 map_cpu_to_logical_apicid();
1128 setup_portio_remap();
1130 /*
1131 * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
1133 * In clustered apic mode, phys_cpu_present_map is constructed thus:
1134 * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
1135 * clustered apic ID.
1136 */
1137 Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
1139 kicked = 1;
1141 for_each_present_cpu ( cpu )
1143 apicid = x86_cpu_to_apicid[cpu];
1145 /*
1146 * Don't even attempt to start the boot CPU!
1147 */
1148 if ((apicid == boot_cpu_apicid) || (apicid == BAD_APICID))
1149 continue;
1151 if (!check_apicid_present(apicid)) {
1152 dprintk(XENLOG_WARNING,
1153 "Present CPU has an apicid not marked present; skipping\n");
1154 continue;
1157 if (max_cpus <= cpucount+1)
1158 continue;
1160 if ( do_boot_cpu(apicid, cpu))
1161 printk("CPU #%d not responding - cannot use it.\n",
1162 apicid);
1163 else
1164 ++kicked;
1167 /*
1168 * Cleanup possible dangling ends...
1169 */
1170 smpboot_restore_warm_reset_vector();
1172 #ifdef BOGOMIPS
1173 /*
1174 * Allow the user to impress friends.
1175 */
1176 Dprintk("Before bogomips.\n");
1177 for (cpu = 0; cpu < NR_CPUS; cpu++)
1178 if (cpu_isset(cpu, cpu_callout_map))
1179 bogosum += cpu_data[cpu].loops_per_jiffy;
1180 printk(KERN_INFO
1181 "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
1182 cpucount+1,
1183 bogosum/(500000/HZ),
1184 (bogosum/(5000/HZ))%100);
1185 #else
1186 printk("Total of %d processors activated.\n", cpucount+1);
1187 #endif
1189 Dprintk("Before bogocount - setting activated=1.\n");
1191 if (smp_b_stepping)
1192 printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
1194 /*
1195 * Don't taint if we are running SMP kernel on a single non-MP
1196 * approved Athlon
1197 */
1198 if (tainted & TAINT_UNSAFE_SMP) {
1199 if (cpucount)
1200 printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
1201 else
1202 tainted &= ~TAINT_UNSAFE_SMP;
1205 Dprintk("Boot done.\n");
1207 /*
1208 * construct cpu_sibling_map, so that we can tell sibling CPUs
1209 * efficiently.
1210 */
1211 for_each_possible_cpu(cpu) {
1212 cpus_clear(per_cpu(cpu_sibling_map, cpu));
1213 cpus_clear(per_cpu(cpu_core_map, cpu));
1216 cpu_set(0, per_cpu(cpu_sibling_map, 0));
1217 cpu_set(0, per_cpu(cpu_core_map, 0));
1219 if (nmi_watchdog == NMI_LOCAL_APIC)
1220 check_nmi_watchdog();
1222 smpboot_setup_io_apic();
1224 setup_boot_APIC_clock();
1226 /*
1227 * Synchronize the TSC with the AP
1228 */
1229 if (cpu_has_tsc && cpucount && cpu_khz)
1230 synchronize_tsc_bp();
1231 calibrate_tsc_bp();
1234 /* These are wrappers to interface to the new boot process. Someone
1235 who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
1236 void __init smp_prepare_cpus(unsigned int max_cpus)
1237 {
1238 smp_commenced_mask = cpumask_of_cpu(0);
1239 cpu_callin_map = cpumask_of_cpu(0);
1240 mb();
1241 smp_boot_cpus(max_cpus);
1242 mtrr_aps_sync_begin();
1243 }
1245 void __devinit smp_prepare_boot_cpu(void)
1246 {
1247 cpu_set(smp_processor_id(), cpu_online_map);
1248 cpu_set(smp_processor_id(), cpu_callout_map);
1249 cpu_set(smp_processor_id(), cpu_present_map);
1250 cpu_set(smp_processor_id(), cpu_possible_map);
1251 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
1252 }
1254 static void
1255 remove_siblinginfo(int cpu)
1257 int sibling;
1258 struct cpuinfo_x86 *c = cpu_data;
1260 for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) {
1261 cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
1262 /*
1263 * last thread sibling in this cpu core going down
1264 */
1265 if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
1266 c[sibling].booted_cores--;
1269 for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu))
1270 cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
1271 cpus_clear(per_cpu(cpu_sibling_map, cpu));
1272 cpus_clear(per_cpu(cpu_core_map, cpu));
1273 phys_proc_id[cpu] = BAD_APICID;
1274 cpu_core_id[cpu] = BAD_APICID;
1275 cpu_clear(cpu, cpu_sibling_setup_map);
1278 extern void fixup_irqs(void);
1279 int __cpu_disable(void)
1281 int cpu = smp_processor_id();
1283 /*
1284 * Perhaps use cpufreq to drop frequency, but that could go
1285 * into generic code.
1287 * We won't take down the boot processor on i386 because some
1288 * interrupts can only be serviced by the BSP.
1289 * Especially so if we're not using an IOAPIC -zwane
1290 */
1291 if (cpu == 0)
1292 return -EBUSY;
1294 local_irq_disable();
1295 clear_local_APIC();
1296 /* Allow any queued timer interrupts to get serviced */
1297 local_irq_enable();
1298 mdelay(1);
1299 local_irq_disable();
1301 time_suspend();
1303 cpu_mcheck_disable();
1305 remove_siblinginfo(cpu);
1307 cpu_clear(cpu, cpu_online_map);
1308 fixup_irqs();
1309 /* It's now safe to remove this processor from the online map */
1310 cpu_clear(cpu, cpu_online_map);
1312 cpu_disable_scheduler();
1314 return 0;
1317 void __cpu_die(unsigned int cpu)
1319 /* We don't do anything here: idle task is faking death itself. */
1320 unsigned int i = 0;
1322 for (;;) {
1323 /* They ack this in play_dead by setting CPU_DEAD */
1324 if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
1325 printk ("CPU %u is now offline\n", cpu);
1326 return;
1328 mdelay(100);
1329 mb();
1330 process_pending_softirqs();
1331 if ((++i % 10) == 0)
1332 printk(KERN_ERR "CPU %u still not dead...\n", cpu);
1336 static int take_cpu_down(void *unused)
1337 {
1338 return __cpu_disable();
1339 }
1341 int cpu_down(unsigned int cpu)
1343 int err = 0;
1345 spin_lock(&cpu_add_remove_lock);
1346 if (num_online_cpus() == 1) {
1347 err = -EBUSY;
1348 goto out;
1351 /* Can not offline BSP */
1352 if (cpu == 0) {
1353 err = -EINVAL;
1354 goto out;
1357 if (!cpu_online(cpu)) {
1358 err = -EINVAL;
1359 goto out;
1362 printk("Prepare to bring CPU%d down...\n", cpu);
1364 cpufreq_del_cpu(cpu);
1366 err = stop_machine_run(take_cpu_down, NULL, cpu);
1367 if (err < 0)
1368 goto out;
1370 __cpu_die(cpu);
1372 BUG_ON(cpu_online(cpu));
1374 cpu_mcheck_distribute_cmci();
1376 out:
1377 if (!err)
1378 send_guest_global_virq(dom0, VIRQ_PCPU_STATE);
1379 spin_unlock(&cpu_add_remove_lock);
1380 return err;
1383 int cpu_up(unsigned int cpu)
1384 {
1385 int err = 0;
1387 spin_lock(&cpu_add_remove_lock);
1388 if (cpu_online(cpu)) {
1389 printk("Bringing up an already-online CPU. Bogus!\n");
1390 err = -EBUSY;
1391 goto out;
1392 }
1394 err = __cpu_up(cpu);
1395 if (err < 0)
1396 goto out;
1398 out:
1399 if (!err)
1400 send_guest_global_virq(dom0, VIRQ_PCPU_STATE);
1401 spin_unlock(&cpu_add_remove_lock);
1402 return err;
1403 }
1405 /* From kernel/power/main.c */
1406 /* This is protected by pm_sem semaphore */
1407 static cpumask_t frozen_cpus;
1409 void disable_nonboot_cpus(void)
1411 int cpu, error;
1413 error = 0;
1414 cpus_clear(frozen_cpus);
1415 printk("Freezing cpus ...\n");
1416 for_each_online_cpu(cpu) {
1417 if (cpu == 0)
1418 continue;
1419 error = cpu_down(cpu);
1420 if (!error) {
1421 cpu_set(cpu, frozen_cpus);
1422 printk("CPU%d is down\n", cpu);
1423 continue;
1425 printk("Error taking cpu %d down: %d\n", cpu, error);
1427 BUG_ON(raw_smp_processor_id() != 0);
1428 if (error)
1429 panic("cpus not sleeping");
1432 void enable_nonboot_cpus(void)
1434 int cpu, error;
1436 printk("Thawing cpus ...\n");
1437 mtrr_aps_sync_begin();
1438 for_each_cpu_mask(cpu, frozen_cpus) {
1439 error = cpu_up(cpu);
1440 if (!error) {
1441 printk("CPU%d is up\n", cpu);
1442 continue;
1444 printk("Error taking cpu %d up: %d\n", cpu, error);
1445 panic("Not enough cpus");
1447 mtrr_aps_sync_end();
1448 cpus_clear(frozen_cpus);
1450 /*
1451 * Cleanup possible dangling ends after sleep...
1452 */
1453 smpboot_restore_warm_reset_vector();
1456 int cpu_add(uint32_t apic_id, uint32_t acpi_id, uint32_t pxm)
1458 int cpu = -1;
1460 #ifndef CONFIG_ACPI
1461 return -ENOSYS;
1462 #endif
1464 dprintk(XENLOG_DEBUG, "cpu_add apic_id %x acpi_id %x pxm %x\n",
1465 apic_id, acpi_id, pxm);
1467 if ( acpi_id > MAX_MADT_ENTRIES || apic_id > MAX_APICS || pxm > 256 )
1468 return -EINVAL;
1470 /* Detect if the cpu has been added before */
1471 if ( x86_acpiid_to_apicid[acpi_id] != 0xff)
1473 if (x86_acpiid_to_apicid[acpi_id] != apic_id)
1474 return -EINVAL;
1475 else
1476 return -EEXIST;
1479 if ( physid_isset(apic_id, phys_cpu_present_map) )
1480 return -EEXIST;
1482 spin_lock(&cpu_add_remove_lock);
1484 cpu = mp_register_lapic(apic_id, 1);
1486 if (cpu < 0)
1488 spin_unlock(&cpu_add_remove_lock);
1489 return cpu;
1492 x86_acpiid_to_apicid[acpi_id] = apic_id;
1494 if ( !srat_disabled() )
1496 int node;
1498 node = setup_node(pxm);
1499 if (node < 0)
1501 dprintk(XENLOG_WARNING,
1502 "Setup node failed for pxm %x\n", pxm);
1503 x86_acpiid_to_apicid[acpi_id] = 0xff;
1504 mp_unregister_lapic(apic_id, cpu);
1505 spin_unlock(&cpu_add_remove_lock);
1506 return node;
1508 apicid_to_node[apic_id] = node;
1511 srat_detect_node(cpu);
1512 numa_add_cpu(cpu);
1513 spin_unlock(&cpu_add_remove_lock);
1514 dprintk(XENLOG_INFO, "Add CPU %x with index %x\n", apic_id, cpu);
1515 return cpu;
1519 int __devinit __cpu_up(unsigned int cpu)
1521 int ret;
1523 ret = hvm_cpu_prepare(cpu);
1524 if (ret)
1525 return ret;
1527 /*
1528 * We do warm boot only on cpus that had booted earlier
1529 * Otherwise cold boot is all handled from smp_boot_cpus().
1530 * cpu_callin_map is set during the AP kickstart process. It's reset
1531 * when a cpu is taken offline from cpu_exit_clear().
1532 */
1533 if (!cpu_isset(cpu, cpu_callin_map)) {
1534 ret = __smp_prepare_cpu(cpu);
1535 smpboot_restore_warm_reset_vector();
1538 if (ret)
1539 return -EIO;
1541 /* In case one didn't come up */
1542 if (!cpu_isset(cpu, cpu_callin_map)) {
1543 printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
1544 local_irq_enable();
1545 return -EIO;
1548 local_irq_enable();
1549 /*per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;*/
1550 /* Unleash the CPU! */
1551 cpu_set(cpu, smp_commenced_mask);
1552 while (!cpu_isset(cpu, cpu_online_map)) {
1553 mb();
1554 process_pending_softirqs();
1557 cpufreq_add_cpu(cpu);
1558 return 0;
1562 void __init smp_cpus_done(unsigned int max_cpus)
1563 {
1564 #ifdef CONFIG_X86_IO_APIC
1565 setup_ioapic_dest();
1566 #endif
1567 mtrr_save_state();
1568 mtrr_aps_sync_end();
1569 }
1571 void __init smp_intr_init(void)
1573 int irq, seridx, cpu = smp_processor_id();
1575 /*
1576 * IRQ0 must be given a fixed assignment and initialized,
1577 * because it's used before the IO-APIC is set up.
1578 */
1579 irq_vector[0] = FIRST_HIPRIORITY_VECTOR;
1581 /*
1582 * Also ensure serial interrupts are high priority. We do not
1583 * want them to be blocked by unacknowledged guest-bound interrupts.
1584 */
1585 for (seridx = 0; seridx < 2; seridx++) {
1586 if ((irq = serial_irq(seridx)) < 0)
1587 continue;
1588 irq_vector[irq] = FIRST_HIPRIORITY_VECTOR + seridx + 1;
1589 per_cpu(vector_irq, cpu)[FIRST_HIPRIORITY_VECTOR + seridx + 1] = irq;
1590 irq_cfg[irq].vector = FIRST_HIPRIORITY_VECTOR + seridx + 1;
1591 irq_cfg[irq].domain = (cpumask_t)CPU_MASK_ALL;
1594 /* IPI for cleaning up vectors after irq move */
1595 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
1597 /* IPI for event checking. */
1598 set_intr_gate(EVENT_CHECK_VECTOR, event_check_interrupt);
1600 /* IPI for invalidation */
1601 set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
1603 /* IPI for generic function call */
1604 set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);