debuggers.hg

annotate xen/arch/x86/smpboot.c @ 3658:0ef6e8e6e85d

bitkeeper revision 1.1159.212.71 (4200f0afX_JumfbEHQex6TdFENULMQ)

Merge labyrinth.cl.cam.ac.uk:/auto/groups/xeno-xenod/BK/xen-unstable.bk
into labyrinth.cl.cam.ac.uk:/auto/groups/xeno/users/iap10/xeno-clone/xen-unstable.bk
author iap10@labyrinth.cl.cam.ac.uk
date Wed Feb 02 15:24:31 2005 +0000 (2005-02-02)
parents dda5ab69e74a beb0887c54bc
children bf2c38625b39
rev   line source
kaf24@1490 1 /*
kaf24@1490 2 * x86 SMP booting functions
kaf24@1490 3 *
kaf24@1490 4 * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
kaf24@1490 5 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
kaf24@1490 6 *
kaf24@1490 7 * Much of the core SMP work is based on previous work by Thomas Radke, to
kaf24@1490 8 * whom a great many thanks are extended.
kaf24@1490 9 *
kaf24@1490 10 * Thanks to Intel for making available several different Pentium,
kaf24@1490 11 * Pentium Pro and Pentium-II/Xeon MP machines.
kaf24@1490 12 * Original development of Linux SMP code supported by Caldera.
kaf24@1490 13 *
kaf24@1490 14 * This code is released under the GNU General Public License version 2 or
kaf24@1490 15 * later.
kaf24@1490 16 *
kaf24@1490 17 * Fixes
kaf24@1490 18 * Felix Koop : NR_CPUS used properly
kaf24@1490 19 * Jose Renau : Handle single CPU case.
kaf24@1490 20 * Alan Cox : By repeated request 8) - Total BogoMIP report.
kaf24@1490 21 * Greg Wright : Fix for kernel stacks panic.
kaf24@1490 22 * Erich Boleyn : MP v1.4 and additional changes.
kaf24@1490 23 * Matthias Sattler : Changes for 2.1 kernel map.
kaf24@1490 24 * Michel Lespinasse : Changes for 2.1 kernel map.
kaf24@1490 25 * Michael Chastain : Change trampoline.S to gnu as.
kaf24@1490 26 * Alan Cox : Dumb bug: 'B' step PPro's are fine
kaf24@1490 27 * Ingo Molnar : Added APIC timers, based on code
kaf24@1490 28 * from Jose Renau
kaf24@1490 29 * Ingo Molnar : various cleanups and rewrites
kaf24@1490 30 * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
kaf24@1490 31 * Maciej W. Rozycki : Bits for genuine 82489DX APICs
kaf24@1490 32 * Martin J. Bligh : Added support for multi-quad systems
kaf24@1490 33 */
kaf24@1490 34
kaf24@1490 35 #include <xen/config.h>
kaf24@1490 36 #include <xen/init.h>
kaf24@1490 37 #include <xen/irq.h>
kaf24@1490 38 #include <xen/mm.h>
kaf24@1490 39 #include <xen/slab.h>
kaf24@1490 40 #include <asm/flushtlb.h>
kaf24@1490 41 #include <asm/mc146818rtc.h>
kaf24@1490 42 #include <asm/smpboot.h>
kaf24@1490 43 #include <xen/smp.h>
kaf24@1490 44 #include <asm/msr.h>
kaf24@1490 45 #include <asm/system.h>
kaf24@1490 46 #include <asm/mpspec.h>
kaf24@1490 47 #include <asm/io_apic.h>
kaf24@1490 48 #include <xen/sched.h>
kaf24@1490 49 #include <xen/delay.h>
kaf24@1490 50 #include <xen/lib.h>
kaf24@1490 51
kaf24@1490 52 #ifdef CONFIG_SMP
kaf24@1490 53
kaf24@1490 54 /* Setup configured maximum number of CPUs to activate */
kaf24@1490 55 static int max_cpus = -1;
kaf24@1490 56
kaf24@1490 57 /* Total count of live CPUs */
kaf24@1490 58 int smp_num_cpus = 1;
kaf24@1490 59
cl349@2741 60 /* Number of hyperthreads per core */
cl349@2741 61 int ht_per_core = 1;
cl349@2741 62
kaf24@1490 63 /* Bitmask of currently online CPUs */
kaf24@1490 64 unsigned long cpu_online_map;
kaf24@1490 65
kaf24@1490 66 static volatile unsigned long cpu_callin_map;
kaf24@1490 67 static volatile unsigned long cpu_callout_map;
kaf24@1490 68
kaf24@1490 69 /* Per CPU bogomips and other parameters */
kaf24@3113 70 struct cpuinfo_x86 cpu_data[NR_CPUS];
kaf24@1490 71
kaf24@1490 72 /* Set when the idlers are all forked */
kaf24@1490 73 int smp_threads_ready;
kaf24@1490 74
kaf24@1490 75 /*
kaf24@1490 76 * Trampoline 80x86 program as an array.
kaf24@1490 77 */
kaf24@1490 78
kaf24@1490 79 extern unsigned char trampoline_data [];
kaf24@1490 80 extern unsigned char trampoline_end [];
kaf24@1490 81 static unsigned char *trampoline_base;
kaf24@1490 82
kaf24@1490 83 /*
kaf24@1490 84 * Currently trivial. Write the real->protected mode
kaf24@1490 85 * bootstrap into the page concerned. The caller
kaf24@1490 86 * has made sure it's suitably aligned.
kaf24@1490 87 */
kaf24@1490 88
kaf24@1490 89 static unsigned long __init setup_trampoline(void)
kaf24@1490 90 {
kaf24@1490 91 memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
kaf24@1490 92 return virt_to_phys(trampoline_base);
kaf24@1490 93 }
kaf24@1490 94
kaf24@1490 95 /*
kaf24@1490 96 * We are called very early to get the low memory for the
kaf24@1490 97 * SMP bootup trampoline page.
kaf24@1490 98 */
kaf24@1490 99 void __init smp_alloc_memory(void)
kaf24@1490 100 {
kaf24@1490 101 /*
kaf24@1490 102 * Has to be in very low memory so we can execute
kaf24@1490 103 * real-mode AP code.
kaf24@1490 104 */
kaf24@1490 105 trampoline_base = __va(0x90000);
kaf24@1490 106 }
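
/*
 * Editor's illustration, not part of smpboot.c: a secondary CPU starts in
 * real mode, so the trampoline has to live below 1MB and its physical
 * address must be expressible as a real-mode CS:IP pair.  do_boot_cpu()
 * below stores exactly that split via TRAMPOLINE_HIGH/TRAMPOLINE_LOW (in
 * the corresponding Linux code these resolve to the 40:67 warm-boot vector,
 * which CMOS shutdown code 0xa makes a resetting CPU jump through; assumed
 * to be the same here).  The hosted-C helper below is hypothetical and only
 * shows the arithmetic for the 0x90000 base chosen above.
 */
#include <stdio.h>

static void show_warm_reset_vector(unsigned long start_eip)
{
    unsigned short segment = start_eip >> 4;  /* stored via TRAMPOLINE_HIGH */
    unsigned short offset  = start_eip & 0xf; /* stored via TRAMPOLINE_LOW  */
    printf("start_eip %#lx -> CS:IP = %04x:%04x\n", start_eip, segment, offset);
}

int main(void)
{
    show_warm_reset_vector(0x90000);          /* prints 0x90000 -> 9000:0000 */
    return 0;
}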
kaf24@1490 107
kaf24@1490 108 /*
kaf24@1490 109 * The bootstrap kernel entry code has set these up. Save them for
kaf24@1490 110 * a given CPU.
kaf24@1490 111 */
kaf24@1490 112
kaf24@1490 113 void __init smp_store_cpu_info(int id)
kaf24@1490 114 {
kaf24@1518 115 cpu_data[id] = boot_cpu_data;
kaf24@1518 116 identify_cpu(&cpu_data[id]);
kaf24@1490 117 }
kaf24@1490 118
kaf24@1490 119 /*
kaf24@1490 120 * Architecture specific routine called by the kernel just before init is
kaf24@1490 121 * fired off. This allows the BP to have everything in order [we hope].
kaf24@1490 122 * At the end of this all the APs will hit the system scheduling and off
kaf24@1490 123 * we go. Each AP will load the system gdt's and jump through the kernel
kaf24@1490 124 * init into idle(). At this point the scheduler will one day take over
kaf24@1490 125 * and give them jobs to do. smp_callin is a standard routine
kaf24@1490 126 * we use to track CPUs as they power up.
kaf24@1490 127 */
kaf24@1490 128
kaf24@1490 129 static atomic_t smp_commenced = ATOMIC_INIT(0);
kaf24@1490 130
kaf24@1490 131 void __init smp_commence(void)
kaf24@1490 132 {
kaf24@1490 133 /*
kaf24@1490 134 * Lets the callins below out of their loop.
kaf24@1490 135 */
kaf24@1490 136 Dprintk("Setting commenced=1, go go go\n");
kaf24@1490 137
kaf24@1490 138 wmb();
kaf24@1490 139 atomic_set(&smp_commenced,1);
kaf24@1490 140 }
kaf24@1490 141
kaf24@1490 142 /*
kaf24@1490 143 * TSC synchronization.
kaf24@1490 144 *
kaf24@1490 145 * We first check whether all CPUs have their TSCs synchronized,
kaf24@1490 146 * then we print a warning if not, and always resync.
kaf24@1490 147 */
kaf24@1490 148
kaf24@1490 149 static atomic_t tsc_start_flag = ATOMIC_INIT(0);
kaf24@1490 150 static atomic_t tsc_count_start = ATOMIC_INIT(0);
kaf24@1490 151 static atomic_t tsc_count_stop = ATOMIC_INIT(0);
kaf24@1490 152 static unsigned long long tsc_values[NR_CPUS];
kaf24@1490 153
kaf24@1490 154 #define NR_LOOPS 5
kaf24@1490 155
kaf24@1490 156 /*
kaf24@1490 157 * accurate 64-bit/32-bit division, expanded to 32-bit divisions and 64-bit
kaf24@1490 158 * multiplication. Not terribly optimized, but we only need it at boot
kaf24@1490 159 * time anyway.
kaf24@1490 160 *
kaf24@1490 161 * result == a / b
kaf24@1490 162 * == (a1 + a2*(2^32)) / b
kaf24@1490 163 * == a1/b + a2*(2^32/b)
kaf24@1490 164 * == a1/b + a2*((2^32-1)/b) + a2/b + (a2*((2^32-1) % b))/b
kaf24@1490 165 * ^---- (this multiplication can overflow)
kaf24@1490 166 */
kaf24@1490 167
kaf24@1490 168 static unsigned long long div64 (unsigned long long a, unsigned long b0)
kaf24@1490 169 {
kaf24@1490 170 unsigned int a1, a2;
kaf24@1490 171 unsigned long long res;
kaf24@1490 172
kaf24@1490 173 a1 = ((unsigned int*)&a)[0];
kaf24@1490 174 a2 = ((unsigned int*)&a)[1];
kaf24@1490 175
kaf24@1490 176 res = a1/b0 +
kaf24@1490 177 (unsigned long long)a2 * (unsigned long long)(0xffffffff/b0) +
kaf24@1490 178 a2 / b0 +
kaf24@1490 179 (a2 * (0xffffffff % b0)) / b0;
kaf24@1490 180
kaf24@1490 181 return res;
kaf24@1490 182 }
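
/*
 * Editor's sketch, not part of smpboot.c: the same decomposition as div64()
 * above, rewritten with uint64_t so the multiplication flagged in the comment
 * cannot overflow, plus a comparison against native 64-bit division.  This is
 * hosted C for illustration only (stdio/stdint are of course not available
 * inside the hypervisor).  Because the split takes three separate floors, the
 * result can fall below the exact quotient by at most 2, which is harmless
 * for the boot-time averaging div64() is used for.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t div64_sketch(uint64_t a, uint32_t b)
{
    uint32_t a1 = (uint32_t)a;          /* low 32 bits of the dividend  */
    uint32_t a2 = (uint32_t)(a >> 32);  /* high 32 bits of the dividend */

    /* a/b ~= a1/b + a2*((2^32-1)/b) + a2/b + (a2*((2^32-1)%b))/b */
    return a1 / b
         + (uint64_t)a2 * (0xffffffffu / b)
         + a2 / b
         + ((uint64_t)a2 * (0xffffffffu % b)) / b;
}

int main(void)
{
    uint64_t a = 123456789012345ULL;    /* arbitrary example values */
    uint32_t b = 2400;

    printf("sketch: %llu  native: %llu\n",
           (unsigned long long)div64_sketch(a, b),
           (unsigned long long)(a / b));
    return 0;
}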
kaf24@1490 183
kaf24@1490 184 static void __init synchronize_tsc_bp (void)
kaf24@1490 185 {
kaf24@1490 186 int i;
kaf24@1490 187 unsigned long long t0;
kaf24@1490 188 unsigned long long sum, avg;
kaf24@1490 189 long long delta;
kaf24@1490 190 int buggy = 0;
kaf24@1490 191
kaf24@1490 192 printk("checking TSC synchronization across CPUs: ");
kaf24@1490 193
kaf24@1490 194 atomic_set(&tsc_start_flag, 1);
kaf24@1490 195 wmb();
kaf24@1490 196
kaf24@1490 197 /*
kaf24@1490 198 * We loop a few times to get a primed instruction cache,
kaf24@1490 199 * then the last pass is more or less synchronized and
kaf24@1490 200 * the BP and APs set their cycle counters to zero all at
kaf24@1490 201 * once. This reduces the chance of having random offsets
kaf24@1490 202 * between the processors, and guarantees that the maximum
kaf24@1490 203 * delay between the cycle counters is never bigger than
kaf24@1490 204 * the latency of information-passing (cachelines) between
kaf24@1490 205 * two CPUs.
kaf24@1490 206 */
kaf24@1490 207 for (i = 0; i < NR_LOOPS; i++) {
kaf24@1490 208 /*
kaf24@1490 209 * all APs synchronize but they loop on '== num_cpus'
kaf24@1490 210 */
kaf24@1490 211 while (atomic_read(&tsc_count_start) != smp_num_cpus-1) mb();
kaf24@1490 212 atomic_set(&tsc_count_stop, 0);
kaf24@1490 213 wmb();
kaf24@1490 214 /*
kaf24@1490 215 * this lets the APs save their current TSC:
kaf24@1490 216 */
kaf24@1490 217 atomic_inc(&tsc_count_start);
kaf24@1490 218
kaf24@1490 219 rdtscll(tsc_values[smp_processor_id()]);
kaf24@1490 220 /*
kaf24@1490 221 * We clear the TSC in the last loop:
kaf24@1490 222 */
kaf24@1490 223 if (i == NR_LOOPS-1)
kaf24@1490 224 write_tsc(0, 0);
kaf24@1490 225
kaf24@1490 226 /*
kaf24@1490 227 * Wait for all APs to leave the synchronization point:
kaf24@1490 228 */
kaf24@1490 229 while (atomic_read(&tsc_count_stop) != smp_num_cpus-1) mb();
kaf24@1490 230 atomic_set(&tsc_count_start, 0);
kaf24@1490 231 wmb();
kaf24@1490 232 atomic_inc(&tsc_count_stop);
kaf24@1490 233 }
kaf24@1490 234
kaf24@1490 235 sum = 0;
kaf24@1490 236 for (i = 0; i < smp_num_cpus; i++) {
kaf24@1490 237 t0 = tsc_values[i];
kaf24@1490 238 sum += t0;
kaf24@1490 239 }
kaf24@1490 240 avg = div64(sum, smp_num_cpus);
kaf24@1490 241
kaf24@1490 242 sum = 0;
kaf24@1490 243 for (i = 0; i < smp_num_cpus; i++) {
kaf24@1490 244 delta = tsc_values[i] - avg;
kaf24@1490 245 if (delta < 0)
kaf24@1490 246 delta = -delta;
kaf24@1490 247 /*
kaf24@1490 248 * We report bigger than 2 microseconds clock differences.
kaf24@1490 249 */
kaf24@1490 250 if (delta > 2*ticks_per_usec) {
kaf24@1490 251 long realdelta;
kaf24@1490 252 if (!buggy) {
kaf24@1490 253 buggy = 1;
kaf24@1490 254 printk("\n");
kaf24@1490 255 }
kaf24@1490 256 realdelta = div64(delta, ticks_per_usec);
kaf24@1490 257 if (tsc_values[i] < avg)
kaf24@1490 258 realdelta = -realdelta;
kaf24@1490 259
kaf24@1490 260 printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
kaf24@1490 261 i, realdelta);
kaf24@1490 262 }
kaf24@1490 263
kaf24@1490 264 sum += delta;
kaf24@1490 265 }
kaf24@1490 266 if (!buggy)
kaf24@1490 267 printk("passed.\n");
kaf24@1490 268 }
kaf24@1490 269
kaf24@1490 270 static void __init synchronize_tsc_ap (void)
kaf24@1490 271 {
kaf24@1490 272 int i;
kaf24@1490 273
kaf24@1490 274 /*
kaf24@1490 275 * smp_num_cpus is not necessarily known at the time
kaf24@1490 276 * this gets called, so we first wait for the BP to
kaf24@1490 277 * finish SMP initialization:
kaf24@1490 278 */
kaf24@1490 279 while (!atomic_read(&tsc_start_flag)) mb();
kaf24@1490 280
kaf24@1490 281 for (i = 0; i < NR_LOOPS; i++) {
kaf24@1490 282 atomic_inc(&tsc_count_start);
kaf24@1490 283 while (atomic_read(&tsc_count_start) != smp_num_cpus) mb();
kaf24@1490 284
kaf24@1490 285 rdtscll(tsc_values[smp_processor_id()]);
kaf24@1490 286 if (i == NR_LOOPS-1)
kaf24@1490 287 write_tsc(0, 0);
kaf24@1490 288
kaf24@1490 289 atomic_inc(&tsc_count_stop);
kaf24@1490 290 while (atomic_read(&tsc_count_stop) != smp_num_cpus) mb();
kaf24@1490 291 }
kaf24@1490 292 }
kaf24@1490 293 #undef NR_LOOPS
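
/*
 * Editor's sketch, not part of smpboot.c: the two-counter rendezvous used by
 * synchronize_tsc_bp()/synchronize_tsc_ap() above, reduced to portable C11
 * atomics and POSIX threads so the pattern can be read on its own.  Thread
 * count, round count and the use of clock() as a stand-in for rdtscll() are
 * illustrative assumptions; the TSC-zeroing step is omitted.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <time.h>

#define NCPUS  4                        /* one "BP" plus three "APs" */
#define ROUNDS 5                        /* mirrors NR_LOOPS */

static atomic_int count_start, count_stop;
static clock_t samples[NCPUS];

static void *ap_thread(void *arg)       /* plays synchronize_tsc_ap() */
{
    int cpu = (int)(long)arg;

    for (int i = 0; i < ROUNDS; i++) {
        atomic_fetch_add(&count_start, 1);
        while (atomic_load(&count_start) != NCPUS)
            ;                           /* wait until the BP releases us */
        samples[cpu] = clock();         /* stand-in for rdtscll() */
        atomic_fetch_add(&count_stop, 1);
        while (atomic_load(&count_stop) != NCPUS)
            ;                           /* wait for the end of the round */
    }
    return NULL;
}

static void bp_rounds(void)             /* plays synchronize_tsc_bp() */
{
    for (int i = 0; i < ROUNDS; i++) {
        while (atomic_load(&count_start) != NCPUS - 1)
            ;                           /* all APs have arrived */
        atomic_store(&count_stop, 0);
        atomic_fetch_add(&count_start, 1);   /* release them to sample */
        samples[0] = clock();
        while (atomic_load(&count_stop) != NCPUS - 1)
            ;                           /* all APs have sampled */
        atomic_store(&count_start, 0);
        atomic_fetch_add(&count_stop, 1);    /* release them from the round */
    }
}

int main(void)
{
    pthread_t tid[NCPUS];

    for (long cpu = 1; cpu < NCPUS; cpu++)
        pthread_create(&tid[cpu], NULL, ap_thread, (void *)cpu);
    bp_rounds();
    for (int cpu = 1; cpu < NCPUS; cpu++)
        pthread_join(tid[cpu], NULL);
    for (int cpu = 0; cpu < NCPUS; cpu++)
        printf("cpu%d sampled at %ld\n", cpu, (long)samples[cpu]);
    return 0;
}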
kaf24@1490 294
kaf24@1490 295 static atomic_t init_deasserted;
kaf24@1490 296
kaf24@1490 297 void __init smp_callin(void)
kaf24@1490 298 {
kaf24@1490 299 int cpuid, phys_id, i;
kaf24@1490 300
kaf24@1490 301 /*
kaf24@1490 302 * If woken up by an INIT in an 82489DX configuration
kaf24@1490 303 * we may get here before an INIT-deassert IPI reaches
kaf24@1490 304 * our local APIC. We have to wait for the IPI or we'll
kaf24@1490 305 * lock up on an APIC access.
kaf24@1490 306 */
kaf24@1490 307 while (!atomic_read(&init_deasserted));
kaf24@1490 308
kaf24@1490 309 /*
kaf24@1490 310 * (This works even if the APIC is not enabled.)
kaf24@1490 311 */
kaf24@1490 312 phys_id = GET_APIC_ID(apic_read(APIC_ID));
kaf24@1490 313 cpuid = smp_processor_id();
kaf24@1490 314 if (test_and_set_bit(cpuid, &cpu_online_map)) {
kaf24@1490 315 printk("huh, phys CPU#%d, CPU#%d already present??\n",
kaf24@1490 316 phys_id, cpuid);
kaf24@1490 317 BUG();
kaf24@1490 318 }
kaf24@1490 319 Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
kaf24@1490 320
kaf24@1490 321 /*
kaf24@1490 322 * STARTUP IPIs are fragile beasts as they might sometimes
kaf24@1490 323 * trigger some glue motherboard logic. Complete APIC bus
kaf24@1490 324 * silence for 1 second; this overestimates, by a factor of
kaf24@1490 325 * two, the time the boot CPU spends sending the up to 2
kaf24@1490 326 * STARTUP IPIs. This should be enough.
kaf24@1490 327 */
kaf24@1490 328
kaf24@1490 329 for ( i = 0; i < 200; i++ )
kaf24@1490 330 {
kaf24@1490 331 if ( test_bit(cpuid, &cpu_callout_map) ) break;
kaf24@1490 332 mdelay(10);
kaf24@1490 333 }
kaf24@1490 334
kaf24@1490 335 if (!test_bit(cpuid, &cpu_callout_map)) {
kaf24@1490 336 printk("BUG: CPU%d started up but did not get a callout!\n",
kaf24@1490 337 cpuid);
kaf24@1490 338 BUG();
kaf24@1490 339 }
kaf24@1490 340
kaf24@1490 341 /*
kaf24@1490 342 * the boot CPU has finished the init stage and is spinning
kaf24@1490 343 * on callin_map until we finish. We are free to set up this
kaf24@1490 344 * CPU, first the APIC. (this is probably redundant on most
kaf24@1490 345 * boards)
kaf24@1490 346 */
kaf24@1490 347
kaf24@1490 348 Dprintk("CALLIN, before setup_local_APIC().\n");
kaf24@1490 349
kaf24@1490 350 setup_local_APIC();
kaf24@1490 351
kaf24@1490 352 __sti();
kaf24@1490 353
kaf24@1490 354 #ifdef CONFIG_MTRR
kaf24@1490 355 /*
kaf24@1490 356 * Must be done before calibration delay is computed
kaf24@1490 357 */
kaf24@1490 358 mtrr_init_secondary_cpu ();
kaf24@1490 359 #endif
kaf24@1490 360
kaf24@1490 361 Dprintk("Stack at about %p\n",&cpuid);
kaf24@1490 362
kaf24@1490 363 /*
kaf24@1490 364 * Save our processor parameters
kaf24@1490 365 */
kaf24@1490 366 smp_store_cpu_info(cpuid);
kaf24@1490 367
kaf24@1490 368 if (nmi_watchdog == NMI_LOCAL_APIC)
kaf24@1490 369 setup_apic_nmi_watchdog();
kaf24@1490 370
kaf24@1490 371 /*
kaf24@1490 372 * Allow the master to continue.
kaf24@1490 373 */
kaf24@1490 374 set_bit(cpuid, &cpu_callin_map);
kaf24@1490 375
kaf24@1490 376 /*
kaf24@1490 377 * Synchronize the TSC with the BP
kaf24@1490 378 */
kaf24@1490 379 synchronize_tsc_ap();
kaf24@1490 380 }
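
/*
 * Editor's sketch, not part of smpboot.c: the callout/callin bitmap handshake
 * that smp_callin() above and do_boot_cpu() below perform, modelled with two
 * POSIX threads and C11 atomics.  Bit numbers, the timeout and the polling
 * interval are illustrative assumptions, not values taken from this file.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <time.h>

static atomic_ulong callout_map, callin_map;

static void poll_delay(void)
{
    struct timespec ts = { 0, 1000000 };        /* 1ms, stand-in for mdelay/udelay */
    nanosleep(&ts, NULL);
}

static void *secondary(void *arg)               /* the smp_callin() side */
{
    int cpu = (int)(long)arg;

    while (!(atomic_load(&callout_map) & (1UL << cpu)))
        poll_delay();                           /* wait for CALLOUT */
    /* ... per-CPU setup would happen here ... */
    atomic_fetch_or(&callin_map, 1UL << cpu);   /* allow the master to continue */
    return NULL;
}

int main(void)                                  /* the do_boot_cpu() side */
{
    pthread_t tid;
    int cpu = 1, timeout;

    pthread_create(&tid, NULL, secondary, (void *)(long)cpu);

    atomic_fetch_or(&callout_map, 1UL << cpu);  /* allow the AP to start */
    for (timeout = 0; timeout < 5000; timeout++) {
        if (atomic_load(&callin_map) & (1UL << cpu))
            break;                              /* it has booted */
        poll_delay();
    }
    printf(atomic_load(&callin_map) & (1UL << cpu) ?
           "CPU%d has booted.\n" : "CPU%d not responding.\n", cpu);

    pthread_join(tid, NULL);
    return 0;
}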
kaf24@1490 381
kaf24@1490 382 static int cpucount;
kaf24@1490 383
kaf24@1490 384 /*
kaf24@1490 385 * Activate a secondary processor.
kaf24@1490 386 */
kaf24@1490 387 void __init start_secondary(void)
kaf24@1490 388 {
kaf24@1490 389 unsigned int cpu = cpucount;
kaf24@1490 390 /* 6 bytes suitable for passing to LIDT instruction. */
kaf24@1490 391 unsigned char idt_load[6];
kaf24@1490 392
kaf24@1490 393 extern void cpu_init(void);
kaf24@1490 394
kaf24@1490 395 set_current(idle_task[cpu]);
kaf24@1490 396
kaf24@1490 397 /*
kaf24@1490 398 * Don't put anything before smp_callin(); SMP
kaf24@1490 399 * booting is so fragile that we want to limit the
kaf24@1490 400 * things done here to the bare minimum.
kaf24@1490 401 */
kaf24@1490 402 cpu_init();
kaf24@1490 403 smp_callin();
kaf24@1490 404
kaf24@1490 405 while (!atomic_read(&smp_commenced))
kaf24@1490 406 rep_nop();
kaf24@1490 407
kaf24@1490 408 /*
kaf24@1490 409 * At this point, the boot CPU has fully initialised the IDT. It is
kaf24@1490 410 * now safe to make ourselves a private copy.
kaf24@1490 411 */
iap10@3650 412 idt_tables[cpu] = xmalloc_array(struct desc_struct, IDT_ENTRIES);
kaf24@1490 413 memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*8);
kaf24@1490 414 *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*8)-1;
kaf24@1490 415 *(unsigned long *)(&idt_load[2]) = (unsigned long)idt_tables[cpu];
kaf24@1490 416 __asm__ __volatile__ ( "lidt %0" : : "m" (idt_load) );
kaf24@1490 417
kaf24@1490 418 /*
kaf24@1490 419 * Low-memory mappings have been cleared; flush them from the local TLBs
kaf24@1490 420 * too.
kaf24@1490 421 */
kaf24@1490 422 local_flush_tlb();
kaf24@1490 423
kaf24@1490 424 startup_cpu_idle_loop();
kaf24@1490 425
kaf24@1490 426 BUG();
kaf24@1490 427 }
kaf24@1490 428
kaf24@1490 429 extern struct {
kaf24@1490 430 unsigned long esp, ss;
kaf24@1490 431 } stack_start;
kaf24@1490 432
kaf24@1490 433 /* which physical APIC ID maps to which logical CPU number */
kaf24@1490 434 volatile int physical_apicid_2_cpu[MAX_APICID];
kaf24@1490 435 /* which logical CPU number maps to which physical APIC ID */
kaf24@1490 436 volatile int cpu_2_physical_apicid[NR_CPUS];
kaf24@1490 437
kaf24@1490 438 /* which logical APIC ID maps to which logical CPU number */
kaf24@1490 439 volatile int logical_apicid_2_cpu[MAX_APICID];
kaf24@1490 440 /* which logical CPU number maps to which logical APIC ID */
kaf24@1490 441 volatile int cpu_2_logical_apicid[NR_CPUS];
kaf24@1490 442
kaf24@1490 443 static inline void init_cpu_to_apicid(void)
kaf24@1490 444 /* Initialize all maps between cpu number and apicids */
kaf24@1490 445 {
kaf24@1490 446 int apicid, cpu;
kaf24@1490 447
kaf24@1490 448 for (apicid = 0; apicid < MAX_APICID; apicid++) {
kaf24@1490 449 physical_apicid_2_cpu[apicid] = -1;
kaf24@1490 450 logical_apicid_2_cpu[apicid] = -1;
kaf24@1490 451 }
kaf24@1490 452 for (cpu = 0; cpu < NR_CPUS; cpu++) {
kaf24@1490 453 cpu_2_physical_apicid[cpu] = -1;
kaf24@1490 454 cpu_2_logical_apicid[cpu] = -1;
kaf24@1490 455 }
kaf24@1490 456 }
kaf24@1490 457
kaf24@1490 458 static inline void map_cpu_to_boot_apicid(int cpu, int apicid)
kaf24@1490 459 /*
kaf24@1490 460 * set up a mapping between cpu and apicid. Uses logical apicids for multiquad,
kaf24@1490 461 * else physical apic ids
kaf24@1490 462 */
kaf24@1490 463 {
kaf24@1490 464 physical_apicid_2_cpu[apicid] = cpu;
kaf24@1490 465 cpu_2_physical_apicid[cpu] = apicid;
kaf24@1490 466 }
kaf24@1490 467
kaf24@1490 468 static inline void unmap_cpu_to_boot_apicid(int cpu, int apicid)
kaf24@1490 469 /*
kaf24@1490 470 * undo a mapping between cpu and apicid. Uses logical apicids for multiquad,
kaf24@1490 471 * else physical apic ids
kaf24@1490 472 */
kaf24@1490 473 {
kaf24@1490 474 physical_apicid_2_cpu[apicid] = -1;
kaf24@1490 475 cpu_2_physical_apicid[cpu] = -1;
kaf24@1490 476 }
kaf24@1490 477
kaf24@1490 478 #if APIC_DEBUG
kaf24@1490 479 static inline void inquire_remote_apic(int apicid)
kaf24@1490 480 {
kaf24@1490 481 int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
kaf24@1490 482 char *names[] = { "ID", "VERSION", "SPIV" };
kaf24@1490 483 int timeout, status;
kaf24@1490 484
kaf24@1490 485 printk("Inquiring remote APIC #%d...\n", apicid);
kaf24@1490 486
kaf24@1490 487 for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
kaf24@1490 488 printk("... APIC #%d %s: ", apicid, names[i]);
kaf24@1490 489
kaf24@1490 490 /*
kaf24@1490 491 * Wait for idle.
kaf24@1490 492 */
kaf24@1490 493 apic_wait_icr_idle();
kaf24@1490 494
kaf24@1490 495 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
kaf24@1490 496 apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
kaf24@1490 497
kaf24@1490 498 timeout = 0;
kaf24@1490 499 do {
kaf24@1490 500 udelay(100);
kaf24@1490 501 status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
kaf24@1490 502 } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
kaf24@1490 503
kaf24@1490 504 switch (status) {
kaf24@1490 505 case APIC_ICR_RR_VALID:
kaf24@1490 506 status = apic_read(APIC_RRR);
kaf24@1490 507 printk("%08x\n", status);
kaf24@1490 508 break;
kaf24@1490 509 default:
kaf24@1490 510 printk("failed\n");
kaf24@1490 511 }
kaf24@1490 512 }
kaf24@1490 513 }
kaf24@1490 514 #endif
kaf24@1490 515
kaf24@1490 516
kaf24@1490 517 static int wakeup_secondary_via_INIT(int phys_apicid, unsigned long start_eip)
kaf24@1490 518 {
kaf24@1490 519 unsigned long send_status = 0, accept_status = 0;
kaf24@1490 520 int maxlvt, timeout, num_starts, j;
kaf24@1490 521
kaf24@1490 522 Dprintk("Asserting INIT.\n");
kaf24@1490 523
kaf24@1490 524 /*
kaf24@1490 525 * Turn INIT on target chip
kaf24@1490 526 */
kaf24@1490 527 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
kaf24@1490 528
kaf24@1490 529 /*
kaf24@1490 530 * Send IPI
kaf24@1490 531 */
kaf24@1490 532 apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
kaf24@1490 533 | APIC_DM_INIT);
kaf24@1490 534
kaf24@1490 535 Dprintk("Waiting for send to finish...\n");
kaf24@1490 536 timeout = 0;
kaf24@1490 537 do {
kaf24@1490 538 Dprintk("+");
kaf24@1490 539 udelay(100);
kaf24@1490 540 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
kaf24@1490 541 } while (send_status && (timeout++ < 1000));
kaf24@1490 542
kaf24@1490 543 mdelay(10);
kaf24@1490 544
kaf24@1490 545 Dprintk("Deasserting INIT.\n");
kaf24@1490 546
kaf24@1490 547 /* Target chip */
kaf24@1490 548 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
kaf24@1490 549
kaf24@1490 550 /* Send IPI */
kaf24@1490 551 apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
kaf24@1490 552
kaf24@1490 553 Dprintk("Waiting for send to finish...\n");
kaf24@1490 554 timeout = 0;
kaf24@1490 555 do {
kaf24@1490 556 Dprintk("+");
kaf24@1490 557 udelay(100);
kaf24@1490 558 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
kaf24@1490 559 } while (send_status && (timeout++ < 1000));
kaf24@1490 560
kaf24@1490 561 atomic_set(&init_deasserted, 1);
kaf24@1490 562
kaf24@1490 563 /*
kaf24@1490 564 * Should we send STARTUP IPIs?
kaf24@1490 565 *
kaf24@1490 566 * Determine this based on the APIC version.
kaf24@1490 567 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
kaf24@1490 568 */
kaf24@1490 569 if (APIC_INTEGRATED(apic_version[phys_apicid]))
kaf24@1490 570 num_starts = 2;
kaf24@1490 571 else
kaf24@1490 572 num_starts = 0;
kaf24@1490 573
kaf24@1490 574 /*
kaf24@1490 575 * Run STARTUP IPI loop.
kaf24@1490 576 */
kaf24@1490 577 Dprintk("#startup loops: %d.\n", num_starts);
kaf24@1490 578
kaf24@1490 579 maxlvt = get_maxlvt();
kaf24@1490 580
kaf24@1490 581 for (j = 1; j <= num_starts; j++) {
kaf24@1490 582 Dprintk("Sending STARTUP #%d.\n",j);
kaf24@1490 583
kaf24@1490 584 apic_read_around(APIC_SPIV);
kaf24@1490 585 apic_write(APIC_ESR, 0);
kaf24@1490 586 apic_read(APIC_ESR);
kaf24@1490 587 Dprintk("After apic_write.\n");
kaf24@1490 588
kaf24@1490 589 /*
kaf24@1490 590 * STARTUP IPI
kaf24@1490 591 */
kaf24@1490 592
kaf24@1490 593 /* Target chip */
kaf24@1490 594 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
kaf24@1490 595
kaf24@1490 596 /* Boot on the stack */
kaf24@1490 597 /* Kick the second */
kaf24@1490 598 apic_write_around(APIC_ICR, APIC_DM_STARTUP
kaf24@1490 599 | (start_eip >> 12));
kaf24@1490 600
kaf24@1490 601 /*
kaf24@1490 602 * Give the other CPU some time to accept the IPI.
kaf24@1490 603 */
kaf24@1490 604 udelay(300);
kaf24@1490 605
kaf24@1490 606 Dprintk("Startup point 1.\n");
kaf24@1490 607
kaf24@1490 608 Dprintk("Waiting for send to finish...\n");
kaf24@1490 609 timeout = 0;
kaf24@1490 610 do {
kaf24@1490 611 Dprintk("+");
kaf24@1490 612 udelay(100);
kaf24@1490 613 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
kaf24@1490 614 } while (send_status && (timeout++ < 1000));
kaf24@1490 615
kaf24@1490 616 /*
kaf24@1490 617 * Give the other CPU some time to accept the IPI.
kaf24@1490 618 */
kaf24@1490 619 udelay(200);
kaf24@1490 620 /*
kaf24@1490 621 * Due to the Pentium erratum 3AP.
kaf24@1490 622 */
kaf24@1490 623 if (maxlvt > 3) {
kaf24@1490 624 apic_read_around(APIC_SPIV);
kaf24@1490 625 apic_write(APIC_ESR, 0);
kaf24@1490 626 }
kaf24@1490 627 accept_status = (apic_read(APIC_ESR) & 0xEF);
kaf24@1490 628 if (send_status || accept_status)
kaf24@1490 629 break;
kaf24@1490 630 }
kaf24@1490 631 Dprintk("After Startup.\n");
kaf24@1490 632
kaf24@1490 633 if (send_status)
kaf24@1490 634 printk("APIC never delivered???\n");
kaf24@1490 635 if (accept_status)
kaf24@1490 636 printk("APIC delivery error (%lx).\n", accept_status);
kaf24@1490 637
kaf24@1490 638 return (send_status | accept_status);
kaf24@1490 639 }
kaf24@1490 640
kaf24@1490 641 extern unsigned long cpu_initialized;
kaf24@1490 642
kaf24@1490 643 static void __init do_boot_cpu (int apicid)
kaf24@1490 644 /*
kaf24@1490 645 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
kaf24@1490 646 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
kaf24@1490 647 */
kaf24@1490 648 {
kaf24@1543 649 struct domain *idle;
cl349@2957 650 struct exec_domain *ed;
kaf24@1490 651 unsigned long boot_error = 0;
kaf24@1490 652 int timeout, cpu;
kaf24@1490 653 unsigned long start_eip, stack;
kaf24@1490 654
kaf24@1490 655 cpu = ++cpucount;
kaf24@1490 656
kaf24@2217 657 if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL )
kaf24@1490 658 panic("failed 'createdomain' for CPU %d", cpu);
kaf24@1490 659
cl349@2957 660 ed = idle->exec_domain[0];
kaf24@1490 661
cl349@2957 662 set_bit(DF_IDLETASK, &idle->d_flags);
cl349@2957 663
cl349@2957 664 ed->mm.pagetable = mk_pagetable(__pa(idle_pg_table));
kaf24@1490 665
kaf24@1490 666 map_cpu_to_boot_apicid(cpu, apicid);
kaf24@1490 667
cl349@2957 668 idle_task[cpu] = ed;
kaf24@1490 669
kaf24@1490 670 /* start_eip had better be page-aligned! */
kaf24@1490 671 start_eip = setup_trampoline();
kaf24@1490 672
kaf24@1490 673 /* So we see what's up. */
kaf24@1490 674 printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
kaf24@1490 675
kaf24@1958 676 stack = __pa(alloc_xenheap_pages(1));
kaf24@1490 677 stack_start.esp = stack + STACK_SIZE - STACK_RESERVED;
kaf24@1490 678
kaf24@1490 679 /* Debug build: detect stack overflow by setting up a guard page. */
kaf24@1490 680 memguard_guard_range(__va(stack), PAGE_SIZE);
kaf24@1490 681
kaf24@1490 682 /*
kaf24@1490 683 * This grunge runs the startup process for
kaf24@1490 684 * the targeted processor.
kaf24@1490 685 */
kaf24@1490 686
kaf24@1490 687 atomic_set(&init_deasserted, 0);
kaf24@1490 688
kaf24@1490 689 Dprintk("Setting warm reset code and vector.\n");
kaf24@1490 690
kaf24@1490 691 CMOS_WRITE(0xa, 0xf);
kaf24@1490 692 local_flush_tlb();
kaf24@1490 693 Dprintk("1.\n");
kaf24@1490 694 *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
kaf24@1490 695 Dprintk("2.\n");
kaf24@1490 696 *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
kaf24@1490 697 Dprintk("3.\n");
kaf24@1490 698
kaf24@1490 699 /*
kaf24@1490 700 * Be paranoid about clearing APIC errors.
kaf24@1490 701 */
kaf24@1490 702 if ( APIC_INTEGRATED(apic_version[apicid]) )
kaf24@1490 703 {
kaf24@1490 704 apic_read_around(APIC_SPIV);
kaf24@1490 705 apic_write(APIC_ESR, 0);
kaf24@1490 706 apic_read(APIC_ESR);
kaf24@1490 707 }
kaf24@1490 708
kaf24@1490 709 /*
kaf24@1490 710 * Status is now clean
kaf24@1490 711 */
kaf24@1490 712 boot_error = 0;
kaf24@1490 713
kaf24@1490 714 /*
kaf24@1490 715 * Starting actual IPI sequence...
kaf24@1490 716 */
kaf24@1490 717
kaf24@1490 718 boot_error = wakeup_secondary_via_INIT(apicid, start_eip);
kaf24@1490 719
kaf24@1490 720 if (!boot_error) {
kaf24@1490 721 /*
kaf24@1490 722 * allow APs to start initializing.
kaf24@1490 723 */
kaf24@1490 724 Dprintk("Before Callout %d.\n", cpu);
kaf24@1490 725 set_bit(cpu, &cpu_callout_map);
kaf24@1490 726 Dprintk("After Callout %d.\n", cpu);
kaf24@1490 727
kaf24@1490 728 /*
kaf24@1490 729 * Wait 5s total for a response
kaf24@1490 730 */
kaf24@1490 731 for (timeout = 0; timeout < 50000; timeout++) {
kaf24@1490 732 if (test_bit(cpu, &cpu_callin_map))
kaf24@1490 733 break; /* It has booted */
kaf24@1490 734 udelay(100);
kaf24@1490 735 }
kaf24@1490 736
kaf24@1490 737 if (test_bit(cpu, &cpu_callin_map)) {
kaf24@1490 738 /* number CPUs logically, starting from 1 (BSP is 0) */
kaf24@1490 739 printk("CPU%d has booted.\n", cpu);
kaf24@1490 740 } else {
kaf24@1490 741 boot_error= 1;
kaf24@1490 742 if (*((volatile unsigned long *)phys_to_virt(start_eip))
kaf24@1490 743 == 0xA5A5A5A5)
kaf24@1490 744 /* trampoline started but...? */
kaf24@1490 745 printk("Stuck ??\n");
kaf24@1490 746 else
kaf24@1490 747 /* trampoline code not run */
kaf24@1490 748 printk("Not responding.\n");
kaf24@1490 749 #if APIC_DEBUG
kaf24@1490 750 inquire_remote_apic(apicid);
kaf24@1490 751 #endif
kaf24@1490 752 }
kaf24@1490 753 }
kaf24@1490 754 if (boot_error) {
kaf24@1490 755 /* Try to put things back the way they were before ... */
kaf24@1490 756 unmap_cpu_to_boot_apicid(cpu, apicid);
kaf24@1490 757 clear_bit(cpu, &cpu_callout_map); /* was set here (do_boot_cpu()) */
kaf24@1490 758 clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
kaf24@1490 759 clear_bit(cpu, &cpu_online_map); /* was set in smp_callin() */
kaf24@1490 760 cpucount--;
kaf24@1490 761 }
kaf24@1490 762 }
kaf24@1490 763
kaf24@1490 764
kaf24@1490 765 /*
kaf24@1490 766 * Cycle through the processors sending APIC IPIs to boot each.
kaf24@1490 767 */
kaf24@1490 768
kaf24@1490 769 static int boot_cpu_logical_apicid;
kaf24@1490 770 /* Where the IO area was mapped on multiquad, always 0 otherwise */
kaf24@1490 771 void *xquad_portio = NULL;
kaf24@1490 772
kaf24@1490 773 void __init smp_boot_cpus(void)
kaf24@1490 774 {
kaf24@1490 775 int apicid, bit;
kaf24@1490 776
kaf24@1490 777 #ifdef CONFIG_MTRR
kaf24@1490 778 /* Must be done before other processors booted */
kaf24@1490 779 mtrr_init_boot_cpu ();
kaf24@1490 780 #endif
kaf24@1490 781 /* Initialize the logical to physical CPU number mapping */
kaf24@1490 782 init_cpu_to_apicid();
kaf24@1490 783
kaf24@1490 784 /*
kaf24@1490 785 * Setup boot CPU information
kaf24@1490 786 */
kaf24@1490 787 smp_store_cpu_info(0); /* Final full version of the data */
kaf24@1490 788 printk("CPU%d booted\n", 0);
kaf24@1490 789
kaf24@1490 790 /*
kaf24@1490 791 * We have the boot CPU online for sure.
kaf24@1490 792 */
kaf24@1490 793 set_bit(0, &cpu_online_map);
kaf24@1490 794 boot_cpu_logical_apicid = logical_smp_processor_id();
kaf24@1490 795 map_cpu_to_boot_apicid(0, boot_cpu_apicid);
kaf24@1490 796
kaf24@1490 797 /*
kaf24@1490 798 * If we couldn't find an SMP configuration at boot time,
kaf24@1490 799 * get out of here now!
kaf24@1490 800 */
kaf24@1490 801 if (!smp_found_config) {
kaf24@1490 802 printk("SMP motherboard not detected.\n");
kaf24@1490 803 io_apic_irqs = 0;
kaf24@1490 804 cpu_online_map = phys_cpu_present_map = 1;
kaf24@1490 805 smp_num_cpus = 1;
kaf24@1490 806 if (APIC_init_uniprocessor())
kaf24@1490 807 printk("Local APIC not detected."
kaf24@1490 808 " Using dummy APIC emulation.\n");
kaf24@1490 809 goto smp_done;
kaf24@1490 810 }
kaf24@1490 811
kaf24@1490 812 /*
kaf24@1490 813 * Should not be necessary because the MP table should list the boot
kaf24@1490 814 * CPU too, but we do it for the sake of robustness anyway.
kaf24@1490 815 */
kaf24@1490 816 if (!test_bit(boot_cpu_physical_apicid, &phys_cpu_present_map)) {
kaf24@1490 817 printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
kaf24@1490 818 boot_cpu_physical_apicid);
kaf24@1490 819 phys_cpu_present_map |= (1 << hard_smp_processor_id());
kaf24@1490 820 }
kaf24@1490 821
kaf24@1490 822 /*
kaf24@1490 823 * If we couldn't find a local APIC, then get out of here now!
kaf24@1490 824 */
kaf24@1490 825 if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
kaf24@1490 826 !test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability)) {
kaf24@1490 827 printk("BIOS bug, local APIC #%d not detected!...\n",
kaf24@1490 828 boot_cpu_physical_apicid);
kaf24@1490 829 printk("... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
kaf24@1490 830 io_apic_irqs = 0;
kaf24@1490 831 cpu_online_map = phys_cpu_present_map = 1;
kaf24@1490 832 smp_num_cpus = 1;
kaf24@1490 833 goto smp_done;
kaf24@1490 834 }
kaf24@1490 835
kaf24@1490 836 verify_local_APIC();
kaf24@1490 837
kaf24@1490 838 /*
kaf24@1490 839 * If SMP should be disabled, then really disable it!
kaf24@1490 840 */
kaf24@1490 841 if (!max_cpus) {
kaf24@1490 842 smp_found_config = 0;
kaf24@1490 843 printk("SMP mode deactivated, forcing use of dummy APIC emulation.\n");
kaf24@1490 844 io_apic_irqs = 0;
kaf24@1490 845 cpu_online_map = phys_cpu_present_map = 1;
kaf24@1490 846 smp_num_cpus = 1;
kaf24@1490 847 goto smp_done;
kaf24@1490 848 }
kaf24@1490 849
kaf24@1490 850 connect_bsp_APIC();
kaf24@1490 851 setup_local_APIC();
kaf24@1490 852
kaf24@1490 853 if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid)
kaf24@1490 854 BUG();
kaf24@1490 855
kaf24@1490 856 /*
kaf24@1490 857 * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
kaf24@1490 858 *
kaf24@1490 859 * In clustered apic mode, phys_cpu_present_map is constructed thus:
kaf24@1490 860 * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
kaf24@1490 861 * clustered apic ID.
kaf24@1490 862 */
kaf24@1490 863 Dprintk("CPU present map: %lx\n", phys_cpu_present_map);
kaf24@1490 864
kaf24@1490 865 for (bit = 0; bit < NR_CPUS; bit++) {
kaf24@1490 866 apicid = cpu_present_to_apicid(bit);
kaf24@1490 867 /*
kaf24@1490 868 * Don't even attempt to start the boot CPU!
kaf24@1490 869 */
kaf24@1490 870 if (apicid == boot_cpu_apicid)
kaf24@1490 871 continue;
kaf24@1490 872
cl349@2741 873 /*
cl349@2741 874 * Don't start hyperthreads if option noht requested.
cl349@2741 875 */
cl349@2741 876 if (opt_noht && (apicid & (ht_per_core - 1)))
cl349@2741 877 continue;
cl349@2741 878
kaf24@1490 879 if (!(phys_cpu_present_map & (1 << bit)))
kaf24@1490 880 continue;
kaf24@1490 881 if ((max_cpus >= 0) && (max_cpus <= cpucount+1))
kaf24@1490 882 continue;
kaf24@1490 883
kaf24@1490 884 do_boot_cpu(apicid);
kaf24@1490 885
kaf24@1490 886 /*
kaf24@1490 887 * Make sure we unmap all failed CPUs
kaf24@1490 888 */
kaf24@1490 889 if ((boot_apicid_to_cpu(apicid) == -1) &&
kaf24@1490 890 (phys_cpu_present_map & (1 << bit)))
kaf24@1490 891 printk("CPU #%d not responding - cannot use it.\n",
kaf24@1490 892 apicid);
kaf24@1490 893 }
kaf24@1490 894
kaf24@1490 895 /*
kaf24@1490 896 * Cleanup possible dangling ends...
kaf24@1490 897 */
kaf24@1490 898 /*
kaf24@1490 899 * Install writable page 0 entry to set BIOS data area.
kaf24@1490 900 */
kaf24@1490 901 local_flush_tlb();
kaf24@1490 902
kaf24@1490 903 /*
kaf24@1490 904 * Paranoid: Set warm reset code and vector here back
kaf24@1490 905 * to default values.
kaf24@1490 906 */
kaf24@1490 907 CMOS_WRITE(0, 0xf);
kaf24@1490 908
kaf24@1490 909 *((volatile long *) phys_to_virt(0x467)) = 0;
kaf24@1490 910
kaf24@1490 911 if (!cpucount) {
kaf24@1490 912 printk("Error: only one processor found.\n");
kaf24@1490 913 } else {
kaf24@1490 914 printk("Total of %d processors activated.\n", cpucount+1);
kaf24@1490 915 }
kaf24@1490 916 smp_num_cpus = cpucount + 1;
kaf24@1490 917
kaf24@1490 918 Dprintk("Boot done.\n");
kaf24@1490 919
kaf24@1490 920 /*
kaf24@1490 921 * Here we can be sure that there is an IO-APIC in the system. Let's
kaf24@1490 922 * go and set it up:
kaf24@1490 923 */
kaf24@1490 924 if ( nr_ioapics ) setup_IO_APIC();
kaf24@1490 925
kaf24@1490 926 /* Set up all local APIC timers in the system. */
kaf24@1490 927 setup_APIC_clocks();
kaf24@1490 928
kaf24@1490 929 /* Synchronize the TSC with the AP(s). */
kaf24@1490 930 if ( cpucount ) synchronize_tsc_bp();
kaf24@1490 931
kaf24@1490 932 smp_done:
kaf24@1490 933 ;
kaf24@1490 934 }
kaf24@1490 935
kaf24@1490 936 #endif /* CONFIG_SMP */