/root/src/xen/xen/arch/x86/smpboot.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * x86 SMP booting functions |
3 | | * |
4 | | * This inherits a great deal from Linux's SMP boot code: |
5 | | * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> |
6 | | * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com> |
7 | | * |
8 | | * This program is free software; you can redistribute it and/or modify |
9 | | * it under the terms of the GNU General Public License as published by |
10 | | * the Free Software Foundation; either version 2 of the License, or |
11 | | * (at your option) any later version. |
12 | | * |
13 | | * This program is distributed in the hope that it will be useful, |
14 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | | * GNU General Public License for more details. |
17 | | * |
18 | | * You should have received a copy of the GNU General Public License |
19 | | * along with this program; If not, see <http://www.gnu.org/licenses/>. |
20 | | */ |
21 | | |
22 | | #include <xen/init.h> |
23 | | #include <xen/kernel.h> |
24 | | #include <xen/mm.h> |
25 | | #include <xen/domain.h> |
26 | | #include <xen/domain_page.h> |
27 | | #include <xen/sched.h> |
28 | | #include <xen/sched-if.h> |
29 | | #include <xen/irq.h> |
30 | | #include <xen/delay.h> |
31 | | #include <xen/softirq.h> |
32 | | #include <xen/tasklet.h> |
33 | | #include <xen/serial.h> |
34 | | #include <xen/numa.h> |
35 | | #include <xen/cpu.h> |
36 | | #include <asm/current.h> |
37 | | #include <asm/mc146818rtc.h> |
38 | | #include <asm/desc.h> |
39 | | #include <asm/div64.h> |
40 | | #include <asm/flushtlb.h> |
41 | | #include <asm/msr.h> |
42 | | #include <asm/mtrr.h> |
43 | | #include <asm/time.h> |
44 | | #include <asm/tboot.h> |
45 | | #include <mach_apic.h> |
46 | | #include <mach_wakecpu.h> |
47 | | #include <smpboot_hooks.h> |
48 | | |
/* Override macros from asm/page.h to make them work with mfn_t */
#undef mfn_to_page
#define mfn_to_page(mfn) __mfn_to_page(mfn_x(mfn))
#undef page_to_mfn
#define page_to_mfn(pg) _mfn(__page_to_mfn(pg))

/* Physical address of the real-mode entry point APs start executing at. */
#define setup_trampoline() (bootsym_phys(trampoline_realmode_entry))

unsigned long __read_mostly trampoline_phys;

/* representing HT siblings of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_mask);
/* representing HT and core siblings of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_mask);

DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, scratch_cpumask);
/* Static fallback for CPU0's scratch_cpumask (see smp_prepare_boot_cpu()). */
static cpumask_t scratch_cpu0mask;

cpumask_t cpu_online_map __read_mostly;
EXPORT_SYMBOL(cpu_online_map);

unsigned int __read_mostly nr_sockets;
cpumask_t **__read_mostly socket_cpumask;
/*
 * Mask preallocated in cpu_smpboot_alloc() and handed over to
 * socket_cpumask[] by the first CPU seen on a new socket
 * (see smp_store_cpu_info()).
 */
static cpumask_t *secondary_socket_cpumask;

struct cpuinfo_x86 cpu_data[NR_CPUS];

u32 x86_cpu_to_apicid[NR_CPUS] __read_mostly =
    { [0 ... NR_CPUS-1] = BAD_APICID };

/* Error code a failing AP reports back to the master (see smp_callin()). */
static int cpu_error;
/* Master/slave bringup handshake, advanced via set_cpu_state(). */
static enum cpu_state {
    CPU_STATE_DYING,    /* slave -> master: I am dying */
    CPU_STATE_DEAD,     /* slave -> master: I am completely dead */
    CPU_STATE_INIT,     /* master -> slave: Early bringup phase 1 */
    CPU_STATE_CALLOUT,  /* master -> slave: Early bringup phase 2 */
    CPU_STATE_CALLIN,   /* slave -> master: Completed phase 2 */
    CPU_STATE_ONLINE    /* master -> slave: Go fully online now. */
} cpu_state;
/* Full barrier first, so prior writes are visible before the new state. */
#define set_cpu_state(state) do { mb(); cpu_state = (state); } while (0)

void *stack_base[NR_CPUS];
91 | | |
92 | | void initialize_cpu_data(unsigned int cpu) |
93 | 13 | { |
94 | 13 | cpu_data[cpu] = boot_cpu_data; |
95 | 13 | } |
96 | | |
/*
 * Record CPU @id's identification data and, if this is the first CPU seen
 * on its socket, hand the preallocated secondary_socket_cpumask over to
 * socket_cpumask[].
 */
static void smp_store_cpu_info(int id)
{
    unsigned int socket;

    /* Fills cpu_data[id]; presumably cpu_to_socket() below depends on it. */
    identify_cpu(&cpu_data[id]);

    socket = cpu_to_socket(id);
    if ( !socket_cpumask[socket] )
    {
        socket_cpumask[socket] = secondary_socket_cpumask;
        secondary_socket_cpumask = NULL;
    }
}
110 | | |
/*
 * TSC's upper 32 bits can't be written in earlier CPUs (before
 * Prescott), there is no way to resync one AP against BP.
 */
bool disable_tsc_sync;

/* Shared state for the master/slave TSC sync handshake below. */
static atomic_t tsc_count;           /* odd: master published; even: slave acked */
static uint64_t tsc_value;           /* TSC sample published by the master */
static cpumask_t tsc_sync_cpu_mask;  /* CPUs to sync even if TSC_RELIABLE */
120 | | |
/*
 * Master side of the boot-time TSC sync: over five rounds, publish the
 * current TSC in tsc_value and bump tsc_count to an odd value, then wait
 * for the slave to write its TSC and bump the count to the next even
 * value.  Pairs with synchronize_tsc_slave().
 */
static void synchronize_tsc_master(unsigned int slave)
{
    unsigned int i;

    if ( disable_tsc_sync )
        return;

    /*
     * Skip when TSCs are architecturally reliable, unless this CPU was
     * explicitly flagged in tsc_sync_cpu_mask (e.g. physical hotplug,
     * per the comment in synchronize_tsc_slave()).
     */
    if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) &&
         !cpumask_test_cpu(slave, &tsc_sync_cpu_mask) )
        return;

    for ( i = 1; i <= 5; i++ )
    {
        tsc_value = rdtsc_ordered();
        wmb();   /* Publish tsc_value before signalling via tsc_count. */
        atomic_inc(&tsc_count);
        while ( atomic_read(&tsc_count) != (i<<1) )
            cpu_relax();
    }

    atomic_set(&tsc_count, 0);
    cpumask_clear_cpu(slave, &tsc_sync_cpu_mask);
}
144 | | |
/*
 * Slave side of the boot-time TSC sync: in each round, wait for the master
 * to publish tsc_value (odd tsc_count), copy it into our own TSC, and ack
 * by bumping tsc_count to the next even value.
 */
static void synchronize_tsc_slave(unsigned int slave)
{
    unsigned int i;

    if ( disable_tsc_sync )
        return;

    if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) &&
         !cpumask_test_cpu(slave, &tsc_sync_cpu_mask) )
        return;

    for ( i = 1; i <= 5; i++ )
    {
        while ( atomic_read(&tsc_count) != ((i<<1)-1) )
            cpu_relax();
        rmb();   /* Pairs with the master's wmb() publishing tsc_value. */
        /*
         * If a CPU has been physically hotplugged, we may as well write
         * to its TSC in spite of X86_FEATURE_TSC_RELIABLE. The platform does
         * not sync up a new CPU's TSC for us.
         */
        __write_tsc(tsc_value);
        atomic_inc(&tsc_count);
    }
}
170 | | |
/*
 * Slave-side early bringup: wait for the master's CALLOUT, set up the
 * local APIC, record CPU data, signal CALLIN, then spin until the master
 * flips us to ONLINE.
 */
static void smp_callin(void)
{
    unsigned int cpu = smp_processor_id();
    int i, rc;

    /* Wait 2s total for startup. */
    Dprintk("Waiting for CALLOUT.\n");
    for ( i = 0; cpu_state != CPU_STATE_CALLOUT; i++ )
    {
        BUG_ON(i >= 200);   /* 200 iterations * 10ms == the 2s budget. */
        cpu_relax();
        mdelay(10);
    }

    /*
     * The boot CPU has finished the init stage and is spinning on cpu_state
     * update until we finish. We are free to set up this CPU: first the APIC.
     */
    Dprintk("CALLIN, before setup_local_APIC().\n");
    x2apic_ap_setup();
    setup_local_APIC();

    /* Save our processor parameters. */
    smp_store_cpu_info(cpu);

    if ( (rc = hvm_cpu_up()) != 0 )
    {
        /*
         * Report the failure via cpu_error, flip to CPU_STATE_DEAD (via
         * cpu_exit_clear()) and park — dead_idle presumably never returns.
         */
        printk("CPU%d: Failed to initialise HVM. Not coming online.\n", cpu);
        cpu_error = rc;
        clear_local_APIC();
        spin_debug_enable();
        cpu_exit_clear(cpu);
        (*dead_idle)();
    }

    /* Allow the master to continue. */
    set_cpu_state(CPU_STATE_CALLIN);

    synchronize_tsc_slave(cpu);

    /* And wait for our final Ack. */
    while ( cpu_state != CPU_STATE_ONLINE )
        cpu_relax();
}
215 | | |
/* CPU being brought up by do_boot_cpu(), consumed by start_secondary(). */
static int booting_cpu;

/* CPUs for which sibling maps can be computed. */
static cpumask_t cpu_sibling_setup_map;
220 | | |
221 | | static void link_thread_siblings(int cpu1, int cpu2) |
222 | 18 | { |
223 | 18 | cpumask_set_cpu(cpu1, per_cpu(cpu_sibling_mask, cpu2)); |
224 | 18 | cpumask_set_cpu(cpu2, per_cpu(cpu_sibling_mask, cpu1)); |
225 | 18 | cpumask_set_cpu(cpu1, per_cpu(cpu_core_mask, cpu2)); |
226 | 18 | cpumask_set_cpu(cpu2, per_cpu(cpu_core_mask, cpu1)); |
227 | 18 | } |
228 | | |
/*
 * Compute @cpu's thread-sibling and core-sibling masks against every CPU
 * already in cpu_sibling_setup_map, and maintain the per-package
 * booted_cores counts.
 */
static void set_cpu_sibling_map(int cpu)
{
    int i;
    struct cpuinfo_x86 *c = cpu_data;

    cpumask_set_cpu(cpu, &cpu_sibling_setup_map);

    cpumask_set_cpu(cpu, socket_cpumask[cpu_to_socket(cpu)]);

    if ( c[cpu].x86_num_siblings > 1 )
    {
        for_each_cpu ( i, &cpu_sibling_setup_map )
        {
            /* AMD TOPOEXT: match siblings by compute unit, else by core id. */
            if ( cpu_has(c, X86_FEATURE_TOPOEXT) ) {
                if ( (c[cpu].phys_proc_id == c[i].phys_proc_id) &&
                     (c[cpu].compute_unit_id == c[i].compute_unit_id) )
                    link_thread_siblings(cpu, i);
            } else if ( (c[cpu].phys_proc_id == c[i].phys_proc_id) &&
                        (c[cpu].cpu_core_id == c[i].cpu_core_id) ) {
                link_thread_siblings(cpu, i);
            }
        }
    }
    else
    {
        /* No HT: the CPU is its own sole thread sibling. */
        cpumask_set_cpu(cpu, per_cpu(cpu_sibling_mask, cpu));
    }

    if ( c[cpu].x86_max_cores == 1 )
    {
        cpumask_copy(per_cpu(cpu_core_mask, cpu),
                     per_cpu(cpu_sibling_mask, cpu));
        c[cpu].booted_cores = 1;
        return;
    }

    for_each_cpu ( i, &cpu_sibling_setup_map )
    {
        if ( c[cpu].phys_proc_id == c[i].phys_proc_id )
        {
            cpumask_set_cpu(i, per_cpu(cpu_core_mask, cpu));
            cpumask_set_cpu(cpu, per_cpu(cpu_core_mask, i));
            /*
             * Does this new cpu bringup a new core?
             */
            if ( cpumask_weight(per_cpu(cpu_sibling_mask, cpu)) == 1 )
            {
                /*
                 * for each core in package, increment
                 * the booted_cores for this new cpu
                 */
                if ( cpumask_first(per_cpu(cpu_sibling_mask, i)) == i )
                    c[cpu].booted_cores++;
                /*
                 * increment the core count for all
                 * the other cpus in this package
                 */
                if ( i != cpu )
                    c[i].booted_cores++;
            }
            else if ( (i != cpu) && !c[cpu].booted_cores )
            {
                /* Inherit the count from a sibling that already has it. */
                c[cpu].booted_cores = c[i].booted_cores;
            }
        }
    }
}
296 | | |
/*
 * C entry point for a secondary CPU, reached from the boot trampoline.
 * Runs with IRQs disabled until it has inserted itself into
 * cpu_online_map, then drops into the idle loop.
 */
void start_secondary(void *unused)
{
    /*
     * Dont put anything before smp_callin(), SMP booting is so fragile that we
     * want to limit the things done here to the most necessary things.
     */
    unsigned int cpu = booting_cpu;

    /* Critical region without IDT or TSS. Any fault is deadly! */

    set_processor_id(cpu);
    set_current(idle_vcpu[cpu]);
    this_cpu(curr_vcpu) = idle_vcpu[cpu];
    rdmsrl(MSR_EFER, this_cpu(efer));

    /*
     * Just as during early bootstrap, it is convenient here to disable
     * spinlock checking while we have IRQs disabled. This allows us to
     * acquire IRQ-unsafe locks when it would otherwise be disallowed.
     *
     * It is safe because the race we are usually trying to avoid involves
     * a group of CPUs rendezvousing in an IPI handler, where one cannot
     * join because it is spinning with IRQs disabled waiting to acquire a
     * lock held by another in the rendezvous group (the lock must be an
     * IRQ-unsafe lock since the CPU took the IPI after acquiring it, and
     * hence had IRQs enabled). This is a deadlock scenario.
     *
     * However, no CPU can be involved in rendezvous until it is online,
     * hence no such group can be waiting for this CPU until it is
     * visible in cpu_online_map. Hence such a deadlock is not possible.
     */
    spin_debug_disable();

    load_system_tables();

    /* Full exception support from here on in. */

    /* Safe to enable feature such as CR4.MCE with the IDT set up now. */
    write_cr4(mmu_cr4_features);

    percpu_traps_init();

    cpu_init();

    initialize_cpu_data(cpu);

    /* During boot use the early path; after resume go via the normal one. */
    if ( system_state <= SYS_STATE_smp_boot )
        early_microcode_update_cpu(false);
    else
        microcode_resume_cpu(cpu);

    smp_callin();

    init_percpu_time();

    setup_secondary_APIC_clock();

    /*
     * low-memory mappings have been cleared, flush them from
     * the local TLBs too.
     */
    flush_tlb_local();

    /* This must be done before setting cpu_online_map */
    spin_debug_enable();
    set_cpu_sibling_map(cpu);
    notify_cpu_starting(cpu);
    wmb();

    /*
     * We need to hold vector_lock so there the set of online cpus
     * does not change while we are assigning vectors to cpus. Holding
     * this lock ensures we don't half assign or remove an irq from a cpu.
     */
    lock_vector_lock();
    setup_vector_irq(cpu);
    cpumask_set_cpu(cpu, &cpu_online_map);
    unlock_vector_lock();

    /* We can take interrupts now: we're officially "up". */
    local_irq_enable();
    mtrr_ap_init();

    wmb();
    startup_cpu_idle_loop();
}
383 | | |
/* Stack for the next booting AP; set in do_boot_cpu() to stack_base[cpu]. */
extern void *stack_start;
385 | | |
/*
 * Wake an AP via the INIT / double-STARTUP (SIPI) sequence of the MP
 * initialization protocol.  @start_eip is the (page-aligned) real-mode
 * entry point; only its bits 12+ fit in the SIPI vector.  Returns 0 on
 * success, or the ORed APIC send/accept error status otherwise.
 */
static int wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
{
    unsigned long send_status = 0, accept_status = 0;
    int maxlvt, timeout, i;

    /*
     * Be paranoid about clearing APIC errors.
     */
    apic_write(APIC_ESR, 0);
    apic_read(APIC_ESR);

    Dprintk("Asserting INIT.\n");

    /*
     * Turn INIT on target chip via IPI
     */
    apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
                   phys_apicid);

    /* In xAPIC mode the ICR busy bit must be polled; x2APIC has none. */
    if ( !x2apic_enabled )
    {
        Dprintk("Waiting for send to finish...\n");
        timeout = 0;
        do {
            Dprintk("+");
            udelay(100);
            send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
        } while ( send_status && (timeout++ < 1000) );

        mdelay(10);

        Dprintk("Deasserting INIT.\n");

        apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);

        Dprintk("Waiting for send to finish...\n");
        timeout = 0;
        do {
            Dprintk("+");
            udelay(100);
            send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
        } while ( send_status && (timeout++ < 1000) );
    }
    else if ( tboot_in_measured_env() )
    {
        /*
         * With tboot AP is actually spinning in a mini-guest before
         * receiving INIT. Upon receiving INIT ipi, AP need time to VMExit,
         * update VMCS to tracking SIPIs and VMResume.
         *
         * While AP is in root mode handling the INIT the CPU will drop
         * any SIPIs
         */
        udelay(10);
    }

    maxlvt = get_maxlvt();

    /* Two STARTUP IPIs, stopping early on any send/accept error. */
    for ( i = 0; i < 2; i++ )
    {
        Dprintk("Sending STARTUP #%d.\n", i+1);
        apic_write(APIC_ESR, 0);
        apic_read(APIC_ESR);
        Dprintk("After apic_write.\n");

        /*
         * STARTUP IPI
         * Boot on the stack
         */
        apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12), phys_apicid);

        if ( !x2apic_enabled )
        {
            /* Give the other CPU some time to accept the IPI. */
            udelay(300);

            Dprintk("Startup point 1.\n");

            Dprintk("Waiting for send to finish...\n");
            timeout = 0;
            do {
                Dprintk("+");
                udelay(100);
                send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
            } while ( send_status && (timeout++ < 1000) );

            /* Give the other CPU some time to accept the IPI. */
            udelay(200);
        }

        /* Due to the Pentium erratum 3AP. */
        if ( maxlvt > 3 )
        {
            apic_write(APIC_ESR, 0);
        }
        accept_status = (apic_read(APIC_ESR) & 0xEF);
        if ( send_status || accept_status )
            break;
    }
    Dprintk("After Startup.\n");

    if ( send_status )
        printk("APIC never delivered???\n");
    if ( accept_status )
        printk("APIC delivery error (%lx).\n", accept_status);

    return (send_status | accept_status);
}
494 | | |
495 | | int alloc_cpu_id(void) |
496 | 11 | { |
497 | 11 | cpumask_t tmp_map; |
498 | 11 | int cpu; |
499 | 11 | |
500 | 11 | cpumask_complement(&tmp_map, &cpu_present_map); |
501 | 11 | cpu = cpumask_first(&tmp_map); |
502 | 11 | return (cpu < nr_cpu_ids) ? cpu : -ENODEV; |
503 | 11 | } |
504 | | |
/*
 * Bring up one AP: save MTRR state, point the trampoline stack at the
 * AP's stack, run the wakeup IPI sequence and then walk the cpu_state
 * handshake opposite smp_callin().  Returns 0 on success, the AP's
 * reported cpu_error if it died during bringup, or -EIO otherwise.
 */
static int do_boot_cpu(int apicid, int cpu)
{
    int timeout, boot_error = 0, rc = 0;
    unsigned long start_eip;

    /*
     * Save current MTRR state in case it was changed since early boot
     * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
     */
    mtrr_save_state();

    /* Consumed by start_secondary() on the target CPU. */
    booting_cpu = cpu;

    /* start_eip had better be page-aligned! */
    start_eip = setup_trampoline();

    /* So we see what's up */
    if ( opt_cpu_info )
        printk("Booting processor %d/%d eip %lx\n",
               cpu, apicid, start_eip);

    stack_start = stack_base[cpu];

    /* This grunge runs the startup process for the targeted processor. */

    set_cpu_state(CPU_STATE_INIT);

    Dprintk("Setting warm reset code and vector.\n");

    smpboot_setup_warm_reset_vector(start_eip);

    /* Starting actual IPI sequence... */
    if ( !tboot_in_measured_env() || tboot_wake_ap(apicid, start_eip) )
        boot_error = wakeup_secondary_cpu(apicid, start_eip);

    if ( !boot_error )
    {
        /* Allow AP to start initializing. */
        set_cpu_state(CPU_STATE_CALLOUT);
        Dprintk("After Callout %d.\n", cpu);

        /* Wait 5s total for a response. */
        for ( timeout = 0; timeout < 50000; timeout++ )
        {
            if ( cpu_state != CPU_STATE_CALLOUT )
                break;
            udelay(100);
        }

        if ( cpu_state == CPU_STATE_CALLIN )
        {
            /* number CPUs logically, starting from 1 (BSP is 0) */
            Dprintk("OK.\n");
            print_cpu_info(cpu);
            synchronize_tsc_master(cpu);
            Dprintk("CPU has booted.\n");
        }
        else if ( cpu_state == CPU_STATE_DEAD )
        {
            /* AP failed and reported why via cpu_error (see smp_callin()). */
            rmb();
            rc = cpu_error;
        }
        else
        {
            boot_error = 1;
            mb();
            if ( bootsym(trampoline_cpu_started) == 0xA5 )
                /* trampoline started but...? */
                printk("Stuck ??\n");
            else
                /* trampoline code not run */
                printk("Not responding.\n");
        }
    }

    if ( boot_error )
    {
        cpu_exit_clear(cpu);
        rc = -EIO;
    }

    /* mark "stuck" area as not stuck */
    bootsym(trampoline_cpu_started) = 0;
    mb();

    smpboot_restore_warm_reset_vector();

    return rc;
}
594 | | |
/* Byte offset of @cpu's stub buffer within its (shared) stub page. */
#define STUB_BUF_CPU_OFFS(cpu) (((cpu) & (STUBS_PER_PAGE - 1)) * STUB_BUF_SIZE)

/*
 * Map the stub page for @cpu at its fixed per-CPU VA just below
 * XEN_VIRT_END.  *mfn is in/out: zero means "allocate a fresh page"
 * (poisoned with 0xcc) and is updated on success; non-zero means "map
 * this existing page".  Returns the stub VA, or 0 on failure.
 */
unsigned long alloc_stub_page(unsigned int cpu, unsigned long *mfn)
{
    unsigned long stub_va;
    struct page_info *pg;

    /* The cpu -> offset arithmetic requires a power-of-2 STUBS_PER_PAGE. */
    BUILD_BUG_ON(STUBS_PER_PAGE & (STUBS_PER_PAGE - 1));

    if ( *mfn )
        pg = mfn_to_page(_mfn(*mfn));
    else
    {
        nodeid_t node = cpu_to_node(cpu);
        unsigned int memflags = node != NUMA_NO_NODE ? MEMF_node(node) : 0;

        pg = alloc_domheap_page(NULL, memflags);
        if ( !pg )
            return 0;

        /* Fill with 0xcc (int3) so stray jumps into the page trap. */
        unmap_domain_page(memset(__map_domain_page(pg), 0xcc, PAGE_SIZE));
    }

    stub_va = XEN_VIRT_END - (cpu + 1) * PAGE_SIZE;
    if ( map_pages_to_xen(stub_va, mfn_x(page_to_mfn(pg)), 1,
                          PAGE_HYPERVISOR_RX | MAP_SMALL_PAGES) )
    {
        /* Only free the page if we allocated it in this call. */
        if ( !*mfn )
            free_domheap_page(pg);
        stub_va = 0;
    }
    else if ( !*mfn )
        *mfn = mfn_x(page_to_mfn(pg));

    return stub_va;
}
631 | | |
/*
 * Tear down a CPU that failed bringup and report CPU_STATE_DEAD back to
 * the master (which do_boot_cpu()/__cpu_die() are polling for).
 */
void cpu_exit_clear(unsigned int cpu)
{
    cpu_uninit(cpu);
    set_cpu_state(CPU_STATE_DEAD);
}
637 | | |
/*
 * Free the per-CPU resources set up by cpu_smpboot_alloc() (plus the
 * socket mask handed over in smp_store_cpu_info()).  Runs on CPU_DEAD
 * and CPU_UP_CANCELED (see cpu_smpboot_callback()).
 */
static void cpu_smpboot_free(unsigned int cpu)
{
    unsigned int order, socket = cpu_to_socket(cpu);
    struct cpuinfo_x86 *c = cpu_data;

    /* Last CPU on the socket gone: release the socket mask too. */
    if ( cpumask_empty(socket_cpumask[socket]) )
    {
        xfree(socket_cpumask[socket]);
        socket_cpumask[socket] = NULL;
    }

    c[cpu].phys_proc_id = XEN_INVALID_SOCKET_ID;
    c[cpu].cpu_core_id = XEN_INVALID_CORE_ID;
    c[cpu].compute_unit_id = INVALID_CUID;
    cpumask_clear_cpu(cpu, &cpu_sibling_setup_map);

    free_cpumask_var(per_cpu(cpu_sibling_mask, cpu));
    free_cpumask_var(per_cpu(cpu_core_mask, cpu));
    /* CPU0 may still point at the static scratch_cpu0mask - never free it. */
    if ( per_cpu(scratch_cpumask, cpu) != &scratch_cpu0mask )
        free_cpumask_var(per_cpu(scratch_cpumask, cpu));

    if ( per_cpu(stubs.addr, cpu) )
    {
        mfn_t mfn = _mfn(per_cpu(stubs.mfn, cpu));
        unsigned char *stub_page = map_domain_page(mfn);
        unsigned int i;

        /* Re-poison our slot, then scan for any still-live slot. */
        memset(stub_page + STUB_BUF_CPU_OFFS(cpu), 0xcc, STUB_BUF_SIZE);
        for ( i = 0; i < STUBS_PER_PAGE; ++i )
            if ( stub_page[i * STUB_BUF_SIZE] != 0xcc )
                break;
        unmap_domain_page(stub_page);
        destroy_xen_mappings(per_cpu(stubs.addr, cpu) & PAGE_MASK,
                             (per_cpu(stubs.addr, cpu) | ~PAGE_MASK) + 1);
        /* i == STUBS_PER_PAGE: every slot still poisoned => page unused. */
        if ( i == STUBS_PER_PAGE )
            free_domheap_page(mfn_to_page(mfn));
    }

    order = get_order_from_pages(NR_RESERVED_GDT_PAGES);
    free_xenheap_pages(per_cpu(gdt_table, cpu), order);

    free_xenheap_pages(per_cpu(compat_gdt_table, cpu), order);

    order = get_order_from_bytes(IDT_ENTRIES * sizeof(idt_entry_t));
    free_xenheap_pages(idt_tables[cpu], order);
    idt_tables[cpu] = NULL;

    if ( stack_base[cpu] != NULL )
    {
        memguard_unguard_stack(stack_base[cpu]);
        free_xenheap_pages(stack_base[cpu], STACK_ORDER);
        stack_base[cpu] = NULL;
    }
}
692 | | |
/*
 * Allocate the bringup resources for @cpu - stack, GDTs, IDT, stub page
 * slot and cpumasks - preferring memory on the CPU's NUMA node.  Returns
 * 0 on success, -ENOMEM (after freeing partial allocations) on failure.
 */
static int cpu_smpboot_alloc(unsigned int cpu)
{
    unsigned int i, order, memflags = 0;
    nodeid_t node = cpu_to_node(cpu);
    struct desc_struct *gdt;
    unsigned long stub_page;

    if ( node != NUMA_NO_NODE )
        memflags = MEMF_node(node);

    stack_base[cpu] = alloc_xenheap_pages(STACK_ORDER, memflags);
    if ( stack_base[cpu] == NULL )
        goto oom;
    memguard_guard_stack(stack_base[cpu]);

    order = get_order_from_pages(NR_RESERVED_GDT_PAGES);
    per_cpu(gdt_table, cpu) = gdt = alloc_xenheap_pages(order, memflags);
    if ( gdt == NULL )
        goto oom;
    memcpy(gdt, boot_cpu_gdt_table, NR_RESERVED_GDT_PAGES * PAGE_SIZE);
    BUILD_BUG_ON(NR_CPUS > 0x10000);
    /* Record the CPU number in the per-CPU GDT entry (field 'a'). */
    gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;

    per_cpu(compat_gdt_table, cpu) = gdt = alloc_xenheap_pages(order, memflags);
    if ( gdt == NULL )
        goto oom;
    memcpy(gdt, boot_cpu_compat_gdt_table, NR_RESERVED_GDT_PAGES * PAGE_SIZE);
    gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;

    order = get_order_from_bytes(IDT_ENTRIES * sizeof(idt_entry_t));
    idt_tables[cpu] = alloc_xenheap_pages(order, memflags);
    if ( idt_tables[cpu] == NULL )
        goto oom;
    memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES * sizeof(idt_entry_t));
    set_ist(&idt_tables[cpu][TRAP_double_fault], IST_NONE);
    set_ist(&idt_tables[cpu][TRAP_nmi], IST_NONE);
    set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);

    /*
     * Stub pages are shared by up to STUBS_PER_PAGE CPUs: reuse an online
     * neighbour's page (same node) in this CPU's group if one exists.
     */
    for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1);
          i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i )
        if ( cpu_online(i) && cpu_to_node(i) == node )
        {
            per_cpu(stubs.mfn, cpu) = per_cpu(stubs.mfn, i);
            break;
        }
    BUG_ON(i == cpu);
    stub_page = alloc_stub_page(cpu, &per_cpu(stubs.mfn, cpu));
    if ( !stub_page )
        goto oom;
    per_cpu(stubs.addr, cpu) = stub_page + STUB_BUF_CPU_OFFS(cpu);

    /* Preallocate the mask later handed over by smp_store_cpu_info(). */
    if ( secondary_socket_cpumask == NULL &&
         (secondary_socket_cpumask = xzalloc(cpumask_t)) == NULL )
        goto oom;

    if ( zalloc_cpumask_var(&per_cpu(cpu_sibling_mask, cpu)) &&
         zalloc_cpumask_var(&per_cpu(cpu_core_mask, cpu)) &&
         alloc_cpumask_var(&per_cpu(scratch_cpumask, cpu)) )
        return 0;

 oom:
    cpu_smpboot_free(cpu);
    return -ENOMEM;
}
757 | | |
758 | | static int cpu_smpboot_callback( |
759 | | struct notifier_block *nfb, unsigned long action, void *hcpu) |
760 | 33 | { |
761 | 33 | unsigned int cpu = (unsigned long)hcpu; |
762 | 33 | int rc = 0; |
763 | 33 | |
764 | 33 | switch ( action ) |
765 | 33 | { |
766 | 11 | case CPU_UP_PREPARE: |
767 | 11 | rc = cpu_smpboot_alloc(cpu); |
768 | 11 | break; |
769 | 0 | case CPU_UP_CANCELED: |
770 | 0 | case CPU_DEAD: |
771 | 0 | cpu_smpboot_free(cpu); |
772 | 0 | break; |
773 | 22 | default: |
774 | 22 | break; |
775 | 33 | } |
776 | 33 | |
777 | 33 | return !rc ? NOTIFY_DONE : notifier_from_errno(rc); |
778 | 33 | } |
779 | | |
/* Hooks cpu_smpboot_alloc()/cpu_smpboot_free() into CPU hotplug events. */
static struct notifier_block cpu_smpboot_nfb = {
    .notifier_call = cpu_smpboot_callback
};
783 | | |
/*
 * Boot-time SMP preparation on the BSP: register the hotplug notifier,
 * finalise boot CPU data, allocate socket/sibling maps, and set up the
 * local APIC, IO-APIC and APIC timer.  Falls back to uniprocessor setup
 * when no MP configuration or local APIC is found.
 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
    register_cpu_notifier(&cpu_smpboot_nfb);

    mtrr_aps_sync_begin();

    /* Setup boot CPU information */
    initialize_cpu_data(0); /* Final full version of the data */
    print_cpu_info(0);

    boot_cpu_physical_apicid = get_apic_id();
    x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;

    stack_base[0] = stack_start;

    set_nr_sockets();

    socket_cpumask = xzalloc_array(cpumask_t *, nr_sockets);
    if ( socket_cpumask == NULL ||
         (socket_cpumask[cpu_to_socket(0)] = xzalloc(cpumask_t)) == NULL )
        panic("No memory for socket CPU siblings map");

    if ( !zalloc_cpumask_var(&per_cpu(cpu_sibling_mask, 0)) ||
         !zalloc_cpumask_var(&per_cpu(cpu_core_mask, 0)) )
        panic("No memory for boot CPU sibling/core maps");

    set_cpu_sibling_map(0);

    /*
     * If we couldn't find an SMP configuration at boot time,
     * get out of here now!
     */
    if ( !smp_found_config && !acpi_lapic )
    {
        printk(KERN_NOTICE "SMP motherboard not detected.\n");
    init_uniprocessor:
        physids_clear(phys_cpu_present_map);
        physid_set(0, phys_cpu_present_map);
        if (APIC_init_uniprocessor())
            printk(KERN_NOTICE "Local APIC not detected."
                   " Using dummy APIC emulation.\n");
        return;
    }

    /*
     * Should not be necessary because the MP table should list the boot
     * CPU too, but we do it for the sake of robustness anyway.
     * Makes no sense to do this check in clustered apic mode, so skip it
     */
    if ( !check_apicid_present(boot_cpu_physical_apicid) )
    {
        printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
               boot_cpu_physical_apicid);
        physid_set(get_apic_id(), phys_cpu_present_map);
    }

    /* If we couldn't find a local APIC, then get out of here now! */
    if ( !cpu_has_apic )
    {
        printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
               boot_cpu_physical_apicid);
        goto init_uniprocessor;
    }

    verify_local_APIC();

    connect_bsp_APIC();
    setup_local_APIC();

    smpboot_setup_io_apic();

    setup_boot_APIC_clock();
}
857 | | |
/*
 * Minimal early marking of the boot CPU as online and present, ahead of
 * the full smp_prepare_cpus().
 */
void __init smp_prepare_boot_cpu(void)
{
    unsigned int cpu = smp_processor_id();

    cpumask_set_cpu(cpu, &cpu_online_map);
    cpumask_set_cpu(cpu, &cpu_present_map);
#if NR_CPUS > 2 * BITS_PER_LONG
    /*
     * Point CPU0's scratch mask at the static fallback;
     * cpu_smpboot_free() knows not to free this one.
     * NOTE(review): the #if presumably matches the configurations where
     * cpumask_var_t is pointer-based - confirm against its definition.
     */
    per_cpu(scratch_cpumask, cpu) = &scratch_cpu0mask;
#endif
}
868 | | |
/*
 * Undo set_cpu_sibling_map() for a CPU going offline: remove it from the
 * socket, core and thread sibling masks and adjust booted_cores counts.
 */
static void
remove_siblinginfo(int cpu)
{
    int sibling;

    cpumask_clear_cpu(cpu, socket_cpumask[cpu_to_socket(cpu)]);

    for_each_cpu ( sibling, per_cpu(cpu_core_mask, cpu) )
    {
        cpumask_clear_cpu(cpu, per_cpu(cpu_core_mask, sibling));
        /* Last thread sibling in this cpu core going down. */
        if ( cpumask_weight(per_cpu(cpu_sibling_mask, cpu)) == 1 )
            cpu_data[sibling].booted_cores--;
    }

    for_each_cpu(sibling, per_cpu(cpu_sibling_mask, cpu))
        cpumask_clear_cpu(cpu, per_cpu(cpu_sibling_mask, sibling));
    cpumask_clear(per_cpu(cpu_sibling_mask, cpu));
    cpumask_clear(per_cpu(cpu_core_mask, cpu));
}
889 | | |
/*
 * Take the calling CPU offline.  Runs on the CPU being removed, with the
 * rest of the teardown (idle "death") happening after we return.  The
 * statement ordering here is significant: interrupts are drained before
 * time is suspended, and the CPU leaves cpu_online_map only after its
 * topology records are gone.
 */
void __cpu_disable(void)
{
    int cpu = smp_processor_id();

    set_cpu_state(CPU_STATE_DYING);

    local_irq_disable();
    clear_local_APIC();
    /* Allow any queued timer interrupts to get serviced */
    local_irq_enable();
    mdelay(1);
    local_irq_disable();

    /* Stop per-CPU timekeeping before the CPU disappears. */
    time_suspend();

    remove_siblinginfo(cpu);

    /* It's now safe to remove this processor from the online map */
    cpumask_clear_cpu(cpu, &cpu_online_map);
    /* Re-route interrupts away from this CPU and tidy pending EOIs. */
    fixup_irqs(&cpu_online_map, 1);
    fixup_eoi();

    /* The scheduler must be able to evacuate this CPU's work. */
    if ( cpu_disable_scheduler(cpu) )
        BUG();
}
915 | | |
916 | | void __cpu_die(unsigned int cpu) |
917 | 0 | { |
918 | 0 | /* We don't do anything here: idle task is faking death itself. */ |
919 | 0 | unsigned int i = 0; |
920 | 0 | enum cpu_state seen_state; |
921 | 0 |
|
922 | 0 | while ( (seen_state = cpu_state) != CPU_STATE_DEAD ) |
923 | 0 | { |
924 | 0 | BUG_ON(seen_state != CPU_STATE_DYING); |
925 | 0 | mdelay(100); |
926 | 0 | cpu_relax(); |
927 | 0 | process_pending_softirqs(); |
928 | 0 | if ( (++i % 10) == 0 ) |
929 | 0 | printk(KERN_ERR "CPU %u still not dead...\n", cpu); |
930 | 0 | } |
931 | 0 | } |
932 | | |
/*
 * Register a physically hot-added CPU described by an ACPI notification.
 *
 * @apic_id: local APIC ID of the new CPU.
 * @acpi_id: ACPI processor ID (index into x86_acpiid_to_apicid).
 * @pxm:     ACPI proximity domain the CPU belongs to.
 *
 * Returns the newly assigned logical CPU number on success, or a
 * negative error code: -EINVAL for out-of-range/mismatched IDs,
 * -EBUSY if hotplug cannot begin, -EEXIST if the CPU is already known.
 */
int cpu_add(uint32_t apic_id, uint32_t acpi_id, uint32_t pxm)
{
    int cpu = -1;

    dprintk(XENLOG_DEBUG, "cpu_add apic_id %x acpi_id %x pxm %x\n",
            apic_id, acpi_id, pxm);

    /* Reject IDs that would overflow the lookup tables below. */
    if ( (acpi_id >= MAX_MADT_ENTRIES) ||
         (apic_id >= MAX_APICS) ||
         (pxm >= 256) )
        return -EINVAL;

    if ( !cpu_hotplug_begin() )
        return -EBUSY;

    /* Detect if the cpu has been added before */
    if ( x86_acpiid_to_apicid[acpi_id] != BAD_APICID )
    {
        /* Same ACPI ID with a different APIC ID is inconsistent firmware data. */
        cpu = (x86_acpiid_to_apicid[acpi_id] != apic_id)
            ? -EINVAL : -EEXIST;
        goto out;
    }

    if ( physid_isset(apic_id, phys_cpu_present_map) )
    {
        cpu = -EEXIST;
        goto out;
    }

    /* Allocate a logical CPU number and record the new local APIC. */
    if ( (cpu = mp_register_lapic(apic_id, 1, 1)) < 0 )
        goto out;

    x86_acpiid_to_apicid[acpi_id] = apic_id;

    if ( !srat_disabled() )
    {
        nodeid_t node = setup_node(pxm);

        if ( node == NUMA_NO_NODE )
        {
            /* Roll back the registration performed above before bailing. */
            dprintk(XENLOG_WARNING,
                    "Setup node failed for pxm %x\n", pxm);
            x86_acpiid_to_apicid[acpi_id] = BAD_APICID;
            mp_unregister_lapic(apic_id, cpu);
            /* NOTE(review): NUMA_NO_NODE is propagated as the return value
             * here rather than a conventional -errno — confirm callers
             * treat it as an error. */
            cpu = node;
            goto out;
        }
        if ( apic_id < MAX_LOCAL_APIC )
            apicid_to_node[apic_id] = node;
    }

    /* Physically added CPUs do not have synchronised TSC. */
    if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
    {
        static bool once_only;

        /* Warn once per boot, then always record the CPU for TSC syncing. */
        if ( !test_and_set_bool(once_only) )
            printk(XENLOG_WARNING
                   " ** New physical CPU %u may have skewed TSC and hence "
                   "break assumed cross-CPU TSC coherency.\n"
                   " ** Consider using boot parameter \"tsc=skewed\" "
                   "which forces TSC emulation where appropriate.\n", cpu);
        cpumask_set_cpu(cpu, &tsc_sync_cpu_mask);
    }

    srat_detect_node(cpu);
    numa_add_cpu(cpu);
    dprintk(XENLOG_INFO, "Add CPU %x with index %x\n", apic_id, cpu);
 out:
    cpu_hotplug_done();
    return cpu;
}
1005 | | |
1006 | | |
1007 | | int __cpu_up(unsigned int cpu) |
1008 | 11 | { |
1009 | 11 | int apicid, ret; |
1010 | 11 | |
1011 | 11 | if ( (apicid = x86_cpu_to_apicid[cpu]) == BAD_APICID ) |
1012 | 0 | return -ENODEV; |
1013 | 11 | |
1014 | 11 | if ( (ret = do_boot_cpu(apicid, cpu)) != 0 ) |
1015 | 0 | return ret; |
1016 | 11 | |
1017 | 11 | time_latch_stamps(); |
1018 | 11 | |
1019 | 11 | set_cpu_state(CPU_STATE_ONLINE); |
1020 | 5.51k | while ( !cpu_online(cpu) ) |
1021 | 5.50k | { |
1022 | 5.50k | cpu_relax(); |
1023 | 5.50k | process_pending_softirqs(); |
1024 | 5.50k | } |
1025 | 11 | |
1026 | 11 | return 0; |
1027 | 11 | } |
1028 | | |
1029 | | |
/*
 * Final SMP bring-up steps once all CPUs have been booted: verify the
 * NMI watchdog (when using the local APIC flavour), program IO-APIC
 * interrupt destinations, and synchronise MTRR state across CPUs.
 */
void __init smp_cpus_done(void)
{
    if ( nmi_watchdog == NMI_LOCAL_APIC )
        check_nmi_watchdog();

    setup_ioapic_dest();

    mtrr_save_state();
    mtrr_aps_sync_end();
}
1040 | | |
1041 | | void __init smp_intr_init(void) |
1042 | 1 | { |
1043 | 1 | int irq, vector, seridx, cpu = smp_processor_id(); |
1044 | 1 | |
1045 | 1 | /* |
1046 | 1 | * IRQ0 must be given a fixed assignment and initialized, |
1047 | 1 | * because it's used before the IO-APIC is set up. |
1048 | 1 | */ |
1049 | 1 | irq_to_desc(0)->arch.vector = IRQ0_VECTOR; |
1050 | 1 | |
1051 | 1 | /* |
1052 | 1 | * Also ensure serial interrupts are high priority. We do not |
1053 | 1 | * want them to be blocked by unacknowledged guest-bound interrupts. |
1054 | 1 | */ |
1055 | 5 | for ( seridx = 0; seridx <= SERHND_IDX; seridx++ ) |
1056 | 4 | { |
1057 | 4 | if ( (irq = serial_irq(seridx)) < 0 ) |
1058 | 3 | continue; |
1059 | 1 | vector = alloc_hipriority_vector(); |
1060 | 1 | per_cpu(vector_irq, cpu)[vector] = irq; |
1061 | 1 | irq_to_desc(irq)->arch.vector = vector; |
1062 | 1 | cpumask_copy(irq_to_desc(irq)->arch.cpu_mask, &cpu_online_map); |
1063 | 1 | } |
1064 | 1 | |
1065 | 1 | /* Direct IPI vectors. */ |
1066 | 1 | set_direct_apic_vector(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); |
1067 | 1 | set_direct_apic_vector(EVENT_CHECK_VECTOR, event_check_interrupt); |
1068 | 1 | set_direct_apic_vector(INVALIDATE_TLB_VECTOR, invalidate_interrupt); |
1069 | 1 | set_direct_apic_vector(CALL_FUNCTION_VECTOR, call_function_interrupt); |
1070 | 1 | } |