/root/src/xen/xen/arch/x86/setup.c
Line | Count | Source |
1 | | #include <xen/init.h> |
2 | | #include <xen/lib.h> |
3 | | #include <xen/err.h> |
4 | | #include <xen/sched.h> |
5 | | #include <xen/sched-if.h> |
6 | | #include <xen/domain.h> |
7 | | #include <xen/serial.h> |
8 | | #include <xen/softirq.h> |
9 | | #include <xen/acpi.h> |
10 | | #include <xen/efi.h> |
11 | | #include <xen/console.h> |
12 | | #include <xen/serial.h> |
13 | | #include <xen/trace.h> |
14 | | #include <xen/multiboot.h> |
15 | | #include <xen/domain_page.h> |
16 | | #include <xen/version.h> |
17 | | #include <xen/gdbstub.h> |
18 | | #include <xen/percpu.h> |
19 | | #include <xen/hypercall.h> |
20 | | #include <xen/keyhandler.h> |
21 | | #include <xen/numa.h> |
22 | | #include <xen/rcupdate.h> |
23 | | #include <xen/vga.h> |
24 | | #include <xen/dmi.h> |
25 | | #include <xen/pfn.h> |
26 | | #include <xen/nodemask.h> |
27 | | #include <xen/tmem_xen.h> |
28 | | #include <xen/virtual_region.h> |
29 | | #include <xen/watchdog.h> |
30 | | #include <public/version.h> |
31 | | #include <compat/platform.h> |
32 | | #include <compat/xen.h> |
33 | | #include <xen/bitops.h> |
34 | | #include <asm/smp.h> |
35 | | #include <asm/processor.h> |
36 | | #include <asm/mpspec.h> |
37 | | #include <asm/apic.h> |
38 | | #include <asm/msi.h> |
39 | | #include <asm/desc.h> |
40 | | #include <asm/paging.h> |
41 | | #include <asm/e820.h> |
42 | | #include <xen/kexec.h> |
43 | | #include <asm/edd.h> |
44 | | #include <xsm/xsm.h> |
45 | | #include <asm/tboot.h> |
46 | | #include <asm/bzimage.h> /* for bzimage_headroom */ |
47 | | #include <asm/mach-generic/mach_apic.h> /* for generic_apic_probe */ |
48 | | #include <asm/setup.h> |
49 | | #include <xen/cpu.h> |
50 | | #include <asm/nmi.h> |
51 | | #include <asm/alternative.h> |
52 | | #include <asm/mc146818rtc.h> |
53 | | #include <asm/cpuid.h> |
54 | | |
55 | | /* opt_nosmp: If true, secondary processors are ignored. */ |
56 | | static bool __initdata opt_nosmp; |
57 | | boolean_param("nosmp", opt_nosmp); |
58 | | |
59 | | /* maxcpus: maximum number of CPUs to activate. */ |
60 | | static unsigned int __initdata max_cpus; |
61 | | integer_param("maxcpus", max_cpus); |
62 | | |
63 | | unsigned long __read_mostly cr4_pv32_mask; |
64 | | |
65 | | /* **** Linux config option: propagated to domain0. */ |
66 | | /* "acpi=off": Disables both ACPI table parsing and interpreter. */ |
67 | | /* "acpi=force": Override the disable blacklist. */ |
68 | | /* "acpi=ht": Limit ACPI just to boot-time to enable HT. */ |
69 | | /* "acpi=noirq": Disables ACPI interrupt routing. */ |
70 | | static int parse_acpi_param(const char *s); |
71 | | custom_param("acpi", parse_acpi_param); |
72 | | |
73 | | /* **** Linux config option: propagated to domain0. */ |
74 | | /* noapic: Disable IOAPIC setup. */ |
75 | | boolean_param("noapic", skip_ioapic_setup); |
76 | | |
77 | | /* **** Linux config option: propagated to domain0. */ |
78 | | /* xen_cpuidle: xen control cstate. */ |
79 | | s8 __read_mostly xen_cpuidle = -1; |
80 | | boolean_param("cpuidle", xen_cpuidle); |
81 | | |
82 | | #ifndef NDEBUG |
83 | | unsigned long __initdata highmem_start; |
84 | | size_param("highmem-start", highmem_start); |
85 | | #endif |
86 | | |
87 | | cpumask_t __read_mostly cpu_present_map; |
88 | | |
89 | | unsigned long __read_mostly xen_phys_start; |
90 | | |
91 | | unsigned long __read_mostly xen_virt_end; |
92 | | |
93 | | DEFINE_PER_CPU(struct tss_struct, init_tss); |
94 | | |
95 | | char __section(".bss.stack_aligned") __aligned(STACK_SIZE) |
96 | | cpu0_stack[STACK_SIZE]; |
97 | | |
98 | | struct cpuinfo_x86 __read_mostly boot_cpu_data = { 0, 0, 0, 0, -1 }; |
99 | | |
100 | | unsigned long __read_mostly mmu_cr4_features = XEN_MINIMAL_CR4; |
101 | | |
102 | | /* smep: Enable/disable Supervisor Mode Execution Protection (default on). */ |
103 | 1 | #define SMEP_HVM_ONLY (-1) |
104 | | static s8 __initdata opt_smep = 1; |
105 | | |
106 | | static int __init parse_smep_param(const char *s) |
107 | 0 | { |
108 | 0 | if ( !*s ) |
109 | 0 | { |
110 | 0 | opt_smep = 1; |
111 | 0 | return 0; |
112 | 0 | } |
113 | 0 | |
114 | 0 | switch ( parse_bool(s, NULL) ) |
115 | 0 | { |
116 | 0 | case 0: |
117 | 0 | opt_smep = 0; |
118 | 0 | return 0; |
119 | 0 | case 1: |
120 | 0 | opt_smep = 1; |
121 | 0 | return 0; |
122 | 0 | } |
123 | 0 | |
124 | 0 | if ( !strcmp(s, "hvm") ) |
125 | 0 | opt_smep = SMEP_HVM_ONLY; |
126 | 0 | else |
127 | 0 | return -EINVAL; |
128 | 0 | |
129 | 0 | return 0; |
130 | 0 | } |
131 | | custom_param("smep", parse_smep_param); |
132 | | |
133 | | /* smap: Enable/disable Supervisor Mode Access Prevention (default on). */ |
134 | 0 | #define SMAP_HVM_ONLY (-1) |
135 | | static s8 __initdata opt_smap = 1; |
136 | | |
137 | | static int __init parse_smap_param(const char *s) |
138 | 0 | { |
139 | 0 | if ( !*s ) |
140 | 0 | { |
141 | 0 | opt_smap = 1; |
142 | 0 | return 0; |
143 | 0 | } |
144 | 0 | |
145 | 0 | switch ( parse_bool(s, NULL) ) |
146 | 0 | { |
147 | 0 | case 0: |
148 | 0 | opt_smap = 0; |
149 | 0 | return 0; |
150 | 0 | case 1: |
151 | 0 | opt_smap = 1; |
152 | 0 | return 0; |
153 | 0 | } |
154 | 0 | |
155 | 0 | if ( !strcmp(s, "hvm") ) |
156 | 0 | opt_smap = SMAP_HVM_ONLY; |
157 | 0 | else |
158 | 0 | return -EINVAL; |
159 | 0 | |
160 | 0 | return 0; |
161 | 0 | } |
162 | | custom_param("smap", parse_smap_param); |
163 | | |
164 | | bool __read_mostly acpi_disabled; |
165 | | bool __initdata acpi_force; |
166 | | static char __initdata acpi_param[10] = ""; |
167 | | |
168 | | static int __init parse_acpi_param(const char *s) |
169 | 0 | { |
170 | 0 | /* Save the parameter so it can be propagated to domain0. */ |
171 | 0 | safe_strcpy(acpi_param, s); |
172 | 0 | |
173 | 0 | /* Interpret the parameter for use within Xen. */ |
174 | 0 | if ( !parse_bool(s, NULL) ) |
175 | 0 | { |
176 | 0 | disable_acpi(); |
177 | 0 | } |
178 | 0 | else if ( !strcmp(s, "force") ) |
179 | 0 | { |
180 | 0 | acpi_force = true; |
181 | 0 | acpi_ht = 1; |
182 | 0 | acpi_disabled = false; |
183 | 0 | } |
184 | 0 | else if ( !strcmp(s, "ht") ) |
185 | 0 | { |
186 | 0 | if ( !acpi_force ) |
187 | 0 | disable_acpi(); |
188 | 0 | acpi_ht = 1; |
189 | 0 | } |
190 | 0 | else if ( !strcmp(s, "noirq") ) |
191 | 0 | { |
192 | 0 | acpi_noirq_set(); |
193 | 0 | } |
194 | 0 | else |
195 | 0 | return -EINVAL; |
196 | 0 | |
197 | 0 | return 0; |
198 | 0 | } |
199 | | |
200 | | static const module_t *__initdata initial_images; |
201 | | static unsigned int __initdata nr_initial_images; |
202 | | |
203 | | unsigned long __init initial_images_nrpages(nodeid_t node) |
204 | 1 | { |
205 | 1 | unsigned long node_start = node_start_pfn(node); |
206 | 1 | unsigned long node_end = node_end_pfn(node); |
207 | 1 | unsigned long nr; |
208 | 1 | unsigned int i; |
209 | 1 | |
210 | 3 | for ( nr = i = 0; i < nr_initial_images; ++i ) |
211 | 2 | { |
212 | 2 | unsigned long start = initial_images[i].mod_start; |
213 | 2 | unsigned long end = start + PFN_UP(initial_images[i].mod_end); |
214 | 2 | |
215 | 2 | if ( end > node_start && node_end > start ) |
216 | 2 | nr += min(node_end, end) - max(node_start, start); |
217 | 2 | } |
218 | 1 | |
219 | 1 | return nr; |
220 | 1 | } |
221 | | |
222 | | void __init discard_initial_images(void) |
223 | 1 | { |
224 | 1 | unsigned int i; |
225 | 1 | |
226 | 3 | for ( i = 0; i < nr_initial_images; ++i ) |
227 | 2 | { |
228 | 2 | uint64_t start = (uint64_t)initial_images[i].mod_start << PAGE_SHIFT; |
229 | 2 | |
230 | 2 | init_domheap_pages(start, |
231 | 2 | start + PAGE_ALIGN(initial_images[i].mod_end)); |
232 | 2 | } |
233 | 1 | |
234 | 1 | nr_initial_images = 0; |
235 | 1 | initial_images = NULL; |
236 | 1 | } |
237 | | |
238 | | extern char __init_begin[], __init_end[], __bss_start[], __bss_end[]; |
239 | | |
240 | | static void __init init_idle_domain(void) |
241 | 1 | { |
242 | 1 | scheduler_init(); |
243 | 1 | set_current(idle_vcpu[0]); |
244 | 1 | this_cpu(curr_vcpu) = current; |
245 | 1 | } |
246 | | |
247 | | void srat_detect_node(int cpu) |
248 | 12 | { |
249 | 12 | nodeid_t node; |
250 | 12 | u32 apicid = x86_cpu_to_apicid[cpu]; |
251 | 12 | |
252 | 12 | node = apicid < MAX_LOCAL_APIC ? apicid_to_node[apicid] : NUMA_NO_NODE; |
253 | 12 | if ( node == NUMA_NO_NODE ) |
254 | 12 | node = 0; |
255 | 12 | |
256 | 12 | node_set_online(node); |
257 | 12 | numa_set_node(cpu, node); |
258 | 12 | |
259 | 12 | if ( opt_cpu_info && acpi_numa > 0 ) |
260 | 0 | printk("CPU %d APIC %d -> Node %d\n", cpu, apicid, node); |
261 | 12 | } |
262 | | |
263 | | /* |
264 | | * Sort CPUs by <node,package,core,thread> tuple. Fortunately this hierarchy is |
265 | | * reflected in the structure of modern APIC identifiers, so we sort based on |
266 | | * those. This is slightly complicated by the fact that the BSP must remain |
267 | | * CPU 0. Hence we do a variation on longest-prefix matching to do the best we |
268 | | * can while keeping CPU 0 static. |
269 | | */ |
270 | | static void __init normalise_cpu_order(void) |
271 | 1 | { |
272 | 1 | unsigned int i, j, min_cpu; |
273 | 1 | uint32_t apicid, diff, min_diff; |
274 | 1 | |
275 | 1 | for_each_present_cpu ( i ) |
276 | 12 | { |
277 | 12 | apicid = x86_cpu_to_apicid[i]; |
278 | 12 | min_diff = min_cpu = ~0u; |
279 | 12 | |
280 | 12 | /* |
281 | 12 | * Find remaining CPU with longest-prefix match on APIC ID. |
282 | 12 | * Among identical longest-prefix matches, pick the smallest APIC ID. |
283 | 12 | */ |
284 | 12 | for ( j = cpumask_next(i, &cpu_present_map); |
285 | 78 | j < nr_cpu_ids; |
286 | 66 | j = cpumask_next(j, &cpu_present_map) ) |
287 | 66 | { |
288 | 66 | diff = x86_cpu_to_apicid[j] ^ apicid; |
289 | 136 | while ( diff & (diff-1) ) |
290 | 70 | diff &= diff-1; |
291 | 66 | if ( (diff < min_diff) || |
292 | 46 | ((diff == min_diff) && |
293 | 14 | (x86_cpu_to_apicid[j] < x86_cpu_to_apicid[min_cpu])) ) |
294 | 23 | { |
295 | 23 | min_diff = diff; |
296 | 23 | min_cpu = j; |
297 | 23 | } |
298 | 66 | } |
299 | 12 | |
300 | 12 | /* If no match then there must be no CPUs remaining to consider. */ |
301 | 12 | if ( min_cpu >= nr_cpu_ids ) |
302 | 1 | { |
303 | 1 | BUG_ON(cpumask_next(i, &cpu_present_map) < nr_cpu_ids); |
304 | 1 | break; |
305 | 1 | } |
306 | 12 | |
307 | 12 | /* Switch the best-matching CPU with the next CPU in logical order. */ |
308 | 11 | j = cpumask_next(i, &cpu_present_map); |
309 | 11 | apicid = x86_cpu_to_apicid[min_cpu]; |
310 | 11 | x86_cpu_to_apicid[min_cpu] = x86_cpu_to_apicid[j]; |
311 | 11 | x86_cpu_to_apicid[j] = apicid; |
312 | 11 | } |
313 | 1 | } |
314 | | |
315 | 27 | #define BOOTSTRAP_MAP_BASE (16UL << 20) |
316 | 14 | #define BOOTSTRAP_MAP_LIMIT (1UL << L3_PAGETABLE_SHIFT) |
317 | | |
318 | | /* |
319 | | * Ensure a given physical memory range is present in the bootstrap mappings. |
320 | | * Use superpage mappings to ensure that pagetable memory needn't be allocated. |
321 | | */ |
322 | | static void *__init bootstrap_map(const module_t *mod) |
323 | 12 | { |
324 | 12 | static unsigned long __initdata map_cur = BOOTSTRAP_MAP_BASE; |
325 | 12 | uint64_t start, end, mask = (1L << L2_PAGETABLE_SHIFT) - 1; |
326 | 12 | void *ret; |
327 | 12 | |
328 | 12 | if ( system_state != SYS_STATE_early_boot ) |
329 | 1 | return mod ? mfn_to_virt(mod->mod_start) : NULL; |
330 | 12 | |
331 | 11 | if ( !mod ) |
332 | 4 | { |
333 | 4 | destroy_xen_mappings(BOOTSTRAP_MAP_BASE, BOOTSTRAP_MAP_LIMIT); |
334 | 4 | map_cur = BOOTSTRAP_MAP_BASE; |
335 | 4 | return NULL; |
336 | 4 | } |
337 | 11 | |
338 | 7 | start = (uint64_t)mod->mod_start << PAGE_SHIFT; |
339 | 7 | end = start + mod->mod_end; |
340 | 7 | if ( start >= end ) |
341 | 0 | return NULL; |
342 | 7 | |
343 | 7 | ret = (void *)(map_cur + (unsigned long)(start & mask)); |
344 | 7 | start &= ~mask; |
345 | 7 | end = (end + mask) & ~mask; |
346 | 7 | if ( end - start > BOOTSTRAP_MAP_LIMIT - map_cur ) |
347 | 0 | return NULL; |
348 | 7 | |
349 | 7 | map_pages_to_xen(map_cur, start >> PAGE_SHIFT, |
350 | 7 | (end - start) >> PAGE_SHIFT, PAGE_HYPERVISOR); |
351 | 7 | map_cur += end - start; |
352 | 7 | return ret; |
353 | 7 | } |
354 | | |
355 | | static void *__init move_memory( |
356 | | uint64_t dst, uint64_t src, unsigned int size, bool keep) |
357 | 3 | { |
358 | 3 | unsigned int blksz = BOOTSTRAP_MAP_LIMIT - BOOTSTRAP_MAP_BASE; |
359 | 3 | unsigned int mask = (1L << L2_PAGETABLE_SHIFT) - 1; |
360 | 3 | |
361 | 3 | if ( src + size > BOOTSTRAP_MAP_BASE ) |
362 | 2 | blksz >>= 1; |
363 | 3 | |
364 | 6 | while ( size ) |
365 | 3 | { |
366 | 3 | module_t mod; |
367 | 3 | unsigned int soffs = src & mask; |
368 | 3 | unsigned int doffs = dst & mask; |
369 | 3 | unsigned int sz; |
370 | 3 | void *d, *s; |
371 | 3 | |
372 | 3 | mod.mod_start = (src - soffs) >> PAGE_SHIFT; |
373 | 3 | mod.mod_end = soffs + size; |
374 | 3 | if ( mod.mod_end > blksz ) |
375 | 0 | mod.mod_end = blksz; |
376 | 3 | sz = mod.mod_end - soffs; |
377 | 3 | s = bootstrap_map(&mod); |
378 | 3 | |
379 | 3 | mod.mod_start = (dst - doffs) >> PAGE_SHIFT; |
380 | 3 | mod.mod_end = doffs + size; |
381 | 3 | if ( mod.mod_end > blksz ) |
382 | 0 | mod.mod_end = blksz; |
383 | 3 | if ( sz > mod.mod_end - doffs ) |
384 | 0 | sz = mod.mod_end - doffs; |
385 | 3 | d = bootstrap_map(&mod); |
386 | 3 | |
387 | 3 | memmove(d + doffs, s + soffs, sz); |
388 | 3 | |
389 | 3 | dst += sz; |
390 | 3 | src += sz; |
391 | 3 | size -= sz; |
392 | 3 | |
393 | 3 | if ( keep ) |
394 | 0 | return size ? NULL : d + doffs; |
395 | 3 | |
396 | 3 | bootstrap_map(NULL); |
397 | 3 | } |
398 | 3 | |
399 | 3 | return NULL; |
400 | 3 | } |
401 | | |
402 | | static uint64_t __init consider_modules( |
403 | | uint64_t s, uint64_t e, uint32_t size, const module_t *mod, |
404 | | unsigned int nr_mods, unsigned int this_mod) |
405 | 9 | { |
406 | 9 | unsigned int i; |
407 | 9 | |
408 | 9 | if ( s > e || e - s < size ) |
409 | 2 | return 0; |
410 | 9 | |
411 | 12 | for ( i = 0; i < nr_mods ; ++i ) |
412 | 8 | { |
413 | 8 | uint64_t start = (uint64_t)mod[i].mod_start << PAGE_SHIFT; |
414 | 8 | uint64_t end = start + PAGE_ALIGN(mod[i].mod_end); |
415 | 8 | |
416 | 8 | if ( i == this_mod ) |
417 | 2 | continue; |
418 | 8 | |
419 | 6 | if ( s < end && start < e ) |
420 | 3 | { |
421 | 3 | end = consider_modules(end, e, size, mod + i + 1, |
422 | 3 | nr_mods - i - 1, this_mod - i - 1); |
423 | 3 | if ( end ) |
424 | 2 | return end; |
425 | 3 | |
426 | 1 | return consider_modules(s, start, size, mod + i + 1, |
427 | 1 | nr_mods - i - 1, this_mod - i - 1); |
428 | 3 | } |
429 | 6 | } |
430 | 7 | |
431 | 4 | return e; |
432 | 7 | } |
433 | | |
434 | | static void __init setup_max_pdx(unsigned long top_page) |
435 | 2 | { |
436 | 2 | max_pdx = pfn_to_pdx(top_page - 1) + 1; |
437 | 2 | |
438 | 2 | if ( max_pdx > (DIRECTMAP_SIZE >> PAGE_SHIFT) ) |
439 | 0 | max_pdx = DIRECTMAP_SIZE >> PAGE_SHIFT; |
440 | 2 | |
441 | 2 | if ( max_pdx > FRAMETABLE_NR ) |
442 | 0 | max_pdx = FRAMETABLE_NR; |
443 | 2 | |
444 | 2 | if ( max_pdx > MPT_VIRT_SIZE / sizeof(unsigned long) ) |
445 | 0 | max_pdx = MPT_VIRT_SIZE / sizeof(unsigned long); |
446 | 2 | |
447 | 2 | #ifdef PAGE_LIST_NULL |
448 | 2 | if ( max_pdx >= PAGE_LIST_NULL ) |
449 | 0 | max_pdx = PAGE_LIST_NULL - 1; |
450 | 2 | #endif |
451 | 2 | |
452 | 2 | max_page = pdx_to_pfn(max_pdx - 1) + 1; |
453 | 2 | } |
454 | | |
455 | | /* A temporary copy of the e820 map that we can mess with during bootstrap. */ |
456 | | static struct e820map __initdata boot_e820; |
457 | | |
458 | | struct boot_video_info { |
459 | | u8 orig_x; /* 0x00 */ |
460 | | u8 orig_y; /* 0x01 */ |
461 | | u8 orig_video_mode; /* 0x02 */ |
462 | | u8 orig_video_cols; /* 0x03 */ |
463 | | u8 orig_video_lines; /* 0x04 */ |
464 | | u8 orig_video_isVGA; /* 0x05 */ |
465 | | u16 orig_video_points; /* 0x06 */ |
466 | | |
467 | | /* VESA graphic mode -- linear frame buffer */ |
468 | | u32 capabilities; /* 0x08 */ |
469 | | u16 lfb_linelength; /* 0x0c */ |
470 | | u16 lfb_width; /* 0x0e */ |
471 | | u16 lfb_height; /* 0x10 */ |
472 | | u16 lfb_depth; /* 0x12 */ |
473 | | u32 lfb_base; /* 0x14 */ |
474 | | u32 lfb_size; /* 0x18 */ |
475 | | u8 red_size; /* 0x1c */ |
476 | | u8 red_pos; /* 0x1d */ |
477 | | u8 green_size; /* 0x1e */ |
478 | | u8 green_pos; /* 0x1f */ |
479 | | u8 blue_size; /* 0x20 */ |
480 | | u8 blue_pos; /* 0x21 */ |
481 | | u8 rsvd_size; /* 0x22 */ |
482 | | u8 rsvd_pos; /* 0x23 */ |
483 | | u16 vesapm_seg; /* 0x24 */ |
484 | | u16 vesapm_off; /* 0x26 */ |
485 | | u16 vesa_attrib; /* 0x28 */ |
486 | | }; |
487 | | extern struct boot_video_info boot_vid_info; |
488 | | |
489 | | static void __init parse_video_info(void) |
490 | 1 | { |
491 | 1 | struct boot_video_info *bvi = &bootsym(boot_vid_info); |
492 | 1 | |
493 | 1 | /* vga_console_info is filled directly on EFI platform. */ |
494 | 1 | if ( efi_enabled(EFI_BOOT) ) |
495 | 0 | return; |
496 | 1 | |
497 | 1 | if ( (bvi->orig_video_isVGA == 1) && (bvi->orig_video_mode == 3) ) |
498 | 1 | { |
499 | 1 | vga_console_info.video_type = XEN_VGATYPE_TEXT_MODE_3; |
500 | 1 | vga_console_info.u.text_mode_3.font_height = bvi->orig_video_points; |
501 | 1 | vga_console_info.u.text_mode_3.cursor_x = bvi->orig_x; |
502 | 1 | vga_console_info.u.text_mode_3.cursor_y = bvi->orig_y; |
503 | 1 | vga_console_info.u.text_mode_3.rows = bvi->orig_video_lines; |
504 | 1 | vga_console_info.u.text_mode_3.columns = bvi->orig_video_cols; |
505 | 1 | } |
506 | 0 | else if ( bvi->orig_video_isVGA == 0x23 ) |
507 | 0 | { |
508 | 0 | vga_console_info.video_type = XEN_VGATYPE_VESA_LFB; |
509 | 0 | vga_console_info.u.vesa_lfb.width = bvi->lfb_width; |
510 | 0 | vga_console_info.u.vesa_lfb.height = bvi->lfb_height; |
511 | 0 | vga_console_info.u.vesa_lfb.bytes_per_line = bvi->lfb_linelength; |
512 | 0 | vga_console_info.u.vesa_lfb.bits_per_pixel = bvi->lfb_depth; |
513 | 0 | vga_console_info.u.vesa_lfb.lfb_base = bvi->lfb_base; |
514 | 0 | vga_console_info.u.vesa_lfb.lfb_size = bvi->lfb_size; |
515 | 0 | vga_console_info.u.vesa_lfb.red_pos = bvi->red_pos; |
516 | 0 | vga_console_info.u.vesa_lfb.red_size = bvi->red_size; |
517 | 0 | vga_console_info.u.vesa_lfb.green_pos = bvi->green_pos; |
518 | 0 | vga_console_info.u.vesa_lfb.green_size = bvi->green_size; |
519 | 0 | vga_console_info.u.vesa_lfb.blue_pos = bvi->blue_pos; |
520 | 0 | vga_console_info.u.vesa_lfb.blue_size = bvi->blue_size; |
521 | 0 | vga_console_info.u.vesa_lfb.rsvd_pos = bvi->rsvd_pos; |
522 | 0 | vga_console_info.u.vesa_lfb.rsvd_size = bvi->rsvd_size; |
523 | 0 | vga_console_info.u.vesa_lfb.gbl_caps = bvi->capabilities; |
524 | 0 | vga_console_info.u.vesa_lfb.mode_attrs = bvi->vesa_attrib; |
525 | 0 | } |
526 | 1 | } |
527 | | |
528 | | static void __init kexec_reserve_area(struct e820map *e820) |
529 | 2 | { |
530 | 2 | #ifdef CONFIG_KEXEC |
531 | 2 | unsigned long kdump_start = kexec_crash_area.start; |
532 | 2 | unsigned long kdump_size = kexec_crash_area.size; |
533 | 2 | static bool __initdata is_reserved = false; |
534 | 2 | |
535 | 2 | kdump_size = (kdump_size + PAGE_SIZE - 1) & PAGE_MASK; |
536 | 2 | |
537 | 2 | if ( (kdump_start == 0) || (kdump_size == 0) || is_reserved ) |
538 | 2 | return; |
539 | 2 | |
540 | 0 | is_reserved = true; |
541 | 0 | |
542 | 0 | if ( !reserve_e820_ram(e820, kdump_start, kdump_start + kdump_size) ) |
543 | 0 | { |
544 | 0 | printk("Kdump: DISABLED (failed to reserve %luMB (%lukB) at %#lx)" |
545 | 0 | "\n", kdump_size >> 20, kdump_size >> 10, kdump_start); |
546 | 0 | kexec_crash_area.start = kexec_crash_area.size = 0; |
547 | 0 | } |
548 | 0 | else |
549 | 0 | { |
550 | 0 | printk("Kdump: %luMB (%lukB) at %#lx\n", |
551 | 0 | kdump_size >> 20, kdump_size >> 10, kdump_start); |
552 | 0 | } |
553 | 0 | #endif |
554 | 0 | } |
555 | | |
556 | | static inline bool using_2M_mapping(void) |
557 | 2 | { |
558 | 2 | return !l1_table_offset((unsigned long)__2M_text_end) && |
559 | 0 | !l1_table_offset((unsigned long)__2M_rodata_start) && |
560 | 0 | !l1_table_offset((unsigned long)__2M_rodata_end) && |
561 | 0 | !l1_table_offset((unsigned long)__2M_init_start) && |
562 | 0 | !l1_table_offset((unsigned long)__2M_init_end) && |
563 | 0 | !l1_table_offset((unsigned long)__2M_rwdata_start) && |
564 | 0 | !l1_table_offset((unsigned long)__2M_rwdata_end); |
565 | 2 | } |
566 | | |
567 | | static void noinline init_done(void) |
568 | 1 | { |
569 | 1 | void *va; |
570 | 1 | unsigned long start, end; |
571 | 1 | |
572 | 1 | system_state = SYS_STATE_active; |
573 | 1 | |
574 | 1 | /* MUST be done prior to removing .init data. */ |
575 | 1 | unregister_init_virtual_region(); |
576 | 1 | |
577 | 1 | domain_unpause_by_systemcontroller(hardware_domain); |
578 | 1 | |
579 | 1 | /* Zero the .init code and data. */ |
580 | 113 | for ( va = __init_begin; va < _p(__init_end); va += PAGE_SIZE ) |
581 | 112 | clear_page(va); |
582 | 1 | |
583 | 1 | /* Destroy Xen's mappings, and reuse the pages. */ |
584 | 1 | if ( using_2M_mapping() ) |
585 | 0 | { |
586 | 0 | start = (unsigned long)&__2M_init_start, |
587 | 0 | end = (unsigned long)&__2M_init_end; |
588 | 0 | } |
589 | 1 | else |
590 | 1 | { |
591 | 1 | start = (unsigned long)&__init_begin; |
592 | 1 | end = (unsigned long)&__init_end; |
593 | 1 | } |
594 | 1 | |
595 | 1 | destroy_xen_mappings(start, end); |
596 | 1 | init_xenheap_pages(__pa(start), __pa(end)); |
597 | 1 | printk("Freed %lukB init memory\n", (end - start) >> 10); |
598 | 1 | |
599 | 1 | startup_cpu_idle_loop(); |
600 | 1 | } |
601 | | |
602 | | /* Reinitialise all state referring to the old virtual address of the stack. */ |
603 | | static void __init noreturn reinit_bsp_stack(void) |
604 | 1 | { |
605 | 1 | unsigned long *stack = (void*)(get_stack_bottom() & ~(STACK_SIZE - 1)); |
606 | 1 | |
607 | 1 | /* Update TSS and ISTs */ |
608 | 1 | load_system_tables(); |
609 | 1 | |
610 | 1 | /* Update SYSCALL trampolines */ |
611 | 1 | percpu_traps_init(); |
612 | 1 | |
613 | 1 | stack_base[0] = stack; |
614 | 1 | memguard_guard_stack(stack); |
615 | 1 | |
616 | 1 | reset_stack_and_jump(init_done); |
617 | 1 | } |
618 | | |
619 | | static bool __init loader_is_grub2(const char *loader_name) |
620 | 1 | { |
621 | 1 | /* GRUB1="GNU GRUB 0.xx"; GRUB2="GRUB 1.xx" */ |
622 | 1 | const char *p = strstr(loader_name, "GRUB "); |
623 | 1 | return (p != NULL) && (p[5] != '0'); |
624 | 1 | } |
625 | | |
626 | | static char * __init cmdline_cook(char *p, const char *loader_name) |
627 | 1 | { |
628 | 0 | p = p ? : ""; |
629 | 1 | |
630 | 1 | /* Strip leading whitespace. */ |
631 | 1 | while ( *p == ' ' ) |
632 | 0 | p++; |
633 | 1 | |
634 | 1 | /* GRUB2 does not include image name as first item on command line. */ |
635 | 1 | if ( loader_is_grub2(loader_name) ) |
636 | 0 | return p; |
637 | 1 | |
638 | 1 | /* Strip image name plus whitespace. */ |
639 | 10 | while ( (*p != ' ') && (*p != '\0') ) |
640 | 9 | p++; |
641 | 2 | while ( *p == ' ' ) |
642 | 1 | p++; |
643 | 1 | |
644 | 1 | return p; |
645 | 1 | } |
646 | | |
647 | | void __init noreturn __start_xen(unsigned long mbi_p) |
648 | 1 | { |
649 | 1 | char *memmap_type = NULL; |
650 | 1 | char *cmdline, *kextra, *loader; |
651 | 1 | unsigned int initrdidx, domcr_flags = DOMCRF_s3_integrity; |
652 | 1 | multiboot_info_t *mbi = __va(mbi_p); |
653 | 1 | module_t *mod = (module_t *)__va(mbi->mods_addr); |
654 | 1 | unsigned long nr_pages, raw_max_page, modules_headroom, *module_map; |
655 | 1 | int i, j, e820_warn = 0, bytes = 0; |
656 | 1 | bool acpi_boot_table_init_done = false; |
657 | 1 | struct domain *dom0; |
658 | 1 | struct ns16550_defaults ns16550 = { |
659 | 1 | .data_bits = 8, |
660 | 1 | .parity = 'n', |
661 | 1 | .stop_bits = 1 |
662 | 1 | }; |
663 | 1 | struct xen_arch_domainconfig config = { .emulation_flags = 0 }; |
664 | 1 | |
665 | 1 | /* Critical region without IDT or TSS. Any fault is deadly! */ |
666 | 1 | |
667 | 1 | set_processor_id(0); |
668 | 1 | set_current(INVALID_VCPU); /* debug sanity. */ |
669 | 1 | idle_vcpu[0] = current; |
670 | 1 | |
671 | 1 | percpu_init_areas(); |
672 | 1 | |
673 | 1 | init_idt_traps(); |
674 | 1 | load_system_tables(); |
675 | 1 | |
676 | 1 | smp_prepare_boot_cpu(); |
677 | 1 | sort_exception_tables(); |
678 | 1 | |
679 | 1 | setup_virtual_regions(__start___ex_table, __stop___ex_table); |
680 | 1 | |
681 | 1 | /* Full exception support from here on in. */ |
682 | 1 | |
683 | 1 | loader = (mbi->flags & MBI_LOADERNAME) |
684 | 1 | ? (char *)__va(mbi->boot_loader_name) : "unknown"; |
685 | 1 | |
686 | 1 | /* Parse the command-line options. */ |
687 | 1 | cmdline = cmdline_cook((mbi->flags & MBI_CMDLINE) ? |
688 | 1 | __va(mbi->cmdline) : NULL, |
689 | 1 | loader); |
690 | 1 | if ( (kextra = strstr(cmdline, " -- ")) != NULL ) |
691 | 0 | { |
692 | 0 | /* |
693 | 0 | * Options after ' -- ' separator belong to dom0. |
694 | 0 | * 1. Orphan dom0's options from Xen's command line. |
695 | 0 | * 2. Skip all but final leading space from dom0's options. |
696 | 0 | */ |
697 | 0 | *kextra = '\0'; |
698 | 0 | kextra += 3; |
699 | 0 | while ( kextra[1] == ' ' ) kextra++; |
700 | 0 | } |
701 | 1 | cmdline_parse(cmdline); |
702 | 1 | |
703 | 1 | /* Must be after command line argument parsing and before |
704 | 1 | * allocing any xenheap structures wanted in lower memory. */ |
705 | 1 | kexec_early_calculations(); |
706 | 1 | |
707 | 1 | parse_video_info(); |
708 | 1 | |
709 | 1 | rdmsrl(MSR_EFER, this_cpu(efer)); |
710 | 1 | asm volatile ( "mov %%cr4,%0" : "=r" (get_cpu_info()->cr4) ); |
711 | 1 | |
712 | 1 | /* We initialise the serial devices very early so we can get debugging. */ |
713 | 1 | ns16550.io_base = 0x3f8; |
714 | 1 | ns16550.irq = 4; |
715 | 1 | ns16550_init(0, &ns16550); |
716 | 1 | ns16550.io_base = 0x2f8; |
717 | 1 | ns16550.irq = 3; |
718 | 1 | ns16550_init(1, &ns16550); |
719 | 1 | ehci_dbgp_init(); |
720 | 1 | console_init_preirq(); |
721 | 1 | |
722 | 1 | printk("Bootloader: %s\n", loader); |
723 | 1 | |
724 | 1 | printk("Command line: %s\n", cmdline); |
725 | 1 | |
726 | 1 | printk("Xen image load base address: %#lx\n", xen_phys_start); |
727 | 1 | |
728 | 1 | printk("Video information:\n"); |
729 | 1 | |
730 | 1 | /* Print VGA display mode information. */ |
731 | 1 | switch ( vga_console_info.video_type ) |
732 | 1 | { |
733 | 1 | case XEN_VGATYPE_TEXT_MODE_3: |
734 | 1 | printk(" VGA is text mode %dx%d, font 8x%d\n", |
735 | 1 | vga_console_info.u.text_mode_3.columns, |
736 | 1 | vga_console_info.u.text_mode_3.rows, |
737 | 1 | vga_console_info.u.text_mode_3.font_height); |
738 | 1 | break; |
739 | 0 | case XEN_VGATYPE_VESA_LFB: |
740 | 0 | case XEN_VGATYPE_EFI_LFB: |
741 | 0 | printk(" VGA is graphics mode %dx%d, %d bpp\n", |
742 | 0 | vga_console_info.u.vesa_lfb.width, |
743 | 0 | vga_console_info.u.vesa_lfb.height, |
744 | 0 | vga_console_info.u.vesa_lfb.bits_per_pixel); |
745 | 0 | break; |
746 | 0 | default: |
747 | 0 | printk(" No VGA detected\n"); |
748 | 0 | break; |
749 | 1 | } |
750 | 1 | |
751 | 1 | /* Print VBE/DDC EDID information. */ |
752 | 1 | if ( bootsym(boot_edid_caps) != 0x1313 ) |
753 | 1 | { |
754 | 1 | u16 caps = bootsym(boot_edid_caps); |
755 | 1 | printk(" VBE/DDC methods:%s%s%s; ", |
756 | 1 | (caps & 1) ? " V1" : "", |
757 | 1 | (caps & 2) ? " V2" : "", |
758 | 1 | !(caps & 3) ? " none" : ""); |
759 | 1 | printk("EDID transfer time: %d seconds\n", caps >> 8); |
760 | 1 | if ( *(u32 *)bootsym(boot_edid_info) == 0x13131313 ) |
761 | 1 | { |
762 | 1 | printk(" EDID info not retrieved because "); |
763 | 1 | if ( !(caps & 3) ) |
764 | 0 | printk("no DDC retrieval method detected\n"); |
765 | 1 | else if ( (caps >> 8) > 5 ) |
766 | 0 | printk("takes longer than 5 seconds\n"); |
767 | 1 | else |
768 | 1 | printk("of reasons unknown\n"); |
769 | 1 | } |
770 | 1 | } |
771 | 1 | |
772 | 1 | printk("Disc information:\n"); |
773 | 1 | printk(" Found %d MBR signatures\n", |
774 | 1 | bootsym(boot_mbr_signature_nr)); |
775 | 1 | printk(" Found %d EDD information structures\n", |
776 | 1 | bootsym(boot_edd_info_nr)); |
777 | 1 | |
778 | 1 | /* Check that we have at least one Multiboot module. */ |
779 | 1 | if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) ) |
780 | 0 | panic("dom0 kernel not specified. Check bootloader configuration."); |
781 | 1 | |
782 | 1 | if ( efi_enabled(EFI_LOADER) ) |
783 | 0 | { |
784 | 0 | set_pdx_range(xen_phys_start >> PAGE_SHIFT, |
785 | 0 | (xen_phys_start + BOOTSTRAP_MAP_BASE) >> PAGE_SHIFT); |
786 | 0 | |
787 | 0 | /* Clean up boot loader identity mappings. */ |
788 | 0 | destroy_xen_mappings(xen_phys_start, |
789 | 0 | xen_phys_start + BOOTSTRAP_MAP_BASE); |
790 | 0 | |
791 | 0 | /* Make boot page tables match non-EFI boot. */ |
792 | 0 | l3_bootmap[l3_table_offset(BOOTSTRAP_MAP_BASE)] = |
793 | 0 | l3e_from_paddr(__pa(l2_bootmap), __PAGE_HYPERVISOR); |
794 | 0 | |
795 | 0 | memmap_type = loader; |
796 | 0 | } |
797 | 1 | else if ( efi_enabled(EFI_BOOT) ) |
798 | 0 | memmap_type = "EFI"; |
799 | 1 | else if ( (e820_raw.nr_map = |
800 | 1 | copy_bios_e820(e820_raw.map, |
801 | 1 | ARRAY_SIZE(e820_raw.map))) != 0 ) |
802 | 1 | { |
803 | 1 | memmap_type = "Xen-e820"; |
804 | 1 | } |
805 | 0 | else if ( mbi->flags & MBI_MEMMAP ) |
806 | 0 | { |
807 | 0 | memmap_type = "Multiboot-e820"; |
808 | 0 | while ( bytes < mbi->mmap_length && |
809 | 0 | e820_raw.nr_map < ARRAY_SIZE(e820_raw.map) ) |
810 | 0 | { |
811 | 0 | memory_map_t *map = __va(mbi->mmap_addr + bytes); |
812 | 0 | |
813 | 0 | /* |
814 | 0 | * This is a gross workaround for a BIOS bug. Some bootloaders do |
815 | 0 | * not write e820 map entries into pre-zeroed memory. This is |
816 | 0 | * okay if the BIOS fills in all fields of the map entry, but |
817 | 0 | * some broken BIOSes do not bother to write the high word of |
818 | 0 | * the length field if the length is smaller than 4GB. We |
819 | 0 | * detect and fix this by flagging sections below 4GB that |
820 | 0 | * appear to be larger than 4GB in size. |
821 | 0 | */ |
822 | 0 | if ( (map->base_addr_high == 0) && (map->length_high != 0) ) |
823 | 0 | { |
824 | 0 | if ( !e820_warn ) |
825 | 0 | { |
826 | 0 | printk("WARNING: Buggy e820 map detected and fixed " |
827 | 0 | "(truncated length fields).\n"); |
828 | 0 | e820_warn = 1; |
829 | 0 | } |
830 | 0 | map->length_high = 0; |
831 | 0 | } |
832 | 0 | |
833 | 0 | e820_raw.map[e820_raw.nr_map].addr = |
834 | 0 | ((u64)map->base_addr_high << 32) | (u64)map->base_addr_low; |
835 | 0 | e820_raw.map[e820_raw.nr_map].size = |
836 | 0 | ((u64)map->length_high << 32) | (u64)map->length_low; |
837 | 0 | e820_raw.map[e820_raw.nr_map].type = map->type; |
838 | 0 | e820_raw.nr_map++; |
839 | 0 | |
840 | 0 | bytes += map->size + 4; |
841 | 0 | } |
842 | 0 | } |
843 | 0 | else if ( bootsym(lowmem_kb) ) |
844 | 0 | { |
845 | 0 | memmap_type = "Xen-e801"; |
846 | 0 | e820_raw.map[0].addr = 0; |
847 | 0 | e820_raw.map[0].size = bootsym(lowmem_kb) << 10; |
848 | 0 | e820_raw.map[0].type = E820_RAM; |
849 | 0 | e820_raw.map[1].addr = 0x100000; |
850 | 0 | e820_raw.map[1].size = bootsym(highmem_kb) << 10; |
851 | 0 | e820_raw.map[1].type = E820_RAM; |
852 | 0 | e820_raw.nr_map = 2; |
853 | 0 | } |
854 | 0 | else if ( mbi->flags & MBI_MEMLIMITS ) |
855 | 0 | { |
856 | 0 | memmap_type = "Multiboot-e801"; |
857 | 0 | e820_raw.map[0].addr = 0; |
858 | 0 | e820_raw.map[0].size = mbi->mem_lower << 10; |
859 | 0 | e820_raw.map[0].type = E820_RAM; |
860 | 0 | e820_raw.map[1].addr = 0x100000; |
861 | 0 | e820_raw.map[1].size = mbi->mem_upper << 10; |
862 | 0 | e820_raw.map[1].type = E820_RAM; |
863 | 0 | e820_raw.nr_map = 2; |
864 | 0 | } |
865 | 0 | else |
866 | 0 | panic("Bootloader provided no memory information."); |
867 | 1 | |
868 | 1 | /* Sanitise the raw E820 map to produce a final clean version. */ |
869 | 1 | max_page = raw_max_page = init_e820(memmap_type, &e820_raw); |
870 | 1 | |
871 | 1 | /* Create a temporary copy of the E820 map. */ |
872 | 1 | memcpy(&boot_e820, &e820, sizeof(e820)); |
873 | 1 | |
874 | 1 | /* Early kexec reservation (explicit static start address). */ |
875 | 1 | nr_pages = 0; |
876 | 20 | for ( i = 0; i < e820.nr_map; i++ ) |
877 | 19 | if ( e820.map[i].type == E820_RAM ) |
878 | 8 | nr_pages += e820.map[i].size >> PAGE_SHIFT; |
879 | 1 | set_kexec_crash_area_size((u64)nr_pages << PAGE_SHIFT); |
880 | 1 | kexec_reserve_area(&boot_e820); |
881 | 1 | |
882 | 1 | initial_images = mod; |
883 | 1 | nr_initial_images = mbi->mods_count; |
884 | 1 | |
885 | 1 | /* |
886 | 1 | * Iterate backwards over all superpage-aligned RAM regions. |
887 | 1 | * |
888 | 1 | * We require superpage alignment because the boot allocator is not yet |
889 | 1 | * initialised. Hence we can only map superpages in the address range |
890 | 1 | * 0 to BOOTSTRAP_DIRECTMAP_END, as this is guaranteed not to require |
891 | 1 | * dynamic allocation of pagetables. |
892 | 1 | * |
893 | 1 | * As well as mapping superpages in that range, in preparation for |
894 | 1 | * initialising the boot allocator, we also look for a region to which |
895 | 1 | * we can relocate the dom0 kernel and other multiboot modules. Also, on |
896 | 1 | * x86/64, we relocate Xen to higher memory. |
897 | 1 | */ |
898 | 3 | for ( i = 0; !efi_enabled(EFI_LOADER) && i < mbi->mods_count; i++ ) |
899 | 2 | { |
900 | 2 | if ( mod[i].mod_start & (PAGE_SIZE - 1) ) |
901 | 0 | panic("Bootloader didn't honor module alignment request."); |
902 | 2 | mod[i].mod_end -= mod[i].mod_start; |
903 | 2 | mod[i].mod_start >>= PAGE_SHIFT; |
904 | 2 | mod[i].reserved = 0; |
905 | 2 | } |
906 | 1 | |
907 | 1 | if ( efi_enabled(EFI_LOADER) ) |
908 | 0 | { |
909 | 0 | /* |
910 | 0 | * This needs to remain in sync with xen_in_range() and the |
911 | 0 | * respective reserve_e820_ram() invocation below. |
912 | 0 | */ |
913 | 0 | mod[mbi->mods_count].mod_start = virt_to_mfn(_stext); |
914 | 0 | mod[mbi->mods_count].mod_end = __2M_rwdata_end - _stext; |
915 | 0 | } |
916 | 1 | |
917 | 1 | modules_headroom = bzimage_headroom(bootstrap_map(mod), mod->mod_end); |
918 | 1 | bootstrap_map(NULL); |
919 | 1 | |
920 | 1 | #ifndef highmem_start |
921 | 1 | /* Don't allow split below 4Gb. */ |
922 | 1 | if ( highmem_start < GB(4) ) |
923 | 1 | highmem_start = 0; |
924 | 1 | else /* align to L3 entry boundary */ |
925 | 0 | highmem_start &= ~((1UL << L3_PAGETABLE_SHIFT) - 1); |
926 | 1 | #endif |
927 | 1 | |
928 | 20 | for ( i = boot_e820.nr_map-1; i >= 0; i-- ) |
929 | 19 | { |
930 | 19 | uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1; |
931 | 19 | uint64_t end, limit = ARRAY_SIZE(l2_identmap) << L2_PAGETABLE_SHIFT; |
932 | 19 | |
933 | 19 | /* Superpage-aligned chunks from BOOTSTRAP_MAP_BASE. */ |
934 | 19 | s = (boot_e820.map[i].addr + mask) & ~mask; |
935 | 19 | e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask; |
936 | 19 | s = max_t(uint64_t, s, BOOTSTRAP_MAP_BASE); |
937 | 19 | if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) ) |
938 | 12 | continue; |
939 | 19 | |
940 | 7 | if ( s < limit ) |
941 | 6 | { |
942 | 6 | end = min(e, limit); |
943 | 6 | set_pdx_range(s >> PAGE_SHIFT, end >> PAGE_SHIFT); |
944 | 6 | map_pages_to_xen((unsigned long)__va(s), s >> PAGE_SHIFT, |
945 | 6 | (end - s) >> PAGE_SHIFT, PAGE_HYPERVISOR); |
946 | 6 | } |
947 | 7 | |
948 | 7 | if ( e > min(HYPERVISOR_VIRT_END - DIRECTMAP_VIRT_START, |
949 | 7 | 1UL << (PAGE_SHIFT + 32)) ) |
950 | 0 | e = min(HYPERVISOR_VIRT_END - DIRECTMAP_VIRT_START, |
951 | 7 | 1UL << (PAGE_SHIFT + 32)); |
952 | 3 | #define reloc_size ((__pa(__2M_rwdata_end) + mask) & ~mask) |
953 | 7 | /* Is the region suitable for relocating Xen? */ |
954 | 7 | if ( !xen_phys_start && e <= limit ) |
955 | 2 | { |
956 | 2 | /* Don't overlap with modules. */ |
957 | 2 | end = consider_modules(s, e, reloc_size + mask, |
958 | 2 | mod, mbi->mods_count, -1); |
959 | 2 | end &= ~mask; |
960 | 2 | } |
961 | 7 | else |
962 | 5 | end = 0; |
963 | 7 | if ( end > s ) |
964 | 1 | { |
965 | 1 | l4_pgentry_t *pl4e; |
966 | 1 | l3_pgentry_t *pl3e; |
967 | 1 | l2_pgentry_t *pl2e; |
968 | 1 | int i, j, k; |
969 | 1 | |
970 | 1 | /* Select relocation address. */ |
971 | 1 | e = end - reloc_size; |
972 | 1 | xen_phys_start = e; |
973 | 1 | bootsym(trampoline_xen_phys_start) = e; |
974 | 1 | |
975 | 1 | /* |
976 | 1 | * Perform relocation to new physical address. |
977 | 1 | * Before doing so we must sync static/global data with main memory |
978 | 1 | * with a barrier(). After this we must *not* modify static/global |
979 | 1 | * data until after we have switched to the relocated pagetables! |
980 | 1 | */ |
981 | 1 | barrier(); |
982 | 1 | move_memory(e + XEN_IMG_OFFSET, XEN_IMG_OFFSET, _end - _start, 1); |
983 | 1 | |
984 | 1 | /* Walk initial pagetables, relocating page directory entries. */ |
985 | 1 | pl4e = __va(__pa(idle_pg_table)); |
986 | 1 | for ( i = 0 ; i < L4_PAGETABLE_ENTRIES; i++, pl4e++ ) |
987 | 0 | { |
988 | 0 | if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) ) |
989 | 0 | continue; |
990 | 0 | *pl4e = l4e_from_intpte(l4e_get_intpte(*pl4e) + |
991 | 0 | xen_phys_start); |
992 | 0 | pl3e = l4e_to_l3e(*pl4e); |
993 | 0 | for ( j = 0; j < L3_PAGETABLE_ENTRIES; j++, pl3e++ ) |
994 | 0 | { |
995 | 0 | /* Not present, 1GB mapping, or already relocated? */ |
996 | 0 | if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) || |
997 | 0 | (l3e_get_flags(*pl3e) & _PAGE_PSE) || |
998 | 0 | (l3e_get_pfn(*pl3e) > PFN_DOWN(xen_phys_start)) ) |
999 | 0 | continue; |
1000 | 0 | *pl3e = l3e_from_intpte(l3e_get_intpte(*pl3e) + |
1001 | 0 | xen_phys_start); |
1002 | 0 | pl2e = l3e_to_l2e(*pl3e); |
1003 | 0 | for ( k = 0; k < L2_PAGETABLE_ENTRIES; k++, pl2e++ ) |
1004 | 0 | { |
1005 | 0 | /* Not present, PSE, or already relocated? */ |
1006 | 0 | if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) || |
1007 | 0 | (l2e_get_flags(*pl2e) & _PAGE_PSE) || |
1008 | 0 | (l2e_get_pfn(*pl2e) > PFN_DOWN(xen_phys_start)) ) |
1009 | 0 | continue; |
1010 | 0 | *pl2e = l2e_from_intpte(l2e_get_intpte(*pl2e) + |
1011 | 0 | xen_phys_start); |
1012 | 0 | } |
1013 | 0 | } |
1014 | 0 | } |
1015 | 1 | |
1016 | 1 | /* The only data mappings to be relocated are in the Xen area. */ |
1017 | 1 | pl2e = __va(__pa(l2_xenmap)); |
1018 | 1 | /* |
1019 | 1 | * Undo the temporary-hooking of the l1_identmap. __2M_text_start |
1020 | 1 | * is contained in this PTE. |
1021 | 1 | */ |
1022 | 1 | BUG_ON(using_2M_mapping() && |
1023 | 1 | l2_table_offset((unsigned long)_erodata) == |
1024 | 1 | l2_table_offset((unsigned long)_stext)); |
1025 | 1 | *pl2e++ = l2e_from_pfn(xen_phys_start >> PAGE_SHIFT, |
1026 | 1 | PAGE_HYPERVISOR_RX | _PAGE_PSE); |
1027 | 1 | for ( i = 1; i < L2_PAGETABLE_ENTRIES; i++, pl2e++ ) |
1028 | 0 | { |
1029 | 0 | unsigned int flags; |
1030 | 0 | |
1031 | 0 | if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) || |
1032 | 0 | (l2e_get_pfn(*pl2e) > PFN_DOWN(xen_phys_start)) ) |
1033 | 0 | continue; |
1034 | 0 | |
1035 | 0 | if ( !using_2M_mapping() ) |
1036 | 0 | { |
1037 | 0 | *pl2e = l2e_from_intpte(l2e_get_intpte(*pl2e) + |
1038 | 0 | xen_phys_start); |
1039 | 0 | continue; |
1040 | 0 | } |
1041 | 0 | |
1042 | 0 | if ( i < l2_table_offset((unsigned long)&__2M_text_end) ) |
1043 | 0 | { |
1044 | 0 | flags = PAGE_HYPERVISOR_RX | _PAGE_PSE; |
1045 | 0 | } |
1046 | 0 | else if ( i >= l2_table_offset((unsigned long)&__2M_rodata_start) && |
1047 | 0 | i < l2_table_offset((unsigned long)&__2M_rodata_end) ) |
1048 | 0 | { |
1049 | 0 | flags = PAGE_HYPERVISOR_RO | _PAGE_PSE; |
1050 | 0 | } |
1051 | 0 | else if ( i >= l2_table_offset((unsigned long)&__2M_init_start) && |
1052 | 0 | i < l2_table_offset((unsigned long)&__2M_init_end) ) |
1053 | 0 | { |
1054 | 0 | flags = PAGE_HYPERVISOR_RWX | _PAGE_PSE; |
1055 | 0 | } |
1056 | 0 | else if ( (i >= l2_table_offset((unsigned long)&__2M_rwdata_start) && |
1057 | 0 | i < l2_table_offset((unsigned long)&__2M_rwdata_end)) ) |
1058 | 0 | { |
1059 | 0 | flags = PAGE_HYPERVISOR_RW | _PAGE_PSE; |
1060 | 0 | } |
1061 | 0 | else |
1062 | 0 | { |
1063 | 0 | *pl2e = l2e_empty(); |
1064 | 0 | continue; |
1065 | 0 | } |
1066 | 0 | |
1067 | 0 | *pl2e = l2e_from_paddr( |
1068 | 0 | l2e_get_paddr(*pl2e) + xen_phys_start, flags); |
1069 | 0 | } |
1070 | 1 | |
1071 | 1 | /* Re-sync the stack and then switch to relocated pagetables. */ |
1072 | 1 | asm volatile ( |
1073 | 1 | "rep movsq ; " /* re-sync the stack */ |
1074 | 1 | "movq %%cr4,%%rsi ; " |
1075 | 1 | "andb $0x7f,%%sil ; " |
1076 | 1 | "movq %%rsi,%%cr4 ; " /* CR4.PGE == 0 */ |
1077 | 1 | "movq %[pg],%%cr3 ; " /* CR3 == new pagetables */ |
1078 | 1 | "orb $0x80,%%sil ; " |
1079 | 1 | "movq %%rsi,%%cr4 " /* CR4.PGE == 1 */ |
1080 | 1 | : "=&S" (i), "=&D" (i), "=&c" (i) /* All outputs discarded. */ |
1081 | 1 | : [pg] "r" (__pa(idle_pg_table)), "0" (cpu0_stack), |
1082 | 1 | "1" (__va(__pa(cpu0_stack))), "2" (STACK_SIZE / 8) |
1083 | 1 | : "memory" ); |
1084 | 1 | |
1085 | 1 | bootstrap_map(NULL); |
1086 | 1 | |
1087 | 1 | printk("New Xen image base address: %#lx\n", xen_phys_start); |
1088 | 1 | } |
1089 | 7 | |
1090 | 7 | /* Is the region suitable for relocating the multiboot modules? */ |
1091 | 21 | for ( j = mbi->mods_count - 1; j >= 0; j-- ) |
1092 | 14 | { |
1093 | 7 | unsigned long headroom = j ? 0 : modules_headroom; |
1094 | 14 | unsigned long size = PAGE_ALIGN(headroom + mod[j].mod_end); |
1095 | 14 | |
1096 | 14 | if ( mod[j].reserved ) |
1097 | 12 | continue; |
1098 | 14 | |
1099 | 14 | /* Don't overlap with other modules (or Xen itself). */ |
1100 | 2 | end = consider_modules(s, e, size, mod, |
1101 | 2 | mbi->mods_count + efi_enabled(EFI_LOADER), |
1102 | 2 | j); |
1103 | 2 | |
1104 | 2 | if ( highmem_start && end > highmem_start ) |
1105 | 0 | continue; |
1106 | 2 | |
1107 | 2 | if ( s < end && |
1108 | 2 | (headroom || |
1109 | 2 | ((end - size) >> PAGE_SHIFT) > mod[j].mod_start) ) |
1110 | 2 | { |
1111 | 2 | move_memory(end - size + headroom, |
1112 | 2 | (uint64_t)mod[j].mod_start << PAGE_SHIFT, |
1113 | 2 | mod[j].mod_end, 0); |
1114 | 2 | mod[j].mod_start = (end - size) >> PAGE_SHIFT; |
1115 | 2 | mod[j].mod_end += headroom; |
1116 | 2 | mod[j].reserved = 1; |
1117 | 2 | } |
1118 | 2 | } |
1119 | 7 | |
1120 | 7 | #ifdef CONFIG_KEXEC |
1121 | 7 | /* |
1122 | 7 | * Looking backwards from the crash area limit, find a large |
1123 | 7 | * enough range that does not overlap with modules. |
1124 | 7 | */ |
1125 | 8 | while ( !kexec_crash_area.start ) |
1126 | 1 | { |
1127 | 1 | /* Don't overlap with modules (or Xen itself). */ |
1128 | 1 | e = consider_modules(s, e, PAGE_ALIGN(kexec_crash_area.size), mod, |
1129 | 1 | mbi->mods_count + efi_enabled(EFI_LOADER), -1); |
1130 | 1 | if ( s >= e ) |
1131 | 0 | break; |
1132 | 1 | if ( e > kexec_crash_area_limit ) |
1133 | 0 | { |
1134 | 0 | e = kexec_crash_area_limit & PAGE_MASK; |
1135 | 0 | continue; |
1136 | 0 | } |
1137 | 1 | kexec_crash_area.start = (e - kexec_crash_area.size) & PAGE_MASK; |
1138 | 1 | } |
1139 | 7 | #endif |
1140 | 7 | } |
1141 | 1 | |
1142 | 1 | if ( modules_headroom && !mod->reserved ) |
1143 | 0 | panic("Not enough memory to relocate the dom0 kernel image."); |
1144 | 3 | for ( i = 0; i < mbi->mods_count; ++i ) |
1145 | 2 | { |
1146 | 2 | uint64_t s = (uint64_t)mod[i].mod_start << PAGE_SHIFT; |
1147 | 2 | |
1148 | 2 | reserve_e820_ram(&boot_e820, s, s + PAGE_ALIGN(mod[i].mod_end)); |
1149 | 2 | } |
1150 | 1 | |
1151 | 1 | if ( !xen_phys_start ) |
1152 | 0 | panic("Not enough memory to relocate Xen."); |
1153 | 1 | |
1154 | 1 | /* This needs to remain in sync with xen_in_range(). */ |
1155 | 1 | reserve_e820_ram(&boot_e820, __pa(_stext), __pa(__2M_rwdata_end)); |
1156 | 1 | |
1157 | 1 | /* Late kexec reservation (dynamic start address). */ |
1158 | 1 | kexec_reserve_area(&boot_e820); |
1159 | 1 | |
1160 | 1 | setup_max_pdx(raw_max_page); |
1161 | 1 | if ( highmem_start ) |
1162 | 0 | xenheap_max_mfn(PFN_DOWN(highmem_start - 1)); |
1163 | 1 | |
1164 | 1 | /* |
1165 | 1 | * Walk every RAM region and map it in its entirety (on x86/64, at least) |
1166 | 1 | * and notify it to the boot allocator. |
1167 | 1 | */ |
1168 | 23 | for ( i = 0; i < boot_e820.nr_map; i++ ) |
1169 | 22 | { |
1170 | 22 | uint64_t s, e, mask = PAGE_SIZE - 1; |
1171 | 22 | uint64_t map_s, map_e; |
1172 | 22 | |
1173 | 22 | /* Only page alignment required now. */ |
1174 | 22 | s = (boot_e820.map[i].addr + mask) & ~mask; |
1175 | 22 | e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask; |
1176 | 22 | s = max_t(uint64_t, s, 1<<20); |
1177 | 22 | if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) ) |
1178 | 14 | continue; |
1179 | 22 | |
1180 | 8 | if ( !acpi_boot_table_init_done && |
1181 | 8 | s >= (1ULL << 32) && |
1182 | 1 | !acpi_boot_table_init() ) |
1183 | 1 | { |
1184 | 1 | acpi_boot_table_init_done = true; |
1185 | 1 | srat_parse_regions(s); |
1186 | 1 | setup_max_pdx(raw_max_page); |
1187 | 1 | } |
1188 | 8 | |
1189 | 8 | if ( pfn_to_pdx((e - 1) >> PAGE_SHIFT) >= max_pdx ) |
1190 | 0 | { |
1191 | 0 | if ( pfn_to_pdx(s >> PAGE_SHIFT) >= max_pdx ) |
1192 | 0 | { |
1193 | 0 | for ( j = i - 1; ; --j ) |
1194 | 0 | { |
1195 | 0 | if ( boot_e820.map[j].type == E820_RAM ) |
1196 | 0 | break; |
1197 | 0 | ASSERT(j); |
1198 | 0 | } |
1199 | 0 | map_e = boot_e820.map[j].addr + boot_e820.map[j].size; |
1200 | 0 | for ( j = 0; j < mbi->mods_count; ++j ) |
1201 | 0 | { |
1202 | 0 | uint64_t end = pfn_to_paddr(mod[j].mod_start) + |
1203 | 0 | mod[j].mod_end; |
1204 | 0 | |
1205 | 0 | if ( map_e < end ) |
1206 | 0 | map_e = end; |
1207 | 0 | } |
1208 | 0 | if ( PFN_UP(map_e) < max_page ) |
1209 | 0 | { |
1210 | 0 | max_page = PFN_UP(map_e); |
1211 | 0 | max_pdx = pfn_to_pdx(max_page - 1) + 1; |
1212 | 0 | } |
1213 | 0 | printk(XENLOG_WARNING "Ignoring inaccessible memory range" |
1214 | 0 | " %013"PRIx64"-%013"PRIx64"\n", |
1215 | 0 | s, e); |
1216 | 0 | continue; |
1217 | 0 | } |
1218 | 0 | map_e = e; |
1219 | 0 | e = (pdx_to_pfn(max_pdx - 1) + 1ULL) << PAGE_SHIFT; |
1220 | 0 | printk(XENLOG_WARNING "Ignoring inaccessible memory range" |
1221 | 0 | " %013"PRIx64"-%013"PRIx64"\n", |
1222 | 0 | e, map_e); |
1223 | 0 | } |
1224 | 8 | |
1225 | 8 | set_pdx_range(s >> PAGE_SHIFT, e >> PAGE_SHIFT); |
1226 | 8 | |
1227 | 8 | /* Need to create mappings above BOOTSTRAP_MAP_BASE. */ |
1228 | 8 | map_s = max_t(uint64_t, s, BOOTSTRAP_MAP_BASE); |
1229 | 8 | map_e = min_t(uint64_t, e, |
1230 | 8 | ARRAY_SIZE(l2_identmap) << L2_PAGETABLE_SHIFT); |
1231 | 8 | |
1232 | 8 | /* Pass mapped memory to allocator /before/ creating new mappings. */ |
1233 | 8 | init_boot_pages(s, min(map_s, e)); |
1234 | 8 | s = map_s; |
1235 | 8 | if ( s < map_e ) |
1236 | 7 | { |
1237 | 7 | uint64_t mask = (1UL << L2_PAGETABLE_SHIFT) - 1; |
1238 | 7 | |
1239 | 7 | map_s = (s + mask) & ~mask; |
1240 | 7 | map_e &= ~mask; |
1241 | 7 | init_boot_pages(map_s, map_e); |
1242 | 7 | } |
1243 | 8 | |
1244 | 8 | if ( map_s > map_e ) |
1245 | 0 | map_s = map_e = s; |
1246 | 8 | |
1247 | 8 | /* Create new mappings /before/ passing memory to the allocator. */ |
1248 | 8 | if ( map_e < e ) |
1249 | 6 | { |
1250 | 6 | uint64_t limit = __pa(HYPERVISOR_VIRT_END - 1) + 1; |
1251 | 6 | uint64_t end = min(e, limit); |
1252 | 6 | |
1253 | 6 | if ( map_e < end ) |
1254 | 6 | { |
1255 | 6 | map_pages_to_xen((unsigned long)__va(map_e), PFN_DOWN(map_e), |
1256 | 6 | PFN_DOWN(end - map_e), PAGE_HYPERVISOR); |
1257 | 6 | init_boot_pages(map_e, end); |
1258 | 6 | map_e = end; |
1259 | 6 | } |
1260 | 6 | } |
1261 | 8 | if ( map_e < e ) |
1262 | 0 | { |
1263 | 0 | /* This range must not be passed to the boot allocator and |
1264 | 0 | * must also not be mapped with _PAGE_GLOBAL. */ |
1265 | 0 | map_pages_to_xen((unsigned long)__va(map_e), PFN_DOWN(map_e), |
1266 | 0 | PFN_DOWN(e - map_e), __PAGE_HYPERVISOR_RW); |
1267 | 0 | } |
1268 | 8 | if ( s < map_s ) |
1269 | 2 | { |
1270 | 2 | map_pages_to_xen((unsigned long)__va(s), s >> PAGE_SHIFT, |
1271 | 2 | (map_s - s) >> PAGE_SHIFT, PAGE_HYPERVISOR); |
1272 | 2 | init_boot_pages(s, map_s); |
1273 | 2 | } |
1274 | 8 | } |
1275 | 1 | |
1276 | 3 | for ( i = 0; i < mbi->mods_count; ++i ) |
1277 | 2 | { |
1278 | 2 | set_pdx_range(mod[i].mod_start, |
1279 | 2 | mod[i].mod_start + PFN_UP(mod[i].mod_end)); |
1280 | 2 | map_pages_to_xen((unsigned long)mfn_to_virt(mod[i].mod_start), |
1281 | 2 | mod[i].mod_start, |
1282 | 2 | PFN_UP(mod[i].mod_end), PAGE_HYPERVISOR); |
1283 | 2 | } |
1284 | 1 | |
1285 | 1 | #ifdef CONFIG_KEXEC |
1286 | 1 | if ( kexec_crash_area.size ) |
1287 | 0 | { |
1288 | 0 | unsigned long s = PFN_DOWN(kexec_crash_area.start); |
1289 | 0 | unsigned long e = min(s + PFN_UP(kexec_crash_area.size), |
1290 | 0 | PFN_UP(__pa(HYPERVISOR_VIRT_END - 1))); |
1291 | 0 | |
1292 | 0 | if ( e > s ) |
1293 | 0 | map_pages_to_xen((unsigned long)__va(kexec_crash_area.start), |
1294 | 0 | s, e - s, PAGE_HYPERVISOR); |
1295 | 0 | } |
1296 | 1 | #endif |
1297 | 1 | |
1298 | 1 | xen_virt_end = ((unsigned long)_end + (1UL << L2_PAGETABLE_SHIFT) - 1) & |
1299 | 1 | ~((1UL << L2_PAGETABLE_SHIFT) - 1); |
1300 | 1 | destroy_xen_mappings(xen_virt_end, XEN_VIRT_START + BOOTSTRAP_MAP_BASE); |
1301 | 1 | |
1302 | 1 | /* |
1303 | 1 | * If not using 2M mappings to gain suitable pagetable permissions |
1304 | 1 | * directly from the relocation above, remap the code/data |
1305 | 1 | * sections with decreased permissions. |
1306 | 1 | */ |
1307 | 1 | if ( !using_2M_mapping() ) |
1308 | 1 | { |
1309 | 1 | /* Mark .text as RX (avoiding the first 2M superpage). */ |
1310 | 1 | modify_xen_mappings(XEN_VIRT_START + MB(2), |
1311 | 1 | (unsigned long)&__2M_text_end, |
1312 | 1 | PAGE_HYPERVISOR_RX); |
1313 | 1 | |
1314 | 1 | /* Mark .rodata as RO. */ |
1315 | 1 | modify_xen_mappings((unsigned long)&__2M_rodata_start, |
1316 | 1 | (unsigned long)&__2M_rodata_end, |
1317 | 1 | PAGE_HYPERVISOR_RO); |
1318 | 1 | |
1319 | 1 | /* Mark .data and .bss as RW. */ |
1320 | 1 | modify_xen_mappings((unsigned long)&__2M_rwdata_start, |
1321 | 1 | (unsigned long)&__2M_rwdata_end, |
1322 | 1 | PAGE_HYPERVISOR_RW); |
1323 | 1 | |
1324 | 1 | /* Drop the remaining mappings in the shattered superpage. */ |
1325 | 1 | destroy_xen_mappings((unsigned long)&__2M_rwdata_end, |
1326 | 1 | ROUNDUP((unsigned long)&__2M_rwdata_end, MB(2))); |
1327 | 1 | } |
1328 | 1 | |
1329 | 1 | nr_pages = 0; |
1330 | 20 | for ( i = 0; i < e820.nr_map; i++ ) |
1331 | 19 | if ( e820.map[i].type == E820_RAM ) |
1332 | 8 | nr_pages += e820.map[i].size >> PAGE_SHIFT; |
1333 | 1 | printk("System RAM: %luMB (%lukB)\n", |
1334 | 1 | nr_pages >> (20 - PAGE_SHIFT), |
1335 | 1 | nr_pages << (PAGE_SHIFT - 10)); |
1336 | 1 | total_pages = nr_pages; |
1337 | 1 | |
1338 | 1 | /* Sanity check for unwanted bloat of certain hypercall structures. */ |
1339 | 1 | BUILD_BUG_ON(sizeof(((struct xen_platform_op *)0)->u) != |
1340 | 1 | sizeof(((struct xen_platform_op *)0)->u.pad)); |
1341 | 1 | BUILD_BUG_ON(sizeof(((struct xen_domctl *)0)->u) != |
1342 | 1 | sizeof(((struct xen_domctl *)0)->u.pad)); |
1343 | 1 | BUILD_BUG_ON(sizeof(((struct xen_sysctl *)0)->u) != |
1344 | 1 | sizeof(((struct xen_sysctl *)0)->u.pad)); |
1345 | 1 | |
1346 | 1 | BUILD_BUG_ON(sizeof(start_info_t) > PAGE_SIZE); |
1347 | 1 | BUILD_BUG_ON(sizeof(shared_info_t) > PAGE_SIZE); |
1348 | 1 | BUILD_BUG_ON(sizeof(struct vcpu_info) != 64); |
1349 | 1 | |
1350 | 1 | BUILD_BUG_ON(sizeof(((struct compat_platform_op *)0)->u) != |
1351 | 1 | sizeof(((struct compat_platform_op *)0)->u.pad)); |
1352 | 1 | BUILD_BUG_ON(sizeof(start_info_compat_t) > PAGE_SIZE); |
1353 | 1 | BUILD_BUG_ON(sizeof(struct compat_vcpu_info) != 64); |
1354 | 1 | |
1355 | 1 | /* Check definitions in public headers match internal defs. */ |
1356 | 1 | BUILD_BUG_ON(__HYPERVISOR_VIRT_START != HYPERVISOR_VIRT_START); |
1357 | 1 | BUILD_BUG_ON(__HYPERVISOR_VIRT_END != HYPERVISOR_VIRT_END); |
1358 | 1 | BUILD_BUG_ON(MACH2PHYS_VIRT_START != RO_MPT_VIRT_START); |
1359 | 1 | BUILD_BUG_ON(MACH2PHYS_VIRT_END != RO_MPT_VIRT_END); |
1360 | 1 | |
1361 | 1 | init_frametable(); |
1362 | 1 | |
1363 | 1 | if ( !acpi_boot_table_init_done ) |
1364 | 0 | acpi_boot_table_init(); |
1365 | 1 | |
1366 | 1 | acpi_numa_init(); |
1367 | 1 | |
1368 | 1 | numa_initmem_init(0, raw_max_page); |
1369 | 1 | |
1370 | 1 | if ( max_page - 1 > virt_to_mfn(HYPERVISOR_VIRT_END - 1) ) |
1371 | 0 | { |
1372 | 0 | unsigned long limit = virt_to_mfn(HYPERVISOR_VIRT_END - 1); |
1373 | 0 | uint64_t mask = PAGE_SIZE - 1; |
1374 | 0 | |
1375 | 0 | if ( !highmem_start ) |
1376 | 0 | xenheap_max_mfn(limit); |
1377 | 0 | |
1378 | 0 | end_boot_allocator(); |
1379 | 0 | |
1380 | 0 | /* Pass the remaining memory to the allocator. */ |
1381 | 0 | for ( i = 0; i < boot_e820.nr_map; i++ ) |
1382 | 0 | { |
1383 | 0 | uint64_t s, e; |
1384 | 0 | |
1385 | 0 | if ( boot_e820.map[i].type != E820_RAM ) |
1386 | 0 | continue; |
1387 | 0 | s = (boot_e820.map[i].addr + mask) & ~mask; |
1388 | 0 | e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask; |
1389 | 0 | if ( PFN_DOWN(e) <= limit ) |
1390 | 0 | continue; |
1391 | 0 | if ( PFN_DOWN(s) <= limit ) |
1392 | 0 | s = pfn_to_paddr(limit + 1); |
1393 | 0 | init_domheap_pages(s, e); |
1394 | 0 | } |
1395 | 0 | |
1396 | 0 | if ( tmem_enabled() ) |
1397 | 0 | { |
1398 | 0 | printk(XENLOG_WARNING |
1399 | 0 | "TMEM physical RAM limit exceeded, disabling TMEM\n"); |
1400 | 0 | tmem_disable(); |
1401 | 0 | } |
1402 | 0 | } |
1403 | 1 | else |
1404 | 1 | end_boot_allocator(); |
1405 | 1 | |
1406 | 1 | system_state = SYS_STATE_boot; |
1407 | 1 | /* |
1408 | 1 | * No calls involving ACPI code should go between the setting of |
1409 | 1 | * SYS_STATE_boot and vm_init() (or else acpi_os_{,un}map_memory() |
1410 | 1 | * will break). |
1411 | 1 | */ |
1412 | 1 | vm_init(); |
1413 | 1 | |
1414 | 1 | console_init_ring(); |
1415 | 1 | vesa_init(); |
1416 | 1 | |
1417 | 1 | softirq_init(); |
1418 | 1 | tasklet_subsys_init(); |
1419 | 1 | |
1420 | 1 | early_cpu_init(); |
1421 | 1 | |
1422 | 1 | paging_init(); |
1423 | 1 | |
1424 | 1 | tboot_probe(); |
1425 | 1 | |
1426 | 1 | open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period); |
1427 | 1 | |
1428 | 1 | if ( opt_watchdog ) |
1429 | 0 | nmi_watchdog = NMI_LOCAL_APIC; |
1430 | 1 | |
1431 | 1 | find_smp_config(); |
1432 | 1 | |
1433 | 1 | dmi_scan_machine(); |
1434 | 1 | |
1435 | 1 | generic_apic_probe(); |
1436 | 1 | |
1437 | 1 | acpi_boot_init(); |
1438 | 1 | |
1439 | 1 | if ( smp_found_config ) |
1440 | 1 | get_smp_config(); |
1441 | 1 | |
1442 | 1 | if ( opt_nosmp ) |
1443 | 0 | { |
1444 | 0 | max_cpus = 0; |
1445 | 0 | set_nr_cpu_ids(1); |
1446 | 0 | } |
1447 | 1 | else |
1448 | 1 | { |
1449 | 1 | set_nr_cpu_ids(max_cpus); |
1450 | 1 | max_cpus = nr_cpu_ids; |
1451 | 1 | } |
1452 | 1 | |
1453 | 1 | /* Low mappings were only needed for some BIOS table parsing. */ |
1454 | 1 | zap_low_mappings(); |
1455 | 1 | |
1456 | 1 | mmio_ro_ranges = rangeset_new(NULL, "r/o mmio ranges", |
1457 | 1 | RANGESETF_prettyprint_hex); |
1458 | 1 | |
1459 | 1 | init_apic_mappings(); |
1460 | 1 | |
1461 | 1 | normalise_cpu_order(); |
1462 | 1 | |
1463 | 1 | init_cpu_to_node(); |
1464 | 1 | |
1465 | 1 | x2apic_bsp_setup(); |
1466 | 1 | |
1467 | 1 | init_IRQ(); |
1468 | 1 | |
1469 | 1 | module_map = xmalloc_array(unsigned long, BITS_TO_LONGS(mbi->mods_count)); |
1470 | 1 | bitmap_fill(module_map, mbi->mods_count); |
1471 | 1 | __clear_bit(0, module_map); /* Dom0 kernel is always first */ |
1472 | 1 | |
1473 | 1 | xsm_multiboot_init(module_map, mbi, bootstrap_map); |
1474 | 1 | |
1475 | 1 | microcode_grab_module(module_map, mbi, bootstrap_map); |
1476 | 1 | |
1477 | 1 | timer_init(); |
1478 | 1 | |
1479 | 1 | early_microcode_init(); |
1480 | 1 | |
1481 | 1 | identify_cpu(&boot_cpu_data); |
1482 | 1 | |
1483 | 1 | set_in_cr4(X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT); |
1484 | 1 | |
1485 | 1 | if ( !opt_smep ) |
1486 | 0 | setup_clear_cpu_cap(X86_FEATURE_SMEP); |
1487 | 1 | if ( cpu_has_smep && opt_smep != SMEP_HVM_ONLY ) |
1488 | 1 | setup_force_cpu_cap(X86_FEATURE_XEN_SMEP); |
1489 | 1 | if ( boot_cpu_has(X86_FEATURE_XEN_SMEP) ) |
1490 | 1 | set_in_cr4(X86_CR4_SMEP); |
1491 | 1 | |
1492 | 1 | if ( !opt_smap ) |
1493 | 0 | setup_clear_cpu_cap(X86_FEATURE_SMAP); |
1494 | 1 | if ( cpu_has_smap && opt_smap != SMAP_HVM_ONLY ) |
1495 | 0 | setup_force_cpu_cap(X86_FEATURE_XEN_SMAP); |
1496 | 1 | if ( boot_cpu_has(X86_FEATURE_XEN_SMAP) ) |
1497 | 0 | set_in_cr4(X86_CR4_SMAP); |
1498 | 1 | |
1499 | 1 | cr4_pv32_mask = mmu_cr4_features & XEN_CR4_PV32_BITS; |
1500 | 1 | |
1501 | 1 | if ( cpu_has_fsgsbase ) |
1502 | 1 | set_in_cr4(X86_CR4_FSGSBASE); |
1503 | 1 | |
1504 | 1 | init_idle_domain(); |
1505 | 1 | |
1506 | 1 | this_cpu(stubs.addr) = alloc_stub_page(smp_processor_id(), |
1507 | 1 | &this_cpu(stubs).mfn); |
1508 | 1 | BUG_ON(!this_cpu(stubs.addr)); |
1509 | 1 | |
1510 | 1 | trap_init(); |
1511 | 1 | |
1512 | 1 | rcu_init(); |
1513 | 1 | |
1514 | 1 | early_time_init(); |
1515 | 1 | |
1516 | 1 | arch_init_memory(); |
1517 | 1 | |
1518 | 1 | alternative_instructions(); |
1519 | 1 | |
1520 | 1 | local_irq_enable(); |
1521 | 1 | |
1522 | 1 | pt_pci_init(); |
1523 | 1 | |
1524 | 1 | vesa_mtrr_init(); |
1525 | 1 | |
1526 | 1 | acpi_mmcfg_init(); |
1527 | 1 | |
1528 | 1 | early_msi_init(); |
1529 | 1 | |
1530 | 1 | iommu_setup(); /* setup iommu if available */ |
1531 | 1 | |
1532 | 1 | smp_prepare_cpus(max_cpus); |
1533 | 1 | |
1534 | 1 | spin_debug_enable(); |
1535 | 1 | |
1536 | 1 | /* |
1537 | 1 | * Initialise higher-level timer functions. We do this fairly late |
1538 | 1 | * (after interrupts have been enabled) because the time bases and scale |
1539 | 1 | * factors need to be updated regularly. |
1540 | 1 | */ |
1541 | 1 | init_xen_time(); |
1542 | 1 | |
1543 | 1 | initialize_keytable(); |
1544 | 1 | |
1545 | 1 | console_init_postirq(); |
1546 | 1 | |
1547 | 1 | system_state = SYS_STATE_smp_boot; |
1548 | 1 | |
1549 | 1 | do_presmp_initcalls(); |
1550 | 1 | |
1551 | 1 | for_each_present_cpu ( i ) |
1552 | 12 | { |
1553 | 12 | /* Set up cpu_to_node[]. */ |
1554 | 12 | srat_detect_node(i); |
1555 | 12 | /* Set up node_to_cpumask based on cpu_to_node[]. */ |
1556 | 12 | numa_add_cpu(i); |
1557 | 12 | |
1558 | 12 | if ( (num_online_cpus() < max_cpus) && !cpu_online(i) ) |
1559 | 11 | { |
1560 | 11 | int ret = cpu_up(i); |
1561 | 11 | if ( ret != 0 ) |
1562 | 0 | printk("Failed to bring up CPU %u (error %d)\n", i, ret); |
1563 | 11 | } |
1564 | 12 | } |
1565 | 1 | |
1566 | 1 | printk("Brought up %ld CPUs\n", (long)num_online_cpus()); |
1567 | 1 | smp_cpus_done(); |
1568 | 1 | |
1569 | 1 | do_initcalls(); |
1570 | 1 | |
1571 | 1 | if ( opt_watchdog ) |
1572 | 0 | watchdog_setup(); |
1573 | 1 | |
1574 | 1 | if ( !tboot_protect_mem_regions() ) |
1575 | 0 | panic("Could not protect TXT memory regions"); |
1576 | 1 | |
1577 | 1 | init_guest_cpuid(); |
1578 | 1 | init_guest_msr_policy(); |
1579 | 1 | |
1580 | 1 | if ( dom0_pvh ) |
1581 | 1 | { |
1582 | 1 | domcr_flags |= DOMCRF_hvm | |
1583 | 1 | ((hvm_funcs.hap_supported && !opt_dom0_shadow) ? |
1584 | 1 | DOMCRF_hap : 0); |
1585 | 1 | config.emulation_flags = XEN_X86_EMU_LAPIC|XEN_X86_EMU_IOAPIC| |
1586 | 1 | XEN_X86_EMU_VPCI; |
1587 | 1 | } |
1588 | 1 | |
1589 | 1 | /* Create initial domain 0. */ |
1590 | 1 | dom0 = domain_create(0, domcr_flags, 0, &config); |
1591 | 1 | if ( IS_ERR(dom0) || (alloc_dom0_vcpu0(dom0) == NULL) ) |
1592 | 0 | panic("Error creating domain 0"); |
1593 | 1 | |
1594 | 1 | dom0->is_privileged = 1; |
1595 | 1 | dom0->target = NULL; |
1596 | 1 | |
1597 | 1 | /* Grab the DOM0 command line. */ |
1598 | 1 | cmdline = (char *)(mod[0].string ? __va(mod[0].string) : NULL); |
1599 | 1 | if ( (cmdline != NULL) || (kextra != NULL) ) |
1600 | 0 | { |
1601 | 0 | static char __initdata dom0_cmdline[MAX_GUEST_CMDLINE]; |
1602 | 0 | |
1603 | 0 | cmdline = cmdline_cook(cmdline, loader); |
1604 | 0 | safe_strcpy(dom0_cmdline, cmdline); |
1605 | 0 | |
1606 | 0 | if ( kextra != NULL ) |
1607 | 0 | /* kextra always includes exactly one leading space. */ |
1608 | 0 | safe_strcat(dom0_cmdline, kextra); |
1609 | 0 | |
1610 | 0 | /* Append any extra parameters. */ |
1611 | 0 | if ( skip_ioapic_setup && !strstr(dom0_cmdline, "noapic") ) |
1612 | 0 | safe_strcat(dom0_cmdline, " noapic"); |
1613 | 0 | if ( (strlen(acpi_param) == 0) && acpi_disabled ) |
1614 | 0 | { |
1615 | 0 | printk("ACPI is disabled, notifying Domain 0 (acpi=off)\n"); |
1616 | 0 | safe_strcpy(acpi_param, "off"); |
1617 | 0 | } |
1618 | 0 | if ( (strlen(acpi_param) != 0) && !strstr(dom0_cmdline, "acpi=") ) |
1619 | 0 | { |
1620 | 0 | safe_strcat(dom0_cmdline, " acpi="); |
1621 | 0 | safe_strcat(dom0_cmdline, acpi_param); |
1622 | 0 | } |
1623 | 0 | |
1624 | 0 | cmdline = dom0_cmdline; |
1625 | 0 | } |
1626 | 1 | |
1627 | 1 | if ( xen_cpuidle ) |
1628 | 1 | xen_processor_pmbits |= XEN_PROCESSOR_PM_CX; |
1629 | 1 | |
1630 | 1 | initrdidx = find_first_bit(module_map, mbi->mods_count); |
1631 | 1 | if ( bitmap_weight(module_map, mbi->mods_count) > 1 ) |
1632 | 0 | printk(XENLOG_WARNING |
1633 | 0 | "Multiple initrd candidates, picking module #%u\n", |
1634 | 0 | initrdidx); |
1635 | 1 | |
1636 | 1 | /* |
1637 | 1 | * Temporarily clear SMAP in CR4 to allow user-accesses in construct_dom0(). |
1638 | 1 | * This saves a large number of corner-case interactions with |
1639 | 1 | * copy_from_user(). |
1640 | 1 | */ |
1641 | 1 | if ( cpu_has_smap ) |
1642 | 0 | { |
1643 | 0 | cr4_pv32_mask &= ~X86_CR4_SMAP; |
1644 | 0 | write_cr4(read_cr4() & ~X86_CR4_SMAP); |
1645 | 0 | } |
1646 | 1 | |
1647 | 1 | printk("%sNX (Execute Disable) protection %sactive\n", |
1648 | 1 | cpu_has_nx ? XENLOG_INFO : XENLOG_WARNING "Warning: ", |
1649 | 1 | cpu_has_nx ? "" : "not "); |
1650 | 1 | |
1651 | 1 | /* |
1652 | 1 | * We're going to set up domain0 using the module(s) that we stashed safely |
1653 | 1 | * above our heap. The second module, if present, is an initrd ramdisk. |
1654 | 1 | */ |
1655 | 1 | if ( construct_dom0(dom0, mod, modules_headroom, |
1656 | 1 | (initrdidx > 0) && (initrdidx < mbi->mods_count) |
1657 | 1 | ? mod + initrdidx : NULL, |
1658 | 1 | bootstrap_map, cmdline) != 0) |
1659 | 0 | panic("Could not set up DOM0 guest OS"); |
1660 | 1 | |
1661 | 1 | if ( cpu_has_smap ) |
1662 | 0 | { |
1663 | 0 | write_cr4(read_cr4() | X86_CR4_SMAP); |
1664 | 0 | cr4_pv32_mask |= X86_CR4_SMAP; |
1665 | 0 | } |
1666 | 1 | |
1667 | 1 | heap_init_late(); |
1668 | 1 | |
1669 | 1 | init_trace_bufs(); |
1670 | 1 | |
1671 | 1 | init_constructors(); |
1672 | 1 | |
1673 | 1 | console_endboot(); |
1674 | 1 | |
1675 | 1 | /* Hide UART from DOM0 if we're using it */ |
1676 | 1 | serial_endboot(); |
1677 | 1 | |
1678 | 1 | dmi_end_boot(); |
1679 | 1 | |
1680 | 1 | setup_io_bitmap(dom0); |
1681 | 1 | |
1682 | 1 | /* Jump to the 1:1 virtual mappings of cpu0_stack. */ |
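| | /* __va(__pa(...)) rebases the stack pointer onto the directmap alias of
| |  * cpu0_stack, and "%c[fn]" emits the bare address of reinit_bsp_stack()
| |  * so the jmp targets it directly. */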
1683 | 1 | asm volatile ("mov %[stk], %%rsp; jmp %c[fn]" :: |
1684 | 1 | [stk] "g" (__va(__pa(get_stack_bottom()))), |
1685 | 1 | [fn] "i" (reinit_bsp_stack) : "memory"); |
1686 | 1 | unreachable(); |
1687 | 1 | } |
1688 | | |
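| | /*
| |  * Builds the space-separated capability string reported via
| |  * XENVER_capabilities, e.g. "xen-3.0-x86_64 xen-3.0-x86_32p ...".
| |  */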
1689 | | void arch_get_xen_caps(xen_capabilities_info_t *info) |
1690 | 0 | { |
1691 | 0 | /* Interface name is always xen-3.0-* for Xen-3.x. */ |
1692 | 0 | int major = 3, minor = 0; |
1693 | 0 | char s[32]; |
1694 | 0 | |
1695 | 0 | (*info)[0] = '\0'; |
1696 | 0 | |
1697 | 0 | snprintf(s, sizeof(s), "xen-%d.%d-x86_64 ", major, minor); |
1698 | 0 | safe_strcat(*info, s); |
1699 | 0 | snprintf(s, sizeof(s), "xen-%d.%d-x86_32p ", major, minor); |
1700 | 0 | safe_strcat(*info, s); |
1701 | 0 | if ( hvm_enabled ) |
1702 | 0 | { |
1703 | 0 | snprintf(s, sizeof(s), "hvm-%d.%d-x86_32 ", major, minor); |
1704 | 0 | safe_strcat(*info, s); |
1705 | 0 | snprintf(s, sizeof(s), "hvm-%d.%d-x86_32p ", major, minor); |
1706 | 0 | safe_strcat(*info, s); |
1707 | 0 | snprintf(s, sizeof(s), "hvm-%d.%d-x86_64 ", major, minor); |
1708 | 0 | safe_strcat(*info, s); |
1709 | 0 | } |
1710 | 0 | } |
1711 | | |
1712 | | int __hwdom_init xen_in_range(unsigned long mfn) |
1713 | 0 | { |
1714 | 0 | paddr_t start, end; |
1715 | 0 | int i; |
1716 | 0 | |
1717 | 0 | enum { region_s3, region_ro, region_rw, nr_regions }; |
1718 | 0 | static struct { |
1719 | 0 | paddr_t s, e; |
1720 | 0 | } xen_regions[nr_regions] __hwdom_initdata; |
1721 | 0 | |
1722 | 0 | /* initialize first time */ |
1723 | 0 | if ( !xen_regions[0].s ) |
1724 | 0 | { |
1725 | 0 | /* S3 resume code (and other real mode trampoline code) */ |
1726 | 0 | xen_regions[region_s3].s = bootsym_phys(trampoline_start); |
1727 | 0 | xen_regions[region_s3].e = bootsym_phys(trampoline_end); |
1728 | 0 | |
1729 | 0 | /* |
1730 | 0 | * This needs to remain in sync with the uses of the same symbols in |
1731 | 0 | * - __start_xen() (above) |
1732 | 0 | * - is_xen_fixed_mfn() |
1733 | 0 | * - tboot_shutdown() |
1734 | 0 | */ |
1735 | 0 | |
1736 | 0 | /* hypervisor .text + .rodata */ |
1737 | 0 | xen_regions[region_ro].s = __pa(&_stext); |
1738 | 0 | xen_regions[region_ro].e = __pa(&__2M_rodata_end); |
1739 | 0 | /* hypervisor .data + .bss */ |
1740 | 0 | xen_regions[region_rw].s = __pa(&__2M_rwdata_start); |
1741 | 0 | xen_regions[region_rw].e = __pa(&__2M_rwdata_end); |
1742 | 0 | } |
1743 | 0 | |
1744 | 0 | start = (paddr_t)mfn << PAGE_SHIFT; |
1745 | 0 | end = start + PAGE_SIZE; |
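| | /* The page [start, end) overlaps a region iff start < region end and
| |  * end > region start. */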
1746 | 0 | for ( i = 0; i < nr_regions; i++ ) |
1747 | 0 | if ( (start < xen_regions[i].e) && (end > xen_regions[i].s) ) |
1748 | 0 | return 1; |
1749 | 0 | |
1750 | 0 | return 0; |
1751 | 0 | } |
1752 | | |
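| | /*
| |  * Rangeset callback: clear the intercept bit for every port in [s, e] that
| |  * the domain has been granted direct access to, so such accesses no longer
| |  * trap to Xen.
| |  */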
1753 | | static int __hwdom_init io_bitmap_cb(unsigned long s, unsigned long e, |
1754 | | void *ctx) |
1755 | 8 | { |
1756 | 8 | struct domain *d = ctx; |
1757 | 8 | unsigned int i; |
1758 | 8 | |
1759 | 8 | ASSERT(e <= INT_MAX); |
1760 | 65.5k | for ( i = s; i <= e; i++ ) |
1761 | 65.5k | __clear_bit(i, d->arch.hvm_domain.io_bitmap); |
1762 | 8 | |
1763 | 8 | return 0; |
1764 | 8 | } |
1765 | | |
1766 | | void __hwdom_init setup_io_bitmap(struct domain *d) |
1767 | 1 | { |
1768 | 1 | int rc; |
1769 | 1 | |
1770 | 1 | if ( is_hvm_domain(d) ) |
1771 | 1 | { |
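| | /* Default to trapping all 0x10000 ports, then punch holes for the
| |  * ranges granted in d->arch.ioport_caps via io_bitmap_cb() above. */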
1772 | 1 | bitmap_fill(d->arch.hvm_domain.io_bitmap, 0x10000); |
1773 | 1 | rc = rangeset_report_ranges(d->arch.ioport_caps, 0, 0x10000, |
1774 | 1 | io_bitmap_cb, d); |
1775 | 1 | BUG_ON(rc); |
1776 | 1 | /* |
1777 | 1 | * NB: accesses to 0xcf8 need to be trapped so that Xen can intercept |
1778 | 1 | * the 4-byte writes selecting the PCI config-space address and keep |
1779 | 1 | * its view of that register consistent. |
1780 | 1 | * Accesses to the 1-byte RTC ports also need to be trapped in order |
1781 | 1 | * to keep consistency with PV. |
1782 | 1 | */ |
1783 | 1 | __set_bit(0xcf8, d->arch.hvm_domain.io_bitmap); |
1784 | 1 | __set_bit(RTC_PORT(0), d->arch.hvm_domain.io_bitmap); |
1785 | 1 | __set_bit(RTC_PORT(1), d->arch.hvm_domain.io_bitmap); |
1786 | 1 | } |
1787 | 1 | } |
1788 | | |
1789 | | /* |
1790 | | * Local variables: |
1791 | | * mode: C |
1792 | | * c-file-style: "BSD" |
1793 | | * c-basic-offset: 4 |
1794 | | * tab-width: 4 |
1795 | | * indent-tabs-mode: nil |
1796 | | * End: |
1797 | | */ |