Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/arch/x86/setup.c
Line
Count
Source (jump to first uncovered line)
1
#include <xen/init.h>
2
#include <xen/lib.h>
3
#include <xen/err.h>
4
#include <xen/sched.h>
5
#include <xen/sched-if.h>
6
#include <xen/domain.h>
7
#include <xen/serial.h>
8
#include <xen/softirq.h>
9
#include <xen/acpi.h>
10
#include <xen/efi.h>
11
#include <xen/console.h>
12
#include <xen/serial.h>
13
#include <xen/trace.h>
14
#include <xen/multiboot.h>
15
#include <xen/domain_page.h>
16
#include <xen/version.h>
17
#include <xen/gdbstub.h>
18
#include <xen/percpu.h>
19
#include <xen/hypercall.h>
20
#include <xen/keyhandler.h>
21
#include <xen/numa.h>
22
#include <xen/rcupdate.h>
23
#include <xen/vga.h>
24
#include <xen/dmi.h>
25
#include <xen/pfn.h>
26
#include <xen/nodemask.h>
27
#include <xen/tmem_xen.h>
28
#include <xen/virtual_region.h>
29
#include <xen/watchdog.h>
30
#include <public/version.h>
31
#include <compat/platform.h>
32
#include <compat/xen.h>
33
#include <xen/bitops.h>
34
#include <asm/smp.h>
35
#include <asm/processor.h>
36
#include <asm/mpspec.h>
37
#include <asm/apic.h>
38
#include <asm/msi.h>
39
#include <asm/desc.h>
40
#include <asm/paging.h>
41
#include <asm/e820.h>
42
#include <xen/kexec.h>
43
#include <asm/edd.h>
44
#include <xsm/xsm.h>
45
#include <asm/tboot.h>
46
#include <asm/bzimage.h> /* for bzimage_headroom */
47
#include <asm/mach-generic/mach_apic.h> /* for generic_apic_probe */
48
#include <asm/setup.h>
49
#include <xen/cpu.h>
50
#include <asm/nmi.h>
51
#include <asm/alternative.h>
52
#include <asm/mc146818rtc.h>
53
#include <asm/cpuid.h>
54
55
/* opt_nosmp: If true, secondary processors are ignored. */
56
static bool __initdata opt_nosmp;
57
boolean_param("nosmp", opt_nosmp);
58
59
/* maxcpus: maximum number of CPUs to activate. */
60
static unsigned int __initdata max_cpus;
61
integer_param("maxcpus", max_cpus);
62
63
unsigned long __read_mostly cr4_pv32_mask;
64
65
/* **** Linux config option: propagated to domain0. */
66
/* "acpi=off":    Disables both ACPI table parsing and interpreter. */
67
/* "acpi=force":  Override the disable blacklist.                   */
68
/* "acpi=ht":     Limit ACPI just to boot-time to enable HT.        */
69
/* "acpi=noirq":  Disables ACPI interrupt routing.                  */
70
static int parse_acpi_param(const char *s);
71
custom_param("acpi", parse_acpi_param);
72
73
/* **** Linux config option: propagated to domain0. */
74
/* noapic: Disable IOAPIC setup. */
75
boolean_param("noapic", skip_ioapic_setup);
76
77
/* **** Linux config option: propagated to domain0. */
78
/* xen_cpuidle: xen control cstate. */
79
s8 __read_mostly xen_cpuidle = -1;
80
boolean_param("cpuidle", xen_cpuidle);
81
82
#ifndef NDEBUG
83
unsigned long __initdata highmem_start;
84
size_param("highmem-start", highmem_start);
85
#endif
86
87
cpumask_t __read_mostly cpu_present_map;
88
89
unsigned long __read_mostly xen_phys_start;
90
91
unsigned long __read_mostly xen_virt_end;
92
93
DEFINE_PER_CPU(struct tss_struct, init_tss);
94
95
char __section(".bss.stack_aligned") __aligned(STACK_SIZE)
96
    cpu0_stack[STACK_SIZE];
97
98
struct cpuinfo_x86 __read_mostly boot_cpu_data = { 0, 0, 0, 0, -1 };
99
100
unsigned long __read_mostly mmu_cr4_features = XEN_MINIMAL_CR4;
101
102
/* smep: Enable/disable Supervisor Mode Execution Protection (default on). */
103
1
#define SMEP_HVM_ONLY (-1)
104
static s8 __initdata opt_smep = 1;
105
106
static int __init parse_smep_param(const char *s)
107
0
{
108
0
    if ( !*s )
109
0
    {
110
0
        opt_smep = 1;
111
0
        return 0;
112
0
    }
113
0
114
0
    switch ( parse_bool(s, NULL) )
115
0
    {
116
0
    case 0:
117
0
        opt_smep = 0;
118
0
        return 0;
119
0
    case 1:
120
0
        opt_smep = 1;
121
0
        return 0;
122
0
    }
123
0
124
0
    if ( !strcmp(s, "hvm") )
125
0
        opt_smep = SMEP_HVM_ONLY;
126
0
    else
127
0
        return -EINVAL;
128
0
129
0
    return 0;
130
0
}
131
custom_param("smep", parse_smep_param);
132
133
/* smap: Enable/disable Supervisor Mode Access Prevention (default on). */
134
0
#define SMAP_HVM_ONLY (-1)
135
static s8 __initdata opt_smap = 1;
136
137
static int __init parse_smap_param(const char *s)
138
0
{
139
0
    if ( !*s )
140
0
    {
141
0
        opt_smap = 1;
142
0
        return 0;
143
0
    }
144
0
145
0
    switch ( parse_bool(s, NULL) )
146
0
    {
147
0
    case 0:
148
0
        opt_smap = 0;
149
0
        return 0;
150
0
    case 1:
151
0
        opt_smap = 1;
152
0
        return 0;
153
0
    }
154
0
155
0
    if ( !strcmp(s, "hvm") )
156
0
        opt_smap = SMAP_HVM_ONLY;
157
0
    else
158
0
        return -EINVAL;
159
0
160
0
    return 0;
161
0
}
162
custom_param("smap", parse_smap_param);
163
164
bool __read_mostly acpi_disabled;
165
bool __initdata acpi_force;
166
static char __initdata acpi_param[10] = "";
167
168
static int __init parse_acpi_param(const char *s)
169
0
{
170
0
    /* Save the parameter so it can be propagated to domain0. */
171
0
    safe_strcpy(acpi_param, s);
172
0
173
0
    /* Interpret the parameter for use within Xen. */
174
0
    if ( !parse_bool(s, NULL) )
175
0
    {
176
0
        disable_acpi();
177
0
    }
178
0
    else if ( !strcmp(s, "force") )
179
0
    {
180
0
        acpi_force = true;
181
0
        acpi_ht = 1;
182
0
        acpi_disabled = false;
183
0
    }
184
0
    else if ( !strcmp(s, "ht") )
185
0
    {
186
0
        if ( !acpi_force )
187
0
            disable_acpi();
188
0
        acpi_ht = 1;
189
0
    }
190
0
    else if ( !strcmp(s, "noirq") )
191
0
    {
192
0
        acpi_noirq_set();
193
0
    }
194
0
    else
195
0
        return -EINVAL;
196
0
197
0
    return 0;
198
0
}
199
200
static const module_t *__initdata initial_images;
201
static unsigned int __initdata nr_initial_images;
202
203
unsigned long __init initial_images_nrpages(nodeid_t node)
204
1
{
205
1
    unsigned long node_start = node_start_pfn(node);
206
1
    unsigned long node_end = node_end_pfn(node);
207
1
    unsigned long nr;
208
1
    unsigned int i;
209
1
210
3
    for ( nr = i = 0; i < nr_initial_images; ++i )
211
2
    {
212
2
        unsigned long start = initial_images[i].mod_start;
213
2
        unsigned long end = start + PFN_UP(initial_images[i].mod_end);
214
2
215
2
        if ( end > node_start && node_end > start )
216
2
            nr += min(node_end, end) - max(node_start, start);
217
2
    }
218
1
219
1
    return nr;
220
1
}
221
222
void __init discard_initial_images(void)
223
1
{
224
1
    unsigned int i;
225
1
226
3
    for ( i = 0; i < nr_initial_images; ++i )
227
2
    {
228
2
        uint64_t start = (uint64_t)initial_images[i].mod_start << PAGE_SHIFT;
229
2
230
2
        init_domheap_pages(start,
231
2
                           start + PAGE_ALIGN(initial_images[i].mod_end));
232
2
    }
233
1
234
1
    nr_initial_images = 0;
235
1
    initial_images = NULL;
236
1
}
237
238
extern char __init_begin[], __init_end[], __bss_start[], __bss_end[];
239
240
static void __init init_idle_domain(void)
241
1
{
242
1
    scheduler_init();
243
1
    set_current(idle_vcpu[0]);
244
1
    this_cpu(curr_vcpu) = current;
245
1
}
246
247
void srat_detect_node(int cpu)
248
12
{
249
12
    nodeid_t node;
250
12
    u32 apicid = x86_cpu_to_apicid[cpu];
251
12
252
12
    node = apicid < MAX_LOCAL_APIC ? apicid_to_node[apicid] : NUMA_NO_NODE;
253
12
    if ( node == NUMA_NO_NODE )
254
12
        node = 0;
255
12
256
12
    node_set_online(node);
257
12
    numa_set_node(cpu, node);
258
12
259
12
    if ( opt_cpu_info && acpi_numa > 0 )
260
0
        printk("CPU %d APIC %d -> Node %d\n", cpu, apicid, node);
261
12
}
262
263
/*
264
 * Sort CPUs by <node,package,core,thread> tuple. Fortunately this hierarchy is
265
 * reflected in the structure of modern APIC identifiers, so we sort based on
266
 * those. This is slightly complicated by the fact that the BSP must remain
267
 * CPU 0. Hence we do a variation on longest-prefix matching to do the best we
268
 * can while keeping CPU 0 static.
269
 */
270
static void __init normalise_cpu_order(void)
271
1
{
272
1
    unsigned int i, j, min_cpu;
273
1
    uint32_t apicid, diff, min_diff;
274
1
275
1
    for_each_present_cpu ( i )
276
12
    {
277
12
        apicid = x86_cpu_to_apicid[i];
278
12
        min_diff = min_cpu = ~0u;
279
12
280
12
        /*
281
12
         * Find remaining CPU with longest-prefix match on APIC ID.
282
12
         * Among identical longest-prefix matches, pick the smallest APIC ID.
283
12
         */
284
12
        for ( j = cpumask_next(i, &cpu_present_map);
285
78
              j < nr_cpu_ids;
286
66
              j = cpumask_next(j, &cpu_present_map) )
287
66
        {
288
66
            diff = x86_cpu_to_apicid[j] ^ apicid;
289
136
            while ( diff & (diff-1) )
290
70
                diff &= diff-1;
291
66
            if ( (diff < min_diff) ||
292
46
                 ((diff == min_diff) &&
293
14
                  (x86_cpu_to_apicid[j] < x86_cpu_to_apicid[min_cpu])) )
294
23
            {
295
23
                min_diff = diff;
296
23
                min_cpu = j;
297
23
            }
298
66
        }
299
12
300
12
        /* If no match then there must be no CPUs remaining to consider. */
301
12
        if ( min_cpu >= nr_cpu_ids )
302
1
        {
303
1
            BUG_ON(cpumask_next(i, &cpu_present_map) < nr_cpu_ids);
304
1
            break;
305
1
        }
306
12
307
12
        /* Switch the best-matching CPU with the next CPU in logical order. */
308
11
        j = cpumask_next(i, &cpu_present_map);
309
11
        apicid = x86_cpu_to_apicid[min_cpu];
310
11
        x86_cpu_to_apicid[min_cpu] = x86_cpu_to_apicid[j];
311
11
        x86_cpu_to_apicid[j] = apicid;
312
11
    }
313
1
}
314
315
27
#define BOOTSTRAP_MAP_BASE  (16UL << 20)
316
14
#define BOOTSTRAP_MAP_LIMIT (1UL << L3_PAGETABLE_SHIFT)
317
318
/*
319
 * Ensure a given physical memory range is present in the bootstrap mappings.
320
 * Use superpage mappings to ensure that pagetable memory needn't be allocated.
321
 */
322
static void *__init bootstrap_map(const module_t *mod)
323
12
{
324
12
    static unsigned long __initdata map_cur = BOOTSTRAP_MAP_BASE;
325
12
    uint64_t start, end, mask = (1L << L2_PAGETABLE_SHIFT) - 1;
326
12
    void *ret;
327
12
328
12
    if ( system_state != SYS_STATE_early_boot )
329
1
        return mod ? mfn_to_virt(mod->mod_start) : NULL;
330
12
331
11
    if ( !mod )
332
4
    {
333
4
        destroy_xen_mappings(BOOTSTRAP_MAP_BASE, BOOTSTRAP_MAP_LIMIT);
334
4
        map_cur = BOOTSTRAP_MAP_BASE;
335
4
        return NULL;
336
4
    }
337
11
338
7
    start = (uint64_t)mod->mod_start << PAGE_SHIFT;
339
7
    end = start + mod->mod_end;
340
7
    if ( start >= end )
341
0
        return NULL;
342
7
343
7
    ret = (void *)(map_cur + (unsigned long)(start & mask));
344
7
    start &= ~mask;
345
7
    end = (end + mask) & ~mask;
346
7
    if ( end - start > BOOTSTRAP_MAP_LIMIT - map_cur )
347
0
        return NULL;
348
7
349
7
    map_pages_to_xen(map_cur, start >> PAGE_SHIFT,
350
7
                     (end - start) >> PAGE_SHIFT, PAGE_HYPERVISOR);
351
7
    map_cur += end - start;
352
7
    return ret;
353
7
}
354
355
static void *__init move_memory(
356
    uint64_t dst, uint64_t src, unsigned int size, bool keep)
357
3
{
358
3
    unsigned int blksz = BOOTSTRAP_MAP_LIMIT - BOOTSTRAP_MAP_BASE;
359
3
    unsigned int mask = (1L << L2_PAGETABLE_SHIFT) - 1;
360
3
361
3
    if ( src + size > BOOTSTRAP_MAP_BASE )
362
2
        blksz >>= 1;
363
3
364
6
    while ( size )
365
3
    {
366
3
        module_t mod;
367
3
        unsigned int soffs = src & mask;
368
3
        unsigned int doffs = dst & mask;
369
3
        unsigned int sz;
370
3
        void *d, *s;
371
3
372
3
        mod.mod_start = (src - soffs) >> PAGE_SHIFT;
373
3
        mod.mod_end = soffs + size;
374
3
        if ( mod.mod_end > blksz )
375
0
            mod.mod_end = blksz;
376
3
        sz = mod.mod_end - soffs;
377
3
        s = bootstrap_map(&mod);
378
3
379
3
        mod.mod_start = (dst - doffs) >> PAGE_SHIFT;
380
3
        mod.mod_end = doffs + size;
381
3
        if ( mod.mod_end > blksz )
382
0
            mod.mod_end = blksz;
383
3
        if ( sz > mod.mod_end - doffs )
384
0
            sz = mod.mod_end - doffs;
385
3
        d = bootstrap_map(&mod);
386
3
387
3
        memmove(d + doffs, s + soffs, sz);
388
3
389
3
        dst += sz;
390
3
        src += sz;
391
3
        size -= sz;
392
3
393
3
        if ( keep )
394
0
            return size ? NULL : d + doffs;
395
3
396
3
        bootstrap_map(NULL);
397
3
    }
398
3
399
3
    return NULL;
400
3
}
401
402
static uint64_t __init consider_modules(
403
    uint64_t s, uint64_t e, uint32_t size, const module_t *mod,
404
    unsigned int nr_mods, unsigned int this_mod)
405
9
{
406
9
    unsigned int i;
407
9
408
9
    if ( s > e || e - s < size )
409
2
        return 0;
410
9
411
12
    for ( i = 0; i < nr_mods ; ++i )
412
8
    {
413
8
        uint64_t start = (uint64_t)mod[i].mod_start << PAGE_SHIFT;
414
8
        uint64_t end = start + PAGE_ALIGN(mod[i].mod_end);
415
8
416
8
        if ( i == this_mod )
417
2
            continue;
418
8
419
6
        if ( s < end && start < e )
420
3
        {
421
3
            end = consider_modules(end, e, size, mod + i + 1,
422
3
                                   nr_mods - i - 1, this_mod - i - 1);
423
3
            if ( end )
424
2
                return end;
425
3
426
1
            return consider_modules(s, start, size, mod + i + 1,
427
1
                                    nr_mods - i - 1, this_mod - i - 1);
428
3
        }
429
6
    }
430
7
431
4
    return e;
432
7
}
433
434
static void __init setup_max_pdx(unsigned long top_page)
435
2
{
436
2
    max_pdx = pfn_to_pdx(top_page - 1) + 1;
437
2
438
2
    if ( max_pdx > (DIRECTMAP_SIZE >> PAGE_SHIFT) )
439
0
        max_pdx = DIRECTMAP_SIZE >> PAGE_SHIFT;
440
2
441
2
    if ( max_pdx > FRAMETABLE_NR )
442
0
        max_pdx = FRAMETABLE_NR;
443
2
444
2
    if ( max_pdx > MPT_VIRT_SIZE / sizeof(unsigned long) )
445
0
        max_pdx = MPT_VIRT_SIZE / sizeof(unsigned long);
446
2
447
2
#ifdef PAGE_LIST_NULL
448
2
    if ( max_pdx >= PAGE_LIST_NULL )
449
0
        max_pdx = PAGE_LIST_NULL - 1;
450
2
#endif
451
2
452
2
    max_page = pdx_to_pfn(max_pdx - 1) + 1;
453
2
}
454
455
/* A temporary copy of the e820 map that we can mess with during bootstrap. */
456
static struct e820map __initdata boot_e820;
457
458
struct boot_video_info {
459
    u8  orig_x;             /* 0x00 */
460
    u8  orig_y;             /* 0x01 */
461
    u8  orig_video_mode;    /* 0x02 */
462
    u8  orig_video_cols;    /* 0x03 */
463
    u8  orig_video_lines;   /* 0x04 */
464
    u8  orig_video_isVGA;   /* 0x05 */
465
    u16 orig_video_points;  /* 0x06 */
466
467
    /* VESA graphic mode -- linear frame buffer */
468
    u32 capabilities;       /* 0x08 */
469
    u16 lfb_linelength;     /* 0x0c */
470
    u16 lfb_width;          /* 0x0e */
471
    u16 lfb_height;         /* 0x10 */
472
    u16 lfb_depth;          /* 0x12 */
473
    u32 lfb_base;           /* 0x14 */
474
    u32 lfb_size;           /* 0x18 */
475
    u8  red_size;           /* 0x1c */
476
    u8  red_pos;            /* 0x1d */
477
    u8  green_size;         /* 0x1e */
478
    u8  green_pos;          /* 0x1f */
479
    u8  blue_size;          /* 0x20 */
480
    u8  blue_pos;           /* 0x21 */
481
    u8  rsvd_size;          /* 0x22 */
482
    u8  rsvd_pos;           /* 0x23 */
483
    u16 vesapm_seg;         /* 0x24 */
484
    u16 vesapm_off;         /* 0x26 */
485
    u16 vesa_attrib;        /* 0x28 */
486
};
487
extern struct boot_video_info boot_vid_info;
488
489
static void __init parse_video_info(void)
490
1
{
491
1
    struct boot_video_info *bvi = &bootsym(boot_vid_info);
492
1
493
1
    /* vga_console_info is filled directly on EFI platform. */
494
1
    if ( efi_enabled(EFI_BOOT) )
495
0
        return;
496
1
497
1
    if ( (bvi->orig_video_isVGA == 1) && (bvi->orig_video_mode == 3) )
498
1
    {
499
1
        vga_console_info.video_type = XEN_VGATYPE_TEXT_MODE_3;
500
1
        vga_console_info.u.text_mode_3.font_height = bvi->orig_video_points;
501
1
        vga_console_info.u.text_mode_3.cursor_x = bvi->orig_x;
502
1
        vga_console_info.u.text_mode_3.cursor_y = bvi->orig_y;
503
1
        vga_console_info.u.text_mode_3.rows = bvi->orig_video_lines;
504
1
        vga_console_info.u.text_mode_3.columns = bvi->orig_video_cols;
505
1
    }
506
0
    else if ( bvi->orig_video_isVGA == 0x23 )
507
0
    {
508
0
        vga_console_info.video_type = XEN_VGATYPE_VESA_LFB;
509
0
        vga_console_info.u.vesa_lfb.width = bvi->lfb_width;
510
0
        vga_console_info.u.vesa_lfb.height = bvi->lfb_height;
511
0
        vga_console_info.u.vesa_lfb.bytes_per_line = bvi->lfb_linelength;
512
0
        vga_console_info.u.vesa_lfb.bits_per_pixel = bvi->lfb_depth;
513
0
        vga_console_info.u.vesa_lfb.lfb_base = bvi->lfb_base;
514
0
        vga_console_info.u.vesa_lfb.lfb_size = bvi->lfb_size;
515
0
        vga_console_info.u.vesa_lfb.red_pos = bvi->red_pos;
516
0
        vga_console_info.u.vesa_lfb.red_size = bvi->red_size;
517
0
        vga_console_info.u.vesa_lfb.green_pos = bvi->green_pos;
518
0
        vga_console_info.u.vesa_lfb.green_size = bvi->green_size;
519
0
        vga_console_info.u.vesa_lfb.blue_pos = bvi->blue_pos;
520
0
        vga_console_info.u.vesa_lfb.blue_size = bvi->blue_size;
521
0
        vga_console_info.u.vesa_lfb.rsvd_pos = bvi->rsvd_pos;
522
0
        vga_console_info.u.vesa_lfb.rsvd_size = bvi->rsvd_size;
523
0
        vga_console_info.u.vesa_lfb.gbl_caps = bvi->capabilities;
524
0
        vga_console_info.u.vesa_lfb.mode_attrs = bvi->vesa_attrib;
525
0
    }
526
1
}
527
528
static void __init kexec_reserve_area(struct e820map *e820)
529
2
{
530
2
#ifdef CONFIG_KEXEC
531
2
    unsigned long kdump_start = kexec_crash_area.start;
532
2
    unsigned long kdump_size  = kexec_crash_area.size;
533
2
    static bool __initdata is_reserved = false;
534
2
535
2
    kdump_size = (kdump_size + PAGE_SIZE - 1) & PAGE_MASK;
536
2
537
2
    if ( (kdump_start == 0) || (kdump_size == 0) || is_reserved )
538
2
        return;
539
2
540
0
    is_reserved = true;
541
0
542
0
    if ( !reserve_e820_ram(e820, kdump_start, kdump_start + kdump_size) )
543
0
    {
544
0
        printk("Kdump: DISABLED (failed to reserve %luMB (%lukB) at %#lx)"
545
0
               "\n", kdump_size >> 20, kdump_size >> 10, kdump_start);
546
0
        kexec_crash_area.start = kexec_crash_area.size = 0;
547
0
    }
548
0
    else
549
0
    {
550
0
        printk("Kdump: %luMB (%lukB) at %#lx\n",
551
0
               kdump_size >> 20, kdump_size >> 10, kdump_start);
552
0
    }
553
0
#endif
554
0
}
555
556
static inline bool using_2M_mapping(void)
557
2
{
558
2
    return !l1_table_offset((unsigned long)__2M_text_end) &&
559
0
           !l1_table_offset((unsigned long)__2M_rodata_start) &&
560
0
           !l1_table_offset((unsigned long)__2M_rodata_end) &&
561
0
           !l1_table_offset((unsigned long)__2M_init_start) &&
562
0
           !l1_table_offset((unsigned long)__2M_init_end) &&
563
0
           !l1_table_offset((unsigned long)__2M_rwdata_start) &&
564
0
           !l1_table_offset((unsigned long)__2M_rwdata_end);
565
2
}
566
567
static void noinline init_done(void)
568
1
{
569
1
    void *va;
570
1
    unsigned long start, end;
571
1
572
1
    system_state = SYS_STATE_active;
573
1
574
1
    /* MUST be done prior to removing .init data. */
575
1
    unregister_init_virtual_region();
576
1
577
1
    domain_unpause_by_systemcontroller(hardware_domain);
578
1
579
1
    /* Zero the .init code and data. */
580
113
    for ( va = __init_begin; va < _p(__init_end); va += PAGE_SIZE )
581
112
        clear_page(va);
582
1
583
1
    /* Destroy Xen's mappings, and reuse the pages. */
584
1
    if ( using_2M_mapping() )
585
0
    {
586
0
        start = (unsigned long)&__2M_init_start,
587
0
        end   = (unsigned long)&__2M_init_end;
588
0
    }
589
1
    else
590
1
    {
591
1
        start = (unsigned long)&__init_begin;
592
1
        end   = (unsigned long)&__init_end;
593
1
    }
594
1
595
1
    destroy_xen_mappings(start, end);
596
1
    init_xenheap_pages(__pa(start), __pa(end));
597
1
    printk("Freed %lukB init memory\n", (end - start) >> 10);
598
1
599
1
    startup_cpu_idle_loop();
600
1
}
601
602
/* Reinitialise all state referring to the old virtual address of the stack. */
603
static void __init noreturn reinit_bsp_stack(void)
604
1
{
605
1
    unsigned long *stack = (void*)(get_stack_bottom() & ~(STACK_SIZE - 1));
606
1
607
1
    /* Update TSS and ISTs */
608
1
    load_system_tables();
609
1
610
1
    /* Update SYSCALL trampolines */
611
1
    percpu_traps_init();
612
1
613
1
    stack_base[0] = stack;
614
1
    memguard_guard_stack(stack);
615
1
616
1
    reset_stack_and_jump(init_done);
617
1
}
618
619
static bool __init loader_is_grub2(const char *loader_name)
620
1
{
621
1
    /* GRUB1="GNU GRUB 0.xx"; GRUB2="GRUB 1.xx" */
622
1
    const char *p = strstr(loader_name, "GRUB ");
623
1
    return (p != NULL) && (p[5] != '0');
624
1
}
625
626
static char * __init cmdline_cook(char *p, const char *loader_name)
627
1
{
628
0
    p = p ? : "";
629
1
630
1
    /* Strip leading whitespace. */
631
1
    while ( *p == ' ' )
632
0
        p++;
633
1
634
1
    /* GRUB2 does not include image name as first item on command line. */
635
1
    if ( loader_is_grub2(loader_name) )
636
0
        return p;
637
1
638
1
    /* Strip image name plus whitespace. */
639
10
    while ( (*p != ' ') && (*p != '\0') )
640
9
        p++;
641
2
    while ( *p == ' ' )
642
1
        p++;
643
1
644
1
    return p;
645
1
}
646
647
void __init noreturn __start_xen(unsigned long mbi_p)
648
1
{
649
1
    char *memmap_type = NULL;
650
1
    char *cmdline, *kextra, *loader;
651
1
    unsigned int initrdidx, domcr_flags = DOMCRF_s3_integrity;
652
1
    multiboot_info_t *mbi = __va(mbi_p);
653
1
    module_t *mod = (module_t *)__va(mbi->mods_addr);
654
1
    unsigned long nr_pages, raw_max_page, modules_headroom, *module_map;
655
1
    int i, j, e820_warn = 0, bytes = 0;
656
1
    bool acpi_boot_table_init_done = false;
657
1
    struct domain *dom0;
658
1
    struct ns16550_defaults ns16550 = {
659
1
        .data_bits = 8,
660
1
        .parity    = 'n',
661
1
        .stop_bits = 1
662
1
    };
663
1
    struct xen_arch_domainconfig config = { .emulation_flags = 0 };
664
1
665
1
    /* Critical region without IDT or TSS.  Any fault is deadly! */
666
1
667
1
    set_processor_id(0);
668
1
    set_current(INVALID_VCPU); /* debug sanity. */
669
1
    idle_vcpu[0] = current;
670
1
671
1
    percpu_init_areas();
672
1
673
1
    init_idt_traps();
674
1
    load_system_tables();
675
1
676
1
    smp_prepare_boot_cpu();
677
1
    sort_exception_tables();
678
1
679
1
    setup_virtual_regions(__start___ex_table, __stop___ex_table);
680
1
681
1
    /* Full exception support from here on in. */
682
1
683
1
    loader = (mbi->flags & MBI_LOADERNAME)
684
1
        ? (char *)__va(mbi->boot_loader_name) : "unknown";
685
1
686
1
    /* Parse the command-line options. */
687
1
    cmdline = cmdline_cook((mbi->flags & MBI_CMDLINE) ?
688
1
                           __va(mbi->cmdline) : NULL,
689
1
                           loader);
690
1
    if ( (kextra = strstr(cmdline, " -- ")) != NULL )
691
0
    {
692
0
        /*
693
0
         * Options after ' -- ' separator belong to dom0.
694
0
         *  1. Orphan dom0's options from Xen's command line.
695
0
         *  2. Skip all but final leading space from dom0's options.
696
0
         */
697
0
        *kextra = '\0';
698
0
        kextra += 3;
699
0
        while ( kextra[1] == ' ' ) kextra++;
700
0
    }
701
1
    cmdline_parse(cmdline);
702
1
703
1
    /* Must be after command line argument parsing and before
704
1
     * allocing any xenheap structures wanted in lower memory. */
705
1
    kexec_early_calculations();
706
1
707
1
    parse_video_info();
708
1
709
1
    rdmsrl(MSR_EFER, this_cpu(efer));
710
1
    asm volatile ( "mov %%cr4,%0" : "=r" (get_cpu_info()->cr4) );
711
1
712
1
    /* We initialise the serial devices very early so we can get debugging. */
713
1
    ns16550.io_base = 0x3f8;
714
1
    ns16550.irq     = 4;
715
1
    ns16550_init(0, &ns16550);
716
1
    ns16550.io_base = 0x2f8;
717
1
    ns16550.irq     = 3;
718
1
    ns16550_init(1, &ns16550);
719
1
    ehci_dbgp_init();
720
1
    console_init_preirq();
721
1
722
1
    printk("Bootloader: %s\n", loader);
723
1
724
1
    printk("Command line: %s\n", cmdline);
725
1
726
1
    printk("Xen image load base address: %#lx\n", xen_phys_start);
727
1
728
1
    printk("Video information:\n");
729
1
730
1
    /* Print VGA display mode information. */
731
1
    switch ( vga_console_info.video_type )
732
1
    {
733
1
    case XEN_VGATYPE_TEXT_MODE_3:
734
1
        printk(" VGA is text mode %dx%d, font 8x%d\n",
735
1
               vga_console_info.u.text_mode_3.columns,
736
1
               vga_console_info.u.text_mode_3.rows,
737
1
               vga_console_info.u.text_mode_3.font_height);
738
1
        break;
739
0
    case XEN_VGATYPE_VESA_LFB:
740
0
    case XEN_VGATYPE_EFI_LFB:
741
0
        printk(" VGA is graphics mode %dx%d, %d bpp\n",
742
0
               vga_console_info.u.vesa_lfb.width,
743
0
               vga_console_info.u.vesa_lfb.height,
744
0
               vga_console_info.u.vesa_lfb.bits_per_pixel);
745
0
        break;
746
0
    default:
747
0
        printk(" No VGA detected\n");
748
0
        break;
749
1
    }
750
1
751
1
    /* Print VBE/DDC EDID information. */
752
1
    if ( bootsym(boot_edid_caps) != 0x1313 )
753
1
    {
754
1
        u16 caps = bootsym(boot_edid_caps);
755
1
        printk(" VBE/DDC methods:%s%s%s; ",
756
1
               (caps & 1) ? " V1" : "",
757
1
               (caps & 2) ? " V2" : "",
758
1
               !(caps & 3) ? " none" : "");
759
1
        printk("EDID transfer time: %d seconds\n", caps >> 8);
760
1
        if ( *(u32 *)bootsym(boot_edid_info) == 0x13131313 )
761
1
        {
762
1
            printk(" EDID info not retrieved because ");
763
1
            if ( !(caps & 3) )
764
0
                printk("no DDC retrieval method detected\n");
765
1
            else if ( (caps >> 8) > 5 )
766
0
                printk("takes longer than 5 seconds\n");
767
1
            else
768
1
                printk("of reasons unknown\n");
769
1
        }
770
1
    }
771
1
772
1
    printk("Disc information:\n");
773
1
    printk(" Found %d MBR signatures\n",
774
1
           bootsym(boot_mbr_signature_nr));
775
1
    printk(" Found %d EDD information structures\n",
776
1
           bootsym(boot_edd_info_nr));
777
1
778
1
    /* Check that we have at least one Multiboot module. */
779
1
    if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) )
780
0
        panic("dom0 kernel not specified. Check bootloader configuration.");
781
1
782
1
    if ( efi_enabled(EFI_LOADER) )
783
0
    {
784
0
        set_pdx_range(xen_phys_start >> PAGE_SHIFT,
785
0
                      (xen_phys_start + BOOTSTRAP_MAP_BASE) >> PAGE_SHIFT);
786
0
787
0
        /* Clean up boot loader identity mappings. */
788
0
        destroy_xen_mappings(xen_phys_start,
789
0
                             xen_phys_start + BOOTSTRAP_MAP_BASE);
790
0
791
0
        /* Make boot page tables match non-EFI boot. */
792
0
        l3_bootmap[l3_table_offset(BOOTSTRAP_MAP_BASE)] =
793
0
            l3e_from_paddr(__pa(l2_bootmap), __PAGE_HYPERVISOR);
794
0
795
0
        memmap_type = loader;
796
0
    }
797
1
    else if ( efi_enabled(EFI_BOOT) )
798
0
        memmap_type = "EFI";
799
1
    else if ( (e820_raw.nr_map = 
800
1
                   copy_bios_e820(e820_raw.map,
801
1
                                  ARRAY_SIZE(e820_raw.map))) != 0 )
802
1
    {
803
1
        memmap_type = "Xen-e820";
804
1
    }
805
0
    else if ( mbi->flags & MBI_MEMMAP )
806
0
    {
807
0
        memmap_type = "Multiboot-e820";
808
0
        while ( bytes < mbi->mmap_length &&
809
0
                e820_raw.nr_map < ARRAY_SIZE(e820_raw.map) )
810
0
        {
811
0
            memory_map_t *map = __va(mbi->mmap_addr + bytes);
812
0
813
0
            /*
814
0
             * This is a gross workaround for a BIOS bug. Some bootloaders do
815
0
             * not write e820 map entries into pre-zeroed memory. This is
816
0
             * okay if the BIOS fills in all fields of the map entry, but
817
0
             * some broken BIOSes do not bother to write the high word of
818
0
             * the length field if the length is smaller than 4GB. We
819
0
             * detect and fix this by flagging sections below 4GB that
820
0
             * appear to be larger than 4GB in size.
821
0
             */
822
0
            if ( (map->base_addr_high == 0) && (map->length_high != 0) )
823
0
            {
824
0
                if ( !e820_warn )
825
0
                {
826
0
                    printk("WARNING: Buggy e820 map detected and fixed "
827
0
                           "(truncated length fields).\n");
828
0
                    e820_warn = 1;
829
0
                }
830
0
                map->length_high = 0;
831
0
            }
832
0
833
0
            e820_raw.map[e820_raw.nr_map].addr =
834
0
                ((u64)map->base_addr_high << 32) | (u64)map->base_addr_low;
835
0
            e820_raw.map[e820_raw.nr_map].size =
836
0
                ((u64)map->length_high << 32) | (u64)map->length_low;
837
0
            e820_raw.map[e820_raw.nr_map].type = map->type;
838
0
            e820_raw.nr_map++;
839
0
840
0
            bytes += map->size + 4;
841
0
        }
842
0
    }
843
0
    else if ( bootsym(lowmem_kb) )
844
0
    {
845
0
        memmap_type = "Xen-e801";
846
0
        e820_raw.map[0].addr = 0;
847
0
        e820_raw.map[0].size = bootsym(lowmem_kb) << 10;
848
0
        e820_raw.map[0].type = E820_RAM;
849
0
        e820_raw.map[1].addr = 0x100000;
850
0
        e820_raw.map[1].size = bootsym(highmem_kb) << 10;
851
0
        e820_raw.map[1].type = E820_RAM;
852
0
        e820_raw.nr_map = 2;
853
0
    }
854
0
    else if ( mbi->flags & MBI_MEMLIMITS )
855
0
    {
856
0
        memmap_type = "Multiboot-e801";
857
0
        e820_raw.map[0].addr = 0;
858
0
        e820_raw.map[0].size = mbi->mem_lower << 10;
859
0
        e820_raw.map[0].type = E820_RAM;
860
0
        e820_raw.map[1].addr = 0x100000;
861
0
        e820_raw.map[1].size = mbi->mem_upper << 10;
862
0
        e820_raw.map[1].type = E820_RAM;
863
0
        e820_raw.nr_map = 2;
864
0
    }
865
0
    else
866
0
        panic("Bootloader provided no memory information.");
867
1
868
1
    /* Sanitise the raw E820 map to produce a final clean version. */
869
1
    max_page = raw_max_page = init_e820(memmap_type, &e820_raw);
870
1
871
1
    /* Create a temporary copy of the E820 map. */
872
1
    memcpy(&boot_e820, &e820, sizeof(e820));
873
1
874
1
    /* Early kexec reservation (explicit static start address). */
875
1
    nr_pages = 0;
876
20
    for ( i = 0; i < e820.nr_map; i++ )
877
19
        if ( e820.map[i].type == E820_RAM )
878
8
            nr_pages += e820.map[i].size >> PAGE_SHIFT;
879
1
    set_kexec_crash_area_size((u64)nr_pages << PAGE_SHIFT);
880
1
    kexec_reserve_area(&boot_e820);
881
1
882
1
    initial_images = mod;
883
1
    nr_initial_images = mbi->mods_count;
884
1
885
1
    /*
886
1
     * Iterate backwards over all superpage-aligned RAM regions.
887
1
     * 
888
1
     * We require superpage alignment because the boot allocator is not yet
889
1
     * initialised. Hence we can only map superpages in the address range
890
1
     * 0 to BOOTSTRAP_DIRECTMAP_END, as this is guaranteed not to require
891
1
     * dynamic allocation of pagetables.
892
1
     * 
893
1
     * As well as mapping superpages in that range, in preparation for
894
1
     * initialising the boot allocator, we also look for a region to which
895
1
     * we can relocate the dom0 kernel and other multiboot modules. Also, on
896
1
     * x86/64, we relocate Xen to higher memory.
897
1
     */
898
3
    for ( i = 0; !efi_enabled(EFI_LOADER) && i < mbi->mods_count; i++ )
899
2
    {
900
2
        if ( mod[i].mod_start & (PAGE_SIZE - 1) )
901
0
            panic("Bootloader didn't honor module alignment request.");
902
2
        mod[i].mod_end -= mod[i].mod_start;
903
2
        mod[i].mod_start >>= PAGE_SHIFT;
904
2
        mod[i].reserved = 0;
905
2
    }
906
1
907
1
    if ( efi_enabled(EFI_LOADER) )
908
0
    {
909
0
        /*
910
0
         * This needs to remain in sync with xen_in_range() and the
911
0
         * respective reserve_e820_ram() invocation below.
912
0
         */
913
0
        mod[mbi->mods_count].mod_start = virt_to_mfn(_stext);
914
0
        mod[mbi->mods_count].mod_end = __2M_rwdata_end - _stext;
915
0
    }
916
1
917
1
    modules_headroom = bzimage_headroom(bootstrap_map(mod), mod->mod_end);
918
1
    bootstrap_map(NULL);
919
1
920
1
#ifndef highmem_start
921
1
    /* Don't allow split below 4Gb. */
922
1
    if ( highmem_start < GB(4) )
923
1
        highmem_start = 0;
924
1
    else /* align to L3 entry boundary */
925
0
        highmem_start &= ~((1UL << L3_PAGETABLE_SHIFT) - 1);
926
1
#endif
927
1
928
20
    for ( i = boot_e820.nr_map-1; i >= 0; i-- )
929
19
    {
930
19
        uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
931
19
        uint64_t end, limit = ARRAY_SIZE(l2_identmap) << L2_PAGETABLE_SHIFT;
932
19
933
19
        /* Superpage-aligned chunks from BOOTSTRAP_MAP_BASE. */
934
19
        s = (boot_e820.map[i].addr + mask) & ~mask;
935
19
        e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
936
19
        s = max_t(uint64_t, s, BOOTSTRAP_MAP_BASE);
937
19
        if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
938
12
            continue;
939
19
940
7
        if ( s < limit )
941
6
        {
942
6
            end = min(e, limit);
943
6
            set_pdx_range(s >> PAGE_SHIFT, end >> PAGE_SHIFT);
944
6
            map_pages_to_xen((unsigned long)__va(s), s >> PAGE_SHIFT,
945
6
                             (end - s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
946
6
        }
947
7
948
7
        if ( e > min(HYPERVISOR_VIRT_END - DIRECTMAP_VIRT_START,
949
7
                     1UL << (PAGE_SHIFT + 32)) )
950
0
            e = min(HYPERVISOR_VIRT_END - DIRECTMAP_VIRT_START,
951
7
                    1UL << (PAGE_SHIFT + 32));
952
3
#define reloc_size ((__pa(__2M_rwdata_end) + mask) & ~mask)
953
7
        /* Is the region suitable for relocating Xen? */
954
7
        if ( !xen_phys_start && e <= limit )
955
2
        {
956
2
            /* Don't overlap with modules. */
957
2
            end = consider_modules(s, e, reloc_size + mask,
958
2
                                   mod, mbi->mods_count, -1);
959
2
            end &= ~mask;
960
2
        }
961
7
        else
962
5
            end = 0;
963
7
        if ( end > s )
964
1
        {
965
1
            l4_pgentry_t *pl4e;
966
1
            l3_pgentry_t *pl3e;
967
1
            l2_pgentry_t *pl2e;
968
1
            int i, j, k;
969
1
970
1
            /* Select relocation address. */
971
1
            e = end - reloc_size;
972
1
            xen_phys_start = e;
973
1
            bootsym(trampoline_xen_phys_start) = e;
974
1
975
1
            /*
976
1
             * Perform relocation to new physical address.
977
1
             * Before doing so we must sync static/global data with main memory
978
1
             * with a barrier(). After this we must *not* modify static/global
979
1
             * data until after we have switched to the relocated pagetables!
980
1
             */
981
1
            barrier();
982
1
            move_memory(e + XEN_IMG_OFFSET, XEN_IMG_OFFSET, _end - _start, 1);
983
1
984
1
            /* Walk initial pagetables, relocating page directory entries. */
985
1
            pl4e = __va(__pa(idle_pg_table));
986
1
            for ( i = 0 ; i < L4_PAGETABLE_ENTRIES; i++, pl4e++ )
987
0
            {
988
0
                if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) )
989
0
                    continue;
990
0
                *pl4e = l4e_from_intpte(l4e_get_intpte(*pl4e) +
991
0
                                        xen_phys_start);
992
0
                pl3e = l4e_to_l3e(*pl4e);
993
0
                for ( j = 0; j < L3_PAGETABLE_ENTRIES; j++, pl3e++ )
994
0
                {
995
0
                    /* Not present, 1GB mapping, or already relocated? */
996
0
                    if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ||
997
0
                         (l3e_get_flags(*pl3e) & _PAGE_PSE) ||
998
0
                         (l3e_get_pfn(*pl3e) > PFN_DOWN(xen_phys_start)) )
999
0
                        continue;
1000
0
                    *pl3e = l3e_from_intpte(l3e_get_intpte(*pl3e) +
1001
0
                                            xen_phys_start);
1002
0
                    pl2e = l3e_to_l2e(*pl3e);
1003
0
                    for ( k = 0; k < L2_PAGETABLE_ENTRIES; k++, pl2e++ )
1004
0
                    {
1005
0
                        /* Not present, PSE, or already relocated? */
1006
0
                        if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ||
1007
0
                             (l2e_get_flags(*pl2e) & _PAGE_PSE) ||
1008
0
                             (l2e_get_pfn(*pl2e) > PFN_DOWN(xen_phys_start)) )
1009
0
                            continue;
1010
0
                        *pl2e = l2e_from_intpte(l2e_get_intpte(*pl2e) +
1011
0
                                                xen_phys_start);
1012
0
                    }
1013
0
                }
1014
0
            }
1015
1
1016
1
            /* The only data mappings to be relocated are in the Xen area. */
1017
1
            pl2e = __va(__pa(l2_xenmap));
1018
1
            /*
1019
1
             * Undo the temporary-hooking of the l1_identmap.  __2M_text_start
1020
1
             * is contained in this PTE.
1021
1
             */
1022
1
            BUG_ON(using_2M_mapping() &&
1023
1
                   l2_table_offset((unsigned long)_erodata) ==
1024
1
                   l2_table_offset((unsigned long)_stext));
1025
1
            *pl2e++ = l2e_from_pfn(xen_phys_start >> PAGE_SHIFT,
1026
1
                                   PAGE_HYPERVISOR_RX | _PAGE_PSE);
1027
1
            for ( i = 1; i < L2_PAGETABLE_ENTRIES; i++, pl2e++ )
1028
0
            {
1029
0
                unsigned int flags;
1030
0
1031
0
                if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ||
1032
0
                     (l2e_get_pfn(*pl2e) > PFN_DOWN(xen_phys_start)) )
1033
0
                    continue;
1034
0
1035
0
                if ( !using_2M_mapping() )
1036
0
                {
1037
0
                    *pl2e = l2e_from_intpte(l2e_get_intpte(*pl2e) +
1038
0
                                            xen_phys_start);
1039
0
                    continue;
1040
0
                }
1041
0
1042
0
                if ( i < l2_table_offset((unsigned long)&__2M_text_end) )
1043
0
                {
1044
0
                    flags = PAGE_HYPERVISOR_RX | _PAGE_PSE;
1045
0
                }
1046
0
                else if ( i >= l2_table_offset((unsigned long)&__2M_rodata_start) &&
1047
0
                          i <  l2_table_offset((unsigned long)&__2M_rodata_end) )
1048
0
                {
1049
0
                    flags = PAGE_HYPERVISOR_RO | _PAGE_PSE;
1050
0
                }
1051
0
                else if ( i >= l2_table_offset((unsigned long)&__2M_init_start) &&
1052
0
                          i <  l2_table_offset((unsigned long)&__2M_init_end) )
1053
0
                {
1054
0
                    flags = PAGE_HYPERVISOR_RWX | _PAGE_PSE;
1055
0
                }
1056
0
                else if ( (i >= l2_table_offset((unsigned long)&__2M_rwdata_start) &&
1057
0
                           i <  l2_table_offset((unsigned long)&__2M_rwdata_end)) )
1058
0
                {
1059
0
                    flags = PAGE_HYPERVISOR_RW | _PAGE_PSE;
1060
0
                }
1061
0
                else
1062
0
                {
1063
0
                    *pl2e = l2e_empty();
1064
0
                    continue;
1065
0
                }
1066
0
1067
0
                *pl2e = l2e_from_paddr(
1068
0
                    l2e_get_paddr(*pl2e) + xen_phys_start, flags);
1069
0
            }
1070
1
1071
1
            /* Re-sync the stack and then switch to relocated pagetables. */
1072
1
            asm volatile (
1073
1
                "rep movsq        ; " /* re-sync the stack */
1074
1
                "movq %%cr4,%%rsi ; "
1075
1
                "andb $0x7f,%%sil ; "
1076
1
                "movq %%rsi,%%cr4 ; " /* CR4.PGE == 0 */
1077
1
                "movq %[pg],%%cr3 ; " /* CR3 == new pagetables */
1078
1
                "orb $0x80,%%sil  ; "
1079
1
                "movq %%rsi,%%cr4   " /* CR4.PGE == 1 */
1080
1
                : "=&S" (i), "=&D" (i), "=&c" (i) /* All outputs discarded. */
1081
1
                :  [pg] "r" (__pa(idle_pg_table)), "0" (cpu0_stack),
1082
1
                   "1" (__va(__pa(cpu0_stack))), "2" (STACK_SIZE / 8)
1083
1
                : "memory" );
1084
1
1085
1
            bootstrap_map(NULL);
1086
1
1087
1
            printk("New Xen image base address: %#lx\n", xen_phys_start);
1088
1
        }
1089
7
1090
7
        /* Is the region suitable for relocating the multiboot modules? */
1091
21
        for ( j = mbi->mods_count - 1; j >= 0; j-- )
1092
14
        {
1093
7
            unsigned long headroom = j ? 0 : modules_headroom;
1094
14
            unsigned long size = PAGE_ALIGN(headroom + mod[j].mod_end);
1095
14
1096
14
            if ( mod[j].reserved )
1097
12
                continue;
1098
14
1099
14
            /* Don't overlap with other modules (or Xen itself). */
1100
2
            end = consider_modules(s, e, size, mod,
1101
2
                                   mbi->mods_count + efi_enabled(EFI_LOADER),
1102
2
                                   j);
1103
2
1104
2
            if ( highmem_start && end > highmem_start )
1105
0
                continue;
1106
2
1107
2
            if ( s < end &&
1108
2
                 (headroom ||
1109
2
                  ((end - size) >> PAGE_SHIFT) > mod[j].mod_start) )
1110
2
            {
1111
2
                move_memory(end - size + headroom,
1112
2
                            (uint64_t)mod[j].mod_start << PAGE_SHIFT,
1113
2
                            mod[j].mod_end, 0);
1114
2
                mod[j].mod_start = (end - size) >> PAGE_SHIFT;
1115
2
                mod[j].mod_end += headroom;
1116
2
                mod[j].reserved = 1;
1117
2
            }
1118
2
        }
1119
7
1120
7
#ifdef CONFIG_KEXEC
1121
7
        /*
1122
7
         * Looking backwards from the crash area limit, find a large
1123
7
         * enough range that does not overlap with modules.
1124
7
         */
1125
8
        while ( !kexec_crash_area.start )
1126
1
        {
1127
1
            /* Don't overlap with modules (or Xen itself). */
1128
1
            e = consider_modules(s, e, PAGE_ALIGN(kexec_crash_area.size), mod,
1129
1
                                 mbi->mods_count + efi_enabled(EFI_LOADER), -1);
1130
1
            if ( s >= e )
1131
0
                break;
1132
1
            if ( e > kexec_crash_area_limit )
1133
0
            {
1134
0
                e = kexec_crash_area_limit & PAGE_MASK;
1135
0
                continue;
1136
0
            }
1137
1
            kexec_crash_area.start = (e - kexec_crash_area.size) & PAGE_MASK;
1138
1
        }
1139
7
#endif
1140
7
    }
1141
1
1142
1
    if ( modules_headroom && !mod->reserved )
1143
0
        panic("Not enough memory to relocate the dom0 kernel image.");
1144
3
    for ( i = 0; i < mbi->mods_count; ++i )
1145
2
    {
1146
2
        uint64_t s = (uint64_t)mod[i].mod_start << PAGE_SHIFT;
1147
2
1148
2
        reserve_e820_ram(&boot_e820, s, s + PAGE_ALIGN(mod[i].mod_end));
1149
2
    }
1150
1
1151
1
    if ( !xen_phys_start )
1152
0
        panic("Not enough memory to relocate Xen.");
1153
1
1154
1
    /* This needs to remain in sync with xen_in_range(). */
1155
1
    reserve_e820_ram(&boot_e820, __pa(_stext), __pa(__2M_rwdata_end));
1156
1
1157
1
    /* Late kexec reservation (dynamic start address). */
1158
1
    kexec_reserve_area(&boot_e820);
1159
1
1160
1
    setup_max_pdx(raw_max_page);
1161
1
    if ( highmem_start )
1162
0
        xenheap_max_mfn(PFN_DOWN(highmem_start - 1));
1163
1
1164
1
    /*
1165
1
     * Walk every RAM region and map it in its entirety (on x86/64, at least)
1166
1
     * and notify it to the boot allocator.
1167
1
     */
1168
23
    for ( i = 0; i < boot_e820.nr_map; i++ )
1169
22
    {
1170
22
        uint64_t s, e, mask = PAGE_SIZE - 1;
1171
22
        uint64_t map_s, map_e;
1172
22
1173
22
        /* Only page alignment required now. */
1174
22
        s = (boot_e820.map[i].addr + mask) & ~mask;
1175
22
        e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
1176
22
        s = max_t(uint64_t, s, 1<<20);
1177
22
        if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
1178
14
            continue;
1179
22
1180
8
        if ( !acpi_boot_table_init_done &&
1181
8
             s >= (1ULL << 32) &&
1182
1
             !acpi_boot_table_init() )
1183
1
        {
1184
1
            acpi_boot_table_init_done = true;
1185
1
            srat_parse_regions(s);
1186
1
            setup_max_pdx(raw_max_page);
1187
1
        }
1188
8
1189
8
        if ( pfn_to_pdx((e - 1) >> PAGE_SHIFT) >= max_pdx )
1190
0
        {
1191
0
            if ( pfn_to_pdx(s >> PAGE_SHIFT) >= max_pdx )
1192
0
            {
1193
0
                for ( j = i - 1; ; --j )
1194
0
                {
1195
0
                    if ( boot_e820.map[j].type == E820_RAM )
1196
0
                        break;
1197
0
                    ASSERT(j);
1198
0
                }
1199
0
                map_e = boot_e820.map[j].addr + boot_e820.map[j].size;
1200
0
                for ( j = 0; j < mbi->mods_count; ++j )
1201
0
                {
1202
0
                    uint64_t end = pfn_to_paddr(mod[j].mod_start) +
1203
0
                                   mod[j].mod_end;
1204
0
1205
0
                    if ( map_e < end )
1206
0
                        map_e = end;
1207
0
                }
1208
0
                if ( PFN_UP(map_e) < max_page )
1209
0
                {
1210
0
                    max_page = PFN_UP(map_e);
1211
0
                    max_pdx = pfn_to_pdx(max_page - 1) + 1;
1212
0
                }
1213
0
                printk(XENLOG_WARNING "Ignoring inaccessible memory range"
1214
0
                                      " %013"PRIx64"-%013"PRIx64"\n",
1215
0
                       s, e);
1216
0
                continue;
1217
0
            }
1218
0
            map_e = e;
1219
0
            e = (pdx_to_pfn(max_pdx - 1) + 1ULL) << PAGE_SHIFT;
1220
0
            printk(XENLOG_WARNING "Ignoring inaccessible memory range"
1221
0
                                  " %013"PRIx64"-%013"PRIx64"\n",
1222
0
                   e, map_e);
1223
0
        }
1224
8
1225
8
        set_pdx_range(s >> PAGE_SHIFT, e >> PAGE_SHIFT);
1226
8
1227
8
        /* Need to create mappings above BOOTSTRAP_MAP_BASE. */
1228
8
        map_s = max_t(uint64_t, s, BOOTSTRAP_MAP_BASE);
1229
8
        map_e = min_t(uint64_t, e,
1230
8
                      ARRAY_SIZE(l2_identmap) << L2_PAGETABLE_SHIFT);
1231
8
1232
8
        /* Pass mapped memory to allocator /before/ creating new mappings. */
1233
8
        init_boot_pages(s, min(map_s, e));
1234
8
        s = map_s;
1235
8
        if ( s < map_e )
1236
7
        {
1237
7
            uint64_t mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
1238
7
1239
7
            map_s = (s + mask) & ~mask;
1240
7
            map_e &= ~mask;
1241
7
            init_boot_pages(map_s, map_e);
1242
7
        }
1243
8
1244
8
        if ( map_s > map_e )
1245
0
            map_s = map_e = s;
1246
8
1247
8
        /* Create new mappings /before/ passing memory to the allocator. */
1248
8
        if ( map_e < e )
1249
6
        {
1250
6
            uint64_t limit = __pa(HYPERVISOR_VIRT_END - 1) + 1;
1251
6
            uint64_t end = min(e, limit);
1252
6
1253
6
            if ( map_e < end )
1254
6
            {
1255
6
                map_pages_to_xen((unsigned long)__va(map_e), PFN_DOWN(map_e),
1256
6
                                 PFN_DOWN(end - map_e), PAGE_HYPERVISOR);
1257
6
                init_boot_pages(map_e, end);
1258
6
                map_e = end;
1259
6
            }
1260
6
        }
1261
8
        if ( map_e < e )
1262
0
        {
1263
0
            /* This range must not be passed to the boot allocator and
1264
0
             * must also not be mapped with _PAGE_GLOBAL. */
1265
0
            map_pages_to_xen((unsigned long)__va(map_e), PFN_DOWN(map_e),
1266
0
                             PFN_DOWN(e - map_e), __PAGE_HYPERVISOR_RW);
1267
0
        }
1268
8
        if ( s < map_s )
1269
2
        {
1270
2
            map_pages_to_xen((unsigned long)__va(s), s >> PAGE_SHIFT,
1271
2
                             (map_s - s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
1272
2
            init_boot_pages(s, map_s);
1273
2
        }
1274
8
    }
1275
1
1276
3
    for ( i = 0; i < mbi->mods_count; ++i )
1277
2
    {
1278
2
        set_pdx_range(mod[i].mod_start,
1279
2
                      mod[i].mod_start + PFN_UP(mod[i].mod_end));
1280
2
        map_pages_to_xen((unsigned long)mfn_to_virt(mod[i].mod_start),
1281
2
                         mod[i].mod_start,
1282
2
                         PFN_UP(mod[i].mod_end), PAGE_HYPERVISOR);
1283
2
    }
1284
1
1285
1
#ifdef CONFIG_KEXEC
1286
1
    if ( kexec_crash_area.size )
1287
0
    {
1288
0
        unsigned long s = PFN_DOWN(kexec_crash_area.start);
1289
0
        unsigned long e = min(s + PFN_UP(kexec_crash_area.size),
1290
0
                              PFN_UP(__pa(HYPERVISOR_VIRT_END - 1)));
1291
0
1292
0
        if ( e > s ) 
1293
0
            map_pages_to_xen((unsigned long)__va(kexec_crash_area.start),
1294
0
                             s, e - s, PAGE_HYPERVISOR);
1295
0
    }
1296
1
#endif
1297
1
1298
1
    xen_virt_end = ((unsigned long)_end + (1UL << L2_PAGETABLE_SHIFT) - 1) &
1299
1
                   ~((1UL << L2_PAGETABLE_SHIFT) - 1);
1300
1
    destroy_xen_mappings(xen_virt_end, XEN_VIRT_START + BOOTSTRAP_MAP_BASE);
1301
1
1302
1
    /*
1303
1
     * If not using 2M mappings to gain suitable pagetable permissions
1304
1
     * directly from the relocation above, remap the code/data
1305
1
     * sections with decreased permissions.
1306
1
     */
1307
1
    if ( !using_2M_mapping() )
1308
1
    {
1309
1
        /* Mark .text as RX (avoiding the first 2M superpage). */
1310
1
        modify_xen_mappings(XEN_VIRT_START + MB(2),
1311
1
                            (unsigned long)&__2M_text_end,
1312
1
                            PAGE_HYPERVISOR_RX);
1313
1
1314
1
        /* Mark .rodata as RO. */
1315
1
        modify_xen_mappings((unsigned long)&__2M_rodata_start,
1316
1
                            (unsigned long)&__2M_rodata_end,
1317
1
                            PAGE_HYPERVISOR_RO);
1318
1
1319
1
        /* Mark .data and .bss as RW. */
1320
1
        modify_xen_mappings((unsigned long)&__2M_rwdata_start,
1321
1
                            (unsigned long)&__2M_rwdata_end,
1322
1
                            PAGE_HYPERVISOR_RW);
1323
1
1324
1
        /* Drop the remaining mappings in the shattered superpage. */
1325
1
        destroy_xen_mappings((unsigned long)&__2M_rwdata_end,
1326
1
                             ROUNDUP((unsigned long)&__2M_rwdata_end, MB(2)));
1327
1
    }
1328
1
1329
1
    nr_pages = 0;
1330
20
    for ( i = 0; i < e820.nr_map; i++ )
1331
19
        if ( e820.map[i].type == E820_RAM )
1332
8
            nr_pages += e820.map[i].size >> PAGE_SHIFT;
1333
1
    printk("System RAM: %luMB (%lukB)\n",
1334
1
           nr_pages >> (20 - PAGE_SHIFT),
1335
1
           nr_pages << (PAGE_SHIFT - 10));
1336
1
    total_pages = nr_pages;
1337
1
1338
1
    /* Sanity check for unwanted bloat of certain hypercall structures. */
1339
1
    BUILD_BUG_ON(sizeof(((struct xen_platform_op *)0)->u) !=
1340
1
                 sizeof(((struct xen_platform_op *)0)->u.pad));
1341
1
    BUILD_BUG_ON(sizeof(((struct xen_domctl *)0)->u) !=
1342
1
                 sizeof(((struct xen_domctl *)0)->u.pad));
1343
1
    BUILD_BUG_ON(sizeof(((struct xen_sysctl *)0)->u) !=
1344
1
                 sizeof(((struct xen_sysctl *)0)->u.pad));
1345
1
1346
1
    BUILD_BUG_ON(sizeof(start_info_t) > PAGE_SIZE);
1347
1
    BUILD_BUG_ON(sizeof(shared_info_t) > PAGE_SIZE);
1348
1
    BUILD_BUG_ON(sizeof(struct vcpu_info) != 64);
1349
1
1350
1
    BUILD_BUG_ON(sizeof(((struct compat_platform_op *)0)->u) !=
1351
1
                 sizeof(((struct compat_platform_op *)0)->u.pad));
1352
1
    BUILD_BUG_ON(sizeof(start_info_compat_t) > PAGE_SIZE);
1353
1
    BUILD_BUG_ON(sizeof(struct compat_vcpu_info) != 64);
1354
1
1355
1
    /* Check definitions in public headers match internal defs. */
1356
1
    BUILD_BUG_ON(__HYPERVISOR_VIRT_START != HYPERVISOR_VIRT_START);
1357
1
    BUILD_BUG_ON(__HYPERVISOR_VIRT_END   != HYPERVISOR_VIRT_END);
1358
1
    BUILD_BUG_ON(MACH2PHYS_VIRT_START != RO_MPT_VIRT_START);
1359
1
    BUILD_BUG_ON(MACH2PHYS_VIRT_END   != RO_MPT_VIRT_END);
1360
1
1361
1
    init_frametable();
1362
1
1363
1
    if ( !acpi_boot_table_init_done )
1364
0
        acpi_boot_table_init();
1365
1
1366
1
    acpi_numa_init();
1367
1
1368
1
    numa_initmem_init(0, raw_max_page);
1369
1
1370
1
    if ( max_page - 1 > virt_to_mfn(HYPERVISOR_VIRT_END - 1) )
1371
0
    {
1372
0
        unsigned long limit = virt_to_mfn(HYPERVISOR_VIRT_END - 1);
1373
0
        uint64_t mask = PAGE_SIZE - 1;
1374
0
1375
0
        if ( !highmem_start )
1376
0
            xenheap_max_mfn(limit);
1377
0
1378
0
        end_boot_allocator();
1379
0
1380
0
        /* Pass the remaining memory to the allocator. */
1381
0
        for ( i = 0; i < boot_e820.nr_map; i++ )
1382
0
        {
1383
0
            uint64_t s, e;
1384
0
1385
0
            if ( boot_e820.map[i].type != E820_RAM )
1386
0
                continue;
1387
0
            s = (boot_e820.map[i].addr + mask) & ~mask;
1388
0
            e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
1389
0
            if ( PFN_DOWN(e) <= limit )
1390
0
                continue;
1391
0
            if ( PFN_DOWN(s) <= limit )
1392
0
                s = pfn_to_paddr(limit + 1);
1393
0
            init_domheap_pages(s, e);
1394
0
        }
1395
0
1396
0
        if ( tmem_enabled() )
1397
0
        {
1398
0
           printk(XENLOG_WARNING
1399
0
                  "TMEM physical RAM limit exceeded, disabling TMEM\n");
1400
0
           tmem_disable();
1401
0
        }
1402
0
    }
1403
1
    else
1404
1
        end_boot_allocator();
1405
1
1406
1
    system_state = SYS_STATE_boot;
1407
1
    /*
1408
1
     * No calls involving ACPI code should go between the setting of
1409
1
     * SYS_STATE_boot and vm_init() (or else acpi_os_{,un}map_memory()
1410
1
     * will break).
1411
1
     */
1412
1
    vm_init();
1413
1
1414
1
    console_init_ring();
1415
1
    vesa_init();
1416
1
1417
1
    softirq_init();
1418
1
    tasklet_subsys_init();
1419
1
1420
1
    early_cpu_init();
1421
1
1422
1
    paging_init();
1423
1
1424
1
    tboot_probe();
1425
1
1426
1
    open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period);
1427
1
1428
1
    if ( opt_watchdog ) 
1429
0
        nmi_watchdog = NMI_LOCAL_APIC;
1430
1
1431
1
    find_smp_config();
1432
1
1433
1
    dmi_scan_machine();
1434
1
1435
1
    generic_apic_probe();
1436
1
1437
1
    acpi_boot_init();
1438
1
1439
1
    if ( smp_found_config )
1440
1
        get_smp_config();
1441
1
1442
1
    if ( opt_nosmp )
1443
0
    {
1444
0
        max_cpus = 0;
1445
0
        set_nr_cpu_ids(1);
1446
0
    }
1447
1
    else
1448
1
    {
1449
1
        set_nr_cpu_ids(max_cpus);
1450
1
        max_cpus = nr_cpu_ids;
1451
1
    }
1452
1
1453
1
    /* Low mappings were only needed for some BIOS table parsing. */
1454
1
    zap_low_mappings();
1455
1
1456
1
    mmio_ro_ranges = rangeset_new(NULL, "r/o mmio ranges",
1457
1
                                  RANGESETF_prettyprint_hex);
1458
1
1459
1
    init_apic_mappings();
1460
1
1461
1
    normalise_cpu_order();
1462
1
1463
1
    init_cpu_to_node();
1464
1
1465
1
    x2apic_bsp_setup();
1466
1
1467
1
    init_IRQ();
1468
1
1469
1
    module_map = xmalloc_array(unsigned long, BITS_TO_LONGS(mbi->mods_count));
1470
1
    bitmap_fill(module_map, mbi->mods_count);
1471
1
    __clear_bit(0, module_map); /* Dom0 kernel is always first */
1472
1
1473
1
    xsm_multiboot_init(module_map, mbi, bootstrap_map);
1474
1
1475
1
    microcode_grab_module(module_map, mbi, bootstrap_map);
1476
1
1477
1
    timer_init();
1478
1
1479
1
    early_microcode_init();
1480
1
1481
1
    identify_cpu(&boot_cpu_data);
1482
1
1483
1
    set_in_cr4(X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT);
1484
1
1485
1
    if ( !opt_smep )
1486
0
        setup_clear_cpu_cap(X86_FEATURE_SMEP);
1487
1
    if ( cpu_has_smep && opt_smep != SMEP_HVM_ONLY )
1488
1
        setup_force_cpu_cap(X86_FEATURE_XEN_SMEP);
1489
1
    if ( boot_cpu_has(X86_FEATURE_XEN_SMEP) )
1490
1
        set_in_cr4(X86_CR4_SMEP);
1491
1
1492
1
    if ( !opt_smap )
1493
0
        setup_clear_cpu_cap(X86_FEATURE_SMAP);
1494
1
    if ( cpu_has_smap && opt_smap != SMAP_HVM_ONLY )
1495
0
        setup_force_cpu_cap(X86_FEATURE_XEN_SMAP);
1496
1
    if ( boot_cpu_has(X86_FEATURE_XEN_SMAP) )
1497
0
        set_in_cr4(X86_CR4_SMAP);
1498
1
1499
1
    cr4_pv32_mask = mmu_cr4_features & XEN_CR4_PV32_BITS;
1500
1
1501
1
    if ( cpu_has_fsgsbase )
1502
1
        set_in_cr4(X86_CR4_FSGSBASE);
1503
1
1504
1
    init_idle_domain();
1505
1
1506
1
    this_cpu(stubs.addr) = alloc_stub_page(smp_processor_id(),
1507
1
                                           &this_cpu(stubs).mfn);
1508
1
    BUG_ON(!this_cpu(stubs.addr));
1509
1
1510
1
    trap_init();
1511
1
1512
1
    rcu_init();
1513
1
1514
1
    early_time_init();
1515
1
1516
1
    arch_init_memory();
1517
1
1518
1
    alternative_instructions();
1519
1
1520
1
    local_irq_enable();
1521
1
1522
1
    pt_pci_init();
1523
1
1524
1
    vesa_mtrr_init();
1525
1
1526
1
    acpi_mmcfg_init();
1527
1
1528
1
    early_msi_init();
1529
1
1530
1
    iommu_setup();    /* setup iommu if available */
1531
1
1532
1
    smp_prepare_cpus(max_cpus);
1533
1
1534
1
    spin_debug_enable();
1535
1
1536
1
    /*
1537
1
     * Initialise higher-level timer functions. We do this fairly late
1538
1
     * (after interrupts got enabled) because the time bases and scale
1539
1
     * factors need to be updated regularly.
1540
1
     */
1541
1
    init_xen_time();
1542
1
1543
1
    initialize_keytable();
1544
1
1545
1
    console_init_postirq();
1546
1
1547
1
    system_state = SYS_STATE_smp_boot;
1548
1
1549
1
    do_presmp_initcalls();
1550
1
1551
1
    for_each_present_cpu ( i )
1552
12
    {
1553
12
        /* Set up cpu_to_node[]. */
1554
12
        srat_detect_node(i);
1555
12
        /* Set up node_to_cpumask based on cpu_to_node[]. */
1556
12
        numa_add_cpu(i);        
1557
12
1558
12
        if ( (num_online_cpus() < max_cpus) && !cpu_online(i) )
1559
11
        {
1560
11
            int ret = cpu_up(i);
1561
11
            if ( ret != 0 )
1562
0
                printk("Failed to bring up CPU %u (error %d)\n", i, ret);
1563
11
        }
1564
12
    }
1565
1
1566
1
    printk("Brought up %ld CPUs\n", (long)num_online_cpus());
1567
1
    smp_cpus_done();
1568
1
1569
1
    do_initcalls();
1570
1
1571
1
    if ( opt_watchdog ) 
1572
0
        watchdog_setup();
1573
1
1574
1
    if ( !tboot_protect_mem_regions() )
1575
0
        panic("Could not protect TXT memory regions");
1576
1
1577
1
    init_guest_cpuid();
1578
1
    init_guest_msr_policy();
1579
1
1580
1
    if ( dom0_pvh )
1581
1
    {
1582
1
        domcr_flags |= DOMCRF_hvm |
1583
1
                       ((hvm_funcs.hap_supported && !opt_dom0_shadow) ?
1584
1
                         DOMCRF_hap : 0);
1585
1
        config.emulation_flags = XEN_X86_EMU_LAPIC|XEN_X86_EMU_IOAPIC|
1586
1
                                 XEN_X86_EMU_VPCI;
1587
1
    }
1588
1
1589
1
    /* Create initial domain 0. */
1590
1
    dom0 = domain_create(0, domcr_flags, 0, &config);
1591
1
    if ( IS_ERR(dom0) || (alloc_dom0_vcpu0(dom0) == NULL) )
1592
0
        panic("Error creating domain 0");
1593
1
1594
1
    dom0->is_privileged = 1;
1595
1
    dom0->target = NULL;
1596
1
1597
1
    /* Grab the DOM0 command line. */
1598
1
    cmdline = (char *)(mod[0].string ? __va(mod[0].string) : NULL);
1599
1
    if ( (cmdline != NULL) || (kextra != NULL) )
1600
0
    {
1601
0
        static char __initdata dom0_cmdline[MAX_GUEST_CMDLINE];
1602
0
1603
0
        cmdline = cmdline_cook(cmdline, loader);
1604
0
        safe_strcpy(dom0_cmdline, cmdline);
1605
0
1606
0
        if ( kextra != NULL )
1607
0
            /* kextra always includes exactly one leading space. */
1608
0
            safe_strcat(dom0_cmdline, kextra);
1609
0
1610
0
        /* Append any extra parameters. */
1611
0
        if ( skip_ioapic_setup && !strstr(dom0_cmdline, "noapic") )
1612
0
            safe_strcat(dom0_cmdline, " noapic");
1613
0
        if ( (strlen(acpi_param) == 0) && acpi_disabled )
1614
0
        {
1615
0
            printk("ACPI is disabled, notifying Domain 0 (acpi=off)\n");
1616
0
            safe_strcpy(acpi_param, "off");
1617
0
        }
1618
0
        if ( (strlen(acpi_param) != 0) && !strstr(dom0_cmdline, "acpi=") )
1619
0
        {
1620
0
            safe_strcat(dom0_cmdline, " acpi=");
1621
0
            safe_strcat(dom0_cmdline, acpi_param);
1622
0
        }
1623
0
1624
0
        cmdline = dom0_cmdline;
1625
0
    }
1626
1
1627
1
    if ( xen_cpuidle )
1628
1
        xen_processor_pmbits |= XEN_PROCESSOR_PM_CX;
1629
1
1630
1
    initrdidx = find_first_bit(module_map, mbi->mods_count);
1631
1
    if ( bitmap_weight(module_map, mbi->mods_count) > 1 )
1632
0
        printk(XENLOG_WARNING
1633
0
               "Multiple initrd candidates, picking module #%u\n",
1634
0
               initrdidx);
1635
1
1636
1
    /*
1637
1
     * Temporarily clear SMAP in CR4 to allow user-accesses in construct_dom0().
1638
1
     * This saves a large number of corner cases interactions with
1639
1
     * copy_from_user().
1640
1
     */
1641
1
    if ( cpu_has_smap )
1642
0
    {
1643
0
        cr4_pv32_mask &= ~X86_CR4_SMAP;
1644
0
        write_cr4(read_cr4() & ~X86_CR4_SMAP);
1645
0
    }
1646
1
1647
1
    printk("%sNX (Execute Disable) protection %sactive\n",
1648
1
           cpu_has_nx ? XENLOG_INFO : XENLOG_WARNING "Warning: ",
1649
1
           cpu_has_nx ? "" : "not ");
1650
1
1651
1
    /*
1652
1
     * We're going to setup domain0 using the module(s) that we stashed safely
1653
1
     * above our heap. The second module, if present, is an initrd ramdisk.
1654
1
     */
1655
1
    if ( construct_dom0(dom0, mod, modules_headroom,
1656
1
                        (initrdidx > 0) && (initrdidx < mbi->mods_count)
1657
1
                        ? mod + initrdidx : NULL,
1658
1
                        bootstrap_map, cmdline) != 0)
1659
0
        panic("Could not set up DOM0 guest OS");
1660
1
1661
1
    if ( cpu_has_smap )
1662
0
    {
1663
0
        write_cr4(read_cr4() | X86_CR4_SMAP);
1664
0
        cr4_pv32_mask |= X86_CR4_SMAP;
1665
0
    }
1666
1
1667
1
    heap_init_late();
1668
1
1669
1
    init_trace_bufs();
1670
1
1671
1
    init_constructors();
1672
1
1673
1
    console_endboot();
1674
1
1675
1
    /* Hide UART from DOM0 if we're using it */
1676
1
    serial_endboot();
1677
1
1678
1
    dmi_end_boot();
1679
1
1680
1
    setup_io_bitmap(dom0);
1681
1
1682
1
    /* Jump to the 1:1 virtual mappings of cpu0_stack. */
1683
1
    asm volatile ("mov %[stk], %%rsp; jmp %c[fn]" ::
1684
1
                  [stk] "g" (__va(__pa(get_stack_bottom()))),
1685
1
                  [fn] "i" (reinit_bsp_stack) : "memory");
1686
1
    unreachable();
1687
1
}
1688
1689
void arch_get_xen_caps(xen_capabilities_info_t *info)
{
    /*
     * Advertise the guest interfaces this hypervisor supports.  The
     * interface name is always xen-3.0-* for Xen-3.x.
     */
    const int major = 3, minor = 0;
    static const char *const pv_archs[]  = { "x86_64", "x86_32p" };
    static const char *const hvm_archs[] = { "x86_32", "x86_32p", "x86_64" };
    char entry[32];
    unsigned int i;

    /* Start from an empty string and append one capability per entry. */
    (*info)[0] = '\0';

    /* PV interfaces are always available. */
    for ( i = 0; i < ARRAY_SIZE(pv_archs); i++ )
    {
        snprintf(entry, sizeof(entry), "xen-%d.%d-%s ",
                 major, minor, pv_archs[i]);
        safe_strcat(*info, entry);
    }

    /* HVM interfaces are advertised only when HVM support is enabled. */
    if ( hvm_enabled )
    {
        for ( i = 0; i < ARRAY_SIZE(hvm_archs); i++ )
        {
            snprintf(entry, sizeof(entry), "hvm-%d.%d-%s ",
                     major, minor, hvm_archs[i]);
            safe_strcat(*info, entry);
        }
    }
}
1711
1712
/*
 * Report whether the page at @mfn overlaps any memory region owned by the
 * Xen image itself.  Returns 1 on overlap, 0 otherwise.
 */
int __hwdom_init xen_in_range(unsigned long mfn)
{
    enum { region_s3, region_ro, region_rw, nr_regions };
    static struct {
        paddr_t s, e;
    } xen_regions[nr_regions] __hwdom_initdata;
    paddr_t start = (paddr_t)mfn << PAGE_SHIFT;
    paddr_t end = start + PAGE_SIZE;
    unsigned int i;

    /* Populate the region table lazily, on the first call. */
    if ( !xen_regions[0].s )
    {
        /* S3 resume code (and other real mode trampoline code) */
        xen_regions[region_s3].s = bootsym_phys(trampoline_start);
        xen_regions[region_s3].e = bootsym_phys(trampoline_end);

        /*
         * This needs to remain in sync with the uses of the same symbols in
         * - __start_xen() (above)
         * - is_xen_fixed_mfn()
         * - tboot_shutdown()
         */

        /* hypervisor .text + .rodata */
        xen_regions[region_ro].s = __pa(&_stext);
        xen_regions[region_ro].e = __pa(&__2M_rodata_end);
        /* hypervisor .data + .bss */
        xen_regions[region_rw].s = __pa(&__2M_rwdata_start);
        xen_regions[region_rw].e = __pa(&__2M_rwdata_end);
    }

    /* Standard half-open interval overlap test against each region. */
    for ( i = 0; i < nr_regions; i++ )
        if ( (start < xen_regions[i].e) && (end > xen_regions[i].s) )
            return 1;

    return 0;
}
1752
1753
/*
 * Rangeset callback: clear ports [s, e] in the domain's HVM I/O bitmap.
 * A clear bit means the port is not intercepted (see setup_io_bitmap(),
 * which first sets every bit and then reports the permitted ranges here).
 */
static int __hwdom_init io_bitmap_cb(unsigned long s, unsigned long e,
                                     void *ctx)
{
    struct domain *d = ctx;
    unsigned long port;

    /* Port numbers fit an int; guard before using them as bit indices. */
    ASSERT(e <= INT_MAX);
    for ( port = s; port <= e; port++ )
        __clear_bit(port, d->arch.hvm_domain.io_bitmap);

    return 0;
}
1765
1766
/*
 * Build the HVM I/O port interception bitmap for the hardware domain:
 * trap everything by default, then open up the ranges granted via
 * d->arch.ioport_caps, and finally force-trap a few ports Xen must see.
 */
void __hwdom_init setup_io_bitmap(struct domain *d)
{
    int rc;

    if ( !is_hvm_domain(d) )
        return;

    /* Default: every port (0x0000 - 0xffff) is intercepted. */
    bitmap_fill(d->arch.hvm_domain.io_bitmap, 0x10000);
    /* Clear the bits for ranges the domain is permitted to access directly. */
    rc = rangeset_report_ranges(d->arch.ioport_caps, 0, 0x10000,
                                io_bitmap_cb, d);
    BUG_ON(rc);
    /*
     * NB: we need to trap accesses to 0xcf8 in order to intercept
     * 4 byte accesses, that need to be handled by Xen in order to
     * keep consistency.
     * Access to 1 byte RTC ports also needs to be trapped in order
     * to keep consistency with PV.
     */
    __set_bit(0xcf8, d->arch.hvm_domain.io_bitmap);
    __set_bit(RTC_PORT(0), d->arch.hvm_domain.io_bitmap);
    __set_bit(RTC_PORT(1), d->arch.hvm_domain.io_bitmap);
}
1788
1789
/*
1790
 * Local variables:
1791
 * mode: C
1792
 * c-file-style: "BSD"
1793
 * c-basic-offset: 4
1794
 * tab-width: 4
1795
 * indent-tabs-mode: nil
1796
 * End:
1797
 */