# HG changeset patch # User kaf24@scramble.cl.cam.ac.uk # Date 1107528517 0 # Node ID 5c112b235281b86515ff4de917794aaf8eaff23f # Parent 33d462bea5cda020049f29b2f4f90060babb872b bitkeeper revision 1.1159.212.85 (42038b45EjUo-1JiSCHXW0Wav4TZGQ) x86_64 progress: now entering ring 3. Need a hypercall (SYSCALL) entry point, and some kind of DOM0 image to test against. Signed-off-by: keir.fraser@cl.cam.ac.uk diff -r 33d462bea5cd -r 5c112b235281 xen/arch/x86/boot/x86_64.S --- a/xen/arch/x86/boot/x86_64.S Fri Feb 04 12:58:34 2005 +0000 +++ b/xen/arch/x86/boot/x86_64.S Fri Feb 04 14:48:37 2005 +0000 @@ -189,7 +189,7 @@ 1: jmp 1b ENTRY(gdt_table) .fill FIRST_RESERVED_GDT_ENTRY,8,0 .quad 0x0000000000000000 /* unused */ - .quad 0x00cf9a000000ffff /* 0x0808 ring 0 code, compatability */ + .quad 0x00cf9a000000ffff /* 0x0808 ring 0 code, compatibility */ .quad 0x00af9a000000ffff /* 0x0810 ring 0 code, 64-bit mode */ .quad 0x00cf92000000ffff /* 0x0818 ring 0 data */ .quad 0x00cffa000000ffff /* 0x0823 ring 3 code, compatibility */ diff -r 33d462bea5cd -r 5c112b235281 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Fri Feb 04 12:58:34 2005 +0000 +++ b/xen/arch/x86/domain.c Fri Feb 04 14:48:37 2005 +0000 @@ -491,13 +491,14 @@ void new_thread(struct exec_domain *d, /* * Initial register values: - * DS,ES,FS,GS = FLAT_RING1_DS - * CS:EIP = FLAT_RING1_CS:start_pc - * SS:ESP = FLAT_RING1_DS:start_stack + * DS,ES,FS,GS = FLAT_GUESTOS_DS + * CS:EIP = FLAT_GUESTOS_CS:start_pc + * SS:ESP = FLAT_GUESTOS_SS:start_stack * ESI = start_info * [EAX,EBX,ECX,EDX,EDI,EBP are zero] */ - ec->ds = ec->es = ec->fs = ec->gs = ec->ss = FLAT_GUESTOS_DS; + ec->ds = ec->es = ec->fs = ec->gs = FLAT_GUESTOS_DS; + ec->ss = FLAT_GUESTOS_SS; ec->cs = FLAT_GUESTOS_CS; ec->eip = start_pc; ec->esp = start_stack; diff -r 33d462bea5cd -r 5c112b235281 xen/arch/x86/x86_32/domain_build.c --- a/xen/arch/x86/x86_32/domain_build.c Fri Feb 04 12:58:34 2005 +0000 +++ b/xen/arch/x86/x86_32/domain_build.c Fri Feb 04 14:48:37 
2005 +0000 @@ -218,7 +218,7 @@ int construct_dom0(struct domain *d, */ ed->thread.failsafe_selector = FLAT_GUESTOS_CS; ed->thread.event_selector = FLAT_GUESTOS_CS; - ed->thread.guestos_ss = FLAT_GUESTOS_DS; + ed->thread.guestos_ss = FLAT_GUESTOS_SS; for ( i = 0; i < 256; i++ ) ed->thread.traps[i].cs = FLAT_GUESTOS_CS; @@ -257,7 +257,6 @@ int construct_dom0(struct domain *d, l2tab = l2start + l2_table_offset(vpt_start); l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab); l1tab += l1_table_offset(vpt_start); - l2tab++; for ( count = 0; count < nr_pt_pages; count++ ) { *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW); @@ -294,9 +293,8 @@ int construct_dom0(struct domain *d, */ get_page(page, d); /* an extra ref because of readable mapping */ } - l1tab++; - if( !((unsigned long)l1tab & (PAGE_SIZE - 1)) ) - l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab); + if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) ) + l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*++l2tab); } /* Set up shared-info area. */ diff -r 33d462bea5cd -r 5c112b235281 xen/arch/x86/x86_64/domain_build.c --- a/xen/arch/x86/x86_64/domain_build.c Fri Feb 04 12:58:34 2005 +0000 +++ b/xen/arch/x86/x86_64/domain_build.c Fri Feb 04 14:48:37 2005 +0000 @@ -42,7 +42,9 @@ int construct_dom0(struct domain *d, unsigned long nr_pages = (alloc_end - alloc_start) >> PAGE_SHIFT; unsigned long nr_pt_pages; unsigned long count; - l2_pgentry_t *l2tab, *l2start; + l4_pgentry_t *l4tab = NULL, *l4start = NULL; + l3_pgentry_t *l3tab = NULL, *l3start = NULL; + l2_pgentry_t *l2tab = NULL, *l2start = NULL; l1_pgentry_t *l1tab = NULL, *l1start = NULL; struct pfn_info *page = NULL; start_info_t *si; @@ -128,8 +130,16 @@ int construct_dom0(struct domain *d, v_end = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1); if ( (v_end - vstack_end) < (512UL << 10) ) v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. 
*/ - if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >> - L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages ) +#define RD(_p,_s) ((_p) >> (_s)) /* round down */ +#define RU(_p,_s) (((_p) + ((1UL<<(_s))-1)) >> (_s)) /* round up */ + if ( (1 + /* # L4 */ + (RU(v_end, L4_PAGETABLE_SHIFT) - + RD(dsi.v_start, L4_PAGETABLE_SHIFT)) + /* # L3 */ + (RU(v_end, L3_PAGETABLE_SHIFT) - + RD(dsi.v_start, L3_PAGETABLE_SHIFT)) + /* # L2 */ + (RU(v_end, L2_PAGETABLE_SHIFT) - + RD(dsi.v_start, L2_PAGETABLE_SHIFT))) /* # L1 */ + <= nr_pt_pages ) break; } @@ -195,8 +205,8 @@ int construct_dom0(struct domain *d, printk("done.\n"); /* Construct a frame-allocation list for the initial domain. */ - for ( mfn = (alloc_start>>PAGE_SHIFT); - mfn < (alloc_end>>PAGE_SHIFT); + for ( mfn = (alloc_start>>PAGE_SHIFT); + mfn < (alloc_end>>PAGE_SHIFT); mfn++ ) { page = &frame_table[mfn]; @@ -218,85 +228,97 @@ int construct_dom0(struct domain *d, */ ed->thread.failsafe_selector = FLAT_GUESTOS_CS; ed->thread.event_selector = FLAT_GUESTOS_CS; - ed->thread.guestos_ss = FLAT_GUESTOS_DS; + ed->thread.guestos_ss = FLAT_GUESTOS_SS; for ( i = 0; i < 256; i++ ) ed->thread.traps[i].cs = FLAT_GUESTOS_CS; /* WARNING: The new domain must have its 'processor' field filled in! 
*/ - l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE; - memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE); - l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] = - mk_l2_pgentry((unsigned long)l2start | __PAGE_HYPERVISOR); - l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] = - mk_l2_pgentry(__pa(d->mm_perdomain_pt) | __PAGE_HYPERVISOR); - ed->mm.pagetable = mk_pagetable((unsigned long)l2start); + phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table; + l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE; + memcpy(l4tab, &idle_pg_table[0], PAGE_SIZE); + l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] = + mk_l4_pgentry(__pa(l4start) | __PAGE_HYPERVISOR); + l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] = + mk_l4_pgentry(__pa(d->mm_perdomain_pt) | __PAGE_HYPERVISOR); + ed->mm.pagetable = mk_pagetable(__pa(l4start)); - l2tab += l2_table_offset(dsi.v_start); + l4tab += l4_table_offset(dsi.v_start); mfn = alloc_start >> PAGE_SHIFT; for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ ) { if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) ) { - l1start = l1tab = (l1_pgentry_t *)mpt_alloc; - mpt_alloc += PAGE_SIZE; - *l2tab++ = mk_l2_pgentry((unsigned long)l1start | L2_PROT); + phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l1_page_table; + l1start = l1tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE; clear_page(l1tab); if ( count == 0 ) l1tab += l1_table_offset(dsi.v_start); + if ( !((unsigned long)l2tab & (PAGE_SIZE-1)) ) + { + phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l2_page_table; + l2start = l2tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE; + clear_page(l2tab); + if ( count == 0 ) + l2tab += l2_table_offset(dsi.v_start); + if ( !((unsigned long)l3tab & (PAGE_SIZE-1)) ) + { + phys_to_page(mpt_alloc)->u.inuse.type_info = + PGT_l3_page_table; + l3start = l3tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE; + clear_page(l3tab); + if ( count == 0 ) + l3tab += l3_table_offset(dsi.v_start); + *l4tab++ = mk_l4_pgentry(__pa(l3start) | 
L4_PROT); + } + *l3tab++ = mk_l3_pgentry(__pa(l2start) | L3_PROT); + } + *l2tab++ = mk_l2_pgentry(__pa(l1start) | L2_PROT); } *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT); - + page = &frame_table[mfn]; - if ( !get_page_and_type(page, d, PGT_writable_page) ) + if ( (page->u.inuse.type_info == 0) && + !get_page_and_type(page, d, PGT_writable_page) ) BUG(); mfn++; } /* Pages that are part of page tables must be read only. */ - l2tab = l2start + l2_table_offset(vpt_start); - l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab); + l4tab = l4start + l4_table_offset(vpt_start); + l3start = l3tab = l4_pgentry_to_l3(*l4tab); + l3tab += l3_table_offset(vpt_start); + l2start = l2tab = l3_pgentry_to_l2(*l3tab); + l2tab += l2_table_offset(vpt_start); + l1start = l1tab = l2_pgentry_to_l1(*l2tab); l1tab += l1_table_offset(vpt_start); - l2tab++; for ( count = 0; count < nr_pt_pages; count++ ) { *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW); page = &frame_table[l1_pgentry_to_pagenr(*l1tab)]; - if ( count == 0 ) - { - page->u.inuse.type_info &= ~PGT_type_mask; - page->u.inuse.type_info |= PGT_l2_page_table; + + /* Read-only mapping + PGC_allocated + page-table page. */ + page->count_info = PGC_allocated | 3; + page->u.inuse.type_info |= PGT_validated | 1; - /* - * No longer writable: decrement the type_count. - * Installed as CR3: increment both the ref_count and type_count. - * Net: just increment the ref_count. - */ - get_page(page, d); /* an extra ref because of readable mapping */ - - /* Get another ref to L2 page so that it can be pinned. */ - if ( !get_page_and_type(page, d, PGT_l2_page_table) ) - BUG(); - set_bit(_PGT_pinned, &page->u.inuse.type_info); + /* Top-level p.t. is pinned. */ + if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_l4_page_table ) + { + page->count_info += 1; + page->u.inuse.type_info += 1 | PGT_pinned; } - else + + /* Iterate. 
*/ + if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) ) { - page->u.inuse.type_info &= ~PGT_type_mask; - page->u.inuse.type_info |= PGT_l1_page_table; - page->u.inuse.type_info |= - ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-1))<mm); __sti(); - /* Destroy low mappings - they were only for our convenience. */ - for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ ) - if ( l2_pgentry_val(l2start[i]) & _PAGE_PSE ) - l2start[i] = mk_l2_pgentry(0); - zap_low_mappings(); /* Do the same for the idle page tables. */ - /* DOM0 gets access to everything. */ physdev_init_dom0(d); @@ -378,12 +394,6 @@ int construct_dom0(struct domain *d, new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start); -#if 0 /* XXXXX DO NOT CHECK IN ENABLED !!! (but useful for testing so leave) */ - shadow_lock(&d->mm); - shadow_mode_enable(p, SHM_test); - shadow_unlock(&d->mm); -#endif - return 0; } diff -r 33d462bea5cd -r 5c112b235281 xen/arch/x86/x86_64/mm.c --- a/xen/arch/x86/x86_64/mm.c Fri Feb 04 12:58:34 2005 +0000 +++ b/xen/arch/x86/x86_64/mm.c Fri Feb 04 14:48:37 2005 +0000 @@ -118,8 +118,8 @@ void __init paging_init(void) unsigned long i, p, max; /* Map all of physical memory. 
*/ - max = (max_page + (1UL << L2_PAGETABLE_SHIFT) - 1UL) & - ~((1UL << L2_PAGETABLE_SHIFT) - 1UL); + max = ((max_page + ENTRIES_PER_L1_PAGETABLE - 1) & + ~(ENTRIES_PER_L1_PAGETABLE - 1)) << PAGE_SHIFT; map_pages(idle_pg_table, PAGE_OFFSET, 0, max, PAGE_HYPERVISOR); /* diff -r 33d462bea5cd -r 5c112b235281 xen/include/public/arch-x86_32.h --- a/xen/include/public/arch-x86_32.h Fri Feb 04 12:58:34 2005 +0000 +++ b/xen/include/public/arch-x86_32.h Fri Feb 04 14:48:37 2005 +0000 @@ -49,13 +49,17 @@ */ #define FLAT_RING1_CS 0x0819 /* GDT index 259 */ #define FLAT_RING1_DS 0x0821 /* GDT index 260 */ +#define FLAT_RING1_SS 0x0821 /* GDT index 260 */ #define FLAT_RING3_CS 0x082b /* GDT index 261 */ #define FLAT_RING3_DS 0x0833 /* GDT index 262 */ +#define FLAT_RING3_SS 0x0833 /* GDT index 262 */ #define FLAT_GUESTOS_CS FLAT_RING1_CS #define FLAT_GUESTOS_DS FLAT_RING1_DS +#define FLAT_GUESTOS_SS FLAT_RING1_SS #define FLAT_USER_CS FLAT_RING3_CS #define FLAT_USER_DS FLAT_RING3_DS +#define FLAT_USER_SS FLAT_RING3_SS /* And the trap vector is... 
*/ #define TRAP_INSTR "int $0x82" diff -r 33d462bea5cd -r 5c112b235281 xen/include/public/arch-x86_64.h --- a/xen/include/public/arch-x86_64.h Fri Feb 04 12:58:34 2005 +0000 +++ b/xen/include/public/arch-x86_64.h Fri Feb 04 14:48:37 2005 +0000 @@ -45,7 +45,9 @@ #define FLAT_RING3_CS32 0x0823 /* GDT index 260 */ #define FLAT_RING3_CS64 0x082b /* GDT index 261 */ #define FLAT_RING3_DS32 0x0833 /* GDT index 262 */ -#define FLAT_RING3_DS64 0x0000 +#define FLAT_RING3_DS64 0x0000 /* NULL selector */ +#define FLAT_RING3_SS32 0x0833 /* GDT index 262 */ +#define FLAT_RING3_SS64 0x0833 /* GDT index 262 */ #define FLAT_GUESTOS_DS64 FLAT_RING3_DS64 #define FLAT_GUESTOS_DS32 FLAT_RING3_DS32 @@ -53,6 +55,9 @@ #define FLAT_GUESTOS_CS64 FLAT_RING3_CS64 #define FLAT_GUESTOS_CS32 FLAT_RING3_CS32 #define FLAT_GUESTOS_CS FLAT_GUESTOS_CS64 +#define FLAT_GUESTOS_SS64 FLAT_RING3_SS64 +#define FLAT_GUESTOS_SS32 FLAT_RING3_SS32 +#define FLAT_GUESTOS_SS FLAT_GUESTOS_SS64 #define FLAT_USER_DS64 FLAT_RING3_DS64 #define FLAT_USER_DS32 FLAT_RING3_DS32 @@ -60,13 +65,21 @@ #define FLAT_USER_CS64 FLAT_RING3_CS64 #define FLAT_USER_CS32 FLAT_RING3_CS32 #define FLAT_USER_CS FLAT_USER_CS64 +#define FLAT_USER_SS64 FLAT_RING3_SS64 +#define FLAT_USER_SS32 FLAT_RING3_SS32 +#define FLAT_USER_SS FLAT_USER_SS64 /* And the trap vector is... */ #define TRAP_INSTR "syscall" +#ifndef HYPERVISOR_VIRT_START +#define HYPERVISOR_VIRT_START (0xFFFF800000000000UL) +#define HYPERVISOR_VIRT_END (0xFFFF880000000000UL) +#endif + /* The machine->physical mapping table starts at this address, read-only. */ #ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((unsigned long *)0xffff810000000000ULL) +#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) #endif #ifndef __ASSEMBLY__