debuggers.hg
changeset 3669:677cb76cff18
bitkeeper revision 1.1159.212.78 (4202391ehUS0T4TJglUpPqBH3oGjNQ)
Move domain builder to be subarch-specific. Fix pfn_info structure and
page reference-counting to be 64-bit clean.
Signed-off-by: keir.fraser@cl.cam.ac.uk
| author | kaf24@scramble.cl.cam.ac.uk |
| --- | --- |
| date | Thu Feb 03 14:45:50 2005 +0000 (2005-02-03) |
| parents | d55d523078f7 |
| children | 1c43dbcfc46f |
| files | .rootkeys xen/arch/x86/domain.c xen/arch/x86/memory.c xen/arch/x86/shadow.c xen/arch/x86/x86_32/domain_build.c xen/arch/x86/x86_32/mm.c xen/arch/x86/x86_64/domain_build.c xen/arch/x86/x86_64/mm.c xen/common/page_alloc.c xen/include/asm-x86/mm.h xen/include/asm-x86/shadow.h |
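The structural change is easiest to see in the xen/include/asm-x86/mm.h hunk at the bottom: pfn_info no longer stores the owning domain as a raw pointer. Instead it keeps a 32-bit "pickled" value directly after count_info, so the owner/refcount pair stays eight bytes on both x86_32 and x86_64 and can be manipulated atomically. The following user-space sketch illustrates the scheme; fake_xenheap, the toy __pa()/__va() macros, and struct page_owner_word are illustrative stand-ins, not Xen's real direct-map helpers.

```c
/*
 * User-space sketch of the owner "pickling" introduced in the
 * xen/include/asm-x86/mm.h hunk. fake_xenheap and these toy __pa()/__va()
 * macros are stand-ins for Xen's real direct-map helpers.
 */
#include <assert.h>
#include <stdint.h>

struct domain { int id; };

static char fake_xenheap[1 << 16];   /* pretend direct-mapped heap */
#define __pa(p) ((uint32_t)((char *)(p) - fake_xenheap))
#define __va(x) ((void *)(fake_xenheap + (x)))

/* x86_64 flavour: NULL pickles to 0; everything else pickles to its
 * physical address, assumed to fit in 32 bits. */
static inline uint32_t pickle_domptr(struct domain *d)
{ return (d == NULL) ? 0 : (uint32_t)__pa(d); }
static inline struct domain *unpickle_domptr(uint32_t v)
{ return (v == 0) ? NULL : (struct domain *)__va(v); }

/* The two fields the changeset keeps adjacent (8 bytes total). */
struct page_owner_word {
    uint32_t count_info;
    uint32_t _domain;     /* pickled owner; 0 means anonymous */
};

#define page_get_owner(p)     unpickle_domptr((p)->_domain)
#define page_set_owner(p, d)  ((p)->_domain = pickle_domptr(d))

int main(void)
{
    /* Place the "domain" at a nonzero offset so it cannot pickle to 0. */
    struct domain *d = (struct domain *)(fake_xenheap + 64);
    struct page_owner_word page = { 0, 0 };

    assert(page_get_owner(&page) == NULL);  /* 0 means no owner */
    page_set_owner(&page, d);
    assert(page_get_owner(&page) == d);     /* round-trips exactly */
    return 0;
}
```

On x86_32 pickling is a plain cast, as the mm.h hunk shows; the __pa-based encoding is only needed once pointers outgrow 32 bits.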
line diff
1.1 --- a/.rootkeys Thu Feb 03 13:07:34 2005 +0000 1.2 +++ b/.rootkeys Thu Feb 03 14:45:50 2005 +0000 1.3 @@ -897,6 +897,7 @@ 41c0c411ODt8uEmV-yUxpQLpqimE5Q xen/arch/ 1.4 41f97ef5139vN42cOYHfX_Ac8WOOjA xen/arch/x86/vmx_platform.c 1.5 41c0c4128URE0dxcO15JME_MuKBPfg xen/arch/x86/vmx_vmcs.c 1.6 419cbedeQDg8IrO3izo3o5rQNlo0kQ xen/arch/x86/x86_32/asm-offsets.c 1.7 +4202391dkvdTZ8GhWXe3Gqf9EOgWXg xen/arch/x86/x86_32/domain_build.c 1.8 3e32af9aRnYGl4GMOaDKp7JdfhOGhg xen/arch/x86/x86_32/domain_page.c 1.9 3ddb79bcecupHj56ZbTa3B0FxDowMg xen/arch/x86/x86_32/entry.S 1.10 3ddb79bcHwuCQDjBICDTSis52hWguw xen/arch/x86/x86_32/mm.c 1.11 @@ -905,6 +906,7 @@ 42000d3ckiFc1qxa4AWqsd0t3lxuyw xen/arch/ 1.12 3ddb79bc4nTpGQOe6_-MbyZzkhlhFQ xen/arch/x86/x86_32/usercopy.c 1.13 3ddb79bcOMCu9-5mKpjIh5d0qqBDPg xen/arch/x86/x86_32/xen.lds 1.14 41bf1717Ty3hwN3E9swdu8QfnvGqww xen/arch/x86/x86_64/asm-offsets.c 1.15 +4202391dA91ZovYX9d_5zJi9yGvLoQ xen/arch/x86/x86_64/domain_build.c 1.16 40e96d3aLDI-nViMuYneD7VKYlZrVg xen/arch/x86/x86_64/entry.S 1.17 41bf1717XhPz_dNT5OKSjgmbFuWBuA xen/arch/x86/x86_64/mm.c 1.18 42000d3cMb8o1WuFBXC07c8i3lPZBw xen/arch/x86/x86_64/traps.c
2.1 --- a/xen/arch/x86/domain.c Thu Feb 03 13:07:34 2005 +0000 2.2 +++ b/xen/arch/x86/domain.c Thu Feb 03 14:45:50 2005 +0000 2.3 @@ -43,20 +43,6 @@ 2.4 static int opt_noreboot = 0; 2.5 boolean_param("noreboot", opt_noreboot); 2.6 2.7 -#if !defined(CONFIG_X86_64BITMODE) 2.8 -/* No ring-3 access in initial page tables. */ 2.9 -#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) 2.10 -#else 2.11 -/* Allow ring-3 access in long mode as guest cannot use ring 1. */ 2.12 -#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER) 2.13 -#endif 2.14 -#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) 2.15 -#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) 2.16 -#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) 2.17 - 2.18 -#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) 2.19 -#define round_pgdown(_p) ((_p)&PAGE_MASK) 2.20 - 2.21 static void default_idle(void) 2.22 { 2.23 __cli(); 2.24 @@ -795,364 +781,3 @@ void domain_relinquish_memory(struct dom 2.25 relinquish_list(d, &d->page_list); 2.26 } 2.27 2.28 - 2.29 -int construct_dom0(struct domain *p, 2.30 - unsigned long alloc_start, 2.31 - unsigned long alloc_end, 2.32 - char *image_start, unsigned long image_len, 2.33 - char *initrd_start, unsigned long initrd_len, 2.34 - char *cmdline) 2.35 -{ 2.36 - char *dst; 2.37 - int i, rc; 2.38 - unsigned long pfn, mfn; 2.39 - unsigned long nr_pages = (alloc_end - alloc_start) >> PAGE_SHIFT; 2.40 - unsigned long nr_pt_pages; 2.41 - unsigned long count; 2.42 - l2_pgentry_t *l2tab, *l2start; 2.43 - l1_pgentry_t *l1tab = NULL, *l1start = NULL; 2.44 - struct pfn_info *page = NULL; 2.45 - start_info_t *si; 2.46 - struct exec_domain *ed = p->exec_domain[0]; 2.47 - 2.48 - /* 2.49 - * This fully describes the memory layout of the initial domain. All 2.50 - * *_start address are page-aligned, except v_start (and v_end) which are 2.51 - * superpage-aligned. 2.52 - */ 2.53 - struct domain_setup_info dsi; 2.54 - unsigned long vinitrd_start; 2.55 - unsigned long vinitrd_end; 2.56 - unsigned long vphysmap_start; 2.57 - unsigned long vphysmap_end; 2.58 - unsigned long vstartinfo_start; 2.59 - unsigned long vstartinfo_end; 2.60 - unsigned long vstack_start; 2.61 - unsigned long vstack_end; 2.62 - unsigned long vpt_start; 2.63 - unsigned long vpt_end; 2.64 - unsigned long v_end; 2.65 - 2.66 - /* Machine address of next candidate page-table page. */ 2.67 - unsigned long mpt_alloc; 2.68 - 2.69 - extern void physdev_init_dom0(struct domain *); 2.70 - 2.71 - /* Sanity! */ 2.72 - if ( p->id != 0 ) 2.73 - BUG(); 2.74 - if ( test_bit(DF_CONSTRUCTED, &p->d_flags) ) 2.75 - BUG(); 2.76 - 2.77 - memset(&dsi, 0, sizeof(struct domain_setup_info)); 2.78 - 2.79 - printk("*** LOADING DOMAIN 0 ***\n"); 2.80 - 2.81 - /* 2.82 - * This is all a bit grim. We've moved the modules to the "safe" physical 2.83 - * memory region above MAP_DIRECTMAP_ADDRESS (48MB). Later in this 2.84 - * routine we're going to copy it down into the region that's actually 2.85 - * been allocated to domain 0. This is highly likely to be overlapping, so 2.86 - * we use a forward copy. 2.87 - * 2.88 - * MAP_DIRECTMAP_ADDRESS should be safe. The worst case is a machine with 2.89 - * 4GB and lots of network/disk cards that allocate loads of buffers. 2.90 - * We'll have to revisit this if we ever support PAE (64GB). 
2.91 - */ 2.92 - 2.93 - rc = parseelfimage(image_start, image_len, &dsi); 2.94 - if ( rc != 0 ) 2.95 - return rc; 2.96 - 2.97 - /* Set up domain options */ 2.98 - if ( dsi.use_writable_pagetables ) 2.99 - vm_assist(p, VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); 2.100 - 2.101 - if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 ) 2.102 - { 2.103 - printk("Initial guest OS must load to a page boundary.\n"); 2.104 - return -EINVAL; 2.105 - } 2.106 - 2.107 - /* 2.108 - * Why do we need this? The number of page-table frames depends on the 2.109 - * size of the bootstrap address space. But the size of the address space 2.110 - * depends on the number of page-table frames (since each one is mapped 2.111 - * read-only). We have a pair of simultaneous equations in two unknowns, 2.112 - * which we solve by exhaustive search. 2.113 - */ 2.114 - vinitrd_start = round_pgup(dsi.v_kernend); 2.115 - vinitrd_end = vinitrd_start + initrd_len; 2.116 - vphysmap_start = round_pgup(vinitrd_end); 2.117 - vphysmap_end = vphysmap_start + (nr_pages * sizeof(unsigned long)); 2.118 - vpt_start = round_pgup(vphysmap_end); 2.119 - for ( nr_pt_pages = 2; ; nr_pt_pages++ ) 2.120 - { 2.121 - vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE); 2.122 - vstartinfo_start = vpt_end; 2.123 - vstartinfo_end = vstartinfo_start + PAGE_SIZE; 2.124 - vstack_start = vstartinfo_end; 2.125 - vstack_end = vstack_start + PAGE_SIZE; 2.126 - v_end = (vstack_end + (1<<22)-1) & ~((1<<22)-1); 2.127 - if ( (v_end - vstack_end) < (512 << 10) ) 2.128 - v_end += 1 << 22; /* Add extra 4MB to get >= 512kB padding. */ 2.129 - if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >> 2.130 - L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages ) 2.131 - break; 2.132 - } 2.133 - 2.134 - printk("PHYSICAL MEMORY ARRANGEMENT:\n" 2.135 - " Kernel image: %p->%p\n" 2.136 - " Initrd image: %p->%p\n" 2.137 - " Dom0 alloc.: %08lx->%08lx\n", 2.138 - image_start, image_start + image_len, 2.139 - initrd_start, initrd_start + initrd_len, 2.140 - alloc_start, alloc_end); 2.141 - printk("VIRTUAL MEMORY ARRANGEMENT:\n" 2.142 - " Loaded kernel: %08lx->%08lx\n" 2.143 - " Init. ramdisk: %08lx->%08lx\n" 2.144 - " Phys-Mach map: %08lx->%08lx\n" 2.145 - " Page tables: %08lx->%08lx\n" 2.146 - " Start info: %08lx->%08lx\n" 2.147 - " Boot stack: %08lx->%08lx\n" 2.148 - " TOTAL: %08lx->%08lx\n", 2.149 - dsi.v_kernstart, dsi.v_kernend, 2.150 - vinitrd_start, vinitrd_end, 2.151 - vphysmap_start, vphysmap_end, 2.152 - vpt_start, vpt_end, 2.153 - vstartinfo_start, vstartinfo_end, 2.154 - vstack_start, vstack_end, 2.155 - dsi.v_start, v_end); 2.156 - printk(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry); 2.157 - 2.158 - if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) ) 2.159 - { 2.160 - printk("Initial guest OS requires too much space\n" 2.161 - "(%luMB is greater than %luMB limit)\n", 2.162 - (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20); 2.163 - return -ENOMEM; 2.164 - } 2.165 - 2.166 - /* 2.167 - * Protect the lowest 1GB of memory. We use a temporary mapping there 2.168 - * from which we copy the kernel and ramdisk images. 2.169 - */ 2.170 - if ( dsi.v_start < (1<<30) ) 2.171 - { 2.172 - printk("Initial loading isn't allowed to lowest 1GB of memory.\n"); 2.173 - return -EINVAL; 2.174 - } 2.175 - 2.176 - /* Paranoia: scrub DOM0's memory allocation. 
*/ 2.177 - printk("Scrubbing DOM0 RAM: "); 2.178 - dst = (char *)alloc_start; 2.179 - while ( dst < (char *)alloc_end ) 2.180 - { 2.181 -#define SCRUB_BYTES (100 * 1024 * 1024) /* 100MB */ 2.182 - printk("."); 2.183 - touch_nmi_watchdog(); 2.184 - if ( ((char *)alloc_end - dst) > SCRUB_BYTES ) 2.185 - { 2.186 - memset(dst, 0, SCRUB_BYTES); 2.187 - dst += SCRUB_BYTES; 2.188 - } 2.189 - else 2.190 - { 2.191 - memset(dst, 0, (char *)alloc_end - dst); 2.192 - break; 2.193 - } 2.194 - } 2.195 - printk("done.\n"); 2.196 - 2.197 - /* Construct a frame-allocation list for the initial domain. */ 2.198 - for ( mfn = (alloc_start>>PAGE_SHIFT); 2.199 - mfn < (alloc_end>>PAGE_SHIFT); 2.200 - mfn++ ) 2.201 - { 2.202 - page = &frame_table[mfn]; 2.203 - page->u.inuse.domain = p; 2.204 - page->u.inuse.type_info = 0; 2.205 - page->count_info = PGC_allocated | 1; 2.206 - list_add_tail(&page->list, &p->page_list); 2.207 - p->tot_pages++; p->max_pages++; 2.208 - } 2.209 - 2.210 - mpt_alloc = (vpt_start - dsi.v_start) + alloc_start; 2.211 - 2.212 - SET_GDT_ENTRIES(ed, DEFAULT_GDT_ENTRIES); 2.213 - SET_GDT_ADDRESS(ed, DEFAULT_GDT_ADDRESS); 2.214 - 2.215 - /* 2.216 - * We're basically forcing default RPLs to 1, so that our "what privilege 2.217 - * level are we returning to?" logic works. 2.218 - */ 2.219 - ed->thread.failsafe_selector = FLAT_GUESTOS_CS; 2.220 - ed->thread.event_selector = FLAT_GUESTOS_CS; 2.221 - ed->thread.guestos_ss = FLAT_GUESTOS_DS; 2.222 - for ( i = 0; i < 256; i++ ) 2.223 - ed->thread.traps[i].cs = FLAT_GUESTOS_CS; 2.224 - 2.225 - /* WARNING: The new domain must have its 'processor' field filled in! */ 2.226 - l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE; 2.227 - memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE); 2.228 - l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] = 2.229 - mk_l2_pgentry((unsigned long)l2start | __PAGE_HYPERVISOR); 2.230 - l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] = 2.231 - mk_l2_pgentry(__pa(p->mm_perdomain_pt) | __PAGE_HYPERVISOR); 2.232 - ed->mm.pagetable = mk_pagetable((unsigned long)l2start); 2.233 - 2.234 - l2tab += l2_table_offset(dsi.v_start); 2.235 - mfn = alloc_start >> PAGE_SHIFT; 2.236 - for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ ) 2.237 - { 2.238 - if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) ) 2.239 - { 2.240 - l1start = l1tab = (l1_pgentry_t *)mpt_alloc; 2.241 - mpt_alloc += PAGE_SIZE; 2.242 - *l2tab++ = mk_l2_pgentry((unsigned long)l1start | L2_PROT); 2.243 - clear_page(l1tab); 2.244 - if ( count == 0 ) 2.245 - l1tab += l1_table_offset(dsi.v_start); 2.246 - } 2.247 - *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT); 2.248 - 2.249 - page = &frame_table[mfn]; 2.250 - if ( !get_page_and_type(page, p, PGT_writable_page) ) 2.251 - BUG(); 2.252 - 2.253 - mfn++; 2.254 - } 2.255 - 2.256 - /* Pages that are part of page tables must be read only. */ 2.257 - l2tab = l2start + l2_table_offset(vpt_start); 2.258 - l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab); 2.259 - l1tab += l1_table_offset(vpt_start); 2.260 - l2tab++; 2.261 - for ( count = 0; count < nr_pt_pages; count++ ) 2.262 - { 2.263 - *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW); 2.264 - page = &frame_table[l1_pgentry_to_pagenr(*l1tab)]; 2.265 - if ( count == 0 ) 2.266 - { 2.267 - page->u.inuse.type_info &= ~PGT_type_mask; 2.268 - page->u.inuse.type_info |= PGT_l2_page_table; 2.269 - 2.270 - /* 2.271 - * No longer writable: decrement the type_count. 2.272 - * Installed as CR3: increment both the ref_count and type_count. 
2.273 - * Net: just increment the ref_count. 2.274 - */ 2.275 - get_page(page, p); /* an extra ref because of readable mapping */ 2.276 - 2.277 - /* Get another ref to L2 page so that it can be pinned. */ 2.278 - if ( !get_page_and_type(page, p, PGT_l2_page_table) ) 2.279 - BUG(); 2.280 - set_bit(_PGT_pinned, &page->u.inuse.type_info); 2.281 - } 2.282 - else 2.283 - { 2.284 - page->u.inuse.type_info &= ~PGT_type_mask; 2.285 - page->u.inuse.type_info |= PGT_l1_page_table; 2.286 - page->u.inuse.type_info |= 2.287 - ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-1))<<PGT_va_shift; 2.288 - 2.289 - /* 2.290 - * No longer writable: decrement the type_count. 2.291 - * This is an L1 page, installed in a validated L2 page: 2.292 - * increment both the ref_count and type_count. 2.293 - * Net: just increment the ref_count. 2.294 - */ 2.295 - get_page(page, p); /* an extra ref because of readable mapping */ 2.296 - } 2.297 - l1tab++; 2.298 - if( !((unsigned long)l1tab & (PAGE_SIZE - 1)) ) 2.299 - l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab); 2.300 - } 2.301 - 2.302 - /* Set up shared-info area. */ 2.303 - update_dom_time(p); 2.304 - p->shared_info->domain_time = 0; 2.305 - /* Mask all upcalls... */ 2.306 - for ( i = 0; i < MAX_VIRT_CPUS; i++ ) 2.307 - p->shared_info->vcpu_data[i].evtchn_upcall_mask = 1; 2.308 - p->shared_info->n_vcpu = smp_num_cpus; 2.309 - 2.310 - /* Install the new page tables. */ 2.311 - __cli(); 2.312 - write_ptbase(&ed->mm); 2.313 - 2.314 - /* Copy the OS image. */ 2.315 - (void)loadelfimage(image_start); 2.316 - 2.317 - /* Copy the initial ramdisk. */ 2.318 - if ( initrd_len != 0 ) 2.319 - memcpy((void *)vinitrd_start, initrd_start, initrd_len); 2.320 - 2.321 - /* Set up start info area. */ 2.322 - si = (start_info_t *)vstartinfo_start; 2.323 - memset(si, 0, PAGE_SIZE); 2.324 - si->nr_pages = p->tot_pages; 2.325 - si->shared_info = virt_to_phys(p->shared_info); 2.326 - si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN; 2.327 - si->pt_base = vpt_start; 2.328 - si->nr_pt_frames = nr_pt_pages; 2.329 - si->mfn_list = vphysmap_start; 2.330 - 2.331 - /* Write the phys->machine and machine->phys table entries. */ 2.332 - for ( pfn = 0; pfn < p->tot_pages; pfn++ ) 2.333 - { 2.334 - mfn = pfn + (alloc_start>>PAGE_SHIFT); 2.335 -#ifndef NDEBUG 2.336 -#define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT) 2.337 - if ( pfn > REVERSE_START ) 2.338 - mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START); 2.339 -#endif 2.340 - ((unsigned long *)vphysmap_start)[pfn] = mfn; 2.341 - machine_to_phys_mapping[mfn] = pfn; 2.342 - } 2.343 - 2.344 - if ( initrd_len != 0 ) 2.345 - { 2.346 - si->mod_start = vinitrd_start; 2.347 - si->mod_len = initrd_len; 2.348 - printk("Initrd len 0x%lx, start at 0x%08lx\n", 2.349 - si->mod_len, si->mod_start); 2.350 - } 2.351 - 2.352 - dst = si->cmd_line; 2.353 - if ( cmdline != NULL ) 2.354 - { 2.355 - for ( i = 0; i < 255; i++ ) 2.356 - { 2.357 - if ( cmdline[i] == '\0' ) 2.358 - break; 2.359 - *dst++ = cmdline[i]; 2.360 - } 2.361 - } 2.362 - *dst = '\0'; 2.363 - 2.364 - /* Reinstate the caller's page tables. */ 2.365 - write_ptbase(&current->mm); 2.366 - __sti(); 2.367 - 2.368 - /* Destroy low mappings - they were only for our convenience. */ 2.369 - for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ ) 2.370 - if ( l2_pgentry_val(l2start[i]) & _PAGE_PSE ) 2.371 - l2start[i] = mk_l2_pgentry(0); 2.372 - zap_low_mappings(); /* Do the same for the idle page tables. */ 2.373 - 2.374 - /* DOM0 gets access to everything. 
*/ 2.375 - physdev_init_dom0(p); 2.376 - 2.377 - set_bit(DF_CONSTRUCTED, &p->d_flags); 2.378 - 2.379 - new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start); 2.380 - 2.381 -#if 0 /* XXXXX DO NOT CHECK IN ENABLED !!! (but useful for testing so leave) */ 2.382 - shadow_lock(&p->mm); 2.383 - shadow_mode_enable(p, SHM_test); 2.384 - shadow_unlock(&p->mm); 2.385 -#endif 2.386 - 2.387 - return 0; 2.388 -}
3.1 --- a/xen/arch/x86/memory.c Thu Feb 03 13:07:34 2005 +0000 3.2 +++ b/xen/arch/x86/memory.c Thu Feb 03 14:45:50 2005 +0000 3.3 @@ -444,7 +444,7 @@ static void put_page_from_l1e(l1_pgentry 3.4 if ( !(l1v & _PAGE_PRESENT) || !pfn_is_ram(pfn) ) 3.5 return; 3.6 3.7 - e = page->u.inuse.domain; 3.8 + e = page_get_owner(page); 3.9 if ( unlikely(e != d) ) 3.10 { 3.11 /* 3.12 @@ -493,7 +493,7 @@ static void put_page_from_l2e(l2_pgentry 3.13 3.14 static int alloc_l2_table(struct pfn_info *page) 3.15 { 3.16 - struct domain *d = page->u.inuse.domain; 3.17 + struct domain *d = page_get_owner(page); 3.18 unsigned long page_nr = page_to_pfn(page); 3.19 l2_pgentry_t *pl2e; 3.20 int i; 3.21 @@ -512,7 +512,7 @@ static int alloc_l2_table(struct pfn_inf 3.22 pl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] = 3.23 mk_l2_pgentry((page_nr << PAGE_SHIFT) | __PAGE_HYPERVISOR); 3.24 pl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] = 3.25 - mk_l2_pgentry(__pa(page->u.inuse.domain->mm_perdomain_pt) | 3.26 + mk_l2_pgentry(__pa(page_get_owner(page)->mm_perdomain_pt) | 3.27 __PAGE_HYPERVISOR); 3.28 #endif 3.29 3.30 @@ -530,7 +530,7 @@ static int alloc_l2_table(struct pfn_inf 3.31 3.32 static int alloc_l1_table(struct pfn_info *page) 3.33 { 3.34 - struct domain *d = page->u.inuse.domain; 3.35 + struct domain *d = page_get_owner(page); 3.36 unsigned long page_nr = page_to_pfn(page); 3.37 l1_pgentry_t *pl1e; 3.38 int i; 3.39 @@ -570,7 +570,7 @@ static void free_l2_table(struct pfn_inf 3.40 3.41 static void free_l1_table(struct pfn_info *page) 3.42 { 3.43 - struct domain *d = page->u.inuse.domain; 3.44 + struct domain *d = page_get_owner(page); 3.45 unsigned long page_nr = page - frame_table; 3.46 l1_pgentry_t *pl1e; 3.47 int i; 3.48 @@ -731,7 +731,7 @@ int alloc_page_type(struct pfn_info *pag 3.49 3.50 void free_page_type(struct pfn_info *page, unsigned int type) 3.51 { 3.52 - struct domain *d = page->u.inuse.domain; 3.53 + struct domain *d = page_get_owner(page); 3.54 3.55 switch ( type ) 3.56 { 3.57 @@ -774,7 +774,7 @@ void put_page_type(struct pfn_info *page 3.58 * See domain.c:relinquish_list(). 3.59 */ 3.60 ASSERT((x & PGT_validated) || 3.61 - test_bit(DF_DYING, &page->u.inuse.domain->d_flags)); 3.62 + test_bit(DF_DYING, &page_get_owner(page)->d_flags)); 3.63 3.64 if ( unlikely((nx & PGT_count_mask) == 0) ) 3.65 { 3.66 @@ -832,7 +832,7 @@ int get_page_type(struct pfn_info *page, 3.67 * may be unnecessary (e.g., page was GDT/LDT) but those 3.68 * circumstances should be very rare. 3.69 */ 3.70 - struct domain *d = page->u.inuse.domain; 3.71 + struct domain *d = page_get_owner(page); 3.72 if ( unlikely(NEED_FLUSH(tlbflush_time[d->exec_domain[0]->processor], 3.73 page->tlbflush_timestamp)) ) 3.74 { 3.75 @@ -987,7 +987,7 @@ static int do_extended_command(unsigned 3.76 if ( unlikely(!(okay = get_page_from_pagenr(pfn, FOREIGNDOM))) ) 3.77 { 3.78 MEM_LOG("Page %08lx bad domain (dom=%p)", 3.79 - ptr, page->u.inuse.domain); 3.80 + ptr, page_get_owner(page)); 3.81 } 3.82 else if ( likely(test_and_clear_bit(_PGT_pinned, 3.83 &page->u.inuse.type_info)) ) 3.84 @@ -1117,7 +1117,7 @@ static int do_extended_command(unsigned 3.85 * benign reference to the page (PGC_allocated). If that reference 3.86 * disappears then the deallocation routine will safely spin. 
3.87 */ 3.88 - nd = page->u.inuse.domain; 3.89 + nd = page_get_owner(page); 3.90 y = page->count_info; 3.91 do { 3.92 x = y; 3.93 @@ -1173,7 +1173,7 @@ static int do_extended_command(unsigned 3.94 if ( unlikely(e->tot_pages++ == 0) ) 3.95 get_knownalive_domain(e); 3.96 list_add_tail(&page->list, &e->page_list); 3.97 - page->u.inuse.domain = e; 3.98 + page_set_owner(page, e); 3.99 3.100 spin_unlock(&e->page_alloc_lock); 3.101 3.102 @@ -1229,7 +1229,7 @@ static int do_extended_command(unsigned 3.103 * benign reference to the page (PGC_allocated). If that reference 3.104 * disappears then the deallocation routine will safely spin. 3.105 */ 3.106 - nd = page->u.inuse.domain; 3.107 + nd = page_get_owner(page); 3.108 y = page->count_info; 3.109 do { 3.110 x = y; 3.111 @@ -2072,7 +2072,7 @@ void audit_domain(struct domain *d) 3.112 pfn = list_entry(list_ent, struct pfn_info, list) - frame_table; 3.113 page = &frame_table[pfn]; 3.114 3.115 - if ( page->u.inuse.domain != d ) 3.116 + if ( page_get_owner(page) != d ) 3.117 BUG(); 3.118 3.119 if ( (page->u.inuse.type_info & PGT_count_mask) > 3.120 @@ -2118,7 +2118,7 @@ void audit_domain(struct domain *d) 3.121 pfn = list_entry(list_ent, struct pfn_info, list) - frame_table; 3.122 page = &frame_table[pfn]; 3.123 3.124 - if ( page->u.inuse.domain != d ) 3.125 + if ( page_get_owner(page) != d ) 3.126 BUG(); 3.127 3.128 switch ( page->u.inuse.type_info & PGT_type_mask ) 3.129 @@ -2144,10 +2144,10 @@ void audit_domain(struct domain *d) 3.130 unsigned long l1pfn = pt[i]>>PAGE_SHIFT; 3.131 struct pfn_info *l1page = &frame_table[l1pfn]; 3.132 3.133 - if ( l1page->u.inuse.domain != d ) 3.134 + if ( page_get_owner(l1page) != d ) 3.135 { 3.136 printk("L2: Skip bizarre page belonging to other " 3.137 - "dom %p\n", l1page->u.inuse.domain); 3.138 + "dom %p\n", page_get_owner(l1page)); 3.139 continue; 3.140 } 3.141 3.142 @@ -2222,12 +2222,12 @@ void audit_domain(struct domain *d) 3.143 3.144 } 3.145 3.146 - if ( l1page->u.inuse.domain != d ) 3.147 + if ( page_get_owner(l1page) != d ) 3.148 { 3.149 - printk("Audit %d: [%lx,%x] Skip foreign page dom=%lx " 3.150 + printk("Audit %d: [%lx,%x] Skip foreign page dom=%p " 3.151 "pfn=%lx c=%08x t=%08x m2p=%lx\n", 3.152 d->id, pfn, i, 3.153 - (unsigned long)l1page->u.inuse.domain, 3.154 + page_get_owner(l1page), 3.155 l1pfn, 3.156 l1page->count_info, 3.157 l1page->u.inuse.type_info, 3.158 @@ -2312,7 +2312,7 @@ void audit_domain(struct domain *d) 3.159 unsigned long l1pfn = pt[i]>>PAGE_SHIFT; 3.160 struct pfn_info *l1page = &frame_table[l1pfn]; 3.161 3.162 - if ( l1page->u.inuse.domain == d) 3.163 + if ( page_get_owner(l1page) == d ) 3.164 adjust(l1page, 1, 1); 3.165 } 3.166 } 3.167 @@ -2333,7 +2333,7 @@ void audit_domain(struct domain *d) 3.168 unsigned long l1pfn = pt[i]>>PAGE_SHIFT; 3.169 struct pfn_info *l1page = &frame_table[l1pfn]; 3.170 3.171 - if ( (l1page->u.inuse.domain != d) || 3.172 + if ( (page_get_owner(l1page) != d) || 3.173 (l1pfn < 0x100) || (l1pfn > max_page) ) 3.174 continue; 3.175
4.1 --- a/xen/arch/x86/shadow.c Thu Feb 03 13:07:34 2005 +0000 4.2 +++ b/xen/arch/x86/shadow.c Thu Feb 03 14:45:50 2005 +0000 4.3 @@ -420,7 +420,7 @@ static inline struct pfn_info *alloc_sha 4.4 void unshadow_table(unsigned long gpfn, unsigned int type) 4.5 { 4.6 unsigned long spfn; 4.7 - struct domain *d = frame_table[gpfn].u.inuse.domain; 4.8 + struct domain *d = page_get_owner(&frame_table[gpfn]); 4.9 4.10 SH_VLOG("unshadow_table type=%08x gpfn=%08lx", type, gpfn); 4.11 4.12 @@ -494,7 +494,7 @@ unsigned long shadow_l2_table( 4.13 spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] = 4.14 mk_l2_pgentry((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR); 4.15 spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] = 4.16 - mk_l2_pgentry(__pa(frame_table[gpfn].u.inuse.domain->mm_perdomain_pt) | 4.17 + mk_l2_pgentry(__pa(page_get_owner(&frame_table[gpfn])->mm_perdomain_pt) | 4.18 __PAGE_HYPERVISOR); 4.19 } 4.20 #endif 4.21 @@ -924,7 +924,7 @@ int check_pagetable(struct mm_struct *m, 4.22 4.23 if (m->shadow_mode != SHM_full_32) { 4.24 if ( (l2_pgentry_val(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]) != 4.25 - ((__pa(frame_table[gpfn].u.inuse.domain->mm.perdomain_pt) | 4.26 + ((__pa(page_get_owner(&frame_table[gpfn])->mm.perdomain_pt) | 4.27 __PAGE_HYPERVISOR))) ) 4.28 FAILPT("hypervisor per-domain map inconsistent"); 4.29 }
5.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 5.2 +++ b/xen/arch/x86/x86_32/domain_build.c Thu Feb 03 14:45:50 2005 +0000 5.3 @@ -0,0 +1,389 @@ 5.4 +/****************************************************************************** 5.5 + * domain_build.c 5.6 + * 5.7 + * Copyright (c) 2002-2005, K A Fraser 5.8 + */ 5.9 + 5.10 +#include <xen/config.h> 5.11 +#include <xen/init.h> 5.12 +#include <xen/lib.h> 5.13 +#include <xen/sched.h> 5.14 +#include <xen/smp.h> 5.15 +#include <xen/delay.h> 5.16 +#include <asm/regs.h> 5.17 +#include <asm/system.h> 5.18 +#include <asm/io.h> 5.19 +#include <asm/processor.h> 5.20 +#include <asm/desc.h> 5.21 +#include <asm/i387.h> 5.22 +#include <xen/event.h> 5.23 +#include <xen/elf.h> 5.24 +#include <xen/kernel.h> 5.25 + 5.26 +/* No ring-3 access in initial page tables. */ 5.27 +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) 5.28 +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) 5.29 + 5.30 +#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) 5.31 +#define round_pgdown(_p) ((_p)&PAGE_MASK) 5.32 + 5.33 +int construct_dom0(struct domain *p, 5.34 + unsigned long alloc_start, 5.35 + unsigned long alloc_end, 5.36 + char *image_start, unsigned long image_len, 5.37 + char *initrd_start, unsigned long initrd_len, 5.38 + char *cmdline) 5.39 +{ 5.40 + char *dst; 5.41 + int i, rc; 5.42 + unsigned long pfn, mfn; 5.43 + unsigned long nr_pages = (alloc_end - alloc_start) >> PAGE_SHIFT; 5.44 + unsigned long nr_pt_pages; 5.45 + unsigned long count; 5.46 + l2_pgentry_t *l2tab, *l2start; 5.47 + l1_pgentry_t *l1tab = NULL, *l1start = NULL; 5.48 + struct pfn_info *page = NULL; 5.49 + start_info_t *si; 5.50 + struct exec_domain *ed = p->exec_domain[0]; 5.51 + 5.52 + /* 5.53 + * This fully describes the memory layout of the initial domain. All 5.54 + * *_start address are page-aligned, except v_start (and v_end) which are 5.55 + * superpage-aligned. 5.56 + */ 5.57 + struct domain_setup_info dsi; 5.58 + unsigned long vinitrd_start; 5.59 + unsigned long vinitrd_end; 5.60 + unsigned long vphysmap_start; 5.61 + unsigned long vphysmap_end; 5.62 + unsigned long vstartinfo_start; 5.63 + unsigned long vstartinfo_end; 5.64 + unsigned long vstack_start; 5.65 + unsigned long vstack_end; 5.66 + unsigned long vpt_start; 5.67 + unsigned long vpt_end; 5.68 + unsigned long v_end; 5.69 + 5.70 + /* Machine address of next candidate page-table page. */ 5.71 + unsigned long mpt_alloc; 5.72 + 5.73 + extern void physdev_init_dom0(struct domain *); 5.74 + 5.75 + /* Sanity! */ 5.76 + if ( p->id != 0 ) 5.77 + BUG(); 5.78 + if ( test_bit(DF_CONSTRUCTED, &p->d_flags) ) 5.79 + BUG(); 5.80 + 5.81 + memset(&dsi, 0, sizeof(struct domain_setup_info)); 5.82 + 5.83 + printk("*** LOADING DOMAIN 0 ***\n"); 5.84 + 5.85 + /* 5.86 + * This is all a bit grim. We've moved the modules to the "safe" physical 5.87 + * memory region above MAP_DIRECTMAP_ADDRESS (48MB). Later in this 5.88 + * routine we're going to copy it down into the region that's actually 5.89 + * been allocated to domain 0. This is highly likely to be overlapping, so 5.90 + * we use a forward copy. 5.91 + * 5.92 + * MAP_DIRECTMAP_ADDRESS should be safe. The worst case is a machine with 5.93 + * 4GB and lots of network/disk cards that allocate loads of buffers. 5.94 + * We'll have to revisit this if we ever support PAE (64GB). 
5.95 + */ 5.96 + 5.97 + rc = parseelfimage(image_start, image_len, &dsi); 5.98 + if ( rc != 0 ) 5.99 + return rc; 5.100 + 5.101 + /* Set up domain options */ 5.102 + if ( dsi.use_writable_pagetables ) 5.103 + vm_assist(p, VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); 5.104 + 5.105 + if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 ) 5.106 + { 5.107 + printk("Initial guest OS must load to a page boundary.\n"); 5.108 + return -EINVAL; 5.109 + } 5.110 + 5.111 + /* 5.112 + * Why do we need this? The number of page-table frames depends on the 5.113 + * size of the bootstrap address space. But the size of the address space 5.114 + * depends on the number of page-table frames (since each one is mapped 5.115 + * read-only). We have a pair of simultaneous equations in two unknowns, 5.116 + * which we solve by exhaustive search. 5.117 + */ 5.118 + vinitrd_start = round_pgup(dsi.v_kernend); 5.119 + vinitrd_end = vinitrd_start + initrd_len; 5.120 + vphysmap_start = round_pgup(vinitrd_end); 5.121 + vphysmap_end = vphysmap_start + (nr_pages * sizeof(unsigned long)); 5.122 + vpt_start = round_pgup(vphysmap_end); 5.123 + for ( nr_pt_pages = 2; ; nr_pt_pages++ ) 5.124 + { 5.125 + vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE); 5.126 + vstartinfo_start = vpt_end; 5.127 + vstartinfo_end = vstartinfo_start + PAGE_SIZE; 5.128 + vstack_start = vstartinfo_end; 5.129 + vstack_end = vstack_start + PAGE_SIZE; 5.130 + v_end = (vstack_end + (1<<22)-1) & ~((1<<22)-1); 5.131 + if ( (v_end - vstack_end) < (512 << 10) ) 5.132 + v_end += 1 << 22; /* Add extra 4MB to get >= 512kB padding. */ 5.133 + if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >> 5.134 + L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages ) 5.135 + break; 5.136 + } 5.137 + 5.138 + printk("PHYSICAL MEMORY ARRANGEMENT:\n" 5.139 + " Kernel image: %p->%p\n" 5.140 + " Initrd image: %p->%p\n" 5.141 + " Dom0 alloc.: %08lx->%08lx\n", 5.142 + image_start, image_start + image_len, 5.143 + initrd_start, initrd_start + initrd_len, 5.144 + alloc_start, alloc_end); 5.145 + printk("VIRTUAL MEMORY ARRANGEMENT:\n" 5.146 + " Loaded kernel: %08lx->%08lx\n" 5.147 + " Init. ramdisk: %08lx->%08lx\n" 5.148 + " Phys-Mach map: %08lx->%08lx\n" 5.149 + " Page tables: %08lx->%08lx\n" 5.150 + " Start info: %08lx->%08lx\n" 5.151 + " Boot stack: %08lx->%08lx\n" 5.152 + " TOTAL: %08lx->%08lx\n", 5.153 + dsi.v_kernstart, dsi.v_kernend, 5.154 + vinitrd_start, vinitrd_end, 5.155 + vphysmap_start, vphysmap_end, 5.156 + vpt_start, vpt_end, 5.157 + vstartinfo_start, vstartinfo_end, 5.158 + vstack_start, vstack_end, 5.159 + dsi.v_start, v_end); 5.160 + printk(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry); 5.161 + 5.162 + if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) ) 5.163 + { 5.164 + printk("Initial guest OS requires too much space\n" 5.165 + "(%luMB is greater than %luMB limit)\n", 5.166 + (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20); 5.167 + return -ENOMEM; 5.168 + } 5.169 + 5.170 + /* 5.171 + * Protect the lowest 1GB of memory. We use a temporary mapping there 5.172 + * from which we copy the kernel and ramdisk images. 5.173 + */ 5.174 + if ( dsi.v_start < (1<<30) ) 5.175 + { 5.176 + printk("Initial loading isn't allowed to lowest 1GB of memory.\n"); 5.177 + return -EINVAL; 5.178 + } 5.179 + 5.180 + /* Paranoia: scrub DOM0's memory allocation. 
*/ 5.181 + printk("Scrubbing DOM0 RAM: "); 5.182 + dst = (char *)alloc_start; 5.183 + while ( dst < (char *)alloc_end ) 5.184 + { 5.185 +#define SCRUB_BYTES (100 * 1024 * 1024) /* 100MB */ 5.186 + printk("."); 5.187 + touch_nmi_watchdog(); 5.188 + if ( ((char *)alloc_end - dst) > SCRUB_BYTES ) 5.189 + { 5.190 + memset(dst, 0, SCRUB_BYTES); 5.191 + dst += SCRUB_BYTES; 5.192 + } 5.193 + else 5.194 + { 5.195 + memset(dst, 0, (char *)alloc_end - dst); 5.196 + break; 5.197 + } 5.198 + } 5.199 + printk("done.\n"); 5.200 + 5.201 + /* Construct a frame-allocation list for the initial domain. */ 5.202 + for ( mfn = (alloc_start>>PAGE_SHIFT); 5.203 + mfn < (alloc_end>>PAGE_SHIFT); 5.204 + mfn++ ) 5.205 + { 5.206 + page = &frame_table[mfn]; 5.207 + page_set_owner(page, p); 5.208 + page->u.inuse.type_info = 0; 5.209 + page->count_info = PGC_allocated | 1; 5.210 + list_add_tail(&page->list, &p->page_list); 5.211 + p->tot_pages++; p->max_pages++; 5.212 + } 5.213 + 5.214 + mpt_alloc = (vpt_start - dsi.v_start) + alloc_start; 5.215 + 5.216 + SET_GDT_ENTRIES(ed, DEFAULT_GDT_ENTRIES); 5.217 + SET_GDT_ADDRESS(ed, DEFAULT_GDT_ADDRESS); 5.218 + 5.219 + /* 5.220 + * We're basically forcing default RPLs to 1, so that our "what privilege 5.221 + * level are we returning to?" logic works. 5.222 + */ 5.223 + ed->thread.failsafe_selector = FLAT_GUESTOS_CS; 5.224 + ed->thread.event_selector = FLAT_GUESTOS_CS; 5.225 + ed->thread.guestos_ss = FLAT_GUESTOS_DS; 5.226 + for ( i = 0; i < 256; i++ ) 5.227 + ed->thread.traps[i].cs = FLAT_GUESTOS_CS; 5.228 + 5.229 + /* WARNING: The new domain must have its 'processor' field filled in! */ 5.230 + l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE; 5.231 + memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE); 5.232 + l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] = 5.233 + mk_l2_pgentry((unsigned long)l2start | __PAGE_HYPERVISOR); 5.234 + l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] = 5.235 + mk_l2_pgentry(__pa(p->mm_perdomain_pt) | __PAGE_HYPERVISOR); 5.236 + ed->mm.pagetable = mk_pagetable((unsigned long)l2start); 5.237 + 5.238 + l2tab += l2_table_offset(dsi.v_start); 5.239 + mfn = alloc_start >> PAGE_SHIFT; 5.240 + for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ ) 5.241 + { 5.242 + if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) ) 5.243 + { 5.244 + l1start = l1tab = (l1_pgentry_t *)mpt_alloc; 5.245 + mpt_alloc += PAGE_SIZE; 5.246 + *l2tab++ = mk_l2_pgentry((unsigned long)l1start | L2_PROT); 5.247 + clear_page(l1tab); 5.248 + if ( count == 0 ) 5.249 + l1tab += l1_table_offset(dsi.v_start); 5.250 + } 5.251 + *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT); 5.252 + 5.253 + page = &frame_table[mfn]; 5.254 + if ( !get_page_and_type(page, p, PGT_writable_page) ) 5.255 + BUG(); 5.256 + 5.257 + mfn++; 5.258 + } 5.259 + 5.260 + /* Pages that are part of page tables must be read only. */ 5.261 + l2tab = l2start + l2_table_offset(vpt_start); 5.262 + l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab); 5.263 + l1tab += l1_table_offset(vpt_start); 5.264 + l2tab++; 5.265 + for ( count = 0; count < nr_pt_pages; count++ ) 5.266 + { 5.267 + *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW); 5.268 + page = &frame_table[l1_pgentry_to_pagenr(*l1tab)]; 5.269 + if ( count == 0 ) 5.270 + { 5.271 + page->u.inuse.type_info &= ~PGT_type_mask; 5.272 + page->u.inuse.type_info |= PGT_l2_page_table; 5.273 + 5.274 + /* 5.275 + * No longer writable: decrement the type_count. 5.276 + * Installed as CR3: increment both the ref_count and type_count. 
5.277 + * Net: just increment the ref_count. 5.278 + */ 5.279 + get_page(page, p); /* an extra ref because of readable mapping */ 5.280 + 5.281 + /* Get another ref to L2 page so that it can be pinned. */ 5.282 + if ( !get_page_and_type(page, p, PGT_l2_page_table) ) 5.283 + BUG(); 5.284 + set_bit(_PGT_pinned, &page->u.inuse.type_info); 5.285 + } 5.286 + else 5.287 + { 5.288 + page->u.inuse.type_info &= ~PGT_type_mask; 5.289 + page->u.inuse.type_info |= PGT_l1_page_table; 5.290 + page->u.inuse.type_info |= 5.291 + ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-1))<<PGT_va_shift; 5.292 + 5.293 + /* 5.294 + * No longer writable: decrement the type_count. 5.295 + * This is an L1 page, installed in a validated L2 page: 5.296 + * increment both the ref_count and type_count. 5.297 + * Net: just increment the ref_count. 5.298 + */ 5.299 + get_page(page, p); /* an extra ref because of readable mapping */ 5.300 + } 5.301 + l1tab++; 5.302 + if( !((unsigned long)l1tab & (PAGE_SIZE - 1)) ) 5.303 + l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab); 5.304 + } 5.305 + 5.306 + /* Set up shared-info area. */ 5.307 + update_dom_time(p); 5.308 + p->shared_info->domain_time = 0; 5.309 + /* Mask all upcalls... */ 5.310 + for ( i = 0; i < MAX_VIRT_CPUS; i++ ) 5.311 + p->shared_info->vcpu_data[i].evtchn_upcall_mask = 1; 5.312 + p->shared_info->n_vcpu = smp_num_cpus; 5.313 + 5.314 + /* Install the new page tables. */ 5.315 + __cli(); 5.316 + write_ptbase(&ed->mm); 5.317 + 5.318 + /* Copy the OS image. */ 5.319 + (void)loadelfimage(image_start); 5.320 + 5.321 + /* Copy the initial ramdisk. */ 5.322 + if ( initrd_len != 0 ) 5.323 + memcpy((void *)vinitrd_start, initrd_start, initrd_len); 5.324 + 5.325 + /* Set up start info area. */ 5.326 + si = (start_info_t *)vstartinfo_start; 5.327 + memset(si, 0, PAGE_SIZE); 5.328 + si->nr_pages = p->tot_pages; 5.329 + si->shared_info = virt_to_phys(p->shared_info); 5.330 + si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN; 5.331 + si->pt_base = vpt_start; 5.332 + si->nr_pt_frames = nr_pt_pages; 5.333 + si->mfn_list = vphysmap_start; 5.334 + 5.335 + /* Write the phys->machine and machine->phys table entries. */ 5.336 + for ( pfn = 0; pfn < p->tot_pages; pfn++ ) 5.337 + { 5.338 + mfn = pfn + (alloc_start>>PAGE_SHIFT); 5.339 +#ifndef NDEBUG 5.340 +#define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT) 5.341 + if ( pfn > REVERSE_START ) 5.342 + mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START); 5.343 +#endif 5.344 + ((unsigned long *)vphysmap_start)[pfn] = mfn; 5.345 + machine_to_phys_mapping[mfn] = pfn; 5.346 + } 5.347 + 5.348 + if ( initrd_len != 0 ) 5.349 + { 5.350 + si->mod_start = vinitrd_start; 5.351 + si->mod_len = initrd_len; 5.352 + printk("Initrd len 0x%lx, start at 0x%08lx\n", 5.353 + si->mod_len, si->mod_start); 5.354 + } 5.355 + 5.356 + dst = si->cmd_line; 5.357 + if ( cmdline != NULL ) 5.358 + { 5.359 + for ( i = 0; i < 255; i++ ) 5.360 + { 5.361 + if ( cmdline[i] == '\0' ) 5.362 + break; 5.363 + *dst++ = cmdline[i]; 5.364 + } 5.365 + } 5.366 + *dst = '\0'; 5.367 + 5.368 + /* Reinstate the caller's page tables. */ 5.369 + write_ptbase(&current->mm); 5.370 + __sti(); 5.371 + 5.372 + /* Destroy low mappings - they were only for our convenience. */ 5.373 + for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ ) 5.374 + if ( l2_pgentry_val(l2start[i]) & _PAGE_PSE ) 5.375 + l2start[i] = mk_l2_pgentry(0); 5.376 + zap_low_mappings(); /* Do the same for the idle page tables. */ 5.377 + 5.378 + /* DOM0 gets access to everything. 
*/ 5.379 + physdev_init_dom0(p); 5.380 + 5.381 + set_bit(DF_CONSTRUCTED, &p->d_flags); 5.382 + 5.383 + new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start); 5.384 + 5.385 +#if 0 /* XXXXX DO NOT CHECK IN ENABLED !!! (but useful for testing so leave) */ 5.386 + shadow_lock(&p->mm); 5.387 + shadow_mode_enable(p, SHM_test); 5.388 + shadow_unlock(&p->mm); 5.389 +#endif 5.390 + 5.391 + return 0; 5.392 +}
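The "simultaneous equations" comment in construct_dom0() deserves a worked example: the number of page-table frames depends on the size of the bootstrap address space, but that space must also contain the page-table frames themselves, so the builder simply tries nr_pt_pages = 2, 3, ... until the tables it would build cover everything they map. The self-contained sketch below re-derives the loop with x86_32 non-PAE constants; solve_nr_pt_pages() and its example inputs are hypothetical helpers for illustration, not part of this changeset.

```c
/* Toy re-derivation of the nr_pt_pages search in construct_dom0().
 * Constants are x86_32 non-PAE values; the helper is hypothetical. */
#include <stdio.h>

#define PAGE_SIZE           4096UL
#define L2_PAGETABLE_SHIFT  22
#define SUPERPAGE           (1UL << L2_PAGETABLE_SHIFT)   /* 4MB */

static unsigned long solve_nr_pt_pages(unsigned long v_start,
                                       unsigned long vpt_start)
{
    unsigned long nr, vpt_end, vstack_end, v_end, l1_needed;

    for ( nr = 2; ; nr++ )
    {
        vpt_end    = vpt_start + nr * PAGE_SIZE;
        vstack_end = vpt_end + 2 * PAGE_SIZE;   /* start_info + stack */
        v_end      = (vstack_end + SUPERPAGE - 1) & ~(SUPERPAGE - 1);
        if ( (v_end - vstack_end) < (512UL << 10) )
            v_end += SUPERPAGE;                 /* >= 512kB of padding */
        /* One L1 frame per 4MB mapped, plus the L2 frame itself. */
        l1_needed = (v_end - v_start + SUPERPAGE - 1) >> L2_PAGETABLE_SHIFT;
        if ( l1_needed + 1 <= nr )
            return nr;
    }
}

int main(void)
{
    /* Example: page tables start 12MB into a region based at 3GB. */
    printf("nr_pt_pages = %lu\n",
           solve_nr_pt_pages(0xC0000000UL, 0xC0000000UL + 12 * (1UL << 20)));
    return 0;
}
```

With these example inputs the search settles on five frames: the padded area rounds up to 16MB, which needs four L1 frames plus the L2 frame.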
6.1 --- a/xen/arch/x86/x86_32/mm.c Thu Feb 03 13:07:34 2005 +0000 6.2 +++ b/xen/arch/x86/x86_32/mm.c Thu Feb 03 14:45:50 2005 +0000 6.3 @@ -151,13 +151,13 @@ void subarch_init_memory(struct domain * 6.4 * 64-bit operations on them. Also, just for sanity, we assert the size 6.5 * of the structure here. 6.6 */ 6.7 - if ( (offsetof(struct pfn_info, u.inuse.domain) != 6.8 + if ( (offsetof(struct pfn_info, u.inuse._domain) != 6.9 (offsetof(struct pfn_info, count_info) + sizeof(u32))) || 6.10 (sizeof(struct pfn_info) != 24) ) 6.11 { 6.12 printk("Weird pfn_info layout (%ld,%ld,%d)\n", 6.13 offsetof(struct pfn_info, count_info), 6.14 - offsetof(struct pfn_info, u.inuse.domain), 6.15 + offsetof(struct pfn_info, u.inuse._domain), 6.16 sizeof(struct pfn_info)); 6.17 for ( ; ; ) ; 6.18 } 6.19 @@ -167,11 +167,11 @@ void subarch_init_memory(struct domain * 6.20 idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)]); 6.21 for ( i = 0; i < 1024; i++ ) 6.22 { 6.23 - frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1; 6.24 + frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1; 6.25 /* gdt to make sure it's only mapped read-only by non-privileged 6.26 domains. */ 6.27 frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1; 6.28 - frame_table[m2p_start_mfn+i].u.inuse.domain = dom_xen; 6.29 + page_set_owner(&frame_table[m2p_start_mfn+i], dom_xen); 6.30 } 6.31 } 6.32
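Both subarch_init_memory() implementations now guard the new layout at boot: _domain must be the u32 immediately after count_info (so the pair can be hit with one 64-bit atomic), and on x86_32 the whole structure must stay 24 bytes. The same invariants can also be expressed at compile time; the sketch below uses the generic negative-array-size trick, which is plain C rather than anything in Xen's tree, over a cut-down mirror of the PACKED layout in the mm.h hunk.

```c
/*
 * Compile-time rendering of the invariants the boot-time checks enforce.
 * LAYOUT_ASSERT (negative-array-size trick) is generic C, not a Xen
 * facility, and the struct is a cut-down mirror of the mm.h hunk.
 */
#include <stddef.h>
#include <stdint.h>

struct list_head { struct list_head *next, *prev; };

struct pfn_info {
    struct list_head list;       /* two pointers: 8 bytes on x86_32 */
    uint32_t tlbflush_timestamp; /* moved ahead of the union by this cset */
    uint32_t count_info;
    union {
        struct { uint32_t _domain; uint32_t type_info; } inuse;
        struct { uint32_t cpu_mask; uint8_t order; } free;
    } u;
};

#define LAYOUT_ASSERT(name, cond) \
    typedef char layout_assert_##name[(cond) ? 1 : -1]

/* The pickled owner must sit in the 4 bytes right after count_info. */
LAYOUT_ASSERT(domain_follows_count,
              offsetof(struct pfn_info, u.inuse._domain) ==
              offsetof(struct pfn_info, count_info) + sizeof(uint32_t));

#if UINTPTR_MAX == 0xFFFFFFFFu
/* On an ILP32 target the whole structure is expected to be 24 bytes. */
LAYOUT_ASSERT(size_is_24, sizeof(struct pfn_info) == 24);
#endif
```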
7.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 7.2 +++ b/xen/arch/x86/x86_64/domain_build.c Thu Feb 03 14:45:50 2005 +0000 7.3 @@ -0,0 +1,391 @@ 7.4 +/****************************************************************************** 7.5 + * domain_build.c 7.6 + * 7.7 + * Copyright (c) 2002-2005, K A Fraser 7.8 + */ 7.9 + 7.10 +#include <xen/config.h> 7.11 +#include <xen/init.h> 7.12 +#include <xen/lib.h> 7.13 +#include <xen/sched.h> 7.14 +#include <xen/smp.h> 7.15 +#include <xen/delay.h> 7.16 +#include <asm/regs.h> 7.17 +#include <asm/system.h> 7.18 +#include <asm/io.h> 7.19 +#include <asm/processor.h> 7.20 +#include <asm/desc.h> 7.21 +#include <asm/i387.h> 7.22 +#include <xen/event.h> 7.23 +#include <xen/elf.h> 7.24 +#include <xen/kernel.h> 7.25 + 7.26 +/* Allow ring-3 access in long mode as guest cannot use ring 1. */ 7.27 +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER) 7.28 +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) 7.29 +#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) 7.30 +#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) 7.31 + 7.32 +#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) 7.33 +#define round_pgdown(_p) ((_p)&PAGE_MASK) 7.34 + 7.35 +int construct_dom0(struct domain *p, 7.36 + unsigned long alloc_start, 7.37 + unsigned long alloc_end, 7.38 + char *image_start, unsigned long image_len, 7.39 + char *initrd_start, unsigned long initrd_len, 7.40 + char *cmdline) 7.41 +{ 7.42 + char *dst; 7.43 + int i, rc; 7.44 + unsigned long pfn, mfn; 7.45 + unsigned long nr_pages = (alloc_end - alloc_start) >> PAGE_SHIFT; 7.46 + unsigned long nr_pt_pages; 7.47 + unsigned long count; 7.48 + l2_pgentry_t *l2tab, *l2start; 7.49 + l1_pgentry_t *l1tab = NULL, *l1start = NULL; 7.50 + struct pfn_info *page = NULL; 7.51 + start_info_t *si; 7.52 + struct exec_domain *ed = p->exec_domain[0]; 7.53 + 7.54 + /* 7.55 + * This fully describes the memory layout of the initial domain. All 7.56 + * *_start address are page-aligned, except v_start (and v_end) which are 7.57 + * superpage-aligned. 7.58 + */ 7.59 + struct domain_setup_info dsi; 7.60 + unsigned long vinitrd_start; 7.61 + unsigned long vinitrd_end; 7.62 + unsigned long vphysmap_start; 7.63 + unsigned long vphysmap_end; 7.64 + unsigned long vstartinfo_start; 7.65 + unsigned long vstartinfo_end; 7.66 + unsigned long vstack_start; 7.67 + unsigned long vstack_end; 7.68 + unsigned long vpt_start; 7.69 + unsigned long vpt_end; 7.70 + unsigned long v_end; 7.71 + 7.72 + /* Machine address of next candidate page-table page. */ 7.73 + unsigned long mpt_alloc; 7.74 + 7.75 + extern void physdev_init_dom0(struct domain *); 7.76 + 7.77 + /* Sanity! */ 7.78 + if ( p->id != 0 ) 7.79 + BUG(); 7.80 + if ( test_bit(DF_CONSTRUCTED, &p->d_flags) ) 7.81 + BUG(); 7.82 + 7.83 + memset(&dsi, 0, sizeof(struct domain_setup_info)); 7.84 + 7.85 + printk("*** LOADING DOMAIN 0 ***\n"); 7.86 + 7.87 + /* 7.88 + * This is all a bit grim. We've moved the modules to the "safe" physical 7.89 + * memory region above MAP_DIRECTMAP_ADDRESS (48MB). Later in this 7.90 + * routine we're going to copy it down into the region that's actually 7.91 + * been allocated to domain 0. This is highly likely to be overlapping, so 7.92 + * we use a forward copy. 7.93 + * 7.94 + * MAP_DIRECTMAP_ADDRESS should be safe. The worst case is a machine with 7.95 + * 4GB and lots of network/disk cards that allocate loads of buffers. 
7.96 + * We'll have to revisit this if we ever support PAE (64GB). 7.97 + */ 7.98 + 7.99 + rc = parseelfimage(image_start, image_len, &dsi); 7.100 + if ( rc != 0 ) 7.101 + return rc; 7.102 + 7.103 + /* Set up domain options */ 7.104 + if ( dsi.use_writable_pagetables ) 7.105 + vm_assist(p, VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); 7.106 + 7.107 + if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 ) 7.108 + { 7.109 + printk("Initial guest OS must load to a page boundary.\n"); 7.110 + return -EINVAL; 7.111 + } 7.112 + 7.113 + /* 7.114 + * Why do we need this? The number of page-table frames depends on the 7.115 + * size of the bootstrap address space. But the size of the address space 7.116 + * depends on the number of page-table frames (since each one is mapped 7.117 + * read-only). We have a pair of simultaneous equations in two unknowns, 7.118 + * which we solve by exhaustive search. 7.119 + */ 7.120 + vinitrd_start = round_pgup(dsi.v_kernend); 7.121 + vinitrd_end = vinitrd_start + initrd_len; 7.122 + vphysmap_start = round_pgup(vinitrd_end); 7.123 + vphysmap_end = vphysmap_start + (nr_pages * sizeof(unsigned long)); 7.124 + vpt_start = round_pgup(vphysmap_end); 7.125 + for ( nr_pt_pages = 2; ; nr_pt_pages++ ) 7.126 + { 7.127 + vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE); 7.128 + vstartinfo_start = vpt_end; 7.129 + vstartinfo_end = vstartinfo_start + PAGE_SIZE; 7.130 + vstack_start = vstartinfo_end; 7.131 + vstack_end = vstack_start + PAGE_SIZE; 7.132 + v_end = (vstack_end + (1<<22)-1) & ~((1<<22)-1); 7.133 + if ( (v_end - vstack_end) < (512 << 10) ) 7.134 + v_end += 1 << 22; /* Add extra 4MB to get >= 512kB padding. */ 7.135 + if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >> 7.136 + L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages ) 7.137 + break; 7.138 + } 7.139 + 7.140 + printk("PHYSICAL MEMORY ARRANGEMENT:\n" 7.141 + " Kernel image: %p->%p\n" 7.142 + " Initrd image: %p->%p\n" 7.143 + " Dom0 alloc.: %08lx->%08lx\n", 7.144 + image_start, image_start + image_len, 7.145 + initrd_start, initrd_start + initrd_len, 7.146 + alloc_start, alloc_end); 7.147 + printk("VIRTUAL MEMORY ARRANGEMENT:\n" 7.148 + " Loaded kernel: %08lx->%08lx\n" 7.149 + " Init. ramdisk: %08lx->%08lx\n" 7.150 + " Phys-Mach map: %08lx->%08lx\n" 7.151 + " Page tables: %08lx->%08lx\n" 7.152 + " Start info: %08lx->%08lx\n" 7.153 + " Boot stack: %08lx->%08lx\n" 7.154 + " TOTAL: %08lx->%08lx\n", 7.155 + dsi.v_kernstart, dsi.v_kernend, 7.156 + vinitrd_start, vinitrd_end, 7.157 + vphysmap_start, vphysmap_end, 7.158 + vpt_start, vpt_end, 7.159 + vstartinfo_start, vstartinfo_end, 7.160 + vstack_start, vstack_end, 7.161 + dsi.v_start, v_end); 7.162 + printk(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry); 7.163 + 7.164 + if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) ) 7.165 + { 7.166 + printk("Initial guest OS requires too much space\n" 7.167 + "(%luMB is greater than %luMB limit)\n", 7.168 + (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20); 7.169 + return -ENOMEM; 7.170 + } 7.171 + 7.172 + /* 7.173 + * Protect the lowest 1GB of memory. We use a temporary mapping there 7.174 + * from which we copy the kernel and ramdisk images. 7.175 + */ 7.176 + if ( dsi.v_start < (1<<30) ) 7.177 + { 7.178 + printk("Initial loading isn't allowed to lowest 1GB of memory.\n"); 7.179 + return -EINVAL; 7.180 + } 7.181 + 7.182 + /* Paranoia: scrub DOM0's memory allocation. 
*/ 7.183 + printk("Scrubbing DOM0 RAM: "); 7.184 + dst = (char *)alloc_start; 7.185 + while ( dst < (char *)alloc_end ) 7.186 + { 7.187 +#define SCRUB_BYTES (100 * 1024 * 1024) /* 100MB */ 7.188 + printk("."); 7.189 + touch_nmi_watchdog(); 7.190 + if ( ((char *)alloc_end - dst) > SCRUB_BYTES ) 7.191 + { 7.192 + memset(dst, 0, SCRUB_BYTES); 7.193 + dst += SCRUB_BYTES; 7.194 + } 7.195 + else 7.196 + { 7.197 + memset(dst, 0, (char *)alloc_end - dst); 7.198 + break; 7.199 + } 7.200 + } 7.201 + printk("done.\n"); 7.202 + 7.203 + /* Construct a frame-allocation list for the initial domain. */ 7.204 + for ( mfn = (alloc_start>>PAGE_SHIFT); 7.205 + mfn < (alloc_end>>PAGE_SHIFT); 7.206 + mfn++ ) 7.207 + { 7.208 + page = &frame_table[mfn]; 7.209 + page_set_owner(page, p); 7.210 + page->u.inuse.type_info = 0; 7.211 + page->count_info = PGC_allocated | 1; 7.212 + list_add_tail(&page->list, &p->page_list); 7.213 + p->tot_pages++; p->max_pages++; 7.214 + } 7.215 + 7.216 + mpt_alloc = (vpt_start - dsi.v_start) + alloc_start; 7.217 + 7.218 + SET_GDT_ENTRIES(ed, DEFAULT_GDT_ENTRIES); 7.219 + SET_GDT_ADDRESS(ed, DEFAULT_GDT_ADDRESS); 7.220 + 7.221 + /* 7.222 + * We're basically forcing default RPLs to 1, so that our "what privilege 7.223 + * level are we returning to?" logic works. 7.224 + */ 7.225 + ed->thread.failsafe_selector = FLAT_GUESTOS_CS; 7.226 + ed->thread.event_selector = FLAT_GUESTOS_CS; 7.227 + ed->thread.guestos_ss = FLAT_GUESTOS_DS; 7.228 + for ( i = 0; i < 256; i++ ) 7.229 + ed->thread.traps[i].cs = FLAT_GUESTOS_CS; 7.230 + 7.231 + /* WARNING: The new domain must have its 'processor' field filled in! */ 7.232 + l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE; 7.233 + memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE); 7.234 + l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] = 7.235 + mk_l2_pgentry((unsigned long)l2start | __PAGE_HYPERVISOR); 7.236 + l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] = 7.237 + mk_l2_pgentry(__pa(p->mm_perdomain_pt) | __PAGE_HYPERVISOR); 7.238 + ed->mm.pagetable = mk_pagetable((unsigned long)l2start); 7.239 + 7.240 + l2tab += l2_table_offset(dsi.v_start); 7.241 + mfn = alloc_start >> PAGE_SHIFT; 7.242 + for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ ) 7.243 + { 7.244 + if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) ) 7.245 + { 7.246 + l1start = l1tab = (l1_pgentry_t *)mpt_alloc; 7.247 + mpt_alloc += PAGE_SIZE; 7.248 + *l2tab++ = mk_l2_pgentry((unsigned long)l1start | L2_PROT); 7.249 + clear_page(l1tab); 7.250 + if ( count == 0 ) 7.251 + l1tab += l1_table_offset(dsi.v_start); 7.252 + } 7.253 + *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT); 7.254 + 7.255 + page = &frame_table[mfn]; 7.256 + if ( !get_page_and_type(page, p, PGT_writable_page) ) 7.257 + BUG(); 7.258 + 7.259 + mfn++; 7.260 + } 7.261 + 7.262 + /* Pages that are part of page tables must be read only. */ 7.263 + l2tab = l2start + l2_table_offset(vpt_start); 7.264 + l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab); 7.265 + l1tab += l1_table_offset(vpt_start); 7.266 + l2tab++; 7.267 + for ( count = 0; count < nr_pt_pages; count++ ) 7.268 + { 7.269 + *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW); 7.270 + page = &frame_table[l1_pgentry_to_pagenr(*l1tab)]; 7.271 + if ( count == 0 ) 7.272 + { 7.273 + page->u.inuse.type_info &= ~PGT_type_mask; 7.274 + page->u.inuse.type_info |= PGT_l2_page_table; 7.275 + 7.276 + /* 7.277 + * No longer writable: decrement the type_count. 7.278 + * Installed as CR3: increment both the ref_count and type_count. 
7.279 + * Net: just increment the ref_count. 7.280 + */ 7.281 + get_page(page, p); /* an extra ref because of readable mapping */ 7.282 + 7.283 + /* Get another ref to L2 page so that it can be pinned. */ 7.284 + if ( !get_page_and_type(page, p, PGT_l2_page_table) ) 7.285 + BUG(); 7.286 + set_bit(_PGT_pinned, &page->u.inuse.type_info); 7.287 + } 7.288 + else 7.289 + { 7.290 + page->u.inuse.type_info &= ~PGT_type_mask; 7.291 + page->u.inuse.type_info |= PGT_l1_page_table; 7.292 + page->u.inuse.type_info |= 7.293 + ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-1))<<PGT_va_shift; 7.294 + 7.295 + /* 7.296 + * No longer writable: decrement the type_count. 7.297 + * This is an L1 page, installed in a validated L2 page: 7.298 + * increment both the ref_count and type_count. 7.299 + * Net: just increment the ref_count. 7.300 + */ 7.301 + get_page(page, p); /* an extra ref because of readable mapping */ 7.302 + } 7.303 + l1tab++; 7.304 + if( !((unsigned long)l1tab & (PAGE_SIZE - 1)) ) 7.305 + l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab); 7.306 + } 7.307 + 7.308 + /* Set up shared-info area. */ 7.309 + update_dom_time(p); 7.310 + p->shared_info->domain_time = 0; 7.311 + /* Mask all upcalls... */ 7.312 + for ( i = 0; i < MAX_VIRT_CPUS; i++ ) 7.313 + p->shared_info->vcpu_data[i].evtchn_upcall_mask = 1; 7.314 + p->shared_info->n_vcpu = smp_num_cpus; 7.315 + 7.316 + /* Install the new page tables. */ 7.317 + __cli(); 7.318 + write_ptbase(&ed->mm); 7.319 + 7.320 + /* Copy the OS image. */ 7.321 + (void)loadelfimage(image_start); 7.322 + 7.323 + /* Copy the initial ramdisk. */ 7.324 + if ( initrd_len != 0 ) 7.325 + memcpy((void *)vinitrd_start, initrd_start, initrd_len); 7.326 + 7.327 + /* Set up start info area. */ 7.328 + si = (start_info_t *)vstartinfo_start; 7.329 + memset(si, 0, PAGE_SIZE); 7.330 + si->nr_pages = p->tot_pages; 7.331 + si->shared_info = virt_to_phys(p->shared_info); 7.332 + si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN; 7.333 + si->pt_base = vpt_start; 7.334 + si->nr_pt_frames = nr_pt_pages; 7.335 + si->mfn_list = vphysmap_start; 7.336 + 7.337 + /* Write the phys->machine and machine->phys table entries. */ 7.338 + for ( pfn = 0; pfn < p->tot_pages; pfn++ ) 7.339 + { 7.340 + mfn = pfn + (alloc_start>>PAGE_SHIFT); 7.341 +#ifndef NDEBUG 7.342 +#define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT) 7.343 + if ( pfn > REVERSE_START ) 7.344 + mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START); 7.345 +#endif 7.346 + ((unsigned long *)vphysmap_start)[pfn] = mfn; 7.347 + machine_to_phys_mapping[mfn] = pfn; 7.348 + } 7.349 + 7.350 + if ( initrd_len != 0 ) 7.351 + { 7.352 + si->mod_start = vinitrd_start; 7.353 + si->mod_len = initrd_len; 7.354 + printk("Initrd len 0x%lx, start at 0x%08lx\n", 7.355 + si->mod_len, si->mod_start); 7.356 + } 7.357 + 7.358 + dst = si->cmd_line; 7.359 + if ( cmdline != NULL ) 7.360 + { 7.361 + for ( i = 0; i < 255; i++ ) 7.362 + { 7.363 + if ( cmdline[i] == '\0' ) 7.364 + break; 7.365 + *dst++ = cmdline[i]; 7.366 + } 7.367 + } 7.368 + *dst = '\0'; 7.369 + 7.370 + /* Reinstate the caller's page tables. */ 7.371 + write_ptbase(&current->mm); 7.372 + __sti(); 7.373 + 7.374 + /* Destroy low mappings - they were only for our convenience. */ 7.375 + for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ ) 7.376 + if ( l2_pgentry_val(l2start[i]) & _PAGE_PSE ) 7.377 + l2start[i] = mk_l2_pgentry(0); 7.378 + zap_low_mappings(); /* Do the same for the idle page tables. */ 7.379 + 7.380 + /* DOM0 gets access to everything. 
*/ 7.381 + physdev_init_dom0(p); 7.382 + 7.383 + set_bit(DF_CONSTRUCTED, &p->d_flags); 7.384 + 7.385 + new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start); 7.386 + 7.387 +#if 0 /* XXXXX DO NOT CHECK IN ENABLED !!! (but useful for testing so leave) */ 7.388 + shadow_lock(&p->mm); 7.389 + shadow_mode_enable(p, SHM_test); 7.390 + shadow_unlock(&p->mm); 7.391 +#endif 7.392 + 7.393 + return 0; 7.394 +}
8.1 --- a/xen/arch/x86/x86_64/mm.c Thu Feb 03 13:07:34 2005 +0000 8.2 +++ b/xen/arch/x86/x86_64/mm.c Thu Feb 03 14:45:50 2005 +0000 8.3 @@ -171,6 +171,21 @@ void subarch_init_memory(struct domain * 8.4 l3_pgentry_t l3e; 8.5 l2_pgentry_t l2e; 8.6 8.7 + /* 8.8 + * We are rather picky about the layout of 'struct pfn_info'. The 8.9 + * count_info and domain fields must be adjacent, as we perform atomic 8.10 + * 64-bit operations on them. 8.11 + */ 8.12 + if ( (offsetof(struct pfn_info, u.inuse._domain) != 8.13 + (offsetof(struct pfn_info, count_info) + sizeof(u32))) ) 8.14 + { 8.15 + printk("Weird pfn_info layout (%ld,%ld,%d)\n", 8.16 + offsetof(struct pfn_info, count_info), 8.17 + offsetof(struct pfn_info, u.inuse._domain), 8.18 + sizeof(struct pfn_info)); 8.19 + for ( ; ; ) ; 8.20 + } 8.21 + 8.22 /* M2P table is mappable read-only by privileged domains. */ 8.23 for ( v = RDWR_MPT_VIRT_START; 8.24 v != RDWR_MPT_VIRT_END; 8.25 @@ -187,11 +202,11 @@ void subarch_init_memory(struct domain * 8.26 8.27 for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ ) 8.28 { 8.29 - frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1; 8.30 + frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1; 8.31 /* gdt to make sure it's only mapped read-only by non-privileged 8.32 domains. */ 8.33 frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1; 8.34 - frame_table[m2p_start_mfn+i].u.inuse.domain = dom_xen; 8.35 + page_set_owner(&frame_table[m2p_start_mfn+i], dom_xen); 8.36 } 8.37 } 8.38 }
9.1 --- a/xen/common/page_alloc.c Thu Feb 03 13:07:34 2005 +0000 9.2 +++ b/xen/common/page_alloc.c Thu Feb 03 14:45:50 2005 +0000 9.3 @@ -418,7 +418,7 @@ unsigned long alloc_xenheap_pages(unsign 9.4 for ( i = 0; i < (1 << order); i++ ) 9.5 { 9.6 pg[i].count_info = 0; 9.7 - pg[i].u.inuse.domain = NULL; 9.8 + pg[i].u.inuse._domain = 0; 9.9 pg[i].u.inuse.type_info = 0; 9.10 } 9.11 9.12 @@ -501,7 +501,7 @@ struct pfn_info *alloc_domheap_pages(str 9.13 } 9.14 9.15 pg[i].count_info = 0; 9.16 - pg[i].u.inuse.domain = NULL; 9.17 + pg[i].u.inuse._domain = 0; 9.18 pg[i].u.inuse.type_info = 0; 9.19 } 9.20 9.21 @@ -529,7 +529,7 @@ struct pfn_info *alloc_domheap_pages(str 9.22 9.23 for ( i = 0; i < (1 << order); i++ ) 9.24 { 9.25 - pg[i].u.inuse.domain = d; 9.26 + page_set_owner(&pg[i], d); 9.27 wmb(); /* Domain pointer must be visible before updating refcnt. */ 9.28 pg[i].count_info |= PGC_allocated | 1; 9.29 list_add_tail(&pg[i].list, &d->page_list); 9.30 @@ -544,7 +544,7 @@ struct pfn_info *alloc_domheap_pages(str 9.31 void free_domheap_pages(struct pfn_info *pg, unsigned int order) 9.32 { 9.33 int i, drop_dom_ref; 9.34 - struct domain *d = pg->u.inuse.domain; 9.35 + struct domain *d = page_get_owner(pg); 9.36 struct exec_domain *ed; 9.37 void *p; 9.38 int cpu_mask = 0;
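alloc_domheap_pages() keeps its wmb() between setting the (now pickled) owner and raising the reference count: any reader that observes a nonzero count must also observe the correct owner. The toy C11 fragment below illustrates that ordering; the atomics and the simplified reader stand in for Xen's wmb() and for get_page()'s compare-exchange loop, so treat it as a memory-ordering sketch rather than hypervisor code.

```c
/* Memory-ordering sketch of the owner-before-refcount publication in
 * alloc_domheap_pages(); C11 atomics stand in for Xen's wmb(). */
#include <stdatomic.h>
#include <stdint.h>

struct page {
    _Atomic uint32_t count_info;   /* PGC_allocated | refcount */
    _Atomic uint32_t _domain;      /* pickled owner */
};

#define PGC_allocated (1u << 31)

void allocator_publishes(struct page *pg, uint32_t owner)
{
    atomic_store_explicit(&pg->_domain, owner, memory_order_relaxed);
    /* wmb(): owner must be visible before the page looks allocated... */
    atomic_thread_fence(memory_order_release);
    atomic_store_explicit(&pg->count_info, PGC_allocated | 1,
                          memory_order_relaxed);
}

int reader_sees_owner(struct page *pg, uint32_t expected_owner)
{
    /* ...because a reader treats a nonzero count as permission to
     * trust the owner field it reads next. */
    if ( atomic_load_explicit(&pg->count_info, memory_order_acquire) == 0 )
        return 0;                        /* not allocated */
    return atomic_load_explicit(&pg->_domain, memory_order_relaxed)
           == expected_owner;
}
```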
10.1 --- a/xen/include/asm-x86/mm.h Thu Feb 03 13:07:34 2005 +0000 10.2 +++ b/xen/include/asm-x86/mm.h Thu Feb 03 14:45:50 2005 +0000 10.3 @@ -30,6 +30,9 @@ struct pfn_info 10.4 /* Each frame can be threaded onto a doubly-linked list. */ 10.5 struct list_head list; 10.6 10.7 + /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */ 10.8 + u32 tlbflush_timestamp; 10.9 + 10.10 /* Reference count and various PGC_xxx flags and fields. */ 10.11 u32 count_info; 10.12 10.13 @@ -39,24 +42,22 @@ struct pfn_info 10.14 /* Page is in use: ((count_info & PGC_count_mask) != 0). */ 10.15 struct { 10.16 /* Owner of this page (NULL if page is anonymous). */ 10.17 - struct domain *domain; 10.18 + u32 _domain; /* pickled format */ 10.19 /* Type reference count and various PGT_xxx flags and fields. */ 10.20 u32 type_info; 10.21 - } inuse; 10.22 + } PACKED inuse; 10.23 10.24 /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */ 10.25 struct { 10.26 /* Mask of possibly-tainted TLBs. */ 10.27 - unsigned long cpu_mask; 10.28 + u32 cpu_mask; 10.29 /* Order-size of the free chunk this page is the head of. */ 10.30 u8 order; 10.31 - } free; 10.32 - 10.33 - } u; 10.34 + } PACKED free; 10.35 10.36 - /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */ 10.37 - u32 tlbflush_timestamp; 10.38 -}; 10.39 + } PACKED u; 10.40 + 10.41 +} PACKED; 10.42 10.43 /* The following page types are MUTUALLY EXCLUSIVE. */ 10.44 #define PGT_none (0<<29) /* no special uses of this page */ 10.45 @@ -97,9 +98,25 @@ struct pfn_info 10.46 10.47 #define IS_XEN_HEAP_FRAME(_pfn) (page_to_phys(_pfn) < xenheap_phys_end) 10.48 10.49 +#if defined(__i386__) 10.50 + 10.51 +#define pickle_domptr(_d) ((u32)(unsigned long)(_d)) 10.52 +#define unpickle_domptr(_d) ((struct domain *)(unsigned long)(_d)) 10.53 + 10.54 +#elif defined(__x86_64__) 10.55 +static inline struct domain *unpickle_domptr(u32 _domain) 10.56 +{ return (_domain == 0) ? NULL : __va(_domain); } 10.57 +static inline u32 pickle_domptr(struct domain *domain) 10.58 +{ return (domain == NULL) ? 0 : (u32)__pa(domain); } 10.59 + 10.60 +#endif 10.61 + 10.62 +#define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain)) 10.63 +#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d)) 10.64 + 10.65 #define SHARE_PFN_WITH_DOMAIN(_pfn, _dom) \ 10.66 do { \ 10.67 - (_pfn)->u.inuse.domain = (_dom); \ 10.68 + page_set_owner((_pfn), (_dom)); \ 10.69 /* The incremented type count is intended to pin to 'writable'. */ \ 10.70 (_pfn)->u.inuse.type_info = PGT_writable_page | PGT_validated | 1; \ 10.71 wmb(); /* install valid domain ptr before updating refcnt. */ \ 10.72 @@ -142,7 +159,8 @@ static inline int get_page(struct pfn_in 10.73 struct domain *domain) 10.74 { 10.75 u32 x, nx, y = page->count_info; 10.76 - struct domain *d, *nd = page->u.inuse.domain; 10.77 + u32 d, nd = page->u.inuse._domain; 10.78 + u32 _domain = pickle_domptr(domain); 10.79 10.80 do { 10.81 x = y; 10.82 @@ -150,10 +168,10 @@ static inline int get_page(struct pfn_in 10.83 d = nd; 10.84 if ( unlikely((x & PGC_count_mask) == 0) || /* Not allocated? */ 10.85 unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */ 10.86 - unlikely(d != domain) ) /* Wrong owner? */ 10.87 + unlikely(d != _domain) ) /* Wrong owner? 
*/ 10.88 { 10.89 DPRINTK("Error pfn %08lx: ed=%p, sd=%p, caf=%08x, taf=%08x\n", 10.90 - page_to_pfn(page), domain, d, 10.91 + page_to_pfn(page), domain, unpickle_domptr(d), 10.92 x, page->u.inuse.type_info); 10.93 return 0; 10.94 } 10.95 @@ -198,7 +216,7 @@ static inline int get_page_and_type(stru 10.96 ASSERT(((_p)->u.inuse.type_info & PGT_count_mask) != 0) 10.97 #define ASSERT_PAGE_IS_DOMAIN(_p, _d) \ 10.98 ASSERT(((_p)->count_info & PGC_count_mask) != 0); \ 10.99 - ASSERT((_p)->u.inuse.domain == (_d)) 10.100 + ASSERT(page_get_owner(_p) == (_d)) 10.101 10.102 int check_descriptor(unsigned long *d); 10.103
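The payoff of pickling shows up in get_page() above: the owner and the reference count now compare as two u32s sitting in one aligned 8-byte unit, so a single 64-bit compare-and-swap can validate and bump both together on either subarch. The cmpxchg itself falls outside the hunk's context lines, so the sketch below only illustrates the idea, using a union overlay and a GCC __atomic builtin rather than Xen's primitives.

```c
/* Sketch of the "one 64-bit CAS covers refcount and owner" idea that the
 * adjacency checks protect. The union overlay and the GCC builtin are
 * illustrative; Xen's real get_page() differs in detail. */
#include <stdint.h>

union page_word {
    struct { uint32_t count_info; uint32_t _domain; } f;
    uint64_t whole;
};

#define PGC_count_mask 0x7fffffffu

int sketch_get_page(union page_word *pg, uint32_t domain)
{
    union page_word x, nx;

    x.whole = pg->whole;               /* snapshot; CAS validates it */
    do {
        nx = x;
        nx.f.count_info++;             /* take one reference */
        if ( (x.f.count_info & PGC_count_mask) == 0 ||   /* free page?   */
             (nx.f.count_info & PGC_count_mask) == 0 ||  /* overflowed?  */
             x.f._domain != domain )                     /* wrong owner? */
            return 0;
        /* Retry until count and owner update as one atomic unit;
         * on failure the builtin reloads x.whole for us. */
    } while ( !__atomic_compare_exchange_n(&pg->whole, &x.whole, nx.whole,
                                           0, __ATOMIC_SEQ_CST,
                                           __ATOMIC_SEQ_CST) );
    return 1;
}
```

On x86_64 this also appears to rely on pickle_domptr() producing a value that fits in 32 bits, i.e. on domain structures living at physical addresses below 4GB, as implied by the (u32)__pa(domain) cast in the hunk above.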
11.1 --- a/xen/include/asm-x86/shadow.h Thu Feb 03 13:07:34 2005 +0000 11.2 +++ b/xen/include/asm-x86/shadow.h Thu Feb 03 14:45:50 2005 +0000 11.3 @@ -189,7 +189,7 @@ static inline int __mark_dirty( struct m 11.4 SH_LOG("mark_dirty OOR! mfn=%x pfn=%lx max=%x (mm %p)", 11.5 mfn, pfn, m->shadow_dirty_bitmap_size, m ); 11.6 SH_LOG("dom=%p caf=%08x taf=%08x\n", 11.7 - frame_table[mfn].u.inuse.domain, 11.8 + page_get_owner(&frame_table[mfn]), 11.9 frame_table[mfn].count_info, 11.10 frame_table[mfn].u.inuse.type_info ); 11.11 }