xen-vtx-unstable
changeset 5727:ba925b4aef28
The 1:1 page table should be a 3 level PAE page table on x86-64
This is needed to support > 4GB machine physical addresses.
Signed-off-by: Chengyuan Li <chengyuan.li@intel.com>
Signed-off-by: Arun Sharma <arun.sharma@intel.com>
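Why a 3-level table is required: non-PAE 2-level page tables use 32-bit entries, so the machine frame field cannot name physical memory at or above 4GB; PAE widens entries to 64 bits, with frame bits reaching up to bit 51. The sketch below is illustrative only (not part of the patch) and contrasts the two entry formats:

    /* Illustrative only -- not from the patch. A 32-bit (non-PAE) entry
     * can name machine frames only below 4GB; a 64-bit PAE entry carries
     * frame bits 12..51, so it can point anywhere in a >4GB machine. */
    #include <stdint.h>

    static inline uint64_t l2_32_entry_to_maddr(uint32_t e)
    {
        /* frame field is bits 12..31: max machine address < 4GB */
        return (uint64_t)(e & 0xfffff000u);
    }

    static inline uint64_t pae_entry_to_maddr(uint64_t e)
    {
        /* frame field is bits 12..51: machine addresses beyond 4GB OK */
        return e & 0x000ffffffffff000ULL;
    }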
author   | kaf24@firebug.cl.cam.ac.uk
date     | Mon Jul 11 14:39:10 2005 +0000 (2005-07-11)
parents  | a29b4174d39c
children | 56a63f9f378f
files    | tools/libxc/xc_vmx_build.c tools/python/xen/xend/image.py xen/arch/x86/shadow32.c xen/arch/x86/shadow_public.c xen/arch/x86/vmx.c xen/arch/x86/vmx_vmcs.c
line diff
--- a/tools/libxc/xc_vmx_build.c	Mon Jul 11 10:23:19 2005 +0000
+++ b/tools/libxc/xc_vmx_build.c	Mon Jul 11 14:39:10 2005 +0000
@@ -13,6 +13,9 @@
 
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#ifdef __x86_64__
+#define L3_PROT (_PAGE_PRESENT)
+#endif
 
 #define round_pgup(_p)   (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
 #define round_pgdown(_p) ((_p)&PAGE_MASK)
@@ -91,6 +94,7 @@ static void build_e820map(struct mem_map
     mem_mapp->nr_map = nr_map;
 }
 
+#ifdef __i386__
 static int zap_mmio_range(int xc_handle, u32 dom,
                           l2_pgentry_32_t *vl2tab,
                           unsigned long mmio_range_start,
@@ -138,6 +142,65 @@ static int zap_mmio_ranges(int xc_handle
     munmap(vl2tab, PAGE_SIZE);
     return 0;
 }
+#else
+static int zap_mmio_range(int xc_handle, u32 dom,
+                          l3_pgentry_t *vl3tab,
+                          unsigned long mmio_range_start,
+                          unsigned long mmio_range_size)
+{
+    unsigned long mmio_addr;
+    unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
+    unsigned long vl2e = 0;
+    unsigned long vl3e;
+    l1_pgentry_t *vl1tab;
+    l2_pgentry_t *vl2tab;
+
+    mmio_addr = mmio_range_start & PAGE_MASK;
+    for (; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE) {
+        vl3e = vl3tab[l3_table_offset(mmio_addr)];
+        if (vl3e == 0)
+            continue;
+        vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                      PROT_READ|PROT_WRITE, vl3e >> PAGE_SHIFT);
+        if (vl2tab == 0) {
+            PERROR("Failed zap MMIO range");
+            return -1;
+        }
+        vl2e = vl2tab[l2_table_offset(mmio_addr)];
+        if (vl2e == 0)
+            continue;
+        vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                      PROT_READ|PROT_WRITE, vl2e >> PAGE_SHIFT);
+
+        vl1tab[l1_table_offset(mmio_addr)] = 0;
+        munmap(vl2tab, PAGE_SIZE);
+        munmap(vl1tab, PAGE_SIZE);
+    }
+    return 0;
+}
+
+static int zap_mmio_ranges(int xc_handle, u32 dom,
+                           unsigned long l3tab,
+                           struct mem_map *mem_mapp)
+{
+    int i;
+    l3_pgentry_t *vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                                PROT_READ|PROT_WRITE,
+                                                l3tab >> PAGE_SHIFT);
+    if (vl3tab == 0)
+        return -1;
+    for (i = 0; i < mem_mapp->nr_map; i++) {
+        if ((mem_mapp->map[i].type == E820_IO)
+            && (mem_mapp->map[i].caching_attr == MEMMAP_UC))
+            if (zap_mmio_range(xc_handle, dom, vl3tab,
+                               mem_mapp->map[i].addr, mem_mapp->map[i].size) == -1)
+                return -1;
+    }
+    munmap(vl3tab, PAGE_SIZE);
+    return 0;
+}
+
+#endif
 
 static int setup_guest(int xc_handle,
                        u32 dom, int memsize,
@@ -151,9 +214,13 @@ static int setup_guest(int xc_handle,
                        unsigned long flags,
                        struct mem_map * mem_mapp)
 {
-    l1_pgentry_32_t *vl1tab=NULL, *vl1e=NULL;
-    l2_pgentry_32_t *vl2tab=NULL, *vl2e=NULL;
+    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
+    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
     unsigned long *page_array = NULL;
+#ifdef __x86_64__
+    l3_pgentry_t *vl3tab=NULL, *vl3e=NULL;
+    unsigned long l3tab;
+#endif
     unsigned long l2tab;
     unsigned long l1tab;
     unsigned long count, i;
@@ -212,7 +279,11 @@ static int setup_guest(int xc_handle,
     if(initrd_len == 0)
        vinitrd_start = vinitrd_end = 0;
 
+#ifdef __i386__
     nr_pt_pages = 1 + ((memsize + 3) >> 2);
+#else
+    nr_pt_pages = 5 + ((memsize + 1) >> 1);
+#endif
     vpt_start   = v_end;
     vpt_end     = vpt_start + (nr_pt_pages * PAGE_SIZE);
 
@@ -274,6 +345,7 @@ static int setup_guest(int xc_handle,
     if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
         goto error_out;
 
+#ifdef __i386__
     /* First allocate page for page dir. */
     ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
     l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
@@ -310,7 +382,64 @@ static int setup_guest(int xc_handle,
     }
     munmap(vl1tab, PAGE_SIZE);
     munmap(vl2tab, PAGE_SIZE);
+#else
+    /* First allocate pdpt */
+    ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
+    /* here l3tab means pdpt, only 4 entry is used */
+    l3tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+    ctxt->ctrlreg[3] = l3tab;
 
+    /* Initialise the page tables. */
+    if ( (vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                        PROT_READ|PROT_WRITE,
+                                        l3tab >> PAGE_SHIFT)) == NULL )
+        goto error_out;
+    memset(vl3tab, 0, PAGE_SIZE);
+
+    vl3e = &vl3tab[l3_table_offset(dsi.v_start)];
+
+    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
+    {
+        if (!(count % (1 << (L3_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)))){
+            l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+
+            if (vl2tab != NULL)
+                munmap(vl2tab, PAGE_SIZE);
+
+            if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                                PROT_READ|PROT_WRITE,
+                                                l2tab >> PAGE_SHIFT)) == NULL )
+                goto error_out;
+
+            memset(vl2tab, 0, PAGE_SIZE);
+            *vl3e++ = l2tab | L3_PROT;
+            vl2e = &vl2tab[l2_table_offset(dsi.v_start + (count << PAGE_SHIFT))];
+        }
+        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
+        {
+            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+            if ( vl1tab != NULL )
+                munmap(vl1tab, PAGE_SIZE);
+            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                                PROT_READ|PROT_WRITE,
+                                                l1tab >> PAGE_SHIFT)) == NULL )
+            {
+                munmap(vl2tab, PAGE_SIZE);
+                goto error_out;
+            }
+            memset(vl1tab, 0, PAGE_SIZE);
+            vl1e = &vl1tab[l1_table_offset(dsi.v_start + (count<<PAGE_SHIFT))];
+            *vl2e++ = l1tab | L2_PROT;
+        }
+
+        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
+        vl1e++;
+    }
+
+    munmap(vl1tab, PAGE_SIZE);
+    munmap(vl2tab, PAGE_SIZE);
+    munmap(vl3tab, PAGE_SIZE);
+#endif
     /* Write the machine->phys table entries. */
     for ( count = 0; count < nr_pages; count++ )
     {
@@ -325,6 +454,7 @@ static int setup_guest(int xc_handle,
             xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
             page_array[(vboot_params_start-dsi.v_start)>>PAGE_SHIFT])) == 0)
         goto error_out;
+
     memset(boot_paramsp, 0, sizeof(*boot_paramsp));
 
     strncpy((char *)boot_paramsp->cmd_line, cmdline, 0x800);
@@ -381,7 +511,11 @@ static int setup_guest(int xc_handle,
 
     /* memsize is in megabytes */
     build_e820map(mem_mapp, memsize << 20);
+#if defined (__i386__)
     if (zap_mmio_ranges(xc_handle, dom, l2tab, mem_mapp) == -1)
+#else
+    if (zap_mmio_ranges(xc_handle, dom, l3tab, mem_mapp) == -1)
+#endif
         goto error_out;
     boot_paramsp->e820_map_nr = mem_mapp->nr_map;
     for (i=0; i<mem_mapp->nr_map; i++) {
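The x86-64 mapping loop above takes a fresh L2 page every 1 << (L3_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT) guest pages and a fresh L1 page whenever vl1e wraps onto a new page of entries. The standalone sketch below is hypothetical: the shift values are assumed to match Xen's PAE layout, and the vl1e wrap is approximated with a count modulus (equivalent when v_start is 2MB-aligned). It counts the table pages a 128MB guest would consume:

    /* Illustrative sketch (not from the patch) of the allocation cadence
     * in the PAE mapping loop: one L2 page per 1GB of guest addresses,
     * one L1 page per 2MB, plus the single PDPT allocated up front. */
    #include <stdio.h>

    #define L1_PAGETABLE_SHIFT 12   /* 4KB pages        */
    #define L2_PAGETABLE_SHIFT 21   /* 2MB per L1 table */
    #define L3_PAGETABLE_SHIFT 30   /* 1GB per L2 table */

    int main(void)
    {
        unsigned long count, l1_pages = 0, l2_pages = 0;
        unsigned long guest_pages = (128UL << 20) >> L1_PAGETABLE_SHIFT;

        for (count = 0; count < guest_pages; count++) {
            if (!(count % (1UL << (L3_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT))))
                l2_pages++;               /* new L2 every 1GB */
            if (!(count % (1UL << (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT))))
                l1_pages++;               /* new L1 every 2MB */
        }
        /* 128MB guest -> 1 L2 page, 64 L1 pages (+1 PDPT) */
        printf("L2=%lu L1=%lu\n", l2_pages, l1_pages);
        return 0;
    }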
--- a/tools/python/xen/xend/image.py	Mon Jul 11 10:23:19 2005 +0000
+++ b/tools/python/xen/xend/image.py	Mon Jul 11 14:39:10 2005 +0000
@@ -351,6 +351,8 @@ class VmxImageHandler(ImageHandler):
         @param mem_mb: size in MB
         @return size in KB
         """
-        # Logic x86-32 specific.
         # 1 page for the PGD + 1 pte page for 4MB of memory (rounded)
-        return (1 + ((mem_mb + 3) >> 2)) * 4
+        if os.uname()[4] == 'x86_64':
+            return (5 + ((mem_mb + 1) >> 1)) * 4
+        else:
+            return (1 + ((mem_mb + 3) >> 2)) * 4
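This Python estimate duplicates the nr_pt_pages arithmetic in xc_vmx_build.c and must stay in sync with it. A quick cross-check of the two formulas (hypothetical helper; sizes in KB, 4KB per page):

    /* Hypothetical cross-check of the page-table size estimate against
     * the C-side formulas in xc_vmx_build.c; returns KB, like the
     * Python method (pages * 4). */
    #include <stdio.h>

    static unsigned long pt_size_kb(unsigned long mem_mb, int x86_64)
    {
        if (x86_64)
            return (5 + ((mem_mb + 1) >> 1)) * 4;  /* PDPT + 4 L2s + one L1 per 2MB */
        return (1 + ((mem_mb + 3) >> 2)) * 4;      /* PGD + one PTE page per 4MB */
    }

    int main(void)
    {
        /* a 512MB guest: 516KB on i386, 1044KB on x86-64 */
        printf("%lu %lu\n", pt_size_kb(512, 0), pt_size_kb(512, 1));
        return 0;
    }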
--- a/xen/arch/x86/shadow32.c	Mon Jul 11 10:23:19 2005 +0000
+++ b/xen/arch/x86/shadow32.c	Mon Jul 11 14:39:10 2005 +0000
@@ -677,7 +677,7 @@ int _shadow_mode_refcounts(struct domain
     return shadow_mode_refcounts(d);
 }
 
-void alloc_monitor_pagetable(struct vcpu *v)
+static void alloc_monitor_pagetable(struct vcpu *v)
 {
     unsigned long mmfn;
     l2_pgentry_t *mpl2e;
--- a/xen/arch/x86/shadow_public.c	Mon Jul 11 10:23:19 2005 +0000
+++ b/xen/arch/x86/shadow_public.c	Mon Jul 11 14:39:10 2005 +0000
@@ -162,7 +162,7 @@ static pagetable_t page_table_convert(st
     return mk_pagetable(page_to_phys(l4page));
 }
 
-void alloc_monitor_pagetable(struct vcpu *v)
+static void alloc_monitor_pagetable(struct vcpu *v)
 {
     unsigned long mmfn;
     l4_pgentry_t *mpl4e;
--- a/xen/arch/x86/vmx.c	Mon Jul 11 10:23:19 2005 +0000
+++ b/xen/arch/x86/vmx.c	Mon Jul 11 14:39:10 2005 +0000
@@ -801,7 +801,11 @@ vmx_world_restore(struct vcpu *d, struct
 skip_cr3:
 
     error |= __vmread(CR4_READ_SHADOW, &old_cr4);
+#if defined (__i386__)
     error |= __vmwrite(GUEST_CR4, (c->cr4 | X86_CR4_VMXE));
+#else
+    error |= __vmwrite(GUEST_CR4, (c->cr4 | X86_CR4_VMXE | X86_CR4_PAE));
+#endif
     error |= __vmwrite(CR4_READ_SHADOW, c->cr4);
 
     error |= __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
@@ -860,7 +864,7 @@ vmx_assist(struct vcpu *d, int mode)
 {
     struct vmx_assist_context c;
     u32 magic;
-    unsigned long cp;
+    u32 cp;
 
     /* make sure vmxassist exists (this is not an error) */
     if (!vmx_copy(&magic, VMXASSIST_MAGIC_OFFSET, sizeof(magic), COPY_IN))
@@ -1191,7 +1195,7 @@ static int mov_to_cr(int gp, int cr, str
 
     __vmread(CR4_READ_SHADOW, &old_cr);
     if (pae_disabled)
-        __vmwrite(GUEST_CR4, ((value & ~X86_CR4_PAE) | X86_CR4_VMXE));
+        __vmwrite(GUEST_CR4, value| X86_CR4_VMXE);
     else
         __vmwrite(GUEST_CR4, value| X86_CR4_VMXE);
 
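The GUEST_CR4 hunks above maintain a split between the CR4 value the guest reads and the one the hardware uses: CR4_READ_SHADOW keeps whatever the guest wrote, while the effective CR4 always carries VMXE, plus PAE on x86-64 now that the 1:1 shadow tables are 3-level PAE. A minimal sketch of that composition (standard CR4 bit values; the helper name is hypothetical):

    /* Sketch, not the patch's API: compose the effective guest CR4 from
     * the guest-visible (shadow) value. VMXE is always forced on; PAE is
     * forced on under x86-64 because the shadow tables are PAE. */
    #include <stdint.h>

    #define X86_CR4_PAE  0x0020   /* bit 5  */
    #define X86_CR4_VMXE 0x2000   /* bit 13 */

    static inline uint64_t effective_guest_cr4(uint64_t guest_value)
    {
    #if defined(__i386__)
        return guest_value | X86_CR4_VMXE;
    #else
        return guest_value | X86_CR4_VMXE | X86_CR4_PAE;
    #endif
    }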
--- a/xen/arch/x86/vmx_vmcs.c	Mon Jul 11 10:23:19 2005 +0000
+++ b/xen/arch/x86/vmx_vmcs.c	Mon Jul 11 14:39:10 2005 +0000
@@ -122,6 +122,7 @@ int vmx_setup_platform(struct vcpu *d, s
     struct e820entry *e820p;
     unsigned long gpfn = 0;
 
+    local_flush_tlb_pge();
     regs->ebx = 0; /* Linux expects ebx to be 0 for boot proc */
 
     n = regs->ecx;
@@ -311,8 +312,7 @@ construct_init_vmcs_guest(struct cpu_use
     error |= __vmwrite(CR0_READ_SHADOW, shadow_cr);
     /* CR3 is set in vmx_final_setup_guest */
 #ifdef __x86_64__
-    error |= __vmwrite(GUEST_CR4, host_env->cr4 & ~X86_CR4_PAE);
-    printk("construct_init_vmcs_guest: guest CR4 is %lx\n", host_env->cr4 );
+    error |= __vmwrite(GUEST_CR4, host_env->cr4 & ~X86_CR4_PSE);
 #else
     error |= __vmwrite(GUEST_CR4, host_env->cr4);
 #endif
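In construct_init_vmcs_guest the x86-64 initial guest CR4 is now derived from the host CR4 by clearing PSE rather than PAE: PAE has to stay set for the 3-level PAE 1:1 tables, and PSE is dropped, presumably so the initial map uses 4KB pages only. A sketch under those assumptions (standard CR4 bit values; the helper name is hypothetical):

    /* Illustrative only: the initial guest CR4 keeps PAE (the 1:1 tables
     * are PAE) and drops PSE. Was previously host_cr4 & ~X86_CR4_PAE. */
    #include <stdint.h>

    #define X86_CR4_PSE 0x0010   /* bit 4: page size extensions       */
    #define X86_CR4_PAE 0x0020   /* bit 5: physical address extension */

    static inline uint64_t initial_guest_cr4(uint64_t host_cr4)
    {
    #ifdef __x86_64__
        return host_cr4 & ~X86_CR4_PSE;
    #else
        return host_cr4;
    #endif
    }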