changeset 21153:d7370232060a
EPT: 1GB large page support.
Allocate a 1GB large page for EPT when possible. This patch also contains the
logic to split a large page into smaller ones (2M or 4K).
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
Signed-off-by: Xiaohui Xin <xiaohui.xin@intel.com>
Acked-by: Tim Deegan <Tim.Deegan@citrix.com>
author    Keir Fraser <keir.fraser@citrix.com>
date      Tue Apr 06 07:14:56 2010 +0100 (2010-04-06)
parents   b20f897d6010
children  adce8bc43fcc
files     xen/arch/x86/hvm/hvm.c
          xen/arch/x86/hvm/vmx/vmcs.c
          xen/arch/x86/hvm/vmx/vmx.c
          xen/arch/x86/mm/hap/p2m-ept.c
          xen/include/asm-x86/hvm/vmx/vmcs.h
          xen/include/asm-x86/msr-index.h
line diff
--- a/xen/arch/x86/hvm/hvm.c	Tue Apr 06 07:13:19 2010 +0100
+++ b/xen/arch/x86/hvm/hvm.c	Tue Apr 06 07:14:56 2010 +0100
@@ -966,6 +966,11 @@ bool_t hvm_hap_nested_page_fault(unsigne
     /* Spurious fault? PoD and log-dirty also take this path. */
     if ( p2m_is_ram(p2mt) )
     {
+        /*
+         * Page log dirty is always done with order 0. If this mfn resides in
+         * a large page, we do not change other pages type within that large
+         * page.
+         */
         paging_mark_dirty(current->domain, mfn_x(mfn));
         p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
         return 1;
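For reference, a toy model (plain C, not Xen code) of the behaviour the new comment documents: an order-0 log-dirty retype touches exactly one 4K slot of a large page and leaves the types of its neighbours alone. All values here are invented for illustration.

#include <stdio.h>

enum p2m_type { p2m_ram_rw, p2m_ram_logdirty };

int main(void)
{
    enum p2m_type type[512];            /* the 512 4K slots of one 2M region */
    unsigned int i, faulting = 0x123;   /* hypothetical faulting slot */
    unsigned int still_logdirty = 0;

    for ( i = 0; i < 512; i++ )         /* whole region starts log-dirty */
        type[i] = p2m_ram_logdirty;

    /* Order-0 retype: only the faulting 4K page becomes rw; every other
     * slot stays log-dirty and is marked dirty individually when (and if)
     * it is written. */
    type[faulting] = p2m_ram_rw;

    for ( i = 0; i < 512; i++ )
        still_logdirty += (type[i] == p2m_ram_logdirty);

    printf("slot %#x is rw, %u slots still log-dirty\n",
           faulting, still_logdirty);
    return 0;
}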
--- a/xen/arch/x86/hvm/vmx/vmcs.c	Tue Apr 06 07:13:19 2010 +0100
+++ b/xen/arch/x86/hvm/vmx/vmcs.c	Tue Apr 06 07:14:56 2010 +0100
@@ -64,6 +64,7 @@ u32 vmx_cpu_based_exec_control __read_mo
 u32 vmx_secondary_exec_control __read_mostly;
 u32 vmx_vmexit_control __read_mostly;
 u32 vmx_vmentry_control __read_mostly;
+u8 vmx_ept_super_page_level_limit __read_mostly;
 bool_t cpu_has_vmx_ins_outs_instr_info __read_mostly;
 
 static DEFINE_PER_CPU_READ_MOSTLY(struct vmcs_struct *, host_vmcs);
@@ -183,6 +184,21 @@ static void vmx_init_vmcs_config(void)
             _vmx_secondary_exec_control &=
                 ~(SECONDARY_EXEC_ENABLE_EPT |
                   SECONDARY_EXEC_UNRESTRICTED_GUEST);
+        if ( _vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT )
+        {
+            uint64_t cap;
+            rdmsrl(MSR_IA32_VMX_EPT_VPID_CAP, cap);
+            if ( cap & VMX_EPT_SUPER_PAGE_1G )
+            {
+                vmx_ept_super_page_level_limit = 2;
+                printk("EPT support 1G super page.\n");
+            }
+            else if ( cap & VMX_EPT_SUPER_PAGE_2M )
+            {
+                vmx_ept_super_page_level_limit = 1;
+                printk("EPT support 2M super page.\n");
+            }
+        }
     }
 
     if ( (_vmx_secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) &&
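A standalone sketch (plain C, compilable outside Xen) of the capability probe added above. The bit masks mirror the VMX_EPT_SUPER_PAGE_* definitions this patch adds to vmcs.h; the sample cap value is made up rather than read from MSR_IA32_VMX_EPT_VPID_CAP.

#include <stdint.h>
#include <stdio.h>

#define VMX_EPT_SUPER_PAGE_2M 0x00010000  /* bit 16 of IA32_VMX_EPT_VPID_CAP */
#define VMX_EPT_SUPER_PAGE_1G 0x00020000  /* bit 17 */

/* Mirrors the logic in vmx_init_vmcs_config(): level 2 allows 1G
 * mappings, level 1 allows 2M, level 0 means 4K only. */
static uint8_t ept_level_limit(uint64_t cap)
{
    if ( cap & VMX_EPT_SUPER_PAGE_1G )
        return 2;
    if ( cap & VMX_EPT_SUPER_PAGE_2M )
        return 1;
    return 0;
}

int main(void)
{
    uint64_t sample_cap = VMX_EPT_SUPER_PAGE_2M | VMX_EPT_SUPER_PAGE_1G;
    printf("super page level limit = %u\n", ept_level_limit(sample_cap));
    return 0;
}

This level limit is what vmx.c (next hunk) consults to advertise hap_1gb_pgtb, and what ept_set_entry() in p2m-ept.c uses to cap how large a mapping may remain.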
--- a/xen/arch/x86/hvm/vmx/vmx.c	Tue Apr 06 07:13:19 2010 +0100
+++ b/xen/arch/x86/hvm/vmx/vmx.c	Tue Apr 06 07:14:56 2010 +0100
@@ -1446,7 +1446,8 @@ void start_vmx(void)
     if ( cpu_has_vmx_ept )
         vmx_function_table.hap_supported = 1;
 
-    vmx_function_table.hap_1gb_pgtb = 0;
+    vmx_function_table.hap_1gb_pgtb = ( vmx_ept_super_page_level_limit == 2 ) ?
+        1 : 0;
 
     setup_vmcs_dump();
 
--- a/xen/arch/x86/mm/hap/p2m-ept.c	Tue Apr 06 07:13:19 2010 +0100
+++ b/xen/arch/x86/mm/hap/p2m-ept.c	Tue Apr 06 07:14:56 2010 +0100
@@ -25,6 +25,7 @@
 #include <asm/domain.h>
 #include <asm/p2m.h>
 #include <asm/hvm/vmx/vmx.h>
+#include <asm/hvm/vmx/vmcs.h>
 #include <xen/iommu.h>
 #include <asm/mtrr.h>
 #include <asm/hvm/cacheattr.h>
@@ -167,6 +168,61 @@ static int ept_next_level(struct domain
     }
 }
 
+/* It's super page before and we should break down it now. */
+static int ept_split_large_page(struct domain *d,
+                                ept_entry_t **table, u32 *index,
+                                unsigned long gfn, int level)
+{
+    ept_entry_t *prev_table = *table;
+    ept_entry_t *split_table = NULL;
+    ept_entry_t *split_entry = NULL;
+    ept_entry_t *ept_entry = (*table) + (*index);
+    ept_entry_t temp_ept_entry;
+    unsigned long s_gfn, s_mfn;
+    unsigned long offset, trunk;
+    int i;
+
+    /* alloc new page for new ept middle level entry which is
+     * before a leaf super entry
+     */
+
+    if ( !ept_set_middle_entry(d, &temp_ept_entry) )
+        return 0;
+
+    /* split the super page to small next level pages */
+    split_table = map_domain_page(temp_ept_entry.mfn);
+    offset = gfn & ((1UL << (level * EPT_TABLE_ORDER)) - 1);
+    trunk = (1UL << ((level-1) * EPT_TABLE_ORDER));
+
+    for ( i = 0; i < (1UL << EPT_TABLE_ORDER); i++ )
+    {
+        s_gfn = gfn - offset + i * trunk;
+        s_mfn = ept_entry->mfn + i * trunk;
+
+        split_entry = split_table + i;
+        split_entry->emt = ept_entry->emt;
+        split_entry->ipat = ept_entry->ipat;
+
+        split_entry->sp_avail = (level > 1) ? 1 : 0;
+
+        split_entry->mfn = s_mfn;
+
+        split_entry->avail1 = ept_entry->avail1;
+        split_entry->avail2 = 0;
+        /* last step */
+        split_entry->r = split_entry->w = split_entry->x = 1;
+        ept_p2m_type_to_flags(split_entry, ept_entry->avail1);
+    }
+
+    *ept_entry = temp_ept_entry;
+
+    *index = offset / trunk;
+    *table = split_table;
+    unmap_domain_page(prev_table);
+
+    return 1;
+}
+
 /*
  * ept_set_entry() computes 'need_modify_vtd_table' for itself,
  * by observing whether any gfn->mfn translations are modified.
@@ -183,14 +239,12 @@ ept_set_entry(struct domain *d, unsigned
     int i;
     int rv = 0;
     int ret = 0;
+    int split_level = 0;
     int walk_level = order / EPT_TABLE_ORDER;
     int direct_mmio = (p2mt == p2m_mmio_direct);
     uint8_t ipat = 0;
     int need_modify_vtd_table = 1;
 
-    /* We only support 4k and 2m pages now */
-    BUG_ON(order && order != EPT_TABLE_ORDER);
-
     if ( order != 0 )
         if ( (gfn & ((1UL << order) - 1)) )
             return 1;
@@ -208,16 +262,16 @@ ept_set_entry(struct domain *d, unsigned
             break;
     }
 
-    /* If order == 9, we should never get SUPERPAGE or PoD.
-     * If order == 0, we should only get POD if we have a POD superpage.
+    /* If order == 0, we should only get POD if we have a POD superpage.
      * If i > walk_level, we need to split the page; otherwise,
      * just behave as normal. */
-    ASSERT(order == 0 || ret == GUEST_TABLE_NORMAL_PAGE);
    ASSERT(ret != GUEST_TABLE_POD_PAGE || i != walk_level);
 
     index = gfn_remainder >> ( i ? (i * EPT_TABLE_ORDER): order);
     offset = (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1)));
 
+    split_level = i;
+
     ept_entry = table + index;
 
     if ( i == walk_level )
@@ -231,25 +285,10 @@ ept_set_entry(struct domain *d, unsigned
             ept_entry->ipat = ipat;
             ept_entry->sp_avail = order ? 1 : 0;
 
-            if ( ret == GUEST_TABLE_SUPER_PAGE )
-            {
-                if ( ept_entry->mfn == (mfn_x(mfn) - offset) )
-                    need_modify_vtd_table = 0;
-                else
-                    ept_entry->mfn = mfn_x(mfn) - offset;
-
-                if ( (ept_entry->avail1 == p2m_ram_logdirty)
-                     && (p2mt == p2m_ram_rw) )
-                    for ( i = 0; i < 512; i++ )
-                        paging_mark_dirty(d, mfn_x(mfn) - offset + i);
-            }
+            if ( ept_entry->mfn == mfn_x(mfn) )
+                need_modify_vtd_table = 0;
             else
-            {
-                if ( ept_entry->mfn == mfn_x(mfn) )
-                    need_modify_vtd_table = 0;
-                else
-                    ept_entry->mfn = mfn_x(mfn);
-            }
+                ept_entry->mfn = mfn_x(mfn);
 
             ept_entry->avail1 = p2mt;
             ept_entry->avail2 = 0;
@@ -261,51 +300,22 @@ ept_set_entry(struct domain *d, unsigned
     }
     else
     {
-        /*
-         * It's super page before, now set one of the 4k pages, so
-         * we should split the 2m page to 4k pages now.
-         */
-        /* Pointers to / into new (split) middle-level table */
-        ept_entry_t *split_table = NULL;
-        ept_entry_t *split_ept_entry = NULL;
-        /* Info about old (superpage) table */
-        unsigned long super_mfn = ept_entry->mfn;
-        p2m_type_t super_p2mt = ept_entry->avail1;
-        /* The new l2 entry which we'll write after we've build the new l1 table */
-        ept_entry_t l2_ept_entry;
-
-        /*
-         * Allocate new page for new ept middle level entry which is
-         * before a leaf super entry
-         */
-        if ( !ept_set_middle_entry(d, &l2_ept_entry) )
-            goto out;
-
-        /* Split the super page before to 4k pages */
-        split_table = map_domain_page(l2_ept_entry.mfn);
-        offset = gfn & ((1 << EPT_TABLE_ORDER) - 1);
-
-        for ( i = 0; i < 512; i++ )
+        int num = order / EPT_TABLE_ORDER;
+        int level;
+        ept_entry_t *split_ept_entry;
+
+        if ( num >= cpu_vmx_ept_super_page_level_limit )
+            num = cpu_vmx_ept_super_page_level_limit;
+        for ( level = split_level; level > num ; level-- )
         {
-            split_ept_entry = split_table + i;
-            split_ept_entry->emt = epte_get_entry_emt(d, gfn - offset + i,
-                                                      _mfn(super_mfn + i),
-                                                      &ipat, direct_mmio);
-            split_ept_entry->ipat = ipat;
-            split_ept_entry->sp_avail = 0;
-            /* Don't increment mfn if it's a PoD mfn */
-            if ( super_p2mt != p2m_populate_on_demand )
-                split_ept_entry->mfn = super_mfn + i;
-            else
-                split_ept_entry->mfn = super_mfn;
-            split_ept_entry->avail1 = super_p2mt;
-            split_ept_entry->avail2 = 0;
-
-            ept_p2m_type_to_flags(split_ept_entry, super_p2mt);
+            rv = ept_split_large_page(d, &table, &index, gfn, level);
+            if ( !rv )
+                goto out;
         }
 
-        /* Set the destinated 4k page as normal */
-        split_ept_entry = split_table + offset;
+        split_ept_entry = table + index;
+        split_ept_entry->avail1 = p2mt;
+        ept_p2m_type_to_flags(split_ept_entry, p2mt);
         split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn, &ipat,
                                                   direct_mmio);
         split_ept_entry->ipat = ipat;
@@ -314,12 +324,6 @@ ept_set_entry(struct domain *d, unsigned
             need_modify_vtd_table = 0;
         else
             split_ept_entry->mfn = mfn_x(mfn);
-
-        split_ept_entry->avail1 = p2mt;
-        ept_p2m_type_to_flags(split_ept_entry, p2mt);
-
-        unmap_domain_page(split_table);
-        *ept_entry = l2_ept_entry;
     }
 
     /* Track the highest gfn for which we have ever had a valid mapping */
@@ -336,7 +340,7 @@ out:
     ept_sync_domain(d);
 
     /* Now the p2m table is not shared with vt-d page table */
-    if ( iommu_enabled && need_iommu(d) && need_modify_vtd_table )
+    if ( rv && iommu_enabled && need_iommu(d) && need_modify_vtd_table )
     {
         if ( p2mt == p2m_ram_rw )
         {
@@ -459,7 +463,7 @@ out:
 /* WARNING: Only caller doesn't care about PoD pages. So this function will
  * always return 0 for PoD pages, not populate them. If that becomes necessary,
  * pass a p2m_query_t type along to distinguish. */
-static ept_entry_t ept_get_entry_content(struct domain *d, unsigned long gfn)
+static ept_entry_t ept_get_entry_content(struct domain *d, unsigned long gfn, int *level)
 {
     ept_entry_t *table =
         map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
@@ -487,6 +491,7 @@ static ept_entry_t ept_get_entry_content
     index = gfn_remainder >> (i * EPT_TABLE_ORDER);
     ept_entry = table + index;
     content = *ept_entry;
+    *level = i;
 
 out:
     unmap_domain_page(table);
@@ -579,7 +584,10 @@ void ept_change_entry_emt_with_range(str
     p2m_lock(d->arch.p2m);
     for ( gfn = start_gfn; gfn <= end_gfn; gfn++ )
     {
-        e = ept_get_entry_content(d, gfn);
+        int level = 0;
+        uint64_t trunk = 0;
+
+        e = ept_get_entry_content(d, gfn, &level);
         if ( !p2m_has_emt(e.avail1) )
             continue;
 
@@ -588,25 +596,24 @@ void ept_change_entry_emt_with_range(str
 
         if ( e.sp_avail )
         {
-            if ( !(gfn & ((1 << EPT_TABLE_ORDER) - 1)) &&
-                 ((gfn + 0x1FF) <= end_gfn) )
+            while ( level )
             {
-                /*
-                 * gfn assigned with 2M, and the end covers more than 2m areas.
-                 * Set emt for super page.
-                 */
-                order = EPT_TABLE_ORDER;
-                if ( need_modify_ept_entry(d, gfn, mfn, e.ipat, e.emt, e.avail1) )
-                    ept_set_entry(d, gfn, mfn, order, e.avail1);
-                gfn += 0x1FF;
-            }
-            else
-            {
-                /* Change emt for partial entries of the 2m area. */
-                if ( need_modify_ept_entry(d, gfn, mfn, e.ipat, e.emt, e.avail1) )
-                    ept_set_entry(d, gfn, mfn, order, e.avail1);
-                gfn = ((gfn >> EPT_TABLE_ORDER) << EPT_TABLE_ORDER) + 0x1FF;
-            }
+                trunk = (1UL << (level * EPT_TABLE_ORDER)) - 1;
+                if ( !(gfn & trunk) && (gfn + trunk <= end_gfn) )
+                {
+                    /* gfn assigned with 2M or 1G, and the end covers more than
+                     * the super page areas.
+                     * Set emt for super page.
+                     */
+                    order = level * EPT_TABLE_ORDER;
+                    if ( need_modify_ept_entry(d, gfn, mfn,
+                                               e.ipat, e.emt, e.avail1) )
+                        ept_set_entry(d, gfn, mfn, order, e.avail1);
+                    gfn += trunk;
+                    break;
+                }
+                level--;
+            }
         }
         else /* gfn assigned with 4k */
        {
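For reference, a standalone walk-through (plain C, not Xen code) of the offset/trunk arithmetic ept_split_large_page() uses above, assuming EPT_TABLE_ORDER == 9; the gfn value is an invented example.

#include <stdio.h>

#define EPT_TABLE_ORDER 9

int main(void)
{
    unsigned long gfn = 0x40523;  /* hypothetical gfn inside a 1G mapping */
    int level = 2;                /* splitting a level-2 (1G) entry */

    /* gfn's position within the superpage, counted in 4K frames */
    unsigned long offset = gfn & ((1UL << (level * EPT_TABLE_ORDER)) - 1);
    /* frames covered by each of the 512 new lower-level entries (2M here) */
    unsigned long trunk = 1UL << ((level - 1) * EPT_TABLE_ORDER);

    /* After the split, the target gfn lives in entry offset/trunk of the
     * new table; here: offset=0x523, trunk=0x200, index=2. */
    printf("offset=%#lx trunk=%#lx -> new index=%lu\n",
           offset, trunk, offset / trunk);
    return 0;
}

ept_set_entry() repeats this split from split_level down to the level the requested order (capped by cpu_vmx_ept_super_page_level_limit) allows, so a 1G entry may first be split into 2M entries and then, on a later pass of the same loop, into 4K entries.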
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h	Tue Apr 06 07:13:19 2010 +0100
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h	Tue Apr 06 07:14:56 2010 +0100
@@ -176,6 +176,11 @@ extern u32 vmx_secondary_exec_control;
 
 extern bool_t cpu_has_vmx_ins_outs_instr_info;
 
+extern u8 vmx_ept_super_page_level_limit;
+
+#define VMX_EPT_SUPER_PAGE_2M 0x00010000
+#define VMX_EPT_SUPER_PAGE_1G 0x00020000
+
 #define cpu_has_wbinvd_exiting \
     (vmx_secondary_exec_control & SECONDARY_EXEC_WBINVD_EXITING)
 #define cpu_has_vmx_virtualize_apic_accesses \
@@ -203,6 +208,8 @@ extern bool_t cpu_has_vmx_ins_outs_instr
      SECONDARY_EXEC_UNRESTRICTED_GUEST)
 #define cpu_has_vmx_ple \
     (vmx_secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
+#define cpu_vmx_ept_super_page_level_limit \
+    vmx_ept_super_page_level_limit
 
 /* GUEST_INTERRUPTIBILITY_INFO flags. */
 #define VMX_INTR_SHADOW_STI 0x00000001
--- a/xen/include/asm-x86/msr-index.h	Tue Apr 06 07:13:19 2010 +0100
+++ b/xen/include/asm-x86/msr-index.h	Tue Apr 06 07:14:56 2010 +0100
@@ -166,6 +166,7 @@
 #define MSR_IA32_VMX_CR4_FIXED0		0x488
 #define MSR_IA32_VMX_CR4_FIXED1		0x489
 #define MSR_IA32_VMX_PROCBASED_CTLS2	0x48b
+#define MSR_IA32_VMX_EPT_VPID_CAP	0x48c
 #define MSR_IA32_VMX_TRUE_PINBASED_CTLS	0x48d
 #define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x48e
 #define MSR_IA32_VMX_TRUE_EXIT_CTLS	0x48f