changeset 13938:6daa91dc9247
[XEN] New paging-assistance interface.
Adds paging_* functions, wrapping the existing shadow_* ones,
so that later hardware paging assistance can be hooked in at this level.
Separates p2m maintenance code into its own files.
Adjusts shadow code to comply with the new interfaces.
Changes all callers in arch/x86 to use the new interfaces.
Signed-off-by: Wei Huang <Wei.Huang2@amd.com>
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
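The new layer is a thin dispatch table: callers in arch/x86 go through paging_* wrappers, which forward to whichever paging mode is attached to the vcpu (today the shadow code, later a hardware-assisted mode). The sketch below illustrates only that dispatch pattern; the structure layout and prototypes are simplified stand-ins, not copied from the asm-x86/paging.h added by this changeset.

/* Sketch of the paging_* dispatch pattern only: field names and
 * prototypes here are simplified, not taken verbatim from
 * xen/include/asm-x86/paging.h. */
struct paging_mode {
    int          (*page_fault)(struct vcpu *v, unsigned long va,
                               struct cpu_user_regs *regs);
    int          (*invlpg)(struct vcpu *v, unsigned long va);
    paddr_t      (*gva_to_gpa)(struct vcpu *v, unsigned long va);
    void         (*update_cr3)(struct vcpu *v);
    unsigned int guest_levels;
    /* shadow-specific emulation hooks live in a nested sub-structure */
};

/* Each vcpu carries a pointer to its current mode (v->arch.paging.mode);
 * the wrappers indirect through it, so hardware paging assistance can
 * later be hooked in by installing a different mode structure. */
static inline int paging_fault(unsigned long va, struct cpu_user_regs *regs)
{
    struct vcpu *v = current;
    return v->arch.paging.mode->page_fault(v, va, regs);
}

static inline int paging_invlpg(struct vcpu *v, unsigned long va)
{
    return v->arch.paging.mode->invlpg(v, va);
}

With this in place, converting a caller is mechanical: for example, shadow_fault(va, regs) in the SVM and VMX page-fault handlers becomes paging_fault(va, regs), as the hunks below show.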
line diff
1.1 --- a/xen/arch/x86/domain.c Tue Feb 13 15:32:25 2007 +0000 1.2 +++ b/xen/arch/x86/domain.c Wed Feb 14 12:02:20 2007 +0000 1.3 @@ -37,7 +37,7 @@ 1.4 #include <asm/i387.h> 1.5 #include <asm/mpspec.h> 1.6 #include <asm/ldt.h> 1.7 -#include <asm/shadow.h> 1.8 +#include <asm/paging.h> 1.9 #include <asm/hvm/hvm.h> 1.10 #include <asm/hvm/support.h> 1.11 #include <asm/msr.h> 1.12 @@ -331,6 +331,7 @@ int vcpu_initialise(struct vcpu *v) 1.13 1.14 pae_l3_cache_init(&v->arch.pae_l3_cache); 1.15 1.16 + paging_vcpu_init(v); 1.17 1.18 if ( is_hvm_domain(d) ) 1.19 { 1.20 @@ -424,7 +425,7 @@ int arch_domain_create(struct domain *d) 1.21 HYPERVISOR_COMPAT_VIRT_START(d) = __HYPERVISOR_COMPAT_VIRT_START; 1.22 #endif 1.23 1.24 - shadow_domain_init(d); 1.25 + paging_domain_init(d); 1.26 1.27 if ( !is_idle_domain(d) ) 1.28 { 1.29 @@ -464,7 +465,7 @@ void arch_domain_destroy(struct domain * 1.30 hvm_domain_destroy(d); 1.31 } 1.32 1.33 - shadow_final_teardown(d); 1.34 + paging_final_teardown(d); 1.35 1.36 free_xenheap_pages( 1.37 d->arch.mm_perdomain_pt, 1.38 @@ -613,7 +614,7 @@ int arch_set_info_guest( 1.39 { 1.40 cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c.nat->ctrlreg[3])); 1.41 1.42 - if ( shadow_mode_refcounts(d) 1.43 + if ( paging_mode_refcounts(d) 1.44 ? !get_page(mfn_to_page(cr3_pfn), d) 1.45 : !get_page_and_type(mfn_to_page(cr3_pfn), d, 1.46 PGT_base_page_table) ) 1.47 @@ -631,7 +632,7 @@ int arch_set_info_guest( 1.48 1.49 cr3_pfn = gmfn_to_mfn(d, compat_cr3_to_pfn(c.cmp->ctrlreg[3])); 1.50 1.51 - if ( shadow_mode_refcounts(d) 1.52 + if ( paging_mode_refcounts(d) 1.53 ? !get_page(mfn_to_page(cr3_pfn), d) 1.54 : !get_page_and_type(mfn_to_page(cr3_pfn), d, 1.55 PGT_l3_page_table) ) 1.56 @@ -652,8 +653,8 @@ int arch_set_info_guest( 1.57 /* Don't redo final setup */ 1.58 set_bit(_VCPUF_initialised, &v->vcpu_flags); 1.59 1.60 - if ( shadow_mode_enabled(d) ) 1.61 - shadow_update_paging_modes(v); 1.62 + if ( paging_mode_enabled(d) ) 1.63 + paging_update_paging_modes(v); 1.64 1.65 update_cr3(v); 1.66 1.67 @@ -1406,7 +1407,7 @@ static void vcpu_destroy_pagetables(stru 1.68 1.69 if ( pfn != 0 ) 1.70 { 1.71 - if ( shadow_mode_refcounts(d) ) 1.72 + if ( paging_mode_refcounts(d) ) 1.73 put_page(mfn_to_page(pfn)); 1.74 else 1.75 put_page_and_type(mfn_to_page(pfn)); 1.76 @@ -1427,7 +1428,7 @@ static void vcpu_destroy_pagetables(stru 1.77 pfn = pagetable_get_pfn(v->arch.guest_table); 1.78 if ( pfn != 0 ) 1.79 { 1.80 - if ( shadow_mode_refcounts(d) ) 1.81 + if ( paging_mode_refcounts(d) ) 1.82 put_page(mfn_to_page(pfn)); 1.83 else 1.84 put_page_and_type(mfn_to_page(pfn)); 1.85 @@ -1443,7 +1444,7 @@ static void vcpu_destroy_pagetables(stru 1.86 pfn = pagetable_get_pfn(v->arch.guest_table_user); 1.87 if ( pfn != 0 ) 1.88 { 1.89 - if ( shadow_mode_refcounts(d) ) 1.90 + if ( paging_mode_refcounts(d) ) 1.91 put_page(mfn_to_page(pfn)); 1.92 else 1.93 put_page_and_type(mfn_to_page(pfn)); 1.94 @@ -1464,8 +1465,8 @@ void domain_relinquish_resources(struct 1.95 for_each_vcpu ( d, v ) 1.96 vcpu_destroy_pagetables(v); 1.97 1.98 - /* Tear down shadow mode stuff. */ 1.99 - shadow_teardown(d); 1.100 + /* Tear down paging-assistance stuff. */ 1.101 + paging_teardown(d); 1.102 1.103 /* 1.104 * Relinquish GDT mappings. 
No need for explicit unmapping of the LDT as 1.105 @@ -1484,35 +1485,12 @@ void domain_relinquish_resources(struct 1.106 1.107 void arch_dump_domain_info(struct domain *d) 1.108 { 1.109 - if ( shadow_mode_enabled(d) ) 1.110 - { 1.111 - printk(" shadow mode: "); 1.112 - if ( d->arch.shadow.mode & SHM2_enable ) 1.113 - printk("enabled "); 1.114 - if ( shadow_mode_refcounts(d) ) 1.115 - printk("refcounts "); 1.116 - if ( shadow_mode_log_dirty(d) ) 1.117 - printk("log_dirty "); 1.118 - if ( shadow_mode_translate(d) ) 1.119 - printk("translate "); 1.120 - if ( shadow_mode_external(d) ) 1.121 - printk("external "); 1.122 - printk("\n"); 1.123 - } 1.124 + paging_dump_domain_info(d); 1.125 } 1.126 1.127 void arch_dump_vcpu_info(struct vcpu *v) 1.128 { 1.129 - if ( shadow_mode_enabled(v->domain) ) 1.130 - { 1.131 - if ( v->arch.shadow.mode ) 1.132 - printk(" shadowed %u-on-%u, %stranslated\n", 1.133 - v->arch.shadow.mode->guest_levels, 1.134 - v->arch.shadow.mode->shadow_levels, 1.135 - shadow_vcpu_mode_translate(v) ? "" : "not "); 1.136 - else 1.137 - printk(" not shadowed\n"); 1.138 - } 1.139 + paging_dump_vcpu_info(v); 1.140 } 1.141 1.142 /*
2.1 --- a/xen/arch/x86/domain_build.c Tue Feb 13 15:32:25 2007 +0000 2.2 +++ b/xen/arch/x86/domain_build.c Wed Feb 14 12:02:20 2007 +0000 2.3 @@ -25,7 +25,7 @@ 2.4 #include <asm/processor.h> 2.5 #include <asm/desc.h> 2.6 #include <asm/i387.h> 2.7 -#include <asm/shadow.h> 2.8 +#include <asm/paging.h> 2.9 2.10 #include <public/version.h> 2.11 #include <public/libelf.h> 2.12 @@ -777,8 +777,8 @@ int construct_dom0(struct domain *d, 2.13 (void)alloc_vcpu(d, i, i); 2.14 2.15 /* Set up CR3 value for write_ptbase */ 2.16 - if ( shadow_mode_enabled(v->domain) ) 2.17 - shadow_update_paging_modes(v); 2.18 + if ( paging_mode_enabled(v->domain) ) 2.19 + paging_update_paging_modes(v); 2.20 else 2.21 update_cr3(v); 2.22 2.23 @@ -918,8 +918,8 @@ int construct_dom0(struct domain *d, 2.24 regs->eflags = X86_EFLAGS_IF; 2.25 2.26 if ( opt_dom0_shadow ) 2.27 - if ( shadow_enable(d, SHM2_enable) == 0 ) 2.28 - shadow_update_paging_modes(v); 2.29 + if ( paging_enable(d, PG_SH_enable) == 0 ) 2.30 + paging_update_paging_modes(v); 2.31 2.32 if ( supervisor_mode_kernel ) 2.33 {
3.1 --- a/xen/arch/x86/domctl.c Tue Feb 13 15:32:25 2007 +0000 3.2 +++ b/xen/arch/x86/domctl.c Wed Feb 14 12:02:20 2007 +0000 3.3 @@ -19,7 +19,7 @@ 3.4 #include <xen/trace.h> 3.5 #include <xen/console.h> 3.6 #include <xen/iocap.h> 3.7 -#include <asm/shadow.h> 3.8 +#include <asm/paging.h> 3.9 #include <asm/irq.h> 3.10 #include <asm/hvm/hvm.h> 3.11 #include <asm/hvm/support.h> 3.12 @@ -42,7 +42,7 @@ long arch_do_domctl( 3.13 d = get_domain_by_id(domctl->domain); 3.14 if ( d != NULL ) 3.15 { 3.16 - ret = shadow_domctl(d, 3.17 + ret = paging_domctl(d, 3.18 &domctl->u.shadow_op, 3.19 guest_handle_cast(u_domctl, void)); 3.20 put_domain(d);
4.1 --- a/xen/arch/x86/hvm/hvm.c Tue Feb 13 15:32:25 2007 +0000 4.2 +++ b/xen/arch/x86/hvm/hvm.c Wed Feb 14 12:02:20 2007 +0000 4.3 @@ -30,11 +30,10 @@ 4.4 #include <xen/hypercall.h> 4.5 #include <xen/guest_access.h> 4.6 #include <xen/event.h> 4.7 -#include <xen/shadow.h> 4.8 #include <asm/current.h> 4.9 #include <asm/e820.h> 4.10 #include <asm/io.h> 4.11 -#include <asm/shadow.h> 4.12 +#include <asm/paging.h> 4.13 #include <asm/regs.h> 4.14 #include <asm/cpufeature.h> 4.15 #include <asm/processor.h> 4.16 @@ -155,7 +154,7 @@ int hvm_domain_initialise(struct domain 4.17 spin_lock_init(&d->arch.hvm_domain.buffered_io_lock); 4.18 spin_lock_init(&d->arch.hvm_domain.irq_lock); 4.19 4.20 - rc = shadow_enable(d, SHM2_refcounts|SHM2_translate|SHM2_external); 4.21 + rc = paging_enable(d, PG_SH_enable|PG_refcounts|PG_translate|PG_external); 4.22 if ( rc != 0 ) 4.23 return rc; 4.24 4.25 @@ -383,7 +382,7 @@ static int __hvm_copy(void *buf, paddr_t 4.26 count = min_t(int, PAGE_SIZE - (addr & ~PAGE_MASK), todo); 4.27 4.28 if ( virt ) 4.29 - mfn = get_mfn_from_gpfn(shadow_gva_to_gfn(current, addr)); 4.30 + mfn = get_mfn_from_gpfn(paging_gva_to_gfn(current, addr)); 4.31 else 4.32 mfn = get_mfn_from_gpfn(addr >> PAGE_SHIFT); 4.33 4.34 @@ -600,7 +599,7 @@ void hvm_do_hypercall(struct cpu_user_re 4.35 return; 4.36 } 4.37 4.38 - if ( current->arch.shadow.mode->guest_levels == 4 ) 4.39 + if ( current->arch.paging.mode->guest_levels == 4 ) 4.40 { 4.41 pregs->rax = hvm_hypercall64_table[pregs->rax](pregs->rdi, 4.42 pregs->rsi,
5.1 --- a/xen/arch/x86/hvm/io.c Tue Feb 13 15:32:25 2007 +0000 5.2 +++ b/xen/arch/x86/hvm/io.c Wed Feb 14 12:02:20 2007 +0000 5.3 @@ -32,7 +32,7 @@ 5.4 #include <asm/processor.h> 5.5 #include <asm/msr.h> 5.6 #include <asm/apic.h> 5.7 -#include <asm/shadow.h> 5.8 +#include <asm/paging.h> 5.9 #include <asm/hvm/hvm.h> 5.10 #include <asm/hvm/support.h> 5.11 #include <asm/hvm/vpt.h>
6.1 --- a/xen/arch/x86/hvm/platform.c Tue Feb 13 15:32:25 2007 +0000 6.2 +++ b/xen/arch/x86/hvm/platform.c Wed Feb 14 12:02:20 2007 +0000 6.3 @@ -21,7 +21,6 @@ 6.4 #include <xen/config.h> 6.5 #include <xen/types.h> 6.6 #include <xen/mm.h> 6.7 -#include <xen/shadow.h> 6.8 #include <xen/domain_page.h> 6.9 #include <asm/page.h> 6.10 #include <xen/event.h> 6.11 @@ -29,6 +28,7 @@ 6.12 #include <xen/sched.h> 6.13 #include <asm/regs.h> 6.14 #include <asm/x86_emulate.h> 6.15 +#include <asm/paging.h> 6.16 #include <asm/hvm/hvm.h> 6.17 #include <asm/hvm/support.h> 6.18 #include <asm/hvm/io.h> 6.19 @@ -809,7 +809,7 @@ void send_pio_req(unsigned long port, un 6.20 if ( value_is_ptr ) /* get physical address of data */ 6.21 { 6.22 if ( hvm_paging_enabled(current) ) 6.23 - p->data = shadow_gva_to_gpa(current, value); 6.24 + p->data = paging_gva_to_gpa(current, value); 6.25 else 6.26 p->data = value; /* guest VA == guest PA */ 6.27 } 6.28 @@ -865,7 +865,7 @@ static void send_mmio_req(unsigned char 6.29 if ( value_is_ptr ) 6.30 { 6.31 if ( hvm_paging_enabled(v) ) 6.32 - p->data = shadow_gva_to_gpa(v, value); 6.33 + p->data = paging_gva_to_gpa(v, value); 6.34 else 6.35 p->data = value; /* guest VA == guest PA */ 6.36 } 6.37 @@ -981,7 +981,7 @@ void handle_mmio(unsigned long gpa) 6.38 if ( ad_size == WORD ) 6.39 addr &= 0xFFFF; 6.40 addr += hvm_get_segment_base(v, x86_seg_es); 6.41 - if ( shadow_gva_to_gpa(v, addr) == gpa ) 6.42 + if ( paging_gva_to_gpa(v, addr) == gpa ) 6.43 { 6.44 enum x86_segment seg; 6.45
7.1 --- a/xen/arch/x86/hvm/svm/intr.c Tue Feb 13 15:32:25 2007 +0000 7.2 +++ b/xen/arch/x86/hvm/svm/intr.c Wed Feb 14 12:02:20 2007 +0000 7.3 @@ -24,10 +24,10 @@ 7.4 #include <xen/lib.h> 7.5 #include <xen/trace.h> 7.6 #include <xen/errno.h> 7.7 -#include <xen/shadow.h> 7.8 #include <asm/cpufeature.h> 7.9 #include <asm/processor.h> 7.10 #include <asm/msr.h> 7.11 +#include <asm/paging.h> 7.12 #include <asm/hvm/hvm.h> 7.13 #include <asm/hvm/io.h> 7.14 #include <asm/hvm/support.h>
8.1 --- a/xen/arch/x86/hvm/svm/svm.c Tue Feb 13 15:32:25 2007 +0000 8.2 +++ b/xen/arch/x86/hvm/svm/svm.c Wed Feb 14 12:02:20 2007 +0000 8.3 @@ -29,7 +29,8 @@ 8.4 #include <xen/domain_page.h> 8.5 #include <asm/current.h> 8.6 #include <asm/io.h> 8.7 -#include <asm/shadow.h> 8.8 +#include <asm/paging.h> 8.9 +#include <asm/p2m.h> 8.10 #include <asm/regs.h> 8.11 #include <asm/cpufeature.h> 8.12 #include <asm/processor.h> 8.13 @@ -491,9 +492,6 @@ int svm_vmcb_restore(struct vcpu *v, str 8.14 v->arch.guest_table = pagetable_from_pfn(mfn); 8.15 if (old_base_mfn) 8.16 put_page(mfn_to_page(old_base_mfn)); 8.17 - /* 8.18 - * arch.shadow_table should now hold the next CR3 for shadow 8.19 - */ 8.20 v->arch.hvm_svm.cpu_cr3 = c->cr3; 8.21 } 8.22 8.23 @@ -560,7 +558,7 @@ int svm_vmcb_restore(struct vcpu *v, str 8.24 vmcb->sysenter_esp = c->sysenter_esp; 8.25 vmcb->sysenter_eip = c->sysenter_eip; 8.26 8.27 - shadow_update_paging_modes(v); 8.28 + paging_update_paging_modes(v); 8.29 return 0; 8.30 8.31 bad_cr3: 8.32 @@ -1095,7 +1093,7 @@ static int svm_do_page_fault(unsigned lo 8.33 "svm_do_page_fault = 0x%lx, eip = %lx, error_code = %lx", 8.34 va, (unsigned long)current->arch.hvm_svm.vmcb->rip, 8.35 (unsigned long)regs->error_code); 8.36 - return shadow_fault(va, regs); 8.37 + return paging_fault(va, regs); 8.38 } 8.39 8.40 8.41 @@ -1730,7 +1728,7 @@ static int svm_set_cr0(unsigned long val 8.42 v->arch.guest_table = pagetable_from_pfn(mfn); 8.43 if ( old_base_mfn ) 8.44 put_page(mfn_to_page(old_base_mfn)); 8.45 - shadow_update_paging_modes(v); 8.46 + paging_update_paging_modes(v); 8.47 8.48 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", 8.49 (unsigned long) (mfn << PAGE_SHIFT)); 8.50 @@ -1753,7 +1751,7 @@ static int svm_set_cr0(unsigned long val 8.51 svm_inject_exception(v, TRAP_gp_fault, 1, 0); 8.52 return 0; 8.53 } 8.54 - shadow_update_paging_modes(v); 8.55 + paging_update_paging_modes(v); 8.56 } 8.57 else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE ) 8.58 { 8.59 @@ -1763,7 +1761,7 @@ static int svm_set_cr0(unsigned long val 8.60 clear_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state); 8.61 } 8.62 /* we should take care of this kind of situation */ 8.63 - shadow_update_paging_modes(v); 8.64 + paging_update_paging_modes(v); 8.65 } 8.66 8.67 return 1; 8.68 @@ -1866,7 +1864,7 @@ static int mov_to_cr(int gpreg, int cr, 8.69 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT); 8.70 if (mfn != pagetable_get_pfn(v->arch.guest_table)) 8.71 goto bad_cr3; 8.72 - shadow_update_cr3(v); 8.73 + paging_update_cr3(v); 8.74 } 8.75 else 8.76 { 8.77 @@ -1917,7 +1915,7 @@ static int mov_to_cr(int gpreg, int cr, 8.78 v->arch.guest_table = pagetable_from_pfn(mfn); 8.79 if ( old_base_mfn ) 8.80 put_page(mfn_to_page(old_base_mfn)); 8.81 - shadow_update_paging_modes(v); 8.82 + paging_update_paging_modes(v); 8.83 8.84 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", 8.85 (unsigned long) (mfn << PAGE_SHIFT)); 8.86 @@ -1946,7 +1944,7 @@ static int mov_to_cr(int gpreg, int cr, 8.87 * all TLB entries except global entries. 
8.88 */ 8.89 if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) 8.90 - shadow_update_paging_modes(v); 8.91 + paging_update_paging_modes(v); 8.92 break; 8.93 8.94 case 8: 8.95 @@ -2289,7 +2287,7 @@ void svm_handle_invlpg(const short invlp 8.96 __update_guest_eip (vmcb, inst_len); 8.97 } 8.98 8.99 - shadow_invlpg(v, g_vaddr); 8.100 + paging_invlpg(v, g_vaddr); 8.101 } 8.102 8.103 8.104 @@ -2660,7 +2658,7 @@ void walk_shadow_and_guest_pt(unsigned l 8.105 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; 8.106 paddr_t gpa; 8.107 8.108 - gpa = shadow_gva_to_gpa(current, gva); 8.109 + gpa = paging_gva_to_gpa(current, gva); 8.110 printk("gva = %lx, gpa=%"PRIpaddr", gCR3=%x\n", gva, gpa, (u32)vmcb->cr3); 8.111 if( !svm_paging_enabled(v) || mmio_space(gpa) ) 8.112 return; 8.113 @@ -2726,7 +2724,7 @@ asmlinkage void svm_vmexit_handler(struc 8.114 if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF) 8.115 { 8.116 if (svm_paging_enabled(v) && 8.117 - !mmio_space(shadow_gva_to_gpa(current, vmcb->exitinfo2))) 8.118 + !mmio_space(paging_gva_to_gpa(current, vmcb->exitinfo2))) 8.119 { 8.120 printk("I%08ld,ExC=%s(%d),IP=%x:%"PRIx64"," 8.121 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64", " 8.122 @@ -2736,7 +2734,7 @@ asmlinkage void svm_vmexit_handler(struc 8.123 (u64)vmcb->exitinfo1, 8.124 (u64)vmcb->exitinfo2, 8.125 (u64)vmcb->exitintinfo.bytes, 8.126 - (u64)shadow_gva_to_gpa(current, vmcb->exitinfo2)); 8.127 + (u64)paging_gva_to_gpa(current, vmcb->exitinfo2)); 8.128 } 8.129 else 8.130 {
9.1 --- a/xen/arch/x86/hvm/svm/vmcb.c Tue Feb 13 15:32:25 2007 +0000 9.2 +++ b/xen/arch/x86/hvm/svm/vmcb.c Wed Feb 14 12:02:20 2007 +0000 9.3 @@ -23,10 +23,10 @@ 9.4 #include <xen/mm.h> 9.5 #include <xen/lib.h> 9.6 #include <xen/errno.h> 9.7 -#include <xen/shadow.h> 9.8 #include <asm/cpufeature.h> 9.9 #include <asm/processor.h> 9.10 #include <asm/msr.h> 9.11 +#include <asm/paging.h> 9.12 #include <asm/hvm/hvm.h> 9.13 #include <asm/hvm/io.h> 9.14 #include <asm/hvm/support.h> 9.15 @@ -196,7 +196,7 @@ static int construct_vmcb(struct vcpu *v 9.16 read_cr4() & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE); 9.17 vmcb->cr4 = arch_svm->cpu_shadow_cr4 | SVM_CR4_HOST_MASK; 9.18 9.19 - shadow_update_paging_modes(v); 9.20 + paging_update_paging_modes(v); 9.21 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; 9.22 9.23 arch_svm->vmcb->exception_intercepts = MONITOR_DEFAULT_EXCEPTION_BITMAP;
10.1 --- a/xen/arch/x86/hvm/vlapic.c Tue Feb 13 15:32:25 2007 +0000 10.2 +++ b/xen/arch/x86/hvm/vlapic.c Wed Feb 14 12:02:20 2007 +0000 10.3 @@ -22,7 +22,6 @@ 10.4 #include <xen/types.h> 10.5 #include <xen/mm.h> 10.6 #include <xen/xmalloc.h> 10.7 -#include <xen/shadow.h> 10.8 #include <xen/domain_page.h> 10.9 #include <asm/page.h> 10.10 #include <xen/event.h>
11.1 --- a/xen/arch/x86/hvm/vmx/vmcs.c Tue Feb 13 15:32:25 2007 +0000 11.2 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Wed Feb 14 12:02:20 2007 +0000 11.3 @@ -448,7 +448,7 @@ static void construct_vmcs(struct vcpu * 11.4 11.5 vmx_vmcs_exit(v); 11.6 11.7 - shadow_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */ 11.8 + paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */ 11.9 } 11.10 11.11 int vmx_create_vmcs(struct vcpu *v)
12.1 --- a/xen/arch/x86/hvm/vmx/vmx.c Tue Feb 13 15:32:25 2007 +0000 12.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c Wed Feb 14 12:02:20 2007 +0000 12.3 @@ -35,12 +35,13 @@ 12.4 #include <asm/types.h> 12.5 #include <asm/msr.h> 12.6 #include <asm/spinlock.h> 12.7 +#include <asm/paging.h> 12.8 +#include <asm/p2m.h> 12.9 #include <asm/hvm/hvm.h> 12.10 #include <asm/hvm/support.h> 12.11 #include <asm/hvm/vmx/vmx.h> 12.12 #include <asm/hvm/vmx/vmcs.h> 12.13 #include <asm/hvm/vmx/cpu.h> 12.14 -#include <asm/shadow.h> 12.15 #include <public/sched.h> 12.16 #include <public/hvm/ioreq.h> 12.17 #include <asm/hvm/vpic.h> 12.18 @@ -484,9 +485,6 @@ int vmx_vmcs_restore(struct vcpu *v, str 12.19 v->arch.guest_table = pagetable_from_pfn(mfn); 12.20 if (old_base_mfn) 12.21 put_page(mfn_to_page(old_base_mfn)); 12.22 - /* 12.23 - * arch.shadow_table should now hold the next CR3 for shadow 12.24 - */ 12.25 v->arch.hvm_vmx.cpu_cr3 = c->cr3; 12.26 } 12.27 12.28 @@ -556,7 +554,7 @@ int vmx_vmcs_restore(struct vcpu *v, str 12.29 12.30 vmx_vmcs_exit(v); 12.31 12.32 - shadow_update_paging_modes(v); 12.33 + paging_update_paging_modes(v); 12.34 return 0; 12.35 12.36 bad_cr3: 12.37 @@ -1126,7 +1124,7 @@ static int vmx_do_page_fault(unsigned lo 12.38 } 12.39 #endif 12.40 12.41 - result = shadow_fault(va, regs); 12.42 + result = paging_fault(va, regs); 12.43 12.44 TRACE_VMEXIT(2, result); 12.45 #if 0 12.46 @@ -1277,7 +1275,7 @@ static void vmx_do_invlpg(unsigned long 12.47 * We do the safest things first, then try to update the shadow 12.48 * copying from guest 12.49 */ 12.50 - shadow_invlpg(v, va); 12.51 + paging_invlpg(v, va); 12.52 } 12.53 12.54 12.55 @@ -1691,9 +1689,6 @@ static int vmx_world_restore(struct vcpu 12.56 v->arch.guest_table = pagetable_from_pfn(mfn); 12.57 if (old_base_mfn) 12.58 put_page(mfn_to_page(old_base_mfn)); 12.59 - /* 12.60 - * arch.shadow_table should now hold the next CR3 for shadow 12.61 - */ 12.62 v->arch.hvm_vmx.cpu_cr3 = c->cr3; 12.63 } 12.64 12.65 @@ -1753,7 +1748,7 @@ static int vmx_world_restore(struct vcpu 12.66 __vmwrite(GUEST_LDTR_BASE, c->ldtr_base); 12.67 __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes.bytes); 12.68 12.69 - shadow_update_paging_modes(v); 12.70 + paging_update_paging_modes(v); 12.71 return 0; 12.72 12.73 bad_cr3: 12.74 @@ -1906,14 +1901,11 @@ static int vmx_set_cr0(unsigned long val 12.75 v->arch.guest_table = pagetable_from_pfn(mfn); 12.76 if (old_base_mfn) 12.77 put_page(mfn_to_page(old_base_mfn)); 12.78 - shadow_update_paging_modes(v); 12.79 + paging_update_paging_modes(v); 12.80 12.81 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", 12.82 (unsigned long) (mfn << PAGE_SHIFT)); 12.83 12.84 - /* 12.85 - * arch->shadow_table should hold the next CR3 for shadow 12.86 - */ 12.87 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx", 12.88 v->arch.hvm_vmx.cpu_cr3, mfn); 12.89 } 12.90 @@ -1981,7 +1973,7 @@ static int vmx_set_cr0(unsigned long val 12.91 vm_entry_value &= ~VM_ENTRY_IA32E_MODE; 12.92 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value); 12.93 } 12.94 - shadow_update_paging_modes(v); 12.95 + paging_update_paging_modes(v); 12.96 } 12.97 12.98 return 1; 12.99 @@ -2070,7 +2062,7 @@ static int mov_to_cr(int gp, int cr, str 12.100 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT); 12.101 if (mfn != pagetable_get_pfn(v->arch.guest_table)) 12.102 goto bad_cr3; 12.103 - shadow_update_cr3(v); 12.104 + paging_update_cr3(v); 12.105 } else { 12.106 /* 12.107 * If different, make a shadow. 
Check if the PDBR is valid 12.108 @@ -2084,9 +2076,6 @@ static int mov_to_cr(int gp, int cr, str 12.109 v->arch.guest_table = pagetable_from_pfn(mfn); 12.110 if (old_base_mfn) 12.111 put_page(mfn_to_page(old_base_mfn)); 12.112 - /* 12.113 - * arch.shadow_table should now hold the next CR3 for shadow 12.114 - */ 12.115 v->arch.hvm_vmx.cpu_cr3 = value; 12.116 update_cr3(v); 12.117 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value); 12.118 @@ -2120,9 +2109,6 @@ static int mov_to_cr(int gp, int cr, str 12.119 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", 12.120 (unsigned long) (mfn << PAGE_SHIFT)); 12.121 12.122 - /* 12.123 - * arch->shadow_table should hold the next CR3 for shadow 12.124 - */ 12.125 HVM_DBG_LOG(DBG_LEVEL_VMMU, 12.126 "Update CR3 value = %lx, mfn = %lx", 12.127 v->arch.hvm_vmx.cpu_cr3, mfn); 12.128 @@ -2148,7 +2134,7 @@ static int mov_to_cr(int gp, int cr, str 12.129 * all TLB entries except global entries. 12.130 */ 12.131 if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) ) 12.132 - shadow_update_paging_modes(v); 12.133 + paging_update_paging_modes(v); 12.134 break; 12.135 12.136 case 8:
13.1 --- a/xen/arch/x86/mm.c Tue Feb 13 15:32:25 2007 +0000 13.2 +++ b/xen/arch/x86/mm.c Wed Feb 14 12:02:20 2007 +0000 13.3 @@ -99,6 +99,7 @@ 13.4 #include <xen/event.h> 13.5 #include <xen/iocap.h> 13.6 #include <xen/guest_access.h> 13.7 +#include <asm/paging.h> 13.8 #include <asm/shadow.h> 13.9 #include <asm/page.h> 13.10 #include <asm/flushtlb.h> 13.11 @@ -385,9 +386,9 @@ void update_cr3(struct vcpu *v) 13.12 { 13.13 unsigned long cr3_mfn=0; 13.14 13.15 - if ( shadow_mode_enabled(v->domain) ) 13.16 + if ( paging_mode_enabled(v->domain) ) 13.17 { 13.18 - shadow_update_cr3(v); 13.19 + paging_update_cr3(v); 13.20 return; 13.21 } 13.22 13.23 @@ -615,7 +616,7 @@ get_page_from_l1e( 13.24 * qemu-dm helper process in dom0 to map the domain's memory without 13.25 * messing up the count of "real" writable mappings.) */ 13.26 okay = (((l1e_get_flags(l1e) & _PAGE_RW) && 13.27 - !(unlikely(shadow_mode_external(d) && (d != current->domain)))) 13.28 + !(unlikely(paging_mode_external(d) && (d != current->domain)))) 13.29 ? get_page_and_type(page, d, PGT_writable_page) 13.30 : get_page(page, d)); 13.31 if ( !okay ) 13.32 @@ -804,9 +805,9 @@ void put_page_from_l1e(l1_pgentry_t l1e, 13.33 } 13.34 13.35 /* Remember we didn't take a type-count of foreign writable mappings 13.36 - * to shadow external domains */ 13.37 + * to paging-external domains */ 13.38 if ( (l1e_get_flags(l1e) & _PAGE_RW) && 13.39 - !(unlikely((e != d) && shadow_mode_external(e))) ) 13.40 + !(unlikely((e != d) && paging_mode_external(e))) ) 13.41 { 13.42 put_page_and_type(page); 13.43 } 13.44 @@ -1259,20 +1260,13 @@ static inline int update_intpte(intpte_t 13.45 { 13.46 int rv = 1; 13.47 #ifndef PTE_UPDATE_WITH_CMPXCHG 13.48 - if ( unlikely(shadow_mode_enabled(v->domain)) ) 13.49 - rv = shadow_write_guest_entry(v, p, new, _mfn(mfn)); 13.50 - else 13.51 - rv = (!__copy_to_user(p, &new, sizeof(new))); 13.52 + rv = paging_write_guest_entry(v, p, new, _mfn(mfn)); 13.53 #else 13.54 { 13.55 intpte_t t = old; 13.56 for ( ; ; ) 13.57 { 13.58 - if ( unlikely(shadow_mode_enabled(v->domain)) ) 13.59 - rv = shadow_cmpxchg_guest_entry(v, p, &t, new, _mfn(mfn)); 13.60 - else 13.61 - rv = (!cmpxchg_user(p, t, new)); 13.62 - 13.63 + rv = paging_cmpxchg_guest_entry(v, p, &t, new, _mfn(mfn)); 13.64 if ( unlikely(rv == 0) ) 13.65 { 13.66 MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte 13.67 @@ -1310,7 +1304,7 @@ static int mod_l1_entry(l1_pgentry_t *pl 13.68 if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) ) 13.69 return 0; 13.70 13.71 - if ( unlikely(shadow_mode_refcounts(d)) ) 13.72 + if ( unlikely(paging_mode_refcounts(d)) ) 13.73 return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current); 13.74 13.75 if ( l1e_get_flags(nl1e) & _PAGE_PRESENT ) 13.76 @@ -1572,7 +1566,7 @@ void free_page_type(struct page_info *pa 13.77 */ 13.78 queue_deferred_ops(owner, DOP_FLUSH_ALL_TLBS); 13.79 13.80 - if ( unlikely(shadow_mode_enabled(owner)) ) 13.81 + if ( unlikely(paging_mode_enabled(owner)) ) 13.82 { 13.83 /* A page table is dirtied when its type count becomes zero. */ 13.84 mark_dirty(owner, page_to_mfn(page)); 13.85 @@ -1771,7 +1765,7 @@ int new_guest_cr3(unsigned long mfn) 13.86 #ifdef CONFIG_COMPAT 13.87 if ( IS_COMPAT(d) ) 13.88 { 13.89 - okay = shadow_mode_refcounts(d) 13.90 + okay = paging_mode_refcounts(d) 13.91 ? 0 /* Old code was broken, but what should it be? 
*/ 13.92 : mod_l4_entry(__va(pagetable_get_paddr(v->arch.guest_table)), 13.93 l4e_from_pfn(mfn, (_PAGE_PRESENT|_PAGE_RW| 13.94 @@ -1788,7 +1782,7 @@ int new_guest_cr3(unsigned long mfn) 13.95 return 1; 13.96 } 13.97 #endif 13.98 - okay = shadow_mode_refcounts(d) 13.99 + okay = paging_mode_refcounts(d) 13.100 ? get_page_from_pagenr(mfn, d) 13.101 : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d); 13.102 if ( unlikely(!okay) ) 13.103 @@ -1808,7 +1802,7 @@ int new_guest_cr3(unsigned long mfn) 13.104 13.105 if ( likely(old_base_mfn != 0) ) 13.106 { 13.107 - if ( shadow_mode_refcounts(d) ) 13.108 + if ( paging_mode_refcounts(d) ) 13.109 put_page(mfn_to_page(old_base_mfn)); 13.110 else 13.111 put_page_and_type(mfn_to_page(old_base_mfn)); 13.112 @@ -1861,7 +1855,7 @@ static int set_foreigndom(domid_t domid) 13.113 d->domain_id); 13.114 okay = 0; 13.115 } 13.116 - else if ( unlikely(shadow_mode_translate(d)) ) 13.117 + else if ( unlikely(paging_mode_translate(d)) ) 13.118 { 13.119 MEM_LOG("Cannot mix foreign mappings with translated domains"); 13.120 okay = 0; 13.121 @@ -2007,7 +2001,7 @@ int do_mmuext_op( 13.122 if ( (op.cmd - MMUEXT_PIN_L1_TABLE) > (CONFIG_PAGING_LEVELS - 1) ) 13.123 break; 13.124 13.125 - if ( shadow_mode_refcounts(FOREIGNDOM) ) 13.126 + if ( paging_mode_refcounts(FOREIGNDOM) ) 13.127 break; 13.128 13.129 okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM); 13.130 @@ -2032,7 +2026,7 @@ int do_mmuext_op( 13.131 break; 13.132 13.133 case MMUEXT_UNPIN_TABLE: 13.134 - if ( shadow_mode_refcounts(d) ) 13.135 + if ( paging_mode_refcounts(d) ) 13.136 break; 13.137 13.138 if ( unlikely(!(okay = get_page_from_pagenr(mfn, d))) ) 13.139 @@ -2070,7 +2064,7 @@ int do_mmuext_op( 13.140 } 13.141 if (likely(mfn != 0)) 13.142 { 13.143 - if ( shadow_mode_refcounts(d) ) 13.144 + if ( paging_mode_refcounts(d) ) 13.145 okay = get_page_from_pagenr(mfn, d); 13.146 else 13.147 okay = get_page_and_type_from_pagenr( 13.148 @@ -2087,7 +2081,7 @@ int do_mmuext_op( 13.149 v->arch.guest_table_user = pagetable_from_pfn(mfn); 13.150 if ( old_mfn != 0 ) 13.151 { 13.152 - if ( shadow_mode_refcounts(d) ) 13.153 + if ( paging_mode_refcounts(d) ) 13.154 put_page(mfn_to_page(old_mfn)); 13.155 else 13.156 put_page_and_type(mfn_to_page(old_mfn)); 13.157 @@ -2101,8 +2095,8 @@ int do_mmuext_op( 13.158 break; 13.159 13.160 case MMUEXT_INVLPG_LOCAL: 13.161 - if ( !shadow_mode_enabled(d) 13.162 - || shadow_invlpg(v, op.arg1.linear_addr) != 0 ) 13.163 + if ( !paging_mode_enabled(d) 13.164 + || paging_invlpg(v, op.arg1.linear_addr) != 0 ) 13.165 local_flush_tlb_one(op.arg1.linear_addr); 13.166 break; 13.167 13.168 @@ -2149,7 +2143,7 @@ int do_mmuext_op( 13.169 unsigned long ptr = op.arg1.linear_addr; 13.170 unsigned long ents = op.arg2.nr_ents; 13.171 13.172 - if ( shadow_mode_external(d) ) 13.173 + if ( paging_mode_external(d) ) 13.174 { 13.175 MEM_LOG("ignoring SET_LDT hypercall from external " 13.176 "domain %u", d->domain_id); 13.177 @@ -2298,9 +2292,9 @@ int do_mmu_update( 13.178 case PGT_l3_page_table: 13.179 case PGT_l4_page_table: 13.180 { 13.181 - if ( shadow_mode_refcounts(d) ) 13.182 + if ( paging_mode_refcounts(d) ) 13.183 { 13.184 - MEM_LOG("mmu update on shadow-refcounted domain!"); 13.185 + MEM_LOG("mmu update on auto-refcounted domain!"); 13.186 break; 13.187 } 13.188 13.189 @@ -2351,13 +2345,7 @@ int do_mmu_update( 13.190 if ( unlikely(!get_page_type(page, PGT_writable_page)) ) 13.191 break; 13.192 13.193 - if ( unlikely(shadow_mode_enabled(d)) ) 13.194 - okay = shadow_write_guest_entry(v, 
va, req.val, _mfn(mfn)); 13.195 - else 13.196 - { 13.197 - *(intpte_t *)va = req.val; 13.198 - okay = 1; 13.199 - } 13.200 + okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn)); 13.201 13.202 put_page_type(page); 13.203 } 13.204 @@ -2380,9 +2368,9 @@ int do_mmu_update( 13.205 break; 13.206 } 13.207 13.208 - if ( unlikely(shadow_mode_translate(FOREIGNDOM)) ) 13.209 + if ( unlikely(paging_mode_translate(FOREIGNDOM)) ) 13.210 { 13.211 - MEM_LOG("Mach-phys update on shadow-translate guest"); 13.212 + MEM_LOG("Mach-phys update on auto-translate guest"); 13.213 break; 13.214 } 13.215 13.216 @@ -2472,7 +2460,7 @@ static int create_grant_pte_mapping( 13.217 goto failed; 13.218 } 13.219 13.220 - if ( !shadow_mode_refcounts(d) ) 13.221 + if ( !paging_mode_refcounts(d) ) 13.222 put_page_from_l1e(ol1e, d); 13.223 13.224 put_page_type(page); 13.225 @@ -2578,7 +2566,7 @@ static int create_grant_va_mapping( 13.226 if ( !okay ) 13.227 return GNTST_general_error; 13.228 13.229 - if ( !shadow_mode_refcounts(d) ) 13.230 + if ( !paging_mode_refcounts(d) ) 13.231 put_page_from_l1e(ol1e, d); 13.232 13.233 return GNTST_okay; 13.234 @@ -2704,7 +2692,7 @@ int do_update_va_mapping(unsigned long v 13.235 13.236 perfc_incrc(calls_to_update_va); 13.237 13.238 - if ( unlikely(!__addr_ok(va) && !shadow_mode_external(d)) ) 13.239 + if ( unlikely(!__addr_ok(va) && !paging_mode_external(d)) ) 13.240 return -EINVAL; 13.241 13.242 LOCK_BIGLOCK(d); 13.243 @@ -2744,8 +2732,8 @@ int do_update_va_mapping(unsigned long v 13.244 switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) ) 13.245 { 13.246 case UVMF_LOCAL: 13.247 - if ( !shadow_mode_enabled(d) 13.248 - || (shadow_invlpg(current, va) != 0) ) 13.249 + if ( !paging_mode_enabled(d) 13.250 + || (paging_invlpg(current, va) != 0) ) 13.251 local_flush_tlb_one(va); 13.252 break; 13.253 case UVMF_ALL: 13.254 @@ -2980,7 +2968,7 @@ long arch_memory_op(int op, XEN_GUEST_HA 13.255 break; 13.256 } 13.257 13.258 - if ( !shadow_mode_translate(d) || (mfn == 0) ) 13.259 + if ( !paging_mode_translate(d) || (mfn == 0) ) 13.260 { 13.261 put_domain(d); 13.262 return -EINVAL; 13.263 @@ -3235,17 +3223,12 @@ static int ptwr_emulated_update( 13.264 if ( do_cmpxchg ) 13.265 { 13.266 int okay; 13.267 + intpte_t t = old; 13.268 ol1e = l1e_from_intpte(old); 13.269 13.270 - if ( shadow_mode_enabled(d) ) 13.271 - { 13.272 - intpte_t t = old; 13.273 - okay = shadow_cmpxchg_guest_entry(v, (intpte_t *) pl1e, 13.274 - &t, val, _mfn(mfn)); 13.275 - okay = (okay && t == old); 13.276 - } 13.277 - else 13.278 - okay = (cmpxchg((intpte_t *)pl1e, old, val) == old); 13.279 + okay = paging_cmpxchg_guest_entry(v, (intpte_t *) pl1e, 13.280 + &t, val, _mfn(mfn)); 13.281 + okay = (okay && t == old); 13.282 13.283 if ( !okay ) 13.284 {
14.1 --- a/xen/arch/x86/mm/Makefile Tue Feb 13 15:32:25 2007 +0000 14.2 +++ b/xen/arch/x86/mm/Makefile Wed Feb 14 12:02:20 2007 +0000 14.3 @@ -1,1 +1,4 @@ 14.4 subdir-y += shadow 14.5 + 14.6 +obj-y += paging.o 14.7 +obj-y += p2m.o
15.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 15.2 +++ b/xen/arch/x86/mm/p2m.c Wed Feb 14 12:02:20 2007 +0000 15.3 @@ -0,0 +1,699 @@ 15.4 +/****************************************************************************** 15.5 + * arch/x86/mm/p2m.c 15.6 + * 15.7 + * physical-to-machine mappings for automatically-translated domains. 15.8 + * 15.9 + * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices. 15.10 + * Parts of this code are Copyright (c) 2006 by XenSource Inc. 15.11 + * Parts of this code are Copyright (c) 2006 by Michael A Fetterman 15.12 + * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al. 15.13 + * 15.14 + * This program is free software; you can redistribute it and/or modify 15.15 + * it under the terms of the GNU General Public License as published by 15.16 + * the Free Software Foundation; either version 2 of the License, or 15.17 + * (at your option) any later version. 15.18 + * 15.19 + * This program is distributed in the hope that it will be useful, 15.20 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 15.21 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15.22 + * GNU General Public License for more details. 15.23 + * 15.24 + * You should have received a copy of the GNU General Public License 15.25 + * along with this program; if not, write to the Free Software 15.26 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 15.27 + */ 15.28 + 15.29 +#include <asm/domain.h> 15.30 +#include <asm/page.h> 15.31 +#include <asm/paging.h> 15.32 +#include <asm/p2m.h> 15.33 + 15.34 +/* Debugging and auditing of the P2M code? */ 15.35 +#define P2M_AUDIT 0 15.36 +#define P2M_DEBUGGING 1 15.37 + 15.38 +/* The P2M lock. This protects all updates to the p2m table. 15.39 + * Updates are expected to be safe against concurrent reads, 15.40 + * which do *not* require the lock */ 15.41 + 15.42 +#define p2m_lock_init(_d) \ 15.43 + do { \ 15.44 + spin_lock_init(&(_d)->arch.p2m.lock); \ 15.45 + (_d)->arch.p2m.locker = -1; \ 15.46 + (_d)->arch.p2m.locker_function = "nobody"; \ 15.47 + } while (0) 15.48 + 15.49 +#define p2m_lock(_d) \ 15.50 + do { \ 15.51 + if ( unlikely((_d)->arch.p2m.locker == current->processor) )\ 15.52 + { \ 15.53 + printk("Error: p2m lock held by %s\n", \ 15.54 + (_d)->arch.p2m.locker_function); \ 15.55 + BUG(); \ 15.56 + } \ 15.57 + spin_lock(&(_d)->arch.p2m.lock); \ 15.58 + ASSERT((_d)->arch.p2m.locker == -1); \ 15.59 + (_d)->arch.p2m.locker = current->processor; \ 15.60 + (_d)->arch.p2m.locker_function = __func__; \ 15.61 + } while (0) 15.62 + 15.63 +#define p2m_unlock(_d) \ 15.64 + do { \ 15.65 + ASSERT((_d)->arch.p2m.locker == current->processor); \ 15.66 + (_d)->arch.p2m.locker = -1; \ 15.67 + (_d)->arch.p2m.locker_function = "nobody"; \ 15.68 + spin_unlock(&(_d)->arch.p2m.lock); \ 15.69 + } while (0) 15.70 + 15.71 + 15.72 + 15.73 +/* Printouts */ 15.74 +#define P2M_PRINTK(_f, _a...) \ 15.75 + debugtrace_printk("p2m: %s(): " _f, __func__, ##_a) 15.76 +#define P2M_ERROR(_f, _a...) \ 15.77 + printk("pg error: %s(): " _f, __func__, ##_a) 15.78 +#if P2M_DEBUGGING 15.79 +#define P2M_DEBUG(_f, _a...) \ 15.80 + debugtrace_printk("p2mdebug: %s(): " _f, __func__, ##_a) 15.81 +#else 15.82 +#define P2M_DEBUG(_f, _a...) 
do { (void)(_f); } while(0) 15.83 +#endif 15.84 + 15.85 + 15.86 +/* Override macros from asm/page.h to make them work with mfn_t */ 15.87 +#undef mfn_to_page 15.88 +#define mfn_to_page(_m) (frame_table + mfn_x(_m)) 15.89 +#undef mfn_valid 15.90 +#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page) 15.91 +#undef page_to_mfn 15.92 +#define page_to_mfn(_pg) (_mfn((_pg) - frame_table)) 15.93 + 15.94 + 15.95 + 15.96 +// Find the next level's P2M entry, checking for out-of-range gfn's... 15.97 +// Returns NULL on error. 15.98 +// 15.99 +static l1_pgentry_t * 15.100 +p2m_find_entry(void *table, unsigned long *gfn_remainder, 15.101 + unsigned long gfn, u32 shift, u32 max) 15.102 +{ 15.103 + u32 index; 15.104 + 15.105 + index = *gfn_remainder >> shift; 15.106 + if ( index >= max ) 15.107 + { 15.108 + P2M_DEBUG("gfn=0x%lx out of range " 15.109 + "(gfn_remainder=0x%lx shift=%d index=0x%x max=0x%x)\n", 15.110 + gfn, *gfn_remainder, shift, index, max); 15.111 + return NULL; 15.112 + } 15.113 + *gfn_remainder &= (1 << shift) - 1; 15.114 + return (l1_pgentry_t *)table + index; 15.115 +} 15.116 + 15.117 +// Walk one level of the P2M table, allocating a new table if required. 15.118 +// Returns 0 on error. 15.119 +// 15.120 +static int 15.121 +p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table, 15.122 + unsigned long *gfn_remainder, unsigned long gfn, u32 shift, 15.123 + u32 max, unsigned long type) 15.124 +{ 15.125 + l1_pgentry_t *p2m_entry; 15.126 + l1_pgentry_t new_entry; 15.127 + void *next; 15.128 + ASSERT(d->arch.p2m.alloc_page); 15.129 + 15.130 + if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn, 15.131 + shift, max)) ) 15.132 + return 0; 15.133 + 15.134 + if ( !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) ) 15.135 + { 15.136 + struct page_info *pg = d->arch.p2m.alloc_page(d); 15.137 + if ( pg == NULL ) 15.138 + return 0; 15.139 + list_add_tail(&pg->list, &d->arch.p2m.pages); 15.140 + pg->u.inuse.type_info = type | 1 | PGT_validated; 15.141 + pg->count_info = 1; 15.142 + 15.143 + new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)), 15.144 + __PAGE_HYPERVISOR|_PAGE_USER); 15.145 + 15.146 + switch ( type ) { 15.147 + case PGT_l3_page_table: 15.148 + paging_write_p2m_entry(d, gfn, p2m_entry, new_entry, 4); 15.149 + break; 15.150 + case PGT_l2_page_table: 15.151 + paging_write_p2m_entry(d, gfn, p2m_entry, new_entry, 3); 15.152 + break; 15.153 + case PGT_l1_page_table: 15.154 + paging_write_p2m_entry(d, gfn, p2m_entry, new_entry, 2); 15.155 + break; 15.156 + default: 15.157 + BUG(); 15.158 + break; 15.159 + } 15.160 + } 15.161 + *table_mfn = _mfn(l1e_get_pfn(*p2m_entry)); 15.162 + next = map_domain_page(mfn_x(*table_mfn)); 15.163 + unmap_domain_page(*table); 15.164 + *table = next; 15.165 + 15.166 + return 1; 15.167 +} 15.168 + 15.169 +// Returns 0 on error (out of memory) 15.170 +static int 15.171 +set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) 15.172 +{ 15.173 + // XXX -- this might be able to be faster iff current->domain == d 15.174 + mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table); 15.175 + void *table =map_domain_page(mfn_x(table_mfn)); 15.176 + unsigned long gfn_remainder = gfn; 15.177 + l1_pgentry_t *p2m_entry; 15.178 + l1_pgentry_t entry_content; 15.179 + int rv=0; 15.180 + 15.181 +#if CONFIG_PAGING_LEVELS >= 4 15.182 + if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, 15.183 + L4_PAGETABLE_SHIFT - PAGE_SHIFT, 15.184 + L4_PAGETABLE_ENTRIES, PGT_l3_page_table) ) 15.185 + goto out; 15.186 +#endif 15.187 +#if CONFIG_PAGING_LEVELS >= 3 15.188 + // 
When using PAE Xen, we only allow 33 bits of pseudo-physical 15.189 + // address in translated guests (i.e. 8 GBytes). This restriction 15.190 + // comes from wanting to map the P2M table into the 16MB RO_MPT hole 15.191 + // in Xen's address space for translated PV guests. 15.192 + // 15.193 + if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, 15.194 + L3_PAGETABLE_SHIFT - PAGE_SHIFT, 15.195 + (CONFIG_PAGING_LEVELS == 3 15.196 + ? 8 15.197 + : L3_PAGETABLE_ENTRIES), 15.198 + PGT_l2_page_table) ) 15.199 + goto out; 15.200 +#endif 15.201 + if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, 15.202 + L2_PAGETABLE_SHIFT - PAGE_SHIFT, 15.203 + L2_PAGETABLE_ENTRIES, PGT_l1_page_table) ) 15.204 + goto out; 15.205 + 15.206 + p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, 15.207 + 0, L1_PAGETABLE_ENTRIES); 15.208 + ASSERT(p2m_entry); 15.209 + 15.210 + /* Track the highest gfn for which we have ever had a valid mapping */ 15.211 + if ( mfn_valid(mfn) && (gfn > d->arch.p2m.max_mapped_pfn) ) 15.212 + d->arch.p2m.max_mapped_pfn = gfn; 15.213 + 15.214 + if ( mfn_valid(mfn) ) 15.215 + entry_content = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER); 15.216 + else 15.217 + entry_content = l1e_empty(); 15.218 + 15.219 + /* level 1 entry */ 15.220 + paging_write_p2m_entry(d, gfn, p2m_entry, entry_content, 1); 15.221 + 15.222 + /* Success */ 15.223 + rv = 1; 15.224 + 15.225 + out: 15.226 + unmap_domain_page(table); 15.227 + return rv; 15.228 +} 15.229 + 15.230 + 15.231 +/* Init the datastructures for later use by the p2m code */ 15.232 +void p2m_init(struct domain *d) 15.233 +{ 15.234 + p2m_lock_init(d); 15.235 + INIT_LIST_HEAD(&d->arch.p2m.pages); 15.236 +} 15.237 + 15.238 + 15.239 +// Allocate a new p2m table for a domain. 15.240 +// 15.241 +// The structure of the p2m table is that of a pagetable for xen (i.e. it is 15.242 +// controlled by CONFIG_PAGING_LEVELS). 15.243 +// 15.244 +// The alloc_page and free_page functions will be used to get memory to 15.245 +// build the p2m, and to release it again at the end of day. 15.246 +// 15.247 +// Returns 0 for success or -errno. 
15.248 +// 15.249 +int p2m_alloc_table(struct domain *d, 15.250 + struct page_info * (*alloc_page)(struct domain *d), 15.251 + void (*free_page)(struct domain *d, struct page_info *pg)) 15.252 + 15.253 +{ 15.254 + mfn_t mfn; 15.255 + struct list_head *entry; 15.256 + struct page_info *page, *p2m_top; 15.257 + unsigned int page_count = 0; 15.258 + unsigned long gfn; 15.259 + 15.260 + p2m_lock(d); 15.261 + 15.262 + if ( pagetable_get_pfn(d->arch.phys_table) != 0 ) 15.263 + { 15.264 + P2M_ERROR("p2m already allocated for this domain\n"); 15.265 + p2m_unlock(d); 15.266 + return -EINVAL; 15.267 + } 15.268 + 15.269 + P2M_PRINTK("allocating p2m table\n"); 15.270 + 15.271 + d->arch.p2m.alloc_page = alloc_page; 15.272 + d->arch.p2m.free_page = free_page; 15.273 + 15.274 + p2m_top = d->arch.p2m.alloc_page(d); 15.275 + if ( p2m_top == NULL ) 15.276 + { 15.277 + p2m_unlock(d); 15.278 + return -ENOMEM; 15.279 + } 15.280 +list_add_tail(&p2m_top->list, &d->arch.p2m.pages); 15.281 + 15.282 + p2m_top->count_info = 1; 15.283 + p2m_top->u.inuse.type_info = 15.284 +#if CONFIG_PAGING_LEVELS == 4 15.285 + PGT_l4_page_table 15.286 +#elif CONFIG_PAGING_LEVELS == 3 15.287 + PGT_l3_page_table 15.288 +#elif CONFIG_PAGING_LEVELS == 2 15.289 + PGT_l2_page_table 15.290 +#endif 15.291 + | 1 | PGT_validated; 15.292 + 15.293 + d->arch.phys_table = pagetable_from_mfn(page_to_mfn(p2m_top)); 15.294 + 15.295 + P2M_PRINTK("populating p2m table\n"); 15.296 + 15.297 + /* Initialise physmap tables for slot zero. Other code assumes this. */ 15.298 + gfn = 0; 15.299 +mfn = _mfn(INVALID_MFN); 15.300 + if ( !set_p2m_entry(d, gfn, mfn) ) 15.301 + goto error; 15.302 + 15.303 + for ( entry = d->page_list.next; 15.304 + entry != &d->page_list; 15.305 + entry = entry->next ) 15.306 + { 15.307 + page = list_entry(entry, struct page_info, list); 15.308 + mfn = page_to_mfn(page); 15.309 + gfn = get_gpfn_from_mfn(mfn_x(mfn)); 15.310 + page_count++; 15.311 + if ( 15.312 +#ifdef __x86_64__ 15.313 + (gfn != 0x5555555555555555L) 15.314 +#else 15.315 + (gfn != 0x55555555L) 15.316 +#endif 15.317 + && gfn != INVALID_M2P_ENTRY 15.318 + && !set_p2m_entry(d, gfn, mfn) ) 15.319 + goto error; 15.320 + } 15.321 + 15.322 + P2M_PRINTK("p2m table initialised (%u pages)\n", page_count); 15.323 + p2m_unlock(d); 15.324 + return 0; 15.325 + 15.326 + error: 15.327 + P2M_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%" 15.328 + PRI_mfn "\n", gfn, mfn_x(mfn)); 15.329 + p2m_unlock(d); 15.330 + return -ENOMEM; 15.331 +} 15.332 + 15.333 +void p2m_teardown(struct domain *d) 15.334 +/* Return all the p2m pages to Xen. 
15.335 + * We know we don't have any extra mappings to these pages */ 15.336 +{ 15.337 + struct list_head *entry, *n; 15.338 + struct page_info *pg; 15.339 + 15.340 + p2m_lock(d); 15.341 + d->arch.phys_table = pagetable_null(); 15.342 + 15.343 + list_for_each_safe(entry, n, &d->arch.p2m.pages) 15.344 + { 15.345 + pg = list_entry(entry, struct page_info, list); 15.346 + list_del(entry); 15.347 + d->arch.p2m.free_page(d, pg); 15.348 + } 15.349 + p2m_unlock(d); 15.350 +} 15.351 + 15.352 +mfn_t 15.353 +gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn) 15.354 +/* Read another domain's p2m entries */ 15.355 +{ 15.356 + mfn_t mfn; 15.357 + paddr_t addr = ((paddr_t)gpfn) << PAGE_SHIFT; 15.358 + l2_pgentry_t *l2e; 15.359 + l1_pgentry_t *l1e; 15.360 + 15.361 + ASSERT(paging_mode_translate(d)); 15.362 + mfn = pagetable_get_mfn(d->arch.phys_table); 15.363 + 15.364 + 15.365 + if ( gpfn > d->arch.p2m.max_mapped_pfn ) 15.366 + /* This pfn is higher than the highest the p2m map currently holds */ 15.367 + return _mfn(INVALID_MFN); 15.368 + 15.369 +#if CONFIG_PAGING_LEVELS >= 4 15.370 + { 15.371 + l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn)); 15.372 + l4e += l4_table_offset(addr); 15.373 + if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 ) 15.374 + { 15.375 + unmap_domain_page(l4e); 15.376 + return _mfn(INVALID_MFN); 15.377 + } 15.378 + mfn = _mfn(l4e_get_pfn(*l4e)); 15.379 + unmap_domain_page(l4e); 15.380 + } 15.381 +#endif 15.382 +#if CONFIG_PAGING_LEVELS >= 3 15.383 + { 15.384 + l3_pgentry_t *l3e = map_domain_page(mfn_x(mfn)); 15.385 +#if CONFIG_PAGING_LEVELS == 3 15.386 + /* On PAE hosts the p2m has eight l3 entries, not four (see 15.387 + * shadow_set_p2m_entry()) so we can't use l3_table_offset. 15.388 + * Instead, just count the number of l3es from zero. It's safe 15.389 + * to do this because we already checked that the gfn is within 15.390 + * the bounds of the p2m. 
*/ 15.391 + l3e += (addr >> L3_PAGETABLE_SHIFT); 15.392 +#else 15.393 + l3e += l3_table_offset(addr); 15.394 +#endif 15.395 + if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 ) 15.396 + { 15.397 + unmap_domain_page(l3e); 15.398 + return _mfn(INVALID_MFN); 15.399 + } 15.400 + mfn = _mfn(l3e_get_pfn(*l3e)); 15.401 + unmap_domain_page(l3e); 15.402 + } 15.403 +#endif 15.404 + 15.405 + l2e = map_domain_page(mfn_x(mfn)); 15.406 + l2e += l2_table_offset(addr); 15.407 + if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 ) 15.408 + { 15.409 + unmap_domain_page(l2e); 15.410 + return _mfn(INVALID_MFN); 15.411 + } 15.412 + mfn = _mfn(l2e_get_pfn(*l2e)); 15.413 + unmap_domain_page(l2e); 15.414 + 15.415 + l1e = map_domain_page(mfn_x(mfn)); 15.416 + l1e += l1_table_offset(addr); 15.417 + if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 ) 15.418 + { 15.419 + unmap_domain_page(l1e); 15.420 + return _mfn(INVALID_MFN); 15.421 + } 15.422 + mfn = _mfn(l1e_get_pfn(*l1e)); 15.423 + unmap_domain_page(l1e); 15.424 + 15.425 + return mfn; 15.426 +} 15.427 + 15.428 +#if P2M_AUDIT 15.429 +static void audit_p2m(struct domain *d) 15.430 +{ 15.431 + struct list_head *entry; 15.432 + struct page_info *page; 15.433 + struct domain *od; 15.434 + unsigned long mfn, gfn, m2pfn, lp2mfn = 0; 15.435 + mfn_t p2mfn; 15.436 + unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0; 15.437 + int test_linear; 15.438 + 15.439 + if ( !paging_mode_translate(d) ) 15.440 + return; 15.441 + 15.442 + //P2M_PRINTK("p2m audit starts\n"); 15.443 + 15.444 + test_linear = ( (d == current->domain) 15.445 + && !pagetable_is_null(current->arch.monitor_table) ); 15.446 + if ( test_linear ) 15.447 + local_flush_tlb(); 15.448 + 15.449 + /* Audit part one: walk the domain's page allocation list, checking 15.450 + * the m2p entries. */ 15.451 + for ( entry = d->page_list.next; 15.452 + entry != &d->page_list; 15.453 + entry = entry->next ) 15.454 + { 15.455 + page = list_entry(entry, struct page_info, list); 15.456 + mfn = mfn_x(page_to_mfn(page)); 15.457 + 15.458 + // P2M_PRINTK("auditing guest page, mfn=%#lx\n", mfn); 15.459 + 15.460 + od = page_get_owner(page); 15.461 + 15.462 + if ( od != d ) 15.463 + { 15.464 + P2M_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n", 15.465 + mfn, od, (od?od->domain_id:-1), d, d->domain_id); 15.466 + continue; 15.467 + } 15.468 + 15.469 + gfn = get_gpfn_from_mfn(mfn); 15.470 + if ( gfn == INVALID_M2P_ENTRY ) 15.471 + { 15.472 + orphans_i++; 15.473 + //P2M_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n", 15.474 + // mfn); 15.475 + continue; 15.476 + } 15.477 + 15.478 + if ( gfn == 0x55555555 ) 15.479 + { 15.480 + orphans_d++; 15.481 + //P2M_PRINTK("orphaned guest page: mfn=%#lx has debug gfn\n", 15.482 + // mfn); 15.483 + continue; 15.484 + } 15.485 + 15.486 + p2mfn = gfn_to_mfn_foreign(d, gfn); 15.487 + if ( mfn_x(p2mfn) != mfn ) 15.488 + { 15.489 + mpbad++; 15.490 + P2M_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx" 15.491 + " (-> gfn %#lx)\n", 15.492 + mfn, gfn, mfn_x(p2mfn), 15.493 + (mfn_valid(p2mfn) 15.494 + ? get_gpfn_from_mfn(mfn_x(p2mfn)) 15.495 + : -1u)); 15.496 + /* This m2p entry is stale: the domain has another frame in 15.497 + * this physical slot. No great disaster, but for neatness, 15.498 + * blow away the m2p entry. 
*/ 15.499 + set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); 15.500 + } 15.501 + 15.502 + if ( test_linear && (gfn <= d->arch.p2m.max_mapped_pfn) ) 15.503 + { 15.504 + lp2mfn = mfn_x(gfn_to_mfn_current(gfn)); 15.505 + if ( lp2mfn != mfn_x(p2mfn) ) 15.506 + { 15.507 + P2M_PRINTK("linear mismatch gfn %#lx -> mfn %#lx " 15.508 + "(!= mfn %#lx)\n", gfn, lp2mfn, mfn_x(p2mfn)); 15.509 + } 15.510 + } 15.511 + 15.512 + // P2M_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx, lp2mfn=%#lx\n", 15.513 + // mfn, gfn, p2mfn, lp2mfn); 15.514 + } 15.515 + 15.516 + /* Audit part two: walk the domain's p2m table, checking the entries. */ 15.517 + if ( pagetable_get_pfn(d->arch.phys_table) != 0 ) 15.518 + { 15.519 + l2_pgentry_t *l2e; 15.520 + l1_pgentry_t *l1e; 15.521 + int i1, i2; 15.522 + 15.523 +#if CONFIG_PAGING_LEVELS == 4 15.524 + l4_pgentry_t *l4e; 15.525 + l3_pgentry_t *l3e; 15.526 + int i3, i4; 15.527 + l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table))); 15.528 +#elif CONFIG_PAGING_LEVELS == 3 15.529 + l3_pgentry_t *l3e; 15.530 + int i3; 15.531 + l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table))); 15.532 +#else /* CONFIG_PAGING_LEVELS == 2 */ 15.533 + l2e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table))); 15.534 +#endif 15.535 + 15.536 + gfn = 0; 15.537 +#if CONFIG_PAGING_LEVELS >= 3 15.538 +#if CONFIG_PAGING_LEVELS >= 4 15.539 + for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ ) 15.540 + { 15.541 + if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) ) 15.542 + { 15.543 + gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT); 15.544 + continue; 15.545 + } 15.546 + l3e = map_domain_page(mfn_x(_mfn(l4e_get_pfn(l4e[i4])))); 15.547 +#endif /* now at levels 3 or 4... */ 15.548 + for ( i3 = 0; 15.549 + i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8); 15.550 + i3++ ) 15.551 + { 15.552 + if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) ) 15.553 + { 15.554 + gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT); 15.555 + continue; 15.556 + } 15.557 + l2e = map_domain_page(mfn_x(_mfn(l3e_get_pfn(l3e[i3])))); 15.558 +#endif /* all levels... 
*/ 15.559 + for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ ) 15.560 + { 15.561 + if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) ) 15.562 + { 15.563 + gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT); 15.564 + continue; 15.565 + } 15.566 + l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2])))); 15.567 + 15.568 + for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ ) 15.569 + { 15.570 + if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) ) 15.571 + continue; 15.572 + mfn = l1e_get_pfn(l1e[i1]); 15.573 + ASSERT(mfn_valid(_mfn(mfn))); 15.574 + m2pfn = get_gpfn_from_mfn(mfn); 15.575 + if ( m2pfn != gfn ) 15.576 + { 15.577 + pmbad++; 15.578 + P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx" 15.579 + " -> gfn %#lx\n", gfn, mfn, m2pfn); 15.580 + BUG(); 15.581 + } 15.582 + } 15.583 + unmap_domain_page(l1e); 15.584 + } 15.585 +#if CONFIG_PAGING_LEVELS >= 3 15.586 + unmap_domain_page(l2e); 15.587 + } 15.588 +#if CONFIG_PAGING_LEVELS >= 4 15.589 + unmap_domain_page(l3e); 15.590 + } 15.591 +#endif 15.592 +#endif 15.593 + 15.594 +#if CONFIG_PAGING_LEVELS == 4 15.595 + unmap_domain_page(l4e); 15.596 +#elif CONFIG_PAGING_LEVELS == 3 15.597 + unmap_domain_page(l3e); 15.598 +#else /* CONFIG_PAGING_LEVELS == 2 */ 15.599 + unmap_domain_page(l2e); 15.600 +#endif 15.601 + 15.602 + } 15.603 + 15.604 + //P2M_PRINTK("p2m audit complete\n"); 15.605 + //if ( orphans_i | orphans_d | mpbad | pmbad ) 15.606 + // P2M_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n", 15.607 + // orphans_i + orphans_d, orphans_i, orphans_d, 15.608 + if ( mpbad | pmbad ) 15.609 + P2M_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n", 15.610 + pmbad, mpbad); 15.611 +} 15.612 +#else 15.613 +#define audit_p2m(_d) do { (void)(_d); } while(0) 15.614 +#endif /* P2M_AUDIT */ 15.615 + 15.616 + 15.617 + 15.618 +static void 15.619 +p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn) 15.620 +{ 15.621 + if ( !paging_mode_translate(d) ) 15.622 + return; 15.623 + P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn); 15.624 + 15.625 + ASSERT(mfn_x(gfn_to_mfn(d, gfn)) == mfn); 15.626 + //ASSERT(mfn_to_gfn(d, mfn) == gfn); 15.627 + 15.628 + set_p2m_entry(d, gfn, _mfn(INVALID_MFN)); 15.629 + set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); 15.630 +} 15.631 + 15.632 +void 15.633 +guest_physmap_remove_page(struct domain *d, unsigned long gfn, 15.634 + unsigned long mfn) 15.635 +{ 15.636 + p2m_lock(d); 15.637 + audit_p2m(d); 15.638 + p2m_remove_page(d, gfn, mfn); 15.639 + audit_p2m(d); 15.640 + p2m_unlock(d); 15.641 +} 15.642 + 15.643 +void 15.644 +guest_physmap_add_page(struct domain *d, unsigned long gfn, 15.645 + unsigned long mfn) 15.646 +{ 15.647 + unsigned long ogfn; 15.648 + mfn_t omfn; 15.649 + 15.650 + if ( !paging_mode_translate(d) ) 15.651 + return; 15.652 + 15.653 + p2m_lock(d); 15.654 + audit_p2m(d); 15.655 + 15.656 + P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn); 15.657 + 15.658 + omfn = gfn_to_mfn(d, gfn); 15.659 + if ( mfn_valid(omfn) ) 15.660 + { 15.661 + set_p2m_entry(d, gfn, _mfn(INVALID_MFN)); 15.662 + set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); 15.663 + } 15.664 + 15.665 + ogfn = mfn_to_gfn(d, _mfn(mfn)); 15.666 + if ( 15.667 +#ifdef __x86_64__ 15.668 + (ogfn != 0x5555555555555555L) 15.669 +#else 15.670 + (ogfn != 0x55555555L) 15.671 +#endif 15.672 + && (ogfn != INVALID_M2P_ENTRY) 15.673 + && (ogfn != gfn) ) 15.674 + { 15.675 + /* This machine frame is already mapped at another physical address */ 15.676 + P2M_DEBUG("aliased! 
mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n", 15.677 + mfn, ogfn, gfn); 15.678 + if ( mfn_valid(omfn = gfn_to_mfn(d, ogfn)) ) 15.679 + { 15.680 + P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n", 15.681 + ogfn , mfn_x(omfn)); 15.682 + if ( mfn_x(omfn) == mfn ) 15.683 + p2m_remove_page(d, ogfn, mfn); 15.684 + } 15.685 + } 15.686 + 15.687 + set_p2m_entry(d, gfn, _mfn(mfn)); 15.688 + set_gpfn_from_mfn(mfn, gfn); 15.689 + 15.690 + audit_p2m(d); 15.691 + p2m_unlock(d); 15.692 +} 15.693 + 15.694 + 15.695 +/* 15.696 + * Local variables: 15.697 + * mode: C 15.698 + * c-set-style: "BSD" 15.699 + * c-basic-offset: 4 15.700 + * indent-tabs-mode: nil 15.701 + * End: 15.702 + */
16.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 16.2 +++ b/xen/arch/x86/mm/paging.c Wed Feb 14 12:02:20 2007 +0000 16.3 @@ -0,0 +1,143 @@ 16.4 +/****************************************************************************** 16.5 + * arch/x86/paging.c 16.6 + * 16.7 + * x86 specific paging support 16.8 + * Copyright (c) 2007 Advanced Micro Devices (Wei Huang) 16.9 + * Copyright (c) 2007 XenSource Inc. 16.10 + * 16.11 + * This program is free software; you can redistribute it and/or modify 16.12 + * it under the terms of the GNU General Public License as published by 16.13 + * the Free Software Foundation; either version 2 of the License, or 16.14 + * (at your option) any later version. 16.15 + * 16.16 + * This program is distributed in the hope that it will be useful, 16.17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 16.18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16.19 + * GNU General Public License for more details. 16.20 + * 16.21 + * You should have received a copy of the GNU General Public License 16.22 + * along with this program; if not, write to the Free Software 16.23 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16.24 + */ 16.25 + 16.26 +#include <xen/init.h> 16.27 +#include <asm/paging.h> 16.28 +#include <asm/shadow.h> 16.29 +#include <asm/p2m.h> 16.30 + 16.31 +/* Xen command-line option to enable hardware-assisted paging */ 16.32 +int opt_hap_enabled = 0; 16.33 +boolean_param("hap", opt_hap_enabled); 16.34 + 16.35 +/* Printouts */ 16.36 +#define PAGING_PRINTK(_f, _a...) \ 16.37 + debugtrace_printk("pg: %s(): " _f, __func__, ##_a) 16.38 +#define PAGING_ERROR(_f, _a...) \ 16.39 + printk("pg error: %s(): " _f, __func__, ##_a) 16.40 +#define PAGING_DEBUG(flag, _f, _a...) \ 16.41 + do { \ 16.42 + if (PAGING_DEBUG_ ## flag) \ 16.43 + debugtrace_printk("pgdebug: %s(): " _f, __func__, ##_a); \ 16.44 + } while (0) 16.45 + 16.46 + 16.47 +/* Domain paging struct initialization. */ 16.48 +void paging_domain_init(struct domain *d) 16.49 +{ 16.50 + p2m_init(d); 16.51 + shadow_domain_init(d); 16.52 +} 16.53 + 16.54 +/* vcpu paging struct initialization goes here */ 16.55 +void paging_vcpu_init(struct vcpu *v) 16.56 +{ 16.57 + shadow_vcpu_init(v); 16.58 +} 16.59 + 16.60 + 16.61 +int paging_domctl(struct domain *d, xen_domctl_shadow_op_t *sc, 16.62 + XEN_GUEST_HANDLE(void) u_domctl) 16.63 +{ 16.64 + /* Here, dispatch domctl to the appropriate paging code */ 16.65 + return shadow_domctl(d, sc, u_domctl); 16.66 +} 16.67 + 16.68 +/* Call when destroying a domain */ 16.69 +void paging_teardown(struct domain *d) 16.70 +{ 16.71 + shadow_teardown(d); 16.72 + /* Call other modes' teardown code here */ 16.73 +} 16.74 + 16.75 +/* Call once all of the references to the domain have gone away */ 16.76 +void paging_final_teardown(struct domain *d) 16.77 +{ 16.78 + shadow_teardown(d); 16.79 + /* Call other modes' final teardown code here */ 16.80 +} 16.81 + 16.82 +/* Enable an arbitrary paging-assistance mode. Call once at domain 16.83 + * creation. 
*/ 16.84 +int paging_enable(struct domain *d, u32 mode) 16.85 +{ 16.86 + if ( mode & PG_SH_enable ) 16.87 + return shadow_enable(d, mode); 16.88 + else 16.89 + /* No other modes supported yet */ 16.90 + return -EINVAL; 16.91 +} 16.92 + 16.93 +/* Print paging-assistance info to the console */ 16.94 +void paging_dump_domain_info(struct domain *d) 16.95 +{ 16.96 + if ( paging_mode_enabled(d) ) 16.97 + { 16.98 + printk(" paging assistance: "); 16.99 + if ( paging_mode_shadow(d) ) 16.100 + printk("shadow "); 16.101 + if ( paging_mode_hap(d) ) 16.102 + printk("hap "); 16.103 + if ( paging_mode_refcounts(d) ) 16.104 + printk("refcounts "); 16.105 + if ( paging_mode_log_dirty(d) ) 16.106 + printk("log_dirty "); 16.107 + if ( paging_mode_translate(d) ) 16.108 + printk("translate "); 16.109 + if ( paging_mode_external(d) ) 16.110 + printk("external "); 16.111 + printk("\n"); 16.112 + } 16.113 +} 16.114 + 16.115 +void paging_dump_vcpu_info(struct vcpu *v) 16.116 +{ 16.117 + if ( paging_mode_enabled(v->domain) ) 16.118 + { 16.119 + printk(" paging assistance: "); 16.120 + if ( paging_mode_shadow(v->domain) ) 16.121 + { 16.122 + if ( v->arch.paging.mode ) 16.123 + printk("shadowed %u-on-%u, %stranslated\n", 16.124 + v->arch.paging.mode->guest_levels, 16.125 + v->arch.paging.mode->shadow.shadow_levels, 16.126 + paging_vcpu_mode_translate(v) ? "" : "not "); 16.127 + else 16.128 + printk("not shadowed\n"); 16.129 + } 16.130 + else if ( paging_mode_hap(v->domain) && v->arch.paging.mode ) 16.131 + printk("hap, %u levels\n", 16.132 + v->arch.paging.mode->guest_levels); 16.133 + else 16.134 + printk("none\n"); 16.135 + } 16.136 +} 16.137 + 16.138 + 16.139 +/* 16.140 + * Local variables: 16.141 + * mode: C 16.142 + * c-set-style: "BSD" 16.143 + * c-basic-offset: 4 16.144 + * indent-tabs-mode: nil 16.145 + * End: 16.146 + */
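The new paging.c above is deliberately thin: every entry point either calls the shadow implementation directly or switches on the mode bits. As an illustration of how a later hardware-assisted mode could slot into this dispatch layer, a hypothetical extension of paging_enable() might look like the following; hap_enable() and PG_HAP_enable are placeholders, not part of this changeset:

/* Hypothetical sketch: dispatching paging_enable() to a second backend.
 * Only the PG_SH_enable / shadow_enable() path exists in this changeset;
 * PG_HAP_enable and hap_enable() are illustrative placeholders. */
int paging_enable(struct domain *d, u32 mode)
{
    if ( mode & PG_SH_enable )
        return shadow_enable(d, mode);   /* software shadow pagetables */
    if ( mode & PG_HAP_enable )
        return hap_enable(d, mode);      /* future hardware-assisted paging */
    return -EINVAL;
}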
17.1 --- a/xen/arch/x86/mm/shadow/common.c Tue Feb 13 15:32:25 2007 +0000 17.2 +++ b/xen/arch/x86/mm/shadow/common.c Wed Feb 14 12:02:20 2007 +0000 17.3 @@ -47,12 +47,27 @@ void shadow_domain_init(struct domain *d 17.4 int i; 17.5 shadow_lock_init(d); 17.6 for ( i = 0; i <= SHADOW_MAX_ORDER; i++ ) 17.7 - INIT_LIST_HEAD(&d->arch.shadow.freelists[i]); 17.8 - INIT_LIST_HEAD(&d->arch.shadow.p2m_freelist); 17.9 - INIT_LIST_HEAD(&d->arch.shadow.p2m_inuse); 17.10 - INIT_LIST_HEAD(&d->arch.shadow.pinned_shadows); 17.11 + INIT_LIST_HEAD(&d->arch.paging.shadow.freelists[i]); 17.12 + INIT_LIST_HEAD(&d->arch.paging.shadow.p2m_freelist); 17.13 + INIT_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows); 17.14 } 17.15 17.16 +/* Set up the shadow-specific parts of a vcpu struct. Note: the most important 17.17 + * job is to initialize the update_paging_modes() function pointer, which is 17.18 + * used to initialize the rest of the resources. Therefore, it does not really 17.19 + * matter which mode v->arch.paging.mode points to initially, as long as it 17.20 + * can be compiled. 17.21 + */ 17.22 +void shadow_vcpu_init(struct vcpu *v) 17.23 +{ 17.24 +#if CONFIG_PAGING_LEVELS == 4 17.25 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); 17.26 +#elif CONFIG_PAGING_LEVELS == 3 17.27 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); 17.28 +#elif CONFIG_PAGING_LEVELS == 2 17.29 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2); 17.30 +#endif 17.31 +} 17.32 17.33 #if SHADOW_AUDIT 17.34 int shadow_audit_enable = 0; 17.35 @@ -265,7 +280,7 @@ hvm_emulate_write(enum x86_segment seg, 17.36 if ( rc ) 17.37 return rc; 17.38 17.39 - return v->arch.shadow.mode->x86_emulate_write( 17.40 + return v->arch.paging.mode->shadow.x86_emulate_write( 17.41 v, addr, &val, bytes, sh_ctxt); 17.42 } 17.43 17.44 @@ -288,7 +303,7 @@ hvm_emulate_cmpxchg(enum x86_segment seg 17.45 if ( rc ) 17.46 return rc; 17.47 17.48 - return v->arch.shadow.mode->x86_emulate_cmpxchg( 17.49 + return v->arch.paging.mode->shadow.x86_emulate_cmpxchg( 17.50 v, addr, old, new, bytes, sh_ctxt); 17.51 } 17.52 17.53 @@ -312,7 +327,7 @@ hvm_emulate_cmpxchg8b(enum x86_segment s 17.54 if ( rc ) 17.55 return rc; 17.56 17.57 - return v->arch.shadow.mode->x86_emulate_cmpxchg8b( 17.58 + return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b( 17.59 v, addr, old_lo, old_hi, new_lo, new_hi, sh_ctxt); 17.60 } 17.61 17.62 @@ -353,7 +368,7 @@ pv_emulate_write(enum x86_segment seg, 17.63 struct sh_emulate_ctxt *sh_ctxt = 17.64 container_of(ctxt, struct sh_emulate_ctxt, ctxt); 17.65 struct vcpu *v = current; 17.66 - return v->arch.shadow.mode->x86_emulate_write( 17.67 + return v->arch.paging.mode->shadow.x86_emulate_write( 17.68 v, offset, &val, bytes, sh_ctxt); 17.69 } 17.70 17.71 @@ -368,7 +383,7 @@ pv_emulate_cmpxchg(enum x86_segment seg, 17.72 struct sh_emulate_ctxt *sh_ctxt = 17.73 container_of(ctxt, struct sh_emulate_ctxt, ctxt); 17.74 struct vcpu *v = current; 17.75 - return v->arch.shadow.mode->x86_emulate_cmpxchg( 17.76 + return v->arch.paging.mode->shadow.x86_emulate_cmpxchg( 17.77 v, offset, old, new, bytes, sh_ctxt); 17.78 } 17.79 17.80 @@ -384,7 +399,7 @@ pv_emulate_cmpxchg8b(enum x86_segment se 17.81 struct sh_emulate_ctxt *sh_ctxt = 17.82 container_of(ctxt, struct sh_emulate_ctxt, ctxt); 17.83 struct vcpu *v = current; 17.84 - return v->arch.shadow.mode->x86_emulate_cmpxchg8b( 17.85 + return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b( 17.86 v, offset, old_lo, old_hi, new_lo, new_hi, sh_ctxt); 17.87 } 17.88 17.89 @@
-721,7 +736,7 @@ static inline int chunk_is_available(str 17.90 int i; 17.91 17.92 for ( i = order; i <= SHADOW_MAX_ORDER; i++ ) 17.93 - if ( !list_empty(&d->arch.shadow.freelists[i]) ) 17.94 + if ( !list_empty(&d->arch.paging.shadow.freelists[i]) ) 17.95 return 1; 17.96 return 0; 17.97 } 17.98 @@ -783,7 +798,7 @@ void shadow_prealloc(struct domain *d, u 17.99 17.100 /* Stage one: walk the list of pinned pages, unpinning them */ 17.101 perfc_incrc(shadow_prealloc_1); 17.102 - list_for_each_backwards_safe(l, t, &d->arch.shadow.pinned_shadows) 17.103 + list_for_each_backwards_safe(l, t, &d->arch.paging.shadow.pinned_shadows) 17.104 { 17.105 sp = list_entry(l, struct shadow_page_info, list); 17.106 smfn = shadow_page_to_mfn(sp); 17.107 @@ -823,9 +838,9 @@ void shadow_prealloc(struct domain *d, u 17.108 SHADOW_PRINTK("Can't pre-allocate %i shadow pages!\n" 17.109 " shadow pages total = %u, free = %u, p2m=%u\n", 17.110 1 << order, 17.111 - d->arch.shadow.total_pages, 17.112 - d->arch.shadow.free_pages, 17.113 - d->arch.shadow.p2m_pages); 17.114 + d->arch.paging.shadow.total_pages, 17.115 + d->arch.paging.shadow.free_pages, 17.116 + d->arch.paging.shadow.p2m_pages); 17.117 BUG(); 17.118 } 17.119 17.120 @@ -840,7 +855,7 @@ static void shadow_blow_tables(struct do 17.121 int i; 17.122 17.123 /* Pass one: unpin all pinned pages */ 17.124 - list_for_each_backwards_safe(l,t, &d->arch.shadow.pinned_shadows) 17.125 + list_for_each_backwards_safe(l,t, &d->arch.paging.shadow.pinned_shadows) 17.126 { 17.127 sp = list_entry(l, struct shadow_page_info, list); 17.128 smfn = shadow_page_to_mfn(sp); 17.129 @@ -905,9 +920,9 @@ mfn_t shadow_alloc(struct domain *d, 17.130 17.131 /* Find smallest order which can satisfy the request. */ 17.132 for ( i = order; i <= SHADOW_MAX_ORDER; i++ ) 17.133 - if ( !list_empty(&d->arch.shadow.freelists[i]) ) 17.134 + if ( !list_empty(&d->arch.paging.shadow.freelists[i]) ) 17.135 { 17.136 - sp = list_entry(d->arch.shadow.freelists[i].next, 17.137 + sp = list_entry(d->arch.paging.shadow.freelists[i].next, 17.138 struct shadow_page_info, list); 17.139 list_del(&sp->list); 17.140 17.141 @@ -916,10 +931,10 @@ mfn_t shadow_alloc(struct domain *d, 17.142 { 17.143 i--; 17.144 sp->order = i; 17.145 - list_add_tail(&sp->list, &d->arch.shadow.freelists[i]); 17.146 + list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[i]); 17.147 sp += 1 << i; 17.148 } 17.149 - d->arch.shadow.free_pages -= 1 << order; 17.150 + d->arch.paging.shadow.free_pages -= 1 << order; 17.151 17.152 /* Init page info fields and clear the pages */ 17.153 for ( i = 0; i < 1<<order ; i++ ) 17.154 @@ -976,7 +991,7 @@ void shadow_free(struct domain *d, mfn_t 17.155 ASSERT(shadow_type != SH_type_p2m_table); 17.156 order = shadow_order(shadow_type); 17.157 17.158 - d->arch.shadow.free_pages += 1 << order; 17.159 + d->arch.paging.shadow.free_pages += 1 << order; 17.160 17.161 for ( i = 0; i < 1<<order; i++ ) 17.162 { 17.163 @@ -985,8 +1000,8 @@ void shadow_free(struct domain *d, mfn_t 17.164 for_each_vcpu(d, v) 17.165 { 17.166 /* No longer safe to look for a writeable mapping in this shadow */ 17.167 - if ( v->arch.shadow.last_writeable_pte_smfn == mfn_x(smfn) + i ) 17.168 - v->arch.shadow.last_writeable_pte_smfn = 0; 17.169 + if ( v->arch.paging.shadow.last_writeable_pte_smfn == mfn_x(smfn) + i ) 17.170 + v->arch.paging.shadow.last_writeable_pte_smfn = 0; 17.171 } 17.172 #endif 17.173 /* Strip out the type: this is now a free shadow page */ 17.174 @@ -1019,7 +1034,7 @@ void shadow_free(struct domain *d, mfn_t 17.175 } 
17.176 17.177 sp->order = order; 17.178 - list_add_tail(&sp->list, &d->arch.shadow.freelists[order]); 17.179 + list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[order]); 17.180 } 17.181 17.182 /* Divert some memory from the pool to be used by the p2m mapping. 17.183 @@ -1033,19 +1048,19 @@ void shadow_free(struct domain *d, mfn_t 17.184 * returns non-zero on success. 17.185 */ 17.186 static int 17.187 -shadow_alloc_p2m_pages(struct domain *d) 17.188 +sh_alloc_p2m_pages(struct domain *d) 17.189 { 17.190 struct page_info *pg; 17.191 u32 i; 17.192 ASSERT(shadow_locked_by_me(d)); 17.193 17.194 - if ( d->arch.shadow.total_pages 17.195 + if ( d->arch.paging.shadow.total_pages 17.196 < (shadow_min_acceptable_pages(d) + (1<<SHADOW_MAX_ORDER)) ) 17.197 return 0; /* Not enough shadow memory: need to increase it first */ 17.198 17.199 pg = mfn_to_page(shadow_alloc(d, SH_type_p2m_table, 0)); 17.200 - d->arch.shadow.p2m_pages += (1<<SHADOW_MAX_ORDER); 17.201 - d->arch.shadow.total_pages -= (1<<SHADOW_MAX_ORDER); 17.202 + d->arch.paging.shadow.p2m_pages += (1<<SHADOW_MAX_ORDER); 17.203 + d->arch.paging.shadow.total_pages -= (1<<SHADOW_MAX_ORDER); 17.204 for (i = 0; i < (1<<SHADOW_MAX_ORDER); i++) 17.205 { 17.206 /* Unlike shadow pages, mark p2m pages as owned by the domain. 17.207 @@ -1055,34 +1070,59 @@ shadow_alloc_p2m_pages(struct domain *d) 17.208 * believed to be a concern. 17.209 */ 17.210 page_set_owner(&pg[i], d); 17.211 - list_add_tail(&pg[i].list, &d->arch.shadow.p2m_freelist); 17.212 + pg->count_info = 1; 17.213 + list_add_tail(&pg[i].list, &d->arch.paging.shadow.p2m_freelist); 17.214 } 17.215 return 1; 17.216 } 17.217 17.218 // Returns 0 if no memory is available... 17.219 -mfn_t 17.220 +struct page_info * 17.221 shadow_alloc_p2m_page(struct domain *d) 17.222 { 17.223 struct list_head *entry; 17.224 struct page_info *pg; 17.225 mfn_t mfn; 17.226 void *p; 17.227 - 17.228 - if ( list_empty(&d->arch.shadow.p2m_freelist) && 17.229 - !shadow_alloc_p2m_pages(d) ) 17.230 - return _mfn(0); 17.231 - entry = d->arch.shadow.p2m_freelist.next; 17.232 + 17.233 + shadow_lock(d); 17.234 + 17.235 + if ( list_empty(&d->arch.paging.shadow.p2m_freelist) && 17.236 + !sh_alloc_p2m_pages(d) ) 17.237 + { 17.238 + shadow_unlock(d); 17.239 + return NULL; 17.240 + } 17.241 + entry = d->arch.paging.shadow.p2m_freelist.next; 17.242 list_del(entry); 17.243 - list_add_tail(entry, &d->arch.shadow.p2m_inuse); 17.244 + 17.245 + shadow_unlock(d); 17.246 + 17.247 pg = list_entry(entry, struct page_info, list); 17.248 - pg->count_info = 1; 17.249 mfn = page_to_mfn(pg); 17.250 p = sh_map_domain_page(mfn); 17.251 clear_page(p); 17.252 sh_unmap_domain_page(p); 17.253 17.254 - return mfn; 17.255 + return pg; 17.256 +} 17.257 + 17.258 +void 17.259 +shadow_free_p2m_page(struct domain *d, struct page_info *pg) 17.260 +{ 17.261 + ASSERT(page_get_owner(pg) == d); 17.262 + /* Should have just the one ref we gave it in alloc_p2m_page() */ 17.263 + if ( (pg->count_info & PGC_count_mask) != 1 ) 17.264 + { 17.265 + SHADOW_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n", 17.266 + pg->count_info, pg->u.inuse.type_info); 17.267 + } 17.268 + /* Free should not decrement domain's total allocation, since 17.269 + * these pages were allocated without an owner. 
*/ 17.270 + page_set_owner(pg, NULL); 17.271 + free_domheap_pages(pg, 0); 17.272 + d->arch.paging.shadow.p2m_pages--; 17.273 + perfc_decr(shadow_alloc_count); 17.274 } 17.275 17.276 #if CONFIG_PAGING_LEVELS == 3 17.277 @@ -1130,344 +1170,6 @@ static void p2m_install_entry_in_monitor 17.278 } 17.279 #endif 17.280 17.281 -// Find the next level's P2M entry, checking for out-of-range gfn's... 17.282 -// Returns NULL on error. 17.283 -// 17.284 -static l1_pgentry_t * 17.285 -p2m_find_entry(void *table, unsigned long *gfn_remainder, 17.286 - unsigned long gfn, u32 shift, u32 max) 17.287 -{ 17.288 - u32 index; 17.289 - 17.290 - index = *gfn_remainder >> shift; 17.291 - if ( index >= max ) 17.292 - { 17.293 - SHADOW_DEBUG(P2M, "gfn=0x%lx out of range " 17.294 - "(gfn_remainder=0x%lx shift=%d index=0x%x max=0x%x)\n", 17.295 - gfn, *gfn_remainder, shift, index, max); 17.296 - return NULL; 17.297 - } 17.298 - *gfn_remainder &= (1 << shift) - 1; 17.299 - return (l1_pgentry_t *)table + index; 17.300 -} 17.301 - 17.302 -// Walk one level of the P2M table, allocating a new table if required. 17.303 -// Returns 0 on error. 17.304 -// 17.305 -static int 17.306 -p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table, 17.307 - unsigned long *gfn_remainder, unsigned long gfn, u32 shift, 17.308 - u32 max, unsigned long type) 17.309 -{ 17.310 - l1_pgentry_t *p2m_entry; 17.311 - void *next; 17.312 - 17.313 - if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn, 17.314 - shift, max)) ) 17.315 - return 0; 17.316 - 17.317 - if ( !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) ) 17.318 - { 17.319 - mfn_t mfn = shadow_alloc_p2m_page(d); 17.320 - if ( mfn_x(mfn) == 0 ) 17.321 - return 0; 17.322 - *p2m_entry = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER); 17.323 - mfn_to_page(mfn)->u.inuse.type_info = type | 1 | PGT_validated; 17.324 - mfn_to_page(mfn)->count_info = 1; 17.325 -#if CONFIG_PAGING_LEVELS == 3 17.326 - if (type == PGT_l2_page_table) 17.327 - { 17.328 - struct vcpu *v; 17.329 - /* We have written to the p2m l3: need to sync the per-vcpu 17.330 - * copies of it in the monitor tables */ 17.331 - p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p2m_entry); 17.332 - /* Also, any vcpus running on shadows of the p2m need to 17.333 - * reload their CR3s so the change propagates to the shadow */ 17.334 - ASSERT(shadow_locked_by_me(d)); 17.335 - for_each_vcpu(d, v) 17.336 - { 17.337 - if ( pagetable_get_pfn(v->arch.guest_table) 17.338 - == pagetable_get_pfn(d->arch.phys_table) 17.339 - && v->arch.shadow.mode != NULL ) 17.340 - v->arch.shadow.mode->update_cr3(v, 0); 17.341 - } 17.342 - } 17.343 -#endif 17.344 - /* The P2M can be shadowed: keep the shadows synced */ 17.345 - if ( d->vcpu[0] != NULL ) 17.346 - (void)sh_validate_guest_entry(d->vcpu[0], *table_mfn, 17.347 - p2m_entry, sizeof *p2m_entry); 17.348 - } 17.349 - *table_mfn = _mfn(l1e_get_pfn(*p2m_entry)); 17.350 - next = sh_map_domain_page(*table_mfn); 17.351 - sh_unmap_domain_page(*table); 17.352 - *table = next; 17.353 - 17.354 - return 1; 17.355 -} 17.356 - 17.357 -// Returns 0 on error (out of memory) 17.358 -int 17.359 -shadow_set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) 17.360 -{ 17.361 - // XXX -- this might be able to be faster iff current->domain == d 17.362 - mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table); 17.363 - void *table = sh_map_domain_page(table_mfn); 17.364 - unsigned long gfn_remainder = gfn; 17.365 - l1_pgentry_t *p2m_entry; 17.366 - int rv=0; 17.367 - 17.368 -#if CONFIG_PAGING_LEVELS >= 
4 17.369 - if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, 17.370 - L4_PAGETABLE_SHIFT - PAGE_SHIFT, 17.371 - L4_PAGETABLE_ENTRIES, PGT_l3_page_table) ) 17.372 - goto out; 17.373 -#endif 17.374 -#if CONFIG_PAGING_LEVELS >= 3 17.375 - // When using PAE Xen, we only allow 33 bits of pseudo-physical 17.376 - // address in translated guests (i.e. 8 GBytes). This restriction 17.377 - // comes from wanting to map the P2M table into the 16MB RO_MPT hole 17.378 - // in Xen's address space for translated PV guests. 17.379 - // 17.380 - if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, 17.381 - L3_PAGETABLE_SHIFT - PAGE_SHIFT, 17.382 - (CONFIG_PAGING_LEVELS == 3 17.383 - ? 8 17.384 - : L3_PAGETABLE_ENTRIES), 17.385 - PGT_l2_page_table) ) 17.386 - goto out; 17.387 -#endif 17.388 - if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, 17.389 - L2_PAGETABLE_SHIFT - PAGE_SHIFT, 17.390 - L2_PAGETABLE_ENTRIES, PGT_l1_page_table) ) 17.391 - goto out; 17.392 - 17.393 - p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, 17.394 - 0, L1_PAGETABLE_ENTRIES); 17.395 - ASSERT(p2m_entry); 17.396 - if ( mfn_valid(mfn) ) 17.397 - *p2m_entry = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER); 17.398 - else 17.399 - *p2m_entry = l1e_empty(); 17.400 - 17.401 - /* Track the highest gfn for which we have ever had a valid mapping */ 17.402 - if ( mfn_valid(mfn) && (gfn > d->arch.max_mapped_pfn) ) 17.403 - d->arch.max_mapped_pfn = gfn; 17.404 - 17.405 - /* The P2M can be shadowed: keep the shadows synced */ 17.406 - if ( d->vcpu[0] != NULL ) 17.407 - (void)sh_validate_guest_entry(d->vcpu[0], table_mfn, 17.408 - p2m_entry, sizeof(*p2m_entry)); 17.409 - 17.410 - /* Success */ 17.411 - rv = 1; 17.412 - 17.413 - out: 17.414 - sh_unmap_domain_page(table); 17.415 - return rv; 17.416 -} 17.417 - 17.418 -// Allocate a new p2m table for a domain. 17.419 -// 17.420 -// The structure of the p2m table is that of a pagetable for xen (i.e. it is 17.421 -// controlled by CONFIG_PAGING_LEVELS). 17.422 -// 17.423 -// Returns 0 if p2m table could not be initialized 17.424 -// 17.425 -static int 17.426 -shadow_alloc_p2m_table(struct domain *d) 17.427 -{ 17.428 - mfn_t p2m_top, mfn; 17.429 - struct list_head *entry; 17.430 - struct page_info *page; 17.431 - unsigned int page_count = 0; 17.432 - unsigned long gfn; 17.433 - 17.434 - SHADOW_PRINTK("allocating p2m table\n"); 17.435 - ASSERT(pagetable_get_pfn(d->arch.phys_table) == 0); 17.436 - 17.437 - p2m_top = shadow_alloc_p2m_page(d); 17.438 - mfn_to_page(p2m_top)->count_info = 1; 17.439 - mfn_to_page(p2m_top)->u.inuse.type_info = 17.440 -#if CONFIG_PAGING_LEVELS == 4 17.441 - PGT_l4_page_table 17.442 -#elif CONFIG_PAGING_LEVELS == 3 17.443 - PGT_l3_page_table 17.444 -#elif CONFIG_PAGING_LEVELS == 2 17.445 - PGT_l2_page_table 17.446 -#endif 17.447 - | 1 | PGT_validated; 17.448 - 17.449 - if ( mfn_x(p2m_top) == 0 ) 17.450 - return 0; 17.451 - 17.452 - d->arch.phys_table = pagetable_from_mfn(p2m_top); 17.453 - 17.454 - SHADOW_PRINTK("populating p2m table\n"); 17.455 - 17.456 - /* Initialise physmap tables for slot zero. Other code assumes this. */ 17.457 - gfn = 0; 17.458 - mfn = _mfn(INVALID_MFN); 17.459 - if ( !shadow_set_p2m_entry(d, gfn, mfn) ) 17.460 - goto error; 17.461 - 17.462 - /* Build a p2m map that matches the m2p entries for this domain's 17.463 - * allocated pages. Skip any pages that have an explicitly invalid 17.464 - * or obviously bogus m2p entry. 
*/ 17.465 - for ( entry = d->page_list.next; 17.466 - entry != &d->page_list; 17.467 - entry = entry->next ) 17.468 - { 17.469 - page = list_entry(entry, struct page_info, list); 17.470 - mfn = page_to_mfn(page); 17.471 - gfn = get_gpfn_from_mfn(mfn_x(mfn)); 17.472 - page_count++; 17.473 - if ( 17.474 -#ifdef __x86_64__ 17.475 - (gfn != 0x5555555555555555L) 17.476 -#else 17.477 - (gfn != 0x55555555L) 17.478 -#endif 17.479 - && gfn != INVALID_M2P_ENTRY 17.480 - && (gfn < 17.481 - (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof (l1_pgentry_t)) 17.482 - && !shadow_set_p2m_entry(d, gfn, mfn) ) 17.483 - goto error; 17.484 - } 17.485 - 17.486 - SHADOW_PRINTK("p2m table initialised (%u pages)\n", page_count); 17.487 - return 1; 17.488 - 17.489 - error: 17.490 - SHADOW_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%" 17.491 - SH_PRI_mfn "\n", gfn, mfn_x(mfn)); 17.492 - return 0; 17.493 -} 17.494 - 17.495 -mfn_t 17.496 -sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn) 17.497 -/* Read another domain's p2m entries */ 17.498 -{ 17.499 - mfn_t mfn; 17.500 - paddr_t addr = ((paddr_t)gpfn) << PAGE_SHIFT; 17.501 - l2_pgentry_t *l2e; 17.502 - l1_pgentry_t *l1e; 17.503 - 17.504 - ASSERT(shadow_mode_translate(d)); 17.505 - mfn = pagetable_get_mfn(d->arch.phys_table); 17.506 - 17.507 - 17.508 - if ( gpfn > d->arch.max_mapped_pfn ) 17.509 - /* This pfn is higher than the highest the p2m map currently holds */ 17.510 - return _mfn(INVALID_MFN); 17.511 - 17.512 -#if CONFIG_PAGING_LEVELS >= 4 17.513 - { 17.514 - l4_pgentry_t *l4e = sh_map_domain_page(mfn); 17.515 - l4e += l4_table_offset(addr); 17.516 - if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 ) 17.517 - { 17.518 - sh_unmap_domain_page(l4e); 17.519 - return _mfn(INVALID_MFN); 17.520 - } 17.521 - mfn = _mfn(l4e_get_pfn(*l4e)); 17.522 - sh_unmap_domain_page(l4e); 17.523 - } 17.524 -#endif 17.525 -#if CONFIG_PAGING_LEVELS >= 3 17.526 - { 17.527 - l3_pgentry_t *l3e = sh_map_domain_page(mfn); 17.528 -#if CONFIG_PAGING_LEVELS == 3 17.529 - /* On PAE hosts the p2m has eight l3 entries, not four (see 17.530 - * shadow_set_p2m_entry()) so we can't use l3_table_offset. 17.531 - * Instead, just count the number of l3es from zero. It's safe 17.532 - * to do this because we already checked that the gfn is within 17.533 - * the bounds of the p2m. 
*/ 17.534 - l3e += (addr >> L3_PAGETABLE_SHIFT); 17.535 -#else 17.536 - l3e += l3_table_offset(addr); 17.537 -#endif 17.538 - if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 ) 17.539 - { 17.540 - sh_unmap_domain_page(l3e); 17.541 - return _mfn(INVALID_MFN); 17.542 - } 17.543 - mfn = _mfn(l3e_get_pfn(*l3e)); 17.544 - sh_unmap_domain_page(l3e); 17.545 - } 17.546 -#endif 17.547 - 17.548 - l2e = sh_map_domain_page(mfn); 17.549 - l2e += l2_table_offset(addr); 17.550 - if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 ) 17.551 - { 17.552 - sh_unmap_domain_page(l2e); 17.553 - return _mfn(INVALID_MFN); 17.554 - } 17.555 - mfn = _mfn(l2e_get_pfn(*l2e)); 17.556 - sh_unmap_domain_page(l2e); 17.557 - 17.558 - l1e = sh_map_domain_page(mfn); 17.559 - l1e += l1_table_offset(addr); 17.560 - if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 ) 17.561 - { 17.562 - sh_unmap_domain_page(l1e); 17.563 - return _mfn(INVALID_MFN); 17.564 - } 17.565 - mfn = _mfn(l1e_get_pfn(*l1e)); 17.566 - sh_unmap_domain_page(l1e); 17.567 - 17.568 - return mfn; 17.569 -} 17.570 - 17.571 -unsigned long 17.572 -shadow_gfn_to_mfn_foreign(unsigned long gpfn) 17.573 -{ 17.574 - return mfn_x(sh_gfn_to_mfn_foreign(current->domain, gpfn)); 17.575 -} 17.576 - 17.577 - 17.578 -static void shadow_p2m_teardown(struct domain *d) 17.579 -/* Return all the p2m pages to Xen. 17.580 - * We know we don't have any extra mappings to these pages */ 17.581 -{ 17.582 - struct list_head *entry, *n; 17.583 - struct page_info *pg; 17.584 - 17.585 - d->arch.phys_table = pagetable_null(); 17.586 - 17.587 - list_for_each_safe(entry, n, &d->arch.shadow.p2m_inuse) 17.588 - { 17.589 - pg = list_entry(entry, struct page_info, list); 17.590 - list_del(entry); 17.591 - /* Should have just the one ref we gave it in alloc_p2m_page() */ 17.592 - if ( (pg->count_info & PGC_count_mask) != 1 ) 17.593 - { 17.594 - SHADOW_PRINTK("Odd p2m page count c=%#x t=%"PRtype_info"\n", 17.595 - pg->count_info, pg->u.inuse.type_info); 17.596 - } 17.597 - ASSERT(page_get_owner(pg) == d); 17.598 - /* Free should not decrement domain's total allocation, since 17.599 - * these pages were allocated without an owner. */ 17.600 - page_set_owner(pg, NULL); 17.601 - free_domheap_pages(pg, 0); 17.602 - d->arch.shadow.p2m_pages--; 17.603 - perfc_decr(shadow_alloc_count); 17.604 - } 17.605 - list_for_each_safe(entry, n, &d->arch.shadow.p2m_freelist) 17.606 - { 17.607 - list_del(entry); 17.608 - pg = list_entry(entry, struct page_info, list); 17.609 - ASSERT(page_get_owner(pg) == d); 17.610 - /* Free should not decrement domain's total allocation. */ 17.611 - page_set_owner(pg, NULL); 17.612 - free_domheap_pages(pg, 0); 17.613 - d->arch.shadow.p2m_pages--; 17.614 - perfc_decr(shadow_alloc_count); 17.615 - } 17.616 - ASSERT(d->arch.shadow.p2m_pages == 0); 17.617 -} 17.618 - 17.619 /* Set the pool of shadow pages to the required number of pages. 17.620 * Input will be rounded up to at least shadow_min_acceptable_pages(), 17.621 * plus space for the p2m table. 
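The sh_set_allocation() hunk that follows first rounds the requested page count up to a whole number of 2^SHADOW_MAX_ORDER-page chunks, since shadow memory is moved to and from the domheap in chunks of that size. In isolation the rounding step is just the following (a sketch; round_to_shadow_chunk() is an illustrative name, SHADOW_MAX_ORDER is the existing shadow constant):

/* Round a page count up to a multiple of (1 << SHADOW_MAX_ORDER), the
 * same arithmetic used at the top of sh_set_allocation() below. */
static unsigned int round_to_shadow_chunk(unsigned int pages)
{
    return (pages + ((1u << SHADOW_MAX_ORDER) - 1))
            & ~((1u << SHADOW_MAX_ORDER) - 1);
}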
17.622 @@ -1491,11 +1193,11 @@ static unsigned int sh_set_allocation(st 17.623 pages = (pages + ((1<<SHADOW_MAX_ORDER)-1)) & ~((1<<SHADOW_MAX_ORDER)-1); 17.624 17.625 SHADOW_PRINTK("current %i target %i\n", 17.626 - d->arch.shadow.total_pages, pages); 17.627 - 17.628 - while ( d->arch.shadow.total_pages != pages ) 17.629 + d->arch.paging.shadow.total_pages, pages); 17.630 + 17.631 + while ( d->arch.paging.shadow.total_pages != pages ) 17.632 { 17.633 - if ( d->arch.shadow.total_pages < pages ) 17.634 + if ( d->arch.paging.shadow.total_pages < pages ) 17.635 { 17.636 /* Need to allocate more memory from domheap */ 17.637 sp = (struct shadow_page_info *) 17.638 @@ -1505,8 +1207,8 @@ static unsigned int sh_set_allocation(st 17.639 SHADOW_PRINTK("failed to allocate shadow pages.\n"); 17.640 return -ENOMEM; 17.641 } 17.642 - d->arch.shadow.free_pages += 1<<SHADOW_MAX_ORDER; 17.643 - d->arch.shadow.total_pages += 1<<SHADOW_MAX_ORDER; 17.644 + d->arch.paging.shadow.free_pages += 1<<SHADOW_MAX_ORDER; 17.645 + d->arch.paging.shadow.total_pages += 1<<SHADOW_MAX_ORDER; 17.646 for ( j = 0; j < 1<<SHADOW_MAX_ORDER; j++ ) 17.647 { 17.648 sp[j].type = 0; 17.649 @@ -1518,18 +1220,18 @@ static unsigned int sh_set_allocation(st 17.650 } 17.651 sp->order = SHADOW_MAX_ORDER; 17.652 list_add_tail(&sp->list, 17.653 - &d->arch.shadow.freelists[SHADOW_MAX_ORDER]); 17.654 + &d->arch.paging.shadow.freelists[SHADOW_MAX_ORDER]); 17.655 } 17.656 - else if ( d->arch.shadow.total_pages > pages ) 17.657 + else if ( d->arch.paging.shadow.total_pages > pages ) 17.658 { 17.659 /* Need to return memory to domheap */ 17.660 shadow_prealloc(d, SHADOW_MAX_ORDER); 17.661 - ASSERT(!list_empty(&d->arch.shadow.freelists[SHADOW_MAX_ORDER])); 17.662 - sp = list_entry(d->arch.shadow.freelists[SHADOW_MAX_ORDER].next, 17.663 + ASSERT(!list_empty(&d->arch.paging.shadow.freelists[SHADOW_MAX_ORDER])); 17.664 + sp = list_entry(d->arch.paging.shadow.freelists[SHADOW_MAX_ORDER].next, 17.665 struct shadow_page_info, list); 17.666 list_del(&sp->list); 17.667 - d->arch.shadow.free_pages -= 1<<SHADOW_MAX_ORDER; 17.668 - d->arch.shadow.total_pages -= 1<<SHADOW_MAX_ORDER; 17.669 + d->arch.paging.shadow.free_pages -= 1<<SHADOW_MAX_ORDER; 17.670 + d->arch.paging.shadow.total_pages -= 1<<SHADOW_MAX_ORDER; 17.671 free_domheap_pages((struct page_info *)sp, SHADOW_MAX_ORDER); 17.672 } 17.673 17.674 @@ -1547,7 +1249,7 @@ static unsigned int sh_set_allocation(st 17.675 /* Return the size of the shadow pool, rounded up to the nearest MB */ 17.676 static unsigned int shadow_get_allocation(struct domain *d) 17.677 { 17.678 - unsigned int pg = d->arch.shadow.total_pages; 17.679 + unsigned int pg = d->arch.paging.shadow.total_pages; 17.680 return ((pg >> (20 - PAGE_SHIFT)) 17.681 + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0)); 17.682 } 17.683 @@ -1583,7 +1285,7 @@ static void sh_hash_audit_bucket(struct 17.684 if ( !(SHADOW_AUDIT_ENABLE) ) 17.685 return; 17.686 17.687 - sp = d->arch.shadow.hash_table[bucket]; 17.688 + sp = d->arch.paging.shadow.hash_table[bucket]; 17.689 while ( sp ) 17.690 { 17.691 /* Not a shadow? 
*/ 17.692 @@ -1608,7 +1310,7 @@ static void sh_hash_audit_bucket(struct 17.693 if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page 17.694 && (gpg->u.inuse.type_info & PGT_count_mask) != 0 ) 17.695 { 17.696 - SHADOW_ERROR("MFN %#lx shadowed (by %#"SH_PRI_mfn")" 17.697 + SHADOW_ERROR("MFN %#lx shadowed (by %#"PRI_mfn")" 17.698 " but has typecount %#lx\n", 17.699 sp->backpointer, mfn_x(shadow_page_to_mfn(sp)), 17.700 gpg->u.inuse.type_info); 17.701 @@ -1652,13 +1354,13 @@ static int shadow_hash_alloc(struct doma 17.702 struct shadow_page_info **table; 17.703 17.704 ASSERT(shadow_locked_by_me(d)); 17.705 - ASSERT(!d->arch.shadow.hash_table); 17.706 + ASSERT(!d->arch.paging.shadow.hash_table); 17.707 17.708 table = xmalloc_array(struct shadow_page_info *, SHADOW_HASH_BUCKETS); 17.709 if ( !table ) return 1; 17.710 memset(table, 0, 17.711 SHADOW_HASH_BUCKETS * sizeof (struct shadow_page_info *)); 17.712 - d->arch.shadow.hash_table = table; 17.713 + d->arch.paging.shadow.hash_table = table; 17.714 return 0; 17.715 } 17.716 17.717 @@ -1667,10 +1369,10 @@ static int shadow_hash_alloc(struct doma 17.718 static void shadow_hash_teardown(struct domain *d) 17.719 { 17.720 ASSERT(shadow_locked_by_me(d)); 17.721 - ASSERT(d->arch.shadow.hash_table); 17.722 - 17.723 - xfree(d->arch.shadow.hash_table); 17.724 - d->arch.shadow.hash_table = NULL; 17.725 + ASSERT(d->arch.paging.shadow.hash_table); 17.726 + 17.727 + xfree(d->arch.paging.shadow.hash_table); 17.728 + d->arch.paging.shadow.hash_table = NULL; 17.729 } 17.730 17.731 17.732 @@ -1683,7 +1385,7 @@ mfn_t shadow_hash_lookup(struct vcpu *v, 17.733 key_t key; 17.734 17.735 ASSERT(shadow_locked_by_me(d)); 17.736 - ASSERT(d->arch.shadow.hash_table); 17.737 + ASSERT(d->arch.paging.shadow.hash_table); 17.738 ASSERT(t); 17.739 17.740 sh_hash_audit(d); 17.741 @@ -1692,16 +1394,16 @@ mfn_t shadow_hash_lookup(struct vcpu *v, 17.742 key = sh_hash(n, t); 17.743 sh_hash_audit_bucket(d, key); 17.744 17.745 - sp = d->arch.shadow.hash_table[key]; 17.746 + sp = d->arch.paging.shadow.hash_table[key]; 17.747 prev = NULL; 17.748 while(sp) 17.749 { 17.750 if ( sp->backpointer == n && sp->type == t ) 17.751 { 17.752 /* Pull-to-front if 'sp' isn't already the head item */ 17.753 - if ( unlikely(sp != d->arch.shadow.hash_table[key]) ) 17.754 + if ( unlikely(sp != d->arch.paging.shadow.hash_table[key]) ) 17.755 { 17.756 - if ( unlikely(d->arch.shadow.hash_walking != 0) ) 17.757 + if ( unlikely(d->arch.paging.shadow.hash_walking != 0) ) 17.758 /* Can't reorder: someone is walking the hash chains */ 17.759 return shadow_page_to_mfn(sp); 17.760 else 17.761 @@ -1710,8 +1412,8 @@ mfn_t shadow_hash_lookup(struct vcpu *v, 17.762 /* Delete sp from the list */ 17.763 prev->next_shadow = sp->next_shadow; 17.764 /* Re-insert it at the head of the list */ 17.765 - sp->next_shadow = d->arch.shadow.hash_table[key]; 17.766 - d->arch.shadow.hash_table[key] = sp; 17.767 + sp->next_shadow = d->arch.paging.shadow.hash_table[key]; 17.768 + d->arch.paging.shadow.hash_table[key] = sp; 17.769 } 17.770 } 17.771 else 17.772 @@ -1737,7 +1439,7 @@ void shadow_hash_insert(struct vcpu *v, 17.773 key_t key; 17.774 17.775 ASSERT(shadow_locked_by_me(d)); 17.776 - ASSERT(d->arch.shadow.hash_table); 17.777 + ASSERT(d->arch.paging.shadow.hash_table); 17.778 ASSERT(t); 17.779 17.780 sh_hash_audit(d); 17.781 @@ -1748,8 +1450,8 @@ void shadow_hash_insert(struct vcpu *v, 17.782 17.783 /* Insert this shadow at the top of the bucket */ 17.784 sp = mfn_to_shadow_page(smfn); 17.785 - sp->next_shadow = 
d->arch.shadow.hash_table[key]; 17.786 - d->arch.shadow.hash_table[key] = sp; 17.787 + sp->next_shadow = d->arch.paging.shadow.hash_table[key]; 17.788 + d->arch.paging.shadow.hash_table[key] = sp; 17.789 17.790 sh_hash_audit_bucket(d, key); 17.791 } 17.792 @@ -1763,7 +1465,7 @@ void shadow_hash_delete(struct vcpu *v, 17.793 key_t key; 17.794 17.795 ASSERT(shadow_locked_by_me(d)); 17.796 - ASSERT(d->arch.shadow.hash_table); 17.797 + ASSERT(d->arch.paging.shadow.hash_table); 17.798 ASSERT(t); 17.799 17.800 sh_hash_audit(d); 17.801 @@ -1773,13 +1475,13 @@ void shadow_hash_delete(struct vcpu *v, 17.802 sh_hash_audit_bucket(d, key); 17.803 17.804 sp = mfn_to_shadow_page(smfn); 17.805 - if ( d->arch.shadow.hash_table[key] == sp ) 17.806 + if ( d->arch.paging.shadow.hash_table[key] == sp ) 17.807 /* Easy case: we're deleting the head item. */ 17.808 - d->arch.shadow.hash_table[key] = sp->next_shadow; 17.809 + d->arch.paging.shadow.hash_table[key] = sp->next_shadow; 17.810 else 17.811 { 17.812 /* Need to search for the one we want */ 17.813 - x = d->arch.shadow.hash_table[key]; 17.814 + x = d->arch.paging.shadow.hash_table[key]; 17.815 while ( 1 ) 17.816 { 17.817 ASSERT(x); /* We can't have hit the end, since our target is 17.818 @@ -1818,15 +1520,15 @@ static void hash_foreach(struct vcpu *v, 17.819 17.820 /* Say we're here, to stop hash-lookups reordering the chains */ 17.821 ASSERT(shadow_locked_by_me(d)); 17.822 - ASSERT(d->arch.shadow.hash_walking == 0); 17.823 - d->arch.shadow.hash_walking = 1; 17.824 + ASSERT(d->arch.paging.shadow.hash_walking == 0); 17.825 + d->arch.paging.shadow.hash_walking = 1; 17.826 17.827 for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ ) 17.828 { 17.829 /* WARNING: This is not safe against changes to the hash table. 17.830 * The callback *must* return non-zero if it has inserted or 17.831 * deleted anything from the hash (lookups are OK, though). 
*/ 17.832 - for ( x = d->arch.shadow.hash_table[i]; x; x = x->next_shadow ) 17.833 + for ( x = d->arch.paging.shadow.hash_table[i]; x; x = x->next_shadow ) 17.834 { 17.835 if ( callback_mask & (1 << x->type) ) 17.836 { 17.837 @@ -1839,7 +1541,7 @@ static void hash_foreach(struct vcpu *v, 17.838 } 17.839 if ( done ) break; 17.840 } 17.841 - d->arch.shadow.hash_walking = 0; 17.842 + d->arch.paging.shadow.hash_walking = 0; 17.843 } 17.844 17.845 17.846 @@ -2008,27 +1710,27 @@ int sh_remove_write_access(struct vcpu * 17.847 * and that mapping is likely to be in the current pagetable, 17.848 * in the guest's linear map (on non-HIGHPTE linux and windows)*/ 17.849 17.850 -#define GUESS(_a, _h) do { \ 17.851 - if ( v->arch.shadow.mode->guess_wrmap(v, (_a), gmfn) ) \ 17.852 - perfc_incrc(shadow_writeable_h_ ## _h); \ 17.853 - if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 ) \ 17.854 - return 1; \ 17.855 +#define GUESS(_a, _h) do { \ 17.856 + if ( v->arch.paging.mode->shadow.guess_wrmap(v, (_a), gmfn) ) \ 17.857 + perfc_incrc(shadow_writeable_h_ ## _h); \ 17.858 + if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 ) \ 17.859 + return 1; \ 17.860 } while (0) 17.861 17.862 17.863 - if ( v->arch.shadow.mode->guest_levels == 2 ) 17.864 + if ( v->arch.paging.mode->guest_levels == 2 ) 17.865 { 17.866 if ( level == 1 ) 17.867 /* 32bit non-PAE w2k3: linear map at 0xC0000000 */ 17.868 GUESS(0xC0000000UL + (fault_addr >> 10), 1); 17.869 17.870 /* Linux lowmem: first 896MB is mapped 1-to-1 above 0xC0000000 */ 17.871 - if ((gfn = sh_mfn_to_gfn(v->domain, gmfn)) < 0x38000 ) 17.872 + if ((gfn = mfn_to_gfn(v->domain, gmfn)) < 0x38000 ) 17.873 GUESS(0xC0000000UL + (gfn << PAGE_SHIFT), 4); 17.874 17.875 } 17.876 #if CONFIG_PAGING_LEVELS >= 3 17.877 - else if ( v->arch.shadow.mode->guest_levels == 3 ) 17.878 + else if ( v->arch.paging.mode->guest_levels == 3 ) 17.879 { 17.880 /* 32bit PAE w2k3: linear map at 0xC0000000 */ 17.881 switch ( level ) 17.882 @@ -2038,11 +1740,11 @@ int sh_remove_write_access(struct vcpu * 17.883 } 17.884 17.885 /* Linux lowmem: first 896MB is mapped 1-to-1 above 0xC0000000 */ 17.886 - if ((gfn = sh_mfn_to_gfn(v->domain, gmfn)) < 0x38000 ) 17.887 + if ((gfn = mfn_to_gfn(v->domain, gmfn)) < 0x38000 ) 17.888 GUESS(0xC0000000UL + (gfn << PAGE_SHIFT), 4); 17.889 } 17.890 #if CONFIG_PAGING_LEVELS >= 4 17.891 - else if ( v->arch.shadow.mode->guest_levels == 4 ) 17.892 + else if ( v->arch.paging.mode->guest_levels == 4 ) 17.893 { 17.894 /* 64bit w2k3: linear map at 0x0000070000000000 */ 17.895 switch ( level ) 17.896 @@ -2054,7 +1756,7 @@ int sh_remove_write_access(struct vcpu * 17.897 17.898 /* 64bit Linux direct map at 0xffff810000000000; older kernels 17.899 * had it at 0x0000010000000000UL */ 17.900 - gfn = sh_mfn_to_gfn(v->domain, gmfn); 17.901 + gfn = mfn_to_gfn(v->domain, gmfn); 17.902 GUESS(0xffff810000000000UL + (gfn << PAGE_SHIFT), 4); 17.903 GUESS(0x0000010000000000UL + (gfn << PAGE_SHIFT), 4); 17.904 } 17.905 @@ -2073,10 +1775,10 @@ int sh_remove_write_access(struct vcpu * 17.906 * the writeable mapping by looking at the same MFN where the last 17.907 * brute-force search succeeded. 
*/ 17.908 17.909 - if ( v->arch.shadow.last_writeable_pte_smfn != 0 ) 17.910 + if ( v->arch.paging.shadow.last_writeable_pte_smfn != 0 ) 17.911 { 17.912 unsigned long old_count = (pg->u.inuse.type_info & PGT_count_mask); 17.913 - mfn_t last_smfn = _mfn(v->arch.shadow.last_writeable_pte_smfn); 17.914 + mfn_t last_smfn = _mfn(v->arch.paging.shadow.last_writeable_pte_smfn); 17.915 int shtype = mfn_to_shadow_page(last_smfn)->type; 17.916 17.917 if ( callbacks[shtype] ) 17.918 @@ -2431,7 +2133,7 @@ sh_remove_all_shadows_and_parents(struct 17.919 static void sh_update_paging_modes(struct vcpu *v) 17.920 { 17.921 struct domain *d = v->domain; 17.922 - struct shadow_paging_mode *old_mode = v->arch.shadow.mode; 17.923 + struct paging_mode *old_mode = v->arch.paging.mode; 17.924 mfn_t old_guest_table; 17.925 17.926 ASSERT(shadow_locked_by_me(d)); 17.927 @@ -2446,8 +2148,8 @@ static void sh_update_paging_modes(struc 17.928 17.929 // First, tear down any old shadow tables held by this vcpu. 17.930 // 17.931 - if ( v->arch.shadow.mode ) 17.932 - v->arch.shadow.mode->detach_old_tables(v); 17.933 + if ( v->arch.paging.mode ) 17.934 + v->arch.paging.mode->shadow.detach_old_tables(v); 17.935 17.936 if ( !is_hvm_domain(d) ) 17.937 { 17.938 @@ -2456,17 +2158,17 @@ static void sh_update_paging_modes(struc 17.939 /// 17.940 #if CONFIG_PAGING_LEVELS == 4 17.941 if ( pv_32bit_guest(v) ) 17.942 - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); 17.943 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); 17.944 else 17.945 - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4); 17.946 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4); 17.947 #elif CONFIG_PAGING_LEVELS == 3 17.948 - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); 17.949 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); 17.950 #elif CONFIG_PAGING_LEVELS == 2 17.951 - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2); 17.952 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2); 17.953 #else 17.954 #error unexpected paging mode 17.955 #endif 17.956 - v->arch.shadow.translate_enabled = !!shadow_mode_translate(d); 17.957 + v->arch.paging.translate_enabled = !!shadow_mode_translate(d); 17.958 } 17.959 else 17.960 { 17.961 @@ -2476,8 +2178,8 @@ static void sh_update_paging_modes(struc 17.962 ASSERT(shadow_mode_translate(d)); 17.963 ASSERT(shadow_mode_external(d)); 17.964 17.965 - v->arch.shadow.translate_enabled = !!hvm_paging_enabled(v); 17.966 - if ( !v->arch.shadow.translate_enabled ) 17.967 + v->arch.paging.translate_enabled = !!hvm_paging_enabled(v); 17.968 + if ( !v->arch.paging.translate_enabled ) 17.969 { 17.970 /* Set v->arch.guest_table to use the p2m map, and choose 17.971 * the appropriate shadow mode */ 17.972 @@ -2485,11 +2187,11 @@ static void sh_update_paging_modes(struc 17.973 #if CONFIG_PAGING_LEVELS == 2 17.974 v->arch.guest_table = 17.975 pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table)); 17.976 - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2); 17.977 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2); 17.978 #elif CONFIG_PAGING_LEVELS == 3 17.979 v->arch.guest_table = 17.980 pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table)); 17.981 - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); 17.982 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); 17.983 #else /* CONFIG_PAGING_LEVELS == 4 */ 17.984 { 17.985 l4_pgentry_t *l4e; 17.986 @@ -2501,7 
+2203,7 @@ static void sh_update_paging_modes(struc 17.987 pagetable_from_pfn(l4e_get_pfn(l4e[0])); 17.988 sh_unmap_domain_page(l4e); 17.989 } 17.990 - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); 17.991 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); 17.992 #endif 17.993 /* Fix up refcounts on guest_table */ 17.994 get_page(mfn_to_page(pagetable_get_mfn(v->arch.guest_table)), d); 17.995 @@ -2514,7 +2216,7 @@ static void sh_update_paging_modes(struc 17.996 if ( hvm_long_mode_enabled(v) ) 17.997 { 17.998 // long mode guest... 17.999 - v->arch.shadow.mode = 17.1000 + v->arch.paging.mode = 17.1001 &SHADOW_INTERNAL_NAME(sh_paging_mode, 4, 4); 17.1002 } 17.1003 else 17.1004 @@ -2523,7 +2225,7 @@ static void sh_update_paging_modes(struc 17.1005 { 17.1006 #if CONFIG_PAGING_LEVELS >= 3 17.1007 // 32-bit PAE mode guest... 17.1008 - v->arch.shadow.mode = 17.1009 + v->arch.paging.mode = 17.1010 &SHADOW_INTERNAL_NAME(sh_paging_mode, 3, 3); 17.1011 #else 17.1012 SHADOW_ERROR("PAE not supported in 32-bit Xen\n"); 17.1013 @@ -2535,10 +2237,10 @@ static void sh_update_paging_modes(struc 17.1014 { 17.1015 // 32-bit 2 level guest... 17.1016 #if CONFIG_PAGING_LEVELS >= 3 17.1017 - v->arch.shadow.mode = 17.1018 + v->arch.paging.mode = 17.1019 &SHADOW_INTERNAL_NAME(sh_paging_mode, 3, 2); 17.1020 #else 17.1021 - v->arch.shadow.mode = 17.1022 + v->arch.paging.mode = 17.1023 &SHADOW_INTERNAL_NAME(sh_paging_mode, 2, 2); 17.1024 #endif 17.1025 } 17.1026 @@ -2546,25 +2248,25 @@ static void sh_update_paging_modes(struc 17.1027 17.1028 if ( pagetable_is_null(v->arch.monitor_table) ) 17.1029 { 17.1030 - mfn_t mmfn = v->arch.shadow.mode->make_monitor_table(v); 17.1031 + mfn_t mmfn = v->arch.paging.mode->shadow.make_monitor_table(v); 17.1032 v->arch.monitor_table = pagetable_from_mfn(mmfn); 17.1033 make_cr3(v, mfn_x(mmfn)); 17.1034 hvm_update_host_cr3(v); 17.1035 } 17.1036 17.1037 - if ( v->arch.shadow.mode != old_mode ) 17.1038 + if ( v->arch.paging.mode != old_mode ) 17.1039 { 17.1040 SHADOW_PRINTK("new paging mode: d=%u v=%u pe=%d g=%u s=%u " 17.1041 "(was g=%u s=%u)\n", 17.1042 d->domain_id, v->vcpu_id, 17.1043 is_hvm_domain(d) ? !!hvm_paging_enabled(v) : 1, 17.1044 - v->arch.shadow.mode->guest_levels, 17.1045 - v->arch.shadow.mode->shadow_levels, 17.1046 + v->arch.paging.mode->guest_levels, 17.1047 + v->arch.paging.mode->shadow.shadow_levels, 17.1048 old_mode ? old_mode->guest_levels : 0, 17.1049 - old_mode ? old_mode->shadow_levels : 0); 17.1050 + old_mode ? 
old_mode->shadow.shadow_levels : 0); 17.1051 if ( old_mode && 17.1052 - (v->arch.shadow.mode->shadow_levels != 17.1053 - old_mode->shadow_levels) ) 17.1054 + (v->arch.paging.mode->shadow.shadow_levels != 17.1055 + old_mode->shadow.shadow_levels) ) 17.1056 { 17.1057 /* Need to make a new monitor table for the new mode */ 17.1058 mfn_t new_mfn, old_mfn; 17.1059 @@ -2584,9 +2286,9 @@ static void sh_update_paging_modes(struc 17.1060 17.1061 old_mfn = pagetable_get_mfn(v->arch.monitor_table); 17.1062 v->arch.monitor_table = pagetable_null(); 17.1063 - new_mfn = v->arch.shadow.mode->make_monitor_table(v); 17.1064 + new_mfn = v->arch.paging.mode->shadow.make_monitor_table(v); 17.1065 v->arch.monitor_table = pagetable_from_mfn(new_mfn); 17.1066 - SHADOW_PRINTK("new monitor table %"SH_PRI_mfn "\n", 17.1067 + SHADOW_PRINTK("new monitor table %"PRI_mfn "\n", 17.1068 mfn_x(new_mfn)); 17.1069 17.1070 /* Don't be running on the old monitor table when we 17.1071 @@ -2596,7 +2298,7 @@ static void sh_update_paging_modes(struc 17.1072 if ( v == current ) 17.1073 write_ptbase(v); 17.1074 hvm_update_host_cr3(v); 17.1075 - old_mode->destroy_monitor_table(v, old_mfn); 17.1076 + old_mode->shadow.destroy_monitor_table(v, old_mfn); 17.1077 } 17.1078 } 17.1079 17.1080 @@ -2606,7 +2308,7 @@ static void sh_update_paging_modes(struc 17.1081 // This *does* happen, at least for CR4.PGE... 17.1082 } 17.1083 17.1084 - v->arch.shadow.mode->update_cr3(v, 0); 17.1085 + v->arch.paging.mode->update_cr3(v, 0); 17.1086 } 17.1087 17.1088 void shadow_update_paging_modes(struct vcpu *v) 17.1089 @@ -2626,9 +2328,7 @@ static void sh_new_mode(struct domain *d 17.1090 17.1091 ASSERT(shadow_locked_by_me(d)); 17.1092 ASSERT(d != current->domain); 17.1093 - d->arch.shadow.mode = new_mode; 17.1094 - if ( new_mode & SHM2_translate ) 17.1095 - shadow_audit_p2m(d); 17.1096 + d->arch.paging.mode = new_mode; 17.1097 for_each_vcpu(d, v) 17.1098 sh_update_paging_modes(v); 17.1099 } 17.1100 @@ -2642,75 +2342,75 @@ int shadow_enable(struct domain *d, u32 17.1101 unsigned int old_pages; 17.1102 int rv = 0; 17.1103 17.1104 - mode |= SHM2_enable; 17.1105 + mode |= PG_SH_enable; 17.1106 17.1107 domain_pause(d); 17.1108 - shadow_lock(d); 17.1109 17.1110 /* Sanity check the arguments */ 17.1111 if ( (d == current->domain) || 17.1112 shadow_mode_enabled(d) || 17.1113 - ((mode & SHM2_translate) && !(mode & SHM2_refcounts)) || 17.1114 - ((mode & SHM2_external) && !(mode & SHM2_translate)) ) 17.1115 + ((mode & PG_translate) && !(mode & PG_refcounts)) || 17.1116 + ((mode & PG_external) && !(mode & PG_translate)) ) 17.1117 { 17.1118 rv = -EINVAL; 17.1119 - goto out; 17.1120 + goto out_unlocked; 17.1121 } 17.1122 17.1123 - // XXX -- eventually would like to require that all memory be allocated 17.1124 - // *after* shadow_enabled() is called... So here, we would test to make 17.1125 - // sure that d->page_list is empty. 
17.1126 -#if 0 17.1127 - spin_lock(&d->page_alloc_lock); 17.1128 - if ( !list_empty(&d->page_list) ) 17.1129 - { 17.1130 - spin_unlock(&d->page_alloc_lock); 17.1131 - rv = -EINVAL; 17.1132 - goto out; 17.1133 - } 17.1134 - spin_unlock(&d->page_alloc_lock); 17.1135 -#endif 17.1136 - 17.1137 /* Init the shadow memory allocation if the user hasn't done so */ 17.1138 - old_pages = d->arch.shadow.total_pages; 17.1139 + old_pages = d->arch.paging.shadow.total_pages; 17.1140 if ( old_pages == 0 ) 17.1141 - if ( sh_set_allocation(d, 256, NULL) != 0 ) /* Use at least 1MB */ 17.1142 + { 17.1143 + unsigned int r; 17.1144 + shadow_lock(d); 17.1145 + r = sh_set_allocation(d, 256, NULL); /* Use at least 1MB */ 17.1146 + shadow_unlock(d); 17.1147 + if ( r != 0 ) 17.1148 { 17.1149 sh_set_allocation(d, 0, NULL); 17.1150 rv = -ENOMEM; 17.1151 - goto out; 17.1152 - } 17.1153 + goto out_unlocked; 17.1154 + } 17.1155 + } 17.1156 + 17.1157 + /* Init the P2M table. Must be done before we take the shadow lock 17.1158 + * to avoid possible deadlock. */ 17.1159 + if ( mode & PG_translate ) 17.1160 + { 17.1161 + rv = p2m_alloc_table(d, shadow_alloc_p2m_page, shadow_free_p2m_page); 17.1162 + if (rv != 0) 17.1163 + goto out_unlocked; 17.1164 + } 17.1165 + 17.1166 + shadow_lock(d); 17.1167 + 17.1168 + /* Sanity check again with the lock held */ 17.1169 + if ( shadow_mode_enabled(d) ) 17.1170 + { 17.1171 + rv = -EINVAL; 17.1172 + goto out_locked; 17.1173 + } 17.1174 17.1175 /* Init the hash table */ 17.1176 if ( shadow_hash_alloc(d) != 0 ) 17.1177 { 17.1178 - sh_set_allocation(d, old_pages, NULL); 17.1179 rv = -ENOMEM; 17.1180 - goto out; 17.1181 + goto out_locked; 17.1182 } 17.1183 17.1184 - /* Init the P2M table */ 17.1185 - if ( mode & SHM2_translate ) 17.1186 - if ( !shadow_alloc_p2m_table(d) ) 17.1187 - { 17.1188 - shadow_hash_teardown(d); 17.1189 - sh_set_allocation(d, old_pages, NULL); 17.1190 - shadow_p2m_teardown(d); 17.1191 - rv = -ENOMEM; 17.1192 - goto out; 17.1193 - } 17.1194 - 17.1195 #if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL) 17.1196 /* We assume we're dealing with an older 64bit linux guest until we 17.1197 * see the guest use more than one l4 per vcpu. 
*/ 17.1198 - d->arch.shadow.opt_flags = SHOPT_LINUX_L3_TOPLEVEL; 17.1199 + d->arch.paging.shadow.opt_flags = SHOPT_LINUX_L3_TOPLEVEL; 17.1200 #endif 17.1201 17.1202 /* Update the bits */ 17.1203 sh_new_mode(d, mode); 17.1204 - shadow_audit_p2m(d); 17.1205 - out: 17.1206 + 17.1207 + out_locked: 17.1208 shadow_unlock(d); 17.1209 + out_unlocked: 17.1210 + if ( rv != 0 && !pagetable_is_null(d->arch.phys_table) ) 17.1211 + p2m_teardown(d); 17.1212 domain_unpause(d); 17.1213 return rv; 17.1214 } 17.1215 @@ -2721,6 +2421,8 @@ void shadow_teardown(struct domain *d) 17.1216 { 17.1217 struct vcpu *v; 17.1218 mfn_t mfn; 17.1219 + struct list_head *entry, *n; 17.1220 + struct page_info *pg; 17.1221 17.1222 ASSERT(test_bit(_DOMF_dying, &d->domain_flags)); 17.1223 ASSERT(d != current->domain); 17.1224 @@ -2733,48 +2435,55 @@ void shadow_teardown(struct domain *d) 17.1225 /* Release the shadow and monitor tables held by each vcpu */ 17.1226 for_each_vcpu(d, v) 17.1227 { 17.1228 - if ( v->arch.shadow.mode ) 17.1229 + if ( v->arch.paging.mode ) 17.1230 { 17.1231 - v->arch.shadow.mode->detach_old_tables(v); 17.1232 + v->arch.paging.mode->shadow.detach_old_tables(v); 17.1233 if ( shadow_mode_external(d) ) 17.1234 { 17.1235 mfn = pagetable_get_mfn(v->arch.monitor_table); 17.1236 if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) ) 17.1237 - v->arch.shadow.mode->destroy_monitor_table(v, mfn); 17.1238 + v->arch.paging.mode->shadow.destroy_monitor_table(v, mfn); 17.1239 v->arch.monitor_table = pagetable_null(); 17.1240 } 17.1241 } 17.1242 } 17.1243 } 17.1244 17.1245 - if ( d->arch.shadow.total_pages != 0 ) 17.1246 + list_for_each_safe(entry, n, &d->arch.paging.shadow.p2m_freelist) 17.1247 + { 17.1248 + list_del(entry); 17.1249 + pg = list_entry(entry, struct page_info, list); 17.1250 + shadow_free_p2m_page(d, pg); 17.1251 + } 17.1252 + 17.1253 + if ( d->arch.paging.shadow.total_pages != 0 ) 17.1254 { 17.1255 SHADOW_PRINTK("teardown of domain %u starts." 17.1256 " Shadow pages total = %u, free = %u, p2m=%u\n", 17.1257 d->domain_id, 17.1258 - d->arch.shadow.total_pages, 17.1259 - d->arch.shadow.free_pages, 17.1260 - d->arch.shadow.p2m_pages); 17.1261 + d->arch.paging.shadow.total_pages, 17.1262 + d->arch.paging.shadow.free_pages, 17.1263 + d->arch.paging.shadow.p2m_pages); 17.1264 /* Destroy all the shadows and release memory to domheap */ 17.1265 sh_set_allocation(d, 0, NULL); 17.1266 /* Release the hash table back to xenheap */ 17.1267 - if (d->arch.shadow.hash_table) 17.1268 + if (d->arch.paging.shadow.hash_table) 17.1269 shadow_hash_teardown(d); 17.1270 /* Release the log-dirty bitmap of dirtied pages */ 17.1271 sh_free_log_dirty_bitmap(d); 17.1272 /* Should not have any more memory held */ 17.1273 SHADOW_PRINTK("teardown done." 17.1274 " Shadow pages total = %u, free = %u, p2m=%u\n", 17.1275 - d->arch.shadow.total_pages, 17.1276 - d->arch.shadow.free_pages, 17.1277 - d->arch.shadow.p2m_pages); 17.1278 - ASSERT(d->arch.shadow.total_pages == 0); 17.1279 + d->arch.paging.shadow.total_pages, 17.1280 + d->arch.paging.shadow.free_pages, 17.1281 + d->arch.paging.shadow.p2m_pages); 17.1282 + ASSERT(d->arch.paging.shadow.total_pages == 0); 17.1283 } 17.1284 17.1285 /* We leave the "permanent" shadow modes enabled, but clear the 17.1286 * log-dirty mode bit. 
We don't want any more mark_dirty() 17.1287 * calls now that we've torn down the bitmap */ 17.1288 - d->arch.shadow.mode &= ~SHM2_log_dirty; 17.1289 + d->arch.paging.mode &= ~PG_log_dirty; 17.1290 17.1291 shadow_unlock(d); 17.1292 } 17.1293 @@ -2782,30 +2491,28 @@ void shadow_teardown(struct domain *d) 17.1294 void shadow_final_teardown(struct domain *d) 17.1295 /* Called by arch_domain_destroy(), when it's safe to pull down the p2m map. */ 17.1296 { 17.1297 - 17.1298 SHADOW_PRINTK("dom %u final teardown starts." 17.1299 " Shadow pages total = %u, free = %u, p2m=%u\n", 17.1300 d->domain_id, 17.1301 - d->arch.shadow.total_pages, 17.1302 - d->arch.shadow.free_pages, 17.1303 - d->arch.shadow.p2m_pages); 17.1304 + d->arch.paging.shadow.total_pages, 17.1305 + d->arch.paging.shadow.free_pages, 17.1306 + d->arch.paging.shadow.p2m_pages); 17.1307 17.1308 /* Double-check that the domain didn't have any shadow memory. 17.1309 * It is possible for a domain that never got domain_kill()ed 17.1310 * to get here with its shadow allocation intact. */ 17.1311 - if ( d->arch.shadow.total_pages != 0 ) 17.1312 + if ( d->arch.paging.shadow.total_pages != 0 ) 17.1313 shadow_teardown(d); 17.1314 17.1315 /* It is now safe to pull down the p2m map. */ 17.1316 - if ( d->arch.shadow.p2m_pages != 0 ) 17.1317 - shadow_p2m_teardown(d); 17.1318 + p2m_teardown(d); 17.1319 17.1320 SHADOW_PRINTK("dom %u final teardown done." 17.1321 " Shadow pages total = %u, free = %u, p2m=%u\n", 17.1322 d->domain_id, 17.1323 - d->arch.shadow.total_pages, 17.1324 - d->arch.shadow.free_pages, 17.1325 - d->arch.shadow.p2m_pages); 17.1326 + d->arch.paging.shadow.total_pages, 17.1327 + d->arch.paging.shadow.free_pages, 17.1328 + d->arch.paging.shadow.p2m_pages); 17.1329 } 17.1330 17.1331 static int shadow_one_bit_enable(struct domain *d, u32 mode) 17.1332 @@ -2814,12 +2521,14 @@ static int shadow_one_bit_enable(struct 17.1333 ASSERT(shadow_locked_by_me(d)); 17.1334 17.1335 /* Sanity check the call */ 17.1336 - if ( d == current->domain || (d->arch.shadow.mode & mode) ) 17.1337 + if ( d == current->domain || (d->arch.paging.mode & mode) ) 17.1338 { 17.1339 return -EINVAL; 17.1340 } 17.1341 17.1342 - if ( d->arch.shadow.mode == 0 ) 17.1343 + mode |= PG_SH_enable; 17.1344 + 17.1345 + if ( d->arch.paging.mode == 0 ) 17.1346 { 17.1347 /* Init the shadow memory allocation and the hash table */ 17.1348 if ( sh_set_allocation(d, 1, NULL) != 0 17.1349 @@ -2831,7 +2540,7 @@ static int shadow_one_bit_enable(struct 17.1350 } 17.1351 17.1352 /* Update the bits */ 17.1353 - sh_new_mode(d, d->arch.shadow.mode | mode); 17.1354 + sh_new_mode(d, d->arch.paging.mode | mode); 17.1355 17.1356 return 0; 17.1357 } 17.1358 @@ -2843,26 +2552,26 @@ static int shadow_one_bit_disable(struct 17.1359 ASSERT(shadow_locked_by_me(d)); 17.1360 17.1361 /* Sanity check the call */ 17.1362 - if ( d == current->domain || !(d->arch.shadow.mode & mode) ) 17.1363 + if ( d == current->domain || !(d->arch.paging.mode & mode) ) 17.1364 { 17.1365 return -EINVAL; 17.1366 } 17.1367 17.1368 /* Update the bits */ 17.1369 - sh_new_mode(d, d->arch.shadow.mode & ~mode); 17.1370 - if ( d->arch.shadow.mode == 0 ) 17.1371 + sh_new_mode(d, d->arch.paging.mode & ~mode); 17.1372 + if ( d->arch.paging.mode == 0 ) 17.1373 { 17.1374 /* Get this domain off shadows */ 17.1375 SHADOW_PRINTK("un-shadowing of domain %u starts." 
17.1376 " Shadow pages total = %u, free = %u, p2m=%u\n", 17.1377 d->domain_id, 17.1378 - d->arch.shadow.total_pages, 17.1379 - d->arch.shadow.free_pages, 17.1380 - d->arch.shadow.p2m_pages); 17.1381 + d->arch.paging.shadow.total_pages, 17.1382 + d->arch.paging.shadow.free_pages, 17.1383 + d->arch.paging.shadow.p2m_pages); 17.1384 for_each_vcpu(d, v) 17.1385 { 17.1386 - if ( v->arch.shadow.mode ) 17.1387 - v->arch.shadow.mode->detach_old_tables(v); 17.1388 + if ( v->arch.paging.mode ) 17.1389 + v->arch.paging.mode->shadow.detach_old_tables(v); 17.1390 #if CONFIG_PAGING_LEVELS == 4 17.1391 if ( !(v->arch.flags & TF_kernel_mode) ) 17.1392 make_cr3(v, pagetable_get_pfn(v->arch.guest_table_user)); 17.1393 @@ -2885,9 +2594,9 @@ static int shadow_one_bit_disable(struct 17.1394 SHADOW_PRINTK("un-shadowing of domain %u done." 17.1395 " Shadow pages total = %u, free = %u, p2m=%u\n", 17.1396 d->domain_id, 17.1397 - d->arch.shadow.total_pages, 17.1398 - d->arch.shadow.free_pages, 17.1399 - d->arch.shadow.p2m_pages); 17.1400 + d->arch.paging.shadow.total_pages, 17.1401 + d->arch.paging.shadow.free_pages, 17.1402 + d->arch.paging.shadow.p2m_pages); 17.1403 } 17.1404 17.1405 return 0; 17.1406 @@ -2909,7 +2618,7 @@ static int shadow_test_enable(struct dom 17.1407 goto out; 17.1408 } 17.1409 17.1410 - ret = shadow_one_bit_enable(d, SHM2_enable); 17.1411 + ret = shadow_one_bit_enable(d, PG_SH_enable); 17.1412 out: 17.1413 shadow_unlock(d); 17.1414 domain_unpause(d); 17.1415 @@ -2923,7 +2632,7 @@ static int shadow_test_disable(struct do 17.1416 17.1417 domain_pause(d); 17.1418 shadow_lock(d); 17.1419 - ret = shadow_one_bit_disable(d, SHM2_enable); 17.1420 + ret = shadow_one_bit_disable(d, PG_SH_enable); 17.1421 shadow_unlock(d); 17.1422 domain_unpause(d); 17.1423 17.1424 @@ -2933,19 +2642,19 @@ static int shadow_test_disable(struct do 17.1425 static int 17.1426 sh_alloc_log_dirty_bitmap(struct domain *d) 17.1427 { 17.1428 - ASSERT(d->arch.shadow.dirty_bitmap == NULL); 17.1429 - d->arch.shadow.dirty_bitmap_size = 17.1430 + ASSERT(d->arch.paging.shadow.dirty_bitmap == NULL); 17.1431 + d->arch.paging.shadow.dirty_bitmap_size = 17.1432 (arch_get_max_pfn(d) + (BITS_PER_LONG - 1)) & 17.1433 ~(BITS_PER_LONG - 1); 17.1434 - d->arch.shadow.dirty_bitmap = 17.1435 + d->arch.paging.shadow.dirty_bitmap = 17.1436 xmalloc_array(unsigned long, 17.1437 - d->arch.shadow.dirty_bitmap_size / BITS_PER_LONG); 17.1438 - if ( d->arch.shadow.dirty_bitmap == NULL ) 17.1439 + d->arch.paging.shadow.dirty_bitmap_size / BITS_PER_LONG); 17.1440 + if ( d->arch.paging.shadow.dirty_bitmap == NULL ) 17.1441 { 17.1442 - d->arch.shadow.dirty_bitmap_size = 0; 17.1443 + d->arch.paging.shadow.dirty_bitmap_size = 0; 17.1444 return -ENOMEM; 17.1445 } 17.1446 - memset(d->arch.shadow.dirty_bitmap, 0, d->arch.shadow.dirty_bitmap_size/8); 17.1447 + memset(d->arch.paging.shadow.dirty_bitmap, 0, d->arch.paging.shadow.dirty_bitmap_size/8); 17.1448 17.1449 return 0; 17.1450 } 17.1451 @@ -2953,11 +2662,11 @@ sh_alloc_log_dirty_bitmap(struct domain 17.1452 static void 17.1453 sh_free_log_dirty_bitmap(struct domain *d) 17.1454 { 17.1455 - d->arch.shadow.dirty_bitmap_size = 0; 17.1456 - if ( d->arch.shadow.dirty_bitmap ) 17.1457 + d->arch.paging.shadow.dirty_bitmap_size = 0; 17.1458 + if ( d->arch.paging.shadow.dirty_bitmap ) 17.1459 { 17.1460 - xfree(d->arch.shadow.dirty_bitmap); 17.1461 - d->arch.shadow.dirty_bitmap = NULL; 17.1462 + xfree(d->arch.paging.shadow.dirty_bitmap); 17.1463 + d->arch.paging.shadow.dirty_bitmap = NULL; 17.1464 } 17.1465 } 17.1466 
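The sh_alloc_log_dirty_bitmap() hunk above sizes the dirty bitmap by rounding the maximum pfn up to a multiple of BITS_PER_LONG and then allocating one unsigned long per BITS_PER_LONG bits. The arithmetic on its own (a sketch; bitmap_longs() is an illustrative name):

/* Sketch of the sizing used by sh_alloc_log_dirty_bitmap() above: one bit
 * per pfn, rounded up so the bitmap is a whole number of unsigned longs. */
static unsigned long bitmap_longs(unsigned long max_pfn)
{
    unsigned long bits = (max_pfn + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1UL);
    return bits / BITS_PER_LONG;   /* number of unsigned longs to allocate */
}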
17.1467 @@ -2989,7 +2698,7 @@ static int shadow_log_dirty_enable(struc 17.1468 goto out; 17.1469 } 17.1470 17.1471 - ret = shadow_one_bit_enable(d, SHM2_log_dirty); 17.1472 + ret = shadow_one_bit_enable(d, PG_log_dirty); 17.1473 if ( ret != 0 ) 17.1474 sh_free_log_dirty_bitmap(d); 17.1475 17.1476 @@ -3005,7 +2714,7 @@ static int shadow_log_dirty_disable(stru 17.1477 17.1478 domain_pause(d); 17.1479 shadow_lock(d); 17.1480 - ret = shadow_one_bit_disable(d, SHM2_log_dirty); 17.1481 + ret = shadow_one_bit_disable(d, PG_log_dirty); 17.1482 if ( !shadow_mode_log_dirty(d) ) 17.1483 sh_free_log_dirty_bitmap(d); 17.1484 shadow_unlock(d); 17.1485 @@ -3017,100 +2726,52 @@ static int shadow_log_dirty_disable(stru 17.1486 /**************************************************************************/ 17.1487 /* P2M map manipulations */ 17.1488 17.1489 -static void 17.1490 -sh_p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn) 17.1491 -{ 17.1492 - struct vcpu *v; 17.1493 - 17.1494 - if ( !shadow_mode_translate(d) ) 17.1495 - return; 17.1496 - 17.1497 - v = current; 17.1498 - if ( v->domain != d ) 17.1499 - v = d->vcpu[0]; 17.1500 - 17.1501 - SHADOW_DEBUG(P2M, "removing gfn=%#lx mfn=%#lx\n", gfn, mfn); 17.1502 - 17.1503 - ASSERT(mfn_x(sh_gfn_to_mfn(d, gfn)) == mfn); 17.1504 - //ASSERT(sh_mfn_to_gfn(d, mfn) == gfn); 17.1505 - 17.1506 - if ( v != NULL ) 17.1507 - { 17.1508 - sh_remove_all_shadows_and_parents(v, _mfn(mfn)); 17.1509 - if ( sh_remove_all_mappings(v, _mfn(mfn)) ) 17.1510 - flush_tlb_mask(d->domain_dirty_cpumask); 17.1511 - } 17.1512 - 17.1513 - shadow_set_p2m_entry(d, gfn, _mfn(INVALID_MFN)); 17.1514 - set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); 17.1515 -} 17.1516 - 17.1517 +/* shadow specific code which should be called when P2M table entry is updated 17.1518 + * with new content. It is responsible for update the entry, as well as other 17.1519 + * shadow processing jobs. 
17.1520 + */ 17.1521 void 17.1522 -shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn, 17.1523 - unsigned long mfn) 17.1524 -{ 17.1525 - shadow_lock(d); 17.1526 - shadow_audit_p2m(d); 17.1527 - sh_p2m_remove_page(d, gfn, mfn); 17.1528 - shadow_audit_p2m(d); 17.1529 - shadow_unlock(d); 17.1530 -} 17.1531 - 17.1532 -void 17.1533 -shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn, 17.1534 - unsigned long mfn) 17.1535 +shadow_write_p2m_entry(struct vcpu *v, unsigned long gfn, l1_pgentry_t *p, 17.1536 + l1_pgentry_t new, unsigned int level) 17.1537 { 17.1538 - unsigned long ogfn; 17.1539 - mfn_t omfn; 17.1540 - 17.1541 - if ( !shadow_mode_translate(d) ) 17.1542 - return; 17.1543 - 17.1544 + struct domain *d = v->domain; 17.1545 + mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table); 17.1546 + mfn_t mfn; 17.1547 + 17.1548 shadow_lock(d); 17.1549 - shadow_audit_p2m(d); 17.1550 - 17.1551 - SHADOW_DEBUG(P2M, "adding gfn=%#lx mfn=%#lx\n", gfn, mfn); 17.1552 - 17.1553 - omfn = sh_gfn_to_mfn(d, gfn); 17.1554 - if ( mfn_valid(omfn) ) 17.1555 - { 17.1556 - /* Get rid of the old mapping, especially any shadows */ 17.1557 - struct vcpu *v = current; 17.1558 - if ( v->domain != d ) 17.1559 - v = d->vcpu[0]; 17.1560 - if ( v != NULL ) 17.1561 - { 17.1562 - sh_remove_all_shadows_and_parents(v, omfn); 17.1563 - if ( sh_remove_all_mappings(v, omfn) ) 17.1564 - flush_tlb_mask(d->domain_dirty_cpumask); 17.1565 - } 17.1566 - set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); 17.1567 + 17.1568 + /* handle physmap_add and physmap_remove */ 17.1569 + mfn = gfn_to_mfn(d, gfn); 17.1570 + if ( v != NULL && level == 1 && mfn_valid(mfn) ) { 17.1571 + sh_remove_all_shadows_and_parents(v, mfn); 17.1572 + if ( sh_remove_all_mappings(v, mfn) ) 17.1573 + flush_tlb_mask(d->domain_dirty_cpumask); 17.1574 } 17.1575 - 17.1576 - ogfn = sh_mfn_to_gfn(d, _mfn(mfn)); 17.1577 - if ( 17.1578 -#ifdef __x86_64__ 17.1579 - (ogfn != 0x5555555555555555L) 17.1580 -#else 17.1581 - (ogfn != 0x55555555L) 17.1582 -#endif 17.1583 - && (ogfn != INVALID_M2P_ENTRY) 17.1584 - && (ogfn != gfn) ) 17.1585 - { 17.1586 - /* This machine frame is already mapped at another physical address */ 17.1587 - SHADOW_DEBUG(P2M, "aliased! 
mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n", 17.1588 - mfn, ogfn, gfn); 17.1589 - if ( mfn_valid(omfn = sh_gfn_to_mfn(d, ogfn)) ) 17.1590 - { 17.1591 - SHADOW_DEBUG(P2M, "old gfn=%#lx -> mfn %#lx\n", 17.1592 - ogfn , mfn_x(omfn)); 17.1593 - if ( mfn_x(omfn) == mfn ) 17.1594 - sh_p2m_remove_page(d, ogfn, mfn); 17.1595 + 17.1596 + /* update the entry with new content */ 17.1597 + safe_write_pte(p, new); 17.1598 + 17.1599 + /* The P2M can be shadowed: keep the shadows synced */ 17.1600 + if ( d->vcpu[0] != NULL ) 17.1601 + (void)sh_validate_guest_entry(d->vcpu[0], table_mfn, p, sizeof(*p)); 17.1602 + 17.1603 + /* install P2M in monitors for PAE Xen */ 17.1604 +#if CONFIG_PAGING_LEVELS == 3 17.1605 + if ( level == 3 ) { 17.1606 + struct vcpu *v; 17.1607 + /* We have written to the p2m l3: need to sync the per-vcpu 17.1608 + * copies of it in the monitor tables */ 17.1609 + p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p); 17.1610 + /* Also, any vcpus running on shadows of the p2m need to 17.1611 + * reload their CR3s so the change propagates to the shadow */ 17.1612 + for_each_vcpu(d, v) { 17.1613 + if ( pagetable_get_pfn(v->arch.guest_table) 17.1614 + == pagetable_get_pfn(d->arch.phys_table) 17.1615 + && v->arch.paging.mode != NULL ) 17.1616 + v->arch.paging.mode->update_cr3(v, 0); 17.1617 } 17.1618 } 17.1619 - 17.1620 - shadow_set_p2m_entry(d, gfn, _mfn(mfn)); 17.1621 - set_gpfn_from_mfn(mfn, gfn); 17.1622 +#endif 17.1623 17.1624 #if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH) 17.1625 /* If we're doing FAST_FAULT_PATH, then shadow mode may have 17.1626 @@ -3122,7 +2783,6 @@ shadow_guest_physmap_add_page(struct dom 17.1627 shadow_blow_tables(d); 17.1628 #endif 17.1629 17.1630 - shadow_audit_p2m(d); 17.1631 shadow_unlock(d); 17.1632 } 17.1633 17.1634 @@ -3151,11 +2811,11 @@ static int shadow_log_dirty_op( 17.1635 SHADOW_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n", 17.1636 (clean) ? "clean" : "peek", 17.1637 d->domain_id, 17.1638 - d->arch.shadow.fault_count, 17.1639 - d->arch.shadow.dirty_count); 17.1640 - 17.1641 - sc->stats.fault_count = d->arch.shadow.fault_count; 17.1642 - sc->stats.dirty_count = d->arch.shadow.dirty_count; 17.1643 + d->arch.paging.shadow.fault_count, 17.1644 + d->arch.paging.shadow.dirty_count); 17.1645 + 17.1646 + sc->stats.fault_count = d->arch.paging.shadow.fault_count; 17.1647 + sc->stats.dirty_count = d->arch.paging.shadow.dirty_count; 17.1648 17.1649 if ( clean ) 17.1650 { 17.1651 @@ -3164,22 +2824,22 @@ static int shadow_log_dirty_op( 17.1652 * but for now, we just unshadow everything except Xen. */ 17.1653 shadow_blow_tables(d); 17.1654 17.1655 - d->arch.shadow.fault_count = 0; 17.1656 - d->arch.shadow.dirty_count = 0; 17.1657 + d->arch.paging.shadow.fault_count = 0; 17.1658 + d->arch.paging.shadow.dirty_count = 0; 17.1659 } 17.1660 17.1661 if ( guest_handle_is_null(sc->dirty_bitmap) ) 17.1662 /* caller may have wanted just to clean the state or access stats. */ 17.1663 peek = 0; 17.1664 17.1665 - if ( (peek || clean) && (d->arch.shadow.dirty_bitmap == NULL) ) 17.1666 + if ( (peek || clean) && (d->arch.paging.shadow.dirty_bitmap == NULL) ) 17.1667 { 17.1668 rv = -EINVAL; /* perhaps should be ENOMEM? 
*/ 17.1669 goto out; 17.1670 } 17.1671 17.1672 - if ( sc->pages > d->arch.shadow.dirty_bitmap_size ) 17.1673 - sc->pages = d->arch.shadow.dirty_bitmap_size; 17.1674 + if ( sc->pages > d->arch.paging.shadow.dirty_bitmap_size ) 17.1675 + sc->pages = d->arch.paging.shadow.dirty_bitmap_size; 17.1676 17.1677 #define CHUNK (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */ 17.1678 for ( i = 0; i < sc->pages; i += CHUNK ) 17.1679 @@ -3192,7 +2852,7 @@ static int shadow_log_dirty_op( 17.1680 { 17.1681 if ( copy_to_guest_offset( 17.1682 sc->dirty_bitmap, i/8, 17.1683 - (uint8_t *)d->arch.shadow.dirty_bitmap + (i/8), bytes) ) 17.1684 + (uint8_t *)d->arch.paging.shadow.dirty_bitmap + (i/8), bytes) ) 17.1685 { 17.1686 rv = -EFAULT; 17.1687 goto out; 17.1688 @@ -3200,7 +2860,7 @@ static int shadow_log_dirty_op( 17.1689 } 17.1690 17.1691 if ( clean ) 17.1692 - memset((uint8_t *)d->arch.shadow.dirty_bitmap + (i/8), 0, bytes); 17.1693 + memset((uint8_t *)d->arch.paging.shadow.dirty_bitmap + (i/8), 0, bytes); 17.1694 } 17.1695 #undef CHUNK 17.1696 17.1697 @@ -3221,7 +2881,7 @@ void sh_mark_dirty(struct domain *d, mfn 17.1698 if ( !shadow_mode_log_dirty(d) || !mfn_valid(gmfn) ) 17.1699 return; 17.1700 17.1701 - ASSERT(d->arch.shadow.dirty_bitmap != NULL); 17.1702 + ASSERT(d->arch.paging.shadow.dirty_bitmap != NULL); 17.1703 17.1704 /* We /really/ mean PFN here, even for non-translated guests. */ 17.1705 pfn = get_gpfn_from_mfn(mfn_x(gmfn)); 17.1706 @@ -3235,24 +2895,24 @@ void sh_mark_dirty(struct domain *d, mfn 17.1707 return; 17.1708 17.1709 /* N.B. Can use non-atomic TAS because protected by shadow_lock. */ 17.1710 - if ( likely(pfn < d->arch.shadow.dirty_bitmap_size) ) 17.1711 + if ( likely(pfn < d->arch.paging.shadow.dirty_bitmap_size) ) 17.1712 { 17.1713 - if ( !__test_and_set_bit(pfn, d->arch.shadow.dirty_bitmap) ) 17.1714 + if ( !__test_and_set_bit(pfn, d->arch.paging.shadow.dirty_bitmap) ) 17.1715 { 17.1716 SHADOW_DEBUG(LOGDIRTY, 17.1717 - "marked mfn %" SH_PRI_mfn " (pfn=%lx), dom %d\n", 17.1718 + "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n", 17.1719 mfn_x(gmfn), pfn, d->domain_id); 17.1720 - d->arch.shadow.dirty_count++; 17.1721 + d->arch.paging.shadow.dirty_count++; 17.1722 } 17.1723 } 17.1724 else 17.1725 { 17.1726 SHADOW_PRINTK("mark_dirty OOR! " 17.1727 - "mfn=%" SH_PRI_mfn " pfn=%lx max=%x (dom %d)\n" 17.1728 + "mfn=%" PRI_mfn " pfn=%lx max=%x (dom %d)\n" 17.1729 "owner=%d c=%08x t=%" PRtype_info "\n", 17.1730 mfn_x(gmfn), 17.1731 pfn, 17.1732 - d->arch.shadow.dirty_bitmap_size, 17.1733 + d->arch.paging.shadow.dirty_bitmap_size, 17.1734 d->domain_id, 17.1735 (page_get_owner(mfn_to_page(gmfn)) 17.1736 ? 
page_get_owner(mfn_to_page(gmfn))->domain_id 17.1737 @@ -3292,7 +2952,7 @@ int shadow_domctl(struct domain *d, 17.1738 return rc; 17.1739 if ( is_hvm_domain(d) ) 17.1740 return -EINVAL; 17.1741 - if ( d->arch.shadow.mode & SHM2_enable ) 17.1742 + if ( d->arch.paging.mode & PG_SH_enable ) 17.1743 if ( (rc = shadow_test_disable(d)) != 0 ) 17.1744 return rc; 17.1745 return 0; 17.1746 @@ -3304,7 +2964,7 @@ int shadow_domctl(struct domain *d, 17.1747 return shadow_log_dirty_enable(d); 17.1748 17.1749 case XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE: 17.1750 - return shadow_enable(d, SHM2_refcounts|SHM2_translate); 17.1751 + return shadow_enable(d, PG_refcounts|PG_translate); 17.1752 17.1753 case XEN_DOMCTL_SHADOW_OP_CLEAN: 17.1754 case XEN_DOMCTL_SHADOW_OP_PEEK: 17.1755 @@ -3313,7 +2973,7 @@ int shadow_domctl(struct domain *d, 17.1756 case XEN_DOMCTL_SHADOW_OP_ENABLE: 17.1757 if ( sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY ) 17.1758 return shadow_log_dirty_enable(d); 17.1759 - return shadow_enable(d, sc->mode << SHM2_shift); 17.1760 + return shadow_enable(d, sc->mode << PG_mode_shift); 17.1761 17.1762 case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION: 17.1763 sc->mb = shadow_get_allocation(d); 17.1764 @@ -3390,7 +3050,7 @@ void shadow_audit_tables(struct vcpu *v) 17.1765 else 17.1766 { 17.1767 /* Audit only the current mode's tables */ 17.1768 - switch ( v->arch.shadow.mode->guest_levels ) 17.1769 + switch ( v->arch.paging.mode->guest_levels ) 17.1770 { 17.1771 case 2: mask = (SHF_L1_32|SHF_FL1_32|SHF_L2_32); break; 17.1772 case 3: mask = (SHF_L1_PAE|SHF_FL1_PAE|SHF_L2_PAE 17.1773 @@ -3406,199 +3066,6 @@ void shadow_audit_tables(struct vcpu *v) 17.1774 17.1775 #endif /* Shadow audit */ 17.1776 17.1777 - 17.1778 -/**************************************************************************/ 17.1779 -/* Auditing p2m tables */ 17.1780 - 17.1781 -#if SHADOW_AUDIT & SHADOW_AUDIT_P2M 17.1782 - 17.1783 -void shadow_audit_p2m(struct domain *d) 17.1784 -{ 17.1785 - struct list_head *entry; 17.1786 - struct page_info *page; 17.1787 - struct domain *od; 17.1788 - unsigned long mfn, gfn, m2pfn, lp2mfn = 0; 17.1789 - mfn_t p2mfn; 17.1790 - unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0; 17.1791 - int test_linear; 17.1792 - 17.1793 - if ( !(SHADOW_AUDIT_ENABLE) || !shadow_mode_translate(d) ) 17.1794 - return; 17.1795 - 17.1796 - //SHADOW_PRINTK("p2m audit starts\n"); 17.1797 - 17.1798 - test_linear = ( (d == current->domain) 17.1799 - && !pagetable_is_null(current->arch.monitor_table) ); 17.1800 - if ( test_linear ) 17.1801 - local_flush_tlb(); 17.1802 - 17.1803 - /* Audit part one: walk the domain's page allocation list, checking 17.1804 - * the m2p entries. 
*/ 17.1805 - for ( entry = d->page_list.next; 17.1806 - entry != &d->page_list; 17.1807 - entry = entry->next ) 17.1808 - { 17.1809 - page = list_entry(entry, struct page_info, list); 17.1810 - mfn = mfn_x(page_to_mfn(page)); 17.1811 - 17.1812 - // SHADOW_PRINTK("auditing guest page, mfn=%#lx\n", mfn); 17.1813 - 17.1814 - od = page_get_owner(page); 17.1815 - 17.1816 - if ( od != d ) 17.1817 - { 17.1818 - SHADOW_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n", 17.1819 - mfn, od, (od?od->domain_id:-1), d, d->domain_id); 17.1820 - continue; 17.1821 - } 17.1822 - 17.1823 - gfn = get_gpfn_from_mfn(mfn); 17.1824 - if ( gfn == INVALID_M2P_ENTRY ) 17.1825 - { 17.1826 - orphans_i++; 17.1827 - //SHADOW_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n", 17.1828 - // mfn); 17.1829 - continue; 17.1830 - } 17.1831 - 17.1832 - if ( gfn == 0x55555555 ) 17.1833 - { 17.1834 - orphans_d++; 17.1835 - //SHADOW_PRINTK("orphaned guest page: mfn=%#lx has debug gfn\n", 17.1836 - // mfn); 17.1837 - continue; 17.1838 - } 17.1839 - 17.1840 - p2mfn = sh_gfn_to_mfn_foreign(d, gfn); 17.1841 - if ( mfn_x(p2mfn) != mfn ) 17.1842 - { 17.1843 - mpbad++; 17.1844 - SHADOW_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx" 17.1845 - " (-> gfn %#lx)\n", 17.1846 - mfn, gfn, mfn_x(p2mfn), 17.1847 - (mfn_valid(p2mfn) 17.1848 - ? get_gpfn_from_mfn(mfn_x(p2mfn)) 17.1849 - : -1u)); 17.1850 - /* This m2p entry is stale: the domain has another frame in 17.1851 - * this physical slot. No great disaster, but for neatness, 17.1852 - * blow away the m2p entry. */ 17.1853 - set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); 17.1854 - } 17.1855 - 17.1856 - if ( test_linear && (gfn <= d->arch.max_mapped_pfn) ) 17.1857 - { 17.1858 - lp2mfn = gfn_to_mfn_current(gfn); 17.1859 - if ( mfn_x(lp2mfn) != mfn_x(p2mfn) ) 17.1860 - { 17.1861 - SHADOW_PRINTK("linear mismatch gfn %#lx -> mfn %#lx " 17.1862 - "(!= mfn %#lx)\n", gfn, 17.1863 - mfn_x(lp2mfn), mfn_x(p2mfn)); 17.1864 - } 17.1865 - } 17.1866 - 17.1867 - // SHADOW_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx, lp2mfn=%#lx\n", 17.1868 - // mfn, gfn, p2mfn, lp2mfn); 17.1869 - } 17.1870 - 17.1871 - /* Audit part two: walk the domain's p2m table, checking the entries. */ 17.1872 - if ( pagetable_get_pfn(d->arch.phys_table) != 0 ) 17.1873 - { 17.1874 - l2_pgentry_t *l2e; 17.1875 - l1_pgentry_t *l1e; 17.1876 - int i1, i2; 17.1877 - 17.1878 -#if CONFIG_PAGING_LEVELS == 4 17.1879 - l4_pgentry_t *l4e; 17.1880 - l3_pgentry_t *l3e; 17.1881 - int i3, i4; 17.1882 - l4e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table)); 17.1883 -#elif CONFIG_PAGING_LEVELS == 3 17.1884 - l3_pgentry_t *l3e; 17.1885 - int i3; 17.1886 - l3e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table)); 17.1887 -#else /* CONFIG_PAGING_LEVELS == 2 */ 17.1888 - l2e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table)); 17.1889 -#endif 17.1890 - 17.1891 - gfn = 0; 17.1892 -#if CONFIG_PAGING_LEVELS >= 3 17.1893 -#if CONFIG_PAGING_LEVELS >= 4 17.1894 - for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ ) 17.1895 - { 17.1896 - if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) ) 17.1897 - { 17.1898 - gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT); 17.1899 - continue; 17.1900 - } 17.1901 - l3e = sh_map_domain_page(_mfn(l4e_get_pfn(l4e[i4]))); 17.1902 -#endif /* now at levels 3 or 4... */ 17.1903 - for ( i3 = 0; 17.1904 - i3 < ((CONFIG_PAGING_LEVELS==4) ? 
L3_PAGETABLE_ENTRIES : 8); 17.1905 - i3++ ) 17.1906 - { 17.1907 - if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) ) 17.1908 - { 17.1909 - gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT); 17.1910 - continue; 17.1911 - } 17.1912 - l2e = sh_map_domain_page(_mfn(l3e_get_pfn(l3e[i3]))); 17.1913 -#endif /* all levels... */ 17.1914 - for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ ) 17.1915 - { 17.1916 - if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) ) 17.1917 - { 17.1918 - gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT); 17.1919 - continue; 17.1920 - } 17.1921 - l1e = sh_map_domain_page(_mfn(l2e_get_pfn(l2e[i2]))); 17.1922 - 17.1923 - for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ ) 17.1924 - { 17.1925 - if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) ) 17.1926 - continue; 17.1927 - mfn = l1e_get_pfn(l1e[i1]); 17.1928 - ASSERT(mfn_valid(_mfn(mfn))); 17.1929 - m2pfn = get_gpfn_from_mfn(mfn); 17.1930 - if ( m2pfn != gfn ) 17.1931 - { 17.1932 - pmbad++; 17.1933 - SHADOW_PRINTK("mismatch: gfn %#lx -> mfn %#lx" 17.1934 - " -> gfn %#lx\n", gfn, mfn, m2pfn); 17.1935 - BUG(); 17.1936 - } 17.1937 - } 17.1938 - sh_unmap_domain_page(l1e); 17.1939 - } 17.1940 -#if CONFIG_PAGING_LEVELS >= 3 17.1941 - sh_unmap_domain_page(l2e); 17.1942 - } 17.1943 -#if CONFIG_PAGING_LEVELS >= 4 17.1944 - sh_unmap_domain_page(l3e); 17.1945 - } 17.1946 -#endif 17.1947 -#endif 17.1948 - 17.1949 -#if CONFIG_PAGING_LEVELS == 4 17.1950 - sh_unmap_domain_page(l4e); 17.1951 -#elif CONFIG_PAGING_LEVELS == 3 17.1952 - sh_unmap_domain_page(l3e); 17.1953 -#else /* CONFIG_PAGING_LEVELS == 2 */ 17.1954 - sh_unmap_domain_page(l2e); 17.1955 -#endif 17.1956 - 17.1957 - } 17.1958 - 17.1959 - //SHADOW_PRINTK("p2m audit complete\n"); 17.1960 - //if ( orphans_i | orphans_d | mpbad | pmbad ) 17.1961 - // SHADOW_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n", 17.1962 - // orphans_i + orphans_d, orphans_i, orphans_d, 17.1963 - if ( mpbad | pmbad ) 17.1964 - SHADOW_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n", 17.1965 - pmbad, mpbad); 17.1966 -} 17.1967 - 17.1968 -#endif /* p2m audit */ 17.1969 - 17.1970 /* 17.1971 * Local variables: 17.1972 * mode: C
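The common code above treats d->arch.paging.mode as a flag word: shadow_one_bit_enable() always folds in PG_SH_enable, XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE maps to PG_refcounts|PG_translate, and the generic ENABLE op shifts the toolstack's mode bits up by PG_mode_shift. A minimal sketch of that composition is below; the flag names come from this patch, but the numeric bit positions are invented purely for illustration (the real definitions live in the new paging header, which is not part of this excerpt):

    /* Hypothetical bit layout -- for illustration only. */
    #define PG_mode_shift  10
    #define PG_SH_enable   (1u << PG_mode_shift)   /* some shadow feature is on  */
    #define PG_refcounts   (2u << PG_mode_shift)   /* shadow keeps the refcounts */
    #define PG_log_dirty   (4u << PG_mode_shift)   /* dirty-page logging         */
    #define PG_translate   (8u << PG_mode_shift)   /* p2m translation            */

    /* What shadow_one_bit_enable() effectively does to the mode word: any
     * single feature bit also implies the master enable bit. */
    static unsigned int mode_with_bit(unsigned int cur_mode, unsigned int bit)
    {
        return cur_mode | bit | PG_SH_enable;
    }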
18.1 --- a/xen/arch/x86/mm/shadow/multi.c Tue Feb 13 15:32:25 2007 +0000 18.2 +++ b/xen/arch/x86/mm/shadow/multi.c Wed Feb 14 12:02:20 2007 +0000 18.3 @@ -353,21 +353,21 @@ static inline void print_gw(walk_t *gw) 18.4 SHADOW_PRINTK("GUEST WALK TO %#lx:\n", gw->va); 18.5 #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */ 18.6 #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */ 18.7 - SHADOW_PRINTK(" l4mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l4mfn)); 18.8 + SHADOW_PRINTK(" l4mfn=%" PRI_mfn "\n", mfn_x(gw->l4mfn)); 18.9 SHADOW_PRINTK(" l4e=%p\n", gw->l4e); 18.10 if ( gw->l4e ) 18.11 SHADOW_PRINTK(" *l4e=%" SH_PRI_gpte "\n", gw->l4e->l4); 18.12 #endif /* PAE or 64... */ 18.13 - SHADOW_PRINTK(" l3mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l3mfn)); 18.14 + SHADOW_PRINTK(" l3mfn=%" PRI_mfn "\n", mfn_x(gw->l3mfn)); 18.15 SHADOW_PRINTK(" l3e=%p\n", gw->l3e); 18.16 if ( gw->l3e ) 18.17 SHADOW_PRINTK(" *l3e=%" SH_PRI_gpte "\n", gw->l3e->l3); 18.18 #endif /* All levels... */ 18.19 - SHADOW_PRINTK(" l2mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l2mfn)); 18.20 + SHADOW_PRINTK(" l2mfn=%" PRI_mfn "\n", mfn_x(gw->l2mfn)); 18.21 SHADOW_PRINTK(" l2e=%p\n", gw->l2e); 18.22 if ( gw->l2e ) 18.23 SHADOW_PRINTK(" *l2e=%" SH_PRI_gpte "\n", gw->l2e->l2); 18.24 - SHADOW_PRINTK(" l1mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l1mfn)); 18.25 + SHADOW_PRINTK(" l1mfn=%" PRI_mfn "\n", mfn_x(gw->l1mfn)); 18.26 SHADOW_PRINTK(" l1e=%p\n", gw->l1e); 18.27 if ( gw->l1e ) 18.28 SHADOW_PRINTK(" *l1e=%" SH_PRI_gpte "\n", gw->l1e->l1); 18.29 @@ -1572,7 +1572,7 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf 18.30 #if GUEST_PAGING_LEVELS == 4 18.31 #if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL) 18.32 if ( shadow_type == SH_type_l4_64_shadow && 18.33 - unlikely(v->domain->arch.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL) ) 18.34 + unlikely(v->domain->arch.paging.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL) ) 18.35 { 18.36 /* We're shadowing a new l4, but we've been assuming the guest uses 18.37 * only one l4 per vcpu and context switches using an l4 entry. 18.38 @@ -1584,7 +1584,7 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf 18.39 struct shadow_page_info *sp; 18.40 struct vcpu *v2; 18.41 int l4count = 0, vcpus = 0; 18.42 - list_for_each(l, &v->domain->arch.shadow.pinned_shadows) 18.43 + list_for_each(l, &v->domain->arch.paging.shadow.pinned_shadows) 18.44 { 18.45 sp = list_entry(l, struct shadow_page_info, list); 18.46 if ( sp->type == SH_type_l4_64_shadow ) 18.47 @@ -1595,13 +1595,13 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf 18.48 if ( l4count > 2 * vcpus ) 18.49 { 18.50 /* Unpin all the pinned l3 tables, and don't pin any more. 
*/ 18.51 - list_for_each_safe(l, t, &v->domain->arch.shadow.pinned_shadows) 18.52 + list_for_each_safe(l, t, &v->domain->arch.paging.shadow.pinned_shadows) 18.53 { 18.54 sp = list_entry(l, struct shadow_page_info, list); 18.55 if ( sp->type == SH_type_l3_64_shadow ) 18.56 sh_unpin(v, shadow_page_to_mfn(sp)); 18.57 } 18.58 - v->domain->arch.shadow.opt_flags &= ~SHOPT_LINUX_L3_TOPLEVEL; 18.59 + v->domain->arch.paging.shadow.opt_flags &= ~SHOPT_LINUX_L3_TOPLEVEL; 18.60 } 18.61 } 18.62 #endif 18.63 @@ -1641,7 +1641,7 @@ make_fl1_shadow(struct vcpu *v, gfn_t gf 18.64 mfn_t smfn = shadow_alloc(v->domain, SH_type_fl1_shadow, 18.65 (unsigned long) gfn_x(gfn)); 18.66 18.67 - SHADOW_DEBUG(MAKE_SHADOW, "(%" SH_PRI_gfn ")=>%" SH_PRI_mfn "\n", 18.68 + SHADOW_DEBUG(MAKE_SHADOW, "(%" SH_PRI_gfn ")=>%" PRI_mfn "\n", 18.69 gfn_x(gfn), mfn_x(smfn)); 18.70 18.71 set_fl1_shadow_status(v, gfn, smfn); 18.72 @@ -1851,7 +1851,7 @@ static shadow_l2e_t * shadow_get_and_cre 18.73 #elif GUEST_PAGING_LEVELS == 3 /* PAE... */ 18.74 /* We never demand-shadow PAE l3es: they are only created in 18.75 * sh_update_cr3(). Check if the relevant sl3e is present. */ 18.76 - shadow_l3e_t *sl3e = ((shadow_l3e_t *)&v->arch.shadow.l3table) 18.77 + shadow_l3e_t *sl3e = ((shadow_l3e_t *)&v->arch.paging.shadow.l3table) 18.78 + shadow_l3_linear_offset(gw->va); 18.79 if ( !(shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) ) 18.80 return NULL; 18.81 @@ -2358,7 +2358,7 @@ static int validate_gl1e(struct vcpu *v, 18.82 gfn = guest_l1e_get_gfn(*new_gl1e); 18.83 gmfn = vcpu_gfn_to_mfn(v, gfn); 18.84 18.85 - mmio = (is_hvm_vcpu(v) && shadow_vcpu_mode_translate(v) && !mfn_valid(gmfn)); 18.86 + mmio = (is_hvm_vcpu(v) && paging_vcpu_mode_translate(v) && !mfn_valid(gmfn)); 18.87 l1e_propagate_from_guest(v, new_gl1e, _mfn(INVALID_MFN), gmfn, &new_sl1e, 18.88 ft_prefetch, mmio); 18.89 18.90 @@ -2506,7 +2506,7 @@ sh_map_and_validate_gl1e(struct vcpu *v, 18.91 static inline void check_for_early_unshadow(struct vcpu *v, mfn_t gmfn) 18.92 { 18.93 #if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW 18.94 - if ( v->arch.shadow.last_emulated_mfn == mfn_x(gmfn) && 18.95 + if ( v->arch.paging.shadow.last_emulated_mfn == mfn_x(gmfn) && 18.96 sh_mfn_is_a_page_table(gmfn) ) 18.97 { 18.98 u32 flags = mfn_to_page(gmfn)->shadow_flags; 18.99 @@ -2516,7 +2516,7 @@ static inline void check_for_early_unsha 18.100 sh_remove_shadows(v, gmfn, 1, 0 /* Fast, can fail to unshadow */ ); 18.101 } 18.102 } 18.103 - v->arch.shadow.last_emulated_mfn = mfn_x(gmfn); 18.104 + v->arch.paging.shadow.last_emulated_mfn = mfn_x(gmfn); 18.105 #endif 18.106 } 18.107 18.108 @@ -2524,7 +2524,7 @@ static inline void check_for_early_unsha 18.109 static inline void reset_early_unshadow(struct vcpu *v) 18.110 { 18.111 #if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW 18.112 - v->arch.shadow.last_emulated_mfn = INVALID_MFN; 18.113 + v->arch.paging.shadow.last_emulated_mfn = INVALID_MFN; 18.114 #endif 18.115 } 18.116 18.117 @@ -2589,7 +2589,7 @@ static void sh_prefetch(struct vcpu *v, 18.118 gfn = guest_l1e_get_gfn(gl1e); 18.119 gmfn = vcpu_gfn_to_mfn(v, gfn); 18.120 mmio = ( is_hvm_vcpu(v) 18.121 - && shadow_vcpu_mode_translate(v) 18.122 + && paging_vcpu_mode_translate(v) 18.123 && mmio_space(gfn_to_paddr(gfn)) ); 18.124 18.125 /* Propagate the entry. 
Safe to use a pointer to our local 18.126 @@ -2631,6 +2631,7 @@ static int sh_page_fault(struct vcpu *v, 18.127 SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n", 18.128 v->domain->domain_id, v->vcpu_id, va, regs->error_code); 18.129 18.130 + perfc_incrc(shadow_fault); 18.131 // 18.132 // XXX: Need to think about eventually mapping superpages directly in the 18.133 // shadow (when possible), as opposed to splintering them into a 18.134 @@ -2651,7 +2652,7 @@ static int sh_page_fault(struct vcpu *v, 18.135 if ( sh_l1e_is_gnp(sl1e) ) 18.136 { 18.137 if ( likely(!is_hvm_domain(d) || 18.138 - shadow_vcpu_mode_translate(v)) ) 18.139 + paging_vcpu_mode_translate(v)) ) 18.140 { 18.141 /* Not-present in a guest PT: pass to the guest as 18.142 * a not-present fault (by flipping two bits). */ 18.143 @@ -2701,7 +2702,7 @@ static int sh_page_fault(struct vcpu *v, 18.144 if ( unlikely(shadow_locked_by_me(d)) ) 18.145 { 18.146 SHADOW_ERROR("Recursive shadow fault: lock was taken by %s\n", 18.147 - d->arch.shadow.locker_function); 18.148 + d->arch.paging.shadow.locker_function); 18.149 return 0; 18.150 } 18.151 18.152 @@ -2726,7 +2727,7 @@ static int sh_page_fault(struct vcpu *v, 18.153 // 18.154 if ( unlikely(!(guest_l1e_get_flags(gw.eff_l1e) & _PAGE_PRESENT)) ) 18.155 { 18.156 - if ( is_hvm_domain(d) && !shadow_vcpu_mode_translate(v) ) 18.157 + if ( is_hvm_domain(d) && !paging_vcpu_mode_translate(v) ) 18.158 { 18.159 /* Not present in p2m map, means this is mmio */ 18.160 gpa = va; 18.161 @@ -2784,13 +2785,13 @@ static int sh_page_fault(struct vcpu *v, 18.162 gfn = guest_l1e_get_gfn(gw.eff_l1e); 18.163 gmfn = vcpu_gfn_to_mfn(v, gfn); 18.164 mmio = (is_hvm_domain(d) 18.165 - && shadow_vcpu_mode_translate(v) 18.166 + && paging_vcpu_mode_translate(v) 18.167 && mmio_space(gfn_to_paddr(gfn))); 18.168 18.169 if ( !mmio && !mfn_valid(gmfn) ) 18.170 { 18.171 perfc_incrc(shadow_fault_bail_bad_gfn); 18.172 - SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"SH_PRI_mfn"\n", 18.173 + SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n", 18.174 gfn_x(gfn), mfn_x(gmfn)); 18.175 goto not_a_shadow_fault; 18.176 } 18.177 @@ -2848,7 +2849,7 @@ static int sh_page_fault(struct vcpu *v, 18.178 } 18.179 18.180 perfc_incrc(shadow_fault_fixed); 18.181 - d->arch.shadow.fault_count++; 18.182 + d->arch.paging.shadow.fault_count++; 18.183 reset_early_unshadow(v); 18.184 18.185 done: 18.186 @@ -2949,7 +2950,7 @@ sh_invlpg(struct vcpu *v, unsigned long 18.187 return 0; 18.188 } 18.189 #elif SHADOW_PAGING_LEVELS == 3 18.190 - if ( !(l3e_get_flags(v->arch.shadow.l3table[shadow_l3_linear_offset(va)]) 18.191 + if ( !(l3e_get_flags(v->arch.paging.shadow.l3table[shadow_l3_linear_offset(va)]) 18.192 & _PAGE_PRESENT) ) 18.193 // no need to flush anything if there's no SL2... 
18.194 return 0; 18.195 @@ -3120,7 +3121,7 @@ sh_update_linear_entries(struct vcpu *v) 18.196 } 18.197 18.198 /* Shadow l3 tables are made up by sh_update_cr3 */ 18.199 - sl3e = v->arch.shadow.l3table; 18.200 + sl3e = v->arch.paging.shadow.l3table; 18.201 18.202 for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ ) 18.203 { 18.204 @@ -3161,13 +3162,13 @@ sh_update_linear_entries(struct vcpu *v) 18.205 #if GUEST_PAGING_LEVELS == 2 18.206 /* Shadow l3 tables were built by sh_update_cr3 */ 18.207 if ( shadow_mode_external(d) ) 18.208 - shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table; 18.209 + shadow_l3e = (shadow_l3e_t *)&v->arch.paging.shadow.l3table; 18.210 else 18.211 BUG(); /* PV 2-on-3 is not supported yet */ 18.212 18.213 #else /* GUEST_PAGING_LEVELS == 3 */ 18.214 18.215 - shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table; 18.216 + shadow_l3e = (shadow_l3e_t *)&v->arch.paging.shadow.l3table; 18.217 /* Always safe to use guest_vtable, because it's globally mapped */ 18.218 guest_l3e = v->arch.guest_vtable; 18.219 18.220 @@ -3370,7 +3371,7 @@ sh_set_toplevel_shadow(struct vcpu *v, 18.221 18.222 install_new_entry: 18.223 /* Done. Install it */ 18.224 - SHADOW_PRINTK("%u/%u [%u] gmfn %#"SH_PRI_mfn" smfn %#"SH_PRI_mfn"\n", 18.225 + SHADOW_PRINTK("%u/%u [%u] gmfn %#"PRI_mfn" smfn %#"PRI_mfn"\n", 18.226 GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS, slot, 18.227 mfn_x(gmfn), mfn_x(pagetable_get_mfn(new_entry))); 18.228 v->arch.shadow_table[slot] = new_entry; 18.229 @@ -3410,7 +3411,7 @@ sh_update_cr3(struct vcpu *v, int do_loc 18.230 if ( do_locking ) shadow_lock(v->domain); 18.231 18.232 ASSERT(shadow_locked_by_me(v->domain)); 18.233 - ASSERT(v->arch.shadow.mode); 18.234 + ASSERT(v->arch.paging.mode); 18.235 18.236 //// 18.237 //// vcpu->arch.guest_table is already set 18.238 @@ -3425,7 +3426,7 @@ sh_update_cr3(struct vcpu *v, int do_loc 18.239 ASSERT(shadow_mode_external(d)); 18.240 18.241 // Is paging enabled on this vcpu? 18.242 - if ( shadow_vcpu_mode_translate(v) ) 18.243 + if ( paging_vcpu_mode_translate(v) ) 18.244 { 18.245 gfn = _gfn(paddr_to_pfn(hvm_get_guest_ctrl_reg(v, 3))); 18.246 gmfn = vcpu_gfn_to_mfn(v, gfn); 18.247 @@ -3472,7 +3473,7 @@ sh_update_cr3(struct vcpu *v, int do_loc 18.248 sh_unmap_domain_page_global(v->arch.guest_vtable); 18.249 if ( shadow_mode_external(d) ) 18.250 { 18.251 - if ( shadow_vcpu_mode_translate(v) ) 18.252 + if ( paging_vcpu_mode_translate(v) ) 18.253 /* Paging enabled: find where in the page the l3 table is */ 18.254 guest_idx = guest_index((void *)hvm_get_guest_ctrl_reg(v, 3)); 18.255 else 18.256 @@ -3566,7 +3567,7 @@ sh_update_cr3(struct vcpu *v, int do_loc 18.257 #endif 18.258 18.259 /// 18.260 - /// v->arch.shadow.l3table 18.261 + /// v->arch.paging.shadow.l3table 18.262 /// 18.263 #if SHADOW_PAGING_LEVELS == 3 18.264 { 18.265 @@ -3581,7 +3582,7 @@ sh_update_cr3(struct vcpu *v, int do_loc 18.266 /* 3-on-3: make a PAE l3 that points at the four l2 pages */ 18.267 smfn = pagetable_get_mfn(v->arch.shadow_table[i]); 18.268 #endif 18.269 - v->arch.shadow.l3table[i] = 18.270 + v->arch.paging.shadow.l3table[i] = 18.271 (mfn_x(smfn) == 0) 18.272 ? shadow_l3e_empty() 18.273 : shadow_l3e_from_mfn(smfn, _PAGE_PRESENT); 18.274 @@ -3605,8 +3606,8 @@ sh_update_cr3(struct vcpu *v, int do_loc 18.275 /* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated. 
18.276 * Don't use make_cr3 because (a) we know it's below 4GB, and 18.277 * (b) it's not necessarily page-aligned, and make_cr3 takes a pfn */ 18.278 - ASSERT(virt_to_maddr(&v->arch.shadow.l3table) <= 0xffffffe0ULL); 18.279 - v->arch.cr3 = virt_to_maddr(&v->arch.shadow.l3table); 18.280 + ASSERT(virt_to_maddr(&v->arch.paging.shadow.l3table) <= 0xffffffe0ULL); 18.281 + v->arch.cr3 = virt_to_maddr(&v->arch.paging.shadow.l3table); 18.282 #else 18.283 /* 2-on-2 or 4-on-4: Just use the shadow top-level directly */ 18.284 make_cr3(v, pagetable_get_pfn(v->arch.shadow_table[0])); 18.285 @@ -3622,7 +3623,7 @@ sh_update_cr3(struct vcpu *v, int do_loc 18.286 ASSERT(is_hvm_domain(d)); 18.287 #if SHADOW_PAGING_LEVELS == 3 18.288 /* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated */ 18.289 - hvm_update_guest_cr3(v, virt_to_maddr(&v->arch.shadow.l3table)); 18.290 + hvm_update_guest_cr3(v, virt_to_maddr(&v->arch.paging.shadow.l3table)); 18.291 #else 18.292 /* 2-on-2 or 4-on-4: Just use the shadow top-level directly */ 18.293 hvm_update_guest_cr3(v, pagetable_get_paddr(v->arch.shadow_table[0])); 18.294 @@ -3665,7 +3666,7 @@ static int sh_guess_wrmap(struct vcpu *v 18.295 if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) ) 18.296 return 0; 18.297 #elif SHADOW_PAGING_LEVELS == 3 18.298 - sl3p = ((shadow_l3e_t *) v->arch.shadow.l3table) 18.299 + sl3p = ((shadow_l3e_t *) v->arch.paging.shadow.l3table) 18.300 + shadow_l3_linear_offset(vaddr); 18.301 if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) ) 18.302 return 0; 18.303 @@ -3709,7 +3710,7 @@ int sh_rm_write_access_from_l1(struct vc 18.304 (void) shadow_set_l1e(v, sl1e, ro_sl1e, sl1mfn); 18.305 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC 18.306 /* Remember the last shadow that we shot a writeable mapping in */ 18.307 - v->arch.shadow.last_writeable_pte_smfn = mfn_x(base_sl1mfn); 18.308 + v->arch.paging.shadow.last_writeable_pte_smfn = mfn_x(base_sl1mfn); 18.309 #endif 18.310 if ( (mfn_to_page(readonly_mfn)->u.inuse.type_info 18.311 & PGT_count_mask) == 0 ) 18.312 @@ -4050,8 +4051,8 @@ sh_x86_emulate_cmpxchg8b(struct vcpu *v, 18.313 18.314 #define AUDIT_FAIL(_level, _fmt, _a...) 
do { \ 18.315 printk("Shadow %u-on-%u audit failed at level %i, index %i\n" \ 18.316 - "gl" #_level "mfn = %" SH_PRI_mfn \ 18.317 - " sl" #_level "mfn = %" SH_PRI_mfn \ 18.318 + "gl" #_level "mfn = %" PRI_mfn \ 18.319 + " sl" #_level "mfn = %" PRI_mfn \ 18.320 " &gl" #_level "e = %p &sl" #_level "e = %p" \ 18.321 " gl" #_level "e = %" SH_PRI_gpte \ 18.322 " sl" #_level "e = %" SH_PRI_pte "\nError: " _fmt "\n", \ 18.323 @@ -4105,7 +4106,7 @@ audit_gfn_to_mfn(struct vcpu *v, gfn_t g 18.324 != PGT_writable_page ) 18.325 return _mfn(gfn_x(gfn)); /* This is a paging-disabled shadow */ 18.326 else 18.327 - return sh_gfn_to_mfn(v->domain, gfn_x(gfn)); 18.328 + return gfn_to_mfn(v->domain, gfn_x(gfn)); 18.329 } 18.330 18.331 18.332 @@ -4156,7 +4157,7 @@ int sh_audit_l1_table(struct vcpu *v, mf 18.333 gmfn = audit_gfn_to_mfn(v, gfn, gl1mfn); 18.334 if ( mfn_x(gmfn) != mfn_x(mfn) ) 18.335 AUDIT_FAIL(1, "bad translation: gfn %" SH_PRI_gfn 18.336 - " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn, 18.337 + " --> %" PRI_mfn " != mfn %" PRI_mfn, 18.338 gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn)); 18.339 } 18.340 } 18.341 @@ -4219,8 +4220,8 @@ int sh_audit_l2_table(struct vcpu *v, mf 18.342 SH_type_l1_shadow); 18.343 if ( mfn_x(gmfn) != mfn_x(mfn) ) 18.344 AUDIT_FAIL(2, "bad translation: gfn %" SH_PRI_gfn 18.345 - " (--> %" SH_PRI_mfn ")" 18.346 - " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn, 18.347 + " (--> %" PRI_mfn ")" 18.348 + " --> %" PRI_mfn " != mfn %" PRI_mfn, 18.349 gfn_x(gfn), 18.350 (guest_l2e_get_flags(*gl2e) & _PAGE_PSE) ? 0 18.351 : mfn_x(audit_gfn_to_mfn(v, gfn, gl2mfn)), 18.352 @@ -4262,7 +4263,7 @@ int sh_audit_l3_table(struct vcpu *v, mf 18.353 : SH_type_l2_shadow); 18.354 if ( mfn_x(gmfn) != mfn_x(mfn) ) 18.355 AUDIT_FAIL(3, "bad translation: gfn %" SH_PRI_gfn 18.356 - " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn, 18.357 + " --> %" PRI_mfn " != mfn %" PRI_mfn, 18.358 gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn)); 18.359 } 18.360 }); 18.361 @@ -4297,7 +4298,7 @@ int sh_audit_l4_table(struct vcpu *v, mf 18.362 SH_type_l3_shadow); 18.363 if ( mfn_x(gmfn) != mfn_x(mfn) ) 18.364 AUDIT_FAIL(4, "bad translation: gfn %" SH_PRI_gfn 18.365 - " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn, 18.366 + " --> %" PRI_mfn " != mfn %" PRI_mfn, 18.367 gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn)); 18.368 } 18.369 }); 18.370 @@ -4314,30 +4315,29 @@ int sh_audit_l4_table(struct vcpu *v, mf 18.371 /**************************************************************************/ 18.372 /* Entry points into this mode of the shadow code. 18.373 * This will all be mangled by the preprocessor to uniquify everything. 
*/ 18.374 -struct shadow_paging_mode sh_paging_mode = { 18.375 - .page_fault = sh_page_fault, 18.376 - .invlpg = sh_invlpg, 18.377 - .gva_to_gpa = sh_gva_to_gpa, 18.378 - .gva_to_gfn = sh_gva_to_gfn, 18.379 - .update_cr3 = sh_update_cr3, 18.380 - .map_and_validate_gl1e = sh_map_and_validate_gl1e, 18.381 - .map_and_validate_gl2e = sh_map_and_validate_gl2e, 18.382 - .map_and_validate_gl2he = sh_map_and_validate_gl2he, 18.383 - .map_and_validate_gl3e = sh_map_and_validate_gl3e, 18.384 - .map_and_validate_gl4e = sh_map_and_validate_gl4e, 18.385 - .detach_old_tables = sh_detach_old_tables, 18.386 - .x86_emulate_write = sh_x86_emulate_write, 18.387 - .x86_emulate_cmpxchg = sh_x86_emulate_cmpxchg, 18.388 - .x86_emulate_cmpxchg8b = sh_x86_emulate_cmpxchg8b, 18.389 - .make_monitor_table = sh_make_monitor_table, 18.390 - .destroy_monitor_table = sh_destroy_monitor_table, 18.391 - .guest_map_l1e = sh_guest_map_l1e, 18.392 - .guest_get_eff_l1e = sh_guest_get_eff_l1e, 18.393 +struct paging_mode sh_paging_mode = { 18.394 + .page_fault = sh_page_fault, 18.395 + .invlpg = sh_invlpg, 18.396 + .gva_to_gpa = sh_gva_to_gpa, 18.397 + .gva_to_gfn = sh_gva_to_gfn, 18.398 + .update_cr3 = sh_update_cr3, 18.399 + .update_paging_modes = shadow_update_paging_modes, 18.400 + .write_p2m_entry = shadow_write_p2m_entry, 18.401 + .write_guest_entry = shadow_write_guest_entry, 18.402 + .cmpxchg_guest_entry = shadow_cmpxchg_guest_entry, 18.403 + .guest_map_l1e = sh_guest_map_l1e, 18.404 + .guest_get_eff_l1e = sh_guest_get_eff_l1e, 18.405 + .guest_levels = GUEST_PAGING_LEVELS, 18.406 + .shadow.detach_old_tables = sh_detach_old_tables, 18.407 + .shadow.x86_emulate_write = sh_x86_emulate_write, 18.408 + .shadow.x86_emulate_cmpxchg = sh_x86_emulate_cmpxchg, 18.409 + .shadow.x86_emulate_cmpxchg8b = sh_x86_emulate_cmpxchg8b, 18.410 + .shadow.make_monitor_table = sh_make_monitor_table, 18.411 + .shadow.destroy_monitor_table = sh_destroy_monitor_table, 18.412 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC 18.413 - .guess_wrmap = sh_guess_wrmap, 18.414 + .shadow.guess_wrmap = sh_guess_wrmap, 18.415 #endif 18.416 - .guest_levels = GUEST_PAGING_LEVELS, 18.417 - .shadow_levels = SHADOW_PAGING_LEVELS, 18.418 + .shadow.shadow_levels = SHADOW_PAGING_LEVELS, 18.419 }; 18.420 18.421 /*
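The table above is the heart of the interface change: the entry points every paging-assistance scheme must provide (page_fault, invlpg, gva_to_gpa, update_cr3, write_p2m_entry, ...) now sit directly in struct paging_mode, while shadow-only helpers are grouped under its embedded .shadow member. Callers reach both through v->arch.paging.mode, exactly as the common code in this patch already does. The tiny sketch below exists purely to show where the two kinds of hooks live; it is not a real call sequence and assumes the usual Xen headers:

    static void example_dispatch(struct vcpu *v)
    {
        if ( v->arch.paging.mode == NULL )
            return;

        /* shadow-internal operation: lives under the .shadow sub-structure */
        v->arch.paging.mode->shadow.detach_old_tables(v);

        /* generic paging operation: lives at the top level of the mode table */
        v->arch.paging.mode->update_cr3(v, 0);
    }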
19.1 --- a/xen/arch/x86/mm/shadow/multi.h Tue Feb 13 15:32:25 2007 +0000 19.2 +++ b/xen/arch/x86/mm/shadow/multi.h Wed Feb 14 12:02:20 2007 +0000 19.3 @@ -115,5 +115,5 @@ SHADOW_INTERNAL_NAME(sh_destroy_monitor_ 19.4 (struct vcpu *v, mfn_t mmfn); 19.5 #endif 19.6 19.7 -extern struct shadow_paging_mode 19.8 +extern struct paging_mode 19.9 SHADOW_INTERNAL_NAME(sh_paging_mode, SHADOW_LEVELS, GUEST_LEVELS);
20.1 --- a/xen/arch/x86/mm/shadow/private.h Tue Feb 13 15:32:25 2007 +0000 20.2 +++ b/xen/arch/x86/mm/shadow/private.h Wed Feb 14 12:02:20 2007 +0000 20.3 @@ -41,13 +41,12 @@ 20.4 #define SHADOW_AUDIT_ENTRIES 0x04 /* Check this walk's shadows */ 20.5 #define SHADOW_AUDIT_ENTRIES_FULL 0x08 /* Check every shadow */ 20.6 #define SHADOW_AUDIT_ENTRIES_MFNS 0x10 /* Check gfn-mfn map in shadows */ 20.7 -#define SHADOW_AUDIT_P2M 0x20 /* Check the p2m table */ 20.8 20.9 #ifdef NDEBUG 20.10 #define SHADOW_AUDIT 0 20.11 #define SHADOW_AUDIT_ENABLE 0 20.12 #else 20.13 -#define SHADOW_AUDIT 0x15 /* Basic audit of all except p2m. */ 20.14 +#define SHADOW_AUDIT 0x15 /* Basic audit of all */ 20.15 #define SHADOW_AUDIT_ENABLE shadow_audit_enable 20.16 extern int shadow_audit_enable; 20.17 #endif 20.18 @@ -84,9 +83,9 @@ extern int shadow_audit_enable; 20.19 #define SHADOW_DEBUG_PROPAGATE 1 20.20 #define SHADOW_DEBUG_MAKE_SHADOW 1 20.21 #define SHADOW_DEBUG_DESTROY_SHADOW 1 20.22 -#define SHADOW_DEBUG_P2M 0 20.23 #define SHADOW_DEBUG_A_AND_D 1 20.24 #define SHADOW_DEBUG_EMULATE 1 20.25 +#define SHADOW_DEBUG_P2M 1 20.26 #define SHADOW_DEBUG_LOGDIRTY 0 20.27 20.28 /****************************************************************************** 20.29 @@ -108,36 +107,36 @@ extern int shadow_audit_enable; 20.30 #error shadow.h currently requires CONFIG_SMP 20.31 #endif 20.32 20.33 -#define shadow_lock_init(_d) \ 20.34 - do { \ 20.35 - spin_lock_init(&(_d)->arch.shadow.lock); \ 20.36 - (_d)->arch.shadow.locker = -1; \ 20.37 - (_d)->arch.shadow.locker_function = "nobody"; \ 20.38 +#define shadow_lock_init(_d) \ 20.39 + do { \ 20.40 + spin_lock_init(&(_d)->arch.paging.shadow.lock); \ 20.41 + (_d)->arch.paging.shadow.locker = -1; \ 20.42 + (_d)->arch.paging.shadow.locker_function = "nobody"; \ 20.43 } while (0) 20.44 20.45 #define shadow_locked_by_me(_d) \ 20.46 - (current->processor == (_d)->arch.shadow.locker) 20.47 + (current->processor == (_d)->arch.paging.shadow.locker) 20.48 20.49 -#define shadow_lock(_d) \ 20.50 - do { \ 20.51 - if ( unlikely((_d)->arch.shadow.locker == current->processor) ) \ 20.52 - { \ 20.53 - printk("Error: shadow lock held by %s\n", \ 20.54 - (_d)->arch.shadow.locker_function); \ 20.55 - BUG(); \ 20.56 - } \ 20.57 - spin_lock(&(_d)->arch.shadow.lock); \ 20.58 - ASSERT((_d)->arch.shadow.locker == -1); \ 20.59 - (_d)->arch.shadow.locker = current->processor; \ 20.60 - (_d)->arch.shadow.locker_function = __func__; \ 20.61 +#define shadow_lock(_d) \ 20.62 + do { \ 20.63 + if ( unlikely((_d)->arch.paging.shadow.locker == current->processor) )\ 20.64 + { \ 20.65 + printk("Error: shadow lock held by %s\n", \ 20.66 + (_d)->arch.paging.shadow.locker_function); \ 20.67 + BUG(); \ 20.68 + } \ 20.69 + spin_lock(&(_d)->arch.paging.shadow.lock); \ 20.70 + ASSERT((_d)->arch.paging.shadow.locker == -1); \ 20.71 + (_d)->arch.paging.shadow.locker = current->processor; \ 20.72 + (_d)->arch.paging.shadow.locker_function = __func__; \ 20.73 } while (0) 20.74 20.75 -#define shadow_unlock(_d) \ 20.76 - do { \ 20.77 - ASSERT((_d)->arch.shadow.locker == current->processor); \ 20.78 - (_d)->arch.shadow.locker = -1; \ 20.79 - (_d)->arch.shadow.locker_function = "nobody"; \ 20.80 - spin_unlock(&(_d)->arch.shadow.lock); \ 20.81 +#define shadow_unlock(_d) \ 20.82 + do { \ 20.83 + ASSERT((_d)->arch.paging.shadow.locker == current->processor); \ 20.84 + (_d)->arch.paging.shadow.locker = -1; \ 20.85 + (_d)->arch.paging.shadow.locker_function = "nobody"; \ 20.86 + spin_unlock(&(_d)->arch.paging.shadow.lock); \ 20.87 } 
while (0) 20.88 20.89 20.90 @@ -152,13 +151,6 @@ extern void shadow_audit_tables(struct v 20.91 #define shadow_audit_tables(_v) do {} while(0) 20.92 #endif 20.93 20.94 -#if SHADOW_AUDIT & SHADOW_AUDIT_P2M 20.95 -extern void shadow_audit_p2m(struct domain *d); 20.96 -#else 20.97 -#define shadow_audit_p2m(_d) do {} while(0) 20.98 -#endif 20.99 - 20.100 - 20.101 /****************************************************************************** 20.102 * Macro for dealing with the naming of the internal names of the 20.103 * shadow code's external entry points. 20.104 @@ -304,7 +296,7 @@ static inline int sh_type_is_pinnable(st 20.105 * page. When we're shadowing those kernels, we have to pin l3 20.106 * shadows so they don't just evaporate on every context switch. 20.107 * For all other guests, we'd rather use the up-pointer field in l3s. */ 20.108 - if ( unlikely((v->domain->arch.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL) 20.109 + if ( unlikely((v->domain->arch.paging.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL) 20.110 && CONFIG_PAGING_LEVELS >= 4 20.111 && t == SH_type_l3_64_shadow) ) 20.112 return 1; 20.113 @@ -379,12 +371,11 @@ void sh_install_xen_entries_in_l2h(struc 20.114 void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn); 20.115 20.116 /* Update the shadows in response to a pagetable write from Xen */ 20.117 -extern int sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, 20.118 - void *entry, u32 size); 20.119 +int sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry, u32 size); 20.120 20.121 /* Update the shadows in response to a pagetable write from a HVM guest */ 20.122 -extern void sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, 20.123 - void *entry, u32 size); 20.124 +void sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, 20.125 + void *entry, u32 size); 20.126 20.127 /* Remove all writeable mappings of a guest frame from the shadows. 20.128 * Returns non-zero if we need to flush TLBs. 20.129 @@ -394,6 +385,21 @@ extern int sh_remove_write_access(struct 20.130 unsigned int level, 20.131 unsigned long fault_addr); 20.132 20.133 +/* Allocate/free functions for passing to the P2M code. */ 20.134 +struct page_info *shadow_alloc_p2m_page(struct domain *d); 20.135 +void shadow_free_p2m_page(struct domain *d, struct page_info *pg); 20.136 + 20.137 +/* Functions that atomically write PT/P2M entries and update state */ 20.138 +void shadow_write_p2m_entry(struct vcpu *v, unsigned long gfn, 20.139 + l1_pgentry_t *p, l1_pgentry_t new, 20.140 + unsigned int level); 20.141 +int shadow_write_guest_entry(struct vcpu *v, intpte_t *p, 20.142 + intpte_t new, mfn_t gmfn); 20.143 +int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p, 20.144 + intpte_t *old, intpte_t new, mfn_t gmfn); 20.145 + 20.146 + 20.147 + 20.148 /****************************************************************************** 20.149 * Flags used in the return value of the shadow_set_lXe() functions... 
20.150 */ 20.151 @@ -477,19 +483,6 @@ sh_unmap_domain_page_global(void *p) 20.152 unmap_domain_page_global(p); 20.153 } 20.154 20.155 -static inline mfn_t 20.156 -pagetable_get_mfn(pagetable_t pt) 20.157 -{ 20.158 - return _mfn(pagetable_get_pfn(pt)); 20.159 -} 20.160 - 20.161 -static inline pagetable_t 20.162 -pagetable_from_mfn(mfn_t mfn) 20.163 -{ 20.164 - return pagetable_from_pfn(mfn_x(mfn)); 20.165 -} 20.166 - 20.167 - 20.168 /****************************************************************************** 20.169 * Log-dirty mode bitmap handling 20.170 */ 20.171 @@ -502,13 +495,13 @@ sh_mfn_is_dirty(struct domain *d, mfn_t 20.172 { 20.173 unsigned long pfn; 20.174 ASSERT(shadow_mode_log_dirty(d)); 20.175 - ASSERT(d->arch.shadow.dirty_bitmap != NULL); 20.176 + ASSERT(d->arch.paging.shadow.dirty_bitmap != NULL); 20.177 20.178 /* We /really/ mean PFN here, even for non-translated guests. */ 20.179 pfn = get_gpfn_from_mfn(mfn_x(gmfn)); 20.180 if ( likely(VALID_M2P(pfn)) 20.181 - && likely(pfn < d->arch.shadow.dirty_bitmap_size) 20.182 - && test_bit(pfn, d->arch.shadow.dirty_bitmap) ) 20.183 + && likely(pfn < d->arch.paging.shadow.dirty_bitmap_size) 20.184 + && test_bit(pfn, d->arch.paging.shadow.dirty_bitmap) ) 20.185 return 1; 20.186 20.187 return 0; 20.188 @@ -612,7 +605,7 @@ static inline int sh_pin(struct vcpu *v, 20.189 sp->pinned = 1; 20.190 } 20.191 /* Put it at the head of the list of pinned shadows */ 20.192 - list_add(&sp->list, &v->domain->arch.shadow.pinned_shadows); 20.193 + list_add(&sp->list, &v->domain->arch.paging.shadow.pinned_shadows); 20.194 return 1; 20.195 } 20.196
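private.h now exports shadow_alloc_p2m_page()/shadow_free_p2m_page() "for passing to the P2M code", and the new p2m.h at the end of this changeset accepts exactly such a pair in p2m_alloc_table(). The call that ties the two together presumably sits in shadow_enable(), which is outside this excerpt, so the following is an assumption about the intended wiring rather than code from the patch:

    /* Assumed wiring: the shadow pool supplies the p2m's page allocator. */
    static int example_build_p2m(struct domain *d)
    {
        return p2m_alloc_table(d, shadow_alloc_p2m_page, shadow_free_p2m_page);
    }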
21.1 --- a/xen/arch/x86/mm/shadow/types.h Tue Feb 13 15:32:25 2007 +0000 21.2 +++ b/xen/arch/x86/mm/shadow/types.h Wed Feb 14 12:02:20 2007 +0000 21.3 @@ -414,15 +414,9 @@ valid_gfn(gfn_t m) 21.4 static inline mfn_t 21.5 vcpu_gfn_to_mfn(struct vcpu *v, gfn_t gfn) 21.6 { 21.7 - if ( !shadow_vcpu_mode_translate(v) ) 21.8 + if ( !paging_vcpu_mode_translate(v) ) 21.9 return _mfn(gfn_x(gfn)); 21.10 - return sh_gfn_to_mfn(v->domain, gfn_x(gfn)); 21.11 -} 21.12 - 21.13 -static inline gfn_t 21.14 -mfn_to_gfn(struct domain *d, mfn_t mfn) 21.15 -{ 21.16 - return _gfn(sh_mfn_to_gfn(d, mfn)); 21.17 + return gfn_to_mfn(v->domain, gfn_x(gfn)); 21.18 } 21.19 21.20 static inline paddr_t
22.1 --- a/xen/arch/x86/setup.c Tue Feb 13 15:32:25 2007 +0000 22.2 +++ b/xen/arch/x86/setup.c Wed Feb 14 12:02:20 2007 +0000 22.3 @@ -29,7 +29,7 @@ 22.4 #include <asm/mpspec.h> 22.5 #include <asm/apic.h> 22.6 #include <asm/desc.h> 22.7 -#include <asm/shadow.h> 22.8 +#include <asm/paging.h> 22.9 #include <asm/e820.h> 22.10 #include <acm/acm_hooks.h> 22.11 #include <xen/kexec.h>
23.1 --- a/xen/arch/x86/sysctl.c Tue Feb 13 15:32:25 2007 +0000 23.2 +++ b/xen/arch/x86/sysctl.c Wed Feb 14 12:02:20 2007 +0000 23.3 @@ -19,7 +19,6 @@ 23.4 #include <xen/trace.h> 23.5 #include <xen/console.h> 23.6 #include <xen/iocap.h> 23.7 -#include <asm/shadow.h> 23.8 #include <asm/irq.h> 23.9 #include <asm/hvm/hvm.h> 23.10 #include <asm/hvm/support.h>
24.1 --- a/xen/arch/x86/traps.c Tue Feb 13 15:32:25 2007 +0000 24.2 +++ b/xen/arch/x86/traps.c Wed Feb 14 12:02:20 2007 +0000 24.3 @@ -46,7 +46,7 @@ 24.4 #include <xen/nmi.h> 24.5 #include <xen/version.h> 24.6 #include <xen/kexec.h> 24.7 -#include <asm/shadow.h> 24.8 +#include <asm/paging.h> 24.9 #include <asm/system.h> 24.10 #include <asm/io.h> 24.11 #include <asm/atomic.h> 24.12 @@ -860,8 +860,8 @@ static int fixup_page_fault(unsigned lon 24.13 24.14 if ( unlikely(IN_HYPERVISOR_RANGE(addr)) ) 24.15 { 24.16 - if ( shadow_mode_external(d) && guest_mode(regs) ) 24.17 - return shadow_fault(addr, regs); 24.18 + if ( paging_mode_external(d) && guest_mode(regs) ) 24.19 + return paging_fault(addr, regs); 24.20 if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) ) 24.21 return handle_gdt_ldt_mapping_fault( 24.22 addr - GDT_LDT_VIRT_START, regs); 24.23 @@ -876,8 +876,8 @@ static int fixup_page_fault(unsigned lon 24.24 ptwr_do_page_fault(v, addr, regs) ) 24.25 return EXCRET_fault_fixed; 24.26 24.27 - if ( shadow_mode_enabled(d) ) 24.28 - return shadow_fault(addr, regs); 24.29 + if ( paging_mode_enabled(d) ) 24.30 + return paging_fault(addr, regs); 24.31 24.32 return 0; 24.33 }
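fixup_page_fault() now hands both external-mode faults in the hypervisor range and ordinary guest faults to paging_fault() whenever paging assistance is enabled. The wrapper itself belongs to the new paging layer and is not shown in this excerpt; given the .page_fault hook installed in sh_paging_mode above, a plausible minimal form would be the sketch below (the exact prototype and argument order are assumptions):

    static inline int paging_fault(unsigned long va, struct cpu_user_regs *regs)
    {
        struct vcpu *v = current;   /* 'current' is the running vcpu in Xen */

        /* dispatch to whichever paging-assistance mode this vcpu is using */
        return v->arch.paging.mode->page_fault(v, va, regs);
    }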
25.1 --- a/xen/arch/x86/x86_32/domain_page.c Tue Feb 13 15:32:25 2007 +0000 25.2 +++ b/xen/arch/x86/x86_32/domain_page.c Wed Feb 14 12:02:20 2007 +0000 25.3 @@ -11,7 +11,6 @@ 25.4 #include <xen/mm.h> 25.5 #include <xen/perfc.h> 25.6 #include <xen/domain_page.h> 25.7 -#include <xen/shadow.h> 25.8 #include <asm/current.h> 25.9 #include <asm/flushtlb.h> 25.10 #include <asm/hardirq.h>
26.1 --- a/xen/arch/x86/x86_64/traps.c Tue Feb 13 15:32:25 2007 +0000 26.2 +++ b/xen/arch/x86/x86_64/traps.c Wed Feb 14 12:02:20 2007 +0000 26.3 @@ -16,7 +16,6 @@ 26.4 #include <asm/flushtlb.h> 26.5 #include <asm/msr.h> 26.6 #include <asm/page.h> 26.7 -#include <asm/shadow.h> 26.8 #include <asm/shared.h> 26.9 #include <asm/hvm/hvm.h> 26.10 #include <asm/hvm/support.h>
27.1 --- a/xen/include/asm-x86/domain.h Tue Feb 13 15:32:25 2007 +0000 27.2 +++ b/xen/include/asm-x86/domain.h Wed Feb 14 12:02:20 2007 +0000 27.3 @@ -58,19 +58,22 @@ extern void toggle_guest_mode(struct vcp 27.4 */ 27.5 extern void hypercall_page_initialise(struct domain *d, void *); 27.6 27.7 +/************************************************/ 27.8 +/* shadow paging extension */ 27.9 +/************************************************/ 27.10 struct shadow_domain { 27.11 - u32 mode; /* flags to control shadow operation */ 27.12 spinlock_t lock; /* shadow domain lock */ 27.13 int locker; /* processor which holds the lock */ 27.14 const char *locker_function; /* Func that took it */ 27.15 + unsigned int opt_flags; /* runtime tunable optimizations on/off */ 27.16 + struct list_head pinned_shadows; 27.17 + 27.18 + /* Memory allocation */ 27.19 struct list_head freelists[SHADOW_MAX_ORDER + 1]; 27.20 struct list_head p2m_freelist; 27.21 - struct list_head p2m_inuse; 27.22 - struct list_head pinned_shadows; 27.23 unsigned int total_pages; /* number of pages allocated */ 27.24 unsigned int free_pages; /* number of pages on freelists */ 27.25 - unsigned int p2m_pages; /* number of pages in p2m map */ 27.26 - unsigned int opt_flags; /* runtime tunable optimizations on/off */ 27.27 + unsigned int p2m_pages; /* number of pages allocates to p2m */ 27.28 27.29 /* Shadow hashtable */ 27.30 struct shadow_page_info **hash_table; 27.31 @@ -85,6 +88,61 @@ struct shadow_domain { 27.32 unsigned int dirty_count; 27.33 }; 27.34 27.35 +struct shadow_vcpu { 27.36 +#if CONFIG_PAGING_LEVELS >= 3 27.37 + /* PAE guests: per-vcpu shadow top-level table */ 27.38 + l3_pgentry_t l3table[4] __attribute__((__aligned__(32))); 27.39 +#endif 27.40 + /* Last MFN that we emulated a write to. */ 27.41 + unsigned long last_emulated_mfn; 27.42 + /* MFN of the last shadow that we shot a writeable mapping in */ 27.43 + unsigned long last_writeable_pte_smfn; 27.44 +}; 27.45 + 27.46 +/************************************************/ 27.47 +/* p2m handling */ 27.48 +/************************************************/ 27.49 + 27.50 +struct p2m_domain { 27.51 + /* Lock that protects updates to the p2m */ 27.52 + spinlock_t lock; 27.53 + int locker; /* processor which holds the lock */ 27.54 + const char *locker_function; /* Func that took it */ 27.55 + 27.56 + /* Pages used to construct the p2m */ 27.57 + struct list_head pages; 27.58 + 27.59 + /* Functions to call to get or free pages for the p2m */ 27.60 + struct page_info * (*alloc_page )(struct domain *d); 27.61 + void (*free_page )(struct domain *d, 27.62 + struct page_info *pg); 27.63 + 27.64 + /* Highest guest frame that's ever been mapped in the p2m */ 27.65 + unsigned long max_mapped_pfn; 27.66 +}; 27.67 + 27.68 +/************************************************/ 27.69 +/* common paging data structure */ 27.70 +/************************************************/ 27.71 +struct paging_domain { 27.72 + u32 mode; /* flags to control paging operation */ 27.73 + 27.74 + /* extension for shadow paging support */ 27.75 + struct shadow_domain shadow; 27.76 + 27.77 + /* Other paging assistance code will have structs here */ 27.78 +}; 27.79 + 27.80 +struct paging_vcpu { 27.81 + /* Pointers to mode-specific entry points. */ 27.82 + struct paging_mode *mode; 27.83 + /* HVM guest: paging enabled (CR0.PG)? 
*/ 27.84 + unsigned int translate_enabled:1; 27.85 + 27.86 + /* paging support extension */ 27.87 + struct shadow_vcpu shadow; 27.88 +}; 27.89 + 27.90 struct arch_domain 27.91 { 27.92 l1_pgentry_t *mm_perdomain_pt; 27.93 @@ -108,12 +166,11 @@ struct arch_domain 27.94 27.95 struct hvm_domain hvm_domain; 27.96 27.97 - struct shadow_domain shadow; 27.98 + struct paging_domain paging; 27.99 + struct p2m_domain p2m ; 27.100 27.101 /* Shadow translated domain: P2M mapping */ 27.102 pagetable_t phys_table; 27.103 - /* Highest guest frame that's ever been mapped in the p2m */ 27.104 - unsigned long max_mapped_pfn; 27.105 27.106 /* Pseudophysical e820 map (XENMEM_memory_map). */ 27.107 struct e820entry e820[3]; 27.108 @@ -139,21 +196,6 @@ struct pae_l3_cache { }; 27.109 #define pae_l3_cache_init(c) ((void)0) 27.110 #endif 27.111 27.112 -struct shadow_vcpu { 27.113 -#if CONFIG_PAGING_LEVELS >= 3 27.114 - /* PAE guests: per-vcpu shadow top-level table */ 27.115 - l3_pgentry_t l3table[4] __attribute__((__aligned__(32))); 27.116 -#endif 27.117 - /* Pointers to mode-specific entry points. */ 27.118 - struct shadow_paging_mode *mode; 27.119 - /* Last MFN that we emulated a write to. */ 27.120 - unsigned long last_emulated_mfn; 27.121 - /* MFN of the last shadow that we shot a writeable mapping in */ 27.122 - unsigned long last_writeable_pte_smfn; 27.123 - /* HVM guest: paging enabled (CR0.PG)? */ 27.124 - unsigned int translate_enabled:1; 27.125 -}; 27.126 - 27.127 struct arch_vcpu 27.128 { 27.129 /* Needs 16-byte aligment for FXSAVE/FXRSTOR. */ 27.130 @@ -205,7 +247,7 @@ struct arch_vcpu 27.131 /* Current LDT details. */ 27.132 unsigned long shadow_ldt_mapcnt; 27.133 27.134 - struct shadow_vcpu shadow; 27.135 + struct paging_vcpu paging; 27.136 } __cacheline_aligned; 27.137 27.138 /* shorthands to improve code legibility */
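The net effect of the domain.h reshuffle is a rename of the field paths used throughout arch/x86. The throwaway function below (not part of the patch) just spells out each old path against its new home, using only members that appear in the structures above:

    static void field_relocation_examples(struct domain *d, struct vcpu *v)
    {
        (void)d->arch.paging.mode;                     /* was d->arch.shadow.mode */
        (void)d->arch.paging.shadow.total_pages;       /* was d->arch.shadow.total_pages */
        (void)d->arch.paging.shadow.dirty_bitmap;      /* was d->arch.shadow.dirty_bitmap */
        (void)d->arch.p2m.max_mapped_pfn;              /* was d->arch.max_mapped_pfn */
        (void)v->arch.paging.mode;                     /* was v->arch.shadow.mode */
        (void)v->arch.paging.shadow.last_emulated_mfn; /* was v->arch.shadow.last_emulated_mfn */
    }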
28.1 --- a/xen/include/asm-x86/mm.h Tue Feb 13 15:32:25 2007 +0000 28.2 +++ b/xen/include/asm-x86/mm.h Wed Feb 14 12:02:20 2007 +0000 28.3 @@ -246,6 +246,64 @@ pae_copy_root(struct vcpu *v, l3_pgentry 28.4 28.5 int check_descriptor(const struct domain *, struct desc_struct *d); 28.6 28.7 + 28.8 +/****************************************************************************** 28.9 + * With shadow pagetables, the different kinds of address start 28.10 + * to get get confusing. 28.11 + * 28.12 + * Virtual addresses are what they usually are: the addresses that are used 28.13 + * to accessing memory while the guest is running. The MMU translates from 28.14 + * virtual addresses to machine addresses. 28.15 + * 28.16 + * (Pseudo-)physical addresses are the abstraction of physical memory the 28.17 + * guest uses for allocation and so forth. For the purposes of this code, 28.18 + * we can largely ignore them. 28.19 + * 28.20 + * Guest frame numbers (gfns) are the entries that the guest puts in its 28.21 + * pagetables. For normal paravirtual guests, they are actual frame numbers, 28.22 + * with the translation done by the guest. 28.23 + * 28.24 + * Machine frame numbers (mfns) are the entries that the hypervisor puts 28.25 + * in the shadow page tables. 28.26 + * 28.27 + * Elsewhere in the xen code base, the name "gmfn" is generally used to refer 28.28 + * to a "machine frame number, from the guest's perspective", or in other 28.29 + * words, pseudo-physical frame numbers. However, in the shadow code, the 28.30 + * term "gmfn" means "the mfn of a guest page"; this combines naturally with 28.31 + * other terms such as "smfn" (the mfn of a shadow page), gl2mfn (the mfn of a 28.32 + * guest L2 page), etc... 28.33 + */ 28.34 + 28.35 +/* With this defined, we do some ugly things to force the compiler to 28.36 + * give us type safety between mfns and gfns and other integers. 28.37 + * TYPE_SAFE(int foo) defines a foo_t, and _foo() and foo_x() functions 28.38 + * that translate beween int and foo_t. 28.39 + * 28.40 + * It does have some performance cost because the types now have 28.41 + * a different storage attribute, so may not want it on all the time. */ 28.42 + 28.43 +#ifndef NDEBUG 28.44 +#define TYPE_SAFETY 1 28.45 +#endif 28.46 + 28.47 +#ifdef TYPE_SAFETY 28.48 +#define TYPE_SAFE(_type,_name) \ 28.49 +typedef struct { _type _name; } _name##_t; \ 28.50 +static inline _name##_t _##_name(_type n) { return (_name##_t) { n }; } \ 28.51 +static inline _type _name##_x(_name##_t n) { return n._name; } 28.52 +#else 28.53 +#define TYPE_SAFE(_type,_name) \ 28.54 +typedef _type _name##_t; \ 28.55 +static inline _name##_t _##_name(_type n) { return n; } \ 28.56 +static inline _type _name##_x(_name##_t n) { return n; } 28.57 +#endif 28.58 + 28.59 +TYPE_SAFE(unsigned long,mfn); 28.60 + 28.61 +/* Macro for printk formats: use as printk("%"PRI_mfn"\n", mfn_x(foo)); */ 28.62 +#define PRI_mfn "05lx" 28.63 + 28.64 + 28.65 /* 28.66 * The MPT (machine->physical mapping table) is an array of word-sized 28.67 * values, indexed on machine frame number. It is expected that guest OSes 28.68 @@ -269,13 +327,12 @@ int check_descriptor(const struct domain 28.69 #endif 28.70 #define get_gpfn_from_mfn(mfn) (machine_to_phys_mapping[(mfn)]) 28.71 28.72 - 28.73 #define mfn_to_gmfn(_d, mfn) \ 28.74 - ( (shadow_mode_translate(_d)) \ 28.75 + ( (paging_mode_translate(_d)) \ 28.76 ? 
get_gpfn_from_mfn(mfn) \ 28.77 : (mfn) ) 28.78 28.79 -#define gmfn_to_mfn(_d, gpfn) mfn_x(sh_gfn_to_mfn(_d, gpfn)) 28.80 +#define gmfn_to_mfn(_d, gpfn) mfn_x(gfn_to_mfn(_d, gpfn)) 28.81 28.82 #define INVALID_MFN (~0UL) 28.83
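The TYPE_SAFE(unsigned long,mfn) definition above gives an mfn_t that cannot be mixed up with plain integers in debug builds: values are wrapped with _mfn(), unwrapped with mfn_x(), and printed with PRI_mfn. A short usage sketch follows; the function name example_mfn_usage is hypothetical, gfn_to_mfn() comes from the new p2m.h below, and mfn_valid()/printk() are the usual Xen helpers.

    /* Hypothetical illustration of the type-safe mfn_t conventions. */
    static void example_mfn_usage(struct domain *d, unsigned long gpfn)
    {
        mfn_t mfn = gfn_to_mfn(d, gpfn);   /* conversions traffic in mfn_t */
        unsigned long raw = mfn_x(mfn);    /* unwrap only where a plain
                                            * integer is really needed     */

        if ( mfn_valid(raw) )
            printk("gpfn %lx -> mfn %"PRI_mfn"\n", gpfn, mfn_x(mfn));
    }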
29.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 29.2 +++ b/xen/include/asm-x86/p2m.h Wed Feb 14 12:02:20 2007 +0000 29.3 @@ -0,0 +1,142 @@ 29.4 +/****************************************************************************** 29.5 + * include/asm-x86/p2m.h 29.6 + * 29.7 + * physical-to-machine mappings for automatically-translated domains. 29.8 + * 29.9 + * Copyright (c) 2007 Advanced Micro Devices (Wei Huang) 29.10 + * Parts of this code are Copyright (c) 2006 by XenSource Inc. 29.11 + * Parts of this code are Copyright (c) 2006 by Michael A Fetterman 29.12 + * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al. 29.13 + * 29.14 + * This program is free software; you can redistribute it and/or modify 29.15 + * it under the terms of the GNU General Public License as published by 29.16 + * the Free Software Foundation; either version 2 of the License, or 29.17 + * (at your option) any later version. 29.18 + * 29.19 + * This program is distributed in the hope that it will be useful, 29.20 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 29.21 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 29.22 + * GNU General Public License for more details. 29.23 + * 29.24 + * You should have received a copy of the GNU General Public License 29.25 + * along with this program; if not, write to the Free Software 29.26 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 29.27 + */ 29.28 + 29.29 +#ifndef _XEN_P2M_H 29.30 +#define _XEN_P2M_H 29.31 + 29.32 + 29.33 +/* The phys_to_machine_mapping is the reversed mapping of MPT for full 29.34 + * virtualization. It is only used by shadow_mode_translate()==true 29.35 + * guests, so we steal the address space that would have normally 29.36 + * been used by the read-only MPT map. 29.37 + */ 29.38 +#define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START) 29.39 + 29.40 + 29.41 +/* Read the current domain's P2M table. 
*/ 29.42 +static inline mfn_t gfn_to_mfn_current(unsigned long gfn) 29.43 +{ 29.44 + l1_pgentry_t l1e = l1e_empty(); 29.45 + int ret; 29.46 + 29.47 + if ( gfn > current->domain->arch.p2m.max_mapped_pfn ) 29.48 + return _mfn(INVALID_MFN); 29.49 + 29.50 + /* Don't read off the end of the p2m table */ 29.51 + ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t)); 29.52 + 29.53 + ret = __copy_from_user(&l1e, 29.54 + &phys_to_machine_mapping[gfn], 29.55 + sizeof(l1e)); 29.56 + 29.57 + if ( (ret == 0) && (l1e_get_flags(l1e) & _PAGE_PRESENT) ) 29.58 + return _mfn(l1e_get_pfn(l1e)); 29.59 + 29.60 + return _mfn(INVALID_MFN); 29.61 +} 29.62 + 29.63 +/* Read another domain's P2M table, mapping pages as we go */ 29.64 +mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn); 29.65 + 29.66 +/* General conversion function from gfn to mfn */ 29.67 +static inline mfn_t gfn_to_mfn(struct domain *d, unsigned long gfn) 29.68 +{ 29.69 + if ( !paging_mode_translate(d) ) 29.70 + return _mfn(gfn); 29.71 + if ( likely(current->domain == d) ) 29.72 + return gfn_to_mfn_current(gfn); 29.73 + else 29.74 + return gfn_to_mfn_foreign(d, gfn); 29.75 +} 29.76 + 29.77 +/* General conversion function from mfn to gfn */ 29.78 +static inline unsigned long mfn_to_gfn(struct domain *d, mfn_t mfn) 29.79 +{ 29.80 + if ( paging_mode_translate(d) ) 29.81 + return get_gpfn_from_mfn(mfn_x(mfn)); 29.82 + else 29.83 + return mfn_x(mfn); 29.84 +} 29.85 + 29.86 +/* Compatibility function for HVM code */ 29.87 +static inline unsigned long get_mfn_from_gpfn(unsigned long pfn) 29.88 +{ 29.89 + return mfn_x(gfn_to_mfn_current(pfn)); 29.90 +} 29.91 + 29.92 +/* Is this guest address an mmio one? (i.e. not defined in p2m map) */ 29.93 +static inline int mmio_space(paddr_t gpa) 29.94 +{ 29.95 + unsigned long gfn = gpa >> PAGE_SHIFT; 29.96 + return !mfn_valid(mfn_x(gfn_to_mfn_current(gfn))); 29.97 +} 29.98 + 29.99 +/* Translate the frame number held in an l1e from guest to machine */ 29.100 +static inline l1_pgentry_t 29.101 +gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e) 29.102 +{ 29.103 + if ( unlikely(paging_mode_translate(d)) ) 29.104 + l1e = l1e_from_pfn(gmfn_to_mfn(d, l1e_get_pfn(l1e)), 29.105 + l1e_get_flags(l1e)); 29.106 + return l1e; 29.107 +} 29.108 + 29.109 + 29.110 + 29.111 +/* Init the datastructures for later use by the p2m code */ 29.112 +void p2m_init(struct domain *d); 29.113 + 29.114 +/* Allocate a new p2m table for a domain. 29.115 + * 29.116 + * The alloc_page and free_page functions will be used to get memory to 29.117 + * build the p2m, and to release it again at the end of day. 29.118 + * 29.119 + * Returns 0 for success or -errno. */ 29.120 +int p2m_alloc_table(struct domain *d, 29.121 + struct page_info * (*alloc_page)(struct domain *d), 29.122 + void (*free_page)(struct domain *d, struct page_info *pg)); 29.123 + 29.124 +/* Return all the p2m resources to Xen. */ 29.125 +void p2m_teardown(struct domain *d); 29.126 + 29.127 +/* Add a page to a domain's p2m table */ 29.128 +void guest_physmap_add_page(struct domain *d, unsigned long gfn, 29.129 + unsigned long mfn); 29.130 + 29.131 +/* Remove a page from a domain's p2m table */ 29.132 +void guest_physmap_remove_page(struct domain *d, unsigned long gfn, 29.133 + unsigned long mfn); 29.134 + 29.135 + 29.136 +#endif /* _XEN_P2M_H */ 29.137 + 29.138 +/* 29.139 + * Local variables: 29.140 + * mode: C 29.141 + * c-set-style: "BSD" 29.142 + * c-basic-offset: 4 29.143 + * indent-tabs-mode: nil 29.144 + * End: 29.145 + */
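The new p2m interface separates table construction from memory allocation: the caller hands p2m_alloc_table() a pair of alloc/free callbacks, and the p2m code uses them for its own pages. Below is a rough sketch of that lifecycle assuming a simple domheap-backed allocator; the real shadow code supplies its own accounted allocator, and the example_* names are hypothetical.

    /* Hypothetical p2m setup path using generic domheap pages. */
    static struct page_info *example_alloc_p2m_page(struct domain *d)
    {
        (void)d;                       /* a real callback would charge d's pool */
        return alloc_domheap_page(NULL);
    }

    static void example_free_p2m_page(struct domain *d, struct page_info *pg)
    {
        (void)d;
        free_domheap_page(pg);
    }

    static int example_p2m_setup(struct domain *d)
    {
        p2m_init(d);                   /* prepare the p2m bookkeeping       */
        /* build the table; returns 0 on success or -errno */
        return p2m_alloc_table(d, example_alloc_p2m_page,
                               example_free_p2m_page);
    }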
30.1 --- a/xen/include/asm-x86/page.h Tue Feb 13 15:32:25 2007 +0000 30.2 +++ b/xen/include/asm-x86/page.h Wed Feb 14 12:02:20 2007 +0000 30.3 @@ -208,8 +208,10 @@ typedef struct { u64 pfn; } pagetable_t; 30.4 #define pagetable_get_paddr(x) ((paddr_t)(x).pfn << PAGE_SHIFT) 30.5 #define pagetable_get_page(x) mfn_to_page((x).pfn) 30.6 #define pagetable_get_pfn(x) ((x).pfn) 30.7 +#define pagetable_get_mfn(x) _mfn(((x).pfn)) 30.8 #define pagetable_is_null(x) ((x).pfn == 0) 30.9 #define pagetable_from_pfn(pfn) ((pagetable_t) { (pfn) }) 30.10 +#define pagetable_from_mfn(mfn) ((pagetable_t) { mfn_x(mfn) }) 30.11 #define pagetable_from_page(pg) pagetable_from_pfn(page_to_mfn(pg)) 30.12 #define pagetable_from_paddr(p) pagetable_from_pfn((p)>>PAGE_SHIFT) 30.13 #define pagetable_null() pagetable_from_pfn(0)
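The two new macros give mfn_t-typed access to a pagetable_t alongside the existing pfn-based ones. A tiny hypothetical illustration of the round trip (example_rewrap is not in the patch; ASSERT is the standard Xen macro):

    static inline pagetable_t example_rewrap(pagetable_t t)
    {
        mfn_t mfn = pagetable_get_mfn(t);           /* pfn wrapped as mfn_t   */
        ASSERT(mfn_x(mfn) == pagetable_get_pfn(t)); /* same value, safer type */
        return pagetable_from_mfn(mfn);             /* identical pagetable_t  */
    }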
31.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 31.2 +++ b/xen/include/asm-x86/paging.h Wed Feb 14 12:02:20 2007 +0000 31.3 @@ -0,0 +1,376 @@ 31.4 +/****************************************************************************** 31.5 + * include/asm-x86/paging.h 31.6 + * 31.7 + * Common interface for paging support 31.8 + * Copyright (c) 2007 Advanced Micro Devices (Wei Huang) 31.9 + * Parts of this code are Copyright (c) 2006 by XenSource Inc. 31.10 + * Parts of this code are Copyright (c) 2006 by Michael A Fetterman 31.11 + * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al. 31.12 + * 31.13 + * This program is free software; you can redistribute it and/or modify 31.14 + * it under the terms of the GNU General Public License as published by 31.15 + * the Free Software Foundation; either version 2 of the License, or 31.16 + * (at your option) any later version. 31.17 + * 31.18 + * This program is distributed in the hope that it will be useful, 31.19 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 31.20 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 31.21 + * GNU General Public License for more details. 31.22 + * 31.23 + * You should have received a copy of the GNU General Public License 31.24 + * along with this program; if not, write to the Free Software 31.25 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 31.26 + */ 31.27 + 31.28 +#ifndef _XEN_PAGING_H 31.29 +#define _XEN_PAGING_H 31.30 + 31.31 +#include <xen/mm.h> 31.32 +#include <public/domctl.h> 31.33 +#include <xen/sched.h> 31.34 +#include <xen/perfc.h> 31.35 +#include <xen/domain_page.h> 31.36 +#include <asm/flushtlb.h> 31.37 +#include <asm/domain.h> 31.38 + 31.39 +/***************************************************************************** 31.40 + * Macros to tell which paging mode a domain is in */ 31.41 + 31.42 +#define PG_SH_shift 20 31.43 +#define PG_HAP_shift 21 31.44 +/* We're in one of the shadow modes */ 31.45 +#define PG_SH_enable (1U << PG_SH_shift) 31.46 +#define PG_HAP_enable (1U << PG_HAP_shift) 31.47 + 31.48 +/* common paging mode bits */ 31.49 +#define PG_mode_shift 10 31.50 +/* Refcounts based on shadow tables instead of guest tables */ 31.51 +#define PG_refcounts (XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT << PG_mode_shift) 31.52 +/* Enable log dirty mode */ 31.53 +#define PG_log_dirty (XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY << PG_mode_shift) 31.54 +/* Xen does p2m translation, not guest */ 31.55 +#define PG_translate (XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE << PG_mode_shift) 31.56 +/* Xen does not steal address space from the domain for its own booking; 31.57 + * requires VT or similar mechanisms */ 31.58 +#define PG_external (XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL << PG_mode_shift) 31.59 + 31.60 +#define paging_mode_enabled(_d) ((_d)->arch.paging.mode) 31.61 +#define paging_mode_shadow(_d) ((_d)->arch.paging.mode & PG_SH_enable) 31.62 +#define paging_mode_hap(_d) ((_d)->arch.paging.mode & PG_HAP_enable) 31.63 + 31.64 +#define paging_mode_refcounts(_d) ((_d)->arch.paging.mode & PG_refcounts) 31.65 +#define paging_mode_log_dirty(_d) ((_d)->arch.paging.mode & PG_log_dirty) 31.66 +#define paging_mode_translate(_d) ((_d)->arch.paging.mode & PG_translate) 31.67 +#define paging_mode_external(_d) ((_d)->arch.paging.mode & PG_external) 31.68 + 31.69 +/****************************************************************************** 31.70 + * The equivalent for a particular vcpu of a shadowed domain. 
*/ 31.71 + 31.72 +/* Is this vcpu using the P2M table to translate between GFNs and MFNs? 31.73 + * 31.74 + * This is true of translated HVM domains on a vcpu which has paging 31.75 + * enabled. (HVM vcpus with paging disabled are using the p2m table as 31.76 + * its paging table, so no translation occurs in this case.) 31.77 + * It is also true for all vcpus of translated PV domains. */ 31.78 +#define paging_vcpu_mode_translate(_v) ((_v)->arch.paging.translate_enabled) 31.79 + 31.80 + 31.81 + 31.82 +/***************************************************************************** 31.83 + * Mode-specific entry points into the shadow code. 31.84 + * 31.85 + * These shouldn't be used directly by callers; rather use the functions 31.86 + * below which will indirect through this table as appropriate. */ 31.87 + 31.88 +struct sh_emulate_ctxt; 31.89 +struct shadow_paging_mode { 31.90 + void (*detach_old_tables )(struct vcpu *v); 31.91 + int (*x86_emulate_write )(struct vcpu *v, unsigned long va, 31.92 + void *src, u32 bytes, 31.93 + struct sh_emulate_ctxt *sh_ctxt); 31.94 + int (*x86_emulate_cmpxchg )(struct vcpu *v, unsigned long va, 31.95 + unsigned long old, 31.96 + unsigned long new, 31.97 + unsigned int bytes, 31.98 + struct sh_emulate_ctxt *sh_ctxt); 31.99 + int (*x86_emulate_cmpxchg8b )(struct vcpu *v, unsigned long va, 31.100 + unsigned long old_lo, 31.101 + unsigned long old_hi, 31.102 + unsigned long new_lo, 31.103 + unsigned long new_hi, 31.104 + struct sh_emulate_ctxt *sh_ctxt); 31.105 + mfn_t (*make_monitor_table )(struct vcpu *v); 31.106 + void (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn); 31.107 + int (*guess_wrmap )(struct vcpu *v, 31.108 + unsigned long vaddr, mfn_t gmfn); 31.109 + /* For outsiders to tell what mode we're in */ 31.110 + unsigned int shadow_levels; 31.111 +}; 31.112 + 31.113 + 31.114 +/************************************************/ 31.115 +/* common paging interface */ 31.116 +/************************************************/ 31.117 +struct paging_mode { 31.118 + int (*page_fault )(struct vcpu *v, unsigned long va, 31.119 + struct cpu_user_regs *regs); 31.120 + int (*invlpg )(struct vcpu *v, unsigned long va); 31.121 + paddr_t (*gva_to_gpa )(struct vcpu *v, unsigned long va); 31.122 + unsigned long (*gva_to_gfn )(struct vcpu *v, unsigned long va); 31.123 + void (*update_cr3 )(struct vcpu *v, int do_locking); 31.124 + void (*update_paging_modes )(struct vcpu *v); 31.125 + void (*write_p2m_entry )(struct vcpu *v, unsigned long gfn, 31.126 + l1_pgentry_t *p, l1_pgentry_t new, 31.127 + unsigned int level); 31.128 + int (*write_guest_entry )(struct vcpu *v, intpte_t *p, 31.129 + intpte_t new, mfn_t gmfn); 31.130 + int (*cmpxchg_guest_entry )(struct vcpu *v, intpte_t *p, 31.131 + intpte_t *old, intpte_t new, 31.132 + mfn_t gmfn); 31.133 + void * (*guest_map_l1e )(struct vcpu *v, unsigned long va, 31.134 + unsigned long *gl1mfn); 31.135 + void (*guest_get_eff_l1e )(struct vcpu *v, unsigned long va, 31.136 + void *eff_l1e); 31.137 + unsigned int guest_levels; 31.138 + 31.139 + /* paging support extension */ 31.140 + struct shadow_paging_mode shadow; 31.141 +}; 31.142 + 31.143 + 31.144 +/***************************************************************************** 31.145 + * Entry points into the paging-assistance code */ 31.146 + 31.147 +/* Initialize the paging resource for vcpu struct. 
It is called by 31.148 + * vcpu_initialise() in domain.c */ 31.149 +void paging_vcpu_init(struct vcpu *v); 31.150 + 31.151 +/* Set up the paging-assistance-specific parts of a domain struct at 31.152 + * start of day. Called for every domain from arch_domain_create() */ 31.153 +void paging_domain_init(struct domain *d); 31.154 + 31.155 +/* Handler for paging-control ops: operations from user-space to enable 31.156 + * and disable ephemeral shadow modes (test mode and log-dirty mode) and 31.157 + * manipulate the log-dirty bitmap. */ 31.158 +int paging_domctl(struct domain *d, xen_domctl_shadow_op_t *sc, 31.159 + XEN_GUEST_HANDLE(void) u_domctl); 31.160 + 31.161 +/* Call when destroying a domain */ 31.162 +void paging_teardown(struct domain *d); 31.163 + 31.164 +/* Call once all of the references to the domain have gone away */ 31.165 +void paging_final_teardown(struct domain *d); 31.166 + 31.167 +/* Enable an arbitrary paging-assistance mode. Call once at domain 31.168 + * creation. */ 31.169 +int paging_enable(struct domain *d, u32 mode); 31.170 + 31.171 + 31.172 +/* Page fault handler 31.173 + * Called from pagefault handler in Xen, and from the HVM trap handlers 31.174 + * for pagefaults. Returns 1 if this fault was an artefact of the 31.175 + * paging code (and the guest should retry) or 0 if it is not (and the 31.176 + * fault should be handled elsewhere or passed to the guest). 31.177 + * 31.178 + * Note: under shadow paging, this function handles all page faults; 31.179 + * however, for hardware-assisted paging, this function handles only 31.180 + * host page faults (i.e. nested page faults). */ 31.181 +static inline int 31.182 +paging_fault(unsigned long va, struct cpu_user_regs *regs) 31.183 +{ 31.184 + struct vcpu *v = current; 31.185 + return v->arch.paging.mode->page_fault(v, va, regs); 31.186 +} 31.187 + 31.188 +/* Handle invlpg requests on vcpus. 31.189 + * Returns 1 if the invlpg instruction should be issued on the hardware, 31.190 + * or 0 if it's safe not to do so. */ 31.191 +static inline int paging_invlpg(struct vcpu *v, unsigned long va) 31.192 +{ 31.193 + return v->arch.paging.mode->invlpg(v, va); 31.194 +} 31.195 + 31.196 +/* Translate a guest virtual address to the physical address that the 31.197 + * *guest* pagetables would map it to. */ 31.198 +static inline paddr_t paging_gva_to_gpa(struct vcpu *v, unsigned long va) 31.199 +{ 31.200 + if ( unlikely(!paging_vcpu_mode_translate(v)) ) 31.201 + return (paddr_t) va; 31.202 + 31.203 + return v->arch.paging.mode->gva_to_gpa(v, va); 31.204 +} 31.205 + 31.206 +/* Translate a guest virtual address to the frame number that the 31.207 + * *guest* pagetables would map it to. */ 31.208 +static inline unsigned long paging_gva_to_gfn(struct vcpu *v, unsigned long va) 31.209 +{ 31.210 + if ( unlikely(!paging_vcpu_mode_translate(v)) ) 31.211 + return va >> PAGE_SHIFT; 31.212 + 31.213 + return v->arch.paging.mode->gva_to_gfn(v, va); 31.214 +} 31.215 + 31.216 +/* Update all the things that are derived from the guest's CR3. 31.217 + * Called when the guest changes CR3; the caller can then use v->arch.cr3 31.218 + * as the value to load into the host CR3 to schedule this vcpu */ 31.219 +static inline void paging_update_cr3(struct vcpu *v) 31.220 +{ 31.221 + v->arch.paging.mode->update_cr3(v, 1); 31.222 +} 31.223 + 31.224 +/* Update all the things that are derived from the guest's CR0/CR3/CR4. 31.225 + * Called to initialize paging structures if the paging mode 31.226 + * has changed, and when bringing up a VCPU for the first time. 
*/ 31.227 +static inline void paging_update_paging_modes(struct vcpu *v) 31.228 +{ 31.229 + v->arch.paging.mode->update_paging_modes(v); 31.230 +} 31.231 + 31.232 + 31.233 +/* Write a new value into the guest pagetable, and update the 31.234 + * paging-assistance state appropriately. Returns 0 if we page-faulted, 31.235 + * 1 for success. */ 31.236 +static inline int paging_write_guest_entry(struct vcpu *v, intpte_t *p, 31.237 + intpte_t new, mfn_t gmfn) 31.238 +{ 31.239 + if ( unlikely(paging_mode_enabled(v->domain) 31.240 + && v->arch.paging.mode != NULL) ) 31.241 + return v->arch.paging.mode->write_guest_entry(v, p, new, gmfn); 31.242 + else 31.243 + return (!__copy_to_user(p, &new, sizeof(new))); 31.244 +} 31.245 + 31.246 + 31.247 +/* Cmpxchg a new value into the guest pagetable, and update the 31.248 + * paging-assistance state appropriately. Returns 0 if we page-faulted, 31.249 + * 1 if not. N.B. caller should check the value of "old" to see if the 31.250 + * cmpxchg itself was successful. */ 31.251 +static inline int paging_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p, 31.252 + intpte_t *old, intpte_t new, 31.253 + mfn_t gmfn) 31.254 +{ 31.255 + if ( unlikely(paging_mode_enabled(v->domain) 31.256 + && v->arch.paging.mode != NULL) ) 31.257 + return v->arch.paging.mode->cmpxchg_guest_entry(v, p, old, new, gmfn); 31.258 + else 31.259 + return (!cmpxchg_user(p, *old, new)); 31.260 +} 31.261 + 31.262 +/* Helper function that writes a pte in such a way that a concurrent read 31.263 + * never sees a half-written entry that has _PAGE_PRESENT set */ 31.264 +static inline void safe_write_pte(l1_pgentry_t *p, l1_pgentry_t new) 31.265 +{ 31.266 +#if CONFIG_PAGING_LEVELS == 3 31.267 + /* PAE machines write 64bit PTEs as two 32bit writes. */ 31.268 + volatile unsigned long *d = (unsigned long *) p; 31.269 + unsigned long *s = (unsigned long *) &new; 31.270 + BUILD_BUG_ON(sizeof (l1_pgentry_t) != 2 * sizeof (unsigned long)); 31.271 + d[0] = 0; 31.272 + d[1] = s[1]; 31.273 + d[0] = s[0]; 31.274 +#else 31.275 + *p = new; 31.276 +#endif 31.277 +} 31.278 + 31.279 +/* Atomically write a P2M entry and update the paging-assistance state 31.280 + * appropriately. */ 31.281 +static inline void paging_write_p2m_entry(struct domain *d, unsigned long gfn, 31.282 + l1_pgentry_t *p, l1_pgentry_t new, 31.283 + unsigned int level) 31.284 +{ 31.285 + struct vcpu *v = current; 31.286 + if ( v->domain != d ) 31.287 + v = d->vcpu[0]; 31.288 + if ( likely(paging_mode_enabled(d) && v->arch.paging.mode != NULL) ) 31.289 + { 31.290 + return v->arch.paging.mode->write_p2m_entry(v, gfn, p, new, level); 31.291 + } 31.292 + else 31.293 + safe_write_pte(p, new); 31.294 +} 31.295 + 31.296 +/* Print paging-assistance info to the console */ 31.297 +void paging_dump_domain_info(struct domain *d); 31.298 +void paging_dump_vcpu_info(struct vcpu *v); 31.299 + 31.300 + 31.301 +/***************************************************************************** 31.302 + * Access to the guest pagetables */ 31.303 + 31.304 +/* Get a mapping of a PV guest's l1e for this virtual address. 
*/ 31.305 +static inline void * 31.306 +guest_map_l1e(struct vcpu *v, unsigned long addr, unsigned long *gl1mfn) 31.307 +{ 31.308 + l2_pgentry_t l2e; 31.309 + 31.310 + if ( unlikely(paging_mode_translate(v->domain)) ) 31.311 + return v->arch.paging.mode->guest_map_l1e(v, addr, gl1mfn); 31.312 + 31.313 + /* Find this l1e and its enclosing l1mfn in the linear map */ 31.314 + if ( __copy_from_user(&l2e, 31.315 + &__linear_l2_table[l2_linear_offset(addr)], 31.316 + sizeof(l2_pgentry_t)) != 0 ) 31.317 + return NULL; 31.318 + /* Check flags that it will be safe to read the l1e */ 31.319 + if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE)) 31.320 + != _PAGE_PRESENT ) 31.321 + return NULL; 31.322 + *gl1mfn = l2e_get_pfn(l2e); 31.323 + return &__linear_l1_table[l1_linear_offset(addr)]; 31.324 +} 31.325 + 31.326 +/* Pull down the mapping we got from guest_map_l1e() */ 31.327 +static inline void 31.328 +guest_unmap_l1e(struct vcpu *v, void *p) 31.329 +{ 31.330 + if ( unlikely(paging_mode_translate(v->domain)) ) 31.331 + unmap_domain_page(p); 31.332 +} 31.333 + 31.334 +/* Read the guest's l1e that maps this address. */ 31.335 +static inline void 31.336 +guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e) 31.337 +{ 31.338 + if ( likely(!paging_mode_translate(v->domain)) ) 31.339 + { 31.340 + ASSERT(!paging_mode_external(v->domain)); 31.341 + if ( __copy_from_user(eff_l1e, 31.342 + &__linear_l1_table[l1_linear_offset(addr)], 31.343 + sizeof(l1_pgentry_t)) != 0 ) 31.344 + *(l1_pgentry_t *)eff_l1e = l1e_empty(); 31.345 + return; 31.346 + } 31.347 + 31.348 + v->arch.paging.mode->guest_get_eff_l1e(v, addr, eff_l1e); 31.349 +} 31.350 + 31.351 +/* Read the guest's l1e that maps this address, from the kernel-mode 31.352 + * pagetables. */ 31.353 +static inline void 31.354 +guest_get_eff_kern_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e) 31.355 +{ 31.356 +#if defined(__x86_64__) 31.357 + int user_mode = !(v->arch.flags & TF_kernel_mode); 31.358 +#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v) 31.359 +#else 31.360 +#define TOGGLE_MODE() ((void)0) 31.361 +#endif 31.362 + 31.363 + TOGGLE_MODE(); 31.364 + guest_get_eff_l1e(v, addr, eff_l1e); 31.365 + TOGGLE_MODE(); 31.366 +} 31.367 + 31.368 + 31.369 + 31.370 +#endif /* XEN_PAGING_H */ 31.371 + 31.372 +/* 31.373 + * Local variables: 31.374 + * mode: C 31.375 + * c-set-style: "BSD" 31.376 + * c-basic-offset: 4 31.377 + * indent-tabs-mode: nil 31.378 + * End: 31.379 + */
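Everything above dispatches through v->arch.paging.mode, with the guest-entry writers falling back to plain uaccess copies when no paging assistance is active. A hypothetical caller sketch follows (example_update_guest_l1e is illustrative; l1e_get_intpte() is the standard page.h accessor) showing how a PV pagetable write would go through the new wrapper:

    /* Write a new guest l1e via the common paging interface, so the shadow
     * (or future HAP) code can observe the update. */
    static int example_update_guest_l1e(struct vcpu *v, l1_pgentry_t *pl1e,
                                        l1_pgentry_t nl1e, mfn_t gl1mfn)
    {
        /* Returns 0 if the write page-faulted, 1 on success; with paging
         * assistance disabled this degenerates to a plain __copy_to_user(). */
        return paging_write_guest_entry(v, (intpte_t *)pl1e,
                                        l1e_get_intpte(nl1e), gl1mfn);
    }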
32.1 --- a/xen/include/asm-x86/shadow.h Tue Feb 13 15:32:25 2007 +0000 32.2 +++ b/xen/include/asm-x86/shadow.h Wed Feb 14 12:02:20 2007 +0000 32.3 @@ -28,45 +28,26 @@ 32.4 #include <xen/perfc.h> 32.5 #include <xen/domain_page.h> 32.6 #include <asm/flushtlb.h> 32.7 +#include <asm/paging.h> 32.8 +#include <asm/p2m.h> 32.9 32.10 /***************************************************************************** 32.11 - * Macros to tell which shadow paging mode a domain is in */ 32.12 + * Macros to tell which shadow paging mode a domain is in*/ 32.13 32.14 -#define SHM2_shift 10 32.15 -/* We're in one of the shadow modes */ 32.16 -#define SHM2_enable (1U << SHM2_shift) 32.17 -/* Refcounts based on shadow tables instead of guest tables */ 32.18 -#define SHM2_refcounts (XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT << SHM2_shift) 32.19 -/* Enable log dirty mode */ 32.20 -#define SHM2_log_dirty (XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY << SHM2_shift) 32.21 -/* Xen does p2m translation, not guest */ 32.22 -#define SHM2_translate (XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE << SHM2_shift) 32.23 -/* Xen does not steal address space from the domain for its own booking; 32.24 - * requires VT or similar mechanisms */ 32.25 -#define SHM2_external (XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL << SHM2_shift) 32.26 - 32.27 -#define shadow_mode_enabled(_d) ((_d)->arch.shadow.mode) 32.28 -#define shadow_mode_refcounts(_d) ((_d)->arch.shadow.mode & SHM2_refcounts) 32.29 -#define shadow_mode_log_dirty(_d) ((_d)->arch.shadow.mode & SHM2_log_dirty) 32.30 -#define shadow_mode_translate(_d) ((_d)->arch.shadow.mode & SHM2_translate) 32.31 -#define shadow_mode_external(_d) ((_d)->arch.shadow.mode & SHM2_external) 32.32 +#define shadow_mode_enabled(_d) paging_mode_shadow(_d) 32.33 +#define shadow_mode_refcounts(_d) (paging_mode_shadow(_d) && \ 32.34 + paging_mode_refcounts(_d)) 32.35 +#define shadow_mode_log_dirty(_d) (paging_mode_shadow(_d) && \ 32.36 + paging_mode_log_dirty(_d)) 32.37 +#define shadow_mode_translate(_d) (paging_mode_shadow(_d) && \ 32.38 + paging_mode_translate(_d)) 32.39 +#define shadow_mode_external(_d) (paging_mode_shadow(_d) && \ 32.40 + paging_mode_external(_d)) 32.41 32.42 /* Xen traps & emulates all reads of all page table pages: 32.43 * not yet supported */ 32.44 #define shadow_mode_trap_reads(_d) ({ (void)(_d); 0; }) 32.45 32.46 - 32.47 -/****************************************************************************** 32.48 - * The equivalent for a particular vcpu of a shadowed domain. */ 32.49 - 32.50 -/* Is this vcpu using the P2M table to translate between GFNs and MFNs? 32.51 - * 32.52 - * This is true of translated HVM domains on a vcpu which has paging 32.53 - * enabled. (HVM vcpus with paging disabled are using the p2m table as 32.54 - * its paging table, so no translation occurs in this case.) 32.55 - * It is also true for all vcpus of translated PV domains. */ 32.56 -#define shadow_vcpu_mode_translate(_v) ((_v)->arch.shadow.translate_enabled) 32.57 - 32.58 /* 32.59 * 32on64 support 32.60 */ 32.61 @@ -76,122 +57,18 @@ 32.62 #define pv_32bit_guest(_v) (!is_hvm_vcpu(_v)) 32.63 #endif 32.64 32.65 -/****************************************************************************** 32.66 - * With shadow pagetables, the different kinds of address start 32.67 - * to get get confusing. 32.68 - * 32.69 - * Virtual addresses are what they usually are: the addresses that are used 32.70 - * to accessing memory while the guest is running. The MMU translates from 32.71 - * virtual addresses to machine addresses. 
32.72 - * 32.73 - * (Pseudo-)physical addresses are the abstraction of physical memory the 32.74 - * guest uses for allocation and so forth. For the purposes of this code, 32.75 - * we can largely ignore them. 32.76 - * 32.77 - * Guest frame numbers (gfns) are the entries that the guest puts in its 32.78 - * pagetables. For normal paravirtual guests, they are actual frame numbers, 32.79 - * with the translation done by the guest. 32.80 - * 32.81 - * Machine frame numbers (mfns) are the entries that the hypervisor puts 32.82 - * in the shadow page tables. 32.83 - * 32.84 - * Elsewhere in the xen code base, the name "gmfn" is generally used to refer 32.85 - * to a "machine frame number, from the guest's perspective", or in other 32.86 - * words, pseudo-physical frame numbers. However, in the shadow code, the 32.87 - * term "gmfn" means "the mfn of a guest page"; this combines naturally with 32.88 - * other terms such as "smfn" (the mfn of a shadow page), gl2mfn (the mfn of a 32.89 - * guest L2 page), etc... 32.90 - */ 32.91 - 32.92 -/* With this defined, we do some ugly things to force the compiler to 32.93 - * give us type safety between mfns and gfns and other integers. 32.94 - * TYPE_SAFE(int foo) defines a foo_t, and _foo() and foo_x() functions 32.95 - * that translate beween int and foo_t. 32.96 - * 32.97 - * It does have some performance cost because the types now have 32.98 - * a different storage attribute, so may not want it on all the time. */ 32.99 -#ifndef NDEBUG 32.100 -#define TYPE_SAFETY 1 32.101 -#endif 32.102 - 32.103 -#ifdef TYPE_SAFETY 32.104 -#define TYPE_SAFE(_type,_name) \ 32.105 -typedef struct { _type _name; } _name##_t; \ 32.106 -static inline _name##_t _##_name(_type n) { return (_name##_t) { n }; } \ 32.107 -static inline _type _name##_x(_name##_t n) { return n._name; } 32.108 -#else 32.109 -#define TYPE_SAFE(_type,_name) \ 32.110 -typedef _type _name##_t; \ 32.111 -static inline _name##_t _##_name(_type n) { return n; } \ 32.112 -static inline _type _name##_x(_name##_t n) { return n; } 32.113 -#endif 32.114 - 32.115 -TYPE_SAFE(unsigned long,mfn) 32.116 - 32.117 -/* Macro for printk formats: use as printk("%"SH_PRI_mfn"\n", mfn_x(foo)); */ 32.118 -#define SH_PRI_mfn "05lx" 32.119 - 32.120 - 32.121 -/***************************************************************************** 32.122 - * Mode-specific entry points into the shadow code. 32.123 - * 32.124 - * These shouldn't be used directly by callers; rather use the functions 32.125 - * below which will indirect through this table as appropriate. 
*/ 32.126 - 32.127 -struct sh_emulate_ctxt; 32.128 -struct shadow_paging_mode { 32.129 - int (*page_fault )(struct vcpu *v, unsigned long va, 32.130 - struct cpu_user_regs *regs); 32.131 - int (*invlpg )(struct vcpu *v, unsigned long va); 32.132 - paddr_t (*gva_to_gpa )(struct vcpu *v, unsigned long va); 32.133 - unsigned long (*gva_to_gfn )(struct vcpu *v, unsigned long va); 32.134 - void (*update_cr3 )(struct vcpu *v, int do_locking); 32.135 - int (*map_and_validate_gl1e )(struct vcpu *v, mfn_t gmfn, 32.136 - void *new_guest_entry, u32 size); 32.137 - int (*map_and_validate_gl2e )(struct vcpu *v, mfn_t gmfn, 32.138 - void *new_guest_entry, u32 size); 32.139 - int (*map_and_validate_gl2he)(struct vcpu *v, mfn_t gmfn, 32.140 - void *new_guest_entry, u32 size); 32.141 - int (*map_and_validate_gl3e )(struct vcpu *v, mfn_t gmfn, 32.142 - void *new_guest_entry, u32 size); 32.143 - int (*map_and_validate_gl4e )(struct vcpu *v, mfn_t gmfn, 32.144 - void *new_guest_entry, u32 size); 32.145 - void (*detach_old_tables )(struct vcpu *v); 32.146 - int (*x86_emulate_write )(struct vcpu *v, unsigned long va, 32.147 - void *src, u32 bytes, 32.148 - struct sh_emulate_ctxt *sh_ctxt); 32.149 - int (*x86_emulate_cmpxchg )(struct vcpu *v, unsigned long va, 32.150 - unsigned long old, 32.151 - unsigned long new, 32.152 - unsigned int bytes, 32.153 - struct sh_emulate_ctxt *sh_ctxt); 32.154 - int (*x86_emulate_cmpxchg8b )(struct vcpu *v, unsigned long va, 32.155 - unsigned long old_lo, 32.156 - unsigned long old_hi, 32.157 - unsigned long new_lo, 32.158 - unsigned long new_hi, 32.159 - struct sh_emulate_ctxt *sh_ctxt); 32.160 - mfn_t (*make_monitor_table )(struct vcpu *v); 32.161 - void (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn); 32.162 - void * (*guest_map_l1e )(struct vcpu *v, unsigned long va, 32.163 - unsigned long *gl1mfn); 32.164 - void (*guest_get_eff_l1e )(struct vcpu *v, unsigned long va, 32.165 - void *eff_l1e); 32.166 - int (*guess_wrmap )(struct vcpu *v, 32.167 - unsigned long vaddr, mfn_t gmfn); 32.168 - /* For outsiders to tell what mode we're in */ 32.169 - unsigned int shadow_levels; 32.170 - unsigned int guest_levels; 32.171 -}; 32.172 - 32.173 32.174 /***************************************************************************** 32.175 * Entry points into the shadow code */ 32.176 32.177 /* Set up the shadow-specific parts of a domain struct at start of day. 32.178 - * Called for every domain from arch_domain_create() */ 32.179 + * Called from paging_domain_init(). */ 32.180 void shadow_domain_init(struct domain *d); 32.181 32.182 +/* Setup the shadow-specific parts of a vcpu struct. It is called by 32.183 + * paging_vcpu_init() in paging.c */ 32.184 +void shadow_vcpu_init(struct vcpu *v); 32.185 + 32.186 /* Enable an arbitrary shadow mode. Call once at domain creation. */ 32.187 int shadow_enable(struct domain *d, u32 mode); 32.188 32.189 @@ -218,137 +95,12 @@ static inline void mark_dirty(struct dom 32.190 shadow_mark_dirty(d, _mfn(gmfn)); 32.191 } 32.192 32.193 -/* Handle page-faults caused by the shadow pagetable mechanisms. 32.194 - * Called from pagefault handler in Xen, and from the HVM trap handlers 32.195 - * for pagefaults. Returns 1 if this fault was an artefact of the 32.196 - * shadow code (and the guest should retry) or 0 if it is not (and the 32.197 - * fault should be handled elsewhere or passed to the guest). 
*/ 32.198 -static inline int shadow_fault(unsigned long va, struct cpu_user_regs *regs) 32.199 -{ 32.200 - struct vcpu *v = current; 32.201 - perfc_incrc(shadow_fault); 32.202 - return v->arch.shadow.mode->page_fault(v, va, regs); 32.203 -} 32.204 - 32.205 -/* Handle invlpg requests on shadowed vcpus. 32.206 - * Returns 1 if the invlpg instruction should be issued on the hardware, 32.207 - * or 0 if it's safe not to do so. */ 32.208 -static inline int shadow_invlpg(struct vcpu *v, unsigned long va) 32.209 -{ 32.210 - return v->arch.shadow.mode->invlpg(v, va); 32.211 -} 32.212 - 32.213 -/* Translate a guest virtual address to the physical address that the 32.214 - * *guest* pagetables would map it to. */ 32.215 -static inline paddr_t shadow_gva_to_gpa(struct vcpu *v, unsigned long va) 32.216 -{ 32.217 - if ( unlikely(!shadow_vcpu_mode_translate(v)) ) 32.218 - return (paddr_t) va; 32.219 - return v->arch.shadow.mode->gva_to_gpa(v, va); 32.220 -} 32.221 - 32.222 -/* Translate a guest virtual address to the frame number that the 32.223 - * *guest* pagetables would map it to. */ 32.224 -static inline unsigned long shadow_gva_to_gfn(struct vcpu *v, unsigned long va) 32.225 -{ 32.226 - if ( unlikely(!shadow_vcpu_mode_translate(v)) ) 32.227 - return va >> PAGE_SHIFT; 32.228 - return v->arch.shadow.mode->gva_to_gfn(v, va); 32.229 -} 32.230 - 32.231 -/* Update all the things that are derived from the guest's CR3. 32.232 - * Called when the guest changes CR3; the caller can then use v->arch.cr3 32.233 - * as the value to load into the host CR3 to schedule this vcpu */ 32.234 -static inline void shadow_update_cr3(struct vcpu *v) 32.235 -{ 32.236 - v->arch.shadow.mode->update_cr3(v, 1); 32.237 -} 32.238 - 32.239 /* Update all the things that are derived from the guest's CR0/CR3/CR4. 32.240 * Called to initialize paging structures if the paging mode 32.241 * has changed, and when bringing up a VCPU for the first time. */ 32.242 void shadow_update_paging_modes(struct vcpu *v); 32.243 32.244 32.245 -/***************************************************************************** 32.246 - * Access to the guest pagetables */ 32.247 - 32.248 -/* Get a mapping of a PV guest's l1e for this virtual address. */ 32.249 -static inline void * 32.250 -guest_map_l1e(struct vcpu *v, unsigned long addr, unsigned long *gl1mfn) 32.251 -{ 32.252 - l2_pgentry_t l2e; 32.253 - 32.254 - if ( unlikely(shadow_mode_translate(v->domain)) ) 32.255 - return v->arch.shadow.mode->guest_map_l1e(v, addr, gl1mfn); 32.256 - 32.257 - /* Find this l1e and its enclosing l1mfn in the linear map */ 32.258 - if ( __copy_from_user(&l2e, 32.259 - &__linear_l2_table[l2_linear_offset(addr)], 32.260 - sizeof(l2_pgentry_t)) != 0 ) 32.261 - return NULL; 32.262 - /* Check flags that it will be safe to read the l1e */ 32.263 - if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE)) 32.264 - != _PAGE_PRESENT ) 32.265 - return NULL; 32.266 - *gl1mfn = l2e_get_pfn(l2e); 32.267 - return &__linear_l1_table[l1_linear_offset(addr)]; 32.268 -} 32.269 - 32.270 -/* Pull down the mapping we got from guest_map_l1e() */ 32.271 -static inline void 32.272 -guest_unmap_l1e(struct vcpu *v, void *p) 32.273 -{ 32.274 - if ( unlikely(shadow_mode_translate(v->domain)) ) 32.275 - unmap_domain_page(p); 32.276 -} 32.277 - 32.278 -/* Read the guest's l1e that maps this address. 
*/ 32.279 -static inline void 32.280 -guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e) 32.281 -{ 32.282 - if ( likely(!shadow_mode_translate(v->domain)) ) 32.283 - { 32.284 - ASSERT(!shadow_mode_external(v->domain)); 32.285 - if ( __copy_from_user(eff_l1e, 32.286 - &__linear_l1_table[l1_linear_offset(addr)], 32.287 - sizeof(l1_pgentry_t)) != 0 ) 32.288 - *(l1_pgentry_t *)eff_l1e = l1e_empty(); 32.289 - return; 32.290 - } 32.291 - 32.292 - v->arch.shadow.mode->guest_get_eff_l1e(v, addr, eff_l1e); 32.293 -} 32.294 - 32.295 -/* Read the guest's l1e that maps this address, from the kernel-mode 32.296 - * pagetables. */ 32.297 -static inline void 32.298 -guest_get_eff_kern_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e) 32.299 -{ 32.300 -#if defined(__x86_64__) 32.301 - int user_mode = !(v->arch.flags & TF_kernel_mode); 32.302 -#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v) 32.303 -#else 32.304 -#define TOGGLE_MODE() ((void)0) 32.305 -#endif 32.306 - 32.307 - TOGGLE_MODE(); 32.308 - guest_get_eff_l1e(v, addr, eff_l1e); 32.309 - TOGGLE_MODE(); 32.310 -} 32.311 - 32.312 -/* Write a new value into the guest pagetable, and update the shadows 32.313 - * appropriately. Returns 0 if we page-faulted, 1 for success. */ 32.314 -int shadow_write_guest_entry(struct vcpu *v, intpte_t *p, 32.315 - intpte_t new, mfn_t gmfn); 32.316 - 32.317 -/* Cmpxchg a new value into the guest pagetable, and update the shadows 32.318 - * appropriately. Returns 0 if we page-faulted, 1 if not. 32.319 - * N.B. caller should check the value of "old" to see if the 32.320 - * cmpxchg itself was successful. */ 32.321 -int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p, 32.322 - intpte_t *old, intpte_t new, mfn_t gmfn); 32.323 - 32.324 /* Remove all mappings of the guest page from the shadows. 32.325 * This is called from common code. It does not flush TLBs. */ 32.326 int sh_remove_all_mappings(struct vcpu *v, mfn_t target_mfn); 32.327 @@ -368,99 +120,6 @@ static inline void shadow_remove_all_sha 32.328 sh_remove_shadows(v, gmfn, 0 /* Be thorough */, 1 /* Must succeed */); 32.329 } 32.330 32.331 -/**************************************************************************/ 32.332 -/* Guest physmap (p2m) support 32.333 - * 32.334 - * The phys_to_machine_mapping is the reversed mapping of MPT for full 32.335 - * virtualization. It is only used by shadow_mode_translate()==true 32.336 - * guests, so we steal the address space that would have normally 32.337 - * been used by the read-only MPT map. 32.338 - */ 32.339 -#define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START) 32.340 - 32.341 -/* Add a page to a domain's p2m table */ 32.342 -void shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn, 32.343 - unsigned long mfn); 32.344 - 32.345 -/* Remove a page from a domain's p2m table */ 32.346 -void shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn, 32.347 - unsigned long mfn); 32.348 - 32.349 -/* Aliases, called from common code. */ 32.350 -#define guest_physmap_add_page shadow_guest_physmap_add_page 32.351 -#define guest_physmap_remove_page shadow_guest_physmap_remove_page 32.352 - 32.353 -/* Read the current domain's P2M table. 
*/ 32.354 -static inline mfn_t sh_gfn_to_mfn_current(unsigned long gfn) 32.355 -{ 32.356 - l1_pgentry_t l1e = l1e_empty(); 32.357 - int ret; 32.358 - 32.359 - if ( gfn > current->domain->arch.max_mapped_pfn ) 32.360 - return _mfn(INVALID_MFN); 32.361 - 32.362 - /* Don't read off the end of the p2m table */ 32.363 - ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t)); 32.364 - 32.365 - ret = __copy_from_user(&l1e, 32.366 - &phys_to_machine_mapping[gfn], 32.367 - sizeof(l1e)); 32.368 - 32.369 - if ( (ret == 0) && (l1e_get_flags(l1e) & _PAGE_PRESENT) ) 32.370 - return _mfn(l1e_get_pfn(l1e)); 32.371 - 32.372 - return _mfn(INVALID_MFN); 32.373 -} 32.374 - 32.375 -/* Read another domain's P2M table, mapping pages as we go */ 32.376 -mfn_t sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn); 32.377 - 32.378 -/* General conversion function from gfn to mfn */ 32.379 -static inline mfn_t 32.380 -sh_gfn_to_mfn(struct domain *d, unsigned long gfn) 32.381 -{ 32.382 - if ( !shadow_mode_translate(d) ) 32.383 - return _mfn(gfn); 32.384 - if ( likely(current->domain == d) ) 32.385 - return sh_gfn_to_mfn_current(gfn); 32.386 - else 32.387 - return sh_gfn_to_mfn_foreign(d, gfn); 32.388 -} 32.389 - 32.390 -/* Compatibility function for HVM code */ 32.391 -static inline unsigned long get_mfn_from_gpfn(unsigned long pfn) 32.392 -{ 32.393 - return mfn_x(sh_gfn_to_mfn_current(pfn)); 32.394 -} 32.395 - 32.396 -/* General conversion function from mfn to gfn */ 32.397 -static inline unsigned long 32.398 -sh_mfn_to_gfn(struct domain *d, mfn_t mfn) 32.399 -{ 32.400 - if ( shadow_mode_translate(d) ) 32.401 - return get_gpfn_from_mfn(mfn_x(mfn)); 32.402 - else 32.403 - return mfn_x(mfn); 32.404 -} 32.405 - 32.406 -/* Is this guest address an mmio one? (i.e. not defined in p2m map) */ 32.407 -static inline int 32.408 -mmio_space(paddr_t gpa) 32.409 -{ 32.410 - unsigned long gfn = gpa >> PAGE_SHIFT; 32.411 - return !mfn_valid(mfn_x(sh_gfn_to_mfn_current(gfn))); 32.412 -} 32.413 - 32.414 -/* Translate the frame number held in an l1e from guest to machine */ 32.415 -static inline l1_pgentry_t 32.416 -gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e) 32.417 -{ 32.418 - if ( unlikely(shadow_mode_translate(d)) ) 32.419 - l1e = l1e_from_pfn(gmfn_to_mfn(d, l1e_get_pfn(l1e)), 32.420 - l1e_get_flags(l1e)); 32.421 - return l1e; 32.422 -} 32.423 - 32.424 #endif /* _XEN_SHADOW_H */ 32.425 32.426 /*
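With the generic entry points gone from shadow.h, former shadow_* callers switch to the paging_* spellings, while the shadow_mode_* predicates remain as thin wrappers for shadow-only code. A minimal hypothetical sketch of the renaming pattern applied throughout arch/x86 (example_translate is illustrative only):

    static unsigned long example_translate(struct vcpu *v, unsigned long va)
    {
        /* previously: return shadow_gva_to_gfn(v, va); */
        return paging_gva_to_gfn(v, va);
    }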