changeset 18996:2090917489c5
PoD memory 7/9: Xen interface
Implement Xen interface to PoD functionality.
* Increase the number of MEMOP bits from 4 to 6 (increasing the number
of available memory operations from 16 to 64).
* Introduce XENMEMF_populate_on_demand, which will cause
populate_physmap() to fill a range with PoD entries rather than
backing it with RAM.
* Introduce the XENMEM_[sg]et_pod_target operations to the memory
hypercall, to get and set the PoD cache size. set_pod_target() should be
called during domain creation, as well as after modifying the memory
target of any domain which may have outstanding PoD entries.
Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>
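Taken together, the changes listed above give the toolstack a two-step flow: populate the guest physmap with PoD entries, then size the PoD cache with XENMEM_set_pod_target. The sketch below is illustrative only; `memory_op()` is a hypothetical stand-in for whatever hypercall wrapper the toolstack uses, and error handling and header paths are simplified.

```c
#include <xen/xen.h>      /* domid_t, xen_pfn_t, set_xen_guest_handle(); path is build-specific */
#include <xen/memory.h>   /* xen_memory_reservation, xen_pod_target */

/* Hypothetical wrapper around the XENMEM hypercall (whatever libxc-style
 * plumbing the toolstack provides); not part of this changeset. */
extern int memory_op(unsigned int cmd, void *arg);

/* Sketch: give 'domid' a physmap of 'nr_pages' order-0 PoD entries, backed
 * by a cache of 'target_pages' pages of real RAM (target_pages <= nr_pages). */
static int pod_build_sketch(domid_t domid, xen_pfn_t *extents,
                            unsigned long nr_pages,
                            unsigned long target_pages)
{
    struct xen_memory_reservation res = {
        .nr_extents   = nr_pages,
        .extent_order = 0,
        /* New flag: fill the range with PoD entries instead of allocating RAM. */
        .mem_flags    = XENMEMF_populate_on_demand,
        .domid        = domid,
    };
    struct xen_pod_target pod = {
        .target_pages = target_pages,   /* basis for the PoD cache size */
        .domid        = domid,
    };
    int rc;

    set_xen_guest_handle(res.extent_start, extents);

    rc = memory_op(XENMEM_populate_physmap, &res);
    if ( rc < 0 )
        return rc;

    /* Size the PoD cache now, and again whenever the domain's memory target
     * changes while PoD entries are still outstanding. */
    return memory_op(XENMEM_set_pod_target, &pod);
}
```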
| author   | Keir Fraser <keir.fraser@citrix.com> |
| -------- | --- |
| date     | Mon Jan 05 10:45:48 2009 +0000 (2009-01-05) |
| parents  | ebe11a452393 |
| children | 2a8ae362a828 |
| files    | xen/arch/x86/mm.c xen/arch/x86/mm/p2m.c xen/arch/x86/x86_64/compat/mm.c xen/common/memory.c xen/include/asm-x86/p2m.h xen/include/public/memory.h xen/include/xen/hypercall.h xen/include/xen/mm.h xen/include/xlat.lst |
line diff
--- a/xen/arch/x86/mm.c Mon Jan 05 10:45:09 2009 +0000
+++ b/xen/arch/x86/mm.c Mon Jan 05 10:45:48 2009 +0000
@@ -3976,6 +3976,49 @@ long arch_memory_op(int op, XEN_GUEST_HA
         return 0;
     }
 
+    case XENMEM_set_pod_target:
+    case XENMEM_get_pod_target:
+    {
+        xen_pod_target_t target;
+        struct domain *d;
+
+        /* Support DOMID_SELF? */
+        if ( !IS_PRIV(current->domain) )
+            return -EINVAL;
+
+        if ( copy_from_guest(&target, arg, 1) )
+            return -EFAULT;
+
+        rc = rcu_lock_target_domain_by_id(target.domid, &d);
+        if ( rc != 0 )
+            return rc;
+
+        if ( op == XENMEM_set_pod_target )
+        {
+            if ( target.target_pages > d->max_pages )
+            {
+                rc = -EINVAL;
+                goto pod_target_out_unlock;
+            }
+
+            rc = p2m_pod_set_mem_target(d, target.target_pages);
+        }
+
+        target.tot_pages = d->tot_pages;
+        target.pod_cache_pages = d->arch.p2m->pod.count;
+        target.pod_entries = d->arch.p2m->pod.entry_count;
+
+        if ( copy_to_guest(arg, &target, 1) )
+        {
+            rc= -EFAULT;
+            goto pod_target_out_unlock;
+        }
+
+    pod_target_out_unlock:
+        rcu_unlock_domain(d);
+        return rc;
+    }
+
     default:
         return subarch_memory_op(op, arg);
     }
--- a/xen/arch/x86/mm/p2m.c Mon Jan 05 10:45:09 2009 +0000
+++ b/xen/arch/x86/mm/p2m.c Mon Jan 05 10:45:48 2009 +0000
@@ -387,6 +387,150 @@ static struct page_info * p2m_pod_cache_
     return p;
 }
 
+/* Set the size of the cache, allocating or freeing as necessary. */
+static int
+p2m_pod_set_cache_target(struct domain *d, unsigned long pod_target)
+{
+    struct p2m_domain *p2md = d->arch.p2m;
+    int ret = 0;
+
+    /* Increasing the target */
+    while ( pod_target > p2md->pod.count )
+    {
+        struct page_info * page;
+        int order;
+
+        if ( (pod_target - p2md->pod.count) >= (1>>9) )
+            order = 9;
+        else
+            order = 0;
+
+        page = alloc_domheap_pages(d, order, 0);
+        if ( unlikely(page == NULL) )
+            goto out;
+
+        p2m_pod_cache_add(d, page, order);
+    }
+
+    /* Decreasing the target */
+    /* We hold the p2m lock here, so we don't need to worry about
+     * cache disappearing under our feet. */
+    while ( pod_target < p2md->pod.count )
+    {
+        struct page_info * page;
+        int order, i;
+
+        /* Grab the lock before checking that pod.super is empty, or the last
+         * entries may disappear before we grab the lock. */
+        spin_lock(&d->page_alloc_lock);
+
+        if ( (p2md->pod.count - pod_target) > (1>>9)
+             && !list_empty(&p2md->pod.super) )
+            order = 9;
+        else
+            order = 0;
+
+        page = p2m_pod_cache_get(d, order);
+
+        ASSERT(page != NULL);
+
+        spin_unlock(&d->page_alloc_lock);
+
+        /* Then free them */
+        for ( i = 0 ; i < (1 << order) ; i++ )
+        {
+            /* Copied from common/memory.c:guest_remove_page() */
+            if ( unlikely(!get_page(page+i, d)) )
+            {
+                gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);
+                ret = -EINVAL;
+                goto out;
+            }
+
+            if ( test_and_clear_bit(_PGT_pinned, &(page+i)->u.inuse.type_info) )
+                put_page_and_type(page+i);
+
+            if ( test_and_clear_bit(_PGC_allocated, &(page+i)->count_info) )
+                put_page(page+i);
+
+            put_page(page+i);
+        }
+    }
+
+out:
+    return ret;
+}
+
+/*
+ * The "right behavior" here requires some careful thought. First, some
+ * definitions:
+ * + M: static_max
+ * + B: number of pages the balloon driver has ballooned down to.
+ * + P: Number of populated pages.
+ * + T: Old target
+ * + T': New target
+ *
+ * The following equations should hold:
+ *   0 <= P <= T <= B <= M
+ *   d->arch.p2m->pod.entry_count == B - P
+ *   d->tot_pages == P + d->arch.p2m->pod.count
+ *
+ * Now we have the following potential cases to cover:
+ *   B <T': Set the PoD cache size equal to the number of outstanding PoD
+ *     entries. The balloon driver will deflate the balloon to give back
+ *     the remainder of the ram to the guest OS.
+ *   T <T'<B : Increase PoD cache size.
+ *   T'<T<=B : Here we have a choice. We can decrease the size of the cache,
+ *     get the memory right away. However, that means every time we
+ *     reduce the memory target we risk the guest attempting to populate the
+ *     memory before the balloon driver has reached its new target. Safer to
+ *     never reduce the cache size here, but only when the balloon driver frees
+ *     PoD ranges.
+ *
+ * If there are many zero pages, we could reach the target also by doing
+ * zero sweeps and marking the ranges PoD; but the balloon driver will have
+ * to free this memory eventually anyway, so we don't actually gain that much
+ * by doing so.
+ *
+ * NB that the equation (B<T') may require adjustment to the cache
+ * size as PoD pages are freed as well; i.e., freeing a PoD-backed
+ * entry when pod.entry_count == pod.count requires us to reduce both
+ * pod.entry_count and pod.count.
+ */
+int
+p2m_pod_set_mem_target(struct domain *d, unsigned long target)
+{
+    unsigned pod_target;
+    struct p2m_domain *p2md = d->arch.p2m;
+    int ret = 0;
+    unsigned long populated;
+
+    /* P == B: Nothing to do. */
+    if ( p2md->pod.entry_count == 0 )
+        goto out;
+
+    /* T' < B: Don't reduce the cache size; let the balloon driver
+     * take care of it. */
+    if ( target < d->tot_pages )
+        goto out;
+
+    populated = d->tot_pages - p2md->pod.count;
+
+    pod_target = target - populated;
+
+    /* B < T': Set the cache size equal to # of outstanding entries,
+     * let the balloon driver fill in the rest. */
+    if ( pod_target > p2md->pod.entry_count )
+        pod_target = p2md->pod.entry_count;
+
+    ASSERT( pod_target > p2md->pod.count );
+
+    ret = p2m_pod_set_cache_target(d, pod_target);
+
+out:
+    return ret;
+}
+
 void
 p2m_pod_empty_cache(struct domain *d)
 {
@@ -538,6 +682,13 @@ p2m_pod_decrease_reservation(struct doma
         }
     }
 
+    /* If we've reduced our "liabilities" beyond our "assets", free some */
+    if ( p2md->pod.entry_count < p2md->pod.count )
+    {
+        printk("b %d\n", p2md->pod.entry_count);
+        p2m_pod_set_cache_target(d, p2md->pod.entry_count);
+    }
+
     /* If there are no more non-PoD entries, tell decrease_reservation() that
      * there's nothing left to do. */
     if ( nonpod == 0 )
@@ -786,7 +937,7 @@ p2m_pod_emergency_sweep_super(struct dom
         /* Stop if we're past our limit and we have found *something*.
          *
          * NB that this is a zero-sum game; we're increasing our cache size
-         * by re-increasing our 'debt'. Since we hold the p2m lock,
+         * by increasing our 'debt'. Since we hold the p2m lock,
          * (entry_count - count) must remain the same. */
         if ( !list_empty(&p2md->pod.super) && i < limit )
             break;
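The comment block in the hunk above pins down the arithmetic with the invariants 0 <= P <= T <= B <= M, entry_count == B - P, and tot_pages == P + count. A worked numeric example makes the clamping easier to follow; the following standalone sketch (hypothetical helper `pod_cache_target()`, no Xen headers) mirrors the same arithmetic as p2m_pod_set_mem_target() under two example targets.

```c
#include <stdio.h>

/* Standalone sketch of the arithmetic in p2m_pod_set_mem_target().
 * In the notation of the comment above:
 * entry_count == B - P, tot_pages == P + count, target == T'. */
static unsigned long
pod_cache_target(unsigned long tot_pages,    /* P + count */
                 unsigned long pod_count,    /* current PoD cache size */
                 unsigned long entry_count,  /* outstanding PoD entries, B - P */
                 unsigned long target)       /* T' */
{
    unsigned long populated, pod_target;

    if ( entry_count == 0 )       /* P == B: nothing to do */
        return pod_count;
    if ( target < tot_pages )     /* shrinking: left to the balloon driver */
        return pod_count;

    populated  = tot_pages - pod_count;   /* P */
    pod_target = target - populated;      /* T' - P */

    if ( pod_target > entry_count )       /* B < T': clamp to outstanding entries */
        pod_target = entry_count;

    return pod_target;
}

int main(void)
{
    /* Example: P = 256 populated pages, cache count = 64, 512 PoD entries
     * outstanding (so B = 768). Raising the target to 640 grows the cache
     * to 640 - 256 = 384; raising it to 1024 (> B) clamps it to 512. */
    printf("%lu\n", pod_cache_target(256 + 64, 64, 512, 640));   /* 384 */
    printf("%lu\n", pod_cache_target(256 + 64, 64, 512, 1024));  /* 512 */
    return 0;
}
```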
--- a/xen/arch/x86/x86_64/compat/mm.c Mon Jan 05 10:45:09 2009 +0000
+++ b/xen/arch/x86/x86_64/compat/mm.c Mon Jan 05 10:45:48 2009 +0000
@@ -128,6 +128,29 @@ int compat_arch_memory_op(int op, XEN_GU
         break;
     }
 
+    case XENMEM_set_pod_target:
+    case XENMEM_get_pod_target:
+    {
+        struct compat_pod_target cmp;
+        struct xen_pod_target *nat = (void *)COMPAT_ARG_XLAT_VIRT_BASE;
+
+        if ( copy_from_guest(&cmp, arg, 1) )
+            return -EFAULT;
+
+        XLAT_pod_target(nat, &cmp);
+
+        rc = arch_memory_op(op, guest_handle_from_ptr(nat, void));
+        if ( rc < 0 )
+            break;
+
+        XLAT_pod_target(&cmp, nat);
+
+        if ( copy_to_guest(arg, &cmp, 1) )
+            rc = -EFAULT;
+
+        break;
+    }
+
     case XENMEM_machphys_mapping:
     {
         struct domain *d = current->domain;
--- a/xen/common/memory.c Mon Jan 05 10:45:09 2009 +0000
+++ b/xen/common/memory.c Mon Jan 05 10:45:48 2009 +0000
@@ -111,31 +111,40 @@ static void populate_physmap(struct memo
         if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) )
             goto out;
 
-        page = alloc_domheap_pages(d, a->extent_order, a->memflags);
-        if ( unlikely(page == NULL) )
+        if ( a->memflags & MEMF_populate_on_demand )
+        {
+            if ( guest_physmap_mark_populate_on_demand(d, gpfn,
+                                                       a->extent_order) < 0 )
+                goto out;
+        }
+        else
         {
-            gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
-                     "id=%d memflags=%x (%ld of %d)\n",
-                     a->extent_order, d->domain_id, a->memflags,
-                     i, a->nr_extents);
-            goto out;
-        }
+            page = alloc_domheap_pages(d, a->extent_order, a->memflags);
+            if ( unlikely(page == NULL) )
+            {
+                gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
+                         "id=%d memflags=%x (%ld of %d)\n",
+                         a->extent_order, d->domain_id, a->memflags,
+                         i, a->nr_extents);
+                goto out;
+            }
 
-        mfn = page_to_mfn(page);
-        guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
+            mfn = page_to_mfn(page);
+            guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
 
-        if ( !paging_mode_translate(d) )
-        {
-            for ( j = 0; j < (1 << a->extent_order); j++ )
-                set_gpfn_from_mfn(mfn + j, gpfn + j);
+            if ( !paging_mode_translate(d) )
+            {
+                for ( j = 0; j < (1 << a->extent_order); j++ )
+                    set_gpfn_from_mfn(mfn + j, gpfn + j);
 
-            /* Inform the domain of the new page's machine address. */
-            if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
-                goto out;
+                /* Inform the domain of the new page's machine address. */
+                if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
+                    goto out;
+            }
         }
     }
 
- out:
+out:
     a->nr_done = i;
 }
 
@@ -527,6 +536,10 @@ long do_memory_op(unsigned long cmd, XEN
 
     args.memflags |= MEMF_node(XENMEMF_get_node(reservation.mem_flags));
 
+    if ( op == XENMEM_populate_physmap
+         && (reservation.mem_flags & XENMEMF_populate_on_demand) )
+        args.memflags |= MEMF_populate_on_demand;
+
     if ( likely(reservation.domid == DOMID_SELF) )
     {
         d = rcu_lock_current_domain();
--- a/xen/include/asm-x86/p2m.h Mon Jan 05 10:45:09 2009 +0000
+++ b/xen/include/asm-x86/p2m.h Mon Jan 05 10:45:48 2009 +0000
@@ -261,6 +261,10 @@ void p2m_pod_dump_data(struct domain *d)
  * (usually in preparation for domain destruction) */
 void p2m_pod_empty_cache(struct domain *d);
 
+/* Set populate-on-demand cache size so that the total memory allocated to a
+ * domain matches target */
+int p2m_pod_set_mem_target(struct domain *d, unsigned long target);
+
 /* Call when decreasing memory reservation to handle PoD entries properly.
  * Will return '1' if all entries were handled and nothing more need be done.*/
 int
--- a/xen/include/public/memory.h Mon Jan 05 10:45:09 2009 +0000
+++ b/xen/include/public/memory.h Mon Jan 05 10:45:48 2009 +0000
@@ -48,6 +48,8 @@
 /* NUMA node to allocate from. */
 #define XENMEMF_node(x)     (((x) + 1) << 8)
 #define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu)
+/* Flag to populate physmap with populate-on-demand entries */
+#define XENMEMF_populate_on_demand (1<<16)
 #endif
 
 struct xen_memory_reservation {
@@ -299,6 +301,19 @@ struct xen_foreign_memory_map {
 typedef struct xen_foreign_memory_map xen_foreign_memory_map_t;
 DEFINE_XEN_GUEST_HANDLE(xen_foreign_memory_map_t);
 
+#define XENMEM_set_pod_target 16
+#define XENMEM_get_pod_target 17
+struct xen_pod_target {
+    /* IN */
+    uint64_t target_pages;
+    /* OUT */
+    uint64_t tot_pages;
+    uint64_t pod_cache_pages;
+    uint64_t pod_entries;
+    /* IN */
+    domid_t domid;
+};
+typedef struct xen_pod_target xen_pod_target_t;
 #endif /* __XEN_PUBLIC_MEMORY_H__ */
 
 /*
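The xen_pod_target structure above serves both new sub-commands: target_pages is consumed only by the set operation, while the three OUT fields are filled in by both set and get. Below is a short read-back sketch, reusing the hypothetical `memory_op()` wrapper from the earlier sketch; the field relationships follow from the handler added in xen/arch/x86/mm.c.

```c
#include <stdio.h>
#include <inttypes.h>
#include <xen/memory.h>   /* xen_pod_target; include path is build-specific */

/* Same hypothetical hypercall wrapper as in the earlier sketch. */
extern int memory_op(unsigned int cmd, void *arg);

static int pod_query_sketch(domid_t domid)
{
    struct xen_pod_target pod = { .domid = domid };

    if ( memory_op(XENMEM_get_pod_target, &pod) < 0 )
        return -1;

    /* Per the arch_memory_op() handler:
     *   tot_pages       == d->tot_pages
     *   pod_cache_pages == RAM currently held in the PoD cache
     *   pod_entries     == outstanding PoD p2m entries not yet backed
     * so tot_pages - pod_cache_pages is the number of populated pages. */
    printf("dom%u: tot=%" PRIu64 " cache=%" PRIu64 " entries=%" PRIu64 "\n",
           (unsigned int)domid, pod.tot_pages, pod.pod_cache_pages,
           pod.pod_entries);
    return 0;
}
```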
--- a/xen/include/xen/hypercall.h Mon Jan 05 10:45:09 2009 +0000
+++ b/xen/include/xen/hypercall.h Mon Jan 05 10:45:48 2009 +0000
@@ -48,7 +48,7 @@ do_platform_op(
  * at what point in the page list to resume. For this purpose I steal the
  * high-order bits of the @cmd parameter, which are otherwise unused and zero.
  */
-#define MEMOP_EXTENT_SHIFT 4 /* cmd[:4] == start_extent */
+#define MEMOP_EXTENT_SHIFT 6 /* cmd[:6] == start_extent */
 #define MEMOP_CMD_MASK     ((1 << MEMOP_EXTENT_SHIFT) - 1)
 
 extern long
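The wider shift is needed because the new sub-commands, XENMEM_set_pod_target (16) and XENMEM_get_pod_target (17), no longer fit in a 4-bit command field. A small, non-Xen illustration of the @cmd packing described in the comment above:

```c
#include <stdio.h>

#define MEMOP_EXTENT_SHIFT 6   /* was 4 before this change */
#define MEMOP_CMD_MASK     ((1 << MEMOP_EXTENT_SHIFT) - 1)

int main(void)
{
    /* A preempted list operation (e.g. XENMEM_populate_physmap, op 6) is
     * continued by packing the resume point into the high bits of @cmd. */
    unsigned long cmd = 6 | (0x1000UL << MEMOP_EXTENT_SHIFT);

    unsigned int  op           = cmd & MEMOP_CMD_MASK;      /* 6 */
    unsigned long start_extent = cmd >> MEMOP_EXTENT_SHIFT; /* 0x1000 */

    /* With the old 4-bit mask, the new op numbers 16 and 17 would have
     * aliased onto ops 0 and 1 after masking. */
    printf("op=%u start_extent=%#lx\n", op, start_extent);
    return 0;
}
```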
--- a/xen/include/xen/mm.h Mon Jan 05 10:45:09 2009 +0000
+++ b/xen/include/xen/mm.h Mon Jan 05 10:45:48 2009 +0000
@@ -72,6 +72,8 @@ int assign_pages(
 /* memflags: */
 #define _MEMF_no_refcount 0
 #define  MEMF_no_refcount (1U<<_MEMF_no_refcount)
+#define _MEMF_populate_on_demand 1
+#define  MEMF_populate_on_demand (1U<<_MEMF_populate_on_demand)
 #define _MEMF_node        8
 #define  MEMF_node(n)     ((((n)+1)&0xff)<<_MEMF_node)
 #define _MEMF_bits        24
--- a/xen/include/xlat.lst Mon Jan 05 10:45:09 2009 +0000
+++ b/xen/include/xlat.lst Mon Jan 05 10:45:48 2009 +0000
@@ -38,6 +38,7 @@
 !	memory_exchange			memory.h
 !	memory_map			memory.h
 !	memory_reservation		memory.h
+!	pod_target			memory.h
 !	translate_gpfn_list		memory.h
 !	sched_poll			sched.h
 ?	sched_remote_shutdown		sched.h