Repository: debuggers.hg
File:       xen/common/memory.c @ 19968:0d4406bc5cb7

Allow XENMEM_exchange to support exchange on foreign domains.

Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
Author:   Keir Fraser <keir.fraser@citrix.com>
Date:     Mon Jul 13 12:17:05 2009 +0100
Parents:  2d68d518038b
Children: 809b20f066fb 27d816a4bfb2
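
The change this revision describes is the IS_PRIV_FOR() path in memory_exchange() below: a domain that is privileged for the target may now issue XENMEM_exchange on a foreign domain's behalf by naming that domain in the request. The following caller-side sketch is not part of this file; it assumes the public XENMEM_exchange interface (struct xen_memory_exchange, as used by the checks in memory_exchange() below) and a hypothetical hypercall wrapper named memory_op(), and the helper name exchange_for_domain() is invented for illustration only.

#include <string.h>
#include <xen/memory.h>  /* public XENMEM_* interface; exact include path
                          * depends on the caller's environment. */

/* Hypothetical wrapper around HYPERVISOR_memory_op; not defined here. */
extern long memory_op(unsigned int cmd, void *arg);

/*
 * Sketch: exchange nr_in order-0 frames belonging to 'domid' for nr_out
 * extents of order 'out_order'.  The caller must be privileged for
 * 'domid' (IS_PRIV_FOR), otherwise memory_exchange() returns -EPERM.
 */
static long exchange_for_domain(domid_t domid,
                                xen_pfn_t *in_frames, unsigned long nr_in,
                                xen_pfn_t *out_frames, unsigned long nr_out,
                                unsigned int out_order)
{
    struct xen_memory_exchange exch;

    memset(&exch, 0, sizeof(exch));

    /* Input and output must name the same domain, or the sanity checks
     * in memory_exchange() reject the call with -EINVAL. */
    exch.in.domid  = domid;
    exch.out.domid = domid;

    exch.in.extent_order = 0;          /* single-page input extents */
    exch.in.nr_extents = nr_in;
    set_xen_guest_handle(exch.in.extent_start, in_frames);

    exch.out.extent_order = out_order; /* e.g. contiguous output extents */
    exch.out.nr_extents = nr_out;
    set_xen_guest_handle(exch.out.extent_start, out_frames);

    /* Total page counts must match: nr_in == nr_out << out_order. */

    return memory_op(XENMEM_exchange, &exch);
}
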
/******************************************************************************
 * memory.c
 *
 * Code to handle memory-related requests.
 *
 * Copyright (c) 2003-2004, B Dragovic
 * Copyright (c) 2003-2005, K A Fraser
 */

#include <xen/config.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/perfc.h>
#include <xen/sched.h>
#include <xen/event.h>
#include <xen/paging.h>
#include <xen/iocap.h>
#include <xen/guest_access.h>
#include <xen/hypercall.h>
#include <xen/errno.h>
#include <asm/current.h>
#include <asm/hardirq.h>
#include <xen/numa.h>
#include <public/memory.h>
#include <xsm/xsm.h>

struct memop_args {
    /* INPUT */
    struct domain *domain;     /* Domain to be affected. */
    XEN_GUEST_HANDLE(xen_pfn_t) extent_list; /* List of extent base addrs. */
    unsigned int nr_extents;   /* Number of extents to allocate or free. */
    unsigned int extent_order; /* Size of each extent. */
    unsigned int memflags;     /* Allocation flags. */

    /* INPUT/OUTPUT */
    unsigned int nr_done;    /* Number of extents processed so far. */
    int          preempted;  /* Was the hypercall preempted? */
};

static void increase_reservation(struct memop_args *a)
{
    struct page_info *page;
    unsigned long i;
    xen_pfn_t mfn;
    struct domain *d = a->domain;

    if ( !guest_handle_is_null(a->extent_list) &&
         !guest_handle_subrange_okay(a->extent_list, a->nr_done,
                                     a->nr_extents-1) )
        return;

    if ( !multipage_allocation_permitted(current->domain, a->extent_order) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        page = alloc_domheap_pages(d, a->extent_order, a->memflags);
        if ( unlikely(page == NULL) )
        {
            gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
                     "id=%d memflags=%x (%ld of %d)\n",
                     a->extent_order, d->domain_id, a->memflags,
                     i, a->nr_extents);
            goto out;
        }

        /* Inform the domain of the new page's machine address. */
        if ( !guest_handle_is_null(a->extent_list) )
        {
            mfn = page_to_mfn(page);
            if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
                goto out;
        }
    }

 out:
    a->nr_done = i;
}

static void populate_physmap(struct memop_args *a)
{
    struct page_info *page;
    unsigned long i, j;
    xen_pfn_t gpfn, mfn;
    struct domain *d = a->domain;

    if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done,
                                     a->nr_extents-1) )
        return;

    if ( !multipage_allocation_permitted(current->domain, a->extent_order) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) )
            goto out;

        if ( a->memflags & MEMF_populate_on_demand )
        {
            if ( guest_physmap_mark_populate_on_demand(d, gpfn,
                                                       a->extent_order) < 0 )
                goto out;
        }
        else
        {
            page = alloc_domheap_pages(d, a->extent_order, a->memflags);
            if ( unlikely(page == NULL) )
            {
                gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
                         "id=%d memflags=%x (%ld of %d)\n",
                         a->extent_order, d->domain_id, a->memflags,
                         i, a->nr_extents);
                goto out;
            }

            mfn = page_to_mfn(page);
            guest_physmap_add_page(d, gpfn, mfn, a->extent_order);

            if ( !paging_mode_translate(d) )
            {
                for ( j = 0; j < (1 << a->extent_order); j++ )
                    set_gpfn_from_mfn(mfn + j, gpfn + j);

                /* Inform the domain of the new page's machine address. */
                if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
                    goto out;
            }
        }
    }

 out:
    a->nr_done = i;
}

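/*
 * Strip a single page from the guest: drop any pin/type reference and the
 * allocation reference, and remove the gmfn -> mfn mapping.  Returns 0 if
 * gmfn does not refer to a valid page owned by 'd', 1 on success.
 */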
int guest_remove_page(struct domain *d, unsigned long gmfn)
{
    struct page_info *page;
    unsigned long mfn;

    mfn = gmfn_to_mfn(d, gmfn);
    if ( unlikely(!mfn_valid(mfn)) )
    {
        gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n",
                 d->domain_id, gmfn);
        return 0;
    }

    page = mfn_to_page(mfn);
    if ( unlikely(!get_page(page, d)) )
    {
        gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);
        return 0;
    }

    if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
        put_page_and_type(page);

    if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
        put_page(page);

    guest_physmap_remove_page(d, gmfn, mfn, 0);

    put_page(page);

    return 1;
}

static void decrease_reservation(struct memop_args *a)
{
    unsigned long i, j;
    xen_pfn_t gmfn;

    if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done,
                                     a->nr_extents-1) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        if ( unlikely(__copy_from_guest_offset(&gmfn, a->extent_list, i, 1)) )
            goto out;

        /* See if populate-on-demand wants to handle this */
        if ( is_hvm_domain(a->domain)
             && p2m_pod_decrease_reservation(a->domain, gmfn, a->extent_order) )
            continue;

        for ( j = 0; j < (1 << a->extent_order); j++ )
            if ( !guest_remove_page(a->domain, gmfn + j) )
                goto out;
    }

 out:
    a->nr_done = i;
}

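/*
 * XENMEM_exchange: trade a set of input extents owned by the target domain
 * for freshly allocated output extents of a possibly different order.
 * Work is done in chunks sized to the larger of the two extent sizes, so
 * each chunk covers 2^in_chunk_order input extents and 2^out_chunk_order
 * output extents holding the same number of pages.  Progress is recorded
 * in exch.nr_exchanged (in units of input extents) so a preempted call
 * can be resumed as a continuation.
 */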
static long memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
{
    struct xen_memory_exchange exch;
    PAGE_LIST_HEAD(in_chunk_list);
    PAGE_LIST_HEAD(out_chunk_list);
    unsigned long in_chunk_order, out_chunk_order;
    xen_pfn_t gpfn, gmfn, mfn;
    unsigned long i, j, k;
    unsigned int node, memflags = 0;
    long rc = 0;
    struct domain *d;
    struct page_info *page;

    if ( copy_from_guest(&exch, arg, 1) )
        return -EFAULT;

    /* Various sanity checks. */
    if ( (exch.nr_exchanged > exch.in.nr_extents) ||
         /* Input and output domain identifiers match? */
         (exch.in.domid != exch.out.domid) ||
         /* Sizes of input and output lists do not overflow a long? */
         ((~0UL >> exch.in.extent_order) < exch.in.nr_extents) ||
         ((~0UL >> exch.out.extent_order) < exch.out.nr_extents) ||
         /* Sizes of input and output lists match? */
         ((exch.in.nr_extents << exch.in.extent_order) !=
          (exch.out.nr_extents << exch.out.extent_order)) )
    {
        rc = -EINVAL;
        goto fail_early;
    }

    /* Only privileged guests can allocate multi-page contiguous extents. */
    if ( !multipage_allocation_permitted(current->domain,
                                         exch.in.extent_order) ||
         !multipage_allocation_permitted(current->domain,
                                         exch.out.extent_order) )
    {
        rc = -EPERM;
        goto fail_early;
    }

    if ( exch.in.extent_order <= exch.out.extent_order )
    {
        in_chunk_order = exch.out.extent_order - exch.in.extent_order;
        out_chunk_order = 0;
    }
    else
    {
        in_chunk_order = 0;
        out_chunk_order = exch.in.extent_order - exch.out.extent_order;
    }

    if ( likely(exch.in.domid == DOMID_SELF) )
    {
        d = rcu_lock_current_domain();
    }
    else
    {
        if ( (d = rcu_lock_domain_by_id(exch.in.domid)) == NULL )
            goto fail_early;

        if ( !IS_PRIV_FOR(current->domain, d) )
        {
            rcu_unlock_domain(d);
            rc = -EPERM;
            goto fail_early;
        }
    }

    memflags |= MEMF_bits(domain_clamp_alloc_bitsize(
        d,
        XENMEMF_get_address_bits(exch.out.mem_flags) ? :
        (BITS_PER_LONG+PAGE_SHIFT)));
    node = XENMEMF_get_node(exch.out.mem_flags);
    if ( node == NUMA_NO_NODE )
        node = domain_to_node(d);
    memflags |= MEMF_node(node);

    for ( i = (exch.nr_exchanged >> in_chunk_order);
          i < (exch.in.nr_extents >> in_chunk_order);
          i++ )
    {
        if ( hypercall_preempt_check() )
        {
            exch.nr_exchanged = i << in_chunk_order;
            rcu_unlock_domain(d);
            if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
                return -EFAULT;
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh", XENMEM_exchange, arg);
        }

        /* Steal a chunk's worth of input pages from the domain. */
        for ( j = 0; j < (1UL << in_chunk_order); j++ )
        {
            if ( unlikely(__copy_from_guest_offset(
                &gmfn, exch.in.extent_start, (i<<in_chunk_order)+j, 1)) )
            {
                rc = -EFAULT;
                goto fail;
            }

            for ( k = 0; k < (1UL << exch.in.extent_order); k++ )
            {
                mfn = gmfn_to_mfn(d, gmfn + k);
                if ( unlikely(!mfn_valid(mfn)) )
                {
                    rc = -EINVAL;
                    goto fail;
                }

                page = mfn_to_page(mfn);

                if ( unlikely(steal_page(d, page, MEMF_no_refcount)) )
                {
                    rc = -EINVAL;
                    goto fail;
                }

                page_list_add(page, &in_chunk_list);
            }
        }

        /* Allocate a chunk's worth of anonymous output pages. */
        for ( j = 0; j < (1UL << out_chunk_order); j++ )
        {
            page = alloc_domheap_pages(NULL, exch.out.extent_order, memflags);
            if ( unlikely(page == NULL) )
            {
                rc = -ENOMEM;
                goto fail;
            }

            page_list_add(page, &out_chunk_list);
        }

        /*
         * Success! Beyond this point we cannot fail for this chunk.
         */

        /* Destroy final reference to each input page. */
        while ( (page = page_list_remove_head(&in_chunk_list)) )
        {
            if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
                BUG();
            mfn = page_to_mfn(page);
            guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn, 0);
            put_page(page);
        }

        /* Assign each output page to the domain. */
        j = 0;
        while ( (page = page_list_remove_head(&out_chunk_list)) )
        {
            if ( assign_pages(d, page, exch.out.extent_order,
                              MEMF_no_refcount) )
            {
                unsigned long dec_count;
                bool_t drop_dom_ref;

                /*
                 * Pages in in_chunk_list were stolen without decreasing
                 * tot_pages.  If the domain is dying by the time we assign
                 * pages, we must decrease the count here; pages that have
                 * already been assigned are covered by
                 * domain_relinquish_resources().
                 */
                dec_count = (((1UL << exch.in.extent_order) *
                              (1UL << in_chunk_order)) -
                             (j * (1UL << exch.out.extent_order)));

                spin_lock(&d->page_alloc_lock);
                d->tot_pages -= dec_count;
                drop_dom_ref = (dec_count && !d->tot_pages);
                spin_unlock(&d->page_alloc_lock);

                if ( drop_dom_ref )
                    put_domain(d);

                free_domheap_pages(page, exch.out.extent_order);
                goto dying;
            }

            /* Note that we ignore errors accessing the output extent list. */
            (void)__copy_from_guest_offset(
                &gpfn, exch.out.extent_start, (i<<out_chunk_order)+j, 1);

            mfn = page_to_mfn(page);
            guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order);

            if ( !paging_mode_translate(d) )
            {
                for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
                    set_gpfn_from_mfn(mfn + k, gpfn + k);
                (void)__copy_to_guest_offset(
                    exch.out.extent_start, (i<<out_chunk_order)+j, &mfn, 1);
            }
            j++;
        }
        BUG_ON( !(d->is_dying) && (j != (1UL << out_chunk_order)) );
    }

    exch.nr_exchanged = exch.in.nr_extents;
    if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
        rc = -EFAULT;
    rcu_unlock_domain(d);
    return rc;

    /*
     * Failed a chunk! Free any partial chunk work. Tell caller how many
     * chunks succeeded.
     */
 fail:
    /* Reassign any input pages we managed to steal. */
    while ( (page = page_list_remove_head(&in_chunk_list)) )
        if ( assign_pages(d, page, 0, MEMF_no_refcount) )
            BUG();
 dying:
    rcu_unlock_domain(d);
    /* Free any output pages we managed to allocate. */
    while ( (page = page_list_remove_head(&out_chunk_list)) )
        free_domheap_pages(page, exch.out.extent_order);

    exch.nr_exchanged = i << in_chunk_order;

 fail_early:
    if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
        rc = -EFAULT;
    return rc;
}

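/*
 * Hypercall entry point.  The low MEMOP_CMD_MASK bits of 'cmd' select the
 * sub-operation; for the reservation operations the remaining bits
 * (cmd >> MEMOP_EXTENT_SHIFT) carry the extent index at which to resume,
 * which is how preempted calls are restarted via
 * hypercall_create_continuation().
 */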
long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg)
{
    struct domain *d;
    int rc, op;
    unsigned int address_bits;
    unsigned long start_extent;
    struct xen_memory_reservation reservation;
    struct memop_args args;
    domid_t domid;

    op = cmd & MEMOP_CMD_MASK;

    switch ( op )
    {
    case XENMEM_increase_reservation:
    case XENMEM_decrease_reservation:
    case XENMEM_populate_physmap:
        start_extent = cmd >> MEMOP_EXTENT_SHIFT;

        if ( copy_from_guest(&reservation, arg, 1) )
            return start_extent;

        /* Is size too large for us to encode a continuation? */
        if ( reservation.nr_extents > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) )
            return start_extent;

        if ( unlikely(start_extent > reservation.nr_extents) )
            return start_extent;

        args.extent_list = reservation.extent_start;
        args.nr_extents = reservation.nr_extents;
        args.extent_order = reservation.extent_order;
        args.nr_done = start_extent;
        args.preempted = 0;
        args.memflags = 0;

        address_bits = XENMEMF_get_address_bits(reservation.mem_flags);
        if ( (address_bits != 0) &&
             (address_bits < (get_order_from_pages(max_page) + PAGE_SHIFT)) )
        {
            if ( address_bits <= PAGE_SHIFT )
                return start_extent;
            args.memflags = MEMF_bits(address_bits);
        }

        args.memflags |= MEMF_node(XENMEMF_get_node(reservation.mem_flags));

        if ( op == XENMEM_populate_physmap
             && (reservation.mem_flags & XENMEMF_populate_on_demand) )
            args.memflags |= MEMF_populate_on_demand;

        if ( likely(reservation.domid == DOMID_SELF) )
        {
            d = rcu_lock_current_domain();
        }
        else
        {
            if ( (d = rcu_lock_domain_by_id(reservation.domid)) == NULL )
                return start_extent;
            if ( !IS_PRIV_FOR(current->domain, d) )
            {
                rcu_unlock_domain(d);
                return start_extent;
            }
        }
        args.domain = d;

        rc = xsm_memory_adjust_reservation(current->domain, d);
        if ( rc )
        {
            rcu_unlock_domain(d);
            return rc;
        }

        switch ( op )
        {
        case XENMEM_increase_reservation:
            increase_reservation(&args);
            break;
        case XENMEM_decrease_reservation:
            decrease_reservation(&args);
            break;
        default: /* XENMEM_populate_physmap */
            populate_physmap(&args);
            break;
        }

        rcu_unlock_domain(d);

        rc = args.nr_done;

        if ( args.preempted )
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh",
                op | (rc << MEMOP_EXTENT_SHIFT), arg);

        break;

    case XENMEM_exchange:
        rc = memory_exchange(guest_handle_cast(arg, xen_memory_exchange_t));
        break;

    case XENMEM_maximum_ram_page:
        rc = max_page;
        break;

    case XENMEM_current_reservation:
    case XENMEM_maximum_reservation:
    case XENMEM_maximum_gpfn:
        if ( copy_from_guest(&domid, arg, 1) )
            return -EFAULT;

        rc = rcu_lock_target_domain_by_id(domid, &d);
        if ( rc )
            return rc;

        rc = xsm_memory_stat_reservation(current->domain, d);
        if ( rc )
        {
            rcu_unlock_domain(d);
            return rc;
        }

        switch ( op )
        {
        case XENMEM_current_reservation:
            rc = d->tot_pages;
            break;
        case XENMEM_maximum_reservation:
            rc = d->max_pages;
            break;
        default:
            ASSERT(op == XENMEM_maximum_gpfn);
            rc = domain_get_maximum_gpfn(d);
            break;
        }

        rcu_unlock_domain(d);

        break;

    default:
        rc = arch_memory_op(op, arg);
        break;
    }

    return rc;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */