xen/common/memory.c @ 16586:cd5e1e76d0bc

32-on-64: Fix domain address-size clamping, implement
copy-on-grant-transfer, and eliminate 166GB memory limit for x86/64 Xen.

Signed-off-by: Keir Fraser <keir.fraser@citrix.com>

author    Keir Fraser <keir.fraser@citrix.com>
date      Thu Dec 06 13:39:19 2007 +0000 (2007-12-06)
parents   2717128cbdd1
children  baf90ee3c1da
/******************************************************************************
 * memory.c
 *
 * Code to handle memory-related requests.
 *
 * Copyright (c) 2003-2004, B Dragovic
 * Copyright (c) 2003-2005, K A Fraser
 */
#include <xen/config.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/perfc.h>
#include <xen/sched.h>
#include <xen/event.h>
#include <xen/paging.h>
#include <xen/iocap.h>
#include <xen/guest_access.h>
#include <xen/hypercall.h>
#include <xen/errno.h>
#include <asm/current.h>
#include <asm/hardirq.h>
#include <public/memory.h>
#include <xsm/xsm.h>
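
/*
 * Arguments shared by the reservation-style operations below
 * (increase_reservation, decrease_reservation, populate_physmap).
 * nr_done and preempted let a preempted hypercall be resumed via a
 * continuation without repeating extents that were already processed.
 */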
struct memop_args {
    /* INPUT */
    struct domain *domain;     /* Domain to be affected. */
    XEN_GUEST_HANDLE(xen_pfn_t) extent_list; /* List of extent base addrs. */
    unsigned int nr_extents;   /* Number of extents to allocate or free. */
    unsigned int extent_order; /* Size of each extent. */
    unsigned int memflags;     /* Allocation flags. */

    /* INPUT/OUTPUT */
    unsigned int nr_done;    /* Number of extents processed so far. */
    int          preempted;  /* Was the hypercall preempted? */
};
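
/* Pick a CPU local to the target domain for domheap allocations:
 * VCPU0's current processor, or CPU 0 if the domain has no VCPUs yet. */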
static unsigned int select_local_cpu(struct domain *d)
{
    struct vcpu *v = d->vcpu[0];
    return (v ? v->processor : 0);
}
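
/*
 * XENMEM_increase_reservation: allocate a->nr_extents extents of order
 * a->extent_order to a->domain and, if an extent list was supplied, report
 * each extent's starting MFN back to the guest. Stops early on allocation
 * failure or hypercall preemption, recording progress in a->nr_done.
 */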
static void increase_reservation(struct memop_args *a)
{
    struct page_info *page;
    unsigned long i;
    xen_pfn_t mfn;
    struct domain *d = a->domain;
    unsigned int cpu = select_local_cpu(d);

    if ( !guest_handle_is_null(a->extent_list) &&
         !guest_handle_okay(a->extent_list, a->nr_extents) )
        return;

    if ( (a->extent_order != 0) &&
         !multipage_allocation_permitted(current->domain) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        page = __alloc_domheap_pages(d, cpu, a->extent_order, a->memflags);
        if ( unlikely(page == NULL) )
        {
            gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
                     "id=%d memflags=%x (%ld of %d)\n",
                     a->extent_order, d->domain_id, a->memflags,
                     i, a->nr_extents);
            goto out;
        }

        /* Inform the domain of the new page's machine address. */
        if ( !guest_handle_is_null(a->extent_list) )
        {
            mfn = page_to_mfn(page);
            if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
                goto out;
        }
    }

 out:
    a->nr_done = i;
}
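
/*
 * XENMEM_populate_physmap: like increase_reservation, but each new extent
 * is also mapped at the guest-supplied GPFN. In translated (paging) mode
 * the P2M is updated via guest_physmap_add_page(); otherwise only the M2P
 * entries are set and the MFN is copied back to the guest.
 */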
static void populate_physmap(struct memop_args *a)
{
    struct page_info *page;
    unsigned long i, j;
    xen_pfn_t gpfn, mfn;
    struct domain *d = a->domain;
    unsigned int cpu = select_local_cpu(d);

    if ( !guest_handle_okay(a->extent_list, a->nr_extents) )
        return;

    if ( (a->extent_order != 0) &&
         !multipage_allocation_permitted(current->domain) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) )
            goto out;

        page = __alloc_domheap_pages(d, cpu, a->extent_order, a->memflags);
        if ( unlikely(page == NULL) )
        {
            gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
                     "id=%d memflags=%x (%ld of %d)\n",
                     a->extent_order, d->domain_id, a->memflags,
                     i, a->nr_extents);
            goto out;
        }

        mfn = page_to_mfn(page);

        if ( unlikely(paging_mode_translate(d)) )
        {
            for ( j = 0; j < (1 << a->extent_order); j++ )
                if ( guest_physmap_add_page(d, gpfn + j, mfn + j) )
                    goto out;
        }
        else
        {
            for ( j = 0; j < (1 << a->extent_order); j++ )
                set_gpfn_from_mfn(mfn + j, gpfn + j);

            /* Inform the domain of the new page's machine address. */
            if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
                goto out;
        }
    }

 out:
    a->nr_done = i;
}
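
/*
 * Release one guest page: validate and take a reference on the MFN backing
 * gmfn, drop any pin/type and allocation references, remove the physmap
 * entry, then drop our temporary reference. Returns 1 on success, 0 if the
 * page number or ownership check fails.
 */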
int guest_remove_page(struct domain *d, unsigned long gmfn)
{
    struct page_info *page;
    unsigned long mfn;

    mfn = gmfn_to_mfn(d, gmfn);
    if ( unlikely(!mfn_valid(mfn)) )
    {
        gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n",
                 d->domain_id, gmfn);
        return 0;
    }

    page = mfn_to_page(mfn);
    if ( unlikely(!get_page(page, d)) )
    {
        gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);
        return 0;
    }

    if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
        put_page_and_type(page);

    if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
        put_page(page);

    guest_physmap_remove_page(d, gmfn, mfn);

    put_page(page);

    return 1;
}
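
/*
 * XENMEM_decrease_reservation: walk the guest-supplied GMFN list and free
 * each page of every extent via guest_remove_page(), honouring preemption
 * and recording progress in a->nr_done.
 */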
static void decrease_reservation(struct memop_args *a)
{
    unsigned long i, j;
    xen_pfn_t gmfn;

    if ( !guest_handle_okay(a->extent_list, a->nr_extents) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        if ( unlikely(__copy_from_guest_offset(&gmfn, a->extent_list, i, 1)) )
            goto out;

        for ( j = 0; j < (1 << a->extent_order); j++ )
            if ( !guest_remove_page(a->domain, gmfn + j) )
                goto out;
    }

 out:
    a->nr_done = i;
}
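
/*
 * XENMEM_translate_gpfn_list: for a translated-mode domain, convert a list
 * of GPFNs into the corresponding MFNs. Privileged callers may name another
 * domain; preemption is reported as -EAGAIN with *progress updated so the
 * caller can build a continuation.
 */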
static long translate_gpfn_list(
    XEN_GUEST_HANDLE(xen_translate_gpfn_list_t) uop, unsigned long *progress)
{
    struct xen_translate_gpfn_list op;
    unsigned long i;
    xen_pfn_t gpfn;
    xen_pfn_t mfn;
    struct domain *d;
    int rc;

    if ( copy_from_guest(&op, uop, 1) )
        return -EFAULT;

    /* Is size too large for us to encode a continuation? */
    if ( op.nr_gpfns > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) )
        return -EINVAL;

    if ( !guest_handle_okay(op.gpfn_list, op.nr_gpfns) ||
         !guest_handle_okay(op.mfn_list, op.nr_gpfns) )
        return -EFAULT;

    if ( op.domid == DOMID_SELF )
        op.domid = current->domain->domain_id;
    else if ( !IS_PRIV(current->domain) )
        return -EPERM;

    if ( (d = rcu_lock_domain_by_id(op.domid)) == NULL )
        return -ESRCH;

    if ( !paging_mode_translate(d) )
    {
        rcu_unlock_domain(d);
        return -EINVAL;
    }

    for ( i = *progress; i < op.nr_gpfns; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            rcu_unlock_domain(d);
            *progress = i;
            return -EAGAIN;
        }

        if ( unlikely(__copy_from_guest_offset(&gpfn, op.gpfn_list, i, 1)) )
        {
            rcu_unlock_domain(d);
            return -EFAULT;
        }

        mfn = gmfn_to_mfn(d, gpfn);

        rc = xsm_translate_gpfn_list(current->domain, mfn);
        if ( rc )
        {
            rcu_unlock_domain(d);
            return rc;
        }

        if ( unlikely(__copy_to_guest_offset(op.mfn_list, i, &mfn, 1)) )
        {
            rcu_unlock_domain(d);
            return -EFAULT;
        }
    }

    rcu_unlock_domain(d);
    return 0;
}
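
/*
 * XENMEM_exchange: trade the caller's existing pages for freshly allocated
 * ones (e.g. of a different contiguity or address width). Work proceeds in
 * chunks sized so that one chunk covers the same number of pages on the
 * input and output side; a failed chunk is rolled back and the number of
 * successfully exchanged input extents is reported in nr_exchanged.
 */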
static long memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
{
    struct xen_memory_exchange exch;
    LIST_HEAD(in_chunk_list);
    LIST_HEAD(out_chunk_list);
    unsigned long in_chunk_order, out_chunk_order;
    xen_pfn_t     gpfn, gmfn, mfn;
    unsigned long i, j, k;
    unsigned int  memflags = 0, cpu;
    long          rc = 0;
    struct domain *d;
    struct page_info *page;

    if ( copy_from_guest(&exch, arg, 1) )
        return -EFAULT;

    /* Various sanity checks. */
    if ( (exch.nr_exchanged > exch.in.nr_extents) ||
         /* Input and output domain identifiers match? */
         (exch.in.domid != exch.out.domid) ||
         /* Sizes of input and output lists do not overflow a long? */
         ((~0UL >> exch.in.extent_order) < exch.in.nr_extents) ||
         ((~0UL >> exch.out.extent_order) < exch.out.nr_extents) ||
         /* Sizes of input and output lists match? */
         ((exch.in.nr_extents << exch.in.extent_order) !=
          (exch.out.nr_extents << exch.out.extent_order)) )
    {
        rc = -EINVAL;
        goto fail_early;
    }

    /* Only privileged guests can allocate multi-page contiguous extents. */
    if ( ((exch.in.extent_order != 0) || (exch.out.extent_order != 0)) &&
         !multipage_allocation_permitted(current->domain) )
    {
        rc = -EPERM;
        goto fail_early;
    }

    if ( exch.in.extent_order <= exch.out.extent_order )
    {
        in_chunk_order  = exch.out.extent_order - exch.in.extent_order;
        out_chunk_order = 0;
    }
    else
    {
        in_chunk_order  = 0;
        out_chunk_order = exch.in.extent_order - exch.out.extent_order;
    }

    /*
     * Only support exchange on calling domain right now. Otherwise there are
     * tricky corner cases to consider (e.g., dying domain).
     */
    if ( unlikely(exch.in.domid != DOMID_SELF) )
    {
        rc = IS_PRIV(current->domain) ? -EINVAL : -EPERM;
        goto fail_early;
    }
    d = current->domain;

    memflags |= MEMF_bits(domain_clamp_alloc_bitsize(
        d, exch.out.address_bits ? : BITS_PER_LONG));

    cpu = select_local_cpu(d);

    for ( i = (exch.nr_exchanged >> in_chunk_order);
          i < (exch.in.nr_extents >> in_chunk_order);
          i++ )
    {
        if ( hypercall_preempt_check() )
        {
            exch.nr_exchanged = i << in_chunk_order;
            if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
                return -EFAULT;
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh", XENMEM_exchange, arg);
        }

        /* Steal a chunk's worth of input pages from the domain. */
        for ( j = 0; j < (1UL << in_chunk_order); j++ )
        {
            if ( unlikely(__copy_from_guest_offset(
                &gmfn, exch.in.extent_start, (i<<in_chunk_order)+j, 1)) )
            {
                rc = -EFAULT;
                goto fail;
            }

            for ( k = 0; k < (1UL << exch.in.extent_order); k++ )
            {
                mfn = gmfn_to_mfn(d, gmfn + k);
                if ( unlikely(!mfn_valid(mfn)) )
                {
                    rc = -EINVAL;
                    goto fail;
                }

                page = mfn_to_page(mfn);

                if ( unlikely(steal_page(d, page, MEMF_no_refcount)) )
                {
                    rc = -EINVAL;
                    goto fail;
                }

                list_add(&page->list, &in_chunk_list);
            }
        }

        /* Allocate a chunk's worth of anonymous output pages. */
        for ( j = 0; j < (1UL << out_chunk_order); j++ )
        {
            page = __alloc_domheap_pages(
                NULL, cpu, exch.out.extent_order, memflags);
            if ( unlikely(page == NULL) )
            {
                rc = -ENOMEM;
                goto fail;
            }

            list_add(&page->list, &out_chunk_list);
        }

        /*
         * Success! Beyond this point we cannot fail for this chunk.
         */

        /* Destroy final reference to each input page. */
        while ( !list_empty(&in_chunk_list) )
        {
            page = list_entry(in_chunk_list.next, struct page_info, list);
            list_del(&page->list);
            if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
                BUG();
            mfn = page_to_mfn(page);
            guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn);
            put_page(page);
        }

        /* Assign each output page to the domain. */
        j = 0;
        while ( !list_empty(&out_chunk_list) )
        {
            page = list_entry(out_chunk_list.next, struct page_info, list);
            list_del(&page->list);
            if ( assign_pages(d, page, exch.out.extent_order,
                              MEMF_no_refcount) )
                BUG();

            /* Note that we ignore errors accessing the output extent list. */
            (void)__copy_from_guest_offset(
                &gpfn, exch.out.extent_start, (i<<out_chunk_order)+j, 1);

            mfn = page_to_mfn(page);
            if ( unlikely(paging_mode_translate(d)) )
            {
                /* Ignore failure here. There's nothing we can do. */
                for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
                    (void)guest_physmap_add_page(d, gpfn + k, mfn + k);
            }
            else
            {
                for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
                    set_gpfn_from_mfn(mfn + k, gpfn + k);
                (void)__copy_to_guest_offset(
                    exch.out.extent_start, (i<<out_chunk_order)+j, &mfn, 1);
            }

            j++;
        }
        BUG_ON(j != (1UL << out_chunk_order));
    }

    exch.nr_exchanged = exch.in.nr_extents;
    if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
        rc = -EFAULT;
    return rc;

    /*
     * Failed a chunk! Free any partial chunk work. Tell caller how many
     * chunks succeeded.
     */
 fail:
    /* Reassign any input pages we managed to steal. */
    while ( !list_empty(&in_chunk_list) )
    {
        page = list_entry(in_chunk_list.next, struct page_info, list);
        list_del(&page->list);
        if ( assign_pages(d, page, 0, MEMF_no_refcount) )
            BUG();
    }

    /* Free any output pages we managed to allocate. */
    while ( !list_empty(&out_chunk_list) )
    {
        page = list_entry(out_chunk_list.next, struct page_info, list);
        list_del(&page->list);
        free_domheap_pages(page, exch.out.extent_order);
    }

    exch.nr_exchanged = i << in_chunk_order;

 fail_early:
    if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
        rc = -EFAULT;
    return rc;
}
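
/*
 * Top-level XENMEM_* hypercall dispatcher. The low MEMOP_CMD_MASK bits of
 * 'cmd' select the sub-operation; the bits above MEMOP_EXTENT_SHIFT carry
 * the resume point (start extent or translation progress) when the call is
 * restarted from a continuation.
 */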
long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg)
{
    struct domain *d;
    int rc, op;
    unsigned long start_extent, progress;
    struct xen_memory_reservation reservation;
    struct memop_args args;
    domid_t domid;

    op = cmd & MEMOP_CMD_MASK;

    switch ( op )
    {
    case XENMEM_increase_reservation:
    case XENMEM_decrease_reservation:
    case XENMEM_populate_physmap:
        start_extent = cmd >> MEMOP_EXTENT_SHIFT;

        if ( copy_from_guest(&reservation, arg, 1) )
            return start_extent;

        /* Is size too large for us to encode a continuation? */
        if ( reservation.nr_extents > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) )
            return start_extent;

        if ( unlikely(start_extent > reservation.nr_extents) )
            return start_extent;

        args.extent_list  = reservation.extent_start;
        args.nr_extents   = reservation.nr_extents;
        args.extent_order = reservation.extent_order;
        args.nr_done      = start_extent;
        args.preempted    = 0;
        args.memflags     = 0;

        if ( (reservation.address_bits != 0) &&
             (reservation.address_bits <
              (get_order_from_pages(max_page) + PAGE_SHIFT)) )
        {
            if ( reservation.address_bits <= PAGE_SHIFT )
                return start_extent;
            args.memflags = MEMF_bits(reservation.address_bits);
        }

        if ( likely(reservation.domid == DOMID_SELF) )
            d = current->domain;
        else if ( !IS_PRIV(current->domain) ||
                  ((d = rcu_lock_domain_by_id(reservation.domid)) == NULL) )
            return start_extent;
        args.domain = d;

        rc = xsm_memory_adjust_reservation(current->domain, d);
        if ( rc )
        {
            if ( reservation.domid != DOMID_SELF )
                rcu_unlock_domain(d);
            return rc;
        }

        switch ( op )
        {
        case XENMEM_increase_reservation:
            increase_reservation(&args);
            break;
        case XENMEM_decrease_reservation:
            decrease_reservation(&args);
            break;
        default: /* XENMEM_populate_physmap */
            populate_physmap(&args);
            break;
        }

        if ( unlikely(reservation.domid != DOMID_SELF) )
            rcu_unlock_domain(d);

        rc = args.nr_done;

        if ( args.preempted )
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh",
                op | (rc << MEMOP_EXTENT_SHIFT), arg);

        break;

    case XENMEM_exchange:
        rc = memory_exchange(guest_handle_cast(arg, xen_memory_exchange_t));
        break;

    case XENMEM_maximum_ram_page:
        rc = max_page;
        break;

    case XENMEM_current_reservation:
    case XENMEM_maximum_reservation:
    case XENMEM_maximum_gpfn:
        if ( copy_from_guest(&domid, arg, 1) )
            return -EFAULT;

        if ( likely(domid == DOMID_SELF) )
            d = current->domain;
        else if ( !IS_PRIV(current->domain) )
            return -EPERM;
        else if ( (d = rcu_lock_domain_by_id(domid)) == NULL )
            return -ESRCH;

        rc = xsm_memory_stat_reservation(current->domain, d);
        if ( rc )
        {
            if ( domid != DOMID_SELF )
                rcu_unlock_domain(d);
            return rc;
        }

        switch ( op )
        {
        case XENMEM_current_reservation:
            rc = d->tot_pages;
            break;
        case XENMEM_maximum_reservation:
            rc = d->max_pages;
            break;
        default:
            ASSERT(op == XENMEM_maximum_gpfn);
            rc = domain_get_maximum_gpfn(d);
            break;
        }

        if ( unlikely(domid != DOMID_SELF) )
            rcu_unlock_domain(d);

        break;

    case XENMEM_translate_gpfn_list:
        progress = cmd >> MEMOP_EXTENT_SHIFT;
        rc = translate_gpfn_list(
            guest_handle_cast(arg, xen_translate_gpfn_list_t),
            &progress);
        if ( rc == -EAGAIN )
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh",
                op | (progress << MEMOP_EXTENT_SHIFT), arg);
        break;

    default:
        rc = arch_memory_op(op, arg);
        break;
    }

    return rc;
}
/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */
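
For reference, a minimal guest-side sketch of driving this code path. It
assumes a Linux/mini-OS-style HYPERVISOR_memory_op() wrapper and the
set_xen_guest_handle() helper from the public headers; only the
xen_memory_reservation fields and the XENMEM_populate_physmap constant come
from the interface this file implements.

    /* Ask Xen to back one 4kB frame at guest pseudo-physical frame 'gpfn'. */
    xen_pfn_t gpfn = 0x1000;
    struct xen_memory_reservation res = {
        .nr_extents   = 1,
        .extent_order = 0,          /* single page */
        .address_bits = 0,          /* no address-width restriction */
        .domid        = DOMID_SELF,
    };
    long rc;

    set_xen_guest_handle(res.extent_start, &gpfn);
    rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &res);
    /* rc is the number of extents populated (1 on success). */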