
xen/common/memory.c @ 20924:6ade83cb21ca

xentrace: Trace p2m events

Add more tracing to aid in debugging ballooning / PoD:
* Nested page faults for EPT/NPT systems
* set_p2m_entry
* Decrease reservation (for ballooning)
* PoD populate, zero reclaim, superpage splinter

Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Feb 03 09:35:23 2010 +0000 (2010-02-03)
/******************************************************************************
 * memory.c
 *
 * Code to handle memory-related requests.
 *
 * Copyright (c) 2003-2004, B Dragovic
 * Copyright (c) 2003-2005, K A Fraser
 */

#include <xen/config.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/perfc.h>
#include <xen/sched.h>
#include <xen/event.h>
#include <xen/paging.h>
#include <xen/iocap.h>
#include <xen/guest_access.h>
#include <xen/hypercall.h>
#include <xen/errno.h>
#include <xen/tmem.h>
#include <asm/current.h>
#include <asm/hardirq.h>
#ifdef CONFIG_X86
# include <asm/p2m.h>
#endif
#include <xen/numa.h>
#include <public/memory.h>
#include <xsm/xsm.h>
#include <xen/trace.h>

struct memop_args {
    /* INPUT */
    struct domain *domain;     /* Domain to be affected. */
    XEN_GUEST_HANDLE(xen_pfn_t) extent_list; /* List of extent base addrs. */
    unsigned int nr_extents;   /* Number of extents to allocate or free. */
    unsigned int extent_order; /* Size of each extent. */
    unsigned int memflags;     /* Allocation flags. */

    /* INPUT/OUTPUT */
    unsigned int nr_done;    /* Number of extents processed so far. */
    int          preempted;  /* Was the hypercall preempted? */
};
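
/*
 * Handle XENMEM_increase_reservation: allocate the requested extents from
 * the domain heap and, unless the extent list handle is null, report each
 * new extent's machine frame number back to the guest.  May be preempted;
 * progress is recorded in a->nr_done.
 */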
static void increase_reservation(struct memop_args *a)
{
    struct page_info *page;
    unsigned long i;
    xen_pfn_t mfn;
    struct domain *d = a->domain;

    if ( !guest_handle_is_null(a->extent_list) &&
         !guest_handle_subrange_okay(a->extent_list, a->nr_done,
                                     a->nr_extents-1) )
        return;

    if ( !multipage_allocation_permitted(current->domain, a->extent_order) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        page = alloc_domheap_pages(d, a->extent_order, a->memflags);
        if ( unlikely(page == NULL) )
        {
            gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
                     "id=%d memflags=%x (%ld of %d)\n",
                     a->extent_order, d->domain_id, a->memflags,
                     i, a->nr_extents);
            goto out;
        }

        /* Inform the domain of the new page's machine address. */
        if ( !guest_handle_is_null(a->extent_list) )
        {
            mfn = page_to_mfn(page);
            if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
                goto out;
        }
    }

 out:
    a->nr_done = i;
}
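
/*
 * Handle XENMEM_populate_physmap: for each guest-supplied GPFN, either mark
 * the range populate-on-demand or allocate backing pages and add them to the
 * guest physmap.  Non-translated guests additionally get the M2P table
 * updated and are told the resulting MFNs.
 */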
static void populate_physmap(struct memop_args *a)
{
    struct page_info *page;
    unsigned long i, j;
    xen_pfn_t gpfn, mfn;
    struct domain *d = a->domain;

    if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done,
                                     a->nr_extents-1) )
        return;

    if ( !multipage_allocation_permitted(current->domain, a->extent_order) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) )
            goto out;

        if ( a->memflags & MEMF_populate_on_demand )
        {
            if ( guest_physmap_mark_populate_on_demand(d, gpfn,
                                                       a->extent_order) < 0 )
                goto out;
        }
        else
        {
            page = alloc_domheap_pages(d, a->extent_order, a->memflags);
            if ( unlikely(page == NULL) )
            {
                if ( !opt_tmem || (a->extent_order != 0) )
                    gdprintk(XENLOG_INFO, "Could not allocate order=%d extent:"
                             " id=%d memflags=%x (%ld of %d)\n",
                             a->extent_order, d->domain_id, a->memflags,
                             i, a->nr_extents);
                goto out;
            }

            mfn = page_to_mfn(page);
            guest_physmap_add_page(d, gpfn, mfn, a->extent_order);

            if ( !paging_mode_translate(d) )
            {
                for ( j = 0; j < (1 << a->extent_order); j++ )
                    set_gpfn_from_mfn(mfn + j, gpfn + j);

                /* Inform the domain of the new page's machine address. */
                if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
                    goto out;
            }
        }
    }

 out:
    a->nr_done = i;
}
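
/*
 * Remove a single page at gmfn from the guest's physmap and drop the
 * references that keep it allocated.  Returns 1 on success, 0 if the page
 * is invalid or not owned by the domain.
 */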
int guest_remove_page(struct domain *d, unsigned long gmfn)
{
    struct page_info *page;
#ifdef CONFIG_X86
    p2m_type_t p2mt;
#endif
    unsigned long mfn;

#ifdef CONFIG_X86
    mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt));
#else
    mfn = gmfn_to_mfn(d, gmfn);
#endif
    if ( unlikely(!mfn_valid(mfn)) )
    {
        gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n",
                 d->domain_id, gmfn);
        return 0;
    }

    page = mfn_to_page(mfn);
#ifdef CONFIG_X86
    /* If gmfn is shared, just drop the guest reference (which may or may not
     * free the page) */
    if ( p2m_is_shared(p2mt) )
    {
        put_page_and_type(page);
        guest_physmap_remove_page(d, gmfn, mfn, 0);
        return 1;
    }

#endif /* CONFIG_X86 */
    if ( unlikely(!get_page(page, d)) )
    {
        gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);
        return 0;
    }

    if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
        put_page_and_type(page);

    if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
        put_page(page);

    guest_physmap_remove_page(d, gmfn, mfn, 0);

    put_page(page);

    return 1;
}
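
/*
 * Handle XENMEM_decrease_reservation: trace each request, give
 * populate-on-demand a chance to reclaim the range, and otherwise release
 * the pages one by one via guest_remove_page().
 */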
static void decrease_reservation(struct memop_args *a)
{
    unsigned long i, j;
    xen_pfn_t gmfn;

    if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done,
                                     a->nr_extents-1) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        if ( unlikely(__copy_from_guest_offset(&gmfn, a->extent_list, i, 1)) )
            goto out;

        if ( tb_init_done )
        {
            struct {
                u64 gfn;
                int d:16,order:16;
            } t;

            t.gfn = gmfn;
            t.d = a->domain->domain_id;
            t.order = a->extent_order;

            __trace_var(TRC_MEM_DECREASE_RESERVATION, 0, sizeof(t), (unsigned char *)&t);
        }

        /* See if populate-on-demand wants to handle this */
        if ( is_hvm_domain(a->domain)
             && p2m_pod_decrease_reservation(a->domain, gmfn, a->extent_order) )
            continue;

        for ( j = 0; j < (1 << a->extent_order); j++ )
            if ( !guest_remove_page(a->domain, gmfn + j) )
                goto out;
    }

 out:
    a->nr_done = i;
}
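
/*
 * Handle XENMEM_exchange: trade a set of the guest's current pages for
 * freshly allocated ones, e.g. to obtain machine-contiguous or
 * address-constrained memory.  Works chunk by chunk so the hypercall can be
 * preempted and continued; within a chunk the exchange either completes or
 * is rolled back.
 */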
static long memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
{
    struct xen_memory_exchange exch;
    PAGE_LIST_HEAD(in_chunk_list);
    PAGE_LIST_HEAD(out_chunk_list);
    unsigned long in_chunk_order, out_chunk_order;
    xen_pfn_t gpfn, gmfn, mfn;
    unsigned long i, j, k;
    unsigned int node, memflags = 0;
    long rc = 0;
    struct domain *d;
    struct page_info *page;

    if ( copy_from_guest(&exch, arg, 1) )
        return -EFAULT;

    /* Various sanity checks. */
    if ( (exch.nr_exchanged > exch.in.nr_extents) ||
         /* Input and output domain identifiers match? */
         (exch.in.domid != exch.out.domid) ||
         /* Sizes of input and output lists do not overflow a long? */
         ((~0UL >> exch.in.extent_order) < exch.in.nr_extents) ||
         ((~0UL >> exch.out.extent_order) < exch.out.nr_extents) ||
         /* Sizes of input and output lists match? */
         ((exch.in.nr_extents << exch.in.extent_order) !=
          (exch.out.nr_extents << exch.out.extent_order)) )
    {
        rc = -EINVAL;
        goto fail_early;
    }

    /* Only privileged guests can allocate multi-page contiguous extents. */
    if ( !multipage_allocation_permitted(current->domain,
                                         exch.in.extent_order) ||
         !multipage_allocation_permitted(current->domain,
                                         exch.out.extent_order) )
    {
        rc = -EPERM;
        goto fail_early;
    }

    if ( exch.in.extent_order <= exch.out.extent_order )
    {
        in_chunk_order  = exch.out.extent_order - exch.in.extent_order;
        out_chunk_order = 0;
    }
    else
    {
        in_chunk_order  = 0;
        out_chunk_order = exch.in.extent_order - exch.out.extent_order;
    }

    if ( likely(exch.in.domid == DOMID_SELF) )
    {
        d = rcu_lock_current_domain();
    }
    else
    {
        if ( (d = rcu_lock_domain_by_id(exch.in.domid)) == NULL )
            goto fail_early;

        if ( !IS_PRIV_FOR(current->domain, d) )
        {
            rcu_unlock_domain(d);
            rc = -EPERM;
            goto fail_early;
        }
    }

    memflags |= MEMF_bits(domain_clamp_alloc_bitsize(
        d,
        XENMEMF_get_address_bits(exch.out.mem_flags) ? :
        (BITS_PER_LONG+PAGE_SHIFT)));
    node = XENMEMF_get_node(exch.out.mem_flags);
    if ( node == NUMA_NO_NODE )
        node = domain_to_node(d);
    memflags |= MEMF_node(node);

    for ( i = (exch.nr_exchanged >> in_chunk_order);
          i < (exch.in.nr_extents >> in_chunk_order);
          i++ )
    {
        if ( hypercall_preempt_check() )
        {
            exch.nr_exchanged = i << in_chunk_order;
            rcu_unlock_domain(d);
            if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
                return -EFAULT;
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh", XENMEM_exchange, arg);
        }

        /* Steal a chunk's worth of input pages from the domain. */
        for ( j = 0; j < (1UL << in_chunk_order); j++ )
        {
            if ( unlikely(__copy_from_guest_offset(
                &gmfn, exch.in.extent_start, (i<<in_chunk_order)+j, 1)) )
            {
                rc = -EFAULT;
                goto fail;
            }

            for ( k = 0; k < (1UL << exch.in.extent_order); k++ )
            {
#ifdef CONFIG_X86
                p2m_type_t p2mt;

                /* Shared pages cannot be exchanged */
                mfn = mfn_x(gfn_to_mfn_unshare(d, gmfn + k, &p2mt, 0));
                if ( p2m_is_shared(p2mt) )
                {
                    rc = -ENOMEM;
                    goto fail;
                }
#else /* !CONFIG_X86 */
                mfn = gmfn_to_mfn(d, gmfn + k);
#endif
                if ( unlikely(!mfn_valid(mfn)) )
                {
                    rc = -EINVAL;
                    goto fail;
                }

                page = mfn_to_page(mfn);

                if ( unlikely(steal_page(d, page, MEMF_no_refcount)) )
                {
                    rc = -EINVAL;
                    goto fail;
                }

                page_list_add(page, &in_chunk_list);
            }
        }

        /* Allocate a chunk's worth of anonymous output pages. */
        for ( j = 0; j < (1UL << out_chunk_order); j++ )
        {
            page = alloc_domheap_pages(NULL, exch.out.extent_order, memflags);
            if ( unlikely(page == NULL) )
            {
                rc = -ENOMEM;
                goto fail;
            }

            page_list_add(page, &out_chunk_list);
        }

        /*
         * Success! Beyond this point we cannot fail for this chunk.
         */

        /* Destroy final reference to each input page. */
        while ( (page = page_list_remove_head(&in_chunk_list)) )
        {
            unsigned long gfn;

            if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
                BUG();
            mfn = page_to_mfn(page);
            gfn = mfn_to_gmfn(d, mfn);
            /* Pages were unshared above */
            BUG_ON(SHARED_M2P(gfn));
            guest_physmap_remove_page(d, gfn, mfn, 0);
            put_page(page);
        }

        /* Assign each output page to the domain. */
        j = 0;
        while ( (page = page_list_remove_head(&out_chunk_list)) )
        {
            if ( assign_pages(d, page, exch.out.extent_order,
                              MEMF_no_refcount) )
            {
                unsigned long dec_count;
                bool_t drop_dom_ref;

                /*
                 * Pages in in_chunk_list are stolen without decreasing
                 * tot_pages. If the domain is dying when we assign the
                 * pages, the count must be decreased here. Pages that have
                 * already been assigned are covered by
                 * domain_relinquish_resources().
                 */
                dec_count = (((1UL << exch.in.extent_order) *
                              (1UL << in_chunk_order)) -
                             (j * (1UL << exch.out.extent_order)));

                spin_lock(&d->page_alloc_lock);
                d->tot_pages -= dec_count;
                drop_dom_ref = (dec_count && !d->tot_pages);
                spin_unlock(&d->page_alloc_lock);

                if ( drop_dom_ref )
                    put_domain(d);

                free_domheap_pages(page, exch.out.extent_order);
                goto dying;
            }

            /* Note that we ignore errors accessing the output extent list. */
            (void)__copy_from_guest_offset(
                &gpfn, exch.out.extent_start, (i<<out_chunk_order)+j, 1);

            mfn = page_to_mfn(page);
            guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order);

            if ( !paging_mode_translate(d) )
            {
                for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
                    set_gpfn_from_mfn(mfn + k, gpfn + k);
                (void)__copy_to_guest_offset(
                    exch.out.extent_start, (i<<out_chunk_order)+j, &mfn, 1);
            }
            j++;
        }
        BUG_ON( !(d->is_dying) && (j != (1UL << out_chunk_order)) );
    }

    exch.nr_exchanged = exch.in.nr_extents;
    if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
        rc = -EFAULT;
    rcu_unlock_domain(d);
    return rc;

    /*
     * Failed a chunk! Free any partial chunk work. Tell caller how many
     * chunks succeeded.
     */
 fail:
    /* Reassign any input pages we managed to steal. */
    while ( (page = page_list_remove_head(&in_chunk_list)) )
        if ( assign_pages(d, page, 0, MEMF_no_refcount) )
            BUG();
 dying:
    rcu_unlock_domain(d);
    /* Free any output pages we managed to allocate. */
    while ( (page = page_list_remove_head(&out_chunk_list)) )
        free_domheap_pages(page, exch.out.extent_order);

    exch.nr_exchanged = i << in_chunk_order;

 fail_early:
    if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
        rc = -EFAULT;
    return rc;
}
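
/*
 * Top-level dispatcher for the memory_op hypercall.  The low bits of cmd
 * select the sub-operation; for the reservation operations the remaining
 * bits carry the continuation point (start extent).
 */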
long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg)
{
    struct domain *d;
    int rc, op;
    unsigned int address_bits;
    unsigned long start_extent;
    struct xen_memory_reservation reservation;
    struct memop_args args;
    domid_t domid;

    op = cmd & MEMOP_CMD_MASK;

    switch ( op )
    {
    case XENMEM_increase_reservation:
    case XENMEM_decrease_reservation:
    case XENMEM_populate_physmap:
        start_extent = cmd >> MEMOP_EXTENT_SHIFT;

        if ( copy_from_guest(&reservation, arg, 1) )
            return start_extent;

        /* Is size too large for us to encode a continuation? */
        if ( reservation.nr_extents > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) )
            return start_extent;

        if ( unlikely(start_extent > reservation.nr_extents) )
            return start_extent;

        args.extent_list  = reservation.extent_start;
        args.nr_extents   = reservation.nr_extents;
        args.extent_order = reservation.extent_order;
        args.nr_done      = start_extent;
        args.preempted    = 0;
        args.memflags     = 0;

        address_bits = XENMEMF_get_address_bits(reservation.mem_flags);
        if ( (address_bits != 0) &&
             (address_bits < (get_order_from_pages(max_page) + PAGE_SHIFT)) )
        {
            if ( address_bits <= PAGE_SHIFT )
                return start_extent;
            args.memflags = MEMF_bits(address_bits);
        }

        args.memflags |= MEMF_node(XENMEMF_get_node(reservation.mem_flags));

        if ( op == XENMEM_populate_physmap
             && (reservation.mem_flags & XENMEMF_populate_on_demand) )
            args.memflags |= MEMF_populate_on_demand;

        if ( likely(reservation.domid == DOMID_SELF) )
        {
            d = rcu_lock_current_domain();
        }
        else
        {
            if ( (d = rcu_lock_domain_by_id(reservation.domid)) == NULL )
                return start_extent;
            if ( !IS_PRIV_FOR(current->domain, d) )
            {
                rcu_unlock_domain(d);
                return start_extent;
            }
        }
        args.domain = d;

        rc = xsm_memory_adjust_reservation(current->domain, d);
        if ( rc )
        {
            rcu_unlock_domain(d);
            return rc;
        }

        switch ( op )
        {
        case XENMEM_increase_reservation:
            increase_reservation(&args);
            break;
        case XENMEM_decrease_reservation:
            decrease_reservation(&args);
            break;
        default: /* XENMEM_populate_physmap */
            populate_physmap(&args);
            break;
        }

        rcu_unlock_domain(d);

        rc = args.nr_done;

        if ( args.preempted )
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh",
                op | (rc << MEMOP_EXTENT_SHIFT), arg);

        break;

    case XENMEM_exchange:
        rc = memory_exchange(guest_handle_cast(arg, xen_memory_exchange_t));
        break;

    case XENMEM_maximum_ram_page:
        rc = max_page;
        break;

    case XENMEM_current_reservation:
    case XENMEM_maximum_reservation:
    case XENMEM_maximum_gpfn:
        if ( copy_from_guest(&domid, arg, 1) )
            return -EFAULT;

        rc = rcu_lock_target_domain_by_id(domid, &d);
        if ( rc )
            return rc;

        rc = xsm_memory_stat_reservation(current->domain, d);
        if ( rc )
        {
            rcu_unlock_domain(d);
            return rc;
        }

        switch ( op )
        {
        case XENMEM_current_reservation:
            rc = d->tot_pages;
            break;
        case XENMEM_maximum_reservation:
            rc = d->max_pages;
            break;
        default:
            ASSERT(op == XENMEM_maximum_gpfn);
            rc = domain_get_maximum_gpfn(d);
            break;
        }

        rcu_unlock_domain(d);

        break;

    default:
        rc = arch_memory_op(op, arg);
        break;
    }

    return rc;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */