debuggers.hg

view xen/arch/x86/mm/p2m.c @ 20652:295e77eed8c9

PoD: appropriate BUG_ON when domain is dying

The BUG_ON(d->is_dying) added to p2m_pod_cache_add() in c/s 20426 is not
appropriate, since d->is_dying is set asynchronously. For example, MMU_UPDATE
hypercalls from qemu and the DOMCTL_destroydomain hypercall from xend can be
issued simultaneously.

This patch also makes p2m_pod_empty_cache() wait, via spin_barrier() on the
p2m lock, until any in-flight PoD operation has finished.
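
In outline, the resulting locking pattern is sketched below (an editor's
simplified illustration of the code in this file, not the literal hunks of
the patch):

    /* Every PoD path re-checks is_dying while holding the p2m lock ... */
    p2m_lock(p2md);
    if ( unlikely(d->is_dying) )
        goto out_unlock;           /* teardown has begun; back off */
    /* ... manipulate the PoD cache / p2m ... */
    p2m_unlock(p2md);

    /* ... so p2m_pod_empty_cache() only has to drain current lock holders: */
    BUG_ON(!d->is_dying);          /* is_dying is already set and visible */
    spin_barrier(&p2md->lock);     /* wait for in-flight PoD ops to finish */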

Signed-off-by: Kouya Shimura <kouya@jp.fujitsu.com>
Acked-by: George Dunlap <george.dunlap@eu.citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Dec 11 08:42:28 2009 +0000 (2009-12-11)
parents 8f304c003af4
children 45fc26e2d05a
line source
1 /******************************************************************************
2 * arch/x86/mm/p2m.c
3 *
4 * physical-to-machine mappings for automatically-translated domains.
5 *
6 * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
7 * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
8 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
9 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
26 #include <asm/domain.h>
27 #include <asm/page.h>
28 #include <asm/paging.h>
29 #include <asm/p2m.h>
30 #include <asm/hvm/vmx/vmx.h> /* ept_p2m_init() */
31 #include <xen/iommu.h>
33 /* Debugging and auditing of the P2M code? */
34 #define P2M_AUDIT 0
35 #define P2M_DEBUGGING 0
37 /* Printouts */
38 #define P2M_PRINTK(_f, _a...) \
39 debugtrace_printk("p2m: %s(): " _f, __func__, ##_a)
40 #define P2M_ERROR(_f, _a...) \
41 printk("pg error: %s(): " _f, __func__, ##_a)
42 #if P2M_DEBUGGING
43 #define P2M_DEBUG(_f, _a...) \
44 debugtrace_printk("p2mdebug: %s(): " _f, __func__, ##_a)
45 #else
46 #define P2M_DEBUG(_f, _a...) do { (void)(_f); } while(0)
47 #endif
50 /* Override macros from asm/page.h to make them work with mfn_t */
51 #undef mfn_to_page
52 #define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
53 #undef mfn_valid
54 #define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
55 #undef page_to_mfn
56 #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
59 /* PTE flags for the various types of p2m entry */
60 #define P2M_BASE_FLAGS \
61 (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
63 #define SUPERPAGE_PAGES (1UL << 9)
64 #define superpage_aligned(_x) (((_x)&(SUPERPAGE_PAGES-1))==0)
66 static unsigned long p2m_type_to_flags(p2m_type_t t)
67 {
68 unsigned long flags;
69 #ifdef __x86_64__
70 flags = (unsigned long)(t & 0x3fff) << 9;
71 #else
72 flags = (t & 0x7UL) << 9;
73 #endif
74 #ifndef HAVE_GRANT_MAP_P2M
75 BUG_ON(p2m_is_grant(t));
76 #endif
77 switch(t)
78 {
79 case p2m_invalid:
80 default:
81 return flags;
82 case p2m_ram_rw:
83 case p2m_grant_map_rw:
84 return flags | P2M_BASE_FLAGS | _PAGE_RW;
85 case p2m_ram_logdirty:
86 return flags | P2M_BASE_FLAGS;
87 case p2m_ram_ro:
88 case p2m_grant_map_ro:
89 return flags | P2M_BASE_FLAGS;
90 case p2m_mmio_dm:
91 return flags;
92 case p2m_mmio_direct:
93 return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_PCD;
94 case p2m_populate_on_demand:
95 return flags;
96 }
97 }
99 #if P2M_AUDIT
100 static void audit_p2m(struct domain *d);
101 #else
102 # define audit_p2m(_d) do { (void)(_d); } while(0)
103 #endif /* P2M_AUDIT */
105 // Find the next level's P2M entry, checking for out-of-range gfn's...
106 // Returns NULL on error.
107 //
108 static l1_pgentry_t *
109 p2m_find_entry(void *table, unsigned long *gfn_remainder,
110 unsigned long gfn, u32 shift, u32 max)
111 {
112 u32 index;
114 index = *gfn_remainder >> shift;
115 if ( index >= max )
116 {
117 P2M_DEBUG("gfn=0x%lx out of range "
118 "(gfn_remainder=0x%lx shift=%d index=0x%x max=0x%x)\n",
119 gfn, *gfn_remainder, shift, index, max);
120 return NULL;
121 }
122 *gfn_remainder &= (1 << shift) - 1;
123 return (l1_pgentry_t *)table + index;
124 }
126 // Walk one level of the P2M table, allocating a new table if required.
127 // Returns 0 on error.
128 //
129 static int
130 p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table,
131 unsigned long *gfn_remainder, unsigned long gfn, u32 shift,
132 u32 max, unsigned long type)
133 {
134 l1_pgentry_t *l1_entry;
135 l1_pgentry_t *p2m_entry;
136 l1_pgentry_t new_entry;
137 void *next;
138 int i;
139 ASSERT(d->arch.p2m->alloc_page);
141 if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
142 shift, max)) )
143 return 0;
145 /* PoD: Not present doesn't imply empty. */
146 if ( !l1e_get_flags(*p2m_entry) )
147 {
148 struct page_info *pg = d->arch.p2m->alloc_page(d);
149 if ( pg == NULL )
150 return 0;
151 page_list_add_tail(pg, &d->arch.p2m->pages);
152 pg->u.inuse.type_info = type | 1 | PGT_validated;
153 pg->count_info |= 1;
155 new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
156 __PAGE_HYPERVISOR|_PAGE_USER);
158 switch ( type ) {
159 case PGT_l3_page_table:
160 paging_write_p2m_entry(d, gfn,
161 p2m_entry, *table_mfn, new_entry, 4);
162 break;
163 case PGT_l2_page_table:
164 #if CONFIG_PAGING_LEVELS == 3
165 /* for PAE mode, PDPE only has PCD/PWT/P bits available */
166 new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)), _PAGE_PRESENT);
167 #endif
168 paging_write_p2m_entry(d, gfn,
169 p2m_entry, *table_mfn, new_entry, 3);
170 break;
171 case PGT_l1_page_table:
172 paging_write_p2m_entry(d, gfn,
173 p2m_entry, *table_mfn, new_entry, 2);
174 break;
175 default:
176 BUG();
177 break;
178 }
179 }
181 ASSERT(l1e_get_flags(*p2m_entry) & (_PAGE_PRESENT|_PAGE_PSE));
183 /* split a single large page into 4KB pages in the P2M table */
184 if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
185 {
186 unsigned long flags, pfn;
187 struct page_info *pg = d->arch.p2m->alloc_page(d);
188 if ( pg == NULL )
189 return 0;
190 page_list_add_tail(pg, &d->arch.p2m->pages);
191 pg->u.inuse.type_info = PGT_l1_page_table | 1 | PGT_validated;
192 pg->count_info |= 1;
194 /* New splintered mappings inherit the flags of the old superpage,
195 * with a little reorganisation for the _PAGE_PSE_PAT bit. */
196 flags = l1e_get_flags(*p2m_entry);
197 pfn = l1e_get_pfn(*p2m_entry);
198 if ( pfn & 1 ) /* ==> _PAGE_PSE_PAT was set */
199 pfn -= 1; /* Clear it; _PAGE_PSE becomes _PAGE_PAT */
200 else
201 flags &= ~_PAGE_PSE; /* Clear _PAGE_PSE (== _PAGE_PAT) */
203 l1_entry = __map_domain_page(pg);
204 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
205 {
206 new_entry = l1e_from_pfn(pfn + i, flags);
207 paging_write_p2m_entry(d, gfn,
208 l1_entry+i, *table_mfn, new_entry, 1);
209 }
210 unmap_domain_page(l1_entry);
212 new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
213 __PAGE_HYPERVISOR|_PAGE_USER);
214 paging_write_p2m_entry(d, gfn,
215 p2m_entry, *table_mfn, new_entry, 2);
216 }
218 *table_mfn = _mfn(l1e_get_pfn(*p2m_entry));
219 next = map_domain_page(mfn_x(*table_mfn));
220 unmap_domain_page(*table);
221 *table = next;
223 return 1;
224 }
226 /*
227 * Populate-on-demand functionality
228 */
229 static
230 int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
231 unsigned int page_order, p2m_type_t p2mt);
233 static int
234 p2m_pod_cache_add(struct domain *d,
235 struct page_info *page,
236 unsigned long order)
237 {
238 int i;
239 struct page_info *p;
240 struct p2m_domain *p2md = d->arch.p2m;
242 #ifndef NDEBUG
243 mfn_t mfn;
245 mfn = page_to_mfn(page);
247 /* Check to make sure this is a contiguous region */
248 if( mfn_x(mfn) & ((1 << order) - 1) )
249 {
250 printk("%s: mfn %lx not aligned order %lu! (mask %lx)\n",
251 __func__, mfn_x(mfn), order, ((1UL << order) - 1));
252 return -1;
253 }
255 for(i=0; i < 1 << order ; i++) {
256 struct domain * od;
258 p = mfn_to_page(_mfn(mfn_x(mfn) + i));
259 od = page_get_owner(p);
260 if(od != d)
261 {
262 printk("%s: mfn %lx expected owner d%d, got owner d%d!\n",
263 __func__, mfn_x(mfn), d->domain_id,
264 od?od->domain_id:-1);
265 return -1;
266 }
267 }
268 #endif
270 ASSERT(p2m_locked_by_me(p2md));
272 /*
273 * Pages from domain_alloc and returned by the balloon driver aren't
274 * guaranteed to be zero; but by reclaiming zero pages, we implicitly
275 * promise to provide zero pages. So we scrub pages before using them.
276 */
277 for ( i = 0; i < (1 << order); i++ )
278 {
279 char *b = map_domain_page(mfn_x(page_to_mfn(page)) + i);
280 clear_page(b);
281 unmap_domain_page(b);
282 }
284 spin_lock(&d->page_alloc_lock);
286 /* First, take all pages off the domain list */
287 for(i=0; i < 1 << order ; i++)
288 {
289 p = page + i;
290 page_list_del(p, &d->page_list);
291 }
293 /* Then add the first one to the appropriate populate-on-demand list */
294 switch(order)
295 {
296 case 9:
297 page_list_add_tail(page, &p2md->pod.super); /* lock: page_alloc */
298 p2md->pod.count += 1 << order;
299 break;
300 case 0:
301 page_list_add_tail(page, &p2md->pod.single); /* lock: page_alloc */
302 p2md->pod.count += 1 ;
303 break;
304 default:
305 BUG();
306 }
308 /* Ensure that the PoD cache has never been emptied.
309 * This may cause "zombie domains" since the page will never be freed. */
310 BUG_ON( d->arch.relmem != RELMEM_not_started );
312 spin_unlock(&d->page_alloc_lock);
314 return 0;
315 }
317 /* Get a page of size order from the populate-on-demand cache. Will break
318 * down 2-meg pages into singleton pages automatically. Returns NULL if
319 * a superpage is requested and no superpages are available. Must be called
320 * with the d->page_alloc_lock held. */
321 static struct page_info * p2m_pod_cache_get(struct domain *d,
322 unsigned long order)
323 {
324 struct p2m_domain *p2md = d->arch.p2m;
325 struct page_info *p = NULL;
326 int i;
328 if ( order == 9 && page_list_empty(&p2md->pod.super) )
329 {
330 return NULL;
331 }
332 else if ( order == 0 && page_list_empty(&p2md->pod.single) )
333 {
334 unsigned long mfn;
335 struct page_info *q;
337 BUG_ON( page_list_empty(&p2md->pod.super) );
339 /* Break up a superpage to make single pages. NB count doesn't
340 * need to be adjusted. */
341 printk("%s: Breaking up superpage.\n", __func__);
342 p = page_list_remove_head(&p2md->pod.super);
343 mfn = mfn_x(page_to_mfn(p));
345 for ( i=0; i<SUPERPAGE_PAGES; i++ )
346 {
347 q = mfn_to_page(_mfn(mfn+i));
348 page_list_add_tail(q, &p2md->pod.single);
349 }
350 }
352 switch ( order )
353 {
354 case 9:
355 BUG_ON( page_list_empty(&p2md->pod.super) );
356 p = page_list_remove_head(&p2md->pod.super);
357 p2md->pod.count -= 1 << order; /* Lock: page_alloc */
358 break;
359 case 0:
360 BUG_ON( page_list_empty(&p2md->pod.single) );
361 p = page_list_remove_head(&p2md->pod.single);
362 p2md->pod.count -= 1;
363 break;
364 default:
365 BUG();
366 }
368 /* Put the pages back on the domain page_list */
369 for ( i = 0 ; i < (1 << order) ; i++ )
370 {
371 BUG_ON(page_get_owner(p + i) != d);
372 page_list_add_tail(p + i, &d->page_list);
373 }
375 return p;
376 }
378 /* Set the size of the cache, allocating or freeing as necessary. */
379 static int
380 p2m_pod_set_cache_target(struct domain *d, unsigned long pod_target)
381 {
382 struct p2m_domain *p2md = d->arch.p2m;
383 int ret = 0;
385 /* Increasing the target */
386 while ( pod_target > p2md->pod.count )
387 {
388 struct page_info * page;
389 int order;
391 if ( (pod_target - p2md->pod.count) >= SUPERPAGE_PAGES )
392 order = 9;
393 else
394 order = 0;
395 retry:
396 page = alloc_domheap_pages(d, order, 0);
397 if ( unlikely(page == NULL) )
398 {
399 if ( order == 9 )
400 {
401 /* If we can't allocate a superpage, try singleton pages */
402 order = 0;
403 goto retry;
404 }
406 printk("%s: Unable to allocate domheap page for pod cache. target %lu cachesize %d\n",
407 __func__, pod_target, p2md->pod.count);
408 ret = -ENOMEM;
409 goto out;
410 }
412 p2m_pod_cache_add(d, page, order);
413 }
415 /* Decreasing the target */
416 /* We hold the p2m lock here, so we don't need to worry about
417 * cache disappearing under our feet. */
418 while ( pod_target < p2md->pod.count )
419 {
420 struct page_info * page;
421 int order, i;
423 /* Grab the lock before checking that pod.super is empty, or the last
424 * entries may disappear before we grab the lock. */
425 spin_lock(&d->page_alloc_lock);
427 if ( (p2md->pod.count - pod_target) > SUPERPAGE_PAGES
428 && !page_list_empty(&p2md->pod.super) )
429 order = 9;
430 else
431 order = 0;
433 page = p2m_pod_cache_get(d, order);
435 ASSERT(page != NULL);
437 spin_unlock(&d->page_alloc_lock);
439 /* Then free them */
440 for ( i = 0 ; i < (1 << order) ; i++ )
441 {
442 /* Copied from common/memory.c:guest_remove_page() */
443 if ( unlikely(!get_page(page+i, d)) )
444 {
445 gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);
446 ret = -EINVAL;
447 goto out;
448 }
450 if ( test_and_clear_bit(_PGT_pinned, &(page+i)->u.inuse.type_info) )
451 put_page_and_type(page+i);
453 if ( test_and_clear_bit(_PGC_allocated, &(page+i)->count_info) )
454 put_page(page+i);
456 put_page(page+i);
457 }
458 }
460 out:
461 return ret;
462 }
464 /*
465 * The "right behavior" here requires some careful thought. First, some
466 * definitions:
467 * + M: static_max
468 * + B: number of pages the balloon driver has ballooned down to.
469 * + P: Number of populated pages.
470 * + T: Old target
471 * + T': New target
472 *
473 * The following equations should hold:
474 * 0 <= P <= T <= B <= M
475 * d->arch.p2m->pod.entry_count == B - P
476 * d->tot_pages == P + d->arch.p2m->pod.count
477 *
478 * Now we have the following potential cases to cover:
479 * B < T': Set the PoD cache size equal to the number of outstanding PoD
480 * entries. The balloon driver will deflate the balloon to give back
481 * the remainder of the RAM to the guest OS.
482 * T < T' < B: Increase the PoD cache size.
483 * T' < T <= B: Here we have a choice. We can decrease the size of the cache
484 * and get the memory back right away. However, that means every time we
485 * reduce the memory target we risk the guest attempting to populate the
486 * memory before the balloon driver has reached its new target. It is safer
487 * never to reduce the cache size here, only when the balloon driver frees
488 * PoD ranges.
489 *
490 * If there are many zero pages, we could reach the target also by doing
491 * zero sweeps and marking the ranges PoD; but the balloon driver will have
492 * to free this memory eventually anyway, so we don't actually gain that much
493 * by doing so.
494 *
495 * NB that the equation (B<T') may require adjustment to the cache
496 * size as PoD pages are freed as well; i.e., freeing a PoD-backed
497 * entry when pod.entry_count == pod.count requires us to reduce both
498 * pod.entry_count and pod.count.
499 */
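/* Worked example (editor's illustration; these numbers are made up and do
 * not appear in the original source): suppose M = 1024, B = 768, P = 512 and
 * the old target T = 640, so pod.entry_count = B - P = 256,
 * pod.count = T - P = 128 and d->tot_pages = P + pod.count = 640.
 * + T' = 600 (T' < T <= B): T' < tot_pages, so p2m_pod_set_mem_target()
 *   returns early and leaves the cache alone; the balloon driver acts first.
 * + T' = 700 (T < T' < B): pod_target = T' - P = 188, so the cache grows
 *   from 128 to 188 pages.
 * + T' = 896 (B < T'): pod_target is clamped to entry_count = 256; the
 *   balloon driver deflates to hand the remaining memory back to the guest.
 */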
500 int
501 p2m_pod_set_mem_target(struct domain *d, unsigned long target)
502 {
503 unsigned pod_target;
504 struct p2m_domain *p2md = d->arch.p2m;
505 int ret = 0;
506 unsigned long populated;
508 p2m_lock(p2md);
510 /* P == B: Nothing to do. */
511 if ( p2md->pod.entry_count == 0 )
512 goto out;
514 /* Don't do anything if the domain is being torn down */
515 if ( d->is_dying )
516 goto out;
518 /* T' < B: Don't reduce the cache size; let the balloon driver
519 * take care of it. */
520 if ( target < d->tot_pages )
521 goto out;
523 populated = d->tot_pages - p2md->pod.count;
525 pod_target = target - populated;
527 /* B < T': Set the cache size equal to # of outstanding entries,
528 * let the balloon driver fill in the rest. */
529 if ( pod_target > p2md->pod.entry_count )
530 pod_target = p2md->pod.entry_count;
532 ASSERT( pod_target > p2md->pod.count );
534 ret = p2m_pod_set_cache_target(d, pod_target);
536 out:
537 p2m_unlock(p2md);
539 return ret;
540 }
542 void
543 p2m_pod_empty_cache(struct domain *d)
544 {
545 struct p2m_domain *p2md = d->arch.p2m;
546 struct page_info *page;
548 /* After this barrier no new PoD activities can happen. */
549 BUG_ON(!d->is_dying);
550 spin_barrier(&p2md->lock);
552 spin_lock(&d->page_alloc_lock);
554 while ( (page = page_list_remove_head(&p2md->pod.super)) )
555 {
556 int i;
558 for ( i = 0 ; i < SUPERPAGE_PAGES ; i++ )
559 {
560 BUG_ON(page_get_owner(page + i) != d);
561 page_list_add_tail(page + i, &d->page_list);
562 }
564 p2md->pod.count -= SUPERPAGE_PAGES;
565 }
567 while ( (page = page_list_remove_head(&p2md->pod.single)) )
568 {
569 BUG_ON(page_get_owner(page) != d);
570 page_list_add_tail(page, &d->page_list);
572 p2md->pod.count -= 1;
573 }
575 BUG_ON(p2md->pod.count != 0);
577 spin_unlock(&d->page_alloc_lock);
578 }
580 /* This function is needed for two reasons:
581 * + To properly handle clearing of PoD entries
582 * + To "steal back" memory being freed for the PoD cache, rather than
583 * releasing it.
584 *
585 * Once both of these tasks have been completed, we can return and
586 * allow decrease_reservation() to handle everything else.
587 */
588 int
589 p2m_pod_decrease_reservation(struct domain *d,
590 xen_pfn_t gpfn,
591 unsigned int order)
592 {
593 struct p2m_domain *p2md = d->arch.p2m;
594 int ret=0;
595 int i;
597 int steal_for_cache = 0;
598 int pod = 0, nonpod = 0, ram = 0;
601 /* If we don't have any outstanding PoD entries, let things take their
602 * course */
603 if ( p2md->pod.entry_count == 0 )
604 goto out;
606 /* Figure out if we need to steal some freed memory for our cache */
607 steal_for_cache = ( p2md->pod.entry_count > p2md->pod.count );
609 p2m_lock(p2md);
610 audit_p2m(d);
612 if ( unlikely(d->is_dying) )
613 goto out_unlock;
615 /* See what's in here. */
616 /* FIXME: Add contiguous; query for PSE entries? */
617 for ( i=0; i<(1<<order); i++)
618 {
619 p2m_type_t t;
621 gfn_to_mfn_query(d, gpfn + i, &t);
623 if ( t == p2m_populate_on_demand )
624 pod++;
625 else
626 {
627 nonpod++;
628 if ( p2m_is_ram(t) )
629 ram++;
630 }
631 }
633 /* No populate-on-demand? Don't need to steal anything? Then we're done!*/
634 if(!pod && !steal_for_cache)
635 goto out_unlock;
637 if ( !nonpod )
638 {
639 /* All PoD: Mark the whole region invalid and tell caller
640 * we're done. */
641 set_p2m_entry(d, gpfn, _mfn(INVALID_MFN), order, p2m_invalid);
642 p2md->pod.entry_count-=(1<<order); /* Lock: p2m */
643 BUG_ON(p2md->pod.entry_count < 0);
644 ret = 1;
645 goto out_entry_check;
646 }
648 /* FIXME: Steal contig 2-meg regions for cache */
650 /* Process as long as:
651 * + There are PoD entries to handle, or
652 * + There is ram left, and we want to steal it
653 */
654 for ( i=0;
655 i<(1<<order) && (pod>0 || (steal_for_cache && ram > 0));
656 i++)
657 {
658 mfn_t mfn;
659 p2m_type_t t;
661 mfn = gfn_to_mfn_query(d, gpfn + i, &t);
662 if ( t == p2m_populate_on_demand )
663 {
664 set_p2m_entry(d, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid);
665 p2md->pod.entry_count--; /* Lock: p2m */
666 BUG_ON(p2md->pod.entry_count < 0);
667 pod--;
668 }
669 else if ( steal_for_cache && p2m_is_ram(t) )
670 {
671 struct page_info *page;
673 ASSERT(mfn_valid(mfn));
675 page = mfn_to_page(mfn);
677 set_p2m_entry(d, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid);
678 set_gpfn_from_mfn(mfn_x(mfn), INVALID_M2P_ENTRY);
680 p2m_pod_cache_add(d, page, 0);
682 steal_for_cache = ( p2md->pod.entry_count > p2md->pod.count );
684 nonpod--;
685 ram--;
686 }
687 }
689 /* If there are no more non-PoD entries, tell decrease_reservation() that
690 * there's nothing left to do. */
691 if ( nonpod == 0 )
692 ret = 1;
694 out_entry_check:
695 /* If we've reduced our "liabilities" beyond our "assets", free some */
696 if ( p2md->pod.entry_count < p2md->pod.count )
697 {
698 printk("b %d\n", p2md->pod.entry_count);
699 p2m_pod_set_cache_target(d, p2md->pod.entry_count);
700 }
702 out_unlock:
703 audit_p2m(d);
704 p2m_unlock(p2md);
706 out:
707 return ret;
708 }
710 void
711 p2m_pod_dump_data(struct domain *d)
712 {
713 struct p2m_domain *p2md = d->arch.p2m;
715 printk(" PoD entries=%d cachesize=%d\n",
716 p2md->pod.entry_count, p2md->pod.count);
717 }
720 /* Search for all-zero superpages to be reclaimed as superpages for the
721 * PoD cache. Must be called w/ p2m lock held, page_alloc lock not held. */
722 static int
723 p2m_pod_zero_check_superpage(struct domain *d, unsigned long gfn)
724 {
725 mfn_t mfn, mfn0 = _mfn(INVALID_MFN);
726 p2m_type_t type, type0 = 0;
727 unsigned long * map = NULL;
728 int ret=0, reset = 0;
729 int i, j;
730 int max_ref = 1;
732 if ( !superpage_aligned(gfn) )
733 goto out;
735 /* Allow an extra refcount for one shadow pt mapping in shadowed domains */
736 if ( paging_mode_shadow(d) )
737 max_ref++;
739 /* Look up the mfns, checking to make sure they're aligned and
740 * contiguous (and all of the same type), and mapping them. */
741 for ( i=0; i<SUPERPAGE_PAGES; i++ )
742 {
744 mfn = gfn_to_mfn_query(d, gfn + i, &type);
746 if ( i == 0 )
747 {
748 mfn0 = mfn;
749 type0 = type;
750 }
752 /* Conditions that must be met for superpage-superpage:
753 * + All gfns are ram types
754 * + All gfns have the same type
755 * + All of the mfns are allocated to a domain
756 * + None of the mfns are used as pagetables, or allocated via xenheap
757 * + The first mfn is 2-meg aligned
758 * + All the other mfns are in sequence
759 * Adding for good measure:
760 * + None of the mfns are likely to be mapped elsewhere (refcount
761 * 2 or less for shadow, 1 for hap)
762 */
763 if ( !p2m_is_ram(type)
764 || type != type0
765 || ( (mfn_to_page(mfn)->count_info & PGC_allocated) == 0 )
766 || ( (mfn_to_page(mfn)->count_info & (PGC_page_table|PGC_xen_heap)) != 0 )
767 || ( (mfn_to_page(mfn)->count_info & PGC_xen_heap ) != 0 )
768 || ( (mfn_to_page(mfn)->count_info & PGC_count_mask) > max_ref )
769 || !( ( i == 0 && superpage_aligned(mfn_x(mfn0)) )
770 || ( i != 0 && mfn_x(mfn) == (mfn_x(mfn0) + i) ) ) )
771 goto out;
772 }
774 /* Now, do a quick check to see if it may be zero before unmapping. */
775 for ( i=0; i<SUPERPAGE_PAGES; i++ )
776 {
777 /* Quick zero-check */
778 map = map_domain_page(mfn_x(mfn0) + i);
780 for ( j=0; j<16; j++ )
781 if( *(map+j) != 0 )
782 break;
784 unmap_domain_page(map);
786 if ( j < 16 )
787 goto out;
789 }
791 /* Try to remove the page, restoring old mapping if it fails. */
792 set_p2m_entry(d, gfn,
793 _mfn(POPULATE_ON_DEMAND_MFN), 9,
794 p2m_populate_on_demand);
796 /* Make sure none of the MFNs are used elsewhere... for example, mapped
797 * via the grant table interface, or by qemu. Allow one refcount for
798 * being allocated to the domain. */
799 for ( i=0; i < SUPERPAGE_PAGES; i++ )
800 {
801 mfn = _mfn(mfn_x(mfn0) + i);
802 if ( (mfn_to_page(mfn)->count_info & PGC_count_mask) > 1 )
803 {
804 reset = 1;
805 goto out_reset;
806 }
807 }
809 /* Finally, do a full zero-check */
810 for ( i=0; i < SUPERPAGE_PAGES; i++ )
811 {
812 map = map_domain_page(mfn_x(mfn0) + i);
814 for ( j=0; j<PAGE_SIZE/sizeof(*map); j++ )
815 if( *(map+j) != 0 )
816 {
817 reset = 1;
818 break;
819 }
821 unmap_domain_page(map);
823 if ( reset )
824 goto out_reset;
825 }
827 /* Finally! We've passed all the checks, and can add the mfn superpage
828 * back on the PoD cache, and account for the new p2m PoD entries */
829 p2m_pod_cache_add(d, mfn_to_page(mfn0), 9);
830 d->arch.p2m->pod.entry_count += SUPERPAGE_PAGES;
832 out_reset:
833 if ( reset )
834 set_p2m_entry(d, gfn, mfn0, 9, type0);
836 out:
837 return ret;
838 }
840 static void
841 p2m_pod_zero_check(struct domain *d, unsigned long *gfns, int count)
842 {
843 mfn_t mfns[count];
844 p2m_type_t types[count];
845 unsigned long * map[count];
847 int i, j;
848 int max_ref = 1;
850 /* Allow an extra refcount for one shadow pt mapping in shadowed domains */
851 if ( paging_mode_shadow(d) )
852 max_ref++;
854 /* First, get the gfn list, translate to mfns, and map the pages. */
855 for ( i=0; i<count; i++ )
856 {
857 mfns[i] = gfn_to_mfn_query(d, gfns[i], types + i);
858 /* If this is ram, and not a pagetable or from the xen heap, and probably not mapped
859 elsewhere, map it; otherwise, skip. */
860 if ( p2m_is_ram(types[i])
861 && ( (mfn_to_page(mfns[i])->count_info & PGC_allocated) != 0 )
862 && ( (mfn_to_page(mfns[i])->count_info & (PGC_page_table|PGC_xen_heap)) == 0 )
863 && ( (mfn_to_page(mfns[i])->count_info & PGC_count_mask) <= max_ref ) )
864 map[i] = map_domain_page(mfn_x(mfns[i]));
865 else
866 map[i] = NULL;
867 }
869 /* Then, go through and check for zeroed pages, removing write permission
870 * for those with zeroes. */
871 for ( i=0; i<count; i++ )
872 {
873 if(!map[i])
874 continue;
876 /* Quick zero-check */
877 for ( j=0; j<16; j++ )
878 if( *(map[i]+j) != 0 )
879 break;
881 if ( j < 16 )
882 {
883 unmap_domain_page(map[i]);
884 map[i] = NULL;
885 continue;
886 }
888 /* Try to remove the page, restoring old mapping if it fails. */
889 set_p2m_entry(d, gfns[i],
890 _mfn(POPULATE_ON_DEMAND_MFN), 0,
891 p2m_populate_on_demand);
893 /* See if the page was successfully unmapped. (Allow one refcount
894 * for being allocated to a domain.) */
895 if ( (mfn_to_page(mfns[i])->count_info & PGC_count_mask) > 1 )
896 {
897 unmap_domain_page(map[i]);
898 map[i] = NULL;
900 set_p2m_entry(d, gfns[i], mfns[i], 0, types[i]);
902 continue;
903 }
904 }
906 /* Now check each page for real */
907 for ( i=0; i < count; i++ )
908 {
909 if(!map[i])
910 continue;
912 for ( j=0; j<PAGE_SIZE/sizeof(*map[i]); j++ )
913 if( *(map[i]+j) != 0 )
914 break;
916 unmap_domain_page(map[i]);
918 /* See comment in p2m_pod_zero_check_superpage() re gnttab
919 * check timing. */
920 if ( j < PAGE_SIZE/sizeof(*map[i]) )
921 {
922 set_p2m_entry(d, gfns[i], mfns[i], 0, types[i]);
923 }
924 else
925 {
926 /* Add to cache, and account for the new p2m PoD entry */
927 p2m_pod_cache_add(d, mfn_to_page(mfns[i]), 0);
928 d->arch.p2m->pod.entry_count++;
929 }
930 }
932 }
934 #define POD_SWEEP_LIMIT 1024
935 static void
936 p2m_pod_emergency_sweep_super(struct domain *d)
937 {
938 struct p2m_domain *p2md = d->arch.p2m;
939 unsigned long i, start, limit;
941 if ( p2md->pod.reclaim_super == 0 )
942 {
943 p2md->pod.reclaim_super = (p2md->pod.max_guest>>9)<<9;
944 p2md->pod.reclaim_super -= SUPERPAGE_PAGES;
945 }
947 start = p2md->pod.reclaim_super;
948 limit = (start > POD_SWEEP_LIMIT) ? (start - POD_SWEEP_LIMIT) : 0;
950 for ( i=p2md->pod.reclaim_super ; i > 0 ; i-=SUPERPAGE_PAGES )
951 {
952 p2m_pod_zero_check_superpage(d, i);
953 /* Stop if we're past our limit and we have found *something*.
954 *
955 * NB that this is a zero-sum game; we're increasing our cache size
956 * by increasing our 'debt'. Since we hold the p2m lock,
957 * (entry_count - count) must remain the same. */
958 if ( !page_list_empty(&p2md->pod.super) && i < limit )
959 break;
960 }
962 p2md->pod.reclaim_super = i ? i - SUPERPAGE_PAGES : 0;
964 }
966 #define POD_SWEEP_STRIDE 16
967 static void
968 p2m_pod_emergency_sweep(struct domain *d)
969 {
970 struct p2m_domain *p2md = d->arch.p2m;
971 unsigned long gfns[POD_SWEEP_STRIDE];
972 unsigned long i, j=0, start, limit;
973 p2m_type_t t;
976 if ( p2md->pod.reclaim_single == 0 )
977 p2md->pod.reclaim_single = p2md->pod.max_guest;
979 start = p2md->pod.reclaim_single;
980 limit = (start > POD_SWEEP_LIMIT) ? (start - POD_SWEEP_LIMIT) : 0;
982 /* FIXME: Figure out how to avoid superpages */
983 for ( i=p2md->pod.reclaim_single ; i > 0 ; i-- )
984 {
985 gfn_to_mfn_query(d, i, &t );
986 if ( p2m_is_ram(t) )
987 {
988 gfns[j] = i;
989 j++;
990 BUG_ON(j > POD_SWEEP_STRIDE);
991 if ( j == POD_SWEEP_STRIDE )
992 {
993 p2m_pod_zero_check(d, gfns, j);
994 j = 0;
995 }
996 }
997 /* Stop if we're past our limit and we have found *something*.
998 *
999 * NB that this is a zero-sum game; we're increasing our cache size
1000 * by re-increasing our 'debt'. Since we hold the p2m lock,
1001 * (entry_count - count) must remain the same. */
1002 if ( p2md->pod.count > 0 && i < limit )
1003 break;
1006 if ( j )
1007 p2m_pod_zero_check(d, gfns, j);
1009 p2md->pod.reclaim_single = i ? i - 1 : i;
1013 int
1014 p2m_pod_demand_populate(struct domain *d, unsigned long gfn,
1015 unsigned int order,
1016 p2m_query_t q)
1018 struct page_info *p = NULL; /* Compiler warnings */
1019 unsigned long gfn_aligned;
1020 mfn_t mfn;
1021 struct p2m_domain *p2md = d->arch.p2m;
1022 int i;
1024 ASSERT(p2m_locked_by_me(d->arch.p2m));
1026 /* This check is done with the p2m lock held. This will make sure that
1027 * even if d->is_dying changes under our feet, p2m_pod_empty_cache()
1028 * won't start until we're done. */
1029 if ( unlikely(d->is_dying) )
1030 goto out_fail;
1032 /* If we're low, start a sweep */
1033 if ( order == 9 && page_list_empty(&p2md->pod.super) )
1034 p2m_pod_emergency_sweep_super(d);
1036 if ( page_list_empty(&p2md->pod.single) &&
1037 ( ( order == 0 )
1038 || (order == 9 && page_list_empty(&p2md->pod.super) ) ) )
1039 p2m_pod_emergency_sweep(d);
1041 /* Keep track of the highest gfn demand-populated by a guest fault */
1042 if ( q == p2m_guest && gfn > p2md->pod.max_guest )
1043 p2md->pod.max_guest = gfn;
1045 spin_lock(&d->page_alloc_lock);
1047 if ( p2md->pod.count == 0 )
1048 goto out_of_memory;
1050 /* Get a page from the cache. A NULL return value indicates that the
1051 * 2-meg range should be marked singleton PoD, and retried */
1052 if ( (p = p2m_pod_cache_get(d, order)) == NULL )
1053 goto remap_and_retry;
1055 mfn = page_to_mfn(p);
1057 BUG_ON((mfn_x(mfn) & ((1 << order)-1)) != 0);
1059 spin_unlock(&d->page_alloc_lock);
1061 gfn_aligned = (gfn >> order) << order;
1063 set_p2m_entry(d, gfn_aligned, mfn, order, p2m_ram_rw);
1065 for( i = 0 ; i < (1UL << order) ; i++ )
1066 set_gpfn_from_mfn(mfn_x(mfn) + i, gfn_aligned + i);
1068 p2md->pod.entry_count -= (1 << order); /* Lock: p2m */
1069 BUG_ON(p2md->pod.entry_count < 0);
1071 return 0;
1072 out_of_memory:
1073 spin_unlock(&d->page_alloc_lock);
1075 printk("%s: Out of populate-on-demand memory! tot_pages %" PRIu32 " pod_entries %" PRIi32 "\n",
1076 __func__, d->tot_pages, p2md->pod.entry_count);
1077 domain_crash(d);
1078 out_fail:
1079 return -1;
1080 remap_and_retry:
1081 BUG_ON(order != 9);
1082 spin_unlock(&d->page_alloc_lock);
1084 /* Remap this 2-meg region in singleton chunks */
1085 gfn_aligned = (gfn>>order)<<order;
1086 for(i=0; i<(1<<order); i++)
1087 set_p2m_entry(d, gfn_aligned+i, _mfn(POPULATE_ON_DEMAND_MFN), 0,
1088 p2m_populate_on_demand);
1090 return 0;
1093 /* Non-ept "lock-and-check" wrapper */
1094 static int p2m_pod_check_and_populate(struct domain *d, unsigned long gfn,
1095 l1_pgentry_t *p2m_entry, int order,
1096 p2m_query_t q)
1098 /* Only take the lock if we don't already have it. Otherwise it
1099 * wouldn't be safe to do p2m lookups with the p2m lock held */
1100 int do_locking = !p2m_locked_by_me(d->arch.p2m);
1101 int r;
1103 if ( do_locking )
1104 p2m_lock(d->arch.p2m);
1106 audit_p2m(d);
1108 /* Check to make sure this is still PoD */
1109 if ( p2m_flags_to_type(l1e_get_flags(*p2m_entry)) != p2m_populate_on_demand )
1111 if ( do_locking )
1112 p2m_unlock(d->arch.p2m);
1113 return 0;
1116 r = p2m_pod_demand_populate(d, gfn, order, q);
1118 audit_p2m(d);
1119 if ( do_locking )
1120 p2m_unlock(d->arch.p2m);
1122 return r;
1125 // Returns 0 on error (out of memory)
1126 static int
1127 p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
1128 unsigned int page_order, p2m_type_t p2mt)
1130 // XXX -- this might be able to be faster iff current->domain == d
1131 mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
1132 void *table = map_domain_page(mfn_x(table_mfn));
1133 unsigned long i, gfn_remainder = gfn;
1134 l1_pgentry_t *p2m_entry;
1135 l1_pgentry_t entry_content;
1136 l2_pgentry_t l2e_content;
1137 int rv=0;
1139 #if CONFIG_PAGING_LEVELS >= 4
1140 if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
1141 L4_PAGETABLE_SHIFT - PAGE_SHIFT,
1142 L4_PAGETABLE_ENTRIES, PGT_l3_page_table) )
1143 goto out;
1144 #endif
1145 /*
1146 * When using PAE Xen, we only allow 33 bits of pseudo-physical
1147 * address in translated guests (i.e. 8 GBytes). This restriction
1148 * comes from wanting to map the P2M table into the 16MB RO_MPT hole
1149 * in Xen's address space for translated PV guests.
1150 * When using AMD's NPT on PAE Xen, we are restricted to 4GB.
1151 */
1152 if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
1153 L3_PAGETABLE_SHIFT - PAGE_SHIFT,
1154 ((CONFIG_PAGING_LEVELS == 3)
1155 ? (d->arch.hvm_domain.hap_enabled ? 4 : 8)
1156 : L3_PAGETABLE_ENTRIES),
1157 PGT_l2_page_table) )
1158 goto out;
1160 if ( page_order == 0 )
1162 if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
1163 L2_PAGETABLE_SHIFT - PAGE_SHIFT,
1164 L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
1165 goto out;
1167 p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
1168 0, L1_PAGETABLE_ENTRIES);
1169 ASSERT(p2m_entry);
1171 if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
1172 entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
1173 else
1174 entry_content = l1e_empty();
1176 /* level 1 entry */
1177 paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);
1179 else
1181 p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
1182 L2_PAGETABLE_SHIFT - PAGE_SHIFT,
1183 L2_PAGETABLE_ENTRIES);
1184 ASSERT(p2m_entry);
1186 /* FIXME: Deal with 4k replaced by 2meg pages */
1187 if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
1188 !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
1190 P2M_ERROR("configure P2M table 4KB L2 entry with large page\n");
1191 domain_crash(d);
1192 goto out;
1195 if ( mfn_valid(mfn) || p2m_is_magic(p2mt) )
1196 l2e_content = l2e_from_pfn(mfn_x(mfn),
1197 p2m_type_to_flags(p2mt) | _PAGE_PSE);
1198 else
1199 l2e_content = l2e_empty();
1201 entry_content.l1 = l2e_content.l2;
1202 paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 2);
1205 /* Track the highest gfn for which we have ever had a valid mapping */
1206 if ( mfn_valid(mfn)
1207 && (gfn + (1UL << page_order) - 1 > d->arch.p2m->max_mapped_pfn) )
1208 d->arch.p2m->max_mapped_pfn = gfn + (1UL << page_order) - 1;
1210 if ( iommu_enabled && need_iommu(d) )
1212 if ( p2mt == p2m_ram_rw )
1213 for ( i = 0; i < (1UL << page_order); i++ )
1214 iommu_map_page(d, gfn+i, mfn_x(mfn)+i );
1215 else
1216 for ( int i = 0; i < (1UL << page_order); i++ )
1217 iommu_unmap_page(d, gfn+i);
1220 /* Success */
1221 rv = 1;
1223 out:
1224 unmap_domain_page(table);
1225 return rv;
1228 static mfn_t
1229 p2m_gfn_to_mfn(struct domain *d, unsigned long gfn, p2m_type_t *t,
1230 p2m_query_t q)
1232 mfn_t mfn;
1233 paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
1234 l2_pgentry_t *l2e;
1235 l1_pgentry_t *l1e;
1237 ASSERT(paging_mode_translate(d));
1239 /* XXX This is for compatibility with the old model, where anything not
1240 * XXX marked as RAM was considered to be emulated MMIO space.
1241 * XXX Once we start explicitly registering MMIO regions in the p2m
1242 * XXX we will return p2m_invalid for unmapped gfns */
1243 *t = p2m_mmio_dm;
1245 mfn = pagetable_get_mfn(d->arch.phys_table);
1247 if ( gfn > d->arch.p2m->max_mapped_pfn )
1248 /* This pfn is higher than the highest the p2m map currently holds */
1249 return _mfn(INVALID_MFN);
1251 #if CONFIG_PAGING_LEVELS >= 4
1253 l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn));
1254 l4e += l4_table_offset(addr);
1255 if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 )
1257 unmap_domain_page(l4e);
1258 return _mfn(INVALID_MFN);
1260 mfn = _mfn(l4e_get_pfn(*l4e));
1261 unmap_domain_page(l4e);
1263 #endif
1265 l3_pgentry_t *l3e = map_domain_page(mfn_x(mfn));
1266 #if CONFIG_PAGING_LEVELS == 3
1267 /* On PAE hosts the p2m has eight l3 entries, not four (see
1268 * shadow_set_p2m_entry()) so we can't use l3_table_offset.
1269 * Instead, just count the number of l3es from zero. It's safe
1270 * to do this because we already checked that the gfn is within
1271 * the bounds of the p2m. */
1272 l3e += (addr >> L3_PAGETABLE_SHIFT);
1273 #else
1274 l3e += l3_table_offset(addr);
1275 #endif
1276 if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
1278 unmap_domain_page(l3e);
1279 return _mfn(INVALID_MFN);
1281 mfn = _mfn(l3e_get_pfn(*l3e));
1282 unmap_domain_page(l3e);
1285 l2e = map_domain_page(mfn_x(mfn));
1286 l2e += l2_table_offset(addr);
1288 pod_retry_l2:
1289 if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 )
1291 /* PoD: Try to populate a 2-meg chunk */
1292 if ( p2m_flags_to_type(l2e_get_flags(*l2e)) == p2m_populate_on_demand )
1294 if ( q != p2m_query ) {
1295 if ( !p2m_pod_check_and_populate(d, gfn,
1296 (l1_pgentry_t *)l2e, 9, q) )
1297 goto pod_retry_l2;
1298 } else
1299 *t = p2m_populate_on_demand;
1302 unmap_domain_page(l2e);
1303 return _mfn(INVALID_MFN);
1305 else if ( (l2e_get_flags(*l2e) & _PAGE_PSE) )
1307 mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr));
1308 *t = p2m_flags_to_type(l2e_get_flags(*l2e));
1309 unmap_domain_page(l2e);
1311 ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
1312 return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
1315 mfn = _mfn(l2e_get_pfn(*l2e));
1316 unmap_domain_page(l2e);
1318 l1e = map_domain_page(mfn_x(mfn));
1319 l1e += l1_table_offset(addr);
1320 pod_retry_l1:
1321 if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
1323 /* PoD: Try to populate */
1324 if ( p2m_flags_to_type(l1e_get_flags(*l1e)) == p2m_populate_on_demand )
1326 if ( q != p2m_query ) {
1327 if ( !p2m_pod_check_and_populate(d, gfn,
1328 (l1_pgentry_t *)l1e, 0, q) )
1329 goto pod_retry_l1;
1330 } else
1331 *t = p2m_populate_on_demand;
1334 unmap_domain_page(l1e);
1335 return _mfn(INVALID_MFN);
1337 mfn = _mfn(l1e_get_pfn(*l1e));
1338 *t = p2m_flags_to_type(l1e_get_flags(*l1e));
1339 unmap_domain_page(l1e);
1341 ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
1342 return (p2m_is_valid(*t) || p2m_is_grant(*t)) ? mfn : _mfn(INVALID_MFN);
1345 /* Read the current domain's p2m table (through the linear mapping). */
1346 static mfn_t p2m_gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t,
1347 p2m_query_t q)
1349 mfn_t mfn = _mfn(INVALID_MFN);
1350 p2m_type_t p2mt = p2m_mmio_dm;
1351 paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
1352 /* XXX This is for compatibility with the old model, where anything not
1353 * XXX marked as RAM was considered to be emulated MMIO space.
1354 * XXX Once we start explicitly registering MMIO regions in the p2m
1355 * XXX we will return p2m_invalid for unmapped gfns */
1357 if ( gfn <= current->domain->arch.p2m->max_mapped_pfn )
1359 l1_pgentry_t l1e = l1e_empty(), *p2m_entry;
1360 l2_pgentry_t l2e = l2e_empty();
1361 int ret;
1363 ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START)
1364 / sizeof(l1_pgentry_t));
1366 /*
1367 * Read & process L2
1368 */
1369 p2m_entry = &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START)
1370 + l2_linear_offset(addr)];
1372 pod_retry_l2:
1373 ret = __copy_from_user(&l2e,
1374 p2m_entry,
1375 sizeof(l2e));
1376 if ( ret != 0
1377 || !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
1379 if( (l2e_get_flags(l2e) & _PAGE_PSE)
1380 && ( p2m_flags_to_type(l2e_get_flags(l2e))
1381 == p2m_populate_on_demand ) )
1383 /* The read has succeeded, so we know that the mapping
1384 * exists at this point. */
1385 if ( q != p2m_query )
1387 if ( !p2m_pod_check_and_populate(current->domain, gfn,
1388 p2m_entry, 9, q) )
1389 goto pod_retry_l2;
1391 /* Allocate failed. */
1392 p2mt = p2m_invalid;
1393 printk("%s: Allocate failed!\n", __func__);
1394 goto out;
1396 else
1398 p2mt = p2m_populate_on_demand;
1399 goto out;
1403 goto pod_retry_l1;
1406 if (l2e_get_flags(l2e) & _PAGE_PSE)
1408 p2mt = p2m_flags_to_type(l2e_get_flags(l2e));
1409 ASSERT(l2e_get_pfn(l2e) != INVALID_MFN || !p2m_is_ram(p2mt));
1411 if ( p2m_is_valid(p2mt) )
1412 mfn = _mfn(l2e_get_pfn(l2e) + l1_table_offset(addr));
1413 else
1414 p2mt = p2m_mmio_dm;
1416 goto out;
1419 /*
1420 * Read and process L1
1421 */
1423 /* Need to __copy_from_user because the p2m is sparse and this
1424 * part might not exist */
1425 pod_retry_l1:
1426 p2m_entry = &phys_to_machine_mapping[gfn];
1428 ret = __copy_from_user(&l1e,
1429 p2m_entry,
1430 sizeof(l1e));
1432 if ( ret == 0 ) {
1433 p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
1434 ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
1436 if ( p2m_flags_to_type(l1e_get_flags(l1e))
1437 == p2m_populate_on_demand )
1439 /* The read has succeeded, so we know that the mapping
1440 * exists at this point. */
1441 if ( q != p2m_query )
1443 if ( !p2m_pod_check_and_populate(current->domain, gfn,
1444 (l1_pgentry_t *)p2m_entry, 0, q) )
1445 goto pod_retry_l1;
1447 /* Allocate failed. */
1448 p2mt = p2m_invalid;
1449 goto out;
1451 else
1453 p2mt = p2m_populate_on_demand;
1454 goto out;
1458 if ( p2m_is_valid(p2mt) || p2m_is_grant(p2mt) )
1459 mfn = _mfn(l1e_get_pfn(l1e));
1460 else
1461 /* XXX see above */
1462 p2mt = p2m_mmio_dm;
1465 out:
1466 *t = p2mt;
1467 return mfn;
1470 /* Init the datastructures for later use by the p2m code */
1471 int p2m_init(struct domain *d)
1473 struct p2m_domain *p2m;
1475 p2m = xmalloc(struct p2m_domain);
1476 if ( p2m == NULL )
1477 return -ENOMEM;
1479 d->arch.p2m = p2m;
1481 memset(p2m, 0, sizeof(*p2m));
1482 p2m_lock_init(p2m);
1483 INIT_PAGE_LIST_HEAD(&p2m->pages);
1484 INIT_PAGE_LIST_HEAD(&p2m->pod.super);
1485 INIT_PAGE_LIST_HEAD(&p2m->pod.single);
1487 p2m->set_entry = p2m_set_entry;
1488 p2m->get_entry = p2m_gfn_to_mfn;
1489 p2m->get_entry_current = p2m_gfn_to_mfn_current;
1490 p2m->change_entry_type_global = p2m_change_type_global;
1492 if ( is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled &&
1493 (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) )
1494 ept_p2m_init(d);
1496 return 0;
1499 void p2m_change_entry_type_global(struct domain *d,
1500 p2m_type_t ot, p2m_type_t nt)
1502 struct p2m_domain *p2m = d->arch.p2m;
1504 p2m_lock(p2m);
1505 p2m->change_entry_type_global(d, ot, nt);
1506 p2m_unlock(p2m);
1509 static
1510 int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
1511 unsigned int page_order, p2m_type_t p2mt)
1513 unsigned long todo = 1ul << page_order;
1514 unsigned int order;
1515 int rc = 1;
1517 while ( todo )
1519 if ( is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled )
1520 order = (((gfn | mfn_x(mfn) | todo) & (SUPERPAGE_PAGES - 1)) == 0) ?
1521 9 : 0;
1522 else
1523 order = 0;
1524 if ( !d->arch.p2m->set_entry(d, gfn, mfn, order, p2mt) )
1525 rc = 0;
1526 gfn += 1ul << order;
1527 if ( mfn_x(mfn) != INVALID_MFN )
1528 mfn = _mfn(mfn_x(mfn) + (1ul << order));
1529 todo -= 1ul << order;
1532 return rc;
1535 // Allocate a new p2m table for a domain.
1536 //
1537 // The structure of the p2m table is that of a pagetable for xen (i.e. it is
1538 // controlled by CONFIG_PAGING_LEVELS).
1539 //
1540 // The alloc_page and free_page functions will be used to get memory to
1541 // build the p2m, and to release it again at the end of day.
1542 //
1543 // Returns 0 for success or -errno.
1544 //
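// (Editor's sketch, not part of the original file: the in-tree callers are
// the shadow/HAP paging code, which pass their paging-pool allocators here.
// A minimal, hypothetical pair of callbacks would look roughly like this --
// pool_alloc_p2m_page/pool_free_p2m_page are invented names:)
//
//     static struct page_info *pool_alloc_p2m_page(struct domain *d)
//     {
//         return alloc_domheap_pages(NULL, 0, 0);  /* one 4K page for the p2m */
//     }
//
//     static void pool_free_p2m_page(struct domain *d, struct page_info *pg)
//     {
//         free_domheap_pages(pg, 0);
//     }
//
//     rc = p2m_alloc_table(d, pool_alloc_p2m_page, pool_free_p2m_page);
//     if ( rc != 0 )
//         /* propagate -EINVAL / -ENOMEM to the caller */;
//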
1545 int p2m_alloc_table(struct domain *d,
1546 struct page_info * (*alloc_page)(struct domain *d),
1547 void (*free_page)(struct domain *d, struct page_info *pg))
1550 mfn_t mfn = _mfn(INVALID_MFN);
1551 struct page_info *page, *p2m_top;
1552 unsigned int page_count = 0;
1553 unsigned long gfn = -1UL;
1554 struct p2m_domain *p2m = d->arch.p2m;
1556 p2m_lock(p2m);
1558 if ( pagetable_get_pfn(d->arch.phys_table) != 0 )
1560 P2M_ERROR("p2m already allocated for this domain\n");
1561 p2m_unlock(p2m);
1562 return -EINVAL;
1565 P2M_PRINTK("allocating p2m table\n");
1567 p2m->alloc_page = alloc_page;
1568 p2m->free_page = free_page;
1570 p2m_top = p2m->alloc_page(d);
1571 if ( p2m_top == NULL )
1573 p2m_unlock(p2m);
1574 return -ENOMEM;
1576 page_list_add_tail(p2m_top, &p2m->pages);
1578 p2m_top->count_info = 1;
1579 p2m_top->u.inuse.type_info =
1580 #if CONFIG_PAGING_LEVELS == 4
1581 PGT_l4_page_table
1582 #else
1583 PGT_l3_page_table
1584 #endif
1585 | 1 | PGT_validated;
1587 d->arch.phys_table = pagetable_from_mfn(page_to_mfn(p2m_top));
1589 P2M_PRINTK("populating p2m table\n");
1591 /* Initialise physmap tables for slot zero. Other code assumes this. */
1592 if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), 0,
1593 p2m_invalid) )
1594 goto error;
1596 /* Copy all existing mappings from the page list and m2p */
1597 page_list_for_each(page, &d->page_list)
1599 mfn = page_to_mfn(page);
1600 gfn = get_gpfn_from_mfn(mfn_x(mfn));
1601 page_count++;
1602 if (
1603 #ifdef __x86_64__
1604 (gfn != 0x5555555555555555L)
1605 #else
1606 (gfn != 0x55555555L)
1607 #endif
1608 && gfn != INVALID_M2P_ENTRY
1609 && !set_p2m_entry(d, gfn, mfn, 0, p2m_ram_rw) )
1610 goto error;
1613 P2M_PRINTK("p2m table initialised (%u pages)\n", page_count);
1614 p2m_unlock(p2m);
1615 return 0;
1617 error:
1618 P2M_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%"
1619 PRI_mfn "\n", gfn, mfn_x(mfn));
1620 p2m_unlock(p2m);
1621 return -ENOMEM;
1624 void p2m_teardown(struct domain *d)
1625 /* Return all the p2m pages to Xen.
1626 * We know we don't have any extra mappings to these pages */
1628 struct page_info *pg;
1629 struct p2m_domain *p2m = d->arch.p2m;
1631 p2m_lock(p2m);
1632 d->arch.phys_table = pagetable_null();
1634 while ( (pg = page_list_remove_head(&p2m->pages)) )
1635 p2m->free_page(d, pg);
1636 p2m_unlock(p2m);
1639 void p2m_final_teardown(struct domain *d)
1641 xfree(d->arch.p2m);
1642 d->arch.p2m = NULL;
1645 #if P2M_AUDIT
1646 static void audit_p2m(struct domain *d)
1648 struct page_info *page;
1649 struct domain *od;
1650 unsigned long mfn, gfn, m2pfn, lp2mfn = 0;
1651 int entry_count = 0;
1652 mfn_t p2mfn;
1653 unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0;
1654 int test_linear;
1655 p2m_type_t type;
1657 if ( !paging_mode_translate(d) )
1658 return;
1660 //P2M_PRINTK("p2m audit starts\n");
1662 test_linear = ( (d == current->domain)
1663 && !pagetable_is_null(current->arch.monitor_table) );
1664 if ( test_linear )
1665 flush_tlb_local();
1667 spin_lock(&d->page_alloc_lock);
1669 /* Audit part one: walk the domain's page allocation list, checking
1670 * the m2p entries. */
1671 page_list_for_each ( page, &d->page_list )
1673 mfn = mfn_x(page_to_mfn(page));
1675 // P2M_PRINTK("auditing guest page, mfn=%#lx\n", mfn);
1677 od = page_get_owner(page);
1679 if ( od != d )
1681 P2M_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n",
1682 mfn, od, (od?od->domain_id:-1), d, d->domain_id);
1683 continue;
1686 gfn = get_gpfn_from_mfn(mfn);
1687 if ( gfn == INVALID_M2P_ENTRY )
1689 orphans_i++;
1690 //P2M_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n",
1691 // mfn);
1692 continue;
1695 if ( gfn == 0x55555555 )
1697 orphans_d++;
1698 //P2M_PRINTK("orphaned guest page: mfn=%#lx has debug gfn\n",
1699 // mfn);
1700 continue;
1703 p2mfn = gfn_to_mfn_type_foreign(d, gfn, &type, p2m_query);
1704 if ( mfn_x(p2mfn) != mfn )
1706 mpbad++;
1707 P2M_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx"
1708 " (-> gfn %#lx)\n",
1709 mfn, gfn, mfn_x(p2mfn),
1710 (mfn_valid(p2mfn)
1711 ? get_gpfn_from_mfn(mfn_x(p2mfn))
1712 : -1u));
1713 /* This m2p entry is stale: the domain has another frame in
1714 * this physical slot. No great disaster, but for neatness,
1715 * blow away the m2p entry. */
1716 set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
1719 if ( test_linear && (gfn <= d->arch.p2m->max_mapped_pfn) )
1721 lp2mfn = mfn_x(gfn_to_mfn_query(d, gfn, &type));
1722 if ( lp2mfn != mfn_x(p2mfn) )
1724 P2M_PRINTK("linear mismatch gfn %#lx -> mfn %#lx "
1725 "(!= mfn %#lx)\n", gfn, lp2mfn, mfn_x(p2mfn));
1729 // P2M_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx, lp2mfn=%#lx\n",
1730 // mfn, gfn, p2mfn, lp2mfn);
1733 spin_unlock(&d->page_alloc_lock);
1735 /* Audit part two: walk the domain's p2m table, checking the entries. */
1736 if ( pagetable_get_pfn(d->arch.phys_table) != 0 )
1738 l2_pgentry_t *l2e;
1739 l1_pgentry_t *l1e;
1740 int i1, i2;
1742 #if CONFIG_PAGING_LEVELS == 4
1743 l4_pgentry_t *l4e;
1744 l3_pgentry_t *l3e;
1745 int i3, i4;
1746 l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
1747 #else /* CONFIG_PAGING_LEVELS == 3 */
1748 l3_pgentry_t *l3e;
1749 int i3;
1750 l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
1751 #endif
1753 gfn = 0;
1754 #if CONFIG_PAGING_LEVELS >= 4
1755 for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
1757 if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
1759 gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT);
1760 continue;
1762 l3e = map_domain_page(mfn_x(_mfn(l4e_get_pfn(l4e[i4]))));
1763 #endif
1764 for ( i3 = 0;
1765 i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8);
1766 i3++ )
1768 if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
1770 gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
1771 continue;
1773 l2e = map_domain_page(mfn_x(_mfn(l3e_get_pfn(l3e[i3]))));
1774 for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
1776 if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
1778 if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE)
1779 && ( p2m_flags_to_type(l2e_get_flags(l2e[i2]))
1780 == p2m_populate_on_demand ) )
1781 entry_count+=SUPERPAGE_PAGES;
1782 gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
1783 continue;
1786 /* check for super page */
1787 if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE )
1789 mfn = l2e_get_pfn(l2e[i2]);
1790 ASSERT(mfn_valid(_mfn(mfn)));
1791 for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++)
1793 m2pfn = get_gpfn_from_mfn(mfn+i1);
1794 if ( m2pfn != (gfn + i1) )
1796 pmbad++;
1797 P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
1798 " -> gfn %#lx\n", gfn+i1, mfn+i1,
1799 m2pfn);
1800 BUG();
1803 gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
1804 continue;
1807 l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2]))));
1809 for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
1811 p2m_type_t type;
1813 type = p2m_flags_to_type(l1e_get_flags(l1e[i1]));
1814 if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
1816 if ( type == p2m_populate_on_demand )
1817 entry_count++;
1818 continue;
1820 mfn = l1e_get_pfn(l1e[i1]);
1821 ASSERT(mfn_valid(_mfn(mfn)));
1822 m2pfn = get_gpfn_from_mfn(mfn);
1823 if ( m2pfn != gfn &&
1824 type != p2m_mmio_direct &&
1825 !p2m_is_grant(type) )
1827 pmbad++;
1828 printk("mismatch: gfn %#lx -> mfn %#lx"
1829 " -> gfn %#lx\n", gfn, mfn, m2pfn);
1830 P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
1831 " -> gfn %#lx\n", gfn, mfn, m2pfn);
1832 BUG();
1835 unmap_domain_page(l1e);
1837 unmap_domain_page(l2e);
1839 #if CONFIG_PAGING_LEVELS >= 4
1840 unmap_domain_page(l3e);
1842 #endif
1844 #if CONFIG_PAGING_LEVELS == 4
1845 unmap_domain_page(l4e);
1846 #else /* CONFIG_PAGING_LEVELS == 3 */
1847 unmap_domain_page(l3e);
1848 #endif
1852 if ( entry_count != d->arch.p2m->pod.entry_count )
1854 printk("%s: refcounted entry count %d, audit count %d!\n",
1855 __func__,
1856 d->arch.p2m->pod.entry_count,
1857 entry_count);
1858 BUG();
1861 //P2M_PRINTK("p2m audit complete\n");
1862 //if ( orphans_i | orphans_d | mpbad | pmbad )
1863 // P2M_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n",
1864 // orphans_i + orphans_d, orphans_i, orphans_d,
1865 if ( mpbad | pmbad )
1866 P2M_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n",
1867 pmbad, mpbad);
1869 #endif /* P2M_AUDIT */
1873 static void
1874 p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn,
1875 unsigned int page_order)
1877 unsigned long i;
1878 mfn_t mfn_return;
1879 p2m_type_t t;
1881 if ( !paging_mode_translate(d) )
1883 if ( need_iommu(d) )
1884 for ( i = 0; i < (1 << page_order); i++ )
1885 iommu_unmap_page(d, mfn + i);
1886 return;
1889 P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
1891 for ( i = 0; i < (1UL << page_order); i++ )
1893 mfn_return = d->arch.p2m->get_entry(d, gfn + i, &t, p2m_query);
1894 if ( !p2m_is_grant(t) )
1895 set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
1896 ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) );
1898 set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid);
1901 void
1902 guest_physmap_remove_page(struct domain *d, unsigned long gfn,
1903 unsigned long mfn, unsigned int page_order)
1905 p2m_lock(d->arch.p2m);
1906 audit_p2m(d);
1907 p2m_remove_page(d, gfn, mfn, page_order);
1908 audit_p2m(d);
1909 p2m_unlock(d->arch.p2m);
1912 #if CONFIG_PAGING_LEVELS == 3
1913 static int gfn_check_limit(
1914 struct domain *d, unsigned long gfn, unsigned int order)
1916 /*
1917 * 32bit AMD nested paging does not support over 4GB guest due to
1918 * hardware translation limit. This limitation is checked by comparing
1919 * gfn with 0xfffffUL.
1920 */
1921 if ( !paging_mode_hap(d) || ((gfn + (1ul << order)) <= 0x100000UL) ||
1922 (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) )
1923 return 0;
1925 if ( !test_and_set_bool(d->arch.hvm_domain.svm.npt_4gb_warning) )
1926 dprintk(XENLOG_WARNING, "Dom%d failed to populate memory beyond"
1927 " 4GB: specify 'hap=0' domain config option.\n",
1928 d->domain_id);
1930 return -EINVAL;
1932 #else
1933 #define gfn_check_limit(d, g, o) 0
1934 #endif
1936 int
1937 guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
1938 unsigned int order)
1940 struct p2m_domain *p2md = d->arch.p2m;
1941 unsigned long i;
1942 p2m_type_t ot;
1943 mfn_t omfn;
1944 int pod_count = 0;
1945 int rc = 0;
1947 BUG_ON(!paging_mode_translate(d));
1949 rc = gfn_check_limit(d, gfn, order);
1950 if ( rc != 0 )
1951 return rc;
1953 p2m_lock(p2md);
1954 audit_p2m(d);
1956 P2M_DEBUG("mark pod gfn=%#lx\n", gfn);
1958 /* Make sure all gpfns are unused */
1959 for ( i = 0; i < (1UL << order); i++ )
1961 omfn = gfn_to_mfn_query(d, gfn + i, &ot);
1962 if ( p2m_is_ram(ot) )
1964 printk("%s: gfn_to_mfn returned type %d!\n",
1965 __func__, ot);
1966 rc = -EBUSY;
1967 goto out;
1969 else if ( ot == p2m_populate_on_demand )
1971 /* Count how many PoD entries we'll be replacing if successful */
1972 pod_count++;
1976 /* Now, actually do the two-way mapping */
1977 if ( !set_p2m_entry(d, gfn, _mfn(POPULATE_ON_DEMAND_MFN), order,
1978 p2m_populate_on_demand) )
1979 rc = -EINVAL;
1980 else
1982 p2md->pod.entry_count += 1 << order; /* Lock: p2m */
1983 p2md->pod.entry_count -= pod_count;
1984 BUG_ON(p2md->pod.entry_count < 0);
1987 audit_p2m(d);
1988 p2m_unlock(p2md);
1990 out:
1991 return rc;
1995 int
1996 guest_physmap_add_entry(struct domain *d, unsigned long gfn,
1997 unsigned long mfn, unsigned int page_order,
1998 p2m_type_t t)
2000 unsigned long i, ogfn;
2001 p2m_type_t ot;
2002 mfn_t omfn;
2003 int pod_count = 0;
2004 int rc = 0;
2006 if ( !paging_mode_translate(d) )
2008 if ( need_iommu(d) && t == p2m_ram_rw )
2010 for ( i = 0; i < (1 << page_order); i++ )
2011 if ( (rc = iommu_map_page(d, mfn + i, mfn + i)) != 0 )
2013 while ( i-- > 0 )
2014 iommu_unmap_page(d, mfn + i);
2015 return rc;
2018 return 0;
2021 rc = gfn_check_limit(d, gfn, page_order);
2022 if ( rc != 0 )
2023 return rc;
2025 p2m_lock(d->arch.p2m);
2026 audit_p2m(d);
2028 P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
2030 /* First, remove m->p mappings for existing p->m mappings */
2031 for ( i = 0; i < (1UL << page_order); i++ )
2033 omfn = gfn_to_mfn_query(d, gfn + i, &ot);
2034 if ( p2m_is_grant(ot) )
2036 /* Really shouldn't be unmapping grant maps this way */
2037 domain_crash(d);
2038 p2m_unlock(d->arch.p2m);
2039 return -EINVAL;
2041 else if ( p2m_is_ram(ot) )
2043 ASSERT(mfn_valid(omfn));
2044 set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
2046 else if ( ot == p2m_populate_on_demand )
2048 /* Count how many PoD entries we'll be replacing if successful */
2049 pod_count++;
2053 /* Then, look for m->p mappings for this range and deal with them */
2054 for ( i = 0; i < (1UL << page_order); i++ )
2056 if ( page_get_owner(mfn_to_page(_mfn(mfn + i))) != d )
2057 continue;
2058 ogfn = mfn_to_gfn(d, _mfn(mfn+i));
2059 if (
2060 #ifdef __x86_64__
2061 (ogfn != 0x5555555555555555L)
2062 #else
2063 (ogfn != 0x55555555L)
2064 #endif
2065 && (ogfn != INVALID_M2P_ENTRY)
2066 && (ogfn != gfn + i) )
2068 /* This machine frame is already mapped at another physical
2069 * address */
2070 P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
2071 mfn + i, ogfn, gfn + i);
2072 omfn = gfn_to_mfn_query(d, ogfn, &ot);
2073 /* If we get here, we know the local domain owns the page,
2074 so it can't have been grant mapped in. */
2075 BUG_ON( p2m_is_grant(ot) );
2076 if ( p2m_is_ram(ot) )
2078 ASSERT(mfn_valid(omfn));
2079 P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
2080 ogfn , mfn_x(omfn));
2081 if ( mfn_x(omfn) == (mfn + i) )
2082 p2m_remove_page(d, ogfn, mfn + i, 0);
2087 /* Now, actually do the two-way mapping */
2088 if ( mfn_valid(_mfn(mfn)) )
2090 if ( !set_p2m_entry(d, gfn, _mfn(mfn), page_order, t) )
2091 rc = -EINVAL;
2092 if ( !p2m_is_grant(t) )
2094 for ( i = 0; i < (1UL << page_order); i++ )
2095 set_gpfn_from_mfn(mfn+i, gfn+i);
2098 else
2100 gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
2101 gfn, mfn);
2102 if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order,
2103 p2m_invalid) )
2104 rc = -EINVAL;
2105 else
2107 d->arch.p2m->pod.entry_count -= pod_count; /* Lock: p2m */
2108 BUG_ON(d->arch.p2m->pod.entry_count < 0);
2112 audit_p2m(d);
2113 p2m_unlock(d->arch.p2m);
2115 return rc;
2118 /* Walk the whole p2m table, changing any entries of the old type
2119 * to the new type. This is used in hardware-assisted paging to
2120 * quickly enable or disable log-dirty tracking */
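/* (Editor's note, not in the original source: callers reach this walk via
 * p2m_change_entry_type_global(); e.g. the HAP log-dirty code switches
 * p2m_ram_rw entries to p2m_ram_logdirty with
 *     p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
 * and back again when log-dirty mode is turned off.) */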
2121 void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt)
2123 unsigned long mfn, gfn, flags;
2124 l1_pgentry_t l1e_content;
2125 l1_pgentry_t *l1e;
2126 l2_pgentry_t *l2e;
2127 mfn_t l1mfn, l2mfn;
2128 int i1, i2;
2129 l3_pgentry_t *l3e;
2130 int i3;
2131 #if CONFIG_PAGING_LEVELS == 4
2132 l4_pgentry_t *l4e;
2133 int i4;
2134 #endif /* CONFIG_PAGING_LEVELS == 4 */
2136 BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
2138 if ( !paging_mode_translate(d) )
2139 return;
2141 if ( pagetable_get_pfn(d->arch.phys_table) == 0 )
2142 return;
2144 ASSERT(p2m_locked_by_me(d->arch.p2m));
2146 #if CONFIG_PAGING_LEVELS == 4
2147 l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
2148 #else /* CONFIG_PAGING_LEVELS == 3 */
2149 l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
2150 #endif
2152 #if CONFIG_PAGING_LEVELS >= 4
2153 for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
2155 if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
2157 continue;
2159 l3e = map_domain_page(l4e_get_pfn(l4e[i4]));
2160 #endif
2161 for ( i3 = 0;
2162 i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8);
2163 i3++ )
2165 if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
2167 continue;
2169 l2mfn = _mfn(l3e_get_pfn(l3e[i3]));
2170 l2e = map_domain_page(l3e_get_pfn(l3e[i3]));
2171 for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
2173 if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
2175 continue;
2178 if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE) )
2180 flags = l2e_get_flags(l2e[i2]);
2181 if ( p2m_flags_to_type(flags) != ot )
2182 continue;
2183 mfn = l2e_get_pfn(l2e[i2]);
2184 gfn = get_gpfn_from_mfn(mfn);
2185 flags = p2m_type_to_flags(nt);
2186 l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE);
2187 paging_write_p2m_entry(d, gfn, (l1_pgentry_t *)&l2e[i2],
2188 l2mfn, l1e_content, 2);
2189 continue;
2192 l1mfn = _mfn(l2e_get_pfn(l2e[i2]));
2193 l1e = map_domain_page(mfn_x(l1mfn));
2195 for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
2197 flags = l1e_get_flags(l1e[i1]);
2198 if ( p2m_flags_to_type(flags) != ot )
2199 continue;
2200 mfn = l1e_get_pfn(l1e[i1]);
2201 gfn = get_gpfn_from_mfn(mfn);
2202 /* create a new l1e entry with the new type */
2203 flags = p2m_type_to_flags(nt);
2204 l1e_content = l1e_from_pfn(mfn, flags);
2205 paging_write_p2m_entry(d, gfn, &l1e[i1],
2206 l1mfn, l1e_content, 1);
2208 unmap_domain_page(l1e);
2210 unmap_domain_page(l2e);
2212 #if CONFIG_PAGING_LEVELS >= 4
2213 unmap_domain_page(l3e);
2215 #endif
2217 #if CONFIG_PAGING_LEVELS == 4
2218 unmap_domain_page(l4e);
2219 #else /* CONFIG_PAGING_LEVELS == 3 */
2220 unmap_domain_page(l3e);
2221 #endif
2225 /* Modify the p2m type of a single gfn from ot to nt, returning the
2226 * entry's previous type */
2227 p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
2228 p2m_type_t ot, p2m_type_t nt)
2230 p2m_type_t pt;
2231 mfn_t mfn;
2233 BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
2235 p2m_lock(d->arch.p2m);
2237 mfn = gfn_to_mfn(d, gfn, &pt);
2238 if ( pt == ot )
2239 set_p2m_entry(d, gfn, mfn, 0, nt);
2241 p2m_unlock(d->arch.p2m);
2243 return pt;
2246 int
2247 set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
2249 int rc = 0;
2250 p2m_type_t ot;
2251 mfn_t omfn;
2253 if ( !paging_mode_translate(d) )
2254 return 0;
2256 omfn = gfn_to_mfn_query(d, gfn, &ot);
2257 if ( p2m_is_grant(ot) )
2259 domain_crash(d);
2260 return 0;
2262 else if ( p2m_is_ram(ot) )
2264 ASSERT(mfn_valid(omfn));
2265 set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
2268 P2M_DEBUG("set mmio %lx %lx\n", gfn, mfn_x(mfn));
2269 p2m_lock(d->arch.p2m);
2270 rc = set_p2m_entry(d, gfn, mfn, 0, p2m_mmio_direct);
2271 p2m_unlock(d->arch.p2m);
2272 if ( 0 == rc )
2273 gdprintk(XENLOG_ERR,
2274 "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
2275 gmfn_to_mfn(d, gfn));
2276 return rc;
2279 int
2280 clear_mmio_p2m_entry(struct domain *d, unsigned long gfn)
2282 int rc = 0;
2283 unsigned long mfn;
2285 if ( !paging_mode_translate(d) )
2286 return 0;
2288 mfn = gmfn_to_mfn(d, gfn);
2289 if ( INVALID_MFN == mfn )
2291 gdprintk(XENLOG_ERR,
2292 "clear_mmio_p2m_entry: gfn_to_mfn failed! gfn=%08lx\n", gfn);
2293 return 0;
2295 p2m_lock(d->arch.p2m);
2296 rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0, 0);
2297 p2m_unlock(d->arch.p2m);
2299 return rc;
2302 /*
2303 * Local variables:
2304 * mode: C
2305 * c-set-style: "BSD"
2306 * c-basic-offset: 4
2307 * indent-tabs-mode: nil
2308 * End:
2309 */