
view xen/arch/x86/mm/p2m.c @ 20651:8f304c003af4

x86-32/pod: fix map_domain_page() leak

The 'continue' in the if() part of the conditional at the end of
p2m_pod_zero_check() was causing this, but there also really is no
point in retaining the mapping after having checked page contents,
so fix it both ways. Additionally there is no point in updating
map[] at this point anymore.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Dec 09 10:59:31 2009 +0000 (2009-12-09)
parents c4e620a2e65c
children 295e77eed8c9
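
For orientation, a minimal sketch of the leak pattern the message describes (hypothetical, condensed from the final loop of p2m_pod_zero_check(); the revision shown below already contains the fix):

    /* Pre-fix shape (sketch): the mapping from map_domain_page() was only
     * released on the fall-through path, so taking 'continue' leaked a
     * map_domain_page() slot on x86-32. */
    for ( i = 0; i < count; i++ )
    {
        if ( map[i] == NULL )
            continue;
        for ( j = 0; j < PAGE_SIZE / sizeof(*map[i]); j++ )
            if ( *(map[i] + j) != 0 )
                break;
        if ( j < PAGE_SIZE / sizeof(*map[i]) )
        {
            set_p2m_entry(d, gfns[i], mfns[i], 0, types[i]);
            continue;                  /* bug: unmap_domain_page() skipped */
        }
        unmap_domain_page(map[i]);     /* only reached for all-zero pages */
    }

    /* Post-fix shape (see the last loop of p2m_pod_zero_check() in the
     * listing below): unmap immediately after the contents have been
     * checked, on every path, and drop the now-pointless update of map[i]. */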
1 /******************************************************************************
2 * arch/x86/mm/p2m.c
3 *
4 * physical-to-machine mappings for automatically-translated domains.
5 *
6 * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
7 * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
8 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
9 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
26 #include <asm/domain.h>
27 #include <asm/page.h>
28 #include <asm/paging.h>
29 #include <asm/p2m.h>
30 #include <asm/hvm/vmx/vmx.h> /* ept_p2m_init() */
31 #include <xen/iommu.h>
33 /* Debugging and auditing of the P2M code? */
34 #define P2M_AUDIT 0
35 #define P2M_DEBUGGING 0
37 /* Printouts */
38 #define P2M_PRINTK(_f, _a...) \
39 debugtrace_printk("p2m: %s(): " _f, __func__, ##_a)
40 #define P2M_ERROR(_f, _a...) \
41 printk("pg error: %s(): " _f, __func__, ##_a)
42 #if P2M_DEBUGGING
43 #define P2M_DEBUG(_f, _a...) \
44 debugtrace_printk("p2mdebug: %s(): " _f, __func__, ##_a)
45 #else
46 #define P2M_DEBUG(_f, _a...) do { (void)(_f); } while(0)
47 #endif
50 /* Override macros from asm/page.h to make them work with mfn_t */
51 #undef mfn_to_page
52 #define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
53 #undef mfn_valid
54 #define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
55 #undef page_to_mfn
56 #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
59 /* PTE flags for the various types of p2m entry */
60 #define P2M_BASE_FLAGS \
61 (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
63 #define SUPERPAGE_PAGES (1UL << 9)
64 #define superpage_aligned(_x) (((_x)&(SUPERPAGE_PAGES-1))==0)
66 static unsigned long p2m_type_to_flags(p2m_type_t t)
67 {
68 unsigned long flags;
69 #ifdef __x86_64__
70 flags = (unsigned long)(t & 0x3fff) << 9;
71 #else
72 flags = (t & 0x7UL) << 9;
73 #endif
74 #ifndef HAVE_GRANT_MAP_P2M
75 BUG_ON(p2m_is_grant(t));
76 #endif
77 switch(t)
78 {
79 case p2m_invalid:
80 default:
81 return flags;
82 case p2m_ram_rw:
83 case p2m_grant_map_rw:
84 return flags | P2M_BASE_FLAGS | _PAGE_RW;
85 case p2m_ram_logdirty:
86 return flags | P2M_BASE_FLAGS;
87 case p2m_ram_ro:
88 case p2m_grant_map_ro:
89 return flags | P2M_BASE_FLAGS;
90 case p2m_mmio_dm:
91 return flags;
92 case p2m_mmio_direct:
93 return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_PCD;
94 case p2m_populate_on_demand:
95 return flags;
96 }
97 }
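/* Illustrative sketch (not part of this file): the inverse lookup used
 * throughout the code below, p2m_flags_to_type(), presumably lives in the
 * p2m header and recovers the type from the same software-available PTE
 * bits that p2m_type_to_flags() fills in above.  Consistent with those
 * shifts it would look roughly like this (name and exact masks here are
 * assumptions): */
static inline p2m_type_t p2m_flags_to_type_sketch(unsigned long flags)
{
#ifdef __x86_64__
    return (flags >> 9) & 0x3fff;   /* 14 available bits on 64-bit PTEs */
#else
    return (flags >> 9) & 0x7;      /* only 3 spare bits with 32-bit PTEs */
#endif
}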
99 #if P2M_AUDIT
100 static void audit_p2m(struct domain *d);
101 #else
102 # define audit_p2m(_d) do { (void)(_d); } while(0)
103 #endif /* P2M_AUDIT */
105 // Find the next level's P2M entry, checking for out-of-range gfn's...
106 // Returns NULL on error.
107 //
108 static l1_pgentry_t *
109 p2m_find_entry(void *table, unsigned long *gfn_remainder,
110 unsigned long gfn, u32 shift, u32 max)
111 {
112 u32 index;
114 index = *gfn_remainder >> shift;
115 if ( index >= max )
116 {
117 P2M_DEBUG("gfn=0x%lx out of range "
118 "(gfn_remainder=0x%lx shift=%d index=0x%x max=0x%x)\n",
119 gfn, *gfn_remainder, shift, index, max);
120 return NULL;
121 }
122 *gfn_remainder &= (1 << shift) - 1;
123 return (l1_pgentry_t *)table + index;
124 }
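/* Illustrative worked example (hypothetical gfn, not part of the original
 * file): looking up gfn 0x12345 with the 4-level walk used by
 * p2m_set_entry() below, gfn_remainder starts at 0x12345 and is narrowed
 * at each call:
 *   L4 step: shift 27, index 0x12345 >> 27 = 0,    remainder 0x12345
 *   L3 step: shift 18, index 0x12345 >> 18 = 0,    remainder 0x12345
 *   L2 step: shift  9, index 0x12345 >>  9 = 0x91, remainder 0x145
 *   L1 step: shift  0, index 0x145
 * Each call also range-checks the index against 'max' and returns NULL
 * when the gfn lies outside the current table. */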
126 // Walk one level of the P2M table, allocating a new table if required.
127 // Returns 0 on error.
128 //
129 static int
130 p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table,
131 unsigned long *gfn_remainder, unsigned long gfn, u32 shift,
132 u32 max, unsigned long type)
133 {
134 l1_pgentry_t *l1_entry;
135 l1_pgentry_t *p2m_entry;
136 l1_pgentry_t new_entry;
137 void *next;
138 int i;
139 ASSERT(d->arch.p2m->alloc_page);
141 if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
142 shift, max)) )
143 return 0;
145 /* PoD: Not present doesn't imply empty. */
146 if ( !l1e_get_flags(*p2m_entry) )
147 {
148 struct page_info *pg = d->arch.p2m->alloc_page(d);
149 if ( pg == NULL )
150 return 0;
151 page_list_add_tail(pg, &d->arch.p2m->pages);
152 pg->u.inuse.type_info = type | 1 | PGT_validated;
153 pg->count_info |= 1;
155 new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
156 __PAGE_HYPERVISOR|_PAGE_USER);
158 switch ( type ) {
159 case PGT_l3_page_table:
160 paging_write_p2m_entry(d, gfn,
161 p2m_entry, *table_mfn, new_entry, 4);
162 break;
163 case PGT_l2_page_table:
164 #if CONFIG_PAGING_LEVELS == 3
165 /* for PAE mode, PDPE only has PCD/PWT/P bits available */
166 new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)), _PAGE_PRESENT);
167 #endif
168 paging_write_p2m_entry(d, gfn,
169 p2m_entry, *table_mfn, new_entry, 3);
170 break;
171 case PGT_l1_page_table:
172 paging_write_p2m_entry(d, gfn,
173 p2m_entry, *table_mfn, new_entry, 2);
174 break;
175 default:
176 BUG();
177 break;
178 }
179 }
181 ASSERT(l1e_get_flags(*p2m_entry) & (_PAGE_PRESENT|_PAGE_PSE));
183 /* split a single large page into 4KB pages in the P2M table */
184 if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
185 {
186 unsigned long flags, pfn;
187 struct page_info *pg = d->arch.p2m->alloc_page(d);
188 if ( pg == NULL )
189 return 0;
190 page_list_add_tail(pg, &d->arch.p2m->pages);
191 pg->u.inuse.type_info = PGT_l1_page_table | 1 | PGT_validated;
192 pg->count_info |= 1;
194 /* New splintered mappings inherit the flags of the old superpage,
195 * with a little reorganisation for the _PAGE_PSE_PAT bit. */
196 flags = l1e_get_flags(*p2m_entry);
197 pfn = l1e_get_pfn(*p2m_entry);
198 if ( pfn & 1 ) /* ==> _PAGE_PSE_PAT was set */
199 pfn -= 1; /* Clear it; _PAGE_PSE becomes _PAGE_PAT */
200 else
201 flags &= ~_PAGE_PSE; /* Clear _PAGE_PSE (== _PAGE_PAT) */
203 l1_entry = __map_domain_page(pg);
204 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
205 {
206 new_entry = l1e_from_pfn(pfn + i, flags);
207 paging_write_p2m_entry(d, gfn,
208 l1_entry+i, *table_mfn, new_entry, 1);
209 }
210 unmap_domain_page(l1_entry);
212 new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
213 __PAGE_HYPERVISOR|_PAGE_USER);
214 paging_write_p2m_entry(d, gfn,
215 p2m_entry, *table_mfn, new_entry, 2);
216 }
218 *table_mfn = _mfn(l1e_get_pfn(*p2m_entry));
219 next = map_domain_page(mfn_x(*table_mfn));
220 unmap_domain_page(*table);
221 *table = next;
223 return 1;
224 }
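/* Note on the _PAGE_PSE_PAT reorganisation above (a sketch of the standard
 * x86 PTE layout, not text from the original file): in a 2MB entry the PAT
 * bit is bit 12, which overlaps bit 0 of the value returned by
 * l1e_get_pfn(), while bit 7 (_PAGE_PSE) marks the superpage; in the 4KB
 * entries produced by the split, bit 7 means PAT instead.  Hence: if the
 * old superpage used PAT (odd pfn), the split clears the pfn's low bit and
 * keeps bit 7 (now read as PAT); otherwise it clears bit 7. */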
226 /*
227 * Populate-on-demand functionality
228 */
229 static
230 int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
231 unsigned int page_order, p2m_type_t p2mt);
233 static int
234 p2m_pod_cache_add(struct domain *d,
235 struct page_info *page,
236 unsigned long order)
237 {
238 int i;
239 struct page_info *p;
240 struct p2m_domain *p2md = d->arch.p2m;
242 #ifndef NDEBUG
243 mfn_t mfn;
245 mfn = page_to_mfn(page);
247 /* Check to make sure this is a contiguous region */
248 if( mfn_x(mfn) & ((1 << order) - 1) )
249 {
250 printk("%s: mfn %lx not aligned order %lu! (mask %lx)\n",
251 __func__, mfn_x(mfn), order, ((1UL << order) - 1));
252 return -1;
253 }
255 for(i=0; i < 1 << order ; i++) {
256 struct domain * od;
258 p = mfn_to_page(_mfn(mfn_x(mfn) + i));
259 od = page_get_owner(p);
260 if(od != d)
261 {
262 printk("%s: mfn %lx expected owner d%d, got owner d%d!\n",
263 __func__, mfn_x(mfn), d->domain_id,
264 od?od->domain_id:-1);
265 return -1;
266 }
267 }
268 #endif
270 /*
271 * Pages from domain_alloc and returned by the balloon driver aren't
272 * guaranteed to be zero; but by reclaiming zero pages, we implicitly
273 * promise to provide zero pages. So we scrub pages before using them.
274 */
275 for ( i = 0; i < (1 << order); i++ )
276 {
277 char *b = map_domain_page(mfn_x(page_to_mfn(page)) + i);
278 clear_page(b);
279 unmap_domain_page(b);
280 }
282 spin_lock(&d->page_alloc_lock);
284 /* First, take all pages off the domain list */
285 for(i=0; i < 1 << order ; i++)
286 {
287 p = page + i;
288 page_list_del(p, &d->page_list);
289 }
291 /* Then add the first one to the appropriate populate-on-demand list */
292 switch(order)
293 {
294 case 9:
295 page_list_add_tail(page, &p2md->pod.super); /* lock: page_alloc */
296 p2md->pod.count += 1 << order;
297 break;
298 case 0:
299 page_list_add_tail(page, &p2md->pod.single); /* lock: page_alloc */
300 p2md->pod.count += 1 ;
301 break;
302 default:
303 BUG();
304 }
306 BUG_ON(d->is_dying);
308 spin_unlock(&d->page_alloc_lock);
310 return 0;
311 }
313 /* Get a page of size order from the populate-on-demand cache. Will break
314 * down 2-meg pages into singleton pages automatically. Returns NULL if
315 * a superpage is requested and no superpages are available. Must be called
316 * with the d->page_alloc_lock held. */
317 static struct page_info * p2m_pod_cache_get(struct domain *d,
318 unsigned long order)
319 {
320 struct p2m_domain *p2md = d->arch.p2m;
321 struct page_info *p = NULL;
322 int i;
324 if ( order == 9 && page_list_empty(&p2md->pod.super) )
325 {
326 return NULL;
327 }
328 else if ( order == 0 && page_list_empty(&p2md->pod.single) )
329 {
330 unsigned long mfn;
331 struct page_info *q;
333 BUG_ON( page_list_empty(&p2md->pod.super) );
335 /* Break up a superpage to make single pages. NB count doesn't
336 * need to be adjusted. */
337 printk("%s: Breaking up superpage.\n", __func__);
338 p = page_list_remove_head(&p2md->pod.super);
339 mfn = mfn_x(page_to_mfn(p));
341 for ( i=0; i<SUPERPAGE_PAGES; i++ )
342 {
343 q = mfn_to_page(_mfn(mfn+i));
344 page_list_add_tail(q, &p2md->pod.single);
345 }
346 }
348 switch ( order )
349 {
350 case 9:
351 BUG_ON( page_list_empty(&p2md->pod.super) );
352 p = page_list_remove_head(&p2md->pod.super);
353 p2md->pod.count -= 1 << order; /* Lock: page_alloc */
354 break;
355 case 0:
356 BUG_ON( page_list_empty(&p2md->pod.single) );
357 p = page_list_remove_head(&p2md->pod.single);
358 p2md->pod.count -= 1;
359 break;
360 default:
361 BUG();
362 }
364 /* Put the pages back on the domain page_list */
365 for ( i = 0 ; i < (1 << order) ; i++ )
366 {
367 BUG_ON(page_get_owner(p + i) != d);
368 page_list_add_tail(p + i, &d->page_list);
369 }
371 return p;
372 }
374 /* Set the size of the cache, allocating or freeing as necessary. */
375 static int
376 p2m_pod_set_cache_target(struct domain *d, unsigned long pod_target)
377 {
378 struct p2m_domain *p2md = d->arch.p2m;
379 int ret = 0;
381 /* Increasing the target */
382 while ( pod_target > p2md->pod.count )
383 {
384 struct page_info * page;
385 int order;
387 if ( (pod_target - p2md->pod.count) >= SUPERPAGE_PAGES )
388 order = 9;
389 else
390 order = 0;
391 retry:
392 page = alloc_domheap_pages(d, order, 0);
393 if ( unlikely(page == NULL) )
394 {
395 if ( order == 9 )
396 {
397 /* If we can't allocate a superpage, try singleton pages */
398 order = 0;
399 goto retry;
400 }
402 printk("%s: Unable to allocate domheap page for pod cache. target %lu cachesize %d\n",
403 __func__, pod_target, p2md->pod.count);
404 ret = -ENOMEM;
405 goto out;
406 }
408 p2m_pod_cache_add(d, page, order);
409 }
411 /* Decreasing the target */
412 /* We hold the p2m lock here, so we don't need to worry about
413 * cache disappearing under our feet. */
414 while ( pod_target < p2md->pod.count )
415 {
416 struct page_info * page;
417 int order, i;
419 /* Grab the lock before checking that pod.super is empty, or the last
420 * entries may disappear before we grab the lock. */
421 spin_lock(&d->page_alloc_lock);
423 if ( (p2md->pod.count - pod_target) > SUPERPAGE_PAGES
424 && !page_list_empty(&p2md->pod.super) )
425 order = 9;
426 else
427 order = 0;
429 page = p2m_pod_cache_get(d, order);
431 ASSERT(page != NULL);
433 spin_unlock(&d->page_alloc_lock);
435 /* Then free them */
436 for ( i = 0 ; i < (1 << order) ; i++ )
437 {
438 /* Copied from common/memory.c:guest_remove_page() */
439 if ( unlikely(!get_page(page+i, d)) )
440 {
441 gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);
442 ret = -EINVAL;
443 goto out;
444 }
446 if ( test_and_clear_bit(_PGT_pinned, &(page+i)->u.inuse.type_info) )
447 put_page_and_type(page+i);
449 if ( test_and_clear_bit(_PGC_allocated, &(page+i)->count_info) )
450 put_page(page+i);
452 put_page(page+i);
453 }
454 }
456 out:
457 return ret;
458 }
460 /*
461 * The "right behavior" here requires some careful thought. First, some
462 * definitions:
463 * + M: static_max
464 * + B: number of pages the balloon driver has ballooned down to.
465 * + P: Number of populated pages.
466 * + T: Old target
467 * + T': New target
468 *
469 * The following equations should hold:
470 * 0 <= P <= T <= B <= M
471 * d->arch.p2m->pod.entry_count == B - P
472 * d->tot_pages == P + d->arch.p2m->pod.count
473 *
474 * Now we have the following potential cases to cover:
475 * B <T': Set the PoD cache size equal to the number of outstanding PoD
476 * entries. The balloon driver will deflate the balloon to give back
477 * the remainder of the ram to the guest OS.
478 * T <T'<B : Increase PoD cache size.
479 * T'<T<=B : Here we have a choice: we can decrease the size of the cache and
480 * get the memory back right away. However, that means every time we
481 * reduce the memory target we risk the guest attempting to populate the
482 * memory before the balloon driver has reached its new target. Safer to
483 * never reduce the cache size here, but only when the balloon driver frees
484 * PoD ranges.
485 *
486 * If there are many zero pages, we could reach the target also by doing
487 * zero sweeps and marking the ranges PoD; but the balloon driver will have
488 * to free this memory eventually anyway, so we don't actually gain that much
489 * by doing so.
490 *
491 * NB that the equation (B<T') may require adjustment to the cache
492 * size as PoD pages are freed as well; i.e., freeing a PoD-backed
493 * entry when pod.entry_count == pod.count requires us to reduce both
494 * pod.entry_count and pod.count.
495 */
496 int
497 p2m_pod_set_mem_target(struct domain *d, unsigned long target)
498 {
499 unsigned pod_target;
500 struct p2m_domain *p2md = d->arch.p2m;
501 int ret = 0;
502 unsigned long populated;
504 /* P == B: Nothing to do. */
505 if ( p2md->pod.entry_count == 0 )
506 goto out;
508 /* Don't do anything if the domain is being torn down */
509 if ( d->is_dying )
510 goto out;
512 /* T' < B: Don't reduce the cache size; let the balloon driver
513 * take care of it. */
514 if ( target < d->tot_pages )
515 goto out;
517 populated = d->tot_pages - p2md->pod.count;
519 pod_target = target - populated;
521 /* B < T': Set the cache size equal to # of outstanding entries,
522 * let the balloon driver fill in the rest. */
523 if ( pod_target > p2md->pod.entry_count )
524 pod_target = p2md->pod.entry_count;
526 ASSERT( pod_target > p2md->pod.count );
528 ret = p2m_pod_set_cache_target(d, pod_target);
530 out:
531 return ret;
532 }
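/* Worked example of the arithmetic above (hypothetical numbers): let
 * M = B = 1048576 pages (4GB, balloon untouched), P = 262144 populated,
 * pod.count = 1024, and a new target T' = 524288.  Then:
 *   pod.entry_count = B - P                  = 786432
 *   d->tot_pages    = P + pod.count          = 263168
 *   populated       = tot_pages - pod.count  = 262144
 *   pod_target      = T' - populated         = 262144
 * Since pod_target <= entry_count, the B < T' clamp does not trigger, and
 * p2m_pod_set_cache_target() grows the cache from 1024 to 262144 pages.
 * Had T' exceeded B, pod_target would have been clamped to entry_count. */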
534 void
535 p2m_pod_empty_cache(struct domain *d)
536 {
537 struct p2m_domain *p2md = d->arch.p2m;
538 struct page_info *page;
540 spin_lock(&d->page_alloc_lock);
542 while ( (page = page_list_remove_head(&p2md->pod.super)) )
543 {
544 int i;
546 for ( i = 0 ; i < SUPERPAGE_PAGES ; i++ )
547 {
548 BUG_ON(page_get_owner(page + i) != d);
549 page_list_add_tail(page + i, &d->page_list);
550 }
552 p2md->pod.count -= SUPERPAGE_PAGES;
553 }
555 while ( (page = page_list_remove_head(&p2md->pod.single)) )
556 {
557 BUG_ON(page_get_owner(page) != d);
558 page_list_add_tail(page, &d->page_list);
560 p2md->pod.count -= 1;
561 }
563 BUG_ON(p2md->pod.count != 0);
565 spin_unlock(&d->page_alloc_lock);
566 }
568 /* This function is needed for two reasons:
569 * + To properly handle clearing of PoD entries
570 * + To "steal back" memory being freed for the PoD cache, rather than
571 * releasing it.
572 *
573 * Once both of these tasks have been completed, we can return and
574 * allow decrease_reservation() to handle everything else.
575 */
576 int
577 p2m_pod_decrease_reservation(struct domain *d,
578 xen_pfn_t gpfn,
579 unsigned int order)
580 {
581 struct p2m_domain *p2md = d->arch.p2m;
582 int ret=0;
583 int i;
585 int steal_for_cache = 0;
586 int pod = 0, nonpod = 0, ram = 0;
589 /* If we don't have any outstanding PoD entries, let things take their
590 * course */
591 if ( p2md->pod.entry_count == 0 || unlikely(d->is_dying) )
592 goto out;
594 /* Figure out if we need to steal some freed memory for our cache */
595 steal_for_cache = ( p2md->pod.entry_count > p2md->pod.count );
597 p2m_lock(p2md);
598 audit_p2m(d);
600 /* See what's in here. */
601 /* FIXME: Add contiguous; query for PSE entries? */
602 for ( i=0; i<(1<<order); i++)
603 {
604 p2m_type_t t;
606 gfn_to_mfn_query(d, gpfn + i, &t);
608 if ( t == p2m_populate_on_demand )
609 pod++;
610 else
611 {
612 nonpod++;
613 if ( p2m_is_ram(t) )
614 ram++;
615 }
616 }
618 /* No populate-on-demand? Don't need to steal anything? Then we're done!*/
619 if(!pod && !steal_for_cache)
620 goto out_unlock;
622 if ( !nonpod )
623 {
624 /* All PoD: Mark the whole region invalid and tell caller
625 * we're done. */
626 set_p2m_entry(d, gpfn, _mfn(INVALID_MFN), order, p2m_invalid);
627 p2md->pod.entry_count-=(1<<order); /* Lock: p2m */
628 BUG_ON(p2md->pod.entry_count < 0);
629 ret = 1;
630 goto out_entry_check;
631 }
633 /* FIXME: Steal contig 2-meg regions for cache */
635 /* Process as long as:
636 * + There are PoD entries to handle, or
637 * + There is ram left, and we want to steal it
638 */
639 for ( i=0;
640 i<(1<<order) && (pod>0 || (steal_for_cache && ram > 0));
641 i++)
642 {
643 mfn_t mfn;
644 p2m_type_t t;
646 mfn = gfn_to_mfn_query(d, gpfn + i, &t);
647 if ( t == p2m_populate_on_demand )
648 {
649 set_p2m_entry(d, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid);
650 p2md->pod.entry_count--; /* Lock: p2m */
651 BUG_ON(p2md->pod.entry_count < 0);
652 pod--;
653 }
654 else if ( steal_for_cache && p2m_is_ram(t) )
655 {
656 struct page_info *page;
658 ASSERT(mfn_valid(mfn));
660 page = mfn_to_page(mfn);
662 set_p2m_entry(d, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid);
663 set_gpfn_from_mfn(mfn_x(mfn), INVALID_M2P_ENTRY);
665 p2m_pod_cache_add(d, page, 0);
667 steal_for_cache = ( p2md->pod.entry_count > p2md->pod.count );
669 nonpod--;
670 ram--;
671 }
672 }
674 /* If there are no more non-PoD entries, tell decrease_reservation() that
675 * there's nothing left to do. */
676 if ( nonpod == 0 )
677 ret = 1;
679 out_entry_check:
680 /* If we've reduced our "liabilities" beyond our "assets", free some */
681 if ( p2md->pod.entry_count < p2md->pod.count )
682 {
683 printk("b %d\n", p2md->pod.entry_count);
684 p2m_pod_set_cache_target(d, p2md->pod.entry_count);
685 }
687 out_unlock:
688 audit_p2m(d);
689 p2m_unlock(p2md);
691 out:
692 return ret;
693 }
695 void
696 p2m_pod_dump_data(struct domain *d)
697 {
698 struct p2m_domain *p2md = d->arch.p2m;
700 printk(" PoD entries=%d cachesize=%d\n",
701 p2md->pod.entry_count, p2md->pod.count);
702 }
705 /* Search for all-zero superpages to be reclaimed as superpages for the
706 * PoD cache. Must be called w/ p2m lock held, page_alloc lock not held. */
707 static int
708 p2m_pod_zero_check_superpage(struct domain *d, unsigned long gfn)
709 {
710 mfn_t mfn, mfn0 = _mfn(INVALID_MFN);
711 p2m_type_t type, type0 = 0;
712 unsigned long * map = NULL;
713 int ret=0, reset = 0;
714 int i, j;
715 int max_ref = 1;
717 if ( !superpage_aligned(gfn) )
718 goto out;
720 /* Allow an extra refcount for one shadow pt mapping in shadowed domains */
721 if ( paging_mode_shadow(d) )
722 max_ref++;
724 /* Look up the mfns, checking to make sure they're the same mfn
725 * and aligned, and mapping them. */
726 for ( i=0; i<SUPERPAGE_PAGES; i++ )
727 {
729 mfn = gfn_to_mfn_query(d, gfn + i, &type);
731 if ( i == 0 )
732 {
733 mfn0 = mfn;
734 type0 = type;
735 }
737 /* Conditions that must be met for superpage-superpage:
738 * + All gfns are ram types
739 * + All gfns have the same type
740 * + All of the mfns are allocated to a domain
741 * + None of the mfns are used as pagetables, or allocated via xenheap
742 * + The first mfn is 2-meg aligned
743 * + All the other mfns are in sequence
744 * Adding for good measure:
745 * + None of the mfns are likely to be mapped elsewhere (refcount
746 * 2 or less for shadow, 1 for hap)
747 */
748 if ( !p2m_is_ram(type)
749 || type != type0
750 || ( (mfn_to_page(mfn)->count_info & PGC_allocated) == 0 )
751 || ( (mfn_to_page(mfn)->count_info & (PGC_page_table|PGC_xen_heap)) != 0 )
752 || ( (mfn_to_page(mfn)->count_info & PGC_xen_heap ) != 0 )
753 || ( (mfn_to_page(mfn)->count_info & PGC_count_mask) > max_ref )
754 || !( ( i == 0 && superpage_aligned(mfn_x(mfn0)) )
755 || ( i != 0 && mfn_x(mfn) == (mfn_x(mfn0) + i) ) ) )
756 goto out;
757 }
759 /* Now, do a quick check to see if it may be zero before unmapping. */
760 for ( i=0; i<SUPERPAGE_PAGES; i++ )
761 {
762 /* Quick zero-check */
763 map = map_domain_page(mfn_x(mfn0) + i);
765 for ( j=0; j<16; j++ )
766 if( *(map+j) != 0 )
767 break;
769 unmap_domain_page(map);
771 if ( j < 16 )
772 goto out;
774 }
776 /* Try to remove the page, restoring old mapping if it fails. */
777 set_p2m_entry(d, gfn,
778 _mfn(POPULATE_ON_DEMAND_MFN), 9,
779 p2m_populate_on_demand);
781 /* Make sure none of the MFNs are used elsewhere... for example, mapped
782 * via the grant table interface, or by qemu. Allow one refcount for
783 * being allocated to the domain. */
784 for ( i=0; i < SUPERPAGE_PAGES; i++ )
785 {
786 mfn = _mfn(mfn_x(mfn0) + i);
787 if ( (mfn_to_page(mfn)->count_info & PGC_count_mask) > 1 )
788 {
789 reset = 1;
790 goto out_reset;
791 }
792 }
794 /* Finally, do a full zero-check */
795 for ( i=0; i < SUPERPAGE_PAGES; i++ )
796 {
797 map = map_domain_page(mfn_x(mfn0) + i);
799 for ( j=0; j<PAGE_SIZE/sizeof(*map); j++ )
800 if( *(map+j) != 0 )
801 {
802 reset = 1;
803 break;
804 }
806 unmap_domain_page(map);
808 if ( reset )
809 goto out_reset;
810 }
812 /* Finally! We've passed all the checks, and can add the mfn superpage
813 * back on the PoD cache, and account for the new p2m PoD entries */
814 p2m_pod_cache_add(d, mfn_to_page(mfn0), 9);
815 d->arch.p2m->pod.entry_count += SUPERPAGE_PAGES;
817 out_reset:
818 if ( reset )
819 set_p2m_entry(d, gfn, mfn0, 9, type0);
821 out:
822 return ret;
823 }
825 static void
826 p2m_pod_zero_check(struct domain *d, unsigned long *gfns, int count)
827 {
828 mfn_t mfns[count];
829 p2m_type_t types[count];
830 unsigned long * map[count];
832 int i, j;
833 int max_ref = 1;
835 /* Allow an extra refcount for one shadow pt mapping in shadowed domains */
836 if ( paging_mode_shadow(d) )
837 max_ref++;
839 /* First, get the gfn list, translate to mfns, and map the pages. */
840 for ( i=0; i<count; i++ )
841 {
842 mfns[i] = gfn_to_mfn_query(d, gfns[i], types + i);
843 /* If this is ram, and not a pagetable or from the xen heap, and probably not mapped
844 elsewhere, map it; otherwise, skip. */
845 if ( p2m_is_ram(types[i])
846 && ( (mfn_to_page(mfns[i])->count_info & PGC_allocated) != 0 )
847 && ( (mfn_to_page(mfns[i])->count_info & (PGC_page_table|PGC_xen_heap)) == 0 )
848 && ( (mfn_to_page(mfns[i])->count_info & PGC_count_mask) <= max_ref ) )
849 map[i] = map_domain_page(mfn_x(mfns[i]));
850 else
851 map[i] = NULL;
852 }
854 /* Then, go through and check for zeroed pages, removing write permission
855 * for those with zeroes. */
856 for ( i=0; i<count; i++ )
857 {
858 if(!map[i])
859 continue;
861 /* Quick zero-check */
862 for ( j=0; j<16; j++ )
863 if( *(map[i]+j) != 0 )
864 break;
866 if ( j < 16 )
867 {
868 unmap_domain_page(map[i]);
869 map[i] = NULL;
870 continue;
871 }
873 /* Try to remove the page, restoring old mapping if it fails. */
874 set_p2m_entry(d, gfns[i],
875 _mfn(POPULATE_ON_DEMAND_MFN), 0,
876 p2m_populate_on_demand);
878 /* See if the page was successfully unmapped. (Allow one refcount
879 * for being allocated to a domain.) */
880 if ( (mfn_to_page(mfns[i])->count_info & PGC_count_mask) > 1 )
881 {
882 unmap_domain_page(map[i]);
883 map[i] = NULL;
885 set_p2m_entry(d, gfns[i], mfns[i], 0, types[i]);
887 continue;
888 }
889 }
891 /* Now check each page for real */
892 for ( i=0; i < count; i++ )
893 {
894 if(!map[i])
895 continue;
897 for ( j=0; j<PAGE_SIZE/sizeof(*map[i]); j++ )
898 if( *(map[i]+j) != 0 )
899 break;
901 unmap_domain_page(map[i]);
903 /* See comment in p2m_pod_zero_check_superpage() re gnttab
904 * check timing. */
905 if ( j < PAGE_SIZE/sizeof(*map[i]) )
906 {
907 set_p2m_entry(d, gfns[i], mfns[i], 0, types[i]);
908 }
909 else
910 {
911 /* Add to cache, and account for the new p2m PoD entry */
912 p2m_pod_cache_add(d, mfn_to_page(mfns[i]), 0);
913 d->arch.p2m->pod.entry_count++;
914 }
915 }
917 }
919 #define POD_SWEEP_LIMIT 1024
920 static void
921 p2m_pod_emergency_sweep_super(struct domain *d)
922 {
923 struct p2m_domain *p2md = d->arch.p2m;
924 unsigned long i, start, limit;
926 if ( p2md->pod.reclaim_super == 0 )
927 {
928 p2md->pod.reclaim_super = (p2md->pod.max_guest>>9)<<9;
929 p2md->pod.reclaim_super -= SUPERPAGE_PAGES;
930 }
932 start = p2md->pod.reclaim_super;
933 limit = (start > POD_SWEEP_LIMIT) ? (start - POD_SWEEP_LIMIT) : 0;
935 for ( i=p2md->pod.reclaim_super ; i > 0 ; i-=SUPERPAGE_PAGES )
936 {
937 p2m_pod_zero_check_superpage(d, i);
938 /* Stop if we're past our limit and we have found *something*.
939 *
940 * NB that this is a zero-sum game; we're increasing our cache size
941 * by increasing our 'debt'. Since we hold the p2m lock,
942 * (entry_count - count) must remain the same. */
943 if ( !page_list_empty(&p2md->pod.super) && i < limit )
944 break;
945 }
947 p2md->pod.reclaim_super = i ? i - SUPERPAGE_PAGES : 0;
949 }
951 #define POD_SWEEP_STRIDE 16
952 static void
953 p2m_pod_emergency_sweep(struct domain *d)
954 {
955 struct p2m_domain *p2md = d->arch.p2m;
956 unsigned long gfns[POD_SWEEP_STRIDE];
957 unsigned long i, j=0, start, limit;
958 p2m_type_t t;
961 if ( p2md->pod.reclaim_single == 0 )
962 p2md->pod.reclaim_single = p2md->pod.max_guest;
964 start = p2md->pod.reclaim_single;
965 limit = (start > POD_SWEEP_LIMIT) ? (start - POD_SWEEP_LIMIT) : 0;
967 /* FIXME: Figure out how to avoid superpages */
968 for ( i=p2md->pod.reclaim_single ; i > 0 ; i-- )
969 {
970 gfn_to_mfn_query(d, i, &t );
971 if ( p2m_is_ram(t) )
972 {
973 gfns[j] = i;
974 j++;
975 BUG_ON(j > POD_SWEEP_STRIDE);
976 if ( j == POD_SWEEP_STRIDE )
977 {
978 p2m_pod_zero_check(d, gfns, j);
979 j = 0;
980 }
981 }
982 /* Stop if we're past our limit and we have found *something*.
983 *
984 * NB that this is a zero-sum game; we're increasing our cache size
985 * by re-increasing our 'debt'. Since we hold the p2m lock,
986 * (entry_count - count) must remain the same. */
987 if ( p2md->pod.count > 0 && i < limit )
988 break;
989 }
991 if ( j )
992 p2m_pod_zero_check(d, gfns, j);
994 p2md->pod.reclaim_single = i ? i - 1 : i;
996 }
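/* Worked example of the sweep window above (hypothetical numbers): with
 * POD_SWEEP_LIMIT 1024 and POD_SWEEP_STRIDE 16, a sweep starting at
 * pod.reclaim_single = 0x40000 scans gfns downwards, handing them to
 * p2m_pod_zero_check() in batches of 16.  It stops early once it has both
 * dropped below gfn 0x40000 - 1024 = 0x3fc00 and reclaimed at least one
 * page (pod.count > 0), then records the stopping point minus one in
 * pod.reclaim_single so the next emergency sweep resumes from there. */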
998 int
999 p2m_pod_demand_populate(struct domain *d, unsigned long gfn,
1000 unsigned int order,
1001 p2m_query_t q)
1003 struct page_info *p = NULL; /* Compiler warnings */
1004 unsigned long gfn_aligned;
1005 mfn_t mfn;
1006 struct p2m_domain *p2md = d->arch.p2m;
1007 int i;
1009 /* This check is done with the p2m lock held. This will make sure that
1010 * even if d->is_dying changes under our feet, p2m_pod_empty_cache() won't start
1011 * until we're done. */
1012 if ( unlikely(d->is_dying) )
1013 goto out_fail;
1015 /* If we're low, start a sweep */
1016 if ( order == 9 && page_list_empty(&p2md->pod.super) )
1017 p2m_pod_emergency_sweep_super(d);
1019 if ( page_list_empty(&p2md->pod.single) &&
1020 ( ( order == 0 )
1021 || (order == 9 && page_list_empty(&p2md->pod.super) ) ) )
1022 p2m_pod_emergency_sweep(d);
1024 /* Keep track of the highest gfn demand-populated by a guest fault */
1025 if ( q == p2m_guest && gfn > p2md->pod.max_guest )
1026 p2md->pod.max_guest = gfn;
1028 spin_lock(&d->page_alloc_lock);
1030 if ( p2md->pod.count == 0 )
1031 goto out_of_memory;
1033 /* Get a page from the cache. A NULL return value indicates that the
1034 * 2-meg range should be marked singleton PoD, and retried */
1035 if ( (p = p2m_pod_cache_get(d, order)) == NULL )
1036 goto remap_and_retry;
1038 mfn = page_to_mfn(p);
1040 BUG_ON((mfn_x(mfn) & ((1 << order)-1)) != 0);
1042 spin_unlock(&d->page_alloc_lock);
1044 gfn_aligned = (gfn >> order) << order;
1046 set_p2m_entry(d, gfn_aligned, mfn, order, p2m_ram_rw);
1048 for( i = 0 ; i < (1UL << order) ; i++ )
1049 set_gpfn_from_mfn(mfn_x(mfn) + i, gfn_aligned + i);
1051 p2md->pod.entry_count -= (1 << order); /* Lock: p2m */
1052 BUG_ON(p2md->pod.entry_count < 0);
1054 return 0;
1055 out_of_memory:
1056 spin_unlock(&d->page_alloc_lock);
1058 printk("%s: Out of populate-on-demand memory! tot_pages %" PRIu32 " pod_entries %" PRIi32 "\n",
1059 __func__, d->tot_pages, p2md->pod.entry_count);
1060 domain_crash(d);
1061 out_fail:
1062 return -1;
1063 remap_and_retry:
1064 BUG_ON(order != 9);
1065 spin_unlock(&d->page_alloc_lock);
1067 /* Remap this 2-meg region in singleton chunks */
1068 gfn_aligned = (gfn>>order)<<order;
1069 for(i=0; i<(1<<order); i++)
1070 set_p2m_entry(d, gfn_aligned+i, _mfn(POPULATE_ON_DEMAND_MFN), 0,
1071 p2m_populate_on_demand);
1073 return 0;
1076 /* Non-ept "lock-and-check" wrapper */
1077 static int p2m_pod_check_and_populate(struct domain *d, unsigned long gfn,
1078 l1_pgentry_t *p2m_entry, int order,
1079 p2m_query_t q)
1081 /* Only take the lock if we don't already have it. Otherwise it
1082 * wouldn't be safe to do p2m lookups with the p2m lock held */
1083 int do_locking = !p2m_locked_by_me(d->arch.p2m);
1084 int r;
1086 if ( do_locking )
1087 p2m_lock(d->arch.p2m);
1089 audit_p2m(d);
1091 /* Check to make sure this is still PoD */
1092 if ( p2m_flags_to_type(l1e_get_flags(*p2m_entry)) != p2m_populate_on_demand )
1094 if ( do_locking )
1095 p2m_unlock(d->arch.p2m);
1096 return 0;
1099 r = p2m_pod_demand_populate(d, gfn, order, q);
1101 audit_p2m(d);
1102 if ( do_locking )
1103 p2m_unlock(d->arch.p2m);
1105 return r;
1108 // Returns 0 on error (out of memory)
1109 static int
1110 p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
1111 unsigned int page_order, p2m_type_t p2mt)
1113 // XXX -- this might be able to be faster iff current->domain == d
1114 mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
1115 void *table =map_domain_page(mfn_x(table_mfn));
1116 unsigned long i, gfn_remainder = gfn;
1117 l1_pgentry_t *p2m_entry;
1118 l1_pgentry_t entry_content;
1119 l2_pgentry_t l2e_content;
1120 int rv=0;
1122 #if CONFIG_PAGING_LEVELS >= 4
1123 if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
1124 L4_PAGETABLE_SHIFT - PAGE_SHIFT,
1125 L4_PAGETABLE_ENTRIES, PGT_l3_page_table) )
1126 goto out;
1127 #endif
1128 /*
1129 * When using PAE Xen, we only allow 33 bits of pseudo-physical
1130 * address in translated guests (i.e. 8 GBytes). This restriction
1131 * comes from wanting to map the P2M table into the 16MB RO_MPT hole
1132 * in Xen's address space for translated PV guests.
1133 * When using AMD's NPT on PAE Xen, we are restricted to 4GB.
1134 */
1135 if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
1136 L3_PAGETABLE_SHIFT - PAGE_SHIFT,
1137 ((CONFIG_PAGING_LEVELS == 3)
1138 ? (d->arch.hvm_domain.hap_enabled ? 4 : 8)
1139 : L3_PAGETABLE_ENTRIES),
1140 PGT_l2_page_table) )
1141 goto out;
1143 if ( page_order == 0 )
1145 if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
1146 L2_PAGETABLE_SHIFT - PAGE_SHIFT,
1147 L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
1148 goto out;
1150 p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
1151 0, L1_PAGETABLE_ENTRIES);
1152 ASSERT(p2m_entry);
1154 if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
1155 entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
1156 else
1157 entry_content = l1e_empty();
1159 /* level 1 entry */
1160 paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);
1162 else
1164 p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
1165 L2_PAGETABLE_SHIFT - PAGE_SHIFT,
1166 L2_PAGETABLE_ENTRIES);
1167 ASSERT(p2m_entry);
1169 /* FIXME: Deal with 4k replaced by 2meg pages */
1170 if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
1171 !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
1173 P2M_ERROR("configure P2M table 4KB L2 entry with large page\n");
1174 domain_crash(d);
1175 goto out;
1178 if ( mfn_valid(mfn) || p2m_is_magic(p2mt) )
1179 l2e_content = l2e_from_pfn(mfn_x(mfn),
1180 p2m_type_to_flags(p2mt) | _PAGE_PSE);
1181 else
1182 l2e_content = l2e_empty();
1184 entry_content.l1 = l2e_content.l2;
1185 paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 2);
1188 /* Track the highest gfn for which we have ever had a valid mapping */
1189 if ( mfn_valid(mfn)
1190 && (gfn + (1UL << page_order) - 1 > d->arch.p2m->max_mapped_pfn) )
1191 d->arch.p2m->max_mapped_pfn = gfn + (1UL << page_order) - 1;
1193 if ( iommu_enabled && need_iommu(d) )
1195 if ( p2mt == p2m_ram_rw )
1196 for ( i = 0; i < (1UL << page_order); i++ )
1197 iommu_map_page(d, gfn+i, mfn_x(mfn)+i );
1198 else
1199 for ( int i = 0; i < (1UL << page_order); i++ )
1200 iommu_unmap_page(d, gfn+i);
1203 /* Success */
1204 rv = 1;
1206 out:
1207 unmap_domain_page(table);
1208 return rv;
1211 static mfn_t
1212 p2m_gfn_to_mfn(struct domain *d, unsigned long gfn, p2m_type_t *t,
1213 p2m_query_t q)
1215 mfn_t mfn;
1216 paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
1217 l2_pgentry_t *l2e;
1218 l1_pgentry_t *l1e;
1220 ASSERT(paging_mode_translate(d));
1222 /* XXX This is for compatibility with the old model, where anything not
1223 * XXX marked as RAM was considered to be emulated MMIO space.
1224 * XXX Once we start explicitly registering MMIO regions in the p2m
1225 * XXX we will return p2m_invalid for unmapped gfns */
1226 *t = p2m_mmio_dm;
1228 mfn = pagetable_get_mfn(d->arch.phys_table);
1230 if ( gfn > d->arch.p2m->max_mapped_pfn )
1231 /* This pfn is higher than the highest the p2m map currently holds */
1232 return _mfn(INVALID_MFN);
1234 #if CONFIG_PAGING_LEVELS >= 4
1236 l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn));
1237 l4e += l4_table_offset(addr);
1238 if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 )
1240 unmap_domain_page(l4e);
1241 return _mfn(INVALID_MFN);
1243 mfn = _mfn(l4e_get_pfn(*l4e));
1244 unmap_domain_page(l4e);
1246 #endif
1248 l3_pgentry_t *l3e = map_domain_page(mfn_x(mfn));
1249 #if CONFIG_PAGING_LEVELS == 3
1250 /* On PAE hosts the p2m has eight l3 entries, not four (see
1251 * shadow_set_p2m_entry()) so we can't use l3_table_offset.
1252 * Instead, just count the number of l3es from zero. It's safe
1253 * to do this because we already checked that the gfn is within
1254 * the bounds of the p2m. */
1255 l3e += (addr >> L3_PAGETABLE_SHIFT);
1256 #else
1257 l3e += l3_table_offset(addr);
1258 #endif
1259 if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
1261 unmap_domain_page(l3e);
1262 return _mfn(INVALID_MFN);
1264 mfn = _mfn(l3e_get_pfn(*l3e));
1265 unmap_domain_page(l3e);
1268 l2e = map_domain_page(mfn_x(mfn));
1269 l2e += l2_table_offset(addr);
1271 pod_retry_l2:
1272 if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 )
1274 /* PoD: Try to populate a 2-meg chunk */
1275 if ( p2m_flags_to_type(l2e_get_flags(*l2e)) == p2m_populate_on_demand )
1277 if ( q != p2m_query ) {
1278 if ( !p2m_pod_check_and_populate(d, gfn,
1279 (l1_pgentry_t *)l2e, 9, q) )
1280 goto pod_retry_l2;
1281 } else
1282 *t = p2m_populate_on_demand;
1285 unmap_domain_page(l2e);
1286 return _mfn(INVALID_MFN);
1288 else if ( (l2e_get_flags(*l2e) & _PAGE_PSE) )
1290 mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr));
1291 *t = p2m_flags_to_type(l2e_get_flags(*l2e));
1292 unmap_domain_page(l2e);
1294 ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
1295 return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
1298 mfn = _mfn(l2e_get_pfn(*l2e));
1299 unmap_domain_page(l2e);
1301 l1e = map_domain_page(mfn_x(mfn));
1302 l1e += l1_table_offset(addr);
1303 pod_retry_l1:
1304 if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
1306 /* PoD: Try to populate */
1307 if ( p2m_flags_to_type(l1e_get_flags(*l1e)) == p2m_populate_on_demand )
1309 if ( q != p2m_query ) {
1310 if ( !p2m_pod_check_and_populate(d, gfn,
1311 (l1_pgentry_t *)l1e, 0, q) )
1312 goto pod_retry_l1;
1313 } else
1314 *t = p2m_populate_on_demand;
1317 unmap_domain_page(l1e);
1318 return _mfn(INVALID_MFN);
1320 mfn = _mfn(l1e_get_pfn(*l1e));
1321 *t = p2m_flags_to_type(l1e_get_flags(*l1e));
1322 unmap_domain_page(l1e);
1324 ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
1325 return (p2m_is_valid(*t) || p2m_is_grant(*t)) ? mfn : _mfn(INVALID_MFN);
1328 /* Read the current domain's p2m table (through the linear mapping). */
1329 static mfn_t p2m_gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t,
1330 p2m_query_t q)
1332 mfn_t mfn = _mfn(INVALID_MFN);
1333 p2m_type_t p2mt = p2m_mmio_dm;
1334 paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
1335 /* XXX This is for compatibility with the old model, where anything not
1336 * XXX marked as RAM was considered to be emulated MMIO space.
1337 * XXX Once we start explicitly registering MMIO regions in the p2m
1338 * XXX we will return p2m_invalid for unmapped gfns */
1340 if ( gfn <= current->domain->arch.p2m->max_mapped_pfn )
1342 l1_pgentry_t l1e = l1e_empty(), *p2m_entry;
1343 l2_pgentry_t l2e = l2e_empty();
1344 int ret;
1346 ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START)
1347 / sizeof(l1_pgentry_t));
1349 /*
1350 * Read & process L2
1351 */
1352 p2m_entry = &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START)
1353 + l2_linear_offset(addr)];
1355 pod_retry_l2:
1356 ret = __copy_from_user(&l2e,
1357 p2m_entry,
1358 sizeof(l2e));
1359 if ( ret != 0
1360 || !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
1362 if( (l2e_get_flags(l2e) & _PAGE_PSE)
1363 && ( p2m_flags_to_type(l2e_get_flags(l2e))
1364 == p2m_populate_on_demand ) )
1366 /* The read has succeeded, so we know that the mapping
1367 * exists at this point. */
1368 if ( q != p2m_query )
1370 if ( !p2m_pod_check_and_populate(current->domain, gfn,
1371 p2m_entry, 9, q) )
1372 goto pod_retry_l2;
1374 /* Allocate failed. */
1375 p2mt = p2m_invalid;
1376 printk("%s: Allocate failed!\n", __func__);
1377 goto out;
1379 else
1381 p2mt = p2m_populate_on_demand;
1382 goto out;
1386 goto pod_retry_l1;
1389 if (l2e_get_flags(l2e) & _PAGE_PSE)
1391 p2mt = p2m_flags_to_type(l2e_get_flags(l2e));
1392 ASSERT(l2e_get_pfn(l2e) != INVALID_MFN || !p2m_is_ram(p2mt));
1394 if ( p2m_is_valid(p2mt) )
1395 mfn = _mfn(l2e_get_pfn(l2e) + l1_table_offset(addr));
1396 else
1397 p2mt = p2m_mmio_dm;
1399 goto out;
1402 /*
1403 * Read and process L1
1404 */
1406 /* Need to __copy_from_user because the p2m is sparse and this
1407 * part might not exist */
1408 pod_retry_l1:
1409 p2m_entry = &phys_to_machine_mapping[gfn];
1411 ret = __copy_from_user(&l1e,
1412 p2m_entry,
1413 sizeof(l1e));
1415 if ( ret == 0 ) {
1416 p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
1417 ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
1419 if ( p2m_flags_to_type(l1e_get_flags(l1e))
1420 == p2m_populate_on_demand )
1422 /* The read has succeeded, so we know that the mapping
1423 * exists at this point. */
1424 if ( q != p2m_query )
1426 if ( !p2m_pod_check_and_populate(current->domain, gfn,
1427 (l1_pgentry_t *)p2m_entry, 0, q) )
1428 goto pod_retry_l1;
1430 /* Allocate failed. */
1431 p2mt = p2m_invalid;
1432 goto out;
1434 else
1436 p2mt = p2m_populate_on_demand;
1437 goto out;
1441 if ( p2m_is_valid(p2mt) || p2m_is_grant(p2mt) )
1442 mfn = _mfn(l1e_get_pfn(l1e));
1443 else
1444 /* XXX see above */
1445 p2mt = p2m_mmio_dm;
1448 out:
1449 *t = p2mt;
1450 return mfn;
1453 /* Init the datastructures for later use by the p2m code */
1454 int p2m_init(struct domain *d)
1456 struct p2m_domain *p2m;
1458 p2m = xmalloc(struct p2m_domain);
1459 if ( p2m == NULL )
1460 return -ENOMEM;
1462 d->arch.p2m = p2m;
1464 memset(p2m, 0, sizeof(*p2m));
1465 p2m_lock_init(p2m);
1466 INIT_PAGE_LIST_HEAD(&p2m->pages);
1467 INIT_PAGE_LIST_HEAD(&p2m->pod.super);
1468 INIT_PAGE_LIST_HEAD(&p2m->pod.single);
1470 p2m->set_entry = p2m_set_entry;
1471 p2m->get_entry = p2m_gfn_to_mfn;
1472 p2m->get_entry_current = p2m_gfn_to_mfn_current;
1473 p2m->change_entry_type_global = p2m_change_type_global;
1475 if ( is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled &&
1476 (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) )
1477 ept_p2m_init(d);
1479 return 0;
1482 void p2m_change_entry_type_global(struct domain *d,
1483 p2m_type_t ot, p2m_type_t nt)
1485 struct p2m_domain *p2m = d->arch.p2m;
1487 p2m_lock(p2m);
1488 p2m->change_entry_type_global(d, ot, nt);
1489 p2m_unlock(p2m);
1492 static
1493 int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
1494 unsigned int page_order, p2m_type_t p2mt)
1496 unsigned long todo = 1ul << page_order;
1497 unsigned int order;
1498 int rc = 1;
1500 while ( todo )
1502 if ( is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled )
1503 order = (((gfn | mfn_x(mfn) | todo) & (SUPERPAGE_PAGES - 1)) == 0) ?
1504 9 : 0;
1505 else
1506 order = 0;
1507 if ( !d->arch.p2m->set_entry(d, gfn, mfn, order, p2mt) )
1508 rc = 0;
1509 gfn += 1ul << order;
1510 if ( mfn_x(mfn) != INVALID_MFN )
1511 mfn = _mfn(mfn_x(mfn) + (1ul << order));
1512 todo -= 1ul << order;
1515 return rc;
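/* Example of the order selection above (hypothetical values, HAP domain):
 * mapping page_order = 10 (4MB) at gfn 0x100000 with mfn 0x200000.  gfn,
 * mfn and todo (0x400) are all multiples of SUPERPAGE_PAGES (0x200), so
 * the loop issues two order-9 (2MB) set_entry() calls:
 *   set_entry(gfn 0x100000, mfn 0x200000, order 9)
 *   set_entry(gfn 0x100200, mfn 0x200200, order 9)
 * If either gfn or mfn were not 512-page aligned, or HAP were disabled,
 * the same range would instead be written as 1024 order-0 entries. */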
1518 // Allocate a new p2m table for a domain.
1519 //
1520 // The structure of the p2m table is that of a pagetable for xen (i.e. it is
1521 // controlled by CONFIG_PAGING_LEVELS).
1522 //
1523 // The alloc_page and free_page functions will be used to get memory to
1524 // build the p2m, and to release it again at the end of day.
1525 //
1526 // Returns 0 for success or -errno.
1527 //
1528 int p2m_alloc_table(struct domain *d,
1529 struct page_info * (*alloc_page)(struct domain *d),
1530 void (*free_page)(struct domain *d, struct page_info *pg))
1533 mfn_t mfn = _mfn(INVALID_MFN);
1534 struct page_info *page, *p2m_top;
1535 unsigned int page_count = 0;
1536 unsigned long gfn = -1UL;
1537 struct p2m_domain *p2m = d->arch.p2m;
1539 p2m_lock(p2m);
1541 if ( pagetable_get_pfn(d->arch.phys_table) != 0 )
1543 P2M_ERROR("p2m already allocated for this domain\n");
1544 p2m_unlock(p2m);
1545 return -EINVAL;
1548 P2M_PRINTK("allocating p2m table\n");
1550 p2m->alloc_page = alloc_page;
1551 p2m->free_page = free_page;
1553 p2m_top = p2m->alloc_page(d);
1554 if ( p2m_top == NULL )
1556 p2m_unlock(p2m);
1557 return -ENOMEM;
1559 page_list_add_tail(p2m_top, &p2m->pages);
1561 p2m_top->count_info = 1;
1562 p2m_top->u.inuse.type_info =
1563 #if CONFIG_PAGING_LEVELS == 4
1564 PGT_l4_page_table
1565 #else
1566 PGT_l3_page_table
1567 #endif
1568 | 1 | PGT_validated;
1570 d->arch.phys_table = pagetable_from_mfn(page_to_mfn(p2m_top));
1572 P2M_PRINTK("populating p2m table\n");
1574 /* Initialise physmap tables for slot zero. Other code assumes this. */
1575 if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), 0,
1576 p2m_invalid) )
1577 goto error;
1579 /* Copy all existing mappings from the page list and m2p */
1580 page_list_for_each(page, &d->page_list)
1582 mfn = page_to_mfn(page);
1583 gfn = get_gpfn_from_mfn(mfn_x(mfn));
1584 page_count++;
1585 if (
1586 #ifdef __x86_64__
1587 (gfn != 0x5555555555555555L)
1588 #else
1589 (gfn != 0x55555555L)
1590 #endif
1591 && gfn != INVALID_M2P_ENTRY
1592 && !set_p2m_entry(d, gfn, mfn, 0, p2m_ram_rw) )
1593 goto error;
1596 P2M_PRINTK("p2m table initialised (%u pages)\n", page_count);
1597 p2m_unlock(p2m);
1598 return 0;
1600 error:
1601 P2M_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%"
1602 PRI_mfn "\n", gfn, mfn_x(mfn));
1603 p2m_unlock(p2m);
1604 return -ENOMEM;
1607 void p2m_teardown(struct domain *d)
1608 /* Return all the p2m pages to Xen.
1609 * We know we don't have any extra mappings to these pages */
1611 struct page_info *pg;
1612 struct p2m_domain *p2m = d->arch.p2m;
1614 p2m_lock(p2m);
1615 d->arch.phys_table = pagetable_null();
1617 while ( (pg = page_list_remove_head(&p2m->pages)) )
1618 p2m->free_page(d, pg);
1619 p2m_unlock(p2m);
1622 void p2m_final_teardown(struct domain *d)
1624 xfree(d->arch.p2m);
1625 d->arch.p2m = NULL;
1628 #if P2M_AUDIT
1629 static void audit_p2m(struct domain *d)
1631 struct page_info *page;
1632 struct domain *od;
1633 unsigned long mfn, gfn, m2pfn, lp2mfn = 0;
1634 int entry_count = 0;
1635 mfn_t p2mfn;
1636 unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0;
1637 int test_linear;
1638 p2m_type_t type;
1640 if ( !paging_mode_translate(d) )
1641 return;
1643 //P2M_PRINTK("p2m audit starts\n");
1645 test_linear = ( (d == current->domain)
1646 && !pagetable_is_null(current->arch.monitor_table) );
1647 if ( test_linear )
1648 flush_tlb_local();
1650 spin_lock(&d->page_alloc_lock);
1652 /* Audit part one: walk the domain's page allocation list, checking
1653 * the m2p entries. */
1654 page_list_for_each ( page, &d->page_list )
1656 mfn = mfn_x(page_to_mfn(page));
1658 // P2M_PRINTK("auditing guest page, mfn=%#lx\n", mfn);
1660 od = page_get_owner(page);
1662 if ( od != d )
1664 P2M_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n",
1665 mfn, od, (od?od->domain_id:-1), d, d->domain_id);
1666 continue;
1669 gfn = get_gpfn_from_mfn(mfn);
1670 if ( gfn == INVALID_M2P_ENTRY )
1672 orphans_i++;
1673 //P2M_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n",
1674 // mfn);
1675 continue;
1678 if ( gfn == 0x55555555 )
1680 orphans_d++;
1681 //P2M_PRINTK("orphaned guest page: mfn=%#lx has debug gfn\n",
1682 // mfn);
1683 continue;
1686 p2mfn = gfn_to_mfn_type_foreign(d, gfn, &type, p2m_query);
1687 if ( mfn_x(p2mfn) != mfn )
1689 mpbad++;
1690 P2M_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx"
1691 " (-> gfn %#lx)\n",
1692 mfn, gfn, mfn_x(p2mfn),
1693 (mfn_valid(p2mfn)
1694 ? get_gpfn_from_mfn(mfn_x(p2mfn))
1695 : -1u));
1696 /* This m2p entry is stale: the domain has another frame in
1697 * this physical slot. No great disaster, but for neatness,
1698 * blow away the m2p entry. */
1699 set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
1702 if ( test_linear && (gfn <= d->arch.p2m->max_mapped_pfn) )
1704 lp2mfn = mfn_x(gfn_to_mfn_query(d, gfn, &type));
1705 if ( lp2mfn != mfn_x(p2mfn) )
1707 P2M_PRINTK("linear mismatch gfn %#lx -> mfn %#lx "
1708 "(!= mfn %#lx)\n", gfn, lp2mfn, mfn_x(p2mfn));
1712 // P2M_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx, lp2mfn=%#lx\n",
1713 // mfn, gfn, p2mfn, lp2mfn);
1716 spin_unlock(&d->page_alloc_lock);
1718 /* Audit part two: walk the domain's p2m table, checking the entries. */
1719 if ( pagetable_get_pfn(d->arch.phys_table) != 0 )
1721 l2_pgentry_t *l2e;
1722 l1_pgentry_t *l1e;
1723 int i1, i2;
1725 #if CONFIG_PAGING_LEVELS == 4
1726 l4_pgentry_t *l4e;
1727 l3_pgentry_t *l3e;
1728 int i3, i4;
1729 l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
1730 #else /* CONFIG_PAGING_LEVELS == 3 */
1731 l3_pgentry_t *l3e;
1732 int i3;
1733 l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
1734 #endif
1736 gfn = 0;
1737 #if CONFIG_PAGING_LEVELS >= 4
1738 for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
1740 if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
1742 gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT);
1743 continue;
1745 l3e = map_domain_page(mfn_x(_mfn(l4e_get_pfn(l4e[i4]))));
1746 #endif
1747 for ( i3 = 0;
1748 i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8);
1749 i3++ )
1751 if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
1753 gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
1754 continue;
1756 l2e = map_domain_page(mfn_x(_mfn(l3e_get_pfn(l3e[i3]))));
1757 for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
1759 if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
1761 if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE)
1762 && ( p2m_flags_to_type(l2e_get_flags(l2e[i2]))
1763 == p2m_populate_on_demand ) )
1764 entry_count+=SUPERPAGE_PAGES;
1765 gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
1766 continue;
1769 /* check for super page */
1770 if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE )
1772 mfn = l2e_get_pfn(l2e[i2]);
1773 ASSERT(mfn_valid(_mfn(mfn)));
1774 for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++)
1776 m2pfn = get_gpfn_from_mfn(mfn+i1);
1777 if ( m2pfn != (gfn + i1) )
1779 pmbad++;
1780 P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
1781 " -> gfn %#lx\n", gfn+i1, mfn+i1,
1782 m2pfn);
1783 BUG();
1786 gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
1787 continue;
1790 l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2]))));
1792 for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
1794 p2m_type_t type;
1796 type = p2m_flags_to_type(l1e_get_flags(l1e[i1]));
1797 if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
1799 if ( type == p2m_populate_on_demand )
1800 entry_count++;
1801 continue;
1803 mfn = l1e_get_pfn(l1e[i1]);
1804 ASSERT(mfn_valid(_mfn(mfn)));
1805 m2pfn = get_gpfn_from_mfn(mfn);
1806 if ( m2pfn != gfn &&
1807 type != p2m_mmio_direct &&
1808 !p2m_is_grant(type) )
1810 pmbad++;
1811 printk("mismatch: gfn %#lx -> mfn %#lx"
1812 " -> gfn %#lx\n", gfn, mfn, m2pfn);
1813 P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
1814 " -> gfn %#lx\n", gfn, mfn, m2pfn);
1815 BUG();
1818 unmap_domain_page(l1e);
1820 unmap_domain_page(l2e);
1822 #if CONFIG_PAGING_LEVELS >= 4
1823 unmap_domain_page(l3e);
1825 #endif
1827 #if CONFIG_PAGING_LEVELS == 4
1828 unmap_domain_page(l4e);
1829 #else /* CONFIG_PAGING_LEVELS == 3 */
1830 unmap_domain_page(l3e);
1831 #endif
1835 if ( entry_count != d->arch.p2m->pod.entry_count )
1837 printk("%s: refcounted entry count %d, audit count %d!\n",
1838 __func__,
1839 d->arch.p2m->pod.entry_count,
1840 entry_count);
1841 BUG();
1844 //P2M_PRINTK("p2m audit complete\n");
1845 //if ( orphans_i | orphans_d | mpbad | pmbad )
1846 // P2M_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n",
1847 // orphans_i + orphans_d, orphans_i, orphans_d,
1848 if ( mpbad | pmbad )
1849 P2M_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n",
1850 pmbad, mpbad);
1852 #endif /* P2M_AUDIT */
1856 static void
1857 p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn,
1858 unsigned int page_order)
1860 unsigned long i;
1861 mfn_t mfn_return;
1862 p2m_type_t t;
1864 if ( !paging_mode_translate(d) )
1866 if ( need_iommu(d) )
1867 for ( i = 0; i < (1 << page_order); i++ )
1868 iommu_unmap_page(d, mfn + i);
1869 return;
1872 P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
1874 for ( i = 0; i < (1UL << page_order); i++ )
1876 mfn_return = d->arch.p2m->get_entry(d, gfn + i, &t, p2m_query);
1877 if ( !p2m_is_grant(t) )
1878 set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
1879 ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) );
1881 set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid);
1884 void
1885 guest_physmap_remove_page(struct domain *d, unsigned long gfn,
1886 unsigned long mfn, unsigned int page_order)
1888 p2m_lock(d->arch.p2m);
1889 audit_p2m(d);
1890 p2m_remove_page(d, gfn, mfn, page_order);
1891 audit_p2m(d);
1892 p2m_unlock(d->arch.p2m);
1895 #if CONFIG_PAGING_LEVELS == 3
1896 static int gfn_check_limit(
1897 struct domain *d, unsigned long gfn, unsigned int order)
1899 /*
1900 * 32bit AMD nested paging does not support over 4GB guest due to
1901 * hardware translation limit. This limitation is checked by comparing
1902 * gfn with 0xfffffUL.
1903 */
1904 if ( !paging_mode_hap(d) || ((gfn + (1ul << order)) <= 0x100000UL) ||
1905 (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) )
1906 return 0;
1908 if ( !test_and_set_bool(d->arch.hvm_domain.svm.npt_4gb_warning) )
1909 dprintk(XENLOG_WARNING, "Dom%d failed to populate memory beyond"
1910 " 4GB: specify 'hap=0' domain config option.\n",
1911 d->domain_id);
1913 return -EINVAL;
1915 #else
1916 #define gfn_check_limit(d, g, o) 0
1917 #endif
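/* Worked example of the limit above (hypothetical gfns): with HAP on an
 * AMD host under a 3-level (PAE) hypervisor, populating an order-9 region
 * at gfn 0xffe00 is allowed (0xffe00 + 0x200 == 0x100000, i.e. exactly the
 * 4GB boundary), whereas gfn 0xfff00 at order 9 ends at 0x100100, exceeds
 * the boundary, and makes gfn_check_limit() return -EINVAL after printing
 * the one-time npt_4gb_warning. */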
1919 int
1920 guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
1921 unsigned int order)
1923 struct p2m_domain *p2md = d->arch.p2m;
1924 unsigned long i;
1925 p2m_type_t ot;
1926 mfn_t omfn;
1927 int pod_count = 0;
1928 int rc = 0;
1930 BUG_ON(!paging_mode_translate(d));
1932 rc = gfn_check_limit(d, gfn, order);
1933 if ( rc != 0 )
1934 return rc;
1936 p2m_lock(p2md);
1937 audit_p2m(d);
1939 P2M_DEBUG("mark pod gfn=%#lx\n", gfn);
1941 /* Make sure all gpfns are unused */
1942 for ( i = 0; i < (1UL << order); i++ )
1944 omfn = gfn_to_mfn_query(d, gfn + i, &ot);
1945 if ( p2m_is_ram(ot) )
1947 printk("%s: gfn_to_mfn returned type %d!\n",
1948 __func__, ot);
1949 rc = -EBUSY;
1950 goto out;
1952 else if ( ot == p2m_populate_on_demand )
1954 /* Count how many PoD entries we'll be replacing if successful */
1955 pod_count++;
1959 /* Now, actually do the two-way mapping */
1960 if ( !set_p2m_entry(d, gfn, _mfn(POPULATE_ON_DEMAND_MFN), order,
1961 p2m_populate_on_demand) )
1962 rc = -EINVAL;
1963 else
1965 p2md->pod.entry_count += 1 << order; /* Lock: p2m */
1966 p2md->pod.entry_count -= pod_count;
1967 BUG_ON(p2md->pod.entry_count < 0);
1970 audit_p2m(d);
1971 p2m_unlock(p2md);
1973 out:
1974 return rc;
1978 int
1979 guest_physmap_add_entry(struct domain *d, unsigned long gfn,
1980 unsigned long mfn, unsigned int page_order,
1981 p2m_type_t t)
1983 unsigned long i, ogfn;
1984 p2m_type_t ot;
1985 mfn_t omfn;
1986 int pod_count = 0;
1987 int rc = 0;
1989 if ( !paging_mode_translate(d) )
1991 if ( need_iommu(d) && t == p2m_ram_rw )
1993 for ( i = 0; i < (1 << page_order); i++ )
1994 if ( (rc = iommu_map_page(d, mfn + i, mfn + i)) != 0 )
1996 while ( i-- > 0 )
1997 iommu_unmap_page(d, mfn + i);
1998 return rc;
2001 return 0;
2004 rc = gfn_check_limit(d, gfn, page_order);
2005 if ( rc != 0 )
2006 return rc;
2008 p2m_lock(d->arch.p2m);
2009 audit_p2m(d);
2011 P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
2013 /* First, remove m->p mappings for existing p->m mappings */
2014 for ( i = 0; i < (1UL << page_order); i++ )
2016 omfn = gfn_to_mfn_query(d, gfn + i, &ot);
2017 if ( p2m_is_grant(ot) )
2019 /* Really shouldn't be unmapping grant maps this way */
2020 domain_crash(d);
2021 p2m_unlock(d->arch.p2m);
2022 return -EINVAL;
2024 else if ( p2m_is_ram(ot) )
2026 ASSERT(mfn_valid(omfn));
2027 set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
2029 else if ( ot == p2m_populate_on_demand )
2031 /* Count how many PoD entries we'll be replacing if successful */
2032 pod_count++;
2036 /* Then, look for m->p mappings for this range and deal with them */
2037 for ( i = 0; i < (1UL << page_order); i++ )
2039 if ( page_get_owner(mfn_to_page(_mfn(mfn + i))) != d )
2040 continue;
2041 ogfn = mfn_to_gfn(d, _mfn(mfn+i));
2042 if (
2043 #ifdef __x86_64__
2044 (ogfn != 0x5555555555555555L)
2045 #else
2046 (ogfn != 0x55555555L)
2047 #endif
2048 && (ogfn != INVALID_M2P_ENTRY)
2049 && (ogfn != gfn + i) )
2051 /* This machine frame is already mapped at another physical
2052 * address */
2053 P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
2054 mfn + i, ogfn, gfn + i);
2055 omfn = gfn_to_mfn_query(d, ogfn, &ot);
2056 /* If we get here, we know the local domain owns the page,
2057 so it can't have been grant mapped in. */
2058 BUG_ON( p2m_is_grant(ot) );
2059 if ( p2m_is_ram(ot) )
2061 ASSERT(mfn_valid(omfn));
2062 P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
2063 ogfn , mfn_x(omfn));
2064 if ( mfn_x(omfn) == (mfn + i) )
2065 p2m_remove_page(d, ogfn, mfn + i, 0);
2070 /* Now, actually do the two-way mapping */
2071 if ( mfn_valid(_mfn(mfn)) )
2073 if ( !set_p2m_entry(d, gfn, _mfn(mfn), page_order, t) )
2074 rc = -EINVAL;
2075 if ( !p2m_is_grant(t) )
2077 for ( i = 0; i < (1UL << page_order); i++ )
2078 set_gpfn_from_mfn(mfn+i, gfn+i);
2081 else
2083 gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
2084 gfn, mfn);
2085 if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order,
2086 p2m_invalid) )
2087 rc = -EINVAL;
2088 else
2090 d->arch.p2m->pod.entry_count -= pod_count; /* Lock: p2m */
2091 BUG_ON(d->arch.p2m->pod.entry_count < 0);
2095 audit_p2m(d);
2096 p2m_unlock(d->arch.p2m);
2098 return rc;
2101 /* Walk the whole p2m table, changing any entries of the old type
2102 * to the new type. This is used in hardware-assisted paging to
2103 * quickly enable or disable log-dirty tracking */
2104 void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt)
2106 unsigned long mfn, gfn, flags;
2107 l1_pgentry_t l1e_content;
2108 l1_pgentry_t *l1e;
2109 l2_pgentry_t *l2e;
2110 mfn_t l1mfn, l2mfn;
2111 int i1, i2;
2112 l3_pgentry_t *l3e;
2113 int i3;
2114 #if CONFIG_PAGING_LEVELS == 4
2115 l4_pgentry_t *l4e;
2116 int i4;
2117 #endif /* CONFIG_PAGING_LEVELS == 4 */
2119 BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
2121 if ( !paging_mode_translate(d) )
2122 return;
2124 if ( pagetable_get_pfn(d->arch.phys_table) == 0 )
2125 return;
2127 ASSERT(p2m_locked_by_me(d->arch.p2m));
2129 #if CONFIG_PAGING_LEVELS == 4
2130 l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
2131 #else /* CONFIG_PAGING_LEVELS == 3 */
2132 l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
2133 #endif
2135 #if CONFIG_PAGING_LEVELS >= 4
2136 for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
2138 if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
2140 continue;
2142 l3e = map_domain_page(l4e_get_pfn(l4e[i4]));
2143 #endif
2144 for ( i3 = 0;
2145 i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8);
2146 i3++ )
2148 if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
2150 continue;
2152 l2mfn = _mfn(l3e_get_pfn(l3e[i3]));
2153 l2e = map_domain_page(l3e_get_pfn(l3e[i3]));
2154 for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
2156 if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
2158 continue;
2161 if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE) )
2163 flags = l2e_get_flags(l2e[i2]);
2164 if ( p2m_flags_to_type(flags) != ot )
2165 continue;
2166 mfn = l2e_get_pfn(l2e[i2]);
2167 gfn = get_gpfn_from_mfn(mfn);
2168 flags = p2m_type_to_flags(nt);
2169 l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE);
2170 paging_write_p2m_entry(d, gfn, (l1_pgentry_t *)&l2e[i2],
2171 l2mfn, l1e_content, 2);
2172 continue;
2175 l1mfn = _mfn(l2e_get_pfn(l2e[i2]));
2176 l1e = map_domain_page(mfn_x(l1mfn));
2178 for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
2180 flags = l1e_get_flags(l1e[i1]);
2181 if ( p2m_flags_to_type(flags) != ot )
2182 continue;
2183 mfn = l1e_get_pfn(l1e[i1]);
2184 gfn = get_gpfn_from_mfn(mfn);
2185 /* create a new l1e entry with the new type */
2186 flags = p2m_type_to_flags(nt);
2187 l1e_content = l1e_from_pfn(mfn, flags);
2188 paging_write_p2m_entry(d, gfn, &l1e[i1],
2189 l1mfn, l1e_content, 1);
2191 unmap_domain_page(l1e);
2193 unmap_domain_page(l2e);
2195 #if CONFIG_PAGING_LEVELS >= 4
2196 unmap_domain_page(l3e);
2198 #endif
2200 #if CONFIG_PAGING_LEVELS == 4
2201 unmap_domain_page(l4e);
2202 #else /* CONFIG_PAGING_LEVELS == 3 */
2203 unmap_domain_page(l3e);
2204 #endif
2208 /* Modify the p2m type of a single gfn from ot to nt, returning the
2209 * entry's previous type */
2210 p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
2211 p2m_type_t ot, p2m_type_t nt)
2213 p2m_type_t pt;
2214 mfn_t mfn;
2216 BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
2218 p2m_lock(d->arch.p2m);
2220 mfn = gfn_to_mfn(d, gfn, &pt);
2221 if ( pt == ot )
2222 set_p2m_entry(d, gfn, mfn, 0, nt);
2224 p2m_unlock(d->arch.p2m);
2226 return pt;
2229 int
2230 set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
2232 int rc = 0;
2233 p2m_type_t ot;
2234 mfn_t omfn;
2236 if ( !paging_mode_translate(d) )
2237 return 0;
2239 omfn = gfn_to_mfn_query(d, gfn, &ot);
2240 if ( p2m_is_grant(ot) )
2242 domain_crash(d);
2243 return 0;
2245 else if ( p2m_is_ram(ot) )
2247 ASSERT(mfn_valid(omfn));
2248 set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
2251 P2M_DEBUG("set mmio %lx %lx\n", gfn, mfn_x(mfn));
2252 p2m_lock(d->arch.p2m);
2253 rc = set_p2m_entry(d, gfn, mfn, 0, p2m_mmio_direct);
2254 p2m_unlock(d->arch.p2m);
2255 if ( 0 == rc )
2256 gdprintk(XENLOG_ERR,
2257 "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
2258 gmfn_to_mfn(d, gfn));
2259 return rc;
2262 int
2263 clear_mmio_p2m_entry(struct domain *d, unsigned long gfn)
2265 int rc = 0;
2266 unsigned long mfn;
2268 if ( !paging_mode_translate(d) )
2269 return 0;
2271 mfn = gmfn_to_mfn(d, gfn);
2272 if ( INVALID_MFN == mfn )
2274 gdprintk(XENLOG_ERR,
2275 "clear_mmio_p2m_entry: gfn_to_mfn failed! gfn=%08lx\n", gfn);
2276 return 0;
2278 p2m_lock(d->arch.p2m);
2279 rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0, 0);
2280 p2m_unlock(d->arch.p2m);
2282 return rc;
2285 /*
2286 * Local variables:
2287 * mode: C
2288 * c-set-style: "BSD"
2289 * c-basic-offset: 4
2290 * indent-tabs-mode: nil
2291 * End:
2292 */