debuggers.hg: xen/arch/x86/shadow.c @ 2673:0174982516f6
bitkeeper revision 1.1159.1.229 (416d3ad1BpCS1RVPjkX14HUpsanlGw)

Shadow pagetable walkthrough.

author:   kaf24@freefall.cl.cam.ac.uk
date:     Wed Oct 13 14:25:21 2004 +0000 (2004-10-13)
parents:  13eee2d191f6
children: 3cb2cf1d6374
/* -*-  Mode:C++; c-file-style:BSD; c-basic-offset:4; tab-width:4 -*- */

#include <xen/config.h>
#include <xen/types.h>
#include <xen/mm.h>
#include <asm/shadow.h>
#include <asm/domain_page.h>
#include <asm/page.h>
#include <xen/event.h>
#include <xen/trace.h>
/********

To use these shadow page tables, guests must not rely on the ACCESSED
and DIRTY bits on L2 PTEs being accurate -- they will typically all be set.

I doubt this will break anything. (If guests want to use the va_update
mechanism they've signed up for this anyhow...)

There's a per-domain shadow table spin lock which works fine for SMP
hosts. We don't have to worry about interrupts as no shadow operations
happen in an interrupt context. It's probably not quite ready for SMP
guest operation as we have to worry about synchronisation between gpte
and spte updates. It's possible that this might only happen in a
hypercall context, in which case we'll probably have a per-domain
hypercall lock anyhow (at least initially).

********/
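
/*
 * Return a shadow page to the domain heap and drop the per-domain shadow
 * page count. The perfcounter that is decremented depends on whether the
 * page was shadowing an L1 or an L2 page table.
 */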
static inline void free_shadow_page(
    struct mm_struct *m, struct pfn_info *page)
{
    m->shadow_page_count--;

    switch ( page->u.inuse.type_info & PGT_type_mask )
    {
    case PGT_l1_page_table:
        perfc_decr(shadow_l1_pages);
        break;

    case PGT_l2_page_table:
        perfc_decr(shadow_l2_pages);
        break;

    default:
        printk("Free shadow weird page type pfn=%08x type=%08x\n",
               page - frame_table, page->u.inuse.type_info);
        break;
    }

    free_domheap_page(page);
}
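
/*
 * Tear down the whole shadow hash table: free the shadow page referenced
 * by each node, reset the bucket heads in place, and return chained nodes
 * to the per-domain free list.
 */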
static void __free_shadow_table(struct mm_struct *m)
{
    int i, free = 0;
    struct shadow_status *x, *n;

    /*
     * WARNING! The shadow page table must not currently be in use!
     * e.g., You are expected to have paused the domain and synchronized CR3.
     */

    shadow_audit(m, 1);

    /* Free each hash chain in turn. */
    for ( i = 0; i < shadow_ht_buckets; i++ )
    {
        /* Skip empty buckets. */
        x = &m->shadow_ht[i];
        if ( x->pfn == 0 )
            continue;

        /* Free the head page. */
        free_shadow_page(
            m, &frame_table[x->spfn_and_flags & PSH_pfn_mask]);

        /* Reinitialise the head node. */
        x->pfn = 0;
        x->spfn_and_flags = 0;
        n = x->next;
        x->next = NULL;

        free++;

        /* Iterate over non-head nodes. */
        for ( x = n; x != NULL; x = n )
        {
            /* Free the shadow page. */
            free_shadow_page(
                m, &frame_table[x->spfn_and_flags & PSH_pfn_mask]);

            /* Re-initialise the chain node. */
            x->pfn = 0;
            x->spfn_and_flags = 0;

            /* Add to the free list. */
            n = x->next;
            x->next = m->shadow_ht_free;
            m->shadow_ht_free = x;

            free++;
        }

        shadow_audit(m, 0);
    }

    SH_LOG("Free shadow table. Freed=%d.", free);
}
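
/*
 * Reset a single shadow hash entry. Shadow L2s are zeroed in place (the
 * guest-visible entries only); shadow L1s are removed from the hash and
 * freed, in which case the caller must restart its walk of the chain.
 */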
static inline int __clear_shadow_page(
    struct mm_struct *m, struct shadow_status *x)
{
    unsigned long *p;
    int restart = 0;
    struct pfn_info *spage = &frame_table[x->spfn_and_flags & PSH_pfn_mask];

    switch ( spage->u.inuse.type_info & PGT_type_mask )
    {
        /* We clear L2 pages by zeroing the guest entries. */
    case PGT_l2_page_table:
        p = map_domain_mem((spage - frame_table) << PAGE_SHIFT);
        memset(p, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE * sizeof(*p));
        unmap_domain_mem(p);
        break;

        /* We clear L1 pages by freeing them: no benefit from zeroing them. */
    case PGT_l1_page_table:
        delete_shadow_status(m, x->pfn);
        free_shadow_page(m, spage);
        restart = 1; /* We need to go to start of list again. */
        break;
    }

    return restart;
}
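
/*
 * Clear all shadow state for the domain by walking every hash bucket and
 * resetting each entry. A bucket walk is restarted whenever an entry is
 * deleted from underneath it (see __clear_shadow_page()).
 */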
static void __clear_shadow_state(struct mm_struct *m)
{
    int i;
    struct shadow_status *x;

    shadow_audit(m, 1);

    for ( i = 0; i < shadow_ht_buckets; i++ )
    {
    retry:
        /* Skip empty buckets. */
        x = &m->shadow_ht[i];
        if ( x->pfn == 0 )
            continue;

        if ( __clear_shadow_page(m, x) )
            goto retry;

        for ( x = x->next; x != NULL; x = x->next )
            if ( __clear_shadow_page(m, x) )
                goto retry;

        shadow_audit(m, 0);
    }

    SH_VLOG("Scan shadow table. l1=%d l2=%d",
            perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages));
}

void shadow_mode_init(void)
{
}
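
/*
 * Switch the domain into shadow mode 'mode': allocate the shadow hash
 * table and, for log-dirty mode, a dirty bitmap sized to cover max_pages
 * (rounded up to a multiple of 64 bits). Returns 0 on success, or -ENOMEM.
 */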
int shadow_mode_enable(struct domain *p, unsigned int mode)
{
    struct mm_struct *m = &p->mm;

    m->shadow_ht = xmalloc(
        shadow_ht_buckets * sizeof(struct shadow_status));
    if ( m->shadow_ht == NULL )
        goto nomem;
    memset(m->shadow_ht, 0, shadow_ht_buckets * sizeof(struct shadow_status));

    if ( mode == SHM_logdirty )
    {
        m->shadow_dirty_bitmap_size = (p->max_pages + 63) & ~63;
        m->shadow_dirty_bitmap =
            xmalloc(m->shadow_dirty_bitmap_size/8);
        if ( m->shadow_dirty_bitmap == NULL )
        {
            m->shadow_dirty_bitmap_size = 0;
            goto nomem;
        }
        memset(m->shadow_dirty_bitmap, 0, m->shadow_dirty_bitmap_size/8);
    }

    m->shadow_mode = mode;

    __shadow_mk_pagetable(m);
    return 0;

 nomem:
    if ( m->shadow_ht != NULL )
        xfree(m->shadow_ht);
    m->shadow_ht = NULL;
    return -ENOMEM;
}
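
/*
 * Leave shadow mode: free all shadow pages, any extra hash-table nodes
 * that were allocated on demand, the dirty bitmap, and the hash table
 * itself.
 */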
void __shadow_mode_disable(struct domain *d)
{
    struct mm_struct *m = &d->mm;
    struct shadow_status *x, *n;

    __free_shadow_table(m);
    m->shadow_mode = 0;

    SH_VLOG("freed tables count=%d l1=%d l2=%d",
            m->shadow_page_count, perfc_value(shadow_l1_pages),
            perfc_value(shadow_l2_pages));

    n = m->shadow_ht_extras;
    while ( (x = n) != NULL )
    {
        m->shadow_extras_count--;
        n = *((struct shadow_status **)(&x[shadow_ht_extra_size]));
        xfree(x);
    }

    m->shadow_ht_extras = NULL;
    ASSERT(m->shadow_extras_count == 0);
    SH_LOG("freed extras, now %d", m->shadow_extras_count);

    if ( m->shadow_dirty_bitmap != NULL )
    {
        xfree(m->shadow_dirty_bitmap);
        m->shadow_dirty_bitmap = 0;
        m->shadow_dirty_bitmap_size = 0;
    }

    xfree(m->shadow_ht);
    m->shadow_ht = NULL;
}
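
/*
 * Handle the table-level shadow control operations, with the shadow lock
 * already held:
 *   FLUSH - throw away all shadow page tables and reset the counters;
 *   CLEAN - reset shadow state, report the counters, and copy out then
 *           clear the dirty bitmap (in 1kB chunks);
 *   PEEK  - report the counters and dirty bitmap without clearing them.
 */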
static int shadow_mode_table_op(
    struct domain *d, dom0_shadow_control_t *sc)
{
    unsigned int op = sc->op;
    struct mm_struct *m = &d->mm;
    int i, rc = 0;

    ASSERT(spin_is_locked(&d->mm.shadow_lock));

    SH_VLOG("shadow mode table op %08lx %08lx count %d",
            pagetable_val(m->pagetable), pagetable_val(m->shadow_table),
            m->shadow_page_count);

    shadow_audit(m, 1);

    switch ( op )
    {
    case DOM0_SHADOW_CONTROL_OP_FLUSH:
        __free_shadow_table(m);

        d->mm.shadow_fault_count       = 0;
        d->mm.shadow_dirty_count       = 0;
        d->mm.shadow_dirty_net_count   = 0;
        d->mm.shadow_dirty_block_count = 0;

        break;

    case DOM0_SHADOW_CONTROL_OP_CLEAN:
        __clear_shadow_state(m);

        sc->stats.fault_count       = d->mm.shadow_fault_count;
        sc->stats.dirty_count       = d->mm.shadow_dirty_count;
        sc->stats.dirty_net_count   = d->mm.shadow_dirty_net_count;
        sc->stats.dirty_block_count = d->mm.shadow_dirty_block_count;

        d->mm.shadow_fault_count       = 0;
        d->mm.shadow_dirty_count       = 0;
        d->mm.shadow_dirty_net_count   = 0;
        d->mm.shadow_dirty_block_count = 0;

        if ( (d->max_pages > sc->pages) ||
             (sc->dirty_bitmap == NULL) ||
             (d->mm.shadow_dirty_bitmap == NULL) )
        {
            rc = -EINVAL;
            goto out;
        }

        sc->pages = d->max_pages;

#define chunk (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
        for ( i = 0; i < d->max_pages; i += chunk )
        {
            int bytes = ((((d->max_pages - i) > chunk) ?
                          chunk : (d->max_pages - i)) + 7) / 8;

            copy_to_user(
                sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
                d->mm.shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
                bytes);

            memset(
                d->mm.shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
                0, bytes);
        }

        break;

    case DOM0_SHADOW_CONTROL_OP_PEEK:
        sc->stats.fault_count       = d->mm.shadow_fault_count;
        sc->stats.dirty_count       = d->mm.shadow_dirty_count;
        sc->stats.dirty_net_count   = d->mm.shadow_dirty_net_count;
        sc->stats.dirty_block_count = d->mm.shadow_dirty_block_count;

        if ( (d->max_pages > sc->pages) ||
             (sc->dirty_bitmap == NULL) ||
             (d->mm.shadow_dirty_bitmap == NULL) )
        {
            rc = -EINVAL;
            goto out;
        }

        sc->pages = d->max_pages;
        copy_to_user(
            sc->dirty_bitmap, d->mm.shadow_dirty_bitmap, (d->max_pages+7)/8);

        break;

    default:
        BUG();
    }

 out:
    SH_VLOG("shadow mode table op : page count %d", m->shadow_page_count);
    shadow_audit(m, 1);
    __shadow_mk_pagetable(m);
    return rc;
}
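
/*
 * Entry point for the DOM0 shadow control hypercall. The target domain is
 * paused and its page tables synchronised before the shadow lock is taken;
 * OFF and the ENABLE_* commands switch modes, and anything in the
 * FLUSH..PEEK range is passed on to shadow_mode_table_op().
 */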
int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
{
    unsigned int cmd = sc->op;
    int rc = 0;

    if ( unlikely(d == current) )
    {
        DPRINTK("Don't try to do a shadow op on yourself!\n");
        return -EINVAL;
    }

    domain_pause(d);
    synchronise_pagetables(~0UL);

    shadow_lock(&d->mm);

    switch ( cmd )
    {
    case DOM0_SHADOW_CONTROL_OP_OFF:
        shadow_mode_disable(d);
        break;

    case DOM0_SHADOW_CONTROL_OP_ENABLE_TEST:
        shadow_mode_disable(d);
        rc = shadow_mode_enable(d, SHM_test);
        break;

    case DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY:
        shadow_mode_disable(d);
        rc = shadow_mode_enable(d, SHM_logdirty);
        break;

    default:
        if ( shadow_mode(d) &&
             (cmd >= DOM0_SHADOW_CONTROL_OP_FLUSH) &&
             (cmd <= DOM0_SHADOW_CONTROL_OP_PEEK) )
            rc = shadow_mode_table_op(d, sc);
        else
            rc = -EINVAL;
        break;
    }

    shadow_unlock(&d->mm);

    domain_unpause(d);

    return rc;
}
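
/*
 * Allocate a page from the domain heap for use as a shadow page table.
 * Allocation failure is currently fatal (see the FIXME below): no attempt
 * is made to reclaim memory by flushing existing shadows.
 */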
static inline struct pfn_info *alloc_shadow_page(struct mm_struct *m)
{
    struct pfn_info *page = alloc_domheap_page(NULL);

    m->shadow_page_count++;

    if ( unlikely(page == NULL) )
    {
        printk("Couldn't alloc shadow page! count=%d\n",
               m->shadow_page_count);
        SH_VLOG("Shadow tables l1=%d l2=%d",
                perfc_value(shadow_l1_pages),
                perfc_value(shadow_l2_pages));
        BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
    }

    return page;
}
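
/*
 * Discard the shadow of guest page-table page 'gpfn': look up its shadow
 * pfn, remove the hash entry, and free the shadow page itself.
 */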
void unshadow_table(unsigned long gpfn, unsigned int type)
{
    unsigned long  spfn;
    struct domain *d = frame_table[gpfn].u.inuse.domain;

    SH_VLOG("unshadow_table type=%08x gpfn=%08lx", type, gpfn);

    perfc_incrc(unshadow_table_count);

    /*
     * This function is the same for all p.t. pages. Even for multi-processor
     * guests there won't be a race here as this CPU was the one that
     * cmpxchg'ed the page to invalid.
     */
    spfn = __shadow_status(&d->mm, gpfn) & PSH_pfn_mask;
    delete_shadow_status(&d->mm, gpfn);
    free_shadow_page(&d->mm, &frame_table[spfn]);
}
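
/*
 * Create the shadow of guest L2 page table 'gpfn'. The shadow starts out
 * with all guest-visible entries empty (they are filled lazily by the
 * fault handler); on x86-32 the hypervisor entries, the two linear
 * page-table mappings and the per-domain mapping are installed here.
 * Returns the pfn of the new shadow.
 */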
unsigned long shadow_l2_table(
    struct mm_struct *m, unsigned long gpfn)
{
    struct pfn_info *spfn_info;
    unsigned long    spfn;
    l2_pgentry_t    *spl2e;

    SH_VVLOG("shadow_l2_table( %08lx )", gpfn);

    perfc_incrc(shadow_l2_table_count);

    if ( (spfn_info = alloc_shadow_page(m)) == NULL )
        BUG(); /* XXX Deal gracefully with failure. */

    spfn_info->u.inuse.type_info = PGT_l2_page_table;
    perfc_incr(shadow_l2_pages);

    spfn = spfn_info - frame_table;

    /* Mark pfn as being shadowed; update field to point at shadow. */
    set_shadow_status(m, gpfn, spfn | PSH_shadowed);

    spl2e = (l2_pgentry_t *)map_domain_mem(spfn << PAGE_SHIFT);

    /*
     * We could proactively fill in PDEs for pages that are already shadowed.
     * However, we tried it and it didn't help performance. This is simpler.
     */
    memset(spl2e, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));

#ifdef __i386__
    /* Install hypervisor and 2x linear p.t. mappings. */
    memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
           &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
           HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
    spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
        mk_l2_pgentry((gpfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
    spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
        mk_l2_pgentry((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
    spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
        mk_l2_pgentry(__pa(frame_table[gpfn].u.inuse.domain->mm.perdomain_pt) |
                      __PAGE_HYPERVISOR);
#endif

    unmap_domain_mem(spl2e);

    SH_VLOG("shadow_l2_table( %08lx -> %08lx)", gpfn, spfn);
    return spfn;
}
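
/*
 * Hook the shadow of the L1 page table covering 'va' into the current
 * shadow L2. If the L1 is not yet shadowed, a shadow page is allocated
 * and populated from the guest L1 via the linear mappings; otherwise the
 * existing shadow is simply linked into the missing L2 entries (guest and
 * shadow).
 */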
static void shadow_map_l1_into_current_l2(unsigned long va)
{
    struct mm_struct *m = &current->mm;
    unsigned long *gpl1e, *spl1e, gpde, spde, gl1pfn, sl1pfn, sl1ss;
    struct pfn_info *sl1pfn_info;
    int i;

    gpde = l2_pgentry_val(linear_l2_table[va >> L2_PAGETABLE_SHIFT]);

    gl1pfn = gpde >> PAGE_SHIFT;

    sl1ss = __shadow_status(m, gl1pfn);
    if ( !(sl1ss & PSH_shadowed) )
    {
        /* This L1 is NOT already shadowed so we need to shadow it. */
        SH_VVLOG("4a: l1 not shadowed ( %08lx )", gl1pfn);

        sl1pfn_info = alloc_shadow_page(m);
        sl1pfn_info->u.inuse.type_info = PGT_l1_page_table;

        sl1pfn = sl1pfn_info - frame_table;

        perfc_incrc(shadow_l1_table_count);
        perfc_incr(shadow_l1_pages);

        set_shadow_status(m, gl1pfn, PSH_shadowed | sl1pfn);

        l2pde_general(m, &gpde, &spde, sl1pfn);

        linear_l2_table[va >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
        shadow_linear_l2_table[va >> L2_PAGETABLE_SHIFT] =
            mk_l2_pgentry(spde);

        gpl1e = (unsigned long *) &(linear_pg_table[
            (va >> PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE - 1)]);

        spl1e = (unsigned long *) &shadow_linear_pg_table[
            (va >> PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE - 1)];

        for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
            l1pte_propagate_from_guest(m, &gpl1e[i], &spl1e[i]);
    }
    else
    {
        /* This L1 is shadowed already, but the L2 entry is missing. */
        SH_VVLOG("4b: was shadowed, l2 missing ( %08lx )", gl1pfn);

        sl1pfn = sl1ss & PSH_pfn_mask;
        l2pde_general(m, &gpde, &spde, sl1pfn);

        linear_l2_table[va >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
        shadow_linear_l2_table[va >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry(spde);
    }
}
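
/*
 * Shadow page-fault fixup for virtual address 'va'. Returns 0 if the
 * fault must be propagated to the guest (gpte not present, write to a
 * read-only mapping, or the gpte could not be read), and 1 if it was
 * fixed up by propagating the guest PTE into the shadow tables.
 */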
int shadow_fault(unsigned long va, long error_code)
{
    unsigned long gpte, spte;
    struct mm_struct *m = &current->mm;

    SH_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code);

    check_pagetable(m, current->mm.pagetable, "pre-sf");

    /*
     * STEP 1. A fast-reject set of checks with no locking.
     */

    if ( unlikely(__get_user(gpte, (unsigned long *)
                             &linear_pg_table[va >> PAGE_SHIFT])) )
    {
        SH_VVLOG("shadow_fault - EXIT: read gpte faulted");
        return 0;
    }

    if ( !(gpte & _PAGE_PRESENT) )
    {
        SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)", gpte);
        return 0;
    }

    if ( (error_code & 2) && !(gpte & _PAGE_RW) )
    {
        /* Write fault on a read-only mapping. */
        return 0;
    }

    /*
     * STEP 2. Take the shadow lock and re-check the guest PTE.
     */

    shadow_lock(m);

    if ( unlikely(__get_user(gpte, (unsigned long *)
                             &linear_pg_table[va >> PAGE_SHIFT])) )
    {
        SH_VVLOG("shadow_fault - EXIT: read gpte faulted");
        shadow_unlock(m);
        return 0;
    }

    if ( unlikely(!(gpte & _PAGE_PRESENT)) )
    {
        SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)", gpte);
        shadow_unlock(m);
        return 0;
    }

    /* Write fault? */
    if ( error_code & 2 )
    {
        if ( unlikely(!(gpte & _PAGE_RW)) )
        {
            /* Write fault on a read-only mapping. */
            SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%lx)", gpte);
            shadow_unlock(m);
            return 0;
        }

        l1pte_write_fault(m, &gpte, &spte);
    }
    else
    {
        l1pte_read_fault(m, &gpte, &spte);
    }

    /*
     * STEP 3. Write the modified shadow PTE and guest PTE back to the tables.
     */

    /* XXX Watch out for read-only L2 entries! (not used in Linux). */
    if ( unlikely(__put_user(gpte, (unsigned long *)
                             &linear_pg_table[va >> PAGE_SHIFT])) )
        domain_crash();

    /*
     * Update of shadow PTE can fail because the L1 p.t. is not shadowed,
     * or because the shadow isn't linked into this shadow L2 p.t.
     */
    if ( unlikely(__put_user(spte, (unsigned long *)
                             &shadow_linear_pg_table[va >> PAGE_SHIFT])) )
    {
        SH_VVLOG("3: not shadowed/mapped gpte=%08lx spte=%08lx", gpte, spte);
        shadow_map_l1_into_current_l2(va);
        shadow_linear_pg_table[va >> PAGE_SHIFT] = mk_l1_pgentry(spte);
    }

    perfc_incrc(shadow_fixup_count);
    m->shadow_fault_count++;

    shadow_unlock(m);

    check_pagetable(m, current->mm.pagetable, "post-sf");
    return 1;
}
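
/*
 * Propagate a normal (non-fault) guest write of the L1 entry at physical
 * address 'pa' into the corresponding shadow L1. The previously mapped
 * shadow L1 is cached via *prev_spfn_ptr/*prev_spl1e_ptr so that batched
 * updates to the same page avoid remapping it.
 */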
void shadow_l1_normal_pt_update(
    unsigned long pa, unsigned long gpte,
    unsigned long *prev_spfn_ptr,
    l1_pgentry_t **prev_spl1e_ptr)
{
    unsigned long spfn, spte, prev_spfn = *prev_spfn_ptr;
    l1_pgentry_t *spl1e, *prev_spl1e = *prev_spl1e_ptr;

    /* N.B. To get here, we know the l1 page *must* be shadowed. */
    SH_VVLOG("shadow_l1_normal_pt_update pa=%08lx, gpte=%08lx, "
             "prev_spfn=%08lx, prev_spl1e=%p\n",
             pa, gpte, prev_spfn, prev_spl1e);

    spfn = __shadow_status(&current->mm, pa >> PAGE_SHIFT) & PSH_pfn_mask;

    if ( spfn == prev_spfn )
    {
        spl1e = prev_spl1e;
    }
    else
    {
        if ( prev_spl1e != NULL )
            unmap_domain_mem(prev_spl1e);
        spl1e = (l1_pgentry_t *)map_domain_mem(spfn << PAGE_SHIFT);
        *prev_spfn_ptr  = spfn;
        *prev_spl1e_ptr = spl1e;
    }

    l1pte_propagate_from_guest(&current->mm, &gpte, &spte);
    spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t)] = mk_l1_pgentry(spte);
}
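
/*
 * Propagate a normal (non-fault) guest write of an L2 entry at physical
 * address 'pa' into the corresponding shadow L2, linking in the shadow of
 * the referenced L1 if that L1 is already shadowed.
 */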
void shadow_l2_normal_pt_update(unsigned long pa, unsigned long gpte)
{
    unsigned long spfn, spte;
    l2_pgentry_t *spl2e;
    unsigned long s_sh;

    /* N.B. To get here, we know the l2 page *must* be shadowed. */
    SH_VVLOG("shadow_l2_normal_pt_update pa=%08lx, gpte=%08lx", pa, gpte);

    spfn = __shadow_status(&current->mm, pa >> PAGE_SHIFT) & PSH_pfn_mask;

    s_sh = (gpte & _PAGE_PRESENT) ?
        __shadow_status(&current->mm, gpte >> PAGE_SHIFT) : 0;

    /* XXXX Should mark guest pte as DIRTY and ACCESSED too! */
    l2pde_general(&current->mm, &gpte, &spte, s_sh);
    spl2e = (l2_pgentry_t *)map_domain_mem(spfn << PAGE_SHIFT);
    spl2e[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t)] = mk_l2_pgentry(spte);
    unmap_domain_mem(spl2e);
}

/************************************************************************/
/************************************************************************/
/************************************************************************/

#if SHADOW_DEBUG

static int sh_l2_present;
static int sh_l1_present;
char *sh_check_name;

#define FAIL(_f, _a...)                                             \
    do {                                                            \
        printk("XXX %s-FAIL (%d,%d)" _f " g=%08lx s=%08lx\n",       \
               sh_check_name, level, i, ## _a , gpte, spte);        \
        BUG();                                                      \
    } while ( 0 )

static int check_pte(
    struct mm_struct *m, unsigned long gpte, unsigned long spte,
    int level, int i)
{
    unsigned long mask, gpfn, spfn;

    if ( (spte == 0) || (spte == 0xdeadface) || (spte == 0x00000E00) )
        return 1; /* always safe */

    if ( !(spte & _PAGE_PRESENT) )
        FAIL("Non zero not present spte");

    if ( level == 2 ) sh_l2_present++;
    if ( level == 1 ) sh_l1_present++;

    if ( !(gpte & _PAGE_PRESENT) )
        FAIL("Guest not present yet shadow is");

    mask = ~(_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|0xFFFFF000);

    if ( (spte & mask) != (gpte & mask) )
        FAIL("Corrupt?");

    if ( (spte & _PAGE_DIRTY) && !(gpte & _PAGE_DIRTY) )
        FAIL("Dirty coherence");

    if ( (spte & _PAGE_ACCESSED) && !(gpte & _PAGE_ACCESSED) )
        FAIL("Accessed coherence");

    if ( (spte & _PAGE_RW) && !(gpte & _PAGE_RW) )
        FAIL("RW coherence");

    if ( (spte & _PAGE_RW) && !((gpte & _PAGE_RW) && (gpte & _PAGE_DIRTY)) )
        FAIL("RW2 coherence");

    spfn = spte >> PAGE_SHIFT;
    gpfn = gpte >> PAGE_SHIFT;

    if ( gpfn == spfn )
    {
        if ( level > 1 )
            FAIL("Linear map ???"); /* XXX this will fail on BSD */
    }
    else
    {
        if ( level < 2 )
            FAIL("Shadow in L1 entry?");

        if ( __shadow_status(m, gpfn) != (PSH_shadowed | spfn) )
            FAIL("spfn problem g.sf=%08lx", __shadow_status(m, gpfn));
    }

    return 1;
}

static int check_l1_table(
    struct mm_struct *m, unsigned long va,
    unsigned long g2, unsigned long s2)
{
    int i;
    unsigned long *gpl1e, *spl1e;

    gpl1e = map_domain_mem(g2 << PAGE_SHIFT);
    spl1e = map_domain_mem(s2 << PAGE_SHIFT);

    for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
        check_pte(m, gpl1e[i], spl1e[i], 1, i);

    unmap_domain_mem(spl1e);
    unmap_domain_mem(gpl1e);

    return 1;
}

#define FAILPT(_f, _a...)                                           \
    do {                                                            \
        printk("XXX FAIL %s-PT" _f "\n", s, ## _a );                \
        BUG();                                                      \
    } while ( 0 )
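
/*
 * Debug audit: walk a guest page table and its shadow, checking that the
 * hypervisor, linear and per-domain mappings are consistent and that
 * every present shadow PTE agrees with its guest PTE (see check_pte()).
 */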
int check_pagetable(struct mm_struct *m, pagetable_t pt, char *s)
{
    unsigned long gptbase = pagetable_val(pt);
    unsigned long gpfn, spfn;
    int i;
    l2_pgentry_t *gpl2e, *spl2e;

    sh_check_name = s;

    SH_VVLOG("%s-PT Audit", s);

    sh_l2_present = sh_l1_present = 0;

    gpfn = gptbase >> PAGE_SHIFT;

    if ( !(__shadow_status(m, gpfn) & PSH_shadowed) )
    {
        printk("%s-PT %08lx not shadowed\n", s, gptbase);
        if ( __shadow_status(m, gpfn) != 0 )
            BUG();
        return 0;
    }

    spfn = __shadow_status(m, gpfn) & PSH_pfn_mask;

    if ( __shadow_status(m, gpfn) != (PSH_shadowed | spfn) )
        FAILPT("ptbase shadow inconsistent1");

    gpl2e = (l2_pgentry_t *)map_domain_mem(gpfn << PAGE_SHIFT);
    spl2e = (l2_pgentry_t *)map_domain_mem(spfn << PAGE_SHIFT);

    if ( memcmp(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
                &gpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
                ((SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT) -
                 DOMAIN_ENTRIES_PER_L2_PAGETABLE) * sizeof(l2_pgentry_t)) )
    {
        printk("gpfn=%08lx spfn=%08lx\n", gpfn, spfn);
        for ( i = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
              i < (SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT);
              i++ )
            printk("+++ (%d) %08lx %08lx\n", i,
                   l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]));
        FAILPT("hypervisor entries inconsistent");
    }

    if ( (l2_pgentry_val(spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
          l2_pgentry_val(gpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT])) )
        FAILPT("hypervisor linear map inconsistent");

    if ( (l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >>
                               L2_PAGETABLE_SHIFT]) !=
          ((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR)) )
        FAILPT("hypervisor shadow linear map inconsistent %08lx %08lx",
               l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >>
                                    L2_PAGETABLE_SHIFT]),
               (spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);

    if ( (l2_pgentry_val(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
          ((__pa(frame_table[gpfn].u.inuse.domain->mm.perdomain_pt) |
            __PAGE_HYPERVISOR))) )
        FAILPT("hypervisor per-domain map inconsistent");

    /* Check the whole L2. */
    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
        check_pte(m, l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]), 2, i);

    /* Go back and recurse. */
    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
    {
        if ( l2_pgentry_val(spl2e[i]) != 0 )
            check_l1_table(
                m, i << L2_PAGETABLE_SHIFT,
                l2_pgentry_val(gpl2e[i]) >> PAGE_SHIFT,
                l2_pgentry_val(spl2e[i]) >> PAGE_SHIFT);
    }

    unmap_domain_mem(spl2e);
    unmap_domain_mem(gpl2e);

    SH_VVLOG("PT verified : l2_present = %d, l1_present = %d\n",
             sh_l2_present, sh_l1_present);

    return 1;
}

#endif