
view xen/arch/x86/shadow.c @ 3770:d21fbb46b9d8

bitkeeper revision 1.1159.253.1 (4208f8a54Zaz-XgC11YTHeLxPHPoZg)

Merge scramble.cl.cam.ac.uk:/auto/groups/xeno/BK/xeno.bk
into scramble.cl.cam.ac.uk:/local/scratch/kaf24/xen-unstable.bk
author kaf24@scramble.cl.cam.ac.uk
date Tue Feb 08 17:36:37 2005 +0000 (2005-02-08)
parents f5f2757b3aa2 cb87fd290eb0
children 12104922e743
/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */

#include <xen/config.h>
#include <xen/types.h>
#include <xen/mm.h>
#include <asm/shadow.h>
#include <asm/domain_page.h>
#include <asm/page.h>
#include <xen/event.h>
#include <xen/trace.h>
/********

To use these shadow page tables, guests must not rely on the ACCESSED
and DIRTY bits on L2 pte's being accurate -- they will typically all be set.

I doubt this will break anything. (If guests want to use the va_update
mechanism they've signed up for this anyhow...)

There's a per-domain shadow table spin lock which works fine for SMP
hosts. We don't have to worry about interrupts as no shadow operations
happen in an interrupt context. It's probably not quite ready for SMP
guest operation as we have to worry about synchronisation between gpte
and spte updates. It's possible that this might only happen in a
hypercall context, in which case we'll probably have a per-domain
hypercall lock anyhow (at least initially).

********/
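
/*
 * Locking convention used throughout this file (inferred from the callers
 * below rather than stated anywhere): any path that reads or modifies
 * shadow state takes the per-domain lock first, e.g.
 *
 *     shadow_lock(d);
 *     ... look up / update d->arch.shadow_ht, shadow PTEs ...
 *     shadow_unlock(d);
 *
 * shadow_fault() and shadow_mode_control() follow this pattern, and
 * shadow_mode_table_op() ASSERTs that the lock is already held on entry.
 */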
static inline void free_shadow_page(
    struct domain *d, struct pfn_info *page)
{
    d->arch.shadow_page_count--;

    switch ( page->u.inuse.type_info & PGT_type_mask )
    {
    case PGT_l1_page_table:
        perfc_decr(shadow_l1_pages);
        break;

    case PGT_l2_page_table:
        perfc_decr(shadow_l2_pages);
        break;
    default:
        printk("Free shadow weird page type pfn=%08x type=%08x\n",
               page - frame_table, page->u.inuse.type_info);
        break;
    }

    free_domheap_page(page);
}
static void free_shadow_state(struct domain *d)
{
    int i, free = 0;
    struct shadow_status *x, *n;

    /*
     * WARNING! The shadow page table must not currently be in use!
     * e.g., You are expected to have paused the domain and synchronized CR3.
     */

    shadow_audit(d, 1);

    /* Free each hash chain in turn. */
    for ( i = 0; i < shadow_ht_buckets; i++ )
    {
        /* Skip empty buckets. */
        x = &d->arch.shadow_ht[i];
        if ( x->pfn == 0 )
            continue;

        /* Free the head page. */
        free_shadow_page(
            d, &frame_table[x->smfn_and_flags & PSH_pfn_mask]);

        /* Reinitialise the head node. */
        x->pfn            = 0;
        x->smfn_and_flags = 0;
        n                 = x->next;
        x->next           = NULL;

        free++;

        /* Iterate over non-head nodes. */
        for ( x = n; x != NULL; x = n )
        {
            /* Free the shadow page. */
            free_shadow_page(
                d, &frame_table[x->smfn_and_flags & PSH_pfn_mask]);

            /* Re-initialise the chain node. */
            x->pfn            = 0;
            x->smfn_and_flags = 0;

            /* Add to the free list. */
            n       = x->next;
            x->next = d->arch.shadow_ht_free;
            d->arch.shadow_ht_free = x;

            free++;
        }

        shadow_audit(d, 0);
    }

    SH_LOG("Free shadow table. Freed=%d.", free);
}
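
/*
 * Note on the hash table layout relied on above: each bucket's head node
 * lives inline in the d->arch.shadow_ht[] array, so it is reset in place
 * rather than freed, while overflow nodes reached via ->next are pushed
 * back onto d->arch.shadow_ht_free for reuse.  The extra node pools
 * themselves are only returned to the heap in __shadow_mode_disable().
 */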
static inline int clear_shadow_page(
    struct domain *d, struct shadow_status *x)
{
    unsigned long   *p;
    int              restart = 0;
    struct pfn_info *spage = &frame_table[x->smfn_and_flags & PSH_pfn_mask];

    switch ( spage->u.inuse.type_info & PGT_type_mask )
    {
        /* We clear L2 pages by zeroing the guest entries. */
    case PGT_l2_page_table:
        p = map_domain_mem((spage - frame_table) << PAGE_SHIFT);
        if ( shadow_mode(d) == SHM_full_32 )
            memset(p, 0, ENTRIES_PER_L2_PAGETABLE * sizeof(*p));
        else
            memset(p, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE * sizeof(*p));
        unmap_domain_mem(p);
        break;

        /* We clear L1 pages by freeing them: no benefit from zeroing them. */
    case PGT_l1_page_table:
        delete_shadow_status(d, x->pfn);
        free_shadow_page(d, spage);
        restart = 1; /* We need to go to start of list again. */
        break;
    }

    return restart;
}
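
/*
 * The likely reason for the asymmetry above: an L2 shadow may be the
 * top-level table some VCPU is currently running on, so it is emptied in
 * place (only the guest-visible slots, except under SHM_full_32 where
 * the whole table is zeroed) and the hypervisor/linear-map slots
 * installed by shadow_l2_table() survive.  L1 shadows are simply freed,
 * and the restart flag tells the caller to rescan the bucket from its
 * head, presumably because delete_shadow_status() can relink the chain
 * being walked.
 */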
static void clear_shadow_state(struct domain *d)
{
    int i;
    struct shadow_status *x;

    shadow_audit(d, 1);

    for ( i = 0; i < shadow_ht_buckets; i++ )
    {
    retry:
        /* Skip empty buckets. */
        x = &d->arch.shadow_ht[i];
        if ( x->pfn == 0 )
            continue;

        if ( clear_shadow_page(d, x) )
            goto retry;

        for ( x = x->next; x != NULL; x = x->next )
            if ( clear_shadow_page(d, x) )
                goto retry;

        shadow_audit(d, 0);
    }

    SH_VLOG("Scan shadow table. l1=%d l2=%d",
            perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages));
}
void shadow_mode_init(void)
{
}
int shadow_mode_enable(struct domain *d, unsigned int mode)
{
    d->arch.shadow_ht = xmalloc_array(struct shadow_status, shadow_ht_buckets);
    if ( d->arch.shadow_ht == NULL )
        goto nomem;
    memset(d->arch.shadow_ht, 0,
           shadow_ht_buckets * sizeof(struct shadow_status));

    if ( mode == SHM_logdirty )
    {
        d->arch.shadow_dirty_bitmap_size = (d->max_pages + 63) & ~63;
        d->arch.shadow_dirty_bitmap =
            xmalloc_array(unsigned long, d->arch.shadow_dirty_bitmap_size /
                          (8 * sizeof(unsigned long)));
        if ( d->arch.shadow_dirty_bitmap == NULL )
        {
            d->arch.shadow_dirty_bitmap_size = 0;
            goto nomem;
        }
        memset(d->arch.shadow_dirty_bitmap, 0,
               d->arch.shadow_dirty_bitmap_size/8);
    }

    d->arch.shadow_mode = mode;

    __shadow_mk_pagetable(d->exec_domain[0]); /* XXX SMP */
    return 0;

 nomem:
    if ( d->arch.shadow_ht != NULL )
        xfree(d->arch.shadow_ht);
    d->arch.shadow_ht = NULL;
    return -ENOMEM;
}
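
/*
 * Dirty-bitmap sizing, worked through once for clarity: one bit per
 * guest page, rounded up to a multiple of 64 pages by
 * (max_pages + 63) & ~63.  For example, max_pages = 1000 gives
 *
 *     shadow_dirty_bitmap_size = (1000 + 63) & ~63 = 1024 bits
 *     bytes = 1024 / 8 = 128
 *     longs = 128 / sizeof(unsigned long) = 32 on a 32-bit build
 *
 * which matches the xmalloc_array() and memset() sizes used above.
 */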
void __shadow_mode_disable(struct domain *d)
{
    struct shadow_status *x, *n;

    free_shadow_state(d);
    d->arch.shadow_mode = 0;

    SH_VLOG("freed tables count=%d l1=%d l2=%d",
            d->arch.shadow_page_count, perfc_value(shadow_l1_pages),
            perfc_value(shadow_l2_pages));

    n = d->arch.shadow_ht_extras;
    while ( (x = n) != NULL )
    {
        d->arch.shadow_extras_count--;
        n = *((struct shadow_status **)(&x[shadow_ht_extra_size]));
        xfree(x);
    }

    d->arch.shadow_ht_extras = NULL;
    ASSERT(d->arch.shadow_extras_count == 0);
    SH_LOG("freed extras, now %d", d->arch.shadow_extras_count);

    if ( d->arch.shadow_dirty_bitmap != NULL )
    {
        xfree(d->arch.shadow_dirty_bitmap);
        d->arch.shadow_dirty_bitmap = 0;
        d->arch.shadow_dirty_bitmap_size = 0;
    }

    xfree(d->arch.shadow_ht);
    d->arch.shadow_ht = NULL;
}
static int shadow_mode_table_op(
    struct domain *d, dom0_shadow_control_t *sc)
{
    unsigned int op = sc->op;
    int i, rc = 0;

    ASSERT(spin_is_locked(&d->arch.shadow_lock));

    SH_VLOG("shadow mode table op %p %p count %d",
            pagetable_val(d->exec_domain[0]->arch.pagetable),    /* XXX SMP */
            pagetable_val(d->exec_domain[0]->arch.shadow_table), /* XXX SMP */
            d->arch.shadow_page_count);

    shadow_audit(d, 1);

    switch ( op )
    {
    case DOM0_SHADOW_CONTROL_OP_FLUSH:
        free_shadow_state(d);

        d->arch.shadow_fault_count       = 0;
        d->arch.shadow_dirty_count       = 0;
        d->arch.shadow_dirty_net_count   = 0;
        d->arch.shadow_dirty_block_count = 0;

        break;

    case DOM0_SHADOW_CONTROL_OP_CLEAN:
        clear_shadow_state(d);

        sc->stats.fault_count       = d->arch.shadow_fault_count;
        sc->stats.dirty_count       = d->arch.shadow_dirty_count;
        sc->stats.dirty_net_count   = d->arch.shadow_dirty_net_count;
        sc->stats.dirty_block_count = d->arch.shadow_dirty_block_count;

        d->arch.shadow_fault_count       = 0;
        d->arch.shadow_dirty_count       = 0;
        d->arch.shadow_dirty_net_count   = 0;
        d->arch.shadow_dirty_block_count = 0;

        if ( (d->max_pages > sc->pages) ||
             (sc->dirty_bitmap == NULL) ||
             (d->arch.shadow_dirty_bitmap == NULL) )
        {
            rc = -EINVAL;
            break;
        }

        sc->pages = d->max_pages;

#define chunk (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
        for ( i = 0; i < d->max_pages; i += chunk )
        {
            int bytes = ((((d->max_pages - i) > chunk) ?
                          chunk : (d->max_pages - i)) + 7) / 8;

            if (copy_to_user(
                    sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
                    d->arch.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
                    bytes))
            {
                // copy_to_user can fail when copying to guest app memory.
                // app should zero buffer after mallocing, and pin it
                rc = -EINVAL;
                memset(
                    d->arch.shadow_dirty_bitmap +
                    (i/(8*sizeof(unsigned long))),
                    0, (d->max_pages/8) - (i/(8*sizeof(unsigned long))));
                break;
            }

            memset(
                d->arch.shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
                0, bytes);
        }

        break;

    case DOM0_SHADOW_CONTROL_OP_PEEK:
        sc->stats.fault_count       = d->arch.shadow_fault_count;
        sc->stats.dirty_count       = d->arch.shadow_dirty_count;
        sc->stats.dirty_net_count   = d->arch.shadow_dirty_net_count;
        sc->stats.dirty_block_count = d->arch.shadow_dirty_block_count;

        if ( (d->max_pages > sc->pages) ||
             (sc->dirty_bitmap == NULL) ||
             (d->arch.shadow_dirty_bitmap == NULL) )
        {
            rc = -EINVAL;
            break;
        }

        sc->pages = d->max_pages;
        if (copy_to_user(
            sc->dirty_bitmap, d->arch.shadow_dirty_bitmap, (d->max_pages+7)/8))
        {
            rc = -EINVAL;
            break;
        }

        break;

    default:
        rc = -EINVAL;
        break;
    }

    SH_VLOG("shadow mode table op : page count %d", d->arch.shadow_page_count);
    shadow_audit(d, 1);
    __shadow_mk_pagetable(d->exec_domain[0]); /* XXX SMP */
    return rc;
}
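
/*
 * Summary of the table ops handled above: FLUSH discards all shadow
 * state and zeroes the counters; CLEAN returns the accumulated stats,
 * copies the dirty bitmap out to the caller and clears it, transferring
 * 8*1024 bits (1kB) per iteration to stay friendly to the L1 cache;
 * PEEK copies the bitmap out without clearing anything.  A log-dirty
 * client such as a migration tool would be expected to loop on CLEAN
 * and use PEEK only for read-only snapshots, though nothing here
 * enforces that.
 */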
int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
{
    unsigned int op = sc->op;
    int rc = 0;

    if ( unlikely(d == current->domain) )
    {
        DPRINTK("Don't try to do a shadow op on yourself!\n");
        return -EINVAL;
    }

    domain_pause(d);
    synchronise_pagetables(~0UL);

    shadow_lock(d);

    switch ( op )
    {
    case DOM0_SHADOW_CONTROL_OP_OFF:
        shadow_mode_disable(d);
        break;

    case DOM0_SHADOW_CONTROL_OP_ENABLE_TEST:
        shadow_mode_disable(d);
        rc = shadow_mode_enable(d, SHM_test);
        break;

    case DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY:
        shadow_mode_disable(d);
        rc = shadow_mode_enable(d, SHM_logdirty);
        break;

    default:
        rc = shadow_mode(d) ? shadow_mode_table_op(d, sc) : -EINVAL;
        break;
    }

    shadow_unlock(d);

    domain_unpause(d);

    return rc;
}
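
/*
 * Control-path sequencing, as implemented above: the target domain is
 * paused and all page tables are synchronised before the shadow lock is
 * taken, so none of the target's VCPUs can be running on (or updating)
 * its tables while the mode change or table op executes.  The self-check
 * at the top rejects operating on the calling domain, presumably because
 * pausing yourself from inside your own hypercall could never complete.
 */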
static inline struct pfn_info *alloc_shadow_page(struct domain *d)
{
    struct pfn_info *page = alloc_domheap_page(NULL);

    d->arch.shadow_page_count++;

    if ( unlikely(page == NULL) )
    {
        printk("Couldn't alloc shadow page! count=%d\n",
               d->arch.shadow_page_count);
        SH_VLOG("Shadow tables l1=%d l2=%d",
                perfc_value(shadow_l1_pages),
                perfc_value(shadow_l2_pages));
        BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
    }

    return page;
}
void unshadow_table(unsigned long gpfn, unsigned int type)
{
    unsigned long  smfn;
    struct domain *d = page_get_owner(&frame_table[gpfn]);

    SH_VLOG("unshadow_table type=%08x gpfn=%p", type, gpfn);

    perfc_incrc(unshadow_table_count);

    /*
     * This function is the same for all p.t. pages. Even for multi-processor
     * guests there won't be a race here as this CPU was the one that
     * cmpxchg'ed the page to invalid.
     */
    smfn = __shadow_status(d, gpfn) & PSH_pfn_mask;
    delete_shadow_status(d, gpfn);
    free_shadow_page(d, &frame_table[smfn]);
}
#ifdef CONFIG_VMX
void vmx_shadow_clear_state(struct domain *d)
{
    SH_VVLOG("vmx_clear_shadow_state:");
    clear_shadow_state(d);
}
#endif
unsigned long shadow_l2_table(
    struct domain *d, unsigned long gpfn)
{
    struct pfn_info *spfn_info;
    unsigned long    spfn;
    l2_pgentry_t    *spl2e = 0;
    unsigned long    guest_gpfn;

    guest_gpfn = __mfn_to_gpfn(d, gpfn);

    SH_VVLOG("shadow_l2_table( %p )", gpfn);

    perfc_incrc(shadow_l2_table_count);

    if ( (spfn_info = alloc_shadow_page(d)) == NULL )
        BUG(); /* XXX Deal gracefully with failure. */

    spfn_info->u.inuse.type_info = PGT_l2_page_table;
    perfc_incr(shadow_l2_pages);

    spfn = spfn_info - frame_table;
    /* Mark pfn as being shadowed; update field to point at shadow. */
    set_shadow_status(d, guest_gpfn, spfn | PSH_shadowed);

#ifdef __i386__
    /* Install hypervisor and 2x linear p.t. mappings. */
    if ( shadow_mode(d) == SHM_full_32 )
    {
#ifdef CONFIG_VMX
        vmx_update_shadow_state(d->exec_domain[0], gpfn, spfn);
#else
        panic("Shadow Full 32 not yet implemented without VMX\n");
#endif
    }
    else
    {
        spl2e = (l2_pgentry_t *)map_domain_mem(spfn << PAGE_SHIFT);
        /*
         * We could proactively fill in PDEs for pages that are already
         * shadowed. However, we tried it and it didn't help performance.
         * This is simpler.
         */
        memset(spl2e, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE*sizeof(l2_pgentry_t));

        /* Install hypervisor and 2x linear p.t. mappings. */
        memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
               &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
               HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
        spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
            mk_l2_pgentry((gpfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
        spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
            mk_l2_pgentry((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
        spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
            mk_l2_pgentry(__pa(page_get_owner(&frame_table[gpfn])->arch.mm_perdomain_pt) |
                          __PAGE_HYPERVISOR);
    }
#endif

    if ( shadow_mode(d) != SHM_full_32 )
        unmap_domain_mem(spl2e);

    SH_VLOG("shadow_l2_table( %p -> %p)", gpfn, spfn);
    return spfn;
}
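
/*
 * The two "linear p.t." slots installed above are what make the
 * linear_pg_table[] and shadow_linear_pg_table[] accesses elsewhere in
 * this file work: the LINEAR_PT slot maps the guest L2 (gpfn), so guest
 * PTEs can be read at linear_pg_table[va >> PAGE_SHIFT], and the
 * SH_LINEAR_PT slot maps this shadow L2 itself (spfn), so shadow PTEs
 * can be read and written the same way, e.g. in shadow_fault() and
 * vmx_shadow_invlpg().
 */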
static void shadow_map_l1_into_current_l2(unsigned long va)
{
    struct exec_domain *ed = current;
    struct domain *d = ed->domain;
    unsigned long    *gpl1e, *spl1e, gl2e, sl2e, gl1pfn, sl1pfn=0, sl1ss;
    struct pfn_info  *sl1pfn_info;
    int               i;

    __guest_get_l2e(ed, va, &gl2e);

    gl1pfn = gl2e >> PAGE_SHIFT;

    sl1ss = __shadow_status(d, gl1pfn);
    if ( !(sl1ss & PSH_shadowed) )
    {
        /* This L1 is NOT already shadowed so we need to shadow it. */
        SH_VVLOG("4a: l1 not shadowed ( %p )", sl1pfn);

        sl1pfn_info = alloc_shadow_page(d);
        sl1pfn_info->u.inuse.type_info = PGT_l1_page_table;

        sl1pfn = sl1pfn_info - frame_table;

        perfc_incrc(shadow_l1_table_count);
        perfc_incr(shadow_l1_pages);

        set_shadow_status(d, gl1pfn, PSH_shadowed | sl1pfn);

        l2pde_general(d, &gl2e, &sl2e, sl1pfn);

        __guest_set_l2e(ed, va, gl2e);
        __shadow_set_l2e(ed, va, sl2e);

        gpl1e = (unsigned long *) &(linear_pg_table[
            (va>>L1_PAGETABLE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1)]);

        spl1e = (unsigned long *) &(shadow_linear_pg_table[
            (va>>L1_PAGETABLE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1)]);

        for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
            l1pte_propagate_from_guest(d, &gpl1e[i], &spl1e[i]);
    }
    else
    {
        /* This L1 is shadowed already, but the L2 entry is missing. */
        SH_VVLOG("4b: was shadowed, l2 missing ( %p )", sl1pfn);

        sl1pfn = sl1ss & PSH_pfn_mask;
        l2pde_general(d, &gl2e, &sl2e, sl1pfn);
        __guest_set_l2e(ed, va, gl2e);
        __shadow_set_l2e(ed, va, sl2e);
    }
}
#ifdef CONFIG_VMX
void vmx_shadow_invlpg(struct domain *d, unsigned long va)
{
    unsigned long gpte, spte, host_pfn;

    if (__put_user(0L, (unsigned long *)
                   &shadow_linear_pg_table[va >> PAGE_SHIFT])) {
        vmx_shadow_clear_state(d);
        return;
    }

    if (__get_user(gpte, (unsigned long *)
                   &linear_pg_table[va >> PAGE_SHIFT])) {
        return;
    }

    host_pfn = phys_to_machine_mapping(gpte >> PAGE_SHIFT);
    spte = (host_pfn << PAGE_SHIFT) | (gpte & ~PAGE_MASK);

    if (__put_user(spte, (unsigned long *)
                   &shadow_linear_pg_table[va >> PAGE_SHIFT])) {
        return;
    }
}
#endif
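
/*
 * vmx_shadow_invlpg() above emulates a guest INVLPG against the shadow:
 * the shadow PTE for va is first zapped to zero, and if even that store
 * faults the shadow linear map is unusable, so all shadow state is
 * dropped.  Otherwise the guest PTE is re-read and re-installed with its
 * frame number translated via phys_to_machine_mapping().  Unlike
 * shadow_fault(), this path copies the guest PTE flags verbatim rather
 * than going through l1pte_propagate_from_guest().
 */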
int shadow_fault(unsigned long va, long error_code)
{
    unsigned long gpte, spte = 0;
    struct exec_domain *ed = current;
    struct domain *d = ed->domain;

    SH_VVLOG("shadow_fault( va=%p, code=%ld )", va, error_code );

    check_pagetable(d, ed->arch.pagetable, "pre-sf");

    /*
     * STEP 1. A fast-reject set of checks with no locking.
     */

    if ( unlikely(__get_user(gpte, (unsigned long *)
                             &linear_pg_table[va >> PAGE_SHIFT])) )
    {
        SH_VVLOG("shadow_fault - EXIT: read gpte faulted" );
        return 0;
    }

    if ( !(gpte & _PAGE_PRESENT) )
    {
        SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
        return 0;
    }

    if ( (error_code & 2) && !(gpte & _PAGE_RW) )
    {
        /* Write fault on a read-only mapping. */
        return 0;
    }

    /*
     * STEP 2. Take the shadow lock and re-check the guest PTE.
     */

    shadow_lock(d);

    if ( unlikely(__get_user(gpte, (unsigned long *)
                             &linear_pg_table[va >> PAGE_SHIFT])) )
    {
        SH_VVLOG("shadow_fault - EXIT: read gpte faulted2" );
        shadow_unlock(d);
        return 0;
    }

    if ( unlikely(!(gpte & _PAGE_PRESENT)) )
    {
        SH_VVLOG("shadow_fault - EXIT: gpte not present2 (%lx)",gpte );
        shadow_unlock(d);
        return 0;
    }

    /* Write fault? */
    if ( error_code & 2 )
    {
        if ( unlikely(!(gpte & _PAGE_RW)) )
        {
            /* Write fault on a read-only mapping. */
            SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%lx)", gpte);
            shadow_unlock(d);
            return 0;
        }

        l1pte_write_fault(d, &gpte, &spte);
    }
    else
    {
        l1pte_read_fault(d, &gpte, &spte);
    }

    /*
     * STEP 3. Write the modified shadow PTE and guest PTE back to the tables.
     */

    /* XXX Watch out for read-only L2 entries! (not used in Linux). */
    if ( unlikely(__put_user(gpte, (unsigned long *)
                             &linear_pg_table[va >> PAGE_SHIFT])) )
        domain_crash();

    /*
     * Update of shadow PTE can fail because the L1 p.t. is not shadowed,
     * or because the shadow isn't linked into this shadow L2 p.t.
     */
    if ( unlikely(__put_user(spte, (unsigned long *)
                             &shadow_linear_pg_table[va >> PAGE_SHIFT])) )
    {
        SH_VVLOG("3: not shadowed/mapped gpte=%p spte=%p", gpte, spte);
        shadow_map_l1_into_current_l2(va);
        shadow_linear_pg_table[va >> PAGE_SHIFT] = mk_l1_pgentry(spte);
    }

    perfc_incrc(shadow_fixup_count);
    d->arch.shadow_fault_count++;

    shadow_unlock(d);

    check_pagetable(d, ed->arch.pagetable, "post-sf");
    return EXCRET_fault_fixed;
}
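
/*
 * Recap of the fault path above: error_code bit 1 (the & 2 test) is the
 * x86 "caused by a write" bit, so the unlocked fast path can reject
 * write faults on genuinely read-only guest mappings early.  With the
 * lock held the guest PTE is re-read, the actual PTE fixup is delegated
 * to l1pte_read_fault()/l1pte_write_fault() (defined elsewhere), and the
 * resulting gpte/spte pair is written back through the two linear maps.
 * A failed shadow write means the L1 shadow is absent or not yet linked
 * into this shadow L2, which shadow_map_l1_into_current_l2() repairs
 * before the entry is stored directly.
 */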
void shadow_l1_normal_pt_update(
    unsigned long pa, unsigned long gpte,
    unsigned long *prev_smfn_ptr,
    l1_pgentry_t **prev_spl1e_ptr)
{
    unsigned long smfn, spte, prev_smfn = *prev_smfn_ptr;
    l1_pgentry_t *spl1e, *prev_spl1e = *prev_spl1e_ptr;

    /* N.B. To get here, we know the l1 page *must* be shadowed. */
    SH_VVLOG("shadow_l1_normal_pt_update pa=%p, gpte=%p, "
             "prev_smfn=%p, prev_spl1e=%p",
             pa, gpte, prev_smfn, prev_spl1e);

    smfn = __shadow_status(current->domain, pa >> PAGE_SHIFT) & PSH_pfn_mask;

    if ( smfn == prev_smfn )
    {
        spl1e = prev_spl1e;
    }
    else
    {
        if ( prev_spl1e != NULL )
            unmap_domain_mem( prev_spl1e );
        spl1e = (l1_pgentry_t *)map_domain_mem(smfn << PAGE_SHIFT);
        *prev_smfn_ptr  = smfn;
        *prev_spl1e_ptr = spl1e;
    }

    l1pte_propagate_from_guest(current->domain, &gpte, &spte);
    spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t)] = mk_l1_pgentry(spte);
}
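
/*
 * The prev_smfn/prev_spl1e in/out parameters above act as a one-entry
 * cache for batched callers: a run of guest L1 updates landing in the
 * same shadow L1 reuses a single map_domain_mem() mapping instead of
 * mapping and unmapping per entry.  Nothing here unmaps the final
 * cached mapping, so the caller is presumably responsible for unmapping
 * *prev_spl1e_ptr when the batch ends.
 */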
void shadow_l2_normal_pt_update(unsigned long pa, unsigned long gpde)
{
    unsigned long sl2mfn, spde;
    l2_pgentry_t *spl2e;
    unsigned long sl1mfn;

    /* N.B. To get here, we know the l2 page *must* be shadowed. */
    SH_VVLOG("shadow_l2_normal_pt_update pa=%p, gpde=%p",pa,gpde);

    sl2mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT) & PSH_pfn_mask;

    sl1mfn = (gpde & _PAGE_PRESENT) ?
        __shadow_status(current->domain, gpde >> PAGE_SHIFT) : 0;

    /* XXXX Should mark guest pte as DIRTY and ACCESSED too! */
    l2pde_general(current->domain, &gpde, &spde, sl1mfn);
    spl2e = (l2_pgentry_t *)map_domain_mem(sl2mfn << PAGE_SHIFT);
    spl2e[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t)] = mk_l2_pgentry(spde);
    unmap_domain_mem(spl2e);
}
/************************************************************************/
/************************************************************************/
/************************************************************************/

#if SHADOW_DEBUG

// BUG: these are not SMP safe...
static int sh_l2_present;
static int sh_l1_present;
static int errors;
char * sh_check_name;

#define virt_to_phys2(adr) ({                                             \
    unsigned long _a   = (unsigned long)(adr);                            \
    unsigned long _pte = l1_pgentry_val(                                  \
                             shadow_linear_pg_table[_a >> PAGE_SHIFT]);   \
    unsigned long _pa  = _pte & PAGE_MASK;                                \
    _pa | (_a & ~PAGE_MASK);                                              \
})
#define FAIL(_f, _a...)                                                   \
    do {                                                                  \
        printk("XXX %s-FAIL (%d,%d)" _f " g=%p s=%p &g=%p &s=%p"          \
               " pa(&g)=%p pa(&s)=%p\n",                                  \
               sh_check_name, level, i, ## _a , gpte, spte, pgpte, pspte, \
               virt_to_phys2(pgpte), virt_to_phys2(pspte));               \
        errors++;                                                         \
    } while ( 0 )
static int check_pte(
    struct domain *d, unsigned long *pgpte, unsigned long *pspte,
    int level, int i)
{
    unsigned gpte = *pgpte;
    unsigned spte = *pspte;
    unsigned long mask, gpfn, smfn;

    if ( (spte == 0) || (spte == 0xdeadface) || (spte == 0x00000E00) )
        return 1;  /* always safe */

    if ( !(spte & _PAGE_PRESENT) )
        FAIL("Non zero not present spte");

    if ( level == 2 ) sh_l2_present++;
    if ( level == 1 ) sh_l1_present++;

    if ( !(gpte & _PAGE_PRESENT) )
        FAIL("Guest not present yet shadow is");

    mask = ~(_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|PAGE_MASK);

    if ( (spte & mask) != (gpte & mask) )
        FAIL("Corrupt?");

    if ( (spte & _PAGE_DIRTY ) && !(gpte & _PAGE_DIRTY) )
        FAIL("Dirty coherence");

    if ( (spte & _PAGE_ACCESSED ) && !(gpte & _PAGE_ACCESSED) )
        FAIL("Accessed coherence");

    if ( (spte & _PAGE_RW ) && !(gpte & _PAGE_RW) )
        FAIL("RW coherence");

    if ( (spte & _PAGE_RW ) && !((gpte & _PAGE_RW) && (gpte & _PAGE_DIRTY)) )
        FAIL("RW2 coherence");

    smfn = spte >> PAGE_SHIFT;
    gpfn = gpte >> PAGE_SHIFT;

    if ( gpfn == smfn )
    {
        if ( level > 1 )
            FAIL("Linear map ???");    /* XXX this will fail on BSD */
    }
    else
    {
        if ( level < 2 )
            FAIL("Shadow in L1 entry?");

        if ( __shadow_status(d, gpfn) != (PSH_shadowed | smfn) )
            FAIL("smfn problem g.sf=%p",
                 __shadow_status(d, gpfn) );
    }

    return 1;
}
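
/*
 * Invariants audited by check_pte() above: a present shadow entry
 * requires a present guest entry; outside the DIRTY/ACCESSED/RW/frame
 * bits the two must agree exactly; and the shadow may only have DIRTY,
 * ACCESSED or RW set if the guest does too, with RW additionally
 * requiring the guest DIRTY bit (presumably so the first write to a
 * clean page still traps and can be logged).  L1 entries must reference
 * the same frame in guest and shadow, while L2 entries must differ and
 * __shadow_status(gpfn) must record exactly that shadow frame.
 */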
static int check_l1_table(
    struct domain *d,
    unsigned long g2mfn, unsigned long s2mfn)
{
    int i;
    unsigned long *gpl1e, *spl1e;

    gpl1e = map_domain_mem(g2mfn << PAGE_SHIFT);
    spl1e = map_domain_mem(s2mfn << PAGE_SHIFT);

    for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
        check_pte(d, &gpl1e[i], &spl1e[i], 1, i);

    unmap_domain_mem(spl1e);
    unmap_domain_mem(gpl1e);

    return 1;
}
#define FAILPT(_f, _a...)                                                 \
    do {                                                                  \
        printk("XXX FAIL %s-PT" _f "\n", s, ## _a );                      \
        errors++;                                                         \
    } while ( 0 )
void check_pagetable(struct domain *d, pagetable_t pt, char *s)
{
    unsigned long gptbase = pagetable_val(pt);
    unsigned long ptbase_pfn, smfn, ss;
    unsigned long i;
    l2_pgentry_t *gpl2e, *spl2e;
    unsigned long ptbase_mfn = 0;
    int cpu = current->processor;

    errors = 0;
    sh_check_name = s;

    SH_VVLOG("%s-PT Audit", s);

    sh_l2_present = sh_l1_present = 0;

    ptbase_pfn = gptbase >> PAGE_SHIFT;
    ptbase_mfn = __gpfn_to_mfn(d, ptbase_pfn);

    ss = __shadow_status(d, ptbase_pfn);

    if ( ! (ss & PSH_shadowed) )
    {
        printk("%s-PT %p not shadowed\n", s, gptbase);

        if ( ss != 0 )
            BUG();
        return;
    }

    smfn = ss & PSH_pfn_mask;

    if ( ss != (PSH_shadowed | smfn) )
        FAILPT("ptbase shadow inconsistent1");

    gpl2e = (l2_pgentry_t *) map_domain_mem( ptbase_mfn << PAGE_SHIFT );
    spl2e = (l2_pgentry_t *) map_domain_mem( smfn << PAGE_SHIFT );

    if ( memcmp(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
                &gpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
                ((SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT) -
                 DOMAIN_ENTRIES_PER_L2_PAGETABLE) * sizeof(l2_pgentry_t)) )
    {
        for ( i = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
              i < (SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT);
              i++ )
            printk("+++ (%d) %p %p\n",i,
                   l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]));
        FAILPT("hypervisor entries inconsistent");
    }

    if ( (l2_pgentry_val(spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
          l2_pgentry_val(gpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT])) )
        FAILPT("hypervisor linear map inconsistent");

    if ( (l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >>
                               L2_PAGETABLE_SHIFT]) !=
          ((smfn << PAGE_SHIFT) | __PAGE_HYPERVISOR)) )
        FAILPT("hypervisor shadow linear map inconsistent %p %p",
               l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >>
                                    L2_PAGETABLE_SHIFT]),
               (smfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);

    if ( shadow_mode(d) != SHM_full_32 ) {
        // BUG: this shouldn't be using exec_domain[0] here...
        if ( (l2_pgentry_val(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
              ((__pa(page_get_owner(&frame_table[ptbase_pfn])->arch.mm_perdomain_pt) |
                __PAGE_HYPERVISOR))) )
            FAILPT("hypervisor per-domain map inconsistent");
    }

    /* Check the whole L2. */
    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
        check_pte(d, &l2_pgentry_val(gpl2e[i]), &l2_pgentry_val(spl2e[i]), 2, i);

    /* Go back and recurse. */
    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
    {
        unsigned long gl1pfn = l2_pgentry_val(gpl2e[i]) >> PAGE_SHIFT;
        unsigned long gl1mfn = __gpfn_to_mfn(d, gl1pfn);
        unsigned long sl1mfn = l2_pgentry_val(spl2e[i]) >> PAGE_SHIFT;

        if ( l2_pgentry_val(spl2e[i]) != 0 )
        {
            // First check to see if this guest page is currently the active
            // PTWR page. If so, then we compare the (old) cached copy of the
            // guest page to the shadow, and not the currently writable (and
            // thus potentially out-of-sync) guest page.
            //
            if ( ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l1va &&
                 (i == ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l2_idx) &&
                 likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
            {
                gl1mfn = (__pa(ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].page) >>
                          PAGE_SHIFT);
            }

            check_l1_table(d, gl1mfn, sl1mfn);
        }
    }

    unmap_domain_mem(spl2e);
    unmap_domain_mem(gpl2e);

    SH_VVLOG("PT verified : l2_present = %d, l1_present = %d",
             sh_l2_present, sh_l1_present);

    if ( errors )
        BUG();

    return;
}

#endif // SHADOW_DEBUG