
view xen/arch/x86/shadow.c @ 3658:0ef6e8e6e85d

bitkeeper revision 1.1159.212.71 (4200f0afX_JumfbEHQex6TdFENULMQ)

Merge labyrinth.cl.cam.ac.uk:/auto/groups/xeno-xenod/BK/xen-unstable.bk
into labyrinth.cl.cam.ac.uk:/auto/groups/xeno/users/iap10/xeno-clone/xen-unstable.bk
author iap10@labyrinth.cl.cam.ac.uk
date Wed Feb 02 15:24:31 2005 +0000 (2005-02-02)
parents a4621fab44b4 10a0f6b0a996
children 8472fafee3cf
/* -*-  Mode:C++; c-file-style:BSD; c-basic-offset:4; tab-width:4 -*- */

#include <xen/config.h>
#include <xen/types.h>
#include <xen/mm.h>
#include <asm/shadow.h>
#include <asm/domain_page.h>
#include <asm/page.h>
#include <xen/event.h>
#include <xen/trace.h>
/********

To use these shadow page tables, guests must not rely on the ACCESSED
and DIRTY bits on L2 PTEs being accurate -- they will typically all be set.

I doubt this will break anything. (If guests want to use the va_update
mechanism they've signed up for this anyhow...)

There's a per-domain shadow table spin lock which works fine for SMP
hosts. We don't have to worry about interrupts as no shadow operations
happen in an interrupt context. It's probably not quite ready for SMP
guest operation as we have to worry about synchronisation between gpte
and spte updates. It's possible that this might only happen in a
hypercall context, in which case we'll probably have a per-domain
hypercall lock anyhow (at least initially).

********/
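
/*
 * Return a shadow page to the domain heap, decrementing the shadow page
 * count and the perf counter for its (L1 or L2) pagetable type.
 */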
static inline void free_shadow_page(
    struct mm_struct *m, struct pfn_info *page)
{
    m->shadow_page_count--;

    switch ( page->u.inuse.type_info & PGT_type_mask )
    {
    case PGT_l1_page_table:
        perfc_decr(shadow_l1_pages);
        break;

    case PGT_l2_page_table:
        perfc_decr(shadow_l2_pages);
        break;

    default:
        printk("Free shadow weird page type pfn=%08x type=%08x\n",
               page - frame_table, page->u.inuse.type_info);
        break;
    }

    free_domheap_page(page);
}
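
/*
 * Free every shadow page referenced from the shadow hash table, resetting
 * the head entry of each bucket and returning chained entries to the mm's
 * free list.
 */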
static void free_shadow_state(struct mm_struct *m)
{
    int i, free = 0;
    struct shadow_status *x, *n;

    /*
     * WARNING! The shadow page table must not currently be in use!
     * e.g., You are expected to have paused the domain and synchronized CR3.
     */

    shadow_audit(m, 1);

    /* Free each hash chain in turn. */
    for ( i = 0; i < shadow_ht_buckets; i++ )
    {
        /* Skip empty buckets. */
        x = &m->shadow_ht[i];
        if ( x->pfn == 0 )
            continue;

        /* Free the head page. */
        free_shadow_page(
            m, &frame_table[x->spfn_and_flags & PSH_pfn_mask]);

        /* Reinitialise the head node. */
        x->pfn            = 0;
        x->spfn_and_flags = 0;
        n                 = x->next;
        x->next           = NULL;

        free++;

        /* Iterate over non-head nodes. */
        for ( x = n; x != NULL; x = n )
        {
            /* Free the shadow page. */
            free_shadow_page(
                m, &frame_table[x->spfn_and_flags & PSH_pfn_mask]);

            /* Re-initialise the chain node. */
            x->pfn            = 0;
            x->spfn_and_flags = 0;

            /* Add to the free list. */
            n                 = x->next;
            x->next           = m->shadow_ht_free;
            m->shadow_ht_free = x;

            free++;
        }

        shadow_audit(m, 0);
    }

    SH_LOG("Free shadow table. Freed=%d.", free);
}
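
/*
 * Clear one shadow hash entry: a shadow L2 has its guest-visible entries
 * zeroed in place, while a shadow L1 is unhashed and freed.  Returns
 * nonzero if the caller must restart its walk of the current chain.
 */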
static inline int clear_shadow_page(
    struct mm_struct *m, struct shadow_status *x)
{
    unsigned long   *p;
    int              restart = 0;
    struct pfn_info *spage = &frame_table[x->spfn_and_flags & PSH_pfn_mask];

    switch ( spage->u.inuse.type_info & PGT_type_mask )
    {
        /* We clear L2 pages by zeroing the guest entries. */
    case PGT_l2_page_table:
        p = map_domain_mem((spage - frame_table) << PAGE_SHIFT);
        if ( m->shadow_mode == SHM_full_32 )
            memset(p, 0, ENTRIES_PER_L2_PAGETABLE * sizeof(*p));
        else
            memset(p, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE * sizeof(*p));
        unmap_domain_mem(p);
        break;

        /* We clear L1 pages by freeing them: no benefit from zeroing them. */
    case PGT_l1_page_table:
        delete_shadow_status(m, x->pfn);
        free_shadow_page(m, spage);
        restart = 1; /* We need to go to start of list again. */
        break;
    }

    return restart;
}
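
/*
 * Clear every shadow in the hash table (zeroing shadow L2s, freeing shadow
 * L1s) while leaving the hash table itself allocated, so that subsequent
 * faults repopulate the shadows.
 */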
static void clear_shadow_state(struct mm_struct *m)
{
    int i;
    struct shadow_status *x;

    shadow_audit(m, 1);

    for ( i = 0; i < shadow_ht_buckets; i++ )
    {
    retry:
        /* Skip empty buckets. */
        x = &m->shadow_ht[i];
        if ( x->pfn == 0 )
            continue;

        if ( clear_shadow_page(m, x) )
            goto retry;

        for ( x = x->next; x != NULL; x = x->next )
            if ( clear_shadow_page(m, x) )
                goto retry;

        shadow_audit(m, 0);
    }

    SH_VLOG("Scan shadow table. l1=%d l2=%d",
            perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages));
}
void shadow_mode_init(void)
{
}
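
/*
 * Switch a domain into shadow mode 'mode': allocate the shadow hash table
 * and, for log-dirty mode, a dirty bitmap sized to max_pages, then build
 * the initial shadow pagetable pointer.
 */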
int shadow_mode_enable(struct domain *p, unsigned int mode)
{
    struct mm_struct *m = &p->exec_domain[0]->mm;

    m->shadow_ht = xmalloc_array(struct shadow_status, shadow_ht_buckets);
    if ( m->shadow_ht == NULL )
        goto nomem;
    memset(m->shadow_ht, 0, shadow_ht_buckets * sizeof(struct shadow_status));

    if ( mode == SHM_logdirty )
    {
        m->shadow_dirty_bitmap_size = (p->max_pages + 63) & ~63;
        m->shadow_dirty_bitmap =
            _xmalloc(m->shadow_dirty_bitmap_size/8);
        if ( m->shadow_dirty_bitmap == NULL )
        {
            m->shadow_dirty_bitmap_size = 0;
            goto nomem;
        }
        memset(m->shadow_dirty_bitmap, 0, m->shadow_dirty_bitmap_size/8);
    }

    m->shadow_mode = mode;

    __shadow_mk_pagetable(m);
    return 0;

 nomem:
    if ( m->shadow_ht != NULL )
        xfree(m->shadow_ht);
    m->shadow_ht = NULL;
    return -ENOMEM;
}
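
/*
 * Tear down all shadow state for a domain: free the shadow pages, the
 * hash-table extension blocks, the dirty bitmap, and the hash table itself.
 */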
void __shadow_mode_disable(struct domain *d)
{
    struct mm_struct *m = &d->exec_domain[0]->mm;
    struct shadow_status *x, *n;

    free_shadow_state(m);
    m->shadow_mode = 0;

    SH_VLOG("freed tables count=%d l1=%d l2=%d",
            m->shadow_page_count, perfc_value(shadow_l1_pages),
            perfc_value(shadow_l2_pages));

    n = m->shadow_ht_extras;
    while ( (x = n) != NULL )
    {
        m->shadow_extras_count--;
        n = *((struct shadow_status **)(&x[shadow_ht_extra_size]));
        xfree(x);
    }

    m->shadow_ht_extras = NULL;
    ASSERT(m->shadow_extras_count == 0);
    SH_LOG("freed extras, now %d", m->shadow_extras_count);

    if ( m->shadow_dirty_bitmap != NULL )
    {
        xfree(m->shadow_dirty_bitmap);
        m->shadow_dirty_bitmap = NULL;
        m->shadow_dirty_bitmap_size = 0;
    }

    xfree(m->shadow_ht);
    m->shadow_ht = NULL;
}
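
/*
 * Handle the FLUSH/CLEAN/PEEK shadow-control sub-ops for a paused domain.
 * CLEAN and PEEK copy the dirty bitmap out to the caller's buffer; CLEAN
 * also clears the bitmap and resets the fault/dirty statistics.  Called
 * with the shadow lock held.
 */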
static int shadow_mode_table_op(
    struct domain *d, dom0_shadow_control_t *sc)
{
    unsigned int      op = sc->op;
    struct mm_struct *m = &d->exec_domain[0]->mm;
    int               i, rc = 0;

    ASSERT(spin_is_locked(&m->shadow_lock));

    SH_VLOG("shadow mode table op %08lx %08lx count %d",
            pagetable_val(m->pagetable), pagetable_val(m->shadow_table),
            m->shadow_page_count);

    shadow_audit(m, 1);

    switch ( op )
    {
    case DOM0_SHADOW_CONTROL_OP_FLUSH:
        free_shadow_state(m);

        m->shadow_fault_count       = 0;
        m->shadow_dirty_count       = 0;
        m->shadow_dirty_net_count   = 0;
        m->shadow_dirty_block_count = 0;

        break;

    case DOM0_SHADOW_CONTROL_OP_CLEAN:
        clear_shadow_state(m);

        sc->stats.fault_count       = m->shadow_fault_count;
        sc->stats.dirty_count       = m->shadow_dirty_count;
        sc->stats.dirty_net_count   = m->shadow_dirty_net_count;
        sc->stats.dirty_block_count = m->shadow_dirty_block_count;

        m->shadow_fault_count       = 0;
        m->shadow_dirty_count       = 0;
        m->shadow_dirty_net_count   = 0;
        m->shadow_dirty_block_count = 0;

        if ( (d->max_pages > sc->pages) ||
             (sc->dirty_bitmap == NULL) ||
             (m->shadow_dirty_bitmap == NULL) )
        {
            rc = -EINVAL;
            break;
        }

        sc->pages = d->max_pages;

#define chunk (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
        for ( i = 0; i < d->max_pages; i += chunk )
        {
            int bytes = ((((d->max_pages - i) > chunk) ?
                          chunk : (d->max_pages - i)) + 7) / 8;

            if ( copy_to_user(
                     sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
                     m->shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
                     bytes) )
            {
                /*
                 * copy_to_user can fail when copying to guest app memory.
                 * The app should zero the buffer after mallocing, and pin it.
                 */
                rc = -EINVAL;
                memset(
                    m->shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
                    0, (d->max_pages/8) - (i/(8*sizeof(unsigned long))));
                break;
            }

            memset(
                m->shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
                0, bytes);
        }

        break;

    case DOM0_SHADOW_CONTROL_OP_PEEK:
        sc->stats.fault_count       = m->shadow_fault_count;
        sc->stats.dirty_count       = m->shadow_dirty_count;
        sc->stats.dirty_net_count   = m->shadow_dirty_net_count;
        sc->stats.dirty_block_count = m->shadow_dirty_block_count;

        if ( (d->max_pages > sc->pages) ||
             (sc->dirty_bitmap == NULL) ||
             (m->shadow_dirty_bitmap == NULL) )
        {
            rc = -EINVAL;
            break;
        }

        sc->pages = d->max_pages;
        if ( copy_to_user(
                 sc->dirty_bitmap, m->shadow_dirty_bitmap, (d->max_pages+7)/8) )
        {
            rc = -EINVAL;
            break;
        }

        break;

    default:
        rc = -EINVAL;
        break;
    }

    SH_VLOG("shadow mode table op : page count %d", m->shadow_page_count);
    shadow_audit(m, 1);
    __shadow_mk_pagetable(m);
    return rc;
}
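
/*
 * Entry point for the DOM0 shadow-control hypercall: pause the target
 * domain, take its shadow lock, and dispatch to enable, disable, or the
 * table operations above.
 */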
int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
{
    unsigned int op = sc->op;
    int          rc = 0;

    if ( unlikely(d == current->domain) )
    {
        DPRINTK("Don't try to do a shadow op on yourself!\n");
        return -EINVAL;
    }

    domain_pause(d);
    synchronise_pagetables(~0UL);

    shadow_lock(&d->exec_domain[0]->mm);

    switch ( op )
    {
    case DOM0_SHADOW_CONTROL_OP_OFF:
        shadow_mode_disable(d);
        break;

    case DOM0_SHADOW_CONTROL_OP_ENABLE_TEST:
        shadow_mode_disable(d);
        rc = shadow_mode_enable(d, SHM_test);
        break;

    case DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY:
        shadow_mode_disable(d);
        rc = shadow_mode_enable(d, SHM_logdirty);
        break;

    default:
        rc = shadow_mode(d->exec_domain[0]) ?
            shadow_mode_table_op(d, sc) : -EINVAL;
        break;
    }

    shadow_unlock(&d->exec_domain[0]->mm);

    domain_unpause(d);

    return rc;
}
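
/*
 * Allocate a page from the domain heap for use as a shadow pagetable.
 * Allocation failure is currently fatal rather than triggering a shadow
 * flush to reclaim memory.
 */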
static inline struct pfn_info *alloc_shadow_page(struct mm_struct *m)
{
    struct pfn_info *page = alloc_domheap_page(NULL);

    m->shadow_page_count++;

    if ( unlikely(page == NULL) )
    {
        printk("Couldn't alloc shadow page! count=%d\n",
               m->shadow_page_count);
        SH_VLOG("Shadow tables l1=%d l2=%d",
                perfc_value(shadow_l1_pages),
                perfc_value(shadow_l2_pages));
        BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
    }

    return page;
}
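
/*
 * Drop the shadow of guest pagetable page 'gpfn': remove its hash entry
 * and free the shadow page itself.
 */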
void unshadow_table(unsigned long gpfn, unsigned int type)
{
    unsigned long  spfn;
    struct domain *d = frame_table[gpfn].u.inuse.domain;

    SH_VLOG("unshadow_table type=%08x gpfn=%08lx", type, gpfn);

    perfc_incrc(unshadow_table_count);

    /*
     * This function is the same for all p.t. pages. Even for multi-processor
     * guests there won't be a race here as this CPU was the one that
     * cmpxchg'ed the page to invalid.
     */
    spfn = __shadow_status(&d->exec_domain[0]->mm, gpfn) & PSH_pfn_mask;
    delete_shadow_status(&d->exec_domain[0]->mm, gpfn);
    free_shadow_page(&d->exec_domain[0]->mm, &frame_table[spfn]);
}

#ifdef CONFIG_VMX
void vmx_shadow_clear_state(struct mm_struct *m)
{
    SH_VVLOG("vmx_clear_shadow_state: \n");
    clear_shadow_state(m);
}
#endif
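
/*
 * Create the shadow for guest L2 pagetable page 'gpfn': allocate a shadow
 * page, record it in the hash table and, unless the domain is in full-32
 * (VMX) mode, install the hypervisor mappings plus the linear and
 * shadow-linear self-maps in the new shadow L2.
 */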
unsigned long shadow_l2_table(
    struct mm_struct *m, unsigned long gpfn)
{
    struct pfn_info *spfn_info;
    unsigned long    spfn;
    l2_pgentry_t    *spl2e = 0;
    unsigned long    guest_gpfn;

    __get_machine_to_phys(m, guest_gpfn, gpfn);

    SH_VVLOG("shadow_l2_table( %08lx )", gpfn);

    perfc_incrc(shadow_l2_table_count);

    if ( (spfn_info = alloc_shadow_page(m)) == NULL )
        BUG(); /* XXX Deal gracefully with failure. */

    spfn_info->u.inuse.type_info = PGT_l2_page_table;
    perfc_incr(shadow_l2_pages);

    spfn = spfn_info - frame_table;
    /* Mark pfn as being shadowed; update field to point at shadow. */
    set_shadow_status(m, guest_gpfn, spfn | PSH_shadowed);

#ifdef __i386__
    /* Install hypervisor and 2x linear p.t. mappings. */
    if ( m->shadow_mode == SHM_full_32 )
    {
        vmx_update_shadow_state(m, gpfn, spfn);
    }
    else
    {
        spl2e = (l2_pgentry_t *)map_domain_mem(spfn << PAGE_SHIFT);
        /*
         * We could proactively fill in PDEs for pages that are already
         * shadowed. However, we tried it and it didn't help performance.
         * This is simpler.
         */
        memset(spl2e, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE*sizeof(l2_pgentry_t));

        /* Install hypervisor and 2x linear p.t. mappings. */
        memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
               &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
               HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
        spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
            mk_l2_pgentry((gpfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
        spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
            mk_l2_pgentry((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
        spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
            mk_l2_pgentry(__pa(frame_table[gpfn].u.inuse.domain->mm_perdomain_pt) |
                          __PAGE_HYPERVISOR);
    }
#endif

    if ( m->shadow_mode != SHM_full_32 )
        unmap_domain_mem(spl2e);

    SH_VLOG("shadow_l2_table( %08lx -> %08lx)", gpfn, spfn);
    return spfn;
}
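
/*
 * Demand-shadow the guest L1 pagetable covering virtual address 'va',
 * creating the shadow L1 if it does not yet exist, and install the
 * resulting entries in the guest and shadow L2 slots for that address.
 */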
static void shadow_map_l1_into_current_l2(unsigned long va)
{
    struct mm_struct *m = &current->mm;
    unsigned long    *gpl1e, *spl1e, gpl2e, spl2e, gl1pfn, sl1pfn=0, sl1ss;
    struct pfn_info  *sl1pfn_info;
    int               i;

    __guest_get_pl2e(m, va, &gpl2e);

    gl1pfn = gpl2e >> PAGE_SHIFT;

    sl1ss = __shadow_status(m, gl1pfn);
    if ( !(sl1ss & PSH_shadowed) )
    {
        /* This L1 is NOT already shadowed so we need to shadow it. */
        SH_VVLOG("4a: l1 not shadowed ( %08lx )", sl1pfn);

        sl1pfn_info = alloc_shadow_page(m);
        sl1pfn_info->u.inuse.type_info = PGT_l1_page_table;

        sl1pfn = sl1pfn_info - frame_table;

        perfc_incrc(shadow_l1_table_count);
        perfc_incr(shadow_l1_pages);

        set_shadow_status(m, gl1pfn, PSH_shadowed | sl1pfn);

        l2pde_general(m, &gpl2e, &spl2e, sl1pfn);

        __guest_set_pl2e(m, va, gpl2e);
        __shadow_set_pl2e(m, va, spl2e);

        gpl1e = (unsigned long *) &(linear_pg_table[
            (va>>L1_PAGETABLE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1)]);

        spl1e = (unsigned long *) &(shadow_linear_pg_table[
            (va>>L1_PAGETABLE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1)]);

        for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
            l1pte_propagate_from_guest(m, &gpl1e[i], &spl1e[i]);
    }
    else
    {
        /* This L1 is shadowed already, but the L2 entry is missing. */
        SH_VVLOG("4b: was shadowed, l2 missing ( %08lx )", sl1pfn);

        sl1pfn = sl1ss & PSH_pfn_mask;
        l2pde_general(m, &gpl2e, &spl2e, sl1pfn);
        __guest_set_pl2e(m, va, gpl2e);
        __shadow_set_pl2e(m, va, spl2e);
    }
}
#ifdef CONFIG_VMX
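/*
 * VMX analogue of a guest INVLPG: invalidate the shadow linear mapping for
 * 'va' and, if the guest PTE can be read, re-propagate it with the machine
 * frame substituted for the guest pseudo-physical frame.
 */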
void vmx_shadow_invlpg(struct mm_struct *m, unsigned long va)
{
    unsigned long gpte, spte, host_pfn;

    if ( __put_user(0L, (unsigned long *)
                    &shadow_linear_pg_table[va >> PAGE_SHIFT]) )
    {
        vmx_shadow_clear_state(m);
        return;
    }

    if ( __get_user(gpte, (unsigned long *)
                    &linear_pg_table[va >> PAGE_SHIFT]) )
    {
        return;
    }

    host_pfn = phys_to_machine_mapping[gpte >> PAGE_SHIFT];
    spte = (host_pfn << PAGE_SHIFT) | (gpte & ~PAGE_MASK);

    if ( __put_user(spte, (unsigned long *)
                    &shadow_linear_pg_table[va >> PAGE_SHIFT]) )
    {
        return;
    }
}
#endif
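
/*
 * Shadow-mode page fault handler.  Validates the fault against the guest
 * PTE, propagates an updated entry into the shadow tables (mapping the L1
 * on demand if necessary), and returns nonzero iff the fault was fixed up.
 */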
int shadow_fault(unsigned long va, long error_code)
{
    unsigned long gpte, spte;
    struct mm_struct *m = &current->mm;

    SH_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code );

    check_pagetable(m, current->mm.pagetable, "pre-sf");

    /*
     * STEP 1. A fast-reject set of checks with no locking.
     */

    if ( unlikely(__get_user(gpte, (unsigned long *)
                             &linear_pg_table[va >> PAGE_SHIFT])) )
    {
        SH_VVLOG("shadow_fault - EXIT: read gpte faulted" );
        return 0;
    }

    if ( !(gpte & _PAGE_PRESENT) )
    {
        SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
        return 0;
    }

    if ( (error_code & 2) && !(gpte & _PAGE_RW) )
    {
        /* Write fault on a read-only mapping. */
        return 0;
    }

    /*
     * STEP 2. Take the shadow lock and re-check the guest PTE.
     */

    shadow_lock(m);

    if ( unlikely(__get_user(gpte, (unsigned long *)
                             &linear_pg_table[va >> PAGE_SHIFT])) )
    {
        SH_VVLOG("shadow_fault - EXIT: read gpte faulted" );
        shadow_unlock(m);
        return 0;
    }

    if ( unlikely(!(gpte & _PAGE_PRESENT)) )
    {
        SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
        shadow_unlock(m);
        return 0;
    }

    /* Write fault? */
    if ( error_code & 2 )
    {
        if ( unlikely(!(gpte & _PAGE_RW)) )
        {
            /* Write fault on a read-only mapping. */
            SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%lx)", gpte);
            shadow_unlock(m);
            return 0;
        }

        l1pte_write_fault(m, &gpte, &spte);
    }
    else
    {
        l1pte_read_fault(m, &gpte, &spte);
    }

    /*
     * STEP 3. Write the modified shadow PTE and guest PTE back to the tables.
     */

    /* XXX Watch out for read-only L2 entries! (not used in Linux). */
    if ( unlikely(__put_user(gpte, (unsigned long *)
                             &linear_pg_table[va >> PAGE_SHIFT])) )
        domain_crash();

    /*
     * Update of shadow PTE can fail because the L1 p.t. is not shadowed,
     * or because the shadow isn't linked into this shadow L2 p.t.
     */
    if ( unlikely(__put_user(spte, (unsigned long *)
                             &shadow_linear_pg_table[va >> PAGE_SHIFT])) )
    {
        SH_VVLOG("3: not shadowed/mapped gpte=%08lx spte=%08lx", gpte, spte);
        shadow_map_l1_into_current_l2(va);
        shadow_linear_pg_table[va >> PAGE_SHIFT] = mk_l1_pgentry(spte);
    }

    perfc_incrc(shadow_fixup_count);
    m->shadow_fault_count++;

    shadow_unlock(m);

    check_pagetable(m, current->mm.pagetable, "post-sf");
    return EXCRET_fault_fixed;
}
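
/*
 * Propagate a guest write to an already-shadowed L1 pagetable entry into
 * the corresponding shadow L1.  The prev_spfn/prev_spl1e cache lets
 * consecutive updates to the same L1 reuse a single mapping.
 */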
void shadow_l1_normal_pt_update(
    unsigned long pa, unsigned long gpte,
    unsigned long *prev_spfn_ptr,
    l1_pgentry_t **prev_spl1e_ptr)
{
    unsigned long spfn, spte, prev_spfn = *prev_spfn_ptr;
    l1_pgentry_t *spl1e, *prev_spl1e = *prev_spl1e_ptr;

    /* N.B. To get here, we know the l1 page *must* be shadowed. */
    SH_VVLOG("shadow_l1_normal_pt_update pa=%08lx, gpte=%08lx, "
             "prev_spfn=%08lx, prev_spl1e=%p\n",
             pa, gpte, prev_spfn, prev_spl1e);

    spfn = __shadow_status(&current->mm, pa >> PAGE_SHIFT) & PSH_pfn_mask;

    if ( spfn == prev_spfn )
    {
        spl1e = prev_spl1e;
    }
    else
    {
        if ( prev_spl1e != NULL )
            unmap_domain_mem( prev_spl1e );
        spl1e = (l1_pgentry_t *)map_domain_mem(spfn << PAGE_SHIFT);
        *prev_spfn_ptr  = spfn;
        *prev_spl1e_ptr = spl1e;
    }

    l1pte_propagate_from_guest(&current->mm, &gpte, &spte);
    spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t)] = mk_l1_pgentry(spte);
}
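
/*
 * Propagate a guest write to an already-shadowed L2 pagetable entry into
 * the shadow L2, linking in the shadow of the referenced L1 if one exists.
 */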
void shadow_l2_normal_pt_update(unsigned long pa, unsigned long gpte)
{
    unsigned long spfn, spte;
    l2_pgentry_t *spl2e;
    unsigned long s_sh;

    /* N.B. To get here, we know the l2 page *must* be shadowed. */
    SH_VVLOG("shadow_l2_normal_pt_update pa=%08lx, gpte=%08lx",pa,gpte);

    spfn = __shadow_status(&current->mm, pa >> PAGE_SHIFT) & PSH_pfn_mask;

    s_sh = (gpte & _PAGE_PRESENT) ?
        __shadow_status(&current->mm, gpte >> PAGE_SHIFT) : 0;

    /* XXXX Should mark guest pte as DIRTY and ACCESSED too! */
    l2pde_general(&current->mm, &gpte, &spte, s_sh);
    spl2e = (l2_pgentry_t *)map_domain_mem(spfn << PAGE_SHIFT);
    spl2e[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t)] = mk_l2_pgentry(spte);
    unmap_domain_mem(spl2e);
}
/************************************************************************/
/************************************************************************/
/************************************************************************/

#if SHADOW_DEBUG

static int sh_l2_present;
static int sh_l1_present;
char * sh_check_name;

#define FAIL(_f, _a...)                                                      \
    do {                                                                     \
        printk("XXX %s-FAIL (%d,%d)" _f " g=%08lx s=%08lx\n",                \
               sh_check_name, level, i, ## _a , gpte, spte);                 \
        BUG();                                                               \
    } while ( 0 )
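
/*
 * Audit one (guest PTE, shadow PTE) pair: the shadow may only be present
 * when the guest entry is, must agree with it outside the DIRTY/ACCESSED/RW
 * bits, and may only set those bits consistently with the guest entry.
 */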
static int check_pte(
    struct mm_struct *m, unsigned long gpte, unsigned long spte,
    int level, int i)
{
    unsigned long mask, gpfn, spfn;
#ifdef CONFIG_VMX
    unsigned long guest_gpfn;
#endif

    if ( (spte == 0) || (spte == 0xdeadface) || (spte == 0x00000E00) )
        return 1;  /* always safe */

    if ( !(spte & _PAGE_PRESENT) )
        FAIL("Non zero not present spte");

    if ( level == 2 ) sh_l2_present++;
    if ( level == 1 ) sh_l1_present++;

    if ( !(gpte & _PAGE_PRESENT) )
        FAIL("Guest not present yet shadow is");

    mask = ~(_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|0xFFFFF000);

    if ( (spte & mask) != (gpte & mask) )
        FAIL("Corrupt?");

    if ( (spte & _PAGE_DIRTY ) && !(gpte & _PAGE_DIRTY) )
        FAIL("Dirty coherence");

    if ( (spte & _PAGE_ACCESSED ) && !(gpte & _PAGE_ACCESSED) )
        FAIL("Accessed coherence");

    if ( (spte & _PAGE_RW ) && !(gpte & _PAGE_RW) )
        FAIL("RW coherence");

    if ( (spte & _PAGE_RW ) && !((gpte & _PAGE_RW) && (gpte & _PAGE_DIRTY)) )
        FAIL("RW2 coherence");

    spfn = spte >> PAGE_SHIFT;
    gpfn = gpte >> PAGE_SHIFT;

    if ( gpfn == spfn )
    {
        if ( level > 1 )
            FAIL("Linear map ???");    /* XXX this will fail on BSD */
    }
    else
    {
        if ( level < 2 )
            FAIL("Shadow in L1 entry?");

        if ( m->shadow_mode == SHM_full_32 )
        {
            guest_gpfn = phys_to_machine_mapping[gpfn];

            if ( __shadow_status(m, guest_gpfn) != (PSH_shadowed | spfn) )
                FAIL("spfn problem g.sf=%08lx",
                     __shadow_status(m, guest_gpfn) );
        }
        else
        {
            if ( __shadow_status(m, gpfn) != (PSH_shadowed | spfn) )
                FAIL("spfn problem g.sf=%08lx",
                     __shadow_status(m, gpfn) );
        }
    }

    return 1;
}
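
/* Audit every guest/shadow PTE pair in one L1 pagetable. */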
static int check_l1_table(
    struct mm_struct *m, unsigned long va,
    unsigned long g2, unsigned long s2)
{
    int i;
    unsigned long *gpl1e, *spl1e;

    gpl1e = map_domain_mem(g2 << PAGE_SHIFT);
    spl1e = map_domain_mem(s2 << PAGE_SHIFT);

    for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
        check_pte(m, gpl1e[i], spl1e[i], 1, i);

    unmap_domain_mem(spl1e);
    unmap_domain_mem(gpl1e);

    return 1;
}

#define FAILPT(_f, _a...)                                      \
    do {                                                       \
        printk("XXX FAIL %s-PT" _f "\n", s, ## _a );           \
        BUG();                                                 \
    } while ( 0 )
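
/*
 * Audit an entire guest pagetable against its shadow: check the hypervisor
 * and linear-map entries of the shadow L2, then every guest/shadow pair
 * reachable from it.
 */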
int check_pagetable(struct mm_struct *m, pagetable_t pt, char *s)
{
    unsigned long gptbase = pagetable_val(pt);
    unsigned long gpfn, spfn;
    int i;
    l2_pgentry_t *gpl2e, *spl2e;
    unsigned long host_gpfn = 0;

    sh_check_name = s;

    SH_VVLOG("%s-PT Audit", s);

    sh_l2_present = sh_l1_present = 0;

    gpfn = gptbase >> PAGE_SHIFT;

    __get_phys_to_machine(m, host_gpfn, gpfn);

    if ( !(__shadow_status(m, gpfn) & PSH_shadowed) )
    {
        printk("%s-PT %08lx not shadowed\n", s, gptbase);

        if ( __shadow_status(m, gpfn) != 0 )
            BUG();
        return 0;
    }

    spfn = __shadow_status(m, gpfn) & PSH_pfn_mask;

    if ( __shadow_status(m, gpfn) != (PSH_shadowed | spfn) )
        FAILPT("ptbase shadow inconsistent1");

    if ( m->shadow_mode == SHM_full_32 )
    {
        host_gpfn = phys_to_machine_mapping[gpfn];
        gpl2e = (l2_pgentry_t *) map_domain_mem( host_gpfn << PAGE_SHIFT );
    }
    else
        gpl2e = (l2_pgentry_t *) map_domain_mem( gpfn << PAGE_SHIFT );

    spl2e = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );

    if ( memcmp(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
                &gpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
                ((SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT) -
                 DOMAIN_ENTRIES_PER_L2_PAGETABLE) * sizeof(l2_pgentry_t)) )
    {
        for ( i = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
              i < (SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT);
              i++ )
            printk("+++ (%d) %08lx %08lx\n",i,
                   l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]));
        FAILPT("hypervisor entries inconsistent");
    }

    if ( (l2_pgentry_val(spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
          l2_pgentry_val(gpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT])) )
        FAILPT("hypervisor linear map inconsistent");

    if ( (l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >>
                               L2_PAGETABLE_SHIFT]) !=
          ((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR)) )
        FAILPT("hypervisor shadow linear map inconsistent %08lx %08lx",
               l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >>
                                    L2_PAGETABLE_SHIFT]),
               (spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);

    if ( m->shadow_mode != SHM_full_32 )
    {
        if ( (l2_pgentry_val(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
              ((__pa(frame_table[gpfn].u.inuse.domain->mm.perdomain_pt) |
                __PAGE_HYPERVISOR))) )
            FAILPT("hypervisor per-domain map inconsistent");
    }

    /* Check the whole L2. */
    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
        check_pte(m, l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]), 2, i);

    /* Go back and recurse. */
    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
    {
        if ( l2_pgentry_val(spl2e[i]) != 0 )
            check_l1_table(
                m, i << L2_PAGETABLE_SHIFT,
                l2_pgentry_val(gpl2e[i]) >> PAGE_SHIFT,
                l2_pgentry_val(spl2e[i]) >> PAGE_SHIFT);
    }

    unmap_domain_mem(spl2e);
    unmap_domain_mem(gpl2e);

    SH_VVLOG("PT verified : l2_present = %d, l1_present = %d\n",
             sh_l2_present, sh_l1_present);

    return 1;
}

#endif