xen-vt-testing

view xen/arch/x86/shadow_public.c @ 6753:36e74b5dfa95

Fix a bug in shadow_remove_all_access_in_page.

Fixes a fairly obvious bug, in which an if statement was reversed. The
bug has been present for a while, so apparently this code isn't well
tested.

Signed-off-by: Michael Vrable <mvrable@cs.ucsd.edu>
author kaf24@firebug.cl.cam.ac.uk
date Tue Sep 13 09:09:59 2005 +0000 (2005-09-13)
parents 3feb7fa331ed
children f804b28871ba
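The fix lands in the static helper remove_all_access_in_page() (around line 1621 in the listing below); the reversed test is presumably the l1e_has_changed() check at line 1637, which decides whether a shadow L1 entry maps the forbidden frame. The following standalone sketch is not Xen code: it models the page-table entries as plain integers, and the names entries, forbidden and cleared are illustrative, but it shows why the direction of that test matters.

    /* Toy model of the loop in remove_all_access_in_page(). */
    #include <stdio.h>

    #define NENTRIES 4

    int main(void)
    {
        unsigned long forbidden = 0x42;          /* stand-in for forbidden_gmfn */
        unsigned long entries[NENTRIES] =        /* stand-in for the L1 table   */
            { 0x10, 0x42, 0x37, 0x42 };
        unsigned int cleared = 0;
        int i;

        for ( i = 0; i < NENTRIES; i++ )
        {
            /* Correct sense: skip entries that do NOT map the forbidden frame
             * (cf. "if ( l1e_has_changed(pl1e[i], match, flags) ) continue;"). */
            if ( entries[i] != forbidden )
                continue;

            entries[i] = 0;                      /* cf. pl1e[i] = l1e_empty();  */
            cleared++;
        }

        /* Prints "cleared 2 mapping(s)".  With the test inverted, the loop
         * would instead clear every entry except the two it was asked to
         * remove. */
        printf("cleared %u mapping(s)\n", cleared);
        return 0;
    }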
line source
1 /******************************************************************************
2 * arch/x86/shadow_public.c
3 *
4 * Copyright (c) 2005 Michael A Fetterman
5 * Based on an earlier implementation by Ian Pratt et al
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
23 #include <xen/config.h>
24 #include <xen/types.h>
25 #include <xen/mm.h>
26 #include <xen/domain_page.h>
27 #include <asm/shadow.h>
28 #include <asm/page.h>
29 #include <xen/event.h>
30 #include <xen/sched.h>
31 #include <xen/trace.h>
33 #if CONFIG_PAGING_LEVELS >= 3
34 #include <asm/shadow_64.h>
36 #endif
37 #if CONFIG_PAGING_LEVELS == 4
38 extern struct shadow_ops MODE_F_HANDLER;
39 extern struct shadow_ops MODE_D_HANDLER;
40 #endif
42 extern struct shadow_ops MODE_A_HANDLER;
44 #define SHADOW_MAX_GUEST32(_encoded) ((L1_PAGETABLE_ENTRIES_32 - 1) - ((_encoded) >> 16))
45 /****************************************************************************/
46 /************* export interface functions ***********************************/
47 /****************************************************************************/
50 int shadow_set_guest_paging_levels(struct domain *d, int levels)
51 {
52 shadow_lock(d);
54 switch(levels) {
55 #if CONFIG_PAGING_LEVELS >= 4
56 case 4:
57 if ( d->arch.ops != &MODE_F_HANDLER )
58 d->arch.ops = &MODE_F_HANDLER;
59 shadow_unlock(d);
60 return 1;
61 #endif
62 case 3:
63 case 2:
64 #if CONFIG_PAGING_LEVELS == 2
65 if ( d->arch.ops != &MODE_A_HANDLER )
66 d->arch.ops = &MODE_A_HANDLER;
67 #elif CONFIG_PAGING_LEVELS == 4
68 if ( d->arch.ops != &MODE_D_HANDLER )
69 d->arch.ops = &MODE_D_HANDLER;
70 #endif
71 shadow_unlock(d);
72 return 1;
73 default:
74 shadow_unlock(d);
75 return 0;
76 }
77 }
79 void shadow_invlpg(struct vcpu *v, unsigned long va)
80 {
81 struct domain *d = current->domain;
82 d->arch.ops->invlpg(v, va);
83 }
85 int shadow_fault(unsigned long va, struct cpu_user_regs *regs)
86 {
87 struct domain *d = current->domain;
88 return d->arch.ops->fault(va, regs);
89 }
91 void __update_pagetables(struct vcpu *v)
92 {
93 struct domain *d = v->domain;
94 d->arch.ops->update_pagetables(v);
95 }
97 void __shadow_sync_all(struct domain *d)
98 {
99 d->arch.ops->sync_all(d);
100 }
102 int shadow_remove_all_write_access(
103 struct domain *d, unsigned long readonly_gpfn, unsigned long readonly_gmfn)
104 {
105 return d->arch.ops->remove_all_write_access(d, readonly_gpfn, readonly_gmfn);
106 }
108 int shadow_do_update_va_mapping(unsigned long va,
109 l1_pgentry_t val,
110 struct vcpu *v)
111 {
112 struct domain *d = v->domain;
113 return d->arch.ops->do_update_va_mapping(va, val, v);
114 }
116 struct out_of_sync_entry *
117 shadow_mark_mfn_out_of_sync(struct vcpu *v, unsigned long gpfn,
118 unsigned long mfn)
119 {
120 struct domain *d = v->domain;
121 return d->arch.ops->mark_mfn_out_of_sync(v, gpfn, mfn);
122 }
124 /*
125 * Returns 1 if va's shadow mapping is out-of-sync.
126 * Returns 0 otherwise.
127 */
128 int __shadow_out_of_sync(struct vcpu *v, unsigned long va)
129 {
130 struct domain *d = v->domain;
131 return d->arch.ops->is_out_of_sync(v, va);
132 }
134 unsigned long gva_to_gpa(unsigned long gva)
135 {
136 struct domain *d = current->domain;
137 return d->arch.ops->gva_to_gpa(gva);
138 }
139 /****************************************************************************/
140 /****************************************************************************/
141 #if CONFIG_PAGING_LEVELS >= 4
142 /*
143 * Convert PAE 3-level page-table to 4-level page-table
144 */
145 static pagetable_t page_table_convert(struct domain *d)
146 {
147 struct pfn_info *l4page, *l3page;
148 l4_pgentry_t *l4;
149 l3_pgentry_t *l3, *pae_l3;
150 int i;
152 l4page = alloc_domheap_page(NULL);
153 if (l4page == NULL)
154 domain_crash();
155 l4 = map_domain_page(page_to_pfn(l4page));
156 memset(l4, 0, PAGE_SIZE);
158 l3page = alloc_domheap_page(NULL);
159 if (l3page == NULL)
160 domain_crash();
161 l3 = map_domain_page(page_to_pfn(l3page));
162 memset(l3, 0, PAGE_SIZE);
164 l4[0] = l4e_from_page(l3page, __PAGE_HYPERVISOR);
165 pae_l3 = map_domain_page(pagetable_get_pfn(d->arch.phys_table));
167 for (i = 0; i < PDP_ENTRIES; i++) {
168 l3[i] = pae_l3[i];
169 l3e_add_flags(l3[i], 0x67);
170 }
172 unmap_domain_page(l4);
173 unmap_domain_page(l3);
175 return mk_pagetable(page_to_phys(l4page));
176 }
178 static void alloc_monitor_pagetable(struct vcpu *v)
179 {
180 unsigned long mmfn;
181 l4_pgentry_t *mpl4e;
182 struct pfn_info *mmfn_info;
183 struct domain *d = v->domain;
184 pagetable_t phys_table;
186 ASSERT(!pagetable_get_paddr(v->arch.monitor_table)); /* we should only get called once */
188 mmfn_info = alloc_domheap_page(NULL);
189 ASSERT( mmfn_info );
191 mmfn = (unsigned long) (mmfn_info - frame_table);
192 mpl4e = (l4_pgentry_t *) map_domain_page(mmfn);
193 memcpy(mpl4e, &idle_pg_table[0], PAGE_SIZE);
194 mpl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
195 l4e_from_paddr(__pa(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR);
196 /* map the phys_to_machine map into the per domain Read-Only MPT space */
197 phys_table = page_table_convert(d);
199 mpl4e[l4_table_offset(RO_MPT_VIRT_START)] =
200 l4e_from_paddr(pagetable_get_paddr(phys_table),
201 __PAGE_HYPERVISOR);
202 v->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT);
203 v->arch.monitor_vtable = (l2_pgentry_t *) mpl4e;
204 }
206 static void inline
207 free_shadow_fl1_table(struct domain *d, unsigned long smfn)
208 {
209 l1_pgentry_t *pl1e = map_domain_page(smfn);
210 int i;
212 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
213 put_page_from_l1e(pl1e[i], d);
214 }
216 /*
217 * Free l2, l3, l4 shadow tables
218 */
220 void free_fake_shadow_l2(struct domain *d,unsigned long smfn);
222 static void inline
223 free_shadow_tables(struct domain *d, unsigned long smfn, u32 level)
224 {
225 pgentry_64_t *ple = map_domain_page(smfn);
226 int i, external = shadow_mode_external(d);
227 struct pfn_info *page = &frame_table[smfn];
229 if (d->arch.ops->guest_paging_levels == PAGING_L2)
230 {
231 #if CONFIG_PAGING_LEVELS >=4
232 for ( i = 0; i < PDP_ENTRIES; i++ )
233 {
234 if (entry_get_flags(ple[i]) & _PAGE_PRESENT )
235 free_fake_shadow_l2(d,entry_get_pfn(ple[i]));
236 }
238 page = &frame_table[entry_get_pfn(ple[0])];
239 free_domheap_pages(page, SL2_ORDER);
240 unmap_domain_page(ple);
241 #endif
242 }
243 else
244 {
245 for ( i = 0; i < PAGETABLE_ENTRIES; i++ )
246 if ( external || is_guest_l4_slot(i) )
247 if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
248 put_shadow_ref(entry_get_pfn(ple[i]));
250 unmap_domain_page(ple);
251 }
252 }
255 void free_monitor_pagetable(struct vcpu *v)
256 {
257 unsigned long mfn;
259 // ASSERT( pagetable_val(v->arch.monitor_table) );
260 /*
261 * free monitor_table.
262 */
263 //mfn = (pagetable_val(v->arch.monitor_table)) >> PAGE_SHIFT;
264 mfn = pagetable_get_pfn(v->arch.monitor_table);
265 unmap_domain_page(v->arch.monitor_vtable);
266 free_domheap_page(&frame_table[mfn]);
267 v->arch.monitor_table = mk_pagetable(0);
268 v->arch.monitor_vtable = 0;
269 }
271 #elif CONFIG_PAGING_LEVELS == 3
273 static void alloc_monitor_pagetable(struct vcpu *v)
274 {
275 BUG(); /* PAE not implemented yet */
276 }
278 void free_monitor_pagetable(struct vcpu *v)
279 {
280 BUG(); /* PAE not implemented yet */
281 }
283 #elif CONFIG_PAGING_LEVELS == 2
285 static void alloc_monitor_pagetable(struct vcpu *v)
286 {
287 unsigned long mmfn;
288 l2_pgentry_t *mpl2e;
289 struct pfn_info *mmfn_info;
290 struct domain *d = v->domain;
292 ASSERT(pagetable_get_paddr(v->arch.monitor_table) == 0);
294 mmfn_info = alloc_domheap_page(NULL);
295 ASSERT(mmfn_info != NULL);
297 mmfn = page_to_pfn(mmfn_info);
298 mpl2e = (l2_pgentry_t *)map_domain_page(mmfn);
299 memset(mpl2e, 0, PAGE_SIZE);
301 #ifdef __i386__ /* XXX screws x86/64 build */
302 memcpy(&mpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
303 &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
304 HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
305 #endif
307 mpl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
308 l2e_from_paddr(__pa(d->arch.mm_perdomain_pt),
309 __PAGE_HYPERVISOR);
311 // map the phys_to_machine map into the Read-Only MPT space for this domain
312 mpl2e[l2_table_offset(RO_MPT_VIRT_START)] =
313 l2e_from_paddr(pagetable_get_paddr(d->arch.phys_table),
314 __PAGE_HYPERVISOR);
316 // Don't (yet) have mappings for these...
317 // Don't want to accidentally see the idle_pg_table's linear mapping.
318 //
319 mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_empty();
320 mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
322 v->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT);
323 v->arch.monitor_vtable = mpl2e;
324 }
326 /*
327 * Free the pages for monitor_table and hl2_table
328 */
329 void free_monitor_pagetable(struct vcpu *v)
330 {
331 l2_pgentry_t *mpl2e, hl2e, sl2e;
332 unsigned long mfn;
334 ASSERT( pagetable_get_paddr(v->arch.monitor_table) );
336 mpl2e = v->arch.monitor_vtable;
338 /*
339 * First get the mfn for hl2_table by looking at monitor_table
340 */
341 hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
342 if ( l2e_get_flags(hl2e) & _PAGE_PRESENT )
343 {
344 mfn = l2e_get_pfn(hl2e);
345 ASSERT(mfn);
346 put_shadow_ref(mfn);
347 }
349 sl2e = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
350 if ( l2e_get_flags(sl2e) & _PAGE_PRESENT )
351 {
352 mfn = l2e_get_pfn(sl2e);
353 ASSERT(mfn);
354 put_shadow_ref(mfn);
355 }
357 unmap_domain_page(mpl2e);
359 /*
360 * Then free monitor_table.
361 */
362 mfn = pagetable_get_pfn(v->arch.monitor_table);
363 free_domheap_page(&frame_table[mfn]);
365 v->arch.monitor_table = mk_pagetable(0);
366 v->arch.monitor_vtable = 0;
367 }
368 #endif
370 static void
371 shadow_free_snapshot(struct domain *d, struct out_of_sync_entry *entry)
372 {
373 void *snapshot;
375 if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
376 return;
378 // Clear the out_of_sync bit.
379 //
380 clear_bit(_PGC_out_of_sync, &frame_table[entry->gmfn].count_info);
382 // XXX Need to think about how to protect the domain's
383 // information less expensively.
384 //
385 snapshot = map_domain_page(entry->snapshot_mfn);
386 memset(snapshot, 0, PAGE_SIZE);
387 unmap_domain_page(snapshot);
389 put_shadow_ref(entry->snapshot_mfn);
390 }
392 void
393 release_out_of_sync_entry(struct domain *d, struct out_of_sync_entry *entry)
394 {
395 struct pfn_info *page;
397 page = &frame_table[entry->gmfn];
399 // Decrement ref count of guest & shadow pages
400 //
401 put_page(page);
403 // Only use entries that have low bits clear...
404 //
405 if ( !(entry->writable_pl1e & (sizeof(l1_pgentry_t)-1)) )
406 {
407 put_shadow_ref(entry->writable_pl1e >> PAGE_SHIFT);
408 entry->writable_pl1e = -2;
409 }
410 else
411 ASSERT( entry->writable_pl1e == -1 );
413 // Free the snapshot
414 //
415 shadow_free_snapshot(d, entry);
416 }
418 static void remove_out_of_sync_entries(struct domain *d, unsigned long gmfn)
419 {
420 struct out_of_sync_entry *entry = d->arch.out_of_sync;
421 struct out_of_sync_entry **prev = &d->arch.out_of_sync;
422 struct out_of_sync_entry *found = NULL;
424 // NB: Be careful not to call something that manipulates this list
425 // while walking it. Collect the results into a separate list
426 // first, then walk that list.
427 //
428 while ( entry )
429 {
430 if ( entry->gmfn == gmfn )
431 {
432 // remove from out of sync list
433 *prev = entry->next;
435 // add to found list
436 entry->next = found;
437 found = entry;
439 entry = *prev;
440 continue;
441 }
442 prev = &entry->next;
443 entry = entry->next;
444 }
446 prev = NULL;
447 entry = found;
448 while ( entry )
449 {
450 release_out_of_sync_entry(d, entry);
452 prev = &entry->next;
453 entry = entry->next;
454 }
456 // Add found list to free list
457 if ( prev )
458 {
459 *prev = d->arch.out_of_sync_free;
460 d->arch.out_of_sync_free = found;
461 }
462 }
464 static inline void
465 shadow_demote(struct domain *d, unsigned long gpfn, unsigned long gmfn)
466 {
467 if ( !shadow_mode_refcounts(d) )
468 return;
470 ASSERT(frame_table[gmfn].count_info & PGC_page_table);
472 if ( shadow_max_pgtable_type(d, gpfn, NULL) == PGT_none )
473 {
474 clear_bit(_PGC_page_table, &frame_table[gmfn].count_info);
476 if ( page_out_of_sync(pfn_to_page(gmfn)) )
477 {
478 remove_out_of_sync_entries(d, gmfn);
479 }
480 }
481 }
483 static void inline
484 free_shadow_l1_table(struct domain *d, unsigned long smfn)
485 {
486 l1_pgentry_t *pl1e = map_domain_page(smfn);
487 int i;
488 struct pfn_info *spage = pfn_to_page(smfn);
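/*
 * For a shadow L1 page, the tlbflush_timestamp field is reused to hold a
 * packed [min, max] range of entries in use (decoded via SHADOW_MIN and
 * SHADOW_MAX below), so only that slice of the table is torn down here.
 */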
489 u32 min_max = spage->tlbflush_timestamp;
490 int min = SHADOW_MIN(min_max);
491 int max;
493 if (d->arch.ops->guest_paging_levels == PAGING_L2)
494 max = SHADOW_MAX_GUEST32(min_max);
495 else
496 max = SHADOW_MAX(min_max);
498 for ( i = min; i <= max; i++ )
499 {
500 shadow_put_page_from_l1e(pl1e[i], d);
501 pl1e[i] = l1e_empty();
502 }
504 unmap_domain_page(pl1e);
505 }
507 static void inline
508 free_shadow_hl2_table(struct domain *d, unsigned long smfn)
509 {
510 l1_pgentry_t *hl2 = map_domain_page(smfn);
511 int i, limit;
513 SH_VVLOG("%s: smfn=%lx freed", __func__, smfn);
515 #ifdef __i386__
516 if ( shadow_mode_external(d) )
517 limit = L2_PAGETABLE_ENTRIES;
518 else
519 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
520 #else
521 limit = 0; /* XXX x86/64 XXX */
522 #endif
524 for ( i = 0; i < limit; i++ )
525 {
526 if ( l1e_get_flags(hl2[i]) & _PAGE_PRESENT )
527 put_page(pfn_to_page(l1e_get_pfn(hl2[i])));
528 }
530 unmap_domain_page(hl2);
531 }
533 static void inline
534 free_shadow_l2_table(struct domain *d, unsigned long smfn, unsigned int type)
535 {
536 l2_pgentry_t *pl2e = map_domain_page(smfn);
537 int i, external = shadow_mode_external(d);
539 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
540 if ( external || is_guest_l2_slot(type, i) )
541 if ( l2e_get_flags(pl2e[i]) & _PAGE_PRESENT )
542 put_shadow_ref(l2e_get_pfn(pl2e[i]));
544 if ( (PGT_base_page_table == PGT_l2_page_table) &&
545 shadow_mode_translate(d) && !external )
546 {
547 // free the ref to the hl2
548 //
549 put_shadow_ref(l2e_get_pfn(pl2e[l2_table_offset(LINEAR_PT_VIRT_START)]));
550 }
552 unmap_domain_page(pl2e);
553 }
555 void free_fake_shadow_l2(struct domain *d, unsigned long smfn)
556 {
557 pgentry_64_t *ple = map_domain_page(smfn);
558 int i;
560 for ( i = 0; i < PAGETABLE_ENTRIES; i = i + 2 )
561 {
562 if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
563 put_shadow_ref(entry_get_pfn(ple[i]));
564 }
566 unmap_domain_page(ple);
567 }
569 void free_shadow_page(unsigned long smfn)
570 {
571 struct pfn_info *page = &frame_table[smfn];
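/*
 * For a shadow page, u.inuse.type_info packs both the mfn of the guest page
 * being shadowed (PGT_mfn_mask) and the shadow's type (PGT_type_mask), which
 * drives the switch below.
 */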
573 unsigned long gmfn = page->u.inuse.type_info & PGT_mfn_mask;
574 struct domain *d = page_get_owner(pfn_to_page(gmfn));
575 unsigned long gpfn = __mfn_to_gpfn(d, gmfn);
576 unsigned long type = page->u.inuse.type_info & PGT_type_mask;
578 SH_VVLOG("%s: free'ing smfn=%lx", __func__, smfn);
580 ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
581 #if CONFIG_PAGING_LEVELS >=4
582 if (type == PGT_fl1_shadow) {
583 unsigned long mfn;
584 mfn = __shadow_status(d, gpfn, PGT_fl1_shadow);
585 if (!mfn)
586 gpfn |= (1UL << 63);
587 }
588 #endif
590 delete_shadow_status(d, gpfn, gmfn, type);
592 switch ( type )
593 {
594 case PGT_l1_shadow:
595 perfc_decr(shadow_l1_pages);
596 shadow_demote(d, gpfn, gmfn);
597 free_shadow_l1_table(d, smfn);
598 break;
599 #if defined (__i386__)
600 case PGT_l2_shadow:
601 perfc_decr(shadow_l2_pages);
602 shadow_demote(d, gpfn, gmfn);
603 free_shadow_l2_table(d, smfn, page->u.inuse.type_info);
604 break;
606 case PGT_hl2_shadow:
607 perfc_decr(hl2_table_pages);
608 shadow_demote(d, gpfn, gmfn);
609 free_shadow_hl2_table(d, smfn);
610 break;
611 #else
612 case PGT_l2_shadow:
613 case PGT_l3_shadow:
614 case PGT_l4_shadow:
615 shadow_demote(d, gpfn, gmfn);
616 free_shadow_tables(d, smfn, shadow_type_to_level(type));
617 break;
619 case PGT_fl1_shadow:
620 free_shadow_fl1_table(d, smfn);
621 break;
623 #endif
625 case PGT_snapshot:
626 perfc_decr(snapshot_pages);
627 break;
629 default:
630 printk("Free shadow weird page type mfn=%lx type=%" PRtype_info "\n",
631 page_to_pfn(page), page->u.inuse.type_info);
632 break;
633 }
635 d->arch.shadow_page_count--;
637 // No TLB flushes are needed the next time this page gets allocated.
638 //
639 page->tlbflush_timestamp = 0;
640 page->u.free.cpumask = CPU_MASK_NONE;
642 if ( type == PGT_l1_shadow )
643 {
644 list_add(&page->list, &d->arch.free_shadow_frames);
645 perfc_incr(free_l1_pages);
646 }
647 else
648 free_domheap_page(page);
649 }
651 static void
652 free_writable_pte_predictions(struct domain *d)
653 {
654 int i;
655 struct shadow_status *x;
657 for ( i = 0; i < shadow_ht_buckets; i++ )
658 {
659 u32 count;
660 unsigned long *gpfn_list;
662 /* Skip empty buckets. */
663 if ( d->arch.shadow_ht[i].gpfn_and_flags == 0 )
664 continue;
666 count = 0;
667 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
668 if ( (x->gpfn_and_flags & PGT_type_mask) == PGT_writable_pred )
669 count++;
671 gpfn_list = xmalloc_array(unsigned long, count);
672 count = 0;
673 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
674 if ( (x->gpfn_and_flags & PGT_type_mask) == PGT_writable_pred )
675 gpfn_list[count++] = x->gpfn_and_flags & PGT_mfn_mask;
677 while ( count )
678 {
679 count--;
680 delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred);
681 }
683 xfree(gpfn_list);
684 }
685 }
687 static void free_shadow_ht_entries(struct domain *d)
688 {
689 struct shadow_status *x, *n;
691 SH_VLOG("freed tables count=%d l1=%d l2=%d",
692 d->arch.shadow_page_count, perfc_value(shadow_l1_pages),
693 perfc_value(shadow_l2_pages));
695 n = d->arch.shadow_ht_extras;
696 while ( (x = n) != NULL )
697 {
698 d->arch.shadow_extras_count--;
699 n = *((struct shadow_status **)(&x[shadow_ht_extra_size]));
700 xfree(x);
701 }
703 d->arch.shadow_ht_extras = NULL;
704 d->arch.shadow_ht_free = NULL;
706 ASSERT(d->arch.shadow_extras_count == 0);
707 SH_LOG("freed extras, now %d", d->arch.shadow_extras_count);
709 if ( d->arch.shadow_dirty_bitmap != NULL )
710 {
711 xfree(d->arch.shadow_dirty_bitmap);
712 d->arch.shadow_dirty_bitmap = 0;
713 d->arch.shadow_dirty_bitmap_size = 0;
714 }
716 xfree(d->arch.shadow_ht);
717 d->arch.shadow_ht = NULL;
718 }
720 static void free_out_of_sync_entries(struct domain *d)
721 {
722 struct out_of_sync_entry *x, *n;
724 n = d->arch.out_of_sync_extras;
725 while ( (x = n) != NULL )
726 {
727 d->arch.out_of_sync_extras_count--;
728 n = *((struct out_of_sync_entry **)(&x[out_of_sync_extra_size]));
729 xfree(x);
730 }
732 d->arch.out_of_sync_extras = NULL;
733 d->arch.out_of_sync_free = NULL;
734 d->arch.out_of_sync = NULL;
736 ASSERT(d->arch.out_of_sync_extras_count == 0);
737 FSH_LOG("freed extra out_of_sync entries, now %d",
738 d->arch.out_of_sync_extras_count);
739 }
741 void free_shadow_pages(struct domain *d)
742 {
743 int i;
744 struct shadow_status *x;
745 struct vcpu *v;
747 /*
748 * WARNING! The shadow page table must not currently be in use!
749 * e.g., You are expected to have paused the domain and synchronized CR3.
750 */
752 if( !d->arch.shadow_ht ) return;
754 shadow_audit(d, 1);
756 // first, remove any outstanding refs from out_of_sync entries...
757 //
758 free_out_of_sync_state(d);
760 // second, remove any outstanding refs from v->arch.shadow_table
761 // and CR3.
762 //
763 for_each_vcpu(d, v)
764 {
765 if ( pagetable_get_paddr(v->arch.shadow_table) )
766 {
767 put_shadow_ref(pagetable_get_pfn(v->arch.shadow_table));
768 v->arch.shadow_table = mk_pagetable(0);
769 }
771 if ( v->arch.monitor_shadow_ref )
772 {
773 put_shadow_ref(v->arch.monitor_shadow_ref);
774 v->arch.monitor_shadow_ref = 0;
775 }
776 }
778 #if defined (__i386__)
779 // For external shadows, remove the monitor table's refs
780 //
781 if ( shadow_mode_external(d) )
782 {
783 for_each_vcpu(d, v)
784 {
785 l2_pgentry_t *mpl2e = v->arch.monitor_vtable;
787 if ( mpl2e )
788 {
789 l2_pgentry_t hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
790 l2_pgentry_t smfn = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
792 if ( l2e_get_flags(hl2e) & _PAGE_PRESENT )
793 {
794 put_shadow_ref(l2e_get_pfn(hl2e));
795 mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_empty();
796 }
797 if ( l2e_get_flags(smfn) & _PAGE_PRESENT )
798 {
799 put_shadow_ref(l2e_get_pfn(smfn));
800 mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
801 }
802 }
803 }
804 }
805 #endif
806 // Now, the only refs to shadow pages that are left are from the shadow
807 // pages themselves. We just unpin the pinned pages, and the rest
808 // should automatically disappear.
809 //
810 // NB: Beware: each explicitly or implicit call to free_shadow_page
811 // can/will result in the hash bucket getting rewritten out from
812 // under us... First, collect the list of pinned pages, then
813 // free them.
814 //
815 for ( i = 0; i < shadow_ht_buckets; i++ )
816 {
817 u32 count;
818 unsigned long *mfn_list;
820 /* Skip empty buckets. */
821 if ( d->arch.shadow_ht[i].gpfn_and_flags == 0 )
822 continue;
824 count = 0;
825 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
826 if ( MFN_PINNED(x->smfn) )
827 count++;
828 if ( !count )
829 continue;
831 mfn_list = xmalloc_array(unsigned long, count);
832 count = 0;
833 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
834 if ( MFN_PINNED(x->smfn) )
835 mfn_list[count++] = x->smfn;
837 while ( count )
838 {
839 shadow_unpin(mfn_list[--count]);
840 }
841 xfree(mfn_list);
842 }
844 // Now free the pre-zero'ed pages from the domain
845 //
846 struct list_head *list_ent, *tmp;
847 list_for_each_safe(list_ent, tmp, &d->arch.free_shadow_frames)
848 {
849 list_del(list_ent);
850 perfc_decr(free_l1_pages);
852 struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
853 if (d->arch.ops->guest_paging_levels == PAGING_L2)
854 {
855 #if CONFIG_PAGING_LEVELS >=4
856 free_domheap_pages(page, SL1_ORDER);
857 #else
858 free_domheap_page(page);
859 #endif
860 }
861 else
862 free_domheap_page(page);
863 }
865 shadow_audit(d, 0);
867 SH_LOG("Free shadow table.");
868 }
870 void __shadow_mode_disable(struct domain *d)
871 {
872 if ( unlikely(!shadow_mode_enabled(d)) )
873 return;
875 /*
876 * Currently this does not fix up page ref counts, so it is valid to call
877 * only when a domain is being destroyed.
878 */
879 BUG_ON(!test_bit(_DOMF_dying, &d->domain_flags) &&
880 shadow_mode_refcounts(d));
881 d->arch.shadow_tainted_refcnts = shadow_mode_refcounts(d);
883 free_shadow_pages(d);
884 free_writable_pte_predictions(d);
886 #ifndef NDEBUG
887 int i;
888 for ( i = 0; i < shadow_ht_buckets; i++ )
889 {
890 if ( d->arch.shadow_ht[i].gpfn_and_flags != 0 )
891 {
892 printk("%s: d->arch.shadow_ht[%x].gpfn_and_flags=%lx\n",
893 __FILE__, i, d->arch.shadow_ht[i].gpfn_and_flags);
894 BUG();
895 }
896 }
897 #endif
899 d->arch.shadow_mode = 0;
901 free_shadow_ht_entries(d);
902 free_out_of_sync_entries(d);
904 struct vcpu *v;
905 for_each_vcpu(d, v)
906 {
907 update_pagetables(v);
908 }
909 }
912 static void
913 free_p2m_table(struct domain *d)
914 {
915 // uh, this needs some work... :)
916 BUG();
917 }
920 int __shadow_mode_enable(struct domain *d, unsigned int mode)
921 {
922 struct vcpu *v;
923 int new_modes = (mode & ~d->arch.shadow_mode);
925 // Gotta be adding something to call this function.
926 ASSERT(new_modes);
928 // can't take anything away by calling this function.
929 ASSERT(!(d->arch.shadow_mode & ~mode));
931 #if defined(CONFIG_PAGING_LEVELS)
932 if(!shadow_set_guest_paging_levels(d,
933 CONFIG_PAGING_LEVELS)) {
934 printk("Unsupported guest paging levels\n");
935 domain_crash_synchronous(); /* need to take a clean path */
936 }
937 #endif
939 for_each_vcpu(d, v)
940 {
941 invalidate_shadow_ldt(v);
943 // We need to set these up for __update_pagetables().
944 // See the comment there.
946 /*
947 * arch.guest_vtable
948 */
949 if ( v->arch.guest_vtable &&
950 (v->arch.guest_vtable != __linear_l2_table) )
951 {
952 unmap_domain_page(v->arch.guest_vtable);
953 }
954 if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
955 v->arch.guest_vtable = __linear_l2_table;
956 else
957 v->arch.guest_vtable = NULL;
959 /*
960 * arch.shadow_vtable
961 */
962 if ( v->arch.shadow_vtable &&
963 (v->arch.shadow_vtable != __shadow_linear_l2_table) )
964 {
965 unmap_domain_page(v->arch.shadow_vtable);
966 }
967 if ( !(mode & SHM_external) && d->arch.ops->guest_paging_levels == 2)
968 v->arch.shadow_vtable = __shadow_linear_l2_table;
969 else
970 v->arch.shadow_vtable = NULL;
972 #if defined (__i386__)
973 /*
974 * arch.hl2_vtable
975 */
976 if ( v->arch.hl2_vtable &&
977 (v->arch.hl2_vtable != __linear_hl2_table) )
978 {
979 unmap_domain_page(v->arch.hl2_vtable);
980 }
981 if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
982 v->arch.hl2_vtable = __linear_hl2_table;
983 else
984 v->arch.hl2_vtable = NULL;
985 #endif
986 /*
987 * arch.monitor_table & arch.monitor_vtable
988 */
989 if ( v->arch.monitor_vtable )
990 {
991 free_monitor_pagetable(v);
992 }
993 if ( mode & SHM_external )
994 {
995 alloc_monitor_pagetable(v);
996 }
997 }
999 if ( new_modes & SHM_enable )
1001 ASSERT( !d->arch.shadow_ht );
1002 d->arch.shadow_ht = xmalloc_array(struct shadow_status, shadow_ht_buckets);
1003 if ( d->arch.shadow_ht == NULL )
1004 goto nomem;
1006 memset(d->arch.shadow_ht, 0,
1007 shadow_ht_buckets * sizeof(struct shadow_status));
1010 if ( new_modes & SHM_log_dirty )
1012 ASSERT( !d->arch.shadow_dirty_bitmap );
1013 d->arch.shadow_dirty_bitmap_size = (d->max_pages + 63) & ~63;
1014 d->arch.shadow_dirty_bitmap =
1015 xmalloc_array(unsigned long, d->arch.shadow_dirty_bitmap_size /
1016 (8 * sizeof(unsigned long)));
1017 if ( d->arch.shadow_dirty_bitmap == NULL )
1019 d->arch.shadow_dirty_bitmap_size = 0;
1020 goto nomem;
1022 memset(d->arch.shadow_dirty_bitmap, 0,
1023 d->arch.shadow_dirty_bitmap_size/8);
1026 if ( new_modes & SHM_translate )
1028 if ( !(new_modes & SHM_external) )
1030 ASSERT( !pagetable_get_paddr(d->arch.phys_table) );
1031 if ( !alloc_p2m_table(d) )
1033 printk("alloc_p2m_table failed (out-of-memory?)\n");
1034 goto nomem;
1037 else
1039 // external guests provide their own memory for their P2M maps.
1040 //
1041 ASSERT( d == page_get_owner(
1042 &frame_table[pagetable_get_pfn(d->arch.phys_table)]) );
1046 printk("audit1\n");
1047 _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
1048 printk("audit1 done\n");
1050 // Get rid of any shadow pages from any previous shadow mode.
1051 //
1052 free_shadow_pages(d);
1054 printk("audit2\n");
1055 _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
1056 printk("audit2 done\n");
1058 /*
1059 * Tear down its counts by disassembling its page-table-based ref counts.
1060 * Also remove CR3's gcount/tcount.
1061 * That leaves things like GDTs and LDTs and external refs intact.
1063 * Most pages will be writable tcount=0.
1064 * Some will still be L1 tcount=0 or L2 tcount=0.
1065 * Maybe some pages will be type none tcount=0.
1066 * Pages granted external writable refs (via grant tables?) will
1067 * still have a non-zero tcount. That's OK.
1069 * gcounts will generally be 1 for PGC_allocated.
1070 * GDTs and LDTs will have additional gcounts.
1071 * Any grant-table based refs will still be in the gcount.
1073 * We attempt to grab writable refs to each page (thus setting its type).
1074 * Immediately put back those type refs.
1076 * Assert that no pages are left with L1/L2/L3/L4 type.
1077 */
1078 audit_adjust_pgtables(d, -1, 1);
1080 d->arch.shadow_mode = mode;
1082 if ( shadow_mode_refcounts(d) )
1084 struct list_head *list_ent = d->page_list.next;
1085 while ( list_ent != &d->page_list )
1087 struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
1088 if ( !get_page_type(page, PGT_writable_page) )
1089 BUG();
1090 put_page_type(page);
1092 list_ent = page->list.next;
1096 audit_adjust_pgtables(d, 1, 1);
1098 printk("audit3\n");
1099 _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
1100 printk("audit3 done\n");
1102 return 0;
1104 nomem:
1105 if ( (new_modes & SHM_enable) )
1107 xfree(d->arch.shadow_ht);
1108 d->arch.shadow_ht = NULL;
1110 if ( (new_modes & SHM_log_dirty) )
1112 xfree(d->arch.shadow_dirty_bitmap);
1113 d->arch.shadow_dirty_bitmap = NULL;
1115 if ( (new_modes & SHM_translate) && !(new_modes & SHM_external) &&
1116 pagetable_get_paddr(d->arch.phys_table) )
1118 free_p2m_table(d);
1120 return -ENOMEM;
1124 int shadow_mode_enable(struct domain *d, unsigned int mode)
1125 {
1126 int rc;
1127 shadow_lock(d);
1128 rc = __shadow_mode_enable(d, mode);
1129 shadow_unlock(d);
1130 return rc;
1131 }
1133 static int shadow_mode_table_op(
1134 struct domain *d, dom0_shadow_control_t *sc)
1136 unsigned int op = sc->op;
1137 int i, rc = 0;
1138 struct vcpu *v;
1140 ASSERT(shadow_lock_is_acquired(d));
1142 SH_VLOG("shadow mode table op %lx %lx count %d",
1143 (unsigned long)pagetable_get_pfn(d->vcpu[0]->arch.guest_table), /* XXX SMP */
1144 (unsigned long)pagetable_get_pfn(d->vcpu[0]->arch.shadow_table), /* XXX SMP */
1145 d->arch.shadow_page_count);
1147 shadow_audit(d, 1);
1149 switch ( op )
1151 case DOM0_SHADOW_CONTROL_OP_FLUSH:
1152 free_shadow_pages(d);
1154 d->arch.shadow_fault_count = 0;
1155 d->arch.shadow_dirty_count = 0;
1156 d->arch.shadow_dirty_net_count = 0;
1157 d->arch.shadow_dirty_block_count = 0;
1159 break;
1161 case DOM0_SHADOW_CONTROL_OP_CLEAN:
1162 free_shadow_pages(d);
1164 sc->stats.fault_count = d->arch.shadow_fault_count;
1165 sc->stats.dirty_count = d->arch.shadow_dirty_count;
1166 sc->stats.dirty_net_count = d->arch.shadow_dirty_net_count;
1167 sc->stats.dirty_block_count = d->arch.shadow_dirty_block_count;
1169 d->arch.shadow_fault_count = 0;
1170 d->arch.shadow_dirty_count = 0;
1171 d->arch.shadow_dirty_net_count = 0;
1172 d->arch.shadow_dirty_block_count = 0;
1174 if ( (d->max_pages > sc->pages) ||
1175 (sc->dirty_bitmap == NULL) ||
1176 (d->arch.shadow_dirty_bitmap == NULL) )
1178 rc = -EINVAL;
1179 break;
1182 sc->pages = d->max_pages;
1184 #define chunk (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
1185 for ( i = 0; i < d->max_pages; i += chunk )
1187 int bytes = ((((d->max_pages - i) > chunk) ?
1188 chunk : (d->max_pages - i)) + 7) / 8;
1190 if (copy_to_user(
1191 sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
1192 d->arch.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
1193 bytes))
1195 // copy_to_user can fail when copying to guest app memory.
1196 // app should zero buffer after mallocing, and pin it
1197 rc = -EINVAL;
1198 memset(
1199 d->arch.shadow_dirty_bitmap +
1200 (i/(8*sizeof(unsigned long))),
1201 0, (d->max_pages/8) - (i/(8*sizeof(unsigned long))));
1202 break;
1204 memset(
1205 d->arch.shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
1206 0, bytes);
1209 break;
1211 case DOM0_SHADOW_CONTROL_OP_PEEK:
1212 sc->stats.fault_count = d->arch.shadow_fault_count;
1213 sc->stats.dirty_count = d->arch.shadow_dirty_count;
1214 sc->stats.dirty_net_count = d->arch.shadow_dirty_net_count;
1215 sc->stats.dirty_block_count = d->arch.shadow_dirty_block_count;
1217 if ( (d->max_pages > sc->pages) ||
1218 (sc->dirty_bitmap == NULL) ||
1219 (d->arch.shadow_dirty_bitmap == NULL) )
1221 rc = -EINVAL;
1222 break;
1225 sc->pages = d->max_pages;
1226 if (copy_to_user(
1227 sc->dirty_bitmap, d->arch.shadow_dirty_bitmap, (d->max_pages+7)/8))
1229 rc = -EINVAL;
1230 break;
1233 break;
1235 default:
1236 rc = -EINVAL;
1237 break;
1240 SH_VLOG("shadow mode table op : page count %d", d->arch.shadow_page_count);
1241 shadow_audit(d, 1);
1243 for_each_vcpu(d,v)
1244 __update_pagetables(v);
1246 return rc;
1249 int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
1251 unsigned int op = sc->op;
1252 int rc = 0;
1253 struct vcpu *v;
1255 if ( unlikely(d == current->domain) )
1257 DPRINTK("Don't try to do a shadow op on yourself!\n");
1258 return -EINVAL;
1261 domain_pause(d);
1263 shadow_lock(d);
1265 switch ( op )
1267 case DOM0_SHADOW_CONTROL_OP_OFF:
1268 __shadow_sync_all(d);
1269 __shadow_mode_disable(d);
1270 break;
1272 case DOM0_SHADOW_CONTROL_OP_ENABLE_TEST:
1273 free_shadow_pages(d);
1274 rc = __shadow_mode_enable(d, SHM_enable);
1275 break;
1277 case DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY:
1278 free_shadow_pages(d);
1279 rc = __shadow_mode_enable(
1280 d, d->arch.shadow_mode|SHM_enable|SHM_log_dirty);
1281 break;
1283 case DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE:
1284 free_shadow_pages(d);
1285 rc = __shadow_mode_enable(
1286 d, d->arch.shadow_mode|SHM_enable|SHM_refcounts|SHM_translate);
1287 break;
1289 default:
1290 rc = shadow_mode_enabled(d) ? shadow_mode_table_op(d, sc) : -EINVAL;
1291 break;
1294 shadow_unlock(d);
1296 for_each_vcpu(d,v)
1297 update_pagetables(v);
1299 domain_unpause(d);
1301 return rc;
1304 void shadow_mode_init(void)
1305 {
1306 }
1308 int _shadow_mode_refcounts(struct domain *d)
1309 {
1310 return shadow_mode_refcounts(d);
1311 }
1313 int
1314 set_p2m_entry(struct domain *d, unsigned long pfn, unsigned long mfn,
1315 struct domain_mmap_cache *l2cache,
1316 struct domain_mmap_cache *l1cache)
1318 unsigned long tabpfn = pagetable_get_pfn(d->arch.phys_table);
1319 l2_pgentry_t *l2, l2e;
1320 l1_pgentry_t *l1;
1321 struct pfn_info *l1page;
1322 unsigned long va = pfn << PAGE_SHIFT;
1324 ASSERT(tabpfn != 0);
1326 l2 = map_domain_page_with_cache(tabpfn, l2cache);
1327 l2e = l2[l2_table_offset(va)];
1328 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
1330 l1page = alloc_domheap_page(NULL);
1331 if ( !l1page )
1333 unmap_domain_page_with_cache(l2, l2cache);
1334 return 0;
1337 l1 = map_domain_page_with_cache(page_to_pfn(l1page), l1cache);
1338 memset(l1, 0, PAGE_SIZE);
1339 unmap_domain_page_with_cache(l1, l1cache);
1341 l2e = l2e_from_page(l1page, __PAGE_HYPERVISOR);
1342 l2[l2_table_offset(va)] = l2e;
1344 unmap_domain_page_with_cache(l2, l2cache);
1346 l1 = map_domain_page_with_cache(l2e_get_pfn(l2e), l1cache);
1347 l1[l1_table_offset(va)] = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
1348 unmap_domain_page_with_cache(l1, l1cache);
1350 return 1;
1353 int
1354 alloc_p2m_table(struct domain *d)
1356 struct list_head *list_ent;
1357 struct pfn_info *page, *l2page;
1358 l2_pgentry_t *l2;
1359 unsigned long mfn, pfn;
1360 struct domain_mmap_cache l1cache, l2cache;
1362 l2page = alloc_domheap_page(NULL);
1363 if ( l2page == NULL )
1364 return 0;
1366 domain_mmap_cache_init(&l1cache);
1367 domain_mmap_cache_init(&l2cache);
1369 d->arch.phys_table = mk_pagetable(page_to_phys(l2page));
1370 l2 = map_domain_page_with_cache(page_to_pfn(l2page), &l2cache);
1371 memset(l2, 0, PAGE_SIZE);
1372 unmap_domain_page_with_cache(l2, &l2cache);
1374 list_ent = d->page_list.next;
1375 while ( list_ent != &d->page_list )
1377 page = list_entry(list_ent, struct pfn_info, list);
1378 mfn = page_to_pfn(page);
1379 pfn = get_pfn_from_mfn(mfn);
1380 ASSERT(pfn != INVALID_M2P_ENTRY);
1381 ASSERT(pfn < (1u<<20));
1383 set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
1385 list_ent = page->list.next;
1388 list_ent = d->xenpage_list.next;
1389 while ( list_ent != &d->xenpage_list )
1391 page = list_entry(list_ent, struct pfn_info, list);
1392 mfn = page_to_pfn(page);
1393 pfn = get_pfn_from_mfn(mfn);
1394 if ( (pfn != INVALID_M2P_ENTRY) &&
1395 (pfn < (1u<<20)) )
1397 set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
1400 list_ent = page->list.next;
1403 domain_mmap_cache_destroy(&l2cache);
1404 domain_mmap_cache_destroy(&l1cache);
1406 return 1;
1409 void shadow_l1_normal_pt_update(
1410 struct domain *d,
1411 unsigned long pa, l1_pgentry_t gpte,
1412 struct domain_mmap_cache *cache)
1414 unsigned long sl1mfn;
1415 l1_pgentry_t *spl1e, spte;
1417 shadow_lock(d);
1419 sl1mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l1_shadow);
1420 if ( sl1mfn )
1422 SH_VVLOG("shadow_l1_normal_pt_update pa=%p, gpte=%" PRIpte,
1423 (void *)pa, l1e_get_intpte(gpte));
1424 l1pte_propagate_from_guest(current->domain, gpte, &spte);
1426 spl1e = map_domain_page_with_cache(sl1mfn, cache);
1427 spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t)] = spte;
1428 unmap_domain_page_with_cache(spl1e, cache);
1431 shadow_unlock(d);
1434 void shadow_l2_normal_pt_update(
1435 struct domain *d,
1436 unsigned long pa, l2_pgentry_t gpde,
1437 struct domain_mmap_cache *cache)
1439 unsigned long sl2mfn;
1440 l2_pgentry_t *spl2e;
1442 shadow_lock(d);
1444 sl2mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l2_shadow);
1445 if ( sl2mfn )
1447 SH_VVLOG("shadow_l2_normal_pt_update pa=%p, gpde=%" PRIpte,
1448 (void *)pa, l2e_get_intpte(gpde));
1449 spl2e = map_domain_page_with_cache(sl2mfn, cache);
1450 validate_pde_change(d, gpde,
1451 &spl2e[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t)]);
1452 unmap_domain_page_with_cache(spl2e, cache);
1455 shadow_unlock(d);
1458 #if CONFIG_PAGING_LEVELS >= 3
1459 void shadow_l3_normal_pt_update(
1460 struct domain *d,
1461 unsigned long pa, l3_pgentry_t gpde,
1462 struct domain_mmap_cache *cache)
1464 unsigned long sl3mfn;
1465 pgentry_64_t *spl3e;
1467 shadow_lock(d);
1469 sl3mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l3_shadow);
1470 if ( sl3mfn )
1472 SH_VVLOG("shadow_l3_normal_pt_update pa=%p, gpde=%" PRIpte,
1473 (void *)pa, l3e_get_intpte(gpde));
1475 spl3e = (pgentry_64_t *) map_domain_page_with_cache(sl3mfn, cache);
1476 validate_entry_change(d, (pgentry_64_t *) &gpde,
1477 &spl3e[(pa & ~PAGE_MASK) / sizeof(l3_pgentry_t)],
1478 shadow_type_to_level(PGT_l3_shadow));
1479 unmap_domain_page_with_cache(spl3e, cache);
1482 shadow_unlock(d);
1484 #endif
1486 #if CONFIG_PAGING_LEVELS >= 4
1487 void shadow_l4_normal_pt_update(
1488 struct domain *d,
1489 unsigned long pa, l4_pgentry_t gpde,
1490 struct domain_mmap_cache *cache)
1492 unsigned long sl4mfn;
1493 pgentry_64_t *spl4e;
1495 shadow_lock(d);
1497 sl4mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l4_shadow);
1498 if ( sl4mfn )
1500 SH_VVLOG("shadow_l4_normal_pt_update pa=%p, gpde=%" PRIpte,
1501 (void *)pa, l4e_get_intpte(gpde));
1503 spl4e = (pgentry_64_t *)map_domain_page_with_cache(sl4mfn, cache);
1504 validate_entry_change(d, (pgentry_64_t *)&gpde,
1505 &spl4e[(pa & ~PAGE_MASK) / sizeof(l4_pgentry_t)],
1506 shadow_type_to_level(PGT_l4_shadow));
1507 unmap_domain_page_with_cache(spl4e, cache);
1510 shadow_unlock(d);
1512 #endif
1514 static void
1515 translate_l1pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l1mfn)
1517 int i;
1518 l1_pgentry_t *l1;
1520 l1 = map_domain_page(l1mfn);
1521 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
1523 if ( is_guest_l1_slot(i) &&
1524 (l1e_get_flags(l1[i]) & _PAGE_PRESENT) )
1526 unsigned long mfn = l1e_get_pfn(l1[i]);
1527 unsigned long gpfn = __mfn_to_gpfn(d, mfn);
1528 ASSERT(l1e_get_pfn(p2m[gpfn]) == mfn);
1529 l1[i] = l1e_from_pfn(gpfn, l1e_get_flags(l1[i]));
1532 unmap_domain_page(l1);
1535 // This is not general enough to handle arbitrary pagetables
1536 // with shared L1 pages, etc., but it is sufficient for bringing
1537 // up dom0.
1538 //
1539 void
1540 translate_l2pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn,
1541 unsigned int type)
1543 int i;
1544 l2_pgentry_t *l2;
1546 ASSERT(shadow_mode_translate(d) && !shadow_mode_external(d));
1548 l2 = map_domain_page(l2mfn);
1549 for (i = 0; i < L2_PAGETABLE_ENTRIES; i++)
1551 if ( is_guest_l2_slot(type, i) &&
1552 (l2e_get_flags(l2[i]) & _PAGE_PRESENT) )
1554 unsigned long mfn = l2e_get_pfn(l2[i]);
1555 unsigned long gpfn = __mfn_to_gpfn(d, mfn);
1556 ASSERT(l1e_get_pfn(p2m[gpfn]) == mfn);
1557 l2[i] = l2e_from_pfn(gpfn, l2e_get_flags(l2[i]));
1558 translate_l1pgtable(d, p2m, mfn);
1561 unmap_domain_page(l2);
1564 void
1565 remove_shadow(struct domain *d, unsigned long gpfn, u32 stype)
1567 unsigned long smfn;
1569 //printk("%s(gpfn=%lx, type=%x)\n", __func__, gpfn, stype);
1571 shadow_lock(d);
1573 while ( stype >= PGT_l1_shadow )
1575 smfn = __shadow_status(d, gpfn, stype);
1576 if ( smfn && MFN_PINNED(smfn) )
1577 shadow_unpin(smfn);
1578 stype -= PGT_l1_shadow;
1581 shadow_unlock(d);
1584 unsigned long
1585 gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
1587 ASSERT( shadow_mode_translate(d) );
1589 perfc_incrc(gpfn_to_mfn_foreign);
1591 unsigned long va = gpfn << PAGE_SHIFT;
1592 unsigned long tabpfn = pagetable_get_pfn(d->arch.phys_table);
1593 l2_pgentry_t *l2 = map_domain_page(tabpfn);
1594 l2_pgentry_t l2e = l2[l2_table_offset(va)];
1595 unmap_domain_page(l2);
1596 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
1598 printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l2e=%" PRIpte "\n",
1599 d->domain_id, gpfn, l2e_get_intpte(l2e));
1600 return INVALID_MFN;
1602 l1_pgentry_t *l1 = map_domain_page(l2e_get_pfn(l2e));
1603 l1_pgentry_t l1e = l1[l1_table_offset(va)];
1604 unmap_domain_page(l1);
1606 #if 0
1607 printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => %lx tabpfn=%lx l2e=%lx l1tab=%lx, l1e=%lx\n",
1608 d->domain_id, gpfn, l1_pgentry_val(l1e) >> PAGE_SHIFT, tabpfn, l2e, l1tab, l1e);
1609 #endif
1611 if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
1613 printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l1e=%" PRIpte "\n",
1614 d->domain_id, gpfn, l1e_get_intpte(l1e));
1615 return INVALID_MFN;
1618 return l1e_get_pfn(l1e);
1621 static u32 remove_all_access_in_page(
1622 struct domain *d, unsigned long l1mfn, unsigned long forbidden_gmfn)
1623 {
1624 l1_pgentry_t *pl1e = map_domain_page(l1mfn);
1625 l1_pgentry_t match, ol2e;
1626 unsigned long flags = _PAGE_PRESENT;
1627 int i;
1628 u32 count = 0;
1629 int is_l1_shadow =
1630 ((frame_table[l1mfn].u.inuse.type_info & PGT_type_mask) ==
1631 PGT_l1_shadow);
1633 match = l1e_from_pfn(forbidden_gmfn, flags);
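/*
 * Presumably the test corrected by this changeset: the loop below must skip
 * entries that do NOT reference forbidden_gmfn (those that differ from
 * 'match') and tear down only the ones that do; with the sense of the check
 * inverted, exactly the wrong entries would be cleared.
 */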
1635 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
1636 {
1637 if ( l1e_has_changed(pl1e[i], match, flags) )
1638 continue;
1640 ol2e = pl1e[i];
1641 pl1e[i] = l1e_empty();
1642 count++;
1644 if ( is_l1_shadow )
1645 shadow_put_page_from_l1e(ol2e, d);
1646 else /* must be an hl2 page */
1647 put_page(&frame_table[forbidden_gmfn]);
1648 }
1650 unmap_domain_page(pl1e);
1652 return count;
1653 }
1655 static u32 __shadow_remove_all_access(struct domain *d, unsigned long forbidden_gmfn)
1656 {
1657 int i;
1658 struct shadow_status *a;
1659 u32 count = 0;
1661 if ( unlikely(!shadow_mode_enabled(d)) )
1662 return 0;
1664 ASSERT(shadow_lock_is_acquired(d));
1665 perfc_incrc(remove_all_access);
1667 for (i = 0; i < shadow_ht_buckets; i++)
1668 {
1669 a = &d->arch.shadow_ht[i];
1670 while ( a && a->gpfn_and_flags )
1671 {
1672 switch (a->gpfn_and_flags & PGT_type_mask)
1673 {
1674 case PGT_l1_shadow:
1675 case PGT_l2_shadow:
1676 case PGT_l3_shadow:
1677 case PGT_l4_shadow:
1678 case PGT_hl2_shadow:
1679 count += remove_all_access_in_page(d, a->smfn, forbidden_gmfn);
1680 break;
1681 case PGT_snapshot:
1682 case PGT_writable_pred:
1683 // these can't hold refs to the forbidden page
1684 break;
1685 default:
1686 BUG();
1687 }
1689 a = a->next;
1690 }
1691 }
1693 return count;
1694 }
1696 void shadow_drop_references(
1697 struct domain *d, struct pfn_info *page)
1698 {
1699 if ( likely(!shadow_mode_refcounts(d)) ||
1700 ((page->u.inuse.type_info & PGT_count_mask) == 0) )
1701 return;
1703 /* XXX This needs more thought... */
1704 printk("%s: needing to call __shadow_remove_all_access for mfn=%lx\n",
1705 __func__, page_to_pfn(page));
1706 printk("Before: mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_pfn(page),
1707 page->count_info, page->u.inuse.type_info);
1709 shadow_lock(d);
1710 __shadow_remove_all_access(d, page_to_pfn(page));
1711 shadow_unlock(d);
1713 printk("After: mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_pfn(page),
1714 page->count_info, page->u.inuse.type_info);
1715 }
1717 /* XXX Needs more thought. Neither pretty nor fast: a place holder. */
1718 void shadow_sync_and_drop_references(
1719 struct domain *d, struct pfn_info *page)
1720 {
1721 if ( likely(!shadow_mode_refcounts(d)) )
1722 return;
1724 shadow_lock(d);
1726 if ( page_out_of_sync(page) )
1727 __shadow_sync_mfn(d, page_to_pfn(page));
1729 __shadow_remove_all_access(d, page_to_pfn(page));
1731 shadow_unlock(d);
1732 }
1734 /*
1735 * Local variables:
1736 * mode: C
1737 * c-set-style: "BSD"
1738 * c-basic-offset: 4
1739 * tab-width: 4
1740 * indent-tabs-mode: nil
1741 * End:
1742 */