xen-vtx-unstable

view xen/arch/x86/shadow.c @ 6774:4d899a738d59

merge?
author cl349@firebug.cl.cam.ac.uk
date Tue Sep 13 15:05:49 2005 +0000 (2005-09-13)
parents 3feb7fa331ed f27205ea60ef
children e7c7196fa329 8ca0f98ba8e2
1 /******************************************************************************
2 * arch/x86/shadow_64.c
3 *
4 * Copyright (c) 2005 Michael A Fetterman
5 * Based on an earlier implementation by Ian Pratt et al
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21 /*
22 * Jun Nakajima <jun.nakajima@intel.com>
23 * Chengyuan Li <chengyuan.li@intel.com>
24 *
25 * Extended to support 64-bit guests.
26 */
28 #include <xen/config.h>
29 #include <xen/types.h>
30 #include <xen/mm.h>
31 #include <xen/domain_page.h>
32 #include <asm/shadow.h>
33 #include <asm/page.h>
34 #include <xen/event.h>
35 #include <xen/sched.h>
36 #include <xen/trace.h>
38 extern void free_shadow_pages(struct domain *d);
40 #if SHADOW_DEBUG
41 static void mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn);
42 #endif
44 #if CONFIG_PAGING_LEVELS == 3
45 #include <asm/shadow_64.h>
46 static unsigned long shadow_l3_table(
47 struct domain *d, unsigned long gpfn, unsigned long gmfn);
48 #endif
50 #if CONFIG_PAGING_LEVELS == 4
51 #include <asm/shadow_64.h>
52 static unsigned long shadow_l4_table(
53 struct domain *d, unsigned long gpfn, unsigned long gmfn);
54 static void shadow_map_into_current(struct vcpu *v,
55 unsigned long va, unsigned int from, unsigned int to);
56 static inline void validate_bl2e_change( struct domain *d,
57 guest_root_pgentry_t *new_gle_p, pgentry_64_t *shadow_l3, int index);
59 #endif
61 /********
63 There's a per-domain shadow table spin lock which works fine for SMP
64 hosts. We don't have to worry about interrupts as no shadow operations
65 happen in an interrupt context. It's probably not quite ready for SMP
66 guest operation as we have to worry about synchronisation between gpte
67 and spte updates. It's possible that this might only happen in a
68 hypercall context, in which case we'll probably have a per-domain
69 hypercall lock anyhow (at least initially).
71 ********/
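/*
 * A minimal sketch of the locking discipline described above, assuming the
 * shadow_lock()/shadow_unlock() helpers from asm/shadow.h; it mirrors the
 * fault/invlpg paths later in this file and is illustrative, not a verbatim
 * excerpt:
 *
 *     shadow_lock(d);
 *     __shadow_sync_va(v, va);                    // resync anything stale for va
 *     l1pte_propagate_from_guest(d, gpte, &spte); // derive spte from gpte
 *     shadow_set_l1e(va, spte, 1);                // install the shadow entry
 *     shadow_unlock(d);
 */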
73 static inline int
74 shadow_promote(struct domain *d, unsigned long gpfn, unsigned long gmfn,
75 unsigned long new_type)
76 {
77 struct pfn_info *page = pfn_to_page(gmfn);
78 int pinned = 0, okay = 1;
80 if ( page_out_of_sync(page) )
81 {
82 // Don't know how long ago this snapshot was taken.
83 // Can't trust it to be recent enough.
84 //
85 __shadow_sync_mfn(d, gmfn);
86 }
88 if ( !shadow_mode_refcounts(d) )
89 return 1;
91 if ( unlikely(page_is_page_table(page)) )
92 return 1;
94 FSH_LOG("%s: gpfn=%lx gmfn=%lx nt=%08lx", __func__, gpfn, gmfn, new_type);
96 if ( !shadow_remove_all_write_access(d, gpfn, gmfn) )
97 {
98 FSH_LOG("%s: couldn't find/remove all write accesses, gpfn=%lx gmfn=%lx",
99 __func__, gpfn, gmfn);
100 #if 1 || defined(LIVE_DANGEROUSLY)
101 set_bit(_PGC_page_table, &page->count_info);
102 return 1;
103 #endif
104 return 0;
106 }
108 // To convert this page to use as a page table, the writable count
109 // should now be zero. Test this by grabbing the page as a page table,
110 // and then immediately releasing. This will also deal with any
111 // necessary TLB flushing issues for us.
112 //
113 // The cruft here about pinning doesn't really work right. This
114 // needs rethinking/rewriting... Need to gracefully deal with the
115 // TLB flushes required when promoting a writable page, and also deal
116 // with any outstanding (external) writable refs to this page (by
117 // refusing to promote it). The pinning headache complicates this
118 // code -- it would all get much simpler if we stop using
119 // shadow_lock() and move the shadow code to BIGLOCK().
120 //
121 if ( unlikely(!get_page(page, d)) )
122 BUG(); // XXX -- needs more thought for a graceful failure
123 if ( unlikely(test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info)) )
124 {
125 pinned = 1;
126 put_page_and_type(page);
127 }
128 if ( get_page_type(page, PGT_base_page_table) )
129 {
130 set_bit(_PGC_page_table, &page->count_info);
131 put_page_type(page);
132 }
133 else
134 {
135 printk("shadow_promote: get_page_type failed "
136 "dom%d gpfn=%lx gmfn=%lx t=%08lx\n",
137 d->domain_id, gpfn, gmfn, new_type);
138 okay = 0;
139 }
141 // Now put the type back to writable...
142 if ( unlikely(!get_page_type(page, PGT_writable_page)) )
143 BUG(); // XXX -- needs more thought for a graceful failure
144 if ( unlikely(pinned) )
145 {
146 if ( unlikely(test_and_set_bit(_PGT_pinned,
147 &page->u.inuse.type_info)) )
148 BUG(); // hmm... someone pinned this again?
149 }
150 else
151 put_page_and_type(page);
153 return okay;
154 }
157 /*
158 * Things in shadow mode that collect get_page() refs to the domain's
159 * pages are:
160 * - PGC_allocated takes a gen count, just like normal.
161 * - A writable page can be pinned (paravirtualized guests may consider
162 * these pages to be L1s or L2s, and don't know the difference).
163 * Pinning a page takes a gen count (but, for domains in shadow mode,
164 * it *doesn't* take a type count)
165 * - CR3 grabs a ref to whatever it points at, just like normal.
166 * - Shadow mode grabs an initial gen count for itself, as a placeholder
167 * for whatever references will exist.
168 * - Shadow PTEs that point to a page take a gen count, just like regular
169 * PTEs. However, they don't get a type count, as get_page_type() is
170 * hardwired to keep writable pages' counts at 1 for domains in shadow
171 * mode.
172 * - Whenever we shadow a page, the entry in the shadow hash grabs a
173 * general ref to the page.
174 * - Whenever a page goes out of sync, the out of sync entry grabs a
175 * general ref to the page.
176 */
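/*
 * Worked example of the rules above (illustrative only): a guest page that
 * is PGC_allocated, mapped by one shadow PTE, present in the shadow hash,
 * and on the out-of-sync list holds four general refs (one per applicable
 * bullet above), while its writable type count stays pinned at 1 by
 * get_page_type(), as noted.
 */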
177 /*
178 * pfn_info fields for pages allocated as shadow pages:
179 *
180 * All 32 bits of count_info are a simple count of refs to this shadow
181 * from a) other shadow pages, b) current CR3's (aka ed->arch.shadow_table),
182 * c) the pin on a pinned shadow root pgtable, and d) outstanding out-of-sync
183 * references.
184 *
185 * u.inuse._domain is left NULL, to prevent accidentally allowing some random
186 * domain from gaining permissions to map this page.
187 *
188 * u.inuse.type_info & PGT_type_mask remembers what kind of page is being
189 * shadowed.
190 * u.inuse.type_info & PGT_mfn_mask holds the mfn of the page being shadowed.
191 * u.inuse.type_info & PGT_pinned says that an extra reference to this shadow
192 * currently exists because this is a shadow of a root page, and we
193 * don't want to let those disappear just because no CR3 is currently pointing
194 * at it.
195 *
196 * tlbflush_timestamp holds the min & max indices of valid page table entries
197 * within the shadow page.
198 */
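/*
 * Usage sketch for the min/max packing described above.  The
 * SHADOW_ENCODE_MIN_MAX()/SHADOW_MIN()/SHADOW_MAX() helpers are defined in
 * asm/shadow.h; the calls below mirror how this file uses them, while the
 * surrounding lines are a simplified example:
 *
 *     // writer: after filling shadow L1 slots [min, max]
 *     frame_table[sl1mfn].tlbflush_timestamp = SHADOW_ENCODE_MIN_MAX(min, max);
 *
 *     // reader: only touch the window of entries known to be valid
 *     u32 min_max = pfn_to_page(smfn)->tlbflush_timestamp;
 *     int min = SHADOW_MIN(min_max), max = SHADOW_MAX(min_max);
 *     int length = max - min + 1;   // e.g. bytes to snapshot = length * sizeof(entry)
 */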
200 static inline unsigned long
201 alloc_shadow_page(struct domain *d,
202 unsigned long gpfn, unsigned long gmfn,
203 u32 psh_type)
204 {
205 struct pfn_info *page;
206 unsigned long smfn;
207 int pin = 0;
209 // Currently, we only keep pre-zero'ed pages around for use as L1's...
210 // This will change. Soon.
211 //
212 if ( psh_type == PGT_l1_shadow )
213 {
214 if ( !list_empty(&d->arch.free_shadow_frames) )
215 {
216 struct list_head *entry = d->arch.free_shadow_frames.next;
217 page = list_entry(entry, struct pfn_info, list);
218 list_del(entry);
219 perfc_decr(free_l1_pages);
220 }
221 else
222 {
223 if (d->arch.ops->guest_paging_levels == PAGING_L2)
224 {
225 #if CONFIG_PAGING_LEVELS >= 4
226 /* For a 32-bit VMX guest, 2 shadow L1s simulate 1 guest L1,
227 * so we need to allocate 2 contiguous shadow L1 pages each time.
228 */
229 page = alloc_domheap_pages(NULL, SL1_ORDER, 0);
230 if (!page)
231 domain_crash_synchronous();
233 void *l1_0 = map_domain_page(page_to_pfn(page));
234 memset(l1_0,0,PAGE_SIZE);
235 unmap_domain_page(l1_0);
236 void *l1_1 = map_domain_page(page_to_pfn(page+1));
237 memset(l1_1,0,PAGE_SIZE);
238 unmap_domain_page(l1_1);
239 #else
240 page = alloc_domheap_page(NULL);
241 if (!page)
242 domain_crash_synchronous();
243 void *l1 = map_domain_page(page_to_pfn(page));
244 memset(l1, 0, PAGE_SIZE);
245 unmap_domain_page(l1);
246 #endif
247 }
248 else
249 {
250 page = alloc_domheap_page(NULL);
251 void *l1 = map_domain_page(page_to_pfn(page));
252 memset(l1, 0, PAGE_SIZE);
253 unmap_domain_page(l1);
254 }
255 }
256 }
257 else {
258 page = alloc_domheap_page(NULL);
259 void *lp = map_domain_page(page_to_pfn(page));
260 memset(lp, 0, PAGE_SIZE);
261 unmap_domain_page(lp);
263 }
264 if ( unlikely(page == NULL) )
265 {
266 printk("Couldn't alloc shadow page! dom%d count=%d\n",
267 d->domain_id, d->arch.shadow_page_count);
268 printk("Shadow table counts: l1=%d l2=%d hl2=%d snapshot=%d\n",
269 perfc_value(shadow_l1_pages),
270 perfc_value(shadow_l2_pages),
271 perfc_value(hl2_table_pages),
272 perfc_value(snapshot_pages));
273 BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
274 }
276 smfn = page_to_pfn(page);
278 ASSERT( (gmfn & ~PGT_mfn_mask) == 0 );
279 page->u.inuse.type_info = psh_type | gmfn;
280 page->count_info = 0;
281 page->tlbflush_timestamp = 0;
283 switch ( psh_type )
284 {
285 case PGT_l1_shadow:
286 if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
287 goto fail;
288 perfc_incr(shadow_l1_pages);
289 d->arch.shadow_page_count++;
290 break;
292 case PGT_l2_shadow:
293 if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
294 goto fail;
295 perfc_incr(shadow_l2_pages);
296 d->arch.shadow_page_count++;
297 if ( PGT_l2_page_table == PGT_root_page_table )
298 pin = 1;
300 break;
302 case PGT_l3_shadow:
303 if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
304 goto fail;
305 perfc_incr(shadow_l3_pages);
306 d->arch.shadow_page_count++;
307 break;
309 case PGT_l4_shadow:
310 if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
311 goto fail;
312 perfc_incr(shadow_l4_pages);
313 d->arch.shadow_page_count++;
314 if ( PGT_l4_page_table == PGT_root_page_table )
315 pin = 1;
316 break;
318 #if CONFIG_PAGING_LEVELS >= 4
319 case PGT_fl1_shadow:
320 perfc_incr(shadow_l1_pages);
321 d->arch.shadow_page_count++;
322 break;
323 #else
325 case PGT_hl2_shadow:
326 // Treat an hl2 as an L1 for purposes of promotion.
327 // For external mode domains, treat them as an L2 for purposes of
328 // pinning.
329 //
330 if ( !shadow_promote(d, gpfn, gmfn, PGT_l1_shadow) )
331 goto fail;
332 perfc_incr(hl2_table_pages);
333 d->arch.hl2_page_count++;
334 if ( shadow_mode_external(d) &&
335 (PGT_l2_page_table == PGT_root_page_table) )
336 pin = 1;
338 break;
339 #endif
340 case PGT_snapshot:
341 perfc_incr(snapshot_pages);
342 d->arch.snapshot_page_count++;
343 break;
345 default:
346 printk("Alloc shadow weird page type type=%08x\n", psh_type);
347 BUG();
348 break;
349 }
351 // Don't add a new shadow of something that already has a snapshot.
352 //
353 ASSERT( (psh_type == PGT_snapshot) || !mfn_out_of_sync(gmfn) );
355 set_shadow_status(d, gpfn, gmfn, smfn, psh_type);
357 if ( pin )
358 shadow_pin(smfn);
360 return smfn;
362 fail:
363 FSH_LOG("promotion of pfn=%lx mfn=%lx failed! external gnttab refs?",
364 gpfn, gmfn);
365 if (psh_type == PGT_l1_shadow)
366 {
367 if (d->arch.ops->guest_paging_levels == PAGING_L2)
368 {
369 #if CONFIG_PAGING_LEVELS >=4
370 free_domheap_pages(page, SL1_ORDER);
371 #else
372 free_domheap_page(page);
373 #endif
374 }
375 else
376 free_domheap_page(page);
377 }
378 else
379 free_domheap_page(page);
380 return 0;
381 }
383 #if CONFIG_PAGING_LEVELS == 2
384 static unsigned long
385 shadow_hl2_table(struct domain *d, unsigned long gpfn, unsigned long gmfn,
386 unsigned long smfn)
387 {
388 unsigned long hl2mfn;
389 l1_pgentry_t *hl2;
390 int limit;
392 ASSERT(PGT_base_page_table == PGT_l2_page_table);
394 if ( unlikely(!(hl2mfn = alloc_shadow_page(d, gpfn, gmfn, PGT_hl2_shadow))) )
395 {
396 printk("Couldn't alloc an HL2 shadow for pfn=%lx mfn=%lx\n",
397 gpfn, gmfn);
398 BUG(); /* XXX Deal gracefully with failure. */
399 }
401 SH_VVLOG("shadow_hl2_table(gpfn=%lx, gmfn=%lx, smfn=%lx) => %lx",
402 gpfn, gmfn, smfn, hl2mfn);
403 perfc_incrc(shadow_hl2_table_count);
405 hl2 = map_domain_page(hl2mfn);
407 #ifdef __i386__
408 if ( shadow_mode_external(d) )
409 limit = L2_PAGETABLE_ENTRIES;
410 else
411 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
412 #else
413 limit = 0; /* XXX x86/64 XXX */
414 #endif
416 memset(hl2, 0, limit * sizeof(l1_pgentry_t));
418 if ( !shadow_mode_external(d) )
419 {
420 memset(&hl2[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 0,
421 HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
423 // Setup easy access to the GL2, SL2, and HL2 frames.
424 //
425 hl2[l2_table_offset(LINEAR_PT_VIRT_START)] =
426 l1e_from_pfn(gmfn, __PAGE_HYPERVISOR);
427 hl2[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
428 l1e_from_pfn(smfn, __PAGE_HYPERVISOR);
429 hl2[l2_table_offset(PERDOMAIN_VIRT_START)] =
430 l1e_from_pfn(hl2mfn, __PAGE_HYPERVISOR);
431 }
433 unmap_domain_page(hl2);
435 return hl2mfn;
436 }
438 /*
439 * This could take and use a snapshot, and validate the entire page at
440 * once, or it could continue to fault in entries one at a time...
441 * Might be worth investigating...
442 */
443 static unsigned long shadow_l2_table(
444 struct domain *d, unsigned long gpfn, unsigned long gmfn)
445 {
446 unsigned long smfn;
447 l2_pgentry_t *spl2e;
449 SH_VVLOG("shadow_l2_table(gpfn=%lx, gmfn=%lx)", gpfn, gmfn);
451 perfc_incrc(shadow_l2_table_count);
453 if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l2_shadow))) )
454 {
455 printk("Couldn't alloc an L2 shadow for pfn=%lx mfn=%lx\n",
456 gpfn, gmfn);
457 BUG(); /* XXX Deal gracefully with failure. */
458 }
460 spl2e = (l2_pgentry_t *)map_domain_page(smfn);
462 /* Install hypervisor and 2x linear p.t. mappings. */
463 if ( (PGT_base_page_table == PGT_l2_page_table) &&
464 !shadow_mode_external(d) )
465 {
466 /*
467 * We could proactively fill in PDEs for pages that are already
468 * shadowed *and* where the guest PDE has _PAGE_ACCESSED set
469 * (restriction required for coherence of the accessed bit). However,
470 * we tried it and it didn't help performance. This is simpler.
471 */
472 memset(spl2e, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE*sizeof(l2_pgentry_t));
474 /* Install hypervisor and 2x linear p.t. mappings. */
475 memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
476 &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
477 HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
479 spl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
480 l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
482 spl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
483 l2e_from_paddr(__pa(page_get_owner(&frame_table[gmfn])->arch.mm_perdomain_pt),
484 __PAGE_HYPERVISOR);
486 if ( shadow_mode_translate(d) ) // NB: not external
487 {
488 unsigned long hl2mfn;
490 spl2e[l2_table_offset(RO_MPT_VIRT_START)] =
491 l2e_from_paddr(pagetable_get_paddr(d->arch.phys_table),
492 __PAGE_HYPERVISOR);
494 if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
495 hl2mfn = shadow_hl2_table(d, gpfn, gmfn, smfn);
497 // shadow_mode_translate (but not external) sl2 tables hold a
498 // ref to their hl2.
499 //
500 if ( !get_shadow_ref(hl2mfn) )
501 BUG();
503 spl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
504 l2e_from_pfn(hl2mfn, __PAGE_HYPERVISOR);
505 }
506 else
507 spl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
508 l2e_from_pfn(gmfn, __PAGE_HYPERVISOR);
509 }
510 else
511 {
512 memset(spl2e, 0, L2_PAGETABLE_ENTRIES*sizeof(l2_pgentry_t));
513 }
515 unmap_domain_page(spl2e);
517 SH_VLOG("shadow_l2_table(%lx -> %lx)", gmfn, smfn);
518 return smfn;
519 }
520 #endif
522 static void shadow_map_l1_into_current_l2(unsigned long va)
523 {
524 struct vcpu *v = current;
525 struct domain *d = v->domain;
526 l1_pgentry_t *spl1e;
527 l2_pgentry_t sl2e;
528 guest_l1_pgentry_t *gpl1e;
529 guest_l2_pgentry_t gl2e;
530 unsigned long gl1pfn, gl1mfn, sl1mfn;
531 int i, init_table = 0;
533 __guest_get_l2e(v, va, &gl2e);
534 ASSERT(guest_l2e_get_flags(gl2e) & _PAGE_PRESENT);
535 gl1pfn = l2e_get_pfn(gl2e);
537 if ( !(sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow)) )
538 {
539 /* This L1 is NOT already shadowed so we need to shadow it. */
540 SH_VVLOG("4a: l1 not shadowed");
542 gl1mfn = __gpfn_to_mfn(d, gl1pfn);
543 if ( unlikely(!VALID_MFN(gl1mfn)) )
544 {
545 // Attempt to use an invalid pfn as an L1 page.
546 // XXX this needs to be more graceful!
547 BUG();
548 }
550 if ( unlikely(!(sl1mfn =
551 alloc_shadow_page(d, gl1pfn, gl1mfn, PGT_l1_shadow))) )
552 {
553 printk("Couldn't alloc an L1 shadow for pfn=%lx mfn=%lx\n",
554 gl1pfn, gl1mfn);
555 BUG(); /* XXX Need to deal gracefully with failure. */
556 }
558 perfc_incrc(shadow_l1_table_count);
559 init_table = 1;
560 }
561 else
562 {
563 /* This L1 is shadowed already, but the L2 entry is missing. */
564 SH_VVLOG("4b: was shadowed, l2 missing (%lx)", sl1mfn);
565 }
567 #ifndef NDEBUG
568 l2_pgentry_t old_sl2e;
569 __shadow_get_l2e(v, va, &old_sl2e);
570 ASSERT( !(l2e_get_flags(old_sl2e) & _PAGE_PRESENT) );
571 #endif
573 #if CONFIG_PAGING_LEVELS >=4
574 if (d->arch.ops->guest_paging_levels == PAGING_L2)
575 {
576 /* For a 32-bit VMX guest on a 64-bit host, we need to
577 * update two L2 entries each time.
578 */
579 if ( !get_shadow_ref(sl1mfn))
580 BUG();
581 l2pde_general(d, &gl2e, &sl2e, sl1mfn);
582 __guest_set_l2e(v, va, &gl2e);
583 __shadow_set_l2e(v, va & ~((1<<L2_PAGETABLE_SHIFT_32) - 1), &sl2e);
584 if ( !get_shadow_ref(sl1mfn+1))
585 BUG();
586 sl2e = l2e_empty();
587 l2pde_general(d, &gl2e, &sl2e, sl1mfn+1);
588 __shadow_set_l2e(v,((va & ~((1<<L2_PAGETABLE_SHIFT_32) - 1)) + (1 << L2_PAGETABLE_SHIFT)) , &sl2e);
589 } else
590 #endif
591 {
592 if ( !get_shadow_ref(sl1mfn) )
593 BUG();
594 l2pde_general(d, &gl2e, &sl2e, sl1mfn);
595 __guest_set_l2e(v, va, &gl2e);
596 __shadow_set_l2e(v, va , &sl2e);
597 }
599 if ( init_table )
600 {
601 l1_pgentry_t sl1e;
602 int index = guest_l1_table_offset(va);
603 int min = 1, max = 0;
605 unsigned long entries, pt_va;
606 l1_pgentry_t tmp_sl1e;
607 guest_l1_pgentry_t tmp_gl1e;//Prepare for double compile
610 entries = PAGE_SIZE / sizeof(guest_l1_pgentry_t);
611 pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(entries - 1)) << L1_PAGETABLE_SHIFT;
612 gpl1e = (guest_l1_pgentry_t *) __guest_get_l1e(v, pt_va, &tmp_gl1e);
614 /* If the PGT_l1_shadow has two contiguous pages */
615 entries = PAGE_SIZE / sizeof(guest_l1_pgentry_t); // 1024 entries!!!
616 pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(entries - 1)) << L1_PAGETABLE_SHIFT;
617 spl1e = (l1_pgentry_t *) __shadow_get_l1e(v, pt_va, &tmp_sl1e);
619 /*
620 gpl1e = &(linear_pg_table[l1_linear_offset(va) &
621 ~(L1_PAGETABLE_ENTRIES-1)]);
623 spl1e = &(shadow_linear_pg_table[l1_linear_offset(va) &
624 ~(L1_PAGETABLE_ENTRIES-1)]);*/
626 for ( i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++ )
627 {
628 l1pte_propagate_from_guest(d, gpl1e[i], &sl1e);
629 if ( (l1e_get_flags(sl1e) & _PAGE_PRESENT) &&
630 unlikely(!shadow_get_page_from_l1e(sl1e, d)) )
631 sl1e = l1e_empty();
632 if ( l1e_get_flags(sl1e) == 0 )
633 {
634 // First copy entries from 0 until first invalid.
635 // Then copy entries from index until first invalid.
636 //
637 if ( i < index ) {
638 i = index - 1;
639 continue;
640 }
641 break;
642 }
643 spl1e[i] = sl1e;
644 if ( unlikely(i < min) )
645 min = i;
646 if ( likely(i > max) )
647 max = i;
648 }
650 frame_table[sl1mfn].tlbflush_timestamp =
651 SHADOW_ENCODE_MIN_MAX(min, max);
652 }
653 }
655 static void
656 shadow_set_l1e(unsigned long va, l1_pgentry_t new_spte, int create_l1_shadow)
657 {
658 struct vcpu *v = current;
659 struct domain *d = v->domain;
660 l2_pgentry_t sl2e;
662 __shadow_get_l2e(v, va, &sl2e);
663 if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
664 {
665 /*
666 * Either the L1 is not shadowed, or the shadow isn't linked into
667 * the current shadow L2.
668 */
669 if ( create_l1_shadow )
670 {
671 perfc_incrc(shadow_set_l1e_force_map);
672 shadow_map_l1_into_current_l2(va);
673 }
674 else /* check to see if it exists; if so, link it in */
675 {
676 l2_pgentry_t gpde = linear_l2_table(v)[l2_table_offset(va)];
677 unsigned long gl1pfn = l2e_get_pfn(gpde);
678 unsigned long sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow);
680 ASSERT( l2e_get_flags(gpde) & _PAGE_PRESENT );
682 if ( sl1mfn )
683 {
684 perfc_incrc(shadow_set_l1e_unlinked);
685 if ( !get_shadow_ref(sl1mfn) )
686 BUG();
687 l2pde_general(d, (guest_l2_pgentry_t *)&gpde, &sl2e, sl1mfn);
688 __guest_set_l2e(v, va, &gpde);
689 __shadow_set_l2e(v, va, &sl2e);
690 }
691 else
692 {
693 // no shadow exists, so there's nothing to do.
694 perfc_incrc(shadow_set_l1e_fail);
695 return;
696 }
697 }
698 }
700 if ( shadow_mode_refcounts(d) )
701 {
702 l1_pgentry_t old_spte;
703 __shadow_get_l1e(v, va, &old_spte);
705 // only do the ref counting if something important changed.
706 //
707 if ( l1e_has_changed(old_spte, new_spte, _PAGE_RW | _PAGE_PRESENT) )
708 {
709 if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) &&
710 !shadow_get_page_from_l1e(new_spte, d) )
711 new_spte = l1e_empty();
712 if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
713 shadow_put_page_from_l1e(old_spte, d);
714 }
715 }
717 __shadow_set_l1e(v, va, &new_spte);
719 shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va));
720 }
722 #if CONFIG_PAGING_LEVELS <= 3
723 static void shadow_invlpg_32(struct vcpu *v, unsigned long va)
724 {
725 struct domain *d = v->domain;
726 l1_pgentry_t gpte, spte;
728 ASSERT(shadow_mode_enabled(d));
730 shadow_lock(d);
732 __shadow_sync_va(v, va);
734 // XXX mafetter: will need to think about 4MB pages...
736 // It's not strictly necessary to update the shadow here,
737 // but it might save a fault later.
738 //
739 /*if (__copy_from_user(&gpte, &linear_pg_table[va >> PAGE_SHIFT],
740 sizeof(gpte))) {*/
741 if (unlikely(!__guest_get_l1e(v, va, &gpte))) {
742 perfc_incrc(shadow_invlpg_faults);
743 shadow_unlock(d);
744 return;
745 }
746 l1pte_propagate_from_guest(d, gpte, &spte);
747 shadow_set_l1e(va, spte, 1);
749 shadow_unlock(d);
750 }
751 #endif
753 static struct out_of_sync_entry *
754 shadow_alloc_oos_entry(struct domain *d)
755 {
756 struct out_of_sync_entry *f, *extra;
757 unsigned size, i;
759 if ( unlikely(d->arch.out_of_sync_free == NULL) )
760 {
761 FSH_LOG("Allocate more fullshadow tuple blocks.");
763 size = sizeof(void *) + (out_of_sync_extra_size * sizeof(*f));
764 extra = xmalloc_bytes(size);
766 /* XXX Should be more graceful here. */
767 if ( extra == NULL )
768 BUG();
770 memset(extra, 0, size);
772 /* Record the allocation block so it can be correctly freed later. */
773 d->arch.out_of_sync_extras_count++;
774 *((struct out_of_sync_entry **)&extra[out_of_sync_extra_size]) =
775 d->arch.out_of_sync_extras;
776 d->arch.out_of_sync_extras = &extra[0];
778 /* Thread a free chain through the newly-allocated nodes. */
779 for ( i = 0; i < (out_of_sync_extra_size - 1); i++ )
780 extra[i].next = &extra[i+1];
781 extra[i].next = NULL;
783 /* Add the new nodes to the free list. */
784 d->arch.out_of_sync_free = &extra[0];
785 }
787 /* Allocate a new node from the quicklist. */
788 f = d->arch.out_of_sync_free;
789 d->arch.out_of_sync_free = f->next;
791 return f;
792 }
794 static inline unsigned long
795 shadow_make_snapshot(
796 struct domain *d, unsigned long gpfn, unsigned long gmfn)
797 {
798 unsigned long smfn, sl1mfn = 0;
799 void *original, *snapshot;
800 u32 min_max = 0;
801 int min, max, length;
803 if ( test_and_set_bit(_PGC_out_of_sync, &frame_table[gmfn].count_info) )
804 {
805 ASSERT(__shadow_status(d, gpfn, PGT_snapshot));
806 return SHADOW_SNAPSHOT_ELSEWHERE;
807 }
809 perfc_incrc(shadow_make_snapshot);
811 if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_snapshot))) )
812 {
813 printk("Couldn't alloc fullshadow snapshot for pfn=%lx mfn=%lx!\n"
814 "Dom%d snapshot_count_count=%d\n",
815 gpfn, gmfn, d->domain_id, d->arch.snapshot_page_count);
816 BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
817 }
819 if ( !get_shadow_ref(smfn) )
820 BUG();
822 if ( shadow_mode_refcounts(d) &&
823 (shadow_max_pgtable_type(d, gpfn, &sl1mfn) == PGT_l1_shadow) )
824 min_max = pfn_to_page(sl1mfn)->tlbflush_timestamp;
825 pfn_to_page(smfn)->tlbflush_timestamp = min_max;
827 min = SHADOW_MIN(min_max);
828 max = SHADOW_MAX(min_max);
829 length = max - min + 1;
830 perfc_incr_histo(snapshot_copies, length, PT_UPDATES);
832 min *= sizeof(guest_l1_pgentry_t);
833 length *= sizeof(guest_l1_pgentry_t);
835 original = map_domain_page(gmfn);
836 snapshot = map_domain_page(smfn);
837 memcpy(snapshot + min, original + min, length);
838 unmap_domain_page(original);
839 unmap_domain_page(snapshot);
841 return smfn;
842 }
844 static struct out_of_sync_entry *
845 mark_mfn_out_of_sync(struct vcpu *v, unsigned long gpfn,
846 unsigned long mfn)
847 {
848 struct domain *d = v->domain;
849 struct pfn_info *page = &frame_table[mfn];
850 struct out_of_sync_entry *entry = shadow_alloc_oos_entry(d);
852 ASSERT(shadow_lock_is_acquired(d));
853 ASSERT(pfn_valid(mfn));
855 #ifndef NDEBUG
856 u32 type = page->u.inuse.type_info & PGT_type_mask;
857 if ( shadow_mode_refcounts(d) )
858 {
859 ASSERT(type == PGT_writable_page);
860 }
861 else
862 {
863 ASSERT(type && (type < PGT_l4_page_table));
864 }
865 #endif
867 FSH_LOG("%s(gpfn=%lx, mfn=%lx) c=%08x t=%08x", __func__,
868 gpfn, mfn, page->count_info, page->u.inuse.type_info);
870 // XXX this will require some more thought... Cross-domain sharing and
871 // modification of page tables? Hmm...
872 //
873 if ( d != page_get_owner(page) )
874 BUG();
876 perfc_incrc(shadow_mark_mfn_out_of_sync_calls);
878 entry->gpfn = gpfn;
879 entry->gmfn = mfn;
880 entry->snapshot_mfn = shadow_make_snapshot(d, gpfn, mfn);
881 entry->writable_pl1e = -1;
883 #if SHADOW_DEBUG
884 mark_shadows_as_reflecting_snapshot(d, gpfn);
885 #endif
887 // increment guest's ref count to represent the entry in the
888 // full shadow out-of-sync list.
889 //
890 get_page(page, d);
892 // Add to the out-of-sync list
893 //
894 entry->next = d->arch.out_of_sync;
895 d->arch.out_of_sync = entry;
897 return entry;
898 }
900 static void shadow_mark_va_out_of_sync(
901 struct vcpu *v, unsigned long gpfn, unsigned long mfn, unsigned long va)
902 {
903 struct out_of_sync_entry *entry =
904 shadow_mark_mfn_out_of_sync(v, gpfn, mfn);
905 l2_pgentry_t sl2e;
907 #if CONFIG_PAGING_LEVELS >= 4
908 {
909 l4_pgentry_t sl4e;
910 l3_pgentry_t sl3e;
912 __shadow_get_l4e(v, va, &sl4e);
913 if ( !(l4e_get_flags(sl4e) & _PAGE_PRESENT)) {
914 shadow_map_into_current(v, va, PAGING_L3, PAGING_L4);
915 }
917 if (!__shadow_get_l3e(v, va, &sl3e)) {
918 BUG();
919 }
921 if ( !(l3e_get_flags(sl3e) & _PAGE_PRESENT)) {
922 shadow_map_into_current(v, va, PAGING_L2, PAGING_L3);
923 }
924 }
925 #endif
927 // We need the address of shadow PTE that maps @va.
928 // It might not exist yet. Make sure it's there.
929 //
930 __shadow_get_l2e(v, va, &sl2e);
931 if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
932 {
933 // either this L1 isn't shadowed yet, or the shadow isn't linked into
934 // the current L2.
935 shadow_map_l1_into_current_l2(va);
936 __shadow_get_l2e(v, va, &sl2e);
937 }
938 ASSERT(l2e_get_flags(sl2e) & _PAGE_PRESENT);
940 // NB: this is stored as a machine address.
941 entry->writable_pl1e =
942 l2e_get_paddr(sl2e) | (sizeof(l1_pgentry_t) * l1_table_offset(va));
943 ASSERT( !(entry->writable_pl1e & (sizeof(l1_pgentry_t)-1)) );
945 // Increment shadow's page count to represent the reference
946 // inherent in entry->writable_pl1e
947 //
948 if ( !get_shadow_ref(l2e_get_pfn(sl2e)) )
949 BUG();
951 FSH_LOG("mark_out_of_sync(va=%lx -> writable_pl1e=%lx)",
952 va, entry->writable_pl1e);
953 }
955 /*
956 * Returns 1 if the snapshot for @gmfn exists and its @index'th entry matches.
957 * Returns 0 otherwise.
958 */
959 static int snapshot_entry_matches(
960 struct domain *d, guest_l1_pgentry_t *guest_pt,
961 unsigned long gpfn, unsigned index)
962 {
963 unsigned long smfn = __shadow_status(d, gpfn, PGT_snapshot);
964 guest_l1_pgentry_t *snapshot, gpte; // could be L1s or L2s or ...
965 int entries_match;
967 perfc_incrc(snapshot_entry_matches_calls);
969 if ( !smfn )
970 return 0;
972 snapshot = map_domain_page(smfn);
974 if (__copy_from_user(&gpte, &guest_pt[index],
975 sizeof(gpte)))
976 return 0;
978 // This could probably be smarter, but this is sufficient for
979 // our current needs.
980 //
981 entries_match = !guest_l1e_has_changed(gpte, snapshot[index],
982 PAGE_FLAG_MASK);
984 unmap_domain_page(snapshot);
986 #ifdef PERF_COUNTERS
987 if ( entries_match )
988 perfc_incrc(snapshot_entry_matches_true);
989 #endif
991 return entries_match;
992 }
994 /*
995 * Returns 1 if va's shadow mapping is out-of-sync.
996 * Returns 0 otherwise.
997 */
998 static int is_out_of_sync(struct vcpu *v, unsigned long va) /* __shadow_out_of_sync */
999 {
1000 struct domain *d = v->domain;
1001 #if defined (__x86_64__)
1002 unsigned long l2mfn = ((v->arch.flags & TF_kernel_mode)?
1003 pagetable_get_pfn(v->arch.guest_table) :
1004 pagetable_get_pfn(v->arch.guest_table_user));
1005 #else
1006 unsigned long l2mfn = pagetable_get_pfn(v->arch.guest_table);
1007 #endif
1008 unsigned long l2pfn = __mfn_to_gpfn(d, l2mfn);
1009 guest_l2_pgentry_t l2e;
1010 unsigned long l1pfn, l1mfn;
1011 guest_l1_pgentry_t *guest_pt;
1012 guest_l1_pgentry_t tmp_gle;
1013 unsigned long pt_va;
1015 ASSERT(shadow_lock_is_acquired(d));
1016 ASSERT(VALID_M2P(l2pfn));
1018 perfc_incrc(shadow_out_of_sync_calls);
1020 #if CONFIG_PAGING_LEVELS >= 4
1021 if (d->arch.ops->guest_paging_levels == PAGING_L4) { /* Mode F */
1022 pgentry_64_t le;
1023 unsigned long gmfn;
1024 unsigned long gpfn;
1025 int i;
1027 gmfn = l2mfn;
1028 gpfn = l2pfn;
1029 guest_pt = (guest_l1_pgentry_t *)v->arch.guest_vtable;
1031 for (i = PAGING_L4; i >= PAGING_L3; i--) {
1032 if ( page_out_of_sync(&frame_table[gmfn]) &&
1033 !snapshot_entry_matches(
1034 d, guest_pt, gpfn, table_offset_64(va, i)) )
1035 return 1;
1037 __rw_entry(v, va, &le, GUEST_ENTRY | GET_ENTRY | i);
1038 if ( !(entry_get_flags(le) & _PAGE_PRESENT) )
1039 return 0;
1040 gpfn = entry_get_pfn(le);
1041 gmfn = __gpfn_to_mfn(d, gpfn);
1042 if ( !VALID_MFN(gmfn) )
1043 return 0;
1044 /* Todo: check!*/
1045 guest_pt = (guest_l1_pgentry_t *)map_domain_page(gmfn);
1046 }
1049 /* L2 */
1050 if ( page_out_of_sync(&frame_table[gmfn]) &&
1051 !snapshot_entry_matches(d, guest_pt, gpfn, l2_table_offset(va)) )
1052 return 1;
1055 } else
1056 #endif
1058 if ( page_out_of_sync(&frame_table[l2mfn]) &&
1059 !snapshot_entry_matches(d, (guest_l1_pgentry_t *)v->arch.guest_vtable,
1060 l2pfn, guest_l2_table_offset(va)) )
1061 return 1;
1063 __guest_get_l2e(v, va, &l2e);
1064 if ( !(guest_l2e_get_flags(l2e) & _PAGE_PRESENT) ||
1065 (guest_l2e_get_flags(l2e) & _PAGE_PSE))
1066 return 0;
1068 l1pfn = l2e_get_pfn(l2e);
1069 l1mfn = __gpfn_to_mfn(d, l1pfn);
1071 // If the l1 pfn is invalid, it can't be out of sync...
1072 if ( !VALID_MFN(l1mfn) )
1073 return 0;
1075 pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(GUEST_L1_PAGETABLE_ENTRIES - 1))
1076 << L1_PAGETABLE_SHIFT;
1077 guest_pt = (guest_l1_pgentry_t *) __guest_get_l1e(v, pt_va, &tmp_gle);
1079 if ( page_out_of_sync(&frame_table[l1mfn]) &&
1080 !snapshot_entry_matches(
1081 d, guest_pt, l1pfn, guest_l1_table_offset(va)) )
1082 return 1;
1084 return 0;
1085 }
1087 #define GPFN_TO_GPTEPAGE(_gpfn) ((_gpfn) / (PAGE_SIZE / sizeof(guest_l1_pgentry_t)))
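/*
 * GPFN_TO_GPTEPAGE() simply groups gpfns by how many guest L1 entries fit in
 * a page: with 4KB pages and 4-byte guest PTEs that is 1024 gpfns per group
 * (512 with 8-byte PTEs), so gpfns 0..1023 share group 0, 1024..2047 group 1,
 * and so on (illustrative arithmetic).  The writable-PTE predictions below
 * are stored and looked up per group under the PGT_writable_pred type.
 */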
1088 static inline unsigned long
1089 predict_writable_pte_page(struct domain *d, unsigned long gpfn)
1090 {
1091 return __shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), PGT_writable_pred);
1092 }
1094 static inline void
1095 increase_writable_pte_prediction(struct domain *d, unsigned long gpfn, unsigned long prediction)
1096 {
1097 unsigned long score = prediction & PGT_score_mask;
1098 int create = (score == 0);
1100 // saturating addition
1101 score = (score + (1u << PGT_score_shift)) & PGT_score_mask;
1102 score = score ? score : PGT_score_mask;
1104 prediction = (prediction & PGT_mfn_mask) | score;
1106 //printk("increase gpfn=%lx pred=%lx create=%d\n", gpfn, prediction, create);
1107 set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
1109 if ( create )
1110 perfc_incr(writable_pte_predictions);
1111 }
1113 static inline void
1114 decrease_writable_pte_prediction(struct domain *d, unsigned long gpfn, unsigned long prediction)
1115 {
1116 unsigned long score = prediction & PGT_score_mask;
1117 ASSERT(score);
1119 // divide score by 2... We don't like bad predictions.
1120 //
1121 score = (score >> 1) & PGT_score_mask;
1123 prediction = (prediction & PGT_mfn_mask) | score;
1125 //printk("decrease gpfn=%lx pred=%lx score=%lx\n", gpfn, prediction, score);
1127 if ( score )
1128 set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
1129 else
1130 {
1131 delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred);
1132 perfc_decr(writable_pte_predictions);
1133 }
1134 }
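/*
 * Summary sketch of the prediction lifecycle (illustrative; the authoritative
 * flow is remove_all_write_access() below):
 *
 *     prediction = predict_writable_pte_page(d, readonly_gpfn);
 *     // hit:  remove_all_write_access_in_ptpage() fixes the entry on its
 *     //       fast path and calls increase_writable_pte_prediction()
 *     //       (saturating score).
 *     // miss: decrease_writable_pte_prediction() halves the score and
 *     //       deletes the entry once the score falls to zero.
 */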
1136 static u32 remove_all_write_access_in_ptpage(
1137 struct domain *d, unsigned long pt_pfn, unsigned long pt_mfn,
1138 unsigned long readonly_gpfn, unsigned long readonly_gmfn,
1139 u32 max_refs_to_find, unsigned long prediction)
1141 l1_pgentry_t *pt = map_domain_page(pt_mfn);
1142 l1_pgentry_t match;
1143 unsigned long flags = _PAGE_RW | _PAGE_PRESENT;
1144 int i;
1145 u32 found = 0;
1146 int is_l1_shadow =
1147 ((frame_table[pt_mfn].u.inuse.type_info & PGT_type_mask) ==
1148 PGT_l1_shadow);
1149 #if CONFIG_PAGING_LEVELS == 4
1150 is_l1_shadow |=
1151 ((frame_table[pt_mfn].u.inuse.type_info & PGT_type_mask) ==
1152 PGT_fl1_shadow);
1153 #endif
1155 match = l1e_from_pfn(readonly_gmfn, flags);
1157 // returns true if all refs have been found and fixed.
1158 //
1159 int fix_entry(int i)
1161 l1_pgentry_t old = pt[i];
1162 l1_pgentry_t new = old;
1164 l1e_remove_flags(new,_PAGE_RW);
1165 if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
1166 BUG();
1167 found++;
1168 pt[i] = new;
1169 if ( is_l1_shadow )
1170 shadow_put_page_from_l1e(old, d);
1172 #if 0
1173 printk("removed write access to pfn=%lx mfn=%lx in smfn=%lx entry %x "
1174 "is_l1_shadow=%d\n",
1175 readonly_gpfn, readonly_gmfn, pt_mfn, i, is_l1_shadow);
1176 #endif
1178 return (found == max_refs_to_find);
1181 i = readonly_gpfn & (GUEST_L1_PAGETABLE_ENTRIES - 1);
1182 if ( !l1e_has_changed(pt[i], match, flags) && fix_entry(i) )
1184 perfc_incrc(remove_write_fast_exit);
1185 increase_writable_pte_prediction(d, readonly_gpfn, prediction);
1186 unmap_domain_page(pt);
1187 return found;
1190 for (i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++)
1192 if ( unlikely(!l1e_has_changed(pt[i], match, flags)) && fix_entry(i) )
1193 break;
1196 unmap_domain_page(pt);
1198 return found;
1199 #undef MATCH_ENTRY
1202 static int remove_all_write_access(
1203 struct domain *d, unsigned long readonly_gpfn, unsigned long readonly_gmfn)
1205 int i;
1206 struct shadow_status *a;
1207 u32 found = 0, fixups, write_refs;
1208 unsigned long prediction, predicted_gpfn, predicted_smfn;
1210 ASSERT(shadow_lock_is_acquired(d));
1211 ASSERT(VALID_MFN(readonly_gmfn));
1213 perfc_incrc(remove_write_access);
1215 // If it's not a writable page, then no writable refs can be outstanding.
1216 //
1217 if ( (frame_table[readonly_gmfn].u.inuse.type_info & PGT_type_mask) !=
1218 PGT_writable_page )
1220 perfc_incrc(remove_write_not_writable);
1221 return 1;
1224 // How many outstanding writable PTEs for this page are there?
1225 //
1226 write_refs =
1227 (frame_table[readonly_gmfn].u.inuse.type_info & PGT_count_mask);
1228 if ( write_refs && MFN_PINNED(readonly_gmfn) )
1230 write_refs--;
1233 if ( write_refs == 0 )
1235 perfc_incrc(remove_write_no_work);
1236 return 1;
1239 // Before searching all the L1 page tables, check the typical culprit first
1240 //
1241 if ( (prediction = predict_writable_pte_page(d, readonly_gpfn)) )
1243 predicted_gpfn = prediction & PGT_mfn_mask;
1244 if ( (predicted_smfn = __shadow_status(d, predicted_gpfn, PGT_l1_shadow)) &&
1245 (fixups = remove_all_write_access_in_ptpage(d, predicted_gpfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, prediction)) )
1247 found += fixups;
1248 if ( found == write_refs )
1250 perfc_incrc(remove_write_predicted);
1251 return 1;
1254 else
1256 perfc_incrc(remove_write_bad_prediction);
1257 decrease_writable_pte_prediction(d, readonly_gpfn, prediction);
1261 // Search all the shadow L1 page tables...
1262 //
1263 for (i = 0; i < shadow_ht_buckets; i++)
1265 a = &d->arch.shadow_ht[i];
1266 while ( a && a->gpfn_and_flags )
1268 if ( (a->gpfn_and_flags & PGT_type_mask) == PGT_l1_shadow
1269 #if CONFIG_PAGING_LEVELS >= 4
1270 || (a->gpfn_and_flags & PGT_type_mask) == PGT_fl1_shadow
1271 #endif
1275 found += remove_all_write_access_in_ptpage(d, a->gpfn_and_flags & PGT_mfn_mask, a->smfn, readonly_gpfn, readonly_gmfn, write_refs - found, a->gpfn_and_flags & PGT_mfn_mask);
1276 if ( found == write_refs )
1277 return 1;
1280 a = a->next;
1284 FSH_LOG("%s: looking for %d refs, found %d refs",
1285 __func__, write_refs, found);
1287 return 0;
1291 static int resync_all(struct domain *d, u32 stype)
1293 struct out_of_sync_entry *entry;
1294 unsigned i;
1295 unsigned long smfn;
1296 void *guest, *shadow, *snapshot;
1297 int need_flush = 0, external = shadow_mode_external(d);
1298 int unshadow;
1299 int changed;
1301 ASSERT(shadow_lock_is_acquired(d));
1303 for ( entry = d->arch.out_of_sync; entry; entry = entry->next)
1305 if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
1306 continue;
1308 smfn = __shadow_status(d, entry->gpfn, stype);
1310 if ( !smfn )
1312 if ( shadow_mode_refcounts(d) )
1313 continue;
1315 // For lightweight shadows, even when no shadow page exists,
1316 // we need to resync the refcounts to the new contents of the
1317 // guest page.
1318 // This only applies when we have writable page tables.
1319 //
1320 if ( !shadow_mode_write_all(d) &&
1321 !((stype == PGT_l1_shadow) &&
1322 VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
1323 // Page is not writable -- no resync necessary
1324 continue;
1327 FSH_LOG("resyncing t=%08x gpfn=%lx gmfn=%lx smfn=%lx snapshot_mfn=%lx",
1328 stype, entry->gpfn, entry->gmfn, smfn, entry->snapshot_mfn);
1330 // Compare guest's new contents to its snapshot, validating
1331 // and updating its shadow as appropriate.
1332 //
1333 guest = map_domain_page(entry->gmfn);
1334 snapshot = map_domain_page(entry->snapshot_mfn);
1336 if ( smfn )
1337 shadow = map_domain_page(smfn);
1338 else
1339 shadow = NULL;
1341 unshadow = 0;
1343 u32 min_max_shadow = pfn_to_page(smfn)->tlbflush_timestamp;
1344 int min_shadow = SHADOW_MIN(min_max_shadow);
1345 int max_shadow = SHADOW_MAX(min_max_shadow);
1347 u32 min_max_snapshot =
1348 pfn_to_page(entry->snapshot_mfn)->tlbflush_timestamp;
1349 int min_snapshot = SHADOW_MIN(min_max_snapshot);
1350 int max_snapshot = SHADOW_MAX(min_max_snapshot);
1352 switch ( stype ) {
1353 case PGT_l1_shadow:
1355 guest_l1_pgentry_t *guest1 = guest;
1356 l1_pgentry_t *shadow1 = shadow;
1357 guest_l1_pgentry_t *snapshot1 = snapshot;
1359 ASSERT(VM_ASSIST(d, VMASST_TYPE_writable_pagetables) ||
1360 shadow_mode_write_all(d));
1362 if ( !shadow_mode_refcounts(d) )
1363 revalidate_l1(d, (l1_pgentry_t *)guest1, (l1_pgentry_t *)snapshot1);
1365 if ( !smfn )
1366 break;
1369 changed = 0;
1371 for ( i = min_shadow; i <= max_shadow; i++ )
1373 if ( (i < min_snapshot) || (i > max_snapshot) ||
1374 guest_l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) )
1376 need_flush |= validate_pte_change(d, guest1[i], &shadow1[i]);
1378 // can't update snapshots of linear page tables -- they
1379 // are used multiple times...
1380 //
1381 // snapshot[i] = new_pte;
1383 changed++;
1386 perfc_incrc(resync_l1);
1387 perfc_incr_histo(wpt_updates, changed, PT_UPDATES);
1388 perfc_incr_histo(l1_entries_checked, max_shadow - min_shadow + 1, PT_UPDATES);
1389 break;
1391 #if defined (__i386__)
1392 case PGT_l2_shadow:
1394 int max = -1;
1396 l2_pgentry_t *guest2 = guest;
1397 l2_pgentry_t *shadow2 = shadow;
1398 l2_pgentry_t *snapshot2 = snapshot;
1400 ASSERT(shadow_mode_write_all(d));
1401 BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
1403 changed = 0;
1404 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
1406 #if CONFIG_X86_PAE
1407 BUG(); /* FIXME: need type_info */
1408 #endif
1409 if ( !is_guest_l2_slot(0,i) && !external )
1410 continue;
1412 l2_pgentry_t new_pde = guest2[i];
1413 if ( l2e_has_changed(new_pde, snapshot2[i], PAGE_FLAG_MASK))
1415 need_flush |= validate_pde_change(d, new_pde, &shadow2[i]);
1417 // can't update snapshots of linear page tables -- they
1418 // are used multiple times...
1419 //
1420 // snapshot[i] = new_pde;
1422 changed++;
1424 if ( l2e_get_intpte(new_pde) != 0 ) /* FIXME: check flags? */
1425 max = i;
1427 // XXX - This hack works for linux guests.
1428 // Need a better solution long term.
1429 if ( !(l2e_get_flags(new_pde) & _PAGE_PRESENT) &&
1430 unlikely(l2e_get_intpte(new_pde) != 0) &&
1431 !unshadow && MFN_PINNED(smfn) )
1432 unshadow = 1;
1434 if ( max == -1 )
1435 unshadow = 1;
1436 perfc_incrc(resync_l2);
1437 perfc_incr_histo(shm_l2_updates, changed, PT_UPDATES);
1438 break;
1440 case PGT_hl2_shadow:
1442 l2_pgentry_t *guest2 = guest;
1443 l2_pgentry_t *snapshot2 = snapshot;
1444 l1_pgentry_t *shadow2 = shadow;
1446 ASSERT(shadow_mode_write_all(d));
1447 BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
1449 changed = 0;
1450 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
1452 #if CONFIG_X86_PAE
1453 BUG(); /* FIXME: need type_info */
1454 #endif
1455 if ( !is_guest_l2_slot(0, i) && !external )
1456 continue;
1458 l2_pgentry_t new_pde = guest2[i];
1459 if ( l2e_has_changed(new_pde, snapshot2[i], PAGE_FLAG_MASK) )
1461 need_flush |= validate_hl2e_change(d, new_pde, &shadow2[i]);
1463 // can't update snapshots of linear page tables -- they
1464 // are used multiple times...
1465 //
1466 // snapshot[i] = new_pde;
1468 changed++;
1471 perfc_incrc(resync_hl2);
1472 perfc_incr_histo(shm_hl2_updates, changed, PT_UPDATES);
1473 break;
1475 #else
1476 case PGT_l2_shadow:
1477 case PGT_l3_shadow:
1479 pgentry_64_t *guest_pt = guest;
1480 pgentry_64_t *shadow_pt = shadow;
1481 pgentry_64_t *snapshot_pt = snapshot;
1483 changed = 0;
1484 for ( i = min_shadow; i <= max_shadow; i++ )
1486 if ( (i < min_snapshot) || (i > max_snapshot) ||
1487 entry_has_changed(
1488 guest_pt[i], snapshot_pt[i], PAGE_FLAG_MASK) )
1490 need_flush |= validate_entry_change(
1491 d, &guest_pt[i], &shadow_pt[i],
1492 shadow_type_to_level(stype));
1493 changed++;
1496 break;
1500 case PGT_l4_shadow:
1502 int max = -1;
1504 guest_root_pgentry_t *guest_root = guest;
1505 l4_pgentry_t *shadow4 = shadow;
1506 guest_root_pgentry_t *snapshot_root = snapshot;
1508 changed = 0;
1509 for ( i = 0; i < GUEST_ROOT_PAGETABLE_ENTRIES; i++ )
1511 if ( !is_guest_l4_slot(i) && !external )
1512 continue;
1513 guest_root_pgentry_t new_root_e = guest_root[i];
1514 if ( root_entry_has_changed(
1515 new_root_e, snapshot_root[i], PAGE_FLAG_MASK))
1517 if (d->arch.ops->guest_paging_levels == PAGING_L4) {
1518 need_flush |= validate_entry_change(
1519 d, (pgentry_64_t *)&new_root_e,
1520 (pgentry_64_t *)&shadow4[i], shadow_type_to_level(stype));
1521 } else {
1522 validate_bl2e_change(d, &new_root_e, shadow, i);
1524 changed++;
1525 ESH_LOG("%d: shadow4 mfn: %lx, shadow root: %lx\n", i,
1526 smfn, pagetable_get_paddr(current->arch.shadow_table));
1528 if ( guest_root_get_intpte(new_root_e) != 0 ) /* FIXME: check flags? */
1529 max = i;
1531 // Need a better solution in the long term.
1532 if ( !(guest_root_get_flags(new_root_e) & _PAGE_PRESENT) &&
1533 unlikely(guest_root_get_intpte(new_root_e) != 0) &&
1534 !unshadow &&
1535 (frame_table[smfn].u.inuse.type_info & PGT_pinned) )
1536 unshadow = 1;
1538 if ( max == -1 )
1539 unshadow = 1;
1540 perfc_incrc(resync_l4);
1541 perfc_incr_histo(shm_l4_updates, changed, PT_UPDATES);
1542 break;
1545 #endif
1546 default:
1547 BUG();
1550 if ( smfn )
1551 unmap_domain_page(shadow);
1552 unmap_domain_page(snapshot);
1553 unmap_domain_page(guest);
1555 if ( unlikely(unshadow) )
1557 perfc_incrc(unshadow_l2_count);
1558 shadow_unpin(smfn);
1559 #if defined (__i386__)
1560 if ( unlikely(shadow_mode_external(d)) )
1562 unsigned long hl2mfn;
1564 if ( (hl2mfn = __shadow_status(d, entry->gpfn, PGT_hl2_shadow)) &&
1565 MFN_PINNED(hl2mfn) )
1566 shadow_unpin(hl2mfn);
1568 #endif
1572 return need_flush;
1575 static void sync_all(struct domain *d)
1577 struct out_of_sync_entry *entry;
1578 int need_flush = 0;
1580 perfc_incrc(shadow_sync_all);
1582 ASSERT(shadow_lock_is_acquired(d));
1584 // First, remove all write permissions to the page tables
1585 //
1586 for ( entry = d->arch.out_of_sync; entry; entry = entry->next)
1588 // Skip entries that have low bits set... Those aren't
1589 // real PTEs.
1590 //
1591 if ( entry->writable_pl1e & (sizeof(l1_pgentry_t)-1) )
1592 continue;
1594 l1_pgentry_t *ppte = (l1_pgentry_t *)(
1595 (char *)map_domain_page(entry->writable_pl1e >> PAGE_SHIFT) +
1596 (entry->writable_pl1e & ~PAGE_MASK));
1597 l1_pgentry_t opte = *ppte;
1598 l1_pgentry_t npte = opte;
1599 l1e_remove_flags(npte, _PAGE_RW);
1601 if ( (l1e_get_flags(npte) & _PAGE_PRESENT) &&
1602 !shadow_get_page_from_l1e(npte, d) )
1603 BUG();
1604 *ppte = npte;
1605 shadow_put_page_from_l1e(opte, d);
1607 unmap_domain_page(ppte);
1610 // XXX mafetter: SMP
1611 //
1612 // With the current algorithm, we've gotta flush all the TLBs
1613 // before we can safely continue. I don't think we want to
1614 // do it this way, so I think we should consider making
1615 // entirely private copies of the shadow for each vcpu, and/or
1616 // possibly having a mix of private and shared shadow state
1617 // (any path from a PTE that grants write access to an out-of-sync
1618 // page table page needs to be vcpu private).
1619 //
1620 #if 0 // this should be enabled for SMP guests...
1621 flush_tlb_mask(cpu_online_map);
1622 #endif
1623 need_flush = 1;
1625 // Second, resync all L1 pages, then L2 pages, etc...
1626 //
1627 need_flush |= resync_all(d, PGT_l1_shadow);
1628 #if defined (__i386__)
1629 if ( shadow_mode_translate(d) )
1630 need_flush |= resync_all(d, PGT_hl2_shadow);
1631 #endif
1633 /*
1634 * Fixme: for i386 host
1635 */
1636 if (d->arch.ops->guest_paging_levels == PAGING_L4) {
1637 need_flush |= resync_all(d, PGT_l2_shadow);
1638 need_flush |= resync_all(d, PGT_l3_shadow);
1640 need_flush |= resync_all(d, PGT_l4_shadow);
1642 if ( need_flush && !unlikely(shadow_mode_external(d)) )
1643 local_flush_tlb();
1645 free_out_of_sync_state(d);
1648 static inline int l1pte_write_fault(
1649 struct vcpu *v, guest_l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p,
1650 unsigned long va)
1652 struct domain *d = v->domain;
1653 guest_l1_pgentry_t gpte = *gpte_p;
1654 l1_pgentry_t spte;
1655 unsigned long gpfn = l1e_get_pfn(gpte);
1656 unsigned long gmfn = __gpfn_to_mfn(d, gpfn);
1658 //printk("l1pte_write_fault gmfn=%lx\n", gmfn);
1660 if ( unlikely(!VALID_MFN(gmfn)) )
1662 SH_VLOG("l1pte_write_fault: invalid gpfn=%lx", gpfn);
1663 *spte_p = l1e_empty();
1664 return 0;
1667 ASSERT(guest_l1e_get_flags(gpte) & _PAGE_RW);
1668 guest_l1e_add_flags(gpte, _PAGE_DIRTY | _PAGE_ACCESSED);
1669 spte = l1e_from_pfn(gmfn, guest_l1e_get_flags(gpte) & ~_PAGE_GLOBAL);
1671 SH_VVLOG("l1pte_write_fault: updating spte=0x%" PRIpte " gpte=0x%" PRIpte,
1672 l1e_get_intpte(spte), l1e_get_intpte(gpte));
1674 if ( shadow_mode_log_dirty(d) )
1675 __mark_dirty(d, gmfn);
1677 if ( mfn_is_page_table(gmfn) )
1678 shadow_mark_va_out_of_sync(v, gpfn, gmfn, va);
1680 *gpte_p = gpte;
1681 *spte_p = spte;
1683 return 1;
1686 static inline int l1pte_read_fault(
1687 struct domain *d, guest_l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p)
1689 guest_l1_pgentry_t gpte = *gpte_p;
1690 l1_pgentry_t spte = *spte_p;
1691 unsigned long pfn = l1e_get_pfn(gpte);
1692 unsigned long mfn = __gpfn_to_mfn(d, pfn);
1694 if ( unlikely(!VALID_MFN(mfn)) )
1696 SH_VLOG("l1pte_read_fault: invalid gpfn=%lx", pfn);
1697 *spte_p = l1e_empty();
1698 return 0;
1701 guest_l1e_add_flags(gpte, _PAGE_ACCESSED);
1702 spte = l1e_from_pfn(mfn, guest_l1e_get_flags(gpte) & ~_PAGE_GLOBAL);
1704 if ( shadow_mode_log_dirty(d) || !(guest_l1e_get_flags(gpte) & _PAGE_DIRTY) ||
1705 mfn_is_page_table(mfn) )
1707 l1e_remove_flags(spte, _PAGE_RW);
1710 SH_VVLOG("l1pte_read_fault: updating spte=0x%" PRIpte " gpte=0x%" PRIpte,
1711 l1e_get_intpte(spte), l1e_get_intpte(gpte));
1712 *gpte_p = gpte;
1713 *spte_p = spte;
1715 return 1;
1717 #if CONFIG_PAGING_LEVELS <= 3
1718 static int shadow_fault_32(unsigned long va, struct cpu_user_regs *regs)
1720 l1_pgentry_t gpte, spte, orig_gpte;
1721 struct vcpu *v = current;
1722 struct domain *d = v->domain;
1723 l2_pgentry_t gpde;
1725 spte = l1e_empty();
1727 SH_VVLOG("shadow_fault( va=%lx, code=%lu )",
1728 va, (unsigned long)regs->error_code);
1729 perfc_incrc(shadow_fault_calls);
1731 check_pagetable(v, "pre-sf");
1733 /*
1734 * Don't let someone else take the guest's table pages out-of-sync.
1735 */
1736 shadow_lock(d);
1738 /* XXX - FIX THIS COMMENT!!!
1739 * STEP 1. Check to see if this fault might have been caused by an
1740 * out-of-sync table page entry, or if we should pass this
1741 * fault onto the guest.
1742 */
1743 __shadow_sync_va(v, va);
1745 /*
1746 * STEP 2. Check the guest PTE.
1747 */
1748 __guest_get_l2e(v, va, &gpde);
1749 if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
1751 SH_VVLOG("shadow_fault - EXIT: L1 not present");
1752 perfc_incrc(shadow_fault_bail_pde_not_present);
1753 goto fail;
1756 // This can't fault because we hold the shadow lock and we've ensured that
1757 // the mapping is in-sync, so the check of the PDE's present bit, above,
1758 // covers this access.
1759 //
1760 //orig_gpte = gpte = linear_pg_table[l1_linear_offset(va)];
1761 __guest_get_l1e(v, va, &gpte);
1762 orig_gpte = gpte;
1764 if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_PRESENT)) )
1766 SH_VVLOG("shadow_fault - EXIT: gpte not present (%" PRIpte ")",
1767 l1e_get_intpte(gpte));
1768 perfc_incrc(shadow_fault_bail_pte_not_present);
1769 goto fail;
1772 /* Write fault? */
1773 if ( regs->error_code & 2 )
1775 int allow_writes = 0;
1777 if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_RW)) )
1779 if ( shadow_mode_page_writable(va, regs, l1e_get_pfn(gpte)) )
1781 allow_writes = 1;
1782 l1e_add_flags(gpte, _PAGE_RW);
1784 else
1786 /* Write fault on a read-only mapping. */
1787 SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%" PRIpte ")",
1788 l1e_get_intpte(gpte));
1789 perfc_incrc(shadow_fault_bail_ro_mapping);
1790 goto fail;
1794 if ( !l1pte_write_fault(v, &gpte, &spte, va) )
1796 SH_VVLOG("shadow_fault - EXIT: l1pte_write_fault failed");
1797 perfc_incrc(write_fault_bail);
1798 shadow_unlock(d);
1799 return 0;
1802 if ( allow_writes )
1803 l1e_remove_flags(gpte, _PAGE_RW);
1805 else
1807 if ( !l1pte_read_fault(d, &gpte, &spte) )
1809 SH_VVLOG("shadow_fault - EXIT: l1pte_read_fault failed");
1810 perfc_incrc(read_fault_bail);
1811 shadow_unlock(d);
1812 return 0;
1816 /*
1817 * STEP 3. Write the modified shadow PTE and guest PTE back to the tables.
1818 */
1819 if ( l1e_has_changed(orig_gpte, gpte, PAGE_FLAG_MASK) )
1821 /* XXX Watch out for read-only L2 entries! (not used in Linux). */
1822 /*if ( unlikely(__copy_to_user(&linear_pg_table[l1_linear_offset(va)],
1823 &gpte, sizeof(gpte))) )*/
1824 if ( unlikely(!__guest_set_l1e(v, va, &gpte)))
1826 printk("%s() failed, crashing domain %d "
1827 "due to a read-only L2 page table (gpde=%" PRIpte "), va=%lx\n",
1828 __func__,d->domain_id, l2e_get_intpte(gpde), va);
1829 domain_crash_synchronous();
1832 // if necessary, record the page table page as dirty
1833 if ( unlikely(shadow_mode_log_dirty(d)) )
1834 __mark_dirty(d, __gpfn_to_mfn(d, l2e_get_pfn(gpde)));
1837 shadow_set_l1e(va, spte, 1);
1839 perfc_incrc(shadow_fault_fixed);
1840 d->arch.shadow_fault_count++;
1842 shadow_unlock(d);
1844 check_pagetable(v, "post-sf");
1845 return EXCRET_fault_fixed;
1847 fail:
1848 shadow_unlock(d);
1849 return 0;
1851 #endif
1853 static int do_update_va_mapping(unsigned long va,
1854 l1_pgentry_t val,
1855 struct vcpu *v)
1857 struct domain *d = v->domain;
1858 l1_pgentry_t spte;
1859 int rc = 0;
1861 shadow_lock(d);
1863 //printk("%s(va=%p, val=%p)\n", __func__, (void *)va, (void *)l1e_get_intpte(val));
1865 // This is actually overkill - we don't need to sync the L1 itself,
1866 // just everything involved in getting to this L1 (i.e. we need
1867 // linear_pg_table[l1_linear_offset(va)] to be in sync)...
1868 //
1869 __shadow_sync_va(v, va);
1871 l1pte_propagate_from_guest(d, *(guest_l1_pgentry_t *)&val, &spte);
1872 shadow_set_l1e(va, spte, 0);
1874 /*
1875 * If we're in log-dirty mode then we need to note that we've updated
1876 * the PTE in the PT-holding page. We need the machine frame number
1877 * for this.
1878 */
1879 if ( shadow_mode_log_dirty(d) )
1880 __mark_dirty(d, va_to_l1mfn(v, va));
1882 // out:
1883 shadow_unlock(d);
1885 return rc;
1889 /*
1890 * What lives where in the 32-bit address space in the various shadow modes,
1891 * and what it uses to get/maintain that mapping.
1893 * SHADOW MODE:      none           enable          translate        external
1895 * 4KB things:
1896 * guest_vtable      lin_l2         mapped per gl2  lin_l2 via hl2   mapped per gl2
1897 * shadow_vtable     n/a            sh_lin_l2       sh_lin_l2        mapped per gl2
1898 * hl2_vtable        n/a            n/a             lin_hl2 via hl2  mapped per gl2
1899 * monitor_vtable    n/a            n/a             n/a              mapped once
1901 * 4MB things:
1902 * guest_linear      lin via gl2    lin via gl2     lin via hl2      lin via hl2
1903 * shadow_linear     n/a            sh_lin via sl2  sh_lin via sl2   sh_lin via sl2
1904 * monitor_linear    n/a            n/a             n/a              ???
1905 * perdomain         perdomain      perdomain       perdomain        perdomain
1906 * R/O M2P           R/O M2P        R/O M2P         n/a              n/a
1907 * R/W M2P           R/W M2P        R/W M2P         R/W M2P          R/W M2P
1908 * P2M               n/a            n/a             R/O M2P          R/O M2P
1910 * NB:
1911 * update_pagetables(), shadow_update_pagetables(), shadow_mode_enable(),
1912 * shadow_l2_table(), shadow_hl2_table(), and alloc_monitor_pagetable()
1913 * all play a part in maintaining these mappings.
1914 */
1915 static void shadow_update_pagetables(struct vcpu *v)
1917 struct domain *d = v->domain;
1918 #if defined (__x86_64__)
1919 unsigned long gmfn = ((v->arch.flags & TF_kernel_mode)?
1920 pagetable_get_pfn(v->arch.guest_table) :
1921 pagetable_get_pfn(v->arch.guest_table_user));
1922 #else
1923 unsigned long gmfn = pagetable_get_pfn(v->arch.guest_table);
1924 #endif
1926 unsigned long gpfn = __mfn_to_gpfn(d, gmfn);
1927 unsigned long smfn, old_smfn;
1929 #if CONFIG_PAGING_LEVELS == 2
1930 unsigned long hl2mfn;
1931 #endif
1933 int max_mode = ( shadow_mode_external(d) ? SHM_external
1934 : shadow_mode_translate(d) ? SHM_translate
1935 : shadow_mode_enabled(d) ? SHM_enable
1936 : 0 );
1938 ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
1939 ASSERT( max_mode );
1941 /*
1942 * arch.guest_vtable
1943 */
1944 if ( max_mode & (SHM_enable | SHM_external) )
1946 if ( likely(v->arch.guest_vtable != NULL) )
1947 unmap_domain_page(v->arch.guest_vtable);
1948 v->arch.guest_vtable = map_domain_page(gmfn);
1951 /*
1952 * arch.shadow_table
1953 */
1954 if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) ) {
1955 #if CONFIG_PAGING_LEVELS == 2
1956 smfn = shadow_l2_table(d, gpfn, gmfn);
1957 #elif CONFIG_PAGING_LEVELS == 3
1958 smfn = shadow_l3_table(d, gpfn, gmfn);
1959 #elif CONFIG_PAGING_LEVELS == 4
1960 smfn = shadow_l4_table(d, gpfn, gmfn);
1961 #endif
1963 if ( !get_shadow_ref(smfn) )
1964 BUG();
1965 old_smfn = pagetable_get_pfn(v->arch.shadow_table);
1966 v->arch.shadow_table = mk_pagetable(smfn << PAGE_SHIFT);
1967 if ( old_smfn )
1968 put_shadow_ref(old_smfn);
1970 SH_VVLOG("shadow_update_pagetables(gmfn=%lx, smfn=%lx)", gmfn, smfn);
1972 /*
1973 * arch.shadow_vtable
1974 */
1975 if ( max_mode == SHM_external
1976 #if CONFIG_PAGING_LEVELS >=4
1977 || max_mode & SHM_enable
1978 #endif
1981 if ( v->arch.shadow_vtable )
1982 unmap_domain_page(v->arch.shadow_vtable);
1983 v->arch.shadow_vtable = map_domain_page(smfn);
1986 #if CONFIG_PAGING_LEVELS == 2
1987 /*
1988 * arch.hl2_vtable
1989 */
1991 // if max_mode == SHM_translate, then the hl2 is already installed
1992 // correctly in its smfn, and there's nothing to do.
1993 //
1994 if ( max_mode == SHM_external )
1996 if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
1997 hl2mfn = shadow_hl2_table(d, gpfn, gmfn, smfn);
1998 if ( v->arch.hl2_vtable )
1999 unmap_domain_page(v->arch.hl2_vtable);
2000 v->arch.hl2_vtable = map_domain_page(hl2mfn);
2003 /*
2004 * fixup pointers in monitor table, as necessary
2005 */
2006 if ( max_mode == SHM_external )
2008 l2_pgentry_t *mpl2e = v->arch.monitor_vtable;
2009 l2_pgentry_t old_hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
2010 l2_pgentry_t old_sl2e = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
2012 ASSERT( shadow_mode_translate(d) );
2014 if ( !get_shadow_ref(hl2mfn) )
2015 BUG();
2016 mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
2017 l2e_from_pfn(hl2mfn, __PAGE_HYPERVISOR);
2018 if ( l2e_get_flags(old_hl2e) & _PAGE_PRESENT )
2019 put_shadow_ref(l2e_get_pfn(old_hl2e));
2021 if ( !get_shadow_ref(smfn) )
2022 BUG();
2023 mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
2024 l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
2025 if ( l2e_get_flags(old_sl2e) & _PAGE_PRESENT )
2026 put_shadow_ref(l2e_get_pfn(old_sl2e));
2028 // XXX - maybe this can be optimized somewhat??
2029 local_flush_tlb();
2031 #endif
2033 #if CONFIG_PAGING_LEVELS == 3
2034 /* FIXME: PAE code to be written */
2035 #endif
2039 /************************************************************************/
2040 /************************************************************************/
2041 /************************************************************************/
2043 #if SHADOW_DEBUG
2045 // The following is entirely for _check_pagetable()'s benefit.
2046 // _check_pagetable() wants to know whether a given entry in a
2047 // shadow page table is supposed to be the shadow of the guest's
2048 // current entry, or the shadow of the entry held in the snapshot
2049 // taken above.
2050 //
2051 // Here, we mark all currently existing entries as reflecting
2052 // the snapshot, above. All other places in xen that update
2053 // the shadow will keep the shadow in sync with the guest's
2054 // entries (via l1pte_propagate_from_guest and friends), which clear
2055 // the SHADOW_REFLECTS_SNAPSHOT bit.
2056 //
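/*
 * Editor's sketch (not built): this is how check_pte() further below consumes
 * the bit -- a marked shadow entry is compared against the snapshot rather
 * than the live guest entry:
 */
#if 0 /* illustrative only; uses check_pte()'s locals */
    if ( (l1e_get_flags(shadow_pte) & SHADOW_REFLECTS_SNAPSHOT) && p_snapshot_pte )
        eff_guest_pte = snapshot_pte;   /* shadow reflects the snapshot */
    else
        eff_guest_pte = guest_pte;      /* shadow reflects the live guest entry */
#endif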
2057 static void
2058 mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn)
2060 unsigned long smfn;
2061 l1_pgentry_t *l1e;
2062 l2_pgentry_t *l2e;
2063 unsigned i;
2065 if ( (smfn = __shadow_status(d, gpfn, PGT_l1_shadow)) )
2067 l1e = map_domain_page(smfn);
2068 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
2069 if ( is_guest_l1_slot(i) &&
2070 (l1e_get_flags(l1e[i]) & _PAGE_PRESENT) )
2071 l1e_add_flags(l1e[i], SHADOW_REFLECTS_SNAPSHOT);
2072 unmap_domain_page(l1e);
2075 if ( (smfn = __shadow_status(d, gpfn, PGT_l2_shadow)) )
2077 l2e = map_domain_page(smfn);
2078 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
2079 if ( is_guest_l2_slot(0, i) &&
2080 (l2e_get_flags(l2e[i]) & _PAGE_PRESENT) )
2081 l2e_add_flags(l2e[i], SHADOW_REFLECTS_SNAPSHOT);
2082 unmap_domain_page(l2e);
2086 // BUG: these are not SMP safe...
2087 static int sh_l2_present;
2088 static int sh_l1_present;
2089 char * sh_check_name;
2090 int shadow_status_noswap;
2092 #define v2m(_v, _adr) ({ \
2093 unsigned long _a = (unsigned long)(_adr); \
2094 l2_pgentry_t _pde = shadow_linear_l2_table(_v)[l2_table_offset(_a)]; \
2095 unsigned long _pa = -1; \
2096 if ( l2e_get_flags(_pde) & _PAGE_PRESENT ) \
2097 { \
2098 l1_pgentry_t _pte; \
2099 _pte = shadow_linear_pg_table[l1_linear_offset(_a)]; \
2100 if ( l1e_get_flags(_pte) & _PAGE_PRESENT ) \
2101 _pa = l1e_get_paddr(_pte); \
2102 } \
2103 _pa | (_a & ~PAGE_MASK); \
2104 })
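/*
 * Editor's note (sketch, not built): v2m() walks the shadow linear tables to
 * turn a hypervisor-virtual pointer into the machine address currently
 * backing it (or -1 if no mapping is present), as FAIL() below does for its
 * diagnostics. A minimal use might look like:
 */
#if 0 /* illustrative only */
static void show_backing_ma(struct vcpu *v, l1_pgentry_t *p)
{
    printk("pte at %p is backed by ma=%p\n", p, (void *)v2m(v, p));
}
#endif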
2106 #define FAIL(_f, _a...) \
2107 do { \
2108 printk("XXX %s-FAIL (%d,%d,%d) " _f " at %s(%d)\n", \
2109 sh_check_name, level, l2_idx, l1_idx, ## _a, \
2110 __FILE__, __LINE__); \
2111 printk("guest_pte=%" PRIpte " eff_guest_pte=%" PRIpte \
2112 " shadow_pte=%" PRIpte " snapshot_pte=%" PRIpte \
2113 " &guest=%p &shadow=%p &snap=%p v2m(&guest)=%p" \
2114 " v2m(&shadow)=%p v2m(&snap)=%p ea=%08x\n", \
2115 l1e_get_intpte(guest_pte), l1e_get_intpte(eff_guest_pte), \
2116 l1e_get_intpte(shadow_pte), l1e_get_intpte(snapshot_pte), \
2117 p_guest_pte, p_shadow_pte, p_snapshot_pte, \
2118 (void *)v2m(v, p_guest_pte), (void *)v2m(v, p_shadow_pte), \
2119 (void *)v2m(v, p_snapshot_pte), \
2120 (l2_idx << L2_PAGETABLE_SHIFT) | \
2121 (l1_idx << L1_PAGETABLE_SHIFT)); \
2122 errors++; \
2123 } while ( 0 )
2125 static int check_pte(
2126 struct vcpu *v,
2127 l1_pgentry_t *p_guest_pte,
2128 l1_pgentry_t *p_shadow_pte,
2129 l1_pgentry_t *p_snapshot_pte,
2130 int level, int l2_idx, int l1_idx)
2132 struct domain *d = v->domain;
2133 l1_pgentry_t guest_pte = *p_guest_pte;
2134 l1_pgentry_t shadow_pte = *p_shadow_pte;
2135 l1_pgentry_t snapshot_pte = p_snapshot_pte ? *p_snapshot_pte : l1e_empty();
2136 l1_pgentry_t eff_guest_pte;
2137 unsigned long mask, eff_guest_pfn, eff_guest_mfn, shadow_mfn;
2138 int errors = 0, guest_writable;
2139 int page_table_page;
2141 if ( (l1e_get_intpte(shadow_pte) == 0) ||
2142 (l1e_get_intpte(shadow_pte) == 0xdeadface) ||
2143 (l1e_get_intpte(shadow_pte) == 0x00000E00) )
2144 return errors; /* always safe */
2146 if ( !(l1e_get_flags(shadow_pte) & _PAGE_PRESENT) )
2147 FAIL("Non zero not present shadow_pte");
2149 if ( level == 2 ) sh_l2_present++;
2150 if ( level == 1 ) sh_l1_present++;
2152 if ( (l1e_get_flags(shadow_pte) & SHADOW_REFLECTS_SNAPSHOT) && p_snapshot_pte )
2153 eff_guest_pte = snapshot_pte;
2154 else
2155 eff_guest_pte = guest_pte;
2157 if ( !(l1e_get_flags(eff_guest_pte) & _PAGE_PRESENT) )
2158 FAIL("Guest not present yet shadow is");
2160 mask = ~(_PAGE_GLOBAL|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|_PAGE_AVAIL|PAGE_MASK);
2162 if ( ((l1e_get_intpte(shadow_pte) & mask) != (l1e_get_intpte(eff_guest_pte) & mask)) )
2163 FAIL("Corrupt?");
2165 if ( (level == 1) &&
2166 (l1e_get_flags(shadow_pte) & _PAGE_DIRTY) &&
2167 !(l1e_get_flags(eff_guest_pte) & _PAGE_DIRTY) )
2168 FAIL("Dirty coherence");
2170 if ( (l1e_get_flags(shadow_pte) & _PAGE_ACCESSED) &&
2171 !(l1e_get_flags(eff_guest_pte) & _PAGE_ACCESSED) )
2172 FAIL("Accessed coherence");
2174 if ( l1e_get_flags(shadow_pte) & _PAGE_GLOBAL )
2175 FAIL("global bit set in shadow");
2177 eff_guest_pfn = l1e_get_pfn(eff_guest_pte);
2178 eff_guest_mfn = __gpfn_to_mfn(d, eff_guest_pfn);
2179 shadow_mfn = l1e_get_pfn(shadow_pte);
2181 if ( !VALID_MFN(eff_guest_mfn) && !shadow_mode_refcounts(d) )
2182 FAIL("%s: invalid eff_guest_pfn=%lx eff_guest_pte=%" PRIpte "\n",
2183 __func__, eff_guest_pfn, l1e_get_intpte(eff_guest_pte));
2185 page_table_page = mfn_is_page_table(eff_guest_mfn);
2187 guest_writable =
2188 (l1e_get_flags(eff_guest_pte) & _PAGE_RW) ||
2189 (VM_ASSIST(d, VMASST_TYPE_writable_pagetables) && (level == 1) && mfn_out_of_sync(eff_guest_mfn));
2191 if ( (l1e_get_flags(shadow_pte) & _PAGE_RW ) && !guest_writable )
2193 printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=0x%08x page_table_page=%d\n",
2194 eff_guest_pfn, eff_guest_mfn, shadow_mfn,
2195 frame_table[eff_guest_mfn].u.inuse.type_info,
2196 page_table_page);
2197 FAIL("RW coherence");
2200 if ( (level == 1) &&
2201 (l1e_get_flags(shadow_pte) & _PAGE_RW ) &&
2202 !(guest_writable && (l1e_get_flags(eff_guest_pte) & _PAGE_DIRTY)) )
2204 printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=0x%08x page_table_page=%d\n",
2205 eff_guest_pfn, eff_guest_mfn, shadow_mfn,
2206 frame_table[eff_guest_mfn].u.inuse.type_info,
2207 page_table_page);
2208 FAIL("RW2 coherence");
2211 if ( eff_guest_mfn == shadow_mfn )
2213 if ( level > 1 )
2214 FAIL("Linear map ???"); /* XXX this will fail on BSD */
2216 else
2218 if ( level < 2 )
2219 FAIL("Shadow in L1 entry?");
2221 if ( level == 2 )
2223 if ( __shadow_status(d, eff_guest_pfn, PGT_l1_shadow) != shadow_mfn )
2224 FAIL("shadow_mfn problem eff_guest_pfn=%lx shadow_mfn=%lx", eff_guest_pfn,
2225 __shadow_status(d, eff_guest_pfn, PGT_l1_shadow));
2227 else
2228 BUG(); // XXX -- not handled yet.
2231 return errors;
2233 #undef FAIL
2234 #undef v2m
2236 static int check_l1_table(
2237 struct vcpu *v, unsigned long gpfn,
2238 unsigned long gmfn, unsigned long smfn, unsigned l2_idx)
2240 struct domain *d = v->domain;
2241 int i;
2242 unsigned long snapshot_mfn;
2243 l1_pgentry_t *p_guest, *p_shadow, *p_snapshot = NULL;
2244 int errors = 0;
2246 if ( page_out_of_sync(pfn_to_page(gmfn)) )
2248 snapshot_mfn = __shadow_status(d, gpfn, PGT_snapshot);
2249 ASSERT(snapshot_mfn);
2250 p_snapshot = map_domain_page(snapshot_mfn);
2253 p_guest = map_domain_page(gmfn);
2254 p_shadow = map_domain_page(smfn);
2256 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
2257 errors += check_pte(v, p_guest+i, p_shadow+i,
2258 p_snapshot ? p_snapshot+i : NULL,
2259 1, l2_idx, i);
2261 unmap_domain_page(p_shadow);
2262 unmap_domain_page(p_guest);
2263 if ( p_snapshot )
2264 unmap_domain_page(p_snapshot);
2266 return errors;
2269 #define FAILPT(_f, _a...) \
2270 do { \
2271 printk("XXX FAIL %s-PT " _f "\n", sh_check_name, ## _a ); \
2272 errors++; \
2273 } while ( 0 )
2275 static int check_l2_table(
2276 struct vcpu *v, unsigned long gmfn, unsigned long smfn, int oos_pdes)
2278 struct domain *d = v->domain;
2279 l2_pgentry_t *gpl2e = (l2_pgentry_t *)map_domain_page(gmfn);
2280 l2_pgentry_t *spl2e = (l2_pgentry_t *)map_domain_page(smfn);
2281 l2_pgentry_t match;
2282 int i;
2283 int errors = 0;
2284 int limit;
2286 if ( !oos_pdes && (page_get_owner(pfn_to_page(gmfn)) != d) )
2287 FAILPT("domain doesn't own page");
2288 if ( oos_pdes && (page_get_owner(pfn_to_page(gmfn)) != NULL) )
2289 FAILPT("bogus owner for snapshot page");
2290 if ( page_get_owner(pfn_to_page(smfn)) != NULL )
2291 FAILPT("shadow page mfn=0x%lx is owned by someone, domid=%d",
2292 smfn, page_get_owner(pfn_to_page(smfn))->domain_id);
2294 #if 0
2295 if ( memcmp(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
2296 &gpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
2297 ((SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT) -
2298 DOMAIN_ENTRIES_PER_L2_PAGETABLE) * sizeof(l2_pgentry_t)) )
2300 for ( i = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
2301 i < (SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT);
2302 i++ )
2303 printk("+++ (%d) %lx %lx\n",i,
2304 l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]));
2305 FAILPT("hypervisor entries inconsistent");
2308 if ( (l2_pgentry_val(spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
2309 l2_pgentry_val(gpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT])) )
2310 FAILPT("hypervisor linear map inconsistent");
2311 #endif
2313 match = l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
2314 if ( !shadow_mode_external(d) &&
2315 l2e_has_changed(spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT],
2316 match, PAGE_FLAG_MASK))
2318 FAILPT("hypervisor shadow linear map inconsistent %" PRIpte " %" PRIpte,
2319 l2e_get_intpte(spl2e[SH_LINEAR_PT_VIRT_START >>
2320 L2_PAGETABLE_SHIFT]),
2321 l2e_get_intpte(match));
2324 match = l2e_from_paddr(__pa(d->arch.mm_perdomain_pt), __PAGE_HYPERVISOR);
2325 if ( !shadow_mode_external(d) &&
2326 l2e_has_changed(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT],
2327 match, PAGE_FLAG_MASK))
2329 FAILPT("hypervisor per-domain map inconsistent saw %" PRIpte ", expected (va=%p) %" PRIpte,
2330 l2e_get_intpte(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]),
2331 d->arch.mm_perdomain_pt,
2332 l2e_get_intpte(match));
2335 #ifdef __i386__
2336 if ( shadow_mode_external(d) )
2337 limit = L2_PAGETABLE_ENTRIES;
2338 else
2339 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
2340 #else
2341 limit = 0; /* XXX x86/64 XXX */
2342 #endif
2344 /* Check the whole L2. */
2345 for ( i = 0; i < limit; i++ )
2346 errors += check_pte(v,
2347 (l1_pgentry_t*)(&gpl2e[i]), /* Hmm, dirty ... */
2348 (l1_pgentry_t*)(&spl2e[i]),
2349 NULL,
2350 2, i, 0);
2352 unmap_domain_page(spl2e);
2353 unmap_domain_page(gpl2e);
2355 #if 1
2356 if ( errors )
2357 printk("check_l2_table returning %d errors\n", errors);
2358 #endif
2360 return errors;
2362 #undef FAILPT
2364 static int _check_pagetable(struct vcpu *v, char *s)
2366 struct domain *d = v->domain;
2367 #if defined (__x86_64__)
2368 pagetable_t pt = ((v->arch.flags & TF_kernel_mode)?
2369 pagetable_get_pfn(v->arch.guest_table) :
2370 pagetable_get_pfn(v->arch.guest_table_user));
2371 #else
2372 pagetable_t pt = v->arch.guest_table;
2373 #endif
2374 unsigned long gptbase = pagetable_get_paddr(pt);
2375 unsigned long ptbase_pfn, smfn;
2376 unsigned long i;
2377 l2_pgentry_t *gpl2e, *spl2e;
2378 unsigned long ptbase_mfn = 0;
2379 int errors = 0, limit, oos_pdes = 0;
2381 //_audit_domain(d, AUDIT_QUIET);
2382 shadow_lock(d);
2384 sh_check_name = s;
2385 //SH_VVLOG("%s-PT Audit", s);
2386 sh_l2_present = sh_l1_present = 0;
2387 perfc_incrc(check_pagetable);
2389 ptbase_mfn = gptbase >> PAGE_SHIFT;
2390 ptbase_pfn = __mfn_to_gpfn(d, ptbase_mfn);
2392 if ( !(smfn = __shadow_status(d, ptbase_pfn, PGT_base_page_table)) )
2394 printk("%s-PT %lx not shadowed\n", s, gptbase);
2395 goto out;
2397 if ( page_out_of_sync(pfn_to_page(ptbase_mfn)) )
2399 ptbase_mfn = __shadow_status(d, ptbase_pfn, PGT_snapshot);
2400 oos_pdes = 1;
2401 ASSERT(ptbase_mfn);
2404 errors += check_l2_table(v, ptbase_mfn, smfn, oos_pdes);
2406 gpl2e = (l2_pgentry_t *) map_domain_page(ptbase_mfn);
2407 spl2e = (l2_pgentry_t *) map_domain_page(smfn);
2409 /* Go back and recurse. */
2410 #ifdef __i386__
2411 if ( shadow_mode_external(d) )
2412 limit = L2_PAGETABLE_ENTRIES;
2413 else
2414 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
2415 #else
2416 limit = 0; /* XXX x86/64 XXX */
2417 #endif
2419 for ( i = 0; i < limit; i++ )
2421 unsigned long gl1pfn = l2e_get_pfn(gpl2e[i]);
2422 unsigned long gl1mfn = __gpfn_to_mfn(d, gl1pfn);
2423 unsigned long sl1mfn = l2e_get_pfn(spl2e[i]);
2425 if ( l2e_get_intpte(spl2e[i]) != 0 ) /* FIXME: check flags? */
2427 errors += check_l1_table(v, gl1pfn, gl1mfn, sl1mfn, i);
2431 unmap_domain_page(spl2e);
2432 unmap_domain_page(gpl2e);
2434 #if 0
2435 SH_VVLOG("PT verified : l2_present = %d, l1_present = %d",
2436 sh_l2_present, sh_l1_present);
2437 #endif
2439 out:
2440 if ( errors )
2441 BUG();
2443 shadow_unlock(d);
2445 return errors;
2448 int _check_all_pagetables(struct vcpu *v, char *s)
2450 struct domain *d = v->domain;
2451 int i;
2452 struct shadow_status *a;
2453 unsigned long gmfn;
2454 int errors = 0;
2456 shadow_status_noswap = 1;
2458 sh_check_name = s;
2459 SH_VVLOG("%s-PT Audit domid=%d", s, d->domain_id);
2460 sh_l2_present = sh_l1_present = 0;
2461 perfc_incrc(check_all_pagetables);
2463 for (i = 0; i < shadow_ht_buckets; i++)
2465 a = &d->arch.shadow_ht[i];
2466 while ( a && a->gpfn_and_flags )
2468 gmfn = __gpfn_to_mfn(d, a->gpfn_and_flags & PGT_mfn_mask);
2470 switch ( a->gpfn_and_flags & PGT_type_mask )
2472 case PGT_l1_shadow:
2473 errors += check_l1_table(v, a->gpfn_and_flags & PGT_mfn_mask,
2474 gmfn, a->smfn, 0);
2475 break;
2476 case PGT_l2_shadow:
2477 errors += check_l2_table(v, gmfn, a->smfn,
2478 page_out_of_sync(pfn_to_page(gmfn)));
2479 break;
2480 case PGT_l3_shadow:
2481 case PGT_l4_shadow:
2482 case PGT_hl2_shadow:
2483 BUG(); // XXX - ought to fix this...
2484 break;
2485 case PGT_snapshot:
2486 case PGT_writable_pred:
2487 break;
2488 default:
2489 errors++;
2490 printk("unexpected shadow type %lx, gpfn=%lx, "
2491 "gmfn=%lx smfn=%lx\n",
2492 a->gpfn_and_flags & PGT_type_mask,
2493 a->gpfn_and_flags & PGT_mfn_mask,
2494 gmfn, a->smfn);
2495 BUG();
2497 a = a->next;
2501 shadow_status_noswap = 0;
2503 if ( errors )
2504 BUG();
2506 return errors;
2509 #endif // SHADOW_DEBUG
2511 #if CONFIG_PAGING_LEVELS == 3
2512 static unsigned long shadow_l3_table(
2513 struct domain *d, unsigned long gpfn, unsigned long gmfn)
2515 BUG(); /* not implemented yet */
2516 return 42;
2518 static unsigned long gva_to_gpa_pae(unsigned long gva)
2520 BUG();
2521 return 43;
2523 #endif
2525 #if CONFIG_PAGING_LEVELS >= 4
2526 /****************************************************************************/
2527 /* 64-bit shadow-mode code testing */
2528 /****************************************************************************/
2529 /*
2530 * validate_bl2e_change()
2531 * This code is for a 32-bit VMX guest on a 64-bit host;
2532 * it keeps the guest L2 in sync with its shadow.
2533 */
2535 static inline void
2536 validate_bl2e_change(
2537 struct domain *d,
2538 guest_root_pgentry_t *new_gle_p,
2539 pgentry_64_t *shadow_l3,
2540 int index)
2542 int sl3_idx, sl2_idx;
2543 unsigned long sl2mfn, sl1mfn;
2544 pgentry_64_t *sl2_p;
2546 /* Use the guest L2 PTE index to derive the shadow L3 and L2 indices.
2547 * index: 0 ~ 1023, PAGETABLE_ENTRIES: 512
2548 */
2549 sl3_idx = index / (PAGETABLE_ENTRIES / 2);
2550 sl2_idx = (index % (PAGETABLE_ENTRIES / 2)) * 2;
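    /*
     * Worked example (editor's addition): a guest (non-PAE) L2 slot covers
     * 4MB while a shadow L2 slot covers 2MB, so each guest slot expands to a
     * pair of shadow slots and 256 guest slots fill one shadow L2 page.
     * E.g. guest index 600: sl3_idx = 600 / 256 = 2,
     * sl2_idx = (600 % 256) * 2 = 176 (slot 177 holds the second 2MB half).
     */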
2552 sl2mfn = entry_get_pfn(shadow_l3[sl3_idx]);
2553 sl2_p = (pgentry_64_t *)map_domain_page(sl2mfn);
2555 validate_pde_change(
2556 d, *(guest_l2_pgentry_t *)new_gle_p, (l2_pgentry_t *)&sl2_p[sl2_idx]);
2558 /* Map the second L1 shadow page */
2559 if (entry_get_flags(sl2_p[sl2_idx]) & _PAGE_PRESENT) {
2560 sl1mfn = entry_get_pfn(sl2_p[sl2_idx]);
2561 sl2_p[sl2_idx + 1] =
2562 entry_from_pfn(sl1mfn + 1, entry_get_flags(sl2_p[sl2_idx]));
2564 unmap_domain_page(sl2_p);
2568 /*
2569 * init_bl2() is for a 32-bit VMX guest on a 64-bit host.
2570 * It uses 1 shadow L4 (which also serves as the L3) and 4 shadow L2s to simulate the guest L2.
2571 */
2572 static inline unsigned long init_bl2(l4_pgentry_t *spl4e, unsigned long smfn)
2574 unsigned int count;
2575 unsigned long sl2mfn;
2576 struct pfn_info *page;
2578 memset(spl4e, 0, PAGE_SIZE);
2580 /* Map the self entry, L4&L3 share the same page */
2581 spl4e[PAE_SHADOW_SELF_ENTRY] = l4e_from_pfn(smfn, __PAGE_HYPERVISOR);
2583 /* Allocate 4 shadow L2s */
2584 page = alloc_domheap_pages(NULL, SL2_ORDER, 0);
2585 if (!page)
2586 domain_crash_synchronous();
2588 for (count = 0; count < PDP_ENTRIES; count++)
2590 sl2mfn = page_to_pfn(page+count);
2591 void *l2 = map_domain_page(sl2mfn);
2592 memset(l2, 0, PAGE_SIZE);
2593 unmap_domain_page(l2);
2594 spl4e[count] = l4e_from_pfn(sl2mfn, _PAGE_PRESENT);
2597 unmap_domain_page(spl4e);
2598 return smfn;
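/*
 * Editor's note on the sizing in init_bl2() above: a 32-bit (non-PAE) guest
 * L2 has 1024 4-byte entries covering 4GB, while each shadow L2 page holds
 * 512 8-byte entries covering 1GB, so PDP_ENTRIES (4) shadow L2 pages are
 * allocated in one order-SL2_ORDER chunk and installed in spl4e[0..3].
 */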
2603 static unsigned long shadow_l4_table(
2604 struct domain *d, unsigned long gpfn, unsigned long gmfn)
2606 unsigned long smfn;
2607 l4_pgentry_t *spl4e;
2609 SH_VVLOG("shadow_l4_table(gpfn=%lx, gmfn=%lx)", gpfn, gmfn);
2611 perfc_incrc(shadow_l4_table_count);
2613 if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l4_shadow))) )
2615 printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn);
2616 BUG(); /* XXX Deal gracefully with failure. */
2619 spl4e = (l4_pgentry_t *)map_domain_page(smfn);
2621 if (d->arch.ops->guest_paging_levels == PAGING_L2) {
2622 return init_bl2(spl4e, smfn);
2625 /* Install hypervisor and 4x linear p.t. mappings. */
2626 if ( (PGT_base_page_table == PGT_l4_page_table) &&
2627 !shadow_mode_external(d) )
2629 /*
2630 * We could proactively fill in PDEs for pages that are already
2631 * shadowed *and* where the guest PDE has _PAGE_ACCESSED set
2632 * (restriction required for coherence of the accessed bit). However,
2633 * we tried it and it didn't help performance. This is simpler.
2634 */
2635 memset(spl4e, 0, L4_PAGETABLE_ENTRIES*sizeof(l4_pgentry_t));
2637 /* Install hypervisor and 2x linear p.t. mappings. */
2638 memcpy(&spl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
2639 &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
2640 ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
2642 spl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
2643 l4e_from_paddr(__pa(page_get_owner(&frame_table[gmfn])->arch.mm_perdomain_l3),
2644 __PAGE_HYPERVISOR);
2646 if ( shadow_mode_translate(d) ) // NB: not external
2648 spl4e[l4_table_offset(RO_MPT_VIRT_START)] =
2649 l4e_from_paddr(pagetable_get_paddr(d->arch.phys_table),
2650 __PAGE_HYPERVISOR);
2652 else
2653 spl4e[l4_table_offset(LINEAR_PT_VIRT_START)] =
2654 l4e_from_pfn(gmfn, __PAGE_HYPERVISOR);
2656 } else
2657 memset(spl4e, 0, L4_PAGETABLE_ENTRIES*sizeof(l4_pgentry_t));
2659 unmap_domain_page(spl4e);
2661 ESH_LOG("shadow_l4_table(%lx -> %lx)", gmfn, smfn);
2662 return smfn;
2665 /*
2666 * This variant of shadow_mark_va_out_of_sync() is for 2MB page shadows.
2667 */
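/*
 * Editor's note: writable_pl1e here is the machine address of the shadow L1
 * slot mapping the 4KB frame -- the caller in l2e_rw_fault() below passes
 * l2e_get_paddr(sl2e) | (sizeof(l1_pgentry_t) * (gpfn - start_gpfn)) -- so
 * writable_pl1e >> L1_PAGETABLE_SHIFT is the MFN of the shadow L1 page whose
 * reference is taken in the function below.
 */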
2668 static void shadow_mark_va_out_of_sync_2mp(
2669 struct vcpu *v, unsigned long gpfn, unsigned long mfn, unsigned long writable_pl1e)
2671 struct out_of_sync_entry *entry =
2672 shadow_mark_mfn_out_of_sync(v, gpfn, mfn);
2674 entry->writable_pl1e = writable_pl1e;
2675 ESH_LOG("<shadow_mark_va_out_of_sync_2mp> gpfn = %lx\n", gpfn);
2676 if ( !get_shadow_ref(writable_pl1e >> L1_PAGETABLE_SHIFT) )
2677 BUG();
2681 static int get_shadow_mfn(struct domain *d, unsigned long gpfn, unsigned long *spmfn, u32 flag)
2683 unsigned long gmfn;
2684 if ( !(*spmfn = __shadow_status(d, gpfn, flag)) )
2686 /* This is NOT already shadowed so we need to shadow it. */
2687 SH_VVLOG("<get_shadow_mfn>: not shadowed");
2689 gmfn = __gpfn_to_mfn(d, gpfn);
2690 if ( unlikely(!VALID_MFN(gmfn)) )
2692 // Attempt to use an invalid pfn as a shadow page.
2693 // XXX this needs to be more graceful!
2694 BUG();
2697 if ( unlikely(!(*spmfn =
2698 alloc_shadow_page(d, gpfn, gmfn, flag))) )
2700 printk("<get_shadow_mfn> Couldn't alloc a shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn);
2701 BUG(); /* XXX Need to deal gracefully with failure. */
2703 switch(flag) {
2704 case PGT_l1_shadow:
2705 perfc_incrc(shadow_l1_table_count);
2706 break;
2707 case PGT_l2_shadow:
2708 perfc_incrc(shadow_l2_table_count);
2709 break;
2710 case PGT_l3_shadow:
2711 perfc_incrc(shadow_l3_table_count);
2712 break;
2713 case PGT_hl2_shadow:
2714 perfc_incrc(shadow_hl2_table_count);
2715 break;
2718 return 1;
2719 } else {
2720 /* This L1 is shadowed already, but the L2 entry is missing. */
2721 SH_VVLOG("4b: was shadowed, l2 missing (%lx)", *spmfn);
2722 return 0;
2726 static void shadow_map_into_current(struct vcpu *v,
2727 unsigned long va, unsigned int from, unsigned int to)
2729 pgentry_64_t gle, sle;
2730 unsigned long gpfn, smfn;
2732 if (from == PAGING_L1 && to == PAGING_L2) {
2733 shadow_map_l1_into_current_l2(va);
2734 return;
2737 __rw_entry(v, va, &gle, GUEST_ENTRY | GET_ENTRY | to);
2738 ASSERT(entry_get_flags(gle) & _PAGE_PRESENT);
2739 gpfn = entry_get_pfn(gle);
2741 get_shadow_mfn(v->domain, gpfn, &smfn, shadow_level_to_type(from));
2743 if ( !get_shadow_ref(smfn) )
2744 BUG();
2745 entry_general(v->domain, &gle, &sle, smfn, to);
2746 __rw_entry(v, va, &gle, GUEST_ENTRY | SET_ENTRY | to);
2747 __rw_entry(v, va, &sle, SHADOW_ENTRY | SET_ENTRY | to);
2750 /*
2751 * shadow_set_lxe should be put in shadow.h
2752 */
2753 static void shadow_set_l2e_64(unsigned long va, l2_pgentry_t sl2e,
2754 int create_l2_shadow, int put_ref_check)
2756 struct vcpu *v = current;
2757 l4_pgentry_t sl4e;
2758 l3_pgentry_t sl3e;
2760 __shadow_get_l4e(v, va, &sl4e);
2761 if (!(l4e_get_flags(sl4e) & _PAGE_PRESENT)) {
2762 if (create_l2_shadow) {
2763 perfc_incrc(shadow_set_l3e_force_map);
2764 shadow_map_into_current(v, va, PAGING_L3, PAGING_L4);
2765 __shadow_get_l4e(v, va, &sl4e);
2766 } else {
2767 printk("For non-VMX shadow, create_l2_shadow:%d\n", create_l2_shadow);
2771 __shadow_get_l3e(v, va, &sl3e);
2772 if (!(l3e_get_flags(sl3e) & _PAGE_PRESENT)) {
2773 if (create_l2_shadow) {
2774 perfc_incrc(shadow_set_l2e_force_map);
2775 shadow_map_into_current(v, va, PAGING_L2, PAGING_L3);
2776 __shadow_get_l3e(v, va, &sl3e);
2777 } else {
2778 printk("For non-VMX shadow, create_l2_shadow:%d\n", create_l2_shadow);
2780 shadow_update_min_max(l4e_get_pfn(sl4e), l3_table_offset(va));
2784 if ( put_ref_check ) {
2785 l2_pgentry_t tmp_sl2e;
2786 if ( __shadow_get_l2e(v, va, &tmp_sl2e) ) {
2787 if ( l2e_get_flags(tmp_sl2e) & _PAGE_PRESENT )
2788 if ( l2e_get_pfn(tmp_sl2e) == l2e_get_pfn(sl2e) ) {
2789 put_shadow_ref(l2e_get_pfn(sl2e));
2795 if (! __shadow_set_l2e(v, va, &sl2e))
2796 BUG();
2797 shadow_update_min_max(l3e_get_pfn(sl3e), l2_table_offset(va));
2801 static void shadow_set_l1e_64(unsigned long va, pgentry_64_t *sl1e_p,
2802 int create_l1_shadow)
2804 struct vcpu *v = current;
2805 struct domain *d = v->domain;
2806 pgentry_64_t sle;
2807 pgentry_64_t sle_up = {0};
2808 l1_pgentry_t old_spte;
2809 l1_pgentry_t sl1e = *(l1_pgentry_t *)sl1e_p;
2810 int i;
2811 unsigned long orig_va = 0;
2813 if (d->arch.ops->guest_paging_levels == PAGING_L2) {
2814 /* This is for a 32-bit VMX guest on a 64-bit host */
2815 orig_va = va;
2816 va = va & (~((1<<L2_PAGETABLE_SHIFT_32)-1));
2819 for (i = PAGING_L4; i >= PAGING_L2; i--) {
2820 if (!__rw_entry(v, va, &sle, SHADOW_ENTRY | GET_ENTRY | i)) {
2821 printk("<%s> i = %d\n", __func__, i);
2822 BUG();
2824 if (!(entry_get_flags(sle) & _PAGE_PRESENT)) {
2825 if (create_l1_shadow) {
2826 perfc_incrc(shadow_set_l3e_force_map);
2827 shadow_map_into_current(v, va, i-1, i);
2828 __rw_entry(v, va, &sle, SHADOW_ENTRY | GET_ENTRY | i);
2829 } else {
2830 #if 0
2831 printk("For non VMX shadow, create_l1_shadow:%d\n", create_l1_shadow);
2832 #endif
2835 if(i < PAGING_L4)
2836 shadow_update_min_max(entry_get_pfn(sle_up), table_offset_64(va, i));
2837 sle_up = sle;
2840 if (d->arch.ops->guest_paging_levels == PAGING_L2) {
2841 va = orig_va;
2844 if ( shadow_mode_refcounts(d) )
2846 __shadow_get_l1e(v, va, &old_spte);
2847 ESH_LOG("old_sl1e: %lx, new_sl1e: %lx\n", l1e_get_intpte(old_spte), l1e_get_intpte(sl1e));
2848 if ( l1e_has_changed(old_spte, sl1e, _PAGE_RW | _PAGE_PRESENT) )
2850 if ( (l1e_get_flags(sl1e) & _PAGE_PRESENT) &&
2851 !shadow_get_page_from_l1e(sl1e, d) )
2852 sl1e = l1e_empty();
2853 if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
2854 put_page_from_l1e(old_spte, d);
2858 __shadow_set_l1e(v, va, &sl1e);
2860 shadow_update_min_max(entry_get_pfn(sle_up), guest_l1_table_offset(va));
2863 /* As 32-bit guests don't support 4MB pages yet,
2864 * we don't need to compile this function in two variants.
2865 */
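/*
 * Editor's sketch (not built): the core of l2e_rw_fault() below is to
 * fracture one guest superpage PDE into a full shadow L1 of
 * L1_PAGETABLE_ENTRIES small entries, one per constituent frame; the real
 * loop additionally handles refcounts, R/W demotion and out-of-sync marking.
 */
#if 0 /* illustrative only; uses l2e_rw_fault()'s locals */
    for ( gpfn = start_gpfn; gpfn < start_gpfn + L1_PAGETABLE_ENTRIES; gpfn++ )
        l1_p[gpfn - start_gpfn] =
            l1e_from_pfn(__gpfn_to_mfn(d, gpfn), l2e_get_flags(tmp_l2e));
#endif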
2866 static inline int l2e_rw_fault(
2867 struct vcpu *v, l2_pgentry_t *gl2e_p, unsigned long va, int rw)
2869 struct domain *d = v->domain;
2870 l2_pgentry_t gl2e = *gl2e_p;
2871 l2_pgentry_t tmp_l2e = gl2e;
2872 unsigned long start_gpfn = l2e_get_pfn(gl2e);
2873 unsigned long gpfn, mfn;
2874 unsigned long l1_mfn, gmfn;
2875 l1_pgentry_t *l1_p;
2876 l1_pgentry_t sl1e;
2877 l1_pgentry_t old_sl1e;
2878 l2_pgentry_t sl2e;
2879 unsigned long nx = 0;
2880 int put_ref_check = 0;
2881 /* Check if gpfn is 2M aligned */
2883 /* Update guest l2e */
2884 if (rw) {
2885 ASSERT(l2e_get_flags(gl2e) & _PAGE_RW);
2886 l2e_add_flags(gl2e, _PAGE_DIRTY | _PAGE_ACCESSED);
2887 } else {
2888 l2e_add_flags(gl2e, _PAGE_ACCESSED);
2891 l2e_remove_flags(tmp_l2e, _PAGE_PSE);
2892 if (l2e_get_flags(gl2e) & _PAGE_NX) {
2893 l2e_remove_flags(tmp_l2e, _PAGE_NX);
2894 nx = 1UL << 63;
2898 /* Get the shadow l2 first */
2899 if ( !__shadow_get_l2e(v, va, &sl2e) )
2900 sl2e = l2e_empty();
2902 l1_mfn = ___shadow_status(d, start_gpfn | nx, PGT_fl1_shadow);
2904 /* Check the corresponding l2e */
2905 if (l1_mfn) {
2906 /* Why is it already PRESENT? */
2907 if ((l2e_get_flags(sl2e) & _PAGE_PRESENT) &&
2908 l2e_get_pfn(sl2e) == l1_mfn) {
2909 ESH_LOG("sl2e PRESENT bit is set: %lx, l1_mfn = %lx\n", l2e_get_pfn(sl2e), l1_mfn);
2910 } else {
2911 put_ref_check = 1;
2912 if (!get_shadow_ref(l1_mfn))
2913 BUG();
2915 l1_p = (l1_pgentry_t *)map_domain_page(l1_mfn);
2916 sl2e = l2e_from_pfn(l1_mfn, l2e_get_flags(tmp_l2e));
2917 } else {
2918 /* Allocate a new page as a shadow page table if needed */
2919 gmfn = __gpfn_to_mfn(d, start_gpfn);
2920 l1_mfn = alloc_shadow_page(d, start_gpfn | nx, gmfn, PGT_fl1_shadow);
2921 if (unlikely(!l1_mfn)) {
2922 BUG();
2925 if (!get_shadow_ref(l1_mfn))
2926 BUG();
2927 l1_p = (l1_pgentry_t *)map_domain_page(l1_mfn );
2928 sl2e = l2e_from_pfn(l1_mfn, l2e_get_flags(tmp_l2e));
2929 memset(l1_p, 0, PAGE_SIZE);
2930 ESH_LOG("Alloc a shadow page: %lx\n", l1_mfn);
2933 ESH_LOG("<%s>: sl2e = %lx\n", __func__, l2e_get_intpte(sl2e));
2934 /* Map the page into the L2 */
2935 shadow_set_l2e_64(va, sl2e, 1, put_ref_check);
2937 if (l2e_get_flags(gl2e) & _PAGE_NX)
2938 l2e_add_flags(tmp_l2e, _PAGE_NX);
2940 /* Propagate into the shadow page table, i.e. set the sl1e entries */
2941 for (gpfn = start_gpfn;
2942 gpfn < (start_gpfn + L1_PAGETABLE_ENTRIES); gpfn++) {
2944 mfn = __gpfn_to_mfn(d, gpfn);
2946 if ( unlikely(!VALID_MFN(mfn)) )
2948 continue;
2951 sl1e = l1e_from_pfn(mfn, l2e_get_flags(tmp_l2e));
2953 if (!rw) {
2954 if ( shadow_mode_log_dirty(d) ||
2955 !(l2e_get_flags(gl2e) & _PAGE_DIRTY) || mfn_is_page_table(mfn) )
2957 l1e_remove_flags(sl1e, _PAGE_RW);
2959 } else {
2960 /* log dirty*/
2961 /*
2962 if ( shadow_mode_log_dirty(d) )
2963 __mark_dirty(d, gmfn);
2964 */
2966 // printk("<%s> gpfn: %lx, mfn: %lx, sl1e: %lx\n", __func__, gpfn, mfn, l1e_get_intpte(sl1e));
2967 /* The shadow entries need to be set up before shadow_mark_va_out_of_sync() */
2968 old_sl1e = l1_p[gpfn - start_gpfn];
2970 if ( l1e_has_changed(old_sl1e, sl1e, _PAGE_RW | _PAGE_PRESENT) )
2972 if ( (l1e_get_flags(sl1e) & _PAGE_PRESENT) &&
2973 !shadow_get_page_from_l1e(sl1e, d) ) {
2974 ESH_LOG("%lx, mfn: %lx why make me empty, start_pfn: %lx, gpfn: %lx\n", l1e_get_intpte(sl1e),mfn, start_gpfn, gpfn);
2975 sl1e = l1e_empty();
2977 if ( l1e_get_flags(old_sl1e) & _PAGE_PRESENT )
2978 put_page_from_l1e(old_sl1e, d);
2981 l1_p[gpfn - start_gpfn] = sl1e;
2983 if (rw) {
2984 /* shadow_mark_va_out_of_sync() needs modification for 2MB pages */
2985 if ( mfn_is_page_table(mfn) )
2986 shadow_mark_va_out_of_sync_2mp(v, gpfn, mfn,
2987 l2e_get_paddr(sl2e) | (sizeof(l1_pgentry_t) * (gpfn - start_gpfn)));
2991 unmap_domain_page(l1_p);
2992 return 1;
2996 /*
2997 * Check P, R/W, U/S bits in the guest page table.
2998 * If the fault belongs to guest return 1,
2999 * else return 0.
3000 */
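/*
 * Editor's sketch (not built): the per-level test applied by both variants of
 * guest_page_fault() below, written out once.  ERROR_W and ERROR_U are the
 * page-fault error-code bits for write access and user-mode access.
 */
#if 0 /* illustrative only */
static inline int pte_denies_access(unsigned long flags, unsigned int error_code)
{
    if ( !(flags & _PAGE_PRESENT) )
        return 1;                                     /* not present          */
    if ( (error_code & ERROR_W) && !(flags & _PAGE_RW) )
        return 1;                                     /* write to r/o mapping */
    if ( (error_code & ERROR_U) && !(flags & _PAGE_USER) )
        return 1;                                     /* user access denied   */
    return 0;
}
#endif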
3001 #if defined( GUEST_PGENTRY_32 )
3002 static inline int guest_page_fault(struct vcpu *v,
3003 unsigned long va, unsigned int error_code,
3004 guest_l2_pgentry_t *gpl2e, guest_l1_pgentry_t *gpl1e)
3006 /* The following checks are for a 32-bit guest on a 64-bit host */
3008 __guest_get_l2e(v, va, gpl2e);
3010 /* Check the guest L2 page-table entry first*/
3011 if (unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_PRESENT)))
3012 return 1;
3014 if (error_code & ERROR_W) {
3015 if (unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_RW)))
3016 return 1;
3018 if (error_code & ERROR_U) {
3019 if (unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_USER)))
3020 return 1;
3023 if (guest_l2e_get_flags(*gpl2e) & _PAGE_PSE)
3024 return 0;
3026 __guest_get_l1e(v, va, gpl1e);
3028 /* Then check the guest L1 page-table entry */
3029 if (unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_PRESENT)))
3030 return 1;
3032 if (error_code & ERROR_W) {
3033 if (unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_RW)))
3034 return 1;
3036 if (error_code & ERROR_U) {
3037 if (unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_USER)))
3038 return 1;
3041 return 0;
3043 #else
3044 static inline int guest_page_fault(struct vcpu *v,
3045 unsigned long va, unsigned int error_code,
3046 guest_l2_pgentry_t *gpl2e, guest_l1_pgentry_t *gpl1e)
3048 struct domain *d = v->domain;
3049 pgentry_64_t gle, *lva;
3050 unsigned long mfn;
3051 int i;
3053 __rw_entry(v, va, &gle, GUEST_ENTRY | GET_ENTRY | PAGING_L4);
3054 if (unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT)))
3055 return 1;
3057 if (error_code & ERROR_W) {
3058 if (unlikely(!(entry_get_flags(gle) & _PAGE_RW)))
3059 return 1;
3061 if (error_code & ERROR_U) {
3062 if (unlikely(!(entry_get_flags(gle) & _PAGE_USER)))
3063 return 1;
3065 for (i = PAGING_L3; i >= PAGING_L1; i--) {
3066 /*
3067 * If it's not external mode, then mfn should be machine physical.
3068 */
3069 mfn = __gpfn_to_mfn(d, (entry_get_value(gle) >> PAGE_SHIFT));
3071 lva = (pgentry_64_t *) phys_to_virt(
3072 mfn << PAGE_SHIFT);
3073 gle = lva[table_offset_64(va, i)];
3075 if (unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT)))
3076 return 1;
3078 if (error_code & ERROR_W) {
3079 if (unlikely(!(entry_get_flags(gle) & _PAGE_RW)))
3080 return 1;
3082 if (error_code & ERROR_U) {
3083 if (unlikely(!(entry_get_flags(gle) & _PAGE_USER)))
3084 return 1;
3087 if (i == PAGING_L2) {
3088 if (gpl2e)
3089 gpl2e->l2 = gle.lo;
3091 if (likely(entry_get_flags(gle) & _PAGE_PSE))
3092 return 0;
3096 if (i == PAGING_L1)
3097 if (gpl1e)
3098 gpl1e->l1 = gle.lo;
3100 return 0;
3102 #endif
3103 static int shadow_fault_64(unsigned long va, struct cpu_user_regs *regs)
3105 struct vcpu *v = current;
3106 struct domain *d = v->domain;
3107 guest_l2_pgentry_t gl2e;
3108 guest_l1_pgentry_t gl1e;
3109 l1_pgentry_t sl1e;
3111 perfc_incrc(shadow_fault_calls);
3113 ESH_LOG("<shadow_fault_64> va=%lx, rip = %lx, error code = %x\n",
3114 va, regs->eip, regs->error_code);
3116 /*
3117 * Don't let someone else take the guest's table pages out-of-sync.
3118 */
3119 shadow_lock(d);
3121 /* XXX - FIX THIS COMMENT!!!
3122 * STEP 1. Check to see if this fault might have been caused by an
3123 * out-of-sync table page entry, or if we should pass this
3124 * fault onto the guest.
3125 */
3126 __shadow_sync_va(v, va);
3128 /*
3129 * STEP 2. Check if the fault belongs to guest
3130 */
3131 if ( guest_page_fault(
3132 v, va, regs->error_code, &gl2e, &gl1e) ) {
3133 goto fail;
3136 if ( unlikely(!(guest_l2e_get_flags(gl2e) & _PAGE_PSE)) ) {
3137 /*
3138 * Handle 4K pages here
3139 */
3141 /* Write fault? */
3142 if ( regs->error_code & 2 ) {
3143 if ( !l1pte_write_fault(v, &gl1e, &sl1e, va) ) {
3144 goto fail;
3146 } else {
3147 l1pte_read_fault(d, &gl1e, &sl1e);
3149 /*
3150 * STEP 3. Write the guest/shadow l1e back
3151 */
3152 if (unlikely(!__guest_set_l1e(v, va, &gl1e))) {
3153 domain_crash_synchronous();
3156 ESH_LOG("gl1e: %lx, sl1e: %lx\n", l1e_get_intpte(gl1e), l1e_get_intpte(sl1e));
3157 shadow_set_l1e_64(va, (pgentry_64_t *)&sl1e, 1);
3158 /*
3159 * if necessary, record the page table page as dirty
3160 */
3161 if ( unlikely(shadow_mode_log_dirty(d)) )
3162 __mark_dirty(d, __gpfn_to_mfn(d, l2e_get_pfn(gl2e)));
3164 } else {
3165 /*
3166 * Handle 2M pages here
3167 */
3168 /* Write fault? */
3169 if ( regs->error_code & 2 ) {
3170 if ( !l2e_rw_fault(v, (l2_pgentry_t *)&gl2e, va, WRITE_FAULT) ) {
3171 goto fail;
3173 } else {
3174 l2e_rw_fault(v, (l2_pgentry_t *)&gl2e, va, READ_FAULT);
3177 /*
3178 * STEP 3. Write guest/shadow l2e back
3179 */
3181 if ( unlikely(!__guest_set_l2e(v, va, &gl2e)) ) {
3182 domain_crash_synchronous();
3185 /*
3186 * Todo: if necessary, record the page table page as dirty
3187 */
3192 perfc_incrc(shadow_fault_fixed);
3193 d->arch.shadow_fault_count++;
3195 shadow_unlock(d);
3197 return EXCRET_fault_fixed;
3198 fail:
3199 shadow_unlock(d);
3200 ESH_LOG("Guest fault~~~\n");
3201 return 0;
3204 static void shadow_invlpg_64(struct vcpu *v, unsigned long va)
3206 struct domain *d = v->domain;
3207 l1_pgentry_t sl1e, old_sl1e;
3209 shadow_lock(d);
3211 if ( __shadow_get_l1e(v, va, &old_sl1e) )
3212 if ( l1e_get_flags(old_sl1e) & _PAGE_PRESENT )
3213 put_page_from_l1e(old_sl1e, d);
3216 sl1e = l1e_empty();
3217 __shadow_set_l1e(v, va, &sl1e);
3219 shadow_unlock(d);
3222 static unsigned long gva_to_gpa_64(unsigned long gva)
3224 struct vcpu *v = current;
3225 guest_l1_pgentry_t gl1e = {0};
3226 guest_l2_pgentry_t gl2e = {0};
3227 unsigned long gpa;
3229 if (guest_page_fault(v, gva, 0, &gl2e, &gl1e))
3230 return 0;
3232 if (guest_l2e_get_flags(gl2e) & _PAGE_PSE)
3233 gpa = guest_l2e_get_paddr(gl2e) + (gva & ((1 << GUEST_L2_PAGETABLE_SHIFT) - 1));
3234 else
3235 gpa = guest_l1e_get_paddr(gl1e) + (gva & ~PAGE_MASK);
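    /*
     * Worked example (editor's addition): if gl1e maps guest-physical frame
     * 0x12345 and gva = 0xdeadbeef, then gpa = 0x12345000 + 0xeef = 0x12345eef.
     */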
3237 return gpa;
3241 #ifndef GUEST_PGENTRY_32
3243 struct shadow_ops MODE_F_HANDLER = {
3244 .guest_paging_levels = 4,
3245 .invlpg = shadow_invlpg_64,
3246 .fault = shadow_fault_64,
3247 .update_pagetables = shadow_update_pagetables,
3248 .sync_all = sync_all,
3249 .remove_all_write_access = remove_all_write_access,
3250 .do_update_va_mapping = do_update_va_mapping,
3251 .mark_mfn_out_of_sync = mark_mfn_out_of_sync,
3252 .is_out_of_sync = is_out_of_sync,
3253 .gva_to_gpa = gva_to_gpa_64,
3254 };
3255 #endif
3257 #endif
3259 #if CONFIG_PAGING_LEVELS == 2
3260 struct shadow_ops MODE_A_HANDLER = {
3261 .guest_paging_levels = 2,
3262 .invlpg = shadow_invlpg_32,
3263 .fault = shadow_fault_32,
3264 .update_pagetables = shadow_update_pagetables,
3265 .sync_all = sync_all,
3266 .remove_all_write_access = remove_all_write_access,
3267 .do_update_va_mapping = do_update_va_mapping,
3268 .mark_mfn_out_of_sync = mark_mfn_out_of_sync,
3269 .is_out_of_sync = is_out_of_sync,
3270 .gva_to_gpa = gva_to_gpa_64,
3271 };
3273 #elif CONFIG_PAGING_LEVELS == 3
3274 struct shadow_ops MODE_B_HANDLER = {
3275 .guest_paging_levels = 3,
3276 .invlpg = shadow_invlpg_32,
3277 .fault = shadow_fault_32,
3278 .update_pagetables = shadow_update_pagetables,
3279 .sync_all = sync_all,
3280 .remove_all_write_access = remove_all_write_access,
3281 .do_update_va_mapping = do_update_va_mapping,
3282 .mark_mfn_out_of_sync = mark_mfn_out_of_sync,
3283 .is_out_of_sync = is_out_of_sync,
3284 .gva_to_gpa = gva_to_gpa_pae,
3285 };
3287 #endif
3290 /*
3291 * Local variables:
3292 * mode: C
3293 * c-set-style: "BSD"
3294 * c-basic-offset: 4
3295 * tab-width: 4
3296 * indent-tabs-mode: nil
3297 * End:
3298 */