/root/src/xen/xen/arch/x86/mm/shadow/multi.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * arch/x86/mm/shadow/multi.c |
3 | | * |
4 | | * Simple, mostly-synchronous shadow page tables. |
5 | | * Parts of this code are Copyright (c) 2006 by XenSource Inc. |
6 | | * Parts of this code are Copyright (c) 2006 by Michael A Fetterman |
7 | | * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al. |
8 | | * |
9 | | * This program is free software; you can redistribute it and/or modify |
10 | | * it under the terms of the GNU General Public License as published by |
11 | | * the Free Software Foundation; either version 2 of the License, or |
12 | | * (at your option) any later version. |
13 | | * |
14 | | * This program is distributed in the hope that it will be useful, |
15 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | | * GNU General Public License for more details. |
18 | | * |
19 | | * You should have received a copy of the GNU General Public License |
20 | | * along with this program; If not, see <http://www.gnu.org/licenses/>. |
21 | | */ |
22 | | |
23 | | /* Allow uniquely identifying static symbols in the 3 generated objects. */ |
24 | | asm(".file \"" __OBJECT_FILE__ "\""); |
25 | | |
26 | | #include <xen/types.h> |
27 | | #include <xen/mm.h> |
28 | | #include <xen/trace.h> |
29 | | #include <xen/sched.h> |
30 | | #include <xen/perfc.h> |
31 | | #include <xen/domain_page.h> |
32 | | #include <xen/iocap.h> |
33 | | #include <xsm/xsm.h> |
34 | | #include <asm/page.h> |
35 | | #include <asm/current.h> |
36 | | #include <asm/shadow.h> |
37 | | #include <asm/flushtlb.h> |
38 | | #include <asm/hvm/hvm.h> |
39 | | #include <asm/hvm/cacheattr.h> |
40 | | #include <asm/mtrr.h> |
41 | | #include <asm/guest_pt.h> |
42 | | #include <public/sched.h> |
43 | | #include "private.h" |
44 | | #include "types.h" |
45 | | |
46 | | /* THINGS TO DO LATER: |
47 | | * |
48 | | * TEARDOWN HEURISTICS |
49 | | * Also: have a heuristic for when to destroy a previous paging-mode's |
50 | | * shadows. When a guest is done with its start-of-day 32-bit tables |
51 | | * and reuses the memory we want to drop those shadows. Start with |
52 | | * shadows in a page in two modes as a hint, but beware of clever tricks |
53 | | * like reusing a pagetable for both PAE and 64-bit during boot... |
54 | | * |
55 | | * PAE LINEAR MAPS |
56 | | * Rework shadow_get_l*e() to have the option of using map_domain_page() |
57 | | * instead of linear maps. Add appropriate unmap_l*e calls in the users. |
58 | | * Then we can test the speed difference made by linear maps. If the |
59 | | * map_domain_page() version is OK on PAE, we could maybe allow a lightweight |
60 | | * l3-and-l2h-only shadow mode for PAE PV guests that would allow them |
61 | | * to share l2h pages again. |
62 | | * |
63 | | * PSE disabled / PSE36 |
64 | | * We don't support any modes other than PSE enabled, PSE36 disabled. |
65 | | * Neither of those would be hard to change, but we'd need to be able to |
66 | | * deal with shadows made in one mode and used in another. |
67 | | */ |
68 | | |
69 | | #define FETCH_TYPE_PREFETCH 1 |
70 | | #define FETCH_TYPE_DEMAND 2 |
71 | 0 | #define FETCH_TYPE_WRITE 4 |
72 | | typedef enum { |
73 | | ft_prefetch = FETCH_TYPE_PREFETCH, |
74 | | ft_demand_read = FETCH_TYPE_DEMAND, |
75 | | ft_demand_write = FETCH_TYPE_DEMAND | FETCH_TYPE_WRITE, |
76 | | } fetch_type_t; |
77 | | |
78 | | extern const char *const fetch_type_names[]; |
79 | | |
80 | | #if SHADOW_DEBUG_PROPAGATE && CONFIG_PAGING_LEVELS == GUEST_PAGING_LEVELS |
81 | | const char *const fetch_type_names[] = { |
82 | | [ft_prefetch] = "prefetch", |
83 | | [ft_demand_read] = "demand read", |
84 | | [ft_demand_write] = "demand write", |
85 | | }; |
86 | | #endif |
87 | | |
88 | | /**************************************************************************/ |
89 | | /* Hash table mapping from guest pagetables to shadows |
90 | | * |
91 | | * Normal case: maps the mfn of a guest page to the mfn of its shadow page. |
92 | | * FL1's: maps the *gfn* of the start of a superpage to the mfn of a |
93 | | * shadow L1 which maps its "splinters". |
94 | | */ |
95 | | |
96 | | static inline mfn_t |
97 | | get_fl1_shadow_status(struct domain *d, gfn_t gfn) |
98 | | /* Look for FL1 shadows in the hash table */ |
99 | 0 | { |
100 | 0 | mfn_t smfn = shadow_hash_lookup(d, gfn_x(gfn), SH_type_fl1_shadow); |
101 | 0 | ASSERT(!mfn_valid(smfn) || mfn_to_page(smfn)->u.sh.head); |
102 | 0 | return smfn; |
103 | 0 | } |
104 | | |
105 | | static inline mfn_t |
106 | | get_shadow_status(struct domain *d, mfn_t gmfn, u32 shadow_type) |
107 | | /* Look for shadows in the hash table */ |
108 | 0 | { |
109 | 0 | mfn_t smfn = shadow_hash_lookup(d, mfn_x(gmfn), shadow_type); |
110 | 0 | ASSERT(!mfn_valid(smfn) || mfn_to_page(smfn)->u.sh.head); |
111 | 0 | perfc_incr(shadow_get_shadow_status); |
112 | 0 | return smfn; |
113 | 0 | } |
114 | | |
115 | | static inline void |
116 | | set_fl1_shadow_status(struct domain *d, gfn_t gfn, mfn_t smfn) |
117 | | /* Put an FL1 shadow into the hash table */ |
118 | 0 | { |
119 | 0 | SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%"PRI_mfn"\n", |
120 | 0 | gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn)); |
121 | 0 |
122 | 0 | ASSERT(mfn_to_page(smfn)->u.sh.head); |
123 | 0 | shadow_hash_insert(d, gfn_x(gfn), SH_type_fl1_shadow, smfn); |
124 | 0 | } |
125 | | |
126 | | static inline void |
127 | | set_shadow_status(struct domain *d, mfn_t gmfn, u32 shadow_type, mfn_t smfn) |
128 | | /* Put a shadow into the hash table */ |
129 | 0 | { |
130 | 0 | int res; |
131 | 0 |
132 | 0 | SHADOW_PRINTK("d%d gmfn=%lx, type=%08x, smfn=%lx\n", |
133 | 0 | d->domain_id, mfn_x(gmfn), shadow_type, mfn_x(smfn)); |
134 | 0 |
135 | 0 | ASSERT(mfn_to_page(smfn)->u.sh.head); |
136 | 0 |
137 | 0 | /* 32-bit PV guests don't own their l4 pages so can't get_page them */ |
138 | 0 | if ( !is_pv_32bit_domain(d) || shadow_type != SH_type_l4_64_shadow ) |
139 | 0 | { |
140 | 0 | res = get_page(mfn_to_page(gmfn), d); |
141 | 0 | ASSERT(res == 1); |
142 | 0 | } |
143 | 0 |
144 | 0 | shadow_hash_insert(d, mfn_x(gmfn), shadow_type, smfn); |
145 | 0 | } |
146 | | |
147 | | static inline void |
148 | | delete_fl1_shadow_status(struct domain *d, gfn_t gfn, mfn_t smfn) |
149 | | /* Remove a shadow from the hash table */ |
150 | 0 | { |
151 | 0 | SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%"PRI_mfn"\n", |
152 | 0 | gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn)); |
153 | 0 | ASSERT(mfn_to_page(smfn)->u.sh.head); |
154 | 0 | shadow_hash_delete(d, gfn_x(gfn), SH_type_fl1_shadow, smfn); |
155 | 0 | } |
156 | | |
157 | | static inline void |
158 | | delete_shadow_status(struct domain *d, mfn_t gmfn, u32 shadow_type, mfn_t smfn) |
159 | | /* Remove a shadow from the hash table */ |
160 | 0 | { |
161 | 0 | SHADOW_PRINTK("d%d gmfn=%"PRI_mfn", type=%08x, smfn=%"PRI_mfn"\n", |
162 | 0 | d->domain_id, mfn_x(gmfn), shadow_type, mfn_x(smfn)); |
163 | 0 | ASSERT(mfn_to_page(smfn)->u.sh.head); |
164 | 0 | shadow_hash_delete(d, mfn_x(gmfn), shadow_type, smfn); |
165 | 0 | /* 32-bit PV guests don't own their l4 pages; see set_shadow_status */ |
166 | 0 | if ( !is_pv_32bit_domain(d) || shadow_type != SH_type_l4_64_shadow ) |
167 | 0 | put_page(mfn_to_page(gmfn)); |
168 | 0 | } |
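/* Illustrative sketch only -- not part of multi.c.  It shows the usual
 * pairing of the hash-status helpers above: look a shadow up by its guest
 * mfn and, if none exists yet, allocate one and enter it in the hash.
 * Assumptions: the caller already holds the paging lock, and shadow_alloc()
 * is the allocator from private.h that sh_make_shadow() uses later in this
 * file. */
static mfn_t demo_get_or_make_shadow(struct domain *d, mfn_t gmfn,
                                     u32 shadow_type)
{
    /* Normal case: the hash is keyed on the guest mfn. */
    mfn_t smfn = get_shadow_status(d, gmfn, shadow_type);

    if ( !mfn_valid(smfn) )
    {
        /* No shadow yet: make one and record it for later lookups. */
        smfn = shadow_alloc(d, shadow_type, mfn_x(gmfn));
        set_shadow_status(d, gmfn, shadow_type, smfn);
    }
    return smfn;
}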
169 | | |
170 | | |
171 | | /**************************************************************************/ |
172 | | /* Functions for walking the guest page tables */ |
173 | | |
174 | | static inline bool |
175 | | sh_walk_guest_tables(struct vcpu *v, unsigned long va, walk_t *gw, |
176 | | uint32_t pfec) |
177 | 0 | { |
178 | 0 | return guest_walk_tables(v, p2m_get_hostp2m(v->domain), va, gw, pfec, |
179 | 0 | #if GUEST_PAGING_LEVELS == 3 /* PAE */ |
180 | | INVALID_MFN, |
181 | | v->arch.paging.shadow.gl3e |
182 | | #else /* 32 or 64 */ |
183 | 0 | pagetable_get_mfn(v->arch.guest_table), |
184 | 0 | v->arch.paging.shadow.guest_vtable |
185 | 0 | #endif |
186 | 0 | ); |
187 | 0 | } |
188 | | |
189 | | /* This validation is called with the paging lock held, and after write |
190 | | * permission removal.  The check is therefore atomic, and no further |
191 | | * inconsistent content can be observed before the lock is released. |
192 | | * |
193 | | * Returns 1 to indicate success and 0 for inconsistency. |
194 | | */ |
195 | | static inline uint32_t |
196 | | shadow_check_gwalk(struct vcpu *v, unsigned long va, walk_t *gw, int version) |
197 | 0 | { |
198 | 0 | struct domain *d = v->domain; |
199 | 0 | guest_l1e_t *l1p; |
200 | 0 | guest_l2e_t *l2p; |
201 | 0 | #if GUEST_PAGING_LEVELS >= 4 |
202 | | guest_l3e_t *l3p; |
203 | | guest_l4e_t *l4p; |
204 | | #endif |
205 | 0 | int mismatch = 0; |
206 | 0 |
207 | 0 | ASSERT(paging_locked_by_me(d)); |
208 | 0 |
209 | 0 | /* No need for smp_rmb() here; taking the paging lock was enough. */ |
210 | 0 | if ( version == atomic_read(&d->arch.paging.shadow.gtable_dirty_version) ) |
211 | 0 | return 1; |
212 | 0 |
213 | 0 | /* We could cache the guest page mappings from the last |
214 | 0 | * guest table walk.  However, this check happens relatively |
215 | 0 | * infrequently, so the small cost of remapping the guest |
216 | 0 | * pages here is better than caching the mappings on every |
217 | 0 | * guest table walk. |
218 | 0 | * |
219 | 0 | * Also, when an inconsistency is found, simply return and let |
220 | 0 | * another fault be triggered, rather than re-validating the |
221 | 0 | * new path, to keep the logic simple. |
222 | 0 | */ |
223 | 0 | perfc_incr(shadow_check_gwalk); |
224 | 0 | #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */ |
225 | | #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */ |
226 | | l4p = (guest_l4e_t *)v->arch.paging.shadow.guest_vtable; |
227 | | mismatch |= (gw->l4e.l4 != l4p[guest_l4_table_offset(va)].l4); |
228 | | l3p = map_domain_page(gw->l3mfn); |
229 | | mismatch |= (gw->l3e.l3 != l3p[guest_l3_table_offset(va)].l3); |
230 | | unmap_domain_page(l3p); |
231 | | #else |
232 | | mismatch |= (gw->l3e.l3 != |
233 | | v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)].l3); |
234 | | #endif |
235 | | l2p = map_domain_page(gw->l2mfn); |
236 | | mismatch |= (gw->l2e.l2 != l2p[guest_l2_table_offset(va)].l2); |
237 | | unmap_domain_page(l2p); |
238 | | #else |
239 | 0 | l2p = (guest_l2e_t *)v->arch.paging.shadow.guest_vtable; |
240 | 0 | mismatch |= (gw->l2e.l2 != l2p[guest_l2_table_offset(va)].l2); |
241 | 0 | #endif |
242 | 0 | if ( !(guest_can_use_l2_superpages(v) && |
243 | 0 | (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)) ) |
244 | 0 | { |
245 | 0 | l1p = map_domain_page(gw->l1mfn); |
246 | 0 | mismatch |= (gw->l1e.l1 != l1p[guest_l1_table_offset(va)].l1); |
247 | 0 | unmap_domain_page(l1p); |
248 | 0 | } |
249 | 0 |
250 | 0 | return !mismatch; |
251 | 0 | } |
252 | | |
253 | | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
254 | | static int |
255 | | shadow_check_gl1e(struct vcpu *v, walk_t *gw) |
256 | 0 | { |
257 | 0 | guest_l1e_t *l1p, nl1e; |
258 | 0 |
259 | 0 | if ( !mfn_valid(gw->l1mfn) ) |
260 | 0 | return 0; |
261 | 0 |
262 | 0 | /* Can't just pull-through because mfn may have changed */ |
263 | 0 | l1p = map_domain_page(gw->l1mfn); |
264 | 0 | nl1e.l1 = l1p[guest_l1_table_offset(gw->va)].l1; |
265 | 0 | unmap_domain_page(l1p); |
266 | 0 |
267 | 0 | return gw->l1e.l1 != nl1e.l1; |
268 | 0 | } |
269 | | #endif |
270 | | |
271 | | /* Remove write access permissions from a gwalk_t in a batch, and |
272 | | * return an OR-ed result indicating whether a TLB flush is needed |
273 | | * and whether the guest pages need to be re-walked. |
274 | | * |
275 | | * Syncing pages will remove write access to that page; but it may |
276 | | * also give write access to other pages in the path. If we resync any |
277 | | * pages, re-walk from the beginning. |
278 | | */ |
279 | 0 | #define GW_RMWR_FLUSHTLB 1 |
280 | 0 | #define GW_RMWR_REWALK 2 |
281 | | |
282 | | static inline uint32_t |
283 | | gw_remove_write_accesses(struct vcpu *v, unsigned long va, walk_t *gw) |
284 | 0 | { |
285 | 0 | struct domain *d = v->domain; |
286 | 0 | uint32_t rc = 0; |
287 | 0 |
288 | 0 | #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */ |
289 | | #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */ |
290 | | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
291 | | if ( mfn_is_out_of_sync(gw->l3mfn) ) |
292 | | { |
293 | | sh_resync(d, gw->l3mfn); |
294 | | rc = GW_RMWR_REWALK; |
295 | | } |
296 | | else |
297 | | #endif /* OOS */ |
298 | | if ( sh_remove_write_access(d, gw->l3mfn, 3, va) ) |
299 | | rc = GW_RMWR_FLUSHTLB; |
300 | | #endif /* GUEST_PAGING_LEVELS >= 4 */ |
301 | | |
302 | | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
303 | | if ( mfn_is_out_of_sync(gw->l2mfn) ) |
304 | | { |
305 | | sh_resync(d, gw->l2mfn); |
306 | | rc |= GW_RMWR_REWALK; |
307 | | } |
308 | | else |
309 | | #endif /* OOS */ |
310 | | if ( sh_remove_write_access(d, gw->l2mfn, 2, va) ) |
311 | | rc |= GW_RMWR_FLUSHTLB; |
312 | | #endif /* GUEST_PAGING_LEVELS >= 3 */ |
313 | 0 |
314 | 0 | if ( !(guest_can_use_l2_superpages(v) && |
315 | 0 | (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)) |
316 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
317 | 0 | && !mfn_is_out_of_sync(gw->l1mfn) |
318 | 0 | #endif /* OOS */ |
319 | 0 | && sh_remove_write_access(d, gw->l1mfn, 1, va) ) |
320 | 0 | rc |= GW_RMWR_FLUSHTLB; |
321 | 0 |
322 | 0 | return rc; |
323 | 0 | } |
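/* Illustrative sketch only -- not part of multi.c.  It shows how a caller
 * (e.g. the page-fault path) might consume the OR-ed GW_RMWR_* result of
 * gw_remove_write_accesses() above: flush stale TLB entries if writeable
 * mappings were removed, and report whether the guest walk must be redone
 * because a resync may have changed the guest tables.  flush_tlb_mask()
 * and the domain_dirty_cpumask field are assumptions about the surrounding
 * Xen code, named here only to make the two bits concrete. */
static inline bool demo_handle_rmwr(struct vcpu *v, struct domain *d,
                                    unsigned long va, walk_t *gw)
{
    uint32_t rc = gw_remove_write_accesses(v, va, gw);

    if ( rc & GW_RMWR_FLUSHTLB )
        flush_tlb_mask(d->domain_dirty_cpumask);

    return rc & GW_RMWR_REWALK;  /* caller should re-walk the guest tables if set */
}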
324 | | |
325 | | /* Lightweight audit: pass all the shadows associated with this guest walk |
326 | | * through the audit mechanisms */ |
327 | | static void sh_audit_gw(struct vcpu *v, const walk_t *gw) |
328 | 0 | { |
329 | 0 | #if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES |
330 | 0 | struct domain *d = v->domain; |
331 | 0 | mfn_t smfn; |
332 | 0 |
333 | 0 | if ( !(SHADOW_AUDIT_ENABLE) ) |
334 | 0 | return; |
335 | 0 |
336 | 0 | #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */ |
337 | | if ( mfn_valid(gw->l4mfn) |
338 | | && mfn_valid((smfn = get_shadow_status(d, gw->l4mfn, |
339 | | SH_type_l4_shadow))) ) |
340 | | (void) sh_audit_l4_table(v, smfn, INVALID_MFN); |
341 | | if ( mfn_valid(gw->l3mfn) |
342 | | && mfn_valid((smfn = get_shadow_status(d, gw->l3mfn, |
343 | | SH_type_l3_shadow))) ) |
344 | | (void) sh_audit_l3_table(v, smfn, INVALID_MFN); |
345 | | #endif /* PAE or 64... */ |
346 | 0 | if ( mfn_valid(gw->l2mfn) ) |
347 | 0 | { |
348 | 0 | if ( mfn_valid((smfn = get_shadow_status(d, gw->l2mfn, |
349 | 0 | SH_type_l2_shadow))) ) |
350 | 0 | (void) sh_audit_l2_table(v, smfn, INVALID_MFN); |
351 | 0 | #if GUEST_PAGING_LEVELS == 3 |
352 | | if ( mfn_valid((smfn = get_shadow_status(d, gw->l2mfn, |
353 | | SH_type_l2h_shadow))) ) |
354 | | (void) sh_audit_l2_table(v, smfn, INVALID_MFN); |
355 | | #endif |
356 | 0 | } |
357 | 0 | if ( mfn_valid(gw->l1mfn) |
358 | 0 | && mfn_valid((smfn = get_shadow_status(d, gw->l1mfn, |
359 | 0 | SH_type_l1_shadow))) ) |
360 | 0 | (void) sh_audit_l1_table(v, smfn, INVALID_MFN); |
361 | 0 | else if ( (guest_l2e_get_flags(gw->l2e) & _PAGE_PRESENT) |
362 | 0 | && (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE) |
363 | 0 | && mfn_valid( |
364 | 0 | (smfn = get_fl1_shadow_status(d, guest_l2e_get_gfn(gw->l2e)))) ) |
365 | 0 | (void) sh_audit_fl1_table(v, smfn, INVALID_MFN); |
366 | 0 | #endif /* SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES */ |
367 | 0 | } |
368 | | |
369 | | /* |
370 | | * Write a new value into the guest pagetable, and update the shadows |
371 | | * appropriately. Returns false if we page-faulted, true for success. |
372 | | */ |
373 | | static bool |
374 | | sh_write_guest_entry(struct vcpu *v, intpte_t *p, intpte_t new, mfn_t gmfn) |
375 | 0 | { |
376 | 0 | #if CONFIG_PAGING_LEVELS == GUEST_PAGING_LEVELS |
377 | | int failed; |
378 | | |
379 | | paging_lock(v->domain); |
380 | | failed = __copy_to_user(p, &new, sizeof(new)); |
381 | | if ( failed != sizeof(new) ) |
382 | | sh_validate_guest_entry(v, gmfn, p, sizeof(new)); |
383 | | paging_unlock(v->domain); |
384 | | |
385 | | return !failed; |
386 | | #else |
387 | 0 | return false; |
388 | 0 | #endif |
389 | 0 | } |
390 | | |
391 | | /* |
392 | | * Cmpxchg a new value into the guest pagetable, and update the shadows |
393 | | * appropriately. Returns false if we page-faulted, true if not. |
394 | | * N.B. caller should check the value of "old" to see if the cmpxchg itself |
395 | | * was successful. |
396 | | */ |
397 | | static bool |
398 | | sh_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p, intpte_t *old, |
399 | | intpte_t new, mfn_t gmfn) |
400 | 0 | { |
401 | 0 | #if CONFIG_PAGING_LEVELS == GUEST_PAGING_LEVELS |
402 | | int failed; |
403 | | guest_intpte_t t = *old; |
404 | | |
405 | | paging_lock(v->domain); |
406 | | failed = cmpxchg_user(p, t, new); |
407 | | if ( t == *old ) |
408 | | sh_validate_guest_entry(v, gmfn, p, sizeof(new)); |
409 | | *old = t; |
410 | | paging_unlock(v->domain); |
411 | | |
412 | | return !failed; |
413 | | #else |
414 | 0 | return false; |
415 | 0 | #endif |
416 | 0 | } |
417 | | |
418 | | /**************************************************************************/ |
419 | | /* Functions to compute the correct index into a shadow page, given an |
420 | | * index into the guest page (as returned by guest_get_index()). |
421 | | * This is trivial when the shadow and guest use the same sized PTEs, but |
422 | | * gets more interesting when those sizes are mismatched (e.g. 32-bit guest, |
423 | | * PAE- or 64-bit shadows). |
424 | | * |
425 | | * These functions also increment the shadow mfn, when necessary. When PTE |
426 | | * sizes are mismatched, it takes 2 shadow L1 pages for a single guest L1 |
427 | | * page. In this case, we allocate 2 contiguous pages for the shadow L1, and |
428 | | * use simple pointer arithmetic on a pointer to the guest L1e to figure out |
429 | | * which shadow page we really want. Similarly, when PTE sizes are |
430 | | * mismatched, we shadow a guest L2 page with 4 shadow L2 pages. (The easiest |
431 | | * way to see this is: a 32-bit guest L2 page maps 4GB of virtual address |
432 | | * space, while a PAE- or 64-bit shadow L2 page maps 1GB of virtual address |
433 | | * space.) |
434 | | */ |
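/* Illustrative sketch only -- not part of multi.c.  A standalone recheck of
 * the index arithmetic described above for a 32-bit guest shadowed with
 * 8-byte PTEs: the 1024 guest l1 slots split across 2 shadow l1 pages of
 * 512 entries each, and every guest l2 slot expands to 2 adjacent shadow
 * l2 slots, so one guest l2 page needs 4 shadow l2 pages.  The constants
 * are the usual x86 values, defined locally so the demo compiles on its
 * own with a plain C compiler. */
#include <assert.h>
#include <stdio.h>

#define DEMO_SHADOW_L1_ENTRIES 512   /* 4k page / 8-byte PTE */
#define DEMO_SHADOW_L2_ENTRIES 512

int main(void)
{
    unsigned int gi;

    for ( gi = 0; gi < 1024; gi++ )
    {
        unsigned int l1_page = gi / DEMO_SHADOW_L1_ENTRIES;        /* 0..1 */
        unsigned int l1_slot = gi % DEMO_SHADOW_L1_ENTRIES;
        unsigned int l2_page = gi / (DEMO_SHADOW_L2_ENTRIES / 2);  /* 0..3 */
        unsigned int l2_slot = (gi % (DEMO_SHADOW_L2_ENTRIES / 2)) * 2;

        assert(l1_page < 2 && l1_slot < DEMO_SHADOW_L1_ENTRIES);
        assert(l2_page < 4 && l2_slot < DEMO_SHADOW_L2_ENTRIES);
    }

    /* Guest slot 1023 lands in the 2nd l1 page at slot 511, and in the
     * 4th l2 page at slot 510 (the first entry of its l2 pair). */
    printf("%u %u %u %u\n", 1023 / 512, 1023 % 512, 1023 / 256, (1023 % 256) * 2);
    return 0;
}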
435 | | |
436 | | #if GUEST_PAGING_LEVELS == 2 |
437 | | /* From one page of a multi-page shadow, find the next one */ |
438 | | static inline mfn_t sh_next_page(mfn_t smfn) |
439 | 0 | { |
440 | 0 | struct page_info *pg = mfn_to_page(smfn), *next; |
441 | 0 | struct page_list_head h = PAGE_LIST_HEAD_INIT(h); |
442 | 0 |
443 | 0 | ASSERT(pg->u.sh.type == SH_type_l1_32_shadow |
444 | 0 | || pg->u.sh.type == SH_type_fl1_32_shadow |
445 | 0 | || pg->u.sh.type == SH_type_l2_32_shadow); |
446 | 0 | ASSERT(pg->u.sh.type == SH_type_l2_32_shadow || pg->u.sh.head); |
447 | 0 |
448 | 0 | next = page_list_next(pg, &h); |
449 | 0 |
450 | 0 | ASSERT(next); |
451 | 0 | ASSERT(next->u.sh.type == pg->u.sh.type); |
452 | 0 | ASSERT(!next->u.sh.head); |
453 | 0 | return page_to_mfn(next); |
454 | 0 | } |
455 | | #endif |
456 | | |
457 | | static inline u32 |
458 | | guest_index(void *ptr) |
459 | 0 | { |
460 | 0 | return (u32)((unsigned long)ptr & ~PAGE_MASK) / sizeof(guest_l1e_t); |
461 | 0 | } |
462 | | |
463 | | static u32 |
464 | | shadow_l1_index(mfn_t *smfn, u32 guest_index) |
465 | 0 | { |
466 | 0 | #if (GUEST_PAGING_LEVELS == 2) |
467 | 0 | ASSERT(mfn_to_page(*smfn)->u.sh.head); |
468 | 0 | if ( guest_index >= SHADOW_L1_PAGETABLE_ENTRIES ) |
469 | 0 | *smfn = sh_next_page(*smfn); |
470 | 0 | return (guest_index % SHADOW_L1_PAGETABLE_ENTRIES); |
471 | 0 | #else |
472 | | return guest_index; |
473 | | #endif |
474 | 0 | } |
475 | | |
476 | | static u32 |
477 | | shadow_l2_index(mfn_t *smfn, u32 guest_index) |
478 | 0 | { |
479 | 0 | #if (GUEST_PAGING_LEVELS == 2) |
480 | 0 | int i; |
481 | 0 | ASSERT(mfn_to_page(*smfn)->u.sh.head); |
482 | 0 | // Because we use 2 shadow l2 entries for each guest entry, the number of |
483 | 0 | // guest entries per shadow page is SHADOW_L2_PAGETABLE_ENTRIES/2 |
484 | 0 | for ( i = 0; i < guest_index / (SHADOW_L2_PAGETABLE_ENTRIES / 2); i++ ) |
485 | 0 | *smfn = sh_next_page(*smfn); |
486 | 0 | // We multiply by two to get the index of the first of the two entries |
487 | 0 | // used to shadow the specified guest entry. |
488 | 0 | return (guest_index % (SHADOW_L2_PAGETABLE_ENTRIES / 2)) * 2; |
489 | 0 | #else |
490 | | return guest_index; |
491 | | #endif |
492 | 0 | } |
493 | | |
494 | | #if GUEST_PAGING_LEVELS >= 4 |
495 | | |
496 | | static u32 |
497 | | shadow_l3_index(mfn_t *smfn, u32 guest_index) |
498 | 0 | { |
499 | 0 | return guest_index; |
500 | 0 | } |
501 | | |
502 | | static u32 |
503 | | shadow_l4_index(mfn_t *smfn, u32 guest_index) |
504 | 0 | { |
505 | 0 | return guest_index; |
506 | 0 | } |
507 | | |
508 | | #endif // GUEST_PAGING_LEVELS >= 4 |
509 | | |
510 | | |
511 | | /**************************************************************************/ |
512 | | /* Function which computes shadow entries from their corresponding guest |
513 | | * entries. This is the "heart" of the shadow code. It operates using |
514 | | * level-1 shadow types, but handles all levels of entry. |
515 | | * Don't call it directly, but use the four wrappers below. |
516 | | */ |
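/* Illustrative sketch only -- not part of multi.c.  A standalone worked
 * example of the common level-1 case handled by _sh_propagate() below
 * (plain RAM, no log-dirty, no VRAM tracking), using the standard x86 PTE
 * bit values.  It shows how a clear A or D bit in the guest entry becomes
 * a stripped PRESENT or RW bit in the shadow, so the first access or write
 * faults and the shadow code can set the guest's A/D bit. */
#include <stdio.h>

#define PTE_PRESENT  0x001u
#define PTE_RW       0x002u
#define PTE_USER     0x004u
#define PTE_ACCESSED 0x020u
#define PTE_DIRTY    0x040u

static unsigned int demo_propagate_l1(unsigned int gflags)
{
    unsigned int pass_thru = PTE_ACCESSED | PTE_USER | PTE_RW | PTE_PRESENT;
    unsigned int sflags = gflags & pass_thru;

    if ( !(gflags & PTE_ACCESSED) )
        sflags &= ~PTE_PRESENT;   /* force a fault so the A bit gets set */
    if ( !(gflags & PTE_DIRTY) )
        sflags &= ~PTE_RW;        /* force a fault so the D bit gets set */

    return sflags;
}

int main(void)
{
    /* Present, writeable, user, accessed, but not yet dirty: RW is stripped. */
    printf("%#x\n", demo_propagate_l1(PTE_PRESENT | PTE_RW | PTE_USER |
                                      PTE_ACCESSED));   /* prints 0x25 */
    return 0;
}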
517 | | |
518 | | static always_inline void |
519 | | _sh_propagate(struct vcpu *v, |
520 | | guest_intpte_t guest_intpte, |
521 | | mfn_t target_mfn, |
522 | | void *shadow_entry_ptr, |
523 | | int level, |
524 | | fetch_type_t ft, |
525 | | p2m_type_t p2mt) |
526 | 0 | { |
527 | 0 | guest_l1e_t guest_entry = { guest_intpte }; |
528 | 0 | shadow_l1e_t *sp = shadow_entry_ptr; |
529 | 0 | struct domain *d = v->domain; |
530 | 0 | struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram; |
531 | 0 | gfn_t target_gfn = guest_l1e_get_gfn(guest_entry); |
532 | 0 | u32 pass_thru_flags; |
533 | 0 | u32 gflags, sflags; |
534 | 0 | bool mmio_mfn; |
535 | 0 |
536 | 0 | /* We don't shadow PAE l3s */ |
537 | 0 | ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3); |
538 | 0 |
539 | 0 | /* Check there's something for the shadows to map to */ |
540 | 0 | if ( (!p2m_is_valid(p2mt) && !p2m_is_grant(p2mt)) |
541 | 0 | || !gfn_valid(d, target_gfn) ) |
542 | 0 | { |
543 | 0 | *sp = shadow_l1e_empty(); |
544 | 0 | goto done; |
545 | 0 | } |
546 | 0 |
547 | 0 | gflags = guest_l1e_get_flags(guest_entry); |
548 | 0 |
549 | 0 | if ( unlikely(!(gflags & _PAGE_PRESENT)) ) |
550 | 0 | { |
551 | 0 | #if !(SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
552 | | /* If a guest l1 entry is not present, shadow with the magic |
553 | | * guest-not-present entry. */ |
554 | | if ( level == 1 ) |
555 | | *sp = sh_l1e_gnp(); |
556 | | else |
557 | | #endif /* !OOS */ |
558 | 0 | *sp = shadow_l1e_empty(); |
559 | 0 | goto done; |
560 | 0 | } |
561 | 0 |
562 | 0 | if ( level == 1 && p2mt == p2m_mmio_dm ) |
563 | 0 | { |
564 | 0 | /* Guest l1e maps emulated MMIO space */ |
565 | 0 | *sp = sh_l1e_mmio(target_gfn, gflags); |
566 | 0 | if ( !d->arch.paging.shadow.has_fast_mmio_entries ) |
567 | 0 | d->arch.paging.shadow.has_fast_mmio_entries = 1; |
568 | 0 | goto done; |
569 | 0 | } |
570 | 0 |
571 | 0 | // Must have a valid target_mfn unless this is a prefetch or an l1 |
572 | 0 | // pointing at MMIO space. In the case of a prefetch, an invalid |
573 | 0 | // mfn means that we can not usefully shadow anything, and so we |
574 | 0 | // return early. |
575 | 0 | // |
576 | 0 | mmio_mfn = !mfn_valid(target_mfn) |
577 | 0 | || (level == 1 |
578 | 0 | && page_get_owner(mfn_to_page(target_mfn)) == dom_io); |
579 | 0 | if ( mmio_mfn |
580 | 0 | && !(level == 1 && (!shadow_mode_refcounts(d) |
581 | 0 | || p2mt == p2m_mmio_direct)) ) |
582 | 0 | { |
583 | 0 | ASSERT((ft == ft_prefetch)); |
584 | 0 | *sp = shadow_l1e_empty(); |
585 | 0 | goto done; |
586 | 0 | } |
587 | 0 |
588 | 0 | // Propagate bits from the guest to the shadow. |
589 | 0 | // Some of these may be overwritten, below. |
590 | 0 | // Since we know the guest's PRESENT bit is set, we also set the shadow's |
591 | 0 | // SHADOW_PRESENT bit. |
592 | 0 | // |
593 | 0 | pass_thru_flags = (_PAGE_ACCESSED | _PAGE_USER | |
594 | 0 | _PAGE_RW | _PAGE_PRESENT); |
595 | 0 | if ( guest_nx_enabled(v) ) |
596 | 0 | pass_thru_flags |= _PAGE_NX_BIT; |
597 | 0 | if ( level == 1 && !shadow_mode_refcounts(d) && mmio_mfn ) |
598 | 0 | pass_thru_flags |= _PAGE_PAT | _PAGE_PCD | _PAGE_PWT; |
599 | 0 | sflags = gflags & pass_thru_flags; |
600 | 0 |
601 | 0 | /* |
602 | 0 | * For HVM domains with direct access to MMIO areas, set the correct |
603 | 0 | * caching attributes in the shadows to match what was asked for. |
604 | 0 | */ |
605 | 0 | if ( (level == 1) && is_hvm_domain(d) && |
606 | 0 | !is_xen_heap_mfn(mfn_x(target_mfn)) ) |
607 | 0 | { |
608 | 0 | int type; |
609 | 0 |
610 | 0 | ASSERT(!(sflags & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT))); |
611 | 0 |
612 | 0 | /* Compute the PAT index for the shadow page entry when VT-d is |
613 | 0 | * enabled and a device is assigned: |
614 | 0 | * 1) direct MMIO: compute the PAT index with gMTRR=UC and gPAT. |
615 | 0 | * 2) if snoop control is enabled, compute the PAT index as WB. |
616 | 0 | * 3) if snoop control is disabled, compute the PAT index with |
617 | 0 | * gMTRR and gPAT. |
618 | 0 | */ |
619 | 0 | if ( !mmio_mfn && |
620 | 0 | (type = hvm_get_mem_pinned_cacheattr(d, target_gfn, 0)) >= 0 ) |
621 | 0 | sflags |= pat_type_2_pte_flags(type); |
622 | 0 | else if ( d->arch.hvm_domain.is_in_uc_mode ) |
623 | 0 | sflags |= pat_type_2_pte_flags(PAT_TYPE_UNCACHABLE); |
624 | 0 | else |
625 | 0 | if ( iomem_access_permitted(d, mfn_x(target_mfn), mfn_x(target_mfn)) ) |
626 | 0 | { |
627 | 0 | if ( p2mt == p2m_mmio_direct ) |
628 | 0 | sflags |= get_pat_flags(v, |
629 | 0 | gflags, |
630 | 0 | gfn_to_paddr(target_gfn), |
631 | 0 | pfn_to_paddr(mfn_x(target_mfn)), |
632 | 0 | MTRR_TYPE_UNCACHABLE); |
633 | 0 | else if ( iommu_snoop ) |
634 | 0 | sflags |= pat_type_2_pte_flags(PAT_TYPE_WRBACK); |
635 | 0 | else |
636 | 0 | sflags |= get_pat_flags(v, |
637 | 0 | gflags, |
638 | 0 | gfn_to_paddr(target_gfn), |
639 | 0 | pfn_to_paddr(mfn_x(target_mfn)), |
640 | 0 | NO_HARDCODE_MEM_TYPE); |
641 | 0 | } |
642 | 0 | } |
643 | 0 |
644 | 0 | // Set the A&D bits for higher level shadows. |
645 | 0 | // Higher level entries do not, strictly speaking, have dirty bits, but |
646 | 0 | // since we use shadow linear tables, each of these entries may, at some |
647 | 0 | // point in time, also serve as a shadow L1 entry. |
648 | 0 | // By setting both the A&D bits in each of these, we eliminate the burden |
649 | 0 | // on the hardware to update these bits on initial accesses. |
650 | 0 | // |
651 | 0 | if ( (level > 1) && !((SHADOW_PAGING_LEVELS == 3) && (level == 3)) ) |
652 | 0 | sflags |= _PAGE_ACCESSED | _PAGE_DIRTY; |
653 | 0 |
654 | 0 | // If the A or D bit has not yet been set in the guest, then we must |
655 | 0 | // prevent the corresponding kind of access. |
656 | 0 | // |
657 | 0 | if ( unlikely(!(gflags & _PAGE_ACCESSED)) ) |
658 | 0 | sflags &= ~_PAGE_PRESENT; |
659 | 0 |
660 | 0 | /* D bits exist in L1es and PSE L2es */ |
661 | 0 | if ( unlikely(((level == 1) || |
662 | 0 | ((level == 2) && |
663 | 0 | (gflags & _PAGE_PSE) && |
664 | 0 | guest_can_use_l2_superpages(v))) |
665 | 0 | && !(gflags & _PAGE_DIRTY)) ) |
666 | 0 | sflags &= ~_PAGE_RW; |
667 | 0 |
668 | 0 | // shadow_mode_log_dirty support |
669 | 0 | // |
670 | 0 | // Only allow the guest write access to a page a) on a demand fault, |
671 | 0 | // or b) if the page is already marked as dirty. |
672 | 0 | // |
673 | 0 | // (We handle log-dirty entirely inside the shadow code, without using the |
674 | 0 | // p2m_ram_logdirty p2m type: only HAP uses that.) |
675 | 0 | if ( unlikely((level == 1) && shadow_mode_log_dirty(d)) ) |
676 | 0 | { |
677 | 0 | if ( mfn_valid(target_mfn) ) { |
678 | 0 | if ( ft & FETCH_TYPE_WRITE ) |
679 | 0 | paging_mark_dirty(d, target_mfn); |
680 | 0 | else if ( !paging_mfn_is_dirty(d, target_mfn) ) |
681 | 0 | sflags &= ~_PAGE_RW; |
682 | 0 | } |
683 | 0 | } |
684 | 0 |
685 | 0 | if ( unlikely((level == 1) && dirty_vram |
686 | 0 | && dirty_vram->last_dirty == -1 |
687 | 0 | && gfn_x(target_gfn) >= dirty_vram->begin_pfn |
688 | 0 | && gfn_x(target_gfn) < dirty_vram->end_pfn) ) |
689 | 0 | { |
690 | 0 | if ( ft & FETCH_TYPE_WRITE ) |
691 | 0 | dirty_vram->last_dirty = NOW(); |
692 | 0 | else |
693 | 0 | sflags &= ~_PAGE_RW; |
694 | 0 | } |
695 | 0 |
696 | 0 | /* Read-only memory */ |
697 | 0 | if ( p2m_is_readonly(p2mt) ) |
698 | 0 | sflags &= ~_PAGE_RW; |
699 | 0 | else if ( p2mt == p2m_mmio_direct && |
700 | 0 | rangeset_contains_singleton(mmio_ro_ranges, mfn_x(target_mfn)) ) |
701 | 0 | { |
702 | 0 | sflags &= ~(_PAGE_RW | _PAGE_PAT); |
703 | 0 | sflags |= _PAGE_PCD | _PAGE_PWT; |
704 | 0 | } |
705 | 0 |
706 | 0 | // protect guest page tables |
707 | 0 | // |
708 | 0 | if ( unlikely((level == 1) |
709 | 0 | && sh_mfn_is_a_page_table(target_mfn) |
710 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC ) |
711 | 0 | /* Unless the page is out of sync and the guest is |
712 | 0 | writing to it. */ |
713 | 0 | && !(mfn_oos_may_write(target_mfn) |
714 | 0 | && (ft == ft_demand_write)) |
715 | 0 | #endif /* OOS */ |
716 | 0 | ) ) |
717 | 0 | sflags &= ~_PAGE_RW; |
718 | 0 |
719 | 0 | // PV guests in 64-bit mode use two different page tables for user vs |
720 | 0 | // supervisor permissions, making the guest's _PAGE_USER bit irrelevant. |
721 | 0 | // It is always shadowed as present... |
722 | 0 | if ( (GUEST_PAGING_LEVELS == 4) && !is_pv_32bit_domain(d) |
723 | 0 | && is_pv_domain(d) ) |
724 | 0 | { |
725 | 0 | sflags |= _PAGE_USER; |
726 | 0 | } |
727 | 0 |
728 | 0 | *sp = shadow_l1e_from_mfn(target_mfn, sflags); |
729 | 0 |
730 | 0 | done: |
731 | 0 | SHADOW_DEBUG(PROPAGATE, |
732 | 0 | "%s level %u guest %" SH_PRI_gpte " shadow %" SH_PRI_pte "\n", |
733 | 0 | fetch_type_names[ft], level, guest_entry.l1, sp->l1); |
734 | 0 | } |
735 | | |
736 | | |
737 | | /* These four wrappers give us a little bit of type-safety back around |
738 | | * the use of void-* pointers and intpte types in _sh_propagate(), and |
739 | | * allow the compiler to optimize out some level checks. */ |
740 | | |
741 | | #if GUEST_PAGING_LEVELS >= 4 |
742 | | static void |
743 | | l4e_propagate_from_guest(struct vcpu *v, |
744 | | guest_l4e_t gl4e, |
745 | | mfn_t sl3mfn, |
746 | | shadow_l4e_t *sl4e, |
747 | | fetch_type_t ft) |
748 | 0 | { |
749 | 0 | if ( !mfn_eq(sl3mfn, INVALID_MFN) && |
750 | 0 | (guest_l4e_get_flags(gl4e) & _PAGE_PRESENT) ) |
751 | 0 | ASSERT(!guest_l4e_rsvd_bits(v, gl4e)); |
752 | 0 |
753 | 0 | _sh_propagate(v, gl4e.l4, sl3mfn, sl4e, 4, ft, p2m_ram_rw); |
754 | 0 | } |
755 | | |
756 | | static void |
757 | | l3e_propagate_from_guest(struct vcpu *v, |
758 | | guest_l3e_t gl3e, |
759 | | mfn_t sl2mfn, |
760 | | shadow_l3e_t *sl3e, |
761 | | fetch_type_t ft) |
762 | 0 | { |
763 | 0 | if ( !mfn_eq(sl2mfn, INVALID_MFN) && |
764 | 0 | (guest_l3e_get_flags(gl3e) & _PAGE_PRESENT) ) |
765 | 0 | ASSERT(!guest_l3e_rsvd_bits(v, gl3e)); |
766 | 0 |
767 | 0 | _sh_propagate(v, gl3e.l3, sl2mfn, sl3e, 3, ft, p2m_ram_rw); |
768 | 0 | } |
769 | | #endif // GUEST_PAGING_LEVELS >= 4 |
770 | | |
771 | | static void |
772 | | l2e_propagate_from_guest(struct vcpu *v, |
773 | | guest_l2e_t gl2e, |
774 | | mfn_t sl1mfn, |
775 | | shadow_l2e_t *sl2e, |
776 | | fetch_type_t ft) |
777 | 0 | { |
778 | 0 | if ( !mfn_eq(sl1mfn, INVALID_MFN) && |
779 | 0 | (guest_l2e_get_flags(gl2e) & _PAGE_PRESENT) ) |
780 | 0 | ASSERT(!guest_l2e_rsvd_bits(v, gl2e)); |
781 | 0 |
782 | 0 | _sh_propagate(v, gl2e.l2, sl1mfn, sl2e, 2, ft, p2m_ram_rw); |
783 | 0 | } |
784 | | |
785 | | static void |
786 | | l1e_propagate_from_guest(struct vcpu *v, |
787 | | guest_l1e_t gl1e, |
788 | | mfn_t gmfn, |
789 | | shadow_l1e_t *sl1e, |
790 | | fetch_type_t ft, |
791 | | p2m_type_t p2mt) |
792 | 0 | { |
793 | 0 | if ( !mfn_eq(gmfn, INVALID_MFN) && |
794 | 0 | (guest_l1e_get_flags(gl1e) & _PAGE_PRESENT) ) |
795 | 0 | ASSERT(!guest_l1e_rsvd_bits(v, gl1e)); |
796 | 0 |
797 | 0 | _sh_propagate(v, gl1e.l1, gmfn, sl1e, 1, ft, p2mt); |
798 | 0 | } |
799 | | |
800 | | |
801 | | /**************************************************************************/ |
802 | | /* These functions update shadow entries (and do bookkeeping on the shadow |
803 | | * tables they are in). It is intended that they are the only |
804 | | * functions which ever write (non-zero) data onto a shadow page. |
805 | | */ |
806 | | |
807 | | static inline void safe_write_entry(void *dst, void *src) |
808 | | /* Copy one PTE safely when processors might be running on the |
809 | | * destination pagetable. This does *not* give safety against |
810 | | * concurrent writes (that's what the paging lock is for), just |
811 | | * stops the hardware picking up partially written entries. */ |
812 | 0 | { |
813 | 0 | volatile unsigned long *d = dst; |
814 | 0 | unsigned long *s = src; |
815 | 0 | ASSERT(!((unsigned long) d & (sizeof (shadow_l1e_t) - 1))); |
816 | 0 | /* In 64-bit, sizeof(pte) == sizeof(ulong) == 1 word, |
817 | 0 | * which will be an atomic write, since the entry is aligned. */ |
818 | 0 | BUILD_BUG_ON(sizeof (shadow_l1e_t) != sizeof (unsigned long)); |
819 | 0 | *d = *s; |
820 | 0 | } |
821 | | |
822 | | |
823 | | static inline void |
824 | | shadow_write_entries(void *d, void *s, int entries, mfn_t mfn) |
825 | | /* This function does the actual writes to shadow pages. |
826 | | * It must not be called directly, since it doesn't do the bookkeeping |
827 | | * that shadow_set_l*e() functions do. */ |
828 | 0 | { |
829 | 0 | shadow_l1e_t *dst = d; |
830 | 0 | shadow_l1e_t *src = s; |
831 | 0 | void *map = NULL; |
832 | 0 | int i; |
833 | 0 |
834 | 0 | /* Because we mirror access rights at all levels in the shadow, an |
835 | 0 | * l2 (or higher) entry with the RW bit cleared will leave us with |
836 | 0 | * no write access through the linear map. |
837 | 0 | * We detect that by writing to the shadow with copy_to_user() and |
838 | 0 | * using map_domain_page() to get a writeable mapping if we need to. */ |
839 | 0 | if ( __copy_to_user(d, d, sizeof (unsigned long)) != 0 ) |
840 | 0 | { |
841 | 0 | perfc_incr(shadow_linear_map_failed); |
842 | 0 | map = map_domain_page(mfn); |
843 | 0 | dst = map + ((unsigned long)dst & (PAGE_SIZE - 1)); |
844 | 0 | } |
845 | 0 |
846 | 0 |
847 | 0 | for ( i = 0; i < entries; i++ ) |
848 | 0 | safe_write_entry(dst++, src++); |
849 | 0 |
850 | 0 | if ( map != NULL ) unmap_domain_page(map); |
851 | 0 | } |
852 | | |
853 | | /* type is only used to distinguish grant map pages from ordinary RAM |
854 | | * i.e. non-p2m_is_grant() pages are treated as p2m_ram_rw. */ |
855 | | static int inline |
856 | | shadow_get_page_from_l1e(shadow_l1e_t sl1e, struct domain *d, p2m_type_t type) |
857 | 0 | { |
858 | 0 | int res; |
859 | 0 | mfn_t mfn; |
860 | 0 | struct domain *owner; |
861 | 0 |
862 | 0 | ASSERT(!sh_l1e_is_magic(sl1e)); |
863 | 0 |
864 | 0 | if ( !shadow_mode_refcounts(d) ) |
865 | 0 | return 1; |
866 | 0 |
867 | 0 | res = get_page_from_l1e(sl1e, d, d); |
868 | 0 |
869 | 0 | // If a privileged domain is attempting to install a map of a page it does |
870 | 0 | // not own, we let it succeed anyway. |
871 | 0 | // |
872 | 0 | if ( unlikely(res < 0) && |
873 | 0 | !shadow_mode_translate(d) && |
874 | 0 | mfn_valid(mfn = shadow_l1e_get_mfn(sl1e)) && |
875 | 0 | (owner = page_get_owner(mfn_to_page(mfn))) && |
876 | 0 | (d != owner) ) |
877 | 0 | { |
878 | 0 | res = xsm_priv_mapping(XSM_TARGET, d, owner); |
879 | 0 | if ( !res ) { |
880 | 0 | res = get_page_from_l1e(sl1e, d, owner); |
881 | 0 | SHADOW_PRINTK("privileged domain %d installs map of mfn %"PRI_mfn" " |
882 | 0 | "which is owned by d%d: %s\n", |
883 | 0 | d->domain_id, mfn_x(mfn), owner->domain_id, |
884 | 0 | res >= 0 ? "success" : "failed"); |
885 | 0 | } |
886 | 0 | } |
887 | 0 |
888 | 0 | /* Okay, it might still be a grant mapping PTE. Try it. */ |
889 | 0 | if ( unlikely(res < 0) && |
890 | 0 | (type == p2m_grant_map_rw || |
891 | 0 | (type == p2m_grant_map_ro && |
892 | 0 | !(shadow_l1e_get_flags(sl1e) & _PAGE_RW))) ) |
893 | 0 | { |
894 | 0 | /* It's a grant mapping. The grant table implementation will |
895 | 0 | already have checked that we're supposed to have access, so |
896 | 0 | we can just grab a reference directly. */ |
897 | 0 | mfn = shadow_l1e_get_mfn(sl1e); |
898 | 0 | if ( mfn_valid(mfn) ) |
899 | 0 | res = get_page_from_l1e(sl1e, d, page_get_owner(mfn_to_page(mfn))); |
900 | 0 | } |
901 | 0 |
902 | 0 | if ( unlikely(res < 0) ) |
903 | 0 | { |
904 | 0 | perfc_incr(shadow_get_page_fail); |
905 | 0 |         SHADOW_PRINTK("failed: l1e=" SH_PRI_pte "\n", sl1e.l1); |
906 | 0 | } |
907 | 0 |
908 | 0 | return res; |
909 | 0 | } |
910 | | |
911 | | static void inline |
912 | | shadow_put_page_from_l1e(shadow_l1e_t sl1e, struct domain *d) |
913 | 0 | { |
914 | 0 | if ( !shadow_mode_refcounts(d) ) |
915 | 0 | return; |
916 | 0 |
917 | 0 | put_page_from_l1e(sl1e, d); |
918 | 0 | } |
919 | | |
920 | | #if GUEST_PAGING_LEVELS >= 4 |
921 | | static int shadow_set_l4e(struct domain *d, |
922 | | shadow_l4e_t *sl4e, |
923 | | shadow_l4e_t new_sl4e, |
924 | | mfn_t sl4mfn) |
925 | 0 | { |
926 | 0 | int flags = 0, ok; |
927 | 0 | shadow_l4e_t old_sl4e; |
928 | 0 | paddr_t paddr; |
929 | 0 | ASSERT(sl4e != NULL); |
930 | 0 | old_sl4e = *sl4e; |
931 | 0 |
932 | 0 | if ( old_sl4e.l4 == new_sl4e.l4 ) return 0; /* Nothing to do */ |
933 | 0 |
934 | 0 | paddr = ((((paddr_t)mfn_x(sl4mfn)) << PAGE_SHIFT) |
935 | 0 | | (((unsigned long)sl4e) & ~PAGE_MASK)); |
936 | 0 |
937 | 0 | if ( shadow_l4e_get_flags(new_sl4e) & _PAGE_PRESENT ) |
938 | 0 | { |
939 | 0 | /* About to install a new reference */ |
940 | 0 | mfn_t sl3mfn = shadow_l4e_get_mfn(new_sl4e); |
941 | 0 | ok = sh_get_ref(d, sl3mfn, paddr); |
942 | 0 |         /* Are we pinning l3 shadows to handle weird Linux behaviour? */ |
943 | 0 | if ( sh_type_is_pinnable(d, SH_type_l3_64_shadow) ) |
944 | 0 | ok |= sh_pin(d, sl3mfn); |
945 | 0 | if ( !ok ) |
946 | 0 | { |
947 | 0 | domain_crash(d); |
948 | 0 | return SHADOW_SET_ERROR; |
949 | 0 | } |
950 | 0 | } |
951 | 0 |
952 | 0 | /* Write the new entry */ |
953 | 0 | shadow_write_entries(sl4e, &new_sl4e, 1, sl4mfn); |
954 | 0 | flags |= SHADOW_SET_CHANGED; |
955 | 0 |
956 | 0 | if ( shadow_l4e_get_flags(old_sl4e) & _PAGE_PRESENT ) |
957 | 0 | { |
958 | 0 | /* We lost a reference to an old mfn. */ |
959 | 0 | mfn_t osl3mfn = shadow_l4e_get_mfn(old_sl4e); |
960 | 0 | if ( (mfn_x(osl3mfn) != mfn_x(shadow_l4e_get_mfn(new_sl4e))) |
961 | 0 | || !perms_strictly_increased(shadow_l4e_get_flags(old_sl4e), |
962 | 0 | shadow_l4e_get_flags(new_sl4e)) ) |
963 | 0 | { |
964 | 0 | flags |= SHADOW_SET_FLUSH; |
965 | 0 | } |
966 | 0 | sh_put_ref(d, osl3mfn, paddr); |
967 | 0 | } |
968 | 0 | return flags; |
969 | 0 | } |
970 | | |
971 | | static int shadow_set_l3e(struct domain *d, |
972 | | shadow_l3e_t *sl3e, |
973 | | shadow_l3e_t new_sl3e, |
974 | | mfn_t sl3mfn) |
975 | 0 | { |
976 | 0 | int flags = 0; |
977 | 0 | shadow_l3e_t old_sl3e; |
978 | 0 | paddr_t paddr; |
979 | 0 | ASSERT(sl3e != NULL); |
980 | 0 | old_sl3e = *sl3e; |
981 | 0 |
982 | 0 | if ( old_sl3e.l3 == new_sl3e.l3 ) return 0; /* Nothing to do */ |
983 | 0 |
984 | 0 | paddr = ((((paddr_t)mfn_x(sl3mfn)) << PAGE_SHIFT) |
985 | 0 | | (((unsigned long)sl3e) & ~PAGE_MASK)); |
986 | 0 |
987 | 0 | if ( shadow_l3e_get_flags(new_sl3e) & _PAGE_PRESENT ) |
988 | 0 | { |
989 | 0 | /* About to install a new reference */ |
990 | 0 | if ( !sh_get_ref(d, shadow_l3e_get_mfn(new_sl3e), paddr) ) |
991 | 0 | { |
992 | 0 | domain_crash(d); |
993 | 0 | return SHADOW_SET_ERROR; |
994 | 0 | } |
995 | 0 | } |
996 | 0 |
997 | 0 | /* Write the new entry */ |
998 | 0 | shadow_write_entries(sl3e, &new_sl3e, 1, sl3mfn); |
999 | 0 | flags |= SHADOW_SET_CHANGED; |
1000 | 0 |
1001 | 0 | if ( shadow_l3e_get_flags(old_sl3e) & _PAGE_PRESENT ) |
1002 | 0 | { |
1003 | 0 | /* We lost a reference to an old mfn. */ |
1004 | 0 | mfn_t osl2mfn = shadow_l3e_get_mfn(old_sl3e); |
1005 | 0 | if ( (mfn_x(osl2mfn) != mfn_x(shadow_l3e_get_mfn(new_sl3e))) || |
1006 | 0 | !perms_strictly_increased(shadow_l3e_get_flags(old_sl3e), |
1007 | 0 | shadow_l3e_get_flags(new_sl3e)) ) |
1008 | 0 | { |
1009 | 0 | flags |= SHADOW_SET_FLUSH; |
1010 | 0 | } |
1011 | 0 | sh_put_ref(d, osl2mfn, paddr); |
1012 | 0 | } |
1013 | 0 | return flags; |
1014 | 0 | } |
1015 | | #endif /* GUEST_PAGING_LEVELS >= 4 */ |
1016 | | |
1017 | | static int shadow_set_l2e(struct domain *d, |
1018 | | shadow_l2e_t *sl2e, |
1019 | | shadow_l2e_t new_sl2e, |
1020 | | mfn_t sl2mfn) |
1021 | 0 | { |
1022 | 0 | int flags = 0; |
1023 | 0 | shadow_l2e_t old_sl2e; |
1024 | 0 | paddr_t paddr; |
1025 | 0 |
1026 | 0 | #if GUEST_PAGING_LEVELS == 2 |
1027 | 0 | /* In 2-on-3 we work with pairs of l2es pointing at two-page |
1028 | 0 | * shadows. Reference counting and up-pointers track from the first |
1029 | 0 | * page of the shadow to the first l2e, so make sure that we're |
1030 | 0 | * working with those: |
1031 | 0 | * Start with a pair of identical entries */ |
1032 | 0 | shadow_l2e_t pair[2] = { new_sl2e, new_sl2e }; |
1033 | 0 | /* Align the pointer down so it's pointing at the first of the pair */ |
1034 | 0 | sl2e = (shadow_l2e_t *)((unsigned long)sl2e & ~(sizeof(shadow_l2e_t))); |
1035 | 0 | #endif |
1036 | 0 |
1037 | 0 | ASSERT(sl2e != NULL); |
1038 | 0 | old_sl2e = *sl2e; |
1039 | 0 |
1040 | 0 | if ( old_sl2e.l2 == new_sl2e.l2 ) return 0; /* Nothing to do */ |
1041 | 0 |
1042 | 0 | paddr = ((((paddr_t)mfn_x(sl2mfn)) << PAGE_SHIFT) |
1043 | 0 | | (((unsigned long)sl2e) & ~PAGE_MASK)); |
1044 | 0 |
1045 | 0 | if ( shadow_l2e_get_flags(new_sl2e) & _PAGE_PRESENT ) |
1046 | 0 | { |
1047 | 0 | mfn_t sl1mfn = shadow_l2e_get_mfn(new_sl2e); |
1048 | 0 | ASSERT(mfn_to_page(sl1mfn)->u.sh.head); |
1049 | 0 |
1050 | 0 | /* About to install a new reference */ |
1051 | 0 | if ( !sh_get_ref(d, sl1mfn, paddr) ) |
1052 | 0 | { |
1053 | 0 | domain_crash(d); |
1054 | 0 | return SHADOW_SET_ERROR; |
1055 | 0 | } |
1056 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
1057 | 0 | { |
1058 | 0 | struct page_info *sp = mfn_to_page(sl1mfn); |
1059 | 0 | mfn_t gl1mfn; |
1060 | 0 |
1061 | 0 | ASSERT(sp->u.sh.head); |
1062 | 0 | gl1mfn = backpointer(sp); |
1063 | 0 | /* If the shadow is a fl1 then the backpointer contains |
1064 | 0 | the GFN instead of the GMFN, and it's definitely not |
1065 | 0 | OOS. */ |
1066 | 0 | if ( (sp->u.sh.type != SH_type_fl1_shadow) && mfn_valid(gl1mfn) |
1067 | 0 | && mfn_is_out_of_sync(gl1mfn) ) |
1068 | 0 | sh_resync(d, gl1mfn); |
1069 | 0 | } |
1070 | 0 | #endif |
1071 | 0 | #if GUEST_PAGING_LEVELS == 2 |
1072 | 0 |         /* Update the second entry to point to the second half of the l1 */ |
1073 | 0 | sl1mfn = sh_next_page(sl1mfn); |
1074 | 0 | pair[1] = shadow_l2e_from_mfn(sl1mfn, shadow_l2e_get_flags(new_sl2e)); |
1075 | 0 | #endif |
1076 | 0 | } |
1077 | 0 |
1078 | 0 | /* Write the new entry */ |
1079 | 0 | #if GUEST_PAGING_LEVELS == 2 |
1080 | 0 | shadow_write_entries(sl2e, &pair, 2, sl2mfn); |
1081 | 0 | #else /* normal case */ |
1082 | | shadow_write_entries(sl2e, &new_sl2e, 1, sl2mfn); |
1083 | | #endif |
1084 | 0 | flags |= SHADOW_SET_CHANGED; |
1085 | 0 |
1086 | 0 | if ( shadow_l2e_get_flags(old_sl2e) & _PAGE_PRESENT ) |
1087 | 0 | { |
1088 | 0 | /* We lost a reference to an old mfn. */ |
1089 | 0 | mfn_t osl1mfn = shadow_l2e_get_mfn(old_sl2e); |
1090 | 0 | if ( (mfn_x(osl1mfn) != mfn_x(shadow_l2e_get_mfn(new_sl2e))) || |
1091 | 0 | !perms_strictly_increased(shadow_l2e_get_flags(old_sl2e), |
1092 | 0 | shadow_l2e_get_flags(new_sl2e)) ) |
1093 | 0 | { |
1094 | 0 | flags |= SHADOW_SET_FLUSH; |
1095 | 0 | } |
1096 | 0 | sh_put_ref(d, osl1mfn, paddr); |
1097 | 0 | } |
1098 | 0 | return flags; |
1099 | 0 | } |
1100 | | |
1101 | | static inline void shadow_vram_get_l1e(shadow_l1e_t new_sl1e, |
1102 | | shadow_l1e_t *sl1e, |
1103 | | mfn_t sl1mfn, |
1104 | | struct domain *d) |
1105 | 0 | { |
1106 | 0 | mfn_t mfn = shadow_l1e_get_mfn(new_sl1e); |
1107 | 0 | int flags = shadow_l1e_get_flags(new_sl1e); |
1108 | 0 | unsigned long gfn; |
1109 | 0 | struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram; |
1110 | 0 |
1111 | 0 | if ( !dirty_vram /* tracking disabled? */ |
1112 | 0 | || !(flags & _PAGE_RW) /* read-only mapping? */ |
1113 | 0 | || !mfn_valid(mfn) ) /* mfn can be invalid in mmio_direct */ |
1114 | 0 | return; |
1115 | 0 |
1116 | 0 | gfn = mfn_to_gfn(d, mfn); |
1117 | 0 | /* Page sharing not supported on shadow PTs */ |
1118 | 0 | BUG_ON(SHARED_M2P(gfn)); |
1119 | 0 |
1120 | 0 | if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) ) |
1121 | 0 | { |
1122 | 0 | unsigned long i = gfn - dirty_vram->begin_pfn; |
1123 | 0 | struct page_info *page = mfn_to_page(mfn); |
1124 | 0 |
1125 | 0 | if ( (page->u.inuse.type_info & PGT_count_mask) == 1 ) |
1126 | 0 | /* Initial guest reference, record it */ |
1127 | 0 | dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn)) |
1128 | 0 | | ((unsigned long)sl1e & ~PAGE_MASK); |
1129 | 0 | } |
1130 | 0 | } |
1131 | | |
1132 | | static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e, |
1133 | | shadow_l1e_t *sl1e, |
1134 | | mfn_t sl1mfn, |
1135 | | struct domain *d) |
1136 | 0 | { |
1137 | 0 | mfn_t mfn = shadow_l1e_get_mfn(old_sl1e); |
1138 | 0 | int flags = shadow_l1e_get_flags(old_sl1e); |
1139 | 0 | unsigned long gfn; |
1140 | 0 | struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram; |
1141 | 0 |
1142 | 0 | if ( !dirty_vram /* tracking disabled? */ |
1143 | 0 | || !(flags & _PAGE_RW) /* read-only mapping? */ |
1144 | 0 | || !mfn_valid(mfn) ) /* mfn can be invalid in mmio_direct */ |
1145 | 0 | return; |
1146 | 0 |
1147 | 0 | gfn = mfn_to_gfn(d, mfn); |
1148 | 0 | /* Page sharing not supported on shadow PTs */ |
1149 | 0 | BUG_ON(SHARED_M2P(gfn)); |
1150 | 0 |
1151 | 0 | if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) ) |
1152 | 0 | { |
1153 | 0 | unsigned long i = gfn - dirty_vram->begin_pfn; |
1154 | 0 | struct page_info *page = mfn_to_page(mfn); |
1155 | 0 | int dirty = 0; |
1156 | 0 | paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn)) |
1157 | 0 | | ((unsigned long)sl1e & ~PAGE_MASK); |
1158 | 0 |
1159 | 0 | if ( (page->u.inuse.type_info & PGT_count_mask) == 1 ) |
1160 | 0 | { |
1161 | 0 | /* Last reference */ |
1162 | 0 | if ( dirty_vram->sl1ma[i] == INVALID_PADDR ) { |
1163 | 0 | /* We didn't know it was that one, let's say it is dirty */ |
1164 | 0 | dirty = 1; |
1165 | 0 | } |
1166 | 0 | else |
1167 | 0 | { |
1168 | 0 | ASSERT(dirty_vram->sl1ma[i] == sl1ma); |
1169 | 0 | dirty_vram->sl1ma[i] = INVALID_PADDR; |
1170 | 0 | if ( flags & _PAGE_DIRTY ) |
1171 | 0 | dirty = 1; |
1172 | 0 | } |
1173 | 0 | } |
1174 | 0 | else |
1175 | 0 | { |
1176 | 0 | /* We had more than one reference, just consider the page dirty. */ |
1177 | 0 | dirty = 1; |
1178 | 0 | /* Check that it's not the one we recorded. */ |
1179 | 0 | if ( dirty_vram->sl1ma[i] == sl1ma ) |
1180 | 0 | { |
1181 | 0 | /* Too bad, we remembered the wrong one... */ |
1182 | 0 | dirty_vram->sl1ma[i] = INVALID_PADDR; |
1183 | 0 | } |
1184 | 0 | else |
1185 | 0 | { |
1186 | 0 | /* Ok, our recorded sl1e is still pointing to this page, let's |
1187 | 0 | * just hope it will remain. */ |
1188 | 0 | } |
1189 | 0 | } |
1190 | 0 | if ( dirty ) |
1191 | 0 | { |
1192 | 0 | dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8); |
1193 | 0 | dirty_vram->last_dirty = NOW(); |
1194 | 0 | } |
1195 | 0 | } |
1196 | 0 | } |
1197 | | |
1198 | | static int shadow_set_l1e(struct domain *d, |
1199 | | shadow_l1e_t *sl1e, |
1200 | | shadow_l1e_t new_sl1e, |
1201 | | p2m_type_t new_type, |
1202 | | mfn_t sl1mfn) |
1203 | 0 | { |
1204 | 0 | int flags = 0; |
1205 | 0 | shadow_l1e_t old_sl1e; |
1206 | 0 | #if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC |
1207 | 0 | mfn_t new_gmfn = shadow_l1e_get_mfn(new_sl1e); |
1208 | 0 | #endif |
1209 | 0 | ASSERT(sl1e != NULL); |
1210 | 0 |
1211 | 0 | #if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC |
1212 | 0 | if ( mfn_valid(new_gmfn) && mfn_oos_may_write(new_gmfn) |
1213 | 0 | && ((shadow_l1e_get_flags(new_sl1e) & (_PAGE_RW|_PAGE_PRESENT)) |
1214 | 0 | == (_PAGE_RW|_PAGE_PRESENT)) ) |
1215 | 0 | oos_fixup_add(d, new_gmfn, sl1mfn, pgentry_ptr_to_slot(sl1e)); |
1216 | 0 | #endif |
1217 | 0 |
1218 | 0 | old_sl1e = *sl1e; |
1219 | 0 |
1220 | 0 | if ( old_sl1e.l1 == new_sl1e.l1 ) return 0; /* Nothing to do */ |
1221 | 0 |
1222 | 0 | if ( (shadow_l1e_get_flags(new_sl1e) & _PAGE_PRESENT) |
1223 | 0 | && !sh_l1e_is_magic(new_sl1e) ) |
1224 | 0 | { |
1225 | 0 | /* About to install a new reference */ |
1226 | 0 | if ( shadow_mode_refcounts(d) ) |
1227 | 0 | { |
1228 | 0 | #define PAGE_FLIPPABLE (_PAGE_RW | _PAGE_PWT | _PAGE_PCD | _PAGE_PAT) |
1229 | 0 | int rc; |
1230 | 0 |
1231 | 0 | TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_GET_REF); |
1232 | 0 | switch ( rc = shadow_get_page_from_l1e(new_sl1e, d, new_type) ) |
1233 | 0 | { |
1234 | 0 | default: |
1235 | 0 | /* Doesn't look like a pagetable. */ |
1236 | 0 | flags |= SHADOW_SET_ERROR; |
1237 | 0 | new_sl1e = shadow_l1e_empty(); |
1238 | 0 | break; |
1239 | 0 | case PAGE_FLIPPABLE & -PAGE_FLIPPABLE ... PAGE_FLIPPABLE: |
1240 | 0 | ASSERT(!(rc & ~PAGE_FLIPPABLE)); |
1241 | 0 | new_sl1e = shadow_l1e_flip_flags(new_sl1e, rc); |
1242 | 0 | /* fall through */ |
1243 | 0 | case 0: |
1244 | 0 | shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d); |
1245 | 0 | break; |
1246 | 0 | } |
1247 | 0 | #undef PAGE_FLIPPABLE |
1248 | 0 | } |
1249 | 0 | } |
1250 | 0 |
1251 | 0 | /* Write the new entry */ |
1252 | 0 | shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn); |
1253 | 0 | flags |= SHADOW_SET_CHANGED; |
1254 | 0 |
1255 | 0 | if ( (shadow_l1e_get_flags(old_sl1e) & _PAGE_PRESENT) |
1256 | 0 | && !sh_l1e_is_magic(old_sl1e) ) |
1257 | 0 | { |
1258 | 0 | /* We lost a reference to an old mfn. */ |
1259 | 0 | /* N.B. Unlike higher-level sets, never need an extra flush |
1260 | 0 | * when writing an l1e. Because it points to the same guest frame |
1261 | 0 | * as the guest l1e did, it's the guest's responsibility to |
1262 | 0 | * trigger a flush later. */ |
1263 | 0 | if ( shadow_mode_refcounts(d) ) |
1264 | 0 | { |
1265 | 0 | shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d); |
1266 | 0 | shadow_put_page_from_l1e(old_sl1e, d); |
1267 | 0 | TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_PUT_REF); |
1268 | 0 | } |
1269 | 0 | } |
1270 | 0 | return flags; |
1271 | 0 | } |
1272 | | |
1273 | | |
1274 | | /**************************************************************************/ |
1275 | | /* Macros to walk pagetables. These take the shadow of a pagetable and |
1276 | | * walk every "interesting" entry. That is, they don't touch Xen mappings, |
1277 | | * and for 32-bit l2s shadowed onto PAE or 64-bit, they only touch every |
1278 | | * second entry (since pairs of entries are managed together). For multi-page |
1279 | | * shadows they walk all pages. |
1280 | | * |
1281 | | * Arguments are an MFN, the variable to point to each entry, a variable |
1282 | | * to indicate that we are done (we will shortcut to the end of the scan |
1283 | | * when _done != 0), a variable to indicate that we should avoid Xen mappings, |
1284 | | * and the code. |
1285 | | * |
1286 | | * WARNING: These macros have side-effects. They change the values of both |
1287 | | * the pointer and the MFN. */ |
1288 | | |
1289 | | static inline void increment_ptr_to_guest_entry(void *ptr) |
1290 | 0 | { |
1291 | 0 | if ( ptr ) |
1292 | 0 | { |
1293 | 0 | guest_l1e_t **entry = ptr; |
1294 | 0 | (*entry)++; |
1295 | 0 | } |
1296 | 0 | } |
1297 | | |
1298 | | /* All kinds of l1: touch all entries */ |
1299 | 0 | #define _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code) \ |
1300 | 0 | do { \ |
1301 | 0 | int _i; \ |
1302 | 0 | shadow_l1e_t *_sp = map_domain_page((_sl1mfn)); \ |
1303 | 0 | ASSERT(mfn_to_page(_sl1mfn)->u.sh.type == SH_type_l1_shadow \ |
1304 | 0 | || mfn_to_page(_sl1mfn)->u.sh.type == SH_type_fl1_shadow);\ |
1305 | 0 | for ( _i = 0; _i < SHADOW_L1_PAGETABLE_ENTRIES; _i++ ) \ |
1306 | 0 | { \ |
1307 | 0 | (_sl1e) = _sp + _i; \ |
1308 | 0 | if ( shadow_l1e_get_flags(*(_sl1e)) & _PAGE_PRESENT ) \ |
1309 | 0 | {_code} \ |
1310 | 0 | if ( _done ) break; \ |
1311 | 0 | increment_ptr_to_guest_entry(_gl1p); \ |
1312 | 0 | } \ |
1313 | 0 | unmap_domain_page(_sp); \ |
1314 | 0 | } while (0) |
1315 | | |
1316 | | /* 32-bit l1, on PAE or 64-bit shadows: need to walk both pages of shadow */ |
1317 | | #if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2 |
1318 | 0 | #define SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code) \ |
1319 | 0 | do { \ |
1320 | 0 | int __done = 0; \ |
1321 | 0 | _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, \ |
1322 | 0 | ({ (__done = _done); }), _code); \ |
1323 | 0 | _sl1mfn = sh_next_page(_sl1mfn); \ |
1324 | 0 | if ( !__done ) \ |
1325 | 0 | _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, \ |
1326 | 0 | ({ (__done = _done); }), _code); \ |
1327 | 0 | } while (0) |
1328 | | #else /* Everything else; l1 shadows are only one page */ |
1329 | | #define SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code) \ |
1330 | 0 | _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code) |
1331 | | #endif |
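/* Illustrative sketch only -- not part of multi.c.  It shows how the walker
 * macro above is typically used elsewhere in this file: visit every
 * *present* shadow l1e in sl1mfn (the macro skips non-present entries) and
 * blank the ones that map target_mfn.  Passing 0 for the guest pointer and
 * for the "done" expression means no guest-entry tracking and no early
 * exit; shadow_set_l1e() is the bookkeeping function defined earlier. */
static void demo_rm_mappings_from_l1(struct domain *d, mfn_t sl1mfn,
                                     mfn_t target_mfn)
{
    shadow_l1e_t *sl1e;

    SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0,
    {
        if ( mfn_x(shadow_l1e_get_mfn(*sl1e)) == mfn_x(target_mfn) )
            (void) shadow_set_l1e(d, sl1e, shadow_l1e_empty(),
                                  p2m_invalid, sl1mfn);
    });
}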
1332 | | |
1333 | | |
1334 | | #if GUEST_PAGING_LEVELS == 2 |
1335 | | |
1336 | | /* 32-bit l2 on PAE/64: four pages, touch every second entry */ |
1337 | 0 | #define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _dom, _code) \ |
1338 | 0 | do { \ |
1339 | 0 | int _i, _j, __done = 0; \ |
1340 | 0 | ASSERT(shadow_mode_external(_dom)); \ |
1341 | 0 | ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_32_shadow); \ |
1342 | 0 | for ( _j = 0; _j < 4 && !__done; _j++ ) \ |
1343 | 0 | { \ |
1344 | 0 | shadow_l2e_t *_sp = map_domain_page(_sl2mfn); \ |
1345 | 0 | for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i += 2 ) \ |
1346 | 0 | { \ |
1347 | 0 | (_sl2e) = _sp + _i; \ |
1348 | 0 | if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT ) \ |
1349 | 0 | {_code} \ |
1350 | 0 | if ( (__done = (_done)) ) break; \ |
1351 | 0 | increment_ptr_to_guest_entry(_gl2p); \ |
1352 | 0 | } \ |
1353 | 0 | unmap_domain_page(_sp); \ |
1354 | 0 | if ( _j < 3 ) _sl2mfn = sh_next_page(_sl2mfn); \ |
1355 | 0 | } \ |
1356 | 0 | } while (0) |
1357 | | |
1358 | | #elif GUEST_PAGING_LEVELS == 3 |
1359 | | |
1360 | | /* PAE: touch all entries */ |
1361 | 0 | #define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _dom, _code) \ |
1362 | 0 | do { \ |
1363 | 0 | int _i; \ |
1364 | 0 | shadow_l2e_t *_sp = map_domain_page((_sl2mfn)); \ |
1365 | 0 | ASSERT(shadow_mode_external(_dom)); \ |
1366 | 0 | ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_pae_shadow \ |
1367 | 0 | || mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2h_pae_shadow); \ |
1368 | 0 | for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ ) \ |
1369 | 0 | { \ |
1370 | 0 | (_sl2e) = _sp + _i; \ |
1371 | 0 | if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT ) \ |
1372 | 0 | {_code} \ |
1373 | 0 | if ( _done ) break; \ |
1374 | 0 | increment_ptr_to_guest_entry(_gl2p); \ |
1375 | 0 | } \ |
1376 | 0 | unmap_domain_page(_sp); \ |
1377 | 0 | } while (0) |
1378 | | |
1379 | | #else |
1380 | | |
1381 | | /* 64-bit l2: touch all entries except for PAE compat guests. */ |
1382 | 0 | #define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _dom, _code) \ |
1383 | 0 | do { \ |
1384 | 0 | int _i; \ |
1385 | 0 | int _xen = !shadow_mode_external(_dom); \ |
1386 | 0 | shadow_l2e_t *_sp = map_domain_page((_sl2mfn)); \ |
1387 | 0 | ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_64_shadow ||\ |
1388 | 0 | mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2h_64_shadow);\ |
1389 | 0 | for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ ) \ |
1390 | 0 | { \ |
1391 | 0 | if ( (!(_xen)) \ |
1392 | 0 | || !is_pv_32bit_domain(_dom) \ |
1393 | 0 | || mfn_to_page(_sl2mfn)->u.sh.type != SH_type_l2h_64_shadow \ |
1394 | 0 | || (_i < COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(_dom)) ) \ |
1395 | 0 | { \ |
1396 | 0 | (_sl2e) = _sp + _i; \ |
1397 | 0 | if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT ) \ |
1398 | 0 | {_code} \ |
1399 | 0 | if ( _done ) break; \ |
1400 | 0 | increment_ptr_to_guest_entry(_gl2p); \ |
1401 | 0 | } \ |
1402 | 0 | } \ |
1403 | 0 | unmap_domain_page(_sp); \ |
1404 | 0 | } while (0) |
1405 | | |
1406 | | #endif /* different kinds of l2 */ |
1407 | | |
1408 | | #if GUEST_PAGING_LEVELS == 4 |
1409 | | |
1410 | | /* 64-bit l3: touch all entries */ |
1411 | 0 | #define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code) \ |
1412 | 0 | do { \ |
1413 | 0 | int _i; \ |
1414 | 0 | shadow_l3e_t *_sp = map_domain_page((_sl3mfn)); \ |
1415 | 0 | ASSERT(mfn_to_page(_sl3mfn)->u.sh.type == SH_type_l3_64_shadow);\ |
1416 | 0 | for ( _i = 0; _i < SHADOW_L3_PAGETABLE_ENTRIES; _i++ ) \ |
1417 | 0 | { \ |
1418 | 0 | (_sl3e) = _sp + _i; \ |
1419 | 0 | if ( shadow_l3e_get_flags(*(_sl3e)) & _PAGE_PRESENT ) \ |
1420 | 0 | {_code} \ |
1421 | 0 | if ( _done ) break; \ |
1422 | 0 | increment_ptr_to_guest_entry(_gl3p); \ |
1423 | 0 | } \ |
1424 | 0 | unmap_domain_page(_sp); \ |
1425 | 0 | } while (0) |
1426 | | |
1427 | | /* 64-bit l4: avoid Xen mappings */ |
1428 | 0 | #define SHADOW_FOREACH_L4E(_sl4mfn, _sl4e, _gl4p, _done, _dom, _code) \ |
1429 | 0 | do { \ |
1430 | 0 | shadow_l4e_t *_sp = map_domain_page((_sl4mfn)); \ |
1431 | 0 | int _xen = !shadow_mode_external(_dom); \ |
1432 | 0 | int _i; \ |
1433 | 0 | ASSERT(mfn_to_page(_sl4mfn)->u.sh.type == SH_type_l4_64_shadow);\ |
1434 | 0 | for ( _i = 0; _i < SHADOW_L4_PAGETABLE_ENTRIES; _i++ ) \ |
1435 | 0 | { \ |
1436 | 0 | if ( (!(_xen)) || is_guest_l4_slot(_dom, _i) ) \ |
1437 | 0 | { \ |
1438 | 0 | (_sl4e) = _sp + _i; \ |
1439 | 0 | if ( shadow_l4e_get_flags(*(_sl4e)) & _PAGE_PRESENT ) \ |
1440 | 0 | {_code} \ |
1441 | 0 | if ( _done ) break; \ |
1442 | 0 | } \ |
1443 | 0 | increment_ptr_to_guest_entry(_gl4p); \ |
1444 | 0 | } \ |
1445 | 0 | unmap_domain_page(_sp); \ |
1446 | 0 | } while (0) |
1447 | | |
1448 | | #endif |
1449 | | |
1450 | | |
1451 | | /**************************************************************************/ |
1452 | | /* Create a shadow of a given guest page. |
1453 | | */ |
1454 | | static mfn_t |
1455 | | sh_make_shadow(struct vcpu *v, mfn_t gmfn, u32 shadow_type) |
1456 | 0 | { |
1457 | 0 | struct domain *d = v->domain; |
1458 | 0 | mfn_t smfn = shadow_alloc(d, shadow_type, mfn_x(gmfn)); |
1459 | 0 | SHADOW_DEBUG(MAKE_SHADOW, "(%"PRI_mfn", %u)=>%"PRI_mfn"\n", |
1460 | 0 | mfn_x(gmfn), shadow_type, mfn_x(smfn)); |
1461 | 0 |
|
1462 | 0 | if ( sh_type_has_up_pointer(d, shadow_type) ) |
1463 | 0 | /* Lower-level shadow, not yet linked from a higher level */
1464 | 0 | mfn_to_page(smfn)->up = 0; |
1465 | 0 |
|
1466 | 0 | #if GUEST_PAGING_LEVELS == 4 |
1467 | | #if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL) |
1468 | | if ( shadow_type == SH_type_l4_64_shadow && |
1469 | | unlikely(d->arch.paging.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL) ) |
1470 | | { |
1471 | | /* We're shadowing a new l4, but we've been assuming the guest uses |
1472 | | * only one l4 per vcpu and context switches using an l4 entry. |
1473 | | * Count the number of active l4 shadows. If there are enough |
1474 | | * of them, decide that this isn't an old linux guest, and stop |
1475 | | * pinning l3es. This is not very quick but it doesn't happen |
1476 | | * very often. */ |
1477 | | struct page_info *sp, *t; |
1478 | | struct vcpu *v2; |
1479 | | int l4count = 0, vcpus = 0; |
1480 | | page_list_for_each(sp, &d->arch.paging.shadow.pinned_shadows) |
1481 | | { |
1482 | | if ( sp->u.sh.type == SH_type_l4_64_shadow ) |
1483 | | l4count++; |
1484 | | } |
1485 | | for_each_vcpu ( d, v2 ) |
1486 | | vcpus++; |
1487 | | if ( l4count > 2 * vcpus ) |
1488 | | { |
1489 | | /* Unpin all the pinned l3 tables, and don't pin any more. */ |
1490 | | page_list_for_each_safe(sp, t, &d->arch.paging.shadow.pinned_shadows) |
1491 | | { |
1492 | | if ( sp->u.sh.type == SH_type_l3_64_shadow ) |
1493 | | sh_unpin(d, page_to_mfn(sp)); |
1494 | | } |
1495 | | d->arch.paging.shadow.opt_flags &= ~SHOPT_LINUX_L3_TOPLEVEL; |
1496 | | sh_reset_l3_up_pointers(v); |
1497 | | } |
1498 | | } |
1499 | | #endif |
1500 | | #endif |
1501 | 0 |
|
1502 | 0 | // Create the Xen mappings... |
1503 | 0 | if ( !shadow_mode_external(d) ) |
1504 | 0 | { |
1505 | 0 | switch (shadow_type) |
1506 | 0 | { |
1507 | 0 | #if GUEST_PAGING_LEVELS == 4 |
1508 | | case SH_type_l4_shadow: |
1509 | | { |
1510 | | shadow_l4e_t *l4t = map_domain_page(smfn); |
1511 | | |
1512 | | BUILD_BUG_ON(sizeof(l4_pgentry_t) != sizeof(shadow_l4e_t)); |
1513 | | |
1514 | | init_xen_l4_slots(l4t, gmfn, d, smfn, (!is_pv_32bit_domain(d) && |
1515 | | VM_ASSIST(d, m2p_strict))); |
1516 | | unmap_domain_page(l4t); |
1517 | | } |
1518 | | break; |
1519 | | #endif |
1520 | 0 | #if GUEST_PAGING_LEVELS >= 3 |
1521 | | case SH_type_l2h_shadow: |
1522 | | BUILD_BUG_ON(sizeof(l2_pgentry_t) != sizeof(shadow_l2e_t)); |
1523 | | if ( is_pv_32bit_domain(d) ) |
1524 | | { |
1525 | | shadow_l2e_t *l2t = map_domain_page(smfn); |
1526 | | |
1527 | | init_xen_pae_l2_slots(l2t, d); |
1528 | | unmap_domain_page(l2t); |
1529 | | } |
1530 | | break; |
1531 | | #endif |
1532 | 0 | default: /* Do nothing */ break; |
1533 | 0 | } |
1534 | 0 | } |
1535 | 0 |
|
1536 | 0 | shadow_promote(d, gmfn, shadow_type); |
1537 | 0 | set_shadow_status(d, gmfn, shadow_type, smfn); |
1538 | 0 |
|
1539 | 0 | return smfn; |
1540 | 0 | } |
1541 | | |
1542 | | /* Make a splintered superpage shadow */ |
1543 | | static mfn_t |
1544 | | make_fl1_shadow(struct domain *d, gfn_t gfn) |
1545 | 0 | { |
1546 | 0 | mfn_t smfn = shadow_alloc(d, SH_type_fl1_shadow, gfn_x(gfn)); |
1547 | 0 |
|
1548 | 0 | SHADOW_DEBUG(MAKE_SHADOW, "(%" SH_PRI_gfn ")=>%" PRI_mfn "\n", |
1549 | 0 | gfn_x(gfn), mfn_x(smfn)); |
1550 | 0 |
|
1551 | 0 | set_fl1_shadow_status(d, gfn, smfn); |
1552 | 0 | return smfn; |
1553 | 0 | } |
1554 | | |
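make_fl1_shadow() only allocates and registers the fl1 shadow; the "splintering" itself consists of filling that shadow with one small-page entry per frame of the superpage, each taking the superpage's base gfn plus its index (compare the fragmented-superpage case in sh_prefetch() further down). The sketch below is illustrative only and not part of multi.c: the entry type, the flag handling and the 512-entry size are simplifying assumptions.

#include <stdint.h>
#include <stdio.h>

#define FL1_ENTRIES 512u   /* assumption: a 2MB superpage of 4k pages */

struct fake_l1e { uint64_t gfn; uint32_t flags; };

/* Fill a splintered l1: consecutive gfns, flags copied from the superpage. */
static void splinter_superpage(struct fake_l1e *fl1,
                               uint64_t base_gfn, uint32_t flags)
{
    for ( unsigned int i = 0; i < FL1_ENTRIES; i++ )
    {
        fl1[i].gfn = base_gfn + i;
        fl1[i].flags = flags;
    }
}

int main(void)
{
    static struct fake_l1e fl1[FL1_ENTRIES];

    splinter_superpage(fl1, 0x1000, 0x67);  /* hypothetical base gfn and flags */
    printf("entry 5 -> gfn %#llx\n", (unsigned long long)fl1[5].gfn);
    return 0;
}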
1555 | | |
1556 | | #if SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS |
1557 | | mfn_t |
1558 | | sh_make_monitor_table(struct vcpu *v) |
1559 | 0 | { |
1560 | 0 | struct domain *d = v->domain; |
1561 | 0 |
|
1562 | 0 | ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0); |
1563 | 0 |
|
1564 | 0 | /* Guarantee we can get the memory we need */ |
1565 | 0 | shadow_prealloc(d, SH_type_monitor_table, CONFIG_PAGING_LEVELS); |
1566 | 0 |
|
1567 | 0 | { |
1568 | 0 | mfn_t m4mfn; |
1569 | 0 | l4_pgentry_t *l4e; |
1570 | 0 |
|
1571 | 0 | m4mfn = shadow_alloc(d, SH_type_monitor_table, 0); |
1572 | 0 | mfn_to_page(m4mfn)->shadow_flags = 4; |
1573 | 0 |
|
1574 | 0 | l4e = map_domain_page(m4mfn); |
1575 | 0 |
|
1576 | 0 | /* |
1577 | 0 | * Create a self-linear mapping, but no shadow-linear mapping. A |
1578 | 0 | * shadow-linear mapping will either be inserted below when creating |
1579 | 0 | * lower level monitor tables, or later in sh_update_cr3(). |
1580 | 0 | */ |
1581 | 0 | init_xen_l4_slots(l4e, m4mfn, d, INVALID_MFN, false); |
1582 | 0 |
|
1583 | 0 | #if SHADOW_PAGING_LEVELS < 4 |
1584 | | { |
1585 | | mfn_t m3mfn, m2mfn; |
1586 | | l3_pgentry_t *l3e; |
1587 | | /* Install an l3 table and an l2 table that will hold the shadow |
1588 | | * linear map entries. This overrides the linear map entry that |
1589 | | * was installed by sh_install_xen_entries_in_l4. */ |
1590 | | |
1591 | 0 | m3mfn = shadow_alloc(d, SH_type_monitor_table, 0); |
1592 | 0 | mfn_to_page(m3mfn)->shadow_flags = 3; |
1593 | 0 | l4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] |
1594 | 0 | = l4e_from_mfn(m3mfn, __PAGE_HYPERVISOR_RW); |
1595 | | |
1596 | 0 | m2mfn = shadow_alloc(d, SH_type_monitor_table, 0); |
1597 | 0 | mfn_to_page(m2mfn)->shadow_flags = 2; |
1598 | | l3e = map_domain_page(m3mfn); |
1599 | 0 | l3e[0] = l3e_from_mfn(m2mfn, __PAGE_HYPERVISOR_RW); |
1600 | | unmap_domain_page(l3e); |
1601 | | |
1602 | 0 | if ( is_pv_32bit_domain(d) ) |
1603 | 0 | { |
1604 | 0 | l2_pgentry_t *l2t; |
1605 | 0 |
|
1606 | 0 | /* For 32-bit PV guests, we need to map the 32-bit Xen |
1607 | 0 | * area into its usual VAs in the monitor tables */ |
1608 | 0 | m3mfn = shadow_alloc(d, SH_type_monitor_table, 0); |
1609 | 0 | mfn_to_page(m3mfn)->shadow_flags = 3; |
1610 | 0 | l4e[0] = l4e_from_mfn(m3mfn, __PAGE_HYPERVISOR_RW); |
1611 | 0 |
|
1612 | 0 | m2mfn = shadow_alloc(d, SH_type_monitor_table, 0); |
1613 | 0 | mfn_to_page(m2mfn)->shadow_flags = 2; |
1614 | 0 | l3e = map_domain_page(m3mfn); |
1615 | 0 | l3e[3] = l3e_from_mfn(m2mfn, _PAGE_PRESENT); |
1616 | 0 |
|
1617 | 0 | l2t = map_domain_page(m2mfn); |
1618 | 0 | init_xen_pae_l2_slots(l2t, d); |
1619 | 0 | unmap_domain_page(l2t); |
1620 | 0 |
|
1621 | 0 | unmap_domain_page(l3e); |
1622 | 0 | } |
1623 | | |
1624 | | } |
1625 | | #endif /* SHADOW_PAGING_LEVELS < 4 */ |
1626 | 0 |
|
1627 | 0 | unmap_domain_page(l4e); |
1628 | 0 |
|
1629 | 0 | return m4mfn; |
1630 | 0 | } |
1631 | 0 | }
Unexecuted instantiation: sh_make_monitor_table__sh_3
Unexecuted instantiation: sh_make_monitor_table__sh_4
1632 | | #endif /* SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS */ |
1633 | | |
1634 | | /**************************************************************************/ |
1635 | | /* These functions also take a virtual address and return the level-N |
1636 | | * shadow table mfn and entry, but they create the shadow pagetables if |
1637 | | * they are needed. The "demand" argument is non-zero when handling |
1638 | | * a demand fault (so we know what to do about accessed bits &c). |
1639 | | * If the necessary tables are not present in the guest, they return NULL. */ |
1640 | | |
1641 | | /* N.B. The use of GUEST_PAGING_LEVELS here is correct. If the shadow has |
1642 | | * more levels than the guest, the upper levels are always fixed and do not |
1643 | | * reflect any information from the guest, so we do not use these functions |
1644 | | * to access them. */ |
1645 | | |
1646 | | #if GUEST_PAGING_LEVELS >= 4 |
1647 | | static shadow_l4e_t * shadow_get_and_create_l4e(struct vcpu *v, |
1648 | | walk_t *gw, |
1649 | | mfn_t *sl4mfn) |
1650 | 0 | { |
1651 | 0 | /* There is always a shadow of the top level table. Get it. */ |
1652 | 0 | *sl4mfn = pagetable_get_mfn(v->arch.shadow_table[0]); |
1653 | 0 | /* Reading the top level table is always valid. */ |
1654 | 0 | return sh_linear_l4_table(v) + shadow_l4_linear_offset(gw->va); |
1655 | 0 | } |
1656 | | |
1657 | | static shadow_l3e_t * shadow_get_and_create_l3e(struct vcpu *v, |
1658 | | walk_t *gw, |
1659 | | mfn_t *sl3mfn, |
1660 | | fetch_type_t ft, |
1661 | | int *resync) |
1662 | 0 | { |
1663 | 0 | struct domain *d = v->domain; |
1664 | 0 | mfn_t sl4mfn; |
1665 | 0 | shadow_l4e_t *sl4e; |
1666 | 0 | if ( !mfn_valid(gw->l3mfn) ) return NULL; /* No guest page. */ |
1667 | 0 | /* Get the l4e */ |
1668 | 0 | sl4e = shadow_get_and_create_l4e(v, gw, &sl4mfn); |
1669 | 0 | ASSERT(sl4e != NULL); |
1670 | 0 | if ( shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT ) |
1671 | 0 | { |
1672 | 0 | *sl3mfn = shadow_l4e_get_mfn(*sl4e); |
1673 | 0 | ASSERT(mfn_valid(*sl3mfn)); |
1674 | 0 | } |
1675 | 0 | else |
1676 | 0 | { |
1677 | 0 | int r; |
1678 | 0 | shadow_l4e_t new_sl4e; |
1679 | 0 | /* No l3 shadow installed: find and install it. */ |
1680 | 0 | *sl3mfn = get_shadow_status(d, gw->l3mfn, SH_type_l3_shadow); |
1681 | 0 | if ( !mfn_valid(*sl3mfn) ) |
1682 | 0 | { |
1683 | 0 | /* No l3 shadow of this page exists at all: make one. */ |
1684 | 0 | *sl3mfn = sh_make_shadow(v, gw->l3mfn, SH_type_l3_shadow); |
1685 | 0 | } |
1686 | 0 | /* Install the new sl3 table in the sl4e */ |
1687 | 0 | l4e_propagate_from_guest(v, gw->l4e, *sl3mfn, &new_sl4e, ft); |
1688 | 0 | r = shadow_set_l4e(d, sl4e, new_sl4e, sl4mfn); |
1689 | 0 | ASSERT((r & SHADOW_SET_FLUSH) == 0); |
1690 | 0 | if ( r & SHADOW_SET_ERROR ) |
1691 | 0 | return NULL; |
1692 | 0 |
|
1693 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC ) |
1694 | 0 | *resync |= 1; |
1695 | 0 | #endif |
1696 | 0 |
|
1697 | 0 | } |
1698 | 0 | /* Now follow it down a level. Guaranteed to succeed. */ |
1699 | 0 | return sh_linear_l3_table(v) + shadow_l3_linear_offset(gw->va); |
1700 | 0 | } |
1701 | | #endif /* GUEST_PAGING_LEVELS >= 4 */ |
1702 | | |
1703 | | |
1704 | | static shadow_l2e_t * shadow_get_and_create_l2e(struct vcpu *v, |
1705 | | walk_t *gw, |
1706 | | mfn_t *sl2mfn, |
1707 | | fetch_type_t ft, |
1708 | | int *resync) |
1709 | 0 | { |
1710 | 0 | #if GUEST_PAGING_LEVELS >= 4 /* 64bit... */ |
1711 | | struct domain *d = v->domain; |
1712 | | mfn_t sl3mfn = INVALID_MFN; |
1713 | | shadow_l3e_t *sl3e; |
1714 | | if ( !mfn_valid(gw->l2mfn) ) return NULL; /* No guest page. */ |
1715 | | /* Get the l3e */ |
1716 | | sl3e = shadow_get_and_create_l3e(v, gw, &sl3mfn, ft, resync); |
1717 | | if ( sl3e == NULL ) return NULL; |
1718 | | if ( shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT ) |
1719 | | { |
1720 | | *sl2mfn = shadow_l3e_get_mfn(*sl3e); |
1721 | | ASSERT(mfn_valid(*sl2mfn)); |
1722 | | } |
1723 | | else |
1724 | | { |
1725 | | int r; |
1726 | | shadow_l3e_t new_sl3e; |
1727 | | unsigned int t = SH_type_l2_shadow; |
1728 | | |
1729 | | /* Tag compat L2 containing hypervisor (m2p) mappings */ |
1730 | | if ( is_pv_32bit_vcpu(v) && |
1731 | | guest_l4_table_offset(gw->va) == 0 && |
1732 | | guest_l3_table_offset(gw->va) == 3 ) |
1733 | | t = SH_type_l2h_shadow; |
1734 | | |
1735 | | /* No l2 shadow installed: find and install it. */ |
1736 | | *sl2mfn = get_shadow_status(d, gw->l2mfn, t); |
1737 | | if ( !mfn_valid(*sl2mfn) ) |
1738 | | { |
1739 | | /* No l2 shadow of this page exists at all: make one. */ |
1740 | | *sl2mfn = sh_make_shadow(v, gw->l2mfn, t); |
1741 | | } |
1742 | | /* Install the new sl2 table in the sl3e */ |
1743 | | l3e_propagate_from_guest(v, gw->l3e, *sl2mfn, &new_sl3e, ft); |
1744 | | r = shadow_set_l3e(d, sl3e, new_sl3e, sl3mfn); |
1745 | | ASSERT((r & SHADOW_SET_FLUSH) == 0); |
1746 | | if ( r & SHADOW_SET_ERROR ) |
1747 | | return NULL; |
1748 | | |
1749 | | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC ) |
1750 | | *resync |= 1; |
1751 | | #endif |
1752 | | |
1753 | | } |
1754 | | /* Now follow it down a level. Guaranteed to succeed. */ |
1755 | | return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va); |
1756 | | #elif GUEST_PAGING_LEVELS == 3 /* PAE... */ |
1757 | | /* We never demand-shadow PAE l3es: they are only created in |
1758 | | * sh_update_cr3(). Check if the relevant sl3e is present. */ |
1759 | | shadow_l3e_t *sl3e = ((shadow_l3e_t *)&v->arch.paging.shadow.l3table) |
1760 | | + shadow_l3_linear_offset(gw->va); |
1761 | | if ( !(shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) ) |
1762 | | return NULL; |
1763 | | *sl2mfn = shadow_l3e_get_mfn(*sl3e); |
1764 | | ASSERT(mfn_valid(*sl2mfn)); |
1765 | | return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va); |
1766 | | #else /* 32bit... */ |
1767 | 0 | /* There is always a shadow of the top level table. Get it. */ |
1768 | 0 | *sl2mfn = pagetable_get_mfn(v->arch.shadow_table[0]); |
1769 | 0 | /* This next line is important: the guest l2 has a 16k
1770 | 0 | * shadow, so we need to return the right mfn of the four. This
1771 | 0 | * call will set it for us as a side-effect. */ |
1772 | 0 | (void) shadow_l2_index(sl2mfn, guest_l2_table_offset(gw->va)); |
1773 | 0 | /* Reading the top level table is always valid. */ |
1774 | 0 | return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va); |
1775 | 0 | #endif |
1776 | 0 | } |
1777 | | |
1778 | | |
1779 | | static shadow_l1e_t * shadow_get_and_create_l1e(struct vcpu *v, |
1780 | | walk_t *gw, |
1781 | | mfn_t *sl1mfn, |
1782 | | fetch_type_t ft) |
1783 | 0 | { |
1784 | 0 | struct domain *d = v->domain; |
1785 | 0 | mfn_t sl2mfn; |
1786 | 0 | int resync = 0; |
1787 | 0 | shadow_l2e_t *sl2e; |
1788 | 0 |
|
1789 | 0 | /* Get the l2e */ |
1790 | 0 | sl2e = shadow_get_and_create_l2e(v, gw, &sl2mfn, ft, &resync); |
1791 | 0 | if ( sl2e == NULL ) return NULL; |
1792 | 0 |
|
1793 | 0 | /* Install the sl1 in the l2e if it wasn't there or if we need to |
1794 | 0 | * re-do it to fix a PSE dirty bit. */ |
1795 | 0 | if ( shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT |
1796 | 0 | && likely(ft != ft_demand_write |
1797 | 0 | || (shadow_l2e_get_flags(*sl2e) & _PAGE_RW) |
1798 | 0 | || !(guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)) ) |
1799 | 0 | { |
1800 | 0 | *sl1mfn = shadow_l2e_get_mfn(*sl2e); |
1801 | 0 | ASSERT(mfn_valid(*sl1mfn)); |
1802 | 0 | } |
1803 | 0 | else |
1804 | 0 | { |
1805 | 0 | shadow_l2e_t new_sl2e; |
1806 | 0 | int r, flags = guest_l2e_get_flags(gw->l2e); |
1807 | 0 | /* No l1 shadow installed: find and install it. */ |
1808 | 0 | if ( !(flags & _PAGE_PRESENT) ) |
1809 | 0 | return NULL; /* No guest page. */ |
1810 | 0 | if ( guest_can_use_l2_superpages(v) && (flags & _PAGE_PSE) ) |
1811 | 0 | { |
1812 | 0 | /* Splintering a superpage */ |
1813 | 0 | gfn_t l2gfn = guest_l2e_get_gfn(gw->l2e); |
1814 | 0 | *sl1mfn = get_fl1_shadow_status(d, l2gfn); |
1815 | 0 | if ( !mfn_valid(*sl1mfn) ) |
1816 | 0 | { |
1817 | 0 | /* No fl1 shadow of this superpage exists at all: make one. */ |
1818 | 0 | *sl1mfn = make_fl1_shadow(d, l2gfn); |
1819 | 0 | } |
1820 | 0 | } |
1821 | 0 | else |
1822 | 0 | { |
1823 | 0 | /* Shadowing an actual guest l1 table */ |
1824 | 0 | if ( !mfn_valid(gw->l1mfn) ) return NULL; /* No guest page. */ |
1825 | 0 | *sl1mfn = get_shadow_status(d, gw->l1mfn, SH_type_l1_shadow); |
1826 | 0 | if ( !mfn_valid(*sl1mfn) ) |
1827 | 0 | { |
1828 | 0 | /* No l1 shadow of this page exists at all: make one. */ |
1829 | 0 | *sl1mfn = sh_make_shadow(v, gw->l1mfn, SH_type_l1_shadow); |
1830 | 0 | } |
1831 | 0 | } |
1832 | 0 | /* Install the new sl1 table in the sl2e */ |
1833 | 0 | l2e_propagate_from_guest(v, gw->l2e, *sl1mfn, &new_sl2e, ft); |
1834 | 0 | r = shadow_set_l2e(d, sl2e, new_sl2e, sl2mfn); |
1835 | 0 | ASSERT((r & SHADOW_SET_FLUSH) == 0); |
1836 | 0 | if ( r & SHADOW_SET_ERROR ) |
1837 | 0 | return NULL; |
1838 | 0 |
|
1839 | 0 | /* This next line is important: in 32-on-PAE and 32-on-64 modes, |
1840 | 0 | * the guest l1 table has an 8k shadow, and we need to return |
1841 | 0 | * the right mfn of the pair. This call will set it for us as a |
1842 | 0 | * side-effect. (In all other cases, it's a no-op and will be |
1843 | 0 | * compiled out.) */ |
1844 | 0 | (void) shadow_l1_index(sl1mfn, guest_l1_table_offset(gw->va)); |
1845 | 0 | } |
1846 | 0 |
|
1847 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC ) |
1848 | 0 | /* All pages walked are now pagetables. Safe to resync pages |
1849 | 0 | in case level 4 or 3 shadows were set. */ |
1850 | 0 | if ( resync ) |
1851 | 0 | shadow_resync_all(v); |
1852 | 0 | #endif |
1853 | 0 |
|
1854 | 0 | /* Now follow it down a level. Guaranteed to succeed. */ |
1855 | 0 | return sh_linear_l1_table(v) + shadow_l1_linear_offset(gw->va); |
1856 | 0 | } |
1857 | | |
1858 | | |
1859 | | |
1860 | | /**************************************************************************/ |
1861 | | /* Destructors for shadow tables: |
1862 | | * Unregister the shadow, decrement refcounts of any entries present in it, |
1863 | | * and release the memory. |
1864 | | * |
1865 | | * N.B. These destructors do not clear the contents of the shadows. |
1866 | | * This allows us to delay TLB shootdowns until the page is being reused. |
1867 | | * See shadow_alloc() and shadow_free() for how this is handled. |
1868 | | */ |
1869 | | |
1870 | | #if GUEST_PAGING_LEVELS >= 4 |
1871 | | void sh_destroy_l4_shadow(struct domain *d, mfn_t smfn) |
1872 | 0 | { |
1873 | 0 | shadow_l4e_t *sl4e; |
1874 | 0 | struct page_info *sp = mfn_to_page(smfn); |
1875 | 0 | u32 t = sp->u.sh.type; |
1876 | 0 | mfn_t gmfn, sl4mfn; |
1877 | 0 |
|
1878 | 0 | SHADOW_DEBUG(DESTROY_SHADOW, "%"PRI_mfn"\n", mfn_x(smfn)); |
1879 | 0 | ASSERT(t == SH_type_l4_shadow); |
1880 | 0 | ASSERT(sp->u.sh.head); |
1881 | 0 |
|
1882 | 0 | /* Record that the guest page isn't shadowed any more (in this type) */ |
1883 | 0 | gmfn = backpointer(sp); |
1884 | 0 | delete_shadow_status(d, gmfn, t, smfn); |
1885 | 0 | shadow_demote(d, gmfn, t); |
1886 | 0 | /* Decrement refcounts of all the old entries */ |
1887 | 0 | sl4mfn = smfn; |
1888 | 0 | SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, 0, d, { |
1889 | 0 | if ( shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT ) |
1890 | 0 | { |
1891 | 0 | sh_put_ref(d, shadow_l4e_get_mfn(*sl4e), |
1892 | 0 | (((paddr_t)mfn_x(sl4mfn)) << PAGE_SHIFT) |
1893 | 0 | | ((unsigned long)sl4e & ~PAGE_MASK)); |
1894 | 0 | } |
1895 | 0 | }); |
1896 | 0 |
|
1897 | 0 | /* Put the memory back in the pool */ |
1898 | 0 | shadow_free(d, smfn); |
1899 | 0 | } |
1900 | | |
1901 | | void sh_destroy_l3_shadow(struct domain *d, mfn_t smfn) |
1902 | 0 | { |
1903 | 0 | shadow_l3e_t *sl3e; |
1904 | 0 | struct page_info *sp = mfn_to_page(smfn); |
1905 | 0 | u32 t = sp->u.sh.type; |
1906 | 0 | mfn_t gmfn, sl3mfn; |
1907 | 0 |
|
1908 | 0 | SHADOW_DEBUG(DESTROY_SHADOW, "%"PRI_mfn"\n", mfn_x(smfn)); |
1909 | 0 | ASSERT(t == SH_type_l3_shadow); |
1910 | 0 | ASSERT(sp->u.sh.head); |
1911 | 0 |
|
1912 | 0 | /* Record that the guest page isn't shadowed any more (in this type) */ |
1913 | 0 | gmfn = backpointer(sp); |
1914 | 0 | delete_shadow_status(d, gmfn, t, smfn); |
1915 | 0 | shadow_demote(d, gmfn, t); |
1916 | 0 |
|
1917 | 0 | /* Decrement refcounts of all the old entries */ |
1918 | 0 | sl3mfn = smfn; |
1919 | 0 | SHADOW_FOREACH_L3E(sl3mfn, sl3e, 0, 0, { |
1920 | 0 | if ( shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT ) |
1921 | 0 | sh_put_ref(d, shadow_l3e_get_mfn(*sl3e), |
1922 | 0 | (((paddr_t)mfn_x(sl3mfn)) << PAGE_SHIFT) |
1923 | 0 | | ((unsigned long)sl3e & ~PAGE_MASK)); |
1924 | 0 | }); |
1925 | 0 |
|
1926 | 0 | /* Put the memory back in the pool */ |
1927 | 0 | shadow_free(d, smfn); |
1928 | 0 | } |
1929 | | #endif /* GUEST_PAGING_LEVELS >= 4 */ |
1930 | | |
1931 | | |
1932 | | void sh_destroy_l2_shadow(struct domain *d, mfn_t smfn) |
1933 | 0 | { |
1934 | 0 | shadow_l2e_t *sl2e; |
1935 | 0 | struct page_info *sp = mfn_to_page(smfn); |
1936 | 0 | u32 t = sp->u.sh.type; |
1937 | 0 | mfn_t gmfn, sl2mfn; |
1938 | 0 |
|
1939 | 0 | SHADOW_DEBUG(DESTROY_SHADOW, "%"PRI_mfn"\n", mfn_x(smfn)); |
1940 | 0 |
|
1941 | 0 | #if GUEST_PAGING_LEVELS >= 3 |
1942 | 0 | ASSERT(t == SH_type_l2_shadow || t == SH_type_l2h_shadow); |
1943 | | #else |
1944 | 0 | ASSERT(t == SH_type_l2_shadow); |
1945 | | #endif |
1946 | 0 | ASSERT(sp->u.sh.head); |
1947 | 0 |
|
1948 | 0 | /* Record that the guest page isn't shadowed any more (in this type) */ |
1949 | 0 | gmfn = backpointer(sp); |
1950 | 0 | delete_shadow_status(d, gmfn, t, smfn); |
1951 | 0 | shadow_demote(d, gmfn, t); |
1952 | 0 |
|
1953 | 0 | /* Decrement refcounts of all the old entries */ |
1954 | 0 | sl2mfn = smfn; |
1955 | 0 | SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, d, { |
1956 | 0 | if ( shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT ) |
1957 | 0 | sh_put_ref(d, shadow_l2e_get_mfn(*sl2e), |
1958 | 0 | (((paddr_t)mfn_x(sl2mfn)) << PAGE_SHIFT) |
1959 | 0 | | ((unsigned long)sl2e & ~PAGE_MASK)); |
1960 | 0 | }); |
1961 | 0 |
|
1962 | 0 | /* Put the memory back in the pool */ |
1963 | 0 | shadow_free(d, smfn); |
1964 | 0 | }
Unexecuted instantiation: sh_destroy_l2_shadow__guest_2
Unexecuted instantiation: sh_destroy_l2_shadow__guest_3
Unexecuted instantiation: sh_destroy_l2_shadow__guest_4
1965 | | |
1966 | | void sh_destroy_l1_shadow(struct domain *d, mfn_t smfn) |
1967 | 0 | { |
1968 | 0 | shadow_l1e_t *sl1e; |
1969 | 0 | struct page_info *sp = mfn_to_page(smfn); |
1970 | 0 | u32 t = sp->u.sh.type; |
1971 | 0 |
|
1972 | 0 | SHADOW_DEBUG(DESTROY_SHADOW, "%"PRI_mfn"\n", mfn_x(smfn)); |
1973 | 0 | ASSERT(t == SH_type_l1_shadow || t == SH_type_fl1_shadow); |
1974 | 0 | ASSERT(sp->u.sh.head); |
1975 | 0 |
|
1976 | 0 | /* Record that the guest page isn't shadowed any more (in this type) */ |
1977 | 0 | if ( t == SH_type_fl1_shadow ) |
1978 | 0 | { |
1979 | 0 | gfn_t gfn = _gfn(sp->v.sh.back); |
1980 | 0 | delete_fl1_shadow_status(d, gfn, smfn); |
1981 | 0 | } |
1982 | 0 | else |
1983 | 0 | { |
1984 | 0 | mfn_t gmfn = backpointer(sp); |
1985 | 0 | delete_shadow_status(d, gmfn, t, smfn); |
1986 | 0 | shadow_demote(d, gmfn, t); |
1987 | 0 | } |
1988 | 0 |
|
1989 | 0 | if ( shadow_mode_refcounts(d) ) |
1990 | 0 | { |
1991 | 0 | /* Decrement refcounts of all the old entries */ |
1992 | 0 | mfn_t sl1mfn = smfn; |
1993 | 0 | SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, { |
1994 | 0 | if ( (shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT) |
1995 | 0 | && !sh_l1e_is_magic(*sl1e) ) { |
1996 | 0 | shadow_vram_put_l1e(*sl1e, sl1e, sl1mfn, d); |
1997 | 0 | shadow_put_page_from_l1e(*sl1e, d); |
1998 | 0 | } |
1999 | 0 | }); |
2000 | 0 | } |
2001 | 0 |
|
2002 | 0 | /* Put the memory back in the pool */ |
2003 | 0 | shadow_free(d, smfn); |
2004 | 0 | }
Unexecuted instantiation: sh_destroy_l1_shadow__guest_2
Unexecuted instantiation: sh_destroy_l1_shadow__guest_3
Unexecuted instantiation: sh_destroy_l1_shadow__guest_4
2005 | | |
2006 | | #if SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS |
2007 | | void sh_destroy_monitor_table(struct vcpu *v, mfn_t mmfn) |
2008 | 0 | { |
2009 | 0 | struct domain *d = v->domain; |
2010 | 0 | ASSERT(mfn_to_page(mmfn)->u.sh.type == SH_type_monitor_table); |
2011 | 0 |
|
2012 | 0 | #if SHADOW_PAGING_LEVELS != 4 |
2013 | | { |
2014 | | mfn_t m3mfn; |
2015 | | l4_pgentry_t *l4e = map_domain_page(mmfn); |
2016 | | l3_pgentry_t *l3e; |
2017 | 0 | int linear_slot = shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START); |
2018 | | |
2019 | | /* Need to destroy the l3 and l2 monitor pages used |
2020 | | * for the linear map */ |
2021 | 0 | ASSERT(l4e_get_flags(l4e[linear_slot]) & _PAGE_PRESENT); |
2022 | 0 | m3mfn = l4e_get_mfn(l4e[linear_slot]); |
2023 | | l3e = map_domain_page(m3mfn); |
2024 | 0 | ASSERT(l3e_get_flags(l3e[0]) & _PAGE_PRESENT); |
2025 | 0 | shadow_free(d, l3e_get_mfn(l3e[0])); |
2026 | | unmap_domain_page(l3e); |
2027 | | shadow_free(d, m3mfn); |
2028 | | |
2029 | 0 | if ( is_pv_32bit_domain(d) ) |
2030 | 0 | { |
2031 | 0 | /* Need to destroy the l3 and l2 monitor pages that map the |
2032 | 0 | * Xen VAs at 3GB-4GB */ |
2033 | 0 | ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT); |
2034 | 0 | m3mfn = l4e_get_mfn(l4e[0]); |
2035 | 0 | l3e = map_domain_page(m3mfn); |
2036 | 0 | ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT); |
2037 | 0 | shadow_free(d, l3e_get_mfn(l3e[3])); |
2038 | 0 | unmap_domain_page(l3e); |
2039 | 0 | shadow_free(d, m3mfn); |
2040 | 0 | } |
2041 | | unmap_domain_page(l4e); |
2042 | | } |
2043 | | #endif |
2044 | 0 |
|
2045 | 0 | /* Put the memory back in the pool */ |
2046 | 0 | shadow_free(d, mmfn); |
2047 | 0 | }
Unexecuted instantiation: sh_destroy_monitor_table__sh_3
Unexecuted instantiation: sh_destroy_monitor_table__sh_4
2048 | | #endif |
2049 | | |
2050 | | /**************************************************************************/ |
2051 | | /* Functions to destroy non-Xen mappings in a pagetable hierarchy. |
2052 | | * These are called from common code when we are running out of shadow |
2053 | | * memory, and unpinning all the top-level shadows hasn't worked. |
2054 | | * |
2055 | | * With user_only == 1, we leave guest kernel-mode mappings in place too, |
2056 | | * unhooking only the user-mode mappings |
2057 | | * |
2058 | | * This implementation is pretty crude and slow, but we hope that it won't |
2059 | | * be called very often. */ |
2060 | | |
2061 | | #if GUEST_PAGING_LEVELS == 2 |
2062 | | |
2063 | | void sh_unhook_32b_mappings(struct domain *d, mfn_t sl2mfn, int user_only) |
2064 | 0 | { |
2065 | 0 | shadow_l2e_t *sl2e; |
2066 | 0 | SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, d, { |
2067 | 0 | if ( !user_only || (sl2e->l2 & _PAGE_USER) ) |
2068 | 0 | (void) shadow_set_l2e(d, sl2e, shadow_l2e_empty(), sl2mfn); |
2069 | 0 | }); |
2070 | 0 | } |
2071 | | |
2072 | | #elif GUEST_PAGING_LEVELS == 3 |
2073 | | |
2074 | | void sh_unhook_pae_mappings(struct domain *d, mfn_t sl2mfn, int user_only) |
2075 | | /* Walk a PAE l2 shadow, unhooking entries from all the subshadows */ |
2076 | 0 | { |
2077 | 0 | shadow_l2e_t *sl2e; |
2078 | 0 | SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, d, { |
2079 | 0 | if ( !user_only || (sl2e->l2 & _PAGE_USER) ) |
2080 | 0 | (void) shadow_set_l2e(d, sl2e, shadow_l2e_empty(), sl2mfn); |
2081 | 0 | }); |
2082 | 0 | } |
2083 | | |
2084 | | #elif GUEST_PAGING_LEVELS == 4 |
2085 | | |
2086 | | void sh_unhook_64b_mappings(struct domain *d, mfn_t sl4mfn, int user_only) |
2087 | 0 | { |
2088 | 0 | shadow_l4e_t *sl4e; |
2089 | 0 | SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, 0, d, { |
2090 | 0 | if ( !user_only || (sl4e->l4 & _PAGE_USER) ) |
2091 | 0 | (void) shadow_set_l4e(d, sl4e, shadow_l4e_empty(), sl4mfn); |
2092 | 0 | }); |
2093 | 0 | } |
2094 | | |
2095 | | #endif |
2096 | | |
2097 | | /**************************************************************************/ |
2098 | | /* Internal translation functions. |
2099 | | * These functions require a pointer to the shadow entry that will be updated. |
2100 | | */ |
2101 | | |
2102 | | /* These functions take a new guest entry, translate it to shadow and write |
2103 | | * the shadow entry. |
2104 | | * |
2105 | | * They return the same bitmaps as the shadow_set_lXe() functions. |
2106 | | */ |
2107 | | |
2108 | | #if GUEST_PAGING_LEVELS >= 4 |
2109 | | static int validate_gl4e(struct vcpu *v, void *new_ge, mfn_t sl4mfn, void *se) |
2110 | 0 | { |
2111 | 0 | shadow_l4e_t new_sl4e; |
2112 | 0 | guest_l4e_t new_gl4e = *(guest_l4e_t *)new_ge; |
2113 | 0 | shadow_l4e_t *sl4p = se; |
2114 | 0 | mfn_t sl3mfn = INVALID_MFN; |
2115 | 0 | struct domain *d = v->domain; |
2116 | 0 | p2m_type_t p2mt; |
2117 | 0 | int result = 0; |
2118 | 0 |
|
2119 | 0 | perfc_incr(shadow_validate_gl4e_calls); |
2120 | 0 |
|
2121 | 0 | if ( (guest_l4e_get_flags(new_gl4e) & _PAGE_PRESENT) && |
2122 | 0 | !guest_l4e_rsvd_bits(v, new_gl4e) ) |
2123 | 0 | { |
2124 | 0 | gfn_t gl3gfn = guest_l4e_get_gfn(new_gl4e); |
2125 | 0 | mfn_t gl3mfn = get_gfn_query_unlocked(d, gfn_x(gl3gfn), &p2mt); |
2126 | 0 | if ( p2m_is_ram(p2mt) ) |
2127 | 0 | sl3mfn = get_shadow_status(d, gl3mfn, SH_type_l3_shadow); |
2128 | 0 | else if ( p2mt != p2m_populate_on_demand ) |
2129 | 0 | result |= SHADOW_SET_ERROR; |
2130 | 0 |
|
2131 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC ) |
2132 | 0 | if ( mfn_valid(sl3mfn) ) |
2133 | 0 | shadow_resync_all(v); |
2134 | 0 | #endif |
2135 | 0 | } |
2136 | 0 | l4e_propagate_from_guest(v, new_gl4e, sl3mfn, &new_sl4e, ft_prefetch); |
2137 | 0 |
|
2138 | 0 | // check for updates to xen reserved slots |
2139 | 0 | if ( !shadow_mode_external(d) ) |
2140 | 0 | { |
2141 | 0 | int shadow_index = (((unsigned long)sl4p & ~PAGE_MASK) / |
2142 | 0 | sizeof(shadow_l4e_t)); |
2143 | 0 | int reserved_xen_slot = !is_guest_l4_slot(d, shadow_index); |
2144 | 0 |
|
2145 | 0 | if ( unlikely(reserved_xen_slot) ) |
2146 | 0 | { |
2147 | 0 | // attempt by the guest to write to a xen reserved slot |
2148 | 0 | // |
2149 | 0 | SHADOW_PRINTK("out-of-range update " |
2150 | 0 | "sl4mfn=%"PRI_mfn" index=%#x val=%" SH_PRI_pte "\n", |
2151 | 0 | mfn_x(sl4mfn), shadow_index, new_sl4e.l4); |
2152 | 0 | if ( shadow_l4e_get_flags(new_sl4e) & _PAGE_PRESENT ) |
2153 | 0 | { |
2154 | 0 | SHADOW_ERROR("out-of-range l4e update\n"); |
2155 | 0 | result |= SHADOW_SET_ERROR; |
2156 | 0 | } |
2157 | 0 |
|
2158 | 0 | // do not call shadow_set_l4e... |
2159 | 0 | return result; |
2160 | 0 | } |
2161 | 0 | } |
2162 | 0 |
|
2163 | 0 | result |= shadow_set_l4e(d, sl4p, new_sl4e, sl4mfn); |
2164 | 0 | return result; |
2165 | 0 | } |
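A small point worth spelling out: the reserved-slot check above never consults the guest at all -- it recovers the l4 slot index purely from the shadow entry pointer's offset within its page. The standalone sketch below is not part of multi.c and uses made-up type, page-size and alignment macros; it only shows that arithmetic in isolation.

#include <assert.h>
#include <stdint.h>

#define FAKE_PAGE_SIZE 4096u
#define FAKE_PAGE_MASK (~(uintptr_t)(FAKE_PAGE_SIZE - 1))

typedef struct { uint64_t e; } fake_l4e_t;

/* Recover the slot index from an entry pointer's offset within its page. */
static unsigned int slot_from_entry_ptr(const fake_l4e_t *p)
{
    return ((uintptr_t)p & ~FAKE_PAGE_MASK) / sizeof(fake_l4e_t);
}

int main(void)
{
    /* A page-aligned dummy table stands in for the mapped shadow page. */
    static fake_l4e_t table[FAKE_PAGE_SIZE / sizeof(fake_l4e_t)]
        __attribute__((aligned(FAKE_PAGE_SIZE)));

    assert(slot_from_entry_ptr(&table[0]) == 0);
    assert(slot_from_entry_ptr(&table[261]) == 261);
    return 0;
}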
2166 | | |
2167 | | |
2168 | | static int validate_gl3e(struct vcpu *v, void *new_ge, mfn_t sl3mfn, void *se) |
2169 | 0 | { |
2170 | 0 | struct domain *d = v->domain; |
2171 | 0 | shadow_l3e_t new_sl3e; |
2172 | 0 | guest_l3e_t new_gl3e = *(guest_l3e_t *)new_ge; |
2173 | 0 | shadow_l3e_t *sl3p = se; |
2174 | 0 | mfn_t sl2mfn = INVALID_MFN; |
2175 | 0 | p2m_type_t p2mt; |
2176 | 0 | int result = 0; |
2177 | 0 |
|
2178 | 0 | perfc_incr(shadow_validate_gl3e_calls); |
2179 | 0 |
|
2180 | 0 | if ( (guest_l3e_get_flags(new_gl3e) & _PAGE_PRESENT) && |
2181 | 0 | !guest_l3e_rsvd_bits(v, new_gl3e) ) |
2182 | 0 | { |
2183 | 0 | gfn_t gl2gfn = guest_l3e_get_gfn(new_gl3e); |
2184 | 0 | mfn_t gl2mfn = get_gfn_query_unlocked(d, gfn_x(gl2gfn), &p2mt); |
2185 | 0 | if ( p2m_is_ram(p2mt) ) |
2186 | 0 | sl2mfn = get_shadow_status(d, gl2mfn, SH_type_l2_shadow); |
2187 | 0 | else if ( p2mt != p2m_populate_on_demand ) |
2188 | 0 | result |= SHADOW_SET_ERROR; |
2189 | 0 |
|
2190 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC ) |
2191 | 0 | if ( mfn_valid(sl2mfn) ) |
2192 | 0 | shadow_resync_all(v); |
2193 | 0 | #endif |
2194 | 0 | } |
2195 | 0 | l3e_propagate_from_guest(v, new_gl3e, sl2mfn, &new_sl3e, ft_prefetch); |
2196 | 0 | result |= shadow_set_l3e(d, sl3p, new_sl3e, sl3mfn); |
2197 | 0 |
|
2198 | 0 | return result; |
2199 | 0 | } |
2200 | | #endif // GUEST_PAGING_LEVELS >= 4 |
2201 | | |
2202 | | static int validate_gl2e(struct vcpu *v, void *new_ge, mfn_t sl2mfn, void *se) |
2203 | 0 | { |
2204 | 0 | struct domain *d = v->domain; |
2205 | 0 | shadow_l2e_t new_sl2e; |
2206 | 0 | guest_l2e_t new_gl2e = *(guest_l2e_t *)new_ge; |
2207 | 0 | shadow_l2e_t *sl2p = se; |
2208 | 0 | mfn_t sl1mfn = INVALID_MFN; |
2209 | 0 | p2m_type_t p2mt; |
2210 | 0 | int result = 0; |
2211 | 0 |
|
2212 | 0 | perfc_incr(shadow_validate_gl2e_calls); |
2213 | 0 |
|
2214 | 0 | if ( (guest_l2e_get_flags(new_gl2e) & _PAGE_PRESENT) && |
2215 | 0 | !guest_l2e_rsvd_bits(v, new_gl2e) ) |
2216 | 0 | { |
2217 | 0 | gfn_t gl1gfn = guest_l2e_get_gfn(new_gl2e); |
2218 | 0 | if ( guest_can_use_l2_superpages(v) && |
2219 | 0 | (guest_l2e_get_flags(new_gl2e) & _PAGE_PSE) ) |
2220 | 0 | { |
2221 | 0 | // superpage -- need to look up the shadow L1 which holds the |
2222 | 0 | // splitters... |
2223 | 0 | sl1mfn = get_fl1_shadow_status(d, gl1gfn); |
2224 | 0 | #if 0 |
2225 | | // XXX - it's possible that we want to do some kind of prefetch |
2226 | | // for superpage fl1's here, but this is *not* on the demand path, |
2227 | | // so we'll hold off trying that for now... |
2228 | | // |
2229 | | if ( !mfn_valid(sl1mfn) ) |
2230 | | sl1mfn = make_fl1_shadow(d, gl1gfn); |
2231 | | #endif |
2232 | 0 | } |
2233 | 0 | else |
2234 | 0 | { |
2235 | 0 | mfn_t gl1mfn = get_gfn_query_unlocked(d, gfn_x(gl1gfn), &p2mt); |
2236 | 0 | if ( p2m_is_ram(p2mt) ) |
2237 | 0 | sl1mfn = get_shadow_status(d, gl1mfn, SH_type_l1_shadow); |
2238 | 0 | else if ( p2mt != p2m_populate_on_demand ) |
2239 | 0 | result |= SHADOW_SET_ERROR; |
2240 | 0 | } |
2241 | 0 | } |
2242 | 0 | l2e_propagate_from_guest(v, new_gl2e, sl1mfn, &new_sl2e, ft_prefetch); |
2243 | 0 |
|
2244 | 0 | result |= shadow_set_l2e(d, sl2p, new_sl2e, sl2mfn); |
2245 | 0 |
|
2246 | 0 | return result; |
2247 | 0 | } |
2248 | | |
2249 | | static int validate_gl1e(struct vcpu *v, void *new_ge, mfn_t sl1mfn, void *se) |
2250 | 0 | { |
2251 | 0 | struct domain *d = v->domain; |
2252 | 0 | shadow_l1e_t new_sl1e; |
2253 | 0 | guest_l1e_t new_gl1e = *(guest_l1e_t *)new_ge; |
2254 | 0 | shadow_l1e_t *sl1p = se; |
2255 | 0 | gfn_t gfn; |
2256 | 0 | mfn_t gmfn = INVALID_MFN; |
2257 | 0 | p2m_type_t p2mt = p2m_invalid; |
2258 | 0 | int result = 0; |
2259 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
2260 | 0 | mfn_t gl1mfn; |
2261 | 0 | #endif /* OOS */ |
2262 | 0 |
|
2263 | 0 | perfc_incr(shadow_validate_gl1e_calls); |
2264 | 0 |
|
2265 | 0 | if ( (guest_l1e_get_flags(new_gl1e) & _PAGE_PRESENT) && |
2266 | 0 | !guest_l1e_rsvd_bits(v, new_gl1e) ) |
2267 | 0 | { |
2268 | 0 | gfn = guest_l1e_get_gfn(new_gl1e); |
2269 | 0 | gmfn = get_gfn_query_unlocked(d, gfn_x(gfn), &p2mt); |
2270 | 0 | } |
2271 | 0 |
|
2272 | 0 | l1e_propagate_from_guest(v, new_gl1e, gmfn, &new_sl1e, ft_prefetch, p2mt); |
2273 | 0 | result |= shadow_set_l1e(d, sl1p, new_sl1e, p2mt, sl1mfn); |
2274 | 0 |
|
2275 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
2276 | 0 | gl1mfn = backpointer(mfn_to_page(sl1mfn)); |
2277 | 0 | if ( mfn_valid(gl1mfn) |
2278 | 0 | && mfn_is_out_of_sync(gl1mfn) ) |
2279 | 0 | { |
2280 | 0 | /* Update the OOS snapshot. */ |
2281 | 0 | mfn_t snpmfn = oos_snapshot_lookup(d, gl1mfn); |
2282 | 0 | guest_l1e_t *snp; |
2283 | 0 |
|
2284 | 0 | ASSERT(mfn_valid(snpmfn)); |
2285 | 0 |
|
2286 | 0 | snp = map_domain_page(snpmfn); |
2287 | 0 | snp[guest_index(new_ge)] = new_gl1e; |
2288 | 0 | unmap_domain_page(snp); |
2289 | 0 | } |
2290 | 0 | #endif /* OOS */ |
2291 | 0 |
|
2292 | 0 | return result; |
2293 | 0 | } |
2294 | | |
2295 | | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
2296 | | /**************************************************************************/ |
2297 | | /* Special validation function for re-syncing out-of-sync shadows. |
2298 | | * Walks the *shadow* page, and for every entry that it finds, |
2299 | | * revalidates the guest entry that corresponds to it. |
2300 | | * N.B. This function is called with the vcpu that unsynced the page, |
2301 | | * *not* the one that is causing it to be resynced. */ |
2302 | | void sh_resync_l1(struct vcpu *v, mfn_t gl1mfn, mfn_t snpmfn) |
2303 | 0 | { |
2304 | 0 | struct domain *d = v->domain; |
2305 | 0 | mfn_t sl1mfn; |
2306 | 0 | shadow_l1e_t *sl1p; |
2307 | 0 | guest_l1e_t *gl1p, *gp, *snp; |
2308 | 0 | int rc = 0; |
2309 | 0 |
|
2310 | 0 | ASSERT(mfn_valid(snpmfn)); |
2311 | 0 |
|
2312 | 0 | sl1mfn = get_shadow_status(d, gl1mfn, SH_type_l1_shadow); |
2313 | 0 | ASSERT(mfn_valid(sl1mfn)); /* Otherwise we would not have been called */ |
2314 | 0 |
|
2315 | 0 | snp = map_domain_page(snpmfn); |
2316 | 0 | gp = map_domain_page(gl1mfn); |
2317 | 0 | gl1p = gp; |
2318 | 0 |
|
2319 | 0 | SHADOW_FOREACH_L1E(sl1mfn, sl1p, &gl1p, 0, { |
2320 | 0 | guest_l1e_t gl1e = *gl1p; |
2321 | 0 | guest_l1e_t *snpl1p = (guest_l1e_t *)snp + guest_index(gl1p); |
2322 | 0 |
|
2323 | 0 | if ( memcmp(snpl1p, &gl1e, sizeof(gl1e)) ) |
2324 | 0 | { |
2325 | 0 | gfn_t gfn; |
2326 | 0 | mfn_t gmfn = INVALID_MFN; |
2327 | 0 | p2m_type_t p2mt = p2m_invalid; |
2328 | 0 | shadow_l1e_t nsl1e; |
2329 | 0 |
|
2330 | 0 | if ( (guest_l1e_get_flags(gl1e) & _PAGE_PRESENT) && |
2331 | 0 | !guest_l1e_rsvd_bits(v, gl1e) ) |
2332 | 0 | { |
2333 | 0 | gfn = guest_l1e_get_gfn(gl1e); |
2334 | 0 | gmfn = get_gfn_query_unlocked(d, gfn_x(gfn), &p2mt); |
2335 | 0 | } |
2336 | 0 |
|
2337 | 0 | l1e_propagate_from_guest(v, gl1e, gmfn, &nsl1e, ft_prefetch, p2mt); |
2338 | 0 | rc |= shadow_set_l1e(d, sl1p, nsl1e, p2mt, sl1mfn); |
2339 | 0 | *snpl1p = gl1e; |
2340 | 0 | } |
2341 | 0 | }); |
2342 | 0 |
|
2343 | 0 | unmap_domain_page(gp); |
2344 | 0 | unmap_domain_page(snp); |
2345 | 0 |
|
2346 | 0 | /* Setting shadow L1 entries should never need us to flush the TLB */ |
2347 | 0 | ASSERT(!(rc & SHADOW_SET_FLUSH)); |
2348 | 0 | }
Unexecuted instantiation: sh_resync_l1__guest_2
Unexecuted instantiation: sh_resync_l1__guest_4
Unexecuted instantiation: sh_resync_l1__guest_3
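sh_resync_l1() is essentially a diff against the out-of-sync snapshot: only guest entries that no longer match the snapshot are re-propagated, and the snapshot is refreshed as a side effect so the next resync starts from a clean baseline. A minimal standalone model of that loop, on plain arrays and with an invented entry type, is sketched below; it is illustrative only and omits the actual shadow propagation.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NENTRIES 512u   /* assumed guest l1 size */

typedef uint64_t fake_gl1e_t;

/* Revalidate only the entries that changed since the snapshot was taken. */
static unsigned int resync_l1(const fake_gl1e_t *guest, fake_gl1e_t *snapshot)
{
    unsigned int changed = 0;

    for ( unsigned int i = 0; i < NENTRIES; i++ )
    {
        if ( memcmp(&snapshot[i], &guest[i], sizeof(guest[i])) )
        {
            /* Here the real code re-propagates the guest entry into the
             * shadow via l1e_propagate_from_guest()/shadow_set_l1e(). */
            snapshot[i] = guest[i];
            changed++;
        }
    }
    return changed;
}

int main(void)
{
    static fake_gl1e_t guest[NENTRIES], snap[NENTRIES];

    guest[7] = 0x1007; guest[42] = 0x102a;      /* guest modified two entries */
    printf("resynced %u entries\n", resync_l1(guest, snap));  /* prints 2 */
    printf("resynced %u entries\n", resync_l1(guest, snap));  /* prints 0 */
    return 0;
}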
2349 | | |
2350 | | /* Figure out whether it's definitely safe not to sync this l1 table. |
2351 | | * That is: if we can tell that it's only used once, and that the |
2352 | | * toplevel shadow responsible is not one of ours. |
2353 | | * N.B. This function is called with the vcpu that required the resync, |
2354 | | * *not* the one that originally unsynced the page, but it is |
2355 | | * called in the *mode* of the vcpu that unsynced it. Clear? Good. */ |
2356 | | int sh_safe_not_to_sync(struct vcpu *v, mfn_t gl1mfn) |
2357 | 0 | { |
2358 | 0 | struct domain *d = v->domain; |
2359 | 0 | struct page_info *sp; |
2360 | 0 | mfn_t smfn; |
2361 | 0 |
|
2362 | 0 | if ( !sh_type_has_up_pointer(d, SH_type_l1_shadow) ) |
2363 | 0 | return 0; |
2364 | 0 |
|
2365 | 0 | smfn = get_shadow_status(d, gl1mfn, SH_type_l1_shadow); |
2366 | 0 | ASSERT(mfn_valid(smfn)); /* Otherwise we would not have been called */ |
2367 | 0 |
|
2368 | 0 | /* Up to l2 */ |
2369 | 0 | sp = mfn_to_page(smfn); |
2370 | 0 | if ( sp->u.sh.count != 1 || !sp->up ) |
2371 | 0 | return 0; |
2372 | 0 | smfn = maddr_to_mfn(sp->up); |
2373 | 0 | ASSERT(mfn_valid(smfn)); |
2374 | 0 |
|
2375 | 0 | #if (SHADOW_PAGING_LEVELS == 4) |
2376 | | /* up to l3 */ |
2377 | 0 | sp = mfn_to_page(smfn); |
2378 | 0 | ASSERT(sh_type_has_up_pointer(d, SH_type_l2_shadow)); |
2379 | 0 | if ( sp->u.sh.count != 1 || !sp->up ) |
2380 | 0 | return 0; |
2381 | 0 | smfn = maddr_to_mfn(sp->up); |
2382 | 0 | ASSERT(mfn_valid(smfn)); |
2383 | 0 |
|
2384 | 0 | /* up to l4 */ |
2385 | 0 | sp = mfn_to_page(smfn); |
2386 | 0 | if ( sp->u.sh.count != 1 |
2387 | 0 | || !sh_type_has_up_pointer(d, SH_type_l3_64_shadow) || !sp->up ) |
2388 | 0 | return 0; |
2389 | 0 | smfn = maddr_to_mfn(sp->up); |
2390 | 0 | ASSERT(mfn_valid(smfn)); |
2391 | 0 | #endif |
2392 | 0 |
|
2393 | 0 | if ( pagetable_get_pfn(v->arch.shadow_table[0]) == mfn_x(smfn) |
2394 | 0 | #if (SHADOW_PAGING_LEVELS == 3) |
2395 | 0 | || pagetable_get_pfn(v->arch.shadow_table[1]) == mfn_x(smfn) |
2396 | 0 | || pagetable_get_pfn(v->arch.shadow_table[2]) == mfn_x(smfn) |
2397 | 0 | || pagetable_get_pfn(v->arch.shadow_table[3]) == mfn_x(smfn) |
2398 | | #endif |
2399 | 0 | ) |
2400 | 0 | return 0; |
2401 | 0 |
|
2402 | 0 | /* Only in use in one toplevel shadow, and it's not the one we're |
2403 | 0 | * running on */ |
2404 | 0 | return 1; |
2405 | 0 | }
Unexecuted instantiation: sh_safe_not_to_sync__guest_2
Unexecuted instantiation: sh_safe_not_to_sync__guest_4
Unexecuted instantiation: sh_safe_not_to_sync__guest_3
2406 | 0 | #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */ |
2407 | 0 |
|
2408 | 0 |
|
2409 | 0 | /**************************************************************************/ |
2410 | 0 | /* Functions which translate and install the shadows of arbitrary guest |
2411 | 0 | * entries that we have just seen the guest write. */ |
2412 | 0 |
|
2413 | 0 |
|
2414 | 0 | static inline int |
2415 | 0 | sh_map_and_validate(struct vcpu *v, mfn_t gmfn, |
2416 | 0 | void *new_gp, u32 size, u32 sh_type, |
2417 | 0 | u32 (*shadow_index)(mfn_t *smfn, u32 idx), |
2418 | 0 | int (*validate_ge)(struct vcpu *v, void *ge, |
2419 | 0 | mfn_t smfn, void *se)) |
2420 | 0 | /* Generic function for mapping and validating. */ |
2421 | 0 | { |
2422 | 0 | struct domain *d = v->domain; |
2423 | 0 | mfn_t smfn, smfn2, map_mfn; |
2424 | 0 | shadow_l1e_t *sl1p; |
2425 | 0 | u32 shadow_idx, guest_idx; |
2426 | 0 | int result = 0; |
2427 | 0 |
|
2428 | 0 | /* Align address and size to guest entry boundaries */ |
2429 | 0 | size += (unsigned long)new_gp & (sizeof (guest_l1e_t) - 1); |
2430 | 0 | new_gp = (void *)((unsigned long)new_gp & ~(sizeof (guest_l1e_t) - 1)); |
2431 | 0 | size = (size + sizeof (guest_l1e_t) - 1) & ~(sizeof (guest_l1e_t) - 1); |
2432 | 0 | ASSERT(size + (((unsigned long)new_gp) & ~PAGE_MASK) <= PAGE_SIZE); |
2433 | 0 |
|
2434 | 0 | /* Map the shadow page */ |
2435 | 0 | smfn = get_shadow_status(d, gmfn, sh_type); |
2436 | 0 | ASSERT(mfn_valid(smfn)); /* Otherwise we would not have been called */ |
2437 | 0 | guest_idx = guest_index(new_gp); |
2438 | 0 | map_mfn = smfn; |
2439 | 0 | shadow_idx = shadow_index(&map_mfn, guest_idx); |
2440 | 0 | sl1p = map_domain_page(map_mfn); |
2441 | 0 |
|
2442 | 0 | /* Validate one entry at a time */ |
2443 | 0 | while ( size ) |
2444 | 0 | { |
2445 | 0 | smfn2 = smfn; |
2446 | 0 | guest_idx = guest_index(new_gp); |
2447 | 0 | shadow_idx = shadow_index(&smfn2, guest_idx); |
2448 | 0 | if ( mfn_x(smfn2) != mfn_x(map_mfn) ) |
2449 | 0 | { |
2450 | 0 | /* We have moved to another page of the shadow */ |
2451 | 0 | map_mfn = smfn2; |
2452 | 0 | unmap_domain_page(sl1p); |
2453 | 0 | sl1p = map_domain_page(map_mfn); |
2454 | 0 | } |
2455 | 0 | result |= validate_ge(v, |
2456 | 0 | new_gp, |
2457 | 0 | map_mfn, |
2458 | 0 | &sl1p[shadow_idx]); |
2459 | 0 | size -= sizeof(guest_l1e_t); |
2460 | 0 | new_gp += sizeof(guest_l1e_t); |
2461 | 0 | } |
2462 | 0 | unmap_domain_page(sl1p); |
2463 | 0 | return result; |
2464 | 0 | } |
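The three lines at the top of sh_map_and_validate() that "align address and size to guest entry boundaries" widen the written byte range to whole guest entries: align the start down, grow the length by the amount the start moved, then round the length up. A standalone sketch of just that arithmetic follows; it is illustrative only, not part of multi.c, and assumes an 8-byte guest entry size.

#include <assert.h>
#include <stdint.h>

#define ENTRY_SIZE 8u   /* assumed sizeof(guest_l1e_t) */

static void align_to_entries(uintptr_t *addr, unsigned int *size)
{
    *size += *addr & (ENTRY_SIZE - 1);                     /* grow by the amount we align down */
    *addr &= ~(uintptr_t)(ENTRY_SIZE - 1);                 /* align the start down */
    *size = (*size + ENTRY_SIZE - 1) & ~(ENTRY_SIZE - 1);  /* round the length up */
}

int main(void)
{
    uintptr_t addr = 0x1003;   /* 3 bytes into the entry at 0x1000 */
    unsigned int size = 8;     /* runs 3 bytes into the entry at 0x1008 */

    align_to_entries(&addr, &size);
    assert(addr == 0x1000 && size == 16);   /* widened to two whole entries */
    return 0;
}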
2465 | 0 |
|
2466 | 0 |
|
2467 | 0 | int |
2468 | 0 | sh_map_and_validate_gl4e(struct vcpu *v, mfn_t gl4mfn, |
2469 | 0 | void *new_gl4p, u32 size) |
2470 | 0 | { |
2471 | 0 | #if GUEST_PAGING_LEVELS >= 4 |
2472 | 0 | return sh_map_and_validate(v, gl4mfn, new_gl4p, size, |
2473 | 0 | SH_type_l4_shadow, |
2474 | 0 | shadow_l4_index, |
2475 | 0 | validate_gl4e); |
2476 | 0 | #else // ! GUEST_PAGING_LEVELS >= 4 |
2477 | 0 | SHADOW_ERROR("called in wrong paging mode!\n"); |
2478 | 0 | BUG(); |
2479 | 0 | return 0; |
2480 | 0 | #endif |
2481 | 0 | }
Unexecuted instantiation: sh_map_and_validate_gl4e__guest_3
Unexecuted instantiation: sh_map_and_validate_gl4e__guest_2
Unexecuted instantiation: sh_map_and_validate_gl4e__guest_4
2482 | 0 |
|
2483 | 0 | int |
2484 | 0 | sh_map_and_validate_gl3e(struct vcpu *v, mfn_t gl3mfn, |
2485 | 0 | void *new_gl3p, u32 size) |
2486 | 0 | { |
2487 | 0 | #if GUEST_PAGING_LEVELS >= 4 |
2488 | 0 | return sh_map_and_validate(v, gl3mfn, new_gl3p, size, |
2489 | 0 | SH_type_l3_shadow, |
2490 | 0 | shadow_l3_index, |
2491 | 0 | validate_gl3e); |
2492 | 0 | #else // ! GUEST_PAGING_LEVELS >= 4 |
2493 | 0 | SHADOW_ERROR("called in wrong paging mode!\n"); |
2494 | 0 | BUG(); |
2495 | 0 | return 0; |
2496 | 0 | #endif |
2497 | 0 | }
Unexecuted instantiation: sh_map_and_validate_gl3e__guest_3
Unexecuted instantiation: sh_map_and_validate_gl3e__guest_4
Unexecuted instantiation: sh_map_and_validate_gl3e__guest_2
2498 | 0 |
|
2499 | 0 | int |
2500 | 0 | sh_map_and_validate_gl2e(struct vcpu *v, mfn_t gl2mfn, |
2501 | 0 | void *new_gl2p, u32 size) |
2502 | 0 | { |
2503 | 0 | return sh_map_and_validate(v, gl2mfn, new_gl2p, size, |
2504 | 0 | SH_type_l2_shadow, |
2505 | 0 | shadow_l2_index, |
2506 | 0 | validate_gl2e); |
2507 | 0 | }
Unexecuted instantiation: sh_map_and_validate_gl2e__guest_4
Unexecuted instantiation: sh_map_and_validate_gl2e__guest_3
Unexecuted instantiation: sh_map_and_validate_gl2e__guest_2
2508 | 0 |
|
2509 | 0 | int |
2510 | 0 | sh_map_and_validate_gl2he(struct vcpu *v, mfn_t gl2mfn, |
2511 | 0 | void *new_gl2p, u32 size) |
2512 | 0 | { |
2513 | 0 | #if GUEST_PAGING_LEVELS >= 3 |
2514 | 0 | return sh_map_and_validate(v, gl2mfn, new_gl2p, size, |
2515 | 0 | SH_type_l2h_shadow, |
2516 | 0 | shadow_l2_index, |
2517 | 0 | validate_gl2e); |
2518 | 0 | #else /* Non-PAE guests don't have different kinds of l2 table */ |
2519 | 0 | SHADOW_ERROR("called in wrong paging mode!\n"); |
2520 | 0 | BUG(); |
2521 | 0 | return 0; |
2522 | 0 | #endif |
2523 | 0 | }
Unexecuted instantiation: sh_map_and_validate_gl2he__guest_4
Unexecuted instantiation: sh_map_and_validate_gl2he__guest_3
Unexecuted instantiation: sh_map_and_validate_gl2he__guest_2
2524 | 0 |
|
2525 | 0 | int |
2526 | 0 | sh_map_and_validate_gl1e(struct vcpu *v, mfn_t gl1mfn, |
2527 | 0 | void *new_gl1p, u32 size) |
2528 | 0 | { |
2529 | 0 | return sh_map_and_validate(v, gl1mfn, new_gl1p, size, |
2530 | 0 | SH_type_l1_shadow, |
2531 | 0 | shadow_l1_index, |
2532 | 0 | validate_gl1e); |
2533 | 0 | }
Unexecuted instantiation: sh_map_and_validate_gl1e__guest_3
Unexecuted instantiation: sh_map_and_validate_gl1e__guest_4
Unexecuted instantiation: sh_map_and_validate_gl1e__guest_2
2534 | 0 |
|
2535 | 0 |
|
2536 | 0 | /**************************************************************************/ |
2537 | 0 | /* Optimization: If we see two emulated writes of zeros to the same |
2538 | 0 | * page-table without another kind of page fault in between, we guess |
2539 | 0 | * that this is a batch of changes (for process destruction) and |
2540 | 0 | * unshadow the page so we don't take a pagefault on every entry. This |
2541 | 0 | * should also make finding writeable mappings of pagetables much |
2542 | 0 | * easier. */ |
2543 | 0 |
|
2544 | 0 | /* Look to see if this is the second emulated write in a row to this |
2545 | 0 | * page, and unshadow if it is */ |
2546 | 0 | static inline void check_for_early_unshadow(struct vcpu *v, mfn_t gmfn) |
2547 | 0 | { |
2548 | 0 | #if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW |
2549 | 0 | struct domain *d = v->domain; |
2550 | 0 | /* If the domain has never made a "dying" op, use the two-writes |
2551 | 0 | * heuristic; otherwise, unshadow as soon as we write a zero for a dying |
2552 | 0 | * process. |
2553 | 0 | * |
2554 | 0 | * Don't bother trying to unshadow if it's not a PT, or if it's > l1. |
2555 | 0 | */ |
2556 | 0 | if ( ( v->arch.paging.shadow.pagetable_dying |
2557 | 0 | || ( !d->arch.paging.shadow.pagetable_dying_op |
2558 | 0 | && v->arch.paging.shadow.last_emulated_mfn_for_unshadow == mfn_x(gmfn) ) ) |
2559 | 0 | && sh_mfn_is_a_page_table(gmfn) |
2560 | 0 | && (!d->arch.paging.shadow.pagetable_dying_op || |
2561 | 0 | !(mfn_to_page(gmfn)->shadow_flags |
2562 | 0 | & (SHF_L2_32|SHF_L2_PAE|SHF_L2H_PAE|SHF_L4_64))) ) |
2563 | 0 | { |
2564 | 0 | perfc_incr(shadow_early_unshadow); |
2565 | 0 | sh_remove_shadows(d, gmfn, 1, 0 /* Fast, can fail to unshadow */ ); |
2566 | 0 | TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EARLY_UNSHADOW); |
2567 | 0 | } |
2568 | 0 | v->arch.paging.shadow.last_emulated_mfn_for_unshadow = mfn_x(gmfn); |
2569 | 0 | #endif |
2570 | 0 | } |
2571 | 0 |
|
2572 | 0 | /* Stop counting towards early unshadows, as we've seen a real page fault */ |
2573 | 0 | static inline void reset_early_unshadow(struct vcpu *v) |
2574 | 0 | { |
2575 | 0 | #if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW |
2576 | 0 | v->arch.paging.shadow.last_emulated_mfn_for_unshadow = mfn_x(INVALID_MFN); |
2577 | 0 | #endif |
2578 | 0 | } |
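Stripped of the pagetable_dying and page-type refinements, the early-unshadow machinery above is a two-state tracker: remember the last mfn we emulated a (zero) write to, unshadow if the next emulated write hits the same mfn, and reset the tracker whenever a real page fault intervenes. The standalone model below is illustrative only and not part of multi.c; the names and the simplified trigger condition are assumptions made for the example.

#include <stdbool.h>
#include <stdio.h>

#define NO_MFN (~0ul)

/* Plays the role of v->arch.paging.shadow.last_emulated_mfn_for_unshadow. */
static unsigned long last_emulated_mfn = NO_MFN;

/* Called on each emulated pagetable write; returns true if we would unshadow. */
static bool emulated_write(unsigned long mfn)
{
    bool unshadow = (mfn == last_emulated_mfn);   /* second write in a row */

    last_emulated_mfn = mfn;
    return unshadow;
}

/* Called when a real (non-emulated) page fault intervenes. */
static void real_fault(void)
{
    last_emulated_mfn = NO_MFN;
}

int main(void)
{
    printf("%d\n", emulated_write(42));  /* 0: first write to mfn 42 */
    printf("%d\n", emulated_write(42));  /* 1: second in a row -> unshadow */
    real_fault();
    printf("%d\n", emulated_write(42));  /* 0: the tracker was reset */
    return 0;
}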
2579 | 0 |
|
2580 | 0 |
|
2581 | 0 |
|
2582 | 0 | /**************************************************************************/ |
2583 | 0 | /* Optimization: Prefetch multiple L1 entries. This is called after we have |
2584 | 0 | * demand-faulted a shadow l1e in the fault handler, to see if it's |
2585 | 0 | * worth fetching some more. |
2586 | 0 | */ |
2587 | 0 |
|
2588 | 0 | #if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH |
2589 | 0 |
|
2590 | 0 | /* XXX magic number */ |
2591 | 0 | #define PREFETCH_DISTANCE 32 |
2592 | 0 |
|
2593 | 0 | static void sh_prefetch(struct vcpu *v, walk_t *gw, |
2594 | 0 | shadow_l1e_t *ptr_sl1e, mfn_t sl1mfn) |
2595 | 0 | { |
2596 | 0 | struct domain *d = v->domain; |
2597 | 0 | int i, dist; |
2598 | 0 | gfn_t gfn; |
2599 | 0 | mfn_t gmfn; |
2600 | 0 | guest_l1e_t *gl1p = NULL, gl1e; |
2601 | 0 | shadow_l1e_t sl1e; |
2602 | 0 | u32 gflags; |
2603 | 0 | p2m_type_t p2mt; |
2604 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
2605 | 0 | guest_l1e_t *snpl1p = NULL; |
2606 | 0 | #endif /* OOS */ |
2607 | 0 |
|
2608 | 0 |
|
2609 | 0 | /* Prefetch no further than the end of the _shadow_ l1 MFN */ |
2610 | 0 | dist = (PAGE_SIZE - ((unsigned long)ptr_sl1e & ~PAGE_MASK)) / sizeof sl1e; |
2611 | 0 | /* And no more than a maximum fetches-per-fault */ |
2612 | 0 | if ( dist > PREFETCH_DISTANCE ) |
2613 | 0 | dist = PREFETCH_DISTANCE; |
2614 | 0 |
|
2615 | 0 | if ( mfn_valid(gw->l1mfn) ) |
2616 | 0 | { |
2617 | 0 | /* Normal guest page; grab the next guest entry */ |
2618 | 0 | gl1p = map_domain_page(gw->l1mfn); |
2619 | 0 | gl1p += guest_l1_table_offset(gw->va); |
2620 | 0 |
|
2621 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
2622 | 0 | if ( mfn_is_out_of_sync(gw->l1mfn) ) |
2623 | 0 | { |
2624 | 0 | mfn_t snpmfn = oos_snapshot_lookup(d, gw->l1mfn); |
2625 | 0 |
|
2626 | 0 | ASSERT(mfn_valid(snpmfn)); |
2627 | 0 | snpl1p = map_domain_page(snpmfn); |
2628 | 0 | snpl1p += guest_l1_table_offset(gw->va); |
2629 | 0 | } |
2630 | 0 | #endif /* OOS */ |
2631 | 0 | } |
2632 | 0 |
|
2633 | 0 | for ( i = 1; i < dist ; i++ ) |
2634 | 0 | { |
2635 | 0 | /* No point in prefetching if there's already a shadow */ |
2636 | 0 | if ( ptr_sl1e[i].l1 != 0 ) |
2637 | 0 | break; |
2638 | 0 |
|
2639 | 0 | if ( mfn_valid(gw->l1mfn) ) |
2640 | 0 | { |
2641 | 0 | /* Normal guest page; grab the next guest entry */ |
2642 | 0 | gl1e = gl1p[i]; |
2643 | 0 | /* Not worth continuing if we hit an entry that will need another |
2644 | 0 | * fault for A/D-bit propagation anyway */ |
2645 | 0 | gflags = guest_l1e_get_flags(gl1e); |
2646 | 0 | if ( (gflags & _PAGE_PRESENT) |
2647 | 0 | && (!(gflags & _PAGE_ACCESSED) |
2648 | 0 | || ((gflags & _PAGE_RW) && !(gflags & _PAGE_DIRTY))) ) |
2649 | 0 | break; |
2650 | 0 | } |
2651 | 0 | else |
2652 | 0 | { |
2653 | 0 | /* Fragmented superpage, unless we've been called wrongly */ |
2654 | 0 | ASSERT(guest_l2e_get_flags(gw->l2e) & _PAGE_PSE); |
2655 | 0 | /* Increment the l1e's GFN by the right number of guest pages */ |
2656 | 0 | gl1e = guest_l1e_from_gfn( |
2657 | 0 | _gfn(gfn_x(guest_l1e_get_gfn(gw->l1e)) + i), |
2658 | 0 | guest_l1e_get_flags(gw->l1e)); |
2659 | 0 | } |
2660 | 0 |
|
2661 | 0 | /* Look at the gfn that the l1e is pointing at */ |
2662 | 0 | if ( (guest_l1e_get_flags(gl1e) & _PAGE_PRESENT) && |
2663 | 0 | !guest_l1e_rsvd_bits(v, gl1e) ) |
2664 | 0 | { |
2665 | 0 | gfn = guest_l1e_get_gfn(gl1e); |
2666 | 0 | gmfn = get_gfn_query_unlocked(d, gfn_x(gfn), &p2mt); |
2667 | 0 | } |
2668 | 0 | else |
2669 | 0 | { |
2670 | 0 | gmfn = INVALID_MFN; |
2671 | 0 | p2mt = p2m_invalid; |
2672 | 0 | } |
2673 | 0 |
|
2674 | 0 | /* Propagate the entry. */ |
2675 | 0 | l1e_propagate_from_guest(v, gl1e, gmfn, &sl1e, ft_prefetch, p2mt); |
2676 | 0 | (void) shadow_set_l1e(d, ptr_sl1e + i, sl1e, p2mt, sl1mfn); |
2677 | 0 |
|
2678 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
2679 | 0 | if ( snpl1p != NULL ) |
2680 | 0 | snpl1p[i] = gl1e; |
2681 | 0 | #endif /* OOS */ |
2682 | 0 | } |
2683 | 0 | if ( gl1p != NULL ) |
2684 | 0 | unmap_domain_page(gl1p); |
2685 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
2686 | 0 | if ( snpl1p != NULL ) |
2687 | 0 | unmap_domain_page(snpl1p); |
2688 | 0 | #endif /* OOS */ |
2689 | 0 | } |
2690 | 0 |
|
2691 | 0 | #endif /* SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH */ |
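The prefetch window in sh_prefetch() is bounded twice: it may not run past the end of the shadow l1 page that ptr_sl1e points into, and it may not exceed PREFETCH_DISTANCE entries. The standalone sketch below is not part of multi.c (page and entry sizes are assumptions); it only shows how that clamp behaves near a page boundary.

#include <assert.h>
#include <stdint.h>

#define FAKE_PAGE_SIZE         4096u
#define FAKE_ENTRY_SIZE        8u     /* assumed sizeof(shadow_l1e_t) */
#define FAKE_PREFETCH_DISTANCE 32

/* Entries we may prefetch starting at the given shadow l1 entry address. */
static int prefetch_distance(uintptr_t ptr_sl1e)
{
    int dist = (FAKE_PAGE_SIZE - (ptr_sl1e & (FAKE_PAGE_SIZE - 1)))
               / FAKE_ENTRY_SIZE;

    return dist > FAKE_PREFETCH_DISTANCE ? FAKE_PREFETCH_DISTANCE : dist;
}

int main(void)
{
    assert(prefetch_distance(0x1000) == 32);  /* start of a page: full window */
    assert(prefetch_distance(0x1FF0) == 2);   /* only 2 entries left in the page */
    return 0;
}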
2692 | 0 |
|
2693 | 0 | #if GUEST_PAGING_LEVELS == 4 |
2694 | 0 | typedef u64 guest_va_t; |
2695 | 0 | typedef u64 guest_pa_t; |
2696 | 0 | #elif GUEST_PAGING_LEVELS == 3 |
2697 | 0 | typedef u32 guest_va_t; |
2698 | 0 | typedef u64 guest_pa_t; |
2699 | 0 | #else |
2700 | 0 | typedef u32 guest_va_t; |
2701 | 0 | typedef u32 guest_pa_t; |
2702 | 0 | #endif |
2703 | 0 |
|
2704 | 0 | static inline void trace_shadow_gen(u32 event, guest_va_t va) |
2705 | 0 | { |
2706 | 0 | if ( tb_init_done ) |
2707 | 0 | { |
2708 | 0 | event |= (GUEST_PAGING_LEVELS-2)<<8; |
2709 | 0 | __trace_var(event, 0/*!tsc*/, sizeof(va), &va); |
2710 | 0 | } |
2711 | 0 | } |
2712 | 0 |
|
2713 | 0 | static inline void trace_shadow_fixup(guest_l1e_t gl1e, |
2714 | 0 | guest_va_t va) |
2715 | 0 | { |
2716 | 0 | if ( tb_init_done ) |
2717 | 0 | { |
2718 | 0 | struct __packed { |
2719 | 0 | /* for PAE, guest_l1e may be 64 while guest_va may be 32; |
2720 | 0 | so put it first for alignment sake. */ |
2721 | 0 | guest_l1e_t gl1e; |
2722 | 0 | guest_va_t va; |
2723 | 0 | u32 flags; |
2724 | 0 | } d; |
2725 | 0 | u32 event; |
2726 | 0 |
|
2727 | 0 | event = TRC_SHADOW_FIXUP | ((GUEST_PAGING_LEVELS-2)<<8); |
2728 | 0 |
|
2729 | 0 | d.gl1e = gl1e; |
2730 | 0 | d.va = va; |
2731 | 0 | d.flags = this_cpu(trace_shadow_path_flags); |
2732 | 0 |
|
2733 | 0 | __trace_var(event, 0/*!tsc*/, sizeof(d), &d); |
2734 | 0 | } |
2735 | 0 | } |
2736 | 0 |
|
2737 | 0 | static inline void trace_not_shadow_fault(guest_l1e_t gl1e, |
2738 | 0 | guest_va_t va) |
2739 | 0 | { |
2740 | 0 | if ( tb_init_done ) |
2741 | 0 | { |
2742 | 0 | struct __packed { |
2743 | 0 | /* for PAE, guest_l1e may be 64 while guest_va may be 32; |
2744 | 0 | so put it first for alignment sake. */ |
2745 | 0 | guest_l1e_t gl1e; |
2746 | 0 | guest_va_t va; |
2747 | 0 | u32 flags; |
2748 | 0 | } d; |
2749 | 0 | u32 event; |
2750 | 0 |
|
2751 | 0 | event = TRC_SHADOW_NOT_SHADOW | ((GUEST_PAGING_LEVELS-2)<<8); |
2752 | 0 |
|
2753 | 0 | d.gl1e = gl1e; |
2754 | 0 | d.va = va; |
2755 | 0 | d.flags = this_cpu(trace_shadow_path_flags); |
2756 | 0 |
|
2757 | 0 | __trace_var(event, 0/*!tsc*/, sizeof(d), &d); |
2758 | 0 | } |
2759 | 0 | } |
2760 | 0 |
|
2761 | 0 | static inline void trace_shadow_emulate_other(u32 event, |
2762 | 0 | guest_va_t va, |
2763 | 0 | gfn_t gfn) |
2764 | 0 | { |
2765 | 0 | if ( tb_init_done ) |
2766 | 0 | { |
2767 | 0 | struct __packed { |
2768 | 0 | /* for PAE, guest_l1e may be 64 while guest_va may be 32; |
2769 | 0 | so put it first for alignment sake. */ |
2770 | 0 | #if GUEST_PAGING_LEVELS == 2 |
2771 | 0 | u32 gfn; |
2772 | 0 | #else |
2773 | 0 | u64 gfn; |
2774 | 0 | #endif |
2775 | 0 | guest_va_t va; |
2776 | 0 | } d; |
2777 | 0 |
|
2778 | 0 | event |= ((GUEST_PAGING_LEVELS-2)<<8); |
2779 | 0 |
|
2780 | 0 | d.gfn=gfn_x(gfn); |
2781 | 0 | d.va = va; |
2782 | 0 |
|
2783 | 0 | __trace_var(event, 0/*!tsc*/, sizeof(d), &d); |
2784 | 0 | } |
2785 | 0 | } |
2786 | 0 |
|
2787 | 0 | #if GUEST_PAGING_LEVELS == 3 |
2788 | 0 | static DEFINE_PER_CPU(guest_va_t,trace_emulate_initial_va); |
2789 | 0 | static DEFINE_PER_CPU(int,trace_extra_emulation_count); |
2790 | 0 | #endif |
2791 | 0 | static DEFINE_PER_CPU(guest_pa_t,trace_emulate_write_val); |
2792 | 0 |
|
2793 | 0 | static inline void trace_shadow_emulate(guest_l1e_t gl1e, unsigned long va) |
2794 | 0 | { |
2795 | 0 | if ( tb_init_done ) |
2796 | 0 | { |
2797 | 0 | struct __packed { |
2798 | 0 | /* for PAE, guest_l1e may be 64 while guest_va may be 32; |
2799 | 0 | so put it first for alignment sake. */ |
2800 | 0 | guest_l1e_t gl1e, write_val; |
2801 | 0 | guest_va_t va; |
2802 | 0 | unsigned flags:29, emulation_count:3; |
2803 | 0 | } d; |
2804 | 0 | u32 event; |
2805 | 0 |
|
2806 | 0 | event = TRC_SHADOW_EMULATE | ((GUEST_PAGING_LEVELS-2)<<8); |
2807 | 0 |
|
2808 | 0 | d.gl1e = gl1e; |
2809 | 0 | d.write_val.l1 = this_cpu(trace_emulate_write_val); |
2810 | 0 | d.va = va; |
2811 | 0 | #if GUEST_PAGING_LEVELS == 3 |
2812 | 0 | d.emulation_count = this_cpu(trace_extra_emulation_count); |
2813 | 0 | #endif |
2814 | 0 | d.flags = this_cpu(trace_shadow_path_flags); |
2815 | 0 |
|
2816 | 0 | __trace_var(event, 0/*!tsc*/, sizeof(d), &d); |
2817 | 0 | } |
2818 | 0 | } |
2819 | 0 |
|
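All of the trace helpers above pack the (possibly 64-bit) guest_l1e_t ahead of the (possibly 32-bit) guest_va_t, so the wider field sits at offset 0 of the packed record handed to __trace_var(). A small self-contained sketch of that layout concern, using stand-in types rather than Xen's definitions, might look like this:

/* Standalone layout sketch -- stand-in types, not Xen's definitions. */
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct __attribute__((packed)) rec_wide_first {
    uint64_t gl1e;   /* 64-bit guest entry placed first: lands at offset 0 */
    uint32_t va;     /* 32-bit virtual address, as on a PAE guest */
    uint32_t flags;
};

struct __attribute__((packed)) rec_va_first {
    uint32_t va;     /* 32-bit field first... */
    uint64_t gl1e;   /* ...pushes the 64-bit field to offset 4 */
    uint32_t flags;
};

int main(void)
{
    printf("wide first: gl1e at offset %zu, record size %zu\n",
           offsetof(struct rec_wide_first, gl1e),
           sizeof(struct rec_wide_first));
    printf("va first:   gl1e at offset %zu, record size %zu\n",
           offsetof(struct rec_va_first, gl1e),
           sizeof(struct rec_va_first));
    return 0;
}

Built with gcc, the first layout reports gl1e at offset 0 while the second leaves it at offset 4, which is the misalignment the comments above are avoiding.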
2820 | 0 | /**************************************************************************/ |
2821 | 0 | /* Entry points into the shadow code */ |
2822 | 0 |
|
2823 | 0 | /* Called from pagefault handler in Xen, and from the HVM trap handlers |
2824 | 0 | * for pagefaults. Returns 1 if this fault was an artefact of the |
2825 | 0 | * shadow code (and the guest should retry) or 0 if it is not (and the |
2826 | 0 | * fault should be handled elsewhere or passed to the guest). */ |
2827 | 0 |
|
2828 | 0 | static int sh_page_fault(struct vcpu *v, |
2829 | 0 | unsigned long va, |
2830 | 0 | struct cpu_user_regs *regs) |
2831 | 0 | { |
2832 | 0 | struct domain *d = v->domain; |
2833 | 0 | walk_t gw; |
2834 | 0 | gfn_t gfn = _gfn(0); |
2835 | 0 | mfn_t gmfn, sl1mfn = _mfn(0); |
2836 | 0 | shadow_l1e_t sl1e, *ptr_sl1e; |
2837 | 0 | paddr_t gpa; |
2838 | 0 | struct sh_emulate_ctxt emul_ctxt; |
2839 | 0 | const struct x86_emulate_ops *emul_ops; |
2840 | 0 | int r; |
2841 | 0 | p2m_type_t p2mt; |
2842 | 0 | uint32_t rc, error_code; |
2843 | 0 | bool walk_ok; |
2844 | 0 | int version; |
2845 | 0 | const struct npfec access = { |
2846 | 0 | .read_access = 1, |
2847 | 0 | .write_access = !!(regs->error_code & PFEC_write_access), |
2848 | 0 | .gla_valid = 1, |
2849 | 0 | .kind = npfec_kind_with_gla |
2850 | 0 | }; |
2851 | 0 | const fetch_type_t ft = |
2852 | 0 | access.write_access ? ft_demand_write : ft_demand_read; |
2853 | 0 | #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION |
2854 | 0 | int fast_emul = 0; |
2855 | 0 | #endif |
2856 | 0 |
|
2857 | 0 | SHADOW_PRINTK("%pv va=%#lx err=%#x, rip=%lx\n", |
2858 | 0 | v, va, regs->error_code, regs->rip); |
2859 | 0 |
|
2860 | 0 | perfc_incr(shadow_fault); |
2861 | 0 |
|
2862 | 0 | #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION |
2863 | 0 | /* If the faulting frame was successfully emulated on the last shadow
2864 | 0 | * fault, it is very likely to need the same emulation action again.
2865 | 0 | * Try to emulate early to avoid taking the paging lock.
2866 | 0 | */ |
2867 | 0 | if ( v->arch.paging.last_write_emul_ok |
2868 | 0 | && v->arch.paging.shadow.last_emulated_frame == (va >> PAGE_SHIFT) ) |
2869 | 0 | { |
2870 | 0 | /* Check whether the error code is 3 (present write fault); otherwise
2871 | 0 | * fall back to the normal path in case some validation is required.
2872 | 0 | */ |
2873 | 0 | if ( regs->error_code == (PFEC_write_access | PFEC_page_present) ) |
2874 | 0 | { |
2875 | 0 | fast_emul = 1; |
2876 | 0 | gmfn = _mfn(v->arch.paging.shadow.last_emulated_mfn); |
2877 | 0 |
|
2878 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
2879 | 0 | /* Fall back to the slow path if we're trying to emulate |
2880 | 0 | writes to an out of sync page. */ |
2881 | 0 | if ( mfn_valid(gmfn) && mfn_is_out_of_sync(gmfn) ) |
2882 | 0 | { |
2883 | 0 | fast_emul = 0; |
2884 | 0 | v->arch.paging.last_write_emul_ok = 0; |
2885 | 0 | goto page_fault_slow_path; |
2886 | 0 | } |
2887 | 0 | #endif /* OOS */ |
2888 | 0 |
|
2889 | 0 | perfc_incr(shadow_fault_fast_emulate); |
2890 | 0 | goto early_emulation; |
2891 | 0 | } |
2892 | 0 | else |
2893 | 0 | v->arch.paging.last_write_emul_ok = 0; |
2894 | 0 | } |
2895 | 0 | #endif |
2896 | 0 |
|
2897 | 0 | // |
2898 | 0 | // XXX: Need to think about eventually mapping superpages directly in the |
2899 | 0 | // shadow (when possible), as opposed to splintering them into a |
2900 | 0 | // bunch of 4K maps. |
2901 | 0 | // |
2902 | 0 |
|
2903 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH) |
2904 | 0 | if ( (regs->error_code & PFEC_reserved_bit) ) |
2905 | 0 | { |
2906 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
2907 | 0 | /* First, need to check that this isn't an out-of-sync |
2908 | 0 | * shadow l1e. If it is, we fall back to the slow path, which |
2909 | 0 | * will sync it up again. */ |
2910 | 0 | { |
2911 | 0 | shadow_l2e_t sl2e; |
2912 | 0 | mfn_t gl1mfn; |
2913 | 0 | if ( (__copy_from_user(&sl2e, |
2914 | 0 | (sh_linear_l2_table(v) |
2915 | 0 | + shadow_l2_linear_offset(va)), |
2916 | 0 | sizeof(sl2e)) != 0) |
2917 | 0 | || !(shadow_l2e_get_flags(sl2e) & _PAGE_PRESENT) |
2918 | 0 | || !mfn_valid(gl1mfn = backpointer(mfn_to_page( |
2919 | 0 | shadow_l2e_get_mfn(sl2e)))) |
2920 | 0 | || unlikely(mfn_is_out_of_sync(gl1mfn)) ) |
2921 | 0 | { |
2922 | 0 | /* Hit the slow path as if there had been no |
2923 | 0 | * shadow entry at all, and let it tidy up */ |
2924 | 0 | ASSERT(regs->error_code & PFEC_page_present); |
2925 | 0 | regs->error_code ^= (PFEC_reserved_bit|PFEC_page_present); |
2926 | 0 | goto page_fault_slow_path; |
2927 | 0 | } |
2928 | 0 | } |
2929 | 0 | #endif /* SHOPT_OUT_OF_SYNC */ |
2930 | 0 | /* The only reasons for reserved bits to be set in shadow entries |
2931 | 0 | * are the two "magic" shadow_l1e entries. */ |
2932 | 0 | if ( likely((__copy_from_user(&sl1e, |
2933 | 0 | (sh_linear_l1_table(v) |
2934 | 0 | + shadow_l1_linear_offset(va)), |
2935 | 0 | sizeof(sl1e)) == 0) |
2936 | 0 | && sh_l1e_is_magic(sl1e)) ) |
2937 | 0 | { |
2938 | 0 |
|
2939 | 0 | if ( sh_l1e_is_gnp(sl1e) ) |
2940 | 0 | { |
2941 | 0 | /* Not-present in a guest PT: pass to the guest as |
2942 | 0 | * a not-present fault (by flipping two bits). */ |
2943 | 0 | ASSERT(regs->error_code & PFEC_page_present); |
2944 | 0 | regs->error_code ^= (PFEC_reserved_bit|PFEC_page_present); |
2945 | 0 | reset_early_unshadow(v); |
2946 | 0 | perfc_incr(shadow_fault_fast_gnp); |
2947 | 0 | SHADOW_PRINTK("fast path not-present\n"); |
2948 | 0 | trace_shadow_gen(TRC_SHADOW_FAST_PROPAGATE, va); |
2949 | 0 | return 0; |
2950 | 0 | } |
2951 | 0 | else |
2952 | 0 | { |
2953 | 0 | /* Magic MMIO marker: extract gfn for MMIO address */ |
2954 | 0 | ASSERT(sh_l1e_is_mmio(sl1e)); |
2955 | 0 | gpa = (((paddr_t)(gfn_x(sh_l1e_mmio_get_gfn(sl1e)))) |
2956 | 0 | << PAGE_SHIFT) |
2957 | 0 | | (va & ~PAGE_MASK); |
2958 | 0 | } |
2959 | 0 | perfc_incr(shadow_fault_fast_mmio); |
2960 | 0 | SHADOW_PRINTK("fast path mmio %#"PRIpaddr"\n", gpa); |
2961 | 0 | reset_early_unshadow(v); |
2962 | 0 | trace_shadow_gen(TRC_SHADOW_FAST_MMIO, va); |
2963 | 0 | return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT, access) |
2964 | 0 | ? EXCRET_fault_fixed : 0); |
2965 | 0 | } |
2966 | 0 | else |
2967 | 0 | { |
2968 | 0 | /* This should be exceptionally rare: another vcpu has fixed |
2969 | 0 | * the tables between the fault and our reading the l1e. |
2970 | 0 | * Retry and let the hardware give us the right fault next time. */ |
2971 | 0 | perfc_incr(shadow_fault_fast_fail); |
2972 | 0 | SHADOW_PRINTK("fast path false alarm!\n"); |
2973 | 0 | trace_shadow_gen(TRC_SHADOW_FALSE_FAST_PATH, va); |
2974 | 0 | return EXCRET_fault_fixed; |
2975 | 0 | } |
2976 | 0 | } |
2977 | 0 |
|
2978 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
2979 | 0 | page_fault_slow_path: |
2980 | 0 | #endif |
2981 | 0 | #endif /* SHOPT_FAST_FAULT_PATH */ |
2982 | 0 |
|
2983 | 0 | /* Detect if this page fault happened while we were already in Xen |
2984 | 0 | * doing a shadow operation. If that happens, the only thing we can |
2985 | 0 | * do is let Xen's normal fault handlers try to fix it. In any case, |
2986 | 0 | * a diagnostic trace of the fault will be more useful than |
2987 | 0 | * a BUG() when we try to take the lock again. */ |
2988 | 0 | if ( unlikely(paging_locked_by_me(d)) ) |
2989 | 0 | { |
2990 | 0 | SHADOW_ERROR("Recursive shadow fault: lock was taken by %s\n", |
2991 | 0 | d->arch.paging.lock.locker_function); |
2992 | 0 | return 0; |
2993 | 0 | } |
2994 | 0 |
|
2995 | 0 | rewalk: |
2996 | 0 |
|
2997 | 0 | error_code = regs->error_code; |
2998 | 0 |
|
2999 | 0 | /* |
3000 | 0 | * When CR4.SMAP is enabled, instructions which have a side effect of |
3001 | 0 | * accessing the system data structures (e.g. mov to %ds accessing the |
3002 | 0 | * LDT/GDT, or int $n accessing the IDT) are known as implicit supervisor |
3003 | 0 | * accesses. |
3004 | 0 | * |
3005 | 0 | * The distinction between implicit and explicit accesses forms part of the
3006 | 0 | * determination of access rights, controlling whether the access is |
3007 | 0 | * successful, or raises a #PF. |
3008 | 0 | * |
3009 | 0 | * Unfortunately, the processor throws away the implicit/explicit |
3010 | 0 | * distinction and does not provide it to the pagefault handler |
3011 | 0 | * (i.e. here) in the #PF error code. Therefore, we must try to
3012 | 0 | * reconstruct the lost state so it can be fed back into our pagewalk |
3013 | 0 | * through the guest tables. |
3014 | 0 | * |
3015 | 0 | * User mode accesses are easy to reconstruct: |
3016 | 0 | * |
3017 | 0 | * If we observe a cpl3 data fetch which was a supervisor walk, this |
3018 | 0 | * must have been an implicit access to a system table. |
3019 | 0 | * |
3020 | 0 | * Supervisor mode accesses are not easy: |
3021 | 0 | * |
3022 | 0 | * In principle, we could decode the instruction under %rip and have the |
3023 | 0 | * instruction emulator tell us if there is an implicit access. |
3024 | 0 | * However, this is racy with other vcpus updating the pagetable or |
3025 | 0 | * rewriting the instruction stream under our feet. |
3026 | 0 | * |
3027 | 0 | * Therefore, we do nothing. (If anyone has a sensible suggestion for |
3028 | 0 | * how to distinguish these cases, xen-devel@ is all ears...) |
3029 | 0 | * |
3030 | 0 | * As a result, one specific corner case will fail. If a guest OS with |
3031 | 0 | * SMAP enabled ends up mapping a system table with user mappings, sets |
3032 | 0 | * EFLAGS.AC to allow explicit accesses to user mappings, and implicitly |
3033 | 0 | * accesses the user mapping, hardware and the shadow code will disagree |
3034 | 0 | * on whether a #PF should be raised. |
3035 | 0 | * |
3036 | 0 | * Hardware raises #PF because implicit supervisor accesses to user |
3037 | 0 | * mappings are strictly disallowed. As we can't reconstruct the correct |
3038 | 0 | * input, the pagewalk is performed as if it were an explicit access, |
3039 | 0 | * which concludes that the access should have succeeded and the shadow |
3040 | 0 | * pagetables need modifying. The shadow pagetables are modified (to the |
3041 | 0 | * same value), and we re-enter the guest to re-execute the instruction, |
3042 | 0 | * which causes another #PF, and the vcpu livelocks, unable to make |
3043 | 0 | * forward progress. |
3044 | 0 | * |
3045 | 0 | * In practice, this is tolerable. No production OS will deliberately |
3046 | 0 | * construct this corner case (as doing so would mean that a system table |
3047 | 0 | * is directly accessible to userspace, and the OS is trivially rootable.)
3048 | 0 | * If this corner case comes about accidentally, then a security-relevant |
3049 | 0 | * bug has been tickled. |
3050 | 0 | */ |
3051 | 0 | if ( !(error_code & (PFEC_insn_fetch|PFEC_user_mode)) && |
3052 | 0 | (is_pv_vcpu(v) ? (regs->ss & 3) : hvm_get_cpl(v)) == 3 ) |
3053 | 0 | error_code |= PFEC_implicit; |
3054 | 0 |
|
3055 | 0 | /* The walk is done in a lock-free style, with some sanity checks
3056 | 0 | * postponed until after the paging lock is taken. Those delayed
3057 | 0 | * checks make sure that no inconsistent mapping is translated into
3058 | 0 | * the shadow page table. */
3059 | 0 | version = atomic_read(&d->arch.paging.shadow.gtable_dirty_version); |
3060 | 0 | smp_rmb(); |
3061 | 0 | walk_ok = sh_walk_guest_tables(v, va, &gw, error_code); |
3062 | 0 |
|
3063 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
3064 | 0 | regs->error_code &= ~PFEC_page_present; |
3065 | 0 | if ( gw.pfec & PFEC_page_present ) |
3066 | 0 | regs->error_code |= PFEC_page_present; |
3067 | 0 | #endif |
3068 | 0 |
|
3069 | 0 | if ( !walk_ok ) |
3070 | 0 | { |
3071 | 0 | perfc_incr(shadow_fault_bail_real_fault); |
3072 | 0 | SHADOW_PRINTK("not a shadow fault\n"); |
3073 | 0 | reset_early_unshadow(v); |
3074 | 0 | regs->error_code = gw.pfec & PFEC_arch_mask; |
3075 | 0 | goto propagate; |
3076 | 0 | } |
3077 | 0 |
|
3078 | 0 | /* It's possible that the guest has put pagetables in memory that it has |
3079 | 0 | * already used for some special purpose (ioreq pages, or granted pages). |
3080 | 0 | * If that happens we'll have killed the guest already but it's still not |
3081 | 0 | * safe to propagate entries out of the guest PT so get out now. */ |
3082 | 0 | if ( unlikely(d->is_shutting_down && d->shutdown_code == SHUTDOWN_crash) ) |
3083 | 0 | { |
3084 | 0 | SHADOW_PRINTK("guest is shutting down\n"); |
3085 | 0 | goto propagate; |
3086 | 0 | } |
3087 | 0 |
|
3088 | 0 | /* What mfn is the guest trying to access? */ |
3089 | 0 | gfn = guest_walk_to_gfn(&gw); |
3090 | 0 | gmfn = get_gfn(d, gfn, &p2mt); |
3091 | 0 |
|
3092 | 0 | if ( shadow_mode_refcounts(d) && |
3093 | 0 | ((!p2m_is_valid(p2mt) && !p2m_is_grant(p2mt)) || |
3094 | 0 | (!p2m_is_mmio(p2mt) && !mfn_valid(gmfn))) ) |
3095 | 0 | { |
3096 | 0 | perfc_incr(shadow_fault_bail_bad_gfn); |
3097 | 0 | SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n", |
3098 | 0 | gfn_x(gfn), mfn_x(gmfn)); |
3099 | 0 | reset_early_unshadow(v); |
3100 | 0 | put_gfn(d, gfn_x(gfn)); |
3101 | 0 | goto propagate; |
3102 | 0 | } |
3103 | 0 |
|
3104 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) |
3105 | 0 | /* Remember this successful VA->GFN translation for later. */ |
3106 | 0 | vtlb_insert(v, va >> PAGE_SHIFT, gfn_x(gfn), |
3107 | 0 | regs->error_code | PFEC_page_present); |
3108 | 0 | #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */ |
3109 | 0 |
|
3110 | 0 | paging_lock(d); |
3111 | 0 |
|
3112 | 0 | TRACE_CLEAR_PATH_FLAGS; |
3113 | 0 |
|
3114 | 0 | /* Make sure there is enough free shadow memory to build a chain of |
3115 | 0 | * shadow tables. (We never allocate a top-level shadow on this path, |
3116 | 0 | * only a 32b l1, pae l1, or 64b l3+2+1. Note that while |
3117 | 0 | * SH_type_l1_shadow isn't correct in the latter case, all page |
3118 | 0 | * tables are the same size there.) |
3119 | 0 | * |
3120 | 0 | * Preallocate shadow pages *before* removing writable accesses |
3121 | 0 | * otherwise an OOS L1 might be demoted and promoted again with
3122 | 0 | * writable mappings. */ |
3123 | 0 | shadow_prealloc(d, |
3124 | 0 | SH_type_l1_shadow, |
3125 | 0 | GUEST_PAGING_LEVELS < 4 ? 1 : GUEST_PAGING_LEVELS - 1); |
3126 | 0 |
|
3127 | 0 | rc = gw_remove_write_accesses(v, va, &gw); |
3128 | 0 |
|
3129 | 0 | /* First bit set: Removed write access to a page. */ |
3130 | 0 | if ( rc & GW_RMWR_FLUSHTLB ) |
3131 | 0 | { |
3132 | 0 | /* Write permission removal is also a hint that other gwalks |
3133 | 0 | * overlapping with this one may be inconsistent |
3134 | 0 | */ |
3135 | 0 | perfc_incr(shadow_rm_write_flush_tlb); |
3136 | 0 | smp_wmb(); |
3137 | 0 | atomic_inc(&d->arch.paging.shadow.gtable_dirty_version); |
3138 | 0 | flush_tlb_mask(d->domain_dirty_cpumask); |
3139 | 0 | } |
3140 | 0 |
|
3141 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
3142 | 0 | /* Second bit set: Resynced a page. Re-walk needed. */ |
3143 | 0 | if ( rc & GW_RMWR_REWALK ) |
3144 | 0 | { |
3145 | 0 | paging_unlock(d); |
3146 | 0 | put_gfn(d, gfn_x(gfn)); |
3147 | 0 | goto rewalk; |
3148 | 0 | } |
3149 | 0 | #endif /* OOS */ |
3150 | 0 |
|
3151 | 0 | if ( !shadow_check_gwalk(v, va, &gw, version) ) |
3152 | 0 | { |
3153 | 0 | perfc_incr(shadow_inconsistent_gwalk); |
3154 | 0 | paging_unlock(d); |
3155 | 0 | put_gfn(d, gfn_x(gfn)); |
3156 | 0 | goto rewalk; |
3157 | 0 | } |
3158 | 0 |
|
3159 | 0 | shadow_audit_tables(v); |
3160 | 0 | sh_audit_gw(v, &gw); |
3161 | 0 |
|
3162 | 0 | /* Acquire the shadow. This must happen before we figure out the rights |
3163 | 0 | * for the shadow entry, since we might promote a page here. */ |
3164 | 0 | ptr_sl1e = shadow_get_and_create_l1e(v, &gw, &sl1mfn, ft); |
3165 | 0 | if ( unlikely(ptr_sl1e == NULL) ) |
3166 | 0 | { |
3167 | 0 | /* Couldn't get the sl1e! Since we know the guest entries |
3168 | 0 | * are OK, this can only have been caused by a failed |
3169 | 0 | * shadow_set_l*e(), which will have crashed the guest. |
3170 | 0 | * Get out of the fault handler immediately. */ |
3171 | 0 | /* Windows 7 apparently relies on the hardware to do something |
3172 | 0 | * it explicitly hasn't promised to do: load l3 values after |
3173 | 0 | * the cr3 is loaded. |
3174 | 0 | * In any case, in the PAE case, the ASSERT is not true; it can |
3175 | 0 | * happen because of actions the guest is taking. */ |
3176 | 0 | #if GUEST_PAGING_LEVELS == 3 |
3177 | 0 | v->arch.paging.mode->update_cr3(v, 0); |
3178 | 0 | #else |
3179 | 0 | ASSERT(d->is_shutting_down); |
3180 | 0 | #endif |
3181 | 0 | paging_unlock(d); |
3182 | 0 | put_gfn(d, gfn_x(gfn)); |
3183 | 0 | trace_shadow_gen(TRC_SHADOW_DOMF_DYING, va); |
3184 | 0 | return 0; |
3185 | 0 | } |
3186 | 0 |
|
3187 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
3188 | 0 | /* Always unsync when writing to L1 page tables. */ |
3189 | 0 | if ( sh_mfn_is_a_page_table(gmfn) |
3190 | 0 | && ft == ft_demand_write ) |
3191 | 0 | sh_unsync(v, gmfn); |
3192 | 0 |
|
3193 | 0 | if ( unlikely(d->is_shutting_down && d->shutdown_code == SHUTDOWN_crash) ) |
3194 | 0 | { |
3195 | 0 | /* We might end up with a crashed domain here if |
3196 | 0 | * sh_remove_shadows() in a previous sh_resync() call has |
3197 | 0 | * failed. We cannot safely continue since some page is still |
3198 | 0 | * OOS but not in the hash table anymore. */ |
3199 | 0 | paging_unlock(d); |
3200 | 0 | put_gfn(d, gfn_x(gfn)); |
3201 | 0 | return 0; |
3202 | 0 | } |
3203 | 0 |
|
3204 | 0 | /* Final check: if someone has synced a page, it's possible that |
3205 | 0 | * our l1e is stale. Compare the entries, and rewalk if necessary. */ |
3206 | 0 | if ( shadow_check_gl1e(v, &gw) ) |
3207 | 0 | { |
3208 | 0 | perfc_incr(shadow_inconsistent_gwalk); |
3209 | 0 | paging_unlock(d); |
3210 | 0 | put_gfn(d, gfn_x(gfn)); |
3211 | 0 | goto rewalk; |
3212 | 0 | } |
3213 | 0 | #endif /* OOS */ |
3214 | 0 |
|
3215 | 0 | /* Calculate the shadow entry and write it */ |
3216 | 0 | l1e_propagate_from_guest(v, gw.l1e, gmfn, &sl1e, ft, p2mt); |
3217 | 0 | r = shadow_set_l1e(d, ptr_sl1e, sl1e, p2mt, sl1mfn); |
3218 | 0 |
|
3219 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
3220 | 0 | if ( mfn_valid(gw.l1mfn) |
3221 | 0 | && mfn_is_out_of_sync(gw.l1mfn) ) |
3222 | 0 | { |
3223 | 0 | /* Update the OOS snapshot. */ |
3224 | 0 | mfn_t snpmfn = oos_snapshot_lookup(d, gw.l1mfn); |
3225 | 0 | guest_l1e_t *snp; |
3226 | 0 |
|
3227 | 0 | ASSERT(mfn_valid(snpmfn)); |
3228 | 0 |
|
3229 | 0 | snp = map_domain_page(snpmfn); |
3230 | 0 | snp[guest_l1_table_offset(va)] = gw.l1e; |
3231 | 0 | unmap_domain_page(snp); |
3232 | 0 | } |
3233 | 0 | #endif /* OOS */ |
3234 | 0 |
|
3235 | 0 | #if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH |
3236 | 0 | /* Prefetch some more shadow entries */ |
3237 | 0 | sh_prefetch(v, &gw, ptr_sl1e, sl1mfn); |
3238 | 0 | #endif |
3239 | 0 |
|
3240 | 0 | /* Need to emulate accesses to page tables */ |
3241 | 0 | if ( sh_mfn_is_a_page_table(gmfn) |
3242 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
3243 | 0 | /* Unless they've been allowed to go out of sync with their |
3244 | 0 | shadows and we don't need to unshadow it. */ |
3245 | 0 | && !(mfn_is_out_of_sync(gmfn) |
3246 | 0 | && !(regs->error_code & PFEC_user_mode)) |
3247 | 0 | #endif |
3248 | 0 | && (ft == ft_demand_write) ) |
3249 | 0 | { |
3250 | 0 | perfc_incr(shadow_fault_emulate_write); |
3251 | 0 | goto emulate; |
3252 | 0 | } |
3253 | 0 |
|
3254 | 0 | /* Need to hand off device-model MMIO to the device model */ |
3255 | 0 | if ( p2mt == p2m_mmio_dm ) |
3256 | 0 | { |
3257 | 0 | gpa = guest_walk_to_gpa(&gw); |
3258 | 0 | goto mmio; |
3259 | 0 | } |
3260 | 0 |
|
3261 | 0 | /* Ignore attempts to write to read-only memory. */ |
3262 | 0 | if ( p2m_is_readonly(p2mt) && (ft == ft_demand_write) ) |
3263 | 0 | { |
3264 | 0 | static unsigned long lastpage; |
3265 | 0 | if ( xchg(&lastpage, va & PAGE_MASK) != (va & PAGE_MASK) ) |
3266 | 0 | gdprintk(XENLOG_DEBUG, "guest attempted write to read-only memory" |
3267 | 0 | " page. va page=%#lx, mfn=%#lx\n", |
3268 | 0 | va & PAGE_MASK, mfn_x(gmfn)); |
3269 | 0 | goto emulate_readonly; /* skip over the instruction */ |
3270 | 0 | } |
3271 | 0 |
|
3272 | 0 | /* In HVM guests, we force CR0.WP always to be set, so that the |
3273 | 0 | * pagetables are always write-protected. If the guest thinks |
3274 | 0 | * CR0.WP is clear, we must emulate faulting supervisor writes to |
3275 | 0 | * allow the guest to write through read-only PTEs. Emulate if the |
3276 | 0 | * fault was a non-user write to a present page. */ |
3277 | 0 | if ( is_hvm_domain(d) |
3278 | 0 | && unlikely(!hvm_wp_enabled(v)) |
3279 | 0 | && regs->error_code == (PFEC_write_access|PFEC_page_present) |
3280 | 0 | && mfn_valid(gmfn) ) |
3281 | 0 | { |
3282 | 0 | perfc_incr(shadow_fault_emulate_wp); |
3283 | 0 | goto emulate; |
3284 | 0 | } |
3285 | 0 |
|
3286 | 0 | perfc_incr(shadow_fault_fixed); |
3287 | 0 | d->arch.paging.log_dirty.fault_count++; |
3288 | 0 | reset_early_unshadow(v); |
3289 | 0 |
|
3290 | 0 | trace_shadow_fixup(gw.l1e, va); |
3291 | 0 | done: |
3292 | 0 | sh_audit_gw(v, &gw); |
3293 | 0 | SHADOW_PRINTK("fixed\n"); |
3294 | 0 | shadow_audit_tables(v); |
3295 | 0 | paging_unlock(d); |
3296 | 0 | put_gfn(d, gfn_x(gfn)); |
3297 | 0 | return EXCRET_fault_fixed; |
3298 | 0 |
|
3299 | 0 | emulate: |
3300 | 0 | if ( !shadow_mode_refcounts(d) || !guest_mode(regs) ) |
3301 | 0 | goto not_a_shadow_fault; |
3302 | 0 |
|
3303 | 0 | /* |
3304 | 0 | * We do not emulate user writes. Instead we use them as a hint that the |
3305 | 0 | * page is no longer a page table. This behaviour differs from native, but |
3306 | 0 | * it seems very unlikely that any OS grants user access to page tables. |
3307 | 0 | */ |
3308 | 0 | if ( (regs->error_code & PFEC_user_mode) ) |
3309 | 0 | { |
3310 | 0 | SHADOW_PRINTK("user-mode fault to PT, unshadowing mfn %#lx\n", |
3311 | 0 | mfn_x(gmfn)); |
3312 | 0 | perfc_incr(shadow_fault_emulate_failed); |
3313 | 0 | sh_remove_shadows(d, gmfn, 0 /* thorough */, 1 /* must succeed */); |
3314 | 0 | trace_shadow_emulate_other(TRC_SHADOW_EMULATE_UNSHADOW_USER, |
3315 | 0 | va, gfn); |
3316 | 0 | goto done; |
3317 | 0 | } |
3318 | 0 |
|
3319 | 0 | /* |
3320 | 0 | * Writes from userspace to read-only memory jump here to avoid being
3321 | 0 | * caught by the user-mode page-table check above.
3322 | 0 | */ |
3323 | 0 | emulate_readonly: |
3324 | 0 |
|
3325 | 0 | /* Unshadow if we are writing to a toplevel pagetable that is |
3326 | 0 | * flagged as belonging to a dying process and is not currently in use. */
3327 | 0 | if ( sh_mfn_is_a_page_table(gmfn) |
3328 | 0 | && (mfn_to_page(gmfn)->shadow_flags & SHF_pagetable_dying) ) |
3329 | 0 | { |
3330 | 0 | int used = 0; |
3331 | 0 | struct vcpu *tmp; |
3332 | 0 | for_each_vcpu(d, tmp) |
3333 | 0 | { |
3334 | 0 | #if GUEST_PAGING_LEVELS == 3 |
3335 | 0 | int i; |
3336 | 0 | for ( i = 0; i < 4; i++ ) |
3337 | 0 | { |
3338 | 0 | mfn_t smfn = pagetable_get_mfn(tmp->arch.shadow_table[i]);
3339 | 0 |
|
3340 | 0 | if ( mfn_valid(smfn) && (mfn_x(smfn) != 0) ) |
3341 | 0 | { |
3342 | 0 | used |= (mfn_to_page(smfn)->v.sh.back == mfn_x(gmfn)); |
3343 | 0 |
|
3344 | 0 | if ( used ) |
3345 | 0 | break; |
3346 | 0 | } |
3347 | 0 | } |
3348 | 0 | #else /* 32 or 64 */ |
3349 | 0 | used = mfn_eq(pagetable_get_mfn(tmp->arch.guest_table), gmfn); |
3350 | 0 | #endif |
3351 | 0 | if ( used ) |
3352 | 0 | break; |
3353 | 0 | } |
3354 | 0 |
|
3355 | 0 | if ( !used ) |
3356 | 0 | sh_remove_shadows(d, gmfn, 1 /* fast */, 0 /* can fail */); |
3357 | 0 | } |
3358 | 0 |
|
3359 | 0 | /* |
3360 | 0 | * We don't need to hold the lock for the whole emulation; we will |
3361 | 0 | * take it again when we write to the pagetables. |
3362 | 0 | */ |
3363 | 0 | sh_audit_gw(v, &gw); |
3364 | 0 | shadow_audit_tables(v); |
3365 | 0 | paging_unlock(d); |
3366 | 0 | put_gfn(d, gfn_x(gfn)); |
3367 | 0 |
|
3368 | 0 | this_cpu(trace_emulate_write_val) = 0; |
3369 | 0 |
|
3370 | 0 | #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION |
3371 | 0 | early_emulation: |
3372 | 0 | #endif |
3373 | 0 | if ( is_hvm_domain(d) ) |
3374 | 0 | { |
3375 | 0 | /* |
3376 | 0 | * If we are in the middle of injecting an exception or interrupt then |
3377 | 0 | * we should not emulate: it is not the instruction at %eip that caused |
3378 | 0 | * the fault. Furthermore it is almost certainly the case the handler |
3379 | 0 | * stack is currently considered to be a page table, so we should |
3380 | 0 | * unshadow the faulting page before exiting. |
3381 | 0 | */ |
3382 | 0 | if ( unlikely(hvm_event_pending(v)) ) |
3383 | 0 | { |
3384 | 0 | #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION |
3385 | 0 | if ( fast_emul ) |
3386 | 0 | { |
3387 | 0 | perfc_incr(shadow_fault_fast_emulate_fail); |
3388 | 0 | v->arch.paging.last_write_emul_ok = 0; |
3389 | 0 | } |
3390 | 0 | #endif |
3391 | 0 | gdprintk(XENLOG_DEBUG, "write to pagetable during event " |
3392 | 0 | "injection: cr2=%#lx, mfn=%#lx\n", |
3393 | 0 | va, mfn_x(gmfn)); |
3394 | 0 | sh_remove_shadows(d, gmfn, 0 /* thorough */, 1 /* must succeed */); |
3395 | 0 | trace_shadow_emulate_other(TRC_SHADOW_EMULATE_UNSHADOW_EVTINJ, |
3396 | 0 | va, gfn); |
3397 | 0 | return EXCRET_fault_fixed; |
3398 | 0 | } |
3399 | 0 | } |
3400 | 0 |
|
3401 | 0 | SHADOW_PRINTK("emulate: eip=%#lx esp=%#lx\n", regs->rip, regs->rsp); |
3402 | 0 |
|
3403 | 0 | emul_ops = shadow_init_emulation(&emul_ctxt, regs); |
3404 | 0 |
|
3405 | 0 | r = x86_emulate(&emul_ctxt.ctxt, emul_ops); |
3406 | 0 |
|
3407 | 0 | if ( r == X86EMUL_EXCEPTION ) |
3408 | 0 | { |
3409 | 0 | /* |
3410 | 0 | * This emulation covers writes to shadow pagetables. We tolerate #PF |
3411 | 0 | * (from accesses spanning pages, concurrent pagetable updates from other
3412 | 0 | * vcpus, etc) and #GP[0]/#SS[0] (from segmentation errors). Anything |
3413 | 0 | * else is an emulation bug, or a guest playing with the instruction |
3414 | 0 | * stream under Xen's feet. |
3415 | 0 | */ |
3416 | 0 | if ( emul_ctxt.ctxt.event.type == X86_EVENTTYPE_HW_EXCEPTION && |
3417 | 0 | ((emul_ctxt.ctxt.event.vector == TRAP_page_fault) || |
3418 | 0 | (((emul_ctxt.ctxt.event.vector == TRAP_gp_fault) || |
3419 | 0 | (emul_ctxt.ctxt.event.vector == TRAP_stack_error)) && |
3420 | 0 | emul_ctxt.ctxt.event.error_code == 0)) ) |
3421 | 0 | hvm_inject_event(&emul_ctxt.ctxt.event); |
3422 | 0 | else |
3423 | 0 | { |
3424 | 0 | SHADOW_PRINTK( |
3425 | 0 | "Unexpected event (type %u, vector %#x) from emulation\n", |
3426 | 0 | emul_ctxt.ctxt.event.type, emul_ctxt.ctxt.event.vector); |
3427 | 0 | r = X86EMUL_UNHANDLEABLE; |
3428 | 0 | } |
3429 | 0 | } |
3430 | 0 |
|
3431 | 0 | /* |
3432 | 0 | * NB. We do not unshadow on X86EMUL_EXCEPTION. It's not clear that it |
3433 | 0 | * would be a good unshadow hint. If we *do* decide to unshadow-on-fault |
3434 | 0 | * then it must be 'failable': we cannot require the unshadow to succeed. |
3435 | 0 | */ |
3436 | 0 | if ( r == X86EMUL_UNHANDLEABLE || r == X86EMUL_UNIMPLEMENTED ) |
3437 | 0 | { |
3438 | 0 | perfc_incr(shadow_fault_emulate_failed); |
3439 | 0 | #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION |
3440 | 0 | if ( fast_emul ) |
3441 | 0 | { |
3442 | 0 | perfc_incr(shadow_fault_fast_emulate_fail); |
3443 | 0 | v->arch.paging.last_write_emul_ok = 0; |
3444 | 0 | } |
3445 | 0 | #endif |
3446 | 0 | SHADOW_PRINTK("emulator failure (rc=%d), unshadowing mfn %#lx\n", |
3447 | 0 | r, mfn_x(gmfn)); |
3448 | 0 | /* If this is actually a page table, then we have a bug, and need |
3449 | 0 | * to support more operations in the emulator. More likely, |
3450 | 0 | * though, this is a hint that this page should not be shadowed. */ |
3451 | 0 | shadow_remove_all_shadows(d, gmfn); |
3452 | 0 |
|
3453 | 0 | trace_shadow_emulate_other(TRC_SHADOW_EMULATE_UNSHADOW_UNHANDLED, |
3454 | 0 | va, gfn); |
3455 | 0 | goto emulate_done; |
3456 | 0 | } |
3457 | 0 |
|
3458 | 0 | #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION |
3459 | 0 | /* Record the successful emulation as a heuristic for the next fault
3460 | 0 | * on the same frame, to accelerate it. But be careful to verify that
3461 | 0 | * the frame is still a page table: an unshadow triggered by write
3462 | 0 | * emulation normally requires a re-sync with the guest page table to
3463 | 0 | * recover r/w permission. Recording such a case here would cause
3464 | 0 | * unexpected extra shadow faults, because propagation would be
3465 | 0 | * skipped.
3466 | 0 | */ |
3467 | 0 | if ( (r == X86EMUL_OKAY) && sh_mfn_is_a_page_table(gmfn) ) |
3468 | 0 | { |
3469 | 0 | if ( !fast_emul ) |
3470 | 0 | { |
3471 | 0 | v->arch.paging.shadow.last_emulated_frame = va >> PAGE_SHIFT; |
3472 | 0 | v->arch.paging.shadow.last_emulated_mfn = mfn_x(gmfn); |
3473 | 0 | v->arch.paging.last_write_emul_ok = 1; |
3474 | 0 | } |
3475 | 0 | } |
3476 | 0 | else if ( fast_emul ) |
3477 | 0 | v->arch.paging.last_write_emul_ok = 0; |
3478 | 0 | #endif |
3479 | 0 |
|
3480 | 0 | if ( emul_ctxt.ctxt.retire.singlestep ) |
3481 | 0 | hvm_inject_hw_exception(TRAP_debug, X86_EVENT_NO_EC); |
3482 | 0 |
|
3483 | 0 | #if GUEST_PAGING_LEVELS == 3 /* PAE guest */ |
3484 | 0 | /* |
3485 | 0 | * If there are no pending actions, emulate up to four extra instructions |
3486 | 0 | * in the hope of catching the "second half" of a 64-bit pagetable write. |
3487 | 0 | */ |
3488 | 0 | if ( r == X86EMUL_OKAY && !emul_ctxt.ctxt.retire.raw ) |
3489 | 0 | { |
3490 | 0 | int i, emulation_count=0; |
3491 | 0 | this_cpu(trace_emulate_initial_va) = va; |
3492 | 0 |
|
3493 | 0 | for ( i = 0 ; i < 4 ; i++ ) |
3494 | 0 | { |
3495 | 0 | shadow_continue_emulation(&emul_ctxt, regs); |
3496 | 0 | v->arch.paging.last_write_was_pt = 0; |
3497 | 0 | r = x86_emulate(&emul_ctxt.ctxt, emul_ops); |
3498 | 0 |
|
3499 | 0 | /* |
3500 | 0 | * Only continue the search for the second half if there are no |
3501 | 0 | * exceptions or pending actions. Otherwise, give up and re-enter |
3502 | 0 | * the guest. |
3503 | 0 | */ |
3504 | 0 | if ( r == X86EMUL_OKAY && !emul_ctxt.ctxt.retire.raw ) |
3505 | 0 | { |
3506 | 0 | emulation_count++; |
3507 | 0 | if ( v->arch.paging.last_write_was_pt ) |
3508 | 0 | { |
3509 | 0 | perfc_incr(shadow_em_ex_pt); |
3510 | 0 | TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EMULATION_2ND_PT_WRITTEN); |
3511 | 0 | break; /* Don't emulate past the other half of the write */ |
3512 | 0 | } |
3513 | 0 | else |
3514 | 0 | perfc_incr(shadow_em_ex_non_pt); |
3515 | 0 | } |
3516 | 0 | else |
3517 | 0 | { |
3518 | 0 | perfc_incr(shadow_em_ex_fail); |
3519 | 0 | TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EMULATION_LAST_FAILED); |
3520 | 0 |
|
3521 | 0 | if ( emul_ctxt.ctxt.retire.singlestep ) |
3522 | 0 | hvm_inject_hw_exception(TRAP_debug, X86_EVENT_NO_EC); |
3523 | 0 |
|
3524 | 0 | break; /* Don't emulate again if we failed! */ |
3525 | 0 | } |
3526 | 0 | } |
3527 | 0 | this_cpu(trace_extra_emulation_count)=emulation_count; |
3528 | 0 | } |
3529 | 0 | #endif /* PAE guest */ |
3530 | 0 |
|
3531 | 0 | trace_shadow_emulate(gw.l1e, va); |
3532 | 0 | emulate_done: |
3533 | 0 | SHADOW_PRINTK("emulated\n"); |
3534 | 0 | return EXCRET_fault_fixed; |
3535 | 0 |
|
3536 | 0 | mmio: |
3537 | 0 | if ( !guest_mode(regs) ) |
3538 | 0 | goto not_a_shadow_fault; |
3539 | 0 | perfc_incr(shadow_fault_mmio); |
3540 | 0 | sh_audit_gw(v, &gw); |
3541 | 0 | SHADOW_PRINTK("mmio %#"PRIpaddr"\n", gpa); |
3542 | 0 | shadow_audit_tables(v); |
3543 | 0 | reset_early_unshadow(v); |
3544 | 0 | paging_unlock(d); |
3545 | 0 | put_gfn(d, gfn_x(gfn)); |
3546 | 0 | trace_shadow_gen(TRC_SHADOW_MMIO, va); |
3547 | 0 | return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT, access) |
3548 | 0 | ? EXCRET_fault_fixed : 0); |
3549 | 0 |
|
3550 | 0 | not_a_shadow_fault: |
3551 | 0 | sh_audit_gw(v, &gw); |
3552 | 0 | SHADOW_PRINTK("not a shadow fault\n"); |
3553 | 0 | shadow_audit_tables(v); |
3554 | 0 | reset_early_unshadow(v); |
3555 | 0 | paging_unlock(d); |
3556 | 0 | put_gfn(d, gfn_x(gfn)); |
3557 | 0 |
|
3558 | 0 | propagate: |
3559 | 0 | trace_not_shadow_fault(gw.l1e, va); |
3560 | 0 |
|
3561 | 0 | return 0; |
3562 | 0 | }
Unexecuted instantiation: multi.c:sh_page_fault__guest_3
Unexecuted instantiation: multi.c:sh_page_fault__guest_4
Unexecuted instantiation: multi.c:sh_page_fault__guest_2
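One detail of sh_page_fault() worth seeing in isolation is the fast MMIO path's address reconstruction: the gfn recovered from the magic shadow l1e supplies the frame, and the faulting VA supplies the byte offset within the page. A standalone sketch of that arithmetic follows; the 4K page constants are local stand-ins for the Xen macros:

/* Standalone sketch of the fast-path MMIO address reconstruction:
 *   gpa = (gfn << PAGE_SHIFT) | (va & ~PAGE_MASK)
 * The page-size constants are local stand-ins for the Xen macros. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

static uint64_t mmio_gpa(uint64_t gfn, uint64_t va)
{
    /* Frame number from the magic l1e, byte offset from the faulting VA. */
    return (gfn << PAGE_SHIFT) | (va & ~PAGE_MASK);
}

int main(void)
{
    /* e.g. a fault at VA 0xfeb00a30 whose magic entry recorded gfn 0xfeb00 */
    printf("gpa = %#llx\n",
           (unsigned long long)mmio_gpa(0xfeb00, 0xfeb00a30));
    return 0;
}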
3563 | 0 |
|
3564 | 0 |
|
3565 | 0 | /* |
3566 | 0 | * Called when the guest requests an invlpg. Returns true if the invlpg |
3567 | 0 | * instruction should be issued on the hardware, or false if it's safe not |
3568 | 0 | * to do so. |
3569 | 0 | */ |
3570 | 0 | static bool sh_invlpg(struct vcpu *v, unsigned long va) |
3571 | 0 | { |
3572 | 0 | mfn_t sl1mfn; |
3573 | 0 | shadow_l2e_t sl2e; |
3574 | 0 |
|
3575 | 0 | perfc_incr(shadow_invlpg); |
3576 | 0 |
|
3577 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) |
3578 | 0 | /* No longer safe to use cached gva->gfn translations */ |
3579 | 0 | vtlb_flush(v); |
3580 | 0 | #endif |
3581 | 0 |
|
3582 | 0 | #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION |
3583 | 0 | v->arch.paging.last_write_emul_ok = 0; |
3584 | 0 | #endif |
3585 | 0 |
|
3586 | 0 | /* First check that we can safely read the shadow l2e. On SMP/PAE Linux,
3587 | 0 | * as many as 6% of invlpg calls can hit addresses where we haven't
3588 | 0 | * shadowed the l2 yet. */
3589 | 0 | #if SHADOW_PAGING_LEVELS == 4 |
3590 | 0 | { |
3591 | 0 | shadow_l3e_t sl3e; |
3592 | 0 | if ( !(shadow_l4e_get_flags( |
3593 | 0 | sh_linear_l4_table(v)[shadow_l4_linear_offset(va)]) |
3594 | 0 | & _PAGE_PRESENT) ) |
3595 | 0 | return false; |
3596 | 0 | /* This must still be a copy-from-user because we don't have the |
3597 | 0 | * paging lock, and the higher-level shadows might disappear |
3598 | 0 | * under our feet. */ |
3599 | 0 | if ( __copy_from_user(&sl3e, (sh_linear_l3_table(v) |
3600 | 0 | + shadow_l3_linear_offset(va)), |
3601 | 0 | sizeof (sl3e)) != 0 ) |
3602 | 0 | { |
3603 | 0 | perfc_incr(shadow_invlpg_fault); |
3604 | 0 | return false; |
3605 | 0 | } |
3606 | 0 | if ( !(shadow_l3e_get_flags(sl3e) & _PAGE_PRESENT) ) |
3607 | 0 | return false; |
3608 | 0 | } |
3609 | 0 | #else /* SHADOW_PAGING_LEVELS == 3 */ |
3610 | 0 | if ( !(l3e_get_flags(v->arch.paging.shadow.l3table[shadow_l3_linear_offset(va)]) |
3611 | 0 | & _PAGE_PRESENT) ) |
3612 | 0 | // no need to flush anything if there's no SL2... |
3613 | 0 | return false; |
3614 | 0 | #endif |
3615 | 0 |
|
3616 | 0 | /* This must still be a copy-from-user because we don't have the shadow |
3617 | 0 | * lock, and the higher-level shadows might disappear under our feet. */ |
3618 | 0 | if ( __copy_from_user(&sl2e, |
3619 | 0 | sh_linear_l2_table(v) + shadow_l2_linear_offset(va), |
3620 | 0 | sizeof (sl2e)) != 0 ) |
3621 | 0 | { |
3622 | 0 | perfc_incr(shadow_invlpg_fault); |
3623 | 0 | return false; |
3624 | 0 | } |
3625 | 0 |
|
3626 | 0 | // If there's nothing shadowed for this particular sl2e, then |
3627 | 0 | // there is no need to do an invlpg, either... |
3628 | 0 | // |
3629 | 0 | if ( !(shadow_l2e_get_flags(sl2e) & _PAGE_PRESENT) ) |
3630 | 0 | return false; |
3631 | 0 |
|
3632 | 0 | // Check to see if the SL2 is a splintered superpage... |
3633 | 0 | // If so, then we'll need to flush the entire TLB (because that's |
3634 | 0 | // easier than invalidating all of the individual 4K pages). |
3635 | 0 | // |
3636 | 0 | sl1mfn = shadow_l2e_get_mfn(sl2e); |
3637 | 0 | if ( mfn_to_page(sl1mfn)->u.sh.type |
3638 | 0 | == SH_type_fl1_shadow ) |
3639 | 0 | { |
3640 | 0 | flush_tlb_local(); |
3641 | 0 | return false; |
3642 | 0 | } |
3643 | 0 |
|
3644 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
3645 | 0 | /* Check to see if the SL1 is out of sync. */ |
3646 | 0 | { |
3647 | 0 | struct domain *d = v->domain; |
3648 | 0 | mfn_t gl1mfn = backpointer(mfn_to_page(sl1mfn)); |
3649 | 0 | struct page_info *pg = mfn_to_page(gl1mfn); |
3650 | 0 | if ( mfn_valid(gl1mfn) |
3651 | 0 | && page_is_out_of_sync(pg) ) |
3652 | 0 | { |
3653 | 0 | /* The test above may give false positives, since we don't |
3654 | 0 | * hold the paging lock yet. Check again with the lock held. */ |
3655 | 0 | paging_lock(d); |
3656 | 0 |
|
3657 | 0 | /* This must still be a copy-from-user because we didn't |
3658 | 0 | * have the paging lock last time we checked, and the |
3659 | 0 | * higher-level shadows might have disappeared under our |
3660 | 0 | * feet. */ |
3661 | 0 | if ( __copy_from_user(&sl2e, |
3662 | 0 | sh_linear_l2_table(v) |
3663 | 0 | + shadow_l2_linear_offset(va), |
3664 | 0 | sizeof (sl2e)) != 0 ) |
3665 | 0 | { |
3666 | 0 | perfc_incr(shadow_invlpg_fault); |
3667 | 0 | paging_unlock(d); |
3668 | 0 | return false; |
3669 | 0 | } |
3670 | 0 |
|
3671 | 0 | if ( !(shadow_l2e_get_flags(sl2e) & _PAGE_PRESENT) ) |
3672 | 0 | { |
3673 | 0 | paging_unlock(d); |
3674 | 0 | return false; |
3675 | 0 | } |
3676 | 0 |
|
3677 | 0 | sl1mfn = shadow_l2e_get_mfn(sl2e); |
3678 | 0 | gl1mfn = backpointer(mfn_to_page(sl1mfn)); |
3679 | 0 | pg = mfn_to_page(gl1mfn); |
3680 | 0 |
|
3681 | 0 | if ( likely(sh_mfn_is_a_page_table(gl1mfn) |
3682 | 0 | && page_is_out_of_sync(pg) ) ) |
3683 | 0 | { |
3684 | 0 | shadow_l1e_t *sl1; |
3685 | 0 | sl1 = sh_linear_l1_table(v) + shadow_l1_linear_offset(va); |
3686 | 0 | /* Remove the shadow entry that maps this VA */ |
3687 | 0 | (void) shadow_set_l1e(d, sl1, shadow_l1e_empty(), |
3688 | 0 | p2m_invalid, sl1mfn); |
3689 | 0 | } |
3690 | 0 | paging_unlock(d); |
3691 | 0 | /* Need the invlpg to pick up the disappearance of the sl1e */
3692 | 0 | return true; |
3693 | 0 | } |
3694 | 0 | } |
3695 | 0 | #endif |
3696 | 0 |
|
3697 | 0 | return true; |
3698 | 0 | }
Unexecuted instantiation: multi.c:sh_invlpg__guest_4
Unexecuted instantiation: multi.c:sh_invlpg__guest_2
Unexecuted instantiation: multi.c:sh_invlpg__guest_3
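The out-of-sync branch of sh_invlpg() uses a classic unlocked-check / locked-recheck shape: a cheap test without the paging lock, then, only if it looks interesting, the lock is taken and the test repeated because the unlocked view may have been stale. A small stand-alone illustration of that shape, with a pthread mutex and a boolean flag standing in for the paging lock and the OOS state (not Xen code):

/* Toy illustration of the unlocked-check / locked-recheck shape; build with
 * -pthread.  The flag and mutex below stand in for the OOS state and the
 * paging lock. */
#include <stdio.h>
#include <stdbool.h>
#include <pthread.h>

static pthread_mutex_t paging_lock = PTHREAD_MUTEX_INITIALIZER;
static volatile bool page_out_of_sync = true;  /* may change under our feet */

static bool resync_if_needed(void)
{
    if ( !page_out_of_sync )          /* cheap unlocked test */
        return false;

    pthread_mutex_lock(&paging_lock);
    if ( !page_out_of_sync )          /* recheck: the unlocked view was stale */
    {
        pthread_mutex_unlock(&paging_lock);
        return false;
    }
    page_out_of_sync = false;         /* do the real work under the lock */
    pthread_mutex_unlock(&paging_lock);
    return true;
}

int main(void)
{
    printf("resynced: %d\n", resync_if_needed());
    printf("resynced: %d\n", resync_if_needed());
    return 0;
}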
3699 | 0 |
|
3700 | 0 |
|
3701 | 0 | static unsigned long |
3702 | 0 | sh_gva_to_gfn(struct vcpu *v, struct p2m_domain *p2m, |
3703 | 0 | unsigned long va, uint32_t *pfec) |
3704 | 0 | /* Called to translate a guest virtual address to what the *guest* |
3705 | 0 | * pagetables would map it to. */ |
3706 | 0 | { |
3707 | 0 | walk_t gw; |
3708 | 0 | gfn_t gfn; |
3709 | 0 | bool walk_ok; |
3710 | 0 |
|
3711 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) |
3712 | 0 | /* Check the vTLB cache first */ |
3713 | 0 | unsigned long vtlb_gfn = vtlb_lookup(v, va, *pfec); |
3714 | 0 | if ( vtlb_gfn != gfn_x(INVALID_GFN) ) |
3715 | 0 | return vtlb_gfn; |
3716 | 0 | #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */ |
3717 | 0 |
|
3718 | 0 | if ( !(walk_ok = sh_walk_guest_tables(v, va, &gw, *pfec)) ) |
3719 | 0 | { |
3720 | 0 | *pfec = gw.pfec; |
3721 | 0 | return gfn_x(INVALID_GFN); |
3722 | 0 | } |
3723 | 0 | gfn = guest_walk_to_gfn(&gw); |
3724 | 0 |
|
3725 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) |
3726 | 0 | /* Remember this successful VA->GFN translation for later. */ |
3727 | 0 | vtlb_insert(v, va >> PAGE_SHIFT, gfn_x(gfn), *pfec); |
3728 | 0 | #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */ |
3729 | 0 |
|
3730 | 0 | return gfn_x(gfn); |
3731 | 0 | }
Unexecuted instantiation: multi.c:sh_gva_to_gfn__guest_2
Unexecuted instantiation: multi.c:sh_gva_to_gfn__guest_3
Unexecuted instantiation: multi.c:sh_gva_to_gfn__guest_4
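When SHOPT_VIRTUAL_TLB is enabled, sh_gva_to_gfn() above follows a check-the-cache-then-walk pattern: consult the software vTLB first, do the full guest walk only on a miss, and remember successful translations. A toy, self-contained version of that pattern is sketched below; it is not Xen's vtlb code, and the identity-mapping walk is purely illustrative:

/* Toy stand-in for the SHOPT_VIRTUAL_TLB pattern: consult a small software
 * cache of VA->GFN translations first, fall back to a full walk on a miss,
 * and remember successful translations. */
#include <stdio.h>

#define VTLB_ENTRIES 16
#define INVALID_GFN  (~0UL)
#define PAGE_SHIFT   12

struct vtlb_entry { unsigned long vpn, gfn; int valid; };
static struct vtlb_entry vtlb[VTLB_ENTRIES];

/* Stand-in for the real guest pagetable walk: identity map for the demo. */
static unsigned long walk_guest_tables(unsigned long va)
{
    return va >> PAGE_SHIFT;
}

static unsigned long gva_to_gfn(unsigned long va)
{
    unsigned long vpn = va >> PAGE_SHIFT;
    struct vtlb_entry *e = &vtlb[vpn % VTLB_ENTRIES];
    unsigned long gfn;

    if ( e->valid && e->vpn == vpn )      /* cache hit: skip the walk */
        return e->gfn;

    gfn = walk_guest_tables(va);          /* miss: do the full walk */
    if ( gfn != INVALID_GFN )             /* remember successful walks only */
    {
        e->vpn = vpn;
        e->gfn = gfn;
        e->valid = 1;
    }
    return gfn;
}

int main(void)
{
    printf("first lookup:  gfn %#lx\n", gva_to_gfn(0x7f001234UL));
    printf("second lookup: gfn %#lx (from the toy vtlb)\n",
           gva_to_gfn(0x7f001234UL));
    return 0;
}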
3732 | 0 |
|
3733 | 0 |
|
3734 | 0 | static inline void |
3735 | 0 | sh_update_linear_entries(struct vcpu *v) |
3736 | 0 | /* Sync up all the linear mappings for this vcpu's pagetables */ |
3737 | 0 | { |
3738 | 0 | struct domain *d = v->domain; |
3739 | 0 |
|
3740 | 0 | /* Linear pagetables in PV guests |
3741 | 0 | * ------------------------------ |
3742 | 0 | * |
3743 | 0 | * Guest linear pagetables, which map the guest pages, are at |
3744 | 0 | * LINEAR_PT_VIRT_START. Shadow linear pagetables, which map the |
3745 | 0 | * shadows, are at SH_LINEAR_PT_VIRT_START. Most of the time these |
3746 | 0 | * are set up at shadow creation time, but (of course!) the PAE case |
3747 | 0 | * is subtler. Normal linear mappings are made by having an entry |
3748 | 0 | * in the top-level table that points to itself (shadow linear) or |
3749 | 0 | * to the guest top-level table (guest linear). For PAE, to set up |
3750 | 0 | * a linear map requires us to copy the four top-level entries into |
3751 | 0 | * level-2 entries. That means that every time we change a PAE l3e, |
3752 | 0 | * we need to reflect the change into the copy. |
3753 | 0 | * |
3754 | 0 | * Linear pagetables in HVM guests |
3755 | 0 | * ------------------------------- |
3756 | 0 | * |
3757 | 0 | * For HVM guests, the linear pagetables are installed in the monitor |
3758 | 0 | * tables (since we can't put them in the shadow). Shadow linear |
3759 | 0 | * pagetables, which map the shadows, are at SH_LINEAR_PT_VIRT_START, |
3760 | 0 | * and we use the linear pagetable slot at LINEAR_PT_VIRT_START for |
3761 | 0 | * a linear pagetable of the monitor tables themselves. We have |
3762 | 0 | * the same issue of having to re-copy PAE l3 entries whenever we use
3763 | 0 | * PAE shadows. |
3764 | 0 | * |
3765 | 0 | * Because HVM guests run on the same monitor tables regardless of the |
3766 | 0 | * shadow tables in use, the linear mapping of the shadow tables has to |
3767 | 0 | * be updated every time v->arch.shadow_table changes. |
3768 | 0 | */ |
3769 | 0 |
|
3770 | 0 | /* Don't try to update the monitor table if it doesn't exist */ |
3771 | 0 | if ( shadow_mode_external(d) |
3772 | 0 | && pagetable_get_pfn(v->arch.monitor_table) == 0 ) |
3773 | 0 | return; |
3774 | 0 |
|
3775 | 0 | #if SHADOW_PAGING_LEVELS == 4 |
3776 | 0 |
|
3777 | 0 | /* For PV, one l4e points at the guest l4, one points at the shadow |
3778 | 0 | * l4. No maintenance required. |
3779 | 0 | * For HVM, just need to update the l4e that points to the shadow l4. */ |
3780 | 0 |
|
3781 | 0 | if ( shadow_mode_external(d) ) |
3782 | 0 | { |
3783 | 0 | /* Use the linear map if we can; otherwise make a new mapping */ |
3784 | 0 | if ( v == current ) |
3785 | 0 | { |
3786 | 0 | __linear_l4_table[l4_linear_offset(SH_LINEAR_PT_VIRT_START)] = |
3787 | 0 | l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]), |
3788 | 0 | __PAGE_HYPERVISOR_RW); |
3789 | 0 | } |
3790 | 0 | else |
3791 | 0 | { |
3792 | 0 | l4_pgentry_t *ml4e; |
3793 | 0 | ml4e = map_domain_page(pagetable_get_mfn(v->arch.monitor_table)); |
3794 | 0 | ml4e[l4_table_offset(SH_LINEAR_PT_VIRT_START)] = |
3795 | 0 | l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]), |
3796 | 0 | __PAGE_HYPERVISOR_RW); |
3797 | 0 | unmap_domain_page(ml4e); |
3798 | 0 | } |
3799 | 0 | } |
3800 | 0 |
|
3801 | 0 | #elif SHADOW_PAGING_LEVELS == 3 |
3802 | 0 |
|
3803 | 0 | /* PV: XXX |
3804 | 0 | * |
3805 | 0 | * HVM: To give ourselves a linear map of the shadows, we need to |
3806 | 0 | * extend a PAE shadow to 4 levels. We do this by having a monitor |
3807 | 0 | * l3 in slot 0 of the monitor l4 table, and copying the PAE l3 |
3808 | 0 | * entries into it. Then, by having the monitor l4e for shadow |
3809 | 0 | * pagetables also point to the monitor l4, we can use it to access |
3810 | 0 | * the shadows. |
3811 | 0 | */ |
3812 | 0 |
|
3813 | 0 | if ( shadow_mode_external(d) ) |
3814 | 0 | { |
3815 | 0 | /* Install copies of the shadow l3es into the monitor l2 table |
3816 | 0 | * that maps SH_LINEAR_PT_VIRT_START. */ |
3817 | 0 | shadow_l3e_t *sl3e; |
3818 | 0 | l2_pgentry_t *ml2e; |
3819 | 0 | int i; |
3820 | 0 |
|
3821 | 0 | /* Use linear mappings if we can; otherwise make new mappings */ |
3822 | 0 | if ( v == current ) |
3823 | 0 | ml2e = __linear_l2_table |
3824 | 0 | + l2_linear_offset(SH_LINEAR_PT_VIRT_START); |
3825 | 0 | else |
3826 | 0 | { |
3827 | 0 | mfn_t l3mfn, l2mfn; |
3828 | 0 | l4_pgentry_t *ml4e; |
3829 | 0 | l3_pgentry_t *ml3e; |
3830 | 0 | int linear_slot = shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START); |
3831 | 0 | ml4e = map_domain_page(pagetable_get_mfn(v->arch.monitor_table)); |
3832 | 0 |
|
3833 | 0 | ASSERT(l4e_get_flags(ml4e[linear_slot]) & _PAGE_PRESENT); |
3834 | 0 | l3mfn = l4e_get_mfn(ml4e[linear_slot]); |
3835 | 0 | ml3e = map_domain_page(l3mfn); |
3836 | 0 | unmap_domain_page(ml4e); |
3837 | 0 |
|
3838 | 0 | ASSERT(l3e_get_flags(ml3e[0]) & _PAGE_PRESENT); |
3839 | 0 | l2mfn = l3e_get_mfn(ml3e[0]); |
3840 | 0 | ml2e = map_domain_page(l2mfn); |
3841 | 0 | unmap_domain_page(ml3e); |
3842 | 0 | } |
3843 | 0 |
|
3844 | 0 | /* Shadow l3 tables are made up by sh_update_cr3 */ |
3845 | 0 | sl3e = v->arch.paging.shadow.l3table; |
3846 | 0 |
|
3847 | 0 | for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ ) |
3848 | 0 | { |
3849 | 0 | ml2e[i] = |
3850 | 0 | (shadow_l3e_get_flags(sl3e[i]) & _PAGE_PRESENT) |
3851 | 0 | ? l2e_from_mfn(shadow_l3e_get_mfn(sl3e[i]), |
3852 | 0 | __PAGE_HYPERVISOR_RW) |
3853 | 0 | : l2e_empty(); |
3854 | 0 | } |
3855 | 0 |
|
3856 | 0 | if ( v != current ) |
3857 | 0 | unmap_domain_page(ml2e); |
3858 | 0 | } |
3859 | 0 | else |
3860 | 0 | domain_crash(d); /* XXX */ |
3861 | 0 |
|
3862 | 0 | #else |
3863 | 0 | #error this should not happen |
3864 | 0 | #endif |
3865 | 0 |
|
3866 | 0 | if ( shadow_mode_external(d) ) |
3867 | 0 | { |
3868 | 0 | /* |
3869 | 0 | * Having modified the linear pagetable mapping, flush local host TLBs. |
3870 | 0 | * This was not needed when vmenter/vmexit always had the side effect |
3871 | 0 | * of flushing host TLBs but, with ASIDs, it is possible to finish |
3872 | 0 | * this CR3 update, vmenter the guest, vmexit due to a page fault, |
3873 | 0 | * without an intervening host TLB flush. Then the page fault code |
3874 | 0 | * could use the linear pagetable to read a top-level shadow page |
3875 | 0 | * table entry. But, without this change, it would fetch the wrong |
3876 | 0 | * value due to a stale TLB. |
3877 | 0 | */ |
3878 | 0 | flush_tlb_local(); |
3879 | 0 | } |
3880 | 0 | } |
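In the 3-level (PAE) branch above, the linear map is kept usable by copying the four shadow l3 entries into the monitor l2 slots that back SH_LINEAR_PT_VIRT_START, so every l3e change must be mirrored into that copy. A simplified sketch of the mirroring loop follows; the entry types are stand-ins, and a straight copy is used where the real code rebuilds each l2e from the l3e's mfn with hypervisor flags:

/* Simplified sketch of mirroring the four PAE l3 entries into the l2 slots
 * backing the linear map.  Toy entry types; a straight copy stands in for
 * rebuilding each l2e from the l3e's mfn with hypervisor flags. */
#include <stdio.h>
#include <stdint.h>

#define TOY_PRESENT 0x1u

typedef struct { uint64_t e; } l3e_t;
typedef struct { uint64_t e; } l2e_t;

static void sync_linear_slots(const l3e_t sl3e[4], l2e_t ml2e[4])
{
    int i;

    for ( i = 0; i < 4; i++ )
        ml2e[i].e = (sl3e[i].e & TOY_PRESENT)
            ? sl3e[i].e            /* present: mirror the entry */
            : 0;                   /* not present: clear the slot */
}

int main(void)
{
    l3e_t sl3e[4] = { { 0x1000 | TOY_PRESENT }, { 0 },
                      { 0x3000 | TOY_PRESENT }, { 0x4000 | TOY_PRESENT } };
    l2e_t ml2e[4];
    int i;

    sync_linear_slots(sl3e, ml2e);
    for ( i = 0; i < 4; i++ )
        printf("ml2e[%d] = %#llx\n", i, (unsigned long long)ml2e[i].e);
    return 0;
}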
3881 | 0 |
|
3882 | 0 |
|
3883 | 0 | /* Removes vcpu->arch.paging.shadow.guest_vtable and vcpu->arch.shadow_table[]. |
3884 | 0 | * Does all appropriate management/bookkeeping/refcounting/etc... |
3885 | 0 | */ |
3886 | 0 | static void |
3887 | 0 | sh_detach_old_tables(struct vcpu *v) |
3888 | 0 | { |
3889 | 0 | struct domain *d = v->domain; |
3890 | 0 | mfn_t smfn; |
3891 | 0 | int i = 0; |
3892 | 0 |
|
3893 | 0 | //// |
3894 | 0 | //// vcpu->arch.paging.shadow.guest_vtable |
3895 | 0 | //// |
3896 | 0 |
|
3897 | 0 | #if GUEST_PAGING_LEVELS == 3 |
3898 | 0 | /* PAE guests don't have a mapping of the guest top-level table */ |
3899 | 0 | ASSERT(v->arch.paging.shadow.guest_vtable == NULL); |
3900 | 0 | #else |
3901 | 0 | if ( v->arch.paging.shadow.guest_vtable ) |
3902 | 0 | { |
3903 | 0 | if ( shadow_mode_external(d) || shadow_mode_translate(d) ) |
3904 | 0 | unmap_domain_page_global(v->arch.paging.shadow.guest_vtable); |
3905 | 0 | v->arch.paging.shadow.guest_vtable = NULL; |
3906 | 0 | } |
3907 | 0 | #endif // GUEST_PAGING_LEVELS == 3
3908 | 0 |
|
3909 | 0 |
|
3910 | 0 | //// |
3911 | 0 | //// vcpu->arch.shadow_table[] |
3912 | 0 | //// |
3913 | 0 |
|
3914 | 0 | #if GUEST_PAGING_LEVELS == 3 |
3915 | 0 | /* PAE guests have four shadow_table entries */ |
3916 | 0 | for ( i = 0 ; i < 4 ; i++ ) |
3917 | 0 | #endif |
3918 | 0 | { |
3919 | 0 | smfn = pagetable_get_mfn(v->arch.shadow_table[i]); |
3920 | 0 | if ( mfn_x(smfn) ) |
3921 | 0 | sh_put_ref(d, smfn, 0); |
3922 | 0 | v->arch.shadow_table[i] = pagetable_null(); |
3923 | 0 | } |
3924 | 0 | } Unexecuted instantiation: multi.c:sh_detach_old_tables__guest_3
Unexecuted instantiation: multi.c:sh_detach_old_tables__guest_2
Unexecuted instantiation: multi.c:sh_detach_old_tables__guest_4
3925 | 0 |
|
3926 | 0 | /* Set up the top-level shadow and install it in slot 'slot' of shadow_table */ |
3927 | 0 | static void |
3928 | 0 | sh_set_toplevel_shadow(struct vcpu *v, |
3929 | 0 | int slot, |
3930 | 0 | mfn_t gmfn, |
3931 | 0 | unsigned int root_type) |
3932 | 0 | { |
3933 | 0 | mfn_t smfn; |
3934 | 0 | pagetable_t old_entry, new_entry; |
3935 | 0 |
|
3936 | 0 | struct domain *d = v->domain; |
3937 | 0 |
|
3938 | 0 | /* Remember the old contents of this slot */ |
3939 | 0 | old_entry = v->arch.shadow_table[slot]; |
3940 | 0 |
|
3941 | 0 | /* Now figure out the new contents: is this a valid guest MFN? */ |
3942 | 0 | if ( !mfn_valid(gmfn) ) |
3943 | 0 | { |
3944 | 0 | new_entry = pagetable_null(); |
3945 | 0 | goto install_new_entry; |
3946 | 0 | } |
3947 | 0 |
|
3948 | 0 | /* Guest mfn is valid: shadow it and install the shadow */ |
3949 | 0 | smfn = get_shadow_status(d, gmfn, root_type); |
3950 | 0 | if ( !mfn_valid(smfn) ) |
3951 | 0 | { |
3952 | 0 | /* Make sure there's enough free shadow memory. */ |
3953 | 0 | shadow_prealloc(d, root_type, 1); |
3954 | 0 | /* Shadow the page. */ |
3955 | 0 | smfn = sh_make_shadow(v, gmfn, root_type); |
3956 | 0 | } |
3957 | 0 | ASSERT(mfn_valid(smfn)); |
3958 | 0 |
|
3959 | 0 | /* Pin the shadow and put it (back) on the list of pinned shadows */ |
3960 | 0 | if ( sh_pin(d, smfn) == 0 ) |
3961 | 0 | { |
3962 | 0 | SHADOW_ERROR("can't pin %#lx as toplevel shadow\n", mfn_x(smfn)); |
3963 | 0 | domain_crash(d); |
3964 | 0 | } |
3965 | 0 |
|
3966 | 0 | /* Take a ref to this page: it will be released in sh_detach_old_tables() |
3967 | 0 | * or the next call to set_toplevel_shadow() */ |
3968 | 0 | if ( !sh_get_ref(d, smfn, 0) ) |
3969 | 0 | { |
3970 | 0 | SHADOW_ERROR("can't install %#lx as toplevel shadow\n", mfn_x(smfn)); |
3971 | 0 | domain_crash(d); |
3972 | 0 | } |
3973 | 0 |
|
3974 | 0 | new_entry = pagetable_from_mfn(smfn); |
3975 | 0 |
|
3976 | 0 | install_new_entry: |
3977 | 0 | /* Done. Install it */ |
3978 | 0 | SHADOW_PRINTK("%u/%u [%u] gmfn %#"PRI_mfn" smfn %#"PRI_mfn"\n", |
3979 | 0 | GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS, slot, |
3980 | 0 | mfn_x(gmfn), mfn_x(pagetable_get_mfn(new_entry))); |
3981 | 0 | v->arch.shadow_table[slot] = new_entry; |
3982 | 0 |
|
3983 | 0 | /* Decrement the refcount of the old contents of this slot */ |
3984 | 0 | if ( !pagetable_is_null(old_entry) ) { |
3985 | 0 | mfn_t old_smfn = pagetable_get_mfn(old_entry); |
3986 | 0 | /* Need to repin the old toplevel shadow if it's been unpinned |
3987 | 0 | * by shadow_prealloc(): in PV mode we're still running on this |
3988 | 0 | * shadow and it's not safe to free it yet. */ |
3989 | 0 | if ( !mfn_to_page(old_smfn)->u.sh.pinned && !sh_pin(d, old_smfn) ) |
3990 | 0 | { |
3991 | 0 | SHADOW_ERROR("can't re-pin %#lx\n", mfn_x(old_smfn)); |
3992 | 0 | domain_crash(d); |
3993 | 0 | } |
3994 | 0 | sh_put_ref(d, old_smfn, 0); |
3995 | 0 | } |
3996 | 0 | } |
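sh_set_toplevel_shadow() is careful about ordering: it takes a reference on the new shadow and installs it in the slot before dropping the reference held by the old contents, so the slot never points at something that could already have been freed. A minimal sketch of that slot-replacement discipline, with toy refcounted objects in place of shadow pages (none of the names below are Xen's):

/* Toy sketch of the slot-replacement discipline: reference and install the
 * new object before dropping the reference held by the old slot contents. */
#include <stdio.h>

struct obj { int refs; const char *name; };

static struct obj *get_ref(struct obj *o)
{
    if ( o )
        o->refs++;
    return o;
}

static void put_ref(struct obj *o)
{
    if ( o && --o->refs == 0 )
        printf("freeing %s\n", o->name);  /* a real allocator would free it */
}

static void set_slot(struct obj **slot, struct obj *new_obj)
{
    struct obj *old = *slot;    /* remember the old contents */

    *slot = get_ref(new_obj);   /* take a ref and install the new entry */
    put_ref(old);               /* only now drop the old reference */
}

int main(void)
{
    struct obj a = { 1, "shadow-A" }, b = { 1, "shadow-B" };
    struct obj *slot = get_ref(&a);

    set_slot(&slot, &b);        /* A's slot reference is dropped */
    put_ref(&a);                /* drop the creator's reference: A is freed */
    put_ref(slot);              /* teardown: drop B's slot reference */
    put_ref(&b);                /* drop the creator's reference: B is freed */
    return 0;
}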
3997 | 0 |
|
3998 | 0 |
|
3999 | 0 | static void |
4000 | 0 | sh_update_cr3(struct vcpu *v, int do_locking) |
4001 | 0 | /* Updates vcpu->arch.cr3 after the guest has changed CR3. |
4002 | 0 | * Paravirtual guests should set v->arch.guest_table (and guest_table_user, |
4003 | 0 | * if appropriate). |
4004 | 0 | * HVM guests should also make sure hvm_get_guest_cntl_reg(v, 3) works; |
4005 | 0 | * this function will call hvm_update_guest_cr(v, 3) to tell them where the |
4006 | 0 | * shadow tables are. |
4007 | 0 | * If do_locking != 0, assume we are being called from outside the |
4008 | 0 | * shadow code, and must take and release the paging lock; otherwise |
4009 | 0 | * that is the caller's responsibility. |
4010 | 0 | */ |
4011 | 0 | { |
4012 | 0 | struct domain *d = v->domain; |
4013 | 0 | mfn_t gmfn; |
4014 | 0 | #if GUEST_PAGING_LEVELS == 3 |
4015 | 0 | guest_l3e_t *gl3e; |
4016 | 0 | u32 guest_idx=0; |
4017 | 0 | int i; |
4018 | 0 | #endif |
4019 | 0 |
|
4020 | 0 | /* Don't do anything on an uninitialised vcpu */ |
4021 | 0 | if ( is_pv_domain(d) && !v->is_initialised ) |
4022 | 0 | { |
4023 | 0 | ASSERT(v->arch.cr3 == 0); |
4024 | 0 | return; |
4025 | 0 | } |
4026 | 0 |
|
4027 | 0 | if ( do_locking ) paging_lock(v->domain); |
4028 | 0 |
|
4029 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
4030 | 0 | /* Need to resync all the shadow entries on a TLB flush. Resync |
4031 | 0 | * the current vcpu's OOS pages before switching to the new shadow
4032 | 0 | * tables so that the VA hint is still valid. */ |
4033 | 0 | shadow_resync_current_vcpu(v); |
4034 | 0 | #endif |
4035 | 0 |
|
4036 | 0 | ASSERT(paging_locked_by_me(v->domain)); |
4037 | 0 | ASSERT(v->arch.paging.mode); |
4038 | 0 |
|
4039 | 0 | //// |
4040 | 0 | //// vcpu->arch.guest_table is already set |
4041 | 0 | //// |
4042 | 0 |
|
4043 | 0 | #ifndef NDEBUG |
4044 | 0 | /* Double-check that the HVM code has sent us a sane guest_table */ |
4045 | 0 | if ( is_hvm_domain(d) ) |
4046 | 0 | { |
4047 | 0 | ASSERT(shadow_mode_external(d)); |
4048 | 0 | if ( hvm_paging_enabled(v) ) |
4049 | 0 | ASSERT(pagetable_get_pfn(v->arch.guest_table)); |
4050 | 0 | else |
4051 | 0 | ASSERT(v->arch.guest_table.pfn |
4052 | 0 | == d->arch.paging.shadow.unpaged_pagetable.pfn); |
4053 | 0 | } |
4054 | 0 | #endif |
4055 | 0 |
|
4056 | 0 | SHADOW_PRINTK("%pv guest_table=%"PRI_mfn"\n", |
4057 | 0 | v, (unsigned long)pagetable_get_pfn(v->arch.guest_table)); |
4058 | 0 |
|
4059 | 0 | #if GUEST_PAGING_LEVELS == 4 |
4060 | 0 | if ( !(v->arch.flags & TF_kernel_mode) && !is_pv_32bit_domain(d) ) |
4061 | 0 | gmfn = pagetable_get_mfn(v->arch.guest_table_user); |
4062 | 0 | else |
4063 | 0 | #endif |
4064 | 0 | gmfn = pagetable_get_mfn(v->arch.guest_table); |
4065 | 0 |
|
4066 | 0 |
|
4067 | 0 | //// |
4068 | 0 | //// vcpu->arch.paging.shadow.guest_vtable |
4069 | 0 | //// |
4070 | 0 | #if GUEST_PAGING_LEVELS == 4 |
4071 | 0 | if ( shadow_mode_external(d) || shadow_mode_translate(d) ) |
4072 | 0 | { |
4073 | 0 | if ( v->arch.paging.shadow.guest_vtable ) |
4074 | 0 | unmap_domain_page_global(v->arch.paging.shadow.guest_vtable); |
4075 | 0 | v->arch.paging.shadow.guest_vtable = map_domain_page_global(gmfn); |
4076 | 0 | /* PAGING_LEVELS==4 implies 64-bit, which means that |
4077 | 0 | * map_domain_page_global can't fail */ |
4078 | 0 | BUG_ON(v->arch.paging.shadow.guest_vtable == NULL); |
4079 | 0 | } |
4080 | 0 | else |
4081 | 0 | v->arch.paging.shadow.guest_vtable = __linear_l4_table; |
4082 | 0 | #elif GUEST_PAGING_LEVELS == 3 |
4083 | 0 | /* On PAE guests we don't use a mapping of the guest's own top-level |
4084 | 0 | * table. We cache the current state of that table and shadow that, |
4085 | 0 | * until the next CR3 write makes us refresh our cache. */ |
4086 | 0 | ASSERT(v->arch.paging.shadow.guest_vtable == NULL); |
4087 | 0 |
|
4088 | 0 | if ( shadow_mode_external(d) ) |
4089 | 0 | /* Find where in the page the l3 table is */ |
4090 | 0 | guest_idx = guest_index((void *)v->arch.hvm_vcpu.guest_cr[3]); |
4091 | 0 | else |
4092 | 0 | /* PV guest: l3 is at the start of a page */ |
4093 | 0 | guest_idx = 0; |
4094 | 0 |
|
4095 | 0 | // Ignore the low 2 bits of guest_idx -- they are really just |
4096 | 0 | // cache control. |
4097 | 0 | guest_idx &= ~3; |
4098 | 0 |
|
4099 | 0 | gl3e = ((guest_l3e_t *)map_domain_page(gmfn)) + guest_idx; |
4100 | 0 | for ( i = 0; i < 4 ; i++ ) |
4101 | 0 | v->arch.paging.shadow.gl3e[i] = gl3e[i]; |
4102 | 0 | unmap_domain_page(gl3e); |
4103 | 0 | #elif GUEST_PAGING_LEVELS == 2 |
4104 | 0 | if ( shadow_mode_external(d) || shadow_mode_translate(d) ) |
4105 | 0 | { |
4106 | 0 | if ( v->arch.paging.shadow.guest_vtable ) |
4107 | 0 | unmap_domain_page_global(v->arch.paging.shadow.guest_vtable); |
4108 | 0 | v->arch.paging.shadow.guest_vtable = map_domain_page_global(gmfn); |
4109 | 0 | /* Does this really need map_domain_page_global? Handle the |
4110 | 0 | * error properly if so. */ |
4111 | 0 | BUG_ON(v->arch.paging.shadow.guest_vtable == NULL); /* XXX */ |
4112 | 0 | } |
4113 | 0 | else |
4114 | 0 | v->arch.paging.shadow.guest_vtable = __linear_l2_table; |
4115 | 0 | #else |
4116 | 0 | #error this should never happen |
4117 | 0 | #endif |
4118 | 0 |
|
4119 | 0 |
|
4120 | 0 | //// |
4121 | 0 | //// vcpu->arch.shadow_table[] |
4122 | 0 | //// |
4123 | 0 |
|
4124 | 0 | /* We revoke write access to the new guest toplevel page(s) before we |
4125 | 0 | * replace the old shadow pagetable(s), so that we can safely use the |
4126 | 0 | * (old) shadow linear maps in the writeable mapping heuristics. */ |
4127 | 0 | #if GUEST_PAGING_LEVELS == 2 |
4128 | 0 | if ( sh_remove_write_access(d, gmfn, 2, 0) != 0 ) |
4129 | 0 | flush_tlb_mask(d->domain_dirty_cpumask); |
4130 | 0 | sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow); |
4131 | 0 | #elif GUEST_PAGING_LEVELS == 3 |
4132 | 0 | /* PAE guests have four shadow_table entries, based on the |
4133 | 0 | * current values of the guest's four l3es. */ |
4134 | 0 | { |
4135 | 0 | int flush = 0; |
4136 | 0 | gfn_t gl2gfn; |
4137 | 0 | mfn_t gl2mfn; |
4138 | 0 | p2m_type_t p2mt; |
4139 | 0 | guest_l3e_t *gl3e = (guest_l3e_t*)&v->arch.paging.shadow.gl3e; |
4140 | 0 | /* First, make all four entries read-only. */ |
4141 | 0 | for ( i = 0; i < 4; i++ ) |
4142 | 0 | { |
4143 | 0 | if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT ) |
4144 | 0 | { |
4145 | 0 | gl2gfn = guest_l3e_get_gfn(gl3e[i]); |
4146 | 0 | gl2mfn = get_gfn_query_unlocked(d, gfn_x(gl2gfn), &p2mt); |
4147 | 0 | if ( p2m_is_ram(p2mt) ) |
4148 | 0 | flush |= sh_remove_write_access(d, gl2mfn, 2, 0); |
4149 | 0 | } |
4150 | 0 | } |
4151 | 0 | if ( flush ) |
4152 | 0 | flush_tlb_mask(d->domain_dirty_cpumask); |
4153 | 0 | /* Now install the new shadows. */ |
4154 | 0 | for ( i = 0; i < 4; i++ ) |
4155 | 0 | { |
4156 | 0 | if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT ) |
4157 | 0 | { |
4158 | 0 | gl2gfn = guest_l3e_get_gfn(gl3e[i]); |
4159 | 0 | gl2mfn = get_gfn_query_unlocked(d, gfn_x(gl2gfn), &p2mt); |
4160 | 0 | if ( p2m_is_ram(p2mt) ) |
4161 | 0 | sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3) |
4162 | 0 | ? SH_type_l2h_shadow |
4163 | 0 | : SH_type_l2_shadow); |
4164 | 0 | else |
4165 | 0 | sh_set_toplevel_shadow(v, i, INVALID_MFN, 0); |
4166 | 0 | } |
4167 | 0 | else |
4168 | 0 | sh_set_toplevel_shadow(v, i, INVALID_MFN, 0); |
4169 | 0 | } |
4170 | 0 | } |
4171 | 0 | #elif GUEST_PAGING_LEVELS == 4 |
4172 | 0 | if ( sh_remove_write_access(d, gmfn, 4, 0) != 0 ) |
4173 | 0 | flush_tlb_mask(d->domain_dirty_cpumask); |
4174 | 0 | sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow); |
4175 | 0 | if ( !shadow_mode_external(d) && !is_pv_32bit_domain(d) ) |
4176 | 0 | { |
4177 | 0 | mfn_t smfn = pagetable_get_mfn(v->arch.shadow_table[0]); |
4178 | 0 |
|
4179 | 0 | if ( !(v->arch.flags & TF_kernel_mode) && VM_ASSIST(d, m2p_strict) ) |
4180 | 0 | zap_ro_mpt(smfn); |
4181 | 0 | else if ( (v->arch.flags & TF_kernel_mode) && |
4182 | 0 | !VM_ASSIST(d, m2p_strict) ) |
4183 | 0 | fill_ro_mpt(smfn); |
4184 | 0 | } |
4185 | 0 | #else |
4186 | 0 | #error This should never happen |
4187 | 0 | #endif |
4188 | 0 |
|
4189 | 0 |
|
4190 | 0 | /// |
4191 | 0 | /// v->arch.paging.shadow.l3table |
4192 | 0 | /// |
4193 | 0 | #if SHADOW_PAGING_LEVELS == 3 |
4194 | 0 | { |
4195 | 0 | mfn_t smfn = pagetable_get_mfn(v->arch.shadow_table[0]); |
4196 | 0 | int i; |
4197 | 0 | for ( i = 0; i < 4; i++ ) |
4198 | 0 | { |
4199 | 0 | #if GUEST_PAGING_LEVELS == 2 |
4200 | 0 | /* 2-on-3: make a PAE l3 that points at the four-page l2 */ |
4201 | 0 | if ( i != 0 ) |
4202 | 0 | smfn = sh_next_page(smfn); |
4203 | 0 | #else |
4204 | 0 | /* 3-on-3: make a PAE l3 that points at the four l2 pages */ |
4205 | 0 | smfn = pagetable_get_mfn(v->arch.shadow_table[i]); |
4206 | 0 | #endif |
4207 | 0 | v->arch.paging.shadow.l3table[i] = |
4208 | 0 | (mfn_x(smfn) == 0) |
4209 | 0 | ? shadow_l3e_empty() |
4210 | 0 | : shadow_l3e_from_mfn(smfn, _PAGE_PRESENT); |
4211 | 0 | } |
4212 | 0 | } |
4213 | 0 | #endif /* SHADOW_PAGING_LEVELS == 3 */ |
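| | /* (A still-null shadow_table[] slot reads back as mfn 0 above, so
| |  * missing l2 shadows become empty l3es rather than stale entries.) */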
4214 | 0 |
|
4215 | 0 |
|
4216 | 0 | /// |
4217 | 0 | /// v->arch.cr3 |
4218 | 0 | /// |
4219 | 0 | if ( shadow_mode_external(d) ) |
4220 | 0 | { |
4221 | 0 | make_cr3(v, pagetable_get_mfn(v->arch.monitor_table)); |
4222 | 0 | } |
4223 | 0 | else // not shadow_mode_external... |
4224 | 0 | { |
4225 | 0 | /* We don't support PV except guest == shadow == config levels */ |
4226 | 0 | BUG_ON(GUEST_PAGING_LEVELS != SHADOW_PAGING_LEVELS); |
4227 | 0 | #if SHADOW_PAGING_LEVELS == 3 |
4228 | 0 | /* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated. |
4229 | 0 | * Don't use make_cr3 because (a) we know it's below 4GB, and |
4230 | 0 | * (b) it's not necessarily page-aligned, and make_cr3 takes a pfn */ |
4231 | 0 | ASSERT(virt_to_maddr(&v->arch.paging.shadow.l3table) <= 0xffffffe0ULL); |
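| | /* (0xffffffe0 above is the highest 32-byte-aligned address below
| |  * 4GB; a PAE CR3 value must be 32-byte aligned and fit in 32 bits,
| |  * so this checks the per-vcpu l3table is usable as CR3 directly.) */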
4232 | 0 | v->arch.cr3 = virt_to_maddr(&v->arch.paging.shadow.l3table); |
4233 | 0 | #else |
4234 | 0 | /* 4-on-4: Just use the shadow top-level directly */ |
4235 | 0 | make_cr3(v, pagetable_get_mfn(v->arch.shadow_table[0])); |
4236 | 0 | #endif |
4237 | 0 | } |
4238 | 0 |
|
4239 | 0 |
|
4240 | 0 | /// |
4241 | 0 | /// v->arch.hvm_vcpu.hw_cr[3] |
4242 | 0 | /// |
4243 | 0 | if ( shadow_mode_external(d) ) |
4244 | 0 | { |
4245 | 0 | ASSERT(is_hvm_domain(d)); |
4246 | 0 | #if SHADOW_PAGING_LEVELS == 3 |
4247 | 0 | /* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated */ |
4248 | 0 | v->arch.hvm_vcpu.hw_cr[3] = |
4249 | 0 | virt_to_maddr(&v->arch.paging.shadow.l3table); |
4250 | 0 | #else |
4251 | 0 | /* 4-on-4: Just use the shadow top-level directly */ |
4252 | 0 | v->arch.hvm_vcpu.hw_cr[3] = |
4253 | 0 | pagetable_get_paddr(v->arch.shadow_table[0]); |
4254 | 0 | #endif |
4255 | 0 | hvm_update_guest_cr(v, 3); |
4256 | 0 | } |
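| | /* (hvm_update_guest_cr(v, 3) above propagates hw_cr[3] into the
| |  * VMCS/VMCB, so hardware starts walking the new shadow tables on
| |  * the next guest entry.) */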
4257 | 0 |
|
4258 | 0 | /* Fix up the linear pagetable mappings */ |
4259 | 0 | sh_update_linear_entries(v); |
4260 | 0 |
|
4261 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) |
4262 | 0 | /* No longer safe to use cached gva->gfn translations */ |
4263 | 0 | vtlb_flush(v); |
4264 | 0 | #endif |
4265 | 0 |
|
4266 | 0 | #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION |
4267 | 0 | v->arch.paging.last_write_emul_ok = 0; |
4268 | 0 | #endif |
4269 | 0 |
|
4270 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
4271 | 0 | /* Need to resync all the shadow entries on a TLB flush. We only |
4272 | 0 | * update the shadows, leaving the pages out of sync. Also, we try |
4273 | 0 | * to skip synchronization of shadows not mapped in the new |
4274 | 0 | * tables. */ |
4275 | 0 | shadow_sync_other_vcpus(v); |
4276 | 0 | #endif |
4277 | 0 |
|
4278 | 0 | /* Release the lock, if we took it (otherwise it's the caller's problem) */ |
4279 | 0 | if ( do_locking ) paging_unlock(v->domain); |
4280 | 0 | } |
| | Unexecuted instantiation: multi.c:sh_update_cr3__guest_2
| | Unexecuted instantiation: multi.c:sh_update_cr3__guest_4
| | Unexecuted instantiation: multi.c:sh_update_cr3__guest_3
4281 | 0 |
|
4282 | 0 |
|
4283 | 0 | /**************************************************************************/ |
4284 | 0 | /* Functions to revoke guest rights */ |
4285 | 0 |
|
4286 | 0 | #if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC |
4287 | 0 | int sh_rm_write_access_from_sl1p(struct domain *d, mfn_t gmfn, |
4288 | 0 | mfn_t smfn, unsigned long off) |
4289 | 0 | { |
4290 | 0 | struct vcpu *curr = current; |
4291 | 0 | int r; |
4292 | 0 | shadow_l1e_t *sl1p, sl1e; |
4293 | 0 | struct page_info *sp; |
4294 | 0 |
|
4295 | 0 | ASSERT(mfn_valid(gmfn)); |
4296 | 0 | ASSERT(mfn_valid(smfn)); |
4297 | 0 |
|
4298 | 0 | /* Remember if we've been told that this process is being torn down */ |
4299 | 0 | if ( curr->domain == d ) |
4300 | 0 | curr->arch.paging.shadow.pagetable_dying |
4301 | 0 | = !!(mfn_to_page(gmfn)->shadow_flags & SHF_pagetable_dying); |
4302 | 0 |
|
4303 | 0 | sp = mfn_to_page(smfn); |
4304 | 0 |
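| | /* (The (smfn, off) pair is a remembered hint and may have gone
| |  * stale, so check below that smfn still looks like a live l1/fl1
| |  * shadow before trusting the cached slot.) */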
|
4305 | 0 | if ( ((sp->count_info & PGC_count_mask) != 0) |
4306 | 0 | || (sp->u.sh.type != SH_type_l1_shadow |
4307 | 0 | && sp->u.sh.type != SH_type_fl1_shadow) ) |
4308 | 0 | goto fail; |
4309 | 0 |
|
4310 | 0 | sl1p = map_domain_page(smfn); |
4311 | 0 | sl1p += off; |
4312 | 0 | sl1e = *sl1p; |
4313 | 0 | if ( ((shadow_l1e_get_flags(sl1e) & (_PAGE_PRESENT|_PAGE_RW)) |
4314 | 0 | != (_PAGE_PRESENT|_PAGE_RW)) |
4315 | 0 | || (mfn_x(shadow_l1e_get_mfn(sl1e)) != mfn_x(gmfn)) ) |
4316 | 0 | { |
4317 | 0 | unmap_domain_page(sl1p); |
4318 | 0 | goto fail; |
4319 | 0 | } |
4320 | 0 |
|
4321 | 0 | /* Found it! Need to remove its write permissions. */ |
4322 | 0 | sl1e = shadow_l1e_remove_flags(sl1e, _PAGE_RW); |
4323 | 0 | r = shadow_set_l1e(d, sl1p, sl1e, p2m_ram_rw, smfn); |
4324 | 0 | ASSERT( !(r & SHADOW_SET_ERROR) ); |
4325 | 0 |
|
4326 | 0 | unmap_domain_page(sl1p); |
4327 | 0 | perfc_incr(shadow_writeable_h_7); |
4328 | 0 | return 1; |
4329 | 0 |
|
4330 | 0 | fail: |
4331 | 0 | perfc_incr(shadow_writeable_h_8); |
4332 | 0 | return 0; |
4333 | 0 | } |
| | Unexecuted instantiation: sh_rm_write_access_from_sl1p__guest_4
| | Unexecuted instantiation: sh_rm_write_access_from_sl1p__guest_3
| | Unexecuted instantiation: sh_rm_write_access_from_sl1p__guest_2
4334 | 0 | #endif /* OOS */ |
4335 | 0 |
|
4336 | 0 | #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC |
4337 | 0 | static int sh_guess_wrmap(struct vcpu *v, unsigned long vaddr, mfn_t gmfn) |
4338 | 0 | /* Look up this vaddr in the current shadow and see if it's a writeable |
4339 | 0 | * mapping of this gmfn. If so, remove it. Returns 1 if it worked. */ |
4340 | 0 | { |
4341 | 0 | struct domain *d = v->domain; |
4342 | 0 | shadow_l1e_t sl1e, *sl1p; |
4343 | 0 | shadow_l2e_t *sl2p; |
4344 | 0 | shadow_l3e_t *sl3p; |
4345 | 0 | #if SHADOW_PAGING_LEVELS >= 4 |
4346 | 0 | shadow_l4e_t *sl4p; |
4347 | 0 | #endif |
4348 | 0 | mfn_t sl1mfn; |
4349 | 0 | int r; |
4350 | 0 |
|
4351 | 0 | /* Carefully look in the shadow linear map for the l1e we expect */ |
4352 | 0 | #if SHADOW_PAGING_LEVELS >= 4 |
4353 | 0 | /* Is a shadow linear map installed in the first place? */ |
4354 | 0 | sl4p = v->arch.paging.shadow.guest_vtable; |
4355 | 0 | sl4p += shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START); |
4356 | 0 | if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) ) |
4357 | 0 | return 0; |
4358 | 0 | sl4p = sh_linear_l4_table(v) + shadow_l4_linear_offset(vaddr); |
4359 | 0 | if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) ) |
4360 | 0 | return 0; |
4361 | 0 | sl3p = sh_linear_l3_table(v) + shadow_l3_linear_offset(vaddr); |
4362 | 0 | if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) ) |
4363 | 0 | return 0; |
4364 | 0 | #else /* SHADOW_PAGING_LEVELS == 3 */ |
4365 | 0 | sl3p = ((shadow_l3e_t *) v->arch.paging.shadow.l3table) |
4366 | 0 | + shadow_l3_linear_offset(vaddr); |
4367 | 0 | if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) ) |
4368 | 0 | return 0; |
4369 | 0 | #endif |
4370 | 0 | sl2p = sh_linear_l2_table(v) + shadow_l2_linear_offset(vaddr); |
4371 | 0 | if ( !(shadow_l2e_get_flags(*sl2p) & _PAGE_PRESENT) ) |
4372 | 0 | return 0; |
4373 | 0 | sl1p = sh_linear_l1_table(v) + shadow_l1_linear_offset(vaddr); |
4374 | 0 | sl1e = *sl1p; |
4375 | 0 | if ( ((shadow_l1e_get_flags(sl1e) & (_PAGE_PRESENT|_PAGE_RW)) |
4376 | 0 | != (_PAGE_PRESENT|_PAGE_RW)) |
4377 | 0 | || (mfn_x(shadow_l1e_get_mfn(sl1e)) != mfn_x(gmfn)) ) |
4378 | 0 | return 0; |
4379 | 0 |
|
4380 | 0 | /* Found it! Need to remove its write permissions. */ |
4381 | 0 | sl1mfn = shadow_l2e_get_mfn(*sl2p); |
4382 | 0 | sl1e = shadow_l1e_remove_flags(sl1e, _PAGE_RW); |
4383 | 0 | r = shadow_set_l1e(d, sl1p, sl1e, p2m_ram_rw, sl1mfn); |
4384 | 0 | if ( r & SHADOW_SET_ERROR ) { |
4385 | 0 | /* Can only currently happen if we found a grant-mapped |
4386 | 0 | * page. Just make the guess fail. */ |
4387 | 0 | return 0; |
4388 | 0 | } |
4389 | 0 | TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_WRMAP_GUESS_FOUND); |
4390 | 0 | return 1; |
4391 | 0 | } |
| | Unexecuted instantiation: multi.c:sh_guess_wrmap__guest_4
| | Unexecuted instantiation: multi.c:sh_guess_wrmap__guest_2
| | Unexecuted instantiation: multi.c:sh_guess_wrmap__guest_3
4392 | 0 | #endif |
4393 | 0 |
|
4394 | 0 | int sh_rm_write_access_from_l1(struct domain *d, mfn_t sl1mfn, |
4395 | 0 | mfn_t readonly_mfn) |
4396 | 0 | /* Excises all writeable mappings to readonly_mfn from this l1 shadow table */ |
4397 | 0 | { |
4398 | 0 | shadow_l1e_t *sl1e; |
4399 | 0 | int done = 0; |
4400 | 0 | int flags; |
4401 | 0 | #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC |
4402 | 0 | struct vcpu *curr = current; |
4403 | 0 | mfn_t base_sl1mfn = sl1mfn; /* Because sl1mfn changes in the foreach */ |
4404 | 0 | #endif |
4405 | 0 |
|
4406 | 0 | SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, |
4407 | 0 | { |
4408 | 0 | flags = shadow_l1e_get_flags(*sl1e); |
4409 | 0 | if ( (flags & _PAGE_PRESENT) |
4410 | 0 | && (flags & _PAGE_RW) |
4411 | 0 | && (mfn_x(shadow_l1e_get_mfn(*sl1e)) == mfn_x(readonly_mfn)) ) |
4412 | 0 | { |
4413 | 0 | shadow_l1e_t ro_sl1e = shadow_l1e_remove_flags(*sl1e, _PAGE_RW); |
4414 | 0 | (void) shadow_set_l1e(d, sl1e, ro_sl1e, p2m_ram_rw, sl1mfn); |
4415 | 0 | #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC |
4416 | 0 | /* Remember the last shadow that we shot a writeable mapping in */ |
4417 | 0 | if ( curr->domain == d ) |
4418 | 0 | curr->arch.paging.shadow.last_writeable_pte_smfn = mfn_x(base_sl1mfn); |
4419 | 0 | #endif |
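| | /* (The PGT count on the target page tracks its remaining writable
| |  * references, so once it hits zero no writeable mappings are left
| |  * and the walk can stop early.) */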
4420 | 0 | if ( (mfn_to_page(readonly_mfn)->u.inuse.type_info |
4421 | 0 | & PGT_count_mask) == 0 ) |
4422 | 0 | /* This breaks us cleanly out of the FOREACH macro */ |
4423 | 0 | done = 1; |
4424 | 0 | } |
4425 | 0 | }); |
4426 | 0 | return done; |
4427 | 0 | } |
| | Unexecuted instantiation: sh_rm_write_access_from_l1__guest_3
| | Unexecuted instantiation: sh_rm_write_access_from_l1__guest_2
| | Unexecuted instantiation: sh_rm_write_access_from_l1__guest_4
4428 | 0 |
|
4429 | 0 |
|
4430 | 0 | int sh_rm_mappings_from_l1(struct domain *d, mfn_t sl1mfn, mfn_t target_mfn) |
4431 | 0 | /* Excises all mappings to the guest frame from this shadow l1 table */ |
4432 | 0 | { |
4433 | 0 | shadow_l1e_t *sl1e; |
4434 | 0 | int done = 0; |
4435 | 0 | int flags; |
4436 | 0 |
|
4437 | 0 | SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, |
4438 | 0 | { |
4439 | 0 | flags = shadow_l1e_get_flags(*sl1e); |
4440 | 0 | if ( (flags & _PAGE_PRESENT) |
4441 | 0 | && (mfn_x(shadow_l1e_get_mfn(*sl1e)) == mfn_x(target_mfn)) ) |
4442 | 0 | { |
4443 | 0 | (void) shadow_set_l1e(d, sl1e, shadow_l1e_empty(), |
4444 | 0 | p2m_invalid, sl1mfn); |
4445 | 0 | if ( sh_check_page_has_no_refs(mfn_to_page(target_mfn)) ) |
4446 | 0 | /* This breaks us cleanly out of the FOREACH macro */ |
4447 | 0 | done = 1; |
4448 | 0 | } |
4449 | 0 | }); |
4450 | 0 | return done; |
4451 | 0 | } |
| | Unexecuted instantiation: sh_rm_mappings_from_l1__guest_4
| | Unexecuted instantiation: sh_rm_mappings_from_l1__guest_2
| | Unexecuted instantiation: sh_rm_mappings_from_l1__guest_3
4452 | 0 |
|
4453 | 0 | /**************************************************************************/ |
4454 | 0 | /* Functions to excise all pointers to shadows from higher-level shadows. */ |
4455 | 0 |
|
4456 | 0 | void sh_clear_shadow_entry(struct domain *d, void *ep, mfn_t smfn) |
4457 | 0 | /* Blank out a single shadow entry */ |
4458 | 0 | { |
4459 | 0 | switch ( mfn_to_page(smfn)->u.sh.type ) |
4460 | 0 | { |
4461 | 0 | case SH_type_l1_shadow: |
4462 | 0 | (void) shadow_set_l1e(d, ep, shadow_l1e_empty(), p2m_invalid, smfn); |
4463 | 0 | break; |
4464 | 0 | case SH_type_l2_shadow: |
4465 | 0 | #if GUEST_PAGING_LEVELS >= 3 |
4466 | 0 | case SH_type_l2h_shadow: |
4467 | 0 | #endif |
4468 | 0 | (void) shadow_set_l2e(d, ep, shadow_l2e_empty(), smfn); |
4469 | 0 | break; |
4470 | 0 | #if GUEST_PAGING_LEVELS >= 4 |
4471 | 0 | case SH_type_l3_shadow: |
4472 | 0 | (void) shadow_set_l3e(d, ep, shadow_l3e_empty(), smfn); |
4473 | 0 | break; |
4474 | 0 | case SH_type_l4_shadow: |
4475 | 0 | (void) shadow_set_l4e(d, ep, shadow_l4e_empty(), smfn); |
4476 | 0 | break; |
4477 | 0 | #endif |
4478 | 0 | default: BUG(); /* Called with the wrong kind of shadow. */ |
4479 | 0 | } |
4480 | 0 | } |
| | Unexecuted instantiation: sh_clear_shadow_entry__guest_2
| | Unexecuted instantiation: sh_clear_shadow_entry__guest_3
| | Unexecuted instantiation: sh_clear_shadow_entry__guest_4
4481 | 0 |
|
4482 | 0 | int sh_remove_l1_shadow(struct domain *d, mfn_t sl2mfn, mfn_t sl1mfn) |
4483 | 0 | /* Remove all mappings of this l1 shadow from this l2 shadow */ |
4484 | 0 | { |
4485 | 0 | shadow_l2e_t *sl2e; |
4486 | 0 | int done = 0; |
4487 | 0 | int flags; |
4488 | 0 |
|
4489 | 0 | SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, done, d, |
4490 | 0 | { |
4491 | 0 | flags = shadow_l2e_get_flags(*sl2e); |
4492 | 0 | if ( (flags & _PAGE_PRESENT) |
4493 | 0 | && (mfn_x(shadow_l2e_get_mfn(*sl2e)) == mfn_x(sl1mfn)) ) |
4494 | 0 | { |
4495 | 0 | (void) shadow_set_l2e(d, sl2e, shadow_l2e_empty(), sl2mfn); |
4496 | 0 | if ( mfn_to_page(sl1mfn)->u.sh.type == 0 ) |
4497 | 0 | /* This breaks us cleanly out of the FOREACH macro */ |
4498 | 0 | done = 1; |
4499 | 0 | } |
4500 | 0 | }); |
4501 | 0 | return done; |
4502 | 0 | } |
| | Unexecuted instantiation: sh_remove_l1_shadow__guest_2
| | Unexecuted instantiation: sh_remove_l1_shadow__guest_4
| | Unexecuted instantiation: sh_remove_l1_shadow__guest_3
4503 | 0 |
|
4504 | 0 | #if GUEST_PAGING_LEVELS >= 4 |
4505 | 0 | int sh_remove_l2_shadow(struct domain *d, mfn_t sl3mfn, mfn_t sl2mfn) |
4506 | 0 | /* Remove all mappings of this l2 shadow from this l3 shadow */ |
4507 | 0 | { |
4508 | 0 | shadow_l3e_t *sl3e; |
4509 | 0 | int done = 0; |
4510 | 0 | int flags; |
4511 | 0 |
|
4512 | 0 | SHADOW_FOREACH_L3E(sl3mfn, sl3e, 0, done, |
4513 | 0 | { |
4514 | 0 | flags = shadow_l3e_get_flags(*sl3e); |
4515 | 0 | if ( (flags & _PAGE_PRESENT) |
4516 | 0 | && (mfn_x(shadow_l3e_get_mfn(*sl3e)) == mfn_x(sl2mfn)) ) |
4517 | 0 | { |
4518 | 0 | (void) shadow_set_l3e(d, sl3e, shadow_l3e_empty(), sl3mfn); |
4519 | 0 | if ( mfn_to_page(sl2mfn)->u.sh.type == 0 ) |
4520 | 0 | /* This breaks us cleanly out of the FOREACH macro */ |
4521 | 0 | done = 1; |
4522 | 0 | } |
4523 | 0 | }); |
4524 | 0 | return done; |
4525 | 0 | } |
4526 | 0 |
|
4527 | 0 | int sh_remove_l3_shadow(struct domain *d, mfn_t sl4mfn, mfn_t sl3mfn) |
4528 | 0 | /* Remove all mappings of this l3 shadow from this l4 shadow */ |
4529 | 0 | { |
4530 | 0 | shadow_l4e_t *sl4e; |
4531 | 0 | int done = 0; |
4532 | 0 | int flags; |
4533 | 0 |
|
4534 | 0 | SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, done, d, |
4535 | 0 | { |
4536 | 0 | flags = shadow_l4e_get_flags(*sl4e); |
4537 | 0 | if ( (flags & _PAGE_PRESENT) |
4538 | 0 | && (mfn_x(shadow_l4e_get_mfn(*sl4e)) == mfn_x(sl3mfn)) ) |
4539 | 0 | { |
4540 | 0 | (void) shadow_set_l4e(d, sl4e, shadow_l4e_empty(), sl4mfn); |
4541 | 0 | if ( mfn_to_page(sl3mfn)->u.sh.type == 0 ) |
4542 | 0 | /* This breaks us cleanly out of the FOREACH macro */ |
4543 | 0 | done = 1; |
4544 | 0 | } |
4545 | 0 | }); |
4546 | 0 | return done; |
4547 | 0 | } |
4548 | 0 | #endif /* 64bit guest */ |
4549 | 0 |
|
4550 | 0 | /**************************************************************************/ |
4551 | 0 | /* Function for the guest to inform us that a process is being torn |
4552 | 0 | * down. We remember that as a hint to unshadow its pagetables soon, |
4553 | 0 | * and in the meantime we unhook its top-level user-mode entries. */ |
4554 | 0 |
|
4555 | 0 | #if GUEST_PAGING_LEVELS == 3 |
4556 | 0 | static void sh_pagetable_dying(struct vcpu *v, paddr_t gpa) |
4557 | 0 | { |
4558 | 0 | struct domain *d = v->domain; |
4559 | 0 | int i = 0; |
4560 | 0 | int flush = 0; |
4561 | 0 | int fast_path = 0; |
4562 | 0 | paddr_t gcr3 = 0; |
4563 | 0 | p2m_type_t p2mt; |
4564 | 0 | char *gl3pa = NULL; |
4565 | 0 | guest_l3e_t *gl3e = NULL; |
4566 | 0 | unsigned long l3gfn; |
4567 | 0 | mfn_t l3mfn; |
4568 | 0 |
|
4569 | 0 | gcr3 = (v->arch.hvm_vcpu.guest_cr[3]); |
4570 | 0 | /* fast path: the pagetable belongs to the current context */ |
4571 | 0 | if ( gcr3 == gpa ) |
4572 | 0 | fast_path = 1; |
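| | /* (On the fast path the dying table is the one currently loaded,
| |  * so v->arch.shadow_table[] already holds the shadows of its l2s
| |  * and no guest mapping or hash lookup is needed below.) */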
4573 | 0 |
|
4574 | 0 | l3gfn = gpa >> PAGE_SHIFT; |
4575 | 0 | l3mfn = get_gfn_query(d, _gfn(l3gfn), &p2mt); |
4576 | 0 | if ( !mfn_valid(l3mfn) || !p2m_is_ram(p2mt) ) |
4577 | 0 | { |
4578 | 0 | printk(XENLOG_DEBUG "sh_pagetable_dying: gpa not valid %"PRIpaddr"\n", |
4579 | 0 | gpa); |
4580 | 0 | goto out_put_gfn; |
4581 | 0 | } |
4582 | 0 |
|
4583 | 0 | paging_lock(d); |
4584 | 0 |
|
4585 | 0 | if ( !fast_path ) |
4586 | 0 | { |
4587 | 0 | gl3pa = map_domain_page(l3mfn); |
4588 | 0 | gl3e = (guest_l3e_t *)(gl3pa + ((unsigned long)gpa & ~PAGE_MASK)); |
4589 | 0 | } |
4590 | 0 | for ( i = 0; i < 4; i++ ) |
4591 | 0 | { |
4592 | 0 | mfn_t smfn, gmfn; |
4593 | 0 |
|
4594 | 0 | if ( fast_path ) { |
4595 | 0 | if ( pagetable_is_null(v->arch.shadow_table[i]) ) |
4596 | 0 | smfn = INVALID_MFN; |
4597 | 0 | else |
4598 | 0 | smfn = pagetable_get_mfn(v->arch.shadow_table[i]); |
4599 | 0 | } |
4600 | 0 | else |
4601 | 0 | { |
4602 | 0 | /* retrieving the l2s */ |
4603 | 0 | gmfn = get_gfn_query_unlocked(d, gfn_x(guest_l3e_get_gfn(gl3e[i])), |
4604 | 0 | &p2mt); |
4605 | 0 | smfn = unlikely(mfn_eq(gmfn, INVALID_MFN)) |
4606 | 0 | ? INVALID_MFN |
4607 | 0 | : shadow_hash_lookup(d, mfn_x(gmfn), SH_type_l2_pae_shadow); |
4608 | 0 | } |
4609 | 0 |
|
4610 | 0 | if ( mfn_valid(smfn) ) |
4611 | 0 | { |
4612 | 0 | gmfn = _mfn(mfn_to_page(smfn)->v.sh.back); |
4613 | 0 | mfn_to_page(gmfn)->shadow_flags |= SHF_pagetable_dying; |
4614 | 0 | shadow_unhook_mappings(d, smfn, 1/* user pages only */); |
4615 | 0 | flush = 1; |
4616 | 0 | } |
4617 | 0 | } |
4618 | 0 | if ( flush ) |
4619 | 0 | flush_tlb_mask(d->domain_dirty_cpumask); |
4620 | 0 |
|
4621 | 0 | /* Remember that we've seen the guest use this interface, so we |
4622 | 0 | * can rely on it to do so in future, instead of guessing at |
4623 | 0 | * when processes are being torn down. */ |
4624 | 0 | d->arch.paging.shadow.pagetable_dying_op = 1; |
4625 | 0 |
|
4626 | 0 | v->arch.paging.shadow.pagetable_dying = 1; |
4627 | 0 |
|
4628 | 0 | if ( !fast_path ) |
4629 | 0 | unmap_domain_page(gl3pa); |
4630 | 0 | paging_unlock(d); |
4631 | 0 | out_put_gfn: |
4632 | 0 | put_gfn(d, l3gfn); |
4633 | 0 | } |
4634 | 0 | #else |
4635 | 0 | static void sh_pagetable_dying(struct vcpu *v, paddr_t gpa) |
4636 | 0 | { |
4637 | 0 | struct domain *d = v->domain; |
4638 | 0 | mfn_t smfn, gmfn; |
4639 | 0 | p2m_type_t p2mt; |
4640 | 0 |
|
4641 | 0 | gmfn = get_gfn_query(d, _gfn(gpa >> PAGE_SHIFT), &p2mt); |
4642 | 0 | paging_lock(d); |
4643 | 0 |
|
4644 | 0 | #if GUEST_PAGING_LEVELS == 2 |
4645 | 0 | smfn = shadow_hash_lookup(d, mfn_x(gmfn), SH_type_l2_32_shadow); |
4646 | 0 | #else |
4647 | 0 | smfn = shadow_hash_lookup(d, mfn_x(gmfn), SH_type_l4_64_shadow); |
4648 | 0 | #endif |
4649 | 0 |
|
4650 | 0 | if ( mfn_valid(smfn) ) |
4651 | 0 | { |
4652 | 0 | mfn_to_page(gmfn)->shadow_flags |= SHF_pagetable_dying; |
4653 | 0 | shadow_unhook_mappings(d, smfn, 1/* user pages only */); |
4654 | 0 | /* Now flush the TLB: we removed toplevel mappings. */ |
4655 | 0 | flush_tlb_mask(d->domain_dirty_cpumask); |
4656 | 0 | } |
4657 | 0 |
|
4658 | 0 | /* Remember that we've seen the guest use this interface, so we |
4659 | 0 | * can rely on it to do so in future, instead of guessing at |
4660 | 0 | * when processes are being torn down. */ |
4661 | 0 | d->arch.paging.shadow.pagetable_dying_op = 1; |
4662 | 0 |
|
4663 | 0 | v->arch.paging.shadow.pagetable_dying = 1; |
4664 | 0 |
|
4665 | 0 | paging_unlock(d); |
4666 | 0 | put_gfn(d, gpa >> PAGE_SHIFT); |
4667 | 0 | } |
4668 | 0 | #endif |
4669 | 0 |
|
4670 | 0 | /**************************************************************************/ |
4671 | 0 | /* Handling guest writes to pagetables. */ |
4672 | 0 |
|
4673 | 0 | /* Tidy up after the emulated write: mark pages dirty, verify the new |
4674 | 0 | * contents, and undo the mapping */ |
4675 | 0 | static void emulate_unmap_dest(struct vcpu *v, |
4676 | 0 | void *addr, |
4677 | 0 | u32 bytes, |
4678 | 0 | struct sh_emulate_ctxt *sh_ctxt) |
4679 | 0 | { |
4680 | 0 | ASSERT(mfn_valid(sh_ctxt->mfn[0])); |
4681 | 0 |
|
4682 | 0 | /* If we are writing lots of PTE-aligned zeros, might want to unshadow */ |
4683 | 0 | if ( likely(bytes >= 4) && (*(u32 *)addr == 0) ) |
4684 | 0 | { |
4685 | 0 | if ( ((unsigned long) addr & ((sizeof (guest_intpte_t)) - 1)) == 0 ) |
4686 | 0 | check_for_early_unshadow(v, sh_ctxt->mfn[0]); |
4687 | 0 | /* Don't reset the heuristic if we're writing zeros at non-aligned |
4688 | 0 | * addresses; otherwise it doesn't catch REP MOVSD on PAE guests */ |
4689 | 0 | } |
4690 | 0 | else |
4691 | 0 | reset_early_unshadow(v); |
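| | /* (The typical trigger is a guest OS zeroing out the pagetables of
| |  * a dead process: a run of PTE-aligned zero writes is presumably
| |  * the cheapest hint available that the page can be unshadowed.) */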
4692 | 0 |
|
4693 | 0 | sh_emulate_unmap_dest(v, addr, bytes, sh_ctxt); |
4694 | 0 | } |
4695 | 0 |
|
4696 | 0 | static int |
4697 | 0 | sh_x86_emulate_write(struct vcpu *v, unsigned long vaddr, void *src, |
4698 | 0 | u32 bytes, struct sh_emulate_ctxt *sh_ctxt) |
4699 | 0 | { |
4700 | 0 | void *addr; |
4701 | 0 |
|
4702 | 0 | /* Unaligned writes are only acceptable on HVM */ |
4703 | 0 | if ( (vaddr & (bytes - 1)) && !is_hvm_vcpu(v) ) |
4704 | 0 | return X86EMUL_UNHANDLEABLE; |
4705 | 0 |
|
4706 | 0 | addr = sh_emulate_map_dest(v, vaddr, bytes, sh_ctxt); |
4707 | 0 | if ( IS_ERR(addr) ) |
4708 | 0 | return ~PTR_ERR(addr); |
4709 | 0 |
|
4710 | 0 | paging_lock(v->domain); |
4711 | 0 | memcpy(addr, src, bytes); |
4712 | 0 |
|
4713 | 0 | if ( tb_init_done ) |
4714 | 0 | { |
4715 | 0 | #if GUEST_PAGING_LEVELS == 3 |
4716 | 0 | if ( vaddr == this_cpu(trace_emulate_initial_va) ) |
4717 | 0 | memcpy(&this_cpu(trace_emulate_write_val), src, bytes); |
4718 | 0 | else if ( (vaddr & ~(0x7UL)) == this_cpu(trace_emulate_initial_va) ) |
4719 | 0 | { |
4720 | 0 | TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EMULATE_FULL_PT); |
4721 | 0 | memcpy(&this_cpu(trace_emulate_write_val), |
4722 | 0 | (void *)(((unsigned long) addr) & ~(0x7UL)), GUEST_PTE_SIZE); |
4723 | 0 | } |
4724 | 0 | #else |
4725 | 0 | memcpy(&this_cpu(trace_emulate_write_val), src, bytes); |
4726 | 0 | #endif |
4727 | 0 | } |
4728 | 0 |
|
4729 | 0 | emulate_unmap_dest(v, addr, bytes, sh_ctxt); |
4730 | 0 | shadow_audit_tables(v); |
4731 | 0 | paging_unlock(v->domain); |
4732 | 0 | return X86EMUL_OKAY; |
4733 | 0 | } |
| | Unexecuted instantiation: multi.c:sh_x86_emulate_write__guest_4
| | Unexecuted instantiation: multi.c:sh_x86_emulate_write__guest_3
| | Unexecuted instantiation: multi.c:sh_x86_emulate_write__guest_2
4734 | 0 |
|
4735 | 0 | static int |
4736 | 0 | sh_x86_emulate_cmpxchg(struct vcpu *v, unsigned long vaddr, |
4737 | 0 | unsigned long old, unsigned long new, |
4738 | 0 | unsigned int bytes, struct sh_emulate_ctxt *sh_ctxt) |
4739 | 0 | { |
4740 | 0 | void *addr; |
4741 | 0 | unsigned long prev; |
4742 | 0 | int rv = X86EMUL_OKAY; |
4743 | 0 |
|
4744 | 0 | /* Unaligned writes are only acceptable on HVM */ |
4745 | 0 | if ( (vaddr & (bytes - 1)) && !is_hvm_vcpu(v) ) |
4746 | 0 | return X86EMUL_UNHANDLEABLE; |
4747 | 0 |
|
4748 | 0 | addr = sh_emulate_map_dest(v, vaddr, bytes, sh_ctxt); |
4749 | 0 | if ( IS_ERR(addr) ) |
4750 | 0 | return ~PTR_ERR(addr); |
4751 | 0 |
|
4752 | 0 | paging_lock(v->domain); |
4753 | 0 | switch ( bytes ) |
4754 | 0 | { |
4755 | 0 | case 1: prev = cmpxchg(((u8 *)addr), old, new); break; |
4756 | 0 | case 2: prev = cmpxchg(((u16 *)addr), old, new); break; |
4757 | 0 | case 4: prev = cmpxchg(((u32 *)addr), old, new); break; |
4758 | 0 | case 8: prev = cmpxchg(((u64 *)addr), old, new); break; |
4759 | 0 | default: |
4760 | 0 | SHADOW_PRINTK("cmpxchg of size %i is not supported\n", bytes); |
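| | /* Force a mismatch below so the unsupported size is reported
| |  * back to the emulator as X86EMUL_RETRY. */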
4761 | 0 | prev = ~old; |
4762 | 0 | } |
4763 | 0 |
|
4764 | 0 | if ( prev != old ) |
4765 | 0 | rv = X86EMUL_RETRY; |
4766 | 0 |
|
4767 | 0 | SHADOW_DEBUG(EMULATE, "va %#lx was %#lx expected %#lx" |
4768 | 0 | " wanted %#lx now %#lx bytes %u\n", |
4769 | 0 | vaddr, prev, old, new, *(unsigned long *)addr, bytes); |
4770 | 0 |
|
4771 | 0 | emulate_unmap_dest(v, addr, bytes, sh_ctxt); |
4772 | 0 | shadow_audit_tables(v); |
4773 | 0 | paging_unlock(v->domain); |
4774 | 0 | return rv; |
4775 | 0 | } |
| | Unexecuted instantiation: multi.c:sh_x86_emulate_cmpxchg__guest_2
| | Unexecuted instantiation: multi.c:sh_x86_emulate_cmpxchg__guest_3
| | Unexecuted instantiation: multi.c:sh_x86_emulate_cmpxchg__guest_4
4776 | 0 |
|
4777 | 0 | /**************************************************************************/ |
4778 | 0 | /* Audit tools */ |
4779 | 0 |
|
4780 | 0 | #if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES |
4781 | 0 |
|
4782 | 0 | #define AUDIT_FAIL(_level, _fmt, _a...) do { \ |
4783 | 0 | printk("Shadow %u-on-%u audit failed at level %i, index %i\n" \ |
4784 | 0 | "gl" #_level "mfn = %" PRI_mfn \ |
4785 | 0 | " sl" #_level "mfn = %" PRI_mfn \ |
4786 | 0 | " &gl" #_level "e = %p &sl" #_level "e = %p" \ |
4787 | 0 | " gl" #_level "e = %" SH_PRI_gpte \ |
4788 | 0 | " sl" #_level "e = %" SH_PRI_pte "\nError: " _fmt "\n", \ |
4789 | 0 | GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS, \ |
4790 | 0 | _level, guest_index(gl ## _level ## e), \ |
4791 | 0 | mfn_x(gl ## _level ## mfn), mfn_x(sl ## _level ## mfn), \ |
4792 | 0 | gl ## _level ## e, sl ## _level ## e, \ |
4793 | 0 | gl ## _level ## e->l ## _level, sl ## _level ## e->l ## _level, \ |
4794 | 0 | ##_a); \ |
4795 | 0 | BUG(); \ |
4796 | 0 | done = 1; \ |
4797 | 0 | } while (0) |
4798 | 0 |
|
4799 | 0 | #define AUDIT_FAIL_MIN(_level, _fmt, _a...) do { \ |
4800 | 0 | printk("Shadow %u-on-%u audit failed at level %i\n" \ |
4801 | 0 | "gl" #_level "mfn = %" PRI_mfn \ |
4802 | 0 | " sl" #_level "mfn = %" PRI_mfn \ |
4803 | 0 | " Error: " _fmt "\n", \ |
4804 | 0 | GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS, \ |
4805 | 0 | _level, \ |
4806 | 0 | mfn_x(gl ## _level ## mfn), mfn_x(sl ## _level ## mfn), \ |
4807 | 0 | ##_a); \ |
4808 | 0 | BUG(); \ |
4809 | 0 | done = 1; \ |
4810 | 0 | } while (0) |
4811 | 0 |
|
4812 | 0 | static char * sh_audit_flags(struct vcpu *v, int level, |
4813 | 0 | int gflags, int sflags) |
4814 | 0 | /* Common code for auditing flag bits */ |
4815 | 0 | { |
4816 | 0 | if ( (sflags & _PAGE_PRESENT) && !(gflags & _PAGE_PRESENT) ) |
4817 | 0 | return "shadow is present but guest is not present"; |
4818 | 0 | if ( (sflags & _PAGE_GLOBAL) && !is_hvm_vcpu(v) ) |
4819 | 0 | return "global bit set in PV shadow"; |
4820 | 0 | if ( level == 2 && (sflags & _PAGE_PSE) ) |
4821 | 0 | return "PS bit set in shadow"; |
4822 | 0 | #if SHADOW_PAGING_LEVELS == 3 |
4823 | 0 | if ( level == 3 ) return NULL; /* All the other bits are blank in a PAE l3 */ |
4824 | 0 | #endif |
4825 | 0 | if ( (sflags & _PAGE_PRESENT) && !(gflags & _PAGE_ACCESSED) ) |
4826 | 0 | return "accessed bit not propagated"; |
4827 | 0 | if ( (level == 1 || (level == 2 && (gflags & _PAGE_PSE))) |
4828 | 0 | && ((sflags & _PAGE_RW) && !(gflags & _PAGE_DIRTY)) ) |
4829 | 0 | return "dirty bit not propagated"; |
4830 | 0 | if ( (sflags & _PAGE_USER) != (gflags & _PAGE_USER) ) |
4831 | 0 | return "user/supervisor bit does not match"; |
4832 | 0 | if ( (sflags & _PAGE_NX_BIT) != (gflags & _PAGE_NX_BIT) ) |
4833 | 0 | return "NX bit does not match"; |
4834 | 0 | if ( (sflags & _PAGE_RW) && !(gflags & _PAGE_RW) ) |
4835 | 0 | return "shadow grants write access but guest does not"; |
4836 | 0 | return NULL; |
4837 | 0 | } |
4838 | 0 |
|
4839 | 0 | int sh_audit_l1_table(struct vcpu *v, mfn_t sl1mfn, mfn_t x) |
4840 | 0 | { |
4841 | 0 | guest_l1e_t *gl1e, *gp; |
4842 | 0 | shadow_l1e_t *sl1e; |
4843 | 0 | mfn_t mfn, gmfn, gl1mfn; |
4844 | 0 | gfn_t gfn; |
4845 | 0 | p2m_type_t p2mt; |
4846 | 0 | char *s; |
4847 | 0 | int done = 0; |
4848 | 0 |
|
4849 | 0 | /* Follow the backpointer */ |
4850 | 0 | ASSERT(mfn_to_page(sl1mfn)->u.sh.head); |
4851 | 0 | gl1mfn = backpointer(mfn_to_page(sl1mfn)); |
4852 | 0 |
|
4853 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
4854 | 0 | /* Out-of-sync l1 shadows can contain anything: just check the OOS hash */ |
4855 | 0 | if ( page_is_out_of_sync(mfn_to_page(gl1mfn)) ) |
4856 | 0 | { |
4857 | 0 | oos_audit_hash_is_present(v->domain, gl1mfn); |
4858 | 0 | return 0; |
4859 | 0 | } |
4860 | 0 | #endif |
4861 | 0 |
|
4862 | 0 | gl1e = gp = map_domain_page(gl1mfn); |
4863 | 0 | SHADOW_FOREACH_L1E(sl1mfn, sl1e, &gl1e, done, { |
4864 | 0 |
|
4865 | 0 | if ( sh_l1e_is_magic(*sl1e) ) |
4866 | 0 | { |
4867 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH) |
4868 | 0 | if ( sh_l1e_is_gnp(*sl1e) ) |
4869 | 0 | { |
4870 | 0 | if ( guest_l1e_get_flags(*gl1e) & _PAGE_PRESENT ) |
4871 | 0 | AUDIT_FAIL(1, "shadow is GNP magic but guest is present"); |
4872 | 0 | } |
4873 | 0 | else |
4874 | 0 | { |
4875 | 0 | ASSERT(sh_l1e_is_mmio(*sl1e)); |
4876 | 0 | gfn = sh_l1e_mmio_get_gfn(*sl1e); |
4877 | 0 | if ( gfn_x(gfn) != gfn_x(guest_l1e_get_gfn(*gl1e)) ) |
4878 | 0 | AUDIT_FAIL(1, "shadow MMIO gfn is %" SH_PRI_gfn |
4879 | 0 | " but guest gfn is %" SH_PRI_gfn, |
4880 | 0 | gfn_x(gfn), |
4881 | 0 | gfn_x(guest_l1e_get_gfn(*gl1e))); |
4882 | 0 | } |
4883 | 0 | #endif |
4884 | 0 | } |
4885 | 0 | else |
4886 | 0 | { |
4887 | 0 | s = sh_audit_flags(v, 1, guest_l1e_get_flags(*gl1e), |
4888 | 0 | shadow_l1e_get_flags(*sl1e)); |
4889 | 0 | if ( s ) AUDIT_FAIL(1, "%s", s); |
4890 | 0 |
|
4891 | 0 | if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS ) |
4892 | 0 | { |
4893 | 0 | gfn = guest_l1e_get_gfn(*gl1e); |
4894 | 0 | mfn = shadow_l1e_get_mfn(*sl1e); |
4895 | 0 | gmfn = get_gfn_query_unlocked(v->domain, gfn_x(gfn), &p2mt); |
4896 | 0 | if ( !p2m_is_grant(p2mt) && mfn_x(gmfn) != mfn_x(mfn) ) |
4897 | 0 | AUDIT_FAIL(1, "bad translation: gfn %" SH_PRI_gfn |
4898 | 0 | " --> %" PRI_mfn " != mfn %" PRI_mfn, |
4899 | 0 | gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn)); |
4900 | 0 | } |
4901 | 0 | } |
4902 | 0 | }); |
4903 | 0 | unmap_domain_page(gp); |
4904 | 0 | return done; |
4905 | 0 | } |
| | Unexecuted instantiation: sh_audit_l1_table__guest_2
| | Unexecuted instantiation: sh_audit_l1_table__guest_4
| | Unexecuted instantiation: sh_audit_l1_table__guest_3
4906 | 0 |
|
4907 | 0 | int sh_audit_fl1_table(struct vcpu *v, mfn_t sl1mfn, mfn_t x) |
4908 | 0 | { |
4909 | 0 | guest_l1e_t *gl1e, e; |
4910 | 0 | shadow_l1e_t *sl1e; |
4911 | 0 | mfn_t gl1mfn = INVALID_MFN; |
4912 | 0 | int f; |
4913 | 0 | int done = 0; |
4914 | 0 |
|
4915 | 0 | /* fl1 has no useful backpointer: all we can check are flags */ |
4916 | 0 | e = guest_l1e_from_gfn(_gfn(0), 0); gl1e = &e; /* Needed for macro */ |
4917 | 0 | SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, { |
4918 | 0 | f = shadow_l1e_get_flags(*sl1e); |
4919 | 0 | f &= ~(_PAGE_AVAIL0|_PAGE_AVAIL1|_PAGE_AVAIL2); |
4920 | 0 | if ( !(f == 0 |
4921 | 0 | || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW| |
4922 | 0 | _PAGE_ACCESSED) |
4923 | 0 | || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_ACCESSED) |
4924 | 0 | || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW| |
4925 | 0 | _PAGE_ACCESSED|_PAGE_DIRTY) |
4926 | 0 | || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY) |
4927 | 0 | || sh_l1e_is_magic(*sl1e)) ) |
4928 | 0 | AUDIT_FAIL(1, "fl1e has bad flags"); |
4929 | 0 | }); |
4930 | 0 | return 0; |
4931 | 0 | } |
| | Unexecuted instantiation: sh_audit_fl1_table__guest_2
| | Unexecuted instantiation: sh_audit_fl1_table__guest_3
| | Unexecuted instantiation: sh_audit_fl1_table__guest_4
4932 | 0 |
|
4933 | 0 | int sh_audit_l2_table(struct vcpu *v, mfn_t sl2mfn, mfn_t x) |
4934 | 0 | { |
4935 | 0 | struct domain *d = v->domain; |
4936 | 0 | guest_l2e_t *gl2e, *gp; |
4937 | 0 | shadow_l2e_t *sl2e; |
4938 | 0 | mfn_t mfn, gmfn, gl2mfn; |
4939 | 0 | gfn_t gfn; |
4940 | 0 | p2m_type_t p2mt; |
4941 | 0 | char *s; |
4942 | 0 | int done = 0; |
4943 | 0 |
|
4944 | 0 | /* Follow the backpointer */ |
4945 | 0 | ASSERT(mfn_to_page(sl2mfn)->u.sh.head); |
4946 | 0 | gl2mfn = backpointer(mfn_to_page(sl2mfn)); |
4947 | 0 |
|
4948 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
4949 | 0 | /* Only L1s may be out of sync. */ |
4950 | 0 | if ( page_is_out_of_sync(mfn_to_page(gl2mfn)) ) |
4951 | 0 | AUDIT_FAIL_MIN(2, "gmfn %lx is out of sync", mfn_x(gl2mfn)); |
4952 | 0 | #endif |
4953 | 0 |
|
4954 | 0 | gl2e = gp = map_domain_page(gl2mfn); |
4955 | 0 | SHADOW_FOREACH_L2E(sl2mfn, sl2e, &gl2e, done, d, { |
4956 | 0 |
|
4957 | 0 | s = sh_audit_flags(v, 2, guest_l2e_get_flags(*gl2e), |
4958 | 0 | shadow_l2e_get_flags(*sl2e)); |
4959 | 0 | if ( s ) AUDIT_FAIL(2, "%s", s); |
4960 | 0 |
|
4961 | 0 | if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS ) |
4962 | 0 | { |
4963 | 0 | gfn = guest_l2e_get_gfn(*gl2e); |
4964 | 0 | mfn = shadow_l2e_get_mfn(*sl2e); |
4965 | 0 | gmfn = (guest_l2e_get_flags(*gl2e) & _PAGE_PSE) |
4966 | 0 | ? get_fl1_shadow_status(d, gfn) |
4967 | 0 | : get_shadow_status(d, |
4968 | 0 | get_gfn_query_unlocked(d, gfn_x(gfn), |
4969 | 0 | &p2mt), SH_type_l1_shadow); |
4970 | 0 | if ( mfn_x(gmfn) != mfn_x(mfn) ) |
4971 | 0 | AUDIT_FAIL(2, "bad translation: gfn %" SH_PRI_gfn |
4972 | 0 | " (--> %" PRI_mfn ")" |
4973 | 0 | " --> %" PRI_mfn " != mfn %" PRI_mfn, |
4974 | 0 | gfn_x(gfn), |
4975 | 0 | (guest_l2e_get_flags(*gl2e) & _PAGE_PSE) ? 0 |
4976 | 0 | : mfn_x(get_gfn_query_unlocked(d, |
4977 | 0 | gfn_x(gfn), &p2mt)), mfn_x(gmfn), mfn_x(mfn)); |
4978 | 0 | } |
4979 | 0 | }); |
4980 | 0 | unmap_domain_page(gp); |
4981 | 0 | return 0; |
4982 | 0 | } |
| | Unexecuted instantiation: sh_audit_l2_table__guest_3
| | Unexecuted instantiation: sh_audit_l2_table__guest_2
| | Unexecuted instantiation: sh_audit_l2_table__guest_4
4983 | 0 |
|
4984 | 0 | #if GUEST_PAGING_LEVELS >= 4 |
4985 | 0 | int sh_audit_l3_table(struct vcpu *v, mfn_t sl3mfn, mfn_t x) |
4986 | 0 | { |
4987 | 0 | struct domain *d = v->domain; |
4988 | 0 | guest_l3e_t *gl3e, *gp; |
4989 | 0 | shadow_l3e_t *sl3e; |
4990 | 0 | mfn_t mfn, gmfn, gl3mfn; |
4991 | 0 | gfn_t gfn; |
4992 | 0 | p2m_type_t p2mt; |
4993 | 0 | char *s; |
4994 | 0 | int done = 0; |
4995 | 0 |
|
4996 | 0 | /* Follow the backpointer */ |
4997 | 0 | ASSERT(mfn_to_page(sl3mfn)->u.sh.head); |
4998 | 0 | gl3mfn = backpointer(mfn_to_page(sl3mfn)); |
4999 | 0 |
|
5000 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
5001 | 0 | /* Only L1s may be out of sync. */ |
5002 | 0 | if ( page_is_out_of_sync(mfn_to_page(gl3mfn)) ) |
5003 | 0 | AUDIT_FAIL_MIN(3, "gmfn %lx is out of sync", mfn_x(gl3mfn)); |
5004 | 0 | #endif |
5005 | 0 |
|
5006 | 0 | gl3e = gp = map_domain_page(gl3mfn); |
5007 | 0 | SHADOW_FOREACH_L3E(sl3mfn, sl3e, &gl3e, done, { |
5008 | 0 |
|
5009 | 0 | s = sh_audit_flags(v, 3, guest_l3e_get_flags(*gl3e), |
5010 | 0 | shadow_l3e_get_flags(*sl3e)); |
5011 | 0 | if ( s ) AUDIT_FAIL(3, "%s", s); |
5012 | 0 |
|
5013 | 0 | if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS ) |
5014 | 0 | { |
5015 | 0 | gfn = guest_l3e_get_gfn(*gl3e); |
5016 | 0 | mfn = shadow_l3e_get_mfn(*sl3e); |
5017 | 0 | gmfn = get_shadow_status(d, get_gfn_query_unlocked( |
5018 | 0 | d, gfn_x(gfn), &p2mt), |
5019 | 0 | ((GUEST_PAGING_LEVELS == 3 || |
5020 | 0 | is_pv_32bit_domain(d)) |
5021 | 0 | && !shadow_mode_external(d) |
5022 | 0 | && (guest_index(gl3e) % 4) == 3) |
5023 | 0 | ? SH_type_l2h_shadow |
5024 | 0 | : SH_type_l2_shadow); |
5025 | 0 | if ( mfn_x(gmfn) != mfn_x(mfn) ) |
5026 | 0 | AUDIT_FAIL(3, "bad translation: gfn %" SH_PRI_gfn |
5027 | 0 | " --> %" PRI_mfn " != mfn %" PRI_mfn, |
5028 | 0 | gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn)); |
5029 | 0 | } |
5030 | 0 | }); |
5031 | 0 | unmap_domain_page(gp); |
5032 | 0 | return 0; |
5033 | 0 | } |
5034 | 0 |
|
5035 | 0 | int sh_audit_l4_table(struct vcpu *v, mfn_t sl4mfn, mfn_t x) |
5036 | 0 | { |
5037 | 0 | struct domain *d = v->domain; |
5038 | 0 | guest_l4e_t *gl4e, *gp; |
5039 | 0 | shadow_l4e_t *sl4e; |
5040 | 0 | mfn_t mfn, gmfn, gl4mfn; |
5041 | 0 | gfn_t gfn; |
5042 | 0 | p2m_type_t p2mt; |
5043 | 0 | char *s; |
5044 | 0 | int done = 0; |
5045 | 0 |
|
5046 | 0 | /* Follow the backpointer */ |
5047 | 0 | ASSERT(mfn_to_page(sl4mfn)->u.sh.head); |
5048 | 0 | gl4mfn = backpointer(mfn_to_page(sl4mfn)); |
5049 | 0 |
|
5050 | 0 | #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) |
5051 | 0 | /* Only L1s may be out of sync. */ |
5052 | 0 | if ( page_is_out_of_sync(mfn_to_page(gl4mfn)) ) |
5053 | 0 | AUDIT_FAIL_MIN(4, "gmfn %lx is out of sync", mfn_x(gl4mfn)); |
5054 | 0 | #endif |
5055 | 0 |
|
5056 | 0 | gl4e = gp = map_domain_page(gl4mfn); |
5057 | 0 | SHADOW_FOREACH_L4E(sl4mfn, sl4e, &gl4e, done, d, |
5058 | 0 | { |
5059 | 0 | s = sh_audit_flags(v, 4, guest_l4e_get_flags(*gl4e), |
5060 | 0 | shadow_l4e_get_flags(*sl4e)); |
5061 | 0 | if ( s ) AUDIT_FAIL(4, "%s", s); |
5062 | 0 |
|
5063 | 0 | if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS ) |
5064 | 0 | { |
5065 | 0 | gfn = guest_l4e_get_gfn(*gl4e); |
5066 | 0 | mfn = shadow_l4e_get_mfn(*sl4e); |
5067 | 0 | gmfn = get_shadow_status(d, get_gfn_query_unlocked( |
5068 | 0 | d, gfn_x(gfn), &p2mt), |
5069 | 0 | SH_type_l3_shadow); |
5070 | 0 | if ( mfn_x(gmfn) != mfn_x(mfn) ) |
5071 | 0 | AUDIT_FAIL(4, "bad translation: gfn %" SH_PRI_gfn |
5072 | 0 | " --> %" PRI_mfn " != mfn %" PRI_mfn, |
5073 | 0 | gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn)); |
5074 | 0 | } |
5075 | 0 | }); |
5076 | 0 | unmap_domain_page(gp); |
5077 | 0 | return 0; |
5078 | 0 | } |
5079 | 0 | #endif /* GUEST_PAGING_LEVELS >= 4 */ |
5080 | 0 |
|
5081 | 0 |
|
5082 | 0 | #undef AUDIT_FAIL |
5083 | 0 |
|
5084 | 0 | #endif /* Audit code */ |
5085 | 0 |
|
5086 | 0 | /**************************************************************************/ |
5087 | 0 | /* Entry points into this mode of the shadow code. |
5088 | 0 | * This will all be mangled by the preprocessor to uniquify everything. */ |
5089 | 0 | const struct paging_mode sh_paging_mode = { |
5090 | 0 | .page_fault = sh_page_fault, |
5091 | 0 | .invlpg = sh_invlpg, |
5092 | 0 | .gva_to_gfn = sh_gva_to_gfn, |
5093 | 0 | .update_cr3 = sh_update_cr3, |
5094 | 0 | .update_paging_modes = shadow_update_paging_modes, |
5095 | 0 | .write_p2m_entry = shadow_write_p2m_entry, |
5096 | 0 | .guest_levels = GUEST_PAGING_LEVELS, |
5097 | 0 | .shadow.detach_old_tables = sh_detach_old_tables, |
5098 | 0 | .shadow.x86_emulate_write = sh_x86_emulate_write, |
5099 | 0 | .shadow.x86_emulate_cmpxchg = sh_x86_emulate_cmpxchg, |
5100 | 0 | .shadow.write_guest_entry = sh_write_guest_entry, |
5101 | 0 | .shadow.cmpxchg_guest_entry = sh_cmpxchg_guest_entry, |
5102 | 0 | .shadow.make_monitor_table = sh_make_monitor_table, |
5103 | 0 | .shadow.destroy_monitor_table = sh_destroy_monitor_table, |
5104 | 0 | #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC |
5105 | 0 | .shadow.guess_wrmap = sh_guess_wrmap, |
5106 | 0 | #endif |
5107 | 0 | .shadow.pagetable_dying = sh_pagetable_dying, |
5108 | 0 | .shadow.shadow_levels = SHADOW_PAGING_LEVELS, |
5109 | 0 | }; |
5110 | 0 |
|
5111 | 0 | /* |
5112 | 0 | * Local variables: |
5113 | 0 | * mode: C |
5114 | 0 | * c-file-style: "BSD" |
5115 | 0 | * c-basic-offset: 4 |
5116 | 0 | * indent-tabs-mode: nil |
5117 | 0 | * End: |
5118 | 0 | */ |