/root/src/xen/xen/arch/x86/mm/guest_walk.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * arch/x86/mm/guest_walk.c |
3 | | * |
4 | | * Pagetable walker for guest memory accesses. |
5 | | * |
6 | | * Parts of this code are Copyright (c) 2006 by XenSource Inc. |
7 | | * Parts of this code are Copyright (c) 2006 by Michael A Fetterman |
8 | | * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al. |
9 | | * |
10 | | * This program is free software; you can redistribute it and/or modify |
11 | | * it under the terms of the GNU General Public License as published by |
12 | | * the Free Software Foundation; either version 2 of the License, or |
13 | | * (at your option) any later version. |
14 | | * |
15 | | * This program is distributed in the hope that it will be useful, |
16 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
18 | | * GNU General Public License for more details. |
19 | | * |
20 | | * You should have received a copy of the GNU General Public License |
21 | | * along with this program; If not, see <http://www.gnu.org/licenses/>. |
22 | | */ |
23 | | |
24 | | /* Allow uniquely identifying static symbols in the 3 generated objects. */ |
25 | | asm(".file \"" __OBJECT_FILE__ "\""); |
26 | | |
27 | | #include <xen/types.h> |
28 | | #include <xen/mm.h> |
29 | | #include <xen/paging.h> |
30 | | #include <xen/domain_page.h> |
31 | | #include <xen/sched.h> |
32 | | #include <asm/page.h> |
33 | | #include <asm/guest_pt.h> |
34 | | |
35 | | /* |
36 | | * Modify a guest pagetable entry to set the Accessed and Dirty bits. |
37 | | * Returns true if it actually writes to guest memory. |
38 | | */ |
39 | | static bool set_ad_bits(guest_intpte_t *guest_p, guest_intpte_t *walk_p, |
40 | | bool set_dirty) |
41 | 1.23M | { |
42 | 1.23M | guest_intpte_t new, old = *walk_p; |
43 | 1.23M | |
44 | 1.23M | new = old | _PAGE_ACCESSED | (set_dirty ? _PAGE_DIRTY : 0); |
45 | 1.23M | if ( old != new ) |
46 | 0 | { |
47 | 0 | /* |
48 | 0 | * Write the new entry into the walk, and try to write it back |
49 | 0 | * into the guest table as well. If the guest table has changed |
50 | 0 | * under our feet then leave it alone. |
51 | 0 | */ |
52 | 0 | *walk_p = new; |
53 | 0 | if ( cmpxchg(guest_p, old, new) == old ) |
54 | 0 | return true; |
55 | 0 | } |
56 | 1.23M | return false; |
57 | 1.23M | } |
58 | | |
59 | | /* |
60 | | * Walk the guest pagetables, after the manner of a hardware walker. |
61 | | * |
62 | | * This is a condensing of the 'Paging' chapters from Intel and AMD software |
63 | | * manuals. Please refer closely to them. |
64 | | * |
65 | | * A pagetable walk consists of two parts: |
66 | | * 1) to find whether a translation exists, and |
67 | | * 2) if a translation does exist, to check whether the translation's access |
68 | | * rights permit the access. |
69 | | * |
70 | | * A translation is found by following the pagetable structure (starting at |
71 | | * %cr3) to a leaf entry (an L1 PTE, or a higher level entry with PSE set) |
72 | | * which identifies the physical destination of the access. |
73 | | * |
74 | | * A translation from one level to the next exists if the PTE is both present |
75 | | * and has no reserved bits set. If the pagewalk encounters a situation where a |
76 | | * translation does not exist, the walk stops at that point. |
77 | | * |
78 | | * The access rights (NX, User, RW bits) are collected as the walk progresses. |
79 | | * If a translation exists, the accumulated access rights are compared to the |
80 | | * requested walk, to see whether the access is permitted. |
81 | | */ |
82 | | bool |
83 | | guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m, |
84 | | unsigned long va, walk_t *gw, |
85 | | uint32_t walk, mfn_t top_mfn, void *top_map) |
86 | 364k | { |
87 | 364k | struct domain *d = v->domain; |
88 | 364k | p2m_type_t p2mt; |
89 | 364k | guest_l1e_t *l1p = NULL; |
90 | 364k | guest_l2e_t *l2p = NULL; |
91 | 364k | #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */ |
92 | 364k | guest_l3e_t *l3p = NULL; |
93 | | guest_l4e_t *l4p; |
94 | | #endif |
95 | 364k | uint32_t gflags, rc; |
96 | 364k | unsigned int leaf_level; |
97 | 364k | p2m_query_t qt = P2M_ALLOC | P2M_UNSHARE; |
98 | 364k | |
99 | 365k | #define AR_ACCUM_AND (_PAGE_USER | _PAGE_RW) |
100 | 365k | #define AR_ACCUM_OR (_PAGE_NX_BIT) |
101 | 364k | /* Start with all AND bits set, all OR bits clear. */ |
102 | 364k | uint32_t ar, ar_and = ~0u, ar_or = 0; |
103 | 364k | |
104 | 364k | bool walk_ok = false; |
105 | 364k | |
106 | 364k | /* |
107 | 364k | * TODO - We should ASSERT() that only the following bits are set as |
108 | 364k | * inputs to a guest walk, but a whole load of code currently passes in |
109 | 364k | * other PFEC_ constants. |
110 | 364k | */ |
111 | 364k | walk &= (PFEC_implicit | PFEC_insn_fetch | PFEC_user_mode | PFEC_write_access); |
112 | 364k | |
113 | 364k | /* Only implicit supervisor data accesses exist. */ |
114 | 364k | ASSERT(!(walk & PFEC_implicit) || |
115 | 364k | !(walk & (PFEC_insn_fetch | PFEC_user_mode))); |
116 | 364k | |
117 | 364k | perfc_incr(guest_walk); |
118 | 364k | memset(gw, 0, sizeof(*gw)); |
119 | 364k | gw->va = va; |
120 | 364k | gw->pfec = walk & (PFEC_user_mode | PFEC_write_access); |
121 | 364k | |
122 | 364k | /* |
123 | 364k | * PFEC_insn_fetch is only reported if NX or SMEP are enabled. Hardware |
124 | 364k | * still distinguishes instruction fetches during determination of access |
125 | 364k | * rights. |
126 | 364k | */ |
127 | 364k | if ( guest_nx_enabled(v) || guest_smep_enabled(v) ) |
128 | 365k | gw->pfec |= (walk & PFEC_insn_fetch); |
129 | 364k | |
130 | 364k | #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */ |
131 | | #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */ |
132 | | |
133 | | /* Get the l4e from the top level table and check its flags*/ |
134 | | gw->l4mfn = top_mfn; |
135 | | l4p = (guest_l4e_t *) top_map; |
136 | 364k | gw->l4e = l4p[guest_l4_table_offset(va)]; |
137 | | gflags = guest_l4e_get_flags(gw->l4e); |
138 | 364k | if ( !(gflags & _PAGE_PRESENT) ) |
139 | 0 | goto out; |
140 | | |
141 | | /* Check for reserved bits. */ |
142 | 364k | if ( guest_l4e_rsvd_bits(v, gw->l4e) ) |
143 | 0 | { |
144 | 0 | gw->pfec |= PFEC_reserved_bit | PFEC_page_present; |
145 | 0 | goto out; |
146 | 0 | } |
147 | 364k | |
148 | 364k | /* Accumulate l4e access rights. */ |
149 | 364k | ar_and &= gflags; |
150 | 364k | ar_or |= gflags; |
151 | 364k | |
152 | 364k | /* Map the l3 table */ |
153 | 364k | l3p = map_domain_gfn(p2m, |
154 | 364k | guest_l4e_get_gfn(gw->l4e), |
155 | 364k | &gw->l3mfn, |
156 | 364k | &p2mt, |
157 | 364k | qt, |
158 | 364k | &rc); |
159 | 364k | if ( l3p == NULL ) |
160 | 0 | { |
161 | 0 | gw->pfec |= rc & PFEC_synth_mask; |
162 | 0 | goto out; |
163 | 0 | } |
164 | 364k | |
165 | 364k | /* Get the l3e and check its flags*/ |
166 | 364k | gw->l3e = l3p[guest_l3_table_offset(va)]; |
167 | 364k | gflags = guest_l3e_get_flags(gw->l3e); |
168 | 364k | if ( !(gflags & _PAGE_PRESENT) ) |
169 | 0 | goto out; |
170 | 364k | |
171 | 364k | /* Check for reserved bits, including possibly _PAGE_PSE. */ |
172 | 364k | if ( guest_l3e_rsvd_bits(v, gw->l3e) ) |
173 | 0 | { |
174 | 0 | gw->pfec |= PFEC_reserved_bit | PFEC_page_present; |
175 | 0 | goto out; |
176 | 0 | } |
177 | 364k | |
178 | 364k | /* Accumulate l3e access rights. */ |
179 | 364k | ar_and &= gflags; |
180 | 364k | ar_or |= gflags; |
181 | 364k | |
182 | 364k | if ( gflags & _PAGE_PSE ) |
183 | 0 | { |
184 | 0 | /* |
185 | 0 | * Generate a fake l1 table entry so callers don't all |
186 | 0 | * have to understand superpages. |
187 | 0 | */ |
188 | 0 | gfn_t start = guest_l3e_get_gfn(gw->l3e); |
189 | 0 | /* |
190 | 0 | * Grant full access in the l1e, since all the guest entry's |
191 | 0 | * access controls are enforced in the l3e. |
192 | 0 | */ |
193 | 0 | int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW| |
194 | 0 | _PAGE_ACCESSED|_PAGE_DIRTY); |
195 | 0 | /* |
196 | 0 | * Import protection key and cache-control bits. Note that _PAGE_PAT |
197 | 0 | * is actually _PAGE_PSE, and it is always set. We will clear it in |
198 | 0 | * case _PAGE_PSE_PAT (bit 12, i.e. first bit of gfn) is clear. |
199 | 0 | */ |
200 | 0 | flags |= (guest_l3e_get_flags(gw->l3e) |
201 | 0 | & (_PAGE_PKEY_BITS|_PAGE_PAT|_PAGE_PWT|_PAGE_PCD)); |
202 | 0 | if ( !(gfn_x(start) & 1) ) |
203 | 0 | /* _PAGE_PSE_PAT not set: remove _PAGE_PAT from flags. */ |
204 | 0 | flags &= ~_PAGE_PAT; |
205 | 0 |
|
206 | 0 | /* Increment the pfn by the right number of 4k pages. */ |
207 | 0 | start = _gfn((gfn_x(start) & ~GUEST_L3_GFN_MASK) + |
208 | 0 | ((va >> PAGE_SHIFT) & GUEST_L3_GFN_MASK)); |
209 | 0 | gw->l1e = guest_l1e_from_gfn(start, flags); |
210 | 0 | gw->l2mfn = gw->l1mfn = INVALID_MFN; |
211 | 0 | leaf_level = 3; |
212 | 0 | goto leaf; |
213 | 0 | } |
214 | 364k | |
215 | 364k | #else /* PAE only... */ |
216 | | |
217 | | /* Get the l3e and check its flags */ |
218 | 0 | gw->l3e = ((guest_l3e_t *) top_map)[guest_l3_table_offset(va)]; |
219 | | gflags = guest_l3e_get_flags(gw->l3e); |
220 | 0 | if ( !(gflags & _PAGE_PRESENT) ) |
221 | 0 | goto out; |
222 | | |
223 | 0 | if ( guest_l3e_rsvd_bits(v, gw->l3e) ) |
224 | 0 | { |
225 | 0 | gw->pfec |= PFEC_reserved_bit | PFEC_page_present; |
226 | 0 | goto out; |
227 | 0 | } |
228 | 0 |
|
229 | 0 | #endif /* PAE or 64... */ |
230 | 0 |
|
231 | 0 | /* Map the l2 table */ |
232 | 364k | l2p = map_domain_gfn(p2m, |
233 | 364k | guest_l3e_get_gfn(gw->l3e), |
234 | 364k | &gw->l2mfn, |
235 | 364k | &p2mt, |
236 | 364k | qt, |
237 | 364k | &rc); |
238 | 364k | if ( l2p == NULL ) |
239 | 0 | { |
240 | 0 | gw->pfec |= rc & PFEC_synth_mask; |
241 | 0 | goto out; |
242 | 0 | } |
243 | 364k | |
244 | 364k | /* Get the l2e */ |
245 | 364k | gw->l2e = l2p[guest_l2_table_offset(va)]; |
246 | 364k | |
247 | 364k | #else /* 32-bit only... */ |
248 | | |
249 | | /* Get l2e from the top level table */ |
250 | | gw->l2mfn = top_mfn; |
251 | | l2p = (guest_l2e_t *) top_map; |
252 | 0 | gw->l2e = l2p[guest_l2_table_offset(va)]; |
253 | | |
254 | | #endif /* All levels... */ |
255 | 364k | |
256 | 364k | /* Check the l2e flags. */ |
257 | 364k | gflags = guest_l2e_get_flags(gw->l2e); |
258 | 364k | if ( !(gflags & _PAGE_PRESENT) ) |
259 | 0 | goto out; |
260 | 364k | |
261 | 364k | /* |
262 | 364k | * In 2-level paging without CR0.PSE, there are no reserved bits, and the |
263 | 364k | * PAT/PSE bit is ignored. |
264 | 364k | */ |
265 | 364k | if ( GUEST_PAGING_LEVELS == 2 && !guest_can_use_l2_superpages(v) ) |
266 | 0 | { |
267 | 0 | gw->l2e.l2 &= ~_PAGE_PSE; |
268 | 0 | gflags &= ~_PAGE_PSE; |
269 | 0 | } |
270 | 364k | /* else check for reserved bits, including possibly _PAGE_PSE. */ |
271 | 364k | else if ( guest_l2e_rsvd_bits(v, gw->l2e) ) |
272 | 0 | { |
273 | 0 | gw->pfec |= PFEC_reserved_bit | PFEC_page_present; |
274 | 0 | goto out; |
275 | 0 | } |
276 | 364k | |
277 | 364k | /* Accumulate l2e access rights. */ |
278 | 364k | ar_and &= gflags; |
279 | 364k | ar_or |= gflags; |
280 | 364k | |
281 | 364k | if ( gflags & _PAGE_PSE ) |
282 | 225k | { |
283 | 225k | /* |
284 | 225k | * Special case: this guest VA is in a PSE superpage, so there's |
285 | 225k | * no guest l1e. We make one up so that the propagation code |
286 | 225k | * can generate a shadow l1 table. Start with the gfn of the |
287 | 225k | * first 4k-page of the superpage. |
288 | 225k | */ |
289 | 225k | #if GUEST_PAGING_LEVELS == 2 |
290 | 0 | gfn_t start = _gfn(unfold_pse36(gw->l2e.l2) >> PAGE_SHIFT); |
291 | | #else |
292 | | gfn_t start = guest_l2e_get_gfn(gw->l2e); |
293 | | #endif |
294 | 225k | /* |
295 | 225k | * Grant full access in the l1e, since all the guest entry's |
296 | 225k | * access controls are enforced in the shadow l2e. |
297 | 225k | */ |
298 | 225k | int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW| |
299 | 225k | _PAGE_ACCESSED|_PAGE_DIRTY); |
300 | 225k | /* |
301 | 225k | * Import protection key and cache-control bits. Note that _PAGE_PAT |
302 | 225k | * is actually _PAGE_PSE, and it is always set. We will clear it in |
303 | 225k | * case _PAGE_PSE_PAT (bit 12, i.e. first bit of gfn) is clear. |
304 | 225k | */ |
305 | 225k | flags |= (guest_l2e_get_flags(gw->l2e) |
306 | 225k | & (_PAGE_PKEY_BITS|_PAGE_PAT|_PAGE_PWT|_PAGE_PCD)); |
307 | 225k | if ( !(gfn_x(start) & 1) ) |
308 | 225k | /* _PAGE_PSE_PAT not set: remove _PAGE_PAT from flags. */ |
309 | 225k | flags &= ~_PAGE_PAT; |
310 | 225k | |
311 | 225k | /* Increment the pfn by the right number of 4k pages. */ |
312 | 225k | start = _gfn((gfn_x(start) & ~GUEST_L2_GFN_MASK) + |
313 | 225k | guest_l1_table_offset(va)); |
314 | 225k | #if GUEST_PAGING_LEVELS == 2 |
315 | | /* Wider than 32 bits if PSE36 superpage. */ |
316 | 0 | gw->el1e = (gfn_x(start) << PAGE_SHIFT) | flags; |
317 | | #else |
318 | | gw->l1e = guest_l1e_from_gfn(start, flags); |
319 | | #endif |
320 | 225k | gw->l1mfn = INVALID_MFN; |
321 | 225k | leaf_level = 2; |
322 | 225k | goto leaf; |
323 | 225k | } |
324 | 364k | |
325 | 364k | /* Map the l1 table */ |
326 | 139k | l1p = map_domain_gfn(p2m, |
327 | 139k | guest_l2e_get_gfn(gw->l2e), |
328 | 139k | &gw->l1mfn, |
329 | 139k | &p2mt, |
330 | 139k | qt, |
331 | 139k | &rc); |
332 | 139k | if ( l1p == NULL ) |
333 | 0 | { |
334 | 0 | gw->pfec |= rc & PFEC_synth_mask; |
335 | 0 | goto out; |
336 | 0 | } |
337 | 139k | gw->l1e = l1p[guest_l1_table_offset(va)]; |
338 | 139k | gflags = guest_l1e_get_flags(gw->l1e); |
339 | 139k | if ( !(gflags & _PAGE_PRESENT) ) |
340 | 0 | goto out; |
341 | 139k | |
342 | 139k | /* Check for reserved bits. */ |
343 | 139k | if ( guest_l1e_rsvd_bits(v, gw->l1e) ) |
344 | 0 | { |
345 | 0 | gw->pfec |= PFEC_reserved_bit | PFEC_page_present; |
346 | 0 | goto out; |
347 | 0 | } |
348 | 139k | |
349 | 139k | /* Accumulate l1e access rights. */ |
350 | 139k | ar_and &= gflags; |
351 | 139k | ar_or |= gflags; |
352 | 139k | |
353 | 139k | leaf_level = 1; |
354 | 139k | |
355 | 365k | leaf: |
356 | 365k | gw->pfec |= PFEC_page_present; |
357 | 365k | |
358 | 365k | /* |
359 | 365k | * The pagetable walk has returned a successful translation (i.e. All PTEs |
360 | 365k | * are present and have no reserved bits set). Now check access rights to |
361 | 365k | * see whether the access should succeed. |
362 | 365k | */ |
363 | 365k | ar = (ar_and & AR_ACCUM_AND) | (ar_or & AR_ACCUM_OR); |
364 | 365k | |
365 | 365k | /* |
366 | 365k | * Sanity check. If EFER.NX is disabled, _PAGE_NX_BIT is reserved and |
367 | 365k | * should have caused a translation failure before we get here. |
368 | 365k | */ |
369 | 365k | if ( ar & _PAGE_NX_BIT ) |
370 | 257 | ASSERT(guest_nx_enabled(v)); |
371 | 365k | |
372 | 365k | #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */ |
373 | | /* |
374 | | * If all access checks are thus far ok, check Protection Key for 64bit |
375 | | * data accesses to user mappings. |
376 | | * |
377 | | * N.B. In the case that the walk ended with a superpage, the fabricated |
378 | | * gw->l1e contains the appropriate leaf pkey. |
379 | | */ |
380 | 365k | if ( (ar & _PAGE_USER) && !(walk & PFEC_insn_fetch) && |
381 | 282 | guest_pku_enabled(v) ) |
382 | 0 | { |
383 | 0 | unsigned int pkey = guest_l1e_get_pkey(gw->l1e); |
384 | 0 | unsigned int pkru = read_pkru(); |
385 | 0 |
|
386 | 0 | if ( read_pkru_ad(pkru, pkey) || |
387 | 0 | ((walk & PFEC_write_access) && read_pkru_wd(pkru, pkey) && |
388 | 0 | ((walk & PFEC_user_mode) || guest_wp_enabled(v))) ) |
389 | 0 | { |
390 | 0 | gw->pfec |= PFEC_prot_key; |
391 | 0 | goto out; |
392 | 0 | } |
393 | 0 | } |
394 | | #endif |
395 | 365k | |
396 | 365k | if ( (walk & PFEC_insn_fetch) && (ar & _PAGE_NX_BIT) ) |
397 | 365k | /* Requested an instruction fetch and found NX? Fail. */ |
398 | 0 | goto out; |
399 | 365k | |
400 | 365k | if ( walk & PFEC_user_mode ) /* Requested a user access. */ |
401 | 0 | { |
402 | 0 | if ( !(ar & _PAGE_USER) ) |
403 | 0 | /* Got a supervisor walk? Unconditional fail. */ |
404 | 0 | goto out; |
405 | 0 |
|
406 | 0 | if ( (walk & PFEC_write_access) && !(ar & _PAGE_RW) ) |
407 | 0 | /* Requested a write and only got a read? Fail. */ |
408 | 0 | goto out; |
409 | 0 | } |
410 | 365k | else /* Requested a supervisor access. */ |
411 | 365k | { |
412 | 365k | if ( ar & _PAGE_USER ) /* Got a user walk. */ |
413 | 282 | { |
414 | 282 | if ( (walk & PFEC_insn_fetch) && guest_smep_enabled(v) ) |
415 | 282 | /* User insn fetch and smep? Fail. */ |
416 | 0 | goto out; |
417 | 282 | |
418 | 282 | if ( !(walk & PFEC_insn_fetch) && guest_smap_enabled(v) && |
419 | 0 | ((walk & PFEC_implicit) || |
420 | 0 | !(guest_cpu_user_regs()->eflags & X86_EFLAGS_AC)) ) |
421 | 282 | /* User data access and smap? Fail. */ |
422 | 0 | goto out; |
423 | 282 | } |
424 | 365k | |
425 | 365k | if ( (walk & PFEC_write_access) && !(ar & _PAGE_RW) && |
426 | 0 | guest_wp_enabled(v) ) |
427 | 365k | /* Requested a write, got a read, and CR0.WP is set? Fail. */ |
428 | 0 | goto out; |
429 | 365k | } |
430 | 365k | |
431 | 365k | walk_ok = true; |
432 | 365k | |
433 | 365k | /* |
434 | 365k | * Go back and set accessed and dirty bits only if the walk was a |
435 | 365k | * success. Although the PRMs say higher-level _PAGE_ACCESSED bits |
436 | 365k | * get set whenever a lower-level PT is used, at least some hardware |
437 | 365k | * walkers behave this way. |
438 | 365k | */ |
439 | 365k | switch ( leaf_level ) |
440 | 365k | { |
441 | 0 | default: |
442 | 0 | ASSERT_UNREACHABLE(); |
443 | 0 | break; |
444 | 0 |
|
445 | 0 | case 1: |
446 | 139k | if ( set_ad_bits(&l1p[guest_l1_table_offset(va)].l1, &gw->l1e.l1, |
447 | 139k | (walk & PFEC_write_access)) ) |
448 | 0 | paging_mark_dirty(d, gw->l1mfn); |
449 | 0 | /* Fallthrough */ |
450 | 0 | case 2: |
451 | 364k | if ( set_ad_bits(&l2p[guest_l2_table_offset(va)].l2, &gw->l2e.l2, |
452 | 364k | (walk & PFEC_write_access) && leaf_level == 2) ) |
453 | 0 | paging_mark_dirty(d, gw->l2mfn); |
454 | 364k | /* Fallthrough */ |
455 | 364k | #if GUEST_PAGING_LEVELS == 4 /* 64-bit only... */ |
456 | 364k | case 3: |
457 | 364k | if ( set_ad_bits(&l3p[guest_l3_table_offset(va)].l3, &gw->l3e.l3, |
458 | 364k | (walk & PFEC_write_access) && leaf_level == 3) ) |
459 | 0 | paging_mark_dirty(d, gw->l3mfn); |
460 | 364k | |
461 | 364k | if ( set_ad_bits(&l4p[guest_l4_table_offset(va)].l4, &gw->l4e.l4, |
462 | 364k | false) ) |
463 | 0 | paging_mark_dirty(d, gw->l4mfn); |
464 | | #endif |
465 | 364k | } |
466 | 364k | |
467 | 364k | out: |
468 | 364k | #if GUEST_PAGING_LEVELS == 4 |
469 | 364k | if ( l3p ) |
470 | 364k | { |
471 | 364k | unmap_domain_page(l3p); |
472 | 364k | put_page(mfn_to_page(mfn_x(gw->l3mfn))); |
473 | 364k | } |
474 | 364k | #endif |
475 | 364k | #if GUEST_PAGING_LEVELS >= 3 |
476 | 364k | if ( l2p ) |
477 | 364k | { |
478 | 364k | unmap_domain_page(l2p); |
479 | 364k | put_page(mfn_to_page(mfn_x(gw->l2mfn))); |
480 | 364k | } |
481 | 364k | #endif |
482 | 364k | if ( l1p ) |
483 | 364k | { |
484 | 364k | unmap_domain_page(l1p); |
485 | 364k | put_page(mfn_to_page(mfn_x(gw->l1mfn))); |
486 | 364k | } |
487 | 364k | |
488 | 364k | return walk_ok; |
489 | 364k | } Unexecuted instantiation: guest_walk_tables_2_levels Unexecuted instantiation: guest_walk_tables_3_levels guest_walk_tables_4_levels Line | Count | Source | 86 | 364k | { | 87 | 364k | struct domain *d = v->domain; | 88 | 364k | p2m_type_t p2mt; | 89 | 364k | guest_l1e_t *l1p = NULL; | 90 | 364k | guest_l2e_t *l2p = NULL; | 91 | 364k | #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */ | 92 | 364k | guest_l3e_t *l3p = NULL; | 93 | 364k | guest_l4e_t *l4p; | 94 | 364k | #endif | 95 | 364k | uint32_t gflags, rc; | 96 | 364k | unsigned int leaf_level; | 97 | 364k | p2m_query_t qt = P2M_ALLOC | P2M_UNSHARE; | 98 | 364k | | 99 | 364k | #define AR_ACCUM_AND (_PAGE_USER | _PAGE_RW) | 100 | 364k | #define AR_ACCUM_OR (_PAGE_NX_BIT) | 101 | 364k | /* Start with all AND bits set, all OR bits clear. */ | 102 | 364k | uint32_t ar, ar_and = ~0u, ar_or = 0; | 103 | 364k | | 104 | 364k | bool walk_ok = false; | 105 | 364k | | 106 | 364k | /* | 107 | 364k | * TODO - We should ASSERT() that only the following bits are set as | 108 | 364k | * inputs to a guest walk, but a whole load of code currently passes in | 109 | 364k | * other PFEC_ constants. | 110 | 364k | */ | 111 | 364k | walk &= (PFEC_implicit | PFEC_insn_fetch | PFEC_user_mode | PFEC_write_access); | 112 | 364k | | 113 | 364k | /* Only implicit supervisor data accesses exist. */ | 114 | 364k | ASSERT(!(walk & PFEC_implicit) || | 115 | 364k | !(walk & (PFEC_insn_fetch | PFEC_user_mode))); | 116 | 364k | | 117 | 364k | perfc_incr(guest_walk); | 118 | 364k | memset(gw, 0, sizeof(*gw)); | 119 | 364k | gw->va = va; | 120 | 364k | gw->pfec = walk & (PFEC_user_mode | PFEC_write_access); | 121 | 364k | | 122 | 364k | /* | 123 | 364k | * PFEC_insn_fetch is only reported if NX or SMEP are enabled. Hardware | 124 | 364k | * still distingueses instruction fetches during determination of access | 125 | 364k | * rights. 
| 126 | 364k | */ | 127 | 364k | if ( guest_nx_enabled(v) || guest_smep_enabled(v) ) | 128 | 365k | gw->pfec |= (walk & PFEC_insn_fetch); | 129 | 364k | | 130 | 364k | #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */ | 131 | 364k | #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */ | 132 | 364k | | 133 | 364k | /* Get the l4e from the top level table and check its flags*/ | 134 | 364k | gw->l4mfn = top_mfn; | 135 | 364k | l4p = (guest_l4e_t *) top_map; | 136 | 364k | gw->l4e = l4p[guest_l4_table_offset(va)]; | 137 | 364k | gflags = guest_l4e_get_flags(gw->l4e); | 138 | 364k | if ( !(gflags & _PAGE_PRESENT) ) | 139 | 0 | goto out; | 140 | 364k | | 141 | 364k | /* Check for reserved bits. */ | 142 | 364k | if ( guest_l4e_rsvd_bits(v, gw->l4e) ) | 143 | 0 | { | 144 | 0 | gw->pfec |= PFEC_reserved_bit | PFEC_page_present; | 145 | 0 | goto out; | 146 | 0 | } | 147 | 364k | | 148 | 364k | /* Accumulate l4e access rights. */ | 149 | 364k | ar_and &= gflags; | 150 | 364k | ar_or |= gflags; | 151 | 364k | | 152 | 364k | /* Map the l3 table */ | 153 | 364k | l3p = map_domain_gfn(p2m, | 154 | 364k | guest_l4e_get_gfn(gw->l4e), | 155 | 364k | &gw->l3mfn, | 156 | 364k | &p2mt, | 157 | 364k | qt, | 158 | 364k | &rc); | 159 | 364k | if ( l3p == NULL ) | 160 | 0 | { | 161 | 0 | gw->pfec |= rc & PFEC_synth_mask; | 162 | 0 | goto out; | 163 | 0 | } | 164 | 364k | | 165 | 364k | /* Get the l3e and check its flags*/ | 166 | 364k | gw->l3e = l3p[guest_l3_table_offset(va)]; | 167 | 364k | gflags = guest_l3e_get_flags(gw->l3e); | 168 | 364k | if ( !(gflags & _PAGE_PRESENT) ) | 169 | 0 | goto out; | 170 | 364k | | 171 | 364k | /* Check for reserved bits, including possibly _PAGE_PSE. */ | 172 | 364k | if ( guest_l3e_rsvd_bits(v, gw->l3e) ) | 173 | 0 | { | 174 | 0 | gw->pfec |= PFEC_reserved_bit | PFEC_page_present; | 175 | 0 | goto out; | 176 | 0 | } | 177 | 364k | | 178 | 364k | /* Accumulate l3e access rights. 
*/ | 179 | 364k | ar_and &= gflags; | 180 | 364k | ar_or |= gflags; | 181 | 364k | | 182 | 364k | if ( gflags & _PAGE_PSE ) | 183 | 0 | { | 184 | 0 | /* | 185 | 0 | * Generate a fake l1 table entry so callers don't all | 186 | 0 | * have to understand superpages. | 187 | 0 | */ | 188 | 0 | gfn_t start = guest_l3e_get_gfn(gw->l3e); | 189 | 0 | /* | 190 | 0 | * Grant full access in the l1e, since all the guest entry's | 191 | 0 | * access controls are enforced in the l3e. | 192 | 0 | */ | 193 | 0 | int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW| | 194 | 0 | _PAGE_ACCESSED|_PAGE_DIRTY); | 195 | 0 | /* | 196 | 0 | * Import protection key and cache-control bits. Note that _PAGE_PAT | 197 | 0 | * is actually _PAGE_PSE, and it is always set. We will clear it in | 198 | 0 | * case _PAGE_PSE_PAT (bit 12, i.e. first bit of gfn) is clear. | 199 | 0 | */ | 200 | 0 | flags |= (guest_l3e_get_flags(gw->l3e) | 201 | 0 | & (_PAGE_PKEY_BITS|_PAGE_PAT|_PAGE_PWT|_PAGE_PCD)); | 202 | 0 | if ( !(gfn_x(start) & 1) ) | 203 | 0 | /* _PAGE_PSE_PAT not set: remove _PAGE_PAT from flags. */ | 204 | 0 | flags &= ~_PAGE_PAT; | 205 | 0 |
| 206 | 0 | /* Increment the pfn by the right number of 4k pages. */ | 207 | 0 | start = _gfn((gfn_x(start) & ~GUEST_L3_GFN_MASK) + | 208 | 0 | ((va >> PAGE_SHIFT) & GUEST_L3_GFN_MASK)); | 209 | 0 | gw->l1e = guest_l1e_from_gfn(start, flags); | 210 | 0 | gw->l2mfn = gw->l1mfn = INVALID_MFN; | 211 | 0 | leaf_level = 3; | 212 | 0 | goto leaf; | 213 | 0 | } | 214 | 364k | | 215 | 364k | #else /* PAE only... */ | 216 | | | 217 | | /* Get the l3e and check its flag */ | 218 | | gw->l3e = ((guest_l3e_t *) top_map)[guest_l3_table_offset(va)]; | 219 | | gflags = guest_l3e_get_flags(gw->l3e); | 220 | | if ( !(gflags & _PAGE_PRESENT) ) | 221 | | goto out; | 222 | | | 223 | | if ( guest_l3e_rsvd_bits(v, gw->l3e) ) | 224 | | { | 225 | | gw->pfec |= PFEC_reserved_bit | PFEC_page_present; | 226 | | goto out; | 227 | | } | 228 | | | 229 | | #endif /* PAE or 64... */ | 230 | 364k | | 231 | 364k | /* Map the l2 table */ | 232 | 364k | l2p = map_domain_gfn(p2m, | 233 | 364k | guest_l3e_get_gfn(gw->l3e), | 234 | 364k | &gw->l2mfn, | 235 | 364k | &p2mt, | 236 | 364k | qt, | 237 | 364k | &rc); | 238 | 364k | if ( l2p == NULL ) | 239 | 0 | { | 240 | 0 | gw->pfec |= rc & PFEC_synth_mask; | 241 | 0 | goto out; | 242 | 0 | } | 243 | 364k | | 244 | 364k | /* Get the l2e */ | 245 | 364k | gw->l2e = l2p[guest_l2_table_offset(va)]; | 246 | 364k | | 247 | 364k | #else /* 32-bit only... */ | 248 | | | 249 | | /* Get l2e from the top level table */ | 250 | | gw->l2mfn = top_mfn; | 251 | | l2p = (guest_l2e_t *) top_map; | 252 | | gw->l2e = l2p[guest_l2_table_offset(va)]; | 253 | | | 254 | | #endif /* All levels... */ | 255 | 364k | | 256 | 364k | /* Check the l2e flags. */ | 257 | 364k | gflags = guest_l2e_get_flags(gw->l2e); | 258 | 364k | if ( !(gflags & _PAGE_PRESENT) ) | 259 | 0 | goto out; | 260 | 364k | | 261 | 364k | /* | 262 | 364k | * In 2-level paging without CR0.PSE, there are no reserved bits, and the | 263 | 364k | * PAT/PSE bit is ignored. 
| 264 | 364k | */ | 265 | 364k | if ( GUEST_PAGING_LEVELS == 2 && !guest_can_use_l2_superpages(v) ) | 266 | 0 | { | 267 | 0 | gw->l2e.l2 &= ~_PAGE_PSE; | 268 | 0 | gflags &= ~_PAGE_PSE; | 269 | 0 | } | 270 | 364k | /* else check for reserved bits, including possibly _PAGE_PSE. */ | 271 | 364k | else if ( guest_l2e_rsvd_bits(v, gw->l2e) ) | 272 | 0 | { | 273 | 0 | gw->pfec |= PFEC_reserved_bit | PFEC_page_present; | 274 | 0 | goto out; | 275 | 0 | } | 276 | 364k | | 277 | 364k | /* Accumulate l2e access rights. */ | 278 | 364k | ar_and &= gflags; | 279 | 364k | ar_or |= gflags; | 280 | 364k | | 281 | 364k | if ( gflags & _PAGE_PSE ) | 282 | 225k | { | 283 | 225k | /* | 284 | 225k | * Special case: this guest VA is in a PSE superpage, so there's | 285 | 225k | * no guest l1e. We make one up so that the propagation code | 286 | 225k | * can generate a shadow l1 table. Start with the gfn of the | 287 | 225k | * first 4k-page of the superpage. | 288 | 225k | */ | 289 | 225k | #if GUEST_PAGING_LEVELS == 2 | 290 | | gfn_t start = _gfn(unfold_pse36(gw->l2e.l2) >> PAGE_SHIFT); | 291 | | #else | 292 | 225k | gfn_t start = guest_l2e_get_gfn(gw->l2e); | 293 | 225k | #endif | 294 | 225k | /* | 295 | 225k | * Grant full access in the l1e, since all the guest entry's | 296 | 225k | * access controls are enforced in the shadow l2e. | 297 | 225k | */ | 298 | 225k | int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW| | 299 | 225k | _PAGE_ACCESSED|_PAGE_DIRTY); | 300 | 225k | /* | 301 | 225k | * Import protection key and cache-control bits. Note that _PAGE_PAT | 302 | 225k | * is actually _PAGE_PSE, and it is always set. We will clear it in | 303 | 225k | * case _PAGE_PSE_PAT (bit 12, i.e. first bit of gfn) is clear. | 304 | 225k | */ | 305 | 225k | flags |= (guest_l2e_get_flags(gw->l2e) | 306 | 225k | & (_PAGE_PKEY_BITS|_PAGE_PAT|_PAGE_PWT|_PAGE_PCD)); | 307 | 225k | if ( !(gfn_x(start) & 1) ) | 308 | 225k | /* _PAGE_PSE_PAT not set: remove _PAGE_PAT from flags. 
*/ | 309 | 225k | flags &= ~_PAGE_PAT; | 310 | 225k | | 311 | 225k | /* Increment the pfn by the right number of 4k pages. */ | 312 | 225k | start = _gfn((gfn_x(start) & ~GUEST_L2_GFN_MASK) + | 313 | 225k | guest_l1_table_offset(va)); | 314 | 225k | #if GUEST_PAGING_LEVELS == 2 | 315 | | /* Wider than 32 bits if PSE36 superpage. */ | 316 | | gw->el1e = (gfn_x(start) << PAGE_SHIFT) | flags; | 317 | | #else | 318 | 225k | gw->l1e = guest_l1e_from_gfn(start, flags); | 319 | 225k | #endif | 320 | 225k | gw->l1mfn = INVALID_MFN; | 321 | 225k | leaf_level = 2; | 322 | 225k | goto leaf; | 323 | 225k | } | 324 | 364k | | 325 | 364k | /* Map the l1 table */ | 326 | 139k | l1p = map_domain_gfn(p2m, | 327 | 139k | guest_l2e_get_gfn(gw->l2e), | 328 | 139k | &gw->l1mfn, | 329 | 139k | &p2mt, | 330 | 139k | qt, | 331 | 139k | &rc); | 332 | 139k | if ( l1p == NULL ) | 333 | 0 | { | 334 | 0 | gw->pfec |= rc & PFEC_synth_mask; | 335 | 0 | goto out; | 336 | 0 | } | 337 | 139k | gw->l1e = l1p[guest_l1_table_offset(va)]; | 338 | 139k | gflags = guest_l1e_get_flags(gw->l1e); | 339 | 139k | if ( !(gflags & _PAGE_PRESENT) ) | 340 | 0 | goto out; | 341 | 139k | | 342 | 139k | /* Check for reserved bits. */ | 343 | 139k | if ( guest_l1e_rsvd_bits(v, gw->l1e) ) | 344 | 0 | { | 345 | 0 | gw->pfec |= PFEC_reserved_bit | PFEC_page_present; | 346 | 0 | goto out; | 347 | 0 | } | 348 | 139k | | 349 | 139k | /* Accumulate l1e access rights. */ | 350 | 139k | ar_and &= gflags; | 351 | 139k | ar_or |= gflags; | 352 | 139k | | 353 | 139k | leaf_level = 1; | 354 | 139k | | 355 | 365k | leaf: | 356 | 365k | gw->pfec |= PFEC_page_present; | 357 | 365k | | 358 | 365k | /* | 359 | 365k | * The pagetable walk has returned a successful translation (i.e. All PTEs | 360 | 365k | * are present and have no reserved bits set). Now check access rights to | 361 | 365k | * see whether the access should succeed. 
| 362 | 365k | */ | 363 | 365k | ar = (ar_and & AR_ACCUM_AND) | (ar_or & AR_ACCUM_OR); | 364 | 365k | | 365 | 365k | /* | 366 | 365k | * Sanity check. If EFER.NX is disabled, _PAGE_NX_BIT is reserved and | 367 | 365k | * should have caused a translation failure before we get here. | 368 | 365k | */ | 369 | 365k | if ( ar & _PAGE_NX_BIT ) | 370 | 257 | ASSERT(guest_nx_enabled(v)); | 371 | 365k | | 372 | 365k | #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */ | 373 | 365k | /* | 374 | 365k | * If all access checks are thus far ok, check Protection Key for 64bit | 375 | 365k | * data accesses to user mappings. | 376 | 365k | * | 377 | 365k | * N.B. In the case that the walk ended with a superpage, the fabricated | 378 | 365k | * gw->l1e contains the appropriate leaf pkey. | 379 | 365k | */ | 380 | 365k | if ( (ar & _PAGE_USER) && !(walk & PFEC_insn_fetch) && | 381 | 282 | guest_pku_enabled(v) ) | 382 | 0 | { | 383 | 0 | unsigned int pkey = guest_l1e_get_pkey(gw->l1e); | 384 | 0 | unsigned int pkru = read_pkru(); | 385 | 0 |
| 386 | 0 | if ( read_pkru_ad(pkru, pkey) || | 387 | 0 | ((walk & PFEC_write_access) && read_pkru_wd(pkru, pkey) && | 388 | 0 | ((walk & PFEC_user_mode) || guest_wp_enabled(v))) ) | 389 | 0 | { | 390 | 0 | gw->pfec |= PFEC_prot_key; | 391 | 0 | goto out; | 392 | 0 | } | 393 | 0 | } | 394 | 365k | #endif | 395 | 365k | | 396 | 365k | if ( (walk & PFEC_insn_fetch) && (ar & _PAGE_NX_BIT) ) | 397 | 365k | /* Requested an instruction fetch and found NX? Fail. */ | 398 | 0 | goto out; | 399 | 365k | | 400 | 365k | if ( walk & PFEC_user_mode ) /* Requested a user acess. */ | 401 | 0 | { | 402 | 0 | if ( !(ar & _PAGE_USER) ) | 403 | 0 | /* Got a supervisor walk? Unconditional fail. */ | 404 | 0 | goto out; | 405 | 0 |
| 406 | 0 | if ( (walk & PFEC_write_access) && !(ar & _PAGE_RW) ) | 407 | 0 | /* Requested a write and only got a read? Fail. */ | 408 | 0 | goto out; | 409 | 0 | } | 410 | 365k | else /* Requested a supervisor access. */ | 411 | 365k | { | 412 | 365k | if ( ar & _PAGE_USER ) /* Got a user walk. */ | 413 | 282 | { | 414 | 282 | if ( (walk & PFEC_insn_fetch) && guest_smep_enabled(v) ) | 415 | 282 | /* User insn fetch and smep? Fail. */ | 416 | 0 | goto out; | 417 | 282 | | 418 | 282 | if ( !(walk & PFEC_insn_fetch) && guest_smap_enabled(v) && | 419 | 0 | ((walk & PFEC_implicit) || | 420 | 0 | !(guest_cpu_user_regs()->eflags & X86_EFLAGS_AC)) ) | 421 | 282 | /* User data access and smap? Fail. */ | 422 | 0 | goto out; | 423 | 282 | } | 424 | 365k | | 425 | 365k | if ( (walk & PFEC_write_access) && !(ar & _PAGE_RW) && | 426 | 0 | guest_wp_enabled(v) ) | 427 | 365k | /* Requested a write, got a read, and CR0.WP is set? Fail. */ | 428 | 0 | goto out; | 429 | 365k | } | 430 | 365k | | 431 | 365k | walk_ok = true; | 432 | 365k | | 433 | 365k | /* | 434 | 365k | * Go back and set accessed and dirty bits only if the walk was a | 435 | 365k | * success. Although the PRMs say higher-level _PAGE_ACCESSED bits | 436 | 365k | * get set whenever a lower-level PT is used, at least some hardware | 437 | 365k | * walkers behave this way. | 438 | 365k | */ | 439 | 365k | switch ( leaf_level ) | 440 | 365k | { | 441 | 0 | default: | 442 | 0 | ASSERT_UNREACHABLE(); | 443 | 0 | break; | 444 | 0 |
| 445 | 139k | case 1: | 446 | 139k | if ( set_ad_bits(&l1p[guest_l1_table_offset(va)].l1, &gw->l1e.l1, | 447 | 139k | (walk & PFEC_write_access)) ) | 448 | 0 | paging_mark_dirty(d, gw->l1mfn); | 449 | 139k | /* Fallthrough */ | 450 | 364k | case 2: | 451 | 364k | if ( set_ad_bits(&l2p[guest_l2_table_offset(va)].l2, &gw->l2e.l2, | 452 | 364k | (walk & PFEC_write_access) && leaf_level == 2) ) | 453 | 0 | paging_mark_dirty(d, gw->l2mfn); | 454 | 364k | /* Fallthrough */ | 455 | 364k | #if GUEST_PAGING_LEVELS == 4 /* 64-bit only... */ | 456 | 364k | case 3: | 457 | 364k | if ( set_ad_bits(&l3p[guest_l3_table_offset(va)].l3, &gw->l3e.l3, | 458 | 364k | (walk & PFEC_write_access) && leaf_level == 3) ) | 459 | 0 | paging_mark_dirty(d, gw->l3mfn); | 460 | 364k | | 461 | 364k | if ( set_ad_bits(&l4p[guest_l4_table_offset(va)].l4, &gw->l4e.l4, | 462 | 364k | false) ) | 463 | 0 | paging_mark_dirty(d, gw->l4mfn); | 464 | 365k | #endif | 465 | 365k | } | 466 | 365k | | 467 | 365k | out: | 468 | 365k | #if GUEST_PAGING_LEVELS == 4 | 469 | 365k | if ( l3p ) | 470 | 365k | { | 471 | 365k | unmap_domain_page(l3p); | 472 | 365k | put_page(mfn_to_page(mfn_x(gw->l3mfn))); | 473 | 365k | } | 474 | 365k | #endif | 475 | 365k | #if GUEST_PAGING_LEVELS >= 3 | 476 | 365k | if ( l2p ) | 477 | 366k | { | 478 | 366k | unmap_domain_page(l2p); | 479 | 366k | put_page(mfn_to_page(mfn_x(gw->l2mfn))); | 480 | 366k | } | 481 | 365k | #endif | 482 | 365k | if ( l1p ) | 483 | 141k | { | 484 | 141k | unmap_domain_page(l1p); | 485 | 141k | put_page(mfn_to_page(mfn_x(gw->l1mfn))); | 486 | 141k | } | 487 | 365k | | 488 | 365k | return walk_ok; | 489 | 365k | } |
|
490 | 364k | |
491 | 364k | /* |
492 | 364k | * Local variables: |
493 | 364k | * mode: C |
494 | 364k | * c-file-style: "BSD" |
495 | 364k | * c-basic-offset: 4 |
496 | 364k | * tab-width: 4 |
497 | 364k | * indent-tabs-mode: nil |
498 | 364k | * End: |
499 | 364k | */ |