/root/src/xen/xen/arch/x86/mm/p2m-ept.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * ept-p2m.c: use the EPT page table as p2m |
3 | | * Copyright (c) 2007, Intel Corporation. |
4 | | * |
5 | | * This program is free software; you can redistribute it and/or modify it |
6 | | * under the terms and conditions of the GNU General Public License, |
7 | | * version 2, as published by the Free Software Foundation. |
8 | | * |
9 | | * This program is distributed in the hope it will be useful, but WITHOUT |
10 | | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
11 | | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
12 | | * more details. |
13 | | * |
14 | | * You should have received a copy of the GNU General Public License along with |
15 | | * this program; If not, see <http://www.gnu.org/licenses/>. |
16 | | */ |
17 | | |
18 | | #include <xen/domain_page.h> |
19 | | #include <xen/sched.h> |
20 | | #include <asm/current.h> |
21 | | #include <asm/paging.h> |
22 | | #include <asm/types.h> |
23 | | #include <asm/domain.h> |
24 | | #include <asm/p2m.h> |
25 | | #include <asm/hvm/vmx/vmx.h> |
26 | | #include <asm/hvm/vmx/vmcs.h> |
27 | | #include <asm/hvm/nestedhvm.h> |
28 | | #include <xen/iommu.h> |
29 | | #include <asm/mtrr.h> |
30 | | #include <asm/hvm/cacheattr.h> |
31 | | #include <xen/keyhandler.h> |
32 | | #include <xen/softirq.h> |
33 | | |
34 | | #include "mm-locks.h" |
35 | | |
36 | | #define atomic_read_ept_entry(__pepte) \ |
37 | 18.6M | ( (ept_entry_t) { .epte = read_atomic(&(__pepte)->epte) } ) |
38 | | |
39 | 20.7M | #define is_epte_present(ept_entry) ((ept_entry)->epte & 0x7) |
40 | 14.9M | #define is_epte_superpage(ept_entry) ((ept_entry)->sp) |
41 | | static inline bool_t is_epte_valid(ept_entry_t *e) |
42 | 5.61M | { |
43 | 5.61M | /* suppress_ve alone is not considered valid, so mask it off */ |
44 | 4.42M | return ((e->epte & ~(1ul << 63)) != 0 && e->sa_p2mt != p2m_invalid); |
45 | 5.61M | } |
46 | | |
47 | | /* returns : 0 for success, -errno otherwise */ |
48 | | static int atomic_write_ept_entry(ept_entry_t *entryptr, ept_entry_t new, |
49 | | int level) |
50 | 3.08M | { |
51 | 3.08M | int rc; |
52 | 3.08M | unsigned long oldmfn = mfn_x(INVALID_MFN); |
53 | 3.08M | bool_t check_foreign = (new.mfn != entryptr->mfn || |
54 | 2.02M | new.sa_p2mt != entryptr->sa_p2mt); |
55 | 3.08M | |
56 | 3.08M | if ( level ) |
57 | 1.01M | { |
58 | 1.01M | ASSERT(!is_epte_superpage(&new) || !p2m_is_foreign(new.sa_p2mt)); |
59 | 1.01M | write_atomic(&entryptr->epte, new.epte); |
60 | 1.01M | return 0; |
61 | 1.01M | } |
62 | 3.08M | |
63 | 2.07M | if ( unlikely(p2m_is_foreign(new.sa_p2mt)) ) |
64 | 0 | { |
65 | 0 | rc = -EINVAL; |
66 | 0 | if ( !is_epte_present(&new) ) |
67 | 0 | goto out; |
68 | 0 |
|
69 | 0 | if ( check_foreign ) |
70 | 0 | { |
71 | 0 | struct domain *fdom; |
72 | 0 |
|
73 | 0 | if ( !mfn_valid(_mfn(new.mfn)) ) |
74 | 0 | goto out; |
75 | 0 |
|
76 | 0 | rc = -ESRCH; |
77 | 0 | fdom = page_get_owner(mfn_to_page(new.mfn)); |
78 | 0 | if ( fdom == NULL ) |
79 | 0 | goto out; |
80 | 0 |
|
81 | 0 | /* get refcount on the page */ |
82 | 0 | rc = -EBUSY; |
83 | 0 | if ( !get_page(mfn_to_page(new.mfn), fdom) ) |
84 | 0 | goto out; |
85 | 0 | } |
86 | 0 | } |
87 | 2.07M | |
88 | 2.07M | if ( unlikely(p2m_is_foreign(entryptr->sa_p2mt)) && check_foreign ) |
89 | 0 | oldmfn = entryptr->mfn; |
90 | 2.07M | |
91 | 2.07M | write_atomic(&entryptr->epte, new.epte); |
92 | 2.07M | |
93 | 2.07M | if ( unlikely(oldmfn != mfn_x(INVALID_MFN)) ) |
94 | 0 | put_page(mfn_to_page(oldmfn)); |
95 | 2.07M | |
96 | 2.07M | rc = 0; |
97 | 2.07M | |
98 | 2.07M | out: |
99 | 2.07M | if ( rc ) |
100 | 0 | gdprintk(XENLOG_ERR, "epte o:%"PRIx64" n:%"PRIx64" rc:%d\n", |
101 | 2.07M | entryptr->epte, new.epte, rc); |
102 | 2.07M | return rc; |
103 | 2.07M | } |
104 | | |
105 | | static void ept_p2m_type_to_flags(struct p2m_domain *p2m, ept_entry_t *entry, |
106 | | p2m_type_t type, p2m_access_t access) |
107 | 850k | { |
108 | 850k | /* |
109 | 850k | * First apply type permissions. |
110 | 850k | * |
111 | 850k | * A/D bits are also manually set to avoid overhead of MMU having to set |
112 | 850k | * them later. Both A/D bits are safe to be updated directly as they are |
113 | 850k | * ignored by processor if EPT A/D bits is not turned on. |
114 | 850k | * |
115 | 850k | * A bit is set for all present p2m types in middle and leaf EPT entries. |
116 | 850k | * D bit is set for all writable types in EPT leaf entry, except for |
117 | 850k | * log-dirty type with PML. |
118 | 850k | */ |
119 | 850k | switch(type) |
120 | 850k | { |
121 | 0 | case p2m_invalid: |
122 | 0 | case p2m_mmio_dm: |
123 | 0 | case p2m_populate_on_demand: |
124 | 0 | case p2m_ram_paging_out: |
125 | 0 | case p2m_ram_paged: |
126 | 0 | case p2m_ram_paging_in: |
127 | 0 | default: |
128 | 0 | entry->r = entry->w = entry->x = 0; |
129 | 0 | break; |
130 | 561k | case p2m_ram_rw: |
131 | 561k | entry->r = entry->w = entry->x = 1; |
132 | 561k | entry->a = entry->d = !!cpu_has_vmx_ept_ad; |
133 | 561k | break; |
134 | 0 | case p2m_ioreq_server: |
135 | 0 | entry->r = 1; |
136 | 0 | entry->w = !(p2m->ioreq.flags & XEN_DMOP_IOREQ_MEM_ACCESS_WRITE); |
137 | 0 | entry->x = 0; |
138 | 0 | entry->a = !!cpu_has_vmx_ept_ad; |
139 | 0 | entry->d = entry->w && entry->a; |
140 | 0 | break; |
141 | 289k | case p2m_mmio_direct: |
142 | 289k | entry->r = entry->x = 1; |
143 | 289k | entry->w = !rangeset_contains_singleton(mmio_ro_ranges, |
144 | 289k | entry->mfn); |
145 | 289k | ASSERT(entry->w || !is_epte_superpage(entry)); |
146 | 289k | entry->a = !!cpu_has_vmx_ept_ad; |
147 | 289k | entry->d = entry->w && cpu_has_vmx_ept_ad; |
148 | 289k | break; |
149 | 0 | case p2m_ram_logdirty: |
150 | 0 | entry->r = entry->x = 1; |
151 | 0 | /* |
152 | 0 | * In case of PML, we don't have to write protect 4K page, but |
153 | 0 | * only need to clear D-bit for it, but we still need to write |
154 | 0 | * protect super page in order to split it to 4K pages in EPT |
155 | 0 | * violation. |
156 | 0 | */ |
157 | 0 | if ( vmx_domain_pml_enabled(p2m->domain) && |
158 | 0 | !is_epte_superpage(entry) ) |
159 | 0 | entry->w = 1; |
160 | 0 | else |
161 | 0 | entry->w = 0; |
162 | 0 | entry->a = !!cpu_has_vmx_ept_ad; |
163 | 0 | /* For both PML or non-PML cases we clear D bit anyway */ |
164 | 0 | entry->d = 0; |
165 | 0 | break; |
166 | 0 | case p2m_ram_ro: |
167 | 0 | case p2m_ram_shared: |
168 | 0 | entry->r = entry->x = 1; |
169 | 0 | entry->w = 0; |
170 | 0 | entry->a = !!cpu_has_vmx_ept_ad; |
171 | 0 | entry->d = 0; |
172 | 0 | break; |
173 | 0 | case p2m_grant_map_rw: |
174 | 0 | case p2m_map_foreign: |
175 | 0 | entry->r = entry->w = 1; |
176 | 0 | entry->x = 0; |
177 | 0 | entry->a = entry->d = !!cpu_has_vmx_ept_ad; |
178 | 0 | break; |
179 | 0 | case p2m_grant_map_ro: |
180 | 0 | entry->r = 1; |
181 | 0 | entry->w = entry->x = 0; |
182 | 0 | entry->a = !!cpu_has_vmx_ept_ad; |
183 | 0 | entry->d = 0; |
184 | 0 | break; |
185 | 850k | } |
186 | 850k | |
187 | 850k | |
188 | 850k | /* Then restrict with access permissions */ |
189 | 850k | switch (access) |
190 | 850k | { |
191 | 0 | case p2m_access_n: |
192 | 0 | case p2m_access_n2rwx: |
193 | 0 | entry->r = entry->w = entry->x = 0; |
194 | 0 | break; |
195 | 0 | case p2m_access_r: |
196 | 0 | entry->w = entry->x = 0; |
197 | 0 | break; |
198 | 0 | case p2m_access_w: |
199 | 0 | entry->r = entry->x = 0; |
200 | 0 | break; |
201 | 0 | case p2m_access_x: |
202 | 0 | entry->r = entry->w = 0; |
203 | 0 | break; |
204 | 0 | case p2m_access_rx: |
205 | 0 | case p2m_access_rx2rw: |
206 | 0 | entry->w = 0; |
207 | 0 | break; |
208 | 0 | case p2m_access_wx: |
209 | 0 | entry->r = 0; |
210 | 0 | break; |
211 | 39 | case p2m_access_rw: |
212 | 39 | entry->x = 0; |
213 | 39 | break; |
214 | 850k | case p2m_access_rwx: |
215 | 850k | break; |
216 | 850k | } |
217 | 850k | |
218 | 850k | } |
219 | | |
220 | 1.64M | #define GUEST_TABLE_MAP_FAILED 0 |
221 | 14.8M | #define GUEST_TABLE_NORMAL_PAGE 1 |
222 | 8.48M | #define GUEST_TABLE_SUPER_PAGE 2 |
223 | 8.46M | #define GUEST_TABLE_POD_PAGE 3 |
224 | | |
225 | | /* Fill in middle levels of ept table */ |
226 | | static int ept_set_middle_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry) |
227 | 1.27k | { |
228 | 1.27k | mfn_t mfn; |
229 | 1.27k | ept_entry_t *table; |
230 | 1.27k | unsigned int i; |
231 | 1.27k | |
232 | 1.27k | mfn = p2m_alloc_ptp(p2m, 0); |
233 | 1.27k | if ( mfn_eq(mfn, INVALID_MFN) ) |
234 | 0 | return 0; |
235 | 1.27k | |
236 | 1.27k | ept_entry->epte = 0; |
237 | 1.27k | ept_entry->mfn = mfn_x(mfn); |
238 | 1.27k | ept_entry->access = p2m->default_access; |
239 | 1.27k | |
240 | 1.27k | ept_entry->r = ept_entry->w = ept_entry->x = 1; |
241 | 1.27k | /* Manually set A bit to avoid overhead of MMU having to write it later. */ |
242 | 1.27k | ept_entry->a = !!cpu_has_vmx_ept_ad; |
243 | 1.27k | |
244 | 1.27k | ept_entry->suppress_ve = 1; |
245 | 1.27k | |
246 | 1.27k | table = map_domain_page(mfn); |
247 | 1.27k | |
248 | 654k | for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ ) |
249 | 653k | table[i].suppress_ve = 1; |
250 | 1.27k | |
251 | 1.27k | unmap_domain_page(table); |
252 | 1.27k | |
253 | 1.27k | return 1; |
254 | 1.27k | } |
255 | | |
256 | | /* free ept sub tree behind an entry */ |
257 | | static void ept_free_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry, int level) |
258 | 218k | { |
259 | 218k | /* End if the entry is a leaf entry. */ |
260 | 218k | if ( level == 0 || !is_epte_present(ept_entry) || |
261 | 0 | is_epte_superpage(ept_entry) ) |
262 | 218k | return; |
263 | 218k | |
264 | 0 | if ( level > 1 ) |
265 | 0 | { |
266 | 0 | ept_entry_t *epte = map_domain_page(_mfn(ept_entry->mfn)); |
267 | 0 | for ( int i = 0; i < EPT_PAGETABLE_ENTRIES; i++ ) |
268 | 0 | ept_free_entry(p2m, epte + i, level - 1); |
269 | 0 | unmap_domain_page(epte); |
270 | 0 | } |
271 | 0 | |
272 | 0 | p2m_tlb_flush_sync(p2m); |
273 | 0 | p2m_free_ptp(p2m, mfn_to_page(ept_entry->mfn)); |
274 | 0 | } |
275 | | |
276 | | static bool_t ept_split_super_page(struct p2m_domain *p2m, |
277 | | ept_entry_t *ept_entry, |
278 | | unsigned int level, unsigned int target) |
279 | 0 | { |
280 | 0 | ept_entry_t new_ept, *table; |
281 | 0 | uint64_t trunk; |
282 | 0 | unsigned int i; |
283 | 0 | bool_t rv = 1; |
284 | 0 |
|
285 | 0 | /* End if the entry is a leaf entry or reaches the target level. */ |
286 | 0 | if ( level <= target ) |
287 | 0 | return 1; |
288 | 0 |
|
289 | 0 | ASSERT(is_epte_superpage(ept_entry)); |
290 | 0 |
|
291 | 0 | if ( !ept_set_middle_entry(p2m, &new_ept) ) |
292 | 0 | return 0; |
293 | 0 |
|
294 | 0 | table = map_domain_page(_mfn(new_ept.mfn)); |
295 | 0 | trunk = 1UL << ((level - 1) * EPT_TABLE_ORDER); |
296 | 0 |
|
297 | 0 | for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ ) |
298 | 0 | { |
299 | 0 | ept_entry_t *epte = table + i; |
300 | 0 |
|
301 | 0 | *epte = *ept_entry; |
302 | 0 | epte->sp = (level > 1); |
303 | 0 | epte->mfn += i * trunk; |
304 | 0 | epte->snp = (iommu_enabled && iommu_snoop); |
305 | 0 | epte->suppress_ve = 1; |
306 | 0 |
|
307 | 0 | ept_p2m_type_to_flags(p2m, epte, epte->sa_p2mt, epte->access); |
308 | 0 |
|
309 | 0 | if ( (level - 1) == target ) |
310 | 0 | continue; |
311 | 0 |
|
312 | 0 | ASSERT(is_epte_superpage(epte)); |
313 | 0 |
|
314 | 0 | if ( !(rv = ept_split_super_page(p2m, epte, level - 1, target)) ) |
315 | 0 | break; |
316 | 0 | } |
317 | 0 |
|
318 | 0 | unmap_domain_page(table); |
319 | 0 |
|
320 | 0 | /* Even failed we should install the newly allocated ept page. */ |
321 | 0 | *ept_entry = new_ept; |
322 | 0 |
|
323 | 0 | return rv; |
324 | 0 | } |
325 | | |
326 | | /* Take the currently mapped table, find the corresponding gfn entry, |
327 | | * and map the next table, if available. If the entry is empty |
 328 |          |  * and read_only is set, the walk fails (GUEST_TABLE_MAP_FAILED).                                             |
329 | | * Return values: |
330 | | * 0: Failed to map. Either read_only was set and the entry was |
331 | | * empty, or allocating a new page failed. |
332 | | * GUEST_TABLE_NORMAL_PAGE: next level mapped normally |
333 | | * GUEST_TABLE_SUPER_PAGE: |
334 | | * The next entry points to a superpage, and caller indicates |
335 | | * that they are going to the superpage level, or are only doing |
336 | | * a read. |
 337 |          |  * GUEST_TABLE_POD_PAGE:                                                                                      |
338 | | * The next entry is marked populate-on-demand. |
339 | | */ |
340 | | static int ept_next_level(struct p2m_domain *p2m, bool_t read_only, |
341 | | ept_entry_t **table, unsigned long *gfn_remainder, |
342 | | int next_level) |
343 | 12.2M | { |
344 | 12.2M | unsigned long mfn; |
345 | 12.2M | ept_entry_t *ept_entry, e; |
346 | 12.2M | u32 shift, index; |
347 | 12.2M | |
348 | 12.2M | shift = next_level * EPT_TABLE_ORDER; |
349 | 12.2M | |
350 | 12.2M | index = *gfn_remainder >> shift; |
351 | 12.2M | |
352 | 12.2M | /* index must be falling into the page */ |
353 | 12.2M | ASSERT(index < EPT_PAGETABLE_ENTRIES); |
354 | 12.2M | |
355 | 12.2M | ept_entry = (*table) + index; |
356 | 12.2M | |
357 | 12.2M | /* ept_next_level() is called (sometimes) without a lock. Read |
358 | 12.2M | * the entry once, and act on the "cached" entry after that to |
359 | 12.2M | * avoid races. */ |
360 | 12.2M | e = atomic_read_ept_entry(ept_entry); |
361 | 12.2M | |
362 | 12.2M | if ( !is_epte_present(&e) ) |
363 | 574k | { |
364 | 574k | if ( e.sa_p2mt == p2m_populate_on_demand ) |
365 | 0 | return GUEST_TABLE_POD_PAGE; |
366 | 574k | |
367 | 574k | if ( read_only ) |
368 | 572k | return GUEST_TABLE_MAP_FAILED; |
369 | 574k | |
370 | 1.27k | if ( !ept_set_middle_entry(p2m, ept_entry) ) |
371 | 0 | return GUEST_TABLE_MAP_FAILED; |
372 | 1.27k | else |
373 | 1.27k | e = atomic_read_ept_entry(ept_entry); /* Refresh */ |
374 | 1.27k | } |
375 | 12.2M | |
376 | 12.2M | /* The only time sp would be set here is if we had hit a superpage */ |
377 | 11.6M | if ( is_epte_superpage(&e) ) |
378 | 19.4k | return GUEST_TABLE_SUPER_PAGE; |
379 | 11.6M | |
380 | 11.6M | mfn = e.mfn; |
381 | 11.6M | unmap_domain_page(*table); |
382 | 11.6M | *table = map_domain_page(_mfn(mfn)); |
383 | 11.6M | *gfn_remainder &= (1UL << shift) - 1; |
384 | 11.6M | return GUEST_TABLE_NORMAL_PAGE; |
385 | 11.6M | } |
386 | | |
387 | | /* |
388 | | * Invalidate (via setting the EMT field to an invalid value) all valid |
389 | | * present entries in the given page table, optionally marking the entries |
390 | | * also for their subtrees needing P2M type re-calculation. |
391 | | */ |
392 | | static bool_t ept_invalidate_emt(mfn_t mfn, bool_t recalc, int level) |
393 | 2.07k | { |
394 | 2.07k | int rc; |
395 | 2.07k | ept_entry_t *epte = map_domain_page(mfn); |
396 | 2.07k | unsigned int i; |
397 | 2.07k | bool_t changed = 0; |
398 | 2.07k | |
399 | 1.06M | for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ ) |
400 | 1.06M | { |
401 | 1.06M | ept_entry_t e = atomic_read_ept_entry(&epte[i]); |
402 | 1.06M | |
403 | 1.06M | if ( !is_epte_valid(&e) || !is_epte_present(&e) || |
404 | 1.02M | (e.emt == MTRR_NUM_TYPES && (e.recalc || !recalc)) ) |
405 | 51.6k | continue; |
406 | 1.06M | |
407 | 1.01M | e.emt = MTRR_NUM_TYPES; |
408 | 1.01M | if ( recalc ) |
409 | 0 | e.recalc = 1; |
410 | 1.01M | rc = atomic_write_ept_entry(&epte[i], e, level); |
411 | 1.01M | ASSERT(rc == 0); |
412 | 1.01M | changed = 1; |
413 | 1.01M | } |
414 | 2.07k | |
415 | 2.07k | unmap_domain_page(epte); |
416 | 2.07k | |
417 | 2.07k | return changed; |
418 | 2.07k | } |
419 | | |
420 | | /* |
421 | | * Just like ept_invalidate_emt() except that |
422 | | * - not all entries at the targeted level may need processing, |
423 | | * - the re-calculation flag gets always set. |
424 | | * The passed in range is guaranteed to not cross a page (table) |
425 | | * boundary at the targeted level. |
426 | | */ |
427 | | static int ept_invalidate_emt_range(struct p2m_domain *p2m, |
428 | | unsigned int target, |
429 | | unsigned long first_gfn, |
430 | | unsigned long last_gfn) |
431 | 0 | { |
432 | 0 | ept_entry_t *table; |
433 | 0 | unsigned long gfn_remainder = first_gfn; |
434 | 0 | unsigned int i, index; |
435 | 0 | int wrc, rc = 0, ret = GUEST_TABLE_MAP_FAILED; |
436 | 0 |
|
437 | 0 | table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m))); |
438 | 0 | for ( i = p2m->ept.wl; i > target; --i ) |
439 | 0 | { |
440 | 0 | ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i); |
441 | 0 | if ( ret == GUEST_TABLE_MAP_FAILED ) |
442 | 0 | goto out; |
443 | 0 | if ( ret != GUEST_TABLE_NORMAL_PAGE ) |
444 | 0 | break; |
445 | 0 | } |
446 | 0 |
|
447 | 0 | if ( i > target ) |
448 | 0 | { |
449 | 0 | /* We need to split the original page. */ |
450 | 0 | ept_entry_t split_ept_entry; |
451 | 0 |
|
452 | 0 | index = gfn_remainder >> (i * EPT_TABLE_ORDER); |
453 | 0 | split_ept_entry = atomic_read_ept_entry(&table[index]); |
454 | 0 | ASSERT(is_epte_superpage(&split_ept_entry)); |
455 | 0 | if ( !ept_split_super_page(p2m, &split_ept_entry, i, target) ) |
456 | 0 | { |
457 | 0 | ept_free_entry(p2m, &split_ept_entry, i); |
458 | 0 | rc = -ENOMEM; |
459 | 0 | goto out; |
460 | 0 | } |
461 | 0 | wrc = atomic_write_ept_entry(&table[index], split_ept_entry, i); |
462 | 0 | ASSERT(wrc == 0); |
463 | 0 |
|
464 | 0 | for ( ; i > target; --i ) |
465 | 0 | if ( !ept_next_level(p2m, 1, &table, &gfn_remainder, i) ) |
466 | 0 | break; |
467 | 0 | ASSERT(i == target); |
468 | 0 | } |
469 | 0 |
|
470 | 0 | index = gfn_remainder >> (i * EPT_TABLE_ORDER); |
471 | 0 | i = (last_gfn >> (i * EPT_TABLE_ORDER)) & (EPT_PAGETABLE_ENTRIES - 1); |
472 | 0 | for ( ; index <= i; ++index ) |
473 | 0 | { |
474 | 0 | ept_entry_t e = atomic_read_ept_entry(&table[index]); |
475 | 0 |
|
476 | 0 | if ( is_epte_valid(&e) && is_epte_present(&e) && |
477 | 0 | (e.emt != MTRR_NUM_TYPES || !e.recalc) ) |
478 | 0 | { |
479 | 0 | e.emt = MTRR_NUM_TYPES; |
480 | 0 | e.recalc = 1; |
481 | 0 | wrc = atomic_write_ept_entry(&table[index], e, target); |
482 | 0 | ASSERT(wrc == 0); |
483 | 0 | rc = 1; |
484 | 0 | } |
485 | 0 | } |
486 | 0 |
|
487 | 0 | out: |
488 | 0 | unmap_domain_page(table); |
489 | 0 |
|
490 | 0 | return rc; |
491 | 0 | } |
492 | | |
493 | | /* |
494 | | * Resolve deliberately mis-configured (EMT field set to an invalid value) |
495 | | * entries in the page table hierarchy for the given GFN: |
496 | | * - calculate the correct value for the EMT field, |
497 | | * - if marked so, re-calculate the P2M type, |
498 | | * - propagate EMT and re-calculation flag down to the next page table level |
499 | | * for entries not involved in the translation of the given GFN. |
500 | | * Returns: |
501 | | * - negative errno values in error, |
502 | | * - zero if no adjustment was done, |
503 | | * - a positive value if at least one adjustment was done. |
504 | | */ |
505 | | static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn) |
506 | 1.07M | { |
507 | 1.07M | struct ept_data *ept = &p2m->ept; |
508 | 1.07M | unsigned int level = ept->wl; |
509 | 1.07M | unsigned long mfn = ept->mfn; |
510 | 1.07M | ept_entry_t *epte; |
511 | 1.07M | int wrc, rc = 0; |
512 | 1.07M | |
513 | 1.07M | if ( !mfn ) |
514 | 12 | return 0; |
515 | 1.07M | |
516 | 3.21M | for ( ; ; --level ) |
517 | 4.28M | { |
518 | 4.28M | ept_entry_t e; |
519 | 4.28M | unsigned int i; |
520 | 4.28M | |
521 | 4.28M | epte = map_domain_page(_mfn(mfn)); |
522 | 4.28M | i = (gfn >> (level * EPT_TABLE_ORDER)) & (EPT_PAGETABLE_ENTRIES - 1); |
523 | 4.28M | e = atomic_read_ept_entry(&epte[i]); |
524 | 4.28M | |
525 | 4.28M | if ( level == 0 || is_epte_superpage(&e) ) |
526 | 1.06M | { |
527 | 1.06M | uint8_t ipat = 0; |
528 | 1.06M | |
529 | 1.06M | if ( e.emt != MTRR_NUM_TYPES ) |
530 | 1.06M | break; |
531 | 1.06M | |
532 | 2.73k | if ( level == 0 ) |
533 | 1.97k | { |
534 | 1.01M | for ( gfn -= i, i = 0; i < EPT_PAGETABLE_ENTRIES; ++i ) |
535 | 1.01M | { |
536 | 1.01M | p2m_type_t nt; |
537 | 1.01M | |
538 | 1.01M | e = atomic_read_ept_entry(&epte[i]); |
539 | 1.01M | if ( e.emt == MTRR_NUM_TYPES ) |
540 | 1.00M | e.emt = 0; |
541 | 1.01M | if ( !is_epte_valid(&e) || !is_epte_present(&e) ) |
542 | 5.02k | continue; |
543 | 1.00M | e.emt = epte_get_entry_emt(p2m->domain, gfn + i, |
544 | 1.00M | _mfn(e.mfn), 0, &ipat, |
545 | 1.00M | e.sa_p2mt == p2m_mmio_direct); |
546 | 1.00M | e.ipat = ipat; |
547 | 1.00M | |
548 | 1.00M | nt = p2m_recalc_type(e.recalc, e.sa_p2mt, p2m, gfn + i); |
549 | 1.00M | if ( nt != e.sa_p2mt ) |
550 | 0 | { |
551 | 0 | if ( e.sa_p2mt == p2m_ioreq_server ) |
552 | 0 | { |
553 | 0 | ASSERT(p2m->ioreq.entry_count > 0); |
554 | 0 | p2m->ioreq.entry_count--; |
555 | 0 | } |
556 | 0 |
|
557 | 0 | e.sa_p2mt = nt; |
558 | 0 | ept_p2m_type_to_flags(p2m, &e, e.sa_p2mt, e.access); |
559 | 0 | } |
560 | 1.00M | e.recalc = 0; |
561 | 1.00M | wrc = atomic_write_ept_entry(&epte[i], e, level); |
562 | 1.00M | ASSERT(wrc == 0); |
563 | 1.00M | } |
564 | 1.97k | } |
565 | 2.73k | else |
566 | 755 | { |
567 | 755 | int emt = epte_get_entry_emt(p2m->domain, gfn, _mfn(e.mfn), |
568 | 755 | level * EPT_TABLE_ORDER, &ipat, |
569 | 755 | e.sa_p2mt == p2m_mmio_direct); |
570 | 755 | bool_t recalc = e.recalc; |
571 | 755 | |
572 | 755 | if ( recalc && p2m_is_changeable(e.sa_p2mt) ) |
573 | 0 | { |
574 | 0 | unsigned long mask = ~0UL << (level * EPT_TABLE_ORDER); |
575 | 0 |
|
576 | 0 | ASSERT(e.sa_p2mt != p2m_ioreq_server); |
577 | 0 | switch ( p2m_is_logdirty_range(p2m, gfn & mask, |
578 | 0 | gfn | ~mask) ) |
579 | 0 | { |
580 | 0 | case 0: |
581 | 0 | e.sa_p2mt = p2m_ram_rw; |
582 | 0 | e.recalc = 0; |
583 | 0 | break; |
584 | 0 | case 1: |
585 | 0 | e.sa_p2mt = p2m_ram_logdirty; |
586 | 0 | e.recalc = 0; |
587 | 0 | break; |
588 | 0 | default: /* Force split. */ |
589 | 0 | emt = -1; |
590 | 0 | break; |
591 | 0 | } |
592 | 0 | } |
593 | 755 | if ( unlikely(emt < 0) ) |
594 | 0 | { |
595 | 0 | if ( ept_split_super_page(p2m, &e, level, level - 1) ) |
596 | 0 | { |
597 | 0 | wrc = atomic_write_ept_entry(&epte[i], e, level); |
598 | 0 | ASSERT(wrc == 0); |
599 | 0 | unmap_domain_page(epte); |
600 | 0 | mfn = e.mfn; |
601 | 0 | continue; |
602 | 0 | } |
603 | 0 | ept_free_entry(p2m, &e, level); |
604 | 0 | rc = -ENOMEM; |
605 | 0 | break; |
606 | 0 | } |
607 | 755 | e.emt = emt; |
608 | 755 | e.ipat = ipat; |
609 | 755 | e.recalc = 0; |
610 | 755 | if ( recalc && p2m_is_changeable(e.sa_p2mt) ) |
611 | 0 | ept_p2m_type_to_flags(p2m, &e, e.sa_p2mt, e.access); |
612 | 755 | wrc = atomic_write_ept_entry(&epte[i], e, level); |
613 | 755 | ASSERT(wrc == 0); |
614 | 755 | } |
615 | 2.73k | |
616 | 2.73k | rc = 1; |
617 | 2.73k | break; |
618 | 2.73k | } |
619 | 4.28M | |
620 | 3.21M | if ( e.emt == MTRR_NUM_TYPES ) |
621 | 2.05k | { |
622 | 2.05k | ASSERT(is_epte_present(&e)); |
623 | 2.05k | ept_invalidate_emt(_mfn(e.mfn), e.recalc, level); |
624 | 2.05k | smp_wmb(); |
625 | 2.05k | e.emt = 0; |
626 | 2.05k | e.recalc = 0; |
627 | 2.05k | wrc = atomic_write_ept_entry(&epte[i], e, level); |
628 | 2.05k | ASSERT(wrc == 0); |
629 | 2.05k | unmap_domain_page(epte); |
630 | 2.05k | rc = 1; |
631 | 2.05k | } |
632 | 3.21M | else if ( is_epte_present(&e) && !e.emt ) |
633 | 3.21M | unmap_domain_page(epte); |
634 | 3.21M | else |
635 | 1.93k | break; |
636 | 3.21M | |
637 | 3.21M | mfn = e.mfn; |
638 | 3.21M | } |
639 | 1.07M | |
640 | 1.07M | unmap_domain_page(epte); |
641 | 1.07M | if ( rc ) |
642 | 2.73k | { |
643 | 2.73k | struct vcpu *v; |
644 | 2.73k | |
645 | 2.73k | for_each_vcpu ( p2m->domain, v ) |
646 | 32.8k | v->arch.hvm_vmx.ept_spurious_misconfig = 1; |
647 | 2.73k | } |
648 | 1.07M | |
649 | 1.07M | return rc; |
650 | 1.07M | } |
651 | | |
652 | | bool_t ept_handle_misconfig(uint64_t gpa) |
653 | 3.14k | { |
654 | 3.14k | struct vcpu *curr = current; |
655 | 3.14k | struct p2m_domain *p2m = p2m_get_hostp2m(curr->domain); |
656 | 3.14k | bool_t spurious; |
657 | 3.14k | int rc; |
658 | 3.14k | |
659 | 3.14k | p2m_lock(p2m); |
660 | 3.14k | |
661 | 3.14k | spurious = curr->arch.hvm_vmx.ept_spurious_misconfig; |
662 | 3.14k | rc = resolve_misconfig(p2m, PFN_DOWN(gpa)); |
663 | 3.14k | curr->arch.hvm_vmx.ept_spurious_misconfig = 0; |
664 | 3.14k | |
665 | 3.14k | p2m_unlock(p2m); |
666 | 3.14k | |
667 | 2.21k | return spurious ? (rc >= 0) : (rc > 0); |
668 | 3.14k | } |
669 | | |
670 | | /* |
671 | | * ept_set_entry() computes 'need_modify_vtd_table' for itself, |
672 | | * by observing whether any gfn->mfn translations are modified. |
673 | | * |
674 | | * Returns: 0 for success, -errno for failure |
675 | | */ |
676 | | static int |
677 | | ept_set_entry(struct p2m_domain *p2m, gfn_t gfn_, mfn_t mfn, |
678 | | unsigned int order, p2m_type_t p2mt, p2m_access_t p2ma, |
679 | | int sve) |
680 | 1.06M | { |
681 | 1.06M | ept_entry_t *table, *ept_entry = NULL; |
682 | 1.06M | unsigned long gfn = gfn_x(gfn_); |
683 | 1.06M | unsigned long gfn_remainder = gfn; |
684 | 1.06M | unsigned int i, target = order / EPT_TABLE_ORDER; |
685 | 1.06M | unsigned long fn_mask = !mfn_eq(mfn, INVALID_MFN) ? (gfn | mfn_x(mfn)) : gfn; |
686 | 1.06M | int ret, rc = 0; |
687 | 1.06M | bool_t entry_written = 0; |
688 | 1.06M | bool_t direct_mmio = (p2mt == p2m_mmio_direct); |
689 | 1.06M | uint8_t ipat = 0; |
690 | 1.06M | bool_t need_modify_vtd_table = 1; |
691 | 1.06M | bool_t vtd_pte_present = 0; |
692 | 1.06M | unsigned int iommu_flags = p2m_get_iommu_flags(p2mt, mfn); |
693 | 1.06M | bool_t needs_sync = 1; |
694 | 1.06M | ept_entry_t old_entry = { .epte = 0 }; |
695 | 1.06M | ept_entry_t new_entry = { .epte = 0 }; |
696 | 1.06M | struct ept_data *ept = &p2m->ept; |
697 | 1.06M | struct domain *d = p2m->domain; |
698 | 1.06M | |
699 | 1.06M | ASSERT(ept); |
700 | 1.06M | /* |
701 | 1.06M | * the caller must make sure: |
702 | 1.06M | * 1. passing valid gfn and mfn at order boundary. |
703 | 1.06M | * 2. gfn not exceeding guest physical address width. |
704 | 1.06M | * 3. passing a valid order. |
705 | 1.06M | */ |
706 | 1.06M | if ( (fn_mask & ((1UL << order) - 1)) || |
707 | 1.06M | ((u64)gfn >> ((ept->wl + 1) * EPT_TABLE_ORDER)) || |
708 | 1.06M | (order % EPT_TABLE_ORDER) ) |
709 | 0 | return -EINVAL; |
710 | 1.06M | |
711 | 1.06M | /* Carry out any eventually pending earlier changes first. */ |
712 | 1.06M | ret = resolve_misconfig(p2m, gfn); |
713 | 1.06M | if ( ret < 0 ) |
714 | 0 | return ret; |
715 | 1.06M | |
716 | 1.06M | ASSERT((target == 2 && hap_has_1gb) || |
717 | 1.06M | (target == 1 && hap_has_2mb) || |
718 | 1.06M | (target == 0)); |
719 | 1.06M | ASSERT(!p2m_is_foreign(p2mt) || target == 0); |
720 | 1.06M | |
721 | 1.06M | table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m))); |
722 | 1.06M | |
723 | 1.06M | ret = GUEST_TABLE_MAP_FAILED; |
724 | 4.27M | for ( i = ept->wl; i > target; i-- ) |
725 | 3.20M | { |
726 | 3.20M | ret = ept_next_level(p2m, 0, &table, &gfn_remainder, i); |
727 | 3.20M | if ( !ret ) |
728 | 0 | { |
729 | 0 | rc = -ENOENT; |
730 | 0 | goto out; |
731 | 0 | } |
732 | 3.20M | else if ( ret != GUEST_TABLE_NORMAL_PAGE ) |
733 | 0 | break; |
734 | 3.20M | } |
735 | 1.06M | |
736 | 1.06M | ASSERT(ret != GUEST_TABLE_POD_PAGE || i != target); |
737 | 1.06M | |
738 | 1.06M | ept_entry = table + (gfn_remainder >> (i * EPT_TABLE_ORDER)); |
739 | 1.06M | |
740 | 1.06M | /* In case VT-d uses same page table, this flag is needed by VT-d */ |
741 | 1.06M | vtd_pte_present = is_epte_present(ept_entry); |
742 | 1.06M | |
743 | 1.06M | /* |
744 | 1.06M | * If we're here with i > target, we must be at a leaf node, and |
745 | 1.06M | * we need to break up the superpage. |
746 | 1.06M | * |
747 | 1.06M | * If we're here with i == target and i > 0, we need to check to see |
748 | 1.06M | * if we're replacing a non-leaf entry (i.e., pointing to an N-1 table) |
749 | 1.06M | * with a leaf entry (a 1GiB or 2MiB page), and handle things appropriately. |
750 | 1.06M | */ |
751 | 1.06M | |
752 | 1.06M | if ( i == target ) |
753 | 1.06M | { |
754 | 1.06M | /* We reached the target level. */ |
755 | 1.06M | |
756 | 1.06M | /* No need to flush if the old entry wasn't valid */ |
757 | 1.06M | if ( !is_epte_present(ept_entry) ) |
758 | 850k | needs_sync = 0; |
759 | 1.06M | |
760 | 1.06M | /* If we're replacing a non-leaf entry with a leaf entry (1GiB or 2MiB), |
761 | 1.06M | * the intermediate tables will be freed below after the ept flush |
762 | 1.06M | * |
763 | 1.06M | * Read-then-write is OK because we hold the p2m lock. */ |
764 | 1.06M | old_entry = *ept_entry; |
765 | 1.06M | } |
766 | 1.06M | else |
767 | 0 | { |
768 | 0 | /* We need to split the original page. */ |
769 | 0 | ept_entry_t split_ept_entry; |
770 | 0 |
|
771 | 0 | ASSERT(is_epte_superpage(ept_entry)); |
772 | 0 |
|
773 | 0 | split_ept_entry = atomic_read_ept_entry(ept_entry); |
774 | 0 |
|
775 | 0 | if ( !ept_split_super_page(p2m, &split_ept_entry, i, target) ) |
776 | 0 | { |
777 | 0 | ept_free_entry(p2m, &split_ept_entry, i); |
778 | 0 | rc = -ENOMEM; |
779 | 0 | goto out; |
780 | 0 | } |
781 | 0 |
|
782 | 0 | /* now install the newly split ept sub-tree */ |
 783 |        0 |         /* NB: please make sure the domain is paused and no in-flight VT-d DMA. */                            |
784 | 0 | rc = atomic_write_ept_entry(ept_entry, split_ept_entry, i); |
785 | 0 | ASSERT(rc == 0); |
786 | 0 |
|
787 | 0 | /* then move to the level we want to make real changes */ |
788 | 0 | for ( ; i > target; i-- ) |
789 | 0 | if ( !ept_next_level(p2m, 0, &table, &gfn_remainder, i) ) |
790 | 0 | break; |
791 | 0 | /* We just installed the pages we need. */ |
792 | 0 | ASSERT(i == target); |
793 | 0 |
|
794 | 0 | ept_entry = table + (gfn_remainder >> (i * EPT_TABLE_ORDER)); |
795 | 0 | } |
796 | 1.06M | |
797 | 1.06M | if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) ) |
798 | 850k | { |
799 | 850k | int emt = epte_get_entry_emt(p2m->domain, gfn, mfn, |
800 | 850k | i * EPT_TABLE_ORDER, &ipat, direct_mmio); |
801 | 850k | |
802 | 850k | if ( emt >= 0 ) |
803 | 850k | new_entry.emt = emt; |
804 | 850k | else /* ept_handle_misconfig() will need to take care of this. */ |
805 | 0 | new_entry.emt = MTRR_NUM_TYPES; |
806 | 850k | |
807 | 850k | new_entry.ipat = ipat; |
808 | 850k | new_entry.sp = !!i; |
809 | 850k | new_entry.sa_p2mt = p2mt; |
810 | 850k | new_entry.access = p2ma; |
811 | 850k | new_entry.snp = (iommu_enabled && iommu_snoop); |
812 | 850k | |
813 | 850k | /* the caller should take care of the previous page */ |
814 | 850k | new_entry.mfn = mfn_x(mfn); |
815 | 850k | |
816 | 850k | /* Safe to read-then-write because we hold the p2m lock */ |
817 | 850k | if ( ept_entry->mfn == new_entry.mfn && |
818 | 150 | p2m_get_iommu_flags(ept_entry->sa_p2mt, _mfn(ept_entry->mfn)) == |
819 | 150 | iommu_flags ) |
820 | 150 | need_modify_vtd_table = 0; |
821 | 850k | |
822 | 850k | ept_p2m_type_to_flags(p2m, &new_entry, p2mt, p2ma); |
823 | 850k | } |
824 | 1.06M | |
825 | 1.06M | if ( sve != -1 ) |
826 | 0 | new_entry.suppress_ve = !!sve; |
827 | 1.06M | else |
828 | 1.06M | new_entry.suppress_ve = is_epte_valid(&old_entry) ? |
829 | 850k | old_entry.suppress_ve : 1; |
830 | 1.06M | |
831 | 1.06M | /* |
832 | 1.06M | * p2m_ioreq_server is only used for 4K pages, so the |
833 | 1.06M | * count is only done on ept page table entries. |
834 | 1.06M | */ |
835 | 1.06M | if ( p2mt == p2m_ioreq_server ) |
836 | 0 | { |
837 | 0 | ASSERT(i == 0); |
838 | 0 | p2m->ioreq.entry_count++; |
839 | 0 | } |
840 | 1.06M | |
841 | 1.06M | if ( ept_entry->sa_p2mt == p2m_ioreq_server ) |
842 | 0 | { |
843 | 0 | ASSERT(i == 0); |
844 | 0 | ASSERT(p2m->ioreq.entry_count > 0); |
845 | 0 | p2m->ioreq.entry_count--; |
846 | 0 | } |
847 | 1.06M | |
848 | 1.06M | rc = atomic_write_ept_entry(ept_entry, new_entry, target); |
849 | 1.06M | if ( unlikely(rc) ) |
850 | 0 | old_entry.epte = 0; |
851 | 1.06M | else |
852 | 1.06M | { |
853 | 1.06M | entry_written = 1; |
854 | 1.06M | |
855 | 1.06M | if ( p2mt != p2m_invalid && |
856 | 850k | (gfn + (1UL << order) - 1 > p2m->max_mapped_pfn) ) |
857 | 1.06M | /* Track the highest gfn for which we have ever had a valid mapping */ |
858 | 1.00k | p2m->max_mapped_pfn = gfn + (1UL << order) - 1; |
859 | 1.06M | } |
860 | 1.06M | |
861 | 1.06M | out: |
862 | 1.06M | if ( needs_sync ) |
863 | 218k | ept_sync_domain(p2m); |
864 | 1.06M | |
865 | 1.06M | /* For host p2m, may need to change VT-d page table.*/ |
866 | 1.06M | if ( rc == 0 && p2m_is_hostp2m(p2m) && need_iommu(d) && |
867 | 1.06M | need_modify_vtd_table ) |
868 | 1.06M | { |
869 | 1.06M | if ( iommu_hap_pt_share ) |
870 | 0 | rc = iommu_pte_flush(d, gfn, &ept_entry->epte, order, vtd_pte_present); |
871 | 1.06M | else |
872 | 1.06M | { |
873 | 1.06M | if ( iommu_flags ) |
874 | 5.19M | for ( i = 0; i < (1 << order); i++ ) |
875 | 4.34M | { |
876 | 4.34M | rc = iommu_map_page(d, gfn + i, mfn_x(mfn) + i, iommu_flags); |
877 | 4.34M | if ( unlikely(rc) ) |
878 | 0 | { |
879 | 0 | while ( i-- ) |
880 | 0 | /* If statement to satisfy __must_check. */ |
881 | 0 | if ( iommu_unmap_page(p2m->domain, gfn + i) ) |
882 | 0 | continue; |
883 | 0 |
|
884 | 0 | break; |
885 | 0 | } |
886 | 4.34M | } |
887 | 1.06M | else |
888 | 436k | for ( i = 0; i < (1 << order); i++ ) |
889 | 218k | { |
890 | 218k | ret = iommu_unmap_page(d, gfn + i); |
891 | 218k | if ( !rc ) |
892 | 218k | rc = ret; |
893 | 218k | } |
894 | 1.06M | } |
895 | 1.06M | } |
896 | 1.06M | |
897 | 1.06M | unmap_domain_page(table); |
898 | 1.06M | |
899 | 1.06M | /* Release the old intermediate tables, if any. This has to be the |
900 | 1.06M | last thing we do, after the ept_sync_domain() and removal |
901 | 1.06M | from the iommu tables, so as to avoid a potential |
902 | 1.06M | use-after-free. */ |
903 | 1.06M | if ( is_epte_present(&old_entry) ) |
904 | 218k | ept_free_entry(p2m, &old_entry, target); |
905 | 1.06M | |
906 | 1.06M | if ( entry_written && p2m_is_hostp2m(p2m) ) |
907 | 1.06M | p2m_altp2m_propagate_change(d, _gfn(gfn), mfn, order, p2mt, p2ma); |
908 | 1.06M | |
909 | 1.06M | return rc; |
910 | 1.06M | } |
911 | | |
/*
 * Read an EPT p2m entry.
 *
 * Returns the mfn mapped at @gfn_ (INVALID_MFN if there is no valid
 * mapping) and fills in:
 *  *t          - p2m type of the entry (p2m_mmio_dm when unmapped),
 *  *a          - access permissions (p2m_access_n when unmapped),
 *  *sve        - the entry's suppress_ve bit, if @sve is non-NULL,
 *  *page_order - order of the mapping (or of the hole), if non-NULL.
 * If @q has P2M_ALLOC set, populate-on-demand entries are populated
 * before being reported.  Caller is expected to hold the p2m lock.
 */
static mfn_t ept_get_entry(struct p2m_domain *p2m,
                           gfn_t gfn_, p2m_type_t *t, p2m_access_t* a,
                           p2m_query_t q, unsigned int *page_order,
                           bool_t *sve)
{
    ept_entry_t *table =
        map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
    unsigned long gfn = gfn_x(gfn_);
    unsigned long gfn_remainder = gfn;
    ept_entry_t *ept_entry;
    u32 index;
    int i;
    int ret = 0;
    bool_t recalc = 0;          /* saw a recalc flag on the walk down? */
    mfn_t mfn = INVALID_MFN;
    struct ept_data *ept = &p2m->ept;

    /* Defaults for the "no mapping" case. */
    *t = p2m_mmio_dm;
    *a = p2m_access_n;
    if ( sve )
        *sve = 1;

    /* This pfn is higher than the highest the p2m map currently holds */
    if ( gfn > p2m->max_mapped_pfn )
    {
        /*
         * Find the largest level whose aligned-down gfn still exceeds
         * max_mapped_pfn, so *page_order reports the size of the hole.
         */
        for ( i = ept->wl; i > 0; --i )
            if ( (gfn & ~((1UL << (i * EPT_TABLE_ORDER)) - 1)) >
                 p2m->max_mapped_pfn )
                break;
        goto out;
    }

    /* Should check if gfn obeys GAW here. */

    /* Walk down from the root, stopping at a hole, PoD, or superpage. */
    for ( i = ept->wl; i > 0; i-- )
    {
    retry:
        if ( table[gfn_remainder >> (i * EPT_TABLE_ORDER)].recalc )
            recalc = 1;
        ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i);
        if ( !ret )
            goto out;
        else if ( ret == GUEST_TABLE_POD_PAGE )
        {
            if ( !(q & P2M_ALLOC) )
            {
                /* Caller doesn't want allocation; just report PoD. */
                *t = p2m_populate_on_demand;
                goto out;
            }

            /* Populate this superpage */
            ASSERT(i <= 2);

            index = gfn_remainder >> ( i * EPT_TABLE_ORDER);
            ept_entry = table + index;

            /* On success re-examine the same level; otherwise give up. */
            if ( p2m_pod_demand_populate(p2m, gfn_, i * EPT_TABLE_ORDER) )
                goto retry;
            else
                goto out;
        }
        else if ( ret == GUEST_TABLE_SUPER_PAGE )
            break;
    }

    /* i is now the level of the entry we stopped at (0 = 4k leaf). */
    index = gfn_remainder >> (i * EPT_TABLE_ORDER);
    ept_entry = table + index;

    if ( ept_entry->sa_p2mt == p2m_populate_on_demand )
    {
        if ( !(q & P2M_ALLOC) )
        {
            *t = p2m_populate_on_demand;
            goto out;
        }

        ASSERT(i == 0);

        /* Populate the 4k page; on success fall through to re-read it. */
        if ( !p2m_pod_demand_populate(p2m, gfn_, PAGE_ORDER_4K) )
            goto out;
    }

    if ( is_epte_valid(ept_entry) )
    {
        *t = p2m_recalc_type(recalc || ept_entry->recalc,
                             ept_entry->sa_p2mt, p2m, gfn);
        *a = ept_entry->access;
        if ( sve )
            *sve = ept_entry->suppress_ve;

        mfn = _mfn(ept_entry->mfn);
        if ( i )
        {
            /*
             * We may meet super pages, and to split into 4k pages
             * to emulate p2m table
             */
            unsigned long split_mfn = mfn_x(mfn) +
                                      (gfn_remainder &
                                       ((1 << (i * EPT_TABLE_ORDER)) - 1));
            mfn = _mfn(split_mfn);
        }
    }

 out:
    if ( page_order )
        *page_order = i * EPT_TABLE_ORDER;

    unmap_domain_page(table);
    return mfn;
}
1024 | | |
1025 | | void ept_walk_table(struct domain *d, unsigned long gfn) |
1026 | 0 | { |
1027 | 0 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
1028 | 0 | struct ept_data *ept = &p2m->ept; |
1029 | 0 | ept_entry_t *table = |
1030 | 0 | map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m))); |
1031 | 0 | unsigned long gfn_remainder = gfn; |
1032 | 0 |
|
1033 | 0 | int i; |
1034 | 0 |
|
1035 | 0 | gprintk(XENLOG_ERR, "Walking EPT tables for GFN %lx:\n", gfn); |
1036 | 0 |
|
1037 | 0 | /* This pfn is higher than the highest the p2m map currently holds */ |
1038 | 0 | if ( gfn > p2m->max_mapped_pfn ) |
1039 | 0 | { |
1040 | 0 | gprintk(XENLOG_ERR, " gfn exceeds max_mapped_pfn %lx\n", |
1041 | 0 | p2m->max_mapped_pfn); |
1042 | 0 | goto out; |
1043 | 0 | } |
1044 | 0 |
|
1045 | 0 | for ( i = ept->wl; i >= 0; i-- ) |
1046 | 0 | { |
1047 | 0 | ept_entry_t *ept_entry, *next; |
1048 | 0 | u32 index; |
1049 | 0 |
|
1050 | 0 | /* Stolen from ept_next_level */ |
1051 | 0 | index = gfn_remainder >> (i*EPT_TABLE_ORDER); |
1052 | 0 | ept_entry = table + index; |
1053 | 0 |
|
1054 | 0 | gprintk(XENLOG_ERR, " epte %"PRIx64"\n", ept_entry->epte); |
1055 | 0 |
|
1056 | 0 | if ( (i == 0) || !is_epte_present(ept_entry) || |
1057 | 0 | is_epte_superpage(ept_entry) ) |
1058 | 0 | goto out; |
1059 | 0 | else |
1060 | 0 | { |
1061 | 0 | gfn_remainder &= (1UL << (i*EPT_TABLE_ORDER)) - 1; |
1062 | 0 |
|
1063 | 0 | next = map_domain_page(_mfn(ept_entry->mfn)); |
1064 | 0 |
|
1065 | 0 | unmap_domain_page(table); |
1066 | 0 |
|
1067 | 0 | table = next; |
1068 | 0 | } |
1069 | 0 | } |
1070 | 0 |
|
1071 | 0 | out: |
1072 | 0 | unmap_domain_page(table); |
1073 | 0 | return; |
1074 | 0 | } |
1075 | | |
1076 | | static void ept_change_entry_type_global(struct p2m_domain *p2m, |
1077 | | p2m_type_t ot, p2m_type_t nt) |
1078 | 0 | { |
1079 | 0 | unsigned long mfn = p2m->ept.mfn; |
1080 | 0 |
|
1081 | 0 | if ( !mfn ) |
1082 | 0 | return; |
1083 | 0 |
|
1084 | 0 | if ( ept_invalidate_emt(_mfn(mfn), 1, p2m->ept.wl) ) |
1085 | 0 | ept_sync_domain(p2m); |
1086 | 0 | } |
1087 | | |
/*
 * Lazily change the type of entries in [first_gfn, last_gfn] from @ot
 * to @nt by marking the covering page-table entries for recalculation
 * (@ot/@nt themselves aren't consumed here; the fixup happens on the
 * next EPT misconfig exit).  Works level by level: at each level the
 * unaligned head and tail of the range are invalidated, then the
 * remaining aligned middle is handled one level up with a wider mask.
 * Returns 0 on success, negative errno on failure.
 */
static int ept_change_entry_type_range(struct p2m_domain *p2m,
                                       p2m_type_t ot, p2m_type_t nt,
                                       unsigned long first_gfn,
                                       unsigned long last_gfn)
{
    unsigned int i, wl = p2m->ept.wl;
    unsigned long mask = (1 << EPT_TABLE_ORDER) - 1;  /* in-level index mask */
    int rc = 0, sync = 0;

    /* No root table yet - nothing to change. */
    if ( !p2m->ept.mfn )
        return -EINVAL;

    for ( i = 0; i <= wl; )
    {
        if ( first_gfn & mask )
        {
            /* Range starts mid-block at this level: clip off the head. */
            unsigned long end_gfn = min(first_gfn | mask, last_gfn);

            rc = ept_invalidate_emt_range(p2m, i, first_gfn, end_gfn);
            sync |= rc;
            if ( rc < 0 || end_gfn >= last_gfn )
                break;
            first_gfn = end_gfn + 1;
        }
        else if ( (last_gfn & mask) != mask )
        {
            /* Range ends mid-block at this level: clip off the tail. */
            unsigned long start_gfn = max(first_gfn, last_gfn & ~mask);

            rc = ept_invalidate_emt_range(p2m, i, start_gfn, last_gfn);
            sync |= rc;
            if ( rc < 0 || start_gfn <= first_gfn )
                break;
            last_gfn = start_gfn - 1;
        }
        else
        {
            /* Fully aligned at this level: handle the rest one level up. */
            ++i;
            mask |= mask << EPT_TABLE_ORDER;
        }
    }

    /* ept_invalidate_emt_range() returning > 0 means a flush is needed. */
    if ( sync )
        ept_sync_domain(p2m);

    return rc < 0 ? rc : 0;
}
1134 | | |
1135 | | static void ept_memory_type_changed(struct p2m_domain *p2m) |
1136 | 23 | { |
1137 | 23 | unsigned long mfn = p2m->ept.mfn; |
1138 | 23 | |
1139 | 23 | if ( !mfn ) |
1140 | 0 | return; |
1141 | 23 | |
1142 | 23 | if ( ept_invalidate_emt(_mfn(mfn), 0, p2m->ept.wl) ) |
1143 | 23 | ept_sync_domain(p2m); |
1144 | 23 | } |
1145 | | |
/*
 * IPI callback used by ept_sync_domain_mask().  Deliberately a no-op:
 * the INVEPT itself is issued lazily before the next VMENTER on each
 * CPU (see vmx_vmenter_helper()).
 */
static void __ept_sync_domain(void *info)
{
    /*
     * The invalidation will be done before VMENTER (see
     * vmx_vmenter_helper()).
     */
}
1153 | | |
1154 | | static void ept_sync_domain_prepare(struct p2m_domain *p2m) |
1155 | 218k | { |
1156 | 218k | struct domain *d = p2m->domain; |
1157 | 218k | struct ept_data *ept = &p2m->ept; |
1158 | 218k | |
1159 | 218k | if ( nestedhvm_enabled(d) ) |
1160 | 0 | { |
1161 | 0 | if ( p2m_is_nestedp2m(p2m) ) |
1162 | 0 | ept = &p2m_get_hostp2m(d)->ept; |
1163 | 0 | else |
1164 | 0 | p2m_flush_nestedp2m(d); |
1165 | 0 | } |
1166 | 218k | |
1167 | 218k | /* |
1168 | 218k | * Need to invalidate on all PCPUs because either: |
1169 | 218k | * |
1170 | 218k | * a) A VCPU has run and some translations may be cached. |
1171 | 218k | * b) A VCPU has not run and and the initial invalidation in case |
1172 | 218k | * of an EP4TA reuse is still needed. |
1173 | 218k | */ |
1174 | 218k | cpumask_setall(ept->invalidate); |
1175 | 218k | } |
1176 | | |
/*
 * IPI the CPUs in @mask with the (empty) __ept_sync_domain() callback;
 * the real invalidation happens before each CPU's next VMENTER.  The
 * final argument makes this wait for all targets to acknowledge.
 */
static void ept_sync_domain_mask(struct p2m_domain *p2m, const cpumask_t *mask)
{
    on_selected_cpus(mask, __ept_sync_domain, p2m, 1);
}
1181 | | |
1182 | | void ept_sync_domain(struct p2m_domain *p2m) |
1183 | 218k | { |
1184 | 218k | struct domain *d = p2m->domain; |
1185 | 218k | |
1186 | 218k | /* Only if using EPT and this domain has some VCPUs to dirty. */ |
1187 | 218k | if ( !paging_mode_hap(d) || !d->vcpu || !d->vcpu[0] ) |
1188 | 0 | return; |
1189 | 218k | |
1190 | 218k | ept_sync_domain_prepare(p2m); |
1191 | 218k | |
1192 | 218k | if ( p2m->defer_flush ) |
1193 | 218k | { |
1194 | 218k | p2m->need_flush = 1; |
1195 | 218k | return; |
1196 | 218k | } |
1197 | 218k | |
1198 | 0 | ept_sync_domain_mask(p2m, d->domain_dirty_cpumask); |
1199 | 0 | } |
1200 | | |
/* p2m tlb_flush hook: sync EPT on all of the domain's dirty CPUs. */
static void ept_tlb_flush(struct p2m_domain *p2m)
{
    ept_sync_domain_mask(p2m, p2m->domain->domain_dirty_cpumask);
}
1205 | | |
1206 | | static void ept_enable_pml(struct p2m_domain *p2m) |
1207 | 0 | { |
1208 | 0 | /* Domain must have been paused */ |
1209 | 0 | ASSERT(atomic_read(&p2m->domain->pause_count)); |
1210 | 0 |
|
1211 | 0 | /* |
1212 | 0 | * No need to return whether vmx_domain_enable_pml has succeeded, as |
1213 | 0 | * ept_p2m_type_to_flags will do the check, and write protection will be |
1214 | 0 | * used if PML is not enabled. |
1215 | 0 | */ |
1216 | 0 | if ( vmx_domain_enable_pml(p2m->domain) ) |
1217 | 0 | return; |
1218 | 0 |
|
1219 | 0 | /* Enable EPT A/D bit for PML */ |
1220 | 0 | p2m->ept.ad = 1; |
1221 | 0 | vmx_domain_update_eptp(p2m->domain); |
1222 | 0 | } |
1223 | | |
1224 | | static void ept_disable_pml(struct p2m_domain *p2m) |
1225 | 0 | { |
1226 | 0 | /* Domain must have been paused */ |
1227 | 0 | ASSERT(atomic_read(&p2m->domain->pause_count)); |
1228 | 0 |
|
1229 | 0 | vmx_domain_disable_pml(p2m->domain); |
1230 | 0 |
|
1231 | 0 | /* Disable EPT A/D bit */ |
1232 | 0 | p2m->ept.ad = 0; |
1233 | 0 | vmx_domain_update_eptp(p2m->domain); |
1234 | 0 | } |
1235 | | |
/*
 * Drain the domain's hardware PML buffers (see
 * vmx_domain_flush_pml_buffers()).  The domain must be paused so no
 * VCPU is concurrently logging.
 */
static void ept_flush_pml_buffers(struct p2m_domain *p2m)
{
    /* Domain must have been paused */
    ASSERT(atomic_read(&p2m->domain->pause_count));

    vmx_domain_flush_pml_buffers(p2m->domain);
}
1243 | | |
1244 | | int ept_p2m_init(struct p2m_domain *p2m) |
1245 | 21 | { |
1246 | 21 | struct ept_data *ept = &p2m->ept; |
1247 | 21 | |
1248 | 21 | p2m->set_entry = ept_set_entry; |
1249 | 21 | p2m->get_entry = ept_get_entry; |
1250 | 21 | p2m->recalc = resolve_misconfig; |
1251 | 21 | p2m->change_entry_type_global = ept_change_entry_type_global; |
1252 | 21 | p2m->change_entry_type_range = ept_change_entry_type_range; |
1253 | 21 | p2m->memory_type_changed = ept_memory_type_changed; |
1254 | 21 | p2m->audit_p2m = NULL; |
1255 | 21 | p2m->tlb_flush = ept_tlb_flush; |
1256 | 21 | |
1257 | 21 | /* Set the memory type used when accessing EPT paging structures. */ |
1258 | 21 | ept->mt = EPT_DEFAULT_MT; |
1259 | 21 | |
1260 | 21 | /* set EPT page-walk length, now it's actual walk length - 1, i.e. 3 */ |
1261 | 21 | ept->wl = 3; |
1262 | 21 | |
1263 | 21 | if ( cpu_has_vmx_pml ) |
1264 | 0 | { |
1265 | 0 | p2m->enable_hardware_log_dirty = ept_enable_pml; |
1266 | 0 | p2m->disable_hardware_log_dirty = ept_disable_pml; |
1267 | 0 | p2m->flush_hardware_cached_dirty = ept_flush_pml_buffers; |
1268 | 0 | } |
1269 | 21 | |
1270 | 21 | if ( !zalloc_cpumask_var(&ept->invalidate) ) |
1271 | 0 | return -ENOMEM; |
1272 | 21 | |
1273 | 21 | /* |
1274 | 21 | * Assume an initial invalidation is required, in case an EP4TA is |
1275 | 21 | * reused. |
1276 | 21 | */ |
1277 | 21 | cpumask_setall(ept->invalidate); |
1278 | 21 | |
1279 | 21 | return 0; |
1280 | 21 | } |
1281 | | |
1282 | | void ept_p2m_uninit(struct p2m_domain *p2m) |
1283 | 0 | { |
1284 | 0 | struct ept_data *ept = &p2m->ept; |
1285 | 0 | free_cpumask_var(ept->invalidate); |
1286 | 0 | } |
1287 | | |
1288 | | static const char *memory_type_to_str(unsigned int x) |
1289 | 0 | { |
1290 | 0 | static const char memory_types[8][3] = { |
1291 | 0 | [MTRR_TYPE_UNCACHABLE] = "UC", |
1292 | 0 | [MTRR_TYPE_WRCOMB] = "WC", |
1293 | 0 | [MTRR_TYPE_WRTHROUGH] = "WT", |
1294 | 0 | [MTRR_TYPE_WRPROT] = "WP", |
1295 | 0 | [MTRR_TYPE_WRBACK] = "WB", |
1296 | 0 | [MTRR_NUM_TYPES] = "??" |
1297 | 0 | }; |
1298 | 0 |
|
1299 | 0 | ASSERT(x < ARRAY_SIZE(memory_types)); |
1300 | 0 | return memory_types[x][0] ? memory_types[x] : "?"; |
1301 | 0 | } |
1302 | | |
/*
 * Keyhandler: dump the EPT p2m tables of every HAP domain to the
 * console, one line per valid mapping (gfn, order, mfn, r/w/x bits and
 * memory type).
 */
static void ept_dump_p2m_table(unsigned char key)
{
    struct domain *d;
    ept_entry_t *table, *ept_entry;
    int order;
    int i;
    int ret = 0;
    unsigned long gfn, gfn_remainder;
    unsigned long record_counter = 0;  /* softirq poll throttle */
    struct p2m_domain *p2m;
    struct ept_data *ept;

    for_each_domain(d)
    {
        /* Only EPT-backed (HAP) domains have these tables. */
        if ( !hap_enabled(d) )
            continue;

        p2m = p2m_get_hostp2m(d);
        ept = &p2m->ept;
        printk("\ndomain%d EPT p2m table:\n", d->domain_id);

        /* Step by the order of whatever mapping (or hole) was found. */
        for ( gfn = 0; gfn <= p2m->max_mapped_pfn; gfn += 1UL << order )
        {
            char c = 0;  /* becomes '?' if an intermediate needs EMT recalc */

            gfn_remainder = gfn;
            table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));

            /* Walk down; stop at a hole, PoD, or superpage. */
            for ( i = ept->wl; i > 0; i-- )
            {
                ept_entry = table + (gfn_remainder >> (i * EPT_TABLE_ORDER));
                if ( ept_entry->emt == MTRR_NUM_TYPES )
                    c = '?';
                ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i);
                if ( ret != GUEST_TABLE_NORMAL_PAGE )
                    break;
            }

            order = i * EPT_TABLE_ORDER;
            ept_entry = table + (gfn_remainder >> order);
            if ( ret != GUEST_TABLE_MAP_FAILED && is_epte_valid(ept_entry) )
            {
                if ( ept_entry->sa_p2mt == p2m_populate_on_demand )
                    printk("gfn: %13lx order: %2d PoD\n", gfn, order);
                else
                    printk("gfn: %13lx order: %2d mfn: %13lx %c%c%c %c%c%c\n",
                           gfn, order, ept_entry->mfn + 0UL,
                           ept_entry->r ? 'r' : ' ',
                           ept_entry->w ? 'w' : ' ',
                           ept_entry->x ? 'x' : ' ',
                           memory_type_to_str(ept_entry->emt)[0],
                           memory_type_to_str(ept_entry->emt)[1]
                           ?: ept_entry->emt + '0',
                           c ?: ept_entry->ipat ? '!' : ' ');

                /* Keep the system responsive during a long dump. */
                if ( !(record_counter++ % 100) )
                    process_pending_softirqs();
            }
            unmap_domain_page(table);
        }
    }
}
1365 | | |
/* Register the 'D' debug key that dumps the EPT p2m tables. */
void setup_ept_dump(void)
{
    register_keyhandler('D', ept_dump_p2m_table, "dump VT-x EPT tables", 0);
}
1370 | | |
1371 | | void p2m_init_altp2m_ept(struct domain *d, unsigned int i) |
1372 | 0 | { |
1373 | 0 | struct p2m_domain *p2m = d->arch.altp2m_p2m[i]; |
1374 | 0 | struct ept_data *ept; |
1375 | 0 |
|
1376 | 0 | p2m->min_remapped_gfn = gfn_x(INVALID_GFN); |
1377 | 0 | p2m->max_remapped_gfn = 0; |
1378 | 0 | ept = &p2m->ept; |
1379 | 0 | ept->mfn = pagetable_get_pfn(p2m_get_pagetable(p2m)); |
1380 | 0 | d->arch.altp2m_eptp[i] = ept->eptp; |
1381 | 0 | } |
1382 | | |
1383 | | unsigned int p2m_find_altp2m_by_eptp(struct domain *d, uint64_t eptp) |
1384 | 0 | { |
1385 | 0 | struct p2m_domain *p2m; |
1386 | 0 | struct ept_data *ept; |
1387 | 0 | unsigned int i; |
1388 | 0 |
|
1389 | 0 | altp2m_list_lock(d); |
1390 | 0 |
|
1391 | 0 | for ( i = 0; i < MAX_ALTP2M; i++ ) |
1392 | 0 | { |
1393 | 0 | if ( d->arch.altp2m_eptp[i] == mfn_x(INVALID_MFN) ) |
1394 | 0 | continue; |
1395 | 0 |
|
1396 | 0 | p2m = d->arch.altp2m_p2m[i]; |
1397 | 0 | ept = &p2m->ept; |
1398 | 0 |
|
1399 | 0 | if ( eptp == ept->eptp ) |
1400 | 0 | goto out; |
1401 | 0 | } |
1402 | 0 |
|
1403 | 0 | i = INVALID_ALTP2M; |
1404 | 0 |
|
1405 | 0 | out: |
1406 | 0 | altp2m_list_unlock(d); |
1407 | 0 | return i; |
1408 | 0 | } |
1409 | | |
1410 | | /* |
1411 | | * Local variables: |
1412 | | * mode: C |
1413 | | * c-file-style: "BSD" |
1414 | | * c-basic-offset: 4 |
1415 | | * tab-width: 4 |
1416 | | * indent-tabs-mode: nil |
1417 | | * End: |
1418 | | */ |