/root/src/xen/xen/arch/x86/mm/hap/hap.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * arch/x86/mm/hap/hap.c |
3 | | * |
4 | | * hardware assisted paging |
5 | | * Copyright (c) 2007 Advanced Micro Devices (Wei Huang) |
6 | | * Parts of this code are Copyright (c) 2007 by XenSource Inc. |
7 | | * |
8 | | * This program is free software; you can redistribute it and/or modify |
9 | | * it under the terms of the GNU General Public License as published by |
10 | | * the Free Software Foundation; either version 2 of the License, or |
11 | | * (at your option) any later version. |
12 | | * |
13 | | * This program is distributed in the hope that it will be useful, |
14 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | | * GNU General Public License for more details. |
17 | | * |
18 | | * You should have received a copy of the GNU General Public License |
19 | | * along with this program; If not, see <http://www.gnu.org/licenses/>. |
20 | | */ |
21 | | |
22 | | #include <xen/types.h> |
23 | | #include <xen/mm.h> |
24 | | #include <xen/trace.h> |
25 | | #include <xen/sched.h> |
26 | | #include <xen/perfc.h> |
27 | | #include <xen/irq.h> |
28 | | #include <xen/domain_page.h> |
29 | | #include <xen/guest_access.h> |
30 | | #include <xen/keyhandler.h> |
31 | | #include <asm/event.h> |
32 | | #include <asm/page.h> |
33 | | #include <asm/current.h> |
34 | | #include <asm/flushtlb.h> |
35 | | #include <asm/shared.h> |
36 | | #include <asm/hap.h> |
37 | | #include <asm/paging.h> |
38 | | #include <asm/p2m.h> |
39 | | #include <asm/domain.h> |
40 | | #include <xen/numa.h> |
41 | | #include <asm/hvm/nestedhvm.h> |
42 | | |
43 | | #include "private.h" |
44 | | |
45 | | /* Override macros from asm/page.h to make them work with mfn_t */ |
46 | | #undef mfn_to_page |
47 | 0 | #define mfn_to_page(_m) __mfn_to_page(mfn_x(_m)) |
48 | | #undef page_to_mfn |
49 | 1.31k | #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg)) |
50 | | |
51 | | /************************************************/ |
52 | | /* HAP VRAM TRACKING SUPPORT */ |
53 | | /************************************************/ |
54 | | |
55 | | /* |
56 | | * hap_track_dirty_vram() |
57 | | * Create the domain's dv_dirty_vram struct on demand. |
58 | | * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr] is |
59 | | * first encountered. |
60 | | * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by |
61 | | * calling paging_log_dirty_range(), which interrogates each vram |
62 | | * page's p2m type looking for pages that have been made writable. |
63 | | */ |
64 | | |
/*
 * Track (or stop tracking) dirty VRAM pages for domain @d.
 *
 * nr != 0: (re)arm tracking of [begin_pfn, begin_pfn + nr) and copy the
 *          per-page dirty bitmap out to @guest_dirty_bitmap.
 * nr == 0: tear down tracking and restore the old range to plain r/w.
 *
 * Returns 0 on success, -errno on failure.
 */
int hap_track_dirty_vram(struct domain *d,
                         unsigned long begin_pfn,
                         unsigned long nr,
                         XEN_GUEST_HANDLE_PARAM(void) guest_dirty_bitmap)
{
    long rc = 0;
    struct sh_dirty_vram *dirty_vram;
    uint8_t *dirty_bitmap = NULL;

    if ( nr )
    {
        /* One bit per pfn, rounded up to whole bytes. */
        int size = (nr + BITS_PER_BYTE - 1) / BITS_PER_BYTE;

        /* VRAM tracking is built on top of log-dirty; enable it on demand. */
        if ( !paging_mode_log_dirty(d) )
        {
            rc = paging_log_dirty_enable(d, 0);
            if ( rc )
                goto out;
        }

        rc = -ENOMEM;
        /* Allocate the scratch bitmap before taking the paging lock. */
        dirty_bitmap = vzalloc(size);
        if ( !dirty_bitmap )
            goto out;

        paging_lock(d);

        /* Create the domain's tracking state on first use. */
        dirty_vram = d->arch.hvm_domain.dirty_vram;
        if ( !dirty_vram )
        {
            rc = -ENOMEM;
            if ( (dirty_vram = xzalloc(struct sh_dirty_vram)) == NULL )
            {
                paging_unlock(d);
                goto out;
            }

            d->arch.hvm_domain.dirty_vram = dirty_vram;
        }

        if ( begin_pfn != dirty_vram->begin_pfn ||
             begin_pfn + nr != dirty_vram->end_pfn )
        {
            /* Tracked range moved or resized: re-arm tracking. */
            unsigned long ostart = dirty_vram->begin_pfn;
            unsigned long oend = dirty_vram->end_pfn;

            dirty_vram->begin_pfn = begin_pfn;
            dirty_vram->end_pfn = begin_pfn + nr;

            /*
             * Drop the paging lock before the p2m range updates below;
             * p2m_change_type_range() takes its own locks.
             */
            paging_unlock(d);

            /* Return the previously tracked range to normal r/w type. */
            if ( oend > ostart )
                p2m_change_type_range(d, ostart, oend,
                                      p2m_ram_logdirty, p2m_ram_rw);

            /*
             * Switch vram to log dirty mode, either by setting l1e entries of
             * P2M table to be read-only, or via hardware-assisted log-dirty.
             */
            p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
                                  p2m_ram_rw, p2m_ram_logdirty);

            flush_tlb_mask(d->domain_dirty_cpumask);

            memset(dirty_bitmap, 0xff, size); /* consider all pages dirty */
        }
        else
        {
            /* Same range as last call: collect which pages were dirtied. */
            paging_unlock(d);

            domain_pause(d);

            /* Flush dirty GFNs potentially cached by hardware. */
            p2m_flush_hardware_cached_dirty(d);

            /* get the bitmap */
            paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);

            domain_unpause(d);
        }

        rc = -EFAULT;
        if ( copy_to_guest(guest_dirty_bitmap, dirty_bitmap, size) == 0 )
            rc = 0;
    }
    else
    {
        paging_lock(d);

        dirty_vram = d->arch.hvm_domain.dirty_vram;
        if ( dirty_vram )
        {
            /*
             * If zero pages specified while tracking dirty vram
             * then stop tracking
             */
            begin_pfn = dirty_vram->begin_pfn;
            nr = dirty_vram->end_pfn - dirty_vram->begin_pfn;
            xfree(dirty_vram);
            d->arch.hvm_domain.dirty_vram = NULL;
        }

        paging_unlock(d);
        /* Restore the formerly tracked range to normal r/w type. */
        if ( nr )
            p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
                                  p2m_ram_logdirty, p2m_ram_rw);
    }
out:
    vfree(dirty_bitmap);

    return rc;
}
177 | | |
178 | | /************************************************/ |
179 | | /* HAP LOG DIRTY SUPPORT */ |
180 | | /************************************************/ |
181 | | |
182 | | /* |
183 | | * hap code to call when log_dirty is enable. return 0 if no problem found. |
184 | | * |
185 | | * NB: Domain that having device assigned should not set log_global. Because |
186 | | * there is no way to track the memory updating from device. |
187 | | */ |
static int hap_enable_log_dirty(struct domain *d, bool_t log_global)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);

    /*
     * Refuse to turn on global log-dirty mode if
     * there are outstanding p2m_ioreq_server pages.
     */
    if ( log_global && read_atomic(&p2m->ioreq.entry_count) )
        return -EBUSY;

    /* turn on PG_log_dirty bit in paging mode */
    paging_lock(d);
    d->arch.paging.mode |= PG_log_dirty;
    paging_unlock(d);

    /* Enable hardware-assisted log-dirty if it is supported. */
    p2m_enable_hardware_log_dirty(d);

    if ( log_global )
    {
        /*
         * Switch to log dirty mode, either by setting l1e entries of P2M table
         * to be read-only, or via hardware-assisted log-dirty.
         */
        p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
        /* Make sure no vcpu keeps using stale translations. */
        flush_tlb_mask(d->domain_dirty_cpumask);
    }
    return 0;
}
218 | | |
/* Turn log-dirty mode off again; counterpart of hap_enable_log_dirty(). */
static int hap_disable_log_dirty(struct domain *d)
{
    /* Clear PG_log_dirty under the paging lock, mirroring the enable path. */
    paging_lock(d);
    d->arch.paging.mode &= ~PG_log_dirty;
    paging_unlock(d);

    /* Disable hardware-assisted log-dirty if it is supported. */
    p2m_disable_hardware_log_dirty(d);

    /*
     * switch to normal mode, either by setting l1e entries of P2M table to
     * normal mode, or via hardware-assisted log-dirty.
     */
    p2m_change_entry_type_global(d, p2m_ram_logdirty, p2m_ram_rw);
    return 0;
}
235 | | |
/* Re-arm dirty tracking after the log-dirty bitmap has been read out. */
static void hap_clean_dirty_bitmap(struct domain *d)
{
    /*
     * Switch to log-dirty mode, either by setting l1e entries of P2M table to
     * be read-only, or via hardware-assisted log-dirty.
     */
    p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
    /* Drop any stale translations so future writes fault and get logged. */
    flush_tlb_mask(d->domain_dirty_cpumask);
}
245 | | |
246 | | /************************************************/ |
247 | | /* HAP SUPPORT FUNCTIONS */ |
248 | | /************************************************/ |
249 | | static struct page_info *hap_alloc(struct domain *d) |
250 | 1.29k | { |
251 | 1.29k | struct page_info *pg; |
252 | 1.29k | |
253 | 1.29k | ASSERT(paging_locked_by_me(d)); |
254 | 1.29k | |
255 | 1.29k | pg = page_list_remove_head(&d->arch.paging.hap.freelist); |
256 | 1.29k | if ( unlikely(!pg) ) |
257 | 0 | return NULL; |
258 | 1.29k | |
259 | 1.29k | d->arch.paging.hap.free_pages--; |
260 | 1.29k | |
261 | 1.29k | clear_domain_page(page_to_mfn(pg)); |
262 | 1.29k | |
263 | 1.29k | return pg; |
264 | 1.29k | } |
265 | | |
266 | | static void hap_free(struct domain *d, mfn_t mfn) |
267 | 0 | { |
268 | 0 | struct page_info *pg = mfn_to_page(mfn); |
269 | 0 |
|
270 | 0 | ASSERT(paging_locked_by_me(d)); |
271 | 0 |
|
272 | 0 | d->arch.paging.hap.free_pages++; |
273 | 0 | page_list_add_tail(pg, &d->arch.paging.hap.freelist); |
274 | 0 | } |
275 | | |
/*
 * Lend a page from the HAP pool to the p2m code.  Returns NULL (and logs
 * once per domain) when the pool is exhausted.
 */
static struct page_info *hap_alloc_p2m_page(struct domain *d)
{
    struct page_info *pg;

    /* This is called both from the p2m code (which never holds the
     * paging lock) and the log-dirty code (which always does). */
    paging_lock_recursive(d);
    pg = hap_alloc(d);

    if ( likely(pg != NULL) )
    {
        /* Move the page from the borrowable pool into the p2m accounting. */
        d->arch.paging.hap.total_pages--;
        d->arch.paging.hap.p2m_pages++;
        page_set_owner(pg, d);
        /* Single reference, dropped again in hap_free_p2m_page(). */
        pg->count_info |= 1;
    }
    else if ( !d->arch.paging.p2m_alloc_failed )
    {
        /* Report pool exhaustion once per domain rather than per failure. */
        d->arch.paging.p2m_alloc_failed = 1;
        dprintk(XENLOG_ERR, "d%i failed to allocate from HAP pool\n",
                d->domain_id);
    }

    paging_unlock(d);
    return pg;
}
302 | | |
/* Return a page previously handed out by hap_alloc_p2m_page(). */
static void hap_free_p2m_page(struct domain *d, struct page_info *pg)
{
    /* This is called both from the p2m code (which never holds the
     * paging lock) and the log-dirty code (which always does). */
    paging_lock_recursive(d);

    ASSERT(page_get_owner(pg) == d);
    /* Should have just the one ref we gave it in alloc_p2m_page() */
    if ( (pg->count_info & PGC_count_mask) != 1 ) {
        HAP_ERROR("Odd p2m page %p count c=%#lx t=%"PRtype_info"\n",
                  pg, pg->count_info, pg->u.inuse.type_info);
        WARN();
    }
    pg->count_info &= ~PGC_count_mask;
    /* Free should not decrement domain's total allocation, since
     * these pages were allocated without an owner. */
    page_set_owner(pg, NULL);
    /* Move the page back from the p2m accounting into the HAP pool. */
    d->arch.paging.hap.p2m_pages--;
    d->arch.paging.hap.total_pages++;
    hap_free(d, page_to_mfn(pg));

    paging_unlock(d);
}
326 | | |
327 | | /* Return the size of the pool, rounded up to the nearest MB */ |
328 | | static unsigned int |
329 | | hap_get_allocation(struct domain *d) |
330 | 0 | { |
331 | 0 | unsigned int pg = d->arch.paging.hap.total_pages |
332 | 0 | + d->arch.paging.hap.p2m_pages; |
333 | 0 |
|
334 | 0 | return ((pg >> (20 - PAGE_SHIFT)) |
335 | 0 | + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0)); |
336 | 0 | } |
337 | | |
338 | | /* Set the pool of pages to the required number of pages. |
339 | | * Returns 0 for success, non-zero for failure. */ |
/*
 * Grow or shrink the HAP pool to @pages total pages (p2m pages included).
 * Caller must hold the paging lock.  If @preempted is non-NULL the loop
 * may stop early with *preempted set, expecting the caller to retry.
 * Returns 0 for success, -ENOMEM for failure.
 */
int hap_set_allocation(struct domain *d, unsigned int pages, bool *preempted)
{
    struct page_info *pg;

    ASSERT(paging_locked_by_me(d));

    /* Pages in use by the p2m cannot be reclaimed; adjust the target. */
    if ( pages < d->arch.paging.hap.p2m_pages )
        pages = 0;
    else
        pages -= d->arch.paging.hap.p2m_pages;

    for ( ; ; )
    {
        if ( d->arch.paging.hap.total_pages < pages )
        {
            /* Need to allocate more memory from domheap */
            pg = alloc_domheap_page(d, MEMF_no_owner);
            if ( pg == NULL )
            {
                HAP_PRINTK("failed to allocate hap pages.\n");
                return -ENOMEM;
            }
            d->arch.paging.hap.free_pages++;
            d->arch.paging.hap.total_pages++;
            page_list_add_tail(pg, &d->arch.paging.hap.freelist);
        }
        else if ( d->arch.paging.hap.total_pages > pages )
        {
            /* Need to return memory to domheap */
            if ( page_list_empty(&d->arch.paging.hap.freelist) )
            {
                HAP_PRINTK("failed to free enough hap pages.\n");
                return -ENOMEM;
            }
            pg = page_list_remove_head(&d->arch.paging.hap.freelist);
            ASSERT(pg);
            d->arch.paging.hap.free_pages--;
            d->arch.paging.hap.total_pages--;
            free_domheap_page(pg);
        }
        else
            break;   /* target reached */

        /* Check to see if we need to yield and try again */
        if ( preempted && general_preempt_check() )
        {
            *preempted = true;
            return 0;
        }
    }

    return 0;
}
393 | | |
394 | | static mfn_t hap_make_monitor_table(struct vcpu *v) |
395 | 12 | { |
396 | 12 | struct domain *d = v->domain; |
397 | 12 | struct page_info *pg; |
398 | 12 | l4_pgentry_t *l4e; |
399 | 12 | mfn_t m4mfn; |
400 | 12 | |
401 | 12 | ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0); |
402 | 12 | |
403 | 12 | if ( (pg = hap_alloc(d)) == NULL ) |
404 | 0 | goto oom; |
405 | 12 | |
406 | 12 | m4mfn = page_to_mfn(pg); |
407 | 12 | l4e = map_domain_page(m4mfn); |
408 | 12 | |
409 | 12 | init_xen_l4_slots(l4e, m4mfn, d, INVALID_MFN, false); |
410 | 12 | unmap_domain_page(l4e); |
411 | 12 | |
412 | 12 | return m4mfn; |
413 | 12 | |
414 | 0 | oom: |
415 | 0 | HAP_ERROR("out of memory building monitor pagetable\n"); |
416 | 0 | domain_crash(d); |
417 | 0 | return INVALID_MFN; |
418 | 12 | } |
419 | | |
420 | | static void hap_destroy_monitor_table(struct vcpu* v, mfn_t mmfn) |
421 | 0 | { |
422 | 0 | struct domain *d = v->domain; |
423 | 0 |
|
424 | 0 | /* Put the memory back in the pool */ |
425 | 0 | hap_free(d, mmfn); |
426 | 0 | } |
427 | | |
428 | | /************************************************/ |
429 | | /* HAP DOMAIN LEVEL FUNCTIONS */ |
430 | | /************************************************/ |
431 | | void hap_domain_init(struct domain *d) |
432 | 1 | { |
433 | 1 | static const struct log_dirty_ops hap_ops = { |
434 | 1 | .enable = hap_enable_log_dirty, |
435 | 1 | .disable = hap_disable_log_dirty, |
436 | 1 | .clean = hap_clean_dirty_bitmap, |
437 | 1 | }; |
438 | 1 | |
439 | 1 | INIT_PAGE_LIST_HEAD(&d->arch.paging.hap.freelist); |
440 | 1 | |
441 | 1 | /* Use HAP logdirty mechanism. */ |
442 | 1 | paging_log_dirty_init(d, &hap_ops); |
443 | 1 | } |
444 | | |
445 | | /* return 0 for success, -errno for failure */ |
/*
 * Switch domain @d into HAP paging mode @mode.
 * Allocates the HAP pool (if empty), the host p2m, the nested p2ms and,
 * where supported, the altp2m state.  Returns 0 or -errno.
 */
int hap_enable(struct domain *d, u32 mode)
{
    unsigned int old_pages;
    unsigned int i;
    int rv = 0;

    domain_pause(d);

    /* Seed the HAP pool with a default allocation if it is still empty. */
    old_pages = d->arch.paging.hap.total_pages;
    if ( old_pages == 0 )
    {
        paging_lock(d);
        rv = hap_set_allocation(d, 256, NULL);
        if ( rv != 0 )
        {
            /* Roll back any partial allocation before bailing out. */
            hap_set_allocation(d, 0, NULL);
            paging_unlock(d);
            goto out;
        }
        paging_unlock(d);
    }

    /* Allow p2m and log-dirty code to borrow our memory */
    d->arch.paging.alloc_page = hap_alloc_p2m_page;
    d->arch.paging.free_page = hap_free_p2m_page;

    /* allocate P2m table */
    if ( mode & PG_translate )
    {
        rv = p2m_alloc_table(p2m_get_hostp2m(d));
        if ( rv != 0 )
            goto out;
    }

    /* Pre-allocate the nested p2m tables. */
    for (i = 0; i < MAX_NESTEDP2M; i++) {
        rv = p2m_alloc_table(d->arch.nested_p2m[i]);
        if ( rv != 0 )
            goto out;
    }

    if ( hvm_altp2m_supported() )
    {
        /* Init alternate p2m data */
        if ( (d->arch.altp2m_eptp = alloc_xenheap_page()) == NULL )
        {
            rv = -ENOMEM;
            goto out;
        }

        /* No altp2m views exist yet: mark every EPTP slot invalid. */
        for ( i = 0; i < MAX_EPTP; i++ )
            d->arch.altp2m_eptp[i] = mfn_x(INVALID_MFN);

        for ( i = 0; i < MAX_ALTP2M; i++ )
        {
            rv = p2m_alloc_table(d->arch.altp2m_p2m[i]);
            if ( rv != 0 )
                goto out;
        }

        d->arch.altp2m_active = 0;
    }

    /* Now let other users see the new mode */
    d->arch.paging.mode = mode | PG_HAP_enable;

 out:
    domain_unpause(d);
    return rv;
}
515 | | |
/*
 * Final, non-preemptible stage of HAP teardown: free altp2m/nested/host
 * p2m state and drain the pool completely.
 */
void hap_final_teardown(struct domain *d)
{
    unsigned int i;

    if ( hvm_altp2m_supported() )
    {
        d->arch.altp2m_active = 0;

        if ( d->arch.altp2m_eptp )
        {
            free_xenheap_page(d->arch.altp2m_eptp);
            d->arch.altp2m_eptp = NULL;
        }

        for ( i = 0; i < MAX_ALTP2M; i++ )
            p2m_teardown(d->arch.altp2m_p2m[i]);
    }

    /* Destroy nestedp2m's first */
    for (i = 0; i < MAX_NESTEDP2M; i++) {
        p2m_teardown(d->arch.nested_p2m[i]);
    }

    /* Non-preemptible pass in case the earlier, preemptible one was skipped. */
    if ( d->arch.paging.hap.total_pages != 0 )
        hap_teardown(d, NULL);

    p2m_teardown(p2m_get_hostp2m(d));
    /* Free any memory that the p2m teardown released */
    paging_lock(d);
    hap_set_allocation(d, 0, NULL);
    ASSERT(d->arch.paging.hap.p2m_pages == 0);
    paging_unlock(d);
}
549 | | |
/*
 * Preemptible HAP teardown for a dying domain: release the per-vcpu
 * monitor tables and drain the HAP pool.  May return early with
 * *preempted set; the caller is expected to re-invoke.
 */
void hap_teardown(struct domain *d, bool *preempted)
{
    struct vcpu *v;
    mfn_t mfn;

    ASSERT(d->is_dying);
    ASSERT(d != current->domain);

    paging_lock(d); /* Keep various asserts happy */

    if ( paging_mode_enabled(d) )
    {
        /* release the monitor table held by each vcpu */
        for_each_vcpu ( d, v )
        {
            if ( paging_get_hostmode(v) && paging_mode_external(d) )
            {
                mfn = pagetable_get_mfn(v->arch.monitor_table);
                if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) )
                    hap_destroy_monitor_table(v, mfn);
                v->arch.monitor_table = pagetable_null();
            }
        }
    }

    if ( d->arch.paging.hap.total_pages != 0 )
    {
        hap_set_allocation(d, 0, preempted);

        /* Preempted mid-drain: leave the rest for the next invocation. */
        if ( preempted && *preempted )
            goto out;

        ASSERT(d->arch.paging.hap.total_pages == 0);
    }

    d->arch.paging.mode &= ~PG_log_dirty;

    /* Drop any remaining VRAM-tracking state. */
    xfree(d->arch.hvm_domain.dirty_vram);
    d->arch.hvm_domain.dirty_vram = NULL;

out:
    paging_unlock(d);
}
593 | | |
/*
 * Handle XEN_DOMCTL_SHADOW_OP_* requests for a HAP domain.
 * SET_ALLOCATION may be preempted, in which case a hypercall
 * continuation is created to finish the work.
 */
int hap_domctl(struct domain *d, struct xen_domctl_shadow_op *sc,
               XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
{
    int rc;
    bool preempted = false;

    switch ( sc->op )
    {
    case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
        paging_lock(d);
        rc = hap_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted);
        paging_unlock(d);
        if ( preempted )
            /* Not finished. Set up to re-run the call. */
            rc = hypercall_create_continuation(__HYPERVISOR_domctl, "h",
                                               u_domctl);
        else
            /* Finished. Return the new allocation */
            sc->mb = hap_get_allocation(d);
        return rc;
    case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
        sc->mb = hap_get_allocation(d);
        /* Fall through... */
    case XEN_DOMCTL_SHADOW_OP_OFF:
        return 0;
    default:
        HAP_PRINTK("Bad hap domctl op %u\n", sc->op);
        return -EINVAL;
    }
}
624 | | |
625 | | static const struct paging_mode hap_paging_real_mode; |
626 | | static const struct paging_mode hap_paging_protected_mode; |
627 | | static const struct paging_mode hap_paging_pae_mode; |
628 | | static const struct paging_mode hap_paging_long_mode; |
629 | | |
630 | | void hap_vcpu_init(struct vcpu *v) |
631 | 12 | { |
632 | 12 | v->arch.paging.mode = &hap_paging_real_mode; |
633 | 12 | v->arch.paging.nestedmode = &hap_paging_real_mode; |
634 | 12 | } |
635 | | |
636 | | /************************************************/ |
637 | | /* HAP PAGING MODE FUNCTIONS */ |
638 | | /************************************************/ |
639 | | /* |
640 | | * HAP guests can handle page faults (in the guest page tables) without |
641 | | * needing any action from Xen, so we should not be intercepting them. |
642 | | */ |
643 | | static int hap_page_fault(struct vcpu *v, unsigned long va, |
644 | | struct cpu_user_regs *regs) |
645 | 0 | { |
646 | 0 | struct domain *d = v->domain; |
647 | 0 |
|
648 | 0 | HAP_ERROR("Intercepted a guest #PF (%pv) with HAP enabled\n", v); |
649 | 0 | domain_crash(d); |
650 | 0 | return 0; |
651 | 0 | } |
652 | | |
653 | | /* |
654 | | * HAP guests can handle invlpg without needing any action from Xen, so |
655 | | * should not be intercepting it. However, we need to correctly handle |
656 | | * getting here from instruction emulation. |
657 | | */ |
658 | | static bool_t hap_invlpg(struct vcpu *v, unsigned long va) |
659 | 0 | { |
660 | 0 | /* |
661 | 0 | * Emulate INVLPGA: |
662 | 0 | * Must perform the flush right now or an other vcpu may |
663 | 0 | * use it when we use the next VMRUN emulation, otherwise. |
664 | 0 | */ |
665 | 0 | if ( nestedhvm_enabled(v->domain) && vcpu_nestedhvm(v).nv_p2m ) |
666 | 0 | p2m_flush(v, vcpu_nestedhvm(v).nv_p2m); |
667 | 0 |
|
668 | 0 | return 1; |
669 | 0 | } |
670 | | |
671 | | static void hap_update_cr3(struct vcpu *v, int do_locking) |
672 | 98 | { |
673 | 98 | v->arch.hvm_vcpu.hw_cr[3] = v->arch.hvm_vcpu.guest_cr[3]; |
674 | 98 | hvm_update_guest_cr(v, 3); |
675 | 98 | } |
676 | | |
677 | | const struct paging_mode * |
678 | | hap_paging_get_mode(struct vcpu *v) |
679 | 98 | { |
680 | 98 | return (!hvm_paging_enabled(v) ? &hap_paging_real_mode : |
681 | 62 | hvm_long_mode_active(v) ? &hap_paging_long_mode : |
682 | 0 | hvm_pae_enabled(v) ? &hap_paging_pae_mode : |
683 | 0 | &hap_paging_protected_mode); |
684 | 98 | } |
685 | | |
/* Refresh the vcpu's paging mode after a guest mode change. */
static void hap_update_paging_modes(struct vcpu *v)
{
    struct domain *d = v->domain;
    unsigned long cr3_gfn = v->arch.hvm_vcpu.guest_cr[3] >> PAGE_SHIFT;
    p2m_type_t t;

    /* We hold onto the cr3 as it may be modified later, and
     * we need to respect lock ordering. No need for
     * checks here as they are performed by vmx_load_pdptrs
     * (the potential user of the cr3) */
    (void)get_gfn(d, cr3_gfn, &t);
    paging_lock(d);

    v->arch.paging.mode = hap_paging_get_mode(v);

    /* Build the monitor table lazily, on first mode update. */
    if ( pagetable_is_null(v->arch.monitor_table) )
    {
        mfn_t mmfn = hap_make_monitor_table(v);
        v->arch.monitor_table = pagetable_from_mfn(mmfn);
        make_cr3(v, mmfn);
        hvm_update_host_cr3(v);
    }

    /* CR3 is effectively updated by a mode change. Flush ASIDs, etc. */
    hap_update_cr3(v, 0);

    paging_unlock(d);
    put_gfn(d, cr3_gfn);
}
715 | | |
/*
 * Install p2m entry @new at @p (level @level, gfn @gfn), flushing TLBs
 * and, when necessary, the nested p2ms.
 */
static void
hap_write_p2m_entry(struct domain *d, unsigned long gfn, l1_pgentry_t *p,
                    l1_pgentry_t new, unsigned int level)
{
    uint32_t old_flags;
    bool_t flush_nestedp2m = 0;

    /* We know always use the host p2m here, regardless if the vcpu
     * is in host or guest mode. The vcpu can be in guest mode by
     * a hypercall which passes a domain and chooses mostly the first
     * vcpu. */

    paging_lock(d);
    old_flags = l1e_get_flags(*p);

    if ( nestedhvm_enabled(d) && (old_flags & _PAGE_PRESENT)
         && !p2m_get_hostp2m(d)->defer_nested_flush ) {
        /* We are replacing a valid entry so we need to flush nested p2ms,
         * unless the only change is an increase in access rights. */
        mfn_t omfn = l1e_get_mfn(*p);
        mfn_t nmfn = l1e_get_mfn(new);
        flush_nestedp2m = !( mfn_x(omfn) == mfn_x(nmfn)
            && perms_strictly_increased(old_flags, l1e_get_flags(new)) );
    }

    safe_write_pte(p, new);
    /* Old translation may be cached; flush if it was present. */
    if ( old_flags & _PAGE_PRESENT )
        flush_tlb_mask(d->domain_dirty_cpumask);

    paging_unlock(d);

    /* Flush the nested p2ms outside the paging lock. */
    if ( flush_nestedp2m )
        p2m_flush_nestedp2m(d);
}
750 | | |
751 | | static unsigned long hap_gva_to_gfn_real_mode( |
752 | | struct vcpu *v, struct p2m_domain *p2m, unsigned long gva, uint32_t *pfec) |
753 | 0 | { |
754 | 0 | return ((paddr_t)gva >> PAGE_SHIFT); |
755 | 0 | } |
756 | | |
757 | | static unsigned long hap_p2m_ga_to_gfn_real_mode( |
758 | | struct vcpu *v, struct p2m_domain *p2m, unsigned long cr3, |
759 | | paddr_t ga, uint32_t *pfec, unsigned int *page_order) |
760 | 0 | { |
761 | 0 | if ( page_order ) |
762 | 0 | *page_order = PAGE_ORDER_4K; |
763 | 0 | return (ga >> PAGE_SHIFT); |
764 | 0 | } |
765 | | |
766 | | /* Entry points into this mode of the hap code. */ |
/* No guest paging (real mode): 1 "level". */
static const struct paging_mode hap_paging_real_mode = {
    .page_fault     = hap_page_fault,
    .invlpg         = hap_invlpg,
    .gva_to_gfn     = hap_gva_to_gfn_real_mode,
    .p2m_ga_to_gfn  = hap_p2m_ga_to_gfn_real_mode,
    .update_cr3     = hap_update_cr3,
    .update_paging_modes = hap_update_paging_modes,
    .write_p2m_entry = hap_write_p2m_entry,
    .guest_levels   = 1
};

/* 32-bit non-PAE guest paging: 2 levels. */
static const struct paging_mode hap_paging_protected_mode = {
    .page_fault     = hap_page_fault,
    .invlpg         = hap_invlpg,
    .gva_to_gfn     = hap_gva_to_gfn_2_levels,
    .p2m_ga_to_gfn  = hap_p2m_ga_to_gfn_2_levels,
    .update_cr3     = hap_update_cr3,
    .update_paging_modes = hap_update_paging_modes,
    .write_p2m_entry = hap_write_p2m_entry,
    .guest_levels   = 2
};

/* 32-bit PAE guest paging: 3 levels. */
static const struct paging_mode hap_paging_pae_mode = {
    .page_fault     = hap_page_fault,
    .invlpg         = hap_invlpg,
    .gva_to_gfn     = hap_gva_to_gfn_3_levels,
    .p2m_ga_to_gfn  = hap_p2m_ga_to_gfn_3_levels,
    .update_cr3     = hap_update_cr3,
    .update_paging_modes = hap_update_paging_modes,
    .write_p2m_entry = hap_write_p2m_entry,
    .guest_levels   = 3
};

/* 64-bit (long mode) guest paging: 4 levels. */
static const struct paging_mode hap_paging_long_mode = {
    .page_fault     = hap_page_fault,
    .invlpg         = hap_invlpg,
    .gva_to_gfn     = hap_gva_to_gfn_4_levels,
    .p2m_ga_to_gfn  = hap_p2m_ga_to_gfn_4_levels,
    .update_cr3     = hap_update_cr3,
    .update_paging_modes = hap_update_paging_modes,
    .write_p2m_entry = hap_write_p2m_entry,
    .guest_levels   = 4
};
810 | | |
811 | | /* |
812 | | * Local variables: |
813 | | * mode: C |
814 | | * c-file-style: "BSD" |
815 | | * c-basic-offset: 4 |
816 | | * indent-tabs-mode: nil |
817 | | * End: |
818 | | */ |