/root/src/xen/xen/arch/x86/mm/paging.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * arch/x86/mm/paging.c |
3 | | * |
4 | | * x86 specific paging support |
5 | | * Copyright (c) 2007 Advanced Micro Devices (Wei Huang) |
6 | | * Copyright (c) 2007 XenSource Inc. |
7 | | * |
8 | | * This program is free software; you can redistribute it and/or modify |
9 | | * it under the terms of the GNU General Public License as published by |
10 | | * the Free Software Foundation; either version 2 of the License, or |
11 | | * (at your option) any later version. |
12 | | * |
13 | | * This program is distributed in the hope that it will be useful, |
14 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | | * GNU General Public License for more details. |
17 | | * |
18 | | * You should have received a copy of the GNU General Public License |
19 | | * along with this program; If not, see <http://www.gnu.org/licenses/>. |
20 | | */ |
21 | | |
22 | | #include <xen/init.h> |
23 | | #include <xen/guest_access.h> |
24 | | #include <asm/paging.h> |
25 | | #include <asm/shadow.h> |
26 | | #include <asm/p2m.h> |
27 | | #include <asm/hap.h> |
28 | | #include <asm/event.h> |
29 | | #include <asm/hvm/nestedhvm.h> |
30 | | #include <xen/numa.h> |
31 | | #include <xsm/xsm.h> |
32 | | #include <public/sched.h> /* SHUTDOWN_suspend */ |
33 | | |
34 | | #include "mm-locks.h" |
35 | | |
36 | | /* Printouts */ |
37 | | #define PAGING_PRINTK(_f, _a...) \ |
38 | | debugtrace_printk("pg: %s(): " _f, __func__, ##_a) |
39 | | #define PAGING_ERROR(_f, _a...) \ |
40 | | printk("pg error: %s(): " _f, __func__, ##_a) |
41 | | #define PAGING_DEBUG(flag, _f, _a...) \ |
42 | 0 | do { \ |
43 | 0 | if (PAGING_DEBUG_ ## flag) \ |
44 | 0 | debugtrace_printk("pgdebug: %s(): " _f, __func__, ##_a); \ |
45 | 0 | } while (0) |
46 | | |
47 | | /* Per-CPU variable for enforcing the lock ordering */ |
48 | | DEFINE_PER_CPU(int, mm_lock_level); |
49 | | |
50 | | /* Override macros from asm/page.h to make them work with mfn_t */ |
51 | | #undef mfn_to_page |
52 | 0 | #define mfn_to_page(_m) __mfn_to_page(mfn_x(_m)) |
53 | | #undef page_to_mfn |
54 | 0 | #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg)) |
55 | | |
56 | | /************************************************/ |
57 | | /* LOG DIRTY SUPPORT */ |
58 | | /************************************************/ |
59 | | |
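| | /* Allocate a page for the log-dirty bitmap trie from the pool set up |
| | * by the paging mode, via its alloc_page() callback, keeping the |
| | * allocation statistics up to date. */ |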
60 | | static mfn_t paging_new_log_dirty_page(struct domain *d) |
61 | 0 | { |
62 | 0 | struct page_info *page; |
63 | 0 |
64 | 0 | page = d->arch.paging.alloc_page(d); |
65 | 0 | if ( unlikely(page == NULL) ) |
66 | 0 | { |
67 | 0 | d->arch.paging.log_dirty.failed_allocs++; |
68 | 0 | return INVALID_MFN; |
69 | 0 | } |
70 | 0 |
71 | 0 | d->arch.paging.log_dirty.allocs++; |
72 | 0 |
73 | 0 | return page_to_mfn(page); |
74 | 0 | } |
75 | | |
76 | | /* Alloc and init a new leaf node */ |
77 | | static mfn_t paging_new_log_dirty_leaf(struct domain *d) |
78 | 0 | { |
79 | 0 | mfn_t mfn = paging_new_log_dirty_page(d); |
80 | 0 |
81 | 0 | if ( mfn_valid(mfn) ) |
82 | 0 | clear_domain_page(mfn); |
83 | 0 |
84 | 0 | return mfn; |
85 | 0 | } |
86 | | |
87 | | /* Alloc and init a new non-leaf node */ |
88 | | static mfn_t paging_new_log_dirty_node(struct domain *d) |
89 | 0 | { |
90 | 0 | mfn_t mfn = paging_new_log_dirty_page(d); |
91 | 0 | if ( mfn_valid(mfn) ) |
92 | 0 | { |
93 | 0 | int i; |
94 | 0 | mfn_t *node = map_domain_page(mfn); |
95 | 0 | for ( i = 0; i < LOGDIRTY_NODE_ENTRIES; i++ ) |
96 | 0 | node[i] = INVALID_MFN; |
97 | 0 | unmap_domain_page(node); |
98 | 0 | } |
99 | 0 | return mfn; |
100 | 0 | } |
101 | | |
102 | | /* Map the top level of the log-dirty bitmap trie */ |
103 | | static mfn_t *paging_map_log_dirty_bitmap(struct domain *d) |
104 | 0 | { |
105 | 0 | if ( likely(mfn_valid(d->arch.paging.log_dirty.top)) ) |
106 | 0 | return map_domain_page(d->arch.paging.log_dirty.top); |
107 | 0 | return NULL; |
108 | 0 | } |
109 | | |
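| | /* Return one page of the log-dirty bitmap trie to the pool, via the |
| | * paging mode's free_page() callback. */ |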
110 | | static void paging_free_log_dirty_page(struct domain *d, mfn_t mfn) |
111 | 0 | { |
112 | 0 | d->arch.paging.log_dirty.allocs--; |
113 | 0 | d->arch.paging.free_page(d, mfn_to_page(mfn)); |
114 | 0 | } |
115 | | |
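| | /* Free the whole log-dirty bitmap trie. The walk is preemptible: on |
| | * -ERESTART the position is saved in d->arch.paging.preempt so that a |
| | * continuation can resume where this invocation left off. */ |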
116 | | static int paging_free_log_dirty_bitmap(struct domain *d, int rc) |
117 | 0 | { |
118 | 0 | mfn_t *l4, *l3, *l2; |
119 | 0 | int i4, i3, i2; |
120 | 0 |
121 | 0 | paging_lock(d); |
122 | 0 |
123 | 0 | if ( !mfn_valid(d->arch.paging.log_dirty.top) ) |
124 | 0 | { |
125 | 0 | paging_unlock(d); |
126 | 0 | return 0; |
127 | 0 | } |
128 | 0 |
129 | 0 | if ( !d->arch.paging.preempt.dom ) |
130 | 0 | { |
131 | 0 | memset(&d->arch.paging.preempt.log_dirty, 0, |
132 | 0 | sizeof(d->arch.paging.preempt.log_dirty)); |
133 | 0 | ASSERT(rc <= 0); |
134 | 0 | d->arch.paging.preempt.log_dirty.done = -rc; |
135 | 0 | } |
136 | 0 | else if ( d->arch.paging.preempt.dom != current->domain || |
137 | 0 | d->arch.paging.preempt.op != XEN_DOMCTL_SHADOW_OP_OFF ) |
138 | 0 | { |
139 | 0 | paging_unlock(d); |
140 | 0 | return -EBUSY; |
141 | 0 | } |
142 | 0 |
143 | 0 | l4 = map_domain_page(d->arch.paging.log_dirty.top); |
144 | 0 | i4 = d->arch.paging.preempt.log_dirty.i4; |
145 | 0 | i3 = d->arch.paging.preempt.log_dirty.i3; |
146 | 0 | rc = 0; |
147 | 0 |
148 | 0 | for ( ; i4 < LOGDIRTY_NODE_ENTRIES; i4++, i3 = 0 ) |
149 | 0 | { |
150 | 0 | if ( !mfn_valid(l4[i4]) ) |
151 | 0 | continue; |
152 | 0 |
153 | 0 | l3 = map_domain_page(l4[i4]); |
154 | 0 |
155 | 0 | for ( ; i3 < LOGDIRTY_NODE_ENTRIES; i3++ ) |
156 | 0 | { |
157 | 0 | if ( !mfn_valid(l3[i3]) ) |
158 | 0 | continue; |
159 | 0 |
160 | 0 | l2 = map_domain_page(l3[i3]); |
161 | 0 |
162 | 0 | for ( i2 = 0; i2 < LOGDIRTY_NODE_ENTRIES; i2++ ) |
163 | 0 | if ( mfn_valid(l2[i2]) ) |
164 | 0 | paging_free_log_dirty_page(d, l2[i2]); |
165 | 0 |
166 | 0 | unmap_domain_page(l2); |
167 | 0 | paging_free_log_dirty_page(d, l3[i3]); |
168 | 0 | l3[i3] = INVALID_MFN; |
169 | 0 |
170 | 0 | if ( i3 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() ) |
171 | 0 | { |
172 | 0 | d->arch.paging.preempt.log_dirty.i3 = i3 + 1; |
173 | 0 | d->arch.paging.preempt.log_dirty.i4 = i4; |
174 | 0 | rc = -ERESTART; |
175 | 0 | break; |
176 | 0 | } |
177 | 0 | } |
178 | 0 |
179 | 0 | unmap_domain_page(l3); |
180 | 0 | if ( rc ) |
181 | 0 | break; |
182 | 0 | paging_free_log_dirty_page(d, l4[i4]); |
183 | 0 | l4[i4] = INVALID_MFN; |
184 | 0 |
185 | 0 | if ( i4 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() ) |
186 | 0 | { |
187 | 0 | d->arch.paging.preempt.log_dirty.i3 = 0; |
188 | 0 | d->arch.paging.preempt.log_dirty.i4 = i4 + 1; |
189 | 0 | rc = -ERESTART; |
190 | 0 | break; |
191 | 0 | } |
192 | 0 | } |
193 | 0 |
194 | 0 | unmap_domain_page(l4); |
195 | 0 |
196 | 0 | if ( !rc ) |
197 | 0 | { |
198 | 0 | paging_free_log_dirty_page(d, d->arch.paging.log_dirty.top); |
199 | 0 | d->arch.paging.log_dirty.top = INVALID_MFN; |
200 | 0 |
201 | 0 | ASSERT(d->arch.paging.log_dirty.allocs == 0); |
202 | 0 | d->arch.paging.log_dirty.failed_allocs = 0; |
203 | 0 |
204 | 0 | rc = -d->arch.paging.preempt.log_dirty.done; |
205 | 0 | d->arch.paging.preempt.dom = NULL; |
206 | 0 | } |
207 | 0 | else |
208 | 0 | { |
209 | 0 | d->arch.paging.preempt.dom = current->domain; |
210 | 0 | d->arch.paging.preempt.op = XEN_DOMCTL_SHADOW_OP_OFF; |
211 | 0 | } |
212 | 0 |
213 | 0 | paging_unlock(d); |
214 | 0 |
215 | 0 | return rc; |
216 | 0 | } |
217 | | |
218 | | int paging_log_dirty_enable(struct domain *d, bool_t log_global) |
219 | 0 | { |
220 | 0 | int ret; |
221 | 0 |
222 | 0 | if ( need_iommu(d) && log_global ) |
223 | 0 | { |
224 | 0 | /* |
225 | 0 | * Refuse to turn on global log-dirty mode |
226 | 0 | * if the domain is using the IOMMU. |
227 | 0 | */ |
228 | 0 | return -EINVAL; |
229 | 0 | } |
230 | 0 |
231 | 0 | if ( paging_mode_log_dirty(d) ) |
232 | 0 | return -EINVAL; |
233 | 0 |
234 | 0 | domain_pause(d); |
235 | 0 | ret = d->arch.paging.log_dirty.ops->enable(d, log_global); |
236 | 0 | domain_unpause(d); |
237 | 0 |
238 | 0 | return ret; |
239 | 0 | } |
240 | | |
241 | | static int paging_log_dirty_disable(struct domain *d, bool_t resuming) |
242 | 0 | { |
243 | 0 | int ret = 1; |
244 | 0 |
245 | 0 | if ( !resuming ) |
246 | 0 | { |
247 | 0 | domain_pause(d); |
248 | 0 | /* Safe because the domain is paused. */ |
249 | 0 | if ( paging_mode_log_dirty(d) ) |
250 | 0 | { |
251 | 0 | ret = d->arch.paging.log_dirty.ops->disable(d); |
252 | 0 | ASSERT(ret <= 0); |
253 | 0 | } |
254 | 0 | } |
255 | 0 |
256 | 0 | ret = paging_free_log_dirty_bitmap(d, ret); |
257 | 0 | if ( ret == -ERESTART ) |
258 | 0 | return ret; |
259 | 0 |
260 | 0 | domain_unpause(d); |
261 | 0 |
262 | 0 | return ret; |
263 | 0 | } |
264 | | |
265 | | /* Mark a page as dirty, taking the guest pfn as a parameter */ |
266 | | void paging_mark_pfn_dirty(struct domain *d, pfn_t pfn) |
267 | 0 | { |
268 | 0 | bool changed; |
269 | 0 | mfn_t mfn, *l4, *l3, *l2; |
270 | 0 | unsigned long *l1; |
271 | 0 | unsigned int i1, i2, i3, i4; |
272 | 0 |
273 | 0 | if ( !paging_mode_log_dirty(d) ) |
274 | 0 | return; |
275 | 0 |
276 | 0 | /* Shared MFNs should NEVER be marked dirty */ |
277 | 0 | BUG_ON(SHARED_M2P(pfn_x(pfn))); |
278 | 0 |
279 | 0 | /* |
280 | 0 | * Values with the MSB set denote MFNs that aren't really part of the |
281 | 0 | * domain's pseudo-physical memory map (e.g., the shared info frame). |
282 | 0 | * Nothing to do here... |
283 | 0 | */ |
284 | 0 | if ( unlikely(!VALID_M2P(pfn_x(pfn))) ) |
285 | 0 | return; |
286 | 0 |
287 | 0 | i1 = L1_LOGDIRTY_IDX(pfn); |
288 | 0 | i2 = L2_LOGDIRTY_IDX(pfn); |
289 | 0 | i3 = L3_LOGDIRTY_IDX(pfn); |
290 | 0 | i4 = L4_LOGDIRTY_IDX(pfn); |
291 | 0 |
292 | 0 | /* Recursive: this is called from inside the shadow code */ |
293 | 0 | paging_lock_recursive(d); |
294 | 0 |
295 | 0 | if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) ) |
296 | 0 | { |
297 | 0 | d->arch.paging.log_dirty.top = paging_new_log_dirty_node(d); |
298 | 0 | if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) ) |
299 | 0 | goto out; |
300 | 0 | } |
301 | 0 |
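| | /* Walk the four-level trie, allocating any missing nodes on the way |
| | * down. Each leaf page holds 8 * PAGE_SIZE bits, so with 4kB pages one |
| | * leaf covers 128MB of guest address space. */ |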
302 | 0 | l4 = paging_map_log_dirty_bitmap(d); |
303 | 0 | mfn = l4[i4]; |
304 | 0 | if ( !mfn_valid(mfn) ) |
305 | 0 | l4[i4] = mfn = paging_new_log_dirty_node(d); |
306 | 0 | unmap_domain_page(l4); |
307 | 0 | if ( !mfn_valid(mfn) ) |
308 | 0 | goto out; |
309 | 0 |
310 | 0 | l3 = map_domain_page(mfn); |
311 | 0 | mfn = l3[i3]; |
312 | 0 | if ( !mfn_valid(mfn) ) |
313 | 0 | l3[i3] = mfn = paging_new_log_dirty_node(d); |
314 | 0 | unmap_domain_page(l3); |
315 | 0 | if ( !mfn_valid(mfn) ) |
316 | 0 | goto out; |
317 | 0 |
318 | 0 | l2 = map_domain_page(mfn); |
319 | 0 | mfn = l2[i2]; |
320 | 0 | if ( !mfn_valid(mfn) ) |
321 | 0 | l2[i2] = mfn = paging_new_log_dirty_leaf(d); |
322 | 0 | unmap_domain_page(l2); |
323 | 0 | if ( !mfn_valid(mfn) ) |
324 | 0 | goto out; |
325 | 0 |
326 | 0 | l1 = map_domain_page(mfn); |
327 | 0 | changed = !__test_and_set_bit(i1, l1); |
328 | 0 | unmap_domain_page(l1); |
329 | 0 | if ( changed ) |
330 | 0 | { |
331 | 0 | PAGING_DEBUG(LOGDIRTY, |
332 | 0 | "d%d: marked mfn %" PRI_mfn " (pfn %" PRI_pfn ")\n", |
333 | 0 | d->domain_id, mfn_x(mfn), pfn_x(pfn)); |
334 | 0 | d->arch.paging.log_dirty.dirty_count++; |
335 | 0 | } |
336 | 0 |
337 | 0 | out: |
338 | 0 | /* We've already recorded any failed allocations */ |
339 | 0 | paging_unlock(d); |
340 | 0 | return; |
341 | 0 | } |
342 | | |
343 | | /* Mark a page as dirty */ |
344 | | void paging_mark_dirty(struct domain *d, mfn_t gmfn) |
345 | 9.13k | { |
346 | 9.13k | pfn_t pfn; |
347 | 9.13k | |
348 | 9.13k | if ( !paging_mode_log_dirty(d) || !mfn_valid(gmfn) || |
349 | 0 | page_get_owner(mfn_to_page(gmfn)) != d ) |
350 | 9.13k | return; |
351 | 9.13k | |
352 | 9.13k | /* We /really/ mean PFN here, even for non-translated guests. */ |
353 | 0 | pfn = _pfn(get_gpfn_from_mfn(mfn_x(gmfn))); |
354 | 0 |
355 | 0 | paging_mark_pfn_dirty(d, pfn); |
356 | 0 | } |
357 | | |
358 | | |
359 | | /* Is this guest page dirty? */ |
360 | | int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn) |
361 | 0 | { |
362 | 0 | pfn_t pfn; |
363 | 0 | mfn_t mfn, *l4, *l3, *l2; |
364 | 0 | unsigned long *l1; |
365 | 0 | int rv; |
366 | 0 |
367 | 0 | ASSERT(paging_locked_by_me(d)); |
368 | 0 | ASSERT(paging_mode_log_dirty(d)); |
369 | 0 |
370 | 0 | /* We /really/ mean PFN here, even for non-translated guests. */ |
371 | 0 | pfn = _pfn(get_gpfn_from_mfn(mfn_x(gmfn))); |
372 | 0 | /* Shared pages are always read-only; invalid pages can't be dirty. */ |
373 | 0 | if ( unlikely(SHARED_M2P(pfn_x(pfn)) || !VALID_M2P(pfn_x(pfn))) ) |
374 | 0 | return 0; |
375 | 0 |
376 | 0 | mfn = d->arch.paging.log_dirty.top; |
377 | 0 | if ( !mfn_valid(mfn) ) |
378 | 0 | return 0; |
379 | 0 |
380 | 0 | l4 = map_domain_page(mfn); |
381 | 0 | mfn = l4[L4_LOGDIRTY_IDX(pfn)]; |
382 | 0 | unmap_domain_page(l4); |
383 | 0 | if ( !mfn_valid(mfn) ) |
384 | 0 | return 0; |
385 | 0 |
386 | 0 | l3 = map_domain_page(mfn); |
387 | 0 | mfn = l3[L3_LOGDIRTY_IDX(pfn)]; |
388 | 0 | unmap_domain_page(l3); |
389 | 0 | if ( !mfn_valid(mfn) ) |
390 | 0 | return 0; |
391 | 0 |
392 | 0 | l2 = map_domain_page(mfn); |
393 | 0 | mfn = l2[L2_LOGDIRTY_IDX(pfn)]; |
394 | 0 | unmap_domain_page(l2); |
395 | 0 | if ( !mfn_valid(mfn) ) |
396 | 0 | return 0; |
397 | 0 |
398 | 0 | l1 = map_domain_page(mfn); |
399 | 0 | rv = test_bit(L1_LOGDIRTY_IDX(pfn), l1); |
400 | 0 | unmap_domain_page(l1); |
401 | 0 | return rv; |
402 | 0 | } |
403 | | |
404 | | |
405 | | /* Read a domain's log-dirty bitmap and stats. If the operation is a CLEAN, |
406 | | * clear the bitmap and stats as well. */ |
407 | | static int paging_log_dirty_op(struct domain *d, |
408 | | struct xen_domctl_shadow_op *sc, |
409 | | bool_t resuming) |
410 | 0 | { |
411 | 0 | int rv = 0, clean = 0, peek = 1; |
412 | 0 | unsigned long pages = 0; |
413 | 0 | mfn_t *l4 = NULL, *l3 = NULL, *l2 = NULL; |
414 | 0 | unsigned long *l1 = NULL; |
415 | 0 | int i4, i3, i2; |
416 | 0 |
417 | 0 | if ( !resuming ) |
418 | 0 | { |
419 | 0 | /* |
420 | 0 | * Mark dirty all currently write-mapped pages on e.g. the |
421 | 0 | * final iteration of a save operation. |
422 | 0 | */ |
423 | 0 | if ( is_hvm_domain(d) && |
424 | 0 | (sc->mode & XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL) ) |
425 | 0 | hvm_mapped_guest_frames_mark_dirty(d); |
426 | 0 |
427 | 0 | domain_pause(d); |
428 | 0 |
429 | 0 | /* |
430 | 0 | * Flush dirty GFNs potentially cached by hardware. We only need to |
431 | 0 | * flush when not resuming, as the domain was paused in the resuming |
432 | 0 | * case, so no new pages can have been dirtied. |
433 | 0 | */ |
434 | 0 | p2m_flush_hardware_cached_dirty(d); |
435 | 0 | } |
436 | 0 |
437 | 0 | paging_lock(d); |
438 | 0 |
439 | 0 | if ( !d->arch.paging.preempt.dom ) |
440 | 0 | memset(&d->arch.paging.preempt.log_dirty, 0, |
441 | 0 | sizeof(d->arch.paging.preempt.log_dirty)); |
442 | 0 | else if ( d->arch.paging.preempt.dom != current->domain || |
443 | 0 | d->arch.paging.preempt.op != sc->op ) |
444 | 0 | { |
445 | 0 | paging_unlock(d); |
446 | 0 | ASSERT(!resuming); |
447 | 0 | domain_unpause(d); |
448 | 0 | return -EBUSY; |
449 | 0 | } |
450 | 0 |
451 | 0 | clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN); |
452 | 0 |
453 | 0 | PAGING_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n", |
454 | 0 | (clean) ? "clean" : "peek", |
455 | 0 | d->domain_id, |
456 | 0 | d->arch.paging.log_dirty.fault_count, |
457 | 0 | d->arch.paging.log_dirty.dirty_count); |
458 | 0 |
459 | 0 | sc->stats.fault_count = d->arch.paging.log_dirty.fault_count; |
460 | 0 | sc->stats.dirty_count = d->arch.paging.log_dirty.dirty_count; |
461 | 0 |
462 | 0 | if ( guest_handle_is_null(sc->dirty_bitmap) ) |
463 | 0 | /* The caller may have wanted just to clean the state or access the stats. */ |
464 | 0 | peek = 0; |
465 | 0 |
466 | 0 | if ( unlikely(d->arch.paging.log_dirty.failed_allocs) ) { |
467 | 0 | printk(XENLOG_WARNING |
468 | 0 | "%u failed page allocs while logging dirty pages of d%d\n", |
469 | 0 | d->arch.paging.log_dirty.failed_allocs, d->domain_id); |
470 | 0 | rv = -ENOMEM; |
471 | 0 | goto out; |
472 | 0 | } |
473 | 0 |
474 | 0 | l4 = paging_map_log_dirty_bitmap(d); |
475 | 0 | i4 = d->arch.paging.preempt.log_dirty.i4; |
476 | 0 | i3 = d->arch.paging.preempt.log_dirty.i3; |
477 | 0 | pages = d->arch.paging.preempt.log_dirty.done; |
478 | 0 |
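| | /* Walk the trie, copying the bitmap out to the caller in leaf-sized |
| | * chunks, resuming from the saved preemption cursor. Missing nodes |
| | * read back as all-clean bits. */ |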
479 | 0 | for ( ; (pages < sc->pages) && (i4 < LOGDIRTY_NODE_ENTRIES); i4++, i3 = 0 ) |
480 | 0 | { |
481 | 0 | l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(l4[i4]) : NULL; |
482 | 0 | for ( ; (pages < sc->pages) && (i3 < LOGDIRTY_NODE_ENTRIES); i3++ ) |
483 | 0 | { |
484 | 0 | l2 = ((l3 && mfn_valid(l3[i3])) ? |
485 | 0 | map_domain_page(l3[i3]) : NULL); |
486 | 0 | for ( i2 = 0; |
487 | 0 | (pages < sc->pages) && (i2 < LOGDIRTY_NODE_ENTRIES); |
488 | 0 | i2++ ) |
489 | 0 | { |
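| | /* Copy at most one leaf page per iteration, clipped to the number of |
| | * bitmap bytes still wanted, i.e. (remaining bits + 7) / 8. */ |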
490 | 0 | unsigned int bytes = PAGE_SIZE; |
491 | 0 | l1 = ((l2 && mfn_valid(l2[i2])) ? |
492 | 0 | map_domain_page(l2[i2]) : NULL); |
493 | 0 | if ( unlikely(((sc->pages - pages + 7) >> 3) < bytes) ) |
494 | 0 | bytes = (unsigned int)((sc->pages - pages + 7) >> 3); |
495 | 0 | if ( likely(peek) ) |
496 | 0 | { |
497 | 0 | if ( (l1 ? copy_to_guest_offset(sc->dirty_bitmap, |
498 | 0 | pages >> 3, (uint8_t *)l1, |
499 | 0 | bytes) |
500 | 0 | : clear_guest_offset(sc->dirty_bitmap, |
501 | 0 | pages >> 3, bytes)) != 0 ) |
502 | 0 | { |
503 | 0 | rv = -EFAULT; |
504 | 0 | goto out; |
505 | 0 | } |
506 | 0 | } |
507 | 0 | pages += bytes << 3; |
508 | 0 | if ( l1 ) |
509 | 0 | { |
510 | 0 | if ( clean ) |
511 | 0 | clear_page(l1); |
512 | 0 | unmap_domain_page(l1); |
513 | 0 | } |
514 | 0 | } |
515 | 0 | if ( l2 ) |
516 | 0 | unmap_domain_page(l2); |
517 | 0 |
518 | 0 | if ( i3 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() ) |
519 | 0 | { |
520 | 0 | d->arch.paging.preempt.log_dirty.i4 = i4; |
521 | 0 | d->arch.paging.preempt.log_dirty.i3 = i3 + 1; |
522 | 0 | rv = -ERESTART; |
523 | 0 | break; |
524 | 0 | } |
525 | 0 | } |
526 | 0 | if ( l3 ) |
527 | 0 | unmap_domain_page(l3); |
528 | 0 |
529 | 0 | if ( !rv && i4 < LOGDIRTY_NODE_ENTRIES - 1 && |
530 | 0 | hypercall_preempt_check() ) |
531 | 0 | { |
532 | 0 | d->arch.paging.preempt.log_dirty.i4 = i4 + 1; |
533 | 0 | d->arch.paging.preempt.log_dirty.i3 = 0; |
534 | 0 | rv = -ERESTART; |
535 | 0 | } |
536 | 0 | if ( rv ) |
537 | 0 | break; |
538 | 0 | } |
539 | 0 | if ( l4 ) |
540 | 0 | unmap_domain_page(l4); |
541 | 0 |
542 | 0 | if ( !rv ) |
543 | 0 | { |
544 | 0 | d->arch.paging.preempt.dom = NULL; |
545 | 0 | if ( clean ) |
546 | 0 | { |
547 | 0 | d->arch.paging.log_dirty.fault_count = 0; |
548 | 0 | d->arch.paging.log_dirty.dirty_count = 0; |
549 | 0 | } |
550 | 0 | } |
551 | 0 | else |
552 | 0 | { |
553 | 0 | d->arch.paging.preempt.dom = current->domain; |
554 | 0 | d->arch.paging.preempt.op = sc->op; |
555 | 0 | d->arch.paging.preempt.log_dirty.done = pages; |
556 | 0 | } |
557 | 0 |
558 | 0 | paging_unlock(d); |
559 | 0 |
560 | 0 | if ( rv ) |
561 | 0 | { |
562 | 0 | /* Never leave the domain paused on real errors. */ |
563 | 0 | ASSERT(rv == -ERESTART); |
564 | 0 | return rv; |
565 | 0 | } |
566 | 0 |
567 | 0 | if ( pages < sc->pages ) |
568 | 0 | sc->pages = pages; |
569 | 0 | if ( clean ) |
570 | 0 | { |
571 | 0 | /* We also need to call the clean_dirty_bitmap() function of the |
572 | 0 | * specific paging mode (shadow or hap). Safe because the domain is paused. */ |
573 | 0 | d->arch.paging.log_dirty.ops->clean(d); |
574 | 0 | } |
575 | 0 | domain_unpause(d); |
576 | 0 | return rv; |
577 | 0 |
578 | 0 | out: |
579 | 0 | d->arch.paging.preempt.dom = NULL; |
580 | 0 | paging_unlock(d); |
581 | 0 | domain_unpause(d); |
582 | 0 |
|
583 | 0 | if ( l1 ) |
584 | 0 | unmap_domain_page(l1); |
585 | 0 | if ( l2 ) |
586 | 0 | unmap_domain_page(l2); |
587 | 0 | if ( l3 ) |
588 | 0 | unmap_domain_page(l3); |
589 | 0 | if ( l4 ) |
590 | 0 | unmap_domain_page(l4); |
591 | 0 |
592 | 0 | return rv; |
593 | 0 | } |
594 | | |
595 | | void paging_log_dirty_range(struct domain *d, |
596 | | unsigned long begin_pfn, |
597 | | unsigned long nr, |
598 | | uint8_t *dirty_bitmap) |
599 | 0 | { |
600 | 0 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
601 | 0 | int i; |
602 | 0 | unsigned long pfn; |
603 | 0 |
604 | 0 | /* |
605 | 0 | * Set the l1e entries of the P2M table to read-only. |
606 | 0 | * |
607 | 0 | * On the first write, a page fault is taken, the entry is changed to |
608 | 0 | * read-write, and on retry the write succeeds. |
609 | 0 | * |
610 | 0 | * We populate dirty_bitmap by looking for entries that have been |
611 | 0 | * switched to read-write. |
612 | 0 | */ |
613 | 0 |
614 | 0 | p2m_lock(p2m); |
615 | 0 |
616 | 0 | for ( i = 0, pfn = begin_pfn; pfn < begin_pfn + nr; i++, pfn++ ) |
617 | 0 | if ( !p2m_change_type_one(d, pfn, p2m_ram_rw, p2m_ram_logdirty) ) |
618 | 0 | dirty_bitmap[i >> 3] |= (1 << (i & 7)); |
619 | 0 |
620 | 0 | p2m_unlock(p2m); |
621 | 0 |
622 | 0 | flush_tlb_mask(d->domain_dirty_cpumask); |
623 | 0 | } |
624 | | |
625 | | /* |
626 | | * Callers must supply log_dirty_ops for the log dirty code to call. This |
627 | | * function is usually invoked when paging is enabled. Check shadow_enable() |
628 | | * and hap_enable() for reference. |
629 | | * |
630 | | * These function pointers must not be followed with the log-dirty lock held. |
631 | | */ |
632 | | void paging_log_dirty_init(struct domain *d, const struct log_dirty_ops *ops) |
633 | 1 | { |
634 | 1 | d->arch.paging.log_dirty.ops = ops; |
635 | 1 | } |
636 | | |
637 | | /************************************************/ |
638 | | /* CODE FOR PAGING SUPPORT */ |
639 | | /************************************************/ |
640 | | /* Domain paging struct initialization. */ |
641 | | int paging_domain_init(struct domain *d, unsigned int domcr_flags) |
642 | 1 | { |
643 | 1 | int rc; |
644 | 1 | |
645 | 1 | if ( (rc = p2m_init(d)) != 0 ) |
646 | 0 | return rc; |
647 | 1 | |
648 | 1 | mm_lock_init(&d->arch.paging.lock); |
649 | 1 | |
650 | 1 | /* This must be initialized separately from the rest of the |
651 | 1 | * log-dirty init code as that can be called more than once and we |
652 | 1 | * don't want to leak any active log-dirty bitmaps */ |
653 | 1 | d->arch.paging.log_dirty.top = INVALID_MFN; |
654 | 1 | |
655 | 1 | /* |
656 | 1 | * Shadow pagetables are the default, but we will use |
657 | 1 | * hardware assistance if it's available and enabled. |
658 | 1 | */ |
659 | 1 | if ( hap_enabled(d) ) |
660 | 1 | hap_domain_init(d); |
661 | 1 | else |
662 | 0 | rc = shadow_domain_init(d, domcr_flags); |
663 | 1 | |
664 | 1 | return rc; |
665 | 1 | } |
666 | | |
667 | | /* vcpu paging struct initialization goes here */ |
668 | | void paging_vcpu_init(struct vcpu *v) |
669 | 12 | { |
670 | 12 | if ( hap_enabled(v->domain) ) |
671 | 12 | hap_vcpu_init(v); |
672 | 12 | else |
673 | 0 | shadow_vcpu_init(v); |
674 | 12 | } |
675 | | |
676 | | |
677 | | int paging_domctl(struct domain *d, struct xen_domctl_shadow_op *sc, |
678 | | XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl, |
679 | | bool_t resuming) |
680 | 0 | { |
681 | 0 | int rc; |
682 | 0 |
683 | 0 | if ( unlikely(d == current->domain) ) |
684 | 0 | { |
685 | 0 | gdprintk(XENLOG_INFO, "Tried to do a paging op on itself.\n"); |
686 | 0 | return -EINVAL; |
687 | 0 | } |
688 | 0 |
689 | 0 | if ( unlikely(d->is_dying) ) |
690 | 0 | { |
691 | 0 | gdprintk(XENLOG_INFO, "Ignoring paging op on dying domain %u\n", |
692 | 0 | d->domain_id); |
693 | 0 | return 0; |
694 | 0 | } |
695 | 0 |
|
696 | 0 | if ( unlikely(d->vcpu == NULL) || unlikely(d->vcpu[0] == NULL) ) |
697 | 0 | { |
698 | 0 | gdprintk(XENLOG_DEBUG, "Paging op on a domain (%u) with no vcpus\n", |
699 | 0 | d->domain_id); |
700 | 0 | return -EINVAL; |
701 | 0 | } |
702 | 0 |
703 | 0 | if ( resuming |
704 | 0 | ? (d->arch.paging.preempt.dom != current->domain || |
705 | 0 | d->arch.paging.preempt.op != sc->op) |
706 | 0 | : (d->arch.paging.preempt.dom && |
707 | 0 | sc->op != XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION) ) |
708 | 0 | { |
709 | 0 | printk(XENLOG_G_DEBUG |
710 | 0 | "%pv: Paging op %#x on Dom%u with unfinished prior op %#x by Dom%u\n", |
711 | 0 | current, sc->op, d->domain_id, d->arch.paging.preempt.op, |
712 | 0 | d->arch.paging.preempt.dom |
713 | 0 | ? d->arch.paging.preempt.dom->domain_id : DOMID_INVALID); |
714 | 0 | return -EBUSY; |
715 | 0 | } |
716 | 0 |
717 | 0 | rc = xsm_shadow_control(XSM_HOOK, d, sc->op); |
718 | 0 | if ( rc ) |
719 | 0 | return rc; |
720 | 0 |
721 | 0 | /* Code to handle log-dirty. Note that some log-dirty operations |
722 | 0 | * piggy-back on shadow operations. For example, when |
723 | 0 | * XEN_DOMCTL_SHADOW_OP_OFF is called, it first checks whether log-dirty |
724 | 0 | * mode is enabled. If it is, we disable log-dirty mode and continue |
725 | 0 | * with the shadow code. For this reason, we need to further dispatch |
726 | 0 | * the domctl to the next-level paging code (shadow or hap). |
727 | 0 | */ |
728 | 0 | switch ( sc->op ) |
729 | 0 | { |
730 | 0 |
731 | 0 | case XEN_DOMCTL_SHADOW_OP_ENABLE: |
732 | 0 | if ( !(sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY) ) |
733 | 0 | break; |
734 | 0 | /* Else fall through... */ |
735 | 0 | case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY: |
736 | 0 | return paging_log_dirty_enable(d, 1); |
737 | 0 |
738 | 0 | case XEN_DOMCTL_SHADOW_OP_OFF: |
739 | 0 | if ( (rc = paging_log_dirty_disable(d, resuming)) != 0 ) |
740 | 0 | return rc; |
741 | 0 | break; |
742 | 0 |
743 | 0 | case XEN_DOMCTL_SHADOW_OP_CLEAN: |
744 | 0 | case XEN_DOMCTL_SHADOW_OP_PEEK: |
745 | 0 | if ( sc->mode & ~XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL ) |
746 | 0 | return -EINVAL; |
747 | 0 | return paging_log_dirty_op(d, sc, resuming); |
748 | 0 | } |
749 | 0 |
750 | 0 | /* Here, dispatch domctl to the appropriate paging code */ |
751 | 0 | if ( hap_enabled(d) ) |
752 | 0 | return hap_domctl(d, sc, u_domctl); |
753 | 0 | else |
754 | 0 | return shadow_domctl(d, sc, u_domctl); |
755 | 0 | } |
756 | | |
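| | /* Continuation handler for preempted paging domctls, re-entered via |
| | * the __HYPERVISOR_arch_1 hypercall continuation created below. */ |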
757 | | long paging_domctl_continuation(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) |
758 | 0 | { |
759 | 0 | struct xen_domctl op; |
760 | 0 | struct domain *d; |
761 | 0 | int ret; |
762 | 0 |
763 | 0 | if ( copy_from_guest(&op, u_domctl, 1) ) |
764 | 0 | return -EFAULT; |
765 | 0 |
766 | 0 | if ( op.interface_version != XEN_DOMCTL_INTERFACE_VERSION || |
767 | 0 | op.cmd != XEN_DOMCTL_shadow_op ) |
768 | 0 | return -EOPNOTSUPP; |
769 | 0 |
770 | 0 | d = rcu_lock_domain_by_id(op.domain); |
771 | 0 | if ( d == NULL ) |
772 | 0 | return -ESRCH; |
773 | 0 |
774 | 0 | ret = xsm_domctl(XSM_OTHER, d, op.cmd); |
775 | 0 | if ( !ret ) |
776 | 0 | { |
777 | 0 | if ( domctl_lock_acquire() ) |
778 | 0 | { |
779 | 0 | ret = paging_domctl(d, &op.u.shadow_op, u_domctl, 1); |
780 | 0 |
781 | 0 | domctl_lock_release(); |
782 | 0 | } |
783 | 0 | else |
784 | 0 | ret = -ERESTART; |
785 | 0 | } |
786 | 0 |
787 | 0 | rcu_unlock_domain(d); |
788 | 0 |
789 | 0 | if ( ret == -ERESTART ) |
790 | 0 | ret = hypercall_create_continuation(__HYPERVISOR_arch_1, |
791 | 0 | "h", u_domctl); |
792 | 0 | else if ( __copy_field_to_guest(u_domctl, &op, u.shadow_op) ) |
793 | 0 | ret = -EFAULT; |
794 | 0 |
795 | 0 | return ret; |
796 | 0 | } |
797 | | |
798 | | /* Call when destroying a domain */ |
799 | | int paging_teardown(struct domain *d) |
800 | 0 | { |
801 | 0 | int rc; |
802 | 0 | bool preempted = false; |
803 | 0 |
804 | 0 | if ( hap_enabled(d) ) |
805 | 0 | hap_teardown(d, &preempted); |
806 | 0 | else |
807 | 0 | shadow_teardown(d, &preempted); |
808 | 0 |
809 | 0 | if ( preempted ) |
810 | 0 | return -ERESTART; |
811 | 0 |
812 | 0 | /* clean up log dirty resources. */ |
813 | 0 | rc = paging_free_log_dirty_bitmap(d, 0); |
814 | 0 | if ( rc == -ERESTART ) |
815 | 0 | return rc; |
816 | 0 |
817 | 0 | /* Move populate-on-demand cache back to domain_list for destruction */ |
818 | 0 | rc = p2m_pod_empty_cache(d); |
819 | 0 |
820 | 0 | return rc; |
821 | 0 | } |
822 | | |
823 | | /* Call once all of the references to the domain have gone away */ |
824 | | void paging_final_teardown(struct domain *d) |
825 | 0 | { |
826 | 0 | if ( hap_enabled(d) ) |
827 | 0 | hap_final_teardown(d); |
828 | 0 | else |
829 | 0 | shadow_final_teardown(d); |
830 | 0 |
831 | 0 | p2m_final_teardown(d); |
832 | 0 | } |
833 | | |
834 | | /* Enable an arbitrary paging-assistance mode. Call once at domain |
835 | | * creation. */ |
836 | | int paging_enable(struct domain *d, u32 mode) |
837 | 1 | { |
838 | 1 | /* Unrecognised paging mode? */ |
839 | 1 | if ( mode & ~PG_MASK ) |
840 | 0 | return -EINVAL; |
841 | 1 | |
842 | 1 | /* All of external|translate|refcounts, or none. */ |
843 | 1 | switch ( mode & (PG_external | PG_translate | PG_refcounts) ) |
844 | 1 | { |
845 | 1 | case 0: |
846 | 1 | case PG_external | PG_translate | PG_refcounts: |
847 | 1 | break; |
848 | 0 | default: |
849 | 0 | return -EINVAL; |
850 | 1 | } |
851 | 1 | |
852 | 1 | if ( hap_enabled(d) ) |
853 | 1 | return hap_enable(d, mode); |
854 | 1 | else |
855 | 0 | return shadow_enable(d, mode); |
856 | 1 | } |
857 | | |
858 | | /* Called from the guest to indicate that a process is being torn down |
859 | | * and therefore its pagetables will soon be discarded */ |
860 | | void pagetable_dying(struct domain *d, paddr_t gpa) |
861 | 0 | { |
862 | 0 | #ifdef CONFIG_SHADOW_PAGING |
863 | 0 | struct vcpu *v; |
864 | 0 |
865 | 0 | ASSERT(paging_mode_shadow(d)); |
866 | 0 |
867 | 0 | v = d->vcpu[0]; |
868 | 0 | v->arch.paging.mode->shadow.pagetable_dying(v, gpa); |
869 | 0 | #else |
870 | | BUG(); |
871 | | #endif |
872 | 0 | } |
873 | | |
874 | | /* Print paging-assistance info to the console */ |
875 | | void paging_dump_domain_info(struct domain *d) |
876 | 0 | { |
877 | 0 | if ( paging_mode_enabled(d) ) |
878 | 0 | { |
879 | 0 | printk(" paging assistance: "); |
880 | 0 | if ( paging_mode_shadow(d) ) |
881 | 0 | printk("shadow "); |
882 | 0 | if ( paging_mode_hap(d) ) |
883 | 0 | printk("hap "); |
884 | 0 | if ( paging_mode_refcounts(d) ) |
885 | 0 | printk("refcounts "); |
886 | 0 | if ( paging_mode_log_dirty(d) ) |
887 | 0 | printk("log_dirty "); |
888 | 0 | if ( paging_mode_translate(d) ) |
889 | 0 | printk("translate "); |
890 | 0 | if ( paging_mode_external(d) ) |
891 | 0 | printk("external "); |
892 | 0 | printk("\n"); |
893 | 0 | } |
894 | 0 | } |
895 | | |
896 | | void paging_dump_vcpu_info(struct vcpu *v) |
897 | 0 | { |
898 | 0 | if ( paging_mode_enabled(v->domain) ) |
899 | 0 | { |
900 | 0 | printk(" paging assistance: "); |
901 | 0 | if ( paging_mode_shadow(v->domain) ) |
902 | 0 | { |
903 | 0 | if ( paging_get_hostmode(v) ) |
904 | 0 | printk("shadowed %u-on-%u\n", |
905 | 0 | paging_get_hostmode(v)->guest_levels, |
906 | 0 | paging_get_hostmode(v)->shadow.shadow_levels); |
907 | 0 | else |
908 | 0 | printk("not shadowed\n"); |
909 | 0 | } |
910 | 0 | else if ( paging_mode_hap(v->domain) && paging_get_hostmode(v) ) |
911 | 0 | printk("hap, %u levels\n", |
912 | 0 | paging_get_hostmode(v)->guest_levels); |
913 | 0 | else |
914 | 0 | printk("none\n"); |
915 | 0 | } |
916 | 0 | } |
917 | | |
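| | /* Return the paging mode in force for this vcpu: the nested mode when |
| | * it is running an L2 guest, the host mode otherwise. */ |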
918 | | const struct paging_mode *paging_get_mode(struct vcpu *v) |
919 | 0 | { |
920 | 0 | if ( !nestedhvm_is_n2(v) ) |
921 | 0 | return paging_get_hostmode(v); |
922 | 0 |
923 | 0 | return paging_get_nestedmode(v); |
924 | 0 | } |
925 | | |
926 | | void paging_update_nestedmode(struct vcpu *v) |
927 | 0 | { |
928 | 0 | ASSERT(nestedhvm_enabled(v->domain)); |
929 | 0 | if ( nestedhvm_paging_mode_hap(v) ) |
930 | 0 | /* nested-on-nested */ |
931 | 0 | v->arch.paging.nestedmode = hap_paging_get_mode(v); |
932 | 0 | else |
933 | 0 | /* TODO: shadow-on-shadow */ |
934 | 0 | v->arch.paging.nestedmode = NULL; |
935 | 0 | hvm_asid_flush_vcpu(v); |
936 | 0 | } |
937 | | |
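| | /* Dispatch a p2m entry write to the paging mode in force, falling back |
| | * to a plain safe_write_pte() when no suitable vcpu or mode exists. */ |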
938 | | void paging_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, |
939 | | l1_pgentry_t *p, l1_pgentry_t new, |
940 | | unsigned int level) |
941 | 0 | { |
942 | 0 | struct domain *d = p2m->domain; |
943 | 0 | struct vcpu *v = current; |
944 | 0 | if ( v->domain != d ) |
945 | 0 | v = d->vcpu ? d->vcpu[0] : NULL; |
946 | 0 | if ( likely(v && paging_mode_enabled(d) && paging_get_hostmode(v) != NULL) ) |
947 | 0 | paging_get_hostmode(v)->write_p2m_entry(d, gfn, p, new, level); |
948 | 0 | else |
949 | 0 | safe_write_pte(p, new); |
950 | 0 | } |
951 | | |
952 | | int paging_set_allocation(struct domain *d, unsigned int pages, bool *preempted) |
953 | 34.5k | { |
954 | 34.5k | int rc; |
955 | 34.5k | |
956 | 34.5k | ASSERT(paging_mode_enabled(d)); |
957 | 34.5k | |
958 | 34.5k | paging_lock(d); |
959 | 34.5k | if ( hap_enabled(d) ) |
960 | 34.5k | rc = hap_set_allocation(d, pages, preempted); |
961 | 34.5k | else |
962 | 0 | rc = shadow_set_allocation(d, pages, preempted); |
963 | 34.5k | paging_unlock(d); |
964 | 34.5k | |
965 | 34.5k | return rc; |
966 | 34.5k | } |
967 | | |
968 | | /* |
969 | | * Local variables: |
970 | | * mode: C |
971 | | * c-file-style: "BSD" |
972 | | * c-basic-offset: 4 |
973 | | * indent-tabs-mode: nil |
974 | | * End: |
975 | | */ |