/root/src/xen/xen/arch/x86/mm/p2m.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * arch/x86/mm/p2m.c |
3 | | * |
4 | | * physical-to-machine mappings for automatically-translated domains. |
5 | | * |
6 | | * Parts of this code are Copyright (c) 2009 by Citrix Systems, Inc. (Patrick Colp) |
7 | | * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices. |
8 | | * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc. |
9 | | * Parts of this code are Copyright (c) 2006 by Michael A Fetterman |
10 | | * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al. |
11 | | * |
12 | | * This program is free software; you can redistribute it and/or modify |
13 | | * it under the terms of the GNU General Public License as published by |
14 | | * the Free Software Foundation; either version 2 of the License, or |
15 | | * (at your option) any later version. |
16 | | * |
17 | | * This program is distributed in the hope that it will be useful, |
18 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
19 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
20 | | * GNU General Public License for more details. |
21 | | * |
22 | | * You should have received a copy of the GNU General Public License |
23 | | * along with this program; If not, see <http://www.gnu.org/licenses/>. |
24 | | */ |
25 | | |
26 | | #include <xen/guest_access.h> /* copy_from_guest() */ |
27 | | #include <xen/iommu.h> |
28 | | #include <xen/vm_event.h> |
29 | | #include <xen/event.h> |
30 | | #include <public/vm_event.h> |
31 | | #include <asm/domain.h> |
32 | | #include <asm/page.h> |
33 | | #include <asm/paging.h> |
34 | | #include <asm/p2m.h> |
35 | | #include <asm/hvm/vmx/vmx.h> /* ept_p2m_init() */ |
36 | | #include <asm/mem_sharing.h> |
37 | | #include <asm/hvm/nestedhvm.h> |
38 | | #include <asm/altp2m.h> |
39 | | #include <asm/hvm/svm/amd-iommu-proto.h> |
40 | | #include <asm/vm_event.h> |
41 | | #include <xsm/xsm.h> |
42 | | |
43 | | #include "mm-locks.h" |
44 | | |
45 | | /* Turn on/off host superpage page table support for hap, default on. */ |
46 | | bool_t __initdata opt_hap_1gb = 1, __initdata opt_hap_2mb = 1; |
47 | | boolean_param("hap_1gb", opt_hap_1gb); |
48 | | boolean_param("hap_2mb", opt_hap_2mb); |
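The boolean_param() registrations above turn the two flags into Xen command-line options. As a purely illustrative example (the option names come from the registrations; the rest of the boot line is hypothetical), booting with

    xen ... hap_1gb=0 hap_2mb=1

would disable 1GB host superpages while leaving 2MB superpages enabled.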
49 | | |
50 | | /* Override macros from asm/page.h to make them work with mfn_t */ |
51 | | #undef mfn_to_page |
52 | 1.96M | #define mfn_to_page(_m) __mfn_to_page(mfn_x(_m)) |
53 | | #undef page_to_mfn |
54 | 868k | #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg)) |
55 | | |
56 | | DEFINE_PERCPU_RWLOCK_GLOBAL(p2m_percpu_rwlock); |
57 | | |
58 | | /* Init the datastructures for later use by the p2m code */ |
59 | | static int p2m_initialise(struct domain *d, struct p2m_domain *p2m) |
60 | 21 | { |
61 | 21 | unsigned int i; |
62 | 21 | int ret = 0; |
63 | 21 | |
64 | 21 | mm_rwlock_init(&p2m->lock); |
65 | 21 | mm_lock_init(&p2m->pod.lock); |
66 | 21 | INIT_LIST_HEAD(&p2m->np2m_list); |
67 | 21 | INIT_PAGE_LIST_HEAD(&p2m->pages); |
68 | 21 | INIT_PAGE_LIST_HEAD(&p2m->pod.super); |
69 | 21 | INIT_PAGE_LIST_HEAD(&p2m->pod.single); |
70 | 21 | |
71 | 21 | p2m->domain = d; |
72 | 21 | p2m->default_access = p2m_access_rwx; |
73 | 21 | p2m->p2m_class = p2m_host; |
74 | 21 | |
75 | 21 | p2m->np2m_base = P2M_BASE_EADDR; |
76 | 21 | p2m->np2m_generation = 0; |
77 | 21 | |
78 | 693 | for ( i = 0; i < ARRAY_SIZE(p2m->pod.mrp.list); ++i ) |
79 | 672 | p2m->pod.mrp.list[i] = gfn_x(INVALID_GFN); |
80 | 21 | |
81 | 21 | if ( hap_enabled(d) && cpu_has_vmx ) |
82 | 21 | ret = ept_p2m_init(p2m); |
83 | 21 | else |
84 | 0 | p2m_pt_init(p2m); |
85 | 21 | |
86 | 21 | spin_lock_init(&p2m->ioreq.lock); |
87 | 21 | |
88 | 21 | return ret; |
89 | 21 | } |
90 | | |
91 | | static struct p2m_domain *p2m_init_one(struct domain *d) |
92 | 21 | { |
93 | 21 | struct p2m_domain *p2m = xzalloc(struct p2m_domain); |
94 | 21 | |
95 | 21 | if ( !p2m ) |
96 | 0 | return NULL; |
97 | 21 | |
98 | 21 | if ( !zalloc_cpumask_var(&p2m->dirty_cpumask) ) |
99 | 0 | goto free_p2m; |
100 | 21 | |
101 | 21 | if ( p2m_initialise(d, p2m) ) |
102 | 0 | goto free_cpumask; |
103 | 21 | return p2m; |
104 | 21 | |
105 | 0 | free_cpumask: |
106 | 0 | free_cpumask_var(p2m->dirty_cpumask); |
107 | 0 | free_p2m: |
108 | 0 | xfree(p2m); |
109 | 0 | return NULL; |
110 | 0 | } |
111 | | |
112 | | static void p2m_free_one(struct p2m_domain *p2m) |
113 | 0 | { |
114 | 0 | if ( hap_enabled(p2m->domain) && cpu_has_vmx ) |
115 | 0 | ept_p2m_uninit(p2m); |
116 | 0 | free_cpumask_var(p2m->dirty_cpumask); |
117 | 0 | xfree(p2m); |
118 | 0 | } |
119 | | |
120 | | static int p2m_init_hostp2m(struct domain *d) |
121 | 1 | { |
122 | 1 | struct p2m_domain *p2m = p2m_init_one(d); |
123 | 1 | |
124 | 1 | if ( p2m ) |
125 | 1 | { |
126 | 1 | p2m->logdirty_ranges = rangeset_new(d, "log-dirty", |
127 | 1 | RANGESETF_prettyprint_hex); |
128 | 1 | if ( p2m->logdirty_ranges ) |
129 | 1 | { |
130 | 1 | d->arch.p2m = p2m; |
131 | 1 | return 0; |
132 | 1 | } |
133 | 0 | p2m_free_one(p2m); |
134 | 0 | } |
135 | 0 | return -ENOMEM; |
136 | 1 | } |
137 | | |
138 | | static void p2m_teardown_hostp2m(struct domain *d) |
139 | 0 | { |
140 | 0 | /* Iterate over all p2m tables per domain */ |
141 | 0 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
142 | 0 |
143 | 0 | if ( p2m ) |
144 | 0 | { |
145 | 0 | rangeset_destroy(p2m->logdirty_ranges); |
146 | 0 | p2m_free_one(p2m); |
147 | 0 | d->arch.p2m = NULL; |
148 | 0 | } |
149 | 0 | } |
150 | | |
151 | | static void p2m_teardown_nestedp2m(struct domain *d) |
152 | 0 | { |
153 | 0 | unsigned int i; |
154 | 0 | struct p2m_domain *p2m; |
155 | 0 |
156 | 0 | for ( i = 0; i < MAX_NESTEDP2M; i++ ) |
157 | 0 | { |
158 | 0 | if ( !d->arch.nested_p2m[i] ) |
159 | 0 | continue; |
160 | 0 | p2m = d->arch.nested_p2m[i]; |
161 | 0 | list_del(&p2m->np2m_list); |
162 | 0 | p2m_free_one(p2m); |
163 | 0 | d->arch.nested_p2m[i] = NULL; |
164 | 0 | } |
165 | 0 | } |
166 | | |
167 | | static int p2m_init_nestedp2m(struct domain *d) |
168 | 1 | { |
169 | 1 | unsigned int i; |
170 | 1 | struct p2m_domain *p2m; |
171 | 1 | |
172 | 1 | mm_lock_init(&d->arch.nested_p2m_lock); |
173 | 11 | for ( i = 0; i < MAX_NESTEDP2M; i++ ) |
174 | 10 | { |
175 | 10 | d->arch.nested_p2m[i] = p2m = p2m_init_one(d); |
176 | 10 | if ( p2m == NULL ) |
177 | 0 | { |
178 | 0 | p2m_teardown_nestedp2m(d); |
179 | 0 | return -ENOMEM; |
180 | 0 | } |
181 | 10 | p2m->p2m_class = p2m_nested; |
182 | 10 | p2m->write_p2m_entry = nestedp2m_write_p2m_entry; |
183 | 10 | list_add(&p2m->np2m_list, &p2m_get_hostp2m(d)->np2m_list); |
184 | 10 | } |
185 | 1 | |
186 | 1 | return 0; |
187 | 1 | } |
188 | | |
189 | | static void p2m_teardown_altp2m(struct domain *d) |
190 | 0 | { |
191 | 0 | unsigned int i; |
192 | 0 | struct p2m_domain *p2m; |
193 | 0 |
194 | 0 | for ( i = 0; i < MAX_ALTP2M; i++ ) |
195 | 0 | { |
196 | 0 | if ( !d->arch.altp2m_p2m[i] ) |
197 | 0 | continue; |
198 | 0 | p2m = d->arch.altp2m_p2m[i]; |
199 | 0 | d->arch.altp2m_p2m[i] = NULL; |
200 | 0 | p2m_free_one(p2m); |
201 | 0 | } |
202 | 0 | } |
203 | | |
204 | | static int p2m_init_altp2m(struct domain *d) |
205 | 1 | { |
206 | 1 | unsigned int i; |
207 | 1 | struct p2m_domain *p2m; |
208 | 1 | |
209 | 1 | mm_lock_init(&d->arch.altp2m_list_lock); |
210 | 11 | for ( i = 0; i < MAX_ALTP2M; i++ ) |
211 | 10 | { |
212 | 10 | d->arch.altp2m_p2m[i] = p2m = p2m_init_one(d); |
213 | 10 | if ( p2m == NULL ) |
214 | 0 | { |
215 | 0 | p2m_teardown_altp2m(d); |
216 | 0 | return -ENOMEM; |
217 | 0 | } |
218 | 10 | p2m->p2m_class = p2m_alternate; |
219 | 10 | p2m->access_required = 1; |
220 | 10 | _atomic_set(&p2m->active_vcpus, 0); |
221 | 10 | } |
222 | 1 | |
223 | 1 | return 0; |
224 | 1 | } |
225 | | |
226 | | int p2m_init(struct domain *d) |
227 | 1 | { |
228 | 1 | int rc; |
229 | 1 | |
230 | 1 | rc = p2m_init_hostp2m(d); |
231 | 1 | if ( rc ) |
232 | 0 | return rc; |
233 | 1 | |
234 | 1 | /* Must initialise nestedp2m unconditionally |
235 | 1 | * since nestedhvm_enabled(d) returns false here. |
236 | 1 | * (p2m_init runs too early for HVM_PARAM_* options) */ |
237 | 1 | rc = p2m_init_nestedp2m(d); |
238 | 1 | if ( rc ) |
239 | 0 | { |
240 | 0 | p2m_teardown_hostp2m(d); |
241 | 0 | return rc; |
242 | 0 | } |
243 | 1 | |
244 | 1 | rc = p2m_init_altp2m(d); |
245 | 1 | if ( rc ) |
246 | 0 | { |
247 | 0 | p2m_teardown_hostp2m(d); |
248 | 0 | p2m_teardown_nestedp2m(d); |
249 | 0 | } |
250 | 1 | |
251 | 1 | return rc; |
252 | 1 | } |
253 | | |
254 | | int p2m_is_logdirty_range(struct p2m_domain *p2m, unsigned long start, |
255 | | unsigned long end) |
256 | 0 | { |
257 | 0 | ASSERT(p2m_is_hostp2m(p2m)); |
258 | 0 | if ( p2m->global_logdirty || |
259 | 0 | rangeset_contains_range(p2m->logdirty_ranges, start, end) ) |
260 | 0 | return 1; |
261 | 0 | if ( rangeset_overlaps_range(p2m->logdirty_ranges, start, end) ) |
262 | 0 | return -1; |
263 | 0 | return 0; |
264 | 0 | } |
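p2m_is_logdirty_range() is deliberately tri-state: 1 means the whole range must be treated as log-dirty (global log-dirty mode, or fully contained in the rangeset), 0 means none of it is, and -1 flags a partial overlap that has to be resolved per gfn. A minimal hedged sketch of a caller honouring that contract; recalc_single_gfn() is a hypothetical helper, not part of this file:

    switch ( p2m_is_logdirty_range(p2m, start, end) )
    {
    case 1:   /* whole range is log-dirty */
        type = p2m_ram_logdirty;
        break;
    case 0:   /* whole range is plain RW */
        type = p2m_ram_rw;
        break;
    default:  /* -1: mixed, decide one gfn at a time */
        for ( gfn = start; gfn <= end; ++gfn )
            recalc_single_gfn(p2m, gfn);   /* hypothetical helper */
        break;
    }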
265 | | |
266 | | void p2m_change_entry_type_global(struct domain *d, |
267 | | p2m_type_t ot, p2m_type_t nt) |
268 | 0 | { |
269 | 0 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
270 | 0 |
271 | 0 | ASSERT(ot != nt); |
272 | 0 | ASSERT(p2m_is_changeable(ot) && p2m_is_changeable(nt)); |
273 | 0 |
274 | 0 | p2m_lock(p2m); |
275 | 0 | p2m->change_entry_type_global(p2m, ot, nt); |
276 | 0 | p2m->global_logdirty = (nt == p2m_ram_logdirty); |
277 | 0 | p2m_unlock(p2m); |
278 | 0 | } |
279 | | |
280 | | void p2m_memory_type_changed(struct domain *d) |
281 | 23 | { |
282 | 23 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
283 | 23 | |
284 | 23 | if ( p2m->memory_type_changed ) |
285 | 23 | { |
286 | 23 | p2m_lock(p2m); |
287 | 23 | p2m->memory_type_changed(p2m); |
288 | 23 | p2m_unlock(p2m); |
289 | 23 | } |
290 | 23 | } |
291 | | |
292 | | int p2m_set_ioreq_server(struct domain *d, |
293 | | unsigned int flags, |
294 | | struct hvm_ioreq_server *s) |
295 | 0 | { |
296 | 0 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
297 | 0 | int rc; |
298 | 0 |
299 | 0 | /* |
300 | 0 | * Use lock to prevent concurrent setting attempts |
301 | 0 | * from multiple ioreq servers. |
302 | 0 | */ |
303 | 0 | spin_lock(&p2m->ioreq.lock); |
304 | 0 |
305 | 0 | /* Unmap ioreq server from p2m type by passing flags with 0. */ |
306 | 0 | if ( flags == 0 ) |
307 | 0 | { |
308 | 0 | rc = -EINVAL; |
309 | 0 | if ( p2m->ioreq.server != s ) |
310 | 0 | goto out; |
311 | 0 |
312 | 0 | p2m->ioreq.server = NULL; |
313 | 0 | p2m->ioreq.flags = 0; |
314 | 0 | } |
315 | 0 | else |
316 | 0 | { |
317 | 0 | rc = -EBUSY; |
318 | 0 | if ( p2m->ioreq.server != NULL ) |
319 | 0 | goto out; |
320 | 0 |
321 | 0 | /* |
322 | 0 | * It is possible that an ioreq server has just been unmapped, |
323 | 0 | * released the spin lock, with some p2m_ioreq_server entries |
324 | 0 | * in p2m table remained. We shall refuse another ioreq server |
325 | 0 | * mapping request in such case. |
326 | 0 | */ |
327 | 0 | if ( read_atomic(&p2m->ioreq.entry_count) ) |
328 | 0 | goto out; |
329 | 0 |
|
330 | 0 | p2m->ioreq.server = s; |
331 | 0 | p2m->ioreq.flags = flags; |
332 | 0 | } |
333 | 0 |
334 | 0 | rc = 0; |
335 | 0 |
336 | 0 | out: |
337 | 0 | spin_unlock(&p2m->ioreq.lock); |
338 | 0 |
339 | 0 | return rc; |
340 | 0 | } |
341 | | |
342 | | struct hvm_ioreq_server *p2m_get_ioreq_server(struct domain *d, |
343 | | unsigned int *flags) |
344 | 0 | { |
345 | 0 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
346 | 0 | struct hvm_ioreq_server *s; |
347 | 0 |
348 | 0 | spin_lock(&p2m->ioreq.lock); |
349 | 0 |
350 | 0 | s = p2m->ioreq.server; |
351 | 0 | *flags = p2m->ioreq.flags; |
352 | 0 |
353 | 0 | spin_unlock(&p2m->ioreq.lock); |
354 | 0 | return s; |
355 | 0 | } |
356 | | |
357 | | void p2m_enable_hardware_log_dirty(struct domain *d) |
358 | 0 | { |
359 | 0 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
360 | 0 |
361 | 0 | if ( p2m->enable_hardware_log_dirty ) |
362 | 0 | { |
363 | 0 | p2m_lock(p2m); |
364 | 0 | p2m->enable_hardware_log_dirty(p2m); |
365 | 0 | p2m_unlock(p2m); |
366 | 0 | } |
367 | 0 | } |
368 | | |
369 | | void p2m_disable_hardware_log_dirty(struct domain *d) |
370 | 0 | { |
371 | 0 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
372 | 0 |
373 | 0 | if ( p2m->disable_hardware_log_dirty ) |
374 | 0 | { |
375 | 0 | p2m_lock(p2m); |
376 | 0 | p2m->disable_hardware_log_dirty(p2m); |
377 | 0 | p2m_unlock(p2m); |
378 | 0 | } |
379 | 0 | } |
380 | | |
381 | | void p2m_flush_hardware_cached_dirty(struct domain *d) |
382 | 0 | { |
383 | 0 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
384 | 0 |
385 | 0 | if ( p2m->flush_hardware_cached_dirty ) |
386 | 0 | { |
387 | 0 | p2m_lock(p2m); |
388 | 0 | p2m->flush_hardware_cached_dirty(p2m); |
389 | 0 | p2m_unlock(p2m); |
390 | 0 | } |
391 | 0 | } |
392 | | |
393 | | /* |
394 | | * Force a synchronous P2M TLB flush if a deferred flush is pending. |
395 | | * |
396 | | * Must be called with the p2m lock held. |
397 | | */ |
398 | | void p2m_tlb_flush_sync(struct p2m_domain *p2m) |
399 | 0 | { |
400 | 0 | if ( p2m->need_flush ) { |
401 | 0 | p2m->need_flush = 0; |
402 | 0 | p2m->tlb_flush(p2m); |
403 | 0 | } |
404 | 0 | } |
405 | | |
406 | | /* |
407 | | * Unlock the p2m lock and do a P2M TLB flush if needed. |
408 | | */ |
409 | | void p2m_unlock_and_tlb_flush(struct p2m_domain *p2m) |
410 | 510k | { |
411 | 510k | if ( p2m->need_flush ) { |
412 | 218k | p2m->need_flush = 0; |
413 | 218k | mm_write_unlock(&p2m->lock); |
414 | 218k | p2m->tlb_flush(p2m); |
415 | 218k | } else |
416 | 292k | mm_write_unlock(&p2m->lock); |
417 | 510k | } |
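Clearing need_flush while the write lock is still held, and issuing the hardware flush only after dropping it, keeps the potentially slow TLB flush out of the critical section. A hedged sketch of the intended calling pattern, assuming p2m_lock() takes the same mm_write lock released above:

    p2m_lock(p2m);
    rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2mt, p2ma);
    /* the set_entry implementation raises p2m->need_flush as needed */
    p2m_unlock_and_tlb_flush(p2m);   /* flush runs outside the lock */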
418 | | |
419 | | mfn_t __get_gfn_type_access(struct p2m_domain *p2m, unsigned long gfn_l, |
420 | | p2m_type_t *t, p2m_access_t *a, p2m_query_t q, |
421 | | unsigned int *page_order, bool_t locked) |
422 | 1.96M | { |
423 | 1.96M | mfn_t mfn; |
424 | 1.96M | gfn_t gfn = _gfn(gfn_l); |
425 | 1.96M | |
426 | 1.96M | /* Unshare makes no sense without populate. */ |
427 | 1.96M | if ( q & P2M_UNSHARE ) |
428 | 55 | q |= P2M_ALLOC; |
429 | 1.96M | |
430 | 1.96M | if ( !p2m || !paging_mode_translate(p2m->domain) ) |
431 | 0 | { |
432 | 0 | /* Not necessarily true, but for non-translated guests, we claim |
433 | 0 | * it's the most generic kind of memory */ |
434 | 0 | *t = p2m_ram_rw; |
435 | 0 | return _mfn(gfn_l); |
436 | 0 | } |
437 | 1.96M | |
438 | 1.96M | if ( locked ) |
439 | 1.96M | /* Grab the lock here, don't release until put_gfn */ |
440 | 1.96M | gfn_lock(p2m, gfn, 0); |
441 | 1.96M | |
442 | 1.96M | mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL); |
443 | 1.96M | |
444 | 1.96M | if ( (q & P2M_UNSHARE) && p2m_is_shared(*t) ) |
445 | 0 | { |
446 | 0 | ASSERT(p2m_is_hostp2m(p2m)); |
447 | 0 | /* Try to unshare. If we fail, communicate ENOMEM without |
448 | 0 | * sleeping. */ |
449 | 0 | if ( mem_sharing_unshare_page(p2m->domain, gfn_l, 0) < 0 ) |
450 | 0 | (void)mem_sharing_notify_enomem(p2m->domain, gfn_l, 0); |
451 | 0 | mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL); |
452 | 0 | } |
453 | 1.96M | |
454 | 1.96M | if (unlikely((p2m_is_broken(*t)))) |
455 | 0 | { |
456 | 0 | /* Return invalid_mfn to avoid caller's access */ |
457 | 0 | mfn = INVALID_MFN; |
458 | 0 | if ( q & P2M_ALLOC ) |
459 | 0 | domain_crash(p2m->domain); |
460 | 0 | } |
461 | 1.96M | |
462 | 1.96M | return mfn; |
463 | 1.96M | } |
464 | | |
465 | | void __put_gfn(struct p2m_domain *p2m, unsigned long gfn) |
466 | 168 | { |
467 | 168 | if ( !p2m || !paging_mode_translate(p2m->domain) ) |
468 | 168 | /* Nothing to do in this case */ |
469 | 0 | return; |
470 | 168 | |
471 | 168 | ASSERT(gfn_locked_by_me(p2m, gfn)); |
472 | 168 | |
473 | 168 | gfn_unlock(p2m, gfn, 0); |
474 | 168 | } |
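__get_gfn_type_access() with locked=1 and __put_gfn() bracket a critical section around a single gfn. A hedged sketch using the get_gfn()/put_gfn() wrappers, assumed (as in asm/p2m.h) to expand to the two functions above:

    p2m_type_t t;
    mfn_t mfn = get_gfn(d, gfn_l, &t);   /* takes the per-gfn lock */

    if ( p2m_is_ram(t) && mfn_valid(mfn) )
    {
        /* the gfn -> mfn translation cannot change while held */
    }
    put_gfn(d, gfn_l);                   /* releases the lock */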
475 | | |
476 | | /* Atomically look up a GFN and take a reference count on the backing page. */ |
477 | | struct page_info *p2m_get_page_from_gfn( |
478 | | struct p2m_domain *p2m, gfn_t gfn, |
479 | | p2m_type_t *t, p2m_access_t *a, p2m_query_t q) |
480 | 1.95M | { |
481 | 1.95M | struct page_info *page = NULL; |
482 | 1.95M | p2m_access_t _a; |
483 | 1.95M | p2m_type_t _t; |
484 | 1.95M | mfn_t mfn; |
485 | 1.95M | |
486 | 1.95M | /* Allow t or a to be NULL */ |
487 | 18.4E | t = t ?: &_t; |
488 | 1.95M | a = a ?: &_a; |
489 | 1.95M | |
490 | 1.95M | if ( likely(!p2m_locked_by_me(p2m)) ) |
491 | 1.96M | { |
492 | 1.96M | /* Fast path: look up and get out */ |
493 | 1.96M | p2m_read_lock(p2m); |
494 | 1.96M | mfn = __get_gfn_type_access(p2m, gfn_x(gfn), t, a, 0, NULL, 0); |
495 | 1.96M | if ( p2m_is_any_ram(*t) && mfn_valid(mfn) |
496 | 1.96M | && !((q & P2M_UNSHARE) && p2m_is_shared(*t)) ) |
497 | 1.96M | { |
498 | 1.96M | page = mfn_to_page(mfn); |
499 | 1.96M | if ( unlikely(p2m_is_foreign(*t)) ) |
500 | 0 | { |
501 | 0 | struct domain *fdom = page_get_owner_and_reference(page); |
502 | 0 |
503 | 0 | ASSERT(fdom != p2m->domain); |
504 | 0 | if ( fdom == NULL ) |
505 | 0 | page = NULL; |
506 | 0 | } |
507 | 1.96M | else if ( !get_page(page, p2m->domain) && |
508 | 1.96M | /* Page could be shared */ |
509 | 0 | (!p2m_is_shared(*t) || !get_page(page, dom_cow)) ) |
510 | 0 | page = NULL; |
511 | 1.96M | } |
512 | 1.96M | p2m_read_unlock(p2m); |
513 | 1.96M | |
514 | 1.96M | if ( page ) |
515 | 1.97M | return page; |
516 | 1.96M | |
517 | 1.96M | /* Error path: not a suitable GFN at all */ |
518 | 18.4E | if ( !p2m_is_ram(*t) && !p2m_is_paging(*t) && !p2m_is_pod(*t) ) |
519 | 0 | return NULL; |
520 | 18.4E | } |
521 | 1.95M | |
522 | 1.95M | /* Slow path: take the write lock and do fixups */ |
523 | 18.4E | mfn = get_gfn_type_access(p2m, gfn_x(gfn), t, a, q, NULL); |
524 | 18.4E | if ( p2m_is_ram(*t) && mfn_valid(mfn) ) |
525 | 55 | { |
526 | 55 | page = mfn_to_page(mfn); |
527 | 55 | if ( !get_page(page, p2m->domain) ) |
528 | 0 | page = NULL; |
529 | 55 | } |
530 | 18.4E | put_gfn(p2m->domain, gfn_x(gfn)); |
531 | 18.4E | |
532 | 18.4E | return page; |
533 | 1.95M | } |
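Unlike the bare lookups above, a successful p2m_get_page_from_gfn() hands back a page reference, so each call must be balanced with put_page(). A hedged usage sketch; the __map_domain_page() step is illustrative:

    p2m_type_t t;
    struct page_info *page =
        p2m_get_page_from_gfn(p2m, gfn, &t, NULL, P2M_ALLOC);

    if ( page )
    {
        void *va = __map_domain_page(page);
        /* ... read or modify the guest page ... */
        unmap_domain_page(va);
        put_page(page);   /* drop the reference taken by the lookup */
    }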
534 | | |
535 | | /* Returns: 0 for success, -errno for failure */ |
536 | | int p2m_set_entry(struct p2m_domain *p2m, gfn_t gfn, mfn_t mfn, |
537 | | unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma) |
538 | 507k | { |
539 | 507k | struct domain *d = p2m->domain; |
540 | 507k | unsigned long todo = 1ul << page_order; |
541 | 507k | unsigned int order; |
542 | 507k | int set_rc, rc = 0; |
543 | 507k | |
544 | 507k | ASSERT(gfn_locked_by_me(p2m, gfn)); |
545 | 507k | |
546 | 1.57M | while ( todo ) |
547 | 1.06M | { |
548 | 1.06M | if ( hap_enabled(d) ) |
549 | 1.06M | { |
550 | 1.06M | unsigned long fn_mask = !mfn_eq(mfn, INVALID_MFN) ? mfn_x(mfn) : 0; |
551 | 1.06M | |
552 | 1.06M | fn_mask |= gfn_x(gfn) | todo; |
553 | 1.06M | |
554 | 1.06M | order = (!(fn_mask & ((1ul << PAGE_ORDER_1G) - 1)) && |
555 | 12 | hap_has_1gb) ? PAGE_ORDER_1G : |
556 | 1.06M | (!(fn_mask & ((1ul << PAGE_ORDER_2M) - 1)) && |
557 | 1.06M | hap_has_2mb) ? PAGE_ORDER_2M : PAGE_ORDER_4K; |
558 | 1.06M | } |
559 | 1.06M | else |
560 | 0 | order = 0; |
561 | 1.06M | |
562 | 1.06M | set_rc = p2m->set_entry(p2m, gfn, mfn, order, p2mt, p2ma, -1); |
563 | 1.06M | if ( set_rc ) |
564 | 0 | rc = set_rc; |
565 | 1.06M | |
566 | 1.06M | gfn = gfn_add(gfn, 1ul << order); |
567 | 1.06M | if ( !mfn_eq(mfn, INVALID_MFN) ) |
568 | 850k | mfn = mfn_add(mfn, 1ul << order); |
569 | 1.06M | todo -= 1ul << order; |
570 | 1.06M | } |
571 | 507k | |
572 | 507k | return rc; |
573 | 507k | } |
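Since fn_mask is the bitwise OR of the gfn, the (valid) mfn and the remaining frame count, one test of its low bits checks all three alignments at once: a superpage order is chosen only if guest frame, machine frame and remaining length are all aligned to it. A self-contained sketch of the same selection (PAGE_ORDER values as in Xen: 4K = 0, 2M = 9, 1G = 18; the hap_has_1gb/hap_has_2mb gates are omitted):

    #include <stdio.h>

    #define PAGE_ORDER_4K 0
    #define PAGE_ORDER_2M 9
    #define PAGE_ORDER_1G 18

    static unsigned int pick_order(unsigned long gfn, unsigned long mfn,
                                   unsigned long todo)
    {
        unsigned long fn_mask = gfn | mfn | todo;

        if ( !(fn_mask & ((1UL << PAGE_ORDER_1G) - 1)) )
            return PAGE_ORDER_1G;
        if ( !(fn_mask & ((1UL << PAGE_ORDER_2M) - 1)) )
            return PAGE_ORDER_2M;
        return PAGE_ORDER_4K;
    }

    int main(void)
    {
        printf("%u\n", pick_order(0x40000, 0x80000, 0x40000)); /* 18: 1G */
        printf("%u\n", pick_order(0x40200, 0x80200, 0x200));   /*  9: 2M */
        printf("%u\n", pick_order(0x40201, 0x80201, 0x1ff));   /*  0: 4K */
        return 0;
    }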
574 | | |
575 | | mfn_t p2m_alloc_ptp(struct p2m_domain *p2m, unsigned int level) |
576 | 1.28k | { |
577 | 1.28k | struct page_info *pg; |
578 | 1.28k | |
579 | 1.28k | ASSERT(p2m); |
580 | 1.28k | ASSERT(p2m->domain); |
581 | 1.28k | ASSERT(p2m->domain->arch.paging.alloc_page); |
582 | 1.28k | pg = p2m->domain->arch.paging.alloc_page(p2m->domain); |
583 | 1.28k | if ( !pg ) |
584 | 0 | return INVALID_MFN; |
585 | 1.28k | |
586 | 1.28k | page_list_add_tail(pg, &p2m->pages); |
587 | 1.28k | BUILD_BUG_ON(PGT_l1_page_table * 2 != PGT_l2_page_table); |
588 | 1.28k | BUILD_BUG_ON(PGT_l1_page_table * 3 != PGT_l3_page_table); |
589 | 1.28k | BUILD_BUG_ON(PGT_l1_page_table * 4 != PGT_l4_page_table); |
590 | 1.28k | pg->u.inuse.type_info = (PGT_l1_page_table * level) | 1 | PGT_validated; |
591 | 1.28k | |
592 | 1.28k | return page_to_mfn(pg); |
593 | 1.28k | } |
594 | | |
595 | | void p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg) |
596 | 0 | { |
597 | 0 | ASSERT(pg); |
598 | 0 | ASSERT(p2m); |
599 | 0 | ASSERT(p2m->domain); |
600 | 0 | ASSERT(p2m->domain->arch.paging.free_page); |
601 | 0 |
|
602 | 0 | page_list_del(pg, &p2m->pages); |
603 | 0 | p2m->domain->arch.paging.free_page(p2m->domain, pg); |
604 | 0 |
605 | 0 | return; |
606 | 0 | } |
607 | | |
608 | | /* |
609 | | * Allocate a new p2m table for a domain. |
610 | | * |
611 | | * The structure of the p2m table is that of a pagetable for xen (i.e. it is |
612 | | * controlled by CONFIG_PAGING_LEVELS). |
613 | | * |
614 | | * Returns 0 for success, -errno for failure. |
615 | | */ |
616 | | int p2m_alloc_table(struct p2m_domain *p2m) |
617 | 11 | { |
618 | 11 | mfn_t top_mfn; |
619 | 11 | struct domain *d = p2m->domain; |
620 | 11 | int rc = 0; |
621 | 11 | |
622 | 11 | p2m_lock(p2m); |
623 | 11 | |
624 | 11 | if ( p2m_is_hostp2m(p2m) |
625 | 1 | && !page_list_empty(&d->page_list) ) |
626 | 0 | { |
627 | 0 | P2M_ERROR("dom %d already has memory allocated\n", d->domain_id); |
628 | 0 | p2m_unlock(p2m); |
629 | 0 | return -EINVAL; |
630 | 0 | } |
631 | 11 | |
632 | 11 | if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) != 0 ) |
633 | 0 | { |
634 | 0 | P2M_ERROR("p2m already allocated for this domain\n"); |
635 | 0 | p2m_unlock(p2m); |
636 | 0 | return -EINVAL; |
637 | 0 | } |
638 | 11 | |
639 | 11 | P2M_PRINTK("allocating p2m table\n"); |
640 | 11 | |
641 | 11 | top_mfn = p2m_alloc_ptp(p2m, 4); |
642 | 11 | if ( mfn_eq(top_mfn, INVALID_MFN) ) |
643 | 0 | { |
644 | 0 | p2m_unlock(p2m); |
645 | 0 | return -ENOMEM; |
646 | 0 | } |
647 | 11 | |
648 | 11 | p2m->phys_table = pagetable_from_mfn(top_mfn); |
649 | 11 | |
650 | 11 | if ( hap_enabled(d) ) |
651 | 11 | iommu_share_p2m_table(d); |
652 | 11 | |
653 | 11 | P2M_PRINTK("populating p2m table\n"); |
654 | 11 | |
655 | 11 | /* Initialise physmap tables for slot zero. Other code assumes this. */ |
656 | 11 | p2m->defer_nested_flush = 1; |
657 | 11 | rc = p2m_set_entry(p2m, _gfn(0), INVALID_MFN, PAGE_ORDER_4K, |
658 | 11 | p2m_invalid, p2m->default_access); |
659 | 11 | p2m->defer_nested_flush = 0; |
660 | 11 | p2m_unlock(p2m); |
661 | 11 | if ( !rc ) |
662 | 11 | P2M_PRINTK("p2m table initialised for slot zero\n"); |
663 | 11 | else |
664 | 0 | P2M_PRINTK("failed to initialise p2m table for slot zero (%d)\n", rc); |
665 | 11 | return rc; |
666 | 11 | } |
667 | | |
668 | | /* |
669 | | * hvm fixme: when adding support for pvh non-hardware domains, this path must |
670 | | * cleanup any foreign p2m types (release refcnts on them). |
671 | | */ |
672 | | void p2m_teardown(struct p2m_domain *p2m) |
673 | | /* Return all the p2m pages to Xen. |
674 | | * We know we don't have any extra mappings to these pages */ |
675 | 0 | { |
676 | 0 | struct page_info *pg; |
677 | 0 | struct domain *d; |
678 | 0 |
|
679 | 0 | if (p2m == NULL) |
680 | 0 | return; |
681 | 0 |
|
682 | 0 | d = p2m->domain; |
683 | 0 |
|
684 | 0 | p2m_lock(p2m); |
685 | 0 | ASSERT(atomic_read(&d->shr_pages) == 0); |
686 | 0 | p2m->phys_table = pagetable_null(); |
687 | 0 |
|
688 | 0 | while ( (pg = page_list_remove_head(&p2m->pages)) ) |
689 | 0 | d->arch.paging.free_page(d, pg); |
690 | 0 | p2m_unlock(p2m); |
691 | 0 | } |
692 | | |
693 | | void p2m_final_teardown(struct domain *d) |
694 | 0 | { |
695 | 0 | /* |
696 | 0 | * We must teardown both of them unconditionally because |
697 | 0 | * we initialise them unconditionally. |
698 | 0 | */ |
699 | 0 | p2m_teardown_altp2m(d); |
700 | 0 | p2m_teardown_nestedp2m(d); |
701 | 0 |
|
702 | 0 | /* Iterate over all p2m tables per domain */ |
703 | 0 | p2m_teardown_hostp2m(d); |
704 | 0 | } |
705 | | |
706 | | |
707 | | static int |
708 | | p2m_remove_page(struct p2m_domain *p2m, unsigned long gfn_l, unsigned long mfn, |
709 | | unsigned int page_order) |
710 | 2 | { |
711 | 2 | unsigned long i; |
712 | 2 | gfn_t gfn = _gfn(gfn_l); |
713 | 2 | mfn_t mfn_return; |
714 | 2 | p2m_type_t t; |
715 | 2 | p2m_access_t a; |
716 | 2 | |
717 | 2 | if ( !paging_mode_translate(p2m->domain) ) |
718 | 0 | { |
719 | 0 | int rc = 0; |
720 | 0 |
|
721 | 0 | if ( need_iommu(p2m->domain) ) |
722 | 0 | { |
723 | 0 | for ( i = 0; i < (1 << page_order); i++ ) |
724 | 0 | { |
725 | 0 | int ret = iommu_unmap_page(p2m->domain, mfn + i); |
726 | 0 |
|
727 | 0 | if ( !rc ) |
728 | 0 | rc = ret; |
729 | 0 | } |
730 | 0 | } |
731 | 0 |
|
732 | 0 | return rc; |
733 | 0 | } |
734 | 2 | |
735 | 2 | ASSERT(gfn_locked_by_me(p2m, gfn)); |
736 | 2 | P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn_l, mfn); |
737 | 2 | |
738 | 2 | if ( mfn_valid(_mfn(mfn)) ) |
739 | 2 | { |
740 | 4 | for ( i = 0; i < (1UL << page_order); i++ ) |
741 | 2 | { |
742 | 2 | mfn_return = p2m->get_entry(p2m, gfn_add(gfn, i), &t, &a, 0, |
743 | 2 | NULL, NULL); |
744 | 2 | if ( !p2m_is_grant(t) && !p2m_is_shared(t) && !p2m_is_foreign(t) ) |
745 | 2 | set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY); |
746 | 2 | ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) ); |
747 | 2 | } |
748 | 2 | } |
749 | 2 | return p2m_set_entry(p2m, gfn, INVALID_MFN, page_order, p2m_invalid, |
750 | 2 | p2m->default_access); |
751 | 2 | } |
752 | | |
753 | | int |
754 | | guest_physmap_remove_page(struct domain *d, gfn_t gfn, |
755 | | mfn_t mfn, unsigned int page_order) |
756 | 2 | { |
757 | 2 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
758 | 2 | int rc; |
759 | 2 | gfn_lock(p2m, gfn, page_order); |
760 | 2 | rc = p2m_remove_page(p2m, gfn_x(gfn), mfn_x(mfn), page_order); |
761 | 2 | gfn_unlock(p2m, gfn, page_order); |
762 | 2 | return rc; |
763 | 2 | } |
764 | | |
765 | | int |
766 | | guest_physmap_add_entry(struct domain *d, gfn_t gfn, mfn_t mfn, |
767 | | unsigned int page_order, p2m_type_t t) |
768 | 228 | { |
769 | 228 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
770 | 228 | unsigned long i; |
771 | 228 | gfn_t ogfn; |
772 | 228 | p2m_type_t ot; |
773 | 228 | p2m_access_t a; |
774 | 228 | mfn_t omfn; |
775 | 228 | int pod_count = 0; |
776 | 228 | int rc = 0; |
777 | 228 | |
778 | 228 | if ( !paging_mode_translate(d) ) |
779 | 0 | { |
780 | 0 | if ( need_iommu(d) && t == p2m_ram_rw ) |
781 | 0 | { |
782 | 0 | for ( i = 0; i < (1 << page_order); i++ ) |
783 | 0 | { |
784 | 0 | rc = iommu_map_page(d, mfn_x(mfn_add(mfn, i)), |
785 | 0 | mfn_x(mfn_add(mfn, i)), |
786 | 0 | IOMMUF_readable|IOMMUF_writable); |
787 | 0 | if ( rc != 0 ) |
788 | 0 | { |
789 | 0 | while ( i-- > 0 ) |
790 | 0 | /* If statement to satisfy __must_check. */ |
791 | 0 | if ( iommu_unmap_page(d, mfn_x(mfn_add(mfn, i))) ) |
792 | 0 | continue; |
793 | 0 |
|
794 | 0 | return rc; |
795 | 0 | } |
796 | 0 | } |
797 | 0 | } |
798 | 0 | return 0; |
799 | 0 | } |
800 | 228 | |
801 | 228 | /* foreign pages are added through p2m_add_foreign */ |
802 | 228 | if ( p2m_is_foreign(t) ) |
803 | 0 | return -EINVAL; |
804 | 228 | |
805 | 228 | p2m_lock(p2m); |
806 | 228 | |
807 | 228 | P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn_x(gfn), mfn_x(mfn)); |
808 | 228 | |
809 | 228 | /* First, remove m->p mappings for existing p->m mappings */ |
810 | 4.05M | for ( i = 0; i < (1UL << page_order); i++ ) |
811 | 4.05M | { |
812 | 4.05M | omfn = p2m->get_entry(p2m, gfn_add(gfn, i), &ot, |
813 | 4.05M | &a, 0, NULL, NULL); |
814 | 4.05M | if ( p2m_is_shared(ot) ) |
815 | 0 | { |
816 | 0 | /* Do an unshare to cleanly take care of all corner |
817 | 0 | * cases. */ |
818 | 0 | int rc; |
819 | 0 | rc = mem_sharing_unshare_page(p2m->domain, |
820 | 0 | gfn_x(gfn_add(gfn, i)), 0); |
821 | 0 | if ( rc ) |
822 | 0 | { |
823 | 0 | p2m_unlock(p2m); |
824 | 0 | /* NOTE: Should a guest domain bring this upon itself, |
825 | 0 | * there is not a whole lot we can do. We are buried |
826 | 0 | * deep in locks from most code paths by now. So, fail |
827 | 0 | * the call and don't try to sleep on a wait queue |
828 | 0 | * while placing the mem event. |
829 | 0 | * |
830 | 0 | * However, all current (changeset 3432abcf9380) code |
831 | 0 | * paths avoid this unsavoury situation. For now. |
832 | 0 | * |
833 | 0 | * Foreign domains are okay to place an event as they |
834 | 0 | * won't go to sleep. */ |
835 | 0 | (void)mem_sharing_notify_enomem(p2m->domain, |
836 | 0 | gfn_x(gfn_add(gfn, i)), |
837 | 0 | 0); |
838 | 0 | return rc; |
839 | 0 | } |
840 | 0 | omfn = p2m->get_entry(p2m, gfn_add(gfn, i), |
841 | 0 | &ot, &a, 0, NULL, NULL); |
842 | 0 | ASSERT(!p2m_is_shared(ot)); |
843 | 0 | } |
844 | 4.05M | if ( p2m_is_grant(ot) || p2m_is_foreign(ot) ) |
845 | 0 | { |
846 | 0 | /* Really shouldn't be unmapping grant/foreign maps this way */ |
847 | 0 | domain_crash(d); |
848 | 0 | p2m_unlock(p2m); |
849 | 0 | |
850 | 0 | return -EINVAL; |
851 | 0 | } |
852 | 4.05M | else if ( p2m_is_ram(ot) && !p2m_is_paged(ot) ) |
853 | 0 | { |
854 | 0 | ASSERT(mfn_valid(omfn)); |
855 | 0 | set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); |
856 | 0 | } |
857 | 4.05M | else if ( ot == p2m_populate_on_demand ) |
858 | 0 | { |
859 | 0 | /* Count how many PoD entries we'll be replacing if successful */ |
860 | 0 | pod_count++; |
861 | 0 | } |
862 | 4.05M | else if ( p2m_is_paging(ot) && (ot != p2m_ram_paging_out) ) |
863 | 0 | { |
864 | 0 | /* We're plugging a hole in the physmap where a paged out page was */ |
865 | 0 | atomic_dec(&d->paged_pages); |
866 | 0 | } |
867 | 4.05M | } |
868 | 228 | |
869 | 228 | /* Then, look for m->p mappings for this range and deal with them */ |
870 | 4.05M | for ( i = 0; i < (1UL << page_order); i++ ) |
871 | 4.05M | { |
872 | 4.05M | if ( page_get_owner(mfn_to_page(mfn_add(mfn, i))) == dom_cow ) |
873 | 0 | { |
874 | 0 | /* This is no way to add a shared page to your physmap! */ |
875 | 0 | gdprintk(XENLOG_ERR, "Adding shared mfn %lx directly to dom%d physmap not allowed.\n", |
876 | 0 | mfn_x(mfn_add(mfn, i)), d->domain_id); |
877 | 0 | p2m_unlock(p2m); |
878 | 0 | return -EINVAL; |
879 | 0 | } |
880 | 4.05M | if ( page_get_owner(mfn_to_page(mfn_add(mfn, i))) != d ) |
881 | 0 | continue; |
882 | 4.05M | ogfn = _gfn(mfn_to_gfn(d, mfn_add(mfn, i))); |
883 | 4.05M | if ( !gfn_eq(ogfn, _gfn(INVALID_M2P_ENTRY)) && |
884 | 0 | !gfn_eq(ogfn, gfn_add(gfn, i)) ) |
885 | 0 | { |
886 | 0 | /* This machine frame is already mapped at another physical |
887 | 0 | * address */ |
888 | 0 | P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n", |
889 | 0 | mfn_x(mfn_add(mfn, i)), gfn_x(ogfn), |
890 | 0 | gfn_x(gfn_add(gfn, i))); |
891 | 0 | omfn = p2m->get_entry(p2m, ogfn, &ot, &a, 0, NULL, NULL); |
892 | 0 | if ( p2m_is_ram(ot) && !p2m_is_paged(ot) ) |
893 | 0 | { |
894 | 0 | ASSERT(mfn_valid(omfn)); |
895 | 0 | P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n", |
896 | 0 | gfn_x(ogfn) , mfn_x(omfn)); |
897 | 0 | if ( mfn_eq(omfn, mfn_add(mfn, i)) ) |
898 | 0 | p2m_remove_page(p2m, gfn_x(ogfn), mfn_x(mfn_add(mfn, i)), |
899 | 0 | 0); |
900 | 0 | } |
901 | 0 | } |
902 | 4.05M | } |
903 | 228 | |
904 | 228 | /* Now, actually do the two-way mapping */ |
905 | 228 | if ( mfn_valid(mfn) ) |
906 | 228 | { |
907 | 228 | rc = p2m_set_entry(p2m, gfn, mfn, page_order, t, |
908 | 228 | p2m->default_access); |
909 | 228 | if ( rc ) |
910 | 0 | goto out; /* Failed to update p2m, bail without updating m2p. */ |
911 | 228 | |
912 | 228 | if ( !p2m_is_grant(t) ) |
913 | 228 | { |
914 | 4.05M | for ( i = 0; i < (1UL << page_order); i++ ) |
915 | 4.05M | set_gpfn_from_mfn(mfn_x(mfn_add(mfn, i)), |
916 | 228 | gfn_x(gfn_add(gfn, i))); |
917 | 228 | } |
918 | 228 | } |
919 | 228 | else |
920 | 0 | { |
921 | 0 | gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n", |
922 | 0 | gfn_x(gfn), mfn_x(mfn)); |
923 | 0 | rc = p2m_set_entry(p2m, gfn, INVALID_MFN, page_order, |
924 | 0 | p2m_invalid, p2m->default_access); |
925 | 0 | if ( rc == 0 ) |
926 | 0 | { |
927 | 0 | pod_lock(p2m); |
928 | 0 | p2m->pod.entry_count -= pod_count; |
929 | 0 | BUG_ON(p2m->pod.entry_count < 0); |
930 | 0 | pod_unlock(p2m); |
931 | 0 | } |
932 | 0 | } |
933 | 228 | |
934 | 228 | out: |
935 | 228 | p2m_unlock(p2m); |
936 | 228 | |
937 | 228 | return rc; |
938 | 228 | } |
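A hedged sketch of how guest RAM typically reaches this function: guest_physmap_add_page() is assumed (as elsewhere in this tree) to be a thin wrapper that passes p2m_ram_rw to guest_physmap_add_entry():

    struct page_info *pg = alloc_domheap_pages(d, PAGE_ORDER_2M, 0);
    int rc;

    if ( pg == NULL )
        return -ENOMEM;
    rc = guest_physmap_add_page(d, gfn, page_to_mfn(pg), PAGE_ORDER_2M);
    if ( rc )
        free_domheap_pages(pg, PAGE_ORDER_2M);   /* p2m update failed */
    return rc;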
939 | | |
940 | | |
941 | | /* |
942 | | * Modify the p2m type of a single gfn from ot to nt. |
943 | | * Returns: 0 for success, -errno for failure. |
944 | | * Resets the access permissions. |
945 | | */ |
946 | | int p2m_change_type_one(struct domain *d, unsigned long gfn_l, |
947 | | p2m_type_t ot, p2m_type_t nt) |
948 | 0 | { |
949 | 0 | p2m_access_t a; |
950 | 0 | p2m_type_t pt; |
951 | 0 | gfn_t gfn = _gfn(gfn_l); |
952 | 0 | mfn_t mfn; |
953 | 0 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
954 | 0 | int rc; |
955 | 0 |
956 | 0 | BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt)); |
957 | 0 | BUG_ON(p2m_is_foreign(ot) || p2m_is_foreign(nt)); |
958 | 0 |
|
959 | 0 | gfn_lock(p2m, gfn, 0); |
960 | 0 |
|
961 | 0 | mfn = p2m->get_entry(p2m, gfn, &pt, &a, 0, NULL, NULL); |
962 | 0 | rc = likely(pt == ot) |
963 | 0 | ? p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, nt, |
964 | 0 | p2m->default_access) |
965 | 0 | : -EBUSY; |
966 | 0 |
|
967 | 0 | gfn_unlock(p2m, gfn, 0); |
968 | 0 |
|
969 | 0 | return rc; |
970 | 0 | } |
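One common consumer of this helper is log-dirty tracking: a write fault against a p2m_ram_logdirty page flips the gfn back to RW and records the write. A hedged sketch of that fault-handler step; the paging_mark_dirty() call and the mfn at hand are illustrative:

    if ( p2m_change_type_one(d, gfn_l, p2m_ram_logdirty, p2m_ram_rw) == 0 )
        paging_mark_dirty(d, mfn);   /* note the page in the dirty bitmap */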
971 | | |
972 | | /* Modify the p2m type of a range of gfns from ot to nt. */ |
973 | | void p2m_change_type_range(struct domain *d, |
974 | | unsigned long start, unsigned long end, |
975 | | p2m_type_t ot, p2m_type_t nt) |
976 | 0 | { |
977 | 0 | unsigned long gfn = start; |
978 | 0 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
979 | 0 | int rc = 0; |
980 | 0 |
|
981 | 0 | ASSERT(ot != nt); |
982 | 0 | ASSERT(p2m_is_changeable(ot) && p2m_is_changeable(nt)); |
983 | 0 |
|
984 | 0 | p2m_lock(p2m); |
985 | 0 | p2m->defer_nested_flush = 1; |
986 | 0 |
|
987 | 0 | if ( unlikely(end > p2m->max_mapped_pfn) ) |
988 | 0 | { |
989 | 0 | if ( !gfn ) |
990 | 0 | { |
991 | 0 | p2m->change_entry_type_global(p2m, ot, nt); |
992 | 0 | gfn = end; |
993 | 0 | } |
994 | 0 | end = p2m->max_mapped_pfn + 1; |
995 | 0 | } |
996 | 0 | if ( gfn < end ) |
997 | 0 | rc = p2m->change_entry_type_range(p2m, ot, nt, gfn, end - 1); |
998 | 0 | if ( rc ) |
999 | 0 | { |
1000 | 0 | printk(XENLOG_G_ERR "Error %d changing Dom%d GFNs [%lx,%lx] from %d to %d\n", |
1001 | 0 | rc, d->domain_id, start, end - 1, ot, nt); |
1002 | 0 | domain_crash(d); |
1003 | 0 | } |
1004 | 0 |
|
1005 | 0 | switch ( nt ) |
1006 | 0 | { |
1007 | 0 | case p2m_ram_rw: |
1008 | 0 | if ( ot == p2m_ram_logdirty ) |
1009 | 0 | rc = rangeset_remove_range(p2m->logdirty_ranges, start, end - 1); |
1010 | 0 | break; |
1011 | 0 | case p2m_ram_logdirty: |
1012 | 0 | if ( ot == p2m_ram_rw ) |
1013 | 0 | rc = rangeset_add_range(p2m->logdirty_ranges, start, end - 1); |
1014 | 0 | break; |
1015 | 0 | default: |
1016 | 0 | break; |
1017 | 0 | } |
1018 | 0 | if ( rc ) |
1019 | 0 | { |
1020 | 0 | printk(XENLOG_G_ERR "Error %d manipulating Dom%d's log-dirty ranges\n", |
1021 | 0 | rc, d->domain_id); |
1022 | 0 | domain_crash(d); |
1023 | 0 | } |
1024 | 0 |
|
1025 | 0 | p2m->defer_nested_flush = 0; |
1026 | 0 | if ( nestedhvm_enabled(d) ) |
1027 | 0 | p2m_flush_nestedp2m(d); |
1028 | 0 | p2m_unlock(p2m); |
1029 | 0 | } |
1030 | | |
1031 | | /* |
1032 | | * Finish p2m type change for gfns which are marked as need_recalc in a range. |
1033 | | * Returns: 0/1 for success, negative for failure |
1034 | | */ |
1035 | | int p2m_finish_type_change(struct domain *d, |
1036 | | gfn_t first_gfn, unsigned long max_nr) |
1037 | 0 | { |
1038 | 0 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
1039 | 0 | unsigned long gfn = gfn_x(first_gfn); |
1040 | 0 | unsigned long last_gfn = gfn + max_nr - 1; |
1041 | 0 | int rc = 0; |
1042 | 0 |
|
1043 | 0 | p2m_lock(p2m); |
1044 | 0 |
|
1045 | 0 | last_gfn = min(last_gfn, p2m->max_mapped_pfn); |
1046 | 0 | while ( gfn <= last_gfn ) |
1047 | 0 | { |
1048 | 0 | rc = p2m->recalc(p2m, gfn); |
1049 | 0 | /* |
1050 | 0 | * ept->recalc could return 0/1/-ENOMEM. pt->recalc could return |
1051 | 0 | * 0/-ENOMEM/-ENOENT, -ENOENT isn't an error as we are looping |
1052 | 0 | * gfn here. |
1053 | 0 | */ |
1054 | 0 | if ( rc == -ENOENT ) |
1055 | 0 | rc = 0; |
1056 | 0 | else if ( rc < 0 ) |
1057 | 0 | { |
1058 | 0 | gdprintk(XENLOG_ERR, "p2m->recalc failed! Dom%d gfn=%lx\n", |
1059 | 0 | d->domain_id, gfn); |
1060 | 0 | break; |
1061 | 0 | } |
1062 | 0 |
|
1063 | 0 | gfn++; |
1064 | 0 | } |
1065 | 0 |
|
1066 | 0 | p2m_unlock(p2m); |
1067 | 0 |
|
1068 | 0 | return rc; |
1069 | 0 | } |
1070 | | |
1071 | | /* |
1072 | | * Returns: |
1073 | | * 0 for success |
1074 | | * -errno for failure |
1075 | | * 1 + new order for caller to retry with smaller order (guaranteed |
1076 | | * to be smaller than order passed in) |
1077 | | */ |
1078 | | static int set_typed_p2m_entry(struct domain *d, unsigned long gfn_l, |
1079 | | mfn_t mfn, unsigned int order, |
1080 | | p2m_type_t gfn_p2mt, p2m_access_t access) |
1081 | 289k | { |
1082 | 289k | int rc = 0; |
1083 | 289k | p2m_access_t a; |
1084 | 289k | p2m_type_t ot; |
1085 | 289k | mfn_t omfn; |
1086 | 289k | gfn_t gfn = _gfn(gfn_l); |
1087 | 289k | unsigned int cur_order = 0; |
1088 | 289k | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
1089 | 289k | |
1090 | 289k | if ( !paging_mode_translate(d) ) |
1091 | 0 | return -EIO; |
1092 | 289k | |
1093 | 289k | gfn_lock(p2m, gfn, order); |
1094 | 289k | omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, &cur_order, NULL); |
1095 | 289k | if ( cur_order < order ) |
1096 | 0 | { |
1097 | 0 | gfn_unlock(p2m, gfn, order); |
1098 | 0 | return cur_order + 1; |
1099 | 0 | } |
1100 | 289k | if ( p2m_is_grant(ot) || p2m_is_foreign(ot) ) |
1101 | 0 | { |
1102 | 0 | gfn_unlock(p2m, gfn, order); |
1103 | 0 | domain_crash(d); |
1104 | 0 | return -ENOENT; |
1105 | 0 | } |
1106 | 289k | else if ( p2m_is_ram(ot) ) |
1107 | 0 | { |
1108 | 0 | unsigned long i; |
1109 | 0 |
1110 | 0 | for ( i = 0; i < (1UL << order); ++i ) |
1111 | 0 | { |
1112 | 0 | ASSERT(mfn_valid(_mfn(mfn_x(omfn) + i))); |
1113 | 0 | set_gpfn_from_mfn(mfn_x(omfn) + i, INVALID_M2P_ENTRY); |
1114 | 0 | } |
1115 | 0 | } |
1116 | 289k | |
1117 | 289k | P2M_DEBUG("set %d %lx %lx\n", gfn_p2mt, gfn_l, mfn_x(mfn)); |
1118 | 289k | rc = p2m_set_entry(p2m, gfn, mfn, order, gfn_p2mt, access); |
1119 | 289k | if ( rc ) |
1120 | 0 | gdprintk(XENLOG_ERR, "p2m_set_entry: %#lx:%u -> %d (0x%"PRI_mfn")\n", |
1121 | 289k | gfn_l, order, rc, mfn_x(mfn)); |
1122 | 289k | else if ( p2m_is_pod(ot) ) |
1123 | 0 | { |
1124 | 0 | pod_lock(p2m); |
1125 | 0 | p2m->pod.entry_count -= 1UL << order; |
1126 | 0 | BUG_ON(p2m->pod.entry_count < 0); |
1127 | 0 | pod_unlock(p2m); |
1128 | 0 | } |
1129 | 289k | gfn_unlock(p2m, gfn, order); |
1130 | 289k | |
1131 | 289k | return rc; |
1132 | 289k | } |
1133 | | |
1134 | | /* Set foreign mfn in the given guest's p2m table. */ |
1135 | | static int set_foreign_p2m_entry(struct domain *d, unsigned long gfn, |
1136 | | mfn_t mfn) |
1137 | 0 | { |
1138 | 0 | return set_typed_p2m_entry(d, gfn, mfn, PAGE_ORDER_4K, p2m_map_foreign, |
1139 | 0 | p2m_get_hostp2m(d)->default_access); |
1140 | 0 | } |
1141 | | |
1142 | | int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, |
1143 | | unsigned int order, p2m_access_t access) |
1144 | 289k | { |
1145 | 289k | if ( order > PAGE_ORDER_4K && |
1146 | 0 | rangeset_overlaps_range(mmio_ro_ranges, mfn_x(mfn), |
1147 | 0 | mfn_x(mfn) + (1UL << order) - 1) ) |
1148 | 0 | return PAGE_ORDER_4K + 1; |
1149 | 289k | |
1150 | 289k | return set_typed_p2m_entry(d, gfn, mfn, order, p2m_mmio_direct, access); |
1151 | 289k | } |
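Callers are expected to honour the retry contract documented above set_typed_p2m_entry(): a positive return value is 1 + the (strictly smaller) order to retry with. A hedged sketch, assuming the starting order was computed from gfn/mfn alignment as real callers do:

    unsigned int order = PAGE_ORDER_2M;   /* e.g. from gfn/mfn alignment */
    int rc;

    while ( (rc = set_mmio_p2m_entry(d, gfn, mfn, order,
                                     p2m_access_rw)) > 0 )
        order = rc - 1;   /* retry with the strictly smaller order */
    /* rc is now 0 on success or a -errno value */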
1152 | | |
1153 | | int set_identity_p2m_entry(struct domain *d, unsigned long gfn_l, |
1154 | | p2m_access_t p2ma, unsigned int flag) |
1155 | 39 | { |
1156 | 39 | p2m_type_t p2mt; |
1157 | 39 | p2m_access_t a; |
1158 | 39 | gfn_t gfn = _gfn(gfn_l); |
1159 | 39 | mfn_t mfn; |
1160 | 39 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
1161 | 39 | int ret; |
1162 | 39 | |
1163 | 39 | if ( !paging_mode_translate(p2m->domain) ) |
1164 | 0 | { |
1165 | 0 | if ( !need_iommu(d) ) |
1166 | 0 | return 0; |
1167 | 0 | return iommu_map_page(d, gfn_l, gfn_l, IOMMUF_readable|IOMMUF_writable); |
1168 | 0 | } |
1169 | 39 | |
1170 | 39 | gfn_lock(p2m, gfn, 0); |
1171 | 39 | |
1172 | 39 | mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL); |
1173 | 39 | |
1174 | 39 | if ( p2mt == p2m_invalid || p2mt == p2m_mmio_dm ) |
1175 | 39 | ret = p2m_set_entry(p2m, gfn, _mfn(gfn_l), PAGE_ORDER_4K, |
1176 | 39 | p2m_mmio_direct, p2ma); |
1177 | 0 | else if ( mfn_x(mfn) == gfn_l && p2mt == p2m_mmio_direct && a == p2ma ) |
1178 | 0 | ret = 0; |
1179 | 0 | else |
1180 | 0 | { |
1181 | 0 | if ( flag & XEN_DOMCTL_DEV_RDM_RELAXED ) |
1182 | 0 | ret = 0; |
1183 | 0 | else |
1184 | 0 | ret = -EBUSY; |
1185 | 0 | printk(XENLOG_G_WARNING |
1186 | 0 | "Cannot setup identity map d%d:%lx," |
1187 | 0 | " gfn already mapped to %lx.\n", |
1188 | 0 | d->domain_id, gfn_l, mfn_x(mfn)); |
1189 | 0 | } |
1190 | 39 | |
1191 | 39 | gfn_unlock(p2m, gfn, 0); |
1192 | 39 | return ret; |
1193 | 39 | } |
1194 | | |
1195 | | /* |
1196 | | * Returns: |
1197 | | * 0 for success |
1198 | | * -errno for failure |
1199 | | * order+1 for caller to retry with order (guaranteed smaller than |
1200 | | * the order value passed in) |
1201 | | */ |
1202 | | int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn_l, mfn_t mfn, |
1203 | | unsigned int order) |
1204 | 218k | { |
1205 | 218k | int rc = -EINVAL; |
1206 | 218k | gfn_t gfn = _gfn(gfn_l); |
1207 | 218k | mfn_t actual_mfn; |
1208 | 218k | p2m_access_t a; |
1209 | 218k | p2m_type_t t; |
1210 | 218k | unsigned int cur_order = 0; |
1211 | 218k | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
1212 | 218k | |
1213 | 218k | if ( !paging_mode_translate(d) ) |
1214 | 0 | return -EIO; |
1215 | 218k | |
1216 | 218k | gfn_lock(p2m, gfn, order); |
1217 | 218k | actual_mfn = p2m->get_entry(p2m, gfn, &t, &a, 0, &cur_order, NULL); |
1218 | 218k | if ( cur_order < order ) |
1219 | 0 | { |
1220 | 0 | rc = cur_order + 1; |
1221 | 0 | goto out; |
1222 | 0 | } |
1223 | 218k | |
1224 | 218k | /* Do not use mfn_valid() here as it will usually fail for MMIO pages. */ |
1225 | 218k | if ( mfn_eq(actual_mfn, INVALID_MFN) || (t != p2m_mmio_direct) ) |
1226 | 0 | { |
1227 | 0 | gdprintk(XENLOG_ERR, |
1228 | 0 | "gfn_to_mfn failed! gfn=%08lx type:%d\n", gfn_l, t); |
1229 | 0 | goto out; |
1230 | 0 | } |
1231 | 218k | if ( mfn_x(mfn) != mfn_x(actual_mfn) ) |
1232 | 0 | gdprintk(XENLOG_WARNING, |
1233 | 218k | "no mapping between mfn %08lx and gfn %08lx\n", |
1234 | 218k | mfn_x(mfn), gfn_l); |
1235 | 218k | rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order, p2m_invalid, |
1236 | 218k | p2m->default_access); |
1237 | 218k | |
1238 | 218k | out: |
1239 | 218k | gfn_unlock(p2m, gfn, order); |
1240 | 218k | |
1241 | 218k | return rc; |
1242 | 218k | } |
1243 | | |
1244 | | int clear_identity_p2m_entry(struct domain *d, unsigned long gfn_l) |
1245 | 0 | { |
1246 | 0 | p2m_type_t p2mt; |
1247 | 0 | p2m_access_t a; |
1248 | 0 | gfn_t gfn = _gfn(gfn_l); |
1249 | 0 | mfn_t mfn; |
1250 | 0 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
1251 | 0 | int ret; |
1252 | 0 |
|
1253 | 0 | if ( !paging_mode_translate(d) ) |
1254 | 0 | { |
1255 | 0 | if ( !need_iommu(d) ) |
1256 | 0 | return 0; |
1257 | 0 | return iommu_unmap_page(d, gfn_l); |
1258 | 0 | } |
1259 | 0 |
1260 | 0 | gfn_lock(p2m, gfn, 0); |
1261 | 0 |
1262 | 0 | mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL); |
1263 | 0 | if ( p2mt == p2m_mmio_direct && mfn_x(mfn) == gfn_l ) |
1264 | 0 | { |
1265 | 0 | ret = p2m_set_entry(p2m, gfn, INVALID_MFN, PAGE_ORDER_4K, |
1266 | 0 | p2m_invalid, p2m->default_access); |
1267 | 0 | gfn_unlock(p2m, gfn, 0); |
1268 | 0 | } |
1269 | 0 | else |
1270 | 0 | { |
1271 | 0 | gfn_unlock(p2m, gfn, 0); |
1272 | 0 | printk(XENLOG_G_WARNING |
1273 | 0 | "non-identity map d%d:%lx not cleared (mapped to %lx)\n", |
1274 | 0 | d->domain_id, gfn_l, mfn_x(mfn)); |
1275 | 0 | ret = 0; |
1276 | 0 | } |
1277 | 0 |
1278 | 0 | return ret; |
1279 | 0 | } |
1280 | | |
1281 | | /* Returns: 0 for success, -errno for failure */ |
1282 | | int set_shared_p2m_entry(struct domain *d, unsigned long gfn_l, mfn_t mfn) |
1283 | 0 | { |
1284 | 0 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
1285 | 0 | int rc = 0; |
1286 | 0 | gfn_t gfn = _gfn(gfn_l); |
1287 | 0 | p2m_access_t a; |
1288 | 0 | p2m_type_t ot; |
1289 | 0 | mfn_t omfn; |
1290 | 0 | unsigned long pg_type; |
1291 | 0 |
1292 | 0 | if ( !paging_mode_translate(p2m->domain) ) |
1293 | 0 | return -EIO; |
1294 | 0 |
1295 | 0 | gfn_lock(p2m, gfn, 0); |
1296 | 0 | omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL, NULL); |
1297 | 0 | /* At the moment we only allow p2m change if gfn has already been made |
1298 | 0 | * sharable first */ |
1299 | 0 | ASSERT(p2m_is_shared(ot)); |
1300 | 0 | ASSERT(mfn_valid(omfn)); |
1301 | 0 | /* Set the m2p entry to invalid only if there are no further type |
1302 | 0 | * refs to this page as shared */ |
1303 | 0 | pg_type = read_atomic(&(mfn_to_page(omfn)->u.inuse.type_info)); |
1304 | 0 | if ( (pg_type & PGT_count_mask) == 0 |
1305 | 0 | || (pg_type & PGT_type_mask) != PGT_shared_page ) |
1306 | 0 | set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); |
1307 | 0 |
1308 | 0 | P2M_DEBUG("set shared %lx %lx\n", gfn_l, mfn_x(mfn)); |
1309 | 0 | rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_shared, |
1310 | 0 | p2m->default_access); |
1311 | 0 | gfn_unlock(p2m, gfn, 0); |
1312 | 0 | if ( rc ) |
1313 | 0 | gdprintk(XENLOG_ERR, |
1314 | 0 | "p2m_set_entry failed! mfn=%08lx rc:%d\n", |
1315 | 0 | mfn_x(get_gfn_query_unlocked(p2m->domain, gfn_l, &ot)), rc); |
1316 | 0 | return rc; |
1317 | 0 | } |
1318 | | |
1319 | | /** |
1320 | | * p2m_mem_paging_nominate - Mark a guest page as to-be-paged-out |
1321 | | * @d: guest domain |
1322 | | * @gfn: guest page to nominate |
1323 | | * |
1324 | | * Returns 0 for success or negative errno values if gfn is not pageable. |
1325 | | * |
1326 | | * p2m_mem_paging_nominate() is called by the pager and checks if a guest page |
1327 | | * can be paged out. If the following conditions are met the p2mt will be |
1328 | | * changed: |
1329 | | * - the gfn is backed by a mfn |
1330 | | * - the p2mt of the gfn is pageable |
1331 | | * - the mfn is not used for IO |
1332 | | * - the mfn has exactly one user and has no special meaning |
1333 | | * |
1334 | | * Once the p2mt is changed the page is readonly for the guest. On success the |
1335 | | * pager can write the page contents to disk and later evict the page. |
1336 | | */ |
1337 | | int p2m_mem_paging_nominate(struct domain *d, unsigned long gfn_l) |
1338 | 0 | { |
1339 | 0 | struct page_info *page; |
1340 | 0 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
1341 | 0 | p2m_type_t p2mt; |
1342 | 0 | p2m_access_t a; |
1343 | 0 | gfn_t gfn = _gfn(gfn_l); |
1344 | 0 | mfn_t mfn; |
1345 | 0 | int ret = -EBUSY; |
1346 | 0 |
1347 | 0 | gfn_lock(p2m, gfn, 0); |
1348 | 0 |
1349 | 0 | mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL); |
1350 | 0 |
1351 | 0 | /* Check if mfn is valid */ |
1352 | 0 | if ( !mfn_valid(mfn) ) |
1353 | 0 | goto out; |
1354 | 0 |
1355 | 0 | /* Check p2m type */ |
1356 | 0 | if ( !p2m_is_pageable(p2mt) ) |
1357 | 0 | goto out; |
1358 | 0 |
|
1359 | 0 | /* Check for io memory page */ |
1360 | 0 | if ( is_iomem_page(mfn) ) |
1361 | 0 | goto out; |
1362 | 0 |
|
1363 | 0 | /* Check page count and type */ |
1364 | 0 | page = mfn_to_page(mfn); |
1365 | 0 | if ( (page->count_info & (PGC_count_mask | PGC_allocated)) != |
1366 | 0 | (1 | PGC_allocated) ) |
1367 | 0 | goto out; |
1368 | 0 |
1369 | 0 | if ( (page->u.inuse.type_info & PGT_count_mask) != 0 ) |
1370 | 0 | goto out; |
1371 | 0 |
1372 | 0 | /* Fix p2m entry */ |
1373 | 0 | ret = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_out, a); |
1374 | 0 |
1375 | 0 | out: |
1376 | 0 | gfn_unlock(p2m, gfn, 0); |
1377 | 0 | return ret; |
1378 | 0 | } |
1379 | | |
1380 | | /** |
1381 | | * p2m_mem_paging_evict - Mark a guest page as paged-out |
1382 | | * @d: guest domain |
1383 | | * @gfn: guest page to evict |
1384 | | * |
1385 | | * Returns 0 for success or negative errno values if eviction is not possible. |
1386 | | * |
1387 | | * p2m_mem_paging_evict() is called by the pager and will free a guest page and |
1388 | | * release it back to Xen. If the following conditions are met the page can be |
1389 | | * freed: |
1390 | | * - the gfn is backed by a mfn |
1391 | | * - the gfn was nominated |
1392 | | * - the mfn has still exactly one user and has no special meaning |
1393 | | * |
1394 | | * After successful nomination some other process could have mapped the page. In |
1395 | | * this case eviction can not be done. If the gfn was populated before the pager |
1396 | | * could evict it, eviction can not be done either. In this case the gfn is |
1397 | | * still backed by a mfn. |
1398 | | */ |
1399 | | int p2m_mem_paging_evict(struct domain *d, unsigned long gfn_l) |
1400 | 0 | { |
1401 | 0 | struct page_info *page; |
1402 | 0 | p2m_type_t p2mt; |
1403 | 0 | p2m_access_t a; |
1404 | 0 | gfn_t gfn = _gfn(gfn_l); |
1405 | 0 | mfn_t mfn; |
1406 | 0 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
1407 | 0 | int ret = -EBUSY; |
1408 | 0 |
1409 | 0 | gfn_lock(p2m, gfn, 0); |
1410 | 0 |
1411 | 0 | /* Get mfn */ |
1412 | 0 | mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL); |
1413 | 0 | if ( unlikely(!mfn_valid(mfn)) ) |
1414 | 0 | goto out; |
1415 | 0 |
1416 | 0 | /* Allow only nominated pages */ |
1417 | 0 | if ( p2mt != p2m_ram_paging_out ) |
1418 | 0 | goto out; |
1419 | 0 |
1420 | 0 | /* Get the page so it doesn't get modified under Xen's feet */ |
1421 | 0 | page = mfn_to_page(mfn); |
1422 | 0 | if ( unlikely(!get_page(page, d)) ) |
1423 | 0 | goto out; |
1424 | 0 |
1425 | 0 | /* Check page count and type once more */ |
1426 | 0 | if ( (page->count_info & (PGC_count_mask | PGC_allocated)) != |
1427 | 0 | (2 | PGC_allocated) ) |
1428 | 0 | goto out_put; |
1429 | 0 |
1430 | 0 | if ( (page->u.inuse.type_info & PGT_count_mask) != 0 ) |
1431 | 0 | goto out_put; |
1432 | 0 |
1433 | 0 | /* Decrement guest domain's ref count of the page */ |
1434 | 0 | if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) |
1435 | 0 | put_page(page); |
1436 | 0 |
1437 | 0 | /* Remove mapping from p2m table */ |
1438 | 0 | ret = p2m_set_entry(p2m, gfn, INVALID_MFN, PAGE_ORDER_4K, |
1439 | 0 | p2m_ram_paged, a); |
1440 | 0 |
1441 | 0 | /* Clear content before returning the page to Xen */ |
1442 | 0 | scrub_one_page(page); |
1443 | 0 |
1444 | 0 | /* Track number of paged gfns */ |
1445 | 0 | atomic_inc(&d->paged_pages); |
1446 | 0 |
1447 | 0 | out_put: |
1448 | 0 | /* Put the page back so it gets freed */ |
1449 | 0 | put_page(page); |
1450 | 0 |
1451 | 0 | out: |
1452 | 0 | gfn_unlock(p2m, gfn, 0); |
1453 | 0 | return ret; |
1454 | 0 | } |
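Together with p2m_mem_paging_nominate() above, this completes the pager-side life cycle of a paged page. A hedged outline; a real pager drives these operations through the memory_op/vm_event interfaces rather than calling the hypervisor functions directly:

    /* 1. nominate: gfn -> p2m_ram_paging_out, read-only for the guest */
    p2m_mem_paging_nominate(d, gfn);
    /* 2. pager saves the page contents to its backing store */
    /* 3. evict: frame freed, gfn left as p2m_ram_paged */
    p2m_mem_paging_evict(d, gfn);
    /* 4. on a later guest access, p2m_mem_paging_populate() raises a
     *    vm_event; the pager responds with p2m_mem_paging_prep(d, gfn,
     *    buffer) and a resume, making the gfn usable again */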
1455 | | |
1456 | | /** |
1457 | | * p2m_mem_paging_drop_page - Tell pager to drop its reference to a paged page |
1458 | | * @d: guest domain |
1459 | | * @gfn: guest page to drop |
1460 | | * |
1461 | | * p2m_mem_paging_drop_page() will notify the pager that a paged-out gfn was |
1462 | | * released by the guest. The pager is supposed to drop its reference of the |
1463 | | * gfn. |
1464 | | */ |
1465 | | void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn, |
1466 | | p2m_type_t p2mt) |
1467 | 0 | { |
1468 | 0 | vm_event_request_t req = { |
1469 | 0 | .reason = VM_EVENT_REASON_MEM_PAGING, |
1470 | 0 | .u.mem_paging.gfn = gfn |
1471 | 0 | }; |
1472 | 0 |
1473 | 0 | /* We allow no ring in this unique case, because it won't affect |
1474 | 0 | * correctness of the guest execution at this point. If this is the only |
1475 | 0 | * page that happens to be paged-out, we'll be okay.. but it's likely the |
1476 | 0 | * guest will crash shortly anyways. */ |
1477 | 0 | int rc = vm_event_claim_slot(d, d->vm_event_paging); |
1478 | 0 | if ( rc < 0 ) |
1479 | 0 | return; |
1480 | 0 |
1481 | 0 | /* Send release notification to pager */ |
1482 | 0 | req.u.mem_paging.flags = MEM_PAGING_DROP_PAGE; |
1483 | 0 |
1484 | 0 | /* Update stats unless the page hasn't yet been evicted */ |
1485 | 0 | if ( p2mt != p2m_ram_paging_out ) |
1486 | 0 | atomic_dec(&d->paged_pages); |
1487 | 0 | else |
1488 | 0 | /* Evict will fail now, tag this request for pager */ |
1489 | 0 | req.u.mem_paging.flags |= MEM_PAGING_EVICT_FAIL; |
1490 | 0 |
1491 | 0 | vm_event_put_request(d, d->vm_event_paging, &req); |
1492 | 0 | } |
1493 | | |
1494 | | /** |
1495 | | * p2m_mem_paging_populate - Tell pager to populate a paged page |
1496 | | * @d: guest domain |
1497 | | * @gfn: guest page in paging state |
1498 | | * |
1499 | | * p2m_mem_paging_populate() will notify the pager that a page in any of the |
1500 | | * paging states needs to be written back into the guest. |
1501 | | * This function needs to be called whenever gfn_to_mfn() returns any of the p2m |
1502 | | * paging types because the gfn may not be backed by a mfn. |
1503 | | * |
1504 | | * The gfn can be in any of the paging states, but the pager needs only be |
1505 | | * notified when the gfn is in the paging-out path (paging_out or paged). This |
1506 | | * function may be called more than once from several vcpus. If the vcpu belongs |
1507 | | * to the guest, the vcpu must be stopped and the pager notified that the vcpu |
1508 | | * was stopped. The pager needs to handle several requests for the same gfn. |
1509 | | * |
1510 | | * If the gfn is not in the paging-out path and the vcpu does not belong to the |
1511 | | * guest, nothing needs to be done and the function assumes that a request was |
1512 | | * already sent to the pager. In this case the caller has to try again until the |
1513 | | * gfn is fully paged in again. |
1514 | | */ |
1515 | | void p2m_mem_paging_populate(struct domain *d, unsigned long gfn_l) |
1516 | 0 | { |
1517 | 0 | struct vcpu *v = current; |
1518 | 0 | vm_event_request_t req = { |
1519 | 0 | .reason = VM_EVENT_REASON_MEM_PAGING, |
1520 | 0 | .u.mem_paging.gfn = gfn_l |
1521 | 0 | }; |
1522 | 0 | p2m_type_t p2mt; |
1523 | 0 | p2m_access_t a; |
1524 | 0 | gfn_t gfn = _gfn(gfn_l); |
1525 | 0 | mfn_t mfn; |
1526 | 0 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
1527 | 0 |
1528 | 0 | /* We're paging. There should be a ring */ |
1529 | 0 | int rc = vm_event_claim_slot(d, d->vm_event_paging); |
1530 | 0 | if ( rc == -ENOSYS ) |
1531 | 0 | { |
1532 | 0 | gdprintk(XENLOG_ERR, "Domain %hu paging gfn %lx yet no ring " |
1533 | 0 | "in place\n", d->domain_id, gfn_l); |
1534 | 0 | /* Prevent the vcpu from faulting repeatedly on the same gfn */ |
1535 | 0 | if ( v->domain == d ) |
1536 | 0 | vcpu_pause_nosync(v); |
1537 | 0 | domain_crash(d); |
1538 | 0 | return; |
1539 | 0 | } |
1540 | 0 | else if ( rc < 0 ) |
1541 | 0 | return; |
1542 | 0 |
1543 | 0 | /* Fix p2m mapping */ |
1544 | 0 | gfn_lock(p2m, gfn, 0); |
1545 | 0 | mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL); |
1546 | 0 | /* Allow only nominated or evicted pages to enter page-in path */ |
1547 | 0 | if ( p2mt == p2m_ram_paging_out || p2mt == p2m_ram_paged ) |
1548 | 0 | { |
1549 | 0 | /* Evict will fail now, tag this request for pager */ |
1550 | 0 | if ( p2mt == p2m_ram_paging_out ) |
1551 | 0 | req.u.mem_paging.flags |= MEM_PAGING_EVICT_FAIL; |
1552 | 0 |
1553 | 0 | p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_in, a); |
1554 | 0 | } |
1555 | 0 | gfn_unlock(p2m, gfn, 0); |
1556 | 0 |
1557 | 0 | /* Pause domain if request came from guest and gfn has paging type */ |
1558 | 0 | if ( p2m_is_paging(p2mt) && v->domain == d ) |
1559 | 0 | { |
1560 | 0 | vm_event_vcpu_pause(v); |
1561 | 0 | req.flags |= VM_EVENT_FLAG_VCPU_PAUSED; |
1562 | 0 | } |
1563 | 0 | /* No need to inform pager if the gfn is not in the page-out path */ |
1564 | 0 | else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged ) |
1565 | 0 | { |
1566 | 0 | /* gfn is already on its way back and vcpu is not paused */ |
1567 | 0 | vm_event_cancel_slot(d, d->vm_event_paging); |
1568 | 0 | return; |
1569 | 0 | } |
1570 | 0 |
1571 | 0 | /* Send request to pager */ |
1572 | 0 | req.u.mem_paging.p2mt = p2mt; |
1573 | 0 | req.vcpu_id = v->vcpu_id; |
1574 | 0 |
1575 | 0 | vm_event_put_request(d, d->vm_event_paging, &req); |
1576 | 0 | } |
1577 | | |
1578 | | /** |
1579 | | * p2m_mem_paging_prep - Allocate a new page for the guest |
1580 | | * @d: guest domain |
1581 | | * @gfn: guest page in paging state |
1582 | | * |
1583 | | * p2m_mem_paging_prep() will allocate a new page for the guest if the gfn is |
1584 | | * not backed by a mfn. It is called by the pager. |
1585 | | * It is required that the gfn was already populated. The gfn may already have
1586 | | * an mfn if populate was called for a gfn which was nominated but not evicted.
1587 | | * In this case only the p2mt needs to be forwarded.
1588 | | */ |
1589 | | int p2m_mem_paging_prep(struct domain *d, unsigned long gfn_l, uint64_t buffer) |
1590 | 0 | { |
1591 | 0 | struct page_info *page; |
1592 | 0 | p2m_type_t p2mt; |
1593 | 0 | p2m_access_t a; |
1594 | 0 | gfn_t gfn = _gfn(gfn_l); |
1595 | 0 | mfn_t mfn; |
1596 | 0 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
1597 | 0 | int ret, page_extant = 1; |
1598 | 0 | const void *user_ptr = (const void *) buffer; |
1599 | 0 |
1600 | 0 | if ( user_ptr ) |
1601 | 0 | /* Sanity check the buffer and bail out early if trouble */ |
1602 | 0 | if ( (buffer & (PAGE_SIZE - 1)) || |
1603 | 0 | (!access_ok(user_ptr, PAGE_SIZE)) ) |
1604 | 0 | return -EINVAL; |
1605 | 0 |
1606 | 0 | gfn_lock(p2m, gfn, 0); |
1607 | 0 |
1608 | 0 | mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL); |
1609 | 0 |
1610 | 0 | ret = -ENOENT; |
1611 | 0 | /* Allow missing pages */ |
1612 | 0 | if ( (p2mt != p2m_ram_paging_in) && (p2mt != p2m_ram_paged) ) |
1613 | 0 | goto out; |
1614 | 0 |
1615 | 0 | /* Allocate a page if the gfn does not have one yet */ |
1616 | 0 | if ( !mfn_valid(mfn) ) |
1617 | 0 | { |
1618 | 0 | /* If the user did not provide a buffer, we disallow */ |
1619 | 0 | ret = -EINVAL; |
1620 | 0 | if ( unlikely(user_ptr == NULL) ) |
1621 | 0 | goto out; |
1622 | 0 | /* Get a free page */ |
1623 | 0 | ret = -ENOMEM; |
1624 | 0 | page = alloc_domheap_page(p2m->domain, 0); |
1625 | 0 | if ( unlikely(page == NULL) ) |
1626 | 0 | goto out; |
1627 | 0 | mfn = page_to_mfn(page); |
1628 | 0 | page_extant = 0; |
1629 | 0 | } |
1630 | 0 |
1631 | 0 | /* If we were given a buffer, now is the time to use it */ |
1632 | 0 | if ( !page_extant && user_ptr ) |
1633 | 0 | { |
1634 | 0 | void *guest_map; |
1635 | 0 | int rc; |
1636 | 0 |
1637 | 0 | ASSERT( mfn_valid(mfn) ); |
1638 | 0 | guest_map = map_domain_page(mfn); |
1639 | 0 | rc = copy_from_user(guest_map, user_ptr, PAGE_SIZE); |
1640 | 0 | unmap_domain_page(guest_map); |
1641 | 0 | if ( rc ) |
1642 | 0 | { |
1643 | 0 | gdprintk(XENLOG_ERR, "Failed to load paging-in gfn %lx domain %u " |
1644 | 0 | "bytes left %d\n", gfn_l, d->domain_id, rc); |
1645 | 0 | ret = -EFAULT; |
1646 | 0 | put_page(page); /* Don't leak pages */ |
1647 | 0 | goto out; |
1648 | 0 | } |
1649 | 0 | } |
1650 | 0 |
1651 | 0 | /* Make the page already guest-accessible. If the pager still has a |
1652 | 0 | * pending resume operation, it will be idempotent p2m entry-wise, |
1653 | 0 | * but will unpause the vcpu */ |
1654 | 0 | ret = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, |
1655 | 0 | paging_mode_log_dirty(d) ? p2m_ram_logdirty |
1656 | 0 | : p2m_ram_rw, a); |
1657 | 0 | set_gpfn_from_mfn(mfn_x(mfn), gfn_l); |
1658 | 0 |
1659 | 0 | if ( !page_extant ) |
1660 | 0 | atomic_dec(&d->paged_pages); |
1661 | 0 |
1662 | 0 | out: |
1663 | 0 | gfn_unlock(p2m, gfn, 0); |
1664 | 0 | return ret; |
1665 | 0 | } |
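p2m_mem_paging_prep() is reached from the pager via the XENMEM_paging_op hypercall.  A hedged sketch of the pager-side page-in step, where pager_ctx_t, pagefile_read() and paging_op_prep() are hypothetical stand-ins for the toolstack's own state and its hypercall wrappers: the buffer must be page aligned, matching the (buffer & (PAGE_SIZE - 1)) check above, and is only needed when the page was actually evicted.

    /* Illustrative sketch only -- not part of the covered source. */
    void pager_page_in(pager_ctx_t *ctx, uint64_t gfn, uint32_t ev_flags)
    {
        void *buf = NULL;

        if ( !(ev_flags & MEM_PAGING_EVICT_FAIL) )
        {
            buf = ctx->scratch_page;        /* page-aligned scratch buffer */
            pagefile_read(ctx, gfn, buf);   /* hypothetical: saved contents */
        }
        /* else: eviction failed after nominate, so the gfn kept its mfn and
         * p2m_mem_paging_prep() only forwards the p2m type. */

        paging_op_prep(ctx, gfn, buf);      /* -> p2m_mem_paging_prep() */
    }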
1666 | | |
1667 | | /** |
1668 | | * p2m_mem_paging_resume - Resume guest gfn |
1669 | | * @d: guest domain |
1670 | | * @rsp: vm_event response received |
1671 | | * |
1672 | | * p2m_mem_paging_resume() will forward the p2mt of a gfn to ram_rw. It is |
1673 | | * called by the pager. |
1674 | | * |
1675 | | * The gfn was previously either evicted and populated, or nominated and |
1676 | | * populated. In both cases the p2mt will be p2m_ram_paging_in, set by
1677 | | * p2m_mem_paging_populate(); a page that was merely nominated keeps a valid
1678 | | * mfn even though the pager did not call p2m_mem_paging_prep().
1679 | | * |
1680 | | * If the gfn was dropped the vcpu needs to be unpaused. |
1681 | | */ |
1682 | | |
1683 | | void p2m_mem_paging_resume(struct domain *d, vm_event_response_t *rsp) |
1684 | 0 | { |
1685 | 0 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
1686 | 0 | p2m_type_t p2mt; |
1687 | 0 | p2m_access_t a; |
1688 | 0 | mfn_t mfn; |
1689 | 0 |
1690 | 0 | /* Fix p2m entry if the page was not dropped */ |
1691 | 0 | if ( !(rsp->u.mem_paging.flags & MEM_PAGING_DROP_PAGE) ) |
1692 | 0 | { |
1693 | 0 | gfn_t gfn = _gfn(rsp->u.mem_paging.gfn);
1694 | 0 |
1695 | 0 | gfn_lock(p2m, gfn, 0); |
1696 | 0 | mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL); |
1697 | 0 | /* |
1698 | 0 | * Allow only pages which were prepared properly, or pages which |
1699 | 0 | * were nominated but not evicted. |
1700 | 0 | */ |
1701 | 0 | if ( mfn_valid(mfn) && (p2mt == p2m_ram_paging_in) ) |
1702 | 0 | { |
1703 | 0 | p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, |
1704 | 0 | paging_mode_log_dirty(d) ? p2m_ram_logdirty : |
1705 | 0 | p2m_ram_rw, a); |
1706 | 0 | set_gpfn_from_mfn(mfn_x(mfn), gfn_x(gfn)); |
1707 | 0 | } |
1708 | 0 | gfn_unlock(p2m, gfn, 0); |
1709 | 0 | } |
1710 | 0 | } |
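The response handled above is built by the pager from the populate request it took off the ring.  A hedged sketch of that reply, assuming req points at the original request and page_was_discarded is a hypothetical predicate; field names follow public/vm_event.h.  Echoing req->flags preserves VM_EVENT_FLAG_VCPU_PAUSED, so the generic vm_event code unpauses the vcpu, while MEM_PAGING_DROP_PAGE makes the p2m fixup above be skipped.

    /* Illustrative sketch only -- not part of the covered source. */
    vm_event_response_t rsp = {
        .version = VM_EVENT_INTERFACE_VERSION,
        .reason  = VM_EVENT_REASON_MEM_PAGING,
        .vcpu_id = req->vcpu_id,
        .flags   = req->flags,          /* keep VM_EVENT_FLAG_VCPU_PAUSED */
    };

    rsp.u.mem_paging.gfn = req->u.mem_paging.gfn;
    if ( page_was_discarded )           /* hypothetical predicate */
        rsp.u.mem_paging.flags |= MEM_PAGING_DROP_PAGE;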
1711 | | |
1712 | | void p2m_altp2m_check(struct vcpu *v, uint16_t idx) |
1713 | 0 | { |
1714 | 0 | if ( altp2m_active(v->domain) ) |
1715 | 0 | p2m_switch_vcpu_altp2m_by_id(v, idx); |
1716 | 0 | } |
1717 | | |
1718 | | static struct p2m_domain * |
1719 | | p2m_getlru_nestedp2m(struct domain *d, struct p2m_domain *p2m) |
1720 | 0 | { |
1721 | 0 | struct list_head *lru_list = &p2m_get_hostp2m(d)->np2m_list; |
1722 | 0 | |
1723 | 0 | ASSERT(!list_empty(lru_list)); |
1724 | 0 |
1725 | 0 | if ( p2m == NULL ) |
1726 | 0 | p2m = list_entry(lru_list->prev, struct p2m_domain, np2m_list); |
1727 | 0 |
1728 | 0 | list_move(&p2m->np2m_list, lru_list); |
1729 | 0 |
1730 | 0 | return p2m; |
1731 | 0 | } |
1732 | | |
1733 | | static void |
1734 | | p2m_flush_table_locked(struct p2m_domain *p2m) |
1735 | 0 | { |
1736 | 0 | struct page_info *top, *pg; |
1737 | 0 | struct domain *d = p2m->domain; |
1738 | 0 | mfn_t mfn; |
1739 | 0 |
1740 | 0 | ASSERT(p2m_locked_by_me(p2m)); |
1741 | 0 |
1742 | 0 | /* |
1743 | 0 | * "Host" p2m tables can have shared entries &c that need a bit more care |
1744 | 0 | * when discarding them. |
1745 | 0 | */ |
1746 | 0 | ASSERT(!p2m_is_hostp2m(p2m)); |
1747 | 0 | /* Nested p2m's do not do pod, hence the asserts (and no pod lock) */
1748 | 0 | ASSERT(page_list_empty(&p2m->pod.super)); |
1749 | 0 | ASSERT(page_list_empty(&p2m->pod.single)); |
1750 | 0 |
1751 | 0 | /* No need to flush if it's already empty */ |
1752 | 0 | if ( p2m_is_nestedp2m(p2m) && p2m->np2m_base == P2M_BASE_EADDR ) |
1753 | 0 | return; |
1754 | 0 |
1755 | 0 | /* This is no longer a valid nested p2m for any address space */ |
1756 | 0 | p2m->np2m_base = P2M_BASE_EADDR; |
1757 | 0 | p2m->np2m_generation++; |
1758 | 0 |
1759 | 0 | /* Make sure nobody else is using this p2m table */ |
1760 | 0 | nestedhvm_vmcx_flushtlb(p2m); |
1761 | 0 |
1762 | 0 | /* Zap the top level of the trie */ |
1763 | 0 | mfn = pagetable_get_mfn(p2m_get_pagetable(p2m)); |
1764 | 0 | clear_domain_page(mfn); |
1765 | 0 |
1766 | 0 | /* Free the rest of the trie pages back to the paging pool */ |
1767 | 0 | top = mfn_to_page(mfn); |
1768 | 0 | while ( (pg = page_list_remove_head(&p2m->pages)) ) |
1769 | 0 | { |
1770 | 0 | if ( pg != top ) |
1771 | 0 | d->arch.paging.free_page(d, pg); |
1772 | 0 | } |
1773 | 0 | page_list_add(top, &p2m->pages); |
1774 | 0 | } |
1775 | | |
1776 | | /* Reset this p2m table to be empty */ |
1777 | | static void |
1778 | | p2m_flush_table(struct p2m_domain *p2m) |
1779 | 0 | { |
1780 | 0 | p2m_lock(p2m); |
1781 | 0 | p2m_flush_table_locked(p2m); |
1782 | 0 | p2m_unlock(p2m); |
1783 | 0 | } |
1784 | | |
1785 | | void |
1786 | | p2m_flush(struct vcpu *v, struct p2m_domain *p2m) |
1787 | 0 | { |
1788 | 0 | ASSERT(v->domain == p2m->domain); |
1789 | 0 | vcpu_nestedhvm(v).nv_p2m = NULL; |
1790 | 0 | p2m_flush_table(p2m); |
1791 | 0 | hvm_asid_flush_vcpu(v); |
1792 | 0 | } |
1793 | | |
1794 | | void |
1795 | | p2m_flush_nestedp2m(struct domain *d) |
1796 | 0 | { |
1797 | 0 | int i; |
1798 | 0 | for ( i = 0; i < MAX_NESTEDP2M; i++ ) |
1799 | 0 | p2m_flush_table(d->arch.nested_p2m[i]); |
1800 | 0 | } |
1801 | | |
1802 | | void np2m_flush_base(struct vcpu *v, unsigned long np2m_base) |
1803 | 0 | { |
1804 | 0 | struct domain *d = v->domain; |
1805 | 0 | struct p2m_domain *p2m; |
1806 | 0 | unsigned int i; |
1807 | 0 |
1808 | 0 | np2m_base &= ~(0xfffull); |
1809 | 0 |
1810 | 0 | nestedp2m_lock(d); |
1811 | 0 | for ( i = 0; i < MAX_NESTEDP2M; i++ ) |
1812 | 0 | { |
1813 | 0 | p2m = d->arch.nested_p2m[i]; |
1814 | 0 | p2m_lock(p2m); |
1815 | 0 | if ( p2m->np2m_base == np2m_base ) |
1816 | 0 | { |
1817 | 0 | p2m_flush_table_locked(p2m); |
1818 | 0 | p2m_unlock(p2m); |
1819 | 0 | break; |
1820 | 0 | } |
1821 | 0 | p2m_unlock(p2m); |
1822 | 0 | } |
1823 | 0 | nestedp2m_unlock(d); |
1824 | 0 | } |
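The ~0xfff mask strips the low 12 bits of the base loaded from the L1 hypervisor; those bits hold control flags (e.g. the EPT memory type and walk length on VMX), not address bits, and masking them also keeps a real base from comparing equal to P2M_BASE_EADDR.  A standalone check of the arithmetic with a made-up example value:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t l1_eptp = 0x0000000ab549601eULL;  /* made-up base | flags */

        /* Same masking as np2m_base &= ~(0xfffull) above. */
        assert((l1_eptp & ~0xfffULL) == 0x0000000ab5496000ULL);
        return 0;
    }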
1825 | | |
1826 | | static void assign_np2m(struct vcpu *v, struct p2m_domain *p2m) |
1827 | 0 | { |
1828 | 0 | struct nestedvcpu *nv = &vcpu_nestedhvm(v); |
1829 | 0 | struct domain *d = v->domain; |
1830 | 0 |
1831 | 0 | /* Bring this np2m to the top of the LRU list */ |
1832 | 0 | p2m_getlru_nestedp2m(d, p2m); |
1833 | 0 |
1834 | 0 | nv->nv_flushp2m = 0; |
1835 | 0 | nv->nv_p2m = p2m; |
1836 | 0 | nv->np2m_generation = p2m->np2m_generation; |
1837 | 0 | cpumask_set_cpu(v->processor, p2m->dirty_cpumask); |
1838 | 0 | } |
1839 | | |
1840 | | static void nvcpu_flush(struct vcpu *v) |
1841 | 0 | { |
1842 | 0 | hvm_asid_flush_vcpu(v); |
1843 | 0 | vcpu_nestedhvm(v).stale_np2m = true; |
1844 | 0 | } |
1845 | | |
1846 | | struct p2m_domain * |
1847 | | p2m_get_nestedp2m_locked(struct vcpu *v) |
1848 | 0 | { |
1849 | 0 | struct nestedvcpu *nv = &vcpu_nestedhvm(v); |
1850 | 0 | struct domain *d = v->domain; |
1851 | 0 | struct p2m_domain *p2m; |
1852 | 0 | uint64_t np2m_base = nhvm_vcpu_p2m_base(v); |
1853 | 0 | unsigned int i; |
1854 | 0 | bool needs_flush = true; |
1855 | 0 |
1856 | 0 | /* Mask out low bits; this avoids collisions with P2M_BASE_EADDR */ |
1857 | 0 | np2m_base &= ~(0xfffull); |
1858 | 0 |
1859 | 0 | if ( nv->nv_flushp2m && nv->nv_p2m ) {
1860 | 0 | nv->nv_p2m = NULL; |
1861 | 0 | } |
1862 | 0 |
1863 | 0 | nestedp2m_lock(d); |
1864 | 0 | p2m = nv->nv_p2m; |
1865 | 0 | if ( p2m ) |
1866 | 0 | { |
1867 | 0 | p2m_lock(p2m); |
1868 | 0 | if ( p2m->np2m_base == np2m_base ) |
1869 | 0 | { |
1870 | 0 | /* Check if np2m was flushed just before the lock */ |
1871 | 0 | if ( nv->np2m_generation == p2m->np2m_generation ) |
1872 | 0 | needs_flush = false; |
1873 | 0 | /* np2m is up-to-date */ |
1874 | 0 | goto found; |
1875 | 0 | } |
1876 | 0 | else if ( p2m->np2m_base != P2M_BASE_EADDR ) |
1877 | 0 | { |
1878 | 0 | /* vCPU is switching from some other valid np2m */ |
1879 | 0 | cpumask_clear_cpu(v->processor, p2m->dirty_cpumask); |
1880 | 0 | } |
1881 | 0 | p2m_unlock(p2m); |
1882 | 0 | } |
1883 | 0 |
1884 | 0 | /* Share a np2m if possible */ |
1885 | 0 | for ( i = 0; i < MAX_NESTEDP2M; i++ ) |
1886 | 0 | { |
1887 | 0 | p2m = d->arch.nested_p2m[i]; |
1888 | 0 | p2m_lock(p2m); |
1889 | 0 |
1890 | 0 | if ( p2m->np2m_base == np2m_base ) |
1891 | 0 | goto found; |
1892 | 0 |
1893 | 0 | p2m_unlock(p2m); |
1894 | 0 | } |
1895 | 0 |
1896 | 0 | /* All p2m's are or were in use. Take the least recently used one,
1897 | 0 | * flush it and reuse. */
1898 | 0 | p2m = p2m_getlru_nestedp2m(d, NULL); |
1899 | 0 | p2m_flush_table(p2m); |
1900 | 0 | p2m_lock(p2m); |
1901 | 0 |
1902 | 0 | found: |
1903 | 0 | if ( needs_flush ) |
1904 | 0 | nvcpu_flush(v); |
1905 | 0 | p2m->np2m_base = np2m_base; |
1906 | 0 | assign_np2m(v, p2m); |
1907 | 0 | nestedp2m_unlock(d); |
1908 | 0 |
1909 | 0 | return p2m; |
1910 | 0 | } |
1911 | | |
1912 | | struct p2m_domain *p2m_get_nestedp2m(struct vcpu *v) |
1913 | 0 | { |
1914 | 0 | struct p2m_domain *p2m = p2m_get_nestedp2m_locked(v); |
1915 | 0 | p2m_unlock(p2m); |
1916 | 0 |
1917 | 0 | return p2m; |
1918 | 0 | } |
1919 | | |
1920 | | struct p2m_domain * |
1921 | | p2m_get_p2m(struct vcpu *v) |
1922 | 0 | { |
1923 | 0 | if ( !nestedhvm_is_n2(v) )
1924 | 0 | return p2m_get_hostp2m(v->domain); |
1925 | 0 |
1926 | 0 | return p2m_get_nestedp2m(v); |
1927 | 0 | } |
1928 | | |
1929 | | void np2m_schedule(int dir) |
1930 | 327k | { |
1931 | 327k | struct vcpu *curr = current; |
1932 | 327k | struct nestedvcpu *nv = &vcpu_nestedhvm(curr); |
1933 | 327k | struct p2m_domain *p2m; |
1934 | 327k | |
1935 | 327k | ASSERT(dir == NP2M_SCHEDLE_IN || dir == NP2M_SCHEDLE_OUT); |
1936 | 327k | |
1937 | 327k | if ( !nestedhvm_enabled(curr->domain) || |
1938 | 0 | !nestedhvm_vcpu_in_guestmode(curr) || |
1939 | 0 | !nestedhvm_paging_mode_hap(curr) ) |
1940 | 327k | return; |
1941 | 327k | |
1942 | 61 | p2m = nv->nv_p2m; |
1943 | 61 | if ( p2m ) |
1944 | 0 | { |
1945 | 0 | bool np2m_valid; |
1946 | 0 |
1947 | 0 | p2m_lock(p2m); |
1948 | 0 | np2m_valid = p2m->np2m_base == nhvm_vcpu_p2m_base(curr) && |
1949 | 0 | nv->np2m_generation == p2m->np2m_generation; |
1950 | 0 | if ( dir == NP2M_SCHEDLE_OUT && np2m_valid ) |
1951 | 0 | { |
1952 | 0 | /* |
1953 | 0 | * The np2m is up to date but this vCPU will no longer use it, |
1954 | 0 | * which means there are no reasons to send a flush IPI. |
1955 | 0 | */ |
1956 | 0 | cpumask_clear_cpu(curr->processor, p2m->dirty_cpumask); |
1957 | 0 | } |
1958 | 0 | else if ( dir == NP2M_SCHEDLE_IN ) |
1959 | 0 | { |
1960 | 0 | if ( !np2m_valid ) |
1961 | 0 | { |
1962 | 0 | /* This vCPU's np2m was flushed while it was not runnable */ |
1963 | 0 | hvm_asid_flush_core(); |
1964 | 0 | vcpu_nestedhvm(curr).nv_p2m = NULL; |
1965 | 0 | } |
1966 | 0 | else |
1967 | 0 | cpumask_set_cpu(curr->processor, p2m->dirty_cpumask); |
1968 | 0 | } |
1969 | 0 | p2m_unlock(p2m); |
1970 | 0 | } |
1971 | 61 | } |
1972 | | |
1973 | | unsigned long paging_gva_to_gfn(struct vcpu *v, |
1974 | | unsigned long va, |
1975 | | uint32_t *pfec) |
1976 | 364k | { |
1977 | 364k | struct p2m_domain *hostp2m = p2m_get_hostp2m(v->domain); |
1978 | 364k | const struct paging_mode *hostmode = paging_get_hostmode(v); |
1979 | 364k | |
1980 | 364k | if ( is_hvm_vcpu(v) && paging_mode_hap(v->domain) && nestedhvm_is_n2(v) ) |
1981 | 0 | { |
1982 | 0 | unsigned long l2_gfn, l1_gfn; |
1983 | 0 | struct p2m_domain *p2m; |
1984 | 0 | const struct paging_mode *mode; |
1985 | 0 | uint8_t l1_p2ma; |
1986 | 0 | unsigned int l1_page_order; |
1987 | 0 | int rv; |
1988 | 0 |
|
1989 | 0 | /* translate l2 guest va into l2 guest gfn */ |
1990 | 0 | p2m = p2m_get_nestedp2m(v); |
1991 | 0 | mode = paging_get_nestedmode(v); |
1992 | 0 | l2_gfn = mode->gva_to_gfn(v, p2m, va, pfec); |
1993 | 0 |
1994 | 0 | if ( l2_gfn == gfn_x(INVALID_GFN) ) |
1995 | 0 | return gfn_x(INVALID_GFN); |
1996 | 0 |
1997 | 0 | /* translate l2 guest gfn into l1 guest gfn */ |
1998 | 0 | rv = nestedhap_walk_L1_p2m(v, l2_gfn, &l1_gfn, &l1_page_order, &l1_p2ma, |
1999 | 0 | 1, |
2000 | 0 | !!(*pfec & PFEC_write_access), |
2001 | 0 | !!(*pfec & PFEC_insn_fetch)); |
2002 | 0 |
2003 | 0 | if ( rv != NESTEDHVM_PAGEFAULT_DONE ) |
2004 | 0 | return gfn_x(INVALID_GFN); |
2005 | 0 |
2006 | 0 | /* |
2007 | 0 | * Sanity check that l1_gfn can be used properly as a 4K mapping, even |
2008 | 0 | * if it is mapped by a nested superpage.
2009 | 0 | */ |
2010 | 0 | ASSERT((l2_gfn & ((1ul << l1_page_order) - 1)) == |
2011 | 0 | (l1_gfn & ((1ul << l1_page_order) - 1))); |
2012 | 0 |
2013 | 0 | return l1_gfn; |
2014 | 0 | } |
2015 | 364k | |
2016 | 364k | return hostmode->gva_to_gfn(v, hostp2m, va, pfec); |
2017 | 364k | } |
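A hedged sketch of the calling convention: the caller seeds *pfec with the kind of access being performed and, on failure, inspects the synthetic bits the walkers may have set in order to decide how to proceed.

    /* Illustrative sketch only -- not part of the covered source. */
    uint32_t pfec = PFEC_page_present | PFEC_write_access;
    unsigned long gfn = paging_gva_to_gfn(v, va, &pfec);

    if ( gfn == gfn_x(INVALID_GFN) )
    {
        if ( pfec & PFEC_page_paged )
            /* page-in has been kicked off; retry the operation later */;
        else if ( pfec & PFEC_page_shared )
            /* unshare and retry */;
        else
            /* genuine guest fault: inject #PF using pfec */;
    }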
2018 | | |
2019 | | /* |
2020 | | * If the map is non-NULL, we leave this function having acquired an extra ref |
2021 | | * on mfn_to_page(*mfn). In all cases, *pfec contains appropriate |
2022 | | * synthetic/structure PFEC_* bits. |
2023 | | */ |
2024 | | void *map_domain_gfn(struct p2m_domain *p2m, gfn_t gfn, mfn_t *mfn, |
2025 | | p2m_type_t *p2mt, p2m_query_t q, uint32_t *pfec) |
2026 | 867k | { |
2027 | 867k | struct page_info *page; |
2028 | 867k | |
2029 | 867k | if ( !gfn_valid(p2m->domain, gfn) ) |
2030 | 0 | { |
2031 | 0 | *pfec = PFEC_reserved_bit | PFEC_page_present; |
2032 | 0 | return NULL; |
2033 | 0 | } |
2034 | 867k | |
2035 | 867k | /* Translate the gfn, unsharing if shared. */ |
2036 | 867k | page = p2m_get_page_from_gfn(p2m, gfn, p2mt, NULL, q); |
2037 | 867k | if ( p2m_is_paging(*p2mt) ) |
2038 | 0 | { |
2039 | 0 | ASSERT(p2m_is_hostp2m(p2m)); |
2040 | 0 | if ( page ) |
2041 | 0 | put_page(page); |
2042 | 0 | p2m_mem_paging_populate(p2m->domain, gfn_x(gfn)); |
2043 | 0 | *pfec = PFEC_page_paged; |
2044 | 0 | return NULL; |
2045 | 0 | } |
2046 | 867k | if ( p2m_is_shared(*p2mt) ) |
2047 | 0 | { |
2048 | 0 | if ( page ) |
2049 | 0 | put_page(page); |
2050 | 0 | *pfec = PFEC_page_shared; |
2051 | 0 | return NULL; |
2052 | 0 | } |
2053 | 867k | if ( !page ) |
2054 | 0 | { |
2055 | 0 | *pfec = 0; |
2056 | 0 | return NULL; |
2057 | 0 | } |
2058 | 867k | |
2059 | 867k | *pfec = PFEC_page_present; |
2060 | 867k | *mfn = page_to_mfn(page); |
2061 | 867k | ASSERT(mfn_valid(*mfn)); |
2062 | 867k | |
2063 | 867k | return map_domain_page(*mfn); |
2064 | 867k | } |
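On success the caller owns both the mapping and the extra page reference mentioned in the comment above, and must release both.  A minimal usage sketch:

    /* Illustrative sketch only -- not part of the covered source. */
    mfn_t mfn;
    p2m_type_t p2mt;
    uint32_t pfec;
    void *ptr = map_domain_gfn(p2m, gfn, &mfn, &p2mt, P2M_ALLOC, &pfec);

    if ( !ptr )
        return pfec;   /* synthetic PFEC_* bits describe the failure */

    /* ... read or write the page through ptr ... */

    unmap_domain_page(ptr);
    put_page(mfn_to_page(mfn));   /* drop the reference taken above */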
2065 | | |
2066 | | static unsigned int mmio_order(const struct domain *d, |
2067 | | unsigned long start_fn, unsigned long nr) |
2068 | 507k | { |
2069 | 507k | /* |
2070 | 507k | * Note that the !iommu_use_hap_pt() here has three effects: |
2071 | 507k | * - cover iommu_{,un}map_page() not having an "order" input yet, |
2072 | 507k | * - exclude shadow mode (which doesn't support large MMIO mappings), |
2073 | 507k | * - exclude PV guests, should execution reach this code for such. |
2074 | 507k | * So be careful when altering this. |
2075 | 507k | */ |
2076 | 507k | if ( !need_iommu(d) || !iommu_use_hap_pt(d) || |
2077 | 0 | (start_fn & ((1UL << PAGE_ORDER_2M) - 1)) || !(nr >> PAGE_ORDER_2M) ) |
2078 | 507k | return PAGE_ORDER_4K; |
2079 | 507k | |
2080 | 0 | if ( 0 /* |
2081 | 0 | * Don't use 1Gb pages, to limit the iteration count in |
2082 | 0 | * set_typed_p2m_entry() when it needs to zap M2P entries |
2083 | 0 | * for a RAM range. |
2084 | 0 | */ && |
2085 | 0 | !(start_fn & ((1UL << PAGE_ORDER_1G) - 1)) && (nr >> PAGE_ORDER_1G) && |
2086 | 0 | hap_has_1gb ) |
2087 | 0 | return PAGE_ORDER_1G; |
2088 | 0 |
2089 | 0 | if ( hap_has_2mb ) |
2090 | 0 | return PAGE_ORDER_2M; |
2091 | 0 |
|
2092 | 0 | return PAGE_ORDER_4K; |
2093 | 0 | } |
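The callers below OR the gfn and mfn together before passing them in as start_fn: a superpage mapping needs both frame numbers superpage-aligned, and OR-ing them lets a single alignment test cover both.  A standalone check of the trick with made-up frame numbers:

    #include <assert.h>

    #define PAGE_ORDER_2M 9   /* 512 4k frames per 2M superpage */

    int main(void)
    {
        unsigned long mask = (1UL << PAGE_ORDER_2M) - 1;

        /* Jointly 2M-aligned pair: the low 9 bits of the OR are clear. */
        assert(((0x40200UL | 0x8a400UL) & mask) == 0);

        /* gfn off by 128 frames: the OR exposes it, forcing 4k mappings. */
        assert(((0x40280UL | 0x8a400UL) & mask) == 0x80);
        return 0;
    }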
2094 | | |
2095 | 515k | #define MAP_MMIO_MAX_ITER 64 /* pretty arbitrary */ |
2096 | | |
2097 | | int map_mmio_regions(struct domain *d, |
2098 | | gfn_t start_gfn, |
2099 | | unsigned long nr, |
2100 | | mfn_t mfn) |
2101 | 4.64k | { |
2102 | 4.64k | int ret = 0; |
2103 | 4.64k | unsigned long i; |
2104 | 4.64k | unsigned int iter, order; |
2105 | 4.64k | |
2106 | 4.64k | if ( !paging_mode_translate(d) ) |
2107 | 0 | return 0; |
2108 | 4.64k | |
2109 | 293k | for ( iter = i = 0; i < nr && iter < MAP_MMIO_MAX_ITER; |
2110 | 289k | i += 1UL << order, ++iter ) |
2111 | 289k | { |
2112 | 289k | /* OR'ing gfn and mfn values will return an order suitable to both. */ |
2113 | 289k | for ( order = mmio_order(d, (gfn_x(start_gfn) + i) | (mfn_x(mfn) + i), nr - i); ; |
2114 | 0 | order = ret - 1 ) |
2115 | 289k | { |
2116 | 289k | ret = set_mmio_p2m_entry(d, gfn_x(start_gfn) + i, |
2117 | 289k | mfn_add(mfn, i), order, |
2118 | 289k | p2m_get_hostp2m(d)->default_access); |
2119 | 289k | if ( ret <= 0 ) |
2120 | 289k | break; |
2121 | 0 | ASSERT(ret <= order); |
2122 | 0 | } |
2123 | 289k | if ( ret < 0 ) |
2124 | 0 | break; |
2125 | 289k | } |
2126 | 4.64k | |
2127 | 4.47k | return i == nr ? 0 : i ?: ret; |
2128 | 4.64k | } |
2129 | | |
2130 | | int unmap_mmio_regions(struct domain *d, |
2131 | | gfn_t start_gfn, |
2132 | | unsigned long nr, |
2133 | | mfn_t mfn) |
2134 | 3.51k | { |
2135 | 3.51k | int ret = 0; |
2136 | 3.51k | unsigned long i; |
2137 | 3.51k | unsigned int iter, order; |
2138 | 3.51k | |
2139 | 3.51k | if ( !paging_mode_translate(d) ) |
2140 | 0 | return 0; |
2141 | 3.51k | |
2142 | 221k | for ( iter = i = 0; i < nr && iter < MAP_MMIO_MAX_ITER; |
2143 | 218k | i += 1UL << order, ++iter ) |
2144 | 218k | { |
2145 | 218k | /* OR'ing gfn and mfn values will return an order suitable to both. */ |
2146 | 218k | for ( order = mmio_order(d, (gfn_x(start_gfn) + i) | (mfn_x(mfn) + i), nr - i); ; |
2147 | 0 | order = ret - 1 ) |
2148 | 218k | { |
2149 | 218k | ret = clear_mmio_p2m_entry(d, gfn_x(start_gfn) + i, |
2150 | 218k | mfn_add(mfn, i), order); |
2151 | 218k | if ( ret <= 0 ) |
2152 | 218k | break; |
2153 | 0 | ASSERT(ret <= order); |
2154 | 0 | } |
2155 | 218k | if ( ret < 0 ) |
2156 | 0 | break; |
2157 | 218k | } |
2158 | 3.51k | |
2159 | 3.36k | return i == nr ? 0 : i ?: ret; |
2160 | 3.51k | } |
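Both helpers above cap their work at MAP_MMIO_MAX_ITER iterations and return the number of frames actually handled (0 once the whole range is done, negative on a hard error), so callers loop over the partial progress.  A hedged sketch of such a loop; a real caller would also arrange a hypercall continuation between chunks:

    /* Illustrative sketch only -- not part of the covered source. */
    static int map_whole_region(struct domain *d, gfn_t gfn, mfn_t mfn,
                                unsigned long nr)
    {
        while ( nr )
        {
            int rc = map_mmio_regions(d, gfn, nr, mfn);

            if ( rc <= 0 )
                return rc;            /* 0: all mapped, <0: hard error */

            gfn = gfn_add(gfn, rc);   /* advance past the mapped chunk */
            mfn = mfn_add(mfn, rc);
            nr -= rc;
        }
        return 0;
    }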
2161 | | |
2162 | | bool_t p2m_switch_vcpu_altp2m_by_id(struct vcpu *v, unsigned int idx) |
2163 | 0 | { |
2164 | 0 | struct domain *d = v->domain; |
2165 | 0 | bool_t rc = 0; |
2166 | 0 |
2167 | 0 | if ( idx >= MAX_ALTP2M ) |
2168 | 0 | return rc; |
2169 | 0 |
2170 | 0 | altp2m_list_lock(d); |
2171 | 0 |
2172 | 0 | if ( d->arch.altp2m_eptp[idx] != mfn_x(INVALID_MFN) ) |
2173 | 0 | { |
2174 | 0 | if ( idx != vcpu_altp2m(v).p2midx ) |
2175 | 0 | { |
2176 | 0 | atomic_dec(&p2m_get_altp2m(v)->active_vcpus); |
2177 | 0 | vcpu_altp2m(v).p2midx = idx; |
2178 | 0 | atomic_inc(&p2m_get_altp2m(v)->active_vcpus); |
2179 | 0 | altp2m_vcpu_update_p2m(v); |
2180 | 0 | } |
2181 | 0 | rc = 1; |
2182 | 0 | } |
2183 | 0 |
2184 | 0 | altp2m_list_unlock(d); |
2185 | 0 | return rc; |
2186 | 0 | } |
2187 | | |
2188 | | /* |
2189 | | * If the fault is for a not present entry: |
2190 | | * if the entry in the host p2m has a valid mfn, copy it and retry |
2191 | | * else indicate that outer handler should handle fault |
2192 | | * |
2193 | | * If the fault is for a present entry: |
2194 | | * indicate that outer handler should handle fault |
2195 | | */ |
2196 | | |
2197 | | bool_t p2m_altp2m_lazy_copy(struct vcpu *v, paddr_t gpa, |
2198 | | unsigned long gla, struct npfec npfec, |
2199 | | struct p2m_domain **ap2m) |
2200 | 0 | { |
2201 | 0 | struct p2m_domain *hp2m = p2m_get_hostp2m(v->domain); |
2202 | 0 | p2m_type_t p2mt; |
2203 | 0 | p2m_access_t p2ma; |
2204 | 0 | unsigned int page_order; |
2205 | 0 | gfn_t gfn = _gfn(paddr_to_pfn(gpa)); |
2206 | 0 | unsigned long mask; |
2207 | 0 | mfn_t mfn; |
2208 | 0 | int rv; |
2209 | 0 |
2210 | 0 | *ap2m = p2m_get_altp2m(v); |
2211 | 0 |
2212 | 0 | mfn = get_gfn_type_access(*ap2m, gfn_x(gfn), &p2mt, &p2ma, |
2213 | 0 | 0, &page_order); |
2214 | 0 | __put_gfn(*ap2m, gfn_x(gfn)); |
2215 | 0 |
2216 | 0 | if ( !mfn_eq(mfn, INVALID_MFN) ) |
2217 | 0 | return 0; |
2218 | 0 |
2219 | 0 | mfn = get_gfn_type_access(hp2m, gfn_x(gfn), &p2mt, &p2ma, |
2220 | 0 | P2M_ALLOC, &page_order); |
2221 | 0 | __put_gfn(hp2m, gfn_x(gfn)); |
2222 | 0 |
2223 | 0 | if ( mfn_eq(mfn, INVALID_MFN) ) |
2224 | 0 | return 0; |
2225 | 0 |
2226 | 0 | p2m_lock(*ap2m); |
2227 | 0 |
2228 | 0 | /* |
2229 | 0 | * If this is a superpage mapping, round down both frame numbers |
2230 | 0 | * to the start of the superpage. |
2231 | 0 | */ |
2232 | 0 | mask = ~((1UL << page_order) - 1); |
2233 | 0 | mfn = _mfn(mfn_x(mfn) & mask); |
2234 | 0 | gfn = _gfn(gfn_x(gfn) & mask); |
2235 | 0 |
|
2236 | 0 | rv = p2m_set_entry(*ap2m, gfn, mfn, page_order, p2mt, p2ma); |
2237 | 0 | p2m_unlock(*ap2m); |
2238 | 0 |
2239 | 0 | if ( rv ) |
2240 | 0 | { |
2241 | 0 | gdprintk(XENLOG_ERR, |
2242 | 0 | "failed to set entry for %#"PRIx64" -> %#"PRIx64" p2m %#"PRIx64"\n", |
2243 | 0 | gfn_x(gfn), mfn_x(mfn), (unsigned long)*ap2m); |
2244 | 0 | domain_crash(hp2m->domain); |
2245 | 0 | } |
2246 | 0 |
2247 | 0 | return 1; |
2248 | 0 | } |
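A worked example of the superpage rounding above, for a 2M mapping (page_order = 9) and made-up frame numbers: the whole superpage entry is copied, and since a valid superpage mapping keeps gfn and mfn congruent modulo the superpage size, the faulting offset within it is preserved.

    #include <assert.h>

    int main(void)
    {
        unsigned long order = 9;                        /* PAGE_ORDER_2M */
        unsigned long mask = ~((1UL << order) - 1);
        unsigned long gfn = 0x40342UL, mfn = 0x8a542UL; /* made-up numbers */

        assert((gfn & mask) == 0x40200UL);      /* guest superpage start */
        assert((mfn & mask) == 0x8a400UL);      /* host superpage start  */
        assert((gfn & ~mask) == (mfn & ~mask)); /* offset 0x142 preserved */
        return 0;
    }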
2249 | | |
2250 | | void p2m_flush_altp2m(struct domain *d) |
2251 | 0 | { |
2252 | 0 | unsigned int i; |
2253 | 0 |
2254 | 0 | altp2m_list_lock(d); |
2255 | 0 |
2256 | 0 | for ( i = 0; i < MAX_ALTP2M; i++ ) |
2257 | 0 | { |
2258 | 0 | p2m_flush_table(d->arch.altp2m_p2m[i]); |
2259 | 0 | /* Uninit and reinit ept to force TLB shootdown */ |
2260 | 0 | ept_p2m_uninit(d->arch.altp2m_p2m[i]); |
2261 | 0 | ept_p2m_init(d->arch.altp2m_p2m[i]); |
2262 | 0 | d->arch.altp2m_eptp[i] = mfn_x(INVALID_MFN); |
2263 | 0 | } |
2264 | 0 |
2265 | 0 | altp2m_list_unlock(d); |
2266 | 0 | } |
2267 | | |
2268 | | int p2m_init_altp2m_by_id(struct domain *d, unsigned int idx) |
2269 | 0 | { |
2270 | 0 | int rc = -EINVAL; |
2271 | 0 |
2272 | 0 | if ( idx >= MAX_ALTP2M ) |
2273 | 0 | return rc; |
2274 | 0 |
2275 | 0 | altp2m_list_lock(d); |
2276 | 0 |
2277 | 0 | if ( d->arch.altp2m_eptp[idx] == mfn_x(INVALID_MFN) ) |
2278 | 0 | { |
2279 | 0 | p2m_init_altp2m_ept(d, idx); |
2280 | 0 | rc = 0; |
2281 | 0 | } |
2282 | 0 |
2283 | 0 | altp2m_list_unlock(d); |
2284 | 0 | return rc; |
2285 | 0 | } |
2286 | | |
2287 | | int p2m_init_next_altp2m(struct domain *d, uint16_t *idx) |
2288 | 0 | { |
2289 | 0 | int rc = -EINVAL; |
2290 | 0 | unsigned int i; |
2291 | 0 |
2292 | 0 | altp2m_list_lock(d); |
2293 | 0 |
2294 | 0 | for ( i = 0; i < MAX_ALTP2M; i++ ) |
2295 | 0 | { |
2296 | 0 | if ( d->arch.altp2m_eptp[i] != mfn_x(INVALID_MFN) ) |
2297 | 0 | continue; |
2298 | 0 |
2299 | 0 | p2m_init_altp2m_ept(d, i); |
2300 | 0 | *idx = i; |
2301 | 0 | rc = 0; |
2302 | 0 |
2303 | 0 | break; |
2304 | 0 | } |
2305 | 0 |
2306 | 0 | altp2m_list_unlock(d); |
2307 | 0 | return rc; |
2308 | 0 | } |
2309 | | |
2310 | | int p2m_destroy_altp2m_by_id(struct domain *d, unsigned int idx) |
2311 | 0 | { |
2312 | 0 | struct p2m_domain *p2m; |
2313 | 0 | int rc = -EBUSY; |
2314 | 0 |
2315 | 0 | if ( !idx || idx >= MAX_ALTP2M ) |
2316 | 0 | return rc; |
2317 | 0 |
2318 | 0 | domain_pause_except_self(d); |
2319 | 0 |
2320 | 0 | altp2m_list_lock(d); |
2321 | 0 |
2322 | 0 | if ( d->arch.altp2m_eptp[idx] != mfn_x(INVALID_MFN) ) |
2323 | 0 | { |
2324 | 0 | p2m = d->arch.altp2m_p2m[idx]; |
2325 | 0 |
2326 | 0 | if ( !_atomic_read(p2m->active_vcpus) ) |
2327 | 0 | { |
2328 | 0 | p2m_flush_table(d->arch.altp2m_p2m[idx]); |
2329 | 0 | /* Uninit and reinit ept to force TLB shootdown */ |
2330 | 0 | ept_p2m_uninit(d->arch.altp2m_p2m[idx]); |
2331 | 0 | ept_p2m_init(d->arch.altp2m_p2m[idx]); |
2332 | 0 | d->arch.altp2m_eptp[idx] = mfn_x(INVALID_MFN); |
2333 | 0 | rc = 0; |
2334 | 0 | } |
2335 | 0 | } |
2336 | 0 |
2337 | 0 | altp2m_list_unlock(d); |
2338 | 0 |
2339 | 0 | domain_unpause_except_self(d); |
2340 | 0 |
2341 | 0 | return rc; |
2342 | 0 | } |
2343 | | |
2344 | | int p2m_switch_domain_altp2m_by_id(struct domain *d, unsigned int idx) |
2345 | 0 | { |
2346 | 0 | struct vcpu *v; |
2347 | 0 | int rc = -EINVAL; |
2348 | 0 |
2349 | 0 | if ( idx >= MAX_ALTP2M ) |
2350 | 0 | return rc; |
2351 | 0 |
2352 | 0 | domain_pause_except_self(d); |
2353 | 0 |
2354 | 0 | altp2m_list_lock(d); |
2355 | 0 |
2356 | 0 | if ( d->arch.altp2m_eptp[idx] != mfn_x(INVALID_MFN) ) |
2357 | 0 | { |
2358 | 0 | for_each_vcpu( d, v ) |
2359 | 0 | if ( idx != vcpu_altp2m(v).p2midx ) |
2360 | 0 | { |
2361 | 0 | atomic_dec(&p2m_get_altp2m(v)->active_vcpus); |
2362 | 0 | vcpu_altp2m(v).p2midx = idx; |
2363 | 0 | atomic_inc(&p2m_get_altp2m(v)->active_vcpus); |
2364 | 0 | altp2m_vcpu_update_p2m(v); |
2365 | 0 | } |
2366 | 0 |
2367 | 0 | rc = 0; |
2368 | 0 | } |
2369 | 0 |
2370 | 0 | altp2m_list_unlock(d); |
2371 | 0 |
2372 | 0 | domain_unpause_except_self(d); |
2373 | 0 |
2374 | 0 | return rc; |
2375 | 0 | } |
2376 | | |
2377 | | int p2m_change_altp2m_gfn(struct domain *d, unsigned int idx, |
2378 | | gfn_t old_gfn, gfn_t new_gfn) |
2379 | 0 | { |
2380 | 0 | struct p2m_domain *hp2m, *ap2m; |
2381 | 0 | p2m_access_t a; |
2382 | 0 | p2m_type_t t; |
2383 | 0 | mfn_t mfn; |
2384 | 0 | unsigned int page_order; |
2385 | 0 | int rc = -EINVAL; |
2386 | 0 |
2387 | 0 | if ( idx >= MAX_ALTP2M || d->arch.altp2m_eptp[idx] == mfn_x(INVALID_MFN) ) |
2388 | 0 | return rc; |
2389 | 0 |
2390 | 0 | hp2m = p2m_get_hostp2m(d); |
2391 | 0 | ap2m = d->arch.altp2m_p2m[idx]; |
2392 | 0 |
2393 | 0 | p2m_lock(hp2m); |
2394 | 0 | p2m_lock(ap2m); |
2395 | 0 |
2396 | 0 | mfn = ap2m->get_entry(ap2m, old_gfn, &t, &a, 0, NULL, NULL); |
2397 | 0 |
2398 | 0 | if ( gfn_eq(new_gfn, INVALID_GFN) ) |
2399 | 0 | { |
2400 | 0 | if ( mfn_valid(mfn) ) |
2401 | 0 | p2m_remove_page(ap2m, gfn_x(old_gfn), mfn_x(mfn), PAGE_ORDER_4K); |
2402 | 0 | rc = 0; |
2403 | 0 | goto out; |
2404 | 0 | } |
2405 | 0 |
2406 | 0 | /* Check host p2m if no valid entry in alternate */ |
2407 | 0 | if ( !mfn_valid(mfn) ) |
2408 | 0 | { |
2409 | 0 | mfn = __get_gfn_type_access(hp2m, gfn_x(old_gfn), &t, &a, |
2410 | 0 | P2M_ALLOC, &page_order, 0); |
2411 | 0 |
2412 | 0 | if ( !mfn_valid(mfn) || t != p2m_ram_rw ) |
2413 | 0 | goto out; |
2414 | 0 |
2415 | 0 | /* If this is a superpage, copy that first */ |
2416 | 0 | if ( page_order != PAGE_ORDER_4K ) |
2417 | 0 | { |
2418 | 0 | gfn_t gfn; |
2419 | 0 | unsigned long mask; |
2420 | 0 |
2421 | 0 | mask = ~((1UL << page_order) - 1); |
2422 | 0 | gfn = _gfn(gfn_x(old_gfn) & mask); |
2423 | 0 | mfn = _mfn(mfn_x(mfn) & mask); |
2424 | 0 |
2425 | 0 | if ( ap2m->set_entry(ap2m, gfn, mfn, page_order, t, a, 1) ) |
2426 | 0 | goto out; |
2427 | 0 | } |
2428 | 0 | } |
2429 | 0 |
2430 | 0 | mfn = ap2m->get_entry(ap2m, new_gfn, &t, &a, 0, NULL, NULL); |
2431 | 0 |
2432 | 0 | if ( !mfn_valid(mfn) ) |
2433 | 0 | mfn = hp2m->get_entry(hp2m, new_gfn, &t, &a, 0, NULL, NULL); |
2434 | 0 |
2435 | 0 | /* Note: currently it is not safe to remap to a shared entry */ |
2436 | 0 | if ( !mfn_valid(mfn) || (t != p2m_ram_rw) ) |
2437 | 0 | goto out; |
2438 | 0 |
2439 | 0 | if ( !ap2m->set_entry(ap2m, old_gfn, mfn, PAGE_ORDER_4K, t, a, |
2440 | 0 | (current->domain != d)) ) |
2441 | 0 | { |
2442 | 0 | rc = 0; |
2443 | 0 |
2444 | 0 | if ( gfn_x(new_gfn) < ap2m->min_remapped_gfn ) |
2445 | 0 | ap2m->min_remapped_gfn = gfn_x(new_gfn); |
2446 | 0 | if ( gfn_x(new_gfn) > ap2m->max_remapped_gfn ) |
2447 | 0 | ap2m->max_remapped_gfn = gfn_x(new_gfn); |
2448 | 0 | } |
2449 | 0 |
2450 | 0 | out: |
2451 | 0 | p2m_unlock(ap2m); |
2452 | 0 | p2m_unlock(hp2m); |
2453 | 0 | return rc; |
2454 | 0 | } |
2455 | | |
2456 | | static void p2m_reset_altp2m(struct p2m_domain *p2m) |
2457 | 0 | { |
2458 | 0 | p2m_flush_table(p2m); |
2459 | 0 | /* Uninit and reinit ept to force TLB shootdown */ |
2460 | 0 | ept_p2m_uninit(p2m); |
2461 | 0 | ept_p2m_init(p2m); |
2462 | 0 | p2m->min_remapped_gfn = gfn_x(INVALID_GFN); |
2463 | 0 | p2m->max_remapped_gfn = 0; |
2464 | 0 | } |
2465 | | |
2466 | | void p2m_altp2m_propagate_change(struct domain *d, gfn_t gfn, |
2467 | | mfn_t mfn, unsigned int page_order, |
2468 | | p2m_type_t p2mt, p2m_access_t p2ma) |
2469 | 1.06M | { |
2470 | 1.06M | struct p2m_domain *p2m; |
2471 | 1.06M | p2m_access_t a; |
2472 | 1.06M | p2m_type_t t; |
2473 | 1.06M | mfn_t m; |
2474 | 1.06M | unsigned int i; |
2475 | 1.06M | unsigned int reset_count = 0; |
2476 | 1.06M | unsigned int last_reset_idx = ~0; |
2477 | 1.06M | |
2478 | 1.06M | if ( !altp2m_active(d) ) |
2479 | 1.06M | return; |
2480 | 1.06M | |
2481 | 0 | altp2m_list_lock(d); |
2482 | 0 |
2483 | 0 | for ( i = 0; i < MAX_ALTP2M; i++ ) |
2484 | 0 | { |
2485 | 0 | if ( d->arch.altp2m_eptp[i] == mfn_x(INVALID_MFN) ) |
2486 | 0 | continue; |
2487 | 0 |
2488 | 0 | p2m = d->arch.altp2m_p2m[i]; |
2489 | 0 | m = get_gfn_type_access(p2m, gfn_x(gfn), &t, &a, 0, NULL); |
2490 | 0 |
2491 | 0 | /* Check for a dropped page that may impact this altp2m */ |
2492 | 0 | if ( mfn_eq(mfn, INVALID_MFN) && |
2493 | 0 | gfn_x(gfn) >= p2m->min_remapped_gfn && |
2494 | 0 | gfn_x(gfn) <= p2m->max_remapped_gfn ) |
2495 | 0 | { |
2496 | 0 | if ( !reset_count++ ) |
2497 | 0 | { |
2498 | 0 | p2m_reset_altp2m(p2m); |
2499 | 0 | last_reset_idx = i; |
2500 | 0 | } |
2501 | 0 | else |
2502 | 0 | { |
2503 | 0 | /* At least 2 altp2m's impacted, so reset everything */ |
2504 | 0 | __put_gfn(p2m, gfn_x(gfn)); |
2505 | 0 |
2506 | 0 | for ( i = 0; i < MAX_ALTP2M; i++ ) |
2507 | 0 | { |
2508 | 0 | if ( i == last_reset_idx || |
2509 | 0 | d->arch.altp2m_eptp[i] == mfn_x(INVALID_MFN) ) |
2510 | 0 | continue; |
2511 | 0 |
2512 | 0 | p2m = d->arch.altp2m_p2m[i]; |
2513 | 0 | p2m_lock(p2m); |
2514 | 0 | p2m_reset_altp2m(p2m); |
2515 | 0 | p2m_unlock(p2m); |
2516 | 0 | } |
2517 | 0 |
2518 | 0 | goto out; |
2519 | 0 | } |
2520 | 0 | } |
2521 | 0 | else if ( !mfn_eq(m, INVALID_MFN) ) |
2522 | 0 | p2m_set_entry(p2m, gfn, mfn, page_order, p2mt, p2ma); |
2523 | 0 |
2524 | 0 | __put_gfn(p2m, gfn_x(gfn)); |
2525 | 0 | } |
2526 | 0 |
2527 | 0 | out: |
2528 | 0 | altp2m_list_unlock(d); |
2529 | 0 | } |
2530 | | |
2531 | | /*** Audit ***/ |
2532 | | |
2533 | | #if P2M_AUDIT |
2534 | | void audit_p2m(struct domain *d, |
2535 | | uint64_t *orphans, |
2536 | | uint64_t *m2p_bad, |
2537 | | uint64_t *p2m_bad) |
2538 | 0 | { |
2539 | 0 | struct page_info *page; |
2540 | 0 | struct domain *od; |
2541 | 0 | unsigned long mfn, gfn; |
2542 | 0 | mfn_t p2mfn; |
2543 | 0 | unsigned long orphans_count = 0, mpbad = 0, pmbad = 0; |
2544 | 0 | p2m_access_t p2ma; |
2545 | 0 | p2m_type_t type; |
2546 | 0 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
2547 | 0 |
2548 | 0 | if ( !paging_mode_translate(d) ) |
2549 | 0 | goto out_p2m_audit; |
2550 | 0 |
2551 | 0 | P2M_PRINTK("p2m audit starts\n"); |
2552 | 0 |
2553 | 0 | p2m_lock(p2m); |
2554 | 0 | pod_lock(p2m); |
2555 | 0 |
2556 | 0 | if ( p2m->audit_p2m )
2557 | 0 | pmbad = p2m->audit_p2m(p2m); |
2558 | 0 |
2559 | 0 | /* Audit part two: walk the domain's page allocation list, checking |
2560 | 0 | * the m2p entries. */ |
2561 | 0 | spin_lock(&d->page_alloc_lock); |
2562 | 0 | page_list_for_each ( page, &d->page_list ) |
2563 | 0 | { |
2564 | 0 | mfn = mfn_x(page_to_mfn(page)); |
2565 | 0 |
2566 | 0 | P2M_PRINTK("auditing guest page, mfn=%#lx\n", mfn); |
2567 | 0 |
2568 | 0 | od = page_get_owner(page); |
2569 | 0 |
2570 | 0 | if ( od != d ) |
2571 | 0 | { |
2572 | 0 | P2M_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n", |
2573 | 0 | mfn, od, (od?od->domain_id:-1), d, d->domain_id); |
2574 | 0 | continue; |
2575 | 0 | } |
2576 | 0 |
2577 | 0 | gfn = get_gpfn_from_mfn(mfn); |
2578 | 0 | if ( gfn == INVALID_M2P_ENTRY ) |
2579 | 0 | { |
2580 | 0 | orphans_count++; |
2581 | 0 | P2M_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n", |
2582 | 0 | mfn); |
2583 | 0 | continue; |
2584 | 0 | } |
2585 | 0 |
2586 | 0 | if ( gfn == SHARED_M2P_ENTRY ) |
2587 | 0 | { |
2588 | 0 | P2M_PRINTK("shared mfn (%lx) on domain page list!\n", |
2589 | 0 | mfn); |
2590 | 0 | continue; |
2591 | 0 | } |
2592 | 0 |
2593 | 0 | p2mfn = get_gfn_type_access(p2m, gfn, &type, &p2ma, 0, NULL); |
2594 | 0 | if ( mfn_x(p2mfn) != mfn ) |
2595 | 0 | { |
2596 | 0 | mpbad++; |
2597 | 0 | P2M_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx" |
2598 | 0 | " (-> gfn %#lx)\n", |
2599 | 0 | mfn, gfn, mfn_x(p2mfn), |
2600 | 0 | (mfn_valid(p2mfn) |
2601 | 0 | ? get_gpfn_from_mfn(mfn_x(p2mfn)) |
2602 | 0 | : -1u)); |
2603 | 0 | /* This m2p entry is stale: the domain has another frame in |
2604 | 0 | * this physical slot. No great disaster, but for neatness, |
2605 | 0 | * blow away the m2p entry. */ |
2606 | 0 | set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); |
2607 | 0 | } |
2608 | 0 | __put_gfn(p2m, gfn); |
2609 | 0 |
2610 | 0 | P2M_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx\n", |
2611 | 0 | mfn, gfn, mfn_x(p2mfn)); |
2612 | 0 | } |
2613 | 0 | spin_unlock(&d->page_alloc_lock); |
2614 | 0 |
2615 | 0 | pod_unlock(p2m); |
2616 | 0 | p2m_unlock(p2m); |
2617 | 0 | |
2618 | 0 | P2M_PRINTK("p2m audit complete\n"); |
2619 | 0 | if ( orphans_count | mpbad | pmbad ) |
2620 | 0 | P2M_PRINTK("p2m audit found %lu orphans\n", orphans_count); |
2621 | 0 | if ( mpbad | pmbad ) |
2622 | 0 | { |
2623 | 0 | P2M_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n", |
2624 | 0 | pmbad, mpbad); |
2625 | 0 | WARN(); |
2626 | 0 | } |
2627 | 0 |
2628 | 0 | out_p2m_audit: |
2629 | 0 | *orphans = (uint64_t) orphans_count; |
2630 | 0 | *m2p_bad = (uint64_t) mpbad; |
2631 | 0 | *p2m_bad = (uint64_t) pmbad; |
2632 | 0 | } |
2633 | | #endif /* P2M_AUDIT */ |
2634 | | |
2635 | | /* |
2636 | | * Add a frame from a foreign domain to the target domain's physmap. Similar
2637 | | * to XENMAPSPACE_gmfn, but the frame is foreign and is mapped into the current
2638 | | * domain; it is not removed from the foreign domain.
2639 | | * |
2640 | | * Usage: - libxl on pvh dom0 creating a guest and doing privcmd_ioctl_mmap. |
2641 | | * - xentrace running on dom0 mapping xenheap pages. foreigndom would |
2642 | | * be DOMID_XEN in such a case. |
2643 | | * etc.
2644 | | * |
2645 | | * Side Effect: the mfn for fgfn will be refcounted in lower level routines |
2646 | | * so it is not lost while mapped here. The refcnt is released |
2647 | | * via the XENMEM_remove_from_physmap path. |
2648 | | * |
2649 | | * Returns: 0 ==> success |
2650 | | */ |
2651 | | int p2m_add_foreign(struct domain *tdom, unsigned long fgfn, |
2652 | | unsigned long gpfn, domid_t foreigndom) |
2653 | 0 | { |
2654 | 0 | p2m_type_t p2mt, p2mt_prev; |
2655 | 0 | mfn_t prev_mfn, mfn; |
2656 | 0 | struct page_info *page; |
2657 | 0 | int rc; |
2658 | 0 | struct domain *fdom; |
2659 | 0 |
2660 | 0 | ASSERT(tdom); |
2661 | 0 | if ( foreigndom == DOMID_SELF ) |
2662 | 0 | return -EINVAL; |
2663 | 0 | /* |
2664 | 0 | * hvm fixme: until support is added to p2m teardown code to cleanup any |
2665 | 0 | * foreign entries, limit this to hardware domain only. |
2666 | 0 | */ |
2667 | 0 | if ( !is_hardware_domain(tdom) ) |
2668 | 0 | return -EPERM; |
2669 | 0 |
2670 | 0 | if ( foreigndom == DOMID_XEN ) |
2671 | 0 | fdom = rcu_lock_domain(dom_xen); |
2672 | 0 | else |
2673 | 0 | fdom = rcu_lock_domain_by_id(foreigndom); |
2674 | 0 | if ( fdom == NULL ) |
2675 | 0 | return -ESRCH; |
2676 | 0 |
2677 | 0 | rc = -EINVAL; |
2678 | 0 | if ( tdom == fdom ) |
2679 | 0 | goto out; |
2680 | 0 |
2681 | 0 | rc = xsm_map_gmfn_foreign(XSM_TARGET, tdom, fdom); |
2682 | 0 | if ( rc ) |
2683 | 0 | goto out; |
2684 | 0 |
2685 | 0 | /* |
2686 | 0 | * Take a refcnt on the mfn. NB: following supported for foreign mapping: |
2687 | 0 | * ram_rw | ram_logdirty | ram_ro | paging_out. |
2688 | 0 | */ |
2689 | 0 | page = get_page_from_gfn(fdom, fgfn, &p2mt, P2M_ALLOC); |
2690 | 0 | if ( !page || |
2691 | 0 | !p2m_is_ram(p2mt) || p2m_is_shared(p2mt) || p2m_is_hole(p2mt) ) |
2692 | 0 | { |
2693 | 0 | if ( page ) |
2694 | 0 | put_page(page); |
2695 | 0 | rc = -EINVAL; |
2696 | 0 | goto out; |
2697 | 0 | } |
2698 | 0 | mfn = page_to_mfn(page); |
2699 | 0 |
2700 | 0 | /* Remove previously mapped page if it is present. */ |
2701 | 0 | prev_mfn = get_gfn(tdom, gpfn, &p2mt_prev); |
2702 | 0 | if ( mfn_valid(prev_mfn) ) |
2703 | 0 | { |
2704 | 0 | if ( is_xen_heap_mfn(mfn_x(prev_mfn)) ) |
2705 | 0 | /* Xen heap frames are simply unhooked from this phys slot */ |
2706 | 0 | rc = guest_physmap_remove_page(tdom, _gfn(gpfn), prev_mfn, 0); |
2707 | 0 | else |
2708 | 0 | /* Normal domain memory is freed, to avoid leaking memory. */ |
2709 | 0 | rc = guest_remove_page(tdom, gpfn); |
2710 | 0 | if ( rc ) |
2711 | 0 | goto put_both; |
2712 | 0 | } |
2713 | 0 | /* |
2714 | 0 | * Create the new mapping. Can't use guest_physmap_add_page() because it |
2715 | 0 | * will update the m2p table which will result in mfn -> gpfn of dom0 |
2716 | 0 | * and not fgfn of domU. |
2717 | 0 | */ |
2718 | 0 | rc = set_foreign_p2m_entry(tdom, gpfn, mfn); |
2719 | 0 | if ( rc ) |
2720 | 0 | gdprintk(XENLOG_WARNING, "set_foreign_p2m_entry failed. " |
2721 | 0 | "gpfn:%lx mfn:%lx fgfn:%lx td:%d fd:%d\n", |
2722 | 0 | gpfn, mfn_x(mfn), fgfn, tdom->domain_id, fdom->domain_id); |
2723 | 0 |
2724 | 0 | put_both: |
2725 | 0 | put_page(page); |
2726 | 0 |
2727 | 0 | /* |
2728 | 0 | * This put_gfn for the above get_gfn for prev_mfn. We must do this |
2729 | 0 | * after set_foreign_p2m_entry so another cpu doesn't populate the gpfn |
2730 | 0 | * before us. |
2731 | 0 | */ |
2732 | 0 | put_gfn(tdom, gpfn); |
2733 | 0 |
2734 | 0 | out: |
2735 | 0 | if ( fdom ) |
2736 | 0 | rcu_unlock_domain(fdom); |
2737 | 0 | return rc; |
2738 | 0 | } |
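A hedged sketch of how a kernel-level client (privcmd on a PVH dom0, acting for libxl or libxenforeignmemory) reaches the function above: an XENMEM_add_to_physmap_batch call with XENMAPSPACE_gmfn_foreign, which the common physmap code routes to p2m_add_foreign().  Field usage follows public/memory.h; fgfn, fdom and free_slot are the example's inputs.

    /* Illustrative sketch only -- not part of the covered source. */
    xen_ulong_t idx = fgfn;        /* gfn in the foreign domain      */
    xen_pfn_t gpfn = free_slot;    /* unused slot in our own physmap */
    int err = 0, rc;
    struct xen_add_to_physmap_batch xatpb = {
        .domid = DOMID_SELF,       /* target: the calling domain     */
        .space = XENMAPSPACE_gmfn_foreign,
        .size = 1,
        .u = { .foreign_domid = fdom },
    };

    set_xen_guest_handle(xatpb.idxs, &idx);
    set_xen_guest_handle(xatpb.gpfns, &gpfn);
    set_xen_guest_handle(xatpb.errs, &err);
    rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_batch, &xatpb);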
2739 | | /* |
2740 | | * Local variables: |
2741 | | * mode: C |
2742 | | * c-file-style: "BSD" |
2743 | | * c-basic-offset: 4 |
2744 | | * indent-tabs-mode: nil |
2745 | | * End: |
2746 | | */ |