/root/src/xen/xen/arch/x86/mm/mem_sharing.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * arch/x86/mm/mem_sharing.c |
3 | | * |
4 | | * Memory sharing support. |
5 | | * |
6 | | * Copyright (c) 2011 GridCentric, Inc. (Adin Scannell & Andres Lagar-Cavilla) |
7 | | * Copyright (c) 2009 Citrix Systems, Inc. (Grzegorz Milos) |
8 | | * |
9 | | * This program is free software; you can redistribute it and/or modify |
10 | | * it under the terms of the GNU General Public License as published by |
11 | | * the Free Software Foundation; either version 2 of the License, or |
12 | | * (at your option) any later version. |
13 | | * |
14 | | * This program is distributed in the hope that it will be useful, |
15 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | | * GNU General Public License for more details. |
18 | | * |
19 | | * You should have received a copy of the GNU General Public License |
20 | | * along with this program; If not, see <http://www.gnu.org/licenses/>. |
21 | | */ |
22 | | |
23 | | #include <xen/types.h> |
24 | | #include <xen/domain_page.h> |
25 | | #include <xen/spinlock.h> |
26 | | #include <xen/rwlock.h> |
27 | | #include <xen/mm.h> |
28 | | #include <xen/grant_table.h> |
29 | | #include <xen/sched.h> |
30 | | #include <xen/rcupdate.h> |
31 | | #include <xen/guest_access.h> |
32 | | #include <xen/vm_event.h> |
33 | | #include <asm/page.h> |
34 | | #include <asm/string.h> |
35 | | #include <asm/p2m.h> |
36 | | #include <asm/altp2m.h> |
37 | | #include <asm/atomic.h> |
38 | | #include <asm/event.h> |
39 | | #include <xsm/xsm.h> |
40 | | |
41 | | #include "mm-locks.h" |
42 | | |
43 | | static shr_handle_t next_handle = 1; |
44 | | |
45 | | typedef struct pg_lock_data { |
46 | | int mm_unlock_level; |
47 | | unsigned short recurse_count; |
48 | | } pg_lock_data_t; |
49 | | |
50 | | static DEFINE_PER_CPU(pg_lock_data_t, __pld); |
51 | | |
52 | | #define MEM_SHARING_DEBUG(_f, _a...) \ |
53 | 0 | debugtrace_printk("mem_sharing_debug: %s(): " _f, __func__, ##_a) |
54 | | |
55 | | /* Reverse map defines */ |
56 | 0 | #define RMAP_HASHTAB_ORDER 0 |
57 | | #define RMAP_HASHTAB_SIZE \ |
58 | 0 | ((PAGE_SIZE << RMAP_HASHTAB_ORDER) / sizeof(struct list_head)) |
59 | | #define RMAP_USES_HASHTAB(page) \ |
60 | 0 | ((page)->sharing->hash_table.flag == NULL) |
61 | 0 | #define RMAP_HEAVY_SHARED_PAGE RMAP_HASHTAB_SIZE |
62 | | /* A bit of hysteresis. We don't want to keep converting between the list
63 | | * and the hash table constantly. */
64 | 0 | #define RMAP_LIGHT_SHARED_PAGE (RMAP_HEAVY_SHARED_PAGE >> 2) |
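/*
 * Annotation (not part of the original source): RMAP_USES_HASHTAB works
 * because the two rmap layouts share storage in page->sharing;
 * rmap_list_to_hash_table() below sets hash_table.flag to NULL, a value a
 * live list pointer never holds. As a worked example of the thresholds on
 * x86-64, with PAGE_SIZE == 4096 and sizeof(struct list_head) == 16:
 *   RMAP_HASHTAB_SIZE      = 4096 / 16 = 256 buckets
 *   RMAP_HEAVY_SHARED_PAGE = 256 (list -> hash table, in rmap_add)
 *   RMAP_LIGHT_SHARED_PAGE = 64  (hash table -> list, in rmap_del)
 * The 4x gap between the two thresholds is the hysteresis referred to
 * above.
 */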
65 | | |
66 | | #if MEM_SHARING_AUDIT |
67 | | |
68 | | static struct list_head shr_audit_list; |
69 | | static spinlock_t shr_audit_lock; |
70 | | static DEFINE_RCU_READ_LOCK(shr_audit_read_lock); |
71 | | |
72 | | /* RCU delayed free of audit list entry */ |
73 | | static void _free_pg_shared_info(struct rcu_head *head) |
74 | 0 | { |
75 | 0 | xfree(container_of(head, struct page_sharing_info, rcu_head)); |
76 | 0 | } |
77 | | |
78 | | static inline void audit_add_list(struct page_info *page) |
79 | 0 | { |
80 | 0 | INIT_LIST_HEAD(&page->sharing->entry); |
81 | 0 | spin_lock(&shr_audit_lock); |
82 | 0 | list_add_rcu(&page->sharing->entry, &shr_audit_list); |
83 | 0 | spin_unlock(&shr_audit_lock); |
84 | 0 | } |
85 | | |
86 | | /* Removes from the audit list and cleans up the page sharing metadata. */ |
87 | | static inline void page_sharing_dispose(struct page_info *page) |
88 | 0 | { |
89 | 0 | /* Unlikely given our thresholds, but we should be careful. */ |
90 | 0 | if ( unlikely(RMAP_USES_HASHTAB(page)) ) |
91 | 0 | free_xenheap_pages(page->sharing->hash_table.bucket, |
92 | 0 | RMAP_HASHTAB_ORDER); |
93 | 0 |
94 | 0 | spin_lock(&shr_audit_lock); |
95 | 0 | list_del_rcu(&page->sharing->entry); |
96 | 0 | spin_unlock(&shr_audit_lock); |
97 | 0 | INIT_RCU_HEAD(&page->sharing->rcu_head); |
98 | 0 | call_rcu(&page->sharing->rcu_head, _free_pg_shared_info); |
99 | 0 | } |
100 | | |
101 | | #else |
102 | | |
103 | | #define audit_add_list(p) ((void)0) |
104 | | static inline void page_sharing_dispose(struct page_info *page) |
105 | | { |
106 | | /* Unlikely given our thresholds, but we should be careful. */ |
107 | | if ( unlikely(RMAP_USES_HASHTAB(page)) ) |
108 | | free_xenheap_pages(page->sharing->hash_table.bucket, |
109 | | RMAP_HASHTAB_ORDER); |
110 | | xfree(page->sharing); |
111 | | } |
112 | | |
113 | | #endif /* MEM_SHARING_AUDIT */ |
114 | | |
115 | | static inline int mem_sharing_page_lock(struct page_info *pg) |
116 | 0 | { |
117 | 0 | int rc; |
118 | 0 | pg_lock_data_t *pld = &(this_cpu(__pld)); |
119 | 0 |
120 | 0 | page_sharing_mm_pre_lock(); |
121 | 0 | rc = page_lock(pg); |
122 | 0 | if ( rc ) |
123 | 0 | { |
124 | 0 | preempt_disable(); |
125 | 0 | page_sharing_mm_post_lock(&pld->mm_unlock_level, |
126 | 0 | &pld->recurse_count); |
127 | 0 | } |
128 | 0 | return rc; |
129 | 0 | } |
130 | | |
131 | | static inline void mem_sharing_page_unlock(struct page_info *pg) |
132 | 0 | { |
133 | 0 | pg_lock_data_t *pld = &(this_cpu(__pld)); |
134 | 0 |
135 | 0 | page_sharing_mm_unlock(pld->mm_unlock_level, |
136 | 0 | &pld->recurse_count); |
137 | 0 | preempt_enable(); |
138 | 0 | page_unlock(pg); |
139 | 0 | } |
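/*
 * Usage sketch (annotation, not in the original file; the helper name is
 * illustrative only): the pair above must bracket any access to
 * pg->sharing, since it also maintains the mm-locks ordering level and a
 * preemption-disabled section. The canonical shape, as in
 * __grab_shared_page() below:
 */
static inline int mem_sharing_lock_example(struct page_info *pg)
{
    if ( !mem_sharing_page_lock(pg) )
        return 0;                   /* not lockable => not a shared page */

    /* ... pg->sharing may be inspected or mutated here ... */

    mem_sharing_page_unlock(pg);
    return 1;
}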
140 | | |
141 | | static inline shr_handle_t get_next_handle(void) |
142 | 0 | { |
143 | 0 | /* Get the next handle, get_page style. */
144 | 0 | uint64_t x, y = next_handle; |
145 | 0 | do { |
146 | 0 | x = y; |
147 | 0 | } |
148 | 0 | while ( (y = cmpxchg(&next_handle, x, x + 1)) != x ); |
149 | 0 | return x + 1; |
150 | 0 | } |
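/*
 * Worked example (annotation, not part of the original file): two CPUs both
 * read next_handle == 5. CPU A's cmpxchg(&next_handle, 5, 6) returns 5, so
 * it exits the loop and returns handle 6. CPU B's cmpxchg returns 6 != 5,
 * so it retries with x = 6, installs 7, and returns handle 7. Every caller
 * therefore obtains a unique handle without taking a lock.
 */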
151 | | |
152 | | #define mem_sharing_enabled(d) \ |
153 | 0 | (is_hvm_domain(d) && (d)->arch.hvm_domain.mem_sharing_enabled) |
154 | | |
155 | | #undef mfn_to_page |
156 | 0 | #define mfn_to_page(_m) __mfn_to_page(mfn_x(_m)) |
157 | | #undef page_to_mfn |
158 | 0 | #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg)) |
159 | | |
160 | | static atomic_t nr_saved_mfns = ATOMIC_INIT(0); |
161 | | static atomic_t nr_shared_mfns = ATOMIC_INIT(0); |
162 | | |
163 | | /** Reverse map **/ |
164 | | /* Every shared frame keeps a reverse map (rmap) of <domain, gfn> tuples that |
165 | | * this shared frame backs. For pages with a low degree of sharing, an O(n)
166 | | * search linked list is good enough. For pages with a higher degree of sharing,
167 | | * we use a hash table instead. */ |
168 | | |
169 | | typedef struct gfn_info |
170 | | { |
171 | | unsigned long gfn; |
172 | | domid_t domain; |
173 | | struct list_head list; |
174 | | } gfn_info_t; |
175 | | |
176 | | static inline void |
177 | | rmap_init(struct page_info *page) |
178 | 0 | { |
179 | 0 | /* We always start off as a doubly linked list. */ |
180 | 0 | INIT_LIST_HEAD(&page->sharing->gfns); |
181 | 0 | } |
182 | | |
183 | | /* Exceedingly simple "hash function" */ |
184 | | #define HASH(domain, gfn) \ |
185 | 0 | (((gfn) + (domain)) % RMAP_HASHTAB_SIZE) |
186 | | |
187 | | /* Conversions. Tuned by the thresholds. Should only happen twice |
188 | | * (once each) during the lifetime of a shared page */ |
189 | | static inline int |
190 | | rmap_list_to_hash_table(struct page_info *page) |
191 | 0 | { |
192 | 0 | unsigned int i; |
193 | 0 | struct list_head *pos, *tmp, *b = |
194 | 0 | alloc_xenheap_pages(RMAP_HASHTAB_ORDER, 0); |
195 | 0 |
196 | 0 | if ( b == NULL ) |
197 | 0 | return -ENOMEM; |
198 | 0 |
199 | 0 | for ( i = 0; i < RMAP_HASHTAB_SIZE; i++ ) |
200 | 0 | INIT_LIST_HEAD(b + i); |
201 | 0 |
202 | 0 | list_for_each_safe(pos, tmp, &page->sharing->gfns) |
203 | 0 | { |
204 | 0 | gfn_info_t *gfn_info = list_entry(pos, gfn_info_t, list); |
205 | 0 | struct list_head *bucket = b + HASH(gfn_info->domain, gfn_info->gfn); |
206 | 0 | list_del(pos); |
207 | 0 | list_add(pos, bucket); |
208 | 0 | } |
209 | 0 |
210 | 0 | page->sharing->hash_table.bucket = b; |
211 | 0 | page->sharing->hash_table.flag = NULL; |
212 | 0 |
213 | 0 | return 0; |
214 | 0 | } |
215 | | |
216 | | static inline void |
217 | | rmap_hash_table_to_list(struct page_info *page) |
218 | 0 | { |
219 | 0 | unsigned int i; |
220 | 0 | struct list_head *bucket = page->sharing->hash_table.bucket; |
221 | 0 |
222 | 0 | INIT_LIST_HEAD(&page->sharing->gfns); |
223 | 0 |
224 | 0 | for ( i = 0; i < RMAP_HASHTAB_SIZE; i++ ) |
225 | 0 | { |
226 | 0 | struct list_head *pos, *tmp, *head = bucket + i; |
227 | 0 | list_for_each_safe(pos, tmp, head) |
228 | 0 | { |
229 | 0 | list_del(pos); |
230 | 0 | list_add(pos, &page->sharing->gfns); |
231 | 0 | } |
232 | 0 | } |
233 | 0 |
234 | 0 | free_xenheap_pages(bucket, RMAP_HASHTAB_ORDER); |
235 | 0 | } |
236 | | |
237 | | /* Generic accessors to the rmap */ |
238 | | static inline unsigned long |
239 | | rmap_count(struct page_info *pg) |
240 | 0 | { |
241 | 0 | unsigned long count; |
242 | 0 | unsigned long t = read_atomic(&pg->u.inuse.type_info); |
243 | 0 | count = t & PGT_count_mask; |
244 | 0 | if ( t & PGT_locked ) |
245 | 0 | count--; |
246 | 0 | return count; |
247 | 0 | } |
248 | | |
249 | | /* The page type count is always decreased after removing from the rmap. |
250 | | * Use a convert flag to avoid mutating the rmap if in the middle of an |
251 | | * iterator, or if the page will soon be destroyed anyway. */
252 | | static inline void |
253 | | rmap_del(gfn_info_t *gfn_info, struct page_info *page, int convert) |
254 | 0 | { |
255 | 0 | if ( RMAP_USES_HASHTAB(page) && convert && |
256 | 0 | (rmap_count(page) <= RMAP_LIGHT_SHARED_PAGE) ) |
257 | 0 | rmap_hash_table_to_list(page); |
258 | 0 |
259 | 0 | /* Regardless of rmap type, same removal operation */ |
260 | 0 | list_del(&gfn_info->list); |
261 | 0 | } |
262 | | |
263 | | /* The page type count is always increased before adding to the rmap. */ |
264 | | static inline void |
265 | | rmap_add(gfn_info_t *gfn_info, struct page_info *page) |
266 | 0 | { |
267 | 0 | struct list_head *head; |
268 | 0 |
269 | 0 | if ( !RMAP_USES_HASHTAB(page) && |
270 | 0 | (rmap_count(page) >= RMAP_HEAVY_SHARED_PAGE) ) |
271 | 0 | /* The conversion may fail with ENOMEM. We'll be less efficient, |
272 | 0 | * but no reason to panic. */ |
273 | 0 | (void)rmap_list_to_hash_table(page); |
274 | 0 |
275 | 0 | head = (RMAP_USES_HASHTAB(page)) ? |
276 | 0 | page->sharing->hash_table.bucket + |
277 | 0 | HASH(gfn_info->domain, gfn_info->gfn) : |
278 | 0 | &page->sharing->gfns; |
279 | 0 |
280 | 0 | INIT_LIST_HEAD(&gfn_info->list); |
281 | 0 | list_add(&gfn_info->list, head); |
282 | 0 | } |
283 | | |
284 | | static inline gfn_info_t * |
285 | | rmap_retrieve(uint16_t domain_id, unsigned long gfn, |
286 | | struct page_info *page) |
287 | 0 | { |
288 | 0 | gfn_info_t *gfn_info; |
289 | 0 | struct list_head *le, *head; |
290 | 0 |
291 | 0 | head = (RMAP_USES_HASHTAB(page)) ? |
292 | 0 | page->sharing->hash_table.bucket + HASH(domain_id, gfn) : |
293 | 0 | &page->sharing->gfns; |
294 | 0 |
295 | 0 | list_for_each(le, head) |
296 | 0 | { |
297 | 0 | gfn_info = list_entry(le, gfn_info_t, list); |
298 | 0 | if ( (gfn_info->gfn == gfn) && (gfn_info->domain == domain_id) ) |
299 | 0 | return gfn_info; |
300 | 0 | } |
301 | 0 |
302 | 0 | /* Nothing was found */ |
303 | 0 | return NULL; |
304 | 0 | } |
305 | | |
306 | | /* Returns true if the rmap has only one entry. O(1) complexity. */ |
307 | | static inline int rmap_has_one_entry(struct page_info *page) |
308 | 0 | { |
309 | 0 | return (rmap_count(page) == 1); |
310 | 0 | } |
311 | | |
312 | | /* Returns true if the rmap has any entries. O(1) complexity. */ |
313 | | static inline int rmap_has_entries(struct page_info *page) |
314 | 0 | { |
315 | 0 | return (rmap_count(page) != 0); |
316 | 0 | } |
317 | | |
318 | | /* The iterator hides the details of how the rmap is implemented. This |
319 | | * involves splitting the list_for_each_safe macro into two steps. */ |
320 | | struct rmap_iterator { |
321 | | struct list_head *curr; |
322 | | struct list_head *next; |
323 | | unsigned int bucket; |
324 | | }; |
325 | | |
326 | | static inline void |
327 | | rmap_seed_iterator(struct page_info *page, struct rmap_iterator *ri) |
328 | 0 | { |
329 | 0 | ri->curr = (RMAP_USES_HASHTAB(page)) ? |
330 | 0 | page->sharing->hash_table.bucket : |
331 | 0 | &page->sharing->gfns; |
332 | 0 | ri->next = ri->curr->next; |
333 | 0 | ri->bucket = 0; |
334 | 0 | } |
335 | | |
336 | | static inline gfn_info_t * |
337 | | rmap_iterate(struct page_info *page, struct rmap_iterator *ri) |
338 | 0 | { |
339 | 0 | struct list_head *head = (RMAP_USES_HASHTAB(page)) ? |
340 | 0 | page->sharing->hash_table.bucket + ri->bucket : |
341 | 0 | &page->sharing->gfns; |
342 | 0 |
343 | 0 | retry: |
344 | 0 | if ( ri->next == head) |
345 | 0 | { |
346 | 0 | if ( RMAP_USES_HASHTAB(page) ) |
347 | 0 | { |
348 | 0 | ri->bucket++; |
349 | 0 | if ( ri->bucket >= RMAP_HASHTAB_SIZE ) |
350 | 0 | /* No more hash table buckets */ |
351 | 0 | return NULL; |
352 | 0 | head = page->sharing->hash_table.bucket + ri->bucket; |
353 | 0 | ri->curr = head; |
354 | 0 | ri->next = ri->curr->next; |
355 | 0 | goto retry; |
356 | 0 | } else |
357 | 0 | /* List exhausted */ |
358 | 0 | return NULL; |
359 | 0 | } |
360 | 0 |
361 | 0 | ri->curr = ri->next; |
362 | 0 | ri->next = ri->curr->next; |
363 | 0 |
364 | 0 | return list_entry(ri->curr, gfn_info_t, list); |
365 | 0 | } |
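/*
 * Usage sketch (annotation, not in the original file; the helper name is
 * illustrative only): the canonical seed-then-iterate loop over a shared
 * page's rmap, as audit() below uses it, works identically for both rmap
 * representations:
 */
static inline unsigned long rmap_walk_example(struct page_info *page,
                                              domid_t domain)
{
    struct rmap_iterator ri;
    gfn_info_t *g;
    unsigned long nr_entries = 0;

    /* Caller must hold mem_sharing_page_lock(page). */
    rmap_seed_iterator(page, &ri);
    while ( (g = rmap_iterate(page, &ri)) != NULL )
        if ( g->domain == domain )
            nr_entries++;           /* one <domain, gfn> tuple per entry */

    return nr_entries;
}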
366 | | |
367 | | static inline gfn_info_t *mem_sharing_gfn_alloc(struct page_info *page, |
368 | | struct domain *d, |
369 | | unsigned long gfn) |
370 | 0 | { |
371 | 0 | gfn_info_t *gfn_info = xmalloc(gfn_info_t); |
372 | 0 |
373 | 0 | if ( gfn_info == NULL ) |
374 | 0 | return NULL; |
375 | 0 |
376 | 0 | gfn_info->gfn = gfn; |
377 | 0 | gfn_info->domain = d->domain_id; |
378 | 0 |
379 | 0 | rmap_add(gfn_info, page); |
380 | 0 |
381 | 0 | /* Increment our number of shared pages. */
382 | 0 | atomic_inc(&d->shr_pages); |
383 | 0 |
384 | 0 | return gfn_info; |
385 | 0 | } |
386 | | |
387 | | static inline void mem_sharing_gfn_destroy(struct page_info *page, |
388 | | struct domain *d, |
389 | | gfn_info_t *gfn_info) |
390 | 0 | { |
391 | 0 | /* Decrement the number of pages. */ |
392 | 0 | atomic_dec(&d->shr_pages); |
393 | 0 |
394 | 0 | /* Free the gfn_info structure. */ |
395 | 0 | rmap_del(gfn_info, page, 1); |
396 | 0 | xfree(gfn_info); |
397 | 0 | } |
398 | | |
399 | | static struct page_info* mem_sharing_lookup(unsigned long mfn) |
400 | 0 | { |
401 | 0 | if ( mfn_valid(_mfn(mfn)) ) |
402 | 0 | { |
403 | 0 | struct page_info* page = mfn_to_page(_mfn(mfn)); |
404 | 0 | if ( page_get_owner(page) == dom_cow ) |
405 | 0 | { |
406 | 0 | /* Count has to be at least two, because we're called |
407 | 0 | * with the mfn locked (1) and this is supposed to be |
408 | 0 | * a shared page (1). */ |
409 | 0 | unsigned long t = read_atomic(&page->u.inuse.type_info); |
410 | 0 | ASSERT((t & PGT_type_mask) == PGT_shared_page); |
411 | 0 | ASSERT((t & PGT_count_mask) >= 2); |
412 | 0 | ASSERT(get_gpfn_from_mfn(mfn) == SHARED_M2P_ENTRY); |
413 | 0 | return page; |
414 | 0 | } |
415 | 0 | } |
416 | 0 |
417 | 0 | return NULL; |
418 | 0 | } |
419 | | |
420 | | static int audit(void) |
421 | 0 | { |
422 | 0 | #if MEM_SHARING_AUDIT |
423 | 0 | int errors = 0; |
424 | 0 | unsigned long count_expected; |
425 | 0 | unsigned long count_found = 0; |
426 | 0 | struct list_head *ae; |
427 | 0 |
428 | 0 | count_expected = atomic_read(&nr_shared_mfns); |
429 | 0 |
430 | 0 | rcu_read_lock(&shr_audit_read_lock); |
431 | 0 |
432 | 0 | list_for_each_rcu(ae, &shr_audit_list) |
433 | 0 | { |
434 | 0 | struct page_sharing_info *pg_shared_info; |
435 | 0 | unsigned long nr_gfns = 0; |
436 | 0 | struct page_info *pg; |
437 | 0 | mfn_t mfn; |
438 | 0 | gfn_info_t *g; |
439 | 0 | struct rmap_iterator ri; |
440 | 0 |
441 | 0 | pg_shared_info = list_entry(ae, struct page_sharing_info, entry); |
442 | 0 | pg = pg_shared_info->pg; |
443 | 0 | mfn = page_to_mfn(pg); |
444 | 0 |
445 | 0 | /* If we can't lock it, it's definitely not a shared page */ |
446 | 0 | if ( !mem_sharing_page_lock(pg) ) |
447 | 0 | { |
448 | 0 | MEM_SHARING_DEBUG("mfn %lx in audit list, but cannot be locked (%lx)!\n", |
449 | 0 | mfn_x(mfn), pg->u.inuse.type_info); |
450 | 0 | errors++; |
451 | 0 | continue; |
452 | 0 | } |
453 | 0 |
454 | 0 | /* Check if the MFN has correct type, owner and handle. */ |
455 | 0 | if ( (pg->u.inuse.type_info & PGT_type_mask) != PGT_shared_page ) |
456 | 0 | { |
457 | 0 | MEM_SHARING_DEBUG("mfn %lx in audit list, but not PGT_shared_page (%lx)!\n", |
458 | 0 | mfn_x(mfn), pg->u.inuse.type_info & PGT_type_mask); |
459 | 0 | errors++; |
460 | 0 | continue; |
461 | 0 | } |
462 | 0 |
463 | 0 | /* Check the page owner. */ |
464 | 0 | if ( page_get_owner(pg) != dom_cow ) |
465 | 0 | { |
466 | 0 | MEM_SHARING_DEBUG("mfn %lx shared, but wrong owner (%hu)!\n", |
467 | 0 | mfn_x(mfn), page_get_owner(pg)->domain_id); |
468 | 0 | errors++; |
469 | 0 | } |
470 | 0 |
471 | 0 | /* Check the m2p entry */ |
472 | 0 | if ( get_gpfn_from_mfn(mfn_x(mfn)) != SHARED_M2P_ENTRY ) |
473 | 0 | { |
474 | 0 | MEM_SHARING_DEBUG("mfn %lx shared, but wrong m2p entry (%lx)!\n", |
475 | 0 | mfn_x(mfn), get_gpfn_from_mfn(mfn_x(mfn))); |
476 | 0 | errors++; |
477 | 0 | } |
478 | 0 |
479 | 0 | /* Check we have a list */ |
480 | 0 | if ( (!pg->sharing) || !rmap_has_entries(pg) ) |
481 | 0 | { |
482 | 0 | MEM_SHARING_DEBUG("mfn %lx shared, but empty gfn list!\n", |
483 | 0 | mfn_x(mfn)); |
484 | 0 | errors++; |
485 | 0 | continue; |
486 | 0 | } |
487 | 0 |
488 | 0 | /* We've found a page that is shared */ |
489 | 0 | count_found++; |
490 | 0 |
491 | 0 | /* Check if all GFNs map to the MFN, and the p2m types */ |
492 | 0 | rmap_seed_iterator(pg, &ri); |
493 | 0 | while ( (g = rmap_iterate(pg, &ri)) != NULL ) |
494 | 0 | { |
495 | 0 | struct domain *d; |
496 | 0 | p2m_type_t t; |
497 | 0 | mfn_t o_mfn; |
498 | 0 |
499 | 0 | d = get_domain_by_id(g->domain); |
500 | 0 | if ( d == NULL ) |
501 | 0 | { |
502 | 0 | MEM_SHARING_DEBUG("Unknown dom: %hu, for PFN=%lx, MFN=%lx\n", |
503 | 0 | g->domain, g->gfn, mfn_x(mfn)); |
504 | 0 | errors++; |
505 | 0 | continue; |
506 | 0 | } |
507 | 0 | o_mfn = get_gfn_query_unlocked(d, g->gfn, &t); |
508 | 0 | if ( mfn_x(o_mfn) != mfn_x(mfn) ) |
509 | 0 | { |
510 | 0 | MEM_SHARING_DEBUG("Incorrect P2M for d=%hu, PFN=%lx. "
511 | 0 | "Expecting MFN=%lx, got %lx\n", |
512 | 0 | g->domain, g->gfn, mfn_x(mfn), mfn_x(o_mfn)); |
513 | 0 | errors++; |
514 | 0 | } |
515 | 0 | if ( t != p2m_ram_shared ) |
516 | 0 | { |
517 | 0 | MEM_SHARING_DEBUG("Incorrect P2M type for d=%hu, PFN=%lx MFN=%lx. "
518 | 0 | "Expecting t=%d, got %d\n", |
519 | 0 | g->domain, g->gfn, mfn_x(mfn), p2m_ram_shared, t); |
520 | 0 | errors++; |
521 | 0 | } |
522 | 0 | put_domain(d); |
523 | 0 | nr_gfns++; |
524 | 0 | } |
525 | 0 | /* The type count has an extra ref because we have locked the page */ |
526 | 0 | if ( (nr_gfns + 1) != (pg->u.inuse.type_info & PGT_count_mask) ) |
527 | 0 | { |
528 | 0 | MEM_SHARING_DEBUG("Mismatched counts for MFN=%lx. "
529 | 0 | "nr_gfns in list %lu, in type_info %lx\n", |
530 | 0 | mfn_x(mfn), nr_gfns, |
531 | 0 | (pg->u.inuse.type_info & PGT_count_mask)); |
532 | 0 | errors++; |
533 | 0 | } |
534 | 0 |
535 | 0 | mem_sharing_page_unlock(pg); |
536 | 0 | } |
537 | 0 |
538 | 0 | rcu_read_unlock(&shr_audit_read_lock); |
539 | 0 |
540 | 0 | if ( count_found != count_expected ) |
541 | 0 | { |
542 | 0 | MEM_SHARING_DEBUG("Expected %ld shared mfns, found %ld.\n",
543 | 0 | count_expected, count_found); |
544 | 0 | errors++; |
545 | 0 | } |
546 | 0 |
547 | 0 | return errors; |
548 | 0 | #else |
549 | | return -EOPNOTSUPP; |
550 | | #endif |
551 | 0 | } |
552 | | |
553 | | int mem_sharing_notify_enomem(struct domain *d, unsigned long gfn, |
554 | | bool_t allow_sleep) |
555 | 0 | { |
556 | 0 | struct vcpu *v = current; |
557 | 0 | int rc; |
558 | 0 | vm_event_request_t req = { |
559 | 0 | .reason = VM_EVENT_REASON_MEM_SHARING, |
560 | 0 | .vcpu_id = v->vcpu_id, |
561 | 0 | .u.mem_sharing.gfn = gfn, |
562 | 0 | .u.mem_sharing.p2mt = p2m_ram_shared |
563 | 0 | }; |
564 | 0 |
565 | 0 | if ( (rc = __vm_event_claim_slot(d, |
566 | 0 | d->vm_event_share, allow_sleep)) < 0 ) |
567 | 0 | return rc; |
568 | 0 |
569 | 0 | if ( v->domain == d ) |
570 | 0 | { |
571 | 0 | req.flags = VM_EVENT_FLAG_VCPU_PAUSED; |
572 | 0 | vm_event_vcpu_pause(v); |
573 | 0 | } |
574 | 0 |
575 | 0 | vm_event_put_request(d, d->vm_event_share, &req); |
576 | 0 |
577 | 0 | return 0; |
578 | 0 | } |
579 | | |
580 | | unsigned int mem_sharing_get_nr_saved_mfns(void) |
581 | 0 | { |
582 | 0 | return ((unsigned int)atomic_read(&nr_saved_mfns)); |
583 | 0 | } |
584 | | |
585 | | unsigned int mem_sharing_get_nr_shared_mfns(void) |
586 | 0 | { |
587 | 0 | return (unsigned int)atomic_read(&nr_shared_mfns); |
588 | 0 | } |
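/*
 * Worked example (annotation, not part of the original source): if three
 * domains end up backed by a single shared frame (two nominate + share
 * rounds), each nominate_page() below increments nr_shared_mfns and each
 * share_pages() moves one count from nr_shared_mfns to nr_saved_mfns,
 * leaving nr_shared_mfns == 1 (one frame still shared) and
 * nr_saved_mfns == 2 (two frames returned to the allocator).
 */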
589 | | |
590 | | /* Functions that change a page's type and ownership */ |
591 | | static int page_make_sharable(struct domain *d, |
592 | | struct page_info *page, |
593 | | int expected_refcnt) |
594 | 0 | { |
595 | 0 | bool_t drop_dom_ref; |
596 | 0 |
597 | 0 | spin_lock(&d->page_alloc_lock); |
598 | 0 |
599 | 0 | if ( d->is_dying ) |
600 | 0 | { |
601 | 0 | spin_unlock(&d->page_alloc_lock); |
602 | 0 | return -EBUSY; |
603 | 0 | } |
604 | 0 |
605 | 0 | /* Change page type and count atomically */ |
606 | 0 | if ( !get_page_and_type(page, d, PGT_shared_page) ) |
607 | 0 | { |
608 | 0 | spin_unlock(&d->page_alloc_lock); |
609 | 0 | return -EINVAL; |
610 | 0 | } |
611 | 0 |
612 | 0 | /* Check it wasn't already sharable and undo if it was */ |
613 | 0 | if ( (page->u.inuse.type_info & PGT_count_mask) != 1 ) |
614 | 0 | { |
615 | 0 | spin_unlock(&d->page_alloc_lock); |
616 | 0 | put_page_and_type(page); |
617 | 0 | return -EEXIST; |
618 | 0 | } |
619 | 0 |
620 | 0 | /* Check the reference count: one from PGC_allocated, one from the
621 | 0 | * get_page_and_type() above, plus any expected extra references. */
622 | 0 | if ( page->count_info != (PGC_allocated | (2 + expected_refcnt)) ) |
623 | 0 | { |
624 | 0 | spin_unlock(&d->page_alloc_lock); |
625 | 0 | /* Return type count back to zero */ |
626 | 0 | put_page_and_type(page); |
627 | 0 | return -E2BIG; |
628 | 0 | } |
629 | 0 |
630 | 0 | page_set_owner(page, dom_cow); |
631 | 0 | drop_dom_ref = !domain_adjust_tot_pages(d, -1); |
632 | 0 | page_list_del(page, &d->page_list); |
633 | 0 | spin_unlock(&d->page_alloc_lock); |
634 | 0 |
635 | 0 | if ( drop_dom_ref ) |
636 | 0 | put_domain(d); |
637 | 0 | return 0; |
638 | 0 | } |
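/*
 * Worked example (annotation, not in the original file): for a plain gfn
 * nomination, nominate_page() passes expected_refcnt == 0, so the check
 * above requires count_info == (PGC_allocated | 2): one reference from
 * PGC_allocated plus the one taken by get_page_and_type() at the top.
 * Nomination via grant reference passes expected_refcnt == 3 (see
 * mem_sharing_memop() below), tolerating the extra references held on
 * behalf of the active grant.
 */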
639 | | |
640 | | static int page_make_private(struct domain *d, struct page_info *page) |
641 | 0 | { |
642 | 0 | unsigned long expected_type; |
643 | 0 |
644 | 0 | if ( !get_page(page, dom_cow) ) |
645 | 0 | return -EINVAL; |
646 | 0 | |
647 | 0 | spin_lock(&d->page_alloc_lock); |
648 | 0 |
649 | 0 | if ( d->is_dying ) |
650 | 0 | { |
651 | 0 | spin_unlock(&d->page_alloc_lock); |
652 | 0 | put_page(page); |
653 | 0 | return -EBUSY; |
654 | 0 | } |
655 | 0 |
656 | 0 | /* We can only change the type if count is one */ |
657 | 0 | /* Because we are locking pages individually, we need to drop |
658 | 0 | * the lock here, while the page is typed. We cannot risk the |
659 | 0 | * race of page_unlock and then put_page_type. */ |
660 | 0 | expected_type = (PGT_shared_page | PGT_validated | PGT_locked | 2); |
661 | 0 | if ( page->u.inuse.type_info != expected_type ) |
662 | 0 | { |
663 | 0 | spin_unlock(&d->page_alloc_lock); |
664 | 0 | put_page(page); |
665 | 0 | return -EEXIST; |
666 | 0 | } |
667 | 0 |
668 | 0 | /* Drop the final typecount */ |
669 | 0 | put_page_and_type(page); |
670 | 0 |
671 | 0 | /* Now that we've dropped the type, we can unlock */ |
672 | 0 | mem_sharing_page_unlock(page); |
673 | 0 |
674 | 0 | /* Change the owner */ |
675 | 0 | ASSERT(page_get_owner(page) == dom_cow); |
676 | 0 | page_set_owner(page, d); |
677 | 0 |
678 | 0 | if ( domain_adjust_tot_pages(d, 1) == 1 ) |
679 | 0 | get_knownalive_domain(d); |
680 | 0 | page_list_add_tail(page, &d->page_list); |
681 | 0 | spin_unlock(&d->page_alloc_lock); |
682 | 0 |
683 | 0 | put_page(page); |
684 | 0 |
685 | 0 | return 0; |
686 | 0 | } |
687 | | |
688 | | static inline struct page_info *__grab_shared_page(mfn_t mfn) |
689 | 0 | { |
690 | 0 | struct page_info *pg = NULL; |
691 | 0 |
692 | 0 | if ( !mfn_valid(mfn) ) |
693 | 0 | return NULL; |
694 | 0 | pg = mfn_to_page(mfn); |
695 | 0 |
696 | 0 | /* If the page is not validated we can't lock it, and if it's |
697 | 0 | * not validated it's obviously not shared. */ |
698 | 0 | if ( !mem_sharing_page_lock(pg) ) |
699 | 0 | return NULL; |
700 | 0 |
701 | 0 | if ( mem_sharing_lookup(mfn_x(mfn)) == NULL ) |
702 | 0 | { |
703 | 0 | mem_sharing_page_unlock(pg); |
704 | 0 | return NULL; |
705 | 0 | } |
706 | 0 |
707 | 0 | return pg; |
708 | 0 | } |
709 | | |
710 | | static int debug_mfn(mfn_t mfn) |
711 | 0 | { |
712 | 0 | struct page_info *page; |
713 | 0 | int num_refs; |
714 | 0 |
715 | 0 | if ( (page = __grab_shared_page(mfn)) == NULL )
716 | 0 | { |
717 | 0 | gdprintk(XENLOG_ERR, "Invalid MFN=%lx\n", mfn_x(mfn)); |
718 | 0 | return -EINVAL; |
719 | 0 | } |
720 | 0 |
721 | 0 | MEM_SHARING_DEBUG( |
722 | 0 | "Debug page: MFN=%lx is ci=%lx, ti=%lx, owner_id=%d\n", |
723 | 0 | mfn_x(page_to_mfn(page)), |
724 | 0 | page->count_info, |
725 | 0 | page->u.inuse.type_info, |
726 | 0 | page_get_owner(page)->domain_id); |
727 | 0 |
728 | 0 | /* -1 because the page is locked and that's an additional type ref */ |
729 | 0 | num_refs = ((int) (page->u.inuse.type_info & PGT_count_mask)) - 1; |
730 | 0 | mem_sharing_page_unlock(page); |
731 | 0 | return num_refs; |
732 | 0 | } |
733 | | |
734 | | static int debug_gfn(struct domain *d, gfn_t gfn) |
735 | 0 | { |
736 | 0 | p2m_type_t p2mt; |
737 | 0 | mfn_t mfn; |
738 | 0 | int num_refs; |
739 | 0 |
740 | 0 | mfn = get_gfn_query(d, gfn_x(gfn), &p2mt); |
741 | 0 |
742 | 0 | MEM_SHARING_DEBUG("Debug for dom%d, gfn=%" PRI_gfn "\n", |
743 | 0 | d->domain_id, gfn_x(gfn)); |
744 | 0 | num_refs = debug_mfn(mfn); |
745 | 0 | put_gfn(d, gfn_x(gfn)); |
746 | 0 |
747 | 0 | return num_refs; |
748 | 0 | } |
749 | | |
750 | | static int debug_gref(struct domain *d, grant_ref_t ref) |
751 | 0 | { |
752 | 0 | int rc; |
753 | 0 | uint16_t status; |
754 | 0 | gfn_t gfn; |
755 | 0 |
756 | 0 | rc = mem_sharing_gref_to_gfn(d->grant_table, ref, &gfn, &status); |
757 | 0 | if ( rc ) |
758 | 0 | { |
759 | 0 | MEM_SHARING_DEBUG("Asked to debug [dom=%d,gref=%u]: error %d.\n", |
760 | 0 | d->domain_id, ref, rc); |
761 | 0 | return rc; |
762 | 0 | } |
763 | 0 | |
764 | 0 | MEM_SHARING_DEBUG( |
765 | 0 | "==> Grant [dom=%d,ref=%d], status=%x. ", |
766 | 0 | d->domain_id, ref, status); |
767 | 0 |
768 | 0 | return debug_gfn(d, gfn); |
769 | 0 | } |
770 | | |
771 | | static int nominate_page(struct domain *d, gfn_t gfn, |
772 | | int expected_refcnt, shr_handle_t *phandle) |
773 | 0 | { |
774 | 0 | struct p2m_domain *hp2m = p2m_get_hostp2m(d); |
775 | 0 | p2m_type_t p2mt; |
776 | 0 | p2m_access_t p2ma; |
777 | 0 | mfn_t mfn; |
778 | 0 | struct page_info *page = NULL; /* gcc... */ |
779 | 0 | int ret; |
780 | 0 |
781 | 0 | *phandle = 0UL; |
782 | 0 |
783 | 0 | mfn = get_gfn_type_access(hp2m, gfn_x(gfn), &p2mt, &p2ma, 0, NULL); |
784 | 0 |
785 | 0 | /* Check if mfn is valid */ |
786 | 0 | ret = -EINVAL; |
787 | 0 | if ( !mfn_valid(mfn) ) |
788 | 0 | goto out; |
789 | 0 |
790 | 0 | /* Return the handle if the page is already shared */ |
791 | 0 | if ( p2m_is_shared(p2mt) ) { |
792 | 0 | struct page_info *pg = __grab_shared_page(mfn); |
793 | 0 | if ( !pg ) |
794 | 0 | { |
795 | 0 | gprintk(XENLOG_ERR, |
796 | 0 | "Shared p2m entry gfn %" PRI_gfn ", but could not grab mfn %" PRI_mfn " dom%d\n", |
797 | 0 | gfn_x(gfn), mfn_x(mfn), d->domain_id); |
798 | 0 | BUG(); |
799 | 0 | } |
800 | 0 | *phandle = pg->sharing->handle; |
801 | 0 | ret = 0; |
802 | 0 | mem_sharing_page_unlock(pg); |
803 | 0 | goto out; |
804 | 0 | } |
805 | 0 |
806 | 0 | /* Check p2m type */ |
807 | 0 | if ( !p2m_is_sharable(p2mt) ) |
808 | 0 | goto out; |
809 | 0 |
810 | 0 | /* Check if there are mem_access/remapped altp2m entries for this page */ |
811 | 0 | if ( altp2m_active(d) ) |
812 | 0 | { |
813 | 0 | unsigned int i; |
814 | 0 | struct p2m_domain *ap2m; |
815 | 0 | mfn_t amfn; |
816 | 0 | p2m_type_t ap2mt; |
817 | 0 | p2m_access_t ap2ma; |
818 | 0 |
819 | 0 | altp2m_list_lock(d); |
820 | 0 |
821 | 0 | for ( i = 0; i < MAX_ALTP2M; i++ ) |
822 | 0 | { |
823 | 0 | ap2m = d->arch.altp2m_p2m[i]; |
824 | 0 | if ( !ap2m ) |
825 | 0 | continue; |
826 | 0 |
827 | 0 | amfn = get_gfn_type_access(ap2m, gfn_x(gfn), &ap2mt, &ap2ma, 0, NULL); |
828 | 0 | if ( mfn_valid(amfn) && (!mfn_eq(amfn, mfn) || ap2ma != p2ma) ) |
829 | 0 | { |
830 | 0 | altp2m_list_unlock(d); |
831 | 0 | goto out; |
832 | 0 | } |
833 | 0 | } |
834 | 0 |
835 | 0 | altp2m_list_unlock(d); |
836 | 0 | } |
837 | 0 |
838 | 0 | /* Try to convert the mfn to the sharable type */ |
839 | 0 | page = mfn_to_page(mfn); |
840 | 0 | ret = page_make_sharable(d, page, expected_refcnt); |
841 | 0 | if ( ret ) |
842 | 0 | goto out; |
843 | 0 |
844 | 0 | /* Now that the page is validated, we can lock it. There is no |
845 | 0 | * race because we're holding the p2m entry, so no one else |
846 | 0 | * could be nominating this gfn */ |
847 | 0 | ret = -ENOENT; |
848 | 0 | if ( !mem_sharing_page_lock(page) ) |
849 | 0 | goto out; |
850 | 0 |
851 | 0 | /* Initialize the shared state */ |
852 | 0 | ret = -ENOMEM; |
853 | 0 | if ( (page->sharing = |
854 | 0 | xmalloc(struct page_sharing_info)) == NULL ) |
855 | 0 | { |
856 | 0 | /* Making a page private atomically unlocks it */ |
857 | 0 | BUG_ON(page_make_private(d, page) != 0); |
858 | 0 | goto out; |
859 | 0 | } |
860 | 0 | page->sharing->pg = page; |
861 | 0 | rmap_init(page); |
862 | 0 |
863 | 0 | /* Create the handle */ |
864 | 0 | page->sharing->handle = get_next_handle(); |
865 | 0 |
866 | 0 | /* Create the local gfn info */ |
867 | 0 | if ( mem_sharing_gfn_alloc(page, d, gfn_x(gfn)) == NULL ) |
868 | 0 | { |
869 | 0 | xfree(page->sharing); |
870 | 0 | page->sharing = NULL; |
871 | 0 | BUG_ON(page_make_private(d, page) != 0); |
872 | 0 | goto out; |
873 | 0 | } |
874 | 0 |
875 | 0 | /* Change the p2m type, should never fail with p2m locked. */ |
876 | 0 | BUG_ON(p2m_change_type_one(d, gfn_x(gfn), p2mt, p2m_ram_shared)); |
877 | 0 |
878 | 0 | /* Account for this page. */ |
879 | 0 | atomic_inc(&nr_shared_mfns); |
880 | 0 |
881 | 0 | /* Update m2p entry to SHARED_M2P_ENTRY */ |
882 | 0 | set_gpfn_from_mfn(mfn_x(mfn), SHARED_M2P_ENTRY); |
883 | 0 |
884 | 0 | *phandle = page->sharing->handle; |
885 | 0 | audit_add_list(page); |
886 | 0 | mem_sharing_page_unlock(page); |
887 | 0 | ret = 0; |
888 | 0 |
889 | 0 | out: |
890 | 0 | put_gfn(d, gfn_x(gfn)); |
891 | 0 | return ret; |
892 | 0 | } |
893 | | |
894 | | static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh, |
895 | | struct domain *cd, gfn_t cgfn, shr_handle_t ch) |
896 | 0 | { |
897 | 0 | struct page_info *spage, *cpage, *firstpg, *secondpg; |
898 | 0 | gfn_info_t *gfn; |
899 | 0 | struct domain *d; |
900 | 0 | int ret = -EINVAL; |
901 | 0 | mfn_t smfn, cmfn; |
902 | 0 | p2m_type_t smfn_type, cmfn_type; |
903 | 0 | struct two_gfns tg; |
904 | 0 | struct rmap_iterator ri; |
905 | 0 |
906 | 0 | get_two_gfns(sd, gfn_x(sgfn), &smfn_type, NULL, &smfn, |
907 | 0 | cd, gfn_x(cgfn), &cmfn_type, NULL, &cmfn, |
908 | 0 | 0, &tg); |
909 | 0 |
910 | 0 | /* This tricky business is to avoid two callers deadlocking if |
911 | 0 | * grabbing pages in opposite client/source order */ |
912 | 0 | if ( mfn_x(smfn) == mfn_x(cmfn) )
913 | 0 | { |
914 | 0 | /* The pages are already the same. We could return some |
915 | 0 | * kind of error here, but no matter how you look at it, |
916 | 0 | * the pages are already 'shared'. It possibly represents |
917 | 0 | * a big problem somewhere else, but as far as sharing is |
918 | 0 | * concerned: great success! */ |
919 | 0 | ret = 0; |
920 | 0 | goto err_out; |
921 | 0 | } |
922 | 0 | else if ( mfn_x(smfn) < mfn_x(cmfn) ) |
923 | 0 | { |
924 | 0 | ret = XENMEM_SHARING_OP_S_HANDLE_INVALID; |
925 | 0 | spage = firstpg = __grab_shared_page(smfn); |
926 | 0 | if ( spage == NULL ) |
927 | 0 | goto err_out; |
928 | 0 |
929 | 0 | ret = XENMEM_SHARING_OP_C_HANDLE_INVALID; |
930 | 0 | cpage = secondpg = __grab_shared_page(cmfn); |
931 | 0 | if ( cpage == NULL ) |
932 | 0 | { |
933 | 0 | mem_sharing_page_unlock(spage); |
934 | 0 | goto err_out; |
935 | 0 | } |
936 | 0 | } else { |
937 | 0 | ret = XENMEM_SHARING_OP_C_HANDLE_INVALID; |
938 | 0 | cpage = firstpg = __grab_shared_page(cmfn); |
939 | 0 | if ( cpage == NULL ) |
940 | 0 | goto err_out; |
941 | 0 |
942 | 0 | ret = XENMEM_SHARING_OP_S_HANDLE_INVALID; |
943 | 0 | spage = secondpg = __grab_shared_page(smfn); |
944 | 0 | if ( spage == NULL ) |
945 | 0 | { |
946 | 0 | mem_sharing_page_unlock(cpage); |
947 | 0 | goto err_out; |
948 | 0 | } |
949 | 0 | } |
950 | 0 |
951 | 0 | ASSERT(smfn_type == p2m_ram_shared); |
952 | 0 | ASSERT(cmfn_type == p2m_ram_shared); |
953 | 0 |
954 | 0 | /* Check that the handles match */ |
955 | 0 | if ( spage->sharing->handle != sh ) |
956 | 0 | { |
957 | 0 | ret = XENMEM_SHARING_OP_S_HANDLE_INVALID; |
958 | 0 | mem_sharing_page_unlock(secondpg); |
959 | 0 | mem_sharing_page_unlock(firstpg); |
960 | 0 | goto err_out; |
961 | 0 | } |
962 | 0 | if ( cpage->sharing->handle != ch ) |
963 | 0 | { |
964 | 0 | ret = XENMEM_SHARING_OP_C_HANDLE_INVALID; |
965 | 0 | mem_sharing_page_unlock(secondpg); |
966 | 0 | mem_sharing_page_unlock(firstpg); |
967 | 0 | goto err_out; |
968 | 0 | } |
969 | 0 |
970 | 0 | /* Merge the lists together */ |
971 | 0 | rmap_seed_iterator(cpage, &ri); |
972 | 0 | while ( (gfn = rmap_iterate(cpage, &ri)) != NULL) |
973 | 0 | { |
974 | 0 | /* Get the source page and type, this should never fail: |
975 | 0 | * we are under shr lock, and got a successful lookup */ |
976 | 0 | BUG_ON(!get_page_and_type(spage, dom_cow, PGT_shared_page)); |
977 | 0 | /* Move the gfn_info from client list to source list. |
978 | 0 | * Don't change the type of rmap for the client page. */ |
979 | 0 | rmap_del(gfn, cpage, 0); |
980 | 0 | rmap_add(gfn, spage); |
981 | 0 | put_page_and_type(cpage); |
982 | 0 | d = get_domain_by_id(gfn->domain); |
983 | 0 | BUG_ON(!d); |
984 | 0 | BUG_ON(set_shared_p2m_entry(d, gfn->gfn, smfn)); |
985 | 0 | put_domain(d); |
986 | 0 | } |
987 | 0 | ASSERT(list_empty(&cpage->sharing->gfns)); |
988 | 0 |
989 | 0 | /* Clear the rest of the shared state */ |
990 | 0 | page_sharing_dispose(cpage); |
991 | 0 | cpage->sharing = NULL; |
992 | 0 |
993 | 0 | mem_sharing_page_unlock(secondpg); |
994 | 0 | mem_sharing_page_unlock(firstpg); |
995 | 0 |
996 | 0 | /* Free the client page */ |
997 | 0 | if ( test_and_clear_bit(_PGC_allocated, &cpage->count_info) )
998 | 0 | put_page(cpage); |
999 | 0 |
1000 | 0 | /* We managed to free a domain page. */ |
1001 | 0 | atomic_dec(&nr_shared_mfns); |
1002 | 0 | atomic_inc(&nr_saved_mfns); |
1003 | 0 | ret = 0; |
1004 | 0 | |
1005 | 0 | err_out: |
1006 | 0 | put_two_gfns(&tg); |
1007 | 0 | return ret; |
1008 | 0 | } |
1009 | | |
1010 | | int mem_sharing_add_to_physmap(struct domain *sd, unsigned long sgfn, shr_handle_t sh, |
1011 | | struct domain *cd, unsigned long cgfn) |
1012 | 0 | { |
1013 | 0 | struct page_info *spage; |
1014 | 0 | int ret = -EINVAL; |
1015 | 0 | mfn_t smfn, cmfn; |
1016 | 0 | p2m_type_t smfn_type, cmfn_type; |
1017 | 0 | struct gfn_info *gfn_info; |
1018 | 0 | struct p2m_domain *p2m = p2m_get_hostp2m(cd); |
1019 | 0 | p2m_access_t a; |
1020 | 0 | struct two_gfns tg; |
1021 | 0 |
1022 | 0 | get_two_gfns(sd, sgfn, &smfn_type, NULL, &smfn, |
1023 | 0 | cd, cgfn, &cmfn_type, &a, &cmfn, |
1024 | 0 | 0, &tg); |
1025 | 0 |
1026 | 0 | /* Get the source shared page, check and lock */ |
1027 | 0 | ret = XENMEM_SHARING_OP_S_HANDLE_INVALID; |
1028 | 0 | spage = __grab_shared_page(smfn); |
1029 | 0 | if ( spage == NULL ) |
1030 | 0 | goto err_out; |
1031 | 0 | ASSERT(smfn_type == p2m_ram_shared); |
1032 | 0 |
1033 | 0 | /* Check that the handles match */ |
1034 | 0 | if ( spage->sharing->handle != sh ) |
1035 | 0 | goto err_unlock; |
1036 | 0 |
1037 | 0 | /* Make sure the target page is a hole in the physmap. These are typically |
1038 | 0 | * p2m_mmio_dm, but also accept p2m_invalid and paged out pages. See the |
1039 | 0 | * definition of p2m_is_hole in p2m.h. */ |
1040 | 0 | if ( !p2m_is_hole(cmfn_type) ) |
1041 | 0 | { |
1042 | 0 | ret = XENMEM_SHARING_OP_C_HANDLE_INVALID; |
1043 | 0 | goto err_unlock; |
1044 | 0 | } |
1045 | 0 |
1046 | 0 | /* This is simpler than regular sharing */ |
1047 | 0 | BUG_ON(!get_page_and_type(spage, dom_cow, PGT_shared_page)); |
1048 | 0 | if ( (gfn_info = mem_sharing_gfn_alloc(spage, cd, cgfn)) == NULL ) |
1049 | 0 | { |
1050 | 0 | put_page_and_type(spage); |
1051 | 0 | ret = -ENOMEM; |
1052 | 0 | goto err_unlock; |
1053 | 0 | } |
1054 | 0 |
1055 | 0 | ret = p2m_set_entry(p2m, _gfn(cgfn), smfn, PAGE_ORDER_4K, |
1056 | 0 | p2m_ram_shared, a); |
1057 | 0 |
1058 | 0 | /* Tempted to turn this into an assert */ |
1059 | 0 | if ( ret ) |
1060 | 0 | { |
1061 | 0 | mem_sharing_gfn_destroy(spage, cd, gfn_info); |
1062 | 0 | put_page_and_type(spage); |
1063 | 0 | } else { |
1064 | 0 | /* There is a chance we're plugging a hole where a paged out page was */ |
1065 | 0 | if ( p2m_is_paging(cmfn_type) && (cmfn_type != p2m_ram_paging_out) ) |
1066 | 0 | { |
1067 | 0 | atomic_dec(&cd->paged_pages); |
1068 | 0 | /* Further, there is a chance this was a valid page. Don't leak it. */ |
1069 | 0 | if ( mfn_valid(cmfn) ) |
1070 | 0 | { |
1071 | 0 | struct page_info *cpage = mfn_to_page(cmfn); |
1072 | 0 | ASSERT(cpage != NULL); |
1073 | 0 | if ( test_and_clear_bit(_PGC_allocated, &cpage->count_info) ) |
1074 | 0 | put_page(cpage); |
1075 | 0 | } |
1076 | 0 | } |
1077 | 0 | } |
1078 | 0 |
1079 | 0 | atomic_inc(&nr_saved_mfns); |
1080 | 0 |
1081 | 0 | err_unlock: |
1082 | 0 | mem_sharing_page_unlock(spage); |
1083 | 0 | err_out: |
1084 | 0 | put_two_gfns(&tg); |
1085 | 0 | return ret; |
1086 | 0 | } |
1087 | | |
1088 | | |
1089 | | /* A note on the rationale for unshare error handling: |
1090 | | * 1. Unshare can only fail with ENOMEM; any other error condition triggers a BUG_ON().
1091 | | * 2. We notify a potential dom0 helper through a vm_event ring, but we
1092 | | * allow the notification not to go to sleep. If the event ring is already
1093 | | * full of ENOMEM warnings, the helper is aware of the situation.
1094 | | * 3. We cannot go to sleep until the unshare is resolved, because we might |
1095 | | * be buried deep into locks (e.g. something -> copy_to_user -> __hvm_copy) |
1096 | | * 4. So, we make sure we: |
1097 | | * 4.1. return an error |
1098 | | * 4.2. do not corrupt shared memory |
1099 | | * 4.3. do not corrupt guest memory |
1100 | | * 4.4. let the guest deal with it if the error propagation will reach it |
1101 | | */ |
1102 | | int __mem_sharing_unshare_page(struct domain *d, |
1103 | | unsigned long gfn, |
1104 | | uint16_t flags) |
1105 | 0 | { |
1106 | 0 | p2m_type_t p2mt; |
1107 | 0 | mfn_t mfn; |
1108 | 0 | struct page_info *page, *old_page; |
1109 | 0 | int last_gfn; |
1110 | 0 | gfn_info_t *gfn_info = NULL; |
1111 | 0 | |
1112 | 0 | mfn = get_gfn(d, gfn, &p2mt); |
1113 | 0 | |
1114 | 0 | /* Has someone already unshared it? */ |
1115 | 0 | if ( !p2m_is_shared(p2mt) ) { |
1116 | 0 | put_gfn(d, gfn); |
1117 | 0 | return 0; |
1118 | 0 | } |
1119 | 0 |
1120 | 0 | page = __grab_shared_page(mfn); |
1121 | 0 | if ( page == NULL ) |
1122 | 0 | { |
1123 | 0 | gdprintk(XENLOG_ERR, "Domain p2m is shared, but page is not: " |
1124 | 0 | "%lx\n", gfn); |
1125 | 0 | BUG(); |
1126 | 0 | } |
1127 | 0 |
1128 | 0 | gfn_info = rmap_retrieve(d->domain_id, gfn, page); |
1129 | 0 | if ( unlikely(gfn_info == NULL) ) |
1130 | 0 | { |
1131 | 0 | gdprintk(XENLOG_ERR, "Could not find gfn_info for shared gfn: " |
1132 | 0 | "%lx\n", gfn); |
1133 | 0 | BUG(); |
1134 | 0 | } |
1135 | 0 |
1136 | 0 | /* Do the accounting first. If anything fails below, we have bigger
1137 | 0 | * fish to fry. First, remove the gfn from the list. */
1138 | 0 | last_gfn = rmap_has_one_entry(page); |
1139 | 0 | if ( last_gfn ) |
1140 | 0 | { |
1141 | 0 | /* Clean up shared state. Get rid of the <domid, gfn> tuple |
1142 | 0 | * before destroying the rmap. */ |
1143 | 0 | mem_sharing_gfn_destroy(page, d, gfn_info); |
1144 | 0 | page_sharing_dispose(page); |
1145 | 0 | page->sharing = NULL; |
1146 | 0 | atomic_dec(&nr_shared_mfns); |
1147 | 0 | } |
1148 | 0 | else |
1149 | 0 | atomic_dec(&nr_saved_mfns); |
1150 | 0 |
1151 | 0 | /* If the GFN is getting destroyed drop the references to MFN |
1152 | 0 | * (possibly freeing the page), and exit early */ |
1153 | 0 | if ( flags & MEM_SHARING_DESTROY_GFN ) |
1154 | 0 | { |
1155 | 0 | if ( !last_gfn ) |
1156 | 0 | mem_sharing_gfn_destroy(page, d, gfn_info); |
1157 | 0 | put_page_and_type(page); |
1158 | 0 | mem_sharing_page_unlock(page); |
1159 | 0 | if ( last_gfn && |
1160 | 0 | test_and_clear_bit(_PGC_allocated, &page->count_info) ) |
1161 | 0 | put_page(page); |
1162 | 0 | put_gfn(d, gfn); |
1163 | 0 |
1164 | 0 | return 0; |
1165 | 0 | } |
1166 | 0 | |
1167 | 0 | if ( last_gfn ) |
1168 | 0 | { |
1169 | 0 | /* Making a page private atomically unlocks it */ |
1170 | 0 | BUG_ON(page_make_private(d, page) != 0); |
1171 | 0 | goto private_page_found; |
1172 | 0 | } |
1173 | 0 |
1174 | 0 | old_page = page; |
1175 | 0 | page = alloc_domheap_page(d, 0); |
1176 | 0 | if ( !page ) |
1177 | 0 | { |
1178 | 0 | /* Undo the decrement of nr_saved_mfns, as the retry will decrement it again. */
1179 | 0 | atomic_inc(&nr_saved_mfns); |
1180 | 0 | mem_sharing_page_unlock(old_page); |
1181 | 0 | put_gfn(d, gfn); |
1182 | 0 | /* Caller is responsible for placing an event |
1183 | 0 | * in the ring */ |
1184 | 0 | return -ENOMEM; |
1185 | 0 | } |
1186 | 0 |
1187 | 0 | copy_domain_page(page_to_mfn(page), page_to_mfn(old_page)); |
1188 | 0 |
1189 | 0 | BUG_ON(set_shared_p2m_entry(d, gfn, page_to_mfn(page))); |
1190 | 0 | mem_sharing_gfn_destroy(old_page, d, gfn_info); |
1191 | 0 | mem_sharing_page_unlock(old_page); |
1192 | 0 | put_page_and_type(old_page); |
1193 | 0 |
1194 | 0 | private_page_found: |
1195 | 0 | if ( p2m_change_type_one(d, gfn, p2m_ram_shared, p2m_ram_rw) ) |
1196 | 0 | { |
1197 | 0 | gdprintk(XENLOG_ERR, "Could not change p2m type d %hu gfn %lx.\n", |
1198 | 0 | d->domain_id, gfn); |
1199 | 0 | BUG(); |
1200 | 0 | } |
1201 | 0 |
1202 | 0 | /* Update m2p entry */ |
1203 | 0 | set_gpfn_from_mfn(mfn_x(page_to_mfn(page)), gfn); |
1204 | 0 |
1205 | 0 | /* Now that the gfn<->mfn map is properly established, |
1206 | 0 | * marking dirty is feasible */ |
1207 | 0 | paging_mark_dirty(d, page_to_mfn(page)); |
1208 | 0 | /* We do not need to unlock a private page */ |
1209 | 0 | put_gfn(d, gfn); |
1210 | 0 | return 0; |
1211 | 0 | } |
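/*
 * Caller-side sketch (annotation, not part of the original file; the helper
 * name is illustrative only): per the rationale above, unsharing only fails
 * with -ENOMEM, and the caller must then queue the vm_event itself:
 */
static inline int unshare_and_notify_example(struct domain *d,
                                             unsigned long gfn)
{
    int rc = __mem_sharing_unshare_page(d, gfn, 0);

    if ( rc == -ENOMEM )
        /* allow_sleep == 0: we may be buried deep in locks (note 3). */
        mem_sharing_notify_enomem(d, gfn, 0);

    return rc;
}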
1212 | | |
1213 | | int relinquish_shared_pages(struct domain *d) |
1214 | 0 | { |
1215 | 0 | int rc = 0; |
1216 | 0 | struct p2m_domain *p2m = p2m_get_hostp2m(d); |
1217 | 0 | unsigned long gfn, count = 0; |
1218 | 0 |
1219 | 0 | if ( p2m == NULL ) |
1220 | 0 | return 0; |
1221 | 0 |
1222 | 0 | p2m_lock(p2m); |
1223 | 0 | for ( gfn = p2m->next_shared_gfn_to_relinquish; |
1224 | 0 | gfn <= p2m->max_mapped_pfn; gfn++ ) |
1225 | 0 | { |
1226 | 0 | p2m_access_t a; |
1227 | 0 | p2m_type_t t; |
1228 | 0 | mfn_t mfn; |
1229 | 0 | int set_rc; |
1230 | 0 |
1231 | 0 | if ( atomic_read(&d->shr_pages) == 0 ) |
1232 | 0 | break; |
1233 | 0 | mfn = p2m->get_entry(p2m, _gfn(gfn), &t, &a, 0, NULL, NULL); |
1234 | 0 | if ( mfn_valid(mfn) && (t == p2m_ram_shared) ) |
1235 | 0 | { |
1236 | 0 | /* Does not fail with ENOMEM given the DESTROY flag */ |
1237 | 0 | BUG_ON(__mem_sharing_unshare_page(d, gfn, |
1238 | 0 | MEM_SHARING_DESTROY_GFN)); |
1239 | 0 | /* Clear out the p2m entry so no one else may try to |
1240 | 0 | * unshare. Must succeed: we just read the old entry and |
1241 | 0 | * we hold the p2m lock. */ |
1242 | 0 | set_rc = p2m->set_entry(p2m, _gfn(gfn), _mfn(0), PAGE_ORDER_4K, |
1243 | 0 | p2m_invalid, p2m_access_rwx, -1); |
1244 | 0 | ASSERT(set_rc == 0); |
1245 | 0 | count += 0x10; |
1246 | 0 | } |
1247 | 0 | else |
1248 | 0 | ++count; |
1249 | 0 |
1250 | 0 | /* Preempt every 2MiB (shared) or 32MiB (unshared) - arbitrary. */ |
1251 | 0 | if ( count >= 0x2000 ) |
1252 | 0 | { |
1253 | 0 | if ( hypercall_preempt_check() ) |
1254 | 0 | { |
1255 | 0 | p2m->next_shared_gfn_to_relinquish = gfn + 1; |
1256 | 0 | rc = -ERESTART; |
1257 | 0 | break; |
1258 | 0 | } |
1259 | 0 | count = 0; |
1260 | 0 | } |
1261 | 0 | } |
1262 | 0 |
1263 | 0 | p2m_unlock(p2m); |
1264 | 0 | return rc; |
1265 | 0 | } |
1266 | | |
1267 | | static int range_share(struct domain *d, struct domain *cd, |
1268 | | struct mem_sharing_op_range *range) |
1269 | 0 | { |
1270 | 0 | int rc = 0; |
1271 | 0 | shr_handle_t sh, ch; |
1272 | 0 | unsigned long start = range->opaque ?: range->first_gfn; |
1273 | 0 |
1274 | 0 | while ( range->last_gfn >= start ) |
1275 | 0 | { |
1276 | 0 | /* |
1277 | 0 | * We only break out if we run out of memory as individual pages may |
1278 | 0 | * legitimately be unsharable and we just want to skip over those. |
1279 | 0 | */ |
1280 | 0 | rc = nominate_page(d, _gfn(start), 0, &sh); |
1281 | 0 | if ( rc == -ENOMEM ) |
1282 | 0 | break; |
1283 | 0 |
1284 | 0 | if ( !rc ) |
1285 | 0 | { |
1286 | 0 | rc = nominate_page(cd, _gfn(start), 0, &ch); |
1287 | 0 | if ( rc == -ENOMEM ) |
1288 | 0 | break; |
1289 | 0 |
1290 | 0 | if ( !rc ) |
1291 | 0 | { |
1292 | 0 | /* If we get here this should be guaranteed to succeed. */ |
1293 | 0 | rc = share_pages(d, _gfn(start), sh, cd, _gfn(start), ch); |
1294 | 0 | ASSERT(!rc); |
1295 | 0 | } |
1296 | 0 | } |
1297 | 0 |
1298 | 0 | /* Check for continuation if it's not the last iteration. */ |
1299 | 0 | if ( range->last_gfn >= ++start && hypercall_preempt_check() ) |
1300 | 0 | { |
1301 | 0 | rc = 1; |
1302 | 0 | break; |
1303 | 0 | } |
1304 | 0 | } |
1305 | 0 |
1306 | 0 | range->opaque = start; |
1307 | 0 |
1308 | 0 | /* |
1309 | 0 | * The last page may fail with -EINVAL, and for range sharing we don't |
1310 | 0 | * care about that. |
1311 | 0 | */ |
1312 | 0 | if ( range->last_gfn < start && rc == -EINVAL ) |
1313 | 0 | rc = 0; |
1314 | 0 |
1315 | 0 | return rc; |
1316 | 0 | } |
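/*
 * Worked example (annotation, not in the original file): when sharing the
 * range 0x100-0x4ff, a preemption after gfn 0x2ff leaves range->opaque ==
 * 0x300 and returns 1; mem_sharing_memop() below then creates a hypercall
 * continuation, and the re-entry resumes from opaque rather than from
 * first_gfn. On completion the handler resets opaque to 0.
 */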
1317 | | |
1318 | | int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg) |
1319 | 0 | { |
1320 | 0 | int rc; |
1321 | 0 | xen_mem_sharing_op_t mso; |
1322 | 0 | struct domain *d; |
1323 | 0 |
1324 | 0 | rc = -EFAULT; |
1325 | 0 | if ( copy_from_guest(&mso, arg, 1) ) |
1326 | 0 | return rc; |
1327 | 0 |
1328 | 0 | if ( mso.op == XENMEM_sharing_op_audit ) |
1329 | 0 | return audit(); |
1330 | 0 |
1331 | 0 | rc = rcu_lock_live_remote_domain_by_id(mso.domain, &d); |
1332 | 0 | if ( rc ) |
1333 | 0 | return rc; |
1334 | 0 |
1335 | 0 | rc = xsm_mem_sharing(XSM_DM_PRIV, d); |
1336 | 0 | if ( rc ) |
1337 | 0 | goto out; |
1338 | 0 |
1339 | 0 | /* Only HAP is supported */ |
1340 | 0 | rc = -ENODEV; |
1341 | 0 | if ( !hap_enabled(d) || !d->arch.hvm_domain.mem_sharing_enabled ) |
1342 | 0 | goto out; |
1343 | 0 |
1344 | 0 | switch ( mso.op ) |
1345 | 0 | { |
1346 | 0 | case XENMEM_sharing_op_nominate_gfn: |
1347 | 0 | { |
1348 | 0 | shr_handle_t handle; |
1349 | 0 |
1350 | 0 | rc = -EINVAL; |
1351 | 0 | if ( !mem_sharing_enabled(d) ) |
1352 | 0 | goto out; |
1353 | 0 |
1354 | 0 | rc = nominate_page(d, _gfn(mso.u.nominate.u.gfn), 0, &handle); |
1355 | 0 | mso.u.nominate.handle = handle; |
1356 | 0 | } |
1357 | 0 | break; |
1358 | 0 |
1359 | 0 | case XENMEM_sharing_op_nominate_gref: |
1360 | 0 | { |
1361 | 0 | grant_ref_t gref = mso.u.nominate.u.grant_ref; |
1362 | 0 | gfn_t gfn; |
1363 | 0 | shr_handle_t handle; |
1364 | 0 |
1365 | 0 | rc = -EINVAL; |
1366 | 0 | if ( !mem_sharing_enabled(d) ) |
1367 | 0 | goto out; |
1368 | 0 | rc = mem_sharing_gref_to_gfn(d->grant_table, gref, &gfn, NULL); |
1369 | 0 | if ( rc < 0 ) |
1370 | 0 | goto out; |
1371 | 0 |
1372 | 0 | rc = nominate_page(d, gfn, 3, &handle); |
1373 | 0 | mso.u.nominate.handle = handle; |
1374 | 0 | } |
1375 | 0 | break; |
1376 | 0 |
1377 | 0 | case XENMEM_sharing_op_share: |
1378 | 0 | { |
1379 | 0 | gfn_t sgfn, cgfn; |
1380 | 0 | struct domain *cd; |
1381 | 0 | shr_handle_t sh, ch; |
1382 | 0 |
1383 | 0 | rc = -EINVAL; |
1384 | 0 | if ( !mem_sharing_enabled(d) ) |
1385 | 0 | goto out; |
1386 | 0 |
1387 | 0 | rc = rcu_lock_live_remote_domain_by_id(mso.u.share.client_domain, |
1388 | 0 | &cd); |
1389 | 0 | if ( rc ) |
1390 | 0 | goto out; |
1391 | 0 |
1392 | 0 | rc = xsm_mem_sharing_op(XSM_DM_PRIV, d, cd, mso.op); |
1393 | 0 | if ( rc ) |
1394 | 0 | { |
1395 | 0 | rcu_unlock_domain(cd); |
1396 | 0 | goto out; |
1397 | 0 | } |
1398 | 0 |
1399 | 0 | if ( !mem_sharing_enabled(cd) ) |
1400 | 0 | { |
1401 | 0 | rcu_unlock_domain(cd); |
1402 | 0 | rc = -EINVAL; |
1403 | 0 | goto out; |
1404 | 0 | } |
1405 | 0 |
1406 | 0 | if ( XENMEM_SHARING_OP_FIELD_IS_GREF(mso.u.share.source_gfn) ) |
1407 | 0 | { |
1408 | 0 | grant_ref_t gref = (grant_ref_t) |
1409 | 0 | (XENMEM_SHARING_OP_FIELD_GET_GREF( |
1410 | 0 | mso.u.share.source_gfn)); |
1411 | 0 | rc = mem_sharing_gref_to_gfn(d->grant_table, gref, &sgfn, |
1412 | 0 | NULL); |
1413 | 0 | if ( rc < 0 ) |
1414 | 0 | { |
1415 | 0 | rcu_unlock_domain(cd); |
1416 | 0 | goto out; |
1417 | 0 | } |
1418 | 0 | } |
1419 | 0 | else |
1420 | 0 | sgfn = _gfn(mso.u.share.source_gfn); |
1421 | 0 |
1422 | 0 | if ( XENMEM_SHARING_OP_FIELD_IS_GREF(mso.u.share.client_gfn) ) |
1423 | 0 | { |
1424 | 0 | grant_ref_t gref = (grant_ref_t) |
1425 | 0 | (XENMEM_SHARING_OP_FIELD_GET_GREF( |
1426 | 0 | mso.u.share.client_gfn)); |
1427 | 0 | rc = mem_sharing_gref_to_gfn(cd->grant_table, gref, &cgfn, |
1428 | 0 | NULL); |
1429 | 0 | if ( rc < 0 ) |
1430 | 0 | { |
1431 | 0 | rcu_unlock_domain(cd); |
1432 | 0 | goto out; |
1433 | 0 | } |
1434 | 0 | } |
1435 | 0 | else |
1436 | 0 | cgfn = _gfn(mso.u.share.client_gfn); |
1437 | 0 |
1438 | 0 | sh = mso.u.share.source_handle; |
1439 | 0 | ch = mso.u.share.client_handle; |
1440 | 0 |
1441 | 0 | rc = share_pages(d, sgfn, sh, cd, cgfn, ch); |
1442 | 0 |
1443 | 0 | rcu_unlock_domain(cd); |
1444 | 0 | } |
1445 | 0 | break; |
1446 | 0 |
1447 | 0 | case XENMEM_sharing_op_add_physmap: |
1448 | 0 | { |
1449 | 0 | unsigned long sgfn, cgfn; |
1450 | 0 | struct domain *cd; |
1451 | 0 | shr_handle_t sh; |
1452 | 0 |
1453 | 0 | rc = -EINVAL; |
1454 | 0 | if ( !mem_sharing_enabled(d) ) |
1455 | 0 | goto out; |
1456 | 0 |
1457 | 0 | rc = rcu_lock_live_remote_domain_by_id(mso.u.share.client_domain, |
1458 | 0 | &cd); |
1459 | 0 | if ( rc ) |
1460 | 0 | goto out; |
1461 | 0 |
1462 | 0 | rc = xsm_mem_sharing_op(XSM_DM_PRIV, d, cd, mso.op); |
1463 | 0 | if ( rc ) |
1464 | 0 | { |
1465 | 0 | rcu_unlock_domain(cd); |
1466 | 0 | goto out; |
1467 | 0 | } |
1468 | 0 |
1469 | 0 | if ( !mem_sharing_enabled(cd) ) |
1470 | 0 | { |
1471 | 0 | rcu_unlock_domain(cd); |
1472 | 0 | rc = -EINVAL; |
1473 | 0 | goto out; |
1474 | 0 | } |
1475 | 0 |
1476 | 0 | if ( XENMEM_SHARING_OP_FIELD_IS_GREF(mso.u.share.source_gfn) ) |
1477 | 0 | { |
1478 | 0 | /* Cannot add a gref to the physmap */ |
1479 | 0 | rcu_unlock_domain(cd); |
1480 | 0 | rc = -EINVAL; |
1481 | 0 | goto out; |
1482 | 0 | } |
1483 | 0 |
1484 | 0 | sgfn = mso.u.share.source_gfn; |
1485 | 0 | sh = mso.u.share.source_handle; |
1486 | 0 | cgfn = mso.u.share.client_gfn; |
1487 | 0 |
1488 | 0 | rc = mem_sharing_add_to_physmap(d, sgfn, sh, cd, cgfn); |
1489 | 0 |
1490 | 0 | rcu_unlock_domain(cd); |
1491 | 0 | } |
1492 | 0 | break; |
1493 | 0 |
1494 | 0 | case XENMEM_sharing_op_range_share: |
1495 | 0 | { |
1496 | 0 | unsigned long max_sgfn, max_cgfn; |
1497 | 0 | struct domain *cd; |
1498 | 0 |
1499 | 0 | rc = -EINVAL; |
1500 | 0 | if ( mso.u.range._pad[0] || mso.u.range._pad[1] || |
1501 | 0 | mso.u.range._pad[2] ) |
1502 | 0 | goto out; |
1503 | 0 |
1504 | 0 | /* |
1505 | 0 | * We use opaque for the hypercall continuation value. |
1506 | 0 | * Ideally the user sets this to 0 in the beginning but |
1507 | 0 | * there is no good way of enforcing that here, so we just check |
1508 | 0 | * that it's at least in range. |
1509 | 0 | */ |
1510 | 0 | if ( mso.u.range.opaque && |
1511 | 0 | (mso.u.range.opaque < mso.u.range.first_gfn || |
1512 | 0 | mso.u.range.opaque > mso.u.range.last_gfn) ) |
1513 | 0 | goto out; |
1514 | 0 |
1515 | 0 | if ( !mem_sharing_enabled(d) ) |
1516 | 0 | goto out; |
1517 | 0 |
1518 | 0 | rc = rcu_lock_live_remote_domain_by_id(mso.u.range.client_domain, |
1519 | 0 | &cd); |
1520 | 0 | if ( rc ) |
1521 | 0 | goto out; |
1522 | 0 |
1523 | 0 | /* |
1524 | 0 | * We reuse XENMEM_sharing_op_share XSM check here as this is |
1525 | 0 | * essentially the same concept repeated over multiple pages. |
1526 | 0 | */ |
1527 | 0 | rc = xsm_mem_sharing_op(XSM_DM_PRIV, d, cd, |
1528 | 0 | XENMEM_sharing_op_share); |
1529 | 0 | if ( rc ) |
1530 | 0 | { |
1531 | 0 | rcu_unlock_domain(cd); |
1532 | 0 | goto out; |
1533 | 0 | } |
1534 | 0 |
1535 | 0 | if ( !mem_sharing_enabled(cd) ) |
1536 | 0 | { |
1537 | 0 | rcu_unlock_domain(cd); |
1538 | 0 | rc = -EINVAL; |
1539 | 0 | goto out; |
1540 | 0 | } |
1541 | 0 |
1542 | 0 | /* |
1543 | 0 | * Sanity check only, the client should keep the domains paused for |
1544 | 0 | * the duration of this op. |
1545 | 0 | */ |
1546 | 0 | if ( !atomic_read(&d->pause_count) || |
1547 | 0 | !atomic_read(&cd->pause_count) ) |
1548 | 0 | { |
1549 | 0 | rcu_unlock_domain(cd); |
1550 | 0 | rc = -EINVAL; |
1551 | 0 | goto out; |
1552 | 0 | } |
1553 | 0 |
1554 | 0 | max_sgfn = domain_get_maximum_gpfn(d); |
1555 | 0 | max_cgfn = domain_get_maximum_gpfn(cd); |
1556 | 0 |
1557 | 0 | if ( max_sgfn < mso.u.range.first_gfn || |
1558 | 0 | max_sgfn < mso.u.range.last_gfn || |
1559 | 0 | max_cgfn < mso.u.range.first_gfn || |
1560 | 0 | max_cgfn < mso.u.range.last_gfn ) |
1561 | 0 | { |
1562 | 0 | rcu_unlock_domain(cd); |
1563 | 0 | rc = -EINVAL; |
1564 | 0 | goto out; |
1565 | 0 | } |
1566 | 0 |
1567 | 0 | rc = range_share(d, cd, &mso.u.range); |
1568 | 0 | rcu_unlock_domain(cd); |
1569 | 0 |
1570 | 0 | if ( rc > 0 ) |
1571 | 0 | { |
1572 | 0 | if ( __copy_to_guest(arg, &mso, 1) ) |
1573 | 0 | rc = -EFAULT; |
1574 | 0 | else |
1575 | 0 | rc = hypercall_create_continuation(__HYPERVISOR_memory_op, |
1576 | 0 | "lh", XENMEM_sharing_op, |
1577 | 0 | arg); |
1578 | 0 | } |
1579 | 0 | else |
1580 | 0 | mso.u.range.opaque = 0; |
1581 | 0 | } |
1582 | 0 | break; |
1583 | 0 |
1584 | 0 | case XENMEM_sharing_op_debug_gfn: |
1585 | 0 | rc = debug_gfn(d, _gfn(mso.u.debug.u.gfn)); |
1586 | 0 | break; |
1587 | 0 |
1588 | 0 | case XENMEM_sharing_op_debug_gref: |
1589 | 0 | rc = debug_gref(d, mso.u.debug.u.gref); |
1590 | 0 | break; |
1591 | 0 |
1592 | 0 | default: |
1593 | 0 | rc = -ENOSYS; |
1594 | 0 | break; |
1595 | 0 | } |
1596 | 0 |
1597 | 0 | if ( !rc && __copy_to_guest(arg, &mso, 1) ) |
1598 | 0 | rc = -EFAULT; |
1599 | 0 |
1600 | 0 | out: |
1601 | 0 | rcu_unlock_domain(d); |
1602 | 0 | return rc; |
1603 | 0 | } |
1604 | | |
1605 | | int mem_sharing_domctl(struct domain *d, struct xen_domctl_mem_sharing_op *mec) |
1606 | 0 | { |
1607 | 0 | int rc; |
1608 | 0 |
1609 | 0 | /* Only HAP is supported */ |
1610 | 0 | if ( !hap_enabled(d) ) |
1611 | 0 | return -ENODEV; |
1612 | 0 |
1613 | 0 | switch ( mec->op )
1614 | 0 | { |
1615 | 0 | case XEN_DOMCTL_MEM_SHARING_CONTROL: |
1616 | 0 | { |
1617 | 0 | rc = 0; |
1618 | 0 | if ( unlikely(need_iommu(d) && mec->u.enable) ) |
1619 | 0 | rc = -EXDEV; |
1620 | 0 | else |
1621 | 0 | d->arch.hvm_domain.mem_sharing_enabled = mec->u.enable; |
1622 | 0 | } |
1623 | 0 | break; |
1624 | 0 |
1625 | 0 | default: |
1626 | 0 | rc = -ENOSYS; |
1627 | 0 | } |
1628 | 0 |
1629 | 0 | return rc; |
1630 | 0 | } |
1631 | | |
1632 | | void __init mem_sharing_init(void) |
1633 | 1 | { |
1634 | 1 | printk("Initializing memory sharing.\n");
1635 | 1 | #if MEM_SHARING_AUDIT |
1636 | 1 | spin_lock_init(&shr_audit_lock); |
1637 | 1 | INIT_LIST_HEAD(&shr_audit_list); |
1638 | 1 | #endif |
1639 | 1 | } |
1640 | | |