/root/src/xen/xen/common/memory.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * memory.c |
3 | | * |
4 | | * Code to handle memory-related requests. |
5 | | * |
6 | | * Copyright (c) 2003-2004, B Dragovic |
7 | | * Copyright (c) 2003-2005, K A Fraser |
8 | | */ |
9 | | |
10 | | #include <xen/types.h> |
11 | | #include <xen/lib.h> |
12 | | #include <xen/mm.h> |
13 | | #include <xen/perfc.h> |
14 | | #include <xen/sched.h> |
15 | | #include <xen/event.h> |
16 | | #include <xen/paging.h> |
17 | | #include <xen/iocap.h> |
18 | | #include <xen/guest_access.h> |
19 | | #include <xen/hypercall.h> |
20 | | #include <xen/errno.h> |
21 | | #include <xen/tmem.h> |
22 | | #include <xen/tmem_xen.h> |
23 | | #include <xen/numa.h> |
24 | | #include <xen/mem_access.h> |
25 | | #include <xen/trace.h> |
26 | | #include <asm/current.h> |
27 | | #include <asm/hardirq.h> |
28 | | #include <asm/p2m.h> |
29 | | #include <public/memory.h> |
30 | | #include <xsm/xsm.h> |
31 | | |
32 | | struct memop_args { |
33 | | /* INPUT */ |
34 | | struct domain *domain; /* Domain to be affected. */ |
35 | | XEN_GUEST_HANDLE(xen_pfn_t) extent_list; /* List of extent base addrs. */ |
36 | | unsigned int nr_extents; /* Number of extents to allocate or free. */ |
37 | | unsigned int extent_order; /* Size of each extent. */ |
38 | | unsigned int memflags; /* Allocation flags. */ |
39 | | |
40 | | /* INPUT/OUTPUT */ |
41 | | unsigned int nr_done; /* Number of extents processed so far. */ |
42 | | int preempted; /* Was the hypercall preempted? */ |
43 | | }; |
44 | | |
45 | | #ifndef CONFIG_CTLDOM_MAX_ORDER |
46 | | #define CONFIG_CTLDOM_MAX_ORDER CONFIG_PAGEALLOC_MAX_ORDER |
47 | | #endif |
48 | | #ifndef CONFIG_PTDOM_MAX_ORDER |
49 | | #define CONFIG_PTDOM_MAX_ORDER CONFIG_HWDOM_MAX_ORDER |
50 | | #endif |
51 | | |
52 | | static unsigned int __read_mostly domu_max_order = CONFIG_DOMU_MAX_ORDER; |
53 | | static unsigned int __read_mostly ctldom_max_order = CONFIG_CTLDOM_MAX_ORDER; |
54 | | static unsigned int __read_mostly hwdom_max_order = CONFIG_HWDOM_MAX_ORDER; |
55 | | #ifdef HAS_PASSTHROUGH |
56 | | static unsigned int __read_mostly ptdom_max_order = CONFIG_PTDOM_MAX_ORDER; |
57 | | #endif |
58 | | |
59 | | static int __init parse_max_order(const char *s) |
60 | 0 | { |
61 | 0 | if ( *s != ',' ) |
62 | 0 | domu_max_order = simple_strtoul(s, &s, 0); |
63 | 0 | if ( *s == ',' && *++s != ',' ) |
64 | 0 | ctldom_max_order = simple_strtoul(s, &s, 0); |
65 | 0 | if ( *s == ',' && *++s != ',' ) |
66 | 0 | hwdom_max_order = simple_strtoul(s, &s, 0); |
67 | 0 | #ifdef HAS_PASSTHROUGH |
68 | | if ( *s == ',' && *++s != ',' ) |
69 | | ptdom_max_order = simple_strtoul(s, &s, 0); |
70 | | #endif |
71 | 0 |
72 | 0 | return *s ? -EINVAL : 0; |
73 | 0 | } |
74 | | custom_param("memop-max-order", parse_max_order); |
75 | | |
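
parse_max_order() above takes up to four comma-separated orders on the Xen command line (domU, control domain, hardware domain and, with HAS_PASSTHROUGH, passthrough domain); an empty field keeps the built-in default, and leftover text fails the whole option with -EINVAL. A standalone sketch mimicking the field handling; this is not Xen code, the defaults are invented for the demo, and libc strtoul() stands in for simple_strtoul():

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        /* "memop-max-order=9,,12": domU=9, ctldom keeps its default,
         * hwdom=12.  Empty fields skip a slot, as in parse_max_order(). */
        const char *s = "9,,12";
        unsigned int domu = 2, ctldom = 9, hwdom = 12; /* invented defaults */
        char *e;

        if ( *s != ',' )
            domu = strtoul(s, &e, 0), s = e;
        if ( *s == ',' && *++s != ',' )
            ctldom = strtoul(s, &e, 0), s = e;
        if ( *s == ',' && *++s != ',' )
            hwdom = strtoul(s, &e, 0), s = e;

        printf("domu=%u ctldom=%u hwdom=%u rc=%d\n",
               domu, ctldom, hwdom, *s ? -1 : 0);
        return 0;
    }
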
76 | | static unsigned int max_order(const struct domain *d) |
77 | 0 | { |
78 | 0 | unsigned int order = domu_max_order; |
79 | 0 |
80 | 0 | #ifdef HAS_PASSTHROUGH |
81 | | if ( cache_flush_permitted(d) && order < ptdom_max_order ) |
82 | | order = ptdom_max_order; |
83 | | #endif |
84 | 0 |
85 | 0 | if ( is_control_domain(d) && order < ctldom_max_order ) |
86 | 0 | order = ctldom_max_order; |
87 | 0 |
88 | 0 | if ( is_hardware_domain(d) && order < hwdom_max_order ) |
89 | 0 | order = hwdom_max_order; |
90 | 0 |
91 | 0 | return min(order, MAX_ORDER + 0U); |
92 | 0 | } |
93 | | |
94 | | static void increase_reservation(struct memop_args *a) |
95 | 0 | { |
96 | 0 | struct page_info *page; |
97 | 0 | unsigned long i; |
98 | 0 | xen_pfn_t mfn; |
99 | 0 | struct domain *d = a->domain; |
100 | 0 |
101 | 0 | if ( !guest_handle_is_null(a->extent_list) && |
102 | 0 | !guest_handle_subrange_okay(a->extent_list, a->nr_done, |
103 | 0 | a->nr_extents-1) ) |
104 | 0 | return; |
105 | 0 |
106 | 0 | if ( a->extent_order > max_order(current->domain) ) |
107 | 0 | return; |
108 | 0 |
109 | 0 | for ( i = a->nr_done; i < a->nr_extents; i++ ) |
110 | 0 | { |
111 | 0 | if ( i != a->nr_done && hypercall_preempt_check() ) |
112 | 0 | { |
113 | 0 | a->preempted = 1; |
114 | 0 | goto out; |
115 | 0 | } |
116 | 0 |
117 | 0 | page = alloc_domheap_pages(d, a->extent_order, a->memflags); |
118 | 0 | if ( unlikely(page == NULL) ) |
119 | 0 | { |
120 | 0 | gdprintk(XENLOG_INFO, "Could not allocate order=%u extent: " |
121 | 0 | "id=%d memflags=%#x (%lu of %u)\n", |
122 | 0 | a->extent_order, d->domain_id, a->memflags, |
123 | 0 | i, a->nr_extents); |
124 | 0 | goto out; |
125 | 0 | } |
126 | 0 |
127 | 0 | /* Inform the domain of the new page's machine address. */ |
128 | 0 | if ( !paging_mode_translate(d) && |
129 | 0 | !guest_handle_is_null(a->extent_list) ) |
130 | 0 | { |
131 | 0 | mfn = page_to_mfn(page); |
132 | 0 | if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) ) |
133 | 0 | goto out; |
134 | 0 | } |
135 | 0 | } |
136 | 0 |
137 | 0 | out: |
138 | 0 | a->nr_done = i; |
139 | 0 | } |
140 | | |
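
For orientation, the path above is reached through the XENMEM_increase_reservation sub-op of the memory_op hypercall. A minimal guest-side sketch, assuming the usual guest-kernel Xen headers provide HYPERVISOR_memory_op() and set_xen_guest_handle(); the struct fields are the ones construct_memop_from_reservation() below consumes:

    /* Sketch: ask Xen for 16 more order-0 pages.  For a non-translated
     * (PV) guest, Xen writes the new machine frame numbers into mfns[];
     * a translated guest may pass a null handle instead. */
    xen_pfn_t mfns[16];
    struct xen_memory_reservation res = {
        .nr_extents   = 16,
        .extent_order = 0,          /* 2^0 = 1 page per extent */
        .mem_flags    = 0,
        .domid        = DOMID_SELF,
    };
    long done;

    set_xen_guest_handle(res.extent_start, mfns);
    done = HYPERVISOR_memory_op(XENMEM_increase_reservation, &res);
    /* 'done' mirrors args.nr_done above: the number of extents actually
     * allocated, which may fall short if an allocation fails part way. */
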
141 | | static void populate_physmap(struct memop_args *a) |
142 | 0 | { |
143 | 0 | struct page_info *page; |
144 | 0 | unsigned int i, j; |
145 | 0 | xen_pfn_t gpfn, mfn; |
146 | 0 | struct domain *d = a->domain, *curr_d = current->domain; |
147 | 0 | bool need_tlbflush = false; |
148 | 0 | uint32_t tlbflush_timestamp = 0; |
149 | 0 |
150 | 0 | if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done, |
151 | 0 | a->nr_extents-1) ) |
152 | 0 | return; |
153 | 0 |
154 | 0 | if ( a->extent_order > (a->memflags & MEMF_populate_on_demand ? MAX_ORDER : |
155 | 0 | max_order(curr_d)) ) |
156 | 0 | return; |
157 | 0 |
158 | 0 | if ( unlikely(!d->creation_finished) ) |
159 | 0 | { |
160 | 0 | /* |
161 | 0 | * With MEMF_no_tlbflush set, alloc_heap_pages() will ignore |
162 | 0 | * TLB-flushes. After VM creation, this is a security issue (it can |
163 | 0 | * make pages accessible to guest B, when guest A may still have a |
164 | 0 | * cached mapping to them). So we do this only during domain creation, |
165 | 0 | * when the domain itself has not yet been unpaused for the first |
166 | 0 | * time. |
167 | 0 | */ |
168 | 0 | a->memflags |= MEMF_no_tlbflush; |
169 | 0 | /* |
170 | 0 | * With MEMF_no_icache_flush, alloc_heap_pages() will skip |
171 | 0 | * performing icache flushes. We do this only during domain |
172 | 0 | * creation, as once the domain is running there is a danger of |
173 | 0 | * executing instructions from stale caches if the icache flush |
174 | 0 | * is delayed. |
175 | 0 | */ |
176 | 0 | a->memflags |= MEMF_no_icache_flush; |
177 | 0 | } |
178 | 0 |
179 | 0 | for ( i = a->nr_done; i < a->nr_extents; i++ ) |
180 | 0 | { |
181 | 0 | if ( i != a->nr_done && hypercall_preempt_check() ) |
182 | 0 | { |
183 | 0 | a->preempted = 1; |
184 | 0 | goto out; |
185 | 0 | } |
186 | 0 |
187 | 0 | if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) ) |
188 | 0 | goto out; |
189 | 0 |
190 | 0 | if ( a->memflags & MEMF_populate_on_demand ) |
191 | 0 | { |
192 | 0 | /* Disallow populating PoD pages on oneself. */ |
193 | 0 | if ( d == curr_d ) |
194 | 0 | goto out; |
195 | 0 |
196 | 0 | if ( guest_physmap_mark_populate_on_demand(d, gpfn, |
197 | 0 | a->extent_order) < 0 ) |
198 | 0 | goto out; |
199 | 0 | } |
200 | 0 | else |
201 | 0 | { |
202 | 0 | if ( is_domain_direct_mapped(d) ) |
203 | 0 | { |
204 | 0 | mfn = gpfn; |
205 | 0 |
206 | 0 | for ( j = 0; j < (1U << a->extent_order); j++, mfn++ ) |
207 | 0 | { |
208 | 0 | if ( !mfn_valid(_mfn(mfn)) ) |
209 | 0 | { |
210 | 0 | gdprintk(XENLOG_INFO, "Invalid mfn %#"PRI_xen_pfn"\n", |
211 | 0 | mfn); |
212 | 0 | goto out; |
213 | 0 | } |
214 | 0 |
215 | 0 | page = mfn_to_page(mfn); |
216 | 0 | if ( !get_page(page, d) ) |
217 | 0 | { |
218 | 0 | gdprintk(XENLOG_INFO, |
219 | 0 | "mfn %#"PRI_xen_pfn" doesn't belong to d%d\n", |
220 | 0 | mfn, d->domain_id); |
221 | 0 | goto out; |
222 | 0 | } |
223 | 0 | put_page(page); |
224 | 0 | } |
225 | 0 |
226 | 0 | mfn = gpfn; |
227 | 0 | } |
228 | 0 | else |
229 | 0 | { |
230 | 0 | page = alloc_domheap_pages(d, a->extent_order, a->memflags); |
231 | 0 |
232 | 0 | if ( unlikely(!page) ) |
233 | 0 | { |
234 | 0 | if ( !tmem_enabled() || a->extent_order ) |
235 | 0 | gdprintk(XENLOG_INFO, |
236 | 0 | "Could not allocate order=%u extent: id=%d memflags=%#x (%u of %u)\n", |
237 | 0 | a->extent_order, d->domain_id, a->memflags, |
238 | 0 | i, a->nr_extents); |
239 | 0 | goto out; |
240 | 0 | } |
241 | 0 |
242 | 0 | if ( unlikely(a->memflags & MEMF_no_tlbflush) ) |
243 | 0 | { |
244 | 0 | for ( j = 0; j < (1U << a->extent_order); j++ ) |
245 | 0 | accumulate_tlbflush(&need_tlbflush, &page[j], |
246 | 0 | &tlbflush_timestamp); |
247 | 0 | } |
248 | 0 |
249 | 0 | mfn = page_to_mfn(page); |
250 | 0 | } |
251 | 0 |
252 | 0 | guest_physmap_add_page(d, _gfn(gpfn), _mfn(mfn), a->extent_order); |
253 | 0 |
254 | 0 | if ( !paging_mode_translate(d) ) |
255 | 0 | { |
256 | 0 | for ( j = 0; j < (1U << a->extent_order); j++ ) |
257 | 0 | set_gpfn_from_mfn(mfn + j, gpfn + j); |
258 | 0 |
259 | 0 | /* Inform the domain of the new page's machine address. */ |
260 | 0 | if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) ) |
261 | 0 | goto out; |
262 | 0 | } |
263 | 0 | } |
264 | 0 | } |
265 | 0 |
266 | 0 | out: |
267 | 0 | if ( need_tlbflush ) |
268 | 0 | filtered_flush_tlb_mask(tlbflush_timestamp); |
269 | 0 |
270 | 0 | if ( a->memflags & MEMF_no_icache_flush ) |
271 | 0 | invalidate_icache(); |
272 | 0 |
273 | 0 | a->nr_done = i; |
274 | 0 | } |
275 | | |
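
populate_physmap() is the direction opposite to decrease_reservation() below: here the caller chooses the guest pfns. A hedged sketch under the same assumptions as the previous example; 0x100000 is an arbitrary illustrative frame number:

    /* Sketch: back one 2MiB (order-9) extent of guest physical address
     * space.  For a translated guest the gpfn is purely input; nothing
     * is copied back. */
    xen_pfn_t gpfn = 0x100000;
    struct xen_memory_reservation res = {
        .nr_extents   = 1,
        .extent_order = 9,
        .domid        = DOMID_SELF,
    };

    set_xen_guest_handle(res.extent_start, &gpfn);
    if ( HYPERVISOR_memory_op(XENMEM_populate_physmap, &res) != 1 )
        /* No contiguous memory: balloon drivers typically retry with
         * extent_order 0 at this point. */;
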
276 | | int guest_remove_page(struct domain *d, unsigned long gmfn) |
277 | 1 | { |
278 | 1 | struct page_info *page; |
279 | 1 | #ifdef CONFIG_X86 |
280 | 1 | p2m_type_t p2mt; |
281 | 1 | #endif |
282 | 1 | mfn_t mfn; |
283 | 1 | int rc; |
284 | 1 | |
285 | 1 | #ifdef CONFIG_X86 |
286 | 1 | mfn = get_gfn_query(d, gmfn, &p2mt); |
287 | 1 | if ( unlikely(p2m_is_paging(p2mt)) ) |
288 | 0 | { |
289 | 0 | rc = guest_physmap_remove_page(d, _gfn(gmfn), mfn, 0); |
290 | 0 | put_gfn(d, gmfn); |
291 | 0 |
292 | 0 | if ( rc ) |
293 | 0 | return rc; |
294 | 0 |
295 | 0 | /* If the page hasn't yet been paged out, there is an |
296 | 0 | * actual page that needs to be released. */ |
297 | 0 | if ( p2mt == p2m_ram_paging_out ) |
298 | 0 | { |
299 | 0 | ASSERT(mfn_valid(mfn)); |
300 | 0 | page = mfn_to_page(mfn_x(mfn)); |
301 | 0 | if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) |
302 | 0 | put_page(page); |
303 | 0 | } |
304 | 0 | p2m_mem_paging_drop_page(d, gmfn, p2mt); |
305 | 0 |
306 | 0 | return 0; |
307 | 0 | } |
308 | 1 | if ( p2mt == p2m_mmio_direct ) |
309 | 0 | { |
310 | 0 | rc = clear_mmio_p2m_entry(d, gmfn, mfn, PAGE_ORDER_4K); |
311 | 0 | put_gfn(d, gmfn); |
312 | 0 |
313 | 0 | return rc; |
314 | 0 | } |
315 | 1 | #else |
316 | | mfn = gfn_to_mfn(d, _gfn(gmfn)); |
317 | | #endif |
318 | 1 | if ( unlikely(!mfn_valid(mfn)) ) |
319 | 0 | { |
320 | 0 | put_gfn(d, gmfn); |
321 | 0 | gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n", |
322 | 0 | d->domain_id, gmfn); |
323 | 0 |
324 | 0 | return -EINVAL; |
325 | 0 | } |
326 | 1 | |
327 | 1 | #ifdef CONFIG_X86 |
328 | 1 | if ( p2m_is_shared(p2mt) ) |
329 | 0 | { |
330 | 0 | /* |
331 | 0 | * Unshare the page, bail out on error. We unshare because we |
332 | 0 | * might be the only one using this shared page, and we need to |
333 | 0 | * trigger proper cleanup. Once done, this is like any other page. |
334 | 0 | */ |
335 | 0 | rc = mem_sharing_unshare_page(d, gmfn, 0); |
336 | 0 | if ( rc ) |
337 | 0 | { |
338 | 0 | put_gfn(d, gmfn); |
339 | 0 | (void)mem_sharing_notify_enomem(d, gmfn, 0); |
340 | 0 |
341 | 0 | return rc; |
342 | 0 | } |
343 | 0 | /* Maybe the mfn changed */ |
344 | 0 | mfn = get_gfn_query_unlocked(d, gmfn, &p2mt); |
345 | 0 | ASSERT(!p2m_is_shared(p2mt)); |
346 | 0 | } |
347 | 1 | #endif /* CONFIG_X86 */ |
348 | 1 | |
349 | 1 | page = mfn_to_page(mfn_x(mfn)); |
350 | 1 | if ( unlikely(!get_page(page, d)) ) |
351 | 0 | { |
352 | 0 | put_gfn(d, gmfn); |
353 | 0 | gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id); |
354 | 0 |
355 | 0 | return -ENXIO; |
356 | 0 | } |
357 | 1 | |
358 | 1 | rc = guest_physmap_remove_page(d, _gfn(gmfn), mfn, 0); |
359 | 1 | |
360 | 1 | #ifdef _PGT_pinned |
361 | 1 | if ( !rc && test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) ) |
362 | 0 | put_page_and_type(page); |
363 | 1 | #endif |
364 | 1 | |
365 | 1 | /* |
366 | 1 | * With the lack of an IOMMU on some platforms, a domain with a |
367 | 1 | * DMA-capable device must get back the same pfn when the hypercall |
368 | 1 | * populate_physmap is later called. |
369 | 1 | * |
370 | 1 | * For this purpose (and to match populate_physmap() behavior), the page |
371 | 1 | * is kept allocated. |
372 | 1 | */ |
373 | 1 | if ( !rc && !is_domain_direct_mapped(d) && |
374 | 1 | test_and_clear_bit(_PGC_allocated, &page->count_info) ) |
375 | 1 | put_page(page); |
376 | 1 | |
377 | 1 | put_page(page); |
378 | 1 | put_gfn(d, gmfn); |
379 | 1 | |
380 | 1 | return rc; |
381 | 1 | } |
382 | | |
383 | | static void decrease_reservation(struct memop_args *a) |
384 | 0 | { |
385 | 0 | unsigned long i, j; |
386 | 0 | xen_pfn_t gmfn; |
387 | 0 |
388 | 0 | if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done, |
389 | 0 | a->nr_extents-1) || |
390 | 0 | a->extent_order > max_order(current->domain) ) |
391 | 0 | return; |
392 | 0 |
393 | 0 | for ( i = a->nr_done; i < a->nr_extents; i++ ) |
394 | 0 | { |
395 | 0 | if ( i != a->nr_done && hypercall_preempt_check() ) |
396 | 0 | { |
397 | 0 | a->preempted = 1; |
398 | 0 | goto out; |
399 | 0 | } |
400 | 0 |
401 | 0 | if ( unlikely(__copy_from_guest_offset(&gmfn, a->extent_list, i, 1)) ) |
402 | 0 | goto out; |
403 | 0 |
404 | 0 | if ( tb_init_done ) |
405 | 0 | { |
406 | 0 | struct { |
407 | 0 | u64 gfn; |
408 | 0 | int d:16,order:16; |
409 | 0 | } t; |
410 | 0 |
411 | 0 | t.gfn = gmfn; |
412 | 0 | t.d = a->domain->domain_id; |
413 | 0 | t.order = a->extent_order; |
414 | 0 | |
415 | 0 | __trace_var(TRC_MEM_DECREASE_RESERVATION, 0, sizeof(t), &t); |
416 | 0 | } |
417 | 0 |
418 | 0 | /* See if populate-on-demand wants to handle this */ |
419 | 0 | if ( is_hvm_domain(a->domain) |
420 | 0 | && p2m_pod_decrease_reservation(a->domain, _gfn(gmfn), |
421 | 0 | a->extent_order) ) |
422 | 0 | continue; |
423 | 0 |
424 | 0 | for ( j = 0; j < (1 << a->extent_order); j++ ) |
425 | 0 | if ( guest_remove_page(a->domain, gmfn + j) ) |
426 | 0 | goto out; |
427 | 0 | } |
428 | 0 |
429 | 0 | out: |
430 | 0 | a->nr_done = i; |
431 | 0 | } |
432 | | |
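
The classic caller of decrease_reservation() is a guest balloon driver handing pages back to Xen; each extent is then torn down page by page via guest_remove_page() above. A sketch under the same assumptions as the earlier examples; 'ballooned_gpfn' is a hypothetical pfn the guest has already removed from its own allocator:

    /* Sketch: release one of our own pages back to the hypervisor. */
    xen_pfn_t gpfn = ballooned_gpfn;
    struct xen_memory_reservation res = {
        .nr_extents   = 1,
        .extent_order = 0,
        .domid        = DOMID_SELF,
    };

    set_xen_guest_handle(res.extent_start, &gpfn);
    if ( HYPERVISOR_memory_op(XENMEM_decrease_reservation, &res) != 1 )
        /* Nothing was removed; return the page to local use. */;
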
433 | | static bool propagate_node(unsigned int xmf, unsigned int *memflags) |
434 | 0 | { |
435 | 0 | const struct domain *currd = current->domain; |
436 | 0 |
437 | 0 | BUILD_BUG_ON(XENMEMF_get_node(0) != NUMA_NO_NODE); |
438 | 0 | BUILD_BUG_ON(MEMF_get_node(0) != NUMA_NO_NODE); |
439 | 0 |
440 | 0 | if ( XENMEMF_get_node(xmf) == NUMA_NO_NODE ) |
441 | 0 | return true; |
442 | 0 |
443 | 0 | if ( is_hardware_domain(currd) || is_control_domain(currd) ) |
444 | 0 | { |
445 | 0 | if ( XENMEMF_get_node(xmf) >= MAX_NUMNODES ) |
446 | 0 | return false; |
447 | 0 |
448 | 0 | *memflags |= MEMF_node(XENMEMF_get_node(xmf)); |
449 | 0 | if ( xmf & XENMEMF_exact_node_request ) |
450 | 0 | *memflags |= MEMF_exact_node; |
451 | 0 | } |
452 | 0 | else if ( xmf & XENMEMF_exact_node_request ) |
453 | 0 | return false; |
454 | 0 |
455 | 0 | return true; |
456 | 0 | } |
457 | | |
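
The two BUILD_BUG_ONs above pin the encoding down: an all-zero flags word must decode to NUMA_NO_NODE in both the public (XENMEMF_*) and internal (MEMF_*) forms. A hedged example of a privileged caller pinning an allocation to one physical node; XENMEMF_node() is the encoding counterpart, in the same public header, of the XENMEMF_get_node() used above, and 'target' is a hypothetical domid:

    /* Sketch: hardware/control domains may request pnode 1 exactly;
     * any other caller setting XENMEMF_exact_node_request is refused
     * by propagate_node() above. */
    struct xen_memory_reservation res = {
        .nr_extents   = 1,
        .extent_order = 0,
        .mem_flags    = XENMEMF_node(1) | XENMEMF_exact_node_request,
        .domid        = target,
    };
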
458 | | static long memory_exchange(XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg) |
459 | 0 | { |
460 | 0 | struct xen_memory_exchange exch; |
461 | 0 | PAGE_LIST_HEAD(in_chunk_list); |
462 | 0 | PAGE_LIST_HEAD(out_chunk_list); |
463 | 0 | unsigned long in_chunk_order, out_chunk_order; |
464 | 0 | xen_pfn_t gpfn, gmfn, mfn; |
465 | 0 | unsigned long i, j, k; |
466 | 0 | unsigned int memflags = 0; |
467 | 0 | long rc = 0; |
468 | 0 | struct domain *d; |
469 | 0 | struct page_info *page; |
470 | 0 |
471 | 0 | if ( copy_from_guest(&exch, arg, 1) ) |
472 | 0 | return -EFAULT; |
473 | 0 |
474 | 0 | if ( max(exch.in.extent_order, exch.out.extent_order) > |
475 | 0 | max_order(current->domain) ) |
476 | 0 | { |
477 | 0 | rc = -EPERM; |
478 | 0 | goto fail_early; |
479 | 0 | } |
480 | 0 |
481 | 0 | /* Various sanity checks. */ |
482 | 0 | if ( (exch.nr_exchanged > exch.in.nr_extents) || |
483 | 0 | /* Input and output domain identifiers match? */ |
484 | 0 | (exch.in.domid != exch.out.domid) || |
485 | 0 | /* Sizes of input and output lists do not overflow a long? */ |
486 | 0 | ((~0UL >> exch.in.extent_order) < exch.in.nr_extents) || |
487 | 0 | ((~0UL >> exch.out.extent_order) < exch.out.nr_extents) || |
488 | 0 | /* Sizes of input and output lists match? */ |
489 | 0 | ((exch.in.nr_extents << exch.in.extent_order) != |
490 | 0 | (exch.out.nr_extents << exch.out.extent_order)) ) |
491 | 0 | { |
492 | 0 | rc = -EINVAL; |
493 | 0 | goto fail_early; |
494 | 0 | } |
495 | 0 |
496 | 0 | if ( !guest_handle_subrange_okay(exch.in.extent_start, exch.nr_exchanged, |
497 | 0 | exch.in.nr_extents - 1) ) |
498 | 0 | { |
499 | 0 | rc = -EFAULT; |
500 | 0 | goto fail_early; |
501 | 0 | } |
502 | 0 |
503 | 0 | if ( exch.in.extent_order <= exch.out.extent_order ) |
504 | 0 | { |
505 | 0 | in_chunk_order = exch.out.extent_order - exch.in.extent_order; |
506 | 0 | out_chunk_order = 0; |
507 | 0 |
508 | 0 | if ( !guest_handle_subrange_okay(exch.out.extent_start, |
509 | 0 | exch.nr_exchanged >> in_chunk_order, |
510 | 0 | exch.out.nr_extents - 1) ) |
511 | 0 | { |
512 | 0 | rc = -EFAULT; |
513 | 0 | goto fail_early; |
514 | 0 | } |
515 | 0 | } |
516 | 0 | else |
517 | 0 | { |
518 | 0 | in_chunk_order = 0; |
519 | 0 | out_chunk_order = exch.in.extent_order - exch.out.extent_order; |
520 | 0 |
521 | 0 | if ( !guest_handle_subrange_okay(exch.out.extent_start, |
522 | 0 | exch.nr_exchanged << out_chunk_order, |
523 | 0 | exch.out.nr_extents - 1) ) |
524 | 0 | { |
525 | 0 | rc = -EFAULT; |
526 | 0 | goto fail_early; |
527 | 0 | } |
528 | 0 | } |
529 | 0 |
530 | 0 | if ( unlikely(!propagate_node(exch.out.mem_flags, &memflags)) ) |
531 | 0 | { |
532 | 0 | rc = -EINVAL; |
533 | 0 | goto fail_early; |
534 | 0 | } |
535 | 0 |
536 | 0 | d = rcu_lock_domain_by_any_id(exch.in.domid); |
537 | 0 | if ( d == NULL ) |
538 | 0 | { |
539 | 0 | rc = -ESRCH; |
540 | 0 | goto fail_early; |
541 | 0 | } |
542 | 0 |
543 | 0 | rc = xsm_memory_exchange(XSM_TARGET, d); |
544 | 0 | if ( rc ) |
545 | 0 | { |
546 | 0 | rcu_unlock_domain(d); |
547 | 0 | goto fail_early; |
548 | 0 | } |
549 | 0 |
550 | 0 | memflags |= MEMF_bits(domain_clamp_alloc_bitsize( |
551 | 0 | d, |
552 | 0 | XENMEMF_get_address_bits(exch.out.mem_flags) ? : |
553 | 0 | (BITS_PER_LONG+PAGE_SHIFT))); |
554 | 0 |
555 | 0 | for ( i = (exch.nr_exchanged >> in_chunk_order); |
556 | 0 | i < (exch.in.nr_extents >> in_chunk_order); |
557 | 0 | i++ ) |
558 | 0 | { |
559 | 0 | if ( i != (exch.nr_exchanged >> in_chunk_order) && |
560 | 0 | hypercall_preempt_check() ) |
561 | 0 | { |
562 | 0 | exch.nr_exchanged = i << in_chunk_order; |
563 | 0 | rcu_unlock_domain(d); |
564 | 0 | if ( __copy_field_to_guest(arg, &exch, nr_exchanged) ) |
565 | 0 | return -EFAULT; |
566 | 0 | return hypercall_create_continuation( |
567 | 0 | __HYPERVISOR_memory_op, "lh", XENMEM_exchange, arg); |
568 | 0 | } |
569 | 0 |
570 | 0 | /* Steal a chunk's worth of input pages from the domain. */ |
571 | 0 | for ( j = 0; j < (1UL << in_chunk_order); j++ ) |
572 | 0 | { |
573 | 0 | if ( unlikely(__copy_from_guest_offset( |
574 | 0 | &gmfn, exch.in.extent_start, (i<<in_chunk_order)+j, 1)) ) |
575 | 0 | { |
576 | 0 | rc = -EFAULT; |
577 | 0 | goto fail; |
578 | 0 | } |
579 | 0 |
580 | 0 | for ( k = 0; k < (1UL << exch.in.extent_order); k++ ) |
581 | 0 | { |
582 | 0 | #ifdef CONFIG_X86 |
583 | 0 | p2m_type_t p2mt; |
584 | 0 |
585 | 0 | /* Shared pages cannot be exchanged */ |
586 | 0 | mfn = mfn_x(get_gfn_unshare(d, gmfn + k, &p2mt)); |
587 | 0 | if ( p2m_is_shared(p2mt) ) |
588 | 0 | { |
589 | 0 | put_gfn(d, gmfn + k); |
590 | 0 | rc = -ENOMEM; |
591 | 0 | goto fail; |
592 | 0 | } |
593 | 0 | #else /* !CONFIG_X86 */ |
594 | | mfn = mfn_x(gfn_to_mfn(d, _gfn(gmfn + k))); |
595 | | #endif |
596 | 0 | if ( unlikely(!mfn_valid(_mfn(mfn))) ) |
597 | 0 | { |
598 | 0 | put_gfn(d, gmfn + k); |
599 | 0 | rc = -EINVAL; |
600 | 0 | goto fail; |
601 | 0 | } |
602 | 0 |
603 | 0 | page = mfn_to_page(mfn); |
604 | 0 |
605 | 0 | rc = steal_page(d, page, MEMF_no_refcount); |
606 | 0 | if ( unlikely(rc) ) |
607 | 0 | { |
608 | 0 | put_gfn(d, gmfn + k); |
609 | 0 | goto fail; |
610 | 0 | } |
611 | 0 |
612 | 0 | page_list_add(page, &in_chunk_list); |
613 | 0 | put_gfn(d, gmfn + k); |
614 | 0 | } |
615 | 0 | } |
616 | 0 |
617 | 0 | /* Allocate a chunk's worth of anonymous output pages. */ |
618 | 0 | for ( j = 0; j < (1UL << out_chunk_order); j++ ) |
619 | 0 | { |
620 | 0 | page = alloc_domheap_pages(d, exch.out.extent_order, |
621 | 0 | MEMF_no_owner | memflags); |
622 | 0 | if ( unlikely(page == NULL) ) |
623 | 0 | { |
624 | 0 | rc = -ENOMEM; |
625 | 0 | goto fail; |
626 | 0 | } |
627 | 0 |
628 | 0 | page_list_add(page, &out_chunk_list); |
629 | 0 | } |
630 | 0 |
631 | 0 | /* |
632 | 0 | * Success! Beyond this point we cannot fail for this chunk. |
633 | 0 | */ |
634 | 0 |
635 | 0 | /* Destroy final reference to each input page. */ |
636 | 0 | while ( (page = page_list_remove_head(&in_chunk_list)) ) |
637 | 0 | { |
638 | 0 | unsigned long gfn; |
639 | 0 |
640 | 0 | if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) ) |
641 | 0 | BUG(); |
642 | 0 | mfn = page_to_mfn(page); |
643 | 0 | gfn = mfn_to_gmfn(d, mfn); |
644 | 0 | /* Pages were unshared above */ |
645 | 0 | BUG_ON(SHARED_M2P(gfn)); |
646 | 0 | if ( guest_physmap_remove_page(d, _gfn(gfn), _mfn(mfn), 0) ) |
647 | 0 | domain_crash(d); |
648 | 0 | put_page(page); |
649 | 0 | } |
650 | 0 |
651 | 0 | /* Assign each output page to the domain. */ |
652 | 0 | for ( j = 0; (page = page_list_remove_head(&out_chunk_list)); ++j ) |
653 | 0 | { |
654 | 0 | if ( assign_pages(d, page, exch.out.extent_order, |
655 | 0 | MEMF_no_refcount) ) |
656 | 0 | { |
657 | 0 | unsigned long dec_count; |
658 | 0 | bool_t drop_dom_ref; |
659 | 0 |
660 | 0 | /* |
661 | 0 | * Pages in in_chunk_list are stolen without decreasing |
662 | 0 | * tot_pages. If the domain is dying when we assign pages, we |
663 | 0 | * need to decrease the count here. Pages that have already |
664 | 0 | * been assigned are covered by |
665 | 0 | * domain_relinquish_resources(). |
666 | 0 | */ |
667 | 0 | dec_count = (((1UL << exch.in.extent_order) * |
668 | 0 | (1UL << in_chunk_order)) - |
669 | 0 | (j * (1UL << exch.out.extent_order))); |
670 | 0 |
671 | 0 | spin_lock(&d->page_alloc_lock); |
672 | 0 | drop_dom_ref = (dec_count && |
673 | 0 | !domain_adjust_tot_pages(d, -dec_count)); |
674 | 0 | spin_unlock(&d->page_alloc_lock); |
675 | 0 |
676 | 0 | if ( drop_dom_ref ) |
677 | 0 | put_domain(d); |
678 | 0 |
679 | 0 | free_domheap_pages(page, exch.out.extent_order); |
680 | 0 | goto dying; |
681 | 0 | } |
682 | 0 |
683 | 0 | if ( __copy_from_guest_offset(&gpfn, exch.out.extent_start, |
684 | 0 | (i << out_chunk_order) + j, 1) ) |
685 | 0 | { |
686 | 0 | rc = -EFAULT; |
687 | 0 | continue; |
688 | 0 | } |
689 | 0 |
690 | 0 | mfn = page_to_mfn(page); |
691 | 0 | guest_physmap_add_page(d, _gfn(gpfn), _mfn(mfn), |
692 | 0 | exch.out.extent_order); |
693 | 0 |
694 | 0 | if ( !paging_mode_translate(d) ) |
695 | 0 | { |
696 | 0 | for ( k = 0; k < (1UL << exch.out.extent_order); k++ ) |
697 | 0 | set_gpfn_from_mfn(mfn + k, gpfn + k); |
698 | 0 | if ( __copy_to_guest_offset(exch.out.extent_start, |
699 | 0 | (i << out_chunk_order) + j, |
700 | 0 | &mfn, 1) ) |
701 | 0 | rc = -EFAULT; |
702 | 0 | } |
703 | 0 | } |
704 | 0 | BUG_ON( !(d->is_dying) && (j != (1UL << out_chunk_order)) ); |
705 | 0 |
706 | 0 | if ( rc ) |
707 | 0 | goto fail; |
708 | 0 | } |
709 | 0 |
710 | 0 | exch.nr_exchanged = exch.in.nr_extents; |
711 | 0 | if ( __copy_field_to_guest(arg, &exch, nr_exchanged) ) |
712 | 0 | rc = -EFAULT; |
713 | 0 | rcu_unlock_domain(d); |
714 | 0 | return rc; |
715 | 0 |
716 | 0 | /* |
717 | 0 | * Failed a chunk! Free any partial chunk work. Tell caller how many |
718 | 0 | * chunks succeeded. |
719 | 0 | */ |
720 | 0 | fail: |
721 | 0 | /* Reassign any input pages we managed to steal. */ |
722 | 0 | while ( (page = page_list_remove_head(&in_chunk_list)) ) |
723 | 0 | if ( assign_pages(d, page, 0, MEMF_no_refcount) ) |
724 | 0 | { |
725 | 0 | BUG_ON(!d->is_dying); |
726 | 0 | if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) |
727 | 0 | put_page(page); |
728 | 0 | } |
729 | 0 |
730 | 0 | dying: |
731 | 0 | rcu_unlock_domain(d); |
732 | 0 | /* Free any output pages we managed to allocate. */ |
733 | 0 | while ( (page = page_list_remove_head(&out_chunk_list)) ) |
734 | 0 | free_domheap_pages(page, exch.out.extent_order); |
735 | 0 |
736 | 0 | exch.nr_exchanged = i << in_chunk_order; |
737 | 0 |
738 | 0 | fail_early: |
739 | 0 | if ( __copy_field_to_guest(arg, &exch, nr_exchanged) ) |
740 | 0 | rc = -EFAULT; |
741 | 0 | return rc; |
742 | 0 | } |
743 | | |
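
The sanity checks at the top of memory_exchange() reduce to one invariant: (in.nr_extents << in.extent_order) == (out.nr_extents << out.extent_order), i.e. both sides describe the same number of pages. A sketch of the classic use, trading 16 scattered pages for one machine-contiguous order-4 extent below 4GiB (e.g. a PV guest assembling a DMA buffer); in_mfns[] and out_mfns[] are hypothetical caller-owned arrays:

    /* Sketch: 16 * 2^0 pages in == 1 * 2^4 pages out, satisfying the
     * size check above.  On success out_mfns[0] holds the base of the
     * new contiguous extent. */
    struct xen_memory_exchange exch = {
        .in  = { .nr_extents = 16, .extent_order = 0, .domid = DOMID_SELF },
        .out = { .nr_extents = 1,  .extent_order = 4, .domid = DOMID_SELF,
                 .mem_flags  = XENMEMF_address_bits(32) },
    };
    long rc;

    set_xen_guest_handle(exch.in.extent_start, in_mfns);
    set_xen_guest_handle(exch.out.extent_start, out_mfns);
    rc = HYPERVISOR_memory_op(XENMEM_exchange, &exch);
    /* On failure, exch.nr_exchanged reports how many input extents
     * completed before the failing chunk, as maintained above. */
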
744 | | static int xenmem_add_to_physmap(struct domain *d, |
745 | | struct xen_add_to_physmap *xatp, |
746 | | unsigned int start) |
747 | 3 | { |
748 | 3 | unsigned int done = 0; |
749 | 3 | long rc = 0; |
750 | 3 | union xen_add_to_physmap_batch_extra extra; |
751 | 3 | |
752 | 3 | if ( xatp->space != XENMAPSPACE_gmfn_foreign ) |
753 | 3 | extra.res0 = 0; |
754 | 3 | else |
755 | 0 | extra.foreign_domid = DOMID_INVALID; |
756 | 3 | |
757 | 3 | if ( xatp->space != XENMAPSPACE_gmfn_range ) |
758 | 3 | return xenmem_add_to_physmap_one(d, xatp->space, extra, |
759 | 3 | xatp->idx, _gfn(xatp->gpfn)); |
760 | 3 | |
761 | 0 | if ( xatp->size < start ) |
762 | 0 | return -EILSEQ; |
763 | 0 |
764 | 0 | xatp->idx += start; |
765 | 0 | xatp->gpfn += start; |
766 | 0 | xatp->size -= start; |
767 | 0 |
768 | 0 | #ifdef CONFIG_HAS_PASSTHROUGH |
769 | 0 | if ( need_iommu(d) ) |
770 | 0 | this_cpu(iommu_dont_flush_iotlb) = 1; |
771 | 0 | #endif |
772 | 0 |
773 | 0 | while ( xatp->size > done ) |
774 | 0 | { |
775 | 0 | rc = xenmem_add_to_physmap_one(d, xatp->space, extra, |
776 | 0 | xatp->idx, _gfn(xatp->gpfn)); |
777 | 0 | if ( rc < 0 ) |
778 | 0 | break; |
779 | 0 |
780 | 0 | xatp->idx++; |
781 | 0 | xatp->gpfn++; |
782 | 0 |
783 | 0 | /* Check for continuation if it's not the last iteration. */ |
784 | 0 | if ( xatp->size > ++done && hypercall_preempt_check() ) |
785 | 0 | { |
786 | 0 | rc = start + done; |
787 | 0 | break; |
788 | 0 | } |
789 | 0 | } |
790 | 0 |
791 | 0 | #ifdef CONFIG_HAS_PASSTHROUGH |
792 | 0 | if ( need_iommu(d) ) |
793 | 0 | { |
794 | 0 | int ret; |
795 | 0 |
796 | 0 | this_cpu(iommu_dont_flush_iotlb) = 0; |
797 | 0 |
798 | 0 | ret = iommu_iotlb_flush(d, xatp->idx - done, done); |
799 | 0 | if ( unlikely(ret) && rc >= 0 ) |
800 | 0 | rc = ret; |
801 | 0 |
802 | 0 | ret = iommu_iotlb_flush(d, xatp->gpfn - done, done); |
803 | 0 | if ( unlikely(ret) && rc >= 0 ) |
804 | 0 | rc = ret; |
805 | 0 | } |
806 | 0 | #endif |
807 | 0 |
808 | 0 | return rc; |
809 | 0 | } |
810 | | |
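
Only XENMAPSPACE_gmfn_range loops and yields continuations here; every other space is a single call into the architecture's xenmem_add_to_physmap_one(). A familiar single-shot use, sketched with the same assumed guest wrappers, is an HVM guest placing its shared-info frame into its own physmap; 'pfn' is a hypothetical free gpfn picked by the guest:

    /* Sketch: map this domain's shared-info frame at guest pfn 'pfn'. */
    struct xen_add_to_physmap xatp = {
        .domid = DOMID_SELF,
        .space = XENMAPSPACE_shared_info,
        .idx   = 0,              /* there is a single shared-info frame */
        .gpfn  = pfn,
    };

    if ( HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp) )
        /* handle failure */;
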
811 | | static int xenmem_add_to_physmap_batch(struct domain *d, |
812 | | struct xen_add_to_physmap_batch *xatpb, |
813 | | unsigned int start) |
814 | 0 | { |
815 | 0 | unsigned int done = 0; |
816 | 0 | int rc; |
817 | 0 |
818 | 0 | if ( xatpb->size < start ) |
819 | 0 | return -EILSEQ; |
820 | 0 |
821 | 0 | guest_handle_add_offset(xatpb->idxs, start); |
822 | 0 | guest_handle_add_offset(xatpb->gpfns, start); |
823 | 0 | guest_handle_add_offset(xatpb->errs, start); |
824 | 0 | xatpb->size -= start; |
825 | 0 |
826 | 0 | while ( xatpb->size > done ) |
827 | 0 | { |
828 | 0 | xen_ulong_t idx; |
829 | 0 | xen_pfn_t gpfn; |
830 | 0 |
831 | 0 | if ( unlikely(__copy_from_guest_offset(&idx, xatpb->idxs, 0, 1)) ) |
832 | 0 | { |
833 | 0 | rc = -EFAULT; |
834 | 0 | goto out; |
835 | 0 | } |
836 | 0 |
837 | 0 | if ( unlikely(__copy_from_guest_offset(&gpfn, xatpb->gpfns, 0, 1)) ) |
838 | 0 | { |
839 | 0 | rc = -EFAULT; |
840 | 0 | goto out; |
841 | 0 | } |
842 | 0 |
843 | 0 | rc = xenmem_add_to_physmap_one(d, xatpb->space, |
844 | 0 | xatpb->u, |
845 | 0 | idx, _gfn(gpfn)); |
846 | 0 |
847 | 0 | if ( unlikely(__copy_to_guest_offset(xatpb->errs, 0, &rc, 1)) ) |
848 | 0 | { |
849 | 0 | rc = -EFAULT; |
850 | 0 | goto out; |
851 | 0 | } |
852 | 0 |
853 | 0 | guest_handle_add_offset(xatpb->idxs, 1); |
854 | 0 | guest_handle_add_offset(xatpb->gpfns, 1); |
855 | 0 | guest_handle_add_offset(xatpb->errs, 1); |
856 | 0 |
857 | 0 | /* Check for continuation if it's not the last iteration. */ |
858 | 0 | if ( xatpb->size > ++done && hypercall_preempt_check() ) |
859 | 0 | { |
860 | 0 | rc = start + done; |
861 | 0 | goto out; |
862 | 0 | } |
863 | 0 | } |
864 | 0 |
865 | 0 | rc = 0; |
866 | 0 |
867 | 0 | out: |
868 | 0 | return rc; |
869 | 0 | } |
870 | | |
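
The batch variant walks parallel idxs/gpfns/errs arrays and is the only route to XENMAPSPACE_gmfn_foreign (the single-shot sub-op rejects that space with -ENOSYS in do_memory_op() below). A hedged toolstack-side sketch; the frame numbers and the source domain id 5 are invented for illustration:

    /* Sketch: map two frames of domain 5 into our own physmap.  idxs[]
     * are gfns in the foreign guest, gpfns[] the local slots; per-entry
     * status comes back in errs[]. */
    xen_ulong_t idxs[2]  = { 0x1000, 0x1001 };
    xen_pfn_t   gpfns[2] = { 0x80000, 0x80001 };
    int         errs[2];
    struct xen_add_to_physmap_batch xatpb = {
        .domid = DOMID_SELF,
        .space = XENMAPSPACE_gmfn_foreign,
        .size  = 2,
        .u     = { .foreign_domid = 5 },
    };

    set_xen_guest_handle(xatpb.idxs,  idxs);
    set_xen_guest_handle(xatpb.gpfns, gpfns);
    set_xen_guest_handle(xatpb.errs,  errs);
    HYPERVISOR_memory_op(XENMEM_add_to_physmap_batch, &xatpb);
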
871 | | static int construct_memop_from_reservation( |
872 | | const struct xen_memory_reservation *r, |
873 | | struct memop_args *a) |
874 | 0 | { |
875 | 0 | unsigned int address_bits; |
876 | 0 |
877 | 0 | a->extent_list = r->extent_start; |
878 | 0 | a->nr_extents = r->nr_extents; |
879 | 0 | a->extent_order = r->extent_order; |
880 | 0 | a->memflags = 0; |
881 | 0 |
882 | 0 | address_bits = XENMEMF_get_address_bits(r->mem_flags); |
883 | 0 | if ( (address_bits != 0) && |
884 | 0 | (address_bits < (get_order_from_pages(max_page) + PAGE_SHIFT)) ) |
885 | 0 | { |
886 | 0 | if ( address_bits <= PAGE_SHIFT ) |
887 | 0 | return -EINVAL; |
888 | 0 | a->memflags = MEMF_bits(address_bits); |
889 | 0 | } |
890 | 0 |
891 | 0 | if ( r->mem_flags & XENMEMF_vnode ) |
892 | 0 | { |
893 | 0 | nodeid_t vnode, pnode; |
894 | 0 | struct domain *d = a->domain; |
895 | 0 |
896 | 0 | read_lock(&d->vnuma_rwlock); |
897 | 0 | if ( d->vnuma ) |
898 | 0 | { |
899 | 0 | vnode = XENMEMF_get_node(r->mem_flags); |
900 | 0 | if ( vnode >= d->vnuma->nr_vnodes ) |
901 | 0 | { |
902 | 0 | read_unlock(&d->vnuma_rwlock); |
903 | 0 | return -EINVAL; |
904 | 0 | } |
905 | 0 |
906 | 0 | pnode = d->vnuma->vnode_to_pnode[vnode]; |
907 | 0 | if ( pnode != NUMA_NO_NODE ) |
908 | 0 | { |
909 | 0 | a->memflags |= MEMF_node(pnode); |
910 | 0 | if ( r->mem_flags & XENMEMF_exact_node_request ) |
911 | 0 | a->memflags |= MEMF_exact_node; |
912 | 0 | } |
913 | 0 | } |
914 | 0 | read_unlock(&d->vnuma_rwlock); |
915 | 0 | } |
916 | 0 | else if ( unlikely(!propagate_node(r->mem_flags, &a->memflags)) ) |
917 | 0 | return -EINVAL; |
918 | 0 |
919 | 0 | return 0; |
920 | 0 | } |
921 | | |
922 | | #ifdef CONFIG_HAS_PASSTHROUGH |
923 | | struct get_reserved_device_memory { |
924 | | struct xen_reserved_device_memory_map map; |
925 | | unsigned int used_entries; |
926 | | }; |
927 | | |
928 | | static int get_reserved_device_memory(xen_pfn_t start, xen_ulong_t nr, |
929 | | u32 id, void *ctxt) |
930 | 0 | { |
931 | 0 | struct get_reserved_device_memory *grdm = ctxt; |
932 | 0 | u32 sbdf = PCI_SBDF3(grdm->map.dev.pci.seg, grdm->map.dev.pci.bus, |
933 | 0 | grdm->map.dev.pci.devfn); |
934 | 0 |
935 | 0 | if ( !(grdm->map.flags & XENMEM_RDM_ALL) && (sbdf != id) ) |
936 | 0 | return 0; |
937 | 0 |
938 | 0 | if ( grdm->used_entries < grdm->map.nr_entries ) |
939 | 0 | { |
940 | 0 | struct xen_reserved_device_memory rdm = { |
941 | 0 | .start_pfn = start, .nr_pages = nr |
942 | 0 | }; |
943 | 0 |
944 | 0 | if ( __copy_to_guest_offset(grdm->map.buffer, grdm->used_entries, |
945 | 0 | &rdm, 1) ) |
946 | 0 | return -EFAULT; |
947 | 0 | } |
948 | 0 |
949 | 0 | ++grdm->used_entries; |
950 | 0 |
951 | 0 | return 1; |
952 | 0 | } |
953 | | #endif |
954 | | |
955 | | static long xatp_permission_check(struct domain *d, unsigned int space) |
956 | 3 | { |
957 | 3 | /* |
958 | 3 | * XENMAPSPACE_dev_mmio mappings are only supported for the hardware |
959 | 3 | * domain, and only for mapping this kind of space into itself. |
960 | 3 | */ |
961 | 3 | if ( (space == XENMAPSPACE_dev_mmio) && |
962 | 0 | (!is_hardware_domain(current->domain) || (d != current->domain)) ) |
963 | 0 | return -EACCES; |
964 | 3 | |
965 | 3 | return xsm_add_to_physmap(XSM_TARGET, current->domain, d); |
966 | 3 | } |
967 | | |
968 | | long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) |
969 | 4 | { |
970 | 4 | struct domain *d, *curr_d = current->domain; |
971 | 4 | long rc; |
972 | 4 | struct xen_memory_reservation reservation; |
973 | 4 | struct memop_args args; |
974 | 4 | domid_t domid; |
975 | 4 | unsigned long start_extent = cmd >> MEMOP_EXTENT_SHIFT; |
976 | 4 | int op = cmd & MEMOP_CMD_MASK; |
977 | 4 | |
978 | 4 | switch ( op ) |
979 | 4 | { |
980 | 0 | case XENMEM_increase_reservation: |
981 | 0 | case XENMEM_decrease_reservation: |
982 | 0 | case XENMEM_populate_physmap: |
983 | 0 | if ( copy_from_guest(&reservation, arg, 1) ) |
984 | 0 | return start_extent; |
985 | 0 |
986 | 0 | /* Is size too large for us to encode a continuation? */ |
987 | 0 | if ( reservation.nr_extents > (UINT_MAX >> MEMOP_EXTENT_SHIFT) ) |
988 | 0 | return start_extent; |
989 | 0 |
990 | 0 | if ( unlikely(start_extent >= reservation.nr_extents) ) |
991 | 0 | return start_extent; |
992 | 0 |
993 | 0 | d = rcu_lock_domain_by_any_id(reservation.domid); |
994 | 0 | if ( d == NULL ) |
995 | 0 | return start_extent; |
996 | 0 | args.domain = d; |
997 | 0 |
998 | 0 | if ( construct_memop_from_reservation(&reservation, &args) ) |
999 | 0 | { |
1000 | 0 | rcu_unlock_domain(d); |
1001 | 0 | return start_extent; |
1002 | 0 | } |
1003 | 0 |
1004 | 0 | args.nr_done = start_extent; |
1005 | 0 | args.preempted = 0; |
1006 | 0 |
1007 | 0 | if ( op == XENMEM_populate_physmap |
1008 | 0 | && (reservation.mem_flags & XENMEMF_populate_on_demand) ) |
1009 | 0 | args.memflags |= MEMF_populate_on_demand; |
1010 | 0 |
1011 | 0 | if ( xsm_memory_adjust_reservation(XSM_TARGET, curr_d, d) ) |
1012 | 0 | { |
1013 | 0 | rcu_unlock_domain(d); |
1014 | 0 | return start_extent; |
1015 | 0 | } |
1016 | 0 |
1017 | 0 | switch ( op ) |
1018 | 0 | { |
1019 | 0 | case XENMEM_increase_reservation: |
1020 | 0 | increase_reservation(&args); |
1021 | 0 | break; |
1022 | 0 | case XENMEM_decrease_reservation: |
1023 | 0 | decrease_reservation(&args); |
1024 | 0 | break; |
1025 | 0 | default: /* XENMEM_populate_physmap */ |
1026 | 0 | populate_physmap(&args); |
1027 | 0 | break; |
1028 | 0 | } |
1029 | 0 |
1030 | 0 | rcu_unlock_domain(d); |
1031 | 0 |
1032 | 0 | rc = args.nr_done; |
1033 | 0 |
1034 | 0 | if ( args.preempted ) |
1035 | 0 | return hypercall_create_continuation( |
1036 | 0 | __HYPERVISOR_memory_op, "lh", |
1037 | 0 | op | (rc << MEMOP_EXTENT_SHIFT), arg); |
1038 | 0 |
1039 | 0 | break; |
1040 | 0 |
1041 | 0 | case XENMEM_exchange: |
1042 | 0 | if ( unlikely(start_extent) ) |
1043 | 0 | return -EINVAL; |
1044 | 0 |
1045 | 0 | rc = memory_exchange(guest_handle_cast(arg, xen_memory_exchange_t)); |
1046 | 0 | break; |
1047 | 0 |
1048 | 0 | case XENMEM_maximum_ram_page: |
1049 | 0 | if ( unlikely(start_extent) ) |
1050 | 0 | return -EINVAL; |
1051 | 0 |
1052 | 0 | rc = max_page; |
1053 | 0 | break; |
1054 | 0 |
1055 | 0 | case XENMEM_current_reservation: |
1056 | 0 | case XENMEM_maximum_reservation: |
1057 | 0 | case XENMEM_maximum_gpfn: |
1058 | 0 | if ( unlikely(start_extent) ) |
1059 | 0 | return -EINVAL; |
1060 | 0 |
1061 | 0 | if ( copy_from_guest(&domid, arg, 1) ) |
1062 | 0 | return -EFAULT; |
1063 | 0 |
1064 | 0 | d = rcu_lock_domain_by_any_id(domid); |
1065 | 0 | if ( d == NULL ) |
1066 | 0 | return -ESRCH; |
1067 | 0 |
1068 | 0 | rc = xsm_memory_stat_reservation(XSM_TARGET, curr_d, d); |
1069 | 0 | if ( rc ) |
1070 | 0 | { |
1071 | 0 | rcu_unlock_domain(d); |
1072 | 0 | return rc; |
1073 | 0 | } |
1074 | 0 |
1075 | 0 | switch ( op ) |
1076 | 0 | { |
1077 | 0 | case XENMEM_current_reservation: |
1078 | 0 | rc = d->tot_pages; |
1079 | 0 | break; |
1080 | 0 | case XENMEM_maximum_reservation: |
1081 | 0 | rc = d->max_pages; |
1082 | 0 | break; |
1083 | 0 | default: |
1084 | 0 | ASSERT(op == XENMEM_maximum_gpfn); |
1085 | 0 | rc = domain_get_maximum_gpfn(d); |
1086 | 0 | break; |
1087 | 0 | } |
1088 | 0 |
1089 | 0 | rcu_unlock_domain(d); |
1090 | 0 |
1091 | 0 | break; |
1092 | 0 |
1093 | 3 | case XENMEM_add_to_physmap: |
1094 | 3 | { |
1095 | 3 | struct xen_add_to_physmap xatp; |
1096 | 3 | |
1097 | 3 | BUILD_BUG_ON((typeof(xatp.size))-1 > (UINT_MAX >> MEMOP_EXTENT_SHIFT)); |
1098 | 3 | |
1099 | 3 | /* Check for malicious or buggy input. */ |
1100 | 3 | if ( start_extent != (typeof(xatp.size))start_extent ) |
1101 | 0 | return -EDOM; |
1102 | 3 | |
1103 | 3 | if ( copy_from_guest(&xatp, arg, 1) ) |
1104 | 0 | return -EFAULT; |
1105 | 3 | |
1106 | 3 | /* Foreign mapping is only possible via add_to_physmap_batch. */ |
1107 | 3 | if ( xatp.space == XENMAPSPACE_gmfn_foreign ) |
1108 | 0 | return -ENOSYS; |
1109 | 3 | |
1110 | 3 | d = rcu_lock_domain_by_any_id(xatp.domid); |
1111 | 3 | if ( d == NULL ) |
1112 | 0 | return -ESRCH; |
1113 | 3 | |
1114 | 3 | rc = xatp_permission_check(d, xatp.space); |
1115 | 3 | if ( rc ) |
1116 | 0 | { |
1117 | 0 | rcu_unlock_domain(d); |
1118 | 0 | return rc; |
1119 | 0 | } |
1120 | 3 | |
1121 | 3 | rc = xenmem_add_to_physmap(d, &xatp, start_extent); |
1122 | 3 | |
1123 | 3 | rcu_unlock_domain(d); |
1124 | 3 | |
1125 | 3 | if ( xatp.space == XENMAPSPACE_gmfn_range && rc > 0 ) |
1126 | 0 | rc = hypercall_create_continuation( |
1127 | 0 | __HYPERVISOR_memory_op, "lh", |
1128 | 0 | op | (rc << MEMOP_EXTENT_SHIFT), arg); |
1129 | 3 | |
1130 | 3 | return rc; |
1131 | 3 | } |
1132 | 3 | |
1133 | 0 | case XENMEM_add_to_physmap_batch: |
1134 | 0 | { |
1135 | 0 | struct xen_add_to_physmap_batch xatpb; |
1136 | 0 |
1137 | 0 | BUILD_BUG_ON((typeof(xatpb.size))-1 > |
1138 | 0 | (UINT_MAX >> MEMOP_EXTENT_SHIFT)); |
1139 | 0 |
1140 | 0 | /* Check for malicious or buggy input. */ |
1141 | 0 | if ( start_extent != (typeof(xatpb.size))start_extent ) |
1142 | 0 | return -EDOM; |
1143 | 0 |
1144 | 0 | if ( copy_from_guest(&xatpb, arg, 1) || |
1145 | 0 | !guest_handle_okay(xatpb.idxs, xatpb.size) || |
1146 | 0 | !guest_handle_okay(xatpb.gpfns, xatpb.size) || |
1147 | 0 | !guest_handle_okay(xatpb.errs, xatpb.size) ) |
1148 | 0 | return -EFAULT; |
1149 | 0 |
1150 | 0 | /* This mapspace is unsupported for this hypercall. */ |
1151 | 0 | if ( xatpb.space == XENMAPSPACE_gmfn_range ) |
1152 | 0 | return -EOPNOTSUPP; |
1153 | 0 |
1154 | 0 | d = rcu_lock_domain_by_any_id(xatpb.domid); |
1155 | 0 | if ( d == NULL ) |
1156 | 0 | return -ESRCH; |
1157 | 0 |
1158 | 0 | rc = xatp_permission_check(d, xatpb.space); |
1159 | 0 | if ( rc ) |
1160 | 0 | { |
1161 | 0 | rcu_unlock_domain(d); |
1162 | 0 | return rc; |
1163 | 0 | } |
1164 | 0 |
1165 | 0 | rc = xenmem_add_to_physmap_batch(d, &xatpb, start_extent); |
1166 | 0 |
1167 | 0 | rcu_unlock_domain(d); |
1168 | 0 |
1169 | 0 | if ( rc > 0 ) |
1170 | 0 | rc = hypercall_create_continuation( |
1171 | 0 | __HYPERVISOR_memory_op, "lh", |
1172 | 0 | op | (rc << MEMOP_EXTENT_SHIFT), arg); |
1173 | 0 |
1174 | 0 | return rc; |
1175 | 0 | } |
1176 | 0 |
1177 | 0 | case XENMEM_remove_from_physmap: |
1178 | 0 | { |
1179 | 0 | struct xen_remove_from_physmap xrfp; |
1180 | 0 | struct page_info *page; |
1181 | 0 |
1182 | 0 | if ( unlikely(start_extent) ) |
1183 | 0 | return -EINVAL; |
1184 | 0 |
1185 | 0 | if ( copy_from_guest(&xrfp, arg, 1) ) |
1186 | 0 | return -EFAULT; |
1187 | 0 |
1188 | 0 | d = rcu_lock_domain_by_any_id(xrfp.domid); |
1189 | 0 | if ( d == NULL ) |
1190 | 0 | return -ESRCH; |
1191 | 0 |
1192 | 0 | rc = xsm_remove_from_physmap(XSM_TARGET, curr_d, d); |
1193 | 0 | if ( rc ) |
1194 | 0 | { |
1195 | 0 | rcu_unlock_domain(d); |
1196 | 0 | return rc; |
1197 | 0 | } |
1198 | 0 |
1199 | 0 | page = get_page_from_gfn(d, xrfp.gpfn, NULL, P2M_ALLOC); |
1200 | 0 | if ( page ) |
1201 | 0 | { |
1202 | 0 | rc = guest_physmap_remove_page(d, _gfn(xrfp.gpfn), |
1203 | 0 | _mfn(page_to_mfn(page)), 0); |
1204 | 0 | put_page(page); |
1205 | 0 | } |
1206 | 0 | else |
1207 | 0 | rc = -ENOENT; |
1208 | 0 |
1209 | 0 | rcu_unlock_domain(d); |
1210 | 0 |
1211 | 0 | break; |
1212 | 0 | } |
1213 | 0 |
1214 | 0 | case XENMEM_access_op: |
1215 | 0 | rc = mem_access_memop(cmd, guest_handle_cast(arg, xen_mem_access_op_t)); |
1216 | 0 | break; |
1217 | 0 |
1218 | 0 | case XENMEM_claim_pages: |
1219 | 0 | if ( unlikely(start_extent) ) |
1220 | 0 | return -EINVAL; |
1221 | 0 |
1222 | 0 | if ( copy_from_guest(&reservation, arg, 1) ) |
1223 | 0 | return -EFAULT; |
1224 | 0 |
1225 | 0 | if ( !guest_handle_is_null(reservation.extent_start) ) |
1226 | 0 | return -EINVAL; |
1227 | 0 |
1228 | 0 | if ( reservation.extent_order != 0 ) |
1229 | 0 | return -EINVAL; |
1230 | 0 |
1231 | 0 | if ( reservation.mem_flags != 0 ) |
1232 | 0 | return -EINVAL; |
1233 | 0 |
1234 | 0 | d = rcu_lock_domain_by_id(reservation.domid); |
1235 | 0 | if ( d == NULL ) |
1236 | 0 | return -EINVAL; |
1237 | 0 |
1238 | 0 | rc = xsm_claim_pages(XSM_PRIV, d); |
1239 | 0 |
1240 | 0 | if ( !rc ) |
1241 | 0 | rc = domain_set_outstanding_pages(d, reservation.nr_extents); |
1242 | 0 |
1243 | 0 | rcu_unlock_domain(d); |
1244 | 0 |
1245 | 0 | break; |
1246 | 0 |
1247 | 0 | case XENMEM_get_vnumainfo: |
1248 | 0 | { |
1249 | 0 | struct xen_vnuma_topology_info topology; |
1250 | 0 | unsigned int dom_vnodes, dom_vranges, dom_vcpus; |
1251 | 0 | struct vnuma_info tmp; |
1252 | 0 |
1253 | 0 | if ( unlikely(start_extent) ) |
1254 | 0 | return -EINVAL; |
1255 | 0 |
1256 | 0 | /* |
1257 | 0 | * The guest passes nr_vnodes, the number of regions, and nr_vcpus, |
1258 | 0 | * so we know how much memory the guest has allocated. |
1259 | 0 | */ |
1260 | 0 | if ( copy_from_guest(&topology, arg, 1) ) |
1261 | 0 | return -EFAULT; |
1262 | 0 |
1263 | 0 | if ( topology.pad != 0 ) |
1264 | 0 | return -EINVAL; |
1265 | 0 |
1266 | 0 | if ( (d = rcu_lock_domain_by_any_id(topology.domid)) == NULL ) |
1267 | 0 | return -ESRCH; |
1268 | 0 |
1269 | 0 | rc = xsm_get_vnumainfo(XSM_TARGET, d); |
1270 | 0 | if ( rc ) |
1271 | 0 | { |
1272 | 0 | rcu_unlock_domain(d); |
1273 | 0 | return rc; |
1274 | 0 | } |
1275 | 0 |
1276 | 0 | read_lock(&d->vnuma_rwlock); |
1277 | 0 |
1278 | 0 | if ( d->vnuma == NULL ) |
1279 | 0 | { |
1280 | 0 | read_unlock(&d->vnuma_rwlock); |
1281 | 0 | rcu_unlock_domain(d); |
1282 | 0 | return -EOPNOTSUPP; |
1283 | 0 | } |
1284 | 0 |
1285 | 0 | dom_vnodes = d->vnuma->nr_vnodes; |
1286 | 0 | dom_vranges = d->vnuma->nr_vmemranges; |
1287 | 0 | dom_vcpus = d->max_vcpus; |
1288 | 0 |
1289 | 0 | /* |
1290 | 0 | * Values copied from the guest may differ from the domain's vnuma |
1291 | 0 | * config. Check the guest parameters here to make sure we don't |
1292 | 0 | * overflow. Additionally check the padding. |
1293 | 0 | */ |
1294 | 0 | if ( topology.nr_vnodes < dom_vnodes || |
1295 | 0 | topology.nr_vcpus < dom_vcpus || |
1296 | 0 | topology.nr_vmemranges < dom_vranges ) |
1297 | 0 | { |
1298 | 0 | read_unlock(&d->vnuma_rwlock); |
1299 | 0 | rcu_unlock_domain(d); |
1300 | 0 |
1301 | 0 | topology.nr_vnodes = dom_vnodes; |
1302 | 0 | topology.nr_vcpus = dom_vcpus; |
1303 | 0 | topology.nr_vmemranges = dom_vranges; |
1304 | 0 |
1305 | 0 | /* Copy back needed values. */ |
1306 | 0 | return __copy_to_guest(arg, &topology, 1) ? -EFAULT : -ENOBUFS; |
1307 | 0 | } |
1308 | 0 |
1309 | 0 | read_unlock(&d->vnuma_rwlock); |
1310 | 0 |
1311 | 0 | tmp.vdistance = xmalloc_array(unsigned int, dom_vnodes * dom_vnodes); |
1312 | 0 | tmp.vmemrange = xmalloc_array(xen_vmemrange_t, dom_vranges); |
1313 | 0 | tmp.vcpu_to_vnode = xmalloc_array(unsigned int, dom_vcpus); |
1314 | 0 |
1315 | 0 | if ( tmp.vdistance == NULL || |
1316 | 0 | tmp.vmemrange == NULL || |
1317 | 0 | tmp.vcpu_to_vnode == NULL ) |
1318 | 0 | { |
1319 | 0 | rc = -ENOMEM; |
1320 | 0 | goto vnumainfo_out; |
1321 | 0 | } |
1322 | 0 |
1323 | 0 | /* |
1324 | 0 | * Check whether the vnuma info has changed in the meantime, |
1325 | 0 | * leaving the arrays we just allocated too small. |
1326 | 0 | */ |
1327 | 0 | read_lock(&d->vnuma_rwlock); |
1328 | 0 |
1329 | 0 | if ( dom_vnodes < d->vnuma->nr_vnodes || |
1330 | 0 | dom_vranges < d->vnuma->nr_vmemranges || |
1331 | 0 | dom_vcpus < d->max_vcpus ) |
1332 | 0 | { |
1333 | 0 | read_unlock(&d->vnuma_rwlock); |
1334 | 0 | rc = -EAGAIN; |
1335 | 0 | goto vnumainfo_out; |
1336 | 0 | } |
1337 | 0 |
1338 | 0 | dom_vnodes = d->vnuma->nr_vnodes; |
1339 | 0 | dom_vranges = d->vnuma->nr_vmemranges; |
1340 | 0 | dom_vcpus = d->max_vcpus; |
1341 | 0 |
1342 | 0 | memcpy(tmp.vmemrange, d->vnuma->vmemrange, |
1343 | 0 | sizeof(*d->vnuma->vmemrange) * dom_vranges); |
1344 | 0 | memcpy(tmp.vdistance, d->vnuma->vdistance, |
1345 | 0 | sizeof(*d->vnuma->vdistance) * dom_vnodes * dom_vnodes); |
1346 | 0 | memcpy(tmp.vcpu_to_vnode, d->vnuma->vcpu_to_vnode, |
1347 | 0 | sizeof(*d->vnuma->vcpu_to_vnode) * dom_vcpus); |
1348 | 0 |
1349 | 0 | read_unlock(&d->vnuma_rwlock); |
1350 | 0 |
1351 | 0 | rc = -EFAULT; |
1352 | 0 |
1353 | 0 | if ( copy_to_guest(topology.vmemrange.h, tmp.vmemrange, |
1354 | 0 | dom_vranges) != 0 ) |
1355 | 0 | goto vnumainfo_out; |
1356 | 0 |
1357 | 0 | if ( copy_to_guest(topology.vdistance.h, tmp.vdistance, |
1358 | 0 | dom_vnodes * dom_vnodes) != 0 ) |
1359 | 0 | goto vnumainfo_out; |
1360 | 0 |
1361 | 0 | if ( copy_to_guest(topology.vcpu_to_vnode.h, tmp.vcpu_to_vnode, |
1362 | 0 | dom_vcpus) != 0 ) |
1363 | 0 | goto vnumainfo_out; |
1364 | 0 |
1365 | 0 | topology.nr_vnodes = dom_vnodes; |
1366 | 0 | topology.nr_vcpus = dom_vcpus; |
1367 | 0 | topology.nr_vmemranges = dom_vranges; |
1368 | 0 |
1369 | 0 | rc = __copy_to_guest(arg, &topology, 1) ? -EFAULT : 0; |
1370 | 0 |
1371 | 0 | vnumainfo_out: |
1372 | 0 | rcu_unlock_domain(d); |
1373 | 0 |
1374 | 0 | xfree(tmp.vdistance); |
1375 | 0 | xfree(tmp.vmemrange); |
1376 | 0 | xfree(tmp.vcpu_to_vnode); |
1377 | 0 | break; |
1378 | 0 | } |
1379 | 0 |
1380 | 0 | #ifdef CONFIG_HAS_PASSTHROUGH |
1381 | 0 | case XENMEM_reserved_device_memory_map: |
1382 | 0 | { |
1383 | 0 | struct get_reserved_device_memory grdm; |
1384 | 0 |
1385 | 0 | if ( unlikely(start_extent) ) |
1386 | 0 | return -EINVAL; |
1387 | 0 |
1388 | 0 | if ( copy_from_guest(&grdm.map, arg, 1) || |
1389 | 0 | !guest_handle_okay(grdm.map.buffer, grdm.map.nr_entries) ) |
1390 | 0 | return -EFAULT; |
1391 | 0 |
1392 | 0 | if ( grdm.map.flags & ~XENMEM_RDM_ALL ) |
1393 | 0 | return -EINVAL; |
1394 | 0 |
1395 | 0 | grdm.used_entries = 0; |
1396 | 0 | rc = iommu_get_reserved_device_memory(get_reserved_device_memory, |
1397 | 0 | &grdm); |
1398 | 0 |
1399 | 0 | if ( !rc && grdm.map.nr_entries < grdm.used_entries ) |
1400 | 0 | rc = -ENOBUFS; |
1401 | 0 | grdm.map.nr_entries = grdm.used_entries; |
1402 | 0 | if ( __copy_to_guest(arg, &grdm.map, 1) ) |
1403 | 0 | rc = -EFAULT; |
1404 | 0 |
1405 | 0 | break; |
1406 | 0 | } |
1407 | 0 | #endif |
1408 | 0 |
1409 | 1 | default: |
1410 | 1 | rc = arch_memory_op(cmd, arg); |
1411 | 1 | break; |
1412 | 4 | } |
1413 | 4 | |
1414 | 1 | return rc; |
1415 | 4 | } |
1416 | | |
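
A pattern worth spelling out: whenever a sub-op above is preempted, progress is folded into the hypercall's own command word, so the continuation resumes through the same two decode lines that open do_memory_op(). A small round-trip sketch using the constants already used above (the value 42 is arbitrary):

    /* Encoding, as done before hypercall_create_continuation(): */
    unsigned long nr_done = 42;              /* extents finished so far */
    unsigned long cmd = XENMEM_populate_physmap
                        | (nr_done << MEMOP_EXTENT_SHIFT);

    /* Decoding, as at the top of do_memory_op() on re-entry: */
    unsigned long start_extent = cmd >> MEMOP_EXTENT_SHIFT; /* 42 again */
    int op = cmd & MEMOP_CMD_MASK;    /* XENMEM_populate_physmap again */
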
1417 | | void clear_domain_page(mfn_t mfn) |
1418 | 1.32k | { |
1419 | 1.32k | void *ptr = map_domain_page(mfn); |
1420 | 1.32k | |
1421 | 1.32k | clear_page(ptr); |
1422 | 1.32k | unmap_domain_page(ptr); |
1423 | 1.32k | } |
1424 | | |
1425 | | void copy_domain_page(mfn_t dest, mfn_t source) |
1426 | 0 | { |
1427 | 0 | const void *src = map_domain_page(source); |
1428 | 0 | void *dst = map_domain_page(dest); |
1429 | 0 |
1430 | 0 | copy_page(dst, src); |
1431 | 0 | unmap_domain_page(dst); |
1432 | 0 | unmap_domain_page(src); |
1433 | 0 | } |
1434 | | |
1435 | | void destroy_ring_for_helper( |
1436 | | void **_va, struct page_info *page) |
1437 | 0 | { |
1438 | 0 | void *va = *_va; |
1439 | 0 |
1440 | 0 | if ( va != NULL ) |
1441 | 0 | { |
1442 | 0 | unmap_domain_page_global(va); |
1443 | 0 | put_page_and_type(page); |
1444 | 0 | *_va = NULL; |
1445 | 0 | } |
1446 | 0 | } |
1447 | | |
1448 | | int prepare_ring_for_helper( |
1449 | | struct domain *d, unsigned long gmfn, struct page_info **_page, |
1450 | | void **_va) |
1451 | 0 | { |
1452 | 0 | struct page_info *page; |
1453 | 0 | p2m_type_t p2mt; |
1454 | 0 | void *va; |
1455 | 0 |
1456 | 0 | page = get_page_from_gfn(d, gmfn, &p2mt, P2M_UNSHARE); |
1457 | 0 |
1458 | 0 | #ifdef CONFIG_HAS_MEM_PAGING |
1459 | 0 | if ( p2m_is_paging(p2mt) ) |
1460 | 0 | { |
1461 | 0 | if ( page ) |
1462 | 0 | put_page(page); |
1463 | 0 | p2m_mem_paging_populate(d, gmfn); |
1464 | 0 | return -ENOENT; |
1465 | 0 | } |
1466 | 0 | #endif |
1467 | 0 | #ifdef CONFIG_HAS_MEM_SHARING |
1468 | 0 | if ( p2m_is_shared(p2mt) ) |
1469 | 0 | { |
1470 | 0 | if ( page ) |
1471 | 0 | put_page(page); |
1472 | 0 | return -ENOENT; |
1473 | 0 | } |
1474 | 0 | #endif |
1475 | 0 |
1476 | 0 | if ( !page ) |
1477 | 0 | return -EINVAL; |
1478 | 0 |
|
1479 | 0 | if ( !get_page_type(page, PGT_writable_page) ) |
1480 | 0 | { |
1481 | 0 | put_page(page); |
1482 | 0 | return -EINVAL; |
1483 | 0 | } |
1484 | 0 |
1485 | 0 | va = __map_domain_page_global(page); |
1486 | 0 | if ( va == NULL ) |
1487 | 0 | { |
1488 | 0 | put_page_and_type(page); |
1489 | 0 | return -ENOMEM; |
1490 | 0 | } |
1491 | 0 |
1492 | 0 | *_va = va; |
1493 | 0 | *_page = page; |
1494 | 0 |
1495 | 0 | return 0; |
1496 | 0 | } |
1497 | | |
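
prepare_ring_for_helper() and destroy_ring_for_helper() above form a matched pair: the former takes a writable type reference plus a global mapping on a guest frame, the latter releases both. A minimal in-hypervisor usage sketch; 'd' and 'ring_gmfn' are assumed to come from the caller:

    /* Sketch: map a guest frame as a shared ring, then tear it down. */
    struct page_info *ring_page;
    void *ring_va;
    int rc = prepare_ring_for_helper(d, ring_gmfn, &ring_page, &ring_va);

    if ( rc )
        return rc;  /* -ENOENT: paged-out/shared frame, caller may retry */

    /* ... exchange data with the guest through ring_va ... */

    destroy_ring_for_helper(&ring_va, ring_page);  /* also NULLs ring_va */
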
1498 | | /* |
1499 | | * Local variables: |
1500 | | * mode: C |
1501 | | * c-file-style: "BSD" |
1502 | | * c-basic-offset: 4 |
1503 | | * tab-width: 4 |
1504 | | * indent-tabs-mode: nil |
1505 | | * End: |
1506 | | */ |