/root/src/xen/xen/arch/x86/mm.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * arch/x86/mm.c |
3 | | * |
4 | | * Copyright (c) 2002-2005 K A Fraser |
5 | | * Copyright (c) 2004 Christian Limpach |
6 | | * |
7 | | * This program is free software; you can redistribute it and/or modify |
8 | | * it under the terms of the GNU General Public License as published by |
9 | | * the Free Software Foundation; either version 2 of the License, or |
10 | | * (at your option) any later version. |
11 | | * |
12 | | * This program is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU General Public License |
18 | | * along with this program; If not, see <http://www.gnu.org/licenses/>. |
19 | | */ |
20 | | |
21 | | /* |
22 | | * A description of the x86 page table API: |
23 | | * |
24 | | * Domains trap to do_mmu_update with a list of update requests. |
25 | | * This is a list of (ptr, val) pairs, where the requested operation |
26 | | * is *ptr = val. |
27 | | * |
28 | | * Reference counting of pages: |
29 | | * ---------------------------- |
30 | | * Each page has two refcounts: tot_count and type_count. |
31 | | * |
32 | | * TOT_COUNT is the obvious reference count. It counts all uses of a |
33 | | * physical page frame by a domain, including uses as a page directory, |
34 | | * a page table, or simple mappings via a PTE. This count prevents a |
35 | | * domain from releasing a frame back to the free pool when it still holds |
36 | | * a reference to it. |
37 | | * |
38 | | * TYPE_COUNT is more subtle. A frame can be put to one of three |
39 | | * mutually-exclusive uses: it might be used as a page directory, or a |
40 | | * page table, or it may be mapped writable by the domain [of course, a |
41 | | * frame may be used in none of these three ways!]. |
42 | | * So, type_count is a count of the number of times a frame is being |
43 | | * referred to in its current incarnation. Therefore, a page can only |
44 | | * change its type when its type count is zero. |
45 | | * |
46 | | * Pinning the page type: |
47 | | * ---------------------- |
48 | | * The type of a page can be pinned/unpinned with the commands |
49 | | * MMUEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is, |
50 | | * pinning is not reference counted, so it can't be nested). |
51 | | * This is useful to prevent a page's type count falling to zero, at which |
52 | | * point safety checks would need to be carried out next time the count |
53 | | * is increased again. |
54 | | * |
55 | | * A further note on writable page mappings: |
56 | | * ----------------------------------------- |
57 | | * For simplicity, the count of writable mappings for a page may not |
58 | | * correspond to reality. The 'writable count' is incremented for every |
59 | | * PTE which maps the page with the _PAGE_RW flag set. However, for |
60 | | * write access to be possible the page directory entry must also have |
61 | | * its _PAGE_RW bit set. We do not check this as it complicates the |
62 | | * reference counting considerably [consider the case of multiple |
63 | | * directory entries referencing a single page table, some with the RW |
64 | | * bit set, others not -- it starts getting a bit messy]. |
65 | | * In normal use, this simplification shouldn't be a problem. |
66 | | * However, the logic can be added if required. |
67 | | * |
68 | | * One more note on read-only page mappings: |
69 | | * ----------------------------------------- |
70 | | * We want domains to be able to map pages for read-only access. The |
71 | | * main reason is that page tables and directories should be readable |
72 | | * by a domain, but it would not be safe for them to be writable. |
73 | | * However, domains have free access to rings 1 & 2 of the Intel |
74 | | * privilege model. In terms of page protection, these are considered |
75 | | * to be part of 'supervisor mode'. The WP bit in CR0 controls whether |
76 | | * read-only restrictions are respected in supervisor mode -- if the |
77 | | * bit is clear then any mapped page is writable. |
78 | | * |
79 | | * We get round this by always setting the WP bit and disallowing |
80 | | * updates to it. This is very unlikely to cause a problem for guest |
81 | | * OSes, which will generally use the WP bit to simplify copy-on-write |
82 | | * implementation (in that case, the OS wants a fault when it writes to |
83 | | * an application-supplied buffer). |
84 | | */ |
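The scheme above boils down to two counters and one invariant: a frame's type may only change while its type count is zero. A minimal stand-alone sketch of that rule follows; the struct, enum and helper names are invented for illustration, and Xen's real get_page_type()/put_page_type() additionally handle validation, pinning and concurrency.

#include <stdbool.h>
#include <stdint.h>

enum pg_type { PGT_none_t, PGT_l1_table_t, PGT_l2_table_t, PGT_writable_t };

struct page {
    uint32_t tot_count;   /* every use of the frame, incl. plain PTE maps */
    uint32_t type_count;  /* uses in the frame's current incarnation */
    enum pg_type type;    /* the mutually-exclusive current use */
};

/* Take a typed reference; the type may only change when type_count == 0. */
static bool page_get_type(struct page *pg, enum pg_type wanted)
{
    if ( pg->type_count == 0 )
        pg->type = wanted;       /* safety checks would run at this point */
    else if ( pg->type != wanted )
        return false;            /* already in a conflicting incarnation */
    pg->type_count++;
    return true;
}

static void page_put_type(struct page *pg)
{
    if ( --pg->type_count == 0 )
        pg->type = PGT_none_t;   /* frame may now be re-typed */
}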
85 | | |
86 | | #include <xen/init.h> |
87 | | #include <xen/kernel.h> |
88 | | #include <xen/lib.h> |
89 | | #include <xen/mm.h> |
90 | | #include <xen/domain.h> |
91 | | #include <xen/sched.h> |
92 | | #include <xen/err.h> |
93 | | #include <xen/perfc.h> |
94 | | #include <xen/irq.h> |
95 | | #include <xen/softirq.h> |
96 | | #include <xen/domain_page.h> |
97 | | #include <xen/event.h> |
98 | | #include <xen/iocap.h> |
99 | | #include <xen/guest_access.h> |
100 | | #include <xen/pfn.h> |
101 | | #include <xen/vmap.h> |
102 | | #include <xen/xmalloc.h> |
103 | | #include <xen/efi.h> |
104 | | #include <xen/grant_table.h> |
105 | | #include <xen/hypercall.h> |
106 | | #include <asm/paging.h> |
107 | | #include <asm/shadow.h> |
108 | | #include <asm/page.h> |
109 | | #include <asm/flushtlb.h> |
110 | | #include <asm/io.h> |
111 | | #include <asm/ldt.h> |
112 | | #include <asm/x86_emulate.h> |
113 | | #include <asm/e820.h> |
114 | | #include <asm/hypercall.h> |
115 | | #include <asm/shared.h> |
116 | | #include <asm/mem_sharing.h> |
117 | | #include <public/memory.h> |
118 | | #include <public/sched.h> |
119 | | #include <xsm/xsm.h> |
120 | | #include <xen/trace.h> |
121 | | #include <asm/setup.h> |
122 | | #include <asm/fixmap.h> |
123 | | #include <asm/io_apic.h> |
124 | | #include <asm/pci.h> |
125 | | |
126 | | #include <asm/hvm/grant_table.h> |
127 | | #include <asm/pv/grant_table.h> |
128 | | |
129 | | #include "pv/mm.h" |
130 | | |
131 | | /* Override macros from asm/page.h to make them work with mfn_t */ |
132 | | #undef mfn_to_page |
133 | 14.5k | #define mfn_to_page(mfn) __mfn_to_page(mfn_x(mfn)) |
134 | | #undef page_to_mfn |
135 | 24 | #define page_to_mfn(pg) _mfn(__page_to_mfn(pg)) |
136 | | |
137 | | /* Mapping of the fixmap space needed early. */ |
138 | | l1_pgentry_t __section(".bss.page_aligned") __aligned(PAGE_SIZE) |
139 | | l1_fixmap[L1_PAGETABLE_ENTRIES]; |
140 | | |
141 | | paddr_t __read_mostly mem_hotplug; |
142 | | |
143 | | /* Private domain structs for DOMID_XEN and DOMID_IO. */ |
144 | | struct domain *dom_xen, *dom_io, *dom_cow; |
145 | | |
146 | | /* Frame table size in pages. */ |
147 | | unsigned long max_page; |
148 | | unsigned long total_pages; |
149 | | |
150 | | bool __read_mostly machine_to_phys_mapping_valid; |
151 | | |
152 | | struct rangeset *__read_mostly mmio_ro_ranges; |
153 | | |
154 | | static uint32_t base_disallow_mask; |
155 | | /* Global bit is allowed to be set on L1 PTEs. Intended for user mappings. */ |
156 | 0 | #define L1_DISALLOW_MASK ((base_disallow_mask | _PAGE_GNTTAB) & ~_PAGE_GLOBAL) |
157 | | |
158 | 0 | #define L2_DISALLOW_MASK base_disallow_mask |
159 | | |
160 | 0 | #define l3_disallow_mask(d) (!is_pv_32bit_domain(d) ? \ |
161 | 0 | base_disallow_mask : 0xFFFFF198U) |
162 | | |
163 | 0 | #define L4_DISALLOW_MASK (base_disallow_mask) |
164 | | |
165 | | #define l1_disallow_mask(d) \ |
166 | 0 | ((d != dom_io) && \ |
167 | 0 | (rangeset_is_empty((d)->iomem_caps) && \ |
168 | 0 | rangeset_is_empty((d)->arch.ioport_caps) && \ |
169 | 0 | !has_arch_pdevs(d) && \ |
170 | 0 | is_pv_domain(d)) ? \ |
171 | 0 | L1_DISALLOW_MASK : (L1_DISALLOW_MASK & ~PAGE_CACHE_ATTRS)) |
172 | | |
173 | | static s8 __read_mostly opt_mmio_relax; |
174 | | |
175 | | static int __init parse_mmio_relax(const char *s) |
176 | 0 | { |
177 | 0 | if ( !*s ) |
178 | 0 | opt_mmio_relax = 1; |
179 | 0 | else |
180 | 0 | opt_mmio_relax = parse_bool(s, NULL); |
181 | 0 | if ( opt_mmio_relax < 0 && strcmp(s, "all") ) |
182 | 0 | { |
183 | 0 | opt_mmio_relax = 0; |
184 | 0 | return -EINVAL; |
185 | 0 | } |
186 | 0 | |
187 | 0 | return 0; |
188 | 0 | } |
189 | | custom_param("mmio-relax", parse_mmio_relax); |
190 | | |
191 | | static void __init init_frametable_chunk(void *start, void *end) |
192 | 2 | { |
193 | 2 | unsigned long s = (unsigned long)start; |
194 | 2 | unsigned long e = (unsigned long)end; |
195 | 2 | unsigned long step; |
196 | 2 | mfn_t mfn; |
197 | 2 | |
198 | 2 | ASSERT(!(s & ((1 << L2_PAGETABLE_SHIFT) - 1))); |
199 | 66 | for ( ; s < e; s += step << PAGE_SHIFT ) |
200 | 64 | { |
201 | 64 | step = 1UL << (cpu_has_page1gb && |
202 | 64 | !(s & ((1UL << L3_PAGETABLE_SHIFT) - 1)) ? |
203 | 1 | L3_PAGETABLE_SHIFT - PAGE_SHIFT : |
204 | 63 | L2_PAGETABLE_SHIFT - PAGE_SHIFT); |
205 | 64 | /* |
206 | 64 | * The hardcoded 4 below is arbitrary - just pick whatever you think |
207 | 64 | * is reasonable to waste as a trade-off for using a large page. |
208 | 64 | */ |
209 | 65 | while ( step && s + (step << PAGE_SHIFT) > e + (4 << PAGE_SHIFT) ) |
210 | 1 | step >>= PAGETABLE_ORDER; |
211 | 64 | mfn = alloc_boot_pages(step, step); |
212 | 64 | map_pages_to_xen(s, mfn_x(mfn), step, PAGE_HYPERVISOR); |
213 | 64 | } |
214 | 2 | |
215 | 2 | memset(start, 0, end - start); |
216 | 2 | memset(end, -1, s - e); |
217 | 2 | } |
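The interesting part of init_frametable_chunk() is the step-size selection: take a 1G step when the cursor is 1G-aligned and the CPU has 1G pages, otherwise a 2M step, then shrink while the mapping would overshoot the chunk end by more than the four-page slack. A compilable sketch of just that calculation, with constants mirroring x86 and an invented pick_step() helper:

#include <stdio.h>

#define PAGE_SHIFT        12
#define L2_SHIFT          21   /* 2M superpage */
#define L3_SHIFT          30   /* 1G superpage */
#define PAGETABLE_ORDER    9

static unsigned long pick_step(unsigned long s, unsigned long e, int has_1g)
{
    unsigned long step =
        1UL << ((has_1g && !(s & ((1UL << L3_SHIFT) - 1)))
                ? L3_SHIFT - PAGE_SHIFT
                : L2_SHIFT - PAGE_SHIFT);

    /* Shrink while the mapping would waste more than 4 pages past 'e'. */
    while ( step && s + (step << PAGE_SHIFT) > e + (4UL << PAGE_SHIFT) )
        step >>= PAGETABLE_ORDER;

    return step;
}

int main(void)
{
    /* A 6M chunk starting 2M-aligned: expect a 2M step, i.e. 512 pages. */
    printf("%lu pages\n", pick_step(0x200000UL, 0x800000UL, 1));
    return 0;
}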
218 | | |
219 | | void __init init_frametable(void) |
220 | 1 | { |
221 | 1 | unsigned int sidx, eidx, nidx; |
222 | 1 | unsigned int max_idx = (max_pdx + PDX_GROUP_COUNT - 1) / PDX_GROUP_COUNT; |
223 | 1 | struct page_info *end_pg, *top_pg; |
224 | 1 | |
225 | 1 | BUILD_BUG_ON(XEN_VIRT_END > FRAMETABLE_VIRT_START); |
226 | 1 | BUILD_BUG_ON(FRAMETABLE_VIRT_START & ((1UL << L2_PAGETABLE_SHIFT) - 1)); |
227 | 1 | |
228 | 1 | for ( sidx = 0; ; sidx = nidx ) |
229 | 2 | { |
230 | 2 | eidx = find_next_zero_bit(pdx_group_valid, max_idx, sidx); |
231 | 2 | nidx = find_next_bit(pdx_group_valid, max_idx, eidx); |
232 | 2 | if ( nidx >= max_idx ) |
233 | 1 | break; |
234 | 1 | init_frametable_chunk(pdx_to_page(sidx * PDX_GROUP_COUNT), |
235 | 1 | pdx_to_page(eidx * PDX_GROUP_COUNT)); |
236 | 1 | } |
237 | 1 | |
238 | 1 | end_pg = pdx_to_page(max_pdx - 1) + 1; |
239 | 0 | top_pg = mem_hotplug ? pdx_to_page(max_idx * PDX_GROUP_COUNT - 1) + 1 |
240 | 1 | : end_pg; |
241 | 1 | init_frametable_chunk(pdx_to_page(sidx * PDX_GROUP_COUNT), top_pg); |
242 | 1 | memset(end_pg, -1, (unsigned long)top_pg - (unsigned long)end_pg); |
243 | 1 | } |
244 | | |
245 | | #ifndef NDEBUG |
246 | | static unsigned int __read_mostly root_pgt_pv_xen_slots |
247 | | = ROOT_PAGETABLE_PV_XEN_SLOTS; |
248 | | static l4_pgentry_t __read_mostly split_l4e; |
249 | | #else |
250 | | #define root_pgt_pv_xen_slots ROOT_PAGETABLE_PV_XEN_SLOTS |
251 | | #endif |
252 | | |
253 | | void __init arch_init_memory(void) |
254 | 1 | { |
255 | 1 | unsigned long i, pfn, rstart_pfn, rend_pfn, iostart_pfn, ioend_pfn; |
256 | 1 | |
257 | 1 | /* |
258 | 1 | * Basic guest-accessible flags: |
259 | 1 | * PRESENT, R/W, USER, A/D, AVAIL[0,1,2], AVAIL_HIGH, NX (if available). |
260 | 1 | */ |
261 | 1 | base_disallow_mask = |
262 | 1 | ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | |
263 | 1 | _PAGE_DIRTY | _PAGE_AVAIL | _PAGE_AVAIL_HIGH | _PAGE_NX); |
264 | 1 | |
265 | 1 | /* |
266 | 1 | * Initialise our DOMID_XEN domain. |
267 | 1 | * Any Xen-heap pages that we will allow to be mapped will have |
268 | 1 | * their domain field set to dom_xen. |
269 | 1 | * Hidden PCI devices will also be associated with this domain |
270 | 1 | * (but be [partly] controlled by Dom0 nevertheless). |
271 | 1 | */ |
272 | 1 | dom_xen = domain_create(DOMID_XEN, DOMCRF_dummy, 0, NULL); |
273 | 1 | BUG_ON(IS_ERR(dom_xen)); |
274 | 1 | INIT_LIST_HEAD(&dom_xen->arch.pdev_list); |
275 | 1 | |
276 | 1 | /* |
277 | 1 | * Initialise our DOMID_IO domain. |
278 | 1 | * This domain owns I/O pages that are within the range of the page_info |
279 | 1 | * array. Mappings occur at the privilege level of the caller. |
280 | 1 | */ |
281 | 1 | dom_io = domain_create(DOMID_IO, DOMCRF_dummy, 0, NULL); |
282 | 1 | BUG_ON(IS_ERR(dom_io)); |
283 | 1 | |
284 | 1 | /* |
285 | 1 | * Initialise our COW domain. |
286 | 1 | * This domain owns sharable pages. |
287 | 1 | */ |
288 | 1 | dom_cow = domain_create(DOMID_COW, DOMCRF_dummy, 0, NULL); |
289 | 1 | BUG_ON(IS_ERR(dom_cow)); |
290 | 1 | |
291 | 1 | /* First 1MB of RAM is historically marked as I/O. */ |
292 | 257 | for ( i = 0; i < 0x100; i++ ) |
293 | 256 | share_xen_page_with_guest(mfn_to_page(_mfn(i)), |
294 | 256 | dom_io, XENSHARE_writable); |
295 | 1 | |
296 | 1 | /* Any areas not specified as RAM by the e820 map are considered I/O. */ |
297 | 9 | for ( i = 0, pfn = 0; pfn < max_page; i++ ) |
298 | 8 | { |
299 | 19 | while ( (i < e820.nr_map) && |
300 | 19 | (e820.map[i].type != E820_RAM) && |
301 | 11 | (e820.map[i].type != E820_UNUSABLE) ) |
302 | 11 | i++; |
303 | 8 | |
304 | 8 | if ( i >= e820.nr_map ) |
305 | 0 | { |
306 | 0 | /* No more RAM regions: mark as I/O right to end of memory map. */ |
307 | 0 | rstart_pfn = rend_pfn = max_page; |
308 | 0 | } |
309 | 8 | else |
310 | 8 | { |
311 | 8 | /* Mark as I/O just up as far as next RAM region. */ |
312 | 8 | rstart_pfn = min_t(unsigned long, max_page, |
313 | 8 | PFN_UP(e820.map[i].addr)); |
314 | 8 | rend_pfn = max_t(unsigned long, rstart_pfn, |
315 | 8 | PFN_DOWN(e820.map[i].addr + e820.map[i].size)); |
316 | 8 | } |
317 | 8 | |
318 | 8 | /* |
319 | 8 | * Make sure any Xen mappings of RAM holes above 1MB are blown away. |
320 | 8 | * In particular this ensures that RAM holes are respected even in |
321 | 8 | * the statically-initialised 1-16MB mapping area. |
322 | 8 | */ |
323 | 8 | iostart_pfn = max_t(unsigned long, pfn, 1UL << (20 - PAGE_SHIFT)); |
324 | 8 | ioend_pfn = min(rstart_pfn, 16UL << (20 - PAGE_SHIFT)); |
325 | 8 | if ( iostart_pfn < ioend_pfn ) |
326 | 0 | destroy_xen_mappings((unsigned long)mfn_to_virt(iostart_pfn), |
327 | 0 | (unsigned long)mfn_to_virt(ioend_pfn)); |
328 | 8 | |
329 | 8 | /* Mark as I/O up to next RAM region. */ |
330 | 473k | for ( ; pfn < rstart_pfn; pfn++ ) |
331 | 473k | { |
332 | 473k | if ( !mfn_valid(_mfn(pfn)) ) |
333 | 458k | continue; |
334 | 14.2k | share_xen_page_with_guest( |
335 | 14.2k | mfn_to_page(_mfn(pfn)), dom_io, XENSHARE_writable); |
336 | 14.2k | } |
337 | 8 | |
338 | 8 | /* Skip the RAM region. */ |
339 | 8 | pfn = rend_pfn; |
340 | 8 | } |
341 | 1 | |
342 | 1 | subarch_init_memory(); |
343 | 1 | |
344 | 1 | efi_init_memory(); |
345 | 1 | |
346 | 1 | mem_sharing_init(); |
347 | 1 | |
348 | 1 | #ifndef NDEBUG |
349 | 1 | if ( highmem_start ) |
350 | 0 | { |
351 | 0 | unsigned long split_va = (unsigned long)__va(highmem_start); |
352 | 0 | |
353 | 0 | if ( split_va < HYPERVISOR_VIRT_END && |
354 | 0 | split_va - 1 == (unsigned long)__va(highmem_start - 1) ) |
355 | 0 | { |
356 | 0 | root_pgt_pv_xen_slots = l4_table_offset(split_va) - |
357 | 0 | ROOT_PAGETABLE_FIRST_XEN_SLOT; |
358 | 0 | ASSERT(root_pgt_pv_xen_slots < ROOT_PAGETABLE_PV_XEN_SLOTS); |
359 | 0 | if ( l4_table_offset(split_va) == l4_table_offset(split_va - 1) ) |
360 | 0 | { |
361 | 0 | l3_pgentry_t *l3tab = alloc_xen_pagetable(); |
362 | 0 | |
363 | 0 | if ( l3tab ) |
364 | 0 | { |
365 | 0 | const l3_pgentry_t *l3idle = |
366 | 0 | l4e_to_l3e(idle_pg_table[l4_table_offset(split_va)]); |
367 | 0 | |
368 | 0 | for ( i = 0; i < l3_table_offset(split_va); ++i ) |
369 | 0 | l3tab[i] = l3idle[i]; |
370 | 0 | for ( ; i < L3_PAGETABLE_ENTRIES; ++i ) |
371 | 0 | l3tab[i] = l3e_empty(); |
372 | 0 | split_l4e = l4e_from_pfn(virt_to_mfn(l3tab), |
373 | 0 | __PAGE_HYPERVISOR_RW); |
374 | 0 | } |
375 | 0 | else |
376 | 0 | ++root_pgt_pv_xen_slots; |
377 | 0 | } |
378 | 0 | } |
379 | 0 | } |
380 | 1 | #endif |
381 | 1 | } |
382 | | |
383 | | int page_is_ram_type(unsigned long mfn, unsigned long mem_type) |
384 | 85 | { |
385 | 85 | uint64_t maddr = pfn_to_paddr(mfn); |
386 | 85 | int i; |
387 | 85 | |
388 | 1.20k | for ( i = 0; i < e820.nr_map; i++ ) |
389 | 1.16k | { |
390 | 1.16k | switch ( e820.map[i].type ) |
391 | 1.16k | { |
392 | 500 | case E820_RAM: |
393 | 500 | if ( mem_type & RAM_TYPE_CONVENTIONAL ) |
394 | 8 | break; |
395 | 492 | continue; |
396 | 498 | case E820_RESERVED: |
397 | 498 | if ( mem_type & RAM_TYPE_RESERVED ) |
398 | 330 | break; |
399 | 168 | continue; |
400 | 0 | case E820_UNUSABLE: |
401 | 0 | if ( mem_type & RAM_TYPE_UNUSABLE ) |
402 | 0 | break; |
403 | 0 | continue; |
404 | 167 | case E820_ACPI: |
405 | 167 | case E820_NVS: |
406 | 167 | if ( mem_type & RAM_TYPE_ACPI ) |
407 | 40 | break; |
408 | 127 | continue; |
409 | 0 | default: |
410 | 0 | /* unknown */ |
411 | 0 | continue; |
412 | 1.16k | } |
413 | 1.16k | |
414 | 1.16k | /* Test the range. */ |
415 | 378 | if ( (e820.map[i].addr <= maddr) && |
416 | 217 | ((e820.map[i].addr + e820.map[i].size) >= (maddr + PAGE_SIZE)) ) |
417 | 42 | return 1; |
418 | 378 | } |
419 | 85 | |
420 | 43 | return 0; |
421 | 85 | } |
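Note that the range test above requires the whole page to lie inside a single e820 entry; a page straddling a region boundary is reported as not being of that type. Distilled into a hypothetical helper (not Xen code):

#include <stdbool.h>
#include <stdint.h>

#define PAGE_SIZE 4096ULL

/* True iff [maddr, maddr + PAGE_SIZE) lies wholly inside [addr, addr + size). */
static bool e820_contains_page(uint64_t addr, uint64_t size, uint64_t maddr)
{
    return addr <= maddr && addr + size >= maddr + PAGE_SIZE;
}

For example, a region covering [0, 0x1800) does not contain the page at 0x1000, because the region ends before 0x2000.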
422 | | |
423 | | unsigned long domain_get_maximum_gpfn(struct domain *d) |
424 | 0 | { |
425 | 0 | if ( is_hvm_domain(d) ) |
426 | 0 | return p2m_get_hostp2m(d)->max_mapped_pfn; |
427 | 0 | /* NB. PV guests specify nr_pfns rather than max_pfn so we adjust here. */ |
428 | 0 | return (arch_get_max_pfn(d) ?: 1) - 1; |
429 | 0 | } |
430 | | |
431 | | void share_xen_page_with_guest( |
432 | | struct page_info *page, struct domain *d, int readonly) |
433 | 28.0k | { |
434 | 28.0k | if ( page_get_owner(page) == d ) |
435 | 107 | return; |
436 | 28.0k | |
437 | 27.9k | set_gpfn_from_mfn(mfn_x(page_to_mfn(page)), INVALID_M2P_ENTRY); |
438 | 27.9k | |
439 | 27.9k | spin_lock(&d->page_alloc_lock); |
440 | 27.9k | |
441 | 27.9k | /* The incremented type count pins as writable or read-only. */ |
442 | 27.9k | page->u.inuse.type_info = (readonly ? PGT_none : PGT_writable_page); |
443 | 27.9k | page->u.inuse.type_info |= PGT_validated | 1; |
444 | 27.9k | |
445 | 27.9k | page_set_owner(page, d); |
446 | 27.9k | smp_wmb(); /* install valid domain ptr before updating refcnt. */ |
447 | 27.9k | ASSERT((page->count_info & ~PGC_xen_heap) == 0); |
448 | 27.9k | |
449 | 27.9k | /* Only add to the allocation list if the domain isn't dying. */ |
450 | 27.9k | if ( !d->is_dying ) |
451 | 27.9k | { |
452 | 27.9k | page->count_info |= PGC_xen_heap | PGC_allocated | 1; |
453 | 27.9k | if ( unlikely(d->xenheap_pages++ == 0) ) |
454 | 3 | get_knownalive_domain(d); |
455 | 27.9k | page_list_add_tail(page, &d->xenpage_list); |
456 | 27.9k | } |
457 | 27.9k | |
458 | 27.9k | spin_unlock(&d->page_alloc_lock); |
459 | 27.9k | } |
460 | | |
461 | | int __init unshare_xen_page_with_guest(struct page_info *page, |
462 | | struct domain *d) |
463 | 149 | { |
464 | 149 | if ( page_get_owner(page) != d || !is_xen_heap_page(page) ) |
465 | 0 | return -EINVAL; |
466 | 149 | |
467 | 149 | if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) |
468 | 149 | put_page(page); |
469 | 149 | |
470 | 149 | /* Remove the owner and clear the flags. */ |
471 | 149 | page->u.inuse.type_info = 0; |
472 | 149 | page_set_owner(page, NULL); |
473 | 149 | |
474 | 149 | return 0; |
475 | 149 | } |
476 | | |
477 | | void share_xen_page_with_privileged_guests( |
478 | | struct page_info *page, int readonly) |
479 | 13.3k | { |
480 | 13.3k | share_xen_page_with_guest(page, dom_xen, readonly); |
481 | 13.3k | } |
482 | | |
483 | | void free_shared_domheap_page(struct page_info *page) |
484 | 0 | { |
485 | 0 | if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) |
486 | 0 | put_page(page); |
487 | 0 | if ( !test_and_clear_bit(_PGC_xen_heap, &page->count_info) ) |
488 | 0 | ASSERT_UNREACHABLE(); |
489 | 0 | page->u.inuse.type_info = 0; |
490 | 0 | page_set_owner(page, NULL); |
491 | 0 | free_domheap_page(page); |
492 | 0 | } |
493 | | |
494 | | void make_cr3(struct vcpu *v, mfn_t mfn) |
495 | 12 | { |
496 | 12 | v->arch.cr3 = mfn_x(mfn) << PAGE_SHIFT; |
497 | 12 | } |
498 | | |
499 | | void write_ptbase(struct vcpu *v) |
500 | 39.5k | { |
501 | 39.5k | write_cr3(v->arch.cr3); |
502 | 39.5k | } |
503 | | |
504 | | /* |
505 | | * Should be called after CR3 is updated. |
506 | | * |
507 | | * Uses values found in vcpu->arch.(guest_table and guest_table_user), and |
508 | | * for HVM guests, arch.monitor_table and hvm's guest CR3. |
509 | | * |
510 | | * Update ref counts to shadow tables appropriately. |
511 | | */ |
512 | | void update_cr3(struct vcpu *v) |
513 | 0 | { |
514 | 0 | mfn_t cr3_mfn; |
515 | 0 | |
516 | 0 | if ( paging_mode_enabled(v->domain) ) |
517 | 0 | { |
518 | 0 | paging_update_cr3(v); |
519 | 0 | return; |
520 | 0 | } |
521 | 0 | |
522 | 0 | if ( !(v->arch.flags & TF_kernel_mode) ) |
523 | 0 | cr3_mfn = pagetable_get_mfn(v->arch.guest_table_user); |
524 | 0 | else |
525 | 0 | cr3_mfn = pagetable_get_mfn(v->arch.guest_table); |
526 | 0 | |
527 | 0 | make_cr3(v, cr3_mfn); |
528 | 0 | } |
529 | | |
530 | | static inline void set_tlbflush_timestamp(struct page_info *page) |
531 | 2 | { |
532 | 2 | /* |
533 | 2 | * Record TLB information for flush later. We do not stamp page tables |
534 | 2 | * when running in shadow mode: |
535 | 2 | * 1. Pointless, since it's the shadow pt's which must be tracked. |
536 | 2 | * 2. Shadow mode reuses this field for shadowed page tables to store |
537 | 2 | * flags info -- we don't want to conflict with that. |
538 | 2 | */ |
539 | 2 | if ( !(page->count_info & PGC_page_table) || |
540 | 0 | !shadow_mode_enabled(page_get_owner(page)) ) |
541 | 2 | page_set_tlbflush_timestamp(page); |
542 | 2 | } |
543 | | |
544 | | const char __section(".bss.page_aligned.const") __aligned(PAGE_SIZE) |
545 | | zero_page[PAGE_SIZE]; |
546 | | |
547 | | static void invalidate_shadow_ldt(struct vcpu *v, int flush) |
548 | 0 | { |
549 | 0 | l1_pgentry_t *pl1e; |
550 | 0 | unsigned int i; |
551 | 0 | struct page_info *page; |
552 | 0 | |
553 | 0 | BUG_ON(unlikely(in_irq())); |
554 | 0 | |
555 | 0 | spin_lock(&v->arch.pv_vcpu.shadow_ldt_lock); |
556 | 0 | |
557 | 0 | if ( v->arch.pv_vcpu.shadow_ldt_mapcnt == 0 ) |
558 | 0 | goto out; |
559 | 0 | |
560 | 0 | v->arch.pv_vcpu.shadow_ldt_mapcnt = 0; |
561 | 0 | pl1e = pv_ldt_ptes(v); |
562 | 0 | |
563 | 0 | for ( i = 0; i < 16; i++ ) |
564 | 0 | { |
565 | 0 | if ( !(l1e_get_flags(pl1e[i]) & _PAGE_PRESENT) ) |
566 | 0 | continue; |
567 | 0 | page = l1e_get_page(pl1e[i]); |
568 | 0 | l1e_write(&pl1e[i], l1e_empty()); |
569 | 0 | ASSERT_PAGE_IS_TYPE(page, PGT_seg_desc_page); |
570 | 0 | ASSERT_PAGE_IS_DOMAIN(page, v->domain); |
571 | 0 | put_page_and_type(page); |
572 | 0 | } |
573 | 0 | |
574 | 0 | /* Rid TLBs of stale mappings (guest mappings and shadow mappings). */ |
575 | 0 | if ( flush ) |
576 | 0 | flush_tlb_mask(v->vcpu_dirty_cpumask); |
577 | 0 | |
578 | 0 | out: |
579 | 0 | spin_unlock(&v->arch.pv_vcpu.shadow_ldt_lock); |
580 | 0 | } |
581 | | |
582 | | |
583 | | static int alloc_segdesc_page(struct page_info *page) |
584 | 0 | { |
585 | 0 | const struct domain *owner = page_get_owner(page); |
586 | 0 | struct desc_struct *descs = __map_domain_page(page); |
587 | 0 | unsigned i; |
588 | 0 | |
589 | 0 | for ( i = 0; i < 512; i++ ) |
590 | 0 | if ( unlikely(!check_descriptor(owner, &descs[i])) ) |
591 | 0 | break; |
592 | 0 | |
593 | 0 | unmap_domain_page(descs); |
594 | 0 | |
595 | 0 | return i == 512 ? 0 : -EINVAL; |
596 | 0 | } |
597 | | |
598 | | static int get_page_and_type_from_mfn( |
599 | | mfn_t mfn, unsigned long type, struct domain *d, |
600 | | int partial, int preemptible) |
601 | 0 | { |
602 | 0 | struct page_info *page = mfn_to_page(mfn); |
603 | 0 | int rc; |
604 | 0 | |
605 | 0 | if ( likely(partial >= 0) && |
606 | 0 | unlikely(!get_page_from_mfn(mfn, d)) ) |
607 | 0 | return -EINVAL; |
608 | 0 | |
609 | 0 | rc = (preemptible ? |
610 | 0 | get_page_type_preemptible(page, type) : |
611 | 0 | (get_page_type(page, type) ? 0 : -EINVAL)); |
612 | 0 | |
613 | 0 | if ( unlikely(rc) && partial >= 0 && |
614 | 0 | (!preemptible || page != current->arch.old_guest_table) ) |
615 | 0 | put_page(page); |
616 | 0 | |
617 | 0 | return rc; |
618 | 0 | } |
619 | | |
620 | | static void put_data_page( |
621 | | struct page_info *page, int writeable) |
622 | 0 | { |
623 | 0 | if ( writeable ) |
624 | 0 | put_page_and_type(page); |
625 | 0 | else |
626 | 0 | put_page(page); |
627 | 0 | } |
628 | | |
629 | | static bool inc_linear_entries(struct page_info *pg) |
630 | 0 | { |
631 | 0 | typeof(pg->linear_pt_count) nc = read_atomic(&pg->linear_pt_count), oc; |
632 | 0 | |
633 | 0 | do { |
634 | 0 | /* |
635 | 0 | * The check below checks for the "linear use" count being non-zero |
636 | 0 | * as well as overflow. Signed integer overflow is undefined behavior |
637 | 0 | * according to the C spec. However, as long as linear_pt_count is |
638 | 0 | * smaller in size than 'int', the arithmetic operation of the |
639 | 0 | * increment below won't overflow; rather the result will be truncated |
640 | 0 | * when stored. Ensure that this is always true. |
641 | 0 | */ |
642 | 0 | BUILD_BUG_ON(sizeof(nc) >= sizeof(int)); |
643 | 0 | oc = nc++; |
644 | 0 | if ( nc <= 0 ) |
645 | 0 | return false; |
646 | 0 | nc = cmpxchg(&pg->linear_pt_count, oc, nc); |
647 | 0 | } while ( oc != nc ); |
648 | 0 | |
649 | 0 | return true; |
650 | 0 | } |
651 | | |
652 | | static void dec_linear_entries(struct page_info *pg) |
653 | 0 | { |
654 | 0 | typeof(pg->linear_pt_count) oc; |
655 | 0 | |
656 | 0 | oc = arch_fetch_and_add(&pg->linear_pt_count, -1); |
657 | 0 | ASSERT(oc > 0); |
658 | 0 | } |
659 | | |
660 | | static bool inc_linear_uses(struct page_info *pg) |
661 | 0 | { |
662 | 0 | typeof(pg->linear_pt_count) nc = read_atomic(&pg->linear_pt_count), oc; |
663 | 0 | |
664 | 0 | do { |
665 | 0 | /* See the respective comment in inc_linear_entries(). */ |
666 | 0 | BUILD_BUG_ON(sizeof(nc) >= sizeof(int)); |
667 | 0 | oc = nc--; |
668 | 0 | if ( nc >= 0 ) |
669 | 0 | return false; |
670 | 0 | nc = cmpxchg(&pg->linear_pt_count, oc, nc); |
671 | 0 | } while ( oc != nc ); |
672 | 0 | |
673 | 0 | return true; |
674 | 0 | } |
675 | | |
676 | | static void dec_linear_uses(struct page_info *pg) |
677 | 0 | { |
678 | 0 | typeof(pg->linear_pt_count) oc; |
679 | 0 | |
680 | 0 | oc = arch_fetch_and_add(&pg->linear_pt_count, 1); |
681 | 0 | ASSERT(oc < 0); |
682 | 0 | } |
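The four helpers above share one lock-free pattern: linear_pt_count counts "linear entries" upward and "linear uses" downward from zero, so a single sign test rejects both overflow and a conflicting use, and the update is retried with compare-and-swap. A stand-in using C11 atomics; Xen's read_atomic()/cmpxchg() are hypervisor primitives, and, as the comment in inc_linear_entries() notes, the field is narrower than int so the increment truncates rather than hitting undefined signed overflow:

#include <stdatomic.h>
#include <stdbool.h>

/* Model of inc_linear_entries(): reject if the count would leave [1, SHRT_MAX]. */
static bool inc_entries(_Atomic short *ctr)
{
    short oc = atomic_load(ctr), nc;

    do {
        nc = oc + 1;     /* int arithmetic, truncated on store to short */
        if ( nc <= 0 )   /* negative: opposite-use count active; 0: wrapped */
            return false;
    } while ( !atomic_compare_exchange_weak(ctr, &oc, nc) );

    return true;
}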
683 | | |
684 | | /* |
685 | | * We allow root tables to map each other (a.k.a. linear page tables). It |
686 | | * needs some special care with reference counts and access permissions: |
687 | | * 1. The mapping entry must be read-only, or the guest may get write access |
688 | | * to its own PTEs. |
689 | | * 2. We must only bump the reference counts for an *already validated* |
690 | | * L2 table, or we can end up in a deadlock in get_page_type() by waiting |
691 | | * on a validation that is required to complete that validation. |
692 | | * 3. We only need to increment the reference counts for the mapped page |
693 | | * frame if it is mapped by a different root table. This is sufficient and |
694 | | * also necessary to allow validation of a root table mapping itself. |
695 | | */ |
696 | | #define define_get_linear_pagetable(level) \ |
697 | | static int \ |
698 | | get_##level##_linear_pagetable( \ |
699 | 0 | level##_pgentry_t pde, unsigned long pde_pfn, struct domain *d) \ |
700 | 0 | { \ |
701 | 0 | unsigned long x, y; \ |
702 | 0 | struct page_info *page; \ |
703 | 0 | unsigned long pfn; \ |
704 | 0 | \ |
705 | 0 | if ( (level##e_get_flags(pde) & _PAGE_RW) ) \ |
706 | 0 | { \ |
707 | 0 | gdprintk(XENLOG_WARNING, \ |
708 | 0 | "Attempt to create linear p.t. with write perms\n"); \ |
709 | 0 | return 0; \ |
710 | 0 | } \ |
711 | 0 | \ |
712 | 0 | if ( (pfn = level##e_get_pfn(pde)) != pde_pfn ) \ |
713 | 0 | { \ |
714 | 0 | struct page_info *ptpg = mfn_to_page(_mfn(pde_pfn)); \ |
715 | 0 | \ |
716 | 0 | /* Make sure the page table belongs to the correct domain. */ \ |
717 | 0 | if ( unlikely(page_get_owner(ptpg) != d) ) \ |
718 | 0 | return 0; \ |
719 | 0 | \ |
720 | 0 | /* Make sure the mapped frame belongs to the correct domain. */ \ |
721 | 0 | if ( unlikely(!get_page_from_mfn(_mfn(pfn), d)) ) \ |
722 | 0 | return 0; \ |
723 | 0 | \ |
724 | 0 | /* \ |
725 | 0 | * Ensure that the mapped frame is an already-validated page table \ |
726 | 0 | * and is not itself having linear entries, as well as that the \ |
727 | 0 | * containing page table is not itself in use as a linear page table \ |
728 | 0 | * elsewhere. \ |
729 | 0 | * If so, atomically increment the count (checking for overflow). \ |
730 | 0 | */ \ |
731 | 0 | page = mfn_to_page(_mfn(pfn)); \ |
732 | 0 | if ( !inc_linear_entries(ptpg) ) \ |
733 | 0 | { \ |
734 | 0 | put_page(page); \ |
735 | 0 | return 0; \ |
736 | 0 | } \ |
737 | 0 | if ( !inc_linear_uses(page) ) \ |
738 | 0 | { \ |
739 | 0 | dec_linear_entries(ptpg); \ |
740 | 0 | put_page(page); \ |
741 | 0 | return 0; \ |
742 | 0 | } \ |
743 | 0 | y = page->u.inuse.type_info; \ |
744 | 0 | do { \ |
745 | 0 | x = y; \ |
746 | 0 | if ( unlikely((x & PGT_count_mask) == PGT_count_mask) || \ |
747 | 0 | unlikely((x & (PGT_type_mask|PGT_validated)) != \ |
748 | 0 | (PGT_##level##_page_table|PGT_validated)) ) \ |
749 | 0 | { \ |
750 | 0 | dec_linear_uses(page); \ |
751 | 0 | dec_linear_entries(ptpg); \ |
752 | 0 | put_page(page); \ |
753 | 0 | return 0; \ |
754 | 0 | } \ |
755 | 0 | } \ |
756 | 0 | while ( (y = cmpxchg(&page->u.inuse.type_info, x, x + 1)) != x ); \ |
757 | 0 | } \ |
758 | 0 | \ |
759 | 0 | return 1; \ |
760 | 0 | } |
Unexecuted instantiation: mm.c:get_l2_linear_pagetable
Unexecuted instantiation: mm.c:get_l3_linear_pagetable
Unexecuted instantiation: mm.c:get_l4_linear_pagetable
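What a linear (recursive) mapping buys is that every PTE becomes reachable through an ordinary virtual address. A worked sketch under the 4-level x86-64 layout, using the self-linear slot 258 that init_xen_l4_slots() installs later in this file; the helper name and the address math are illustrative, not mm.c code:

#include <stdint.h>
#include <stdio.h>

#define SELF_SLOT 258ULL   /* L4 slot mapping the L4 back onto itself */

/* Virtual address at which the L1 entry (PTE) covering 'va' is visible. */
static uint64_t linear_pte_va(uint64_t va)
{
    /* One pass through the self slot shifts the whole walk down a level. */
    uint64_t v = (SELF_SLOT << 39) | ((va & 0x0000FFFFFFFFF000ULL) >> 9);

    return v | 0xFFFF000000000000ULL;   /* slot 258 lies in the upper half */
}

int main(void)
{
    /* PTE for VA 0x1000 appears at 0xffff810000000008. */
    printf("%#llx\n", (unsigned long long)linear_pte_va(0x1000));
    return 0;
}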
761 | | |
762 | | |
763 | | bool is_iomem_page(mfn_t mfn) |
764 | 0 | { |
765 | 0 | struct page_info *page; |
766 | 0 |
|
767 | 0 | if ( !mfn_valid(mfn) ) |
768 | 0 | return true; |
769 | 0 |
|
770 | 0 | /* Caller must know that it is an iomem page, or a reference is held. */ |
771 | 0 | page = mfn_to_page(mfn); |
772 | 0 | ASSERT((page->count_info & PGC_count_mask) != 0); |
773 | 0 |
|
774 | 0 | return (page_get_owner(page) == dom_io); |
775 | 0 | } |
776 | | |
777 | | static int update_xen_mappings(unsigned long mfn, unsigned int cacheattr) |
778 | 0 | { |
779 | 0 | int err = 0; |
780 | 0 | bool alias = mfn >= PFN_DOWN(xen_phys_start) && |
781 | 0 | mfn < PFN_UP(xen_phys_start + xen_virt_end - XEN_VIRT_START); |
782 | 0 | unsigned long xen_va = |
783 | 0 | XEN_VIRT_START + ((mfn - PFN_DOWN(xen_phys_start)) << PAGE_SHIFT); |
784 | 0 | |
785 | 0 | if ( unlikely(alias) && cacheattr ) |
786 | 0 | err = map_pages_to_xen(xen_va, mfn, 1, 0); |
787 | 0 | if ( !err ) |
788 | 0 | err = map_pages_to_xen((unsigned long)mfn_to_virt(mfn), mfn, 1, |
789 | 0 | PAGE_HYPERVISOR | cacheattr_to_pte_flags(cacheattr)); |
790 | 0 | if ( unlikely(alias) && !cacheattr && !err ) |
791 | 0 | err = map_pages_to_xen(xen_va, mfn, 1, PAGE_HYPERVISOR); |
792 | 0 | return err; |
793 | 0 | } |
794 | | |
795 | | #ifndef NDEBUG |
796 | | struct mmio_emul_range_ctxt { |
797 | | const struct domain *d; |
798 | | unsigned long mfn; |
799 | | }; |
800 | | |
801 | | static int print_mmio_emul_range(unsigned long s, unsigned long e, void *arg) |
802 | 0 | { |
803 | 0 | const struct mmio_emul_range_ctxt *ctxt = arg; |
804 | 0 | |
805 | 0 | if ( ctxt->mfn > e ) |
806 | 0 | return 0; |
807 | 0 | |
808 | 0 | if ( ctxt->mfn >= s ) |
809 | 0 | { |
810 | 0 | static DEFINE_SPINLOCK(last_lock); |
811 | 0 | static const struct domain *last_d; |
812 | 0 | static unsigned long last_s = ~0UL, last_e; |
813 | 0 | bool print = false; |
814 | 0 | |
815 | 0 | spin_lock(&last_lock); |
816 | 0 | if ( last_d != ctxt->d || last_s != s || last_e != e ) |
817 | 0 | { |
818 | 0 | last_d = ctxt->d; |
819 | 0 | last_s = s; |
820 | 0 | last_e = e; |
821 | 0 | print = true; |
822 | 0 | } |
823 | 0 | spin_unlock(&last_lock); |
824 | 0 | |
825 | 0 | if ( print ) |
826 | 0 | printk(XENLOG_G_INFO |
827 | 0 | "d%d: Forcing write emulation on MFNs %lx-%lx\n", |
828 | 0 | ctxt->d->domain_id, s, e); |
829 | 0 | } |
830 | 0 | |
831 | 0 | return 1; |
832 | 0 | } |
833 | | #endif |
834 | | |
835 | | /* |
836 | | * get_page_from_l1e returns: |
837 | | * 0 => success (page not present also counts as such) |
838 | | * <0 => error code |
839 | | * >0 => the page flags to be flipped |
840 | | */ |
841 | | int |
842 | | get_page_from_l1e( |
843 | | l1_pgentry_t l1e, struct domain *l1e_owner, struct domain *pg_owner) |
844 | 0 | { |
845 | 0 | unsigned long mfn = l1e_get_pfn(l1e); |
846 | 0 | struct page_info *page = mfn_to_page(_mfn(mfn)); |
847 | 0 | uint32_t l1f = l1e_get_flags(l1e); |
848 | 0 | struct vcpu *curr = current; |
849 | 0 | struct domain *real_pg_owner; |
850 | 0 | bool write; |
851 | 0 | |
852 | 0 | if ( !(l1f & _PAGE_PRESENT) ) |
853 | 0 | return 0; |
854 | 0 | |
855 | 0 | if ( unlikely(l1f & l1_disallow_mask(l1e_owner)) ) |
856 | 0 | { |
857 | 0 | gdprintk(XENLOG_WARNING, "Bad L1 flags %x\n", |
858 | 0 | l1f & l1_disallow_mask(l1e_owner)); |
859 | 0 | return -EINVAL; |
860 | 0 | } |
861 | 0 | |
862 | 0 | if ( !mfn_valid(_mfn(mfn)) || |
863 | 0 | (real_pg_owner = page_get_owner_and_reference(page)) == dom_io ) |
864 | 0 | { |
865 | 0 | int flip = 0; |
866 | 0 | |
867 | 0 | /* Only needed the reference to confirm dom_io ownership. */ |
868 | 0 | if ( mfn_valid(_mfn(mfn)) ) |
869 | 0 | put_page(page); |
870 | 0 | |
871 | 0 | /* DOMID_IO reverts to caller for privilege checks. */ |
872 | 0 | if ( pg_owner == dom_io ) |
873 | 0 | pg_owner = curr->domain; |
874 | 0 | |
875 | 0 | if ( !iomem_access_permitted(pg_owner, mfn, mfn) ) |
876 | 0 | { |
877 | 0 | if ( mfn != (PADDR_MASK >> PAGE_SHIFT) ) /* INVALID_MFN? */ |
878 | 0 | { |
879 | 0 | gdprintk(XENLOG_WARNING, |
880 | 0 | "d%d non-privileged attempt to map MMIO space %"PRI_mfn"\n", |
881 | 0 | pg_owner->domain_id, mfn); |
882 | 0 | return -EPERM; |
883 | 0 | } |
884 | 0 | return -EINVAL; |
885 | 0 | } |
886 | 0 | |
887 | 0 | if ( pg_owner != l1e_owner && |
888 | 0 | !iomem_access_permitted(l1e_owner, mfn, mfn) ) |
889 | 0 | { |
890 | 0 | if ( mfn != (PADDR_MASK >> PAGE_SHIFT) ) /* INVALID_MFN? */ |
891 | 0 | { |
892 | 0 | gdprintk(XENLOG_WARNING, |
893 | 0 | "d%d attempted to map MMIO space %"PRI_mfn" in d%d to d%d\n", |
894 | 0 | curr->domain->domain_id, mfn, pg_owner->domain_id, |
895 | 0 | l1e_owner->domain_id); |
896 | 0 | return -EPERM; |
897 | 0 | } |
898 | 0 | return -EINVAL; |
899 | 0 | } |
900 | 0 | |
901 | 0 | if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn) ) |
902 | 0 | { |
903 | 0 | /* MMIO pages must not be mapped cacheable unless explicitly requested. */ |
904 | 0 | switch ( opt_mmio_relax ) |
905 | 0 | { |
906 | 0 | case 0: |
907 | 0 | break; |
908 | 0 | case 1: |
909 | 0 | if ( !is_hardware_domain(l1e_owner) ) |
910 | 0 | break; |
911 | 0 | /* fallthrough */ |
912 | 0 | case -1: |
913 | 0 | return 0; |
914 | 0 | default: |
915 | 0 | ASSERT_UNREACHABLE(); |
916 | 0 | } |
917 | 0 | } |
918 | 0 | else if ( l1f & _PAGE_RW ) |
919 | 0 | { |
920 | 0 | #ifndef NDEBUG |
921 | 0 | const unsigned long *ro_map; |
922 | 0 | unsigned int seg, bdf; |
923 | 0 | |
924 | 0 | if ( !pci_mmcfg_decode(mfn, &seg, &bdf) || |
925 | 0 | ((ro_map = pci_get_ro_map(seg)) != NULL && |
926 | 0 | test_bit(bdf, ro_map)) ) |
927 | 0 | printk(XENLOG_G_WARNING |
928 | 0 | "d%d: Forcing read-only access to MFN %lx\n", |
929 | 0 | l1e_owner->domain_id, mfn); |
930 | 0 | else |
931 | 0 | rangeset_report_ranges(mmio_ro_ranges, 0, ~0UL, |
932 | 0 | print_mmio_emul_range, |
933 | 0 | &(struct mmio_emul_range_ctxt){ |
934 | 0 | .d = l1e_owner, |
935 | 0 | .mfn = mfn }); |
936 | 0 | #endif |
937 | 0 | flip = _PAGE_RW; |
938 | 0 | } |
939 | 0 | |
940 | 0 | switch ( l1f & PAGE_CACHE_ATTRS ) |
941 | 0 | { |
942 | 0 | case 0: /* WB */ |
943 | 0 | flip |= _PAGE_PWT | _PAGE_PCD; |
944 | 0 | break; |
945 | 0 | case _PAGE_PWT: /* WT */ |
946 | 0 | case _PAGE_PWT | _PAGE_PAT: /* WP */ |
947 | 0 | flip |= _PAGE_PCD | (l1f & _PAGE_PAT); |
948 | 0 | break; |
949 | 0 | } |
950 | 0 | |
951 | 0 | return flip; |
952 | 0 | } |
953 | 0 | |
954 | 0 | if ( unlikely( (real_pg_owner != pg_owner) && |
955 | 0 | (real_pg_owner != dom_cow) ) ) |
956 | 0 | { |
957 | 0 | /* |
958 | 0 | * Let privileged domains transfer the right to map their target |
959 | 0 | * domain's pages. This is used to allow stub-domain pvfb export to |
960 | 0 | * dom0, until pvfb supports granted mappings. At that time this |
961 | 0 | * minor hack can go away. |
962 | 0 | */ |
963 | 0 | if ( (real_pg_owner == NULL) || (pg_owner == l1e_owner) || |
964 | 0 | xsm_priv_mapping(XSM_TARGET, pg_owner, real_pg_owner) ) |
965 | 0 | { |
966 | 0 | gdprintk(XENLOG_WARNING, |
967 | 0 | "pg_owner d%d l1e_owner d%d, but real_pg_owner d%d\n", |
968 | 0 | pg_owner->domain_id, l1e_owner->domain_id, |
969 | 0 | real_pg_owner ? real_pg_owner->domain_id : -1); |
970 | 0 | goto could_not_pin; |
971 | 0 | } |
972 | 0 | pg_owner = real_pg_owner; |
973 | 0 | } |
974 | 0 | |
975 | 0 | /* |
976 | 0 | * Extra paranoid check for shared memory. Writable mappings |
977 | 0 | * disallowed (unshare first!) |
978 | 0 | */ |
979 | 0 | if ( (l1f & _PAGE_RW) && (real_pg_owner == dom_cow) ) |
980 | 0 | goto could_not_pin; |
981 | 0 | |
982 | 0 | /* |
983 | 0 | * Foreign mappings into guests in shadow external mode don't |
984 | 0 | * contribute to writeable mapping refcounts. (This allows the |
985 | 0 | * qemu-dm helper process in dom0 to map the domain's memory without |
986 | 0 | * messing up the count of "real" writable mappings.) |
987 | 0 | */ |
988 | 0 | write = (l1f & _PAGE_RW) && |
989 | 0 | ((l1e_owner == pg_owner) || !paging_mode_external(pg_owner)); |
990 | 0 | if ( write && !get_page_type(page, PGT_writable_page) ) |
991 | 0 | { |
992 | 0 | gdprintk(XENLOG_WARNING, "Could not get page type PGT_writable_page\n"); |
993 | 0 | goto could_not_pin; |
994 | 0 | } |
995 | 0 | |
996 | 0 | if ( pte_flags_to_cacheattr(l1f) != |
997 | 0 | ((page->count_info & PGC_cacheattr_mask) >> PGC_cacheattr_base) ) |
998 | 0 | { |
999 | 0 | unsigned long x, nx, y = page->count_info; |
1000 | 0 | unsigned long cacheattr = pte_flags_to_cacheattr(l1f); |
1001 | 0 | int err; |
1002 | 0 | |
1003 | 0 | if ( is_xen_heap_page(page) ) |
1004 | 0 | { |
1005 | 0 | if ( write ) |
1006 | 0 | put_page_type(page); |
1007 | 0 | put_page(page); |
1008 | 0 | gdprintk(XENLOG_WARNING, |
1009 | 0 | "Attempt to change cache attributes of Xen heap page\n"); |
1010 | 0 | return -EACCES; |
1011 | 0 | } |
1012 | 0 | |
1013 | 0 | do { |
1014 | 0 | x = y; |
1015 | 0 | nx = (x & ~PGC_cacheattr_mask) | (cacheattr << PGC_cacheattr_base); |
1016 | 0 | } while ( (y = cmpxchg(&page->count_info, x, nx)) != x ); |
1017 | 0 | |
1018 | 0 | err = update_xen_mappings(mfn, cacheattr); |
1019 | 0 | if ( unlikely(err) ) |
1020 | 0 | { |
1021 | 0 | cacheattr = y & PGC_cacheattr_mask; |
1022 | 0 | do { |
1023 | 0 | x = y; |
1024 | 0 | nx = (x & ~PGC_cacheattr_mask) | cacheattr; |
1025 | 0 | } while ( (y = cmpxchg(&page->count_info, x, nx)) != x ); |
1026 | 0 | |
1027 | 0 | if ( write ) |
1028 | 0 | put_page_type(page); |
1029 | 0 | put_page(page); |
1030 | 0 | |
1031 | 0 | gdprintk(XENLOG_WARNING, "Error updating mappings for mfn %" PRI_mfn |
1032 | 0 | " (pfn %" PRI_pfn ", from L1 entry %" PRIpte ") for d%d\n", |
1033 | 0 | mfn, get_gpfn_from_mfn(mfn), |
1034 | 0 | l1e_get_intpte(l1e), l1e_owner->domain_id); |
1035 | 0 | return err; |
1036 | 0 | } |
1037 | 0 | } |
1038 | 0 | |
1039 | 0 | return 0; |
1040 | 0 | |
1041 | 0 | could_not_pin: |
1042 | 0 | gdprintk(XENLOG_WARNING, "Error getting mfn %" PRI_mfn " (pfn %" PRI_pfn |
1043 | 0 | ") from L1 entry %" PRIpte " for l1e_owner d%d, pg_owner d%d\n", |
1044 | 0 | mfn, get_gpfn_from_mfn(mfn), |
1045 | 0 | l1e_get_intpte(l1e), l1e_owner->domain_id, pg_owner->domain_id); |
1046 | 0 | if ( real_pg_owner != NULL ) |
1047 | 0 | put_page(page); |
1048 | 0 | return -EBUSY; |
1049 | 0 | } |
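For MMIO frames the function does not reject a cacheable request outright: it returns attribute bits which the caller XORs into the PTE, demoting the mapping to an uncacheable type. The switch at the end of the MMIO path, isolated into a hypothetical helper (the bit values are the architectural PWT/PCD/PAT positions for 4k PTEs):

#include <stdint.h>

#define _PAGE_PWT 0x08u   /* page write-through */
#define _PAGE_PCD 0x10u   /* page cache disable */
#define _PAGE_PAT 0x80u   /* PAT index high bit in 4k PTEs */

/* Bits the caller XORs into the PTE so MMIO is not mapped cacheable. */
static unsigned int mmio_cacheattr_flip(uint32_t l1f)
{
    switch ( l1f & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT) )
    {
    case 0:                      /* WB requested: set PWT and PCD */
        return _PAGE_PWT | _PAGE_PCD;
    case _PAGE_PWT:              /* WT requested */
    case _PAGE_PWT | _PAGE_PAT:  /* WP requested */
        return _PAGE_PCD | (l1f & _PAGE_PAT);
    }
    return 0;                    /* already an uncacheable attribute */
}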
1050 | | |
1051 | | |
1052 | | /* NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. */ |
1053 | | /* |
1054 | | * get_page_from_l2e returns: |
1055 | | * 1 => page not present |
1056 | | * 0 => success |
1057 | | * <0 => error code |
1058 | | */ |
1059 | | define_get_linear_pagetable(l2); |
1060 | | static int |
1061 | | get_page_from_l2e( |
1062 | | l2_pgentry_t l2e, unsigned long pfn, struct domain *d) |
1063 | 0 | { |
1064 | 0 | unsigned long mfn = l2e_get_pfn(l2e); |
1065 | 0 | int rc; |
1066 | 0 | |
1067 | 0 | if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) |
1068 | 0 | return 1; |
1069 | 0 | |
1070 | 0 | if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) ) |
1071 | 0 | { |
1072 | 0 | gdprintk(XENLOG_WARNING, "Bad L2 flags %x\n", |
1073 | 0 | l2e_get_flags(l2e) & L2_DISALLOW_MASK); |
1074 | 0 | return -EINVAL; |
1075 | 0 | } |
1076 | 0 | |
1077 | 0 | if ( !(l2e_get_flags(l2e) & _PAGE_PSE) ) |
1078 | 0 | { |
1079 | 0 | rc = get_page_and_type_from_mfn(_mfn(mfn), PGT_l1_page_table, d, 0, 0); |
1080 | 0 | if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) ) |
1081 | 0 | rc = 0; |
1082 | 0 | return rc; |
1083 | 0 | } |
1084 | 0 | |
1085 | 0 | return -EINVAL; |
1086 | 0 | } |
1087 | | |
1088 | | |
1089 | | /* |
1090 | | * get_page_from_l3e returns: |
1091 | | * 1 => page not present |
1092 | | * 0 => success |
1093 | | * <0 => error code |
1094 | | */ |
1095 | | define_get_linear_pagetable(l3); |
1096 | | static int |
1097 | | get_page_from_l3e( |
1098 | | l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int partial) |
1099 | 0 | { |
1100 | 0 | int rc; |
1101 | 0 | |
1102 | 0 | if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) |
1103 | 0 | return 1; |
1104 | 0 | |
1105 | 0 | if ( unlikely((l3e_get_flags(l3e) & l3_disallow_mask(d))) ) |
1106 | 0 | { |
1107 | 0 | gdprintk(XENLOG_WARNING, "Bad L3 flags %x\n", |
1108 | 0 | l3e_get_flags(l3e) & l3_disallow_mask(d)); |
1109 | 0 | return -EINVAL; |
1110 | 0 | } |
1111 | 0 | |
1112 | 0 | rc = get_page_and_type_from_mfn( |
1113 | 0 | l3e_get_mfn(l3e), PGT_l2_page_table, d, partial, 1); |
1114 | 0 | if ( unlikely(rc == -EINVAL) && |
1115 | 0 | !is_pv_32bit_domain(d) && |
1116 | 0 | get_l3_linear_pagetable(l3e, pfn, d) ) |
1117 | 0 | rc = 0; |
1118 | 0 | |
1119 | 0 | return rc; |
1120 | 0 | } |
1121 | | |
1122 | | /* |
1123 | | * get_page_from_l4e returns: |
1124 | | * 1 => page not present |
1125 | | * 0 => success |
1126 | | * <0 => error code |
1127 | | */ |
1128 | | define_get_linear_pagetable(l4); |
1129 | | static int |
1130 | | get_page_from_l4e( |
1131 | | l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int partial) |
1132 | 0 | { |
1133 | 0 | int rc; |
1134 | 0 | |
1135 | 0 | if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) ) |
1136 | 0 | return 1; |
1137 | 0 | |
1138 | 0 | if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) ) |
1139 | 0 | { |
1140 | 0 | gdprintk(XENLOG_WARNING, "Bad L4 flags %x\n", |
1141 | 0 | l4e_get_flags(l4e) & L4_DISALLOW_MASK); |
1142 | 0 | return -EINVAL; |
1143 | 0 | } |
1144 | 0 | |
1145 | 0 | rc = get_page_and_type_from_mfn( |
1146 | 0 | l4e_get_mfn(l4e), PGT_l3_page_table, d, partial, 1); |
1147 | 0 | if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) ) |
1148 | 0 | rc = 0; |
1149 | 0 | |
1150 | 0 | return rc; |
1151 | 0 | } |
1152 | | |
1153 | | static int _put_page_type(struct page_info *page, bool preemptible, |
1154 | | struct page_info *ptpg); |
1155 | | |
1156 | | void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner) |
1157 | 0 | { |
1158 | 0 | unsigned long pfn = l1e_get_pfn(l1e); |
1159 | 0 | struct page_info *page; |
1160 | 0 | struct domain *pg_owner; |
1161 | 0 | struct vcpu *v; |
1162 | 0 | |
1163 | 0 | if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) || is_iomem_page(_mfn(pfn)) ) |
1164 | 0 | return; |
1165 | 0 | |
1166 | 0 | page = mfn_to_page(_mfn(pfn)); |
1167 | 0 | pg_owner = page_get_owner(page); |
1168 | 0 | |
1169 | 0 | /* |
1170 | 0 | * Check if this is a mapping that was established via a grant reference. |
1171 | 0 | * If it was then we should not be here: we require that such mappings are |
1172 | 0 | * explicitly destroyed via the grant-table interface. |
1173 | 0 | * |
1174 | 0 | * The upshot of this is that the guest can end up with active grants that |
1175 | 0 | * it cannot destroy (because it no longer has a PTE to present to the |
1176 | 0 | * grant-table interface). This can lead to subtle hard-to-catch bugs, |
1177 | 0 | * hence a special grant PTE flag can be enabled to catch the bug early. |
1178 | 0 | * |
1179 | 0 | * (Note that the undestroyable active grants are not a security hole in |
1180 | 0 | * Xen. All active grants can safely be cleaned up when the domain dies.) |
1181 | 0 | */ |
1182 | 0 | if ( (l1e_get_flags(l1e) & _PAGE_GNTTAB) && |
1183 | 0 | !l1e_owner->is_shutting_down && !l1e_owner->is_dying ) |
1184 | 0 | { |
1185 | 0 | gdprintk(XENLOG_WARNING, |
1186 | 0 | "Attempt to implicitly unmap a granted PTE %" PRIpte "\n", |
1187 | 0 | l1e_get_intpte(l1e)); |
1188 | 0 | domain_crash(l1e_owner); |
1189 | 0 | } |
1190 | 0 | |
1191 | 0 | /* |
1192 | 0 | * Remember we didn't take a type-count of foreign writable mappings |
1193 | 0 | * to paging-external domains. |
1194 | 0 | */ |
1195 | 0 | if ( (l1e_get_flags(l1e) & _PAGE_RW) && |
1196 | 0 | ((l1e_owner == pg_owner) || !paging_mode_external(pg_owner)) ) |
1197 | 0 | { |
1198 | 0 | put_page_and_type(page); |
1199 | 0 | } |
1200 | 0 | else |
1201 | 0 | { |
1202 | 0 | /* We expect this is rare so we blow the entire shadow LDT. */ |
1203 | 0 | if ( unlikely(((page->u.inuse.type_info & PGT_type_mask) == |
1204 | 0 | PGT_seg_desc_page)) && |
1205 | 0 | unlikely(((page->u.inuse.type_info & PGT_count_mask) != 0)) && |
1206 | 0 | (l1e_owner == pg_owner) ) |
1207 | 0 | { |
1208 | 0 | for_each_vcpu ( pg_owner, v ) |
1209 | 0 | invalidate_shadow_ldt(v, 1); |
1210 | 0 | } |
1211 | 0 | put_page(page); |
1212 | 0 | } |
1213 | 0 | } |
1214 | | |
1215 | | |
1216 | | /* |
1217 | | * NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. |
1218 | | * Note also that this automatically deals correctly with linear p.t.'s. |
1219 | | */ |
1220 | | static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn) |
1221 | 0 | { |
1222 | 0 | if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || (l2e_get_pfn(l2e) == pfn) ) |
1223 | 0 | return 1; |
1224 | 0 | |
1225 | 0 | if ( l2e_get_flags(l2e) & _PAGE_PSE ) |
1226 | 0 | { |
1227 | 0 | struct page_info *page = l2e_get_page(l2e); |
1228 | 0 | unsigned int i; |
1229 | 0 | |
1230 | 0 | for ( i = 0; i < (1u << PAGETABLE_ORDER); i++, page++ ) |
1231 | 0 | put_page_and_type(page); |
1232 | 0 | } |
1233 | 0 | else |
1234 | 0 | { |
1235 | 0 | struct page_info *pg = l2e_get_page(l2e); |
1236 | 0 | int rc = _put_page_type(pg, false, mfn_to_page(_mfn(pfn))); |
1237 | 0 | |
1238 | 0 | ASSERT(!rc); |
1239 | 0 | put_page(pg); |
1240 | 0 | } |
1241 | 0 | |
1242 | 0 | return 0; |
1243 | 0 | } |
1244 | | |
1245 | | static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, |
1246 | | int partial, bool defer) |
1247 | 0 | { |
1248 | 0 | struct page_info *pg; |
1249 | 0 | int rc; |
1250 | 0 | |
1251 | 0 | if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || (l3e_get_pfn(l3e) == pfn) ) |
1252 | 0 | return 1; |
1253 | 0 | |
1254 | 0 | if ( unlikely(l3e_get_flags(l3e) & _PAGE_PSE) ) |
1255 | 0 | { |
1256 | 0 | unsigned long mfn = l3e_get_pfn(l3e); |
1257 | 0 | int writeable = l3e_get_flags(l3e) & _PAGE_RW; |
1258 | 0 | |
1259 | 0 | ASSERT(!(mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1))); |
1260 | 0 | do { |
1261 | 0 | put_data_page(mfn_to_page(_mfn(mfn)), writeable); |
1262 | 0 | } while ( ++mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1) ); |
1263 | 0 | |
1264 | 0 | return 0; |
1265 | 0 | } |
1266 | 0 | |
1267 | 0 | pg = l3e_get_page(l3e); |
1268 | 0 | |
1269 | 0 | if ( unlikely(partial > 0) ) |
1270 | 0 | { |
1271 | 0 | ASSERT(!defer); |
1272 | 0 | return _put_page_type(pg, true, mfn_to_page(_mfn(pfn))); |
1273 | 0 | } |
1274 | 0 | |
1275 | 0 | if ( defer ) |
1276 | 0 | { |
1277 | 0 | current->arch.old_guest_ptpg = mfn_to_page(_mfn(pfn)); |
1278 | 0 | current->arch.old_guest_table = pg; |
1279 | 0 | return 0; |
1280 | 0 | } |
1281 | 0 | |
1282 | 0 | rc = _put_page_type(pg, true, mfn_to_page(_mfn(pfn))); |
1283 | 0 | if ( likely(!rc) ) |
1284 | 0 | put_page(pg); |
1285 | 0 | |
1286 | 0 | return rc; |
1287 | 0 | } |
1288 | | |
1289 | | static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, |
1290 | | int partial, bool defer) |
1291 | 0 | { |
1292 | 0 | int rc = 1; |
1293 | 0 | |
1294 | 0 | if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) && |
1295 | 0 | (l4e_get_pfn(l4e) != pfn) ) |
1296 | 0 | { |
1297 | 0 | struct page_info *pg = l4e_get_page(l4e); |
1298 | 0 | |
1299 | 0 | if ( unlikely(partial > 0) ) |
1300 | 0 | { |
1301 | 0 | ASSERT(!defer); |
1302 | 0 | return _put_page_type(pg, true, mfn_to_page(_mfn(pfn))); |
1303 | 0 | } |
1304 | 0 | |
1305 | 0 | if ( defer ) |
1306 | 0 | { |
1307 | 0 | current->arch.old_guest_ptpg = mfn_to_page(_mfn(pfn)); |
1308 | 0 | current->arch.old_guest_table = pg; |
1309 | 0 | return 0; |
1310 | 0 | } |
1311 | 0 | |
1312 | 0 | rc = _put_page_type(pg, true, mfn_to_page(_mfn(pfn))); |
1313 | 0 | if ( likely(!rc) ) |
1314 | 0 | put_page(pg); |
1315 | 0 | } |
1316 | 0 | |
1317 | 0 | return rc; |
1318 | 0 | } |
1319 | | |
1320 | | static int alloc_l1_table(struct page_info *page) |
1321 | 0 | { |
1322 | 0 | struct domain *d = page_get_owner(page); |
1323 | 0 | l1_pgentry_t *pl1e; |
1324 | 0 | unsigned int i; |
1325 | 0 | int ret = 0; |
1326 | 0 | |
1327 | 0 | pl1e = __map_domain_page(page); |
1328 | 0 | |
1329 | 0 | for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) |
1330 | 0 | { |
1331 | 0 | switch ( ret = get_page_from_l1e(pl1e[i], d, d) ) |
1332 | 0 | { |
1333 | 0 | default: |
1334 | 0 | goto fail; |
1335 | 0 | case 0: |
1336 | 0 | break; |
1337 | 0 | case _PAGE_RW ... _PAGE_RW | PAGE_CACHE_ATTRS: |
1338 | 0 | ASSERT(!(ret & ~(_PAGE_RW | PAGE_CACHE_ATTRS))); |
1339 | 0 | l1e_flip_flags(pl1e[i], ret); |
1340 | 0 | break; |
1341 | 0 | } |
1342 | 0 | |
1343 | 0 | pl1e[i] = adjust_guest_l1e(pl1e[i], d); |
1344 | 0 | } |
1345 | 0 | |
1346 | 0 | unmap_domain_page(pl1e); |
1347 | 0 | return 0; |
1348 | 0 | |
1349 | 0 | fail: |
1350 | 0 | gdprintk(XENLOG_WARNING, "Failure in alloc_l1_table: slot %#x\n", i); |
1351 | 0 | while ( i-- > 0 ) |
1352 | 0 | put_page_from_l1e(pl1e[i], d); |
1353 | 0 | |
1354 | 0 | unmap_domain_page(pl1e); |
1355 | 0 | return ret; |
1356 | 0 | } |
1357 | | |
1358 | | static int create_pae_xen_mappings(struct domain *d, l3_pgentry_t *pl3e) |
1359 | 0 | { |
1360 | 0 | struct page_info *page; |
1361 | 0 | l3_pgentry_t l3e3; |
1362 | 0 | |
1363 | 0 | if ( !is_pv_32bit_domain(d) ) |
1364 | 0 | return 1; |
1365 | 0 | |
1366 | 0 | pl3e = (l3_pgentry_t *)((unsigned long)pl3e & PAGE_MASK); |
1367 | 0 | |
1368 | 0 | /* 3rd L3 slot contains L2 with Xen-private mappings. It *must* exist. */ |
1369 | 0 | l3e3 = pl3e[3]; |
1370 | 0 | if ( !(l3e_get_flags(l3e3) & _PAGE_PRESENT) ) |
1371 | 0 | { |
1372 | 0 | gdprintk(XENLOG_WARNING, "PAE L3 3rd slot is empty\n"); |
1373 | 0 | return 0; |
1374 | 0 | } |
1375 | 0 | |
1376 | 0 | /* |
1377 | 0 | * The Xen-private mappings include linear mappings. The L2 thus cannot |
1378 | 0 | * be shared by multiple L3 tables. The test here is adequate because: |
1379 | 0 | * 1. Cannot appear in slots != 3 because get_page_type() checks the |
1380 | 0 | * PGT_pae_xen_l2 flag, which is asserted iff the L2 appears in slot 3 |
1381 | 0 | * 2. Cannot appear in another page table's L3: |
1382 | 0 | * a. alloc_l3_table() calls this function and this check will fail |
1383 | 0 | * b. mod_l3_entry() disallows updates to slot 3 in an existing table |
1384 | 0 | */ |
1385 | 0 | page = l3e_get_page(l3e3); |
1386 | 0 | BUG_ON(page->u.inuse.type_info & PGT_pinned); |
1387 | 0 | BUG_ON((page->u.inuse.type_info & PGT_count_mask) == 0); |
1388 | 0 | BUG_ON(!(page->u.inuse.type_info & PGT_pae_xen_l2)); |
1389 | 0 | if ( (page->u.inuse.type_info & PGT_count_mask) != 1 ) |
1390 | 0 | { |
1391 | 0 | gdprintk(XENLOG_WARNING, "PAE L3 3rd slot is shared\n"); |
1392 | 0 | return 0; |
1393 | 0 | } |
1394 | 0 | |
1395 | 0 | return 1; |
1396 | 0 | } |
1397 | | |
1398 | | static int alloc_l2_table(struct page_info *page, unsigned long type, |
1399 | | int preemptible) |
1400 | 0 | { |
1401 | 0 | struct domain *d = page_get_owner(page); |
1402 | 0 | unsigned long pfn = mfn_x(page_to_mfn(page)); |
1403 | 0 | l2_pgentry_t *pl2e; |
1404 | 0 | unsigned int i; |
1405 | 0 | int rc = 0; |
1406 | 0 | |
1407 | 0 | pl2e = map_domain_page(_mfn(pfn)); |
1408 | 0 | |
1409 | 0 | for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES; i++ ) |
1410 | 0 | { |
1411 | 0 | if ( preemptible && i > page->nr_validated_ptes |
1412 | 0 | && hypercall_preempt_check() ) |
1413 | 0 | { |
1414 | 0 | page->nr_validated_ptes = i; |
1415 | 0 | rc = -ERESTART; |
1416 | 0 | break; |
1417 | 0 | } |
1418 | 0 | |
1419 | 0 | if ( !is_guest_l2_slot(d, type, i) || |
1420 | 0 | (rc = get_page_from_l2e(pl2e[i], pfn, d)) > 0 ) |
1421 | 0 | continue; |
1422 | 0 | |
1423 | 0 | if ( rc < 0 ) |
1424 | 0 | { |
1425 | 0 | gdprintk(XENLOG_WARNING, "Failure in alloc_l2_table: slot %#x\n", i); |
1426 | 0 | while ( i-- > 0 ) |
1427 | 0 | if ( is_guest_l2_slot(d, type, i) ) |
1428 | 0 | put_page_from_l2e(pl2e[i], pfn); |
1429 | 0 | break; |
1430 | 0 | } |
1431 | 0 | |
1432 | 0 | pl2e[i] = adjust_guest_l2e(pl2e[i], d); |
1433 | 0 | } |
1434 | 0 | |
1435 | 0 | if ( rc >= 0 && (type & PGT_pae_xen_l2) ) |
1436 | 0 | init_xen_pae_l2_slots(pl2e, d); |
1437 | 0 | |
1438 | 0 | unmap_domain_page(pl2e); |
1439 | 0 | return rc > 0 ? 0 : rc; |
1440 | 0 | } |
1441 | | |
1442 | | static int alloc_l3_table(struct page_info *page) |
1443 | 0 | { |
1444 | 0 | struct domain *d = page_get_owner(page); |
1445 | 0 | unsigned long pfn = mfn_x(page_to_mfn(page)); |
1446 | 0 | l3_pgentry_t *pl3e; |
1447 | 0 | unsigned int i; |
1448 | 0 | int rc = 0, partial = page->partial_pte; |
1449 | 0 | |
1450 | 0 | pl3e = map_domain_page(_mfn(pfn)); |
1451 | 0 | |
1452 | 0 | /* |
1453 | 0 | * PAE guests allocate full pages, but aren't required to initialize |
1454 | 0 | * more than the first four entries; when running in compatibility |
1455 | 0 | * mode, however, the full page is visible to the MMU, and hence all |
1456 | 0 | * 512 entries must be valid/verified, which is most easily achieved |
1457 | 0 | * by clearing them out. |
1458 | 0 | */ |
1459 | 0 | if ( is_pv_32bit_domain(d) ) |
1460 | 0 | memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e)); |
1461 | 0 | |
1462 | 0 | for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES; |
1463 | 0 | i++, partial = 0 ) |
1464 | 0 | { |
1465 | 0 | if ( is_pv_32bit_domain(d) && (i == 3) ) |
1466 | 0 | { |
1467 | 0 | if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) || |
1468 | 0 | (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) ) |
1469 | 0 | rc = -EINVAL; |
1470 | 0 | else |
1471 | 0 | rc = get_page_and_type_from_mfn( |
1472 | 0 | l3e_get_mfn(pl3e[i]), |
1473 | 0 | PGT_l2_page_table | PGT_pae_xen_l2, d, partial, 1); |
1474 | 0 | } |
1475 | 0 | else if ( (rc = get_page_from_l3e(pl3e[i], pfn, d, partial)) > 0 ) |
1476 | 0 | continue; |
1477 | 0 | |
1478 | 0 | if ( rc == -ERESTART ) |
1479 | 0 | { |
1480 | 0 | page->nr_validated_ptes = i; |
1481 | 0 | page->partial_pte = partial ?: 1; |
1482 | 0 | } |
1483 | 0 | else if ( rc == -EINTR && i ) |
1484 | 0 | { |
1485 | 0 | page->nr_validated_ptes = i; |
1486 | 0 | page->partial_pte = 0; |
1487 | 0 | rc = -ERESTART; |
1488 | 0 | } |
1489 | 0 | if ( rc < 0 ) |
1490 | 0 | break; |
1491 | 0 | |
1492 | 0 | pl3e[i] = adjust_guest_l3e(pl3e[i], d); |
1493 | 0 | } |
1494 | 0 | |
1495 | 0 | if ( rc >= 0 && !create_pae_xen_mappings(d, pl3e) ) |
1496 | 0 | rc = -EINVAL; |
1497 | 0 | if ( rc < 0 && rc != -ERESTART && rc != -EINTR ) |
1498 | 0 | { |
1499 | 0 | gdprintk(XENLOG_WARNING, "Failure in alloc_l3_table: slot %#x\n", i); |
1500 | 0 | if ( i ) |
1501 | 0 | { |
1502 | 0 | page->nr_validated_ptes = i; |
1503 | 0 | page->partial_pte = 0; |
1504 | 0 | current->arch.old_guest_ptpg = NULL; |
1505 | 0 | current->arch.old_guest_table = page; |
1506 | 0 | } |
1507 | 0 | while ( i-- > 0 ) |
1508 | 0 | pl3e[i] = unadjust_guest_l3e(pl3e[i], d); |
1509 | 0 | } |
1510 | 0 | |
1511 | 0 | unmap_domain_page(pl3e); |
1512 | 0 | return rc > 0 ? 0 : rc; |
1513 | 0 | } |
1514 | | |
1515 | | void init_xen_pae_l2_slots(l2_pgentry_t *l2t, const struct domain *d) |
1516 | 0 | { |
1517 | 0 | memcpy(&l2t[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)], |
1518 | 0 | &compat_idle_pg_table_l2[ |
1519 | 0 | l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)], |
1520 | 0 | COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*l2t)); |
1521 | 0 | } |
1522 | | |
1523 | | /* |
1524 | | * Fill an L4 with Xen entries. |
1525 | | * |
1526 | | * This function must write all ROOT_PAGETABLE_PV_XEN_SLOTS, to clobber any |
1527 | | * values a guest may have left there from alloc_l4_table(). |
1528 | | * |
1529 | | * l4t and l4mfn are mandatory, but l4mfn doesn't need to be the mfn under |
1530 | | * *l4t. All other parameters are optional and will either fill or zero the |
1531 | | * appropriate slots. Pagetables not shared with guests will gain the |
1532 | | * extended directmap. |
1533 | | */ |
1534 | | void init_xen_l4_slots(l4_pgentry_t *l4t, mfn_t l4mfn, |
1535 | | const struct domain *d, mfn_t sl4mfn, bool ro_mpt) |
1536 | 12 | { |
1537 | 12 | /* |
1538 | 12 | * PV vcpus need a shortened directmap. HVM and Idle vcpus get the full |
1539 | 12 | * directmap. |
1540 | 12 | */ |
1541 | 12 | bool short_directmap = d && !paging_mode_external(d); |
1542 | 12 | |
1543 | 12 | /* Slot 256: RO M2P (if applicable). */ |
1544 | 12 | l4t[l4_table_offset(RO_MPT_VIRT_START)] = |
1545 | 0 | ro_mpt ? idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)] |
1546 | 12 | : l4e_empty(); |
1547 | 12 | |
1548 | 12 | /* Slot 257: PCI MMCFG. */ |
1549 | 12 | l4t[l4_table_offset(PCI_MCFG_VIRT_START)] = |
1550 | 12 | idle_pg_table[l4_table_offset(PCI_MCFG_VIRT_START)]; |
1551 | 12 | |
1552 | 12 | /* Slot 258: Self linear mappings. */ |
1553 | 12 | ASSERT(!mfn_eq(l4mfn, INVALID_MFN)); |
1554 | 12 | l4t[l4_table_offset(LINEAR_PT_VIRT_START)] = |
1555 | 12 | l4e_from_mfn(l4mfn, __PAGE_HYPERVISOR_RW); |
1556 | 12 | |
1557 | 12 | /* Slot 259: Shadow linear mappings (if applicable). */ |
1558 | 12 | l4t[l4_table_offset(SH_LINEAR_PT_VIRT_START)] = |
1559 | 12 | mfn_eq(sl4mfn, INVALID_MFN) ? l4e_empty() : |
1560 | 12 | l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR_RW); |
1561 | 12 | |
1562 | 12 | /* Slot 260: Per-domain mappings (if applicable). */ |
1563 | 12 | l4t[l4_table_offset(PERDOMAIN_VIRT_START)] = |
1564 | 12 | d ? l4e_from_page(d->arch.perdomain_l3_pg, __PAGE_HYPERVISOR_RW) |
1565 | 0 | : l4e_empty(); |
1566 | 12 | |
1567 | 12 | /* Slot 261-: text/data/bss, RW M2P, vmap, frametable, directmap. */ |
1568 | 12 | #ifndef NDEBUG |
1569 | 12 | if ( short_directmap && |
1570 | 0 | unlikely(root_pgt_pv_xen_slots < ROOT_PAGETABLE_PV_XEN_SLOTS) ) |
1571 | 0 | { |
1572 | 0 | /* |
1573 | 0 | * If using highmem-start=, artificially shorten the directmap to |
1574 | 0 | * simulate very large machines. |
1575 | 0 | */ |
1576 | 0 | l4_pgentry_t *next; |
1577 | 0 |
1578 | 0 | memcpy(&l4t[l4_table_offset(XEN_VIRT_START)], |
1579 | 0 | &idle_pg_table[l4_table_offset(XEN_VIRT_START)], |
1580 | 0 | (ROOT_PAGETABLE_FIRST_XEN_SLOT + root_pgt_pv_xen_slots - |
1581 | 0 | l4_table_offset(XEN_VIRT_START)) * sizeof(*l4t)); |
1582 | 0 |
1583 | 0 | next = &l4t[ROOT_PAGETABLE_FIRST_XEN_SLOT + root_pgt_pv_xen_slots]; |
1584 | 0 |
1585 | 0 | if ( l4e_get_intpte(split_l4e) ) |
1586 | 0 | *next++ = split_l4e; |
1587 | 0 |
1588 | 0 | memset(next, 0, |
1589 | 0 | _p(&l4t[ROOT_PAGETABLE_LAST_XEN_SLOT + 1]) - _p(next)); |
1590 | 0 | } |
1591 | 12 | else |
1592 | 12 | #endif |
1593 | 12 | { |
1594 | 12 | unsigned int slots = (short_directmap |
1595 | 0 | ? ROOT_PAGETABLE_PV_XEN_SLOTS |
1596 | 12 | : ROOT_PAGETABLE_XEN_SLOTS); |
1597 | 12 | |
1598 | 12 | memcpy(&l4t[l4_table_offset(XEN_VIRT_START)], |
1599 | 12 | &idle_pg_table[l4_table_offset(XEN_VIRT_START)], |
1600 | 12 | (ROOT_PAGETABLE_FIRST_XEN_SLOT + slots - |
1601 | 12 | l4_table_offset(XEN_VIRT_START)) * sizeof(*l4t)); |
1602 | 12 | } |
1603 | 12 | } |
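The slot layout above is easiest to see from a caller's perspective. Below is a minimal sketch of seeding a freshly validated PV root table, modelled directly on the alloc_l4_table() call further down this file; pfn and d carry the same meaning they have there.

    /* Sketch: populate the Xen slots of a newly validated PV L4. */
    l4_pgentry_t *pl4e = map_domain_page(_mfn(pfn));

    /* No shadow linear slot; m2p_strict decides whether slot 256 is filled. */
    init_xen_l4_slots(pl4e, _mfn(pfn), d, INVALID_MFN,
                      VM_ASSIST(d, m2p_strict));
    unmap_domain_page(pl4e);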
1604 | | |
1605 | | bool fill_ro_mpt(mfn_t mfn) |
1606 | 0 | { |
1607 | 0 | l4_pgentry_t *l4tab = map_domain_page(mfn); |
1608 | 0 | bool ret = false; |
1609 | 0 |
1610 | 0 | if ( !l4e_get_intpte(l4tab[l4_table_offset(RO_MPT_VIRT_START)]) ) |
1611 | 0 | { |
1612 | 0 | l4tab[l4_table_offset(RO_MPT_VIRT_START)] = |
1613 | 0 | idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)]; |
1614 | 0 | ret = true; |
1615 | 0 | } |
1616 | 0 | unmap_domain_page(l4tab); |
1617 | 0 |
1618 | 0 | return ret; |
1619 | 0 | } |
1620 | | |
1621 | | void zap_ro_mpt(mfn_t mfn) |
1622 | 0 | { |
1623 | 0 | l4_pgentry_t *l4tab = map_domain_page(mfn); |
1624 | 0 |
1625 | 0 | l4tab[l4_table_offset(RO_MPT_VIRT_START)] = l4e_empty(); |
1626 | 0 | unmap_domain_page(l4tab); |
1627 | 0 | } |
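fill_ro_mpt() and zap_ro_mpt() are the runtime toggles for slot 256. A condensed sketch of how the m2p_strict VM assist drives them, lifted from the callers later in this file (new_guest_cr3() and the MMUEXT_NEW_USER_BASEPTR handler):

    /* Kernel table: without strict mode, expose the read-only M2P. */
    if ( !VM_ASSIST(d, m2p_strict) && !paging_mode_refcounts(d) )
        fill_ro_mpt(mfn);

    /* User table: under strict mode, hide the read-only M2P instead. */
    if ( VM_ASSIST(d, m2p_strict) )
        zap_ro_mpt(mfn);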
1628 | | |
1629 | | static int alloc_l4_table(struct page_info *page) |
1630 | 0 | { |
1631 | 0 | struct domain *d = page_get_owner(page); |
1632 | 0 | unsigned long pfn = mfn_x(page_to_mfn(page)); |
1633 | 0 | l4_pgentry_t *pl4e = map_domain_page(_mfn(pfn)); |
1634 | 0 | unsigned int i; |
1635 | 0 | int rc = 0, partial = page->partial_pte; |
1636 | 0 |
|
1637 | 0 | for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES; |
1638 | 0 | i++, partial = 0 ) |
1639 | 0 | { |
1640 | 0 | if ( !is_guest_l4_slot(d, i) || |
1641 | 0 | (rc = get_page_from_l4e(pl4e[i], pfn, d, partial)) > 0 ) |
1642 | 0 | continue; |
1643 | 0 |
1644 | 0 | if ( rc == -ERESTART ) |
1645 | 0 | { |
1646 | 0 | page->nr_validated_ptes = i; |
1647 | 0 | page->partial_pte = partial ?: 1; |
1648 | 0 | } |
1649 | 0 | else if ( rc < 0 ) |
1650 | 0 | { |
1651 | 0 | if ( rc != -EINTR ) |
1652 | 0 | gdprintk(XENLOG_WARNING, |
1653 | 0 | "Failure in alloc_l4_table: slot %#x\n", i); |
1654 | 0 | if ( i ) |
1655 | 0 | { |
1656 | 0 | page->nr_validated_ptes = i; |
1657 | 0 | page->partial_pte = 0; |
1658 | 0 | if ( rc == -EINTR ) |
1659 | 0 | rc = -ERESTART; |
1660 | 0 | else |
1661 | 0 | { |
1662 | 0 | if ( current->arch.old_guest_table ) |
1663 | 0 | page->nr_validated_ptes++; |
1664 | 0 | current->arch.old_guest_ptpg = NULL; |
1665 | 0 | current->arch.old_guest_table = page; |
1666 | 0 | } |
1667 | 0 | } |
1668 | 0 | } |
1669 | 0 | if ( rc < 0 ) |
1670 | 0 | { |
1671 | 0 | unmap_domain_page(pl4e); |
1672 | 0 | return rc; |
1673 | 0 | } |
1674 | 0 |
1675 | 0 | pl4e[i] = adjust_guest_l4e(pl4e[i], d); |
1676 | 0 | } |
1677 | 0 |
1678 | 0 | if ( rc >= 0 ) |
1679 | 0 | { |
1680 | 0 | init_xen_l4_slots(pl4e, _mfn(pfn), |
1681 | 0 | d, INVALID_MFN, VM_ASSIST(d, m2p_strict)); |
1682 | 0 | atomic_inc(&d->arch.pv_domain.nr_l4_pages); |
1683 | 0 | rc = 0; |
1684 | 0 | } |
1685 | 0 | unmap_domain_page(pl4e); |
1686 | 0 |
1687 | 0 | return rc; |
1688 | 0 | } |
1689 | | |
1690 | | static void free_l1_table(struct page_info *page) |
1691 | 0 | { |
1692 | 0 | struct domain *d = page_get_owner(page); |
1693 | 0 | l1_pgentry_t *pl1e; |
1694 | 0 | unsigned int i; |
1695 | 0 |
1696 | 0 | pl1e = __map_domain_page(page); |
1697 | 0 |
1698 | 0 | for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) |
1699 | 0 | put_page_from_l1e(pl1e[i], d); |
1700 | 0 |
1701 | 0 | unmap_domain_page(pl1e); |
1702 | 0 | } |
1703 | | |
1704 | | |
1705 | | static int free_l2_table(struct page_info *page, int preemptible) |
1706 | 0 | { |
1707 | 0 | struct domain *d = page_get_owner(page); |
1708 | 0 | unsigned long pfn = mfn_x(page_to_mfn(page)); |
1709 | 0 | l2_pgentry_t *pl2e; |
1710 | 0 | unsigned int i = page->nr_validated_ptes - 1; |
1711 | 0 | int err = 0; |
1712 | 0 |
1713 | 0 | pl2e = map_domain_page(_mfn(pfn)); |
1714 | 0 |
1715 | 0 | ASSERT(page->nr_validated_ptes); |
1716 | 0 | do { |
1717 | 0 | if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) && |
1718 | 0 | put_page_from_l2e(pl2e[i], pfn) == 0 && |
1719 | 0 | preemptible && i && hypercall_preempt_check() ) |
1720 | 0 | { |
1721 | 0 | page->nr_validated_ptes = i; |
1722 | 0 | err = -ERESTART; |
1723 | 0 | } |
1724 | 0 | } while ( !err && i-- ); |
1725 | 0 |
1726 | 0 | unmap_domain_page(pl2e); |
1727 | 0 |
1728 | 0 | if ( !err ) |
1729 | 0 | page->u.inuse.type_info &= ~PGT_pae_xen_l2; |
1730 | 0 |
1731 | 0 | return err; |
1732 | 0 | } |
1733 | | |
1734 | | static int free_l3_table(struct page_info *page) |
1735 | 0 | { |
1736 | 0 | struct domain *d = page_get_owner(page); |
1737 | 0 | unsigned long pfn = mfn_x(page_to_mfn(page)); |
1738 | 0 | l3_pgentry_t *pl3e; |
1739 | 0 | int rc = 0, partial = page->partial_pte; |
1740 | 0 | unsigned int i = page->nr_validated_ptes - !partial; |
1741 | 0 |
1742 | 0 | pl3e = map_domain_page(_mfn(pfn)); |
1743 | 0 |
1744 | 0 | do { |
1745 | 0 | rc = put_page_from_l3e(pl3e[i], pfn, partial, 0); |
1746 | 0 | if ( rc < 0 ) |
1747 | 0 | break; |
1748 | 0 | partial = 0; |
1749 | 0 | if ( rc > 0 ) |
1750 | 0 | continue; |
1751 | 0 | pl3e[i] = unadjust_guest_l3e(pl3e[i], d); |
1752 | 0 | } while ( i-- ); |
1753 | 0 |
1754 | 0 | unmap_domain_page(pl3e); |
1755 | 0 |
1756 | 0 | if ( rc == -ERESTART ) |
1757 | 0 | { |
1758 | 0 | page->nr_validated_ptes = i; |
1759 | 0 | page->partial_pte = partial ?: -1; |
1760 | 0 | } |
1761 | 0 | else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 ) |
1762 | 0 | { |
1763 | 0 | page->nr_validated_ptes = i + 1; |
1764 | 0 | page->partial_pte = 0; |
1765 | 0 | rc = -ERESTART; |
1766 | 0 | } |
1767 | 0 | return rc > 0 ? 0 : rc; |
1768 | 0 | } |
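free_l3_table() above and free_l4_table() below resume across preemption through two fields of struct page_info. A hedged summary of the encoding as it can be inferred from the code here (the authoritative description lives with the structure definition):

    /*
     * Continuation state across -ERESTART:
     *   nr_validated_ptes -- index of the first slot not fully processed;
     *   partial_pte  > 0  -- the get on that slot was itself preempted;
     *   partial_pte  < 0  -- the put on that slot was preempted;
     *   partial_pte == 0  -- no slot left in a half-done state.
     */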
1769 | | |
1770 | | static int free_l4_table(struct page_info *page) |
1771 | 0 | { |
1772 | 0 | struct domain *d = page_get_owner(page); |
1773 | 0 | unsigned long pfn = mfn_x(page_to_mfn(page)); |
1774 | 0 | l4_pgentry_t *pl4e = map_domain_page(_mfn(pfn)); |
1775 | 0 | int rc = 0, partial = page->partial_pte; |
1776 | 0 | unsigned int i = page->nr_validated_ptes - !partial; |
1777 | 0 |
1778 | 0 | do { |
1779 | 0 | if ( is_guest_l4_slot(d, i) ) |
1780 | 0 | rc = put_page_from_l4e(pl4e[i], pfn, partial, 0); |
1781 | 0 | if ( rc < 0 ) |
1782 | 0 | break; |
1783 | 0 | partial = 0; |
1784 | 0 | } while ( i-- ); |
1785 | 0 |
1786 | 0 | if ( rc == -ERESTART ) |
1787 | 0 | { |
1788 | 0 | page->nr_validated_ptes = i; |
1789 | 0 | page->partial_pte = partial ?: -1; |
1790 | 0 | } |
1791 | 0 | else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 ) |
1792 | 0 | { |
1793 | 0 | page->nr_validated_ptes = i + 1; |
1794 | 0 | page->partial_pte = 0; |
1795 | 0 | rc = -ERESTART; |
1796 | 0 | } |
1797 | 0 |
1798 | 0 | unmap_domain_page(pl4e); |
1799 | 0 |
1800 | 0 | if ( rc >= 0 ) |
1801 | 0 | { |
1802 | 0 | atomic_dec(&d->arch.pv_domain.nr_l4_pages); |
1803 | 0 | rc = 0; |
1804 | 0 | } |
1805 | 0 |
1806 | 0 | return rc; |
1807 | 0 | } |
1808 | | |
1809 | | int page_lock(struct page_info *page) |
1810 | 0 | { |
1811 | 0 | unsigned long x, nx; |
1812 | 0 |
1813 | 0 | do { |
1814 | 0 | while ( (x = page->u.inuse.type_info) & PGT_locked ) |
1815 | 0 | cpu_relax(); |
1816 | 0 | nx = x + (1 | PGT_locked); |
1817 | 0 | if ( !(x & PGT_validated) || |
1818 | 0 | !(x & PGT_count_mask) || |
1819 | 0 | !(nx & PGT_count_mask) ) |
1820 | 0 | return 0; |
1821 | 0 | } while ( cmpxchg(&page->u.inuse.type_info, x, nx) != x ); |
1822 | 0 |
1823 | 0 | return 1; |
1824 | 0 | } |
1825 | | |
1826 | | void page_unlock(struct page_info *page) |
1827 | 0 | { |
1828 | 0 | unsigned long x, nx, y = page->u.inuse.type_info; |
1829 | 0 |
1830 | 0 | do { |
1831 | 0 | x = y; |
1832 | 0 | ASSERT((x & PGT_count_mask) && (x & PGT_locked)); |
1833 | 0 |
1834 | 0 | nx = x - (1 | PGT_locked); |
1835 | 0 | /* We must not drop the last reference here. */ |
1836 | 0 | ASSERT(nx & PGT_count_mask); |
1837 | 0 | } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x ); |
1838 | 0 | } |
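page_lock() carves a per-page lock out of type_info: acquiring it takes a type reference and sets PGT_locked in one cmpxchg, and the fail-fast checks (page not validated, type count under/overflow) sit inside the retry loop. A hedged sketch of the calling pattern; update_one_pte() is a placeholder, not a function in this file:

    /* Sketch: serialise an update against a validated page table page. */
    if ( !page_lock(page) )
        return -EBUSY;        /* not validated, or type count would wrap */

    update_one_pte(page);     /* hypothetical: the caller's real work */

    page_unlock(page);        /* drops PGT_locked and the type reference */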
1839 | | |
1840 | | /* |
1841 | | * PTE flags that a guest may change without re-validating the PTE. |
1842 | | * All other bits affect translation, caching, or Xen's safety. |
1843 | | */ |
1844 | | #define FASTPATH_FLAG_WHITELIST \ |
1845 | | (_PAGE_NX_BIT | _PAGE_AVAIL_HIGH | _PAGE_AVAIL | _PAGE_GLOBAL | \ |
1846 | | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER) |
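Concretely, an update that leaves the frame number and all non-whitelisted flags alone (say, the guest clearing _PAGE_ACCESSED or toggling _PAGE_NX) can be written in place, while anything touching _PAGE_PRESENT, _PAGE_RW, or the address must be re-validated. A hedged illustration of the test as the mod_lN_entry() routines below apply it:

    /* ol1e and nl1e differ only in whitelisted bits => fast path. */
    if ( !l1e_has_changed(ol1e, nl1e, ~FASTPATH_FLAG_WHITELIST) )
        /* write the entry in place; no get_page_from_l1e() needed */;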
1847 | | |
1848 | | /* Update the L1 entry at pl1e to new value nl1e. */ |
1849 | | static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, |
1850 | | unsigned long gl1mfn, int preserve_ad, |
1851 | | struct vcpu *pt_vcpu, struct domain *pg_dom) |
1852 | 0 | { |
1853 | 0 | l1_pgentry_t ol1e; |
1854 | 0 | struct domain *pt_dom = pt_vcpu->domain; |
1855 | 0 | int rc = 0; |
1856 | 0 |
1857 | 0 | if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) ) |
1858 | 0 | return -EFAULT; |
1859 | 0 |
1860 | 0 | ASSERT(!paging_mode_refcounts(pt_dom)); |
1861 | 0 |
1862 | 0 | if ( l1e_get_flags(nl1e) & _PAGE_PRESENT ) |
1863 | 0 | { |
1864 | 0 | struct page_info *page = NULL; |
1865 | 0 |
1866 | 0 | if ( unlikely(l1e_get_flags(nl1e) & l1_disallow_mask(pt_dom)) ) |
1867 | 0 | { |
1868 | 0 | gdprintk(XENLOG_WARNING, "Bad L1 flags %x\n", |
1869 | 0 | l1e_get_flags(nl1e) & l1_disallow_mask(pt_dom)); |
1870 | 0 | return -EINVAL; |
1871 | 0 | } |
1872 | 0 |
1873 | 0 | /* Translate foreign guest address. */ |
1874 | 0 | if ( paging_mode_translate(pg_dom) ) |
1875 | 0 | { |
1876 | 0 | p2m_type_t p2mt; |
1877 | 0 | p2m_query_t q = l1e_get_flags(nl1e) & _PAGE_RW ? |
1878 | 0 | P2M_ALLOC | P2M_UNSHARE : P2M_ALLOC; |
1879 | 0 |
1880 | 0 | page = get_page_from_gfn(pg_dom, l1e_get_pfn(nl1e), &p2mt, q); |
1881 | 0 |
1882 | 0 | if ( p2m_is_paged(p2mt) ) |
1883 | 0 | { |
1884 | 0 | if ( page ) |
1885 | 0 | put_page(page); |
1886 | 0 | p2m_mem_paging_populate(pg_dom, l1e_get_pfn(nl1e)); |
1887 | 0 | return -ENOENT; |
1888 | 0 | } |
1889 | 0 |
1890 | 0 | if ( p2mt == p2m_ram_paging_in && !page ) |
1891 | 0 | return -ENOENT; |
1892 | 0 |
1893 | 0 | /* Did our attempt to unshare fail? */ |
1894 | 0 | if ( (q & P2M_UNSHARE) && p2m_is_shared(p2mt) ) |
1895 | 0 | { |
1896 | 0 | /* We could not have obtained a page ref. */ |
1897 | 0 | ASSERT(!page); |
1898 | 0 | /* And mem_sharing_notify has already been called. */ |
1899 | 0 | return -ENOMEM; |
1900 | 0 | } |
1901 | 0 |
1902 | 0 | if ( !page ) |
1903 | 0 | return -EINVAL; |
1904 | 0 | nl1e = l1e_from_page(page, l1e_get_flags(nl1e)); |
1905 | 0 | } |
1906 | 0 |
1907 | 0 | /* Fast path for sufficiently-similar mappings. */ |
1908 | 0 | if ( !l1e_has_changed(ol1e, nl1e, ~FASTPATH_FLAG_WHITELIST) ) |
1909 | 0 | { |
1910 | 0 | nl1e = adjust_guest_l1e(nl1e, pt_dom); |
1911 | 0 | rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu, |
1912 | 0 | preserve_ad); |
1913 | 0 | if ( page ) |
1914 | 0 | put_page(page); |
1915 | 0 | return rc ? 0 : -EBUSY; |
1916 | 0 | } |
1917 | 0 |
1918 | 0 | switch ( rc = get_page_from_l1e(nl1e, pt_dom, pg_dom) ) |
1919 | 0 | { |
1920 | 0 | default: |
1921 | 0 | if ( page ) |
1922 | 0 | put_page(page); |
1923 | 0 | return rc; |
1924 | 0 | case 0: |
1925 | 0 | break; |
1926 | 0 | case _PAGE_RW ... _PAGE_RW | PAGE_CACHE_ATTRS: |
1927 | 0 | ASSERT(!(rc & ~(_PAGE_RW | PAGE_CACHE_ATTRS))); |
1928 | 0 | l1e_flip_flags(nl1e, rc); |
1929 | 0 | rc = 0; |
1930 | 0 | break; |
1931 | 0 | } |
1932 | 0 | if ( page ) |
1933 | 0 | put_page(page); |
1934 | 0 |
1935 | 0 | nl1e = adjust_guest_l1e(nl1e, pt_dom); |
1936 | 0 | if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu, |
1937 | 0 | preserve_ad)) ) |
1938 | 0 | { |
1939 | 0 | ol1e = nl1e; |
1940 | 0 | rc = -EBUSY; |
1941 | 0 | } |
1942 | 0 | } |
1943 | 0 | else if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu, |
1944 | 0 | preserve_ad)) ) |
1945 | 0 | { |
1946 | 0 | return -EBUSY; |
1947 | 0 | } |
1948 | 0 |
1949 | 0 | put_page_from_l1e(ol1e, pt_dom); |
1950 | 0 | return rc; |
1951 | 0 | } |
1952 | | |
1953 | | |
1954 | | /* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */ |
1955 | | static int mod_l2_entry(l2_pgentry_t *pl2e, |
1956 | | l2_pgentry_t nl2e, |
1957 | | unsigned long pfn, |
1958 | | int preserve_ad, |
1959 | | struct vcpu *vcpu) |
1960 | 0 | { |
1961 | 0 | l2_pgentry_t ol2e; |
1962 | 0 | struct domain *d = vcpu->domain; |
1963 | 0 | struct page_info *l2pg = mfn_to_page(_mfn(pfn)); |
1964 | 0 | unsigned long type = l2pg->u.inuse.type_info; |
1965 | 0 | int rc = 0; |
1966 | 0 |
1967 | 0 | if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) ) |
1968 | 0 | { |
1969 | 0 | gdprintk(XENLOG_WARNING, "L2 update in Xen-private area, slot %#lx\n", |
1970 | 0 | pgentry_ptr_to_slot(pl2e)); |
1971 | 0 | return -EPERM; |
1972 | 0 | } |
1973 | 0 |
1974 | 0 | if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) ) |
1975 | 0 | return -EFAULT; |
1976 | 0 |
1977 | 0 | if ( l2e_get_flags(nl2e) & _PAGE_PRESENT ) |
1978 | 0 | { |
1979 | 0 | if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) ) |
1980 | 0 | { |
1981 | 0 | gdprintk(XENLOG_WARNING, "Bad L2 flags %x\n", |
1982 | 0 | l2e_get_flags(nl2e) & L2_DISALLOW_MASK); |
1983 | 0 | return -EINVAL; |
1984 | 0 | } |
1985 | 0 |
1986 | 0 | /* Fast path for sufficiently-similar mappings. */ |
1987 | 0 | if ( !l2e_has_changed(ol2e, nl2e, ~FASTPATH_FLAG_WHITELIST) ) |
1988 | 0 | { |
1989 | 0 | nl2e = adjust_guest_l2e(nl2e, d); |
1990 | 0 | if ( UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu, preserve_ad) ) |
1991 | 0 | return 0; |
1992 | 0 | return -EBUSY; |
1993 | 0 | } |
1994 | 0 |
1995 | 0 | if ( unlikely((rc = get_page_from_l2e(nl2e, pfn, d)) < 0) ) |
1996 | 0 | return rc; |
1997 | 0 |
1998 | 0 | nl2e = adjust_guest_l2e(nl2e, d); |
1999 | 0 | if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu, |
2000 | 0 | preserve_ad)) ) |
2001 | 0 | { |
2002 | 0 | ol2e = nl2e; |
2003 | 0 | rc = -EBUSY; |
2004 | 0 | } |
2005 | 0 | } |
2006 | 0 | else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu, |
2007 | 0 | preserve_ad)) ) |
2008 | 0 | { |
2009 | 0 | return -EBUSY; |
2010 | 0 | } |
2011 | 0 |
2012 | 0 | put_page_from_l2e(ol2e, pfn); |
2013 | 0 | return rc; |
2014 | 0 | } |
2015 | | |
2016 | | /* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */ |
2017 | | static int mod_l3_entry(l3_pgentry_t *pl3e, |
2018 | | l3_pgentry_t nl3e, |
2019 | | unsigned long pfn, |
2020 | | int preserve_ad, |
2021 | | struct vcpu *vcpu) |
2022 | 0 | { |
2023 | 0 | l3_pgentry_t ol3e; |
2024 | 0 | struct domain *d = vcpu->domain; |
2025 | 0 | int rc = 0; |
2026 | 0 |
2027 | 0 | /* |
2028 | 0 | * Disallow updates to final L3 slot. It contains Xen mappings, and it |
2029 | 0 | * would be a pain to ensure they remain continuously valid throughout. |
2030 | 0 | */ |
2031 | 0 | if ( is_pv_32bit_domain(d) && (pgentry_ptr_to_slot(pl3e) >= 3) ) |
2032 | 0 | return -EINVAL; |
2033 | 0 |
2034 | 0 | if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) ) |
2035 | 0 | return -EFAULT; |
2036 | 0 |
2037 | 0 | if ( l3e_get_flags(nl3e) & _PAGE_PRESENT ) |
2038 | 0 | { |
2039 | 0 | if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) ) |
2040 | 0 | { |
2041 | 0 | gdprintk(XENLOG_WARNING, "Bad L3 flags %x\n", |
2042 | 0 | l3e_get_flags(nl3e) & l3_disallow_mask(d)); |
2043 | 0 | return -EINVAL; |
2044 | 0 | } |
2045 | 0 |
2046 | 0 | /* Fast path for sufficiently-similar mappings. */ |
2047 | 0 | if ( !l3e_has_changed(ol3e, nl3e, ~FASTPATH_FLAG_WHITELIST) ) |
2048 | 0 | { |
2049 | 0 | nl3e = adjust_guest_l3e(nl3e, d); |
2050 | 0 | rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, vcpu, preserve_ad); |
2051 | 0 | return rc ? 0 : -EFAULT; |
2052 | 0 | } |
2053 | 0 |
2054 | 0 | rc = get_page_from_l3e(nl3e, pfn, d, 0); |
2055 | 0 | if ( unlikely(rc < 0) ) |
2056 | 0 | return rc; |
2057 | 0 | rc = 0; |
2058 | 0 |
2059 | 0 | nl3e = adjust_guest_l3e(nl3e, d); |
2060 | 0 | if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, vcpu, |
2061 | 0 | preserve_ad)) ) |
2062 | 0 | { |
2063 | 0 | ol3e = nl3e; |
2064 | 0 | rc = -EFAULT; |
2065 | 0 | } |
2066 | 0 | } |
2067 | 0 | else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, vcpu, |
2068 | 0 | preserve_ad)) ) |
2069 | 0 | { |
2070 | 0 | return -EFAULT; |
2071 | 0 | } |
2072 | 0 |
|
2073 | 0 | if ( likely(rc == 0) ) |
2074 | 0 | if ( !create_pae_xen_mappings(d, pl3e) ) |
2075 | 0 | BUG(); |
2076 | 0 |
2077 | 0 | put_page_from_l3e(ol3e, pfn, 0, 1); |
2078 | 0 | return rc; |
2079 | 0 | } |
2080 | | |
2081 | | /* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */ |
2082 | | static int mod_l4_entry(l4_pgentry_t *pl4e, |
2083 | | l4_pgentry_t nl4e, |
2084 | | unsigned long pfn, |
2085 | | int preserve_ad, |
2086 | | struct vcpu *vcpu) |
2087 | 0 | { |
2088 | 0 | struct domain *d = vcpu->domain; |
2089 | 0 | l4_pgentry_t ol4e; |
2090 | 0 | int rc = 0; |
2091 | 0 |
2092 | 0 | if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) ) |
2093 | 0 | { |
2094 | 0 | gdprintk(XENLOG_WARNING, "L4 update in Xen-private area, slot %#lx\n", |
2095 | 0 | pgentry_ptr_to_slot(pl4e)); |
2096 | 0 | return -EINVAL; |
2097 | 0 | } |
2098 | 0 |
2099 | 0 | if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) ) |
2100 | 0 | return -EFAULT; |
2101 | 0 |
2102 | 0 | if ( l4e_get_flags(nl4e) & _PAGE_PRESENT ) |
2103 | 0 | { |
2104 | 0 | if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) ) |
2105 | 0 | { |
2106 | 0 | gdprintk(XENLOG_WARNING, "Bad L4 flags %x\n", |
2107 | 0 | l4e_get_flags(nl4e) & L4_DISALLOW_MASK); |
2108 | 0 | return -EINVAL; |
2109 | 0 | } |
2110 | 0 |
2111 | 0 | /* Fast path for sufficiently-similar mappings. */ |
2112 | 0 | if ( !l4e_has_changed(ol4e, nl4e, ~FASTPATH_FLAG_WHITELIST) ) |
2113 | 0 | { |
2114 | 0 | nl4e = adjust_guest_l4e(nl4e, d); |
2115 | 0 | rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, vcpu, preserve_ad); |
2116 | 0 | return rc ? 0 : -EFAULT; |
2117 | 0 | } |
2118 | 0 |
2119 | 0 | rc = get_page_from_l4e(nl4e, pfn, d, 0); |
2120 | 0 | if ( unlikely(rc < 0) ) |
2121 | 0 | return rc; |
2122 | 0 | rc = 0; |
2123 | 0 |
2124 | 0 | nl4e = adjust_guest_l4e(nl4e, d); |
2125 | 0 | if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, vcpu, |
2126 | 0 | preserve_ad)) ) |
2127 | 0 | { |
2128 | 0 | ol4e = nl4e; |
2129 | 0 | rc = -EFAULT; |
2130 | 0 | } |
2131 | 0 | } |
2132 | 0 | else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, vcpu, |
2133 | 0 | preserve_ad)) ) |
2134 | 0 | { |
2135 | 0 | return -EFAULT; |
2136 | 0 | } |
2137 | 0 |
2138 | 0 | put_page_from_l4e(ol4e, pfn, 0, 1); |
2139 | 0 | return rc; |
2140 | 0 | } |
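All four mod_lN_entry() routines share one ordering discipline: references for the new entry are taken before the old entry's are dropped, so a frame can never transiently lose its last reference while still mapped. A condensed sketch of that shared skeleton:

    /*
     * Shared skeleton (sketch):
     *   1. get_page_from_lNe(nlNe, ...)        -- may fail; PTE untouched;
     *   2. UPDATE_ENTRY(lN, plNe, olNe, nlNe)  -- cmpxchg; if it loses the
     *      race, nlNe is treated as the "old" value so the fresh
     *      references taken in step 1 get dropped in step 3;
     *   3. put_page_from_lNe(olNe, ...)        -- release the displaced refs.
     */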
2141 | | |
2142 | | static int cleanup_page_cacheattr(struct page_info *page) |
2143 | 150 | { |
2144 | 150 | unsigned int cacheattr = |
2145 | 150 | (page->count_info & PGC_cacheattr_mask) >> PGC_cacheattr_base; |
2146 | 150 | |
2147 | 150 | if ( likely(cacheattr == 0) ) |
2148 | 150 | return 0; |
2149 | 150 | |
2150 | 0 | page->count_info &= ~PGC_cacheattr_mask; |
2151 | 0 |
2152 | 0 | BUG_ON(is_xen_heap_page(page)); |
2153 | 0 |
2154 | 0 | return update_xen_mappings(mfn_x(page_to_mfn(page)), 0); |
2155 | 150 | } |
2156 | | |
2157 | | void put_page(struct page_info *page) |
2158 | 1.96M | { |
2159 | 1.96M | unsigned long nx, x, y = page->count_info; |
2160 | 1.96M | |
2161 | 1.97M | do { |
2162 | 1.97M | ASSERT((y & PGC_count_mask) != 0); |
2163 | 1.97M | x = y; |
2164 | 1.97M | nx = x - 1; |
2165 | 1.97M | } |
2166 | 1.97M | while ( unlikely((y = cmpxchg(&page->count_info, x, nx)) != x) ); |
2167 | 1.96M | |
2168 | 1.96M | if ( unlikely((nx & PGC_count_mask) == 0) ) |
2169 | 150 | { |
2170 | 150 | if ( cleanup_page_cacheattr(page) == 0 ) |
2171 | 150 | free_domheap_page(page); |
2172 | 150 | else |
2173 | 0 | gdprintk(XENLOG_WARNING, |
2174 | 0 | "Leaking mfn %" PRI_mfn "\n", mfn_x(page_to_mfn(page))); |
2175 | 150 | } |
2176 | 1.96M | } |
2177 | | |
2178 | | |
2179 | | struct domain *page_get_owner_and_reference(struct page_info *page) |
2180 | 1.96M | { |
2181 | 1.96M | unsigned long x, y = page->count_info; |
2182 | 1.96M | struct domain *owner; |
2183 | 1.96M | |
2184 | 1.97M | do { |
2185 | 1.97M | x = y; |
2186 | 1.97M | /* |
2187 | 1.97M | * Count == 0: Page is not allocated, so we cannot take a reference. |
2188 | 1.97M | * Count == -1: Reference count would wrap, which is invalid. |
2189 | 1.97M | * Count == -2: Remaining unused ref is reserved for get_page_light(). |
2190 | 1.97M | */ |
2191 | 1.97M | if ( unlikely(((x + 2) & PGC_count_mask) <= 2) ) |
2192 | 0 | return NULL; |
2193 | 1.97M | } |
2194 | 1.97M | while ( (y = cmpxchg(&page->count_info, x, x + 1)) != x ); |
2195 | 1.96M | |
2196 | 1.96M | owner = page_get_owner(page); |
2197 | 1.96M | ASSERT(owner); |
2198 | 1.96M | |
2199 | 1.96M | return owner; |
2200 | 1.96M | } |
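The guard in the loop above folds all three forbidden counts into one unsigned comparison. A worked check, pretending for brevity that PGC_count_mask were only 8 bits wide (it is wider in reality):

    /*
     * (x + 2) & PGC_count_mask <= 2 rejects exactly:
     *   count 0x00 -> 0x02   free page, nothing to reference;
     *   count 0xff -> 0x01   an increment would wrap to zero;
     *   count 0xfe -> 0x00   last ref reserved for get_page_light();
     * any other count maps above 2 and is accepted.
     */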
2201 | | |
2202 | | |
2203 | | int get_page(struct page_info *page, struct domain *domain) |
2204 | 1.96M | { |
2205 | 1.96M | struct domain *owner = page_get_owner_and_reference(page); |
2206 | 1.96M | |
2207 | 1.96M | if ( likely(owner == domain) ) |
2208 | 1.98M | return 1; |
2209 | 1.96M | |
2210 | 18.4E | if ( !paging_mode_refcounts(domain) && !domain->is_dying ) |
2211 | 0 | gprintk(XENLOG_INFO, |
2212 | 18.4E | "Error mfn %"PRI_mfn": rd=%d od=%d caf=%08lx taf=%" PRtype_info "\n", |
2213 | 0 | mfn_x(page_to_mfn(page)), domain->domain_id, |
2214 | 0 | owner ? owner->domain_id : DOMID_INVALID, |
2215 | 0 | page->count_info - !!owner, page->u.inuse.type_info); |
2216 | 18.4E | |
2217 | 18.4E | if ( owner ) |
2218 | 0 | put_page(page); |
2219 | 18.4E | |
2220 | 18.4E | return 0; |
2221 | 1.96M | } |
2222 | | |
2223 | | /* |
2224 | | * Special version of get_page() to be used exclusively when |
2225 | | * - a page is known to already have a non-zero reference count |
2226 | | * - the page does not need its owner to be checked |
2227 | | * - it will not be called more than once without dropping the thus |
2228 | | * acquired reference again. |
2229 | | * Due to get_page() reserving one reference, this call cannot fail. |
2230 | | */ |
2231 | | static void get_page_light(struct page_info *page) |
2232 | 0 | { |
2233 | 0 | unsigned long x, nx, y = page->count_info; |
2234 | 0 |
2235 | 0 | do { |
2236 | 0 | x = y; |
2237 | 0 | nx = x + 1; |
2238 | 0 | BUG_ON(!(x & PGC_count_mask)); /* Not allocated? */ |
2239 | 0 | BUG_ON(!(nx & PGC_count_mask)); /* Overflow? */ |
2240 | 0 | y = cmpxchg(&page->count_info, x, nx); |
2241 | 0 | } |
2242 | 0 | while ( unlikely(y != x) ); |
2243 | 0 | } |
2244 | | |
2245 | | static int alloc_page_type(struct page_info *page, unsigned long type, |
2246 | | int preemptible) |
2247 | 0 | { |
2248 | 0 | struct domain *owner = page_get_owner(page); |
2249 | 0 | int rc; |
2250 | 0 |
2251 | 0 | /* A page table is dirtied when its type count becomes non-zero. */ |
2252 | 0 | if ( likely(owner != NULL) ) |
2253 | 0 | paging_mark_dirty(owner, page_to_mfn(page)); |
2254 | 0 |
2255 | 0 | switch ( type & PGT_type_mask ) |
2256 | 0 | { |
2257 | 0 | case PGT_l1_page_table: |
2258 | 0 | rc = alloc_l1_table(page); |
2259 | 0 | break; |
2260 | 0 | case PGT_l2_page_table: |
2261 | 0 | rc = alloc_l2_table(page, type, preemptible); |
2262 | 0 | break; |
2263 | 0 | case PGT_l3_page_table: |
2264 | 0 | ASSERT(preemptible); |
2265 | 0 | rc = alloc_l3_table(page); |
2266 | 0 | break; |
2267 | 0 | case PGT_l4_page_table: |
2268 | 0 | ASSERT(preemptible); |
2269 | 0 | rc = alloc_l4_table(page); |
2270 | 0 | break; |
2271 | 0 | case PGT_seg_desc_page: |
2272 | 0 | rc = alloc_segdesc_page(page); |
2273 | 0 | break; |
2274 | 0 | default: |
2275 | 0 | printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%lx\n", |
2276 | 0 | type, page->u.inuse.type_info, |
2277 | 0 | page->count_info); |
2278 | 0 | rc = -EINVAL; |
2279 | 0 | BUG(); |
2280 | 0 | } |
2281 | 0 |
2282 | 0 | /* No need for atomic update of type_info here: no one else updates it. */ |
2283 | 0 | smp_wmb(); |
2284 | 0 | switch ( rc ) |
2285 | 0 | { |
2286 | 0 | case 0: |
2287 | 0 | page->u.inuse.type_info |= PGT_validated; |
2288 | 0 | break; |
2289 | 0 | case -EINTR: |
2290 | 0 | ASSERT((page->u.inuse.type_info & |
2291 | 0 | (PGT_count_mask|PGT_validated|PGT_partial)) == 1); |
2292 | 0 | page->u.inuse.type_info &= ~PGT_count_mask; |
2293 | 0 | break; |
2294 | 0 | default: |
2295 | 0 | ASSERT(rc < 0); |
2296 | 0 | gdprintk(XENLOG_WARNING, "Error while validating mfn %" PRI_mfn |
2297 | 0 | " (pfn %" PRI_pfn ") for type %" PRtype_info |
2298 | 0 | ": caf=%08lx taf=%" PRtype_info "\n", |
2299 | 0 | mfn_x(page_to_mfn(page)), |
2300 | 0 | get_gpfn_from_mfn(mfn_x(page_to_mfn(page))), |
2301 | 0 | type, page->count_info, page->u.inuse.type_info); |
2302 | 0 | if ( page != current->arch.old_guest_table ) |
2303 | 0 | page->u.inuse.type_info = 0; |
2304 | 0 | else |
2305 | 0 | { |
2306 | 0 | ASSERT((page->u.inuse.type_info & |
2307 | 0 | (PGT_count_mask | PGT_validated)) == 1); |
2308 | 0 | case -ERESTART: |
2309 | 0 | get_page_light(page); |
2310 | 0 | page->u.inuse.type_info |= PGT_partial; |
2311 | 0 | } |
2312 | 0 | break; |
2313 | 0 | } |
2314 | 0 |
2315 | 0 | return rc; |
2316 | 0 | } |
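The error handling above distinguishes two preemption flavours, which is worth spelling out; this summary is inferred from the -EINTR and -ERESTART arms here and from _put_final_page_type() below:

    /*
     * -EINTR:    preempted before any entry was processed; the type count
     *            is rolled back and the caller simply retries from scratch.
     * -ERESTART: preempted mid-validation; PGT_partial is set and
     *            get_page_light() pins an extra reference so the
     *            half-validated table cannot disappear before the retry.
     */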
2317 | | |
2318 | | |
2319 | | int free_page_type(struct page_info *page, unsigned long type, |
2320 | | int preemptible) |
2321 | 0 | { |
2322 | 0 | struct domain *owner = page_get_owner(page); |
2323 | 0 | unsigned long gmfn; |
2324 | 0 | int rc; |
2325 | 0 |
2326 | 0 | if ( likely(owner != NULL) && unlikely(paging_mode_enabled(owner)) ) |
2327 | 0 | { |
2328 | 0 | /* A page table is dirtied when its type count becomes zero. */ |
2329 | 0 | paging_mark_dirty(owner, page_to_mfn(page)); |
2330 | 0 |
2331 | 0 | ASSERT(!shadow_mode_refcounts(owner)); |
2332 | 0 |
2333 | 0 | gmfn = mfn_to_gmfn(owner, mfn_x(page_to_mfn(page))); |
2334 | 0 | ASSERT(VALID_M2P(gmfn)); |
2335 | 0 | /* Page sharing is not supported for shadowed domains. */ |
2336 | 0 | if ( !SHARED_M2P(gmfn) ) |
2337 | 0 | shadow_remove_all_shadows(owner, _mfn(gmfn)); |
2338 | 0 | } |
2339 | 0 |
2340 | 0 | if ( !(type & PGT_partial) ) |
2341 | 0 | { |
2342 | 0 | page->nr_validated_ptes = 1U << PAGETABLE_ORDER; |
2343 | 0 | page->partial_pte = 0; |
2344 | 0 | } |
2345 | 0 |
2346 | 0 | switch ( type & PGT_type_mask ) |
2347 | 0 | { |
2348 | 0 | case PGT_l1_page_table: |
2349 | 0 | free_l1_table(page); |
2350 | 0 | rc = 0; |
2351 | 0 | break; |
2352 | 0 | case PGT_l2_page_table: |
2353 | 0 | rc = free_l2_table(page, preemptible); |
2354 | 0 | break; |
2355 | 0 | case PGT_l3_page_table: |
2356 | 0 | ASSERT(preemptible); |
2357 | 0 | rc = free_l3_table(page); |
2358 | 0 | break; |
2359 | 0 | case PGT_l4_page_table: |
2360 | 0 | ASSERT(preemptible); |
2361 | 0 | rc = free_l4_table(page); |
2362 | 0 | break; |
2363 | 0 | default: |
2364 | 0 | gdprintk(XENLOG_WARNING, "type %" PRtype_info " mfn %" PRI_mfn "\n", |
2365 | 0 | type, mfn_x(page_to_mfn(page))); |
2366 | 0 | rc = -EINVAL; |
2367 | 0 | BUG(); |
2368 | 0 | } |
2369 | 0 |
2370 | 0 | return rc; |
2371 | 0 | } |
2372 | | |
2373 | | |
2374 | | static int _put_final_page_type(struct page_info *page, unsigned long type, |
2375 | | bool preemptible, struct page_info *ptpg) |
2376 | 0 | { |
2377 | 0 | int rc = free_page_type(page, type, preemptible); |
2378 | 0 |
2379 | 0 | /* No need for atomic update of type_info here: no one else updates it. */ |
2380 | 0 | if ( rc == 0 ) |
2381 | 0 | { |
2382 | 0 | if ( ptpg && PGT_type_equal(type, ptpg->u.inuse.type_info) ) |
2383 | 0 | { |
2384 | 0 | dec_linear_uses(page); |
2385 | 0 | dec_linear_entries(ptpg); |
2386 | 0 | } |
2387 | 0 | ASSERT(!page->linear_pt_count || page_get_owner(page)->is_dying); |
2388 | 0 | set_tlbflush_timestamp(page); |
2389 | 0 | smp_wmb(); |
2390 | 0 | page->u.inuse.type_info--; |
2391 | 0 | } |
2392 | 0 | else if ( rc == -EINTR ) |
2393 | 0 | { |
2394 | 0 | ASSERT((page->u.inuse.type_info & |
2395 | 0 | (PGT_count_mask|PGT_validated|PGT_partial)) == 1); |
2396 | 0 | set_tlbflush_timestamp(page); |
2397 | 0 | smp_wmb(); |
2398 | 0 | page->u.inuse.type_info |= PGT_validated; |
2399 | 0 | } |
2400 | 0 | else |
2401 | 0 | { |
2402 | 0 | BUG_ON(rc != -ERESTART); |
2403 | 0 | smp_wmb(); |
2404 | 0 | get_page_light(page); |
2405 | 0 | page->u.inuse.type_info |= PGT_partial; |
2406 | 0 | } |
2407 | 0 |
2408 | 0 | return rc; |
2409 | 0 | } |
2410 | | |
2411 | | |
2412 | | static int _put_page_type(struct page_info *page, bool preemptible, |
2413 | | struct page_info *ptpg) |
2414 | 2 | { |
2415 | 2 | unsigned long nx, x, y = page->u.inuse.type_info; |
2416 | 2 | int rc = 0; |
2417 | 2 | |
2418 | 2 | for ( ; ; ) |
2419 | 2 | { |
2420 | 2 | x = y; |
2421 | 2 | nx = x - 1; |
2422 | 2 | |
2423 | 2 | ASSERT((x & PGT_count_mask) != 0); |
2424 | 2 | |
2425 | 2 | if ( unlikely((nx & PGT_count_mask) == 0) ) |
2426 | 2 | { |
2427 | 2 | if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) && |
2428 | 0 | likely(nx & (PGT_validated|PGT_partial)) ) |
2429 | 0 | { |
2430 | 0 | /* |
2431 | 0 | * Page-table pages must be unvalidated when count is zero. The |
2432 | 0 | * 'free' is safe because the refcnt is non-zero and validated |
2433 | 0 | * bit is clear => other ops will spin or fail. |
2434 | 0 | */ |
2435 | 0 | nx = x & ~(PGT_validated|PGT_partial); |
2436 | 0 | if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, |
2437 | 0 | x, nx)) != x) ) |
2438 | 0 | continue; |
2439 | 0 | /* We cleared the 'valid bit' so we do the clean up. */ |
2440 | 0 | rc = _put_final_page_type(page, x, preemptible, ptpg); |
2441 | 0 | ptpg = NULL; |
2442 | 0 | if ( x & PGT_partial ) |
2443 | 0 | put_page(page); |
2444 | 0 | break; |
2445 | 0 | } |
2446 | 2 | |
2447 | 2 | if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) ) |
2448 | 0 | { |
2449 | 0 | /* |
2450 | 0 | * set_tlbflush_timestamp() accesses the same union that |
2451 | 0 | * linear_pt_count lives in. Unvalidated page table pages, |
2452 | 0 | * however, should only occur during domain destruction |
2453 | 0 | * anyway, and updating linear_pt_count is no longer |
2454 | 0 | * necessary for a dying domain. |
2455 | 0 | */ |
2456 | 0 | ASSERT(page_get_owner(page)->is_dying); |
2457 | 0 | ASSERT(page->linear_pt_count < 0); |
2458 | 0 | ASSERT(ptpg->linear_pt_count > 0); |
2459 | 0 | ptpg = NULL; |
2460 | 0 | } |
2461 | 2 | |
2462 | 2 | set_tlbflush_timestamp(page); |
2463 | 2 | } |
2464 | 0 | else if ( unlikely((nx & (PGT_locked | PGT_count_mask)) == |
2465 | 0 | (PGT_locked | 1)) ) |
2466 | 0 | { |
2467 | 0 | /* |
2468 | 0 | * We must not drop the second to last reference when the page is |
2469 | 0 | * locked, as page_unlock() doesn't do any cleanup of the type. |
2470 | 0 | */ |
2471 | 0 | cpu_relax(); |
2472 | 0 | y = page->u.inuse.type_info; |
2473 | 0 | continue; |
2474 | 0 | } |
2475 | 2 | |
2476 | 2 | if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) ) |
2477 | 2 | break; |
2478 | 2 | |
2479 | 0 | if ( preemptible && hypercall_preempt_check() ) |
2480 | 0 | return -EINTR; |
2481 | 0 | } |
2482 | 2 | |
2483 | 2 | if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) ) |
2484 | 0 | { |
2485 | 0 | ASSERT(!rc); |
2486 | 0 | dec_linear_uses(page); |
2487 | 0 | dec_linear_entries(ptpg); |
2488 | 0 | } |
2489 | 2 | |
2490 | 2 | return rc; |
2491 | 2 | } |
2492 | | |
2493 | | |
2494 | | static int __get_page_type(struct page_info *page, unsigned long type, |
2495 | | int preemptible) |
2496 | 14 | { |
2497 | 14 | unsigned long nx, x, y = page->u.inuse.type_info; |
2498 | 14 | int rc = 0, iommu_ret = 0; |
2499 | 14 | |
2500 | 14 | ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2))); |
2501 | 14 | ASSERT(!in_irq()); |
2502 | 14 | |
2503 | 14 | for ( ; ; ) |
2504 | 14 | { |
2505 | 14 | x = y; |
2506 | 14 | nx = x + 1; |
2507 | 14 | if ( unlikely((nx & PGT_count_mask) == 0) ) |
2508 | 0 | { |
2509 | 0 | gdprintk(XENLOG_WARNING, |
2510 | 0 | "Type count overflow on mfn %"PRI_mfn"\n", |
2511 | 0 | mfn_x(page_to_mfn(page))); |
2512 | 0 | return -EINVAL; |
2513 | 0 | } |
2514 | 14 | else if ( unlikely((x & PGT_count_mask) == 0) ) |
2515 | 14 | { |
2516 | 14 | struct domain *d = page_get_owner(page); |
2517 | 14 | |
2518 | 14 | /* |
2519 | 14 | * Normally we should never let a page go from type count 0 |
2520 | 14 | * to type count 1 when it is shadowed. One exception: |
2521 | 14 | * out-of-sync shadowed pages are allowed to become |
2522 | 14 | * writable. |
2523 | 14 | */ |
2524 | 14 | if ( d && shadow_mode_enabled(d) |
2525 | 0 | && (page->count_info & PGC_page_table) |
2526 | 0 | && !((page->shadow_flags & (1u<<29)) |
2527 | 0 | && type == PGT_writable_page) ) |
2528 | 0 | shadow_remove_all_shadows(d, page_to_mfn(page)); |
2529 | 14 | |
2530 | 14 | ASSERT(!(x & PGT_pae_xen_l2)); |
2531 | 14 | if ( (x & PGT_type_mask) != type ) |
2532 | 13 | { |
2533 | 13 | /* |
2534 | 13 | * On type change we check to flush stale TLB entries. This |
2535 | 13 | * may be unnecessary (e.g., page was GDT/LDT) but those |
2536 | 13 | * circumstances should be very rare. |
2537 | 13 | */ |
2538 | 13 | cpumask_t *mask = this_cpu(scratch_cpumask); |
2539 | 13 | |
2540 | 13 | BUG_ON(in_irq()); |
2541 | 13 | cpumask_copy(mask, d->domain_dirty_cpumask); |
2542 | 13 | |
2543 | 13 | /* Don't flush if the timestamp is old enough. */ |
2544 | 13 | tlbflush_filter(mask, page->tlbflush_timestamp); |
2545 | 13 | |
2546 | 13 | if ( unlikely(!cpumask_empty(mask)) && |
2547 | 13 | /* Shadow mode: track only writable pages. */ |
2548 | 1 | (!shadow_mode_enabled(page_get_owner(page)) || |
2549 | 0 | ((nx & PGT_type_mask) == PGT_writable_page)) ) |
2550 | 1 | { |
2551 | 1 | perfc_incr(need_flush_tlb_flush); |
2552 | 1 | flush_tlb_mask(mask); |
2553 | 1 | } |
2554 | 13 | |
2555 | 13 | /* We lose existing type and validity. */ |
2556 | 13 | nx &= ~(PGT_type_mask | PGT_validated); |
2557 | 13 | nx |= type; |
2558 | 13 | |
2559 | 13 | /* |
2560 | 13 | * No special validation needed for writable pages. |
2561 | 13 | * Page tables and GDT/LDT need to be scanned for validity. |
2562 | 13 | */ |
2563 | 13 | if ( type == PGT_writable_page || type == PGT_shared_page ) |
2564 | 13 | nx |= PGT_validated; |
2565 | 13 | } |
2566 | 14 | } |
2567 | 0 | else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) ) |
2568 | 0 | { |
2569 | 0 | /* Don't log failure if it could be a recursive-mapping attempt. */ |
2570 | 0 | if ( ((x & PGT_type_mask) == PGT_l2_page_table) && |
2571 | 0 | (type == PGT_l1_page_table) ) |
2572 | 0 | return -EINVAL; |
2573 | 0 | if ( ((x & PGT_type_mask) == PGT_l3_page_table) && |
2574 | 0 | (type == PGT_l2_page_table) ) |
2575 | 0 | return -EINVAL; |
2576 | 0 | if ( ((x & PGT_type_mask) == PGT_l4_page_table) && |
2577 | 0 | (type == PGT_l3_page_table) ) |
2578 | 0 | return -EINVAL; |
2579 | 0 | gdprintk(XENLOG_WARNING, |
2580 | 0 | "Bad type (saw %" PRtype_info " != exp %" PRtype_info ") " |
2581 | 0 | "for mfn %" PRI_mfn " (pfn %" PRI_pfn ")\n", |
2582 | 0 | x, type, mfn_x(page_to_mfn(page)), |
2583 | 0 | get_gpfn_from_mfn(mfn_x(page_to_mfn(page)))); |
2584 | 0 | return -EINVAL; |
2585 | 0 | } |
2586 | 0 | else if ( unlikely(!(x & PGT_validated)) ) |
2587 | 0 | { |
2588 | 0 | if ( !(x & PGT_partial) ) |
2589 | 0 | { |
2590 | 0 | /* Someone else is updating validation of this page. Wait... */ |
2591 | 0 | while ( (y = page->u.inuse.type_info) == x ) |
2592 | 0 | { |
2593 | 0 | if ( preemptible && hypercall_preempt_check() ) |
2594 | 0 | return -EINTR; |
2595 | 0 | cpu_relax(); |
2596 | 0 | } |
2597 | 0 | continue; |
2598 | 0 | } |
2599 | 0 | /* Type ref count was left at 1 when PGT_partial got set. */ |
2600 | 0 | ASSERT((x & PGT_count_mask) == 1); |
2601 | 0 | nx = x & ~PGT_partial; |
2602 | 0 | } |
2603 | 14 | |
2604 | 14 | if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) ) |
2605 | 14 | break; |
2606 | 14 | |
2607 | 0 | if ( preemptible && hypercall_preempt_check() ) |
2608 | 0 | return -EINTR; |
2609 | 0 | } |
2610 | 14 | |
2611 | 14 | if ( unlikely((x & PGT_type_mask) != type) ) |
2612 | 13 | { |
2613 | 13 | /* Special pages should not be accessible from devices. */ |
2614 | 13 | struct domain *d = page_get_owner(page); |
2615 | 13 | if ( d && is_pv_domain(d) && unlikely(need_iommu(d)) ) |
2616 | 0 | { |
2617 | 0 | gfn_t gfn = _gfn(mfn_to_gmfn(d, mfn_x(page_to_mfn(page)))); |
2618 | 0 |
2619 | 0 | if ( (x & PGT_type_mask) == PGT_writable_page ) |
2620 | 0 | iommu_ret = iommu_unmap_page(d, gfn_x(gfn)); |
2621 | 0 | else if ( type == PGT_writable_page ) |
2622 | 0 | iommu_ret = iommu_map_page(d, gfn_x(gfn), |
2623 | 0 | mfn_x(page_to_mfn(page)), |
2624 | 0 | IOMMUF_readable|IOMMUF_writable); |
2625 | 0 | } |
2626 | 13 | } |
2627 | 14 | |
2628 | 14 | if ( unlikely(!(nx & PGT_validated)) ) |
2629 | 0 | { |
2630 | 0 | if ( !(x & PGT_partial) ) |
2631 | 0 | { |
2632 | 0 | page->nr_validated_ptes = 0; |
2633 | 0 | page->partial_pte = 0; |
2634 | 0 | } |
2635 | 0 | page->linear_pt_count = 0; |
2636 | 0 | rc = alloc_page_type(page, type, preemptible); |
2637 | 0 | } |
2638 | 14 | |
2639 | 14 | if ( (x & PGT_partial) && !(nx & PGT_partial) ) |
2640 | 0 | put_page(page); |
2641 | 14 | |
2642 | 14 | if ( !rc ) |
2643 | 14 | rc = iommu_ret; |
2644 | 14 | |
2645 | 14 | return rc; |
2646 | 14 | } |
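Both halves of the type machinery juggle flags packed into u.inuse.type_info alongside the reference count. A rough map of the bits involved, purely as orientation (the authoritative layout lives in the arch headers; bit positions are deliberately left unspecified):

    /*
     * type_info, schematically:
     *   PGT_count_mask  -- type reference count (low bits);
     *   PGT_type_mask   -- current type: l1..l4 table, segdesc, writable...;
     *   PGT_pinned      -- type held by an explicit MMUEXT_PIN_* request;
     *   PGT_validated   -- contents verified for the current type;
     *   PGT_partial     -- (de)validation preempted part-way through;
     *   PGT_locked      -- page_lock() is held;
     *   PGT_pae_xen_l2  -- this L2 carries the compat-mode Xen entries.
     */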
2647 | | |
2648 | | void put_page_type(struct page_info *page) |
2649 | 2 | { |
2650 | 2 | int rc = _put_page_type(page, false, NULL); |
2651 | 2 | ASSERT(rc == 0); |
2652 | 2 | (void)rc; |
2653 | 2 | } |
2654 | | |
2655 | | int get_page_type(struct page_info *page, unsigned long type) |
2656 | 14 | { |
2657 | 14 | int rc = __get_page_type(page, type, 0); |
2658 | 14 | if ( likely(rc == 0) ) |
2659 | 14 | return 1; |
2660 | 0 | ASSERT(rc != -EINTR && rc != -ERESTART); |
2661 | 0 | return 0; |
2662 | 14 | } |
2663 | | |
2664 | | int put_page_type_preemptible(struct page_info *page) |
2665 | 0 | { |
2666 | 0 | return _put_page_type(page, true, NULL); |
2667 | 0 | } |
2668 | | |
2669 | | int get_page_type_preemptible(struct page_info *page, unsigned long type) |
2670 | 0 | { |
2671 | 0 | ASSERT(!current->arch.old_guest_table); |
2672 | 0 | return __get_page_type(page, type, 1); |
2673 | 0 | } |
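A hedged sketch of how the preemptible variants are meant to be driven, modelled on the MMUEXT_PIN_*_TABLE handler later in this file; rc and page are the caller's locals:

    /* Sketch: validate a guest frame as an L4 under preemption. */
    rc = get_page_type_preemptible(page, PGT_l4_page_table);
    if ( rc == -EINTR )
        rc = -ERESTART;         /* normalise for the continuation logic */
    if ( rc == -ERESTART )
        /* arrange a hypercall continuation and retry later */;
    else if ( !rc )
        /* validated; release later via put_page_type_preemptible() */;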
2674 | | |
2675 | | int put_old_guest_table(struct vcpu *v) |
2676 | 0 | { |
2677 | 0 | int rc; |
2678 | 0 |
2679 | 0 | if ( !v->arch.old_guest_table ) |
2680 | 0 | return 0; |
2681 | 0 |
2682 | 0 | switch ( rc = _put_page_type(v->arch.old_guest_table, true, |
2683 | 0 | v->arch.old_guest_ptpg) ) |
2684 | 0 | { |
2685 | 0 | case -EINTR: |
2686 | 0 | case -ERESTART: |
2687 | 0 | return -ERESTART; |
2688 | 0 | case 0: |
2689 | 0 | put_page(v->arch.old_guest_table); |
2690 | 0 | } |
2691 | 0 |
2692 | 0 | v->arch.old_guest_table = NULL; |
2693 | 0 |
2694 | 0 | return rc; |
2695 | 0 | } |
2696 | | |
2697 | | int vcpu_destroy_pagetables(struct vcpu *v) |
2698 | 0 | { |
2699 | 0 | unsigned long mfn = pagetable_get_pfn(v->arch.guest_table); |
2700 | 0 | struct page_info *page; |
2701 | 0 | l4_pgentry_t *l4tab = NULL; |
2702 | 0 | int rc = put_old_guest_table(v); |
2703 | 0 |
2704 | 0 | if ( rc ) |
2705 | 0 | return rc; |
2706 | 0 |
2707 | 0 | if ( is_pv_32bit_vcpu(v) ) |
2708 | 0 | { |
2709 | 0 | l4tab = map_domain_page(_mfn(mfn)); |
2710 | 0 | mfn = l4e_get_pfn(*l4tab); |
2711 | 0 | } |
2712 | 0 |
2713 | 0 | if ( mfn ) |
2714 | 0 | { |
2715 | 0 | page = mfn_to_page(_mfn(mfn)); |
2716 | 0 | if ( paging_mode_refcounts(v->domain) ) |
2717 | 0 | put_page(page); |
2718 | 0 | else |
2719 | 0 | rc = put_page_and_type_preemptible(page); |
2720 | 0 | } |
2721 | 0 |
2722 | 0 | if ( l4tab ) |
2723 | 0 | { |
2724 | 0 | if ( !rc ) |
2725 | 0 | l4e_write(l4tab, l4e_empty()); |
2726 | 0 | unmap_domain_page(l4tab); |
2727 | 0 | } |
2728 | 0 | else if ( !rc ) |
2729 | 0 | { |
2730 | 0 | v->arch.guest_table = pagetable_null(); |
2731 | 0 |
2732 | 0 | /* Drop ref to guest_table_user (from MMUEXT_NEW_USER_BASEPTR) */ |
2733 | 0 | mfn = pagetable_get_pfn(v->arch.guest_table_user); |
2734 | 0 | if ( mfn ) |
2735 | 0 | { |
2736 | 0 | page = mfn_to_page(_mfn(mfn)); |
2737 | 0 | if ( paging_mode_refcounts(v->domain) ) |
2738 | 0 | put_page(page); |
2739 | 0 | else |
2740 | 0 | rc = put_page_and_type_preemptible(page); |
2741 | 0 | } |
2742 | 0 | if ( !rc ) |
2743 | 0 | v->arch.guest_table_user = pagetable_null(); |
2744 | 0 | } |
2745 | 0 |
2746 | 0 | v->arch.cr3 = 0; |
2747 | 0 |
2748 | 0 | /* |
2749 | 0 | * put_page_and_type_preemptible() is liable to return -EINTR. Our |
2750 | 0 | * callers expect -ERESTART, so convert it here. |
2751 | 0 | */ |
2752 | 0 | return rc != -EINTR ? rc : -ERESTART; |
2753 | 0 | } |
2754 | | |
2755 | | int new_guest_cr3(mfn_t mfn) |
2756 | 0 | { |
2757 | 0 | struct vcpu *curr = current; |
2758 | 0 | struct domain *d = curr->domain; |
2759 | 0 | int rc; |
2760 | 0 | mfn_t old_base_mfn; |
2761 | 0 |
2762 | 0 | if ( is_pv_32bit_domain(d) ) |
2763 | 0 | { |
2764 | 0 | mfn_t gt_mfn = pagetable_get_mfn(curr->arch.guest_table); |
2765 | 0 | l4_pgentry_t *pl4e = map_domain_page(gt_mfn); |
2766 | 0 |
2767 | 0 | rc = mod_l4_entry(pl4e, |
2768 | 0 | l4e_from_mfn(mfn, |
2769 | 0 | (_PAGE_PRESENT | _PAGE_RW | |
2770 | 0 | _PAGE_USER | _PAGE_ACCESSED)), |
2771 | 0 | mfn_x(gt_mfn), 0, curr); |
2772 | 0 | unmap_domain_page(pl4e); |
2773 | 0 | switch ( rc ) |
2774 | 0 | { |
2775 | 0 | case 0: |
2776 | 0 | break; |
2777 | 0 | case -EINTR: |
2778 | 0 | case -ERESTART: |
2779 | 0 | return -ERESTART; |
2780 | 0 | default: |
2781 | 0 | gdprintk(XENLOG_WARNING, |
2782 | 0 | "Error while installing new compat baseptr %" PRI_mfn "\n", |
2783 | 0 | mfn_x(mfn)); |
2784 | 0 | return rc; |
2785 | 0 | } |
2786 | 0 |
2787 | 0 | invalidate_shadow_ldt(curr, 0); |
2788 | 0 | write_ptbase(curr); |
2789 | 0 |
2790 | 0 | return 0; |
2791 | 0 | } |
2792 | 0 |
2793 | 0 | rc = put_old_guest_table(curr); |
2794 | 0 | if ( unlikely(rc) ) |
2795 | 0 | return rc; |
2796 | 0 |
2797 | 0 | old_base_mfn = pagetable_get_mfn(curr->arch.guest_table); |
2798 | 0 | /* |
2799 | 0 | * This is particularly important when getting restarted after the |
2800 | 0 | * previous attempt got preempted in the put-old-MFN phase. |
2801 | 0 | */ |
2802 | 0 | if ( mfn_eq(old_base_mfn, mfn) ) |
2803 | 0 | { |
2804 | 0 | write_ptbase(curr); |
2805 | 0 | return 0; |
2806 | 0 | } |
2807 | 0 |
2808 | 0 | rc = paging_mode_refcounts(d) |
2809 | 0 | ? (get_page_from_mfn(mfn, d) ? 0 : -EINVAL) |
2810 | 0 | : get_page_and_type_from_mfn(mfn, PGT_root_page_table, d, 0, 1); |
2811 | 0 | switch ( rc ) |
2812 | 0 | { |
2813 | 0 | case 0: |
2814 | 0 | break; |
2815 | 0 | case -EINTR: |
2816 | 0 | case -ERESTART: |
2817 | 0 | return -ERESTART; |
2818 | 0 | default: |
2819 | 0 | gdprintk(XENLOG_WARNING, |
2820 | 0 | "Error while installing new baseptr %" PRI_mfn "\n", |
2821 | 0 | mfn_x(mfn)); |
2822 | 0 | return rc; |
2823 | 0 | } |
2824 | 0 |
2825 | 0 | invalidate_shadow_ldt(curr, 0); |
2826 | 0 |
2827 | 0 | if ( !VM_ASSIST(d, m2p_strict) && !paging_mode_refcounts(d) ) |
2828 | 0 | fill_ro_mpt(mfn); |
2829 | 0 | curr->arch.guest_table = pagetable_from_mfn(mfn); |
2830 | 0 | update_cr3(curr); |
2831 | 0 |
2832 | 0 | write_ptbase(curr); |
2833 | 0 |
2834 | 0 | if ( likely(mfn_x(old_base_mfn) != 0) ) |
2835 | 0 | { |
2836 | 0 | struct page_info *page = mfn_to_page(old_base_mfn); |
2837 | 0 |
2838 | 0 | if ( paging_mode_refcounts(d) ) |
2839 | 0 | put_page(page); |
2840 | 0 | else |
2841 | 0 | switch ( rc = put_page_and_type_preemptible(page) ) |
2842 | 0 | { |
2843 | 0 | case -EINTR: |
2844 | 0 | rc = -ERESTART; |
2845 | 0 | /* fallthrough */ |
2846 | 0 | case -ERESTART: |
2847 | 0 | curr->arch.old_guest_ptpg = NULL; |
2848 | 0 | curr->arch.old_guest_table = page; |
2849 | 0 | break; |
2850 | 0 | default: |
2851 | 0 | BUG_ON(rc); |
2852 | 0 | break; |
2853 | 0 | } |
2854 | 0 | } |
2855 | 0 |
2856 | 0 | return rc; |
2857 | 0 | } |
2858 | | |
2859 | | static struct domain *get_pg_owner(domid_t domid) |
2860 | 0 | { |
2861 | 0 | struct domain *pg_owner = NULL, *curr = current->domain; |
2862 | 0 |
2863 | 0 | if ( likely(domid == DOMID_SELF) ) |
2864 | 0 | { |
2865 | 0 | pg_owner = rcu_lock_current_domain(); |
2866 | 0 | goto out; |
2867 | 0 | } |
2868 | 0 |
2869 | 0 | if ( unlikely(domid == curr->domain_id) ) |
2870 | 0 | { |
2871 | 0 | gdprintk(XENLOG_WARNING, "Cannot specify itself as foreign domain\n"); |
2872 | 0 | goto out; |
2873 | 0 | } |
2874 | 0 |
2875 | 0 | switch ( domid ) |
2876 | 0 | { |
2877 | 0 | case DOMID_IO: |
2878 | 0 | pg_owner = rcu_lock_domain(dom_io); |
2879 | 0 | break; |
2880 | 0 | case DOMID_XEN: |
2881 | 0 | pg_owner = rcu_lock_domain(dom_xen); |
2882 | 0 | break; |
2883 | 0 | default: |
2884 | 0 | if ( (pg_owner = rcu_lock_domain_by_id(domid)) == NULL ) |
2885 | 0 | { |
2886 | 0 | gdprintk(XENLOG_WARNING, "Unknown domain d%d\n", domid); |
2887 | 0 | break; |
2888 | 0 | } |
2889 | 0 | break; |
2890 | 0 | } |
2891 | 0 |
2892 | 0 | out: |
2893 | 0 | return pg_owner; |
2894 | 0 | } |
2895 | | |
2896 | | static void put_pg_owner(struct domain *pg_owner) |
2897 | 0 | { |
2898 | 0 | rcu_unlock_domain(pg_owner); |
2899 | 0 | } |
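get_pg_owner()/put_pg_owner() bracket every foreign-domain hypercall below. A minimal sketch of the protocol, mirroring do_mmuext_op():

    struct domain *pg_owner = get_pg_owner(foreigndom);

    if ( !pg_owner )
        return -ESRCH;
    /* ... operate on pg_owner's pages ... */
    put_pg_owner(pg_owner);     /* drop the RCU-held domain reference */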
2900 | | |
2901 | | static inline int vcpumask_to_pcpumask( |
2902 | | struct domain *d, XEN_GUEST_HANDLE_PARAM(const_void) bmap, cpumask_t *pmask) |
2903 | 0 | { |
2904 | 0 | unsigned int vcpu_id, vcpu_bias, offs; |
2905 | 0 | unsigned long vmask; |
2906 | 0 | struct vcpu *v; |
2907 | 0 | bool is_native = !is_pv_32bit_domain(d); |
2908 | 0 |
2909 | 0 | cpumask_clear(pmask); |
2910 | 0 | for ( vmask = 0, offs = 0; ; ++offs ) |
2911 | 0 | { |
2912 | 0 | vcpu_bias = offs * (is_native ? BITS_PER_LONG : 32); |
2913 | 0 | if ( vcpu_bias >= d->max_vcpus ) |
2914 | 0 | return 0; |
2915 | 0 |
2916 | 0 | if ( unlikely(is_native ? |
2917 | 0 | copy_from_guest_offset(&vmask, bmap, offs, 1) : |
2918 | 0 | copy_from_guest_offset((unsigned int *)&vmask, bmap, |
2919 | 0 | offs, 1)) ) |
2920 | 0 | { |
2921 | 0 | cpumask_clear(pmask); |
2922 | 0 | return -EFAULT; |
2923 | 0 | } |
2924 | 0 |
2925 | 0 | while ( vmask ) |
2926 | 0 | { |
2927 | 0 | vcpu_id = find_first_set_bit(vmask); |
2928 | 0 | vmask &= ~(1UL << vcpu_id); |
2929 | 0 | vcpu_id += vcpu_bias; |
2930 | 0 | if ( (vcpu_id >= d->max_vcpus) ) |
2931 | 0 | return 0; |
2932 | 0 | if ( ((v = d->vcpu[vcpu_id]) != NULL) ) |
2933 | 0 | cpumask_or(pmask, pmask, v->vcpu_dirty_cpumask); |
2934 | 0 | } |
2935 | 0 | } |
2936 | 0 | } |
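The bitmap is consumed in guest-word-sized chunks (32-bit words for compat guests, longs otherwise), with bit n of chunk k naming vCPU k*width+n. A hedged usage sketch; bmap stands in for a guest handle whose illustrative value 0x5 selects vCPUs 0 and 2:

    /* Sketch: flush whichever pCPUs may hold state for vCPUs 0 and 2. */
    cpumask_t *mask = this_cpu(scratch_cpumask);

    if ( vcpumask_to_pcpumask(d, bmap, mask) == 0 )
        flush_tlb_mask(mask);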
2937 | | |
2938 | | long do_mmuext_op( |
2939 | | XEN_GUEST_HANDLE_PARAM(mmuext_op_t) uops, |
2940 | | unsigned int count, |
2941 | | XEN_GUEST_HANDLE_PARAM(uint) pdone, |
2942 | | unsigned int foreigndom) |
2943 | 0 | { |
2944 | 0 | struct mmuext_op op; |
2945 | 0 | unsigned long type; |
2946 | 0 | unsigned int i, done = 0; |
2947 | 0 | struct vcpu *curr = current; |
2948 | 0 | struct domain *currd = curr->domain; |
2949 | 0 | struct domain *pg_owner; |
2950 | 0 | int rc = put_old_guest_table(curr); |
2951 | 0 |
2952 | 0 | if ( unlikely(rc) ) |
2953 | 0 | { |
2954 | 0 | if ( likely(rc == -ERESTART) ) |
2955 | 0 | rc = hypercall_create_continuation( |
2956 | 0 | __HYPERVISOR_mmuext_op, "hihi", uops, count, pdone, |
2957 | 0 | foreigndom); |
2958 | 0 | return rc; |
2959 | 0 | } |
2960 | 0 |
2961 | 0 | if ( unlikely(count == MMU_UPDATE_PREEMPTED) && |
2962 | 0 | likely(guest_handle_is_null(uops)) ) |
2963 | 0 | { |
2964 | 0 | /* |
2965 | 0 | * See the curr->arch.old_guest_table related |
2966 | 0 | * hypercall_create_continuation() below. |
2967 | 0 | */ |
2968 | 0 | return (int)foreigndom; |
2969 | 0 | } |
2970 | 0 |
2971 | 0 | if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) |
2972 | 0 | { |
2973 | 0 | count &= ~MMU_UPDATE_PREEMPTED; |
2974 | 0 | if ( unlikely(!guest_handle_is_null(pdone)) ) |
2975 | 0 | (void)copy_from_guest(&done, pdone, 1); |
2976 | 0 | } |
2977 | 0 | else |
2978 | 0 | perfc_incr(calls_to_mmuext_op); |
2979 | 0 |
2980 | 0 | if ( unlikely(!guest_handle_okay(uops, count)) ) |
2981 | 0 | return -EFAULT; |
2982 | 0 |
2983 | 0 | if ( (pg_owner = get_pg_owner(foreigndom)) == NULL ) |
2984 | 0 | return -ESRCH; |
2985 | 0 |
2986 | 0 | if ( !is_pv_domain(pg_owner) ) |
2987 | 0 | { |
2988 | 0 | put_pg_owner(pg_owner); |
2989 | 0 | return -EINVAL; |
2990 | 0 | } |
2991 | 0 |
2992 | 0 | rc = xsm_mmuext_op(XSM_TARGET, currd, pg_owner); |
2993 | 0 | if ( rc ) |
2994 | 0 | { |
2995 | 0 | put_pg_owner(pg_owner); |
2996 | 0 | return rc; |
2997 | 0 | } |
2998 | 0 |
2999 | 0 | for ( i = 0; i < count; i++ ) |
3000 | 0 | { |
3001 | 0 | if ( curr->arch.old_guest_table || (i && hypercall_preempt_check()) ) |
3002 | 0 | { |
3003 | 0 | rc = -ERESTART; |
3004 | 0 | break; |
3005 | 0 | } |
3006 | 0 |
3007 | 0 | if ( unlikely(__copy_from_guest(&op, uops, 1) != 0) ) |
3008 | 0 | { |
3009 | 0 | rc = -EFAULT; |
3010 | 0 | break; |
3011 | 0 | } |
3012 | 0 |
3013 | 0 | if ( is_hvm_domain(currd) ) |
3014 | 0 | { |
3015 | 0 | switch ( op.cmd ) |
3016 | 0 | { |
3017 | 0 | case MMUEXT_PIN_L1_TABLE: |
3018 | 0 | case MMUEXT_PIN_L2_TABLE: |
3019 | 0 | case MMUEXT_PIN_L3_TABLE: |
3020 | 0 | case MMUEXT_PIN_L4_TABLE: |
3021 | 0 | case MMUEXT_UNPIN_TABLE: |
3022 | 0 | break; |
3023 | 0 | default: |
3024 | 0 | rc = -EOPNOTSUPP; |
3025 | 0 | goto done; |
3026 | 0 | } |
3027 | 0 | } |
3028 | 0 |
3029 | 0 | rc = 0; |
3030 | 0 |
3031 | 0 | switch ( op.cmd ) |
3032 | 0 | { |
3033 | 0 | struct page_info *page; |
3034 | 0 | p2m_type_t p2mt; |
3035 | 0 |
3036 | 0 | case MMUEXT_PIN_L1_TABLE: |
3037 | 0 | type = PGT_l1_page_table; |
3038 | 0 | goto pin_page; |
3039 | 0 |
3040 | 0 | case MMUEXT_PIN_L2_TABLE: |
3041 | 0 | type = PGT_l2_page_table; |
3042 | 0 | goto pin_page; |
3043 | 0 |
3044 | 0 | case MMUEXT_PIN_L3_TABLE: |
3045 | 0 | type = PGT_l3_page_table; |
3046 | 0 | goto pin_page; |
3047 | 0 |
3048 | 0 | case MMUEXT_PIN_L4_TABLE: |
3049 | 0 | if ( is_pv_32bit_domain(pg_owner) ) |
3050 | 0 | break; |
3051 | 0 | type = PGT_l4_page_table; |
3052 | 0 |
3053 | 0 | pin_page: |
3054 | 0 | /* Ignore pinning of invalid paging levels. */ |
3055 | 0 | if ( (op.cmd - MMUEXT_PIN_L1_TABLE) > (CONFIG_PAGING_LEVELS - 1) ) |
3056 | 0 | break; |
3057 | 0 |
3058 | 0 | if ( paging_mode_refcounts(pg_owner) ) |
3059 | 0 | break; |
3060 | 0 |
3061 | 0 | page = get_page_from_gfn(pg_owner, op.arg1.mfn, NULL, P2M_ALLOC); |
3062 | 0 | if ( unlikely(!page) ) |
3063 | 0 | { |
3064 | 0 | rc = -EINVAL; |
3065 | 0 | break; |
3066 | 0 | } |
3067 | 0 |
3068 | 0 | rc = get_page_type_preemptible(page, type); |
3069 | 0 | if ( unlikely(rc) ) |
3070 | 0 | { |
3071 | 0 | if ( rc == -EINTR ) |
3072 | 0 | rc = -ERESTART; |
3073 | 0 | else if ( rc != -ERESTART ) |
3074 | 0 | gdprintk(XENLOG_WARNING, |
3075 | 0 | "Error %d while pinning mfn %" PRI_mfn "\n", |
3076 | 0 | rc, mfn_x(page_to_mfn(page))); |
3077 | 0 | if ( page != curr->arch.old_guest_table ) |
3078 | 0 | put_page(page); |
3079 | 0 | break; |
3080 | 0 | } |
3081 | 0 |
3082 | 0 | rc = xsm_memory_pin_page(XSM_HOOK, currd, pg_owner, page); |
3083 | 0 | if ( !rc && unlikely(test_and_set_bit(_PGT_pinned, |
3084 | 0 | &page->u.inuse.type_info)) ) |
3085 | 0 | { |
3086 | 0 | gdprintk(XENLOG_WARNING, |
3087 | 0 | "mfn %" PRI_mfn " already pinned\n", |
3088 | 0 | mfn_x(page_to_mfn(page))); |
3089 | 0 | rc = -EINVAL; |
3090 | 0 | } |
3091 | 0 |
3092 | 0 | if ( unlikely(rc) ) |
3093 | 0 | goto pin_drop; |
3094 | 0 |
3095 | 0 | /* A page is dirtied when its pin status is set. */ |
3096 | 0 | paging_mark_dirty(pg_owner, page_to_mfn(page)); |
3097 | 0 |
3098 | 0 | /* We can race domain destruction (domain_relinquish_resources). */ |
3099 | 0 | if ( unlikely(pg_owner != currd) ) |
3100 | 0 | { |
3101 | 0 | bool drop_ref; |
3102 | 0 |
3103 | 0 | spin_lock(&pg_owner->page_alloc_lock); |
3104 | 0 | drop_ref = (pg_owner->is_dying && |
3105 | 0 | test_and_clear_bit(_PGT_pinned, |
3106 | 0 | &page->u.inuse.type_info)); |
3107 | 0 | spin_unlock(&pg_owner->page_alloc_lock); |
3108 | 0 | if ( drop_ref ) |
3109 | 0 | { |
3110 | 0 | pin_drop: |
3111 | 0 | if ( type == PGT_l1_page_table ) |
3112 | 0 | put_page_and_type(page); |
3113 | 0 | else |
3114 | 0 | { |
3115 | 0 | curr->arch.old_guest_ptpg = NULL; |
3116 | 0 | curr->arch.old_guest_table = page; |
3117 | 0 | } |
3118 | 0 | } |
3119 | 0 | } |
3120 | 0 | break; |
3121 | 0 |
3122 | 0 | case MMUEXT_UNPIN_TABLE: |
3123 | 0 | if ( paging_mode_refcounts(pg_owner) ) |
3124 | 0 | break; |
3125 | 0 |
3126 | 0 | page = get_page_from_gfn(pg_owner, op.arg1.mfn, NULL, P2M_ALLOC); |
3127 | 0 | if ( unlikely(!page) ) |
3128 | 0 | { |
3129 | 0 | gdprintk(XENLOG_WARNING, |
3130 | 0 | "mfn %" PRI_mfn " bad, or bad owner d%d\n", |
3131 | 0 | op.arg1.mfn, pg_owner->domain_id); |
3132 | 0 | rc = -EINVAL; |
3133 | 0 | break; |
3134 | 0 | } |
3135 | 0 |
3136 | 0 | if ( !test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) ) |
3137 | 0 | { |
3138 | 0 | put_page(page); |
3139 | 0 | gdprintk(XENLOG_WARNING, |
3140 | 0 | "mfn %" PRI_mfn " not pinned\n", op.arg1.mfn); |
3141 | 0 | rc = -EINVAL; |
3142 | 0 | break; |
3143 | 0 | } |
3144 | 0 |
3145 | 0 | switch ( rc = put_page_and_type_preemptible(page) ) |
3146 | 0 | { |
3147 | 0 | case -EINTR: |
3148 | 0 | case -ERESTART: |
3149 | 0 | curr->arch.old_guest_ptpg = NULL; |
3150 | 0 | curr->arch.old_guest_table = page; |
3151 | 0 | rc = 0; |
3152 | 0 | break; |
3153 | 0 | default: |
3154 | 0 | BUG_ON(rc); |
3155 | 0 | break; |
3156 | 0 | } |
3157 | 0 | put_page(page); |
3158 | 0 |
3159 | 0 | /* A page is dirtied when its pin status is cleared. */ |
3160 | 0 | paging_mark_dirty(pg_owner, page_to_mfn(page)); |
3161 | 0 | break; |
3162 | 0 |
3163 | 0 | case MMUEXT_NEW_BASEPTR: |
3164 | 0 | if ( unlikely(currd != pg_owner) ) |
3165 | 0 | rc = -EPERM; |
3166 | 0 | else if ( unlikely(paging_mode_translate(currd)) ) |
3167 | 0 | rc = -EINVAL; |
3168 | 0 | else |
3169 | 0 | rc = new_guest_cr3(_mfn(op.arg1.mfn)); |
3170 | 0 | break; |
3171 | 0 |
3172 | 0 | case MMUEXT_NEW_USER_BASEPTR: { |
3173 | 0 | unsigned long old_mfn; |
3174 | 0 |
3175 | 0 | if ( unlikely(currd != pg_owner) ) |
3176 | 0 | rc = -EPERM; |
3177 | 0 | else if ( unlikely(paging_mode_translate(currd)) ) |
3178 | 0 | rc = -EINVAL; |
3179 | 0 | if ( unlikely(rc) ) |
3180 | 0 | break; |
3181 | 0 |
3182 | 0 | old_mfn = pagetable_get_pfn(curr->arch.guest_table_user); |
3183 | 0 | /* |
3184 | 0 | * This is particularly important when getting restarted after the |
3185 | 0 | * previous attempt got preempted in the put-old-MFN phase. |
3186 | 0 | */ |
3187 | 0 | if ( old_mfn == op.arg1.mfn ) |
3188 | 0 | break; |
3189 | 0 |
3190 | 0 | if ( op.arg1.mfn != 0 ) |
3191 | 0 | { |
3192 | 0 | rc = get_page_and_type_from_mfn( |
3193 | 0 | _mfn(op.arg1.mfn), PGT_root_page_table, currd, 0, 1); |
3194 | 0 |
3195 | 0 | if ( unlikely(rc) ) |
3196 | 0 | { |
3197 | 0 | if ( rc == -EINTR ) |
3198 | 0 | rc = -ERESTART; |
3199 | 0 | else if ( rc != -ERESTART ) |
3200 | 0 | gdprintk(XENLOG_WARNING, |
3201 | 0 | "Error %d installing new mfn %" PRI_mfn "\n", |
3202 | 0 | rc, op.arg1.mfn); |
3203 | 0 | break; |
3204 | 0 | } |
3205 | 0 |
3206 | 0 | if ( VM_ASSIST(currd, m2p_strict) ) |
3207 | 0 | zap_ro_mpt(_mfn(op.arg1.mfn)); |
3208 | 0 | } |
3209 | 0 |
3210 | 0 | curr->arch.guest_table_user = pagetable_from_pfn(op.arg1.mfn); |
3211 | 0 |
3212 | 0 | if ( old_mfn != 0 ) |
3213 | 0 | { |
3214 | 0 | page = mfn_to_page(_mfn(old_mfn)); |
3215 | 0 |
3216 | 0 | switch ( rc = put_page_and_type_preemptible(page) ) |
3217 | 0 | { |
3218 | 0 | case -EINTR: |
3219 | 0 | rc = -ERESTART; |
3220 | 0 | /* fallthrough */ |
3221 | 0 | case -ERESTART: |
3222 | 0 | curr->arch.old_guest_ptpg = NULL; |
3223 | 0 | curr->arch.old_guest_table = page; |
3224 | 0 | break; |
3225 | 0 | default: |
3226 | 0 | BUG_ON(rc); |
3227 | 0 | break; |
3228 | 0 | } |
3229 | 0 | } |
3230 | 0 |
3231 | 0 | break; |
3232 | 0 | } |
3233 | 0 |
3234 | 0 | case MMUEXT_TLB_FLUSH_LOCAL: |
3235 | 0 | if ( likely(currd == pg_owner) ) |
3236 | 0 | flush_tlb_local(); |
3237 | 0 | else |
3238 | 0 | rc = -EPERM; |
3239 | 0 | break; |
3240 | 0 |
3241 | 0 | case MMUEXT_INVLPG_LOCAL: |
3242 | 0 | if ( unlikely(currd != pg_owner) ) |
3243 | 0 | rc = -EPERM; |
3244 | 0 | else |
3245 | 0 | paging_invlpg(curr, op.arg1.linear_addr); |
3246 | 0 | break; |
3247 | 0 |
3248 | 0 | case MMUEXT_TLB_FLUSH_MULTI: |
3249 | 0 | case MMUEXT_INVLPG_MULTI: |
3250 | 0 | { |
3251 | 0 | cpumask_t *mask = this_cpu(scratch_cpumask); |
3252 | 0 |
3253 | 0 | if ( unlikely(currd != pg_owner) ) |
3254 | 0 | rc = -EPERM; |
3255 | 0 | else if ( unlikely(vcpumask_to_pcpumask(currd, |
3256 | 0 | guest_handle_to_param(op.arg2.vcpumask, |
3257 | 0 | const_void), |
3258 | 0 | mask)) ) |
3259 | 0 | rc = -EINVAL; |
3260 | 0 | if ( unlikely(rc) ) |
3261 | 0 | break; |
3262 | 0 |
3263 | 0 | if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI ) |
3264 | 0 | flush_tlb_mask(mask); |
3265 | 0 | else if ( __addr_ok(op.arg1.linear_addr) ) |
3266 | 0 | flush_tlb_one_mask(mask, op.arg1.linear_addr); |
3267 | 0 | break; |
3268 | 0 | } |
3269 | 0 |
3270 | 0 | case MMUEXT_TLB_FLUSH_ALL: |
3271 | 0 | if ( likely(currd == pg_owner) ) |
3272 | 0 | flush_tlb_mask(currd->domain_dirty_cpumask); |
3273 | 0 | else |
3274 | 0 | rc = -EPERM; |
3275 | 0 | break; |
3276 | 0 |
3277 | 0 | case MMUEXT_INVLPG_ALL: |
3278 | 0 | if ( unlikely(currd != pg_owner) ) |
3279 | 0 | rc = -EPERM; |
3280 | 0 | else if ( __addr_ok(op.arg1.linear_addr) ) |
3281 | 0 | flush_tlb_one_mask(currd->domain_dirty_cpumask, |
3282 | 0 | op.arg1.linear_addr); |
3283 | 0 | break; |
3284 | 0 |
3285 | 0 | case MMUEXT_FLUSH_CACHE: |
3286 | 0 | if ( unlikely(currd != pg_owner) ) |
3287 | 0 | rc = -EPERM; |
3288 | 0 | else if ( unlikely(!cache_flush_permitted(currd)) ) |
3289 | 0 | rc = -EACCES; |
3290 | 0 | else |
3291 | 0 | wbinvd(); |
3292 | 0 | break; |
3293 | 0 |
3294 | 0 | case MMUEXT_FLUSH_CACHE_GLOBAL: |
3295 | 0 | if ( unlikely(currd != pg_owner) ) |
3296 | 0 | rc = -EPERM; |
3297 | 0 | else if ( likely(cache_flush_permitted(currd)) ) |
3298 | 0 | { |
3299 | 0 | unsigned int cpu; |
3300 | 0 | cpumask_t *mask = this_cpu(scratch_cpumask); |
3301 | 0 |
3302 | 0 | cpumask_clear(mask); |
3303 | 0 | for_each_online_cpu(cpu) |
3304 | 0 | if ( !cpumask_intersects(mask, |
3305 | 0 | per_cpu(cpu_sibling_mask, cpu)) ) |
3306 | 0 | __cpumask_set_cpu(cpu, mask); |
3307 | 0 | flush_mask(mask, FLUSH_CACHE); |
3308 | 0 | } |
3309 | 0 | else |
3310 | 0 | rc = -EINVAL; |
3311 | 0 | break; |
3312 | 0 |
3313 | 0 | case MMUEXT_SET_LDT: |
3314 | 0 | { |
3315 | 0 | unsigned int ents = op.arg2.nr_ents; |
3316 | 0 | unsigned long ptr = ents ? op.arg1.linear_addr : 0; |
3317 | 0 |
3318 | 0 | if ( unlikely(currd != pg_owner) ) |
3319 | 0 | rc = -EPERM; |
3320 | 0 | else if ( paging_mode_external(currd) ) |
3321 | 0 | rc = -EINVAL; |
3322 | 0 | else if ( ((ptr & (PAGE_SIZE - 1)) != 0) || !__addr_ok(ptr) || |
3323 | 0 | (ents > 8192) ) |
3324 | 0 | { |
3325 | 0 | gdprintk(XENLOG_WARNING, |
3326 | 0 | "Bad args to SET_LDT: ptr=%lx, ents=%x\n", ptr, ents); |
3327 | 0 | rc = -EINVAL; |
3328 | 0 | } |
3329 | 0 | else if ( (curr->arch.pv_vcpu.ldt_ents != ents) || |
3330 | 0 | (curr->arch.pv_vcpu.ldt_base != ptr) ) |
3331 | 0 | { |
3332 | 0 | invalidate_shadow_ldt(curr, 0); |
3333 | 0 | flush_tlb_local(); |
3334 | 0 | curr->arch.pv_vcpu.ldt_base = ptr; |
3335 | 0 | curr->arch.pv_vcpu.ldt_ents = ents; |
3336 | 0 | load_LDT(curr); |
3337 | 0 | } |
3338 | 0 | break; |
3339 | 0 | } |
3340 | 0 |
3341 | 0 | case MMUEXT_CLEAR_PAGE: |
3342 | 0 | page = get_page_from_gfn(pg_owner, op.arg1.mfn, &p2mt, P2M_ALLOC); |
3343 | 0 | if ( unlikely(p2mt != p2m_ram_rw) && page ) |
3344 | 0 | { |
3345 | 0 | put_page(page); |
3346 | 0 | page = NULL; |
3347 | 0 | } |
3348 | 0 | if ( !page || !get_page_type(page, PGT_writable_page) ) |
3349 | 0 | { |
3350 | 0 | if ( page ) |
3351 | 0 | put_page(page); |
3352 | 0 | gdprintk(XENLOG_WARNING, |
3353 | 0 | "Error clearing mfn %" PRI_mfn "\n", op.arg1.mfn); |
3354 | 0 | rc = -EINVAL; |
3355 | 0 | break; |
3356 | 0 | } |
3357 | 0 |
3358 | 0 | /* A page is dirtied when it's being cleared. */ |
3359 | 0 | paging_mark_dirty(pg_owner, page_to_mfn(page)); |
3360 | 0 |
3361 | 0 | clear_domain_page(page_to_mfn(page)); |
3362 | 0 |
3363 | 0 | put_page_and_type(page); |
3364 | 0 | break; |
3365 | 0 |
3366 | 0 | case MMUEXT_COPY_PAGE: |
3367 | 0 | { |
3368 | 0 | struct page_info *src_page, *dst_page; |
3369 | 0 |
3370 | 0 | src_page = get_page_from_gfn(pg_owner, op.arg2.src_mfn, &p2mt, |
3371 | 0 | P2M_ALLOC); |
3372 | 0 | if ( unlikely(p2mt != p2m_ram_rw) && src_page ) |
3373 | 0 | { |
3374 | 0 | put_page(src_page); |
3375 | 0 | src_page = NULL; |
3376 | 0 | } |
3377 | 0 | if ( unlikely(!src_page) ) |
3378 | 0 | { |
3379 | 0 | gdprintk(XENLOG_WARNING, |
3380 | 0 | "Error copying from mfn %" PRI_mfn "\n", |
3381 | 0 | op.arg2.src_mfn); |
3382 | 0 | rc = -EINVAL; |
3383 | 0 | break; |
3384 | 0 | } |
3385 | 0 |
3386 | 0 | dst_page = get_page_from_gfn(pg_owner, op.arg1.mfn, &p2mt, |
3387 | 0 | P2M_ALLOC); |
3388 | 0 | if ( unlikely(p2mt != p2m_ram_rw) && dst_page ) |
3389 | 0 | { |
3390 | 0 | put_page(dst_page); |
3391 | 0 | dst_page = NULL; |
3392 | 0 | } |
3393 | 0 | rc = (dst_page && |
3394 | 0 | get_page_type(dst_page, PGT_writable_page)) ? 0 : -EINVAL; |
3395 | 0 | if ( unlikely(rc) ) |
3396 | 0 | { |
3397 | 0 | put_page(src_page); |
3398 | 0 | if ( dst_page ) |
3399 | 0 | put_page(dst_page); |
3400 | 0 | gdprintk(XENLOG_WARNING, |
3401 | 0 | "Error copying to mfn %" PRI_mfn "\n", op.arg1.mfn); |
3402 | 0 | break; |
3403 | 0 | } |
3404 | 0 |
3405 | 0 | /* A page is dirtied when it's being copied to. */ |
3406 | 0 | paging_mark_dirty(pg_owner, page_to_mfn(dst_page)); |
3407 | 0 |
3408 | 0 | copy_domain_page(page_to_mfn(dst_page), page_to_mfn(src_page)); |
3409 | 0 |
3410 | 0 | put_page_and_type(dst_page); |
3411 | 0 | put_page(src_page); |
3412 | 0 | break; |
3413 | 0 | } |
3414 | 0 |
3415 | 0 | case MMUEXT_MARK_SUPER: |
3416 | 0 | case MMUEXT_UNMARK_SUPER: |
3417 | 0 | rc = -EOPNOTSUPP; |
3418 | 0 | break; |
3419 | 0 |
3420 | 0 | default: |
3421 | 0 | rc = -ENOSYS; |
3422 | 0 | break; |
3423 | 0 | } |
3424 | 0 |
3425 | 0 | done: |
3426 | 0 | if ( unlikely(rc) ) |
3427 | 0 | break; |
3428 | 0 |
3429 | 0 | guest_handle_add_offset(uops, 1); |
3430 | 0 | } |
3431 | 0 |
3432 | 0 | if ( rc == -ERESTART ) |
3433 | 0 | { |
3434 | 0 | ASSERT(i < count); |
3435 | 0 | rc = hypercall_create_continuation( |
3436 | 0 | __HYPERVISOR_mmuext_op, "hihi", |
3437 | 0 | uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); |
3438 | 0 | } |
3439 | 0 | else if ( curr->arch.old_guest_table ) |
3440 | 0 | { |
3441 | 0 | XEN_GUEST_HANDLE_PARAM(void) null; |
3442 | 0 |
3443 | 0 | ASSERT(rc || i == count); |
3444 | 0 | set_xen_guest_handle(null, NULL); |
3445 | 0 | /* |
3446 | 0 | * In order to have a way to communicate the final return value to |
3447 | 0 | * our continuation, we pass this in place of "foreigndom", building |
3448 | 0 | * on the fact that this argument isn't needed anymore. |
3449 | 0 | */ |
3450 | 0 | rc = hypercall_create_continuation( |
3451 | 0 | __HYPERVISOR_mmuext_op, "hihi", null, |
3452 | 0 | MMU_UPDATE_PREEMPTED, null, rc); |
3453 | 0 | } |
3454 | 0 |
3455 | 0 | put_pg_owner(pg_owner); |
3456 | 0 |
3457 | 0 | perfc_add(num_mmuext_ops, i); |
3458 | 0 |
3459 | 0 | /* Add incremental work we have done to the @done output parameter. */ |
3460 | 0 | if ( unlikely(!guest_handle_is_null(pdone)) ) |
3461 | 0 | { |
3462 | 0 | done += i; |
3463 | 0 | copy_to_guest(pdone, &done, 1); |
3464 | 0 | } |
3465 | 0 |
3466 | 0 | return rc; |
3467 | 0 | } |
3468 | | |
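/*
 * Illustrative sketch, not part of this file: the switch above implements
 * the MMUEXT_* commands reached via the mmuext_op hypercall. A minimal
 * guest-side caller, assuming the public definitions from
 * xen/include/public/xen.h and a guest-supplied HYPERVISOR_mmuext_op()
 * wrapper, would pin a frame as an L2 table like this:
 */
static int pin_as_l2_table(unsigned long mfn)
{
    struct mmuext_op op = {
        .cmd = MMUEXT_PIN_L2_TABLE,   /* validate and pin in one step */
        .arg1.mfn = mfn,              /* frame holding the would-be L2 */
    };

    /* One request, no "done" output wanted, acting on our own pages. */
    return HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
}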
3469 | | long do_mmu_update( |
3470 | | XEN_GUEST_HANDLE_PARAM(mmu_update_t) ureqs, |
3471 | | unsigned int count, |
3472 | | XEN_GUEST_HANDLE_PARAM(uint) pdone, |
3473 | | unsigned int foreigndom) |
3474 | 0 | { |
3475 | 0 | struct mmu_update req; |
3476 | 0 | void *va = NULL; |
3477 | 0 | unsigned long gpfn, gmfn, mfn; |
3478 | 0 | struct page_info *page; |
3479 | 0 | unsigned int cmd, i = 0, done = 0, pt_dom; |
3480 | 0 | struct vcpu *curr = current, *v = curr; |
3481 | 0 | struct domain *d = v->domain, *pt_owner = d, *pg_owner; |
3482 | 0 | mfn_t map_mfn = INVALID_MFN; |
3483 | 0 | uint32_t xsm_needed = 0; |
3484 | 0 | uint32_t xsm_checked = 0; |
3485 | 0 | int rc = put_old_guest_table(curr); |
3486 | 0 |
3487 | 0 | if ( unlikely(rc) ) |
3488 | 0 | { |
3489 | 0 | if ( likely(rc == -ERESTART) ) |
3490 | 0 | rc = hypercall_create_continuation( |
3491 | 0 | __HYPERVISOR_mmu_update, "hihi", ureqs, count, pdone, |
3492 | 0 | foreigndom); |
3493 | 0 | return rc; |
3494 | 0 | } |
3495 | 0 |
3496 | 0 | if ( unlikely(count == MMU_UPDATE_PREEMPTED) && |
3497 | 0 | likely(guest_handle_is_null(ureqs)) ) |
3498 | 0 | { |
3499 | 0 | /* |
3500 | 0 | * See the curr->arch.old_guest_table related |
3501 | 0 | * hypercall_create_continuation() below. |
3502 | 0 | */ |
3503 | 0 | return (int)foreigndom; |
3504 | 0 | } |
3505 | 0 |
3506 | 0 | if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) |
3507 | 0 | { |
3508 | 0 | count &= ~MMU_UPDATE_PREEMPTED; |
3509 | 0 | if ( unlikely(!guest_handle_is_null(pdone)) ) |
3510 | 0 | (void)copy_from_guest(&done, pdone, 1); |
3511 | 0 | } |
3512 | 0 | else |
3513 | 0 | perfc_incr(calls_to_mmu_update); |
3514 | 0 |
3515 | 0 | if ( unlikely(!guest_handle_okay(ureqs, count)) ) |
3516 | 0 | return -EFAULT; |
3517 | 0 |
3518 | 0 | if ( (pt_dom = foreigndom >> 16) != 0 ) |
3519 | 0 | { |
3520 | 0 | /* Pagetables belong to a foreign domain (PFD). */ |
3521 | 0 | if ( (pt_owner = rcu_lock_domain_by_id(pt_dom - 1)) == NULL ) |
3522 | 0 | return -ESRCH; |
3523 | 0 |
3524 | 0 | if ( pt_owner == d ) |
3525 | 0 | rcu_unlock_domain(pt_owner); |
3526 | 0 | else if ( !pt_owner->vcpu || (v = pt_owner->vcpu[0]) == NULL ) |
3527 | 0 | { |
3528 | 0 | rc = -EINVAL; |
3529 | 0 | goto out; |
3530 | 0 | } |
3531 | 0 | } |
3532 | 0 |
3533 | 0 | if ( (pg_owner = get_pg_owner((uint16_t)foreigndom)) == NULL ) |
3534 | 0 | { |
3535 | 0 | rc = -ESRCH; |
3536 | 0 | goto out; |
3537 | 0 | } |
3538 | 0 |
3539 | 0 | for ( i = 0; i < count; i++ ) |
3540 | 0 | { |
3541 | 0 | if ( curr->arch.old_guest_table || (i && hypercall_preempt_check()) ) |
3542 | 0 | { |
3543 | 0 | rc = -ERESTART; |
3544 | 0 | break; |
3545 | 0 | } |
3546 | 0 |
3547 | 0 | if ( unlikely(__copy_from_guest(&req, ureqs, 1) != 0) ) |
3548 | 0 | { |
3549 | 0 | rc = -EFAULT; |
3550 | 0 | break; |
3551 | 0 | } |
3552 | 0 |
3553 | 0 | cmd = req.ptr & (sizeof(l1_pgentry_t)-1); |
3554 | 0 |
3555 | 0 | switch ( cmd ) |
3556 | 0 | { |
3557 | 0 | /* |
3558 | 0 | * MMU_NORMAL_PT_UPDATE: Normal update to any level of page table. |
3559 | 0 | * MMU_PT_UPDATE_PRESERVE_AD: As above but also preserve (OR) |
3560 | 0 | * current A/D bits. |
3561 | 0 | */ |
3562 | 0 | case MMU_NORMAL_PT_UPDATE: |
3563 | 0 | case MMU_PT_UPDATE_PRESERVE_AD: |
3564 | 0 | { |
3565 | 0 | p2m_type_t p2mt; |
3566 | 0 |
3567 | 0 | rc = -EOPNOTSUPP; |
3568 | 0 | if ( unlikely(paging_mode_refcounts(pt_owner)) ) |
3569 | 0 | break; |
3570 | 0 |
3571 | 0 | xsm_needed |= XSM_MMU_NORMAL_UPDATE; |
3572 | 0 | if ( get_pte_flags(req.val) & _PAGE_PRESENT ) |
3573 | 0 | { |
3574 | 0 | xsm_needed |= XSM_MMU_UPDATE_READ; |
3575 | 0 | if ( get_pte_flags(req.val) & _PAGE_RW ) |
3576 | 0 | xsm_needed |= XSM_MMU_UPDATE_WRITE; |
3577 | 0 | } |
3578 | 0 | if ( xsm_needed != xsm_checked ) |
3579 | 0 | { |
3580 | 0 | rc = xsm_mmu_update(XSM_TARGET, d, pt_owner, pg_owner, xsm_needed); |
3581 | 0 | if ( rc ) |
3582 | 0 | break; |
3583 | 0 | xsm_checked = xsm_needed; |
3584 | 0 | } |
3585 | 0 | rc = -EINVAL; |
3586 | 0 |
3587 | 0 | req.ptr -= cmd; |
3588 | 0 | gmfn = req.ptr >> PAGE_SHIFT; |
3589 | 0 | page = get_page_from_gfn(pt_owner, gmfn, &p2mt, P2M_ALLOC); |
3590 | 0 |
3591 | 0 | if ( p2m_is_paged(p2mt) ) |
3592 | 0 | { |
3593 | 0 | ASSERT(!page); |
3594 | 0 | p2m_mem_paging_populate(pt_owner, gmfn); |
3595 | 0 | rc = -ENOENT; |
3596 | 0 | break; |
3597 | 0 | } |
3598 | 0 |
3599 | 0 | if ( unlikely(!page) ) |
3600 | 0 | { |
3601 | 0 | gdprintk(XENLOG_WARNING, |
3602 | 0 | "Could not get page for normal update\n"); |
3603 | 0 | break; |
3604 | 0 | } |
3605 | 0 |
3606 | 0 | mfn = mfn_x(page_to_mfn(page)); |
3607 | 0 |
3608 | 0 | if ( !mfn_eq(_mfn(mfn), map_mfn) ) |
3609 | 0 | { |
3610 | 0 | if ( va ) |
3611 | 0 | unmap_domain_page(va); |
3612 | 0 | va = map_domain_page(_mfn(mfn)); |
3613 | 0 | map_mfn = _mfn(mfn); |
3614 | 0 | } |
3615 | 0 | va = _p(((unsigned long)va & PAGE_MASK) + (req.ptr & ~PAGE_MASK)); |
3616 | 0 |
3617 | 0 | if ( page_lock(page) ) |
3618 | 0 | { |
3619 | 0 | switch ( page->u.inuse.type_info & PGT_type_mask ) |
3620 | 0 | { |
3621 | 0 | case PGT_l1_page_table: |
3622 | 0 | rc = mod_l1_entry(va, l1e_from_intpte(req.val), mfn, |
3623 | 0 | cmd == MMU_PT_UPDATE_PRESERVE_AD, v, |
3624 | 0 | pg_owner); |
3625 | 0 | break; |
3626 | 0 | case PGT_l2_page_table: |
3627 | 0 | rc = mod_l2_entry(va, l2e_from_intpte(req.val), mfn, |
3628 | 0 | cmd == MMU_PT_UPDATE_PRESERVE_AD, v); |
3629 | 0 | break; |
3630 | 0 | case PGT_l3_page_table: |
3631 | 0 | rc = mod_l3_entry(va, l3e_from_intpte(req.val), mfn, |
3632 | 0 | cmd == MMU_PT_UPDATE_PRESERVE_AD, v); |
3633 | 0 | break; |
3634 | 0 | case PGT_l4_page_table: |
3635 | 0 | rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn, |
3636 | 0 | cmd == MMU_PT_UPDATE_PRESERVE_AD, v); |
3637 | 0 | break; |
3638 | 0 | case PGT_writable_page: |
3639 | 0 | perfc_incr(writable_mmu_updates); |
3640 | 0 | if ( paging_write_guest_entry(v, va, req.val, _mfn(mfn)) ) |
3641 | 0 | rc = 0; |
3642 | 0 | break; |
3643 | 0 | } |
3644 | 0 | page_unlock(page); |
3645 | 0 | if ( rc == -EINTR ) |
3646 | 0 | rc = -ERESTART; |
3647 | 0 | } |
3648 | 0 | else if ( get_page_type(page, PGT_writable_page) ) |
3649 | 0 | { |
3650 | 0 | perfc_incr(writable_mmu_updates); |
3651 | 0 | if ( paging_write_guest_entry(v, va, req.val, _mfn(mfn)) ) |
3652 | 0 | rc = 0; |
3653 | 0 | put_page_type(page); |
3654 | 0 | } |
3655 | 0 |
3656 | 0 | put_page(page); |
3657 | 0 | } |
3658 | 0 | break; |
3659 | 0 |
3660 | 0 | case MMU_MACHPHYS_UPDATE: |
3661 | 0 | if ( unlikely(d != pt_owner) ) |
3662 | 0 | { |
3663 | 0 | rc = -EPERM; |
3664 | 0 | break; |
3665 | 0 | } |
3666 | 0 |
3667 | 0 | if ( unlikely(paging_mode_translate(pg_owner)) ) |
3668 | 0 | { |
3669 | 0 | rc = -EINVAL; |
3670 | 0 | break; |
3671 | 0 | } |
3672 | 0 |
3673 | 0 | mfn = req.ptr >> PAGE_SHIFT; |
3674 | 0 | gpfn = req.val; |
3675 | 0 |
3676 | 0 | xsm_needed |= XSM_MMU_MACHPHYS_UPDATE; |
3677 | 0 | if ( xsm_needed != xsm_checked ) |
3678 | 0 | { |
3679 | 0 | rc = xsm_mmu_update(XSM_TARGET, d, NULL, pg_owner, xsm_needed); |
3680 | 0 | if ( rc ) |
3681 | 0 | break; |
3682 | 0 | xsm_checked = xsm_needed; |
3683 | 0 | } |
3684 | 0 |
3685 | 0 | if ( unlikely(!get_page_from_mfn(_mfn(mfn), pg_owner)) ) |
3686 | 0 | { |
3687 | 0 | gdprintk(XENLOG_WARNING, |
3688 | 0 | "Could not get page for mach->phys update\n"); |
3689 | 0 | rc = -EINVAL; |
3690 | 0 | break; |
3691 | 0 | } |
3692 | 0 |
3693 | 0 | set_gpfn_from_mfn(mfn, gpfn); |
3694 | 0 |
3695 | 0 | paging_mark_dirty(pg_owner, _mfn(mfn)); |
3696 | 0 |
3697 | 0 | put_page(mfn_to_page(_mfn(mfn))); |
3698 | 0 | break; |
3699 | 0 |
3700 | 0 | default: |
3701 | 0 | rc = -ENOSYS; |
3702 | 0 | break; |
3703 | 0 | } |
3704 | 0 |
3705 | 0 | if ( unlikely(rc) ) |
3706 | 0 | break; |
3707 | 0 |
3708 | 0 | guest_handle_add_offset(ureqs, 1); |
3709 | 0 | } |
3710 | 0 |
3711 | 0 | if ( rc == -ERESTART ) |
3712 | 0 | { |
3713 | 0 | ASSERT(i < count); |
3714 | 0 | rc = hypercall_create_continuation( |
3715 | 0 | __HYPERVISOR_mmu_update, "hihi", |
3716 | 0 | ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); |
3717 | 0 | } |
3718 | 0 | else if ( curr->arch.old_guest_table ) |
3719 | 0 | { |
3720 | 0 | XEN_GUEST_HANDLE_PARAM(void) null; |
3721 | 0 |
3722 | 0 | ASSERT(rc || i == count); |
3723 | 0 | set_xen_guest_handle(null, NULL); |
3724 | 0 | /* |
3725 | 0 | * In order to have a way to communicate the final return value to |
3726 | 0 | * our continuation, we pass this in place of "foreigndom", building |
3727 | 0 | * on the fact that this argument isn't needed anymore. |
3728 | 0 | */ |
3729 | 0 | rc = hypercall_create_continuation( |
3730 | 0 | __HYPERVISOR_mmu_update, "hihi", null, |
3731 | 0 | MMU_UPDATE_PREEMPTED, null, rc); |
3732 | 0 | } |
3733 | 0 |
3734 | 0 | put_pg_owner(pg_owner); |
3735 | 0 |
3736 | 0 | if ( va ) |
3737 | 0 | unmap_domain_page(va); |
3738 | 0 |
3739 | 0 | perfc_add(num_page_updates, i); |
3740 | 0 |
3741 | 0 | out: |
3742 | 0 | if ( pt_owner != d ) |
3743 | 0 | rcu_unlock_domain(pt_owner); |
3744 | 0 |
3745 | 0 | /* Add incremental work we have done to the @done output parameter. */ |
3746 | 0 | if ( unlikely(!guest_handle_is_null(pdone)) ) |
3747 | 0 | { |
3748 | 0 | done += i; |
3749 | 0 | copy_to_guest(pdone, &done, 1); |
3750 | 0 | } |
3751 | 0 |
3752 | 0 | return rc; |
3753 | 0 | } |
3754 | | |
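/*
 * Illustrative sketch, not part of this file: do_mmu_update() above
 * consumes (ptr, val) pairs with the sub-command encoded in the low bits
 * of ptr (see the "cmd = req.ptr & (sizeof(l1_pgentry_t)-1)" extraction).
 * A guest-side request to rewrite one PTE, assuming pte_maddr is the
 * machine address of the entry and a HYPERVISOR_mmu_update() wrapper:
 */
static int rewrite_pte(uint64_t pte_maddr, uint64_t new_pte)
{
    struct mmu_update req = {
        .ptr = pte_maddr | MMU_NORMAL_PT_UPDATE,  /* cmd in low bits */
        .val = new_pte,                           /* new entry contents */
    };

    return HYPERVISOR_mmu_update(&req, 1, NULL, DOMID_SELF);
}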
3755 | | int donate_page( |
3756 | | struct domain *d, struct page_info *page, unsigned int memflags) |
3757 | 0 | { |
3758 | 0 | const struct domain *owner = dom_xen; |
3759 | 0 |
3760 | 0 | spin_lock(&d->page_alloc_lock); |
3761 | 0 |
3762 | 0 | if ( is_xen_heap_page(page) || ((owner = page_get_owner(page)) != NULL) ) |
3763 | 0 | goto fail; |
3764 | 0 |
3765 | 0 | if ( d->is_dying ) |
3766 | 0 | goto fail; |
3767 | 0 |
3768 | 0 | if ( page->count_info & ~(PGC_allocated | 1) ) |
3769 | 0 | goto fail; |
3770 | 0 |
3771 | 0 | if ( !(memflags & MEMF_no_refcount) ) |
3772 | 0 | { |
3773 | 0 | if ( d->tot_pages >= d->max_pages ) |
3774 | 0 | goto fail; |
3775 | 0 | domain_adjust_tot_pages(d, 1); |
3776 | 0 | } |
3777 | 0 |
3778 | 0 | page->count_info = PGC_allocated | 1; |
3779 | 0 | page_set_owner(page, d); |
3780 | 0 | page_list_add_tail(page, &d->page_list); |
3781 | 0 |
3782 | 0 | spin_unlock(&d->page_alloc_lock); |
3783 | 0 | return 0; |
3784 | 0 |
3785 | 0 | fail: |
3786 | 0 | spin_unlock(&d->page_alloc_lock); |
3787 | 0 | gdprintk(XENLOG_WARNING, "Bad donate mfn %" PRI_mfn |
3788 | 0 | " to d%d (owner d%d) caf=%08lx taf=%" PRtype_info "\n", |
3789 | 0 | mfn_x(page_to_mfn(page)), d->domain_id, |
3790 | 0 | owner ? owner->domain_id : DOMID_INVALID, |
3791 | 0 | page->count_info, page->u.inuse.type_info); |
3792 | 0 | return -EINVAL; |
3793 | 0 | } |
3794 | | |
3795 | | int steal_page( |
3796 | | struct domain *d, struct page_info *page, unsigned int memflags) |
3797 | 0 | { |
3798 | 0 | unsigned long x, y; |
3799 | 0 | bool drop_dom_ref = false; |
3800 | 0 | const struct domain *owner = dom_xen; |
3801 | 0 |
3802 | 0 | if ( paging_mode_external(d) ) |
3803 | 0 | return -EOPNOTSUPP; |
3804 | 0 |
3805 | 0 | spin_lock(&d->page_alloc_lock); |
3806 | 0 |
3807 | 0 | if ( is_xen_heap_page(page) || ((owner = page_get_owner(page)) != d) ) |
3808 | 0 | goto fail; |
3809 | 0 |
3810 | 0 | /* |
3811 | 0 | * We require there is just one reference (PGC_allocated). We temporarily |
3812 | 0 | * drop this reference now so that we can safely swizzle the owner. |
3813 | 0 | */ |
3814 | 0 | y = page->count_info; |
3815 | 0 | do { |
3816 | 0 | x = y; |
3817 | 0 | if ( (x & (PGC_count_mask|PGC_allocated)) != (1 | PGC_allocated) ) |
3818 | 0 | goto fail; |
3819 | 0 | y = cmpxchg(&page->count_info, x, x & ~PGC_count_mask); |
3820 | 0 | } while ( y != x ); |
3821 | 0 |
3822 | 0 | /* |
3823 | 0 | * With the sole reference dropped temporarily, no-one can update type |
3824 | 0 | * information. Type count also needs to be zero in this case, but e.g. |
3825 | 0 | * PGT_seg_desc_page may still have PGT_validated set, which we need to |
3826 | 0 | * clear before transferring ownership (as validation criteria vary |
3827 | 0 | * depending on domain type). |
3828 | 0 | */ |
3829 | 0 | BUG_ON(page->u.inuse.type_info & (PGT_count_mask | PGT_locked | |
3830 | 0 | PGT_pinned)); |
3831 | 0 | page->u.inuse.type_info = 0; |
3832 | 0 |
3833 | 0 | /* Swizzle the owner then reinstate the PGC_allocated reference. */ |
3834 | 0 | page_set_owner(page, NULL); |
3835 | 0 | y = page->count_info; |
3836 | 0 | do { |
3837 | 0 | x = y; |
3838 | 0 | BUG_ON((x & (PGC_count_mask|PGC_allocated)) != PGC_allocated); |
3839 | 0 | } while ( (y = cmpxchg(&page->count_info, x, x | 1)) != x ); |
3840 | 0 |
3841 | 0 | /* Unlink from original owner. */ |
3842 | 0 | if ( !(memflags & MEMF_no_refcount) && !domain_adjust_tot_pages(d, -1) ) |
3843 | 0 | drop_dom_ref = true; |
3844 | 0 | page_list_del(page, &d->page_list); |
3845 | 0 |
3846 | 0 | spin_unlock(&d->page_alloc_lock); |
3847 | 0 | if ( unlikely(drop_dom_ref) ) |
3848 | 0 | put_domain(d); |
3849 | 0 | return 0; |
3850 | 0 |
3851 | 0 | fail: |
3852 | 0 | spin_unlock(&d->page_alloc_lock); |
3853 | 0 | gdprintk(XENLOG_WARNING, "Bad steal mfn %" PRI_mfn |
3854 | 0 | " from d%d (owner d%d) caf=%08lx taf=%" PRtype_info "\n", |
3855 | 0 | mfn_x(page_to_mfn(page)), d->domain_id, |
3856 | 0 | owner ? owner->domain_id : DOMID_INVALID, |
3857 | 0 | page->count_info, page->u.inuse.type_info); |
3858 | 0 | return -EINVAL; |
3859 | 0 | } |
3860 | | |
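/*
 * Illustrative sketch, not part of this file: both cmpxchg() loops in
 * steal_page() above follow the usual snapshot/attempt/retry idiom for
 * lock-free count_info updates. Distilled into a hypothetical helper:
 */
static void drop_refs_keep_flags(unsigned long *count_info)
{
    unsigned long x, y = *count_info;

    do {
        x = y;                                           /* snapshot */
        y = cmpxchg(count_info, x, x & ~PGC_count_mask); /* try swap */
    } while ( y != x );                                  /* raced: retry */
}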
3861 | | static int __do_update_va_mapping( |
3862 | | unsigned long va, u64 val64, unsigned long flags, struct domain *pg_owner) |
3863 | 0 | { |
3864 | 0 | l1_pgentry_t val = l1e_from_intpte(val64); |
3865 | 0 | struct vcpu *v = current; |
3866 | 0 | struct domain *d = v->domain; |
3867 | 0 | struct page_info *gl1pg; |
3868 | 0 | l1_pgentry_t *pl1e; |
3869 | 0 | unsigned long bmap_ptr; |
3870 | 0 | mfn_t gl1mfn; |
3871 | 0 | cpumask_t *mask = NULL; |
3872 | 0 | int rc; |
3873 | 0 |
3874 | 0 | perfc_incr(calls_to_update_va); |
3875 | 0 |
3876 | 0 | rc = xsm_update_va_mapping(XSM_TARGET, d, pg_owner, val); |
3877 | 0 | if ( rc ) |
3878 | 0 | return rc; |
3879 | 0 |
3880 | 0 | rc = -EINVAL; |
3881 | 0 | pl1e = map_guest_l1e(va, &gl1mfn); |
3882 | 0 | if ( unlikely(!pl1e || !get_page_from_mfn(gl1mfn, d)) ) |
3883 | 0 | goto out; |
3884 | 0 |
3885 | 0 | gl1pg = mfn_to_page(gl1mfn); |
3886 | 0 | if ( !page_lock(gl1pg) ) |
3887 | 0 | { |
3888 | 0 | put_page(gl1pg); |
3889 | 0 | goto out; |
3890 | 0 | } |
3891 | 0 |
3892 | 0 | if ( (gl1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table ) |
3893 | 0 | { |
3894 | 0 | page_unlock(gl1pg); |
3895 | 0 | put_page(gl1pg); |
3896 | 0 | goto out; |
3897 | 0 | } |
3898 | 0 |
3899 | 0 | rc = mod_l1_entry(pl1e, val, mfn_x(gl1mfn), 0, v, pg_owner); |
3900 | 0 |
3901 | 0 | page_unlock(gl1pg); |
3902 | 0 | put_page(gl1pg); |
3903 | 0 |
3904 | 0 | out: |
3905 | 0 | if ( pl1e ) |
3906 | 0 | unmap_domain_page(pl1e); |
3907 | 0 |
3908 | 0 | switch ( flags & UVMF_FLUSHTYPE_MASK ) |
3909 | 0 | { |
3910 | 0 | case UVMF_TLB_FLUSH: |
3911 | 0 | switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) ) |
3912 | 0 | { |
3913 | 0 | case UVMF_LOCAL: |
3914 | 0 | flush_tlb_local(); |
3915 | 0 | break; |
3916 | 0 | case UVMF_ALL: |
3917 | 0 | mask = d->domain_dirty_cpumask; |
3918 | 0 | break; |
3919 | 0 | default: |
3920 | 0 | mask = this_cpu(scratch_cpumask); |
3921 | 0 | rc = vcpumask_to_pcpumask(d, const_guest_handle_from_ptr(bmap_ptr, |
3922 | 0 | void), |
3923 | 0 | mask); |
3924 | 0 | break; |
3925 | 0 | } |
3926 | 0 | if ( mask ) |
3927 | 0 | flush_tlb_mask(mask); |
3928 | 0 | break; |
3929 | 0 |
3930 | 0 | case UVMF_INVLPG: |
3931 | 0 | switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) ) |
3932 | 0 | { |
3933 | 0 | case UVMF_LOCAL: |
3934 | 0 | paging_invlpg(v, va); |
3935 | 0 | break; |
3936 | 0 | case UVMF_ALL: |
3937 | 0 | mask = d->domain_dirty_cpumask; |
3938 | 0 | break; |
3939 | 0 | default: |
3940 | 0 | mask = this_cpu(scratch_cpumask); |
3941 | 0 | rc = vcpumask_to_pcpumask(d, const_guest_handle_from_ptr(bmap_ptr, |
3942 | 0 | void), |
3943 | 0 | mask); |
3944 | 0 | break; |
3945 | 0 | } |
3946 | 0 | if ( mask ) |
3947 | 0 | flush_tlb_one_mask(mask, va); |
3948 | 0 | break; |
3949 | 0 | } |
3950 | 0 |
3951 | 0 | return rc; |
3952 | 0 | } |
3953 | | |
3954 | | long do_update_va_mapping(unsigned long va, u64 val64, |
3955 | | unsigned long flags) |
3956 | 0 | { |
3957 | 0 | return __do_update_va_mapping(va, val64, flags, current->domain); |
3958 | 0 | } |
3959 | | |
3960 | | long do_update_va_mapping_otherdomain(unsigned long va, u64 val64, |
3961 | | unsigned long flags, |
3962 | | domid_t domid) |
3963 | 0 | { |
3964 | 0 | struct domain *pg_owner; |
3965 | 0 | int rc; |
3966 | 0 |
3967 | 0 | if ( (pg_owner = get_pg_owner(domid)) == NULL ) |
3968 | 0 | return -ESRCH; |
3969 | 0 |
3970 | 0 | rc = __do_update_va_mapping(va, val64, flags, pg_owner); |
3971 | 0 |
3972 | 0 | put_pg_owner(pg_owner); |
3973 | 0 |
3974 | 0 | return rc; |
3975 | 0 | } |
3976 | | |
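/*
 * Illustrative sketch, not part of this file: both entry points above
 * funnel into __do_update_va_mapping(), whose flags word selects the
 * flush type (and, for the "multi" variants, carries a vcpumask pointer).
 * Assuming a guest-side HYPERVISOR_update_va_mapping() wrapper:
 */
static int map_frame_ro(unsigned long va, unsigned long mfn)
{
    /* Install a read-only PTE, then flush just this VA on this CPU. */
    return HYPERVISOR_update_va_mapping(va,
                                        (mfn << PAGE_SHIFT) | _PAGE_PRESENT,
                                        UVMF_INVLPG | UVMF_LOCAL);
}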
3977 | | typedef struct e820entry e820entry_t; |
3978 | | DEFINE_XEN_GUEST_HANDLE(e820entry_t); |
3979 | | |
3980 | | struct memory_map_context |
3981 | | { |
3982 | | unsigned int n; |
3983 | | unsigned long s; |
3984 | | struct xen_memory_map map; |
3985 | | }; |
3986 | | |
3987 | | static int _handle_iomem_range(unsigned long s, unsigned long e, |
3988 | | struct memory_map_context *ctxt) |
3989 | 0 | { |
3990 | 0 | if ( s > ctxt->s && !(s >> (paddr_bits - PAGE_SHIFT)) ) |
3991 | 0 | { |
3992 | 0 | e820entry_t ent; |
3993 | 0 | XEN_GUEST_HANDLE_PARAM(e820entry_t) buffer_param; |
3994 | 0 | XEN_GUEST_HANDLE(e820entry_t) buffer; |
3995 | 0 |
3996 | 0 | if ( !guest_handle_is_null(ctxt->map.buffer) ) |
3997 | 0 | { |
3998 | 0 | if ( ctxt->n + 1 >= ctxt->map.nr_entries ) |
3999 | 0 | return -EINVAL; |
4000 | 0 | ent.addr = (uint64_t)ctxt->s << PAGE_SHIFT; |
4001 | 0 | ent.size = (uint64_t)(s - ctxt->s) << PAGE_SHIFT; |
4002 | 0 | ent.type = E820_RESERVED; |
4003 | 0 | buffer_param = guest_handle_cast(ctxt->map.buffer, e820entry_t); |
4004 | 0 | buffer = guest_handle_from_param(buffer_param, e820entry_t); |
4005 | 0 | if ( __copy_to_guest_offset(buffer, ctxt->n, &ent, 1) ) |
4006 | 0 | return -EFAULT; |
4007 | 0 | } |
4008 | 0 | ctxt->n++; |
4009 | 0 | } |
4010 | 0 | ctxt->s = e + 1; |
4011 | 0 |
4012 | 0 | return 0; |
4013 | 0 | } |
4014 | | |
4015 | | static int handle_iomem_range(unsigned long s, unsigned long e, void *p) |
4016 | 0 | { |
4017 | 0 | int err = 0; |
4018 | 0 |
4019 | 0 | do { |
4020 | 0 | unsigned long low = -1UL; |
4021 | 0 | unsigned int i; |
4022 | 0 |
4023 | 0 | for ( i = 0; i < nr_ioapics; ++i ) |
4024 | 0 | { |
4025 | 0 | unsigned long mfn = paddr_to_pfn(mp_ioapics[i].mpc_apicaddr); |
4026 | 0 |
4027 | 0 | if ( mfn >= s && mfn <= e && mfn < low ) |
4028 | 0 | low = mfn; |
4029 | 0 | } |
4030 | 0 | if ( !(low + 1) ) |
4031 | 0 | break; |
4032 | 0 | if ( s < low ) |
4033 | 0 | err = _handle_iomem_range(s, low - 1, p); |
4034 | 0 | s = low + 1; |
4035 | 0 | } while ( !err ); |
4036 | 0 |
4037 | 0 | return err || s > e ? err : _handle_iomem_range(s, e, p); |
4038 | 0 | } |
4039 | | |
4040 | | int xenmem_add_to_physmap_one( |
4041 | | struct domain *d, |
4042 | | unsigned int space, |
4043 | | union xen_add_to_physmap_batch_extra extra, |
4044 | | unsigned long idx, |
4045 | | gfn_t gpfn) |
4046 | 3 | { |
4047 | 3 | struct page_info *page = NULL; |
4048 | 3 | unsigned long gfn = 0; /* gcc ... */ |
4049 | 3 | unsigned long prev_mfn, old_gpfn; |
4050 | 3 | int rc = 0; |
4051 | 3 | mfn_t mfn = INVALID_MFN; |
4052 | 3 | p2m_type_t p2mt; |
4053 | 3 | |
4054 | 3 | switch ( space ) |
4055 | 3 | { |
4056 | 2 | case XENMAPSPACE_shared_info: |
4057 | 2 | if ( idx == 0 ) |
4058 | 2 | mfn = _mfn(virt_to_mfn(d->shared_info)); |
4059 | 2 | break; |
4060 | 1 | case XENMAPSPACE_grant_table: |
4061 | 1 | rc = gnttab_map_frame(d, idx, gpfn, &mfn); |
4062 | 1 | if ( rc ) |
4063 | 0 | return rc; |
4064 | 1 | break; |
4065 | 0 | case XENMAPSPACE_gmfn_range: |
4066 | 0 | case XENMAPSPACE_gmfn: |
4067 | 0 | { |
4068 | 0 | p2m_type_t p2mt; |
4069 | 0 |
4070 | 0 | gfn = idx; |
4071 | 0 | idx = mfn_x(get_gfn_unshare(d, idx, &p2mt)); |
4072 | 0 | /* If the page is still shared, exit early */ |
4073 | 0 | if ( p2m_is_shared(p2mt) ) |
4074 | 0 | { |
4075 | 0 | put_gfn(d, gfn); |
4076 | 0 | return -ENOMEM; |
4077 | 0 | } |
4078 | 0 | if ( !get_page_from_mfn(_mfn(idx), d) ) |
4079 | 0 | break; |
4080 | 0 | mfn = _mfn(idx); |
4081 | 0 | page = mfn_to_page(mfn); |
4082 | 0 | break; |
4083 | 0 | } |
4084 | 0 | case XENMAPSPACE_gmfn_foreign: |
4085 | 0 | return p2m_add_foreign(d, idx, gfn_x(gpfn), extra.foreign_domid); |
4086 | 0 | default: |
4087 | 0 | break; |
4088 | 3 | } |
4089 | 3 | |
4090 | 3 | if ( !paging_mode_translate(d) || mfn_eq(mfn, INVALID_MFN) ) |
4091 | 0 | { |
4092 | 0 | rc = -EINVAL; |
4093 | 0 | goto put_both; |
4094 | 0 | } |
4095 | 3 | |
4096 | 3 | /* Remove previously mapped page if it was present. */ |
4097 | 3 | prev_mfn = mfn_x(get_gfn(d, gfn_x(gpfn), &p2mt)); |
4098 | 3 | if ( mfn_valid(_mfn(prev_mfn)) ) |
4099 | 2 | { |
4100 | 2 | if ( is_xen_heap_mfn(prev_mfn) ) |
4101 | 2 | /* Xen heap frames are simply unhooked from this phys slot. */ |
4102 | 1 | rc = guest_physmap_remove_page(d, gpfn, _mfn(prev_mfn), PAGE_ORDER_4K); |
4103 | 2 | else |
4104 | 2 | /* Normal domain memory is freed, to avoid leaking memory. */ |
4105 | 1 | rc = guest_remove_page(d, gfn_x(gpfn)); |
4106 | 2 | } |
4107 | 3 | /* In the XENMAPSPACE_gmfn case we still hold a ref on the old page. */ |
4108 | 3 | put_gfn(d, gfn_x(gpfn)); |
4109 | 3 | |
4110 | 3 | if ( rc ) |
4111 | 0 | goto put_both; |
4112 | 3 | |
4113 | 3 | /* Unmap from old location, if any. */ |
4114 | 3 | old_gpfn = get_gpfn_from_mfn(mfn_x(mfn)); |
4115 | 3 | ASSERT( old_gpfn != SHARED_M2P_ENTRY ); |
4116 | 3 | if ( space == XENMAPSPACE_gmfn || space == XENMAPSPACE_gmfn_range ) |
4117 | 0 | ASSERT( old_gpfn == gfn ); |
4118 | 3 | if ( old_gpfn != INVALID_M2P_ENTRY ) |
4119 | 0 | rc = guest_physmap_remove_page(d, _gfn(old_gpfn), mfn, PAGE_ORDER_4K); |
4120 | 3 | |
4121 | 3 | /* Map at new location. */ |
4122 | 3 | if ( !rc ) |
4123 | 3 | rc = guest_physmap_add_page(d, gpfn, mfn, PAGE_ORDER_4K); |
4124 | 3 | |
4125 | 3 | put_both: |
4126 | 3 | /* In the XENMAPSPACE_gmfn, we took a ref of the gfn at the top */ |
4127 | 3 | if ( space == XENMAPSPACE_gmfn || space == XENMAPSPACE_gmfn_range ) |
4128 | 0 | put_gfn(d, gfn); |
4129 | 3 | |
4130 | 3 | if ( page ) |
4131 | 0 | put_page(page); |
4132 | 3 | |
4133 | 3 | return rc; |
4134 | 3 | } |
4135 | | |
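/*
 * Illustrative sketch, not part of this file: xenmem_add_to_physmap_one()
 * above is reached through XENMEM_add_to_physmap. A translated guest
 * placing its shared-info frame at a chosen GFN, assuming the public
 * memory.h definitions and a HYPERVISOR_memory_op() wrapper:
 */
static int place_shared_info(xen_pfn_t gpfn)
{
    struct xen_add_to_physmap xatp = {
        .domid = DOMID_SELF,
        .space = XENMAPSPACE_shared_info,
        .idx   = 0,       /* only index 0 is valid for this space */
        .gpfn  = gpfn,    /* guest frame at which to surface it */
    };

    return HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
}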
4136 | | long arch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) |
4137 | 1 | { |
4138 | 1 | int rc; |
4139 | 1 | |
4140 | 1 | switch ( cmd ) |
4141 | 1 | { |
4142 | 0 | case XENMEM_set_memory_map: |
4143 | 0 | { |
4144 | 0 | struct xen_foreign_memory_map fmap; |
4145 | 0 | struct domain *d; |
4146 | 0 | struct e820entry *e820; |
4147 | 0 |
4148 | 0 | if ( copy_from_guest(&fmap, arg, 1) ) |
4149 | 0 | return -EFAULT; |
4150 | 0 |
4151 | 0 | if ( fmap.map.nr_entries > E820MAX ) |
4152 | 0 | return -EINVAL; |
4153 | 0 |
4154 | 0 | d = rcu_lock_domain_by_any_id(fmap.domid); |
4155 | 0 | if ( d == NULL ) |
4156 | 0 | return -ESRCH; |
4157 | 0 |
4158 | 0 | rc = xsm_domain_memory_map(XSM_TARGET, d); |
4159 | 0 | if ( rc ) |
4160 | 0 | { |
4161 | 0 | rcu_unlock_domain(d); |
4162 | 0 | return rc; |
4163 | 0 | } |
4164 | 0 |
4165 | 0 | e820 = xmalloc_array(e820entry_t, fmap.map.nr_entries); |
4166 | 0 | if ( e820 == NULL ) |
4167 | 0 | { |
4168 | 0 | rcu_unlock_domain(d); |
4169 | 0 | return -ENOMEM; |
4170 | 0 | } |
4171 | 0 |
4172 | 0 | if ( copy_from_guest(e820, fmap.map.buffer, fmap.map.nr_entries) ) |
4173 | 0 | { |
4174 | 0 | xfree(e820); |
4175 | 0 | rcu_unlock_domain(d); |
4176 | 0 | return -EFAULT; |
4177 | 0 | } |
4178 | 0 |
4179 | 0 | spin_lock(&d->arch.e820_lock); |
4180 | 0 | xfree(d->arch.e820); |
4181 | 0 | d->arch.e820 = e820; |
4182 | 0 | d->arch.nr_e820 = fmap.map.nr_entries; |
4183 | 0 | spin_unlock(&d->arch.e820_lock); |
4184 | 0 |
4185 | 0 | rcu_unlock_domain(d); |
4186 | 0 | return rc; |
4187 | 0 | } |
4188 | 0 |
4189 | 1 | case XENMEM_memory_map: |
4190 | 1 | { |
4191 | 1 | struct xen_memory_map map; |
4192 | 1 | struct domain *d = current->domain; |
4193 | 1 | |
4194 | 1 | if ( copy_from_guest(&map, arg, 1) ) |
4195 | 0 | return -EFAULT; |
4196 | 1 | |
4197 | 1 | spin_lock(&d->arch.e820_lock); |
4198 | 1 | |
4199 | 1 | /* Backwards compatibility. */ |
4200 | 1 | if ( (d->arch.nr_e820 == 0) || (d->arch.e820 == NULL) ) |
4201 | 0 | { |
4202 | 0 | spin_unlock(&d->arch.e820_lock); |
4203 | 0 | return -ENOSYS; |
4204 | 0 | } |
4205 | 1 | |
4206 | 1 | map.nr_entries = min(map.nr_entries, d->arch.nr_e820); |
4207 | 1 | if ( copy_to_guest(map.buffer, d->arch.e820, map.nr_entries) || |
4208 | 1 | __copy_to_guest(arg, &map, 1) ) |
4209 | 0 | { |
4210 | 0 | spin_unlock(&d->arch.e820_lock); |
4211 | 0 | return -EFAULT; |
4212 | 0 | } |
4213 | 1 | |
4214 | 1 | spin_unlock(&d->arch.e820_lock); |
4215 | 1 | return 0; |
4216 | 1 | } |
4217 | 1 | |
4218 | 0 | case XENMEM_machine_memory_map: |
4219 | 0 | { |
4220 | 0 | struct memory_map_context ctxt; |
4221 | 0 | XEN_GUEST_HANDLE(e820entry_t) buffer; |
4222 | 0 | XEN_GUEST_HANDLE_PARAM(e820entry_t) buffer_param; |
4223 | 0 | unsigned int i; |
4224 | 0 | bool store; |
4225 | 0 |
4226 | 0 | rc = xsm_machine_memory_map(XSM_PRIV); |
4227 | 0 | if ( rc ) |
4228 | 0 | return rc; |
4229 | 0 |
4230 | 0 | if ( copy_from_guest(&ctxt.map, arg, 1) ) |
4231 | 0 | return -EFAULT; |
4232 | 0 |
4233 | 0 | store = !guest_handle_is_null(ctxt.map.buffer); |
4234 | 0 |
4235 | 0 | if ( store && ctxt.map.nr_entries < e820.nr_map + 1 ) |
4236 | 0 | return -EINVAL; |
4237 | 0 |
4238 | 0 | buffer_param = guest_handle_cast(ctxt.map.buffer, e820entry_t); |
4239 | 0 | buffer = guest_handle_from_param(buffer_param, e820entry_t); |
4240 | 0 | if ( store && !guest_handle_okay(buffer, ctxt.map.nr_entries) ) |
4241 | 0 | return -EFAULT; |
4242 | 0 |
4243 | 0 | for ( i = 0, ctxt.n = 0, ctxt.s = 0; i < e820.nr_map; ++i, ++ctxt.n ) |
4244 | 0 | { |
4245 | 0 | unsigned long s = PFN_DOWN(e820.map[i].addr); |
4246 | 0 |
4247 | 0 | if ( s > ctxt.s ) |
4248 | 0 | { |
4249 | 0 | rc = rangeset_report_ranges(current->domain->iomem_caps, |
4250 | 0 | ctxt.s, s - 1, |
4251 | 0 | handle_iomem_range, &ctxt); |
4252 | 0 | if ( !rc ) |
4253 | 0 | rc = handle_iomem_range(s, s, &ctxt); |
4254 | 0 | if ( rc ) |
4255 | 0 | return rc; |
4256 | 0 | } |
4257 | 0 | if ( store ) |
4258 | 0 | { |
4259 | 0 | if ( ctxt.map.nr_entries <= ctxt.n + (e820.nr_map - i) ) |
4260 | 0 | return -EINVAL; |
4261 | 0 | if ( __copy_to_guest_offset(buffer, ctxt.n, e820.map + i, 1) ) |
4262 | 0 | return -EFAULT; |
4263 | 0 | } |
4264 | 0 | ctxt.s = PFN_UP(e820.map[i].addr + e820.map[i].size); |
4265 | 0 | } |
4266 | 0 |
4267 | 0 | if ( ctxt.s ) |
4268 | 0 | { |
4269 | 0 | rc = rangeset_report_ranges(current->domain->iomem_caps, ctxt.s, |
4270 | 0 | ~0UL, handle_iomem_range, &ctxt); |
4271 | 0 | if ( !rc && ctxt.s ) |
4272 | 0 | rc = handle_iomem_range(~0UL, ~0UL, &ctxt); |
4273 | 0 | if ( rc ) |
4274 | 0 | return rc; |
4275 | 0 | } |
4276 | 0 |
4277 | 0 | ctxt.map.nr_entries = ctxt.n; |
4278 | 0 |
4279 | 0 | if ( __copy_to_guest(arg, &ctxt.map, 1) ) |
4280 | 0 | return -EFAULT; |
4281 | 0 |
4282 | 0 | return 0; |
4283 | 0 | } |
4284 | 0 |
4285 | 0 | case XENMEM_machphys_mapping: |
4286 | 0 | { |
4287 | 0 | struct xen_machphys_mapping mapping = { |
4288 | 0 | .v_start = MACH2PHYS_VIRT_START, |
4289 | 0 | .v_end = MACH2PHYS_VIRT_END, |
4290 | 0 | .max_mfn = MACH2PHYS_NR_ENTRIES - 1 |
4291 | 0 | }; |
4292 | 0 |
4293 | 0 | if ( !mem_hotplug && is_hardware_domain(current->domain) ) |
4294 | 0 | mapping.max_mfn = max_page - 1; |
4295 | 0 | if ( copy_to_guest(arg, &mapping, 1) ) |
4296 | 0 | return -EFAULT; |
4297 | 0 |
4298 | 0 | return 0; |
4299 | 0 | } |
4300 | 0 |
4301 | 0 | case XENMEM_set_pod_target: |
4302 | 0 | case XENMEM_get_pod_target: |
4303 | 0 | { |
4304 | 0 | xen_pod_target_t target; |
4305 | 0 | struct domain *d; |
4306 | 0 | struct p2m_domain *p2m; |
4307 | 0 |
4308 | 0 | if ( copy_from_guest(&target, arg, 1) ) |
4309 | 0 | return -EFAULT; |
4310 | 0 |
4311 | 0 | d = rcu_lock_domain_by_any_id(target.domid); |
4312 | 0 | if ( d == NULL ) |
4313 | 0 | return -ESRCH; |
4314 | 0 |
4315 | 0 | if ( cmd == XENMEM_set_pod_target ) |
4316 | 0 | rc = xsm_set_pod_target(XSM_PRIV, d); |
4317 | 0 | else |
4318 | 0 | rc = xsm_get_pod_target(XSM_PRIV, d); |
4319 | 0 |
4320 | 0 | if ( rc != 0 ) |
4321 | 0 | goto pod_target_out_unlock; |
4322 | 0 |
4323 | 0 | if ( cmd == XENMEM_set_pod_target ) |
4324 | 0 | { |
4325 | 0 | if ( target.target_pages > d->max_pages ) |
4326 | 0 | { |
4327 | 0 | rc = -EINVAL; |
4328 | 0 | goto pod_target_out_unlock; |
4329 | 0 | } |
4330 | 0 |
4331 | 0 | rc = p2m_pod_set_mem_target(d, target.target_pages); |
4332 | 0 | } |
4333 | 0 |
4334 | 0 | if ( rc == -ERESTART ) |
4335 | 0 | { |
4336 | 0 | rc = hypercall_create_continuation( |
4337 | 0 | __HYPERVISOR_memory_op, "lh", cmd, arg); |
4338 | 0 | } |
4339 | 0 | else if ( rc >= 0 ) |
4340 | 0 | { |
4341 | 0 | p2m = p2m_get_hostp2m(d); |
4342 | 0 | target.tot_pages = d->tot_pages; |
4343 | 0 | target.pod_cache_pages = p2m->pod.count; |
4344 | 0 | target.pod_entries = p2m->pod.entry_count; |
4345 | 0 |
4346 | 0 | if ( __copy_to_guest(arg, &target, 1) ) |
4347 | 0 | { |
4348 | 0 | rc = -EFAULT; |
4349 | 0 | goto pod_target_out_unlock; |
4350 | 0 | } |
4351 | 0 | } |
4352 | 0 |
4353 | 0 | pod_target_out_unlock: |
4354 | 0 | rcu_unlock_domain(d); |
4355 | 0 | return rc; |
4356 | 0 | } |
4357 | 0 |
4358 | 0 | default: |
4359 | 0 | return subarch_memory_op(cmd, arg); |
4360 | 1 | } |
4361 | 1 | |
4362 | 0 | return 0; |
4363 | 1 | } |
4364 | | |
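/*
 * Illustrative sketch, not part of this file: the XENMEM_memory_map case
 * above copies out the domain's pseudo-E820 table. A guest-side query,
 * assuming the public memory.h definitions and set_xen_guest_handle():
 */
static int fetch_e820(struct e820entry *buf, unsigned int limit)
{
    struct xen_memory_map memmap = { .nr_entries = limit };
    int rc;

    set_xen_guest_handle(memmap.buffer, buf);
    rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);

    /* On success, nr_entries is rewritten to the count actually copied. */
    return rc ? rc : (int)memmap.nr_entries;
}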
4365 | | int mmio_ro_emulated_write( |
4366 | | enum x86_segment seg, |
4367 | | unsigned long offset, |
4368 | | void *p_data, |
4369 | | unsigned int bytes, |
4370 | | struct x86_emulate_ctxt *ctxt) |
4371 | 0 | { |
4372 | 0 | struct mmio_ro_emulate_ctxt *mmio_ro_ctxt = ctxt->data; |
4373 | 0 |
4374 | 0 | /* Only allow naturally-aligned stores at the original %cr2 address. */ |
4375 | 0 | if ( ((bytes | offset) & (bytes - 1)) || !bytes || |
4376 | 0 | offset != mmio_ro_ctxt->cr2 ) |
4377 | 0 | { |
4378 | 0 | gdprintk(XENLOG_WARNING, "bad access (cr2=%lx, addr=%lx, bytes=%u)\n", |
4379 | 0 | mmio_ro_ctxt->cr2, offset, bytes); |
4380 | 0 | return X86EMUL_UNHANDLEABLE; |
4381 | 0 | } |
4382 | 0 |
4383 | 0 | return X86EMUL_OKAY; |
4384 | 0 | } |
4385 | | |
4386 | | int mmcfg_intercept_write( |
4387 | | enum x86_segment seg, |
4388 | | unsigned long offset, |
4389 | | void *p_data, |
4390 | | unsigned int bytes, |
4391 | | struct x86_emulate_ctxt *ctxt) |
4392 | 0 | { |
4393 | 0 | struct mmio_ro_emulate_ctxt *mmio_ctxt = ctxt->data; |
4394 | 0 |
4395 | 0 | /* |
4396 | 0 | * Only allow naturally-aligned stores no wider than 4 bytes to the |
4397 | 0 | * original %cr2 address. |
4398 | 0 | */ |
4399 | 0 | if ( ((bytes | offset) & (bytes - 1)) || bytes > 4 || !bytes || |
4400 | 0 | offset != mmio_ctxt->cr2 ) |
4401 | 0 | { |
4402 | 0 | gdprintk(XENLOG_WARNING, "bad write (cr2=%lx, addr=%lx, bytes=%u)\n", |
4403 | 0 | mmio_ctxt->cr2, offset, bytes); |
4404 | 0 | return X86EMUL_UNHANDLEABLE; |
4405 | 0 | } |
4406 | 0 |
4407 | 0 | offset &= 0xfff; |
4408 | 0 | if ( pci_conf_write_intercept(mmio_ctxt->seg, mmio_ctxt->bdf, |
4409 | 0 | offset, bytes, p_data) >= 0 ) |
4410 | 0 | pci_mmcfg_write(mmio_ctxt->seg, PCI_BUS(mmio_ctxt->bdf), |
4411 | 0 | PCI_DEVFN2(mmio_ctxt->bdf), offset, bytes, |
4412 | 0 | *(uint32_t *)p_data); |
4413 | 0 |
4414 | 0 | return X86EMUL_OKAY; |
4415 | 0 | } |
4416 | | |
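/*
 * Note on the two handlers above: the "(bytes | offset) & (bytes - 1)"
 * test rejects both non-power-of-two sizes and offsets that are not
 * aligned to the access size. Worked values:
 *   bytes=4, offset=8  -> (4|8)  & 3 == 0   accepted
 *   bytes=4, offset=6  -> (4|6)  & 3 == 2   rejected (misaligned)
 *   bytes=3, offset=12 -> (3|12) & 2 == 2   rejected (size not a power of 2)
 */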
4417 | | void *alloc_xen_pagetable(void) |
4418 | 31 | { |
4419 | 31 | if ( system_state != SYS_STATE_early_boot ) |
4420 | 18 | { |
4421 | 18 | void *ptr = alloc_xenheap_page(); |
4422 | 18 | |
4423 | 18 | BUG_ON(!hardware_domain && !ptr); |
4424 | 18 | return ptr; |
4425 | 18 | } |
4426 | 31 | |
4427 | 13 | return mfn_to_virt(mfn_x(alloc_boot_pages(1, 1))); |
4428 | 31 | } |
4429 | | |
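/*
 * Illustrative sketch, not part of this file: alloc_xen_pagetable() above
 * has two regimes -- during early boot, frames come from the boot
 * allocator (which panics rather than fail); afterwards, from the
 * xenheap, where NULL is possible. A hypothetical caller therefore
 * looks like:
 */
static void *new_empty_table(void)
{
    void *pg = alloc_xen_pagetable();

    if ( pg )              /* NULL only possible once boot has finished */
        clear_page(pg);    /* page tables must start out all-invalid */
    return pg;
}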
4430 | | void free_xen_pagetable(void *v) |
4431 | 3 | { |
4432 | 3 | if ( system_state != SYS_STATE_early_boot ) |
4433 | 0 | free_xenheap_page(v); |
4434 | 3 | } |
4435 | | |
4436 | | static DEFINE_SPINLOCK(map_pgdir_lock); |
4437 | | |
4438 | | static l3_pgentry_t *virt_to_xen_l3e(unsigned long v) |
4439 | 12.7k | { |
4440 | 12.7k | l4_pgentry_t *pl4e; |
4441 | 12.7k | |
4442 | 12.7k | pl4e = &idle_pg_table[l4_table_offset(v)]; |
4443 | 12.7k | if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) ) |
4444 | 2 | { |
4445 | 2 | bool locking = system_state > SYS_STATE_boot; |
4446 | 2 | l3_pgentry_t *pl3e = alloc_xen_pagetable(); |
4447 | 2 | |
4448 | 2 | if ( !pl3e ) |
4449 | 0 | return NULL; |
4450 | 2 | clear_page(pl3e); |
4451 | 2 | if ( locking ) |
4452 | 0 | spin_lock(&map_pgdir_lock); |
4453 | 2 | if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) ) |
4454 | 2 | { |
4455 | 2 | l4_pgentry_t l4e = l4e_from_paddr(__pa(pl3e), __PAGE_HYPERVISOR); |
4456 | 2 | |
4457 | 2 | l4e_write(pl4e, l4e); |
4458 | 2 | efi_update_l4_pgtable(l4_table_offset(v), l4e); |
4459 | 2 | pl3e = NULL; |
4460 | 2 | } |
4461 | 2 | if ( locking ) |
4462 | 0 | spin_unlock(&map_pgdir_lock); |
4463 | 2 | if ( pl3e ) |
4464 | 0 | free_xen_pagetable(pl3e); |
4465 | 2 | } |
4466 | 12.7k | |
4467 | 12.7k | return l4e_to_l3e(*pl4e) + l3_table_offset(v); |
4468 | 12.7k | } |
4469 | | |
4470 | | static l2_pgentry_t *virt_to_xen_l2e(unsigned long v) |
4471 | 4.60k | { |
4472 | 4.60k | l3_pgentry_t *pl3e; |
4473 | 4.60k | |
4474 | 4.60k | pl3e = virt_to_xen_l3e(v); |
4475 | 4.60k | if ( !pl3e ) |
4476 | 0 | return NULL; |
4477 | 4.60k | |
4478 | 4.60k | if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) |
4479 | 7 | { |
4480 | 7 | bool locking = system_state > SYS_STATE_boot; |
4481 | 7 | l2_pgentry_t *pl2e = alloc_xen_pagetable(); |
4482 | 7 | |
4483 | 7 | if ( !pl2e ) |
4484 | 0 | return NULL; |
4485 | 7 | clear_page(pl2e); |
4486 | 7 | if ( locking ) |
4487 | 0 | spin_lock(&map_pgdir_lock); |
4488 | 7 | if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) |
4489 | 7 | { |
4490 | 7 | l3e_write(pl3e, l3e_from_paddr(__pa(pl2e), __PAGE_HYPERVISOR)); |
4491 | 7 | pl2e = NULL; |
4492 | 7 | } |
4493 | 7 | if ( locking ) |
4494 | 0 | spin_unlock(&map_pgdir_lock); |
4495 | 7 | if ( pl2e ) |
4496 | 0 | free_xen_pagetable(pl2e); |
4497 | 7 | } |
4498 | 4.60k | |
4499 | 4.60k | BUG_ON(l3e_get_flags(*pl3e) & _PAGE_PSE); |
4500 | 4.60k | return l3e_to_l2e(*pl3e) + l2_table_offset(v); |
4501 | 4.60k | } |
4502 | | |
4503 | | l1_pgentry_t *virt_to_xen_l1e(unsigned long v) |
4504 | 13 | { |
4505 | 13 | l2_pgentry_t *pl2e; |
4506 | 13 | |
4507 | 13 | pl2e = virt_to_xen_l2e(v); |
4508 | 13 | if ( !pl2e ) |
4509 | 0 | return NULL; |
4510 | 13 | |
4511 | 13 | if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) |
4512 | 13 | { |
4513 | 13 | bool locking = system_state > SYS_STATE_boot; |
4514 | 13 | l1_pgentry_t *pl1e = alloc_xen_pagetable(); |
4515 | 13 | |
4516 | 13 | if ( !pl1e ) |
4517 | 0 | return NULL; |
4518 | 13 | clear_page(pl1e); |
4519 | 13 | if ( locking ) |
4520 | 0 | spin_lock(&map_pgdir_lock); |
4521 | 13 | if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) |
4522 | 13 | { |
4523 | 13 | l2e_write(pl2e, l2e_from_paddr(__pa(pl1e), __PAGE_HYPERVISOR)); |
4524 | 13 | pl1e = NULL; |
4525 | 13 | } |
4526 | 13 | if ( locking ) |
4527 | 0 | spin_unlock(&map_pgdir_lock); |
4528 | 13 | if ( pl1e ) |
4529 | 0 | free_xen_pagetable(pl1e); |
4530 | 13 | } |
4531 | 13 | |
4532 | 13 | BUG_ON(l2e_get_flags(*pl2e) & _PAGE_PSE); |
4533 | 13 | return l2e_to_l1e(*pl2e) + l1_table_offset(v); |
4534 | 13 | } |
4535 | | |
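/*
 * Illustrative sketch, not part of this file: virt_to_xen_l3e(),
 * virt_to_xen_l2e() and virt_to_xen_l1e() above share one race-handling
 * idiom -- allocate the next-level table with no lock held, re-check the
 * entry under map_pgdir_lock, and give the page back if another CPU
 * installed one first. Distilled (the system_state-based "locking" test
 * is omitted for brevity):
 */
static bool populate_slot(intpte_t *slot)   /* hypothetical helper */
{
    void *new = alloc_xen_pagetable();      /* allocate outside the lock */

    if ( !new )
        return false;
    clear_page(new);
    spin_lock(&map_pgdir_lock);
    if ( !(*slot & _PAGE_PRESENT) )         /* still empty: install ours */
    {
        *slot = __pa(new) | __PAGE_HYPERVISOR;
        new = NULL;                         /* ownership moves to the tree */
    }
    spin_unlock(&map_pgdir_lock);
    if ( new )                              /* lost the race: free it */
        free_xen_pagetable(new);
    return true;
}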
4536 | | /* Convert to/from superpage-mapping flags for map_pages_to_xen(). */ |
4537 | 1.85k | #define l1f_to_lNf(f) (((f) & _PAGE_PRESENT) ? ((f) | _PAGE_PSE) : (f)) |
4538 | 4 | #define lNf_to_l1f(f) (((f) & _PAGE_PRESENT) ? ((f) & ~_PAGE_PSE) : (f)) |
4539 | | |
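/*
 * Worked example of the two macros above: l1f_to_lNf(PAGE_HYPERVISOR)
 * yields PAGE_HYPERVISOR | _PAGE_PSE, as written into L2/L3 entries by
 * the superpage paths below, and lNf_to_l1f() strips _PAGE_PSE again when
 * a superpage is shattered. Non-present flag sets pass through unchanged,
 * since _PAGE_PSE is meaningless without _PAGE_PRESENT.
 */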
4540 | | /* |
4541 | | * map_pages_to_xen() can be called with interrupts disabled during |
4542 | | * early bootstrap. In this case it is safe to use flush_area_local() |
4543 | | * and avoid locking because only the local CPU is online. |
4544 | | */ |
4545 | 57 | #define flush_area(v,f) (!local_irq_is_enabled() ? \ |
4546 | 40 | flush_area_local((const void *)v, f) : \ |
4547 | 17 | flush_area_all((const void *)v, f)) |
4548 | | |
4549 | | int map_pages_to_xen( |
4550 | | unsigned long virt, |
4551 | | unsigned long mfn, |
4552 | | unsigned long nr_mfns, |
4553 | | unsigned int flags) |
4554 | 231 | { |
4555 | 231 | bool locking = system_state > SYS_STATE_boot; |
4556 | 231 | l2_pgentry_t *pl2e, ol2e; |
4557 | 231 | l1_pgentry_t *pl1e, ol1e; |
4558 | 231 | unsigned int i; |
4559 | 231 | |
4560 | 41 | #define flush_flags(oldf) do { \ |
4561 | 2 | unsigned int o_ = (oldf); \ |
4562 | 41 | if ( (o_) & _PAGE_GLOBAL ) \ |
4563 | 41 | flush_flags |= FLUSH_TLB_GLOBAL; \ |
4564 | 41 | if ( (flags & _PAGE_PRESENT) && \ |
4565 | 28 | (((o_) ^ flags) & PAGE_CACHE_ATTRS) ) \ |
4566 | 0 | { \ |
4567 | 0 | flush_flags |= FLUSH_CACHE; \ |
4568 | 0 | if ( virt >= DIRECTMAP_VIRT_START && \ |
4569 | 0 | virt < HYPERVISOR_VIRT_END ) \ |
4570 | 0 | flush_flags |= FLUSH_VA_VALID; \ |
4571 | 0 | } \ |
4572 | 41 | } while (0) |
4573 | 231 | |
4574 | 4.83k | while ( nr_mfns != 0 ) |
4575 | 4.60k | { |
4576 | 4.60k | l3_pgentry_t ol3e, *pl3e = virt_to_xen_l3e(virt); |
4577 | 4.60k | |
4578 | 4.60k | if ( !pl3e ) |
4579 | 0 | return -ENOMEM; |
4580 | 4.60k | ol3e = *pl3e; |
4581 | 4.60k | |
4582 | 4.60k | if ( cpu_has_page1gb && |
4583 | 4.60k | !(((virt >> PAGE_SHIFT) | mfn) & |
4584 | 4.60k | ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1)) && |
4585 | 16 | nr_mfns >= (1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) && |
4586 | 14 | !(flags & (_PAGE_PAT | MAP_SMALL_PAGES)) ) |
4587 | 14 | { |
4588 | 14 | /* 1GB-page mapping. */ |
4589 | 14 | l3e_write_atomic(pl3e, l3e_from_pfn(mfn, l1f_to_lNf(flags))); |
4590 | 14 | |
4591 | 14 | if ( (l3e_get_flags(ol3e) & _PAGE_PRESENT) ) |
4592 | 1 | { |
4593 | 1 | unsigned int flush_flags = |
4594 | 1 | FLUSH_TLB | FLUSH_ORDER(2 * PAGETABLE_ORDER); |
4595 | 1 | |
4596 | 1 | if ( l3e_get_flags(ol3e) & _PAGE_PSE ) |
4597 | 0 | { |
4598 | 0 | flush_flags(lNf_to_l1f(l3e_get_flags(ol3e))); |
4599 | 0 | flush_area(virt, flush_flags); |
4600 | 0 | } |
4601 | 1 | else |
4602 | 1 | { |
4603 | 1 | pl2e = l3e_to_l2e(ol3e); |
4604 | 513 | for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) |
4605 | 512 | { |
4606 | 512 | ol2e = pl2e[i]; |
4607 | 512 | if ( !(l2e_get_flags(ol2e) & _PAGE_PRESENT) ) |
4608 | 512 | continue; |
4609 | 0 | if ( l2e_get_flags(ol2e) & _PAGE_PSE ) |
4610 | 0 | flush_flags(lNf_to_l1f(l2e_get_flags(ol2e))); |
4611 | 0 | else |
4612 | 0 | { |
4613 | 0 | unsigned int j; |
4614 | 0 |
4615 | 0 | pl1e = l2e_to_l1e(ol2e); |
4616 | 0 | for ( j = 0; j < L1_PAGETABLE_ENTRIES; j++ ) |
4617 | 0 | flush_flags(l1e_get_flags(pl1e[j])); |
4618 | 0 | } |
4619 | 0 | } |
4620 | 1 | flush_area(virt, flush_flags); |
4621 | 513 | for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) |
4622 | 512 | { |
4623 | 512 | ol2e = pl2e[i]; |
4624 | 512 | if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) && |
4625 | 0 | !(l2e_get_flags(ol2e) & _PAGE_PSE) ) |
4626 | 0 | free_xen_pagetable(l2e_to_l1e(ol2e)); |
4627 | 512 | } |
4628 | 1 | free_xen_pagetable(pl2e); |
4629 | 1 | } |
4630 | 1 | } |
4631 | 14 | |
4632 | 14 | virt += 1UL << L3_PAGETABLE_SHIFT; |
4633 | 14 | mfn += 1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT); |
4634 | 14 | nr_mfns -= 1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT); |
4635 | 14 | continue; |
4636 | 14 | } |
4637 | 4.60k | |
4638 | 4.59k | if ( (l3e_get_flags(ol3e) & _PAGE_PRESENT) && |
4639 | 4.58k | (l3e_get_flags(ol3e) & _PAGE_PSE) ) |
4640 | 0 | { |
4641 | 0 | unsigned int flush_flags = |
4642 | 0 | FLUSH_TLB | FLUSH_ORDER(2 * PAGETABLE_ORDER); |
4643 | 0 |
4644 | 0 | /* Skip this PTE if there is no change. */ |
4645 | 0 | if ( ((l3e_get_pfn(ol3e) & ~(L2_PAGETABLE_ENTRIES * |
4646 | 0 | L1_PAGETABLE_ENTRIES - 1)) + |
4647 | 0 | (l2_table_offset(virt) << PAGETABLE_ORDER) + |
4648 | 0 | l1_table_offset(virt) == mfn) && |
4649 | 0 | ((lNf_to_l1f(l3e_get_flags(ol3e)) ^ flags) & |
4650 | 0 | ~(_PAGE_ACCESSED|_PAGE_DIRTY)) == 0 ) |
4651 | 0 | { |
4652 | 0 | /* We can skip to end of L3 superpage if we got a match. */ |
4653 | 0 | i = (1u << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - |
4654 | 0 | (mfn & ((1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1)); |
4655 | 0 | if ( i > nr_mfns ) |
4656 | 0 | i = nr_mfns; |
4657 | 0 | virt += i << PAGE_SHIFT; |
4658 | 0 | mfn += i; |
4659 | 0 | nr_mfns -= i; |
4660 | 0 | continue; |
4661 | 0 | } |
4662 | 0 |
4663 | 0 | pl2e = alloc_xen_pagetable(); |
4664 | 0 | if ( pl2e == NULL ) |
4665 | 0 | return -ENOMEM; |
4666 | 0 |
4667 | 0 | for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) |
4668 | 0 | l2e_write(pl2e + i, |
4669 | 0 | l2e_from_pfn(l3e_get_pfn(ol3e) + |
4670 | 0 | (i << PAGETABLE_ORDER), |
4671 | 0 | l3e_get_flags(ol3e))); |
4672 | 0 |
4673 | 0 | if ( l3e_get_flags(ol3e) & _PAGE_GLOBAL ) |
4674 | 0 | flush_flags |= FLUSH_TLB_GLOBAL; |
4675 | 0 |
4676 | 0 | if ( locking ) |
4677 | 0 | spin_lock(&map_pgdir_lock); |
4678 | 0 | if ( (l3e_get_flags(*pl3e) & _PAGE_PRESENT) && |
4679 | 0 | (l3e_get_flags(*pl3e) & _PAGE_PSE) ) |
4680 | 0 | { |
4681 | 0 | l3e_write_atomic(pl3e, l3e_from_pfn(virt_to_mfn(pl2e), |
4682 | 0 | __PAGE_HYPERVISOR)); |
4683 | 0 | pl2e = NULL; |
4684 | 0 | } |
4685 | 0 | if ( locking ) |
4686 | 0 | spin_unlock(&map_pgdir_lock); |
4687 | 0 | flush_area(virt, flush_flags); |
4688 | 0 | if ( pl2e ) |
4689 | 0 | free_xen_pagetable(pl2e); |
4690 | 0 | } |
4691 | 4.59k | |
4692 | 4.59k | pl2e = virt_to_xen_l2e(virt); |
4693 | 4.59k | if ( !pl2e ) |
4694 | 0 | return -ENOMEM; |
4695 | 4.59k | |
4696 | 4.59k | if ( ((((virt >> PAGE_SHIFT) | mfn) & |
4697 | 4.59k | ((1u << PAGETABLE_ORDER) - 1)) == 0) && |
4698 | 1.26k | (nr_mfns >= (1u << PAGETABLE_ORDER)) && |
4699 | 1.25k | !(flags & (_PAGE_PAT|MAP_SMALL_PAGES)) ) |
4700 | 1.25k | { |
4701 | 1.25k | /* Super-page mapping. */ |
4702 | 1.25k | ol2e = *pl2e; |
4703 | 1.25k | l2e_write_atomic(pl2e, l2e_from_pfn(mfn, l1f_to_lNf(flags))); |
4704 | 1.25k | |
4705 | 1.25k | if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) ) |
4706 | 2 | { |
4707 | 2 | unsigned int flush_flags = |
4708 | 2 | FLUSH_TLB | FLUSH_ORDER(PAGETABLE_ORDER); |
4709 | 2 | |
4710 | 2 | if ( l2e_get_flags(ol2e) & _PAGE_PSE ) |
4711 | 2 | { |
4712 | 2 | flush_flags(lNf_to_l1f(l2e_get_flags(ol2e))); |
4713 | 2 | flush_area(virt, flush_flags); |
4714 | 2 | } |
4715 | 2 | else |
4716 | 0 | { |
4717 | 0 | pl1e = l2e_to_l1e(ol2e); |
4718 | 0 | for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) |
4719 | 0 | flush_flags(l1e_get_flags(pl1e[i])); |
4720 | 0 | flush_area(virt, flush_flags); |
4721 | 0 | free_xen_pagetable(pl1e); |
4722 | 0 | } |
4723 | 2 | } |
4724 | 1.25k | |
4725 | 1.25k | virt += 1UL << L2_PAGETABLE_SHIFT; |
4726 | 1.25k | mfn += 1UL << PAGETABLE_ORDER; |
4727 | 1.25k | nr_mfns -= 1UL << PAGETABLE_ORDER; |
4728 | 1.25k | } |
4729 | 4.59k | else |
4730 | 3.34k | { |
4731 | 3.34k | /* Normal page mapping. */ |
4732 | 3.34k | if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) |
4733 | 13 | { |
4734 | 13 | pl1e = virt_to_xen_l1e(virt); |
4735 | 13 | if ( pl1e == NULL ) |
4736 | 0 | return -ENOMEM; |
4737 | 13 | } |
4738 | 3.32k | else if ( l2e_get_flags(*pl2e) & _PAGE_PSE ) |
4739 | 4 | { |
4740 | 4 | unsigned int flush_flags = |
4741 | 4 | FLUSH_TLB | FLUSH_ORDER(PAGETABLE_ORDER); |
4742 | 4 | |
4743 | 4 | /* Skip this PTE if there is no change. */ |
4744 | 4 | if ( (((l2e_get_pfn(*pl2e) & ~(L1_PAGETABLE_ENTRIES - 1)) + |
4745 | 4 | l1_table_offset(virt)) == mfn) && |
4746 | 4 | (((lNf_to_l1f(l2e_get_flags(*pl2e)) ^ flags) & |
4747 | 4 | ~(_PAGE_ACCESSED|_PAGE_DIRTY)) == 0) ) |
4748 | 1 | { |
4749 | 1 | /* We can skip to end of L2 superpage if we got a match. */ |
4750 | 1 | i = (1u << (L2_PAGETABLE_SHIFT - PAGE_SHIFT)) - |
4751 | 1 | (mfn & ((1u << (L2_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1)); |
4752 | 1 | if ( i > nr_mfns ) |
4753 | 0 | i = nr_mfns; |
4754 | 1 | virt += i << L1_PAGETABLE_SHIFT; |
4755 | 1 | mfn += i; |
4756 | 1 | nr_mfns -= i; |
4757 | 1 | goto check_l3; |
4758 | 1 | } |
4759 | 4 | |
4760 | 3 | pl1e = alloc_xen_pagetable(); |
4761 | 3 | if ( pl1e == NULL ) |
4762 | 0 | return -ENOMEM; |
4763 | 3 | |
4764 | 1.53k | for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) |
4765 | 3 | l1e_write(&pl1e[i], |
4766 | 3 | l1e_from_pfn(l2e_get_pfn(*pl2e) + i, |
4767 | 3 | lNf_to_l1f(l2e_get_flags(*pl2e)))); |
4768 | 3 | |
4769 | 3 | if ( l2e_get_flags(*pl2e) & _PAGE_GLOBAL ) |
4770 | 3 | flush_flags |= FLUSH_TLB_GLOBAL; |
4771 | 3 | |
4772 | 3 | if ( locking ) |
4773 | 3 | spin_lock(&map_pgdir_lock); |
4774 | 3 | if ( (l2e_get_flags(*pl2e) & _PAGE_PRESENT) && |
4775 | 3 | (l2e_get_flags(*pl2e) & _PAGE_PSE) ) |
4776 | 3 | { |
4777 | 3 | l2e_write_atomic(pl2e, l2e_from_pfn(virt_to_mfn(pl1e), |
4778 | 3 | __PAGE_HYPERVISOR)); |
4779 | 3 | pl1e = NULL; |
4780 | 3 | } |
4781 | 3 | if ( locking ) |
4782 | 3 | spin_unlock(&map_pgdir_lock); |
4783 | 3 | flush_area(virt, flush_flags); |
4784 | 3 | if ( pl1e ) |
4785 | 0 | free_xen_pagetable(pl1e); |
4786 | 3 | } |
4787 | 3.34k | |
4788 | 3.33k | pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(virt); |
4789 | 3.33k | ol1e = *pl1e; |
4790 | 3.33k | l1e_write_atomic(pl1e, l1e_from_pfn(mfn, flags)); |
4791 | 3.33k | if ( (l1e_get_flags(ol1e) & _PAGE_PRESENT) ) |
4792 | 39 | { |
4793 | 39 | unsigned int flush_flags = FLUSH_TLB | FLUSH_ORDER(0); |
4794 | 39 | |
4795 | 39 | flush_flags(l1e_get_flags(ol1e)); |
4796 | 39 | flush_area(virt, flush_flags); |
4797 | 39 | } |
4798 | 3.33k | |
4799 | 3.33k | virt += 1UL << L1_PAGETABLE_SHIFT; |
4800 | 3.33k | mfn += 1UL; |
4801 | 3.33k | nr_mfns -= 1UL; |
4802 | 3.33k | |
4803 | 3.33k | if ( (flags == PAGE_HYPERVISOR) && |
4804 | 2.76k | ((nr_mfns == 0) || |
4805 | 2.70k | ((((virt >> PAGE_SHIFT) | mfn) & |
4806 | 2.70k | ((1u << PAGETABLE_ORDER) - 1)) == 0)) ) |
4807 | 62 | { |
4808 | 62 | unsigned long base_mfn; |
4809 | 62 | |
4810 | 62 | pl1e = l2e_to_l1e(*pl2e); |
4811 | 62 | if ( locking ) |
4812 | 26 | spin_lock(&map_pgdir_lock); |
4813 | 62 | base_mfn = l1e_get_pfn(*pl1e) & ~(L1_PAGETABLE_ENTRIES - 1); |
4814 | 3.52k | for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++, pl1e++ ) |
4815 | 3.51k | if ( (l1e_get_pfn(*pl1e) != (base_mfn + i)) || |
4816 | 3.50k | (l1e_get_flags(*pl1e) != flags) ) |
4817 | 60 | break; |
4818 | 62 | if ( i == L1_PAGETABLE_ENTRIES ) |
4819 | 2 | { |
4820 | 2 | ol2e = *pl2e; |
4821 | 2 | l2e_write_atomic(pl2e, l2e_from_pfn(base_mfn, |
4822 | 2 | l1f_to_lNf(flags))); |
4823 | 2 | if ( locking ) |
4824 | 0 | spin_unlock(&map_pgdir_lock); |
4825 | 2 | flush_area(virt - PAGE_SIZE, |
4826 | 2 | FLUSH_TLB_GLOBAL | |
4827 | 2 | FLUSH_ORDER(PAGETABLE_ORDER)); |
4828 | 2 | free_xen_pagetable(l2e_to_l1e(ol2e)); |
4829 | 2 | } |
4830 | 60 | else if ( locking ) |
4831 | 26 | spin_unlock(&map_pgdir_lock); |
4832 | 62 | } |
4833 | 3.33k | } |
4834 | 4.59k | |
4835 | 4.59k | check_l3: |
4836 | 4.59k | if ( cpu_has_page1gb && |
4837 | 4.59k | (flags == PAGE_HYPERVISOR) && |
4838 | 3.89k | ((nr_mfns == 0) || |
4839 | 3.72k | !(((virt >> PAGE_SHIFT) | mfn) & |
4840 | 3.72k | ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1))) ) |
4841 | 166 | { |
4842 | 166 | unsigned long base_mfn; |
4843 | 166 | |
4844 | 166 | if ( locking ) |
4845 | 26 | spin_lock(&map_pgdir_lock); |
4846 | 166 | ol3e = *pl3e; |
4847 | 166 | pl2e = l3e_to_l2e(ol3e); |
4848 | 166 | base_mfn = l2e_get_pfn(*pl2e) & ~(L2_PAGETABLE_ENTRIES * |
4849 | 166 | L1_PAGETABLE_ENTRIES - 1); |
4850 | 1.99k | for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++, pl2e++ ) |
4851 | 1.99k | if ( (l2e_get_pfn(*pl2e) != |
4852 | 1.99k | (base_mfn + (i << PAGETABLE_ORDER))) || |
4853 | 1.85k | (l2e_get_flags(*pl2e) != l1f_to_lNf(flags)) ) |
4854 | 166 | break; |
4855 | 166 | if ( i == L2_PAGETABLE_ENTRIES ) |
4856 | 0 | { |
4857 | 0 | l3e_write_atomic(pl3e, l3e_from_pfn(base_mfn, |
4858 | 0 | l1f_to_lNf(flags))); |
4859 | 0 | if ( locking ) |
4860 | 0 | spin_unlock(&map_pgdir_lock); |
4861 | 0 | flush_area(virt - PAGE_SIZE, |
4862 | 0 | FLUSH_TLB_GLOBAL | |
4863 | 0 | FLUSH_ORDER(2*PAGETABLE_ORDER)); |
4864 | 0 | free_xen_pagetable(l3e_to_l2e(ol3e)); |
4865 | 0 | } |
4866 | 166 | else if ( locking ) |
4867 | 26 | spin_unlock(&map_pgdir_lock); |
4868 | 166 | } |
4869 | 4.59k | } |
4870 | 231 | |
4871 | 231 | #undef flush_flags |
4872 | 231 | |
4873 | 231 | return 0; |
4874 | 231 | } |
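/*
 * Editor's note: a minimal, self-contained model (not Xen code) of the
 * alignment test used above before map_pages_to_xen() tries to merge 512
 * contiguous 4kB mappings back into one 2MB superpage: the merge is only
 * considered when the virtual page number and the mfn are both
 * 512-page-aligned.  PAGE_SHIFT and PAGETABLE_ORDER carry their usual
 * x86 values; main() exists only so the sketch compiles and runs.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGETABLE_ORDER 9                    /* 512 entries per level */

/* Mirrors: !(((virt >> PAGE_SHIFT) | mfn) & ((1u << PAGETABLE_ORDER) - 1)) */
static bool can_merge_into_l2(uint64_t virt, uint64_t mfn)
{
    return (((virt >> PAGE_SHIFT) | mfn) &
            ((1u << PAGETABLE_ORDER) - 1)) == 0;
}

int main(void)
{
    printf("%d\n", can_merge_into_l2(0x200000, 0x200)); /* 1: both aligned   */
    printf("%d\n", can_merge_into_l2(0x200000, 0x201)); /* 0: mfn misaligned */
    return 0;
}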
4875 | | |
4876 | | int populate_pt_range(unsigned long virt, unsigned long mfn, |
4877 | | unsigned long nr_mfns) |
4878 | 1 | { |
4879 | 1 | return map_pages_to_xen(virt, mfn, nr_mfns, MAP_SMALL_PAGES); |
4880 | 1 | } |
4881 | | |
4882 | | /* |
4883 | | * Alter the permissions of a range of Xen virtual address space. |
4884 | | * |
4885 | | * Does not create new mappings, and does not modify the mfn in existing |
4886 | | * mappings, but will shatter superpages if necessary, and will destroy |
4887 | | * mappings if not passed _PAGE_PRESENT. |
4888 | | * |
4889 | | * The only flags considered are NX, RW and PRESENT. All other input flags |
4890 | | * are ignored. |
4891 | | * |
4892 | | * It is an error to call with present flags over an unpopulated range. |
4893 | | */ |
4894 | | int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) |
4895 | 10 | { |
4896 | 10 | bool locking = system_state > SYS_STATE_boot; |
4897 | 10 | l2_pgentry_t *pl2e; |
4898 | 10 | l1_pgentry_t *pl1e; |
4899 | 10 | unsigned int i; |
4900 | 10 | unsigned long v = s; |
4901 | 10 | |
4902 | 10 | /* Set of valid PTE bits which may be altered. */ |
4903 | 10 | #define FLAGS_MASK (_PAGE_NX|_PAGE_RW|_PAGE_PRESENT) |
4904 | 10 | nf &= FLAGS_MASK; |
4905 | 10 | |
4906 | 10 | ASSERT(IS_ALIGNED(s, PAGE_SIZE)); |
4907 | 10 | ASSERT(IS_ALIGNED(e, PAGE_SIZE)); |
4908 | 10 | |
4909 | 3.56k | while ( v < e ) |
4910 | 3.55k | { |
4911 | 3.55k | l3_pgentry_t *pl3e = virt_to_xen_l3e(v); |
4912 | 3.55k | |
4913 | 3.55k | if ( !pl3e || !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) |
4914 | 0 | { |
4915 | 0 | /* Confirm the caller isn't trying to create new mappings. */ |
4916 | 0 | ASSERT(!(nf & _PAGE_PRESENT)); |
4917 | 0 | 
4918 | 0 | v += 1UL << L3_PAGETABLE_SHIFT; |
4919 | 0 | v &= ~((1UL << L3_PAGETABLE_SHIFT) - 1); |
4920 | 0 | continue; |
4921 | 0 | } |
4922 | 3.55k | |
4923 | 3.55k | if ( l3e_get_flags(*pl3e) & _PAGE_PSE ) |
4924 | 0 | { |
4925 | 0 | if ( l2_table_offset(v) == 0 && |
4926 | 0 | l1_table_offset(v) == 0 && |
4927 | 0 | ((e - v) >= (1UL << L3_PAGETABLE_SHIFT)) ) |
4928 | 0 | { |
4929 | 0 | /* PAGE1GB: whole superpage is modified. */ |
4930 | 0 | l3_pgentry_t nl3e = !(nf & _PAGE_PRESENT) ? l3e_empty() |
4931 | 0 | : l3e_from_pfn(l3e_get_pfn(*pl3e), |
4932 | 0 | (l3e_get_flags(*pl3e) & ~FLAGS_MASK) | nf); |
4933 | 0 | 
4934 | 0 | l3e_write_atomic(pl3e, nl3e); |
4935 | 0 | v += 1UL << L3_PAGETABLE_SHIFT; |
4936 | 0 | continue; |
4937 | 0 | } |
4938 | 0 | 
4939 | 0 | /* PAGE1GB: shatter the superpage and fall through. */ |
4940 | 0 | pl2e = alloc_xen_pagetable(); |
4941 | 0 | if ( !pl2e ) |
4942 | 0 | return -ENOMEM; |
4943 | 0 | for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) |
4944 | 0 | l2e_write(pl2e + i, |
4945 | 0 | l2e_from_pfn(l3e_get_pfn(*pl3e) + |
4946 | 0 | (i << PAGETABLE_ORDER), |
4947 | 0 | l3e_get_flags(*pl3e))); |
4948 | 0 | if ( locking ) |
4949 | 0 | spin_lock(&map_pgdir_lock); |
4950 | 0 | if ( (l3e_get_flags(*pl3e) & _PAGE_PRESENT) && |
4951 | 0 | (l3e_get_flags(*pl3e) & _PAGE_PSE) ) |
4952 | 0 | { |
4953 | 0 | l3e_write_atomic(pl3e, l3e_from_pfn(virt_to_mfn(pl2e), |
4954 | 0 | __PAGE_HYPERVISOR)); |
4955 | 0 | pl2e = NULL; |
4956 | 0 | } |
4957 | 0 | if ( locking ) |
4958 | 0 | spin_unlock(&map_pgdir_lock); |
4959 | 0 | if ( pl2e ) |
4960 | 0 | free_xen_pagetable(pl2e); |
4961 | 0 | } |
4962 | 3.55k | |
4963 | 3.55k | /* |
4964 | 3.55k | * The L3 entry has been verified to be present, and we've dealt with |
4965 | 3.55k | * 1G pages as well, so the L2 table cannot require allocation. |
4966 | 3.55k | */ |
4967 | 3.55k | pl2e = l3e_to_l2e(*pl3e) + l2_table_offset(v); |
4968 | 3.55k | |
4969 | 3.55k | if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) |
4970 | 1.96k | { |
4971 | 1.96k | /* Confirm the caller isn't trying to create new mappings. */ |
4972 | 1.96k | ASSERT(!(nf & _PAGE_PRESENT)); |
4973 | 1.96k | |
4974 | 1.96k | v += 1UL << L2_PAGETABLE_SHIFT; |
4975 | 1.96k | v &= ~((1UL << L2_PAGETABLE_SHIFT) - 1); |
4976 | 1.96k | continue; |
4977 | 1.96k | } |
4978 | 3.55k | |
4979 | 1.59k | if ( l2e_get_flags(*pl2e) & _PAGE_PSE ) |
4980 | 60 | { |
4981 | 60 | if ( (l1_table_offset(v) == 0) && |
4982 | 60 | ((e-v) >= (1UL << L2_PAGETABLE_SHIFT)) ) |
4983 | 57 | { |
4984 | 57 | /* PSE: whole superpage is modified. */ |
4985 | 57 | l2_pgentry_t nl2e = !(nf & _PAGE_PRESENT) ? l2e_empty() |
4986 | 0 | : l2e_from_pfn(l2e_get_pfn(*pl2e), |
4987 | 57 | (l2e_get_flags(*pl2e) & ~FLAGS_MASK) | nf); |
4988 | 57 | |
4989 | 57 | l2e_write_atomic(pl2e, nl2e); |
4990 | 57 | v += 1UL << L2_PAGETABLE_SHIFT; |
4991 | 57 | } |
4992 | 60 | else |
4993 | 3 | { |
4994 | 3 | /* PSE: shatter the superpage and try again. */ |
4995 | 3 | pl1e = alloc_xen_pagetable(); |
4996 | 3 | if ( !pl1e ) |
4997 | 0 | return -ENOMEM; |
4998 | 1.53k | for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) |
4999 | 3 | l1e_write(&pl1e[i], |
5000 | 3 | l1e_from_pfn(l2e_get_pfn(*pl2e) + i, |
5001 | 3 | l2e_get_flags(*pl2e) & ~_PAGE_PSE)); |
5002 | 3 | if ( locking ) |
5003 | 0 | spin_lock(&map_pgdir_lock); |
5004 | 3 | if ( (l2e_get_flags(*pl2e) & _PAGE_PRESENT) && |
5005 | 3 | (l2e_get_flags(*pl2e) & _PAGE_PSE) ) |
5006 | 3 | { |
5007 | 3 | l2e_write_atomic(pl2e, l2e_from_pfn(virt_to_mfn(pl1e), |
5008 | 3 | __PAGE_HYPERVISOR)); |
5009 | 3 | pl1e = NULL; |
5010 | 3 | } |
5011 | 3 | if ( locking ) |
5012 | 0 | spin_unlock(&map_pgdir_lock); |
5013 | 3 | if ( pl1e ) |
5014 | 0 | free_xen_pagetable(pl1e); |
5015 | 3 | } |
5016 | 60 | } |
5017 | 1.59k | else |
5018 | 1.53k | { |
5019 | 1.53k | l1_pgentry_t nl1e; |
5020 | 1.53k | |
5021 | 1.53k | /* |
5022 | 1.53k | * Ordinary 4kB mapping: The L2 entry has been verified to be |
5023 | 1.53k | * present, and we've dealt with 2M pages as well, so the L1 table |
5024 | 1.53k | * cannot require allocation. |
5025 | 1.53k | */ |
5026 | 1.53k | pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(v); |
5027 | 1.53k | |
5028 | 1.53k | /* Confirm the caller isn't trying to create new mappings. */ |
5029 | 1.53k | if ( !(l1e_get_flags(*pl1e) & _PAGE_PRESENT) ) |
5030 | 0 | ASSERT(!(nf & _PAGE_PRESENT)); |
5031 | 1.53k | |
5032 | 1.53k | nl1e = !(nf & _PAGE_PRESENT) ? l1e_empty() |
5033 | 1.15k | : l1e_from_pfn(l1e_get_pfn(*pl1e), |
5034 | 1.53k | (l1e_get_flags(*pl1e) & ~FLAGS_MASK) | nf); |
5035 | 1.53k | |
5036 | 1.53k | l1e_write_atomic(pl1e, nl1e); |
5037 | 1.53k | v += PAGE_SIZE; |
5038 | 1.53k | |
5039 | 1.53k | /* |
5040 | 1.53k | * If we are not destroying mappings, or not done with the L2E, |
5041 | 1.53k | * skip the empty&free check. |
5042 | 1.53k | */ |
5043 | 1.53k | if ( (nf & _PAGE_PRESENT) || ((v != e) && (l1_table_offset(v) != 0)) ) |
5044 | 1.53k | continue; |
5045 | 2 | pl1e = l2e_to_l1e(*pl2e); |
5046 | 2 | for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) |
5047 | 2 | if ( l1e_get_intpte(pl1e[i]) != 0 ) |
5048 | 2 | break; |
5049 | 2 | if ( i == L1_PAGETABLE_ENTRIES ) |
5050 | 0 | { |
5051 | 0 | /* Empty: zap the L2E and free the L1 page. */ |
5052 | 0 | l2e_write_atomic(pl2e, l2e_empty()); |
5053 | 0 | flush_area(NULL, FLUSH_TLB_GLOBAL); /* flush before free */ |
5054 | 0 | free_xen_pagetable(pl1e); |
5055 | 0 | } |
5056 | 2 | } |
5057 | 1.59k | |
5058 | 1.59k | /* |
5059 | 1.59k | * If we are not destroying mappings, or not done with the L3E, |
5060 | 1.59k | * skip the empty&free check. |
5061 | 1.59k | */ |
5062 | 62 | if ( (nf & _PAGE_PRESENT) || |
5063 | 59 | ((v != e) && (l2_table_offset(v) + l1_table_offset(v) != 0)) ) |
5064 | 59 | continue; |
5065 | 3 | pl2e = l3e_to_l2e(*pl3e); |
5066 | 3 | for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) |
5067 | 3 | if ( l2e_get_intpte(pl2e[i]) != 0 ) |
5068 | 3 | break; |
5069 | 3 | if ( i == L2_PAGETABLE_ENTRIES ) |
5070 | 0 | { |
5071 | 0 | /* Empty: zap the L3E and free the L2 page. */ |
5072 | 0 | l3e_write_atomic(pl3e, l3e_empty()); |
5073 | 0 | flush_area(NULL, FLUSH_TLB_GLOBAL); /* flush before free */ |
5074 | 0 | free_xen_pagetable(pl2e); |
5075 | 0 | } |
5076 | 3 | } |
5077 | 10 | |
5078 | 10 | flush_area(NULL, FLUSH_TLB_GLOBAL); |
5079 | 10 | |
5080 | 10 | #undef FLAGS_MASK |
5081 | 10 | return 0; |
5082 | 10 | } |
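/*
 * Editor's note: hedged usage sketches for modify_xen_mappings(), derived
 * solely from its comment block above; the range [s, e) is illustrative.
 *
 *     // Make the range read-only and non-executable:
 *     modify_xen_mappings(s, e, _PAGE_PRESENT | _PAGE_NX);
 *
 *     // Restore read/write (still non-executable):
 *     modify_xen_mappings(s, e, _PAGE_PRESENT | _PAGE_RW | _PAGE_NX);
 *
 *     // Omitting _PAGE_PRESENT destroys the mappings -- exactly what
 *     // destroy_xen_mappings() below does by passing _PAGE_NONE.
 */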
5083 | | |
5084 | | #undef flush_area |
5085 | | |
5086 | | int destroy_xen_mappings(unsigned long s, unsigned long e) |
5087 | 7 | { |
5088 | 7 | return modify_xen_mappings(s, e, _PAGE_NONE); |
5089 | 7 | } |
5090 | | |
5091 | | void __set_fixmap( |
5092 | | enum fixed_addresses idx, unsigned long mfn, unsigned long flags) |
5093 | 37 | { |
5094 | 37 | BUG_ON(idx >= __end_of_fixed_addresses); |
5095 | 37 | map_pages_to_xen(fix_to_virt(idx), mfn, 1, flags); |
5096 | 37 | } |
5097 | | |
5098 | | void *__init arch_vmap_virt_end(void) |
5099 | 1 | { |
5100 | 1 | return (void *)fix_to_virt(__end_of_fixed_addresses); |
5101 | 1 | } |
5102 | | |
5103 | | void __iomem *ioremap(paddr_t pa, size_t len) |
5104 | 2 | { |
5105 | 2 | mfn_t mfn = _mfn(PFN_DOWN(pa)); |
5106 | 2 | void *va; |
5107 | 2 | |
5108 | 2 | WARN_ON(page_is_ram_type(mfn_x(mfn), RAM_TYPE_CONVENTIONAL)); |
5109 | 2 | |
5110 | 2 | /* The first MB of physical memory is always mapped. */ |
5111 | 2 | if ( !((pa + len - 1) >> 20) ) |
5112 | 1 | va = __va(pa); |
5113 | 2 | else |
5114 | 1 | { |
5115 | 1 | unsigned int offs = pa & (PAGE_SIZE - 1); |
5116 | 1 | unsigned int nr = PFN_UP(offs + len); |
5117 | 1 | |
5118 | 1 | va = __vmap(&mfn, nr, 1, 1, PAGE_HYPERVISOR_UCMINUS, VMAP_DEFAULT) + offs; |
5119 | 1 | } |
5120 | 2 | |
5121 | 2 | return (void __force __iomem *)va; |
5122 | 2 | } |
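/*
 * Editor's note: a standalone model (not Xen code) of the two range
 * computations in ioremap() above: the "whole range below 1MB" shortcut,
 * and the page count once the sub-page offset is folded into the length.
 * PAGE_SIZE and PFN_UP are redefined locally for the sketch.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE  4096u
#define PFN_UP(x)  (((x) + PAGE_SIZE - 1) / PAGE_SIZE)

static bool entirely_below_1mb(uint64_t pa, uint64_t len)
{
    return !((pa + len - 1) >> 20);   /* is the last byte still under 1MB? */
}

int main(void)
{
    printf("%d\n", entirely_below_1mb(0xB8000, 0x1000));    /* 1 */
    printf("%d\n", entirely_below_1mb(0xFEE00000, 0x1000)); /* 0 */

    /* Two bytes starting on the last byte of a page span two frames: */
    printf("%u\n", PFN_UP((0xFFFu & (PAGE_SIZE - 1)) + 2)); /* 2 */
    return 0;
}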
5123 | | |
5124 | | int create_perdomain_mapping(struct domain *d, unsigned long va, |
5125 | | unsigned int nr, l1_pgentry_t **pl1tab, |
5126 | | struct page_info **ppg) |
5127 | 13 | { |
5128 | 13 | struct page_info *pg; |
5129 | 13 | l3_pgentry_t *l3tab; |
5130 | 13 | l2_pgentry_t *l2tab; |
5131 | 13 | l1_pgentry_t *l1tab; |
5132 | 13 | int rc = 0; |
5133 | 13 | |
5134 | 13 | ASSERT(va >= PERDOMAIN_VIRT_START && |
5135 | 13 | va < PERDOMAIN_VIRT_SLOT(PERDOMAIN_SLOTS)); |
5136 | 13 | |
5137 | 13 | if ( !d->arch.perdomain_l3_pg ) |
5138 | 1 | { |
5139 | 1 | pg = alloc_domheap_page(d, MEMF_no_owner); |
5140 | 1 | if ( !pg ) |
5141 | 0 | return -ENOMEM; |
5142 | 1 | l3tab = __map_domain_page(pg); |
5143 | 1 | clear_page(l3tab); |
5144 | 1 | d->arch.perdomain_l3_pg = pg; |
5145 | 1 | if ( !nr ) |
5146 | 1 | { |
5147 | 1 | unmap_domain_page(l3tab); |
5148 | 1 | return 0; |
5149 | 1 | } |
5150 | 1 | } |
5151 | 12 | else if ( !nr ) |
5152 | 0 | return 0; |
5153 | 12 | else |
5154 | 12 | l3tab = __map_domain_page(d->arch.perdomain_l3_pg); |
5155 | 13 | |
5156 | 12 | ASSERT(!l3_table_offset(va ^ (va + nr * PAGE_SIZE - 1))); |
5157 | 12 | |
5158 | 12 | if ( !(l3e_get_flags(l3tab[l3_table_offset(va)]) & _PAGE_PRESENT) ) |
5159 | 1 | { |
5160 | 1 | pg = alloc_domheap_page(d, MEMF_no_owner); |
5161 | 1 | if ( !pg ) |
5162 | 0 | { |
5163 | 0 | unmap_domain_page(l3tab); |
5164 | 0 | return -ENOMEM; |
5165 | 0 | } |
5166 | 1 | l2tab = __map_domain_page(pg); |
5167 | 1 | clear_page(l2tab); |
5168 | 1 | l3tab[l3_table_offset(va)] = l3e_from_page(pg, __PAGE_HYPERVISOR_RW); |
5169 | 1 | } |
5170 | 12 | else |
5171 | 11 | l2tab = map_l2t_from_l3e(l3tab[l3_table_offset(va)]); |
5172 | 12 | |
5173 | 12 | unmap_domain_page(l3tab); |
5174 | 12 | |
5175 | 12 | if ( !pl1tab && !ppg ) |
5176 | 0 | { |
5177 | 0 | unmap_domain_page(l2tab); |
5178 | 0 | return 0; |
5179 | 0 | } |
5180 | 12 | |
5181 | 36 | for ( l1tab = NULL; !rc && nr--; ) |
5182 | 24 | { |
5183 | 24 | l2_pgentry_t *pl2e = l2tab + l2_table_offset(va); |
5184 | 24 | |
5185 | 24 | if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) |
5186 | 1 | { |
5187 | 1 | if ( pl1tab && !IS_NIL(pl1tab) ) |
5188 | 0 | { |
5189 | 0 | l1tab = alloc_xenheap_pages(0, MEMF_node(domain_to_node(d))); |
5190 | 0 | if ( !l1tab ) |
5191 | 0 | { |
5192 | 0 | rc = -ENOMEM; |
5193 | 0 | break; |
5194 | 0 | } |
5195 | 0 | ASSERT(!pl1tab[l2_table_offset(va)]); |
5196 | 0 | pl1tab[l2_table_offset(va)] = l1tab; |
5197 | 0 | pg = virt_to_page(l1tab); |
5198 | 0 | } |
5199 | 1 | else |
5200 | 1 | { |
5201 | 1 | pg = alloc_domheap_page(d, MEMF_no_owner); |
5202 | 1 | if ( !pg ) |
5203 | 0 | { |
5204 | 0 | rc = -ENOMEM; |
5205 | 0 | break; |
5206 | 0 | } |
5207 | 1 | l1tab = __map_domain_page(pg); |
5208 | 1 | } |
5209 | 1 | clear_page(l1tab); |
5210 | 1 | *pl2e = l2e_from_page(pg, __PAGE_HYPERVISOR_RW); |
5211 | 1 | } |
5212 | 23 | else if ( !l1tab ) |
5213 | 11 | l1tab = map_l1t_from_l2e(*pl2e); |
5214 | 24 | |
5215 | 24 | if ( ppg && |
5216 | 24 | !(l1e_get_flags(l1tab[l1_table_offset(va)]) & _PAGE_PRESENT) ) |
5217 | 24 | { |
5218 | 24 | pg = alloc_domheap_page(d, MEMF_no_owner); |
5219 | 24 | if ( pg ) |
5220 | 24 | { |
5221 | 24 | clear_domain_page(page_to_mfn(pg)); |
5222 | 24 | if ( !IS_NIL(ppg) ) |
5223 | 0 | *ppg++ = pg; |
5224 | 24 | l1tab[l1_table_offset(va)] = |
5225 | 24 | l1e_from_page(pg, __PAGE_HYPERVISOR_RW | _PAGE_AVAIL0); |
5226 | 24 | l2e_add_flags(*pl2e, _PAGE_AVAIL0); |
5227 | 24 | } |
5228 | 24 | else |
5229 | 0 | rc = -ENOMEM; |
5230 | 24 | } |
5231 | 24 | |
5232 | 24 | va += PAGE_SIZE; |
5233 | 24 | if ( rc || !nr || !l1_table_offset(va) ) |
5234 | 12 | { |
5235 | 12 | /* Note that this is a no-op for the alloc_xenheap_pages() case. */ |
5236 | 12 | unmap_domain_page(l1tab); |
5237 | 12 | l1tab = NULL; |
5238 | 12 | } |
5239 | 24 | } |
5240 | 12 | |
5241 | 12 | ASSERT(!l1tab); |
5242 | 12 | unmap_domain_page(l2tab); |
5243 | 12 | |
5244 | 12 | return rc; |
5245 | 12 | } |
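/*
 * Editor's note: a self-contained model of the ASSERT() near the top of
 * create_perdomain_mapping() above (and of destroy_perdomain_mapping()
 * below): XORing the first and last byte addresses of
 * [va, va + nr*PAGE_SIZE) leaves the L3 index bits zero exactly when both
 * ends fall in the same L3 slot, i.e. the range does not cross a 1GB
 * boundary.  The shift and mask are the usual 4-level x86-64 values.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE           4096ull
#define L3_PAGETABLE_SHIFT  30            /* each L3 entry maps 1GB */

static uint64_t l3_table_offset(uint64_t va)
{
    return (va >> L3_PAGETABLE_SHIFT) & 0x1ff;   /* 9 index bits */
}

static bool same_l3_slot(uint64_t va, uint64_t nr)
{
    return !l3_table_offset(va ^ (va + nr * PAGE_SIZE - 1));
}

int main(void)
{
    printf("%d\n", same_l3_slot(0x40000000ull, 1)); /* 1: one page at 1GB  */
    printf("%d\n", same_l3_slot(0x7ffff000ull, 2)); /* 0: crosses 2GB line */
    return 0;
}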
5246 | | |
5247 | | void destroy_perdomain_mapping(struct domain *d, unsigned long va, |
5248 | | unsigned int nr) |
5249 | 0 | { |
5250 | 0 | const l3_pgentry_t *l3tab, *pl3e; |
5251 | 0 | 
5252 | 0 | ASSERT(va >= PERDOMAIN_VIRT_START && |
5253 | 0 | va < PERDOMAIN_VIRT_SLOT(PERDOMAIN_SLOTS)); |
5254 | 0 | ASSERT(!l3_table_offset(va ^ (va + nr * PAGE_SIZE - 1))); |
5255 | 0 | 
5256 | 0 | if ( !d->arch.perdomain_l3_pg ) |
5257 | 0 | return; |
5258 | 0 | 
5259 | 0 | l3tab = __map_domain_page(d->arch.perdomain_l3_pg); |
5260 | 0 | pl3e = l3tab + l3_table_offset(va); |
5261 | 0 | 
5262 | 0 | if ( l3e_get_flags(*pl3e) & _PAGE_PRESENT ) |
5263 | 0 | { |
5264 | 0 | const l2_pgentry_t *l2tab = map_l2t_from_l3e(*pl3e); |
5265 | 0 | const l2_pgentry_t *pl2e = l2tab + l2_table_offset(va); |
5266 | 0 | unsigned int i = l1_table_offset(va); |
5267 | 0 | 
5268 | 0 | while ( nr ) |
5269 | 0 | { |
5270 | 0 | if ( l2e_get_flags(*pl2e) & _PAGE_PRESENT ) |
5271 | 0 | { |
5272 | 0 | l1_pgentry_t *l1tab = map_l1t_from_l2e(*pl2e); |
5273 | 0 | 
5274 | 0 | for ( ; nr && i < L1_PAGETABLE_ENTRIES; --nr, ++i ) |
5275 | 0 | { |
5276 | 0 | if ( (l1e_get_flags(l1tab[i]) & |
5277 | 0 | (_PAGE_PRESENT | _PAGE_AVAIL0)) == |
5278 | 0 | (_PAGE_PRESENT | _PAGE_AVAIL0) ) |
5279 | 0 | free_domheap_page(l1e_get_page(l1tab[i])); |
5280 | 0 | l1tab[i] = l1e_empty(); |
5281 | 0 | } |
5282 | 0 | 
5283 | 0 | unmap_domain_page(l1tab); |
5284 | 0 | } |
5285 | 0 | else if ( nr + i < L1_PAGETABLE_ENTRIES ) |
5286 | 0 | break; |
5287 | 0 | else |
5288 | 0 | nr -= L1_PAGETABLE_ENTRIES - i; |
5289 | 0 | 
5290 | 0 | ++pl2e; |
5291 | 0 | i = 0; |
5292 | 0 | } |
5293 | 0 | 
5294 | 0 | unmap_domain_page(l2tab); |
5295 | 0 | } |
5296 | 0 | 
5297 | 0 | unmap_domain_page(l3tab); |
5298 | 0 | } |
5299 | | |
5300 | | void free_perdomain_mappings(struct domain *d) |
5301 | 0 | { |
5302 | 0 | l3_pgentry_t *l3tab; |
5303 | 0 | unsigned int i; |
5304 | 0 | 
5305 | 0 | if ( !d->arch.perdomain_l3_pg ) |
5306 | 0 | return; |
5307 | 0 | 
5308 | 0 | l3tab = __map_domain_page(d->arch.perdomain_l3_pg); |
5309 | 0 | 
5310 | 0 | for ( i = 0; i < PERDOMAIN_SLOTS; ++i) |
5311 | 0 | if ( l3e_get_flags(l3tab[i]) & _PAGE_PRESENT ) |
5312 | 0 | { |
5313 | 0 | struct page_info *l2pg = l3e_get_page(l3tab[i]); |
5314 | 0 | l2_pgentry_t *l2tab = __map_domain_page(l2pg); |
5315 | 0 | unsigned int j; |
5316 | 0 | 
5317 | 0 | for ( j = 0; j < L2_PAGETABLE_ENTRIES; ++j ) |
5318 | 0 | if ( l2e_get_flags(l2tab[j]) & _PAGE_PRESENT ) |
5319 | 0 | { |
5320 | 0 | struct page_info *l1pg = l2e_get_page(l2tab[j]); |
5321 | 0 | 
5322 | 0 | if ( l2e_get_flags(l2tab[j]) & _PAGE_AVAIL0 ) |
5323 | 0 | { |
5324 | 0 | l1_pgentry_t *l1tab = __map_domain_page(l1pg); |
5325 | 0 | unsigned int k; |
5326 | 0 | 
5327 | 0 | for ( k = 0; k < L1_PAGETABLE_ENTRIES; ++k ) |
5328 | 0 | if ( (l1e_get_flags(l1tab[k]) & |
5329 | 0 | (_PAGE_PRESENT | _PAGE_AVAIL0)) == |
5330 | 0 | (_PAGE_PRESENT | _PAGE_AVAIL0) ) |
5331 | 0 | free_domheap_page(l1e_get_page(l1tab[k])); |
5332 | 0 | 
5333 | 0 | unmap_domain_page(l1tab); |
5334 | 0 | } |
5335 | 0 | 
5336 | 0 | if ( is_xen_heap_page(l1pg) ) |
5337 | 0 | free_xenheap_page(page_to_virt(l1pg)); |
5338 | 0 | else |
5339 | 0 | free_domheap_page(l1pg); |
5340 | 0 | } |
5341 | 0 | 
5342 | 0 | unmap_domain_page(l2tab); |
5343 | 0 | free_domheap_page(l2pg); |
5344 | 0 | } |
5345 | 0 | 
5346 | 0 | unmap_domain_page(l3tab); |
5347 | 0 | free_domheap_page(d->arch.perdomain_l3_pg); |
5348 | 0 | d->arch.perdomain_l3_pg = NULL; |
5349 | 0 | } |
5350 | | |
5351 | | #ifdef MEMORY_GUARD |
5352 | | |
5353 | | static void __memguard_change_range(void *p, unsigned long l, int guard) |
5354 | 12 | { |
5355 | 12 | unsigned long _p = (unsigned long)p; |
5356 | 12 | unsigned long _l = (unsigned long)l; |
5357 | 12 | unsigned int flags = __PAGE_HYPERVISOR_RW | MAP_SMALL_PAGES; |
5358 | 12 | |
5359 | 12 | /* Ensure we are dealing with a page-aligned whole number of pages. */ |
5360 | 12 | ASSERT(IS_ALIGNED(_p, PAGE_SIZE)); |
5361 | 12 | ASSERT(IS_ALIGNED(_l, PAGE_SIZE)); |
5362 | 12 | |
5363 | 12 | if ( guard ) |
5364 | 12 | flags &= ~_PAGE_PRESENT; |
5365 | 12 | |
5366 | 12 | map_pages_to_xen( |
5367 | 12 | _p, virt_to_maddr(p) >> PAGE_SHIFT, _l >> PAGE_SHIFT, flags); |
5368 | 12 | } |
5369 | | |
5370 | | void memguard_guard_range(void *p, unsigned long l) |
5371 | 12 | { |
5372 | 12 | __memguard_change_range(p, l, 1); |
5373 | 12 | } |
5374 | | |
5375 | | void memguard_unguard_range(void *p, unsigned long l) |
5376 | 0 | { |
5377 | 0 | __memguard_change_range(p, l, 0); |
5378 | 0 | } |
5379 | | |
5380 | | #endif |
5381 | | |
5382 | | void memguard_guard_stack(void *p) |
5383 | 12 | { |
5384 | 12 | BUILD_BUG_ON((PRIMARY_STACK_SIZE + PAGE_SIZE) > STACK_SIZE); |
5385 | 12 | p = (void *)((unsigned long)p + STACK_SIZE - |
5386 | 12 | PRIMARY_STACK_SIZE - PAGE_SIZE); |
5387 | 12 | memguard_guard_range(p, PAGE_SIZE); |
5388 | 12 | } |
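/*
 * Editor's note: the layout implied by the pointer arithmetic above.  The
 * guard page sits immediately below the primary stack, which occupies the
 * top PRIMARY_STACK_SIZE bytes of the STACK_SIZE region:
 *
 *   p                                                p + STACK_SIZE
 *   | ... other stacks ... | guard page | primary stack |
 *                           ^-- this one PAGE_SIZE span is unmapped
 *
 * An overrun of the primary stack therefore faults immediately instead of
 * silently corrupting whatever lives below it.
 */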
5389 | | |
5390 | | void memguard_unguard_stack(void *p) |
5391 | 0 | { |
5392 | 0 | p = (void *)((unsigned long)p + STACK_SIZE - |
5393 | 0 | PRIMARY_STACK_SIZE - PAGE_SIZE); |
5394 | 0 | memguard_unguard_range(p, PAGE_SIZE); |
5395 | 0 | } |
5396 | | |
5397 | | void arch_dump_shared_mem_info(void) |
5398 | 0 | { |
5399 | 0 | printk("Shared frames %u -- Saved frames %u\n", |
5400 | 0 | mem_sharing_get_nr_shared_mfns(), |
5401 | 0 | mem_sharing_get_nr_saved_mfns()); |
5402 | 0 | } |
5403 | | |
5404 | | const unsigned long *__init get_platform_badpages(unsigned int *array_size) |
5405 | 15 | { |
5406 | 15 | u32 igd_id; |
5407 | 15 | static unsigned long __initdata bad_pages[] = { |
5408 | 15 | 0x20050000, |
5409 | 15 | 0x20110000, |
5410 | 15 | 0x20130000, |
5411 | 15 | 0x20138000, |
5412 | 15 | 0x40004000, |
5413 | 15 | }; |
5414 | 15 | |
5415 | 15 | *array_size = ARRAY_SIZE(bad_pages); |
5416 | 15 | igd_id = pci_conf_read32(0, 0, 2, 0, 0); |
5417 | 15 | if ( !IS_SNB_GFX(igd_id) ) |
5418 | 15 | return NULL; |
5419 | 15 | |
5420 | 0 | return bad_pages; |
5421 | 15 | } |
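/*
 * Editor's note: pci_conf_read32(0, 0, 2, 0, 0) reads config register 0
 * (the vendor/device ID word) of PCI device 0000:00:02.0, the slot
 * conventionally used by Intel integrated graphics.  IS_SNB_GFX(), defined
 * elsewhere, presumably matches Sandy Bridge IGD device IDs, so the
 * bad-page list is only reported on those parts; everywhere else the
 * function returns NULL and no frames are withheld from the allocator.
 */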
5422 | | |
5423 | | void paging_invlpg(struct vcpu *v, unsigned long va) |
5424 | 0 | { |
5425 | 0 | if ( !is_canonical_address(va) ) |
5426 | 0 | return; |
5427 | 0 | 
5428 | 0 | if ( paging_mode_enabled(v->domain) && |
5429 | 0 | !paging_get_hostmode(v)->invlpg(v, va) ) |
5430 | 0 | return; |
5431 | 0 | 
5432 | 0 | if ( is_pv_vcpu(v) ) |
5433 | 0 | flush_tlb_one_local(va); |
5434 | 0 | else |
5435 | 0 | hvm_funcs.invlpg(v, va); |
5436 | 0 | } |
5437 | | |
5438 | | /* Build a 32bit PSE page table using 4MB pages. */ |
5439 | | void write_32bit_pse_identmap(uint32_t *l2) |
5440 | 0 | { |
5441 | 0 | unsigned int i; |
5442 | 0 | 
5443 | 0 | for ( i = 0; i < PAGE_SIZE / sizeof(*l2); i++ ) |
5444 | 0 | l2[i] = ((i << 22) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER | |
5445 | 0 | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE); |
5446 | 0 | } |
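/*
 * Editor's note: a standalone model (not Xen code) of the table the
 * function above produces: 1024 PSE entries, each mapping one 4MB
 * virtual range onto the identical physical range (entry i covers
 * i << 22).  The flag values are the architectural x86 PTE bits.
 */
#include <stdint.h>
#include <stdio.h>

#define _PAGE_PRESENT  0x001u
#define _PAGE_RW       0x002u
#define _PAGE_USER     0x004u
#define _PAGE_ACCESSED 0x020u
#define _PAGE_DIRTY    0x040u
#define _PAGE_PSE      0x080u

int main(void)
{
    uint32_t l2[1024];
    unsigned int i;

    for ( i = 0; i < 1024; i++ )
        l2[i] = (i << 22) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
                _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE;

    /* Entry 1 identity-maps 0x400000: prints l2[1] = 0x4000e7. */
    printf("l2[1] = %#x\n", (unsigned int)l2[1]);
    return 0;
}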
5447 | | |
5448 | | unsigned long get_upper_mfn_bound(void) |
5449 | 0 | { |
5450 | 0 | unsigned long max_mfn; |
5451 | 0 | 
5452 | 0 | max_mfn = mem_hotplug ? PFN_DOWN(mem_hotplug) : max_page; |
5453 | 0 | #ifndef CONFIG_BIGMEM |
5454 | 0 | max_mfn = min(max_mfn, 1UL << 32); |
5455 | 0 | #endif |
5456 | 0 | return min(max_mfn, 1UL << (paddr_bits - PAGE_SHIFT)) - 1; |
5457 | 0 | } |
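/*
 * Editor's note: a worked example of the clamping above, assuming
 * paddr_bits = 36 and no memory hotplug.  The frame count starts at
 * max_page, is capped at 2^32 frames on non-BIGMEM builds, then at
 * 2^(36 - 12) = 2^24 frames (the CPU's physical address width), and the
 * final -1 converts that exclusive bound into the highest valid MFN.
 */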
5458 | | |
5459 | | /* |
5460 | | * Local variables: |
5461 | | * mode: C |
5462 | | * c-file-style: "BSD" |
5463 | | * c-basic-offset: 4 |
5464 | | * tab-width: 4 |
5465 | | * indent-tabs-mode: nil |
5466 | | * End: |
5467 | | */ |