debuggers.hg
annotate xen/arch/x86/mm.c @ 20938:d311d1efc25e
x86: make max_mfn returned from XENMEM_machphys_mapping dynamic
This helps debugging in the guest kernels, as MFNs there can then
be range-checked based on the reported value.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
author | Keir Fraser <keir.fraser@citrix.com> |
---|---|
date | Thu Feb 04 08:53:49 2010 +0000 (2010-02-04) |
parents | 91358472d8c4 |
children | a3fa6d444b25 |
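
The change above makes the `max_mfn` value returned by XENMEM_machphys_mapping reflect the host's actual maximum MFN instead of a compile-time constant, so a guest can range-check MFNs against it. A minimal guest-side sketch of how that value might be consumed (the structure and command constant come from Xen's public headers; the HYPERVISOR_memory_op() wrapper and exact header paths are assumptions about the guest environment):

```c
/* Illustrative sketch only: range-check an MFN against the
 * hypervisor-reported maximum.  struct xen_machphys_mapping and
 * XENMEM_machphys_mapping are from Xen's public memory.h; the
 * HYPERVISOR_memory_op() wrapper is assumed to be provided by the guest. */
static int mfn_is_plausible(unsigned long mfn)
{
    struct xen_machphys_mapping mapping;

    if ( HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) != 0 )
        return 1;   /* no information available: accept the MFN */

    return mfn <= mapping.max_mfn;
}
```
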
rev | line source |
---|---|
kaf24@3757 | 1 /****************************************************************************** |
kaf24@3757 | 2 * arch/x86/mm.c |
kaf24@3757 | 3 * |
kaf24@3757 | 4 * Copyright (c) 2002-2005 K A Fraser |
kaf24@3757 | 5 * Copyright (c) 2004 Christian Limpach |
kaf24@3757 | 6 * |
kaf24@3757 | 7 * This program is free software; you can redistribute it and/or modify |
kaf24@3757 | 8 * it under the terms of the GNU General Public License as published by |
kaf24@3757 | 9 * the Free Software Foundation; either version 2 of the License, or |
kaf24@3757 | 10 * (at your option) any later version. |
kaf24@3757 | 11 * |
kaf24@3757 | 12 * This program is distributed in the hope that it will be useful, |
kaf24@3757 | 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
kaf24@3757 | 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
kaf24@3757 | 15 * GNU General Public License for more details. |
kaf24@3757 | 16 * |
kaf24@3757 | 17 * You should have received a copy of the GNU General Public License |
kaf24@3757 | 18 * along with this program; if not, write to the Free Software |
kaf24@3757 | 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
kaf24@3757 | 20 */ |
kaf24@3757 | 21 |
kaf24@3757 | 22 /* |
kaf24@3757 | 23 * A description of the x86 page table API: |
kaf24@3757 | 24 * |
kaf24@3757 | 25 * Domains trap to do_mmu_update with a list of update requests. |
kaf24@3757 | 26 * This is a list of (ptr, val) pairs, where the requested operation |
kaf24@3757 | 27 * is *ptr = val. |
kaf24@3757 | 28 * |
kaf24@3757 | 29 * Reference counting of pages: |
kaf24@3757 | 30 * ---------------------------- |
kaf24@3757 | 31 * Each page has two refcounts: tot_count and type_count. |
kaf24@3757 | 32 * |
kaf24@3757 | 33 * TOT_COUNT is the obvious reference count. It counts all uses of a |
kaf24@3757 | 34 * physical page frame by a domain, including uses as a page directory, |
kaf24@3757 | 35 * a page table, or simple mappings via a PTE. This count prevents a |
kaf24@3757 | 36 * domain from releasing a frame back to the free pool when it still holds |
kaf24@3757 | 37 * a reference to it. |
kaf24@3757 | 38 * |
kaf24@3757 | 39 * TYPE_COUNT is more subtle. A frame can be put to one of three |
kaf24@3757 | 40 * mutually-exclusive uses: it might be used as a page directory, or a |
kaf24@3757 | 41 * page table, or it may be mapped writable by the domain [of course, a |
kaf24@3757 | 42 * frame may be used in none of these three ways!]. |
kaf24@3757 | 43 * So, type_count is a count of the number of times a frame is being |
kaf24@3757 | 44 * referred to in its current incarnation. Therefore, a page can only |
kaf24@3757 | 45 * change its type when its type count is zero. |
kaf24@3757 | 46 * |
kaf24@3757 | 47 * Pinning the page type: |
kaf24@3757 | 48 * ---------------------- |
kaf24@3757 | 49 * The type of a page can be pinned/unpinned with the commands |
kaf24@3757 | 50 * MMUEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is, |
kaf24@3757 | 51 * pinning is not reference counted, so it can't be nested). |
kaf24@3757 | 52 * This is useful to prevent a page's type count falling to zero, at which |
kaf24@3757 | 53 * point safety checks would need to be carried out next time the count |
kaf24@3757 | 54 * is increased again. |
kaf24@3757 | 55 * |
kaf24@3757 | 56 * A further note on writable page mappings: |
kaf24@3757 | 57 * ----------------------------------------- |
kaf24@3757 | 58 * For simplicity, the count of writable mappings for a page may not |
kaf24@3757 | 59 * correspond to reality. The 'writable count' is incremented for every |
kaf24@3757 | 60 * PTE which maps the page with the _PAGE_RW flag set. However, for |
kaf24@3757 | 61 * write access to be possible the page directory entry must also have |
kaf24@3757 | 62 * its _PAGE_RW bit set. We do not check this as it complicates the |
kaf24@3757 | 63 * reference counting considerably [consider the case of multiple |
kaf24@3757 | 64 * directory entries referencing a single page table, some with the RW |
kaf24@3757 | 65 * bit set, others not -- it starts getting a bit messy]. |
kaf24@3757 | 66 * In normal use, this simplification shouldn't be a problem. |
kaf24@3757 | 67 * However, the logic can be added if required. |
kaf24@3757 | 68 * |
kaf24@3757 | 69 * One more note on read-only page mappings: |
kaf24@3757 | 70 * ----------------------------------------- |
kaf24@3757 | 71 * We want domains to be able to map pages for read-only access. The |
kaf24@3757 | 72 * main reason is that page tables and directories should be readable |
kaf24@3757 | 73 * by a domain, but it would not be safe for them to be writable. |
kaf24@3757 | 74 * However, domains have free access to rings 1 & 2 of the Intel |
kaf24@3757 | 75 * privilege model. In terms of page protection, these are considered |
kaf24@3757 | 76 * to be part of 'supervisor mode'. The WP bit in CR0 controls whether |
kaf24@3757 | 77 * read-only restrictions are respected in supervisor mode -- if the |
kaf24@3757 | 78 * bit is clear then any mapped page is writable. |
kaf24@3757 | 79 * |
kaf24@3757 | 80 * We get round this by always setting the WP bit and disallowing |
kaf24@3757 | 81 * updates to it. This is very unlikely to cause a problem for guest |
kaf24@3757 | 82 * OS's, which will generally use the WP bit to simplify copy-on-write |
kaf24@3757 | 83 * implementation (in that case, OS wants a fault when it writes to |
kaf24@3757 | 84 * an application-supplied buffer). |
kaf24@3757 | 85 */ |
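
To make the interface described in the comment above concrete, here is a short, hedged sketch of how a PV guest issues a (ptr, val) update and pins an L1 table. The mmu_update/mmuext_op structures and command names are from Xen's public headers; the HYPERVISOR_mmu_update()/HYPERVISOR_mmuext_op() wrappers and the omitted error handling are assumptions about the guest environment, not code from this file:

```c
/* Sketch: install a new PTE value at machine address pte_maddr, then pin
 * an L1 page table at MFN l1_mfn so its type count cannot fall to zero.
 * DOMID_SELF, struct mmu_update and struct mmuext_op come from Xen's
 * public headers; the HYPERVISOR_* wrappers are guest-provided. */
static void example_update_and_pin(uint64_t pte_maddr, uint64_t new_pte,
                                   unsigned long l1_mfn)
{
    struct mmu_update req = {
        .ptr = pte_maddr | MMU_NORMAL_PT_UPDATE,  /* "*ptr = val" request */
        .val = new_pte,
    };
    struct mmuext_op pin = {
        .cmd = MMUEXT_PIN_L1_TABLE,
        .arg1.mfn = l1_mfn,
    };

    HYPERVISOR_mmu_update(&req, 1, NULL, DOMID_SELF);
    HYPERVISOR_mmuext_op(&pin, 1, NULL, DOMID_SELF);
}
```
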
kaf24@3757 | 86 |
kaf24@3757 | 87 #include <xen/config.h> |
kaf24@3757 | 88 #include <xen/init.h> |
kaf24@3757 | 89 #include <xen/kernel.h> |
kaf24@3757 | 90 #include <xen/lib.h> |
kaf24@3757 | 91 #include <xen/mm.h> |
kaf24@10281 | 92 #include <xen/domain.h> |
kaf24@3757 | 93 #include <xen/sched.h> |
kaf24@3757 | 94 #include <xen/errno.h> |
kaf24@3757 | 95 #include <xen/perfc.h> |
kaf24@3757 | 96 #include <xen/irq.h> |
kaf24@3757 | 97 #include <xen/softirq.h> |
kaf24@5394 | 98 #include <xen/domain_page.h> |
kaf24@6133 | 99 #include <xen/event.h> |
kaf24@8498 | 100 #include <xen/iocap.h> |
kaf24@9054 | 101 #include <xen/guest_access.h> |
Tim@13938 | 102 #include <asm/paging.h> |
kaf24@3757 | 103 #include <asm/shadow.h> |
kaf24@3757 | 104 #include <asm/page.h> |
kaf24@3757 | 105 #include <asm/flushtlb.h> |
kaf24@3757 | 106 #include <asm/io.h> |
kaf24@3757 | 107 #include <asm/ldt.h> |
kaf24@4198 | 108 #include <asm/x86_emulate.h> |
kaf24@11109 | 109 #include <asm/e820.h> |
ack@13298 | 110 #include <asm/hypercall.h> |
kfraser@14478 | 111 #include <asm/shared.h> |
kaf24@8733 | 112 #include <public/memory.h> |
keir@19946 | 113 #include <public/sched.h> |
kfraser@15846 | 114 #include <xsm/xsm.h> |
keir@16142 | 115 #include <xen/trace.h> |
keir@20323 | 116 #include <asm/setup.h> |
keir@20728 | 117 #include <asm/mem_sharing.h> |
kaf24@3757 | 118 |
keir@16926 | 119 /* |
keir@16926 | 120 * Mapping of first 2 or 4 megabytes of memory. This is mapped with 4kB |
keir@16926 | 121 * mappings to avoid type conflicts with fixed-range MTRRs covering the |
keir@16926 | 122 * lowest megabyte of physical memory. In any case the VGA hole should be |
keir@16926 | 123 * mapped with type UC. |
keir@16926 | 124 */ |
keir@16926 | 125 l1_pgentry_t __attribute__ ((__section__ (".bss.page_aligned"))) |
keir@16926 | 126 l1_identmap[L1_PAGETABLE_ENTRIES]; |
keir@16926 | 127 |
kaf24@12063 | 128 #define MEM_LOG(_f, _a...) gdprintk(XENLOG_WARNING , _f "\n" , ## _a) |
kaf24@3757 | 129 |
kaf24@4426 | 130 /* |
kfraser@10487 | 131 * PTE updates can be done with ordinary writes except: |
kfraser@10487 | 132 * 1. Debug builds get extra checking by using CMPXCHG[8B]. |
kfraser@10487 | 133 * 2. PAE builds perform an atomic 8-byte store with CMPXCHG8B. |
kfraser@10487 | 134 */ |
keir@17638 | 135 #if !defined(NDEBUG) || defined(__i386__) |
kfraser@10487 | 136 #define PTE_UPDATE_WITH_CMPXCHG |
kfraser@10487 | 137 #endif |
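
As a small illustration of the CMPXCHG-based update mentioned above (and not this file's own update logic), a PTE write can be expressed as a compare-and-exchange so that a concurrent modification, such as a hardware-set Accessed/Dirty bit, is detected rather than silently overwritten. This sketch assumes Xen's generic cmpxchg() helper and the intpte_t type:

```c
/* Sketch: atomically replace *pte only if it still holds 'old'.
 * Returns 1 on success; on failure the caller re-reads and retries. */
static inline int pte_write_cmpxchg(intpte_t *pte, intpte_t old, intpte_t new)
{
    return cmpxchg(pte, old, new) == old;
}
```
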
kfraser@10487 | 138 |
keir@20645 | 139 int mem_hotplug = 0; |
keir@20645 | 140 |
kaf24@3757 | 141 /* Private domain structs for DOMID_XEN and DOMID_IO. */ |
keir@20723 | 142 struct domain *dom_xen, *dom_io, *dom_cow; |
kaf24@3757 | 143 |
keir@20201 | 144 /* Frame table size in pages. */ |
kaf24@3757 | 145 unsigned long max_page; |
kaf24@7249 | 146 unsigned long total_pages; |
kaf24@3757 | 147 |
keir@20275 | 148 unsigned long __read_mostly pdx_group_valid[BITS_TO_LONGS( |
keir@20275 | 149 (FRAMETABLE_SIZE / sizeof(*frame_table) + PDX_GROUP_COUNT - 1) |
keir@20275 | 150 / PDX_GROUP_COUNT)] = { [0] = 1 }; |
keir@20275 | 151 |
keir@16164 | 152 #define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT) |
keir@16164 | 153 |
keir@18794 | 154 int opt_allow_hugepage; |
keir@18794 | 155 boolean_param("allowhugepage", opt_allow_hugepage); |
keir@18794 | 156 |
keir@16164 | 157 #define l1_disallow_mask(d) \ |
keir@16179 | 158 ((d != dom_io) && \ |
keir@16179 | 159 (rangeset_is_empty((d)->iomem_caps) && \ |
keir@18393 | 160 rangeset_is_empty((d)->arch.ioport_caps) && \ |
keir@18393 | 161 !has_arch_pdevs(d)) ? \ |
keir@16164 | 162 L1_DISALLOW_MASK : (L1_DISALLOW_MASK & ~PAGE_CACHE_ATTRS)) |
keir@16164 | 163 |
keir@20341 | 164 #ifdef __x86_64__ |
ack@13295 | 165 l2_pgentry_t *compat_idle_pg_table_l2 = NULL; |
kfraser@15012 | 166 #define l3_disallow_mask(d) (!is_pv_32on64_domain(d) ? \ |
kfraser@15012 | 167 L3_DISALLOW_MASK : \ |
ack@13295 | 168 COMPAT_L3_DISALLOW_MASK) |
ack@13295 | 169 #else |
ack@13295 | 170 #define l3_disallow_mask(d) L3_DISALLOW_MASK |
ack@13295 | 171 #endif |
ack@13295 | 172 |
keir@20275 | 173 static void __init init_frametable_chunk(void *start, void *end) |
kaf24@3757 | 174 { |
keir@20275 | 175 unsigned long s = (unsigned long)start; |
keir@20275 | 176 unsigned long e = (unsigned long)end; |
keir@20275 | 177 unsigned long step, mfn; |
keir@20275 | 178 |
keir@20275 | 179 ASSERT(!(s & ((1 << L2_PAGETABLE_SHIFT) - 1))); |
keir@20275 | 180 for ( ; s < e; s += step << PAGE_SHIFT ) |
kaf24@3757 | 181 { |
keir@20275 | 182 step = 1UL << (cpu_has_page1gb && |
keir@20275 | 183 !(s & ((1UL << L3_PAGETABLE_SHIFT) - 1)) ? |
keir@20275 | 184 L3_PAGETABLE_SHIFT - PAGE_SHIFT : |
keir@20275 | 185 L2_PAGETABLE_SHIFT - PAGE_SHIFT); |
keir@19669 | 186 /* |
keir@19669 | 187 * The hardcoded 4 below is arbitrary - just pick whatever you think |
keir@19669 | 188 * is reasonable to waste as a trade-off for using a large page. |
keir@19669 | 189 */ |
keir@20275 | 190 while ( step && s + (step << PAGE_SHIFT) > e + (4 << PAGE_SHIFT) ) |
keir@20275 | 191 step >>= PAGETABLE_ORDER; |
keir@20275 | 192 do { |
keir@20275 | 193 mfn = alloc_boot_pages(step, step); |
keir@20275 | 194 } while ( !mfn && (step >>= PAGETABLE_ORDER) ); |
keir@20275 | 195 if ( !mfn ) |
keir@20275 | 196 panic("Not enough memory for frame table"); |
keir@20275 | 197 map_pages_to_xen(s, mfn, step, PAGE_HYPERVISOR); |
kaf24@3757 | 198 } |
kaf24@3757 | 199 |
keir@20275 | 200 memset(start, 0, end - start); |
keir@20275 | 201 memset(end, -1, s - (unsigned long)end); |
keir@20275 | 202 } |
keir@20275 | 203 |
keir@20275 | 204 void __init init_frametable(void) |
keir@20275 | 205 { |
keir@20275 | 206 unsigned int sidx, eidx, nidx; |
keir@20275 | 207 unsigned int max_idx = (max_pdx + PDX_GROUP_COUNT - 1) / PDX_GROUP_COUNT; |
keir@20275 | 208 |
keir@20275 | 209 #ifdef __x86_64__ |
keir@20275 | 210 BUILD_BUG_ON(XEN_VIRT_END > FRAMETABLE_VIRT_END); |
keir@20275 | 211 #endif |
keir@20275 | 212 BUILD_BUG_ON(FRAMETABLE_VIRT_START & ((1UL << L2_PAGETABLE_SHIFT) - 1)); |
keir@20275 | 213 |
keir@20275 | 214 for ( sidx = 0; ; sidx = nidx ) |
keir@20275 | 215 { |
keir@20275 | 216 eidx = find_next_zero_bit(pdx_group_valid, max_idx, sidx); |
keir@20275 | 217 nidx = find_next_bit(pdx_group_valid, max_idx, eidx); |
keir@20275 | 218 if ( nidx >= max_idx ) |
keir@20275 | 219 break; |
keir@20275 | 220 init_frametable_chunk(pdx_to_page(sidx * PDX_GROUP_COUNT), |
keir@20275 | 221 pdx_to_page(eidx * PDX_GROUP_COUNT)); |
keir@20275 | 222 } |
keir@20663 | 223 if ( !mem_hotplug ) |
keir@20663 | 224 init_frametable_chunk(pdx_to_page(sidx * PDX_GROUP_COUNT), |
keir@20663 | 225 pdx_to_page(max_pdx - 1) + 1); |
keir@20663 | 226 else |
keir@20663 | 227 { |
keir@20937 | 228 init_frametable_chunk(pdx_to_page(sidx * PDX_GROUP_COUNT), |
keir@20937 | 229 pdx_to_page(max_idx * PDX_GROUP_COUNT - 1) + 1); |
keir@20937 | 230 memset(pdx_to_page(max_pdx), -1, |
keir@20937 | 231 (unsigned long)pdx_to_page(max_idx * PDX_GROUP_COUNT) - |
keir@20937 | 232 (unsigned long)pdx_to_page(max_pdx)); |
keir@20663 | 233 } |
kaf24@3757 | 234 } |
kaf24@3757 | 235 |
keir@15081 | 236 void __init arch_init_memory(void) |
kaf24@3757 | 237 { |
keir@16964 | 238 unsigned long i, pfn, rstart_pfn, rend_pfn, iostart_pfn, ioend_pfn; |
kaf24@4570 | 239 |
kaf24@3757 | 240 /* |
kaf24@3757 | 241 * Initialise our DOMID_XEN domain. |
kaf24@3757 | 242 * Any Xen-heap pages that we will allow to be mapped will have |
kaf24@3757 | 243 * their domain field set to dom_xen. |
kaf24@3757 | 244 */ |
keir@17922 | 245 dom_xen = domain_create(DOMID_XEN, DOMCRF_dummy, 0); |
kfraser@10280 | 246 BUG_ON(dom_xen == NULL); |
kaf24@3757 | 247 |
kaf24@3757 | 248 /* |
kaf24@3757 | 249 * Initialise our DOMID_IO domain. |
kaf24@8764 | 250 * This domain owns I/O pages that are within the range of the page_info |
kaf24@4570 | 251 * array. Mappings occur at the priv of the caller. |
kaf24@3757 | 252 */ |
keir@17922 | 253 dom_io = domain_create(DOMID_IO, DOMCRF_dummy, 0); |
kfraser@10280 | 254 BUG_ON(dom_io == NULL); |
keir@20723 | 255 |
keir@20723 | 256 /* |
keir@20723 | 257 * Initialise our COW domain (dom_cow, DOMID_COW). |
keir@20723 | 258 * This domain owns sharable pages. |
keir@20723 | 259 */ |
keir@20723 | 260 dom_cow = domain_create(DOMID_COW, DOMCRF_dummy, 0); |
keir@20723 | 261 BUG_ON(dom_cow == NULL); |
kaf24@3757 | 262 |
kaf24@4570 | 263 /* First 1MB of RAM is historically marked as I/O. */ |
kaf24@4570 | 264 for ( i = 0; i < 0x100; i++ ) |
kaf24@9214 | 265 share_xen_page_with_guest(mfn_to_page(i), dom_io, XENSHARE_writable); |
kaf24@4570 | 266 |
kaf24@5043 | 267 /* Any areas not specified as RAM by the e820 map are considered I/O. */ |
kfraser@15497 | 268 for ( i = 0, pfn = 0; pfn < max_page; i++ ) |
kaf24@4570 | 269 { |
kfraser@15830 | 270 while ( (i < e820.nr_map) && |
kfraser@15830 | 271 (e820.map[i].type != E820_RAM) && |
kfraser@15830 | 272 (e820.map[i].type != E820_UNUSABLE) ) |
kfraser@15495 | 273 i++; |
kfraser@15495 | 274 |
kfraser@15497 | 275 if ( i >= e820.nr_map ) |
kfraser@15495 | 276 { |
kfraser@15495 | 277 /* No more RAM regions: mark as I/O right to end of memory map. */ |
kfraser@15495 | 278 rstart_pfn = rend_pfn = max_page; |
kfraser@15495 | 279 } |
kfraser@15495 | 280 else |
kfraser@15495 | 281 { |
kfraser@15495 | 282 /* Mark as I/O just up as far as next RAM region. */ |
kfraser@15495 | 283 rstart_pfn = min_t(unsigned long, max_page, |
kfraser@15495 | 284 PFN_UP(e820.map[i].addr)); |
kfraser@15495 | 285 rend_pfn = max_t(unsigned long, rstart_pfn, |
kfraser@15495 | 286 PFN_DOWN(e820.map[i].addr + e820.map[i].size)); |
kfraser@15495 | 287 } |
kfraser@15495 | 288 |
keir@16927 | 289 /* |
keir@16964 | 290 * Make sure any Xen mappings of RAM holes above 1MB are blown away. |
keir@16927 | 291 * In particular this ensures that RAM holes are respected even in |
keir@16964 | 292 * the statically-initialised 1-16MB mapping area. |
keir@16927 | 293 */ |
keir@16964 | 294 iostart_pfn = max_t(unsigned long, pfn, 1UL << (20 - PAGE_SHIFT)); |
keir@16927 | 295 #if defined(CONFIG_X86_32) |
keir@20274 | 296 ioend_pfn = min_t(unsigned long, rstart_pfn, |
keir@16927 | 297 DIRECTMAP_MBYTES << (20 - PAGE_SHIFT)); |
keir@20274 | 298 #else |
keir@20274 | 299 ioend_pfn = min(rstart_pfn, 16UL << (20 - PAGE_SHIFT)); |
keir@16927 | 300 #endif |
keir@16964 | 301 if ( iostart_pfn < ioend_pfn ) |
keir@16964 | 302 destroy_xen_mappings((unsigned long)mfn_to_virt(iostart_pfn), |
keir@16927 | 303 (unsigned long)mfn_to_virt(ioend_pfn)); |
keir@16927 | 304 |
kfraser@15495 | 305 /* Mark as I/O up to next RAM region. */ |
kaf24@8434 | 306 for ( ; pfn < rstart_pfn; pfn++ ) |
kaf24@4570 | 307 { |
keir@20274 | 308 if ( !mfn_valid(pfn) ) |
keir@20274 | 309 continue; |
kaf24@9214 | 310 share_xen_page_with_guest( |
kaf24@9214 | 311 mfn_to_page(pfn), dom_io, XENSHARE_writable); |
kaf24@4570 | 312 } |
kfraser@15495 | 313 |
kaf24@5043 | 314 /* Skip the RAM region. */ |
kaf24@5043 | 315 pfn = rend_pfn; |
kaf24@4570 | 316 } |
kaf24@4570 | 317 |
kaf24@9214 | 318 subarch_init_memory(); |
keir@20728 | 319 |
keir@20728 | 320 mem_sharing_init(); |
kaf24@9214 | 321 } |
kaf24@9214 | 322 |
keir@19295 | 323 int page_is_ram_type(unsigned long mfn, unsigned long mem_type) |
kfraser@11194 | 324 { |
keir@19123 | 325 uint64_t maddr = pfn_to_paddr(mfn); |
kfraser@11194 | 326 int i; |
kfraser@11194 | 327 |
kfraser@11194 | 328 for ( i = 0; i < e820.nr_map; i++ ) |
kfraser@11194 | 329 { |
keir@19295 | 330 switch ( e820.map[i].type ) |
keir@19295 | 331 { |
keir@19295 | 332 case E820_RAM: |
keir@19295 | 333 if ( mem_type & RAM_TYPE_CONVENTIONAL ) |
keir@19295 | 334 break; |
keir@19295 | 335 continue; |
keir@19295 | 336 case E820_RESERVED: |
keir@19295 | 337 if ( mem_type & RAM_TYPE_RESERVED ) |
keir@19295 | 338 break; |
keir@19295 | 339 continue; |
keir@19295 | 340 case E820_UNUSABLE: |
keir@19295 | 341 if ( mem_type & RAM_TYPE_UNUSABLE ) |
keir@19295 | 342 break; |
keir@19295 | 343 continue; |
keir@19295 | 344 case E820_ACPI: |
keir@19295 | 345 case E820_NVS: |
keir@19295 | 346 if ( mem_type & RAM_TYPE_ACPI ) |
keir@19295 | 347 break; |
keir@19295 | 348 continue; |
keir@19295 | 349 default: |
keir@19295 | 350 /* unknown */ |
keir@19295 | 351 continue; |
keir@19295 | 352 } |
keir@19295 | 353 |
keir@19295 | 354 /* Test the range. */ |
keir@19295 | 355 if ( (e820.map[i].addr <= maddr) && |
keir@19123 | 356 ((e820.map[i].addr + e820.map[i].size) >= (maddr + PAGE_SIZE)) ) |
kfraser@11194 | 357 return 1; |
kfraser@11194 | 358 } |
kfraser@11194 | 359 |
kfraser@11194 | 360 return 0; |
kfraser@11194 | 361 } |
kfraser@11194 | 362 |
kfraser@14478 | 363 unsigned long domain_get_maximum_gpfn(struct domain *d) |
kfraser@14478 | 364 { |
keir@14982 | 365 if ( is_hvm_domain(d) ) |
keir@17442 | 366 return d->arch.p2m->max_mapped_pfn; |
keir@14982 | 367 /* NB. PV guests specify nr_pfns rather than max_pfn so we adjust here. */ |
keir@14982 | 368 return arch_get_max_pfn(d) - 1; |
kfraser@14478 | 369 } |
kfraser@14478 | 370 |
kaf24@9214 | 371 void share_xen_page_with_guest( |
kaf24@9214 | 372 struct page_info *page, struct domain *d, int readonly) |
kaf24@9214 | 373 { |
kaf24@9214 | 374 if ( page_get_owner(page) == d ) |
kaf24@9214 | 375 return; |
kaf24@9214 | 376 |
kaf24@9237 | 377 set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY); |
kaf24@9237 | 378 |
kaf24@9214 | 379 spin_lock(&d->page_alloc_lock); |
kaf24@9214 | 380 |
kaf24@9214 | 381 /* The incremented type count pins as writable or read-only. */ |
kaf24@9214 | 382 page->u.inuse.type_info = (readonly ? PGT_none : PGT_writable_page); |
kaf24@9214 | 383 page->u.inuse.type_info |= PGT_validated | 1; |
kaf24@9214 | 384 |
kaf24@9214 | 385 page_set_owner(page, d); |
kaf24@9214 | 386 wmb(); /* install valid domain ptr before updating refcnt. */ |
keir@19132 | 387 ASSERT((page->count_info & ~PGC_xen_heap) == 0); |
kfraser@14224 | 388 |
kfraser@14224 | 389 /* Only add to the allocation list if the domain isn't dying. */ |
kfraser@14677 | 390 if ( !d->is_dying ) |
kfraser@14224 | 391 { |
kfraser@14224 | 392 page->count_info |= PGC_allocated | 1; |
kfraser@14224 | 393 if ( unlikely(d->xenheap_pages++ == 0) ) |
kfraser@14224 | 394 get_knownalive_domain(d); |
keir@19170 | 395 page_list_add_tail(page, &d->xenpage_list); |
kfraser@14224 | 396 } |
kaf24@9214 | 397 |
kaf24@9214 | 398 spin_unlock(&d->page_alloc_lock); |
kaf24@9214 | 399 } |
kaf24@9214 | 400 |
kaf24@9214 | 401 void share_xen_page_with_privileged_guests( |
kaf24@9214 | 402 struct page_info *page, int readonly) |
kaf24@9214 | 403 { |
kaf24@9214 | 404 share_xen_page_with_guest(page, dom_xen, readonly); |
kaf24@3757 | 405 } |
kaf24@3757 | 406 |
keir@17638 | 407 #if defined(__i386__) |
kaf24@10215 | 408 |
kaf24@10505 | 409 #ifdef NDEBUG |
kaf24@10215 | 410 /* Only PDPTs above 4GB boundary need to be shadowed in low memory. */ |
kfraser@10490 | 411 #define l3tab_needs_shadow(mfn) ((mfn) >= 0x100000) |
kaf24@10215 | 412 #else |
kaf24@10222 | 413 /* |
kaf24@10505 | 414 * In debug builds we shadow a selection of <4GB PDPTs to exercise code paths. |
keir@19306 | 415 * We cannot safely shadow the idle page table, nor shadow page tables |
keir@19306 | 416 * (detected by zero reference count). As required for correctness, we |
tdeegan@11189 | 417 * always shadow PDPTs above 4GB. |
kaf24@10222 | 418 */ |
keir@19306 | 419 #define l3tab_needs_shadow(mfn) \ |
keir@19306 | 420 (((((mfn) << PAGE_SHIFT) != __pa(idle_pg_table)) && \ |
keir@19306 | 421 (mfn_to_page(mfn)->count_info & PGC_count_mask) && \ |
keir@19306 | 422 ((mfn) & 1)) || /* odd MFNs are shadowed */ \ |
kfraser@10490 | 423 ((mfn) >= 0x100000)) |
kaf24@10215 | 424 #endif |
kaf24@10215 | 425 |
kaf24@10215 | 426 static l1_pgentry_t *fix_pae_highmem_pl1e; |
kaf24@10215 | 427 |
kaf24@10215 | 428 /* Cache the address of PAE high-memory fixmap page tables. */ |
kaf24@10215 | 429 static int __init cache_pae_fixmap_address(void) |
kaf24@10215 | 430 { |
kaf24@10215 | 431 unsigned long fixmap_base = fix_to_virt(FIX_PAE_HIGHMEM_0); |
kaf24@10215 | 432 l2_pgentry_t *pl2e = virt_to_xen_l2e(fixmap_base); |
kaf24@10215 | 433 fix_pae_highmem_pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(fixmap_base); |
kaf24@10215 | 434 return 0; |
kaf24@10215 | 435 } |
kaf24@10215 | 436 __initcall(cache_pae_fixmap_address); |
kaf24@10215 | 437 |
tdeegan@11189 | 438 static DEFINE_PER_CPU(u32, make_cr3_timestamp); |
tdeegan@11189 | 439 |
tdeegan@11189 | 440 void make_cr3(struct vcpu *v, unsigned long mfn) |
tdeegan@11189 | 441 /* Takes the MFN of a PAE l3 table, copies the contents to below 4GB if |
tdeegan@11189 | 442 * necessary, and sets v->arch.cr3 to the value to load in CR3. */ |
kaf24@10211 | 443 { |
kaf24@10215 | 444 l3_pgentry_t *highmem_l3tab, *lowmem_l3tab; |
tdeegan@11189 | 445 struct pae_l3_cache *cache = &v->arch.pae_l3_cache; |
kaf24@10215 | 446 unsigned int cpu = smp_processor_id(); |
kaf24@10215 | 447 |
tdeegan@11189 | 448 /* Fast path: does this mfn need a shadow at all? */ |
kaf24@10215 | 449 if ( !l3tab_needs_shadow(mfn) ) |
kaf24@10211 | 450 { |
tdeegan@11189 | 451 v->arch.cr3 = mfn << PAGE_SHIFT; |
tdeegan@11189 | 452 /* Cache is no longer in use or valid */ |
kaf24@10505 | 453 cache->high_mfn = 0; |
kaf24@10215 | 454 return; |
kaf24@10215 | 455 } |
kaf24@10215 | 456 |
kaf24@10215 | 457 /* Caching logic is not interrupt safe. */ |
kaf24@10215 | 458 ASSERT(!in_irq()); |
kaf24@10215 | 459 |
kaf24@10215 | 460 /* Protects against pae_flush_pgd(). */ |
kaf24@10215 | 461 spin_lock(&cache->lock); |
kaf24@10215 | 462 |
kaf24@10215 | 463 cache->inuse_idx ^= 1; |
kaf24@10215 | 464 cache->high_mfn = mfn; |
kaf24@10215 | 465 |
kaf24@10215 | 466 /* Map the guest L3 table and copy to the chosen low-memory cache. */ |
kfraser@12825 | 467 l1e_write(fix_pae_highmem_pl1e-cpu, l1e_from_pfn(mfn, __PAGE_HYPERVISOR)); |
tdeegan@11189 | 468 /* First check the previous high mapping can't be in the TLB. |
tdeegan@11189 | 469 * (i.e. have we loaded CR3 since we last did this?) */ |
tdeegan@11189 | 470 if ( unlikely(this_cpu(make_cr3_timestamp) == this_cpu(tlbflush_time)) ) |
keir@16155 | 471 flush_tlb_one_local(fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu)); |
kaf24@10215 | 472 highmem_l3tab = (l3_pgentry_t *)fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu); |
kaf24@10215 | 473 lowmem_l3tab = cache->table[cache->inuse_idx]; |
kaf24@10215 | 474 memcpy(lowmem_l3tab, highmem_l3tab, sizeof(cache->table[0])); |
kfraser@12825 | 475 l1e_write(fix_pae_highmem_pl1e-cpu, l1e_empty()); |
tdeegan@11189 | 476 this_cpu(make_cr3_timestamp) = this_cpu(tlbflush_time); |
tdeegan@11189 | 477 |
tdeegan@11189 | 478 v->arch.cr3 = __pa(lowmem_l3tab); |
kaf24@10215 | 479 |
kaf24@10215 | 480 spin_unlock(&cache->lock); |
kaf24@10215 | 481 } |
kaf24@10215 | 482 |
keir@17638 | 483 #else /* !defined(__i386__) */ |
kaf24@10215 | 484 |
tdeegan@11189 | 485 void make_cr3(struct vcpu *v, unsigned long mfn) |
kaf24@10215 | 486 { |
tdeegan@11189 | 487 v->arch.cr3 = mfn << PAGE_SHIFT; |
kaf24@10211 | 488 } |
kaf24@10211 | 489 |
keir@17638 | 490 #endif /* !defined(__i386__) */ |
kaf24@10215 | 491 |
kaf24@5327 | 492 void write_ptbase(struct vcpu *v) |
kaf24@3757 | 493 { |
tdeegan@11189 | 494 write_cr3(v->arch.cr3); |
kaf24@3757 | 495 } |
kaf24@3757 | 496 |
kfraser@15738 | 497 /* |
kfraser@15738 | 498 * Should be called after CR3 is updated. |
Tim@13143 | 499 * |
Tim@13143 | 500 * Uses values found in vcpu->arch.(guest_table and guest_table_user), and |
Tim@13143 | 501 * for HVM guests, arch.monitor_table and hvm's guest CR3. |
Tim@13143 | 502 * |
Tim@13143 | 503 * Update ref counts to shadow tables appropriately. |
Tim@13143 | 504 */ |
Tim@13143 | 505 void update_cr3(struct vcpu *v) |
Tim@13143 | 506 { |
Tim@13143 | 507 unsigned long cr3_mfn=0; |
Tim@13143 | 508 |
Tim@13938 | 509 if ( paging_mode_enabled(v->domain) ) |
Tim@13143 | 510 { |
Tim@13938 | 511 paging_update_cr3(v); |
Tim@13143 | 512 return; |
Tim@13143 | 513 } |
Tim@13143 | 514 |
Tim@13143 | 515 #if CONFIG_PAGING_LEVELS == 4 |
Tim@13143 | 516 if ( !(v->arch.flags & TF_kernel_mode) ) |
Tim@13143 | 517 cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user); |
Tim@13143 | 518 else |
Tim@13143 | 519 #endif |
Tim@13143 | 520 cr3_mfn = pagetable_get_pfn(v->arch.guest_table); |
Tim@13143 | 521 |
Tim@13143 | 522 make_cr3(v, cr3_mfn); |
Tim@13143 | 523 } |
Tim@13143 | 524 |
Tim@13143 | 525 |
keir@19199 | 526 static void invalidate_shadow_ldt(struct vcpu *v, int flush) |
kaf24@3757 | 527 { |
kaf24@3757 | 528 int i; |
kaf24@3757 | 529 unsigned long pfn; |
kaf24@8764 | 530 struct page_info *page; |
keir@19198 | 531 |
keir@19199 | 532 BUG_ON(unlikely(in_irq())); |
keir@19199 | 533 |
keir@19199 | 534 spin_lock(&v->arch.shadow_ldt_lock); |
keir@19199 | 535 |
kaf24@5327 | 536 if ( v->arch.shadow_ldt_mapcnt == 0 ) |
keir@19199 | 537 goto out; |
kaf24@4426 | 538 |
kaf24@5327 | 539 v->arch.shadow_ldt_mapcnt = 0; |
kaf24@3757 | 540 |
kaf24@3757 | 541 for ( i = 16; i < 32; i++ ) |
kaf24@3757 | 542 { |
kaf24@5327 | 543 pfn = l1e_get_pfn(v->arch.perdomain_ptes[i]); |
kaf24@3757 | 544 if ( pfn == 0 ) continue; |
kfraser@12825 | 545 l1e_write(&v->arch.perdomain_ptes[i], l1e_empty()); |
kaf24@8764 | 546 page = mfn_to_page(pfn); |
keir@17425 | 547 ASSERT_PAGE_IS_TYPE(page, PGT_seg_desc_page); |
kaf24@5327 | 548 ASSERT_PAGE_IS_DOMAIN(page, v->domain); |
kaf24@3757 | 549 put_page_and_type(page); |
kaf24@3757 | 550 } |
kaf24@3757 | 551 |
keir@19199 | 552 /* Rid TLBs of stale mappings (guest mappings and shadow mappings). */ |
keir@19199 | 553 if ( flush ) |
keir@19689 | 554 flush_tlb_mask(&v->vcpu_dirty_cpumask); |
keir@19199 | 555 |
keir@19199 | 556 out: |
keir@19199 | 557 spin_unlock(&v->arch.shadow_ldt_lock); |
kaf24@3757 | 558 } |
kaf24@3757 | 559 |
kaf24@3757 | 560 |
kaf24@8764 | 561 static int alloc_segdesc_page(struct page_info *page) |
kaf24@3757 | 562 { |
kaf24@3757 | 563 struct desc_struct *descs; |
kaf24@3757 | 564 int i; |
kaf24@3757 | 565 |
keir@20277 | 566 descs = __map_domain_page(page); |
kaf24@3757 | 567 |
kaf24@3757 | 568 for ( i = 0; i < 512; i++ ) |
ack@13290 | 569 if ( unlikely(!check_descriptor(page_get_owner(page), &descs[i])) ) |
kaf24@3757 | 570 goto fail; |
kaf24@3757 | 571 |
kaf24@5394 | 572 unmap_domain_page(descs); |
keir@18450 | 573 return 0; |
kaf24@3757 | 574 |
kaf24@3757 | 575 fail: |
kaf24@5394 | 576 unmap_domain_page(descs); |
keir@18450 | 577 return -EINVAL; |
kaf24@3757 | 578 } |
kaf24@3757 | 579 |
kaf24@3757 | 580 |
kaf24@3757 | 581 /* Map shadow page at offset @off. */ |
kaf24@3757 | 582 int map_ldt_shadow_page(unsigned int off) |
kaf24@3757 | 583 { |
kaf24@5327 | 584 struct vcpu *v = current; |
kaf24@5327 | 585 struct domain *d = v->domain; |
kaf24@8764 | 586 unsigned long gmfn, mfn; |
mafetter@4629 | 587 l1_pgentry_t l1e, nl1e; |
kaf24@5589 | 588 unsigned long gva = v->arch.guest_context.ldt_base + (off << PAGE_SHIFT); |
tim@11687 | 589 int okay; |
kaf24@4176 | 590 |
kaf24@4198 | 591 BUG_ON(unlikely(in_irq())); |
kaf24@3757 | 592 |
tim@11687 | 593 guest_get_eff_kern_l1e(v, gva, &l1e); |
mafetter@4629 | 594 if ( unlikely(!(l1e_get_flags(l1e) & _PAGE_PRESENT)) ) |
mafetter@4179 | 595 return 0; |
mafetter@4179 | 596 |
kaf24@8764 | 597 gmfn = l1e_get_pfn(l1e); |
kaf24@8764 | 598 mfn = gmfn_to_mfn(d, gmfn); |
kfraser@12606 | 599 if ( unlikely(!mfn_valid(mfn)) ) |
kaf24@3757 | 600 return 0; |
kaf24@3757 | 601 |
keir@17425 | 602 okay = get_page_and_type(mfn_to_page(mfn), d, PGT_seg_desc_page); |
tim@11687 | 603 if ( unlikely(!okay) ) |
mafetter@4179 | 604 return 0; |
mafetter@4179 | 605 |
kaf24@8764 | 606 nl1e = l1e_from_pfn(mfn, l1e_get_flags(l1e) | _PAGE_RW); |
mafetter@4629 | 607 |
keir@19199 | 608 spin_lock(&v->arch.shadow_ldt_lock); |
kfraser@12825 | 609 l1e_write(&v->arch.perdomain_ptes[off + 16], nl1e); |
kaf24@5327 | 610 v->arch.shadow_ldt_mapcnt++; |
keir@19199 | 611 spin_unlock(&v->arch.shadow_ldt_lock); |
kaf24@3757 | 612 |
kaf24@3757 | 613 return 1; |
kaf24@3757 | 614 } |
kaf24@3757 | 615 |
kaf24@3757 | 616 |
kaf24@3757 | 617 static int get_page_from_pagenr(unsigned long page_nr, struct domain *d) |
kaf24@3757 | 618 { |
kaf24@8764 | 619 struct page_info *page = mfn_to_page(page_nr); |
kaf24@8764 | 620 |
kaf24@8764 | 621 if ( unlikely(!mfn_valid(page_nr)) || unlikely(!get_page(page, d)) ) |
kaf24@3757 | 622 { |
kaf24@4692 | 623 MEM_LOG("Could not get page ref for pfn %lx", page_nr); |
kaf24@3757 | 624 return 0; |
kaf24@3757 | 625 } |
kaf24@3757 | 626 |
kaf24@3757 | 627 return 1; |
kaf24@3757 | 628 } |
kaf24@3757 | 629 |
kaf24@3757 | 630 |
kaf24@3757 | 631 static int get_page_and_type_from_pagenr(unsigned long page_nr, |
kaf24@6077 | 632 unsigned long type, |
keir@18450 | 633 struct domain *d, |
keir@18780 | 634 int partial, |
keir@18450 | 635 int preemptible) |
kaf24@3757 | 636 { |
kaf24@8764 | 637 struct page_info *page = mfn_to_page(page_nr); |
keir@18450 | 638 int rc; |
kaf24@3757 | 639 |
keir@18780 | 640 if ( likely(partial >= 0) && |
keir@18780 | 641 unlikely(!get_page_from_pagenr(page_nr, d)) ) |
keir@18450 | 642 return -EINVAL; |
keir@18450 | 643 |
keir@18450 | 644 rc = (preemptible ? |
keir@18450 | 645 get_page_type_preemptible(page, type) : |
keir@18450 | 646 (get_page_type(page, type) ? 0 : -EINVAL)); |
keir@18450 | 647 |
keir@18780 | 648 if ( unlikely(rc) && partial >= 0 ) |
kaf24@3757 | 649 put_page(page); |
keir@18450 | 650 |
keir@18450 | 651 return rc; |
kaf24@3757 | 652 } |
kaf24@3757 | 653 |
keir@18794 | 654 static int get_data_page( |
keir@18794 | 655 struct page_info *page, struct domain *d, int writeable) |
keir@18794 | 656 { |
keir@18794 | 657 int rc; |
keir@18794 | 658 |
keir@18794 | 659 if ( writeable ) |
keir@18794 | 660 rc = get_page_and_type(page, d, PGT_writable_page); |
keir@18794 | 661 else |
keir@18794 | 662 rc = get_page(page, d); |
keir@18794 | 663 |
keir@18794 | 664 return rc; |
keir@18794 | 665 } |
keir@18794 | 666 |
keir@18794 | 667 static void put_data_page( |
keir@18794 | 668 struct page_info *page, int writeable) |
keir@18794 | 669 { |
keir@18794 | 670 if ( writeable ) |
keir@18794 | 671 put_page_and_type(page); |
keir@18794 | 672 else |
keir@18794 | 673 put_page(page); |
keir@18794 | 674 } |
keir@18794 | 675 |
kaf24@3757 | 676 /* |
kaf24@3782 | 677 * We allow root tables to map each other (a.k.a. linear page tables). It |
kaf24@3782 | 678 * needs some special care with reference counts and access permissions: |
kaf24@3757 | 679 * 1. The mapping entry must be read-only, or the guest may get write access |
kaf24@3757 | 680 * to its own PTEs. |
kaf24@3757 | 681 * 2. We must only bump the reference counts for an *already validated* |
kaf24@3757 | 682 * L2 table, or we can end up in a deadlock in get_page_type() by waiting |
kaf24@3757 | 683 * on a validation that is required to complete that validation. |
kaf24@3757 | 684 * 3. We only need to increment the reference counts for the mapped page |
kaf24@3782 | 685 * frame if it is mapped by a different root table. This is sufficient and |
kaf24@3782 | 686 * also necessary to allow validation of a root table mapping itself. |
kaf24@3757 | 687 */ |
kfraser@14392 | 688 #define define_get_linear_pagetable(level) \ |
kfraser@14391 | 689 static int \ |
kfraser@14392 | 690 get_##level##_linear_pagetable( \ |
kfraser@14392 | 691 level##_pgentry_t pde, unsigned long pde_pfn, struct domain *d) \ |
kfraser@14391 | 692 { \ |
kfraser@14391 | 693 unsigned long x, y; \ |
kfraser@14391 | 694 struct page_info *page; \ |
kfraser@14391 | 695 unsigned long pfn; \ |
kfraser@14391 | 696 \ |
kfraser@14392 | 697 if ( (level##e_get_flags(pde) & _PAGE_RW) ) \ |
kfraser@14391 | 698 { \ |
kfraser@14391 | 699 MEM_LOG("Attempt to create linear p.t. with write perms"); \ |
kfraser@14391 | 700 return 0; \ |
kfraser@14391 | 701 } \ |
kfraser@14391 | 702 \ |
kfraser@14392 | 703 if ( (pfn = level##e_get_pfn(pde)) != pde_pfn ) \ |
kfraser@14391 | 704 { \ |
kfraser@14391 | 705 /* Make sure the mapped frame belongs to the correct domain. */ \ |
kfraser@14391 | 706 if ( unlikely(!get_page_from_pagenr(pfn, d)) ) \ |
kfraser@14391 | 707 return 0; \ |
kfraser@14391 | 708 \ |
kfraser@14391 | 709 /* \ |
kfraser@14391 | 710 * Ensure that the mapped frame is an already-validated page table. \ |
kfraser@14391 | 711 * If so, atomically increment the count (checking for overflow). \ |
kfraser@14391 | 712 */ \ |
kfraser@14391 | 713 page = mfn_to_page(pfn); \ |
kfraser@14391 | 714 y = page->u.inuse.type_info; \ |
kfraser@14391 | 715 do { \ |
kfraser@14391 | 716 x = y; \ |
kfraser@14391 | 717 if ( unlikely((x & PGT_count_mask) == PGT_count_mask) || \ |
kfraser@14391 | 718 unlikely((x & (PGT_type_mask|PGT_validated)) != \ |
kfraser@14392 | 719 (PGT_##level##_page_table|PGT_validated)) ) \ |
kfraser@14391 | 720 { \ |
kfraser@14391 | 721 put_page(page); \ |
kfraser@14391 | 722 return 0; \ |
kfraser@14391 | 723 } \ |
kfraser@14391 | 724 } \ |
kfraser@14391 | 725 while ( (y = cmpxchg(&page->u.inuse.type_info, x, x + 1)) != x ); \ |
kfraser@14391 | 726 } \ |
kfraser@14391 | 727 \ |
kfraser@14391 | 728 return 1; \ |
kaf24@3757 | 729 } |
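
To make the linear-page-table rules above concrete, a guest creates such a mapping by pointing a top-level slot back at the top-level table itself, with the entry read-only as rule 1 requires. A hedged guest-side sketch, reusing the hypothetical wrapper names from the earlier example and assuming 8-byte entries plus the flag/shift macros from Xen's public headers:

```c
/* Sketch: ask Xen to install a self-referencing, read-only entry in the
 * top-level table at MFN root_mfn (slot number chosen by the guest). */
static void example_linear_slot(unsigned long root_mfn, unsigned int slot)
{
    struct mmu_update req = {
        /* Machine address of the slot to update ("ptr"). */
        .ptr = ((uint64_t)root_mfn << PAGE_SHIFT) + slot * sizeof(uint64_t),
        /* Point back at the table itself, present but without _PAGE_RW. */
        .val = ((uint64_t)root_mfn << PAGE_SHIFT) | _PAGE_PRESENT,
    };

    HYPERVISOR_mmu_update(&req, 1, NULL, DOMID_SELF);
}
```
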
kaf24@3757 | 730 |
keir@16105 | 731 |
keir@16369 | 732 int is_iomem_page(unsigned long mfn) |
keir@16105 | 733 { |
keir@19306 | 734 struct page_info *page; |
keir@19306 | 735 |
keir@19306 | 736 if ( !mfn_valid(mfn) ) |
keir@19306 | 737 return 1; |
keir@19306 | 738 |
keir@19306 | 739 /* Caller must know that it is an iomem page, or a reference is held. */ |
keir@19306 | 740 page = mfn_to_page(mfn); |
keir@19306 | 741 ASSERT((page->count_info & PGC_count_mask) != 0); |
keir@19306 | 742 |
keir@19306 | 743 return (page_get_owner(page) == dom_io); |
keir@16105 | 744 } |
keir@16105 | 745 |
keir@19670 | 746 static void update_xen_mappings(unsigned long mfn, unsigned long cacheattr) |
keir@19670 | 747 { |
keir@19670 | 748 #ifdef __x86_64__ |
keir@19670 | 749 bool_t alias = mfn >= PFN_DOWN(xen_phys_start) && |
keir@19670 | 750 mfn < PFN_UP(xen_phys_start + (unsigned long)_end - XEN_VIRT_START); |
keir@19670 | 751 unsigned long xen_va = |
keir@19670 | 752 XEN_VIRT_START + ((mfn - PFN_DOWN(xen_phys_start)) << PAGE_SHIFT); |
keir@19670 | 753 |
keir@19670 | 754 if ( unlikely(alias) && cacheattr ) |
keir@19670 | 755 map_pages_to_xen(xen_va, mfn, 1, 0); |
keir@19670 | 756 map_pages_to_xen((unsigned long)mfn_to_virt(mfn), mfn, 1, |
keir@19670 | 757 PAGE_HYPERVISOR | cacheattr_to_pte_flags(cacheattr)); |
keir@19670 | 758 if ( unlikely(alias) && !cacheattr ) |
keir@19670 | 759 map_pages_to_xen(xen_va, mfn, 1, PAGE_HYPERVISOR); |
keir@19670 | 760 #endif |
keir@19670 | 761 } |
keir@19670 | 762 |
mafetter@4179 | 763 int |
kaf24@3757 | 764 get_page_from_l1e( |
keir@19746 | 765 l1_pgentry_t l1e, struct domain *l1e_owner, struct domain *pg_owner) |
kaf24@3757 | 766 { |
mafetter@4629 | 767 unsigned long mfn = l1e_get_pfn(l1e); |
kaf24@8764 | 768 struct page_info *page = mfn_to_page(mfn); |
keir@16369 | 769 uint32_t l1f = l1e_get_flags(l1e); |
keir@16543 | 770 struct vcpu *curr = current; |
keir@19746 | 771 struct domain *real_pg_owner; |
kaf24@3757 | 772 |
keir@16369 | 773 if ( !(l1f & _PAGE_PRESENT) ) |
kaf24@3757 | 774 return 1; |
kaf24@3757 | 775 |
keir@19746 | 776 if ( unlikely(l1f & l1_disallow_mask(l1e_owner)) ) |
kaf24@3757 | 777 { |
keir@19746 | 778 MEM_LOG("Bad L1 flags %x", l1f & l1_disallow_mask(l1e_owner)); |
kaf24@3757 | 779 return 0; |
kaf24@3757 | 780 } |
kaf24@3757 | 781 |
keir@19306 | 782 if ( !mfn_valid(mfn) || |
keir@19746 | 783 (real_pg_owner = page_get_owner_and_reference(page)) == dom_io ) |
kaf24@3757 | 784 { |
keir@19306 | 785 /* Only needed the reference to confirm dom_io ownership. */ |
keir@19306 | 786 if ( mfn_valid(mfn) ) |
keir@19306 | 787 put_page(page); |
keir@19306 | 788 |
kaf24@4570 | 789 /* DOMID_IO reverts to caller for privilege checks. */ |
keir@19746 | 790 if ( pg_owner == dom_io ) |
keir@19746 | 791 pg_owner = curr->domain; |
keir@19746 | 792 |
keir@19746 | 793 if ( !iomem_access_permitted(pg_owner, mfn, mfn) ) |
kaf24@4570 | 794 { |
kaf24@12281 | 795 if ( mfn != (PADDR_MASK >> PAGE_SHIFT) ) /* INVALID_MFN? */ |
kfraser@12258 | 796 MEM_LOG("Non-privileged (%u) attempt to map I/O space %08lx", |
keir@19746 | 797 pg_owner->domain_id, mfn); |
kaf24@4570 | 798 return 0; |
kaf24@4570 | 799 } |
kaf24@4570 | 800 |
keir@16440 | 801 return 1; |
kaf24@3757 | 802 } |
kaf24@3757 | 803 |
keir@19746 | 804 if ( unlikely(real_pg_owner != pg_owner) ) |
keir@19746 | 805 { |
keir@19746 | 806 /* |
keir@19746 | 807 * Let privileged domains transfer the right to map their target |
keir@19746 | 808 * domain's pages. This is used to allow stub-domain pvfb export to |
keir@19746 | 809 * dom0, until pvfb supports granted mappings. At that time this |
keir@19746 | 810 * minor hack can go away. |
keir@19746 | 811 */ |
keir@20415 | 812 if ( (real_pg_owner == NULL) || (pg_owner == l1e_owner) || |
keir@20415 | 813 !IS_PRIV_FOR(pg_owner, real_pg_owner) ) |
keir@19746 | 814 goto could_not_pin; |
keir@19746 | 815 pg_owner = real_pg_owner; |
keir@19746 | 816 } |
keir@17277 | 817 |
kaf24@11298 | 818 /* Foreign mappings into guests in shadow external mode don't |
tdeegan@11189 | 819 * contribute to writeable mapping refcounts. (This allows the |
tdeegan@11189 | 820 * qemu-dm helper process in dom0 to map the domain's memory without |
tdeegan@11189 | 821 * messing up the count of "real" writable mappings.) */ |
keir@19306 | 822 if ( (l1f & _PAGE_RW) && |
keir@19746 | 823 ((l1e_owner == pg_owner) || !paging_mode_external(pg_owner)) && |
keir@19306 | 824 !get_page_type(page, PGT_writable_page) ) |
keir@19306 | 825 goto could_not_pin; |
keir@19306 | 826 |
keir@19306 | 827 if ( pte_flags_to_cacheattr(l1f) != |
keir@19642 | 828 ((page->count_info & PGC_cacheattr_mask) >> PGC_cacheattr_base) ) |
keir@16369 | 829 { |
keir@19127 | 830 unsigned long x, nx, y = page->count_info; |
keir@19127 | 831 unsigned long cacheattr = pte_flags_to_cacheattr(l1f); |
keir@16369 | 832 |
keir@16376 | 833 if ( is_xen_heap_page(page) ) |
keir@16369 | 834 { |
keir@16369 | 835 if ( (l1f & _PAGE_RW) && |
keir@19746 | 836 ((l1e_owner == pg_owner) || !paging_mode_external(pg_owner)) ) |
keir@16369 | 837 put_page_type(page); |
keir@16369 | 838 put_page(page); |
keir@16369 | 839 MEM_LOG("Attempt to change cache attributes of Xen heap page"); |
keir@16369 | 840 return 0; |
keir@16369 | 841 } |
keir@16369 | 842 |
keir@19642 | 843 while ( ((y & PGC_cacheattr_mask) >> PGC_cacheattr_base) != cacheattr ) |
keir@16369 | 844 { |
keir@16369 | 845 x = y; |
keir@16369 | 846 nx = (x & ~PGC_cacheattr_mask) | (cacheattr << PGC_cacheattr_base); |
keir@16369 | 847 y = cmpxchg(&page->count_info, x, nx); |
keir@16369 | 848 } |
keir@16369 | 849 |
keir@19670 | 850 update_xen_mappings(mfn, cacheattr); |
keir@16369 | 851 } |
kaf24@7434 | 852 |
keir@19306 | 853 return 1; |
keir@19306 | 854 |
keir@19306 | 855 could_not_pin: |
keir@19306 | 856 MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte |
keir@19746 | 857 " for l1e_owner=%d, pg_owner=%d", |
keir@19306 | 858 mfn, get_gpfn_from_mfn(mfn), |
keir@19746 | 859 l1e_get_intpte(l1e), l1e_owner->domain_id, pg_owner->domain_id); |
keir@19746 | 860 if ( real_pg_owner != NULL ) |
keir@19306 | 861 put_page(page); |
keir@19306 | 862 return 0; |
kaf24@3757 | 863 } |
kaf24@3757 | 864 |
kaf24@3757 | 865 |
kaf24@3757 | 866 /* NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. */ |
kfraser@14392 | 867 define_get_linear_pagetable(l2); |
kfraser@14392 | 868 static int |
kaf24@3757 | 869 get_page_from_l2e( |
kfraser@11522 | 870 l2_pgentry_t l2e, unsigned long pfn, struct domain *d) |
kaf24@3757 | 871 { |
keir@18794 | 872 unsigned long mfn = l2e_get_pfn(l2e); |
kaf24@3757 | 873 int rc; |
kaf24@3757 | 874 |
mafetter@4629 | 875 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) |
kaf24@3757 | 876 return 1; |
kaf24@3757 | 877 |
mafetter@4629 | 878 if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) ) |
kaf24@3757 | 879 { |
kaf24@6368 | 880 MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK); |
keir@18450 | 881 return -EINVAL; |
kaf24@3757 | 882 } |
kaf24@3757 | 883 |
keir@18794 | 884 if ( !(l2e_get_flags(l2e) & _PAGE_PSE) ) |
keir@18794 | 885 { |
keir@18794 | 886 rc = get_page_and_type_from_pagenr(mfn, PGT_l1_page_table, d, 0, 0); |
keir@18794 | 887 if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) ) |
keir@18794 | 888 rc = 0; |
keir@18794 | 889 } |
keir@18794 | 890 else if ( !opt_allow_hugepage || (mfn & (L1_PAGETABLE_ENTRIES-1)) ) |
keir@18794 | 891 { |
keir@18794 | 892 rc = -EINVAL; |
keir@18794 | 893 } |
keir@18794 | 894 else |
keir@18794 | 895 { |
keir@18794 | 896 unsigned long m = mfn; |
keir@18794 | 897 int writeable = !!(l2e_get_flags(l2e) & _PAGE_RW); |
keir@18794 | 898 |
keir@18794 | 899 do { |
keir@20227 | 900 if ( !mfn_valid(m) || |
keir@20227 | 901 !get_data_page(mfn_to_page(m), d, writeable) ) |
keir@18794 | 902 { |
keir@18794 | 903 while ( m-- > mfn ) |
keir@18794 | 904 put_data_page(mfn_to_page(m), writeable); |
keir@18794 | 905 return -EINVAL; |
keir@18794 | 906 } |
keir@18794 | 907 } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) ); |
keir@20227 | 908 |
keir@20227 | 909 rc = 1; |
keir@18794 | 910 } |
kfraser@14391 | 911 |
kaf24@3782 | 912 return rc; |
kaf24@3782 | 913 } |
kaf24@3782 | 914 |
kaf24@3782 | 915 |
kfraser@14392 | 916 define_get_linear_pagetable(l3); |
kfraser@14392 | 917 static int |
kaf24@3782 | 918 get_page_from_l3e( |
keir@18780 | 919 l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int partial, int preemptible) |
kaf24@3782 | 920 { |
kaf24@5275 | 921 int rc; |
kaf24@5275 | 922 |
mafetter@4629 | 923 if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) |
kaf24@3782 | 924 return 1; |
kaf24@3782 | 925 |
ack@13295 | 926 if ( unlikely((l3e_get_flags(l3e) & l3_disallow_mask(d))) ) |
kaf24@3782 | 927 { |
ack@13295 | 928 MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & l3_disallow_mask(d)); |
keir@18450 | 929 return -EINVAL; |
kaf24@3782 | 930 } |
kaf24@3782 | 931 |
keir@18450 | 932 rc = get_page_and_type_from_pagenr( |
keir@18780 | 933 l3e_get_pfn(l3e), PGT_l2_page_table, d, partial, preemptible); |
keir@18458 | 934 if ( unlikely(rc == -EINVAL) && get_l3_linear_pagetable(l3e, pfn, d) ) |
keir@18458 | 935 rc = 0; |
kfraser@14392 | 936 |
kaf24@5275 | 937 return rc; |
kaf24@3782 | 938 } |
kaf24@5275 | 939 |
kaf24@5275 | 940 #if CONFIG_PAGING_LEVELS >= 4 |
kfraser@14392 | 941 define_get_linear_pagetable(l4); |
kfraser@14392 | 942 static int |
kaf24@3782 | 943 get_page_from_l4e( |
keir@18780 | 944 l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int partial, int preemptible) |
kaf24@3782 | 945 { |
kaf24@3782 | 946 int rc; |
kaf24@3782 | 947 |
mafetter@4629 | 948 if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) ) |
kaf24@3782 | 949 return 1; |
kaf24@3782 | 950 |
mafetter@4629 | 951 if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) ) |
kaf24@3782 | 952 { |
kaf24@6368 | 953 MEM_LOG("Bad L4 flags %x", l4e_get_flags(l4e) & L4_DISALLOW_MASK); |
keir@18450 | 954 return -EINVAL; |
kaf24@3782 | 955 } |
kaf24@3782 | 956 |
keir@18450 | 957 rc = get_page_and_type_from_pagenr( |
keir@18780 | 958 l4e_get_pfn(l4e), PGT_l3_page_table, d, partial, preemptible); |
keir@18458 | 959 if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) ) |
keir@18458 | 960 rc = 0; |
mafetter@7730 | 961 |
mafetter@7730 | 962 return rc; |
kaf24@3757 | 963 } |
kaf24@5275 | 964 #endif /* 4 level */ |
kaf24@3782 | 965 |
kfraser@11264 | 966 #ifdef __x86_64__ |
kfraser@11567 | 967 |
kfraser@11567 | 968 #ifdef USER_MAPPINGS_ARE_GLOBAL |
ack@13295 | 969 #define adjust_guest_l1e(pl1e, d) \ |
kfraser@11567 | 970 do { \ |
ack@13295 | 971 if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) && \ |
kfraser@15012 | 972 likely(!is_pv_32on64_domain(d)) ) \ |
kfraser@11567 | 973 { \ |
kfraser@11567 | 974 /* _PAGE_GUEST_KERNEL page cannot have the Global bit set. */ \ |
kfraser@11567 | 975 if ( (l1e_get_flags((pl1e)) & (_PAGE_GUEST_KERNEL|_PAGE_GLOBAL)) \ |
kfraser@11567 | 976 == (_PAGE_GUEST_KERNEL|_PAGE_GLOBAL) ) \ |
kfraser@11567 | 977 MEM_LOG("Global bit is set to kernel page %lx", \ |
kfraser@11567 | 978 l1e_get_pfn((pl1e))); \ |
kfraser@11567 | 979 if ( !(l1e_get_flags((pl1e)) & _PAGE_USER) ) \ |
kfraser@11567 | 980 l1e_add_flags((pl1e), (_PAGE_GUEST_KERNEL|_PAGE_USER)); \ |
kfraser@11567 | 981 if ( !(l1e_get_flags((pl1e)) & _PAGE_GUEST_KERNEL) ) \ |
kfraser@11567 | 982 l1e_add_flags((pl1e), (_PAGE_GLOBAL|_PAGE_USER)); \ |
kfraser@11567 | 983 } \ |
kfraser@11567 | 984 } while ( 0 ) |
kfraser@11567 | 985 #else |
ack@13295 | 986 #define adjust_guest_l1e(pl1e, d) \ |
kfraser@11567 | 987 do { \ |
ack@13295 | 988 if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) && \ |
kfraser@15012 | 989 likely(!is_pv_32on64_domain(d)) ) \ |
kfraser@11264 | 990 l1e_add_flags((pl1e), _PAGE_USER); \ |
kfraser@11264 | 991 } while ( 0 ) |
kfraser@11567 | 992 #endif |
kfraser@11264 | 993 |
ack@13295 | 994 #define adjust_guest_l2e(pl2e, d) \ |
kfraser@11264 | 995 do { \ |
ack@13295 | 996 if ( likely(l2e_get_flags((pl2e)) & _PAGE_PRESENT) && \ |
kfraser@15012 | 997 likely(!is_pv_32on64_domain(d)) ) \ |
kfraser@11264 | 998 l2e_add_flags((pl2e), _PAGE_USER); \ |
kfraser@11264 | 999 } while ( 0 ) |
kfraser@11264 | 1000 |
kfraser@15012 | 1001 #define adjust_guest_l3e(pl3e, d) \ |
kfraser@15012 | 1002 do { \ |
kfraser@15012 | 1003 if ( likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) ) \ |
kfraser@15012 | 1004 l3e_add_flags((pl3e), likely(!is_pv_32on64_domain(d)) ? \ |
kfraser@15012 | 1005 _PAGE_USER : \ |
kfraser@15012 | 1006 _PAGE_USER|_PAGE_RW); \ |
kfraser@11264 | 1007 } while ( 0 ) |
kfraser@11264 | 1008 |
ack@13295 | 1009 #define adjust_guest_l4e(pl4e, d) \ |
kfraser@11264 | 1010 do { \ |
ack@13295 | 1011 if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) && \ |
kfraser@15012 | 1012 likely(!is_pv_32on64_domain(d)) ) \ |
kfraser@11264 | 1013 l4e_add_flags((pl4e), _PAGE_USER); \ |
kfraser@11264 | 1014 } while ( 0 ) |
kfraser@11567 | 1015 |
kfraser@11567 | 1016 #else /* !defined(__x86_64__) */ |
kfraser@11567 | 1017 |
ack@13295 | 1018 #define adjust_guest_l1e(_p, _d) ((void)(_d)) |
ack@13295 | 1019 #define adjust_guest_l2e(_p, _d) ((void)(_d)) |
ack@13295 | 1020 #define adjust_guest_l3e(_p, _d) ((void)(_d)) |
ack@13295 | 1021 |
ack@13295 | 1022 #endif |
ack@13295 | 1023 |
keir@20341 | 1024 #ifdef __x86_64__ |
kfraser@15012 | 1025 #define unadjust_guest_l3e(pl3e, d) \ |
kfraser@15012 | 1026 do { \ |
kfraser@15012 | 1027 if ( unlikely(is_pv_32on64_domain(d)) && \ |
kfraser@15012 | 1028 likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) ) \ |
kfraser@15012 | 1029 l3e_remove_flags((pl3e), _PAGE_USER|_PAGE_RW|_PAGE_ACCESSED); \ |
ack@13295 | 1030 } while ( 0 ) |
ack@13295 | 1031 #else |
ack@13295 | 1032 #define unadjust_guest_l3e(_p, _d) ((void)(_d)) |
kfraser@11264 | 1033 #endif |
kaf24@3757 | 1034 |
keir@19746 | 1035 void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner) |
kaf24@3757 | 1036 { |
keir@16440 | 1037 unsigned long pfn = l1e_get_pfn(l1e); |
keir@16440 | 1038 struct page_info *page; |
keir@19746 | 1039 struct domain *pg_owner; |
keir@16440 | 1040 struct vcpu *v; |
keir@16440 | 1041 |
keir@16440 | 1042 if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) || is_iomem_page(pfn) ) |
kaf24@3757 | 1043 return; |
kaf24@3757 | 1044 |
keir@16440 | 1045 page = mfn_to_page(pfn); |
keir@19746 | 1046 pg_owner = page_get_owner(page); |
kaf24@8006 | 1047 |
kaf24@8006 | 1048 /* |
kaf24@8006 | 1049 * Check if this is a mapping that was established via a grant reference. |
kaf24@8006 | 1050 * If it was then we should not be here: we require that such mappings are |
kaf24@8006 | 1051 * explicitly destroyed via the grant-table interface. |
kaf24@8006 | 1052 * |
kaf24@8006 | 1053 * The upshot of this is that the guest can end up with active grants that |
kaf24@8006 | 1054 * it cannot destroy (because it no longer has a PTE to present to the |
kaf24@8006 | 1055 * grant-table interface). This can lead to subtle hard-to-catch bugs, |
kaf24@8006 | 1056 * hence a special grant PTE flag can be enabled to catch the bug early. |
kaf24@8006 | 1057 * |
kaf24@8006 | 1058 * (Note that the undestroyable active grants are not a security hole in |
kaf24@8006 | 1059 * Xen. All active grants can safely be cleaned up when the domain dies.) |
kaf24@8006 | 1060 */ |
kfraser@14739 | 1061 if ( (l1e_get_flags(l1e) & _PAGE_GNTTAB) && |
keir@19746 | 1062 !l1e_owner->is_shutting_down && !l1e_owner->is_dying ) |
kaf24@3757 | 1063 { |
kaf24@8006 | 1064 MEM_LOG("Attempt to implicitly unmap a granted PTE %" PRIpte, |
kaf24@8006 | 1065 l1e_get_intpte(l1e)); |
keir@19746 | 1066 domain_crash(l1e_owner); |
kaf24@3757 | 1067 } |
kaf24@3757 | 1068 |
tdeegan@11189 | 1069 /* Remember we didn't take a type-count of foreign writable mappings |
Tim@13938 | 1070 * to paging-external domains */ |
tdeegan@11189 | 1071 if ( (l1e_get_flags(l1e) & _PAGE_RW) && |
keir@19746 | 1072 ((l1e_owner == pg_owner) || !paging_mode_external(pg_owner)) ) |
kaf24@3757 | 1073 { |
kaf24@3757 | 1074 put_page_and_type(page); |
kaf24@3757 | 1075 } |
kaf24@3757 | 1076 else |
kaf24@3757 | 1077 { |
kaf24@3757 | 1078 /* We expect this is rare so we blow the entire shadow LDT. */ |
kaf24@3757 | 1079 if ( unlikely(((page->u.inuse.type_info & PGT_type_mask) == |
keir@17425 | 1080 PGT_seg_desc_page)) && |
kaf24@7430 | 1081 unlikely(((page->u.inuse.type_info & PGT_count_mask) != 0)) && |
keir@19746 | 1082 (l1e_owner == pg_owner) ) |
kaf24@7430 | 1083 { |
keir@19746 | 1084 for_each_vcpu ( pg_owner, v ) |
keir@19199 | 1085 invalidate_shadow_ldt(v, 1); |
kaf24@7430 | 1086 } |
kaf24@3757 | 1087 put_page(page); |
kaf24@3757 | 1088 } |
kaf24@3757 | 1089 } |
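
As the comment in put_page_from_l1e() explains, a mapping established through the grant-table interface must be torn down through that interface rather than by overwriting the PTE. An illustrative guest-side unmap, using the gnttab_unmap_grant_ref structure and GNTTABOP_unmap_grant_ref command from Xen's public grant_table.h; the HYPERVISOR_grant_table_op() wrapper is again an assumption about the guest environment:

```c
/* Sketch: unmap a grant previously mapped with GNTTABOP_map_grant_ref.
 * 'handle' and 'host_addr' were returned/used by that earlier map call. */
static void example_grant_unmap(uint64_t host_addr, grant_handle_t handle)
{
    struct gnttab_unmap_grant_ref unmap = {
        .host_addr    = host_addr,
        .dev_bus_addr = 0,
        .handle       = handle,
    };

    HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap, 1);
    /* unmap.status is GNTST_okay (0) on success. */
}
```
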
kaf24@3757 | 1090 |
kaf24@3757 | 1091 |
kaf24@3757 | 1092 /* |
kaf24@3757 | 1093 * NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. |
kaf24@3757 | 1094 * Note also that this automatically deals correctly with linear p.t.'s. |
kaf24@3757 | 1095 */ |
keir@18450 | 1096 static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn) |
kaf24@3757 | 1097 { |
keir@18794 | 1098 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || (l2e_get_pfn(l2e) == pfn) ) |
keir@18794 | 1099 return 1; |
keir@18794 | 1100 |
keir@18794 | 1101 if ( l2e_get_flags(l2e) & _PAGE_PSE ) |
keir@18794 | 1102 { |
keir@18794 | 1103 unsigned long mfn = l2e_get_pfn(l2e), m = mfn; |
keir@18794 | 1104 int writeable = l2e_get_flags(l2e) & _PAGE_RW; |
keir@19047 | 1105 |
keir@19047 | 1106 ASSERT(!(mfn & (L1_PAGETABLE_ENTRIES-1))); |
keir@18794 | 1107 do { |
keir@18794 | 1108 put_data_page(mfn_to_page(m), writeable); |
keir@18794 | 1109 } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) ); |
keir@18794 | 1110 } |
keir@18794 | 1111 else |
keir@18450 | 1112 { |
kfraser@13380 | 1113 put_page_and_type(l2e_get_page(l2e)); |
keir@18450 | 1114 } |
keir@18794 | 1115 |
keir@18794 | 1116 return 0; |
kaf24@3757 | 1117 } |
kaf24@3757 | 1118 |
keir@18780 | 1119 static int __put_page_type(struct page_info *, int preemptible); |
kaf24@3757 | 1120 |
keir@18450 | 1121 static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, |
keir@18780 | 1122 int partial, int preemptible) |
kaf24@3757 | 1123 { |
keir@19047 | 1124 if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || (l3e_get_pfn(l3e) == pfn) ) |
keir@19047 | 1125 return 1; |
keir@19047 | 1126 |
keir@19047 | 1127 #ifdef __x86_64__ |
keir@19047 | 1128 if ( unlikely(l3e_get_flags(l3e) & _PAGE_PSE) ) |
keir@18780 | 1129 { |
keir@19047 | 1130 unsigned long mfn = l3e_get_pfn(l3e); |
keir@19047 | 1131 int writeable = l3e_get_flags(l3e) & _PAGE_RW; |
keir@19047 | 1132 |
keir@19047 | 1133 ASSERT(!(mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1))); |
keir@19047 | 1134 do { |
keir@19047 | 1135 put_data_page(mfn_to_page(mfn), writeable); |
keir@19047 | 1136 } while ( ++mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1) ); |
keir@19047 | 1137 |
keir@19047 | 1138 return 0; |
keir@18780 | 1139 } |
keir@19047 | 1140 #endif |
keir@19047 | 1141 |
keir@19047 | 1142 if ( unlikely(partial > 0) ) |
keir@19047 | 1143 return __put_page_type(l3e_get_page(l3e), preemptible); |
keir@19047 | 1144 |
keir@19047 | 1145 return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible); |
kaf24@3757 | 1146 } |
kaf24@5275 | 1147 |
kaf24@5275 | 1148 #if CONFIG_PAGING_LEVELS >= 4 |
keir@18450 | 1149 static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, |
keir@18780 | 1150 int partial, int preemptible) |
kaf24@3782 | 1151 { |
mafetter@4629 | 1152 if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) && |
mafetter@4629 | 1153 (l4e_get_pfn(l4e) != pfn) ) |
keir@18780 | 1154 { |
keir@18780 | 1155 if ( unlikely(partial > 0) ) |
keir@18780 | 1156 return __put_page_type(l4e_get_page(l4e), preemptible); |
keir@18450 | 1157 return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible); |
keir@18780 | 1158 } |
keir@18450 | 1159 return 1; |
kaf24@3782 | 1160 } |
kaf24@5275 | 1161 #endif |
kaf24@3782 | 1162 |
kaf24@8764 | 1163 static int alloc_l1_table(struct page_info *page) |
kaf24@3757 | 1164 { |
kaf24@3757 | 1165 struct domain *d = page_get_owner(page); |
kaf24@8764 | 1166 unsigned long pfn = page_to_mfn(page); |
kaf24@3757 | 1167 l1_pgentry_t *pl1e; |
keir@18450 | 1168 unsigned int i; |
kaf24@3757 | 1169 |
kaf24@5394 | 1170 pl1e = map_domain_page(pfn); |
kaf24@3757 | 1171 |
kaf24@3775 | 1172 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) |
kfraser@11264 | 1173 { |
kaf24@3791 | 1174 if ( is_guest_l1_slot(i) && |
keir@19746 | 1175 unlikely(!get_page_from_l1e(pl1e[i], d, d)) ) |
kaf24@3757 | 1176 goto fail; |
kaf24@3757 | 1177 |
ack@13295 | 1178 adjust_guest_l1e(pl1e[i], d); |
kfraser@11264 | 1179 } |
kfraser@11264 | 1180 |
kaf24@5394 | 1181 unmap_domain_page(pl1e); |
keir@18450 | 1182 return 0; |
kaf24@3757 | 1183 |
kaf24@3757 | 1184 fail: |
kaf24@7434 | 1185 MEM_LOG("Failure in alloc_l1_table: entry %d", i); |
kaf24@3757 | 1186 while ( i-- > 0 ) |
kaf24@3791 | 1187 if ( is_guest_l1_slot(i) ) |
kaf24@3791 | 1188 put_page_from_l1e(pl1e[i], d); |
kaf24@3757 | 1189 |
kaf24@5394 | 1190 unmap_domain_page(pl1e); |
keir@18450 | 1191 return -EINVAL; |
kaf24@3757 | 1192 } |
kaf24@3757 | 1193 |
ack@13295 | 1194 static int create_pae_xen_mappings(struct domain *d, l3_pgentry_t *pl3e) |
kaf24@5275 | 1195 { |
kaf24@8764 | 1196 struct page_info *page; |
kaf24@5399 | 1197 l3_pgentry_t l3e3; |
keir@19983 | 1198 #ifdef __i386__ |
keir@19983 | 1199 l2_pgentry_t *pl2e, l2e; |
kaf24@5399 | 1200 int i; |
kfraser@15012 | 1201 #endif |
kfraser@15012 | 1202 |
kfraser@15012 | 1203 if ( !is_pv_32bit_domain(d) ) |
ack@13295 | 1204 return 1; |
kaf24@5399 | 1205 |
kaf24@5399 | 1206 pl3e = (l3_pgentry_t *)((unsigned long)pl3e & PAGE_MASK); |
kaf24@5399 | 1207 |
kaf24@5399 | 1208 /* 3rd L3 slot contains L2 with Xen-private mappings. It *must* exist. */ |
kaf24@5399 | 1209 l3e3 = pl3e[3]; |
kaf24@5399 | 1210 if ( !(l3e_get_flags(l3e3) & _PAGE_PRESENT) ) |
kaf24@5399 | 1211 { |
kaf24@5399 | 1212 MEM_LOG("PAE L3 3rd slot is empty"); |
kaf24@5275 | 1213 return 0; |
kaf24@5275 | 1214 } |
kaf24@5275 | 1215 |
kaf24@5399 | 1216 /* |
kaf24@5399 | 1217 * The Xen-private mappings include linear mappings. The L2 thus cannot |
kaf24@5399 | 1218 * be shared by multiple L3 tables. The test here is adequate because: |
kfraser@11522 | 1219 * 1. Cannot appear in slots != 3 because get_page_type() checks the |
kfraser@11522 | 1220 * PGT_pae_xen_l2 flag, which is asserted iff the L2 appears in slot 3 |
kaf24@5399 | 1221 * 2. Cannot appear in another page table's L3: |
kaf24@5399 | 1222 * a. alloc_l3_table() calls this function and this check will fail |
kaf24@5399 | 1223 * b. mod_l3_entry() disallows updates to slot 3 in an existing table |
kaf24@5399 | 1224 */ |
kaf24@5399 | 1225 page = l3e_get_page(l3e3); |
kaf24@5399 | 1226 BUG_ON(page->u.inuse.type_info & PGT_pinned); |
kaf24@5399 | 1227 BUG_ON((page->u.inuse.type_info & PGT_count_mask) == 0); |
kfraser@11522 | 1228 BUG_ON(!(page->u.inuse.type_info & PGT_pae_xen_l2)); |
kaf24@5399 | 1229 if ( (page->u.inuse.type_info & PGT_count_mask) != 1 ) |
kaf24@5399 | 1230 { |
kaf24@5399 | 1231 MEM_LOG("PAE L3 3rd slot is shared"); |
kaf24@5399 | 1232 return 0; |
kaf24@5275 | 1233 } |
kaf24@5399 | 1234 |
keir@19983 | 1235 #ifdef __i386__ |
keir@19983 | 1236 /* Xen linear pagetable mappings. */ |
kaf24@5399 | 1237 pl2e = map_domain_page(l3e_get_pfn(l3e3)); |
kaf24@5399 | 1238 for ( i = 0; i < (LINEARPT_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ ) |
kfraser@12825 | 1239 { |
kfraser@12825 | 1240 l2e = l2e_empty(); |
kfraser@12825 | 1241 if ( l3e_get_flags(pl3e[i]) & _PAGE_PRESENT ) |
kfraser@12825 | 1242 l2e = l2e_from_pfn(l3e_get_pfn(pl3e[i]), __PAGE_HYPERVISOR); |
kfraser@12825 | 1243 l2e_write(&pl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i], l2e); |
kfraser@12825 | 1244 } |
keir@19983 | 1245 unmap_domain_page(pl2e); |
ack@13295 | 1246 #endif |
kaf24@5275 | 1247 |
kaf24@5275 | 1248 return 1; |
kaf24@5275 | 1249 } |
keir@17638 | 1250 |
keir@17638 | 1251 #ifdef __i386__ |
kaf24@10211 | 1252 /* Flush a pgdir update into low-memory caches. */ |
kaf24@10211 | 1253 static void pae_flush_pgd( |
kaf24@10211 | 1254 unsigned long mfn, unsigned int idx, l3_pgentry_t nl3e) |
kaf24@10211 | 1255 { |
kaf24@10211 | 1256 struct domain *d = page_get_owner(mfn_to_page(mfn)); |
kaf24@10215 | 1257 struct vcpu *v; |
kaf24@10215 | 1258 intpte_t _ol3e, _nl3e, _pl3e; |
kaf24@10215 | 1259 l3_pgentry_t *l3tab_ptr; |
kaf24@10215 | 1260 struct pae_l3_cache *cache; |
kaf24@10211 | 1261 |
Tim@13943 | 1262 if ( unlikely(shadow_mode_enabled(d)) ) |
Tim@13943 | 1263 { |
Tim@13943 | 1264 cpumask_t m = CPU_MASK_NONE; |
Tim@13943 | 1265 /* Re-shadow this l3 table on any vcpus that are using it */ |
Tim@13943 | 1266 for_each_vcpu ( d, v ) |
Tim@13943 | 1267 if ( pagetable_get_pfn(v->arch.guest_table) == mfn ) |
Tim@13943 | 1268 { |
Tim@13943 | 1269 paging_update_cr3(v); |
Tim@13943 | 1270 cpus_or(m, m, v->vcpu_dirty_cpumask); |
Tim@13943 | 1271 } |
keir@19689 | 1272 flush_tlb_mask(&m); |
Tim@13943 | 1273 } |
Tim@13943 | 1274 |
kaf24@10211 | 1275 /* If below 4GB then the pgdir is not shadowed in low memory. */ |
kaf24@10215 | 1276 if ( !l3tab_needs_shadow(mfn) ) |
kaf24@10211 | 1277 return; |
kaf24@10211 | 1278 |
kaf24@10215 | 1279 for_each_vcpu ( d, v ) |
kaf24@10215 | 1280 { |
kaf24@10215 | 1281 cache = &v->arch.pae_l3_cache; |
kaf24@10215 | 1282 |
kaf24@10215 | 1283 spin_lock(&cache->lock); |
kaf24@10215 | 1284 |
kaf24@10215 | 1285 if ( cache->high_mfn == mfn ) |
kaf24@10215 | 1286 { |
kaf24@10215 | 1287 l3tab_ptr = &cache->table[cache->inuse_idx][idx]; |
kaf24@10215 | 1288 _ol3e = l3e_get_intpte(*l3tab_ptr); |
kaf24@10215 | 1289 _nl3e = l3e_get_intpte(nl3e); |
kfraser@15100 | 1290 _pl3e = cmpxchg(&l3e_get_intpte(*l3tab_ptr), _ol3e, _nl3e); |
kaf24@10215 | 1291 BUG_ON(_pl3e != _ol3e); |
kaf24@10215 | 1292 } |
kaf24@10215 | 1293 |
kaf24@10215 | 1294 spin_unlock(&cache->lock); |
kaf24@10215 | 1295 } |
kaf24@10215 | 1296 |
keir@19689 | 1297 flush_tlb_mask(&d->domain_dirty_cpumask); |
kaf24@10211 | 1298 } |
ack@13295 | 1299 #else |
kaf24@10211 | 1300 # define pae_flush_pgd(mfn, idx, nl3e) ((void)0) |
kaf24@5275 | 1301 #endif |
kaf24@5275 | 1302 |
keir@18450 | 1303 static int alloc_l2_table(struct page_info *page, unsigned long type, |
keir@18450 | 1304 int preemptible) |
kaf24@3757 | 1305 { |
kaf24@3782 | 1306 struct domain *d = page_get_owner(page); |
kaf24@8764 | 1307 unsigned long pfn = page_to_mfn(page); |
kaf24@3782 | 1308 l2_pgentry_t *pl2e; |
keir@18450 | 1309 unsigned int i; |
keir@18450 | 1310 int rc = 0; |
mafetter@4179 | 1311 |
kaf24@5394 | 1312 pl2e = map_domain_page(pfn); |
kaf24@3757 | 1313 |
keir@18450 | 1314 for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES; i++ ) |
kaf24@5399 | 1315 { |
keir@18450 | 1316 if ( preemptible && i && hypercall_preempt_check() ) |
keir@18450 | 1317 { |
keir@18450 | 1318 page->nr_validated_ptes = i; |
keir@18450 | 1319 rc = -EAGAIN; |
keir@18450 | 1320 break; |
keir@18450 | 1321 } |
keir@18450 | 1322 |
keir@18450 | 1323 if ( !is_guest_l2_slot(d, type, i) || |
keir@18450 | 1324 (rc = get_page_from_l2e(pl2e[i], pfn, d)) > 0 ) |
keir@18197 | 1325 continue; |
keir@18197 | 1326 |
keir@18450 | 1327 if ( rc < 0 ) |
keir@18450 | 1328 { |
keir@18450 | 1329 MEM_LOG("Failure in alloc_l2_table: entry %d", i); |
keir@18450 | 1330 while ( i-- > 0 ) |
keir@18450 | 1331 if ( is_guest_l2_slot(d, type, i) ) |
keir@18450 | 1332 put_page_from_l2e(pl2e[i], pfn); |
keir@18450 | 1333 break; |
keir@18450 | 1334 } |
keir@18450 | 1335 |
ack@13295 | 1336 adjust_guest_l2e(pl2e[i], d); |
kaf24@5275 | 1337 } |
kaf24@5275 | 1338 |
keir@19983 | 1339 if ( rc >= 0 && (type & PGT_pae_xen_l2) ) |
keir@19983 | 1340 { |
keir@19983 | 1341 /* Xen private mappings. */ |
keir@19983 | 1342 #if defined(__i386__) |
keir@19983 | 1343 memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)], |
keir@19983 | 1344 &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT], |
keir@19983 | 1345 L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t)); |
keir@19983 | 1346 for ( i = 0; i < PDPT_L2_ENTRIES; i++ ) |
keir@19983 | 1347 l2e_write(&pl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i], |
keir@19983 | 1348 l2e_from_page(perdomain_pt_page(d, i), |
keir@19983 | 1349 __PAGE_HYPERVISOR)); |
keir@19983 | 1350 pl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = |
keir@19983 | 1351 l2e_from_pfn(pfn, __PAGE_HYPERVISOR); |
keir@20341 | 1352 #else |
keir@19983 | 1353 memcpy(&pl2e[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)], |
keir@19983 | 1354 &compat_idle_pg_table_l2[ |
keir@19983 | 1355 l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)], |
keir@19983 | 1356 COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*pl2e)); |
keir@19983 | 1357 #endif |
keir@19983 | 1358 } |
keir@19983 | 1359 |
kaf24@5394 | 1360 unmap_domain_page(pl2e); |
keir@18450 | 1361 return rc > 0 ? 0 : rc; |
kaf24@3757 | 1362 } |
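/*
 * Editorial sketch, not in the original source: alloc_l2_table() is
 * restartable.  When the preempt check fires it records progress in
 * page->nr_validated_ptes and returns -EAGAIN; the real callers turn that
 * into a hypercall continuation rather than spinning.  Purely to show the
 * contract, a synchronous retry wrapper (hypothetical helper):
 */
static inline int alloc_l2_sync_sketch(struct page_info *page,
                                       unsigned long type)
{
    int rc;

    do {
        rc = alloc_l2_table(page, type, 1); /* resumes at nr_validated_ptes */
    } while ( rc == -EAGAIN );

    return rc;
}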
kaf24@3757 | 1363 |
keir@18450 | 1364 static int alloc_l3_table(struct page_info *page, int preemptible) |
kaf24@3782 | 1365 { |
kaf24@3782 | 1366 struct domain *d = page_get_owner(page); |
kaf24@8764 | 1367 unsigned long pfn = page_to_mfn(page); |
kaf24@5275 | 1368 l3_pgentry_t *pl3e; |
keir@18450 | 1369 unsigned int i; |
keir@18780 | 1370 int rc = 0, partial = page->partial_pte; |
kaf24@3782 | 1371 |
keir@17638 | 1372 #if CONFIG_PAGING_LEVELS == 3 |
kaf24@10304 | 1373 /* |
kaf24@10304 | 1374 * PAE pgdirs above 4GB are unacceptable if the guest does not understand |
kaf24@10304 | 1375 * the weird 'extended cr3' format for dealing with high-order address |
kaf24@10304 | 1376 * bits. We cut some slack for control tools (before vcpu0 is initialised). |
kaf24@10304 | 1377 */ |
kaf24@10304 | 1378 if ( (pfn >= 0x100000) && |
kaf24@10304 | 1379 unlikely(!VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3)) && |
keir@19826 | 1380 d->vcpu && d->vcpu[0] && d->vcpu[0]->is_initialised ) |
kaf24@10304 | 1381 { |
kaf24@10304 | 1382 MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn); |
keir@18450 | 1383 return -EINVAL; |
kaf24@10304 | 1384 } |
kaf24@10304 | 1385 #endif |
kaf24@10304 | 1386 |
kaf24@5394 | 1387 pl3e = map_domain_page(pfn); |
ack@13295 | 1388 |
ack@13295 | 1389 /* |
ack@13295 | 1390 * PAE guests allocate full pages, but aren't required to initialize |
ack@13295 | 1391 * more than the first four entries; when running in compatibility |
ack@13295 | 1392 * mode, however, the full page is visible to the MMU, and hence all |
ack@13295 | 1393 * 512 entries must be valid/verified, which is most easily achieved |
ack@13295 | 1394 * by clearing them out. |
ack@13295 | 1395 */ |
kfraser@15012 | 1396 if ( is_pv_32on64_domain(d) ) |
ack@13295 | 1397 memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e)); |
ack@13295 | 1398 |
keir@18780 | 1399 for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES; |
keir@18780 | 1400 i++, partial = 0 ) |
kaf24@5399 | 1401 { |
kfraser@15012 | 1402 if ( is_pv_32bit_domain(d) && (i == 3) ) |
kfraser@11522 | 1403 { |
kfraser@11522 | 1404 if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) || |
keir@18450 | 1405 (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) ) |
keir@18450 | 1406 rc = -EINVAL; |
keir@18450 | 1407 else |
keir@18450 | 1408 rc = get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]), |
keir@18450 | 1409 PGT_l2_page_table | |
keir@18450 | 1410 PGT_pae_xen_l2, |
keir@18780 | 1411 d, partial, preemptible); |
kfraser@11522 | 1412 } |
keir@18450 | 1413 else if ( !is_guest_l3_slot(i) || |
keir@18780 | 1414 (rc = get_page_from_l3e(pl3e[i], pfn, d, |
keir@18780 | 1415 partial, preemptible)) > 0 ) |
keir@18197 | 1416 continue; |
keir@18450 | 1417 |
keir@18450 | 1418 if ( rc == -EAGAIN ) |
keir@18450 | 1419 { |
keir@18450 | 1420 page->nr_validated_ptes = i; |
keir@18780 | 1421 page->partial_pte = partial ?: 1; |
keir@18450 | 1422 } |
keir@18450 | 1423 else if ( rc == -EINTR && i ) |
keir@18450 | 1424 { |
keir@18450 | 1425 page->nr_validated_ptes = i; |
keir@18450 | 1426 page->partial_pte = 0; |
keir@18450 | 1427 rc = -EAGAIN; |
keir@18450 | 1428 } |
keir@18450 | 1429 if ( rc < 0 ) |
keir@18450 | 1430 break; |
keir@17638 | 1431 |
ack@13295 | 1432 adjust_guest_l3e(pl3e[i], d); |
kaf24@5275 | 1433 } |
kaf24@5275 | 1434 |
keir@18450 | 1435 if ( rc >= 0 && !create_pae_xen_mappings(d, pl3e) ) |
keir@18450 | 1436 rc = -EINVAL; |
keir@18450 | 1437 if ( rc < 0 && rc != -EAGAIN && rc != -EINTR ) |
keir@18197 | 1438 { |
keir@18450 | 1439 MEM_LOG("Failure in alloc_l3_table: entry %d", i); |
keir@18450 | 1440 while ( i-- > 0 ) |
keir@18450 | 1441 { |
keir@18450 | 1442 if ( !is_guest_l3_slot(i) ) |
keir@18450 | 1443 continue; |
keir@18450 | 1444 unadjust_guest_l3e(pl3e[i], d); |
keir@18780 | 1445 put_page_from_l3e(pl3e[i], pfn, 0, 0); |
keir@18450 | 1446 } |
keir@18197 | 1447 } |
kaf24@3782 | 1448 |
kaf24@5394 | 1449 unmap_domain_page(pl3e); |
keir@18450 | 1450 return rc > 0 ? 0 : rc; |
kaf24@3782 | 1451 } |
kaf24@5275 | 1452 |
kaf24@5275 | 1453 #if CONFIG_PAGING_LEVELS >= 4 |
keir@18450 | 1454 static int alloc_l4_table(struct page_info *page, int preemptible) |
kaf24@3782 | 1455 { |
kaf24@3782 | 1456 struct domain *d = page_get_owner(page); |
kaf24@8764 | 1457 unsigned long pfn = page_to_mfn(page); |
kaf24@3782 | 1458 l4_pgentry_t *pl4e = page_to_virt(page); |
keir@18450 | 1459 unsigned int i; |
keir@18780 | 1460 int rc = 0, partial = page->partial_pte; |
keir@18780 | 1461 |
keir@18780 | 1462 for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES; |
keir@18780 | 1463 i++, partial = 0 ) |
kaf24@6094 | 1464 { |
keir@18450 | 1465 if ( !is_guest_l4_slot(d, i) || |
keir@18780 | 1466 (rc = get_page_from_l4e(pl4e[i], pfn, d, |
keir@18780 | 1467 partial, preemptible)) > 0 ) |
keir@18197 | 1468 continue; |
keir@18197 | 1469 |
keir@18450 | 1470 if ( rc == -EAGAIN ) |
keir@18450 | 1471 { |
keir@18450 | 1472 page->nr_validated_ptes = i; |
keir@18780 | 1473 page->partial_pte = partial ?: 1; |
keir@18450 | 1474 } |
keir@18450 | 1475 else if ( rc == -EINTR ) |
keir@18450 | 1476 { |
keir@18450 | 1477 if ( i ) |
keir@18450 | 1478 { |
keir@18450 | 1479 page->nr_validated_ptes = i; |
keir@18450 | 1480 page->partial_pte = 0; |
keir@18450 | 1481 rc = -EAGAIN; |
keir@18450 | 1482 } |
keir@18450 | 1483 } |
keir@18450 | 1484 else if ( rc < 0 ) |
keir@18450 | 1485 { |
keir@18450 | 1486 MEM_LOG("Failure in alloc_l4_table: entry %d", i); |
keir@18450 | 1487 while ( i-- > 0 ) |
keir@18450 | 1488 if ( is_guest_l4_slot(d, i) ) |
keir@18780 | 1489 put_page_from_l4e(pl4e[i], pfn, 0, 0); |
keir@18450 | 1490 } |
keir@18450 | 1491 if ( rc < 0 ) |
keir@18450 | 1492 return rc; |
kfraser@11264 | 1493 |
ack@13295 | 1494 adjust_guest_l4e(pl4e[i], d); |
kaf24@6077 | 1495 } |
kaf24@3782 | 1496 |
kaf24@3791 | 1497 /* Xen private mappings. */ |
kaf24@3791 | 1498 memcpy(&pl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT], |
kaf24@3791 | 1499 &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT], |
kaf24@3791 | 1500 ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t)); |
kaf24@3791 | 1501 pl4e[l4_table_offset(LINEAR_PT_VIRT_START)] = |
kaf24@5288 | 1502 l4e_from_pfn(pfn, __PAGE_HYPERVISOR); |
kaf24@3791 | 1503 pl4e[l4_table_offset(PERDOMAIN_VIRT_START)] = |
ack@13297 | 1504 l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3), |
ack@13297 | 1505 __PAGE_HYPERVISOR); |
kaf24@3791 | 1506 |
keir@18450 | 1507 return rc > 0 ? 0 : rc; |
kaf24@3782 | 1508 } |
kaf24@5399 | 1509 #else |
keir@18450 | 1510 #define alloc_l4_table(page, preemptible) (-EINVAL) |
kaf24@5399 | 1511 #endif |
kaf24@3782 | 1512 |
kaf24@3782 | 1513 |
kaf24@8764 | 1514 static void free_l1_table(struct page_info *page) |
kaf24@3757 | 1515 { |
kaf24@3757 | 1516 struct domain *d = page_get_owner(page); |
kaf24@8764 | 1517 unsigned long pfn = page_to_mfn(page); |
kaf24@3757 | 1518 l1_pgentry_t *pl1e; |
keir@18450 | 1519 unsigned int i; |
kaf24@3757 | 1520 |
kaf24@5394 | 1521 pl1e = map_domain_page(pfn); |
kaf24@3757 | 1522 |
kaf24@3775 | 1523 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) |
kaf24@3791 | 1524 if ( is_guest_l1_slot(i) ) |
kaf24@3791 | 1525 put_page_from_l1e(pl1e[i], d); |
kaf24@3757 | 1526 |
kaf24@5394 | 1527 unmap_domain_page(pl1e); |
kaf24@3757 | 1528 } |
kaf24@3757 | 1529 |
kaf24@3757 | 1530 |
keir@18450 | 1531 static int free_l2_table(struct page_info *page, int preemptible) |
kaf24@3782 | 1532 { |
keir@20341 | 1533 #ifdef __x86_64__ |
ack@13295 | 1534 struct domain *d = page_get_owner(page); |
ack@13295 | 1535 #endif |
kaf24@8764 | 1536 unsigned long pfn = page_to_mfn(page); |
kaf24@3782 | 1537 l2_pgentry_t *pl2e; |
keir@18450 | 1538 unsigned int i = page->nr_validated_ptes - 1; |
keir@18450 | 1539 int err = 0; |
kaf24@3782 | 1540 |
kaf24@5394 | 1541 pl2e = map_domain_page(pfn); |
kaf24@3782 | 1542 |
keir@18450 | 1543 ASSERT(page->nr_validated_ptes); |
keir@18450 | 1544 do { |
keir@18450 | 1545 if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) && |
keir@18450 | 1546 put_page_from_l2e(pl2e[i], pfn) == 0 && |
keir@18450 | 1547 preemptible && i && hypercall_preempt_check() ) |
keir@18450 | 1548 { |
keir@18450 | 1549 page->nr_validated_ptes = i; |
keir@18450 | 1550 err = -EAGAIN; |
keir@18450 | 1551 } |
keir@18450 | 1552 } while ( !err && i-- ); |
kaf24@3782 | 1553 |
kaf24@5394 | 1554 unmap_domain_page(pl2e); |
kfraser@11522 | 1555 |
keir@18450 | 1556 if ( !err ) |
keir@18450 | 1557 page->u.inuse.type_info &= ~PGT_pae_xen_l2; |
keir@18450 | 1558 |
keir@18450 | 1559 return err; |
kaf24@3782 | 1560 } |
kaf24@3782 | 1561 |
keir@18450 | 1562 static int free_l3_table(struct page_info *page, int preemptible) |
kaf24@3782 | 1563 { |
ack@13295 | 1564 struct domain *d = page_get_owner(page); |
kaf24@8764 | 1565 unsigned long pfn = page_to_mfn(page); |
kaf24@5275 | 1566 l3_pgentry_t *pl3e; |
keir@18780 | 1567 int rc = 0, partial = page->partial_pte; |
keir@18780 | 1568 unsigned int i = page->nr_validated_ptes - !partial; |
kaf24@3782 | 1569 |
kaf24@5394 | 1570 pl3e = map_domain_page(pfn); |
kaf24@5275 | 1571 |
keir@18450 | 1572 do { |
kaf24@3791 | 1573 if ( is_guest_l3_slot(i) ) |
ack@13295 | 1574 { |
keir@18780 | 1575 rc = put_page_from_l3e(pl3e[i], pfn, partial, preemptible); |
keir@18780 | 1576 if ( rc < 0 ) |
keir@18780 | 1577 break; |
keir@18780 | 1578 partial = 0; |
keir@18450 | 1579 if ( rc > 0 ) |
keir@18450 | 1580 continue; |
ack@13295 | 1581 unadjust_guest_l3e(pl3e[i], d); |
ack@13295 | 1582 } |
keir@18450 | 1583 } while ( i-- ); |
kaf24@5275 | 1584 |
kaf24@5394 | 1585 unmap_domain_page(pl3e); |
keir@18450 | 1586 |
keir@18450 | 1587 if ( rc == -EAGAIN ) |
keir@18450 | 1588 { |
keir@18450 | 1589 page->nr_validated_ptes = i; |
keir@18780 | 1590 page->partial_pte = partial ?: -1; |
keir@18450 | 1591 } |
keir@18450 | 1592 else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 ) |
keir@18450 | 1593 { |
keir@18450 | 1594 page->nr_validated_ptes = i + 1; |
keir@18450 | 1595 page->partial_pte = 0; |
keir@18450 | 1596 rc = -EAGAIN; |
keir@18450 | 1597 } |
keir@18450 | 1598 return rc > 0 ? 0 : rc; |
kaf24@3782 | 1599 } |
kaf24@3782 | 1600 |
kaf24@5275 | 1601 #if CONFIG_PAGING_LEVELS >= 4 |
keir@18450 | 1602 static int free_l4_table(struct page_info *page, int preemptible) |
kaf24@3782 | 1603 { |
ack@14033 | 1604 struct domain *d = page_get_owner(page); |
kaf24@8764 | 1605 unsigned long pfn = page_to_mfn(page); |
kaf24@3782 | 1606 l4_pgentry_t *pl4e = page_to_virt(page); |
keir@18780 | 1607 int rc = 0, partial = page->partial_pte; |
keir@18780 | 1608 unsigned int i = page->nr_validated_ptes - !partial; |
kaf24@3782 | 1609 |
keir@18450 | 1610 do { |
ack@14033 | 1611 if ( is_guest_l4_slot(d, i) ) |
keir@18780 | 1612 rc = put_page_from_l4e(pl4e[i], pfn, partial, preemptible); |
keir@18780 | 1613 if ( rc < 0 ) |
keir@18780 | 1614 break; |
keir@18780 | 1615 partial = 0; |
keir@18780 | 1616 } while ( i-- ); |
keir@18450 | 1617 |
keir@18450 | 1618 if ( rc == -EAGAIN ) |
keir@18450 | 1619 { |
keir@18450 | 1620 page->nr_validated_ptes = i; |
keir@18780 | 1621 page->partial_pte = partial ?: -1; |
keir@18450 | 1622 } |
keir@18450 | 1623 else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 ) |
keir@18450 | 1624 { |
keir@18450 | 1625 page->nr_validated_ptes = i + 1; |
keir@18450 | 1626 page->partial_pte = 0; |
keir@18450 | 1627 rc = -EAGAIN; |
keir@18450 | 1628 } |
keir@18450 | 1629 return rc > 0 ? 0 : rc; |
kaf24@3782 | 1630 } |
keir@18450 | 1631 #else |
keir@18450 | 1632 #define free_l4_table(page, preemptible) (-EINVAL) |
kaf24@5275 | 1633 #endif |
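/*
 * Editorial note, not part of the original file: the L3/L4 alloc and free
 * paths above encode their restart state in two fields.  nr_validated_ptes
 * is the slot to resume at; partial_pte says how to treat that slot: 0 means
 * it was handled completely, a positive value (set on -EAGAIN in the alloc
 * paths) means it was only partially validated, so a later put drops just
 * its type reference, and a negative value (set in the free paths) means it
 * is partially freed.  How a restart reads that state, as a sketch:
 */
static inline void restart_state_sketch(const struct page_info *page,
                                        unsigned int *i, int *partial)
{
    *partial = page->partial_pte;
    /* The free paths step back one slot unless a partial slot remains. */
    *i = page->nr_validated_ptes - !*partial;
}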
kaf24@3782 | 1634 |
keir@19141 | 1635 static int page_lock(struct page_info *page) |
keir@17884 | 1636 { |
keir@19141 | 1637 unsigned long x, nx; |
keir@19141 | 1638 |
keir@19141 | 1639 do { |
keir@19141 | 1640 while ( (x = page->u.inuse.type_info) & PGT_locked ) |
keir@17884 | 1641 cpu_relax(); |
keir@19141 | 1642 nx = x + (1 | PGT_locked); |
keir@19141 | 1643 if ( !(x & PGT_validated) || |
keir@19141 | 1644 !(x & PGT_count_mask) || |
keir@19141 | 1645 !(nx & PGT_count_mask) ) |
keir@19141 | 1646 return 0; |
keir@19141 | 1647 } while ( cmpxchg(&page->u.inuse.type_info, x, nx) != x ); |
keir@19141 | 1648 |
keir@19141 | 1649 return 1; |
keir@17884 | 1650 } |
keir@17884 | 1651 |
keir@17884 | 1652 static void page_unlock(struct page_info *page) |
keir@17884 | 1653 { |
keir@19141 | 1654 unsigned long x, nx, y = page->u.inuse.type_info; |
keir@19141 | 1655 |
keir@19141 | 1656 do { |
keir@19141 | 1657 x = y; |
keir@19141 | 1658 nx = x - (1 | PGT_locked); |
keir@19141 | 1659 } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x ); |
keir@17884 | 1660 } |
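/*
 * Editorial sketch, not in the original source: page_lock() sets PGT_locked
 * and takes an extra type reference in a single cmpxchg, refusing pages that
 * are not validated, referenced page tables.  Typical use brackets an
 * in-place PTE update:
 */
static inline int locked_update_sketch(struct page_info *page)
{
    if ( !page_lock(page) )
        return -EINVAL;  /* not a validated page table, or count saturated */

    /* ... perform the guarded in-place update here ... */

    page_unlock(page);
    return 0;
}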
Tim@13143 | 1661 |
Tim@13143 | 1662 /* How to write an entry to the guest pagetables. |
Tim@13143 | 1663 * Returns 0 for failure (pointer not valid), 1 for success. */ |
Tim@13143 | 1664 static inline int update_intpte(intpte_t *p, |
Tim@13143 | 1665 intpte_t old, |
Tim@13143 | 1666 intpte_t new, |
Tim@13143 | 1667 unsigned long mfn, |
keir@16756 | 1668 struct vcpu *v, |
keir@16756 | 1669 int preserve_ad) |
kaf24@3757 | 1670 { |
tdeegan@11189 | 1671 int rv = 1; |
Tim@13143 | 1672 #ifndef PTE_UPDATE_WITH_CMPXCHG |
keir@16756 | 1673 if ( !preserve_ad ) |
keir@16756 | 1674 { |
keir@16756 | 1675 rv = paging_write_guest_entry(v, p, new, _mfn(mfn)); |
keir@16756 | 1676 } |
keir@16756 | 1677 else |
keir@16756 | 1678 #endif |
kaf24@3757 | 1679 { |
Tim@13143 | 1680 intpte_t t = old; |
tdeegan@11189 | 1681 for ( ; ; ) |
kfraser@10487 | 1682 { |
keir@16756 | 1683 intpte_t _new = new; |
keir@16756 | 1684 if ( preserve_ad ) |
keir@16756 | 1685 _new |= old & (_PAGE_ACCESSED | _PAGE_DIRTY); |
keir@16756 | 1686 |
keir@16756 | 1687 rv = paging_cmpxchg_guest_entry(v, p, &t, _new, _mfn(mfn)); |
Tim@13143 | 1688 if ( unlikely(rv == 0) ) |
tdeegan@11189 | 1689 { |
tdeegan@11189 | 1690 MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte |
keir@16756 | 1691 ": saw %" PRIpte, old, _new, t); |
tdeegan@11189 | 1692 break; |
tdeegan@11189 | 1693 } |
tdeegan@11189 | 1694 |
Tim@13143 | 1695 if ( t == old ) |
tdeegan@11189 | 1696 break; |
tdeegan@11189 | 1697 |
tdeegan@11189 | 1698 /* Only the Accessed/Dirty flags are allowed to change. */ |
Tim@13143 | 1699 BUG_ON((t ^ old) & ~(intpte_t)(_PAGE_ACCESSED|_PAGE_DIRTY)); |
Tim@13143 | 1700 |
Tim@13143 | 1701 old = t; |
kfraser@10487 | 1702 } |
kaf24@3757 | 1703 } |
tdeegan@11189 | 1704 return rv; |
kaf24@3757 | 1705 } |
kaf24@3757 | 1706 |
Tim@13143 | 1707 /* Macro that wraps the appropriate type-changes around update_intpte(). |
Tim@13143 | 1708 * Arguments are: type, ptr, old, new, mfn, vcpu */ |
keir@16756 | 1709 #define UPDATE_ENTRY(_t,_p,_o,_n,_m,_v,_ad) \ |
kfraser@15100 | 1710 update_intpte(&_t ## e_get_intpte(*(_p)), \ |
Tim@13143 | 1711 _t ## e_get_intpte(_o), _t ## e_get_intpte(_n), \ |
keir@16756 | 1712 (_m), (_v), (_ad)) |
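/*
 * Editorial note, not part of the original file: UPDATE_ENTRY() merely
 * pastes the per-level accessors around update_intpte().  The L1 case,
 * written out by hand (hypothetical helper, no A/D preservation):
 */
static inline int update_l1e_sketch(l1_pgentry_t *pl1e, l1_pgentry_t ol1e,
                                    l1_pgentry_t nl1e, unsigned long gl1mfn,
                                    struct vcpu *v)
{
    /* Equivalent to UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0). */
    return update_intpte(&l1e_get_intpte(*pl1e),
                         l1e_get_intpte(ol1e), l1e_get_intpte(nl1e),
                         gl1mfn, v, 0);
}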
kaf24@3757 | 1713 |
kaf24@3757 | 1714 /* Update the L1 entry at pl1e to new value nl1e. */ |
keir@19421 | 1715 static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, |
keir@19421 | 1716 unsigned long gl1mfn, int preserve_ad, |
keir@20132 | 1717 struct vcpu *pt_vcpu, struct domain *pg_dom) |
kaf24@3757 | 1718 { |
kaf24@3757 | 1719 l1_pgentry_t ol1e; |
keir@20132 | 1720 struct domain *pt_dom = pt_vcpu->domain; |
kfraser@15212 | 1721 unsigned long mfn; |
keir@18826 | 1722 p2m_type_t p2mt; |
keir@17884 | 1723 int rc = 1; |
keir@17884 | 1724 |
mafetter@4629 | 1725 if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) ) |
keir@19141 | 1726 return 0; |
mafetter@4629 | 1727 |
keir@20132 | 1728 if ( unlikely(paging_mode_refcounts(pt_dom)) ) |
keir@17884 | 1729 { |
keir@20132 | 1730 rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu, preserve_ad); |
keir@17884 | 1731 return rc; |
keir@17884 | 1732 } |
tim@11687 | 1733 |
mafetter@4629 | 1734 if ( l1e_get_flags(nl1e) & _PAGE_PRESENT ) |
kaf24@3757 | 1735 { |
kfraser@12601 | 1736 /* Translate foreign guest addresses. */ |
keir@20132 | 1737 mfn = mfn_x(gfn_to_mfn(pg_dom, l1e_get_pfn(nl1e), &p2mt)); |
keir@18826 | 1738 if ( !p2m_is_ram(p2mt) || unlikely(mfn == INVALID_MFN) ) |
keir@19141 | 1739 return 0; |
kfraser@15212 | 1740 ASSERT((mfn & ~(PADDR_MASK >> PAGE_SHIFT)) == 0); |
kfraser@15212 | 1741 nl1e = l1e_from_pfn(mfn, l1e_get_flags(nl1e)); |
kfraser@12601 | 1742 |
keir@20132 | 1743 if ( unlikely(l1e_get_flags(nl1e) & l1_disallow_mask(pt_dom)) ) |
kaf24@3791 | 1744 { |
kaf24@6368 | 1745 MEM_LOG("Bad L1 flags %x", |
keir@20132 | 1746 l1e_get_flags(nl1e) & l1_disallow_mask(pt_dom)); |
kaf24@3791 | 1747 return 0; |
kaf24@3791 | 1748 } |
kaf24@3791 | 1749 |
kaf24@3791 | 1750 /* Fast path for identical mapping, r/w and presence. */ |
tdeegan@11189 | 1751 if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) ) |
keir@17099 | 1752 { |
keir@20132 | 1753 adjust_guest_l1e(nl1e, pt_dom); |
keir@20132 | 1754 rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu, |
keir@17884 | 1755 preserve_ad); |
keir@17884 | 1756 return rc; |
keir@17099 | 1757 } |
kaf24@3757 | 1758 |
keir@20132 | 1759 if ( unlikely(!get_page_from_l1e(nl1e, pt_dom, pg_dom)) ) |
keir@19141 | 1760 return 0; |
kaf24@3757 | 1761 |
keir@20132 | 1762 adjust_guest_l1e(nl1e, pt_dom); |
keir@20132 | 1763 if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu, |
keir@16756 | 1764 preserve_ad)) ) |
kaf24@3757 | 1765 { |
keir@17884 | 1766 ol1e = nl1e; |
keir@17884 | 1767 rc = 0; |
kaf24@3757 | 1768 } |
kaf24@3757 | 1769 } |
keir@20132 | 1770 else if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu, |
keir@17884 | 1771 preserve_ad)) ) |
mafetter@4179 | 1772 { |
keir@17884 | 1773 return 0; |
mafetter@4179 | 1774 } |
mafetter@4837 | 1775 |
keir@20132 | 1776 put_page_from_l1e(ol1e, pt_dom); |
keir@17884 | 1777 return rc; |
kaf24@3757 | 1778 } |
kaf24@3757 | 1779 |
kaf24@3791 | 1780 |
kaf24@3791 | 1781 /* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */ |
kaf24@3791 | 1782 static int mod_l2_entry(l2_pgentry_t *pl2e, |
kaf24@3791 | 1783 l2_pgentry_t nl2e, |
kaf24@5275 | 1784 unsigned long pfn, |
keir@19421 | 1785 int preserve_ad, |
keir@19421 | 1786 struct vcpu *vcpu) |
kaf24@3791 | 1787 { |
kaf24@3791 | 1788 l2_pgentry_t ol2e; |
keir@19421 | 1789 struct domain *d = vcpu->domain; |
keir@17884 | 1790 struct page_info *l2pg = mfn_to_page(pfn); |
keir@19141 | 1791 unsigned long type = l2pg->u.inuse.type_info; |
keir@17884 | 1792 int rc = 1; |
ack@13295 | 1793 |
ack@13295 | 1794 if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) ) |
kaf24@3791 | 1795 { |
kaf24@3791 | 1796 MEM_LOG("Illegal L2 update attempt in Xen-private area %p", pl2e); |
kaf24@3791 | 1797 return 0; |
kaf24@3791 | 1798 } |
kaf24@3791 | 1799 |
mafetter@4629 | 1800 if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) ) |
keir@19141 | 1801 return 0; |
mafetter@4629 | 1802 |
mafetter@4629 | 1803 if ( l2e_get_flags(nl2e) & _PAGE_PRESENT ) |
kaf24@3791 | 1804 { |
mafetter@4629 | 1805 if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) ) |
kaf24@3791 | 1806 { |
kaf24@6368 | 1807 MEM_LOG("Bad L2 flags %x", |
kaf24@5283 | 1808 l2e_get_flags(nl2e) & L2_DISALLOW_MASK); |
kaf24@3791 | 1809 return 0; |
kaf24@3791 | 1810 } |
kaf24@3791 | 1811 |
kaf24@3791 | 1812 /* Fast path for identical mapping and presence. */ |
keir@17099 | 1813 if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT) ) |
keir@17099 | 1814 { |
keir@17099 | 1815 adjust_guest_l2e(nl2e, d); |
keir@19421 | 1816 rc = UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu, preserve_ad); |
keir@17884 | 1817 return rc; |
keir@17099 | 1818 } |
kaf24@3791 | 1819 |
keir@18450 | 1820 if ( unlikely(get_page_from_l2e(nl2e, pfn, d) < 0) ) |
keir@19141 | 1821 return 0; |
kaf24@3791 | 1822 |
keir@17099 | 1823 adjust_guest_l2e(nl2e, d); |
keir@19421 | 1824 if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu, |
keir@16756 | 1825 preserve_ad)) ) |
kaf24@3791 | 1826 { |
keir@17884 | 1827 ol2e = nl2e; |
keir@17884 | 1828 rc = 0; |
kaf24@3791 | 1829 } |
kaf24@3791 | 1830 } |
keir@19421 | 1831 else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu, |
keir@16756 | 1832 preserve_ad)) ) |
mafetter@4179 | 1833 { |
kaf24@6094 | 1834 return 0; |
mafetter@4179 | 1835 } |
kaf24@3791 | 1836 |
kaf24@3791 | 1837 put_page_from_l2e(ol2e, pfn); |
keir@17884 | 1838 return rc; |
kaf24@3791 | 1839 } |
kaf24@3791 | 1840 |
kaf24@3791 | 1841 /* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */ |
kaf24@3791 | 1842 static int mod_l3_entry(l3_pgentry_t *pl3e, |
kaf24@3791 | 1843 l3_pgentry_t nl3e, |
keir@16756 | 1844 unsigned long pfn, |
keir@18450 | 1845 int preserve_ad, |
keir@19421 | 1846 int preemptible, |
keir@19421 | 1847 struct vcpu *vcpu) |
kaf24@3791 | 1848 { |
kaf24@3791 | 1849 l3_pgentry_t ol3e; |
keir@19421 | 1850 struct domain *d = vcpu->domain; |
keir@18450 | 1851 int rc = 0; |
kaf24@3791 | 1852 |
kaf24@3791 | 1853 if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) ) |
kaf24@3791 | 1854 { |
kaf24@3791 | 1855 MEM_LOG("Illegal L3 update attempt in Xen-private area %p", pl3e); |
keir@18450 | 1856 return -EINVAL; |
kaf24@3791 | 1857 } |
kaf24@3791 | 1858 |
kaf24@5399 | 1859 /* |
kaf24@5399 | 1860 * Disallow updates to final L3 slot. It contains Xen mappings, and it |
kaf24@5399 | 1861 * would be a pain to ensure they remain continuously valid throughout. |
kaf24@5399 | 1862 */ |
kfraser@15012 | 1863 if ( is_pv_32bit_domain(d) && (pgentry_ptr_to_slot(pl3e) >= 3) ) |
keir@18450 | 1864 return -EINVAL; |
kaf24@5399 | 1865 |
mafetter@4629 | 1866 if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) ) |
keir@19141 | 1867 return -EFAULT; |
mafetter@4629 | 1868 |
mafetter@4629 | 1869 if ( l3e_get_flags(nl3e) & _PAGE_PRESENT ) |
kaf24@3791 | 1870 { |
ack@13295 | 1871 if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) ) |
kaf24@3791 | 1872 { |
kaf24@6368 | 1873 MEM_LOG("Bad L3 flags %x", |
ack@13295 | 1874 l3e_get_flags(nl3e) & l3_disallow_mask(d)); |
keir@18450 | 1875 return -EINVAL; |
kaf24@3791 | 1876 } |
kaf24@3791 | 1877 |
kaf24@3791 | 1878 /* Fast path for identical mapping and presence. */ |
keir@17099 | 1879 if ( !l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT) ) |
keir@17099 | 1880 { |
keir@17099 | 1881 adjust_guest_l3e(nl3e, d); |
keir@19421 | 1882 rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, vcpu, preserve_ad); |
keir@18450 | 1883 return rc ? 0 : -EFAULT; |
keir@17099 | 1884 } |
kaf24@3791 | 1885 |
keir@18780 | 1886 rc = get_page_from_l3e(nl3e, pfn, d, 0, preemptible); |
keir@18450 | 1887 if ( unlikely(rc < 0) ) |
keir@19141 | 1888 return rc; |
keir@18450 | 1889 rc = 0; |
tdeegan@11189 | 1890 |
keir@17099 | 1891 adjust_guest_l3e(nl3e, d); |
keir@19421 | 1892 if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, vcpu, |
keir@16756 | 1893 preserve_ad)) ) |
kaf24@3791 | 1894 { |
keir@17884 | 1895 ol3e = nl3e; |
keir@18450 | 1896 rc = -EFAULT; |
kaf24@3791 | 1897 } |
kaf24@3791 | 1898 } |
keir@19421 | 1899 else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, vcpu, |
keir@16756 | 1900 preserve_ad)) ) |
kaf24@6094 | 1901 { |
keir@18450 | 1902 return -EFAULT; |
kaf24@6094 | 1903 } |
kaf24@6094 | 1904 |
keir@18450 | 1905 if ( likely(rc == 0) ) |
keir@18197 | 1906 { |
keir@18197 | 1907 if ( !create_pae_xen_mappings(d, pl3e) ) |
keir@18197 | 1908 BUG(); |
keir@18197 | 1909 |
keir@18197 | 1910 pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e); |
keir@18197 | 1911 } |
kaf24@10211 | 1912 |
keir@18780 | 1913 put_page_from_l3e(ol3e, pfn, 0, 0); |
keir@17884 | 1914 return rc; |
kaf24@3791 | 1915 } |
kaf24@3791 | 1916 |
kaf24@5275 | 1917 #if CONFIG_PAGING_LEVELS >= 4 |
kaf24@3791 | 1918 |
kaf24@3791 | 1919 /* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */ |
keir@16543 | 1920 static int mod_l4_entry(l4_pgentry_t *pl4e, |
kaf24@3791 | 1921 l4_pgentry_t nl4e, |
keir@16756 | 1922 unsigned long pfn, |
keir@18450 | 1923 int preserve_ad, |
keir@19421 | 1924 int preemptible, |
keir@19421 | 1925 struct vcpu *vcpu) |
kaf24@3791 | 1926 { |
keir@19421 | 1927 struct domain *d = vcpu->domain; |
kaf24@3791 | 1928 l4_pgentry_t ol4e; |
keir@18450 | 1929 int rc = 0; |
kaf24@3791 | 1930 |
ack@14033 | 1931 if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) ) |
kaf24@3791 | 1932 { |
kaf24@3791 | 1933 MEM_LOG("Illegal L4 update attempt in Xen-private area %p", pl4e); |
keir@18450 | 1934 return -EINVAL; |
kaf24@3791 | 1935 } |
kaf24@3791 | 1936 |
mafetter@4629 | 1937 if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) ) |
keir@19141 | 1938 return -EFAULT; |
mafetter@4629 | 1939 |
mafetter@4629 | 1940 if ( l4e_get_flags(nl4e) & _PAGE_PRESENT ) |
kaf24@3791 | 1941 { |
mafetter@4629 | 1942 if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) ) |
kaf24@3791 | 1943 { |
kaf24@6368 | 1944 MEM_LOG("Bad L4 flags %x", |
kaf24@5283 | 1945 l4e_get_flags(nl4e) & L4_DISALLOW_MASK); |
keir@18450 | 1946 return -EINVAL; |
kaf24@3791 | 1947 } |
kaf24@3791 | 1948 |
kaf24@3791 | 1949 /* Fast path for identical mapping and presence. */ |
keir@17099 | 1950 if ( !l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT) ) |
keir@17099 | 1951 { |
keir@17099 | 1952 adjust_guest_l4e(nl4e, d); |
keir@19421 | 1953 rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, vcpu, preserve_ad); |
keir@18450 | 1954 return rc ? 0 : -EFAULT; |
keir@17099 | 1955 } |
keir@16543 | 1956 |
keir@18780 | 1957 rc = get_page_from_l4e(nl4e, pfn, d, 0, preemptible); |
keir@18450 | 1958 if ( unlikely(rc < 0) ) |
keir@19141 | 1959 return rc; |
keir@18450 | 1960 rc = 0; |
kaf24@3791 | 1961 |
keir@17099 | 1962 adjust_guest_l4e(nl4e, d); |
keir@19421 | 1963 if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, vcpu, |
keir@16756 | 1964 preserve_ad)) ) |
kaf24@3791 | 1965 { |
keir@17884 | 1966 ol4e = nl4e; |
keir@18450 | 1967 rc = -EFAULT; |
kaf24@3791 | 1968 } |
kaf24@3791 | 1969 } |
keir@19421 | 1970 else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, vcpu, |
keir@16756 | 1971 preserve_ad)) ) |
kaf24@6077 | 1972 { |
keir@18450 | 1973 return -EFAULT; |
kaf24@6094 | 1974 } |
kaf24@3791 | 1975 |
keir@18780 | 1976 put_page_from_l4e(ol4e, pfn, 0, 0); |
keir@17884 | 1977 return rc; |
kaf24@3791 | 1978 } |
kaf24@3791 | 1979 |
kaf24@5275 | 1980 #endif |
kaf24@3791 | 1981 |
keir@16530 | 1982 void put_page(struct page_info *page) |
keir@16530 | 1983 { |
keir@19127 | 1984 unsigned long nx, x, y = page->count_info; |
keir@16530 | 1985 |
keir@16530 | 1986 do { |
keir@19414 | 1987 ASSERT((y & PGC_count_mask) != 0); |
keir@16530 | 1988 x = y; |
keir@16530 | 1989 nx = x - 1; |
keir@16530 | 1990 } |
keir@16530 | 1991 while ( unlikely((y = cmpxchg(&page->count_info, x, nx)) != x) ); |
keir@16530 | 1992 |
keir@16530 | 1993 if ( unlikely((nx & PGC_count_mask) == 0) ) |
keir@16530 | 1994 { |
keir@16530 | 1995 cleanup_page_cacheattr(page); |
keir@16530 | 1996 free_domheap_page(page); |
keir@16530 | 1997 } |
keir@16530 | 1998 } |
keir@16530 | 1999 |
keir@16530 | 2000 |
keir@19306 | 2001 struct domain *page_get_owner_and_reference(struct page_info *page) |
keir@16530 | 2002 { |
keir@19127 | 2003 unsigned long x, y = page->count_info; |
keir@16530 | 2004 |
keir@16530 | 2005 do { |
keir@19126 | 2006 x = y; |
keir@19412 | 2007 /* |
keir@19412 | 2008 * Count == 0: Page is not allocated, so we cannot take a reference. |
keir@19412 | 2009 * Count == -1: Reference count would wrap, which is invalid. |
keir@19412 | 2010 * Count == -2: Remaining unused ref is reserved for get_page_light(). |
keir@19412 | 2011 */ |
keir@19412 | 2012 if ( unlikely(((x + 2) & PGC_count_mask) <= 2) ) |
keir@19306 | 2013 return NULL; |
keir@16530 | 2014 } |
keir@19126 | 2015 while ( (y = cmpxchg(&page->count_info, x, x + 1)) != x ); |
keir@19126 | 2016 |
keir@19306 | 2017 return page_get_owner(page); |
keir@19306 | 2018 } |
keir@19306 | 2019 |
keir@19306 | 2020 |
keir@19306 | 2021 int get_page(struct page_info *page, struct domain *domain) |
keir@19306 | 2022 { |
keir@19306 | 2023 struct domain *owner = page_get_owner_and_reference(page); |
keir@19306 | 2024 |
keir@19306 | 2025 if ( likely(owner == domain) ) |
keir@19126 | 2026 return 1; |
keir@19126 | 2027 |
keir@19412 | 2028 if ( owner != NULL ) |
keir@19412 | 2029 put_page(page); |
keir@19126 | 2030 |
keir@19126 | 2031 if ( !_shadow_mode_refcounts(domain) && !domain->is_dying ) |
keir@19126 | 2032 gdprintk(XENLOG_INFO, |
keir@19132 | 2033 "Error pfn %lx: rd=%p, od=%p, caf=%08lx, taf=%" |
keir@19132 | 2034 PRtype_info "\n", |
keir@19306 | 2035 page_to_mfn(page), domain, owner, |
keir@19306 | 2036 page->count_info, page->u.inuse.type_info); |
keir@19126 | 2037 return 0; |
keir@16530 | 2038 } |
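/*
 * Editorial sketch, not in the original source: the usual bracket around
 * get_page()/put_page().  A general reference pins the frame for the
 * duration of an access; page_get_owner_and_reference() refuses counts of
 * 0 (page free), -1 (would wrap) and -2 (reserved for get_page_light()).
 */
static inline int with_page_ref_sketch(struct page_info *page,
                                       struct domain *d)
{
    if ( !get_page(page, d) )
        return -EINVAL;  /* free, foreign-owned, or count saturated */

    /* ... safely access the frame here ... */

    put_page(page);
    return 0;
}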
keir@16530 | 2039 |
keir@18785 | 2040 /* |
keir@18785 | 2041 * Special version of get_page() to be used exclusively when |
keir@18785 | 2042 * - a page is known to already have a non-zero reference count |
keir@18785 | 2043 * - the page does not need its owner to be checked |
keir@18785 | 2044 * - it will not be called more than once without first dropping the |
keir@18785 | 2045 * reference acquired here. |
keir@18785 | 2046 * Due to get_page() reserving one reference, this call cannot fail. |
keir@18785 | 2047 */ |
keir@18785 | 2048 static void get_page_light(struct page_info *page) |
keir@18785 | 2049 { |
keir@19127 | 2050 unsigned long x, nx, y = page->count_info; |
keir@18785 | 2051 |
keir@18785 | 2052 do { |
keir@18785 | 2053 x = y; |
keir@18785 | 2054 nx = x + 1; |
keir@18785 | 2055 BUG_ON(!(x & PGC_count_mask)); /* Not allocated? */ |
keir@18785 | 2056 BUG_ON(!(nx & PGC_count_mask)); /* Overflow? */ |
keir@18785 | 2057 y = cmpxchg(&page->count_info, x, nx); |
keir@18785 | 2058 } |
keir@18785 | 2059 while ( unlikely(y != x) ); |
keir@18785 | 2060 } |
keir@18785 | 2061 |
keir@18450 | 2062 static int alloc_page_type(struct page_info *page, unsigned long type, |
keir@18450 | 2063 int preemptible) |
kaf24@3757 | 2064 { |
kfraser@11588 | 2065 struct domain *owner = page_get_owner(page); |
keir@18450 | 2066 int rc; |
kfraser@11588 | 2067 |
kfraser@11588 | 2068 /* A page table is dirtied when its type count becomes non-zero. */ |
kfraser@11588 | 2069 if ( likely(owner != NULL) ) |
Tim@15293 | 2070 paging_mark_dirty(owner, page_to_mfn(page)); |
kfraser@11588 | 2071 |
kaf24@5275 | 2072 switch ( type & PGT_type_mask ) |
kaf24@3757 | 2073 { |
kaf24@3757 | 2074 case PGT_l1_page_table: |
keir@18658 | 2075 rc = alloc_l1_table(page); |
keir@18450 | 2076 break; |
kaf24@3757 | 2077 case PGT_l2_page_table: |
keir@18450 | 2078 rc = alloc_l2_table(page, type, preemptible); |
keir@18450 | 2079 break; |
kaf24@3782 | 2080 case PGT_l3_page_table: |
keir@18450 | 2081 rc = alloc_l3_table(page, preemptible); |
keir@18450 | 2082 break; |
kaf24@3782 | 2083 case PGT_l4_page_table: |
keir@18450 | 2084 rc = alloc_l4_table(page, preemptible); |
keir@18450 | 2085 break; |
keir@17425 | 2086 case PGT_seg_desc_page: |
keir@18450 | 2087 rc = alloc_segdesc_page(page); |
keir@18450 | 2088 break; |
kaf24@3757 | 2089 default: |
keir@19127 | 2090 printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%lx\n", |
kaf24@3757 | 2091 type, page->u.inuse.type_info, |
kaf24@3757 | 2092 page->count_info); |
keir@18450 | 2093 rc = -EINVAL; |
kaf24@3757 | 2094 BUG(); |
kaf24@3757 | 2095 } |
kaf24@3757 | 2096 |
keir@18450 | 2097 /* No need for atomic update of type_info here: no one else updates it. */ |
keir@18450 | 2098 wmb(); |
keir@18450 | 2099 if ( rc == -EAGAIN ) |
keir@18450 | 2100 { |
keir@18785 | 2101 get_page_light(page); |
keir@18450 | 2102 page->u.inuse.type_info |= PGT_partial; |
keir@18450 | 2103 } |
keir@18785 | 2104 else if ( rc == -EINTR ) |
keir@18450 | 2105 { |
keir@18450 | 2106 ASSERT((page->u.inuse.type_info & |
keir@18450 | 2107 (PGT_count_mask|PGT_validated|PGT_partial)) == 1); |
keir@18450 | 2108 page->u.inuse.type_info &= ~PGT_count_mask; |
keir@18450 | 2109 } |
keir@18450 | 2110 else if ( rc ) |
keir@18450 | 2111 { |
keir@18450 | 2112 ASSERT(rc < 0); |
keir@18450 | 2113 MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %" |
keir@19127 | 2114 PRtype_info ": caf=%08lx taf=%" PRtype_info, |
keir@18450 | 2115 page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)), |
keir@18450 | 2116 type, page->count_info, page->u.inuse.type_info); |
keir@18450 | 2117 page->u.inuse.type_info = 0; |
keir@18450 | 2118 } |
keir@18450 | 2119 else |
keir@18450 | 2120 { |
keir@18450 | 2121 page->u.inuse.type_info |= PGT_validated; |
keir@18450 | 2122 } |
keir@18450 | 2123 |
keir@18450 | 2124 return rc; |
kaf24@3757 | 2125 } |
kaf24@3757 | 2126 |
kaf24@3757 | 2127 |
keir@18450 | 2128 int free_page_type(struct page_info *page, unsigned long type, |
keir@18450 | 2129 int preemptible) |
kaf24@3757 | 2130 { |
mafetter@4179 | 2131 struct domain *owner = page_get_owner(page); |
kaf24@8764 | 2132 unsigned long gmfn; |
keir@18450 | 2133 int rc; |
mafetter@4837 | 2134 |
keir@19197 | 2135 if ( likely(owner != NULL) && unlikely(paging_mode_enabled(owner)) ) |
mafetter@4837 | 2136 { |
keir@19197 | 2137 /* A page table is dirtied when its type count becomes zero. */ |
keir@19197 | 2138 paging_mark_dirty(owner, page_to_mfn(page)); |
keir@19197 | 2139 |
keir@19197 | 2140 if ( shadow_mode_refcounts(owner) ) |
keir@19197 | 2141 return 0; |
keir@19197 | 2142 |
keir@19197 | 2143 gmfn = mfn_to_gmfn(owner, page_to_mfn(page)); |
keir@19197 | 2144 ASSERT(VALID_M2P(gmfn)); |
keir@20726 | 2145 /* Page sharing not supported for shadowed domains */ |
keir@20726 | 2146 if ( !SHARED_M2P(gmfn) ) |
keir@20726 | 2147 shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn)); |
mafetter@4837 | 2148 } |
kaf24@3757 | 2149 |
keir@18450 | 2150 if ( !(type & PGT_partial) ) |
keir@18450 | 2151 { |
keir@18450 | 2152 page->nr_validated_ptes = 1U << PAGETABLE_ORDER; |
keir@18450 | 2153 page->partial_pte = 0; |
keir@18450 | 2154 } |
keir@18769 | 2155 |
kaf24@7430 | 2156 switch ( type & PGT_type_mask ) |
kaf24@3757 | 2157 { |
kaf24@3757 | 2158 case PGT_l1_page_table: |
kaf24@3757 | 2159 free_l1_table(page); |
keir@18450 | 2160 rc = 0; |
kaf24@3757 | 2161 break; |
kaf24@3757 | 2162 case PGT_l2_page_table: |
keir@18450 | 2163 rc = free_l2_table(page, preemptible); |
kaf24@3757 | 2164 break; |
kaf24@3782 | 2165 case PGT_l3_page_table: |
keir@18450 | 2166 #if CONFIG_PAGING_LEVELS == 3 |
keir@18450 | 2167 if ( !(type & PGT_partial) ) |
keir@18450 | 2168 page->nr_validated_ptes = L3_PAGETABLE_ENTRIES; |
kaf24@5275 | 2169 #endif |
keir@18450 | 2170 rc = free_l3_table(page, preemptible); |
kaf24@3782 | 2171 break; |
keir@18450 | 2172 case PGT_l4_page_table: |
keir@18450 | 2173 rc = free_l4_table(page, preemptible); |
keir@18450 | 2174 break; |
kaf24@3757 | 2175 default: |
keir@18450 | 2176 MEM_LOG("type %lx pfn %lx", type, page_to_mfn(page)); |
keir@18450 | 2177 rc = -EINVAL; |
kaf24@3757 | 2178 BUG(); |
kaf24@3757 | 2179 } |
keir@18450 | 2180 |
keir@18769 | 2181 return rc; |
keir@18769 | 2182 } |
keir@18769 | 2183 |
keir@18769 | 2184 |
keir@18769 | 2185 static int __put_final_page_type( |
keir@18769 | 2186 struct page_info *page, unsigned long type, int preemptible) |
keir@18769 | 2187 { |
keir@18769 | 2188 int rc = free_page_type(page, type, preemptible); |
keir@18769 | 2189 |
keir@18450 | 2190 /* No need for atomic update of type_info here: no one else updates it. */ |
keir@18450 | 2191 if ( rc == 0 ) |
keir@18450 | 2192 { |
keir@18450 | 2193 /* |
keir@18450 | 2194 * Record TLB information for flush later. We do not stamp page tables |
keir@18450 | 2195 * when running in shadow mode: |
keir@18450 | 2196 * 1. Pointless, since it's the shadow pt's which must be tracked. |
keir@18450 | 2197 * 2. Shadow mode reuses this field for shadowed page tables to |
keir@18450 | 2198 * store flags info -- we don't want to conflict with that. |
keir@18450 | 2199 */ |
keir@18450 | 2200 if ( !(shadow_mode_enabled(page_get_owner(page)) && |
keir@18450 | 2201 (page->count_info & PGC_page_table)) ) |
keir@18450 | 2202 page->tlbflush_timestamp = tlbflush_current_time(); |
keir@18450 | 2203 wmb(); |
keir@18450 | 2204 page->u.inuse.type_info--; |
keir@18450 | 2205 } |
keir@18450 | 2206 else if ( rc == -EINTR ) |
keir@18450 | 2207 { |
keir@18785 | 2208 ASSERT((page->u.inuse.type_info & |
keir@18785 | 2209 (PGT_count_mask|PGT_validated|PGT_partial)) == 1); |
keir@18450 | 2210 if ( !(shadow_mode_enabled(page_get_owner(page)) && |
keir@18450 | 2211 (page->count_info & PGC_page_table)) ) |
keir@18450 | 2212 page->tlbflush_timestamp = tlbflush_current_time(); |
keir@18450 | 2213 wmb(); |
keir@18450 | 2214 page->u.inuse.type_info |= PGT_validated; |
keir@18450 | 2215 } |
keir@18450 | 2216 else |
keir@18450 | 2217 { |
keir@18450 | 2218 BUG_ON(rc != -EAGAIN); |
keir@18450 | 2219 wmb(); |
keir@18785 | 2220 get_page_light(page); |
keir@18450 | 2221 page->u.inuse.type_info |= PGT_partial; |
keir@18450 | 2222 } |
keir@18450 | 2223 |
keir@18450 | 2224 return rc; |
kaf24@3757 | 2225 } |
kaf24@3757 | 2226 |
kaf24@3757 | 2227 |
keir@18450 | 2228 static int __put_page_type(struct page_info *page, |
keir@18450 | 2229 int preemptible) |
kaf24@3757 | 2230 { |
kaf24@6077 | 2231 unsigned long nx, x, y = page->u.inuse.type_info; |
keir@18785 | 2232 int rc = 0; |
keir@18780 | 2233 |
keir@18450 | 2234 for ( ; ; ) |
keir@18450 | 2235 { |
kaf24@3757 | 2236 x = y; |
cwc22@4061 | 2237 nx = x - 1; |
kaf24@3757 | 2238 |
kaf24@3757 | 2239 ASSERT((x & PGT_count_mask) != 0); |
kaf24@3757 | 2240 |
kaf24@3757 | 2241 if ( unlikely((nx & PGT_count_mask) == 0) ) |
kaf24@3757 | 2242 { |
kaf24@3757 | 2243 if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) && |
keir@18450 | 2244 likely(nx & (PGT_validated|PGT_partial)) ) |
kaf24@3757 | 2245 { |
kaf24@3757 | 2246 /* |
kaf24@3757 | 2247 * Page-table pages must be unvalidated when count is zero. The |
kaf24@3757 | 2248 * 'free' is safe because the refcnt is non-zero and validated |
kaf24@3757 | 2249 * bit is clear => other ops will spin or fail. |
kaf24@3757 | 2250 */ |
keir@18450 | 2251 nx = x & ~(PGT_validated|PGT_partial); |
keir@18450 | 2252 if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, |
keir@18450 | 2253 x, nx)) != x) ) |
keir@18450 | 2254 continue; |
keir@18785 | 2255 /* We cleared the 'valid bit', so we do the cleanup. */ |
keir@18785 | 2256 rc = __put_final_page_type(page, x, preemptible); |
keir@18780 | 2257 if ( x & PGT_partial ) |
keir@18780 | 2258 put_page(page); |
keir@18785 | 2259 break; |
kaf24@3757 | 2260 } |
tdeegan@11189 | 2261 |
kfraser@11588 | 2262 /* |
kfraser@11588 | 2263 * Record TLB information for flush later. We do not stamp page |
kfraser@11588 | 2264 * tables when running in shadow mode: |
kfraser@11588 | 2265 * 1. Pointless, since it's the shadow pt's which must be tracked. |
kfraser@11588 | 2266 * 2. Shadow mode reuses this field for shadowed page tables to |
kfraser@11588 | 2267 * store flags info -- we don't want to conflict with that. |
kfraser@11588 | 2268 */ |
tim@11686 | 2269 if ( !(shadow_mode_enabled(page_get_owner(page)) && |
tim@11686 | 2270 (page->count_info & PGC_page_table)) ) |
kfraser@11588 | 2271 page->tlbflush_timestamp = tlbflush_current_time(); |
kaf24@3757 | 2272 } |
keir@18450 | 2273 |
keir@18450 | 2274 if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) ) |
keir@18450 | 2275 break; |
keir@18450 | 2276 |
keir@18450 | 2277 if ( preemptible && hypercall_preempt_check() ) |
keir@18450 | 2278 return -EINTR; |
kaf24@3757 | 2279 } |
keir@18450 | 2280 |
keir@18785 | 2281 return rc; |
kaf24@3757 | 2282 } |
kaf24@3757 | 2283 |
kaf24@3757 | 2284 |
keir@18450 | 2285 static int __get_page_type(struct page_info *page, unsigned long type, |
keir@18450 | 2286 int preemptible) |
kaf24@3757 | 2287 { |
kaf24@6077 | 2288 unsigned long nx, x, y = page->u.inuse.type_info; |
keir@18785 | 2289 int rc = 0; |
kaf24@3757 | 2290 |
kfraser@11522 | 2291 ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2))); |
kfraser@11522 | 2292 |
keir@18450 | 2293 for ( ; ; ) |
keir@18450 | 2294 { |
kaf24@3757 | 2295 x = y; |
kaf24@3757 | 2296 nx = x + 1; |
kaf24@3757 | 2297 if ( unlikely((nx & PGT_count_mask) == 0) ) |
kaf24@3757 | 2298 { |
kaf24@8764 | 2299 MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page)); |
keir@18450 | 2300 return -EINVAL; |
kaf24@3757 | 2301 } |
kaf24@3757 | 2302 else if ( unlikely((x & PGT_count_mask) == 0) ) |
kaf24@3757 | 2303 { |
tim@11686 | 2304 struct domain *d = page_get_owner(page); |
tim@11686 | 2305 |
keir@17927 | 2306 /* Normally we should never let a page go from type count 0 |
keir@17927 | 2307 * to type count 1 when it is shadowed. One exception: |
keir@17927 | 2308 * out-of-sync shadowed pages are allowed to become |
keir@17927 | 2309 * writeable. */ |
keir@17927 | 2310 if ( d && shadow_mode_enabled(d) |
keir@17927 | 2311 && (page->count_info & PGC_page_table) |
keir@17927 | 2312 && !((page->shadow_flags & (1u<<29)) |
keir@17927 | 2313 && type == PGT_writable_page) ) |
keir@17927 | 2314 shadow_remove_all_shadows(d->vcpu[0], _mfn(page_to_mfn(page))); |
tim@11686 | 2315 |
kfraser@11522 | 2316 ASSERT(!(x & PGT_pae_xen_l2)); |
kfraser@11522 | 2317 if ( (x & PGT_type_mask) != type ) |
kaf24@3757 | 2318 { |
kfraser@11522 | 2319 /* |
kfraser@11522 | 2320 * On type change we check to flush stale TLB entries. This |
kfraser@11522 | 2321 * may be unnecessary (e.g., page was GDT/LDT) but those |
kfraser@11522 | 2322 * circumstances should be very rare. |
kfraser@11522 | 2323 */ |
tim@11686 | 2324 cpumask_t mask = d->domain_dirty_cpumask; |
tim@11686 | 2325 |
tim@11686 | 2326 /* Don't flush if the timestamp is old enough */ |
kfraser@11522 | 2327 tlbflush_filter(mask, page->tlbflush_timestamp); |
kfraser@11522 | 2328 |
kfraser@11588 | 2329 if ( unlikely(!cpus_empty(mask)) && |
kfraser@11588 | 2330 /* Shadow mode: track only writable pages. */ |
kfraser@11588 | 2331 (!shadow_mode_enabled(page_get_owner(page)) || |
kfraser@11588 | 2332 ((nx & PGT_type_mask) == PGT_writable_page)) ) |
kaf24@3757 | 2333 { |
kfraser@14625 | 2334 perfc_incr(need_flush_tlb_flush); |
keir@19689 | 2335 flush_tlb_mask(&mask); |
kaf24@3757 | 2336 } |
kaf24@3757 | 2337 |
keir@16732 | 2338 /* We lose existing type and validity. */ |
kfraser@11522 | 2339 nx &= ~(PGT_type_mask | PGT_validated); |
kaf24@3757 | 2340 nx |= type; |
kaf24@3757 | 2341 |
kaf24@3757 | 2342 /* No special validation needed for writable pages. */ |
kaf24@3757 | 2343 /* Page tables and GDT/LDT need to be scanned for validity. */ |
kaf24@3757 | 2344 if ( type == PGT_writable_page ) |
kaf24@3757 | 2345 nx |= PGT_validated; |
kaf24@3757 | 2346 } |
kaf24@3757 | 2347 } |
kfraser@11522 | 2348 else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) ) |
mafetter@4179 | 2349 { |
keir@16162 | 2350 /* Don't log failure if it could be a recursive-mapping attempt. */ |
keir@16162 | 2351 if ( ((x & PGT_type_mask) == PGT_l2_page_table) && |
keir@16162 | 2352 (type == PGT_l1_page_table) ) |
keir@18450 | 2353 return -EINVAL; |
keir@16162 | 2354 if ( ((x & PGT_type_mask) == PGT_l3_page_table) && |
keir@16162 | 2355 (type == PGT_l2_page_table) ) |
keir@18450 | 2356 return -EINVAL; |
keir@16162 | 2357 if ( ((x & PGT_type_mask) == PGT_l4_page_table) && |
keir@16162 | 2358 (type == PGT_l3_page_table) ) |
keir@18450 | 2359 return -EINVAL; |
keir@16162 | 2360 MEM_LOG("Bad type (saw %" PRtype_info " != exp %" PRtype_info ") " |
keir@16162 | 2361 "for mfn %lx (pfn %lx)", |
keir@16162 | 2362 x, type, page_to_mfn(page), |
keir@16162 | 2363 get_gpfn_from_mfn(page_to_mfn(page))); |
keir@18450 | 2364 return -EINVAL; |
kfraser@11522 | 2365 } |
kfraser@11522 | 2366 else if ( unlikely(!(x & PGT_validated)) ) |
kfraser@11522 | 2367 { |
keir@18450 | 2368 if ( !(x & PGT_partial) ) |
keir@18450 | 2369 { |
keir@18450 | 2370 /* Someone else is updating validation of this page. Wait... */ |
keir@18450 | 2371 while ( (y = page->u.inuse.type_info) == x ) |
keir@18450 | 2372 { |
keir@18450 | 2373 if ( preemptible && hypercall_preempt_check() ) |
keir@18450 | 2374 return -EINTR; |
keir@18450 | 2375 cpu_relax(); |
keir@18450 | 2376 } |
keir@18450 | 2377 continue; |
keir@18450 | 2378 } |
keir@18450 | 2379 /* Type ref count was left at 1 when PGT_partial got set. */ |
keir@18450 | 2380 ASSERT((x & PGT_count_mask) == 1); |
keir@18450 | 2381 nx = x & ~PGT_partial; |
kaf24@3757 | 2382 } |
keir@18450 | 2383 |
keir@18450 | 2384 if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) ) |
keir@18450 | 2385 break; |
keir@18450 | 2386 |
keir@18450 | 2387 if ( preemptible && hypercall_preempt_check() ) |
keir@18450 | 2388 return -EINTR; |
kaf24@3757 | 2389 } |
kaf24@3757 | 2390 |
keir@17761 | 2391 if ( unlikely((x & PGT_type_mask) != type) ) |
keir@17761 | 2392 { |
keir@17761 | 2393 /* Special pages should not be accessible from devices. */ |
keir@17761 | 2394 struct domain *d = page_get_owner(page); |
keir@20793 | 2395 if ( d && !is_hvm_domain(d) && unlikely(need_iommu(d)) ) |
keir@17761 | 2396 { |
keir@17761 | 2397 if ( (x & PGT_type_mask) == PGT_writable_page ) |
keir@17761 | 2398 iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page))); |
keir@17761 | 2399 else if ( type == PGT_writable_page ) |
keir@17761 | 2400 iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)), |
keir@17761 | 2401 page_to_mfn(page)); |
keir@17761 | 2402 } |
keir@17761 | 2403 } |
keir@17761 | 2404 |
kaf24@3757 | 2405 if ( unlikely(!(nx & PGT_validated)) ) |
kaf24@3757 | 2406 { |
keir@18450 | 2407 if ( !(x & PGT_partial) ) |
kaf24@3757 | 2408 { |
keir@18450 | 2409 page->nr_validated_ptes = 0; |
keir@18450 | 2410 page->partial_pte = 0; |
kaf24@3757 | 2411 } |
keir@18785 | 2412 rc = alloc_page_type(page, type, preemptible); |
kaf24@3757 | 2413 } |
kaf24@3757 | 2414 |
keir@18785 | 2415 if ( (x & PGT_partial) && !(nx & PGT_partial) ) |
keir@18785 | 2416 put_page(page); |
keir@18785 | 2417 |
keir@18785 | 2418 return rc; |
keir@18450 | 2419 } |
keir@18450 | 2420 |
keir@18450 | 2421 void put_page_type(struct page_info *page) |
keir@18450 | 2422 { |
keir@18450 | 2423 int rc = __put_page_type(page, 0); |
keir@18450 | 2424 ASSERT(rc == 0); |
keir@18450 | 2425 (void)rc; |
kaf24@3757 | 2426 } |
kaf24@3757 | 2427 |
keir@18450 | 2428 int get_page_type(struct page_info *page, unsigned long type) |
keir@18450 | 2429 { |
keir@18450 | 2430 int rc = __get_page_type(page, type, 0); |
keir@18450 | 2431 if ( likely(rc == 0) ) |
keir@18450 | 2432 return 1; |
keir@18450 | 2433 ASSERT(rc == -EINVAL); |
keir@18450 | 2434 return 0; |
keir@18450 | 2435 } |
keir@18450 | 2436 |
keir@18450 | 2437 int put_page_type_preemptible(struct page_info *page) |
keir@18450 | 2438 { |
keir@18450 | 2439 return __put_page_type(page, 1); |
keir@18450 | 2440 } |
keir@18450 | 2441 |
keir@18450 | 2442 int get_page_type_preemptible(struct page_info *page, unsigned long type) |
keir@18450 | 2443 { |
keir@18450 | 2444 return __get_page_type(page, type, 1); |
keir@18450 | 2445 } |
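/*
 * Editorial sketch, not part of the original file: the non-preemptible
 * wrappers above assert that nothing was interrupted, while the
 * *_preemptible variants may hand back -EINTR (nothing committed yet) or
 * -EAGAIN (progress recorded via PGT_partial).  Real callers fold these
 * into a hypercall continuation; a synchronous illustration only:
 */
static inline int get_type_sync_sketch(struct page_info *page,
                                       unsigned long type)
{
    int rc;

    do {
        rc = get_page_type_preemptible(page, type);
    } while ( rc == -EINTR || rc == -EAGAIN );

    return rc;
}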
kaf24@3757 | 2446 |
keir@16369 | 2447 void cleanup_page_cacheattr(struct page_info *page) |
keir@16369 | 2448 { |
keir@19642 | 2449 uint32_t cacheattr = |
keir@19642 | 2450 (page->count_info & PGC_cacheattr_mask) >> PGC_cacheattr_base; |
keir@16369 | 2451 |
keir@16369 | 2452 if ( likely(cacheattr == 0) ) |
keir@16369 | 2453 return; |
keir@16369 | 2454 |
keir@16369 | 2455 page->count_info &= ~PGC_cacheattr_mask; |
keir@16369 | 2456 |
keir@16376 | 2457 BUG_ON(is_xen_heap_page(page)); |
keir@16369 | 2458 |
keir@19670 | 2459 update_xen_mappings(page_to_mfn(page), 0); |
keir@16369 | 2460 } |
keir@16369 | 2461 |
keir@16369 | 2462 |
mafetter@4179 | 2463 int new_guest_cr3(unsigned long mfn) |
kaf24@3757 | 2464 { |
keir@19198 | 2465 struct vcpu *curr = current; |
keir@19198 | 2466 struct domain *d = curr->domain; |
mafetter@4179 | 2467 int okay; |
mafetter@4179 | 2468 unsigned long old_base_mfn; |
mafetter@4179 | 2469 |
keir@20341 | 2470 #ifdef __x86_64__ |
kfraser@15012 | 2471 if ( is_pv_32on64_domain(d) ) |
ack@13296 | 2472 { |
Tim@13938 | 2473 okay = paging_mode_refcounts(d) |
kaf24@13441 | 2474 ? 0 /* Old code was broken, but what should it be? */ |
ack@14033 | 2475 : mod_l4_entry( |
keir@19198 | 2476 __va(pagetable_get_paddr(curr->arch.guest_table)), |
ack@14033 | 2477 l4e_from_pfn( |
ack@14033 | 2478 mfn, |
ack@14033 | 2479 (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)), |
keir@19421 | 2480 pagetable_get_pfn(curr->arch.guest_table), 0, 0, curr) == 0; |
ack@13296 | 2481 if ( unlikely(!okay) ) |
ack@13296 | 2482 { |
ack@13296 | 2483 MEM_LOG("Error while installing new compat baseptr %lx", mfn); |
ack@13296 | 2484 return 0; |
ack@13296 | 2485 } |
ack@13296 | 2486 |
keir@19199 | 2487 invalidate_shadow_ldt(curr, 0); |
keir@19198 | 2488 write_ptbase(curr); |
ack@13296 | 2489 |
ack@13296 | 2490 return 1; |
ack@13296 | 2491 } |
ack@13296 | 2492 #endif |
Tim@13938 | 2493 okay = paging_mode_refcounts(d) |
kaf24@13441 | 2494 ? get_page_from_pagenr(mfn, d) |
keir@18780 | 2495 : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 0); |
kaf24@13441 | 2496 if ( unlikely(!okay) ) |
kaf24@3757 | 2497 { |
kaf24@13441 | 2498 MEM_LOG("Error while installing new baseptr %lx", mfn); |
kaf24@13441 | 2499 return 0; |
kaf24@9037 | 2500 } |
kaf24@9037 | 2501 |
keir@19199 | 2502 invalidate_shadow_ldt(curr, 0); |
keir@19198 | 2503 |
keir@19198 | 2504 old_base_mfn = pagetable_get_pfn(curr->arch.guest_table); |
keir@19198 | 2505 |
keir@19198 | 2506 curr->arch.guest_table = pagetable_from_pfn(mfn); |
keir@19198 | 2507 update_cr3(curr); |
keir@19198 | 2508 |
keir@19198 | 2509 write_ptbase(curr); |
kaf24@9037 | 2510 |
kaf24@9037 | 2511 if ( likely(old_base_mfn != 0) ) |
kaf24@9037 | 2512 { |
Tim@13938 | 2513 if ( paging_mode_refcounts(d) ) |
kaf24@8764 | 2514 put_page(mfn_to_page(old_base_mfn)); |
mafetter@4179 | 2515 else |
kaf24@8764 | 2516 put_page_and_type(mfn_to_page(old_base_mfn)); |
kaf24@3757 | 2517 } |
kaf24@9037 | 2518 |
kaf24@9037 | 2519 return 1; |
kaf24@3757 | 2520 } |
kaf24@3757 | 2521 |
keir@20132 | 2522 static struct domain *get_pg_owner(domid_t domid) |
kaf24@3757 | 2523 { |
keir@20132 | 2524 struct domain *pg_owner = NULL, *curr = current->domain; |
kaf24@8830 | 2525 |
kaf24@8830 | 2526 if ( likely(domid == DOMID_SELF) ) |
keir@20132 | 2527 { |
keir@20132 | 2528 pg_owner = rcu_lock_domain(curr); |
kaf24@4426 | 2529 goto out; |
keir@20132 | 2530 } |
keir@20132 | 2531 |
keir@20132 | 2532 if ( unlikely(domid == curr->domain_id) ) |
kaf24@8830 | 2533 { |
keir@16612 | 2534 MEM_LOG("Domain cannot specify itself as a foreign domain"); |
keir@20132 | 2535 goto out; |
kaf24@8830 | 2536 } |
keir@20132 | 2537 |
keir@20132 | 2538 if ( unlikely(paging_mode_translate(curr)) ) |
kfraser@12601 | 2539 { |
kfraser@12601 | 2540 MEM_LOG("Cannot mix foreign mappings with translated domains"); |
keir@20132 | 2541 goto out; |
kfraser@12601 | 2542 } |
keir@20132 | 2543 |
keir@20132 | 2544 switch ( domid ) |
kaf24@4426 | 2545 { |
keir@16894 | 2546 case DOMID_IO: |
keir@20132 | 2547 pg_owner = rcu_lock_domain(dom_io); |
keir@16894 | 2548 break; |
keir@16894 | 2549 case DOMID_XEN: |
keir@20132 | 2550 if ( !IS_PRIV(curr) ) |
keir@20132 | 2551 { |
keir@16612 | 2552 MEM_LOG("Cannot set foreign dom"); |
kaf24@3757 | 2553 break; |
kaf24@3757 | 2554 } |
keir@20132 | 2555 pg_owner = rcu_lock_domain(dom_xen); |
keir@16894 | 2556 break; |
keir@16894 | 2557 default: |
keir@20132 | 2558 if ( (pg_owner = rcu_lock_domain_by_id(domid)) == NULL ) |
kaf24@3757 | 2559 { |
keir@16894 | 2560 MEM_LOG("Unknown domain '%u'", domid); |
keir@16894 | 2561 break; |
kaf24@3757 | 2562 } |
keir@20132 | 2563 if ( !IS_PRIV_FOR(curr, pg_owner) ) |
keir@17357 | 2564 { |
keir@16894 | 2565 MEM_LOG("Cannot set foreign dom"); |
keir@20132 | 2566 rcu_unlock_domain(pg_owner); |
keir@20132 | 2567 pg_owner = NULL; |
keir@16894 | 2568 } |
keir@16894 | 2569 break; |
kaf24@4426 | 2570 } |
kaf24@4426 | 2571 |
kaf24@4426 | 2572 out: |
keir@20132 | 2573 return pg_owner; |
keir@20132 | 2574 } |
keir@20132 | 2575 |
keir@20132 | 2576 static void put_pg_owner(struct domain *pg_owner) |
keir@20132 | 2577 { |
keir@20132 | 2578 rcu_unlock_domain(pg_owner); |
kaf24@4426 | 2579 } |
kaf24@4426 | 2580 |
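/*
 * Translate a guest-supplied VCPU bitmap into the set of physical CPUs
 * those VCPUs have recently run on (the union of their vcpu_dirty_cpumask
 * values).  The bitmap is read in BITS_PER_LONG-bit chunks for native
 * guests and 32-bit chunks for 32-on-64 guests; returns 0 on success or
 * -EFAULT if copying from the guest fails.
 */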
keir@19824 | 2581 static inline int vcpumask_to_pcpumask( |
keir@19824 | 2582 struct domain *d, XEN_GUEST_HANDLE(const_void) bmap, cpumask_t *pmask) |
kaf24@4459 | 2583 { |
keir@19824 | 2584 unsigned int vcpu_id, vcpu_bias, offs; |
keir@19824 | 2585 unsigned long vmask; |
kaf24@5327 | 2586 struct vcpu *v; |
keir@19824 | 2587 bool_t is_native = !is_pv_32on64_domain(d); |
keir@19824 | 2588 |
keir@19824 | 2589 cpus_clear(*pmask); |
keir@19824 | 2590 for ( vmask = 0, offs = 0; ; ++offs ) |
kaf24@4459 | 2591 { |
keir@19824 | 2592 vcpu_bias = offs * (is_native ? BITS_PER_LONG : 32); |
keir@19826 | 2593 if ( vcpu_bias >= d->max_vcpus ) |
keir@19824 | 2594 return 0; |
keir@19824 | 2595 |
keir@19824 | 2596 if ( unlikely(is_native ? |
keir@19824 | 2597 copy_from_guest_offset(&vmask, bmap, offs, 1) : |
keir@19824 | 2598 copy_from_guest_offset((unsigned int *)&vmask, bmap, |
keir@19824 | 2599 offs, 1)) ) |
keir@19824 | 2600 { |
keir@19824 | 2601 cpus_clear(*pmask); |
keir@19824 | 2602 return -EFAULT; |
keir@19824 | 2603 } |
keir@19824 | 2604 |
keir@19824 | 2605 while ( vmask ) |
keir@19824 | 2606 { |
keir@19824 | 2607 vcpu_id = find_first_set_bit(vmask); |
keir@19824 | 2608 vmask &= ~(1UL << vcpu_id); |
keir@19824 | 2609 vcpu_id += vcpu_bias; |
keir@19826 | 2610 if ( vcpu_id >= d->max_vcpus ) |
keir@19824 | 2611 return 0; |
keir@19824 | 2612 if ( (v = d->vcpu[vcpu_id]) != NULL ) |
keir@19824 | 2613 cpus_or(*pmask, *pmask, v->vcpu_dirty_cpumask); |
keir@19824 | 2614 } |
kaf24@4459 | 2615 } |
kaf24@4459 | 2616 } |
kaf24@4459 | 2617 |
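/*
 * On 32-bit builds not every MFN is covered by the direct mapping, so the
 * CLEAR_PAGE/COPY_PAGE destinations below are mapped through a per-CPU
 * PAE-highmem fixmap slot; on 64-bit builds the direct mapping suffices
 * and these helpers reduce to mfn_to_virt().
 */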
keir@18762 | 2618 #ifdef __i386__ |
keir@18762 | 2619 static inline void *fixmap_domain_page(unsigned long mfn) |
keir@18762 | 2620 { |
keir@18762 | 2621 unsigned int cpu = smp_processor_id(); |
keir@18762 | 2622 void *ptr = (void *)fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu); |
keir@18762 | 2623 |
keir@18762 | 2624 l1e_write(fix_pae_highmem_pl1e - cpu, |
keir@18762 | 2625 l1e_from_pfn(mfn, __PAGE_HYPERVISOR)); |
keir@18762 | 2626 flush_tlb_one_local(ptr); |
keir@18762 | 2627 return ptr; |
keir@18762 | 2628 } |
keir@18762 | 2629 static inline void fixunmap_domain_page(const void *ptr) |
keir@18762 | 2630 { |
keir@18762 | 2631 unsigned int cpu = virt_to_fix((unsigned long)ptr) - FIX_PAE_HIGHMEM_0; |
keir@18762 | 2632 |
keir@18762 | 2633 l1e_write(fix_pae_highmem_pl1e - cpu, l1e_empty()); |
keir@18762 | 2634 this_cpu(make_cr3_timestamp) = this_cpu(tlbflush_time); |
keir@18762 | 2635 } |
keir@18762 | 2636 #else |
keir@18762 | 2637 #define fixmap_domain_page(mfn) mfn_to_virt(mfn) |
keir@18762 | 2638 #define fixunmap_domain_page(ptr) ((void)(ptr)) |
keir@18762 | 2639 #endif |
keir@18762 | 2640 |
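/*
 * do_mmuext_op() implements the HYPERVISOR_mmuext_op hypercall: a batch of
 * extended MMU commands (pin/unpin page tables, switch base pointers,
 * TLB/cache flushes, SET_LDT, CLEAR_PAGE, COPY_PAGE).  The batch is
 * preemptible: a continuation re-enters with the remaining count tagged
 * MMU_UPDATE_PREEMPTED, and the work completed so far is reported through
 * @pdone.
 *
 * Illustrative guest-side call (a sketch, not code from this file): a PV
 * guest pins an L2 page table with a one-element batch, where pfn_to_mfn()
 * stands for the guest's own pseudo-physical-to-machine lookup:
 *
 *     struct mmuext_op op;
 *     op.cmd      = MMUEXT_PIN_L2_TABLE;
 *     op.arg1.mfn = pfn_to_mfn(l2_pfn);
 *     HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
 */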
kaf24@4426 | 2641 int do_mmuext_op( |
kaf24@9904 | 2642 XEN_GUEST_HANDLE(mmuext_op_t) uops, |
kaf24@4426 | 2643 unsigned int count, |
kaf24@9904 | 2644 XEN_GUEST_HANDLE(uint) pdone, |
kaf24@4426 | 2645 unsigned int foreigndom) |
kaf24@4426 | 2646 { |
kaf24@4426 | 2647 struct mmuext_op op; |
kaf24@11019 | 2648 int rc = 0, i = 0, okay; |
keir@20769 | 2649 unsigned long type; |
kaf24@9197 | 2650 unsigned int done = 0; |
keir@19198 | 2651 struct vcpu *curr = current; |
keir@19198 | 2652 struct domain *d = curr->domain; |
keir@20132 | 2653 struct domain *pg_owner; |
kaf24@4426 | 2654 |
kaf24@4426 | 2655 if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) |
kaf24@4426 | 2656 { |
kaf24@4426 | 2657 count &= ~MMU_UPDATE_PREEMPTED; |
kaf24@9197 | 2658 if ( unlikely(!guest_handle_is_null(pdone)) ) |
kaf24@9197 | 2659 (void)copy_from_guest(&done, pdone, 1); |
kaf24@4426 | 2660 } |
kfraser@14624 | 2661 else |
kfraser@14624 | 2662 perfc_incr(calls_to_mmuext_op); |
kaf24@4426 | 2663 |
kfraser@14181 | 2664 if ( unlikely(!guest_handle_okay(uops, count)) ) |
kfraser@14181 | 2665 { |
kfraser@14181 | 2666 rc = -EFAULT; |
kfraser@14181 | 2667 goto out; |
kfraser@14181 | 2668 } |
kfraser@14181 | 2669 |
keir@20132 | 2670 if ( (pg_owner = get_pg_owner(foreigndom)) == NULL ) |
kaf24@4426 | 2671 { |
kaf24@8830 | 2672 rc = -ESRCH; |
kaf24@4426 | 2673 goto out; |
kaf24@4426 | 2674 } |
kaf24@4426 | 2675 |
kaf24@4426 | 2676 for ( i = 0; i < count; i++ ) |
kaf24@4426 | 2677 { |
kaf24@4426 | 2678 if ( hypercall_preempt_check() ) |
kaf24@3757 | 2679 { |
keir@18450 | 2680 rc = -EAGAIN; |
kaf24@4426 | 2681 break; |
kaf24@3757 | 2682 } |
kaf24@4426 | 2683 |
kaf24@9197 | 2684 if ( unlikely(__copy_from_guest(&op, uops, 1) != 0) ) |
kaf24@3757 | 2685 { |
kaf24@9197 | 2686 MEM_LOG("Bad __copy_from_guest"); |
kaf24@4426 | 2687 rc = -EFAULT; |
kaf24@3757 | 2688 break; |
kaf24@3757 | 2689 } |
kaf24@3757 | 2690 |
keir@19970 | 2691 okay = 1; |
kaf24@4426 | 2692 |
kaf24@4426 | 2693 switch ( op.cmd ) |
kaf24@4426 | 2694 { |
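/*
 * Pinning (MMUEXT_PIN_Ln_TABLE) validates the frame as a level-n page
 * table and leaves a type plus general reference behind with _PGT_pinned
 * set, so the table stays validated even while not installed in any CR3.
 * MMUEXT_UNPIN_TABLE clears the flag and drops those references again.
 */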
kaf24@4426 | 2695 case MMUEXT_PIN_L1_TABLE: |
kfraser@11522 | 2696 type = PGT_l1_page_table; |
kaf24@9263 | 2697 goto pin_page; |
kaf24@9263 | 2698 |
kaf24@9255 | 2699 case MMUEXT_PIN_L2_TABLE: |
kfraser@11522 | 2700 type = PGT_l2_page_table; |
kfraser@11522 | 2701 goto pin_page; |
kfraser@11522 | 2702 |
kaf24@9255 | 2703 case MMUEXT_PIN_L3_TABLE: |
kfraser@11522 | 2704 type = PGT_l3_page_table; |
kfraser@11522 | 2705 goto pin_page; |
kfraser@11522 | 2706 |
kaf24@9255 | 2707 case MMUEXT_PIN_L4_TABLE: |
keir@20132 | 2708 if ( is_pv_32bit_domain(pg_owner) ) |
ack@13298 | 2709 break; |
kfraser@11522 | 2710 type = PGT_l4_page_table; |
kaf24@9255 | 2711 |
keir@20769 | 2712 pin_page: { |
keir@20769 | 2713 unsigned long mfn; |
keir@20769 | 2714 struct page_info *page; |
kfraser@15846 | 2715 |
kfraser@11522 | 2716 /* Ignore pinning of invalid paging levels. */ |
kfraser@11522 | 2717 if ( (op.cmd - MMUEXT_PIN_L1_TABLE) > (CONFIG_PAGING_LEVELS - 1) ) |
kfraser@11522 | 2718 break; |
kfraser@11522 | 2719 |
keir@20132 | 2720 if ( paging_mode_refcounts(pg_owner) ) |
kaf24@9263 | 2721 break; |
kaf24@9263 | 2722 |
keir@20769 | 2723 mfn = gmfn_to_mfn(pg_owner, op.arg1.mfn); |
keir@20132 | 2724 rc = get_page_and_type_from_pagenr(mfn, type, pg_owner, 0, 1); |
keir@18450 | 2725 okay = !rc; |
kaf24@4426 | 2726 if ( unlikely(!okay) ) |
kaf24@4426 | 2727 { |
keir@18450 | 2728 if ( rc == -EINTR ) |
keir@18450 | 2729 rc = -EAGAIN; |
keir@18450 | 2730 else if ( rc != -EAGAIN ) |
keir@18450 | 2731 MEM_LOG("Error while pinning mfn %lx", mfn); |
kaf24@4426 | 2732 break; |
kaf24@4426 | 2733 } |
keir@14888 | 2734 |
keir@20769 | 2735 page = mfn_to_page(mfn); |
keir@20769 | 2736 |
keir@20769 | 2737 if ( (rc = xsm_memory_pin_page(d, page)) != 0 ) |
keir@20769 | 2738 { |
keir@20769 | 2739 put_page_and_type(page); |
keir@20769 | 2740 okay = 0; |
keir@20769 | 2741 break; |
keir@20769 | 2742 } |
keir@20769 | 2743 |
kaf24@4426 | 2744 if ( unlikely(test_and_set_bit(_PGT_pinned, |
kaf24@4426 | 2745 &page->u.inuse.type_info)) ) |
kaf24@4426 | 2746 { |
kaf24@6841 | 2747 MEM_LOG("Mfn %lx already pinned", mfn); |
kaf24@4426 | 2748 put_page_and_type(page); |
kaf24@4426 | 2749 okay = 0; |
kaf24@4426 | 2750 break; |
kaf24@4426 | 2751 } |
steven@11579 | 2752 |
kfraser@11581 | 2753 /* A page is dirtied when its pin status is set. */ |
keir@20790 | 2754 paging_mark_dirty(pg_owner, mfn); |
steven@11579 | 2755 |
keir@14888 | 2756 /* We can race domain destruction (domain_relinquish_resources). */ |
keir@20132 | 2757 if ( unlikely(pg_owner != d) ) |
keir@14888 | 2758 { |
keir@14888 | 2759 int drop_ref; |
keir@20132 | 2760 spin_lock(&pg_owner->page_alloc_lock); |
keir@20132 | 2761 drop_ref = (pg_owner->is_dying && |
keir@14888 | 2762 test_and_clear_bit(_PGT_pinned, |
keir@14888 | 2763 &page->u.inuse.type_info)); |
keir@20132 | 2764 spin_unlock(&pg_owner->page_alloc_lock); |
keir@14888 | 2765 if ( drop_ref ) |
keir@14888 | 2766 put_page_and_type(page); |
keir@14888 | 2767 } |
kfraser@14225 | 2768 |
kaf24@4426 | 2769 break; |
keir@20769 | 2770 } |
keir@20769 | 2771 |
keir@20769 | 2772 case MMUEXT_UNPIN_TABLE: { |
keir@20769 | 2773 unsigned long mfn; |
keir@20769 | 2774 struct page_info *page; |
keir@20769 | 2775 |
keir@20790 | 2776 if ( paging_mode_refcounts(pg_owner) ) |
kaf24@8981 | 2777 break; |
kaf24@8981 | 2778 |
keir@20769 | 2779 mfn = gmfn_to_mfn(pg_owner, op.arg1.mfn); |
keir@20790 | 2780 if ( unlikely(!(okay = get_page_from_pagenr(mfn, pg_owner))) ) |
kaf24@4426 | 2781 { |
keir@20769 | 2782 MEM_LOG("Mfn %lx bad domain", mfn); |
keir@20769 | 2783 break; |
kaf24@4426 | 2784 } |
keir@20769 | 2785 |
keir@20769 | 2786 page = mfn_to_page(mfn); |
keir@20769 | 2787 |
keir@20769 | 2788 if ( !test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) ) |
kaf24@4426 | 2789 { |
kaf24@4426 | 2790 okay = 0; |
kaf24@4426 | 2791 put_page(page); |
kaf24@6841 | 2792 MEM_LOG("Mfn %lx not pinned", mfn); |
keir@20769 | 2793 break; |
kaf24@4426 | 2794 } |
keir@20769 | 2795 |
keir@20769 | 2796 put_page_and_type(page); |
keir@20769 | 2797 put_page(page); |
keir@20769 | 2798 |
keir@20769 | 2799 /* A page is dirtied when its pin status is cleared. */ |
keir@20790 | 2800 paging_mark_dirty(pg_owner, mfn); |
keir@20769 | 2801 |
kaf24@4426 | 2802 break; |
keir@20769 | 2803 } |
kaf24@4426 | 2804 |
kaf24@4426 | 2805 case MMUEXT_NEW_BASEPTR: |
keir@20769 | 2806 okay = new_guest_cr3(gmfn_to_mfn(d, op.arg1.mfn)); |
kaf24@4426 | 2807 break; |
kaf24@4426 | 2808 |
kaf24@4426 | 2809 #ifdef __x86_64__ |
kfraser@14181 | 2810 case MMUEXT_NEW_USER_BASEPTR: { |
keir@20769 | 2811 unsigned long old_mfn, mfn; |
keir@20769 | 2812 |
keir@20769 | 2813 mfn = gmfn_to_mfn(d, op.arg1.mfn); |
kfraser@14181 | 2814 if ( mfn != 0 ) |
tim@11687 | 2815 { |
Tim@13938 | 2816 if ( paging_mode_refcounts(d) ) |
tim@11687 | 2817 okay = get_page_from_pagenr(mfn, d); |
tim@11687 | 2818 else |
keir@18450 | 2819 okay = !get_page_and_type_from_pagenr( |
keir@18780 | 2820 mfn, PGT_root_page_table, d, 0, 0); |
kfraser@14181 | 2821 if ( unlikely(!okay) ) |
tim@11687 | 2822 { |
kfraser@14181 | 2823 MEM_LOG("Error while installing new mfn %lx", mfn); |
kfraser@14181 | 2824 break; |
tim@11687 | 2825 } |
kaf24@4426 | 2826 } |
kfraser@14181 | 2827 |
keir@19198 | 2828 old_mfn = pagetable_get_pfn(curr->arch.guest_table_user); |
keir@19198 | 2829 curr->arch.guest_table_user = pagetable_from_pfn(mfn); |
kfraser@14181 | 2830 |
kfraser@14181 | 2831 if ( old_mfn != 0 ) |
kfraser@14181 | 2832 { |
kfraser@14181 | 2833 if ( paging_mode_refcounts(d) ) |
kfraser@14181 | 2834 put_page(mfn_to_page(old_mfn)); |
kfraser@14181 | 2835 else |
kfraser@14181 | 2836 put_page_and_type(mfn_to_page(old_mfn)); |
kfraser@14181 | 2837 } |
kfraser@14181 | 2838 |
kaf24@4426 | 2839 break; |
kfraser@14181 | 2840 } |
kaf24@4426 | 2841 #endif |
kaf24@4426 | 2842 |
kaf24@4426 | 2843 case MMUEXT_TLB_FLUSH_LOCAL: |
keir@20132 | 2844 flush_tlb_local(); |
kaf24@4426 | 2845 break; |
kaf24@4426 | 2846 |
kaf24@4426 | 2847 case MMUEXT_INVLPG_LOCAL: |
Tim@13938 | 2848 if ( !paging_mode_enabled(d) |
keir@19198 | 2849 || paging_invlpg(curr, op.arg1.linear_addr) != 0 ) |
keir@16155 | 2850 flush_tlb_one_local(op.arg1.linear_addr); |
kaf24@4426 | 2851 break; |
kaf24@4426 | 2852 |
kaf24@4426 | 2853 case MMUEXT_TLB_FLUSH_MULTI: |
kaf24@4441 | 2854 case MMUEXT_INVLPG_MULTI: |
kaf24@4441 | 2855 { |
keir@20132 | 2856 cpumask_t pmask; |
keir@19824 | 2857 |
keir@19824 | 2858 if ( unlikely(vcpumask_to_pcpumask(d, op.arg2.vcpumask, &pmask)) ) |
kaf24@4441 | 2859 { |
kaf24@4459 | 2860 okay = 0; |
kaf24@4459 | 2861 break; |
kaf24@4441 | 2862 } |
kaf24@4441 | 2863 if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI ) |
keir@19689 | 2864 flush_tlb_mask(&pmask); |
kaf24@4441 | 2865 else |
keir@19689 | 2866 flush_tlb_one_mask(&pmask, op.arg1.linear_addr); |
kaf24@4426 | 2867 break; |
kaf24@4441 | 2868 } |
kaf24@4426 | 2869 |
kaf24@4426 | 2870 case MMUEXT_TLB_FLUSH_ALL: |
keir@20132 | 2871 flush_tlb_mask(&d->domain_dirty_cpumask); |
kaf24@4426 | 2872 break; |
kaf24@4426 | 2873 |
kaf24@4426 | 2874 case MMUEXT_INVLPG_ALL: |
keir@19689 | 2875 flush_tlb_one_mask(&d->domain_dirty_cpumask, op.arg1.linear_addr); |
kaf24@4426 | 2876 break; |
kaf24@4426 | 2877 |
kaf24@4426 | 2878 case MMUEXT_FLUSH_CACHE: |
kaf24@8498 | 2879 if ( unlikely(!cache_flush_permitted(d)) ) |
kaf24@4426 | 2880 { |
kaf24@6368 | 2881 MEM_LOG("Non-physdev domain tried to FLUSH_CACHE."); |
kaf24@4426 | 2882 okay = 0; |
kaf24@3757 | 2883 } |
kaf24@4426 | 2884 else |
kaf24@4426 | 2885 { |
kaf24@4426 | 2886 wbinvd(); |
kaf24@4426 | 2887 } |
kaf24@4426 | 2888 break; |
kaf24@4426 | 2889 |
kaf24@4426 | 2890 case MMUEXT_SET_LDT: |
kaf24@3757 | 2891 { |
kaf24@8011 | 2892 unsigned long ptr = op.arg1.linear_addr; |
kaf24@8011 | 2893 unsigned long ents = op.arg2.nr_ents; |
kaf24@8011 | 2894 |
Tim@13938 | 2895 if ( paging_mode_external(d) ) |
mafetter@4502 | 2896 { |
keir@16612 | 2897 MEM_LOG("ignoring SET_LDT hypercall from external domain"); |
mafetter@4502 | 2898 okay = 0; |
mafetter@4502 | 2899 } |
kaf24@8011 | 2900 else if ( ((ptr & (PAGE_SIZE-1)) != 0) || |
kaf24@8011 | 2901 (ents > 8192) || |
kaf24@8011 | 2902 !array_access_ok(ptr, ents, LDT_ENTRY_SIZE) ) |
kaf24@4426 | 2903 { |
kaf24@4426 | 2904 okay = 0; |
kaf24@4692 | 2905 MEM_LOG("Bad args to SET_LDT: ptr=%lx, ents=%lx", ptr, ents); |
kaf24@4426 | 2906 } |
keir@19198 | 2907 else if ( (curr->arch.guest_context.ldt_ents != ents) || |
keir@19198 | 2908 (curr->arch.guest_context.ldt_base != ptr) ) |
kaf24@4426 | 2909 { |
keir@19199 | 2910 invalidate_shadow_ldt(curr, 0); |
keir@20132 | 2911 flush_tlb_local(); |
keir@19198 | 2912 curr->arch.guest_context.ldt_base = ptr; |
keir@19198 | 2913 curr->arch.guest_context.ldt_ents = ents; |
keir@19198 | 2914 load_LDT(curr); |
kaf24@4426 | 2915 if ( ents != 0 ) |
keir@20132 | 2916 (void)map_ldt_shadow_page(0); |
kaf24@4426 | 2917 } |
kaf24@4426 | 2918 break; |
kaf24@4426 | 2919 } |
sos22@8724 | 2920 |
keir@20769 | 2921 case MMUEXT_CLEAR_PAGE: { |
keir@20769 | 2922 unsigned long mfn; |
keir@18762 | 2923 unsigned char *ptr; |
keir@18762 | 2924 |
keir@20769 | 2925 mfn = gmfn_to_mfn(d, op.arg1.mfn); |
keir@20790 | 2926 okay = !get_page_and_type_from_pagenr( |
keir@20790 | 2927 mfn, PGT_writable_page, d, 0, 0); |
keir@18762 | 2928 if ( unlikely(!okay) ) |
keir@18762 | 2929 { |
keir@18762 | 2930 MEM_LOG("Error while clearing mfn %lx", mfn); |
keir@18762 | 2931 break; |
keir@18762 | 2932 } |
keir@18762 | 2933 |
keir@18762 | 2934 /* A page is dirtied when it's being cleared. */ |
keir@18762 | 2935 paging_mark_dirty(d, mfn); |
keir@18762 | 2936 |
keir@18762 | 2937 ptr = fixmap_domain_page(mfn); |
keir@18762 | 2938 clear_page(ptr); |
keir@18762 | 2939 fixunmap_domain_page(ptr); |
keir@18762 | 2940 |
keir@20769 | 2941 put_page_and_type(mfn_to_page(mfn)); |
keir@18762 | 2942 break; |
keir@18762 | 2943 } |
keir@18762 | 2944 |
keir@18762 | 2945 case MMUEXT_COPY_PAGE: |
keir@18762 | 2946 { |
keir@18762 | 2947 const unsigned char *src; |
keir@18762 | 2948 unsigned char *dst; |
keir@20769 | 2949 unsigned long src_mfn, mfn; |
keir@18762 | 2950 |
keir@20790 | 2951 src_mfn = gmfn_to_mfn(d, op.arg2.src_mfn); |
keir@20790 | 2952 okay = get_page_from_pagenr(src_mfn, d); |
keir@18762 | 2953 if ( unlikely(!okay) ) |
keir@18762 | 2954 { |
keir@18762 | 2955 MEM_LOG("Error while copying from mfn %lx", src_mfn); |
keir@18762 | 2956 break; |
keir@18762 | 2957 } |
keir@18762 | 2958 |
keir@20769 | 2959 mfn = gmfn_to_mfn(d, op.arg1.mfn); |
keir@20790 | 2960 okay = !get_page_and_type_from_pagenr( |
keir@20790 | 2961 mfn, PGT_writable_page, d, 0, 0); |
keir@18762 | 2962 if ( unlikely(!okay) ) |
keir@18762 | 2963 { |
keir@18762 | 2964 put_page(mfn_to_page(src_mfn)); |
keir@18762 | 2965 MEM_LOG("Error while copying to mfn %lx", mfn); |
keir@18762 | 2966 break; |
keir@18762 | 2967 } |
keir@18762 | 2968 |
keir@18762 | 2969 /* A page is dirtied when it's being copied to. */ |
keir@18762 | 2970 paging_mark_dirty(d, mfn); |
keir@18762 | 2971 |
keir@18762 | 2972 src = map_domain_page(src_mfn); |
keir@18762 | 2973 dst = fixmap_domain_page(mfn); |
keir@18762 | 2974 copy_page(dst, src); |
keir@18762 | 2975 fixunmap_domain_page(dst); |
keir@18762 | 2976 unmap_domain_page(src); |
keir@18762 | 2977 |
keir@20769 | 2978 put_page_and_type(mfn_to_page(mfn)); |
keir@18762 | 2979 put_page(mfn_to_page(src_mfn)); |
keir@18762 | 2980 break; |
keir@18762 | 2981 } |
keir@18762 | 2982 |
kaf24@4426 | 2983 default: |
kaf24@4692 | 2984 MEM_LOG("Invalid extended pt command 0x%x", op.cmd); |
kfraser@12423 | 2985 rc = -ENOSYS; |
kaf24@3757 | 2986 okay = 0; |
kaf24@3757 | 2987 break; |
kaf24@3757 | 2988 } |
kaf24@3757 | 2989 |
kaf24@4426 | 2990 if ( unlikely(!okay) ) |
kaf24@3757 | 2991 { |
kfraser@12423 | 2992 rc = rc ? rc : -EINVAL; |
kaf24@3757 | 2993 break; |
kaf24@3757 | 2994 } |
kaf24@3757 | 2995 |
kaf24@9197 | 2996 guest_handle_add_offset(uops, 1); |
kaf24@3757 | 2997 } |
kaf24@3757 | 2998 |
keir@18450 | 2999 if ( rc == -EAGAIN ) |
keir@18450 | 3000 rc = hypercall_create_continuation( |
keir@18450 | 3001 __HYPERVISOR_mmuext_op, "hihi", |
keir@18450 | 3002 uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); |
keir@18450 | 3003 |
keir@20132 | 3004 put_pg_owner(pg_owner); |
kfraser@14181 | 3005 |
kfraser@14624 | 3006 perfc_add(num_mmuext_ops, i); |
kfraser@14624 | 3007 |
kaf24@4426 | 3008 out: |
kaf24@4426 | 3009 /* Add incremental work we have done to the @done output parameter. */ |
kaf24@9197 | 3010 if ( unlikely(!guest_handle_is_null(pdone)) ) |
kfraser@12423 | 3011 { |
kfraser@12423 | 3012 done += i; |
kaf24@9197 | 3013 copy_to_guest(pdone, &done, 1); |
kfraser@12423 | 3014 } |
kaf24@4426 | 3015 |
kaf24@4426 | 3016 return rc; |
kaf24@3757 | 3017 } |
kaf24@3757 | 3018 |
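/*
 * do_mmu_update() implements the HYPERVISOR_mmu_update hypercall: a batch
 * of (ptr, val) requests where the low bits of ptr encode the command.
 * MMU_NORMAL_PT_UPDATE and MMU_PT_UPDATE_PRESERVE_AD write a validated
 * entry into a guest page table (or a plain writable page).  If non-zero,
 * the upper 16 bits of @foreigndom select the page-table owner (domain ID
 * plus one); the lower 16 bits select the owner of the frames being mapped.
 *
 * Illustrative guest-side call (a sketch, not code from this file),
 * updating one PTE whose machine address is pte_maddr:
 *
 *     struct mmu_update u;
 *     u.ptr = pte_maddr | MMU_NORMAL_PT_UPDATE;
 *     u.val = new_pte_value;
 *     HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF);
 */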
kaf24@3757 | 3019 int do_mmu_update( |
kaf24@9904 | 3020 XEN_GUEST_HANDLE(mmu_update_t) ureqs, |
kaf24@4426 | 3021 unsigned int count, |
kaf24@9904 | 3022 XEN_GUEST_HANDLE(uint) pdone, |
kaf24@4426 | 3023 unsigned int foreigndom) |
kaf24@3757 | 3024 { |
kaf24@8717 | 3025 struct mmu_update req; |
mafetter@4837 | 3026 void *va; |
kaf24@8764 | 3027 unsigned long gpfn, gmfn, mfn; |
kaf24@8764 | 3028 struct page_info *page; |
kaf24@11019 | 3029 int rc = 0, okay = 1, i = 0; |
keir@19946 | 3030 unsigned int cmd, done = 0, pt_dom; |
keir@20132 | 3031 struct domain *d = current->domain, *pt_owner = d, *pg_owner; |
keir@19946 | 3032 struct vcpu *v = current; |
kfraser@15099 | 3033 struct domain_mmap_cache mapcache; |
kaf24@3757 | 3034 |
kaf24@4426 | 3035 if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) |
kaf24@3757 | 3036 { |
kaf24@3757 | 3037 count &= ~MMU_UPDATE_PREEMPTED; |
kaf24@9197 | 3038 if ( unlikely(!guest_handle_is_null(pdone)) ) |
kaf24@9197 | 3039 (void)copy_from_guest(&done, pdone, 1); |
kaf24@4426 | 3040 } |
kfraser@14624 | 3041 else |
kfraser@14624 | 3042 perfc_incr(calls_to_mmu_update); |
kaf24@4426 | 3043 |
kfraser@14181 | 3044 if ( unlikely(!guest_handle_okay(ureqs, count)) ) |
kfraser@14181 | 3045 { |
kfraser@14181 | 3046 rc = -EFAULT; |
kfraser@14181 | 3047 goto out; |
kfraser@14181 | 3048 } |
kaf24@4847 | 3049 |
keir@19946 | 3050 if ( (pt_dom = foreigndom >> 16) != 0 ) |
keir@19946 | 3051 { |
keir@19946 | 3052 /* Pagetables belong to a foreign domain (PFD). */ |
keir@19946 | 3053 if ( (pt_owner = rcu_lock_domain_by_id(pt_dom - 1)) == NULL ) |
keir@19946 | 3054 { |
keir@19946 | 3055 rc = -EINVAL; |
keir@19946 | 3056 goto out; |
keir@19946 | 3057 } |
keir@19946 | 3058 if ( pt_owner == d ) |
keir@19946 | 3059 rcu_unlock_domain(pt_owner); |
keir@19946 | 3060 if ( (v = pt_owner->vcpu ? pt_owner->vcpu[0] : NULL) == NULL ) |
keir@19946 | 3061 { |
keir@19946 | 3062 rc = -EINVAL; |
keir@19946 | 3063 goto out; |
keir@19946 | 3064 } |
keir@19946 | 3065 if ( !IS_PRIV_FOR(d, pt_owner) ) |
keir@19946 | 3066 { |
keir@19946 | 3067 rc = -ESRCH; |
keir@19946 | 3068 goto out; |
keir@19946 | 3069 } |
keir@19946 | 3070 } |
keir@19946 | 3071 |
keir@20132 | 3072 if ( (pg_owner = get_pg_owner((uint16_t)foreigndom)) == NULL ) |
kaf24@4426 | 3073 { |
kaf24@8830 | 3074 rc = -ESRCH; |
kaf24@4426 | 3075 goto out; |
kaf24@3757 | 3076 } |
kaf24@3757 | 3077 |
kfraser@14181 | 3078 domain_mmap_cache_init(&mapcache); |
kfraser@14181 | 3079 |
kaf24@3757 | 3080 for ( i = 0; i < count; i++ ) |
kaf24@3757 | 3081 { |
kaf24@3757 | 3082 if ( hypercall_preempt_check() ) |
kaf24@3757 | 3083 { |
keir@18450 | 3084 rc = -EAGAIN; |
kaf24@3757 | 3085 break; |
kaf24@3757 | 3086 } |
kaf24@3757 | 3087 |
kaf24@9197 | 3088 if ( unlikely(__copy_from_guest(&req, ureqs, 1) != 0) ) |
kaf24@3757 | 3089 { |
kaf24@9197 | 3090 MEM_LOG("Bad __copy_from_guest"); |
kaf24@3757 | 3091 rc = -EFAULT; |
kaf24@3757 | 3092 break; |
kaf24@3757 | 3093 } |
kaf24@3757 | 3094 |
kaf24@3757 | 3095 cmd = req.ptr & (sizeof(l1_pgentry_t)-1); |
kaf24@3757 | 3096 okay = 0; |
kaf24@3757 | 3097 |
kaf24@3757 | 3098 switch ( cmd ) |
kaf24@3757 | 3099 { |
kaf24@3757 | 3100 /* |
kaf24@3757 | 3101 * MMU_NORMAL_PT_UPDATE: Normal update to any level of page table. |
keir@16756 | 3102 * MMU_PT_UPDATE_PRESERVE_AD: As above but also preserve (OR) |
keir@16756 | 3103 * current A/D bits. |
kaf24@3757 | 3104 */ |
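/*
 * The target frame must be one of the guest's page tables (any level) or
 * a writable page.  Page-table entries are validated by the matching
 * mod_lN_entry() routine under page_lock(); writable pages are updated
 * through paging_write_guest_entry() instead.
 */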
kaf24@3757 | 3105 case MMU_NORMAL_PT_UPDATE: |
keir@16756 | 3106 case MMU_PT_UPDATE_PRESERVE_AD: |
keir@20718 | 3107 { |
keir@20718 | 3108 p2m_type_t p2mt; |
keir@20718 | 3109 |
keir@20132 | 3110 rc = xsm_mmu_normal_update(d, pg_owner, req.val); |
kfraser@15846 | 3111 if ( rc ) |
kfraser@15846 | 3112 break; |
kfraser@15846 | 3113 |
keir@16756 | 3114 req.ptr -= cmd; |
kaf24@8764 | 3115 gmfn = req.ptr >> PAGE_SHIFT; |
keir@20718 | 3116 mfn = mfn_x(gfn_to_mfn(pt_owner, gmfn, &p2mt)); |
keir@20750 | 3117 if ( !p2m_is_valid(p2mt) ) |
keir@20750 | 3118 mfn = INVALID_MFN; |
keir@20718 | 3119 |
keir@20718 | 3120 if ( p2m_is_paged(p2mt) ) |
keir@20718 | 3121 { |
keir@20718 | 3122 p2m_mem_paging_populate(pg_owner, gmfn); |
keir@20718 | 3123 |
keir@20718 | 3124 rc = -ENOENT; |
keir@20718 | 3125 break; |
keir@20718 | 3126 } |
keir@19946 | 3127 |
keir@19946 | 3128 if ( unlikely(!get_page_from_pagenr(mfn, pt_owner)) ) |
kaf24@3757 | 3129 { |
kaf24@3757 | 3130 MEM_LOG("Could not get page for normal update"); |
kaf24@3757 | 3131 break; |
kaf24@3757 | 3132 } |
kaf24@3757 | 3133 |
kaf24@5394 | 3134 va = map_domain_page_with_cache(mfn, &mapcache); |
kaf24@5709 | 3135 va = (void *)((unsigned long)va + |
kaf24@5709 | 3136 (unsigned long)(req.ptr & ~PAGE_MASK)); |
kaf24@8764 | 3137 page = mfn_to_page(mfn); |
mafetter@4837 | 3138 |
keir@19141 | 3139 if ( page_lock(page) ) |
kfraser@10274 | 3140 { |
keir@19141 | 3141 switch ( page->u.inuse.type_info & PGT_type_mask ) |
kaf24@3757 | 3142 { |
kfraser@10274 | 3143 case PGT_l1_page_table: |
kfraser@10274 | 3144 { |
kfraser@10274 | 3145 l1_pgentry_t l1e = l1e_from_intpte(req.val); |
keir@20718 | 3146 p2m_type_t l1e_p2mt; |
keir@20718 | 3147 gfn_to_mfn(pg_owner, l1e_get_pfn(l1e), &l1e_p2mt); |
keir@20718 | 3148 |
keir@20718 | 3149 if ( p2m_is_paged(l1e_p2mt) ) |
keir@20718 | 3150 { |
keir@20718 | 3151 p2m_mem_paging_populate(pg_owner, l1e_get_pfn(l1e)); |
keir@20718 | 3152 |
keir@20718 | 3153 rc = -ENOENT; |
keir@20718 | 3154 break; |
keir@20718 | 3155 } |
keir@20718 | 3156 else if ( p2m_ram_paging_in_start == l1e_p2mt ) |
keir@20718 | 3157 { |
keir@20718 | 3158 rc = -ENOENT; |
keir@20718 | 3159 break; |
keir@20718 | 3160 } |
keir@20729 | 3161 /* XXX: Ugly: pull all the checks into a separate function. |
keir@20729 | 3162 * Not doing that now, so as not to interfere with the mem_paging |
keir@20729 | 3163 * patches. */ |
keir@20729 | 3164 else if ( p2m_ram_shared == l1e_p2mt ) |
keir@20729 | 3165 { |
keir@20729 | 3166 /* Unshare the page for RW foreign mappings */ |
keir@20729 | 3167 if ( l1e_get_flags(l1e) & _PAGE_RW ) |
keir@20729 | 3168 { |
keir@20729 | 3169 rc = mem_sharing_unshare_page(pg_owner, |
keir@20729 | 3170 l1e_get_pfn(l1e), |
keir@20729 | 3171 0); |
keir@20729 | 3172 if ( rc ) break; |
keir@20729 | 3173 } |
keir@20729 | 3174 } |
keir@20718 | 3175 |
keir@16756 | 3176 okay = mod_l1_entry(va, l1e, mfn, |
keir@20132 | 3177 cmd == MMU_PT_UPDATE_PRESERVE_AD, v, |
keir@20132 | 3178 pg_owner); |
kaf24@3757 | 3179 } |
kaf24@3757 | 3180 break; |
kfraser@10274 | 3181 case PGT_l2_page_table: |
kaf24@3757 | 3182 { |
kfraser@10274 | 3183 l2_pgentry_t l2e = l2e_from_intpte(req.val); |
keir@20718 | 3184 p2m_type_t l2e_p2mt; |
keir@20718 | 3185 gfn_to_mfn(pg_owner, l2e_get_pfn(l2e), &l2e_p2mt); |
keir@20718 | 3186 |
keir@20718 | 3187 if ( p2m_is_paged(l2e_p2mt) ) |
keir@20718 | 3188 { |
keir@20718 | 3189 p2m_mem_paging_populate(pg_owner, l2e_get_pfn(l2e)); |
keir@20718 | 3190 |
keir@20718 | 3191 rc = -ENOENT; |
keir@20718 | 3192 break; |
keir@20718 | 3193 } |
keir@20718 | 3194 else if ( p2m_ram_paging_in_start == l2e_p2mt ) |
keir@20718 | 3195 { |
keir@20718 | 3196 rc = -ENOENT; |
keir@20718 | 3197 break; |
keir@20718 | 3198 } |
keir@20729 | 3199 else if ( p2m_ram_shared == l2e_p2mt ) |
keir@20729 | 3200 { |
keir@20729 | 3201 MEM_LOG("Unexpected attempt to map shared page."); |
keir@20729 | 3202 rc = -EINVAL; |
keir@20729 | 3203 break; |
keir@20729 | 3204 } |
keir@20729 | 3205 |
keir@20718 | 3206 |
keir@19141 | 3207 okay = mod_l2_entry(va, l2e, mfn, |
keir@19946 | 3208 cmd == MMU_PT_UPDATE_PRESERVE_AD, v); |
kaf24@3757 | 3209 } |
kaf24@3757 | 3210 break; |
kfraser@10274 | 3211 case PGT_l3_page_table: |
kaf24@3791 | 3212 { |
kfraser@10274 | 3213 l3_pgentry_t l3e = l3e_from_intpte(req.val); |
keir@20718 | 3214 p2m_type_t l3e_p2mt; |
keir@20718 | 3215 gfn_to_mfn(pg_owner, l3e_get_pfn(l3e), &l3e_p2mt); |
keir@20718 | 3216 |
keir@20718 | 3217 if ( p2m_is_paged(l3e_p2mt) ) |
keir@20718 | 3218 { |
keir@20718 | 3219 p2m_mem_paging_populate(pg_owner, l3e_get_pfn(l3e)); |
keir@20718 | 3220 |
keir@20718 | 3221 rc = -ENOENT; |
keir@20718 | 3222 break; |
keir@20718 | 3223 } |
keir@20718 | 3224 else if ( p2m_ram_paging_in_start == l3e_p2mt ) |
keir@20718 | 3225 { |
keir@20718 | 3226 rc = -ENOENT; |
keir@20718 | 3227 break; |
keir@20718 | 3228 } |
keir@20729 | 3229 else if ( p2m_ram_shared == l3e_p2mt ) |
keir@20729 | 3230 { |
keir@20729 | 3231 MEM_LOG("Unexpected attempt to map shared page."); |
keir@20729 | 3232 rc = -EINVAL; |
keir@20729 | 3233 break; |
keir@20729 | 3234 } |
keir@20718 | 3235 |
keir@18450 | 3236 rc = mod_l3_entry(va, l3e, mfn, |
keir@19946 | 3237 cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v); |
keir@18450 | 3238 okay = !rc; |
kaf24@3791 | 3239 } |
kaf24@3791 | 3240 break; |
kaf24@5275 | 3241 #if CONFIG_PAGING_LEVELS >= 4 |
kfraser@10274 | 3242 case PGT_l4_page_table: |
kfraser@14181 | 3243 { |
kfraser@14181 | 3244 l4_pgentry_t l4e = l4e_from_intpte(req.val); |
keir@20718 | 3245 p2m_type_t l4e_p2mt; |
keir@20718 | 3246 gfn_to_mfn(pg_owner, l4e_get_pfn(l4e), &l4e_p2mt); |
keir@20718 | 3247 |
keir@20718 | 3248 if ( p2m_is_paged(l4e_p2mt) ) |
keir@20718 | 3249 { |
keir@20718 | 3250 p2m_mem_paging_populate(pg_owner, l4e_get_pfn(l4e)); |
keir@20718 | 3251 |
keir@20718 | 3252 rc = -ENOENT; |
keir@20718 | 3253 break; |
keir@20718 | 3254 } |
keir@20718 | 3255 else if ( p2m_ram_paging_in_start == l4e_p2mt ) |
keir@20718 | 3256 { |
keir@20718 | 3257 rc = -ENOENT; |
keir@20718 | 3258 break; |
keir@20718 | 3259 } |
keir@20729 | 3260 else if ( p2m_ram_shared == l4e_p2mt ) |
keir@20729 | 3261 { |
keir@20729 | 3262 MEM_LOG("Unexpected attempt to map shared page."); |
keir@20729 | 3263 rc = -EINVAL; |
keir@20729 | 3264 break; |
keir@20729 | 3265 } |
keir@20718 | 3266 |
keir@18450 | 3267 rc = mod_l4_entry(va, l4e, mfn, |
keir@19946 | 3268 cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v); |
keir@18450 | 3269 okay = !rc; |
kfraser@14181 | 3270 } |
kfraser@14181 | 3271 break; |
kaf24@5275 | 3272 #endif |
keir@19141 | 3273 case PGT_writable_page: |
keir@19141 | 3274 perfc_incr(writable_mmu_updates); |
keir@19198 | 3275 okay = paging_write_guest_entry( |
keir@19946 | 3276 v, va, req.val, _mfn(mfn)); |
keir@19141 | 3277 break; |
kfraser@10274 | 3278 } |
keir@19141 | 3279 page_unlock(page); |
keir@18450 | 3280 if ( rc == -EINTR ) |
keir@18450 | 3281 rc = -EAGAIN; |
kfraser@10274 | 3282 } |
keir@19141 | 3283 else if ( get_page_type(page, PGT_writable_page) ) |
kfraser@10274 | 3284 { |
keir@17876 | 3285 perfc_incr(writable_mmu_updates); |
keir@19198 | 3286 okay = paging_write_guest_entry( |
keir@19946 | 3287 v, va, req.val, _mfn(mfn)); |
kfraser@10274 | 3288 put_page_type(page); |
kfraser@10274 | 3289 } |